From 501069dad5214fafe1b8ba38fa26a5d07df784c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Thu, 12 Oct 2023 10:21:57 +0300
Subject: [PATCH 0001/2340] drm/i915/display: Move releasing gem object away
 from fb tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a preparation for Xe we want to remove all i915_gem_object details away
from frontbuffer tacking code. Due to this move releasing gem object
reference to i915_gem_object_set_frontbuffer.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012072158.4115795-2-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_frontbuffer.c       | 2 --
 drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index ec46716b2f49e..2ea37c0414a95 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -265,8 +265,6 @@ static void frontbuffer_release(struct kref *ref)
 	spin_unlock(&intel_bo_to_i915(obj)->display.fb_tracking.lock);
 
 	i915_active_fini(&front->write);
-
-	i915_gem_object_put(obj);
 	kfree_rcu(front, rcu);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h
index e5e870b6f186c..9fbf14867a2a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h
@@ -89,6 +89,7 @@ i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj,
 
 	if (!front) {
 		RCU_INIT_POINTER(obj->frontbuffer, NULL);
+		drm_gem_object_put(intel_bo_to_drm_bo(obj));
 	} else if (rcu_access_pointer(obj->frontbuffer)) {
 		cur = rcu_dereference_protected(obj->frontbuffer, true);
 		kref_get(&cur->ref);
-- 
GitLab


From 3594d00b71eea66d183b310c19aa5a6bf4206e62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Thu, 12 Oct 2023 10:21:58 +0300
Subject: [PATCH 0002/2340] drm/i915/display: Use intel_bo_to_drm_bo instead of
 obj->base
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Xe and i915 objects have differing implementation. Use intel_bo_to_drm_bo
instead of obj->base as xe_bo doesn't have base pointer.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012072158.4115795-3-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_fb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index e7678571b0d78..e1d298efc510b 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -1930,10 +1930,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	if (!atomic_read(&front->bits))
 		return 0;
 
-	if (dma_resv_test_signaled(obj->base.resv, dma_resv_usage_rw(false)))
+	if (dma_resv_test_signaled(intel_bo_to_drm_bo(obj)->resv, dma_resv_usage_rw(false)))
 		goto flush;
 
-	ret = dma_resv_get_singleton(obj->base.resv, dma_resv_usage_rw(false),
+	ret = dma_resv_get_singleton(intel_bo_to_drm_bo(obj)->resv, dma_resv_usage_rw(false),
 				     &fence);
 	if (ret || !fence)
 		goto flush;
-- 
GitLab


From f17c08a6046f0c9383a61d7009216b0ad3369db4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 11 Oct 2023 17:27:04 +0300
Subject: [PATCH 0003/2340] drm/i915: drop gt/intel_gt.h include from
 skl_universal_plane.c

No longer needed after commit 94bcf876cb6a ("drm/i915/mtl: Drop
Wa_14017240301").

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231011142704.985867-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 245a64332cc73..49e9d40d5e677 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -21,7 +21,6 @@
 #include "skl_scaler.h"
 #include "skl_universal_plane.h"
 #include "skl_watermark.h"
-#include "gt/intel_gt.h"
 #include "pxp/intel_pxp.h"
 
 static const u32 skl_plane_formats[] = {
-- 
GitLab


From 26eb4fcf2349b3dc02ee6f96925419eb7b3026d0 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 11 Oct 2023 23:15:32 +0300
Subject: [PATCH 0004/2340] drm/i915/aux: add separate register macros and
 functions for VLV/CHV

Add separate macros for VLV/CHV registers without the implicit dev_priv,
and with the display MMIO base baked in.

A number of implicitly used dev_priv local variables can be removed.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231011201533.1081368-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_aux.c   | 43 +++++++++++++++----
 .../gpu/drm/i915/display/intel_dp_aux_regs.h  | 14 +++---
 drivers/gpu/drm/i915/gvt/handlers.c           |  1 -
 3 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
index 4431b6290c4cf..c106598a78c50 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
@@ -531,9 +531,40 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 	return ret;
 }
 
+static i915_reg_t vlv_aux_ctl_reg(struct intel_dp *intel_dp)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	enum aux_ch aux_ch = dig_port->aux_ch;
+
+	switch (aux_ch) {
+	case AUX_CH_B:
+	case AUX_CH_C:
+	case AUX_CH_D:
+		return VLV_DP_AUX_CH_CTL(aux_ch);
+	default:
+		MISSING_CASE(aux_ch);
+		return VLV_DP_AUX_CH_CTL(AUX_CH_B);
+	}
+}
+
+static i915_reg_t vlv_aux_data_reg(struct intel_dp *intel_dp, int index)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	enum aux_ch aux_ch = dig_port->aux_ch;
+
+	switch (aux_ch) {
+	case AUX_CH_B:
+	case AUX_CH_C:
+	case AUX_CH_D:
+		return VLV_DP_AUX_CH_DATA(aux_ch, index);
+	default:
+		MISSING_CASE(aux_ch);
+		return VLV_DP_AUX_CH_DATA(AUX_CH_B, index);
+	}
+}
+
 static i915_reg_t g4x_aux_ctl_reg(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -550,7 +581,6 @@ static i915_reg_t g4x_aux_ctl_reg(struct intel_dp *intel_dp)
 
 static i915_reg_t g4x_aux_data_reg(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -567,7 +597,6 @@ static i915_reg_t g4x_aux_data_reg(struct intel_dp *intel_dp, int index)
 
 static i915_reg_t ilk_aux_ctl_reg(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -586,7 +615,6 @@ static i915_reg_t ilk_aux_ctl_reg(struct intel_dp *intel_dp)
 
 static i915_reg_t ilk_aux_data_reg(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -605,7 +633,6 @@ static i915_reg_t ilk_aux_data_reg(struct intel_dp *intel_dp, int index)
 
 static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -625,7 +652,6 @@ static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp)
 
 static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -645,7 +671,6 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index)
 
 static i915_reg_t tgl_aux_ctl_reg(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -668,7 +693,6 @@ static i915_reg_t tgl_aux_ctl_reg(struct intel_dp *intel_dp)
 
 static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -757,6 +781,9 @@ void intel_dp_aux_init(struct intel_dp *intel_dp)
 	} else if (HAS_PCH_SPLIT(dev_priv)) {
 		intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg;
 		intel_dp->aux_ch_data_reg = ilk_aux_data_reg;
+	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		intel_dp->aux_ch_ctl_reg = vlv_aux_ctl_reg;
+		intel_dp->aux_ch_data_reg = vlv_aux_data_reg;
 	} else {
 		intel_dp->aux_ch_ctl_reg = g4x_aux_ctl_reg;
 		intel_dp->aux_ch_data_reg = g4x_aux_data_reg;
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h b/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h
index 34f6e0a48ed20..e642445364d27 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_regs.h
@@ -21,13 +21,14 @@
 #define __xe2lpd_aux_ch_idx(aux_ch)						\
 	(aux_ch >= AUX_CH_USBC1 ? aux_ch : AUX_CH_USBC4 + 1 + (aux_ch) - AUX_CH_A)
 
-/* TODO: Remove implicit dev_priv */
-#define _DPA_AUX_CH_CTL			(DISPLAY_MMIO_BASE(dev_priv) + 0x64010)
-#define _DPB_AUX_CH_CTL			(DISPLAY_MMIO_BASE(dev_priv) + 0x64110)
+#define _DPA_AUX_CH_CTL			0x64010
+#define _DPB_AUX_CH_CTL			0x64110
 #define _XELPDP_USBC1_AUX_CH_CTL	0x16f210
 #define _XELPDP_USBC2_AUX_CH_CTL	0x16f410
 #define DP_AUX_CH_CTL(aux_ch)		_MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL,	\
 						   _DPB_AUX_CH_CTL)
+#define VLV_DP_AUX_CH_CTL(aux_ch)	_MMIO(VLV_DISPLAY_BASE + \
+					      _PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL))
 #define _XELPDP_DP_AUX_CH_CTL(aux_ch)						\
 		_MMIO(_PICK_EVEN_2RANGES(aux_ch, AUX_CH_USBC1,			\
 					 _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL,	\
@@ -69,13 +70,14 @@
 #define   DP_AUX_CH_CTL_SYNC_PULSE_SKL_MASK	REG_GENMASK(4, 0) /* skl+ */
 #define   DP_AUX_CH_CTL_SYNC_PULSE_SKL(c)	REG_FIELD_PREP(DP_AUX_CH_CTL_SYNC_PULSE_SKL_MASK, (c) - 1)
 
-/* TODO: Remove implicit dev_priv */
-#define _DPA_AUX_CH_DATA1		(DISPLAY_MMIO_BASE(dev_priv) + 0x64014)
-#define _DPB_AUX_CH_DATA1		(DISPLAY_MMIO_BASE(dev_priv) + 0x64114)
+#define _DPA_AUX_CH_DATA1		0x64014
+#define _DPB_AUX_CH_DATA1		0x64114
 #define _XELPDP_USBC1_AUX_CH_DATA1	0x16f214
 #define _XELPDP_USBC2_AUX_CH_DATA1	0x16f414
 #define DP_AUX_CH_DATA(aux_ch, i)	_MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1,	\
 						    _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
+#define VLV_DP_AUX_CH_DATA(aux_ch, i)	_MMIO(VLV_DISPLAY_BASE + _PORT(aux_ch, _DPA_AUX_CH_DATA1, \
+								       _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
 #define _XELPDP_DP_AUX_CH_DATA(aux_ch, i)					\
 		_MMIO(_PICK_EVEN_2RANGES(aux_ch, AUX_CH_USBC1,			\
 					 _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1,	\
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index a9f7fa9b90bda..72addd8d380fd 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2576,7 +2576,6 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt)
 
 static int init_skl_mmio_info(struct intel_gvt *gvt)
 {
-	struct drm_i915_private *dev_priv = gvt->gt->i915;
 	int ret;
 
 	MMIO_DH(FORCEWAKE_RENDER_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
-- 
GitLab


From fa072c0d9240233a281097f1f2a965441654eaa2 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 11 Oct 2023 23:15:33 +0300
Subject: [PATCH 0005/2340] drm/i915/aux: rename dev_priv to i915

No reason to stick to dev_priv, rename to i915.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231011201533.1081368-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_aux.c | 58 ++++++++++-----------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
index c106598a78c50..2e2af71bcd5a8 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
@@ -74,7 +74,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp)
 
 static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 
 	if (index)
 		return 0;
@@ -83,12 +83,12 @@ static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index)
 	 * The clock divider is based off the hrawclk, and would like to run at
 	 * 2MHz.  So, take the hrawclk value and divide by 2000 and use that
 	 */
-	return DIV_ROUND_CLOSEST(RUNTIME_INFO(dev_priv)->rawclk_freq, 2000);
+	return DIV_ROUND_CLOSEST(RUNTIME_INFO(i915)->rawclk_freq, 2000);
 }
 
 static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	u32 freq;
 
@@ -101,18 +101,18 @@ static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index)
 	 * divide by 2000 and use that
 	 */
 	if (dig_port->aux_ch == AUX_CH_A)
-		freq = dev_priv->display.cdclk.hw.cdclk;
+		freq = i915->display.cdclk.hw.cdclk;
 	else
-		freq = RUNTIME_INFO(dev_priv)->rawclk_freq;
+		freq = RUNTIME_INFO(i915)->rawclk_freq;
 	return DIV_ROUND_CLOSEST(freq, 2000);
 }
 
 static u32 hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 
-	if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(dev_priv)) {
+	if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(i915)) {
 		/* Workaround for non-ULT HSW */
 		switch (index) {
 		case 0: return 63;
@@ -165,12 +165,11 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
 				u32 aux_clock_divider)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct drm_i915_private *dev_priv =
-			to_i915(dig_port->base.base.dev);
+	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
 	u32 timeout;
 
 	/* Max timeout value on G4x-BDW: 1.6ms */
-	if (IS_BROADWELL(dev_priv))
+	if (IS_BROADWELL(i915))
 		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
 	else
 		timeout = DP_AUX_CH_CTL_TIME_OUT_400us;
@@ -229,8 +228,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 		  u32 aux_send_ctl_flags)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct drm_i915_private *i915 =
-			to_i915(dig_port->base.base.dev);
+	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
 	enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
 	bool is_tc_port = intel_phy_is_tc(i915, phy);
 	i915_reg_t ch_ctl, ch_data[5];
@@ -715,7 +713,7 @@ static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index)
 
 static i915_reg_t xelpdp_aux_ctl_reg(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -726,16 +724,16 @@ static i915_reg_t xelpdp_aux_ctl_reg(struct intel_dp *intel_dp)
 	case AUX_CH_USBC2:
 	case AUX_CH_USBC3:
 	case AUX_CH_USBC4:
-		return XELPDP_DP_AUX_CH_CTL(dev_priv, aux_ch);
+		return XELPDP_DP_AUX_CH_CTL(i915, aux_ch);
 	default:
 		MISSING_CASE(aux_ch);
-		return XELPDP_DP_AUX_CH_CTL(dev_priv, AUX_CH_A);
+		return XELPDP_DP_AUX_CH_CTL(i915, AUX_CH_A);
 	}
 }
 
 static i915_reg_t xelpdp_aux_data_reg(struct intel_dp *intel_dp, int index)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum aux_ch aux_ch = dig_port->aux_ch;
 
@@ -746,10 +744,10 @@ static i915_reg_t xelpdp_aux_data_reg(struct intel_dp *intel_dp, int index)
 	case AUX_CH_USBC2:
 	case AUX_CH_USBC3:
 	case AUX_CH_USBC4:
-		return XELPDP_DP_AUX_CH_DATA(dev_priv, aux_ch, index);
+		return XELPDP_DP_AUX_CH_DATA(i915, aux_ch, index);
 	default:
 		MISSING_CASE(aux_ch);
-		return XELPDP_DP_AUX_CH_DATA(dev_priv, AUX_CH_A, index);
+		return XELPDP_DP_AUX_CH_DATA(i915, AUX_CH_A, index);
 	}
 }
 
@@ -763,25 +761,25 @@ void intel_dp_aux_fini(struct intel_dp *intel_dp)
 
 void intel_dp_aux_init(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct intel_encoder *encoder = &dig_port->base;
 	enum aux_ch aux_ch = dig_port->aux_ch;
 	char buf[AUX_CH_NAME_BUFSIZE];
 
-	if (DISPLAY_VER(dev_priv) >= 14) {
+	if (DISPLAY_VER(i915) >= 14) {
 		intel_dp->aux_ch_ctl_reg = xelpdp_aux_ctl_reg;
 		intel_dp->aux_ch_data_reg = xelpdp_aux_data_reg;
-	} else if (DISPLAY_VER(dev_priv) >= 12) {
+	} else if (DISPLAY_VER(i915) >= 12) {
 		intel_dp->aux_ch_ctl_reg = tgl_aux_ctl_reg;
 		intel_dp->aux_ch_data_reg = tgl_aux_data_reg;
-	} else if (DISPLAY_VER(dev_priv) >= 9) {
+	} else if (DISPLAY_VER(i915) >= 9) {
 		intel_dp->aux_ch_ctl_reg = skl_aux_ctl_reg;
 		intel_dp->aux_ch_data_reg = skl_aux_data_reg;
-	} else if (HAS_PCH_SPLIT(dev_priv)) {
+	} else if (HAS_PCH_SPLIT(i915)) {
 		intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg;
 		intel_dp->aux_ch_data_reg = ilk_aux_data_reg;
-	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+	} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
 		intel_dp->aux_ch_ctl_reg = vlv_aux_ctl_reg;
 		intel_dp->aux_ch_data_reg = vlv_aux_data_reg;
 	} else {
@@ -789,26 +787,26 @@ void intel_dp_aux_init(struct intel_dp *intel_dp)
 		intel_dp->aux_ch_data_reg = g4x_aux_data_reg;
 	}
 
-	if (DISPLAY_VER(dev_priv) >= 9)
+	if (DISPLAY_VER(i915) >= 9)
 		intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider;
-	else if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
+	else if (IS_BROADWELL(i915) || IS_HASWELL(i915))
 		intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider;
-	else if (HAS_PCH_SPLIT(dev_priv))
+	else if (HAS_PCH_SPLIT(i915))
 		intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider;
 	else
 		intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider;
 
-	if (DISPLAY_VER(dev_priv) >= 9)
+	if (DISPLAY_VER(i915) >= 9)
 		intel_dp->get_aux_send_ctl = skl_get_aux_send_ctl;
 	else
 		intel_dp->get_aux_send_ctl = g4x_get_aux_send_ctl;
 
-	intel_dp->aux.drm_dev = &dev_priv->drm;
+	intel_dp->aux.drm_dev = &i915->drm;
 	drm_dp_aux_init(&intel_dp->aux);
 
 	/* Failure to allocate our preferred name is not critical */
 	intel_dp->aux.name = kasprintf(GFP_KERNEL, "AUX %s/%s",
-				       aux_ch_name(dev_priv, buf, sizeof(buf), aux_ch),
+				       aux_ch_name(i915, buf, sizeof(buf), aux_ch),
 				       encoder->base.name);
 
 	intel_dp->aux.transfer = intel_dp_aux_transfer;
-- 
GitLab


From a388b41a426ebd84ecd8ab12d6aaae7e06344a5b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 11 Oct 2023 19:21:02 +0300
Subject: [PATCH 0006/2340] drm/i915: stop including i915_utils.h from
 intel_runtime_pm.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove an unnecessary include.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231011162102.1030354-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/intel_runtime_pm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index f79cda7a2503d..be43614c73fdc 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -11,8 +11,6 @@
 
 #include "intel_wakeref.h"
 
-#include "i915_utils.h"
-
 struct device;
 struct drm_i915_private;
 struct drm_printer;
-- 
GitLab


From 8fa1c7cd1fe9cdfc426a603e1f1eecd3f463c487 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 19 Oct 2023 10:02:42 -0700
Subject: [PATCH 0007/2340] drm/i915/mcr: Hold GT forcewake during steering
 operations

The steering control and semaphore registers are inside an "always on"
power domain with respect to RC6.  However there are some issues if
higher-level platform sleep states are entering/exiting at the same time
these registers are accessed.  Grabbing GT forcewake and holding it over
the entire lock/steer/unlock cycle ensures that those sleep states have
been fully exited before we access these registers.

This is expected to become a formally documented/numbered workaround
soon.

Note that this patch alone isn't expected to have an immediately
noticeable impact on MCR (mis)behavior; an upcoming pcode firmware
update will also be necessary to provide the other half of this
workaround.

v2:
 - Move the forcewake inside the Xe_LPG-specific IP version check.  This
   should only be necessary on platforms that have a steering semaphore.

Fixes: 3100240bf846 ("drm/i915/mtl: Add hardware-level lock for steering")
Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231019170241.2102037-2-matthew.d.roper@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 326c2ed1d99bb..34913912d8ae2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -376,9 +376,26 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
 	 * driver threads, but also with hardware/firmware agents.  A dedicated
 	 * locking register is used.
 	 */
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+		/*
+		 * The steering control and semaphore registers are inside an
+		 * "always on" power domain with respect to RC6.  However there
+		 * are some issues if higher-level platform sleep states are
+		 * entering/exiting at the same time these registers are
+		 * accessed.  Grabbing GT forcewake and holding it over the
+		 * entire lock/steer/unlock cycle ensures that those sleep
+		 * states have been fully exited before we access these
+		 * registers.  This wakeref will be released in the unlock
+		 * routine.
+		 *
+		 * This is expected to become a formally documented/numbered
+		 * workaround soon.
+		 */
+		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT);
+
 		err = wait_for(intel_uncore_read_fw(gt->uncore,
 						    MTL_STEER_SEMAPHORE) == 0x1, 100);
+	}
 
 	/*
 	 * Even on platforms with a hardware lock, we'll continue to grab
@@ -415,8 +432,11 @@ void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags)
 {
 	spin_unlock_irqrestore(&gt->mcr_lock, flags);
 
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
 		intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1);
+
+		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_GT);
+	}
 }
 
 /**
-- 
GitLab


From 31f6a06f0c543b43a38fab10f39e5fc45ad62aa2 Mon Sep 17 00:00:00 2001
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Date: Fri, 20 Oct 2023 08:24:41 -0700
Subject: [PATCH 0008/2340] drm/i915/pmu: Check if pmu is closed before
 stopping event

When the driver unbinds, pmu is unregistered and i915->uabi_engines is
set to RB_ROOT. Due to this, when i915 PMU tries to stop the engine
events, it issues a warn_on because engine lookup fails.

All perf hooks are taking care of this using a pmu->closed flag that is
set when PMU unregisters. The stop event seems to have been left out.

Check for pmu->closed in pmu_event_stop as well.

Based on discussion here -
https://patchwork.freedesktop.org/patch/492079/?series=105790&rev=2

v2: s/is/if/ in commit title
v3: Add fixes tag and cc stable

Cc: <stable@vger.kernel.org> # v5.11+
Fixes: b00bccb3f0bb ("drm/i915/pmu: Handle PCI unbind")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231020152441.3764850-1-umesh.nerlige.ramappa@intel.com
---
 drivers/gpu/drm/i915/i915_pmu.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 108b675088bad..f861863eb7c15 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -831,9 +831,18 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
 
 static void i915_pmu_event_stop(struct perf_event *event, int flags)
 {
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (pmu->closed)
+		goto out;
+
 	if (flags & PERF_EF_UPDATE)
 		i915_pmu_event_read(event);
 	i915_pmu_disable(event);
+
+out:
 	event->hw.state = PERF_HES_STOPPED;
 }
 
-- 
GitLab


From bc725dc1a8317abb2403b3a906106dbe0d4d4422 Mon Sep 17 00:00:00 2001
From: Vinod Govindapillai <vinod.govindapillai@intel.com>
Date: Wed, 18 Oct 2023 13:27:22 +0300
Subject: [PATCH 0009/2340] drm/i915/display: debugfs entry to list display
 capabilities

Create a separate debugfs entry to list the display capabilities
IGT can rely on this debugfs entry for tests that depend on
display device and display runtime info for both xe and i915
drivers.

v2: rename the entry to i915_display_capabilities (Chaitanya)

Signed-off-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018102723.16915-2-vinod.govindapillai@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_debugfs.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 1610c2b6a6443..5d2f4fc5709be 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -642,6 +642,17 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int i915_display_capabilities(struct seq_file *m, void *unused)
+{
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	intel_display_device_info_print(DISPLAY_INFO(i915),
+					DISPLAY_RUNTIME_INFO(i915), &p);
+
+	return 0;
+}
+
 static int i915_shared_dplls_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -1060,6 +1071,7 @@ static const struct drm_info_list intel_display_debugfs_list[] = {
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
 	{"i915_power_domain_info", i915_power_domain_info, 0},
 	{"i915_display_info", i915_display_info, 0},
+	{"i915_display_capabilities", i915_display_capabilities, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
 	{"i915_dp_mst_info", i915_dp_mst_info, 0},
 	{"i915_ddb_info", i915_ddb_info, 0},
-- 
GitLab


From 8015bee0bfec6920f2441e5adc77e6ac2b65be8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:47 +0300
Subject: [PATCH 0010/2340] drm/i915/display: Add framework to add parameters
 specific to display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently all module parameters are handled by i915_param.c/h. This
is a problem for display parameters when Xe driver is used. Add
a mechanism to add parameters specific to the display. This is mainly
copied from i915_[debugfs]_params.[ch]. Parameters are not yet moved. This
is done by subsequent patches.

v2:
  - Drop unused predefinition (dentry)
  - Clarify need for empty INTEL_DISPLAY_PARAMS_FOR_EACH in comment

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-2-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/Makefile                 |   2 +
 .../gpu/drm/i915/display/intel_display_core.h |   2 +
 .../drm/i915/display/intel_display_debugfs.c  |   2 +
 .../display/intel_display_debugfs_params.c    | 176 ++++++++++++++++++
 .../display/intel_display_debugfs_params.h    |  13 ++
 .../drm/i915/display/intel_display_device.c   |   8 +
 .../drm/i915/display/intel_display_device.h   |   1 +
 .../drm/i915/display/intel_display_params.c   |  71 +++++++
 .../drm/i915/display/intel_display_params.h   |  34 ++++
 drivers/gpu/drm/i915/i915_driver.c            |   2 +
 10 files changed, 311 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/display/intel_display_debugfs_params.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_display_debugfs_params.h
 create mode 100644 drivers/gpu/drm/i915/display/intel_display_params.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_display_params.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 88b2bb005014a..3b9dcb606fc1a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -95,6 +95,7 @@ i915-$(CONFIG_DEBUG_FS) += \
 	i915_debugfs.o \
 	i915_debugfs_params.o \
 	display/intel_display_debugfs.o \
+	display/intel_display_debugfs_params.o \
 	display/intel_pipe_crc.o
 i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
@@ -257,6 +258,7 @@ i915-y += \
 	display/intel_display.o \
 	display/intel_display_driver.o \
 	display/intel_display_irq.o \
+	display/intel_display_params.o \
 	display/intel_display_power.o \
 	display/intel_display_power_map.o \
 	display/intel_display_power_well.o \
diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
index 53e5c33e08c3b..f2c84ae522170 100644
--- a/drivers/gpu/drm/i915/display/intel_display_core.h
+++ b/drivers/gpu/drm/i915/display/intel_display_core.h
@@ -19,6 +19,7 @@
 #include "intel_cdclk.h"
 #include "intel_display_device.h"
 #include "intel_display_limits.h"
+#include "intel_display_params.h"
 #include "intel_display_power.h"
 #include "intel_dpll_mgr.h"
 #include "intel_fbc.h"
@@ -517,6 +518,7 @@ struct intel_display {
 	struct intel_hotplug hotplug;
 	struct intel_opregion opregion;
 	struct intel_overlay *overlay;
+	struct intel_display_params params;
 	struct intel_vbt_data vbt;
 	struct intel_wm wm;
 };
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 5d2f4fc5709be..42bd352f9fee7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -17,6 +17,7 @@
 #include "intel_de.h"
 #include "intel_crtc_state_dump.h"
 #include "intel_display_debugfs.h"
+#include "intel_display_debugfs_params.h"
 #include "intel_display_power.h"
 #include "intel_display_power_well.h"
 #include "intel_display_types.h"
@@ -1111,6 +1112,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915)
 	intel_hpd_debugfs_register(i915);
 	intel_psr_debugfs_register(i915);
 	intel_wm_debugfs_register(i915);
+	intel_display_debugfs_params(i915);
 }
 
 static int i915_panel_show(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c
new file mode 100644
index 0000000000000..b7e68eb624522
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+
+#include <drm/drm_drv.h>
+
+#include "intel_display_debugfs_params.h"
+#include "i915_drv.h"
+#include "intel_display_params.h"
+
+/* int param */
+static int intel_display_param_int_show(struct seq_file *m, void *data)
+{
+	int *value = m->private;
+
+	seq_printf(m, "%d\n", *value);
+
+	return 0;
+}
+
+static int intel_display_param_int_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, intel_display_param_int_show, inode->i_private);
+}
+
+static ssize_t intel_display_param_int_write(struct file *file,
+					     const char __user *ubuf, size_t len,
+					     loff_t *offp)
+{
+	struct seq_file *m = file->private_data;
+	int *value = m->private;
+	int ret;
+
+	ret = kstrtoint_from_user(ubuf, len, 0, value);
+	if (ret) {
+		/* support boolean values too */
+		bool b;
+
+		ret = kstrtobool_from_user(ubuf, len, &b);
+		if (!ret)
+			*value = b;
+	}
+
+	return ret ?: len;
+}
+
+static const struct file_operations intel_display_param_int_fops = {
+	.owner = THIS_MODULE,
+	.open = intel_display_param_int_open,
+	.read = seq_read,
+	.write = intel_display_param_int_write,
+	.llseek = default_llseek,
+	.release = single_release,
+};
+
+static const struct file_operations intel_display_param_int_fops_ro = {
+	.owner = THIS_MODULE,
+	.open = intel_display_param_int_open,
+	.read = seq_read,
+	.llseek = default_llseek,
+	.release = single_release,
+};
+
+/* unsigned int param */
+static int intel_display_param_uint_show(struct seq_file *m, void *data)
+{
+	unsigned int *value = m->private;
+
+	seq_printf(m, "%u\n", *value);
+
+	return 0;
+}
+
+static int intel_display_param_uint_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, intel_display_param_uint_show, inode->i_private);
+}
+
+static ssize_t intel_display_param_uint_write(struct file *file,
+					      const char __user *ubuf, size_t len,
+					      loff_t *offp)
+{
+	struct seq_file *m = file->private_data;
+	unsigned int *value = m->private;
+	int ret;
+
+	ret = kstrtouint_from_user(ubuf, len, 0, value);
+	if (ret) {
+		/* support boolean values too */
+		bool b;
+
+		ret = kstrtobool_from_user(ubuf, len, &b);
+		if (!ret)
+			*value = b;
+	}
+
+	return ret ?: len;
+}
+
+static const struct file_operations intel_display_param_uint_fops = {
+	.owner = THIS_MODULE,
+	.open = intel_display_param_uint_open,
+	.read = seq_read,
+	.write = intel_display_param_uint_write,
+	.llseek = default_llseek,
+	.release = single_release,
+};
+
+static const struct file_operations intel_display_param_uint_fops_ro = {
+	.owner = THIS_MODULE,
+	.open = intel_display_param_uint_open,
+	.read = seq_read,
+	.llseek = default_llseek,
+	.release = single_release,
+};
+
+#define RO(mode) (((mode) & 0222) == 0)
+
+__maybe_unused static struct dentry *
+intel_display_debugfs_create_int(const char *name, umode_t mode,
+			struct dentry *parent, int *value)
+{
+	return debugfs_create_file_unsafe(name, mode, parent, value,
+					  RO(mode) ? &intel_display_param_int_fops_ro :
+					  &intel_display_param_int_fops);
+}
+
+__maybe_unused static struct dentry *
+intel_display_debugfs_create_uint(const char *name, umode_t mode,
+			 struct dentry *parent, unsigned int *value)
+{
+	return debugfs_create_file_unsafe(name, mode, parent, value,
+					  RO(mode) ? &intel_display_param_uint_fops_ro :
+					  &intel_display_param_uint_fops);
+}
+
+#define _intel_display_param_create_file(parent, name, mode, valp)	\
+	do {								\
+		if (mode)						\
+			_Generic(valp,					\
+				 bool * : debugfs_create_bool,		\
+				 int * : intel_display_debugfs_create_int, \
+				 unsigned int * : intel_display_debugfs_create_uint, \
+				 unsigned long * : debugfs_create_ulong, \
+				 char ** : debugfs_create_str) \
+				(name, mode, parent, valp);		\
+	} while (0)
+
+/* add a subdirectory with files for each intel display param */
+void intel_display_debugfs_params(struct drm_i915_private *i915)
+{
+	struct drm_minor *minor = i915->drm.primary;
+	struct dentry *dir;
+	char dirname[16];
+
+	snprintf(dirname, sizeof(dirname), "%s_params", i915->drm.driver->name);
+	dir = debugfs_lookup(dirname, minor->debugfs_root);
+	if (!dir)
+		dir = debugfs_create_dir(dirname, minor->debugfs_root);
+	if (IS_ERR(dir))
+		return;
+
+	/*
+	 * Note: We could create files for params needing special handling
+	 * here. Set mode in params to 0 to skip the generic create file, or
+	 * just let the generic create file fail silently with -EEXIST.
+	 */
+
+#define REGISTER(T, x, unused, mode, ...) _intel_display_param_create_file( \
+		dir, #x, mode, &i915->display.params.x);
+	INTEL_DISPLAY_PARAMS_FOR_EACH(REGISTER);
+#undef REGISTER
+}
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h
new file mode 100644
index 0000000000000..1e9945a4044c0
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_DISPLAY_DEBUGFS_PARAMS__
+#define __INTEL_DISPLAY_DEBUGFS_PARAMS__
+
+struct drm_i915_private;
+
+void intel_display_debugfs_params(struct drm_i915_private *i915);
+
+#endif /* __INTEL_DISPLAY_DEBUGFS_PARAMS__ */
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c
index 2b1ec23ba9c33..e80842d1e7c73 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.c
+++ b/drivers/gpu/drm/i915/display/intel_display_device.c
@@ -12,6 +12,7 @@
 #include "intel_de.h"
 #include "intel_display.h"
 #include "intel_display_device.h"
+#include "intel_display_params.h"
 #include "intel_display_power.h"
 #include "intel_display_reg_defs.h"
 #include "intel_fbc.h"
@@ -937,6 +938,13 @@ void intel_display_device_probe(struct drm_i915_private *i915)
 		DISPLAY_RUNTIME_INFO(i915)->ip.rel = rel;
 		DISPLAY_RUNTIME_INFO(i915)->ip.step = step;
 	}
+
+	intel_display_params_copy(&i915->display.params);
+}
+
+void intel_display_device_remove(struct drm_i915_private *i915)
+{
+	intel_display_params_free(&i915->display.params);
 }
 
 static void __intel_display_device_info_runtime_init(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index 5b5c0e53307fa..4299cc452e050 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -161,6 +161,7 @@ struct intel_display_device_info {
 
 bool intel_display_device_enabled(struct drm_i915_private *i915);
 void intel_display_device_probe(struct drm_i915_private *i915);
+void intel_display_device_remove(struct drm_i915_private *i915);
 void intel_display_device_info_runtime_init(struct drm_i915_private *i915);
 
 void intel_display_device_info_print(const struct intel_display_device_info *info,
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
new file mode 100644
index 0000000000000..91953ae271447
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_display_params.h"
+#include "i915_drv.h"
+
+#define intel_display_param_named(name, T, perm, desc) \
+	module_param_named(name, intel_display_modparams.name, T, perm); \
+	MODULE_PARM_DESC(name, desc)
+#define intel_display_param_named_unsafe(name, T, perm, desc) \
+	module_param_named_unsafe(name, intel_display_modparams.name, T, perm); \
+	MODULE_PARM_DESC(name, desc)
+
+static struct intel_display_params intel_display_modparams __read_mostly = {
+#define MEMBER(T, member, value, ...) .member = (value),
+	INTEL_DISPLAY_PARAMS_FOR_EACH(MEMBER)
+#undef MEMBER
+};
+/*
+ * Note: As a rule, keep module parameter sysfs permissions read-only
+ * 0400. Runtime changes are only supported through i915 debugfs.
+ *
+ * For any exceptions requiring write access and runtime changes through module
+ * parameter sysfs, prevent debugfs file creation by setting the parameter's
+ * debugfs mode to 0.
+ */
+
+__maybe_unused static void _param_dup_charp(char **valp)
+{
+	*valp = kstrdup(*valp ? *valp : "", GFP_ATOMIC);
+}
+
+__maybe_unused static void _param_nop(void *valp)
+{
+}
+
+#define _param_dup(valp)				\
+	_Generic(valp,					\
+		 char ** : _param_dup_charp,		\
+		 default : _param_nop)			\
+		(valp)
+
+void intel_display_params_copy(struct intel_display_params *dest)
+{
+	*dest = intel_display_modparams;
+#define DUP(T, x, ...) _param_dup(&dest->x);
+	INTEL_DISPLAY_PARAMS_FOR_EACH(DUP);
+#undef DUP
+}
+
+__maybe_unused static void _param_free_charp(char **valp)
+{
+	kfree(*valp);
+	*valp = NULL;
+}
+
+#define _param_free(valp)				\
+	_Generic(valp,					\
+		 char ** : _param_free_charp,		\
+		 default : _param_nop)			\
+		(valp)
+
+/* free the allocated members, *not* the passed in params itself */
+void intel_display_params_free(struct intel_display_params *params)
+{
+#define FREE(T, x, ...) _param_free(&params->x);
+	INTEL_DISPLAY_PARAMS_FOR_EACH(FREE);
+#undef FREE
+}
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
new file mode 100644
index 0000000000000..9f4f45d410a36
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_DISPLAY_PARAMS_H_
+#define _INTEL_DISPLAY_PARAMS_H_
+
+struct drm_printer;
+
+/*
+ * Invoke param, a function-like macro, for each intel display param, with
+ * arguments:
+ *
+ * param(type, name, value, mode)
+ *
+ * type: parameter type, one of {bool, int, unsigned int, unsigned long, char *}
+ * name: name of the parameter
+ * value: initial/default value of the parameter
+ * mode: debugfs file permissions, one of {0400, 0600, 0}, use 0 to not create
+ *       debugfs file
+ */
+#define INTEL_DISPLAY_PARAMS_FOR_EACH(param) /* empty define to avoid build failure */
+
+#define MEMBER(T, member, ...) T member;
+struct intel_display_params {
+	INTEL_DISPLAY_PARAMS_FOR_EACH(MEMBER);
+};
+#undef MEMBER
+
+void intel_display_params_copy(struct intel_display_params *dest);
+void intel_display_params_free(struct intel_display_params *params);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index f30a0861541cc..3d86db5638fc8 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -908,6 +908,8 @@ static void i915_driver_release(struct drm_device *dev)
 	intel_runtime_pm_driver_release(rpm);
 
 	i915_driver_late_release(dev_priv);
+
+	intel_display_device_remove(dev_priv);
 }
 
 static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
-- 
GitLab


From 7a61a6aa59e479ee22a859fe4054973d3aa6c640 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:48 +0300
Subject: [PATCH 0011/2340] drm/i915/display: Dump also display parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GPU error dump contained all module parameters. If we are moving
display parameters to intel_display_params.[ch] they are not dumped
into GPU error dump. This patch is adding moved display parameters
back to GPU error dump. Display parameters are also included in
i915_capabilities

v2: Add parameters to i915_capabilities as well

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-3-jouni.hogander@intel.com
---
 .../drm/i915/display/intel_display_params.c   | 57 +++++++++++++++++++
 .../drm/i915/display/intel_display_params.h   |  3 +
 drivers/gpu/drm/i915/i915_debugfs.c           |  3 +
 drivers/gpu/drm/i915/i915_gpu_error.c         |  3 +
 drivers/gpu/drm/i915/i915_gpu_error.h         |  2 +
 5 files changed, 68 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 91953ae271447..11ee73a98b5b0 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -27,6 +27,63 @@ static struct intel_display_params intel_display_modparams __read_mostly = {
  * debugfs mode to 0.
  */
 
+__maybe_unused
+static void _param_print_bool(struct drm_printer *p, const char *driver_name,
+			      const char *name, bool val)
+{
+	drm_printf(p, "%s.%s=%s\n", driver_name, name, str_yes_no(val));
+}
+
+__maybe_unused
+static void _param_print_int(struct drm_printer *p, const char *driver_name,
+			     const char *name, int val)
+{
+	drm_printf(p, "%s.%s=%d\n", driver_name, name, val);
+}
+
+__maybe_unused
+static void _param_print_uint(struct drm_printer *p, const char *driver_name,
+			      const char *name, unsigned int val)
+{
+	drm_printf(p, "%s.%s=%u\n", driver_name, name, val);
+}
+
+__maybe_unused
+static void _param_print_ulong(struct drm_printer *p, const char *driver_name,
+			       const char *name, unsigned long val)
+{
+	drm_printf(p, "%s.%s=%lu\n", driver_name, name, val);
+}
+
+__maybe_unused
+static void _param_print_charp(struct drm_printer *p, const char *driver_name,
+			       const char *name, const char *val)
+{
+	drm_printf(p, "%s.%s=%s\n", driver_name, name, val);
+}
+
+#define _param_print(p, driver_name, name, val)			\
+	_Generic(val,						\
+		 bool : _param_print_bool,			\
+		 int : _param_print_int,			\
+		 unsigned int : _param_print_uint,		\
+		 unsigned long : _param_print_ulong,		\
+		 char * : _param_print_charp)(p, driver_name, name, val)
+
+/**
+ * intel_display_params_dump - dump intel display modparams
+ * @i915: i915 device
+ * @p: the &drm_printer
+ *
+ * Pretty printer for i915 modparams.
+ */
+void intel_display_params_dump(struct drm_i915_private *i915, struct drm_printer *p)
+{
+#define PRINT(T, x, ...) _param_print(p, i915->drm.driver->name, #x, i915->display.params.x);
+	INTEL_DISPLAY_PARAMS_FOR_EACH(PRINT);
+#undef PRINT
+}
+
 __maybe_unused static void _param_dup_charp(char **valp)
 {
 	*valp = kstrdup(*valp ? *valp : "", GFP_ATOMIC);
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 9f4f45d410a36..eac66042da4dc 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -7,6 +7,7 @@
 #define _INTEL_DISPLAY_PARAMS_H_
 
 struct drm_printer;
+struct drm_i915_private;
 
 /*
  * Invoke param, a function-like macro, for each intel display param, with
@@ -28,6 +29,8 @@ struct intel_display_params {
 };
 #undef MEMBER
 
+void intel_display_params_dump(struct drm_i915_private *i915,
+			       struct drm_printer *p);
 void intel_display_params_copy(struct intel_display_params *dest);
 void intel_display_params_free(struct intel_display_params *params);
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e9b79c2c37d84..af0077f6a6d0e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,6 +32,8 @@
 
 #include <drm/drm_debugfs.h>
 
+#include "display/intel_display_params.h"
+
 #include "gem/i915_gem_context.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_buffer_pool.h"
@@ -74,6 +76,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
 
 	kernel_param_lock(THIS_MODULE);
 	i915_params_dump(&i915->params, &p);
+	intel_display_params_dump(i915, &p);
 	kernel_param_unlock(THIS_MODULE);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f4ebcfb702899..53fff687b00ed 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -660,6 +660,7 @@ static void err_print_params(struct drm_i915_error_state_buf *m,
 	struct drm_printer p = i915_error_printer(m);
 
 	i915_params_dump(params, &p);
+	intel_display_params_dump(m->i915, &p);
 }
 
 static void err_print_pciid(struct drm_i915_error_state_buf *m,
@@ -1027,6 +1028,7 @@ static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
 static void cleanup_params(struct i915_gpu_coredump *error)
 {
 	i915_params_free(&error->params);
+	intel_display_params_free(&error->display_params);
 }
 
 static void cleanup_uc(struct intel_uc_coredump *uc)
@@ -1979,6 +1981,7 @@ static void capture_gen(struct i915_gpu_coredump *error)
 	error->suspend_count = i915->suspend_count;
 
 	i915_params_copy(&error->params, &i915->params);
+	intel_display_params_copy(&error->display_params);
 	memcpy(&error->device_info,
 	       INTEL_INFO(i915),
 	       sizeof(error->device_info));
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 9f5971f5e9801..4ce227f7e1e12 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -15,6 +15,7 @@
 #include <drm/drm_mm.h>
 
 #include "display/intel_display_device.h"
+#include "display/intel_display_params.h"
 #include "gt/intel_engine.h"
 #include "gt/intel_gt_types.h"
 #include "gt/uc/intel_uc_fw.h"
@@ -214,6 +215,7 @@ struct i915_gpu_coredump {
 	struct intel_display_runtime_info display_runtime_info;
 	struct intel_driver_caps driver_caps;
 	struct i915_params params;
+	struct intel_display_params display_params;
 
 	struct intel_overlay_error_state *overlay;
 
-- 
GitLab


From 6f4f8aef7e4220a3369b40a94f694ecc014adf13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:49 +0300
Subject: [PATCH 0012/2340] drm/i915/display: Move enable_fbc module parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-4-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/i9xx_wm.c              |  2 +-
 drivers/gpu/drm/i915/display/intel_display_params.c |  4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h |  3 ++-
 drivers/gpu/drm/i915/display/intel_fbc.c            | 10 +++++-----
 drivers/gpu/drm/i915/i915_params.c                  |  4 ----
 drivers/gpu/drm/i915/i915_params.h                  |  1 -
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c
index af0c79a4c9a4f..b37c0d02d5000 100644
--- a/drivers/gpu/drm/i915/display/i9xx_wm.c
+++ b/drivers/gpu/drm/i915/display/i9xx_wm.c
@@ -2993,7 +2993,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv,
 
 	/* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
 	if (DISPLAY_VER(dev_priv) == 5 && HAS_FBC(dev_priv) &&
-	    dev_priv->params.enable_fbc && !merged->fbc_wm_enabled) {
+	    dev_priv->display.params.enable_fbc && !merged->fbc_wm_enabled) {
 		for (level = 2; level < num_levels; level++) {
 			struct intel_wm_level *wm = &merged->wm[level];
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 11ee73a98b5b0..330613cd64dbd 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -27,6 +27,10 @@ static struct intel_display_params intel_display_modparams __read_mostly = {
  * debugfs mode to 0.
  */
 
+intel_display_param_named_unsafe(enable_fbc, int, 0400,
+	"Enable frame buffer compression for power savings "
+	"(default: -1 (use per-chip default))");
+
 __maybe_unused
 static void _param_print_bool(struct drm_printer *p, const char *driver_name,
 			      const char *name, bool val)
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index eac66042da4dc..198b03da80555 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -21,7 +21,8 @@ struct drm_i915_private;
  * mode: debugfs file permissions, one of {0400, 0600, 0}, use 0 to not create
  *       debugfs file
  */
-#define INTEL_DISPLAY_PARAMS_FOR_EACH(param) /* empty define to avoid build failure */
+#define INTEL_DISPLAY_PARAMS_FOR_EACH(param) \
+	param(int, enable_fbc, -1, 0600) \
 
 #define MEMBER(T, member, ...) T member;
 struct intel_display_params {
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 4820d21cc942a..bde12fe622750 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1174,7 +1174,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
 		return 0;
 	}
 
-	if (!i915->params.enable_fbc) {
+	if (!i915->display.params.enable_fbc) {
 		plane_state->no_fbc_reason = "disabled per module param or by default";
 		return 0;
 	}
@@ -1751,8 +1751,8 @@ void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915)
  */
 static int intel_sanitize_fbc_option(struct drm_i915_private *i915)
 {
-	if (i915->params.enable_fbc >= 0)
-		return !!i915->params.enable_fbc;
+	if (i915->display.params.enable_fbc >= 0)
+		return !!i915->display.params.enable_fbc;
 
 	if (!HAS_FBC(i915))
 		return 0;
@@ -1824,9 +1824,9 @@ void intel_fbc_init(struct drm_i915_private *i915)
 	if (need_fbc_vtd_wa(i915))
 		DISPLAY_RUNTIME_INFO(i915)->fbc_mask = 0;
 
-	i915->params.enable_fbc = intel_sanitize_fbc_option(i915);
+	i915->display.params.enable_fbc = intel_sanitize_fbc_option(i915);
 	drm_dbg_kms(&i915->drm, "Sanitized enable_fbc value: %d\n",
-		    i915->params.enable_fbc);
+		    i915->display.params.enable_fbc);
 
 	for_each_fbc_id(i915, fbc_id)
 		i915->display.fbc[fbc_id] = intel_fbc_create(i915, fbc_id);
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 036c4c3ed6ed6..42700b854b794 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -72,10 +72,6 @@ i915_param_named_unsafe(enable_dc, int, 0400,
 	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
 	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
 
-i915_param_named_unsafe(enable_fbc, int, 0400,
-	"Enable frame buffer compression for power savings "
-	"(default: -1 (use per-chip default))");
-
 i915_param_named_unsafe(lvds_channel_mode, int, 0400,
 	 "Specify LVDS channel mode "
 	 "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index d5194b039aaba..e674de29f92c8 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -52,7 +52,6 @@ struct drm_printer;
 	param(int, panel_use_ssc, -1, 0600) \
 	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_dc, -1, 0400) \
-	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, enable_dpt, true, 0400) \
 	param(bool, psr_safest_params, false, 0400) \
-- 
GitLab


From 942d654171bdaf41bc5c298857c5a342031d8154 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:50 +0300
Subject: [PATCH 0013/2340] drm/i915/display: Move psr related module
 parameters under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move psr related module parameters under display. Also fix error in
enable_psr2_sel_fetch module parameter descrtiption. It was saying disabled
by default while it's vice versa. Also psr_safest_params was missing
default value in description. This is now added.

v2:
  - Fix enable_psr2_sel_fetch description.
  - Add default value into psr_safest_params description.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-5-jouni.hogander@intel.com
---
 .../gpu/drm/i915/display/intel_display_params.c  | 16 ++++++++++++++++
 .../gpu/drm/i915/display/intel_display_params.h  |  5 +++++
 drivers/gpu/drm/i915/display/intel_psr.c         | 14 +++++++-------
 drivers/gpu/drm/i915/i915_params.c               | 15 ---------------
 drivers/gpu/drm/i915/i915_params.h               |  3 ---
 5 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 330613cd64dbd..0e86d0d10bfa1 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -31,6 +31,22 @@ intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
 
+intel_display_param_named_unsafe(enable_psr, int, 0400,
+	"Enable PSR "
+	"(0=disabled, 1=enable up to PSR1, 2=enable up to PSR2) "
+	"Default: -1 (use per-chip default)");
+
+intel_display_param_named(psr_safest_params, bool, 0400,
+	"Replace PSR VBT parameters by the safest and not optimal ones. This "
+	"is helpful to detect if PSR issues are related to bad values set in "
+	" VBT. (0=use VBT parameters, 1=use safest parameters)"
+	"Default: 0");
+
+intel_display_param_named_unsafe(enable_psr2_sel_fetch, bool, 0400,
+	"Enable PSR2 selective fetch "
+	"(0=disabled, 1=enabled) "
+	"Default: 1");
+
 __maybe_unused
 static void _param_print_bool(struct drm_printer *p, const char *driver_name,
 			      const char *name, bool val)
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 198b03da80555..45a4d38d13e7d 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -6,6 +6,8 @@
 #ifndef _INTEL_DISPLAY_PARAMS_H_
 #define _INTEL_DISPLAY_PARAMS_H_
 
+#include <linux/types.h>
+
 struct drm_printer;
 struct drm_i915_private;
 
@@ -23,6 +25,9 @@ struct drm_i915_private;
  */
 #define INTEL_DISPLAY_PARAMS_FOR_EACH(param) \
 	param(int, enable_fbc, -1, 0600) \
+	param(int, enable_psr, -1, 0600) \
+	param(bool, psr_safest_params, false, 0400) \
+	param(bool, enable_psr2_sel_fetch, true, 0400) \
 
 #define MEMBER(T, member, ...) T member;
 struct intel_display_params {
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 4f1f31fc9529d..ecd24a0b86cb4 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -179,9 +179,9 @@ static bool psr_global_enabled(struct intel_dp *intel_dp)
 
 	switch (intel_dp->psr.debug & I915_PSR_DEBUG_MODE_MASK) {
 	case I915_PSR_DEBUG_DEFAULT:
-		if (i915->params.enable_psr == -1)
+		if (i915->display.params.enable_psr == -1)
 			return connector->panel.vbt.psr.enable;
-		return i915->params.enable_psr;
+		return i915->display.params.enable_psr;
 	case I915_PSR_DEBUG_DISABLE:
 		return false;
 	default:
@@ -198,7 +198,7 @@ static bool psr2_global_enabled(struct intel_dp *intel_dp)
 	case I915_PSR_DEBUG_FORCE_PSR1:
 		return false;
 	default:
-		if (i915->params.enable_psr == 1)
+		if (i915->display.params.enable_psr == 1)
 			return false;
 		return true;
 	}
@@ -606,7 +606,7 @@ static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp)
 	if (DISPLAY_VER(dev_priv) >= 11)
 		val |= EDP_PSR_TP4_TIME_0us;
 
-	if (dev_priv->params.psr_safest_params) {
+	if (dev_priv->display.params.psr_safest_params) {
 		val |= EDP_PSR_TP1_TIME_2500us;
 		val |= EDP_PSR_TP2_TP3_TIME_2500us;
 		goto check_tp3_sel;
@@ -700,7 +700,7 @@ static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp)
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	u32 val = 0;
 
-	if (dev_priv->params.psr_safest_params)
+	if (dev_priv->display.params.psr_safest_params)
 		return EDP_PSR2_TP2_TIME_2500us;
 
 	if (connector->panel.vbt.psr.psr2_tp2_tp3_wakeup_time_us >= 0 &&
@@ -943,7 +943,7 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp,
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 
-	if (!dev_priv->params.enable_psr2_sel_fetch &&
+	if (!dev_priv->display.params.enable_psr2_sel_fetch &&
 	    intel_dp->psr.debug != I915_PSR_DEBUG_ENABLE_SEL_FETCH) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "PSR2 sel fetch not enabled, disabled by parameter\n");
@@ -1056,7 +1056,7 @@ static bool _compute_psr2_wake_times(struct intel_dp *intel_dp,
 	    fast_wake_lines > max_wake_lines)
 		return false;
 
-	if (i915->params.psr_safest_params)
+	if (i915->display.params.psr_safest_params)
 		io_wake_lines = fast_wake_lines = max_wake_lines;
 
 	/* According to Bspec lower limit should be set as 7 lines. */
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 42700b854b794..c65e3314ae489 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -102,21 +102,6 @@ i915_param_named_unsafe(enable_hangcheck, bool, 0400,
 	"WARNING: Disabling this can cause system wide hangs. "
 	"(default: true)");
 
-i915_param_named_unsafe(enable_psr, int, 0400,
-	"Enable PSR "
-	"(0=disabled, 1=enable up to PSR1, 2=enable up to PSR2) "
-	"Default: -1 (use per-chip default)");
-
-i915_param_named(psr_safest_params, bool, 0400,
-	"Replace PSR VBT parameters by the safest and not optimal ones. This "
-	"is helpful to detect if PSR issues are related to bad values set in "
-	" VBT. (0=use VBT parameters, 1=use safest parameters)");
-
-i915_param_named_unsafe(enable_psr2_sel_fetch, bool, 0400,
-	"Enable PSR2 selective fetch "
-	"(0=disabled, 1=enabled) "
-	"Default: 0");
-
 i915_param_named_unsafe(enable_sagv, bool, 0600,
 	"Enable system agent voltage/frequency scaling (SAGV) (default: true)");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index e674de29f92c8..47a05c4a8e898 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -52,10 +52,7 @@ struct drm_printer;
 	param(int, panel_use_ssc, -1, 0600) \
 	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_dc, -1, 0400) \
-	param(int, enable_psr, -1, 0600) \
 	param(bool, enable_dpt, true, 0400) \
-	param(bool, psr_safest_params, false, 0400) \
-	param(bool, enable_psr2_sel_fetch, true, 0400) \
 	param(bool, enable_sagv, true, 0600) \
 	param(int, disable_power_well, -1, 0400) \
 	param(int, enable_ips, 1, 0600) \
-- 
GitLab


From 29292bc6cc3785d3da6b733a413e387282664f71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:51 +0300
Subject: [PATCH 0014/2340] drm/i915/display: Move vbt_firmware module
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-6-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 3 +++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/intel_opregion.c       | 2 +-
 drivers/gpu/drm/i915/i915_params.c                  | 3 ---
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 0e86d0d10bfa1..d00ea5cda4329 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -27,6 +27,9 @@ static struct intel_display_params intel_display_modparams __read_mostly = {
  * debugfs mode to 0.
  */
 
+intel_display_param_named_unsafe(vbt_firmware, charp, 0400,
+	"Load VBT from specified file under /lib/firmware");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 45a4d38d13e7d..e51e8a1960ebb 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -24,6 +24,7 @@ struct drm_i915_private;
  *       debugfs file
  */
 #define INTEL_DISPLAY_PARAMS_FOR_EACH(param) \
+	param(char *, vbt_firmware, NULL, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index 84078fb82b2f7..1ce785db6a5e1 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -841,7 +841,7 @@ static int intel_load_vbt_firmware(struct drm_i915_private *dev_priv)
 {
 	struct intel_opregion *opregion = &dev_priv->display.opregion;
 	const struct firmware *fw = NULL;
-	const char *name = dev_priv->params.vbt_firmware;
+	const char *name = dev_priv->display.params.vbt_firmware;
 	int ret;
 
 	if (!name || !*name)
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index c65e3314ae489..9d0535d774c93 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -87,9 +87,6 @@ i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400,
 i915_param_named_unsafe(reset, uint, 0400,
 	"Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])");
 
-i915_param_named_unsafe(vbt_firmware, charp, 0400,
-	"Load VBT from specified file under /lib/firmware");
-
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 i915_param_named(error_capture, bool, 0400,
 	"Record the GPU state following a hang. "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 47a05c4a8e898..37a1d31a233c1 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -46,7 +46,6 @@ struct drm_printer;
  *       debugfs file
  */
 #define I915_PARAMS_FOR_EACH(param) \
-	param(char *, vbt_firmware, NULL, 0400) \
 	param(int, modeset, -1, 0400) \
 	param(int, lvds_channel_mode, 0, 0400) \
 	param(int, panel_use_ssc, -1, 0600) \
-- 
GitLab


From d541697e8043b7b5d8e1f39b1c046dc140406e82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:52 +0300
Subject: [PATCH 0015/2340] drm/i915/display: Move lvds_channel_mode module
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-7-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/intel_lvds.c           | 4 ++--
 drivers/gpu/drm/i915/i915_params.c                  | 4 ----
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index d00ea5cda4329..91e2b5056d636 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -30,6 +30,10 @@ static struct intel_display_params intel_display_modparams __read_mostly = {
 intel_display_param_named_unsafe(vbt_firmware, charp, 0400,
 	"Load VBT from specified file under /lib/firmware");
 
+intel_display_param_named_unsafe(lvds_channel_mode, int, 0400,
+	 "Specify LVDS channel mode "
+	 "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index e51e8a1960ebb..09bd6614b8133 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -25,6 +25,7 @@ struct drm_i915_private;
  */
 #define INTEL_DISPLAY_PARAMS_FOR_EACH(param) \
 	param(char *, vbt_firmware, NULL, 0400) \
+	param(int, lvds_channel_mode, 0, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 2a4ca7e65775e..4b114fde57b18 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -794,8 +794,8 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder)
 	unsigned int val;
 
 	/* use the module option value if specified */
-	if (i915->params.lvds_channel_mode > 0)
-		return i915->params.lvds_channel_mode == 2;
+	if (i915->display.params.lvds_channel_mode > 0)
+		return i915->display.params.lvds_channel_mode == 2;
 
 	/* single channel LVDS is limited to 112 MHz */
 	if (fixed_mode->clock > 112999)
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 9d0535d774c93..ea55cc2c48547 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -72,10 +72,6 @@ i915_param_named_unsafe(enable_dc, int, 0400,
 	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
 	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
 
-i915_param_named_unsafe(lvds_channel_mode, int, 0400,
-	 "Specify LVDS channel mode "
-	 "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)");
-
 i915_param_named_unsafe(panel_use_ssc, int, 0400,
 	"Use Spread Spectrum Clock with panels [LVDS/eDP] "
 	"(default: auto from VBT)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 37a1d31a233c1..03ec2c2b589d8 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(int, lvds_channel_mode, 0, 0400) \
 	param(int, panel_use_ssc, -1, 0600) \
 	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_dc, -1, 0400) \
-- 
GitLab


From 94232d1637c5675f19a434e5118d0d6718ee310a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:53 +0300
Subject: [PATCH 0016/2340] drm/i915/display: Move panel_use_ssc module
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-8-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/intel_panel.c          | 4 ++--
 drivers/gpu/drm/i915/i915_params.c                  | 4 ----
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 91e2b5056d636..0813adfa56513 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -34,6 +34,10 @@ intel_display_param_named_unsafe(lvds_channel_mode, int, 0400,
 	 "Specify LVDS channel mode "
 	 "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)");
 
+intel_display_param_named_unsafe(panel_use_ssc, int, 0400,
+	"Use Spread Spectrum Clock with panels [LVDS/eDP] "
+	"(default: auto from VBT)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 09bd6614b8133..ceed4af192c9d 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -26,6 +26,7 @@ struct drm_i915_private;
 #define INTEL_DISPLAY_PARAMS_FOR_EACH(param) \
 	param(char *, vbt_firmware, NULL, 0400) \
 	param(int, lvds_channel_mode, 0, 0400) \
+	param(int, panel_use_ssc, -1, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c
index 483beedac5b83..0d8e5320a4f88 100644
--- a/drivers/gpu/drm/i915/display/intel_panel.c
+++ b/drivers/gpu/drm/i915/display/intel_panel.c
@@ -46,8 +46,8 @@
 
 bool intel_panel_use_ssc(struct drm_i915_private *i915)
 {
-	if (i915->params.panel_use_ssc >= 0)
-		return i915->params.panel_use_ssc != 0;
+	if (i915->display.params.panel_use_ssc >= 0)
+		return i915->display.params.panel_use_ssc != 0;
 	return i915->display.vbt.lvds_use_ssc &&
 		!intel_has_quirk(i915, QUIRK_LVDS_SSC_DISABLE);
 }
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index ea55cc2c48547..4123424b2c2e2 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -72,10 +72,6 @@ i915_param_named_unsafe(enable_dc, int, 0400,
 	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
 	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
 
-i915_param_named_unsafe(panel_use_ssc, int, 0400,
-	"Use Spread Spectrum Clock with panels [LVDS/eDP] "
-	"(default: auto from VBT)");
-
 i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400,
 	"Override/Ignore selection of SDVO panel mode in the VBT "
 	"(-2=ignore, -1=auto [default], index in VBT BIOS table)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 03ec2c2b589d8..0bd365889e73e 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(int, panel_use_ssc, -1, 0600) \
 	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_dc, -1, 0400) \
 	param(bool, enable_dpt, true, 0400) \
-- 
GitLab


From 5fb2e673c76d27436b02cef6c6f9669e106c1b1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:54 +0300
Subject: [PATCH 0017/2340] drm/i915/display: Move vbt_sdvo_panel_type module
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-9-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_bios.c           | 2 +-
 drivers/gpu/drm/i915/display/intel_display_params.c | 4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 4 ----
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 4e8f1e91bb089..70c0491aac422 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1116,7 +1116,7 @@ parse_sdvo_panel_data(struct drm_i915_private *i915,
 	struct drm_display_mode *panel_fixed_mode;
 	int index;
 
-	index = i915->params.vbt_sdvo_panel_type;
+	index = i915->display.params.vbt_sdvo_panel_type;
 	if (index == -2) {
 		drm_dbg_kms(&i915->drm,
 			    "Ignore SDVO panel mode from BIOS VBT tables.\n");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 0813adfa56513..d11c74ba001ea 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -38,6 +38,10 @@ intel_display_param_named_unsafe(panel_use_ssc, int, 0400,
 	"Use Spread Spectrum Clock with panels [LVDS/eDP] "
 	"(default: auto from VBT)");
 
+intel_display_param_named_unsafe(vbt_sdvo_panel_type, int, 0400,
+	"Override/Ignore selection of SDVO panel mode in the VBT "
+	"(-2=ignore, -1=auto [default], index in VBT BIOS table)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index ceed4af192c9d..03dea0ba81d12 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -27,6 +27,7 @@ struct drm_i915_private;
 	param(char *, vbt_firmware, NULL, 0400) \
 	param(int, lvds_channel_mode, 0, 0400) \
 	param(int, panel_use_ssc, -1, 0600) \
+	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 4123424b2c2e2..d0abcbd526a76 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -72,10 +72,6 @@ i915_param_named_unsafe(enable_dc, int, 0400,
 	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
 	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
 
-i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400,
-	"Override/Ignore selection of SDVO panel mode in the VBT "
-	"(-2=ignore, -1=auto [default], index in VBT BIOS table)");
-
 i915_param_named_unsafe(reset, uint, 0400,
 	"Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 0bd365889e73e..1ea332dfbb5d1 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_dc, -1, 0400) \
 	param(bool, enable_dpt, true, 0400) \
 	param(bool, enable_sagv, true, 0600) \
-- 
GitLab


From 0deee706f116778429d03131efb7d29273442d9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:55 +0300
Subject: [PATCH 0018/2340] drm/i915/display: Move enable_dc module parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-10-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 5 +++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/intel_display_power.c  | 2 +-
 drivers/gpu/drm/i915/i915_params.c                  | 5 -----
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index d11c74ba001ea..60fa0bbce77d3 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -42,6 +42,11 @@ intel_display_param_named_unsafe(vbt_sdvo_panel_type, int, 0400,
 	"Override/Ignore selection of SDVO panel mode in the VBT "
 	"(-2=ignore, -1=auto [default], index in VBT BIOS table)");
 
+intel_display_param_named_unsafe(enable_dc, int, 0400,
+	"Enable power-saving display C-states. "
+	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
+	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 03dea0ba81d12..2cca06ad7d71b 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -28,6 +28,7 @@ struct drm_i915_private;
 	param(int, lvds_channel_mode, 0, 0400) \
 	param(int, panel_use_ssc, -1, 0600) \
 	param(int, vbt_sdvo_panel_type, -1, 0400) \
+	param(int, enable_dc, -1, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index e25785ae1c20c..4832eb8da080e 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -1020,7 +1020,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
 		sanitize_disable_power_well_option(dev_priv,
 						   dev_priv->params.disable_power_well);
 	power_domains->allowed_dc_mask =
-		get_allowed_dc_mask(dev_priv, dev_priv->params.enable_dc);
+		get_allowed_dc_mask(dev_priv, dev_priv->display.params.enable_dc);
 
 	power_domains->target_dc_state =
 		sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index d0abcbd526a76..3d370e43df3cd 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -67,11 +67,6 @@ i915_param_named(modeset, int, 0400,
 	"Use kernel modesetting [KMS] (0=disable, "
 	"1=on, -1=force vga console preference [default])");
 
-i915_param_named_unsafe(enable_dc, int, 0400,
-	"Enable power-saving display C-states. "
-	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
-	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
-
 i915_param_named_unsafe(reset, uint, 0400,
 	"Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 1ea332dfbb5d1..c3487b9d69378 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(int, enable_dc, -1, 0400) \
 	param(bool, enable_dpt, true, 0400) \
 	param(bool, enable_sagv, true, 0600) \
 	param(int, disable_power_well, -1, 0400) \
-- 
GitLab


From 04da42b4cc9429d8fff854d144f80396cbdecb46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:56 +0300
Subject: [PATCH 0019/2340] drm/i915/display: Move enable_dpt module parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-11-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 3 +++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/intel_dpt.c            | 6 ++++--
 drivers/gpu/drm/i915/display/intel_fb.c             | 2 +-
 drivers/gpu/drm/i915/display/skl_universal_plane.c  | 2 +-
 drivers/gpu/drm/i915/i915_params.c                  | 3 ---
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 7 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 60fa0bbce77d3..9067ffd6d2a90 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -47,6 +47,9 @@ intel_display_param_named_unsafe(enable_dc, int, 0400,
 	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; "
 	"3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)");
 
+intel_display_param_named_unsafe(enable_dpt, bool, 0400,
+	"Enable display page table (DPT) (default: true)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 2cca06ad7d71b..e5f139239f230 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -29,6 +29,7 @@ struct drm_i915_private;
 	param(int, panel_use_ssc, -1, 0600) \
 	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_dc, -1, 0400) \
+	param(bool, enable_dpt, true, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 48582b31b7f7f..2b067cb952f00 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -332,11 +332,13 @@ void intel_dpt_configure(struct intel_crtc *crtc)
 
 			intel_de_rmw(i915, PLANE_CHICKEN(pipe, plane_id),
 				     PLANE_CHICKEN_DISABLE_DPT,
-				     i915->params.enable_dpt ? 0 : PLANE_CHICKEN_DISABLE_DPT);
+				     i915->display.params.enable_dpt ? 0 :
+				     PLANE_CHICKEN_DISABLE_DPT);
 		}
 	} else if (DISPLAY_VER(i915) == 13) {
 		intel_de_rmw(i915, CHICKEN_MISC_2,
 			     CHICKEN_MISC_DISABLE_DPT,
-			     i915->params.enable_dpt ? 0 : CHICKEN_MISC_DISABLE_DPT);
+			     i915->display.params.enable_dpt ? 0 :
+			     CHICKEN_MISC_DISABLE_DPT);
 	}
 }
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index e1d298efc510b..e1db7a3afd1ac 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -764,7 +764,7 @@ bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier)
 
 bool intel_fb_uses_dpt(const struct drm_framebuffer *fb)
 {
-	return fb && to_i915(fb->dev)->params.enable_dpt &&
+	return fb && to_i915(fb->dev)->display.params.enable_dpt &&
 		intel_fb_modifier_uses_dpt(to_i915(fb->dev), fb->modifier);
 }
 
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 49e9d40d5e677..68035675ae3ca 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -2488,7 +2488,7 @@ skl_get_initial_plane_config(struct intel_crtc *crtc,
 		goto error;
 	}
 
-	if (!dev_priv->params.enable_dpt &&
+	if (!dev_priv->display.params.enable_dpt &&
 	    intel_fb_modifier_uses_dpt(dev_priv, fb->modifier)) {
 		drm_dbg_kms(&dev_priv->drm, "DPT disabled, skipping initial FB\n");
 		goto error;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 3d370e43df3cd..773a0a709fc6e 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -95,9 +95,6 @@ i915_param_named_unsafe(disable_power_well, int, 0400,
 
 i915_param_named_unsafe(enable_ips, int, 0400, "Enable IPS (default: true)");
 
-i915_param_named_unsafe(enable_dpt, bool, 0400,
-	"Enable display page table (DPT) (default: true)");
-
 i915_param_named_unsafe(load_detect_test, bool, 0400,
 	"Force-enable the VGA load detect code for testing (default:false). "
 	"For developers only.");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c3487b9d69378..b8728990cb8bc 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(bool, enable_dpt, true, 0400) \
 	param(bool, enable_sagv, true, 0600) \
 	param(int, disable_power_well, -1, 0400) \
 	param(int, enable_ips, 1, 0600) \
-- 
GitLab


From 5067ec645ece12421d802e0dd9510e89122efcc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:57 +0300
Subject: [PATCH 0020/2340] drm/i915/display: Move enable_sagv module parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move enable_sagv module parameter under display and change the parameter
permissions to non-writblase (0400)

v2: Change permissions to 0400

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-12-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 3 +++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/skl_watermark.c        | 5 +++--
 drivers/gpu/drm/i915/i915_params.c                  | 3 ---
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 9067ffd6d2a90..a98df2afc9960 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -50,6 +50,9 @@ intel_display_param_named_unsafe(enable_dc, int, 0400,
 intel_display_param_named_unsafe(enable_dpt, bool, 0400,
 	"Enable display page table (DPT) (default: true)");
 
+intel_display_param_named_unsafe(enable_sagv, bool, 0400,
+	"Enable system agent voltage/frequency scaling (SAGV) (default: true)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index e5f139239f230..0a7ac416403ad 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -30,6 +30,7 @@ struct drm_i915_private;
 	param(int, vbt_sdvo_panel_type, -1, 0400) \
 	param(int, enable_dc, -1, 0400) \
 	param(bool, enable_dpt, true, 0400) \
+	param(bool, enable_sagv, true, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c
index 99b8ccdc3dfa2..56588d6e24ae6 100644
--- a/drivers/gpu/drm/i915/display/skl_watermark.c
+++ b/drivers/gpu/drm/i915/display/skl_watermark.c
@@ -412,7 +412,7 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 
-	if (!i915->params.enable_sagv)
+	if (!i915->display.params.enable_sagv)
 		return false;
 
 	if (DISPLAY_VER(i915) >= 12)
@@ -3702,7 +3702,8 @@ static int intel_sagv_status_show(struct seq_file *m, void *unused)
 	};
 
 	seq_printf(m, "SAGV available: %s\n", str_yes_no(intel_has_sagv(i915)));
-	seq_printf(m, "SAGV modparam: %s\n", str_enabled_disabled(i915->params.enable_sagv));
+	seq_printf(m, "SAGV modparam: %s\n",
+		   str_enabled_disabled(i915->display.params.enable_sagv));
 	seq_printf(m, "SAGV status: %s\n", sagv_status[i915->display.sagv.status]);
 	seq_printf(m, "SAGV block time: %d usec\n", i915->display.sagv.block_time_us);
 
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 773a0a709fc6e..51e706f6e57e6 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -82,9 +82,6 @@ i915_param_named_unsafe(enable_hangcheck, bool, 0400,
 	"WARNING: Disabling this can cause system wide hangs. "
 	"(default: true)");
 
-i915_param_named_unsafe(enable_sagv, bool, 0600,
-	"Enable system agent voltage/frequency scaling (SAGV) (default: true)");
-
 i915_param_named_unsafe(force_probe, charp, 0400,
 	"Force probe options for specified supported devices. "
 	"See CONFIG_DRM_I915_FORCE_PROBE for details.");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index b8728990cb8bc..066f157835802 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(bool, enable_sagv, true, 0600) \
 	param(int, disable_power_well, -1, 0400) \
 	param(int, enable_ips, 1, 0600) \
 	param(int, invert_brightness, 0, 0600) \
-- 
GitLab


From bfcda58ba1555ac0596d851ae6d748cdebff1af7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:58 +0300
Subject: [PATCH 0021/2340] drm/i915/display: Move disable_power_well module
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-13-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c |  4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h |  1 +
 drivers/gpu/drm/i915/display/intel_display_power.c  | 12 ++++++------
 drivers/gpu/drm/i915/i915_params.c                  |  4 ----
 drivers/gpu/drm/i915/i915_params.h                  |  1 -
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index a98df2afc9960..79a212dded80b 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -53,6 +53,10 @@ intel_display_param_named_unsafe(enable_dpt, bool, 0400,
 intel_display_param_named_unsafe(enable_sagv, bool, 0400,
 	"Enable system agent voltage/frequency scaling (SAGV) (default: true)");
 
+intel_display_param_named_unsafe(disable_power_well, int, 0400,
+	"Disable display power wells when possible "
+	"(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 0a7ac416403ad..aadbef664965d 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -31,6 +31,7 @@ struct drm_i915_private;
 	param(int, enable_dc, -1, 0400) \
 	param(bool, enable_dpt, true, 0400) \
 	param(bool, enable_sagv, true, 0600) \
+	param(int, disable_power_well, -1, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 4832eb8da080e..e390595d73416 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -967,7 +967,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
 		DISPLAY_VER(dev_priv) >= 11 ?
 	       DC_STATE_EN_DC9 : 0;
 
-	if (!dev_priv->params.disable_power_well)
+	if (!dev_priv->display.params.disable_power_well)
 		max_dc = 0;
 
 	if (enable_dc >= 0 && enable_dc <= max_dc) {
@@ -1016,9 +1016,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
 {
 	struct i915_power_domains *power_domains = &dev_priv->display.power.domains;
 
-	dev_priv->params.disable_power_well =
+	dev_priv->display.params.disable_power_well =
 		sanitize_disable_power_well_option(dev_priv,
-						   dev_priv->params.disable_power_well);
+						   dev_priv->display.params.disable_power_well);
 	power_domains->allowed_dc_mask =
 		get_allowed_dc_mask(dev_priv, dev_priv->display.params.enable_dc);
 
@@ -1950,7 +1950,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
 		intel_display_power_get(i915, POWER_DOMAIN_INIT);
 
 	/* Disable power support if the user asked so. */
-	if (!i915->params.disable_power_well) {
+	if (!i915->display.params.disable_power_well) {
 		drm_WARN_ON(&i915->drm, power_domains->disable_wakeref);
 		i915->display.power.domains.disable_wakeref = intel_display_power_get(i915,
 										      POWER_DOMAIN_INIT);
@@ -1977,7 +1977,7 @@ void intel_power_domains_driver_remove(struct drm_i915_private *i915)
 		fetch_and_zero(&i915->display.power.domains.init_wakeref);
 
 	/* Remove the refcount we took to keep power well support disabled. */
-	if (!i915->params.disable_power_well)
+	if (!i915->display.params.disable_power_well)
 		intel_display_power_put(i915, POWER_DOMAIN_INIT,
 					fetch_and_zero(&i915->display.power.domains.disable_wakeref));
 
@@ -2096,7 +2096,7 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, bool s2idle)
 	 * Even if power well support was disabled we still want to disable
 	 * power wells if power domains must be deinitialized for suspend.
 	 */
-	if (!i915->params.disable_power_well)
+	if (!i915->display.params.disable_power_well)
 		intel_display_power_put(i915, POWER_DOMAIN_INIT,
 					fetch_and_zero(&i915->display.power.domains.disable_wakeref));
 
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 51e706f6e57e6..eab02f71a4e55 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -86,10 +86,6 @@ i915_param_named_unsafe(force_probe, charp, 0400,
 	"Force probe options for specified supported devices. "
 	"See CONFIG_DRM_I915_FORCE_PROBE for details.");
 
-i915_param_named_unsafe(disable_power_well, int, 0400,
-	"Disable display power wells when possible "
-	"(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)");
-
 i915_param_named_unsafe(enable_ips, int, 0400, "Enable IPS (default: true)");
 
 i915_param_named_unsafe(load_detect_test, bool, 0400,
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 066f157835802..060464df03c23 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(int, disable_power_well, -1, 0400) \
 	param(int, enable_ips, 1, 0600) \
 	param(int, invert_brightness, 0, 0600) \
 	param(int, enable_guc, -1, 0400) \
-- 
GitLab


From c39fc2aca32a93d88e4e90ec6f2148b3491ad88f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:40:59 +0300
Subject: [PATCH 0022/2340] drm/i915/display: Move enable_ips module parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move enable_ips module parameter under display and change it as boolean.

v2:
  - Change enable_ip as boolean
  - Fix copy paste error (i915_param -> intel_display_param)

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-14-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/hsw_ips.c              | 4 ++--
 drivers/gpu/drm/i915/display/intel_display_params.c | 2 ++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 2 --
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/hsw_ips.c b/drivers/gpu/drm/i915/display/hsw_ips.c
index 7dc38ac020927..611a7d6ef80c2 100644
--- a/drivers/gpu/drm/i915/display/hsw_ips.c
+++ b/drivers/gpu/drm/i915/display/hsw_ips.c
@@ -193,7 +193,7 @@ bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state)
 	if (!hsw_crtc_supports_ips(crtc))
 		return false;
 
-	if (!i915->params.enable_ips)
+	if (!i915->display.params.enable_ips)
 		return false;
 
 	if (crtc_state->pipe_bpp > 24)
@@ -329,7 +329,7 @@ static int hsw_ips_debugfs_status_show(struct seq_file *m, void *unused)
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	seq_printf(m, "Enabled by kernel parameter: %s\n",
-		   str_yes_no(i915->params.enable_ips));
+		   str_yes_no(i915->display.params.enable_ips));
 
 	if (DISPLAY_VER(i915) >= 8) {
 		seq_puts(m, "Currently: unknown\n");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 79a212dded80b..ce991ad200061 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -57,6 +57,8 @@ intel_display_param_named_unsafe(disable_power_well, int, 0400,
 	"Disable display power wells when possible "
 	"(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)");
 
+intel_display_param_named_unsafe(enable_ips, bool, 0400, "Enable IPS (default: true)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index aadbef664965d..8d51488e3b3e6 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -32,6 +32,7 @@ struct drm_i915_private;
 	param(bool, enable_dpt, true, 0400) \
 	param(bool, enable_sagv, true, 0600) \
 	param(int, disable_power_well, -1, 0400) \
+	param(bool, enable_ips, true, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index eab02f71a4e55..54dcce97da2af 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -86,8 +86,6 @@ i915_param_named_unsafe(force_probe, charp, 0400,
 	"Force probe options for specified supported devices. "
 	"See CONFIG_DRM_I915_FORCE_PROBE for details.");
 
-i915_param_named_unsafe(enable_ips, int, 0400, "Enable IPS (default: true)");
-
 i915_param_named_unsafe(load_detect_test, bool, 0400,
 	"Force-enable the VGA load detect code for testing (default:false). "
 	"For developers only.");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 060464df03c23..18bb8a93e0e85 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(int, enable_ips, 1, 0600) \
 	param(int, invert_brightness, 0, 0600) \
 	param(int, enable_guc, -1, 0400) \
 	param(int, guc_log_level, -1, 0400) \
-- 
GitLab


From 5234105ea8ad0c2655b2cac398c3ae564528eff1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:00 +0300
Subject: [PATCH 0023/2340] drm/i915/display: Move invert_brightness module
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-15-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_backlight.c      | 9 +++++----
 drivers/gpu/drm/i915/display/intel_display_params.c | 7 +++++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 7 -------
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c
index 2e8f17c045222..612d4cd9dacba 100644
--- a/drivers/gpu/drm/i915/display/intel_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_backlight.c
@@ -88,10 +88,10 @@ u32 intel_backlight_invert_pwm_level(struct intel_connector *connector, u32 val)
 
 	drm_WARN_ON(&i915->drm, panel->backlight.pwm_level_max == 0);
 
-	if (i915->params.invert_brightness < 0)
+	if (i915->display.params.invert_brightness < 0)
 		return val;
 
-	if (i915->params.invert_brightness > 0 ||
+	if (i915->display.params.invert_brightness > 0 ||
 	    intel_has_quirk(i915, QUIRK_INVERT_BRIGHTNESS)) {
 		return panel->backlight.pwm_level_max - val + panel->backlight.pwm_level_min;
 	}
@@ -132,8 +132,9 @@ u32 intel_backlight_level_from_pwm(struct intel_connector *connector, u32 val)
 	drm_WARN_ON_ONCE(&i915->drm,
 			 panel->backlight.max == 0 || panel->backlight.pwm_level_max == 0);
 
-	if (i915->params.invert_brightness > 0 ||
-	    (i915->params.invert_brightness == 0 && intel_has_quirk(i915, QUIRK_INVERT_BRIGHTNESS)))
+	if (i915->display.params.invert_brightness > 0 ||
+	    (i915->display.params.invert_brightness == 0 &&
+	     intel_has_quirk(i915, QUIRK_INVERT_BRIGHTNESS)))
 		val = panel->backlight.pwm_level_max - (val - panel->backlight.pwm_level_min);
 
 	return scale(val, panel->backlight.pwm_level_min, panel->backlight.pwm_level_max,
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index ce991ad200061..2d721afaba302 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -59,6 +59,13 @@ intel_display_param_named_unsafe(disable_power_well, int, 0400,
 
 intel_display_param_named_unsafe(enable_ips, bool, 0400, "Enable IPS (default: true)");
 
+intel_display_param_named_unsafe(invert_brightness, int, 0400,
+	"Invert backlight brightness "
+	"(-1 force normal, 0 machine defaults, 1 force inversion), please "
+	"report PCI device ID, subsystem vendor and subsystem device ID "
+	"to dri-devel@lists.freedesktop.org, if your machine needs it. "
+	"It will then be included in an upcoming module version.");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 8d51488e3b3e6..4723b45224134 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -33,6 +33,7 @@ struct drm_i915_private;
 	param(bool, enable_sagv, true, 0600) \
 	param(int, disable_power_well, -1, 0400) \
 	param(bool, enable_ips, true, 0600) \
+	param(int, invert_brightness, 0, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 54dcce97da2af..423fe54484e10 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -94,13 +94,6 @@ i915_param_named_unsafe(force_reset_modeset_test, bool, 0400,
 	"Force a modeset during gpu reset for testing (default:false). "
 	"For developers only.");
 
-i915_param_named_unsafe(invert_brightness, int, 0400,
-	"Invert backlight brightness "
-	"(-1 force normal, 0 machine defaults, 1 force inversion), please "
-	"report PCI device ID, subsystem vendor and subsystem device ID "
-	"to dri-devel@lists.freedesktop.org, if your machine needs it. "
-	"It will then be included in an upcoming module version.");
-
 i915_param_named(disable_display, bool, 0400,
 	"Disable display (default: false)");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 18bb8a93e0e85..ae0873443a655 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,6 @@ struct drm_printer;
  */
 #define I915_PARAMS_FOR_EACH(param) \
 	param(int, modeset, -1, 0400) \
-	param(int, invert_brightness, 0, 0600) \
 	param(int, enable_guc, -1, 0400) \
 	param(int, guc_log_level, -1, 0400) \
 	param(char *, guc_firmware_path, NULL, 0400) \
-- 
GitLab


From 87706a67ad57725470a0512d26ea2aaca700e2d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:01 +0300
Subject: [PATCH 0024/2340] drm/i915/display: Move edp_vswing module parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-16-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_bios.c           | 4 ++--
 drivers/gpu/drm/i915/display/intel_display_params.c | 6 ++++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 6 ------
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 70c0491aac422..69db1a3a1499e 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1514,9 +1514,9 @@ parse_edp(struct drm_i915_private *i915,
 		u8 vswing;
 
 		/* Don't read from VBT if module parameter has valid value*/
-		if (i915->params.edp_vswing) {
+		if (i915->display.params.edp_vswing) {
 			panel->vbt.edp.low_vswing =
-				i915->params.edp_vswing == 1;
+				i915->display.params.edp_vswing == 1;
 		} else {
 			vswing = (edp->edp_vswing_preemph >> (panel_type * 4)) & 0xF;
 			panel->vbt.edp.low_vswing = vswing == 0;
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 2d721afaba302..70a4752235129 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -66,6 +66,12 @@ intel_display_param_named_unsafe(invert_brightness, int, 0400,
 	"to dri-devel@lists.freedesktop.org, if your machine needs it. "
 	"It will then be included in an upcoming module version.");
 
+/* WA to get away with the default setting in VBT for early platforms.Will be removed */
+intel_display_param_named_unsafe(edp_vswing, int, 0400,
+	"Ignore/Override vswing pre-emph table selection from VBT "
+	"(0=use value from vbt [default], 1=low power swing(200mV),"
+	"2=default swing(400mV))");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 4723b45224134..10c79ce230630 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -34,6 +34,7 @@ struct drm_i915_private;
 	param(int, disable_power_well, -1, 0400) \
 	param(bool, enable_ips, true, 0600) \
 	param(int, invert_brightness, 0, 0600) \
+	param(int, edp_vswing, 0, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 423fe54484e10..6b9df9f9d8427 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -111,12 +111,6 @@ i915_param_named(verbose_state_checks, bool, 0600,
 i915_param_named_unsafe(nuclear_pageflip, bool, 0400,
 	"Force enable atomic functionality on platforms that don't have full support yet.");
 
-/* WA to get away with the default setting in VBT for early platforms.Will be removed */
-i915_param_named_unsafe(edp_vswing, int, 0400,
-	"Ignore/Override vswing pre-emph table selection from VBT "
-	"(0=use value from vbt [default], 1=low power swing(200mV),"
-	"2=default swing(400mV))");
-
 i915_param_named_unsafe(enable_guc, int, 0400,
 	"Enable GuC load for GuC submission and/or HuC load. "
 	"Required functionality can be selected using bitmask values. "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index ae0873443a655..c33edaee50321 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -55,7 +55,6 @@ struct drm_printer;
 	param(char *, gsc_firmware_path, NULL, 0400) \
 	param(bool, memtest, false, 0400) \
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \
-	param(int, edp_vswing, 0, 0400) \
 	param(unsigned int, reset, 3, 0600) \
 	param(unsigned int, inject_probe_failure, 0, 0) \
 	param(int, enable_dpcd_backlight, -1, 0600) \
-- 
GitLab


From 5621e0652dc9eeb2be2f7784ceca50ddce1ff025 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:02 +0300
Subject: [PATCH 0025/2340] drm/i915/display: Move enable_dpcd_backlight module
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-17-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c   | 4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h   | 1 +
 drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c | 4 ++--
 drivers/gpu/drm/i915/i915_params.c                    | 4 ----
 drivers/gpu/drm/i915/i915_params.h                    | 1 -
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 70a4752235129..190ca12ce64b6 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -72,6 +72,10 @@ intel_display_param_named_unsafe(edp_vswing, int, 0400,
 	"(0=use value from vbt [default], 1=low power swing(200mV),"
 	"2=default swing(400mV))");
 
+intel_display_param_named(enable_dpcd_backlight, int, 0400,
+	"Enable support for DPCD backlight control"
+	"(-1=use per-VBT LFP backlight type setting [default], 0=disabled, 1=enable, 2=force VESA interface, 3=force Intel interface)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 10c79ce230630..85d1128954e95 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -35,6 +35,7 @@ struct drm_i915_private;
 	param(bool, enable_ips, true, 0600) \
 	param(int, invert_brightness, 0, 0600) \
 	param(int, edp_vswing, 0, 0400) \
+	param(int, enable_dpcd_backlight, -1, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 26ea7e9f1b899..4f58efdc688af 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -146,7 +146,7 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
 	 * HDR static metadata we need to start maintaining table of
 	 * ranges for such panels.
 	 */
-	if (i915->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL &&
+	if (i915->display.params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL &&
 	    !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type &
 	      BIT(HDMI_STATIC_METADATA_TYPE1))) {
 		drm_info(&i915->drm,
@@ -489,7 +489,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector)
 	/* Check the VBT and user's module parameters to figure out which
 	 * interfaces to probe
 	 */
-	switch (i915->params.enable_dpcd_backlight) {
+	switch (i915->display.params.enable_dpcd_backlight) {
 	case INTEL_DP_AUX_BACKLIGHT_OFF:
 		return -ENODEV;
 	case INTEL_DP_AUX_BACKLIGHT_AUTO:
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 6b9df9f9d8427..e15cd8491c7f0 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -140,10 +140,6 @@ i915_param_named_unsafe(inject_probe_failure, uint, 0400,
 	"Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)");
 #endif
 
-i915_param_named(enable_dpcd_backlight, int, 0400,
-	"Enable support for DPCD backlight control"
-	"(-1=use per-VBT LFP backlight type setting [default], 0=disabled, 1=enable, 2=force VESA interface, 3=force Intel interface)");
-
 #if IS_ENABLED(CONFIG_DRM_I915_GVT)
 i915_param_named(enable_gvt, bool, 0400,
 	"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c33edaee50321..8169234338b1f 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -57,7 +57,6 @@ struct drm_printer;
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \
 	param(unsigned int, reset, 3, 0600) \
 	param(unsigned int, inject_probe_failure, 0, 0) \
-	param(int, enable_dpcd_backlight, -1, 0600) \
 	param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
 	param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \
 	param(unsigned int, lmem_size, 0, 0400) \
-- 
GitLab


From 98a4784e201c22b1bab08b602ccfbe02d9108bec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:03 +0300
Subject: [PATCH 0026/2340] drm/i915/display: Move load_detect_test parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-18-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_crt.c            | 4 ++--
 drivers/gpu/drm/i915/display/intel_display_params.c | 4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 4 ----
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index 913e5d230a4df..0e33a0523a759 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -841,7 +841,7 @@ intel_crt_detect(struct drm_connector *connector,
 	if (!intel_display_device_enabled(dev_priv))
 		return connector_status_disconnected;
 
-	if (dev_priv->params.load_detect_test) {
+	if (dev_priv->display.params.load_detect_test) {
 		wakeref = intel_display_power_get(dev_priv,
 						  intel_encoder->power_domain);
 		goto load_detect;
@@ -901,7 +901,7 @@ load_detect:
 		else if (DISPLAY_VER(dev_priv) < 4)
 			status = intel_crt_load_detect(crt,
 				to_intel_crtc(connector->state->crtc)->pipe);
-		else if (dev_priv->params.load_detect_test)
+		else if (dev_priv->display.params.load_detect_test)
 			status = connector_status_disconnected;
 		else
 			status = connector_status_unknown;
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 190ca12ce64b6..16a30f7b96b47 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -76,6 +76,10 @@ intel_display_param_named(enable_dpcd_backlight, int, 0400,
 	"Enable support for DPCD backlight control"
 	"(-1=use per-VBT LFP backlight type setting [default], 0=disabled, 1=enable, 2=force VESA interface, 3=force Intel interface)");
 
+intel_display_param_named_unsafe(load_detect_test, bool, 0400,
+	"Force-enable the VGA load detect code for testing (default:false). "
+	"For developers only.");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 85d1128954e95..558eecaa520ab 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -36,6 +36,7 @@ struct drm_i915_private;
 	param(int, invert_brightness, 0, 0600) \
 	param(int, edp_vswing, 0, 0400) \
 	param(int, enable_dpcd_backlight, -1, 0600) \
+	param(bool, load_detect_test, false, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index e15cd8491c7f0..cb56973a2394f 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -86,10 +86,6 @@ i915_param_named_unsafe(force_probe, charp, 0400,
 	"Force probe options for specified supported devices. "
 	"See CONFIG_DRM_I915_FORCE_PROBE for details.");
 
-i915_param_named_unsafe(load_detect_test, bool, 0400,
-	"Force-enable the VGA load detect code for testing (default:false). "
-	"For developers only.");
-
 i915_param_named_unsafe(force_reset_modeset_test, bool, 0400,
 	"Force a modeset during gpu reset for testing (default:false). "
 	"For developers only.");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 8169234338b1f..cf5448bbc0876 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -63,7 +63,6 @@ struct drm_printer;
 	param(unsigned int, lmem_bar_size, 0, 0400) \
 	/* leave bools at the end to not create holes */ \
 	param(bool, enable_hangcheck, true, 0600) \
-	param(bool, load_detect_test, false, 0600) \
 	param(bool, force_reset_modeset_test, false, 0600) \
 	param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \
 	param(bool, disable_display, false, 0400) \
-- 
GitLab


From 1f3f5eb3b084e91f223d548b0646e8adeeff0779 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:04 +0300
Subject: [PATCH 0027/2340] drm/i915/display: Move force_reset_modeset_test
 parameter under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-19-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 4 ++++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/intel_display_reset.c  | 2 +-
 drivers/gpu/drm/i915/i915_params.c                  | 4 ----
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 16a30f7b96b47..a6f0dd9beb928 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -80,6 +80,10 @@ intel_display_param_named_unsafe(load_detect_test, bool, 0400,
 	"Force-enable the VGA load detect code for testing (default:false). "
 	"For developers only.");
 
+intel_display_param_named_unsafe(force_reset_modeset_test, bool, 0400,
+	"Force a modeset during gpu reset for testing (default:false). "
+	"For developers only.");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 558eecaa520ab..5f76dbd4b099b 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -37,6 +37,7 @@ struct drm_i915_private;
 	param(int, edp_vswing, 0, 0400) \
 	param(int, enable_dpcd_backlight, -1, 0600) \
 	param(bool, load_detect_test, false, 0600) \
+	param(bool, force_reset_modeset_test, false, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_display_reset.c b/drivers/gpu/drm/i915/display/intel_display_reset.c
index 17178d5d7788a..c2c347b22448a 100644
--- a/drivers/gpu/drm/i915/display/intel_display_reset.c
+++ b/drivers/gpu/drm/i915/display/intel_display_reset.c
@@ -29,7 +29,7 @@ void intel_display_reset_prepare(struct drm_i915_private *dev_priv)
 		return;
 
 	/* reset doesn't touch the display */
-	if (!dev_priv->params.force_reset_modeset_test &&
+	if (!dev_priv->display.params.force_reset_modeset_test &&
 	    !gpu_reset_clobbers_display(dev_priv))
 		return;
 
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index cb56973a2394f..497e39b1dcfb9 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -86,10 +86,6 @@ i915_param_named_unsafe(force_probe, charp, 0400,
 	"Force probe options for specified supported devices. "
 	"See CONFIG_DRM_I915_FORCE_PROBE for details.");
 
-i915_param_named_unsafe(force_reset_modeset_test, bool, 0400,
-	"Force a modeset during gpu reset for testing (default:false). "
-	"For developers only.");
-
 i915_param_named(disable_display, bool, 0400,
 	"Disable display (default: false)");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index cf5448bbc0876..9f3188b674e00 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -63,7 +63,6 @@ struct drm_printer;
 	param(unsigned int, lmem_bar_size, 0, 0400) \
 	/* leave bools at the end to not create holes */ \
 	param(bool, enable_hangcheck, true, 0600) \
-	param(bool, force_reset_modeset_test, false, 0600) \
 	param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \
 	param(bool, disable_display, false, 0400) \
 	param(bool, verbose_state_checks, true, 0) \
-- 
GitLab


From d3e6d002ed203d8beb66cfdf7eed948ed963ef94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:05 +0300
Subject: [PATCH 0028/2340] drm/i915/display: Move disable_display parameter
 under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-20-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_device.c | 3 ++-
 drivers/gpu/drm/i915/display/intel_display_params.c | 3 +++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 3 ---
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c
index e80842d1e7c73..50841818fb59f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.c
+++ b/drivers/gpu/drm/i915/display/intel_display_device.c
@@ -1153,5 +1153,6 @@ bool intel_display_device_enabled(struct drm_i915_private *i915)
 	/* Only valid when HAS_DISPLAY() is true */
 	drm_WARN_ON(&i915->drm, !HAS_DISPLAY(i915));
 
-	return !i915->params.disable_display && !intel_opregion_headless_sku(i915);
+	return !i915->display.params.disable_display &&
+		!intel_opregion_headless_sku(i915);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index a6f0dd9beb928..3579fa1fc5b77 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -84,6 +84,9 @@ intel_display_param_named_unsafe(force_reset_modeset_test, bool, 0400,
 	"Force a modeset during gpu reset for testing (default:false). "
 	"For developers only.");
 
+intel_display_param_named(disable_display, bool, 0400,
+	"Disable display (default: false)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 5f76dbd4b099b..8c4e0f6ac3e35 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -38,6 +38,7 @@ struct drm_i915_private;
 	param(int, enable_dpcd_backlight, -1, 0600) \
 	param(bool, load_detect_test, false, 0600) \
 	param(bool, force_reset_modeset_test, false, 0600) \
+	param(bool, disable_display, false, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 497e39b1dcfb9..3205c6b626703 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -86,9 +86,6 @@ i915_param_named_unsafe(force_probe, charp, 0400,
 	"Force probe options for specified supported devices. "
 	"See CONFIG_DRM_I915_FORCE_PROBE for details.");
 
-i915_param_named(disable_display, bool, 0400,
-	"Disable display (default: false)");
-
 i915_param_named(memtest, bool, 0400,
 	"Perform a read/write test of all device memory on module load (default: off)");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 9f3188b674e00..8bce7d0576346 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -64,7 +64,6 @@ struct drm_printer;
 	/* leave bools at the end to not create holes */ \
 	param(bool, enable_hangcheck, true, 0600) \
 	param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \
-	param(bool, disable_display, false, 0400) \
 	param(bool, verbose_state_checks, true, 0) \
 	param(bool, nuclear_pageflip, false, 0400) \
 	param(bool, enable_dp_mst, true, 0600) \
-- 
GitLab


From 514bec3387426f42e88a49bf62f9b0f5eb528b9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:06 +0300
Subject: [PATCH 0029/2340] drm/i915/display: Use device parameters instead of
 module in I915_STATE_WARN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also make module parameter as non writable.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-21-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.h | 2 +-
 drivers/gpu/drm/i915/i915_params.c           | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 0e5dffe8f0189..ba3548f9768d1 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -552,7 +552,7 @@ bool assert_port_valid(struct drm_i915_private *i915, enum port port);
 	struct drm_device *drm = &(__i915)->drm;			\
 	int __ret_warn_on = !!(condition);				\
 	if (unlikely(__ret_warn_on))					\
-		if (!drm_WARN(drm, i915_modparams.verbose_state_checks, format)) \
+		if (!drm_WARN(drm, __i915->params.verbose_state_checks, format)) \
 			drm_err(drm, format);				\
 	unlikely(__ret_warn_on);					\
 })
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 3205c6b626703..4e8c088c69fd8 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -93,8 +93,7 @@ i915_param_named(mmio_debug, int, 0400,
 	"Enable the MMIO debug code for the first N failures (default: off). "
 	"This may negatively affect performance.");
 
-/* Special case writable file */
-i915_param_named(verbose_state_checks, bool, 0600,
+i915_param_named(verbose_state_checks, bool, 0400,
 	"Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions.");
 
 i915_param_named_unsafe(nuclear_pageflip, bool, 0400,
-- 
GitLab


From f2e71d2c6bbb9ebf3e3dfdf533ba2cab413842aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:07 +0300
Subject: [PATCH 0030/2340] drm/i915/display: Move verbose_state_checks under
 display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: Change device parameter permissions to 0400

Cc: Luca Coelho <luciano.coelho@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-22-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.h        | 2 +-
 drivers/gpu/drm/i915/display/intel_display_params.c | 3 +++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 3 ---
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index ba3548f9768d1..bc95fb3773867 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -552,7 +552,7 @@ bool assert_port_valid(struct drm_i915_private *i915, enum port port);
 	struct drm_device *drm = &(__i915)->drm;			\
 	int __ret_warn_on = !!(condition);				\
 	if (unlikely(__ret_warn_on))					\
-		if (!drm_WARN(drm, __i915->params.verbose_state_checks, format)) \
+		if (!drm_WARN(drm, __i915->display.params.verbose_state_checks, format)) \
 			drm_err(drm, format);				\
 	unlikely(__ret_warn_on);					\
 })
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index 3579fa1fc5b77..cae1449e9b064 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -87,6 +87,9 @@ intel_display_param_named_unsafe(force_reset_modeset_test, bool, 0400,
 intel_display_param_named(disable_display, bool, 0400,
 	"Disable display (default: false)");
 
+intel_display_param_named(verbose_state_checks, bool, 0400,
+	"Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions.");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 8c4e0f6ac3e35..46ec097d43bec 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -39,6 +39,7 @@ struct drm_i915_private;
 	param(bool, load_detect_test, false, 0600) \
 	param(bool, force_reset_modeset_test, false, 0600) \
 	param(bool, disable_display, false, 0400) \
+	param(bool, verbose_state_checks, true, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 4e8c088c69fd8..72614c1392221 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -93,9 +93,6 @@ i915_param_named(mmio_debug, int, 0400,
 	"Enable the MMIO debug code for the first N failures (default: off). "
 	"This may negatively affect performance.");
 
-i915_param_named(verbose_state_checks, bool, 0400,
-	"Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions.");
-
 i915_param_named_unsafe(nuclear_pageflip, bool, 0400,
 	"Force enable atomic functionality on platforms that don't have full support yet.");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 8bce7d0576346..4b543beb17cad 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -64,7 +64,6 @@ struct drm_printer;
 	/* leave bools at the end to not create holes */ \
 	param(bool, enable_hangcheck, true, 0600) \
 	param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \
-	param(bool, verbose_state_checks, true, 0) \
 	param(bool, nuclear_pageflip, false, 0400) \
 	param(bool, enable_dp_mst, true, 0600) \
 	param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0)
-- 
GitLab


From 192a4444abc88d0e95966a4bb5085d58bed03162 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:08 +0300
Subject: [PATCH 0031/2340] drm/i915/display: Move nuclear_pageflip under
 display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-23-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_device.c | 2 +-
 drivers/gpu/drm/i915/display/intel_display_params.c | 3 +++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/i915_params.c                  | 3 ---
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c
index 50841818fb59f..0b522c6a8d6f5 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.c
+++ b/drivers/gpu/drm/i915/display/intel_display_device.c
@@ -1113,7 +1113,7 @@ void intel_display_device_info_runtime_init(struct drm_i915_private *i915)
 	}
 
 	/* Disable nuclear pageflip by default on pre-g4x */
-	if (!i915->params.nuclear_pageflip &&
+	if (!i915->display.params.nuclear_pageflip &&
 	    DISPLAY_VER(i915) < 5 && !IS_G4X(i915))
 		i915->drm.driver_features &= ~DRIVER_ATOMIC;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index cae1449e9b064..f82f5bed69bca 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -90,6 +90,9 @@ intel_display_param_named(disable_display, bool, 0400,
 intel_display_param_named(verbose_state_checks, bool, 0400,
 	"Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions.");
 
+intel_display_param_named_unsafe(nuclear_pageflip, bool, 0400,
+	"Force enable atomic functionality on platforms that don't have full support yet.");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 46ec097d43bec..40aed14d18beb 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -40,6 +40,7 @@ struct drm_i915_private;
 	param(bool, force_reset_modeset_test, false, 0600) \
 	param(bool, disable_display, false, 0400) \
 	param(bool, verbose_state_checks, true, 0400) \
+	param(bool, nuclear_pageflip, false, 0400) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 72614c1392221..18424873442d8 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -93,9 +93,6 @@ i915_param_named(mmio_debug, int, 0400,
 	"Enable the MMIO debug code for the first N failures (default: off). "
 	"This may negatively affect performance.");
 
-i915_param_named_unsafe(nuclear_pageflip, bool, 0400,
-	"Force enable atomic functionality on platforms that don't have full support yet.");
-
 i915_param_named_unsafe(enable_guc, int, 0400,
 	"Enable GuC load for GuC submission and/or HuC load. "
 	"Required functionality can be selected using bitmask values. "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 4b543beb17cad..c7fff571db2cb 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -64,7 +64,6 @@ struct drm_printer;
 	/* leave bools at the end to not create holes */ \
 	param(bool, enable_hangcheck, true, 0600) \
 	param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \
-	param(bool, nuclear_pageflip, false, 0400) \
 	param(bool, enable_dp_mst, true, 0600) \
 	param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0)
 
-- 
GitLab


From 8d3265a76fcf9f5c5064ecef563ec672d60902d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 24 Oct 2023 15:41:09 +0300
Subject: [PATCH 0032/2340] drm/i915/display: Move enable_dp_mst under display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024124109.384973-24-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_params.c | 3 +++
 drivers/gpu/drm/i915/display/intel_display_params.h | 1 +
 drivers/gpu/drm/i915/display/intel_dp.c             | 6 +++---
 drivers/gpu/drm/i915/i915_params.c                  | 3 ---
 drivers/gpu/drm/i915/i915_params.h                  | 1 -
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c
index f82f5bed69bca..11e03cfb774d5 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.c
+++ b/drivers/gpu/drm/i915/display/intel_display_params.c
@@ -93,6 +93,9 @@ intel_display_param_named(verbose_state_checks, bool, 0400,
 intel_display_param_named_unsafe(nuclear_pageflip, bool, 0400,
 	"Force enable atomic functionality on platforms that don't have full support yet.");
 
+intel_display_param_named_unsafe(enable_dp_mst, bool, 0400,
+	"Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)");
+
 intel_display_param_named_unsafe(enable_fbc, int, 0400,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h
index 40aed14d18beb..6206cc51df049 100644
--- a/drivers/gpu/drm/i915/display/intel_display_params.h
+++ b/drivers/gpu/drm/i915/display/intel_display_params.h
@@ -41,6 +41,7 @@ struct drm_i915_private;
 	param(bool, disable_display, false, 0400) \
 	param(bool, verbose_state_checks, true, 0400) \
 	param(bool, nuclear_pageflip, false, 0400) \
+	param(bool, enable_dp_mst, true, 0600) \
 	param(int, enable_fbc, -1, 0600) \
 	param(int, enable_psr, -1, 0600) \
 	param(bool, psr_safest_params, false, 0400) \
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 6f46b6692e43d..2fc0a17c45c78 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -3771,7 +3771,7 @@ intel_dp_can_mst(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 
-	return i915->params.enable_dp_mst &&
+	return i915->display.params.enable_dp_mst &&
 		intel_dp_mst_source_support(intel_dp) &&
 		drm_dp_read_mst_cap(&intel_dp->aux, intel_dp->dpcd);
 }
@@ -3789,13 +3789,13 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
 		    encoder->base.base.id, encoder->base.name,
 		    str_yes_no(intel_dp_mst_source_support(intel_dp)),
 		    str_yes_no(sink_can_mst),
-		    str_yes_no(i915->params.enable_dp_mst));
+		    str_yes_no(i915->display.params.enable_dp_mst));
 
 	if (!intel_dp_mst_source_support(intel_dp))
 		return;
 
 	intel_dp->is_mst = sink_can_mst &&
-		i915->params.enable_dp_mst;
+		i915->display.params.enable_dp_mst;
 
 	drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
 					intel_dp->is_mst);
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 18424873442d8..de43048543e8b 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -114,9 +114,6 @@ i915_param_named_unsafe(dmc_firmware_path, charp, 0400,
 i915_param_named_unsafe(gsc_firmware_path, charp, 0400,
 	"GSC firmware path to use instead of the default one");
 
-i915_param_named_unsafe(enable_dp_mst, bool, 0400,
-	"Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)");
-
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
 i915_param_named_unsafe(inject_probe_failure, uint, 0400,
 	"Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c7fff571db2cb..1315d7fac850f 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -64,7 +64,6 @@ struct drm_printer;
 	/* leave bools at the end to not create holes */ \
 	param(bool, enable_hangcheck, true, 0600) \
 	param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \
-	param(bool, enable_dp_mst, true, 0600) \
 	param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0)
 
 #define MEMBER(T, member, ...) T member;
-- 
GitLab


From 0520b30b219053cd789909bca45b3c486ef3ee09 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Oct 2023 22:10:04 +0200
Subject: [PATCH 0033/2340] drm/i915/mtl: avoid stringop-overflow warning

The newly added memset() causes a warning for some reason I could not
figure out:

In file included from arch/x86/include/asm/string.h:3,
                 from drivers/gpu/drm/i915/gt/intel_rc6.c:6:
In function 'rc6_res_reg_init',
    inlined from 'intel_rc6_init' at drivers/gpu/drm/i915/gt/intel_rc6.c:610:2:
arch/x86/include/asm/string_32.h:195:29: error: '__builtin_memset' writing 16 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=]
  195 | #define memset(s, c, count) __builtin_memset(s, c, count)
      |                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/i915/gt/intel_rc6.c:584:9: note: in expansion of macro 'memset'
  584 |         memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg));
      |         ^~~~~~
In function 'intel_rc6_init':

Change it to an normal initializer and an added memcpy() that does not have
this problem.

Fixes: 4bb9ca7ee074 ("drm/i915/mtl: C6 residency and C state type for MTL SAMedia")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231016201012.1022812-1-arnd@kernel.org
---
 drivers/gpu/drm/i915/gt/intel_rc6.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 8b67abd720be8..7090e4be29cb6 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -581,19 +581,23 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
 
 static void rc6_res_reg_init(struct intel_rc6 *rc6)
 {
-	memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg));
+	i915_reg_t res_reg[INTEL_RC6_RES_MAX] = {
+		[0 ... INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG,
+	};
 
 	switch (rc6_to_gt(rc6)->type) {
 	case GT_MEDIA:
-		rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6;
+		res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6;
 		break;
 	default:
-		rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED;
-		rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6;
-		rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p;
-		rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp;
+		res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED;
+		res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6;
+		res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p;
+		res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp;
 		break;
 	}
+
+	memcpy(rc6->res_reg, res_reg, sizeof(res_reg));
 }
 
 void intel_rc6_init(struct intel_rc6 *rc6)
-- 
GitLab


From a1196dac2f504f89bc7941e8c63db50f1fe713f3 Mon Sep 17 00:00:00 2001
From: Vinod Govindapillai <vinod.govindapillai@intel.com>
Date: Wed, 18 Oct 2023 13:27:23 +0300
Subject: [PATCH 0034/2340] drm/i915: remove display device info from i915
 capabilities

Display device and display runtime info is exposed as part of
i915_display_capabilities debugfs entry. Remove this information
from i915_ capabilities as it is now reduntant.

Signed-off-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018102723.16915-3-vinod.govindapillai@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index af0077f6a6d0e..bfe92d2402ea2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -69,7 +69,6 @@ static int i915_capabilities(struct seq_file *m, void *data)
 	seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915));
 
 	intel_device_info_print(INTEL_INFO(i915), RUNTIME_INFO(i915), &p);
-	intel_display_device_info_print(DISPLAY_INFO(i915), DISPLAY_RUNTIME_INFO(i915), &p);
 	i915_print_iommu_status(i915, &p);
 	intel_gt_info_print(&to_gt(i915)->info, &p);
 	intel_driver_caps_print(&i915->caps, &p);
-- 
GitLab


From ffc02c67bf8d4909bd9571fbd14104381fe36b21 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Mon, 23 Oct 2023 14:13:05 +0200
Subject: [PATCH 0035/2340] drm/i915/gt: Use proper priority enum instead of 0

I915_PRIORITY_NORMAL is 0 so use that instead for better
readability.

Cc: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023121305.12560-1-nirmoy.das@intel.com
---
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9a527e1f5be65..1a8e2b7db0138 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -188,7 +188,7 @@ static void heartbeat(struct work_struct *wrk)
 			 * low latency and no jitter] the chance to naturally
 			 * complete before being preempted.
 			 */
-			attr.priority = 0;
+			attr.priority = I915_PRIORITY_NORMAL;
 			if (rq->sched.attr.priority >= attr.priority)
 				attr.priority = I915_PRIORITY_HEARTBEAT;
 			if (rq->sched.attr.priority >= attr.priority)
-- 
GitLab


From b662c19654ca7fdb1dadd304ca3e26024fc89635 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Mon, 16 Oct 2023 15:55:44 +0300
Subject: [PATCH 0036/2340] drm/i915/display: Reset message bus after each
 read/write operation

Every know and then we receive the following error when running
for example IGT test kms_flip.

[drm] *ERROR* PHY G Read 0d80 failed after 3 retries.
[drm] *ERROR* PHY G Write 0d81 failed after 3 retries.

Since the error is sporadic in nature, the patch proposes
to reset the message bus after every successful or unsuccessful
read or write operation.

v2: Add FIXME's to indicate the experimental nature of
    this workaround (Rodrigo)
v3: Dropping the additional delay as moving reset to *_read_once()
    and *_write_once() functions seem unnecessary delay

Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231016125544.719963-1-mika.kahola@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index d414f6b7f993c..7516abf8dba95 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -206,6 +206,13 @@ static int __intel_cx0_read_once(struct drm_i915_private *i915, enum port port,
 
 	intel_clear_response_ready_flag(i915, port, lane);
 
+	/*
+	 * FIXME: Workaround to let HW to settle
+	 * down and let the message bus to end up
+	 * in a known state
+	 */
+	intel_cx0_bus_reset(i915, port, lane);
+
 	return REG_FIELD_GET(XELPDP_PORT_P2M_DATA_MASK, val);
 }
 
@@ -285,6 +292,13 @@ static int __intel_cx0_write_once(struct drm_i915_private *i915, enum port port,
 
 	intel_clear_response_ready_flag(i915, port, lane);
 
+	/*
+	 * FIXME: Workaround to let HW to settle
+	 * down and let the message bus to end up
+	 * in a known state
+	 */
+	intel_cx0_bus_reset(i915, port, lane);
+
 	return 0;
 }
 
-- 
GitLab


From 8d68a0ac9f3f308967bbdf3af37de818a6ed321d Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 11 Oct 2023 23:22:59 +0300
Subject: [PATCH 0037/2340] drm/i915/sprite: move sprite_name() to
 intel_sprite.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move sprite_name() where its only user is, and convert it to a function,
removing the implicit dev_priv usage.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231011202259.1090131-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.h | 1 -
 drivers/gpu/drm/i915/display/intel_sprite.c  | 7 ++++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index bc95fb3773867..cae2a3b12e20d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -105,7 +105,6 @@ enum i9xx_plane_id {
 };
 
 #define plane_name(p) ((p) + 'A')
-#define sprite_name(p, s) ((p) * DISPLAY_RUNTIME_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A')
 
 #define for_each_plane_id_on_crtc(__crtc, __p) \
 	for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index 1fb16510f7501..d7b440c8caefe 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -48,6 +48,11 @@
 #include "intel_frontbuffer.h"
 #include "intel_sprite.h"
 
+static char sprite_name(struct drm_i915_private *i915, enum pipe pipe, int sprite)
+{
+	return pipe * DISPLAY_RUNTIME_INFO(i915)->num_sprites[pipe] + sprite + 'A';
+}
+
 static void i9xx_plane_linear_gamma(u16 gamma[8])
 {
 	/* The points are not evenly spaced. */
@@ -1636,7 +1641,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 				       0, plane_funcs,
 				       formats, num_formats, modifiers,
 				       DRM_PLANE_TYPE_OVERLAY,
-				       "sprite %c", sprite_name(pipe, sprite));
+				       "sprite %c", sprite_name(dev_priv, pipe, sprite));
 	kfree(modifiers);
 
 	if (ret)
-- 
GitLab


From 171c5f641031c0eee424ba79a3db4736c32e18ca Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Sat, 21 Oct 2023 00:30:17 +0200
Subject: [PATCH 0038/2340] dt-bindings: display: ssd132x: Remove '-' before
 compatible enum

This is a leftover from when the binding schema had the compatible string
property enum as a 'oneOf' child and the '-' was not removed when 'oneOf'
got dropped during the binding review process.

Reported-by: Rob Herring <robh@kernel.org>
Closes: https://lore.kernel.org/dri-devel/CAL_Jsq+h8DcnpKqhokQOODCc8+Qi3M0PrxRFKz_Y4v37yMJvvA@mail.gmail.com/
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231020223029.1667190-1-javierm@redhat.com
---
 .../devicetree/bindings/display/solomon,ssd132x.yaml      | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml
index 0aa41bd9ddca4..37975ee61c5ad 100644
--- a/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml
+++ b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml
@@ -11,10 +11,10 @@ maintainers:
 
 properties:
   compatible:
-    - enum:
-        - solomon,ssd1322
-        - solomon,ssd1325
-        - solomon,ssd1327
+    enum:
+      - solomon,ssd1322
+      - solomon,ssd1325
+      - solomon,ssd1327
 
 required:
   - compatible
-- 
GitLab


From 9e4db199e66d427c50458f4d72734cc4f0b92948 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Sat, 21 Oct 2023 00:52:57 +0200
Subject: [PATCH 0039/2340] drm/ssd130x: Fix possible uninitialized usage of
 crtc_state variable

Avoid a possible uninitialized use of the crtc_state variable in function
ssd132x_primary_plane_atomic_check() and avoid the following Smatch warn:

    drivers/gpu/drm/solomon/ssd130x.c:921 ssd132x_primary_plane_atomic_check()
    error: uninitialized symbol 'crtc_state'.

Fixes: fdd591e00a9c ("drm/ssd130x: Add support for the SSD132x OLED controller family")
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/dri-devel/7dd6ca45-8263-44fe-a318-2fd9d761425d@moroto.mountain/
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231020225338.1686974-1-javierm@redhat.com
---
 drivers/gpu/drm/solomon/ssd130x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index 32f0857aec9f1..e0174f82e3537 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -910,7 +910,7 @@ static int ssd132x_primary_plane_atomic_check(struct drm_plane *plane,
 	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
 	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
 	struct drm_crtc *crtc = plane_state->crtc;
-	struct drm_crtc_state *crtc_state;
+	struct drm_crtc_state *crtc_state = NULL;
 	const struct drm_format_info *fi;
 	unsigned int pitch;
 	int ret;
-- 
GitLab


From 6ce33a8a45496d4eca27b45ab9b8c2436c657495 Mon Sep 17 00:00:00 2001
From: Soumya Negi <soumya.negi97@gmail.com>
Date: Wed, 25 Oct 2023 21:43:08 -0700
Subject: [PATCH 0040/2340] drm/i915/gt: Remove {} from if-else

In accordance to Linux coding style(Documentation/process/4.Coding.rst),
remove unneeded braces from if-else block as all arms of this block
contain single statements.

Suggested-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Soumya Negi <soumya.negi97@gmail.com>
Acked-by: Karolina Stolarek <karolina.stolarek@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026044309.17213-1-soumya.negi97@gmail.com
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 1c93e84278a03..9f6f9e1385329 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -226,16 +226,15 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 	gen8_ggtt_invalidate(ggtt);
 
 	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
-		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) {
+		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
 			guc_ggtt_ct_invalidate(gt);
-		} else if (GRAPHICS_VER(i915) >= 12) {
+		else if (GRAPHICS_VER(i915) >= 12)
 			intel_uncore_write_fw(gt->uncore,
 					      GEN12_GUC_TLB_INV_CR,
 					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
-		} else {
+		else
 			intel_uncore_write_fw(gt->uncore,
 					      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-		}
 	}
 }
 
-- 
GitLab


From 80683bf48afcdbebbaf51057e71b2701aa07826d Mon Sep 17 00:00:00 2001
From: Kunwu Chan <chentao@kylinos.cn>
Date: Fri, 27 Oct 2023 10:44:59 +0800
Subject: [PATCH 0041/2340] drm/atomic-helper: Fix spelling mistake
 "preceeding" -> "preceding"

There is a typo in the kernel documentation for function
drm_atomic_helper_wait_for_dependencies. Fix it.

Signed-off-by: Kunwu Chan <chentao@kylinos.cn>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027024459.12793-1-chentao@kylinos.cn
---
 drivers/gpu/drm/drm_atomic_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 71d3993971075..10aadd324cc37 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -2373,10 +2373,10 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state,
 EXPORT_SYMBOL(drm_atomic_helper_setup_commit);
 
 /**
- * drm_atomic_helper_wait_for_dependencies - wait for required preceeding commits
+ * drm_atomic_helper_wait_for_dependencies - wait for required preceding commits
  * @old_state: atomic state object with old state structures
  *
- * This function waits for all preceeding commits that touch the same CRTC as
+ * This function waits for all preceding commits that touch the same CRTC as
  * @old_state to both be committed to the hardware (as signalled by
  * drm_atomic_helper_commit_hw_done()) and executed by the hardware (as signalled
  * by calling drm_crtc_send_vblank_event() on the &drm_crtc_state.event).
-- 
GitLab


From 0226ba393eb1a90d63955cc407340c5d506ecacf Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Fri, 20 Oct 2023 10:19:38 +0000
Subject: [PATCH 0042/2340] drm: extract closefb logic in separate function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drm_mode_rmfb performs two operations: drop the FB from the
file_priv->fbs list, and make sure the FB is no longer used on a
plane.

In the next commit an IOCTL which only does so former will be
introduced, so let's split it into a separate function.

No functional change, only refactoring.

v2: no change

Signed-off-by: Simon Ser <contact@emersion.fr>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Dennis Filder <d.filder@web.de>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Pekka Paalanen <ppaalanen@gmail.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Sean Paul <seanpaul@chromium.org>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231020101926.145327-1-contact@emersion.fr
---
 drivers/gpu/drm/drm_framebuffer.c | 53 ++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index d3ba0698b84b4..ef3f1e4bc334a 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -394,6 +394,31 @@ static void drm_mode_rmfb_work_fn(struct work_struct *w)
 	}
 }
 
+static int drm_mode_closefb(struct drm_framebuffer *fb,
+			    struct drm_file *file_priv)
+{
+	struct drm_framebuffer *fbl;
+	bool found = false;
+
+	mutex_lock(&file_priv->fbs_lock);
+	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
+		if (fb == fbl)
+			found = true;
+
+	if (!found) {
+		mutex_unlock(&file_priv->fbs_lock);
+		return -ENOENT;
+	}
+
+	list_del_init(&fb->filp_head);
+	mutex_unlock(&file_priv->fbs_lock);
+
+	/* Drop the reference that was stored in the fbs list */
+	drm_framebuffer_put(fb);
+
+	return 0;
+}
+
 /**
  * drm_mode_rmfb - remove an FB from the configuration
  * @dev: drm device
@@ -410,9 +435,8 @@ static void drm_mode_rmfb_work_fn(struct work_struct *w)
 int drm_mode_rmfb(struct drm_device *dev, u32 fb_id,
 		  struct drm_file *file_priv)
 {
-	struct drm_framebuffer *fb = NULL;
-	struct drm_framebuffer *fbl = NULL;
-	int found = 0;
+	struct drm_framebuffer *fb;
+	int ret;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EOPNOTSUPP;
@@ -421,24 +445,13 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id,
 	if (!fb)
 		return -ENOENT;
 
-	mutex_lock(&file_priv->fbs_lock);
-	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
-		if (fb == fbl)
-			found = 1;
-	if (!found) {
-		mutex_unlock(&file_priv->fbs_lock);
-		goto fail_unref;
+	ret = drm_mode_closefb(fb, file_priv);
+	if (ret != 0) {
+		drm_framebuffer_put(fb);
+		return ret;
 	}
 
-	list_del_init(&fb->filp_head);
-	mutex_unlock(&file_priv->fbs_lock);
-
-	/* drop the reference we picked up in framebuffer lookup */
-	drm_framebuffer_put(fb);
-
 	/*
-	 * we now own the reference that was stored in the fbs list
-	 *
 	 * drm_framebuffer_remove may fail with -EINTR on pending signals,
 	 * so run this in a separate stack as there's no way to correctly
 	 * handle this after the fb is already removed from the lookup table.
@@ -457,10 +470,6 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id,
 		drm_framebuffer_put(fb);
 
 	return 0;
-
-fail_unref:
-	drm_framebuffer_put(fb);
-	return -ENOENT;
 }
 
 int drm_mode_rmfb_ioctl(struct drm_device *dev,
-- 
GitLab


From d208d875667e2a29beeec5d475f4b6b164b632fa Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Fri, 20 Oct 2023 10:19:45 +0000
Subject: [PATCH 0043/2340] drm: introduce CLOSEFB IOCTL

This new IOCTL allows callers to close a framebuffer without
disabling planes or CRTCs. This takes inspiration from Rob Clark's
unref_fb IOCTL [1] and DRM_MODE_FB_PERSIST [2].

User-space patch for wlroots available at [3]. IGT test available
at [4].

v2: add an extra pad field just in case we want to extend this IOCTL
in the future (Pekka, Sima).

[1]: https://lore.kernel.org/dri-devel/20170509153654.23464-1-robdclark@gmail.com/
[2]: https://lore.kernel.org/dri-devel/20211006151921.312714-1-contact@emersion.fr/
[3]: https://gitlab.freedesktop.org/wlroots/wlroots/-/merge_requests/4394
[4]: https://lists.freedesktop.org/archives/igt-dev/2023-October/063294.html

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Daniel Stone <daniels@collabora.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Dennis Filder <d.filder@web.de>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231020101926.145327-2-contact@emersion.fr
---
 drivers/gpu/drm/drm_crtc_internal.h |  2 ++
 drivers/gpu/drm/drm_framebuffer.c   | 22 ++++++++++++++++++++++
 drivers/gpu/drm/drm_ioctl.c         |  1 +
 include/uapi/drm/drm.h              | 20 ++++++++++++++++++++
 include/uapi/drm/drm_mode.h         | 10 ++++++++++
 5 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 8556c3b3ff88a..6b646e0783be9 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -222,6 +222,8 @@ int drm_mode_addfb2_ioctl(struct drm_device *dev,
 			  void *data, struct drm_file *file_priv);
 int drm_mode_rmfb_ioctl(struct drm_device *dev,
 			void *data, struct drm_file *file_priv);
+int drm_mode_closefb_ioctl(struct drm_device *dev,
+			   void *data, struct drm_file *file_priv);
 int drm_mode_getfb(struct drm_device *dev,
 		   void *data, struct drm_file *file_priv);
 int drm_mode_getfb2_ioctl(struct drm_device *dev,
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index ef3f1e4bc334a..09e289fca5c31 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -480,6 +480,28 @@ int drm_mode_rmfb_ioctl(struct drm_device *dev,
 	return drm_mode_rmfb(dev, *fb_id, file_priv);
 }
 
+int drm_mode_closefb_ioctl(struct drm_device *dev,
+			   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_closefb *r = data;
+	struct drm_framebuffer *fb;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EOPNOTSUPP;
+
+	if (r->pad)
+		return -EINVAL;
+
+	fb = drm_framebuffer_lookup(dev, file_priv, r->fb_id);
+	if (!fb)
+		return -ENOENT;
+
+	ret = drm_mode_closefb(fb, file_priv);
+	drm_framebuffer_put(fb);
+	return ret;
+}
+
 /**
  * drm_mode_getfb - get FB info
  * @dev: drm device for the ioctl
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 77590b0f38fa3..44fda68c28aeb 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -675,6 +675,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, 0),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CLOSEFB, drm_mode_closefb_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, 0),
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index de723566c5ae8..8662b5aeea0c8 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1218,6 +1218,26 @@ extern "C" {
 
 #define DRM_IOCTL_SYNCOBJ_EVENTFD	DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
 
+/**
+ * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
+ *
+ * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
+ * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
+ * alive. When the plane no longer uses the framebuffer (because the
+ * framebuffer is replaced with another one, or the plane is disabled), the
+ * framebuffer is cleaned up.
+ *
+ * This is useful to implement flicker-free transitions between two processes.
+ *
+ * Depending on the threat model, user-space may want to ensure that the
+ * framebuffer doesn't expose any sensitive user information: closed
+ * framebuffers attached to a plane can be read back by the next DRM master.
+ */
+#define DRM_IOCTL_MODE_CLOSEFB		DRM_IOWR(0xD0, struct drm_mode_closefb)
+
 /*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 128d09138ceb3..09e7a471ee30b 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -1323,6 +1323,16 @@ struct drm_mode_rect {
 	__s32 y2;
 };
 
+/**
+ * struct drm_mode_closefb
+ * @fb_id: Framebuffer ID.
+ * @pad: Must be zero.
+ */
+struct drm_mode_closefb {
+	__u32 fb_id;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
GitLab


From 81de3e296b10a13e5c9f13172825b0d8d9495c68 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Wed, 18 Oct 2023 11:38:15 +0200
Subject: [PATCH 0044/2340] drm/i915: Flush WC GGTT only on required platforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gen8_ggtt_invalidate() is only needed for limited set of platforms
where GGTT is mapped as WC. This was added as way to fix WC based GGTT in
commit 0f9b91c754b7 ("drm/i915: flush system agent TLBs on SNB") and
there are no reference in HW docs that forces us to use this on non-WC
backed GGTT.

This can also cause unwanted side-effects on XE_HP platforms where
GFX_FLSH_CNTL_GEN6 is not valid anymore.

v2: Add a func to detect wc ggtt detection (Ville)
v3: Improve commit log and add reference commit (Daniel)

Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement")
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: John Harrison <john.c.harrison@intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: <stable@vger.kernel.org> # v6.2+
Suggested-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018093815.1349-1-nirmoy.das@intel.com
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 35 +++++++++++++++++++---------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 9f6f9e1385329..76b0ee2b906b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -195,6 +195,21 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
 	spin_unlock_irq(&uncore->lock);
 }
 
+static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
+{
+	/*
+	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
+	 * will be dropped. For WC mappings in general we have 64 byte burst
+	 * writes when the WC buffer is flushed, so we can't use it, but have to
+	 * resort to an uncached mapping. The WC issue is easily caught by the
+	 * readback check when writing GTT PTE entries.
+	 */
+	if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11)
+		return true;
+
+	return false;
+}
+
 static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
@@ -202,8 +217,12 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 	/*
 	 * Note that as an uncached mmio write, this will flush the
 	 * WCB of the writes into the GGTT before it triggers the invalidate.
+	 *
+	 * Only perform this when GGTT is mapped as WC, see ggtt_probe_common().
 	 */
-	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	if (needs_wc_ggtt_mapping(ggtt->vm.i915))
+		intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6,
+				      GFX_FLSH_CNTL_EN);
 }
 
 static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
@@ -1139,17 +1158,11 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
 	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
 
-	/*
-	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
-	 * will be dropped. For WC mappings in general we have 64 byte burst
-	 * writes when the WC buffer is flushed, so we can't use it, but have to
-	 * resort to an uncached mapping. The WC issue is easily caught by the
-	 * readback check when writing GTT PTE entries.
-	 */
-	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
-		ggtt->gsm = ioremap(phys_addr, size);
-	else
+	if (needs_wc_ggtt_mapping(i915))
 		ggtt->gsm = ioremap_wc(phys_addr, size);
+	else
+		ggtt->gsm = ioremap(phys_addr, size);
+
 	if (!ggtt->gsm) {
 		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
 		return -ENOMEM;
-- 
GitLab


From 88b02ebca8b6ea7457bed6809b1dd575420b7544 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Mon, 23 Oct 2023 20:36:39 +0000
Subject: [PATCH 0045/2340] drm/doc: describe PATH format for DP MST

This is already uAPI, xserver parses it. It's useful to document
since user-space might want to lookup the parent connector.

Additionally, people (me included) have misunderstood the PATH
property for being stable across reboots, but since a KMS object
ID is baked in there that's not the case. So PATH shouldn't be
used as-is in config files and such.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023203629.198109-1-contact@emersion.fr
---
 drivers/gpu/drm/drm_connector.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index c3725086f4132..b0516505f7ae9 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1198,6 +1198,12 @@ static const u32 dp_colorspaces =
  * 	drm_connector_set_path_property(), in the case of DP MST with the
  * 	path property the MST manager created. Userspace cannot change this
  * 	property.
+ *
+ * 	In the case of DP MST, the property has the format
+ * 	``mst:<parent>-<ports>`` where ``<parent>`` is the KMS object ID of the
+ * 	parent connector and ``<ports>`` is a hyphen-separated list of DP MST
+ * 	port numbers. Note, KMS object IDs are not guaranteed to be stable
+ * 	across reboots.
  * TILE:
  * 	Connector tile group property to indicate how a set of DRM connector
  * 	compose together into one logical screen. This is used by both high-res
-- 
GitLab


From bb8e97e26ce6437d2f57f37e8ba767a2b9cf0d65 Mon Sep 17 00:00:00 2001
From: Carl Vanderlip <quic_carlv@quicinc.com>
Date: Mon, 16 Oct 2023 11:00:36 -0600
Subject: [PATCH 0046/2340] accel/qaic: Enable 1 MSI fallback mode

Several virtualization use-cases either don't support 32 MultiMSIs
(Xen/VMware) or have significant drawbacks to their use (KVM's vIOMMU,
which is required to support 32 MSI, needs to allocate an alternate
system memory space for each device using vIOMMU (e.g. 8GB VM mem and
2 cards => 8 + 2 * 8 = 24GB host memory required)). Support these
cases by enabling a 1 MSI fallback mode.

Whenever all 32 MSIs requested are not available, a second request for
a single MSI is made. Its success is the initiator of single MSI mode.
This mode causes all interrupts generated by the device to be directed
to the 0th MSI (firmware >=v1.10 will do this as a response to the PCIe
MSI capability configuration). Likewise, all interrupt handlers for the
device are registered to the 0th MSI.

Since the DBC interrupt handler checks if the DBC is in use or if
there is any pending changes, the 'spurious' interrupts are
disregarded. If there is work to be done, the standard threaded IRQ
handler is dispatched.

On every interrupt, the MHI handler wakes up its threaded interrupt
handler, and attempts to wake any waiters for MHI state events.

Performance is within +-0.6% for test cases that typify real world
use. Larger differences ([-4,+132]%, avg +47%) exist for very simple
tasks (e.g. addition) compiled for single NSPs. It is assumed that the
small work and many interrupts typically cause contention (e.g. 16 NSPs
vs 4 CPUs), as evidenced by the standard deviation between runs also
decreasing (r=-0.48 between delta(Performace_test) and
delta(StdDev_test/Avg_test))

Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231016170036.5409-1-quic_jhugo@quicinc.com
---
 Documentation/accel/qaic/aic100.rst |  5 +++--
 Documentation/accel/qaic/qaic.rst   | 23 +++++++++++++++++++
 drivers/accel/qaic/mhi_controller.c |  6 ++++-
 drivers/accel/qaic/mhi_controller.h |  2 +-
 drivers/accel/qaic/qaic.h           |  2 ++
 drivers/accel/qaic/qaic_data.c      | 25 +++++++++++++++------
 drivers/accel/qaic/qaic_drv.c       | 35 ++++++++++++++++++++---------
 7 files changed, 76 insertions(+), 22 deletions(-)

diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst
index c80d0f1307dbb..a5fef0869aabc 100644
--- a/Documentation/accel/qaic/aic100.rst
+++ b/Documentation/accel/qaic/aic100.rst
@@ -36,8 +36,9 @@ AIC100 DID (0xa100).
 
 AIC100 does not implement FLR (function level reset).
 
-AIC100 implements MSI but does not implement MSI-X. AIC100 requires 17 MSIs to
-operate (1 for MHI, 16 for the DMA Bridge).
+AIC100 implements MSI but does not implement MSI-X. AIC100 prefers 17 MSIs to
+operate (1 for MHI, 16 for the DMA Bridge). Falling back to 1 MSI is possible in
+scenarios where reserving 32 MSIs isn't feasible.
 
 As a PCIe device, AIC100 utilizes BARs to provide host interfaces to the device
 hardware. AIC100 provides 3, 64-bit BARs.
diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
index c885023831367..9ccbfea86f5a7 100644
--- a/Documentation/accel/qaic/qaic.rst
+++ b/Documentation/accel/qaic/qaic.rst
@@ -10,6 +10,9 @@ accelerator products.
 Interrupts
 ==========
 
+IRQ Storm Mitigation
+--------------------
+
 While the AIC100 DMA Bridge hardware implements an IRQ storm mitigation
 mechanism, it is still possible for an IRQ storm to occur. A storm can happen
 if the workload is particularly quick, and the host is responsive. If the host
@@ -35,6 +38,26 @@ generates 100k IRQs per second (per /proc/interrupts) is reduced to roughly 64
 IRQs over 5 minutes while keeping the host system stable, and having the same
 workload throughput performance (within run to run noise variation).
 
+Single MSI Mode
+---------------
+
+MultiMSI is not well supported on all systems; virtualized ones even less so
+(circa 2023). Between hypervisors masking the PCIe MSI capability structure to
+large memory requirements for vIOMMUs (required for supporting MultiMSI), it is
+useful to be able to fall back to a single MSI when needed.
+
+To support this fallback, we allow the case where only one MSI is able to be
+allocated, and share that one MSI between MHI and the DBCs. The device detects
+when only one MSI has been configured and directs the interrupts for the DBCs
+to the interrupt normally used for MHI. Unfortunately this means that the
+interrupt handlers for every DBC and MHI wake up for every interrupt that
+arrives; however, the DBC threaded irq handlers only are started when work to be
+done is detected (MHI will always start its threaded handler).
+
+If the DBC is configured to force MSI interrupts, this can circumvent the
+software IRQ storm mitigation mentioned above. Since the MSI is shared it is
+never disabled, allowing each new entry to the FIFO to trigger a new interrupt.
+
 
 Neural Network Control (NNC) Protocol
 =====================================
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 5036e58e7235b..41be3626587dc 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -468,7 +468,7 @@ static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
 }
 
 struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
-						    int mhi_irq)
+						    int mhi_irq, bool shared_msi)
 {
 	struct mhi_controller *mhi_cntrl;
 	int ret;
@@ -500,6 +500,10 @@ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, voi
 		return ERR_PTR(-ENOMEM);
 
 	mhi_cntrl->irq[0] = mhi_irq;
+
+	if (shared_msi) /* MSI shared with data path, no IRQF_NO_SUSPEND */
+		mhi_cntrl->irq_flags = IRQF_SHARED;
+
 	mhi_cntrl->fw_image = "qcom/aic100/sbl.bin";
 
 	/* use latest configured timeout */
diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h
index 2ae45d768e247..500e7f4af2afe 100644
--- a/drivers/accel/qaic/mhi_controller.h
+++ b/drivers/accel/qaic/mhi_controller.h
@@ -8,7 +8,7 @@
 #define MHICONTROLLERQAIC_H_
 
 struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
-						    int mhi_irq);
+						    int mhi_irq, bool shared_msi);
 void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up);
 void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl);
 void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl);
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index e3f4c30f3ffd2..01e9dda0cc378 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -123,6 +123,8 @@ struct qaic_device {
 	struct srcu_struct	dev_lock;
 	/* true: Device under reset; false: Device not under reset */
 	bool			in_reset;
+	/* true: single MSI is used to operate device */
+	bool			single_msi;
 	/*
 	 * true: A tx MHI transaction has failed and a rx buffer is still queued
 	 * in control device. Such a buffer is considered lost rx buffer
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 4a8e43a7a6a40..ebc3cca1b0947 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -1466,6 +1466,16 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
 
 	rcu_id = srcu_read_lock(&dbc->ch_lock);
 
+	if (datapath_polling) {
+		srcu_read_unlock(&dbc->ch_lock, rcu_id);
+		/*
+		 * Normally datapath_polling will not have irqs enabled, but
+		 * when running with only one MSI the interrupt is shared with
+		 * MHI so it cannot be disabled. Return ASAP instead.
+		 */
+		return IRQ_HANDLED;
+	}
+
 	if (!dbc->usr) {
 		srcu_read_unlock(&dbc->ch_lock, rcu_id);
 		return IRQ_HANDLED;
@@ -1488,7 +1498,8 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
 		return IRQ_NONE;
 	}
 
-	disable_irq_nosync(irq);
+	if (!dbc->qdev->single_msi)
+		disable_irq_nosync(irq);
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
 	return IRQ_WAKE_THREAD;
 }
@@ -1559,12 +1570,12 @@ irqreturn_t dbc_irq_threaded_fn(int irq, void *data)
 	u32 tail;
 
 	rcu_id = srcu_read_lock(&dbc->ch_lock);
+	qdev = dbc->qdev;
 
 	head = readl(dbc->dbc_base + RSPHP_OFF);
 	if (head == U32_MAX) /* PCI link error */
 		goto error_out;
 
-	qdev = dbc->qdev;
 read_fifo:
 
 	if (!event_count) {
@@ -1645,14 +1656,14 @@ read_fifo:
 	goto read_fifo;
 
 normal_out:
-	if (likely(!datapath_polling))
+	if (!qdev->single_msi && likely(!datapath_polling))
 		enable_irq(irq);
-	else
+	else if (unlikely(datapath_polling))
 		schedule_work(&dbc->poll_work);
 	/* checking the fifo and enabling irqs is a race, missed event check */
 	tail = readl(dbc->dbc_base + RSPTP_OFF);
 	if (tail != U32_MAX && head != tail) {
-		if (likely(!datapath_polling))
+		if (!qdev->single_msi && likely(!datapath_polling))
 			disable_irq_nosync(irq);
 		goto read_fifo;
 	}
@@ -1661,9 +1672,9 @@ normal_out:
 
 error_out:
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
-	if (likely(!datapath_polling))
+	if (!qdev->single_msi && likely(!datapath_polling))
 		enable_irq(irq);
-	else
+	else if (unlikely(datapath_polling))
 		schedule_work(&dbc->poll_work);
 
 	return IRQ_HANDLED;
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 6f58095767df6..5eba5f03d3bf7 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -424,14 +424,24 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
 	int i;
 
 	/* Managed release since we use pcim_enable_device */
-	ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
-	if (ret < 0)
-		return ret;
+	ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);
+	if (ret == -ENOSPC) {
+		ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+		if (ret < 0)
+			return ret;
 
-	if (ret < 32) {
-		pci_err(pdev, "%s: Requested 32 MSIs. Obtained %d MSIs which is less than the 32 required.\n",
-			__func__, ret);
-		return -ENODEV;
+		/*
+		 * Operate in one MSI mode. All interrupts will be directed to
+		 * MSI0; every interrupt will wake up all the interrupt handlers
+		 * (MHI and DBC[0-15]). Since the interrupt is now shared, it is
+		 * not disabled during DBC threaded handler, but only one thread
+		 * will be allowed to run per DBC, so while it can be
+		 * interrupted, it shouldn't race with itself.
+		 */
+		qdev->single_msi = true;
+		pci_info(pdev, "Allocating 32 MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n");
+	} else if (ret < 0) {
+		return ret;
 	}
 
 	mhi_irq = pci_irq_vector(pdev, 0);
@@ -439,15 +449,17 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
 		return mhi_irq;
 
 	for (i = 0; i < qdev->num_dbc; ++i) {
-		ret = devm_request_threaded_irq(&pdev->dev, pci_irq_vector(pdev, i + 1),
+		ret = devm_request_threaded_irq(&pdev->dev,
+						pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1),
 						dbc_irq_handler, dbc_irq_threaded_fn, IRQF_SHARED,
 						"qaic_dbc", &qdev->dbc[i]);
 		if (ret)
 			return ret;
 
 		if (datapath_polling) {
-			qdev->dbc[i].irq = pci_irq_vector(pdev, i + 1);
-			disable_irq_nosync(qdev->dbc[i].irq);
+			qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1);
+			if (!qdev->single_msi)
+				disable_irq_nosync(qdev->dbc[i].irq);
 			INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
 		}
 	}
@@ -479,7 +491,8 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto cleanup_qdev;
 	}
 
-	qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq);
+	qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
+						       qdev->single_msi);
 	if (IS_ERR(qdev->mhi_cntrl)) {
 		ret = PTR_ERR(qdev->mhi_cntrl);
 		goto cleanup_qdev;
-- 
GitLab


From 6216fb03f8bd0b1439e9e799a306bafd5b462622 Mon Sep 17 00:00:00 2001
From: Ajit Pal Singh <quic_ajitpals@quicinc.com>
Date: Mon, 16 Oct 2023 11:01:13 -0600
Subject: [PATCH 0047/2340] accel/qaic: Add support for periodic timesync

Device and Host have a time synchronization mechanism that happens once
during boot when device is in SBL mode. After that, in mission-mode there
is no timesync. In an experiment after continuous operation, device time
drifted w.r.t. host by approximately 3 seconds per day. This drift leads
to mismatch in timestamp of device and Host logs. To correct this
implement periodic timesync in driver. This timesync is carried out via
QAIC_TIMESYNC_PERIODIC MHI channel.

Signed-off-by: Ajit Pal Singh <quic_ajitpals@quicinc.com>
Signed-off-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231016170114.5446-2-quic_jhugo@quicinc.com
---
 Documentation/accel/qaic/aic100.rst |   4 +
 Documentation/accel/qaic/qaic.rst   |   5 +
 drivers/accel/qaic/Makefile         |   3 +-
 drivers/accel/qaic/mhi_controller.c |  32 ++++
 drivers/accel/qaic/qaic_drv.c       |   6 +
 drivers/accel/qaic/qaic_timesync.c  | 245 ++++++++++++++++++++++++++++
 drivers/accel/qaic/qaic_timesync.h  |  11 ++
 7 files changed, 305 insertions(+), 1 deletion(-)
 create mode 100644 drivers/accel/qaic/qaic_timesync.c
 create mode 100644 drivers/accel/qaic/qaic_timesync.h

diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst
index a5fef0869aabc..2ed32be7ae251 100644
--- a/Documentation/accel/qaic/aic100.rst
+++ b/Documentation/accel/qaic/aic100.rst
@@ -225,6 +225,10 @@ of the defined channels, and their uses.
 |                |         |          | device side logs with the host time    |
 |                |         |          | source.                                |
 +----------------+---------+----------+----------------------------------------+
+| QAIC_TIMESYNC  | 22 & 23 | AMSS     | Used to periodically synchronize       |
+| _PERIODIC      |         |          | timestamps in the device side logs with|
+|                |         |          | the host time source.                  |
++----------------+---------+----------+----------------------------------------+
 
 DMA Bridge
 ==========
diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
index 9ccbfea86f5a7..f81020736ebfb 100644
--- a/Documentation/accel/qaic/qaic.rst
+++ b/Documentation/accel/qaic/qaic.rst
@@ -201,3 +201,8 @@ overrides this for that call. Default is 5000 (5 seconds).
 
 Sets the polling interval in microseconds (us) when datapath polling is active.
 Takes effect at the next polling interval. Default is 100 (100 us).
+
+**timesync_delay_ms (unsigned int)**
+
+Sets the time interval in milliseconds (ms) between two consecutive timesync
+operations. Default is 1000 (1000 ms).
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 2418418f7a505..3f7f6dfde7f2c 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -9,4 +9,5 @@ qaic-y := \
 	mhi_controller.o \
 	qaic_control.o \
 	qaic_data.o \
-	qaic_drv.o
+	qaic_drv.o \
+	qaic_timesync.o
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 41be3626587dc..705898777ec52 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -373,6 +373,38 @@ static struct mhi_channel_config aic100_channels[] = {
 		.auto_queue = false,
 		.wake_capable = false,
 	},
+	{
+		.name = "QAIC_TIMESYNC_PERIODIC",
+		.num = 22,
+		.num_elements = 32,
+		.local_elements = 0,
+		.event_ring = 0,
+		.dir = DMA_TO_DEVICE,
+		.ee_mask = MHI_CH_EE_AMSS,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = false,
+		.wake_capable = false,
+	},
+	{
+		.num = 23,
+		.name = "QAIC_TIMESYNC_PERIODIC",
+		.num_elements = 32,
+		.local_elements = 0,
+		.event_ring = 0,
+		.dir = DMA_FROM_DEVICE,
+		.ee_mask = MHI_CH_EE_AMSS,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = false,
+		.wake_capable = false,
+	},
 };
 
 static struct mhi_event_config aic100_events[] = {
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 5eba5f03d3bf7..08d940d3da466 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -27,6 +27,7 @@
 
 #include "mhi_controller.h"
 #include "qaic.h"
+#include "qaic_timesync.h"
 
 MODULE_IMPORT_NS(DMA_BUF);
 
@@ -599,6 +600,10 @@ static int __init qaic_init(void)
 		goto free_pci;
 	}
 
+	ret = qaic_timesync_init();
+	if (ret)
+		pr_debug("qaic: qaic_timesync_init failed %d\n", ret);
+
 	return 0;
 
 free_pci:
@@ -624,6 +629,7 @@ static void __exit qaic_exit(void)
 	 * reinitializing the link_up state after the cleanup is done.
 	 */
 	link_up = true;
+	qaic_timesync_deinit();
 	mhi_driver_unregister(&qaic_mhi_driver);
 	pci_unregister_driver(&qaic_pci_driver);
 }
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
new file mode 100644
index 0000000000000..769250272c210
--- /dev/null
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/time64.h>
+#include <linux/timer.h>
+
+#include "qaic.h"
+#include "qaic_timesync.h"
+
+#define QTIMER_REG_OFFSET			0xa28
+#define QAIC_TIMESYNC_SIGNATURE			0x55aa
+#define QAIC_CONV_QTIMER_TO_US(qtimer)		(mul_u64_u32_div(qtimer, 10, 192))
+
+static unsigned int timesync_delay_ms = 1000; /* 1 sec default */
+module_param(timesync_delay_ms, uint, 0600);
+MODULE_PARM_DESC(timesync_delay_ms, "Delay in ms between two consecutive timesync operations");
+
+enum qts_msg_type {
+	QAIC_TS_SYNC_REQ = 1,
+	QAIC_TS_ACK_TO_HOST,
+	QAIC_TS_MSG_TYPE_MAX
+};
+
+/**
+ * struct qts_hdr - Timesync message header structure.
+ * @signature: Unique signature to identify the timesync message.
+ * @reserved_1: Reserved for future use.
+ * @reserved_2: Reserved for future use.
+ * @msg_type: sub-type of the timesync message.
+ * @reserved_3: Reserved for future use.
+ */
+struct qts_hdr {
+	__le16	signature;
+	__le16	reserved_1;
+	u8	reserved_2;
+	u8	msg_type;
+	__le16	reserved_3;
+} __packed;
+
+/**
+ * struct qts_timeval - Structure to carry time information.
+ * @tv_sec: Seconds part of the time.
+ * @tv_usec: uS (microseconds) part of the time.
+ */
+struct qts_timeval {
+	__le64	tv_sec;
+	__le64	tv_usec;
+} __packed;
+
+/**
+ * struct qts_host_time_sync_msg_data - Structure to denote the timesync message.
+ * @header: Header of the timesync message.
+ * @data: Time information.
+ */
+struct qts_host_time_sync_msg_data {
+	struct	qts_hdr header;
+	struct	qts_timeval data;
+} __packed;
+
+/**
+ * struct mqts_dev - MHI QAIC Timesync Control device.
+ * @qdev: Pointer to the root device struct driven by QAIC driver.
+ * @mhi_dev: Pointer to associated MHI device.
+ * @timer: Timer handle used for timesync.
+ * @qtimer_addr: Device QTimer register pointer.
+ * @buff_in_use: atomic variable to track if the sync_msg buffer is in use.
+ * @dev: Device pointer to qdev->pdev->dev stored for easy access.
+ * @sync_msg: Buffer used to send timesync message over MHI.
+ */
+struct mqts_dev {
+	struct qaic_device *qdev;
+	struct mhi_device *mhi_dev;
+	struct timer_list timer;
+	void __iomem *qtimer_addr;
+	atomic_t buff_in_use;
+	struct device *dev;
+	struct qts_host_time_sync_msg_data *sync_msg;
+};
+
+#ifdef readq
+static u64 read_qtimer(const volatile void __iomem *addr)
+{
+	return readq(addr);
+}
+#else
+static u64 read_qtimer(const volatile void __iomem *addr)
+{
+	u64 low, high;
+
+	low = readl(addr);
+	high = readl(addr + sizeof(u32));
+	return low | (high << 32);
+}
+#endif
+
+static void qaic_timesync_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+	dev_dbg(mqtsdev->dev, "%s status: %d xfer_len: %zu\n", __func__,
+		mhi_result->transaction_status, mhi_result->bytes_xferd);
+
+	atomic_set(&mqtsdev->buff_in_use, 0);
+}
+
+static void qaic_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+	dev_err(mqtsdev->dev, "%s no data expected on dl channel\n", __func__);
+}
+
+static void qaic_timesync_timer(struct timer_list *t)
+{
+	struct mqts_dev *mqtsdev = from_timer(mqtsdev, t, timer);
+	struct qts_host_time_sync_msg_data *sync_msg;
+	u64 device_qtimer_us;
+	u64 device_qtimer;
+	u64 host_time_us;
+	u64 offset_us;
+	u64 host_sec;
+	int ret;
+
+	if (atomic_read(&mqtsdev->buff_in_use)) {
+		dev_dbg(mqtsdev->dev, "%s buffer not free, schedule next cycle\n", __func__);
+		goto mod_timer;
+	}
+	atomic_set(&mqtsdev->buff_in_use, 1);
+
+	sync_msg = mqtsdev->sync_msg;
+	sync_msg->header.signature = cpu_to_le16(QAIC_TIMESYNC_SIGNATURE);
+	sync_msg->header.msg_type = QAIC_TS_SYNC_REQ;
+	/* Read host UTC time and convert to uS*/
+	host_time_us = div_u64(ktime_get_real_ns(), NSEC_PER_USEC);
+	device_qtimer = read_qtimer(mqtsdev->qtimer_addr);
+	device_qtimer_us = QAIC_CONV_QTIMER_TO_US(device_qtimer);
+	/* Offset between host UTC and device time */
+	offset_us = host_time_us - device_qtimer_us;
+
+	host_sec = div_u64(offset_us, USEC_PER_SEC);
+	sync_msg->data.tv_usec = cpu_to_le64(offset_us - host_sec * USEC_PER_SEC);
+	sync_msg->data.tv_sec = cpu_to_le64(host_sec);
+	ret = mhi_queue_buf(mqtsdev->mhi_dev, DMA_TO_DEVICE, sync_msg, sizeof(*sync_msg), MHI_EOT);
+	if (ret && (ret != -EAGAIN)) {
+		dev_err(mqtsdev->dev, "%s unable to queue to mhi:%d\n", __func__, ret);
+		return;
+	} else if (ret == -EAGAIN) {
+		atomic_set(&mqtsdev->buff_in_use, 0);
+	}
+
+mod_timer:
+	ret = mod_timer(t, jiffies + msecs_to_jiffies(timesync_delay_ms));
+	if (ret)
+		dev_err(mqtsdev->dev, "%s mod_timer error:%d\n", __func__, ret);
+}
+
+static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+	struct mqts_dev *mqtsdev;
+	struct timer_list *timer;
+	int ret;
+
+	mqtsdev = kzalloc(sizeof(*mqtsdev), GFP_KERNEL);
+	if (!mqtsdev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	timer = &mqtsdev->timer;
+	mqtsdev->mhi_dev = mhi_dev;
+	mqtsdev->qdev = qdev;
+	mqtsdev->dev = &qdev->pdev->dev;
+
+	mqtsdev->sync_msg = kzalloc(sizeof(*mqtsdev->sync_msg), GFP_KERNEL);
+	if (!mqtsdev->sync_msg) {
+		ret = -ENOMEM;
+		goto free_mqts_dev;
+	}
+	atomic_set(&mqtsdev->buff_in_use, 0);
+
+	ret = mhi_prepare_for_transfer(mhi_dev);
+	if (ret)
+		goto free_sync_msg;
+
+	/* Qtimer register pointer */
+	mqtsdev->qtimer_addr = qdev->bar_0 + QTIMER_REG_OFFSET;
+	timer_setup(timer, qaic_timesync_timer, 0);
+	timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms);
+	add_timer(timer);
+	dev_set_drvdata(&mhi_dev->dev, mqtsdev);
+
+	return 0;
+
+free_sync_msg:
+	kfree(mqtsdev->sync_msg);
+free_mqts_dev:
+	kfree(mqtsdev);
+out:
+	return ret;
+};
+
+static void qaic_timesync_remove(struct mhi_device *mhi_dev)
+{
+	struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+	del_timer_sync(&mqtsdev->timer);
+	mhi_unprepare_from_transfer(mqtsdev->mhi_dev);
+	kfree(mqtsdev->sync_msg);
+	kfree(mqtsdev);
+}
+
+static const struct mhi_device_id qaic_timesync_match_table[] = {
+	{ .chan = "QAIC_TIMESYNC_PERIODIC"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(mhi, qaic_timesync_match_table);
+
+static struct mhi_driver qaic_timesync_driver = {
+	.id_table = qaic_timesync_match_table,
+	.remove = qaic_timesync_remove,
+	.probe = qaic_timesync_probe,
+	.ul_xfer_cb = qaic_timesync_ul_xfer_cb,
+	.dl_xfer_cb = qaic_timesync_dl_xfer_cb,
+	.driver = {
+		.name = "qaic_timesync_periodic",
+	},
+};
+
+int qaic_timesync_init(void)
+{
+	return mhi_driver_register(&qaic_timesync_driver);
+}
+
+void qaic_timesync_deinit(void)
+{
+	mhi_driver_unregister(&qaic_timesync_driver);
+}
diff --git a/drivers/accel/qaic/qaic_timesync.h b/drivers/accel/qaic/qaic_timesync.h
new file mode 100644
index 0000000000000..851b7acd43bbb
--- /dev/null
+++ b/drivers/accel/qaic/qaic_timesync.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __QAIC_TIMESYNC_H__
+#define __QAIC_TIMESYNC_H__
+
+int qaic_timesync_init(void);
+void qaic_timesync_deinit(void);
+#endif /* __QAIC_TIMESYNC_H__ */
-- 
GitLab


From 41cfbaa47fd7d94df5faf1c416801600f26270d8 Mon Sep 17 00:00:00 2001
From: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Date: Mon, 16 Oct 2023 11:01:14 -0600
Subject: [PATCH 0048/2340] accel/qaic: Support MHI QAIC_TIMESYNC channel

Use QAIC_TIMESYNC MHI channel to send UTC time to device in SBL
environment. Remove support for QAIC_TIMESYNC MHI channel in AMSS
environment as it is not used in that environment.

Signed-off-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Carl Vanderlip <quic_carlv@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231016170114.5446-3-quic_jhugo@quicinc.com
---
 Documentation/accel/qaic/aic100.rst |   2 +-
 drivers/accel/qaic/mhi_controller.c |   4 +-
 drivers/accel/qaic/qaic.h           |   4 +
 drivers/accel/qaic/qaic_drv.c       |   7 ++
 drivers/accel/qaic/qaic_timesync.c  | 154 +++++++++++++++++++++++++++-
 5 files changed, 166 insertions(+), 5 deletions(-)

diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst
index 2ed32be7ae251..590dae77ea124 100644
--- a/Documentation/accel/qaic/aic100.rst
+++ b/Documentation/accel/qaic/aic100.rst
@@ -221,7 +221,7 @@ of the defined channels, and their uses.
 +----------------+---------+----------+----------------------------------------+
 | QAIC_DEBUG     | 18 & 19 | AMSS     | Not used.                              |
 +----------------+---------+----------+----------------------------------------+
-| QAIC_TIMESYNC  | 20 & 21 | SBL/AMSS | Used to synchronize timestamps in the  |
+| QAIC_TIMESYNC  | 20 & 21 | SBL      | Used to synchronize timestamps in the  |
 |                |         |          | device side logs with the host time    |
 |                |         |          | source.                                |
 +----------------+---------+----------+----------------------------------------+
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 705898777ec52..5d3cc30009cce 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -348,7 +348,7 @@ static struct mhi_channel_config aic100_channels[] = {
 		.local_elements = 0,
 		.event_ring = 0,
 		.dir = DMA_TO_DEVICE,
-		.ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS,
+		.ee_mask = MHI_CH_EE_SBL,
 		.pollcfg = 0,
 		.doorbell = MHI_DB_BRST_DISABLE,
 		.lpm_notify = false,
@@ -364,7 +364,7 @@ static struct mhi_channel_config aic100_channels[] = {
 		.local_elements = 0,
 		.event_ring = 0,
 		.dir = DMA_FROM_DEVICE,
-		.ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS,
+		.ee_mask = MHI_CH_EE_SBL,
 		.pollcfg = 0,
 		.doorbell = MHI_DB_BRST_DISABLE,
 		.lpm_notify = false,
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 01e9dda0cc378..bc40d52dc0104 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -139,6 +139,10 @@ struct qaic_device {
 	u32 (*gen_crc)(void *msg);
 	/* Validate the CRC of a control message */
 	bool (*valid_crc)(void *msg);
+	/* MHI "QAIC_TIMESYNC" channel device */
+	struct mhi_device	*qts_ch;
+	/* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */
+	struct workqueue_struct	*qts_wq;
 };
 
 struct qaic_drm_device {
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 08d940d3da466..b12226385003d 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -325,6 +325,7 @@ static void cleanup_qdev(struct qaic_device *qdev)
 	cleanup_srcu_struct(&qdev->dev_lock);
 	pci_set_drvdata(qdev->pdev, NULL);
 	destroy_workqueue(qdev->cntl_wq);
+	destroy_workqueue(qdev->qts_wq);
 }
 
 static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -348,6 +349,12 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
 	if (!qdev->cntl_wq)
 		return NULL;
 
+	qdev->qts_wq = alloc_workqueue("qaic_ts", WQ_UNBOUND, 0);
+	if (!qdev->qts_wq) {
+		destroy_workqueue(qdev->cntl_wq);
+		return NULL;
+	}
+
 	pci_set_drvdata(pdev, qdev);
 	qdev->pdev = pdev;
 
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
index 769250272c210..301f4462d51bd 100644
--- a/drivers/accel/qaic/qaic_timesync.c
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -22,7 +22,8 @@ module_param(timesync_delay_ms, uint, 0600);
 MODULE_PARM_DESC(timesync_delay_ms, "Delay in ms between two consecutive timesync operations");
 
 enum qts_msg_type {
-	QAIC_TS_SYNC_REQ = 1,
+	QAIC_TS_CMD_TO_HOST,
+	QAIC_TS_SYNC_REQ,
 	QAIC_TS_ACK_TO_HOST,
 	QAIC_TS_MSG_TYPE_MAX
 };
@@ -83,6 +84,16 @@ struct mqts_dev {
 	struct qts_host_time_sync_msg_data *sync_msg;
 };
 
+struct qts_resp_msg {
+	struct qts_hdr	hdr;
+} __packed;
+
+struct qts_resp {
+	struct qts_resp_msg	data;
+	struct work_struct	work;
+	struct qaic_device	*qdev;
+};
+
 #ifdef readq
 static u64 read_qtimer(const volatile void __iomem *addr)
 {
@@ -234,12 +245,151 @@ static struct mhi_driver qaic_timesync_driver = {
 	},
 };
 
+static void qaic_boot_timesync_worker(struct work_struct *work)
+{
+	struct qts_resp *resp = container_of(work, struct qts_resp, work);
+	struct qts_host_time_sync_msg_data *req;
+	struct qts_resp_msg data = resp->data;
+	struct qaic_device *qdev = resp->qdev;
+	struct mhi_device *mhi_dev;
+	struct timespec64 ts;
+	int ret;
+
+	mhi_dev = qdev->qts_ch;
+	/* Queue the response message beforehand to avoid race conditions */
+	ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, &resp->data, sizeof(resp->data), MHI_EOT);
+	if (ret) {
+		kfree(resp);
+		dev_warn(&mhi_dev->dev, "Failed to re-queue response buffer %d\n", ret);
+		return;
+	}
+
+	switch (data.hdr.msg_type) {
+	case QAIC_TS_CMD_TO_HOST:
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			break;
+
+		req->header = data.hdr;
+		req->header.msg_type = QAIC_TS_SYNC_REQ;
+		ktime_get_real_ts64(&ts);
+		req->data.tv_sec = cpu_to_le64(ts.tv_sec);
+		req->data.tv_usec = cpu_to_le64(div_u64(ts.tv_nsec, NSEC_PER_USEC));
+
+		ret = mhi_queue_buf(mhi_dev, DMA_TO_DEVICE, req, sizeof(*req), MHI_EOT);
+		if (ret) {
+			kfree(req);
+			dev_dbg(&mhi_dev->dev, "Failed to send request message. Error %d\n", ret);
+		}
+		break;
+	case QAIC_TS_ACK_TO_HOST:
+		dev_dbg(&mhi_dev->dev, "ACK received from device\n");
+		break;
+	default:
+		dev_err(&mhi_dev->dev, "Invalid message type %u.\n", data.hdr.msg_type);
+	}
+}
+
+static int qaic_boot_timesync_queue_resp(struct mhi_device *mhi_dev, struct qaic_device *qdev)
+{
+	struct qts_resp *resp;
+	int ret;
+
+	resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	resp->qdev = qdev;
+	INIT_WORK(&resp->work, qaic_boot_timesync_worker);
+
+	ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, &resp->data, sizeof(resp->data), MHI_EOT);
+	if (ret) {
+		kfree(resp);
+		dev_warn(&mhi_dev->dev, "Failed to queue response buffer %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void qaic_boot_timesync_remove(struct mhi_device *mhi_dev)
+{
+	struct qaic_device *qdev;
+
+	qdev = dev_get_drvdata(&mhi_dev->dev);
+	mhi_unprepare_from_transfer(qdev->qts_ch);
+	qdev->qts_ch = NULL;
+}
+
+static int qaic_boot_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+	int ret;
+
+	ret = mhi_prepare_for_transfer(mhi_dev);
+	if (ret)
+		return ret;
+
+	qdev->qts_ch = mhi_dev;
+	dev_set_drvdata(&mhi_dev->dev, qdev);
+
+	ret = qaic_boot_timesync_queue_resp(mhi_dev, qdev);
+	if (ret) {
+		dev_set_drvdata(&mhi_dev->dev, NULL);
+		qdev->qts_ch = NULL;
+		mhi_unprepare_from_transfer(mhi_dev);
+	}
+
+	return ret;
+}
+
+static void qaic_boot_timesync_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	kfree(mhi_result->buf_addr);
+}
+
+static void qaic_boot_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct qts_resp *resp = container_of(mhi_result->buf_addr, struct qts_resp, data);
+
+	if (mhi_result->transaction_status || mhi_result->bytes_xferd != sizeof(resp->data)) {
+		kfree(resp);
+		return;
+	}
+
+	queue_work(resp->qdev->qts_wq, &resp->work);
+}
+
+static const struct mhi_device_id qaic_boot_timesync_match_table[] = {
+	{ .chan = "QAIC_TIMESYNC"},
+	{},
+};
+
+static struct mhi_driver qaic_boot_timesync_driver = {
+	.id_table = qaic_boot_timesync_match_table,
+	.remove = qaic_boot_timesync_remove,
+	.probe = qaic_boot_timesync_probe,
+	.ul_xfer_cb = qaic_boot_timesync_ul_xfer_cb,
+	.dl_xfer_cb = qaic_boot_timesync_dl_xfer_cb,
+	.driver = {
+		.name = "qaic_timesync",
+	},
+};
+
 int qaic_timesync_init(void)
 {
-	return mhi_driver_register(&qaic_timesync_driver);
+	int ret;
+
+	ret = mhi_driver_register(&qaic_timesync_driver);
+	if (ret)
+		return ret;
+	ret = mhi_driver_register(&qaic_boot_timesync_driver);
+
+	return ret;
 }
 
 void qaic_timesync_deinit(void)
 {
+	mhi_driver_unregister(&qaic_boot_timesync_driver);
 	mhi_driver_unregister(&qaic_timesync_driver);
 }
-- 
GitLab


From 3db2420422a5912d97966e0176050bb0fc9aa63e Mon Sep 17 00:00:00 2001
From: Sheng-Liang Pan <sheng-liang.pan@quanta.corp-partner.google.com>
Date: Fri, 27 Oct 2023 11:04:56 +0800
Subject: [PATCH 0049/2340] drm/panel-edp: Add AUO B116XTN02, BOE
 NT116WHM-N21,836X2, NV116WHM-N49 V8.0

Add panel identification entry for
- AUO B116XTN02 family (product ID:0x235c)
- BOE NT116WHM-N21,836X2 (product ID:0x09c3)
- BOE NV116WHM-N49 V8.0 (product ID:0x0979)

Signed-off-by: Sheng-Liang Pan <sheng-liang.pan@quanta.corp-partner.google.com>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027110435.1.Ia01fe9ec1c0953e0050a232eaa782fef2c037516@changeid
---
 drivers/gpu/drm/panel/panel-edp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index feb665df35a1d..9dce4c7024142 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1869,6 +1869,7 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x145c, &delay_200_500_e50, "B116XAB01.4"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x235c, &delay_200_500_e50, "B116XTN02"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"),
@@ -1877,8 +1878,10 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0786, &delay_200_500_p2e80, "NV116WHM-T01"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x07d1, &boe_nv133fhm_n61.delay, "NV133FHM-N61"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x082d, &boe_nv133fhm_n61.delay, "NV133FHM-N62"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x09c3, &delay_200_500_e50, "NT116WHM-N21,836X2"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x094b, &delay_200_500_e50, "NT116WHM-N21"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x095f, &delay_200_500_e50, "NE135FBM-N41 v8.1"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0979, &delay_200_500_e50, "NV116WHM-N49 V8.0"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x098d, &boe_nv110wtm_n61.delay, "NV110WTM-N61"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x09dd, &delay_200_500_e50, "NT116WHM-N21"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"),
-- 
GitLab


From 33f2af42a2019da4fecde30fe144a810b485762f Mon Sep 17 00:00:00 2001
From: Gilbert Adikankwu <gilbertadikankwu@gmail.com>
Date: Thu, 26 Oct 2023 11:56:23 +0100
Subject: [PATCH 0050/2340] drm/i915/gt: Remove unncessary {} from if-else

Fix checkpatch.pl error:

WARNING: braces {} are not necessary for any arm of this statement

Signed-off-by: Gilbert Adikankwu <gilbertadikankwu@gmail.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026105623.480172-1-gilbertadikankwu@gmail.com
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index f602895f6d0df..6a3246240e81d 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -849,13 +849,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
 			       const struct sseu_dev_info *sseu,
 			       struct drm_printer *p)
 {
-	if (sseu->max_slices == 0) {
+	if (sseu->max_slices == 0)
 		drm_printf(p, "Unavailable\n");
-	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 		sseu_print_xehp_topology(sseu, p);
-	} else {
+	else
 		sseu_print_hsw_topology(sseu, p);
-	}
 }
 
 void intel_sseu_print_ss_info(const char *type,
-- 
GitLab


From 10184a8a7f70d28ba6aae22142a7375a8c8c1924 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 26 Oct 2023 11:40:44 -0700
Subject: [PATCH 0051/2340] drm/i915/lnl: Extend C10/C20 phy

For Lunar Lake, DDI-A is connected to C10 PHY, while TC1-TC3 are connected
to C20 phy, like in Meteor Lake. Update the check in intel_is_c10phy()
accordingly.

This reverts the change in commit e388ae97e225 ("drm/i915/display:
Eliminate IS_METEORLAKE checks") that turned that into a display engine
version check. The phy <-> port connection is very SoC-specific and not
related to that version.

IS_LUNARLAKE() is defined to 0 in i915 as it's expected that the
(upcoming) xe driver is the one defining the platform, with i915 only
driving the display side.

Bspec: 70818
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026184045.1015655-2-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c | 2 +-
 drivers/gpu/drm/i915/i915_drv.h              | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index 7516abf8dba95..cc87653bc476b 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -31,7 +31,7 @@
 
 bool intel_is_c10phy(struct drm_i915_private *i915, enum phy phy)
 {
-	if (DISPLAY_VER_FULL(i915) == IP_VER(14, 0) && phy < PHY_C)
+	if ((IS_LUNARLAKE(i915) || IS_METEORLAKE(i915)) && phy < PHY_C)
 		return true;
 
 	return false;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 20bbad165d76a..8e81974607f27 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -560,6 +560,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define IS_DG2(i915)	IS_PLATFORM(i915, INTEL_DG2)
 #define IS_PONTEVECCHIO(i915) IS_PLATFORM(i915, INTEL_PONTEVECCHIO)
 #define IS_METEORLAKE(i915) IS_PLATFORM(i915, INTEL_METEORLAKE)
+#define IS_LUNARLAKE(i915) 0
 
 #define IS_DG2_G10(i915) \
 	IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10)
-- 
GitLab


From 28066f38d94f846e66f4116a8b1c409b47072011 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 26 Oct 2023 11:40:45 -0700
Subject: [PATCH 0052/2340] drm/i915/lnl: Fix check for TC phy

With MTL adding PICA between the port and the real phy, the path
add for DG2 stopped being followed and newer platforms are simply using
the older path for TC phys. LNL is no different than MTL in this aspect,
so just add it to the mess. In future the phy and port designation and
deciding if it's TC should better be cleaned up.

To make it just a bit better, also change intel_phy_is_snps() to show
this is DG2-only.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026184045.1015655-3-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 28 ++++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 28d85e1e858ea..1caf46e3e5692 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -1784,31 +1784,31 @@ bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy)
 
 bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy)
 {
+	/*
+	 * DG2's "TC1", although TC-capable output, doesn't share the same flow
+	 * as other platforms on the display engine side and rather rely on the
+	 * SNPS PHY, that is programmed separately
+	 */
 	if (IS_DG2(dev_priv))
-		/* DG2's "TC1" output uses a SNPS PHY */
 		return false;
-	else if (IS_ALDERLAKE_P(dev_priv) || DISPLAY_VER_FULL(dev_priv) == IP_VER(14, 0))
+
+	if (DISPLAY_VER(dev_priv) >= 13)
 		return phy >= PHY_F && phy <= PHY_I;
 	else if (IS_TIGERLAKE(dev_priv))
 		return phy >= PHY_D && phy <= PHY_I;
 	else if (IS_ICELAKE(dev_priv))
 		return phy >= PHY_C && phy <= PHY_F;
-	else
-		return false;
+
+	return false;
 }
 
 bool intel_phy_is_snps(struct drm_i915_private *dev_priv, enum phy phy)
 {
-	if (phy == PHY_NONE)
-		return false;
-	else if (IS_DG2(dev_priv))
-		/*
-		 * All four "combo" ports and the TC1 port (PHY E) use
-		 * Synopsis PHYs.
-		 */
-		return phy <= PHY_E;
-
-	return false;
+	/*
+	 * For DG2, and for DG2 only, all four "combo" ports and the TC1 port
+	 * (PHY E) use Synopsis PHYs. See intel_phy_is_tc().
+	 */
+	return IS_DG2(dev_priv) && phy > PHY_NONE && phy <= PHY_E;
 }
 
 enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port)
-- 
GitLab


From 685a4fffbf0fe23618f1824924e6dbb2517b446a Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 18 Oct 2023 15:28:30 -0700
Subject: [PATCH 0053/2340] drm/i915/display: Abstract C10/C20 pll hw readout

intel_cx0_phy.[ch] should contain the details about C10/C20, not leaking
it to the rest of the driver. Start abstracting this by exporting a
single PLL hw readout that handles the differences between C20 and C10
internally to that compilation unit.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018222831.4132968-2-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c | 20 ++++++++++++++++----
 drivers/gpu/drm/i915/display/intel_cx0_phy.h |  8 +++++---
 drivers/gpu/drm/i915/display/intel_ddi.c     |  4 ++--
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index cc87653bc476b..6fa91acc0a25c 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -1864,8 +1864,8 @@ static int intel_c10pll_calc_state(struct intel_crtc_state *crtc_state,
 	return -EINVAL;
 }
 
-void intel_c10pll_readout_hw_state(struct intel_encoder *encoder,
-				   struct intel_c10pll_state *pll_state)
+static void intel_c10pll_readout_hw_state(struct intel_encoder *encoder,
+					  struct intel_c10pll_state *pll_state)
 {
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	u8 lane = INTEL_CX0_LANE0;
@@ -2117,8 +2117,8 @@ static bool intel_c20_use_mplla(u32 clock)
 	return false;
 }
 
-void intel_c20pll_readout_hw_state(struct intel_encoder *encoder,
-				   struct intel_c20pll_state *pll_state)
+static void intel_c20pll_readout_hw_state(struct intel_encoder *encoder,
+					  struct intel_c20pll_state *pll_state)
 {
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	bool cntx;
@@ -3067,3 +3067,15 @@ void intel_c10pll_state_verify(struct intel_atomic_state *state,
 			crtc->base.base.id, crtc->base.name,
 			mpllb_sw_state->cmn, mpllb_hw_state.cmn);
 }
+
+void intel_cx0pll_readout_hw_state(struct intel_encoder *encoder,
+				   struct intel_cx0pll_state *pll_state)
+{
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	enum phy phy = intel_port_to_phy(i915, encoder->port);
+
+	if (intel_is_c10phy(i915, phy))
+		intel_c10pll_readout_hw_state(encoder, &pll_state->c10);
+	else
+		intel_c20pll_readout_hw_state(encoder, &pll_state->c20);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.h b/drivers/gpu/drm/i915/display/intel_cx0_phy.h
index 0e0a38dac8cdd..ff7ccb7855aad 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.h
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.h
@@ -16,6 +16,7 @@ struct drm_i915_private;
 struct intel_atomic_state;
 struct intel_c10pll_state;
 struct intel_c20pll_state;
+struct intel_cx0pll_state;
 struct intel_crtc;
 struct intel_crtc_state;
 struct intel_encoder;
@@ -28,16 +29,17 @@ void intel_mtl_pll_disable(struct intel_encoder *encoder);
 enum icl_port_dpll_id
 intel_mtl_port_pll_type(struct intel_encoder *encoder,
 			const struct intel_crtc_state *crtc_state);
-void intel_c10pll_readout_hw_state(struct intel_encoder *encoder, struct intel_c10pll_state *pll_state);
+
 int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state, struct intel_encoder *encoder);
+void intel_cx0pll_readout_hw_state(struct intel_encoder *encoder,
+				   struct intel_cx0pll_state *pll_state);
+
 void intel_c10pll_dump_hw_state(struct drm_i915_private *dev_priv,
 				const struct intel_c10pll_state *hw_state);
 int intel_c10pll_calc_port_clock(struct intel_encoder *encoder,
 				 const struct intel_c10pll_state *pll_state);
 void intel_c10pll_state_verify(struct intel_atomic_state *state,
 			       struct intel_crtc *crtc);
-void intel_c20pll_readout_hw_state(struct intel_encoder *encoder,
-				   struct intel_c20pll_state *pll_state);
 void intel_c20pll_dump_hw_state(struct drm_i915_private *i915,
 				const struct intel_c20pll_state *hw_state);
 int intel_c20pll_calc_port_clock(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 9151d5add9605..80a8ab7874db4 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3861,10 +3861,10 @@ static void mtl_ddi_get_config(struct intel_encoder *encoder,
 	if (intel_tc_port_in_tbt_alt_mode(dig_port)) {
 		crtc_state->port_clock = intel_mtl_tbt_calc_port_clock(encoder);
 	} else if (intel_is_c10phy(i915, phy)) {
-		intel_c10pll_readout_hw_state(encoder, &crtc_state->cx0pll_state.c10);
+		intel_cx0pll_readout_hw_state(encoder, &crtc_state->cx0pll_state);
 		crtc_state->port_clock = intel_c10pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c10);
 	} else {
-		intel_c20pll_readout_hw_state(encoder, &crtc_state->cx0pll_state.c20);
+		intel_cx0pll_readout_hw_state(encoder, &crtc_state->cx0pll_state);
 		crtc_state->port_clock = intel_c20pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c20);
 	}
 
-- 
GitLab


From 0a0f7935740853ce2654a7750b84c3bd34756979 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 18 Oct 2023 15:28:31 -0700
Subject: [PATCH 0054/2340] drm/i915/display: Abstract C10/C20 pll calculation

As done with the hw readout, properly abstract the C10/C20 phy details
inside intel_cx0_phy.c.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018222831.4132968-3-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c | 20 ++++++++++++++++----
 drivers/gpu/drm/i915/display/intel_cx0_phy.h |  6 ++----
 drivers/gpu/drm/i915/display/intel_ddi.c     |  7 +------
 drivers/gpu/drm/i915/display/intel_dpll.c    |  7 +------
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index 6fa91acc0a25c..b2ad4c6172f6c 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -2392,8 +2392,8 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 		      BIT(0), cntx ? 0 : 1, MB_WRITE_COMMITTED);
 }
 
-int intel_c10pll_calc_port_clock(struct intel_encoder *encoder,
-				 const struct intel_c10pll_state *pll_state)
+static int intel_c10pll_calc_port_clock(struct intel_encoder *encoder,
+					const struct intel_c10pll_state *pll_state)
 {
 	unsigned int frac_quot = 0, frac_rem = 0, frac_den = 1;
 	unsigned int multiplier, tx_clk_div, hdmi_div, refclk = 38400;
@@ -2419,8 +2419,8 @@ int intel_c10pll_calc_port_clock(struct intel_encoder *encoder,
 	return tmpclk;
 }
 
-int intel_c20pll_calc_port_clock(struct intel_encoder *encoder,
-				 const struct intel_c20pll_state *pll_state)
+static int intel_c20pll_calc_port_clock(struct intel_encoder *encoder,
+					const struct intel_c20pll_state *pll_state)
 {
 	unsigned int frac, frac_en, frac_quot, frac_rem, frac_den;
 	unsigned int multiplier, refclk = 38400;
@@ -3079,3 +3079,15 @@ void intel_cx0pll_readout_hw_state(struct intel_encoder *encoder,
 	else
 		intel_c20pll_readout_hw_state(encoder, &pll_state->c20);
 }
+
+int intel_cx0pll_calc_port_clock(struct intel_encoder *encoder,
+				 const struct intel_cx0pll_state *pll_state)
+{
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	enum phy phy = intel_port_to_phy(i915, encoder->port);
+
+	if (intel_is_c10phy(i915, phy))
+		return intel_c10pll_calc_port_clock(encoder, &pll_state->c10);
+
+	return intel_c20pll_calc_port_clock(encoder, &pll_state->c20);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.h b/drivers/gpu/drm/i915/display/intel_cx0_phy.h
index ff7ccb7855aad..222aed16e739f 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.h
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.h
@@ -33,17 +33,15 @@ intel_mtl_port_pll_type(struct intel_encoder *encoder,
 int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state, struct intel_encoder *encoder);
 void intel_cx0pll_readout_hw_state(struct intel_encoder *encoder,
 				   struct intel_cx0pll_state *pll_state);
+int intel_cx0pll_calc_port_clock(struct intel_encoder *encoder,
+				 const struct intel_cx0pll_state *pll_state);
 
 void intel_c10pll_dump_hw_state(struct drm_i915_private *dev_priv,
 				const struct intel_c10pll_state *hw_state);
-int intel_c10pll_calc_port_clock(struct intel_encoder *encoder,
-				 const struct intel_c10pll_state *pll_state);
 void intel_c10pll_state_verify(struct intel_atomic_state *state,
 			       struct intel_crtc *crtc);
 void intel_c20pll_dump_hw_state(struct drm_i915_private *i915,
 				const struct intel_c20pll_state *hw_state);
-int intel_c20pll_calc_port_clock(struct intel_encoder *encoder,
-				 const struct intel_c20pll_state *pll_state);
 void intel_cx0_phy_set_signal_levels(struct intel_encoder *encoder,
 				     const struct intel_crtc_state *crtc_state);
 int intel_cx0_phy_check_hdmi_link_rate(struct intel_hdmi *hdmi, int clock);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 80a8ab7874db4..c4dc1f71da4bc 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3854,18 +3854,13 @@ void intel_ddi_get_clock(struct intel_encoder *encoder,
 static void mtl_ddi_get_config(struct intel_encoder *encoder,
 			       struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
-	enum phy phy = intel_port_to_phy(i915, encoder->port);
 	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 
 	if (intel_tc_port_in_tbt_alt_mode(dig_port)) {
 		crtc_state->port_clock = intel_mtl_tbt_calc_port_clock(encoder);
-	} else if (intel_is_c10phy(i915, phy)) {
-		intel_cx0pll_readout_hw_state(encoder, &crtc_state->cx0pll_state);
-		crtc_state->port_clock = intel_c10pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c10);
 	} else {
 		intel_cx0pll_readout_hw_state(encoder, &crtc_state->cx0pll_state);
-		crtc_state->port_clock = intel_c20pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c20);
+		crtc_state->port_clock = intel_cx0pll_calc_port_clock(encoder, &crtc_state->cx0pll_state);
 	}
 
 	intel_ddi_get_config(encoder, crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c
index d41c1dc9f66c6..509b862f9e607 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -1003,12 +1003,10 @@ static int dg2_crtc_compute_clock(struct intel_atomic_state *state,
 static int mtl_crtc_compute_clock(struct intel_atomic_state *state,
 				  struct intel_crtc *crtc)
 {
-	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	struct intel_crtc_state *crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
 	struct intel_encoder *encoder =
 		intel_get_crtc_new_encoder(state, crtc_state);
-	enum phy phy = intel_port_to_phy(i915, encoder->port);
 	int ret;
 
 	ret = intel_cx0pll_calc_state(crtc_state, encoder);
@@ -1016,10 +1014,7 @@ static int mtl_crtc_compute_clock(struct intel_atomic_state *state,
 		return ret;
 
 	/* TODO: Do the readback via intel_compute_shared_dplls() */
-	if (intel_is_c10phy(i915, phy))
-		crtc_state->port_clock = intel_c10pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c10);
-	else
-		crtc_state->port_clock = intel_c20pll_calc_port_clock(encoder, &crtc_state->cx0pll_state.c20);
+	crtc_state->port_clock = intel_cx0pll_calc_port_clock(encoder, &crtc_state->cx0pll_state);
 
 	crtc_state->hw.adjusted_mode.crtc_clock = intel_crtc_dotclock(crtc_state);
 
-- 
GitLab


From 1470acbef122c7e2e588f6346ce459c26d0568a2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 27 Oct 2023 17:26:23 +0200
Subject: [PATCH 0055/2340] accel/ivpu: avoid build failure with CONFIG_PM=n

The usage count of struct dev_pm_info is an implementation detail that
is only available if CONFIG_PM is enabled, so printing it in a debug message
causes a build failure in configurations without PM:

In file included from include/linux/device.h:15,
                 from include/linux/pci.h:37,
                 from drivers/accel/ivpu/ivpu_pm.c:8:
drivers/accel/ivpu/ivpu_pm.c: In function 'ivpu_rpm_get_if_active':
drivers/accel/ivpu/ivpu_pm.c:254:51: error: 'struct dev_pm_info' has no member named 'usage_count'
  254 |                  atomic_read(&vdev->drm.dev->power.usage_count));
      |                                                   ^
include/linux/dev_printk.h:129:48: note: in definition of macro 'dev_printk'
  129 |                 _dev_printk(level, dev, fmt, ##__VA_ARGS__);            \
      |                                                ^~~~~~~~~~~
drivers/accel/ivpu/ivpu_drv.h:75:17: note: in expansion of macro 'dev_dbg'
   75 |                 dev_dbg((vdev)->drm.dev, "[%s] " fmt, #type, ##args);          \
      |                 ^~~~~~~
drivers/accel/ivpu/ivpu_pm.c:253:9: note: in expansion of macro 'ivpu_dbg'
  253 |         ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n",
      |         ^~~~~~~~

The print message does not seem essential, so the easiest workaround is
to just remove it.

Fixes: c39dc15191c4 ("accel/ivpu: Read clock rate only if device is up")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027152633.528490-1-arnd@kernel.org
---
 drivers/accel/ivpu/ivpu_pm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index d14b6fd796b4e..568e4295dc56b 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -250,9 +250,6 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
 {
 	int ret;
 
-	ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n",
-		 atomic_read(&vdev->drm.dev->power.usage_count));
-
 	ret = pm_runtime_get_if_active(vdev->drm.dev, false);
 	drm_WARN_ON(&vdev->drm, ret < 0);
 
-- 
GitLab


From a3431650f30a94b179d419ef87c21213655c28cd Mon Sep 17 00:00:00 2001
From: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Date: Wed, 18 Oct 2023 17:06:22 +0530
Subject: [PATCH 0056/2340] drm/i915/mtl: Support HBR3 rate with C10 phy and
 eDP in MTL

eDP specification supports HBR3 link rate since v1.4a. Moreover,
C10 phy can support HBR3 link rate for both DP and eDP. Therefore,
do not clamp the supported rates for eDP at 6.75Gbps.

Cc: <stable@vger.kernel.org>

BSpec: 70073 74224

Signed-off-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018113622.2761997-1-chaitanya.kumar.borah@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 2fc0a17c45c78..ab4608dafe5d9 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -430,7 +430,7 @@ static int mtl_max_source_rate(struct intel_dp *intel_dp)
 	enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
 
 	if (intel_is_c10phy(i915, phy))
-		return intel_dp_is_edp(intel_dp) ? 675000 : 810000;
+		return 810000;
 
 	return 2000000;
 }
-- 
GitLab


From 8c63b47412ad3f015db47b380916722299511cc3 Mon Sep 17 00:00:00 2001
From: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:05 +0200
Subject: [PATCH 0057/2340] accel/ivpu: Update FW API

Bump boot API to 4.20
Bump JSM API to 3.15

Signed-off-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-2-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_jsm_msg.c |  17 ++
 drivers/accel/ivpu/vpu_boot_api.h |  90 ++++++++-
 drivers/accel/ivpu/vpu_jsm_api.h  | 309 ++++++++++++++++++++++++++++--
 3 files changed, 392 insertions(+), 24 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 0c2fe7142024c..35a689475c68d 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -36,6 +36,17 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_ENGINE_RESUME);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
@@ -65,6 +76,12 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE_DONE);
 	}
 	#undef IVPU_CASE_TO_STR
 
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 6b71be92ba653..04c954258563a 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -11,7 +11,10 @@
  *  The bellow values will be used to construct the version info this way:
  *  fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
  *  VPU_BOOT_API_VER_MINOR;
- *  VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
+ *  VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes
+ *  This information is collected by using vpuip_2/application/vpuFirmware/make_std_fw_image.py
+ *  If a header is missing this info we ignore the header, if a header is missing or contains
+ *  partial info a build error will be generated.
  */
 
 /*
@@ -24,12 +27,12 @@
  * Minor version changes when API backward compatibility is preserved.
  * Resets to 0 if Major version is incremented.
  */
-#define VPU_BOOT_API_VER_MINOR 12
+#define VPU_BOOT_API_VER_MINOR 20
 
 /*
  * API header changed (field names, documentation, formatting) but API itself has not been changed
  */
-#define VPU_BOOT_API_VER_PATCH 2
+#define VPU_BOOT_API_VER_PATCH 4
 
 /*
  * Index in the API version table
@@ -63,6 +66,12 @@ struct vpu_firmware_header {
 	/* Size of memory require for firmware execution */
 	u32 runtime_size;
 	u32 shave_nn_fw_size;
+	/* Size of primary preemption buffer. */
+	u32 preemption_buffer_1_size;
+	/* Size of secondary preemption buffer. */
+	u32 preemption_buffer_2_size;
+	/* Space reserved for future preemption-related fields. */
+	u32 preemption_reserved[6];
 };
 
 /*
@@ -89,6 +98,14 @@ enum VPU_BOOT_L2_CACHE_CFG_TYPE {
 	VPU_BOOT_L2_CACHE_CFG_NUM = 2
 };
 
+/** VPU MCA ECC signalling mode. By default, no signalling is used */
+enum VPU_BOOT_MCA_ECC_SIGNAL_TYPE {
+	VPU_BOOT_MCA_ECC_NONE = 0,
+	VPU_BOOT_MCA_ECC_CORR = 1,
+	VPU_BOOT_MCA_ECC_FATAL = 2,
+	VPU_BOOT_MCA_ECC_BOTH = 3
+};
+
 /**
  * Logging destinations.
  *
@@ -131,9 +148,11 @@ enum vpu_trace_destination {
 #define VPU_TRACE_PROC_BIT_ACT_SHV_3 22
 #define VPU_TRACE_PROC_NO_OF_HW_DEVS 23
 
-/* KMB HW component IDs are sequential, so define first and last IDs. */
-#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT
-#define VPU_TRACE_PROC_BIT_KMB_LAST  VPU_TRACE_PROC_BIT_SHV_15
+/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */
+#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT
+#define VPU_TRACE_PROC_BIT_30XX_LAST  VPU_TRACE_PROC_BIT_SHV_15
+#define VPU_TRACE_PROC_BIT_KMB_FIRST  VPU_TRACE_PROC_BIT_30XX_FIRST
+#define VPU_TRACE_PROC_BIT_KMB_LAST   VPU_TRACE_PROC_BIT_30XX_LAST
 
 struct vpu_boot_l2_cache_config {
 	u8 use;
@@ -148,6 +167,25 @@ struct vpu_warm_boot_section {
 	u32 is_clear_op;
 };
 
+/*
+ * When HW scheduling mode is enabled, a present period is defined.
+ * It will be used by VPU to swap between normal and focus priorities
+ * to prevent starving of normal priority band (when implemented).
+ * Host must provide a valid value at boot time in
+ * `vpu_focus_present_timer_ms`. If the value provided by the host is not within the
+ * defined range a default value will be used. Here we define the min. and max.
+ * allowed values and the and default value of the present period. Units are milliseconds.
+ */
+#define VPU_PRESENT_CALL_PERIOD_MS_DEFAULT 50
+#define VPU_PRESENT_CALL_PERIOD_MS_MIN	   16
+#define VPU_PRESENT_CALL_PERIOD_MS_MAX	   10000
+
+/**
+ * Macros to enable various operation modes within the VPU.
+ * To be defined as part of 32 bit mask.
+ */
+#define VPU_OP_MODE_SURVIVABILITY 0x1
+
 struct vpu_boot_params {
 	u32 magic;
 	u32 vpu_id;
@@ -218,6 +256,7 @@ struct vpu_boot_params {
 	 * the threshold will not be logged); applies to every enabled logging
 	 * destination and loggable HW component. See 'mvLog_t' enum for acceptable
 	 * values.
+	 * TODO: EISW-33556: Move log level definition (mvLog_t) to this file.
 	 */
 	u32 default_trace_level;
 	u32 boot_type;
@@ -249,7 +288,36 @@ struct vpu_boot_params {
 	u32 temp_sensor_period_ms;
 	/** PLL ratio for efficient clock frequency */
 	u32 pn_freq_pll_ratio;
-	u32 pad4[28];
+	/** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */
+	u32 dvfs_mode;
+	/**
+	 * Depending on DVFS Mode:
+	 * On-demand: Default if 0.
+	 *    Bit 0-7   - uint8_t: Highest residency percent
+	 *    Bit 8-15  - uint8_t: High residency percent
+	 *    Bit 16-23 - uint8_t: Low residency percent
+	 *    Bit 24-31 - uint8_t: Lowest residency percent
+	 *    Bit 32-35 - unsigned 4b: PLL Ratio increase amount on highest residency
+	 *    Bit 36-39 - unsigned 4b: PLL Ratio increase amount on high residency
+	 *    Bit 40-43 - unsigned 4b: PLL Ratio decrease amount on low residency
+	 *    Bit 44-47 - unsigned 4b: PLL Ratio decrease amount on lowest frequency
+	 *    Bit 48-55 - uint8_t: Period (ms) for residency decisions
+	 *    Bit 56-63 - uint8_t: Averaging windows (as multiples of period. Max: 30 decimal)
+	 * Power Save/Max Performance: Unused
+	 */
+	u64 dvfs_param;
+	/**
+	 * D0i3 delayed entry
+	 * Bit0: Disable CPU state save on D0i2 entry flow.
+	 *       0: Every D0i2 entry saves state. Save state IPC message ignored.
+	 *       1: IPC message required to save state on D0i3 entry flow.
+	 */
+	u32 d0i3_delayed_entry;
+	/* Time spent by VPU in D0i3 state */
+	u64 d0i3_residency_time_us;
+	/* Value of VPU perf counter at the time of entering D0i3 state . */
+	u64 d0i3_entry_vpu_ts;
+	u32 pad4[20];
 	/* Warm boot information: 0x400 - 0x43F */
 	u32 warm_boot_sections_count;
 	u32 warm_boot_start_address_reference;
@@ -274,8 +342,12 @@ struct vpu_boot_params {
 	u32 vpu_scheduling_mode;
 	/* Present call period in milliseconds. */
 	u32 vpu_focus_present_timer_ms;
-	/* Unused/reserved: 0x478 - 0xFFF */
-	u32 pad6[738];
+	/* VPU ECC Signaling */
+	u32 vpu_uses_ecc_mca_signal;
+	/* Values defined by VPU_OP_MODE* macros */
+	u32 vpu_operation_mode;
+	/* Unused/reserved: 0x480 - 0xFFF */
+	u32 pad6[736];
 };
 
 /*
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 2949ec8365bd5..7da7622742bee 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,12 +22,12 @@
 /*
  * Minor version changes when API backward compatibility is preserved.
  */
-#define VPU_JSM_API_VER_MINOR 0
+#define VPU_JSM_API_VER_MINOR 15
 
 /*
  * API header changed (field names, documentation, formatting) but API itself has not been changed
  */
-#define VPU_JSM_API_VER_PATCH 1
+#define VPU_JSM_API_VER_PATCH 0
 
 /*
  * Index in the API version table
@@ -84,11 +84,13 @@
  * Job flags bit masks.
  */
 #define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
+#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK	   0xFF000000
 
 /*
  * Sizes of the reserved areas in jobs, in bytes.
  */
-#define VPU_JOB_RESERVED_BYTES	     16
+#define VPU_JOB_RESERVED_BYTES 8
+
 /*
  * Sizes of the reserved areas in job queues, in bytes.
  */
@@ -108,6 +110,20 @@
  */
 #define VPU_DYNDBG_CMD_MAX_LEN 96
 
+/*
+ * For HWS command queue scheduling, we can prioritise command queues inside the
+ * same process with a relative in-process priority. Valid values for relative
+ * priority are given below - max and min.
+ */
+#define VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY 7
+#define VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY -7
+
+/*
+ * For HWS priority scheduling, we can have multiple realtime priority bands.
+ * They are numbered 0 to a MAX.
+ */
+#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U
+
 /*
  * Job format.
  */
@@ -117,8 +133,14 @@ struct vpu_job_queue_entry {
 	u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
 	u64 root_page_table_addr; /**< Address of root page table to use for this job */
 	u64 root_page_table_update_counter; /**< Page tables update events counter */
-	u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */
-	u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */
+	u64 primary_preempt_buf_addr;
+	/**< Address of the primary preemption buffer to use for this job */
+	u32 primary_preempt_buf_size;
+	/**< Size of the primary preemption buffer to use for this job */
+	u32 secondary_preempt_buf_size;
+	/**< Size of secondary preemption buffer to use for this job */
+	u64 secondary_preempt_buf_addr;
+	/**< Address of secondary preemption buffer to use for this job */
 	u8 reserved_0[VPU_JOB_RESERVED_BYTES];
 };
 
@@ -152,6 +174,46 @@ enum vpu_trace_entity_type {
 	VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2,
 };
 
+/*
+ * HWS specific log buffer header details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_header {
+	/* Written by VPU after adding a log entry. Initialised by host to 0. */
+	u32 first_free_entry_index;
+	/* Incremented by VPU every time the VPU overwrites the 0th entry;
+	 * initialised by host to 0.
+	 */
+	u32 wraparound_count;
+	/*
+	 * This is the number of buffers that can be stored in the log buffer provided by the host.
+	 * It is written by host before passing buffer to VPU. VPU should consider it read-only.
+	 */
+	u64 num_of_entries;
+	u64 reserved[2];
+};
+
+/*
+ * HWS specific log buffer entry details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_entry {
+	/* VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
+	u64 vpu_timestamp;
+	/*
+	 * Operation type:
+	 *     0 - context state change
+	 *     1 - queue new work
+	 *     2 - queue unwait sync object
+	 *     3 - queue no more work
+	 *     4 - queue wait sync object
+	 */
+	u32 operation_type;
+	u32 reserved;
+	/* Operation data depends on operation type */
+	u64 operation_data[2];
+};
+
 /*
  * Host <-> VPU IPC messages types.
  */
@@ -228,6 +290,23 @@ enum vpu_ipc_msg_type {
 	 * deallocated or reassigned to another context.
 	 */
 	VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
+	/** Control command: Log buffer setting */
+	VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118,
+	/* Control command: Suspend command queue. */
+	VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119,
+	/* Control command: Resume command queue */
+	VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a,
+	/* Control command: Resume engine after reset */
+	VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b,
+	/* Control command: Enable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_ENABLE = 0x111c,
+	/* Control command: Disable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_DISABLE = 0x111d,
+	/**
+	 * Dump VPU state. To be used for debug purposes only.
+	 * NOTE: Please introduce new ASYNC commands before this one. *
+	 */
+	VPU_JSM_MSG_STATE_DUMP = 0x11FF,
 	/* IPC Host -> Device, General commands */
 	VPU_JSM_MSG_GENERAL_CMD = 0x1200,
 	VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
@@ -236,6 +315,10 @@ enum vpu_ipc_msg_type {
 	 * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
 	 */
 	VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
+	/**
+	 * Perform the save procedure for the D0i3 entry
+	 */
+	VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
 	/* IPC Device -> Host, Job completion */
 	VPU_JSM_MSG_JOB_DONE = 0x2100,
 	/* IPC Device -> Host, Async command completion */
@@ -304,11 +387,35 @@ enum vpu_ipc_msg_type {
 	VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
 	/** Response to control command: Set context scheduling properties */
 	VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
+	/** Response to control command: Log buffer setting */
+	VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218,
+	/* IPC Device -> Host, HWS notify index entry of log buffer written */
+	VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219,
+	/* IPC Device -> Host, HWS completion of a context suspend request */
+	VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a,
+	/* Response to control command: Resume command queue */
+	VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b,
+	/* Response to control command: Resume engine command response */
+	VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c,
+	/* Response to control command: Enable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d,
+	/* Response to control command: Disable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e,
+	/**
+	 * Response to state dump control command.
+	 * NOTE: Please introduce new ASYNC responses before this one. *
+	 */
+	VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
 	/* IPC Device -> Host, General command completion */
 	VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
 	VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
 	/** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */
 	VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301,
+	/**
+	 * Acknowledgment of completion of the save procedure initiated by
+	 * VPU_JSM_MSG_PWR_D0I3_ENTER
+	 */
+	VPU_JSM_MSG_PWR_D0I3_ENTER_DONE = 0x2302,
 };
 
 enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
@@ -593,12 +700,12 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
 	 * Default quantum in 100ns units for scheduling across processes
 	 * within a priority band
 	 */
-	u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+	u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
 	/*
 	 * Default grace period in 100ns units for processes that preempt each
 	 * other within a priority band
 	 */
-	u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+	u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
 	/*
 	 * For normal priority band, specifies the target VPU percentage
 	 * in situations when it's starved by the focus band.
@@ -608,32 +715,51 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
 	u32 reserved_0;
 };
 
-/* HWS create command queue request */
+/*
+ * @brief HWS create command queue request.
+ * Host will create a command queue via this command.
+ * Note: Cmdq group is a handle of an object which
+ * may contain one or more command queues.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
 struct vpu_ipc_msg_payload_hws_create_cmdq {
 	/* Process id */
 	u64 process_id;
 	/* Host SSID */
 	u32 host_ssid;
-	/* Zero Padding */
-	u32 reserved;
+	/* Engine for which queue is being created */
+	u32 engine_idx;
+	/*
+	 * Cmdq group may be set to 0 or equal to
+	 * cmdq_id while each priority band contains
+	 * only single engine instances.
+	 */
+	u64 cmdq_group;
 	/* Command queue id */
 	u64 cmdq_id;
 	/* Command queue base */
 	u64 cmdq_base;
 	/* Command queue size */
 	u32 cmdq_size;
-	/* Reserved */
+	/* Zero padding */
 	u32 reserved_0;
 };
 
-/* HWS create command queue response */
+/*
+ * @brief HWS create command queue response.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
 struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
 	/* Process id */
 	u64 process_id;
 	/* Host SSID */
 	u32 host_ssid;
-	/* Zero Padding */
-	u32 reserved;
+	/* Engine for which queue is being created */
+	u32 engine_idx;
+	/* Command queue group */
+	u64 cmdq_group;
 	/* Command queue id */
 	u64 cmdq_id;
 };
@@ -661,7 +787,7 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
 	/* Inside realtime band assigns a further priority */
 	u32 realtime_priority_level;
 	/* Priority relative to other contexts in the same process */
-	u32 in_process_priority;
+	s32 in_process_priority;
 	/* Zero padding / Reserved */
 	u32 reserved_1;
 	/* Context quantum relative to other contexts of same priority in the same process */
@@ -694,6 +820,123 @@ struct vpu_jsm_hws_register_db {
 	u64 cmdq_size;
 };
 
+/*
+ * @brief Structure to set another buffer to be used for scheduling-related logging.
+ * The size of the logging buffer and the number of entries is defined as part of the
+ * buffer itself as described next.
+ * The log buffer received from the host is made up of;
+ *   - header:     32 bytes in size, as shown in 'struct vpu_hws_log_buffer_header'.
+ *                 The header contains the number of log entries in the buffer.
+ *   - log entry:  0 to n-1, each log entry is 32 bytes in size, as shown in
+ *                 'struct vpu_hws_log_buffer_entry'.
+ *                 The entry contains the VPU timestamp, operation type and data.
+ * The host should provide the notify index value of log buffer to VPU. This is a
+ * value defined within the log buffer and when written to will generate the
+ * scheduling log notification.
+ * The host should set engine_idx and vpu_log_buffer_va to 0 to disable logging
+ * for a particular engine.
+ * VPU will handle one log buffer for each of supported engines.
+ * VPU should allow the logging to consume one host_ssid.
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ */
+struct vpu_ipc_msg_payload_hws_set_scheduling_log {
+	/* Engine ordinal */
+	u32 engine_idx;
+	/* Host SSID */
+	u32 host_ssid;
+	/*
+	 * VPU log buffer virtual address.
+	 * Set to 0 to disable logging for this engine.
+	 */
+	u64 vpu_log_buffer_va;
+	/*
+	 * Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+	 * is generated when an event log is written to this index.
+	 */
+	u64 notify_index;
+};
+
+/*
+ * @brief The scheduling log notification is generated by VPU when it writes
+ * an event into the log buffer at the notify_index. VPU notifies host with
+ * VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. This is an asynchronous
+ * message from VPU to host.
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ */
+struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
+	/* Engine ordinal */
+	u32 engine_idx;
+	/* Zero Padding */
+	u32 reserved_0;
+};
+
+/*
+ * @brief HWS suspend command queue request and done structure.
+ * Host will request the suspend of contexts and VPU will;
+ *   - Suspend all work on this context
+ *   - Preempt any running work
+ *   - Asynchronously perform the above and return success immediately once
+ *     all items above are started successfully
+ *   - Notify the host of completion of these operations via
+ *     VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ *   - Reject any other context operations on a context with an in-flight
+ *     suspend request running
+ * Same structure used when VPU notifies host of completion of a context suspend
+ * request. The ids and suspend fence value reported in this command will match
+ * the one in the request from the host to suspend the context. Once suspend is
+ * complete, VPU will not access any data relating to this command queue until
+ * it is resumed.
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ */
+struct vpu_ipc_msg_payload_hws_suspend_cmdq {
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved_0;
+	/* Command queue id */
+	u64 cmdq_id;
+	/*
+	 * Suspend fence value - reported by the VPU suspend context
+	 * completed once suspend is complete.
+	 */
+	u64 suspend_fence_value;
+};
+
+/*
+ * @brief HWS Resume command queue request / response structure.
+ * Host will request the resume of a context;
+ *  - VPU will resume all work on this context
+ *  - Scheduler will allow this context to be scheduled
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP
+ */
+struct vpu_ipc_msg_payload_hws_resume_cmdq {
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved_0;
+	/* Command queue id */
+	u64 cmdq_id;
+};
+
+/*
+ * @brief HWS Resume engine request / response structure.
+ * After a HWS engine reset, all scheduling is stopped on VPU until a engine resume.
+ * Host shall send this command to resume scheduling of any valid queue.
+ * @see VPU_JSM_MSG_HWS_RESUME_ENGINE
+ * @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE
+ */
+struct vpu_ipc_msg_payload_hws_resume_engine {
+	/* Engine to be resumed */
+	u32 engine_idx;
+	/* Reserved */
+	u32 reserved_0;
+};
+
 /**
  * Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and
  * VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages.
@@ -938,6 +1181,35 @@ struct vpu_ipc_msg_payload_dyndbg_control {
 	char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
 };
 
+/**
+ * Payload for VPU_JSM_MSG_PWR_D0I3_ENTER
+ *
+ * This is a bi-directional payload.
+ */
+struct vpu_ipc_msg_payload_pwr_d0i3_enter {
+	/**
+	 * 0: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is not sent to the host driver
+	 *    The driver will poll for D0i2 Idle state transitions.
+	 * 1: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is sent after VPU state save is complete
+	 */
+	u32 send_response;
+	u32 reserved_0;
+};
+
+/**
+ * Payload for VPU_JSM_MSG_DCT_ENABLE message.
+ *
+ * Default values for DCT active/inactive times are 5.3ms and 30ms respectively,
+ * corresponding to a 85% duty cycle. This payload allows the host to tune these
+ * values according to application requirements.
+ */
+struct vpu_ipc_msg_payload_pwr_dct_control {
+	/** Duty cycle active time in microseconds */
+	u32 dct_active_us;
+	/** Duty cycle inactive time in microseconds */
+	u32 dct_inactive_us;
+};
+
 /*
  * Payloads union, used to define complete message format.
  */
@@ -974,6 +1246,13 @@ union vpu_ipc_msg_payload {
 	struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq;
 	struct vpu_ipc_msg_payload_hws_set_context_sched_properties
 		hws_set_context_sched_properties;
+	struct vpu_ipc_msg_payload_hws_set_scheduling_log hws_set_scheduling_log;
+	struct vpu_ipc_msg_payload_hws_scheduling_log_notification hws_scheduling_log_notification;
+	struct vpu_ipc_msg_payload_hws_suspend_cmdq hws_suspend_cmdq;
+	struct vpu_ipc_msg_payload_hws_resume_cmdq hws_resume_cmdq;
+	struct vpu_ipc_msg_payload_hws_resume_engine hws_resume_engine;
+	struct vpu_ipc_msg_payload_pwr_d0i3_enter pwr_d0i3_enter;
+	struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control;
 };
 
 /*
-- 
GitLab


From 9692b1dcefe79ea47f7d5a94acff74303c6563c6 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:06 +0200
Subject: [PATCH 0058/2340] accel/ivpu: Remove unneeded drm_driver declaration

Cleanup drm_driver declaration leftover.

Reviewed-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-3-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index b6aa8893ff1ce..8f5655dfd83fb 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -31,8 +31,6 @@
 			   __stringify(DRM_IVPU_DRIVER_MINOR) "."
 #endif
 
-static const struct drm_driver driver;
-
 static struct lock_class_key submitted_jobs_xa_lock_class_key;
 
 int ivpu_dbg_mask;
-- 
GitLab


From f13108fc7bae7085616805ed176b2114cdf4bd55 Mon Sep 17 00:00:00 2001
From: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Date: Sat, 28 Oct 2023 15:34:07 +0200
Subject: [PATCH 0059/2340] accel/ivpu: Add dvfs_mode file to debugfs

Add new debugfs file to set dvfs_mode FW boot parameter and restart
the FW to allow experimenting with DVFS (dynamic voltage & frequency
scaling).

Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-4-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_debugfs.c | 28 ++++++++++++++++++++++++++++
 drivers/accel/ivpu/ivpu_fw.c      |  5 +++++
 drivers/accel/ivpu/ivpu_fw.h      |  1 +
 3 files changed, 34 insertions(+)

diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index ea453b985b491..6e0d568230241 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -115,6 +115,31 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
 	{"reset_pending", reset_pending_show, 0},
 };
 
+static ssize_t
+dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+{
+	struct ivpu_device *vdev = file->private_data;
+	struct ivpu_fw_info *fw = vdev->fw;
+	u32 dvfs_mode;
+	int ret;
+
+	ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
+	if (ret < 0)
+		return ret;
+
+	fw->dvfs_mode = dvfs_mode;
+
+	ivpu_pm_schedule_recovery(vdev);
+
+	return size;
+}
+
+static const struct file_operations dvfs_mode_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = dvfs_mode_fops_write,
+};
+
 static int fw_log_show(struct seq_file *s, void *v)
 {
 	struct ivpu_device *vdev = s->private;
@@ -280,6 +305,9 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
 	debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
 			    &ivpu_force_recovery_fops);
 
+	debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
+			    &dvfs_mode_fops);
+
 	debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
 			    &fw_log_fops);
 	debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 6142b09cf55a4..b81827540db94 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -182,6 +182,8 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
 	fw->trace_hw_component_mask = -1;
 
+	fw->dvfs_mode = 0;
+
 	ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
 		 fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
 	ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
@@ -422,6 +424,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
 		 boot_params->punit_telemetry_sram_size);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
 		 boot_params->vpu_telemetry_enable);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
+		 boot_params->dvfs_mode);
 }
 
 void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -492,6 +496,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
 	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
 	boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
+	boot_params->dvfs_mode = vdev->fw->dvfs_mode;
 
 	wmb(); /* Flush WC buffers after writing bootparams */
 
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 10ae2847f0ef3..66b60fa161b52 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -27,6 +27,7 @@ struct ivpu_fw_info {
 	u32 trace_level;
 	u32 trace_destination_mask;
 	u64 trace_hw_component_mask;
+	u32 dvfs_mode;
 };
 
 int ivpu_fw_init(struct ivpu_device *vdev);
-- 
GitLab


From bacc130d4671a9568352b8d7606cfe7a50d38a56 Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:08 +0200
Subject: [PATCH 0060/2340] accel/ivpu: Remove reset from power up sequence

Setting a non-zero work point resets the IP hence IP_RESET
trigger is redundant.

Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-5-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_hw_37xx.c | 4 ----
 drivers/accel/ivpu/ivpu_hw_40xx.c | 6 ------
 2 files changed, 10 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index e5cb9d8acb828..8340c84ed6dea 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -651,10 +651,6 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
 {
 	int ret;
 
-	ret = ivpu_hw_37xx_reset(vdev);
-	if (ret)
-		ivpu_warn(vdev, "Failed to reset HW: %d\n", ret);
-
 	ret = ivpu_hw_37xx_d0i3_disable(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 4bf4c87800446..eb8218d15f01e 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -811,12 +811,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev)
 {
 	int ret;
 
-	ret = ivpu_hw_40xx_reset(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to reset HW: %d\n", ret);
-		return ret;
-	}
-
 	ret = ivpu_hw_40xx_d0i3_disable(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
-- 
GitLab


From 61ab485f0eb1e5b7e2be10ec657a054d36ef5035 Mon Sep 17 00:00:00 2001
From: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:09 +0200
Subject: [PATCH 0061/2340] accel/ivpu: Add support for
 VPU_JOB_FLAGS_NULL_SUBMISSION_MASK

Add test_mode = 3 that add VPU_JOB_FLAGS_NULL_SUBMISSION_MASK
flag to the job send to the VPU device. Then the VPU will process
the job but won't execute commands (except the command to signal
the fence).

This can be used to estimate job processing overhead in the host
software and VPU firmware.

Unlike the null hardware mode, the null submission mode will
still work even if UMD uses VPU fences to track job completion.

Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-6-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c | 2 +-
 drivers/accel/ivpu/ivpu_drv.h | 7 ++++---
 drivers/accel/ivpu/ivpu_job.c | 2 ++
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 8f5655dfd83fb..4ec8d25a120cf 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -39,7 +39,7 @@ MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
 
 int ivpu_test_mode;
 module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
-MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw");
+MODULE_PARM_DESC(test_mode, "Test mode: 0 - disabled , 1 - fw unit test, 2 - null hw, 3 - null submission");
 
 u8 ivpu_pll_min_ratio;
 module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 12a63f8a73e8a..fdec8272da8cf 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -147,9 +147,10 @@ extern u8 ivpu_pll_min_ratio;
 extern u8 ivpu_pll_max_ratio;
 extern bool ivpu_disable_mmu_cont_pages;
 
-#define IVPU_TEST_MODE_DISABLED  0
-#define IVPU_TEST_MODE_FW_TEST   1
-#define IVPU_TEST_MODE_NULL_HW   2
+#define IVPU_TEST_MODE_DISABLED        0
+#define IVPU_TEST_MODE_FW_TEST         1
+#define IVPU_TEST_MODE_NULL_HW         2
+#define IVPU_TEST_MODE_NULL_SUBMISSION 3
 extern int ivpu_test_mode;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 689dc0d13b8fa..646b8f8129015 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -196,6 +196,8 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
 	entry->batch_buf_addr = job->cmd_buf_vpu_addr;
 	entry->job_id = job->job_id;
 	entry->flags = 0;
+	if (unlikely(ivpu_test_mode == IVPU_TEST_MODE_NULL_SUBMISSION))
+		entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
 	wmb(); /* Ensure that tail is updated after filling entry */
 	header->tail = next_entry;
 	wmb(); /* Flush WC buffer for jobq header */
-- 
GitLab


From 8b5cec3c2ccf0b12121cb151560c2876219d87e5 Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:10 +0200
Subject: [PATCH 0062/2340] accel/ivpu: Change test_mode module param to
 bitmask

Change meaning of test_mode module parameter from integer value
to bitmask allowing setting different test features with corresponding
bits.

Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-7-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c | 4 ++--
 drivers/accel/ivpu/ivpu_drv.h | 7 +++----
 drivers/accel/ivpu/ivpu_job.c | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 4ec8d25a120cf..39bac45d88b5e 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -39,7 +39,7 @@ MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
 
 int ivpu_test_mode;
 module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
-MODULE_PARM_DESC(test_mode, "Test mode: 0 - disabled , 1 - fw unit test, 2 - null hw, 3 - null submission");
+MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
 
 u8 ivpu_pll_min_ratio;
 module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
@@ -315,7 +315,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
 	unsigned long timeout;
 	int ret;
 
-	if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST)
+	if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST)
 		return 0;
 
 	ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index fdec8272da8cf..ada43ba565c4a 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -147,10 +147,9 @@ extern u8 ivpu_pll_min_ratio;
 extern u8 ivpu_pll_max_ratio;
 extern bool ivpu_disable_mmu_cont_pages;
 
-#define IVPU_TEST_MODE_DISABLED        0
-#define IVPU_TEST_MODE_FW_TEST         1
-#define IVPU_TEST_MODE_NULL_HW         2
-#define IVPU_TEST_MODE_NULL_SUBMISSION 3
+#define IVPU_TEST_MODE_FW_TEST         BIT(0)
+#define IVPU_TEST_MODE_NULL_HW         BIT(1)
+#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2)
 extern int ivpu_test_mode;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 646b8f8129015..6e96c921547d5 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -196,7 +196,7 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
 	entry->batch_buf_addr = job->cmd_buf_vpu_addr;
 	entry->job_id = job->job_id;
 	entry->flags = 0;
-	if (unlikely(ivpu_test_mode == IVPU_TEST_MODE_NULL_SUBMISSION))
+	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
 		entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
 	wmb(); /* Ensure that tail is updated after filling entry */
 	header->tail = next_entry;
@@ -404,7 +404,7 @@ static int ivpu_direct_job_submission(struct ivpu_job *job)
 		 job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
 		 job->engine_idx, cmdq->jobq->header.tail);
 
-	if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
+	if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) {
 		ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
 		cmdq->jobq->header.head = cmdq->jobq->header.tail;
 		wmb(); /* Flush WC buffer for jobq header */
-- 
GitLab


From db37a5bfe97588992c75c29c1f0e19c88d364ce5 Mon Sep 17 00:00:00 2001
From: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:11 +0200
Subject: [PATCH 0063/2340] accel/ivpu/40xx: Capture D0i3 entry host and device
 timestamps

The driver needs to capture the D0i3 entry timestamp to
calculate D0i3 residency time.

The D0i3 residency time and the VPU timestamp are passed
to the firmware at D0i3 exit (warm boot).

Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-8-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_hw_40xx.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index eb8218d15f01e..0eb9c827f6dcf 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -879,10 +879,18 @@ static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev)
 	       REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val);
 }
 
+static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+	vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
+	vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT);
+}
+
 static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
 {
 	int ret = 0;
 
+	ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
+
 	if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev))
 		ivpu_warn(vdev, "Failed to reset the VPU\n");
 
-- 
GitLab


From 3de6d9597892ef899d7e90f589e23c9298013134 Mon Sep 17 00:00:00 2001
From: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:12 +0200
Subject: [PATCH 0064/2340] accel/ivpu: Pass D0i3 residency time to the VPU
 firmware

The firmware needs to know the time spent in D0i3/D3 to
calculate telemetry data. The D0i3/D3 residency time is
calculated by the driver and passed to the firmware
in the boot parameters.

The driver also passes VPU perf counter value captured
right before entering D0i3 - this allows the VPU firmware
to generate monotonic timestamps for the logs.

Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-9-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_fw.c          | 17 ++++++++++++++++-
 drivers/accel/ivpu/ivpu_hw.h          |  2 ++
 drivers/accel/ivpu/ivpu_hw_37xx.c     |  8 ++++++++
 drivers/accel/ivpu/ivpu_hw_37xx_reg.h |  2 ++
 4 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index b81827540db94..383e4d9b97c85 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -426,14 +426,27 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
 		 boot_params->vpu_telemetry_enable);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
 		 boot_params->dvfs_mode);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+		 boot_params->d0i3_residency_time_us);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+		 boot_params->d0i3_entry_vpu_ts);
 }
 
 void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
 {
 	struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx;
 
-	/* In case of warm boot we only have to reset the entrypoint addr */
+	/* In case of warm boot only update variable params */
 	if (!ivpu_fw_is_cold_boot(vdev)) {
+		boot_params->d0i3_residency_time_us =
+			ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
+		boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
+
+		ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+			 boot_params->d0i3_residency_time_us);
+		ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+			 boot_params->d0i3_entry_vpu_ts);
+
 		boot_params->save_restore_ret_address = 0;
 		vdev->pm->is_warmboot = true;
 		return;
@@ -497,6 +510,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
 	boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
 	boot_params->dvfs_mode = vdev->fw->dvfs_mode;
+	boot_params->d0i3_residency_time_us = 0;
+	boot_params->d0i3_entry_vpu_ts = 0;
 
 	wmb(); /* Flush WC buffers after writing bootparams */
 
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index ab341237bcf97..fd4809b56168f 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -57,6 +57,8 @@ struct ivpu_hw_info {
 	u32 sku;
 	u16 config;
 	int dma_bits;
+	ktime_t d0i3_entry_host_ts;
+	u64 d0i3_entry_vpu_ts;
 };
 
 extern const struct ivpu_hw_ops ivpu_hw_37xx_ops;
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 8340c84ed6dea..451c9777b237b 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -714,10 +714,18 @@ static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev)
 	       REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val);
 }
 
+static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+	vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
+	vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT);
+}
+
 static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
 {
 	int ret = 0;
 
+	ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
+
 	if (!ivpu_hw_37xx_is_idle(vdev) && ivpu_hw_37xx_reset(vdev))
 		ivpu_err(vdev, "Failed to reset the VPU\n");
 
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
index 4083beb5e9dba..f6fec19192020 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
@@ -240,6 +240,8 @@
 #define VPU_37XX_CPU_SS_TIM_GEN_CONFIG					0x06021008u
 #define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK		BIT_MASK(9)
 
+#define VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT				0x06029000u
+
 #define VPU_37XX_CPU_SS_DOORBELL_0					0x06300000u
 #define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK				BIT_MASK(0)
 
-- 
GitLab


From 45e45362e0955fc3b0b622e8a0d788097f3de902 Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:13 +0200
Subject: [PATCH 0065/2340] accel/ivpu: Introduce
 ivpu_ipc_send_receive_active()

Split ivpu_ipc_send_receive() implementation to have a version
that does not call pm_runtime_resume_and_get(). That implementation
can be invoked when device is up and runtime resume is prohibited
(for example at the end of boot sequence).

The new function will be used for D0i3 entry IPC message addition
in the separate change.

Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-10-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_ipc.c | 33 ++++++++++++++++++++++-----------
 drivers/accel/ivpu/ivpu_ipc.h |  8 +++++---
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 6e213a5afb8cf..d069d1e1f91d5 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -288,23 +288,20 @@ consumer_del:
 	return ret;
 }
 
-int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
-			  enum vpu_ipc_msg_type expected_resp_type,
-			  struct vpu_jsm_msg *resp, u32 channel,
-			  unsigned long timeout_ms)
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+				 enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+				 u32 channel, unsigned long timeout_ms)
 {
 	struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
 	struct vpu_jsm_msg hb_resp;
 	int ret, hb_ret;
 
-	ret = ivpu_rpm_get(vdev);
-	if (ret < 0)
-		return ret;
+	drm_WARN_ON(&vdev->drm,
+		    vdev->drm.dev->power.runtime_status == RPM_SUSPENDED);
 
-	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp,
-					     channel, timeout_ms);
+	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
 	if (ret != -ETIMEDOUT)
-		goto rpm_put;
+		return ret;
 
 	hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
 						&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
@@ -314,7 +311,21 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
 		ivpu_pm_schedule_recovery(vdev);
 	}
 
-rpm_put:
+	return ret;
+}
+
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+			  enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+			  u32 channel, unsigned long timeout_ms)
+{
+	int ret;
+
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
+	ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms);
+
 	ivpu_rpm_put(vdev);
 	return ret;
 }
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 68f5b6668e00b..6918db23daa4b 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -85,9 +85,11 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
 		     unsigned long timeout_ms);
 
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+				 enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+				 u32 channel, unsigned long timeout_ms);
 int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
-			  enum vpu_ipc_msg_type expected_resp_type,
-			  struct vpu_jsm_msg *resp, u32 channel,
-			  unsigned long timeout_ms);
+			  enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+			  u32 channel, unsigned long timeout_ms);
 
 #endif /* __IVPU_IPC_H__ */
-- 
GitLab


From cc19fedab8bdbe0f81d320ff9a6fdb3b5b01be83 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:14 +0200
Subject: [PATCH 0066/2340] accel/ivpu/37xx: Print warning when VPUIP is not
 idle during power down

Print warning if VPUIP is not idle during power down.

Use warn log level also when we fail to enter reset state
as this is not really an error but unexpected behavior.

Reviewed-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-11-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_hw_37xx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 451c9777b237b..1b47d77b4c6ee 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -726,8 +726,11 @@ static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
 
 	ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
 
-	if (!ivpu_hw_37xx_is_idle(vdev) && ivpu_hw_37xx_reset(vdev))
-		ivpu_err(vdev, "Failed to reset the VPU\n");
+	if (!ivpu_hw_37xx_is_idle(vdev)) {
+		ivpu_warn(vdev, "VPU not idle during power down\n");
+		if (ivpu_hw_37xx_reset(vdev))
+			ivpu_warn(vdev, "Failed to reset the VPU\n");
+	}
 
 	if (ivpu_pll_disable(vdev)) {
 		ivpu_err(vdev, "Failed to disable PLL\n");
-- 
GitLab


From 3198a62eb8f8411b605501cb78c2298c67ecee91 Mon Sep 17 00:00:00 2001
From: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Date: Sat, 28 Oct 2023 15:34:15 +0200
Subject: [PATCH 0067/2340] accel/ivpu: Add support for delayed D0i3 entry
 message

Currently the VPU firmware prepares for D0i3 every time the VPU
is entering D0i2 Idle state. This is not optimal as we might not
enter D0i3 every time we enter D0i2 Idle and this preparation
is quite costly.

This optimization moves D0i3 preparation to a dedicated
message sent from the host driver only when the driver is about
to enter D0i3 - this reduces power consumption and latency for
certain workloads, for example audio workloads that submit
inference every 10 ms.

The VPU needs non zero time to enter IDLE state after responding to
D0i3 entry message. If the driver does not wait for the VPU to enter
IDLE state it could cause warm boot failures.

Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028133415.1169975-12-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.h     | 10 +++++--
 drivers/accel/ivpu/ivpu_fw.c      | 48 +++++++++++++++++++++++++++++--
 drivers/accel/ivpu/ivpu_hw.h      |  6 ++++
 drivers/accel/ivpu/ivpu_hw_37xx.c |  9 +++++-
 drivers/accel/ivpu/ivpu_hw_40xx.c | 10 +++++++
 drivers/accel/ivpu/ivpu_jsm_msg.c | 21 ++++++++++++++
 drivers/accel/ivpu/ivpu_jsm_msg.h |  1 +
 drivers/accel/ivpu/ivpu_pm.c      | 11 ++++++-
 8 files changed, 108 insertions(+), 8 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index ada43ba565c4a..1b482d1d66d95 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -87,6 +87,7 @@ struct ivpu_wa_table {
 	bool d3hot_after_power_off;
 	bool interrupt_clear_with_0;
 	bool disable_clock_relinquish;
+	bool disable_d0i3_msg;
 };
 
 struct ivpu_hw_info;
@@ -125,6 +126,7 @@ struct ivpu_device {
 		int tdr;
 		int reschedule_suspend;
 		int autosuspend;
+		int d0i3_entry_msg;
 	} timeout;
 };
 
@@ -147,9 +149,11 @@ extern u8 ivpu_pll_min_ratio;
 extern u8 ivpu_pll_max_ratio;
 extern bool ivpu_disable_mmu_cont_pages;
 
-#define IVPU_TEST_MODE_FW_TEST         BIT(0)
-#define IVPU_TEST_MODE_NULL_HW         BIT(1)
-#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2)
+#define IVPU_TEST_MODE_FW_TEST            BIT(0)
+#define IVPU_TEST_MODE_NULL_HW            BIT(1)
+#define IVPU_TEST_MODE_NULL_SUBMISSION    BIT(2)
+#define IVPU_TEST_MODE_D0I3_MSG_DISABLE   BIT(4)
+#define IVPU_TEST_MODE_D0I3_MSG_ENABLE    BIT(5)
 extern int ivpu_test_mode;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 383e4d9b97c85..4a21be3a0c59b 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -33,12 +33,17 @@
 
 #define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
 
-#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \
+/* Check if FW API is compatible with the driver */
+#define IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, name, min_major) \
 	ivpu_fw_check_api(vdev, fw_hdr, #name, \
 			  VPU_##name##_API_VER_INDEX, \
 			  VPU_##name##_API_VER_MAJOR, \
 			  VPU_##name##_API_VER_MINOR, min_major)
 
+/* Check if API version is lower that the given version */
+#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
+	ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
+
 static char *ivpu_firmware;
 module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
 MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
@@ -105,6 +110,19 @@ ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw
 	return 0;
 }
 
+static bool
+ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr,
+			 const char *str, int index, u16 major, u16 minor)
+{
+	u16 fw_major = (u16)(fw_hdr->api_version[index] >> 16);
+	u16 fw_minor = (u16)(fw_hdr->api_version[index]);
+
+	if (fw_major < major || (fw_major == major && fw_minor < minor))
+		return true;
+
+	return false;
+}
+
 static int ivpu_fw_parse(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
@@ -164,9 +182,9 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
 		  (const char *)fw_hdr + VPU_FW_HEADER_SIZE);
 
-	if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3))
+	if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
 		return -EINVAL;
-	if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3))
+	if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3))
 		return -EINVAL;
 
 	fw->runtime_addr = runtime_addr;
@@ -197,6 +215,24 @@ static void ivpu_fw_release(struct ivpu_device *vdev)
 	release_firmware(vdev->fw->file);
 }
 
+/* Initialize workarounds that depend on FW version */
+static void
+ivpu_fw_init_wa(struct ivpu_device *vdev)
+{
+	const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
+
+	if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
+	    (ivpu_hw_gen(vdev) > IVPU_HW_37XX) ||
+	    (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
+		vdev->wa.disable_d0i3_msg = true;
+
+	/* Force enable the feature for testing purposes */
+	if (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_ENABLE)
+		vdev->wa.disable_d0i3_msg = false;
+
+	IVPU_PRINT_WA(disable_d0i3_msg);
+}
+
 static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
@@ -299,6 +335,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
 	if (ret)
 		goto err_fw_release;
 
+	ivpu_fw_init_wa(vdev);
+
 	ret = ivpu_fw_mem_init(vdev);
 	if (ret)
 		goto err_fw_release;
@@ -426,6 +464,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
 		 boot_params->vpu_telemetry_enable);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
 		 boot_params->dvfs_mode);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n",
+		 boot_params->d0i3_delayed_entry);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
 		 boot_params->d0i3_residency_time_us);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
@@ -510,6 +550,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
 	boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
 	boot_params->dvfs_mode = vdev->fw->dvfs_mode;
+	if (!IVPU_WA(disable_d0i3_msg))
+		boot_params->d0i3_delayed_entry = 1;
 	boot_params->d0i3_residency_time_us = 0;
 	boot_params->d0i3_entry_vpu_ts = 0;
 
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index fd4809b56168f..b7694b1cbc02f 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -14,6 +14,7 @@ struct ivpu_hw_ops {
 	int (*boot_fw)(struct ivpu_device *vdev);
 	int (*power_down)(struct ivpu_device *vdev);
 	bool (*is_idle)(struct ivpu_device *vdev);
+	int (*wait_for_idle)(struct ivpu_device *vdev);
 	void (*wdt_disable)(struct ivpu_device *vdev);
 	void (*diagnose_failure)(struct ivpu_device *vdev);
 	u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
@@ -86,6 +87,11 @@ static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
 	return vdev->hw->ops->is_idle(vdev);
 };
 
+static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->wait_for_idle(vdev);
+};
+
 static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
 {
 	ivpu_dbg(vdev, PM, "HW power down\n");
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 1b47d77b4c6ee..1c8c5715095be 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -37,7 +37,7 @@
 #define TIMEOUT_US		     (150 * USEC_PER_MSEC)
 #define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
 #define PLL_TIMEOUT_US		     (1500 * USEC_PER_MSEC)
-#define IDLE_TIMEOUT_US		     (500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US		     (5 * USEC_PER_MSEC)
 
 #define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
 			(REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
@@ -90,6 +90,7 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 	vdev->timeout.tdr = 2000;
 	vdev->timeout.reschedule_suspend = 10;
 	vdev->timeout.autosuspend = 10;
+	vdev->timeout.d0i3_entry_msg = 5;
 }
 
 static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
@@ -714,6 +715,11 @@ static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev)
 	       REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val);
 }
 
+static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev)
+{
+	return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+}
+
 static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
 {
 	vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
@@ -1001,6 +1007,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
 	.info_init = ivpu_hw_37xx_info_init,
 	.power_up = ivpu_hw_37xx_power_up,
 	.is_idle = ivpu_hw_37xx_is_idle,
+	.wait_for_idle = ivpu_hw_37xx_wait_for_idle,
 	.power_down = ivpu_hw_37xx_power_down,
 	.boot_fw = ivpu_hw_37xx_boot_fw,
 	.wdt_disable = ivpu_hw_37xx_wdt_disable,
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 0eb9c827f6dcf..6a9672f650d18 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -39,6 +39,7 @@
 #define TIMEOUT_US		     (150 * USEC_PER_MSEC)
 #define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
 #define PLL_TIMEOUT_US		     (1500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US		     (5 * USEC_PER_MSEC)
 
 #define WEIGHTS_DEFAULT              0xf711f711u
 #define WEIGHTS_ATS_DEFAULT          0x0000f711u
@@ -140,18 +141,21 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.tdr = 2000000;
 		vdev->timeout.reschedule_suspend = 1000;
 		vdev->timeout.autosuspend = -1;
+		vdev->timeout.d0i3_entry_msg = 500;
 	} else if (ivpu_is_simics(vdev)) {
 		vdev->timeout.boot = 50;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 10000;
 		vdev->timeout.reschedule_suspend = 10;
 		vdev->timeout.autosuspend = -1;
+		vdev->timeout.d0i3_entry_msg = 100;
 	} else {
 		vdev->timeout.boot = 1000;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 2000;
 		vdev->timeout.reschedule_suspend = 10;
 		vdev->timeout.autosuspend = 10;
+		vdev->timeout.d0i3_entry_msg = 5;
 	}
 }
 
@@ -879,6 +883,11 @@ static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev)
 	       REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val);
 }
 
+static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev)
+{
+	return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+}
+
 static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
 {
 	vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
@@ -1168,6 +1177,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
 	.info_init = ivpu_hw_40xx_info_init,
 	.power_up = ivpu_hw_40xx_power_up,
 	.is_idle = ivpu_hw_40xx_is_idle,
+	.wait_for_idle = ivpu_hw_40xx_wait_for_idle,
 	.power_down = ivpu_hw_40xx_power_down,
 	.boot_fw = ivpu_hw_40xx_boot_fw,
 	.wdt_disable = ivpu_hw_40xx_wdt_disable,
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 35a689475c68d..8cea0dd731b91 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -4,6 +4,7 @@
  */
 
 #include "ivpu_drv.h"
+#include "ivpu_hw.h"
 #include "ivpu_ipc.h"
 #include "ivpu_jsm_msg.h"
 
@@ -260,3 +261,23 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid)
 	return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
 				     VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 }
+
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_PWR_D0I3_ENTER };
+	struct vpu_jsm_msg resp;
+	int ret;
+
+	if (IVPU_WA(disable_d0i3_msg))
+		return 0;
+
+	req.payload.pwr_d0i3_enter.send_response = 1;
+
+	ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE,
+					   &resp, VPU_IPC_CHAN_GEN_CMD,
+					   vdev->timeout.d0i3_entry_msg);
+	if (ret)
+		return ret;
+
+	return ivpu_hw_wait_for_idle(vdev);
+}
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
index 66979a948c7c6..ae75e5dbcc41d 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.h
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -22,4 +22,5 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati
 int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
 			      u64 trace_hw_component_mask);
 int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid);
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev);
 #endif
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 568e4295dc56b..2d05fb9e31970 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -15,6 +15,7 @@
 #include "ivpu_fw.h"
 #include "ivpu_ipc.h"
 #include "ivpu_job.h"
+#include "ivpu_jsm_msg.h"
 #include "ivpu_mmu.h"
 #include "ivpu_pm.h"
 
@@ -153,6 +154,8 @@ int ivpu_pm_suspend_cb(struct device *dev)
 		}
 	}
 
+	ivpu_jsm_pwr_d0i3_enter(vdev);
+
 	ivpu_suspend(vdev);
 	ivpu_pm_prepare_warm_boot(vdev);
 
@@ -188,6 +191,7 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
 	struct ivpu_device *vdev = to_ivpu_device(drm);
+	bool hw_is_idle = true;
 	int ret;
 
 	ivpu_dbg(vdev, PM, "Runtime suspend..\n");
@@ -200,11 +204,16 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
 		return -EAGAIN;
 	}
 
+	if (!vdev->pm->suspend_reschedule_counter)
+		hw_is_idle = false;
+	else if (ivpu_jsm_pwr_d0i3_enter(vdev))
+		hw_is_idle = false;
+
 	ret = ivpu_suspend(vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
 
-	if (!vdev->pm->suspend_reschedule_counter) {
+	if (!hw_is_idle) {
 		ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
 		ivpu_pm_prepare_cold_boot(vdev);
 	} else {
-- 
GitLab


From 36f27350ff745bd228ab04d7845dfbffc177a889 Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Fri, 27 Oct 2023 10:28:22 -0700
Subject: [PATCH 0068/2340] i915/perf: Fix NULL deref bugs with drm_dbg() calls

When i915 perf interface is not available dereferencing it will lead to
NULL dereferences.

As returning -ENOTSUPP is pretty clear return when perf interface is not
available.

Fixes: 2fec539112e8 ("i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call")
Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v6.0+
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027172822.2753059-1-harshit.m.mogalapalli@oracle.com
[tursulin: added stable tag]
---
 drivers/gpu/drm/i915/i915_perf.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 7178f298b3e61..f92c2a464ebe7 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4227,11 +4227,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 	u32 known_open_flags;
 	int ret;
 
-	if (!perf->i915) {
-		drm_dbg(&perf->i915->drm,
-			"i915 perf interface not available for this system\n");
+	if (!perf->i915)
 		return -ENOTSUPP;
-	}
 
 	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
 			   I915_PERF_FLAG_FD_NONBLOCK |
@@ -4607,11 +4604,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 	struct i915_oa_reg *regs;
 	int err, id;
 
-	if (!perf->i915) {
-		drm_dbg(&perf->i915->drm,
-			"i915 perf interface not available for this system\n");
+	if (!perf->i915)
 		return -ENOTSUPP;
-	}
 
 	if (!perf->metrics_kobj) {
 		drm_dbg(&perf->i915->drm,
@@ -4773,11 +4767,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 	struct i915_oa_config *oa_config;
 	int ret;
 
-	if (!perf->i915) {
-		drm_dbg(&perf->i915->drm,
-			"i915 perf interface not available for this system\n");
+	if (!perf->i915)
 		return -ENOTSUPP;
-	}
 
 	if (i915_perf_stream_paranoid && !perfmon_capable()) {
 		drm_dbg(&perf->i915->drm,
-- 
GitLab


From 79d94360d50fcd487edcfe118a47a2881534923f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Mon, 23 Oct 2023 07:58:33 -0300
Subject: [PATCH 0069/2340] drm/v3d: wait for all jobs to finish before
 unregistering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, we are only warning the user if the BIN or RENDER jobs don't
finish before we unregister V3D. We must wait for all jobs to finish
before unregistering. Therefore, warn the user if TFU or CSD jobs
are not done by the time the driver is unregistered.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Maíra Canal <mairacanal@riseup.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023105927.101502-1-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_gem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 2e94ce788c714..afa7d170d1fff 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -1072,6 +1072,8 @@ v3d_gem_destroy(struct drm_device *dev)
 	 */
 	WARN_ON(v3d->bin_job);
 	WARN_ON(v3d->render_job);
+	WARN_ON(v3d->tfu_job);
+	WARN_ON(v3d->csd_job);
 
 	drm_mm_takedown(&v3d->mm);
 
-- 
GitLab


From 0db5649e9e5962cc25f813f9fca08588f97fe5b8 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 26 Oct 2023 13:13:32 +0300
Subject: [PATCH 0070/2340] drm/i915: fix Makefile sort and indent

Unify the line continuations and indents, and sort the build lists.

Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026101333.875406-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/Makefile | 169 ++++++++++++++++++----------------
 1 file changed, 89 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3b9dcb606fc1a..d1f53dbf95f29 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -47,33 +47,34 @@ subdir-ccflags-y += -I$(srctree)/$(src)
 # Please keep these build lists sorted!
 
 # core driver code
-i915-y += i915_driver.o \
-	  i915_drm_client.o \
-	  i915_config.o \
-	  i915_getparam.o \
-	  i915_ioctl.o \
-	  i915_irq.o \
-	  i915_mitigations.o \
-	  i915_module.o \
-	  i915_params.o \
-	  i915_pci.o \
-	  i915_scatterlist.o \
-	  i915_suspend.o \
-	  i915_switcheroo.o \
-	  i915_sysfs.o \
-	  i915_utils.o \
-	  intel_clock_gating.o \
-	  intel_device_info.o \
-	  intel_memory_region.o \
-	  intel_pcode.o \
-	  intel_region_ttm.o \
-	  intel_runtime_pm.o \
-	  intel_sbi.o \
-	  intel_step.o \
-	  intel_uncore.o \
-	  intel_wakeref.o \
-	  vlv_sideband.o \
-	  vlv_suspend.o
+i915-y += \
+	i915_config.o \
+	i915_driver.o \
+	i915_drm_client.o \
+	i915_getparam.o \
+	i915_ioctl.o \
+	i915_irq.o \
+	i915_mitigations.o \
+	i915_module.o \
+	i915_params.o \
+	i915_pci.o \
+	i915_scatterlist.o \
+	i915_suspend.o \
+	i915_switcheroo.o \
+	i915_sysfs.o \
+	i915_utils.o \
+	intel_clock_gating.o \
+	intel_device_info.o \
+	intel_memory_region.o \
+	intel_pcode.o \
+	intel_region_ttm.o \
+	intel_runtime_pm.o \
+	intel_sbi.o \
+	intel_step.o \
+	intel_uncore.o \
+	intel_wakeref.o \
+	vlv_sideband.o \
+	vlv_suspend.o
 
 # core peripheral code
 i915-y += \
@@ -90,14 +91,16 @@ i915-y += \
 	i915_syncmap.o \
 	i915_user_extensions.o
 
-i915-$(CONFIG_COMPAT)   += i915_ioc32.o
+i915-$(CONFIG_COMPAT) += \
+	i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += \
 	i915_debugfs.o \
 	i915_debugfs_params.o \
 	display/intel_display_debugfs.o \
 	display/intel_display_debugfs_params.o \
 	display/intel_pipe_crc.o
-i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
+i915-$(CONFIG_PERF_EVENTS) += \
+	i915_pmu.o
 
 # "Graphics Technology" (aka we talk to the gpu)
 gt-y += \
@@ -154,7 +157,8 @@ gt-y += \
 	gt/sysfs_engines.o
 
 # x86 intel-gtt module support
-gt-$(CONFIG_X86) += gt/intel_ggtt_gmch.o
+gt-$(CONFIG_X86) += \
+	gt/intel_ggtt_gmch.o
 # autogenerated null render state
 gt-y += \
 	gt/gen6_renderstate.o \
@@ -173,9 +177,9 @@ gem-y += \
 	gem/i915_gem_domain.o \
 	gem/i915_gem_execbuffer.o \
 	gem/i915_gem_internal.o \
-	gem/i915_gem_object.o \
 	gem/i915_gem_lmem.o \
 	gem/i915_gem_mman.o \
+	gem/i915_gem_object.o \
 	gem/i915_gem_pages.o \
 	gem/i915_gem_phys.o \
 	gem/i915_gem_pm.o \
@@ -192,57 +196,61 @@ gem-y += \
 	gem/i915_gem_wait.o \
 	gem/i915_gemfs.o
 i915-y += \
-	  $(gem-y) \
-	  i915_active.o \
-	  i915_cmd_parser.o \
-	  i915_deps.o \
-	  i915_gem_evict.o \
-	  i915_gem_gtt.o \
-	  i915_gem_ww.o \
-	  i915_gem.o \
-	  i915_query.o \
-	  i915_request.o \
-	  i915_scheduler.o \
-	  i915_trace_points.o \
-	  i915_ttm_buddy_manager.o \
-	  i915_vma.o \
-	  i915_vma_resource.o
+	$(gem-y) \
+	i915_active.o \
+	i915_cmd_parser.o \
+	i915_deps.o \
+	i915_gem.o \
+	i915_gem_evict.o \
+	i915_gem_gtt.o \
+	i915_gem_ww.o \
+	i915_query.o \
+	i915_request.o \
+	i915_scheduler.o \
+	i915_trace_points.o \
+	i915_ttm_buddy_manager.o \
+	i915_vma.o \
+	i915_vma_resource.o
 
 # general-purpose microcontroller (GuC) support
 i915-y += \
-	  gt/uc/intel_gsc_fw.o \
-	  gt/uc/intel_gsc_proxy.o \
-	  gt/uc/intel_gsc_uc.o \
-	  gt/uc/intel_gsc_uc_debugfs.o \
-	  gt/uc/intel_gsc_uc_heci_cmd_submit.o \
-	  gt/uc/intel_guc.o \
-	  gt/uc/intel_guc_ads.o \
-	  gt/uc/intel_guc_capture.o \
-	  gt/uc/intel_guc_ct.o \
-	  gt/uc/intel_guc_debugfs.o \
-	  gt/uc/intel_guc_fw.o \
-	  gt/uc/intel_guc_hwconfig.o \
-	  gt/uc/intel_guc_log.o \
-	  gt/uc/intel_guc_log_debugfs.o \
-	  gt/uc/intel_guc_rc.o \
-	  gt/uc/intel_guc_slpc.o \
-	  gt/uc/intel_guc_submission.o \
-	  gt/uc/intel_huc.o \
-	  gt/uc/intel_huc_debugfs.o \
-	  gt/uc/intel_huc_fw.o \
-	  gt/uc/intel_uc.o \
-	  gt/uc/intel_uc_debugfs.o \
-	  gt/uc/intel_uc_fw.o
+	gt/uc/intel_gsc_fw.o \
+	gt/uc/intel_gsc_proxy.o \
+	gt/uc/intel_gsc_uc.o \
+	gt/uc/intel_gsc_uc_debugfs.o \
+	gt/uc/intel_gsc_uc_heci_cmd_submit.o\
+	gt/uc/intel_guc.o \
+	gt/uc/intel_guc_ads.o \
+	gt/uc/intel_guc_capture.o \
+	gt/uc/intel_guc_ct.o \
+	gt/uc/intel_guc_debugfs.o \
+	gt/uc/intel_guc_fw.o \
+	gt/uc/intel_guc_hwconfig.o \
+	gt/uc/intel_guc_log.o \
+	gt/uc/intel_guc_log_debugfs.o \
+	gt/uc/intel_guc_rc.o \
+	gt/uc/intel_guc_slpc.o \
+	gt/uc/intel_guc_submission.o \
+	gt/uc/intel_huc.o \
+	gt/uc/intel_huc_debugfs.o \
+	gt/uc/intel_huc_fw.o \
+	gt/uc/intel_uc.o \
+	gt/uc/intel_uc_debugfs.o \
+	gt/uc/intel_uc_fw.o
 
 # graphics system controller (GSC) support
-i915-y += gt/intel_gsc.o
+i915-y += \
+	gt/intel_gsc.o
 
 # graphics hardware monitoring (HWMON) support
-i915-$(CONFIG_HWMON) += i915_hwmon.o
+i915-$(CONFIG_HWMON) += \
+	i915_hwmon.o
 
 # modesetting core code
 i915-y += \
 	display/hsw_ips.o \
+	display/i9xx_plane.o \
+	display/i9xx_wm.o \
 	display/intel_atomic.o \
 	display/intel_atomic_plane.o \
 	display/intel_audio.o \
@@ -289,8 +297,8 @@ i915-y += \
 	display/intel_load_detect.o \
 	display/intel_lpe_audio.o \
 	display/intel_modeset_lock.o \
-	display/intel_modeset_verify.o \
 	display/intel_modeset_setup.o \
+	display/intel_modeset_verify.o \
 	display/intel_overlay.o \
 	display/intel_pch_display.o \
 	display/intel_pch_refclk.o \
@@ -304,8 +312,6 @@ i915-y += \
 	display/intel_vblank.o \
 	display/intel_vga.o \
 	display/intel_wm.o \
-	display/i9xx_plane.o \
-	display/i9xx_wm.o \
 	display/skl_scaler.o \
 	display/skl_universal_plane.o \
 	display/skl_watermark.o
@@ -359,13 +365,14 @@ i915-y += \
 	display/vlv_dsi.o \
 	display/vlv_dsi_pll.o
 
-i915-y += i915_perf.o
+i915-y += \
+	i915_perf.o
 
 # Protected execution platform (PXP) support. Base support is required for HuC
 i915-y += \
 	pxp/intel_pxp.o \
-	pxp/intel_pxp_tee.o \
-	pxp/intel_pxp_huc.o
+	pxp/intel_pxp_huc.o \
+	pxp/intel_pxp_tee.o
 
 i915-$(CONFIG_DRM_I915_PXP) += \
 	pxp/intel_pxp_cmd.o \
@@ -376,11 +383,11 @@ i915-$(CONFIG_DRM_I915_PXP) += \
 	pxp/intel_pxp_session.o
 
 # Post-mortem debug and GPU hang state capture
-i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
+i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += \
+	i915_gpu_error.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	gem/selftests/i915_gem_client_blt.o \
 	gem/selftests/igt_gem_utils.o \
-	selftests/intel_scheduler_helpers.o \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
 	selftests/igt_atomic.o \
@@ -389,10 +396,12 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/igt_mmap.o \
 	selftests/igt_reset.o \
 	selftests/igt_spinner.o \
+	selftests/intel_scheduler_helpers.o \
 	selftests/librapl.o
 
 # virtual gpu code
-i915-y += i915_vgpu.o
+i915-y += \
+	i915_vgpu.o
 
 i915-$(CONFIG_DRM_I915_GVT) += \
 	intel_gvt.o \
-- 
GitLab


From 75658332bb1052867d31c67c93bfdbd86a5f7b2a Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 26 Oct 2023 13:13:33 +0300
Subject: [PATCH 0071/2340] drm/i915: move Makefile display debugfs files next
 to display

Keep the display build lists together.

v2: Rebase

Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026101333.875406-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/Makefile | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d1f53dbf95f29..239da40a401f1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -95,10 +95,7 @@ i915-$(CONFIG_COMPAT) += \
 	i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += \
 	i915_debugfs.o \
-	i915_debugfs_params.o \
-	display/intel_display_debugfs.o \
-	display/intel_display_debugfs_params.o \
-	display/intel_pipe_crc.o
+	i915_debugfs_params.o
 i915-$(CONFIG_PERF_EVENTS) += \
 	i915_pmu.o
 
@@ -320,6 +317,10 @@ i915-$(CONFIG_ACPI) += \
 	display/intel_opregion.o
 i915-$(CONFIG_DRM_FBDEV_EMULATION) += \
 	display/intel_fbdev.o
+i915-$(CONFIG_DEBUG_FS) += \
+	display/intel_display_debugfs.o \
+	display/intel_display_debugfs_params.o \
+	display/intel_pipe_crc.o
 
 # modesetting output/encoder code
 i915-y += \
-- 
GitLab


From b2139fb5051554a7f297e4ad584ef1bc26c76d5d Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Fri, 20 Oct 2023 11:44:05 +0100
Subject: [PATCH 0072/2340] drm/panfrost: Remove incorrect IS_ERR() check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sg_page_iter_page() doesn't return an error code, so the IS_ERR() check
is wrong and the error path will never be executed. This also allows
simplifying the code to remove the local variable 'page'.

CC: Adrián Larumbe <adrian.larumbe@collabora.com>
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/r/376713ff-9a4f-4ea3-b097-fb5efb685d95@moroto.mountain
Signed-off-by: Steven Price <steven.price@arm.com>
Reviewed-by: Adrián Larumbe <adrian.larumbe@collabora.com>
Tested-by: Adrián Larumbe <adrian.larumbe@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231020104405.53992-1-steven.price@arm.com
---
 drivers/gpu/drm/panfrost/panfrost_dump.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.c b/drivers/gpu/drm/panfrost/panfrost_dump.c
index e7942ac449c68..47751302f1bc9 100644
--- a/drivers/gpu/drm/panfrost/panfrost_dump.c
+++ b/drivers/gpu/drm/panfrost/panfrost_dump.c
@@ -220,16 +220,8 @@ void panfrost_core_dump(struct panfrost_job *job)
 
 		iter.hdr->bomap.data[0] = bomap - bomap_start;
 
-		for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
-			struct page *page = sg_page_iter_page(&page_iter);
-
-			if (!IS_ERR(page)) {
-				*bomap++ = page_to_phys(page);
-			} else {
-				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
-				*bomap++ = 0;
-			}
-		}
+		for_each_sgtable_page(bo->base.sgt, &page_iter, 0)
+			*bomap++ = page_to_phys(sg_page_iter_page(&page_iter));
 
 		iter.hdr->bomap.iova = mapping->mmnode.start << PAGE_SHIFT;
 
-- 
GitLab


From 874d6fe4a6962cc18bb0e62dfc23adbebd0abbe2 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Mon, 23 Oct 2023 18:02:54 +0300
Subject: [PATCH 0073/2340] drm/i915/pmu: add pmu_to_i915() helper

It's tedious to duplicate the container_of() everywhere. Add a helper.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023150256.438331-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/i915_pmu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f861863eb7c15..52e42d7f23ec7 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -31,6 +31,11 @@
 static cpumask_t i915_pmu_cpumask;
 static unsigned int i915_pmu_target_cpu = -1;
 
+static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
+{
+	return container_of(pmu, struct drm_i915_private, pmu);
+}
+
 static u8 engine_config_sample(u64 config)
 {
 	return config & I915_PMU_SAMPLE_MASK;
@@ -141,7 +146,7 @@ static u32 frequency_enabled_mask(void)
 
 static bool pmu_needs_timer(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	u32 enable;
 
 	/*
@@ -251,7 +256,7 @@ static u64 get_rc6(struct intel_gt *gt)
 
 static void init_rc6(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	struct intel_gt *gt;
 	unsigned int i;
 
@@ -982,7 +987,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
 static struct attribute **
 create_event_attributes(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	static const struct {
 		unsigned int counter;
 		const char *name;
-- 
GitLab


From cb476dd1b8b10a40f6ba6e230f0b408916365c1f Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Mon, 23 Oct 2023 18:02:55 +0300
Subject: [PATCH 0074/2340] drm/i915/pmu: add event_to_pmu() helper

It's tedious to duplicate the container_of() everywhere. Add a helper.

Also do the logical steps of first getting from struct perf_event to
struct i915_pmu, and then from struct i915_pmu to struct
drm_i915_private if needed, instead of perf_event->i915->pmu. Not all
places even need the i915 pointer.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023150256.438331-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/i915_pmu.c | 45 +++++++++++++++------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 52e42d7f23ec7..5c3c480886dbe 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -31,6 +31,11 @@
 static cpumask_t i915_pmu_cpumask;
 static unsigned int i915_pmu_target_cpu = -1;
 
+static struct i915_pmu *event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct i915_pmu, base);
+}
+
 static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
 {
 	return container_of(pmu, struct drm_i915_private, pmu);
@@ -510,8 +515,8 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 
 	drm_WARN_ON(&i915->drm, event->parent);
 
@@ -577,8 +582,8 @@ config_status(struct drm_i915_private *i915, u64 config)
 
 static int engine_event_init(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	struct intel_engine_cs *engine;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
@@ -591,9 +596,8 @@ static int engine_event_init(struct perf_event *event)
 
 static int i915_pmu_event_init(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	int ret;
 
 	if (pmu->closed)
@@ -633,9 +637,8 @@ static int i915_pmu_event_init(struct perf_event *event)
 
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	u64 val = 0;
 
 	if (is_engine_event(event)) {
@@ -691,10 +694,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
 static void i915_pmu_event_read(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
 	struct hw_perf_event *hwc = &event->hw;
-	struct i915_pmu *pmu = &i915->pmu;
 	u64 prev, new;
 
 	if (pmu->closed) {
@@ -712,10 +713,9 @@ static void i915_pmu_event_read(struct perf_event *event)
 
 static void i915_pmu_enable(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	const unsigned int bit = event_bit(event);
-	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
 	if (bit == -1)
@@ -776,10 +776,9 @@ update:
 
 static void i915_pmu_disable(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	const unsigned int bit = event_bit(event);
-	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
 	if (bit == -1)
@@ -823,9 +822,7 @@ static void i915_pmu_disable(struct perf_event *event)
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
 
 	if (pmu->closed)
 		return;
@@ -853,9 +850,7 @@ out:
 
 static int i915_pmu_event_add(struct perf_event *event, int flags)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
 
 	if (pmu->closed)
 		return -ENODEV;
-- 
GitLab


From 76310edddf11a5716f324785e9caad01a90e128a Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Mon, 23 Oct 2023 18:02:56 +0300
Subject: [PATCH 0075/2340] drm/i915/pmu: rearrange hrtimer pointer chasing

Do the logical step of first getting from struct hrtimer to struct
i915_pmu, and then from struct i915_pmu to struct drm_i915_private,
instead of hrtimer->i915->pmu.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023150256.438331-3-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/i915_pmu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 5c3c480886dbe..878a27e1c8eff 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -478,9 +478,8 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
-	struct drm_i915_private *i915 =
-		container_of(hrtimer, struct drm_i915_private, pmu.timer);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	unsigned int period_ns;
 	struct intel_gt *gt;
 	unsigned int i;
-- 
GitLab


From 8aa519f17512da50a2d850b60472de656e2b210a Mon Sep 17 00:00:00 2001
From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Date: Wed, 25 Oct 2023 18:47:09 +0530
Subject: [PATCH 0076/2340] drm/i915/mtl: Add Wa_22016670082

Implemented workaround for XeLPM+
BSpec: 51762

Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231025131709.3368517-1-dnyaneshwar.bhadane@intel.com
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 192ac0e59afa1..6ae7a4de83b06 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1674,6 +1674,9 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 */
 	wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
 
+	/* Wa_22016670082 */
+	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
+
 	debug_dump_steering(gt);
 }
 
-- 
GitLab


From 9e372744c0f24d358967a9a2bbde69dee1491b76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 13 Oct 2023 17:02:14 +0300
Subject: [PATCH 0077/2340] drm/i915/bios: Clamp VBT HDMI level shift on BDW
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apparently some BDW machines (eg. HP Pavilion 15-ab) shipped with
a VBT inherited from some earlier HSW model. On HSW the HDMI level
shift value could go up to 11, whereas on BDW the maximum value is
9.

The DDI code does clamp the bogus value, but it does so with
a WARN which we don't really want. To avoid that let's just sanitize
the bogus VBT HDMI level shift value ahead of time for all BDW machines.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9461
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013140214.1713-1-ville.syrjala@linux.intel.com
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
---
 drivers/gpu/drm/i915/display/intel_bios.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 69db1a3a1499e..719fb550342b5 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -2473,6 +2473,27 @@ static void sanitize_device_type(struct intel_bios_encoder_data *devdata,
 	devdata->child.device_type |= DEVICE_TYPE_NOT_HDMI_OUTPUT;
 }
 
+static void sanitize_hdmi_level_shift(struct intel_bios_encoder_data *devdata,
+				      enum port port)
+{
+	struct drm_i915_private *i915 = devdata->i915;
+
+	if (!intel_bios_encoder_supports_dvi(devdata))
+		return;
+
+	/*
+	 * Some BDW machines (eg. HP Pavilion 15-ab) shipped
+	 * with a HSW VBT where the level shifter value goes
+	 * up to 11, whereas the BDW max is 9.
+	 */
+	if (IS_BROADWELL(i915) && devdata->child.hdmi_level_shifter_value > 9) {
+		drm_dbg_kms(&i915->drm, "Bogus port %c VBT HDMI level shift %d, adjusting to %d\n",
+			    port_name(port), devdata->child.hdmi_level_shifter_value, 9);
+
+		devdata->child.hdmi_level_shifter_value = 9;
+	}
+}
+
 static bool
 intel_bios_encoder_supports_crt(const struct intel_bios_encoder_data *devdata)
 {
@@ -2652,6 +2673,7 @@ static void parse_ddi_port(struct intel_bios_encoder_data *devdata)
 	}
 
 	sanitize_device_type(devdata, port);
+	sanitize_hdmi_level_shift(devdata, port);
 }
 
 static bool has_ddi_port_info(struct drm_i915_private *i915)
-- 
GitLab


From f215038f4133ea9d1b525e9bb812527fe002db2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 12 Oct 2023 15:35:19 +0300
Subject: [PATCH 0078/2340] drm/i915: Use named initializers for DPLL info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use named initializers when populating the DPLL info. This
is just more convenient and less error prone as we no longer
have to keep the initializers in a specific order.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012123522.26045-2-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 130 +++++++++---------
 1 file changed, 67 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 399653a20f987..b7997b096796e 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -631,9 +631,9 @@ static const struct intel_shared_dpll_funcs ibx_pch_dpll_funcs = {
 };
 
 static const struct dpll_info pch_plls[] = {
-	{ "PCH DPLL A", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_A, 0 },
-	{ "PCH DPLL B", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_B, 0 },
-	{ },
+	{ .name = "PCH DPLL A", .funcs = &ibx_pch_dpll_funcs, .id = DPLL_ID_PCH_PLL_A, },
+	{ .name = "PCH DPLL B", .funcs = &ibx_pch_dpll_funcs, .id = DPLL_ID_PCH_PLL_B, },
+	{}
 };
 
 static const struct intel_dpll_mgr pch_pll_mgr = {
@@ -1239,13 +1239,16 @@ static const struct intel_shared_dpll_funcs hsw_ddi_lcpll_funcs = {
 };
 
 static const struct dpll_info hsw_plls[] = {
-	{ "WRPLL 1",    &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL1,     0 },
-	{ "WRPLL 2",    &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL2,     0 },
-	{ "SPLL",       &hsw_ddi_spll_funcs,  DPLL_ID_SPLL,       0 },
-	{ "LCPLL 810",  &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_810,  INTEL_DPLL_ALWAYS_ON },
-	{ "LCPLL 1350", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_1350, INTEL_DPLL_ALWAYS_ON },
-	{ "LCPLL 2700", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_2700, INTEL_DPLL_ALWAYS_ON },
-	{ },
+	{ .name = "WRPLL 1", .funcs = &hsw_ddi_wrpll_funcs, .id = DPLL_ID_WRPLL1, },
+	{ .name = "WRPLL 2", .funcs = &hsw_ddi_wrpll_funcs, .id = DPLL_ID_WRPLL2, },
+	{ .name = "SPLL", .funcs = &hsw_ddi_spll_funcs, .id = DPLL_ID_SPLL, },
+	{ .name = "LCPLL 810", .funcs = &hsw_ddi_lcpll_funcs, .id = DPLL_ID_LCPLL_810,
+	  .flags = INTEL_DPLL_ALWAYS_ON, },
+	{ .name = "LCPLL 1350", .funcs = &hsw_ddi_lcpll_funcs, .id = DPLL_ID_LCPLL_1350,
+	  .flags = INTEL_DPLL_ALWAYS_ON, },
+	{ .name = "LCPLL 2700", .funcs = &hsw_ddi_lcpll_funcs, .id = DPLL_ID_LCPLL_2700,
+	  .flags = INTEL_DPLL_ALWAYS_ON, },
+	{}
 };
 
 static const struct intel_dpll_mgr hsw_pll_mgr = {
@@ -1921,11 +1924,12 @@ static const struct intel_shared_dpll_funcs skl_ddi_dpll0_funcs = {
 };
 
 static const struct dpll_info skl_plls[] = {
-	{ "DPLL 0", &skl_ddi_dpll0_funcs, DPLL_ID_SKL_DPLL0, INTEL_DPLL_ALWAYS_ON },
-	{ "DPLL 1", &skl_ddi_pll_funcs,   DPLL_ID_SKL_DPLL1, 0 },
-	{ "DPLL 2", &skl_ddi_pll_funcs,   DPLL_ID_SKL_DPLL2, 0 },
-	{ "DPLL 3", &skl_ddi_pll_funcs,   DPLL_ID_SKL_DPLL3, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &skl_ddi_dpll0_funcs, .id = DPLL_ID_SKL_DPLL0,
+	  .flags = INTEL_DPLL_ALWAYS_ON, },
+	{ .name = "DPLL 1", .funcs = &skl_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL1, },
+	{ .name = "DPLL 2", .funcs = &skl_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL2, },
+	{ .name = "DPLL 3", .funcs = &skl_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL3, },
+	{}
 };
 
 static const struct intel_dpll_mgr skl_pll_mgr = {
@@ -2376,10 +2380,10 @@ static const struct intel_shared_dpll_funcs bxt_ddi_pll_funcs = {
 };
 
 static const struct dpll_info bxt_plls[] = {
-	{ "PORT PLL A", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 },
-	{ "PORT PLL B", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 },
-	{ "PORT PLL C", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 },
-	{ },
+	{ .name = "PORT PLL A", .funcs = &bxt_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL0, },
+	{ .name = "PORT PLL B", .funcs = &bxt_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL1, },
+	{ .name = "PORT PLL C", .funcs = &bxt_ddi_pll_funcs, .id = DPLL_ID_SKL_DPLL2, },
+	{}
 };
 
 static const struct intel_dpll_mgr bxt_pll_mgr = {
@@ -4014,14 +4018,14 @@ static const struct intel_shared_dpll_funcs mg_pll_funcs = {
 };
 
 static const struct dpll_info icl_plls[] = {
-	{ "DPLL 0",   &combo_pll_funcs, DPLL_ID_ICL_DPLL0,  0 },
-	{ "DPLL 1",   &combo_pll_funcs, DPLL_ID_ICL_DPLL1,  0 },
-	{ "TBT PLL",  &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 },
-	{ "MG PLL 1", &mg_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 },
-	{ "MG PLL 2", &mg_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 },
-	{ "MG PLL 3", &mg_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 },
-	{ "MG PLL 4", &mg_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, },
+	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, },
+	{ .name = "TBT PLL", .funcs = &tbt_pll_funcs, .id = DPLL_ID_ICL_TBTPLL, },
+	{ .name = "MG PLL 1", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL1, },
+	{ .name = "MG PLL 2", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL2, },
+	{ .name = "MG PLL 3", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL3, },
+	{ .name = "MG PLL 4", .funcs = &mg_pll_funcs, .id = DPLL_ID_ICL_MGPLL4, },
+	{}
 };
 
 static const struct intel_dpll_mgr icl_pll_mgr = {
@@ -4035,10 +4039,10 @@ static const struct intel_dpll_mgr icl_pll_mgr = {
 };
 
 static const struct dpll_info ehl_plls[] = {
-	{ "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 },
-	{ "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 },
-	{ "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, },
+	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, },
+	{ .name = "DPLL 4", .funcs = &combo_pll_funcs, .id = DPLL_ID_EHL_DPLL4, },
+	{}
 };
 
 static const struct intel_dpll_mgr ehl_pll_mgr = {
@@ -4058,16 +4062,16 @@ static const struct intel_shared_dpll_funcs dkl_pll_funcs = {
 };
 
 static const struct dpll_info tgl_plls[] = {
-	{ "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0,  0 },
-	{ "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1,  0 },
-	{ "TBT PLL",  &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 },
-	{ "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 },
-	{ "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 },
-	{ "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 },
-	{ "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 },
-	{ "TC PLL 5", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL5, 0 },
-	{ "TC PLL 6", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL6, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, },
+	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, },
+	{ .name = "TBT PLL", .funcs = &tbt_pll_funcs, .id = DPLL_ID_ICL_TBTPLL, },
+	{ .name = "TC PLL 1", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL1, },
+	{ .name = "TC PLL 2", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL2, },
+	{ .name = "TC PLL 3", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL3, },
+	{ .name = "TC PLL 4", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL4, },
+	{ .name = "TC PLL 5", .funcs = &dkl_pll_funcs, .id = DPLL_ID_TGL_MGPLL5, },
+	{ .name = "TC PLL 6", .funcs = &dkl_pll_funcs, .id = DPLL_ID_TGL_MGPLL6, },
+	{}
 };
 
 static const struct intel_dpll_mgr tgl_pll_mgr = {
@@ -4081,10 +4085,10 @@ static const struct intel_dpll_mgr tgl_pll_mgr = {
 };
 
 static const struct dpll_info rkl_plls[] = {
-	{ "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 },
-	{ "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 },
-	{ "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, },
+	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, },
+	{ .name = "DPLL 4", .funcs = &combo_pll_funcs, .id = DPLL_ID_EHL_DPLL4, },
+	{}
 };
 
 static const struct intel_dpll_mgr rkl_pll_mgr = {
@@ -4097,11 +4101,11 @@ static const struct intel_dpll_mgr rkl_pll_mgr = {
 };
 
 static const struct dpll_info dg1_plls[] = {
-	{ "DPLL 0", &combo_pll_funcs, DPLL_ID_DG1_DPLL0, 0 },
-	{ "DPLL 1", &combo_pll_funcs, DPLL_ID_DG1_DPLL1, 0 },
-	{ "DPLL 2", &combo_pll_funcs, DPLL_ID_DG1_DPLL2, 0 },
-	{ "DPLL 3", &combo_pll_funcs, DPLL_ID_DG1_DPLL3, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL0, },
+	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL1, },
+	{ .name = "DPLL 2", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL2, },
+	{ .name = "DPLL 3", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL3, },
+	{}
 };
 
 static const struct intel_dpll_mgr dg1_pll_mgr = {
@@ -4114,11 +4118,11 @@ static const struct intel_dpll_mgr dg1_pll_mgr = {
 };
 
 static const struct dpll_info adls_plls[] = {
-	{ "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 },
-	{ "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 },
-	{ "DPLL 2", &combo_pll_funcs, DPLL_ID_DG1_DPLL2, 0 },
-	{ "DPLL 3", &combo_pll_funcs, DPLL_ID_DG1_DPLL3, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, },
+	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, },
+	{ .name = "DPLL 2", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL2, },
+	{ .name = "DPLL 3", .funcs = &combo_pll_funcs, .id = DPLL_ID_DG1_DPLL3, },
+	{}
 };
 
 static const struct intel_dpll_mgr adls_pll_mgr = {
@@ -4131,14 +4135,14 @@ static const struct intel_dpll_mgr adls_pll_mgr = {
 };
 
 static const struct dpll_info adlp_plls[] = {
-	{ "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0,  0 },
-	{ "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1,  0 },
-	{ "TBT PLL",  &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 },
-	{ "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 },
-	{ "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 },
-	{ "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 },
-	{ "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 },
-	{ },
+	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, },
+	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, },
+	{ .name = "TBT PLL", .funcs = &tbt_pll_funcs, .id = DPLL_ID_ICL_TBTPLL, },
+	{ .name = "TC PLL 1", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL1, },
+	{ .name = "TC PLL 2", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL2, },
+	{ .name = "TC PLL 3", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL3, },
+	{ .name = "TC PLL 4", .funcs = &dkl_pll_funcs, .id = DPLL_ID_ICL_MGPLL4, },
+	{}
 };
 
 static const struct intel_dpll_mgr adlp_pll_mgr = {
-- 
GitLab


From 7e72cd6cafb166b815b7997597c09a01412da064 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 12 Oct 2023 15:35:20 +0300
Subject: [PATCH 0079/2340] drm/i915: Abstract the extra JSL/EHL DPLL4 power
 domain better
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just include the JSL/EHL DPLL4 extra power domain in the dpll_info
struct. This way the same approach could be used by other platforms
as well (should the need arise), and we don't have to sprinkle
platform checks all over the place.

Note that I'm perhaps slightly abusing things here as
power_domain==0 (which is actually POWER_DOMAIN_DISPLAY_CORE) now
indicates that no extra power domain is needed. I suppose using
POWER_DOMAIN_INVALID would be more correct, but then we'd have to
sprinkle that to all the other DPLLs.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012123522.26045-3-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 30 +++++--------------
 drivers/gpu/drm/i915/display/intel_dpll_mgr.h |  6 ++++
 2 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index b7997b096796e..4e524cb8ed83b 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -3838,17 +3838,8 @@ static void combo_pll_enable(struct drm_i915_private *i915,
 {
 	i915_reg_t enable_reg = intel_combo_pll_enable_reg(i915, pll);
 
-	if ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
-	    pll->info->id == DPLL_ID_EHL_DPLL4) {
-
-		/*
-		 * We need to disable DC states when this DPLL is enabled.
-		 * This can be done by taking a reference on DPLL4 power
-		 * domain.
-		 */
-		pll->wakeref = intel_display_power_get(i915,
-						       POWER_DOMAIN_DC_OFF);
-	}
+	if (pll->info->power_domain)
+		pll->wakeref = intel_display_power_get(i915, pll->info->power_domain);
 
 	icl_pll_power_enable(i915, pll, enable_reg);
 
@@ -3946,10 +3937,8 @@ static void combo_pll_disable(struct drm_i915_private *i915,
 
 	icl_pll_disable(i915, pll, enable_reg);
 
-	if ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
-	    pll->info->id == DPLL_ID_EHL_DPLL4)
-		intel_display_power_put(i915, POWER_DOMAIN_DC_OFF,
-					pll->wakeref);
+	if (pll->info->power_domain)
+		intel_display_power_put(i915, pll->info->power_domain, pll->wakeref);
 }
 
 static void tbt_pll_disable(struct drm_i915_private *i915,
@@ -4041,7 +4030,8 @@ static const struct intel_dpll_mgr icl_pll_mgr = {
 static const struct dpll_info ehl_plls[] = {
 	{ .name = "DPLL 0", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL0, },
 	{ .name = "DPLL 1", .funcs = &combo_pll_funcs, .id = DPLL_ID_ICL_DPLL1, },
-	{ .name = "DPLL 4", .funcs = &combo_pll_funcs, .id = DPLL_ID_EHL_DPLL4, },
+	{ .name = "DPLL 4", .funcs = &combo_pll_funcs, .id = DPLL_ID_EHL_DPLL4,
+	  .power_domain = POWER_DOMAIN_DC_OFF, },
 	{}
 };
 
@@ -4369,12 +4359,8 @@ static void readout_dpll_hw_state(struct drm_i915_private *i915,
 
 	pll->on = intel_dpll_get_hw_state(i915, pll, &pll->state.hw_state);
 
-	if ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
-	    pll->on &&
-	    pll->info->id == DPLL_ID_EHL_DPLL4) {
-		pll->wakeref = intel_display_power_get(i915,
-						       POWER_DOMAIN_DC_OFF);
-	}
+	if (pll->on && pll->info->power_domain)
+		pll->wakeref = intel_display_power_get(i915, pll->info->power_domain);
 
 	pll->state.pipe_mask = 0;
 	for_each_intel_crtc(&i915->drm, crtc) {
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
index dd4796a61751f..2e7ea0d8d3ffb 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
@@ -27,6 +27,7 @@
 
 #include <linux/types.h>
 
+#include "intel_display_power.h"
 #include "intel_wakeref.h"
 
 #define for_each_shared_dpll(__i915, __pll, __i) \
@@ -270,6 +271,11 @@ struct dpll_info {
 	 */
 	enum intel_dpll_id id;
 
+	/**
+	 * @power_domain: extra power domain required by the DPLL
+	 */
+	enum intel_display_power_domain power_domain;
+
 #define INTEL_DPLL_ALWAYS_ON	(1 << 0)
 	/**
 	 * @flags:
-- 
GitLab


From 3e7e07c4cf638b281f420be77afef7d93481a212 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 12 Oct 2023 15:35:21 +0300
Subject: [PATCH 0080/2340] drm/i915: Move the DPLL extra power domain handling
 up one level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The extra DPLL power domain is currently handled in three places:
- combo_pll_enable()
- combo_pll_disable()
- readout_dpll_hw_state()

First two of those are low level PLL funcs, but the third is a higher
level thing. So the current situation is rather inconsistent. Unify
this by moving the PLL enable/disable up one level. This also means
the extra power domain could be trivially be used by other platforms
as well.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012123522.26045-4-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 4e524cb8ed83b..556b10eefe66b 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -258,6 +258,10 @@ void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state)
 	drm_WARN_ON(&i915->drm, pll->on);
 
 	drm_dbg_kms(&i915->drm, "enabling %s\n", pll->info->name);
+
+	if (pll->info->power_domain)
+		pll->wakeref = intel_display_power_get(i915, pll->info->power_domain);
+
 	pll->info->funcs->enable(i915, pll);
 	pll->on = true;
 
@@ -307,6 +311,9 @@ void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state)
 	pll->info->funcs->disable(i915, pll);
 	pll->on = false;
 
+	if (pll->info->power_domain)
+		intel_display_power_put(i915, pll->info->power_domain, pll->wakeref);
+
 out:
 	mutex_unlock(&i915->display.dpll.lock);
 }
@@ -3838,9 +3845,6 @@ static void combo_pll_enable(struct drm_i915_private *i915,
 {
 	i915_reg_t enable_reg = intel_combo_pll_enable_reg(i915, pll);
 
-	if (pll->info->power_domain)
-		pll->wakeref = intel_display_power_get(i915, pll->info->power_domain);
-
 	icl_pll_power_enable(i915, pll, enable_reg);
 
 	icl_dpll_write(i915, pll);
@@ -3936,9 +3940,6 @@ static void combo_pll_disable(struct drm_i915_private *i915,
 	i915_reg_t enable_reg = intel_combo_pll_enable_reg(i915, pll);
 
 	icl_pll_disable(i915, pll, enable_reg);
-
-	if (pll->info->power_domain)
-		intel_display_power_put(i915, pll->info->power_domain, pll->wakeref);
 }
 
 static void tbt_pll_disable(struct drm_i915_private *i915,
@@ -4409,6 +4410,9 @@ static void sanitize_dpll_state(struct drm_i915_private *i915,
 
 	pll->info->funcs->disable(i915, pll);
 	pll->on = false;
+
+	if (pll->info->power_domain)
+		intel_display_power_put(i915, pll->info->power_domain, pll->wakeref);
 }
 
 void intel_dpll_sanitize_state(struct drm_i915_private *i915)
-- 
GitLab


From 7880d41c55f1e177a88c275d2e3ccec4debfcb51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 12 Oct 2023 15:35:22 +0300
Subject: [PATCH 0081/2340] drm/i915: Extract
 _intel_{enable,disable}_shared_dpll()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have a bit of duplicated code around the DPLL disabling. Extract
that to a new function, and for symmetry also do the same for the
enable direction.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012123522.26045-5-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 37 ++++++++++++-------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 556b10eefe66b..7958d0bd851e1 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -219,6 +219,26 @@ intel_tc_pll_enable_reg(struct drm_i915_private *i915,
 	return MG_PLL_ENABLE(tc_port);
 }
 
+static void _intel_enable_shared_dpll(struct drm_i915_private *i915,
+				      struct intel_shared_dpll *pll)
+{
+	if (pll->info->power_domain)
+		pll->wakeref = intel_display_power_get(i915, pll->info->power_domain);
+
+	pll->info->funcs->enable(i915, pll);
+	pll->on = true;
+}
+
+static void _intel_disable_shared_dpll(struct drm_i915_private *i915,
+				       struct intel_shared_dpll *pll)
+{
+	pll->info->funcs->disable(i915, pll);
+	pll->on = false;
+
+	if (pll->info->power_domain)
+		intel_display_power_put(i915, pll->info->power_domain, pll->wakeref);
+}
+
 /**
  * intel_enable_shared_dpll - enable a CRTC's shared DPLL
  * @crtc_state: CRTC, and its state, which has a shared DPLL
@@ -259,11 +279,7 @@ void intel_enable_shared_dpll(const struct intel_crtc_state *crtc_state)
 
 	drm_dbg_kms(&i915->drm, "enabling %s\n", pll->info->name);
 
-	if (pll->info->power_domain)
-		pll->wakeref = intel_display_power_get(i915, pll->info->power_domain);
-
-	pll->info->funcs->enable(i915, pll);
-	pll->on = true;
+	_intel_enable_shared_dpll(i915, pll);
 
 out:
 	mutex_unlock(&i915->display.dpll.lock);
@@ -308,11 +324,8 @@ void intel_disable_shared_dpll(const struct intel_crtc_state *crtc_state)
 		goto out;
 
 	drm_dbg_kms(&i915->drm, "disabling %s\n", pll->info->name);
-	pll->info->funcs->disable(i915, pll);
-	pll->on = false;
 
-	if (pll->info->power_domain)
-		intel_display_power_put(i915, pll->info->power_domain, pll->wakeref);
+	_intel_disable_shared_dpll(i915, pll);
 
 out:
 	mutex_unlock(&i915->display.dpll.lock);
@@ -4408,11 +4421,7 @@ static void sanitize_dpll_state(struct drm_i915_private *i915,
 		    "%s enabled but not in use, disabling\n",
 		    pll->info->name);
 
-	pll->info->funcs->disable(i915, pll);
-	pll->on = false;
-
-	if (pll->info->power_domain)
-		intel_display_power_put(i915, pll->info->power_domain, pll->wakeref);
+	_intel_disable_shared_dpll(i915, pll);
 }
 
 void intel_dpll_sanitize_state(struct drm_i915_private *i915)
-- 
GitLab


From b0462e94c964145c1962876f18e99f82fb4e6e9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 12 Oct 2023 15:40:33 +0300
Subject: [PATCH 0082/2340] drm/i915: Move the g45 PEG band gap HPD workaround
 to the HPD code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are asked to reprogram PEG_BAND_GAP_DATA prior to enabling
hotplug detection on the g45 HDMI/DP ports. Currently we do said
reprogamming from the DP/HDMI connector initialization functions.
That code should be mostly platform agnostic so clearly not the
best place for this. Move the workaround to the place where we
actually enable HPD detection.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012124033.26983-1-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c          | 10 ----------
 drivers/gpu/drm/i915/display/intel_hdmi.c        | 10 ----------
 drivers/gpu/drm/i915/display/intel_hotplug_irq.c | 16 ++++++++++++++++
 3 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index ab4608dafe5d9..ea97db7d9abf7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -6231,16 +6231,6 @@ intel_dp_init_connector(struct intel_digital_port *dig_port,
 				    "HDCP init failed, skipping.\n");
 	}
 
-	/* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written
-	 * 0xd.  Failure to do so will result in spurious interrupts being
-	 * generated on the port when a cable is not attached.
-	 */
-	if (IS_G45(dev_priv)) {
-		u32 temp = intel_de_read(dev_priv, PEG_BAND_GAP_DATA);
-		intel_de_write(dev_priv, PEG_BAND_GAP_DATA,
-			       (temp & ~0xf) | 0xd);
-	}
-
 	intel_dp->frl.is_trained = false;
 	intel_dp->frl.trained_rate_gbps = 0;
 
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index ac315f8e78201..ab18cfc19c0a3 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -3030,16 +3030,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port,
 				    "HDCP init failed, skipping.\n");
 	}
 
-	/* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written
-	 * 0xd.  Failure to do so will result in spurious interrupts being
-	 * generated on the port when a cable is not attached.
-	 */
-	if (IS_G45(dev_priv)) {
-		u32 temp = intel_de_read(dev_priv, PEG_BAND_GAP_DATA);
-		intel_de_write(dev_priv, PEG_BAND_GAP_DATA,
-		               (temp & ~0xf) | 0xd);
-	}
-
 	cec_fill_conn_info_from_drm(&conn_info, connector);
 
 	intel_hdmi->cec_notifier =
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c
index f07047e9cb301..04f62f27ad74b 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c
@@ -1361,11 +1361,24 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	bxt_hpd_detection_setup(dev_priv);
 }
 
+static void g45_hpd_peg_band_gap_wa(struct drm_i915_private *i915)
+{
+	/*
+	 * For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written
+	 * 0xd.  Failure to do so will result in spurious interrupts being
+	 * generated on the port when a cable is not attached.
+	 */
+	intel_de_rmw(i915, PEG_BAND_GAP_DATA, 0xf, 0xd);
+}
+
 static void i915_hpd_enable_detection(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	u32 hotplug_en = hpd_mask_i915[encoder->hpd_pin];
 
+	if (IS_G45(i915))
+		g45_hpd_peg_band_gap_wa(i915);
+
 	/* HPD sense and interrupt enable are one and the same */
 	i915_hotplug_interrupt_update(i915, hotplug_en, hotplug_en);
 }
@@ -1389,6 +1402,9 @@ static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv)
 		hotplug_en |= CRT_HOTPLUG_ACTIVATION_PERIOD_64;
 	hotplug_en |= CRT_HOTPLUG_VOLTAGE_COMPARE_50;
 
+	if (IS_G45(dev_priv))
+		g45_hpd_peg_band_gap_wa(dev_priv);
+
 	/* Ignore TV since it's buggy */
 	i915_hotplug_interrupt_update_locked(dev_priv,
 					     HOTPLUG_INT_EN_MASK |
-- 
GitLab


From da36ce00997e10ed06c9fa66fbce546cad23815f Mon Sep 17 00:00:00 2001
From: Suraj Kandpal <suraj.kandpal@intel.com>
Date: Thu, 26 Oct 2023 17:41:38 +0530
Subject: [PATCH 0083/2340] drm/i915/hdcp: Rename HCDP 1.4 enablement function

Rename hdcp 1.4 enablement function from _intel_hdcp_enable to
intel_hdcp1_enable to better represent what version of hdcp is
being enabled

Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026121139.987437-2-suraj.kandpal@intel.com
---
 drivers/gpu/drm/i915/display/intel_hdcp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index c89da3568ebdb..7c0cfcb48521f 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -923,7 +923,7 @@ static int _intel_hdcp_disable(struct intel_connector *connector)
 	return 0;
 }
 
-static int _intel_hdcp_enable(struct intel_connector *connector)
+static int intel_hdcp1_enable(struct intel_connector *connector)
 {
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct intel_hdcp *hdcp = &connector->hdcp;
@@ -1058,7 +1058,7 @@ static int intel_hdcp_check_link(struct intel_connector *connector)
 		goto out;
 	}
 
-	ret = _intel_hdcp_enable(connector);
+	ret = intel_hdcp1_enable(connector);
 	if (ret) {
 		drm_err(&i915->drm, "Failed to enable hdcp (%d)\n", ret);
 		intel_hdcp_update_value(connector,
@@ -2388,7 +2388,7 @@ int intel_hdcp_enable(struct intel_atomic_state *state,
 	 */
 	if (ret && intel_hdcp_capable(connector) &&
 	    hdcp->content_type != DRM_MODE_HDCP_CONTENT_TYPE1) {
-		ret = _intel_hdcp_enable(connector);
+		ret = intel_hdcp1_enable(connector);
 	}
 
 	if (!ret) {
-- 
GitLab


From 4f60f06a41f441cd5a8570c61701ba40796fa52c Mon Sep 17 00:00:00 2001
From: Suraj Kandpal <suraj.kandpal@intel.com>
Date: Thu, 26 Oct 2023 17:41:39 +0530
Subject: [PATCH 0084/2340] drm/i915/hdcp: Convert intel_hdcp_enable to a
 blanket function

Let's convert intel_hdcp_enable to a blanket function
which just has some conditions which needs to be checked
before connectors enable hdcp.
This cleans up code and avoids code duplication.

--v3
-Keep function name as intel_hdcp_enable() [Jani]

Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026121139.987437-3-suraj.kandpal@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c    |  5 +----
 drivers/gpu/drm/i915/display/intel_dp_mst.c |  5 +----
 drivers/gpu/drm/i915/display/intel_hdcp.c   | 21 ++++++++++++++++-----
 drivers/gpu/drm/i915/display/intel_hdcp.h   |  8 ++++----
 4 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c4dc1f71da4bc..2e37708fecdd0 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3259,10 +3259,7 @@ static void intel_enable_ddi(struct intel_atomic_state *state,
 	else
 		intel_enable_ddi_dp(state, encoder, crtc_state, conn_state);
 
-	/* Enable hdcp if it's desired */
-	if (conn_state->content_protection ==
-	    DRM_MODE_CONTENT_PROTECTION_DESIRED)
-		intel_hdcp_enable(state, encoder, crtc_state, conn_state);
+	intel_hdcp_enable(state, encoder, crtc_state, conn_state);
 }
 
 static void intel_disable_ddi_dp(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 7b4628f4f1240..4366da79fe817 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -836,10 +836,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 
 	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 
-	/* Enable hdcp if it's desired */
-	if (conn_state->content_protection ==
-	    DRM_MODE_CONTENT_PROTECTION_DESIRED)
-		intel_hdcp_enable(state, encoder, pipe_config, conn_state);
+	intel_hdcp_enable(state, encoder, pipe_config, conn_state);
 }
 
 static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 7c0cfcb48521f..44c0a93f3af82 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -2324,10 +2324,10 @@ intel_hdcp_set_streams(struct intel_digital_port *dig_port,
 	return 0;
 }
 
-int intel_hdcp_enable(struct intel_atomic_state *state,
-		      struct intel_encoder *encoder,
-		      const struct intel_crtc_state *pipe_config,
-		      const struct drm_connector_state *conn_state)
+static int _intel_hdcp_enable(struct intel_atomic_state *state,
+			      struct intel_encoder *encoder,
+			      const struct intel_crtc_state *pipe_config,
+			      const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	struct intel_connector *connector =
@@ -2404,6 +2404,17 @@ int intel_hdcp_enable(struct intel_atomic_state *state,
 	return ret;
 }
 
+void intel_hdcp_enable(struct intel_atomic_state *state,
+		       struct intel_encoder *encoder,
+		       const struct intel_crtc_state *crtc_state,
+		       const struct drm_connector_state *conn_state)
+{
+	/* Enable hdcp if it's desired */
+	if (conn_state->content_protection ==
+	    DRM_MODE_CONTENT_PROTECTION_DESIRED)
+		_intel_hdcp_enable(state, encoder, crtc_state, conn_state);
+}
+
 int intel_hdcp_disable(struct intel_connector *connector)
 {
 	struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
@@ -2491,7 +2502,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state,
 	}
 
 	if (desired_and_not_enabled || content_protection_type_changed)
-		intel_hdcp_enable(state, encoder, crtc_state, conn_state);
+		_intel_hdcp_enable(state, encoder, crtc_state, conn_state);
 }
 
 void intel_hdcp_component_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h
index 5997c52a09589..a9c784fd9ba50 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.h
@@ -28,10 +28,10 @@ void intel_hdcp_atomic_check(struct drm_connector *connector,
 int intel_hdcp_init(struct intel_connector *connector,
 		    struct intel_digital_port *dig_port,
 		    const struct intel_hdcp_shim *hdcp_shim);
-int intel_hdcp_enable(struct intel_atomic_state *state,
-		      struct intel_encoder *encoder,
-		      const struct intel_crtc_state *pipe_config,
-		      const struct drm_connector_state *conn_state);
+void intel_hdcp_enable(struct intel_atomic_state *state,
+		       struct intel_encoder *encoder,
+		       const struct intel_crtc_state *pipe_config,
+		       const struct drm_connector_state *conn_state);
 int intel_hdcp_disable(struct intel_connector *connector);
 void intel_hdcp_update_pipe(struct intel_atomic_state *state,
 			    struct intel_encoder *encoder,
-- 
GitLab


From 3b9bbd79627043a9fa9dd5b01bb29882663976e0 Mon Sep 17 00:00:00 2001
From: Suraj Kandpal <suraj.kandpal@intel.com>
Date: Thu, 26 Oct 2023 17:41:40 +0530
Subject: [PATCH 0085/2340] drm/i915/hdcp: Add more conditions to enable hdcp

When we dock a monitor we end up with a enable and disable connector
cycle but if hdcp content is running we get the userspace in
enabled state and driver maintaining a undesired state which causes
the content to stop playing and we only enable hdcp if the userspace
state in desired. This patch fixes that.

--v2
-Move code to intel_hdcp [Jani]

Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026121139.987437-4-suraj.kandpal@intel.com
---
 drivers/gpu/drm/i915/display/intel_hdcp.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 44c0a93f3af82..39b3f7c0c77c9 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -2409,9 +2409,19 @@ void intel_hdcp_enable(struct intel_atomic_state *state,
 		       const struct intel_crtc_state *crtc_state,
 		       const struct drm_connector_state *conn_state)
 {
-	/* Enable hdcp if it's desired */
+	struct intel_connector *connector =
+		to_intel_connector(conn_state->connector);
+	struct intel_hdcp *hdcp = &connector->hdcp;
+
+	/*
+	 * Enable hdcp if it's desired or if userspace is enabled and
+	 * driver set its state to undesired
+	 */
 	if (conn_state->content_protection ==
-	    DRM_MODE_CONTENT_PROTECTION_DESIRED)
+	    DRM_MODE_CONTENT_PROTECTION_DESIRED ||
+	    (conn_state->content_protection ==
+	    DRM_MODE_CONTENT_PROTECTION_ENABLED && hdcp->value ==
+	    DRM_MODE_CONTENT_PROTECTION_UNDESIRED))
 		_intel_hdcp_enable(state, encoder, crtc_state, conn_state);
 }
 
-- 
GitLab


From 949113d34fb82a5dc6f5dd3ad9168001b441792b Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Wed, 25 Oct 2023 12:28:26 +0200
Subject: [PATCH 0086/2340] drm/i915/mtl: Apply notify_guc to all GTs

Handle platforms with multiple GTs by iterate over all GTs.
Add a Fixes commit so this gets propagated for MTL support.

Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake")
Suggested-by: John Harrison <john.c.harrison@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231025102826.16955-1-nirmoy.das@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs_params.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c
index 614bde321589d..8bca02025e093 100644
--- a/drivers/gpu/drm/i915/i915_debugfs_params.c
+++ b/drivers/gpu/drm/i915/i915_debugfs_params.c
@@ -38,10 +38,13 @@ static int i915_param_int_open(struct inode *inode, struct file *file)
 
 static int notify_guc(struct drm_i915_private *i915)
 {
-	int ret = 0;
+	struct intel_gt *gt;
+	int i, ret = 0;
 
-	if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
-		ret = intel_guc_global_policies_update(&to_gt(i915)->uc.guc);
+	for_each_gt(gt, i915, i) {
+		if (intel_uc_uses_guc_submission(&gt->uc))
+			ret = intel_guc_global_policies_update(&gt->uc.guc);
+	}
 
 	return ret;
 }
-- 
GitLab


From 9bb66c179f50e61df20ba13c9b34ca17d00b05fb Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Thu, 26 Oct 2023 20:36:26 +0200
Subject: [PATCH 0087/2340] drm/i915: Reserve some kernel space per vm

Reserve one page in each vm for kernel space to use for things
such as workarounds.

v2: use real memory, do not decrease vm.total
v4: reserve only one page and explain flag
v5: remove allocated object on ppgtt cleanup
v6: decrease vm->total by reservation size

Suggested-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-1-4aa7d55d0a8a@intel.com
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 43 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gtt.h  |  4 +++
 2 files changed, 47 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 9895e18df0435..fa46d2308b0ed 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -5,6 +5,7 @@
 
 #include <linux/log2.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 
 #include "gen8_ppgtt.h"
@@ -222,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
+	if (vm->rsvd.obj)
+		i915_gem_object_put(vm->rsvd.obj);
+
 	if (intel_vgpu_active(vm->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
@@ -950,6 +954,41 @@ err_pd:
 	return ERR_PTR(err);
 }
 
+static int gen8_init_rsvd(struct i915_address_space *vm)
+{
+	struct drm_i915_private *i915 = vm->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	/* The memory will be used only by GPU. */
+	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
+					  I915_BO_ALLOC_VOLATILE |
+					  I915_BO_ALLOC_GPU_ONLY);
+	if (IS_ERR(obj))
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto unref;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	if (ret)
+		goto unref;
+
+	vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
+	vm->rsvd.obj = obj;
+	vm->total -= vma->node.size;
+	return 0;
+unref:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1031,6 +1070,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	if (intel_vgpu_active(gt->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
+	err = gen8_init_rsvd(&ppgtt->vm);
+	if (err)
+		goto err_put;
+
 	return ppgtt;
 
 err_put:
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b471edac26992..028a5a988eea0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -249,6 +249,10 @@ struct i915_address_space {
 	struct work_struct release_work;
 
 	struct drm_mm mm;
+	struct {
+		struct drm_i915_gem_object *obj;
+		struct i915_vma *vma;
+	} rsvd;
 	struct intel_gt *gt;
 	struct drm_i915_private *i915;
 	struct device *dma;
-- 
GitLab


From 03fe4b87c6420fde29e3401f87fcdc271c960950 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Thu, 26 Oct 2023 20:36:27 +0200
Subject: [PATCH 0088/2340] drm/i915: Add WABB blit for Wa_16018031267 /
 Wa_16018063123

Apply WABB blit for Wa_16018031267 / Wa_16018063123.

v3: drop unused enum definition
v4: move selftest to separate patch, use wa only on BCS0.
v5: fixed selftest caller to context_wabb

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-2-4aa7d55d0a8a@intel.com
---
 drivers/gpu/drm/i915/gt/intel_engine_regs.h |   3 +
 drivers/gpu/drm/i915/gt/intel_gt.h          |   4 +
 drivers/gpu/drm/i915/gt/intel_lrc.c         | 100 +++++++++++++++++++-
 drivers/gpu/drm/i915/gt/selftest_lrc.c      |   2 +-
 4 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index fdd4ddd3a978a..b8618ee3e3041 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -118,6 +118,9 @@
 #define   CCID_EXTENDED_STATE_RESTORE		BIT(2)
 #define   CCID_EXTENDED_STATE_SAVE		BIT(3)
 #define RING_BB_PER_CTX_PTR(base)		_MMIO((base) + 0x1c0) /* gen8+ */
+#define   PER_CTX_BB_FORCE			BIT(2)
+#define   PER_CTX_BB_VALID			BIT(0)
+
 #define RING_INDIRECT_CTX(base)			_MMIO((base) + 0x1c4) /* gen8+ */
 #define RING_INDIRECT_CTX_OFFSET(base)		_MMIO((base) + 0x1c8) /* gen8+ */
 #define ECOSKPD(base)				_MMIO((base) + 0x1d0)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 970bedf6b78a7..9ffdb05e231e2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -82,6 +82,10 @@ struct drm_printer;
 		  ##__VA_ARGS__);					\
 } while (0)
 
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+	IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
+	engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
 static inline bool gt_is_root(struct intel_gt *gt)
 {
 	return !gt->info.id;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eaf66d9031665..7c367ba8d9dcf 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -828,6 +828,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
 	return 0;
 }
 
+static void
+lrc_setup_bb_per_ctx(u32 *regs,
+		     const struct intel_engine_cs *engine,
+		     u32 ctx_bb_ggtt_addr)
+{
+	GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+	regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+		ctx_bb_ggtt_addr |
+		PER_CTX_BB_FORCE |
+		PER_CTX_BB_VALID;
+}
+
 static void
 lrc_setup_indirect_ctx(u32 *regs,
 		       const struct intel_engine_cs *engine,
@@ -1020,7 +1032,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce)
 	return PAGE_SIZE * ce->wa_bb_page;
 }
 
-static u32 *context_indirect_bb(const struct intel_context *ce)
+/*
+ * per_ctx below determines which WABB section is used.
+ * When true, the function returns the location of the
+ * PER_CTX_BB.  When false, the function returns the
+ * location of the INDIRECT_CTX.
+ */
+static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
 {
 	void *ptr;
 
@@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce)
 	ptr = ce->lrc_reg_state;
 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
 	ptr += context_wa_bb_offset(ce);
+	ptr += per_ctx ? PAGE_SIZE : 0;
 
 	return ptr;
 }
@@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
 
 	if (GRAPHICS_VER(engine->i915) >= 12) {
 		ce->wa_bb_page = context_size / PAGE_SIZE;
-		context_size += PAGE_SIZE;
+		/* INDIRECT_CTX and PER_CTX_BB need separate pages. */
+		context_size += PAGE_SIZE * 2;
 	}
 
 	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
@@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
 	return gen12_emit_aux_table_inv(ce->engine, cs);
 }
 
+static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
+{
+	struct intel_gt *gt = ce->engine->gt;
+	int mocs = gt->mocs.uc_index << 1;
+
+	/**
+	 * Wa_16018031267 / Wa_16018063123 requires that SW forces the
+	 * main copy engine arbitration into round robin mode.  We
+	 * additionally need to submit the following WABB blt command
+	 * to produce 4 subblits with each subblit generating 0 byte
+	 * write requests as WABB:
+	 *
+	 * XY_FASTCOLOR_BLT
+	 *  BG0    -> 5100000E
+	 *  BG1    -> 0000003F (Dest pitch)
+	 *  BG2    -> 00000000 (X1, Y1) = (0, 0)
+	 *  BG3    -> 00040001 (X2, Y2) = (1, 4)
+	 *  BG4    -> scratch
+	 *  BG5    -> scratch
+	 *  BG6-12 -> 00000000
+	 *  BG13   -> 20004004 (Surf. Width= 2,Surf. Height = 5 )
+	 *  BG14   -> 00000010 (Qpitch = 4)
+	 *  BG15   -> 00000000
+	 */
+	*cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
+	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
+	*cs++ = 0;
+	*cs++ = 4 << 16 | 1;
+	*cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+	*cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0x20004004;
+	*cs++ = 0x10;
+	*cs++ = 0;
+
+	return cs;
+}
+
+static u32 *
+xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
+{
+	/* Wa_16018031267, Wa_16018063123 */
+	if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
+		cs = xehp_emit_fastcolor_blt_wabb(ce, cs);
+
+	return cs;
+}
+
+static void
+setup_per_ctx_bb(const struct intel_context *ce,
+		 const struct intel_engine_cs *engine,
+		 u32 *(*emit)(const struct intel_context *, u32 *))
+{
+	/* Place PER_CTX_BB on next page after INDIRECT_CTX */
+	u32 * const start = context_wabb(ce, true);
+	u32 *cs;
+
+	cs = emit(ce, start);
+
+	/* PER_CTX_BB must manually terminate */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+	lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
+			     lrc_indirect_bb(ce) + PAGE_SIZE);
+}
+
 static void
 setup_indirect_ctx_bb(const struct intel_context *ce,
 		      const struct intel_engine_cs *engine,
 		      u32 *(*emit)(const struct intel_context *, u32 *))
 {
-	u32 * const start = context_indirect_bb(ce);
+	u32 * const start = context_wabb(ce, false);
 	u32 *cs;
 
 	cs = emit(ce, start);
@@ -1511,6 +1604,7 @@ u32 lrc_update_regs(const struct intel_context *ce,
 		/* Mutually exclusive wrt to global indirect bb */
 		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
 		setup_indirect_ctx_bb(ce, engine, fn);
+		setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
 	}
 
 	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 5f826b6dcf5d6..823d38aa39346 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1596,7 +1596,7 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
 static void
 indirect_ctx_bb_setup(struct intel_context *ce)
 {
-	u32 *cs = context_indirect_bb(ce);
+	u32 *cs = context_wabb(ce, false);
 
 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
 
-- 
GitLab


From 3a32ef21ed5497f30f2bc99074014496748533d3 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Thu, 26 Oct 2023 20:36:28 +0200
Subject: [PATCH 0089/2340] drm/i915/gt: add selftest to exercise WABB

Test re-uses logic form indirect ctx BB selftest.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-3-4aa7d55d0a8a@intel.com
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 65 +++++++++++++++++++-------
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 823d38aa39346..e17b8777d21dc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1555,7 +1555,7 @@ static int live_lrc_isolation(void *arg)
 	return err;
 }
 
-static int indirect_ctx_submit_req(struct intel_context *ce)
+static int wabb_ctx_submit_req(struct intel_context *ce)
 {
 	struct i915_request *rq;
 	int err = 0;
@@ -1579,7 +1579,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce)
 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
 
 static u32 *
-emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+emit_wabb_ctx_canary(const struct intel_context *ce,
+		     u32 *cs, bool per_ctx)
 {
 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
 		MI_SRM_LRM_GLOBAL_GTT |
@@ -1587,26 +1588,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
 	*cs++ = i915_mmio_reg_offset(RING_START(0));
 	*cs++ = i915_ggtt_offset(ce->state) +
 		context_wa_bb_offset(ce) +
-		CTX_BB_CANARY_OFFSET;
+		CTX_BB_CANARY_OFFSET +
+		(per_ctx ? PAGE_SIZE : 0);
 	*cs++ = 0;
 
 	return cs;
 }
 
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+	return emit_wabb_ctx_canary(ce, cs, false);
+}
+
+static u32 *
+emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+	return emit_wabb_ctx_canary(ce, cs, true);
+}
+
 static void
-indirect_ctx_bb_setup(struct intel_context *ce)
+wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
 {
-	u32 *cs = context_wabb(ce, false);
+	u32 *cs = context_wabb(ce, per_ctx);
 
 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
 
-	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+	if (per_ctx)
+		setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
+	else
+		setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
 }
 
-static bool check_ring_start(struct intel_context *ce)
+static bool check_ring_start(struct intel_context *ce, bool per_ctx)
 {
 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
-		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+		LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
+		(per_ctx ? PAGE_SIZE : 0);
 
 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
 		return true;
@@ -1618,21 +1636,21 @@ static bool check_ring_start(struct intel_context *ce)
 	return false;
 }
 
-static int indirect_ctx_bb_check(struct intel_context *ce)
+static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
 {
 	int err;
 
-	err = indirect_ctx_submit_req(ce);
+	err = wabb_ctx_submit_req(ce);
 	if (err)
 		return err;
 
-	if (!check_ring_start(ce))
+	if (!check_ring_start(ce, per_ctx))
 		return -EINVAL;
 
 	return 0;
 }
 
-static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
 {
 	struct intel_context *a, *b;
 	int err;
@@ -1667,14 +1685,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
 	 * As ring start is restored apriori of starting the indirect ctx bb and
 	 * as it will be different for each context, it fits to this purpose.
 	 */
-	indirect_ctx_bb_setup(a);
-	indirect_ctx_bb_setup(b);
+	wabb_ctx_setup(a, per_ctx);
+	wabb_ctx_setup(b, per_ctx);
 
-	err = indirect_ctx_bb_check(a);
+	err = wabb_ctx_check(a, per_ctx);
 	if (err)
 		goto unpin_b;
 
-	err = indirect_ctx_bb_check(b);
+	err = wabb_ctx_check(b, per_ctx);
 
 unpin_b:
 	intel_context_unpin(b);
@@ -1688,7 +1706,7 @@ put_a:
 	return err;
 }
 
-static int live_lrc_indirect_ctx_bb(void *arg)
+static int lrc_wabb_ctx(void *arg, bool per_ctx)
 {
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
@@ -1697,7 +1715,7 @@ static int live_lrc_indirect_ctx_bb(void *arg)
 
 	for_each_engine(engine, gt, id) {
 		intel_engine_pm_get(engine);
-		err = __live_lrc_indirect_ctx_bb(engine);
+		err = __lrc_wabb_ctx(engine, per_ctx);
 		intel_engine_pm_put(engine);
 
 		if (igt_flush_test(gt->i915))
@@ -1710,6 +1728,16 @@ static int live_lrc_indirect_ctx_bb(void *arg)
 	return err;
 }
 
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+	return lrc_wabb_ctx(arg, false);
+}
+
+static int live_lrc_per_ctx_bb(void *arg)
+{
+	return lrc_wabb_ctx(arg, true);
+}
+
 static void garbage_reset(struct intel_engine_cs *engine,
 			  struct i915_request *rq)
 {
@@ -1947,6 +1975,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_lrc_garbage),
 		SUBTEST(live_pphwsp_runtime),
 		SUBTEST(live_lrc_indirect_ctx_bb),
+		SUBTEST(live_lrc_per_ctx_bb),
 	};
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
-- 
GitLab


From 2fb771f3b840ff59e593dad9b6289276ea545698 Mon Sep 17 00:00:00 2001
From: Jonathan Cavitt <jonathan.cavitt@intel.com>
Date: Thu, 26 Oct 2023 20:36:29 +0200
Subject: [PATCH 0090/2340] drm/i915: Set copy engine arbitration for
 Wa_16018031267 / Wa_16018063123

Set copy engine arbitration into round robin mode
for part of Wa_16018031267 / Wa_16018063123 mitigation.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-4-4aa7d55d0a8a@intel.com
---
 drivers/gpu/drm/i915/gt/intel_engine_regs.h | 3 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index b8618ee3e3041..c0c8c12edea10 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -124,6 +124,9 @@
 #define RING_INDIRECT_CTX(base)			_MMIO((base) + 0x1c4) /* gen8+ */
 #define RING_INDIRECT_CTX_OFFSET(base)		_MMIO((base) + 0x1c8) /* gen8+ */
 #define ECOSKPD(base)				_MMIO((base) + 0x1d0)
+#define   XEHP_BLITTER_SCHEDULING_MODE_MASK	REG_GENMASK(12, 11)
+#define   XEHP_BLITTER_ROUND_ROBIN_MODE		\
+		REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1)
 #define   ECO_CONSTANT_BUFFER_SR_DISABLE	REG_BIT(4)
 #define   ECO_GATING_CX_ONLY			REG_BIT(3)
 #define   GEN6_BLITTER_FBC_NOTIFY		REG_BIT(3)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 6ae7a4de83b06..12859b8d2092a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2785,6 +2785,11 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			 RING_SEMA_WAIT_POLL(engine->mmio_base),
 			 1);
 	}
+	/* Wa_16018031267, Wa_16018063123 */
+	if (NEEDS_FASTCOLOR_BLT_WABB(engine))
+		wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
+				    XEHP_BLITTER_SCHEDULING_MODE_MASK,
+				    XEHP_BLITTER_ROUND_ROBIN_MODE);
 }
 
 static void
-- 
GitLab


From ca34d816558c3e4c3f8fe037b5a6b16c944693de Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Wed, 20 Sep 2023 15:57:16 +0300
Subject: [PATCH 0091/2340] Revert "drm/tidss: Annotate dma-fence critical
 section in commit path"

This reverts commit 4d56a4f08391857ba93465de489707b66adad114.

The DMA-fence annotations cause a lockdep warning (see below). As per
https://patchwork.freedesktop.org/patch/462170/ it sounds like the
annotations don't work correctly.

======================================================
WARNING: possible circular locking dependency detected
6.6.0-rc2+ #1 Not tainted
------------------------------------------------------
kmstest/733 is trying to acquire lock:
ffff8000819377f0 (fs_reclaim){+.+.}-{0:0}, at: __kmem_cache_alloc_node+0x58/0x2d4

but task is already holding lock:
ffff800081a06aa0 (dma_fence_map){++++}-{0:0}, at: tidss_atomic_commit_tail+0x20/0xc0 [tidss]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (dma_fence_map){++++}-{0:0}:
       __dma_fence_might_wait+0x5c/0xd0
       dma_resv_lockdep+0x1a4/0x32c
       do_one_initcall+0x84/0x2fc
       kernel_init_freeable+0x28c/0x4c4
       kernel_init+0x24/0x1dc
       ret_from_fork+0x10/0x20

-> #1 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}:
       fs_reclaim_acquire+0x70/0xe4
       __kmem_cache_alloc_node+0x58/0x2d4
       kmalloc_trace+0x38/0x78
       __kthread_create_worker+0x3c/0x150
       kthread_create_worker+0x64/0x8c
       workqueue_init+0x1e8/0x2f0
       kernel_init_freeable+0x11c/0x4c4
       kernel_init+0x24/0x1dc
       ret_from_fork+0x10/0x20

-> #0 (fs_reclaim){+.+.}-{0:0}:
       __lock_acquire+0x1370/0x20d8
       lock_acquire+0x1e8/0x308
       fs_reclaim_acquire+0xd0/0xe4
       __kmem_cache_alloc_node+0x58/0x2d4
       __kmalloc_node_track_caller+0x58/0xf0
       kmemdup+0x34/0x60
       regmap_bulk_write+0x64/0x2c0
       tc358768_bridge_pre_enable+0x8c/0x12d0 [tc358768]
       drm_atomic_bridge_call_pre_enable+0x68/0x80 [drm]
       drm_atomic_bridge_chain_pre_enable+0x50/0x158 [drm]
       drm_atomic_helper_commit_modeset_enables+0x164/0x264 [drm_kms_helper]
       tidss_atomic_commit_tail+0x58/0xc0 [tidss]
       commit_tail+0xa0/0x188 [drm_kms_helper]
       drm_atomic_helper_commit+0x1a8/0x1c0 [drm_kms_helper]
       drm_atomic_commit+0xa8/0xe0 [drm]
       drm_mode_atomic_ioctl+0x9ec/0xc80 [drm]
       drm_ioctl_kernel+0xc4/0x170 [drm]
       drm_ioctl+0x234/0x4b0 [drm]
       drm_compat_ioctl+0x110/0x12c [drm]
       __arm64_compat_sys_ioctl+0x128/0x150
       invoke_syscall+0x48/0x110
       el0_svc_common.constprop.0+0x40/0xe0
       do_el0_svc_compat+0x1c/0x38
       el0_svc_compat+0x48/0xb4
       el0t_32_sync_handler+0xb0/0x138
       el0t_32_sync+0x194/0x198

other info that might help us debug this:

Chain exists of:
  fs_reclaim --> mmu_notifier_invalidate_range_start --> dma_fence_map

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  rlock(dma_fence_map);
                               lock(mmu_notifier_invalidate_range_start);
                               lock(dma_fence_map);
  lock(fs_reclaim);

 *** DEADLOCK ***

3 locks held by kmstest/733:
 #0: ffff800082e5bba0 (crtc_ww_class_acquire){+.+.}-{0:0}, at: drm_mode_atomic_ioctl+0x118/0xc80 [drm]
 #1: ffff000004224c88 (crtc_ww_class_mutex){+.+.}-{3:3}, at: modeset_lock+0xdc/0x1a0 [drm]
 #2: ffff800081a06aa0 (dma_fence_map){++++}-{0:0}, at: tidss_atomic_commit_tail+0x20/0xc0 [tidss]

stack backtrace:
CPU: 0 PID: 733 Comm: kmstest Not tainted 6.6.0-rc2+ #1
Hardware name: Toradex Verdin AM62 on Verdin Development Board (DT)
Call trace:
 dump_backtrace+0x98/0x118
 show_stack+0x18/0x24
 dump_stack_lvl+0x60/0xac
 dump_stack+0x18/0x24
 print_circular_bug+0x288/0x368
 check_noncircular+0x168/0x17c
 __lock_acquire+0x1370/0x20d8
 lock_acquire+0x1e8/0x308
 fs_reclaim_acquire+0xd0/0xe4
 __kmem_cache_alloc_node+0x58/0x2d4
 __kmalloc_node_track_caller+0x58/0xf0
 kmemdup+0x34/0x60
 regmap_bulk_write+0x64/0x2c0
 tc358768_bridge_pre_enable+0x8c/0x12d0 [tc358768]
 drm_atomic_bridge_call_pre_enable+0x68/0x80 [drm]
 drm_atomic_bridge_chain_pre_enable+0x50/0x158 [drm]
 drm_atomic_helper_commit_modeset_enables+0x164/0x264 [drm_kms_helper]
 tidss_atomic_commit_tail+0x58/0xc0 [tidss]
 commit_tail+0xa0/0x188 [drm_kms_helper]
 drm_atomic_helper_commit+0x1a8/0x1c0 [drm_kms_helper]
 drm_atomic_commit+0xa8/0xe0 [drm]
 drm_mode_atomic_ioctl+0x9ec/0xc80 [drm]
 drm_ioctl_kernel+0xc4/0x170 [drm]
 drm_ioctl+0x234/0x4b0 [drm]
 drm_compat_ioctl+0x110/0x12c [drm]
 __arm64_compat_sys_ioctl+0x128/0x150
 invoke_syscall+0x48/0x110
 el0_svc_common.constprop.0+0x40/0xe0
 do_el0_svc_compat+0x1c/0x38
 el0_svc_compat+0x48/0xb4
 el0t_32_sync_handler+0xb0/0x138
 el0t_32_sync+0x194/0x198

Fixes: 4d56a4f08391 ("drm/tidss: Annotate dma-fence critical section in commit path")
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230920-dma-fence-annotation-revert-v1-1-7ebf6f7f5bf6@ideasonboard.com
---
 drivers/gpu/drm/tidss/tidss_kms.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c
index c979ad1af2366..d096d8d2bc8f8 100644
--- a/drivers/gpu/drm/tidss/tidss_kms.c
+++ b/drivers/gpu/drm/tidss/tidss_kms.c
@@ -4,8 +4,6 @@
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  */
 
-#include <linux/dma-fence.h>
-
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
@@ -25,7 +23,6 @@ static void tidss_atomic_commit_tail(struct drm_atomic_state *old_state)
 {
 	struct drm_device *ddev = old_state->dev;
 	struct tidss_device *tidss = to_tidss(ddev);
-	bool fence_cookie = dma_fence_begin_signalling();
 
 	dev_dbg(ddev->dev, "%s\n", __func__);
 
@@ -36,7 +33,6 @@ static void tidss_atomic_commit_tail(struct drm_atomic_state *old_state)
 	drm_atomic_helper_commit_modeset_enables(ddev, old_state);
 
 	drm_atomic_helper_commit_hw_done(old_state);
-	dma_fence_end_signalling(fence_cookie);
 	drm_atomic_helper_wait_for_flip_done(ddev, old_state);
 
 	drm_atomic_helper_cleanup_planes(ddev, old_state);
-- 
GitLab


From 9d7c8c066916f231ca0ed4e4fce6c4b58ca3e451 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Wed, 20 Sep 2023 15:57:17 +0300
Subject: [PATCH 0092/2340] Revert "drm/omapdrm: Annotate dma-fence critical
 section in commit path"

This reverts commit 250aa22920cd5d956a5d3e9c6a43d671c2bae217.

The DMA-fence annotations cause a lockdep warning (see below). As per
https://patchwork.freedesktop.org/patch/462170/ it sounds like the
annotations don't work correctly.

======================================================
WARNING: possible circular locking dependency detected
6.5.0-rc2+ #2 Not tainted
------------------------------------------------------
kmstest/219 is trying to acquire lock:
c4705838 (&hdmi->lock){+.+.}-{3:3}, at: hdmi5_bridge_mode_set+0x1c/0x50

but task is already holding lock:
c11e1128 (dma_fence_map){++++}-{0:0}, at: omap_atomic_commit_tail+0x14/0xbc

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (dma_fence_map){++++}-{0:0}:
       __dma_fence_might_wait+0x48/0xb4
       dma_resv_lockdep+0x1b8/0x2bc
       do_one_initcall+0x68/0x3b0
       kernel_init_freeable+0x260/0x34c
       kernel_init+0x14/0x140
       ret_from_fork+0x14/0x28

-> #1 (fs_reclaim){+.+.}-{0:0}:
       fs_reclaim_acquire+0x70/0xa8
       __kmem_cache_alloc_node+0x3c/0x368
       kmalloc_trace+0x28/0x58
       _drm_do_get_edid+0x7c/0x35c
       hdmi5_bridge_get_edid+0xc8/0x1ac
       drm_bridge_connector_get_modes+0x64/0xc0
       drm_helper_probe_single_connector_modes+0x170/0x528
       drm_client_modeset_probe+0x208/0x1334
       __drm_fb_helper_initial_config_and_unlock+0x30/0x548
       omap_fbdev_client_hotplug+0x3c/0x6c
       drm_client_register+0x58/0x94
       pdev_probe+0x544/0x6b0
       platform_probe+0x58/0xbc
       really_probe+0xd8/0x3fc
       __driver_probe_device+0x94/0x1f4
       driver_probe_device+0x2c/0xc4
       __device_attach_driver+0xa4/0x11c
       bus_for_each_drv+0x84/0xdc
       __device_attach+0xac/0x20c
       bus_probe_device+0x8c/0x90
       device_add+0x588/0x7e0
       platform_device_add+0x110/0x24c
       platform_device_register_full+0x108/0x15c
       dss_bind+0x90/0xc0
       try_to_bring_up_aggregate_device+0x1e0/0x2c8
       __component_add+0xa4/0x174
       hdmi5_probe+0x1c8/0x270
       platform_probe+0x58/0xbc
       really_probe+0xd8/0x3fc
       __driver_probe_device+0x94/0x1f4
       driver_probe_device+0x2c/0xc4
       __device_attach_driver+0xa4/0x11c
       bus_for_each_drv+0x84/0xdc
       __device_attach+0xac/0x20c
       bus_probe_device+0x8c/0x90
       deferred_probe_work_func+0x8c/0xd8
       process_one_work+0x2ac/0x6e4
       worker_thread+0x30/0x4ec
       kthread+0x100/0x124
       ret_from_fork+0x14/0x28

-> #0 (&hdmi->lock){+.+.}-{3:3}:
       __lock_acquire+0x145c/0x29cc
       lock_acquire.part.0+0xb4/0x258
       __mutex_lock+0x90/0x950
       mutex_lock_nested+0x1c/0x24
       hdmi5_bridge_mode_set+0x1c/0x50
       drm_bridge_chain_mode_set+0x48/0x5c
       crtc_set_mode+0x188/0x1d0
       omap_atomic_commit_tail+0x2c/0xbc
       commit_tail+0x9c/0x188
       drm_atomic_helper_commit+0x158/0x18c
       drm_atomic_commit+0xa4/0xe8
       drm_mode_atomic_ioctl+0x9a4/0xc38
       drm_ioctl+0x210/0x4a8
       sys_ioctl+0x138/0xf00
       ret_fast_syscall+0x0/0x1c

other info that might help us debug this:

Chain exists of:
  &hdmi->lock --> fs_reclaim --> dma_fence_map

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  rlock(dma_fence_map);
                               lock(fs_reclaim);
                               lock(dma_fence_map);
  lock(&hdmi->lock);

 *** DEADLOCK ***

3 locks held by kmstest/219:
 #0: f1011de4 (crtc_ww_class_acquire){+.+.}-{0:0}, at: drm_mode_atomic_ioctl+0xf0/0xc38
 #1: c47059c8 (crtc_ww_class_mutex){+.+.}-{3:3}, at: modeset_lock+0xf8/0x230
 #2: c11e1128 (dma_fence_map){++++}-{0:0}, at: omap_atomic_commit_tail+0x14/0xbc

stack backtrace:
CPU: 1 PID: 219 Comm: kmstest Not tainted 6.5.0-rc2+ #2
Hardware name: Generic DRA74X (Flattened Device Tree)
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x58/0x70
 dump_stack_lvl from check_noncircular+0x164/0x198
 check_noncircular from __lock_acquire+0x145c/0x29cc
 __lock_acquire from lock_acquire.part.0+0xb4/0x258
 lock_acquire.part.0 from __mutex_lock+0x90/0x950
 __mutex_lock from mutex_lock_nested+0x1c/0x24
 mutex_lock_nested from hdmi5_bridge_mode_set+0x1c/0x50
 hdmi5_bridge_mode_set from drm_bridge_chain_mode_set+0x48/0x5c
 drm_bridge_chain_mode_set from crtc_set_mode+0x188/0x1d0
 crtc_set_mode from omap_atomic_commit_tail+0x2c/0xbc
 omap_atomic_commit_tail from commit_tail+0x9c/0x188
 commit_tail from drm_atomic_helper_commit+0x158/0x18c
 drm_atomic_helper_commit from drm_atomic_commit+0xa4/0xe8
 drm_atomic_commit from drm_mode_atomic_ioctl+0x9a4/0xc38
 drm_mode_atomic_ioctl from drm_ioctl+0x210/0x4a8
 drm_ioctl from sys_ioctl+0x138/0xf00
 sys_ioctl from ret_fast_syscall+0x0/0x1c
Exception stack(0xf1011fa8 to 0xf1011ff0)
1fa0:                   00466d58 be9ab510 00000003 c03864bc be9ab510 be9ab4e0
1fc0: 00466d58 be9ab510 c03864bc 00000036 00466ef0 00466fc0 00467020 00466f20
1fe0: b6bc7ef4 be9ab4d0 b6bbbb00 b6cb2cc0

Fixes: 250aa22920cd ("drm/omapdrm: Annotate dma-fence critical section in commit path")
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230920-dma-fence-annotation-revert-v1-2-7ebf6f7f5bf6@ideasonboard.com
---
 drivers/gpu/drm/omapdrm/omap_drv.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index b2835b3ea6f58..6598c9c08ba11 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -69,7 +69,6 @@ static void omap_atomic_commit_tail(struct drm_atomic_state *old_state)
 {
 	struct drm_device *dev = old_state->dev;
 	struct omap_drm_private *priv = dev->dev_private;
-	bool fence_cookie = dma_fence_begin_signalling();
 
 	dispc_runtime_get(priv->dispc);
 
@@ -92,6 +91,8 @@ static void omap_atomic_commit_tail(struct drm_atomic_state *old_state)
 		omap_atomic_wait_for_completion(dev, old_state);
 
 		drm_atomic_helper_commit_planes(dev, old_state, 0);
+
+		drm_atomic_helper_commit_hw_done(old_state);
 	} else {
 		/*
 		 * OMAP3 DSS seems to have issues with the work-around above,
@@ -101,11 +102,9 @@ static void omap_atomic_commit_tail(struct drm_atomic_state *old_state)
 		drm_atomic_helper_commit_planes(dev, old_state, 0);
 
 		drm_atomic_helper_commit_modeset_enables(dev, old_state);
-	}
 
-	drm_atomic_helper_commit_hw_done(old_state);
-
-	dma_fence_end_signalling(fence_cookie);
+		drm_atomic_helper_commit_hw_done(old_state);
+	}
 
 	/*
 	 * Wait for completion of the page flips to ensure that old buffers
-- 
GitLab


From bfc87f90614523612ae7e94d06798e82bc78ade6 Mon Sep 17 00:00:00 2001
From: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:29 +0200
Subject: [PATCH 0093/2340] accel/ivpu/40xx: Allow to change profiling
 frequency

Profiling freq is a debug firmware feature. It switches default clock
to higher resolution for fine-grained and more accurate firmware task
profiling. We already configure it during boot up of VPU4.

Add debugfs knob and helpers per HW generation that allow to change it.
For vpu37xx the implementation is empty as profiling frequency can only
be changed on VPU4 or newer.

Signed-off-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-2-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_debugfs.c | 29 +++++++++++++++++++++++++++++
 drivers/accel/ivpu/ivpu_fw.c      |  7 +++++++
 drivers/accel/ivpu/ivpu_hw.h      | 12 ++++++++++++
 drivers/accel/ivpu/ivpu_hw_37xx.c | 13 +++++++++++++
 drivers/accel/ivpu/ivpu_hw_40xx.c | 15 +++++++++++++++
 5 files changed, 76 insertions(+)

diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 6e0d568230241..19035230563d7 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -14,6 +14,7 @@
 #include "ivpu_fw.h"
 #include "ivpu_fw_log.h"
 #include "ivpu_gem.h"
+#include "ivpu_hw.h"
 #include "ivpu_jsm_msg.h"
 #include "ivpu_pm.h"
 
@@ -176,6 +177,30 @@ static const struct file_operations fw_log_fops = {
 	.release = single_release,
 };
 
+static ssize_t
+fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf,
+			     size_t size, loff_t *pos)
+{
+	struct ivpu_device *vdev = file->private_data;
+	bool enable;
+	int ret;
+
+	ret = kstrtobool_from_user(user_buf, size, &enable);
+	if (ret < 0)
+		return ret;
+
+	ivpu_hw_profiling_freq_drive(vdev, enable);
+	ivpu_pm_schedule_recovery(vdev);
+
+	return size;
+}
+
+static const struct file_operations fw_profiling_freq_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = fw_profiling_freq_fops_write,
+};
+
 static ssize_t
 fw_trace_destination_mask_fops_write(struct file *file, const char __user *user_buf,
 				     size_t size, loff_t *pos)
@@ -319,4 +344,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
 
 	debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
 			    &ivpu_reset_engine_fops);
+
+	if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX)
+		debugfs_create_file("fw_profiling_freq_drive", 0200,
+				    debugfs_root, vdev, &fw_profiling_freq_fops);
 }
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 4a21be3a0c59b..3fd74cd4205f9 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -498,6 +498,13 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
 	boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
 
+	/*
+	 * This param is a debug firmware feature.  It switches default clock
+	 * to higher resolution one for fine-grained and more accurate firmware
+	 * task profiling.
+	 */
+	boot_params->perf_clk_frequency = ivpu_hw_profiling_freq_get(vdev);
+
 	/*
 	 * Uncached region of VPU address space, covers IPC buffers, job queues
 	 * and log buffers, programmable to L2$ Uncached by VPU MTRR
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index b7694b1cbc02f..aa52e5c29a65f 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -17,6 +17,8 @@ struct ivpu_hw_ops {
 	int (*wait_for_idle)(struct ivpu_device *vdev);
 	void (*wdt_disable)(struct ivpu_device *vdev);
 	void (*diagnose_failure)(struct ivpu_device *vdev);
+	u32 (*profiling_freq_get)(struct ivpu_device *vdev);
+	void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
 	u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
 	u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
 	u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
@@ -104,6 +106,16 @@ static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
 	vdev->hw->ops->wdt_disable(vdev);
 };
 
+static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->profiling_freq_get(vdev);
+};
+
+static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	return vdev->hw->ops->profiling_freq_drive(vdev, enable);
+};
+
 /* Register indirect accesses */
 static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
 {
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 1c8c5715095be..81f81046d39a3 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -29,6 +29,7 @@
 
 #define PLL_REF_CLK_FREQ	     (50 * 1000000)
 #define PLL_SIMULATION_FREQ	     (10 * 1000000)
+#define PLL_PROF_CLK_FREQ	     (38400 * 1000)
 #define PLL_DEFAULT_EPP_VALUE	     0x80
 
 #define TIM_SAFE_ENABLE		     0xf1d0dead
@@ -769,6 +770,16 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
 	REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
 }
 
+static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+	return PLL_PROF_CLK_FREQ;
+}
+
+static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	/* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
+}
+
 static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
 {
 	u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
@@ -1012,6 +1023,8 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
 	.boot_fw = ivpu_hw_37xx_boot_fw,
 	.wdt_disable = ivpu_hw_37xx_wdt_disable,
 	.diagnose_failure = ivpu_hw_37xx_diagnose_failure,
+	.profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
+	.profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
 	.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
 	.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
 	.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 6a9672f650d18..a779b905f8b15 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -931,6 +931,19 @@ static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev)
 	REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val);
 }
 
+static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->pll.profiling_freq;
+}
+
+static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	if (enable)
+		vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH;
+	else
+		vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
+}
+
 /* Register indirect accesses */
 static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
 {
@@ -1182,6 +1195,8 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
 	.boot_fw = ivpu_hw_40xx_boot_fw,
 	.wdt_disable = ivpu_hw_40xx_wdt_disable,
 	.diagnose_failure = ivpu_hw_40xx_diagnose_failure,
+	.profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
+	.profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
 	.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
 	.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
 	.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
-- 
GitLab


From a06eb9be49a66e16e0c7819c630c1267c25b36dc Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:30 +0200
Subject: [PATCH 0094/2340] accel/ivpu: Assure device is off if power up
 sequence fail

We should not leave device half enabled if there is failure somewhere
it power up sequence. Fix device init and resume paths.

Reviewed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-3-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c |  2 +-
 drivers/accel/ivpu/ivpu_pm.c  | 30 +++++++++++++++++-------------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 39bac45d88b5e..064cabef41bb4 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -543,7 +543,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	/* Power up early so the rest of init code can access VPU registers */
 	ret = ivpu_hw_power_up(vdev);
 	if (ret)
-		goto err_xa_destroy;
+		goto err_power_down;
 
 	ret = ivpu_mmu_global_context_init(vdev);
 	if (ret)
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 2d05fb9e31970..33d7c7731fe32 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -70,27 +70,31 @@ retry:
 	ret = ivpu_hw_power_up(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
-		return ret;
+		goto err_power_down;
 	}
 
 	ret = ivpu_mmu_enable(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
-		ivpu_hw_power_down(vdev);
-		return ret;
+		goto err_power_down;
 	}
 
 	ret = ivpu_boot(vdev);
-	if (ret) {
-		ivpu_mmu_disable(vdev);
-		ivpu_hw_power_down(vdev);
-		if (!ivpu_fw_is_cold_boot(vdev)) {
-			ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret);
-			ivpu_pm_prepare_cold_boot(vdev);
-			goto retry;
-		} else {
-			ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
-		}
+	if (ret)
+		goto err_mmu_disable;
+
+	return 0;
+
+err_mmu_disable:
+	ivpu_mmu_disable(vdev);
+err_power_down:
+	ivpu_hw_power_down(vdev);
+
+	if (!ivpu_fw_is_cold_boot(vdev)) {
+		ivpu_pm_prepare_cold_boot(vdev);
+		goto retry;
+	} else {
+		ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
 	}
 
 	return ret;
-- 
GitLab


From 57c7e3e4800ad65048a7044b03723cd85d9595af Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:31 +0200
Subject: [PATCH 0095/2340] accel/ivpu: Stop job_done_thread on suspend

Stop job_done thread when going to suspend. Use kthread_park() instead
of kthread_stop() to avoid memory allocation and potential failure
on resume.

Use separate function as thread wake up condition. Use spin lock to assure
rx_msg_list is properly protected against concurrent access. This avoid
race condition when the rx_msg_list list is modified and read in
ivpu_ipc_recive() at the same time.

Reviewed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-4-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c |  2 ++
 drivers/accel/ivpu/ivpu_ipc.c | 17 +++++++++++++++--
 drivers/accel/ivpu/ivpu_job.c | 20 ++++++++++++++++----
 drivers/accel/ivpu/ivpu_job.h |  2 ++
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 064cabef41bb4..60277ff6af69c 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -378,6 +378,7 @@ int ivpu_boot(struct ivpu_device *vdev)
 	enable_irq(vdev->irq);
 	ivpu_hw_irq_enable(vdev);
 	ivpu_ipc_enable(vdev);
+	ivpu_job_done_thread_enable(vdev);
 	return 0;
 }
 
@@ -389,6 +390,7 @@ int ivpu_shutdown(struct ivpu_device *vdev)
 	disable_irq(vdev->irq);
 	ivpu_ipc_disable(vdev);
 	ivpu_mmu_disable(vdev);
+	ivpu_job_done_thread_disable(vdev);
 
 	ret = ivpu_hw_power_down(vdev);
 	if (ret)
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index d069d1e1f91d5..270caef789bf8 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -202,6 +202,20 @@ unlock:
 	return ret;
 }
 
+static int ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
+{
+	int ret = 0;
+
+	if (IS_KTHREAD())
+		ret |= (kthread_should_stop() || kthread_should_park());
+
+	spin_lock_irq(&cons->rx_msg_lock);
+	ret |= !list_empty(&cons->rx_msg_list);
+	spin_unlock_irq(&cons->rx_msg_lock);
+
+	return ret;
+}
+
 int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 		     struct ivpu_ipc_hdr *ipc_buf,
 		     struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
@@ -211,8 +225,7 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	int wait_ret, ret = 0;
 
 	wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq,
-						    (IS_KTHREAD() && kthread_should_stop()) ||
-						    !list_empty(&cons->rx_msg_list),
+						    ivpu_ipc_rx_need_wakeup(cons),
 						    msecs_to_jiffies(timeout_ms));
 
 	if (IS_KTHREAD() && kthread_should_stop())
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 6e96c921547d5..a245b2d44db77 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -590,6 +590,11 @@ static int ivpu_job_done_thread(void *arg)
 				ivpu_pm_schedule_recovery(vdev);
 			}
 		}
+		if (kthread_should_park()) {
+			ivpu_dbg(vdev, JOB, "Parked %s\n", __func__);
+			kthread_parkme();
+			ivpu_dbg(vdev, JOB, "Unparked %s\n", __func__);
+		}
 	}
 
 	ivpu_ipc_consumer_del(vdev, &cons);
@@ -610,9 +615,6 @@ int ivpu_job_done_thread_init(struct ivpu_device *vdev)
 		return -EIO;
 	}
 
-	get_task_struct(thread);
-	wake_up_process(thread);
-
 	vdev->job_done_thread = thread;
 
 	return 0;
@@ -620,6 +622,16 @@ int ivpu_job_done_thread_init(struct ivpu_device *vdev)
 
 void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
 {
+	kthread_unpark(vdev->job_done_thread);
 	kthread_stop(vdev->job_done_thread);
-	put_task_struct(vdev->job_done_thread);
+}
+
+void ivpu_job_done_thread_disable(struct ivpu_device *vdev)
+{
+	kthread_park(vdev->job_done_thread);
+}
+
+void ivpu_job_done_thread_enable(struct ivpu_device *vdev)
+{
+	kthread_unpark(vdev->job_done_thread);
 }
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index aa1f0b9479b0b..a8e914e5affcd 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -61,6 +61,8 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
 
 int ivpu_job_done_thread_init(struct ivpu_device *vdev);
 void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
+void ivpu_job_done_thread_disable(struct ivpu_device *vdev);
+void ivpu_job_done_thread_enable(struct ivpu_device *vdev);
 
 void ivpu_jobs_abort_all(struct ivpu_device *vdev);
 
-- 
GitLab


From ba6b035daac8c4fa5da1f14f262d97e0ffa45a6d Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:32 +0200
Subject: [PATCH 0096/2340] accel/ivpu: Abort pending rx ipc on reset

Waking up process, which wait for particular condition, will go to
sleep again on wake_up() if the condition is not met. Add abort flag
to wake up IPC receivers, which will finish with -ECANCELED error.

This is only needed for reset, run time power management prevent to
suspend VPU when there is pending IPC processing or pending job.

Reviewed-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-5-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_ipc.c | 20 +++++++++++++++++---
 drivers/accel/ivpu/ivpu_ipc.h |  3 ++-
 drivers/accel/ivpu/ivpu_job.c |  1 +
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 270caef789bf8..255f2b8b0b5e3 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -148,6 +148,7 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	cons->channel = channel;
 	cons->tx_vpu_addr = 0;
 	cons->request_id = 0;
+	cons->aborted = false;
 	spin_lock_init(&cons->rx_msg_lock);
 	INIT_LIST_HEAD(&cons->rx_msg_list);
 	init_waitqueue_head(&cons->rx_msg_wq);
@@ -169,7 +170,8 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
 	spin_lock_irq(&cons->rx_msg_lock);
 	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
 		list_del(&rx_msg->link);
-		ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+		if (!cons->aborted)
+			ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
 		atomic_dec(&ipc->rx_msg_count);
 		kfree(rx_msg);
 	}
@@ -210,7 +212,7 @@ static int ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
 		ret |= (kthread_should_stop() || kthread_should_park());
 
 	spin_lock_irq(&cons->rx_msg_lock);
-	ret |= !list_empty(&cons->rx_msg_list);
+	ret |= !list_empty(&cons->rx_msg_list) || cons->aborted;
 	spin_unlock_irq(&cons->rx_msg_lock);
 
 	return ret;
@@ -244,6 +246,12 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 		return -EAGAIN;
 	}
 	list_del(&rx_msg->link);
+	if (cons->aborted) {
+		spin_unlock_irq(&cons->rx_msg_lock);
+		ret = -ECANCELED;
+		goto out;
+	}
+
 	spin_unlock_irq(&cons->rx_msg_lock);
 
 	if (ipc_buf)
@@ -261,6 +269,7 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	}
 
 	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+out:
 	atomic_dec(&ipc->rx_msg_count);
 	kfree(rx_msg);
 
@@ -522,8 +531,12 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
 	mutex_unlock(&ipc->lock);
 
 	spin_lock_irqsave(&ipc->cons_list_lock, flags);
-	list_for_each_entry_safe(cons, c, &ipc->cons_list, link)
+	list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
+		spin_lock(&cons->rx_msg_lock);
+		cons->aborted = true;
+		spin_unlock(&cons->rx_msg_lock);
 		wake_up(&cons->rx_msg_wq);
+	}
 	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
 }
 
@@ -532,6 +545,7 @@ void ivpu_ipc_reset(struct ivpu_device *vdev)
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 
 	mutex_lock(&ipc->lock);
+	drm_WARN_ON(&vdev->drm, ipc->on);
 
 	memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx));
 	memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx));
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 6918db23daa4b..a380787f7222a 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -47,8 +47,9 @@ struct ivpu_ipc_consumer {
 	u32 channel;
 	u32 tx_vpu_addr;
 	u32 request_id;
+	bool aborted;
 
-	spinlock_t rx_msg_lock; /* Protects rx_msg_list */
+	spinlock_t rx_msg_lock; /* Protects rx_msg_list and aborted */
 	struct list_head rx_msg_list;
 	wait_queue_head_t rx_msg_wq;
 };
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index a245b2d44db77..15a408fad4945 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -578,6 +578,7 @@ static int ivpu_job_done_thread(void *arg)
 	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
 
 	while (!kthread_should_stop()) {
+		cons.aborted = false;
 		timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
 		jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
 		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
-- 
GitLab


From e013aa9ab01b400cccc6c3e8b969b8e7f10bc6cb Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:33 +0200
Subject: [PATCH 0097/2340] accel/ivpu: Print CMDQ errors after consumer
 timeout

Add checking of error reason bits in IVPU_MMU_CMDQ_CONS
register when waiting for consumer timeout occurred.

Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-6-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_mmu.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 2538c78fbebe2..b15cf9cc9c433 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -230,7 +230,12 @@
 				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \
 				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT)))
 
-static char *ivpu_mmu_event_to_str(u32 cmd)
+#define IVPU_MMU_CERROR_NONE         0x0
+#define IVPU_MMU_CERROR_ILL          0x1
+#define IVPU_MMU_CERROR_ABT          0x2
+#define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3
+
+static const char *ivpu_mmu_event_to_str(u32 cmd)
 {
 	switch (cmd) {
 	case IVPU_MMU_EVT_F_UUT:
@@ -276,6 +281,22 @@ static char *ivpu_mmu_event_to_str(u32 cmd)
 	}
 }
 
+static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
+{
+	switch (err) {
+	case IVPU_MMU_CERROR_NONE:
+		return "No CMDQ Error";
+	case IVPU_MMU_CERROR_ILL:
+		return "Illegal command";
+	case IVPU_MMU_CERROR_ABT:
+		return "External abort on CMDQ read";
+	case IVPU_MMU_CERROR_ATC_INV_SYNC:
+		return "Sync failed to complete ATS invalidation";
+	default:
+		return "Unknown CMDQ Error";
+	}
+}
+
 static void ivpu_mmu_config_check(struct ivpu_device *vdev)
 {
 	u32 val_ref;
@@ -492,8 +513,15 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
 	REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod);
 
 	ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
-	if (ret)
-		ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret);
+	if (ret) {
+		u32 err;
+
+		val = REGV_RD32(IVPU_MMU_REG_CMDQ_CONS);
+		err = REG_GET_FLD(IVPU_MMU_REG_CMDQ_CONS, ERR, val);
+
+		ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret,
+			 ivpu_mmu_cmdq_err_to_str(err));
+	}
 
 	return ret;
 }
-- 
GitLab


From 3bcc5209ba6af1019487b54b30fc226ecf79193d Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:34 +0200
Subject: [PATCH 0098/2340] accel/ivpu: Make DMA allocations for MMU600 write
 combined

Previously using dma_alloc_wc() API we created cache coherent
(mapped as write-back) mappings.

Because we disable MMU600 snooping it was required to do costly
page walk and cache flushes after each page table modification.

With write-combined buffers it's possible to do a single write memory
barrier to flush write-combined buffer to memory which simplifies the
driver and significantly reduce time of map/unmap operations.

Mapping time of 255 MB is reduced from 2.5 ms to 500 us.

Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-7-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_mmu_context.c | 115 ++++++++++++++------------
 1 file changed, 63 insertions(+), 52 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 0c8c653519193..7d8005bc78d2f 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -5,6 +5,9 @@
 
 #include <linux/bitfield.h>
 #include <linux/highmem.h>
+#include <linux/set_memory.h>
+
+#include <drm/drm_cache.h>
 
 #include "ivpu_drv.h"
 #include "ivpu_hw.h"
@@ -38,12 +41,57 @@
 #define IVPU_MMU_ENTRY_MAPPED  (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
 				IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
 
+static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma)
+{
+	dma_addr_t dma_addr;
+	struct page *page;
+	void *cpu;
+
+	page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+	if (!page)
+		return NULL;
+
+	set_pages_array_wc(&page, 1);
+
+	dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(vdev->drm.dev, dma_addr))
+		goto err_free_page;
+
+	cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	if (!cpu)
+		goto err_dma_unmap_page;
+
+
+	*dma = dma_addr;
+	return cpu;
+
+err_dma_unmap_page:
+	dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+err_free_page:
+	put_page(page);
+	return NULL;
+}
+
+static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
+{
+	struct page *page;
+
+	if (cpu_addr) {
+		page = vmalloc_to_page(cpu_addr);
+		vunmap(cpu_addr);
+		dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
+		set_pages_array_wb(&page, 1);
+		put_page(page);
+	}
+}
+
 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
 {
 	dma_addr_t pgd_dma;
 
-	pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
-						  GFP_KERNEL);
+	pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
 	if (!pgtable->pgd_dma_ptr)
 		return -ENOMEM;
 
@@ -52,13 +100,6 @@ static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtab
 	return 0;
 }
 
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
-{
-	if (cpu_addr)
-		dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
-				  dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
-}
-
 static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
 {
 	int pgd_idx, pud_idx, pmd_idx;
@@ -83,19 +124,19 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
 				pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
 				pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
 
-				ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+				ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma);
 			}
 
 			kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
-			ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+			ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
 		}
 
 		kfree(pgtable->pmd_ptrs[pgd_idx]);
 		kfree(pgtable->pte_ptrs[pgd_idx]);
-		ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+		ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
 	}
 
-	ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+	ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
 }
 
 static u64*
@@ -107,7 +148,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	if (pud_dma_ptr)
 		return pud_dma_ptr;
 
-	pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+	pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma);
 	if (!pud_dma_ptr)
 		return NULL;
 
@@ -130,7 +171,7 @@ err_free_pmd_ptrs:
 	kfree(pgtable->pmd_ptrs[pgd_idx]);
 
 err_free_pud_dma_ptr:
-	ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+	ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
 	return NULL;
 }
 
@@ -144,7 +185,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	if (pmd_dma_ptr)
 		return pmd_dma_ptr;
 
-	pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+	pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma);
 	if (!pmd_dma_ptr)
 		return NULL;
 
@@ -159,7 +200,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	return pmd_dma_ptr;
 
 err_free_pmd_dma_ptr:
-	ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+	ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
 	return NULL;
 }
 
@@ -173,7 +214,7 @@ ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	if (pte_dma_ptr)
 		return pte_dma_ptr;
 
-	pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+	pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma);
 	if (!pte_dma_ptr)
 		return NULL;
 
@@ -248,38 +289,6 @@ static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_ad
 	ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
 }
 
-static void
-ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
-{
-	struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
-	u64 end_addr = vpu_addr + size;
-
-	/* Align to PMD entry (2 MB) */
-	vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
-
-	while (vpu_addr < end_addr) {
-		int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
-		u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
-
-		while (vpu_addr < end_addr && vpu_addr < pud_end) {
-			int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
-			u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
-
-			while (vpu_addr < end_addr && vpu_addr < pmd_end) {
-				int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-
-				clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
-						    IVPU_MMU_PGTABLE_SIZE);
-				vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
-			}
-			clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
-					    IVPU_MMU_PGTABLE_SIZE);
-		}
-		clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
-	}
-	clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
-}
-
 static int
 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 			   u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
@@ -352,10 +361,11 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 			mutex_unlock(&ctx->lock);
 			return ret;
 		}
-		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
 		vpu_addr += size;
 	}
 
+	/* Ensure page table modifications are flushed from wc buffers to memory */
+	wmb();
 	mutex_unlock(&ctx->lock);
 
 	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -381,10 +391,11 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
 		size_t size = sg_dma_len(sg) + sg->offset;
 
 		ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
-		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
 		vpu_addr += size;
 	}
 
+	/* Ensure page table modifications are flushed from wc buffers to memory */
+	wmb();
 	mutex_unlock(&ctx->lock);
 
 	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
-- 
GitLab


From 0c287c27fbffa2737f155af73646a794e540e1d3 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:35 +0200
Subject: [PATCH 0099/2340] accel/ivpu: Simplify MMU SYNC command

CMD_SYNC does not need any args as we poll for completion anyway.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-8-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_mmu.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index b15cf9cc9c433..b46f3743c0e7d 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -500,10 +500,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
 	u64 val;
 	int ret;
 
-	val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) |
-	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) |
-	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) |
-	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf);
+	val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC);
 
 	ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0);
 	if (ret)
-- 
GitLab


From 2fc1a50fa4472301a3b262fcfc78744c841d5587 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Sat, 28 Oct 2023 17:59:36 +0200
Subject: [PATCH 0100/2340] accel/ivpu: Rename VPU to NPU in product strings

VPU was rebranded as NPU (Neural Processing Unit) so user facing
strings have to be updated but the code remains as is and the module
is still called intel_vpu.ko.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-9-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/Kconfig    | 9 +++++----
 drivers/accel/ivpu/ivpu_drv.h | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 1a4c4ed9d1136..82749e0da524c 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 config DRM_ACCEL_IVPU
-	tristate "Intel VPU for Meteor Lake and newer"
+	tristate "Intel NPU (Neural Processing Unit)"
 	depends on DRM_ACCEL
 	depends on X86_64 && !UML
 	depends on PCI && PCI_MSI
@@ -9,8 +9,9 @@ config DRM_ACCEL_IVPU
 	select SHMEM
 	select GENERIC_ALLOCATOR
 	help
-	  Choose this option if you have a system that has an 14th generation Intel CPU
-	  or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
-	  inference accelerator for Computer Vision and Deep Learning applications.
+	  Choose this option if you have a system with an 14th generation
+	  Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
+	  is a CPU-integrated inference accelerator for Computer Vision
+	  and Deep Learning applications.
 
 	  If "M" is selected, the module will be called intel_vpu.
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 1b482d1d66d95..79b193ffbc264 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -19,7 +19,7 @@
 #include "ivpu_mmu_context.h"
 
 #define DRIVER_NAME "intel_vpu"
-#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)"
+#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
 #define DRIVER_DATE "20230117"
 
 #define PCI_DEVICE_ID_MTL   0x7d1d
-- 
GitLab


From 0da611a8702101814257a7c03f6caf0574c83b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 8 Sep 2023 10:27:23 +0200
Subject: [PATCH 0101/2340] dma-buf: add dma_fence_timestamp helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a fence signals there is a very small race window where the timestamp
isn't updated yet. sync_file solves this by busy waiting for the
timestamp to appear, but on other ocassions didn't handled this
correctly.

Provide a dma_fence_timestamp() helper function for this and use it in
all appropriate cases.

Another alternative would be to grab the spinlock when that happens.

v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case
   the accurate timestamp is needed and/or the timestamp is not based on
   ktime (e.g. hw timestamp)
v3 chk: drop the parameter again for unified handling

Signed-off-by: Yunxiang Li <Yunxiang.Li@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
CC: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christian.koenig@amd.com
---
 drivers/dma-buf/dma-fence-unwrap.c     | 13 ++++---------
 drivers/dma-buf/sync_file.c            |  9 +++------
 drivers/gpu/drm/scheduler/sched_main.c |  2 +-
 include/linux/dma-fence.h              | 19 +++++++++++++++++++
 4 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index c625bb2b5d563..628af51c81af3 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
 		dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
 			if (!dma_fence_is_signaled(tmp)) {
 				++count;
-			} else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
-					    &tmp->flags)) {
-				if (ktime_after(tmp->timestamp, timestamp))
-					timestamp = tmp->timestamp;
 			} else {
-				/*
-				 * Use the current time if the fence is
-				 * currently signaling.
-				 */
-				timestamp = ktime_get();
+				ktime_t t = dma_fence_timestamp(tmp);
+
+				if (ktime_after(t, timestamp))
+					timestamp = t;
 			}
 		}
 	}
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86cee..2e9a316c596a3 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence,
 		sizeof(info->driver_name));
 
 	info->status = dma_fence_get_status(fence);
-	while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
-	       !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
-		cpu_relax();
 	info->timestamp_ns =
-		test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ?
-		ktime_to_ns(fence->timestamp) :
-		ktime_set(0, 0);
+		dma_fence_is_signaled(fence) ?
+			ktime_to_ns(dma_fence_timestamp(fence)) :
+			ktime_set(0, 0);
 
 	return info->status;
 }
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fd755e9534878..99797a8c836ac 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -935,7 +935,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 
 		if (next) {
 			next->s_fence->scheduled.timestamp =
-				job->s_fence->finished.timestamp;
+				dma_fence_timestamp(&job->s_fence->finished);
 			/* start TO timer for next job */
 			drm_sched_start_timeout(sched);
 		}
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 0d678e9a7b248..ebe78bd3d121d 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence,
 	fence->error = error;
 }
 
+/**
+ * dma_fence_timestamp - helper to get the completion timestamp of a fence
+ * @fence: fence to get the timestamp from.
+ *
+ * After a fence is signaled the timestamp is updated with the signaling time,
+ * but setting the timestamp can race with tasks waiting for the signaling. This
+ * helper busy waits for the correct timestamp to appear.
+ */
+static inline ktime_t dma_fence_timestamp(struct dma_fence *fence)
+{
+	if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+		return ktime_get();
+
+	while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+		cpu_relax();
+
+	return fence->timestamp;
+}
+
 signed long dma_fence_wait_timeout(struct dma_fence *,
 				   bool intr, signed long timeout);
 signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
-- 
GitLab


From d1727cdd450d70cd747a466e96c63c26c78b6b11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 18 Oct 2023 18:41:20 +0300
Subject: [PATCH 0102/2340] drm/i915/mst: Swap TRANSCONF vs.
 FECSTALL_DIS_DPTSTREAM_DPTTG disable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The DP modeset sequence asks us to disable TRANSCONF before clearing
the FECSTALL_DIS_DPTSTREAM_DPTTG bit, although we are still asked
to wait for the transcoder to stop only after both steps have
been done.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018154123.5479-2-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 1caf46e3e5692..cdec0425f519f 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -485,6 +485,8 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state)
 	if (!IS_I830(dev_priv))
 		val &= ~TRANSCONF_ENABLE;
 
+	intel_de_write(dev_priv, reg, val);
+
 	if (DISPLAY_VER(dev_priv) >= 14)
 		intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(cpu_transcoder),
 			     FECSTALL_DIS_DPTSTREAM_DPTTG, 0);
@@ -492,7 +494,6 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state)
 		intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder),
 			     FECSTALL_DIS_DPTSTREAM_DPTTG, 0);
 
-	intel_de_write(dev_priv, reg, val);
 	if ((val & TRANSCONF_ENABLE) == 0)
 		intel_wait_for_pipe_off(old_crtc_state);
 }
-- 
GitLab


From d068fa53730b9eb79e532350cd90d50950ea79fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 18 Oct 2023 18:41:21 +0300
Subject: [PATCH 0103/2340] drm/i915/mst: Disable transcoder before deleting
 the payload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bspec tells us that we should disable the transcoder before
deleting the payload. Looks like this has been reversed since
MST support was added.

I suppose this shouldn't matter in practice since the downstream
device shouldn't really do anything with the new payload until
we send the ACT. But I see no compelling reason to deviate from
the bspec sequence regardless.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018154123.5479-3-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 4366da79fe817..b90a4d7604bc1 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -587,10 +587,6 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 	struct intel_dp *intel_dp = &dig_port->dp;
 	struct intel_connector *connector =
 		to_intel_connector(old_conn_state->connector);
-	struct drm_dp_mst_topology_state *new_mst_state =
-		drm_atomic_get_new_mst_topology_state(&state->base, &intel_dp->mst_mgr);
-	struct drm_dp_mst_atomic_payload *new_payload =
-		drm_atomic_get_mst_payload_state(new_mst_state, connector->port);
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 
 	drm_dbg_kms(&i915->drm, "active links %d\n",
@@ -598,8 +594,6 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 
 	intel_hdcp_disable(intel_mst->connector);
 
-	drm_dp_remove_payload_part1(&intel_dp->mst_mgr, new_mst_state, new_payload);
-
 	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
 }
 
@@ -634,6 +628,8 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
 
 	intel_disable_transcoder(old_crtc_state);
 
+	drm_dp_remove_payload_part1(&intel_dp->mst_mgr, new_mst_state, new_payload);
+
 	clear_act_sent(encoder, old_crtc_state);
 
 	intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(old_crtc_state->cpu_transcoder),
-- 
GitLab


From 817cb16e14de1fe29d4dfcd3cae8bce538f7d370 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 18 Oct 2023 18:41:22 +0300
Subject: [PATCH 0104/2340] drm/i915/mst: Clear ACT just before triggering
 payload allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow the bspec sequence more closely and clear ACT sent just
before triggering the allocation. Can't see why we'd want to
deviate from the spec sequence here.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018154123.5479-4-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index b90a4d7604bc1..f379fcdebe9e6 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -791,8 +791,6 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 
 	drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder);
 
-	clear_act_sent(encoder, pipe_config);
-
 	if (intel_dp_is_uhbr(pipe_config)) {
 		const struct drm_display_mode *adjusted_mode =
 			&pipe_config->hw.adjusted_mode;
@@ -806,6 +804,8 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 
 	intel_ddi_enable_transcoder_func(encoder, pipe_config);
 
+	clear_act_sent(encoder, pipe_config);
+
 	intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(trans), 0,
 		     TRANS_DDI_DP_VC_PAYLOAD_ALLOC);
 
-- 
GitLab


From 08a573006d62221772bed4a079d05bb356331868 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 18 Oct 2023 18:41:23 +0300
Subject: [PATCH 0105/2340] drm/i915/mst: Always write CHICKEN_TRANS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we're asked to disable FECSTALL_DIS_DPTSTREAM_DPTTG when
the transcoder is disabled it seems prudent to also clear it
when enabliing the transcoder w/o FEC, just in case
someone else left it enabled by mistake.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018154123.5479-5-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index f379fcdebe9e6..82f425ef15953 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -817,12 +817,14 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 	drm_dp_add_payload_part2(&intel_dp->mst_mgr, &state->base,
 				 drm_atomic_get_mst_payload_state(mst_state, connector->port));
 
-	if (DISPLAY_VER(dev_priv) >= 14 && pipe_config->fec_enable)
-		intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(trans), 0,
-			     FECSTALL_DIS_DPTSTREAM_DPTTG);
-	else if (DISPLAY_VER(dev_priv) >= 12 && pipe_config->fec_enable)
-		intel_de_rmw(dev_priv, CHICKEN_TRANS(trans), 0,
-			     FECSTALL_DIS_DPTSTREAM_DPTTG);
+	if (DISPLAY_VER(dev_priv) >= 14)
+		intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(trans),
+			     FECSTALL_DIS_DPTSTREAM_DPTTG,
+			     pipe_config->fec_enable ? FECSTALL_DIS_DPTSTREAM_DPTTG : 0);
+	else if (DISPLAY_VER(dev_priv) >= 12)
+		intel_de_rmw(dev_priv, CHICKEN_TRANS(trans),
+			     FECSTALL_DIS_DPTSTREAM_DPTTG,
+			     pipe_config->fec_enable ? FECSTALL_DIS_DPTSTREAM_DPTTG : 0);
 
 	intel_audio_sdp_split_update(pipe_config);
 
-- 
GitLab


From 35963cf2cd25eeea8bdb4d02853dac1e66fb13a0 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:35 -0700
Subject: [PATCH 0106/2340] drm/sched: Add drm_sched_wqueue_* helpers

Add scheduler wqueue ready, stop, and start helpers to hide the
implementation details of the scheduler from the drivers.

v2:
  - s/sched_wqueue/sched_wqueue (Luben)
  - Remove the extra white line after the return-statement (Luben)
  - update drm_sched_wqueue_ready comment (Luben)

Cc: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-2-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 15 +++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 12 +++---
 drivers/gpu/drm/msm/adreno/adreno_device.c    |  6 ++-
 drivers/gpu/drm/scheduler/sched_main.c        | 39 ++++++++++++++++++-
 include/drm/gpu_scheduler.h                   |  3 ++
 6 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 625db444df1cb..10d56979fe3b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
-		if (!(ring && ring->sched.thread))
+		if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
 			continue;
 
 		/* stop secheduler and drain ring. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a4faea4fa0b59..a4c0bb358db71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1659,9 +1659,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !ring->sched.thread)
+		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 			continue;
-		kthread_park(ring->sched.thread);
+		drm_sched_wqueue_stop(&ring->sched);
 	}
 
 	seq_puts(m, "run ib test:\n");
@@ -1675,9 +1675,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !ring->sched.thread)
+		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 			continue;
-		kthread_unpark(ring->sched.thread);
+		drm_sched_wqueue_start(&ring->sched);
 	}
 
 	up_write(&adev->reset_domain->sem);
@@ -1897,7 +1897,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 
 	ring = adev->rings[val];
 
-	if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+	if (!ring || !ring->funcs->preempt_ib ||
+	    !drm_sched_wqueue_ready(&ring->sched))
 		return -EINVAL;
 
 	/* the last preemption failed */
@@ -1915,7 +1916,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 		goto pro_end;
 
 	/* stop the scheduler */
-	kthread_park(ring->sched.thread);
+	drm_sched_wqueue_stop(&ring->sched);
 
 	/* preempt the IB */
 	r = amdgpu_ring_preempt_ib(ring);
@@ -1949,7 +1950,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 
 failure:
 	/* restart the scheduler */
-	kthread_unpark(ring->sched.thread);
+	drm_sched_wqueue_start(&ring->sched);
 
 	up_read(&adev->reset_domain->sem);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 19b539cab7fec..a7f7afcb2bf0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4601,7 +4601,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !ring->sched.thread)
+		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 			continue;
 
 		spin_lock(&ring->sched.job_list_lock);
@@ -4740,7 +4740,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !ring->sched.thread)
+		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 			continue;
 
 		/* Clear job fence from fence drv to avoid force_completion
@@ -5282,7 +5282,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
 
-			if (!ring || !ring->sched.thread)
+			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 				continue;
 
 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5357,7 +5357,7 @@ skip_hw_reset:
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
 
-			if (!ring || !ring->sched.thread)
+			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 				continue;
 
 			drm_sched_start(&ring->sched, true);
@@ -5683,7 +5683,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 
-			if (!ring || !ring->sched.thread)
+			if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 				continue;
 
 			drm_sched_stop(&ring->sched, NULL);
@@ -5811,7 +5811,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !ring->sched.thread)
+		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
 			continue;
 
 		drm_sched_start(&ring->sched, true);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index fa527935ffd42..8fa9ce3746b60 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -809,7 +809,8 @@ static void suspend_scheduler(struct msm_gpu *gpu)
 	 */
 	for (i = 0; i < gpu->nr_rings; i++) {
 		struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
-		kthread_park(sched->thread);
+
+		drm_sched_wqueue_stop(sched);
 	}
 }
 
@@ -819,7 +820,8 @@ static void resume_scheduler(struct msm_gpu *gpu)
 
 	for (i = 0; i < gpu->nr_rings; i++) {
 		struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
-		kthread_unpark(sched->thread);
+
+		drm_sched_wqueue_start(sched);
 	}
 }
 
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 99797a8c836ac..54c1c5fe01ba9 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -439,7 +439,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 {
 	struct drm_sched_job *s_job, *tmp;
 
-	kthread_park(sched->thread);
+	drm_sched_wqueue_stop(sched);
 
 	/*
 	 * Reinsert back the bad job here - now it's safe as
@@ -552,7 +552,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 		spin_unlock(&sched->job_list_lock);
 	}
 
-	kthread_unpark(sched->thread);
+	drm_sched_wqueue_start(sched);
 }
 EXPORT_SYMBOL(drm_sched_start);
 
@@ -1252,3 +1252,38 @@ void drm_sched_increase_karma(struct drm_sched_job *bad)
 	}
 }
 EXPORT_SYMBOL(drm_sched_increase_karma);
+
+/**
+ * drm_sched_wqueue_ready - Is the scheduler ready for submission
+ *
+ * @sched: scheduler instance
+ *
+ * Returns true if submission is ready
+ */
+bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
+{
+	return !!sched->thread;
+}
+EXPORT_SYMBOL(drm_sched_wqueue_ready);
+
+/**
+ * drm_sched_wqueue_stop - stop scheduler submission
+ *
+ * @sched: scheduler instance
+ */
+void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
+{
+	kthread_park(sched->thread);
+}
+EXPORT_SYMBOL(drm_sched_wqueue_stop);
+
+/**
+ * drm_sched_wqueue_start - start scheduler submission
+ *
+ * @sched: scheduler instance
+ */
+void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
+{
+	kthread_unpark(sched->thread);
+}
+EXPORT_SYMBOL(drm_sched_wqueue_start);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 653e0eec9743e..7e622c18d3365 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -553,6 +553,9 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
 
 void drm_sched_job_cleanup(struct drm_sched_job *job);
 void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched);
+bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
+void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
+void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
-- 
GitLab


From a6149f0393699308fb00149be913044977bceb56 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:36 -0700
Subject: [PATCH 0107/2340] drm/sched: Convert drm scheduler to use a work
 queue rather than kthread

In Xe, the new Intel GPU driver, a choice has made to have a 1 to 1
mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
seems a bit odd but let us explain the reasoning below.

1. In Xe the submission order from multiple drm_sched_entity is not
guaranteed to be the same completion even if targeting the same hardware
engine. This is because in Xe we have a firmware scheduler, the GuC,
which allowed to reorder, timeslice, and preempt submissions. If a using
shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
apart as the TDR expects submission order == completion order. Using a
dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.

2. In Xe submissions are done via programming a ring buffer (circular
buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
control on the ring for free.

A problem with this design is currently a drm_gpu_scheduler uses a
kthread for submission / job cleanup. This doesn't scale if a large
number of drm_gpu_scheduler are used. To work around the scaling issue,
use a worker rather than kthread for submission / job cleanup.

v2:
  - (Rob Clark) Fix msm build
  - Pass in run work queue
v3:
  - (Boris) don't have loop in worker
v4:
  - (Tvrtko) break out submit ready, stop, start helpers into own patch
v5:
  - (Boris) default to ordered work queue
v6:
  - (Luben / checkpatch) fix alignment in msm_ringbuffer.c
  - (Luben) s/drm_sched_submit_queue/drm_sched_wqueue_enqueue
  - (Luben) Update comment for drm_sched_wqueue_enqueue
  - (Luben) Positive check for submit_wq in drm_sched_init
  - (Luben) s/alloc_submit_wq/own_submit_wq
v7:
  - (Luben) s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue
v8:
  - (Luben) Adjust var names / comments

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-3-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
 drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
 drivers/gpu/drm/lima/lima_sched.c          |   2 +-
 drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
 drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
 drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
 drivers/gpu/drm/scheduler/sched_main.c     | 131 +++++++++++----------
 drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
 include/drm/gpu_scheduler.h                |  14 ++-
 9 files changed, 86 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a7f7afcb2bf0e..203dfcea82b18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
 			break;
 		}
 
-		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
 				   DRM_SCHED_PRIORITY_COUNT,
 				   ring->num_hw_submission, 0,
 				   timeout, adev->reset_domain->wq,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 9b79f218e21af..c4b04b0dee16a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
 {
 	int ret;
 
-	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
+	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
 			     DRM_SCHED_PRIORITY_COUNT,
 			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
 			     msecs_to_jiffies(500), NULL, NULL,
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index 295f0353a02e5..aa030e1f7cdae 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
 
 	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
 
-	return drm_sched_init(&pipe->base, &lima_sched_ops,
+	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL,
 			      DRM_SCHED_PRIORITY_COUNT,
 			      1,
 			      lima_job_hang_limit,
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 95257ab0185dc..4968568e3b546 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 	 /* currently managing hangcheck ourselves: */
 	sched_timeout = MAX_SCHEDULE_TIMEOUT;
 
-	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
+	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
 			     DRM_SCHED_PRIORITY_COUNT,
 			     num_hw_submissions, 0, sched_timeout,
 			     NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index e62b04109b2f7..7e64b5ef90fb2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
 	if (!drm->sched_wq)
 		return -ENOMEM;
 
-	return drm_sched_init(sched, &nouveau_sched_ops,
+	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
 			      DRM_SCHED_PRIORITY_COUNT,
 			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
 			      NULL, NULL, "nouveau_sched", drm->dev->dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index ecd2e035147fd..6d89e24322dbf 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -852,7 +852,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
 		js->queue[j].fence_context = dma_fence_context_alloc(1);
 
 		ret = drm_sched_init(&js->queue[j].sched,
-				     &panfrost_sched_ops,
+				     &panfrost_sched_ops, NULL,
 				     DRM_SCHED_PRIORITY_COUNT,
 				     nentries, 0,
 				     msecs_to_jiffies(JOB_TIMEOUT_MS),
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 54c1c5fe01ba9..d1ae05bded15c 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -48,7 +48,6 @@
  * through the jobs entity pointer.
  */
 
-#include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
@@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
 }
 
+/**
+ * drm_sched_run_job_queue - enqueue run-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->pause_submit))
+		queue_work(sched->submit_wq, &sched->work_run_job);
+}
+
 /**
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
@@ -275,7 +284,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
 	dma_fence_put(&s_fence->finished);
-	wake_up_interruptible(&sched->wake_up_worker);
+	drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -874,7 +883,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
 void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
 {
 	if (drm_sched_can_queue(sched))
-		wake_up_interruptible(&sched->wake_up_worker);
+		drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -985,60 +994,41 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 EXPORT_SYMBOL(drm_sched_pick_best);
 
 /**
- * drm_sched_blocked - check if the scheduler is blocked
+ * drm_sched_run_job_work - main scheduler thread
  *
- * @sched: scheduler instance
- *
- * Returns true if blocked, otherwise false.
+ * @w: run job work
  */
-static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
+static void drm_sched_run_job_work(struct work_struct *w)
 {
-	if (kthread_should_park()) {
-		kthread_parkme();
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * drm_sched_main - main scheduler thread
- *
- * @param: scheduler instance
- *
- * Returns 0.
- */
-static int drm_sched_main(void *param)
-{
-	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
+	struct drm_gpu_scheduler *sched =
+		container_of(w, struct drm_gpu_scheduler, work_run_job);
+	struct drm_sched_entity *entity;
+	struct drm_sched_job *cleanup_job;
 	int r;
 
-	sched_set_fifo_low(current);
+	if (READ_ONCE(sched->pause_submit))
+		return;
 
-	while (!kthread_should_stop()) {
-		struct drm_sched_entity *entity = NULL;
-		struct drm_sched_fence *s_fence;
-		struct drm_sched_job *sched_job;
-		struct dma_fence *fence;
-		struct drm_sched_job *cleanup_job = NULL;
+	cleanup_job = drm_sched_get_cleanup_job(sched);
+	entity = drm_sched_select_entity(sched);
 
-		wait_event_interruptible(sched->wake_up_worker,
-					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
-					 (!drm_sched_blocked(sched) &&
-					  (entity = drm_sched_select_entity(sched))) ||
-					 kthread_should_stop());
+	if (!entity && !cleanup_job)
+		return;	/* No more work */
 
-		if (cleanup_job)
-			sched->ops->free_job(cleanup_job);
+	if (cleanup_job)
+		sched->ops->free_job(cleanup_job);
 
-		if (!entity)
-			continue;
+	if (entity) {
+		struct dma_fence *fence;
+		struct drm_sched_fence *s_fence;
+		struct drm_sched_job *sched_job;
 
 		sched_job = drm_sched_entity_pop_job(entity);
-
 		if (!sched_job) {
 			complete_all(&entity->entity_idle);
-			continue;
+			if (!cleanup_job)
+				return;	/* No more work */
+			goto again;
 		}
 
 		s_fence = sched_job->s_fence;
@@ -1069,7 +1059,9 @@ static int drm_sched_main(void *param)
 
 		wake_up(&sched->job_scheduled);
 	}
-	return 0;
+
+again:
+	drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -1077,6 +1069,8 @@ static int drm_sched_main(void *param)
  *
  * @sched: scheduler instance
  * @ops: backend operations for this scheduler
+ * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
+ *	       allocated and used
  * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
  * @hw_submission: number of hw submissions that can be in flight
  * @hang_limit: number of times to allow a job to hang before dropping it
@@ -1091,6 +1085,7 @@ static int drm_sched_main(void *param)
  */
 int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const struct drm_sched_backend_ops *ops,
+		   struct workqueue_struct *submit_wq,
 		   u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
 		   long timeout, struct workqueue_struct *timeout_wq,
 		   atomic_t *score, const char *name, struct device *dev)
@@ -1121,14 +1116,22 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		return 0;
 	}
 
+	if (submit_wq) {
+		sched->submit_wq = submit_wq;
+		sched->own_submit_wq = false;
+	} else {
+		sched->submit_wq = alloc_ordered_workqueue(name, 0);
+		if (!sched->submit_wq)
+			return -ENOMEM;
+
+		sched->own_submit_wq = true;
+	}
+	ret = -ENOMEM;
 	sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
 					GFP_KERNEL | __GFP_ZERO);
-	if (!sched->sched_rq) {
-		drm_err(sched, "%s: out of memory for sched_rq\n", __func__);
-		return -ENOMEM;
-	}
+	if (!sched->sched_rq)
+		goto Out_free;
 	sched->num_rqs = num_rqs;
-	ret = -ENOMEM;
 	for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) {
 		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
 		if (!sched->sched_rq[i])
@@ -1136,31 +1139,26 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		drm_sched_rq_init(sched, sched->sched_rq[i]);
 	}
 
-	init_waitqueue_head(&sched->wake_up_worker);
 	init_waitqueue_head(&sched->job_scheduled);
 	INIT_LIST_HEAD(&sched->pending_list);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
+	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
 	atomic_set(&sched->_score, 0);
 	atomic64_set(&sched->job_id_count, 0);
-
-	/* Each scheduler will run on a seperate kernel thread */
-	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
-	if (IS_ERR(sched->thread)) {
-		ret = PTR_ERR(sched->thread);
-		sched->thread = NULL;
-		DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
-		goto Out_unroll;
-	}
+	sched->pause_submit = false;
 
 	sched->ready = true;
 	return 0;
 Out_unroll:
 	for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--)
 		kfree(sched->sched_rq[i]);
+Out_free:
 	kfree(sched->sched_rq);
 	sched->sched_rq = NULL;
+	if (sched->own_submit_wq)
+		destroy_workqueue(sched->submit_wq);
 	drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
 	return ret;
 }
@@ -1178,8 +1176,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	struct drm_sched_entity *s_entity;
 	int i;
 
-	if (sched->thread)
-		kthread_stop(sched->thread);
+	drm_sched_wqueue_stop(sched);
 
 	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
 		struct drm_sched_rq *rq = sched->sched_rq[i];
@@ -1202,6 +1199,8 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	/* Confirm no work left behind accessing device structures */
 	cancel_delayed_work_sync(&sched->work_tdr);
 
+	if (sched->own_submit_wq)
+		destroy_workqueue(sched->submit_wq);
 	sched->ready = false;
 	kfree(sched->sched_rq);
 	sched->sched_rq = NULL;
@@ -1262,7 +1261,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
  */
 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
 {
-	return !!sched->thread;
+	return sched->ready;
 }
 EXPORT_SYMBOL(drm_sched_wqueue_ready);
 
@@ -1273,7 +1272,8 @@ EXPORT_SYMBOL(drm_sched_wqueue_ready);
  */
 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
 {
-	kthread_park(sched->thread);
+	WRITE_ONCE(sched->pause_submit, true);
+	cancel_work_sync(&sched->work_run_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_stop);
 
@@ -1284,6 +1284,7 @@ EXPORT_SYMBOL(drm_sched_wqueue_stop);
  */
 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
 {
-	kthread_unpark(sched->thread);
+	WRITE_ONCE(sched->pause_submit, false);
+	queue_work(sched->submit_wq, &sched->work_run_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_start);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 038e1ae589c71..0b6696b0d882f 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
-			     &v3d_bin_sched_ops,
+			     &v3d_bin_sched_ops, NULL,
 			     DRM_SCHED_PRIORITY_COUNT,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -397,7 +397,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 		return ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
-			     &v3d_render_sched_ops,
+			     &v3d_render_sched_ops, NULL,
 			     DRM_SCHED_PRIORITY_COUNT,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -406,7 +406,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 		goto fail;
 
 	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
-			     &v3d_tfu_sched_ops,
+			     &v3d_tfu_sched_ops, NULL,
 			     DRM_SCHED_PRIORITY_COUNT,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -416,7 +416,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 
 	if (v3d_has_csd(v3d)) {
 		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
-				     &v3d_csd_sched_ops,
+				     &v3d_csd_sched_ops, NULL,
 				     DRM_SCHED_PRIORITY_COUNT,
 				     hw_jobs_limit, job_hang_limit,
 				     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -425,7 +425,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 			goto fail;
 
 		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
-				     &v3d_cache_clean_sched_ops,
+				     &v3d_cache_clean_sched_ops, NULL,
 				     DRM_SCHED_PRIORITY_COUNT,
 				     hw_jobs_limit, job_hang_limit,
 				     msecs_to_jiffies(hang_limit_ms), NULL,
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 7e622c18d3365..4bf94aca26316 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -475,17 +475,16 @@ struct drm_sched_backend_ops {
  * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
  *           as there's usually one run-queue per priority, but could be less.
  * @sched_rq: An allocated array of run-queues of size @num_rqs;
- * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
- *                  is ready to be scheduled.
  * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
  *                 waits on this wait queue until all the scheduled jobs are
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
+ * @submit_wq: workqueue used to queue @work_run_job
  * @timeout_wq: workqueue used to queue @work_tdr
+ * @work_run_job: work which calls run_job op of each scheduler.
  * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
  *            timeout interval is over.
- * @thread: the kthread on which the scheduler which run.
  * @pending_list: the list of jobs which are currently in the job queue.
  * @job_list_lock: lock to protect the pending_list.
  * @hang_limit: once the hangs by a job crosses this limit then it is marked
@@ -494,6 +493,8 @@ struct drm_sched_backend_ops {
  * @_score: score used when the driver doesn't provide one
  * @ready: marks if the underlying HW is ready to work
  * @free_guilty: A hit to time out handler to free the guilty job.
+ * @pause_submit: pause queuing of @work_run_job on @submit_wq
+ * @own_submit_wq: scheduler owns allocation of @submit_wq
  * @dev: system &struct device
  *
  * One scheduler is implemented for each hardware ring.
@@ -505,13 +506,13 @@ struct drm_gpu_scheduler {
 	const char			*name;
 	u32                             num_rqs;
 	struct drm_sched_rq             **sched_rq;
-	wait_queue_head_t		wake_up_worker;
 	wait_queue_head_t		job_scheduled;
 	atomic_t			hw_rq_count;
 	atomic64_t			job_id_count;
+	struct workqueue_struct		*submit_wq;
 	struct workqueue_struct		*timeout_wq;
+	struct work_struct		work_run_job;
 	struct delayed_work		work_tdr;
-	struct task_struct		*thread;
 	struct list_head		pending_list;
 	spinlock_t			job_list_lock;
 	int				hang_limit;
@@ -519,11 +520,14 @@ struct drm_gpu_scheduler {
 	atomic_t                        _score;
 	bool				ready;
 	bool				free_guilty;
+	bool				pause_submit;
+	bool				own_submit_wq;
 	struct device			*dev;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const struct drm_sched_backend_ops *ops,
+		   struct workqueue_struct *submit_wq,
 		   u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
 		   long timeout, struct workqueue_struct *timeout_wq,
 		   atomic_t *score, const char *name, struct device *dev);
-- 
GitLab


From f7fe64ad0f22ff034f8ebcfbd7299ee9cc9b57d7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:37 -0700
Subject: [PATCH 0108/2340] drm/sched: Split free_job into own work item

Rather than call free_job and run_job in same work item have a dedicated
work item for each. This aligns with the design and intended use of work
queues.

v2:
   - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
     timestamp in free_job() work item (Danilo)
v3:
  - Drop forward dec of drm_sched_select_entity (Boris)
  - Return in drm_sched_run_job_work if entity NULL (Boris)
v4:
  - Replace dequeue with peek and invert logic (Luben)
  - Wrap to 100 lines (Luben)
  - Update comments for *_queue / *_queue_if_ready functions (Luben)
v5:
  - Drop peek argument, blindly reinit idle (Luben)
  - s/drm_sched_free_job_queue_if_ready/drm_sched_free_job_queue_if_done (Luben)
  - Update work_run_job & work_free_job kernel doc (Luben)
v6:
  - Do not move drm_sched_select_entity in file (Luben)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-4-matthew.brost@intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 146 +++++++++++++++++--------
 include/drm/gpu_scheduler.h            |   4 +-
 2 files changed, 101 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index d1ae05bded15c..3b1b2f8eafe83 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -265,6 +265,32 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
 		queue_work(sched->submit_wq, &sched->work_run_job);
 }
 
+/**
+ * drm_sched_free_job_queue - enqueue free-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->pause_submit))
+		queue_work(sched->submit_wq, &sched->work_free_job);
+}
+
+/**
+ * drm_sched_free_job_queue_if_done - enqueue free-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_job *job;
+
+	spin_lock(&sched->job_list_lock);
+	job = list_first_entry_or_null(&sched->pending_list,
+				       struct drm_sched_job, list);
+	if (job && dma_fence_is_signaled(&job->s_fence->finished))
+		drm_sched_free_job_queue(sched);
+	spin_unlock(&sched->job_list_lock);
+}
+
 /**
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
@@ -284,7 +310,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
 	dma_fence_put(&s_fence->finished);
-	drm_sched_run_job_queue(sched);
+	drm_sched_free_job_queue(sched);
 }
 
 /**
@@ -943,8 +969,10 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 						typeof(*next), list);
 
 		if (next) {
-			next->s_fence->scheduled.timestamp =
-				dma_fence_timestamp(&job->s_fence->finished);
+			if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
+				     &next->s_fence->scheduled.flags))
+				next->s_fence->scheduled.timestamp =
+					dma_fence_timestamp(&job->s_fence->finished);
 			/* start TO timer for next job */
 			drm_sched_start_timeout(sched);
 		}
@@ -994,7 +1022,40 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 EXPORT_SYMBOL(drm_sched_pick_best);
 
 /**
- * drm_sched_run_job_work - main scheduler thread
+ * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched)
+{
+	if (drm_sched_select_entity(sched))
+		drm_sched_run_job_queue(sched);
+}
+
+/**
+ * drm_sched_free_job_work - worker to call free_job
+ *
+ * @w: free job work
+ */
+static void drm_sched_free_job_work(struct work_struct *w)
+{
+	struct drm_gpu_scheduler *sched =
+		container_of(w, struct drm_gpu_scheduler, work_free_job);
+	struct drm_sched_job *cleanup_job;
+
+	if (READ_ONCE(sched->pause_submit))
+		return;
+
+	cleanup_job = drm_sched_get_cleanup_job(sched);
+	if (cleanup_job) {
+		sched->ops->free_job(cleanup_job);
+
+		drm_sched_free_job_queue_if_done(sched);
+		drm_sched_run_job_queue_if_ready(sched);
+	}
+}
+
+/**
+ * drm_sched_run_job_work - worker to call run_job
  *
  * @w: run job work
  */
@@ -1003,65 +1064,51 @@ static void drm_sched_run_job_work(struct work_struct *w)
 	struct drm_gpu_scheduler *sched =
 		container_of(w, struct drm_gpu_scheduler, work_run_job);
 	struct drm_sched_entity *entity;
-	struct drm_sched_job *cleanup_job;
+	struct dma_fence *fence;
+	struct drm_sched_fence *s_fence;
+	struct drm_sched_job *sched_job;
 	int r;
 
 	if (READ_ONCE(sched->pause_submit))
 		return;
 
-	cleanup_job = drm_sched_get_cleanup_job(sched);
 	entity = drm_sched_select_entity(sched);
+	if (!entity)
+		return;
 
-	if (!entity && !cleanup_job)
+	sched_job = drm_sched_entity_pop_job(entity);
+	if (!sched_job) {
+		complete_all(&entity->entity_idle);
 		return;	/* No more work */
+	}
 
-	if (cleanup_job)
-		sched->ops->free_job(cleanup_job);
-
-	if (entity) {
-		struct dma_fence *fence;
-		struct drm_sched_fence *s_fence;
-		struct drm_sched_job *sched_job;
-
-		sched_job = drm_sched_entity_pop_job(entity);
-		if (!sched_job) {
-			complete_all(&entity->entity_idle);
-			if (!cleanup_job)
-				return;	/* No more work */
-			goto again;
-		}
-
-		s_fence = sched_job->s_fence;
-
-		atomic_inc(&sched->hw_rq_count);
-		drm_sched_job_begin(sched_job);
+	s_fence = sched_job->s_fence;
 
-		trace_drm_run_job(sched_job, entity);
-		fence = sched->ops->run_job(sched_job);
-		complete_all(&entity->entity_idle);
-		drm_sched_fence_scheduled(s_fence, fence);
+	atomic_inc(&sched->hw_rq_count);
+	drm_sched_job_begin(sched_job);
 
-		if (!IS_ERR_OR_NULL(fence)) {
-			/* Drop for original kref_init of the fence */
-			dma_fence_put(fence);
+	trace_drm_run_job(sched_job, entity);
+	fence = sched->ops->run_job(sched_job);
+	complete_all(&entity->entity_idle);
+	drm_sched_fence_scheduled(s_fence, fence);
 
-			r = dma_fence_add_callback(fence, &sched_job->cb,
-						   drm_sched_job_done_cb);
-			if (r == -ENOENT)
-				drm_sched_job_done(sched_job, fence->error);
-			else if (r)
-				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
-					  r);
-		} else {
-			drm_sched_job_done(sched_job, IS_ERR(fence) ?
-					   PTR_ERR(fence) : 0);
-		}
+	if (!IS_ERR_OR_NULL(fence)) {
+		/* Drop for original kref_init of the fence */
+		dma_fence_put(fence);
 
-		wake_up(&sched->job_scheduled);
+		r = dma_fence_add_callback(fence, &sched_job->cb,
+					   drm_sched_job_done_cb);
+		if (r == -ENOENT)
+			drm_sched_job_done(sched_job, fence->error);
+		else if (r)
+			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
+	} else {
+		drm_sched_job_done(sched_job, IS_ERR(fence) ?
+				   PTR_ERR(fence) : 0);
 	}
 
-again:
-	drm_sched_run_job_queue(sched);
+	wake_up(&sched->job_scheduled);
+	drm_sched_run_job_queue_if_ready(sched);
 }
 
 /**
@@ -1145,6 +1192,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
+	INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
 	atomic_set(&sched->_score, 0);
 	atomic64_set(&sched->job_id_count, 0);
 	sched->pause_submit = false;
@@ -1274,6 +1322,7 @@ void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
 {
 	WRITE_ONCE(sched->pause_submit, true);
 	cancel_work_sync(&sched->work_run_job);
+	cancel_work_sync(&sched->work_free_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_stop);
 
@@ -1286,5 +1335,6 @@ void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
 {
 	WRITE_ONCE(sched->pause_submit, false);
 	queue_work(sched->submit_wq, &sched->work_run_job);
+	queue_work(sched->submit_wq, &sched->work_free_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_start);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 4bf94aca26316..6655aeeef9bb2 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -480,9 +480,10 @@ struct drm_sched_backend_ops {
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
- * @submit_wq: workqueue used to queue @work_run_job
+ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job
  * @timeout_wq: workqueue used to queue @work_tdr
  * @work_run_job: work which calls run_job op of each scheduler.
+ * @work_free_job: work which calls free_job op of each scheduler.
  * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
  *            timeout interval is over.
  * @pending_list: the list of jobs which are currently in the job queue.
@@ -512,6 +513,7 @@ struct drm_gpu_scheduler {
 	struct workqueue_struct		*submit_wq;
 	struct workqueue_struct		*timeout_wq;
 	struct work_struct		work_run_job;
+	struct work_struct		work_free_job;
 	struct delayed_work		work_tdr;
 	struct list_head		pending_list;
 	spinlock_t			job_list_lock;
-- 
GitLab


From 7a36dcfa16a5a7a87f65e03e1a3eb2b5e2fca812 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:38 -0700
Subject: [PATCH 0109/2340] drm/sched: Add drm_sched_start_timeout_unlocked
 helper

Also add a lockdep assert to drm_sched_start_timeout.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-5-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 3b1b2f8eafe83..fc387de5a0c75 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -334,11 +334,20 @@ static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
  */
 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
 {
+	lockdep_assert_held(&sched->job_list_lock);
+
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    !list_empty(&sched->pending_list))
 		queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
 }
 
+static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched)
+{
+	spin_lock(&sched->job_list_lock);
+	drm_sched_start_timeout(sched);
+	spin_unlock(&sched->job_list_lock);
+}
+
 /**
  * drm_sched_fault - immediately start timeout handler
  *
@@ -451,11 +460,8 @@ static void drm_sched_job_timedout(struct work_struct *work)
 		spin_unlock(&sched->job_list_lock);
 	}
 
-	if (status != DRM_GPU_SCHED_STAT_ENODEV) {
-		spin_lock(&sched->job_list_lock);
-		drm_sched_start_timeout(sched);
-		spin_unlock(&sched->job_list_lock);
-	}
+	if (status != DRM_GPU_SCHED_STAT_ENODEV)
+		drm_sched_start_timeout_unlocked(sched);
 }
 
 /**
@@ -581,11 +587,8 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 			drm_sched_job_done(s_job, -ECANCELED);
 	}
 
-	if (full_recovery) {
-		spin_lock(&sched->job_list_lock);
-		drm_sched_start_timeout(sched);
-		spin_unlock(&sched->job_list_lock);
-	}
+	if (full_recovery)
+		drm_sched_start_timeout_unlocked(sched);
 
 	drm_sched_wqueue_start(sched);
 }
-- 
GitLab


From 3c6c7ca4508b6cb1a033ac954c50a1b2c97af883 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Oct 2023 20:24:39 -0700
Subject: [PATCH 0110/2340] drm/sched: Add a helper to queue TDR immediately

Add a helper whereby a driver can invoke TDR immediately.

v2:
 - Drop timeout args, rename function, use mod delayed work (Luben)
v3:
 - s/XE/Xe (Luben)
 - present tense in commit message (Luben)
 - Adjust comment for drm_sched_tdr_queue_imm (Luben)
v4:
 - Adjust commit message (Luben)

Cc: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-6-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 18 +++++++++++++++++-
 include/drm/gpu_scheduler.h            |  1 +
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fc387de5a0c75..98b2ad54fc707 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -338,7 +338,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
 
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    !list_empty(&sched->pending_list))
-		queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
+		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
 }
 
 static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched)
@@ -348,6 +348,22 @@ static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched)
 	spin_unlock(&sched->job_list_lock);
 }
 
+/**
+ * drm_sched_tdr_queue_imm: - immediately start job timeout handler
+ *
+ * @sched: scheduler for which the timeout handling should be started.
+ *
+ * Start timeout handling immediately for the named scheduler.
+ */
+void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched)
+{
+	spin_lock(&sched->job_list_lock);
+	sched->timeout = 0;
+	drm_sched_start_timeout(sched);
+	spin_unlock(&sched->job_list_lock);
+}
+EXPORT_SYMBOL(drm_sched_tdr_queue_imm);
+
 /**
  * drm_sched_fault - immediately start timeout handler
  *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 6655aeeef9bb2..9daa78d2c04c1 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -557,6 +557,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
 				    struct drm_gpu_scheduler **sched_list,
                                    unsigned int num_sched_list);
 
+void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
 void drm_sched_job_cleanup(struct drm_sched_job *job);
 void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched);
 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
-- 
GitLab


From 43dea469e99b10ecc967a3576e50a5d416daf13c Mon Sep 17 00:00:00 2001
From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Date: Sat, 28 Oct 2023 01:20:52 +0530
Subject: [PATCH 0111/2340] drm/i915/mtl: Add Wa_14019821291

This workaround is primarily implemented by the BIOS.  However if the
BIOS applies the workaround it will reserve a small piece of our DSM
(which should be at the top, right below the WOPCM); we just need to
keep that region reserved so that nothing else attempts to re-use it.

v2: Declare regs in intel_gt_regs.h (Matt Roper)

v3: Shift WA implementation before calculation of *base (Matt Roper)

v4:
-  Change condition gscpmi base to be fall in DSM range.(Matt Roper)

Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027195052.3676632-1-dnyaneshwar.bhadane@intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 21 +++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h    |  3 +++
 2 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 1a766d8e7ccec..8c88075eeab23 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -386,6 +386,27 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 
 	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
+	/* Wa_14019821291 */
+	if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
+		/*
+		 * This workaround is primarily implemented by the BIOS.  We
+		 * just need to figure out whether the BIOS has applied the
+		 * workaround (meaning the programmed address falls within
+		 * the DSM) and, if so, reserve that part of the DSM to
+		 * prevent accidental reuse.  The DSM location should be just
+		 * below the WOPCM.
+		 */
+		u64 gscpsmi_base = intel_uncore_read64_2x32(uncore,
+							    MTL_GSCPSMI_BASEADDR_LSB,
+							    MTL_GSCPSMI_BASEADDR_MSB);
+		if (gscpsmi_base >= i915->dsm.stolen.start &&
+		    gscpsmi_base < i915->dsm.stolen.end) {
+			*base = gscpsmi_base;
+			*size = i915->dsm.stolen.end - gscpsmi_base;
+			return;
+		}
+	}
+
 	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
 	case GEN8_STOLEN_RESERVED_1M:
 		*size = 1024 * 1024;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index eecd0a87a6478..9de41703fae58 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -537,6 +537,9 @@
 #define XEHP_SQCM				MCR_REG(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
+#define MTL_GSCPSMI_BASEADDR_LSB		_MMIO(0x880c)
+#define MTL_GSCPSMI_BASEADDR_MSB		_MMIO(0x8810)
+
 #define HSW_IDICR				_MMIO(0x9008)
 #define   IDIHASHMSK(x)				(((x) & 0x3f) << 16)
 
-- 
GitLab


From d59cf7bb73f3c702112a5a07824254345b7d089f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 31 Oct 2023 10:45:57 +0200
Subject: [PATCH 0112/2340] drm/i915/display: Use dma_fence interfaces instead
 of i915_sw_fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are preparing for Xe driver. Xe driver doesn't have i915_sw_fence
implementation. Lets drop i915_sw_fence usage from display code and
use dma_fence interfaces directly.

For this purpose stack dma fences from related objects into new plane
state. Drm_gem_plane_helper_prepare_fb can be used for fences in new
fb. Separate local implementation is used for Stacking fences from old fb
into new plane state. Then wait for these stacked fences during atomic
commit. There is no be need for separate GPU reset handling in
intel_atomic_commit_fence_wait as the fences are signaled when GPU hang is
detected and GPU is being reset.

v4:
  - Drop to_new_plane_state suffix from add_dma_resv_fences
  - Use dma_resv_usage_rw(false) (DMA_RESV_USAGE_WRITE)
v3:
  - Rename add_fences and it's parameters
  - Remove signaled check
  - Remove waiting old_plane_state fences
v2:
  - Add fences from old fb into new_plane_state->uapi.fence rather than
    into old_plane_state->uapi.fence

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031084557.1181630-1-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_atomic.c   |  3 -
 .../gpu/drm/i915/display/intel_atomic_plane.c | 83 +++++++++++--------
 drivers/gpu/drm/i915/display/intel_display.c  | 68 ++++-----------
 .../drm/i915/display/intel_display_types.h    |  2 -
 4 files changed, 61 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index 5d18145da2790..ec0d5168b5035 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -331,9 +331,6 @@ void intel_atomic_state_free(struct drm_atomic_state *_state)
 
 	drm_atomic_state_default_release(&state->base);
 	kfree(state->global_objs);
-
-	i915_sw_fence_fini(&state->commit_ready);
-
 	kfree(state);
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index b1074350616cc..3b9a669234227 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -31,7 +31,10 @@
  * prepare/check/commit/cleanup steps.
  */
 
+#include <linux/dma-fence-chain.h>
+
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_blend.h>
 #include <drm/drm_fourcc.h>
 
@@ -1012,6 +1015,41 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state)
 	return 0;
 }
 
+static int add_dma_resv_fences(struct dma_resv *resv,
+			       struct drm_plane_state *new_plane_state)
+{
+	struct dma_fence *fence = dma_fence_get(new_plane_state->fence);
+	struct dma_fence *new;
+	int ret;
+
+	ret = dma_resv_get_singleton(resv, dma_resv_usage_rw(false), &new);
+	if (ret)
+		goto error;
+
+	if (new && fence) {
+		struct dma_fence_chain *chain = dma_fence_chain_alloc();
+
+		if (!chain) {
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		dma_fence_chain_init(chain, fence, new, 1);
+		fence = &chain->base;
+
+	} else if (new) {
+		fence = new;
+	}
+
+	dma_fence_put(new_plane_state->fence);
+	new_plane_state->fence = fence;
+	return 0;
+
+error:
+	dma_fence_put(fence);
+	return ret;
+}
+
 /**
  * intel_prepare_plane_fb - Prepare fb for usage on plane
  * @_plane: drm plane to prepare for
@@ -1035,7 +1073,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 	struct intel_atomic_state *state =
 		to_intel_atomic_state(new_plane_state->uapi.state);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct intel_plane_state *old_plane_state =
+	struct intel_plane_state *old_plane_state =
 		intel_atomic_get_old_plane_state(state, plane);
 	struct drm_i915_gem_object *obj = intel_fb_obj(new_plane_state->hw.fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_plane_state->hw.fb);
@@ -1058,55 +1096,28 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 		 * can safely continue.
 		 */
 		if (new_crtc_state && intel_crtc_needs_modeset(new_crtc_state)) {
-			ret = i915_sw_fence_await_reservation(&state->commit_ready,
-							      old_obj->base.resv,
-							      false, 0,
-							      GFP_KERNEL);
+			ret = add_dma_resv_fences(old_obj->base.resv,
+						  &new_plane_state->uapi);
 			if (ret < 0)
 				return ret;
 		}
 	}
 
-	if (new_plane_state->uapi.fence) { /* explicit fencing */
-		i915_gem_fence_wait_priority(new_plane_state->uapi.fence,
-					     &attr);
-		ret = i915_sw_fence_await_dma_fence(&state->commit_ready,
-						    new_plane_state->uapi.fence,
-						    i915_fence_timeout(dev_priv),
-						    GFP_KERNEL);
-		if (ret < 0)
-			return ret;
-	}
-
 	if (!obj)
 		return 0;
 
-
 	ret = intel_plane_pin_fb(new_plane_state);
 	if (ret)
 		return ret;
 
-	i915_gem_object_wait_priority(obj, 0, &attr);
+	ret = drm_gem_plane_helper_prepare_fb(&plane->base, &new_plane_state->uapi);
+	if (ret < 0)
+		goto unpin_fb;
 
-	if (!new_plane_state->uapi.fence) { /* implicit fencing */
-		struct dma_resv_iter cursor;
-		struct dma_fence *fence;
-
-		ret = i915_sw_fence_await_reservation(&state->commit_ready,
-						      obj->base.resv, false,
-						      i915_fence_timeout(dev_priv),
-						      GFP_KERNEL);
-		if (ret < 0)
-			goto unpin_fb;
+	if (new_plane_state->uapi.fence) {
+		i915_gem_fence_wait_priority(new_plane_state->uapi.fence,
+					     &attr);
 
-		dma_resv_iter_begin(&cursor, obj->base.resv,
-				    DMA_RESV_USAGE_WRITE);
-		dma_resv_for_each_fence_unlocked(&cursor, fence) {
-			intel_display_rps_boost_after_vblank(new_plane_state->hw.crtc,
-							     fence);
-		}
-		dma_resv_iter_end(&cursor);
-	} else {
 		intel_display_rps_boost_after_vblank(new_plane_state->hw.crtc,
 						     new_plane_state->uapi.fence);
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index cdec0425f519f..49ded4e328831 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -48,6 +48,7 @@
 #include "g4x_dp.h"
 #include "g4x_hdmi.h"
 #include "hsw_ips.h"
+#include "i915_config.h"
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "i915_utils.h"
@@ -7057,29 +7058,22 @@ void intel_atomic_helper_free_state_worker(struct work_struct *work)
 
 static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state)
 {
-	struct wait_queue_entry wait_fence, wait_reset;
-	struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev);
-
-	init_wait_entry(&wait_fence, 0);
-	init_wait_entry(&wait_reset, 0);
-	for (;;) {
-		prepare_to_wait(&intel_state->commit_ready.wait,
-				&wait_fence, TASK_UNINTERRUPTIBLE);
-		prepare_to_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags,
-					      I915_RESET_MODESET),
-				&wait_reset, TASK_UNINTERRUPTIBLE);
-
+	struct drm_i915_private *i915 = to_i915(intel_state->base.dev);
+	struct drm_plane *plane;
+	struct drm_plane_state *new_plane_state;
+	int ret, i;
 
-		if (i915_sw_fence_done(&intel_state->commit_ready) ||
-		    test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags))
-			break;
+	for_each_new_plane_in_state(&intel_state->base, plane, new_plane_state, i) {
+		if (new_plane_state->fence) {
+			ret = dma_fence_wait_timeout(new_plane_state->fence, false,
+						     i915_fence_timeout(i915));
+			if (ret <= 0)
+				break;
 
-		schedule();
+			dma_fence_put(new_plane_state->fence);
+			new_plane_state->fence = NULL;
+		}
 	}
-	finish_wait(&intel_state->commit_ready.wait, &wait_fence);
-	finish_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags,
-				  I915_RESET_MODESET),
-		    &wait_reset);
 }
 
 static void intel_atomic_cleanup_work(struct work_struct *work)
@@ -7371,32 +7365,6 @@ static void intel_atomic_commit_work(struct work_struct *work)
 	intel_atomic_commit_tail(state);
 }
 
-static int
-intel_atomic_commit_ready(struct i915_sw_fence *fence,
-			  enum i915_sw_fence_notify notify)
-{
-	struct intel_atomic_state *state =
-		container_of(fence, struct intel_atomic_state, commit_ready);
-
-	switch (notify) {
-	case FENCE_COMPLETE:
-		/* we do blocking waits in the worker, nothing to do here */
-		break;
-	case FENCE_FREE:
-		{
-			struct drm_i915_private *i915 = to_i915(state->base.dev);
-			struct intel_atomic_helper *helper =
-				&i915->display.atomic_helper;
-
-			if (llist_add(&state->freed, &helper->free_list))
-				queue_work(i915->unordered_wq, &helper->free_work);
-			break;
-		}
-	}
-
-	return NOTIFY_DONE;
-}
-
 static void intel_atomic_track_fbs(struct intel_atomic_state *state)
 {
 	struct intel_plane_state *old_plane_state, *new_plane_state;
@@ -7419,10 +7387,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state,
 
 	state->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
-	drm_atomic_state_get(&state->base);
-	i915_sw_fence_init(&state->commit_ready,
-			   intel_atomic_commit_ready);
-
 	/*
 	 * The intel_legacy_cursor_update() fast path takes care
 	 * of avoiding the vblank waits for simple cursor
@@ -7455,7 +7419,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state,
 	if (ret) {
 		drm_dbg_atomic(&dev_priv->drm,
 			       "Preparing state failed with %i\n", ret);
-		i915_sw_fence_commit(&state->commit_ready);
 		intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref);
 		return ret;
 	}
@@ -7471,8 +7434,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state,
 		struct intel_crtc *crtc;
 		int i;
 
-		i915_sw_fence_commit(&state->commit_ready);
-
 		for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
 			intel_color_cleanup_commit(new_crtc_state);
 
@@ -7486,7 +7447,6 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state,
 	drm_atomic_state_get(&state->base);
 	INIT_WORK(&state->base.commit_work, intel_atomic_commit_work);
 
-	i915_sw_fence_commit(&state->commit_ready);
 	if (nonblock && state->modeset) {
 		queue_work(dev_priv->display.wq.modeset, &state->base.commit_work);
 	} else if (nonblock) {
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 65ea37fe8cff3..047fe3f8905ac 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -676,8 +676,6 @@ struct intel_atomic_state {
 
 	bool rps_interactive;
 
-	struct i915_sw_fence commit_ready;
-
 	struct llist_node freed;
 };
 
-- 
GitLab


From 8ddfc01ace51c85a2333fb9a9cbea34d9f87885d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 1 Nov 2023 18:20:16 +0100
Subject: [PATCH 0113/2340] fbdev/simplefb: Support memory-region property

The simple-framebuffer bindings specify that the "memory-region"
property can be used as an alternative to the "reg" property to define
the framebuffer memory used by the display hardware. Implement support
for this in the simplefb driver.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101172017.3872242-2-thierry.reding@gmail.com
---
 drivers/video/fbdev/simplefb.c | 35 +++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index 62f99f6fccd3c..18025f34fde71 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -21,6 +21,7 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_clk.h>
 #include <linux/of_platform.h>
 #include <linux/parser.h>
@@ -121,12 +122,13 @@ struct simplefb_params {
 	u32 height;
 	u32 stride;
 	struct simplefb_format *format;
+	struct resource memory;
 };
 
 static int simplefb_parse_dt(struct platform_device *pdev,
 			   struct simplefb_params *params)
 {
-	struct device_node *np = pdev->dev.of_node;
+	struct device_node *np = pdev->dev.of_node, *mem;
 	int ret;
 	const char *format;
 	int i;
@@ -166,6 +168,23 @@ static int simplefb_parse_dt(struct platform_device *pdev,
 		return -EINVAL;
 	}
 
+	mem = of_parse_phandle(np, "memory-region", 0);
+	if (mem) {
+		ret = of_address_to_resource(mem, 0, &params->memory);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "failed to parse memory-region\n");
+			of_node_put(mem);
+			return ret;
+		}
+
+		if (of_property_present(np, "reg"))
+			dev_warn(&pdev->dev, "preferring \"memory-region\" over \"reg\" property\n");
+
+		of_node_put(mem);
+	} else {
+		memset(&params->memory, 0, sizeof(params->memory));
+	}
+
 	return 0;
 }
 
@@ -193,6 +212,8 @@ static int simplefb_parse_pd(struct platform_device *pdev,
 		return -EINVAL;
 	}
 
+	memset(&params->memory, 0, sizeof(params->memory));
+
 	return 0;
 }
 
@@ -431,10 +452,14 @@ static int simplefb_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "No memory resource\n");
-		return -EINVAL;
+	if (params.memory.start == 0 && params.memory.end == 0) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!res) {
+			dev_err(&pdev->dev, "No memory resource\n");
+			return -EINVAL;
+		}
+	} else {
+		res = &params.memory;
 	}
 
 	mem = request_mem_region(res->start, resource_size(res), "simplefb");
-- 
GitLab


From 92a511a568e44cf11681a2223cae4d576a1a515d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 1 Nov 2023 18:20:17 +0100
Subject: [PATCH 0114/2340] fbdev/simplefb: Add support for generic
 power-domains

The simple-framebuffer device tree bindings document the power-domains
property, so make sure that simplefb supports it. This ensures that the
power domains remain enabled as long as simplefb is active.

v2: - remove unnecessary call to simplefb_detach_genpds() since that's
      already done automatically by devres
    - fix crash if power-domains property is missing in DT

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101172017.3872242-3-thierry.reding@gmail.com
---
 drivers/video/fbdev/simplefb.c | 93 ++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index 18025f34fde71..fe682af63827b 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -25,6 +25,7 @@
 #include <linux/of_clk.h>
 #include <linux/of_platform.h>
 #include <linux/parser.h>
+#include <linux/pm_domain.h>
 #include <linux/regulator/consumer.h>
 
 static const struct fb_fix_screeninfo simplefb_fix = {
@@ -78,6 +79,11 @@ struct simplefb_par {
 	unsigned int clk_count;
 	struct clk **clks;
 #endif
+#if defined CONFIG_OF && defined CONFIG_PM_GENERIC_DOMAINS
+	unsigned int num_genpds;
+	struct device **genpds;
+	struct device_link **genpd_links;
+#endif
 #if defined CONFIG_OF && defined CONFIG_REGULATOR
 	bool regulators_enabled;
 	u32 regulator_count;
@@ -432,6 +438,89 @@ static void simplefb_regulators_enable(struct simplefb_par *par,
 static void simplefb_regulators_destroy(struct simplefb_par *par) { }
 #endif
 
+#if defined CONFIG_OF && defined CONFIG_PM_GENERIC_DOMAINS
+static void simplefb_detach_genpds(void *res)
+{
+	struct simplefb_par *par = res;
+	unsigned int i = par->num_genpds;
+
+	if (par->num_genpds <= 1)
+		return;
+
+	while (i--) {
+		if (par->genpd_links[i])
+			device_link_del(par->genpd_links[i]);
+
+		if (!IS_ERR_OR_NULL(par->genpds[i]))
+			dev_pm_domain_detach(par->genpds[i], true);
+	}
+}
+
+static int simplefb_attach_genpds(struct simplefb_par *par,
+				  struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	unsigned int i;
+	int err;
+
+	err = of_count_phandle_with_args(dev->of_node, "power-domains",
+					 "#power-domain-cells");
+	if (err < 0) {
+		dev_info(dev, "failed to parse power-domains: %d\n", err);
+		return err;
+	}
+
+	par->num_genpds = err;
+
+	/*
+	 * Single power-domain devices are handled by the driver core, so
+	 * nothing to do here.
+	 */
+	if (par->num_genpds <= 1)
+		return 0;
+
+	par->genpds = devm_kcalloc(dev, par->num_genpds, sizeof(*par->genpds),
+				   GFP_KERNEL);
+	if (!par->genpds)
+		return -ENOMEM;
+
+	par->genpd_links = devm_kcalloc(dev, par->num_genpds,
+					sizeof(*par->genpd_links),
+					GFP_KERNEL);
+	if (!par->genpd_links)
+		return -ENOMEM;
+
+	for (i = 0; i < par->num_genpds; i++) {
+		par->genpds[i] = dev_pm_domain_attach_by_id(dev, i);
+		if (IS_ERR(par->genpds[i])) {
+			err = PTR_ERR(par->genpds[i]);
+			if (err == -EPROBE_DEFER) {
+				simplefb_detach_genpds(par);
+				return err;
+			}
+
+			dev_warn(dev, "failed to attach domain %u: %d\n", i, err);
+			continue;
+		}
+
+		par->genpd_links[i] = device_link_add(dev, par->genpds[i],
+						      DL_FLAG_STATELESS |
+						      DL_FLAG_PM_RUNTIME |
+						      DL_FLAG_RPM_ACTIVE);
+		if (!par->genpd_links[i])
+			dev_warn(dev, "failed to link power-domain %u\n", i);
+	}
+
+	return devm_add_action_or_reset(dev, simplefb_detach_genpds, par);
+}
+#else
+static int simplefb_attach_genpds(struct simplefb_par *par,
+				  struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
 static int simplefb_probe(struct platform_device *pdev)
 {
 	int ret;
@@ -518,6 +607,10 @@ static int simplefb_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto error_clocks;
 
+	ret = simplefb_attach_genpds(par, pdev);
+	if (ret < 0)
+		goto error_regulators;
+
 	simplefb_clocks_enable(par, pdev);
 	simplefb_regulators_enable(par, pdev);
 
-- 
GitLab


From 1118d10f5e5ab544c489fad4da373f9988416ece Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 31 Oct 2023 08:38:56 +0100
Subject: [PATCH 0115/2340] drm/v3d: update UAPI to match user-space for V3D
 7.x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

V3D 7.x takes a new parameter to configure TFU jobs that needs
to be provided by user space.

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Maíra Canal <mcanal@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073859.25298-2-itoral@igalia.com
---
 include/uapi/drm/v3d_drm.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 3dfc0af8756aa..1a7d7a689de38 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -319,6 +319,11 @@ struct drm_v3d_submit_tfu {
 
 	/* Pointer to an array of ioctl extensions*/
 	__u64 extensions;
+
+	struct {
+		__u32 ioc;
+		__u32 pad;
+	} v71;
 };
 
 /* Submits a compute shader for dispatch.  This job will block on any
-- 
GitLab


From 0ad5bc1ce4634ce9b5eaf017b01399ec5e49a03d Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 31 Oct 2023 08:38:57 +0100
Subject: [PATCH 0116/2340] drm/v3d: fix up register addresses for V3D 7.x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch updates a number of register addresses that have
been changed in Raspberry Pi 5 (V3D 7.1) and updates the
code to use the corresponding registers and addresses based
on the actual V3D version.

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Maíra Canal <mcanal@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073859.25298-3-itoral@igalia.com
---
 drivers/gpu/drm/v3d/v3d_debugfs.c | 178 +++++++++++++++++-------------
 drivers/gpu/drm/v3d/v3d_gem.c     |   4 +-
 drivers/gpu/drm/v3d/v3d_irq.c     |  46 ++++----
 drivers/gpu/drm/v3d/v3d_regs.h    |  94 +++++++++-------
 drivers/gpu/drm/v3d/v3d_sched.c   |  38 ++++---
 5 files changed, 204 insertions(+), 156 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 330669f51fa79..f843a50d5dce6 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -12,69 +12,83 @@
 #include "v3d_drv.h"
 #include "v3d_regs.h"
 
-#define REGDEF(reg) { reg, #reg }
+#define REGDEF(min_ver, max_ver, reg) { min_ver, max_ver, reg, #reg }
 struct v3d_reg_def {
+	u32 min_ver;
+	u32 max_ver;
 	u32 reg;
 	const char *name;
 };
 
 static const struct v3d_reg_def v3d_hub_reg_defs[] = {
-	REGDEF(V3D_HUB_AXICFG),
-	REGDEF(V3D_HUB_UIFCFG),
-	REGDEF(V3D_HUB_IDENT0),
-	REGDEF(V3D_HUB_IDENT1),
-	REGDEF(V3D_HUB_IDENT2),
-	REGDEF(V3D_HUB_IDENT3),
-	REGDEF(V3D_HUB_INT_STS),
-	REGDEF(V3D_HUB_INT_MSK_STS),
-
-	REGDEF(V3D_MMU_CTL),
-	REGDEF(V3D_MMU_VIO_ADDR),
-	REGDEF(V3D_MMU_VIO_ID),
-	REGDEF(V3D_MMU_DEBUG_INFO),
+	REGDEF(33, 42, V3D_HUB_AXICFG),
+	REGDEF(33, 71, V3D_HUB_UIFCFG),
+	REGDEF(33, 71, V3D_HUB_IDENT0),
+	REGDEF(33, 71, V3D_HUB_IDENT1),
+	REGDEF(33, 71, V3D_HUB_IDENT2),
+	REGDEF(33, 71, V3D_HUB_IDENT3),
+	REGDEF(33, 71, V3D_HUB_INT_STS),
+	REGDEF(33, 71, V3D_HUB_INT_MSK_STS),
+
+	REGDEF(33, 71, V3D_MMU_CTL),
+	REGDEF(33, 71, V3D_MMU_VIO_ADDR),
+	REGDEF(33, 71, V3D_MMU_VIO_ID),
+	REGDEF(33, 71, V3D_MMU_DEBUG_INFO),
+
+	REGDEF(71, 71, V3D_GMP_STATUS(71)),
+	REGDEF(71, 71, V3D_GMP_CFG(71)),
+	REGDEF(71, 71, V3D_GMP_VIO_ADDR(71)),
 };
 
 static const struct v3d_reg_def v3d_gca_reg_defs[] = {
-	REGDEF(V3D_GCA_SAFE_SHUTDOWN),
-	REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
+	REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN),
+	REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK),
 };
 
 static const struct v3d_reg_def v3d_core_reg_defs[] = {
-	REGDEF(V3D_CTL_IDENT0),
-	REGDEF(V3D_CTL_IDENT1),
-	REGDEF(V3D_CTL_IDENT2),
-	REGDEF(V3D_CTL_MISCCFG),
-	REGDEF(V3D_CTL_INT_STS),
-	REGDEF(V3D_CTL_INT_MSK_STS),
-	REGDEF(V3D_CLE_CT0CS),
-	REGDEF(V3D_CLE_CT0CA),
-	REGDEF(V3D_CLE_CT0EA),
-	REGDEF(V3D_CLE_CT1CS),
-	REGDEF(V3D_CLE_CT1CA),
-	REGDEF(V3D_CLE_CT1EA),
-
-	REGDEF(V3D_PTB_BPCA),
-	REGDEF(V3D_PTB_BPCS),
-
-	REGDEF(V3D_GMP_STATUS),
-	REGDEF(V3D_GMP_CFG),
-	REGDEF(V3D_GMP_VIO_ADDR),
-
-	REGDEF(V3D_ERR_FDBGO),
-	REGDEF(V3D_ERR_FDBGB),
-	REGDEF(V3D_ERR_FDBGS),
-	REGDEF(V3D_ERR_STAT),
+	REGDEF(33, 71, V3D_CTL_IDENT0),
+	REGDEF(33, 71, V3D_CTL_IDENT1),
+	REGDEF(33, 71, V3D_CTL_IDENT2),
+	REGDEF(33, 71, V3D_CTL_MISCCFG),
+	REGDEF(33, 71, V3D_CTL_INT_STS),
+	REGDEF(33, 71, V3D_CTL_INT_MSK_STS),
+	REGDEF(33, 71, V3D_CLE_CT0CS),
+	REGDEF(33, 71, V3D_CLE_CT0CA),
+	REGDEF(33, 71, V3D_CLE_CT0EA),
+	REGDEF(33, 71, V3D_CLE_CT1CS),
+	REGDEF(33, 71, V3D_CLE_CT1CA),
+	REGDEF(33, 71, V3D_CLE_CT1EA),
+
+	REGDEF(33, 71, V3D_PTB_BPCA),
+	REGDEF(33, 71, V3D_PTB_BPCS),
+
+	REGDEF(33, 41, V3D_GMP_STATUS(33)),
+	REGDEF(33, 41, V3D_GMP_CFG(33)),
+	REGDEF(33, 41, V3D_GMP_VIO_ADDR(33)),
+
+	REGDEF(33, 71, V3D_ERR_FDBGO),
+	REGDEF(33, 71, V3D_ERR_FDBGB),
+	REGDEF(33, 71, V3D_ERR_FDBGS),
+	REGDEF(33, 71, V3D_ERR_STAT),
 };
 
 static const struct v3d_reg_def v3d_csd_reg_defs[] = {
-	REGDEF(V3D_CSD_STATUS),
-	REGDEF(V3D_CSD_CURRENT_CFG0),
-	REGDEF(V3D_CSD_CURRENT_CFG1),
-	REGDEF(V3D_CSD_CURRENT_CFG2),
-	REGDEF(V3D_CSD_CURRENT_CFG3),
-	REGDEF(V3D_CSD_CURRENT_CFG4),
-	REGDEF(V3D_CSD_CURRENT_CFG5),
-	REGDEF(V3D_CSD_CURRENT_CFG6),
+	REGDEF(41, 71, V3D_CSD_STATUS),
+	REGDEF(41, 41, V3D_CSD_CURRENT_CFG0(41)),
+	REGDEF(41, 41, V3D_CSD_CURRENT_CFG1(41)),
+	REGDEF(41, 41, V3D_CSD_CURRENT_CFG2(41)),
+	REGDEF(41, 41, V3D_CSD_CURRENT_CFG3(41)),
+	REGDEF(41, 41, V3D_CSD_CURRENT_CFG4(41)),
+	REGDEF(41, 41, V3D_CSD_CURRENT_CFG5(41)),
+	REGDEF(41, 41, V3D_CSD_CURRENT_CFG6(41)),
+	REGDEF(71, 71, V3D_CSD_CURRENT_CFG0(71)),
+	REGDEF(71, 71, V3D_CSD_CURRENT_CFG1(71)),
+	REGDEF(71, 71, V3D_CSD_CURRENT_CFG2(71)),
+	REGDEF(71, 71, V3D_CSD_CURRENT_CFG3(71)),
+	REGDEF(71, 71, V3D_CSD_CURRENT_CFG4(71)),
+	REGDEF(71, 71, V3D_CSD_CURRENT_CFG5(71)),
+	REGDEF(71, 71, V3D_CSD_CURRENT_CFG6(71)),
+	REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG7),
 };
 
 static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
@@ -85,38 +99,41 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
 	int i, core;
 
 	for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg,
-			   V3D_READ(v3d_hub_reg_defs[i].reg));
+		const struct v3d_reg_def *def = &v3d_hub_reg_defs[i];
+
+		if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
+			seq_printf(m, "%s (0x%04x): 0x%08x\n",
+				   def->name, def->reg, V3D_READ(def->reg));
+		}
 	}
 
-	if (v3d->ver < 41) {
-		for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
+	for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
+		const struct v3d_reg_def *def = &v3d_gca_reg_defs[i];
+
+		if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
 			seq_printf(m, "%s (0x%04x): 0x%08x\n",
-				   v3d_gca_reg_defs[i].name,
-				   v3d_gca_reg_defs[i].reg,
-				   V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+				   def->name, def->reg, V3D_GCA_READ(def->reg));
 		}
 	}
 
 	for (core = 0; core < v3d->cores; core++) {
 		for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) {
-			seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
-				   core,
-				   v3d_core_reg_defs[i].name,
-				   v3d_core_reg_defs[i].reg,
-				   V3D_CORE_READ(core,
-						 v3d_core_reg_defs[i].reg));
+			const struct v3d_reg_def *def = &v3d_core_reg_defs[i];
+
+			if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
+				seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
+					   core, def->name, def->reg,
+					   V3D_CORE_READ(core, def->reg));
+			}
 		}
 
-		if (v3d_has_csd(v3d)) {
-			for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) {
+		for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) {
+			const struct v3d_reg_def *def = &v3d_csd_reg_defs[i];
+
+			if (v3d->ver >= def->min_ver && v3d->ver <= def->max_ver) {
 				seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
-					   core,
-					   v3d_csd_reg_defs[i].name,
-					   v3d_csd_reg_defs[i].reg,
-					   V3D_CORE_READ(core,
-							 v3d_csd_reg_defs[i].reg));
+					   core, def->name, def->reg,
+					   V3D_CORE_READ(core, def->reg));
 			}
 		}
 	}
@@ -147,8 +164,10 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused)
 		   str_yes_no(ident2 & V3D_HUB_IDENT2_WITH_MMU));
 	seq_printf(m, "TFU:        %s\n",
 		   str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TFU));
-	seq_printf(m, "TSY:        %s\n",
-		   str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY));
+	if (v3d->ver <= 42) {
+		seq_printf(m, "TSY:        %s\n",
+			   str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY));
+	}
 	seq_printf(m, "MSO:        %s\n",
 		   str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_MSO));
 	seq_printf(m, "L3C:        %s (%dkb)\n",
@@ -177,10 +196,14 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused)
 		seq_printf(m, "  QPUs:         %d\n", nslc * qups);
 		seq_printf(m, "  Semaphores:   %d\n",
 			   V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM));
-		seq_printf(m, "  BCG int:      %d\n",
-			   (ident2 & V3D_IDENT2_BCG_INT) != 0);
-		seq_printf(m, "  Override TMU: %d\n",
-			   (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
+		if (v3d->ver <= 42) {
+			seq_printf(m, "  BCG int:      %d\n",
+				   (ident2 & V3D_IDENT2_BCG_INT) != 0);
+		}
+		if (v3d->ver < 40) {
+			seq_printf(m, "  Override TMU: %d\n",
+				   (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
+		}
 	}
 
 	return 0;
@@ -212,14 +235,15 @@ static int v3d_measure_clock(struct seq_file *m, void *unused)
 	int measure_ms = 1000;
 
 	if (v3d->ver >= 40) {
+		int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver);
 		V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3,
-			       V3D_SET_FIELD(V3D_PCTR_CYCLE_COUNT,
+			       V3D_SET_FIELD(cycle_count_reg,
 					     V3D_PCTR_S0));
 		V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1);
 		V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1);
 	} else {
 		V3D_CORE_WRITE(core, V3D_V3_PCTR_0_PCTRS0,
-			       V3D_PCTR_CYCLE_COUNT);
+			       V3D_PCTR_CYCLE_COUNT(v3d->ver));
 		V3D_CORE_WRITE(core, V3D_V3_PCTR_0_CLR, 1);
 		V3D_CORE_WRITE(core, V3D_V3_PCTR_0_EN,
 			       V3D_V3_PCTR_0_EN_ENABLE |
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index afa7d170d1fff..5688d46d30e69 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -47,9 +47,9 @@ v3d_init_hw_state(struct v3d_dev *v3d)
 static void
 v3d_idle_axi(struct v3d_dev *v3d, int core)
 {
-	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);
+	V3D_CORE_WRITE(core, V3D_GMP_CFG(v3d->ver), V3D_GMP_CFG_STOP_REQ);
 
-	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
+	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS(v3d->ver)) &
 		      (V3D_GMP_STATUS_RD_COUNT_MASK |
 		       V3D_GMP_STATUS_WR_COUNT_MASK |
 		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index e714d5318f309..9705e0cb576d8 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -19,16 +19,17 @@
 #include "v3d_regs.h"
 #include "v3d_trace.h"
 
-#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM |	\
-			     V3D_INT_FLDONE |	\
-			     V3D_INT_FRDONE |	\
-			     V3D_INT_CSDDONE |	\
-			     V3D_INT_GMPV))
-
-#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV |	\
-			    V3D_HUB_INT_MMU_PTI |	\
-			    V3D_HUB_INT_MMU_CAP |	\
-			    V3D_HUB_INT_TFUC))
+#define V3D_CORE_IRQS(ver) ((u32)(V3D_INT_OUTOMEM |	\
+				  V3D_INT_FLDONE |	\
+				  V3D_INT_FRDONE |	\
+				  V3D_INT_CSDDONE(ver) |	\
+				  (ver < 71 ? V3D_INT_GMPV : 0)))
+
+#define V3D_HUB_IRQS(ver) ((u32)(V3D_HUB_INT_MMU_WRV |	\
+				 V3D_HUB_INT_MMU_PTI |	\
+				 V3D_HUB_INT_MMU_CAP |	\
+				 V3D_HUB_INT_TFUC |		\
+				 (ver >= 71 ? V3D_V7_HUB_INT_GMPV : 0)))
 
 static irqreturn_t
 v3d_hub_irq(int irq, void *arg);
@@ -115,7 +116,7 @@ v3d_irq(int irq, void *arg)
 		status = IRQ_HANDLED;
 	}
 
-	if (intsts & V3D_INT_CSDDONE) {
+	if (intsts & V3D_INT_CSDDONE(v3d->ver)) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->csd_job->base.irq_fence);
 
@@ -127,7 +128,7 @@ v3d_irq(int irq, void *arg)
 	/* We shouldn't be triggering these if we have GMP in
 	 * always-allowed mode.
 	 */
-	if (intsts & V3D_INT_GMPV)
+	if (v3d->ver < 71 && (intsts & V3D_INT_GMPV))
 		dev_err(v3d->drm.dev, "GMP violation\n");
 
 	/* V3D 4.2 wires the hub and core IRQs together, so if we &
@@ -197,6 +198,11 @@ v3d_hub_irq(int irq, void *arg)
 		status = IRQ_HANDLED;
 	}
 
+	if (v3d->ver >= 71 && (intsts & V3D_V7_HUB_INT_GMPV)) {
+		dev_err(v3d->drm.dev, "GMP Violation\n");
+		status = IRQ_HANDLED;
+	}
+
 	return status;
 }
 
@@ -211,8 +217,8 @@ v3d_irq_init(struct v3d_dev *v3d)
 	 * for us.
 	 */
 	for (core = 0; core < v3d->cores; core++)
-		V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
-	V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+		V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
+	V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver));
 
 	irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1);
 	if (irq1 == -EPROBE_DEFER)
@@ -256,12 +262,12 @@ v3d_irq_enable(struct v3d_dev *v3d)
 
 	/* Enable our set of interrupts, masking out any others. */
 	for (core = 0; core < v3d->cores; core++) {
-		V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS);
-		V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS);
+		V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS(v3d->ver));
+		V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS(v3d->ver));
 	}
 
-	V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS);
-	V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS);
+	V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS(v3d->ver));
+	V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS(v3d->ver));
 }
 
 void
@@ -276,8 +282,8 @@ v3d_irq_disable(struct v3d_dev *v3d)
 
 	/* Clear any pending interrupts we might have left. */
 	for (core = 0; core < v3d->cores; core++)
-		V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
-	V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+		V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
+	V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver));
 
 	cancel_work_sync(&v3d->overflow_mem_work);
 }
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index 3663e0d6bf766..1b1a62ad95852 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -57,6 +57,7 @@
 #define V3D_HUB_INT_MSK_STS                            0x0005c
 #define V3D_HUB_INT_MSK_SET                            0x00060
 #define V3D_HUB_INT_MSK_CLR                            0x00064
+# define V3D_V7_HUB_INT_GMPV                           BIT(6)
 # define V3D_HUB_INT_MMU_WRV                           BIT(5)
 # define V3D_HUB_INT_MMU_PTI                           BIT(4)
 # define V3D_HUB_INT_MMU_CAP                           BIT(3)
@@ -64,6 +65,7 @@
 # define V3D_HUB_INT_TFUC                              BIT(1)
 # define V3D_HUB_INT_TFUF                              BIT(0)
 
+/* GCA registers only exist in V3D < 41 */
 #define V3D_GCA_CACHE_CTRL                             0x0000c
 # define V3D_GCA_CACHE_CTRL_FLUSH                      BIT(0)
 
@@ -86,7 +88,8 @@
 # define V3D_TOP_GR_BRIDGE_SW_INIT_1                   0x0000c
 # define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
 
-#define V3D_TFU_CS                                     0x00400
+#define V3D_TFU_CS(ver) ((ver >= 71) ? 0x00700 : 0x00400)
+
 /* Stops current job, empties input fifo. */
 # define V3D_TFU_CS_TFURST                             BIT(31)
 # define V3D_TFU_CS_CVTCT_MASK                         V3D_MASK(23, 16)
@@ -95,7 +98,7 @@
 # define V3D_TFU_CS_NFREE_SHIFT                        8
 # define V3D_TFU_CS_BUSY                               BIT(0)
 
-#define V3D_TFU_SU                                     0x00404
+#define V3D_TFU_SU(ver) ((ver >= 71) ? 0x00704 : 0x00404)
 /* Interrupt when FINTTHR input slots are free (0 = disabled) */
 # define V3D_TFU_SU_FINTTHR_MASK                       V3D_MASK(13, 8)
 # define V3D_TFU_SU_FINTTHR_SHIFT                      8
@@ -106,39 +109,42 @@
 # define V3D_TFU_SU_THROTTLE_MASK                      V3D_MASK(1, 0)
 # define V3D_TFU_SU_THROTTLE_SHIFT                     0
 
-#define V3D_TFU_ICFG                                   0x00408
+#define V3D_TFU_ICFG(ver) ((ver >= 71) ? 0x00708 : 0x00408)
 /* Interrupt when the conversion is complete. */
 # define V3D_TFU_ICFG_IOC                              BIT(0)
 
 /* Input Image Address */
-#define V3D_TFU_IIA                                    0x0040c
+#define V3D_TFU_IIA(ver) ((ver >= 71) ? 0x0070c : 0x0040c)
 /* Input Chroma Address */
-#define V3D_TFU_ICA                                    0x00410
+#define V3D_TFU_ICA(ver) ((ver >= 71) ? 0x00710 : 0x00410)
 /* Input Image Stride */
-#define V3D_TFU_IIS                                    0x00414
+#define V3D_TFU_IIS(ver) ((ver >= 71) ? 0x00714 : 0x00414)
 /* Input Image U-Plane Address */
-#define V3D_TFU_IUA                                    0x00418
+#define V3D_TFU_IUA(ver) ((ver >= 71) ? 0x00718 : 0x00418)
+/* Image output config (VD 7.x only) */
+#define V3D_V7_TFU_IOC                                 0x0071c
 /* Output Image Address */
-#define V3D_TFU_IOA                                    0x0041c
+#define V3D_TFU_IOA(ver) ((ver >= 71) ? 0x00720 : 0x0041c)
 /* Image Output Size */
-#define V3D_TFU_IOS                                    0x00420
+#define V3D_TFU_IOS(ver) ((ver >= 71) ? 0x00724 : 0x00420)
 /* TFU YUV Coefficient 0 */
-#define V3D_TFU_COEF0                                  0x00424
-/* Use these regs instead of the defaults. */
+#define V3D_TFU_COEF0(ver) ((ver >= 71) ? 0x00728 : 0x00424)
+/* Use these regs instead of the defaults (V3D 4.x only) */
 # define V3D_TFU_COEF0_USECOEF                         BIT(31)
 /* TFU YUV Coefficient 1 */
-#define V3D_TFU_COEF1                                  0x00428
+#define V3D_TFU_COEF1(ver) ((ver >= 71) ? 0x0072c : 0x00428)
 /* TFU YUV Coefficient 2 */
-#define V3D_TFU_COEF2                                  0x0042c
+#define V3D_TFU_COEF2(ver) ((ver >= 71) ? 0x00730 : 0x0042c)
 /* TFU YUV Coefficient 3 */
-#define V3D_TFU_COEF3                                  0x00430
+#define V3D_TFU_COEF3(ver) ((ver >= 71) ? 0x00734 : 0x00430)
 
+/* V3D 4.x only */
 #define V3D_TFU_CRC                                    0x00434
 
 /* Per-MMU registers. */
 
 #define V3D_MMUC_CONTROL                               0x01000
-# define V3D_MMUC_CONTROL_CLEAR                        BIT(3)
+#define V3D_MMUC_CONTROL_CLEAR(ver) ((ver >= 71) ? BIT(11) : BIT(3))
 # define V3D_MMUC_CONTROL_FLUSHING                     BIT(2)
 # define V3D_MMUC_CONTROL_FLUSH                        BIT(1)
 # define V3D_MMUC_CONTROL_ENABLE                       BIT(0)
@@ -246,7 +252,6 @@
 
 #define V3D_CTL_L2TCACTL                               0x00030
 # define V3D_L2TCACTL_TMUWCF                           BIT(8)
-# define V3D_L2TCACTL_L2T_NO_WM                        BIT(4)
 /* Invalidates cache lines. */
 # define V3D_L2TCACTL_FLM_FLUSH                        0
 /* Removes cachelines without writing dirty lines back. */
@@ -267,8 +272,8 @@
 #define V3D_CTL_INT_MSK_CLR                            0x00064
 # define V3D_INT_QPU_MASK                              V3D_MASK(27, 16)
 # define V3D_INT_QPU_SHIFT                             16
-# define V3D_INT_CSDDONE                               BIT(7)
-# define V3D_INT_PCTR                                  BIT(6)
+#define V3D_INT_CSDDONE(ver) ((ver >= 71) ? BIT(6) : BIT(7))
+#define V3D_INT_PCTR(ver) ((ver >= 71) ? BIT(5) : BIT(6))
 # define V3D_INT_GMPV                                  BIT(5)
 # define V3D_INT_TRFB                                  BIT(4)
 # define V3D_INT_SPILLUSE                              BIT(3)
@@ -350,21 +355,25 @@
 #define V3D_V4_PCTR_0_SRC_X(x)                         (V3D_V4_PCTR_0_SRC_0_3 + \
 							4 * (x))
 # define V3D_PCTR_S0_MASK                              V3D_MASK(6, 0)
+# define V3D_V7_PCTR_S0_MASK                           V3D_MASK(7, 0)
 # define V3D_PCTR_S0_SHIFT                             0
 # define V3D_PCTR_S1_MASK                              V3D_MASK(14, 8)
+# define V3D_V7_PCTR_S1_MASK                           V3D_MASK(15, 8)
 # define V3D_PCTR_S1_SHIFT                             8
 # define V3D_PCTR_S2_MASK                              V3D_MASK(22, 16)
+# define V3D_V7_PCTR_S2_MASK                           V3D_MASK(23, 16)
 # define V3D_PCTR_S2_SHIFT                             16
 # define V3D_PCTR_S3_MASK                              V3D_MASK(30, 24)
+# define V3D_V7_PCTR_S3_MASK                           V3D_MASK(31, 24)
 # define V3D_PCTR_S3_SHIFT                             24
-# define V3D_PCTR_CYCLE_COUNT                          32
+#define V3D_PCTR_CYCLE_COUNT(ver) ((ver >= 71) ? 0 : 32)
 
 /* Output values of the counters. */
 #define V3D_PCTR_0_PCTR0                               0x00680
 #define V3D_PCTR_0_PCTR31                              0x006fc
 #define V3D_PCTR_0_PCTRX(x)                            (V3D_PCTR_0_PCTR0 + \
 							4 * (x))
-#define V3D_GMP_STATUS                                 0x00800
+#define V3D_GMP_STATUS(ver) ((ver >= 71) ? 0x00600 : 0x00800)
 # define V3D_GMP_STATUS_GMPRST                         BIT(31)
 # define V3D_GMP_STATUS_WR_COUNT_MASK                  V3D_MASK(30, 24)
 # define V3D_GMP_STATUS_WR_COUNT_SHIFT                 24
@@ -377,13 +386,13 @@
 # define V3D_GMP_STATUS_INVPROT                        BIT(1)
 # define V3D_GMP_STATUS_VIO                            BIT(0)
 
-#define V3D_GMP_CFG                                    0x00804
+#define V3D_GMP_CFG(ver) ((ver >= 71) ? 0x00604 : 0x00804)
 # define V3D_GMP_CFG_LBURSTEN                          BIT(3)
 # define V3D_GMP_CFG_PGCRSEN                           BIT()
 # define V3D_GMP_CFG_STOP_REQ                          BIT(1)
 # define V3D_GMP_CFG_PROT_ENABLE                       BIT(0)
 
-#define V3D_GMP_VIO_ADDR                               0x00808
+#define V3D_GMP_VIO_ADDR(ver) ((ver >= 71) ? 0x00608 : 0x00808)
 #define V3D_GMP_VIO_TYPE                               0x0080c
 #define V3D_GMP_TABLE_ADDR                             0x00810
 #define V3D_GMP_CLEAR_LOAD                             0x00814
@@ -398,25 +407,25 @@
 # define V3D_CSD_STATUS_HAVE_CURRENT_DISPATCH          BIT(1)
 # define V3D_CSD_STATUS_HAVE_QUEUED_DISPATCH           BIT(0)
 
-#define V3D_CSD_QUEUED_CFG0                            0x00904
+#define V3D_CSD_QUEUED_CFG0(ver) ((ver >= 71) ? 0x00930 : 0x00904)
 # define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_MASK            V3D_MASK(31, 16)
 # define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_SHIFT           16
 # define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_MASK          V3D_MASK(15, 0)
 # define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_SHIFT         0
 
-#define V3D_CSD_QUEUED_CFG1                            0x00908
+#define V3D_CSD_QUEUED_CFG1(ver) ((ver >= 71) ? 0x00934 : 0x00908)
 # define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_MASK            V3D_MASK(31, 16)
 # define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_SHIFT           16
 # define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_MASK          V3D_MASK(15, 0)
 # define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_SHIFT         0
 
-#define V3D_CSD_QUEUED_CFG2                            0x0090c
+#define V3D_CSD_QUEUED_CFG2(ver) ((ver >= 71) ? 0x00938 : 0x0090c)
 # define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_MASK            V3D_MASK(31, 16)
 # define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_SHIFT           16
 # define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_MASK          V3D_MASK(15, 0)
 # define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_SHIFT         0
 
-#define V3D_CSD_QUEUED_CFG3                            0x00910
+#define V3D_CSD_QUEUED_CFG3(ver) ((ver >= 71) ? 0x0093c : 0x00910)
 # define V3D_CSD_QUEUED_CFG3_OVERLAP_WITH_PREV         BIT(26)
 # define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_MASK            V3D_MASK(25, 20)
 # define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_SHIFT           20
@@ -428,23 +437,28 @@
 # define V3D_CSD_QUEUED_CFG3_WG_SIZE_SHIFT             0
 
 /* Number of batches, minus 1 */
-#define V3D_CSD_QUEUED_CFG4                            0x00914
+#define V3D_CSD_QUEUED_CFG4(ver) ((ver >= 71) ? 0x00940 : 0x00914)
 
 /* Shader address, pnan, singleseg, threading, like a shader record. */
-#define V3D_CSD_QUEUED_CFG5                            0x00918
+#define V3D_CSD_QUEUED_CFG5(ver) ((ver >= 71) ? 0x00944 : 0x00918)
 
 /* Uniforms address (4 byte aligned) */
-#define V3D_CSD_QUEUED_CFG6                            0x0091c
-
-#define V3D_CSD_CURRENT_CFG0                          0x00920
-#define V3D_CSD_CURRENT_CFG1                          0x00924
-#define V3D_CSD_CURRENT_CFG2                          0x00928
-#define V3D_CSD_CURRENT_CFG3                          0x0092c
-#define V3D_CSD_CURRENT_CFG4                          0x00930
-#define V3D_CSD_CURRENT_CFG5                          0x00934
-#define V3D_CSD_CURRENT_CFG6                          0x00938
-
-#define V3D_CSD_CURRENT_ID0                            0x0093c
+#define V3D_CSD_QUEUED_CFG6(ver) ((ver >= 71) ? 0x00948 : 0x0091c)
+
+/* V3D 7.x+ only */
+#define V3D_V7_CSD_QUEUED_CFG7                         0x0094c
+
+#define V3D_CSD_CURRENT_CFG0(ver) ((ver >= 71) ? 0x00958 : 0x00920)
+#define V3D_CSD_CURRENT_CFG1(ver) ((ver >= 71) ? 0x0095c : 0x00924)
+#define V3D_CSD_CURRENT_CFG2(ver) ((ver >= 71) ? 0x00960 : 0x00928)
+#define V3D_CSD_CURRENT_CFG3(ver) ((ver >= 71) ? 0x00964 : 0x0092c)
+#define V3D_CSD_CURRENT_CFG4(ver) ((ver >= 71) ? 0x00968 : 0x00930)
+#define V3D_CSD_CURRENT_CFG5(ver) ((ver >= 71) ? 0x0096c : 0x00934)
+#define V3D_CSD_CURRENT_CFG6(ver) ((ver >= 71) ? 0x00970 : 0x00938)
+/* V3D 7.x+ only */
+#define V3D_V7_CSD_CURRENT_CFG7                        0x00974
+
+#define V3D_CSD_CURRENT_ID0(ver) ((ver >= 71) ? 0x00978 : 0x0093c)
 # define V3D_CSD_CURRENT_ID0_WG_X_MASK                 V3D_MASK(31, 16)
 # define V3D_CSD_CURRENT_ID0_WG_X_SHIFT                16
 # define V3D_CSD_CURRENT_ID0_WG_IN_SG_MASK             V3D_MASK(11, 8)
@@ -452,7 +466,7 @@
 # define V3D_CSD_CURRENT_ID0_L_IDX_MASK                V3D_MASK(7, 0)
 # define V3D_CSD_CURRENT_ID0_L_IDX_SHIFT               0
 
-#define V3D_CSD_CURRENT_ID1                            0x00940
+#define V3D_CSD_CURRENT_ID1(ver) ((ver >= 71) ? 0x0097c : 0x00940)
 # define V3D_CSD_CURRENT_ID0_WG_Z_MASK                 V3D_MASK(31, 16)
 # define V3D_CSD_CURRENT_ID0_WG_Z_SHIFT                16
 # define V3D_CSD_CURRENT_ID0_WG_Y_MASK                 V3D_MASK(15, 0)
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 0b6696b0d882f..f59b63e51b14b 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -190,20 +190,22 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
-	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
-	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
-	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
-	V3D_WRITE(V3D_TFU_IUA, job->args.iua);
-	V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
-	V3D_WRITE(V3D_TFU_IOS, job->args.ios);
-	V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
-	if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
-		V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
-		V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
-		V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
+	V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia);
+	V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis);
+	V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica);
+	V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua);
+	V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa);
+	if (v3d->ver >= 71)
+		V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc);
+	V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios);
+	V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]);
+	if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) {
+		V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]);
+		V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]);
+		V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]);
 	}
 	/* ICFG kicks off the job. */
-	V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
+	V3D_WRITE(V3D_TFU_ICFG(v3d->ver), job->args.icfg | V3D_TFU_ICFG_IOC);
 
 	return fence;
 }
@@ -215,7 +217,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 	struct v3d_dev *v3d = job->base.v3d;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
-	int i;
+	int i, csd_cfg0_reg, csd_cfg_reg_count;
 
 	v3d->csd_job = job;
 
@@ -233,10 +235,12 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 
 	v3d_switch_perfmon(v3d, &job->base);
 
-	for (i = 1; i <= 6; i++)
-		V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
+	csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver);
+	csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7;
+	for (i = 1; i <= csd_cfg_reg_count; i++)
+		V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]);
 	/* CFG0 write kicks off the job. */
-	V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);
+	V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]);
 
 	return fence;
 }
@@ -336,7 +340,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job)
 {
 	struct v3d_csd_job *job = to_csd_job(sched_job);
 	struct v3d_dev *v3d = job->base.v3d;
-	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);
+	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver));
 
 	/* If we've made progress, skip reset and let the timer get
 	 * rearmed.
-- 
GitLab


From ebb2f6eea688b9ffa46527c3e7570b2c347497b8 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 31 Oct 2023 08:38:58 +0100
Subject: [PATCH 0117/2340] dt-bindings: gpu: v3d: Add BCM2712's compatible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCM2712, Raspberry Pi 5's SoC, contains a V3D core. So add its specific
compatible to the bindings.

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Maíra Canal <mcanal@igalia.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073859.25298-4-itoral@igalia.com
---
 Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
index dae55b8a267b0..dc078ceeca9ac 100644
--- a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
+++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@@ -17,6 +17,7 @@ properties:
   compatible:
     enum:
       - brcm,2711-v3d
+      - brcm,2712-v3d
       - brcm,7268-v3d
       - brcm,7278-v3d
 
-- 
GitLab


From 6fd9487147c4f18ad77eea00bd8c9189eec74a3e Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 31 Oct 2023 08:38:59 +0100
Subject: [PATCH 0118/2340] drm/v3d: add brcm,2712-v3d as a compatible V3D
 device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required to get the V3D module to load with Raspberry Pi 5.

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Stefan Wahren <wahrenst@gmx.net>
Reviewed-by: Maíra Canal <mcanal@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073859.25298-5-itoral@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index ffbbe9d527d32..1ab46bdf8ad71 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -187,6 +187,7 @@ static const struct drm_driver v3d_drm_driver = {
 
 static const struct of_device_id v3d_of_match[] = {
 	{ .compatible = "brcm,2711-v3d" },
+	{ .compatible = "brcm,2712-v3d" },
 	{ .compatible = "brcm,7268-v3d" },
 	{ .compatible = "brcm,7278-v3d" },
 	{},
-- 
GitLab


From 2efb81e587961d5d863c2ad3156f96abde4d6a8f Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 14:45:00 +0200
Subject: [PATCH 0119/2340] drm/i915: make some error capture functions static

Not needed outside of i915_gpu_error.c.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031124502.1772160-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++----
 drivers/gpu/drm/i915/i915_gpu_error.h | 5 -----
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b4e31e59c7991..db7ac28c44e5c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -520,7 +520,7 @@ __find_vma(struct i915_vma_coredump *vma, const char *name)
 	return NULL;
 }
 
-struct i915_vma_coredump *
+static struct i915_vma_coredump *
 intel_gpu_error_find_batch(const struct intel_engine_coredump *ee)
 {
 	return __find_vma(ee->vma, "batch");
@@ -609,9 +609,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-			       const struct intel_engine_cs *engine,
-			       const struct i915_vma_coredump *vma)
+static void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+				      const struct intel_engine_cs *engine,
+				      const struct i915_vma_coredump *vma)
 {
 	char out[ASCII85_BUFSZ];
 	struct page *page;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 9f5971f5e9801..c982b162b7ff0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -275,11 +275,6 @@ static inline void intel_klog_error_capture(struct intel_gt *gt,
 
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
-void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-			       const struct intel_engine_cs *engine,
-			       const struct i915_vma_coredump *vma);
-struct i915_vma_coredump *
-intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
 
 struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
 					    intel_engine_mask_t engine_mask, u32 dump_flags);
-- 
GitLab


From 4fca51984371d930a5d9d5a8b0848b892dbfdecc Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 14:45:01 +0200
Subject: [PATCH 0120/2340] drm/i915: move gpu error debugfs to
 i915_gpu_error.c

Hide gpu error specifics in i915_gpu_error.c. This is also cleaner wrt
conditional compilation, as i915_gpu_error.c is only built with
DRM_I915_CAPTURE_ERROR=y.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031124502.1772160-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c   | 108 +------------------------
 drivers/gpu/drm/i915/i915_gpu_error.c | 111 +++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gpu_error.h |   8 +-
 3 files changed, 119 insertions(+), 108 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e9b79c2c37d84..beffac46a5e29 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -49,6 +49,7 @@
 #include "i915_debugfs.h"
 #include "i915_debugfs_params.h"
 #include "i915_driver.h"
+#include "i915_gpu_error.h"
 #include "i915_irq.h"
 #include "i915_reg.h"
 #include "i915_scheduler.h"
@@ -297,107 +298,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
-			      size_t count, loff_t *pos)
-{
-	struct i915_gpu_coredump *error;
-	ssize_t ret;
-	void *buf;
-
-	error = file->private_data;
-	if (!error)
-		return 0;
-
-	/* Bounce buffer required because of kernfs __user API convenience. */
-	buf = kmalloc(count, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
-	if (ret <= 0)
-		goto out;
-
-	if (!copy_to_user(ubuf, buf, ret))
-		*pos += ret;
-	else
-		ret = -EFAULT;
-
-out:
-	kfree(buf);
-	return ret;
-}
-
-static int gpu_state_release(struct inode *inode, struct file *file)
-{
-	i915_gpu_coredump_put(file->private_data);
-	return 0;
-}
-
-static int i915_gpu_info_open(struct inode *inode, struct file *file)
-{
-	struct drm_i915_private *i915 = inode->i_private;
-	struct i915_gpu_coredump *gpu;
-	intel_wakeref_t wakeref;
-
-	gpu = NULL;
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
-
-	if (IS_ERR(gpu))
-		return PTR_ERR(gpu);
-
-	file->private_data = gpu;
-	return 0;
-}
-
-static const struct file_operations i915_gpu_info_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_gpu_info_open,
-	.read = gpu_state_read,
-	.llseek = default_llseek,
-	.release = gpu_state_release,
-};
-
-static ssize_t
-i915_error_state_write(struct file *filp,
-		       const char __user *ubuf,
-		       size_t cnt,
-		       loff_t *ppos)
-{
-	struct i915_gpu_coredump *error = filp->private_data;
-
-	if (!error)
-		return 0;
-
-	drm_dbg(&error->i915->drm, "Resetting error state\n");
-	i915_reset_error_state(error->i915);
-
-	return cnt;
-}
-
-static int i915_error_state_open(struct inode *inode, struct file *file)
-{
-	struct i915_gpu_coredump *error;
-
-	error = i915_first_error_state(inode->i_private);
-	if (IS_ERR(error))
-		return PTR_ERR(error);
-
-	file->private_data  = error;
-	return 0;
-}
-
-static const struct file_operations i915_error_state_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_error_state_open,
-	.read = gpu_state_read,
-	.write = i915_error_state_write,
-	.llseek = default_llseek,
-	.release = gpu_state_release,
-};
-#endif
-
 static int i915_frequency_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -837,10 +737,6 @@ static const struct i915_debugfs_files {
 	{"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-	{"i915_error_state", &i915_error_state_fops},
-	{"i915_gpu_info", &i915_gpu_info_fops},
-#endif
 };
 
 void i915_debugfs_register(struct drm_i915_private *dev_priv)
@@ -863,4 +759,6 @@ void i915_debugfs_register(struct drm_i915_private *dev_priv)
 	drm_debugfs_create_files(i915_debugfs_list,
 				 ARRAY_SIZE(i915_debugfs_list),
 				 minor->debugfs_root, minor);
+
+	i915_gpu_error_debugfs_register(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index db7ac28c44e5c..b4c8459deb7b2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -2137,7 +2137,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du
 	return error;
 }
 
-struct i915_gpu_coredump *
+static struct i915_gpu_coredump *
 i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
 {
 	static DEFINE_MUTEX(capture_mutex);
@@ -2375,3 +2375,112 @@ void intel_klog_error_capture(struct intel_gt *gt,
 	drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
 }
 #endif
+
+static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
+			      size_t count, loff_t *pos)
+{
+	struct i915_gpu_coredump *error;
+	ssize_t ret;
+	void *buf;
+
+	error = file->private_data;
+	if (!error)
+		return 0;
+
+	/* Bounce buffer required because of kernfs __user API convenience. */
+	buf = kmalloc(count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
+	if (ret <= 0)
+		goto out;
+
+	if (!copy_to_user(ubuf, buf, ret))
+		*pos += ret;
+	else
+		ret = -EFAULT;
+
+out:
+	kfree(buf);
+	return ret;
+}
+
+static int gpu_state_release(struct inode *inode, struct file *file)
+{
+	i915_gpu_coredump_put(file->private_data);
+	return 0;
+}
+
+static int i915_gpu_info_open(struct inode *inode, struct file *file)
+{
+	struct drm_i915_private *i915 = inode->i_private;
+	struct i915_gpu_coredump *gpu;
+	intel_wakeref_t wakeref;
+
+	gpu = NULL;
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
+
+	if (IS_ERR(gpu))
+		return PTR_ERR(gpu);
+
+	file->private_data = gpu;
+	return 0;
+}
+
+static const struct file_operations i915_gpu_info_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_gpu_info_open,
+	.read = gpu_state_read,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+
+static ssize_t
+i915_error_state_write(struct file *filp,
+		       const char __user *ubuf,
+		       size_t cnt,
+		       loff_t *ppos)
+{
+	struct i915_gpu_coredump *error = filp->private_data;
+
+	if (!error)
+		return 0;
+
+	drm_dbg(&error->i915->drm, "Resetting error state\n");
+	i915_reset_error_state(error->i915);
+
+	return cnt;
+}
+
+static int i915_error_state_open(struct inode *inode, struct file *file)
+{
+	struct i915_gpu_coredump *error;
+
+	error = i915_first_error_state(inode->i_private);
+	if (IS_ERR(error))
+		return PTR_ERR(error);
+
+	file->private_data  = error;
+	return 0;
+}
+
+static const struct file_operations i915_error_state_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_error_state_open,
+	.read = gpu_state_read,
+	.write = i915_error_state_write,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+
+void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+{
+	struct drm_minor *minor = i915->drm.primary;
+
+	debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915,
+			    &i915_error_state_fops);
+	debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915,
+			    &i915_gpu_info_fops);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index c982b162b7ff0..a6f2a7518cf0b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -276,8 +276,6 @@ static inline void intel_klog_error_capture(struct intel_gt *gt,
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 
-struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
-					    intel_engine_mask_t engine_mask, u32 dump_flags);
 void i915_capture_error_state(struct intel_gt *gt,
 			      intel_engine_mask_t engine_mask, u32 dump_flags);
 
@@ -329,6 +327,8 @@ struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
 void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
+void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
+
 #else
 
 __printf(2, 3)
@@ -411,6 +411,10 @@ static inline void i915_disable_error_state(struct drm_i915_private *i915,
 {
 }
 
+static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+{
+}
+
 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
 
 #endif /* _I915_GPU_ERROR_H_ */
-- 
GitLab


From d581841076bc5de3c0ae72fd6bd50c59ce9f1638 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 14:45:02 +0200
Subject: [PATCH 0121/2340] drm/i915: move gpu error sysfs to i915_gpu_error.c

Hide gpu error specifics in i915_gpu_error.c. This is also cleaner wrt
conditional compilation, as i915_gpu_error.c is only built with
DRM_I915_CAPTURE_ERROR=y.

With this, we can also make i915_first_error_state() static.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031124502.1772160-3-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 75 ++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gpu_error.h | 17 +++---
 drivers/gpu/drm/i915/i915_sysfs.c     | 79 +--------------------------
 3 files changed, 86 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b4c8459deb7b2..f9e750217f183 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -57,6 +57,7 @@
 #include "i915_memcpy.h"
 #include "i915_reg.h"
 #include "i915_scatterlist.h"
+#include "i915_sysfs.h"
 #include "i915_utils.h"
 
 #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -2208,7 +2209,7 @@ void i915_capture_error_state(struct intel_gt *gt,
 	i915_gpu_coredump_put(error);
 }
 
-struct i915_gpu_coredump *
+static struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
 	struct i915_gpu_coredump *error;
@@ -2484,3 +2485,75 @@ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
 	debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915,
 			    &i915_gpu_info_fops);
 }
+
+static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+
+	struct device *kdev = kobj_to_dev(kobj);
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+	struct i915_gpu_coredump *gpu;
+	ssize_t ret = 0;
+
+	/*
+	 * FIXME: Concurrent clients triggering resets and reading + clearing
+	 * dumps can cause inconsistent sysfs reads when a user calls in with a
+	 * non-zero offset to complete a prior partial read but the
+	 * gpu_coredump has been cleared or replaced.
+	 */
+
+	gpu = i915_first_error_state(i915);
+	if (IS_ERR(gpu)) {
+		ret = PTR_ERR(gpu);
+	} else if (gpu) {
+		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
+		i915_gpu_coredump_put(gpu);
+	} else {
+		const char *str = "No error state collected\n";
+		size_t len = strlen(str);
+
+		if (off < len) {
+			ret = min_t(size_t, count, len - off);
+			memcpy(buf, str + off, ret);
+		}
+	}
+
+	return ret;
+}
+
+static ssize_t error_state_write(struct file *file, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	struct device *kdev = kobj_to_dev(kobj);
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+
+	drm_dbg(&dev_priv->drm, "Resetting error state\n");
+	i915_reset_error_state(dev_priv);
+
+	return count;
+}
+
+static const struct bin_attribute error_state_attr = {
+	.attr.name = "error",
+	.attr.mode = S_IRUSR | S_IWUSR,
+	.size = 0,
+	.read = error_state_read,
+	.write = error_state_write,
+};
+
+void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+{
+	struct device *kdev = i915->drm.primary->kdev;
+
+	if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
+		drm_err(&i915->drm, "error_state sysfs setup failed\n");
+}
+
+void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+{
+	struct device *kdev = i915->drm.primary->kdev;
+
+	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index a6f2a7518cf0b..68c964d6720a8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -323,11 +323,12 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
 }
 
-struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
 void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
 void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
+void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915);
+void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915);
 
 #else
 
@@ -396,12 +397,6 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 {
 }
 
-static inline struct i915_gpu_coredump *
-i915_first_error_state(struct drm_i915_private *i915)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline void i915_reset_error_state(struct drm_i915_private *i915)
 {
 }
@@ -415,6 +410,14 @@ static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915
 {
 }
 
+static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+{
+}
+
+static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+{
+}
+
 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
 
 #endif /* _I915_GPU_ERROR_H_ */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index e88bb4f043050..613decd477605 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = {
 	.private = (void *)1
 };
 
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-
-static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *attr, char *buf,
-				loff_t off, size_t count)
-{
-
-	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-	struct i915_gpu_coredump *gpu;
-	ssize_t ret = 0;
-
-	/*
-	 * FIXME: Concurrent clients triggering resets and reading + clearing
-	 * dumps can cause inconsistent sysfs reads when a user calls in with a
-	 * non-zero offset to complete a prior partial read but the
-	 * gpu_coredump has been cleared or replaced.
-	 */
-
-	gpu = i915_first_error_state(i915);
-	if (IS_ERR(gpu)) {
-		ret = PTR_ERR(gpu);
-	} else if (gpu) {
-		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
-		i915_gpu_coredump_put(gpu);
-	} else {
-		const char *str = "No error state collected\n";
-		size_t len = strlen(str);
-
-		if (off < len) {
-			ret = min_t(size_t, count, len - off);
-			memcpy(buf, str + off, ret);
-		}
-	}
-
-	return ret;
-}
-
-static ssize_t error_state_write(struct file *file, struct kobject *kobj,
-				 struct bin_attribute *attr, char *buf,
-				 loff_t off, size_t count)
-{
-	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-
-	drm_dbg(&dev_priv->drm, "Resetting error state\n");
-	i915_reset_error_state(dev_priv);
-
-	return count;
-}
-
-static const struct bin_attribute error_state_attr = {
-	.attr.name = "error",
-	.attr.mode = S_IRUSR | S_IWUSR,
-	.size = 0,
-	.read = error_state_read,
-	.write = error_state_write,
-};
-
-static void i915_setup_error_capture(struct device *kdev)
-{
-	if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
-		drm_err(&kdev_minor_to_i915(kdev)->drm,
-			"error_state sysfs setup failed\n");
-}
-
-static void i915_teardown_error_capture(struct device *kdev)
-{
-	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
-}
-#else
-static void i915_setup_error_capture(struct device *kdev) {}
-static void i915_teardown_error_capture(struct device *kdev) {}
-#endif
-
 void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
@@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		drm_warn(&dev_priv->drm,
 			 "failed to register GT sysfs directory\n");
 
-	i915_setup_error_capture(kdev);
+	i915_gpu_error_sysfs_setup(dev_priv);
 
 	intel_engines_add_sysfs(dev_priv);
 }
@@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
 
-	i915_teardown_error_capture(kdev);
+	i915_gpu_error_sysfs_teardown(dev_priv);
 
 	device_remove_bin_file(kdev,  &dpf_attrs_1);
 	device_remove_bin_file(kdev,  &dpf_attrs);
-- 
GitLab


From 5fbae6874c92eec51cdcdcb68a4bafb535c066bf Mon Sep 17 00:00:00 2001
From: Dorcas AnonoLitunya <anonolitunya@gmail.com>
Date: Fri, 27 Oct 2023 20:47:45 +0300
Subject: [PATCH 0122/2340] drm/i915/gt: Remove prohibited space after opening
 parenthesis

Removes space after opening parenthesis.

Fixes the checkpatch.pl error:
ERROR: space prohibited after that opening parenthesis '('

Signed-off-by: Dorcas AnonoLitunya <anonolitunya@gmail.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027174745.4058-1-anonolitunya@gmail.com
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4a11219e560e0..40687806d22a6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -47,7 +47,7 @@
 #define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
 #define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)
 
-#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)
 
 #define MAX_MMIO_BASES 3
 struct engine_info {
-- 
GitLab


From 44793c6a5b784f1f25608e3773fd40e011c63391 Mon Sep 17 00:00:00 2001
From: Carl Vanderlip <quic_carlv@quicinc.com>
Date: Fri, 27 Oct 2023 12:08:10 -0600
Subject: [PATCH 0123/2340] accel/qaic: Quiet array bounds check on DMA abort
 message

Current wrapper is right-sized to the message being transferred;
however, this is smaller than the structure defining message wrappers
since the trailing element is a union of message/transfer headers of
various sizes (8 and 32 bytes on 32-bit system where issue was
reported). Using the smaller header with a small message
(wire_trans_dma_xfer is 24 bytes including header) ends up being smaller
than a wrapper with the larger header. There are no accesses outside of
the defined size, however they are possible if the larger union member
is referenced.

Abort messages are outside of hot-path and changing the wrapper struct
would require a larger rewrite, so having the memory allocated to the
message be 8 bytes too big is acceptable.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202310182253.bcb9JcyJ-lkp@intel.com/
Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027180810.4873-1-quic_jhugo@quicinc.com
---
 drivers/accel/qaic/qaic_control.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index 388abd40024ba..84915824be543 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -1138,7 +1138,7 @@ static int abort_dma_cont(struct qaic_device *qdev, struct wrapper_list *wrapper
 		if (!list_is_first(&wrapper->list, &wrappers->list))
 			kref_put(&wrapper->ref_count, free_wrapper);
 
-	wrapper = add_wrapper(wrappers, offsetof(struct wrapper_msg, trans) + sizeof(*out_trans));
+	wrapper = add_wrapper(wrappers, sizeof(*wrapper));
 
 	if (!wrapper)
 		return -ENOMEM;
-- 
GitLab


From 3b511278b6ef514b3ae3d99ff62947cddd434479 Mon Sep 17 00:00:00 2001
From: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Date: Fri, 27 Oct 2023 10:43:30 -0600
Subject: [PATCH 0124/2340] accel/qaic: Support for 0 resize slice execution in
 BO

Add support to partially execute a slice which is resized to zero.
Executing a zero size slice in a BO should mean that there is no DMA
transfers involved but you should still configure doorbell and semaphores.

For example consider a BO of size 18K and it is sliced into 3 6K slices
and user calls partial execute ioctl with resize as 10K.
slice 0 - size is 6k and offset is 0, so resize of 10K will not cut short
          this slice hence we send the entire slice for execution.
slice 1 - size is 6k and offset is 6k, so resize of 10K will cut short this
          slice and only the first 4k should be DMA along with configuring
          doorbell and semaphores.
slice 2 - size is 6k and offset is 12k, so resize of 10k will cut short
          this slice and no DMA transfer would be involved but we should
          would configure doorbell and semaphores.

This change begs to change the behavior of 0 resize. Currently, 0 resize
partial execute ioctl behaves exactly like execute ioctl i.e. no resize.
After this patch all the slice in BO should behave exactly like slice 2 in
above example.

Refactor copy_partial_exec_reqs() to make it more readable and less
complex.

Signed-off-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027164330.11978-1-quic_jhugo@quicinc.com
---
 drivers/accel/qaic/qaic_data.c | 104 ++++++++++++++-------------------
 include/uapi/drm/qaic_accel.h  |   5 +-
 2 files changed, 46 insertions(+), 63 deletions(-)

diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index ebc3cca1b0947..8da81768f2abb 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -51,6 +51,7 @@
 		})
 #define NUM_EVENTS	128
 #define NUM_DELAYS	10
+#define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size())
 
 static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */
 module_param(wait_exec_default_timeout_ms, uint, 0600);
@@ -1058,6 +1059,16 @@ unlock_usr_srcu:
 	return ret;
 }
 
+static inline u32 fifo_space_avail(u32 head, u32 tail, u32 q_size)
+{
+	u32 avail = head - tail - 1;
+
+	if (head <= tail)
+		avail += q_size;
+
+	return avail;
+}
+
 static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id,
 				 u32 head, u32 *ptail)
 {
@@ -1066,27 +1077,20 @@ static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slic
 	u32 tail = *ptail;
 	u32 avail;
 
-	avail = head - tail;
-	if (head <= tail)
-		avail += dbc->nelem;
-
-	--avail;
-
+	avail = fifo_space_avail(head, tail, dbc->nelem);
 	if (avail < slice->nents)
 		return -EAGAIN;
 
 	if (tail + slice->nents > dbc->nelem) {
 		avail = dbc->nelem - tail;
 		avail = min_t(u32, avail, slice->nents);
-		memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-		       sizeof(*reqs) * avail);
+		memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
 		reqs += avail;
 		avail = slice->nents - avail;
 		if (avail)
 			memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail);
 	} else {
-		memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-		       sizeof(*reqs) * slice->nents);
+		memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * slice->nents);
 	}
 
 	*ptail = (tail + slice->nents) % dbc->nelem;
@@ -1094,46 +1098,31 @@ static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slic
 	return 0;
 }
 
-/*
- * Based on the value of resize we may only need to transmit first_n
- * entries and the last entry, with last_bytes to send from the last entry.
- * Note that first_n could be 0.
- */
 static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice,
-					 u64 resize, u32 dbc_id, u32 head, u32 *ptail)
+					 u64 resize, struct dma_bridge_chan *dbc, u32 head,
+					 u32 *ptail)
 {
-	struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
 	struct dbc_req *reqs = slice->reqs;
 	struct dbc_req *last_req;
 	u32 tail = *ptail;
-	u64 total_bytes;
 	u64 last_bytes;
 	u32 first_n;
 	u32 avail;
-	int ret;
-	int i;
-
-	avail = head - tail;
-	if (head <= tail)
-		avail += dbc->nelem;
 
-	--avail;
+	avail = fifo_space_avail(head, tail, dbc->nelem);
 
-	total_bytes = 0;
-	for (i = 0; i < slice->nents; i++) {
-		total_bytes += le32_to_cpu(reqs[i].len);
-		if (total_bytes >= resize)
+	/*
+	 * After this for loop is complete, first_n represents the index
+	 * of the last DMA request of this slice that needs to be
+	 * transferred after resizing and last_bytes represents DMA size
+	 * of that request.
+	 */
+	last_bytes = resize;
+	for (first_n = 0; first_n < slice->nents; first_n++)
+		if (last_bytes > le32_to_cpu(reqs[first_n].len))
+			last_bytes -= le32_to_cpu(reqs[first_n].len);
+		else
 			break;
-	}
-
-	if (total_bytes < resize) {
-		/* User space should have used the full buffer path. */
-		ret = -EINVAL;
-		return ret;
-	}
-
-	first_n = i;
-	last_bytes = i ? resize + le32_to_cpu(reqs[i].len) - total_bytes : resize;
 
 	if (avail < (first_n + 1))
 		return -EAGAIN;
@@ -1142,22 +1131,21 @@ static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_sli
 		if (tail + first_n > dbc->nelem) {
 			avail = dbc->nelem - tail;
 			avail = min_t(u32, avail, first_n);
-			memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-			       sizeof(*reqs) * avail);
+			memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
 			last_req = reqs + avail;
 			avail = first_n - avail;
 			if (avail)
 				memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail);
 		} else {
-			memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-			       sizeof(*reqs) * first_n);
+			memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * first_n);
 		}
 	}
 
-	/* Copy over the last entry. Here we need to adjust len to the left over
+	/*
+	 * Copy over the last entry. Here we need to adjust len to the left over
 	 * size, and set src and dst to the entry it is copied to.
 	 */
-	last_req = dbc->req_q_base + (tail + first_n) % dbc->nelem * get_dbc_req_elem_size();
+	last_req = fifo_at(dbc->req_q_base, (tail + first_n) % dbc->nelem);
 	memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs));
 
 	/*
@@ -1168,6 +1156,9 @@ static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_sli
 	last_req->len = cpu_to_le32((u32)last_bytes);
 	last_req->src_addr = reqs[first_n].src_addr;
 	last_req->dest_addr = reqs[first_n].dest_addr;
+	if (!last_bytes)
+		/* Disable DMA transfer */
+		last_req->cmd = GENMASK(7, 2) & reqs[first_n].cmd;
 
 	*ptail = (tail + first_n + 1) % dbc->nelem;
 
@@ -1227,26 +1218,17 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 		bo->req_id = dbc->next_req_id++;
 
 		list_for_each_entry(slice, &bo->slices, slice) {
-			/*
-			 * If this slice does not fall under the given
-			 * resize then skip this slice and continue the loop
-			 */
-			if (is_partial && pexec[i].resize && pexec[i].resize <= slice->offset)
-				continue;
-
 			for (j = 0; j < slice->nents; j++)
 				slice->reqs[j].req_id = cpu_to_le16(bo->req_id);
 
-			/*
-			 * If it is a partial execute ioctl call then check if
-			 * resize has cut this slice short then do a partial copy
-			 * else do complete copy
-			 */
-			if (is_partial && pexec[i].resize &&
-			    pexec[i].resize < slice->offset + slice->size)
+			if (is_partial && (!pexec[i].resize || pexec[i].resize <= slice->offset))
+				/* Configure the slice for no DMA transfer */
+				ret = copy_partial_exec_reqs(qdev, slice, 0, dbc, head, tail);
+			else if (is_partial && pexec[i].resize < slice->offset + slice->size)
+				/* Configure the slice to be partially DMA transferred */
 				ret = copy_partial_exec_reqs(qdev, slice,
-							     pexec[i].resize - slice->offset,
-							     dbc->id, head, tail);
+							     pexec[i].resize - slice->offset, dbc,
+							     head, tail);
 			else
 				ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
 			if (ret) {
diff --git a/include/uapi/drm/qaic_accel.h b/include/uapi/drm/qaic_accel.h
index 43ac5d8645125..9dab32316aee4 100644
--- a/include/uapi/drm/qaic_accel.h
+++ b/include/uapi/drm/qaic_accel.h
@@ -287,8 +287,9 @@ struct qaic_execute_entry {
  * struct qaic_partial_execute_entry - Defines a BO to resize and submit.
  * @handle: In. GEM handle of the BO to commit to the device.
  * @dir: In. Direction of data. 1 = to device, 2 = from device.
- * @resize: In. New size of the BO.  Must be <= the original BO size.  0 is
- *	    short for no resize.
+ * @resize: In. New size of the BO.  Must be <= the original BO size.
+ *	    @resize as 0 would be interpreted as no DMA transfer is
+ *	    involved.
  */
 struct qaic_partial_execute_entry {
 	__u32 handle;
-- 
GitLab


From 451eaa1a614c911f5a51078dcb68022874e4cb12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 31 Oct 2023 18:08:00 +0200
Subject: [PATCH 0125/2340] drm/i915: Bump GLK CDCLK frequency when driving
 multiple pipes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On GLK CDCLK frequency needs to be at least 2*96 MHz when accessing
the audio hardware. Currently we bump the CDCLK frequency up
temporarily (if not high enough already) whenever audio hardware
is being accessed, and drop it back down afterwards.

With a single active pipe this works just fine as we can switch
between all the valid CDCLK frequencies by changing the cd2x
divider, which doesn't require a full modeset. However with
multiple active pipes the cd2x divider trick no longer works,
and thus we end up blinking all displays off and back on.

To avoid this let's just bump the CDCLK frequency to >=2*96MHz
whenever multiple pipes are active. The downside is slightly
higher power consumption, but that seems like an acceptable
tradeoff. With a single active pipe we can stick to the current
more optiomal (from power comsumption POV) behaviour.

Cc: stable@vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9599
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031160800.18371-1-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 6d7ba4d0f1306..c4839c67cb0f0 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2750,6 +2750,18 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state)
 	for_each_pipe(dev_priv, pipe)
 		min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk);
 
+	/*
+	 * Avoid glk_force_audio_cdclk() causing excessive screen
+	 * blinking when multiple pipes are active by making sure
+	 * CDCLK frequency is always high enough for audio. With a
+	 * single active pipe we can always change CDCLK frequency
+	 * by changing the cd2x divider (see glk_cdclk_table[]) and
+	 * thus a full modeset won't be needed then.
+	 */
+	if (IS_GEMINILAKE(dev_priv) && cdclk_state->active_pipes &&
+	    !is_power_of_2(cdclk_state->active_pipes))
+		min_cdclk = max(2 * 96000, min_cdclk);
+
 	if (min_cdclk > dev_priv->display.cdclk.max_cdclk_freq) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "required cdclk (%d kHz) exceeds max (%d kHz)\n",
-- 
GitLab


From 7abbbe2694b3d4fd366dc91934f42c047a6d282d Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 10:55:34 +0000
Subject: [PATCH 0126/2340] drm/sched: Rename drm_sched_get_cleanup_job to be
 more descriptive

"Get cleanup job" makes it sound like helper is returning a job which will
execute some cleanup, or something, while the kerneldoc itself accurately
says "fetch the next _finished_ job". So lets rename the helper to be self
documenting.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-2-tvrtko.ursulin@linux.intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 98b2ad54fc707..fb64b35451f5a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -448,7 +448,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 
 	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
-	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
+	/* Protects against concurrent deletion in drm_sched_get_finished_job */
 	spin_lock(&sched->job_list_lock);
 	job = list_first_entry_or_null(&sched->pending_list,
 				       struct drm_sched_job, list);
@@ -500,9 +500,9 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 
 	/*
 	 * Reinsert back the bad job here - now it's safe as
-	 * drm_sched_get_cleanup_job cannot race against us and release the
+	 * drm_sched_get_finished_job cannot race against us and release the
 	 * bad job at this point - we parked (waited for) any in progress
-	 * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
+	 * (earlier) cleanups and drm_sched_get_finished_job will not be called
 	 * now until the scheduler thread is unparked.
 	 */
 	if (bad && bad->sched == sched)
@@ -960,7 +960,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
 }
 
 /**
- * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
+ * drm_sched_get_finished_job - fetch the next finished job to be destroyed
  *
  * @sched: scheduler instance
  *
@@ -968,7 +968,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
  * ready for it to be destroyed.
  */
 static struct drm_sched_job *
-drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
+drm_sched_get_finished_job(struct drm_gpu_scheduler *sched)
 {
 	struct drm_sched_job *job, *next;
 
@@ -1059,14 +1059,14 @@ static void drm_sched_free_job_work(struct work_struct *w)
 {
 	struct drm_gpu_scheduler *sched =
 		container_of(w, struct drm_gpu_scheduler, work_free_job);
-	struct drm_sched_job *cleanup_job;
+	struct drm_sched_job *job;
 
 	if (READ_ONCE(sched->pause_submit))
 		return;
 
-	cleanup_job = drm_sched_get_cleanup_job(sched);
-	if (cleanup_job) {
-		sched->ops->free_job(cleanup_job);
+	job = drm_sched_get_finished_job(sched);
+	if (job) {
+		sched->ops->free_job(job);
 
 		drm_sched_free_job_queue_if_done(sched);
 		drm_sched_run_job_queue_if_ready(sched);
-- 
GitLab


From e608d9f7ac1a94a4a63d1ef2b37dd80669ad828d Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 10:55:35 +0000
Subject: [PATCH 0127/2340] drm/sched: Move free worker re-queuing out of the
 if block

Whether or not there are more jobs to clean up does not depend on the
existance of the current job, given both drm_sched_get_finished_job and
drm_sched_free_job_queue_if_done take and drop the job list lock.
Therefore it is confusing to make it read like there is a dependency.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-3-tvrtko.ursulin@linux.intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fb64b35451f5a..e1658030613f5 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1065,12 +1065,11 @@ static void drm_sched_free_job_work(struct work_struct *w)
 		return;
 
 	job = drm_sched_get_finished_job(sched);
-	if (job) {
+	if (job)
 		sched->ops->free_job(job);
 
-		drm_sched_free_job_queue_if_done(sched);
-		drm_sched_run_job_queue_if_ready(sched);
-	}
+	drm_sched_free_job_queue_if_done(sched);
+	drm_sched_run_job_queue_if_ready(sched);
 }
 
 /**
-- 
GitLab


From 67dd1d8c9f6543661720b9a89e28a25488cb8753 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 10:55:36 +0000
Subject: [PATCH 0128/2340] drm/sched: Rename drm_sched_free_job_queue to be
 more descriptive

The current name makes it sound like helper will free a queue, while what
it does is it enqueues the free job worker.

Rename it to drm_sched_run_free_queue to align with existing
drm_sched_run_job_queue.

Despite that creating an illusion there are two queues, while in reality
there is only one, at least it creates a consistent naming for the two
enqueuing helpers.

At the same time simplify the "if done" helper by dropping the suffix and
adding a double underscore prefix to the one which just enqueues.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-4-tvrtko.ursulin@linux.intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index e1658030613f5..f9baca20b4383 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -266,20 +266,20 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
 }
 
 /**
- * drm_sched_free_job_queue - enqueue free-job work
+ * __drm_sched_run_free_queue - enqueue free-job work
  * @sched: scheduler instance
  */
-static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched)
+static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
 {
 	if (!READ_ONCE(sched->pause_submit))
 		queue_work(sched->submit_wq, &sched->work_free_job);
 }
 
 /**
- * drm_sched_free_job_queue_if_done - enqueue free-job work if ready
+ * drm_sched_run_free_queue - enqueue free-job work if ready
  * @sched: scheduler instance
  */
-static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched)
+static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
 {
 	struct drm_sched_job *job;
 
@@ -287,7 +287,7 @@ static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched)
 	job = list_first_entry_or_null(&sched->pending_list,
 				       struct drm_sched_job, list);
 	if (job && dma_fence_is_signaled(&job->s_fence->finished))
-		drm_sched_free_job_queue(sched);
+		__drm_sched_run_free_queue(sched);
 	spin_unlock(&sched->job_list_lock);
 }
 
@@ -310,7 +310,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
 	dma_fence_put(&s_fence->finished);
-	drm_sched_free_job_queue(sched);
+	__drm_sched_run_free_queue(sched);
 }
 
 /**
@@ -1068,7 +1068,7 @@ static void drm_sched_free_job_work(struct work_struct *w)
 	if (job)
 		sched->ops->free_job(job);
 
-	drm_sched_free_job_queue_if_done(sched);
+	drm_sched_run_free_queue(sched);
 	drm_sched_run_job_queue_if_ready(sched);
 }
 
-- 
GitLab


From 35a4279d42db534ad71a3a598029a53f22856f93 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 10:55:37 +0000
Subject: [PATCH 0129/2340] drm/sched: Rename drm_sched_run_job_queue_if_ready
 and clarify kerneldoc

"If ready" is not immediately clear what it means - is the scheduler
ready or something else? Drop the suffix, clarify kerneldoc, and employ
the same naming scheme as in drm_sched_run_free_queue:

 - drm_sched_run_job_queue   - enqueues if there is something to enqueue
                               *and* scheduler is ready (can queue)
 - __drm_sched_run_job_queue - low-level helper to simply queue the job

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-5-tvrtko.ursulin@linux.intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index f9baca20b4383..d5ddbce68fb77 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -256,10 +256,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 }
 
 /**
- * drm_sched_run_job_queue - enqueue run-job work
+ * __drm_sched_run_job_queue - enqueue run-job work
  * @sched: scheduler instance
  */
-static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
+static void __drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
 {
 	if (!READ_ONCE(sched->pause_submit))
 		queue_work(sched->submit_wq, &sched->work_run_job);
@@ -928,7 +928,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
 void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
 {
 	if (drm_sched_can_queue(sched))
-		drm_sched_run_job_queue(sched);
+		__drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -1041,13 +1041,13 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 EXPORT_SYMBOL(drm_sched_pick_best);
 
 /**
- * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready
+ * drm_sched_run_job_queue - enqueue run-job work if there are ready entities
  * @sched: scheduler instance
  */
-static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched)
+static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
 {
 	if (drm_sched_select_entity(sched))
-		drm_sched_run_job_queue(sched);
+		__drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -1069,7 +1069,7 @@ static void drm_sched_free_job_work(struct work_struct *w)
 		sched->ops->free_job(job);
 
 	drm_sched_run_free_queue(sched);
-	drm_sched_run_job_queue_if_ready(sched);
+	drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -1126,7 +1126,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
 	}
 
 	wake_up(&sched->job_scheduled);
-	drm_sched_run_job_queue_if_ready(sched);
+	drm_sched_run_job_queue(sched);
 }
 
 /**
-- 
GitLab


From f12af4c461fb6cd5ed7b48f8b4d09b22eb19fcc5 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 10:55:38 +0000
Subject: [PATCH 0130/2340] drm/sched: Drop suffix from
 drm_sched_wakeup_if_can_queue

Because a) helper is exported to other parts of the scheduler and
b) there isn't a plain drm_sched_wakeup to begin with, I think we can
drop the suffix and by doing so separate the intimiate knowledge
between the scheduler components a bit better.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-6-tvrtko.ursulin@linux.intel.com
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 4 ++--
 drivers/gpu/drm/scheduler/sched_main.c   | 4 ++--
 include/drm/gpu_scheduler.h              | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 409e4256f6e7d..f1db63cc81981 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
 		container_of(cb, struct drm_sched_entity, cb);
 
 	drm_sched_entity_clear_dep(f, cb);
-	drm_sched_wakeup_if_can_queue(entity->rq->sched);
+	drm_sched_wakeup(entity->rq->sched);
 }
 
 /**
@@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
 			drm_sched_rq_update_fifo(entity, submit_ts);
 
-		drm_sched_wakeup_if_can_queue(entity->rq->sched);
+		drm_sched_wakeup(entity->rq->sched);
 	}
 }
 EXPORT_SYMBOL(drm_sched_entity_push_job);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index d5ddbce68fb77..27843e37d9b76 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -920,12 +920,12 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
 }
 
 /**
- * drm_sched_wakeup_if_can_queue - Wake up the scheduler
+ * drm_sched_wakeup - Wake up the scheduler if it is ready to queue
  * @sched: scheduler instance
  *
  * Wake up the scheduler if we can queue jobs.
  */
-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
 {
 	if (drm_sched_can_queue(sched))
 		__drm_sched_run_job_queue(sched);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9daa78d2c04c1..754fd2217334e 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -559,7 +559,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
 
 void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
 void drm_sched_job_cleanup(struct drm_sched_job *job);
-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched);
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
-- 
GitLab


From a12480855ecbba6c7473c170d91c7bf41701a38c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Thu, 2 Nov 2023 13:22:19 +0200
Subject: [PATCH 0131/2340] drm/i915/display: Use intel_bo_to_drm_bo instead of
 obj->base
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are preparing for Xe. Xe_bo doesn't have obj->base. Due to this
use intel_bo_to_drm_bo instead in intel_prepare_plane_fb.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102112219.1039362-1-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_atomic_plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index 3b9a669234227..06c2455bdd788 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -1096,7 +1096,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 		 * can safely continue.
 		 */
 		if (new_crtc_state && intel_crtc_needs_modeset(new_crtc_state)) {
-			ret = add_dma_resv_fences(old_obj->base.resv,
+			ret = add_dma_resv_fences(intel_bo_to_drm_bo(old_obj)->resv,
 						  &new_plane_state->uapi);
 			if (ret < 0)
 				return ret;
-- 
GitLab


From a379bf3d14602067812f219bd852ff89dff31133 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 1 Nov 2023 13:42:08 +0200
Subject: [PATCH 0132/2340] drm/i915: Extract hsw_chicken_trans_reg()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have the same code to determine the CHICKEN_TRANS register
offset sprinkled in a dozen places. Hoover it up into a small
helper.

TODO: find a better home for this

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101114212.9345-2-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_ddi.c     |  9 +++++++++
 drivers/gpu/drm/i915/display/intel_ddi.h     |  2 ++
 drivers/gpu/drm/i915/display/intel_display.c | 18 +++++-------------
 drivers/gpu/drm/i915/display/intel_dp_mst.c  |  8 ++------
 drivers/gpu/drm/i915/display/intel_psr.c     | 11 +++++------
 5 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 2e37708fecdd0..c75fd00e360ac 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3115,6 +3115,15 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state,
 	trans_port_sync_stop_link_train(state, encoder, crtc_state);
 }
 
+/* FIXME bad home for this function */
+i915_reg_t hsw_chicken_trans_reg(struct drm_i915_private *i915,
+				 enum transcoder cpu_transcoder)
+{
+	return DISPLAY_VER(i915) >= 14 ?
+		MTL_CHICKEN_TRANS(cpu_transcoder) :
+		CHICKEN_TRANS(cpu_transcoder);
+}
+
 static i915_reg_t
 gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv,
 			       enum port port)
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index 4999c0ee229bd..db845f2aca0c6 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -27,6 +27,8 @@ i915_reg_t dp_tp_ctl_reg(struct intel_encoder *encoder,
 			 const struct intel_crtc_state *crtc_state);
 i915_reg_t dp_tp_status_reg(struct intel_encoder *encoder,
 			    const struct intel_crtc_state *crtc_state);
+i915_reg_t hsw_chicken_trans_reg(struct drm_i915_private *i915,
+				 enum transcoder cpu_transcoder);
 void intel_ddi_fdi_post_disable(struct intel_atomic_state *state,
 				struct intel_encoder *intel_encoder,
 				const struct intel_crtc_state *old_crtc_state,
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 49ded4e328831..121ebbaba2e43 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -488,11 +488,8 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state)
 
 	intel_de_write(dev_priv, reg, val);
 
-	if (DISPLAY_VER(dev_priv) >= 14)
-		intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(cpu_transcoder),
-			     FECSTALL_DIS_DPTSTREAM_DPTTG, 0);
-	else if (DISPLAY_VER(dev_priv) >= 12)
-		intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder),
+	if (DISPLAY_VER(dev_priv) >= 12)
+		intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder),
 			     FECSTALL_DIS_DPTSTREAM_DPTTG, 0);
 
 	if ((val & TRANSCONF_ENABLE) == 0)
@@ -1503,12 +1500,9 @@ static void hsw_set_linetime_wm(const struct intel_crtc_state *crtc_state)
 static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	enum transcoder transcoder = crtc_state->cpu_transcoder;
-	i915_reg_t reg = DISPLAY_VER(dev_priv) >= 14 ? MTL_CHICKEN_TRANS(transcoder) :
-			 CHICKEN_TRANS(transcoder);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 
-	intel_de_rmw(dev_priv, reg,
+	intel_de_rmw(i915, hsw_chicken_trans_reg(i915, crtc_state->cpu_transcoder),
 		     HSW_FRAME_START_DELAY_MASK,
 		     HSW_FRAME_START_DELAY(crtc_state->framestart_delay - 1));
 }
@@ -3792,9 +3786,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 	}
 
 	if (!transcoder_is_dsi(pipe_config->cpu_transcoder)) {
-		tmp = intel_de_read(dev_priv, DISPLAY_VER(dev_priv) >= 14 ?
-				    MTL_CHICKEN_TRANS(pipe_config->cpu_transcoder) :
-				    CHICKEN_TRANS(pipe_config->cpu_transcoder));
+		tmp = intel_de_read(dev_priv, hsw_chicken_trans_reg(dev_priv, pipe_config->cpu_transcoder));
 
 		pipe_config->framestart_delay = REG_FIELD_GET(HSW_FRAME_START_DELAY_MASK, tmp) + 1;
 	} else {
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 82f425ef15953..a32498c7826a9 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -817,12 +817,8 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 	drm_dp_add_payload_part2(&intel_dp->mst_mgr, &state->base,
 				 drm_atomic_get_mst_payload_state(mst_state, connector->port));
 
-	if (DISPLAY_VER(dev_priv) >= 14)
-		intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(trans),
-			     FECSTALL_DIS_DPTSTREAM_DPTTG,
-			     pipe_config->fec_enable ? FECSTALL_DIS_DPTSTREAM_DPTTG : 0);
-	else if (DISPLAY_VER(dev_priv) >= 12)
-		intel_de_rmw(dev_priv, CHICKEN_TRANS(trans),
+	if (DISPLAY_VER(dev_priv) >= 12)
+		intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, trans),
 			     FECSTALL_DIS_DPTSTREAM_DPTTG,
 			     pipe_config->fec_enable ? FECSTALL_DIS_DPTSTREAM_DPTTG : 0);
 
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index ecd24a0b86cb4..920f773361630 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -29,6 +29,7 @@
 #include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_crtc.h"
+#include "intel_ddi.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_dp.h"
@@ -1452,12 +1453,10 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp,
 		 * All supported adlp panels have 1-based X granularity, this may
 		 * cause issues if non-supported panels are used.
 		 */
-		if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0))
-			intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(cpu_transcoder), 0,
-				     ADLP_1_BASED_X_GRANULARITY);
-		else if (IS_ALDERLAKE_P(dev_priv))
-			intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0,
-				     ADLP_1_BASED_X_GRANULARITY);
+		if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) ||
+		    IS_ALDERLAKE_P(dev_priv))
+			intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder),
+				     0, ADLP_1_BASED_X_GRANULARITY);
 
 		/* Wa_16012604467:adlp,mtl[a0,b0] */
 		if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0))
-- 
GitLab


From f18020a5bd23b5f9b5b406b70198a5e51af67df0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 1 Nov 2023 13:42:09 +0200
Subject: [PATCH 0133/2340] drm/i915: Stop using a 'reg' variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'reg' is a very non-descriptive name. Just get rid of the silly
local variable and spell out the full register name always.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101114212.9345-3-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 121ebbaba2e43..9e9c03287869b 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -398,7 +398,6 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state)
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
-	i915_reg_t reg;
 	u32 val;
 
 	drm_dbg_kms(&dev_priv->drm, "enabling pipe %c\n", pipe_name(pipe));
@@ -431,16 +430,16 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state)
 		intel_de_rmw(dev_priv, PIPE_ARB_CTL(pipe),
 			     0, PIPE_ARB_USE_PROG_SLOTS);
 
-	reg = TRANSCONF(cpu_transcoder);
-	val = intel_de_read(dev_priv, reg);
+	val = intel_de_read(dev_priv, TRANSCONF(cpu_transcoder));
 	if (val & TRANSCONF_ENABLE) {
 		/* we keep both pipes enabled on 830 */
 		drm_WARN_ON(&dev_priv->drm, !IS_I830(dev_priv));
 		return;
 	}
 
-	intel_de_write(dev_priv, reg, val | TRANSCONF_ENABLE);
-	intel_de_posting_read(dev_priv, reg);
+	intel_de_write(dev_priv, TRANSCONF(cpu_transcoder),
+		       val | TRANSCONF_ENABLE);
+	intel_de_posting_read(dev_priv, TRANSCONF(cpu_transcoder));
 
 	/*
 	 * Until the pipe starts PIPEDSL reads will return a stale value,
@@ -459,7 +458,6 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state)
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
-	i915_reg_t reg;
 	u32 val;
 
 	drm_dbg_kms(&dev_priv->drm, "disabling pipe %c\n", pipe_name(pipe));
@@ -470,8 +468,7 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state)
 	 */
 	assert_planes_disabled(crtc);
 
-	reg = TRANSCONF(cpu_transcoder);
-	val = intel_de_read(dev_priv, reg);
+	val = intel_de_read(dev_priv, TRANSCONF(cpu_transcoder));
 	if ((val & TRANSCONF_ENABLE) == 0)
 		return;
 
@@ -486,7 +483,7 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state)
 	if (!IS_I830(dev_priv))
 		val &= ~TRANSCONF_ENABLE;
 
-	intel_de_write(dev_priv, reg, val);
+	intel_de_write(dev_priv, TRANSCONF(cpu_transcoder), val);
 
 	if (DISPLAY_VER(dev_priv) >= 12)
 		intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder),
-- 
GitLab


From e5aaad610f296a79bc1096b73a31013ee0d43240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 1 Nov 2023 13:42:10 +0200
Subject: [PATCH 0134/2340] drm/i915: Extract mchbar_reg()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stop repeating the same logic to determine the correct
config space register for MCHBAR.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101114212.9345-4-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/soc/intel_gmch.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/soc/intel_gmch.c b/drivers/gpu/drm/i915/soc/intel_gmch.c
index f32e9f78770a2..40874ebfb64c7 100644
--- a/drivers/gpu/drm/i915/soc/intel_gmch.c
+++ b/drivers/gpu/drm/i915/soc/intel_gmch.c
@@ -33,18 +33,22 @@ int intel_gmch_bridge_setup(struct drm_i915_private *i915)
 					i915->gmch.pdev);
 }
 
+static int mchbar_reg(struct drm_i915_private *i915)
+{
+	return GRAPHICS_VER(i915) >= 4 ? MCHBAR_I965 : MCHBAR_I915;
+}
+
 /* Allocate space for the MCH regs if needed, return nonzero on error */
 static int
 intel_alloc_mchbar_resource(struct drm_i915_private *i915)
 {
-	int reg = GRAPHICS_VER(i915) >= 4 ? MCHBAR_I965 : MCHBAR_I915;
 	u32 temp_lo, temp_hi = 0;
 	u64 mchbar_addr;
 	int ret;
 
 	if (GRAPHICS_VER(i915) >= 4)
-		pci_read_config_dword(i915->gmch.pdev, reg + 4, &temp_hi);
-	pci_read_config_dword(i915->gmch.pdev, reg, &temp_lo);
+		pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915) + 4, &temp_hi);
+	pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915), &temp_lo);
 	mchbar_addr = ((u64)temp_hi << 32) | temp_lo;
 
 	/* If ACPI doesn't have it, assume we need to allocate it ourselves */
@@ -68,10 +72,10 @@ intel_alloc_mchbar_resource(struct drm_i915_private *i915)
 	}
 
 	if (GRAPHICS_VER(i915) >= 4)
-		pci_write_config_dword(i915->gmch.pdev, reg + 4,
+		pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915) + 4,
 				       upper_32_bits(i915->gmch.mch_res.start));
 
-	pci_write_config_dword(i915->gmch.pdev, reg,
+	pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915),
 			       lower_32_bits(i915->gmch.mch_res.start));
 	return 0;
 }
@@ -79,7 +83,6 @@ intel_alloc_mchbar_resource(struct drm_i915_private *i915)
 /* Setup MCHBAR if possible, return true if we should disable it again */
 void intel_gmch_bar_setup(struct drm_i915_private *i915)
 {
-	int mchbar_reg = GRAPHICS_VER(i915) >= 4 ? MCHBAR_I965 : MCHBAR_I915;
 	u32 temp;
 	bool enabled;
 
@@ -92,7 +95,7 @@ void intel_gmch_bar_setup(struct drm_i915_private *i915)
 		pci_read_config_dword(i915->gmch.pdev, DEVEN, &temp);
 		enabled = !!(temp & DEVEN_MCHBAR_EN);
 	} else {
-		pci_read_config_dword(i915->gmch.pdev, mchbar_reg, &temp);
+		pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915), &temp);
 		enabled = temp & 1;
 	}
 
@@ -110,15 +113,13 @@ void intel_gmch_bar_setup(struct drm_i915_private *i915)
 		pci_write_config_dword(i915->gmch.pdev, DEVEN,
 				       temp | DEVEN_MCHBAR_EN);
 	} else {
-		pci_read_config_dword(i915->gmch.pdev, mchbar_reg, &temp);
-		pci_write_config_dword(i915->gmch.pdev, mchbar_reg, temp | 1);
+		pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915), &temp);
+		pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915), temp | 1);
 	}
 }
 
 void intel_gmch_bar_teardown(struct drm_i915_private *i915)
 {
-	int mchbar_reg = GRAPHICS_VER(i915) >= 4 ? MCHBAR_I965 : MCHBAR_I915;
-
 	if (i915->gmch.mchbar_need_disable) {
 		if (IS_I915G(i915) || IS_I915GM(i915)) {
 			u32 deven_val;
@@ -131,10 +132,10 @@ void intel_gmch_bar_teardown(struct drm_i915_private *i915)
 		} else {
 			u32 mchbar_val;
 
-			pci_read_config_dword(i915->gmch.pdev, mchbar_reg,
+			pci_read_config_dword(i915->gmch.pdev, mchbar_reg(i915),
 					      &mchbar_val);
 			mchbar_val &= ~1;
-			pci_write_config_dword(i915->gmch.pdev, mchbar_reg,
+			pci_write_config_dword(i915->gmch.pdev, mchbar_reg(i915),
 					       mchbar_val);
 		}
 	}
-- 
GitLab


From cf6e11650395fd27fabff294b95225886b7a9f8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 1 Nov 2023 13:42:11 +0200
Subject: [PATCH 0135/2340] drm/i915/dsi: Remove dead GLK checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GLK has its own glk_dsi_clear_device_ready() so remove
the dead GLK checks from vlv_dsi_clear_device_ready().
Sadly BXT still uses vlv_dsi_clear_device_ready() so the
code still looks like a mess due to the difference in VLV/CHV
vs. BXT port A/C shenanigans.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101114212.9345-5-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/vlv_dsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index 55da627a8b8d2..64023fb8dd743 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -570,7 +570,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder)
 	drm_dbg_kms(&dev_priv->drm, "\n");
 	for_each_dsi_port(port, intel_dsi->ports) {
 		/* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */
-		i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ?
+		i915_reg_t port_ctrl = IS_BROXTON(dev_priv) ?
 			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A);
 
 		intel_de_write(dev_priv, MIPI_DEVICE_READY(port),
@@ -589,7 +589,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder)
 		 * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI
 		 * Port A only. MIPI Port C has no similar bit for checking.
 		 */
-		if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || port == PORT_A) &&
+		if ((IS_BROXTON(dev_priv) || port == PORT_A) &&
 		    intel_de_wait_for_clear(dev_priv, port_ctrl,
 					    AFE_LATCHOUT, 30))
 			drm_err(&dev_priv->drm, "DSI LP not going Low\n");
-- 
GitLab


From bda4a7ab26725081e222e71e00a98f4462247216 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 1 Nov 2023 13:42:12 +0200
Subject: [PATCH 0136/2340] drm/i915/dsi: Extract port_ctrl_reg()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code to determine the pre-ICL DSI port control register is
repeated several times. Consolidate.

vlv_dsi_clear_device_ready() is left with the open-coded version
due to the weirdness with port A vs. C on VLV/CHV.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101114212.9345-6-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/vlv_dsi.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index 64023fb8dd743..bda49734ca33d 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -561,6 +561,12 @@ static void glk_dsi_clear_device_ready(struct intel_encoder *encoder)
 	glk_dsi_disable_mipi_io(encoder);
 }
 
+static i915_reg_t port_ctrl_reg(struct drm_i915_private *i915, enum port port)
+{
+	return IS_GEMINILAKE(i915) || IS_BROXTON(i915) ?
+		BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+}
+
 static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
@@ -627,8 +633,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder,
 	}
 
 	for_each_dsi_port(port, intel_dsi->ports) {
-		i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ?
-			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		i915_reg_t port_ctrl = port_ctrl_reg(dev_priv, port);
 		u32 temp;
 
 		temp = intel_de_read(dev_priv, port_ctrl);
@@ -664,8 +669,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder)
 	enum port port;
 
 	for_each_dsi_port(port, intel_dsi->ports) {
-		i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ?
-			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		i915_reg_t port_ctrl = port_ctrl_reg(dev_priv, port);
 
 		/* de-assert ip_tg_enable signal */
 		intel_de_rmw(dev_priv, port_ctrl, DPI_ENABLE, 0);
@@ -955,9 +959,8 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
 
 	/* XXX: this only works for one DSI output */
 	for_each_dsi_port(port, intel_dsi->ports) {
-		i915_reg_t ctrl_reg = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ?
-			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
-		bool enabled = intel_de_read(dev_priv, ctrl_reg) & DPI_ENABLE;
+		i915_reg_t port_ctrl = port_ctrl_reg(dev_priv, port);
+		bool enabled = intel_de_read(dev_priv, port_ctrl) & DPI_ENABLE;
 
 		/*
 		 * Due to some hardware limitations on VLV/CHV, the DPI enable
-- 
GitLab


From 27b086382c22efb7e0a16442f7bdc2e120108ef3 Mon Sep 17 00:00:00 2001
From: Kunwu Chan <chentao@kylinos.cn>
Date: Fri, 3 Nov 2023 11:09:22 +0000
Subject: [PATCH 0137/2340] drm/i915: Fix potential spectre vulnerability

Fix smatch warning:
drivers/gpu/drm/i915/gem/i915_gem_context.c:847 set_proto_ctx_sseu()
warn: potential spectre issue 'pc->user_engines' [r] (local cap)

Fixes: d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)")
Cc: <stable@vger.kernel.org> # v5.15+
Signed-off-by: Kunwu Chan <chentao@kylinos.cn>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103110922.430122-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9a9ff84c90d7e..e38f06a6e56eb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -844,6 +844,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv,
 		if (idx >= pc->num_user_engines)
 			return -EINVAL;
 
+		idx = array_index_nospec(idx, pc->num_user_engines);
 		pe = &pc->user_engines[idx];
 
 		/* Only render engine supports RPCS configuration. */
-- 
GitLab


From 15c28f0fc800a93801d56f164f1c4124b068ee58 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 09:32:47 +0000
Subject: [PATCH 0138/2340] drm/i915: Remove unused for_each_uabi_class_engine

Unused macro after 99919be74aa3 ("drm/i915/gem: Zap the i915_gem_object_blt code")
removed some code.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102093248.362659-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8e81974607f27..4352cdd13c447 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -403,11 +403,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915)
 	     (engine__); \
 	     (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node)))
 
-#define for_each_uabi_class_engine(engine__, class__, i915__) \
-	for ((engine__) = intel_engine_lookup_user((i915__), (class__), 0); \
-	     (engine__) && (engine__)->uabi_class == (class__); \
-	     (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node)))
-
 #define INTEL_INFO(i915)	((i915)->__info)
 #define RUNTIME_INFO(i915)	(&(i915)->__runtime)
 #define DRIVER_CAPS(i915)	(&(i915)->caps)
-- 
GitLab


From 22d54ab6596ce4693c8d8b38371136067310a603 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 2 Nov 2023 09:32:48 +0000
Subject: [PATCH 0139/2340] drm/i915: Move for_each_engine* out of i915_drv.h

Iterators operate on struct intel_gt so lets move it to intel_gt.h in
order to make i915_drv.h less of a dumping ground for stuff.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Jani Nikula <jani.nikula@linux.intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102093248.362659-2-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.h          |  1 +
 drivers/gpu/drm/i915/gt/intel_gt.h                 | 14 ++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c |  2 +-
 drivers/gpu/drm/i915/i915_drv.h                    | 14 --------------
 drivers/gpu/drm/i915/selftests/intel_uncore.c      |  2 ++
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index d68675925b79e..1d97c435a0152 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -10,6 +10,7 @@
 #include "i915_request.h"
 #include "intel_engine_types.h"
 #include "intel_wakeref.h"
+#include "intel_gt.h"
 #include "intel_gt_pm.h"
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 2cac499d5aa3e..df0f656c03626 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -167,6 +167,20 @@ void intel_gt_release_all(struct drm_i915_private *i915);
 	     (id__)++) \
 		for_each_if(((gt__) = (i915__)->gt[(id__)]))
 
+/* Simple iterator over all initialised engines */
+#define for_each_engine(engine__, gt__, id__) \
+	for ((id__) = 0; \
+	     (id__) < I915_NUM_ENGINES; \
+	     (id__)++) \
+		for_each_if ((engine__) = (gt__)->engine[(id__)])
+
+/* Iterator over subset of engines selected by mask */
+#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
+	for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \
+	     (tmp__) ? \
+	     ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \
+	     0;)
+
 void intel_gt_info_print(const struct intel_gt_info *info,
 			 struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
index 8f9b874fdc9c8..3aa1d014c14df 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
@@ -6,8 +6,8 @@
 
 #include <drm/drm_print.h>
 
-#include "i915_drv.h" /* for_each_engine! */
 #include "intel_engine.h"
+#include "intel_gt.h"
 #include "intel_gt_debugfs.h"
 #include "intel_gt_engines_debugfs.h"
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4352cdd13c447..739f43af9b3a7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -381,20 +381,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915)
 	return i915->gt[0];
 }
 
-/* Simple iterator over all initialised engines */
-#define for_each_engine(engine__, gt__, id__) \
-	for ((id__) = 0; \
-	     (id__) < I915_NUM_ENGINES; \
-	     (id__)++) \
-		for_each_if ((engine__) = (gt__)->engine[(id__)])
-
-/* Iterator over subset of engines selected by mask */
-#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
-	for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \
-	     (tmp__) ? \
-	     ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \
-	     0;)
-
 #define rb_to_uabi_engine(rb) \
 	rb_entry_safe(rb, struct intel_engine_cs, uabi_node)
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c
index 03ea75cd84dd5..4f98aa8a861ea 100644
--- a/drivers/gpu/drm/i915/selftests/intel_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c
@@ -24,6 +24,8 @@
 
 #include "../i915_selftest.h"
 
+#include "gt/intel_gt.h"
+
 static int intel_fw_table_check(const struct intel_forcewake_range *ranges,
 				unsigned int num_ranges,
 				bool is_watertight)
-- 
GitLab


From 70a3cbbe620ee66afb0c066624196077767e61b2 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Thu, 26 Oct 2023 14:56:36 +0200
Subject: [PATCH 0140/2340] drm/i915/tc: Fix -Wformat-truncation in
 intel_tc_port_init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix below compiler warning:

intel_tc.c:1879:11: error: ‘%d’ directive output may be truncated
writing between 1 and 11 bytes into a region of size 3
[-Werror=format-truncation=]
"%c/TC#%d", port_name(port), tc_port + 1);
           ^~
intel_tc.c:1878:2: note: ‘snprintf’ output between 7 and 17 bytes
into a destination of size 8
  snprintf(tc->port_name, sizeof(tc->port_name),
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    "%c/TC#%d", port_name(port), tc_port + 1);
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

v2: use kasprintf(Imre)
v3: use const for port_name, and fix tc mem leak(Imre)

Fixes: 3eafcddf766b ("drm/i915/tc: Move TC port fields to a new intel_tc_port struct")
Cc: Mika Kahola <mika.kahola@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231026125636.5080-1-nirmoy.das@intel.com
---
 drivers/gpu/drm/i915/display/intel_tc.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index 37b0f8529b4f9..f64d348a969ef 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -58,7 +58,7 @@ struct intel_tc_port {
 	struct delayed_work link_reset_work;
 	int link_refcount;
 	bool legacy_port:1;
-	char port_name[8];
+	const char *port_name;
 	enum tc_port_mode mode;
 	enum tc_port_mode init_mode;
 	enum phy_fia phy_fia;
@@ -1875,8 +1875,12 @@ int intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy)
 	else
 		tc->phy_ops = &icl_tc_phy_ops;
 
-	snprintf(tc->port_name, sizeof(tc->port_name),
-		 "%c/TC#%d", port_name(port), tc_port + 1);
+	tc->port_name = kasprintf(GFP_KERNEL, "%c/TC#%d", port_name(port),
+				  tc_port + 1);
+	if (!tc->port_name) {
+		kfree(tc);
+		return -ENOMEM;
+	}
 
 	mutex_init(&tc->lock);
 	/* TODO: Combine the two works */
@@ -1897,6 +1901,7 @@ void intel_tc_port_cleanup(struct intel_digital_port *dig_port)
 {
 	intel_tc_port_suspend(dig_port);
 
+	kfree(dig_port->tc->port_name);
 	kfree(dig_port->tc);
 	dig_port->tc = NULL;
 }
-- 
GitLab


From 5faf6e1853d30d113ebc9977e015d0152e5e1970 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Wed, 27 Sep 2023 14:38:37 +0100
Subject: [PATCH 0141/2340] drm: Do not round to megabytes for greater than
 1MiB sizes in fdinfo stats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is better not to lose precision and not revert to 1 MiB size
granularity for every size greater than 1 MiB.

Sizes in KiB should not be so troublesome to read (and in fact machine
parsing is I expect the norm here), they align with other api like
/proc/meminfo, and they allow writing tests for the interface without
having to embed drm.ko implementation knowledge into them. (Like knowing
that minimum buffer size one can use for successful verification has to be
1MiB aligned, and on top account for any pre-existing memory utilisation
outside of driver's control.)

But probably even more importantly I think that it is just better to show
the accurate sizes and not arbitrary lose precision for a little bit of a
stretched use case of eyeballing fdinfo text directly.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Adrián Larumbe <adrian.larumbe@collabora.com>
Cc: steven.price@arm.com
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://lore.kernel.org/r/20230927133843.247957-2-tvrtko.ursulin@linux.intel.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/drm_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 446458aca8e99..5ddaffd325865 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -913,7 +913,7 @@ static void print_size(struct drm_printer *p, const char *stat,
 	unsigned u;
 
 	for (u = 0; u < ARRAY_SIZE(units) - 1; u++) {
-		if (sz < SZ_1K)
+		if (sz == 0 || !IS_ALIGNED(sz, SZ_1K))
 			break;
 		sz = div_u64(sz, SZ_1K);
 	}
-- 
GitLab


From ab67821fa9e01ff35790b8bbf256c1b65c3f628f Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 2 Nov 2023 21:44:34 +0200
Subject: [PATCH 0142/2340] drm/i915/dp_mst: Disable DSC on ICL MST outputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enabling DSC on ICL MST outputs is broken leading to FIFO pipe /
transcoder underruns and blank screen. On TGL+ platforms MST/DSC works -
after fixing the known issues in [1] - however to make this work on ICL
requires more work.

So far DSC on MST probably didn't get enabled for users - due to an issue
fixed by [2] - but after fixing that, DSC could get enabled, leading to a
blank screen in ICL/MST configurations which do work atm. To prevent
this disable MST/DSC on ICL for now.

[1] https://lore.kernel.org/all/20231030155843.2251023-1-imre.deak@intel.com
[2] https://lore.kernel.org/all/20231030155843.2251023-31-imre.deak@intel.com

v2 (Ville):
- Use DISPLAY_VER >= 12 instead of > 11.
- Explain the ICL DSC issue in code comment.

Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102194434.2634786-1-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index a32498c7826a9..de608c8ee7b96 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -43,6 +43,7 @@
 #include "intel_dpio_phy.h"
 #include "intel_hdcp.h"
 #include "intel_hotplug.h"
+#include "intel_vdsc.h"
 #include "skl_scaler.h"
 
 static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp,
@@ -297,6 +298,18 @@ static int intel_dp_mst_update_slots(struct intel_encoder *encoder,
 	return 0;
 }
 
+static bool
+intel_dp_mst_dsc_source_support(const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
+
+	/*
+	 * FIXME: Enabling DSC on ICL results in blank screen and FIFO pipe /
+	 * transcoder underruns, re-enable DSC after fixing this issue.
+	 */
+	return DISPLAY_VER(i915) >= 12 && intel_dsc_source_support(crtc_state);
+}
+
 static bool
 intel_dp_mst_compute_config_limits(struct intel_dp *intel_dp,
 				   struct intel_crtc_state *crtc_state,
@@ -375,6 +388,9 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 			    str_yes_no(ret),
 			    str_yes_no(intel_dp->force_dsc_en));
 
+		if (!intel_dp_mst_dsc_source_support(pipe_config))
+			return -EINVAL;
+
 		if (!intel_dp_mst_compute_config_limits(intel_dp,
 							pipe_config,
 							true,
-- 
GitLab


From 09a93cc4f7d1893777f6b788bffe60d64e4d5df7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Tue, 5 Sep 2023 18:06:34 -0300
Subject: [PATCH 0143/2340] drm/v3d: Implement show_fdinfo() callback for GPU
 usage stats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch exposes the accumulated amount of active time per client
through the fdinfo infrastructure. The amount of active time is exposed
for each V3D queue: BIN, RENDER, CSD, TFU and CACHE_CLEAN.

In order to calculate the amount of active time per client, a CPU clock
is used through the function local_clock(). The point where the jobs has
started is marked and is finally compared with the time that the job had
finished.

Moreover, the number of jobs submitted to each queue is also exposed on
fdinfo through the identifier "v3d-jobs-<queue>".

Co-developed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Acked-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230905213416.1290219-3-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.c   | 36 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/v3d/v3d_drv.h   | 23 +++++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_gem.c   |  1 +
 drivers/gpu/drm/v3d/v3d_irq.c   | 21 +++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_sched.c | 20 ++++++++++++++++++
 5 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 1ab46bdf8ad71..82984b4df2a2c 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/sched/clock.h>
 #include <linux/reset.h>
 
 #include <drm/drm_drv.h>
@@ -111,6 +112,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 	v3d_priv->v3d = v3d;
 
 	for (i = 0; i < V3D_MAX_QUEUES; i++) {
+		v3d_priv->enabled_ns[i] = 0;
+		v3d_priv->start_ns[i] = 0;
+		v3d_priv->jobs_sent[i] = 0;
+
 		sched = &v3d->queue[i].sched;
 		drm_sched_entity_init(&v3d_priv->sched_entity[i],
 				      DRM_SCHED_PRIORITY_NORMAL, &sched,
@@ -136,7 +141,35 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file)
 	kfree(v3d_priv);
 }
 
-DEFINE_DRM_GEM_FOPS(v3d_drm_fops);
+static void v3d_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct v3d_file_priv *file_priv = file->driver_priv;
+	u64 timestamp = local_clock();
+	enum v3d_queue queue;
+
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+		/* Note that, in case of a GPU reset, the time spent during an
+		 * attempt of executing the job is not computed in the runtime.
+		 */
+		drm_printf(p, "drm-engine-%s: \t%llu ns\n",
+			   v3d_queue_to_string(queue),
+			   file_priv->start_ns[queue] ? file_priv->enabled_ns[queue]
+						      + timestamp - file_priv->start_ns[queue]
+						      : file_priv->enabled_ns[queue]);
+
+		/* Note that we only count jobs that completed. Therefore, jobs
+		 * that were resubmitted due to a GPU reset are not computed.
+		 */
+		drm_printf(p, "v3d-jobs-%s: \t%llu jobs\n",
+			   v3d_queue_to_string(queue), file_priv->jobs_sent[queue]);
+	}
+}
+
+static const struct file_operations v3d_drm_fops = {
+	.owner = THIS_MODULE,
+	DRM_GEM_FOPS,
+	.show_fdinfo = drm_show_fdinfo,
+};
 
 /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
  * protection between clients.  Note that render nodes would be
@@ -176,6 +209,7 @@ static const struct drm_driver v3d_drm_driver = {
 	.ioctls = v3d_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
 	.fops = &v3d_drm_fops,
+	.show_fdinfo = v3d_show_fdinfo,
 
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 106454f28956b..8f9d93239a174 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -21,6 +21,18 @@ struct reset_control;
 
 #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
+static inline char *v3d_queue_to_string(enum v3d_queue queue)
+{
+	switch (queue) {
+	case V3D_BIN: return "bin";
+	case V3D_RENDER: return "render";
+	case V3D_TFU: return "tfu";
+	case V3D_CSD: return "csd";
+	case V3D_CACHE_CLEAN: return "cache_clean";
+	}
+	return "UNKNOWN";
+}
+
 struct v3d_queue_state {
 	struct drm_gpu_scheduler sched;
 
@@ -167,6 +179,12 @@ struct v3d_file_priv {
 	} perfmon;
 
 	struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
+
+	u64 start_ns[V3D_MAX_QUEUES];
+
+	u64 enabled_ns[V3D_MAX_QUEUES];
+
+	u64 jobs_sent[V3D_MAX_QUEUES];
 };
 
 struct v3d_bo {
@@ -238,6 +256,11 @@ struct v3d_job {
 	 */
 	struct v3d_perfmon *perfmon;
 
+	/* File descriptor of the process that submitted the job that could be used
+	 * for collecting stats by process of GPU usage.
+	 */
+	struct drm_file *file;
+
 	/* Callback for the freeing of the job on refcount going to 0. */
 	void (*free)(struct kref *ref);
 };
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 5688d46d30e69..a33e90f29bd52 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	job = *container;
 	job->v3d = v3d;
 	job->free = free;
+	job->file = file_priv;
 
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 				 v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 9705e0cb576d8..ba390b782103a 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/sched/clock.h>
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
@@ -101,6 +102,11 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->bin_job->base.irq_fence);
+		struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
+
+		file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
+		file->jobs_sent[V3D_BIN]++;
+		file->start_ns[V3D_BIN] = 0;
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -110,6 +116,11 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->render_job->base.irq_fence);
+		struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
+
+		file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
+		file->jobs_sent[V3D_RENDER]++;
+		file->start_ns[V3D_RENDER] = 0;
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -119,6 +130,11 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_CSDDONE(v3d->ver)) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->csd_job->base.irq_fence);
+		struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
+
+		file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
+		file->jobs_sent[V3D_CSD]++;
+		file->start_ns[V3D_CSD] = 0;
 
 		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -155,6 +171,11 @@ v3d_hub_irq(int irq, void *arg)
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->tfu_job->base.irq_fence);
+		struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
+
+		file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
+		file->jobs_sent[V3D_TFU]++;
+		file->start_ns[V3D_TFU] = 0;
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index f59b63e51b14b..19afceacda837 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -18,6 +18,7 @@
  * semaphores to interlock between them.
  */
 
+#include <linux/sched/clock.h>
 #include <linux/kthread.h>
 
 #include "v3d_drv.h"
@@ -76,6 +77,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_bin_job *job = to_bin_job(sched_job);
 	struct v3d_dev *v3d = job->base.v3d;
+	struct v3d_file_priv *file = job->base.file->driver_priv;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 	unsigned long irqflags;
@@ -107,6 +109,8 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	file->start_ns[V3D_BIN] = local_clock();
+
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* Set the current and end address of the control list.
@@ -131,6 +135,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_render_job *job = to_render_job(sched_job);
 	struct v3d_dev *v3d = job->base.v3d;
+	struct v3d_file_priv *file = job->base.file->driver_priv;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 
@@ -158,6 +163,8 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	file->start_ns[V3D_RENDER] = local_clock();
+
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* XXX: Set the QCFG */
@@ -176,6 +183,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_tfu_job *job = to_tfu_job(sched_job);
 	struct v3d_dev *v3d = job->base.v3d;
+	struct v3d_file_priv *file = job->base.file->driver_priv;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 
@@ -190,6 +198,8 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
+	file->start_ns[V3D_TFU] = local_clock();
+
 	V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia);
 	V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis);
 	V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica);
@@ -215,6 +225,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_csd_job *job = to_csd_job(sched_job);
 	struct v3d_dev *v3d = job->base.v3d;
+	struct v3d_file_priv *file = job->base.file->driver_priv;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 	int i, csd_cfg0_reg, csd_cfg_reg_count;
@@ -233,6 +244,8 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 
+	file->start_ns[V3D_CSD] = local_clock();
+
 	v3d_switch_perfmon(v3d, &job->base);
 
 	csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver);
@@ -250,9 +263,16 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_dev *v3d = job->v3d;
+	struct v3d_file_priv *file = job->file->driver_priv;
+
+	file->start_ns[V3D_CACHE_CLEAN] = local_clock();
 
 	v3d_clean_caches(v3d);
 
+	file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - file->start_ns[V3D_CACHE_CLEAN];
+	file->jobs_sent[V3D_CACHE_CLEAN]++;
+	file->start_ns[V3D_CACHE_CLEAN] = 0;
+
 	return NULL;
 }
 
-- 
GitLab


From 509433d8146c64ca9e0bcc370ec910821fffe80c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Tue, 5 Sep 2023 18:06:35 -0300
Subject: [PATCH 0144/2340] drm/v3d: Expose the total GPU usage stats on sysfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous patch exposed the accumulated amount of active time per
client for each V3D queue. But this doesn't provide a global notion of
the GPU usage.

Therefore, provide the accumulated amount of active time for each V3D
queue (BIN, RENDER, CSD, TFU and CACHE_CLEAN), considering all the jobs
submitted to the queue, independent of the client.

This data is exposed through the sysfs interface, so that if the
interface is queried at two different points of time the usage percentage
of each of the queues can be calculated.

Co-developed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Acked-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230905213416.1290219-3-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/Makefile    |  3 +-
 drivers/gpu/drm/v3d/v3d_drv.c   |  9 +++++
 drivers/gpu/drm/v3d/v3d_drv.h   |  8 ++++
 drivers/gpu/drm/v3d/v3d_gem.c   |  6 ++-
 drivers/gpu/drm/v3d/v3d_irq.c   | 28 +++++++++++++
 drivers/gpu/drm/v3d/v3d_sched.c | 15 ++++++-
 drivers/gpu/drm/v3d/v3d_sysfs.c | 69 +++++++++++++++++++++++++++++++++
 7 files changed, 135 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/v3d/v3d_sysfs.c

diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile
index e8b3141370207..4b21b20e49981 100644
--- a/drivers/gpu/drm/v3d/Makefile
+++ b/drivers/gpu/drm/v3d/Makefile
@@ -11,7 +11,8 @@ v3d-y := \
 	v3d_mmu.o \
 	v3d_perfmon.o \
 	v3d_trace_points.o \
-	v3d_sched.o
+	v3d_sched.o \
+	v3d_sysfs.o
 
 v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
 
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 82984b4df2a2c..44a1ca57d6a44 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -316,8 +316,14 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
 	if (ret)
 		goto irq_disable;
 
+	ret = v3d_sysfs_init(dev);
+	if (ret)
+		goto drm_unregister;
+
 	return 0;
 
+drm_unregister:
+	drm_dev_unregister(drm);
 irq_disable:
 	v3d_irq_disable(v3d);
 gem_destroy:
@@ -331,6 +337,9 @@ static void v3d_platform_drm_remove(struct platform_device *pdev)
 {
 	struct drm_device *drm = platform_get_drvdata(pdev);
 	struct v3d_dev *v3d = to_v3d_dev(drm);
+	struct device *dev = &pdev->dev;
+
+	v3d_sysfs_destroy(dev);
 
 	drm_dev_unregister(drm);
 
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 8f9d93239a174..4c59fefaa0b4b 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -38,6 +38,10 @@ struct v3d_queue_state {
 
 	u64 fence_context;
 	u64 emit_seqno;
+
+	u64 start_ns;
+	u64 enabled_ns;
+	u64 jobs_sent;
 };
 
 /* Performance monitor object. The perform lifetime is controlled by userspace
@@ -441,3 +445,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv);
+
+/* v3d_sysfs.c */
+int v3d_sysfs_init(struct device *dev);
+void v3d_sysfs_destroy(struct device *dev);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index a33e90f29bd52..712675134c048 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -1014,8 +1014,12 @@ v3d_gem_init(struct drm_device *dev)
 	u32 pt_size = 4096 * 1024;
 	int ret, i;
 
-	for (i = 0; i < V3D_MAX_QUEUES; i++)
+	for (i = 0; i < V3D_MAX_QUEUES; i++) {
 		v3d->queue[i].fence_context = dma_fence_context_alloc(1);
+		v3d->queue[i].start_ns = 0;
+		v3d->queue[i].enabled_ns = 0;
+		v3d->queue[i].jobs_sent = 0;
+	}
 
 	spin_lock_init(&v3d->mm_lock);
 	spin_lock_init(&v3d->job_lock);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index ba390b782103a..afc76390a197a 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -103,10 +103,17 @@ v3d_irq(int irq, void *arg)
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->bin_job->base.irq_fence);
 		struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
+		u64 runtime = local_clock() - file->start_ns[V3D_BIN];
 
 		file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
 		file->jobs_sent[V3D_BIN]++;
+		v3d->queue[V3D_BIN].jobs_sent++;
+
 		file->start_ns[V3D_BIN] = 0;
+		v3d->queue[V3D_BIN].start_ns = 0;
+
+		file->enabled_ns[V3D_BIN] += runtime;
+		v3d->queue[V3D_BIN].enabled_ns += runtime;
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -117,10 +124,17 @@ v3d_irq(int irq, void *arg)
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->render_job->base.irq_fence);
 		struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
+		u64 runtime = local_clock() - file->start_ns[V3D_RENDER];
 
 		file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
 		file->jobs_sent[V3D_RENDER]++;
+		v3d->queue[V3D_RENDER].jobs_sent++;
+
 		file->start_ns[V3D_RENDER] = 0;
+		v3d->queue[V3D_RENDER].start_ns = 0;
+
+		file->enabled_ns[V3D_RENDER] += runtime;
+		v3d->queue[V3D_RENDER].enabled_ns += runtime;
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -131,10 +145,17 @@ v3d_irq(int irq, void *arg)
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->csd_job->base.irq_fence);
 		struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
+		u64 runtime = local_clock() - file->start_ns[V3D_CSD];
 
 		file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
 		file->jobs_sent[V3D_CSD]++;
+		v3d->queue[V3D_CSD].jobs_sent++;
+
 		file->start_ns[V3D_CSD] = 0;
+		v3d->queue[V3D_CSD].start_ns = 0;
+
+		file->enabled_ns[V3D_CSD] += runtime;
+		v3d->queue[V3D_CSD].enabled_ns += runtime;
 
 		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -172,10 +193,17 @@ v3d_hub_irq(int irq, void *arg)
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->tfu_job->base.irq_fence);
 		struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
+		u64 runtime = local_clock() - file->start_ns[V3D_TFU];
 
 		file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
 		file->jobs_sent[V3D_TFU]++;
+		v3d->queue[V3D_TFU].jobs_sent++;
+
 		file->start_ns[V3D_TFU] = 0;
+		v3d->queue[V3D_TFU].start_ns = 0;
+
+		file->enabled_ns[V3D_TFU] += runtime;
+		v3d->queue[V3D_TFU].enabled_ns += runtime;
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 19afceacda837..fccbea2a5f2eb 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -110,6 +110,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 			    job->start, job->end);
 
 	file->start_ns[V3D_BIN] = local_clock();
+	v3d->queue[V3D_BIN].start_ns = file->start_ns[V3D_BIN];
 
 	v3d_switch_perfmon(v3d, &job->base);
 
@@ -164,6 +165,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
 			    job->start, job->end);
 
 	file->start_ns[V3D_RENDER] = local_clock();
+	v3d->queue[V3D_RENDER].start_ns = file->start_ns[V3D_RENDER];
 
 	v3d_switch_perfmon(v3d, &job->base);
 
@@ -199,6 +201,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
 	file->start_ns[V3D_TFU] = local_clock();
+	v3d->queue[V3D_TFU].start_ns = file->start_ns[V3D_TFU];
 
 	V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia);
 	V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis);
@@ -245,6 +248,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 
 	file->start_ns[V3D_CSD] = local_clock();
+	v3d->queue[V3D_CSD].start_ns = file->start_ns[V3D_CSD];
 
 	v3d_switch_perfmon(v3d, &job->base);
 
@@ -264,14 +268,23 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_dev *v3d = job->v3d;
 	struct v3d_file_priv *file = job->file->driver_priv;
+	u64 runtime;
 
 	file->start_ns[V3D_CACHE_CLEAN] = local_clock();
+	v3d->queue[V3D_CACHE_CLEAN].start_ns = file->start_ns[V3D_CACHE_CLEAN];
 
 	v3d_clean_caches(v3d);
 
-	file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - file->start_ns[V3D_CACHE_CLEAN];
+	runtime = local_clock() - file->start_ns[V3D_CACHE_CLEAN];
+
+	file->enabled_ns[V3D_CACHE_CLEAN] += runtime;
+	v3d->queue[V3D_CACHE_CLEAN].enabled_ns += runtime;
+
 	file->jobs_sent[V3D_CACHE_CLEAN]++;
+	v3d->queue[V3D_CACHE_CLEAN].jobs_sent++;
+
 	file->start_ns[V3D_CACHE_CLEAN] = 0;
+	v3d->queue[V3D_CACHE_CLEAN].start_ns = 0;
 
 	return NULL;
 }
diff --git a/drivers/gpu/drm/v3d/v3d_sysfs.c b/drivers/gpu/drm/v3d/v3d_sysfs.c
new file mode 100644
index 0000000000000..d106845ba890c
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_sysfs.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Igalia S.L.
+ */
+
+#include <linux/sched/clock.h>
+#include <linux/sysfs.h>
+
+#include "v3d_drv.h"
+
+static ssize_t
+gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct v3d_dev *v3d = to_v3d_dev(drm);
+	enum v3d_queue queue;
+	u64 timestamp = local_clock();
+	u64 active_runtime;
+	ssize_t len = 0;
+
+	len += sysfs_emit(buf, "queue\ttimestamp\tjobs\truntime\n");
+
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+		if (v3d->queue[queue].start_ns)
+			active_runtime = timestamp - v3d->queue[queue].start_ns;
+		else
+			active_runtime = 0;
+
+		/* Each line will display the queue name, timestamp, the number
+		 * of jobs sent to that queue and the runtime, as can be seem here:
+		 *
+		 * queue	timestamp	jobs	runtime
+		 * bin		239043069420	22620	17438164056
+		 * render	239043069420	22619	27284814161
+		 * tfu		239043069420	8763	394592566
+		 * csd		239043069420	3168	10787905530
+		 * cache_clean	239043069420	6127	237375940
+		 */
+		len += sysfs_emit_at(buf, len, "%s\t%llu\t%llu\t%llu\n",
+				     v3d_queue_to_string(queue),
+				     timestamp,
+				     v3d->queue[queue].jobs_sent,
+				     v3d->queue[queue].enabled_ns + active_runtime);
+	}
+
+	return len;
+}
+static DEVICE_ATTR_RO(gpu_stats);
+
+static struct attribute *v3d_sysfs_entries[] = {
+	&dev_attr_gpu_stats.attr,
+	NULL,
+};
+
+static struct attribute_group v3d_sysfs_attr_group = {
+	.attrs = v3d_sysfs_entries,
+};
+
+int
+v3d_sysfs_init(struct device *dev)
+{
+	return sysfs_create_group(&dev->kobj, &v3d_sysfs_attr_group);
+}
+
+void
+v3d_sysfs_destroy(struct device *dev)
+{
+	return sysfs_remove_group(&dev->kobj, &v3d_sysfs_attr_group);
+}
-- 
GitLab


From 27d9620e9a9a6bc27a646b464b85860d91e21af3 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Date: Mon, 23 Oct 2023 11:05:56 +0200
Subject: [PATCH 0145/2340] drm/panel: nt35510: fix typo

Replace 'HFP' with 'HBP'.

Fixes: 899f24ed8d3a ("drm/panel: Add driver for Novatek NT35510-based panels")
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023090613.1694133-1-dario.binacchi@amarulasolutions.com
---
 drivers/gpu/drm/panel/panel-novatek-nt35510.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35510.c b/drivers/gpu/drm/panel/panel-novatek-nt35510.c
index d6dceb8580081..83a9cf53d2690 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt35510.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt35510.c
@@ -1023,7 +1023,7 @@ static const struct nt35510_config nt35510_hydis_hva40wv1 = {
 		.hdisplay = 480,
 		.hsync_start = 480 + 2, /* HFP = 2 */
 		.hsync_end = 480 + 2 + 0, /* HSync = 0 */
-		.htotal = 480 + 2 + 0 + 5, /* HFP = 5 */
+		.htotal = 480 + 2 + 0 + 5, /* HBP = 5 */
 		.vdisplay = 800,
 		.vsync_start = 800 + 2, /* VFP = 2 */
 		.vsync_end = 800 + 2 + 0, /* VSync = 0 */
-- 
GitLab


From 34df0a031d8f3488fe72627b041a1f82437fa6ec Mon Sep 17 00:00:00 2001
From: Jonathan Cavitt <jonathan.cavitt@intel.com>
Date: Thu, 2 Nov 2023 10:58:31 -0700
Subject: [PATCH 0146/2340] drm/i915/gt: Temporarily disable CPU caching into
 DMA for MTL

FIXME: It is suspected that some Address Translation Service (ATS)
issue on IOMMU is causing CAT errors to occur on some MTL workloads.
Applying a write barrier to the ppgtt set entry functions appeared
to have no effect, so we must temporarily use I915_MAP_WC in the
map_pt_dma class of functions on MTL until a proper ATS solution is
found.

Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
CC: Chris Wilson <chris.p.wilson@linux.intel.com>
Reviewed-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Acked-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102175831.872763-1-jonathan.cavitt@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4fbed27ef0ecc..21719563a602a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -95,6 +95,16 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 	void *vaddr;
 
 	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
@@ -109,6 +119,16 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
 	void *vaddr;
 
 	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
-- 
GitLab


From 2b981d57e480e024cde2a0ecb6edee28a8ec39d6 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Mon, 6 Nov 2023 13:42:28 +0200
Subject: [PATCH 0147/2340] drm/i915/display: Support PSR entry VSC packet to
 be transmitted one frame earlier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Display driver shall read DPCD 00071h[3:1] during configuration
to get PSR setup time. This register provides the setup time
requirement on the VSC SDP entry packet. If setup time cannot be
met with the current timings
(e.g., PSR setup time + other blanking requirements > blanking time),
driver should enable sending VSC SDP one frame earlier before sending
the capture frame.

BSpec: 69895 (PSR Entry Setup Frames 17:16)

v2: Write frames before su entry to correct register (Ville, Jouni)
    Move frames before su entry calculation to it's
    own function (Ville, Jouni)
    Rename PSR Entry Setup Frames register to indicate
    Lunarlake specificity (Jouni)
v3: Modify setup entry frames calculation function to
    return the actual frames (Ville)
    Match comment with actual implementation (Jouni)
v4: Drop "set" from function naming (Jouni, Ville)
    Use i915 instead of dev_priv (Jouni)

Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231106114228.146574-1-mika.kahola@intel.com
---
 .../drm/i915/display/intel_display_types.h    |  1 +
 drivers/gpu/drm/i915/display/intel_psr.c      | 81 +++++++++++++++----
 drivers/gpu/drm/i915/display/intel_psr_regs.h |  2 +
 3 files changed, 70 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 047fe3f8905ac..92f06d67fd1e7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1708,6 +1708,7 @@ struct intel_psr {
 	u32 dc3co_exitline;
 	u32 dc3co_exit_delay;
 	struct delayed_work dc3co_work;
+	u8 entry_setup_frames;
 };
 
 struct intel_dp {
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 920f773361630..3691f882e1c0a 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -593,6 +593,9 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp)
 	if (intel_dp->psr.req_psr2_sdp_prior_scanline)
 		dpcd_val |= DP_PSR_SU_REGION_SCANLINE_CAPTURE;
 
+	if (intel_dp->psr.entry_setup_frames > 0)
+		dpcd_val |= DP_PSR_FRAME_CAPTURE;
+
 	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val);
 
 	drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0);
@@ -691,6 +694,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp)
 	if (DISPLAY_VER(dev_priv) >= 8)
 		val |= EDP_PSR_CRC_ENABLE;
 
+	if (DISPLAY_VER(dev_priv) >= 20)
+		val |= LNL_EDP_PSR_ENTRY_SETUP_FRAMES(intel_dp->psr.entry_setup_frames);
+
 	intel_de_rmw(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder),
 		     ~EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK, val);
 }
@@ -728,11 +734,27 @@ static int psr2_block_count(struct intel_dp *intel_dp)
 	return psr2_block_count_lines(intel_dp) / 4;
 }
 
+static u8 frames_before_su_entry(struct intel_dp *intel_dp)
+{
+	u8 frames_before_su_entry;
+
+	frames_before_su_entry = max_t(u8,
+				       intel_dp->psr.sink_sync_latency + 1,
+				       2);
+
+	/* Entry setup frames must be at least 1 less than frames before SU entry */
+	if (intel_dp->psr.entry_setup_frames >= frames_before_su_entry)
+		frames_before_su_entry = intel_dp->psr.entry_setup_frames + 1;
+
+	return frames_before_su_entry;
+}
+
 static void hsw_activate_psr2(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	enum transcoder cpu_transcoder = intel_dp->psr.transcoder;
 	u32 val = EDP_PSR2_ENABLE;
+	u32 psr_val = 0;
 
 	val |= EDP_PSR2_IDLE_FRAMES(psr_compute_idle_frames(intel_dp));
 
@@ -742,7 +764,8 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 	if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12)
 		val |= EDP_Y_COORDINATE_ENABLE;
 
-	val |= EDP_PSR2_FRAME_BEFORE_SU(max_t(u8, intel_dp->psr.sink_sync_latency + 1, 2));
+	val |= EDP_PSR2_FRAME_BEFORE_SU(frames_before_su_entry(intel_dp));
+
 	val |= intel_psr2_get_tp_time(intel_dp);
 
 	if (DISPLAY_VER(dev_priv) >= 12) {
@@ -786,6 +809,9 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 	if (intel_dp->psr.req_psr2_sdp_prior_scanline)
 		val |= EDP_PSR2_SU_SDP_SCANLINE;
 
+	if (DISPLAY_VER(dev_priv) >= 20)
+		psr_val |= LNL_EDP_PSR_ENTRY_SETUP_FRAMES(intel_dp->psr.entry_setup_frames);
+
 	if (intel_dp->psr.psr2_sel_fetch_enabled) {
 		u32 tmp;
 
@@ -799,7 +825,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 	 * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is
 	 * recommending keep this bit unset while PSR2 is enabled.
 	 */
-	intel_de_write(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder), 0);
+	intel_de_write(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder), psr_val);
 
 	intel_de_write(dev_priv, EDP_PSR2_CTL(cpu_transcoder), val);
 }
@@ -1067,6 +1093,39 @@ static bool _compute_psr2_wake_times(struct intel_dp *intel_dp,
 	return true;
 }
 
+static u8 intel_psr_entry_setup_frames(struct intel_dp *intel_dp,
+				       const struct drm_display_mode *adjusted_mode)
+{
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	int psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd);
+	u8 entry_setup_frames = 0;
+
+	if (psr_setup_time < 0) {
+		drm_dbg_kms(&i915->drm,
+			    "PSR condition failed: Invalid PSR setup time (0x%02x)\n",
+			    intel_dp->psr_dpcd[1]);
+		return -ETIME;
+	}
+
+	if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) >
+	    adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) {
+		if (DISPLAY_VER(i915) >= 20) {
+			/* setup entry frames can be up to 3 frames */
+			entry_setup_frames = 1;
+			drm_dbg_kms(&i915->drm,
+				    "PSR setup entry frames %d\n",
+				    entry_setup_frames);
+		} else {
+			drm_dbg_kms(&i915->drm,
+				    "PSR condition failed: PSR setup time (%d us) too long\n",
+				    psr_setup_time);
+			return -ETIME;
+		}
+	}
+
+	return entry_setup_frames;
+}
+
 static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 				    struct intel_crtc_state *crtc_state)
 {
@@ -1214,7 +1273,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
-	int psr_setup_time;
+	u8 entry_setup_frames;
 
 	/*
 	 * Current PSR panels don't work reliably with VRR enabled
@@ -1243,19 +1302,13 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
 		return;
 	}
 
-	psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd);
-	if (psr_setup_time < 0) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "PSR condition failed: Invalid PSR setup time (0x%02x)\n",
-			    intel_dp->psr_dpcd[1]);
-		return;
-	}
+	entry_setup_frames = intel_psr_entry_setup_frames(intel_dp, adjusted_mode);
 
-	if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) >
-	    adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) {
+	if (entry_setup_frames >= 0) {
+		intel_dp->psr.entry_setup_frames = entry_setup_frames;
+	} else {
 		drm_dbg_kms(&dev_priv->drm,
-			    "PSR condition failed: PSR setup time (%d us) too long\n",
-			    psr_setup_time);
+			    "PSR condition failed: PSR setup timing not met\n");
 		return;
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_psr_regs.h b/drivers/gpu/drm/i915/display/intel_psr_regs.h
index d39951383c929..efe4306b37e0c 100644
--- a/drivers/gpu/drm/i915/display/intel_psr_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_psr_regs.h
@@ -35,6 +35,8 @@
 #define   EDP_PSR_MIN_LINK_ENTRY_TIME_0_LINES	REG_FIELD_PREP(EDP_PSR_MIN_LINK_ENTRY_TIME_MASK, 3)
 #define   EDP_PSR_MAX_SLEEP_TIME_MASK		REG_GENMASK(24, 20)
 #define   EDP_PSR_MAX_SLEEP_TIME(x)		REG_FIELD_PREP(EDP_PSR_MAX_SLEEP_TIME_MASK, (x))
+#define   LNL_EDP_PSR_ENTRY_SETUP_FRAMES_MASK	REG_GENMASK(17, 16)
+#define   LNL_EDP_PSR_ENTRY_SETUP_FRAMES(x)	REG_FIELD_PREP(LNL_EDP_PSR_ENTRY_SETUP_FRAMES_MASK, (x))
 #define   EDP_PSR_SKIP_AUX_EXIT			REG_BIT(12)
 #define   EDP_PSR_TP_MASK			REG_BIT(11)
 #define   EDP_PSR_TP_TP1_TP2			REG_FIELD_PREP(EDP_PSR_TP_MASK, 0)
-- 
GitLab


From c015fb6d01adb616fb54824feb55ce5ab18e8ca1 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Mon, 6 Nov 2023 14:08:26 +0100
Subject: [PATCH 0148/2340] accel/ivpu: Fix compilation with CONFIG_PM=n

Use pm_runtime_status_suspended() instead of dev->power.runtime_status
field that is not available without PM.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://lore.kernel.org/all/20231106130827.1600948-1-jacek.lawrynowicz@linux.intel.com
---
 drivers/accel/ivpu/ivpu_ipc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 255f2b8b0b5e3..618dbc17df801 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -6,6 +6,7 @@
 #include <linux/genalloc.h>
 #include <linux/highmem.h>
 #include <linux/kthread.h>
+#include <linux/pm_runtime.h>
 #include <linux/wait.h>
 
 #include "ivpu_drv.h"
@@ -318,8 +319,7 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r
 	struct vpu_jsm_msg hb_resp;
 	int ret, hb_ret;
 
-	drm_WARN_ON(&vdev->drm,
-		    vdev->drm.dev->power.runtime_status == RPM_SUSPENDED);
+	drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
 
 	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
 	if (ret != -ETIMEDOUT)
-- 
GitLab


From 3fef3e6ff86a405e51f4a7072109147b4b47caca Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 2 Nov 2023 17:52:22 +0200
Subject: [PATCH 0149/2340] drm/i915: move display mutex inits to display code

The core code has no business accessing i915->display directly. These
could be further spread to respective files, but this is a start.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102155223.2298316-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_driver.c | 6 ++++++
 drivers/gpu/drm/i915/i915_driver.c                  | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 44b59ac301e69..0a5b922f2ad6f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -181,6 +181,12 @@ void intel_display_driver_early_probe(struct drm_i915_private *i915)
 	if (!HAS_DISPLAY(i915))
 		return;
 
+	mutex_init(&i915->display.backlight.lock);
+	mutex_init(&i915->display.audio.mutex);
+	mutex_init(&i915->display.wm.wm_mutex);
+	mutex_init(&i915->display.pps.mutex);
+	mutex_init(&i915->display.hdcp.hdcp_mutex);
+
 	intel_display_irq_init(i915);
 	intel_dkl_phy_init(i915);
 	intel_color_init_hooks(i915);
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 3d86db5638fc8..8dead878e51ab 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -230,16 +230,10 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->gpu_error.lock);
-	mutex_init(&dev_priv->display.backlight.lock);
 
 	mutex_init(&dev_priv->sb_lock);
 	cpu_latency_qos_add_request(&dev_priv->sb_qos, PM_QOS_DEFAULT_VALUE);
 
-	mutex_init(&dev_priv->display.audio.mutex);
-	mutex_init(&dev_priv->display.wm.wm_mutex);
-	mutex_init(&dev_priv->display.pps.mutex);
-	mutex_init(&dev_priv->display.hdcp.hdcp_mutex);
-
 	i915_memcpy_init_early(dev_priv);
 	intel_runtime_pm_init_early(&dev_priv->runtime_pm);
 
-- 
GitLab


From 607a2c64e879580ef361af65d6052367057bee14 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 2 Nov 2023 17:52:23 +0200
Subject: [PATCH 0150/2340] drm/i915: move display spinlock init to display
 code

The gem code has no business accessing i915->display directly.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102155223.2298316-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_driver.c | 1 +
 drivers/gpu/drm/i915/i915_gem.c                     | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 0a5b922f2ad6f..62f7b10484bec 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -181,6 +181,7 @@ void intel_display_driver_early_probe(struct drm_i915_private *i915)
 	if (!HAS_DISPLAY(i915))
 		return;
 
+	spin_lock_init(&i915->display.fb_tracking.lock);
 	mutex_init(&i915->display.backlight.lock);
 	mutex_init(&i915->display.audio.mutex);
 	mutex_init(&i915->display.wm.wm_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 147d5b95b9ac2..c9caa86387898 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1301,8 +1301,6 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv)
 {
 	i915_gem_init__mm(dev_priv);
 	i915_gem_init__contexts(dev_priv);
-
-	spin_lock_init(&dev_priv->display.fb_tracking.lock);
 }
 
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
-- 
GitLab


From 1d9e6bc97eabac150b775d91d9a656ba24e92014 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Mon, 6 Nov 2023 17:19:59 -0300
Subject: [PATCH 0151/2340] drm/i915/xelpmp: Add Wa_16021867713

This workaround applies to all steppings of Xe_LPM+. Implement the KMD
part.

v2:
    - Put the definition of VDBOX_CGCTL3F1C() in the correct sort order.
      (Matt)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231106201959.156943-1-gustavo.sousa@intel.com
---
 drivers/gpu/drm/i915/gt/intel_engine_regs.h |  2 ++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index c0c8c12edea10..a8eac59e37793 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -263,5 +263,7 @@
 #define VDBOX_CGCTL3F18(base)			_MMIO((base) + 0x3f18)
 #define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
 
+#define VDBOX_CGCTL3F1C(base)			_MMIO((base) + 0x3f1c)
+#define   MFXPIPE_CLKGATE_DIS			REG_BIT(3)
 
 #endif /* __INTEL_ENGINE_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 12859b8d2092a..63205edfea509 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1662,9 +1662,23 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	debug_dump_steering(gt);
 }
 
+static void
+wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+	struct intel_engine_cs *engine;
+	int id;
+
+	for_each_engine(engine, gt, id)
+		if (engine->class == VIDEO_DECODE_CLASS)
+			wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
+				    MFXPIPE_CLKGATE_DIS);
+}
+
 static void
 xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
+	wa_16021867713(gt, wal);
+
 	/*
 	 * Wa_14018778641
 	 * Wa_18018781329
-- 
GitLab


From bc8d6a9df99038f61adf2881ad9f717abe414e06 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Thu, 2 Nov 2023 18:17:20 -0400
Subject: [PATCH 0152/2340] drm/sched: Don't disturb the entity when in RR-mode
 scheduling

Don't call drm_sched_select_entity() in drm_sched_run_job_queue().  In fact,
rename __drm_sched_run_job_queue() to just drm_sched_run_job_queue(), and let
it do just that, schedule the work item for execution.

The problem is that drm_sched_run_job_queue() calls drm_sched_select_entity()
to determine if the scheduler has an entity ready in one of its run-queues,
and in the case of the Round-Robin (RR) scheduling, the function
drm_sched_rq_select_entity_rr() does just that, selects the _next_ entity
which is ready, sets up the run-queue and completion and returns that
entity. The FIFO scheduling algorithm is unaffected.

Now, since drm_sched_run_job_work() also calls drm_sched_select_entity(), then
in the case of RR scheduling, that would result in drm_sched_select_entity()
having been called twice, which may result in skipping a ready entity if more
than one entity is ready. This commit fixes this by eliminating the call to
drm_sched_select_entity() from drm_sched_run_job_queue(), and leaves it only
in drm_sched_run_job_work().

v2: Rebased on top of Tvrtko's renames series of patches. (Luben)
    Add fixes-tag. (Tvrtko)

Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Fixes: f7fe64ad0f22ff ("drm/sched: Split free_job into own work item")
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107041020.10035-2-ltuikov89@gmail.com
---
 drivers/gpu/drm/scheduler/sched_main.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 27843e37d9b76..cd0dc3f81d05f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -256,10 +256,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 }
 
 /**
- * __drm_sched_run_job_queue - enqueue run-job work
+ * drm_sched_run_job_queue - enqueue run-job work
  * @sched: scheduler instance
  */
-static void __drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
+static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
 {
 	if (!READ_ONCE(sched->pause_submit))
 		queue_work(sched->submit_wq, &sched->work_run_job);
@@ -928,7 +928,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
 void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
 {
 	if (drm_sched_can_queue(sched))
-		__drm_sched_run_job_queue(sched);
+		drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -1040,16 +1040,6 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 }
 EXPORT_SYMBOL(drm_sched_pick_best);
 
-/**
- * drm_sched_run_job_queue - enqueue run-job work if there are ready entities
- * @sched: scheduler instance
- */
-static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
-{
-	if (drm_sched_select_entity(sched))
-		__drm_sched_run_job_queue(sched);
-}
-
 /**
  * drm_sched_free_job_work - worker to call free_job
  *
-- 
GitLab


From 89d04995f76ce8318fe752feb33b1a45893b1a38 Mon Sep 17 00:00:00 2001
From: Emma Anholt <emma@anholt.net>
Date: Tue, 31 Oct 2023 11:16:48 -0700
Subject: [PATCH 0153/2340] MAINTAINERS: Drop Emma Anholt from all M lines.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I am not active in the Linux kernel and don't want to see patches.

Signed-off-by: Emma Anholt <emma@anholt.net>
Acked-by: Maíra Canal <mcanal@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031181648.48675-1-emma@anholt.net
---
 MAINTAINERS | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f13e476ed8038..4e1a63479cad4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6405,8 +6405,7 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/sun4i/sun8i*
 
 DRM DRIVER FOR ARM PL111 CLCD
-M:	Emma Anholt <emma@anholt.net>
-S:	Supported
+S:	Orphan
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/pl111/
 
@@ -6521,8 +6520,7 @@ F:	Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
 F:	drivers/gpu/drm/panel/panel-himax-hx8394.c
 
 DRM DRIVER FOR HX8357D PANELS
-M:	Emma Anholt <emma@anholt.net>
-S:	Maintained
+S:	Orphan
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/display/himax,hx8357d.txt
 F:	drivers/gpu/drm/tiny/hx8357d.c
@@ -7106,7 +7104,6 @@ F:	Documentation/devicetree/bindings/display/ti/
 F:	drivers/gpu/drm/omapdrm/
 
 DRM DRIVERS FOR V3D
-M:	Emma Anholt <emma@anholt.net>
 M:	Melissa Wen <mwen@igalia.com>
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -7115,7 +7112,6 @@ F:	drivers/gpu/drm/v3d/
 F:	include/uapi/drm/v3d_drm.h
 
 DRM DRIVERS FOR VC4
-M:	Emma Anholt <emma@anholt.net>
 M:	Maxime Ripard <mripard@kernel.org>
 S:	Supported
 T:	git git://github.com/anholt/linux
-- 
GitLab


From 560ea72c76eb6d0c59f77580414e64cc09f1093d Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Mon, 30 Oct 2023 01:13:08 +0200
Subject: [PATCH 0154/2340] drm/i915/dp_mst: Fix race between connector
 registration and setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After drm_connector_init() is called the connector is visible to the
rest of the kernel via the drm_mode_config::connector_list. Make
sure that the DSC AUX device and capabilities are setup by that time.

Another race condition is adding the connector to the connector list
before drm_connector_helper_add() sets the connector helper functions.
That's an unrelated issue, for which the fix is for a follow-up. One
solution would be adding the connector to the connector list only
during its registration in drm_connector_register().

Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Fixes: 808b43fa7e56 ("drm/i915/dp_mst: Set connector DSC capabilities and decompression AUX")
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-2-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index de608c8ee7b96..4625900416e9e 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1168,6 +1168,14 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 	intel_connector->port = port;
 	drm_dp_mst_get_port_malloc(port);
 
+	/*
+	 * TODO: set the AUX for the actual MST port decompressing the stream.
+	 * At the moment the driver only supports enabling this globally in the
+	 * first downstream MST branch, via intel_dp's (root port) AUX.
+	 */
+	intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
+	intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
+
 	connector = &intel_connector->base;
 	ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
 				 DRM_MODE_CONNECTOR_DisplayPort);
@@ -1179,14 +1187,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 
 	drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs);
 
-	/*
-	 * TODO: set the AUX for the actual MST port decompressing the stream.
-	 * At the moment the driver only supports enabling this globally in the
-	 * first downstream MST branch, via intel_dp's (root port) AUX.
-	 */
-	intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
-	intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
-
 	for_each_pipe(dev_priv, pipe) {
 		struct drm_encoder *enc =
 			&intel_dp->mst_encoders[pipe]->base.base;
-- 
GitLab


From 7707dd6022593f3edd8e182e7935870cf326f874 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 24 Oct 2023 04:08:57 +0300
Subject: [PATCH 0155/2340] drm/dp_mst: Fix fractional DSC bpp handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current code does '(bpp << 4) / 16' in the MST PBN
calculation, but that is just the same as 'bpp' so the
DSC codepath achieves absolutely nothing. Fix it up so that
the fractional part of the bpp value is actually used instead
of truncated away. 64*1006 has enough zero lsbs that we can
just shift that down in the dividend and thus still manage
to stick to a 32bit divisor.

And while touching this, let's just make the whole thing more
straightforward by making the passed in bpp value .4 binary
fixed point always, instead of having to pass in different
things based on whether DSC is enabled or not.

v2:
- Fix DSC kunit test cases.

Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: David Francis <David.Francis@amd.com>
Cc: Mikita Lipski <mikita.lipski@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Fixes: dc48529fb14e ("drm/dp_mst: Add PBN calculation for DSC modes")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
[Imre: Fix kunit test cases]
Acked-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-3-imre.deak@intel.com
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  2 +-
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |  2 +-
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 20 +++++--------------
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  5 ++---
 drivers/gpu/drm/nouveau/dispnv50/disp.c       |  3 +--
 .../gpu/drm/tests/drm_dp_mst_helper_test.c    |  6 +++---
 include/drm/display/drm_dp_mst_helper.h       |  2 +-
 7 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c6fd34bab3585..df3c42bb92742 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6876,7 +6876,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
 								    max_bpc);
 		bpp = convert_dc_color_depth_into_bpc(color_depth) * 3;
 		clock = adjusted_mode->clock;
-		dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false);
+		dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp << 4);
 	}
 
 	dm_new_connector_state->vcpi_slots =
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 57230661132bd..2afd1bc74978d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1636,7 +1636,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	} else {
 		/* check if mode could be supported within full_pbn */
 		bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
-		pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
+		pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp << 4);
 
 		if (pbn > aconnector->mst_output_port->full_pbn)
 			return DC_FAIL_BANDWIDTH_VALIDATE;
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index c490e8befc2fa..bc2c9dfe229fc 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -4718,13 +4718,12 @@ EXPORT_SYMBOL(drm_dp_check_act_status);
 
 /**
  * drm_dp_calc_pbn_mode() - Calculate the PBN for a mode.
- * @clock: dot clock for the mode
- * @bpp: bpp for the mode.
- * @dsc: DSC mode. If true, bpp has units of 1/16 of a bit per pixel
+ * @clock: dot clock
+ * @bpp: bpp as .4 binary fixed point
  *
  * This uses the formula in the spec to calculate the PBN value for a mode.
  */
-int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc)
+int drm_dp_calc_pbn_mode(int clock, int bpp)
 {
 	/*
 	 * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
@@ -4735,18 +4734,9 @@ int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc)
 	 * peak_kbps *= (1006/1000)
 	 * peak_kbps *= (64/54)
 	 * peak_kbps *= 8    convert to bytes
-	 *
-	 * If the bpp is in units of 1/16, further divide by 16. Put this
-	 * factor in the numerator rather than the denominator to avoid
-	 * integer overflow
 	 */
-
-	if (dsc)
-		return DIV_ROUND_UP_ULL(mul_u32_u32(clock * (bpp / 16), 64 * 1006),
-					8 * 54 * 1000 * 1000);
-
-	return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006),
-				8 * 54 * 1000 * 1000);
+	return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006 >> 4),
+				1000 * 8 * 54 * 1000);
 }
 EXPORT_SYMBOL(drm_dp_calc_pbn_mode);
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 4625900416e9e..6f9c9f3b5b121 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -107,8 +107,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 			continue;
 
 		crtc_state->pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock,
-						       dsc ? bpp << 4 : bpp,
-						       dsc);
+						       bpp << 4);
 
 		slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr,
 						      connector->port,
@@ -982,7 +981,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 		return ret;
 
 	if (mode_rate > max_rate || mode->clock > max_dotclk ||
-	    drm_dp_calc_pbn_mode(mode->clock, min_bpp, false) > port->full_pbn) {
+	    drm_dp_calc_pbn_mode(mode->clock, min_bpp << 4) > port->full_pbn) {
 		*status = MODE_CLOCK_HIGH;
 		return 0;
 	}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 52f1569ee37c1..3d390f5029f65 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -982,8 +982,7 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
 		const int clock = crtc_state->adjusted_mode.clock;
 
 		asyh->or.bpc = connector->display_info.bpc;
-		asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3,
-						    false);
+		asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3 << 4);
 	}
 
 	mst_state = drm_atomic_get_mst_topology_state(state, &mstm->mgr);
diff --git a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c
index 545beea33e8c7..e3c818dfc0e6d 100644
--- a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c
+++ b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c
@@ -42,13 +42,13 @@ static const struct drm_dp_mst_calc_pbn_mode_test drm_dp_mst_calc_pbn_mode_cases
 		.clock = 332880,
 		.bpp = 24,
 		.dsc = true,
-		.expected = 50
+		.expected = 1191
 	},
 	{
 		.clock = 324540,
 		.bpp = 24,
 		.dsc = true,
-		.expected = 49
+		.expected = 1161
 	},
 };
 
@@ -56,7 +56,7 @@ static void drm_test_dp_mst_calc_pbn_mode(struct kunit *test)
 {
 	const struct drm_dp_mst_calc_pbn_mode_test *params = test->param_value;
 
-	KUNIT_EXPECT_EQ(test, drm_dp_calc_pbn_mode(params->clock, params->bpp, params->dsc),
+	KUNIT_EXPECT_EQ(test, drm_dp_calc_pbn_mode(params->clock, params->bpp << 4),
 			params->expected);
 }
 
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index 4429d3b1745b6..655862b3d2a49 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -842,7 +842,7 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector,
 int drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr,
 			     int link_rate, int link_lane_count);
 
-int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc);
+int drm_dp_calc_pbn_mode(int clock, int bpp);
 
 void drm_dp_mst_update_slots(struct drm_dp_mst_topology_state *mst_state, uint8_t link_encoding_cap);
 
-- 
GitLab


From 9dcf67deeab6fbc4984175278b1b2c59881dca52 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:08:58 +0300
Subject: [PATCH 0156/2340] drm/dp_mst: Add helper to determine if an MST port
 is downstream of another port

Add drm_dp_mst_port_downstream_of_parent() required by the i915
driver in a follow-up patch to resolve a BW overallocation of MST
streams going through a given MST port.

Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Lyude Paul <lyude@redhat.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-4-imre.deak@intel.com
---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 52 +++++++++++++++++++
 include/drm/display/drm_dp_mst_helper.h       |  3 ++
 2 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index bc2c9dfe229fc..fa0049572f225 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5126,6 +5126,58 @@ static bool drm_dp_mst_port_downstream_of_branch(struct drm_dp_mst_port *port,
 	return false;
 }
 
+static bool
+drm_dp_mst_port_downstream_of_parent_locked(struct drm_dp_mst_topology_mgr *mgr,
+					    struct drm_dp_mst_port *port,
+					    struct drm_dp_mst_port *parent)
+{
+	if (!mgr->mst_primary)
+		return false;
+
+	port = drm_dp_mst_topology_get_port_validated_locked(mgr->mst_primary,
+							     port);
+	if (!port)
+		return false;
+
+	if (!parent)
+		return true;
+
+	parent = drm_dp_mst_topology_get_port_validated_locked(mgr->mst_primary,
+							       parent);
+	if (!parent)
+		return false;
+
+	if (!parent->mstb)
+		return false;
+
+	return drm_dp_mst_port_downstream_of_branch(port, parent->mstb);
+}
+
+/**
+ * drm_dp_mst_port_downstream_of_parent - check if a port is downstream of a parent port
+ * @mgr: MST topology manager
+ * @port: the port being looked up
+ * @parent: the parent port
+ *
+ * The function returns %true if @port is downstream of @parent. If @parent is
+ * %NULL - denoting the root port - the function returns %true if @port is in
+ * @mgr's topology.
+ */
+bool
+drm_dp_mst_port_downstream_of_parent(struct drm_dp_mst_topology_mgr *mgr,
+				     struct drm_dp_mst_port *port,
+				     struct drm_dp_mst_port *parent)
+{
+	bool ret;
+
+	mutex_lock(&mgr->lock);
+	ret = drm_dp_mst_port_downstream_of_parent_locked(mgr, port, parent);
+	mutex_unlock(&mgr->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_dp_mst_port_downstream_of_parent);
+
 static int
 drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
 				      struct drm_dp_mst_topology_state *state);
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index 655862b3d2a49..e44485aa74e94 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -892,6 +892,9 @@ drm_atomic_get_new_mst_topology_state(struct drm_atomic_state *state,
 struct drm_dp_mst_atomic_payload *
 drm_atomic_get_mst_payload_state(struct drm_dp_mst_topology_state *state,
 				 struct drm_dp_mst_port *port);
+bool drm_dp_mst_port_downstream_of_parent(struct drm_dp_mst_topology_mgr *mgr,
+					  struct drm_dp_mst_port *port,
+					  struct drm_dp_mst_port *parent);
 int __must_check
 drm_dp_atomic_find_time_slots(struct drm_atomic_state *state,
 			      struct drm_dp_mst_topology_mgr *mgr,
-- 
GitLab


From 1cd0a5ea427931016c3e95b20dc20f17604937cc Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:08:59 +0300
Subject: [PATCH 0157/2340] drm/dp_mst: Factor out a helper to check the atomic
 state of a topology manager

Factor out a helper to check the atomic state for one MST topology
manager, returning the MST port where the BW limit check has failed.
This will be used in a follow-up patch by the i915 driver to improve the
BW sharing between MST streams.

Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Lyude Paul <lyude@redhat.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-5-imre.deak@intel.com
---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 93 +++++++++++++++----
 include/drm/display/drm_dp_mst_helper.h       |  4 +
 2 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index fa0049572f225..0fbfdd23ef4eb 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5180,11 +5180,13 @@ EXPORT_SYMBOL(drm_dp_mst_port_downstream_of_parent);
 
 static int
 drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
-				      struct drm_dp_mst_topology_state *state);
+				      struct drm_dp_mst_topology_state *state,
+				      struct drm_dp_mst_port **failing_port);
 
 static int
 drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb,
-				      struct drm_dp_mst_topology_state *state)
+				      struct drm_dp_mst_topology_state *state,
+				      struct drm_dp_mst_port **failing_port)
 {
 	struct drm_dp_mst_atomic_payload *payload;
 	struct drm_dp_mst_port *port;
@@ -5213,7 +5215,7 @@ drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb,
 		drm_dbg_atomic(mstb->mgr->dev, "[MSTB:%p] Checking bandwidth limits\n", mstb);
 
 	list_for_each_entry(port, &mstb->ports, next) {
-		ret = drm_dp_mst_atomic_check_port_bw_limit(port, state);
+		ret = drm_dp_mst_atomic_check_port_bw_limit(port, state, failing_port);
 		if (ret < 0)
 			return ret;
 
@@ -5225,7 +5227,8 @@ drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb,
 
 static int
 drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
-				      struct drm_dp_mst_topology_state *state)
+				      struct drm_dp_mst_topology_state *state,
+				      struct drm_dp_mst_port **failing_port)
 {
 	struct drm_dp_mst_atomic_payload *payload;
 	int pbn_used = 0;
@@ -5246,13 +5249,15 @@ drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
 			drm_dbg_atomic(port->mgr->dev,
 				       "[MSTB:%p] [MST PORT:%p] no BW available for the port\n",
 				       port->parent, port);
+			*failing_port = port;
 			return -EINVAL;
 		}
 
 		pbn_used = payload->pbn;
 	} else {
 		pbn_used = drm_dp_mst_atomic_check_mstb_bw_limit(port->mstb,
-								 state);
+								 state,
+								 failing_port);
 		if (pbn_used <= 0)
 			return pbn_used;
 	}
@@ -5261,6 +5266,7 @@ drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port,
 		drm_dbg_atomic(port->mgr->dev,
 			       "[MSTB:%p] [MST PORT:%p] required PBN of %d exceeds port limit of %d\n",
 			       port->parent, port, pbn_used, port->full_pbn);
+		*failing_port = port;
 		return -ENOSPC;
 	}
 
@@ -5438,20 +5444,79 @@ int drm_dp_mst_atomic_enable_dsc(struct drm_atomic_state *state,
 }
 EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc);
 
+/**
+ * drm_dp_mst_atomic_check_mgr - Check the atomic state of an MST topology manager
+ * @state: The global atomic state
+ * @mgr: Manager to check
+ * @mst_state: The MST atomic state for @mgr
+ * @failing_port: Returns the port with a BW limitation
+ *
+ * Checks the given MST manager's topology state for an atomic update to ensure
+ * that it's valid. This includes checking whether there's enough bandwidth to
+ * support the new timeslot allocations in the atomic update.
+ *
+ * Any atomic drivers supporting DP MST must make sure to call this or
+ * the drm_dp_mst_atomic_check() function after checking the rest of their state
+ * in their &drm_mode_config_funcs.atomic_check() callback.
+ *
+ * See also:
+ * drm_dp_mst_atomic_check()
+ * drm_dp_atomic_find_time_slots()
+ * drm_dp_atomic_release_time_slots()
+ *
+ * Returns:
+ *   - 0 if the new state is valid
+ *   - %-ENOSPC, if the new state is invalid, because of BW limitation
+ *         @failing_port is set to:
+ *         - The non-root port where a BW limit check failed
+ *           The returned port pointer is valid until at least
+ *           one payload downstream of it exists.
+ *         - %NULL if the BW limit check failed at the root port
+ *   - %-EINVAL, if the new state is invalid, because the root port has
+ *     too many payloads.
+ */
+int drm_dp_mst_atomic_check_mgr(struct drm_atomic_state *state,
+				struct drm_dp_mst_topology_mgr *mgr,
+				struct drm_dp_mst_topology_state *mst_state,
+				struct drm_dp_mst_port **failing_port)
+{
+	int ret;
+
+	*failing_port = NULL;
+
+	if (!mgr->mst_state)
+		return 0;
+
+	ret = drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state);
+	if (ret)
+		return ret;
+
+	mutex_lock(&mgr->lock);
+	ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary,
+						    mst_state,
+						    failing_port);
+	mutex_unlock(&mgr->lock);
+
+	return ret < 0 ? ret : 0;
+}
+EXPORT_SYMBOL(drm_dp_mst_atomic_check_mgr);
+
 /**
  * drm_dp_mst_atomic_check - Check that the new state of an MST topology in an
  * atomic update is valid
  * @state: Pointer to the new &struct drm_dp_mst_topology_state
  *
  * Checks the given topology state for an atomic update to ensure that it's
- * valid. This includes checking whether there's enough bandwidth to support
- * the new timeslot allocations in the atomic update.
+ * valid, calling drm_dp_mst_atomic_check_mgr() for all MST manager in the
+ * atomic state. This includes checking whether there's enough bandwidth to
+ * support the new timeslot allocations in the atomic update.
  *
  * Any atomic drivers supporting DP MST must make sure to call this after
  * checking the rest of their state in their
  * &drm_mode_config_funcs.atomic_check() callback.
  *
  * See also:
+ * drm_dp_mst_atomic_check_mgr()
  * drm_dp_atomic_find_time_slots()
  * drm_dp_atomic_release_time_slots()
  *
@@ -5466,21 +5531,11 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state)
 	int i, ret = 0;
 
 	for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) {
-		if (!mgr->mst_state)
-			continue;
+		struct drm_dp_mst_port *tmp_port;
 
-		ret = drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state);
+		ret = drm_dp_mst_atomic_check_mgr(state, mgr, mst_state, &tmp_port);
 		if (ret)
 			break;
-
-		mutex_lock(&mgr->lock);
-		ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary,
-							    mst_state);
-		mutex_unlock(&mgr->lock);
-		if (ret < 0)
-			break;
-		else
-			ret = 0;
 	}
 
 	return ret;
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index e44485aa74e94..a4aad6df71f18 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -916,6 +916,10 @@ int drm_dp_send_power_updown_phy(struct drm_dp_mst_topology_mgr *mgr,
 int drm_dp_send_query_stream_enc_status(struct drm_dp_mst_topology_mgr *mgr,
 		struct drm_dp_mst_port *port,
 		struct drm_dp_query_stream_enc_status_ack_reply *status);
+int __must_check drm_dp_mst_atomic_check_mgr(struct drm_atomic_state *state,
+					     struct drm_dp_mst_topology_mgr *mgr,
+					     struct drm_dp_mst_topology_state *mst_state,
+					     struct drm_dp_mst_port **failing_port);
 int __must_check drm_dp_mst_atomic_check(struct drm_atomic_state *state);
 int __must_check drm_dp_mst_root_conn_atomic_check(struct drm_connector_state *new_conn_state,
 						   struct drm_dp_mst_topology_mgr *mgr);
-- 
GitLab


From d075bca47c18779301fee5a9d140f146cde4b532 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:00 +0300
Subject: [PATCH 0158/2340] drm/dp_mst: Swap the order of checking root vs.
 non-root port BW limitations

drm_dp_mst_atomic_check_mgr() should check for BW limitation starting
from sink ports continuing towards the root port, so that drivers can
use the @failing_port returned to resolve a BW overallocation in an
ideal way. For instance from streams A,B,C in a topology A,B going
through @failing_port and C not going through it, a BW overallocation of
A,B due to a limit of the port must be resolved first before considering
the limits of other ports closer to the root port. This way can avoid
reducing the BW of stream C unnecessarily due to a BW limit closer to the
root port.

Based on the above swap the order of the BW check for the root port and
the check for all the ports downstream of it (the latter going through
the topology already in the sink->root port direction).

Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-6-imre.deak@intel.com
---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 0fbfdd23ef4eb..8433b9c59cf2b 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5469,9 +5469,13 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc);
  *   - %-ENOSPC, if the new state is invalid, because of BW limitation
  *         @failing_port is set to:
  *         - The non-root port where a BW limit check failed
+ *           with all the ports downstream of @failing_port passing
+ *           the BW limit check.
  *           The returned port pointer is valid until at least
  *           one payload downstream of it exists.
  *         - %NULL if the BW limit check failed at the root port
+ *           with all the ports downstream of the root port passing
+ *           the BW limit check.
  *   - %-EINVAL, if the new state is invalid, because the root port has
  *     too many payloads.
  */
@@ -5487,17 +5491,16 @@ int drm_dp_mst_atomic_check_mgr(struct drm_atomic_state *state,
 	if (!mgr->mst_state)
 		return 0;
 
-	ret = drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state);
-	if (ret)
-		return ret;
-
 	mutex_lock(&mgr->lock);
 	ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary,
 						    mst_state,
 						    failing_port);
 	mutex_unlock(&mgr->lock);
 
-	return ret < 0 ? ret : 0;
+	if (ret < 0)
+		return ret;
+
+	return drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state);
 }
 EXPORT_SYMBOL(drm_dp_mst_atomic_check_mgr);
 
-- 
GitLab


From 6f1aa39d6497d4d27f8ee132e9cb8bdbfe7c0674 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:01 +0300
Subject: [PATCH 0159/2340] drm/dp_mst: Allow DSC in any Synaptics last branch
 device

The Synaptics MST branch devices support DSC decompression on all their
output ports, provided that they are last branch devices (with their
output ports connected to the sinks). The Thinkpad 40B0 TBT dock for
instance has two such branch devices, a secondary one connected to one
of the output ports of the primary; hence the decompression needs to be
enabled in both branch devices to enable decompression for all the
sinks.

Based on the above add support for enabling decompression in last
Synaptics branch devices.

Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-7-imre.deak@intel.com
---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 8433b9c59cf2b..a462229908fc9 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5994,6 +5994,7 @@ static bool drm_dp_mst_is_virtual_dpcd(struct drm_dp_mst_port *port)
 struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port)
 {
 	struct drm_dp_mst_port *immediate_upstream_port;
+	struct drm_dp_aux *immediate_upstream_aux;
 	struct drm_dp_mst_port *fec_port;
 	struct drm_dp_desc desc = {};
 	u8 endpoint_fec;
@@ -6058,21 +6059,25 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port)
 	 * - Port is on primary branch device
 	 * - Not a VGA adapter (DP_DWN_STRM_PORT_TYPE_ANALOG)
 	 */
-	if (drm_dp_read_desc(port->mgr->aux, &desc, true))
+	if (immediate_upstream_port)
+		immediate_upstream_aux = &immediate_upstream_port->aux;
+	else
+		immediate_upstream_aux = port->mgr->aux;
+
+	if (drm_dp_read_desc(immediate_upstream_aux, &desc, true))
 		return NULL;
 
-	if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) &&
-	    port->mgr->dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14 &&
-	    port->parent == port->mgr->mst_primary) {
+	if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD)) {
 		u8 dpcd_ext[DP_RECEIVER_CAP_SIZE];
 
-		if (drm_dp_read_dpcd_caps(port->mgr->aux, dpcd_ext) < 0)
+		if (drm_dp_read_dpcd_caps(immediate_upstream_aux, dpcd_ext) < 0)
 			return NULL;
 
-		if ((dpcd_ext[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT) &&
+		if (dpcd_ext[DP_DPCD_REV] >= DP_DPCD_REV_14 &&
+		    ((dpcd_ext[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT) &&
 		    ((dpcd_ext[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_TYPE_MASK)
-		     != DP_DWN_STRM_PORT_TYPE_ANALOG))
-			return port->mgr->aux;
+		     != DP_DWN_STRM_PORT_TYPE_ANALOG)))
+			return immediate_upstream_aux;
 	}
 
 	/*
-- 
GitLab


From b348150406564595cf6c1be388e9797fa97c2a5d Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:02 +0300
Subject: [PATCH 0160/2340] drm/dp: Add DP_HBLANK_EXPANSION_CAPABLE and
 DSC_PASSTHROUGH_EN DPCD flags

Add the DPCD flag to enable DSC passthrough in a last branch device,
used in a follow-up i915 patch.

Also add a flag to detect HBLANK expansion support in a branch device,
used by a workaround in a follow-up i915 patch.

Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-8-imre.deak@intel.com
---
 include/drm/display/drm_dp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index e69cece404b3c..763d45a612f35 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -148,6 +148,7 @@
 #define DP_RECEIVE_PORT_0_CAP_0		    0x008
 # define DP_LOCAL_EDID_PRESENT		    (1 << 1)
 # define DP_ASSOCIATED_TO_PRECEDING_PORT    (1 << 2)
+# define DP_HBLANK_EXPANSION_CAPABLE        (1 << 3)
 
 #define DP_RECEIVE_PORT_0_BUFFER_SIZE	    0x009
 
@@ -699,6 +700,7 @@
 
 #define DP_DSC_ENABLE                       0x160   /* DP 1.4 */
 # define DP_DECOMPRESSION_EN                (1 << 0)
+# define DP_DSC_PASSTHROUGH_EN		    (1 << 1)
 #define DP_DSC_CONFIGURATION				0x161	/* DP 2.0 */
 
 #define DP_PSR_EN_CFG				0x170   /* XXX 1.2? */
-- 
GitLab


From a6315ec25eed0e9a70cb1cfc43cf694911546a5c Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 13:22:16 +0300
Subject: [PATCH 0161/2340] drm/dp_mst: Add HBLANK expansion quirk for
 Synaptics MST hubs

Add a quirk for Synaptics MST hubs, which require a workaround - at leat
on i915 - for some modes, on which the hub applies HBLANK expansion.
These modes will only work by enabling DSC decompression for them, a
follow-up patch will do this in i915.

v2:
- Fix the quirk name in its DocBook description.

Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-9-imre.deak@intel.com
---
 drivers/gpu/drm/display/drm_dp_helper.c | 2 ++
 include/drm/display/drm_dp_helper.h     | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 8a1b64c57dfdb..2a95a05f0437e 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -2242,6 +2242,8 @@ static const struct dpcd_quirk dpcd_quirk_list[] = {
 	{ OUI(0x00, 0x00, 0x00), DEVICE_ID('C', 'H', '7', '5', '1', '1'), false, BIT(DP_DPCD_QUIRK_NO_SINK_COUNT) },
 	/* Synaptics DP1.4 MST hubs can support DSC without virtual DPCD */
 	{ OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) },
+	/* Synaptics DP1.4 MST hubs require DSC for some modes on which it applies HBLANK expansion. */
+	{ OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC) },
 	/* Apple MacBookPro 2017 15 inch eDP Retina panel reports too low DP_MAX_LINK_RATE */
 	{ OUI(0x00, 0x10, 0xfa), DEVICE_ID(101, 68, 21, 101, 98, 97), false, BIT(DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS) },
 };
diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h
index 3369104e2d257..01636a679ee6c 100644
--- a/include/drm/display/drm_dp_helper.h
+++ b/include/drm/display/drm_dp_helper.h
@@ -632,6 +632,13 @@ enum drm_dp_quirk {
 	 * the DP_MAX_LINK_RATE register reporting a lower max multiplier.
 	 */
 	DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS,
+	/**
+	 * @DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC:
+	 *
+	 * The device applies HBLANK expansion for some modes, but this
+	 * requires enabling DSC.
+	 */
+	DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC,
 };
 
 /**
-- 
GitLab


From c1d6a22b7219bd52c66e9e038a282ba79f04be1f Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 13:22:17 +0300
Subject: [PATCH 0162/2340] drm/dp: Add helpers to calculate the link BW
 overhead
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add helpers drivers can use to calculate the BW allocation overhead -
due to SSC, FEC, DSC and data alignment on symbol cycles - and the
channel coding efficiency - due to the 8b/10b, 128b/132b encoding. On
128b/132b links the FEC overhead is part of the coding efficiency, so
not accounted for in the BW allocation overhead.

The drivers can use these functions to calculate a ratio, controlling
the stream symbol insertion rate of the source device in each SST TU
or MST MTP frame. Drivers can calculate this

m/n = (pixel_data_rate * drm_dp_bw_overhead()) /
      (link_data_rate * drm_dp_bw_channel_coding_efficiency())

ratio for a given link and pixel stream and with that the

slots_per_mtp = CEIL(64 * m / n)

allocated slots per MTP for the stream in a link frame and with
that the

pbn = slots_per_mtp * drm_mst_get_pbn_divider()

allocated PBNs for the stream on the MST link path.

Take drm_dp_bw_overhead() into use in drm_dp_calc_pbn_mode(), for
drivers calculating the PBN value directly.

v2:
- Add dockbook description to drm_dp_bw_channel_coding_efficiency().
  (LKP).
- Clarify the way m/n ratio is calculated in the commit log.
v3:
- Fix compile breakage for !CONFIG_BACKLIGHT_CLASS_DEVICE. (LKP)
- Account for FEC_PM overhead (+ 0.0015625 %), add comment
  with the formula to calculate the total FEC overhead. (Ville)
v4:
- Rename DRM_DP_OVERHEAD_SSC to DRM_DP_OVERHEAD_SSC_REF_CLK. (Ville)
v5:
- Clarify in the commit log what MTP means.
- Simplify the commit log's formula to calculate PBN.

Cc: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: kernel test robot <lkp@intel.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v2)
Acked-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-1-imre.deak@intel.com
---
 drivers/gpu/drm/display/drm_dp_helper.c       | 132 ++++++++++++++++++
 drivers/gpu/drm/display/drm_dp_mst_topology.c |  23 ++-
 include/drm/display/drm_dp_helper.h           |  11 ++
 3 files changed, 160 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 2a95a05f0437e..64c151e4f78ca 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -3897,3 +3897,135 @@ int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux)
 EXPORT_SYMBOL(drm_panel_dp_aux_backlight);
 
 #endif
+
+/* See DP Standard v2.1 2.6.4.4.1.1, 2.8.4.4, 2.8.7 */
+static int drm_dp_link_symbol_cycles(int lane_count, int pixels, int bpp_x16,
+				     int symbol_size, bool is_mst)
+{
+	int cycles = DIV_ROUND_UP(pixels * bpp_x16, 16 * symbol_size * lane_count);
+	int align = is_mst ? 4 / lane_count : 1;
+
+	return ALIGN(cycles, align);
+}
+
+static int drm_dp_link_dsc_symbol_cycles(int lane_count, int pixels, int slice_count,
+					 int bpp_x16, int symbol_size, bool is_mst)
+{
+	int slice_pixels = DIV_ROUND_UP(pixels, slice_count);
+	int slice_data_cycles = drm_dp_link_symbol_cycles(lane_count, slice_pixels,
+							  bpp_x16, symbol_size, is_mst);
+	int slice_eoc_cycles = is_mst ? 4 / lane_count : 1;
+
+	return slice_count * (slice_data_cycles + slice_eoc_cycles);
+}
+
+/**
+ * drm_dp_bw_overhead - Calculate the BW overhead of a DP link stream
+ * @lane_count: DP link lane count
+ * @hactive: pixel count of the active period in one scanline of the stream
+ * @dsc_slice_count: DSC slice count if @flags/DRM_DP_LINK_BW_OVERHEAD_DSC is set
+ * @bpp_x16: bits per pixel in .4 binary fixed point
+ * @flags: DRM_DP_OVERHEAD_x flags
+ *
+ * Calculate the BW allocation overhead of a DP link stream, depending
+ * on the link's
+ * - @lane_count
+ * - SST/MST mode (@flags / %DRM_DP_OVERHEAD_MST)
+ * - symbol size (@flags / %DRM_DP_OVERHEAD_UHBR)
+ * - FEC mode (@flags / %DRM_DP_OVERHEAD_FEC)
+ * - SSC/REF_CLK mode (@flags / %DRM_DP_OVERHEAD_SSC_REF_CLK)
+ * as well as the stream's
+ * - @hactive timing
+ * - @bpp_x16 color depth
+ * - compression mode (@flags / %DRM_DP_OVERHEAD_DSC).
+ * Note that this overhead doesn't account for the 8b/10b, 128b/132b
+ * channel coding efficiency, for that see
+ * @drm_dp_link_bw_channel_coding_efficiency().
+ *
+ * Returns the overhead as 100% + overhead% in 1ppm units.
+ */
+int drm_dp_bw_overhead(int lane_count, int hactive,
+		       int dsc_slice_count,
+		       int bpp_x16, unsigned long flags)
+{
+	int symbol_size = flags & DRM_DP_BW_OVERHEAD_UHBR ? 32 : 8;
+	bool is_mst = flags & DRM_DP_BW_OVERHEAD_MST;
+	u32 overhead = 1000000;
+	int symbol_cycles;
+
+	/*
+	 * DP Standard v2.1 2.6.4.1
+	 * SSC downspread and ref clock variation margin:
+	 *   5300ppm + 300ppm ~ 0.6%
+	 */
+	if (flags & DRM_DP_BW_OVERHEAD_SSC_REF_CLK)
+		overhead += 6000;
+
+	/*
+	 * DP Standard v2.1 2.6.4.1.1, 3.5.1.5.4:
+	 * FEC symbol insertions for 8b/10b channel coding:
+	 * After each 250 data symbols on 2-4 lanes:
+	 *   250 LL + 5 FEC_PARITY_PH + 1 CD_ADJ   (256 byte FEC block)
+	 * After each 2 x 250 data symbols on 1 lane:
+	 *   2 * 250 LL + 11 FEC_PARITY_PH + 1 CD_ADJ (512 byte FEC block)
+	 * After 256 (2-4 lanes) or 128 (1 lane) FEC blocks:
+	 *   256 * 256 bytes + 1 FEC_PM
+	 * or
+	 *   128 * 512 bytes + 1 FEC_PM
+	 * (256 * 6 + 1) / (256 * 250) = 2.4015625 %
+	 */
+	if (flags & DRM_DP_BW_OVERHEAD_FEC)
+		overhead += 24016;
+
+	/*
+	 * DP Standard v2.1 2.7.9, 5.9.7
+	 * The FEC overhead for UHBR is accounted for in its 96.71% channel
+	 * coding efficiency.
+	 */
+	WARN_ON((flags & DRM_DP_BW_OVERHEAD_UHBR) &&
+		(flags & DRM_DP_BW_OVERHEAD_FEC));
+
+	if (flags & DRM_DP_BW_OVERHEAD_DSC)
+		symbol_cycles = drm_dp_link_dsc_symbol_cycles(lane_count, hactive,
+							      dsc_slice_count,
+							      bpp_x16, symbol_size,
+							      is_mst);
+	else
+		symbol_cycles = drm_dp_link_symbol_cycles(lane_count, hactive,
+							  bpp_x16, symbol_size,
+							  is_mst);
+
+	return DIV_ROUND_UP_ULL(mul_u32_u32(symbol_cycles * symbol_size * lane_count,
+					    overhead * 16),
+				hactive * bpp_x16);
+}
+EXPORT_SYMBOL(drm_dp_bw_overhead);
+
+/**
+ * drm_dp_bw_channel_coding_efficiency - Get a DP link's channel coding efficiency
+ * @is_uhbr: Whether the link has a 128b/132b channel coding
+ *
+ * Return the channel coding efficiency of the given DP link type, which is
+ * either 8b/10b or 128b/132b (aka UHBR). The corresponding overhead includes
+ * the 8b -> 10b, 128b -> 132b pixel data to link symbol conversion overhead
+ * and for 128b/132b any link or PHY level control symbol insertion overhead
+ * (LLCP, FEC, PHY sync, see DP Standard v2.1 3.5.2.18). For 8b/10b the
+ * corresponding FEC overhead is BW allocation specific, included in the value
+ * returned by drm_dp_bw_overhead().
+ *
+ * Returns the efficiency in the 100%/coding-overhead% ratio in
+ * 1ppm units.
+ */
+int drm_dp_bw_channel_coding_efficiency(bool is_uhbr)
+{
+	if (is_uhbr)
+		return 967100;
+	else
+		/*
+		 * Note that on 8b/10b MST the efficiency is only
+		 * 78.75% due to the 1 out of 64 MTPH packet overhead,
+		 * not accounted for here.
+		 */
+		return 800000;
+}
+EXPORT_SYMBOL(drm_dp_bw_channel_coding_efficiency);
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index a462229908fc9..0854fe428f173 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -4726,17 +4726,28 @@ EXPORT_SYMBOL(drm_dp_check_act_status);
 int drm_dp_calc_pbn_mode(int clock, int bpp)
 {
 	/*
-	 * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
 	 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
 	 * common multiplier to render an integer PBN for all link rate/lane
 	 * counts combinations
 	 * calculate
-	 * peak_kbps *= (1006/1000)
-	 * peak_kbps *= (64/54)
-	 * peak_kbps *= 8    convert to bytes
+	 * peak_kbps = clock * bpp / 16
+	 * peak_kbps *= SSC overhead / 1000000
+	 * peak_kbps /= 8    convert to Kbytes
+	 * peak_kBps *= (64/54) / 1000    convert to PBN
 	 */
-	return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006 >> 4),
-				1000 * 8 * 54 * 1000);
+	/*
+	 * TODO: Use the actual link and mode parameters to calculate
+	 * the overhead. For now it's assumed that these are
+	 * 4 link lanes, 4096 hactive pixels, which don't add any
+	 * significant data padding overhead and that there is no DSC
+	 * or FEC overhead.
+	 */
+	int overhead = drm_dp_bw_overhead(4, 4096, 0, bpp,
+					  DRM_DP_BW_OVERHEAD_MST |
+					  DRM_DP_BW_OVERHEAD_SSC_REF_CLK);
+
+	return DIV64_U64_ROUND_UP(mul_u32_u32(clock * bpp, 64 * overhead >> 4),
+				  1000000ULL * 8 * 54 * 1000);
 }
 EXPORT_SYMBOL(drm_dp_calc_pbn_mode);
 
diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h
index 01636a679ee6c..3c59f64d4a691 100644
--- a/include/drm/display/drm_dp_helper.h
+++ b/include/drm/display/drm_dp_helper.h
@@ -788,4 +788,15 @@ bool drm_dp_downstream_rgb_to_ycbcr_conversion(const u8 dpcd[DP_RECEIVER_CAP_SIZ
 					       const u8 port_cap[4], u8 color_spc);
 int drm_dp_pcon_convert_rgb_to_ycbcr(struct drm_dp_aux *aux, u8 color_spc);
 
+#define DRM_DP_BW_OVERHEAD_MST		BIT(0)
+#define DRM_DP_BW_OVERHEAD_UHBR		BIT(1)
+#define DRM_DP_BW_OVERHEAD_SSC_REF_CLK	BIT(2)
+#define DRM_DP_BW_OVERHEAD_FEC		BIT(3)
+#define DRM_DP_BW_OVERHEAD_DSC		BIT(4)
+
+int drm_dp_bw_overhead(int lane_count, int hactive,
+		       int dsc_slice_count,
+		       int bpp_x16, unsigned long flags);
+int drm_dp_bw_channel_coding_efficiency(bool is_uhbr);
+
 #endif /* _DRM_DP_HELPER_H_ */
-- 
GitLab


From d91680efcaaba6cc2e7cd83e4aa5e1d0f1c6f684 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:05 +0300
Subject: [PATCH 0163/2340] drm/i915/dp_mst: Enable FEC early once it's known
 DSC is needed

Enable FEC in crtc_state, as soon as it's known it will be needed by
DSC. This fixes the calculation of BW allocation overhead, in case DSC
is enabled by falling back to it during the encoder compute config
phase (vs. enabling FEC due to DSC being enabled on other streams).

v2:
- Enable FEC only in intel_dp_mst_find_vcpi_slots_for_bpp(), since
  only by that will crtc_state->port_clock be set, which in turn is
  needed by intel_dp_is_uhbr().

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v1)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-11-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c     | 6 +++---
 drivers/gpu/drm/i915/display/intel_dp.h     | 5 +++++
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 7 +++++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index ea97db7d9abf7..516671f50b6c4 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1369,9 +1369,9 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp,
 	return false;
 }
 
-static bool intel_dp_supports_fec(struct intel_dp *intel_dp,
-				  const struct intel_connector *connector,
-				  const struct intel_crtc_state *pipe_config)
+bool intel_dp_supports_fec(struct intel_dp *intel_dp,
+			   const struct intel_connector *connector,
+			   const struct intel_crtc_state *pipe_config)
 {
 	return intel_dp_source_supports_fec(intel_dp, pipe_config) &&
 		drm_dp_sink_supports_fec(connector->dp.fec_capability);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 484aea215a251..0258580a6aadc 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -137,6 +137,11 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
 }
 
 u32 intel_dp_mode_to_fec_clock(u32 mode_clock);
+
+bool intel_dp_supports_fec(struct intel_dp *intel_dp,
+			   const struct intel_connector *connector,
+			   const struct intel_crtc_state *pipe_config);
+
 u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 pipe_bpp);
 
 void intel_ddi_update_pipe(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 6f9c9f3b5b121..28db1775d3a5d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -95,6 +95,13 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 	crtc_state->lane_count = limits->max_lane_count;
 	crtc_state->port_clock = limits->max_rate;
 
+	if (dsc) {
+		if (!intel_dp_supports_fec(intel_dp, connector, crtc_state))
+			return -EINVAL;
+
+		crtc_state->fec_enable = !intel_dp_is_uhbr(crtc_state);
+	}
+
 	mst_state->pbn_div = drm_dp_get_vc_payload_bw(&intel_dp->mst_mgr,
 						      crtc_state->port_clock,
 						      crtc_state->lane_count);
-- 
GitLab


From 3e306daab76ac32b3496583e1db43baabe8a062e Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:06 +0300
Subject: [PATCH 0164/2340] drm/i915/dp: Specify the FEC overhead as an
 increment vs. a remainder

A follow-up patch will add up all the overheads on a DP link, where it
makes more sense to specify each overhead factor in terms of the added
overhead amount vs. the reciprocal remainder (of usable BW remaining
after deducting the overhead). Prepare for that here, keeping the
existing behavior.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-12-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 516671f50b6c4..6186926f865ef 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -85,8 +85,8 @@
 #define DP_DSC_MAX_ENC_THROUGHPUT_0		340000
 #define DP_DSC_MAX_ENC_THROUGHPUT_1		400000
 
-/* DP DSC FEC Overhead factor = 1/(0.972261) */
-#define DP_DSC_FEC_OVERHEAD_FACTOR		972261
+/* DP DSC FEC Overhead factor in ppm = 1/(0.972261) = 1.028530 */
+#define DP_DSC_FEC_OVERHEAD_FACTOR		1028530
 
 /* Compliance test status bits  */
 #define INTEL_DP_RESOLUTION_SHIFT_MASK	0
@@ -680,8 +680,8 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
 
 u32 intel_dp_mode_to_fec_clock(u32 mode_clock)
 {
-	return div_u64(mul_u32_u32(mode_clock, 1000000U),
-		       DP_DSC_FEC_OVERHEAD_FACTOR);
+	return div_u64(mul_u32_u32(mode_clock, DP_DSC_FEC_OVERHEAD_FACTOR),
+		       1000000U);
 }
 
 static int
-- 
GitLab


From 7ff2090c7c98644ea04be7ff8e304b74f47cf9dc Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:07 +0300
Subject: [PATCH 0165/2340] drm/i915/dp: Pass actual BW overhead to m_n
 calculation

A follow-up MST patch will need to specify the total BW allocation
overhead, prepare for that here by passing the amount of overhead
to intel_link_compute_m_n(), keeping the existing behavior.

v2:
- Fix passing the correct crtc_state->fec_enable param in
  intel_dp_mst_compute_link_config() /
  intel_dp_dsc_mst_compute_link_config().

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v1)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-13-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 40 +++++++++++++++++---
 drivers/gpu/drm/i915/display/intel_display.h |  4 +-
 drivers/gpu/drm/i915/display/intel_dp.c      | 30 ++++++++++++---
 drivers/gpu/drm/i915/display/intel_dp.h      |  2 +
 drivers/gpu/drm/i915/display/intel_dp_mst.c  |  8 ++--
 drivers/gpu/drm/i915/display/intel_fdi.c     |  5 ++-
 6 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 9e9c03287869b..3c7ef2cc3b8b1 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2389,17 +2389,45 @@ static void compute_m_n(u32 *ret_m, u32 *ret_n,
 	intel_reduce_m_n_ratio(ret_m, ret_n);
 }
 
+static void
+add_bw_alloc_overhead(int link_clock, int bw_overhead,
+		      int pixel_data_rate, int link_data_rate,
+		      u32 *data_m, u32 *data_n)
+{
+	bool is_uhbr = intel_dp_is_uhbr_rate(link_clock);
+	int ch_coding_efficiency =
+		drm_dp_bw_channel_coding_efficiency(is_uhbr);
+
+	/*
+	 * TODO: adjust for actual UHBR channel coding efficiency and BW
+	 * overhead.
+	 */
+	if (is_uhbr) {
+		*data_m = pixel_data_rate;
+		*data_n = link_data_rate * 8 / 10;
+		return;
+	}
+
+	*data_m = DIV_ROUND_UP_ULL(mul_u32_u32(pixel_data_rate, bw_overhead),
+				   1000000);
+	*data_n = DIV_ROUND_DOWN_ULL(mul_u32_u32(link_data_rate, ch_coding_efficiency),
+				     1000000);
+}
+
 void
 intel_link_compute_m_n(u16 bits_per_pixel, int nlanes,
 		       int pixel_clock, int link_clock,
-		       struct intel_link_m_n *m_n,
-		       bool fec_enable)
+		       int bw_overhead,
+		       struct intel_link_m_n *m_n)
 {
 	u32 data_clock = bits_per_pixel * pixel_clock;
+	u32 data_m;
+	u32 data_n;
 
-	if (fec_enable)
-		data_clock = intel_dp_mode_to_fec_clock(data_clock);
-
+	add_bw_alloc_overhead(link_clock, bw_overhead,
+			      data_clock,
+			      link_clock * 10 * nlanes,
+			      &data_m, &data_n);
 	/*
 	 * Windows/BIOS uses fixed M/N values always. Follow suit.
 	 *
@@ -2409,7 +2437,7 @@ intel_link_compute_m_n(u16 bits_per_pixel, int nlanes,
 	 */
 	m_n->tu = 64;
 	compute_m_n(&m_n->data_m, &m_n->data_n,
-		    data_clock, link_clock * nlanes * 8,
+		    data_m, data_n,
 		    0x8000000);
 
 	compute_m_n(&m_n->link_m, &m_n->link_n,
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index cae2a3b12e20d..6d0636acb7250 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -394,8 +394,8 @@ u8 intel_calc_active_pipes(struct intel_atomic_state *state,
 			   u8 active_pipes);
 void intel_link_compute_m_n(u16 bpp, int nlanes,
 			    int pixel_clock, int link_clock,
-			    struct intel_link_m_n *m_n,
-			    bool fec_enable);
+			    int bw_overhead,
+			    struct intel_link_m_n *m_n);
 u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
 			      u32 pixel_format, u64 modifier);
 enum drm_mode_status
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 6186926f865ef..4dc923bc4c97a 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -121,10 +121,15 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp)
 
 static void intel_dp_unset_edid(struct intel_dp *intel_dp);
 
+bool intel_dp_is_uhbr_rate(int rate)
+{
+	return rate >= 1000000;
+}
+
 /* Is link rate UHBR and thus 128b/132b? */
 bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state)
 {
-	return crtc_state->port_clock >= 1000000;
+	return intel_dp_is_uhbr_rate(crtc_state->port_clock);
 }
 
 static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp)
@@ -684,6 +689,20 @@ u32 intel_dp_mode_to_fec_clock(u32 mode_clock)
 		       1000000U);
 }
 
+int intel_dp_bw_fec_overhead(bool fec_enabled)
+{
+	/*
+	 * TODO: Calculate the actual overhead for a given mode.
+	 * The hard-coded 1/0.972261=2.853% overhead factor
+	 * corresponds (for instance) to the 8b/10b DP FEC 2.4% +
+	 * 0.453% DSC overhead. This is enough for a 3840 width mode,
+	 * which has a DSC overhead of up to ~0.2%, but may not be
+	 * enough for a 1024 width mode where this is ~0.8% (on a 4
+	 * lane DP link, with 2 DSC slices and 8 bpp color depth).
+	 */
+	return fec_enabled ? DP_DSC_FEC_OVERHEAD_FACTOR : 1000000;
+}
+
 static int
 small_joiner_ram_size_bits(struct drm_i915_private *i915)
 {
@@ -2655,8 +2674,9 @@ intel_dp_drrs_compute_config(struct intel_connector *connector,
 		pixel_clock /= pipe_config->splitter.link_count;
 
 	intel_link_compute_m_n(link_bpp, pipe_config->lane_count, pixel_clock,
-			       pipe_config->port_clock, &pipe_config->dp_m2_n2,
-			       pipe_config->fec_enable);
+			       pipe_config->port_clock,
+			       intel_dp_bw_fec_overhead(pipe_config->fec_enable),
+			       &pipe_config->dp_m2_n2);
 
 	/* FIXME: abstract this better */
 	if (pipe_config->splitter.enable)
@@ -2837,8 +2857,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 			       pipe_config->lane_count,
 			       adjusted_mode->crtc_clock,
 			       pipe_config->port_clock,
-			       &pipe_config->dp_m_n,
-			       pipe_config->fec_enable);
+			       intel_dp_bw_fec_overhead(pipe_config->fec_enable),
+			       &pipe_config->dp_m_n);
 
 	/* FIXME: abstract this better */
 	if (pipe_config->splitter.enable)
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 0258580a6aadc..2080575fef69a 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -77,6 +77,7 @@ void intel_dp_audio_compute_config(struct intel_encoder *encoder,
 				   struct drm_connector_state *conn_state);
 bool intel_dp_has_hdmi_sink(struct intel_dp *intel_dp);
 bool intel_dp_is_edp(struct intel_dp *intel_dp);
+bool intel_dp_is_uhbr_rate(int rate);
 bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state);
 bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
 enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *dig_port,
@@ -137,6 +138,7 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
 }
 
 u32 intel_dp_mode_to_fec_clock(u32 mode_clock);
+int intel_dp_bw_fec_overhead(bool fec_enabled);
 
 bool intel_dp_supports_fec(struct intel_dp *intel_dp,
 			   const struct intel_connector *connector,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 28db1775d3a5d..1f6fbab7d4ada 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -180,8 +180,8 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 			       crtc_state->lane_count,
 			       adjusted_mode->crtc_clock,
 			       crtc_state->port_clock,
-			       &crtc_state->dp_m_n,
-			       crtc_state->fec_enable);
+			       intel_dp_bw_fec_overhead(crtc_state->fec_enable),
+			       &crtc_state->dp_m_n);
 	crtc_state->dp_m_n.tu = slots;
 
 	return 0;
@@ -275,8 +275,8 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder,
 			       crtc_state->lane_count,
 			       adjusted_mode->crtc_clock,
 			       crtc_state->port_clock,
-			       &crtc_state->dp_m_n,
-			       crtc_state->fec_enable);
+			       intel_dp_bw_fec_overhead(crtc_state->fec_enable),
+			       &crtc_state->dp_m_n);
 	crtc_state->dp_m_n.tu = slots;
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c
index e6429dfebe159..1d87fbc1e8138 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.c
+++ b/drivers/gpu/drm/i915/display/intel_fdi.c
@@ -10,6 +10,7 @@
 #include "intel_crtc.h"
 #include "intel_ddi.h"
 #include "intel_de.h"
+#include "intel_dp.h"
 #include "intel_display_types.h"
 #include "intel_fdi.h"
 #include "intel_fdi_regs.h"
@@ -339,7 +340,9 @@ int ilk_fdi_compute_config(struct intel_crtc *crtc,
 	pipe_config->fdi_lanes = lane;
 
 	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
-			       link_bw, &pipe_config->fdi_m_n, false);
+			       link_bw,
+			       intel_dp_bw_fec_overhead(false),
+			       &pipe_config->fdi_m_n);
 
 	return 0;
 }
-- 
GitLab


From 4e0837a8d00aa349910a73a6e14102f4c5d81ed5 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:08 +0300
Subject: [PATCH 0166/2340] drm/i915/dp_mst: Account for FEC and DSC overhead
 during BW allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Atm, the BW allocated for an MST stream doesn't take into account the
DSC control symbol (EOC) and data alignment overhead on the local (first
downstream) MST link (reflected by the data M/N/TU values) and - besides
the above overheads - the FEC symbol overhead on 8b/10b remote
(after a downstream branch device) MST links.

In addition the FEC overhead used on the local link is a fixed amount,
which only applies to certain modes, but not enough for all modes; add a
code comment clarifying this.

Fix the above by calculating the data M/N values with the total BW
overhead (not including the SSC overhead, since this isn't enabled by
the source device) and using this the PBN and TU values for the local
link and PBN for remote links (including SSC, since this is mandatory
for links after downstream branch devices).

For now keep the current fixed FEC overhead as a minimum, since this is
what bspec requires for audio functionality.

Calculate the effective link BW in a clearer way, applying the channel
coding efficiency based on the coding type. The calculation was correct
for 8b/10b, but not for 128b/132b links; this patch leaves the behavior
for this unchanged, leaving the fix for a follow-up.

v2:
- Fix TU size programmed to the HW, making it match the payload size
  programmed to the payload table.
v3:
- Add code comment about the connection between the payload's size in
  the payload table and the corresponding PBN value. (Ville)
- Add WARN_ON(remote_m_n.tu < dp_m_n.tu). (Ville)
- Add code comment about factors not accounted for by the BW
  calculation in intel_dp_mst_mode_valid_ctx() (and compute config).
  (Ville)
- Simplify calculation of PBN to remote_m_n.tu * mst_state->pbn_div.

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v1)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-2-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 124 ++++++++++++++++----
 1 file changed, 99 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 1f6fbab7d4ada..9d6b76d1a52a7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -67,6 +67,63 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp
 	return 0;
 }
 
+static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state,
+				    const struct intel_connector *connector,
+				    bool ssc, bool dsc, int bpp)
+{
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->hw.adjusted_mode;
+	unsigned long flags = DRM_DP_BW_OVERHEAD_MST;
+	int dsc_slice_count = 0;
+	int overhead;
+
+	flags |= intel_dp_is_uhbr(crtc_state) ? DRM_DP_BW_OVERHEAD_UHBR : 0;
+	flags |= ssc ? DRM_DP_BW_OVERHEAD_SSC_REF_CLK : 0;
+	flags |= crtc_state->fec_enable ? DRM_DP_BW_OVERHEAD_FEC : 0;
+
+	if (dsc) {
+		flags |= DRM_DP_BW_OVERHEAD_DSC;
+		/* TODO: add support for bigjoiner */
+		dsc_slice_count = intel_dp_dsc_get_slice_count(connector,
+							       adjusted_mode->clock,
+							       adjusted_mode->hdisplay,
+							       false);
+	}
+
+	overhead = drm_dp_bw_overhead(crtc_state->lane_count,
+				      adjusted_mode->hdisplay,
+				      dsc_slice_count,
+				      to_bpp_x16(bpp),
+				      flags);
+
+	/*
+	 * TODO: clarify whether a minimum required by the fixed FEC overhead
+	 * in the bspec audio programming sequence is required here.
+	 */
+	return max(overhead, intel_dp_bw_fec_overhead(crtc_state->fec_enable));
+}
+
+static void intel_dp_mst_compute_m_n(const struct intel_crtc_state *crtc_state,
+				     const struct intel_connector *connector,
+				     bool ssc, bool dsc,
+				     int bpp,
+				     struct intel_link_m_n *m_n)
+{
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->hw.adjusted_mode;
+	int overhead = intel_dp_mst_bw_overhead(crtc_state,
+						connector,
+						ssc, dsc, bpp);
+
+	intel_link_compute_m_n(bpp, crtc_state->lane_count,
+			       adjusted_mode->crtc_clock,
+			       crtc_state->port_clock,
+			       overhead,
+			       m_n);
+
+	m_n->tu = DIV_ROUND_UP_ULL(mul_u32_u32(m_n->data_m, 64), m_n->data_n);
+}
+
 static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 						struct intel_crtc_state *crtc_state,
 						int max_bpp,
@@ -107,14 +164,40 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 						      crtc_state->lane_count);
 
 	for (bpp = max_bpp; bpp >= min_bpp; bpp -= step) {
+		struct intel_link_m_n remote_m_n;
+		int link_bpp;
+
 		drm_dbg_kms(&i915->drm, "Trying bpp %d\n", bpp);
 
 		ret = intel_dp_mst_check_constraints(i915, bpp, adjusted_mode, crtc_state, dsc);
 		if (ret)
 			continue;
 
-		crtc_state->pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock,
-						       bpp << 4);
+		link_bpp = dsc ? bpp :
+			intel_dp_output_bpp(crtc_state->output_format, bpp);
+
+		intel_dp_mst_compute_m_n(crtc_state, connector, false, dsc, link_bpp,
+					 &crtc_state->dp_m_n);
+		intel_dp_mst_compute_m_n(crtc_state, connector, true, dsc, link_bpp,
+					 &remote_m_n);
+
+		/*
+		 * The TU size programmed to the HW determines which slots in
+		 * an MTP frame are used for this stream, which needs to match
+		 * the payload size programmed to the first downstream branch
+		 * device's payload table.
+		 *
+		 * Note that atm the payload's PBN value DRM core sends via
+		 * the ALLOCATE_PAYLOAD side-band message matches the payload
+		 * size (which it calculates from the PBN value) it programs
+		 * to the first branch device's payload table. The allocation
+		 * in the payload table could be reduced though (to
+		 * crtc_state->dp_m_n.tu), provided that the driver doesn't
+		 * enable SSC on the corresponding link.
+		 */
+		drm_WARN_ON(&i915->drm, remote_m_n.tu < crtc_state->dp_m_n.tu);
+		crtc_state->dp_m_n.tu = remote_m_n.tu;
+		crtc_state->pbn = remote_m_n.tu * mst_state->pbn_div;
 
 		slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr,
 						      connector->port,
@@ -123,6 +206,8 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 			return slots;
 
 		if (slots >= 0) {
+			drm_WARN_ON(&i915->drm, slots != remote_m_n.tu);
+
 			ret = drm_dp_mst_atomic_check(state);
 			/*
 			 * If we got slots >= 0 and we can fit those based on check
@@ -156,10 +241,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 					    struct drm_connector_state *conn_state,
 					    struct link_config_limits *limits)
 {
-	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->hw.adjusted_mode;
 	int slots = -EINVAL;
-	int link_bpp;
 
 	/*
 	 * FIXME: allocate the BW according to link_bpp, which in the case of
@@ -174,16 +256,6 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 	if (slots < 0)
 		return slots;
 
-	link_bpp = intel_dp_output_bpp(crtc_state->output_format, crtc_state->pipe_bpp);
-
-	intel_link_compute_m_n(link_bpp,
-			       crtc_state->lane_count,
-			       adjusted_mode->crtc_clock,
-			       crtc_state->port_clock,
-			       intel_dp_bw_fec_overhead(crtc_state->fec_enable),
-			       &crtc_state->dp_m_n);
-	crtc_state->dp_m_n.tu = slots;
-
 	return 0;
 }
 
@@ -195,8 +267,6 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder,
 	struct intel_connector *connector =
 		to_intel_connector(conn_state->connector);
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
-	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->hw.adjusted_mode;
 	int slots = -EINVAL;
 	int i, num_bpc;
 	u8 dsc_bpc[3] = {};
@@ -271,14 +341,6 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder,
 			return slots;
 	}
 
-	intel_link_compute_m_n(crtc_state->dsc.compressed_bpp,
-			       crtc_state->lane_count,
-			       adjusted_mode->crtc_clock,
-			       crtc_state->port_clock,
-			       intel_dp_bw_fec_overhead(crtc_state->fec_enable),
-			       &crtc_state->dp_m_n);
-	crtc_state->dp_m_n.tu = slots;
-
 	return 0;
 }
 static int intel_dp_mst_update_slots(struct intel_encoder *encoder,
@@ -987,6 +1049,18 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 	if (ret)
 		return ret;
 
+	/*
+	 * TODO:
+	 * - Also check if compression would allow for the mode
+	 * - Calculate the overhead using drm_dp_bw_overhead() /
+	 *   drm_dp_bw_channel_coding_efficiency(), similarly to the
+	 *   compute config code, as drm_dp_calc_pbn_mode() doesn't
+	 *   account with all the overheads.
+	 * - Check here and during compute config the BW reported by
+	 *   DFP_Link_Available_Payload_Bandwidth_Number (or the
+	 *   corresponding link capabilities of the sink) in case the
+	 *   stream is uncompressed for it by the last branch device.
+	 */
 	if (mode_rate > max_rate || mode->clock > max_dotclk ||
 	    drm_dp_calc_pbn_mode(mode->clock, min_bpp << 4) > port->full_pbn) {
 		*status = MODE_CLOCK_HIGH;
-- 
GitLab


From 5d78cd80efdd4ac221a0ccd884082280ddef6128 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:09 +0300
Subject: [PATCH 0167/2340] drm/i915/dp_mst: Add atomic state for all streams
 on pre-tgl platforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If an MST stream is modeset, its state must be checked along all the
other streams on the same MST link, for instance to resolve a BW
overallocation of a non-sink MST port or to make sure that the FEC is
enabled/disabled the same way for all these streams.

To prepare for that this patch adds all the stream CRTCs to the atomic
state and marks them for modeset similarly to tgl+ platforms. (If the
state computation doesn't change the state the CRTC is switched back to
fastset mode.)

So far on tgl+ this was required because all streams in the topology
shared the master transcoder. For older platforms this didn't apply but
adding all the state is required now on all platforms based on the
above.

v2:
- Add code and commit log comment clarifying the requirements on old/new
  platforms. (Stan)
- Rename the function based on the new semantic. (Ville)

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-15-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 9d6b76d1a52a7..c9faac174aa2c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -563,19 +563,23 @@ static int intel_dp_mst_compute_config_late(struct intel_encoder *encoder,
  * that shares the same MST stream as mode changed,
  * intel_modeset_pipe_config()+intel_crtc_check_fastset() will take care to do
  * a fastset when possible.
+ *
+ * On TGL+ this is required since each stream go through a master transcoder,
+ * so if the master transcoder needs modeset, all other streams in the
+ * topology need a modeset. All platforms need to add the atomic state
+ * for all streams in the topology, since a modeset on one may require
+ * changing the MST link BW usage of the others, which in turn needs a
+ * recomputation of the corresponding CRTC states.
  */
 static int
-intel_dp_mst_atomic_master_trans_check(struct intel_connector *connector,
-				       struct intel_atomic_state *state)
+intel_dp_mst_atomic_topology_check(struct intel_connector *connector,
+				   struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct drm_connector_list_iter connector_list_iter;
 	struct intel_connector *connector_iter;
 	int ret = 0;
 
-	if (DISPLAY_VER(dev_priv) < 12)
-		return  0;
-
 	if (!intel_connector_needs_modeset(state, &connector->base))
 		return 0;
 
@@ -629,7 +633,7 @@ intel_dp_mst_atomic_check(struct drm_connector *connector,
 	if (ret)
 		return ret;
 
-	ret = intel_dp_mst_atomic_master_trans_check(intel_connector, state);
+	ret = intel_dp_mst_atomic_topology_check(intel_connector, state);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 53f468aa90091d3a75ff17b1c2f4874a9b862b38 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:10 +0300
Subject: [PATCH 0168/2340] drm/i915/dp_mst: Program the DSC PPS SDP for each
 stream

Atm the DSC PPS SDP is programmed only if the first stream is compressed
and then it's programmed only for the first stream. This left all other
compressed streams blank. Program the SDP for all streams.

v2:
- Rebase on upstream include "intel_vdsc.h" change.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-3-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c    | 12 +++++++-----
 drivers/gpu/drm/i915/display/intel_dp_mst.c |  1 +
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c75fd00e360ac..87fa7f7369254 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2505,7 +2505,8 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 	/* 6.o Configure and enable FEC if needed */
 	intel_ddi_enable_fec(encoder, crtc_state);
 
-	intel_dsc_dp_pps_write(encoder, crtc_state);
+	if (!is_mst)
+		intel_dsc_dp_pps_write(encoder, crtc_state);
 }
 
 static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state,
@@ -2643,7 +2644,8 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 	/* 7.l Configure and enable FEC if needed */
 	intel_ddi_enable_fec(encoder, crtc_state);
 
-	intel_dsc_dp_pps_write(encoder, crtc_state);
+	if (!is_mst)
+		intel_dsc_dp_pps_write(encoder, crtc_state);
 }
 
 static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state,
@@ -2705,10 +2707,10 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state,
 
 	intel_ddi_enable_fec(encoder, crtc_state);
 
-	if (!is_mst)
+	if (!is_mst) {
 		intel_ddi_enable_transcoder_clock(encoder, crtc_state);
-
-	intel_dsc_dp_pps_write(encoder, crtc_state);
+		intel_dsc_dp_pps_write(encoder, crtc_state);
+	}
 }
 
 static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index c9faac174aa2c..5efc3cea73e52 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -860,6 +860,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state,
 	if (DISPLAY_VER(dev_priv) < 12 || !first_mst_stream)
 		intel_ddi_enable_transcoder_clock(encoder, pipe_config);
 
+	intel_dsc_dp_pps_write(&dig_port->base, pipe_config);
 	intel_ddi_set_dp_msa(pipe_config, conn_state);
 }
 
-- 
GitLab


From aaa80e756e1cd8eb0561d7e244a9937fc23944d2 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:11 +0300
Subject: [PATCH 0169/2340] drm/i915/dp: Make sure the DSC PPS SDP is disabled
 whenever DSC is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Atm the DSC PPS SDP will stay enabled after enabling and disabling DSC.
This leaves an output blank after switching off DSC on it. Make sure the
SDP is disabled for an uncompressed output.

v2:
- Disable the SDP already during output disabling. (Ville)

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-17-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c     | 5 ++++-
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 5 ++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 4dc923bc4c97a..8ee2d89155868 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -4069,7 +4069,10 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder,
 			 VIDEO_DIP_ENABLE_SPD_HSW | VIDEO_DIP_ENABLE_DRM_GLK;
 	u32 val = intel_de_read(dev_priv, reg) & ~dip_enable;
 
-	/* TODO: Add DSC case (DIP_ENABLE_PPS) */
+	/* TODO: Sanitize DSC enabling wrt. intel_dsc_dp_pps_write(). */
+	if (!enable && HAS_DSC(dev_priv))
+		val &= ~VDIP_ENABLE_PPS;
+
 	/* When PSR is enabled, this routine doesn't disable VSC DIP */
 	if (!crtc_state->has_psr)
 		val &= ~VIDEO_DIP_ENABLE_VSC_HSW;
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 5efc3cea73e52..afa6d428cb0c6 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -746,9 +746,8 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
 	 * BSpec 4287: disable DIP after the transcoder is disabled and before
 	 * the transcoder clock select is set to none.
 	 */
-	if (last_mst_stream)
-		intel_dp_set_infoframes(&dig_port->base, false,
-					old_crtc_state, NULL);
+	intel_dp_set_infoframes(&dig_port->base, false,
+				old_crtc_state, NULL);
 	/*
 	 * From TGL spec: "If multi-stream slave transcoder: Configure
 	 * Transcoder Clock Select to direct no clock to the transcoder"
-- 
GitLab


From b40887f8c8a874acad4158adfa2182b73db1fb31 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:12 +0300
Subject: [PATCH 0170/2340] drm/i915/dp_mst: Add missing DSC compression
 disabling

Add the missing DSC compression disabling step for MST streams,
similarly to how this is done for SST outputs.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-18-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index afa6d428cb0c6..5214cdb049b30 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -730,6 +730,8 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
 
 	intel_ddi_disable_transcoder_func(old_crtc_state);
 
+	intel_dsc_disable(old_crtc_state);
+
 	if (DISPLAY_VER(dev_priv) >= 9)
 		skl_scaler_disable(old_crtc_state);
 	else
-- 
GitLab


From 0cfdf662d4ef71569c8b9a628defd51586e102c3 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:13 +0300
Subject: [PATCH 0171/2340] drm/i915/dp: Rename intel_ddi_disable_fec_state()
 to intel_ddi_disable_fec()

Rename intel_ddi_disable_fec_state() to intel_ddi_disable_fec(), for
symmetry with intel_ddi_enable_fec().

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-19-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 87fa7f7369254..c6cdac30bb4b6 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2234,8 +2234,8 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder,
 		     0, DP_TP_CTL_FEC_ENABLE);
 }
 
-static void intel_ddi_disable_fec_state(struct intel_encoder *encoder,
-					const struct intel_crtc_state *crtc_state)
+static void intel_ddi_disable_fec(struct intel_encoder *encoder,
+				  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
@@ -2868,8 +2868,7 @@ static void disable_ddi_buf(struct intel_encoder *encoder,
 		intel_de_rmw(dev_priv, dp_tp_ctl_reg(encoder, crtc_state),
 			     DP_TP_CTL_ENABLE, 0);
 
-	/* Disable FEC in DP Sink */
-	intel_ddi_disable_fec_state(encoder, crtc_state);
+	intel_ddi_disable_fec(encoder, crtc_state);
 
 	if (wait)
 		intel_wait_ddi_buf_idle(dev_priv, port);
@@ -2884,7 +2883,7 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder,
 		mtl_disable_ddi_buf(encoder, crtc_state);
 
 		/* 3.f Disable DP_TP_CTL FEC Enable if it is needed */
-		intel_ddi_disable_fec_state(encoder, crtc_state);
+		intel_ddi_disable_fec(encoder, crtc_state);
 	} else {
 		disable_ddi_buf(encoder, crtc_state);
 	}
-- 
GitLab


From 6e916b35afa8a3729b254cdd839fa12618e8591f Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:14 +0300
Subject: [PATCH 0172/2340] drm/i915/dp: Wait for FEC detected status in the
 sink

As required by the DP standard wait for the sink to detect the FEC
decode enabling symbol sent by the source.

There is a difference between SST and MST when the source enables
the FEC encoding: on SST this happens only after enabling the
transcoder, whereas on MST it happens already after enabling the
transcoder function (before enabling the transcoder). Wait for the
detected status at the earliest spot accordingly.

v2:
- Wait for the FEC detected status on SST after the transcoder is
  enabled.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-20-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c    | 73 +++++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_ddi.h    |  3 +
 drivers/gpu/drm/i915/display/intel_dp_mst.c |  4 ++
 3 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c6cdac30bb4b6..c2ca459d9ca14 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -25,6 +25,7 @@
  *
  */
 
+#include <linux/iopoll.h>
 #include <linux/string_helpers.h>
 
 #include <drm/display/drm_scdc_helper.h>
@@ -2220,6 +2221,74 @@ static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp,
 	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_CONFIGURATION, DP_FEC_READY) <= 0)
 		drm_dbg_kms(&i915->drm,
 			    "Failed to set FEC_READY in the sink\n");
+
+	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_STATUS,
+			       DP_FEC_DECODE_EN_DETECTED | DP_FEC_DECODE_DIS_DETECTED) <= 0)
+		drm_dbg_kms(&i915->drm, "Failed to clear FEC detected flags\n");
+}
+
+static int read_fec_detected_status(struct drm_dp_aux *aux)
+{
+	int ret;
+	u8 status;
+
+	ret = drm_dp_dpcd_readb(aux, DP_FEC_STATUS, &status);
+	if (ret < 0)
+		return ret;
+
+	return status;
+}
+
+static void wait_for_fec_detected(struct drm_dp_aux *aux, bool enabled)
+{
+	struct drm_i915_private *i915 = to_i915(aux->drm_dev);
+	int mask = enabled ? DP_FEC_DECODE_EN_DETECTED : DP_FEC_DECODE_DIS_DETECTED;
+	int status;
+	int err;
+
+	err = readx_poll_timeout(read_fec_detected_status, aux, status,
+				 status & mask || status < 0,
+				 10000, 200000);
+
+	if (!err && status >= 0)
+		return;
+
+	if (err == -ETIMEDOUT)
+		drm_err(&i915->drm, "Timeout waiting for FEC %s to get detected\n",
+			str_enabled_disabled(enabled));
+	else
+		drm_dbg_kms(&i915->drm, "FEC detected status read error: %d\n", status);
+}
+
+void intel_ddi_wait_for_fec_status(struct intel_encoder *encoder,
+				   const struct intel_crtc_state *crtc_state,
+				   bool enabled)
+{
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	int ret;
+
+	if (!crtc_state->fec_enable)
+		return;
+
+	if (enabled)
+		ret = intel_de_wait_for_set(i915, dp_tp_status_reg(encoder, crtc_state),
+					    DP_TP_STATUS_FEC_ENABLE_LIVE, 1);
+	else
+		ret = intel_de_wait_for_clear(i915, dp_tp_status_reg(encoder, crtc_state),
+					      DP_TP_STATUS_FEC_ENABLE_LIVE, 1);
+
+	if (ret)
+		drm_err(&i915->drm,
+			"Timeout waiting for FEC live state to get %s\n",
+			str_enabled_disabled(enabled));
+
+	/*
+	 * At least the Synoptics MST hub doesn't set the detected flag for
+	 * FEC decoding disabling so skip waiting for that.
+	 */
+	if (enabled)
+		wait_for_fec_detected(&intel_dp->aux, enabled);
 }
 
 static void intel_ddi_enable_fec(struct intel_encoder *encoder,
@@ -2887,6 +2956,8 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder,
 	} else {
 		disable_ddi_buf(encoder, crtc_state);
 	}
+
+	intel_ddi_wait_for_fec_status(encoder, crtc_state, false);
 }
 
 static void intel_ddi_post_disable_dp(struct intel_atomic_state *state,
@@ -3262,6 +3333,8 @@ static void intel_enable_ddi(struct intel_atomic_state *state,
 
 	intel_enable_transcoder(crtc_state);
 
+	intel_ddi_wait_for_fec_status(encoder, crtc_state, true);
+
 	intel_crtc_vblank_on(crtc_state);
 
 	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI))
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index db845f2aca0c6..63853a1f6582c 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -62,6 +62,9 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state
 void intel_ddi_enable_transcoder_clock(struct intel_encoder *encoder,
 				       const struct intel_crtc_state *crtc_state);
 void intel_ddi_disable_transcoder_clock(const  struct intel_crtc_state *crtc_state);
+void intel_ddi_wait_for_fec_status(struct intel_encoder *encoder,
+				   const struct intel_crtc_state *crtc_state,
+				   bool enabled);
 void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
 			  const struct drm_connector_state *conn_state);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 5214cdb049b30..4d0827a3cd9b1 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -878,6 +878,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 	struct drm_dp_mst_topology_state *mst_state =
 		drm_atomic_get_new_mst_topology_state(&state->base, &intel_dp->mst_mgr);
 	enum transcoder trans = pipe_config->cpu_transcoder;
+	bool first_mst_stream = intel_dp->active_mst_links == 1;
 
 	drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder);
 
@@ -904,6 +905,9 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 
 	wait_for_act_sent(encoder, pipe_config);
 
+	if (first_mst_stream)
+		intel_ddi_wait_for_fec_status(encoder, pipe_config, true);
+
 	drm_dp_add_payload_part2(&intel_dp->mst_mgr, &state->base,
 				 drm_atomic_get_mst_payload_state(mst_state, connector->port));
 
-- 
GitLab


From 8ab5a03643fc529f0e8663bc4d5b43f8f6885922 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:15 +0300
Subject: [PATCH 0173/2340] drm/i915/dp: Disable FEC ready flag in the sink

Disable the FEC ready flag in the sink during a disabling modeset.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-21-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c2ca459d9ca14..1abf74af6c5ce 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2211,18 +2211,21 @@ static void intel_dp_sink_set_msa_timing_par_ignore_state(struct intel_dp *intel
 }
 
 static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp,
-					const struct intel_crtc_state *crtc_state)
+					const struct intel_crtc_state *crtc_state,
+					bool enable)
 {
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 
 	if (!crtc_state->fec_enable)
 		return;
 
-	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_CONFIGURATION, DP_FEC_READY) <= 0)
-		drm_dbg_kms(&i915->drm,
-			    "Failed to set FEC_READY in the sink\n");
+	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_CONFIGURATION,
+			       enable ? DP_FEC_READY : 0) <= 0)
+		drm_dbg_kms(&i915->drm, "Failed to set FEC_READY to %s in the sink\n",
+			    enable ? "enabled" : "disabled");
 
-	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_STATUS,
+	if (enable &&
+	    drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_STATUS,
 			       DP_FEC_DECODE_EN_DETECTED | DP_FEC_DECODE_DIS_DETECTED) <= 0)
 		drm_dbg_kms(&i915->drm, "Failed to clear FEC detected flags\n");
 }
@@ -2541,7 +2544,7 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 	 * in the FEC_CONFIGURATION register to 1 before initiating link
 	 * training
 	 */
-	intel_dp_sink_set_fec_ready(intel_dp, crtc_state);
+	intel_dp_sink_set_fec_ready(intel_dp, crtc_state, true);
 
 	intel_dp_check_frl_training(intel_dp);
 	intel_dp_pcon_dsc_configure(intel_dp, crtc_state);
@@ -2692,7 +2695,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 	 * in the FEC_CONFIGURATION register to 1 before initiating link
 	 * training
 	 */
-	intel_dp_sink_set_fec_ready(intel_dp, crtc_state);
+	intel_dp_sink_set_fec_ready(intel_dp, crtc_state, true);
 
 	intel_dp_check_frl_training(intel_dp);
 	intel_dp_pcon_dsc_configure(intel_dp, crtc_state);
@@ -2768,7 +2771,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state,
 	intel_dp_configure_protocol_converter(intel_dp, crtc_state);
 	intel_dp_sink_set_decompression_state(intel_dp, crtc_state,
 					      true);
-	intel_dp_sink_set_fec_ready(intel_dp, crtc_state);
+	intel_dp_sink_set_fec_ready(intel_dp, crtc_state, true);
 	intel_dp_start_link_train(intel_dp, crtc_state);
 	if ((port != PORT_A || DISPLAY_VER(dev_priv) >= 9) &&
 	    !is_trans_port_sync_mode(crtc_state))
@@ -2997,6 +3000,8 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state,
 
 	intel_disable_ddi_buf(encoder, old_crtc_state);
 
+	intel_dp_sink_set_fec_ready(intel_dp, old_crtc_state, false);
+
 	/*
 	 * From TGL spec: "If single stream or multi-stream master transcoder:
 	 * Configure Transcoder Clock select to direct no clock to the
-- 
GitLab


From 55eaef164174480df6827edeac15620f3cbcd52b Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:16 +0300
Subject: [PATCH 0174/2340] drm/i915/dp_mst: Handle the Synaptics HBlank
 expansion quirk

The Synaptics MST hubs expose some sink EDID modes with a reduced HBLANK
period, presumedly to save BW, which the hub expands before forwarding
the stream to the sink. In particular a 4k mode with a standard CVT
HBLANK period is exposed with either a CVT reduced blank RBv1,v2 (80,
160 pixel) or a non-CVT 56 pixel HBLANK period. The DP standard
describes the above HBLANK expansion functionality, but it requires
enabling this explicitly, whereas these hubs apply the expansion
transparently.

In some TBT docks with such a Synaptics hub (DELL WD22TB4) the above modes
will work okay until DSC decompression is enabled in the hub for the given
sink, but after this the same mode will not work reliably in decompressed
mode. In another TBT dock (Thinkpad 40B0) the above modes will not work
in uncompressed/18bpp mode (regardless of whether DSC decompression was
enabled before or not).

As a workaround force enable DSC for such modes.  Apply the WA when the
HBLANK period is 300ns or below, matching the above tested modes with a
533.25MHz dotclock and maximum 160 HBLANK pixels.

OTOH DSC for these modes will only work above a certain compressed bpp
threshold which depends on the link rate, so apply this limit as well
in the workaround.

On platforms, pipe/port configurations where DSC is not supported, for
instance on ICL where DSC/MST is still work-in-progress, limit the
minimum link bpp to 24.

Apply the workaround only for Synaptics hubs which support the HBLANK
expansion.

v2:
- Apply the WA whenever the HBLANK period is 300ns or below.
v3:
- Clarify in the commit log the failure modes of the different docks.
- Handle platforms/pipe/port configurations without DSC support.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v1)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-4-imre.deak@intel.com
---
 .../drm/i915/display/intel_display_types.h    |   2 +
 drivers/gpu/drm/i915/display/intel_dp_mst.c   | 133 +++++++++++++++++-
 2 files changed, 131 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 92f06d67fd1e7..1ef12f0d571f7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -624,6 +624,8 @@ struct intel_connector {
 		struct drm_dp_aux *dsc_decompression_aux;
 		u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE];
 		u8 fec_capability;
+
+		u8 dsc_hblank_expansion_quirk:1;
 	} dp;
 
 	/* Work struct to schedule a uevent on link train failure */
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 4d0827a3cd9b1..4f066a868b9a2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -378,8 +378,91 @@ intel_dp_mst_dsc_source_support(const struct intel_crtc_state *crtc_state)
 	return DISPLAY_VER(i915) >= 12 && intel_dsc_source_support(crtc_state);
 }
 
+static int mode_hblank_period_ns(const struct drm_display_mode *mode)
+{
+	return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(mode->htotal - mode->hdisplay,
+						 NSEC_PER_SEC / 1000),
+				     mode->crtc_clock);
+}
+
+static bool
+hblank_expansion_quirk_needs_dsc(const struct intel_connector *connector,
+				 const struct intel_crtc_state *crtc_state)
+{
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->hw.adjusted_mode;
+
+	if (!connector->dp.dsc_hblank_expansion_quirk)
+		return false;
+
+	if (mode_hblank_period_ns(adjusted_mode) > 300)
+		return false;
+
+	return true;
+}
+
+static bool
+adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *connector,
+					     const struct intel_crtc_state *crtc_state,
+					     struct link_config_limits *limits,
+					     bool dsc)
+{
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	const struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	int min_bpp_x16 = limits->link.min_bpp_x16;
+
+	if (!hblank_expansion_quirk_needs_dsc(connector, crtc_state))
+		return true;
+
+	if (!dsc) {
+		if (intel_dp_mst_dsc_source_support(crtc_state)) {
+			drm_dbg_kms(&i915->drm,
+				    "[CRTC:%d:%s][CONNECTOR:%d:%s] DSC needed by hblank expansion quirk\n",
+				    crtc->base.base.id, crtc->base.name,
+				    connector->base.base.id, connector->base.name);
+			return false;
+		}
+
+		drm_dbg_kms(&i915->drm,
+			    "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to 24 due to hblank expansion quirk\n",
+			    crtc->base.base.id, crtc->base.name,
+			    connector->base.base.id, connector->base.name);
+
+		if (limits->link.max_bpp_x16 < to_bpp_x16(24))
+			return false;
+
+		limits->link.min_bpp_x16 = to_bpp_x16(24);
+
+		return true;
+	}
+
+	drm_WARN_ON(&i915->drm, limits->min_rate != limits->max_rate);
+
+	if (limits->max_rate < 540000)
+		min_bpp_x16 = to_bpp_x16(13);
+	else if (limits->max_rate < 810000)
+		min_bpp_x16 = to_bpp_x16(10);
+
+	if (limits->link.min_bpp_x16 >= min_bpp_x16)
+		return true;
+
+	drm_dbg_kms(&i915->drm,
+		    "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to " BPP_X16_FMT " in DSC mode due to hblank expansion quirk\n",
+		    crtc->base.base.id, crtc->base.name,
+		    connector->base.base.id, connector->base.name,
+		    BPP_X16_ARGS(min_bpp_x16));
+
+	if (limits->link.max_bpp_x16 < min_bpp_x16)
+		return false;
+
+	limits->link.min_bpp_x16 = min_bpp_x16;
+
+	return true;
+}
+
 static bool
 intel_dp_mst_compute_config_limits(struct intel_dp *intel_dp,
+				   const struct intel_connector *connector,
 				   struct intel_crtc_state *crtc_state,
 				   bool dsc,
 				   struct link_config_limits *limits)
@@ -407,10 +490,16 @@ intel_dp_mst_compute_config_limits(struct intel_dp *intel_dp,
 
 	intel_dp_adjust_compliance_config(intel_dp, crtc_state, limits);
 
-	return intel_dp_compute_config_link_bpp_limits(intel_dp,
-						       crtc_state,
-						       dsc,
-						       limits);
+	if (!intel_dp_compute_config_link_bpp_limits(intel_dp,
+						     crtc_state,
+						     dsc,
+						     limits))
+		return false;
+
+	return adjust_limits_for_dsc_hblank_expansion_quirk(connector,
+							    crtc_state,
+							    limits,
+							    dsc);
 }
 
 static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
@@ -420,6 +509,8 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_dp *intel_dp = &intel_mst->primary->dp;
+	const struct intel_connector *connector =
+		to_intel_connector(conn_state->connector);
 	const struct drm_display_mode *adjusted_mode =
 		&pipe_config->hw.adjusted_mode;
 	struct link_config_limits limits;
@@ -435,6 +526,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 	dsc_needed = intel_dp->force_dsc_en ||
 		     !intel_dp_mst_compute_config_limits(intel_dp,
+							 connector,
 							 pipe_config,
 							 false,
 							 &limits);
@@ -460,6 +552,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 			return -EINVAL;
 
 		if (!intel_dp_mst_compute_config_limits(intel_dp,
+							connector,
 							pipe_config,
 							true,
 							&limits))
@@ -1236,6 +1329,36 @@ intel_dp_mst_read_decompression_port_dsc_caps(struct intel_dp *intel_dp,
 	intel_dp_get_dsc_sink_cap(dpcd_caps[DP_DPCD_REV], connector);
 }
 
+static bool detect_dsc_hblank_expansion_quirk(const struct intel_connector *connector)
+{
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct drm_dp_desc desc;
+	u8 dpcd[DP_RECEIVER_CAP_SIZE];
+
+	if (!connector->dp.dsc_decompression_aux)
+		return false;
+
+	if (drm_dp_read_desc(connector->dp.dsc_decompression_aux,
+			     &desc, true) < 0)
+		return false;
+
+	if (!drm_dp_has_quirk(&desc,
+			      DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC))
+		return false;
+
+	if (drm_dp_read_dpcd_caps(connector->dp.dsc_decompression_aux, dpcd) < 0)
+		return false;
+
+	if (!(dpcd[DP_RECEIVE_PORT_0_CAP_0] & DP_HBLANK_EXPANSION_CAPABLE))
+		return false;
+
+	drm_dbg_kms(&i915->drm,
+		    "[CONNECTOR:%d:%s] DSC HBLANK expansion quirk detected\n",
+		    connector->base.base.id, connector->base.name);
+
+	return true;
+}
+
 static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 							struct drm_dp_mst_port *port,
 							const char *pathprop)
@@ -1265,6 +1388,8 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 	 */
 	intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
 	intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
+	intel_connector->dp.dsc_hblank_expansion_quirk =
+		detect_dsc_hblank_expansion_quirk(intel_connector);
 
 	connector = &intel_connector->base;
 	ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
-- 
GitLab


From 503611c8a08ab660c718c295d26180e585058d95 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:17 +0300
Subject: [PATCH 0175/2340] drm/i915/dp_mst: Enable decompression in the sink
 from the MST encoder hooks

Enable/disable the DSC decompression in the sink/branch from the MST
encoder hooks. This prepares for an upcoming patch toggling DSC for each
stream as needed, but for now keeps the current behavior, as DSC is only
enabled for the first MST stream.

v2:
- Rebased on latest drm-tip.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-5-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c    | 12 ++++++++----
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 15 ++++++++++++++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 1abf74af6c5ce..cc2a38fc22d0d 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2538,7 +2538,9 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 		intel_dp_set_power(intel_dp, DP_SET_POWER_D0);
 
 	intel_dp_configure_protocol_converter(intel_dp, crtc_state);
-	intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true);
+	if (!is_mst)
+		intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true);
+
 	/*
 	 * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit
 	 * in the FEC_CONFIGURATION register to 1 before initiating link
@@ -2689,7 +2691,8 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 		intel_dp_set_power(intel_dp, DP_SET_POWER_D0);
 
 	intel_dp_configure_protocol_converter(intel_dp, crtc_state);
-	intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true);
+	if (!is_mst)
+		intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true);
 	/*
 	 * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit
 	 * in the FEC_CONFIGURATION register to 1 before initiating link
@@ -2769,8 +2772,9 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state,
 	if (!is_mst)
 		intel_dp_set_power(intel_dp, DP_SET_POWER_D0);
 	intel_dp_configure_protocol_converter(intel_dp, crtc_state);
-	intel_dp_sink_set_decompression_state(intel_dp, crtc_state,
-					      true);
+	if (!is_mst)
+		intel_dp_sink_set_decompression_state(intel_dp, crtc_state,
+						      true);
 	intel_dp_sink_set_fec_ready(intel_dp, crtc_state, true);
 	intel_dp_start_link_train(intel_dp, crtc_state);
 	if ((port != PORT_A || DISPLAY_VER(dev_priv) >= 9) &&
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 4f066a868b9a2..eab69f57655d9 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -776,6 +776,13 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 	intel_hdcp_disable(intel_mst->connector);
 
 	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+
+	if (intel_dp->active_mst_links == 1) /* last stream ? */
+		/*
+		 * TODO: disable decompression for all streams/in any MST ports, not
+		 * only in the first downstream branch device.
+		 */
+		intel_dp_sink_set_decompression_state(intel_dp, old_crtc_state, false);
 }
 
 static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
@@ -932,9 +939,15 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state,
 
 	drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true);
 
-	if (first_mst_stream)
+	if (first_mst_stream) {
+		/*
+		 * TODO: enable decompression for all streams/in any MST ports, not
+		 * only in the first downstream branch device.
+		 */
+		intel_dp_sink_set_decompression_state(intel_dp, pipe_config, true);
 		dig_port->base.pre_enable(state, &dig_port->base,
 						pipe_config, NULL);
+	}
 
 	intel_dp->active_mst_links++;
 
-- 
GitLab


From 7c4631ff6233043b71b68c80f3b9f35510cdda33 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 13:22:18 +0300
Subject: [PATCH 0176/2340] drm/i915/dp: Enable DSC via the connector
 decompression AUX

Enable DSC using the DSC AUX device stored for this purpose in the
connector. For clarity add separate functions to enable/disable the
decompression, since these sequences will diverge more in follow-up
patches that also enable/disable DSC passthrough and on MST do the
actual enabling/disabling only for the first/last user of the given
AUX device. As a preparation for the latter refcounting change, also
pass the atomic state to the functions.

While at it set/clear only the DP_DECOMPRESSION_EN flag in the
DP_DSC_ENABLE DPCD register, preserving the reserved register bits.

Besides preserving the reserved register bits, the behavior stays as
before, as DSC is still only enabled for the first MST stream (which a
follow-up patch changes, enabling it for all streams).

v2:
- Add a helper function setting/clearing the decompression flag,
  preserving the reserved register bits.
v3:
- Add separate functions to enable/disable decompression and pass the
  atomic state to these.
- Add DocBook for both functions.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v2)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-24-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c    | 19 +++--
 drivers/gpu/drm/i915/display/intel_dp.c     | 90 ++++++++++++++++++---
 drivers/gpu/drm/i915/display/intel_dp.h     |  9 ++-
 drivers/gpu/drm/i915/display/intel_dp_mst.c |  4 +-
 4 files changed, 101 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index cc2a38fc22d0d..6cec72ac6422e 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2539,7 +2539,9 @@ static void mtl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 
 	intel_dp_configure_protocol_converter(intel_dp, crtc_state);
 	if (!is_mst)
-		intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true);
+		intel_dp_sink_enable_decompression(state,
+						   to_intel_connector(conn_state->connector),
+						   crtc_state);
 
 	/*
 	 * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit
@@ -2692,7 +2694,9 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state,
 
 	intel_dp_configure_protocol_converter(intel_dp, crtc_state);
 	if (!is_mst)
-		intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true);
+		intel_dp_sink_enable_decompression(state,
+						   to_intel_connector(conn_state->connector),
+						   crtc_state);
 	/*
 	 * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit
 	 * in the FEC_CONFIGURATION register to 1 before initiating link
@@ -2773,8 +2777,9 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state,
 		intel_dp_set_power(intel_dp, DP_SET_POWER_D0);
 	intel_dp_configure_protocol_converter(intel_dp, crtc_state);
 	if (!is_mst)
-		intel_dp_sink_set_decompression_state(intel_dp, crtc_state,
-						      true);
+		intel_dp_sink_enable_decompression(state,
+						   to_intel_connector(conn_state->connector),
+						   crtc_state);
 	intel_dp_sink_set_fec_ready(intel_dp, crtc_state, true);
 	intel_dp_start_link_train(intel_dp, crtc_state);
 	if ((port != PORT_A || DISPLAY_VER(dev_priv) >= 9) &&
@@ -3360,6 +3365,8 @@ static void intel_disable_ddi_dp(struct intel_atomic_state *state,
 				 const struct drm_connector_state *old_conn_state)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_connector *connector =
+		to_intel_connector(old_conn_state->connector);
 
 	intel_dp->link_trained = false;
 
@@ -3368,8 +3375,8 @@ static void intel_disable_ddi_dp(struct intel_atomic_state *state,
 	intel_psr_disable(intel_dp, old_crtc_state);
 	intel_edp_backlight_off(old_conn_state);
 	/* Disable the decompression in DP Sink */
-	intel_dp_sink_set_decompression_state(intel_dp, old_crtc_state,
-					      false);
+	intel_dp_sink_disable_decompression(state,
+					    connector, old_crtc_state);
 	/* Disable Ignore_MSA bit in DP Sink */
 	intel_dp_sink_set_msa_timing_par_ignore_state(intel_dp, old_crtc_state,
 						      false);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 8ee2d89155868..7d098a8c994c2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2937,24 +2937,94 @@ static bool downstream_hpd_needs_d0(struct intel_dp *intel_dp)
 		intel_dp->downstream_ports[0] & DP_DS_PORT_HPD;
 }
 
-void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp,
-					   const struct intel_crtc_state *crtc_state,
-					   bool enable)
+static int
+write_dsc_decompression_flag(struct drm_dp_aux *aux, u8 flag, bool set)
 {
-	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
-	int ret;
+	int err;
+	u8 val;
 
-	if (!crtc_state->dsc.compression_enable)
-		return;
+	err = drm_dp_dpcd_readb(aux, DP_DSC_ENABLE, &val);
+	if (err < 0)
+		return err;
 
-	ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_DSC_ENABLE,
-				 enable ? DP_DECOMPRESSION_EN : 0);
-	if (ret < 0)
+	if (set)
+		val |= flag;
+	else
+		val &= ~flag;
+
+	return drm_dp_dpcd_writeb(aux, DP_DSC_ENABLE, val);
+}
+
+static void
+intel_dp_sink_set_dsc_decompression(struct intel_connector *connector,
+				    bool enable)
+{
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+
+	if (write_dsc_decompression_flag(connector->dp.dsc_decompression_aux,
+					 DP_DECOMPRESSION_EN, enable) < 0)
 		drm_dbg_kms(&i915->drm,
 			    "Failed to %s sink decompression state\n",
 			    str_enable_disable(enable));
 }
 
+/**
+ * intel_dp_sink_enable_decompression - Enable DSC decompression in sink/last branch device
+ * @state: atomic state
+ * @connector: connector to enable the decompression for
+ * @new_crtc_state: new state for the CRTC driving @connector
+ *
+ * Enable the DSC decompression if required in the %DP_DSC_ENABLE DPCD
+ * register of the appropriate sink/branch device. On SST this is always the
+ * sink device, whereas on MST based on each device's DSC capabilities it's
+ * either the last branch device (enabling decompression in it) or both the
+ * last branch device (enabling passthrough in it) and the sink device
+ * (enabling decompression in it).
+ */
+void intel_dp_sink_enable_decompression(struct intel_atomic_state *state,
+					struct intel_connector *connector,
+					const struct intel_crtc_state *new_crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+
+	if (!new_crtc_state->dsc.compression_enable)
+		return;
+
+	if (drm_WARN_ON(&i915->drm,
+			!connector->dp.dsc_decompression_aux))
+		return;
+
+	/* TODO: Enable passthrough in the MST last branch device if needed. */
+	intel_dp_sink_set_dsc_decompression(connector, true);
+}
+
+/**
+ * intel_dp_sink_disable_decompression - Disable DSC decompression in sink/last branch device
+ * @state: atomic state
+ * @connector: connector to disable the decompression for
+ * @old_crtc_state: old state for the CRTC driving @connector
+ *
+ * Disable the DSC decompression if required in the %DP_DSC_ENABLE DPCD
+ * register of the appropriate sink/branch device, corresponding to the
+ * sequence in intel_dp_sink_enable_decompression().
+ */
+void intel_dp_sink_disable_decompression(struct intel_atomic_state *state,
+					 struct intel_connector *connector,
+					 const struct intel_crtc_state *old_crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+
+	if (!old_crtc_state->dsc.compression_enable)
+		return;
+
+	if (drm_WARN_ON(&i915->drm,
+			!connector->dp.dsc_decompression_aux))
+		return;
+
+	intel_dp_sink_set_dsc_decompression(connector, false);
+	/* TODO: Disable passthrough in the MST last branch device if needed. */
+}
+
 static void
 intel_edp_init_source_oui(struct intel_dp *intel_dp, bool careful)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 2080575fef69a..7cc23d846dfbf 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -57,9 +57,12 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 void intel_dp_set_power(struct intel_dp *intel_dp, u8 mode);
 void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp,
 					   const struct intel_crtc_state *crtc_state);
-void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp,
-					   const struct intel_crtc_state *crtc_state,
-					   bool enable);
+void intel_dp_sink_enable_decompression(struct intel_atomic_state *state,
+					struct intel_connector *connector,
+					const struct intel_crtc_state *new_crtc_state);
+void intel_dp_sink_disable_decompression(struct intel_atomic_state *state,
+					 struct intel_connector *connector,
+					 const struct intel_crtc_state *old_crtc_state);
 void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
 void intel_dp_encoder_shutdown(struct intel_encoder *intel_encoder);
 void intel_dp_encoder_flush_work(struct drm_encoder *encoder);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index eab69f57655d9..bc992e77ffc7a 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -782,7 +782,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 		 * TODO: disable decompression for all streams/in any MST ports, not
 		 * only in the first downstream branch device.
 		 */
-		intel_dp_sink_set_decompression_state(intel_dp, old_crtc_state, false);
+		intel_dp_sink_disable_decompression(state, connector, old_crtc_state);
 }
 
 static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
@@ -944,7 +944,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state,
 		 * TODO: enable decompression for all streams/in any MST ports, not
 		 * only in the first downstream branch device.
 		 */
-		intel_dp_sink_set_decompression_state(intel_dp, pipe_config, true);
+		intel_dp_sink_enable_decompression(state, connector, pipe_config);
 		dig_port->base.pre_enable(state, &dig_port->base,
 						pipe_config, NULL);
 	}
-- 
GitLab


From 751dbac1a0235ea7303e5e76fade2762e8298907 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 13:22:19 +0300
Subject: [PATCH 0177/2340] drm/i915/dp_mst: Enable DSC passthrough
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable passing through DSC streams to the sink in last branch devices.

v2:
- Fix the DPCD register address while setting/clearing the passthrough
  flag.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-25-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 7d098a8c994c2..24a3c0c276350 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2968,6 +2968,24 @@ intel_dp_sink_set_dsc_decompression(struct intel_connector *connector,
 			    str_enable_disable(enable));
 }
 
+static void
+intel_dp_sink_set_dsc_passthrough(const struct intel_connector *connector,
+				  bool enable)
+{
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct drm_dp_aux *aux = connector->port ?
+				 connector->port->passthrough_aux : NULL;
+
+	if (!aux)
+		return;
+
+	if (write_dsc_decompression_flag(aux,
+					 DP_DSC_PASSTHROUGH_EN, enable) < 0)
+		drm_dbg_kms(&i915->drm,
+			    "Failed to %s sink compression passthrough state\n",
+			    str_enable_disable(enable));
+}
+
 /**
  * intel_dp_sink_enable_decompression - Enable DSC decompression in sink/last branch device
  * @state: atomic state
@@ -2994,7 +3012,7 @@ void intel_dp_sink_enable_decompression(struct intel_atomic_state *state,
 			!connector->dp.dsc_decompression_aux))
 		return;
 
-	/* TODO: Enable passthrough in the MST last branch device if needed. */
+	intel_dp_sink_set_dsc_passthrough(connector, true);
 	intel_dp_sink_set_dsc_decompression(connector, true);
 }
 
@@ -3022,7 +3040,7 @@ void intel_dp_sink_disable_decompression(struct intel_atomic_state *state,
 		return;
 
 	intel_dp_sink_set_dsc_decompression(connector, false);
-	/* TODO: Disable passthrough in the MST last branch device if needed. */
+	intel_dp_sink_set_dsc_passthrough(connector, false);
 }
 
 static void
-- 
GitLab


From b2608c6b3212e4258379c161d8657c526bda902c Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:20 +0300
Subject: [PATCH 0178/2340] drm/i915/dp_mst: Enable MST DSC decompression for
 all streams

Enable DSC decompression for all streams. In particular atm if a sink is
connected to a last branch device that is downstream of the first branch
device connected to the source, decompression is not enabled for it.
Similarly it's not enabled if the sink supports this with the last
branch device passing through the compressed stream to it.

Enable DSC in the above cases as well. Since last branch devices may
handle the decompression for multiple ports, toggling DSC needs to be
refcounted, add this using the DSC AUX device as a reference.

v2:
- Fix refcounting, setting/clearing
  connector->dp.dsc_decompression_enabled always as needed. (Stan)
- Make the refcounting more uniform for the SST vs. MST case.
- Add state checks for connector->dp.dsc_decompression_enabled and
  connector crtc.
- Sanitize connector DSC decompression state during HW setup.
- s/use_count/ref_count/
v3:
- Remove stale TODO: comment to set the actual decompression_aux.

Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-6-imre.deak@intel.com
---
 .../drm/i915/display/intel_display_types.h    |  1 +
 drivers/gpu/drm/i915/display/intel_dp.c       | 72 ++++++++++++++++++-
 drivers/gpu/drm/i915/display/intel_dp_mst.c   | 24 ++-----
 .../drm/i915/display/intel_modeset_setup.c    |  6 ++
 4 files changed, 82 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 1ef12f0d571f7..926bf9c1a3ede 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -626,6 +626,7 @@ struct intel_connector {
 		u8 fec_capability;
 
 		u8 dsc_hblank_expansion_quirk:1;
+		u8 dsc_decompression_enabled:1;
 	} dp;
 
 	/* Work struct to schedule a uevent on link train failure */
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 24a3c0c276350..4ba50e68cb0f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1403,6 +1403,7 @@ static bool intel_dp_supports_dsc(const struct intel_connector *connector,
 		return false;
 
 	return intel_dsc_source_support(crtc_state) &&
+		connector->dp.dsc_decompression_aux &&
 		drm_dp_sink_supports_dsc(connector->dp.dsc_dpcd);
 }
 
@@ -2986,6 +2987,65 @@ intel_dp_sink_set_dsc_passthrough(const struct intel_connector *connector,
 			    str_enable_disable(enable));
 }
 
+static int intel_dp_dsc_aux_ref_count(struct intel_atomic_state *state,
+				      const struct intel_connector *connector,
+				      bool for_get_ref)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	struct drm_connector *_connector_iter;
+	struct drm_connector_state *old_conn_state;
+	struct drm_connector_state *new_conn_state;
+	int ref_count = 0;
+	int i;
+
+	/*
+	 * On SST the decompression AUX device won't be shared, each connector
+	 * uses for this its own AUX targeting the sink device.
+	 */
+	if (!connector->mst_port)
+		return connector->dp.dsc_decompression_enabled ? 1 : 0;
+
+	for_each_oldnew_connector_in_state(&state->base, _connector_iter,
+					   old_conn_state, new_conn_state, i) {
+		const struct intel_connector *
+			connector_iter = to_intel_connector(_connector_iter);
+
+		if (connector_iter->mst_port != connector->mst_port)
+			continue;
+
+		if (!connector_iter->dp.dsc_decompression_enabled)
+			continue;
+
+		drm_WARN_ON(&i915->drm,
+			    (for_get_ref && !new_conn_state->crtc) ||
+			    (!for_get_ref && !old_conn_state->crtc));
+
+		if (connector_iter->dp.dsc_decompression_aux ==
+		    connector->dp.dsc_decompression_aux)
+			ref_count++;
+	}
+
+	return ref_count;
+}
+
+static bool intel_dp_dsc_aux_get_ref(struct intel_atomic_state *state,
+				     struct intel_connector *connector)
+{
+	bool ret = intel_dp_dsc_aux_ref_count(state, connector, true) == 0;
+
+	connector->dp.dsc_decompression_enabled = true;
+
+	return ret;
+}
+
+static bool intel_dp_dsc_aux_put_ref(struct intel_atomic_state *state,
+				     struct intel_connector *connector)
+{
+	connector->dp.dsc_decompression_enabled = false;
+
+	return intel_dp_dsc_aux_ref_count(state, connector, false) == 0;
+}
+
 /**
  * intel_dp_sink_enable_decompression - Enable DSC decompression in sink/last branch device
  * @state: atomic state
@@ -3009,7 +3069,11 @@ void intel_dp_sink_enable_decompression(struct intel_atomic_state *state,
 		return;
 
 	if (drm_WARN_ON(&i915->drm,
-			!connector->dp.dsc_decompression_aux))
+			!connector->dp.dsc_decompression_aux ||
+			connector->dp.dsc_decompression_enabled))
+		return;
+
+	if (!intel_dp_dsc_aux_get_ref(state, connector))
 		return;
 
 	intel_dp_sink_set_dsc_passthrough(connector, true);
@@ -3036,7 +3100,11 @@ void intel_dp_sink_disable_decompression(struct intel_atomic_state *state,
 		return;
 
 	if (drm_WARN_ON(&i915->drm,
-			!connector->dp.dsc_decompression_aux))
+			!connector->dp.dsc_decompression_aux ||
+			!connector->dp.dsc_decompression_enabled))
+		return;
+
+	if (!intel_dp_dsc_aux_put_ref(state, connector))
 		return;
 
 	intel_dp_sink_set_dsc_decompression(connector, false);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index bc992e77ffc7a..b3d952bbb3cf0 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -777,12 +777,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 
 	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
 
-	if (intel_dp->active_mst_links == 1) /* last stream ? */
-		/*
-		 * TODO: disable decompression for all streams/in any MST ports, not
-		 * only in the first downstream branch device.
-		 */
-		intel_dp_sink_disable_decompression(state, connector, old_crtc_state);
+	intel_dp_sink_disable_decompression(state, connector, old_crtc_state);
 }
 
 static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
@@ -939,15 +934,11 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state,
 
 	drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true);
 
-	if (first_mst_stream) {
-		/*
-		 * TODO: enable decompression for all streams/in any MST ports, not
-		 * only in the first downstream branch device.
-		 */
-		intel_dp_sink_enable_decompression(state, connector, pipe_config);
+	intel_dp_sink_enable_decompression(state, connector, pipe_config);
+
+	if (first_mst_stream)
 		dig_port->base.pre_enable(state, &dig_port->base,
 						pipe_config, NULL);
-	}
 
 	intel_dp->active_mst_links++;
 
@@ -1394,12 +1385,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 	intel_connector->port = port;
 	drm_dp_mst_get_port_malloc(port);
 
-	/*
-	 * TODO: set the AUX for the actual MST port decompressing the stream.
-	 * At the moment the driver only supports enabling this globally in the
-	 * first downstream MST branch, via intel_dp's (root port) AUX.
-	 */
-	intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
+	intel_connector->dp.dsc_decompression_aux = drm_dp_mst_dsc_aux_for_port(port);
 	intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
 	intel_connector->dp.dsc_hblank_expansion_quirk =
 		detect_dsc_hblank_expansion_quirk(intel_connector);
diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
index b8f43efb0ab5a..94eece7f63be3 100644
--- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c
+++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
@@ -318,6 +318,12 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private
 			const struct intel_crtc_state *crtc_state =
 				to_intel_crtc_state(crtc->base.state);
 
+			if (crtc_state->dsc.compression_enable) {
+				drm_WARN_ON(&i915->drm, !connector->dp.dsc_decompression_aux);
+				connector->dp.dsc_decompression_enabled = true;
+			} else {
+				connector->dp.dsc_decompression_enabled = false;
+			}
 			conn_state->max_bpc = (crtc_state->pipe_bpp ?: 24) / 3;
 		}
 	}
-- 
GitLab


From 1e4bd5c14e4c72fc74a985e05fdbc735d2cf7566 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:21 +0300
Subject: [PATCH 0179/2340] drm/i915: Factor out function to clear pipe update
 flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Factor out a helper to clear the pipe update flags, used by a follow-up
patch to modeset an MST topology.

v2:
- Move the intel_crtc_needs_modeset() check to the callers. (Ville)
v3 (Ville):
- Rename clear_pipe_update_flags_on_modeset_crtc() to
  intel_crtc_flag_modeset().
- Also set crtc_state->uapi.mode_changed in the function.
- Leave out the unrelated change to use
  intel_modeset_pipes_in_mask_early().

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v1)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-7-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 49 +++++++++-----------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 3c7ef2cc3b8b1..afa5613fd847c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5542,6 +5542,16 @@ int intel_modeset_pipes_in_mask_early(struct intel_atomic_state *state,
 	return 0;
 }
 
+static void
+intel_crtc_flag_modeset(struct intel_crtc_state *crtc_state)
+{
+	crtc_state->uapi.mode_changed = true;
+
+	crtc_state->update_pipe = false;
+	crtc_state->update_m_n = false;
+	crtc_state->update_lrr = false;
+}
+
 /**
  * intel_modeset_all_pipes_late - force a full modeset on all pipes
  * @state: intel atomic state
@@ -5575,9 +5585,8 @@ int intel_modeset_all_pipes_late(struct intel_atomic_state *state,
 		if (ret)
 			return ret;
 
-		crtc_state->update_pipe = false;
-		crtc_state->update_m_n = false;
-		crtc_state->update_lrr = false;
+		intel_crtc_flag_modeset(crtc_state);
+
 		crtc_state->update_planes |= crtc_state->active_planes;
 		crtc_state->async_flip_planes = 0;
 		crtc_state->do_async_flip = false;
@@ -5690,17 +5699,17 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta
 	else
 		new_crtc_state->uapi.mode_changed = false;
 
-	if (intel_crtc_needs_modeset(new_crtc_state) ||
-	    intel_compare_link_m_n(&old_crtc_state->dp_m_n,
+	if (intel_compare_link_m_n(&old_crtc_state->dp_m_n,
 				   &new_crtc_state->dp_m_n))
 		new_crtc_state->update_m_n = false;
 
-	if (intel_crtc_needs_modeset(new_crtc_state) ||
-	    (old_crtc_state->hw.adjusted_mode.crtc_vtotal == new_crtc_state->hw.adjusted_mode.crtc_vtotal &&
+	if ((old_crtc_state->hw.adjusted_mode.crtc_vtotal == new_crtc_state->hw.adjusted_mode.crtc_vtotal &&
 	     old_crtc_state->hw.adjusted_mode.crtc_vblank_end == new_crtc_state->hw.adjusted_mode.crtc_vblank_end))
 		new_crtc_state->update_lrr = false;
 
-	if (!intel_crtc_needs_modeset(new_crtc_state))
+	if (intel_crtc_needs_modeset(new_crtc_state))
+		intel_crtc_flag_modeset(new_crtc_state);
+	else
 		new_crtc_state->update_pipe = true;
 }
 
@@ -6475,12 +6484,8 @@ int intel_atomic_check(struct drm_device *dev,
 		if (intel_dp_mst_is_slave_trans(new_crtc_state)) {
 			enum transcoder master = new_crtc_state->mst_master_transcoder;
 
-			if (intel_cpu_transcoders_need_modeset(state, BIT(master))) {
-				new_crtc_state->uapi.mode_changed = true;
-				new_crtc_state->update_pipe = false;
-				new_crtc_state->update_m_n = false;
-				new_crtc_state->update_lrr = false;
-			}
+			if (intel_cpu_transcoders_need_modeset(state, BIT(master)))
+				intel_crtc_flag_modeset(new_crtc_state);
 		}
 
 		if (is_trans_port_sync_mode(new_crtc_state)) {
@@ -6489,21 +6494,13 @@ int intel_atomic_check(struct drm_device *dev,
 			if (new_crtc_state->master_transcoder != INVALID_TRANSCODER)
 				trans |= BIT(new_crtc_state->master_transcoder);
 
-			if (intel_cpu_transcoders_need_modeset(state, trans)) {
-				new_crtc_state->uapi.mode_changed = true;
-				new_crtc_state->update_pipe = false;
-				new_crtc_state->update_m_n = false;
-				new_crtc_state->update_lrr = false;
-			}
+			if (intel_cpu_transcoders_need_modeset(state, trans))
+				intel_crtc_flag_modeset(new_crtc_state);
 		}
 
 		if (new_crtc_state->bigjoiner_pipes) {
-			if (intel_pipes_need_modeset(state, new_crtc_state->bigjoiner_pipes)) {
-				new_crtc_state->uapi.mode_changed = true;
-				new_crtc_state->update_pipe = false;
-				new_crtc_state->update_m_n = false;
-				new_crtc_state->update_lrr = false;
-			}
+			if (intel_pipes_need_modeset(state, new_crtc_state->bigjoiner_pipes))
+				intel_crtc_flag_modeset(new_crtc_state);
 		}
 	}
 
-- 
GitLab


From e37137380931ae971e0380ba4cea6b16843da953 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:22 +0300
Subject: [PATCH 0180/2340] drm/i915/dp_mst: Force modeset CRTC if DSC toggling
 requires it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enabling / disabling DSC decompression in the branch device downstream
of the source may reset the whole branch device. To avoid this while the
streams are still active, force a modeset on all CRTC/ports connected to
this branch device.

v2:
- Check the CRTC state for each connector in the topology, instead of
  the CRTC being checked for a modeset requirement. (Ville)
- Add DocBook for the new function.
v3:
- Rebased on a change not to use
  intel_modeset_pipes_in_mask_early().

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> (v1)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-8-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c |  3 +
 drivers/gpu/drm/i915/display/intel_dp_mst.c  | 88 ++++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_dp_mst.h  |  2 +
 3 files changed, 93 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index afa5613fd847c..c4cb0e2273a00 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6481,6 +6481,9 @@ int intel_atomic_check(struct drm_device *dev,
 		if (!new_crtc_state->hw.enable || intel_crtc_needs_modeset(new_crtc_state))
 			continue;
 
+		if (intel_dp_mst_crtc_needs_modeset(state, crtc))
+			intel_crtc_flag_modeset(new_crtc_state);
+
 		if (intel_dp_mst_is_slave_trans(new_crtc_state)) {
 			enum transcoder master = new_crtc_state->mst_master_transcoder;
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index b3d952bbb3cf0..079986b09f8bf 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1628,3 +1628,91 @@ int intel_dp_mst_add_topology_state_for_crtc(struct intel_atomic_state *state,
 
 	return 0;
 }
+
+static struct intel_connector *
+get_connector_in_state_for_crtc(struct intel_atomic_state *state,
+				const struct intel_crtc *crtc)
+{
+	struct drm_connector_state *old_conn_state;
+	struct drm_connector_state *new_conn_state;
+	struct drm_connector *_connector;
+	int i;
+
+	for_each_oldnew_connector_in_state(&state->base, _connector,
+					   old_conn_state, new_conn_state, i) {
+		struct intel_connector *connector =
+			to_intel_connector(_connector);
+
+		if (old_conn_state->crtc == &crtc->base ||
+		    new_conn_state->crtc == &crtc->base)
+			return connector;
+	}
+
+	return NULL;
+}
+
+/**
+ * intel_dp_mst_crtc_needs_modeset - check if changes in topology need to modeset the given CRTC
+ * @state: atomic state
+ * @crtc: CRTC for which to check the modeset requirement
+ *
+ * Check if any change in a MST topology requires a forced modeset on @crtc in
+ * this topology. One such change is enabling/disabling the DSC decompression
+ * state in the first branch device's UFP DPCD as required by one CRTC, while
+ * the other @crtc in the same topology is still active, requiring a full modeset
+ * on @crtc.
+ */
+bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state,
+				     struct intel_crtc *crtc)
+{
+	const struct intel_connector *crtc_connector;
+	const struct drm_connector_state *conn_state;
+	const struct drm_connector *_connector;
+	int i;
+
+	if (!intel_crtc_has_type(intel_atomic_get_new_crtc_state(state, crtc),
+				 INTEL_OUTPUT_DP_MST))
+		return false;
+
+	crtc_connector = get_connector_in_state_for_crtc(state, crtc);
+
+	if (!crtc_connector)
+		/* None of the connectors in the topology needs modeset */
+		return false;
+
+	for_each_new_connector_in_state(&state->base, _connector, conn_state, i) {
+		const struct intel_connector *connector =
+			to_intel_connector(_connector);
+		const struct intel_crtc_state *new_crtc_state;
+		const struct intel_crtc_state *old_crtc_state;
+		struct intel_crtc *crtc_iter;
+
+		if (connector->mst_port != crtc_connector->mst_port ||
+		    !conn_state->crtc)
+			continue;
+
+		crtc_iter = to_intel_crtc(conn_state->crtc);
+
+		new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc_iter);
+		old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc_iter);
+
+		if (!intel_crtc_needs_modeset(new_crtc_state))
+			continue;
+
+		if (old_crtc_state->dsc.compression_enable ==
+		    new_crtc_state->dsc.compression_enable)
+			continue;
+		/*
+		 * Toggling the decompression flag because of this stream in
+		 * the first downstream branch device's UFP DPCD may reset the
+		 * whole branch device. To avoid the reset while other streams
+		 * are also active modeset the whole MST topology in this
+		 * case.
+		 */
+		if (connector->dp.dsc_decompression_aux ==
+		    &connector->mst_port->aux)
+			return true;
+	}
+
+	return false;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h
index f1815bb722672..fc5e85776a858 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h
@@ -22,5 +22,7 @@ bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state);
 bool intel_dp_mst_source_support(struct intel_dp *intel_dp);
 int intel_dp_mst_add_topology_state_for_crtc(struct intel_atomic_state *state,
 					     struct intel_crtc *crtc);
+bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state,
+				     struct intel_crtc *crtc);
 
 #endif /* __INTEL_DP_MST_H__ */
-- 
GitLab


From 36f579ffc6921408fd2e466a6930463bac56b926 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:23 +0300
Subject: [PATCH 0181/2340] drm/i915/dp_mst: Improve BW sharing between MST
 streams

At the moment modesetting a stream CRTC will fail if the stream's BW
along with the current BW of all the other streams on the same MST link
is above the total BW of the MST link. Make the BW sharing more dynamic
by trying to reduce the link bpp of one or more streams on the MST link
in this case.

When selecting a stream to reduce the BW for, take into account which
link segment in the MST topology ran out of BW and which streams go
through this link segment. For instance with A,B,C streams in the same
MST topology A and B may share the BW of a link segment downstream of a
branch device, stream C not downstream of the branch device, hence not
affecting this BW. If this link segment's BW runs out one or both of
stream A/B's BW will be reduced until their total BW is within limits.

While reducing the link bpp for a given stream DSC may need to be
enabled for it, which requires FEC on the whole MST link. Check for this
condition and recompute the state for all streams taking the FEC
overhead into account (on 8b/10b links).

v2:
- Rebase on s/min_bpp_pipes/min_bpp_reached_pipes/ change.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-29-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c |   5 +-
 drivers/gpu/drm/i915/display/intel_dp.c      |  11 +-
 drivers/gpu/drm/i915/display/intel_dp.h      |   3 +
 drivers/gpu/drm/i915/display/intel_dp_mst.c  | 129 +++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_dp_mst.h  |   3 +
 drivers/gpu/drm/i915/display/intel_link_bw.c |  16 ++-
 drivers/gpu/drm/i915/display/intel_link_bw.h |   1 +
 7 files changed, 161 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c4cb0e2273a00..bad43626e2047 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -4698,6 +4698,7 @@ intel_modeset_pipe_config(struct intel_atomic_state *state,
 	if (ret)
 		return ret;
 
+	crtc_state->fec_enable = limits->force_fec_pipes & BIT(crtc->pipe);
 	crtc_state->max_link_bpp_x16 = limits->max_bpp_x16[crtc->pipe];
 
 	if (crtc_state->pipe_bpp > to_bpp_int(crtc_state->max_link_bpp_x16)) {
@@ -6524,10 +6525,6 @@ int intel_atomic_check(struct drm_device *dev,
 		goto fail;
 	}
 
-	ret = drm_dp_mst_atomic_check(&state->base);
-	if (ret)
-		goto fail;
-
 	ret = intel_atomic_check_planes(state);
 	if (ret)
 		goto fail;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 4ba50e68cb0f2..01655adf4c157 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2138,8 +2138,9 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 		&pipe_config->hw.adjusted_mode;
 	int ret;
 
-	pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) &&
-		intel_dp_supports_fec(intel_dp, connector, pipe_config);
+	pipe_config->fec_enable = pipe_config->fec_enable ||
+		(!intel_dp_is_edp(intel_dp) &&
+		 intel_dp_supports_fec(intel_dp, connector, pipe_config));
 
 	if (!intel_dp_supports_dsc(connector, pipe_config))
 		return -EINVAL;
@@ -2327,6 +2328,8 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
+	const struct intel_connector *connector =
+		to_intel_connector(conn_state->connector);
 	const struct drm_display_mode *adjusted_mode =
 		&pipe_config->hw.adjusted_mode;
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
@@ -2335,6 +2338,10 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 	bool dsc_needed;
 	int ret = 0;
 
+	if (pipe_config->fec_enable &&
+	    !intel_dp_supports_fec(intel_dp, connector, pipe_config))
+		return -EINVAL;
+
 	if (intel_dp_need_bigjoiner(intel_dp, adjusted_mode->crtc_hdisplay,
 				    adjusted_mode->crtc_clock))
 		pipe_config->bigjoiner_pipes = GENMASK(crtc->pipe + 1, crtc->pipe);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 7cc23d846dfbf..91051973be2d5 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -140,6 +140,9 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
 	return ~((1 << lane_count) - 1) & 0xf;
 }
 
+bool intel_dp_supports_fec(struct intel_dp *intel_dp,
+			   const struct intel_connector *connector,
+			   const struct intel_crtc_state *pipe_config);
 u32 intel_dp_mode_to_fec_clock(u32 mode_clock);
 int intel_dp_bw_fec_overhead(bool fec_enabled);
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 079986b09f8bf..b300e4b065dfd 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -43,6 +43,7 @@
 #include "intel_dpio_phy.h"
 #include "intel_hdcp.h"
 #include "intel_hotplug.h"
+#include "intel_link_bw.h"
 #include "intel_vdsc.h"
 #include "skl_scaler.h"
 
@@ -517,6 +518,10 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	bool dsc_needed;
 	int ret = 0;
 
+	if (pipe_config->fec_enable &&
+	    !intel_dp_supports_fec(intel_dp, connector, pipe_config))
+		return -EINVAL;
+
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return -EINVAL;
 
@@ -636,6 +641,130 @@ intel_dp_mst_transcoder_mask(struct intel_atomic_state *state,
 	return transcoders;
 }
 
+static u8 get_pipes_downstream_of_mst_port(struct intel_atomic_state *state,
+					   struct drm_dp_mst_topology_mgr *mst_mgr,
+					   struct drm_dp_mst_port *parent_port)
+{
+	const struct intel_digital_connector_state *conn_state;
+	struct intel_connector *connector;
+	u8 mask = 0;
+	int i;
+
+	for_each_new_intel_connector_in_state(state, connector, conn_state, i) {
+		if (!conn_state->base.crtc)
+			continue;
+
+		if (&connector->mst_port->mst_mgr != mst_mgr)
+			continue;
+
+		if (connector->port != parent_port &&
+		    !drm_dp_mst_port_downstream_of_parent(mst_mgr,
+							  connector->port,
+							  parent_port))
+			continue;
+
+		mask |= BIT(to_intel_crtc(conn_state->base.crtc)->pipe);
+	}
+
+	return mask;
+}
+
+static int intel_dp_mst_check_fec_change(struct intel_atomic_state *state,
+					 struct drm_dp_mst_topology_mgr *mst_mgr,
+					 struct intel_link_bw_limits *limits)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	struct intel_crtc *crtc;
+	u8 mst_pipe_mask;
+	u8 fec_pipe_mask = 0;
+	int ret;
+
+	mst_pipe_mask = get_pipes_downstream_of_mst_port(state, mst_mgr, NULL);
+
+	for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, mst_pipe_mask) {
+		struct intel_crtc_state *crtc_state =
+			intel_atomic_get_new_crtc_state(state, crtc);
+
+		/* Atomic connector check should've added all the MST CRTCs. */
+		if (drm_WARN_ON(&i915->drm, !crtc_state))
+			return -EINVAL;
+
+		if (crtc_state->fec_enable)
+			fec_pipe_mask |= BIT(crtc->pipe);
+	}
+
+	if (!fec_pipe_mask || mst_pipe_mask == fec_pipe_mask)
+		return 0;
+
+	limits->force_fec_pipes |= mst_pipe_mask;
+
+	ret = intel_modeset_pipes_in_mask_early(state, "MST FEC",
+						mst_pipe_mask);
+
+	return ret ? : -EAGAIN;
+}
+
+static int intel_dp_mst_check_bw(struct intel_atomic_state *state,
+				 struct drm_dp_mst_topology_mgr *mst_mgr,
+				 struct drm_dp_mst_topology_state *mst_state,
+				 struct intel_link_bw_limits *limits)
+{
+	struct drm_dp_mst_port *mst_port;
+	u8 mst_port_pipes;
+	int ret;
+
+	ret = drm_dp_mst_atomic_check_mgr(&state->base, mst_mgr, mst_state, &mst_port);
+	if (ret != -ENOSPC)
+		return ret;
+
+	mst_port_pipes = get_pipes_downstream_of_mst_port(state, mst_mgr, mst_port);
+
+	ret = intel_link_bw_reduce_bpp(state, limits,
+				       mst_port_pipes, "MST link BW");
+
+	return ret ? : -EAGAIN;
+}
+
+/**
+ * intel_dp_mst_atomic_check_link - check all modeset MST link configuration
+ * @state: intel atomic state
+ * @limits: link BW limits
+ *
+ * Check the link configuration for all modeset MST outputs. If the
+ * configuration is invalid @limits will be updated if possible to
+ * reduce the total BW, after which the configuration for all CRTCs in
+ * @state must be recomputed with the updated @limits.
+ *
+ * Returns:
+ *   - 0 if the confugration is valid
+ *   - %-EAGAIN, if the configuration is invalid and @limits got updated
+ *     with fallback values with which the configuration of all CRTCs in
+ *     @state must be recomputed
+ *   - Other negative error, if the configuration is invalid without a
+ *     fallback possibility, or the check failed for another reason
+ */
+int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state,
+				   struct intel_link_bw_limits *limits)
+{
+	struct drm_dp_mst_topology_mgr *mgr;
+	struct drm_dp_mst_topology_state *mst_state;
+	int ret;
+	int i;
+
+	for_each_new_mst_mgr_in_state(&state->base, mgr, mst_state, i) {
+		ret = intel_dp_mst_check_fec_change(state, mgr, limits);
+		if (ret)
+			return ret;
+
+		ret = intel_dp_mst_check_bw(state, mgr, mst_state,
+					    limits);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int intel_dp_mst_compute_config_late(struct intel_encoder *encoder,
 					    struct intel_crtc_state *crtc_state,
 					    struct drm_connector_state *conn_state)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h
index fc5e85776a858..8ca1d599091c6 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h
@@ -13,6 +13,7 @@ struct intel_crtc;
 struct intel_crtc_state;
 struct intel_digital_port;
 struct intel_dp;
+struct intel_link_bw_limits;
 
 int intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_id);
 void intel_dp_mst_encoder_cleanup(struct intel_digital_port *dig_port);
@@ -22,6 +23,8 @@ bool intel_dp_mst_is_slave_trans(const struct intel_crtc_state *crtc_state);
 bool intel_dp_mst_source_support(struct intel_dp *intel_dp);
 int intel_dp_mst_add_topology_state_for_crtc(struct intel_atomic_state *state,
 					     struct intel_crtc *crtc);
+int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state,
+				   struct intel_link_bw_limits *limits);
 bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state,
 				     struct intel_crtc *crtc);
 
diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c
index c5eb5f2425361..390db5c0c24a5 100644
--- a/drivers/gpu/drm/i915/display/intel_link_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_link_bw.c
@@ -7,6 +7,7 @@
 
 #include "intel_atomic.h"
 #include "intel_display_types.h"
+#include "intel_dp_mst.h"
 #include "intel_fdi.h"
 #include "intel_link_bw.h"
 
@@ -21,6 +22,7 @@ void intel_link_bw_init_limits(struct drm_i915_private *i915, struct intel_link_
 {
 	enum pipe pipe;
 
+	limits->force_fec_pipes = 0;
 	limits->bpp_limit_reached_pipes = 0;
 	for_each_pipe(i915, pipe)
 		limits->max_bpp_x16[pipe] = INT_MAX;
@@ -143,6 +145,10 @@ static int check_all_link_config(struct intel_atomic_state *state,
 	/* TODO: Check additional shared display link configurations like MST */
 	int ret;
 
+	ret = intel_dp_mst_atomic_check_link(state, limits);
+	if (ret)
+		return ret;
+
 	ret = intel_fdi_atomic_check_link(state, limits);
 	if (ret)
 		return ret;
@@ -158,6 +164,12 @@ assert_link_limit_change_valid(struct drm_i915_private *i915,
 	bool bpps_changed = false;
 	enum pipe pipe;
 
+	/* FEC can't be forced off after it was forced on. */
+	if (drm_WARN_ON(&i915->drm,
+			(old_limits->force_fec_pipes & new_limits->force_fec_pipes) !=
+			old_limits->force_fec_pipes))
+		return false;
+
 	for_each_pipe(i915, pipe) {
 		/* The bpp limit can only decrease. */
 		if (drm_WARN_ON(&i915->drm,
@@ -172,7 +184,9 @@ assert_link_limit_change_valid(struct drm_i915_private *i915,
 
 	/* At least one limit must change. */
 	if (drm_WARN_ON(&i915->drm,
-			!bpps_changed))
+			!bpps_changed &&
+			new_limits->force_fec_pipes ==
+			old_limits->force_fec_pipes))
 		return false;
 
 	return true;
diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.h b/drivers/gpu/drm/i915/display/intel_link_bw.h
index e07df22a779a2..2cf57307cc249 100644
--- a/drivers/gpu/drm/i915/display/intel_link_bw.h
+++ b/drivers/gpu/drm/i915/display/intel_link_bw.h
@@ -16,6 +16,7 @@ struct intel_atomic_state;
 struct intel_crtc_state;
 
 struct intel_link_bw_limits {
+	u8 force_fec_pipes;
 	u8 bpp_limit_reached_pipes;
 	/* in 1/16 bpp units */
 	int max_bpp_x16[I915_MAX_PIPES];
-- 
GitLab


From 3a5f80e4ce973c6702ec31e5823502860208e030 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 24 Oct 2023 04:09:24 +0300
Subject: [PATCH 0182/2340] drm/i915/dp_mst: Check BW limitations only after
 all streams are computed

After the previous patch the BW limits on the whole MST topology will be
checked after computing the state for all the streams in the topology.
Accordingly remove the check during the stream's encoder compute config
step, to prevent failing an atomic commit due to a BW limit, if this can
be resolved only by reducing the BW of other streams on the same MST
link.

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-30-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index b300e4b065dfd..35989e287ee01 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -209,13 +209,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 		if (slots >= 0) {
 			drm_WARN_ON(&i915->drm, slots != remote_m_n.tu);
 
-			ret = drm_dp_mst_atomic_check(state);
-			/*
-			 * If we got slots >= 0 and we can fit those based on check
-			 * then we can exit the loop. Otherwise keep trying.
-			 */
-			if (!ret)
-				break;
+			break;
 		}
 	}
 
-- 
GitLab


From 99831ab9ce46b1163ac66e92a04614da2da41b1b Mon Sep 17 00:00:00 2001
From: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Date: Tue, 24 Oct 2023 04:09:25 +0300
Subject: [PATCH 0183/2340] drm/i915: Query compressed bpp properly using
 correct DPCD and DP Spec info

Currently we seem to be using wrong DPCD register for reading
compressed bpps, reading min/max input bpc instead of compressed bpp.
Fix that, so that we now apply min/max compressed bpp limitations we
get from DP Spec Table 2-157 DP v2.0 and/or correspondent DPCD
register DP_DSC_MAX_BITS_PER_PIXEL_LOW/HIGH.

This might also allow us to get rid of an ugly compressed bpp
recalculation, which we had to add to make some MST hubs usable.

v2: - Fix operator precedence
v3: - Added debug info about compressed bpps
v4: - Don't try to intersect Sink input bpp and compressed bpps.
v5: - Decrease step while looking for suitable compressed bpp to
      accommodate.
v6: - Use helper for getting min and max compressed_bpp (Ankit)
v7: - Fix checkpatch warning (Ankit)

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-31-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c     | 20 ++++----
 drivers/gpu/drm/i915/display/intel_dp.h     |  4 ++
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 52 ++++++++++-----------
 3 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 01655adf4c157..670cf535e7c65 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1811,7 +1811,7 @@ u16 intel_dp_dsc_max_sink_compressed_bppx16(const struct intel_connector *connec
 	return 0;
 }
 
-static int dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config)
+int intel_dp_dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config)
 {
 	/* From Mandatory bit rate range Support Table 2-157 (DP v2.0) */
 	switch (pipe_config->output_format) {
@@ -1828,9 +1828,9 @@ static int dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config)
 	return 0;
 }
 
-static int dsc_sink_max_compressed_bpp(const struct intel_connector *connector,
-				       struct intel_crtc_state *pipe_config,
-				       int bpc)
+int intel_dp_dsc_sink_max_compressed_bpp(const struct intel_connector *connector,
+					 struct intel_crtc_state *pipe_config,
+					 int bpc)
 {
 	return intel_dp_dsc_max_sink_compressed_bppx16(connector,
 						       pipe_config, bpc) >> 4;
@@ -1944,12 +1944,14 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp,
 	int dsc_joiner_max_bpp;
 
 	dsc_src_min_bpp = dsc_src_min_compressed_bpp();
-	dsc_sink_min_bpp = dsc_sink_min_compressed_bpp(pipe_config);
+	dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config);
 	dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp);
 	dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16));
 
 	dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp);
-	dsc_sink_max_bpp = dsc_sink_max_compressed_bpp(connector, pipe_config, pipe_bpp / 3);
+	dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector,
+								pipe_config,
+								pipe_bpp / 3);
 	dsc_max_bpp = dsc_sink_max_bpp ? min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp;
 
 	dsc_joiner_max_bpp = get_max_compressed_bpp_with_joiner(i915, adjusted_mode->clock,
@@ -2104,12 +2106,14 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp,
 	pipe_config->lane_count = limits->max_lane_count;
 
 	dsc_src_min_bpp = dsc_src_min_compressed_bpp();
-	dsc_sink_min_bpp = dsc_sink_min_compressed_bpp(pipe_config);
+	dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config);
 	dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp);
 	dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16));
 
 	dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp);
-	dsc_sink_max_bpp = dsc_sink_max_compressed_bpp(connector, pipe_config, pipe_bpp / 3);
+	dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector,
+								pipe_config,
+								pipe_bpp / 3);
 	dsc_max_bpp = dsc_sink_max_bpp ? min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp;
 	dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16));
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 91051973be2d5..e80da67554196 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -129,6 +129,10 @@ u16 intel_dp_dsc_get_max_compressed_bpp(struct drm_i915_private *i915,
 					enum intel_output_format output_format,
 					u32 pipe_bpp,
 					u32 timeslots);
+int intel_dp_dsc_sink_min_compressed_bpp(struct intel_crtc_state *pipe_config);
+int intel_dp_dsc_sink_max_compressed_bpp(const struct intel_connector *connector,
+					 struct intel_crtc_state *pipe_config,
+					 int bpc);
 u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector,
 				int mode_clock, int mode_hdisplay,
 				bool bigjoiner);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 35989e287ee01..107f7418ddc51 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -164,6 +164,9 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 						      crtc_state->port_clock,
 						      crtc_state->lane_count);
 
+	drm_dbg_kms(&i915->drm, "Looking for slots in range min bpp %d max bpp %d\n",
+		    min_bpp, max_bpp);
+
 	for (bpp = max_bpp; bpp >= min_bpp; bpp -= step) {
 		struct intel_link_m_n remote_m_n;
 		int link_bpp;
@@ -267,8 +270,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder,
 	u8 dsc_bpc[3] = {};
 	int min_bpp, max_bpp, sink_min_bpp, sink_max_bpp;
 	u8 dsc_max_bpc;
-	bool need_timeslot_recalc = false;
-	u32 last_compressed_bpp;
+	int min_compressed_bpp, max_compressed_bpp;
 
 	/* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */
 	if (DISPLAY_VER(i915) >= 12)
@@ -304,37 +306,31 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder,
 	if (max_bpp > sink_max_bpp)
 		max_bpp = sink_max_bpp;
 
-	min_bpp = max(min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16));
-	max_bpp = min(max_bpp, to_bpp_int(limits->link.max_bpp_x16));
-
-	slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, max_bpp,
-						     min_bpp, limits,
-						     conn_state, 2 * 3, true);
+	max_compressed_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector,
+								  crtc_state,
+								  max_bpp / 3);
+	max_compressed_bpp = min(max_compressed_bpp,
+				 to_bpp_int(limits->link.max_bpp_x16));
 
-	if (slots < 0)
-		return slots;
+	min_compressed_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state);
+	min_compressed_bpp = max(min_compressed_bpp,
+				 to_bpp_int_roundup(limits->link.min_bpp_x16));
 
-	last_compressed_bpp = crtc_state->dsc.compressed_bpp;
+	drm_dbg_kms(&i915->drm, "DSC Sink supported compressed min bpp %d compressed max bpp %d\n",
+		    min_compressed_bpp, max_compressed_bpp);
 
-	crtc_state->dsc.compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915,
-									last_compressed_bpp,
-									crtc_state->pipe_bpp);
+	/* Align compressed bpps according to our own constraints */
+	max_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915, max_compressed_bpp,
+							    crtc_state->pipe_bpp);
+	min_compressed_bpp = intel_dp_dsc_nearest_valid_bpp(i915, min_compressed_bpp,
+							    crtc_state->pipe_bpp);
 
-	if (crtc_state->dsc.compressed_bpp != last_compressed_bpp)
-		need_timeslot_recalc = true;
+	slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, max_compressed_bpp,
+						     min_compressed_bpp, limits,
+						     conn_state, 1, true);
 
-	/*
-	 * Apparently some MST hubs dislike if vcpi slots are not matching precisely
-	 * the actual compressed bpp we use.
-	 */
-	if (need_timeslot_recalc) {
-		slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state,
-							     crtc_state->dsc.compressed_bpp,
-							     crtc_state->dsc.compressed_bpp,
-							     limits, conn_state, 2 * 3, true);
-		if (slots < 0)
-			return slots;
-	}
+	if (slots < 0)
+		return slots;
 
 	return 0;
 }
-- 
GitLab


From b035224134626f506c1af73ca32bbc64dd2925ad Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Tue, 31 Oct 2023 08:31:53 +0100
Subject: [PATCH 0184/2340] accel/ivpu: Allocate vpu_addr in gem->open()
 callback

Use gem->open() callback to simplify the code and prepare for gem_shmem
conversion. It is called during handle creation for a gem object,
during prime import and in BO_CREATE ioctl. Hence can be used for vpu_addr
allocation. On the way remove unused bo->user_ptr field.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073156.1301669-2-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_gem.c         | 66 ++++++++++++---------------
 drivers/accel/ivpu/ivpu_gem.h         |  1 -
 drivers/accel/ivpu/ivpu_mmu_context.c |  2 +
 3 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index c91852f2edc82..d1077cf90b659 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -281,15 +281,6 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
 	int ret;
 
-	if (!range) {
-		if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
-			range = &vdev->hw->ranges.shave;
-		else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
-			range = &vdev->hw->ranges.dma;
-		else
-			range = &vdev->hw->ranges.user;
-	}
-
 	mutex_lock(&ctx->lock);
 	ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
 	if (!ret) {
@@ -299,6 +290,9 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 	}
 	mutex_unlock(&ctx->lock);
 
+	if (ret)
+		ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
+
 	return ret;
 }
 
@@ -337,9 +331,7 @@ void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
 }
 
 static struct ivpu_bo *
-ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
-	      u64 size, u32 flags, const struct ivpu_bo_ops *ops,
-	      const struct ivpu_addr_range *range, u64 user_ptr)
+ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, const struct ivpu_bo_ops *ops)
 {
 	struct ivpu_bo *bo;
 	int ret = 0;
@@ -364,7 +356,6 @@ ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
 	bo->base.funcs = &ivpu_gem_funcs;
 	bo->flags = flags;
 	bo->ops = ops;
-	bo->user_ptr = user_ptr;
 
 	if (ops->type == IVPU_BO_TYPE_SHMEM)
 		ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
@@ -384,14 +375,6 @@ ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
 		}
 	}
 
-	if (mmu_context) {
-		ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
-		if (ret) {
-			ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
-			goto err_release;
-		}
-	}
-
 	return bo;
 
 err_release:
@@ -401,6 +384,23 @@ err_free:
 	return ERR_PTR(ret);
 }
 
+static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct ivpu_bo *bo = to_ivpu_bo(obj);
+	struct ivpu_addr_range *range;
+
+	if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
+		range = &vdev->hw->ranges.shave;
+	else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
+		range = &vdev->hw->ranges.dma;
+	else
+		range = &vdev->hw->ranges.user;
+
+	return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range);
+}
+
 static void ivpu_bo_free(struct drm_gem_object *obj)
 {
 	struct ivpu_bo *bo = to_ivpu_bo(obj);
@@ -516,6 +516,7 @@ static const struct vm_operations_struct ivpu_vm_ops = {
 
 static const struct drm_gem_object_funcs ivpu_gem_funcs = {
 	.free = ivpu_bo_free,
+	.open = ivpu_bo_open,
 	.mmap = ivpu_bo_mmap,
 	.vm_ops = &ivpu_vm_ops,
 	.get_sg_table = ivpu_bo_get_sg_table,
@@ -537,7 +538,7 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (size == 0)
 		return -EINVAL;
 
-	bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+	bo = ivpu_bo_alloc(vdev, size, args->flags, &shmem_ops);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
 			 bo, file_priv->ctx.id, args->size, args->flags);
@@ -578,13 +579,17 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 		range = &vdev->hw->ranges.global;
 	}
 
-	bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+	bo = ivpu_bo_alloc(vdev, size, flags, &internal_ops);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
 			 bo, vpu_addr, size, flags);
 		return NULL;
 	}
 
+	ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range);
+	if (ret)
+		goto err_put;
+
 	ret = ivpu_bo_pin(bo);
 	if (ret)
 		goto err_put;
@@ -631,7 +636,7 @@ struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_
 
 	get_dma_buf(buf);
 
-	bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
+	bo = ivpu_bo_alloc(vdev, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
 		goto err_detach;
@@ -651,8 +656,6 @@ err_detach:
 
 int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
-	struct ivpu_file_priv *file_priv = file->driver_priv;
-	struct ivpu_device *vdev = to_ivpu_device(dev);
 	struct drm_ivpu_bo_info *args = data;
 	struct drm_gem_object *obj;
 	struct ivpu_bo *bo;
@@ -665,21 +668,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 	bo = to_ivpu_bo(obj);
 
 	mutex_lock(&bo->lock);
-
-	if (!bo->ctx) {
-		ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
-		if (ret) {
-			ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
-			goto unlock;
-		}
-	}
-
 	args->flags = bo->flags;
 	args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
 	args->vpu_addr = bo->vpu_addr;
 	args->size = obj->size;
-unlock:
 	mutex_unlock(&bo->lock);
+
 	drm_gem_object_put(obj);
 	return ret;
 }
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index a0b4d4a32b3bf..9bbbcd7792658 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -29,7 +29,6 @@ struct ivpu_bo {
 	u64 vpu_addr;
 	u32 handle;
 	u32 flags;
-	uintptr_t user_ptr;
 	u32 job_status;
 };
 
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 7d8005bc78d2f..f6de6a4ea6088 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -410,6 +410,8 @@ ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
 {
 	lockdep_assert_held(&ctx->lock);
 
+	WARN_ON(!range);
+
 	if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
 		if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
 						 range->start, range->end, DRM_MM_INSERT_BEST))
-- 
GitLab


From 48aea7f2a2efae6a1bd201061c71a81b3f3b7e55 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Tue, 31 Oct 2023 08:31:54 +0100
Subject: [PATCH 0185/2340] accel/ivpu: Fix locking in
 ivpu_bo_remove_all_bos_from_context()

ivpu_bo_remove_all_bos_from_context() could race with ivpu_bo_free()
when prime buffer was closed after vpu device was closed.

Move the bo_list from context to vdev and use a dedicated lock to
sync it. This list is not modified when BO is added/removed from
a context.

Also rename ivpu_bo_free_vpu_addr() to ivpu_bo_unbind() because this
function does more then just free vpu_addr.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073156.1301669-3-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c         |   7 +-
 drivers/accel/ivpu/ivpu_drv.h         |   3 +
 drivers/accel/ivpu/ivpu_gem.c         | 131 +++++++++++++-------------
 drivers/accel/ivpu/ivpu_gem.h         |   6 +-
 drivers/accel/ivpu/ivpu_mmu.c         |   5 +-
 drivers/accel/ivpu/ivpu_mmu_context.c |  36 ++++---
 drivers/accel/ivpu/ivpu_mmu_context.h |  11 +--
 7 files changed, 109 insertions(+), 90 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 60277ff6af69c..cc6a4dca2aaa4 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -91,8 +91,8 @@ static void file_priv_release(struct kref *ref)
 	ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
 
 	ivpu_cmdq_release_all(file_priv);
-	ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
 	ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+	ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx);
 	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
 	drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
 	mutex_destroy(&file_priv->lock);
@@ -528,6 +528,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
 	xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
 	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
+	INIT_LIST_HEAD(&vdev->bo_list);
+
+	ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
+	if (ret)
+		goto err_xa_destroy;
 
 	ret = ivpu_pci_init(vdev);
 	if (ret)
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 79b193ffbc264..e0517ec4133d4 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -115,6 +115,9 @@ struct ivpu_device {
 	struct xarray context_xa;
 	struct xa_limit context_xa_limit;
 
+	struct mutex bo_list_lock; /* Protects bo_list */
+	struct list_head bo_list;
+
 	struct xarray submitted_jobs_xa;
 	struct task_struct *job_done_thread;
 
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index d1077cf90b659..915c53d7bb97b 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -26,6 +26,17 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs;
 
 static struct lock_class_key prime_bo_lock_class_key;
 
+static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
+{
+	if (bo->ctx)
+		ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n",
+			 action, bo->base.size, (bool)bo->pages, (bool)bo->sgt, bo->handle,
+			 bo->ctx->id, bo->vpu_addr, bo->mmu_mapped);
+	else
+		ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n",
+			 action, bo->base.size, (bool)bo->pages, (bool)bo->sgt, bo->handle);
+}
+
 static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
 {
 	/* Pages are managed by the underlying dma-buf */
@@ -245,9 +256,8 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
 
 	mutex_lock(&bo->lock);
 
-	if (!bo->vpu_addr) {
-		ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
-			 bo->ctx->id, bo->handle);
+	if (!bo->ctx) {
+		ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle);
 		ret = -EINVAL;
 		goto unlock;
 	}
@@ -281,53 +291,68 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
 	int ret;
 
-	mutex_lock(&ctx->lock);
-	ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
+	mutex_lock(&bo->lock);
+
+	ret = ivpu_mmu_context_insert_node(ctx, range, bo->base.size, &bo->mm_node);
 	if (!ret) {
 		bo->ctx = ctx;
 		bo->vpu_addr = bo->mm_node.start;
-		list_add_tail(&bo->ctx_node, &ctx->bo_list);
+	} else {
+		ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
 	}
-	mutex_unlock(&ctx->lock);
+	ivpu_dbg_bo(vdev, bo, "alloc");
 
-	if (ret)
-		ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
+	mutex_unlock(&bo->lock);
 
 	return ret;
 }
 
-static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
+static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
 {
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	struct ivpu_mmu_context *ctx = bo->ctx;
 
-	ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
-		 ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+	lockdep_assert_held(&bo->lock);
 
-	mutex_lock(&bo->lock);
+	ivpu_dbg_bo(vdev, bo, "unbind");
 
+	/* TODO: dma_unmap */
 	if (bo->mmu_mapped) {
+		drm_WARN_ON(&vdev->drm, !bo->ctx);
+		drm_WARN_ON(&vdev->drm, !bo->vpu_addr);
 		drm_WARN_ON(&vdev->drm, !bo->sgt);
-		ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
+		ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt);
 		bo->mmu_mapped = false;
 	}
 
-	mutex_lock(&ctx->lock);
-	list_del(&bo->ctx_node);
-	bo->vpu_addr = 0;
-	bo->ctx = NULL;
-	ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
-	mutex_unlock(&ctx->lock);
+	if (bo->ctx) {
+		ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node);
+		bo->vpu_addr = 0;
+		bo->ctx = NULL;
+	}
+}
 
+static void ivpu_bo_unbind(struct ivpu_bo *bo)
+{
+	mutex_lock(&bo->lock);
+	ivpu_bo_unbind_locked(bo);
 	mutex_unlock(&bo->lock);
 }
 
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
 {
-	struct ivpu_bo *bo, *tmp;
+	struct ivpu_bo *bo;
+
+	if (drm_WARN_ON(&vdev->drm, !ctx))
+		return;
 
-	list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
-		ivpu_bo_free_vpu_addr(bo);
+	mutex_lock(&vdev->bo_list_lock);
+	list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
+		mutex_lock(&bo->lock);
+		if (bo->ctx == ctx)
+			ivpu_bo_unbind_locked(bo);
+		mutex_unlock(&bo->lock);
+	}
+	mutex_unlock(&vdev->bo_list_lock);
 }
 
 static struct ivpu_bo *
@@ -375,6 +400,10 @@ ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, const struct ivpu_b
 		}
 	}
 
+	mutex_lock(&vdev->bo_list_lock);
+	list_add_tail(&bo->bo_list_node, &vdev->bo_list);
+	mutex_unlock(&vdev->bo_list_lock);
+
 	return bo;
 
 err_release:
@@ -406,19 +435,16 @@ static void ivpu_bo_free(struct drm_gem_object *obj)
 	struct ivpu_bo *bo = to_ivpu_bo(obj);
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
 
-	if (bo->ctx)
-		ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
-			 bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
-	else
-		ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
-			 (bool)bo->sgt, bo->mmu_mapped);
+	mutex_lock(&vdev->bo_list_lock);
+	list_del(&bo->bo_list_node);
+	mutex_unlock(&vdev->bo_list_lock);
 
 	drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
 
-	vunmap(bo->kvaddr);
+	ivpu_dbg_bo(vdev, bo, "free");
 
-	if (bo->ctx)
-		ivpu_bo_free_vpu_addr(bo);
+	ivpu_bo_unbind(bo);
+	vunmap(bo->kvaddr);
 
 	if (bo->sgt)
 		ivpu_bo_unmap_and_free_pages(bo);
@@ -435,10 +461,6 @@ static void ivpu_bo_free(struct drm_gem_object *obj)
 static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 {
 	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
-	ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
-		 bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
 
 	if (obj->import_attach) {
 		/* Drop the reference drm_gem_mmap_obj() acquired.*/
@@ -553,9 +575,6 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	drm_gem_object_put(&bo->base);
 
-	ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
-
 	return ret;
 }
 
@@ -609,9 +628,6 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 		goto err_put;
 	}
 
-	ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 bo->vpu_addr, ivpu_bo_size(bo), flags);
-
 	return bo;
 
 err_put:
@@ -708,41 +724,30 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
 {
 	unsigned long dma_refcount = 0;
 
+	mutex_lock(&bo->lock);
+
 	if (bo->base.dma_buf && bo->base.dma_buf->file)
 		dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
 
 	drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
-		   bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
+		   bo->ctx ? bo->ctx->id : -1, bo->handle, bo->vpu_addr, bo->base.size,
 		   kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+
+	mutex_unlock(&bo->lock);
 }
 
 void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
 {
 	struct ivpu_device *vdev = to_ivpu_device(dev);
-	struct ivpu_file_priv *file_priv;
-	unsigned long ctx_id;
 	struct ivpu_bo *bo;
 
 	drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
 		   "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
 
-	mutex_lock(&vdev->gctx.lock);
-	list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+	mutex_lock(&vdev->bo_list_lock);
+	list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
 		ivpu_bo_print_info(bo, p);
-	mutex_unlock(&vdev->gctx.lock);
-
-	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
-		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
-		if (!file_priv)
-			continue;
-
-		mutex_lock(&file_priv->ctx.lock);
-		list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
-			ivpu_bo_print_info(bo, p);
-		mutex_unlock(&file_priv->ctx.lock);
-
-		ivpu_file_priv_put(&file_priv);
-	}
+	mutex_unlock(&vdev->bo_list_lock);
 }
 
 void ivpu_bo_list_print(struct drm_device *dev)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index 9bbbcd7792658..214d8fd143106 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -17,10 +17,10 @@ struct ivpu_bo {
 	const struct ivpu_bo_ops *ops;
 
 	struct ivpu_mmu_context *ctx;
-	struct list_head ctx_node;
+	struct list_head bo_list_node;
 	struct drm_mm_node mm_node;
 
-	struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
+	struct mutex lock; /* Protects: pages, sgt, ctx, mmu_mapped, vpu_addr */
 	struct sg_table *sgt;
 	struct page **pages;
 	bool mmu_mapped;
@@ -48,7 +48,7 @@ struct ivpu_bo_ops {
 };
 
 int ivpu_bo_pin(struct ivpu_bo *bo);
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
 void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
 void ivpu_bo_list_print(struct drm_device *dev);
 
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index b46f3743c0e7d..2228c44b115fa 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -775,9 +775,12 @@ int ivpu_mmu_init(struct ivpu_device *vdev)
 
 	ivpu_dbg(vdev, MMU, "Init..\n");
 
-	drmm_mutex_init(&vdev->drm, &mmu->lock);
 	ivpu_mmu_config_check(vdev);
 
+	ret = drmm_mutex_init(&vdev->drm, &mmu->lock);
+	if (ret)
+		return ret;
+
 	ret = ivpu_mmu_structs_alloc(vdev);
 	if (ret)
 		return ret;
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index f6de6a4ea6088..122c90fecf665 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -335,6 +335,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 	u64 prot;
 	u64 i;
 
+	if (drm_WARN_ON(&vdev->drm, !ctx))
+		return -EINVAL;
+
 	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
 		return -EINVAL;
 	/*
@@ -382,8 +385,8 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
 	int ret;
 	u64 i;
 
-	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
-		ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
+	if (drm_WARN_ON(&vdev->drm, !ctx))
+		return;
 
 	mutex_lock(&ctx->lock);
 
@@ -404,30 +407,34 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
 }
 
 int
-ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
-				    const struct ivpu_addr_range *range,
-				    u64 size, struct drm_mm_node *node)
+ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+			     u64 size, struct drm_mm_node *node)
 {
-	lockdep_assert_held(&ctx->lock);
+	int ret;
 
 	WARN_ON(!range);
 
+	mutex_lock(&ctx->lock);
 	if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
-		if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
-						 range->start, range->end, DRM_MM_INSERT_BEST))
-			return 0;
+		ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
+						  range->start, range->end, DRM_MM_INSERT_BEST);
+		if (!ret)
+			goto unlock;
 	}
 
-	return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
-					   range->start, range->end, DRM_MM_INSERT_BEST);
+	ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
+					  range->start, range->end, DRM_MM_INSERT_BEST);
+unlock:
+	mutex_unlock(&ctx->lock);
+	return ret;
 }
 
 void
-ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
+ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
 {
-	lockdep_assert_held(&ctx->lock);
-
+	mutex_lock(&ctx->lock);
 	drm_mm_remove_node(node);
+	mutex_unlock(&ctx->lock);
 }
 
 static int
@@ -437,7 +444,6 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
 	int ret;
 
 	mutex_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->bo_list);
 
 	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
 	if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index f15d8c630d8ad..535db3a1fc74f 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -23,10 +23,9 @@ struct ivpu_mmu_pgtable {
 };
 
 struct ivpu_mmu_context {
-	struct mutex lock; /* protects: mm, pgtable, bo_list */
+	struct mutex lock; /* Protects: mm, pgtable */
 	struct drm_mm mm;
 	struct ivpu_mmu_pgtable pgtable;
-	struct list_head bo_list;
 	u32 id;
 };
 
@@ -39,11 +38,9 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context
 void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
 void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
 
-int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
-					const struct ivpu_addr_range *range,
-					u64 size, struct drm_mm_node *node);
-void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx,
-					 struct drm_mm_node *node);
+int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+				 u64 size, struct drm_mm_node *node);
+void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
 
 int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 			     u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
-- 
GitLab


From 48d45fac3940347becd290b96b2fc6d5ad8171f7 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Tue, 31 Oct 2023 08:31:55 +0100
Subject: [PATCH 0186/2340] accel/ivpu: Remove support for uncached buffers

Usages of DRM_IVPU_BO_UNCACHED should be replaced by DRM_IVPU_BO_WC.
There is no functional benefit from DRM_IVPU_BO_UNCACHED if these
buffers are never mapped to host VM.

This allows to cut the buffer handling code in the kernel driver
by half.

Usage of DRM_IVPU_BO_UNCACHED buffers was removed from user-space
driver and will not be part of first UMD release.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073156.1301669-4-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_fw.c  | 2 +-
 drivers/accel/ivpu/ivpu_gem.c | 3 ---
 include/uapi/drm/ivpu_accel.h | 2 +-
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 3fd74cd4205f9..3d0c18d68f6c6 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -286,7 +286,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
 
 	if (fw->shave_nn_size) {
 		fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
-							  fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
+							  fw->shave_nn_size, DRM_IVPU_BO_WC);
 		if (!fw->mem_shave_nn) {
 			ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
 			ret = -ENOMEM;
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 915c53d7bb97b..2a91eb1e36278 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -89,8 +89,6 @@ static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
 
 	if (bo->flags & DRM_IVPU_BO_WC)
 		set_pages_array_wc(pages, npages);
-	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		set_pages_array_uc(pages, npages);
 
 	bo->pages = pages;
 	return 0;
@@ -366,7 +364,6 @@ ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, const struct ivpu_b
 
 	switch (flags & DRM_IVPU_BO_CACHE_MASK) {
 	case DRM_IVPU_BO_CACHED:
-	case DRM_IVPU_BO_UNCACHED:
 	case DRM_IVPU_BO_WC:
 		break;
 	default:
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 262db0c3beeea..de1944e42c655 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -196,7 +196,7 @@ struct drm_ivpu_bo_create {
 	 *
 	 * %DRM_IVPU_BO_UNCACHED:
 	 *
-	 * Allocated BO will not be cached on host side nor snooped on the VPU side.
+	 * Not supported. Use DRM_IVPU_BO_WC instead.
 	 *
 	 * %DRM_IVPU_BO_WC:
 	 *
-- 
GitLab


From 8d88e4cdce4f5c56de55174a4d32ea9c06f7fa66 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Tue, 31 Oct 2023 08:31:56 +0100
Subject: [PATCH 0187/2340] accel/ivpu: Use GEM shmem helper for all buffers

Use struct drm_gem_shmem_object as a base for struct ivpu_bo.
This cuts by 50% the buffer management code.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231031073156.1301669-5-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/Kconfig    |   2 +-
 drivers/accel/ivpu/ivpu_drv.c |   6 +-
 drivers/accel/ivpu/ivpu_gem.c | 518 ++++++++--------------------------
 drivers/accel/ivpu/ivpu_gem.h |  70 +----
 drivers/accel/ivpu/ivpu_job.c |   8 +-
 5 files changed, 144 insertions(+), 460 deletions(-)

diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 82749e0da524c..682c532452863 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -6,7 +6,7 @@ config DRM_ACCEL_IVPU
 	depends on X86_64 && !UML
 	depends on PCI && PCI_MSI
 	select FW_LOADER
-	select SHMEM
+	select DRM_GEM_SHMEM_HELPER
 	select GENERIC_ALLOCATOR
 	help
 	  Choose this option if you have a system with an 14th generation
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index cc6a4dca2aaa4..51fa60b6254ce 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -360,7 +360,7 @@ int ivpu_boot(struct ivpu_device *vdev)
 	int ret;
 
 	/* Update boot params located at first 4KB of FW memory */
-	ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr);
+	ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
 
 	ret = ivpu_hw_boot_fw(vdev);
 	if (ret) {
@@ -409,7 +409,9 @@ static const struct drm_driver driver = {
 
 	.open = ivpu_open,
 	.postclose = ivpu_postclose,
-	.gem_prime_import = ivpu_gem_prime_import,
+
+	.gem_create_object = ivpu_gem_create_object,
+	.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
 
 	.ioctls = ivpu_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 2a91eb1e36278..1dda4f38ea25c 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -20,224 +20,18 @@
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
 
-MODULE_IMPORT_NS(DMA_BUF);
-
 static const struct drm_gem_object_funcs ivpu_gem_funcs;
 
-static struct lock_class_key prime_bo_lock_class_key;
-
 static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
 {
 	if (bo->ctx)
 		ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n",
-			 action, bo->base.size, (bool)bo->pages, (bool)bo->sgt, bo->handle,
-			 bo->ctx->id, bo->vpu_addr, bo->mmu_mapped);
+			 action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
+			 bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped);
 	else
 		ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n",
-			 action, bo->base.size, (bool)bo->pages, (bool)bo->sgt, bo->handle);
-}
-
-static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
-{
-	/* Pages are managed by the underlying dma-buf */
-	return 0;
-}
-
-static void prime_free_pages_locked(struct ivpu_bo *bo)
-{
-	/* Pages are managed by the underlying dma-buf */
-}
-
-static int prime_map_pages_locked(struct ivpu_bo *bo)
-{
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	struct sg_table *sgt;
-
-	sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL);
-	if (IS_ERR(sgt)) {
-		ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
-		return PTR_ERR(sgt);
-	}
-
-	bo->sgt = sgt;
-	return 0;
-}
-
-static void prime_unmap_pages_locked(struct ivpu_bo *bo)
-{
-	dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
-	bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops prime_ops = {
-	.type = IVPU_BO_TYPE_PRIME,
-	.name = "prime",
-	.alloc_pages = prime_alloc_pages_locked,
-	.free_pages = prime_free_pages_locked,
-	.map_pages = prime_map_pages_locked,
-	.unmap_pages = prime_unmap_pages_locked,
-};
-
-static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
-{
-	int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-	struct page **pages;
-
-	pages = drm_gem_get_pages(&bo->base);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
-	if (bo->flags & DRM_IVPU_BO_WC)
-		set_pages_array_wc(pages, npages);
-
-	bo->pages = pages;
-	return 0;
-}
-
-static void shmem_free_pages_locked(struct ivpu_bo *bo)
-{
-	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	drm_gem_put_pages(&bo->base, bo->pages, true, false);
-	bo->pages = NULL;
-}
-
-static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
-{
-	int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	struct sg_table *sgt;
-	int ret;
-
-	sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
-	if (IS_ERR(sgt)) {
-		ivpu_err(vdev, "Failed to allocate sgtable\n");
-		return PTR_ERR(sgt);
-	}
-
-	ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
-	if (ret) {
-		ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
-		goto err_free_sgt;
-	}
-
-	bo->sgt = sgt;
-	return 0;
-
-err_free_sgt:
-	kfree(sgt);
-	return ret;
-}
-
-static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
-{
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
-	dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
-	sg_free_table(bo->sgt);
-	kfree(bo->sgt);
-	bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops shmem_ops = {
-	.type = IVPU_BO_TYPE_SHMEM,
-	.name = "shmem",
-	.alloc_pages = shmem_alloc_pages_locked,
-	.free_pages = shmem_free_pages_locked,
-	.map_pages = ivpu_bo_map_pages_locked,
-	.unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
-{
-	unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-	struct page **pages;
-	int ret;
-
-	pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	for (i = 0; i < npages; i++) {
-		pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
-		if (!pages[i]) {
-			ret = -ENOMEM;
-			goto err_free_pages;
-		}
-		cond_resched();
-	}
-
-	bo->pages = pages;
-	return 0;
-
-err_free_pages:
-	while (i--)
-		put_page(pages[i]);
-	kvfree(pages);
-	return ret;
-}
-
-static void internal_free_pages_locked(struct ivpu_bo *bo)
-{
-	unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-
-	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	for (i = 0; i < npages; i++)
-		put_page(bo->pages[i]);
-
-	kvfree(bo->pages);
-	bo->pages = NULL;
-}
-
-static const struct ivpu_bo_ops internal_ops = {
-	.type = IVPU_BO_TYPE_INTERNAL,
-	.name = "internal",
-	.alloc_pages = internal_alloc_pages_locked,
-	.free_pages = internal_free_pages_locked,
-	.map_pages = ivpu_bo_map_pages_locked,
-	.unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
-{
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	int ret;
-
-	lockdep_assert_held(&bo->lock);
-	drm_WARN_ON(&vdev->drm, bo->sgt);
-
-	ret = bo->ops->alloc_pages(bo);
-	if (ret) {
-		ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
-		return ret;
-	}
-
-	ret = bo->ops->map_pages(bo);
-	if (ret) {
-		ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
-		goto err_free_pages;
-	}
-	return ret;
-
-err_free_pages:
-	bo->ops->free_pages(bo);
-	return ret;
-}
-
-static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
-{
-	mutex_lock(&bo->lock);
-
-	WARN_ON(!bo->sgt);
-	bo->ops->unmap_pages(bo);
-	WARN_ON(bo->sgt);
-	bo->ops->free_pages(bo);
-	WARN_ON(bo->pages);
-
-	mutex_unlock(&bo->lock);
+			 action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
+			 bo->handle);
 }
 
 /*
@@ -254,20 +48,24 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
 
 	mutex_lock(&bo->lock);
 
+	ivpu_dbg_bo(vdev, bo, "pin");
+
 	if (!bo->ctx) {
 		ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle);
 		ret = -EINVAL;
 		goto unlock;
 	}
 
-	if (!bo->sgt) {
-		ret = ivpu_bo_alloc_and_map_pages_locked(bo);
-		if (ret)
+	if (!bo->mmu_mapped) {
+		struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+
+		if (IS_ERR(sgt)) {
+			ret = PTR_ERR(sgt);
+			ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
 			goto unlock;
-	}
+		}
 
-	if (!bo->mmu_mapped) {
-		ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
+		ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
 					       ivpu_bo_is_snooped(bo));
 		if (ret) {
 			ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
@@ -291,13 +89,14 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 
 	mutex_lock(&bo->lock);
 
-	ret = ivpu_mmu_context_insert_node(ctx, range, bo->base.size, &bo->mm_node);
+	ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
 	if (!ret) {
 		bo->ctx = ctx;
 		bo->vpu_addr = bo->mm_node.start;
 	} else {
 		ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
 	}
+
 	ivpu_dbg_bo(vdev, bo, "alloc");
 
 	mutex_unlock(&bo->lock);
@@ -314,11 +113,12 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
 	ivpu_dbg_bo(vdev, bo, "unbind");
 
 	/* TODO: dma_unmap */
+
 	if (bo->mmu_mapped) {
 		drm_WARN_ON(&vdev->drm, !bo->ctx);
 		drm_WARN_ON(&vdev->drm, !bo->vpu_addr);
-		drm_WARN_ON(&vdev->drm, !bo->sgt);
-		ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt);
+		drm_WARN_ON(&vdev->drm, !bo->base.sgt);
+		ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->base.sgt);
 		bo->mmu_mapped = false;
 	}
 
@@ -353,15 +153,32 @@ void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m
 	mutex_unlock(&vdev->bo_list_lock);
 }
 
-static struct ivpu_bo *
-ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, const struct ivpu_bo_ops *ops)
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size)
 {
 	struct ivpu_bo *bo;
-	int ret = 0;
 
-	if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
+	if (size == 0 || !PAGE_ALIGNED(size))
 		return ERR_PTR(-EINVAL);
 
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	bo->base.base.funcs = &ivpu_gem_funcs;
+	bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
+
+	INIT_LIST_HEAD(&bo->bo_list_node);
+	mutex_init(&bo->lock);
+
+	return &bo->base.base;
+}
+
+static struct ivpu_bo *
+ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
+{
+	struct drm_gem_shmem_object *shmem;
+	struct ivpu_bo *bo;
+
 	switch (flags & DRM_IVPU_BO_CACHE_MASK) {
 	case DRM_IVPU_BO_CACHED:
 	case DRM_IVPU_BO_WC:
@@ -370,44 +187,22 @@ ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags, const struct ivpu_b
 		return ERR_PTR(-EINVAL);
 	}
 
-	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
-	if (!bo)
-		return ERR_PTR(-ENOMEM);
+	shmem = drm_gem_shmem_create(&vdev->drm, size);
+	if (IS_ERR(shmem))
+		return ERR_CAST(shmem);
 
-	mutex_init(&bo->lock);
-	bo->base.funcs = &ivpu_gem_funcs;
+	bo = to_ivpu_bo(&shmem->base);
+	bo->base.map_wc = flags & DRM_IVPU_BO_WC;
 	bo->flags = flags;
-	bo->ops = ops;
-
-	if (ops->type == IVPU_BO_TYPE_SHMEM)
-		ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
-	else
-		drm_gem_private_object_init(&vdev->drm, &bo->base, size);
-
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize drm object\n");
-		goto err_free;
-	}
-
-	if (flags & DRM_IVPU_BO_MAPPABLE) {
-		ret = drm_gem_create_mmap_offset(&bo->base);
-		if (ret) {
-			ivpu_err(vdev, "Failed to allocate mmap offset\n");
-			goto err_release;
-		}
-	}
 
 	mutex_lock(&vdev->bo_list_lock);
 	list_add_tail(&bo->bo_list_node, &vdev->bo_list);
 	mutex_unlock(&vdev->bo_list_lock);
 
-	return bo;
+	ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n",
+		 bo->vpu_addr, bo->base.base.size, flags);
 
-err_release:
-	drm_gem_object_release(&bo->base);
-err_free:
-	kfree(bo);
-	return ERR_PTR(ret);
+	return bo;
 }
 
 static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
@@ -429,8 +224,8 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
 
 static void ivpu_bo_free(struct drm_gem_object *obj)
 {
+	struct ivpu_device *vdev = to_ivpu_device(obj->dev);
 	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
 
 	mutex_lock(&vdev->bo_list_lock);
 	list_del(&bo->bo_list_node);
@@ -441,108 +236,64 @@ static void ivpu_bo_free(struct drm_gem_object *obj)
 	ivpu_dbg_bo(vdev, bo, "free");
 
 	ivpu_bo_unbind(bo);
-	vunmap(bo->kvaddr);
-
-	if (bo->sgt)
-		ivpu_bo_unmap_and_free_pages(bo);
-
-	if (bo->base.import_attach)
-		drm_prime_gem_destroy(&bo->base, bo->sgt);
-
-	drm_gem_object_release(&bo->base);
-
 	mutex_destroy(&bo->lock);
-	kfree(bo);
-}
-
-static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
-{
-	struct ivpu_bo *bo = to_ivpu_bo(obj);
 
-	if (obj->import_attach) {
-		/* Drop the reference drm_gem_mmap_obj() acquired.*/
-		drm_gem_object_put(obj);
-		vma->vm_private_data = NULL;
-		return dma_buf_mmap(obj->dma_buf, vma, 0);
-	}
-
-	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND);
-	vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
-
-	return 0;
+	drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+	drm_gem_shmem_free(&bo->base);
 }
 
-static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
-{
-	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	loff_t npages = obj->size >> PAGE_SHIFT;
-	int ret = 0;
-
-	mutex_lock(&bo->lock);
-
-	if (!bo->sgt)
-		ret = ivpu_bo_alloc_and_map_pages_locked(bo);
-
-	mutex_unlock(&bo->lock);
-
-	if (ret)
-		return ERR_PTR(ret);
-
-	return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
-}
+static const struct dma_buf_ops ivpu_bo_dmabuf_ops =  {
+	.cache_sgt_mapping = true,
+	.attach = drm_gem_map_attach,
+	.detach = drm_gem_map_detach,
+	.map_dma_buf = drm_gem_map_dma_buf,
+	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.release = drm_gem_dmabuf_release,
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
 
-static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
+static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct drm_gem_object *obj = vma->vm_private_data;
-	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	loff_t npages = obj->size >> PAGE_SHIFT;
-	pgoff_t page_offset;
-	struct page *page;
-	vm_fault_t ret;
-	int err;
+	struct drm_device *dev = obj->dev;
+	struct dma_buf_export_info exp_info = {
+		.exp_name = KBUILD_MODNAME,
+		.owner = dev->driver->fops->owner,
+		.ops = &ivpu_bo_dmabuf_ops,
+		.size = obj->size,
+		.flags = flags,
+		.priv = obj,
+		.resv = obj->resv,
+	};
+	void *sgt;
 
-	mutex_lock(&bo->lock);
+	/*
+	 * Make sure that pages are allocated and dma-mapped before exporting the bo.
+	 * DMA-mapping is required if the bo will be imported to the same device.
+	 */
+	sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj));
+	if (IS_ERR(sgt))
+		return sgt;
 
-	if (!bo->sgt) {
-		err = ivpu_bo_alloc_and_map_pages_locked(bo);
-		if (err) {
-			ret = vmf_error(err);
-			goto unlock;
-		}
-	}
-
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-	if (page_offset >= npages) {
-		ret = VM_FAULT_SIGBUS;
-	} else {
-		page = bo->pages[page_offset];
-		ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
-	}
-
-unlock:
-	mutex_unlock(&bo->lock);
-
-	return ret;
+	return drm_gem_dmabuf_export(dev, &exp_info);
 }
 
-static const struct vm_operations_struct ivpu_vm_ops = {
-	.fault = ivpu_vm_fault,
-	.open = drm_gem_vm_open,
-	.close = drm_gem_vm_close,
-};
-
 static const struct drm_gem_object_funcs ivpu_gem_funcs = {
 	.free = ivpu_bo_free,
 	.open = ivpu_bo_open,
-	.mmap = ivpu_bo_mmap,
-	.vm_ops = &ivpu_vm_ops,
-	.get_sg_table = ivpu_bo_get_sg_table,
+	.export = ivpu_bo_export,
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = drm_gem_shmem_object_pin,
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = drm_gem_shmem_object_mmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
 };
 
-int
-ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
 	struct ivpu_device *vdev = file_priv->vdev;
@@ -557,20 +308,20 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (size == 0)
 		return -EINVAL;
 
-	bo = ivpu_bo_alloc(vdev, size, args->flags, &shmem_ops);
+	bo = ivpu_bo_create(vdev, size, args->flags);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
 			 bo, file_priv->ctx.id, args->size, args->flags);
 		return PTR_ERR(bo);
 	}
 
-	ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
+	ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle);
 	if (!ret) {
 		args->vpu_addr = bo->vpu_addr;
 		args->handle = bo->handle;
 	}
 
-	drm_gem_object_put(&bo->base);
+	drm_gem_object_put(&bo->base.base);
 
 	return ret;
 }
@@ -580,8 +331,8 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 {
 	const struct ivpu_addr_range *range;
 	struct ivpu_addr_range fixed_range;
+	struct iosys_map map;
 	struct ivpu_bo *bo;
-	pgprot_t prot;
 	int ret;
 
 	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
@@ -595,7 +346,7 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 		range = &vdev->hw->ranges.global;
 	}
 
-	bo = ivpu_bo_alloc(vdev, size, flags, &internal_ops);
+	bo = ivpu_bo_create(vdev, size, flags);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
 			 bo, vpu_addr, size, flags);
@@ -610,61 +361,23 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 	if (ret)
 		goto err_put;
 
-	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	if (bo->flags & DRM_IVPU_BO_WC)
-		set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
-	bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
-	if (!bo->kvaddr) {
-		ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+	ret = drm_gem_shmem_vmap(&bo->base, &map);
+	if (ret)
 		goto err_put;
-	}
 
 	return bo;
 
 err_put:
-	drm_gem_object_put(&bo->base);
+	drm_gem_object_put(&bo->base.base);
 	return NULL;
 }
 
 void ivpu_bo_free_internal(struct ivpu_bo *bo)
 {
-	drm_gem_object_put(&bo->base);
-}
-
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
-{
-	struct ivpu_device *vdev = to_ivpu_device(dev);
-	struct dma_buf_attachment *attach;
-	struct ivpu_bo *bo;
-
-	attach = dma_buf_attach(buf, dev->dev);
-	if (IS_ERR(attach))
-		return ERR_CAST(attach);
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
 
-	get_dma_buf(buf);
-
-	bo = ivpu_bo_alloc(vdev, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops);
-	if (IS_ERR(bo)) {
-		ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
-		goto err_detach;
-	}
-
-	lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
-
-	bo->base.import_attach = attach;
-
-	return &bo->base;
-
-err_detach:
-	dma_buf_detach(buf, attach);
-	dma_buf_put(buf);
-	return ERR_CAST(bo);
+	drm_gem_shmem_vunmap(&bo->base, &map);
+	drm_gem_object_put(&bo->base.base);
 }
 
 int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -723,12 +436,23 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
 
 	mutex_lock(&bo->lock);
 
-	if (bo->base.dma_buf && bo->base.dma_buf->file)
-		dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
+	if (bo->base.base.dma_buf && bo->base.base.dma_buf->file)
+		dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count);
+
+	drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu",
+		   bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size,
+		   bo->flags, kref_read(&bo->base.base.refcount), dma_refcount);
+
+	if (bo->base.base.import_attach)
+		drm_printf(p, " imported");
+
+	if (bo->base.pages)
+		drm_printf(p, " has_pages");
+
+	if (bo->mmu_mapped)
+		drm_printf(p, " mmu_mapped");
 
-	drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
-		   bo->ctx ? bo->ctx->id : -1, bo->handle, bo->vpu_addr, bo->base.size,
-		   kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+	drm_printf(p, "\n");
 
 	mutex_unlock(&bo->lock);
 }
@@ -738,8 +462,8 @@ void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
 	struct ivpu_device *vdev = to_ivpu_device(dev);
 	struct ivpu_bo *bo;
 
-	drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
-		   "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+	drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n",
+		   "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs");
 
 	mutex_lock(&vdev->bo_list_lock);
 	list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index 214d8fd143106..d75cad0d3c742 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -6,83 +6,52 @@
 #define __IVPU_GEM_H__
 
 #include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_mm.h>
 
-struct dma_buf;
-struct ivpu_bo_ops;
 struct ivpu_file_priv;
 
 struct ivpu_bo {
-	struct drm_gem_object base;
-	const struct ivpu_bo_ops *ops;
-
+	struct drm_gem_shmem_object base;
 	struct ivpu_mmu_context *ctx;
 	struct list_head bo_list_node;
 	struct drm_mm_node mm_node;
 
-	struct mutex lock; /* Protects: pages, sgt, ctx, mmu_mapped, vpu_addr */
-	struct sg_table *sgt;
-	struct page **pages;
-	bool mmu_mapped;
-
-	void *kvaddr;
+	struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
 	u64 vpu_addr;
 	u32 handle;
 	u32 flags;
-	u32 job_status;
-};
-
-enum ivpu_bo_type {
-	IVPU_BO_TYPE_SHMEM = 1,
-	IVPU_BO_TYPE_INTERNAL,
-	IVPU_BO_TYPE_PRIME,
-};
-
-struct ivpu_bo_ops {
-	enum ivpu_bo_type type;
-	const char *name;
-	int (*alloc_pages)(struct ivpu_bo *bo);
-	void (*free_pages)(struct ivpu_bo *bo);
-	int (*map_pages)(struct ivpu_bo *bo);
-	void (*unmap_pages)(struct ivpu_bo *bo);
+	u32 job_status; /* Valid only for command buffer */
+	bool mmu_mapped;
 };
 
 int ivpu_bo_pin(struct ivpu_bo *bo);
 void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
-void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
-void ivpu_bo_list_print(struct drm_device *dev);
 
-struct ivpu_bo *
-ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
+struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
 void ivpu_bo_free_internal(struct ivpu_bo *bo);
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
-void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);
 
 int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
+void ivpu_bo_list_print(struct drm_device *dev);
+
 static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
 {
-	return container_of(obj, struct ivpu_bo, base);
+	return container_of(obj, struct ivpu_bo, base.base);
 }
 
 static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo)
 {
-	return bo->kvaddr;
+	return bo->base.vaddr;
 }
 
 static inline size_t ivpu_bo_size(struct ivpu_bo *bo)
 {
-	return bo->base.size;
-}
-
-static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
-{
-	if (offset > ivpu_bo_size(bo) || !bo->pages)
-		return NULL;
-
-	return bo->pages[offset / PAGE_SIZE];
+	return bo->base.base.size;
 }
 
 static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
@@ -95,20 +64,9 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
 	return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
 }
 
-static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
-{
-	if (bo->flags & DRM_IVPU_BO_WC)
-		return pgprot_writecombine(prot);
-
-	if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		return pgprot_noncached(prot);
-
-	return prot;
-}
-
 static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
 {
-	return to_ivpu_device(bo->base.dev);
+	return to_ivpu_device(bo->base.base.dev);
 }
 
 static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 15a408fad4945..02acd8dba02aa 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -266,7 +266,7 @@ static void job_release(struct kref *ref)
 
 	for (i = 0; i < job->bo_count; i++)
 		if (job->bos[i])
-			drm_gem_object_put(&job->bos[i]->base);
+			drm_gem_object_put(&job->bos[i]->base.base);
 
 	dma_fence_put(job->done_fence);
 	ivpu_file_priv_put(&job->file_priv);
@@ -450,7 +450,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 	}
 
 	bo = job->bos[CMD_BUF_IDX];
-	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
+	if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
 		ivpu_warn(vdev, "Buffer is already in use\n");
 		return -EBUSY;
 	}
@@ -470,7 +470,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 	}
 
 	for (i = 0; i < buf_count; i++) {
-		ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+		ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
 		if (ret) {
 			ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
 			goto unlock_reservations;
@@ -479,7 +479,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 
 	for (i = 0; i < buf_count; i++) {
 		usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
-		dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage);
+		dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage);
 	}
 
 unlock_reservations:
-- 
GitLab


From d08361e1f66381ba615852cb6155f028a52a0fa4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Sep 2023 15:25:39 +0300
Subject: [PATCH 0188/2340] drm/i915: Drop redundant !modeset check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 7de5b6b54630 ("drm/i915: Don't flag both full
modeset and fastset at the same time")
intel_crtc_needs_fastset() and intel_crtc_needs_modeset() have
been mutually exclusive. Drop the redundant check.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230907122541.32261-2-ville.syrjala@linux.intel.com
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index bad43626e2047..4bce36e14da69 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6831,7 +6831,7 @@ static void intel_update_crtc(struct intel_atomic_state *state,
 	 * valid pipe configuration from the BIOS we need to take care
 	 * of enabling them on the CRTC's first fastset.
 	 */
-	if (intel_crtc_needs_fastset(new_crtc_state) && !modeset &&
+	if (intel_crtc_needs_fastset(new_crtc_state) &&
 	    old_crtc_state->inherited)
 		intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
 }
-- 
GitLab


From c39c93578106f035218078c300db6361cf6a326c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Sep 2023 15:25:40 +0300
Subject: [PATCH 0189/2340] drm/i915: Split intel_update_crtc() into two parts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split intel_update_crtc() into two parts such that the first
part performs all the non-vblank evasion preparatory stuff,
and the second part just does the vblank evasion stuff.

For now we just call these back to back so that there is
no funcitonal change.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230907122541.32261-3-ville.syrjala@linux.intel.com
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 4bce36e14da69..bedd338ee9be1 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6760,8 +6760,8 @@ static void intel_enable_crtc(struct intel_atomic_state *state,
 	intel_crtc_enable_pipe_crc(crtc);
 }
 
-static void intel_update_crtc(struct intel_atomic_state *state,
-			      struct intel_crtc *crtc)
+static void intel_pre_update_crtc(struct intel_atomic_state *state,
+				  struct intel_crtc *crtc)
 {
 	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	const struct intel_crtc_state *old_crtc_state =
@@ -6803,6 +6803,15 @@ static void intel_update_crtc(struct intel_atomic_state *state,
 		intel_color_commit_noarm(new_crtc_state);
 
 	intel_crtc_planes_update_noarm(state, crtc);
+}
+
+static void intel_update_crtc(struct intel_atomic_state *state,
+			      struct intel_crtc *crtc)
+{
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
 	/* Perform vblank evasion around commit operation */
 	intel_pipe_update_start(state, crtc);
@@ -6926,6 +6935,7 @@ static void intel_commit_modeset_enables(struct intel_atomic_state *state)
 			continue;
 
 		intel_enable_crtc(state, crtc);
+		intel_pre_update_crtc(state, crtc);
 		intel_update_crtc(state, crtc);
 	}
 }
@@ -6978,6 +6988,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 			entries[pipe] = new_crtc_state->wm.skl.ddb;
 			update_pipes &= ~BIT(pipe);
 
+			intel_pre_update_crtc(state, crtc);
 			intel_update_crtc(state, crtc);
 
 			/*
@@ -7045,6 +7056,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 		entries[pipe] = new_crtc_state->wm.skl.ddb;
 		update_pipes &= ~BIT(pipe);
 
+		intel_pre_update_crtc(state, crtc);
 		intel_update_crtc(state, crtc);
 	}
 
-- 
GitLab


From c610e841f19d57233062868f2408349e9ecade91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Sep 2023 15:25:41 +0300
Subject: [PATCH 0190/2340] drm/i915: Do plane/etc. updates more atomically
 across pipes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Perform all the intel_pre_update_crtc() stuff for all pipes first,
and only then do the intel_update_crtc() vblank evasion stuff for
every pipe back to back. This should make it more likely that
the plane updates from multiple pipes happen on the same frame
(assuming the pipes are running in sync, eg. due to bigjoiner
or port sync).

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230907122541.32261-4-ville.syrjala@linux.intel.com
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 26 ++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index bedd338ee9be1..3effafcbb411a 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6936,6 +6936,12 @@ static void intel_commit_modeset_enables(struct intel_atomic_state *state)
 
 		intel_enable_crtc(state, crtc);
 		intel_pre_update_crtc(state, crtc);
+	}
+
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+		if (!new_crtc_state->hw.active)
+			continue;
+
 		intel_update_crtc(state, crtc);
 	}
 }
@@ -6973,6 +6979,15 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	 * So first lets enable all pipes that do not need a fullmodeset as
 	 * those don't have any external dependency.
 	 */
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+		enum pipe pipe = crtc->pipe;
+
+		if ((update_pipes & BIT(pipe)) == 0)
+			continue;
+
+		intel_pre_update_crtc(state, crtc);
+	}
+
 	while (update_pipes) {
 		for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 						    new_crtc_state, i) {
@@ -6988,7 +7003,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 			entries[pipe] = new_crtc_state->wm.skl.ddb;
 			update_pipes &= ~BIT(pipe);
 
-			intel_pre_update_crtc(state, crtc);
 			intel_update_crtc(state, crtc);
 
 			/*
@@ -7044,6 +7058,15 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	/*
 	 * Finally we do the plane updates/etc. for all pipes that got enabled.
 	 */
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+		enum pipe pipe = crtc->pipe;
+
+		if ((update_pipes & BIT(pipe)) == 0)
+			continue;
+
+		intel_pre_update_crtc(state, crtc);
+	}
+
 	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
 		enum pipe pipe = crtc->pipe;
 
@@ -7056,7 +7079,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 		entries[pipe] = new_crtc_state->wm.skl.ddb;
 		update_pipes &= ~BIT(pipe);
 
-		intel_pre_update_crtc(state, crtc);
 		intel_update_crtc(state, crtc);
 	}
 
-- 
GitLab


From 65c02404380fb328e4d1fe40318ac6de0e63327a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 12 Oct 2023 15:24:42 +0300
Subject: [PATCH 0191/2340] drm/i915/gvt: Clean up zero initializers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just use a simple {} to zero initialize arrays/structs instead
of the hodgepodge of stuff we are using currently.

Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231012122442.15718-7-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +-
 drivers/gpu/drm/i915/gvt/fb_decoder.c | 6 +++---
 drivers/gpu/drm/i915/gvt/handlers.c   | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 05f9348b7a9d8..d4a3f3e093b0b 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -3047,7 +3047,7 @@ put_obj:
 
 static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
-	u32 per_ctx_start[CACHELINE_DWORDS] = {0};
+	u32 per_ctx_start[CACHELINE_DWORDS] = {};
 	unsigned char *bb_start_sva;
 
 	if (!wa_ctx->per_ctx.valid)
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c
index 835c3fde8a202..313efdabee572 100644
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.c
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c
@@ -56,7 +56,7 @@ static const struct pixel_format bdw_pixel_formats[] = {
 	{DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"},
 
 	/* non-supported format has bpp default to 0 */
-	{0, 0, NULL},
+	{}
 };
 
 static const struct pixel_format skl_pixel_formats[] = {
@@ -76,7 +76,7 @@ static const struct pixel_format skl_pixel_formats[] = {
 	{DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"},
 
 	/* non-supported format has bpp default to 0 */
-	{0, 0, NULL},
+	{}
 };
 
 static int bdw_format_to_drm(int format)
@@ -293,7 +293,7 @@ static const struct cursor_mode_format cursor_pixel_formats[] = {
 	{DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"},
 
 	/* non-supported format has bpp default to 0 */
-	{0, 0, 0, 0, NULL},
+	{}
 };
 
 static int cursor_mode_to_drm(int mode)
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 72addd8d380fd..90f6c1ece57d4 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -538,7 +538,7 @@ static u32 bxt_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, enum port port)
 	int refclk = vgpu->gvt->gt->i915->display.dpll.ref_clks.nssc;
 	enum dpio_phy phy = DPIO_PHY0;
 	enum dpio_channel ch = DPIO_CH0;
-	struct dpll clock = {0};
+	struct dpll clock = {};
 	u32 temp;
 
 	/* Port to PHY mapping is fixed, see bxt_ddi_phy_info{} */
-- 
GitLab


From bae9fca9684335478ff147413bd69c8d77b66cf9 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Tue, 7 Nov 2023 21:55:33 +0000
Subject: [PATCH 0192/2340] drm: i915: Adapt to -Walloc-size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GCC 14 introduces a new -Walloc-size included in -Wextra which errors out
like:
```
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c: In function ‘eb_copy_relocations’:
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1681:24: error: allocation of insufficient size ‘1’ for type ‘struct drm_i915_gem_relocation_entry’ with size ‘32’ [-Werror=alloc-size]
 1681 |                 relocs = kvmalloc_array(size, 1, GFP_KERNEL);
      |                        ^

```

So, just swap the number of members and size arguments to match the prototype, as
we're initialising 1 element of size `size`. GCC then sees we're not
doing anything wrong.

Signed-off-by: Sam James <sam@gentoo.org>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107215538.1891359-1-sam@gentoo.org
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 683fd8d3151c3..45b9d9e34b8b8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1678,7 +1678,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
 		size = nreloc * sizeof(*relocs);
 
-		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+		relocs = kvmalloc_array(1, size, GFP_KERNEL);
 		if (!relocs) {
 			err = -ENOMEM;
 			goto err;
-- 
GitLab


From 078a5b498d6a3e9c2acb637427258eb6b3079923 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Wed, 25 Oct 2023 15:24:27 +0200
Subject: [PATCH 0193/2340] drm/tests: Remove slow tests

Both the drm_buddy and drm_mm tests have been converted from selftest to
kunit recently.

However, a significant portion of them are trying to exert some part of
their API over a huge number of iterations and with random variations of
their parameters.  They are thus more a way to discover new bugs than
actual unit tests.

This is fine in itself but leads to very slow runtime (up to 25s for
some drm_test_mm_reserve and drm_test_mm_insert on a Ryzen 7950x while
running the tests in qemu) which make them a poor fit for kunit.

Let's remove those tests from the drm_mm and drm_buddy test suites for
now, and if it's ever needed we can always create proper unit tests for
them later on.

This made the entire DRM tests execution time (as of v6.6-rc1) come from
65s to less than .5s on a Ryzen 7950x system when running under qemu,
and from 9 minutes to about 4s on a RaspberryPi4.

Acked-by: Daniel Vetter <daniel@ffwll.ch>
Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Link: https://lore.kernel.org/r/20231025132428.723672-1-mripard@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/tests/drm_buddy_test.c |  465 ------
 drivers/gpu/drm/tests/drm_mm_test.c    | 2016 +-----------------------
 2 files changed, 56 insertions(+), 2425 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
index 09ee6f6af896b..ea2af6bd9abeb 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -13,315 +13,11 @@
 
 #include "../lib/drm_random.h"
 
-#define TIMEOUT(name__)								\
-	unsigned long name__ = jiffies + MAX_SCHEDULE_TIMEOUT
-
-static unsigned int random_seed;
-
 static inline u64 get_size(int order, u64 chunk_size)
 {
 	return (1 << order) * chunk_size;
 }
 
-__printf(2, 3)
-static bool __timeout(unsigned long timeout, const char *fmt, ...)
-{
-	va_list va;
-
-	if (!signal_pending(current)) {
-		cond_resched();
-		if (time_before(jiffies, timeout))
-			return false;
-	}
-
-	if (fmt) {
-		va_start(va, fmt);
-		vprintk(fmt, va);
-		va_end(va);
-	}
-
-	return true;
-}
-
-static void __dump_block(struct kunit *test, struct drm_buddy *mm,
-			 struct drm_buddy_block *block, bool buddy)
-{
-	kunit_err(test, "block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%d buddy=%d\n",
-		  block->header, drm_buddy_block_state(block),
-			  drm_buddy_block_order(block), drm_buddy_block_offset(block),
-			  drm_buddy_block_size(mm, block), !block->parent, buddy);
-}
-
-static void dump_block(struct kunit *test, struct drm_buddy *mm,
-		       struct drm_buddy_block *block)
-{
-	struct drm_buddy_block *buddy;
-
-	__dump_block(test, mm, block, false);
-
-	buddy = drm_get_buddy(block);
-	if (buddy)
-		__dump_block(test, mm, buddy, true);
-}
-
-static int check_block(struct kunit *test, struct drm_buddy *mm,
-		       struct drm_buddy_block *block)
-{
-	struct drm_buddy_block *buddy;
-	unsigned int block_state;
-	u64 block_size;
-	u64 offset;
-	int err = 0;
-
-	block_state = drm_buddy_block_state(block);
-
-	if (block_state != DRM_BUDDY_ALLOCATED &&
-	    block_state != DRM_BUDDY_FREE && block_state != DRM_BUDDY_SPLIT) {
-		kunit_err(test, "block state mismatch\n");
-		err = -EINVAL;
-	}
-
-	block_size = drm_buddy_block_size(mm, block);
-	offset = drm_buddy_block_offset(block);
-
-	if (block_size < mm->chunk_size) {
-		kunit_err(test, "block size smaller than min size\n");
-		err = -EINVAL;
-	}
-
-	/* We can't use is_power_of_2() for a u64 on 32-bit systems. */
-	if (block_size & (block_size - 1)) {
-		kunit_err(test, "block size not power of two\n");
-		err = -EINVAL;
-	}
-
-	if (!IS_ALIGNED(block_size, mm->chunk_size)) {
-		kunit_err(test, "block size not aligned to min size\n");
-		err = -EINVAL;
-	}
-
-	if (!IS_ALIGNED(offset, mm->chunk_size)) {
-		kunit_err(test, "block offset not aligned to min size\n");
-		err = -EINVAL;
-	}
-
-	if (!IS_ALIGNED(offset, block_size)) {
-		kunit_err(test, "block offset not aligned to block size\n");
-		err = -EINVAL;
-	}
-
-	buddy = drm_get_buddy(block);
-
-	if (!buddy && block->parent) {
-		kunit_err(test, "buddy has gone fishing\n");
-		err = -EINVAL;
-	}
-
-	if (buddy) {
-		if (drm_buddy_block_offset(buddy) != (offset ^ block_size)) {
-			kunit_err(test, "buddy has wrong offset\n");
-			err = -EINVAL;
-		}
-
-		if (drm_buddy_block_size(mm, buddy) != block_size) {
-			kunit_err(test, "buddy size mismatch\n");
-			err = -EINVAL;
-		}
-
-		if (drm_buddy_block_state(buddy) == block_state &&
-		    block_state == DRM_BUDDY_FREE) {
-			kunit_err(test, "block and its buddy are free\n");
-			err = -EINVAL;
-		}
-	}
-
-	return err;
-}
-
-static int check_blocks(struct kunit *test, struct drm_buddy *mm,
-			struct list_head *blocks, u64 expected_size, bool is_contiguous)
-{
-	struct drm_buddy_block *block;
-	struct drm_buddy_block *prev;
-	u64 total;
-	int err = 0;
-
-	block = NULL;
-	prev = NULL;
-	total = 0;
-
-	list_for_each_entry(block, blocks, link) {
-		err = check_block(test, mm, block);
-
-		if (!drm_buddy_block_is_allocated(block)) {
-			kunit_err(test, "block not allocated\n");
-			err = -EINVAL;
-		}
-
-		if (is_contiguous && prev) {
-			u64 prev_block_size;
-			u64 prev_offset;
-			u64 offset;
-
-			prev_offset = drm_buddy_block_offset(prev);
-			prev_block_size = drm_buddy_block_size(mm, prev);
-			offset = drm_buddy_block_offset(block);
-
-			if (offset != (prev_offset + prev_block_size)) {
-				kunit_err(test, "block offset mismatch\n");
-				err = -EINVAL;
-			}
-		}
-
-		if (err)
-			break;
-
-		total += drm_buddy_block_size(mm, block);
-		prev = block;
-	}
-
-	if (!err) {
-		if (total != expected_size) {
-			kunit_err(test, "size mismatch, expected=%llx, found=%llx\n",
-				  expected_size, total);
-			err = -EINVAL;
-		}
-		return err;
-	}
-
-	if (prev) {
-		kunit_err(test, "prev block, dump:\n");
-		dump_block(test, mm, prev);
-	}
-
-	kunit_err(test, "bad block, dump:\n");
-	dump_block(test, mm, block);
-
-	return err;
-}
-
-static int check_mm(struct kunit *test, struct drm_buddy *mm)
-{
-	struct drm_buddy_block *root;
-	struct drm_buddy_block *prev;
-	unsigned int i;
-	u64 total;
-	int err = 0;
-
-	if (!mm->n_roots) {
-		kunit_err(test, "n_roots is zero\n");
-		return -EINVAL;
-	}
-
-	if (mm->n_roots != hweight64(mm->size)) {
-		kunit_err(test, "n_roots mismatch, n_roots=%u, expected=%lu\n",
-			  mm->n_roots, hweight64(mm->size));
-		return -EINVAL;
-	}
-
-	root = NULL;
-	prev = NULL;
-	total = 0;
-
-	for (i = 0; i < mm->n_roots; ++i) {
-		struct drm_buddy_block *block;
-		unsigned int order;
-
-		root = mm->roots[i];
-		if (!root) {
-			kunit_err(test, "root(%u) is NULL\n", i);
-			err = -EINVAL;
-			break;
-		}
-
-		err = check_block(test, mm, root);
-
-		if (!drm_buddy_block_is_free(root)) {
-			kunit_err(test, "root not free\n");
-			err = -EINVAL;
-		}
-
-		order = drm_buddy_block_order(root);
-
-		if (!i) {
-			if (order != mm->max_order) {
-				kunit_err(test, "max order root missing\n");
-				err = -EINVAL;
-			}
-		}
-
-		if (prev) {
-			u64 prev_block_size;
-			u64 prev_offset;
-			u64 offset;
-
-			prev_offset = drm_buddy_block_offset(prev);
-			prev_block_size = drm_buddy_block_size(mm, prev);
-			offset = drm_buddy_block_offset(root);
-
-			if (offset != (prev_offset + prev_block_size)) {
-				kunit_err(test, "root offset mismatch\n");
-				err = -EINVAL;
-			}
-		}
-
-		block = list_first_entry_or_null(&mm->free_list[order],
-						 struct drm_buddy_block, link);
-		if (block != root) {
-			kunit_err(test, "root mismatch at order=%u\n", order);
-			err = -EINVAL;
-		}
-
-		if (err)
-			break;
-
-		prev = root;
-		total += drm_buddy_block_size(mm, root);
-	}
-
-	if (!err) {
-		if (total != mm->size) {
-			kunit_err(test, "expected mm size=%llx, found=%llx\n",
-				  mm->size, total);
-			err = -EINVAL;
-		}
-		return err;
-	}
-
-	if (prev) {
-		kunit_err(test, "prev root(%u), dump:\n", i - 1);
-		dump_block(test, mm, prev);
-	}
-
-	if (root) {
-		kunit_err(test, "bad root(%u), dump:\n", i);
-		dump_block(test, mm, root);
-	}
-
-	return err;
-}
-
-static void mm_config(u64 *size, u64 *chunk_size)
-{
-	DRM_RND_STATE(prng, random_seed);
-	u32 s, ms;
-
-	/* Nothing fancy, just try to get an interesting bit pattern */
-
-	prandom_seed_state(&prng, random_seed);
-
-	/* Let size be a random number of pages up to 8 GB (2M pages) */
-	s = 1 + drm_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng);
-	/* Let the chunk size be a random power of 2 less than size */
-	ms = BIT(drm_prandom_u32_max_state(ilog2(s), &prng));
-	/* Round size down to the chunk size */
-	s &= -ms;
-
-	/* Convert from pages to bytes */
-	*chunk_size = (u64)ms << 12;
-	*size = (u64)s << 12;
-}
-
 static void drm_test_buddy_alloc_pathological(struct kunit *test)
 {
 	u64 mm_size, size, start = 0;
@@ -403,96 +99,6 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 	drm_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_smoke(struct kunit *test)
-{
-	u64 mm_size, chunk_size, start = 0;
-	unsigned long flags = 0;
-	struct drm_buddy mm;
-	int *order;
-	int i;
-
-	DRM_RND_STATE(prng, random_seed);
-	TIMEOUT(end_time);
-
-	mm_config(&mm_size, &chunk_size);
-
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, chunk_size),
-			       "buddy_init failed\n");
-
-	order = drm_random_order(mm.max_order + 1, &prng);
-	KUNIT_ASSERT_TRUE(test, order);
-
-	for (i = 0; i <= mm.max_order; ++i) {
-		struct drm_buddy_block *block;
-		int max_order = order[i];
-		bool timeout = false;
-		LIST_HEAD(blocks);
-		u64 total, size;
-		LIST_HEAD(tmp);
-		int order, err;
-
-		KUNIT_ASSERT_FALSE_MSG(test, check_mm(test, &mm),
-				       "pre-mm check failed, abort\n");
-
-		order = max_order;
-		total = 0;
-
-		do {
-retry:
-			size = get_size(order, chunk_size);
-			err = drm_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags);
-			if (err) {
-				if (err == -ENOMEM) {
-					KUNIT_FAIL(test, "buddy_alloc hit -ENOMEM with order=%d\n",
-						   order);
-				} else {
-					if (order--) {
-						err = 0;
-						goto retry;
-					}
-
-					KUNIT_FAIL(test, "buddy_alloc with order=%d failed\n",
-						   order);
-				}
-
-				break;
-			}
-
-			block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
-
-			list_move_tail(&block->link, &blocks);
-			KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), order,
-					    "buddy_alloc order mismatch\n");
-
-			total += drm_buddy_block_size(&mm, block);
-
-			if (__timeout(end_time, NULL)) {
-				timeout = true;
-				break;
-			}
-		} while (total < mm.size);
-
-		if (!err)
-			err = check_blocks(test, &mm, &blocks, total, false);
-
-		drm_buddy_free_list(&mm, &blocks);
-
-		if (!err) {
-			KUNIT_EXPECT_FALSE_MSG(test, check_mm(test, &mm),
-					       "post-mm check failed\n");
-		}
-
-		if (err || timeout)
-			break;
-
-		cond_resched();
-	}
-
-	kfree(order);
-	drm_buddy_fini(&mm);
-}
-
 static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
 {
 	u64 mm_size, size, start = 0;
@@ -634,64 +240,6 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
 	drm_buddy_fini(&mm);
 }
 
-static void drm_test_buddy_alloc_range(struct kunit *test)
-{
-	unsigned long flags = DRM_BUDDY_RANGE_ALLOCATION;
-	u64 offset, size, rem, chunk_size, end;
-	unsigned long page_num;
-	struct drm_buddy mm;
-	LIST_HEAD(blocks);
-
-	mm_config(&size, &chunk_size);
-
-	KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, size, chunk_size),
-			       "buddy_init failed");
-
-	KUNIT_ASSERT_FALSE_MSG(test, check_mm(test, &mm),
-			       "pre-mm check failed, abort!");
-
-	rem = mm.size;
-	offset = 0;
-
-	for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) {
-		struct drm_buddy_block *block;
-		LIST_HEAD(tmp);
-
-		size = min(page_num * mm.chunk_size, rem);
-		end = offset + size;
-
-		KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, offset, end,
-								    size, mm.chunk_size,
-									&tmp, flags),
-				"alloc_range with offset=%llx, size=%llx failed\n", offset, size);
-
-		block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
-		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_range has no blocks\n");
-
-		KUNIT_ASSERT_EQ_MSG(test, drm_buddy_block_offset(block), offset,
-				    "alloc_range start offset mismatch, found=%llx, expected=%llx\n",
-							drm_buddy_block_offset(block), offset);
-
-		KUNIT_ASSERT_FALSE(test, check_blocks(test, &mm, &tmp, size, true));
-
-		list_splice_tail(&tmp, &blocks);
-
-		offset += size;
-
-		rem -= size;
-		if (!rem)
-			break;
-
-		cond_resched();
-	}
-
-	drm_buddy_free_list(&mm, &blocks);
-
-	KUNIT_EXPECT_FALSE_MSG(test, check_mm(test, &mm), "post-mm check failed\n");
-
-	drm_buddy_fini(&mm);
-}
-
 static void drm_test_buddy_alloc_limit(struct kunit *test)
 {
 	u64 size = U64_MAX, start = 0;
@@ -727,29 +275,16 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
 	drm_buddy_fini(&mm);
 }
 
-static int drm_buddy_suite_init(struct kunit_suite *suite)
-{
-	while (!random_seed)
-		random_seed = get_random_u32();
-
-	kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", random_seed);
-
-	return 0;
-}
-
 static struct kunit_case drm_buddy_tests[] = {
 	KUNIT_CASE(drm_test_buddy_alloc_limit),
-	KUNIT_CASE(drm_test_buddy_alloc_range),
 	KUNIT_CASE(drm_test_buddy_alloc_optimistic),
 	KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
-	KUNIT_CASE(drm_test_buddy_alloc_smoke),
 	KUNIT_CASE(drm_test_buddy_alloc_pathological),
 	{}
 };
 
 static struct kunit_suite drm_buddy_test_suite = {
 	.name = "drm_buddy",
-	.suite_init = drm_buddy_suite_init,
 	.test_cases = drm_buddy_tests,
 };
 
diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c
index 186b28dc70380..4e9247cf9977f 100644
--- a/drivers/gpu/drm/tests/drm_mm_test.c
+++ b/drivers/gpu/drm/tests/drm_mm_test.c
@@ -17,10 +17,6 @@
 
 #include "../lib/drm_random.h"
 
-static unsigned int random_seed;
-static unsigned int max_iterations = 8192;
-static unsigned int max_prime = 128;
-
 enum {
 	BEST,
 	BOTTOMUP,
@@ -37,10 +33,6 @@ static const struct insert_mode {
 	[TOPDOWN] = { "top-down", DRM_MM_INSERT_HIGH },
 	[EVICT] = { "evict", DRM_MM_INSERT_EVICT },
 	{}
-}, evict_modes[] = {
-	{ "bottom-up", DRM_MM_INSERT_LOW },
-	{ "top-down", DRM_MM_INSERT_HIGH },
-	{}
 };
 
 static bool assert_no_holes(struct kunit *test, const struct drm_mm *mm)
@@ -97,57 +89,6 @@ static bool assert_one_hole(struct kunit *test, const struct drm_mm *mm, u64 sta
 	return ok;
 }
 
-static bool assert_continuous(struct kunit *test, const struct drm_mm *mm, u64 size)
-{
-	struct drm_mm_node *node, *check, *found;
-	unsigned long n;
-	u64 addr;
-
-	if (!assert_no_holes(test, mm))
-		return false;
-
-	n = 0;
-	addr = 0;
-	drm_mm_for_each_node(node, mm) {
-		if (node->start != addr) {
-			KUNIT_FAIL(test, "node[%ld] list out of order, expected %llx found %llx\n",
-				   n, addr, node->start);
-			return false;
-		}
-
-		if (node->size != size) {
-			KUNIT_FAIL(test, "node[%ld].size incorrect, expected %llx, found %llx\n",
-				   n, size, node->size);
-			return false;
-		}
-
-		if (drm_mm_hole_follows(node)) {
-			KUNIT_FAIL(test, "node[%ld] is followed by a hole!\n", n);
-			return false;
-		}
-
-		found = NULL;
-		drm_mm_for_each_node_in_range(check, mm, addr, addr + size) {
-			if (node != check) {
-				KUNIT_FAIL(test,
-					   "lookup return wrong node, expected start %llx, found %llx\n",
-					   node->start, check->start);
-				return false;
-			}
-			found = check;
-		}
-		if (!found) {
-			KUNIT_FAIL(test, "lookup failed for node %llx + %llx\n", addr, size);
-			return false;
-		}
-
-		addr += size;
-		n++;
-	}
-
-	return true;
-}
-
 static u64 misalignment(struct drm_mm_node *node, u64 alignment)
 {
 	u64 rem;
@@ -270,215 +211,6 @@ static void drm_test_mm_debug(struct kunit *test)
 			       nodes[0].start, nodes[0].size);
 }
 
-static struct drm_mm_node *set_node(struct drm_mm_node *node,
-				    u64 start, u64 size)
-{
-	node->start = start;
-	node->size = size;
-	return node;
-}
-
-static bool expect_reserve_fail(struct kunit *test, struct drm_mm *mm, struct drm_mm_node *node)
-{
-	int err;
-
-	err = drm_mm_reserve_node(mm, node);
-	if (likely(err == -ENOSPC))
-		return true;
-
-	if (!err) {
-		KUNIT_FAIL(test, "impossible reserve succeeded, node %llu + %llu\n",
-			   node->start, node->size);
-		drm_mm_remove_node(node);
-	} else {
-		KUNIT_FAIL(test,
-			   "impossible reserve failed with wrong error %d [expected %d], node %llu + %llu\n",
-		       err, -ENOSPC, node->start, node->size);
-	}
-	return false;
-}
-
-static bool noinline_for_stack check_reserve_boundaries(struct kunit *test, struct drm_mm *mm,
-							unsigned int count,
-							u64 size)
-{
-	const struct boundary {
-		u64 start, size;
-		const char *name;
-	} boundaries[] = {
-#define B(st, sz) { (st), (sz), "{ " #st ", " #sz "}" }
-		B(0, 0),
-		B(-size, 0),
-		B(size, 0),
-		B(size * count, 0),
-		B(-size, size),
-		B(-size, -size),
-		B(-size, 2 * size),
-		B(0, -size),
-		B(size, -size),
-		B(count * size, size),
-		B(count * size, -size),
-		B(count * size, count * size),
-		B(count * size, -count * size),
-		B(count * size, -(count + 1) * size),
-		B((count + 1) * size, size),
-		B((count + 1) * size, -size),
-		B((count + 1) * size, -2 * size),
-#undef B
-	};
-	struct drm_mm_node tmp = {};
-	int n;
-
-	for (n = 0; n < ARRAY_SIZE(boundaries); n++) {
-		if (!expect_reserve_fail(test, mm, set_node(&tmp, boundaries[n].start,
-							    boundaries[n].size))) {
-			KUNIT_FAIL(test, "boundary[%d:%s] failed, count=%u, size=%lld\n",
-				   n, boundaries[n].name, count, size);
-			return false;
-		}
-	}
-
-	return true;
-}
-
-static int __drm_test_mm_reserve(struct kunit *test, unsigned int count, u64 size)
-{
-	DRM_RND_STATE(prng, random_seed);
-	struct drm_mm mm;
-	struct drm_mm_node tmp, *nodes, *node, *next;
-	unsigned int *order, n, m, o = 0;
-	int ret, err;
-
-	/* For exercising drm_mm_reserve_node(), we want to check that
-	 * reservations outside of the drm_mm range are rejected, and to
-	 * overlapping and otherwise already occupied ranges. Afterwards,
-	 * the tree and nodes should be intact.
-	 */
-
-	DRM_MM_BUG_ON(!count);
-	DRM_MM_BUG_ON(!size);
-
-	ret = -ENOMEM;
-	order = drm_random_order(count, &prng);
-	if (!order)
-		goto err;
-
-	nodes = vzalloc(array_size(count, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	ret = -EINVAL;
-	drm_mm_init(&mm, 0, count * size);
-
-	if (!check_reserve_boundaries(test, &mm, count, size))
-		goto out;
-
-	for (n = 0; n < count; n++) {
-		nodes[n].start = order[n] * size;
-		nodes[n].size = size;
-
-		err = drm_mm_reserve_node(&mm, &nodes[n]);
-		if (err) {
-			KUNIT_FAIL(test, "reserve failed, step %d, start %llu\n",
-				   n, nodes[n].start);
-			ret = err;
-			goto out;
-		}
-
-		if (!drm_mm_node_allocated(&nodes[n])) {
-			KUNIT_FAIL(test, "reserved node not allocated! step %d, start %llu\n",
-				   n, nodes[n].start);
-			goto out;
-		}
-
-		if (!expect_reserve_fail(test, &mm, &nodes[n]))
-			goto out;
-	}
-
-	/* After random insertion the nodes should be in order */
-	if (!assert_continuous(test, &mm, size))
-		goto out;
-
-	/* Repeated use should then fail */
-	drm_random_reorder(order, count, &prng);
-	for (n = 0; n < count; n++) {
-		if (!expect_reserve_fail(test, &mm, set_node(&tmp, order[n] * size, 1)))
-			goto out;
-
-		/* Remove and reinsert should work */
-		drm_mm_remove_node(&nodes[order[n]]);
-		err = drm_mm_reserve_node(&mm, &nodes[order[n]]);
-		if (err) {
-			KUNIT_FAIL(test, "reserve failed, step %d, start %llu\n",
-				   n, nodes[n].start);
-			ret = err;
-			goto out;
-		}
-	}
-
-	if (!assert_continuous(test, &mm, size))
-		goto out;
-
-	/* Overlapping use should then fail */
-	for (n = 0; n < count; n++) {
-		if (!expect_reserve_fail(test, &mm, set_node(&tmp, 0, size * count)))
-			goto out;
-	}
-	for (n = 0; n < count; n++) {
-		if (!expect_reserve_fail(test, &mm, set_node(&tmp, size * n, size * (count - n))))
-			goto out;
-	}
-
-	/* Remove several, reinsert, check full */
-	for_each_prime_number(n, min(max_prime, count)) {
-		for (m = 0; m < n; m++) {
-			node = &nodes[order[(o + m) % count]];
-			drm_mm_remove_node(node);
-		}
-
-		for (m = 0; m < n; m++) {
-			node = &nodes[order[(o + m) % count]];
-			err = drm_mm_reserve_node(&mm, node);
-			if (err) {
-				KUNIT_FAIL(test, "reserve failed, step %d/%d, start %llu\n",
-					   m, n, node->start);
-				ret = err;
-				goto out;
-			}
-		}
-
-		o += n;
-
-		if (!assert_continuous(test, &mm, size))
-			goto out;
-	}
-
-	ret = 0;
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	vfree(nodes);
-	kfree(order);
-err:
-	return ret;
-}
-
-static void drm_test_mm_reserve(struct kunit *test)
-{
-	const unsigned int count = min_t(unsigned int, BIT(10), max_iterations);
-	int n;
-
-	for_each_prime_number_from(n, 1, 54) {
-		u64 size = BIT_ULL(n);
-
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_reserve(test, count, size - 1));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_reserve(test, count, size));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_reserve(test, count, size + 1));
-
-		cond_resched();
-	}
-}
-
 static bool expect_insert(struct kunit *test, struct drm_mm *mm,
 			  struct drm_mm_node *node, u64 size, u64 alignment, unsigned long color,
 			const struct insert_mode *mode)
@@ -503,1754 +235,118 @@ static bool expect_insert(struct kunit *test, struct drm_mm *mm,
 	return true;
 }
 
-static bool expect_insert_fail(struct kunit *test, struct drm_mm *mm, u64 size)
-{
-	struct drm_mm_node tmp = {};
-	int err;
-
-	err = drm_mm_insert_node(mm, &tmp, size);
-	if (likely(err == -ENOSPC))
-		return true;
-
-	if (!err) {
-		KUNIT_FAIL(test, "impossible insert succeeded, node %llu + %llu\n",
-			   tmp.start, tmp.size);
-		drm_mm_remove_node(&tmp);
-	} else {
-		KUNIT_FAIL(test,
-			   "impossible insert failed with wrong error %d [expected %d], size %llu\n",
-			   err, -ENOSPC, size);
-	}
-	return false;
-}
-
-static int __drm_test_mm_insert(struct kunit *test, unsigned int count, u64 size, bool replace)
+static void drm_test_mm_align_pot(struct kunit *test, int max)
 {
-	DRM_RND_STATE(prng, random_seed);
-	const struct insert_mode *mode;
 	struct drm_mm mm;
-	struct drm_mm_node *nodes, *node, *next;
-	unsigned int *order, n, m, o = 0;
-	int ret;
-
-	/* Fill a range with lots of nodes, check it doesn't fail too early */
-
-	DRM_MM_BUG_ON(!count);
-	DRM_MM_BUG_ON(!size);
-
-	ret = -ENOMEM;
-	nodes = vmalloc(array_size(count, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	order = drm_random_order(count, &prng);
-	if (!order)
-		goto err_nodes;
-
-	ret = -EINVAL;
-	drm_mm_init(&mm, 0, count * size);
-
-	for (mode = insert_modes; mode->name; mode++) {
-		for (n = 0; n < count; n++) {
-			struct drm_mm_node tmp;
-
-			node = replace ? &tmp : &nodes[n];
-			memset(node, 0, sizeof(*node));
-			if (!expect_insert(test, &mm, node, size, 0, n, mode)) {
-				KUNIT_FAIL(test, "%s insert failed, size %llu step %d\n",
-					   mode->name, size, n);
-				goto out;
-			}
-
-			if (replace) {
-				drm_mm_replace_node(&tmp, &nodes[n]);
-				if (drm_mm_node_allocated(&tmp)) {
-					KUNIT_FAIL(test,
-						   "replaced old-node still allocated! step %d\n",
-						   n);
-					goto out;
-				}
-
-				if (!assert_node(test, &nodes[n], &mm, size, 0, n)) {
-					KUNIT_FAIL(test,
-						   "replaced node did not inherit parameters, size %llu step %d\n",
-						   size, n);
-					goto out;
-				}
-
-				if (tmp.start != nodes[n].start) {
-					KUNIT_FAIL(test,
-						   "replaced node mismatch location expected [%llx + %llx], found [%llx + %llx]\n",
-						   tmp.start, size, nodes[n].start, nodes[n].size);
-					goto out;
-				}
-			}
-		}
-
-		/* After random insertion the nodes should be in order */
-		if (!assert_continuous(test, &mm, size))
-			goto out;
-
-		/* Repeated use should then fail */
-		if (!expect_insert_fail(test, &mm, size))
-			goto out;
+	struct drm_mm_node *node, *next;
+	int bit;
 
-		/* Remove one and reinsert, as the only hole it should refill itself */
-		for (n = 0; n < count; n++) {
-			u64 addr = nodes[n].start;
+	/* Check that we can align to the full u64 address space */
 
-			drm_mm_remove_node(&nodes[n]);
-			if (!expect_insert(test, &mm, &nodes[n], size, 0, n, mode)) {
-				KUNIT_FAIL(test, "%s reinsert failed, size %llu step %d\n",
-					   mode->name, size, n);
-				goto out;
-			}
+	drm_mm_init(&mm, 1, U64_MAX - 2);
 
-			if (nodes[n].start != addr) {
-				KUNIT_FAIL(test,
-					   "%s reinsert node moved, step %d, expected %llx, found %llx\n",
-					   mode->name, n, addr, nodes[n].start);
-				goto out;
-			}
+	for (bit = max - 1; bit; bit--) {
+		u64 align, size;
 
-			if (!assert_continuous(test, &mm, size))
-				goto out;
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node) {
+			KUNIT_FAIL(test, "failed to allocate node");
+			goto out;
 		}
 
-		/* Remove several, reinsert, check full */
-		for_each_prime_number(n, min(max_prime, count)) {
-			for (m = 0; m < n; m++) {
-				node = &nodes[order[(o + m) % count]];
-				drm_mm_remove_node(node);
-			}
-
-			for (m = 0; m < n; m++) {
-				node = &nodes[order[(o + m) % count]];
-				if (!expect_insert(test, &mm, node, size, 0, n, mode)) {
-					KUNIT_FAIL(test,
-						   "%s multiple reinsert failed, size %llu step %d\n",
-							   mode->name, size, n);
-					goto out;
-				}
-			}
-
-			o += n;
-
-			if (!assert_continuous(test, &mm, size))
-				goto out;
-
-			if (!expect_insert_fail(test, &mm, size))
-				goto out;
+		align = BIT_ULL(bit);
+		size = BIT_ULL(bit - 1) + 1;
+		if (!expect_insert(test, &mm, node, size, align, bit, &insert_modes[0])) {
+			KUNIT_FAIL(test, "insert failed with alignment=%llx [%d]", align, bit);
+			goto out;
 		}
 
-		drm_mm_for_each_node_safe(node, next, &mm)
-			drm_mm_remove_node(node);
-		DRM_MM_BUG_ON(!drm_mm_clean(&mm));
-
 		cond_resched();
 	}
 
-	ret = 0;
 out:
-	drm_mm_for_each_node_safe(node, next, &mm)
+	drm_mm_for_each_node_safe(node, next, &mm) {
 		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	kfree(order);
-err_nodes:
-	vfree(nodes);
-	return ret;
-}
-
-static void drm_test_mm_insert(struct kunit *test)
-{
-	const unsigned int count = min_t(unsigned int, BIT(10), max_iterations);
-	unsigned int n;
-
-	for_each_prime_number_from(n, 1, 54) {
-		u64 size = BIT_ULL(n);
-
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size - 1, false));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size, false));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size + 1, false));
-
-		cond_resched();
+		kfree(node);
 	}
+	drm_mm_takedown(&mm);
 }
 
-static void drm_test_mm_replace(struct kunit *test)
+static void drm_test_mm_align32(struct kunit *test)
 {
-	const unsigned int count = min_t(unsigned int, BIT(10), max_iterations);
-	unsigned int n;
-
-	/* Reuse __drm_test_mm_insert to exercise replacement by inserting a dummy node,
-	 * then replacing it with the intended node. We want to check that
-	 * the tree is intact and all the information we need is carried
-	 * across to the target node.
-	 */
-
-	for_each_prime_number_from(n, 1, 54) {
-		u64 size = BIT_ULL(n);
-
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size - 1, true));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size, true));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert(test, count, size + 1, true));
-
-		cond_resched();
-	}
+	drm_test_mm_align_pot(test, 32);
 }
 
-static bool expect_insert_in_range(struct kunit *test, struct drm_mm *mm, struct drm_mm_node *node,
-				   u64 size, u64 alignment, unsigned long color,
-				   u64 range_start, u64 range_end, const struct insert_mode *mode)
+static void drm_test_mm_align64(struct kunit *test)
 {
-	int err;
-
-	err = drm_mm_insert_node_in_range(mm, node,
-					  size, alignment, color,
-					  range_start, range_end,
-					  mode->mode);
-	if (err) {
-		KUNIT_FAIL(test,
-			   "insert (size=%llu, alignment=%llu, color=%lu, mode=%s) nto range [%llx, %llx] failed with err=%d\n",
-				   size, alignment, color, mode->name,
-				   range_start, range_end, err);
-		return false;
-	}
-
-	if (!assert_node(test, node, mm, size, alignment, color)) {
-		drm_mm_remove_node(node);
-		return false;
-	}
-
-	return true;
+	drm_test_mm_align_pot(test, 64);
 }
 
-static bool expect_insert_in_range_fail(struct kunit *test, struct drm_mm *mm,
-					u64 size, u64 range_start, u64 range_end)
+static void drm_test_mm_once(struct kunit *test, unsigned int mode)
 {
-	struct drm_mm_node tmp = {};
-	int err;
+	struct drm_mm mm;
+	struct drm_mm_node rsvd_lo, rsvd_hi, node;
 
-	err = drm_mm_insert_node_in_range(mm, &tmp, size, 0, 0, range_start, range_end,
-					  0);
-	if (likely(err == -ENOSPC))
-		return true;
+	drm_mm_init(&mm, 0, 7);
 
-	if (!err) {
-		KUNIT_FAIL(test,
-			   "impossible insert succeeded, node %llx + %llu, range [%llx, %llx]\n",
-				   tmp.start, tmp.size, range_start, range_end);
-		drm_mm_remove_node(&tmp);
-	} else {
-		KUNIT_FAIL(test,
-			   "impossible insert failed with wrong error %d [expected %d], size %llu, range [%llx, %llx]\n",
-				   err, -ENOSPC, size, range_start, range_end);
+	memset(&rsvd_lo, 0, sizeof(rsvd_lo));
+	rsvd_lo.start = 1;
+	rsvd_lo.size = 1;
+	if (drm_mm_reserve_node(&mm, &rsvd_lo)) {
+		KUNIT_FAIL(test, "Could not reserve low node\n");
+		goto err;
 	}
 
-	return false;
-}
-
-static bool assert_contiguous_in_range(struct kunit *test, struct drm_mm *mm,
-				       u64 size, u64 start, u64 end)
-{
-	struct drm_mm_node *node;
-	unsigned int n;
-
-	if (!expect_insert_in_range_fail(test, mm, size, start, end))
-		return false;
-
-	n = div64_u64(start + size - 1, size);
-	drm_mm_for_each_node(node, mm) {
-		if (node->start < start || node->start + node->size > end) {
-			KUNIT_FAIL(test,
-				   "node %d out of range, address [%llx + %llu], range [%llx, %llx]\n",
-					   n, node->start, node->start + node->size, start, end);
-			return false;
-		}
-
-		if (node->start != n * size) {
-			KUNIT_FAIL(test, "node %d out of order, expected start %llx, found %llx\n",
-				   n, n * size, node->start);
-			return false;
-		}
-
-		if (node->size != size) {
-			KUNIT_FAIL(test, "node %d has wrong size, expected size %llx, found %llx\n",
-				   n, size, node->size);
-			return false;
-		}
-
-		if (drm_mm_hole_follows(node) && drm_mm_hole_node_end(node) < end) {
-			KUNIT_FAIL(test, "node %d is followed by a hole!\n", n);
-			return false;
-		}
-
-		n++;
+	memset(&rsvd_hi, 0, sizeof(rsvd_hi));
+	rsvd_hi.start = 5;
+	rsvd_hi.size = 1;
+	if (drm_mm_reserve_node(&mm, &rsvd_hi)) {
+		KUNIT_FAIL(test, "Could not reserve low node\n");
+		goto err_lo;
 	}
 
-	if (start > 0) {
-		node = __drm_mm_interval_first(mm, 0, start - 1);
-		if (drm_mm_node_allocated(node)) {
-			KUNIT_FAIL(test, "node before start: node=%llx+%llu, start=%llx\n",
-				   node->start, node->size, start);
-			return false;
-		}
+	if (!drm_mm_hole_follows(&rsvd_lo) || !drm_mm_hole_follows(&rsvd_hi)) {
+		KUNIT_FAIL(test, "Expected a hole after lo and high nodes!\n");
+		goto err_hi;
 	}
 
-	if (end < U64_MAX) {
-		node = __drm_mm_interval_first(mm, end, U64_MAX);
-		if (drm_mm_node_allocated(node)) {
-			KUNIT_FAIL(test, "node after end: node=%llx+%llu, end=%llx\n",
-				   node->start, node->size, end);
-			return false;
-		}
+	memset(&node, 0, sizeof(node));
+	if (drm_mm_insert_node_generic(&mm, &node, 2, 0, 0, mode)) {
+		KUNIT_FAIL(test, "Could not insert the node into the available hole!\n");
+		goto err_hi;
 	}
 
-	return true;
+	drm_mm_remove_node(&node);
+err_hi:
+	drm_mm_remove_node(&rsvd_hi);
+err_lo:
+	drm_mm_remove_node(&rsvd_lo);
+err:
+	drm_mm_takedown(&mm);
 }
 
-static int __drm_test_mm_insert_range(struct kunit *test, unsigned int count, u64 size,
-				      u64 start, u64 end)
+static void drm_test_mm_lowest(struct kunit *test)
 {
-	const struct insert_mode *mode;
-	struct drm_mm mm;
-	struct drm_mm_node *nodes, *node, *next;
-	unsigned int n, start_n, end_n;
-	int ret;
-
-	DRM_MM_BUG_ON(!count);
-	DRM_MM_BUG_ON(!size);
-	DRM_MM_BUG_ON(end <= start);
-
-	/* Very similar to __drm_test_mm_insert(), but now instead of populating the
-	 * full range of the drm_mm, we try to fill a small portion of it.
-	 */
-
-	ret = -ENOMEM;
-	nodes = vzalloc(array_size(count, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	ret = -EINVAL;
-	drm_mm_init(&mm, 0, count * size);
-
-	start_n = div64_u64(start + size - 1, size);
-	end_n = div64_u64(end - size, size);
-
-	for (mode = insert_modes; mode->name; mode++) {
-		for (n = start_n; n <= end_n; n++) {
-			if (!expect_insert_in_range(test, &mm, &nodes[n], size, size, n,
-						    start, end, mode)) {
-				KUNIT_FAIL(test,
-					   "%s insert failed, size %llu, step %d [%d, %d], range [%llx, %llx]\n",
-						   mode->name, size, n, start_n, end_n, start, end);
-				goto out;
-			}
-		}
-
-		if (!assert_contiguous_in_range(test, &mm, size, start, end)) {
-			KUNIT_FAIL(test,
-				   "%s: range [%llx, %llx] not full after initialisation, size=%llu\n",
-				   mode->name, start, end, size);
-			goto out;
-		}
-
-		/* Remove one and reinsert, it should refill itself */
-		for (n = start_n; n <= end_n; n++) {
-			u64 addr = nodes[n].start;
-
-			drm_mm_remove_node(&nodes[n]);
-			if (!expect_insert_in_range(test, &mm, &nodes[n], size, size, n,
-						    start, end, mode)) {
-				KUNIT_FAIL(test, "%s reinsert failed, step %d\n", mode->name, n);
-				goto out;
-			}
-
-			if (nodes[n].start != addr) {
-				KUNIT_FAIL(test,
-					   "%s reinsert node moved, step %d, expected %llx, found %llx\n",
-					   mode->name, n, addr, nodes[n].start);
-				goto out;
-			}
-		}
-
-		if (!assert_contiguous_in_range(test, &mm, size, start, end)) {
-			KUNIT_FAIL(test,
-				   "%s: range [%llx, %llx] not full after reinsertion, size=%llu\n",
-				   mode->name, start, end, size);
-			goto out;
-		}
-
-		drm_mm_for_each_node_safe(node, next, &mm)
-			drm_mm_remove_node(node);
-		DRM_MM_BUG_ON(!drm_mm_clean(&mm));
-
-		cond_resched();
-	}
-
-	ret = 0;
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	vfree(nodes);
-	return ret;
-}
-
-static int insert_outside_range(struct kunit *test)
-{
-	struct drm_mm mm;
-	const unsigned int start = 1024;
-	const unsigned int end = 2048;
-	const unsigned int size = end - start;
-
-	drm_mm_init(&mm, start, size);
-
-	if (!expect_insert_in_range_fail(test, &mm, 1, 0, start))
-		return -EINVAL;
-
-	if (!expect_insert_in_range_fail(test, &mm, size,
-					 start - size / 2, start + (size + 1) / 2))
-		return -EINVAL;
-
-	if (!expect_insert_in_range_fail(test, &mm, size,
-					 end - (size + 1) / 2, end + size / 2))
-		return -EINVAL;
-
-	if (!expect_insert_in_range_fail(test, &mm, 1, end, end + size))
-		return -EINVAL;
-
-	drm_mm_takedown(&mm);
-	return 0;
-}
-
-static void drm_test_mm_insert_range(struct kunit *test)
-{
-	const unsigned int count = min_t(unsigned int, BIT(13), max_iterations);
-	unsigned int n;
-
-	/* Check that requests outside the bounds of drm_mm are rejected. */
-	KUNIT_ASSERT_FALSE(test, insert_outside_range(test));
-
-	for_each_prime_number_from(n, 1, 50) {
-		const u64 size = BIT_ULL(n);
-		const u64 max = count * size;
-
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 1, max));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max - 1));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size, 0, max / 2));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size,
-								    max / 2, max / 2));
-		KUNIT_ASSERT_FALSE(test, __drm_test_mm_insert_range(test, count, size,
-								    max / 4 + 1, 3 * max / 4 - 1));
-
-		cond_resched();
-	}
-}
-
-static int prepare_frag(struct kunit *test, struct drm_mm *mm, struct drm_mm_node *nodes,
-			unsigned int num_insert, const struct insert_mode *mode)
-{
-	unsigned int size = 4096;
-	unsigned int i;
-
-	for (i = 0; i < num_insert; i++) {
-		if (!expect_insert(test, mm, &nodes[i], size, 0, i, mode) != 0) {
-			KUNIT_FAIL(test, "%s insert failed\n", mode->name);
-			return -EINVAL;
-		}
-	}
-
-	/* introduce fragmentation by freeing every other node */
-	for (i = 0; i < num_insert; i++) {
-		if (i % 2 == 0)
-			drm_mm_remove_node(&nodes[i]);
-	}
-
-	return 0;
-}
-
-static u64 get_insert_time(struct kunit *test, struct drm_mm *mm,
-			   unsigned int num_insert, struct drm_mm_node *nodes,
-			   const struct insert_mode *mode)
-{
-	unsigned int size = 8192;
-	ktime_t start;
-	unsigned int i;
-
-	start = ktime_get();
-	for (i = 0; i < num_insert; i++) {
-		if (!expect_insert(test, mm, &nodes[i], size, 0, i, mode) != 0) {
-			KUNIT_FAIL(test, "%s insert failed\n", mode->name);
-			return 0;
-		}
-	}
-
-	return ktime_to_ns(ktime_sub(ktime_get(), start));
-}
-
-static void drm_test_mm_frag(struct kunit *test)
-{
-	struct drm_mm mm;
-	const struct insert_mode *mode;
-	struct drm_mm_node *nodes, *node, *next;
-	unsigned int insert_size = 10000;
-	unsigned int scale_factor = 4;
-
-	/* We need 4 * insert_size nodes to hold intermediate allocated
-	 * drm_mm nodes.
-	 * 1 times for prepare_frag()
-	 * 1 times for get_insert_time()
-	 * 2 times for get_insert_time()
-	 */
-	nodes = vzalloc(array_size(insert_size * 4, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	/* For BOTTOMUP and TOPDOWN, we first fragment the
-	 * address space using prepare_frag() and then try to verify
-	 * that insertions scale quadratically from 10k to 20k insertions
-	 */
-	drm_mm_init(&mm, 1, U64_MAX - 2);
-	for (mode = insert_modes; mode->name; mode++) {
-		u64 insert_time1, insert_time2;
-
-		if (mode->mode != DRM_MM_INSERT_LOW &&
-		    mode->mode != DRM_MM_INSERT_HIGH)
-			continue;
-
-		if (prepare_frag(test, &mm, nodes, insert_size, mode))
-			goto err;
-
-		insert_time1 = get_insert_time(test, &mm, insert_size,
-					       nodes + insert_size, mode);
-		if (insert_time1 == 0)
-			goto err;
-
-		insert_time2 = get_insert_time(test, &mm, (insert_size * 2),
-					       nodes + insert_size * 2, mode);
-		if (insert_time2 == 0)
-			goto err;
-
-		kunit_info(test, "%s fragmented insert of %u and %u insertions took %llu and %llu nsecs\n",
-			   mode->name, insert_size, insert_size * 2, insert_time1, insert_time2);
-
-		if (insert_time2 > (scale_factor * insert_time1)) {
-			KUNIT_FAIL(test, "%s fragmented insert took %llu nsecs more\n",
-				   mode->name, insert_time2 - (scale_factor * insert_time1));
-			goto err;
-		}
-
-		drm_mm_for_each_node_safe(node, next, &mm)
-			drm_mm_remove_node(node);
-	}
-
-err:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	vfree(nodes);
-}
-
-static void drm_test_mm_align(struct kunit *test)
-{
-	const struct insert_mode *mode;
-	const unsigned int max_count = min(8192u, max_prime);
-	struct drm_mm mm;
-	struct drm_mm_node *nodes, *node, *next;
-	unsigned int prime;
-
-	/* For each of the possible insertion modes, we pick a few
-	 * arbitrary alignments and check that the inserted node
-	 * meets our requirements.
-	 */
-
-	nodes = vzalloc(array_size(max_count, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	drm_mm_init(&mm, 1, U64_MAX - 2);
-
-	for (mode = insert_modes; mode->name; mode++) {
-		unsigned int i = 0;
-
-		for_each_prime_number_from(prime, 1, max_count) {
-			u64 size = next_prime_number(prime);
-
-			if (!expect_insert(test, &mm, &nodes[i], size, prime, i, mode)) {
-				KUNIT_FAIL(test, "%s insert failed with alignment=%d",
-					   mode->name, prime);
-				goto out;
-			}
-
-			i++;
-		}
-
-		drm_mm_for_each_node_safe(node, next, &mm)
-			drm_mm_remove_node(node);
-		DRM_MM_BUG_ON(!drm_mm_clean(&mm));
-
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	vfree(nodes);
-}
-
-static void drm_test_mm_align_pot(struct kunit *test, int max)
-{
-	struct drm_mm mm;
-	struct drm_mm_node *node, *next;
-	int bit;
-
-	/* Check that we can align to the full u64 address space */
-
-	drm_mm_init(&mm, 1, U64_MAX - 2);
-
-	for (bit = max - 1; bit; bit--) {
-		u64 align, size;
-
-		node = kzalloc(sizeof(*node), GFP_KERNEL);
-		if (!node) {
-			KUNIT_FAIL(test, "failed to allocate node");
-			goto out;
-		}
-
-		align = BIT_ULL(bit);
-		size = BIT_ULL(bit - 1) + 1;
-		if (!expect_insert(test, &mm, node, size, align, bit, &insert_modes[0])) {
-			KUNIT_FAIL(test, "insert failed with alignment=%llx [%d]", align, bit);
-			goto out;
-		}
-
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm) {
-		drm_mm_remove_node(node);
-		kfree(node);
-	}
-	drm_mm_takedown(&mm);
-}
-
-static void drm_test_mm_align32(struct kunit *test)
-{
-	drm_test_mm_align_pot(test, 32);
-}
-
-static void drm_test_mm_align64(struct kunit *test)
-{
-	drm_test_mm_align_pot(test, 64);
-}
-
-static void show_scan(struct kunit *test, const struct drm_mm_scan *scan)
-{
-	kunit_info(test, "scan: hit [%llx, %llx], size=%lld, align=%lld, color=%ld\n",
-		   scan->hit_start, scan->hit_end, scan->size, scan->alignment, scan->color);
-}
-
-static void show_holes(struct kunit *test, const struct drm_mm *mm, int count)
-{
-	u64 hole_start, hole_end;
-	struct drm_mm_node *hole;
-
-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		struct drm_mm_node *next = list_next_entry(hole, node_list);
-		const char *node1 = NULL, *node2 = NULL;
-
-		if (drm_mm_node_allocated(hole))
-			node1 = kasprintf(GFP_KERNEL, "[%llx + %lld, color=%ld], ",
-					  hole->start, hole->size, hole->color);
-
-		if (drm_mm_node_allocated(next))
-			node2 = kasprintf(GFP_KERNEL, ", [%llx + %lld, color=%ld]",
-					  next->start, next->size, next->color);
-
-		kunit_info(test, "%sHole [%llx - %llx, size %lld]%s\n", node1,
-			   hole_start, hole_end, hole_end - hole_start, node2);
-
-		kfree(node2);
-		kfree(node1);
-
-		if (!--count)
-			break;
-	}
-}
-
-struct evict_node {
-	struct drm_mm_node node;
-	struct list_head link;
-};
-
-static bool evict_nodes(struct kunit *test, struct drm_mm_scan *scan,
-			struct evict_node *nodes, unsigned int *order, unsigned int count,
-			bool use_color, struct list_head *evict_list)
-{
-	struct evict_node *e, *en;
-	unsigned int i;
-
-	for (i = 0; i < count; i++) {
-		e = &nodes[order ? order[i] : i];
-		list_add(&e->link, evict_list);
-		if (drm_mm_scan_add_block(scan, &e->node))
-			break;
-	}
-	list_for_each_entry_safe(e, en, evict_list, link) {
-		if (!drm_mm_scan_remove_block(scan, &e->node))
-			list_del(&e->link);
-	}
-	if (list_empty(evict_list)) {
-		KUNIT_FAIL(test,
-			   "Failed to find eviction: size=%lld [avail=%d], align=%lld (color=%lu)\n",
-			   scan->size, count, scan->alignment, scan->color);
-		return false;
-	}
-
-	list_for_each_entry(e, evict_list, link)
-		drm_mm_remove_node(&e->node);
-
-	if (use_color) {
-		struct drm_mm_node *node;
-
-		while ((node = drm_mm_scan_color_evict(scan))) {
-			e = container_of(node, typeof(*e), node);
-			drm_mm_remove_node(&e->node);
-			list_add(&e->link, evict_list);
-		}
-	} else {
-		if (drm_mm_scan_color_evict(scan)) {
-			KUNIT_FAIL(test,
-				   "drm_mm_scan_color_evict unexpectedly reported overlapping nodes!\n");
-			return false;
-		}
-	}
-
-	return true;
-}
-
-static bool evict_nothing(struct kunit *test, struct drm_mm *mm,
-			  unsigned int total_size, struct evict_node *nodes)
-{
-	struct drm_mm_scan scan;
-	LIST_HEAD(evict_list);
-	struct evict_node *e;
-	struct drm_mm_node *node;
-	unsigned int n;
-
-	drm_mm_scan_init(&scan, mm, 1, 0, 0, 0);
-	for (n = 0; n < total_size; n++) {
-		e = &nodes[n];
-		list_add(&e->link, &evict_list);
-		drm_mm_scan_add_block(&scan, &e->node);
-	}
-	list_for_each_entry(e, &evict_list, link)
-		drm_mm_scan_remove_block(&scan, &e->node);
-
-	for (n = 0; n < total_size; n++) {
-		e = &nodes[n];
-
-		if (!drm_mm_node_allocated(&e->node)) {
-			KUNIT_FAIL(test, "node[%d] no longer allocated!\n", n);
-			return false;
-		}
-
-		e->link.next = NULL;
-	}
-
-	drm_mm_for_each_node(node, mm) {
-		e = container_of(node, typeof(*e), node);
-		e->link.next = &e->link;
-	}
-
-	for (n = 0; n < total_size; n++) {
-		e = &nodes[n];
-
-		if (!e->link.next) {
-			KUNIT_FAIL(test, "node[%d] no longer connected!\n", n);
-			return false;
-		}
-	}
-
-	return assert_continuous(test, mm, nodes[0].node.size);
-}
-
-static bool evict_everything(struct kunit *test, struct drm_mm *mm,
-			     unsigned int total_size, struct evict_node *nodes)
-{
-	struct drm_mm_scan scan;
-	LIST_HEAD(evict_list);
-	struct evict_node *e;
-	unsigned int n;
-	int err;
-
-	drm_mm_scan_init(&scan, mm, total_size, 0, 0, 0);
-	for (n = 0; n < total_size; n++) {
-		e = &nodes[n];
-		list_add(&e->link, &evict_list);
-		if (drm_mm_scan_add_block(&scan, &e->node))
-			break;
-	}
-
-	err = 0;
-	list_for_each_entry(e, &evict_list, link) {
-		if (!drm_mm_scan_remove_block(&scan, &e->node)) {
-			if (!err) {
-				KUNIT_FAIL(test, "Node %lld not marked for eviction!\n",
-					   e->node.start);
-				err = -EINVAL;
-			}
-		}
-	}
-	if (err)
-		return false;
-
-	list_for_each_entry(e, &evict_list, link)
-		drm_mm_remove_node(&e->node);
-
-	if (!assert_one_hole(test, mm, 0, total_size))
-		return false;
-
-	list_for_each_entry(e, &evict_list, link) {
-		err = drm_mm_reserve_node(mm, &e->node);
-		if (err) {
-			KUNIT_FAIL(test, "Failed to reinsert node after eviction: start=%llx\n",
-				   e->node.start);
-			return false;
-		}
-	}
-
-	return assert_continuous(test, mm, nodes[0].node.size);
-}
-
-static int evict_something(struct kunit *test, struct drm_mm *mm,
-			   u64 range_start, u64 range_end, struct evict_node *nodes,
-			   unsigned int *order, unsigned int count, unsigned int size,
-			   unsigned int alignment, const struct insert_mode *mode)
-{
-	struct drm_mm_scan scan;
-	LIST_HEAD(evict_list);
-	struct evict_node *e;
-	struct drm_mm_node tmp;
-	int err;
-
-	drm_mm_scan_init_with_range(&scan, mm, size, alignment, 0, range_start,
-				    range_end, mode->mode);
-	if (!evict_nodes(test, &scan, nodes, order, count, false, &evict_list))
-		return -EINVAL;
-
-	memset(&tmp, 0, sizeof(tmp));
-	err = drm_mm_insert_node_generic(mm, &tmp, size, alignment, 0,
-					 DRM_MM_INSERT_EVICT);
-	if (err) {
-		KUNIT_FAIL(test, "Failed to insert into eviction hole: size=%d, align=%d\n",
-			   size, alignment);
-		show_scan(test, &scan);
-		show_holes(test, mm, 3);
-		return err;
-	}
-
-	if (tmp.start < range_start || tmp.start + tmp.size > range_end) {
-		KUNIT_FAIL(test,
-			   "Inserted [address=%llu + %llu] did not fit into the request range [%llu, %llu]\n",
-			   tmp.start, tmp.size, range_start, range_end);
-		err = -EINVAL;
-	}
-
-	if (!assert_node(test, &tmp, mm, size, alignment, 0) ||
-	    drm_mm_hole_follows(&tmp)) {
-		KUNIT_FAIL(test,
-			   "Inserted did not fill the eviction hole: size=%lld [%d], align=%d [rem=%lld], start=%llx, hole-follows?=%d\n",
-			   tmp.size, size, alignment, misalignment(&tmp, alignment),
-			   tmp.start, drm_mm_hole_follows(&tmp));
-		err = -EINVAL;
-	}
-
-	drm_mm_remove_node(&tmp);
-	if (err)
-		return err;
-
-	list_for_each_entry(e, &evict_list, link) {
-		err = drm_mm_reserve_node(mm, &e->node);
-		if (err) {
-			KUNIT_FAIL(test, "Failed to reinsert node after eviction: start=%llx\n",
-				   e->node.start);
-			return err;
-		}
-	}
-
-	if (!assert_continuous(test, mm, nodes[0].node.size)) {
-		KUNIT_FAIL(test, "range is no longer continuous\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static void drm_test_mm_evict(struct kunit *test)
-{
-	DRM_RND_STATE(prng, random_seed);
-	const unsigned int size = 8192;
-	const struct insert_mode *mode;
-	struct drm_mm mm;
-	struct evict_node *nodes;
-	struct drm_mm_node *node, *next;
-	unsigned int *order, n;
-
-	/* Here we populate a full drm_mm and then try and insert a new node
-	 * by evicting other nodes in a random order. The drm_mm_scan should
-	 * pick the first matching hole it finds from the random list. We
-	 * repeat that for different allocation strategies, alignments and
-	 * sizes to try and stress the hole finder.
-	 */
-
-	nodes = vzalloc(array_size(size, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	order = drm_random_order(size, &prng);
-	if (!order)
-		goto err_nodes;
-
-	drm_mm_init(&mm, 0, size);
-	for (n = 0; n < size; n++) {
-		if (drm_mm_insert_node(&mm, &nodes[n].node, 1)) {
-			KUNIT_FAIL(test, "insert failed, step %d\n", n);
-			goto out;
-		}
-	}
-
-	/* First check that using the scanner doesn't break the mm */
-	if (!evict_nothing(test, &mm, size, nodes)) {
-		KUNIT_FAIL(test, "evict_nothing() failed\n");
-		goto out;
-	}
-	if (!evict_everything(test, &mm, size, nodes)) {
-		KUNIT_FAIL(test, "evict_everything() failed\n");
-		goto out;
-	}
-
-	for (mode = evict_modes; mode->name; mode++) {
-		for (n = 1; n <= size; n <<= 1) {
-			drm_random_reorder(order, size, &prng);
-			if (evict_something(test, &mm, 0, U64_MAX, nodes, order, size, n, 1,
-					    mode)) {
-				KUNIT_FAIL(test, "%s evict_something(size=%u) failed\n",
-					   mode->name, n);
-				goto out;
-			}
-		}
-
-		for (n = 1; n < size; n <<= 1) {
-			drm_random_reorder(order, size, &prng);
-			if (evict_something(test, &mm, 0, U64_MAX, nodes, order, size,
-					    size / 2, n, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_something(size=%u, alignment=%u) failed\n",
-					   mode->name, size / 2, n);
-				goto out;
-			}
-		}
-
-		for_each_prime_number_from(n, 1, min(size, max_prime)) {
-			unsigned int nsize = (size - n + 1) / 2;
-
-			DRM_MM_BUG_ON(!nsize);
-
-			drm_random_reorder(order, size, &prng);
-			if (evict_something(test, &mm, 0, U64_MAX, nodes, order, size,
-					    nsize, n, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_something(size=%u, alignment=%u) failed\n",
-					   mode->name, nsize, n);
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	kfree(order);
-err_nodes:
-	vfree(nodes);
-}
-
-static void drm_test_mm_evict_range(struct kunit *test)
-{
-	DRM_RND_STATE(prng, random_seed);
-	const unsigned int size = 8192;
-	const unsigned int range_size = size / 2;
-	const unsigned int range_start = size / 4;
-	const unsigned int range_end = range_start + range_size;
-	const struct insert_mode *mode;
-	struct drm_mm mm;
-	struct evict_node *nodes;
-	struct drm_mm_node *node, *next;
-	unsigned int *order, n;
-
-	/* Like drm_test_mm_evict() but now we are limiting the search to a
-	 * small portion of the full drm_mm.
-	 */
-
-	nodes = vzalloc(array_size(size, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	order = drm_random_order(size, &prng);
-	if (!order)
-		goto err_nodes;
-
-	drm_mm_init(&mm, 0, size);
-	for (n = 0; n < size; n++) {
-		if (drm_mm_insert_node(&mm, &nodes[n].node, 1)) {
-			KUNIT_FAIL(test, "insert failed, step %d\n", n);
-			goto out;
-		}
-	}
-
-	for (mode = evict_modes; mode->name; mode++) {
-		for (n = 1; n <= range_size; n <<= 1) {
-			drm_random_reorder(order, size, &prng);
-			if (evict_something(test, &mm, range_start, range_end, nodes,
-					    order, size, n, 1, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_something(size=%u) failed with range [%u, %u]\n",
-					   mode->name, n, range_start, range_end);
-				goto out;
-			}
-		}
-
-		for (n = 1; n <= range_size; n <<= 1) {
-			drm_random_reorder(order, size, &prng);
-			if (evict_something(test, &mm, range_start, range_end, nodes,
-					    order, size, range_size / 2, n, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_something(size=%u, alignment=%u) failed with range [%u, %u]\n",
-					   mode->name, range_size / 2, n, range_start, range_end);
-				goto out;
-			}
-		}
-
-		for_each_prime_number_from(n, 1, min(range_size, max_prime)) {
-			unsigned int nsize = (range_size - n + 1) / 2;
-
-			DRM_MM_BUG_ON(!nsize);
-
-			drm_random_reorder(order, size, &prng);
-			if (evict_something(test, &mm, range_start, range_end, nodes,
-					    order, size, nsize, n, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_something(size=%u, alignment=%u) failed with range [%u, %u]\n",
-					   mode->name, nsize, n, range_start, range_end);
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	kfree(order);
-err_nodes:
-	vfree(nodes);
-}
-
-static unsigned int node_index(const struct drm_mm_node *node)
-{
-	return div64_u64(node->start, node->size);
-}
-
-static void drm_test_mm_topdown(struct kunit *test)
-{
-	const struct insert_mode *topdown = &insert_modes[TOPDOWN];
-
-	DRM_RND_STATE(prng, random_seed);
-	const unsigned int count = 8192;
-	unsigned int size;
-	unsigned long *bitmap;
-	struct drm_mm mm;
-	struct drm_mm_node *nodes, *node, *next;
-	unsigned int *order, n, m, o = 0;
-
-	/* When allocating top-down, we expect to be returned a node
-	 * from a suitable hole at the top of the drm_mm. We check that
-	 * the returned node does match the highest available slot.
-	 */
-
-	nodes = vzalloc(array_size(count, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	bitmap = bitmap_zalloc(count, GFP_KERNEL);
-	if (!bitmap)
-		goto err_nodes;
-
-	order = drm_random_order(count, &prng);
-	if (!order)
-		goto err_bitmap;
-
-	for (size = 1; size <= 64; size <<= 1) {
-		drm_mm_init(&mm, 0, size * count);
-		for (n = 0; n < count; n++) {
-			if (!expect_insert(test, &mm, &nodes[n], size, 0, n, topdown)) {
-				KUNIT_FAIL(test, "insert failed, size %u step %d\n", size, n);
-				goto out;
-			}
-
-			if (drm_mm_hole_follows(&nodes[n])) {
-				KUNIT_FAIL(test,
-					   "hole after topdown insert %d, start=%llx\n, size=%u",
-					   n, nodes[n].start, size);
-				goto out;
-			}
-
-			if (!assert_one_hole(test, &mm, 0, size * (count - n - 1)))
-				goto out;
-		}
-
-		if (!assert_continuous(test, &mm, size))
-			goto out;
-
-		drm_random_reorder(order, count, &prng);
-		for_each_prime_number_from(n, 1, min(count, max_prime)) {
-			for (m = 0; m < n; m++) {
-				node = &nodes[order[(o + m) % count]];
-				drm_mm_remove_node(node);
-				__set_bit(node_index(node), bitmap);
-			}
-
-			for (m = 0; m < n; m++) {
-				unsigned int last;
-
-				node = &nodes[order[(o + m) % count]];
-				if (!expect_insert(test, &mm, node, size, 0, 0, topdown)) {
-					KUNIT_FAIL(test, "insert failed, step %d/%d\n", m, n);
-					goto out;
-				}
-
-				if (drm_mm_hole_follows(node)) {
-					KUNIT_FAIL(test,
-						   "hole after topdown insert %d/%d, start=%llx\n",
-						   m, n, node->start);
-					goto out;
-				}
-
-				last = find_last_bit(bitmap, count);
-				if (node_index(node) != last) {
-					KUNIT_FAIL(test,
-						   "node %d/%d, size %d, not inserted into upmost hole, expected %d, found %d\n",
-						   m, n, size, last, node_index(node));
-					goto out;
-				}
-
-				__clear_bit(last, bitmap);
-			}
-
-			DRM_MM_BUG_ON(find_first_bit(bitmap, count) != count);
-
-			o += n;
-		}
-
-		drm_mm_for_each_node_safe(node, next, &mm)
-			drm_mm_remove_node(node);
-		DRM_MM_BUG_ON(!drm_mm_clean(&mm));
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	kfree(order);
-err_bitmap:
-	bitmap_free(bitmap);
-err_nodes:
-	vfree(nodes);
-}
-
-static void drm_test_mm_bottomup(struct kunit *test)
-{
-	const struct insert_mode *bottomup = &insert_modes[BOTTOMUP];
-
-	DRM_RND_STATE(prng, random_seed);
-	const unsigned int count = 8192;
-	unsigned int size;
-	unsigned long *bitmap;
-	struct drm_mm mm;
-	struct drm_mm_node *nodes, *node, *next;
-	unsigned int *order, n, m, o = 0;
-
-	/* Like drm_test_mm_topdown, but instead of searching for the last hole,
-	 * we search for the first.
-	 */
-
-	nodes = vzalloc(array_size(count, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	bitmap = bitmap_zalloc(count, GFP_KERNEL);
-	if (!bitmap)
-		goto err_nodes;
-
-	order = drm_random_order(count, &prng);
-	if (!order)
-		goto err_bitmap;
-
-	for (size = 1; size <= 64; size <<= 1) {
-		drm_mm_init(&mm, 0, size * count);
-		for (n = 0; n < count; n++) {
-			if (!expect_insert(test, &mm, &nodes[n], size, 0, n, bottomup)) {
-				KUNIT_FAIL(test,
-					   "bottomup insert failed, size %u step %d\n", size, n);
-				goto out;
-			}
-
-			if (!assert_one_hole(test, &mm, size * (n + 1), size * count))
-				goto out;
-		}
-
-		if (!assert_continuous(test, &mm, size))
-			goto out;
-
-		drm_random_reorder(order, count, &prng);
-		for_each_prime_number_from(n, 1, min(count, max_prime)) {
-			for (m = 0; m < n; m++) {
-				node = &nodes[order[(o + m) % count]];
-				drm_mm_remove_node(node);
-				__set_bit(node_index(node), bitmap);
-			}
-
-			for (m = 0; m < n; m++) {
-				unsigned int first;
-
-				node = &nodes[order[(o + m) % count]];
-				if (!expect_insert(test, &mm, node, size, 0, 0, bottomup)) {
-					KUNIT_FAIL(test, "insert failed, step %d/%d\n", m, n);
-					goto out;
-				}
-
-				first = find_first_bit(bitmap, count);
-				if (node_index(node) != first) {
-					KUNIT_FAIL(test,
-						   "node %d/%d not inserted into bottom hole, expected %d, found %d\n",
-						   m, n, first, node_index(node));
-					goto out;
-				}
-				__clear_bit(first, bitmap);
-			}
-
-			DRM_MM_BUG_ON(find_first_bit(bitmap, count) != count);
-
-			o += n;
-		}
-
-		drm_mm_for_each_node_safe(node, next, &mm)
-			drm_mm_remove_node(node);
-		DRM_MM_BUG_ON(!drm_mm_clean(&mm));
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	kfree(order);
-err_bitmap:
-	bitmap_free(bitmap);
-err_nodes:
-	vfree(nodes);
-}
-
-static void drm_test_mm_once(struct kunit *test, unsigned int mode)
-{
-	struct drm_mm mm;
-	struct drm_mm_node rsvd_lo, rsvd_hi, node;
-
-	drm_mm_init(&mm, 0, 7);
-
-	memset(&rsvd_lo, 0, sizeof(rsvd_lo));
-	rsvd_lo.start = 1;
-	rsvd_lo.size = 1;
-	if (drm_mm_reserve_node(&mm, &rsvd_lo)) {
-		KUNIT_FAIL(test, "Could not reserve low node\n");
-		goto err;
-	}
-
-	memset(&rsvd_hi, 0, sizeof(rsvd_hi));
-	rsvd_hi.start = 5;
-	rsvd_hi.size = 1;
-	if (drm_mm_reserve_node(&mm, &rsvd_hi)) {
-		KUNIT_FAIL(test, "Could not reserve low node\n");
-		goto err_lo;
-	}
-
-	if (!drm_mm_hole_follows(&rsvd_lo) || !drm_mm_hole_follows(&rsvd_hi)) {
-		KUNIT_FAIL(test, "Expected a hole after lo and high nodes!\n");
-		goto err_hi;
-	}
-
-	memset(&node, 0, sizeof(node));
-	if (drm_mm_insert_node_generic(&mm, &node, 2, 0, 0, mode)) {
-		KUNIT_FAIL(test, "Could not insert the node into the available hole!\n");
-		goto err_hi;
-	}
-
-	drm_mm_remove_node(&node);
-err_hi:
-	drm_mm_remove_node(&rsvd_hi);
-err_lo:
-	drm_mm_remove_node(&rsvd_lo);
-err:
-	drm_mm_takedown(&mm);
-}
-
-static void drm_test_mm_lowest(struct kunit *test)
-{
-	drm_test_mm_once(test, DRM_MM_INSERT_LOW);
-}
+	drm_test_mm_once(test, DRM_MM_INSERT_LOW);
+}
 
 static void drm_test_mm_highest(struct kunit *test)
 {
 	drm_test_mm_once(test, DRM_MM_INSERT_HIGH);
 }
 
-static void separate_adjacent_colors(const struct drm_mm_node *node,
-				     unsigned long color, u64 *start, u64 *end)
-{
-	if (drm_mm_node_allocated(node) && node->color != color)
-		++*start;
-
-	node = list_next_entry(node, node_list);
-	if (drm_mm_node_allocated(node) && node->color != color)
-		--*end;
-}
-
-static bool colors_abutt(struct kunit *test, const struct drm_mm_node *node)
-{
-	if (!drm_mm_hole_follows(node) &&
-	    drm_mm_node_allocated(list_next_entry(node, node_list))) {
-		KUNIT_FAIL(test, "colors abutt; %ld [%llx + %llx] is next to %ld [%llx + %llx]!\n",
-			   node->color, node->start, node->size,
-		       list_next_entry(node, node_list)->color,
-		       list_next_entry(node, node_list)->start,
-		       list_next_entry(node, node_list)->size);
-		return true;
-	}
-
-	return false;
-}
-
-static void drm_test_mm_color(struct kunit *test)
-{
-	const unsigned int count = min(4096u, max_iterations);
-	const struct insert_mode *mode;
-	struct drm_mm mm;
-	struct drm_mm_node *node, *nn;
-	unsigned int n;
-
-	/* Color adjustment complicates everything. First we just check
-	 * that when we insert a node we apply any color_adjustment callback.
-	 * The callback we use should ensure that there is a gap between
-	 * any two nodes, and so after each insertion we check that those
-	 * holes are inserted and that they are preserved.
-	 */
-
-	drm_mm_init(&mm, 0, U64_MAX);
-
-	for (n = 1; n <= count; n++) {
-		node = kzalloc(sizeof(*node), GFP_KERNEL);
-		if (!node)
-			goto out;
-
-		if (!expect_insert(test, &mm, node, n, 0, n, &insert_modes[0])) {
-			KUNIT_FAIL(test, "insert failed, step %d\n", n);
-			kfree(node);
-			goto out;
-		}
-	}
-
-	drm_mm_for_each_node_safe(node, nn, &mm) {
-		if (node->color != node->size) {
-			KUNIT_FAIL(test, "invalid color stored: expected %lld, found %ld\n",
-				   node->size, node->color);
-
-			goto out;
-		}
-
-		drm_mm_remove_node(node);
-		kfree(node);
-	}
-
-	/* Now, let's start experimenting with applying a color callback */
-	mm.color_adjust = separate_adjacent_colors;
-	for (mode = insert_modes; mode->name; mode++) {
-		u64 last;
-
-		node = kzalloc(sizeof(*node), GFP_KERNEL);
-		if (!node)
-			goto out;
-
-		node->size = 1 + 2 * count;
-		node->color = node->size;
-
-		if (drm_mm_reserve_node(&mm, node)) {
-			KUNIT_FAIL(test, "initial reserve failed!\n");
-			goto out;
-		}
-
-		last = node->start + node->size;
-
-		for (n = 1; n <= count; n++) {
-			int rem;
-
-			node = kzalloc(sizeof(*node), GFP_KERNEL);
-			if (!node)
-				goto out;
-
-			node->start = last;
-			node->size = n + count;
-			node->color = node->size;
-
-			if (drm_mm_reserve_node(&mm, node) != -ENOSPC) {
-				KUNIT_FAIL(test, "reserve %d did not report color overlap!", n);
-				goto out;
-			}
-
-			node->start += n + 1;
-			rem = misalignment(node, n + count);
-			node->start += n + count - rem;
-
-			if (drm_mm_reserve_node(&mm, node)) {
-				KUNIT_FAIL(test, "reserve %d failed", n);
-				goto out;
-			}
-
-			last = node->start + node->size;
-		}
-
-		for (n = 1; n <= count; n++) {
-			node = kzalloc(sizeof(*node), GFP_KERNEL);
-			if (!node)
-				goto out;
-
-			if (!expect_insert(test, &mm, node, n, n, n, mode)) {
-				KUNIT_FAIL(test, "%s insert failed, step %d\n", mode->name, n);
-				kfree(node);
-				goto out;
-			}
-		}
-
-		drm_mm_for_each_node_safe(node, nn, &mm) {
-			u64 rem;
-
-			if (node->color != node->size) {
-				KUNIT_FAIL(test,
-					   "%s invalid color stored: expected %lld, found %ld\n",
-					   mode->name, node->size, node->color);
-
-				goto out;
-			}
-
-			if (colors_abutt(test, node))
-				goto out;
-
-			div64_u64_rem(node->start, node->size, &rem);
-			if (rem) {
-				KUNIT_FAIL(test,
-					   "%s colored node misaligned, start=%llx expected alignment=%lld [rem=%lld]\n",
-					   mode->name, node->start, node->size, rem);
-				goto out;
-			}
-
-			drm_mm_remove_node(node);
-			kfree(node);
-		}
-
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, nn, &mm) {
-		drm_mm_remove_node(node);
-		kfree(node);
-	}
-	drm_mm_takedown(&mm);
-}
-
-static int evict_color(struct kunit *test, struct drm_mm *mm, u64 range_start,
-		       u64 range_end, struct evict_node *nodes, unsigned int *order,
-		unsigned int count, unsigned int size, unsigned int alignment,
-		unsigned long color, const struct insert_mode *mode)
-{
-	struct drm_mm_scan scan;
-	LIST_HEAD(evict_list);
-	struct evict_node *e;
-	struct drm_mm_node tmp;
-	int err;
-
-	drm_mm_scan_init_with_range(&scan, mm, size, alignment, color, range_start,
-				    range_end, mode->mode);
-	if (!evict_nodes(test, &scan, nodes, order, count, true, &evict_list))
-		return -EINVAL;
-
-	memset(&tmp, 0, sizeof(tmp));
-	err = drm_mm_insert_node_generic(mm, &tmp, size, alignment, color,
-					 DRM_MM_INSERT_EVICT);
-	if (err) {
-		KUNIT_FAIL(test,
-			   "Failed to insert into eviction hole: size=%d, align=%d, color=%lu, err=%d\n",
-			   size, alignment, color, err);
-		show_scan(test, &scan);
-		show_holes(test, mm, 3);
-		return err;
-	}
-
-	if (tmp.start < range_start || tmp.start + tmp.size > range_end) {
-		KUNIT_FAIL(test,
-			   "Inserted [address=%llu + %llu] did not fit into the request range [%llu, %llu]\n",
-			   tmp.start, tmp.size, range_start, range_end);
-		err = -EINVAL;
-	}
-
-	if (colors_abutt(test, &tmp))
-		err = -EINVAL;
-
-	if (!assert_node(test, &tmp, mm, size, alignment, color)) {
-		KUNIT_FAIL(test,
-			   "Inserted did not fit the eviction hole: size=%lld [%d], align=%d [rem=%lld], start=%llx\n",
-			   tmp.size, size, alignment, misalignment(&tmp, alignment), tmp.start);
-		err = -EINVAL;
-	}
-
-	drm_mm_remove_node(&tmp);
-	if (err)
-		return err;
-
-	list_for_each_entry(e, &evict_list, link) {
-		err = drm_mm_reserve_node(mm, &e->node);
-		if (err) {
-			KUNIT_FAIL(test, "Failed to reinsert node after eviction: start=%llx\n",
-				   e->node.start);
-			return err;
-		}
-	}
-
-	cond_resched();
-	return 0;
-}
-
-static void drm_test_mm_color_evict(struct kunit *test)
-{
-	DRM_RND_STATE(prng, random_seed);
-	const unsigned int total_size = min(8192u, max_iterations);
-	const struct insert_mode *mode;
-	unsigned long color = 0;
-	struct drm_mm mm;
-	struct evict_node *nodes;
-	struct drm_mm_node *node, *next;
-	unsigned int *order, n;
-
-	/* Check that the drm_mm_scan also honours color adjustment when
-	 * choosing its victims to create a hole. Our color_adjust does not
-	 * allow two nodes to be placed together without an intervening hole
-	 * enlarging the set of victims that must be evicted.
-	 */
-
-	nodes = vzalloc(array_size(total_size, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	order = drm_random_order(total_size, &prng);
-	if (!order)
-		goto err_nodes;
-
-	drm_mm_init(&mm, 0, 2 * total_size - 1);
-	mm.color_adjust = separate_adjacent_colors;
-	for (n = 0; n < total_size; n++) {
-		if (!expect_insert(test, &mm, &nodes[n].node,
-				   1, 0, color++,
-				   &insert_modes[0])) {
-			KUNIT_FAIL(test, "insert failed, step %d\n", n);
-			goto out;
-		}
-	}
-
-	for (mode = evict_modes; mode->name; mode++) {
-		for (n = 1; n <= total_size; n <<= 1) {
-			drm_random_reorder(order, total_size, &prng);
-			if (evict_color(test, &mm, 0, U64_MAX, nodes, order, total_size,
-					n, 1, color++, mode)) {
-				KUNIT_FAIL(test, "%s evict_color(size=%u) failed\n", mode->name, n);
-				goto out;
-			}
-		}
-
-		for (n = 1; n < total_size; n <<= 1) {
-			drm_random_reorder(order, total_size, &prng);
-			if (evict_color(test, &mm, 0, U64_MAX, nodes, order, total_size,
-					total_size / 2, n, color++, mode)) {
-				KUNIT_FAIL(test, "%s evict_color(size=%u, alignment=%u) failed\n",
-					   mode->name, total_size / 2, n);
-				goto out;
-			}
-		}
-
-		for_each_prime_number_from(n, 1, min(total_size, max_prime)) {
-			unsigned int nsize = (total_size - n + 1) / 2;
-
-			DRM_MM_BUG_ON(!nsize);
-
-			drm_random_reorder(order, total_size, &prng);
-			if (evict_color(test, &mm, 0, U64_MAX, nodes, order, total_size,
-					nsize, n, color++, mode)) {
-				KUNIT_FAIL(test, "%s evict_color(size=%u, alignment=%u) failed\n",
-					   mode->name, nsize, n);
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	kfree(order);
-err_nodes:
-	vfree(nodes);
-}
-
-static void drm_test_mm_color_evict_range(struct kunit *test)
-{
-	DRM_RND_STATE(prng, random_seed);
-	const unsigned int total_size = 8192;
-	const unsigned int range_size = total_size / 2;
-	const unsigned int range_start = total_size / 4;
-	const unsigned int range_end = range_start + range_size;
-	const struct insert_mode *mode;
-	unsigned long color = 0;
-	struct drm_mm mm;
-	struct evict_node *nodes;
-	struct drm_mm_node *node, *next;
-	unsigned int *order, n;
-
-	/* Like drm_test_mm_color_evict(), but limited to small portion of the full
-	 * drm_mm range.
-	 */
-
-	nodes = vzalloc(array_size(total_size, sizeof(*nodes)));
-	KUNIT_ASSERT_TRUE(test, nodes);
-
-	order = drm_random_order(total_size, &prng);
-	if (!order)
-		goto err_nodes;
-
-	drm_mm_init(&mm, 0, 2 * total_size - 1);
-	mm.color_adjust = separate_adjacent_colors;
-	for (n = 0; n < total_size; n++) {
-		if (!expect_insert(test, &mm, &nodes[n].node,
-				   1, 0, color++,
-				   &insert_modes[0])) {
-			KUNIT_FAIL(test, "insert failed, step %d\n", n);
-			goto out;
-		}
-	}
-
-	for (mode = evict_modes; mode->name; mode++) {
-		for (n = 1; n <= range_size; n <<= 1) {
-			drm_random_reorder(order, range_size, &prng);
-			if (evict_color(test, &mm, range_start, range_end, nodes, order,
-					total_size, n, 1, color++, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_color(size=%u) failed for range [%x, %x]\n",
-						mode->name, n, range_start, range_end);
-				goto out;
-			}
-		}
-
-		for (n = 1; n < range_size; n <<= 1) {
-			drm_random_reorder(order, total_size, &prng);
-			if (evict_color(test, &mm, range_start, range_end, nodes, order,
-					total_size, range_size / 2, n, color++, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_color(size=%u, alignment=%u) failed for range [%x, %x]\n",
-					   mode->name, total_size / 2, n, range_start, range_end);
-				goto out;
-			}
-		}
-
-		for_each_prime_number_from(n, 1, min(range_size, max_prime)) {
-			unsigned int nsize = (range_size - n + 1) / 2;
-
-			DRM_MM_BUG_ON(!nsize);
-
-			drm_random_reorder(order, total_size, &prng);
-			if (evict_color(test, &mm, range_start, range_end, nodes, order,
-					total_size, nsize, n, color++, mode)) {
-				KUNIT_FAIL(test,
-					   "%s evict_color(size=%u, alignment=%u) failed for range [%x, %x]\n",
-					   mode->name, nsize, n, range_start, range_end);
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-out:
-	drm_mm_for_each_node_safe(node, next, &mm)
-		drm_mm_remove_node(node);
-	drm_mm_takedown(&mm);
-	kfree(order);
-err_nodes:
-	vfree(nodes);
-}
-
-static int drm_mm_suite_init(struct kunit_suite *suite)
-{
-	while (!random_seed)
-		random_seed = get_random_u32();
-
-	kunit_info(suite,
-		   "Testing DRM range manager, with random_seed=0x%x max_iterations=%u max_prime=%u\n",
-		   random_seed, max_iterations, max_prime);
-
-	return 0;
-}
-
-module_param(random_seed, uint, 0400);
-module_param(max_iterations, uint, 0400);
-module_param(max_prime, uint, 0400);
-
 static struct kunit_case drm_mm_tests[] = {
 	KUNIT_CASE(drm_test_mm_init),
 	KUNIT_CASE(drm_test_mm_debug),
-	KUNIT_CASE(drm_test_mm_reserve),
-	KUNIT_CASE(drm_test_mm_insert),
-	KUNIT_CASE(drm_test_mm_replace),
-	KUNIT_CASE(drm_test_mm_insert_range),
-	KUNIT_CASE(drm_test_mm_frag),
-	KUNIT_CASE(drm_test_mm_align),
 	KUNIT_CASE(drm_test_mm_align32),
 	KUNIT_CASE(drm_test_mm_align64),
-	KUNIT_CASE(drm_test_mm_evict),
-	KUNIT_CASE(drm_test_mm_evict_range),
-	KUNIT_CASE(drm_test_mm_topdown),
-	KUNIT_CASE(drm_test_mm_bottomup),
 	KUNIT_CASE(drm_test_mm_lowest),
 	KUNIT_CASE(drm_test_mm_highest),
-	KUNIT_CASE(drm_test_mm_color),
-	KUNIT_CASE(drm_test_mm_color_evict),
-	KUNIT_CASE(drm_test_mm_color_evict_range),
 	{}
 };
 
 static struct kunit_suite drm_mm_test_suite = {
 	.name = "drm_mm",
-	.suite_init = drm_mm_suite_init,
 	.test_cases = drm_mm_tests,
 };
 
-- 
GitLab


From a0a0bd3effea22d9886ee6ff98c591acd9565787 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Wed, 25 Oct 2023 15:24:28 +0200
Subject: [PATCH 0194/2340] drm/todo: Add entry to clean up former seltests
 suites

Most of those suites are undocumented and aren't really clear about what
they are testing. Let's add a TODO entry as a future task to get started
into KUnit and DRM.

Acked-by: Daniel Vetter <daniel@ffwll.ch>
Link: https://lore.kernel.org/r/20231025132428.723672-2-mripard@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 Documentation/gpu/todo.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 03fe5d1247be2..b62c7fa0c2bcc 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -621,6 +621,23 @@ Contact: Javier Martinez Canillas <javierm@redhat.com>
 
 Level: Intermediate
 
+Clean up and document former selftests suites
+---------------------------------------------
+
+Some KUnit test suites (drm_buddy, drm_cmdline_parser, drm_damage_helper,
+drm_format, drm_framebuffer, drm_dp_mst_helper, drm_mm, drm_plane_helper and
+drm_rect) are former selftests suites that have been converted over when KUnit
+was first introduced.
+
+These suites were fairly undocumented, and with different goals than what unit
+tests can be. Trying to identify what each test in these suites actually test
+for, whether that makes sense for a unit test, and either remove it if it
+doesn't or document it if it does would be of great help.
+
+Contact: Maxime Ripard <mripard@kernel.org>
+
+Level: Intermediate
+
 Enable trinity for DRM
 ----------------------
 
-- 
GitLab


From c400eb4d6f5f603f4f3f6cc4b6fdacd416ff142e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Mon, 6 Nov 2023 10:41:20 -0300
Subject: [PATCH 0195/2340] MAINTAINERS: Add Maira to V3D maintainers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've been contributing to V3D with improvements, reviews, testing and
debugging. Therefore, add myself as a co-maintainer of the V3D driver.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Acked-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231106134201.725805-1-mcanal@igalia.com
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4e1a63479cad4..ec79843f842aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7105,6 +7105,7 @@ F:	drivers/gpu/drm/omapdrm/
 
 DRM DRIVERS FOR V3D
 M:	Melissa Wen <mwen@igalia.com>
+M:	Maíra Canal <mcanal@igalia.com>
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
-- 
GitLab


From 8eb80946ab0c18a853be5f90d6b6ccbe3fd42989 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 12:16:38 +0200
Subject: [PATCH 0196/2340] drm/edid: split out drm_eld.h from drm_edid.h

The drm_edid.[ch] files are starting to be a bit crowded, and with plans
to add more ELD related functionality, it's perhaps cleanest to split
the ELD code out to a header of its own.

Include drm_eld.h from drm_edid.h for starters, and leave it to
follow-up work to only include drm_eld.h where needed.

Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/0c6d631fa1058036d72dd25d1cabc90a7c52490e.1698747331.git.jani.nikula@intel.com
---
 Documentation/gpu/drm-kms-helpers.rst |   3 +
 include/drm/drm_edid.h                | 149 +-----------------------
 include/drm/drm_eld.h                 | 159 ++++++++++++++++++++++++++
 3 files changed, 163 insertions(+), 148 deletions(-)
 create mode 100644 include/drm/drm_eld.h

diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index b748b8ae70b2a..cfa8e6c399b64 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -363,6 +363,9 @@ EDID Helper Functions Reference
 .. kernel-doc:: drivers/gpu/drm/drm_edid.c
    :export:
 
+.. kernel-doc:: include/drm/drm_eld.h
+   :internal:
+
 SCDC Helper Functions Reference
 ===============================
 
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index 882d2638708e5..1ff52f57ab9cc 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -25,6 +25,7 @@
 
 #include <linux/types.h>
 #include <linux/hdmi.h>
+#include <drm/drm_eld.h> /* FIXME: remove this, include directly where needed */
 #include <drm/drm_mode.h>
 
 struct drm_device;
@@ -269,64 +270,6 @@ struct detailed_timing {
 #define DRM_EDID_DSC_MAX_SLICES			0xf
 #define DRM_EDID_DSC_TOTAL_CHUNK_KBYTES		0x3f
 
-/* ELD Header Block */
-#define DRM_ELD_HEADER_BLOCK_SIZE	4
-
-#define DRM_ELD_VER			0
-# define DRM_ELD_VER_SHIFT		3
-# define DRM_ELD_VER_MASK		(0x1f << 3)
-# define DRM_ELD_VER_CEA861D		(2 << 3) /* supports 861D or below */
-# define DRM_ELD_VER_CANNED		(0x1f << 3)
-
-#define DRM_ELD_BASELINE_ELD_LEN	2	/* in dwords! */
-
-/* ELD Baseline Block for ELD_Ver == 2 */
-#define DRM_ELD_CEA_EDID_VER_MNL	4
-# define DRM_ELD_CEA_EDID_VER_SHIFT	5
-# define DRM_ELD_CEA_EDID_VER_MASK	(7 << 5)
-# define DRM_ELD_CEA_EDID_VER_NONE	(0 << 5)
-# define DRM_ELD_CEA_EDID_VER_CEA861	(1 << 5)
-# define DRM_ELD_CEA_EDID_VER_CEA861A	(2 << 5)
-# define DRM_ELD_CEA_EDID_VER_CEA861BCD	(3 << 5)
-# define DRM_ELD_MNL_SHIFT		0
-# define DRM_ELD_MNL_MASK		(0x1f << 0)
-
-#define DRM_ELD_SAD_COUNT_CONN_TYPE	5
-# define DRM_ELD_SAD_COUNT_SHIFT	4
-# define DRM_ELD_SAD_COUNT_MASK		(0xf << 4)
-# define DRM_ELD_CONN_TYPE_SHIFT	2
-# define DRM_ELD_CONN_TYPE_MASK		(3 << 2)
-# define DRM_ELD_CONN_TYPE_HDMI		(0 << 2)
-# define DRM_ELD_CONN_TYPE_DP		(1 << 2)
-# define DRM_ELD_SUPPORTS_AI		(1 << 1)
-# define DRM_ELD_SUPPORTS_HDCP		(1 << 0)
-
-#define DRM_ELD_AUD_SYNCH_DELAY		6	/* in units of 2 ms */
-# define DRM_ELD_AUD_SYNCH_DELAY_MAX	0xfa	/* 500 ms */
-
-#define DRM_ELD_SPEAKER			7
-# define DRM_ELD_SPEAKER_MASK		0x7f
-# define DRM_ELD_SPEAKER_RLRC		(1 << 6)
-# define DRM_ELD_SPEAKER_FLRC		(1 << 5)
-# define DRM_ELD_SPEAKER_RC		(1 << 4)
-# define DRM_ELD_SPEAKER_RLR		(1 << 3)
-# define DRM_ELD_SPEAKER_FC		(1 << 2)
-# define DRM_ELD_SPEAKER_LFE		(1 << 1)
-# define DRM_ELD_SPEAKER_FLR		(1 << 0)
-
-#define DRM_ELD_PORT_ID			8	/* offsets 8..15 inclusive */
-# define DRM_ELD_PORT_ID_LEN		8
-
-#define DRM_ELD_MANUFACTURER_NAME0	16
-#define DRM_ELD_MANUFACTURER_NAME1	17
-
-#define DRM_ELD_PRODUCT_CODE0		18
-#define DRM_ELD_PRODUCT_CODE1		19
-
-#define DRM_ELD_MONITOR_NAME_STRING	20	/* offsets 20..(20+mnl-1) inclusive */
-
-#define DRM_ELD_CEA_SAD(mnl, sad)	(20 + (mnl) + 3 * (sad))
-
 struct edid {
 	u8 header[8];
 	/* Vendor & product info */
@@ -409,96 +352,6 @@ drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame,
 				   const struct drm_display_mode *mode,
 				   enum hdmi_quantization_range rgb_quant_range);
 
-/**
- * drm_eld_mnl - Get ELD monitor name length in bytes.
- * @eld: pointer to an eld memory structure with mnl set
- */
-static inline int drm_eld_mnl(const uint8_t *eld)
-{
-	return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT;
-}
-
-/**
- * drm_eld_sad - Get ELD SAD structures.
- * @eld: pointer to an eld memory structure with sad_count set
- */
-static inline const uint8_t *drm_eld_sad(const uint8_t *eld)
-{
-	unsigned int ver, mnl;
-
-	ver = (eld[DRM_ELD_VER] & DRM_ELD_VER_MASK) >> DRM_ELD_VER_SHIFT;
-	if (ver != 2 && ver != 31)
-		return NULL;
-
-	mnl = drm_eld_mnl(eld);
-	if (mnl > 16)
-		return NULL;
-
-	return eld + DRM_ELD_CEA_SAD(mnl, 0);
-}
-
-/**
- * drm_eld_sad_count - Get ELD SAD count.
- * @eld: pointer to an eld memory structure with sad_count set
- */
-static inline int drm_eld_sad_count(const uint8_t *eld)
-{
-	return (eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_SAD_COUNT_MASK) >>
-		DRM_ELD_SAD_COUNT_SHIFT;
-}
-
-/**
- * drm_eld_calc_baseline_block_size - Calculate baseline block size in bytes
- * @eld: pointer to an eld memory structure with mnl and sad_count set
- *
- * This is a helper for determining the payload size of the baseline block, in
- * bytes, for e.g. setting the Baseline_ELD_Len field in the ELD header block.
- */
-static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld)
-{
-	return DRM_ELD_MONITOR_NAME_STRING - DRM_ELD_HEADER_BLOCK_SIZE +
-		drm_eld_mnl(eld) + drm_eld_sad_count(eld) * 3;
-}
-
-/**
- * drm_eld_size - Get ELD size in bytes
- * @eld: pointer to a complete eld memory structure
- *
- * The returned value does not include the vendor block. It's vendor specific,
- * and comprises of the remaining bytes in the ELD memory buffer after
- * drm_eld_size() bytes of header and baseline block.
- *
- * The returned value is guaranteed to be a multiple of 4.
- */
-static inline int drm_eld_size(const uint8_t *eld)
-{
-	return DRM_ELD_HEADER_BLOCK_SIZE + eld[DRM_ELD_BASELINE_ELD_LEN] * 4;
-}
-
-/**
- * drm_eld_get_spk_alloc - Get speaker allocation
- * @eld: pointer to an ELD memory structure
- *
- * The returned value is the speakers mask. User has to use %DRM_ELD_SPEAKER
- * field definitions to identify speakers.
- */
-static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld)
-{
-	return eld[DRM_ELD_SPEAKER] & DRM_ELD_SPEAKER_MASK;
-}
-
-/**
- * drm_eld_get_conn_type - Get device type hdmi/dp connected
- * @eld: pointer to an ELD memory structure
- *
- * The caller need to use %DRM_ELD_CONN_TYPE_HDMI or %DRM_ELD_CONN_TYPE_DP to
- * identify the display type connected.
- */
-static inline u8 drm_eld_get_conn_type(const uint8_t *eld)
-{
-	return eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_CONN_TYPE_MASK;
-}
-
 /**
  * drm_edid_decode_mfg_id - Decode the manufacturer ID
  * @mfg_id: The manufacturer ID
diff --git a/include/drm/drm_eld.h b/include/drm/drm_eld.h
new file mode 100644
index 0000000000000..9bde89bd96eaf
--- /dev/null
+++ b/include/drm/drm_eld.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __DRM_ELD_H__
+#define __DRM_ELD_H__
+
+#include <linux/types.h>
+
+/* ELD Header Block */
+#define DRM_ELD_HEADER_BLOCK_SIZE	4
+
+#define DRM_ELD_VER			0
+# define DRM_ELD_VER_SHIFT		3
+# define DRM_ELD_VER_MASK		(0x1f << 3)
+# define DRM_ELD_VER_CEA861D		(2 << 3) /* supports 861D or below */
+# define DRM_ELD_VER_CANNED		(0x1f << 3)
+
+#define DRM_ELD_BASELINE_ELD_LEN	2	/* in dwords! */
+
+/* ELD Baseline Block for ELD_Ver == 2 */
+#define DRM_ELD_CEA_EDID_VER_MNL	4
+# define DRM_ELD_CEA_EDID_VER_SHIFT	5
+# define DRM_ELD_CEA_EDID_VER_MASK	(7 << 5)
+# define DRM_ELD_CEA_EDID_VER_NONE	(0 << 5)
+# define DRM_ELD_CEA_EDID_VER_CEA861	(1 << 5)
+# define DRM_ELD_CEA_EDID_VER_CEA861A	(2 << 5)
+# define DRM_ELD_CEA_EDID_VER_CEA861BCD	(3 << 5)
+# define DRM_ELD_MNL_SHIFT		0
+# define DRM_ELD_MNL_MASK		(0x1f << 0)
+
+#define DRM_ELD_SAD_COUNT_CONN_TYPE	5
+# define DRM_ELD_SAD_COUNT_SHIFT	4
+# define DRM_ELD_SAD_COUNT_MASK		(0xf << 4)
+# define DRM_ELD_CONN_TYPE_SHIFT	2
+# define DRM_ELD_CONN_TYPE_MASK		(3 << 2)
+# define DRM_ELD_CONN_TYPE_HDMI		(0 << 2)
+# define DRM_ELD_CONN_TYPE_DP		(1 << 2)
+# define DRM_ELD_SUPPORTS_AI		(1 << 1)
+# define DRM_ELD_SUPPORTS_HDCP		(1 << 0)
+
+#define DRM_ELD_AUD_SYNCH_DELAY		6	/* in units of 2 ms */
+# define DRM_ELD_AUD_SYNCH_DELAY_MAX	0xfa	/* 500 ms */
+
+#define DRM_ELD_SPEAKER			7
+# define DRM_ELD_SPEAKER_MASK		0x7f
+# define DRM_ELD_SPEAKER_RLRC		(1 << 6)
+# define DRM_ELD_SPEAKER_FLRC		(1 << 5)
+# define DRM_ELD_SPEAKER_RC		(1 << 4)
+# define DRM_ELD_SPEAKER_RLR		(1 << 3)
+# define DRM_ELD_SPEAKER_FC		(1 << 2)
+# define DRM_ELD_SPEAKER_LFE		(1 << 1)
+# define DRM_ELD_SPEAKER_FLR		(1 << 0)
+
+#define DRM_ELD_PORT_ID			8	/* offsets 8..15 inclusive */
+# define DRM_ELD_PORT_ID_LEN		8
+
+#define DRM_ELD_MANUFACTURER_NAME0	16
+#define DRM_ELD_MANUFACTURER_NAME1	17
+
+#define DRM_ELD_PRODUCT_CODE0		18
+#define DRM_ELD_PRODUCT_CODE1		19
+
+#define DRM_ELD_MONITOR_NAME_STRING	20	/* offsets 20..(20+mnl-1) inclusive */
+
+#define DRM_ELD_CEA_SAD(mnl, sad)	(20 + (mnl) + 3 * (sad))
+
+/**
+ * drm_eld_mnl - Get ELD monitor name length in bytes.
+ * @eld: pointer to an eld memory structure with mnl set
+ */
+static inline int drm_eld_mnl(const uint8_t *eld)
+{
+	return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT;
+}
+
+/**
+ * drm_eld_sad - Get ELD SAD structures.
+ * @eld: pointer to an eld memory structure with sad_count set
+ */
+static inline const uint8_t *drm_eld_sad(const uint8_t *eld)
+{
+	unsigned int ver, mnl;
+
+	ver = (eld[DRM_ELD_VER] & DRM_ELD_VER_MASK) >> DRM_ELD_VER_SHIFT;
+	if (ver != 2 && ver != 31)
+		return NULL;
+
+	mnl = drm_eld_mnl(eld);
+	if (mnl > 16)
+		return NULL;
+
+	return eld + DRM_ELD_CEA_SAD(mnl, 0);
+}
+
+/**
+ * drm_eld_sad_count - Get ELD SAD count.
+ * @eld: pointer to an eld memory structure with sad_count set
+ */
+static inline int drm_eld_sad_count(const uint8_t *eld)
+{
+	return (eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_SAD_COUNT_MASK) >>
+		DRM_ELD_SAD_COUNT_SHIFT;
+}
+
+/**
+ * drm_eld_calc_baseline_block_size - Calculate baseline block size in bytes
+ * @eld: pointer to an eld memory structure with mnl and sad_count set
+ *
+ * This is a helper for determining the payload size of the baseline block, in
+ * bytes, for e.g. setting the Baseline_ELD_Len field in the ELD header block.
+ */
+static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld)
+{
+	return DRM_ELD_MONITOR_NAME_STRING - DRM_ELD_HEADER_BLOCK_SIZE +
+		drm_eld_mnl(eld) + drm_eld_sad_count(eld) * 3;
+}
+
+/**
+ * drm_eld_size - Get ELD size in bytes
+ * @eld: pointer to a complete eld memory structure
+ *
+ * The returned value does not include the vendor block. It's vendor specific,
+ * and comprises of the remaining bytes in the ELD memory buffer after
+ * drm_eld_size() bytes of header and baseline block.
+ *
+ * The returned value is guaranteed to be a multiple of 4.
+ */
+static inline int drm_eld_size(const uint8_t *eld)
+{
+	return DRM_ELD_HEADER_BLOCK_SIZE + eld[DRM_ELD_BASELINE_ELD_LEN] * 4;
+}
+
+/**
+ * drm_eld_get_spk_alloc - Get speaker allocation
+ * @eld: pointer to an ELD memory structure
+ *
+ * The returned value is the speakers mask. User has to use %DRM_ELD_SPEAKER
+ * field definitions to identify speakers.
+ */
+static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld)
+{
+	return eld[DRM_ELD_SPEAKER] & DRM_ELD_SPEAKER_MASK;
+}
+
+/**
+ * drm_eld_get_conn_type - Get device type hdmi/dp connected
+ * @eld: pointer to an ELD memory structure
+ *
+ * The caller need to use %DRM_ELD_CONN_TYPE_HDMI or %DRM_ELD_CONN_TYPE_DP to
+ * identify the display type connected.
+ */
+static inline u8 drm_eld_get_conn_type(const uint8_t *eld)
+{
+	return eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_CONN_TYPE_MASK;
+}
+
+#endif /* __DRM_ELD_H__ */
-- 
GitLab


From 533914536bf5cb5984755244f5aa13cf93cc84d3 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 12:16:39 +0200
Subject: [PATCH 0197/2340] drm/eld: replace uint8_t with u8

Unify on kernel types.

Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/6e048fc4c8a3ebec638ce27b0b8b969a3d2fa8bc.1698747331.git.jani.nikula@intel.com
---
 include/drm/drm_eld.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/drm/drm_eld.h b/include/drm/drm_eld.h
index 9bde89bd96eaf..7b674256b9aa0 100644
--- a/include/drm/drm_eld.h
+++ b/include/drm/drm_eld.h
@@ -70,7 +70,7 @@
  * drm_eld_mnl - Get ELD monitor name length in bytes.
  * @eld: pointer to an eld memory structure with mnl set
  */
-static inline int drm_eld_mnl(const uint8_t *eld)
+static inline int drm_eld_mnl(const u8 *eld)
 {
 	return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT;
 }
@@ -79,7 +79,7 @@ static inline int drm_eld_mnl(const uint8_t *eld)
  * drm_eld_sad - Get ELD SAD structures.
  * @eld: pointer to an eld memory structure with sad_count set
  */
-static inline const uint8_t *drm_eld_sad(const uint8_t *eld)
+static inline const u8 *drm_eld_sad(const u8 *eld)
 {
 	unsigned int ver, mnl;
 
@@ -98,7 +98,7 @@ static inline const uint8_t *drm_eld_sad(const uint8_t *eld)
  * drm_eld_sad_count - Get ELD SAD count.
  * @eld: pointer to an eld memory structure with sad_count set
  */
-static inline int drm_eld_sad_count(const uint8_t *eld)
+static inline int drm_eld_sad_count(const u8 *eld)
 {
 	return (eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_SAD_COUNT_MASK) >>
 		DRM_ELD_SAD_COUNT_SHIFT;
@@ -111,7 +111,7 @@ static inline int drm_eld_sad_count(const uint8_t *eld)
  * This is a helper for determining the payload size of the baseline block, in
  * bytes, for e.g. setting the Baseline_ELD_Len field in the ELD header block.
  */
-static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld)
+static inline int drm_eld_calc_baseline_block_size(const u8 *eld)
 {
 	return DRM_ELD_MONITOR_NAME_STRING - DRM_ELD_HEADER_BLOCK_SIZE +
 		drm_eld_mnl(eld) + drm_eld_sad_count(eld) * 3;
@@ -127,7 +127,7 @@ static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld)
  *
  * The returned value is guaranteed to be a multiple of 4.
  */
-static inline int drm_eld_size(const uint8_t *eld)
+static inline int drm_eld_size(const u8 *eld)
 {
 	return DRM_ELD_HEADER_BLOCK_SIZE + eld[DRM_ELD_BASELINE_ELD_LEN] * 4;
 }
@@ -139,7 +139,7 @@ static inline int drm_eld_size(const uint8_t *eld)
  * The returned value is the speakers mask. User has to use %DRM_ELD_SPEAKER
  * field definitions to identify speakers.
  */
-static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld)
+static inline u8 drm_eld_get_spk_alloc(const u8 *eld)
 {
 	return eld[DRM_ELD_SPEAKER] & DRM_ELD_SPEAKER_MASK;
 }
@@ -151,7 +151,7 @@ static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld)
  * The caller need to use %DRM_ELD_CONN_TYPE_HDMI or %DRM_ELD_CONN_TYPE_DP to
  * identify the display type connected.
  */
-static inline u8 drm_eld_get_conn_type(const uint8_t *eld)
+static inline u8 drm_eld_get_conn_type(const u8 *eld)
 {
 	return eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_CONN_TYPE_MASK;
 }
-- 
GitLab


From 439590ace7755657523a1a0230c6099cb0a6e15f Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 12:16:40 +0200
Subject: [PATCH 0198/2340] drm/edid: include drm_eld.h only where required

Reduce the dependencies on drm_eld.h. Some files might be able to drop
the dependency on drm_edid.h too with the direct inclusion of drm_eld.h.

Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/9f5963ce900d747f3279312c0cd1da599fd83f94.1698747331.git.jani.nikula@intel.com
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c    | 1 +
 drivers/gpu/drm/drm_edid.c                           | 1 +
 drivers/gpu/drm/i915/display/intel_audio.c           | 1 +
 drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 1 +
 drivers/gpu/drm/i915/display/intel_sdvo.c            | 1 +
 drivers/gpu/drm/nouveau/dispnv50/disp.c              | 1 +
 drivers/gpu/drm/radeon/radeon_audio.c                | 1 +
 drivers/gpu/drm/tegra/hdmi.c                         | 1 +
 drivers/gpu/drm/tegra/sor.c                          | 1 +
 include/drm/drm_edid.h                               | 1 -
 sound/core/pcm_drm_eld.c                             | 1 +
 sound/soc/codecs/hdac_hdmi.c                         | 1 +
 sound/soc/codecs/hdmi-codec.c                        | 1 +
 sound/x86/intel_hdmi_audio.c                         | 1 +
 14 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c6fd34bab3585..42fa2e142712d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -87,6 +87,7 @@
 #include <drm/drm_blend.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 #include <drm/drm_vblank.h>
 #include <drm/drm_audio_component.h>
 #include <drm/drm_gem_atomic_helper.h>
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index ec1cb4890acb5..85f719f67b504 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -41,6 +41,7 @@
 #include <drm/drm_displayid.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_print.h>
 
diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index 19605264a35c3..39f5b698e08a0 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -25,6 +25,7 @@
 #include <linux/kernel.h>
 
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 #include <drm/i915_component.h>
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
index 66fe880af8f3f..2d15e82c0b3d3 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
@@ -4,6 +4,7 @@
  */
 
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 
 #include "i915_drv.h"
 #include "intel_crtc_state_dump.h"
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 950ba0431f5f0..f967fe06f5496 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -35,6 +35,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 
 #include "i915_drv.h"
 #include "i915_reg.h"
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index a0ac8c258d9ff..334ca7f919f29 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -38,6 +38,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index d6ccaf24ee0c7..279bf130a18c4 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -26,6 +26,7 @@
 #include <linux/component.h>
 
 #include <drm/drm_crtc.h>
+#include <drm/drm_eld.h>
 #include "dce6_afmt.h"
 #include "evergreen_hdmi.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 0ba3ca3ac509d..a1fcee665023b 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -24,6 +24,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_debugfs.h>
+#include <drm/drm_eld.h>
 #include <drm/drm_file.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index d5a3d3f4fece4..83341576630d8 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -20,6 +20,7 @@
 #include <drm/display/drm_scdc_helper.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_debugfs.h>
+#include <drm/drm_eld.h>
 #include <drm/drm_file.h>
 #include <drm/drm_panel.h>
 #include <drm/drm_simple_kms_helper.h>
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index 1ff52f57ab9cc..e98aa68187007 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -25,7 +25,6 @@
 
 #include <linux/types.h>
 #include <linux/hdmi.h>
-#include <drm/drm_eld.h> /* FIXME: remove this, include directly where needed */
 #include <drm/drm_mode.h>
 
 struct drm_device;
diff --git a/sound/core/pcm_drm_eld.c b/sound/core/pcm_drm_eld.c
index 07075071972dd..1cdca4d4fc9cd 100644
--- a/sound/core/pcm_drm_eld.c
+++ b/sound/core/pcm_drm_eld.c
@@ -6,6 +6,7 @@
 #include <linux/export.h>
 #include <linux/hdmi.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 #include <sound/pcm.h>
 #include <sound/pcm_drm_eld.h>
 
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index 8b6b760296948..d1b53fc1efb66 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -16,6 +16,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/hdmi.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 #include <sound/pcm_params.h>
 #include <sound/jack.h>
 #include <sound/soc.h>
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index 13689e718d36f..9b4e9192cd79a 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -17,6 +17,7 @@
 #include <sound/pcm_iec958.h>
 
 #include <drm/drm_crtc.h> /* This is only to get MAX_ELD_BYTES */
+#include <drm/drm_eld.h>
 
 #define HDMI_CODEC_CHMAP_IDX_UNKNOWN  -1
 
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index ab95fb34a6358..02f5a7f9b7288 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -30,6 +30,7 @@
 #include <sound/control.h>
 #include <sound/jack.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
 #include <drm/intel_lpe_audio.h>
 #include "intel_hdmi_audio.h"
 
-- 
GitLab


From e8d0b2c06fd779709baea71d5e8bfd99b2116518 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 12:16:41 +0200
Subject: [PATCH 0199/2340] drm/edid: use a temp variable for sads to drop one
 level of dereferences

Use a temporary variable struct cea_sad *, instead of using struct
cea_sad ** directly with the double dereferences. It's arguably easier
on the eyes, and drops a set of parenthesis too.

Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/b6e2f295ae5491c2bb0f528508f0f5fca921dc77.1698747331.git.jani.nikula@intel.com
---
 drivers/gpu/drm/drm_edid.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 85f719f67b504..4f3e7e855f639 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5591,7 +5591,7 @@ static void drm_edid_to_eld(struct drm_connector *connector,
 }
 
 static int _drm_edid_to_sad(const struct drm_edid *drm_edid,
-			    struct cea_sad **sads)
+			    struct cea_sad **psads)
 {
 	const struct cea_db *db;
 	struct cea_db_iter iter;
@@ -5600,19 +5600,21 @@ static int _drm_edid_to_sad(const struct drm_edid *drm_edid,
 	cea_db_iter_edid_begin(drm_edid, &iter);
 	cea_db_iter_for_each(db, &iter) {
 		if (cea_db_tag(db) == CTA_DB_AUDIO) {
+			struct cea_sad *sads;
 			int j;
 
 			count = cea_db_payload_len(db) / 3; /* SAD is 3B */
-			*sads = kcalloc(count, sizeof(**sads), GFP_KERNEL);
-			if (!*sads)
+			sads = kcalloc(count, sizeof(*sads), GFP_KERNEL);
+			*psads = sads;
+			if (!sads)
 				return -ENOMEM;
 			for (j = 0; j < count; j++) {
 				const u8 *sad = &db->data[j * 3];
 
-				(*sads)[j].format = (sad[0] & 0x78) >> 3;
-				(*sads)[j].channels = sad[0] & 0x7;
-				(*sads)[j].freq = sad[1] & 0x7F;
-				(*sads)[j].byte2 = sad[2];
+				sads[j].format = (sad[0] & 0x78) >> 3;
+				sads[j].channels = sad[0] & 0x7;
+				sads[j].freq = sad[1] & 0x7F;
+				sads[j].byte2 = sad[2];
 			}
 			break;
 		}
-- 
GitLab


From 8af4681189e58a51be8a0fc9f0687e615cdb82c9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 12:16:42 +0200
Subject: [PATCH 0200/2340] drm/edid: add helpers to get/set struct cea_sad
 from/to 3-byte sad

Add helpers to pack/unpack SADs. Both ways and non-static, as follow-up
work needs them.

v2: Add include to get the declarations

Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/21d657ca854ce26423b461c0bb71e7a0727ba437.1698747331.git.jani.nikula@intel.com
---
 drivers/gpu/drm/drm_edid.c     | 34 +++++++++++++++++++++++++---------
 drivers/gpu/drm/drm_internal.h |  6 ++++++
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 4f3e7e855f639..9619f91c8331f 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -46,6 +46,7 @@
 #include <drm/drm_print.h>
 
 #include "drm_crtc_internal.h"
+#include "drm_internal.h"
 
 static int oui(u8 first, u8 second, u8 third)
 {
@@ -5507,6 +5508,27 @@ static void clear_eld(struct drm_connector *connector)
 	connector->audio_latency[1] = 0;
 }
 
+/*
+ * Get 3-byte SAD buffer from struct cea_sad.
+ */
+void drm_edid_cta_sad_get(const struct cea_sad *cta_sad, u8 *sad)
+{
+	sad[0] = cta_sad->format << 3 | cta_sad->channels;
+	sad[1] = cta_sad->freq;
+	sad[2] = cta_sad->byte2;
+}
+
+/*
+ * Set struct cea_sad from 3-byte SAD buffer.
+ */
+void drm_edid_cta_sad_set(struct cea_sad *cta_sad, const u8 *sad)
+{
+	cta_sad->format = (sad[0] & 0x78) >> 3;
+	cta_sad->channels = sad[0] & 0x07;
+	cta_sad->freq = sad[1] & 0x7f;
+	cta_sad->byte2 = sad[2];
+}
+
 /*
  * drm_edid_to_eld - build ELD from EDID
  * @connector: connector corresponding to the HDMI/DP sink
@@ -5601,21 +5623,15 @@ static int _drm_edid_to_sad(const struct drm_edid *drm_edid,
 	cea_db_iter_for_each(db, &iter) {
 		if (cea_db_tag(db) == CTA_DB_AUDIO) {
 			struct cea_sad *sads;
-			int j;
+			int i;
 
 			count = cea_db_payload_len(db) / 3; /* SAD is 3B */
 			sads = kcalloc(count, sizeof(*sads), GFP_KERNEL);
 			*psads = sads;
 			if (!sads)
 				return -ENOMEM;
-			for (j = 0; j < count; j++) {
-				const u8 *sad = &db->data[j * 3];
-
-				sads[j].format = (sad[0] & 0x78) >> 3;
-				sads[j].channels = sad[0] & 0x7;
-				sads[j].freq = sad[1] & 0x7F;
-				sads[j].byte2 = sad[2];
-			}
+			for (i = 0; i < count; i++)
+				drm_edid_cta_sad_set(&sads[i], &db->data[i * 3]);
 			break;
 		}
 	}
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 8462b657c3758..b12c463bc4605 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -22,6 +22,7 @@
  */
 
 #include <linux/kthread.h>
+#include <linux/types.h>
 
 #include <drm/drm_ioctl.h>
 #include <drm/drm_vblank.h>
@@ -31,6 +32,7 @@
 
 #define DRM_IF_VERSION(maj, min) (maj << 16 | min)
 
+struct cea_sad;
 struct dentry;
 struct dma_buf;
 struct iosys_map;
@@ -267,3 +269,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
 				const struct drm_framebuffer *fb);
 void drm_framebuffer_debugfs_init(struct drm_device *dev);
+
+/* drm_edid.c */
+void drm_edid_cta_sad_get(const struct cea_sad *cta_sad, u8 *sad);
+void drm_edid_cta_sad_set(struct cea_sad *cta_sad, const u8 *sad);
-- 
GitLab


From f415a6078f640ab15bae34d3c6a1d8e6071363de Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 31 Oct 2023 12:16:43 +0200
Subject: [PATCH 0201/2340] drm/eld: add helpers to modify the SADs of an ELD

Occasionally it's necessary for drivers to modify the SADs of an ELD,
but it's not so cool to have drivers poke at the ELD buffer directly.

Using the helpers to translate between 3-byte SAD and struct cea_sad,
add ELD helpers to get/set the SADs from/to an ELD.

v2: s/i/sad_index/ (Mitul)

Cc: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/8e9a05f2b1e0dd184132d636e1e778e8917ec25d.1698747331.git.jani.nikula@intel.com
---
 Documentation/gpu/drm-kms-helpers.rst |  3 ++
 drivers/gpu/drm/Makefile              |  1 +
 drivers/gpu/drm/drm_eld.c             | 55 +++++++++++++++++++++++++++
 include/drm/drm_eld.h                 |  5 +++
 4 files changed, 64 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_eld.c

diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index cfa8e6c399b64..59cfe8a7a8bac 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -366,6 +366,9 @@ EDID Helper Functions Reference
 .. kernel-doc:: include/drm/drm_eld.h
    :internal:
 
+.. kernel-doc:: drivers/gpu/drm/drm_eld.c
+   :export:
+
 SCDC Helper Functions Reference
 ===============================
 
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 8e1bde059170e..cdbe91ac0bfc8 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -22,6 +22,7 @@ drm-y := \
 	drm_drv.o \
 	drm_dumb_buffers.o \
 	drm_edid.o \
+	drm_eld.o \
 	drm_encoder.o \
 	drm_file.o \
 	drm_fourcc.o \
diff --git a/drivers/gpu/drm/drm_eld.c b/drivers/gpu/drm/drm_eld.c
new file mode 100644
index 0000000000000..5177991aa2726
--- /dev/null
+++ b/drivers/gpu/drm/drm_eld.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
+
+#include "drm_internal.h"
+
+/**
+ * drm_eld_sad_get - get SAD from ELD to struct cea_sad
+ * @eld: ELD buffer
+ * @sad_index: SAD index
+ * @cta_sad: destination struct cea_sad
+ *
+ * @return: 0 on success, or negative on errors
+ */
+int drm_eld_sad_get(const u8 *eld, int sad_index, struct cea_sad *cta_sad)
+{
+	const u8 *sad;
+
+	if (sad_index >= drm_eld_sad_count(eld))
+		return -EINVAL;
+
+	sad = eld + DRM_ELD_CEA_SAD(drm_eld_mnl(eld), sad_index);
+
+	drm_edid_cta_sad_set(cta_sad, sad);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_eld_sad_get);
+
+/**
+ * drm_eld_sad_set - set SAD to ELD from struct cea_sad
+ * @eld: ELD buffer
+ * @sad_index: SAD index
+ * @cta_sad: source struct cea_sad
+ *
+ * @return: 0 on success, or negative on errors
+ */
+int drm_eld_sad_set(u8 *eld, int sad_index, const struct cea_sad *cta_sad)
+{
+	u8 *sad;
+
+	if (sad_index >= drm_eld_sad_count(eld))
+		return -EINVAL;
+
+	sad = eld + DRM_ELD_CEA_SAD(drm_eld_mnl(eld), sad_index);
+
+	drm_edid_cta_sad_get(cta_sad, sad);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_eld_sad_set);
diff --git a/include/drm/drm_eld.h b/include/drm/drm_eld.h
index 7b674256b9aa0..0a88d10b28b03 100644
--- a/include/drm/drm_eld.h
+++ b/include/drm/drm_eld.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+struct cea_sad;
+
 /* ELD Header Block */
 #define DRM_ELD_HEADER_BLOCK_SIZE	4
 
@@ -75,6 +77,9 @@ static inline int drm_eld_mnl(const u8 *eld)
 	return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT;
 }
 
+int drm_eld_sad_get(const u8 *eld, int sad_index, struct cea_sad *cta_sad);
+int drm_eld_sad_set(u8 *eld, int sad_index, const struct cea_sad *cta_sad);
+
 /**
  * drm_eld_sad - Get ELD SAD structures.
  * @eld: pointer to an eld memory structure with sad_count set
-- 
GitLab


From f3123c2590005c5ff631653d31428e40cd10c618 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Thu, 9 Nov 2023 18:53:26 -0500
Subject: [PATCH 0202/2340] drm/sched: Qualify drm_sched_wakeup() by
 drm_sched_entity_is_ready()

Don't "wake up" the GPU scheduler unless the entity is ready, as well as we
can queue to the scheduler, i.e. there is no point in waking up the scheduler
for the entity unless the entity is ready.

Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Fixes: bc8d6a9df99038 ("drm/sched: Don't disturb the entity when in RR-mode scheduling")
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110000123.72565-2-ltuikov89@gmail.com
---
 drivers/gpu/drm/scheduler/sched_entity.c | 4 ++--
 drivers/gpu/drm/scheduler/sched_main.c   | 8 +++++---
 include/drm/gpu_scheduler.h              | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index f1db63cc81981..4d42b1e4daa67 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
 		container_of(cb, struct drm_sched_entity, cb);
 
 	drm_sched_entity_clear_dep(f, cb);
-	drm_sched_wakeup(entity->rq->sched);
+	drm_sched_wakeup(entity->rq->sched, entity);
 }
 
 /**
@@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
 			drm_sched_rq_update_fifo(entity, submit_ts);
 
-		drm_sched_wakeup(entity->rq->sched);
+		drm_sched_wakeup(entity->rq->sched, entity);
 	}
 }
 EXPORT_SYMBOL(drm_sched_entity_push_job);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index cd0dc3f81d05f..8f5e466bd5823 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -925,10 +925,12 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
  *
  * Wake up the scheduler if we can queue jobs.
  */
-void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
+		      struct drm_sched_entity *entity)
 {
-	if (drm_sched_can_queue(sched))
-		drm_sched_run_job_queue(sched);
+	if (drm_sched_entity_is_ready(entity))
+		if (drm_sched_can_queue(sched))
+			drm_sched_run_job_queue(sched);
 }
 
 /**
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 754fd2217334e..09916c84703f5 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -559,7 +559,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
 
 void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
 void drm_sched_job_cleanup(struct drm_sched_job *job);
-void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity);
 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
-- 
GitLab


From fc6e7679296530106ee0954e8ddef1aa58b2e0b5 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Tue, 7 Nov 2023 12:41:51 -0800
Subject: [PATCH 0203/2340] drm/panel-edp: drm/panel-edp: Fix AUO B116XAK01
 name and timing

Rename AUO 0x405c B116XAK01 to B116XAK01.0 and adjust the timing of
auo_b116xak01: T3=200, T12=500, T7_max = 50 according to decoding edid
and datasheet.

Fixes: da458286a5e2 ("drm/panel: Add support for AUO B116XAK01 panel")
Cc: stable@vger.kernel.org
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107204611.3082200-2-hsinyi@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 9dce4c7024142..2fba7c1f49ce0 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -973,6 +973,8 @@ static const struct panel_desc auo_b116xak01 = {
 	},
 	.delay = {
 		.hpd_absent = 200,
+		.unprepare = 500,
+		.enable = 50,
 	},
 };
 
@@ -1870,7 +1872,7 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x235c, &delay_200_500_e50, "B116XTN02"),
-	EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"),
-- 
GitLab


From 962845c090c4f85fa4f6872a5b6c89ee61f53cc0 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Tue, 7 Nov 2023 12:41:52 -0800
Subject: [PATCH 0204/2340] drm/panel-edp: drm/panel-edp: Fix AUO B116XTN02
 name

Rename AUO 0x235c B116XTN02 to B116XTN02.3 according to decoding edid.

Fixes: 3db2420422a5 ("drm/panel-edp: Add AUO B116XTN02, BOE NT116WHM-N21,836X2, NV116WHM-N49 V8.0")
Cc: stable@vger.kernel.org
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107204611.3082200-3-hsinyi@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 2fba7c1f49ce0..d41d205f7f5bf 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1871,7 +1871,7 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x145c, &delay_200_500_e50, "B116XAB01.4"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"),
-	EDP_PANEL_ENTRY('A', 'U', 'O', 0x235c, &delay_200_500_e50, "B116XTN02"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x235c, &delay_200_500_e50, "B116XTN02.3"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"),
-- 
GitLab


From 4d53cf81479500d7af787fe6bc881c24ec31f005 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Tue, 7 Nov 2023 12:41:53 -0800
Subject: [PATCH 0205/2340] drm/panel-edp: drm/panel-edp: Add several generic
 edp panels

Add a few generic edp panels used by mt8186 chromebooks.

Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107204611.3082200-4-hsinyi@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 51 +++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index d41d205f7f5bf..599a949d74d16 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1832,6 +1832,12 @@ static const struct panel_delay delay_200_500_e50 = {
 	.enable = 50,
 };
 
+static const struct panel_delay delay_200_500_e80 = {
+	.hpd_absent = 200,
+	.unprepare = 500,
+	.enable = 80,
+};
+
 static const struct panel_delay delay_200_500_e80_d50 = {
 	.hpd_absent = 200,
 	.unprepare = 500,
@@ -1851,6 +1857,19 @@ static const struct panel_delay delay_200_500_e200 = {
 	.enable = 200,
 };
 
+static const struct panel_delay delay_200_500_e200_d10 = {
+	.hpd_absent = 200,
+	.unprepare = 500,
+	.enable = 200,
+	.disable = 10,
+};
+
+static const struct panel_delay delay_200_150_e200 = {
+	.hpd_absent = 200,
+	.unprepare = 150,
+	.enable = 200,
+};
+
 #define EDP_PANEL_ENTRY(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name) \
 { \
 	.name = _name, \
@@ -1871,37 +1890,69 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x145c, &delay_200_500_e50, "B116XAB01.4"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x208d, &delay_200_500_e50, "B140HTN02.1"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x235c, &delay_200_500_e50, "B116XTN02.3"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x239b, &delay_200_500_e50, "B116XAN06.1"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x255c, &delay_200_500_e50, "B116XTN02.5"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x403d, &delay_200_500_e50, "B140HAN04.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x635c, &delay_200_500_e50, "B116XAN06.3"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0x639c, &delay_200_500_e50, "B140HAK02.7"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"),
+	EDP_PANEL_ENTRY('A', 'U', 'O', 0xf390, &delay_200_500_e50, "B140XTN07.7"),
 
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0715, &delay_200_150_e200, "NT116WHM-N21"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0731, &delay_200_500_e80, "NT116WHM-N42"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0741, &delay_200_500_e200, "NT116WHM-N44"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0786, &delay_200_500_p2e80, "NV116WHM-T01"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x07d1, &boe_nv133fhm_n61.delay, "NV133FHM-N61"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x07f6, &delay_200_500_e200, "NT140FHM-N44"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x082d, &boe_nv133fhm_n61.delay, "NV133FHM-N62"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x08b2, &delay_200_500_e200, "NT140WHM-N49"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x09c3, &delay_200_500_e50, "NT116WHM-N21,836X2"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x094b, &delay_200_500_e50, "NT116WHM-N21"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0951, &delay_200_500_e80, "NV116WHM-N47"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x095f, &delay_200_500_e50, "NE135FBM-N41 v8.1"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0979, &delay_200_500_e50, "NV116WHM-N49 V8.0"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x098d, &boe_nv110wtm_n61.delay, "NV110WTM-N61"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x09ae, &delay_200_500_e200, "NT140FHM-N45"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x09dd, &delay_200_500_e50, "NT116WHM-N21"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"),
 	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ac5, &delay_200_500_e50, "NV116WHM-N4C"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b43, &delay_200_500_e200, "NV140FHM-T09"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b56, &delay_200_500_e80, "NT140FHM-N47"),
+	EDP_PANEL_ENTRY('B', 'O', 'E', 0x0c20, &delay_200_500_e80, "NT140FHM-N47"),
 
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1132, &delay_200_500_e80_d50, "N116BGE-EA2"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1138, &innolux_n116bca_ea1.delay, "N116BCA-EA1-RC4"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1139, &delay_200_500_e80_d50, "N116BGE-EA2"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1145, &delay_200_500_e80_d50, "N116BCN-EB1"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x114c, &innolux_n116bca_ea1.delay, "N116BCA-EA1"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1152, &delay_200_500_e80_d50, "N116BCN-EA1"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1153, &delay_200_500_e80_d50, "N116BGE-EA2"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1154, &delay_200_500_e80_d50, "N116BCA-EA2"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1157, &delay_200_500_e80_d50, "N116BGE-EA2"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x115b, &delay_200_500_e80_d50, "N116BCN-EB1"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1247, &delay_200_500_e80_d50, "N120ACA-EA1"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x142b, &delay_200_500_e80_d50, "N140HCA-EAC"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x144f, &delay_200_500_e80_d50, "N140HGA-EA1"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1468, &delay_200_500_e80, "N140HGA-EA1"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x14e5, &delay_200_500_e80_d50, "N140HGA-EA1"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d4, &delay_200_500_e80_d50, "N140HCA-EAC"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d6, &delay_200_500_e80_d50, "N140BGA-EA4"),
+
+	EDP_PANEL_ENTRY('H', 'K', 'C', 0x2d5c, &delay_200_500_e200, "MB116AN01-2"),
 
+	EDP_PANEL_ENTRY('I', 'V', 'O', 0x048e, &delay_200_500_e200_d10, "M116NWR6 R5"),
 	EDP_PANEL_ENTRY('I', 'V', 'O', 0x057d, &delay_200_500_e200, "R140NWF5 RH"),
 	EDP_PANEL_ENTRY('I', 'V', 'O', 0x854a, &delay_200_500_p2e100, "M133NW4J"),
 	EDP_PANEL_ENTRY('I', 'V', 'O', 0x854b, &delay_200_500_p2e100, "R133NW4K-R0"),
+	EDP_PANEL_ENTRY('I', 'V', 'O', 0x8c4d, &delay_200_150_e200, "R140NWFM R1"),
 
 	EDP_PANEL_ENTRY('K', 'D', 'B', 0x0624, &kingdisplay_kd116n21_30nv_a010.delay, "116N21-30NV-A010"),
+	EDP_PANEL_ENTRY('K', 'D', 'C', 0x0809, &delay_200_500_e50, "KD116N2930A15"),
 	EDP_PANEL_ENTRY('K', 'D', 'B', 0x1120, &delay_200_500_e80_d50, "116N29-30NK-C007"),
 
 	EDP_PANEL_ENTRY('S', 'H', 'P', 0x1511, &delay_200_500_e50, "LQ140M1JW48"),
-- 
GitLab


From 36245bd02e88e68ac5955c2958c968879d7b75a9 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Thu, 9 Nov 2023 19:11:46 -0500
Subject: [PATCH 0206/2340] drm/sched: Define pr_fmt() for DRM using pr_*()

Define pr_fmt() as "[drm] " for DRM code using pr_*() facilities, especially
when no devices are available. This makes it easier to browse kernel logs.

Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Acked-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110002659.113208-2-ltuikov89@gmail.com
---
 include/drm/drm_print.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index a93a387f8a1a1..e8fe60d0eb878 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -26,6 +26,20 @@
 #ifndef DRM_PRINT_H_
 #define DRM_PRINT_H_
 
+/* Define this before including linux/printk.h, so that the format
+ * string in pr_*() macros is correctly set for DRM. If a file wants
+ * to define this to something else, it should do so before including
+ * this header file.
+ *
+ * It is encouraged code using pr_err() to prefix their format with
+ * the string "*ERROR* ", to make it easier to scan kernel logs. For
+ * instance,
+ *   pr_err("*ERROR* <the rest of your format string here>", args).
+ */
+#ifndef pr_fmt
+#define pr_fmt(fmt) "[drm] " fmt
+#endif
+
 #include <linux/compiler.h>
 #include <linux/printk.h>
 #include <linux/seq_file.h>
-- 
GitLab


From a78422e9dff366b3a46ae44caf6ec8ded9c9fc2f Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Fri, 10 Nov 2023 01:16:33 +0100
Subject: [PATCH 0207/2340] drm/sched: implement dynamic job-flow control

Currently, job flow control is implemented simply by limiting the number
of jobs in flight. Therefore, a scheduler is initialized with a credit
limit that corresponds to the number of jobs which can be sent to the
hardware.

This implies that for each job, drivers need to account for the maximum
job size possible in order to not overflow the ring buffer.

However, there are drivers, such as Nouveau, where the job size has a
rather large range. For such drivers it can easily happen that job
submissions not even filling the ring by 1% can block subsequent
submissions, which, in the worst case, can lead to the ring run dry.

In order to overcome this issue, allow for tracking the actual job size
instead of the number of jobs. Therefore, add a field to track a job's
credit count, which represents the number of credits a job contributes
to the scheduler's credit limit.

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110001638.71750-1-dakr@redhat.com
---
 Documentation/gpu/drm-mm.rst                  |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |   2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |   2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c         |   2 +-
 drivers/gpu/drm/lima/lima_device.c            |   2 +-
 drivers/gpu/drm/lima/lima_sched.c             |   2 +-
 drivers/gpu/drm/msm/msm_gem_submit.c          |   2 +-
 drivers/gpu/drm/nouveau/nouveau_sched.c       |   2 +-
 drivers/gpu/drm/panfrost/panfrost_drv.c       |   2 +-
 drivers/gpu/drm/panfrost/panfrost_job.c       |   2 +-
 .../gpu/drm/scheduler/gpu_scheduler_trace.h   |   2 +-
 drivers/gpu/drm/scheduler/sched_main.c        | 170 ++++++++++++++----
 drivers/gpu/drm/v3d/v3d_gem.c                 |   2 +-
 include/drm/gpu_scheduler.h                   |  28 ++-
 14 files changed, 175 insertions(+), 51 deletions(-)

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index 602010cb6894c..acc5901ac8408 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -552,6 +552,12 @@ Overview
 .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
    :doc: Overview
 
+Flow Control
+------------
+
+.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
+   :doc: Flow Control
+
 Scheduler Function References
 -----------------------------
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1f357198533f3..62bb7fc7448ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (!entity)
 		return 0;
 
-	return drm_sched_job_init(&(*job)->base, entity, owner);
+	return drm_sched_job_init(&(*job)->base, entity, 1, owner);
 }
 
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 2416c526f9b06..3d0f8d182506e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 	ret = drm_sched_job_init(&submit->sched_job,
 				 &ctx->sched_entity[args->pipe],
-				 submit->ctx);
+				 1, submit->ctx);
 	if (ret)
 		goto err_submit_put;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 9276756e1397d..5105d290e72e2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1917,7 +1917,7 @@ static int etnaviv_gpu_rpm_suspend(struct device *dev)
 	u32 idle, mask;
 
 	/* If there are any jobs in the HW queue, we're not idle */
-	if (atomic_read(&gpu->sched.hw_rq_count))
+	if (atomic_read(&gpu->sched.credit_count))
 		return -EBUSY;
 
 	/* Check whether the hardware (except FE and MC) is idle */
diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
index 02cef0cea6572..0bf7105c8748b 100644
--- a/drivers/gpu/drm/lima/lima_device.c
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -514,7 +514,7 @@ int lima_device_suspend(struct device *dev)
 
 	/* check any task running */
 	for (i = 0; i < lima_pipe_num; i++) {
-		if (atomic_read(&ldev->pipe[i].base.hw_rq_count))
+		if (atomic_read(&ldev->pipe[i].base.credit_count))
 			return -EBUSY;
 	}
 
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index aa030e1f7cdae..c3bf8cda84982 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sched_task *task,
 	for (i = 0; i < num_bos; i++)
 		drm_gem_object_get(&bos[i]->base.base);
 
-	err = drm_sched_job_init(&task->base, &context->base, vm);
+	err = drm_sched_job_init(&task->base, &context->base, 1, vm);
 	if (err) {
 		kfree(task->bos);
 		return err;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 99744de6c05a1..c002cabe7b9c5 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -48,7 +48,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 		return ERR_PTR(ret);
 	}
 
-	ret = drm_sched_job_init(&submit->base, queue->entity, queue);
+	ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue);
 	if (ret) {
 		kfree(submit->hw_fence);
 		kfree(submit);
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 7e64b5ef90fb2..1b2cc3f2e1c7e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -89,7 +89,7 @@ nouveau_job_init(struct nouveau_job *job,
 
 	}
 
-	ret = drm_sched_job_init(&job->base, &entity->base, NULL);
+	ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL);
 	if (ret)
 		goto err_free_chains;
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index b834777b409b0..54d1c19bea84d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -274,7 +274,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 
 	ret = drm_sched_job_init(&job->base,
 				 &file_priv->sched_entity[slot],
-				 NULL);
+				 1, NULL);
 	if (ret)
 		goto out_put_job;
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 6d89e24322dbf..f9446e197428d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -963,7 +963,7 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev)
 
 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
 		/* If there are any jobs in the HW queue, we're not idle */
-		if (atomic_read(&js->queue[i].sched.hw_rq_count))
+		if (atomic_read(&js->queue[i].sched.credit_count))
 			return false;
 	}
 
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index 3143ecaaff862..f8ed093b7356e 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job,
 			   __assign_str(name, sched_job->sched->name);
 			   __entry->job_count = spsc_queue_count(&entity->job_queue);
 			   __entry->hw_job_count = atomic_read(
-				   &sched_job->sched->hw_rq_count);
+				   &sched_job->sched->credit_count);
 			   ),
 	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
 		      __entry->entity, __entry->id,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 8f5e466bd5823..044a8c4875ba6 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -48,6 +48,30 @@
  * through the jobs entity pointer.
  */
 
+/**
+ * DOC: Flow Control
+ *
+ * The DRM GPU scheduler provides a flow control mechanism to regulate the rate
+ * in which the jobs fetched from scheduler entities are executed.
+ *
+ * In this context the &drm_gpu_scheduler keeps track of a driver specified
+ * credit limit representing the capacity of this scheduler and a credit count;
+ * every &drm_sched_job carries a driver specified number of credits.
+ *
+ * Once a job is executed (but not yet finished), the job's credits contribute
+ * to the scheduler's credit count until the job is finished. If by executing
+ * one more job the scheduler's credit count would exceed the scheduler's
+ * credit limit, the job won't be executed. Instead, the scheduler will wait
+ * until the credit count has decreased enough to not overflow its credit limit.
+ * This implies waiting for previously executed jobs.
+ *
+ * Optionally, drivers may register a callback (update_job_credits) provided by
+ * struct drm_sched_backend_ops to update the job's credits dynamically. The
+ * scheduler executes this callback every time the scheduler considers a job for
+ * execution and subsequently checks whether the job fits the scheduler's credit
+ * limit.
+ */
+
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
@@ -75,6 +99,51 @@ int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
 MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
 module_param_named(sched_policy, drm_sched_policy, int, 0444);
 
+static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
+{
+	u32 credits;
+
+	drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit,
+					      atomic_read(&sched->credit_count),
+					      &credits));
+
+	return credits;
+}
+
+/**
+ * drm_sched_can_queue -- Can we queue more to the hardware?
+ * @sched: scheduler instance
+ * @entity: the scheduler entity
+ *
+ * Return true if we can push at least one more job from @entity, false
+ * otherwise.
+ */
+static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched,
+				struct drm_sched_entity *entity)
+{
+	struct drm_sched_job *s_job;
+
+	s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+	if (!s_job)
+		return false;
+
+	if (sched->ops->update_job_credits) {
+		s_job->credits = sched->ops->update_job_credits(s_job);
+
+		drm_WARN(sched, !s_job->credits,
+			 "Jobs with zero credits bypass job-flow control.\n");
+	}
+
+	/* If a job exceeds the credit limit, truncate it to the credit limit
+	 * itself to guarantee forward progress.
+	 */
+	if (drm_WARN(sched, s_job->credits > sched->credit_limit,
+		     "Jobs may not exceed the credit limit, truncate.\n"))
+		s_job->credits = sched->credit_limit;
+
+	return drm_sched_available_credits(sched) >= s_job->credits;
+}
+
 static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
 							    const struct rb_node *b)
 {
@@ -186,12 +255,18 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 /**
  * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
  *
+ * @sched: the gpu scheduler
  * @rq: scheduler run queue to check.
  *
- * Try to find a ready entity, returns NULL if none found.
+ * Try to find the next ready entity.
+ *
+ * Return an entity if one is found; return an error-pointer (!NULL) if an
+ * entity was ready, but the scheduler had insufficient credits to accommodate
+ * its job; return NULL, if no ready entity was found.
  */
 static struct drm_sched_entity *
-drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
+drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
+			      struct drm_sched_rq *rq)
 {
 	struct drm_sched_entity *entity;
 
@@ -201,6 +276,14 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
 	if (entity) {
 		list_for_each_entry_continue(entity, &rq->entities, list) {
 			if (drm_sched_entity_is_ready(entity)) {
+				/* If we can't queue yet, preserve the current
+				 * entity in terms of fairness.
+				 */
+				if (!drm_sched_can_queue(sched, entity)) {
+					spin_unlock(&rq->lock);
+					return ERR_PTR(-ENOSPC);
+				}
+
 				rq->current_entity = entity;
 				reinit_completion(&entity->entity_idle);
 				spin_unlock(&rq->lock);
@@ -210,8 +293,15 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
 	}
 
 	list_for_each_entry(entity, &rq->entities, list) {
-
 		if (drm_sched_entity_is_ready(entity)) {
+			/* If we can't queue yet, preserve the current entity in
+			 * terms of fairness.
+			 */
+			if (!drm_sched_can_queue(sched, entity)) {
+				spin_unlock(&rq->lock);
+				return ERR_PTR(-ENOSPC);
+			}
+
 			rq->current_entity = entity;
 			reinit_completion(&entity->entity_idle);
 			spin_unlock(&rq->lock);
@@ -230,12 +320,18 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
 /**
  * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
  *
+ * @sched: the gpu scheduler
  * @rq: scheduler run queue to check.
  *
- * Find oldest waiting ready entity, returns NULL if none found.
+ * Find oldest waiting ready entity.
+ *
+ * Return an entity if one is found; return an error-pointer (!NULL) if an
+ * entity was ready, but the scheduler had insufficient credits to accommodate
+ * its job; return NULL, if no ready entity was found.
  */
 static struct drm_sched_entity *
-drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
+drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
+				struct drm_sched_rq *rq)
 {
 	struct rb_node *rb;
 
@@ -245,6 +341,14 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 
 		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
 		if (drm_sched_entity_is_ready(entity)) {
+			/* If we can't queue yet, preserve the current entity in
+			 * terms of fairness.
+			 */
+			if (!drm_sched_can_queue(sched, entity)) {
+				spin_unlock(&rq->lock);
+				return ERR_PTR(-ENOSPC);
+			}
+
 			rq->current_entity = entity;
 			reinit_completion(&entity->entity_idle);
 			break;
@@ -302,7 +406,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	struct drm_sched_fence *s_fence = s_job->s_fence;
 	struct drm_gpu_scheduler *sched = s_fence->sched;
 
-	atomic_dec(&sched->hw_rq_count);
+	atomic_sub(s_job->credits, &sched->credit_count);
 	atomic_dec(sched->score);
 
 	trace_drm_sched_process_job(s_fence);
@@ -525,7 +629,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 					      &s_job->cb)) {
 			dma_fence_put(s_job->s_fence->parent);
 			s_job->s_fence->parent = NULL;
-			atomic_dec(&sched->hw_rq_count);
+			atomic_sub(s_job->credits, &sched->credit_count);
 		} else {
 			/*
 			 * remove job from pending_list.
@@ -586,7 +690,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
 		struct dma_fence *fence = s_job->s_fence->parent;
 
-		atomic_inc(&sched->hw_rq_count);
+		atomic_add(s_job->credits, &sched->credit_count);
 
 		if (!full_recovery)
 			continue;
@@ -667,6 +771,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
  * drm_sched_job_init - init a scheduler job
  * @job: scheduler job to init
  * @entity: scheduler entity to use
+ * @credits: the number of credits this job contributes to the schedulers
+ * credit limit
  * @owner: job owner for debugging
  *
  * Refer to drm_sched_entity_push_job() documentation
@@ -684,7 +790,7 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
  */
 int drm_sched_job_init(struct drm_sched_job *job,
 		       struct drm_sched_entity *entity,
-		       void *owner)
+		       u32 credits, void *owner)
 {
 	if (!entity->rq) {
 		/* This will most likely be followed by missing frames
@@ -695,7 +801,13 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		return -ENOENT;
 	}
 
+	if (unlikely(!credits)) {
+		pr_err("*ERROR* %s: credits cannot be 0!\n", __func__);
+		return -EINVAL;
+	}
+
 	job->entity = entity;
+	job->credits = credits;
 	job->s_fence = drm_sched_fence_alloc(entity, owner);
 	if (!job->s_fence)
 		return -ENOMEM;
@@ -907,21 +1019,10 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
 }
 EXPORT_SYMBOL(drm_sched_job_cleanup);
 
-/**
- * drm_sched_can_queue -- Can we queue more to the hardware?
- * @sched: scheduler instance
- *
- * Return true if we can push more jobs to the hw, otherwise false.
- */
-static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
-{
-	return atomic_read(&sched->hw_rq_count) <
-		sched->hw_submission_limit;
-}
-
 /**
  * drm_sched_wakeup - Wake up the scheduler if it is ready to queue
  * @sched: scheduler instance
+ * @entity: the scheduler entity
  *
  * Wake up the scheduler if we can queue jobs.
  */
@@ -929,7 +1030,7 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
 		      struct drm_sched_entity *entity)
 {
 	if (drm_sched_entity_is_ready(entity))
-		if (drm_sched_can_queue(sched))
+		if (drm_sched_can_queue(sched, entity))
 			drm_sched_run_job_queue(sched);
 }
 
@@ -938,7 +1039,11 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
  *
  * @sched: scheduler instance
  *
- * Returns the entity to process or NULL if none are found.
+ * Return an entity to process or NULL if none are found.
+ *
+ * Note, that we break out of the for-loop when "entity" is non-null, which can
+ * also be an error-pointer--this assures we don't process lower priority
+ * run-queues. See comments in the respectively called functions.
  */
 static struct drm_sched_entity *
 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
@@ -946,19 +1051,16 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
 	struct drm_sched_entity *entity;
 	int i;
 
-	if (!drm_sched_can_queue(sched))
-		return NULL;
-
 	/* Kernel run queue has higher priority than normal run queue*/
 	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
 		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
-			drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) :
-			drm_sched_rq_select_entity_rr(sched->sched_rq[i]);
+			drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
+			drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
 		if (entity)
 			break;
 	}
 
-	return entity;
+	return IS_ERR(entity) ? NULL : entity;
 }
 
 /**
@@ -1094,7 +1196,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
 
 	s_fence = sched_job->s_fence;
 
-	atomic_inc(&sched->hw_rq_count);
+	atomic_add(sched_job->credits, &sched->credit_count);
 	drm_sched_job_begin(sched_job);
 
 	trace_drm_run_job(sched_job, entity);
@@ -1129,7 +1231,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
  * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
  *	       allocated and used
  * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
- * @hw_submission: number of hw submissions that can be in flight
+ * @credit_limit: the number of credits this scheduler can hold from all jobs
  * @hang_limit: number of times to allow a job to hang before dropping it
  * @timeout: timeout value in jiffies for the scheduler
  * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
@@ -1143,14 +1245,14 @@ static void drm_sched_run_job_work(struct work_struct *w)
 int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const struct drm_sched_backend_ops *ops,
 		   struct workqueue_struct *submit_wq,
-		   u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
+		   u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
 		   long timeout, struct workqueue_struct *timeout_wq,
 		   atomic_t *score, const char *name, struct device *dev)
 {
 	int i, ret;
 
 	sched->ops = ops;
-	sched->hw_submission_limit = hw_submission;
+	sched->credit_limit = credit_limit;
 	sched->name = name;
 	sched->timeout = timeout;
 	sched->timeout_wq = timeout_wq ? : system_wq;
@@ -1199,7 +1301,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	init_waitqueue_head(&sched->job_scheduled);
 	INIT_LIST_HEAD(&sched->pending_list);
 	spin_lock_init(&sched->job_list_lock);
-	atomic_set(&sched->hw_rq_count, 0);
+	atomic_set(&sched->credit_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
 	INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 712675134c048..9d2ac23c29e33 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -418,7 +418,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	job->file = file_priv;
 
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
-				 v3d_priv);
+				 1, v3d_priv);
 	if (ret)
 		goto fail;
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 09916c84703f5..1d60eab747de4 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -321,6 +321,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  * @sched: the scheduler instance on which this job is scheduled.
  * @s_fence: contains the fences for the scheduling of job.
  * @finish_cb: the callback for the finished fence.
+ * @credits: the number of credits this job contributes to the scheduler
  * @work: Helper to reschdeule job kill to different context.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
@@ -340,6 +341,8 @@ struct drm_sched_job {
 	struct drm_gpu_scheduler	*sched;
 	struct drm_sched_fence		*s_fence;
 
+	u32				credits;
+
 	/*
 	 * work is used only after finish_cb has been used and will not be
 	 * accessed anymore.
@@ -463,13 +466,27 @@ struct drm_sched_backend_ops {
          * and it's time to clean it up.
 	 */
 	void (*free_job)(struct drm_sched_job *sched_job);
+
+	/**
+	 * @update_job_credits: Called when the scheduler is considering this
+	 * job for execution.
+	 *
+	 * This callback returns the number of credits the job would take if
+	 * pushed to the hardware. Drivers may use this to dynamically update
+	 * the job's credit count. For instance, deduct the number of credits
+	 * for already signalled native fences.
+	 *
+	 * This callback is optional.
+	 */
+	u32 (*update_job_credits)(struct drm_sched_job *sched_job);
 };
 
 /**
  * struct drm_gpu_scheduler - scheduler instance-specific data
  *
  * @ops: backend operations provided by the driver.
- * @hw_submission_limit: the max size of the hardware queue.
+ * @credit_limit: the credit limit of this scheduler
+ * @credit_count: the current credit count of this scheduler
  * @timeout: the time after which a job is removed from the scheduler.
  * @name: name of the ring for which this scheduler is being used.
  * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
@@ -478,7 +495,6 @@ struct drm_sched_backend_ops {
  * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
  *                 waits on this wait queue until all the scheduled jobs are
  *                 finished.
- * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
  * @submit_wq: workqueue used to queue @work_run_job and @work_free_job
  * @timeout_wq: workqueue used to queue @work_tdr
@@ -502,13 +518,13 @@ struct drm_sched_backend_ops {
  */
 struct drm_gpu_scheduler {
 	const struct drm_sched_backend_ops	*ops;
-	uint32_t			hw_submission_limit;
+	u32				credit_limit;
+	atomic_t			credit_count;
 	long				timeout;
 	const char			*name;
 	u32                             num_rqs;
 	struct drm_sched_rq             **sched_rq;
 	wait_queue_head_t		job_scheduled;
-	atomic_t			hw_rq_count;
 	atomic64_t			job_id_count;
 	struct workqueue_struct		*submit_wq;
 	struct workqueue_struct		*timeout_wq;
@@ -530,14 +546,14 @@ struct drm_gpu_scheduler {
 int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const struct drm_sched_backend_ops *ops,
 		   struct workqueue_struct *submit_wq,
-		   u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
+		   u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
 		   long timeout, struct workqueue_struct *timeout_wq,
 		   atomic_t *score, const char *name, struct device *dev);
 
 void drm_sched_fini(struct drm_gpu_scheduler *sched);
 int drm_sched_job_init(struct drm_sched_job *job,
 		       struct drm_sched_entity *entity,
-		       void *owner);
+		       u32 credits, void *owner);
 void drm_sched_job_arm(struct drm_sched_job *job);
 int drm_sched_job_add_dependency(struct drm_sched_job *job,
 				 struct dma_fence *fence);
-- 
GitLab


From 48d054c2d34cdc67acb8cc9cfac326d91f1470ed Mon Sep 17 00:00:00 2001
From: Animesh Manna <animesh.manna@intel.com>
Date: Wed, 8 Nov 2023 12:52:58 +0530
Subject: [PATCH 0208/2340] drm/panelreplay: dpcd register definition for
 panelreplay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add DPCD register definition for discovering, enabling and
checking status of panel replay of the sink.

Cc: Jouni Högander <jouni.hogander@intel.com>
Cc: Arun R Murthy <arun.r.murthy@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108072303.3414118-2-animesh.manna@intel.com
---
 include/drm/display/drm_dp.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index 763d45a612f35..83d2039c018b4 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -544,6 +544,10 @@
 /* DFP Capability Extension */
 #define DP_DFP_CAPABILITY_EXTENSION_SUPPORT	0x0a3	/* 2.0 */
 
+#define DP_PANEL_REPLAY_CAP                 0x0b0  /* DP 2.0 */
+# define DP_PANEL_REPLAY_SUPPORT            (1 << 0)
+# define DP_PANEL_REPLAY_SU_SUPPORT         (1 << 1)
+
 /* Link Configuration */
 #define	DP_LINK_BW_SET		            0x100
 # define DP_LINK_RATE_TABLE		    0x00    /* eDP 1.4 */
@@ -718,6 +722,13 @@
 #define DP_BRANCH_DEVICE_CTRL		    0x1a1
 # define DP_BRANCH_DEVICE_IRQ_HPD	    (1 << 0)
 
+#define PANEL_REPLAY_CONFIG                             0x1b0  /* DP 2.0 */
+# define DP_PANEL_REPLAY_ENABLE                         (1 << 0)
+# define DP_PANEL_REPLAY_UNRECOVERABLE_ERROR_EN         (1 << 3)
+# define DP_PANEL_REPLAY_RFB_STORAGE_ERROR_EN           (1 << 4)
+# define DP_PANEL_REPLAY_ACTIVE_FRAME_CRC_ERROR_EN      (1 << 5)
+# define DP_PANEL_REPLAY_SU_ENABLE                      (1 << 6)
+
 #define DP_PAYLOAD_ALLOCATE_SET		    0x1c0
 #define DP_PAYLOAD_ALLOCATE_START_TIME_SLOT 0x1c1
 #define DP_PAYLOAD_ALLOCATE_TIME_SLOT_COUNT 0x1c2
@@ -1107,6 +1118,18 @@
 #define DP_LANE_ALIGN_STATUS_UPDATED_ESI       0x200e /* status same as 0x204 */
 #define DP_SINK_STATUS_ESI                     0x200f /* status same as 0x205 */
 
+#define DP_PANEL_REPLAY_ERROR_STATUS                   0x2020  /* DP 2.1*/
+# define DP_PANEL_REPLAY_LINK_CRC_ERROR                (1 << 0)
+# define DP_PANEL_REPLAY_RFB_STORAGE_ERROR             (1 << 1)
+# define DP_PANEL_REPLAY_VSC_SDP_UNCORRECTABLE_ERROR   (1 << 2)
+
+#define DP_SINK_DEVICE_PR_AND_FRAME_LOCK_STATUS        0x2022  /* DP 2.1 */
+# define DP_SINK_DEVICE_PANEL_REPLAY_STATUS_MASK       (7 << 0)
+# define DP_SINK_FRAME_LOCKED_SHIFT                    3
+# define DP_SINK_FRAME_LOCKED_MASK                     (3 << 3)
+# define DP_SINK_FRAME_LOCKED_STATUS_VALID_SHIFT       5
+# define DP_SINK_FRAME_LOCKED_STATUS_VALID_MASK        (1 << 5)
+
 /* Extended Receiver Capability: See DP_DPCD_REV for definitions */
 #define DP_DP13_DPCD_REV                    0x2200
 
-- 
GitLab


From dd8f2298e34bf64f07ad5ff27c5964994783e7a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 8 Nov 2023 12:52:59 +0530
Subject: [PATCH 0209/2340] drm/i915/psr: Move psr specific dpcd init into own
 function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch is preparing adding panel replay specific dpcd init.

Cc: Arun R Murthy <arun.r.murthy@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108072303.3414118-3-animesh.manna@intel.com
---
 drivers/gpu/drm/i915/display/intel_psr.c | 41 +++++++++++++-----------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 3691f882e1c0a..627d821649336 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -475,27 +475,22 @@ exit:
 	intel_dp->psr.su_y_granularity = y;
 }
 
-void intel_psr_init_dpcd(struct intel_dp *intel_dp)
+static void _psr_init_dpcd(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv =
+	struct drm_i915_private *i915 =
 		to_i915(dp_to_dig_port(intel_dp)->base.base.dev);
 
-	drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd,
-			 sizeof(intel_dp->psr_dpcd));
-
-	if (!intel_dp->psr_dpcd[0])
-		return;
-	drm_dbg_kms(&dev_priv->drm, "eDP panel supports PSR version %x\n",
+	drm_dbg_kms(&i915->drm, "eDP panel supports PSR version %x\n",
 		    intel_dp->psr_dpcd[0]);
 
 	if (drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_NO_PSR)) {
-		drm_dbg_kms(&dev_priv->drm,
+		drm_dbg_kms(&i915->drm,
 			    "PSR support not currently available for this panel\n");
 		return;
 	}
 
 	if (!(intel_dp->edp_dpcd[1] & DP_EDP_SET_POWER_CAP)) {
-		drm_dbg_kms(&dev_priv->drm,
+		drm_dbg_kms(&i915->drm,
 			    "Panel lacks power state control, PSR cannot be enabled\n");
 		return;
 	}
@@ -504,8 +499,8 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp)
 	intel_dp->psr.sink_sync_latency =
 		intel_dp_get_sink_sync_latency(intel_dp);
 
-	if (DISPLAY_VER(dev_priv) >= 9 &&
-	    (intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_IS_SUPPORTED)) {
+	if (DISPLAY_VER(i915) >= 9 &&
+	    intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_IS_SUPPORTED) {
 		bool y_req = intel_dp->psr_dpcd[1] &
 			     DP_PSR2_SU_Y_COORDINATE_REQUIRED;
 		bool alpm = intel_dp_get_alpm_status(intel_dp);
@@ -522,14 +517,24 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp)
 		 * GTC first.
 		 */
 		intel_dp->psr.sink_psr2_support = y_req && alpm;
-		drm_dbg_kms(&dev_priv->drm, "PSR2 %ssupported\n",
+		drm_dbg_kms(&i915->drm, "PSR2 %ssupported\n",
 			    intel_dp->psr.sink_psr2_support ? "" : "not ");
+	}
+}
 
-		if (intel_dp->psr.sink_psr2_support) {
-			intel_dp->psr.colorimetry_support =
-				intel_dp_get_colorimetry_status(intel_dp);
-			intel_dp_get_su_granularity(intel_dp);
-		}
+void intel_psr_init_dpcd(struct intel_dp *intel_dp)
+{
+	drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd,
+			 sizeof(intel_dp->psr_dpcd));
+
+	if (intel_dp->psr_dpcd[0])
+		_psr_init_dpcd(intel_dp);
+	/* TODO: Add PR case here */
+
+	if (intel_dp->psr.sink_psr2_support) {
+		intel_dp->psr.colorimetry_support =
+			intel_dp_get_colorimetry_status(intel_dp);
+		intel_dp_get_su_granularity(intel_dp);
 	}
 }
 
-- 
GitLab


From b8cf5b5d266ec20e1ab90f38c8d779c669c2d219 Mon Sep 17 00:00:00 2001
From: Animesh Manna <animesh.manna@intel.com>
Date: Wed, 8 Nov 2023 12:53:00 +0530
Subject: [PATCH 0210/2340] drm/i915/panelreplay: Initializaton and compute
 config for panel replay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify existing PSR implementation to enable panel replay feature of DP 2.0
which is similar to PSR feature of EDP panel. There is different DPCD
address to check panel capability compare to PSR and vsc sdp header
is different.

v1: Initial version.
v2:
- Set source_panel_replay_support flag under HAS_PANEL_REPLAY()
condition check. [Jouni]
- Code restructured around intel_panel_replay_init
and renamed to intel_panel_replay_init_dpcd. [Jouni]
- Remove the initial code modification around has_psr2 flag. [Jouni]
- Add CAN_PANEL_REPLAY() in intel_encoder_can_psr which is used to
enable in intel_psr_post_plane_update. [Jouni]
v3:
- Initialize both psr and panel-replay. [Jouni]
- Initialize both panel replay and psr if detected. [Jouni]
- Refactoring psr function by introducing _psr_compute_config(). [Jouni]
- Add check for !is_edp while deriving source_panel_replay_support. [Jouni]
- Enable panel replay dpcd initialization in a separate patch. [Jouni]

v4:
- HAS_PANEL_REPLAY() check not needed during sink capability check. [Jouni]
- Set either panel replay source support or psr. [Jouni]

v5:
- HAS_PANEL_REPLAY() removed and use HAS_DP20() instead. [Jouni]
- Move psr related code to intel_psr.c. [Jani]
- Reset sink_panel_replay_support flag during disconnection. [Jani]

v6: return statement restored which is removed by misatke. [Jouni]
v7: cosmetic changes. [Arun]

Cc: Jouni Högander <jouni.hogander@intel.com>
Cc: Arun R Murthy <arun.r.murthy@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108072303.3414118-4-animesh.manna@intel.com
---
 .../drm/i915/display/intel_display_types.h    | 14 +---
 drivers/gpu/drm/i915/display/intel_dp.c       | 49 +++++++++--
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  3 +
 drivers/gpu/drm/i915/display/intel_psr.c      | 84 ++++++++++++++-----
 drivers/gpu/drm/i915/display/intel_psr.h      |  7 ++
 5 files changed, 117 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 926bf9c1a3ede..c83132d9dff0d 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1211,6 +1211,7 @@ struct intel_crtc_state {
 	bool has_psr2;
 	bool enable_psr2_sel_fetch;
 	bool req_psr2_sdp_prior_scanline;
+	bool has_panel_replay;
 	bool wm_level_disabled;
 	u32 dc3co_exitline;
 	u16 su_y_granularity;
@@ -1708,6 +1709,8 @@ struct intel_psr {
 	bool irq_aux_error;
 	u16 su_w_granularity;
 	u16 su_y_granularity;
+	bool source_panel_replay_support;
+	bool sink_panel_replay_support;
 	u32 dc3co_exitline;
 	u32 dc3co_exit_delay;
 	struct delayed_work dc3co_work;
@@ -1994,17 +1997,6 @@ dp_to_lspcon(struct intel_dp *intel_dp)
 
 #define dp_to_i915(__intel_dp) to_i915(dp_to_dig_port(__intel_dp)->base.base.dev)
 
-#define CAN_PSR(intel_dp) ((intel_dp)->psr.sink_support && \
-			   (intel_dp)->psr.source_support)
-
-static inline bool intel_encoder_can_psr(struct intel_encoder *encoder)
-{
-	if (!intel_encoder_is_dp(encoder))
-		return false;
-
-	return CAN_PSR(enc_to_intel_dp(encoder));
-}
-
 static inline struct intel_digital_port *
 hdmi_to_dig_port(struct intel_hdmi *intel_hdmi)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 670cf535e7c65..03e520c432485 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2470,12 +2470,22 @@ static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	/*
-	 * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118
-	 * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/
-	 * Colorimetry Format indication.
-	 */
-	vsc->revision = 0x5;
+	if (crtc_state->has_panel_replay) {
+		/*
+		 * Prepare VSC Header for SU as per DP 2.0 spec, Table 2-223
+		 * VSC SDP supporting 3D stereo, Panel Replay, and Pixel
+		 * Encoding/Colorimetry Format indication.
+		 */
+		vsc->revision = 0x7;
+	} else {
+		/*
+		 * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118
+		 * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/
+		 * Colorimetry Format indication.
+		 */
+		vsc->revision = 0x5;
+	}
+
 	vsc->length = 0x13;
 
 	/* DP 1.4a spec, Table 2-120 */
@@ -2584,6 +2594,21 @@ void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp,
 			vsc->revision = 0x4;
 			vsc->length = 0xe;
 		}
+	} else if (crtc_state->has_panel_replay) {
+		if (intel_dp->psr.colorimetry_support &&
+		    intel_dp_needs_vsc_sdp(crtc_state, conn_state)) {
+			/* [Panel Replay with colorimetry info] */
+			intel_dp_compute_vsc_colorimetry(crtc_state, conn_state,
+							 vsc);
+		} else {
+			/*
+			 * [Panel Replay without colorimetry info]
+			 * Prepare VSC Header for SU as per DP 2.0 spec, Table 2-223
+			 * VSC SDP supporting 3D stereo + Panel Replay.
+			 */
+			vsc->revision = 0x6;
+			vsc->length = 0x10;
+		}
 	} else {
 		/*
 		 * [PSR1]
@@ -4052,11 +4077,16 @@ static ssize_t intel_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc,
 	sdp->sdp_header.HB2 = vsc->revision; /* Revision Number */
 	sdp->sdp_header.HB3 = vsc->length; /* Number of Valid Data Bytes */
 
+	if (vsc->revision == 0x6) {
+		sdp->db[0] = 1;
+		sdp->db[3] = 1;
+	}
+
 	/*
-	 * Only revision 0x5 supports Pixel Encoding/Colorimetry Format as
-	 * per DP 1.4a spec.
+	 * Revision 0x5 and revision 0x7 supports Pixel Encoding/Colorimetry
+	 * Format as per DP 1.4a spec and DP 2.0 respectively.
 	 */
-	if (vsc->revision != 0x5)
+	if (!(vsc->revision == 0x5 || vsc->revision == 0x7))
 		goto out;
 
 	/* VSC SDP Payload for DB16 through DB18 */
@@ -5603,6 +5633,7 @@ intel_dp_detect(struct drm_connector *connector,
 	if (status == connector_status_disconnected) {
 		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
 		memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd));
+		intel_dp->psr.sink_panel_replay_support = false;
 
 		if (intel_dp->is_mst) {
 			drm_dbg_kms(&dev_priv->drm,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 107f7418ddc51..b943dbf394a22 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -44,6 +44,7 @@
 #include "intel_hdcp.h"
 #include "intel_hotplug.h"
 #include "intel_link_bw.h"
+#include "intel_psr.h"
 #include "intel_vdsc.h"
 #include "skl_scaler.h"
 
@@ -592,6 +593,8 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
 
+	intel_psr_compute_config(intel_dp, pipe_config, conn_state);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 627d821649336..3ffdeb451a836 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -173,6 +173,15 @@
  * irrelevant for normal operation.
  */
 
+bool intel_encoder_can_psr(struct intel_encoder *encoder)
+{
+	if (intel_encoder_is_dp(encoder) || encoder->type == INTEL_OUTPUT_DP_MST)
+		return CAN_PSR(enc_to_intel_dp(encoder)) ||
+		       CAN_PANEL_REPLAY(enc_to_intel_dp(encoder));
+	else
+		return false;
+}
+
 static bool psr_global_enabled(struct intel_dp *intel_dp)
 {
 	struct intel_connector *connector = intel_dp->attached_connector;
@@ -475,6 +484,25 @@ exit:
 	intel_dp->psr.su_y_granularity = y;
 }
 
+static void _panel_replay_init_dpcd(struct intel_dp *intel_dp)
+{
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	u8 pr_dpcd = 0;
+
+	intel_dp->psr.sink_panel_replay_support = false;
+	drm_dp_dpcd_readb(&intel_dp->aux, DP_PANEL_REPLAY_CAP, &pr_dpcd);
+
+	if (!(pr_dpcd & DP_PANEL_REPLAY_SUPPORT)) {
+		drm_dbg_kms(&i915->drm,
+			    "Panel replay is not supported by panel\n");
+		return;
+	}
+
+	drm_dbg_kms(&i915->drm,
+		    "Panel replay is supported by panel\n");
+	intel_dp->psr.sink_panel_replay_support = true;
+}
+
 static void _psr_init_dpcd(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *i915 =
@@ -524,12 +552,13 @@ static void _psr_init_dpcd(struct intel_dp *intel_dp)
 
 void intel_psr_init_dpcd(struct intel_dp *intel_dp)
 {
+	_panel_replay_init_dpcd(intel_dp);
+
 	drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd,
 			 sizeof(intel_dp->psr_dpcd));
 
 	if (intel_dp->psr_dpcd[0])
 		_psr_init_dpcd(intel_dp);
-	/* TODO: Add PR case here */
 
 	if (intel_dp->psr.sink_psr2_support) {
 		intel_dp->psr.colorimetry_support =
@@ -1271,13 +1300,11 @@ unsupported:
 	return false;
 }
 
-void intel_psr_compute_config(struct intel_dp *intel_dp,
-			      struct intel_crtc_state *crtc_state,
-			      struct drm_connector_state *conn_state)
+static bool _psr_compute_config(struct intel_dp *intel_dp,
+				struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	const struct drm_display_mode *adjusted_mode =
-		&crtc_state->hw.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
 	u8 entry_setup_frames;
 
 	/*
@@ -1285,10 +1312,30 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
 	 * So if VRR is enabled, do not enable PSR.
 	 */
 	if (crtc_state->vrr.enable)
-		return;
+		return false;
 
 	if (!CAN_PSR(intel_dp))
-		return;
+		return false;
+
+	entry_setup_frames = intel_psr_entry_setup_frames(intel_dp, adjusted_mode);
+
+	if (entry_setup_frames >= 0) {
+		intel_dp->psr.entry_setup_frames = entry_setup_frames;
+	} else {
+		drm_dbg_kms(&dev_priv->drm,
+			    "PSR condition failed: PSR setup timing not met\n");
+		return false;
+	}
+
+	return true;
+}
+
+void intel_psr_compute_config(struct intel_dp *intel_dp,
+			      struct intel_crtc_state *crtc_state,
+			      struct drm_connector_state *conn_state)
+{
+	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
 
 	if (!psr_global_enabled(intel_dp)) {
 		drm_dbg_kms(&dev_priv->drm, "PSR disabled by flag\n");
@@ -1307,17 +1354,11 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
 		return;
 	}
 
-	entry_setup_frames = intel_psr_entry_setup_frames(intel_dp, adjusted_mode);
-
-	if (entry_setup_frames >= 0) {
-		intel_dp->psr.entry_setup_frames = entry_setup_frames;
-	} else {
-		drm_dbg_kms(&dev_priv->drm,
-			    "PSR condition failed: PSR setup timing not met\n");
-		return;
-	}
+	if (CAN_PANEL_REPLAY(intel_dp))
+		crtc_state->has_panel_replay = true;
+	else
+		crtc_state->has_psr = _psr_compute_config(intel_dp, crtc_state);
 
-	crtc_state->has_psr = true;
 	crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state);
 
 	crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC);
@@ -2750,7 +2791,7 @@ void intel_psr_init(struct intel_dp *intel_dp)
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 
-	if (!HAS_PSR(dev_priv))
+	if (!(HAS_PSR(dev_priv) || HAS_DP20(dev_priv)))
 		return;
 
 	/*
@@ -2768,7 +2809,10 @@ void intel_psr_init(struct intel_dp *intel_dp)
 		return;
 	}
 
-	intel_dp->psr.source_support = true;
+	if (HAS_DP20(dev_priv) && !intel_dp_is_edp(intel_dp))
+		intel_dp->psr.source_panel_replay_support = true;
+	else
+		intel_dp->psr.source_support = true;
 
 	/* Set link_standby x link_off defaults */
 	if (DISPLAY_VER(dev_priv) < 12)
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index bf35f42df6bc4..6a1f4573852b1 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -21,6 +21,13 @@ struct intel_encoder;
 struct intel_plane;
 struct intel_plane_state;
 
+#define CAN_PSR(intel_dp) ((intel_dp)->psr.sink_support && \
+			   (intel_dp)->psr.source_support)
+
+#define CAN_PANEL_REPLAY(intel_dp) ((intel_dp)->psr.sink_panel_replay_support && \
+				    (intel_dp)->psr.source_panel_replay_support)
+
+bool intel_encoder_can_psr(struct intel_encoder *encoder);
 void intel_psr_init_dpcd(struct intel_dp *intel_dp);
 void intel_psr_pre_plane_update(struct intel_atomic_state *state,
 				struct intel_crtc *crtc);
-- 
GitLab


From cceeaa312d390e4f8407c056ae27ba7edd50307e Mon Sep 17 00:00:00 2001
From: Animesh Manna <animesh.manna@intel.com>
Date: Wed, 8 Nov 2023 12:53:01 +0530
Subject: [PATCH 0211/2340] drm/i915/panelreplay: Enable panel replay dpcd
 initialization for DP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Due to similarity panel replay dpcd initialization got added in psr
function which is specific for edp panel. This patch enables panel
replay initialization for dp connector.

Cc: Jouni Högander <jouni.hogander@intel.com>
Cc: Arun R Murthy <arun.r.murthy@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108072303.3414118-5-animesh.manna@intel.com
---
 drivers/gpu/drm/i915/display/intel_psr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 3ffdeb451a836..3c25c8d858031 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -2794,6 +2794,9 @@ void intel_psr_init(struct intel_dp *intel_dp)
 	if (!(HAS_PSR(dev_priv) || HAS_DP20(dev_priv)))
 		return;
 
+	if (!intel_dp_is_edp(intel_dp))
+		intel_psr_init_dpcd(intel_dp);
+
 	/*
 	 * HSW spec explicitly says PSR is tied to port A.
 	 * BDW+ platforms have a instance of PSR registers per transcoder but
-- 
GitLab


From 3257e55d3ea7e35ea76ff6ae07347b803f068068 Mon Sep 17 00:00:00 2001
From: Animesh Manna <animesh.manna@intel.com>
Date: Wed, 8 Nov 2023 12:53:02 +0530
Subject: [PATCH 0212/2340] drm/i915/panelreplay: enable/disable panel replay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TRANS_DP2_CTL register is programmed to enable panel replay from source
and sink is enabled through panel replay dpcd configuration address.

Bspec: 1407940617

v1: Initial version.
v2:
- Use pr_* flags instead psr_* flags. [Jouni]
- Remove intel_dp_is_edp check as edp1.5 also has panel replay. [Jouni]

v3: Cover letter updated and selective fetch condition check is added
before updating its bit in PSR2_MAN_TRK_CTL register. [Jouni]

v4: Selective fetch related PSR2_MAN_TRK_CTL programmming dropped. [Jouni]

v5: Added PSR2_MAN_TRK_CTL programming as needed for Continuous Full
Frame (CFF) update.

v6: Rebased on latest.

Note: Initial plan is to enable panel replay in  full-screen live active
frame update mode. In a incremental approach panel replay will be enabled
in selctive update mode if there is any gap in curent implementation.

Cc: Jouni Högander <jouni.hogander@intel.com>
Cc: Arun R Murthy <arun.r.murthy@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108072303.3414118-6-animesh.manna@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c      |  7 ++-
 .../drm/i915/display/intel_display_types.h    |  1 +
 drivers/gpu/drm/i915/display/intel_psr.c      | 63 ++++++++++++++-----
 3 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 6cec72ac6422e..0712a5200ad35 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2800,10 +2800,15 @@ static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state,
 				    const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
-	if (HAS_DP20(dev_priv))
+	if (HAS_DP20(dev_priv)) {
 		intel_dp_128b132b_sdp_crc16(enc_to_intel_dp(encoder),
 					    crtc_state);
+		if (crtc_state->has_panel_replay)
+			drm_dp_dpcd_writeb(&intel_dp->aux, PANEL_REPLAY_CONFIG,
+					   DP_PANEL_REPLAY_ENABLE);
+	}
 
 	if (DISPLAY_VER(dev_priv) >= 14)
 		mtl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index c83132d9dff0d..d2a15817dee2c 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1711,6 +1711,7 @@ struct intel_psr {
 	u16 su_y_granularity;
 	bool source_panel_replay_support;
 	bool sink_panel_replay_support;
+	bool panel_replay_enabled;
 	u32 dc3co_exitline;
 	u32 dc3co_exit_delay;
 	struct delayed_work dc3co_work;
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 3c25c8d858031..ea292832ca475 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -609,8 +609,11 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp)
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	u8 dpcd_val = DP_PSR_ENABLE;
 
-	/* Enable ALPM at sink for psr2 */
+	if (intel_dp->psr.panel_replay_enabled)
+		return;
+
 	if (intel_dp->psr.psr2_enabled) {
+		/* Enable ALPM at sink for psr2 */
 		drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG,
 				   DP_ALPM_ENABLE |
 				   DP_ALPM_LOCK_ERROR_IRQ_HPD_ENABLE);
@@ -783,6 +786,17 @@ static u8 frames_before_su_entry(struct intel_dp *intel_dp)
 	return frames_before_su_entry;
 }
 
+static void dg2_activate_panel_replay(struct intel_dp *intel_dp)
+{
+	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+
+	intel_de_rmw(dev_priv, PSR2_MAN_TRK_CTL(intel_dp->psr.transcoder),
+		     0, ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME);
+
+	intel_de_rmw(dev_priv, TRANS_DP2_CTL(intel_dp->psr.transcoder), 0,
+		     TRANS_DP2_PANEL_REPLAY_ENABLE);
+}
+
 static void hsw_activate_psr2(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
@@ -1379,18 +1393,23 @@ void intel_psr_get_config(struct intel_encoder *encoder,
 		return;
 
 	intel_dp = &dig_port->dp;
-	if (!CAN_PSR(intel_dp))
+	if (!(CAN_PSR(intel_dp) || CAN_PANEL_REPLAY(intel_dp)))
 		return;
 
 	mutex_lock(&intel_dp->psr.lock);
 	if (!intel_dp->psr.enabled)
 		goto unlock;
 
-	/*
-	 * Not possible to read EDP_PSR/PSR2_CTL registers as it is
-	 * enabled/disabled because of frontbuffer tracking and others.
-	 */
-	pipe_config->has_psr = true;
+	if (intel_dp->psr.panel_replay_enabled) {
+		pipe_config->has_panel_replay = true;
+	} else {
+		/*
+		 * Not possible to read EDP_PSR/PSR2_CTL registers as it is
+		 * enabled/disabled because of frontbuffer tracking and others.
+		 */
+		pipe_config->has_psr = true;
+	}
+
 	pipe_config->has_psr2 = intel_dp->psr.psr2_enabled;
 	pipe_config->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC);
 
@@ -1427,8 +1446,10 @@ static void intel_psr_activate(struct intel_dp *intel_dp)
 
 	lockdep_assert_held(&intel_dp->psr.lock);
 
-	/* psr1 and psr2 are mutually exclusive.*/
-	if (intel_dp->psr.psr2_enabled)
+	/* psr1, psr2 and panel-replay are mutually exclusive.*/
+	if (intel_dp->psr.panel_replay_enabled)
+		dg2_activate_panel_replay(intel_dp);
+	else if (intel_dp->psr.psr2_enabled)
 		hsw_activate_psr2(intel_dp);
 	else
 		hsw_activate_psr1(intel_dp);
@@ -1606,6 +1627,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp,
 	drm_WARN_ON(&dev_priv->drm, intel_dp->psr.enabled);
 
 	intel_dp->psr.psr2_enabled = crtc_state->has_psr2;
+	intel_dp->psr.panel_replay_enabled = crtc_state->has_panel_replay;
 	intel_dp->psr.busy_frontbuffer_bits = 0;
 	intel_dp->psr.pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
 	intel_dp->psr.transcoder = crtc_state->cpu_transcoder;
@@ -1621,8 +1643,12 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp,
 	if (!psr_interrupt_error_check(intel_dp))
 		return;
 
-	drm_dbg_kms(&dev_priv->drm, "Enabling PSR%s\n",
-		    intel_dp->psr.psr2_enabled ? "2" : "1");
+	if (intel_dp->psr.panel_replay_enabled)
+		drm_dbg_kms(&dev_priv->drm, "Enabling Panel Replay\n");
+	else
+		drm_dbg_kms(&dev_priv->drm, "Enabling PSR%s\n",
+			    intel_dp->psr.psr2_enabled ? "2" : "1");
+
 	intel_write_dp_vsc_sdp(encoder, crtc_state, &crtc_state->psr_vsc);
 	intel_snps_phy_update_psr_power_state(dev_priv, phy, true);
 	intel_psr_enable_sink(intel_dp);
@@ -1651,7 +1677,10 @@ static void intel_psr_exit(struct intel_dp *intel_dp)
 		return;
 	}
 
-	if (intel_dp->psr.psr2_enabled) {
+	if (intel_dp->psr.panel_replay_enabled) {
+		intel_de_rmw(dev_priv, TRANS_DP2_CTL(intel_dp->psr.transcoder),
+			     TRANS_DP2_PANEL_REPLAY_ENABLE, 0);
+	} else if (intel_dp->psr.psr2_enabled) {
 		tgl_disallow_dc3co_on_psr2_exit(intel_dp);
 
 		val = intel_de_rmw(dev_priv, EDP_PSR2_CTL(cpu_transcoder),
@@ -1700,8 +1729,11 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp)
 	if (!intel_dp->psr.enabled)
 		return;
 
-	drm_dbg_kms(&dev_priv->drm, "Disabling PSR%s\n",
-		    intel_dp->psr.psr2_enabled ? "2" : "1");
+	if (intel_dp->psr.panel_replay_enabled)
+		drm_dbg_kms(&dev_priv->drm, "Disabling Panel Replay\n");
+	else
+		drm_dbg_kms(&dev_priv->drm, "Disabling PSR%s\n",
+			    intel_dp->psr.psr2_enabled ? "2" : "1");
 
 	intel_psr_exit(intel_dp);
 	intel_psr_wait_exit_locked(intel_dp);
@@ -1734,6 +1766,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp)
 		drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, 0);
 
 	intel_dp->psr.enabled = false;
+	intel_dp->psr.panel_replay_enabled = false;
 	intel_dp->psr.psr2_enabled = false;
 	intel_dp->psr.psr2_sel_fetch_enabled = false;
 	intel_dp->psr.psr2_sel_fetch_cff_enabled = false;
@@ -2305,7 +2338,7 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state,
 		intel_atomic_get_new_crtc_state(state, crtc);
 	struct intel_encoder *encoder;
 
-	if (!crtc_state->has_psr)
+	if (!(crtc_state->has_psr || crtc_state->has_panel_replay))
 		return;
 
 	for_each_intel_encoder_mask_with_psr(state->base.dev, encoder,
-- 
GitLab


From e4ae85e364fc652ea15d85b0f3a6da304c9b5ce7 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Tue, 7 Nov 2023 10:18:01 +0000
Subject: [PATCH 0213/2340] drm/i915: Add ability for tracking buffer objects
 per client

In order to show per client memory usage lets add some infrastructure
which enables tracking buffer objects owned by clients.

We add a per client list protected by a new per client lock and to support
delayed destruction (post client exit) we make tracked objects hold
references to the owning client.

Also, object memory region teardown is moved to the existing RCU free
callback to allow safe dereference from the fdinfo RCU read section.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 13 +++++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 12 +++++++
 drivers/gpu/drm/i915/i915_drm_client.c        | 36 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_drm_client.h        | 32 +++++++++++++++++
 4 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index c26d875558250..25eeeb863209e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -106,6 +106,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->mm.link);
 
+#ifdef CONFIG_PROC_FS
+	INIT_LIST_HEAD(&obj->client_link);
+#endif
+
 	INIT_LIST_HEAD(&obj->lut_list);
 	spin_lock_init(&obj->lut_lock);
 
@@ -293,6 +297,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head)
 		container_of(head, typeof(*obj), rcu);
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
+	/* We need to keep this alive for RCU read access from fdinfo. */
+	if (obj->mm.n_placements > 1)
+		kfree(obj->mm.placements);
+
 	i915_gem_object_free(obj);
 
 	GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
@@ -389,9 +397,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj)
 	if (obj->ops->release)
 		obj->ops->release(obj);
 
-	if (obj->mm.n_placements > 1)
-		kfree(obj->mm.placements);
-
 	if (obj->shares_resv_from)
 		i915_vm_resv_put(obj->shares_resv_from);
 
@@ -442,6 +447,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
 
+	i915_drm_client_remove_object(obj);
+
 	/*
 	 * Before we free the object, make sure any pure RCU-only
 	 * read-side critical sections are complete, e.g.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2292404007c81..0c5cdab278b69 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -302,6 +302,18 @@ struct drm_i915_gem_object {
 	 */
 	struct i915_address_space *shares_resv_from;
 
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @client: @i915_drm_client which created the object
+	 */
+	struct i915_drm_client *client;
+
+	/**
+	 * @client_link: Link into @i915_drm_client.objects_list
+	 */
+	struct list_head client_link;
+#endif
+
 	union {
 		struct rcu_head rcu;
 		struct llist_node freed;
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 2a44b3876cb51..2e5e69edc0f9a 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void)
 	kref_init(&client->kref);
 	spin_lock_init(&client->ctx_lock);
 	INIT_LIST_HEAD(&client->ctx_list);
+#ifdef CONFIG_PROC_FS
+	spin_lock_init(&client->objects_lock);
+	INIT_LIST_HEAD(&client->objects_list);
+#endif
 
 	return client;
 }
@@ -108,4 +112,36 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
 		show_client_class(p, i915, file_priv->client, i);
 }
+
+void i915_drm_client_add_object(struct i915_drm_client *client,
+				struct drm_i915_gem_object *obj)
+{
+	unsigned long flags;
+
+	GEM_WARN_ON(obj->client);
+	GEM_WARN_ON(!list_empty(&obj->client_link));
+
+	spin_lock_irqsave(&client->objects_lock, flags);
+	obj->client = i915_drm_client_get(client);
+	list_add_tail_rcu(&obj->client_link, &client->objects_list);
+	spin_unlock_irqrestore(&client->objects_lock, flags);
+}
+
+bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+	struct i915_drm_client *client = fetch_and_zero(&obj->client);
+	unsigned long flags;
+
+	/* Object may not be associated with a client. */
+	if (!client)
+		return false;
+
+	spin_lock_irqsave(&client->objects_lock, flags);
+	list_del_rcu(&obj->client_link);
+	spin_unlock_irqrestore(&client->objects_lock, flags);
+
+	i915_drm_client_put(client);
+
+	return true;
+}
 #endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 67816c912bca1..5f58fdf7dcb8c 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -12,6 +12,9 @@
 
 #include <uapi/drm/i915_drm.h>
 
+#include "i915_file_private.h"
+#include "gem/i915_gem_object_types.h"
+
 #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
 struct drm_file;
@@ -25,6 +28,20 @@ struct i915_drm_client {
 	spinlock_t ctx_lock; /* For add/remove from ctx_list. */
 	struct list_head ctx_list; /* List of contexts belonging to client. */
 
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @objects_lock: lock protecting @objects_list
+	 */
+	spinlock_t objects_lock;
+
+	/**
+	 * @objects_list: list of objects created by this client
+	 *
+	 * Protected by @objects_lock.
+	 */
+	struct list_head objects_list;
+#endif
+
 	/**
 	 * @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
 	 */
@@ -49,4 +66,19 @@ struct i915_drm_client *i915_drm_client_alloc(void);
 
 void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
 
+#ifdef CONFIG_PROC_FS
+void i915_drm_client_add_object(struct i915_drm_client *client,
+				struct drm_i915_gem_object *obj);
+bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
+#else
+static inline void i915_drm_client_add_object(struct i915_drm_client *client,
+					      struct drm_i915_gem_object *obj)
+{
+}
+
+static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+}
+#endif
+
 #endif /* !__I915_DRM_CLIENT_H__ */
-- 
GitLab


From ca02a0119f814b792484cba0c148fba292327ed6 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Tue, 7 Nov 2023 10:18:02 +0000
Subject: [PATCH 0214/2340] drm/i915: Record which client owns a VM

To enable accounting of indirect client memory usage (such as page tables)
in the following patch, lets start recording the creator of each PPGTT.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-2-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c       | 11 ++++++++---
 drivers/gpu/drm/i915/gem/i915_gem_context_types.h |  3 +++
 drivers/gpu/drm/i915/gem/selftests/mock_context.c |  4 ++--
 drivers/gpu/drm/i915/gt/intel_gtt.h               |  1 +
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e38f06a6e56eb..dcbfe32fd30c3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -279,7 +279,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
 }
 
 static struct i915_gem_proto_context *
-proto_context_create(struct drm_i915_private *i915, unsigned int flags)
+proto_context_create(struct drm_i915_file_private *fpriv,
+		     struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gem_proto_context *pc, *err;
 
@@ -287,6 +288,7 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags)
 	if (!pc)
 		return ERR_PTR(-ENOMEM);
 
+	pc->fpriv = fpriv;
 	pc->num_user_engines = -1;
 	pc->user_engines = NULL;
 	pc->user_flags = BIT(UCONTEXT_BANNABLE) |
@@ -1622,6 +1624,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
 			err = PTR_ERR(ppgtt);
 			goto err_ctx;
 		}
+		ppgtt->vm.fpriv = pc->fpriv;
 		vm = &ppgtt->vm;
 	}
 	if (vm)
@@ -1741,7 +1744,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	/* 0 reserved for invalid/unassigned ppgtt */
 	xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1);
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(file_priv, i915, 0);
 	if (IS_ERR(pc)) {
 		err = PTR_ERR(pc);
 		goto err;
@@ -1823,6 +1826,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 
 	GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
 	args->vm_id = id;
+	ppgtt->vm.fpriv = file_priv;
 	return 0;
 
 err_put:
@@ -2285,7 +2289,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ext_data.pc = proto_context_create(i915, args->flags);
+	ext_data.pc = proto_context_create(file->driver_priv, i915,
+					   args->flags);
 	if (IS_ERR(ext_data.pc))
 		return PTR_ERR(ext_data.pc);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index cb78214a7dcdb..c573c067779f5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -188,6 +188,9 @@ struct i915_gem_proto_engine {
  * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE.
  */
 struct i915_gem_proto_context {
+	/** @fpriv: Client which creates the context */
+	struct drm_i915_file_private *fpriv;
+
 	/** @vm: See &i915_gem_context.vm */
 	struct i915_address_space *vm;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index e199d7dbb8769..2b0327cc47c2d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -83,7 +83,7 @@ live_context(struct drm_i915_private *i915, struct file *file)
 	int err;
 	u32 id;
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(fpriv, i915, 0);
 	if (IS_ERR(pc))
 		return ERR_CAST(pc);
 
@@ -152,7 +152,7 @@ kernel_context(struct drm_i915_private *i915,
 	struct i915_gem_context *ctx;
 	struct i915_gem_proto_context *pc;
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(NULL, i915, 0);
 	if (IS_ERR(pc))
 		return ERR_CAST(pc);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 028a5a988eea0..6b85222ee3ea6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -255,6 +255,7 @@ struct i915_address_space {
 	} rsvd;
 	struct intel_gt *gt;
 	struct drm_i915_private *i915;
+	struct drm_i915_file_private *fpriv;
 	struct device *dma;
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
-- 
GitLab


From 978e1a52ca1f0228eccc51ad5ed3a118bac1ad1c Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Tue, 7 Nov 2023 10:18:03 +0000
Subject: [PATCH 0215/2340] drm/i915: Track page table backing store usage

Account page table backing store against the owning client memory usage
stats.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-3-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 21719563a602a..86f73fe558ca6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -63,6 +63,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
@@ -84,6 +87,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
-- 
GitLab


From dc1a2775070f0618b661500310b2ea8643592ed1 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Tue, 7 Nov 2023 10:18:04 +0000
Subject: [PATCH 0216/2340] drm/i915: Account ring buffer and context state
 storage

Account ring buffers and logical context space against the owning client
memory usage stats.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-4-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gt/intel_context.c | 14 ++++++++++++++
 drivers/gpu/drm/i915/i915_drm_client.c  | 10 ++++++++++
 drivers/gpu/drm/i915/i915_drm_client.h  |  9 +++++++++
 3 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index a53b26178f0ad..a2f1245741bbf 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -6,6 +6,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 
+#include "i915_drm_client.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
 
@@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine)
 
 int intel_context_alloc_state(struct intel_context *ce)
 {
+	struct i915_gem_context *ctx;
 	int err = 0;
 
 	if (mutex_lock_interruptible(&ce->pin_mutex))
@@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce)
 			goto unlock;
 
 		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+		rcu_read_lock();
+		ctx = rcu_dereference(ce->gem_context);
+		if (ctx && !kref_get_unless_zero(&ctx->ref))
+			ctx = NULL;
+		rcu_read_unlock();
+		if (ctx) {
+			if (ctx->client)
+				i915_drm_client_add_context_objects(ctx->client,
+								    ce);
+			i915_gem_context_put(ctx);
+		}
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 2e5e69edc0f9a..a61356012df80 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -144,4 +144,14 @@ bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
 
 	return true;
 }
+
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+					 struct intel_context *ce)
+{
+	if (ce->state)
+		i915_drm_client_add_object(client, ce->state->obj);
+
+	if (ce->ring != ce->engine->legacy.ring && ce->ring->vma)
+		i915_drm_client_add_object(client, ce->ring->vma->obj);
+}
 #endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 5f58fdf7dcb8c..69cedfcd3d69e 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -14,6 +14,7 @@
 
 #include "i915_file_private.h"
 #include "gem/i915_gem_object_types.h"
+#include "gt/intel_context_types.h"
 
 #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
@@ -70,6 +71,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
 void i915_drm_client_add_object(struct i915_drm_client *client,
 				struct drm_i915_gem_object *obj);
 bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+					 struct intel_context *ce);
 #else
 static inline void i915_drm_client_add_object(struct i915_drm_client *client,
 					      struct drm_i915_gem_object *obj)
@@ -79,6 +82,12 @@ static inline void i915_drm_client_add_object(struct i915_drm_client *client,
 static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
 {
 }
+
+static inline void
+i915_drm_client_add_context_objects(struct i915_drm_client *client,
+				    struct intel_context *ce)
+{
+}
 #endif
 
 #endif /* !__I915_DRM_CLIENT_H__ */
-- 
GitLab


From 3b38d35157530c12c84fc02cccd469b9a0a00ae7 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Tue, 7 Nov 2023 10:18:05 +0000
Subject: [PATCH 0217/2340] drm/i915: Add stable memory region names

At the moment memory region names are a bit too varied and too
inconsistent to be used for ABI purposes, like for upcoming fdinfo
memory stats.

System memory can be either system or system-ttm. Local memory has the
instance number appended, others do not. Not only incosistent but thi
kind of implementation detail is uninteresting for intended users of
fdinfo memory stats.

Add a stable name always formed as $type$instance. Could have chosen a
different stable scheme, but I think any consistent and stable scheme
should do just fine.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-5-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/intel_memory_region.c | 19 +++++++++++++++++++
 drivers/gpu/drm/i915/intel_memory_region.h |  1 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index 3d1fdea9811d0..60a03340bbd43 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -216,6 +216,22 @@ static int intel_memory_region_memtest(struct intel_memory_region *mem,
 	return err;
 }
 
+static const char *region_type_str(u16 type)
+{
+	switch (type) {
+	case INTEL_MEMORY_SYSTEM:
+		return "system";
+	case INTEL_MEMORY_LOCAL:
+		return "local";
+	case INTEL_MEMORY_STOLEN_LOCAL:
+		return "stolen-local";
+	case INTEL_MEMORY_STOLEN_SYSTEM:
+		return "stolen-system";
+	default:
+		return "unknown";
+	}
+}
+
 struct intel_memory_region *
 intel_memory_region_create(struct drm_i915_private *i915,
 			   resource_size_t start,
@@ -244,6 +260,9 @@ intel_memory_region_create(struct drm_i915_private *i915,
 	mem->type = type;
 	mem->instance = instance;
 
+	snprintf(mem->uabi_name, sizeof(mem->uabi_name), "%s%u",
+		 region_type_str(type), instance);
+
 	mutex_init(&mem->objects.lock);
 	INIT_LIST_HEAD(&mem->objects.list);
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 2953ed5c32486..9ba36454e51b0 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -80,6 +80,7 @@ struct intel_memory_region {
 	u16 instance;
 	enum intel_region_id id;
 	char name[16];
+	char uabi_name[16];
 	bool private; /* not for userspace */
 
 	struct {
-- 
GitLab


From 968853033d8aa4dbb80fbafa6f5d9b6a0ea21272 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Tue, 7 Nov 2023 10:18:06 +0000
Subject: [PATCH 0218/2340] drm/i915: Implement fdinfo memory stats printing

Use the newly added drm_print_memory_stats helper to show memory
utilisation of our objects in drm/driver specific fdinfo output.

To collect the stats we walk the per memory regions object lists
and accumulate object size into the respective drm_memory_stats
categories.

v2:
 * Only account against the active region.
 * Use DMA_RESV_USAGE_BOOKKEEP when testing for active. (Tejas)

v3:
 * Update commit text. (Aravind)
 * Update to use memory regions uabi names.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-6-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drm_client.c | 64 ++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index a61356012df80..7efffdaa508da 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -45,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref)
 }
 
 #ifdef CONFIG_PROC_FS
+static void
+obj_meminfo(struct drm_i915_gem_object *obj,
+	    struct drm_memory_stats stats[INTEL_REGION_UNKNOWN])
+{
+	const enum intel_region_id id = obj->mm.region ?
+					obj->mm.region->id : INTEL_REGION_SMEM;
+	const u64 sz = obj->base.size;
+
+	if (obj->base.handle_count > 1)
+		stats[id].shared += sz;
+	else
+		stats[id].private += sz;
+
+	if (i915_gem_object_has_pages(obj)) {
+		stats[id].resident += sz;
+
+		if (!dma_resv_test_signaled(obj->base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP))
+			stats[id].active += sz;
+		else if (i915_gem_object_is_shrinkable(obj) &&
+			 obj->mm.madv == I915_MADV_DONTNEED)
+			stats[id].purgeable += sz;
+	}
+}
+
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {};
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_drm_client *client = fpriv->client;
+	struct drm_i915_private *i915 = fpriv->i915;
+	struct drm_i915_gem_object *obj;
+	struct intel_memory_region *mr;
+	struct list_head *pos;
+	unsigned int id;
+
+	/* Public objects. */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, obj, id)
+		obj_meminfo(obj, stats);
+	spin_unlock(&file->table_lock);
+
+	/* Internal objects. */
+	rcu_read_lock();
+	list_for_each_rcu(pos, &client->objects_list) {
+		obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj),
+							 client_link));
+		if (!obj)
+			continue;
+		obj_meminfo(obj, stats);
+		i915_gem_object_put(obj);
+	}
+	rcu_read_unlock();
+
+	for_each_memory_region(mr, i915, id)
+		drm_print_memory_stats(p,
+				       &stats[id],
+				       DRM_GEM_OBJECT_RESIDENT |
+				       DRM_GEM_OBJECT_PURGEABLE,
+				       mr->uabi_name);
+}
+
 static const char * const uabi_class_names[] = {
 	[I915_ENGINE_CLASS_RENDER] = "render",
 	[I915_ENGINE_CLASS_COPY] = "copy",
@@ -106,6 +168,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 	 * ******************************************************************
 	 */
 
+	show_meminfo(p, file);
+
 	if (GRAPHICS_VER(i915) < 8)
 		return;
 
-- 
GitLab


From 22aa1a209018dc2eca78745f7666db63637cd5dc Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Thu, 2 Nov 2023 15:15:07 +0100
Subject: [PATCH 0219/2340] drm/panfrost: Really power off GPU cores in
 panfrost_gpu_power_off()

The layout of the registers {TILER,SHADER,L2}_PWROFF_LO, used to request
powering off cores, is the same as the {TILER,SHADER,L2}_PWRON_LO ones:
this means that in order to request poweroff of cores, we are supposed
to write a bitmask of cores that should be powered off!
This means that the panfrost_gpu_power_off() function has always been
doing nothing.

Fix powering off the GPU by writing a bitmask of the cores to poweroff
to the relevant PWROFF_LO registers and then check that the transition
(from ON to OFF) has finished by polling the relevant PWRTRANS_LO
registers.

While at it, in order to avoid code duplication, move the core mask
logic from panfrost_gpu_power_on() to a new panfrost_get_core_mask()
function, used in both poweron and poweroff.

Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102141507.73481-1-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_gpu.c | 64 ++++++++++++++++++-------
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index f0be7e19b13ed..97f097ee5c1da 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -362,28 +362,38 @@ unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev)
 	return ((u64)hi << 32) | lo;
 }
 
+static u64 panfrost_get_core_mask(struct panfrost_device *pfdev)
+{
+	u64 core_mask;
+
+	if (pfdev->features.l2_present == 1)
+		return U64_MAX;
+
+	/*
+	 * Only support one core group now.
+	 * ~(l2_present - 1) unsets all bits in l2_present except
+	 * the bottom bit. (l2_present - 2) has all the bits in
+	 * the first core group set. AND them together to generate
+	 * a mask of cores in the first core group.
+	 */
+	core_mask = ~(pfdev->features.l2_present - 1) &
+		     (pfdev->features.l2_present - 2);
+	dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n",
+		      hweight64(core_mask),
+		      hweight64(pfdev->features.shader_present));
+
+	return core_mask;
+}
+
 void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 {
 	int ret;
 	u32 val;
-	u64 core_mask = U64_MAX;
+	u64 core_mask;
 
 	panfrost_gpu_init_quirks(pfdev);
+	core_mask = panfrost_get_core_mask(pfdev);
 
-	if (pfdev->features.l2_present != 1) {
-		/*
-		 * Only support one core group now.
-		 * ~(l2_present - 1) unsets all bits in l2_present except
-		 * the bottom bit. (l2_present - 2) has all the bits in
-		 * the first core group set. AND them together to generate
-		 * a mask of cores in the first core group.
-		 */
-		core_mask = ~(pfdev->features.l2_present - 1) &
-			     (pfdev->features.l2_present - 2);
-		dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n",
-			      hweight64(core_mask),
-			      hweight64(pfdev->features.shader_present));
-	}
 	gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO,
 		val, val == (pfdev->features.l2_present & core_mask),
@@ -408,9 +418,27 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 
 void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 {
-	gpu_write(pfdev, TILER_PWROFF_LO, 0);
-	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
-	gpu_write(pfdev, L2_PWROFF_LO, 0);
+	u64 core_mask = panfrost_get_core_mask(pfdev);
+	int ret;
+	u32 val;
+
+	gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present & core_mask);
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
+					 val, !val, 1, 1000);
+	if (ret)
+		dev_err(pfdev->dev, "shader power transition timeout");
+
+	gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
+					 val, !val, 1, 1000);
+	if (ret)
+		dev_err(pfdev->dev, "tiler power transition timeout");
+
+	gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present & core_mask);
+	ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
+				 val, !val, 0, 1000);
+	if (ret)
+		dev_err(pfdev->dev, "l2 power transition timeout");
 }
 
 int panfrost_gpu_init(struct panfrost_device *pfdev)
-- 
GitLab


From 57d4e26717b030fd794df3534e6b2e806eb761e4 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Thu, 9 Nov 2023 11:25:38 +0100
Subject: [PATCH 0220/2340] drm/panfrost: Perform hard reset to recover GPU if
 soft reset fails

Even though soft reset should ideally never fail, during development of
some power management features I managed to get some bits wrong: this
resulted in GPU soft reset failures, where the GPU was never able to
recover, not even after suspend/resume cycles, meaning that the only
way to get functionality back was to reboot the machine.

Perform a hard reset after a soft reset failure to be able to recover
the GPU during runtime (so, without any machine reboot).

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109102543.42971-2-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_gpu.c  | 13 ++++++++++---
 drivers/gpu/drm/panfrost/panfrost_regs.h |  1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 97f097ee5c1da..c9469bf7770a4 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -60,14 +60,21 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
 
 	gpu_write(pfdev, GPU_INT_MASK, 0);
 	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED);
-	gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET);
 
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT,
 		val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000);
 
 	if (ret) {
-		dev_err(pfdev->dev, "gpu soft reset timed out\n");
-		return ret;
+		dev_err(pfdev->dev, "gpu soft reset timed out, attempting hard reset\n");
+
+		gpu_write(pfdev, GPU_CMD, GPU_CMD_HARD_RESET);
+		ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT, val,
+						 val & GPU_IRQ_RESET_COMPLETED, 100, 10000);
+		if (ret) {
+			dev_err(pfdev->dev, "gpu hard reset timed out\n");
+			return ret;
+		}
 	}
 
 	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index 55ec807550b3f..c25743b05c551 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -44,6 +44,7 @@
 	 GPU_IRQ_MULTIPLE_FAULT)
 #define GPU_CMD				0x30
 #define   GPU_CMD_SOFT_RESET		0x01
+#define   GPU_CMD_HARD_RESET		0x02
 #define   GPU_CMD_PERFCNT_CLEAR		0x03
 #define   GPU_CMD_PERFCNT_SAMPLE	0x04
 #define   GPU_CMD_CYCLE_COUNT_START	0x05
-- 
GitLab


From 4d74420ffcf4c99165908f058066dd242813dc75 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Thu, 9 Nov 2023 11:25:39 +0100
Subject: [PATCH 0221/2340] drm/panfrost: Tighten polling for soft reset and
 power on

In many cases, soft reset takes more than 1 microsecond, but definitely
less than 10; moreover in the poweron flow, tilers, shaders and l2 will
become ready (each) in less than 10 microseconds as well.

Even in the cases (at least on my platforms, rarely) in which those take
more than 10 microseconds, it's very unlikely to see both soft reset and
poweron to take more than 70 microseconds.

Shorten the polling delay to 10 microseconds to consistently reduce the
runtime resume time of the GPU.

As an indicative example, measurements taken on a MediaTek MT8195 SoC

Average runtime resume time in nanoseconds before this commit:
GDM, user selection up/down:            88435ns
GDM, Text Entry (typing user/password): 91489ns
GNOME Desktop, idling, GKRELLM running: 73200ns

After this commit:

GDM: user selection up/down:            26690ns
GDM: Text Entry (typing user/password): 27917ns
GNOME Desktop, idling, GKRELLM running:	25304ns

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109102543.42971-3-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_gpu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index c9469bf7770a4..09f5e1563ebd4 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -63,7 +63,7 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
 
 	gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT,
-		val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000);
+		val, val & GPU_IRQ_RESET_COMPLETED, 10, 10000);
 
 	if (ret) {
 		dev_err(pfdev->dev, "gpu soft reset timed out, attempting hard reset\n");
@@ -404,7 +404,7 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 	gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO,
 		val, val == (pfdev->features.l2_present & core_mask),
-		100, 20000);
+		10, 20000);
 	if (ret)
 		dev_err(pfdev->dev, "error powering up gpu L2");
 
@@ -412,13 +412,13 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 		  pfdev->features.shader_present & core_mask);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO,
 		val, val == (pfdev->features.shader_present & core_mask),
-		100, 20000);
+		10, 20000);
 	if (ret)
 		dev_err(pfdev->dev, "error powering up gpu shader");
 
 	gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO,
-		val, val == pfdev->features.tiler_present, 100, 1000);
+		val, val == pfdev->features.tiler_present, 10, 1000);
 	if (ret)
 		dev_err(pfdev->dev, "error powering up gpu tiler");
 }
-- 
GitLab


From 56e76c0179185568049913257c18069293f8bde9 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Thu, 9 Nov 2023 11:25:40 +0100
Subject: [PATCH 0222/2340] drm/panfrost: Implement ability to turn on/off GPU
 clocks in suspend

Currently, the GPU is being internally powered off for runtime suspend
and turned back on for runtime resume through commands sent to it, but
note that the GPU doesn't need to be clocked during the poweroff state,
hence it is possible to save some power on selected platforms.

Add suspend and resume handlers for full system sleep and then add
a new panfrost_gpu_pm enumeration and a pm_features variable in the
panfrost_compatible structure: BIT(GPU_PM_CLK_DIS) will be used to
enable this power saving technique only on SoCs that are able to
safely use it.

Note that this was implemented only for the system sleep case and not
for runtime PM because testing on one of my MediaTek platforms showed
issues when turning on and off clocks aggressively (in PM runtime)
resulting in a full system lockup.

Doing this only for full system sleep never showed issues during my
testing by suspending and resuming the system continuously for more
than 100 cycles.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109102543.42971-4-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_device.c | 61 ++++++++++++++++++++--
 drivers/gpu/drm/panfrost/panfrost_device.h | 11 ++++
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 28f7046e1b1a4..b4ddbc3b80698 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -403,7 +403,7 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
 	panfrost_job_enable_interrupts(pfdev);
 }
 
-static int panfrost_device_resume(struct device *dev)
+static int panfrost_device_runtime_resume(struct device *dev)
 {
 	struct panfrost_device *pfdev = dev_get_drvdata(dev);
 
@@ -413,7 +413,7 @@ static int panfrost_device_resume(struct device *dev)
 	return 0;
 }
 
-static int panfrost_device_suspend(struct device *dev)
+static int panfrost_device_runtime_suspend(struct device *dev)
 {
 	struct panfrost_device *pfdev = dev_get_drvdata(dev);
 
@@ -426,5 +426,58 @@ static int panfrost_device_suspend(struct device *dev)
 	return 0;
 }
 
-EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
-			      panfrost_device_resume, NULL);
+static int panfrost_device_resume(struct device *dev)
+{
+	struct panfrost_device *pfdev = dev_get_drvdata(dev);
+	int ret;
+
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
+		ret = clk_enable(pfdev->clock);
+		if (ret)
+			return ret;
+
+		if (pfdev->bus_clock) {
+			ret = clk_enable(pfdev->bus_clock);
+			if (ret)
+				goto err_bus_clk;
+		}
+	}
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret)
+		goto err_resume;
+
+	return 0;
+
+err_resume:
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock)
+		clk_disable(pfdev->bus_clock);
+err_bus_clk:
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
+		clk_disable(pfdev->clock);
+	return ret;
+}
+
+static int panfrost_device_suspend(struct device *dev)
+{
+	struct panfrost_device *pfdev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret)
+		return ret;
+
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
+		if (pfdev->bus_clock)
+			clk_disable(pfdev->bus_clock);
+
+		clk_disable(pfdev->clock);
+	}
+
+	return 0;
+}
+
+EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = {
+	RUNTIME_PM_OPS(panfrost_device_runtime_suspend, panfrost_device_runtime_resume, NULL)
+	SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume)
+};
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 1e85656dc2f7f..32453b97c39cb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -25,6 +25,14 @@ struct panfrost_perfcnt;
 #define NUM_JOB_SLOTS 3
 #define MAX_PM_DOMAINS 5
 
+/**
+ * enum panfrost_gpu_pm - Supported kernel power management features
+ * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
+ */
+enum panfrost_gpu_pm {
+	GPU_PM_CLK_DIS,
+};
+
 struct panfrost_features {
 	u16 id;
 	u16 revision;
@@ -75,6 +83,9 @@ struct panfrost_compatible {
 
 	/* Vendor implementation quirks callback */
 	void (*vendor_quirk)(struct panfrost_device *pfdev);
+
+	/* Allowed PM features */
+	u8 pm_features;
 };
 
 struct panfrost_device {
-- 
GitLab


From 32f175d4261a2c940fe7913f509f9f41c105b2b6 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Thu, 9 Nov 2023 11:25:41 +0100
Subject: [PATCH 0223/2340] drm/panfrost: Set clocks on/off during system sleep
 on MediaTek SoCs

All of the MediaTek SoCs supported by Panfrost can switch the clocks
off and on during system sleep to save some power without any user
experience penalty.

Measurements taken on multiple MediaTek SoCs (MT8183/8186/8192/8195)
show that adding this will not prolong the time that is required to
resume the system in any meaningful way.

As an example, for MT8195 - a "before" with only runtime PM operations
(so, without turning on/off GPU clocks), and an "after" executing both
the system sleep .resume() handler and .runtime_resume() (so the time
refers to T_Resume + T_Runtime_Resume):

Average Panfrost-only system sleep resume time, before: ~28000ns
Average Panfrost-only system sleep resume time, after:  ~33500ns

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109102543.42971-5-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 54d1c19bea84d..6d2897c03a8f6 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -730,6 +730,7 @@ static const struct panfrost_compatible mediatek_mt8183_b_data = {
 	.supply_names = mediatek_mt8183_b_supplies,
 	.num_pm_domains = ARRAY_SIZE(mediatek_mt8183_pm_domains),
 	.pm_domain_names = mediatek_mt8183_pm_domains,
+	.pm_features = BIT(GPU_PM_CLK_DIS),
 };
 
 static const char * const mediatek_mt8186_pm_domains[] = { "core0", "core1" };
@@ -738,6 +739,7 @@ static const struct panfrost_compatible mediatek_mt8186_data = {
 	.supply_names = mediatek_mt8183_b_supplies,
 	.num_pm_domains = ARRAY_SIZE(mediatek_mt8186_pm_domains),
 	.pm_domain_names = mediatek_mt8186_pm_domains,
+	.pm_features = BIT(GPU_PM_CLK_DIS),
 };
 
 static const char * const mediatek_mt8192_supplies[] = { "mali", NULL };
@@ -748,6 +750,7 @@ static const struct panfrost_compatible mediatek_mt8192_data = {
 	.supply_names = mediatek_mt8192_supplies,
 	.num_pm_domains = ARRAY_SIZE(mediatek_mt8192_pm_domains),
 	.pm_domain_names = mediatek_mt8192_pm_domains,
+	.pm_features = BIT(GPU_PM_CLK_DIS),
 };
 
 static const struct of_device_id dt_match[] = {
-- 
GitLab


From 889a2b06f823575f4e8def0fe60d294365b0e1f9 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Thu, 9 Nov 2023 11:25:42 +0100
Subject: [PATCH 0224/2340] drm/panfrost: Implement ability to turn on/off
 regulators in suspend

Some platforms/SoCs can power off the GPU entirely by completely cutting
off power, greatly enhancing battery time during system suspend: add a
new pm_feature GPU_PM_VREG_OFF to allow turning off the GPU regulators
during full suspend only on selected platforms.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109102543.42971-6-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_device.c | 19 ++++++++++++++++++-
 drivers/gpu/drm/panfrost/panfrost_device.h |  2 ++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index b4ddbc3b80698..c90ad5ee34e7a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -431,10 +431,21 @@ static int panfrost_device_resume(struct device *dev)
 	struct panfrost_device *pfdev = dev_get_drvdata(dev);
 	int ret;
 
+	if (pfdev->comp->pm_features & BIT(GPU_PM_VREG_OFF)) {
+		unsigned long freq = pfdev->pfdevfreq.fast_rate;
+		struct dev_pm_opp *opp;
+
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		if (IS_ERR(opp))
+			return PTR_ERR(opp);
+		dev_pm_opp_set_opp(dev, opp);
+		dev_pm_opp_put(opp);
+	}
+
 	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
 		ret = clk_enable(pfdev->clock);
 		if (ret)
-			return ret;
+			goto err_clk;
 
 		if (pfdev->bus_clock) {
 			ret = clk_enable(pfdev->bus_clock);
@@ -455,6 +466,9 @@ err_resume:
 err_bus_clk:
 	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
 		clk_disable(pfdev->clock);
+err_clk:
+	if (pfdev->comp->pm_features & BIT(GPU_PM_VREG_OFF))
+		dev_pm_opp_set_opp(dev, NULL);
 	return ret;
 }
 
@@ -474,6 +488,9 @@ static int panfrost_device_suspend(struct device *dev)
 		clk_disable(pfdev->clock);
 	}
 
+	if (pfdev->comp->pm_features & BIT(GPU_PM_VREG_OFF))
+		dev_pm_opp_set_opp(dev, NULL);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 32453b97c39cb..7de77990a2217 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -28,9 +28,11 @@ struct panfrost_perfcnt;
 /**
  * enum panfrost_gpu_pm - Supported kernel power management features
  * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
+ * @GPU_PM_VREG_OFF: Allow turning off regulators during system suspend
  */
 enum panfrost_gpu_pm {
 	GPU_PM_CLK_DIS,
+	GPU_PM_VREG_OFF,
 };
 
 struct panfrost_features {
-- 
GitLab


From 540527b1385fb203cc4513ca838b4de60bbbc49a Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Thu, 9 Nov 2023 11:25:43 +0100
Subject: [PATCH 0225/2340] drm/panfrost: Set regulators on/off during system
 sleep on MediaTek SoCs

All of the MediaTek SoCs supported by Panfrost can completely cut power
to the GPU during full system sleep without any user-noticeable delay
in the resume operation, as shown by measurements taken on multiple
MediaTek SoCs (MT8183/86/92/95).

As an example, for MT8195 - a "before" with only runtime PM operations
(so, without turning on/off regulators), and an "after" executing both
the system sleep .resume() handler and .runtime_resume() (so the time
refers to T_Resume + T_Runtime_Resume):

Average Panfrost-only system sleep resume time, before: ~33500ns
Average Panfrost-only system sleep resume time, after:  ~336200ns

Keep in mind that this additional ~308200 nanoseconds delay happens only
in resume from a full system suspend, and not in runtime PM operations,
hence it is acceptable.

Measurements were also taken on MT8186, showing a delay of ~312000 ns.

Testing of this happened on all of the aforementioned MediaTek SoCs, but:
MT8183 got tested only by KernelCI with <=10 suspend/resume cycles
MT8186, MT8192, MT8195 were tested manually with over 100 suspend/resume
cycles with GNOME DE (Mutter + Wayland).

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109102543.42971-7-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 6d2897c03a8f6..7a628e89d379a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -730,7 +730,7 @@ static const struct panfrost_compatible mediatek_mt8183_b_data = {
 	.supply_names = mediatek_mt8183_b_supplies,
 	.num_pm_domains = ARRAY_SIZE(mediatek_mt8183_pm_domains),
 	.pm_domain_names = mediatek_mt8183_pm_domains,
-	.pm_features = BIT(GPU_PM_CLK_DIS),
+	.pm_features = BIT(GPU_PM_CLK_DIS) | BIT(GPU_PM_VREG_OFF),
 };
 
 static const char * const mediatek_mt8186_pm_domains[] = { "core0", "core1" };
@@ -739,7 +739,7 @@ static const struct panfrost_compatible mediatek_mt8186_data = {
 	.supply_names = mediatek_mt8183_b_supplies,
 	.num_pm_domains = ARRAY_SIZE(mediatek_mt8186_pm_domains),
 	.pm_domain_names = mediatek_mt8186_pm_domains,
-	.pm_features = BIT(GPU_PM_CLK_DIS),
+	.pm_features = BIT(GPU_PM_CLK_DIS) | BIT(GPU_PM_VREG_OFF),
 };
 
 static const char * const mediatek_mt8192_supplies[] = { "mali", NULL };
@@ -750,7 +750,7 @@ static const struct panfrost_compatible mediatek_mt8192_data = {
 	.supply_names = mediatek_mt8192_supplies,
 	.num_pm_domains = ARRAY_SIZE(mediatek_mt8192_pm_domains),
 	.pm_domain_names = mediatek_mt8192_pm_domains,
-	.pm_features = BIT(GPU_PM_CLK_DIS),
+	.pm_features = BIT(GPU_PM_CLK_DIS) | BIT(GPU_PM_VREG_OFF),
 };
 
 static const struct of_device_id dt_match[] = {
-- 
GitLab


From 88a6e46cd3e33756b168c7f2366bf7029a16da56 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 9 Nov 2023 18:07:21 +0200
Subject: [PATCH 0226/2340] drm/i915: abstract plane protection check

Centralize the conditions in a function.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109160722.3372379-1-jani.nikula@intel.com
---
 .../gpu/drm/i915/display/skl_universal_plane.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 68035675ae3ca..98acf25a5ca30 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -1866,6 +1866,19 @@ static bool pxp_is_borked(struct drm_i915_gem_object *obj)
 	return i915_gem_object_is_protected(obj) && !bo_has_valid_encryption(obj);
 }
 
+static void check_protection(struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+
+	if (DISPLAY_VER(i915) < 11)
+		return;
+
+	plane_state->decrypt = bo_has_valid_encryption(intel_fb_obj(fb));
+	plane_state->force_black = pxp_is_borked(intel_fb_obj(fb));
+}
+
 static int skl_plane_check(struct intel_crtc_state *crtc_state,
 			   struct intel_plane_state *plane_state)
 {
@@ -1910,10 +1923,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	if (DISPLAY_VER(dev_priv) >= 11) {
-		plane_state->decrypt = bo_has_valid_encryption(intel_fb_obj(fb));
-		plane_state->force_black = pxp_is_borked(intel_fb_obj(fb));
-	}
+	check_protection(plane_state);
 
 	/* HW only has 8 bits pixel precision, disable plane if invisible */
 	if (!(plane_state->hw.alpha >> 8))
-- 
GitLab


From 9b1c97fc0ce6090c328b5723250f4deeefc95fcd Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 9 Nov 2023 18:07:22 +0200
Subject: [PATCH 0227/2340] drm/i915: remove excess functions from plane
 protection check

Reduce the function calls by reusing ->decrypt.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109160722.3372379-2-jani.nikula@intel.com
---
 .../gpu/drm/i915/display/skl_universal_plane.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 98acf25a5ca30..39499a0ec6c01 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -1854,29 +1854,19 @@ static bool skl_fb_scalable(const struct drm_framebuffer *fb)
 	}
 }
 
-static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-
-	return intel_pxp_key_check(i915->pxp, obj, false) == 0;
-}
-
-static bool pxp_is_borked(struct drm_i915_gem_object *obj)
-{
-	return i915_gem_object_is_protected(obj) && !bo_has_valid_encryption(obj);
-}
-
 static void check_protection(struct intel_plane_state *plane_state)
 {
 	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	struct drm_i915_private *i915 = to_i915(plane->base.dev);
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
 	if (DISPLAY_VER(i915) < 11)
 		return;
 
-	plane_state->decrypt = bo_has_valid_encryption(intel_fb_obj(fb));
-	plane_state->force_black = pxp_is_borked(intel_fb_obj(fb));
+	plane_state->decrypt = intel_pxp_key_check(i915->pxp, obj, false) == 0;
+	plane_state->force_black = i915_gem_object_is_protected(obj) &&
+		!plane_state->decrypt;
 }
 
 static int skl_plane_check(struct intel_crtc_state *crtc_state,
-- 
GitLab


From ff5a55a3e80eba616f4d06fa255bfe4f8b032bec Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 10 Nov 2023 13:48:06 +0200
Subject: [PATCH 0228/2340] MAINTAINERS: update drm/i915 W: and B: entries

The 01.org page has ceased to exist, and the relevant documentation is
now hosted at https://drm.pages.freedesktop.org/intel-docs/

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110114807.3455739-1-jani.nikula@intel.com
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e05506ea89170..d42116703f2f9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10473,9 +10473,9 @@ M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
 M:	Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
 L:	intel-gfx@lists.freedesktop.org
 S:	Supported
-W:	https://01.org/linuxgraphics/
+W:	https://drm.pages.freedesktop.org/intel-docs/
 Q:	http://patchwork.freedesktop.org/project/intel-gfx/
-B:	https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
+B:	https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html
 C:	irc://irc.oftc.net/intel-gfx
 T:	git git://anongit.freedesktop.org/drm-intel
 F:	Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
-- 
GitLab


From f8e9325f09c778fb61d3cebd27a9f3738e6fea48 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 10 Nov 2023 13:48:07 +0200
Subject: [PATCH 0229/2340] drm/i915: update in-source bug filing URLs

The bug filing documentation has been moved from the gitlab wiki to
gitlab pages at https://drm.pages.freedesktop.org/intel-docs/.

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110114807.3455739-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/Kconfig          | 2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
 drivers/gpu/drm/i915/i915_utils.h     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index ce397a8797f7b..b5d6e3352071f 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -94,7 +94,7 @@ config DRM_I915_CAPTURE_ERROR
 	  This option enables capturing the GPU state when a hang is detected.
 	  This information is vital for triaging hangs and assists in debugging.
 	  Please report any hang for triaging according to:
-	    https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
+	    https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html
 
 	  If in doubt, say "Y".
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 53fff687b00ed..6ff410dc1a4a8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -2168,7 +2168,7 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
 	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
 		pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
 		pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n");
-		pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n");
+		pr_info("Please see https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html for details.\n");
 		pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
 		pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n");
 		pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n",
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index c61066498bf2f..f98577967b7fc 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -40,7 +40,7 @@
 struct drm_i915_private;
 struct timer_list;
 
-#define FDO_BUG_URL "https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs"
+#define FDO_BUG_URL "https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html"
 
 #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
 			     __stringify(x), (long)(x))
-- 
GitLab


From e4178256094a76cc36d9b9aabe7482615959b26f Mon Sep 17 00:00:00 2001
From: Gurchetan Singh <gurchetansingh@chromium.org>
Date: Wed, 18 Oct 2023 11:17:26 -0700
Subject: [PATCH 0230/2340] drm/virtio: use uint64_t more in
 virtio_gpu_context_init_ioctl

drm_virtgpu_context_set_param defines both param and
value to be u64s.

Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
Reviewed-by: Josh Simonot <josh.simonot@gmail.com>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018181727.772-1-gurchetansingh@chromium.org
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index b24b11f25197d..8d13b17c215b4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -565,8 +565,8 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev,
 					 void *data, struct drm_file *file)
 {
 	int ret = 0;
-	uint32_t num_params, i, param, value;
-	uint64_t valid_ring_mask;
+	uint32_t num_params, i;
+	uint64_t valid_ring_mask, param, value;
 	size_t len;
 	struct drm_virtgpu_context_set_param *ctx_set_params = NULL;
 	struct virtio_gpu_device *vgdev = dev->dev_private;
-- 
GitLab


From 7add80126bcedddd157ddc09988b032c93ed56c7 Mon Sep 17 00:00:00 2001
From: Gurchetan Singh <gurchetansingh@chromium.org>
Date: Wed, 18 Oct 2023 11:17:27 -0700
Subject: [PATCH 0231/2340] drm/uapi: add explicit virtgpu context debug name

There are two problems with the current method of determining the
virtio-gpu debug name.

1) TASK_COMM_LEN is defined to be 16 bytes only, and this is a
   Linux kernel idiom (see PR_SET_NAME + PR_GET_NAME). Though,
   Android/FreeBSD get around this via setprogname(..)/getprogname(..)
   in libc.

   On Android, names longer than 16 bytes are common.  For example,
   one often encounters a program like "com.android.systemui".

   The virtio-gpu spec allows the debug name to be up to 64 bytes, so
   ideally userspace should be able to set debug names up to 64 bytes.

2) The current implementation determines the debug name using whatever
   task initiated virtgpu.  This is could be a "RenderThread" of a
   larger program, when we actually want to propagate the debug name
   of the program.

To fix these issues, add a new CONTEXT_INIT param that allows userspace
to set the debug name when creating a context.

It takes a null-terminated C-string as the param value. The length of the
string (excluding the terminator) **should** be <= 64 bytes.  Otherwise,
the debug_name will be truncated to 64 bytes.

Link to open-source userspace:
https://android-review.googlesource.com/c/platform/hardware/google/gfxstream/+/2787176

Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
Reviewed-by: Josh Simonot <josh.simonot@gmail.com>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231018181727.772-2-gurchetansingh@chromium.org
---
 drivers/gpu/drm/virtio/virtgpu_drv.h   |  5 ++++
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 36 +++++++++++++++++++++-----
 include/uapi/drm/virtgpu_drm.h         |  2 ++
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 96365a772f774..bb7d86a0c6a1f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -58,6 +58,9 @@
 #define MAX_CAPSET_ID 63
 #define MAX_RINGS 64
 
+/* See virtio_gpu_ctx_create. One additional character for NULL terminator. */
+#define DEBUG_NAME_MAX_LEN 65
+
 struct virtio_gpu_object_params {
 	unsigned long size;
 	bool dumb;
@@ -274,6 +277,8 @@ struct virtio_gpu_fpriv {
 	uint64_t base_fence_ctx;
 	uint64_t ring_idx_mask;
 	struct mutex context_lock;
+	char debug_name[DEBUG_NAME_MAX_LEN];
+	bool explicit_debug_name;
 };
 
 /* virtgpu_ioctl.c */
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 8d13b17c215b4..1e2042419f957 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -42,12 +42,19 @@
 static void virtio_gpu_create_context_locked(struct virtio_gpu_device *vgdev,
 					     struct virtio_gpu_fpriv *vfpriv)
 {
-	char dbgname[TASK_COMM_LEN];
+	if (vfpriv->explicit_debug_name) {
+		virtio_gpu_cmd_context_create(vgdev, vfpriv->ctx_id,
+					      vfpriv->context_init,
+					      strlen(vfpriv->debug_name),
+					      vfpriv->debug_name);
+	} else {
+		char dbgname[TASK_COMM_LEN];
 
-	get_task_comm(dbgname, current);
-	virtio_gpu_cmd_context_create(vgdev, vfpriv->ctx_id,
-				      vfpriv->context_init, strlen(dbgname),
-				      dbgname);
+		get_task_comm(dbgname, current);
+		virtio_gpu_cmd_context_create(vgdev, vfpriv->ctx_id,
+					      vfpriv->context_init, strlen(dbgname),
+					      dbgname);
+	}
 
 	vfpriv->context_created = true;
 }
@@ -107,6 +114,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data,
 	case VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs:
 		value = vgdev->capset_id_mask;
 		break;
+	case VIRTGPU_PARAM_EXPLICIT_DEBUG_NAME:
+		value = vgdev->has_context_init ? 1 : 0;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -580,7 +590,7 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev,
 		return -EINVAL;
 
 	/* Number of unique parameters supported at this time. */
-	if (num_params > 3)
+	if (num_params > 4)
 		return -EINVAL;
 
 	ctx_set_params = memdup_user(u64_to_user_ptr(args->ctx_set_params),
@@ -642,6 +652,20 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev,
 
 			vfpriv->ring_idx_mask = value;
 			break;
+		case VIRTGPU_CONTEXT_PARAM_DEBUG_NAME:
+			if (vfpriv->explicit_debug_name) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+
+			ret = strncpy_from_user(vfpriv->debug_name,
+						u64_to_user_ptr(value),
+						DEBUG_NAME_MAX_LEN - 1);
+			if (ret < 0)
+				goto out_unlock;
+
+			vfpriv->explicit_debug_name = true;
+			break;
 		default:
 			ret = -EINVAL;
 			goto out_unlock;
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index b1d0e56565bcb..c2ce71987e9bb 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -97,6 +97,7 @@ struct drm_virtgpu_execbuffer {
 #define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing  */
 #define VIRTGPU_PARAM_CONTEXT_INIT 6 /* DRM_VIRTGPU_CONTEXT_INIT */
 #define VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs 7 /* Bitmask of supported capability set ids */
+#define VIRTGPU_PARAM_EXPLICIT_DEBUG_NAME 8 /* Ability to set debug name from userspace */
 
 struct drm_virtgpu_getparam {
 	__u64 param;
@@ -198,6 +199,7 @@ struct drm_virtgpu_resource_create_blob {
 #define VIRTGPU_CONTEXT_PARAM_CAPSET_ID       0x0001
 #define VIRTGPU_CONTEXT_PARAM_NUM_RINGS       0x0002
 #define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003
+#define VIRTGPU_CONTEXT_PARAM_DEBUG_NAME      0x0004
 struct drm_virtgpu_context_set_param {
 	__u64 param;
 	__u64 value;
-- 
GitLab


From 59be90248b422f2924872de0be2867652214096a Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Thu, 9 Nov 2023 13:21:48 +0200
Subject: [PATCH 0232/2340] drm/i915/mtl: C20 state verification

Add state verification for C20 as we have one
for C10.

V2: Use abstractation of HW readout (Gustavo)
    Drop MPLLA/B from message for TX and CMN
    parameters (Gustavo)

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com> (v1,v2)
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109112148.309669-1-mika.kahola@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c  | 120 +++++++++++++-----
 drivers/gpu/drm/i915/display/intel_cx0_phy.h  |   2 +-
 .../drm/i915/display/intel_modeset_verify.c   |   2 +-
 3 files changed, 88 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index b2ad4c6172f6c..a8fa765808020 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -3017,55 +3017,33 @@ intel_mtl_port_pll_type(struct intel_encoder *encoder,
 		return ICL_PORT_DPLL_DEFAULT;
 }
 
-void intel_c10pll_state_verify(struct intel_atomic_state *state,
-			       struct intel_crtc *crtc)
+static void intel_c10pll_state_verify(const struct intel_crtc_state *state,
+				      struct intel_crtc *crtc,
+				      struct intel_encoder *encoder,
+				      struct intel_c10pll_state *mpllb_hw_state)
 {
-	struct drm_i915_private *i915 = to_i915(state->base.dev);
-	const struct intel_crtc_state *new_crtc_state =
-		intel_atomic_get_new_crtc_state(state, crtc);
-	struct intel_c10pll_state mpllb_hw_state = {};
-	const struct intel_c10pll_state *mpllb_sw_state = &new_crtc_state->cx0pll_state.c10;
-	struct intel_encoder *encoder;
-	enum phy phy;
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	const struct intel_c10pll_state *mpllb_sw_state = &state->cx0pll_state.c10;
 	int i;
 
-	if (DISPLAY_VER(i915) < 14)
-		return;
-
-	if (!new_crtc_state->hw.active)
-		return;
-
-	/* intel_get_crtc_new_encoder() only works for modeset/fastset commits */
-	if (!intel_crtc_needs_modeset(new_crtc_state) &&
-	    !intel_crtc_needs_fastset(new_crtc_state))
-		return;
-
-	encoder = intel_get_crtc_new_encoder(state, new_crtc_state);
-	phy = intel_port_to_phy(i915, encoder->port);
-
-	if (!intel_is_c10phy(i915, phy))
-		return;
-
-	intel_c10pll_readout_hw_state(encoder, &mpllb_hw_state);
-
 	for (i = 0; i < ARRAY_SIZE(mpllb_sw_state->pll); i++) {
 		u8 expected = mpllb_sw_state->pll[i];
 
-		I915_STATE_WARN(i915, mpllb_hw_state.pll[i] != expected,
+		I915_STATE_WARN(i915, mpllb_hw_state->pll[i] != expected,
 				"[CRTC:%d:%s] mismatch in C10MPLLB: Register[%d] (expected 0x%02x, found 0x%02x)",
 				crtc->base.base.id, crtc->base.name, i,
-				expected, mpllb_hw_state.pll[i]);
+				expected, mpllb_hw_state->pll[i]);
 	}
 
-	I915_STATE_WARN(i915, mpllb_hw_state.tx != mpllb_sw_state->tx,
+	I915_STATE_WARN(i915, mpllb_hw_state->tx != mpllb_sw_state->tx,
 			"[CRTC:%d:%s] mismatch in C10MPLLB: Register TX0 (expected 0x%02x, found 0x%02x)",
 			crtc->base.base.id, crtc->base.name,
-			mpllb_sw_state->tx, mpllb_hw_state.tx);
+			mpllb_sw_state->tx, mpllb_hw_state->tx);
 
-	I915_STATE_WARN(i915, mpllb_hw_state.cmn != mpllb_sw_state->cmn,
+	I915_STATE_WARN(i915, mpllb_hw_state->cmn != mpllb_sw_state->cmn,
 			"[CRTC:%d:%s] mismatch in C10MPLLB: Register CMN0 (expected 0x%02x, found 0x%02x)",
 			crtc->base.base.id, crtc->base.name,
-			mpllb_sw_state->cmn, mpllb_hw_state.cmn);
+			mpllb_sw_state->cmn, mpllb_hw_state->cmn);
 }
 
 void intel_cx0pll_readout_hw_state(struct intel_encoder *encoder,
@@ -3091,3 +3069,77 @@ int intel_cx0pll_calc_port_clock(struct intel_encoder *encoder,
 
 	return intel_c20pll_calc_port_clock(encoder, &pll_state->c20);
 }
+
+static void intel_c20pll_state_verify(const struct intel_crtc_state *state,
+				      struct intel_crtc *crtc,
+				      struct intel_encoder *encoder,
+				      struct intel_c20pll_state *mpll_hw_state)
+{
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	const struct intel_c20pll_state *mpll_sw_state = &state->cx0pll_state.c20;
+	bool use_mplla;
+	int i;
+
+	use_mplla = intel_c20_use_mplla(mpll_hw_state->clock);
+	if (use_mplla) {
+		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) {
+			I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i],
+					"[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)",
+					crtc->base.base.id, crtc->base.name, i,
+					mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]);
+		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mpllb); i++) {
+			I915_STATE_WARN(i915, mpll_hw_state->mpllb[i] != mpll_sw_state->mpllb[i],
+					"[CRTC:%d:%s] mismatch in C20MPLLB: Register[%d] (expected 0x%04x, found 0x%04x)",
+					crtc->base.base.id, crtc->base.name, i,
+					mpll_sw_state->mpllb[i], mpll_hw_state->mpllb[i]);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(mpll_sw_state->tx); i++) {
+		I915_STATE_WARN(i915, mpll_hw_state->tx[i] != mpll_sw_state->tx[i],
+				"[CRTC:%d:%s] mismatch in C20: Register TX[%i] (expected 0x%04x, found 0x%04x)",
+				crtc->base.base.id, crtc->base.name, i,
+				mpll_sw_state->tx[i], mpll_hw_state->tx[i]);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(mpll_sw_state->cmn); i++) {
+		I915_STATE_WARN(i915, mpll_hw_state->cmn[i] != mpll_sw_state->cmn[i],
+				"[CRTC:%d:%s] mismatch in C20: Register CMN[%i] (expected 0x%04x, found 0x%04x)",
+				crtc->base.base.id, crtc->base.name, i,
+				mpll_sw_state->cmn[i], mpll_hw_state->cmn[i]);
+	}
+}
+
+void intel_cx0pll_state_verify(struct intel_atomic_state *state,
+			       struct intel_crtc *crtc)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	struct intel_encoder *encoder;
+	struct intel_cx0pll_state mpll_hw_state = {};
+	enum phy phy;
+
+	if (DISPLAY_VER(i915) < 14)
+		return;
+
+	if (!new_crtc_state->hw.active)
+		return;
+
+	/* intel_get_crtc_new_encoder() only works for modeset/fastset commits */
+	if (!intel_crtc_needs_modeset(new_crtc_state) &&
+	    !intel_crtc_needs_fastset(new_crtc_state))
+		return;
+
+	encoder = intel_get_crtc_new_encoder(state, new_crtc_state);
+	phy = intel_port_to_phy(i915, encoder->port);
+
+	intel_cx0pll_readout_hw_state(encoder, &mpll_hw_state);
+
+	if (intel_is_c10phy(i915, phy))
+		intel_c10pll_state_verify(new_crtc_state, crtc, encoder, &mpll_hw_state.c10);
+	else
+		intel_c20pll_state_verify(new_crtc_state, crtc, encoder, &mpll_hw_state.c20);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.h b/drivers/gpu/drm/i915/display/intel_cx0_phy.h
index 222aed16e739f..c6682677253a8 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.h
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.h
@@ -38,7 +38,7 @@ int intel_cx0pll_calc_port_clock(struct intel_encoder *encoder,
 
 void intel_c10pll_dump_hw_state(struct drm_i915_private *dev_priv,
 				const struct intel_c10pll_state *hw_state);
-void intel_c10pll_state_verify(struct intel_atomic_state *state,
+void intel_cx0pll_state_verify(struct intel_atomic_state *state,
 			       struct intel_crtc *crtc);
 void intel_c20pll_dump_hw_state(struct drm_i915_private *i915,
 				const struct intel_c20pll_state *hw_state);
diff --git a/drivers/gpu/drm/i915/display/intel_modeset_verify.c b/drivers/gpu/drm/i915/display/intel_modeset_verify.c
index 5e1c2c7804126..076298a8d4058 100644
--- a/drivers/gpu/drm/i915/display/intel_modeset_verify.c
+++ b/drivers/gpu/drm/i915/display/intel_modeset_verify.c
@@ -244,7 +244,7 @@ void intel_modeset_verify_crtc(struct intel_atomic_state *state,
 	verify_crtc_state(state, crtc);
 	intel_shared_dpll_state_verify(state, crtc);
 	intel_mpllb_state_verify(state, crtc);
-	intel_c10pll_state_verify(state, crtc);
+	intel_cx0pll_state_verify(state, crtc);
 }
 
 void intel_modeset_verify_disabled(struct intel_atomic_state *state)
-- 
GitLab


From 546ca4d35dccaca6613766ed36ccfb2b5bd63bfe Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:31 +0100
Subject: [PATCH 0233/2340] drm/gpuvm: convert WARN() to drm_WARN() variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use drm_WARN() and drm_WARN_ON() variants to indicate drivers the
context the failing VM resides in.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-2-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c            | 32 ++++++++++++++------------
 drivers/gpu/drm/nouveau/nouveau_uvmm.c |  3 ++-
 include/drm/drm_gpuvm.h                |  7 ++++++
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 08c088319652e..d7367a202fee6 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -614,12 +614,12 @@ static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm,
 static void __drm_gpuva_remove(struct drm_gpuva *va);
 
 static bool
-drm_gpuvm_check_overflow(u64 addr, u64 range)
+drm_gpuvm_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
 {
 	u64 end;
 
-	return WARN(check_add_overflow(addr, range, &end),
-		    "GPUVA address limited to %zu bytes.\n", sizeof(end));
+	return drm_WARN(gpuvm->drm, check_add_overflow(addr, range, &end),
+			"GPUVA address limited to %zu bytes.\n", sizeof(end));
 }
 
 static bool
@@ -647,7 +647,7 @@ static bool
 drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
 		      u64 addr, u64 range)
 {
-	return !drm_gpuvm_check_overflow(addr, range) &&
+	return !drm_gpuvm_check_overflow(gpuvm, addr, range) &&
 	       drm_gpuvm_in_mm_range(gpuvm, addr, range) &&
 	       !drm_gpuvm_in_kernel_node(gpuvm, addr, range);
 }
@@ -656,6 +656,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
  * drm_gpuvm_init() - initialize a &drm_gpuvm
  * @gpuvm: pointer to the &drm_gpuvm to initialize
  * @name: the name of the GPU VA space
+ * @drm: the &drm_device this VM resides in
  * @start_offset: the start offset of the GPU VA space
  * @range: the size of the GPU VA space
  * @reserve_offset: the start of the kernel reserved GPU VA area
@@ -668,8 +669,8 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
  * &name is expected to be managed by the surrounding driver structures.
  */
 void
-drm_gpuvm_init(struct drm_gpuvm *gpuvm,
-	       const char *name,
+drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+	       struct drm_device *drm,
 	       u64 start_offset, u64 range,
 	       u64 reserve_offset, u64 reserve_range,
 	       const struct drm_gpuvm_ops *ops)
@@ -677,20 +678,20 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm,
 	gpuvm->rb.tree = RB_ROOT_CACHED;
 	INIT_LIST_HEAD(&gpuvm->rb.list);
 
-	drm_gpuvm_check_overflow(start_offset, range);
-	gpuvm->mm_start = start_offset;
-	gpuvm->mm_range = range;
-
 	gpuvm->name = name ? name : "unknown";
 	gpuvm->ops = ops;
+	gpuvm->drm = drm;
 
-	memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva));
+	drm_gpuvm_check_overflow(gpuvm, start_offset, range);
+	gpuvm->mm_start = start_offset;
+	gpuvm->mm_range = range;
 
+	memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva));
 	if (reserve_range) {
 		gpuvm->kernel_alloc_node.va.addr = reserve_offset;
 		gpuvm->kernel_alloc_node.va.range = reserve_range;
 
-		if (likely(!drm_gpuvm_check_overflow(reserve_offset,
+		if (likely(!drm_gpuvm_check_overflow(gpuvm, reserve_offset,
 						     reserve_range)))
 			__drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node);
 	}
@@ -712,8 +713,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm)
 	if (gpuvm->kernel_alloc_node.va.range)
 		__drm_gpuva_remove(&gpuvm->kernel_alloc_node);
 
-	WARN(!RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root),
-	     "GPUVA tree is not empty, potentially leaking memory.");
+	drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root),
+		 "GPUVA tree is not empty, potentially leaking memory.\n");
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_destroy);
 
@@ -795,7 +796,8 @@ drm_gpuva_remove(struct drm_gpuva *va)
 	struct drm_gpuvm *gpuvm = va->vm;
 
 	if (unlikely(va == &gpuvm->kernel_alloc_node)) {
-		WARN(1, "Can't destroy kernel reserved node.\n");
+		drm_WARN(gpuvm->drm, 1,
+			 "Can't destroy kernel reserved node.\n");
 		return;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 5cf892c50f43c..aaf5d28bd5872 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1808,6 +1808,7 @@ int
 nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 		  u64 kernel_managed_addr, u64 kernel_managed_size)
 {
+	struct drm_device *drm = cli->drm->dev;
 	int ret;
 	u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
 
@@ -1836,7 +1837,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 	uvmm->kernel_managed_addr = kernel_managed_addr;
 	uvmm->kernel_managed_size = kernel_managed_size;
 
-	drm_gpuvm_init(&uvmm->base, cli->name,
+	drm_gpuvm_init(&uvmm->base, cli->name, drm,
 		       NOUVEAU_VA_SPACE_START,
 		       NOUVEAU_VA_SPACE_END,
 		       kernel_managed_addr, kernel_managed_size,
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index bdfafc4a7705e..687fd5893624f 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -29,6 +29,7 @@
 #include <linux/rbtree.h>
 #include <linux/types.h>
 
+#include <drm/drm_device.h>
 #include <drm/drm_gem.h>
 
 struct drm_gpuvm;
@@ -201,6 +202,11 @@ struct drm_gpuvm {
 	 */
 	const char *name;
 
+	/**
+	 * @drm: the &drm_device this VM lives in
+	 */
+	struct drm_device *drm;
+
 	/**
 	 * @mm_start: start of the VA space
 	 */
@@ -241,6 +247,7 @@ struct drm_gpuvm {
 };
 
 void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+		    struct drm_device *drm,
 		    u64 start_offset, u64 range,
 		    u64 reserve_offset, u64 reserve_range,
 		    const struct drm_gpuvm_ops *ops);
-- 
GitLab


From d1adea27d0c8a08031b075f1bf4c5ce6f135ad7c Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:32 +0100
Subject: [PATCH 0234/2340] drm/gpuvm: don't always WARN in
 drm_gpuvm_check_overflow()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't always WARN in drm_gpuvm_check_overflow() and separate it into a
drm_gpuvm_check_overflow() and a dedicated
drm_gpuvm_warn_check_overflow() variant.

This avoids printing warnings due to invalid userspace requests.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-3-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index d7367a202fee6..445767f8fbc4a 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -614,12 +614,18 @@ static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm,
 static void __drm_gpuva_remove(struct drm_gpuva *va);
 
 static bool
-drm_gpuvm_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
+drm_gpuvm_check_overflow(u64 addr, u64 range)
 {
 	u64 end;
 
-	return drm_WARN(gpuvm->drm, check_add_overflow(addr, range, &end),
-			"GPUVA address limited to %zu bytes.\n", sizeof(end));
+	return check_add_overflow(addr, range, &end);
+}
+
+static bool
+drm_gpuvm_warn_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
+{
+	return drm_WARN(gpuvm->drm, drm_gpuvm_check_overflow(addr, range),
+			"GPUVA address limited to %zu bytes.\n", sizeof(addr));
 }
 
 static bool
@@ -647,7 +653,7 @@ static bool
 drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
 		      u64 addr, u64 range)
 {
-	return !drm_gpuvm_check_overflow(gpuvm, addr, range) &&
+	return !drm_gpuvm_check_overflow(addr, range) &&
 	       drm_gpuvm_in_mm_range(gpuvm, addr, range) &&
 	       !drm_gpuvm_in_kernel_node(gpuvm, addr, range);
 }
@@ -682,7 +688,7 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 	gpuvm->ops = ops;
 	gpuvm->drm = drm;
 
-	drm_gpuvm_check_overflow(gpuvm, start_offset, range);
+	drm_gpuvm_warn_check_overflow(gpuvm, start_offset, range);
 	gpuvm->mm_start = start_offset;
 	gpuvm->mm_range = range;
 
@@ -691,8 +697,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 		gpuvm->kernel_alloc_node.va.addr = reserve_offset;
 		gpuvm->kernel_alloc_node.va.range = reserve_range;
 
-		if (likely(!drm_gpuvm_check_overflow(gpuvm, reserve_offset,
-						     reserve_range)))
+		if (likely(!drm_gpuvm_warn_check_overflow(gpuvm, reserve_offset,
+							  reserve_range)))
 			__drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node);
 	}
 }
-- 
GitLab


From 9297cfc9405bc6b60540b8b8aaf930b7e449e15a Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:33 +0100
Subject: [PATCH 0235/2340] drm/gpuvm: export drm_gpuvm_range_valid()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drivers may use this function to validate userspace requests in advance,
hence export it.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-4-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c | 14 +++++++++++++-
 include/drm/drm_gpuvm.h     |  1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 445767f8fbc4a..2669f9bbc3770 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -649,7 +649,18 @@ drm_gpuvm_in_kernel_node(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
 	return krange && addr < kend && kstart < end;
 }
 
-static bool
+/**
+ * drm_gpuvm_range_valid() - checks whether the given range is valid for the
+ * given &drm_gpuvm
+ * @gpuvm: the GPUVM to check the range for
+ * @addr: the base address
+ * @range: the range starting from the base address
+ *
+ * Checks whether the range is within the GPUVM's managed boundaries.
+ *
+ * Returns: true for a valid range, false otherwise
+ */
+bool
 drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
 		      u64 addr, u64 range)
 {
@@ -657,6 +668,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
 	       drm_gpuvm_in_mm_range(gpuvm, addr, range) &&
 	       !drm_gpuvm_in_kernel_node(gpuvm, addr, range);
 }
+EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid);
 
 /**
  * drm_gpuvm_init() - initialize a &drm_gpuvm
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 687fd5893624f..13eac6f700610 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -253,6 +253,7 @@ void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 		    const struct drm_gpuvm_ops *ops);
 void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm);
 
+bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
 bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
 
 static inline struct drm_gpuva *
-- 
GitLab


From b41e297abd2347075ec640daf0e5da576e3d7418 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:34 +0100
Subject: [PATCH 0236/2340] drm/nouveau: make use of drm_gpuvm_range_valid()

Use drm_gpuvm_range_valid() in order to validate userspace requests.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-5-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_uvmm.c | 17 +----------------
 drivers/gpu/drm/nouveau/nouveau_uvmm.h |  3 ---
 2 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index aaf5d28bd5872..641a911528db4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -929,25 +929,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nouveau_uvmm *uvmm,
 static int
 nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range)
 {
-	u64 end = addr + range;
-	u64 kernel_managed_end = uvmm->kernel_managed_addr +
-				 uvmm->kernel_managed_size;
-
 	if (addr & ~PAGE_MASK)
 		return -EINVAL;
 
 	if (range & ~PAGE_MASK)
 		return -EINVAL;
 
-	if (end <= addr)
-		return -EINVAL;
-
-	if (addr < NOUVEAU_VA_SPACE_START ||
-	    end > NOUVEAU_VA_SPACE_END)
-		return -EINVAL;
-
-	if (addr < kernel_managed_end &&
-	    end > uvmm->kernel_managed_addr)
+	if (!drm_gpuvm_range_valid(&uvmm->base, addr, range))
 		return -EINVAL;
 
 	return 0;
@@ -1834,9 +1822,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 		goto out_unlock;
 	}
 
-	uvmm->kernel_managed_addr = kernel_managed_addr;
-	uvmm->kernel_managed_size = kernel_managed_size;
-
 	drm_gpuvm_init(&uvmm->base, cli->name, drm,
 		       NOUVEAU_VA_SPACE_START,
 		       NOUVEAU_VA_SPACE_END,
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
index a308c59760a54..06a0c36de392f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -14,9 +14,6 @@ struct nouveau_uvmm {
 	struct mutex mutex;
 	struct dma_resv resv;
 
-	u64 kernel_managed_addr;
-	u64 kernel_managed_size;
-
 	bool disabled;
 };
 
-- 
GitLab


From bbe8458037e74b9887ba2f0f0b8084a13ade3a90 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:35 +0100
Subject: [PATCH 0237/2340] drm/gpuvm: add common dma-resv per struct drm_gpuvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide a common dma-resv for GEM objects not being used outside of this
GPU-VM. This is used in a subsequent patch to generalize dma-resv,
external and evicted object handling and GEM validation.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-6-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c            | 53 ++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_uvmm.c | 13 ++++++-
 include/drm/drm_gpuvm.h                | 33 ++++++++++++++++
 3 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 2669f9bbc3770..af5805e4d7c9f 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -61,6 +61,15 @@
  * contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva
  * entries from within dma-fence signalling critical sections it is enough to
  * pre-allocate the &drm_gpuva structures.
+ *
+ * &drm_gem_objects which are private to a single VM can share a common
+ * &dma_resv in order to improve locking efficiency (e.g. with &drm_exec).
+ * For this purpose drivers must pass a &drm_gem_object to drm_gpuvm_init(), in
+ * the following called 'resv object', which serves as the container of the
+ * GPUVM's shared &dma_resv. This resv object can be a driver specific
+ * &drm_gem_object, such as the &drm_gem_object containing the root page table,
+ * but it can also be a 'dummy' object, which can be allocated with
+ * drm_gpuvm_resv_object_alloc().
  */
 
 /**
@@ -670,11 +679,49 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm,
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid);
 
+static void
+drm_gpuvm_gem_object_free(struct drm_gem_object *obj)
+{
+	drm_gem_object_release(obj);
+	kfree(obj);
+}
+
+static const struct drm_gem_object_funcs drm_gpuvm_object_funcs = {
+	.free = drm_gpuvm_gem_object_free,
+};
+
+/**
+ * drm_gpuvm_resv_object_alloc() - allocate a dummy &drm_gem_object
+ * @drm: the drivers &drm_device
+ *
+ * Allocates a dummy &drm_gem_object which can be passed to drm_gpuvm_init() in
+ * order to serve as root GEM object providing the &drm_resv shared across
+ * &drm_gem_objects local to a single GPUVM.
+ *
+ * Returns: the &drm_gem_object on success, NULL on failure
+ */
+struct drm_gem_object *
+drm_gpuvm_resv_object_alloc(struct drm_device *drm)
+{
+	struct drm_gem_object *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return NULL;
+
+	obj->funcs = &drm_gpuvm_object_funcs;
+	drm_gem_private_object_init(drm, obj, 0);
+
+	return obj;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc);
+
 /**
  * drm_gpuvm_init() - initialize a &drm_gpuvm
  * @gpuvm: pointer to the &drm_gpuvm to initialize
  * @name: the name of the GPU VA space
  * @drm: the &drm_device this VM resides in
+ * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv
  * @start_offset: the start offset of the GPU VA space
  * @range: the size of the GPU VA space
  * @reserve_offset: the start of the kernel reserved GPU VA area
@@ -689,6 +736,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid);
 void
 drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 	       struct drm_device *drm,
+	       struct drm_gem_object *r_obj,
 	       u64 start_offset, u64 range,
 	       u64 reserve_offset, u64 reserve_range,
 	       const struct drm_gpuvm_ops *ops)
@@ -699,6 +747,9 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 	gpuvm->name = name ? name : "unknown";
 	gpuvm->ops = ops;
 	gpuvm->drm = drm;
+	gpuvm->r_obj = r_obj;
+
+	drm_gem_object_get(r_obj);
 
 	drm_gpuvm_warn_check_overflow(gpuvm, start_offset, range);
 	gpuvm->mm_start = start_offset;
@@ -733,6 +784,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm)
 
 	drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root),
 		 "GPUVA tree is not empty, potentially leaking memory.\n");
+
+	drm_gem_object_put(gpuvm->r_obj);
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_destroy);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 641a911528db4..f74bf30bc6831 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1797,8 +1797,9 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 		  u64 kernel_managed_addr, u64 kernel_managed_size)
 {
 	struct drm_device *drm = cli->drm->dev;
-	int ret;
+	struct drm_gem_object *r_obj;
 	u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
+	int ret;
 
 	mutex_init(&uvmm->mutex);
 	dma_resv_init(&uvmm->resv);
@@ -1822,11 +1823,19 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 		goto out_unlock;
 	}
 
-	drm_gpuvm_init(&uvmm->base, cli->name, drm,
+	r_obj = drm_gpuvm_resv_object_alloc(drm);
+	if (!r_obj) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj,
 		       NOUVEAU_VA_SPACE_START,
 		       NOUVEAU_VA_SPACE_END,
 		       kernel_managed_addr, kernel_managed_size,
 		       NULL);
+	/* GPUVM takes care from here on. */
+	drm_gem_object_put(r_obj);
 
 	ret = nvif_vmm_ctor(&cli->mmu, "uvmm",
 			    cli->vmm.vmm.object.oclass, RAW,
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 13eac6f700610..ff3377cbfe52a 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -244,10 +244,16 @@ struct drm_gpuvm {
 	 * @ops: &drm_gpuvm_ops providing the split/merge steps to drivers
 	 */
 	const struct drm_gpuvm_ops *ops;
+
+	/**
+	 * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv.
+	 */
+	struct drm_gem_object *r_obj;
 };
 
 void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 		    struct drm_device *drm,
+		    struct drm_gem_object *r_obj,
 		    u64 start_offset, u64 range,
 		    u64 reserve_offset, u64 reserve_range,
 		    const struct drm_gpuvm_ops *ops);
@@ -256,6 +262,33 @@ void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm);
 bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
 bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
 
+struct drm_gem_object *
+drm_gpuvm_resv_object_alloc(struct drm_device *drm);
+
+/**
+ * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv
+ * @gpuvm__: the &drm_gpuvm
+ *
+ * Returns: a pointer to the &drm_gpuvm's shared &dma_resv
+ */
+#define drm_gpuvm_resv(gpuvm__) ((gpuvm__)->r_obj->resv)
+
+/**
+ * drm_gpuvm_resv_obj() - returns the &drm_gem_object holding the &drm_gpuvm's
+ * &dma_resv
+ * @gpuvm__: the &drm_gpuvm
+ *
+ * Returns: a pointer to the &drm_gem_object holding the &drm_gpuvm's shared
+ * &dma_resv
+ */
+#define drm_gpuvm_resv_obj(gpuvm__) ((gpuvm__)->r_obj)
+
+#define drm_gpuvm_resv_held(gpuvm__) \
+	dma_resv_held(drm_gpuvm_resv(gpuvm__))
+
+#define drm_gpuvm_resv_assert_held(gpuvm__) \
+	dma_resv_assert_held(drm_gpuvm_resv(gpuvm__))
+
 static inline struct drm_gpuva *
 __drm_gpuva_next(struct drm_gpuva *va)
 {
-- 
GitLab


From 6118411428a393fb0868bad9025d71875418058b Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:36 +0100
Subject: [PATCH 0238/2340] drm/nouveau: make use of the GPUVM's shared
 dma-resv

DRM GEM objects private to a single GPUVM can use a shared dma-resv.
Make use of the shared dma-resv of GPUVM rather than a driver specific
one.

The shared dma-resv originates from a "root" GEM object serving as
container for the dma-resv to make it compatible with drm_exec.

In order to make sure the object proving the shared dma-resv can't be
freed up before the objects making use of it, let every such GEM object
take a reference on it.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-7-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c   | 11 +++++++++--
 drivers/gpu/drm/nouveau/nouveau_bo.h   |  5 +++++
 drivers/gpu/drm/nouveau/nouveau_gem.c  | 10 ++++++++--
 drivers/gpu/drm/nouveau/nouveau_uvmm.c |  7 ++-----
 drivers/gpu/drm/nouveau/nouveau_uvmm.h |  1 -
 5 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0f3bd187ede67..7afad86da64b7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 	 * If nouveau_bo_new() allocated this buffer, the GEM object was never
 	 * initialized, so don't attempt to release it.
 	 */
-	if (bo->base.dev)
+	if (bo->base.dev) {
+		/* Gem objects not being shared with other VMs get their
+		 * dma_resv from a root GEM object.
+		 */
+		if (nvbo->no_share)
+			drm_gem_object_put(nvbo->r_obj);
+
 		drm_gem_object_release(&bo->base);
-	else
+	} else {
 		dma_resv_fini(&bo->base._resv);
+	}
 
 	kfree(nvbo);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 07f671cf895e0..70c551921a9ee 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,11 @@ struct nouveau_bo {
 	struct list_head entry;
 	int pbbo_index;
 	bool validate_mapped;
+
+	/* Root GEM object we derive the dma_resv of in case this BO is not
+	 * shared between VMs.
+	 */
+	struct drm_gem_object *r_obj;
 	bool no_share;
 
 	/* GPU address space is independent of CPU word size */
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index a0d303e5ce3d8..49c2bcbef1299 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv)
 	if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50)
 		return 0;
 
-	if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv)
+	if (nvbo->no_share && uvmm &&
+	    drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv)
 		return -EPERM;
 
 	ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
@@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
 		if (unlikely(!uvmm))
 			return -EINVAL;
 
-		resv = &uvmm->resv;
+		resv = drm_gpuvm_resv(&uvmm->base);
 	}
 
 	if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART)))
@@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
 	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		nvbo->valid_domains &= domain;
 
+	if (nvbo->no_share) {
+		nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base);
+		drm_gem_object_get(nvbo->r_obj);
+	}
+
 	*pnvbo = nvbo;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index f74bf30bc6831..8977a518de96e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1802,7 +1802,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 	int ret;
 
 	mutex_init(&uvmm->mutex);
-	dma_resv_init(&uvmm->resv);
 	mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
 	mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
 
@@ -1842,14 +1841,14 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 			    kernel_managed_addr, kernel_managed_size,
 			    NULL, 0, &cli->uvmm.vmm.vmm);
 	if (ret)
-		goto out_free_gpuva_mgr;
+		goto out_gpuvm_fini;
 
 	cli->uvmm.vmm.cli = cli;
 	mutex_unlock(&cli->mutex);
 
 	return 0;
 
-out_free_gpuva_mgr:
+out_gpuvm_fini:
 	drm_gpuvm_destroy(&uvmm->base);
 out_unlock:
 	mutex_unlock(&cli->mutex);
@@ -1907,6 +1906,4 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
 	nouveau_vmm_fini(&uvmm->vmm);
 	drm_gpuvm_destroy(&uvmm->base);
 	mutex_unlock(&cli->mutex);
-
-	dma_resv_fini(&uvmm->resv);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
index 06a0c36de392f..22607270fae0d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -12,7 +12,6 @@ struct nouveau_uvmm {
 	struct nouveau_vmm vmm;
 	struct maple_tree region_mt;
 	struct mutex mutex;
-	struct dma_resv resv;
 
 	bool disabled;
 };
-- 
GitLab


From 809ef191ee600e8bcbe2f8a769e00d2d54c16094 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:37 +0100
Subject: [PATCH 0239/2340] drm/gpuvm: add drm_gpuvm_flags to drm_gpuvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce flags for struct drm_gpuvm, this required by subsequent
commits.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-8-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c            |  3 +++
 drivers/gpu/drm/nouveau/nouveau_uvmm.c |  2 +-
 include/drm/drm_gpuvm.h                | 16 ++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index af5805e4d7c9f..53e2c406fb04e 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -720,6 +720,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc);
  * drm_gpuvm_init() - initialize a &drm_gpuvm
  * @gpuvm: pointer to the &drm_gpuvm to initialize
  * @name: the name of the GPU VA space
+ * @flags: the &drm_gpuvm_flags for this GPUVM
  * @drm: the &drm_device this VM resides in
  * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv
  * @start_offset: the start offset of the GPU VA space
@@ -735,6 +736,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc);
  */
 void
 drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+	       enum drm_gpuvm_flags flags,
 	       struct drm_device *drm,
 	       struct drm_gem_object *r_obj,
 	       u64 start_offset, u64 range,
@@ -745,6 +747,7 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 	INIT_LIST_HEAD(&gpuvm->rb.list);
 
 	gpuvm->name = name ? name : "unknown";
+	gpuvm->flags = flags;
 	gpuvm->ops = ops;
 	gpuvm->drm = drm;
 	gpuvm->r_obj = r_obj;
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 8977a518de96e..f765e38353066 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1828,7 +1828,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 		goto out_unlock;
 	}
 
-	drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj,
+	drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj,
 		       NOUVEAU_VA_SPACE_START,
 		       NOUVEAU_VA_SPACE_END,
 		       kernel_managed_addr, kernel_managed_size,
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index ff3377cbfe52a..0c2e24155a93d 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -184,6 +184,16 @@ static inline bool drm_gpuva_invalidated(struct drm_gpuva *va)
 	return va->flags & DRM_GPUVA_INVALIDATED;
 }
 
+/**
+ * enum drm_gpuvm_flags - flags for struct drm_gpuvm
+ */
+enum drm_gpuvm_flags {
+	/**
+	 * @DRM_GPUVM_USERBITS: user defined bits
+	 */
+	DRM_GPUVM_USERBITS = BIT(0),
+};
+
 /**
  * struct drm_gpuvm - DRM GPU VA Manager
  *
@@ -202,6 +212,11 @@ struct drm_gpuvm {
 	 */
 	const char *name;
 
+	/**
+	 * @flags: the &drm_gpuvm_flags of this GPUVM
+	 */
+	enum drm_gpuvm_flags flags;
+
 	/**
 	 * @drm: the &drm_device this VM lives in
 	 */
@@ -252,6 +267,7 @@ struct drm_gpuvm {
 };
 
 void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
+		    enum drm_gpuvm_flags flags,
 		    struct drm_device *drm,
 		    struct drm_gem_object *r_obj,
 		    u64 start_offset, u64 range,
-- 
GitLab


From 266f7618e761c8a6aa89dbfe43cda1b69cdbbf14 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:38 +0100
Subject: [PATCH 0240/2340] drm/nouveau: separately allocate struct
 nouveau_uvmm

Allocate struct nouveau_uvmm separately in preparation for subsequent
commits introducing reference counting for struct drm_gpuvm.

While at it, get rid of nouveau_uvmm_init() as indirection of
nouveau_uvmm_ioctl_vm_init() and perform some minor cleanups.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-9-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_drm.c  |  5 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h  | 10 ++--
 drivers/gpu/drm/nouveau/nouveau_uvmm.c | 63 +++++++++++++-------------
 drivers/gpu/drm/nouveau/nouveau_uvmm.h |  4 --
 4 files changed, 40 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 50589f982d1a4..f603eaef15608 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
 static void
 nouveau_cli_fini(struct nouveau_cli *cli)
 {
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli);
+
 	/* All our channels are dead now, which means all the fences they
 	 * own are signalled, and all callback functions have been called.
 	 *
@@ -199,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
 	WARN_ON(!list_empty(&cli->worker));
 
 	usif_client_fini(cli);
-	nouveau_uvmm_fini(&cli->uvmm);
+	if (uvmm)
+		nouveau_uvmm_fini(uvmm);
 	nouveau_sched_entity_fini(&cli->sched_entity);
 	nouveau_vmm_fini(&cli->svm);
 	nouveau_vmm_fini(&cli->vmm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 3666a7403e472..e514110bf391f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -93,7 +93,10 @@ struct nouveau_cli {
 	struct nvif_mmu mmu;
 	struct nouveau_vmm vmm;
 	struct nouveau_vmm svm;
-	struct nouveau_uvmm uvmm;
+	struct {
+		struct nouveau_uvmm *ptr;
+		bool disabled;
+	} uvmm;
 
 	struct nouveau_sched_entity sched_entity;
 
@@ -121,10 +124,7 @@ struct nouveau_cli_work {
 static inline struct nouveau_uvmm *
 nouveau_cli_uvmm(struct nouveau_cli *cli)
 {
-	if (!cli || !cli->uvmm.vmm.cli)
-		return NULL;
-
-	return &cli->uvmm;
+	return cli ? cli->uvmm.ptr : NULL;
 }
 
 static inline struct nouveau_uvmm *
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index f765e38353066..54be12c1272f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1636,18 +1636,6 @@ err_free:
 	return ret;
 }
 
-int
-nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
-			   void *data,
-			   struct drm_file *file_priv)
-{
-	struct nouveau_cli *cli = nouveau_cli(file_priv);
-	struct drm_nouveau_vm_init *init = data;
-
-	return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr,
-				 init->kernel_managed_size);
-}
-
 static int
 nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args)
 {
@@ -1793,17 +1781,25 @@ nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo)
 }
 
 int
-nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
-		  u64 kernel_managed_addr, u64 kernel_managed_size)
+nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
+			   void *data,
+			   struct drm_file *file_priv)
 {
+	struct nouveau_uvmm *uvmm;
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct drm_device *drm = cli->drm->dev;
 	struct drm_gem_object *r_obj;
-	u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
+	struct drm_nouveau_vm_init *init = data;
+	u64 kernel_managed_end;
 	int ret;
 
-	mutex_init(&uvmm->mutex);
-	mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
-	mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
+	if (check_add_overflow(init->kernel_managed_addr,
+			       init->kernel_managed_size,
+			       &kernel_managed_end))
+		return -EINVAL;
+
+	if (kernel_managed_end > NOUVEAU_VA_SPACE_END)
+		return -EINVAL;
 
 	mutex_lock(&cli->mutex);
 
@@ -1812,44 +1808,49 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
 		goto out_unlock;
 	}
 
-	if (kernel_managed_end <= kernel_managed_addr) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	if (kernel_managed_end > NOUVEAU_VA_SPACE_END) {
-		ret = -EINVAL;
+	uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL);
+	if (!uvmm) {
+		ret = -ENOMEM;
 		goto out_unlock;
 	}
 
 	r_obj = drm_gpuvm_resv_object_alloc(drm);
 	if (!r_obj) {
+		kfree(uvmm);
 		ret = -ENOMEM;
 		goto out_unlock;
 	}
 
+	mutex_init(&uvmm->mutex);
+	mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
+	mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
+
 	drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj,
 		       NOUVEAU_VA_SPACE_START,
 		       NOUVEAU_VA_SPACE_END,
-		       kernel_managed_addr, kernel_managed_size,
+		       init->kernel_managed_addr,
+		       init->kernel_managed_size,
 		       NULL);
 	/* GPUVM takes care from here on. */
 	drm_gem_object_put(r_obj);
 
 	ret = nvif_vmm_ctor(&cli->mmu, "uvmm",
 			    cli->vmm.vmm.object.oclass, RAW,
-			    kernel_managed_addr, kernel_managed_size,
-			    NULL, 0, &cli->uvmm.vmm.vmm);
+			    init->kernel_managed_addr,
+			    init->kernel_managed_size,
+			    NULL, 0, &uvmm->vmm.vmm);
 	if (ret)
 		goto out_gpuvm_fini;
 
-	cli->uvmm.vmm.cli = cli;
+	uvmm->vmm.cli = cli;
+	cli->uvmm.ptr = uvmm;
 	mutex_unlock(&cli->mutex);
 
 	return 0;
 
 out_gpuvm_fini:
 	drm_gpuvm_destroy(&uvmm->base);
+	kfree(uvmm);
 out_unlock:
 	mutex_unlock(&cli->mutex);
 	return ret;
@@ -1864,9 +1865,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
 	struct nouveau_sched_entity *entity = &cli->sched_entity;
 	struct drm_gpuva *va, *next;
 
-	if (!cli)
-		return;
-
 	rmb(); /* for list_empty to work without lock */
 	wait_event(entity->job.wq, list_empty(&entity->job.list.head));
 
@@ -1905,5 +1903,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
 	mutex_lock(&cli->mutex);
 	nouveau_vmm_fini(&uvmm->vmm);
 	drm_gpuvm_destroy(&uvmm->base);
+	kfree(uvmm);
 	mutex_unlock(&cli->mutex);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
index 22607270fae0d..f0a6d98ace4fd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -12,8 +12,6 @@ struct nouveau_uvmm {
 	struct nouveau_vmm vmm;
 	struct maple_tree region_mt;
 	struct mutex mutex;
-
-	bool disabled;
 };
 
 struct nouveau_uvma_region {
@@ -78,8 +76,6 @@ struct nouveau_uvmm_bind_job_args {
 
 #define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base)
 
-int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
-		      u64 kernel_managed_addr, u64 kernel_managed_size);
 void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm);
 
 void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem);
-- 
GitLab


From 8af72338dd81d1f8667e0240bd28f5fc98b3f20d Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:39 +0100
Subject: [PATCH 0241/2340] drm/gpuvm: reference count drm_gpuvm structures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement reference counting for struct drm_gpuvm.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-10-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c            | 56 +++++++++++++++++++++-----
 drivers/gpu/drm/nouveau/nouveau_uvmm.c | 20 ++++++---
 include/drm/drm_gpuvm.h                | 31 +++++++++++++-
 3 files changed, 90 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 53e2c406fb04e..ef968eba6fe63 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -746,6 +746,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 	gpuvm->rb.tree = RB_ROOT_CACHED;
 	INIT_LIST_HEAD(&gpuvm->rb.list);
 
+	kref_init(&gpuvm->kref);
+
 	gpuvm->name = name ? name : "unknown";
 	gpuvm->flags = flags;
 	gpuvm->ops = ops;
@@ -770,15 +772,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_init);
 
-/**
- * drm_gpuvm_destroy() - cleanup a &drm_gpuvm
- * @gpuvm: pointer to the &drm_gpuvm to clean up
- *
- * Note that it is a bug to call this function on a manager that still
- * holds GPU VA mappings.
- */
-void
-drm_gpuvm_destroy(struct drm_gpuvm *gpuvm)
+static void
+drm_gpuvm_fini(struct drm_gpuvm *gpuvm)
 {
 	gpuvm->name = NULL;
 
@@ -790,7 +785,35 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm)
 
 	drm_gem_object_put(gpuvm->r_obj);
 }
-EXPORT_SYMBOL_GPL(drm_gpuvm_destroy);
+
+static void
+drm_gpuvm_free(struct kref *kref)
+{
+	struct drm_gpuvm *gpuvm = container_of(kref, struct drm_gpuvm, kref);
+
+	drm_gpuvm_fini(gpuvm);
+
+	if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free))
+		return;
+
+	gpuvm->ops->vm_free(gpuvm);
+}
+
+/**
+ * drm_gpuvm_put() - drop a struct drm_gpuvm reference
+ * @gpuvm: the &drm_gpuvm to release the reference of
+ *
+ * This releases a reference to @gpuvm.
+ *
+ * This function may be called from atomic context.
+ */
+void
+drm_gpuvm_put(struct drm_gpuvm *gpuvm)
+{
+	if (gpuvm)
+		kref_put(&gpuvm->kref, drm_gpuvm_free);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_put);
 
 static int
 __drm_gpuva_insert(struct drm_gpuvm *gpuvm,
@@ -839,11 +862,21 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm,
 {
 	u64 addr = va->va.addr;
 	u64 range = va->va.range;
+	int ret;
 
 	if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range)))
 		return -EINVAL;
 
-	return __drm_gpuva_insert(gpuvm, va);
+	ret = __drm_gpuva_insert(gpuvm, va);
+	if (likely(!ret))
+		/* Take a reference of the GPUVM for the successfully inserted
+		 * drm_gpuva. We can't take the reference in
+		 * __drm_gpuva_insert() itself, since we don't want to increse
+		 * the reference count for the GPUVM's kernel_alloc_node.
+		 */
+		drm_gpuvm_get(gpuvm);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(drm_gpuva_insert);
 
@@ -876,6 +909,7 @@ drm_gpuva_remove(struct drm_gpuva *va)
 	}
 
 	__drm_gpuva_remove(va);
+	drm_gpuvm_put(va->vm);
 }
 EXPORT_SYMBOL_GPL(drm_gpuva_remove);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 54be12c1272f2..cb2f06565c462 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1780,6 +1780,18 @@ nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo)
 	}
 }
 
+static void
+nouveau_uvmm_free(struct drm_gpuvm *gpuvm)
+{
+	struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm);
+
+	kfree(uvmm);
+}
+
+static const struct drm_gpuvm_ops gpuvm_ops = {
+	.vm_free = nouveau_uvmm_free,
+};
+
 int
 nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
 			   void *data,
@@ -1830,7 +1842,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
 		       NOUVEAU_VA_SPACE_END,
 		       init->kernel_managed_addr,
 		       init->kernel_managed_size,
-		       NULL);
+		       &gpuvm_ops);
 	/* GPUVM takes care from here on. */
 	drm_gem_object_put(r_obj);
 
@@ -1849,8 +1861,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
 	return 0;
 
 out_gpuvm_fini:
-	drm_gpuvm_destroy(&uvmm->base);
-	kfree(uvmm);
+	drm_gpuvm_put(&uvmm->base);
 out_unlock:
 	mutex_unlock(&cli->mutex);
 	return ret;
@@ -1902,7 +1913,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
 
 	mutex_lock(&cli->mutex);
 	nouveau_vmm_fini(&uvmm->vmm);
-	drm_gpuvm_destroy(&uvmm->base);
-	kfree(uvmm);
+	drm_gpuvm_put(&uvmm->base);
 	mutex_unlock(&cli->mutex);
 }
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 0c2e24155a93d..4e6e1fd3485ad 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -247,6 +247,11 @@ struct drm_gpuvm {
 		struct list_head list;
 	} rb;
 
+	/**
+	 * @kref: reference count of this object
+	 */
+	struct kref kref;
+
 	/**
 	 * @kernel_alloc_node:
 	 *
@@ -273,7 +278,23 @@ void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 		    u64 start_offset, u64 range,
 		    u64 reserve_offset, u64 reserve_range,
 		    const struct drm_gpuvm_ops *ops);
-void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm);
+
+/**
+ * drm_gpuvm_get() - acquire a struct drm_gpuvm reference
+ * @gpuvm: the &drm_gpuvm to acquire the reference of
+ *
+ * This function acquires an additional reference to @gpuvm. It is illegal to
+ * call this without already holding a reference. No locks required.
+ */
+static inline struct drm_gpuvm *
+drm_gpuvm_get(struct drm_gpuvm *gpuvm)
+{
+	kref_get(&gpuvm->kref);
+
+	return gpuvm;
+}
+
+void drm_gpuvm_put(struct drm_gpuvm *gpuvm);
 
 bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
 bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
@@ -673,6 +694,14 @@ static inline void drm_gpuva_init_from_op(struct drm_gpuva *va,
  * operations to drivers.
  */
 struct drm_gpuvm_ops {
+	/**
+	 * @vm_free: called when the last reference of a struct drm_gpuvm is
+	 * dropped
+	 *
+	 * This callback is mandatory.
+	 */
+	void (*vm_free)(struct drm_gpuvm *gpuvm);
+
 	/**
 	 * @op_alloc: called when the &drm_gpuvm allocates
 	 * a struct drm_gpuva_op
-- 
GitLab


From 94bc2249f08e141fb4aa120bfdc392c7a5e78211 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:40 +0100
Subject: [PATCH 0242/2340] drm/gpuvm: add an abstraction for a VM / BO
 combination
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an abstraction layer between the drm_gpuva mappings of a particular
drm_gem_object and this GEM object itself. The abstraction represents a
combination of a drm_gem_object and drm_gpuvm. The drm_gem_object holds
a list of drm_gpuvm_bo structures (the structure representing this
abstraction), while each drm_gpuvm_bo contains list of mappings of this
GEM object.

This has multiple advantages:

1) We can use the drm_gpuvm_bo structure to attach it to various lists
   of the drm_gpuvm. This is useful for tracking external and evicted
   objects per VM, which is introduced in subsequent patches.

2) Finding mappings of a certain drm_gem_object mapped in a certain
   drm_gpuvm becomes much cheaper.

3) Drivers can derive and extend the structure to easily represent
   driver specific states of a BO for a certain GPUVM.

The idea of this abstraction was taken from amdgpu, hence the credit for
this idea goes to the developers of amdgpu.

Cc: Christian König <christian.koenig@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-11-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c            | 340 +++++++++++++++++++++----
 drivers/gpu/drm/nouveau/nouveau_uvmm.c |  63 +++--
 include/drm/drm_gem.h                  |  32 +--
 include/drm/drm_gpuvm.h                | 185 +++++++++++++-
 4 files changed, 534 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index ef968eba6fe63..7e193bca8bab0 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -70,6 +70,18 @@
  * &drm_gem_object, such as the &drm_gem_object containing the root page table,
  * but it can also be a 'dummy' object, which can be allocated with
  * drm_gpuvm_resv_object_alloc().
+ *
+ * In order to connect a struct drm_gpuva its backing &drm_gem_object each
+ * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each
+ * &drm_gpuvm_bo contains a list of &drm_gpuva structures.
+ *
+ * A &drm_gpuvm_bo is an abstraction that represents a combination of a
+ * &drm_gpuvm and a &drm_gem_object. Every such combination should be unique.
+ * This is ensured by the API through drm_gpuvm_bo_obtain() and
+ * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding
+ * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this
+ * particular combination. If not existent a new instance is created and linked
+ * to the &drm_gem_object.
  */
 
 /**
@@ -395,21 +407,28 @@
 /**
  * DOC: Locking
  *
- * Generally, the GPU VA manager does not take care of locking itself, it is
- * the drivers responsibility to take care about locking. Drivers might want to
- * protect the following operations: inserting, removing and iterating
- * &drm_gpuva objects as well as generating all kinds of operations, such as
- * split / merge or prefetch.
- *
- * The GPU VA manager also does not take care of the locking of the backing
- * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to
- * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively
- * a driver specific external lock. For the latter see also
- * drm_gem_gpuva_set_lock().
- *
- * However, the GPU VA manager contains lockdep checks to ensure callers of its
- * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is
- * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink().
+ * In terms of managing &drm_gpuva entries DRM GPUVM does not take care of
+ * locking itself, it is the drivers responsibility to take care about locking.
+ * Drivers might want to protect the following operations: inserting, removing
+ * and iterating &drm_gpuva objects as well as generating all kinds of
+ * operations, such as split / merge or prefetch.
+ *
+ * DRM GPUVM also does not take care of the locking of the backing
+ * &drm_gem_object buffers GPU VA lists and &drm_gpuvm_bo abstractions by
+ * itself; drivers are responsible to enforce mutual exclusion using either the
+ * GEMs dma_resv lock or alternatively a driver specific external lock. For the
+ * latter see also drm_gem_gpuva_set_lock().
+ *
+ * However, DRM GPUVM contains lockdep checks to ensure callers of its API hold
+ * the corresponding lock whenever the &drm_gem_objects GPU VA list is accessed
+ * by functions such as drm_gpuva_link() or drm_gpuva_unlink(), but also
+ * drm_gpuvm_bo_obtain() and drm_gpuvm_bo_put().
+ *
+ * The latter is required since on creation and destruction of a &drm_gpuvm_bo
+ * the &drm_gpuvm_bo is attached / removed from the &drm_gem_objects gpuva list.
+ * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and
+ * &drm_gem_object must be able to observe previous creations and destructions
+ * of &drm_gpuvm_bos in order to keep instances unique.
  */
 
 /**
@@ -439,6 +458,7 @@
  *	{
  *		struct drm_gpuva_ops *ops;
  *		struct drm_gpuva_op *op
+ *		struct drm_gpuvm_bo *vm_bo;
  *
  *		driver_lock_va_space();
  *		ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range,
@@ -446,6 +466,10 @@
  *		if (IS_ERR(ops))
  *			return PTR_ERR(ops);
  *
+ *		vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj);
+ *		if (IS_ERR(vm_bo))
+ *			return PTR_ERR(vm_bo);
+ *
  *		drm_gpuva_for_each_op(op, ops) {
  *			struct drm_gpuva *va;
  *
@@ -458,7 +482,7 @@
  *
  *				driver_vm_map();
  *				drm_gpuva_map(gpuvm, va, &op->map);
- *				drm_gpuva_link(va);
+ *				drm_gpuva_link(va, vm_bo);
  *
  *				break;
  *			case DRM_GPUVA_OP_REMAP: {
@@ -485,11 +509,11 @@
  *				driver_vm_remap();
  *				drm_gpuva_remap(prev, next, &op->remap);
  *
- *				drm_gpuva_unlink(va);
  *				if (prev)
- *					drm_gpuva_link(prev);
+ *					drm_gpuva_link(prev, va->vm_bo);
  *				if (next)
- *					drm_gpuva_link(next);
+ *					drm_gpuva_link(next, va->vm_bo);
+ *				drm_gpuva_unlink(va);
  *
  *				break;
  *			}
@@ -505,6 +529,7 @@
  *				break;
  *			}
  *		}
+ *		drm_gpuvm_bo_put(vm_bo);
  *		driver_unlock_va_space();
  *
  *		return 0;
@@ -514,6 +539,7 @@
  *
  *	struct driver_context {
  *		struct drm_gpuvm *gpuvm;
+ *		struct drm_gpuvm_bo *vm_bo;
  *		struct drm_gpuva *new_va;
  *		struct drm_gpuva *prev_va;
  *		struct drm_gpuva *next_va;
@@ -534,6 +560,7 @@
  *				  struct drm_gem_object *obj, u64 offset)
  *	{
  *		struct driver_context ctx;
+ *		struct drm_gpuvm_bo *vm_bo;
  *		struct drm_gpuva_ops *ops;
  *		struct drm_gpuva_op *op;
  *		int ret = 0;
@@ -543,16 +570,23 @@
  *		ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL);
  *		ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL);
  *		ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL);
- *		if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) {
+ *		ctx.vm_bo = drm_gpuvm_bo_create(gpuvm, obj);
+ *		if (!ctx.new_va || !ctx.prev_va || !ctx.next_va || !vm_bo) {
  *			ret = -ENOMEM;
  *			goto out;
  *		}
  *
+ *		// Typically protected with a driver specific GEM gpuva lock
+ *		// used in the fence signaling path for drm_gpuva_link() and
+ *		// drm_gpuva_unlink(), hence pre-allocate.
+ *		ctx.vm_bo = drm_gpuvm_bo_obtain_prealloc(ctx.vm_bo);
+ *
  *		driver_lock_va_space();
  *		ret = drm_gpuvm_sm_map(gpuvm, &ctx, addr, range, obj, offset);
  *		driver_unlock_va_space();
  *
  *	out:
+ *		drm_gpuvm_bo_put(ctx.vm_bo);
  *		kfree(ctx.new_va);
  *		kfree(ctx.prev_va);
  *		kfree(ctx.next_va);
@@ -565,7 +599,7 @@
  *
  *		drm_gpuva_map(ctx->vm, ctx->new_va, &op->map);
  *
- *		drm_gpuva_link(ctx->new_va);
+ *		drm_gpuva_link(ctx->new_va, ctx->vm_bo);
  *
  *		// prevent the new GPUVA from being freed in
  *		// driver_mapping_create()
@@ -577,22 +611,23 @@
  *	int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx)
  *	{
  *		struct driver_context *ctx = __ctx;
+ *		struct drm_gpuva *va = op->remap.unmap->va;
  *
  *		drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap);
  *
- *		drm_gpuva_unlink(op->remap.unmap->va);
- *		kfree(op->remap.unmap->va);
- *
  *		if (op->remap.prev) {
- *			drm_gpuva_link(ctx->prev_va);
+ *			drm_gpuva_link(ctx->prev_va, va->vm_bo);
  *			ctx->prev_va = NULL;
  *		}
  *
  *		if (op->remap.next) {
- *			drm_gpuva_link(ctx->next_va);
+ *			drm_gpuva_link(ctx->next_va, va->vm_bo);
  *			ctx->next_va = NULL;
  *		}
  *
+ *		drm_gpuva_unlink(va);
+ *		kfree(va);
+ *
  *		return 0;
  *	}
  *
@@ -815,6 +850,199 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm)
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_put);
 
+/**
+ * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo
+ * @gpuvm: The &drm_gpuvm the @obj is mapped in.
+ * @obj: The &drm_gem_object being mapped in the @gpuvm.
+ *
+ * If provided by the driver, this function uses the &drm_gpuvm_ops
+ * vm_bo_alloc() callback to allocate.
+ *
+ * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure
+ */
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm,
+		    struct drm_gem_object *obj)
+{
+	const struct drm_gpuvm_ops *ops = gpuvm->ops;
+	struct drm_gpuvm_bo *vm_bo;
+
+	if (ops && ops->vm_bo_alloc)
+		vm_bo = ops->vm_bo_alloc();
+	else
+		vm_bo = kzalloc(sizeof(*vm_bo), GFP_KERNEL);
+
+	if (unlikely(!vm_bo))
+		return NULL;
+
+	vm_bo->vm = drm_gpuvm_get(gpuvm);
+	vm_bo->obj = obj;
+	drm_gem_object_get(obj);
+
+	kref_init(&vm_bo->kref);
+	INIT_LIST_HEAD(&vm_bo->list.gpuva);
+	INIT_LIST_HEAD(&vm_bo->list.entry.gem);
+
+	return vm_bo;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create);
+
+static void
+drm_gpuvm_bo_destroy(struct kref *kref)
+{
+	struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo,
+						  kref);
+	struct drm_gpuvm *gpuvm = vm_bo->vm;
+	const struct drm_gpuvm_ops *ops = gpuvm->ops;
+	struct drm_gem_object *obj = vm_bo->obj;
+	bool lock = !drm_gpuvm_resv_protected(gpuvm);
+
+	if (!lock)
+		drm_gpuvm_resv_assert_held(gpuvm);
+
+	drm_gem_gpuva_assert_lock_held(obj);
+	list_del(&vm_bo->list.entry.gem);
+
+	if (ops && ops->vm_bo_free)
+		ops->vm_bo_free(vm_bo);
+	else
+		kfree(vm_bo);
+
+	drm_gpuvm_put(gpuvm);
+	drm_gem_object_put(obj);
+}
+
+/**
+ * drm_gpuvm_bo_put() - drop a struct drm_gpuvm_bo reference
+ * @vm_bo: the &drm_gpuvm_bo to release the reference of
+ *
+ * This releases a reference to @vm_bo.
+ *
+ * If the reference count drops to zero, the &gpuvm_bo is destroyed, which
+ * includes removing it from the GEMs gpuva list. Hence, if a call to this
+ * function can potentially let the reference count drop to zero the caller must
+ * hold the dma-resv or driver specific GEM gpuva lock.
+ *
+ * This function may only be called from non-atomic context.
+ */
+void
+drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo)
+{
+	might_sleep();
+
+	if (vm_bo)
+		kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put);
+
+static struct drm_gpuvm_bo *
+__drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm,
+		    struct drm_gem_object *obj)
+{
+	struct drm_gpuvm_bo *vm_bo;
+
+	drm_gem_gpuva_assert_lock_held(obj);
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj)
+		if (vm_bo->vm == gpuvm)
+			return vm_bo;
+
+	return NULL;
+}
+
+/**
+ * drm_gpuvm_bo_find() - find the &drm_gpuvm_bo for the given
+ * &drm_gpuvm and &drm_gem_object
+ * @gpuvm: The &drm_gpuvm the @obj is mapped in.
+ * @obj: The &drm_gem_object being mapped in the @gpuvm.
+ *
+ * Find the &drm_gpuvm_bo representing the combination of the given
+ * &drm_gpuvm and &drm_gem_object. If found, increases the reference
+ * count of the &drm_gpuvm_bo accordingly.
+ *
+ * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure
+ */
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm,
+		  struct drm_gem_object *obj)
+{
+	struct drm_gpuvm_bo *vm_bo = __drm_gpuvm_bo_find(gpuvm, obj);
+
+	return vm_bo ? drm_gpuvm_bo_get(vm_bo) : NULL;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find);
+
+/**
+ * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the
+ * given &drm_gpuvm and &drm_gem_object
+ * @gpuvm: The &drm_gpuvm the @obj is mapped in.
+ * @obj: The &drm_gem_object being mapped in the @gpuvm.
+ *
+ * Find the &drm_gpuvm_bo representing the combination of the given
+ * &drm_gpuvm and &drm_gem_object. If found, increases the reference
+ * count of the &drm_gpuvm_bo accordingly. If not found, allocates a new
+ * &drm_gpuvm_bo.
+ *
+ * A new &drm_gpuvm_bo is added to the GEMs gpuva list.
+ *
+ * Returns: a pointer to the &drm_gpuvm_bo on success, an ERR_PTR on failure
+ */
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm,
+		    struct drm_gem_object *obj)
+{
+	struct drm_gpuvm_bo *vm_bo;
+
+	vm_bo = drm_gpuvm_bo_find(gpuvm, obj);
+	if (vm_bo)
+		return vm_bo;
+
+	vm_bo = drm_gpuvm_bo_create(gpuvm, obj);
+	if (!vm_bo)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_gpuva_assert_lock_held(obj);
+	list_add_tail(&vm_bo->list.entry.gem, &obj->gpuva.list);
+
+	return vm_bo;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain);
+
+/**
+ * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo
+ * for the given &drm_gpuvm and &drm_gem_object
+ * @__vm_bo: A pre-allocated struct drm_gpuvm_bo.
+ *
+ * Find the &drm_gpuvm_bo representing the combination of the given
+ * &drm_gpuvm and &drm_gem_object. If found, increases the reference
+ * count of the found &drm_gpuvm_bo accordingly, while the @__vm_bo reference
+ * count is decreased. If not found @__vm_bo is returned without further
+ * increase of the reference count.
+ *
+ * A new &drm_gpuvm_bo is added to the GEMs gpuva list.
+ *
+ * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing
+ * &drm_gpuvm_bo was found
+ */
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo)
+{
+	struct drm_gpuvm *gpuvm = __vm_bo->vm;
+	struct drm_gem_object *obj = __vm_bo->obj;
+	struct drm_gpuvm_bo *vm_bo;
+
+	vm_bo = drm_gpuvm_bo_find(gpuvm, obj);
+	if (vm_bo) {
+		drm_gpuvm_bo_put(__vm_bo);
+		return vm_bo;
+	}
+
+	drm_gem_gpuva_assert_lock_held(obj);
+	list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list);
+
+	return __vm_bo;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc);
+
 static int
 __drm_gpuva_insert(struct drm_gpuvm *gpuvm,
 		   struct drm_gpuva *va)
@@ -916,24 +1144,33 @@ EXPORT_SYMBOL_GPL(drm_gpuva_remove);
 /**
  * drm_gpuva_link() - link a &drm_gpuva
  * @va: the &drm_gpuva to link
+ * @vm_bo: the &drm_gpuvm_bo to add the &drm_gpuva to
  *
- * This adds the given &va to the GPU VA list of the &drm_gem_object it is
- * associated with.
+ * This adds the given &va to the GPU VA list of the &drm_gpuvm_bo and the
+ * &drm_gpuvm_bo to the &drm_gem_object it is associated with.
+ *
+ * For every &drm_gpuva entry added to the &drm_gpuvm_bo an additional
+ * reference of the latter is taken.
  *
  * This function expects the caller to protect the GEM's GPUVA list against
- * concurrent access using the GEMs dma_resv lock.
+ * concurrent access using either the GEMs dma_resv lock or a driver specific
+ * lock set through drm_gem_gpuva_set_lock().
  */
 void
-drm_gpuva_link(struct drm_gpuva *va)
+drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo)
 {
 	struct drm_gem_object *obj = va->gem.obj;
+	struct drm_gpuvm *gpuvm = va->vm;
 
 	if (unlikely(!obj))
 		return;
 
-	drm_gem_gpuva_assert_lock_held(obj);
+	drm_WARN_ON(gpuvm->drm, obj != vm_bo->obj);
 
-	list_add_tail(&va->gem.entry, &obj->gpuva.list);
+	va->vm_bo = drm_gpuvm_bo_get(vm_bo);
+
+	drm_gem_gpuva_assert_lock_held(obj);
+	list_add_tail(&va->gem.entry, &vm_bo->list.gpuva);
 }
 EXPORT_SYMBOL_GPL(drm_gpuva_link);
 
@@ -944,20 +1181,31 @@ EXPORT_SYMBOL_GPL(drm_gpuva_link);
  * This removes the given &va from the GPU VA list of the &drm_gem_object it is
  * associated with.
  *
+ * This removes the given &va from the GPU VA list of the &drm_gpuvm_bo and
+ * the &drm_gpuvm_bo from the &drm_gem_object it is associated with in case
+ * this call unlinks the last &drm_gpuva from the &drm_gpuvm_bo.
+ *
+ * For every &drm_gpuva entry removed from the &drm_gpuvm_bo a reference of
+ * the latter is dropped.
+ *
  * This function expects the caller to protect the GEM's GPUVA list against
- * concurrent access using the GEMs dma_resv lock.
+ * concurrent access using either the GEMs dma_resv lock or a driver specific
+ * lock set through drm_gem_gpuva_set_lock().
  */
 void
 drm_gpuva_unlink(struct drm_gpuva *va)
 {
 	struct drm_gem_object *obj = va->gem.obj;
+	struct drm_gpuvm_bo *vm_bo = va->vm_bo;
 
 	if (unlikely(!obj))
 		return;
 
 	drm_gem_gpuva_assert_lock_held(obj);
-
 	list_del_init(&va->gem.entry);
+
+	va->vm_bo = NULL;
+	drm_gpuvm_bo_put(vm_bo);
 }
 EXPORT_SYMBOL_GPL(drm_gpuva_unlink);
 
@@ -1102,10 +1350,10 @@ drm_gpuva_remap(struct drm_gpuva *prev,
 		struct drm_gpuva *next,
 		struct drm_gpuva_op_remap *op)
 {
-	struct drm_gpuva *curr = op->unmap->va;
-	struct drm_gpuvm *gpuvm = curr->vm;
+	struct drm_gpuva *va = op->unmap->va;
+	struct drm_gpuvm *gpuvm = va->vm;
 
-	drm_gpuva_remove(curr);
+	drm_gpuva_remove(va);
 
 	if (op->prev) {
 		drm_gpuva_init_from_op(prev, op->prev);
@@ -1747,9 +1995,8 @@ err_free_ops:
 EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create);
 
 /**
- * drm_gpuvm_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM
- * @gpuvm: the &drm_gpuvm representing the GPU VA space
- * @obj: the &drm_gem_object to unmap
+ * drm_gpuvm_bo_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM
+ * @vm_bo: the &drm_gpuvm_bo abstraction
  *
  * This function creates a list of operations to perform unmapping for every
  * GPUVA attached to a GEM.
@@ -1766,15 +2013,14 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create);
  * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure
  */
 struct drm_gpuva_ops *
-drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm,
-			       struct drm_gem_object *obj)
+drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo)
 {
 	struct drm_gpuva_ops *ops;
 	struct drm_gpuva_op *op;
 	struct drm_gpuva *va;
 	int ret;
 
-	drm_gem_gpuva_assert_lock_held(obj);
+	drm_gem_gpuva_assert_lock_held(vm_bo->obj);
 
 	ops = kzalloc(sizeof(*ops), GFP_KERNEL);
 	if (!ops)
@@ -1782,8 +2028,8 @@ drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm,
 
 	INIT_LIST_HEAD(&ops->list);
 
-	drm_gem_for_each_gpuva(va, obj) {
-		op = gpuva_op_alloc(gpuvm);
+	drm_gpuvm_bo_for_each_va(va, vm_bo) {
+		op = gpuva_op_alloc(vm_bo->vm);
 		if (!op) {
 			ret = -ENOMEM;
 			goto err_free_ops;
@@ -1797,10 +2043,10 @@ drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm,
 	return ops;
 
 err_free_ops:
-	drm_gpuva_ops_free(gpuvm, ops);
+	drm_gpuva_ops_free(vm_bo->vm, ops);
 	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(drm_gpuvm_gem_unmap_ops_create);
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_unmap_ops_create);
 
 /**
  * drm_gpuva_ops_free() - free the given &drm_gpuva_ops
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index cb2f06565c462..eda7bb8624f11 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -62,6 +62,8 @@ struct bind_job_op {
 	enum vm_bind_op op;
 	u32 flags;
 
+	struct drm_gpuvm_bo *vm_bo;
+
 	struct {
 		u64 addr;
 		u64 range;
@@ -1101,22 +1103,28 @@ bind_validate_region(struct nouveau_job *job)
 }
 
 static void
-bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new)
+bind_link_gpuvas(struct bind_job_op *bop)
 {
+	struct nouveau_uvma_prealloc *new = &bop->new;
+	struct drm_gpuvm_bo *vm_bo = bop->vm_bo;
+	struct drm_gpuva_ops *ops = bop->ops;
 	struct drm_gpuva_op *op;
 
 	drm_gpuva_for_each_op(op, ops) {
 		switch (op->op) {
 		case DRM_GPUVA_OP_MAP:
-			drm_gpuva_link(&new->map->va);
+			drm_gpuva_link(&new->map->va, vm_bo);
 			break;
-		case DRM_GPUVA_OP_REMAP:
+		case DRM_GPUVA_OP_REMAP: {
+			struct drm_gpuva *va = op->remap.unmap->va;
+
 			if (op->remap.prev)
-				drm_gpuva_link(&new->prev->va);
+				drm_gpuva_link(&new->prev->va, va->vm_bo);
 			if (op->remap.next)
-				drm_gpuva_link(&new->next->va);
-			drm_gpuva_unlink(op->remap.unmap->va);
+				drm_gpuva_link(&new->next->va, va->vm_bo);
+			drm_gpuva_unlink(va);
 			break;
+		}
 		case DRM_GPUVA_OP_UNMAP:
 			drm_gpuva_unlink(op->unmap.va);
 			break;
@@ -1138,10 +1146,17 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
 
 	list_for_each_op(op, &bind_job->ops) {
 		if (op->op == OP_MAP) {
-			op->gem.obj = drm_gem_object_lookup(job->file_priv,
-							    op->gem.handle);
-			if (!op->gem.obj)
+			struct drm_gem_object *obj = op->gem.obj =
+				drm_gem_object_lookup(job->file_priv,
+						      op->gem.handle);
+			if (!obj)
 				return -ENOENT;
+
+			dma_resv_lock(obj->resv, NULL);
+			op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj);
+			dma_resv_unlock(obj->resv);
+			if (IS_ERR(op->vm_bo))
+				return PTR_ERR(op->vm_bo);
 		}
 
 		ret = bind_validate_op(job, op);
@@ -1352,7 +1367,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
 		case OP_UNMAP_SPARSE:
 		case OP_MAP:
 		case OP_UNMAP:
-			bind_link_gpuvas(op->ops, &op->new);
+			bind_link_gpuvas(op);
 			break;
 		default:
 			break;
@@ -1499,6 +1514,12 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
 		if (!IS_ERR_OR_NULL(op->ops))
 			drm_gpuva_ops_free(&uvmm->base, op->ops);
 
+		if (!IS_ERR_OR_NULL(op->vm_bo)) {
+			dma_resv_lock(obj->resv, NULL);
+			drm_gpuvm_bo_put(op->vm_bo);
+			dma_resv_unlock(obj->resv);
+		}
+
 		if (obj)
 			drm_gem_object_put(obj);
 	}
@@ -1752,15 +1773,18 @@ void
 nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem)
 {
 	struct drm_gem_object *obj = &nvbo->bo.base;
+	struct drm_gpuvm_bo *vm_bo;
 	struct drm_gpuva *va;
 
 	dma_resv_assert_held(obj->resv);
 
-	drm_gem_for_each_gpuva(va, obj) {
-		struct nouveau_uvma *uvma = uvma_from_va(va);
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+		drm_gpuvm_bo_for_each_va(va, vm_bo) {
+			struct nouveau_uvma *uvma = uvma_from_va(va);
 
-		nouveau_uvma_map(uvma, mem);
-		drm_gpuva_invalidate(va, false);
+			nouveau_uvma_map(uvma, mem);
+			drm_gpuva_invalidate(va, false);
+		}
 	}
 }
 
@@ -1768,15 +1792,18 @@ void
 nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo)
 {
 	struct drm_gem_object *obj = &nvbo->bo.base;
+	struct drm_gpuvm_bo *vm_bo;
 	struct drm_gpuva *va;
 
 	dma_resv_assert_held(obj->resv);
 
-	drm_gem_for_each_gpuva(va, obj) {
-		struct nouveau_uvma *uvma = uvma_from_va(va);
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+		drm_gpuvm_bo_for_each_va(va, vm_bo) {
+			struct nouveau_uvma *uvma = uvma_from_va(va);
 
-		nouveau_uvma_unmap(uvma);
-		drm_gpuva_invalidate(va, true);
+			nouveau_uvma_unmap(uvma);
+			drm_gpuva_invalidate(va, true);
+		}
 	}
 }
 
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 16364487fde9f..369505447acd8 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -580,7 +580,7 @@ int drm_gem_evict(struct drm_gem_object *obj);
  * drm_gem_gpuva_init() - initialize the gpuva list of a GEM object
  * @obj: the &drm_gem_object
  *
- * This initializes the &drm_gem_object's &drm_gpuva list.
+ * This initializes the &drm_gem_object's &drm_gpuvm_bo list.
  *
  * Calling this function is only necessary for drivers intending to support the
  * &drm_driver_feature DRIVER_GEM_GPUVA.
@@ -593,28 +593,28 @@ static inline void drm_gem_gpuva_init(struct drm_gem_object *obj)
 }
 
 /**
- * drm_gem_for_each_gpuva() - iternator to walk over a list of gpuvas
- * @entry__: &drm_gpuva structure to assign to in each iteration step
- * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with
+ * drm_gem_for_each_gpuvm_bo() - iterator to walk over a list of &drm_gpuvm_bo
+ * @entry__: &drm_gpuvm_bo structure to assign to in each iteration step
+ * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with
  *
- * This iterator walks over all &drm_gpuva structures associated with the
- * &drm_gpuva_manager.
+ * This iterator walks over all &drm_gpuvm_bo structures associated with the
+ * &drm_gem_object.
  */
-#define drm_gem_for_each_gpuva(entry__, obj__) \
-	list_for_each_entry(entry__, &(obj__)->gpuva.list, gem.entry)
+#define drm_gem_for_each_gpuvm_bo(entry__, obj__) \
+	list_for_each_entry(entry__, &(obj__)->gpuva.list, list.entry.gem)
 
 /**
- * drm_gem_for_each_gpuva_safe() - iternator to safely walk over a list of
- * gpuvas
- * @entry__: &drm_gpuva structure to assign to in each iteration step
- * @next__: &next &drm_gpuva to store the next step
- * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with
+ * drm_gem_for_each_gpuvm_bo_safe() - iterator to safely walk over a list of
+ * &drm_gpuvm_bo
+ * @entry__: &drm_gpuvm_bostructure to assign to in each iteration step
+ * @next__: &next &drm_gpuvm_bo to store the next step
+ * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with
  *
- * This iterator walks over all &drm_gpuva structures associated with the
+ * This iterator walks over all &drm_gpuvm_bo structures associated with the
  * &drm_gem_object. It is implemented with list_for_each_entry_safe(), hence
  * it is save against removal of elements.
  */
-#define drm_gem_for_each_gpuva_safe(entry__, next__, obj__) \
-	list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, gem.entry)
+#define drm_gem_for_each_gpuvm_bo_safe(entry__, next__, obj__) \
+	list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, list.entry.gem)
 
 #endif /* __DRM_GEM_H__ */
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 4e6e1fd3485ad..b12fb22b0e226 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -25,6 +25,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <linux/dma-resv.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <linux/types.h>
@@ -33,6 +34,7 @@
 #include <drm/drm_gem.h>
 
 struct drm_gpuvm;
+struct drm_gpuvm_bo;
 struct drm_gpuvm_ops;
 
 /**
@@ -73,6 +75,12 @@ struct drm_gpuva {
 	 */
 	struct drm_gpuvm *vm;
 
+	/**
+	 * @vm_bo: the &drm_gpuvm_bo abstraction for the mapped
+	 * &drm_gem_object
+	 */
+	struct drm_gpuvm_bo *vm_bo;
+
 	/**
 	 * @flags: the &drm_gpuva_flags for this mapping
 	 */
@@ -108,7 +116,7 @@ struct drm_gpuva {
 		struct drm_gem_object *obj;
 
 		/**
-		 * @entry: the &list_head to attach this object to a &drm_gem_object
+		 * @entry: the &list_head to attach this object to a &drm_gpuvm_bo
 		 */
 		struct list_head entry;
 	} gem;
@@ -141,7 +149,7 @@ struct drm_gpuva {
 int drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va);
 void drm_gpuva_remove(struct drm_gpuva *va);
 
-void drm_gpuva_link(struct drm_gpuva *va);
+void drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo);
 void drm_gpuva_unlink(struct drm_gpuva *va);
 
 struct drm_gpuva *drm_gpuva_find(struct drm_gpuvm *gpuvm,
@@ -188,10 +196,16 @@ static inline bool drm_gpuva_invalidated(struct drm_gpuva *va)
  * enum drm_gpuvm_flags - flags for struct drm_gpuvm
  */
 enum drm_gpuvm_flags {
+	/**
+	 * @DRM_GPUVM_RESV_PROTECTED: GPUVM is protected externally by the
+	 * GPUVM's &dma_resv lock
+	 */
+	DRM_GPUVM_RESV_PROTECTED = BIT(0),
+
 	/**
 	 * @DRM_GPUVM_USERBITS: user defined bits
 	 */
-	DRM_GPUVM_USERBITS = BIT(0),
+	DRM_GPUVM_USERBITS = BIT(1),
 };
 
 /**
@@ -302,6 +316,19 @@ bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range);
 struct drm_gem_object *
 drm_gpuvm_resv_object_alloc(struct drm_device *drm);
 
+/**
+ * drm_gpuvm_resv_protected() - indicates whether &DRM_GPUVM_RESV_PROTECTED is
+ * set
+ * @gpuvm: the &drm_gpuvm
+ *
+ * Returns: true if &DRM_GPUVM_RESV_PROTECTED is set, false otherwise.
+ */
+static inline bool
+drm_gpuvm_resv_protected(struct drm_gpuvm *gpuvm)
+{
+	return gpuvm->flags & DRM_GPUVM_RESV_PROTECTED;
+}
+
 /**
  * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv
  * @gpuvm__: the &drm_gpuvm
@@ -320,6 +347,12 @@ drm_gpuvm_resv_object_alloc(struct drm_device *drm);
  */
 #define drm_gpuvm_resv_obj(gpuvm__) ((gpuvm__)->r_obj)
 
+#define drm_gpuvm_resv_held(gpuvm__) \
+	dma_resv_held(drm_gpuvm_resv(gpuvm__))
+
+#define drm_gpuvm_resv_assert_held(gpuvm__) \
+	dma_resv_assert_held(drm_gpuvm_resv(gpuvm__))
+
 #define drm_gpuvm_resv_held(gpuvm__) \
 	dma_resv_held(drm_gpuvm_resv(gpuvm__))
 
@@ -404,6 +437,125 @@ __drm_gpuva_next(struct drm_gpuva *va)
 #define drm_gpuvm_for_each_va_safe(va__, next__, gpuvm__) \
 	list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry)
 
+/**
+ * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and
+ * &drm_gem_object combination
+ *
+ * This structure is an abstraction representing a &drm_gpuvm and
+ * &drm_gem_object combination. It serves as an indirection to accelerate
+ * iterating all &drm_gpuvas within a &drm_gpuvm backed by the same
+ * &drm_gem_object.
+ *
+ * Furthermore it is used cache evicted GEM objects for a certain GPU-VM to
+ * accelerate validation.
+ *
+ * Typically, drivers want to create an instance of a struct drm_gpuvm_bo once
+ * a GEM object is mapped first in a GPU-VM and release the instance once the
+ * last mapping of the GEM object in this GPU-VM is unmapped.
+ */
+struct drm_gpuvm_bo {
+	/**
+	 * @vm: The &drm_gpuvm the @obj is mapped in. This is a reference
+	 * counted pointer.
+	 */
+	struct drm_gpuvm *vm;
+
+	/**
+	 * @obj: The &drm_gem_object being mapped in @vm. This is a reference
+	 * counted pointer.
+	 */
+	struct drm_gem_object *obj;
+
+	/**
+	 * @kref: The reference count for this &drm_gpuvm_bo.
+	 */
+	struct kref kref;
+
+	/**
+	 * @list: Structure containing all &list_heads.
+	 */
+	struct {
+		/**
+		 * @gpuva: The list of linked &drm_gpuvas.
+		 *
+		 * It is safe to access entries from this list as long as the
+		 * GEM's gpuva lock is held. See also struct drm_gem_object.
+		 */
+		struct list_head gpuva;
+
+		/**
+		 * @entry: Structure containing all &list_heads serving as
+		 * entry.
+		 */
+		struct {
+			/**
+			 * @gem: List entry to attach to the &drm_gem_objects
+			 * gpuva list.
+			 */
+			struct list_head gem;
+		} entry;
+	} list;
+};
+
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm,
+		    struct drm_gem_object *obj);
+
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm,
+		    struct drm_gem_object *obj);
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *vm_bo);
+
+/**
+ * drm_gpuvm_bo_get() - acquire a struct drm_gpuvm_bo reference
+ * @vm_bo: the &drm_gpuvm_bo to acquire the reference of
+ *
+ * This function acquires an additional reference to @vm_bo. It is illegal to
+ * call this without already holding a reference. No locks required.
+ */
+static inline struct drm_gpuvm_bo *
+drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo)
+{
+	kref_get(&vm_bo->kref);
+	return vm_bo;
+}
+
+void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo);
+
+struct drm_gpuvm_bo *
+drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm,
+		  struct drm_gem_object *obj);
+
+/**
+ * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva
+ * @va__: &drm_gpuva structure to assign to in each iteration step
+ * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with
+ *
+ * This iterator walks over all &drm_gpuva structures associated with the
+ * &drm_gpuvm_bo.
+ *
+ * The caller must hold the GEM's gpuva lock.
+ */
+#define drm_gpuvm_bo_for_each_va(va__, vm_bo__) \
+	list_for_each_entry(va__, &(vm_bo)->list.gpuva, gem.entry)
+
+/**
+ * drm_gpuvm_bo_for_each_va_safe() - iterator to safely walk over a list of
+ * &drm_gpuva
+ * @va__: &drm_gpuva structure to assign to in each iteration step
+ * @next__: &next &drm_gpuva to store the next step
+ * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with
+ *
+ * This iterator walks over all &drm_gpuva structures associated with the
+ * &drm_gpuvm_bo. It is implemented with list_for_each_entry_safe(), hence
+ * it is save against removal of elements.
+ *
+ * The caller must hold the GEM's gpuva lock.
+ */
+#define drm_gpuvm_bo_for_each_va_safe(va__, next__, vm_bo__) \
+	list_for_each_entry_safe(va__, next__, &(vm_bo)->list.gpuva, gem.entry)
+
 /**
  * enum drm_gpuva_op_type - GPU VA operation type
  *
@@ -673,8 +825,7 @@ drm_gpuvm_prefetch_ops_create(struct drm_gpuvm *gpuvm,
 				 u64 addr, u64 range);
 
 struct drm_gpuva_ops *
-drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm,
-			       struct drm_gem_object *obj);
+drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo);
 
 void drm_gpuva_ops_free(struct drm_gpuvm *gpuvm,
 			struct drm_gpuva_ops *ops);
@@ -726,6 +877,30 @@ struct drm_gpuvm_ops {
 	 */
 	void (*op_free)(struct drm_gpuva_op *op);
 
+	/**
+	 * @vm_bo_alloc: called when the &drm_gpuvm allocates
+	 * a struct drm_gpuvm_bo
+	 *
+	 * Some drivers may want to embed struct drm_gpuvm_bo into driver
+	 * specific structures. By implementing this callback drivers can
+	 * allocate memory accordingly.
+	 *
+	 * This callback is optional.
+	 */
+	struct drm_gpuvm_bo *(*vm_bo_alloc)(void);
+
+	/**
+	 * @vm_bo_free: called when the &drm_gpuvm frees a
+	 * struct drm_gpuvm_bo
+	 *
+	 * Some drivers may want to embed struct drm_gpuvm_bo into driver
+	 * specific structures. By implementing this callback drivers can
+	 * free the previously allocated memory accordingly.
+	 *
+	 * This callback is optional.
+	 */
+	void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo);
+
 	/**
 	 * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the
 	 * mapping once all previous steps were completed
-- 
GitLab


From 50c1a36f594bb3dd33f3f9386c5d960cd12327d8 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 8 Nov 2023 01:12:41 +0100
Subject: [PATCH 0243/2340] drm/gpuvm: track/lock/validate external/evicted
 objects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the DRM GPUVM offers common infrastructure to track GPU VA
allocations and mappings, generically connect GPU VA mappings to their
backing buffers and perform more complex mapping operations on the GPU VA
space.

However, there are more design patterns commonly used by drivers, which
can potentially be generalized in order to make the DRM GPUVM represent
a basis for GPU-VM implementations. In this context, this patch aims
at generalizing the following elements.

1) Provide a common dma-resv for GEM objects not being used outside of
   this GPU-VM.

2) Provide tracking of external GEM objects (GEM objects which are
   shared with other GPU-VMs).

3) Provide functions to efficiently lock all GEM objects dma-resv the
   GPU-VM contains mappings of.

4) Provide tracking of evicted GEM objects the GPU-VM contains mappings
   of, such that validation of evicted GEM objects is accelerated.

5) Provide some convinience functions for common patterns.

Big thanks to Boris Brezillon for his help to figure out locking for
drivers updating the GPU VA space within the fence signalling path.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-12-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c | 633 ++++++++++++++++++++++++++++++++++++
 include/drm/drm_gpuvm.h     | 250 ++++++++++++++
 2 files changed, 883 insertions(+)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 7e193bca8bab0..54f5e8851de5d 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -82,6 +82,21 @@
  * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this
  * particular combination. If not existent a new instance is created and linked
  * to the &drm_gem_object.
+ *
+ * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used
+ * as entry for the &drm_gpuvm's lists of external and evicted objects. Those
+ * lists are maintained in order to accelerate locking of dma-resv locks and
+ * validation of evicted objects bound in a &drm_gpuvm. For instance, all
+ * &drm_gem_object's &dma_resv of a given &drm_gpuvm can be locked by calling
+ * drm_gpuvm_exec_lock(). Once locked drivers can call drm_gpuvm_validate() in
+ * order to validate all evicted &drm_gem_objects. It is also possible to lock
+ * additional &drm_gem_objects by providing the corresponding parameters to
+ * drm_gpuvm_exec_lock() as well as open code the &drm_exec loop while making
+ * use of helper functions such as drm_gpuvm_prepare_range() or
+ * drm_gpuvm_prepare_objects().
+ *
+ * Every bound &drm_gem_object is treated as external object when its &dma_resv
+ * structure is different than the &drm_gpuvm's common &dma_resv structure.
  */
 
 /**
@@ -429,6 +444,20 @@
  * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and
  * &drm_gem_object must be able to observe previous creations and destructions
  * of &drm_gpuvm_bos in order to keep instances unique.
+ *
+ * The &drm_gpuvm's lists for keeping track of external and evicted objects are
+ * protected against concurrent insertion / removal and iteration internally.
+ *
+ * However, drivers still need ensure to protect concurrent calls to functions
+ * iterating those lists, namely drm_gpuvm_prepare_objects() and
+ * drm_gpuvm_validate().
+ *
+ * Alternatively, drivers can set the &DRM_GPUVM_RESV_PROTECTED flag to indicate
+ * that the corresponding &dma_resv locks are held in order to protect the
+ * lists. If &DRM_GPUVM_RESV_PROTECTED is set, internal locking is disabled and
+ * the corresponding lockdep checks are enabled. This is an optimization for
+ * drivers which are capable of taking the corresponding &dma_resv locks and
+ * hence do not require internal locking.
  */
 
 /**
@@ -641,6 +670,201 @@
  *	}
  */
 
+/**
+ * get_next_vm_bo_from_list() - get the next vm_bo element
+ * @__gpuvm: the &drm_gpuvm
+ * @__list_name: the name of the list we're iterating on
+ * @__local_list: a pointer to the local list used to store already iterated items
+ * @__prev_vm_bo: the previous element we got from get_next_vm_bo_from_list()
+ *
+ * This helper is here to provide lockless list iteration. Lockless as in, the
+ * iterator releases the lock immediately after picking the first element from
+ * the list, so list insertion deletion can happen concurrently.
+ *
+ * Elements popped from the original list are kept in a local list, so removal
+ * and is_empty checks can still happen while we're iterating the list.
+ */
+#define get_next_vm_bo_from_list(__gpuvm, __list_name, __local_list, __prev_vm_bo)	\
+	({										\
+		struct drm_gpuvm_bo *__vm_bo = NULL;					\
+											\
+		drm_gpuvm_bo_put(__prev_vm_bo);						\
+											\
+		spin_lock(&(__gpuvm)->__list_name.lock);				\
+		if (!(__gpuvm)->__list_name.local_list)					\
+			(__gpuvm)->__list_name.local_list = __local_list;		\
+		else									\
+			drm_WARN_ON((__gpuvm)->drm,					\
+				    (__gpuvm)->__list_name.local_list != __local_list);	\
+											\
+		while (!list_empty(&(__gpuvm)->__list_name.list)) {			\
+			__vm_bo = list_first_entry(&(__gpuvm)->__list_name.list,	\
+						   struct drm_gpuvm_bo,			\
+						   list.entry.__list_name);		\
+			if (kref_get_unless_zero(&__vm_bo->kref)) {			\
+				list_move_tail(&(__vm_bo)->list.entry.__list_name,	\
+					       __local_list);				\
+				break;							\
+			} else {							\
+				list_del_init(&(__vm_bo)->list.entry.__list_name);	\
+				__vm_bo = NULL;						\
+			}								\
+		}									\
+		spin_unlock(&(__gpuvm)->__list_name.lock);				\
+											\
+		__vm_bo;								\
+	})
+
+/**
+ * for_each_vm_bo_in_list() - internal vm_bo list iterator
+ * @__gpuvm: the &drm_gpuvm
+ * @__list_name: the name of the list we're iterating on
+ * @__local_list: a pointer to the local list used to store already iterated items
+ * @__vm_bo: the struct drm_gpuvm_bo to assign in each iteration step
+ *
+ * This helper is here to provide lockless list iteration. Lockless as in, the
+ * iterator releases the lock immediately after picking the first element from the
+ * list, hence list insertion and deletion can happen concurrently.
+ *
+ * It is not allowed to re-assign the vm_bo pointer from inside this loop.
+ *
+ * Typical use:
+ *
+ *	struct drm_gpuvm_bo *vm_bo;
+ *	LIST_HEAD(my_local_list);
+ *
+ *	ret = 0;
+ *	for_each_vm_bo_in_list(gpuvm, <list_name>, &my_local_list, vm_bo) {
+ *		ret = do_something_with_vm_bo(..., vm_bo);
+ *		if (ret)
+ *			break;
+ *	}
+ *	// Drop ref in case we break out of the loop.
+ *	drm_gpuvm_bo_put(vm_bo);
+ *	restore_vm_bo_list(gpuvm, <list_name>, &my_local_list);
+ *
+ *
+ * Only used for internal list iterations, not meant to be exposed to the outside
+ * world.
+ */
+#define for_each_vm_bo_in_list(__gpuvm, __list_name, __local_list, __vm_bo)	\
+	for (__vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name,		\
+						__local_list, NULL);		\
+	     __vm_bo;								\
+	     __vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name,		\
+						__local_list, __vm_bo))
+
+static void
+__restore_vm_bo_list(struct drm_gpuvm *gpuvm, spinlock_t *lock,
+		     struct list_head *list, struct list_head **local_list)
+{
+	/* Merge back the two lists, moving local list elements to the
+	 * head to preserve previous ordering, in case it matters.
+	 */
+	spin_lock(lock);
+	if (*local_list) {
+		list_splice(*local_list, list);
+		*local_list = NULL;
+	}
+	spin_unlock(lock);
+}
+
+/**
+ * restore_vm_bo_list() - move vm_bo elements back to their original list
+ * @__gpuvm: the &drm_gpuvm
+ * @__list_name: the name of the list we're iterating on
+ *
+ * When we're done iterating a vm_bo list, we should call restore_vm_bo_list()
+ * to restore the original state and let new iterations take place.
+ */
+#define restore_vm_bo_list(__gpuvm, __list_name)			\
+	__restore_vm_bo_list((__gpuvm), &(__gpuvm)->__list_name.lock,	\
+			     &(__gpuvm)->__list_name.list,		\
+			     &(__gpuvm)->__list_name.local_list)
+
+static void
+cond_spin_lock(spinlock_t *lock, bool cond)
+{
+	if (cond)
+		spin_lock(lock);
+}
+
+static void
+cond_spin_unlock(spinlock_t *lock, bool cond)
+{
+	if (cond)
+		spin_unlock(lock);
+}
+
+static void
+__drm_gpuvm_bo_list_add(struct drm_gpuvm *gpuvm, spinlock_t *lock,
+			struct list_head *entry, struct list_head *list)
+{
+	cond_spin_lock(lock, !!lock);
+	if (list_empty(entry))
+		list_add_tail(entry, list);
+	cond_spin_unlock(lock, !!lock);
+}
+
+/**
+ * drm_gpuvm_bo_list_add() - insert a vm_bo into the given list
+ * @__vm_bo: the &drm_gpuvm_bo
+ * @__list_name: the name of the list to insert into
+ * @__lock: whether to lock with the internal spinlock
+ *
+ * Inserts the given @__vm_bo into the list specified by @__list_name.
+ */
+#define drm_gpuvm_bo_list_add(__vm_bo, __list_name, __lock)			\
+	__drm_gpuvm_bo_list_add((__vm_bo)->vm,					\
+				__lock ? &(__vm_bo)->vm->__list_name.lock :	\
+					 NULL,					\
+				&(__vm_bo)->list.entry.__list_name,		\
+				&(__vm_bo)->vm->__list_name.list)
+
+static void
+__drm_gpuvm_bo_list_del(struct drm_gpuvm *gpuvm, spinlock_t *lock,
+			struct list_head *entry, bool init)
+{
+	cond_spin_lock(lock, !!lock);
+	if (init) {
+		if (!list_empty(entry))
+			list_del_init(entry);
+	} else {
+		list_del(entry);
+	}
+	cond_spin_unlock(lock, !!lock);
+}
+
+/**
+ * drm_gpuvm_bo_list_del_init() - remove a vm_bo from the given list
+ * @__vm_bo: the &drm_gpuvm_bo
+ * @__list_name: the name of the list to insert into
+ * @__lock: whether to lock with the internal spinlock
+ *
+ * Removes the given @__vm_bo from the list specified by @__list_name.
+ */
+#define drm_gpuvm_bo_list_del_init(__vm_bo, __list_name, __lock)		\
+	__drm_gpuvm_bo_list_del((__vm_bo)->vm,					\
+				__lock ? &(__vm_bo)->vm->__list_name.lock :	\
+					 NULL,					\
+				&(__vm_bo)->list.entry.__list_name,		\
+				true)
+
+/**
+ * drm_gpuvm_bo_list_del() - remove a vm_bo from the given list
+ * @__vm_bo: the &drm_gpuvm_bo
+ * @__list_name: the name of the list to insert into
+ * @__lock: whether to lock with the internal spinlock
+ *
+ * Removes the given @__vm_bo from the list specified by @__list_name.
+ */
+#define drm_gpuvm_bo_list_del(__vm_bo, __list_name, __lock)			\
+	__drm_gpuvm_bo_list_del((__vm_bo)->vm,					\
+				__lock ? &(__vm_bo)->vm->__list_name.lock :	\
+					 NULL,					\
+				&(__vm_bo)->list.entry.__list_name,		\
+				false)
+
 #define to_drm_gpuva(__node)	container_of((__node), struct drm_gpuva, rb.node)
 
 #define GPUVA_START(node) ((node)->va.addr)
@@ -781,6 +1005,12 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
 	gpuvm->rb.tree = RB_ROOT_CACHED;
 	INIT_LIST_HEAD(&gpuvm->rb.list);
 
+	INIT_LIST_HEAD(&gpuvm->extobj.list);
+	spin_lock_init(&gpuvm->extobj.lock);
+
+	INIT_LIST_HEAD(&gpuvm->evict.list);
+	spin_lock_init(&gpuvm->evict.lock);
+
 	kref_init(&gpuvm->kref);
 
 	gpuvm->name = name ? name : "unknown";
@@ -818,6 +1048,11 @@ drm_gpuvm_fini(struct drm_gpuvm *gpuvm)
 	drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root),
 		 "GPUVA tree is not empty, potentially leaking memory.\n");
 
+	drm_WARN(gpuvm->drm, !list_empty(&gpuvm->extobj.list),
+		 "Extobj list should be empty.\n");
+	drm_WARN(gpuvm->drm, !list_empty(&gpuvm->evict.list),
+		 "Evict list should be empty.\n");
+
 	drm_gem_object_put(gpuvm->r_obj);
 }
 
@@ -850,6 +1085,343 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm)
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_put);
 
+static int
+__drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
+			    struct drm_exec *exec,
+			    unsigned int num_fences)
+{
+	struct drm_gpuvm_bo *vm_bo;
+	LIST_HEAD(extobjs);
+	int ret = 0;
+
+	for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) {
+		ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences);
+		if (ret)
+			break;
+	}
+	/* Drop ref in case we break out of the loop. */
+	drm_gpuvm_bo_put(vm_bo);
+	restore_vm_bo_list(gpuvm, extobj);
+
+	return ret;
+}
+
+static int
+drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm,
+				 struct drm_exec *exec,
+				 unsigned int num_fences)
+{
+	struct drm_gpuvm_bo *vm_bo;
+	int ret = 0;
+
+	drm_gpuvm_resv_assert_held(gpuvm);
+	list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) {
+		ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences);
+		if (ret)
+			break;
+
+		if (vm_bo->evicted)
+			drm_gpuvm_bo_list_add(vm_bo, evict, false);
+	}
+
+	return ret;
+}
+
+/**
+ * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs
+ * @gpuvm: the &drm_gpuvm
+ * @exec: the &drm_exec locking context
+ * @num_fences: the amount of &dma_fences to reserve
+ *
+ * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given
+ * &drm_gpuvm contains mappings of.
+ *
+ * Using this function directly, it is the drivers responsibility to call
+ * drm_exec_init() and drm_exec_fini() accordingly.
+ *
+ * Note: This function is safe against concurrent insertion and removal of
+ * external objects, however it is not safe against concurrent usage itself.
+ *
+ * Drivers need to make sure to protect this case with either an outer VM lock
+ * or by calling drm_gpuvm_prepare_vm() before this function within the
+ * drm_exec_until_all_locked() loop, such that the GPUVM's dma-resv lock ensures
+ * mutual exclusion.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
+			  struct drm_exec *exec,
+			  unsigned int num_fences)
+{
+	if (drm_gpuvm_resv_protected(gpuvm))
+		return drm_gpuvm_prepare_objects_locked(gpuvm, exec,
+							num_fences);
+	else
+		return __drm_gpuvm_prepare_objects(gpuvm, exec, num_fences);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_objects);
+
+/**
+ * drm_gpuvm_prepare_range() - prepare all BOs mapped within a given range
+ * @gpuvm: the &drm_gpuvm
+ * @exec: the &drm_exec locking context
+ * @addr: the start address within the VA space
+ * @range: the range to iterate within the VA space
+ * @num_fences: the amount of &dma_fences to reserve
+ *
+ * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr
+ * and @addr + @range.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec,
+			u64 addr, u64 range, unsigned int num_fences)
+{
+	struct drm_gpuva *va;
+	u64 end = addr + range;
+	int ret;
+
+	drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) {
+		struct drm_gem_object *obj = va->gem.obj;
+
+		ret = drm_exec_prepare_obj(exec, obj, num_fences);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range);
+
+/**
+ * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs
+ * @vm_exec: the &drm_gpuvm_exec wrapper
+ *
+ * Acquires all dma-resv locks of all &drm_gem_objects the given
+ * &drm_gpuvm contains mappings of.
+ *
+ * Addionally, when calling this function with struct drm_gpuvm_exec::extra
+ * being set the driver receives the given @fn callback to lock additional
+ * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers
+ * would call drm_exec_prepare_obj() from within this callback.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec)
+{
+	struct drm_gpuvm *gpuvm = vm_exec->vm;
+	struct drm_exec *exec = &vm_exec->exec;
+	unsigned int num_fences = vm_exec->num_fences;
+	int ret;
+
+	drm_exec_init(exec, vm_exec->flags);
+
+	drm_exec_until_all_locked(exec) {
+		ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences);
+		drm_exec_retry_on_contention(exec);
+		if (ret)
+			goto err;
+
+		ret = drm_gpuvm_prepare_objects(gpuvm, exec, num_fences);
+		drm_exec_retry_on_contention(exec);
+		if (ret)
+			goto err;
+
+		if (vm_exec->extra.fn) {
+			ret = vm_exec->extra.fn(vm_exec);
+			drm_exec_retry_on_contention(exec);
+			if (ret)
+				goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	drm_exec_fini(exec);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock);
+
+static int
+fn_lock_array(struct drm_gpuvm_exec *vm_exec)
+{
+	struct {
+		struct drm_gem_object **objs;
+		unsigned int num_objs;
+	} *args = vm_exec->extra.priv;
+
+	return drm_exec_prepare_array(&vm_exec->exec, args->objs,
+				      args->num_objs, vm_exec->num_fences);
+}
+
+/**
+ * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs
+ * @vm_exec: the &drm_gpuvm_exec wrapper
+ * @objs: additional &drm_gem_objects to lock
+ * @num_objs: the number of additional &drm_gem_objects to lock
+ *
+ * Acquires all dma-resv locks of all &drm_gem_objects the given &drm_gpuvm
+ * contains mappings of, plus the ones given through @objs.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec,
+			  struct drm_gem_object **objs,
+			  unsigned int num_objs)
+{
+	struct {
+		struct drm_gem_object **objs;
+		unsigned int num_objs;
+	} args;
+
+	args.objs = objs;
+	args.num_objs = num_objs;
+
+	vm_exec->extra.fn = fn_lock_array;
+	vm_exec->extra.priv = &args;
+
+	return drm_gpuvm_exec_lock(vm_exec);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_array);
+
+/**
+ * drm_gpuvm_exec_lock_range() - prepare all BOs mapped within a given range
+ * @vm_exec: the &drm_gpuvm_exec wrapper
+ * @addr: the start address within the VA space
+ * @range: the range to iterate within the VA space
+ *
+ * Acquires all dma-resv locks of all &drm_gem_objects mapped between @addr and
+ * @addr + @range.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec,
+			  u64 addr, u64 range)
+{
+	struct drm_gpuvm *gpuvm = vm_exec->vm;
+	struct drm_exec *exec = &vm_exec->exec;
+	int ret;
+
+	drm_exec_init(exec, vm_exec->flags);
+
+	drm_exec_until_all_locked(exec) {
+		ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range,
+					      vm_exec->num_fences);
+		drm_exec_retry_on_contention(exec);
+		if (ret)
+			goto err;
+	}
+
+	return ret;
+
+err:
+	drm_exec_fini(exec);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_range);
+
+static int
+__drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec)
+{
+	const struct drm_gpuvm_ops *ops = gpuvm->ops;
+	struct drm_gpuvm_bo *vm_bo;
+	LIST_HEAD(evict);
+	int ret = 0;
+
+	for_each_vm_bo_in_list(gpuvm, evict, &evict, vm_bo) {
+		ret = ops->vm_bo_validate(vm_bo, exec);
+		if (ret)
+			break;
+	}
+	/* Drop ref in case we break out of the loop. */
+	drm_gpuvm_bo_put(vm_bo);
+	restore_vm_bo_list(gpuvm, evict);
+
+	return ret;
+}
+
+static int
+drm_gpuvm_validate_locked(struct drm_gpuvm *gpuvm, struct drm_exec *exec)
+{
+	const struct drm_gpuvm_ops *ops = gpuvm->ops;
+	struct drm_gpuvm_bo *vm_bo, *next;
+	int ret = 0;
+
+	drm_gpuvm_resv_assert_held(gpuvm);
+
+	list_for_each_entry_safe(vm_bo, next, &gpuvm->evict.list,
+				 list.entry.evict) {
+		ret = ops->vm_bo_validate(vm_bo, exec);
+		if (ret)
+			break;
+
+		dma_resv_assert_held(vm_bo->obj->resv);
+		if (!vm_bo->evicted)
+			drm_gpuvm_bo_list_del_init(vm_bo, evict, false);
+	}
+
+	return ret;
+}
+
+/**
+ * drm_gpuvm_validate() - validate all BOs marked as evicted
+ * @gpuvm: the &drm_gpuvm to validate evicted BOs
+ * @exec: the &drm_exec instance used for locking the GPUVM
+ *
+ * Calls the &drm_gpuvm_ops::vm_bo_validate callback for all evicted buffer
+ * objects being mapped in the given &drm_gpuvm.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec)
+{
+	const struct drm_gpuvm_ops *ops = gpuvm->ops;
+
+	if (unlikely(!ops || !ops->vm_bo_validate))
+		return -EOPNOTSUPP;
+
+	if (drm_gpuvm_resv_protected(gpuvm))
+		return drm_gpuvm_validate_locked(gpuvm, exec);
+	else
+		return __drm_gpuvm_validate(gpuvm, exec);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_validate);
+
+/**
+ * drm_gpuvm_resv_add_fence - add fence to private and all extobj
+ * dma-resv
+ * @gpuvm: the &drm_gpuvm to add a fence to
+ * @exec: the &drm_exec locking context
+ * @fence: fence to add
+ * @private_usage: private dma-resv usage
+ * @extobj_usage: extobj dma-resv usage
+ */
+void
+drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm,
+			 struct drm_exec *exec,
+			 struct dma_fence *fence,
+			 enum dma_resv_usage private_usage,
+			 enum dma_resv_usage extobj_usage)
+{
+	struct drm_gem_object *obj;
+	unsigned long index;
+
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		dma_resv_assert_held(obj->resv);
+		dma_resv_add_fence(obj->resv, fence,
+				   drm_gpuvm_is_extobj(gpuvm, obj) ?
+				   extobj_usage : private_usage);
+	}
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_resv_add_fence);
+
 /**
  * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo
  * @gpuvm: The &drm_gpuvm the @obj is mapped in.
@@ -883,6 +1455,9 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm,
 	INIT_LIST_HEAD(&vm_bo->list.gpuva);
 	INIT_LIST_HEAD(&vm_bo->list.entry.gem);
 
+	INIT_LIST_HEAD(&vm_bo->list.entry.extobj);
+	INIT_LIST_HEAD(&vm_bo->list.entry.evict);
+
 	return vm_bo;
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create);
@@ -900,6 +1475,9 @@ drm_gpuvm_bo_destroy(struct kref *kref)
 	if (!lock)
 		drm_gpuvm_resv_assert_held(gpuvm);
 
+	drm_gpuvm_bo_list_del(vm_bo, extobj, lock);
+	drm_gpuvm_bo_list_del(vm_bo, evict, lock);
+
 	drm_gem_gpuva_assert_lock_held(obj);
 	list_del(&vm_bo->list.entry.gem);
 
@@ -1043,6 +1621,61 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo)
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc);
 
+/**
+ * drm_gpuvm_bo_extobj_add() - adds the &drm_gpuvm_bo to its &drm_gpuvm's
+ * extobj list
+ * @vm_bo: The &drm_gpuvm_bo to add to its &drm_gpuvm's the extobj list.
+ *
+ * Adds the given @vm_bo to its &drm_gpuvm's extobj list if not on the list
+ * already and if the corresponding &drm_gem_object is an external object,
+ * actually.
+ */
+void
+drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo)
+{
+	struct drm_gpuvm *gpuvm = vm_bo->vm;
+	bool lock = !drm_gpuvm_resv_protected(gpuvm);
+
+	if (!lock)
+		drm_gpuvm_resv_assert_held(gpuvm);
+
+	if (drm_gpuvm_is_extobj(gpuvm, vm_bo->obj))
+		drm_gpuvm_bo_list_add(vm_bo, extobj, lock);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add);
+
+/**
+ * drm_gpuvm_bo_evict() - add / remove a &drm_gpuvm_bo to / from the &drm_gpuvms
+ * evicted list
+ * @vm_bo: the &drm_gpuvm_bo to add or remove
+ * @evict: indicates whether the object is evicted
+ *
+ * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list.
+ */
+void
+drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict)
+{
+	struct drm_gpuvm *gpuvm = vm_bo->vm;
+	struct drm_gem_object *obj = vm_bo->obj;
+	bool lock = !drm_gpuvm_resv_protected(gpuvm);
+
+	dma_resv_assert_held(obj->resv);
+	vm_bo->evicted = evict;
+
+	/* Can't add external objects to the evicted list directly if not using
+	 * internal spinlocks, since in this case the evicted list is protected
+	 * with the VM's common dma-resv lock.
+	 */
+	if (drm_gpuvm_is_extobj(gpuvm, obj) && !lock)
+		return;
+
+	if (evict)
+		drm_gpuvm_bo_list_add(vm_bo, evict, lock);
+	else
+		drm_gpuvm_bo_list_del_init(vm_bo, evict, lock);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_bo_evict);
+
 static int
 __drm_gpuva_insert(struct drm_gpuvm *gpuvm,
 		   struct drm_gpuva *va)
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index b12fb22b0e226..8ca10461d8ac8 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -32,6 +32,7 @@
 
 #include <drm/drm_device.h>
 #include <drm/drm_gem.h>
+#include <drm/drm_exec.h>
 
 struct drm_gpuvm;
 struct drm_gpuvm_bo;
@@ -283,6 +284,50 @@ struct drm_gpuvm {
 	 * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv.
 	 */
 	struct drm_gem_object *r_obj;
+
+	/**
+	 * @extobj: structure holding the extobj list
+	 */
+	struct {
+		/**
+		 * @list: &list_head storing &drm_gpuvm_bos serving as
+		 * external object
+		 */
+		struct list_head list;
+
+		/**
+		 * @local_list: pointer to the local list temporarily storing
+		 * entries from the external object list
+		 */
+		struct list_head *local_list;
+
+		/**
+		 * @lock: spinlock to protect the extobj list
+		 */
+		spinlock_t lock;
+	} extobj;
+
+	/**
+	 * @evict: structure holding the evict list and evict list lock
+	 */
+	struct {
+		/**
+		 * @list: &list_head storing &drm_gpuvm_bos currently being
+		 * evicted
+		 */
+		struct list_head list;
+
+		/**
+		 * @local_list: pointer to the local list temporarily storing
+		 * entries from the evicted object list
+		 */
+		struct list_head *local_list;
+
+		/**
+		 * @lock: spinlock to protect the evict list
+		 */
+		spinlock_t lock;
+	} evict;
 };
 
 void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
@@ -359,6 +404,22 @@ drm_gpuvm_resv_protected(struct drm_gpuvm *gpuvm)
 #define drm_gpuvm_resv_assert_held(gpuvm__) \
 	dma_resv_assert_held(drm_gpuvm_resv(gpuvm__))
 
+/**
+ * drm_gpuvm_is_extobj() - indicates whether the given &drm_gem_object is an
+ * external object
+ * @gpuvm: the &drm_gpuvm to check
+ * @obj: the &drm_gem_object to check
+ *
+ * Returns: true if the &drm_gem_object &dma_resv differs from the
+ * &drm_gpuvms &dma_resv, false otherwise
+ */
+static inline bool
+drm_gpuvm_is_extobj(struct drm_gpuvm *gpuvm,
+		    struct drm_gem_object *obj)
+{
+	return obj && obj->resv != drm_gpuvm_resv(gpuvm);
+}
+
 static inline struct drm_gpuva *
 __drm_gpuva_next(struct drm_gpuva *va)
 {
@@ -437,6 +498,144 @@ __drm_gpuva_next(struct drm_gpuva *va)
 #define drm_gpuvm_for_each_va_safe(va__, next__, gpuvm__) \
 	list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry)
 
+/**
+ * struct drm_gpuvm_exec - &drm_gpuvm abstraction of &drm_exec
+ *
+ * This structure should be created on the stack as &drm_exec should be.
+ *
+ * Optionally, @extra can be set in order to lock additional &drm_gem_objects.
+ */
+struct drm_gpuvm_exec {
+	/**
+	 * @exec: the &drm_exec structure
+	 */
+	struct drm_exec exec;
+
+	/**
+	 * @flags: the flags for the struct drm_exec
+	 */
+	uint32_t flags;
+
+	/**
+	 * @vm: the &drm_gpuvm to lock its DMA reservations
+	 */
+	struct drm_gpuvm *vm;
+
+	/**
+	 * @num_fences: the number of fences to reserve for the &dma_resv of the
+	 * locked &drm_gem_objects
+	 */
+	unsigned int num_fences;
+
+	/**
+	 * @extra: Callback and corresponding private data for the driver to
+	 * lock arbitrary additional &drm_gem_objects.
+	 */
+	struct {
+		/**
+		 * @fn: The driver callback to lock additional &drm_gem_objects.
+		 */
+		int (*fn)(struct drm_gpuvm_exec *vm_exec);
+
+		/**
+		 * @priv: driver private data for the @fn callback
+		 */
+		void *priv;
+	} extra;
+};
+
+/**
+ * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv
+ * @gpuvm: the &drm_gpuvm
+ * @exec: the &drm_exec context
+ * @num_fences: the amount of &dma_fences to reserve
+ *
+ * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object.
+ *
+ * Using this function directly, it is the drivers responsibility to call
+ * drm_exec_init() and drm_exec_fini() accordingly.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+static inline int
+drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm,
+		     struct drm_exec *exec,
+		     unsigned int num_fences)
+{
+	return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences);
+}
+
+int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
+			      struct drm_exec *exec,
+			      unsigned int num_fences);
+
+int drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm,
+			    struct drm_exec *exec,
+			    u64 addr, u64 range,
+			    unsigned int num_fences);
+
+int drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec);
+
+int drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec,
+			      struct drm_gem_object **objs,
+			      unsigned int num_objs);
+
+int drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec,
+			      u64 addr, u64 range);
+
+/**
+ * drm_gpuvm_exec_unlock() - lock all dma-resv of all assoiciated BOs
+ * @vm_exec: the &drm_gpuvm_exec wrapper
+ *
+ * Releases all dma-resv locks of all &drm_gem_objects previously acquired
+ * through drm_gpuvm_exec_lock() or its variants.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+static inline void
+drm_gpuvm_exec_unlock(struct drm_gpuvm_exec *vm_exec)
+{
+	drm_exec_fini(&vm_exec->exec);
+}
+
+int drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec);
+void drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm,
+			      struct drm_exec *exec,
+			      struct dma_fence *fence,
+			      enum dma_resv_usage private_usage,
+			      enum dma_resv_usage extobj_usage);
+
+/**
+ * drm_gpuvm_exec_resv_add_fence()
+ * @vm_exec: the &drm_gpuvm_exec wrapper
+ * @fence: fence to add
+ * @private_usage: private dma-resv usage
+ * @extobj_usage: extobj dma-resv usage
+ *
+ * See drm_gpuvm_resv_add_fence().
+ */
+static inline void
+drm_gpuvm_exec_resv_add_fence(struct drm_gpuvm_exec *vm_exec,
+			      struct dma_fence *fence,
+			      enum dma_resv_usage private_usage,
+			      enum dma_resv_usage extobj_usage)
+{
+	drm_gpuvm_resv_add_fence(vm_exec->vm, &vm_exec->exec, fence,
+				 private_usage, extobj_usage);
+}
+
+/**
+ * drm_gpuvm_exec_validate()
+ * @vm_exec: the &drm_gpuvm_exec wrapper
+ *
+ * See drm_gpuvm_validate().
+ */
+static inline int
+drm_gpuvm_exec_validate(struct drm_gpuvm_exec *vm_exec)
+{
+	return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
+}
+
 /**
  * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and
  * &drm_gem_object combination
@@ -466,6 +665,12 @@ struct drm_gpuvm_bo {
 	 */
 	struct drm_gem_object *obj;
 
+	/**
+	 * @evicted: Indicates whether the &drm_gem_object is evicted; field
+	 * protected by the &drm_gem_object's dma-resv lock.
+	 */
+	bool evicted;
+
 	/**
 	 * @kref: The reference count for this &drm_gpuvm_bo.
 	 */
@@ -493,6 +698,18 @@ struct drm_gpuvm_bo {
 			 * gpuva list.
 			 */
 			struct list_head gem;
+
+			/**
+			 * @evict: List entry to attach to the &drm_gpuvms
+			 * extobj list.
+			 */
+			struct list_head extobj;
+
+			/**
+			 * @evict: List entry to attach to the &drm_gpuvms evict
+			 * list.
+			 */
+			struct list_head evict;
 		} entry;
 	} list;
 };
@@ -527,6 +744,27 @@ struct drm_gpuvm_bo *
 drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm,
 		  struct drm_gem_object *obj);
 
+void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict);
+
+/**
+ * drm_gpuvm_bo_gem_evict()
+ * @obj: the &drm_gem_object
+ * @evict: indicates whether @obj is evicted
+ *
+ * See drm_gpuvm_bo_evict().
+ */
+static inline void
+drm_gpuvm_bo_gem_evict(struct drm_gem_object *obj, bool evict)
+{
+	struct drm_gpuvm_bo *vm_bo;
+
+	drm_gem_gpuva_assert_lock_held(obj);
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj)
+		drm_gpuvm_bo_evict(vm_bo, evict);
+}
+
+void drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo);
+
 /**
  * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva
  * @va__: &drm_gpuva structure to assign to in each iteration step
@@ -901,6 +1139,18 @@ struct drm_gpuvm_ops {
 	 */
 	void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo);
 
+	/**
+	 * @vm_bo_validate: called from drm_gpuvm_validate()
+	 *
+	 * Drivers receive this callback for every evicted &drm_gem_object being
+	 * mapped in the corresponding &drm_gpuvm.
+	 *
+	 * Typically, drivers would call their driver specific variant of
+	 * ttm_bo_validate() from within this callback.
+	 */
+	int (*vm_bo_validate)(struct drm_gpuvm_bo *vm_bo,
+			      struct drm_exec *exec);
+
 	/**
 	 * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the
 	 * mapping once all previous steps were completed
-- 
GitLab


From b0e396d68fef9c9c050dfbb590cc0066441f65c7 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Fri, 10 Nov 2023 20:01:57 -0500
Subject: [PATCH 0244/2340] Revert "drm/sched: Define pr_fmt() for DRM using
 pr_*()"

From Jani:
The drm_print.[ch] facilities use very few pr_*() calls directly. The
users of pr_*() calls do not necessarily include <drm/drm_print.h> at
all, and really don't have to.

Even the ones that do include it, usually have <linux/...> includes
first, and <drm/...> includes next. Notably, <linux/kernel.h> includes
<linux/printk.h>.

And, of course, <linux/printk.h> defines pr_fmt() itself if not already
defined.

No, it's encouraged not to use pr_*() at all, and prefer drm device
based logging, or device based logging.

This reverts commit 36245bd02e88e68ac5955c2958c968879d7b75a9.

Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/878r75wzm9.fsf@intel.com
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Jani Nikula <jani.nikula@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231111024130.11464-2-ltuikov89@gmail.com
---
 include/drm/drm_print.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index e8fe60d0eb878..a93a387f8a1a1 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -26,20 +26,6 @@
 #ifndef DRM_PRINT_H_
 #define DRM_PRINT_H_
 
-/* Define this before including linux/printk.h, so that the format
- * string in pr_*() macros is correctly set for DRM. If a file wants
- * to define this to something else, it should do so before including
- * this header file.
- *
- * It is encouraged code using pr_err() to prefix their format with
- * the string "*ERROR* ", to make it easier to scan kernel logs. For
- * instance,
- *   pr_err("*ERROR* <the rest of your format string here>", args).
- */
-#ifndef pr_fmt
-#define pr_fmt(fmt) "[drm] " fmt
-#endif
-
 #include <linux/compiler.h>
 #include <linux/printk.h>
 #include <linux/seq_file.h>
-- 
GitLab


From 38b2d9d385102f430eb023aee1ed0ed37d9173f5 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Oct 2023 16:06:30 +0200
Subject: [PATCH 0245/2340] drm/format-helper: Cache buffers with struct
 drm_format_conv_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hold temporary memory for format conversion in an instance of struct
drm_format_conv_state. Update internal helpers of DRM's format-conversion
code accordingly. Drivers will later be able to maintain this cache by
themselves.

Besides caching, struct drm_format_conv_state will be useful to hold
additional information for format conversion, such as palette data or
foreground/background colors. This will enable conversion from indexed
color formats to component-based formats.

v5:
	* improve documentation (Javier, Noralf)
v3:
	* rename struct drm_xfrm_buf to struct drm_format_conv_state
	  (Javier)
	* remove managed cleanup
	* add drm_format_conv_state_copy() for shadow-plane support

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Noralf Trønnes <noralf@tronnes.org>
Tested-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-2-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_format_helper.c | 118 +++++++++++++++++++++++++---
 include/drm/drm_format_helper.h     |  51 ++++++++++++
 2 files changed, 158 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index f93a4efcee909..28d013d427788 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -20,6 +20,97 @@
 #include <drm/drm_print.h>
 #include <drm/drm_rect.h>
 
+/**
+ * drm_format_conv_state_init - Initialize format-conversion state
+ * @state: The state to initialize
+ *
+ * Clears all fields in struct drm_format_conv_state. The state will
+ * be empty with no preallocated resources.
+ */
+void drm_format_conv_state_init(struct drm_format_conv_state *state)
+{
+	state->tmp.mem = NULL;
+	state->tmp.size = 0;
+	state->tmp.preallocated = false;
+}
+EXPORT_SYMBOL(drm_format_conv_state_init);
+
+/**
+ * drm_format_conv_state_copy - Copy format-conversion state
+ * @state: Destination state
+ * @old_state: Source state
+ *
+ * Copies format-conversion state from @old_state to @state; except for
+ * temporary storage.
+ */
+void drm_format_conv_state_copy(struct drm_format_conv_state *state,
+				const struct drm_format_conv_state *old_state)
+{
+	/*
+	 * So far, there's only temporary storage here, which we don't
+	 * duplicate. Just clear the fields.
+	 */
+	state->tmp.mem = NULL;
+	state->tmp.size = 0;
+	state->tmp.preallocated = false;
+}
+EXPORT_SYMBOL(drm_format_conv_state_copy);
+
+/**
+ * drm_format_conv_state_reserve - Allocates storage for format conversion
+ * @state: The format-conversion state
+ * @new_size: The minimum allocation size
+ * @flags: Flags for kmalloc()
+ *
+ * Allocates at least @new_size bytes and returns a pointer to the memory
+ * range. After calling this function, previously returned memory blocks
+ * are invalid. It's best to collect all memory requirements of a format
+ * conversion and call this function once to allocate the range.
+ *
+ * Returns:
+ * A pointer to the allocated memory range, or NULL otherwise.
+ */
+void *drm_format_conv_state_reserve(struct drm_format_conv_state *state,
+				    size_t new_size, gfp_t flags)
+{
+	void *mem;
+
+	if (new_size <= state->tmp.size)
+		goto out;
+	else if (state->tmp.preallocated)
+		return NULL;
+
+	mem = krealloc(state->tmp.mem, new_size, flags);
+	if (!mem)
+		return NULL;
+
+	state->tmp.mem = mem;
+	state->tmp.size = new_size;
+
+out:
+	return state->tmp.mem;
+}
+EXPORT_SYMBOL(drm_format_conv_state_reserve);
+
+/**
+ * drm_format_conv_state_release - Releases an format-conversion storage
+ * @state: The format-conversion state
+ *
+ * Releases the memory range references by the format-conversion state.
+ * After this call, all pointers to the memory are invalid. Prefer
+ * drm_format_conv_state_init() for cleaning up and unloading a driver.
+ */
+void drm_format_conv_state_release(struct drm_format_conv_state *state)
+{
+	if (state->tmp.preallocated)
+		return;
+
+	kfree(state->tmp.mem);
+	state->tmp.mem = NULL;
+	state->tmp.size = 0;
+}
+EXPORT_SYMBOL(drm_format_conv_state_release);
+
 static unsigned int clip_offset(const struct drm_rect *clip, unsigned int pitch, unsigned int cpp)
 {
 	return clip->y1 * pitch + clip->x1 * cpp;
@@ -45,6 +136,7 @@ EXPORT_SYMBOL(drm_fb_clip_offset);
 static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_pixsize,
 			 const void *vaddr, const struct drm_framebuffer *fb,
 			 const struct drm_rect *clip, bool vaddr_cached_hint,
+			 struct drm_format_conv_state *state,
 			 void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels))
 {
 	unsigned long linepixels = drm_rect_width(clip);
@@ -60,7 +152,7 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p
 	 * one line at a time.
 	 */
 	if (!vaddr_cached_hint) {
-		stmp = kmalloc(sbuf_len, GFP_KERNEL);
+		stmp = drm_format_conv_state_reserve(state, sbuf_len, GFP_KERNEL);
 		if (!stmp)
 			return -ENOMEM;
 	}
@@ -79,8 +171,6 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p
 		dst += dst_pitch;
 	}
 
-	kfree(stmp);
-
 	return 0;
 }
 
@@ -88,6 +178,7 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p
 static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsigned long dst_pixsize,
 			      const void *vaddr, const struct drm_framebuffer *fb,
 			      const struct drm_rect *clip, bool vaddr_cached_hint,
+			      struct drm_format_conv_state *state,
 			      void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels))
 {
 	unsigned long linepixels = drm_rect_width(clip);
@@ -101,9 +192,9 @@ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsign
 	void *dbuf;
 
 	if (vaddr_cached_hint) {
-		dbuf = kmalloc(dbuf_len, GFP_KERNEL);
+		dbuf = drm_format_conv_state_reserve(state, dbuf_len, GFP_KERNEL);
 	} else {
-		dbuf = kmalloc(stmp_off + sbuf_len, GFP_KERNEL);
+		dbuf = drm_format_conv_state_reserve(state, stmp_off + sbuf_len, GFP_KERNEL);
 		stmp = dbuf + stmp_off;
 	}
 	if (!dbuf)
@@ -124,8 +215,6 @@ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsign
 		dst += dst_pitch;
 	}
 
-	kfree(dbuf);
-
 	return 0;
 }
 
@@ -139,17 +228,24 @@ static int drm_fb_xfrm(struct iosys_map *dst,
 	static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = {
 		0, 0, 0, 0
 	};
+	struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT;
+	int ret;
 
 	if (!dst_pitch)
 		dst_pitch = default_dst_pitch;
 
 	/* TODO: handle src in I/O memory here */
 	if (dst[0].is_iomem)
-		return __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0],
-					  src[0].vaddr, fb, clip, vaddr_cached_hint, xfrm_line);
+		ret = __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0],
+					 src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state,
+					 xfrm_line);
 	else
-		return __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0],
-				     src[0].vaddr, fb, clip, vaddr_cached_hint, xfrm_line);
+		ret = __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0],
+				    src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state,
+				    xfrm_line);
+	drm_format_conv_state_release(&fmtcnv_state);
+
+	return ret;
 }
 
 /**
diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index 291deb09475bb..724a9baf7315d 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -15,6 +15,57 @@ struct drm_rect;
 
 struct iosys_map;
 
+/**
+ * struct drm_format_conv_state - Stores format-conversion state
+ *
+ * DRM helpers for format conversion store temporary state in
+ * struct drm_xfrm_buf. The buffer's resources can be reused
+ * among multiple conversion operations.
+ *
+ * All fields are considered private.
+ */
+struct drm_format_conv_state {
+	struct {
+		void *mem;
+		size_t size;
+		bool preallocated;
+	} tmp;
+};
+
+#define __DRM_FORMAT_CONV_STATE_INIT(_mem, _size, _preallocated) { \
+		.tmp = { \
+			.mem = (_mem), \
+			.size = (_size), \
+			.preallocated = (_preallocated), \
+		} \
+	}
+
+/**
+ * DRM_FORMAT_CONV_STATE_INIT - Initializer for struct drm_format_conv_state
+ *
+ * Initializes an instance of struct drm_format_conv_state to default values.
+ */
+#define DRM_FORMAT_CONV_STATE_INIT \
+	__DRM_FORMAT_CONV_STATE_INIT(NULL, 0, false)
+
+/**
+ * DRM_FORMAT_CONV_STATE_INIT_PREALLOCATED - Initializer for struct drm_format_conv_state
+ * @_mem: The preallocated memory area
+ * @_size: The number of bytes in _mem
+ *
+ * Initializes an instance of struct drm_format_conv_state to preallocated
+ * storage. The caller is responsible for releasing the provided memory range.
+ */
+#define DRM_FORMAT_CONV_STATE_INIT_PREALLOCATED(_mem, _size) \
+	__DRM_FORMAT_CONV_STATE_INIT(_mem, _size, true)
+
+void drm_format_conv_state_init(struct drm_format_conv_state *state);
+void drm_format_conv_state_copy(struct drm_format_conv_state *state,
+				const struct drm_format_conv_state *old_state);
+void *drm_format_conv_state_reserve(struct drm_format_conv_state *state,
+				    size_t new_size, gfp_t flags);
+void drm_format_conv_state_release(struct drm_format_conv_state *state);
+
 unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format,
 				const struct drm_rect *clip);
 
-- 
GitLab


From 903674588a48df25bb79b1bedbfc48450f1d5d8f Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Oct 2023 16:06:31 +0200
Subject: [PATCH 0246/2340] drm/atomic-helper: Add format-conversion state to
 shadow-plane state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Store an instance of struct drm_format_conv_state in the shadow-plane
state struct drm_shadow_plane_state. Many drivers with shadow planes
use DRM's format helpers to copy or convert the framebuffer data to
backing storage in the scanout buffer. The shadow plane provides the
necessary state and manages the conversion's intermediate buffer memory.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Noralf Trønnes <noralf@tronnes.org>
Tested-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-3-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_gem_atomic_helper.c |  9 +++++++++
 include/drm/drm_gem_atomic_helper.h     | 10 ++++++++++
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index 5d4b9cd077f7a..e440f458b6633 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -218,7 +218,14 @@ void
 __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane,
 				       struct drm_shadow_plane_state *new_shadow_plane_state)
 {
+	struct drm_plane_state *plane_state = plane->state;
+	struct drm_shadow_plane_state *shadow_plane_state =
+		to_drm_shadow_plane_state(plane_state);
+
 	__drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base);
+
+	drm_format_conv_state_copy(&shadow_plane_state->fmtcnv_state,
+				   &new_shadow_plane_state->fmtcnv_state);
 }
 EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state);
 
@@ -266,6 +273,7 @@ EXPORT_SYMBOL(drm_gem_duplicate_shadow_plane_state);
  */
 void __drm_gem_destroy_shadow_plane_state(struct drm_shadow_plane_state *shadow_plane_state)
 {
+	drm_format_conv_state_release(&shadow_plane_state->fmtcnv_state);
 	__drm_atomic_helper_plane_destroy_state(&shadow_plane_state->base);
 }
 EXPORT_SYMBOL(__drm_gem_destroy_shadow_plane_state);
@@ -302,6 +310,7 @@ void __drm_gem_reset_shadow_plane(struct drm_plane *plane,
 				  struct drm_shadow_plane_state *shadow_plane_state)
 {
 	__drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base);
+	drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state);
 }
 EXPORT_SYMBOL(__drm_gem_reset_shadow_plane);
 
diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h
index 40b8b039518e0..3e01c619a25e0 100644
--- a/include/drm/drm_gem_atomic_helper.h
+++ b/include/drm/drm_gem_atomic_helper.h
@@ -5,6 +5,7 @@
 
 #include <linux/iosys-map.h>
 
+#include <drm/drm_format_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_plane.h>
 
@@ -49,6 +50,15 @@ struct drm_shadow_plane_state {
 	/** @base: plane state */
 	struct drm_plane_state base;
 
+	/**
+	 * @fmtcnv_state: Format-conversion state
+	 *
+	 * Per-plane state for format conversion.
+	 * Flags for copying shadow buffers into backend storage. Also holds
+	 * temporary storage for format conversion.
+	 */
+	struct drm_format_conv_state fmtcnv_state;
+
 	/* Transitional state - do not export or duplicate */
 
 	/**
-- 
GitLab


From 4cd24d4b1a9548f42cdb7f449edc6f869a8ae730 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Oct 2023 16:06:32 +0200
Subject: [PATCH 0247/2340] drm/format-helper: Pass format-conversion state to
 helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass an instance of struct drm_format_conv_state to DRM's format
conversion helpers. Update all callers.

Most drivers can use the format-conversion state from their shadow-
plane state. The shadow plane's destroy function releases the
allocated buffer. Drivers will later be able to allocate a buffer
of appropriate size in their plane's atomic_check code.

The gud driver uses a separate thread for committing updates. For
now, the update worker contains its own format-conversion state.

Images in the format-helper tests are small. The tests preallocate
a static page for the temporary buffer. Unloading the module releases
the memory.

v6:
	* update patch for ssd132x support
v5:
	* avoid using unusupported shadow-plane state in repaper (Noralf)
	* fix documentation (Noralf, kernel test robot)
v3:
	* store buffer in shadow-plane state (Javier, Maxime)
	* replace ARRAY_SIZE() with sizeof() (Jani)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Tested-by: Javier Martinez Canillas <javierm@redhat.com> # ssd130x
Cc: Noralf Trønnes <noralf@tronnes.org>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: David Lechner <david@lechnology.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-4-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_format_helper.c           | 123 ++++++++++--------
 drivers/gpu/drm/drm_mipi_dbi.c                |  19 ++-
 drivers/gpu/drm/gud/gud_pipe.c                |  30 +++--
 drivers/gpu/drm/solomon/ssd130x.c             |  16 ++-
 .../gpu/drm/tests/drm_format_helper_test.c    |  72 ++++++----
 drivers/gpu/drm/tiny/cirrus.c                 |   3 +-
 drivers/gpu/drm/tiny/ili9225.c                |  10 +-
 drivers/gpu/drm/tiny/ofdrm.c                  |   2 +-
 drivers/gpu/drm/tiny/repaper.c                |  10 +-
 drivers/gpu/drm/tiny/simpledrm.c              |   2 +-
 drivers/gpu/drm/tiny/st7586.c                 |  19 +--
 include/drm/drm_format_helper.h               |  30 +++--
 include/drm/drm_mipi_dbi.h                    |   4 +-
 13 files changed, 200 insertions(+), 140 deletions(-)

diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index 28d013d427788..b1be458ed4dda 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -223,29 +223,25 @@ static int drm_fb_xfrm(struct iosys_map *dst,
 		       const unsigned int *dst_pitch, const u8 *dst_pixsize,
 		       const struct iosys_map *src, const struct drm_framebuffer *fb,
 		       const struct drm_rect *clip, bool vaddr_cached_hint,
+		       struct drm_format_conv_state *state,
 		       void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels))
 {
 	static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = {
 		0, 0, 0, 0
 	};
-	struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT;
-	int ret;
 
 	if (!dst_pitch)
 		dst_pitch = default_dst_pitch;
 
 	/* TODO: handle src in I/O memory here */
 	if (dst[0].is_iomem)
-		ret = __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0],
-					 src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state,
-					 xfrm_line);
+		return __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0],
+					  src[0].vaddr, fb, clip, vaddr_cached_hint, state,
+					  xfrm_line);
 	else
-		ret = __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0],
-				    src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state,
-				    xfrm_line);
-	drm_format_conv_state_release(&fmtcnv_state);
-
-	return ret;
+		return __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0],
+				     src[0].vaddr, fb, clip, vaddr_cached_hint, state,
+				     xfrm_line);
 }
 
 /**
@@ -331,6 +327,7 @@ static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
  * @cached: Source buffer is mapped cached (eg. not write-combined)
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and swaps per-pixel
  * bytes during the process. Destination and framebuffer formats must match. The
@@ -345,7 +342,8 @@ static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels
  */
 void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch,
 		 const struct iosys_map *src, const struct drm_framebuffer *fb,
-		 const struct drm_rect *clip, bool cached)
+		 const struct drm_rect *clip, bool cached,
+		 struct drm_format_conv_state *state)
 {
 	const struct drm_format_info *format = fb->format;
 	u8 cpp = DIV_ROUND_UP(drm_format_info_bpp(format, 0), 8);
@@ -364,7 +362,7 @@ void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch,
 		return;
 	}
 
-	drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, swab_line);
+	drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, state, swab_line);
 }
 EXPORT_SYMBOL(drm_fb_swab);
 
@@ -391,6 +389,7 @@ static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigne
  * @src: Array of XRGB8888 source buffers
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts the
  * color format during the process. Destination and framebuffer formats must match. The
@@ -405,13 +404,13 @@ static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigne
  */
 void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pitch,
 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-			       const struct drm_rect *clip)
+			       const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		1,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_rgb332_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb332);
@@ -460,6 +459,7 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf,
  * @src: Array of XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  * @swab: Swap bytes
  *
  * This function copies parts of a framebuffer to display memory and converts the
@@ -475,7 +475,8 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf,
  */
 void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch,
 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-			       const struct drm_rect *clip, bool swab)
+			       const struct drm_rect *clip, struct drm_format_conv_state *state,
+			       bool swab)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		2,
@@ -488,7 +489,7 @@ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pi
 	else
 		xfrm_line = drm_fb_xrgb8888_to_rgb565_line;
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, xfrm_line);
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, xfrm_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565);
 
@@ -517,6 +518,7 @@ static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsig
  * @src: Array of XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts
  * the color format during the process. The parameters @dst, @dst_pitch and
@@ -532,13 +534,13 @@ static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsig
  */
 void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip)
+				 const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		2,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_xrgb1555_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb1555);
@@ -569,6 +571,7 @@ static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsig
  * @src: Array of XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts
  * the color format during the process. The parameters @dst, @dst_pitch and
@@ -584,13 +587,13 @@ static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsig
  */
 void drm_fb_xrgb8888_to_argb1555(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip)
+				 const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		2,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_argb1555_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb1555);
@@ -621,6 +624,7 @@ static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsig
  * @src: Array of XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts
  * the color format during the process. The parameters @dst, @dst_pitch and
@@ -636,13 +640,13 @@ static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsig
  */
 void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip)
+				 const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		2,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_rgba5551_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgba5551);
@@ -671,6 +675,7 @@ static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigne
  * @src: Array of XRGB8888 source buffers
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts the
  * color format during the process. Destination and framebuffer formats must match. The
@@ -686,13 +691,13 @@ static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigne
  */
 void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch,
 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-			       const struct drm_rect *clip)
+			       const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		3,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_rgb888_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888);
@@ -719,6 +724,7 @@ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsig
  * @src: Array of XRGB8888 source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts the
  * color format during the process. The parameters @dst, @dst_pitch and @src refer
@@ -734,13 +740,13 @@ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsig
  */
 void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip)
+				 const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		4,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_argb8888_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb8888);
@@ -765,13 +771,14 @@ static void drm_fb_xrgb8888_to_abgr8888_line(void *dbuf, const void *sbuf, unsig
 static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned int *dst_pitch,
 					const struct iosys_map *src,
 					const struct drm_framebuffer *fb,
-					const struct drm_rect *clip)
+					const struct drm_rect *clip,
+					struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		4,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_abgr8888_line);
 }
 
@@ -795,13 +802,14 @@ static void drm_fb_xrgb8888_to_xbgr8888_line(void *dbuf, const void *sbuf, unsig
 static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned int *dst_pitch,
 					const struct iosys_map *src,
 					const struct drm_framebuffer *fb,
-					const struct drm_rect *clip)
+					const struct drm_rect *clip,
+					struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		4,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_xbgr8888_line);
 }
 
@@ -831,6 +839,7 @@ static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, un
  * @src: Array of XRGB8888 source buffers
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts the
  * color format during the process. Destination and framebuffer formats must match. The
@@ -846,13 +855,14 @@ static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, un
  */
 void drm_fb_xrgb8888_to_xrgb2101010(struct iosys_map *dst, const unsigned int *dst_pitch,
 				    const struct iosys_map *src, const struct drm_framebuffer *fb,
-				    const struct drm_rect *clip)
+				    const struct drm_rect *clip,
+				    struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		4,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_xrgb2101010_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb2101010);
@@ -884,6 +894,7 @@ static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, un
  * @src: Array of XRGB8888 source buffers
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts
  * the color format during the process. The parameters @dst, @dst_pitch and
@@ -899,13 +910,14 @@ static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, un
  */
 void drm_fb_xrgb8888_to_argb2101010(struct iosys_map *dst, const unsigned int *dst_pitch,
 				    const struct iosys_map *src, const struct drm_framebuffer *fb,
-				    const struct drm_rect *clip)
+				    const struct drm_rect *clip,
+				    struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		4,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_argb2101010_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb2101010);
@@ -935,6 +947,7 @@ static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned
  * @src: Array of XRGB8888 source buffers
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts the
  * color format during the process. Destination and framebuffer formats must match. The
@@ -954,13 +967,13 @@ static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned
  */
 void drm_fb_xrgb8888_to_gray8(struct iosys_map *dst, const unsigned int *dst_pitch,
 			      const struct iosys_map *src, const struct drm_framebuffer *fb,
-			      const struct drm_rect *clip)
+			      const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
 		1,
 	};
 
-	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
+	drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state,
 		    drm_fb_xrgb8888_to_gray8_line);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8);
@@ -974,6 +987,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8);
  * @src:	The framebuffer memory to copy from
  * @fb:		The framebuffer to copy from
  * @clip:	Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory. If the
  * formats of the display and the framebuffer mismatch, the blit function
@@ -992,7 +1006,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8);
  */
 int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t dst_format,
 		const struct iosys_map *src, const struct drm_framebuffer *fb,
-		const struct drm_rect *clip)
+		const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	uint32_t fb_format = fb->format->format;
 
@@ -1000,44 +1014,44 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
 		drm_fb_memcpy(dst, dst_pitch, src, fb, clip);
 		return 0;
 	} else if (fb_format == (dst_format | DRM_FORMAT_BIG_ENDIAN)) {
-		drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+		drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state);
 		return 0;
 	} else if (fb_format == (dst_format & ~DRM_FORMAT_BIG_ENDIAN)) {
-		drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+		drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state);
 		return 0;
 	} else if (fb_format == DRM_FORMAT_XRGB8888) {
 		if (dst_format == DRM_FORMAT_RGB565) {
-			drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, false);
+			drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, state, false);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_XRGB1555) {
-			drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_ARGB1555) {
-			drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_RGBA5551) {
-			drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_RGB888) {
-			drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_ARGB8888) {
-			drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_XBGR8888) {
-			drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_ABGR8888) {
-			drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_XRGB2101010) {
-			drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_ARGB2101010) {
-			drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip);
+			drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip, state);
 			return 0;
 		} else if (dst_format == DRM_FORMAT_BGRX8888) {
-			drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+			drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state);
 			return 0;
 		}
 	}
@@ -1074,6 +1088,7 @@ static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int
  * @src: Array of XRGB8888 source buffers
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @state: Transform and conversion state
  *
  * This function copies parts of a framebuffer to display memory and converts the
  * color format during the process. Destination and framebuffer formats must match. The
@@ -1098,7 +1113,7 @@ static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int
  */
 void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitch,
 			     const struct iosys_map *src, const struct drm_framebuffer *fb,
-			     const struct drm_rect *clip)
+			     const struct drm_rect *clip, struct drm_format_conv_state *state)
 {
 	static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = {
 		0, 0, 0, 0
@@ -1138,7 +1153,7 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc
 	 * Allocate a buffer to be used for both copying from the cma
 	 * memory and to store the intermediate grayscale line pixels.
 	 */
-	src32 = kmalloc(len_src32 + linepixels, GFP_KERNEL);
+	src32 = drm_format_conv_state_reserve(state, len_src32 + linepixels, GFP_KERNEL);
 	if (!src32)
 		return;
 
@@ -1152,8 +1167,6 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc
 		vaddr += fb->pitches[0];
 		mono += dst_pitch_0;
 	}
-
-	kfree(src32);
 }
 EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono);
 
diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
index e90f0bf895b33..daac649aabdbe 100644
--- a/drivers/gpu/drm/drm_mipi_dbi.c
+++ b/drivers/gpu/drm/drm_mipi_dbi.c
@@ -197,12 +197,14 @@ EXPORT_SYMBOL(mipi_dbi_command_stackbuf);
  * @fb: The source framebuffer
  * @clip: Clipping rectangle of the area to be copied
  * @swap: When true, swap MSB/LSB of 16-bit values
+ * @fmtcnv_state: Format-conversion state
  *
  * Returns:
  * Zero on success, negative error code on failure.
  */
 int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb,
-		      struct drm_rect *clip, bool swap)
+		      struct drm_rect *clip, bool swap,
+		      struct drm_format_conv_state *fmtcnv_state)
 {
 	struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0);
 	struct iosys_map dst_map = IOSYS_MAP_INIT_VADDR(dst);
@@ -215,12 +217,13 @@ int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *
 	switch (fb->format->format) {
 	case DRM_FORMAT_RGB565:
 		if (swap)
-			drm_fb_swab(&dst_map, NULL, src, fb, clip, !gem->import_attach);
+			drm_fb_swab(&dst_map, NULL, src, fb, clip, !gem->import_attach,
+				    fmtcnv_state);
 		else
 			drm_fb_memcpy(&dst_map, NULL, src, fb, clip);
 		break;
 	case DRM_FORMAT_XRGB8888:
-		drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, swap);
+		drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, fmtcnv_state, swap);
 		break;
 	default:
 		drm_err_once(fb->dev, "Format is not supported: %p4cc\n",
@@ -252,7 +255,7 @@ static void mipi_dbi_set_window_address(struct mipi_dbi_dev *dbidev,
 }
 
 static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb,
-			      struct drm_rect *rect)
+			      struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state)
 {
 	struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev);
 	unsigned int height = rect->y2 - rect->y1;
@@ -270,7 +273,7 @@ static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb,
 	if (!dbi->dc || !full || swap ||
 	    fb->format->format == DRM_FORMAT_XRGB8888) {
 		tr = dbidev->tx_buf;
-		ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap);
+		ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap, fmtcnv_state);
 		if (ret)
 			goto err_msg;
 	} else {
@@ -332,7 +335,8 @@ void mipi_dbi_pipe_update(struct drm_simple_display_pipe *pipe,
 		return;
 
 	if (drm_atomic_helper_damage_merged(old_state, state, &rect))
-		mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect);
+		mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
+				  &shadow_plane_state->fmtcnv_state);
 
 	drm_dev_exit(idx);
 }
@@ -368,7 +372,8 @@ void mipi_dbi_enable_flush(struct mipi_dbi_dev *dbidev,
 	if (!drm_dev_enter(&dbidev->drm, &idx))
 		return;
 
-	mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect);
+	mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
+			  &shadow_plane_state->fmtcnv_state);
 	backlight_enable(dbidev->backlight);
 
 	drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c
index d2f199ea3c111..28171de84718e 100644
--- a/drivers/gpu/drm/gud/gud_pipe.c
+++ b/drivers/gpu/drm/gud/gud_pipe.c
@@ -51,7 +51,8 @@ static bool gud_is_big_endian(void)
 
 static size_t gud_xrgb8888_to_r124(u8 *dst, const struct drm_format_info *format,
 				   void *src, struct drm_framebuffer *fb,
-				   struct drm_rect *rect)
+				   struct drm_rect *rect,
+				   struct drm_format_conv_state *fmtcnv_state)
 {
 	unsigned int block_width = drm_format_info_block_width(format, 0);
 	unsigned int bits_per_pixel = 8 / block_width;
@@ -75,7 +76,7 @@ static size_t gud_xrgb8888_to_r124(u8 *dst, const struct drm_format_info *format
 
 	iosys_map_set_vaddr(&dst_map, buf);
 	iosys_map_set_vaddr(&vmap, src);
-	drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, rect);
+	drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, rect, fmtcnv_state);
 	pix8 = buf;
 
 	for (y = 0; y < height; y++) {
@@ -152,7 +153,8 @@ static size_t gud_xrgb8888_to_color(u8 *dst, const struct drm_format_info *forma
 static int gud_prep_flush(struct gud_device *gdrm, struct drm_framebuffer *fb,
 			  const struct iosys_map *src, bool cached_reads,
 			  const struct drm_format_info *format, struct drm_rect *rect,
-			  struct gud_set_buffer_req *req)
+			  struct gud_set_buffer_req *req,
+			  struct drm_format_conv_state *fmtcnv_state)
 {
 	u8 compression = gdrm->compression;
 	struct iosys_map dst;
@@ -178,23 +180,23 @@ retry:
 	 */
 	if (format != fb->format) {
 		if (format->format == GUD_DRM_FORMAT_R1) {
-			len = gud_xrgb8888_to_r124(buf, format, vaddr, fb, rect);
+			len = gud_xrgb8888_to_r124(buf, format, vaddr, fb, rect, fmtcnv_state);
 			if (!len)
 				return -ENOMEM;
 		} else if (format->format == DRM_FORMAT_R8) {
-			drm_fb_xrgb8888_to_gray8(&dst, NULL, src, fb, rect);
+			drm_fb_xrgb8888_to_gray8(&dst, NULL, src, fb, rect, fmtcnv_state);
 		} else if (format->format == DRM_FORMAT_RGB332) {
-			drm_fb_xrgb8888_to_rgb332(&dst, NULL, src, fb, rect);
+			drm_fb_xrgb8888_to_rgb332(&dst, NULL, src, fb, rect, fmtcnv_state);
 		} else if (format->format == DRM_FORMAT_RGB565) {
-			drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect,
+			drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect, fmtcnv_state,
 						  gud_is_big_endian());
 		} else if (format->format == DRM_FORMAT_RGB888) {
-			drm_fb_xrgb8888_to_rgb888(&dst, NULL, src, fb, rect);
+			drm_fb_xrgb8888_to_rgb888(&dst, NULL, src, fb, rect, fmtcnv_state);
 		} else {
 			len = gud_xrgb8888_to_color(buf, format, vaddr, fb, rect);
 		}
 	} else if (gud_is_big_endian() && format->cpp[0] > 1) {
-		drm_fb_swab(&dst, NULL, src, fb, rect, cached_reads);
+		drm_fb_swab(&dst, NULL, src, fb, rect, cached_reads, fmtcnv_state);
 	} else if (compression && cached_reads && pitch == fb->pitches[0]) {
 		/* can compress directly from the framebuffer */
 		buf = vaddr + rect->y1 * pitch;
@@ -266,7 +268,8 @@ static int gud_usb_bulk(struct gud_device *gdrm, size_t len)
 
 static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb,
 			  const struct iosys_map *src, bool cached_reads,
-			  const struct drm_format_info *format, struct drm_rect *rect)
+			  const struct drm_format_info *format, struct drm_rect *rect,
+			  struct drm_format_conv_state *fmtcnv_state)
 {
 	struct gud_set_buffer_req req;
 	size_t len, trlen;
@@ -274,7 +277,7 @@ static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb,
 
 	drm_dbg(&gdrm->drm, "Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect));
 
-	ret = gud_prep_flush(gdrm, fb, src, cached_reads, format, rect, &req);
+	ret = gud_prep_flush(gdrm, fb, src, cached_reads, format, rect, &req, fmtcnv_state);
 	if (ret)
 		return ret;
 
@@ -318,6 +321,7 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb
 			     const struct iosys_map *src, bool cached_reads,
 			     struct drm_rect *damage)
 {
+	struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT;
 	const struct drm_format_info *format;
 	unsigned int i, lines;
 	size_t pitch;
@@ -340,7 +344,7 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb
 		rect.y1 += i * lines;
 		rect.y2 = min_t(u32, rect.y1 + lines, damage->y2);
 
-		ret = gud_flush_rect(gdrm, fb, src, cached_reads, format, &rect);
+		ret = gud_flush_rect(gdrm, fb, src, cached_reads, format, &rect, &fmtcnv_state);
 		if (ret) {
 			if (ret != -ENODEV && ret != -ECONNRESET &&
 			    ret != -ESHUTDOWN && ret != -EPROTO)
@@ -350,6 +354,8 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb
 			break;
 		}
 	}
+
+	drm_format_conv_state_release(&fmtcnv_state);
 }
 
 void gud_flush_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index e0174f82e3537..ee0a06c506926 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -808,7 +808,8 @@ static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array)
 static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb,
 				const struct iosys_map *vmap,
 				struct drm_rect *rect,
-				u8 *buf, u8 *data_array)
+				u8 *buf, u8 *data_array,
+				struct drm_format_conv_state *fmtcnv_state)
 {
 	struct ssd130x_device *ssd130x = drm_to_ssd130x(fb->dev);
 	struct iosys_map dst;
@@ -826,7 +827,7 @@ static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb,
 		return ret;
 
 	iosys_map_set_vaddr(&dst, buf);
-	drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect);
+	drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state);
 
 	drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
 
@@ -838,7 +839,8 @@ static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb,
 static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb,
 				const struct iosys_map *vmap,
 				struct drm_rect *rect, u8 *buf,
-				u8 *data_array)
+				u8 *data_array,
+				struct drm_format_conv_state *fmtcnv_state)
 {
 	struct ssd130x_device *ssd130x = drm_to_ssd130x(fb->dev);
 	unsigned int dst_pitch = drm_rect_width(rect);
@@ -855,7 +857,7 @@ static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb,
 		return ret;
 
 	iosys_map_set_vaddr(&dst, buf);
-	drm_fb_xrgb8888_to_gray8(&dst, &dst_pitch, vmap, fb, rect);
+	drm_fb_xrgb8888_to_gray8(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state);
 
 	drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
 
@@ -968,7 +970,8 @@ static void ssd130x_primary_plane_atomic_update(struct drm_plane *plane,
 
 		ssd130x_fb_blit_rect(fb, &shadow_plane_state->data[0], &dst_clip,
 				     ssd130x_plane_state->buffer,
-				     ssd130x_crtc_state->data_array);
+				     ssd130x_crtc_state->data_array,
+				     &shadow_plane_state->fmtcnv_state);
 	}
 
 	drm_dev_exit(idx);
@@ -1002,7 +1005,8 @@ static void ssd132x_primary_plane_atomic_update(struct drm_plane *plane,
 
 		ssd132x_fb_blit_rect(fb, &shadow_plane_state->data[0], &dst_clip,
 				     ssd130x_plane_state->buffer,
-				     ssd130x_crtc_state->data_array);
+				     ssd130x_crtc_state->data_array,
+				     &shadow_plane_state->fmtcnv_state);
 	}
 
 	drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c
index f6408e56f7861..08992636ec05f 100644
--- a/drivers/gpu/drm/tests/drm_format_helper_test.c
+++ b/drivers/gpu/drm/tests/drm_format_helper_test.c
@@ -20,6 +20,10 @@
 
 #define TEST_USE_DEFAULT_PITCH 0
 
+static unsigned char fmtcnv_state_mem[PAGE_SIZE];
+static struct drm_format_conv_state fmtcnv_state =
+	DRM_FORMAT_CONV_STATE_INIT_PREALLOCATED(fmtcnv_state_mem, sizeof(fmtcnv_state_mem));
+
 struct convert_to_gray8_result {
 	unsigned int dst_pitch;
 	const u8 expected[TEST_BUF_SIZE];
@@ -630,8 +634,7 @@ static void drm_test_fb_xrgb8888_to_gray8(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_gray8(&dst, dst_pitch, &src, &fb, &params->clip);
-
+	drm_fb_xrgb8888_to_gray8(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 }
 
@@ -664,7 +667,7 @@ static void drm_test_fb_xrgb8888_to_rgb332(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_rgb332(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_rgb332(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 }
 
@@ -697,12 +700,14 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, &params->clip, false);
+	drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, &params->clip,
+				  &fmtcnv_state, false);
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
 	buf = dst.vaddr; /* restore original value of buf */
-	drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, &params->clip, true);
+	drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, &params->clip,
+				  &fmtcnv_state, true);
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected_swab, dst_size);
 
@@ -711,7 +716,8 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB565, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB565, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 
@@ -748,7 +754,7 @@ static void drm_test_fb_xrgb8888_to_xrgb1555(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_xrgb1555(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_xrgb1555(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
@@ -757,7 +763,8 @@ static void drm_test_fb_xrgb8888_to_xrgb1555(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB1555, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB1555, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 
@@ -794,7 +801,7 @@ static void drm_test_fb_xrgb8888_to_argb1555(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_argb1555(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_argb1555(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
@@ -803,7 +810,8 @@ static void drm_test_fb_xrgb8888_to_argb1555(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB1555, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB1555, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 
@@ -840,7 +848,7 @@ static void drm_test_fb_xrgb8888_to_rgba5551(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_rgba5551(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_rgba5551(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
@@ -849,7 +857,8 @@ static void drm_test_fb_xrgb8888_to_rgba5551(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGBA5551, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGBA5551, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16));
 
@@ -890,7 +899,7 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
 	buf = dst.vaddr; /* restore original value of buf */
@@ -898,7 +907,8 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB888, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB888, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	KUNIT_EXPECT_FALSE(test, blit_result);
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
@@ -933,7 +943,7 @@ static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_argb8888(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_argb8888(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
@@ -942,7 +952,8 @@ static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB8888, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB8888, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 
@@ -979,7 +990,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
@@ -989,7 +1000,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test)
 	int blit_result = 0;
 
 	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB2101010, &src, &fb,
-				  &params->clip);
+				  &params->clip, &fmtcnv_state);
 
 	KUNIT_EXPECT_FALSE(test, blit_result);
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
@@ -1024,7 +1035,7 @@ static void drm_test_fb_xrgb8888_to_argb2101010(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_argb2101010(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_argb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
@@ -1034,7 +1045,7 @@ static void drm_test_fb_xrgb8888_to_argb2101010(struct kunit *test)
 	int blit_result = 0;
 
 	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB2101010, &src, &fb,
-				  &params->clip);
+				  &params->clip, &fmtcnv_state);
 
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 
@@ -1071,7 +1082,7 @@ static void drm_test_fb_xrgb8888_to_mono(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_xrgb8888_to_mono(&dst, dst_pitch, &src, &fb, &params->clip);
+	drm_fb_xrgb8888_to_mono(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state);
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 }
 
@@ -1104,7 +1115,7 @@ static void drm_test_fb_swab(struct kunit *test)
 	const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ?
 		NULL : &result->dst_pitch;
 
-	drm_fb_swab(&dst, dst_pitch, &src, &fb, &params->clip, false);
+	drm_fb_swab(&dst, dst_pitch, &src, &fb, &params->clip, false, &fmtcnv_state);
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
@@ -1114,7 +1125,7 @@ static void drm_test_fb_swab(struct kunit *test)
 	int blit_result;
 
 	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888 | DRM_FORMAT_BIG_ENDIAN,
-				  &src, &fb, &params->clip);
+				  &src, &fb, &params->clip, &fmtcnv_state);
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 
 	KUNIT_EXPECT_FALSE(test, blit_result);
@@ -1123,7 +1134,8 @@ static void drm_test_fb_swab(struct kunit *test)
 	buf = dst.vaddr;
 	memset(buf, 0, dst_size);
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_BGRX8888, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_BGRX8888, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 
 	KUNIT_EXPECT_FALSE(test, blit_result);
@@ -1137,7 +1149,8 @@ static void drm_test_fb_swab(struct kunit *test)
 	mock_format.format |= DRM_FORMAT_BIG_ENDIAN;
 	fb.format = &mock_format;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 
 	KUNIT_EXPECT_FALSE(test, blit_result);
@@ -1175,7 +1188,8 @@ static void drm_test_fb_xrgb8888_to_abgr8888(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ABGR8888, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ABGR8888, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 
@@ -1214,7 +1228,8 @@ static void drm_test_fb_xrgb8888_to_xbgr8888(struct kunit *test)
 
 	int blit_result = 0;
 
-	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XBGR8888, &src, &fb, &params->clip);
+	blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XBGR8888, &src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32));
 
@@ -1817,7 +1832,8 @@ static void drm_test_fb_memcpy(struct kunit *test)
 
 	int blit_result;
 
-	blit_result = drm_fb_blit(dst, dst_pitches, params->format, src, &fb, &params->clip);
+	blit_result = drm_fb_blit(dst, dst_pitches, params->format, src, &fb, &params->clip,
+				  &fmtcnv_state);
 
 	KUNIT_EXPECT_FALSE(test, blit_result);
 	for (size_t i = 0; i < fb.format->num_planes; i++) {
diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
index c5c34cd2edc1f..4e3a152f897ac 100644
--- a/drivers/gpu/drm/tiny/cirrus.c
+++ b/drivers/gpu/drm/tiny/cirrus.c
@@ -411,7 +411,8 @@ static void cirrus_primary_plane_helper_atomic_update(struct drm_plane *plane,
 		unsigned int offset = drm_fb_clip_offset(pitch, format, &damage);
 		struct iosys_map dst = IOSYS_MAP_INIT_OFFSET(&vaddr, offset);
 
-		drm_fb_blit(&dst, &pitch, format->format, shadow_plane_state->data, fb, &damage);
+		drm_fb_blit(&dst, &pitch, format->format, shadow_plane_state->data, fb,
+			    &damage, &shadow_plane_state->fmtcnv_state);
 	}
 
 	drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c
index 4ceb68ffac4be..dd8b0a181be94 100644
--- a/drivers/gpu/drm/tiny/ili9225.c
+++ b/drivers/gpu/drm/tiny/ili9225.c
@@ -78,7 +78,7 @@ static inline int ili9225_command(struct mipi_dbi *dbi, u8 cmd, u16 data)
 }
 
 static void ili9225_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb,
-			     struct drm_rect *rect)
+			     struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state)
 {
 	struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev);
 	unsigned int height = rect->y2 - rect->y1;
@@ -98,7 +98,7 @@ static void ili9225_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb,
 	if (!dbi->dc || !full || swap ||
 	    fb->format->format == DRM_FORMAT_XRGB8888) {
 		tr = dbidev->tx_buf;
-		ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap);
+		ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap, fmtcnv_state);
 		if (ret)
 			goto err_msg;
 	} else {
@@ -171,7 +171,8 @@ static void ili9225_pipe_update(struct drm_simple_display_pipe *pipe,
 		return;
 
 	if (drm_atomic_helper_damage_merged(old_state, state, &rect))
-		ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect);
+		ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
+				 &shadow_plane_state->fmtcnv_state);
 
 	drm_dev_exit(idx);
 }
@@ -281,7 +282,8 @@ static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe,
 
 	ili9225_command(dbi, ILI9225_DISPLAY_CONTROL_1, 0x1017);
 
-	ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect);
+	ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
+			 &shadow_plane_state->fmtcnv_state);
 
 out_exit:
 	drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 2d999a0facdee..404c83032cecc 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -817,7 +817,7 @@ static void ofdrm_primary_plane_helper_atomic_update(struct drm_plane *plane,
 
 		iosys_map_incr(&dst, drm_fb_clip_offset(dst_pitch, dst_format, &dst_clip));
 		drm_fb_blit(&dst, &dst_pitch, dst_format->format, shadow_plane_state->data, fb,
-			    &damage);
+			    &damage, &shadow_plane_state->fmtcnv_state);
 	}
 
 	drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c
index 73dd4f4289c20..8fd6758f5725f 100644
--- a/drivers/gpu/drm/tiny/repaper.c
+++ b/drivers/gpu/drm/tiny/repaper.c
@@ -509,7 +509,8 @@ static void repaper_get_temperature(struct repaper_epd *epd)
 	epd->factored_stage_time = epd->stage_time * factor10x / 10;
 }
 
-static int repaper_fb_dirty(struct drm_framebuffer *fb)
+static int repaper_fb_dirty(struct drm_framebuffer *fb,
+			    struct drm_format_conv_state *fmtcnv_state)
 {
 	struct drm_gem_dma_object *dma_obj = drm_fb_dma_get_gem_obj(fb, 0);
 	struct repaper_epd *epd = drm_to_epd(fb->dev);
@@ -545,7 +546,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb)
 
 	iosys_map_set_vaddr(&dst, buf);
 	iosys_map_set_vaddr(&vmap, dma_obj->vaddr);
-	drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, &vmap, fb, &clip);
+	drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, &vmap, fb, &clip, fmtcnv_state);
 
 	drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
 
@@ -830,13 +831,16 @@ static void repaper_pipe_update(struct drm_simple_display_pipe *pipe,
 				struct drm_plane_state *old_state)
 {
 	struct drm_plane_state *state = pipe->plane.state;
+	struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT;
 	struct drm_rect rect;
 
 	if (!pipe->crtc.state->active)
 		return;
 
 	if (drm_atomic_helper_damage_merged(old_state, state, &rect))
-		repaper_fb_dirty(state->fb);
+		repaper_fb_dirty(state->fb, &fmtcnv_state);
+
+	drm_format_conv_state_release(&fmtcnv_state);
 }
 
 static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = {
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index 8bdaf66044fca..4916e9b0d46a3 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -609,7 +609,7 @@ static void simpledrm_primary_plane_helper_atomic_update(struct drm_plane *plane
 
 		iosys_map_incr(&dst, drm_fb_clip_offset(sdev->pitch, sdev->format, &dst_clip));
 		drm_fb_blit(&dst, &sdev->pitch, sdev->format->format, shadow_plane_state->data,
-			    fb, &damage);
+			    fb, &damage, &shadow_plane_state->fmtcnv_state);
 	}
 
 	drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c
index 3cf4eec16a813..7336fa1ddaed1 100644
--- a/drivers/gpu/drm/tiny/st7586.c
+++ b/drivers/gpu/drm/tiny/st7586.c
@@ -64,7 +64,8 @@ static const u8 st7586_lookup[] = { 0x7, 0x4, 0x2, 0x0 };
 
 static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr,
 				       struct drm_framebuffer *fb,
-				       struct drm_rect *clip)
+				       struct drm_rect *clip,
+				       struct drm_format_conv_state *fmtcnv_state)
 {
 	size_t len = (clip->x2 - clip->x1) * (clip->y2 - clip->y1);
 	unsigned int x, y;
@@ -77,7 +78,7 @@ static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr,
 
 	iosys_map_set_vaddr(&dst_map, buf);
 	iosys_map_set_vaddr(&vmap, vaddr);
-	drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, clip);
+	drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, clip, fmtcnv_state);
 	src = buf;
 
 	for (y = clip->y1; y < clip->y2; y++) {
@@ -93,7 +94,7 @@ static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr,
 }
 
 static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb,
-			   struct drm_rect *clip)
+			   struct drm_rect *clip, struct drm_format_conv_state *fmtcnv_state)
 {
 	int ret;
 
@@ -101,7 +102,7 @@ static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuf
 	if (ret)
 		return ret;
 
-	st7586_xrgb8888_to_gray332(dst, src->vaddr, fb, clip);
+	st7586_xrgb8888_to_gray332(dst, src->vaddr, fb, clip, fmtcnv_state);
 
 	drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
 
@@ -109,7 +110,7 @@ static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuf
 }
 
 static void st7586_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb,
-			    struct drm_rect *rect)
+			    struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state)
 {
 	struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev);
 	struct mipi_dbi *dbi = &dbidev->dbi;
@@ -121,7 +122,7 @@ static void st7586_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb,
 
 	DRM_DEBUG_KMS("Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect));
 
-	ret = st7586_buf_copy(dbidev->tx_buf, src, fb, rect);
+	ret = st7586_buf_copy(dbidev->tx_buf, src, fb, rect, fmtcnv_state);
 	if (ret)
 		goto err_msg;
 
@@ -160,7 +161,8 @@ static void st7586_pipe_update(struct drm_simple_display_pipe *pipe,
 		return;
 
 	if (drm_atomic_helper_damage_merged(old_state, state, &rect))
-		st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect);
+		st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
+				&shadow_plane_state->fmtcnv_state);
 
 	drm_dev_exit(idx);
 }
@@ -238,7 +240,8 @@ static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe,
 
 	msleep(100);
 
-	st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect);
+	st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
+			&shadow_plane_state->fmtcnv_state);
 
 	mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON);
 out_exit:
diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index 724a9baf7315d..f13b34e0b752b 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -74,45 +74,49 @@ void drm_fb_memcpy(struct iosys_map *dst, const unsigned int *dst_pitch,
 		   const struct drm_rect *clip);
 void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch,
 		 const struct iosys_map *src, const struct drm_framebuffer *fb,
-		 const struct drm_rect *clip, bool cached);
+		 const struct drm_rect *clip, bool cached,
+		 struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pitch,
 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-			       const struct drm_rect *clip);
+			       const struct drm_rect *clip, struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch,
 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-			       const struct drm_rect *clip, bool swab);
+			       const struct drm_rect *clip, struct drm_format_conv_state *state,
+			       bool swab);
 void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip);
+				 const struct drm_rect *clip, struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_argb1555(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip);
+				 const struct drm_rect *clip, struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip);
+				 const struct drm_rect *clip, struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch,
 			       const struct iosys_map *src, const struct drm_framebuffer *fb,
-			       const struct drm_rect *clip);
+			       const struct drm_rect *clip, struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch,
 				 const struct iosys_map *src, const struct drm_framebuffer *fb,
-				 const struct drm_rect *clip);
+				 const struct drm_rect *clip, struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_xrgb2101010(struct iosys_map *dst, const unsigned int *dst_pitch,
 				    const struct iosys_map *src, const struct drm_framebuffer *fb,
-				    const struct drm_rect *clip);
+				    const struct drm_rect *clip,
+				    struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_argb2101010(struct iosys_map *dst, const unsigned int *dst_pitch,
 				    const struct iosys_map *src, const struct drm_framebuffer *fb,
-				    const struct drm_rect *clip);
+				    const struct drm_rect *clip,
+				    struct drm_format_conv_state *state);
 void drm_fb_xrgb8888_to_gray8(struct iosys_map *dst, const unsigned int *dst_pitch,
 			      const struct iosys_map *src, const struct drm_framebuffer *fb,
-			      const struct drm_rect *clip);
+			      const struct drm_rect *clip, struct drm_format_conv_state *state);
 
 int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t dst_format,
 		const struct iosys_map *src, const struct drm_framebuffer *fb,
-		const struct drm_rect *rect);
+		const struct drm_rect *clip, struct drm_format_conv_state *state);
 
 void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitch,
 			     const struct iosys_map *src, const struct drm_framebuffer *fb,
-			     const struct drm_rect *clip);
+			     const struct drm_rect *clip, struct drm_format_conv_state *state);
 
 size_t drm_fb_build_fourcc_list(struct drm_device *dev,
 				const u32 *native_fourccs, size_t native_nfourccs,
diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h
index 816f196b3d4c5..e8e0f8d39f3a6 100644
--- a/include/drm/drm_mipi_dbi.h
+++ b/include/drm/drm_mipi_dbi.h
@@ -12,6 +12,7 @@
 #include <drm/drm_device.h>
 #include <drm/drm_simple_kms_helper.h>
 
+struct drm_format_conv_state;
 struct drm_rect;
 struct gpio_desc;
 struct iosys_map;
@@ -192,7 +193,8 @@ int mipi_dbi_command_buf(struct mipi_dbi *dbi, u8 cmd, u8 *data, size_t len);
 int mipi_dbi_command_stackbuf(struct mipi_dbi *dbi, u8 cmd, const u8 *data,
 			      size_t len);
 int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb,
-		      struct drm_rect *clip, bool swap);
+		      struct drm_rect *clip, bool swap,
+		      struct drm_format_conv_state *fmtcnv_state);
 
 /**
  * mipi_dbi_command - MIPI DCS command with optional parameter(s)
-- 
GitLab


From 58b184dcb3f4c52c15b6ff4fa2fa0d69d1e1313f Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Oct 2023 16:06:33 +0200
Subject: [PATCH 0248/2340] drm/ofdrm: Preallocate format-conversion buffer in
 atomic_check

Preallocate the format-conversion state's storage in the plane's
atomic_check function if a format conversion is necessary. Allows
the update to fail if no memory is available. Avoids the same
allocation within atomic_update, which may not fail.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-5-tzimmermann@suse.de
---
 drivers/gpu/drm/tiny/ofdrm.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 404c83032cecc..05a72473cfc65 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -758,7 +758,11 @@ static const uint64_t ofdrm_primary_plane_format_modifiers[] = {
 static int ofdrm_primary_plane_helper_atomic_check(struct drm_plane *plane,
 						   struct drm_atomic_state *new_state)
 {
+	struct drm_device *dev = plane->dev;
+	struct ofdrm_device *odev = ofdrm_device_of_dev(dev);
 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(new_state, plane);
+	struct drm_shadow_plane_state *new_shadow_plane_state =
+		to_drm_shadow_plane_state(new_plane_state);
 	struct drm_framebuffer *new_fb = new_plane_state->fb;
 	struct drm_crtc *new_crtc = new_plane_state->crtc;
 	struct drm_crtc_state *new_crtc_state = NULL;
@@ -777,6 +781,16 @@ static int ofdrm_primary_plane_helper_atomic_check(struct drm_plane *plane,
 	else if (!new_plane_state->visible)
 		return 0;
 
+	if (new_fb->format != odev->format) {
+		void *buf;
+
+		/* format conversion necessary; reserve buffer */
+		buf = drm_format_conv_state_reserve(&new_shadow_plane_state->fmtcnv_state,
+						    odev->pitch, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	}
+
 	new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc);
 
 	new_ofdrm_crtc_state = to_ofdrm_crtc_state(new_crtc_state);
-- 
GitLab


From e7c814d305e110d6db3f440d14490a8d0d9477d9 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Oct 2023 16:06:34 +0200
Subject: [PATCH 0249/2340] drm/simpledrm: Preallocate format-conversion buffer
 in atomic_check

Preallocate the format-conversion state's storage in the plane's
atomic_check function if a format conversion is necessary. Allows
the update to fail if no memory is available. Avoids the same
allocation within atomic_update, which may not fail.

Also inline drm_plane_helper_atomic_check() into the driver and thus
return early for invisible planes. Avoids memory allocation entirely
in this case.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-6-tzimmermann@suse.de
---
 drivers/gpu/drm/tiny/simpledrm.c | 41 +++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index 4916e9b0d46a3..22a827102dfd0 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -19,6 +19,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_fbdev_generic.h>
 #include <drm/drm_format_helper.h>
+#include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_gem_shmem_helper.h>
@@ -579,6 +580,44 @@ static const uint64_t simpledrm_primary_plane_format_modifiers[] = {
 	DRM_FORMAT_MOD_INVALID
 };
 
+static int simpledrm_primary_plane_helper_atomic_check(struct drm_plane *plane,
+						       struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane);
+	struct drm_shadow_plane_state *new_shadow_plane_state =
+		to_drm_shadow_plane_state(new_plane_state);
+	struct drm_framebuffer *new_fb = new_plane_state->fb;
+	struct drm_crtc *new_crtc = new_plane_state->crtc;
+	struct drm_crtc_state *new_crtc_state = NULL;
+	struct drm_device *dev = plane->dev;
+	struct simpledrm_device *sdev = simpledrm_device_of_dev(dev);
+	int ret;
+
+	if (new_crtc)
+		new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc);
+
+	ret = drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state,
+						  DRM_PLANE_NO_SCALING,
+						  DRM_PLANE_NO_SCALING,
+						  false, false);
+	if (ret)
+		return ret;
+	else if (!new_plane_state->visible)
+		return 0;
+
+	if (new_fb->format != sdev->format) {
+		void *buf;
+
+		/* format conversion necessary; reserve buffer */
+		buf = drm_format_conv_state_reserve(&new_shadow_plane_state->fmtcnv_state,
+						    sdev->pitch, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static void simpledrm_primary_plane_helper_atomic_update(struct drm_plane *plane,
 							 struct drm_atomic_state *state)
 {
@@ -635,7 +674,7 @@ static void simpledrm_primary_plane_helper_atomic_disable(struct drm_plane *plan
 
 static const struct drm_plane_helper_funcs simpledrm_primary_plane_helper_funcs = {
 	DRM_GEM_SHADOW_PLANE_HELPER_FUNCS,
-	.atomic_check = drm_plane_helper_atomic_check,
+	.atomic_check = simpledrm_primary_plane_helper_atomic_check,
 	.atomic_update = simpledrm_primary_plane_helper_atomic_update,
 	.atomic_disable = simpledrm_primary_plane_helper_atomic_disable,
 };
-- 
GitLab


From c669875041d038e91fa99766a07ec2d8bd6dcf6a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 9 Oct 2023 16:06:36 +0200
Subject: [PATCH 0250/2340] drm/ssd130x: Preallocate format-conversion buffer
 in atomic_check

Preallocate the format-conversion state's storage in the plane's
atomic_check function if a format conversion is necessary. Allows
the update to fail if no memory is available. Avoids the same
allocation within atomic_update, which may not fail.

v6:
	* update patch for ssd132x support

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Tested-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-8-tzimmermann@suse.de
---
 drivers/gpu/drm/solomon/ssd130x.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index ee0a06c506926..bef293922b98f 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -873,6 +873,7 @@ static int ssd130x_primary_plane_atomic_check(struct drm_plane *plane,
 	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
 	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
 	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
+	struct drm_shadow_plane_state *shadow_plane_state = &ssd130x_state->base;
 	struct drm_crtc *crtc = plane_state->crtc;
 	struct drm_crtc_state *crtc_state = NULL;
 	const struct drm_format_info *fi;
@@ -897,6 +898,16 @@ static int ssd130x_primary_plane_atomic_check(struct drm_plane *plane,
 
 	pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
 
+	if (plane_state->fb->format != fi) {
+		void *buf;
+
+		/* format conversion necessary; reserve buffer */
+		buf = drm_format_conv_state_reserve(&shadow_plane_state->fmtcnv_state,
+						    pitch, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	}
+
 	ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);
 	if (!ssd130x_state->buffer)
 		return -ENOMEM;
@@ -911,6 +922,7 @@ static int ssd132x_primary_plane_atomic_check(struct drm_plane *plane,
 	struct ssd130x_device *ssd130x = drm_to_ssd130x(drm);
 	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
 	struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state);
+	struct drm_shadow_plane_state *shadow_plane_state = &ssd130x_state->base;
 	struct drm_crtc *crtc = plane_state->crtc;
 	struct drm_crtc_state *crtc_state = NULL;
 	const struct drm_format_info *fi;
@@ -935,6 +947,16 @@ static int ssd132x_primary_plane_atomic_check(struct drm_plane *plane,
 
 	pitch = drm_format_info_min_pitch(fi, 0, ssd130x->width);
 
+	if (plane_state->fb->format != fi) {
+		void *buf;
+
+		/* format conversion necessary; reserve buffer */
+		buf = drm_format_conv_state_reserve(&shadow_plane_state->fmtcnv_state,
+						    pitch, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	}
+
 	ssd130x_state->buffer = kcalloc(pitch, ssd130x->height, GFP_KERNEL);
 	if (!ssd130x_state->buffer)
 		return -ENOMEM;
-- 
GitLab


From 78dfe8a0ef779159a6ff51231d71b3a65c55ccf5 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 1 Nov 2023 11:35:42 +0100
Subject: [PATCH 0251/2340] drm: Remove struct drm_flip_task from DRM
 interfaces

Contain struct drm_flip_task and its helper functions
drm_flip_work_allocate_task() and drm_flip_work_queue_task() within
drm_flip_work.c There are no callers outside of the flip-work code.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101103618.23806-2-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_flip_work.c | 27 +++++++--------------------
 include/drm/drm_flip_work.h     | 18 ++----------------
 2 files changed, 9 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c
index 060b753881a27..8c6090a90d564 100644
--- a/drivers/gpu/drm/drm_flip_work.c
+++ b/drivers/gpu/drm/drm_flip_work.c
@@ -27,14 +27,12 @@
 #include <drm/drm_print.h>
 #include <drm/drm_util.h>
 
-/**
- * drm_flip_work_allocate_task - allocate a flip-work task
- * @data: data associated to the task
- * @flags: allocator flags
- *
- * Allocate a drm_flip_task object and attach private data to it.
- */
-struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags)
+struct drm_flip_task {
+	struct list_head node;
+	void *data;
+};
+
+static struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags)
 {
 	struct drm_flip_task *task;
 
@@ -44,18 +42,8 @@ struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags)
 
 	return task;
 }
-EXPORT_SYMBOL(drm_flip_work_allocate_task);
 
-/**
- * drm_flip_work_queue_task - queue a specific task
- * @work: the flip-work
- * @task: the task to handle
- *
- * Queues task, that will later be run (passed back to drm_flip_func_t
- * func) on a work queue after drm_flip_work_commit() is called.
- */
-void drm_flip_work_queue_task(struct drm_flip_work *work,
-			      struct drm_flip_task *task)
+static void drm_flip_work_queue_task(struct drm_flip_work *work, struct drm_flip_task *task)
 {
 	unsigned long flags;
 
@@ -63,7 +51,6 @@ void drm_flip_work_queue_task(struct drm_flip_work *work,
 	list_add_tail(&task->node, &work->queued);
 	spin_unlock_irqrestore(&work->lock, flags);
 }
-EXPORT_SYMBOL(drm_flip_work_queue_task);
 
 /**
  * drm_flip_work_queue - queue work
diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h
index 21c3d512d25c4..6be4ba6f35143 100644
--- a/include/drm/drm_flip_work.h
+++ b/include/drm/drm_flip_work.h
@@ -33,9 +33,8 @@
  *
  * Util to queue up work to run from work-queue context after flip/vblank.
  * Typically this can be used to defer unref of framebuffer's, cursor
- * bo's, etc until after vblank.  The APIs are all thread-safe.
- * Moreover, drm_flip_work_queue_task and drm_flip_work_queue can be called
- * in atomic context.
+ * bo's, etc until after vblank.  The APIs are all thread-safe. Moreover,
+ * drm_flip_work_queue can be called in atomic context.
  */
 
 struct drm_flip_work;
@@ -51,16 +50,6 @@ struct drm_flip_work;
  */
 typedef void (*drm_flip_func_t)(struct drm_flip_work *work, void *val);
 
-/**
- * struct drm_flip_task - flip work task
- * @node: list entry element
- * @data: data to pass to &drm_flip_work.func
- */
-struct drm_flip_task {
-	struct list_head node;
-	void *data;
-};
-
 /**
  * struct drm_flip_work - flip work queue
  * @name: debug name
@@ -79,9 +68,6 @@ struct drm_flip_work {
 	spinlock_t lock;
 };
 
-struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags);
-void drm_flip_work_queue_task(struct drm_flip_work *work,
-			      struct drm_flip_task *task);
 void drm_flip_work_queue(struct drm_flip_work *work, void *val);
 void drm_flip_work_commit(struct drm_flip_work *work,
 		struct workqueue_struct *wq);
-- 
GitLab


From ce64630dca7026ed9dc880dcd005977f662c99fe Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 1 Nov 2023 11:35:43 +0100
Subject: [PATCH 0252/2340] drm: Fix flip-task docs

Say that drm_flip_work_commit() is safe to call in atomic context. Turn
the name into a hyperlink.

Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101103618.23806-3-tzimmermann@suse.de
---
 include/drm/drm_flip_work.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h
index 6be4ba6f35143..1eef3283a109c 100644
--- a/include/drm/drm_flip_work.h
+++ b/include/drm/drm_flip_work.h
@@ -31,10 +31,10 @@
 /**
  * DOC: flip utils
  *
- * Util to queue up work to run from work-queue context after flip/vblank.
+ * Utility to queue up work to run from work-queue context after flip/vblank.
  * Typically this can be used to defer unref of framebuffer's, cursor
- * bo's, etc until after vblank.  The APIs are all thread-safe. Moreover,
- * drm_flip_work_queue can be called in atomic context.
+ * bo's, etc until after vblank. The APIs are all thread-safe. Moreover,
+ * drm_flip_work_commit() can be called in atomic context.
  */
 
 struct drm_flip_work;
-- 
GitLab


From 0c2287c9652150cf659408b66c1789830822132f Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Fri, 10 Nov 2023 15:40:10 +0530
Subject: [PATCH 0253/2340] drm/display/dp: Add helper function to get DSC bpp
 precision

Add helper to get the DSC bits_per_pixel precision for the DP sink.

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-2-ankit.k.nautiyal@intel.com
---
 drivers/gpu/drm/display/drm_dp_helper.c | 27 +++++++++++++++++++++++++
 include/drm/display/drm_dp_helper.h     |  1 +
 2 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 64c151e4f78ca..ff8d67d0e9dc9 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -2325,6 +2325,33 @@ int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc,
 }
 EXPORT_SYMBOL(drm_dp_read_desc);
 
+/**
+ * drm_dp_dsc_sink_bpp_incr() - Get bits per pixel increment
+ * @dsc_dpcd: DSC capabilities from DPCD
+ *
+ * Returns the bpp precision supported by the DP sink.
+ */
+u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE])
+{
+	u8 bpp_increment_dpcd = dsc_dpcd[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT];
+
+	switch (bpp_increment_dpcd) {
+	case DP_DSC_BITS_PER_PIXEL_1_16:
+		return 16;
+	case DP_DSC_BITS_PER_PIXEL_1_8:
+		return 8;
+	case DP_DSC_BITS_PER_PIXEL_1_4:
+		return 4;
+	case DP_DSC_BITS_PER_PIXEL_1_2:
+		return 2;
+	case DP_DSC_BITS_PER_PIXEL_1_1:
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_dp_dsc_sink_bpp_incr);
+
 /**
  * drm_dp_dsc_sink_max_slice_count() - Get the max slice count
  * supported by the DSC sink.
diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h
index 3c59f64d4a691..194715083399e 100644
--- a/include/drm/display/drm_dp_helper.h
+++ b/include/drm/display/drm_dp_helper.h
@@ -164,6 +164,7 @@ drm_dp_is_branch(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
 }
 
 /* DP/eDP DSC support */
+u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]);
 u8 drm_dp_dsc_sink_max_slice_count(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE],
 				   bool is_edp);
 u8 drm_dp_dsc_sink_line_buf_depth(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]);
-- 
GitLab


From 59a266f068b4f9f54c58e4066ac9ee9023ad9232 Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Fri, 10 Nov 2023 15:40:11 +0530
Subject: [PATCH 0254/2340] drm/i915/display: Store compressed bpp in U6.4
 format

DSC parameter bits_per_pixel is stored in U6.4 format.
The 4 bits represent the fractional part of the bpp.
Currently we use compressed_bpp member of dsc structure to store
only the integral part of the bits_per_pixel.
To store the full bits_per_pixel along with the fractional part,
compressed_bpp is changed to store bpp in U6.4 formats. Intergral
part is retrieved by simply right shifting the member compressed_bpp by 4.

v2:
-Use to_bpp_int, to_bpp_frac_dec, to_bpp_x16 helpers while dealing
 with compressed bpp. (Suraj)
-Fix comment styling. (Suraj)

v3:
-Add separate file for 6.4 fixed point helper(Jani, Nikula)
-Add comment for magic values(Suraj)

v4:
-Fix checkpatch warnings caused by renaming(Suraj)

v5:
-Rebase.
-Use existing helpers for conversion of bpp_int to bpp_x16
 and vice versa.

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Signed-off-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-3-ankit.k.nautiyal@intel.com
---
 drivers/gpu/drm/i915/display/icl_dsi.c        | 10 +++----
 drivers/gpu/drm/i915/display/intel_audio.c    |  2 +-
 drivers/gpu/drm/i915/display/intel_bios.c     |  4 +--
 drivers/gpu/drm/i915/display/intel_cdclk.c    |  5 ++--
 drivers/gpu/drm/i915/display/intel_display.c  |  2 +-
 .../drm/i915/display/intel_display_types.h    |  3 ++-
 drivers/gpu/drm/i915/display/intel_dp.c       | 27 ++++++++++---------
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  2 +-
 drivers/gpu/drm/i915/display/intel_link_bw.c  |  2 +-
 drivers/gpu/drm/i915/display/intel_vdsc.c     |  4 +--
 10 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index c4585e445198d..481fcb6508503 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -330,7 +330,7 @@ static int afe_clk(struct intel_encoder *encoder,
 	int bpp;
 
 	if (crtc_state->dsc.compression_enable)
-		bpp = crtc_state->dsc.compressed_bpp;
+		bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16);
 	else
 		bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 
@@ -860,7 +860,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
 	 * compressed and non-compressed bpp.
 	 */
 	if (crtc_state->dsc.compression_enable) {
-		mul = crtc_state->dsc.compressed_bpp;
+		mul = to_bpp_int(crtc_state->dsc.compressed_bpp_x16);
 		div = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 	}
 
@@ -884,7 +884,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
 		int bpp, line_time_us, byte_clk_period_ns;
 
 		if (crtc_state->dsc.compression_enable)
-			bpp = crtc_state->dsc.compressed_bpp;
+			bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16);
 		else
 			bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 
@@ -1451,8 +1451,8 @@ static void gen11_dsi_get_timings(struct intel_encoder *encoder,
 	struct drm_display_mode *adjusted_mode =
 					&pipe_config->hw.adjusted_mode;
 
-	if (pipe_config->dsc.compressed_bpp) {
-		int div = pipe_config->dsc.compressed_bpp;
+	if (pipe_config->dsc.compressed_bpp_x16) {
+		int div = to_bpp_int(pipe_config->dsc.compressed_bpp_x16);
 		int mul = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
 
 		adjusted_mode->crtc_htotal =
diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index 19605264a35c3..aa93ccd6c2aad 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -528,7 +528,7 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
 	h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
 	h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
 	pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock;
-	vdsc_bpp = crtc_state->dsc.compressed_bpp;
+	vdsc_bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16);
 	cdclk = i915->display.cdclk.hw.cdclk;
 	/* fec= 0.972261, using rounding multiplier of 1000000 */
 	fec_coeff = 972261;
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 719fb550342b5..2fd72b2fd109b 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -3414,8 +3414,8 @@ static void fill_dsc(struct intel_crtc_state *crtc_state,
 
 	crtc_state->pipe_bpp = bpc * 3;
 
-	crtc_state->dsc.compressed_bpp = min(crtc_state->pipe_bpp,
-					     VBT_DSC_MAX_BPP(dsc->max_bpp));
+	crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(min(crtc_state->pipe_bpp,
+							    VBT_DSC_MAX_BPP(dsc->max_bpp)));
 
 	/*
 	 * FIXME: This is ugly, and slice count should take DSC engine
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index c4839c67cb0f0..b93d1ad7936d5 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2598,8 +2598,9 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
 		 * => CDCLK >= compressed_bpp * Pixel clock  / 2 * Bigjoiner Interface bits
 		 */
 		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
-		int min_cdclk_bj = (crtc_state->dsc.compressed_bpp * pixel_clock) /
-				   (2 * bigjoiner_interface_bits);
+		int min_cdclk_bj =
+			(to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) *
+			 pixel_clock) / (2 * bigjoiner_interface_bits);
 
 		min_cdclk = max(min_cdclk, min_cdclk_bj);
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 3effafcbb411a..b4a8e3087e508 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5434,7 +5434,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 	PIPE_CONF_CHECK_I(dsc.compression_enable);
 	PIPE_CONF_CHECK_I(dsc.dsc_split);
-	PIPE_CONF_CHECK_I(dsc.compressed_bpp);
+	PIPE_CONF_CHECK_I(dsc.compressed_bpp_x16);
 
 	PIPE_CONF_CHECK_BOOL(splitter.enable);
 	PIPE_CONF_CHECK_I(splitter.link_count);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index d2a15817dee2c..779b18fc6c394 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1363,7 +1363,8 @@ struct intel_crtc_state {
 	struct {
 		bool compression_enable;
 		bool dsc_split;
-		u16 compressed_bpp;
+		/* Compressed Bpp in U6.4 format (first 4 bits for fractional part) */
+		u16 compressed_bpp_x16;
 		u8 slice_count;
 		struct drm_dsc_config config;
 	} dsc;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 03e520c432485..e341e4b155c9c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1885,7 +1885,8 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp,
 					      valid_dsc_bpp[i],
 					      timeslots);
 		if (ret == 0) {
-			pipe_config->dsc.compressed_bpp = valid_dsc_bpp[i];
+			pipe_config->dsc.compressed_bpp_x16 =
+				to_bpp_x16(valid_dsc_bpp[i]);
 			return 0;
 		}
 	}
@@ -1923,7 +1924,8 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp,
 					      compressed_bpp,
 					      timeslots);
 		if (ret == 0) {
-			pipe_config->dsc.compressed_bpp = compressed_bpp;
+			pipe_config->dsc.compressed_bpp_x16 =
+				to_bpp_x16(compressed_bpp);
 			return 0;
 		}
 	}
@@ -2120,7 +2122,8 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp,
 	/* Compressed BPP should be less than the Input DSC bpp */
 	dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1);
 
-	pipe_config->dsc.compressed_bpp = max(dsc_min_bpp, dsc_max_bpp);
+	pipe_config->dsc.compressed_bpp_x16 =
+		to_bpp_x16(max(dsc_min_bpp, dsc_max_bpp));
 
 	pipe_config->pipe_bpp = pipe_bpp;
 
@@ -2209,18 +2212,18 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 	ret = intel_dp_dsc_compute_params(connector, pipe_config);
 	if (ret < 0) {
 		drm_dbg_kms(&dev_priv->drm,
-			    "Cannot compute valid DSC parameters for Input Bpp = %d "
-			    "Compressed BPP = %d\n",
+			    "Cannot compute valid DSC parameters for Input Bpp = %d"
+			    "Compressed BPP = " BPP_X16_FMT "\n",
 			    pipe_config->pipe_bpp,
-			    pipe_config->dsc.compressed_bpp);
+			    BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16));
 		return ret;
 	}
 
 	pipe_config->dsc.compression_enable = true;
 	drm_dbg_kms(&dev_priv->drm, "DP DSC computed with Input Bpp = %d "
-		    "Compressed Bpp = %d Slice Count = %d\n",
+		    "Compressed Bpp = " BPP_X16_FMT " Slice Count = %d\n",
 		    pipe_config->pipe_bpp,
-		    pipe_config->dsc.compressed_bpp,
+		    BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16),
 		    pipe_config->dsc.slice_count);
 
 	return 0;
@@ -2393,15 +2396,15 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 
 	if (pipe_config->dsc.compression_enable) {
 		drm_dbg_kms(&i915->drm,
-			    "DP lane count %d clock %d Input bpp %d Compressed bpp %d\n",
+			    "DP lane count %d clock %d Input bpp %d Compressed bpp " BPP_X16_FMT "\n",
 			    pipe_config->lane_count, pipe_config->port_clock,
 			    pipe_config->pipe_bpp,
-			    pipe_config->dsc.compressed_bpp);
+			    BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16));
 
 		drm_dbg_kms(&i915->drm,
 			    "DP link rate required %i available %i\n",
 			    intel_dp_link_required(adjusted_mode->crtc_clock,
-						   pipe_config->dsc.compressed_bpp),
+						   to_bpp_int_roundup(pipe_config->dsc.compressed_bpp_x16)),
 			    intel_dp_max_data_rate(pipe_config->port_clock,
 						   pipe_config->lane_count));
 	} else {
@@ -2863,7 +2866,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 		drm_dp_enhanced_frame_cap(intel_dp->dpcd);
 
 	if (pipe_config->dsc.compression_enable)
-		link_bpp = pipe_config->dsc.compressed_bpp;
+		link_bpp = to_bpp_int(pipe_config->dsc.compressed_bpp_x16);
 	else
 		link_bpp = intel_dp_output_bpp(pipe_config->output_format,
 					       pipe_config->pipe_bpp);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index b943dbf394a22..4db902dc6eb16 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -228,7 +228,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 		if (!dsc)
 			crtc_state->pipe_bpp = bpp;
 		else
-			crtc_state->dsc.compressed_bpp = bpp;
+			crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(bpp);
 		drm_dbg_kms(&i915->drm, "Got %d slots for pipe bpp %d dsc %d\n", slots, bpp, dsc);
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c
index 390db5c0c24a5..02a0af2aa5bae 100644
--- a/drivers/gpu/drm/i915/display/intel_link_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_link_bw.c
@@ -70,7 +70,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state,
 			return PTR_ERR(crtc_state);
 
 		if (crtc_state->dsc.compression_enable)
-			link_bpp = crtc_state->dsc.compressed_bpp;
+			link_bpp = crtc_state->dsc.compressed_bpp_x16;
 		else
 			/*
 			 * TODO: for YUV420 the actual link bpp is only half
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 6757dbae9ee5e..3a1ed574edbbb 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -248,7 +248,7 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config)
 	struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct drm_dsc_config *vdsc_cfg = &pipe_config->dsc.config;
-	u16 compressed_bpp = pipe_config->dsc.compressed_bpp;
+	u16 compressed_bpp = to_bpp_int(pipe_config->dsc.compressed_bpp_x16);
 	int err;
 	int ret;
 
@@ -874,7 +874,7 @@ static void intel_dsc_get_pps_config(struct intel_crtc_state *crtc_state)
 	if (vdsc_cfg->native_420)
 		vdsc_cfg->bits_per_pixel >>= 1;
 
-	crtc_state->dsc.compressed_bpp = vdsc_cfg->bits_per_pixel >> 4;
+	crtc_state->dsc.compressed_bpp_x16 = vdsc_cfg->bits_per_pixel;
 
 	/* PPS 2 */
 	pps_temp = intel_dsc_pps_read_and_verify(crtc_state, 2);
-- 
GitLab


From 87c8812f4b009b5a5d38b1560b45d4a1cc4b24c5 Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Fri, 10 Nov 2023 15:40:12 +0530
Subject: [PATCH 0255/2340] drm/i915/display: Consider fractional vdsc bpp
 while computing m_n values

MTL+ supports fractional compressed bits_per_pixel, with precision of
1/16. This compressed bpp is stored in U6.4 format.
Accommodate this precision while computing m_n values.

v1:
Replace the computation of 'data_clock' with 'data_clock =
DIV_ROUND_UP(data_clock, 16).' (Sui Jingfeng).

v2:
Rebase and pass bits_per_pixel in U6.4 format.

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Signed-off-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-4-ankit.k.nautiyal@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c |  4 ++--
 drivers/gpu/drm/i915/display/intel_dp.c      | 16 ++++++++--------
 drivers/gpu/drm/i915/display/intel_dp_mst.c  | 14 +++++++-------
 drivers/gpu/drm/i915/display/intel_fdi.c     |  3 ++-
 4 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index b4a8e3087e508..125903007a292 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2415,12 +2415,12 @@ add_bw_alloc_overhead(int link_clock, int bw_overhead,
 }
 
 void
-intel_link_compute_m_n(u16 bits_per_pixel, int nlanes,
+intel_link_compute_m_n(u16 bits_per_pixel_x16, int nlanes,
 		       int pixel_clock, int link_clock,
 		       int bw_overhead,
 		       struct intel_link_m_n *m_n)
 {
-	u32 data_clock = bits_per_pixel * pixel_clock;
+	u32 data_clock = DIV_ROUND_UP(bits_per_pixel_x16 * pixel_clock, 16);
 	u32 data_m;
 	u32 data_n;
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index e341e4b155c9c..d4118d25f39bd 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2688,7 +2688,7 @@ static bool can_enable_drrs(struct intel_connector *connector,
 static void
 intel_dp_drrs_compute_config(struct intel_connector *connector,
 			     struct intel_crtc_state *pipe_config,
-			     int link_bpp)
+			     int link_bpp_x16)
 {
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	const struct drm_display_mode *downclock_mode =
@@ -2713,7 +2713,7 @@ intel_dp_drrs_compute_config(struct intel_connector *connector,
 	if (pipe_config->splitter.enable)
 		pixel_clock /= pipe_config->splitter.link_count;
 
-	intel_link_compute_m_n(link_bpp, pipe_config->lane_count, pixel_clock,
+	intel_link_compute_m_n(link_bpp_x16, pipe_config->lane_count, pixel_clock,
 			       pipe_config->port_clock,
 			       intel_dp_bw_fec_overhead(pipe_config->fec_enable),
 			       &pipe_config->dp_m2_n2);
@@ -2817,7 +2817,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	const struct drm_display_mode *fixed_mode;
 	struct intel_connector *connector = intel_dp->attached_connector;
-	int ret = 0, link_bpp;
+	int ret = 0, link_bpp_x16;
 
 	if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && encoder->port != PORT_A)
 		pipe_config->has_pch_encoder = true;
@@ -2866,10 +2866,10 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 		drm_dp_enhanced_frame_cap(intel_dp->dpcd);
 
 	if (pipe_config->dsc.compression_enable)
-		link_bpp = to_bpp_int(pipe_config->dsc.compressed_bpp_x16);
+		link_bpp_x16 = pipe_config->dsc.compressed_bpp_x16;
 	else
-		link_bpp = intel_dp_output_bpp(pipe_config->output_format,
-					       pipe_config->pipe_bpp);
+		link_bpp_x16 = to_bpp_x16(intel_dp_output_bpp(pipe_config->output_format,
+							      pipe_config->pipe_bpp));
 
 	if (intel_dp->mso_link_count) {
 		int n = intel_dp->mso_link_count;
@@ -2893,7 +2893,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 
 	intel_dp_audio_compute_config(encoder, pipe_config, conn_state);
 
-	intel_link_compute_m_n(link_bpp,
+	intel_link_compute_m_n(link_bpp_x16,
 			       pipe_config->lane_count,
 			       adjusted_mode->crtc_clock,
 			       pipe_config->port_clock,
@@ -2909,7 +2909,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 
 	intel_vrr_compute_config(pipe_config, conn_state);
 	intel_psr_compute_config(intel_dp, pipe_config, conn_state);
-	intel_dp_drrs_compute_config(connector, pipe_config, link_bpp);
+	intel_dp_drrs_compute_config(connector, pipe_config, link_bpp_x16);
 	intel_dp_compute_vsc_sdp(intel_dp, pipe_config, conn_state);
 	intel_dp_compute_hdr_metadata_infoframe_sdp(intel_dp, pipe_config, conn_state);
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 4db902dc6eb16..0cb9405f59eaa 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -71,7 +71,7 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp
 
 static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state,
 				    const struct intel_connector *connector,
-				    bool ssc, bool dsc, int bpp)
+				    bool ssc, bool dsc, int bpp_x16)
 {
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
@@ -95,7 +95,7 @@ static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state,
 	overhead = drm_dp_bw_overhead(crtc_state->lane_count,
 				      adjusted_mode->hdisplay,
 				      dsc_slice_count,
-				      to_bpp_x16(bpp),
+				      bpp_x16,
 				      flags);
 
 	/*
@@ -108,16 +108,16 @@ static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state,
 static void intel_dp_mst_compute_m_n(const struct intel_crtc_state *crtc_state,
 				     const struct intel_connector *connector,
 				     bool ssc, bool dsc,
-				     int bpp,
+				     int bpp_x16,
 				     struct intel_link_m_n *m_n)
 {
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
 	int overhead = intel_dp_mst_bw_overhead(crtc_state,
 						connector,
-						ssc, dsc, bpp);
+						ssc, dsc, bpp_x16);
 
-	intel_link_compute_m_n(bpp, crtc_state->lane_count,
+	intel_link_compute_m_n(bpp_x16, crtc_state->lane_count,
 			       adjusted_mode->crtc_clock,
 			       crtc_state->port_clock,
 			       overhead,
@@ -181,9 +181,9 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 		link_bpp = dsc ? bpp :
 			intel_dp_output_bpp(crtc_state->output_format, bpp);
 
-		intel_dp_mst_compute_m_n(crtc_state, connector, false, dsc, link_bpp,
+		intel_dp_mst_compute_m_n(crtc_state, connector, false, dsc, to_bpp_x16(link_bpp),
 					 &crtc_state->dp_m_n);
-		intel_dp_mst_compute_m_n(crtc_state, connector, true, dsc, link_bpp,
+		intel_dp_mst_compute_m_n(crtc_state, connector, true, dsc, to_bpp_x16(link_bpp),
 					 &remote_m_n);
 
 		/*
diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c
index 1d87fbc1e8138..295a0f24ebbf7 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.c
+++ b/drivers/gpu/drm/i915/display/intel_fdi.c
@@ -339,7 +339,8 @@ int ilk_fdi_compute_config(struct intel_crtc *crtc,
 
 	pipe_config->fdi_lanes = lane;
 
-	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
+	intel_link_compute_m_n(to_bpp_x16(pipe_config->pipe_bpp),
+			       lane, fdi_dotclock,
 			       link_bw,
 			       intel_dp_bw_fec_overhead(false),
 			       &pipe_config->fdi_m_n);
-- 
GitLab


From 08fcb5ab7b32848b1852145baf89007a3e3c28b9 Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Fri, 10 Nov 2023 15:40:13 +0530
Subject: [PATCH 0256/2340] drm/i915/audio: Consider fractional vdsc bpp while
 computing tu_data

MTL+ supports fractional compressed bits_per_pixel, with precision of
1/16. This compressed bpp is stored in U6.4 format.
Accommodate the precision during calculation of transfer unit data
for hblank_early calculation.

v2:
-Fix tu_data calculation while dealing with U6.4 format. (Stan)

v3:
-Use BPP_X16_FMT to print vdsc bpp.

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-5-ankit.k.nautiyal@intel.com
---
 drivers/gpu/drm/i915/display/intel_audio.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index aa93ccd6c2aad..8796d90c46a6b 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -521,25 +521,25 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
 	unsigned int link_clks_available, link_clks_required;
 	unsigned int tu_data, tu_line, link_clks_active;
 	unsigned int h_active, h_total, hblank_delta, pixel_clk;
-	unsigned int fec_coeff, cdclk, vdsc_bpp;
+	unsigned int fec_coeff, cdclk, vdsc_bppx16;
 	unsigned int link_clk, lanes;
 	unsigned int hblank_rise;
 
 	h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
 	h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
 	pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock;
-	vdsc_bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16);
+	vdsc_bppx16 = crtc_state->dsc.compressed_bpp_x16;
 	cdclk = i915->display.cdclk.hw.cdclk;
 	/* fec= 0.972261, using rounding multiplier of 1000000 */
 	fec_coeff = 972261;
 	link_clk = crtc_state->port_clock;
 	lanes = crtc_state->lane_count;
 
-	drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :"
-		    "lanes = %u vdsc_bpp = %u cdclk = %u\n",
-		    h_active, link_clk, lanes, vdsc_bpp, cdclk);
+	drm_dbg_kms(&i915->drm,
+		    "h_active = %u link_clk = %u : lanes = %u vdsc_bpp = " BPP_X16_FMT " cdclk = %u\n",
+		    h_active, link_clk, lanes, BPP_X16_ARGS(vdsc_bppx16), cdclk);
 
-	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk))
+	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk))
 		return 0;
 
 	link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28;
@@ -551,8 +551,8 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
 		hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk),
 						  mul_u32_u32(link_clk, cdclk));
 
-	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000),
-			    mul_u32_u32(link_clk * lanes, fec_coeff));
+	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 1000000),
+			    mul_u32_u32(link_clk * lanes * 16, fec_coeff));
 	tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff),
 			    mul_u32_u32(64 * pixel_clk, 1000000));
 	link_clks_active  = (tu_line - 1) * 64 + tu_data;
-- 
GitLab


From 2df50cb46a4c64107e7a70e8b00e7ffc0806b5a3 Mon Sep 17 00:00:00 2001
From: Vandita Kulkarni <vandita.kulkarni@intel.com>
Date: Fri, 10 Nov 2023 15:40:14 +0530
Subject: [PATCH 0257/2340] drm/i915/dsc/mtl: Add support for fractional bpp

Consider the fractional bpp while reading the qp values.

v2: Use helpers for fractional, integral bits of bits_per_pixel. (Suraj)

Signed-off-by: Vandita Kulkarni <vandita.kulkarni@intel.com>
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-6-ankit.k.nautiyal@intel.com
---
 .../gpu/drm/i915/display/intel_qp_tables.c    |  3 ---
 drivers/gpu/drm/i915/display/intel_vdsc.c     | 25 +++++++++++++++----
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_qp_tables.c b/drivers/gpu/drm/i915/display/intel_qp_tables.c
index 543cdc46aa1df..600c815e37e4e 100644
--- a/drivers/gpu/drm/i915/display/intel_qp_tables.c
+++ b/drivers/gpu/drm/i915/display/intel_qp_tables.c
@@ -34,9 +34,6 @@
  * These qp tables are as per the C model
  * and it has the rows pointing to bpps which increment
  * in steps of 0.5
- * We do not support fractional bpps as of today,
- * hence we would skip the fractional bpps during
- * our references for qp calclulations.
  */
 static const u8 rc_range_minqp444_8bpc[DSC_NUM_BUF_RANGES][RC_RANGE_QP444_8BPC_MAX_NUM_BPP] = {
 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 3a1ed574edbbb..5f2fb702e367e 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -77,8 +77,8 @@ intel_vdsc_set_min_max_qp(struct drm_dsc_config *vdsc_cfg, int buf,
 static void
 calculate_rc_params(struct drm_dsc_config *vdsc_cfg)
 {
+	int bpp = to_bpp_int(vdsc_cfg->bits_per_pixel);
 	int bpc = vdsc_cfg->bits_per_component;
-	int bpp = vdsc_cfg->bits_per_pixel >> 4;
 	int qp_bpc_modifier = (bpc - 8) * 2;
 	int uncompressed_bpg_rate;
 	int first_line_bpg_offset;
@@ -148,7 +148,13 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg)
 		static const s8 ofs_und8[] = {
 			10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12
 		};
-
+		/*
+		 * For 420 format since bits_per_pixel (bpp) is set to target bpp * 2,
+		 * QP table values for target bpp 4.0 to 4.4375 (rounded to 4.0) are
+		 * actually for bpp 8 to 8.875 (rounded to 4.0 * 2 i.e 8).
+		 * Similarly values for target bpp 4.5 to 4.8375 (rounded to 4.5)
+		 * are for bpp 9 to 9.875 (rounded to 4.5 * 2 i.e 9), and so on.
+		 */
 		bpp_i  = bpp - 8;
 		for (buf_i = 0; buf_i < DSC_NUM_BUF_RANGES; buf_i++) {
 			u8 range_bpg_offset;
@@ -178,6 +184,9 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg)
 				range_bpg_offset & DSC_RANGE_BPG_OFFSET_MASK;
 		}
 	} else {
+		/* fractional bpp part * 10000 (for precision up to 4 decimal places) */
+		int fractional_bits = to_bpp_frac(vdsc_cfg->bits_per_pixel);
+
 		static const s8 ofs_und6[] = {
 			0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12
 		};
@@ -191,7 +200,14 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg)
 			10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12
 		};
 
-		bpp_i  = (2 * (bpp - 6));
+		/*
+		 * QP table rows have values in increment of 0.5.
+		 * So 6.0 bpp to 6.4375 will have index 0, 6.5 to 6.9375 will have index 1,
+		 * and so on.
+		 * 0.5 fractional part with 4 decimal precision becomes 5000
+		 */
+		bpp_i  = ((bpp - 6) + (fractional_bits < 5000 ? 0 : 1));
+
 		for (buf_i = 0; buf_i < DSC_NUM_BUF_RANGES; buf_i++) {
 			u8 range_bpg_offset;
 
@@ -279,8 +295,7 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config)
 	/* Gen 11 does not support VBR */
 	vdsc_cfg->vbr_enable = false;
 
-	/* Gen 11 only supports integral values of bpp */
-	vdsc_cfg->bits_per_pixel = compressed_bpp << 4;
+	vdsc_cfg->bits_per_pixel = pipe_config->dsc.compressed_bpp_x16;
 
 	/*
 	 * According to DSC 1.2 specs in Section 4.1 if native_420 is set
-- 
GitLab


From dc59990efda0bc785a3c26c41880cc513f9ed09f Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Fri, 10 Nov 2023 15:40:15 +0530
Subject: [PATCH 0258/2340] drm/i915/dp: Iterate over output bpp with
 fractional step size

This patch adds support to iterate over compressed output bpp as per the
fractional step, supported by DP sink.

v2:
-Avoid ending up with compressed bpp, same as pipe bpp. (Stan)

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-7-ankit.k.nautiyal@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 41 +++++++++++++++----------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index d4118d25f39bd..23686c3913e7d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1737,15 +1737,15 @@ static bool intel_dp_dsc_supports_format(const struct intel_connector *connector
 	return drm_dp_dsc_sink_supports_format(connector->dp.dsc_dpcd, sink_dsc_format);
 }
 
-static bool is_bw_sufficient_for_dsc_config(u16 compressed_bpp, u32 link_clock,
+static bool is_bw_sufficient_for_dsc_config(u16 compressed_bppx16, u32 link_clock,
 					    u32 lane_count, u32 mode_clock,
 					    enum intel_output_format output_format,
 					    int timeslots)
 {
 	u32 available_bw, required_bw;
 
-	available_bw = (link_clock * lane_count * timeslots)  / 8;
-	required_bw = compressed_bpp * (intel_dp_mode_to_fec_clock(mode_clock));
+	available_bw = (link_clock * lane_count * timeslots * 16)  / 8;
+	required_bw = compressed_bppx16 * (intel_dp_mode_to_fec_clock(mode_clock));
 
 	return available_bw > required_bw;
 }
@@ -1753,7 +1753,7 @@ static bool is_bw_sufficient_for_dsc_config(u16 compressed_bpp, u32 link_clock,
 static int dsc_compute_link_config(struct intel_dp *intel_dp,
 				   struct intel_crtc_state *pipe_config,
 				   struct link_config_limits *limits,
-				   u16 compressed_bpp,
+				   u16 compressed_bppx16,
 				   int timeslots)
 {
 	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
@@ -1768,8 +1768,8 @@ static int dsc_compute_link_config(struct intel_dp *intel_dp,
 		for (lane_count = limits->min_lane_count;
 		     lane_count <= limits->max_lane_count;
 		     lane_count <<= 1) {
-			if (!is_bw_sufficient_for_dsc_config(compressed_bpp, link_rate, lane_count,
-							     adjusted_mode->clock,
+			if (!is_bw_sufficient_for_dsc_config(compressed_bppx16, link_rate,
+							     lane_count, adjusted_mode->clock,
 							     pipe_config->output_format,
 							     timeslots))
 				continue;
@@ -1882,7 +1882,7 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp,
 		ret = dsc_compute_link_config(intel_dp,
 					      pipe_config,
 					      limits,
-					      valid_dsc_bpp[i],
+					      valid_dsc_bpp[i] << 4,
 					      timeslots);
 		if (ret == 0) {
 			pipe_config->dsc.compressed_bpp_x16 =
@@ -1902,6 +1902,7 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp,
  */
 static int
 xelpd_dsc_compute_link_config(struct intel_dp *intel_dp,
+			      const struct intel_connector *connector,
 			      struct intel_crtc_state *pipe_config,
 			      struct link_config_limits *limits,
 			      int dsc_max_bpp,
@@ -1909,23 +1910,31 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp,
 			      int pipe_bpp,
 			      int timeslots)
 {
-	u16 compressed_bpp;
+	u8 bppx16_incr = drm_dp_dsc_sink_bpp_incr(connector->dp.dsc_dpcd);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	u16 compressed_bppx16;
+	u8 bppx16_step;
 	int ret;
 
+	if (DISPLAY_VER(i915) < 14 || bppx16_incr <= 1)
+		bppx16_step = 16;
+	else
+		bppx16_step = 16 / bppx16_incr;
+
 	/* Compressed BPP should be less than the Input DSC bpp */
-	dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1);
+	dsc_max_bpp = min(dsc_max_bpp << 4, (pipe_bpp << 4) - bppx16_step);
+	dsc_min_bpp = dsc_min_bpp << 4;
 
-	for (compressed_bpp = dsc_max_bpp;
-	     compressed_bpp >= dsc_min_bpp;
-	     compressed_bpp--) {
+	for (compressed_bppx16 = dsc_max_bpp;
+	     compressed_bppx16 >= dsc_min_bpp;
+	     compressed_bppx16 -= bppx16_step) {
 		ret = dsc_compute_link_config(intel_dp,
 					      pipe_config,
 					      limits,
-					      compressed_bpp,
+					      compressed_bppx16,
 					      timeslots);
 		if (ret == 0) {
-			pipe_config->dsc.compressed_bpp_x16 =
-				to_bpp_x16(compressed_bpp);
+			pipe_config->dsc.compressed_bpp_x16 = compressed_bppx16;
 			return 0;
 		}
 	}
@@ -1963,7 +1972,7 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp,
 	dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16));
 
 	if (DISPLAY_VER(i915) >= 13)
-		return xelpd_dsc_compute_link_config(intel_dp, pipe_config, limits,
+		return xelpd_dsc_compute_link_config(intel_dp, connector, pipe_config, limits,
 						     dsc_max_bpp, dsc_min_bpp, pipe_bpp, timeslots);
 	return icl_dsc_compute_link_config(intel_dp, pipe_config, limits,
 					   dsc_max_bpp, dsc_min_bpp, pipe_bpp, timeslots);
-- 
GitLab


From 680c1e31a59b223d677a22b508017d26b71a636a Mon Sep 17 00:00:00 2001
From: Swati Sharma <swati2.sharma@intel.com>
Date: Fri, 10 Nov 2023 15:40:16 +0530
Subject: [PATCH 0259/2340] drm/i915/dsc: Add debugfs entry to validate DSC
 fractional bpp

DSC_Sink_BPP_Precision entry is added to i915_dsc_fec_support_show
to depict sink's precision.
Also, new debugfs entry is created to enforce fractional bpp.
If Force_DSC_Fractional_BPP_en is set then while iterating over
output bpp with fractional step size we will continue if output_bpp is
computed as integer. With this approach, we will be able to validate
DSC with fractional bpp.

v2:
Add drm_modeset_unlock to new line(Suraj)

Signed-off-by: Swati Sharma <swati2.sharma@intel.com>
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Signed-off-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-8-ankit.k.nautiyal@intel.com
---
 .../drm/i915/display/intel_display_debugfs.c  | 84 +++++++++++++++++++
 .../drm/i915/display/intel_display_types.h    |  1 +
 2 files changed, 85 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 42bd352f9fee7..a7675a7339c34 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -1257,6 +1257,8 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
 								      DP_DSC_YCbCr420_Native)),
 			   str_yes_no(drm_dp_dsc_sink_supports_format(connector->dp.dsc_dpcd,
 								      DP_DSC_YCbCr444)));
+		seq_printf(m, "DSC_Sink_BPP_Precision: %d\n",
+			   drm_dp_dsc_sink_bpp_incr(connector->dp.dsc_dpcd));
 		seq_printf(m, "Force_DSC_Enable: %s\n",
 			   str_yes_no(intel_dp->force_dsc_en));
 		if (!intel_dp_is_edp(intel_dp))
@@ -1449,6 +1451,85 @@ static const struct file_operations i915_dsc_output_format_fops = {
 	.write = i915_dsc_output_format_write
 };
 
+static int i915_dsc_fractional_bpp_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct drm_device *dev = connector->dev;
+	struct drm_crtc *crtc;
+	struct intel_dp *intel_dp;
+	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct intel_encoder *encoder = intel_attached_encoder(intel_connector);
+	int ret;
+
+	if (!encoder)
+		return -ENODEV;
+
+	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	if (ret)
+		return ret;
+
+	crtc = connector->state->crtc;
+	if (connector->status != connector_status_connected || !crtc) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	intel_dp = intel_attached_dp(intel_connector);
+	seq_printf(m, "Force_DSC_Fractional_BPP_Enable: %s\n",
+		   str_yes_no(intel_dp->force_dsc_fractional_bpp_en));
+
+out:
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+	return ret;
+}
+
+static ssize_t i915_dsc_fractional_bpp_write(struct file *file,
+					     const char __user *ubuf,
+					     size_t len, loff_t *offp)
+{
+	struct drm_connector *connector =
+		((struct seq_file *)file->private_data)->private;
+	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	bool dsc_fractional_bpp_enable = false;
+	int ret;
+
+	if (len == 0)
+		return 0;
+
+	drm_dbg(&i915->drm,
+		"Copied %zu bytes from user to force fractional bpp for DSC\n", len);
+
+	ret = kstrtobool_from_user(ubuf, len, &dsc_fractional_bpp_enable);
+	if (ret < 0)
+		return ret;
+
+	drm_dbg(&i915->drm, "Got %s for DSC Fractional BPP Enable\n",
+		(dsc_fractional_bpp_enable) ? "true" : "false");
+	intel_dp->force_dsc_fractional_bpp_en = dsc_fractional_bpp_enable;
+
+	*offp += len;
+
+	return len;
+}
+
+static int i915_dsc_fractional_bpp_open(struct inode *inode,
+					struct file *file)
+{
+	return single_open(file, i915_dsc_fractional_bpp_show, inode->i_private);
+}
+
+static const struct file_operations i915_dsc_fractional_bpp_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_dsc_fractional_bpp_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.write = i915_dsc_fractional_bpp_write
+};
+
 /*
  * Returns the Current CRTC's bpc.
  * Example usage: cat /sys/kernel/debug/dri/0/crtc-0/i915_current_bpc
@@ -1526,6 +1607,9 @@ void intel_connector_debugfs_add(struct intel_connector *intel_connector)
 
 		debugfs_create_file("i915_dsc_output_format", 0644, root,
 				    connector, &i915_dsc_output_format_fops);
+
+		debugfs_create_file("i915_dsc_fractional_bpp", 0644, root,
+				    connector, &i915_dsc_fractional_bpp_fops);
 	}
 
 	if (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 779b18fc6c394..9a44350ba05dd 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1815,6 +1815,7 @@ struct intel_dp {
 	/* Display stream compression testing */
 	bool force_dsc_en;
 	int force_dsc_output_format;
+	bool force_dsc_fractional_bpp_en;
 	int force_dsc_bpc;
 
 	bool hobl_failed;
-- 
GitLab


From 5bbdcc86a481d82433e0905a548335bd3683eadf Mon Sep 17 00:00:00 2001
From: Swati Sharma <swati2.sharma@intel.com>
Date: Fri, 10 Nov 2023 15:40:17 +0530
Subject: [PATCH 0260/2340] drm/i915/dsc: Allow DSC only with fractional bpp
 when forced from debugfs

If force_dsc_fractional_bpp_en is set through debugfs allow DSC iff
compressed bpp is fractional. Continue if the computed compressed bpp
turns out to be a integer.

v2:
-Use helpers for fractional, integral bits of bits_per_pixel. (Suraj)
-Fix comment (Suraj)

Signed-off-by: Swati Sharma <swati2.sharma@intel.com>
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-9-ankit.k.nautiyal@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 23686c3913e7d..effb3da291e15 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1928,6 +1928,9 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp,
 	for (compressed_bppx16 = dsc_max_bpp;
 	     compressed_bppx16 >= dsc_min_bpp;
 	     compressed_bppx16 -= bppx16_step) {
+		if (intel_dp->force_dsc_fractional_bpp_en &&
+		    !to_bpp_frac(compressed_bppx16))
+			continue;
 		ret = dsc_compute_link_config(intel_dp,
 					      pipe_config,
 					      limits,
@@ -1935,6 +1938,10 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp,
 					      timeslots);
 		if (ret == 0) {
 			pipe_config->dsc.compressed_bpp_x16 = compressed_bppx16;
+			if (intel_dp->force_dsc_fractional_bpp_en &&
+			    to_bpp_frac(compressed_bppx16))
+				drm_dbg_kms(&i915->drm, "Forcing DSC fractional bpp\n");
+
 			return 0;
 		}
 	}
-- 
GitLab


From a6865fe6fd784a8edec6bd6d396f8c054ade0de8 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Mon, 13 Nov 2023 11:37:37 +0200
Subject: [PATCH 0261/2340] drm/i915/display: Use int for entry setup frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At least one TGL had regression when using u8 types
for entry setup frames calculation. So, let's switch
to use ints instead.

intel_psr_entry_setup_frames() function expects
to return u8 but since in case of error the error
code -ETIME is returned. This doesn't fit into u8
and hence the return value is not as expected.

Fixes: 2b981d57e480 ("drm/i915/display: Support PSR entry VSC packet to be transmitted one frame earlier")
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113093737.358137-1-mika.kahola@intel.com
---
 drivers/gpu/drm/i915/display/intel_psr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index ea292832ca475..2fc89ac5b27a9 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1141,12 +1141,12 @@ static bool _compute_psr2_wake_times(struct intel_dp *intel_dp,
 	return true;
 }
 
-static u8 intel_psr_entry_setup_frames(struct intel_dp *intel_dp,
-				       const struct drm_display_mode *adjusted_mode)
+static int intel_psr_entry_setup_frames(struct intel_dp *intel_dp,
+					const struct drm_display_mode *adjusted_mode)
 {
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	int psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd);
-	u8 entry_setup_frames = 0;
+	int entry_setup_frames = 0;
 
 	if (psr_setup_time < 0) {
 		drm_dbg_kms(&i915->drm,
-- 
GitLab


From dd99d5b1ab93e7b731dda3d39cc7caf4639f8652 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Mon, 13 Nov 2023 16:20:51 +0200
Subject: [PATCH 0262/2340] drm/i915/dp: Tune down FEC detection timeout error
 message

At least a Realtek DP branch device with the

OUI 00-e0-4c dev-ID Dp1.4 HW-rev 1.0 SW-rev 131.1

device identification doesn't report detecting the FEC decoding start
symbol. Tune down the corresponding error to a debug message.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113142051.258864-1-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 0712a5200ad35..f70af660dfcfa 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2257,8 +2257,8 @@ static void wait_for_fec_detected(struct drm_dp_aux *aux, bool enabled)
 		return;
 
 	if (err == -ETIMEDOUT)
-		drm_err(&i915->drm, "Timeout waiting for FEC %s to get detected\n",
-			str_enabled_disabled(enabled));
+		drm_dbg_kms(&i915->drm, "Timeout waiting for FEC %s to get detected\n",
+			    str_enabled_disabled(enabled));
 	else
 		drm_dbg_kms(&i915->drm, "FEC detected status read error: %d\n", status);
 }
-- 
GitLab


From 2e122362d25e1b977aa5c5c88c03956be4228e02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 24 Oct 2023 13:07:10 +0200
Subject: [PATCH 0263/2340] iosys-map: Rename locals used inside macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Widely used variable names can be used by macro users, potentially
leading to name collisions.
Suffix locals used inside the macros with an underscore, to reduce the
collision potential.

Suggested-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231024110710.3039807-1-michal.winiarski@intel.com
---
 include/linux/iosys-map.h | 44 +++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h
index 1b06d074ade03..e3649a6563dd8 100644
--- a/include/linux/iosys-map.h
+++ b/include/linux/iosys-map.h
@@ -168,9 +168,9 @@ struct iosys_map {
  * about the use of uninitialized variable.
  */
 #define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({				\
-	struct iosys_map copy = *map_;					\
-	iosys_map_incr(&copy, offset_);					\
-	copy;								\
+	struct iosys_map copy_ = *map_;					\
+	iosys_map_incr(&copy_, offset_);				\
+	copy_;								\
 })
 
 /**
@@ -391,14 +391,14 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
  * Returns:
  * The value read from the mapping.
  */
-#define iosys_map_rd(map__, offset__, type__) ({				\
-	type__ val;								\
-	if ((map__)->is_iomem) {						\
-		__iosys_map_rd_io(val, (map__)->vaddr_iomem + (offset__), type__);\
-	} else {								\
-		__iosys_map_rd_sys(val, (map__)->vaddr + (offset__), type__);	\
-	}									\
-	val;									\
+#define iosys_map_rd(map__, offset__, type__) ({					\
+	type__ val_;									\
+	if ((map__)->is_iomem) {							\
+		__iosys_map_rd_io(val_, (map__)->vaddr_iomem + (offset__), type__);	\
+	} else {									\
+		__iosys_map_rd_sys(val_, (map__)->vaddr + (offset__), type__);		\
+	}										\
+	val_;										\
 })
 
 /**
@@ -413,13 +413,13 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
  * or if pointer may be unaligned (and problematic for the architecture
  * supported), use iosys_map_memcpy_to()
  */
-#define iosys_map_wr(map__, offset__, type__, val__) ({				\
-	type__ val = (val__);							\
-	if ((map__)->is_iomem) {						\
-		__iosys_map_wr_io(val, (map__)->vaddr_iomem + (offset__), type__);\
-	} else {								\
-		__iosys_map_wr_sys(val, (map__)->vaddr + (offset__), type__);	\
-	}									\
+#define iosys_map_wr(map__, offset__, type__, val__) ({					\
+	type__ val_ = (val__);								\
+	if ((map__)->is_iomem) {							\
+		__iosys_map_wr_io(val_, (map__)->vaddr_iomem + (offset__), type__);	\
+	} else {									\
+		__iosys_map_wr_sys(val_, (map__)->vaddr + (offset__), type__);		\
+	}										\
 })
 
 /**
@@ -485,9 +485,9 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
  * The value read from the mapping.
  */
 #define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({	\
-	struct_type__ *s;							\
+	struct_type__ *s_;							\
 	iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__),	\
-		     typeof(s->field__));					\
+		     typeof(s_->field__));					\
 })
 
 /**
@@ -508,9 +508,9 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
  * usage and memory layout.
  */
 #define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({	\
-	struct_type__ *s;								\
+	struct_type__ *s_;								\
 	iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__),		\
-		     typeof(s->field__), val__);					\
+		     typeof(s_->field__), val__);					\
 })
 
 #endif /* __IOSYS_MAP_H__ */
-- 
GitLab


From 34b98a5f7a185c19715cc98c57d7e27b4785dfdf Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Date: Sun, 12 Nov 2023 01:42:36 +0300
Subject: [PATCH 0264/2340] drm/virtio: Fix return value for
 VIRTGPU_CONTEXT_PARAM_DEBUG_NAME

The strncpy_from_user() returns number of copied bytes and not zero on
success. The non-zero return value of ioctl is treated as error. Return
zero on success instead of the number of copied bytes.

Fixes: 7add80126bce ("drm/uapi: add explicit virtgpu context debug name")
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Reviewed-by: Gurchetan Singh <gurchetansingh@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231111224236.890431-1-dmitry.osipenko@collabora.com
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 1e2042419f957..e4f76f3155504 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -665,6 +665,7 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev,
 				goto out_unlock;
 
 			vfpriv->explicit_debug_name = true;
+			ret = 0;
 			break;
 		default:
 			ret = -EINVAL;
-- 
GitLab


From 8a4353d077788b4efb11beb8c4e3869ea7aeaff7 Mon Sep 17 00:00:00 2001
From: Vinod Govindapillai <vinod.govindapillai@intel.com>
Date: Sat, 11 Nov 2023 13:43:20 +0200
Subject: [PATCH 0265/2340] drm/i915/xe2lpd: implement WA for underruns while
 enabling FBC

FIFO underruns are observed when FBC is enabled on plane 2 or
plane 3. Recommended WA is to update the FBC enabling sequence.
The plane binding register bits need to be updated separately
before programming the FBC enable bit.

Bspec: 74151
Reviewed-by: Mika Kahola <mika.kahola@intel.com> #v3
Signed-off-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231111114320.87277-2-vinod.govindapillai@intel.com
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index bde12fe622750..b73cf1c5ba33d 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -608,6 +608,7 @@ static u32 ivb_dpfc_ctl(struct intel_fbc *fbc)
 static void ivb_fbc_activate(struct intel_fbc *fbc)
 {
 	struct drm_i915_private *i915 = fbc->i915;
+	u32 dpfc_ctl;
 
 	if (DISPLAY_VER(i915) >= 10)
 		glk_fbc_program_cfb_stride(fbc);
@@ -617,8 +618,13 @@ static void ivb_fbc_activate(struct intel_fbc *fbc)
 	if (intel_gt_support_legacy_fencing(to_gt(i915)))
 		snb_fbc_program_fence(fbc);
 
+	/* wa_14019417088 Alternative WA*/
+	dpfc_ctl = ivb_dpfc_ctl(fbc);
+	if (DISPLAY_VER(i915) >= 20)
+		intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl);
+
 	intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id),
-		       DPFC_CTL_EN | ivb_dpfc_ctl(fbc));
+		       DPFC_CTL_EN | dpfc_ctl);
 }
 
 static bool ivb_fbc_is_compressing(struct intel_fbc *fbc)
-- 
GitLab


From f740f031cce7703a966ad0279d0f15973d61df16 Mon Sep 17 00:00:00 2001
From: Marco Pagani <marpagan@redhat.com>
Date: Wed, 15 Nov 2023 11:35:36 +0100
Subject: [PATCH 0266/2340] drm/test: rearrange test entries in Kconfig and
 Makefile

Rearrange entries in Kconfig and Makefile alphabetically to make room
for additional KUnit test suites.

Signed-off-by: Marco Pagani <marpagan@redhat.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115103537.220760-1-marpagan@redhat.com
---
 drivers/gpu/drm/Kconfig        | 10 +++++-----
 drivers/gpu/drm/tests/Makefile |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 3eee8636f847a..cdbc56e076498 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -75,15 +75,15 @@ config DRM_KUNIT_TEST_HELPERS
 config DRM_KUNIT_TEST
 	tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS
 	depends on DRM && KUNIT
-	select PRIME_NUMBERS
+	select DRM_BUDDY
 	select DRM_DISPLAY_DP_HELPER
 	select DRM_DISPLAY_HELPER
-	select DRM_LIB_RANDOM
-	select DRM_KMS_HELPER
-	select DRM_BUDDY
+	select DRM_EXEC
 	select DRM_EXPORT_FOR_TESTS if m
+	select DRM_KMS_HELPER
 	select DRM_KUNIT_TEST_HELPERS
-	select DRM_EXEC
+	select DRM_LIB_RANDOM
+	select PRIME_NUMBERS
 	default KUNIT_ALL_TESTS
 	help
 	  This builds unit tests for DRM. This option is not useful for
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index ba7baa6226751..2645af241ff0b 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_connector_test.o \
 	drm_damage_helper_test.o \
 	drm_dp_mst_helper_test.o \
+	drm_exec_test.o \
 	drm_format_helper_test.o \
 	drm_format_test.o \
 	drm_framebuffer_test.o \
@@ -17,7 +18,6 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_modes_test.o \
 	drm_plane_helper_test.o \
 	drm_probe_helper_test.o \
-	drm_rect_test.o	\
-	drm_exec_test.o
+	drm_rect_test.o
 
 CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
-- 
GitLab


From 312292a4ee19dddcbc7cf58349596b6a7e39fcd0 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 2 Nov 2023 14:08:06 +0100
Subject: [PATCH 0267/2340] drm/client: Do not acquire module reference

Do not acquire a reference on the module that provides a client's
callback functions in drm_client_init(). The additional reference
prevents the user from unloading the callback functions' module and
thus creating dangling pointers.

This is only necessary if there is no direct dependency between the
caller of drm_client_init() and the provider of the callbacks in
struct drm_client_funcs. If this case ever existed, it has been
removed from the DRM code. Callers of drm_client_init() also provide
the callback implementation. The lifetime of the clients is tied to
the dependency chain's outer-most module, which is the hardware's
DRM driver. Before client helpers could be unloaded, the driver module
would have to be unloaded, which also unregisters all clients.

Driver modules that set up DRM clients can now be unloaded.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102131056.7256-2-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_client.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index c3027115d0552..9403b3f576f7b 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -5,7 +5,6 @@
 
 #include <linux/iosys-map.h>
 #include <linux/list.h>
-#include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -84,16 +83,13 @@ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
 	if (!drm_core_check_feature(dev, DRIVER_MODESET) || !dev->driver->dumb_create)
 		return -EOPNOTSUPP;
 
-	if (funcs && !try_module_get(funcs->owner))
-		return -ENODEV;
-
 	client->dev = dev;
 	client->name = name;
 	client->funcs = funcs;
 
 	ret = drm_client_modeset_create(client);
 	if (ret)
-		goto err_put_module;
+		return ret;
 
 	ret = drm_client_open(client);
 	if (ret)
@@ -105,10 +101,6 @@ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
 
 err_free:
 	drm_client_modeset_free(client);
-err_put_module:
-	if (funcs)
-		module_put(funcs->owner);
-
 	return ret;
 }
 EXPORT_SYMBOL(drm_client_init);
@@ -177,8 +169,6 @@ void drm_client_release(struct drm_client_dev *client)
 	drm_client_modeset_free(client);
 	drm_client_close(client);
 	drm_dev_put(dev);
-	if (client->funcs)
-		module_put(client->funcs->owner);
 }
 EXPORT_SYMBOL(drm_client_release);
 
-- 
GitLab


From ef75c25e8fedbfcf07ae4223fb7cc9ea5fb342a7 Mon Sep 17 00:00:00 2001
From: Animesh Manna <animesh.manna@intel.com>
Date: Wed, 8 Nov 2023 12:53:03 +0530
Subject: [PATCH 0268/2340] drm/i915/panelreplay: Debugfs support for panel
 replay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add debugfs support which will print source and sink status
per connector basis. Existing i915_psr_status and
i915_psr_sink_status will be used to get the source and
sink status of panel replay.

v1: Initial version. [rb-ed by Arun]
v2: Added check for DP 2.0 and connector type in connector_debugfs_add().
v3: Optimization and cosmetic changes. [Jouni]

Cc: Jouni Högander <jouni.hogander@intel.com>
Cc: Arun R Murthy <arun.r.murthy@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231108072303.3414118-7-animesh.manna@intel.com
---
 drivers/gpu/drm/i915/display/intel_psr.c | 87 +++++++++++++++++-------
 1 file changed, 63 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 2fc89ac5b27a9..8d180132a74b1 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -2865,12 +2865,19 @@ static int psr_get_status_and_error_status(struct intel_dp *intel_dp,
 {
 	struct drm_dp_aux *aux = &intel_dp->aux;
 	int ret;
+	unsigned int offset;
 
-	ret = drm_dp_dpcd_readb(aux, DP_PSR_STATUS, status);
+	offset = intel_dp->psr.panel_replay_enabled ?
+		 DP_SINK_DEVICE_PR_AND_FRAME_LOCK_STATUS : DP_PSR_STATUS;
+
+	ret = drm_dp_dpcd_readb(aux, offset, status);
 	if (ret != 1)
 		return ret;
 
-	ret = drm_dp_dpcd_readb(aux, DP_PSR_ERROR_STATUS, error_status);
+	offset = intel_dp->psr.panel_replay_enabled ?
+		 DP_PANEL_REPLAY_ERROR_STATUS : DP_PSR_ERROR_STATUS;
+
+	ret = drm_dp_dpcd_readb(aux, offset, error_status);
 	if (ret != 1)
 		return ret;
 
@@ -3091,7 +3098,7 @@ psr_source_status(struct intel_dp *intel_dp, struct seq_file *m)
 			status = live_status[status_val];
 	}
 
-	seq_printf(m, "Source PSR status: %s [0x%08x]\n", status, val);
+	seq_printf(m, "Source PSR/PanelReplay status: %s [0x%08x]\n", status, val);
 }
 
 static int intel_psr_status(struct seq_file *m, struct intel_dp *intel_dp)
@@ -3104,18 +3111,22 @@ static int intel_psr_status(struct seq_file *m, struct intel_dp *intel_dp)
 	bool enabled;
 	u32 val;
 
-	seq_printf(m, "Sink support: %s", str_yes_no(psr->sink_support));
+	seq_printf(m, "Sink support: PSR = %s",
+		   str_yes_no(psr->sink_support));
+
 	if (psr->sink_support)
 		seq_printf(m, " [0x%02x]", intel_dp->psr_dpcd[0]);
-	seq_puts(m, "\n");
+	seq_printf(m, ", Panel Replay = %s\n", str_yes_no(psr->sink_panel_replay_support));
 
-	if (!psr->sink_support)
+	if (!(psr->sink_support || psr->sink_panel_replay_support))
 		return 0;
 
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	mutex_lock(&psr->lock);
 
-	if (psr->enabled)
+	if (psr->panel_replay_enabled)
+		status = "Panel Replay Enabled";
+	else if (psr->enabled)
 		status = psr->psr2_enabled ? "PSR2 enabled" : "PSR1 enabled";
 	else
 		status = "disabled";
@@ -3128,14 +3139,17 @@ static int intel_psr_status(struct seq_file *m, struct intel_dp *intel_dp)
 		goto unlock;
 	}
 
-	if (psr->psr2_enabled) {
+	if (psr->panel_replay_enabled) {
+		val = intel_de_read(dev_priv, TRANS_DP2_CTL(cpu_transcoder));
+		enabled = val & TRANS_DP2_PANEL_REPLAY_ENABLE;
+	} else if (psr->psr2_enabled) {
 		val = intel_de_read(dev_priv, EDP_PSR2_CTL(cpu_transcoder));
 		enabled = val & EDP_PSR2_ENABLE;
 	} else {
 		val = intel_de_read(dev_priv, psr_ctl_reg(dev_priv, cpu_transcoder));
 		enabled = val & EDP_PSR_ENABLE;
 	}
-	seq_printf(m, "Source PSR ctl: %s [0x%08x]\n",
+	seq_printf(m, "Source PSR/PanelReplay ctl: %s [0x%08x]\n",
 		   str_enabled_disabled(enabled), val);
 	psr_source_status(intel_dp, m);
 	seq_printf(m, "Busy frontbuffer bits: 0x%08x\n",
@@ -3273,6 +3287,16 @@ void intel_psr_debugfs_register(struct drm_i915_private *i915)
 			    i915, &i915_edp_psr_status_fops);
 }
 
+static const char *psr_mode_str(struct intel_dp *intel_dp)
+{
+	if (intel_dp->psr.panel_replay_enabled)
+		return "PANEL-REPLAY";
+	else if (intel_dp->psr.enabled)
+		return "PSR";
+
+	return "unknown";
+}
+
 static int i915_psr_sink_status_show(struct seq_file *m, void *data)
 {
 	struct intel_connector *connector = m->private;
@@ -3287,12 +3311,19 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data)
 		"reserved",
 		"sink internal error",
 	};
+	static const char * const panel_replay_status[] = {
+		"Sink device frame is locked to the Source device",
+		"Sink device is coasting, using the VTotal target",
+		"Sink device is governing the frame rate (frame rate unlock is granted)",
+		"Sink device in the process of re-locking with the Source device",
+	};
 	const char *str;
 	int ret;
 	u8 status, error_status;
+	u32 idx;
 
-	if (!CAN_PSR(intel_dp)) {
-		seq_puts(m, "PSR Unsupported\n");
+	if (!(CAN_PSR(intel_dp) || CAN_PANEL_REPLAY(intel_dp))) {
+		seq_puts(m, "PSR/Panel-Replay Unsupported\n");
 		return -ENODEV;
 	}
 
@@ -3303,15 +3334,20 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data)
 	if (ret)
 		return ret;
 
-	status &= DP_PSR_SINK_STATE_MASK;
-	if (status < ARRAY_SIZE(sink_status))
-		str = sink_status[status];
-	else
-		str = "unknown";
+	str = "unknown";
+	if (intel_dp->psr.panel_replay_enabled) {
+		idx = (status & DP_SINK_FRAME_LOCKED_MASK) >> DP_SINK_FRAME_LOCKED_SHIFT;
+		if (idx < ARRAY_SIZE(panel_replay_status))
+			str = panel_replay_status[idx];
+	} else if (intel_dp->psr.enabled) {
+		idx = status & DP_PSR_SINK_STATE_MASK;
+		if (idx < ARRAY_SIZE(sink_status))
+			str = sink_status[idx];
+	}
 
-	seq_printf(m, "Sink PSR status: 0x%x [%s]\n", status, str);
+	seq_printf(m, "Sink %s status: 0x%x [%s]\n", psr_mode_str(intel_dp), status, str);
 
-	seq_printf(m, "Sink PSR error status: 0x%x", error_status);
+	seq_printf(m, "Sink %s error status: 0x%x", psr_mode_str(intel_dp), error_status);
 
 	if (error_status & (DP_PSR_RFB_STORAGE_ERROR |
 			    DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR |
@@ -3320,11 +3356,11 @@ static int i915_psr_sink_status_show(struct seq_file *m, void *data)
 	else
 		seq_puts(m, "\n");
 	if (error_status & DP_PSR_RFB_STORAGE_ERROR)
-		seq_puts(m, "\tPSR RFB storage error\n");
+		seq_printf(m, "\t%s RFB storage error\n", psr_mode_str(intel_dp));
 	if (error_status & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR)
-		seq_puts(m, "\tPSR VSC SDP uncorrectable error\n");
+		seq_printf(m, "\t%s VSC SDP uncorrectable error\n", psr_mode_str(intel_dp));
 	if (error_status & DP_PSR_LINK_CRC_ERROR)
-		seq_puts(m, "\tPSR Link CRC error\n");
+		seq_printf(m, "\t%s Link CRC error\n", psr_mode_str(intel_dp));
 
 	return ret;
 }
@@ -3344,13 +3380,16 @@ void intel_psr_connector_debugfs_add(struct intel_connector *connector)
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct dentry *root = connector->base.debugfs_entry;
 
-	if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP)
-		return;
+	if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) {
+		if (!(HAS_DP20(i915) &&
+		      connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort))
+			return;
+	}
 
 	debugfs_create_file("i915_psr_sink_status", 0444, root,
 			    connector, &i915_psr_sink_status_fops);
 
-	if (HAS_PSR(i915))
+	if (HAS_PSR(i915) || HAS_DP20(i915))
 		debugfs_create_file("i915_psr_status", 0444, root,
 				    connector, &i915_psr_status_fops);
 }
-- 
GitLab


From 7c8601aea3a5e8a829a73cc9e572309c12ce9aca Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 14 Nov 2023 16:10:12 +0200
Subject: [PATCH 0269/2340] drm/i915: Fix fractional bpp handling in
 intel_link_bw_reduce_bpp()

Convert crtc_state->pipe_bpp to U6.4 format as expected by the rest of
the function.

Fixes: 59a266f068b4 ("drm/i915/display: Store compressed bpp in U6.4 format")
Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Cc: Suraj Kandpal <suraj.kandpal@intel.com>
Cc: Sui Jingfeng <suijingfeng@loongson.cn>
Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114141012.603960-1-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_link_bw.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c
index 02a0af2aa5bae..9c6d35a405a18 100644
--- a/drivers/gpu/drm/i915/display/intel_link_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_link_bw.c
@@ -55,11 +55,11 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state,
 	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	enum pipe max_bpp_pipe = INVALID_PIPE;
 	struct intel_crtc *crtc;
-	int max_bpp = 0;
+	int max_bpp_x16 = 0;
 
 	for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, pipe_mask) {
 		struct intel_crtc_state *crtc_state;
-		int link_bpp;
+		int link_bpp_x16;
 
 		if (limits->bpp_limit_reached_pipes & BIT(crtc->pipe))
 			continue;
@@ -70,7 +70,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state,
 			return PTR_ERR(crtc_state);
 
 		if (crtc_state->dsc.compression_enable)
-			link_bpp = crtc_state->dsc.compressed_bpp_x16;
+			link_bpp_x16 = crtc_state->dsc.compressed_bpp_x16;
 		else
 			/*
 			 * TODO: for YUV420 the actual link bpp is only half
@@ -78,10 +78,10 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state,
 			 * is based on the pipe bpp value, set the actual link bpp
 			 * limit here once the MST BW allocation is fixed.
 			 */
-			link_bpp = crtc_state->pipe_bpp;
+			link_bpp_x16 = to_bpp_x16(crtc_state->pipe_bpp);
 
-		if (link_bpp > max_bpp) {
-			max_bpp = link_bpp;
+		if (link_bpp_x16 > max_bpp_x16) {
+			max_bpp_x16 = link_bpp_x16;
 			max_bpp_pipe = crtc->pipe;
 		}
 	}
@@ -89,7 +89,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state,
 	if (max_bpp_pipe == INVALID_PIPE)
 		return -ENOSPC;
 
-	limits->max_bpp_x16[max_bpp_pipe] = to_bpp_x16(max_bpp) - 1;
+	limits->max_bpp_x16[max_bpp_pipe] = max_bpp_x16 - 1;
 
 	return intel_modeset_pipes_in_mask_early(state, reason,
 						 BIT(max_bpp_pipe));
-- 
GitLab


From d7b4832cbeb85075293b1211a9c89fad4fdda1f1 Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Thu, 9 Nov 2023 11:21:48 -0800
Subject: [PATCH 0270/2340] drm/i915: Read a shadowed mmio register for ggtt
 flush

We read RENDER_HEAD as a part of the flush. If GT is in
deeper sleep states, this could lead to read errors since we are
not using a forcewake. Safer to read a shadowed register instead.

Cc: John Harrison <john.c.harrison@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109192148.475156-1-vinay.belgaumkar@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ed32bf5b15464..ea814ea5f700f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 
 		spin_lock_irqsave(&uncore->lock, flags);
 		intel_uncore_posting_read_fw(uncore,
-					     RING_HEAD(RENDER_RING_BASE));
+					     RING_TAIL(RENDER_RING_BASE));
 		spin_unlock_irqrestore(&uncore->lock, flags);
 	}
 }
-- 
GitLab


From 98ed369800f79a2cd199b8415d14d82a5f2e007f Mon Sep 17 00:00:00 2001
From: Animesh Manna <animesh.manna@intel.com>
Date: Fri, 10 Nov 2023 08:55:18 +0530
Subject: [PATCH 0271/2340] drm/i915/dsb: DSB code refactoring

Refactor DSB implementation to be compatible with Xe driver.

v1: RFC version.
v2: Make intel_dsb structure opaque from external usage. [Jani]
v3: Rebased on latest.
v4:
- Add boundary check in dsb_buffer_memset(). [Luca]
- Use size_t instead of u32. [Luca]
v5: WARN_ON() added for out of boudary case with some optimization. [Luca]
v6: Rebased on latest and fix a rebase-miss.

Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110032518.3564279-1-animesh.manna@intel.com
---
 drivers/gpu/drm/i915/Makefile                 |  1 +
 drivers/gpu/drm/i915/display/intel_dsb.c      | 98 +++++++------------
 .../gpu/drm/i915/display/intel_dsb_buffer.c   | 82 ++++++++++++++++
 .../gpu/drm/i915/display/intel_dsb_buffer.h   | 29 ++++++
 4 files changed, 148 insertions(+), 62 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/display/intel_dsb_buffer.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_dsb_buffer.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 239da40a401f1..7e5d6a39d4503 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -277,6 +277,7 @@ i915-y += \
 	display/intel_dpt.o \
 	display/intel_drrs.o \
 	display/intel_dsb.o \
+	display/intel_dsb_buffer.o \
 	display/intel_fb.o \
 	display/intel_fb_pin.o \
 	display/intel_fbc.o \
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 78b6fe24dcd8b..9598d50f68f22 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -4,9 +4,6 @@
  *
  */
 
-#include "gem/i915_gem_internal.h"
-#include "gem/i915_gem_lmem.h"
-
 #include "i915_drv.h"
 #include "i915_irq.h"
 #include "i915_reg.h"
@@ -14,12 +11,13 @@
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_dsb.h"
+#include "intel_dsb_buffer.h"
 #include "intel_dsb_regs.h"
 #include "intel_vblank.h"
 #include "intel_vrr.h"
 #include "skl_watermark.h"
 
-struct i915_vma;
+#define CACHELINE_BYTES 64
 
 enum dsb_id {
 	INVALID_DSB = -1,
@@ -32,8 +30,7 @@ enum dsb_id {
 struct intel_dsb {
 	enum dsb_id id;
 
-	u32 *cmd_buf;
-	struct i915_vma *vma;
+	struct intel_dsb_buffer dsb_buf;
 	struct intel_crtc *crtc;
 
 	/*
@@ -109,15 +106,17 @@ static void intel_dsb_dump(struct intel_dsb *dsb)
 {
 	struct intel_crtc *crtc = dsb->crtc;
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
-	const u32 *buf = dsb->cmd_buf;
 	int i;
 
 	drm_dbg_kms(&i915->drm, "[CRTC:%d:%s] DSB %d commands {\n",
 		    crtc->base.base.id, crtc->base.name, dsb->id);
 	for (i = 0; i < ALIGN(dsb->free_pos, 64 / 4); i += 4)
 		drm_dbg_kms(&i915->drm,
-			    " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
-			    i * 4, buf[i], buf[i+1], buf[i+2], buf[i+3]);
+			    " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4,
+			    intel_dsb_buffer_read(&dsb->dsb_buf, i),
+			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 1),
+			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 2),
+			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 3));
 	drm_dbg_kms(&i915->drm, "}\n");
 }
 
@@ -129,8 +128,6 @@ static bool is_dsb_busy(struct drm_i915_private *i915, enum pipe pipe,
 
 static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw)
 {
-	u32 *buf = dsb->cmd_buf;
-
 	if (!assert_dsb_has_room(dsb))
 		return;
 
@@ -139,14 +136,13 @@ static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw)
 
 	dsb->ins_start_offset = dsb->free_pos;
 
-	buf[dsb->free_pos++] = ldw;
-	buf[dsb->free_pos++] = udw;
+	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, ldw);
+	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, udw);
 }
 
 static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb,
 					u32 opcode, i915_reg_t reg)
 {
-	const u32 *buf = dsb->cmd_buf;
 	u32 prev_opcode, prev_reg;
 
 	/*
@@ -157,8 +153,10 @@ static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb,
 	if (dsb->free_pos == 0)
 		return false;
 
-	prev_opcode = buf[dsb->ins_start_offset + 1] & ~DSB_REG_VALUE_MASK;
-	prev_reg = buf[dsb->ins_start_offset + 1] & DSB_REG_VALUE_MASK;
+	prev_opcode = intel_dsb_buffer_read(&dsb->dsb_buf,
+					    dsb->ins_start_offset + 1) & ~DSB_REG_VALUE_MASK;
+	prev_reg =  intel_dsb_buffer_read(&dsb->dsb_buf,
+					  dsb->ins_start_offset + 1) & DSB_REG_VALUE_MASK;
 
 	return prev_opcode == opcode && prev_reg == i915_mmio_reg_offset(reg);
 }
@@ -191,6 +189,8 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_
 void intel_dsb_reg_write(struct intel_dsb *dsb,
 			 i915_reg_t reg, u32 val)
 {
+	u32 old_val;
+
 	/*
 	 * For example the buffer will look like below for 3 dwords for auto
 	 * increment register:
@@ -214,31 +214,32 @@ void intel_dsb_reg_write(struct intel_dsb *dsb,
 			       (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
 			       i915_mmio_reg_offset(reg));
 	} else {
-		u32 *buf = dsb->cmd_buf;
-
 		if (!assert_dsb_has_room(dsb))
 			return;
 
 		/* convert to indexed write? */
 		if (intel_dsb_prev_ins_is_mmio_write(dsb, reg)) {
-			u32 prev_val = buf[dsb->ins_start_offset + 0];
+			u32 prev_val = intel_dsb_buffer_read(&dsb->dsb_buf,
+							     dsb->ins_start_offset + 0);
 
-			buf[dsb->ins_start_offset + 0] = 1; /* count */
-			buf[dsb->ins_start_offset + 1] =
-				(DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) |
-				i915_mmio_reg_offset(reg);
-			buf[dsb->ins_start_offset + 2] = prev_val;
+			intel_dsb_buffer_write(&dsb->dsb_buf,
+					       dsb->ins_start_offset + 0, 1); /* count */
+			intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 1,
+					       (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) |
+					       i915_mmio_reg_offset(reg));
+			intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 2, prev_val);
 
 			dsb->free_pos++;
 		}
 
-		buf[dsb->free_pos++] = val;
+		intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, val);
 		/* Update the count */
-		buf[dsb->ins_start_offset]++;
+		old_val = intel_dsb_buffer_read(&dsb->dsb_buf, dsb->ins_start_offset);
+		intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset, old_val + 1);
 
 		/* if number of data words is odd, then the last dword should be 0.*/
 		if (dsb->free_pos & 0x1)
-			buf[dsb->free_pos] = 0;
+			intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos, 0);
 	}
 }
 
@@ -297,8 +298,8 @@ static void intel_dsb_align_tail(struct intel_dsb *dsb)
 	aligned_tail = ALIGN(tail, CACHELINE_BYTES);
 
 	if (aligned_tail > tail)
-		memset(&dsb->cmd_buf[dsb->free_pos], 0,
-		       aligned_tail - tail);
+		intel_dsb_buffer_memset(&dsb->dsb_buf, dsb->free_pos, 0,
+					aligned_tail - tail);
 
 	dsb->free_pos = aligned_tail / 4;
 }
@@ -317,7 +318,7 @@ void intel_dsb_finish(struct intel_dsb *dsb)
 
 	intel_dsb_align_tail(dsb);
 
-	i915_gem_object_flush_map(dsb->vma->obj);
+	intel_dsb_buffer_flush_map(&dsb->dsb_buf);
 }
 
 static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
@@ -361,7 +362,7 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
 			  ctrl | DSB_ENABLE);
 
 	intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id),
-			  i915_ggtt_offset(dsb->vma));
+			  intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf));
 
 	if (dewake_scanline >= 0) {
 		int diff, hw_dewake_scanline;
@@ -383,7 +384,7 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
 	}
 
 	intel_de_write_fw(dev_priv, DSB_TAIL(pipe, dsb->id),
-			  i915_ggtt_offset(dsb->vma) + tail);
+			  intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf) + tail);
 }
 
 /**
@@ -408,7 +409,7 @@ void intel_dsb_wait(struct intel_dsb *dsb)
 	enum pipe pipe = crtc->pipe;
 
 	if (wait_for(!is_dsb_busy(dev_priv, pipe, dsb->id), 1)) {
-		u32 offset = i915_ggtt_offset(dsb->vma);
+		u32 offset = intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf);
 
 		intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id),
 				  DSB_ENABLE | DSB_HALT);
@@ -445,12 +446,9 @@ struct intel_dsb *intel_dsb_prepare(const struct intel_crtc_state *crtc_state,
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
-	struct drm_i915_gem_object *obj;
 	intel_wakeref_t wakeref;
 	struct intel_dsb *dsb;
-	struct i915_vma *vma;
 	unsigned int size;
-	u32 *buf;
 
 	if (!HAS_DSB(i915))
 		return NULL;
@@ -464,37 +462,13 @@ struct intel_dsb *intel_dsb_prepare(const struct intel_crtc_state *crtc_state,
 	/* ~1 qword per instruction, full cachelines */
 	size = ALIGN(max_cmds * 8, CACHELINE_BYTES);
 
-	if (HAS_LMEM(i915)) {
-		obj = i915_gem_object_create_lmem(i915, PAGE_ALIGN(size),
-						  I915_BO_ALLOC_CONTIGUOUS);
-		if (IS_ERR(obj))
-			goto out_put_rpm;
-	} else {
-		obj = i915_gem_object_create_internal(i915, PAGE_ALIGN(size));
-		if (IS_ERR(obj))
-			goto out_put_rpm;
-
-		i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
-	}
-
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
-	if (IS_ERR(vma)) {
-		i915_gem_object_put(obj);
-		goto out_put_rpm;
-	}
-
-	buf = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
-	if (IS_ERR(buf)) {
-		i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
+	if (!intel_dsb_buffer_create(crtc, &dsb->dsb_buf, size))
 		goto out_put_rpm;
-	}
 
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
 	dsb->id = DSB1;
-	dsb->vma = vma;
 	dsb->crtc = crtc;
-	dsb->cmd_buf = buf;
 	dsb->size = size / 4; /* in dwords */
 	dsb->free_pos = 0;
 	dsb->ins_start_offset = 0;
@@ -522,6 +496,6 @@ out:
  */
 void intel_dsb_cleanup(struct intel_dsb *dsb)
 {
-	i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP);
+	intel_dsb_buffer_cleanup(&dsb->dsb_buf);
 	kfree(dsb);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_dsb_buffer.c b/drivers/gpu/drm/i915/display/intel_dsb_buffer.c
new file mode 100644
index 0000000000000..c77d48bda26ac
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dsb_buffer.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023, Intel Corporation.
+ */
+
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_display_types.h"
+#include "intel_dsb_buffer.h"
+
+u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
+{
+	return i915_ggtt_offset(dsb_buf->vma);
+}
+
+void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
+{
+	dsb_buf->cmd_buf[idx] = val;
+}
+
+u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
+{
+	return dsb_buf->cmd_buf[idx];
+}
+
+void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
+{
+	WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
+
+	memset(&dsb_buf->cmd_buf[idx], val, size);
+}
+
+bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
+{
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	u32 *buf;
+
+	if (HAS_LMEM(i915)) {
+		obj = i915_gem_object_create_lmem(i915, PAGE_ALIGN(size),
+						  I915_BO_ALLOC_CONTIGUOUS);
+		if (IS_ERR(obj))
+			return false;
+	} else {
+		obj = i915_gem_object_create_internal(i915, PAGE_ALIGN(size));
+		if (IS_ERR(obj))
+			return false;
+
+		i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+	}
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return false;
+	}
+
+	buf = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
+	if (IS_ERR(buf)) {
+		i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
+		return false;
+	}
+
+	dsb_buf->vma = vma;
+	dsb_buf->cmd_buf = buf;
+	dsb_buf->buf_size = size;
+
+	return true;
+}
+
+void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
+{
+	i915_vma_unpin_and_release(&dsb_buf->vma, I915_VMA_RELEASE_MAP);
+}
+
+void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
+{
+	i915_gem_object_flush_map(dsb_buf->vma->obj);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dsb_buffer.h b/drivers/gpu/drm/i915/display/intel_dsb_buffer.h
new file mode 100644
index 0000000000000..425acd3939059
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dsb_buffer.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_DSB_BUFFER_H
+#define _INTEL_DSB_BUFFER_H
+
+#include <linux/types.h>
+
+struct intel_crtc;
+struct i915_vma;
+
+struct intel_dsb_buffer {
+	u32 *cmd_buf;
+	struct i915_vma *vma;
+	size_t buf_size;
+};
+
+u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf);
+void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val);
+u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx);
+void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size);
+bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf,
+			     size_t size);
+void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf);
+void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf);
+
+#endif
-- 
GitLab


From 57bdac8ee2998d6bba091326e16967b4e5f74ae8 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Wed, 15 Nov 2023 13:10:33 +0100
Subject: [PATCH 0272/2340] drm/i915/gt: add missing new-line to GT_TRACE

Trace requires new-line at the end of message (in opposition to printk),
otherwise trace dump becomes messy.

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Acked-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115-eols-v1-1-d47a2f52b807@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index f5899d503e234..471b7cdc10ba0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -167,7 +167,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
 
-	GT_TRACE(gt, "force:%s", str_yes_no(force));
+	GT_TRACE(gt, "force:%s\n", str_yes_no(force));
 
 	/* Use a raw wakeref to avoid calling intel_display_power_get early */
 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
-- 
GitLab


From e899505533852bf1da133f2f4c9a9655ff77f7e5 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Wed, 15 Nov 2023 11:54:03 +0100
Subject: [PATCH 0273/2340] drm/i915: do not clean GT table on error path

The only task of intel_gt_release_all is to zero gt table. Calling
it on error path prevents intel_gt_driver_late_release_all (called from
i915_driver_late_release) to cleanup GTs, causing leakage.
After i915_driver_late_release GT array is not used anymore so
it does not need cleaning at all.

Sample leak report:

BUG i915_request (...): Objects remaining in i915_request on __kmem_cache_shutdown()
...
Object 0xffff888113420040 @offset=64
Allocated in __i915_request_create+0x75/0x610 [i915] age=18339 cpu=1 pid=1454
 kmem_cache_alloc+0x25b/0x270
 __i915_request_create+0x75/0x610 [i915]
 i915_request_create+0x109/0x290 [i915]
 __engines_record_defaults+0xca/0x440 [i915]
 intel_gt_init+0x275/0x430 [i915]
 i915_gem_init+0x135/0x2c0 [i915]
 i915_driver_probe+0x8d1/0xdc0 [i915]

v2: removed whole intel_gt_release_all

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8489
Fixes: bec68cc9ea42 ("drm/i915: Prepare for multiple GTs")
Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115-dont_clean_gt_on_error_path-v2-1-54250125470a@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 11 -----------
 drivers/gpu/drm/i915/i915_driver.c |  4 +---
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ea814ea5f700f..a425db5ed3a22 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -982,8 +982,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
 
 err:
 	i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret);
-	intel_gt_release_all(i915);
-
 	return ret;
 }
 
@@ -1002,15 +1000,6 @@ int intel_gt_tiles_init(struct drm_i915_private *i915)
 	return 0;
 }
 
-void intel_gt_release_all(struct drm_i915_private *i915)
-{
-	struct intel_gt *gt;
-	unsigned int id;
-
-	for_each_gt(gt, i915, id)
-		i915->gt[id] = NULL;
-}
-
 void intel_gt_info_print(const struct intel_gt_info *info,
 			 struct drm_printer *p)
 {
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 8a0e2c745e1f9..802de2c6decb7 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -782,7 +782,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	ret = i915_driver_mmio_probe(i915);
 	if (ret < 0)
-		goto out_tiles_cleanup;
+		goto out_runtime_pm_put;
 
 	ret = i915_driver_hw_probe(i915);
 	if (ret < 0)
@@ -842,8 +842,6 @@ out_cleanup_hw:
 	i915_ggtt_driver_late_release(i915);
 out_cleanup_mmio:
 	i915_driver_mmio_release(i915);
-out_tiles_cleanup:
-	intel_gt_release_all(i915);
 out_runtime_pm_put:
 	enable_rpm_wakeref_asserts(&i915->runtime_pm);
 	i915_driver_late_release(i915);
-- 
GitLab


From 043a2d5d71d87553985816065da41cff72f5f2df Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Mon, 13 Nov 2023 18:02:48 +0100
Subject: [PATCH 0274/2340] accel/ivpu: Rename cons->rx_msg_lock

Now the cons->rx_msg_lock also protects 'abort' field so rename the lock.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113170252.758137-2-jacek.lawrynowicz@linux.intel.com
---
 drivers/accel/ivpu/ivpu_ipc.c | 27 +++++++++++++--------------
 drivers/accel/ivpu/ivpu_ipc.h |  2 +-
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 88453762c9d53..31ae0e71a8a36 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -150,7 +150,7 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	cons->tx_vpu_addr = 0;
 	cons->request_id = 0;
 	cons->aborted = false;
-	spin_lock_init(&cons->rx_msg_lock);
+	spin_lock_init(&cons->rx_lock);
 	INIT_LIST_HEAD(&cons->rx_msg_list);
 	init_waitqueue_head(&cons->rx_msg_wq);
 
@@ -168,7 +168,7 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
 	list_del(&cons->link);
 	spin_unlock_irq(&ipc->cons_list_lock);
 
-	spin_lock_irq(&cons->rx_msg_lock);
+	spin_lock_irq(&cons->rx_lock);
 	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
 		list_del(&rx_msg->link);
 		if (!cons->aborted)
@@ -176,7 +176,7 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
 		atomic_dec(&ipc->rx_msg_count);
 		kfree(rx_msg);
 	}
-	spin_unlock_irq(&cons->rx_msg_lock);
+	spin_unlock_irq(&cons->rx_lock);
 
 	ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
 }
@@ -212,9 +212,9 @@ static int ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
 	if (IS_KTHREAD())
 		ret |= (kthread_should_stop() || kthread_should_park());
 
-	spin_lock_irq(&cons->rx_msg_lock);
+	spin_lock_irq(&cons->rx_lock);
 	ret |= !list_empty(&cons->rx_msg_list) || cons->aborted;
-	spin_unlock_irq(&cons->rx_msg_lock);
+	spin_unlock_irq(&cons->rx_lock);
 
 	return ret;
 }
@@ -237,20 +237,19 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	if (wait_ret == 0)
 		return -ETIMEDOUT;
 
-	spin_lock_irq(&cons->rx_msg_lock);
+	spin_lock_irq(&cons->rx_lock);
 	rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
 	if (!rx_msg) {
-		spin_unlock_irq(&cons->rx_msg_lock);
+		spin_unlock_irq(&cons->rx_lock);
 		return -EAGAIN;
 	}
 	list_del(&rx_msg->link);
 	if (cons->aborted) {
-		spin_unlock_irq(&cons->rx_msg_lock);
+		spin_unlock_irq(&cons->rx_lock);
 		ret = -ECANCELED;
 		goto out;
 	}
-
-	spin_unlock_irq(&cons->rx_msg_lock);
+	spin_unlock_irq(&cons->rx_lock);
 
 	if (ipc_buf)
 		memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
@@ -383,9 +382,9 @@ ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	rx_msg->ipc_hdr = ipc_hdr;
 	rx_msg->jsm_msg = jsm_msg;
 
-	spin_lock_irqsave(&cons->rx_msg_lock, flags);
+	spin_lock_irqsave(&cons->rx_lock, flags);
 	list_add_tail(&rx_msg->link, &cons->rx_msg_list);
-	spin_unlock_irqrestore(&cons->rx_msg_lock, flags);
+	spin_unlock_irqrestore(&cons->rx_lock, flags);
 
 	wake_up(&cons->rx_msg_wq);
 }
@@ -529,9 +528,9 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
 
 	spin_lock_irqsave(&ipc->cons_list_lock, flags);
 	list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
-		spin_lock(&cons->rx_msg_lock);
+		spin_lock(&cons->rx_lock);
 		cons->aborted = true;
-		spin_unlock(&cons->rx_msg_lock);
+		spin_unlock(&cons->rx_lock);
 		wake_up(&cons->rx_msg_wq);
 	}
 	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index a380787f7222a..71b2e648dffd7 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -49,7 +49,7 @@ struct ivpu_ipc_consumer {
 	u32 request_id;
 	bool aborted;
 
-	spinlock_t rx_msg_lock; /* Protects rx_msg_list and aborted */
+	spinlock_t rx_lock; /* Protects rx_msg_list and aborted */
 	struct list_head rx_msg_list;
 	wait_queue_head_t rx_msg_wq;
 };
-- 
GitLab


From 12fbf8ac39b08f810b767e2e3dc32258907ce890 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Mon, 13 Nov 2023 18:02:49 +0100
Subject: [PATCH 0275/2340] accel/ivpu: Do not use irqsave in ivpu_ipc_dispatch

ivpu_ipc_dispatch is always called with irqs disabled. Add lockdep
assertion and remove unneeded _irqsave/_irqrestore.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113170252.758137-3-jacek.lawrynowicz@linux.intel.com
---
 drivers/accel/ivpu/ivpu_ipc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 31ae0e71a8a36..781c7e40505a6 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -367,9 +367,9 @@ ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_rx_msg *rx_msg;
-	unsigned long flags;
 
 	lockdep_assert_held(&ipc->cons_list_lock);
+	lockdep_assert_irqs_disabled();
 
 	rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
 	if (!rx_msg) {
@@ -382,9 +382,9 @@ ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	rx_msg->ipc_hdr = ipc_hdr;
 	rx_msg->jsm_msg = jsm_msg;
 
-	spin_lock_irqsave(&cons->rx_lock, flags);
+	spin_lock(&cons->rx_lock);
 	list_add_tail(&rx_msg->link, &cons->rx_msg_list);
-	spin_unlock_irqrestore(&cons->rx_lock, flags);
+	spin_unlock(&cons->rx_lock);
 
 	wake_up(&cons->rx_msg_wq);
 }
-- 
GitLab


From b3c10b71a61c3a0412b7c390831c88405be3d24f Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Mon, 13 Nov 2023 18:02:50 +0100
Subject: [PATCH 0276/2340] accel/ivpu: Do not use cons->aborted for
 job_done_thread

This allow to simplify ivpu_ipc_receive() as now we do not have
to process all messages in aborted state - they will be freed in
ivpu_ipc_consumer_del().

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113170252.758137-4-jacek.lawrynowicz@linux.intel.com
---
 drivers/accel/ivpu/ivpu_ipc.c | 18 +++++++++---------
 drivers/accel/ivpu/ivpu_job.c |  1 -
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 781c7e40505a6..1dd4413dc88fa 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -238,17 +238,16 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 		return -ETIMEDOUT;
 
 	spin_lock_irq(&cons->rx_lock);
+	if (cons->aborted) {
+		spin_unlock_irq(&cons->rx_lock);
+		return -ECANCELED;
+	}
 	rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
 	if (!rx_msg) {
 		spin_unlock_irq(&cons->rx_lock);
 		return -EAGAIN;
 	}
 	list_del(&rx_msg->link);
-	if (cons->aborted) {
-		spin_unlock_irq(&cons->rx_lock);
-		ret = -ECANCELED;
-		goto out;
-	}
 	spin_unlock_irq(&cons->rx_lock);
 
 	if (ipc_buf)
@@ -266,7 +265,6 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	}
 
 	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
-out:
 	atomic_dec(&ipc->rx_msg_count);
 	kfree(rx_msg);
 
@@ -528,9 +526,11 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
 
 	spin_lock_irqsave(&ipc->cons_list_lock, flags);
 	list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
-		spin_lock(&cons->rx_lock);
-		cons->aborted = true;
-		spin_unlock(&cons->rx_lock);
+		if (cons->channel != VPU_IPC_CHAN_JOB_RET) {
+			spin_lock(&cons->rx_lock);
+			cons->aborted = true;
+			spin_unlock(&cons->rx_lock);
+		}
 		wake_up(&cons->rx_msg_wq);
 	}
 	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 02acd8dba02aa..77b1b8abadd6e 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -578,7 +578,6 @@ static int ivpu_job_done_thread(void *arg)
 	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
 
 	while (!kthread_should_stop()) {
-		cons.aborted = false;
 		timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
 		jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
 		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
-- 
GitLab


From 58cde80f45a2b1683ea3c24a9a9a4b0e1005336b Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Mon, 13 Nov 2023 18:02:51 +0100
Subject: [PATCH 0277/2340] accel/ivpu: Use dedicated work for job timeout
 detection

Change to use work for timeout detection. Needed for thread_irq
conversion.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113170252.758137-5-jacek.lawrynowicz@linux.intel.com
---
 drivers/accel/ivpu/ivpu_job.c | 24 +++++++++---------------
 drivers/accel/ivpu/ivpu_pm.c  | 31 +++++++++++++++++++++++++++++++
 drivers/accel/ivpu/ivpu_pm.h  |  3 +++
 3 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 77b1b8abadd6e..796366d679846 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -24,10 +24,6 @@
 #define JOB_ID_CONTEXT_MASK  GENMASK(31, 8)
 #define JOB_MAX_BUFFER_COUNT 65535
 
-static unsigned int ivpu_tdr_timeout_ms;
-module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
-
 static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
 {
 	ivpu_hw_reg_db_set(vdev, cmdq->db_id);
@@ -342,6 +338,8 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
 	ivpu_dbg(vdev, JOB, "Job complete:  id %3u ctx %2d engine %d status 0x%x\n",
 		 job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
 
+	ivpu_stop_job_timeout_detection(vdev);
+
 	job_put(job);
 	return 0;
 }
@@ -357,6 +355,9 @@ static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
 	ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
 	if (ret)
 		ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
+
+	if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
+		ivpu_start_job_timeout_detection(vdev);
 }
 
 void ivpu_jobs_abort_all(struct ivpu_device *vdev)
@@ -400,6 +401,8 @@ static int ivpu_direct_job_submission(struct ivpu_job *job)
 	if (ret)
 		goto err_xa_erase;
 
+	ivpu_start_job_timeout_detection(vdev);
+
 	ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
 		 job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
 		 job->engine_idx, cmdq->jobq->header.tail);
@@ -569,7 +572,6 @@ static int ivpu_job_done_thread(void *arg)
 	struct ivpu_device *vdev = (struct ivpu_device *)arg;
 	struct ivpu_ipc_consumer cons;
 	struct vpu_jsm_msg jsm_msg;
-	bool jobs_submitted;
 	unsigned int timeout;
 	int ret;
 
@@ -578,18 +580,10 @@ static int ivpu_job_done_thread(void *arg)
 	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
 
 	while (!kthread_should_stop()) {
-		timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
-		jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
 		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
-		if (!ret) {
+		if (!ret)
 			ivpu_job_done_message(vdev, &jsm_msg);
-		} else if (ret == -ETIMEDOUT) {
-			if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
-				ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
-				ivpu_hw_diagnose_failure(vdev);
-				ivpu_pm_schedule_recovery(vdev);
-			}
-		}
+
 		if (kthread_should_park()) {
 			ivpu_dbg(vdev, JOB, "Parked %s\n", __func__);
 			kthread_parkme();
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index d915f3c2991f4..0af8864cb3b55 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -23,6 +23,10 @@ static bool ivpu_disable_recovery;
 module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
 MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
 
+static unsigned long ivpu_tdr_timeout_ms;
+module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+
 #define PM_RESCHEDULE_LIMIT     5
 
 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
@@ -141,6 +145,31 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
 	}
 }
 
+static void ivpu_job_timeout_work(struct work_struct *work)
+{
+	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
+	struct ivpu_device *vdev = pm->vdev;
+	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+	ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms);
+	ivpu_hw_diagnose_failure(vdev);
+
+	ivpu_pm_schedule_recovery(vdev);
+}
+
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
+{
+	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+	/* No-op if already queued */
+	queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
+}
+
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
+{
+	cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+}
+
 int ivpu_pm_suspend_cb(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
@@ -317,6 +346,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
 
 	atomic_set(&pm->in_reset, 0);
 	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
+	INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
 
 	if (ivpu_disable_recovery)
 		delay = -1;
@@ -331,6 +361,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
 
 void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
 {
+	drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
 	cancel_work_sync(&vdev->pm->recovery_work);
 }
 
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index 044db150be078..97c6e0b0aa42d 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -12,6 +12,7 @@ struct ivpu_device;
 
 struct ivpu_pm_info {
 	struct ivpu_device *vdev;
+	struct delayed_work job_timeout_work;
 	struct work_struct recovery_work;
 	atomic_t in_reset;
 	atomic_t reset_counter;
@@ -37,5 +38,7 @@ int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
 void ivpu_rpm_put(struct ivpu_device *vdev);
 
 void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
 
 #endif /* __IVPU_PM_H__ */
-- 
GitLab


From 3b434a3445fff3149128db0169da864d67057325 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Mon, 13 Nov 2023 18:02:52 +0100
Subject: [PATCH 0278/2340] accel/ivpu: Use threaded IRQ to handle JOB done
 messages

Remove job_done thread and replace it with generic callback based
mechanism.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113170252.758137-6-jacek.lawrynowicz@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c     |  30 +++--
 drivers/accel/ivpu/ivpu_drv.h     |   3 +-
 drivers/accel/ivpu/ivpu_hw_37xx.c |  29 +++--
 drivers/accel/ivpu/ivpu_hw_40xx.c |  30 ++---
 drivers/accel/ivpu/ivpu_ipc.c     | 196 +++++++++++++++++-------------
 drivers/accel/ivpu/ivpu_ipc.h     |  22 +++-
 drivers/accel/ivpu/ivpu_job.c     |  84 +++----------
 drivers/accel/ivpu/ivpu_job.h     |   6 +-
 8 files changed, 199 insertions(+), 201 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index bc15b06e17440..64927682161b2 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -318,13 +318,11 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
 	if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST)
 		return 0;
 
-	ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
+	ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG, NULL);
 
 	timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
 	while (1) {
-		ret = ivpu_ipc_irq_handler(vdev);
-		if (ret)
-			break;
+		ivpu_ipc_irq_handler(vdev, NULL);
 		ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
 		if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
 			break;
@@ -378,7 +376,6 @@ int ivpu_boot(struct ivpu_device *vdev)
 	enable_irq(vdev->irq);
 	ivpu_hw_irq_enable(vdev);
 	ivpu_ipc_enable(vdev);
-	ivpu_job_done_thread_enable(vdev);
 	return 0;
 }
 
@@ -388,7 +385,6 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev)
 	disable_irq(vdev->irq);
 	ivpu_ipc_disable(vdev);
 	ivpu_mmu_disable(vdev);
-	ivpu_job_done_thread_disable(vdev);
 }
 
 int ivpu_shutdown(struct ivpu_device *vdev)
@@ -429,6 +425,13 @@ static const struct drm_driver driver = {
 	.minor = DRM_IVPU_DRIVER_MINOR,
 };
 
+static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
+{
+	struct ivpu_device *vdev = arg;
+
+	return ivpu_ipc_irq_thread_handler(vdev);
+}
+
 static int ivpu_irq_init(struct ivpu_device *vdev)
 {
 	struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
@@ -442,8 +445,8 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
 
 	vdev->irq = pci_irq_vector(pdev, 0);
 
-	ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
-			       IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
+	ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
+					ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
 
@@ -581,20 +584,15 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 
 	ivpu_pm_init(vdev);
 
-	ret = ivpu_job_done_thread_init(vdev);
-	if (ret)
-		goto err_ipc_fini;
-
 	ret = ivpu_boot(vdev);
 	if (ret)
-		goto err_job_done_thread_fini;
+		goto err_ipc_fini;
 
+	ivpu_job_done_consumer_init(vdev);
 	ivpu_pm_enable(vdev);
 
 	return 0;
 
-err_job_done_thread_fini:
-	ivpu_job_done_thread_fini(vdev);
 err_ipc_fini:
 	ivpu_ipc_fini(vdev);
 err_fw_fini:
@@ -619,7 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
 	ivpu_shutdown(vdev);
 	if (IVPU_WA(d3hot_after_power_off))
 		pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
-	ivpu_job_done_thread_fini(vdev);
+	ivpu_job_done_consumer_fini(vdev);
 	ivpu_pm_cancel_recovery(vdev);
 
 	ivpu_ipc_fini(vdev);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index f1e7072449f1e..ebc4b84f27b20 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -17,6 +17,7 @@
 #include <uapi/drm/ivpu_accel.h>
 
 #include "ivpu_mmu_context.h"
+#include "ivpu_ipc.h"
 
 #define DRIVER_NAME "intel_vpu"
 #define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
@@ -120,7 +121,7 @@ struct ivpu_device {
 	struct list_head bo_list;
 
 	struct xarray submitted_jobs_xa;
-	struct task_struct *job_done_thread;
+	struct ivpu_ipc_consumer job_done_consumer;
 
 	atomic64_t unique_id_counter;
 
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index a172cfb1c31f8..4ab1f14cf3604 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -891,17 +891,20 @@ static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
 }
 
 /* Handler for IRQs from VPU core (irqV) */
-static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
 {
 	u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
 
+	if (!status)
+		return false;
+
 	REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status);
 
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
 		ivpu_mmu_irq_evtq_handler(vdev);
 
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
-		ivpu_ipc_irq_handler(vdev);
+		ivpu_ipc_irq_handler(vdev, wake_thread);
 
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
 		ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -918,17 +921,17 @@ static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
 		ivpu_hw_37xx_irq_noc_firewall_handler(vdev);
 
-	return status;
+	return true;
 }
 
 /* Handler for IRQs from Buttress core (irqB) */
-static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
 {
 	u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
 	bool schedule_recovery = false;
 
-	if (status == 0)
-		return 0;
+	if (!status)
+		return false;
 
 	if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
 		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
@@ -964,23 +967,27 @@ static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
 	if (schedule_recovery)
 		ivpu_pm_schedule_recovery(vdev);
 
-	return status;
+	return true;
 }
 
 static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr)
 {
 	struct ivpu_device *vdev = ptr;
-	u32 ret_irqv, ret_irqb;
+	bool irqv_handled, irqb_handled, wake_thread = false;
 
 	REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
 
-	ret_irqv = ivpu_hw_37xx_irqv_handler(vdev, irq);
-	ret_irqb = ivpu_hw_37xx_irqb_handler(vdev, irq);
+	irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread);
+	irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq);
 
 	/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
 	REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
 
-	return IRQ_RETVAL(ret_irqb | ret_irqv);
+	if (wake_thread)
+		return IRQ_WAKE_THREAD;
+	if (irqv_handled || irqb_handled)
+		return IRQ_HANDLED;
+	return IRQ_NONE;
 }
 
 static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index f7e1f5c0667bd..eba2fdef2ace1 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -1047,13 +1047,12 @@ static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
 }
 
 /* Handler for IRQs from VPU core (irqV) */
-static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
 {
 	u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
-	irqreturn_t ret = IRQ_NONE;
 
 	if (!status)
-		return IRQ_NONE;
+		return false;
 
 	REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status);
 
@@ -1061,7 +1060,7 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
 		ivpu_mmu_irq_evtq_handler(vdev);
 
 	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
-		ret |= ivpu_ipc_irq_handler(vdev);
+		ivpu_ipc_irq_handler(vdev, wake_thread);
 
 	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
 		ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -1078,17 +1077,17 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
 	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
 		ivpu_hw_40xx_irq_noc_firewall_handler(vdev);
 
-	return ret;
+	return true;
 }
 
 /* Handler for IRQs from Buttress core (irqB) */
-static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
 {
 	bool schedule_recovery = false;
 	u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
 
-	if (status == 0)
-		return IRQ_NONE;
+	if (!status)
+		return false;
 
 	if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
 		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
@@ -1140,26 +1139,27 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
 	if (schedule_recovery)
 		ivpu_pm_schedule_recovery(vdev);
 
-	return IRQ_HANDLED;
+	return true;
 }
 
 static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
 {
+	bool irqv_handled, irqb_handled, wake_thread = false;
 	struct ivpu_device *vdev = ptr;
-	irqreturn_t ret = IRQ_NONE;
 
 	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
 
-	ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
-	ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
+	irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread);
+	irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq);
 
 	/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
 	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
 
-	if (ret & IRQ_WAKE_THREAD)
+	if (wake_thread)
 		return IRQ_WAKE_THREAD;
-
-	return ret;
+	if (irqv_handled || irqb_handled)
+		return IRQ_HANDLED;
+	return IRQ_NONE;
 }
 
 static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 1dd4413dc88fa..e86621f16f85a 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -5,7 +5,6 @@
 
 #include <linux/genalloc.h>
 #include <linux/highmem.h>
-#include <linux/kthread.h>
 #include <linux/pm_runtime.h>
 #include <linux/wait.h>
 
@@ -18,19 +17,12 @@
 #include "ivpu_pm.h"
 
 #define IPC_MAX_RX_MSG	128
-#define IS_KTHREAD()	(get_current()->flags & PF_KTHREAD)
 
 struct ivpu_ipc_tx_buf {
 	struct ivpu_ipc_hdr ipc;
 	struct vpu_jsm_msg jsm;
 };
 
-struct ivpu_ipc_rx_msg {
-	struct list_head link;
-	struct ivpu_ipc_hdr *ipc_hdr;
-	struct vpu_jsm_msg *jsm_msg;
-};
-
 static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c,
 			      struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr)
 {
@@ -140,8 +132,49 @@ static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
 	ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
 }
 
-void
-ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel)
+static void
+ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		    struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *rx_msg;
+
+	lockdep_assert_held(&ipc->cons_lock);
+	lockdep_assert_irqs_disabled();
+
+	rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
+	if (!rx_msg) {
+		ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
+		return;
+	}
+
+	atomic_inc(&ipc->rx_msg_count);
+
+	rx_msg->ipc_hdr = ipc_hdr;
+	rx_msg->jsm_msg = jsm_msg;
+	rx_msg->callback = cons->rx_callback;
+
+	if (rx_msg->callback) {
+		list_add_tail(&rx_msg->link, &ipc->cb_msg_list);
+	} else {
+		spin_lock(&cons->rx_lock);
+		list_add_tail(&rx_msg->link, &cons->rx_msg_list);
+		spin_unlock(&cons->rx_lock);
+		wake_up(&cons->rx_msg_wq);
+	}
+}
+
+static void
+ivpu_ipc_rx_msg_del(struct ivpu_device *vdev, struct ivpu_ipc_rx_msg *rx_msg)
+{
+	list_del(&rx_msg->link);
+	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+	atomic_dec(&vdev->ipc->rx_msg_count);
+	kfree(rx_msg);
+}
+
+void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+			   u32 channel, ivpu_ipc_rx_callback_t rx_callback)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 
@@ -150,13 +183,14 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	cons->tx_vpu_addr = 0;
 	cons->request_id = 0;
 	cons->aborted = false;
+	cons->rx_callback = rx_callback;
 	spin_lock_init(&cons->rx_lock);
 	INIT_LIST_HEAD(&cons->rx_msg_list);
 	init_waitqueue_head(&cons->rx_msg_wq);
 
-	spin_lock_irq(&ipc->cons_list_lock);
+	spin_lock_irq(&ipc->cons_lock);
 	list_add_tail(&cons->link, &ipc->cons_list);
-	spin_unlock_irq(&ipc->cons_list_lock);
+	spin_unlock_irq(&ipc->cons_lock);
 }
 
 void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
@@ -164,18 +198,13 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_rx_msg *rx_msg, *r;
 
-	spin_lock_irq(&ipc->cons_list_lock);
+	spin_lock_irq(&ipc->cons_lock);
 	list_del(&cons->link);
-	spin_unlock_irq(&ipc->cons_list_lock);
+	spin_unlock_irq(&ipc->cons_lock);
 
 	spin_lock_irq(&cons->rx_lock);
-	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
-		list_del(&rx_msg->link);
-		if (!cons->aborted)
-			ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
-		atomic_dec(&ipc->rx_msg_count);
-		kfree(rx_msg);
-	}
+	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+		ivpu_ipc_rx_msg_del(vdev, rx_msg);
 	spin_unlock_irq(&cons->rx_lock);
 
 	ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
@@ -205,15 +234,12 @@ unlock:
 	return ret;
 }
 
-static int ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
+static bool ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
 {
-	int ret = 0;
-
-	if (IS_KTHREAD())
-		ret |= (kthread_should_stop() || kthread_should_park());
+	bool ret;
 
 	spin_lock_irq(&cons->rx_lock);
-	ret |= !list_empty(&cons->rx_msg_list) || cons->aborted;
+	ret = !list_empty(&cons->rx_msg_list) || cons->aborted;
 	spin_unlock_irq(&cons->rx_lock);
 
 	return ret;
@@ -221,19 +247,18 @@ static int ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
 
 int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 		     struct ivpu_ipc_hdr *ipc_buf,
-		     struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
+		     struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms)
 {
-	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_rx_msg *rx_msg;
 	int wait_ret, ret = 0;
 
+	if (drm_WARN_ONCE(&vdev->drm, cons->rx_callback, "Consumer works only in async mode\n"))
+		return -EINVAL;
+
 	wait_ret = wait_event_timeout(cons->rx_msg_wq,
 				      ivpu_ipc_rx_need_wakeup(cons),
 				      msecs_to_jiffies(timeout_ms));
 
-	if (IS_KTHREAD() && kthread_should_stop())
-		return -EINTR;
-
 	if (wait_ret == 0)
 		return -ETIMEDOUT;
 
@@ -247,27 +272,23 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 		spin_unlock_irq(&cons->rx_lock);
 		return -EAGAIN;
 	}
-	list_del(&rx_msg->link);
-	spin_unlock_irq(&cons->rx_lock);
 
 	if (ipc_buf)
 		memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
 	if (rx_msg->jsm_msg) {
-		u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload));
+		u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg));
 
 		if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
 			ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
 			ret = -EBADMSG;
 		}
 
-		if (ipc_payload)
-			memcpy(ipc_payload, rx_msg->jsm_msg, size);
+		if (jsm_msg)
+			memcpy(jsm_msg, rx_msg->jsm_msg, size);
 	}
 
-	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
-	atomic_dec(&ipc->rx_msg_count);
-	kfree(rx_msg);
-
+	ivpu_ipc_rx_msg_del(vdev, rx_msg);
+	spin_unlock_irq(&cons->rx_lock);
 	return ret;
 }
 
@@ -280,7 +301,7 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
 	struct ivpu_ipc_consumer cons;
 	int ret;
 
-	ivpu_ipc_consumer_add(vdev, &cons, channel);
+	ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
 
 	ret = ivpu_ipc_send(vdev, &cons, req);
 	if (ret) {
@@ -359,35 +380,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons
 	return false;
 }
 
-static void
-ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-		  struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
-{
-	struct ivpu_ipc_info *ipc = vdev->ipc;
-	struct ivpu_ipc_rx_msg *rx_msg;
-
-	lockdep_assert_held(&ipc->cons_list_lock);
-	lockdep_assert_irqs_disabled();
-
-	rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
-	if (!rx_msg) {
-		ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
-		return;
-	}
-
-	atomic_inc(&ipc->rx_msg_count);
-
-	rx_msg->ipc_hdr = ipc_hdr;
-	rx_msg->jsm_msg = jsm_msg;
-
-	spin_lock(&cons->rx_lock);
-	list_add_tail(&rx_msg->link, &cons->rx_msg_list);
-	spin_unlock(&cons->rx_lock);
-
-	wake_up(&cons->rx_msg_wq);
-}
-
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_consumer *cons;
@@ -405,7 +398,7 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
 		if (vpu_addr == REG_IO_ERROR) {
 			ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
-			return -EIO;
+			return;
 		}
 
 		ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
@@ -435,15 +428,15 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		}
 
 		dispatched = false;
-		spin_lock_irqsave(&ipc->cons_list_lock, flags);
+		spin_lock_irqsave(&ipc->cons_lock, flags);
 		list_for_each_entry(cons, &ipc->cons_list, link) {
 			if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) {
-				ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg);
+				ivpu_ipc_rx_msg_add(vdev, cons, ipc_hdr, jsm_msg);
 				dispatched = true;
 				break;
 			}
 		}
-		spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+		spin_unlock_irqrestore(&ipc->cons_lock, flags);
 
 		if (!dispatched) {
 			ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr);
@@ -451,7 +444,28 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		}
 	}
 
-	return 0;
+	if (wake_thread)
+		*wake_thread = !list_empty(&ipc->cb_msg_list);
+}
+
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *rx_msg, *r;
+	struct list_head cb_msg_list;
+
+	INIT_LIST_HEAD(&cb_msg_list);
+
+	spin_lock_irq(&ipc->cons_lock);
+	list_splice_tail_init(&ipc->cb_msg_list, &cb_msg_list);
+	spin_unlock_irq(&ipc->cons_lock);
+
+	list_for_each_entry_safe(rx_msg, r, &cb_msg_list, link) {
+		rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+		ivpu_ipc_rx_msg_del(vdev, rx_msg);
+	}
+
+	return IRQ_HANDLED;
 }
 
 int ivpu_ipc_init(struct ivpu_device *vdev)
@@ -486,10 +500,10 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
 		goto err_free_rx;
 	}
 
+	spin_lock_init(&ipc->cons_lock);
 	INIT_LIST_HEAD(&ipc->cons_list);
-	spin_lock_init(&ipc->cons_list_lock);
+	INIT_LIST_HEAD(&ipc->cb_msg_list);
 	drmm_mutex_init(&vdev->drm, &ipc->lock);
-
 	ivpu_ipc_reset(vdev);
 	return 0;
 
@@ -502,6 +516,13 @@ err_free_tx:
 
 void ivpu_ipc_fini(struct ivpu_device *vdev)
 {
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+
+	drm_WARN_ON(&vdev->drm, ipc->on);
+	drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list));
+	drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
+	drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
+
 	ivpu_ipc_mem_fini(vdev);
 }
 
@@ -518,22 +539,27 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_consumer *cons, *c;
-	unsigned long flags;
+	struct ivpu_ipc_rx_msg *rx_msg, *r;
+
+	drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
 
 	mutex_lock(&ipc->lock);
 	ipc->on = false;
 	mutex_unlock(&ipc->lock);
 
-	spin_lock_irqsave(&ipc->cons_list_lock, flags);
+	spin_lock_irq(&ipc->cons_lock);
 	list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
-		if (cons->channel != VPU_IPC_CHAN_JOB_RET) {
-			spin_lock(&cons->rx_lock);
+		spin_lock(&cons->rx_lock);
+		if (!cons->rx_callback)
 			cons->aborted = true;
-			spin_unlock(&cons->rx_lock);
-		}
+		list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+			ivpu_ipc_rx_msg_del(vdev, rx_msg);
+		spin_unlock(&cons->rx_lock);
 		wake_up(&cons->rx_msg_wq);
 	}
-	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+	spin_unlock_irq(&ipc->cons_lock);
+
+	drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
 }
 
 void ivpu_ipc_reset(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 71b2e648dffd7..40ca3cc4e61f7 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -42,12 +42,24 @@ struct ivpu_ipc_hdr {
 	u8 status;
 } __packed __aligned(IVPU_IPC_ALIGNMENT);
 
+typedef void (*ivpu_ipc_rx_callback_t)(struct ivpu_device *vdev,
+				       struct ivpu_ipc_hdr *ipc_hdr,
+				       struct vpu_jsm_msg *jsm_msg);
+
+struct ivpu_ipc_rx_msg {
+	struct list_head link;
+	struct ivpu_ipc_hdr *ipc_hdr;
+	struct vpu_jsm_msg *jsm_msg;
+	ivpu_ipc_rx_callback_t callback;
+};
+
 struct ivpu_ipc_consumer {
 	struct list_head link;
 	u32 channel;
 	u32 tx_vpu_addr;
 	u32 request_id;
 	bool aborted;
+	ivpu_ipc_rx_callback_t rx_callback;
 
 	spinlock_t rx_lock; /* Protects rx_msg_list and aborted */
 	struct list_head rx_msg_list;
@@ -61,8 +73,9 @@ struct ivpu_ipc_info {
 
 	atomic_t rx_msg_count;
 
-	spinlock_t cons_list_lock; /* Protects cons_list */
+	spinlock_t cons_lock; /* Protects cons_list and cb_msg_list */
 	struct list_head cons_list;
+	struct list_head cb_msg_list;
 
 	atomic_t request_id;
 	struct mutex lock; /* Lock on status */
@@ -76,14 +89,15 @@ void ivpu_ipc_enable(struct ivpu_device *vdev);
 void ivpu_ipc_disable(struct ivpu_device *vdev);
 void ivpu_ipc_reset(struct ivpu_device *vdev);
 
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread);
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev);
 
 void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-			   u32 channel);
+			   u32 channel, ivpu_ipc_rx_callback_t callback);
 void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
 
 int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
+		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg,
 		     unsigned long timeout_ms);
 
 int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 796366d679846..7206cf9cdb4a4 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -7,7 +7,6 @@
 
 #include <linux/bitfield.h>
 #include <linux/highmem.h>
-#include <linux/kthread.h>
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <uapi/drm/ivpu_accel.h>
@@ -344,22 +343,6 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
 	return 0;
 }
 
-static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
-{
-	struct vpu_ipc_msg_payload_job_done *payload;
-	struct vpu_jsm_msg *job_ret_msg = msg;
-	int ret;
-
-	payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
-
-	ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
-	if (ret)
-		ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
-
-	if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
-		ivpu_start_job_timeout_detection(vdev);
-}
-
 void ivpu_jobs_abort_all(struct ivpu_device *vdev)
 {
 	struct ivpu_job *job;
@@ -567,65 +550,36 @@ free_handles:
 	return ret;
 }
 
-static int ivpu_job_done_thread(void *arg)
+static void
+ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
+		       struct vpu_jsm_msg *jsm_msg)
 {
-	struct ivpu_device *vdev = (struct ivpu_device *)arg;
-	struct ivpu_ipc_consumer cons;
-	struct vpu_jsm_msg jsm_msg;
-	unsigned int timeout;
+	struct vpu_ipc_msg_payload_job_done *payload;
 	int ret;
 
-	ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
-
-	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
-
-	while (!kthread_should_stop()) {
-		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
-		if (!ret)
-			ivpu_job_done_message(vdev, &jsm_msg);
-
-		if (kthread_should_park()) {
-			ivpu_dbg(vdev, JOB, "Parked %s\n", __func__);
-			kthread_parkme();
-			ivpu_dbg(vdev, JOB, "Unparked %s\n", __func__);
-		}
+	if (!jsm_msg) {
+		ivpu_err(vdev, "IPC message has no JSM payload\n");
+		return;
 	}
 
-	ivpu_ipc_consumer_del(vdev, &cons);
-
-	ivpu_jobs_abort_all(vdev);
-
-	ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
-	return 0;
-}
-
-int ivpu_job_done_thread_init(struct ivpu_device *vdev)
-{
-	struct task_struct *thread;
-
-	thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
-	if (IS_ERR(thread)) {
-		ivpu_err(vdev, "Failed to start job completion thread\n");
-		return -EIO;
+	if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
+		ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result);
+		return;
 	}
 
-	vdev->job_done_thread = thread;
-
-	return 0;
-}
-
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
-{
-	kthread_unpark(vdev->job_done_thread);
-	kthread_stop(vdev->job_done_thread);
+	payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
+	ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
+	if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
+		ivpu_start_job_timeout_detection(vdev);
 }
 
-void ivpu_job_done_thread_disable(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
 {
-	kthread_park(vdev->job_done_thread);
+	ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer,
+			      VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback);
 }
 
-void ivpu_job_done_thread_enable(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
 {
-	kthread_unpark(vdev->job_done_thread);
+	ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
 }
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 4b604c90041ea..45a2f2ec82e5b 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -59,10 +59,8 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
 void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
 
-int ivpu_job_done_thread_init(struct ivpu_device *vdev);
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
-void ivpu_job_done_thread_disable(struct ivpu_device *vdev);
-void ivpu_job_done_thread_enable(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
 
 void ivpu_jobs_abort_all(struct ivpu_device *vdev);
 
-- 
GitLab


From d3715a6471c8f0a90fb852c10a5a84948d6a1ff5 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 9 Nov 2023 15:54:36 -0800
Subject: [PATCH 0279/2340] drm/i915/huc: Stop printing about unsupported HuC
 on MTL

On MTL, the HuC is only supported on the media GT, so our validation
check on the module parameter detects an inconsistency on the root GT
(the modparams asks to enable HuC, but the support is not there) and
prints the following info message:

[drm] GT0: Incompatible option enable_guc=3 - HuC is not supported!

This can be confusing to the user and make them think that something is
wrong when it isn't, so we need to silence it.
Given that any platform that supports HuC also supports GuC, if a user
tries to enable HuC on a platform that really doesn't support it they'll
already see a message about GuC not being supported, so instead of just
silencing the HuC message on newer platforms we can just get rid of it
entirely.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <john.c.harrison@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231109235436.2349963-1-daniele.ceraolospurio@intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 27f6561dd7319..3872d309ed31f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -106,11 +106,6 @@ static void __confirm_options(struct intel_uc *uc)
 		gt_info(gt,  "Incompatible option enable_guc=%d - %s\n",
 			i915->params.enable_guc, "GuC is not supported!");
 
-	if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC &&
-	    !intel_uc_supports_huc(uc))
-		gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
-			i915->params.enable_guc, "HuC is not supported!");
-
 	if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
 	    !intel_uc_supports_guc_submission(uc))
 		gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
-- 
GitLab


From 9a626c1f36cfc409707528b53e36069c46aa5a9f Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 14 Nov 2023 17:55:28 +0200
Subject: [PATCH 0280/2340] drm/i915/display: keep struct intel_display members
 sorted

Like the comment says,

	/* Grouping using anonymous structs. Keep sorted. */

Stick to it.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114155528.96935-1-jani.nikula@intel.com
---
 .../gpu/drm/i915/display/intel_display_core.h  | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
index f2c84ae522170..3308f07968ff8 100644
--- a/drivers/gpu/drm/i915/display/intel_display_core.h
+++ b/drivers/gpu/drm/i915/display/intel_display_core.h
@@ -345,15 +345,6 @@ struct intel_display {
 		struct intel_global_obj obj;
 	} dbuf;
 
-	struct {
-		wait_queue_head_t waitqueue;
-
-		/* mutex to protect pmdemand programming sequence */
-		struct mutex lock;
-
-		struct intel_global_obj obj;
-	} pmdemand;
-
 	struct {
 		/*
 		 * dkl.phy_lock protects against concurrent access of the
@@ -441,6 +432,15 @@ struct intel_display {
 		bool false_color;
 	} ips;
 
+	struct {
+		wait_queue_head_t waitqueue;
+
+		/* mutex to protect pmdemand programming sequence */
+		struct mutex lock;
+
+		struct intel_global_obj obj;
+	} pmdemand;
+
 	struct {
 		struct i915_power_domains domains;
 
-- 
GitLab


From 9d6953335284fc37f25bf8488a15ee9444198248 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 14 Nov 2023 12:45:32 +0200
Subject: [PATCH 0281/2340] drm/i915: move *_crtc_clock_get() to intel_dpll.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Considering what the functions do, intel_dpll.c is a more suitable
location, and lets us make some functions static while at it.

This also means intel_display.c no longer does any DPIO access.

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114104534.4180144-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c  | 171 -----------------
 drivers/gpu/drm/i915/display/intel_display.h  |   2 -
 drivers/gpu/drm/i915/display/intel_dpll.c     | 175 +++++++++++++++++-
 drivers/gpu/drm/i915/display/intel_dpll.h     |   9 +-
 .../gpu/drm/i915/display/intel_pch_display.c  |   1 +
 5 files changed, 181 insertions(+), 177 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 125903007a292..959db3f61e844 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -73,7 +73,6 @@
 #include "intel_dp.h"
 #include "intel_dp_link_training.h"
 #include "intel_dp_mst.h"
-#include "intel_dpio_phy.h"
 #include "intel_dpll.h"
 #include "intel_dpll_mgr.h"
 #include "intel_dpt.h"
@@ -2859,67 +2858,6 @@ static void i9xx_get_pfit_config(struct intel_crtc_state *crtc_state)
 		intel_de_read(dev_priv, PFIT_PGM_RATIOS);
 }
 
-static void vlv_crtc_clock_get(struct intel_crtc *crtc,
-			       struct intel_crtc_state *pipe_config)
-{
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	enum pipe pipe = crtc->pipe;
-	struct dpll clock;
-	u32 mdiv;
-	int refclk = 100000;
-
-	/* In case of DSI, DPLL will not be used */
-	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
-		return;
-
-	vlv_dpio_get(dev_priv);
-	mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
-	vlv_dpio_put(dev_priv);
-
-	clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
-	clock.m2 = mdiv & DPIO_M2DIV_MASK;
-	clock.n = (mdiv >> DPIO_N_SHIFT) & 0xf;
-	clock.p1 = (mdiv >> DPIO_P1_SHIFT) & 7;
-	clock.p2 = (mdiv >> DPIO_P2_SHIFT) & 0x1f;
-
-	pipe_config->port_clock = vlv_calc_dpll_params(refclk, &clock);
-}
-
-static void chv_crtc_clock_get(struct intel_crtc *crtc,
-			       struct intel_crtc_state *pipe_config)
-{
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	enum pipe pipe = crtc->pipe;
-	enum dpio_channel port = vlv_pipe_to_channel(pipe);
-	struct dpll clock;
-	u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3;
-	int refclk = 100000;
-
-	/* In case of DSI, DPLL will not be used */
-	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
-		return;
-
-	vlv_dpio_get(dev_priv);
-	cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
-	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
-	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
-	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
-	pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
-	vlv_dpio_put(dev_priv);
-
-	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
-	clock.m2 = (pll_dw0 & 0xff) << 22;
-	if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN)
-		clock.m2 |= pll_dw2 & 0x3fffff;
-	clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf;
-	clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7;
-	clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f;
-
-	pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock);
-}
-
 static enum intel_output_format
 bdw_get_pipe_misc_output_format(struct intel_crtc *crtc)
 {
@@ -3840,115 +3778,6 @@ bool intel_crtc_get_pipe_config(struct intel_crtc_state *crtc_state)
 	return true;
 }
 
-static int i9xx_pll_refclk(struct drm_device *dev,
-			   const struct intel_crtc_state *pipe_config)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	u32 dpll = pipe_config->dpll_hw_state.dpll;
-
-	if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN)
-		return dev_priv->display.vbt.lvds_ssc_freq;
-	else if (HAS_PCH_SPLIT(dev_priv))
-		return 120000;
-	else if (DISPLAY_VER(dev_priv) != 2)
-		return 96000;
-	else
-		return 48000;
-}
-
-/* Returns the clock of the currently programmed mode of the given pipe. */
-void i9xx_crtc_clock_get(struct intel_crtc *crtc,
-			 struct intel_crtc_state *pipe_config)
-{
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	u32 dpll = pipe_config->dpll_hw_state.dpll;
-	u32 fp;
-	struct dpll clock;
-	int port_clock;
-	int refclk = i9xx_pll_refclk(dev, pipe_config);
-
-	if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0)
-		fp = pipe_config->dpll_hw_state.fp0;
-	else
-		fp = pipe_config->dpll_hw_state.fp1;
-
-	clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT;
-	if (IS_PINEVIEW(dev_priv)) {
-		clock.n = ffs((fp & FP_N_PINEVIEW_DIV_MASK) >> FP_N_DIV_SHIFT) - 1;
-		clock.m2 = (fp & FP_M2_PINEVIEW_DIV_MASK) >> FP_M2_DIV_SHIFT;
-	} else {
-		clock.n = (fp & FP_N_DIV_MASK) >> FP_N_DIV_SHIFT;
-		clock.m2 = (fp & FP_M2_DIV_MASK) >> FP_M2_DIV_SHIFT;
-	}
-
-	if (DISPLAY_VER(dev_priv) != 2) {
-		if (IS_PINEVIEW(dev_priv))
-			clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_PINEVIEW) >>
-				DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW);
-		else
-			clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK) >>
-			       DPLL_FPA01_P1_POST_DIV_SHIFT);
-
-		switch (dpll & DPLL_MODE_MASK) {
-		case DPLLB_MODE_DAC_SERIAL:
-			clock.p2 = dpll & DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 ?
-				5 : 10;
-			break;
-		case DPLLB_MODE_LVDS:
-			clock.p2 = dpll & DPLLB_LVDS_P2_CLOCK_DIV_7 ?
-				7 : 14;
-			break;
-		default:
-			drm_dbg_kms(&dev_priv->drm,
-				    "Unknown DPLL mode %08x in programmed "
-				    "mode\n", (int)(dpll & DPLL_MODE_MASK));
-			return;
-		}
-
-		if (IS_PINEVIEW(dev_priv))
-			port_clock = pnv_calc_dpll_params(refclk, &clock);
-		else
-			port_clock = i9xx_calc_dpll_params(refclk, &clock);
-	} else {
-		enum pipe lvds_pipe;
-
-		if (IS_I85X(dev_priv) &&
-		    intel_lvds_port_enabled(dev_priv, LVDS, &lvds_pipe) &&
-		    lvds_pipe == crtc->pipe) {
-			u32 lvds = intel_de_read(dev_priv, LVDS);
-
-			clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS) >>
-				       DPLL_FPA01_P1_POST_DIV_SHIFT);
-
-			if (lvds & LVDS_CLKB_POWER_UP)
-				clock.p2 = 7;
-			else
-				clock.p2 = 14;
-		} else {
-			if (dpll & PLL_P1_DIVIDE_BY_TWO)
-				clock.p1 = 2;
-			else {
-				clock.p1 = ((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830) >>
-					    DPLL_FPA01_P1_POST_DIV_SHIFT) + 2;
-			}
-			if (dpll & PLL_P2_DIVIDE_BY_4)
-				clock.p2 = 4;
-			else
-				clock.p2 = 2;
-		}
-
-		port_clock = i9xx_calc_dpll_params(refclk, &clock);
-	}
-
-	/*
-	 * This value includes pixel_multiplier. We will use
-	 * port_clock to compute adjusted_mode.crtc_clock in the
-	 * encoder's get_config() function.
-	 */
-	pipe_config->port_clock = port_clock;
-}
-
 int intel_dotclock_calculate(int link_freq,
 			     const struct intel_link_m_n *m_n)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 6d0636acb7250..8548f49e39722 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -481,8 +481,6 @@ void intel_cpu_transcoder_get_m1_n1(struct intel_crtc *crtc,
 void intel_cpu_transcoder_get_m2_n2(struct intel_crtc *crtc,
 				    enum transcoder cpu_transcoder,
 				    struct intel_link_m_n *m_n);
-void i9xx_crtc_clock_get(struct intel_crtc *crtc,
-			 struct intel_crtc_state *pipe_config);
 int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n);
 int intel_crtc_dotclock(const struct intel_crtc_state *pipe_config);
 enum intel_display_power_domain intel_port_to_power_domain(struct intel_digital_port *dig_port);
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c
index 509b862f9e607..ac732a3a70a83 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -16,6 +16,7 @@
 #include "intel_dpio_phy.h"
 #include "intel_dpll.h"
 #include "intel_lvds.h"
+#include "intel_lvds_regs.h"
 #include "intel_panel.h"
 #include "intel_pps.h"
 #include "intel_snps_phy.h"
@@ -311,7 +312,7 @@ static const struct intel_limit intel_limits_bxt = {
  * divided-down version of it.
  */
 /* m1 is reserved as 0 in Pineview, n is a ring counter */
-int pnv_calc_dpll_params(int refclk, struct dpll *clock)
+static int pnv_calc_dpll_params(int refclk, struct dpll *clock)
 {
 	clock->m = clock->m2 + 2;
 	clock->p = clock->p1 * clock->p2;
@@ -342,7 +343,7 @@ int i9xx_calc_dpll_params(int refclk, struct dpll *clock)
 	return clock->dot;
 }
 
-int vlv_calc_dpll_params(int refclk, struct dpll *clock)
+static int vlv_calc_dpll_params(int refclk, struct dpll *clock)
 {
 	clock->m = clock->m1 * clock->m2;
 	clock->p = clock->p1 * clock->p2 * 5;
@@ -368,6 +369,176 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock)
 	return clock->dot;
 }
 
+static int i9xx_pll_refclk(struct drm_device *dev,
+			   const struct intel_crtc_state *pipe_config)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	u32 dpll = pipe_config->dpll_hw_state.dpll;
+
+	if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN)
+		return dev_priv->display.vbt.lvds_ssc_freq;
+	else if (HAS_PCH_SPLIT(dev_priv))
+		return 120000;
+	else if (DISPLAY_VER(dev_priv) != 2)
+		return 96000;
+	else
+		return 48000;
+}
+
+/* Returns the clock of the currently programmed mode of the given pipe. */
+void i9xx_crtc_clock_get(struct intel_crtc *crtc,
+			 struct intel_crtc_state *pipe_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	u32 dpll = pipe_config->dpll_hw_state.dpll;
+	u32 fp;
+	struct dpll clock;
+	int port_clock;
+	int refclk = i9xx_pll_refclk(dev, pipe_config);
+
+	if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0)
+		fp = pipe_config->dpll_hw_state.fp0;
+	else
+		fp = pipe_config->dpll_hw_state.fp1;
+
+	clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT;
+	if (IS_PINEVIEW(dev_priv)) {
+		clock.n = ffs((fp & FP_N_PINEVIEW_DIV_MASK) >> FP_N_DIV_SHIFT) - 1;
+		clock.m2 = (fp & FP_M2_PINEVIEW_DIV_MASK) >> FP_M2_DIV_SHIFT;
+	} else {
+		clock.n = (fp & FP_N_DIV_MASK) >> FP_N_DIV_SHIFT;
+		clock.m2 = (fp & FP_M2_DIV_MASK) >> FP_M2_DIV_SHIFT;
+	}
+
+	if (DISPLAY_VER(dev_priv) != 2) {
+		if (IS_PINEVIEW(dev_priv))
+			clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_PINEVIEW) >>
+				DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW);
+		else
+			clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK) >>
+			       DPLL_FPA01_P1_POST_DIV_SHIFT);
+
+		switch (dpll & DPLL_MODE_MASK) {
+		case DPLLB_MODE_DAC_SERIAL:
+			clock.p2 = dpll & DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 ?
+				5 : 10;
+			break;
+		case DPLLB_MODE_LVDS:
+			clock.p2 = dpll & DPLLB_LVDS_P2_CLOCK_DIV_7 ?
+				7 : 14;
+			break;
+		default:
+			drm_dbg_kms(&dev_priv->drm,
+				    "Unknown DPLL mode %08x in programmed "
+				    "mode\n", (int)(dpll & DPLL_MODE_MASK));
+			return;
+		}
+
+		if (IS_PINEVIEW(dev_priv))
+			port_clock = pnv_calc_dpll_params(refclk, &clock);
+		else
+			port_clock = i9xx_calc_dpll_params(refclk, &clock);
+	} else {
+		enum pipe lvds_pipe;
+
+		if (IS_I85X(dev_priv) &&
+		    intel_lvds_port_enabled(dev_priv, LVDS, &lvds_pipe) &&
+		    lvds_pipe == crtc->pipe) {
+			u32 lvds = intel_de_read(dev_priv, LVDS);
+
+			clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS) >>
+				       DPLL_FPA01_P1_POST_DIV_SHIFT);
+
+			if (lvds & LVDS_CLKB_POWER_UP)
+				clock.p2 = 7;
+			else
+				clock.p2 = 14;
+		} else {
+			if (dpll & PLL_P1_DIVIDE_BY_TWO)
+				clock.p1 = 2;
+			else {
+				clock.p1 = ((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830) >>
+					    DPLL_FPA01_P1_POST_DIV_SHIFT) + 2;
+			}
+			if (dpll & PLL_P2_DIVIDE_BY_4)
+				clock.p2 = 4;
+			else
+				clock.p2 = 2;
+		}
+
+		port_clock = i9xx_calc_dpll_params(refclk, &clock);
+	}
+
+	/*
+	 * This value includes pixel_multiplier. We will use
+	 * port_clock to compute adjusted_mode.crtc_clock in the
+	 * encoder's get_config() function.
+	 */
+	pipe_config->port_clock = port_clock;
+}
+
+void vlv_crtc_clock_get(struct intel_crtc *crtc,
+			struct intel_crtc_state *pipe_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	enum pipe pipe = crtc->pipe;
+	struct dpll clock;
+	u32 mdiv;
+	int refclk = 100000;
+
+	/* In case of DSI, DPLL will not be used */
+	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
+		return;
+
+	vlv_dpio_get(dev_priv);
+	mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
+	vlv_dpio_put(dev_priv);
+
+	clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
+	clock.m2 = mdiv & DPIO_M2DIV_MASK;
+	clock.n = (mdiv >> DPIO_N_SHIFT) & 0xf;
+	clock.p1 = (mdiv >> DPIO_P1_SHIFT) & 7;
+	clock.p2 = (mdiv >> DPIO_P2_SHIFT) & 0x1f;
+
+	pipe_config->port_clock = vlv_calc_dpll_params(refclk, &clock);
+}
+
+void chv_crtc_clock_get(struct intel_crtc *crtc,
+			struct intel_crtc_state *pipe_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	enum pipe pipe = crtc->pipe;
+	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	struct dpll clock;
+	u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3;
+	int refclk = 100000;
+
+	/* In case of DSI, DPLL will not be used */
+	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
+		return;
+
+	vlv_dpio_get(dev_priv);
+	cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
+	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
+	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
+	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
+	pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
+	vlv_dpio_put(dev_priv);
+
+	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
+	clock.m2 = (pll_dw0 & 0xff) << 22;
+	if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN)
+		clock.m2 |= pll_dw2 & 0x3fffff;
+	clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf;
+	clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7;
+	clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f;
+
+	pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock);
+}
+
 /*
  * Returns whether the given set of divisors are valid for a given refclk with
  * the given connectors.
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.h b/drivers/gpu/drm/i915/display/intel_dpll.h
index bbc30542f29fa..ac01bb19cc6c3 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.h
+++ b/drivers/gpu/drm/i915/display/intel_dpll.h
@@ -20,8 +20,6 @@ int intel_dpll_crtc_compute_clock(struct intel_atomic_state *state,
 				  struct intel_crtc *crtc);
 int intel_dpll_crtc_get_shared_dpll(struct intel_atomic_state *state,
 				    struct intel_crtc *crtc);
-int vlv_calc_dpll_params(int refclk, struct dpll *clock);
-int pnv_calc_dpll_params(int refclk, struct dpll *clock);
 int i9xx_calc_dpll_params(int refclk, struct dpll *clock);
 u32 i9xx_dpll_compute_fp(const struct dpll *dpll);
 void vlv_compute_dpll(struct intel_crtc_state *crtc_state);
@@ -41,6 +39,13 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state,
 			struct dpll *best_clock);
 int chv_calc_dpll_params(int refclk, struct dpll *pll_clock);
 
+void i9xx_crtc_clock_get(struct intel_crtc *crtc,
+			 struct intel_crtc_state *pipe_config);
+void vlv_crtc_clock_get(struct intel_crtc *crtc,
+			struct intel_crtc_state *pipe_config);
+void chv_crtc_clock_get(struct intel_crtc *crtc,
+			struct intel_crtc_state *pipe_config);
+
 void assert_pll_enabled(struct drm_i915_private *i915, enum pipe pipe);
 void assert_pll_disabled(struct drm_i915_private *i915, enum pipe pipe);
 
diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c
index 866786e6b32f6..baf679759e004 100644
--- a/drivers/gpu/drm/i915/display/intel_pch_display.c
+++ b/drivers/gpu/drm/i915/display/intel_pch_display.c
@@ -8,6 +8,7 @@
 #include "intel_crt.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
+#include "intel_dpll.h"
 #include "intel_fdi.h"
 #include "intel_fdi_regs.h"
 #include "intel_lvds.h"
-- 
GitLab


From 9fda18c2c32a42e6c9fb68893b9628d6a5319555 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 14 Nov 2023 12:45:33 +0200
Subject: [PATCH 0282/2340] drm/i915: add vlv_pipe_to_phy() helper to replace
 DPIO_PHY()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a helper with better typing and handing for bogus input, and better
in line with vlv_dig_port_to_channel(), vlv_dig_port_to_phy(), and
vlv_pipe_to_channel().

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114104534.4180144-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_dpio_phy.c | 14 ++++++++++++++
 drivers/gpu/drm/i915/display/intel_dpio_phy.h |  5 +++++
 drivers/gpu/drm/i915/display/intel_pps.c      |  2 +-
 drivers/gpu/drm/i915/i915_reg.h               |  2 --
 drivers/gpu/drm/i915/vlv_sideband.c           |  6 ++++--
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
index 62b93d097e447..d6af46e33424b 100644
--- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
@@ -666,6 +666,20 @@ enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_port)
 	}
 }
 
+enum dpio_phy vlv_pipe_to_phy(enum pipe pipe)
+{
+	switch (pipe) {
+	default:
+		MISSING_CASE(pipe);
+		fallthrough;
+	case PIPE_A:
+	case PIPE_B:
+		return DPIO_PHY0;
+	case PIPE_C:
+		return DPIO_PHY1;
+	}
+}
+
 enum dpio_channel vlv_pipe_to_channel(enum pipe pipe)
 {
 	switch (pipe) {
diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.h b/drivers/gpu/drm/i915/display/intel_dpio_phy.h
index 4d43dbbdf81ce..9adc4e8c17381 100644
--- a/drivers/gpu/drm/i915/display/intel_dpio_phy.h
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.h
@@ -44,6 +44,7 @@ u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder);
 
 enum dpio_channel vlv_dig_port_to_channel(struct intel_digital_port *dig_port);
 enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_port);
+enum dpio_phy vlv_pipe_to_phy(enum pipe pipe);
 enum dpio_channel vlv_pipe_to_channel(enum pipe pipe);
 
 void chv_set_phy_signal_level(struct intel_encoder *encoder,
@@ -116,6 +117,10 @@ static inline enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_p
 {
 	return DPIO_PHY0;
 }
+static inline enum dpio_phy vlv_pipe_to_phy(enum pipe pipe)
+{
+	return DPIO_PHY0;
+}
 static inline enum dpio_channel vlv_pipe_to_channel(enum pipe pipe)
 {
 	return DPIO_CH0;
diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c
index 73f0f1714b377..a8fa3a20990e7 100644
--- a/drivers/gpu/drm/i915/display/intel_pps.c
+++ b/drivers/gpu/drm/i915/display/intel_pps.c
@@ -90,7 +90,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp)
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	enum pipe pipe = intel_dp->pps.pps_pipe;
 	bool pll_enabled, release_cl_override = false;
-	enum dpio_phy phy = DPIO_PHY(pipe);
+	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
 	enum dpio_channel ch = vlv_pipe_to_channel(pipe);
 	u32 DP;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 135e8d8dbdf06..27dc903f0553c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -195,8 +195,6 @@
 #define  DPIO_SFR_BYPASS		(1 << 1)
 #define  DPIO_CMNRST			(1 << 0)
 
-#define DPIO_PHY(pipe)			((pipe) >> 1)
-
 /*
  * Per pipe/PLL DPIO regs
  */
diff --git a/drivers/gpu/drm/i915/vlv_sideband.c b/drivers/gpu/drm/i915/vlv_sideband.c
index b98dec3ad817a..f7df552178454 100644
--- a/drivers/gpu/drm/i915/vlv_sideband.c
+++ b/drivers/gpu/drm/i915/vlv_sideband.c
@@ -229,7 +229,8 @@ static u32 vlv_dpio_phy_iosf_port(struct drm_i915_private *i915, enum dpio_phy p
 
 u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg)
 {
-	u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe));
+	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
+	u32 port = vlv_dpio_phy_iosf_port(i915, phy);
 	u32 val = 0;
 
 	vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MRD_NP, reg, &val);
@@ -248,7 +249,8 @@ u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg)
 void vlv_dpio_write(struct drm_i915_private *i915,
 		    enum pipe pipe, int reg, u32 val)
 {
-	u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe));
+	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
+	u32 port = vlv_dpio_phy_iosf_port(i915, phy);
 
 	vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val);
 }
-- 
GitLab


From f70a68bc1d18b7af52d368b80d1d0fed747ef2a9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 14 Nov 2023 12:45:34 +0200
Subject: [PATCH 0283/2340] drm/i915: convert vlv_dpio_read()/write() from pipe
 to phy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vlv_dpio_read() and vlv_dpio_write() really operate on the phy, not
pipe. Passing the pipe instead of the phy as parameter is supposed to be
a convenience, but when the caller has the phy, it becomes an
inconvenience. See e.g. chv_dpio_cmn_power_well_enable() and
assert_chv_phy_powergate().

Figure out the phy in the callers, and pass phy to the dpio functions.

v2: retract one overzealous pipe->phy change (Ville)

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114104534.4180144-3-jani.nikula@intel.com
---
 .../i915/display/intel_display_power_well.c   |  23 +--
 drivers/gpu/drm/i915/display/intel_dpio_phy.c | 157 +++++++++---------
 drivers/gpu/drm/i915/display/intel_dpll.c     | 106 ++++++------
 drivers/gpu/drm/i915/vlv_sideband.c           |  10 +-
 drivers/gpu/drm/i915/vlv_sideband.h           |   6 +-
 5 files changed, 151 insertions(+), 151 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c
index 07d6500500992..47cd6bb04366f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power_well.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c
@@ -1400,20 +1400,16 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
 {
 	enum i915_power_well_id id = i915_power_well_instance(power_well)->id;
 	enum dpio_phy phy;
-	enum pipe pipe;
 	u32 tmp;
 
 	drm_WARN_ON_ONCE(&dev_priv->drm,
 			 id != VLV_DISP_PW_DPIO_CMN_BC &&
 			 id != CHV_DISP_PW_DPIO_CMN_D);
 
-	if (id == VLV_DISP_PW_DPIO_CMN_BC) {
-		pipe = PIPE_A;
+	if (id == VLV_DISP_PW_DPIO_CMN_BC)
 		phy = DPIO_PHY0;
-	} else {
-		pipe = PIPE_C;
+	else
 		phy = DPIO_PHY1;
-	}
 
 	/* since ref/cri clock was enabled */
 	udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
@@ -1428,24 +1424,24 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
 	vlv_dpio_get(dev_priv);
 
 	/* Enable dynamic power down */
-	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28);
+	tmp = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW28);
 	tmp |= DPIO_DYNPWRDOWNEN_CH0 | DPIO_CL1POWERDOWNEN |
 		DPIO_SUS_CLK_CONFIG_GATE_CLKREQ;
-	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW28, tmp);
+	vlv_dpio_write(dev_priv, phy, CHV_CMN_DW28, tmp);
 
 	if (id == VLV_DISP_PW_DPIO_CMN_BC) {
-		tmp = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW6_CH1);
+		tmp = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW6_CH1);
 		tmp |= DPIO_DYNPWRDOWNEN_CH1;
-		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW6_CH1, tmp);
+		vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW6_CH1, tmp);
 	} else {
 		/*
 		 * Force the non-existing CL2 off. BXT does this
 		 * too, so maybe it saves some power even though
 		 * CL2 doesn't exist?
 		 */
-		tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30);
+		tmp = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW30);
 		tmp |= DPIO_CL2_LDOFUSE_PWRENB;
-		vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp);
+		vlv_dpio_write(dev_priv, phy, CHV_CMN_DW30, tmp);
 	}
 
 	vlv_dpio_put(dev_priv);
@@ -1499,7 +1495,6 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
 static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpio_phy phy,
 				     enum dpio_channel ch, bool override, unsigned int mask)
 {
-	enum pipe pipe = phy == DPIO_PHY0 ? PIPE_A : PIPE_C;
 	u32 reg, val, expected, actual;
 
 	/*
@@ -1518,7 +1513,7 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi
 		reg = _CHV_CMN_DW6_CH1;
 
 	vlv_dpio_get(dev_priv);
-	val = vlv_dpio_read(dev_priv, pipe, reg);
+	val = vlv_dpio_read(dev_priv, phy, reg);
 	vlv_dpio_put(dev_priv);
 
 	/*
diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
index d6af46e33424b..4ca910874a4fd 100644
--- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
@@ -703,50 +703,50 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel ch = vlv_dig_port_to_channel(dig_port);
-	enum pipe pipe = crtc->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	u32 val;
 	int i;
 
 	vlv_dpio_get(dev_priv);
 
 	/* Clear calc init */
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW10(ch));
 	val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
 	val &= ~(DPIO_PCS_TX1DEEMP_MASK | DPIO_PCS_TX2DEEMP_MASK);
 	val |= DPIO_PCS_TX1DEEMP_9P5 | DPIO_PCS_TX2DEEMP_9P5;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW10(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW10(ch));
 		val &= ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
 		val &= ~(DPIO_PCS_TX1DEEMP_MASK | DPIO_PCS_TX2DEEMP_MASK);
 		val |= DPIO_PCS_TX1DEEMP_9P5 | DPIO_PCS_TX2DEEMP_9P5;
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW10(ch), val);
 	}
 
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW9(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW9(ch));
 	val &= ~(DPIO_PCS_TX1MARGIN_MASK | DPIO_PCS_TX2MARGIN_MASK);
 	val |= DPIO_PCS_TX1MARGIN_000 | DPIO_PCS_TX2MARGIN_000;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW9(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW9(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW9(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW9(ch));
 		val &= ~(DPIO_PCS_TX1MARGIN_MASK | DPIO_PCS_TX2MARGIN_MASK);
 		val |= DPIO_PCS_TX1MARGIN_000 | DPIO_PCS_TX2MARGIN_000;
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW9(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW9(ch), val);
 	}
 
 	/* Program swing deemph */
 	for (i = 0; i < crtc_state->lane_count; i++) {
-		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW4(ch, i));
+		val = vlv_dpio_read(dev_priv, phy, CHV_TX_DW4(ch, i));
 		val &= ~DPIO_SWING_DEEMPH9P5_MASK;
 		val |= deemph_reg_value << DPIO_SWING_DEEMPH9P5_SHIFT;
-		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW4(ch, i), val);
+		vlv_dpio_write(dev_priv, phy, CHV_TX_DW4(ch, i), val);
 	}
 
 	/* Program swing margin */
 	for (i = 0; i < crtc_state->lane_count; i++) {
-		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i));
+		val = vlv_dpio_read(dev_priv, phy, CHV_TX_DW2(ch, i));
 
 		val &= ~DPIO_SWING_MARGIN000_MASK;
 		val |= margin_reg_value << DPIO_SWING_MARGIN000_SHIFT;
@@ -759,7 +759,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 		val &= ~(0xff << DPIO_UNIQ_TRANS_SCALE_SHIFT);
 		val |= 0x9a << DPIO_UNIQ_TRANS_SCALE_SHIFT;
 
-		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val);
+		vlv_dpio_write(dev_priv, phy, CHV_TX_DW2(ch, i), val);
 	}
 
 	/*
@@ -769,23 +769,23 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 	 * 27 for ch0 and ch1.
 	 */
 	for (i = 0; i < crtc_state->lane_count; i++) {
-		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW3(ch, i));
+		val = vlv_dpio_read(dev_priv, phy, CHV_TX_DW3(ch, i));
 		if (uniq_trans_scale)
 			val |= DPIO_TX_UNIQ_TRANS_SCALE_EN;
 		else
 			val &= ~DPIO_TX_UNIQ_TRANS_SCALE_EN;
-		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW3(ch, i), val);
+		vlv_dpio_write(dev_priv, phy, CHV_TX_DW3(ch, i), val);
 	}
 
 	/* Start swing calculation */
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW10(ch));
 	val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW10(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW10(ch));
 		val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW10(ch), val);
 	}
 
 	vlv_dpio_put(dev_priv);
@@ -796,43 +796,43 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder,
 			      bool reset)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	enum dpio_channel ch = vlv_dig_port_to_channel(enc_to_dig_port(encoder));
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	enum pipe pipe = crtc->pipe;
+	enum dpio_channel ch = vlv_dig_port_to_channel(enc_to_dig_port(encoder));
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	u32 val;
 
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW0(ch));
 	if (reset)
 		val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
 	else
 		val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW0(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW0(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW0(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW0(ch));
 		if (reset)
 			val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
 		else
 			val |= DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET;
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW0(ch), val);
 	}
 
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW1(ch));
 	val |= CHV_PCS_REQ_SOFTRESET_EN;
 	if (reset)
 		val &= ~DPIO_PCS_CLK_SOFT_RESET;
 	else
 		val |= DPIO_PCS_CLK_SOFT_RESET;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW1(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW1(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW1(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW1(ch));
 		val |= CHV_PCS_REQ_SOFTRESET_EN;
 		if (reset)
 			val &= ~DPIO_PCS_CLK_SOFT_RESET;
 		else
 			val |= DPIO_PCS_CLK_SOFT_RESET;
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW1(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW1(ch), val);
 	}
 }
 
@@ -843,6 +843,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel ch = vlv_dig_port_to_channel(dig_port);
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	enum pipe pipe = crtc->pipe;
 	unsigned int lane_mask =
 		intel_dp_unused_lane_mask(crtc_state->lane_count);
@@ -865,40 +866,40 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 
 	/* program left/right clock distribution */
 	if (pipe != PIPE_B) {
-		val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0);
+		val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW5_CH0);
 		val &= ~(CHV_BUFLEFTENA1_MASK | CHV_BUFRIGHTENA1_MASK);
 		if (ch == DPIO_CH0)
 			val |= CHV_BUFLEFTENA1_FORCE;
 		if (ch == DPIO_CH1)
 			val |= CHV_BUFRIGHTENA1_FORCE;
-		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW5_CH0, val);
+		vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW5_CH0, val);
 	} else {
-		val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW1_CH1);
+		val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW1_CH1);
 		val &= ~(CHV_BUFLEFTENA2_MASK | CHV_BUFRIGHTENA2_MASK);
 		if (ch == DPIO_CH0)
 			val |= CHV_BUFLEFTENA2_FORCE;
 		if (ch == DPIO_CH1)
 			val |= CHV_BUFRIGHTENA2_FORCE;
-		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val);
+		vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW1_CH1, val);
 	}
 
 	/* program clock channel usage */
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW8(ch));
 	val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE;
 	if (pipe != PIPE_B)
 		val &= ~CHV_PCS_USEDCLKCHANNEL;
 	else
 		val |= CHV_PCS_USEDCLKCHANNEL;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW8(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW8(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW8(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW8(ch));
 		val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE;
 		if (pipe != PIPE_B)
 			val &= ~CHV_PCS_USEDCLKCHANNEL;
 		else
 			val |= CHV_PCS_USEDCLKCHANNEL;
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW8(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW8(ch), val);
 	}
 
 	/*
@@ -906,12 +907,12 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	 * matches the pipe, but here we need to
 	 * pick the CL based on the port.
 	 */
-	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW19(ch));
+	val = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW19(ch));
 	if (pipe != PIPE_B)
 		val &= ~CHV_CMN_USEDCLKCHANNEL;
 	else
 		val |= CHV_CMN_USEDCLKCHANNEL;
-	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
+	vlv_dpio_write(dev_priv, phy, CHV_CMN_DW19(ch), val);
 
 	vlv_dpio_put(dev_priv);
 }
@@ -924,21 +925,21 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel ch = vlv_dig_port_to_channel(dig_port);
-	enum pipe pipe = crtc->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	int data, i, stagger;
 	u32 val;
 
 	vlv_dpio_get(dev_priv);
 
 	/* allow hardware to manage TX FIFO reset source */
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW11(ch));
 	val &= ~DPIO_LANEDESKEW_STRAP_OVRD;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW11(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW11(ch));
 		val &= ~DPIO_LANEDESKEW_STRAP_OVRD;
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW11(ch), val);
 	}
 
 	/* Program Tx lane latency optimal setting*/
@@ -948,7 +949,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 			data = 0x0;
 		else
 			data = (i == 1) ? 0x0 : 0x1;
-		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW14(ch, i),
+		vlv_dpio_write(dev_priv, phy, CHV_TX_DW14(ch, i),
 				data << DPIO_UPAR_SHIFT);
 	}
 
@@ -964,17 +965,17 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	else
 		stagger = 0x2;
 
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW11(ch));
 	val |= DPIO_TX2_STAGGER_MASK(0x1f);
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW11(ch), val);
 
 	if (crtc_state->lane_count > 2) {
-		val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch));
+		val = vlv_dpio_read(dev_priv, phy, VLV_PCS23_DW11(ch));
 		val |= DPIO_TX2_STAGGER_MASK(0x1f);
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val);
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW11(ch), val);
 	}
 
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW12(ch),
+	vlv_dpio_write(dev_priv, phy, VLV_PCS01_DW12(ch),
 		       DPIO_LANESTAGGER_STRAP(stagger) |
 		       DPIO_LANESTAGGER_STRAP_OVRD |
 		       DPIO_TX1_STAGGER_MASK(0x1f) |
@@ -982,7 +983,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 		       DPIO_TX2_STAGGER_MULT(0));
 
 	if (crtc_state->lane_count > 2) {
-		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW12(ch),
+		vlv_dpio_write(dev_priv, phy, VLV_PCS23_DW12(ch),
 			       DPIO_LANESTAGGER_STRAP(stagger) |
 			       DPIO_LANESTAGGER_STRAP_OVRD |
 			       DPIO_TX1_STAGGER_MASK(0x1f) |
@@ -1012,19 +1013,20 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum pipe pipe = to_intel_crtc(old_crtc_state->uapi.crtc)->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
 	u32 val;
 
 	vlv_dpio_get(dev_priv);
 
 	/* disable left/right clock distribution */
 	if (pipe != PIPE_B) {
-		val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW5_CH0);
+		val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW5_CH0);
 		val &= ~(CHV_BUFLEFTENA1_MASK | CHV_BUFRIGHTENA1_MASK);
-		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW5_CH0, val);
+		vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW5_CH0, val);
 	} else {
-		val = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW1_CH1);
+		val = vlv_dpio_read(dev_priv, phy, _CHV_CMN_DW1_CH1);
 		val &= ~(CHV_BUFLEFTENA2_MASK | CHV_BUFRIGHTENA2_MASK);
-		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val);
+		vlv_dpio_write(dev_priv, phy, _CHV_CMN_DW1_CH1, val);
 	}
 
 	vlv_dpio_put(dev_priv);
@@ -1050,22 +1052,22 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dig_port_to_channel(dig_port);
-	enum pipe pipe = crtc->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 
 	vlv_dpio_get(dev_priv);
 
-	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000);
-	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value);
-	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port),
+	vlv_dpio_write(dev_priv, phy, VLV_TX_DW5(port), 0x00000000);
+	vlv_dpio_write(dev_priv, phy, VLV_TX_DW4(port), demph_reg_value);
+	vlv_dpio_write(dev_priv, phy, VLV_TX_DW2(port),
 			 uniqtranscale_reg_value);
-	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW3(port), 0x0C782040);
+	vlv_dpio_write(dev_priv, phy, VLV_TX_DW3(port), 0x0C782040);
 
 	if (tx3_demph)
-		vlv_dpio_write(dev_priv, pipe, VLV_TX3_DW4(port), tx3_demph);
+		vlv_dpio_write(dev_priv, phy, VLV_TX3_DW4(port), tx3_demph);
 
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000);
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value);
-	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW11(port), 0x00030000);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW9(port), preemph_reg_value);
+	vlv_dpio_write(dev_priv, phy, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN);
 
 	vlv_dpio_put(dev_priv);
 }
@@ -1077,24 +1079,24 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dig_port_to_channel(dig_port);
-	enum pipe pipe = crtc->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 
 	/* Program Tx lane resets to default */
 	vlv_dpio_get(dev_priv);
 
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW0(port),
 			 DPIO_PCS_TX_LANE2_RESET |
 			 DPIO_PCS_TX_LANE1_RESET);
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port),
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW1(port),
 			 DPIO_PCS_CLK_CRI_RXEB_EIOS_EN |
 			 DPIO_PCS_CLK_CRI_RXDIGFILTSG_EN |
 			 (1<<DPIO_PCS_CLK_DATAWIDTH_SHIFT) |
 				 DPIO_PCS_CLK_SOFT_RESET);
 
 	/* Fix up inter-pair skew failure */
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00);
-	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500);
-	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW12(port), 0x00750f00);
+	vlv_dpio_write(dev_priv, phy, VLV_TX_DW11(port), 0x00001500);
+	vlv_dpio_write(dev_priv, phy, VLV_TX_DW14(port), 0x40400000);
 
 	vlv_dpio_put(dev_priv);
 }
@@ -1108,23 +1110,24 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dig_port_to_channel(dig_port);
 	enum pipe pipe = crtc->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
 	u32 val;
 
 	vlv_dpio_get(dev_priv);
 
 	/* Enable clock channels for this port */
-	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
+	val = vlv_dpio_read(dev_priv, phy, VLV_PCS01_DW8(port));
 	val = 0;
 	if (pipe)
 		val |= (1<<21);
 	else
 		val &= ~(1<<21);
 	val |= 0x001000c4;
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW8(port), val);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW8(port), val);
 
 	/* Program lane clock */
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW14(port), 0x00760018);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW23(port), 0x00400888);
 
 	vlv_dpio_put(dev_priv);
 }
@@ -1136,10 +1139,10 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	enum dpio_channel port = vlv_dig_port_to_channel(dig_port);
-	enum pipe pipe = crtc->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 
 	vlv_dpio_get(dev_priv);
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000);
-	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW0(port), 0x00000000);
+	vlv_dpio_write(dev_priv, phy, VLV_PCS_DW1(port), 0x00e00060);
 	vlv_dpio_put(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c
index ac732a3a70a83..3038655377ea4 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -483,7 +483,7 @@ void vlv_crtc_clock_get(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	enum pipe pipe = crtc->pipe;
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	struct dpll clock;
 	u32 mdiv;
 	int refclk = 100000;
@@ -493,7 +493,7 @@ void vlv_crtc_clock_get(struct intel_crtc *crtc,
 		return;
 
 	vlv_dpio_get(dev_priv);
-	mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
+	mdiv = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW3(crtc->pipe));
 	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
@@ -510,8 +510,8 @@ void chv_crtc_clock_get(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	enum pipe pipe = crtc->pipe;
-	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	enum dpio_channel port = vlv_pipe_to_channel(crtc->pipe);
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	struct dpll clock;
 	u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3;
 	int refclk = 100000;
@@ -521,11 +521,11 @@ void chv_crtc_clock_get(struct intel_crtc *crtc,
 		return;
 
 	vlv_dpio_get(dev_priv);
-	cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
-	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
-	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
-	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
-	pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
+	cmn_dw13 = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW13(port));
+	pll_dw0 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW0(port));
+	pll_dw1 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW1(port));
+	pll_dw2 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW2(port));
+	pll_dw3 = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW3(port));
 	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
@@ -1811,7 +1811,7 @@ void i9xx_enable_pll(const struct intel_crtc_state *crtc_state)
 }
 
 static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv,
-				 enum pipe pipe)
+				 enum dpio_phy phy)
 {
 	u32 reg_val;
 
@@ -1819,30 +1819,31 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv,
 	 * PLLB opamp always calibrates to max value of 0x3f, force enable it
 	 * and set it to a reasonable value instead.
 	 */
-	reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1));
+	reg_val = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW9(1));
 	reg_val &= 0xffffff00;
 	reg_val |= 0x00000030;
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW9(1), reg_val);
 
-	reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13);
+	reg_val = vlv_dpio_read(dev_priv, phy, VLV_REF_DW13);
 	reg_val &= 0x00ffffff;
 	reg_val |= 0x8c000000;
-	vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val);
+	vlv_dpio_write(dev_priv, phy, VLV_REF_DW13, reg_val);
 
-	reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1));
+	reg_val = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW9(1));
 	reg_val &= 0xffffff00;
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW9(1), reg_val);
 
-	reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13);
+	reg_val = vlv_dpio_read(dev_priv, phy, VLV_REF_DW13);
 	reg_val &= 0x00ffffff;
 	reg_val |= 0xb0000000;
-	vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val);
+	vlv_dpio_write(dev_priv, phy, VLV_REF_DW13, reg_val);
 }
 
 static void vlv_prepare_pll(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	enum pipe pipe = crtc->pipe;
 	u32 mdiv;
 	u32 bestn, bestm1, bestm2, bestp1, bestp2;
@@ -1860,18 +1861,18 @@ static void vlv_prepare_pll(const struct intel_crtc_state *crtc_state)
 
 	/* PLL B needs special handling */
 	if (pipe == PIPE_B)
-		vlv_pllb_recal_opamp(dev_priv, pipe);
+		vlv_pllb_recal_opamp(dev_priv, phy);
 
 	/* Set up Tx target for periodic Rcomp update */
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9_BCAST, 0x0100000f);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW9_BCAST, 0x0100000f);
 
 	/* Disable target IRef on PLL */
-	reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW8(pipe));
+	reg_val = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW8(pipe));
 	reg_val &= 0x00ffffff;
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW8(pipe), reg_val);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW8(pipe), reg_val);
 
 	/* Disable fast lock */
-	vlv_dpio_write(dev_priv, pipe, VLV_CMN_DW0, 0x610);
+	vlv_dpio_write(dev_priv, phy, VLV_CMN_DW0, 0x610);
 
 	/* Set idtafcrecal before PLL is enabled */
 	mdiv = ((bestm1 << DPIO_M1DIV_SHIFT) | (bestm2 & DPIO_M2DIV_MASK));
@@ -1885,46 +1886,46 @@ static void vlv_prepare_pll(const struct intel_crtc_state *crtc_state)
 	 * Note: don't use the DAC post divider as it seems unstable.
 	 */
 	mdiv |= (DPIO_POST_DIV_HDMIDP << DPIO_POST_DIV_SHIFT);
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW3(pipe), mdiv);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW3(pipe), mdiv);
 
 	mdiv |= DPIO_ENABLE_CALIBRATION;
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW3(pipe), mdiv);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW3(pipe), mdiv);
 
 	/* Set HBR and RBR LPF coefficients */
 	if (crtc_state->port_clock == 162000 ||
 	    intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG) ||
 	    intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI))
-		vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW10(pipe),
+		vlv_dpio_write(dev_priv, phy, VLV_PLL_DW10(pipe),
 				 0x009f0003);
 	else
-		vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW10(pipe),
+		vlv_dpio_write(dev_priv, phy, VLV_PLL_DW10(pipe),
 				 0x00d0000f);
 
 	if (intel_crtc_has_dp_encoder(crtc_state)) {
 		/* Use SSC source */
 		if (pipe == PIPE_A)
-			vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe),
+			vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe),
 					 0x0df40000);
 		else
-			vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe),
+			vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe),
 					 0x0df70000);
 	} else { /* HDMI or VGA */
 		/* Use bend source */
 		if (pipe == PIPE_A)
-			vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe),
+			vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe),
 					 0x0df70000);
 		else
-			vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW5(pipe),
+			vlv_dpio_write(dev_priv, phy, VLV_PLL_DW5(pipe),
 					 0x0df40000);
 	}
 
-	coreclk = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW7(pipe));
+	coreclk = vlv_dpio_read(dev_priv, phy, VLV_PLL_DW7(pipe));
 	coreclk = (coreclk & 0x0000ff00) | 0x01c00000;
 	if (intel_crtc_has_dp_encoder(crtc_state))
 		coreclk |= 0x01000000;
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW7(pipe), coreclk);
 
-	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000);
+	vlv_dpio_write(dev_priv, phy, VLV_PLL_DW11(pipe), 0x87871000);
 
 	vlv_dpio_put(dev_priv);
 }
@@ -1975,6 +1976,7 @@ static void chv_prepare_pll(const struct intel_crtc_state *crtc_state)
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	u32 loopfilter, tribuf_calcntr;
 	u32 bestm2, bestp1, bestp2, bestm2_frac;
 	u32 dpio_val;
@@ -1991,39 +1993,39 @@ static void chv_prepare_pll(const struct intel_crtc_state *crtc_state)
 	vlv_dpio_get(dev_priv);
 
 	/* p1 and p2 divider */
-	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port),
+	vlv_dpio_write(dev_priv, phy, CHV_CMN_DW13(port),
 			5 << DPIO_CHV_S1_DIV_SHIFT |
 			bestp1 << DPIO_CHV_P1_DIV_SHIFT |
 			bestp2 << DPIO_CHV_P2_DIV_SHIFT |
 			1 << DPIO_CHV_K_DIV_SHIFT);
 
 	/* Feedback post-divider - m2 */
-	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW0(port), bestm2);
+	vlv_dpio_write(dev_priv, phy, CHV_PLL_DW0(port), bestm2);
 
 	/* Feedback refclk divider - n and m1 */
-	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW1(port),
+	vlv_dpio_write(dev_priv, phy, CHV_PLL_DW1(port),
 			DPIO_CHV_M1_DIV_BY_2 |
 			1 << DPIO_CHV_N_DIV_SHIFT);
 
 	/* M2 fraction division */
-	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW2(port), bestm2_frac);
+	vlv_dpio_write(dev_priv, phy, CHV_PLL_DW2(port), bestm2_frac);
 
 	/* M2 fraction division enable */
-	dpio_val = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
+	dpio_val = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW3(port));
 	dpio_val &= ~(DPIO_CHV_FEEDFWD_GAIN_MASK | DPIO_CHV_FRAC_DIV_EN);
 	dpio_val |= (2 << DPIO_CHV_FEEDFWD_GAIN_SHIFT);
 	if (bestm2_frac)
 		dpio_val |= DPIO_CHV_FRAC_DIV_EN;
-	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW3(port), dpio_val);
+	vlv_dpio_write(dev_priv, phy, CHV_PLL_DW3(port), dpio_val);
 
 	/* Program digital lock detect threshold */
-	dpio_val = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW9(port));
+	dpio_val = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW9(port));
 	dpio_val &= ~(DPIO_CHV_INT_LOCK_THRESHOLD_MASK |
 					DPIO_CHV_INT_LOCK_THRESHOLD_SEL_COARSE);
 	dpio_val |= (0x5 << DPIO_CHV_INT_LOCK_THRESHOLD_SHIFT);
 	if (!bestm2_frac)
 		dpio_val |= DPIO_CHV_INT_LOCK_THRESHOLD_SEL_COARSE;
-	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW9(port), dpio_val);
+	vlv_dpio_write(dev_priv, phy, CHV_PLL_DW9(port), dpio_val);
 
 	/* Loop filter */
 	if (vco == 5400000) {
@@ -2048,16 +2050,16 @@ static void chv_prepare_pll(const struct intel_crtc_state *crtc_state)
 		loopfilter |= (0x3 << DPIO_CHV_GAIN_CTRL_SHIFT);
 		tribuf_calcntr = 0;
 	}
-	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW6(port), loopfilter);
+	vlv_dpio_write(dev_priv, phy, CHV_PLL_DW6(port), loopfilter);
 
-	dpio_val = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW8(port));
+	dpio_val = vlv_dpio_read(dev_priv, phy, CHV_PLL_DW8(port));
 	dpio_val &= ~DPIO_CHV_TDC_TARGET_CNT_MASK;
 	dpio_val |= (tribuf_calcntr << DPIO_CHV_TDC_TARGET_CNT_SHIFT);
-	vlv_dpio_write(dev_priv, pipe, CHV_PLL_DW8(port), dpio_val);
+	vlv_dpio_write(dev_priv, phy, CHV_PLL_DW8(port), dpio_val);
 
 	/* AFC Recal */
-	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port),
-			vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) |
+	vlv_dpio_write(dev_priv, phy, CHV_CMN_DW14(port),
+			vlv_dpio_read(dev_priv, phy, CHV_CMN_DW14(port)) |
 			DPIO_AFC_RECAL);
 
 	vlv_dpio_put(dev_priv);
@@ -2069,14 +2071,15 @@ static void _chv_enable_pll(const struct intel_crtc_state *crtc_state)
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	enum dpio_phy phy = vlv_pipe_to_phy(crtc->pipe);
 	u32 tmp;
 
 	vlv_dpio_get(dev_priv);
 
 	/* Enable back the 10bit clock to display controller */
-	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
+	tmp = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW14(port));
 	tmp |= DPIO_DCLKP_EN;
-	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp);
+	vlv_dpio_write(dev_priv, phy, CHV_CMN_DW14(port), tmp);
 
 	vlv_dpio_put(dev_priv);
 
@@ -2197,6 +2200,7 @@ void vlv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 {
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
+	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
 	u32 val;
 
 	/* Make sure the pipe isn't still relying on us */
@@ -2213,9 +2217,9 @@ void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 	vlv_dpio_get(dev_priv);
 
 	/* Disable 10bit clock to display controller */
-	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
+	val = vlv_dpio_read(dev_priv, phy, CHV_CMN_DW14(port));
 	val &= ~DPIO_DCLKP_EN;
-	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val);
+	vlv_dpio_write(dev_priv, phy, CHV_CMN_DW14(port), val);
 
 	vlv_dpio_put(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/vlv_sideband.c b/drivers/gpu/drm/i915/vlv_sideband.c
index f7df552178454..63a7958763095 100644
--- a/drivers/gpu/drm/i915/vlv_sideband.c
+++ b/drivers/gpu/drm/i915/vlv_sideband.c
@@ -227,9 +227,8 @@ static u32 vlv_dpio_phy_iosf_port(struct drm_i915_private *i915, enum dpio_phy p
 		return IOSF_PORT_DPIO;
 }
 
-u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg)
+u32 vlv_dpio_read(struct drm_i915_private *i915, enum dpio_phy phy, int reg)
 {
-	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
 	u32 port = vlv_dpio_phy_iosf_port(i915, phy);
 	u32 val = 0;
 
@@ -240,16 +239,15 @@ u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg)
 	 * so ideally we should check the register offset instead...
 	 */
 	drm_WARN(&i915->drm, val == 0xffffffff,
-		 "DPIO read pipe %c reg 0x%x == 0x%x\n",
-		 pipe_name(pipe), reg, val);
+		 "DPIO PHY%d read reg 0x%x == 0x%x\n",
+		 phy, reg, val);
 
 	return val;
 }
 
 void vlv_dpio_write(struct drm_i915_private *i915,
-		    enum pipe pipe, int reg, u32 val)
+		    enum dpio_phy phy, int reg, u32 val)
 {
-	enum dpio_phy phy = vlv_pipe_to_phy(pipe);
 	u32 port = vlv_dpio_phy_iosf_port(i915, phy);
 
 	vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val);
diff --git a/drivers/gpu/drm/i915/vlv_sideband.h b/drivers/gpu/drm/i915/vlv_sideband.h
index 9ce283d96b80a..4073966860e2a 100644
--- a/drivers/gpu/drm/i915/vlv_sideband.h
+++ b/drivers/gpu/drm/i915/vlv_sideband.h
@@ -11,7 +11,7 @@
 
 #include "vlv_sideband_reg.h"
 
-enum pipe;
+enum dpio_phy;
 struct drm_i915_private;
 
 enum {
@@ -75,9 +75,9 @@ static inline void vlv_dpio_get(struct drm_i915_private *i915)
 	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_DPIO));
 }
 
-u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg);
+u32 vlv_dpio_read(struct drm_i915_private *i915, enum dpio_phy phy, int reg);
 void vlv_dpio_write(struct drm_i915_private *i915,
-		    enum pipe pipe, int reg, u32 val);
+		    enum dpio_phy phy, int reg, u32 val);
 
 static inline void vlv_dpio_put(struct drm_i915_private *i915)
 {
-- 
GitLab


From c8031019dc95e3ab7cc0b09f1894c5f52dc0c187 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Tue, 10 Oct 2023 07:35:06 -0700
Subject: [PATCH 0284/2340] drm/amdgpu: Implement a new 64bit sequence memory
 driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Developed a new driver which allocates a 64bit memory on
each request in sequence order. At the moment, user queue
fence memory is the main consumer of this seq64 driver.

v2: Worked on review comments from Christian for the following
    modifications

    - Move driver name from "semaphore" to "seq64"
    - Remove unnecessary PT/PD mapping
    - Move enable_mes check into init/fini functions.

v3: Worked on review comments from Christian

    - drop enable_mes check
    - use DECLARE_BITMAP for bit array
    - added kerneldoc for seq64

v4: Worked on review comments from Christian
    - Rename amdgpu_seq64_get name with amdgpu_seq64_alloc

v5: Worked on review comments from Christian
    - Fix seq64 lockdep warning
    - move fpriv->seq64_va check into amdgpu_seq64_unmap()
    - make the function amdgpu_seq64_unmap() return as void.
    - reserve the buffers as not interruptible.

v6: port to drm_exec (Alex)
v7: disable for now (Arun)

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile        |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   7 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c  | 247 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h  |  49 ++++
 6 files changed, 311 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2afecc55090f7..260e32ef7bae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
 	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-	amdgpu_ring_mux.o amdgpu_xcp.o
+	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d92ca1576771..4d100f8d75bf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -109,6 +109,7 @@
 #include "amdgpu_mca.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_xcp.h"
+#include "amdgpu_seq64.h"
 
 #define MAX_GPU_INSTANCE		64
 
@@ -468,6 +469,7 @@ struct amdgpu_fpriv {
 	struct amdgpu_vm	vm;
 	struct amdgpu_bo_va	*prt_va;
 	struct amdgpu_bo_va	*csa_va;
+	struct amdgpu_bo_va	*seq64_va;
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
@@ -986,6 +988,9 @@ struct amdgpu_device {
 	/* GDS */
 	struct amdgpu_gds		gds;
 
+	/* for userq and VM fences */
+	struct amdgpu_seq64		seq64;
+
 	/* KFD */
 	struct amdgpu_kfd_dev		kfd;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7eeaf0aa7f812..151f8b950b726 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2676,6 +2676,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 					goto init_failed;
 				}
 			}
+
+			r = amdgpu_seq64_init(adev);
+			if (r) {
+				DRM_ERROR("allocate seq64 failed %d\n", r);
+				goto init_failed;
+			}
 		}
 	}
 
@@ -3138,6 +3144,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 			amdgpu_device_wb_fini(adev);
 			amdgpu_device_mem_scratch_fini(adev);
 			amdgpu_ib_pool_fini(adev);
+			amdgpu_seq64_fini(adev);
 		}
 
 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 583cf03950cd7..b5ebafd4a3adf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1428,6 +1428,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		fpriv->csa_va = NULL;
 	}
 
+	amdgpu_seq64_unmap(adev, fpriv);
+
 	pasid = fpriv->vm.pasid;
 	pd = amdgpu_bo_ref(fpriv->vm.root.bo);
 	if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
new file mode 100644
index 0000000000000..f3de02193138b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_seq64.h"
+
+#include <drm/drm_exec.h>
+
+/**
+ * DOC: amdgpu_seq64
+ *
+ * amdgpu_seq64 allocates a 64bit memory on each request in sequence order.
+ * seq64 driver is required for user queue fence memory allocation, TLB
+ * counters and VM updates. It has maximum count of 32768 64 bit slots.
+ */
+
+/**
+ * amdgpu_seq64_map - Map the seq64 memory to VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm pointer
+ * @bo_va: bo_va pointer
+ * @seq64_addr: seq64 vaddr start address
+ * @size: seq64 pool size
+ *
+ * Map the seq64 memory to the given VM.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va, u64 seq64_addr,
+		     uint32_t size)
+{
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	int r;
+
+	bo = adev->seq64.sbo;
+	if (!bo)
+		return -EINVAL;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r))
+			goto error;
+	}
+
+	*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+	if (!*bo_va) {
+		r = -ENOMEM;
+		goto error;
+	}
+
+	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size,
+			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+			     AMDGPU_PTE_EXECUTABLE);
+	if (r) {
+		DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
+		amdgpu_vm_bo_del(adev, *bo_va);
+		goto error;
+	}
+
+	r = amdgpu_vm_bo_update(adev, *bo_va, false);
+	if (r) {
+		DRM_ERROR("failed to do vm_bo_update on userq sem\n");
+		amdgpu_vm_bo_del(adev, *bo_va);
+		goto error;
+	}
+
+error:
+	drm_exec_fini(&exec);
+	return r;
+}
+
+/**
+ * amdgpu_seq64_unmap - Unmap the seq64 memory
+ *
+ * @adev: amdgpu_device pointer
+ * @fpriv: DRM file private
+ *
+ * Unmap the seq64 memory from the given VM.
+ */
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
+{
+	struct amdgpu_vm *vm;
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	int r;
+
+	if (!fpriv->seq64_va)
+		return;
+
+	bo = adev->seq64.sbo;
+	if (!bo)
+		return;
+
+	vm = &fpriv->vm;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r))
+			goto error;
+	}
+
+	amdgpu_vm_bo_del(adev, fpriv->seq64_va);
+
+	fpriv->seq64_va = NULL;
+
+error:
+	drm_exec_fini(&exec);
+}
+
+/**
+ * amdgpu_seq64_alloc - Allocate a 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @gpu_addr: allocated gpu VA start address
+ * @cpu_addr: allocated cpu VA start address
+ *
+ * Alloc a 64 bit memory from seq64 pool.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr,
+		       u64 **cpu_addr)
+{
+	unsigned long bit_pos;
+	u32 offset;
+
+	bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+
+	if (bit_pos < adev->seq64.num_sem) {
+		__set_bit(bit_pos, adev->seq64.used);
+		offset = bit_pos << 6; /* convert to qw offset */
+	} else {
+		return -EINVAL;
+	}
+
+	*gpu_addr = offset + AMDGPU_SEQ64_VADDR_START;
+	*cpu_addr = offset + adev->seq64.cpu_base_addr;
+
+	return 0;
+}
+
+/**
+ * amdgpu_seq64_free - Free the given 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @gpu_addr: gpu start address to be freed
+ *
+ * Free the given 64 bit memory from seq64 pool.
+ *
+ */
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr)
+{
+	u32 offset;
+
+	offset = gpu_addr - AMDGPU_SEQ64_VADDR_START;
+
+	offset >>= 6;
+	if (offset < adev->seq64.num_sem)
+		__clear_bit(offset, adev->seq64.used);
+}
+
+/**
+ * amdgpu_seq64_fini - Cleanup seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the memory space allocated for seq64.
+ *
+ */
+void amdgpu_seq64_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->seq64.sbo,
+			      NULL,
+			      (void **)&adev->seq64.cpu_base_addr);
+}
+
+/**
+ * amdgpu_seq64_init - Initialize seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the required memory space for seq64.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (adev->seq64.sbo)
+		return 0;
+
+	/*
+	 * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS
+	 * 64bit slots
+	 */
+	r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE,
+				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->seq64.sbo, NULL,
+				    (void **)&adev->seq64.cpu_base_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
+		return r;
+	}
+
+	memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE);
+
+	adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
+	memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
new file mode 100644
index 0000000000000..2196e72be508e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SEQ64_H__
+#define __AMDGPU_SEQ64_H__
+
+#define AMDGPU_SEQ64_SIZE		(2ULL << 20)
+#define AMDGPU_MAX_SEQ64_SLOTS		(AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8))
+#define AMDGPU_SEQ64_VADDR_OFFSET	0x50000
+#define AMDGPU_SEQ64_VADDR_START	(AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET)
+
+struct amdgpu_seq64 {
+	struct amdgpu_bo *sbo;
+	u32 num_sem;
+	u64 *cpu_base_addr;
+	DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
+};
+
+void amdgpu_seq64_fini(struct amdgpu_device *adev);
+int amdgpu_seq64_init(struct amdgpu_device *adev);
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr);
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size);
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);
+
+#endif
+
-- 
GitLab


From e6ed364efae39455cb1d6b1895a1d31599608a2b Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 16 Mar 2022 18:46:56 -0400
Subject: [PATCH 0285/2340] drm/amdgpu: update mappings not managed by KFD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When restoring after an eviction, use amdgpu_vm_handle_moved to update
BO VA mappings in KFD VMs that are not managed through the KFD API. This
should allow using the render node API to create more flexible memory
mappings in KFD VMs.

v2: rebase on drm_exec changes (Alex)

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 41fbc4fd0fac3..b98c0969d3be8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2825,12 +2825,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (ret)
 		goto validate_map_fail;
 
-	ret = process_sync_pds_resv(process_info, &sync_obj);
-	if (ret) {
-		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
-		goto validate_map_fail;
-	}
-
 	/* Validate BOs and map them to GPUVM (update VM page tables). */
 	list_for_each_entry(mem, &process_info->kfd_bo_list,
 			    validate_list) {
@@ -2881,6 +2875,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (failed_size)
 		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
 
+	/* Update mappings not managed by KFD */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			vm_list_node) {
+		struct amdgpu_device *adev = amdgpu_ttm_adev(
+			peer_vm->root.bo->tbo.bdev);
+
+		ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+		if (ret) {
+			pr_debug("Memory eviction: handle moved failed. Try again\n");
+			goto validate_map_fail;
+		}
+	}
+
 	/* Update page directories */
 	ret = process_update_pds(process_info, &sync_obj);
 	if (ret) {
@@ -2888,6 +2895,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 		goto validate_map_fail;
 	}
 
+	/* Sync with fences on all the page tables. They implicitly depend on any
+	 * move fences from amdgpu_vm_handle_moved above.
+	 */
+	ret = process_sync_pds_resv(process_info, &sync_obj);
+	if (ret) {
+		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+		goto validate_map_fail;
+	}
+
 	/* Wait for validate and PT updates to finish */
 	amdgpu_sync_wait(&sync_obj, false);
 
-- 
GitLab


From 94e2dae0a8bfd456abfd866f1eee8342f0858012 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 24 Feb 2023 18:22:32 -0500
Subject: [PATCH 0286/2340] drm/amdkfd: Move TLB flushing logic into amdgpu
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will make it possible for amdgpu GEM ioctls to flush TLBs on compute
VMs.

This removes VMID-based TLB flushing and always uses PASID-based
flushing. This still works because it scans the VMID-PASID mapping
registers to find the right VMID. It's only slightly less efficient. This
is not a production use case.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 29 --------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  5 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 44 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  5 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h      | 10 ++++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 31 ---------------
 6 files changed, 57 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b8412202a1b0e..6ab17330a6ed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -710,35 +710,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				     uint16_t vmid)
-{
-	if (adev->family == AMDGPU_FAMILY_AI) {
-		int i;
-
-		for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-	} else {
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
-	}
-
-	return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				      uint16_t pasid,
-				      enum TLB_FLUSH_TYPE flush_type,
-				      uint32_t inst)
-{
-	bool all_hub = false;
-
-	if (adev->family == AMDGPU_FAMILY_AI ||
-	    adev->family == AMDGPU_FAMILY_RV)
-		all_hub = true;
-
-	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
-}
-
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
 {
 	return adev->have_atomics_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index dac983da961d6..16794c2eea35d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -162,11 +162,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
-				uint32_t inst);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d1b8afd105c9f..d59154ddaaed3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1437,6 +1437,50 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask)
+{
+	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+	bool all_hub = false;
+	int xcc = 0, r = 0;
+
+	WARN_ON_ONCE(!vm->is_compute_context);
+
+	/*
+	 * It can be that we race and lose here, but that is extremely unlikely
+	 * and the worst thing which could happen is that we flush the changes
+	 * into the TLB once more which is harmless.
+	 */
+	if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+		return 0;
+
+	if (adev->family == AMDGPU_FAMILY_AI ||
+	    adev->family == AMDGPU_FAMILY_RV)
+		all_hub = true;
+
+	for_each_inst(xcc, xcc_mask) {
+		r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+						   all_hub, xcc);
+		if (r)
+			break;
+	}
+	return r;
+}
+
 /**
  * amdgpu_vm_bo_add - add a bo to a specific vm
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 2cd86d2bf73f7..b6cd565562ad8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -324,6 +324,7 @@ struct amdgpu_vm {
 	/* Last finished delayed update */
 	atomic64_t		tlb_seq;
 	struct dma_fence	*last_tlb_flush;
+	atomic64_t		kfd_last_flushed_seq;
 
 	/* How many times we had to re-generate the page tables */
 	uint64_t		generation;
@@ -445,6 +446,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm,
 			   struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask);
 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 			    struct amdgpu_vm *vm, struct amdgpu_bo *bo);
 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9cc32f577e38a..a40f8cfc6aa57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -748,7 +748,6 @@ struct kfd_process_device {
 	/* VM context for GPUVM allocations */
 	struct file *drm_file;
 	void *drm_priv;
-	atomic64_t tlb_seq;
 
 	/* GPUVM allocations storage */
 	struct idr alloc_idr;
@@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+				 enum TLB_FLUSH_TYPE type)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+	amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
 
 static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7a33e06f5c900..c10d050e1a612 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1667,7 +1667,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 		return ret;
 	}
 	pdd->drm_priv = drm_file->private_data;
-	atomic64_set(&pdd->tlb_seq, 0);
 
 	ret = kfd_process_device_reserve_ib_mem(pdd);
 	if (ret)
@@ -2059,36 +2058,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-	struct kfd_node *dev = pdd->dev;
-	uint32_t xcc_mask = dev->xcc_mask;
-	int xcc = 0;
-
-	/*
-	 * It can be that we race and lose here, but that is extremely unlikely
-	 * and the worst thing which could happen is that we flush the changes
-	 * into the TLB once more which is harmless.
-	 */
-	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-		return;
-
-	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		/* Nothing to flush until a VMID is assigned, which
-		 * only happens when the first queue is created.
-		 */
-		if (pdd->qpd.vmid)
-			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-							pdd->qpd.vmid);
-	} else {
-		for_each_inst(xcc, xcc_mask)
-			amdgpu_amdkfd_flush_gpu_tlb_pasid(
-				dev->adev, pdd->process->pasid, type, xcc);
-	}
-}
-
 /* assumes caller holds process lock. */
 int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
 {
-- 
GitLab


From fbbcb3f2b7c269c92218f315d22d6ab00524798a Mon Sep 17 00:00:00 2001
From: Ma Jun <Jun.Ma2@amd.com>
Date: Thu, 2 Nov 2023 11:19:46 +0800
Subject: [PATCH 0287/2340] drm/amd/pm: Fix return value and drop redundant
 param

Fix the return value and drop redundant parameter
of get_asic_baco_capability function.

Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h       |  2 +-
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c                  |  8 +++-----
 drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c     | 11 ++++-------
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c   |  7 +++----
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h   |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c   |  9 ++++-----
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h   |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c |  9 ++++-----
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h |  2 +-
 drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h         |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c            | 12 +++++-------
 11 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index cd3c40a860293..b14231f470dc5 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -421,7 +421,7 @@ struct amd_pm_funcs {
 	int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
-	int (*get_asic_baco_capability)(void *handle, bool *cap);
+	bool (*get_asic_baco_capability)(void *handle);
 	int (*get_asic_baco_state)(void *handle, int *state);
 	int (*set_asic_baco_state)(void *handle, int state);
 	int (*get_ppfeature_status)(void *handle, char *buf);
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 08cb79401410a..1ae3b81548fa3 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -185,8 +185,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 	void *pp_handle = adev->powerplay.pp_handle;
-	bool baco_cap;
-	int ret = 0;
+	bool ret;
 
 	if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
 		return false;
@@ -204,12 +203,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 
 	mutex_lock(&adev->pm.mutex);
 
-	ret = pp_funcs->get_asic_baco_capability(pp_handle,
-						 &baco_cap);
+	ret = pp_funcs->get_asic_baco_capability(pp_handle);
 
 	mutex_unlock(&adev->pm.mutex);
 
-	return ret ? false : baco_cap;
+	return ret;
 }
 
 int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 914c153871575..aed0e2cefbf99 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1371,21 +1371,18 @@ static int pp_set_active_display_count(void *handle, uint32_t count)
 	return phm_set_active_display_count(hwmgr, count);
 }
 
-static int pp_get_asic_baco_capability(void *handle, bool *cap)
+static bool pp_get_asic_baco_capability(void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
 
-	*cap = false;
 	if (!hwmgr)
-		return -EINVAL;
+		return false;
 
 	if (!(hwmgr->not_vf && amdgpu_dpm) ||
 		!hwmgr->hwmgr_func->get_asic_baco_capability)
-		return 0;
+		return false;
 
-	hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap);
-
-	return 0;
+	return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr);
 }
 
 static int pp_get_asic_baco_state(void *handle, int *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
index 044cda005aed1..e8a9471c1898b 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
@@ -33,21 +33,20 @@
 #include "smu/smu_7_1_2_d.h"
 #include "smu/smu_7_1_2_sh_mask.h"
 
-int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
 		return 0;
 
 	reg = RREG32(mmCC_BIF_BX_FUSESTRAP0);
 
 	if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK)
-		*cap = true;
+		return true;
 
-	return 0;
+	return false;
 }
 
 int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
index be0d98abb5369..73a773f4ce2eb 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 extern int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
index de0a37f7c6324..c66ef97415358 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
@@ -28,14 +28,13 @@
 #include "vega10_inc.h"
 #include "smu9_baco.h"
 
-int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg, data;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
-		return 0;
+		return false;
 
 	WREG32(0x12074, 0xFFF0003B);
 	data = RREG32(0x12075);
@@ -44,10 +43,10 @@ int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
 		reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0);
 
 		if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK)
-			*cap = true;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
index 84e90f801ac30..9ff7c2ea1b58d 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
index 994c0d374bfae..dad4c80aee58a 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
@@ -36,23 +36,22 @@ static const struct soc15_baco_cmd_entry clean_baco_tbl[] = {
 	{CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0},
 };
 
-int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
-		return 0;
+		return false;
 
 	if (((RREG32(0x17569) & 0x20000000) >> 29) == 0x1) {
 		reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0);
 
 		if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK)
-			*cap = true;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
index f06471e712dcb..bdad9c915631b 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
index 81650727a5def..6f536159df4d8 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
@@ -351,7 +351,7 @@ struct pp_hwmgr_func {
 	int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_hard_min_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_soft_max_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
-	int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap);
+	bool (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr);
 	int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 	int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 	int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 1ead323f1c781..37c2605f9b351 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3005,19 +3005,17 @@ static int smu_set_xgmi_pstate(void *handle,
 	return ret;
 }
 
-static int smu_get_baco_capability(void *handle, bool *cap)
+static bool smu_get_baco_capability(void *handle)
 {
 	struct smu_context *smu = handle;
 
-	*cap = false;
-
 	if (!smu->pm_enabled)
-		return 0;
+		return false;
 
-	if (smu->ppt_funcs && smu->ppt_funcs->baco_is_support)
-		*cap = smu->ppt_funcs->baco_is_support(smu);
+	if (!smu->ppt_funcs || !smu->ppt_funcs->baco_is_support)
+		return false;
 
-	return 0;
+	return smu->ppt_funcs->baco_is_support(smu);
 }
 
 static int smu_baco_set_state(void *handle, int state)
-- 
GitLab


From b5a52d2afe1b75f9d51461bb235ca40735e99fe7 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Sun, 5 Nov 2023 16:06:50 +0000
Subject: [PATCH 0288/2340] amdgpu: Adjust kmalloc_array calls for new
 -Walloc-size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GCC 14 introduces a new -Walloc-size included in -Wextra which errors out
on various files in drivers/gpu/drm/amd/amdgpu like:
```
amdgpu_amdkfd_gfx_v8.c:241:15: error: allocation of insufficient size ‘4’ for type ‘uint32_t[2]’ {aka ‘unsigned int[2]'} with size ‘8’ [-Werror=alloc-size]
```

This is because each HQD_N_REGS is actually a uint32_t[2]. Move the * 2 to
the size argument so GCC sees we're allocating enough.

Originally did 'sizeof(uint32_t) * 2' for the size but a friend suggested
'sizeof(**dump)' better communicates the intent.

Link: https://lore.kernel.org/all/87wmuwo7i3.fsf@gentoo.org/
Signed-off-by: Sam James <sam@gentoo.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c   | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 625db444df1cb..0ba15dcbe4e13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -200,7 +200,7 @@ int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index f6598b9e4faa3..a5c7259cf2a3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -141,7 +141,7 @@ static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);         \
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6bf448ab3dffc..ca4a6b82817f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -214,7 +214,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -301,7 +301,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+4)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index cd06e4a6d1da4..0f3e2944edd7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -238,7 +238,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -324,7 +324,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+4+2+3+7)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 00fbc0f44c929..5a35a8ca89222 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -363,7 +363,7 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -460,7 +460,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
-- 
GitLab


From d8a3813713c3843351123138c8b191142c266521 Mon Sep 17 00:00:00 2001
From: Abhinav Singh <singhabhinav9051571833@gmail.com>
Date: Fri, 3 Nov 2023 21:20:13 +0530
Subject: [PATCH 0289/2340] drm/radeon: Fix warning using plain integer as NULL

sparse static analysis tools generate a warning with this message
"Using plain integer as NULL pointer". In this case this warning is
being shown because we are trying to intialize a pointer to NULL using
integer value 0.

Signed-off-by: Abhinav Singh <singhabhinav9051571833@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/clearstate_evergreen.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/clearstate_evergreen.h b/drivers/gpu/drm/radeon/clearstate_evergreen.h
index 63a1ffbb3cedd..3b645558f133f 100644
--- a/drivers/gpu/drm/radeon/clearstate_evergreen.h
+++ b/drivers/gpu/drm/radeon/clearstate_evergreen.h
@@ -1049,7 +1049,7 @@ static const struct cs_extent_def SECT_CONTEXT_defs[] =
     {SECT_CONTEXT_def_5, 0x0000a29e, 5 },
     {SECT_CONTEXT_def_6, 0x0000a2a5, 56 },
     {SECT_CONTEXT_def_7, 0x0000a2de, 290 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CLEAR_def_1[] =
 {
@@ -1060,7 +1060,7 @@ static const u32 SECT_CLEAR_def_1[] =
 static const struct cs_extent_def SECT_CLEAR_defs[] =
 {
     {SECT_CLEAR_def_1, 0x0000ffc0, 3 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CTRLCONST_def_1[] =
 {
@@ -1070,11 +1070,11 @@ static const u32 SECT_CTRLCONST_def_1[] =
 static const struct cs_extent_def SECT_CTRLCONST_defs[] =
 {
     {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const struct cs_section_def evergreen_cs_data[] = {
     { SECT_CONTEXT_defs, SECT_CONTEXT },
     { SECT_CLEAR_defs, SECT_CLEAR },
     { SECT_CTRLCONST_defs, SECT_CTRLCONST },
-    { 0, SECT_NONE }
+    { NULL, SECT_NONE }
 };
-- 
GitLab


From 857c838c782728318c581cb656fddd74faa89ad2 Mon Sep 17 00:00:00 2001
From: Ma Jun <Jun.Ma2@amd.com>
Date: Fri, 3 Nov 2023 09:48:06 +0800
Subject: [PATCH 0290/2340] drm/amd/pm: Move some functions to smu_v13_0.c as
 generic code

Use generic functions and remove the duplicate code

Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    | 38 ++++++++++++++-----
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 36 +-----------------
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 36 +-----------------
 3 files changed, 32 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index cf1b84060bc3d..b446f860baf68 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2283,22 +2283,40 @@ int smu_v13_0_baco_set_state(struct smu_context *smu,
 
 int smu_v13_0_baco_enter(struct smu_context *smu)
 {
-	int ret = 0;
-
-	ret = smu_v13_0_baco_set_state(smu,
-				       SMU_BACO_STATE_ENTER);
-	if (ret)
-		return ret;
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
+	struct amdgpu_device *adev = smu->adev;
+	int ret;
 
-	msleep(10);
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		return smu_v13_0_baco_set_armd3_sequence(smu,
+				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
+					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
+	} else {
+		ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_ENTER);
+		if (!ret)
+			usleep_range(10000, 11000);
 
-	return ret;
+		return ret;
+	}
 }
 
 int smu_v13_0_baco_exit(struct smu_context *smu)
 {
-	return smu_v13_0_baco_set_state(smu,
-					SMU_BACO_STATE_EXIT);
+	struct amdgpu_device *adev = smu->adev;
+	int ret;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		/* Wait for PMFW handling for the Dstate change */
+		usleep_range(10000, 11000);
+		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+	} else {
+		ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
+	}
+
+	if (!ret)
+		adev->gfx.is_poweron = false;
+
+	return ret;
 }
 
 int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 82c4e1f1c6f07..a4debd7ab8bd0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2558,38 +2558,6 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 					       NULL);
 }
 
-static int smu_v13_0_0_baco_enter(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-	struct amdgpu_device *adev = smu->adev;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
-		return smu_v13_0_baco_set_armd3_sequence(smu,
-				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
-					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
-	else
-		return smu_v13_0_baco_enter(smu);
-}
-
-static int smu_v13_0_0_baco_exit(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
-		/* Wait for PMFW handling for the Dstate change */
-		usleep_range(10000, 11000);
-		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
-	} else {
-		ret = smu_v13_0_baco_exit(smu);
-	}
-
-	if (!ret)
-		adev->gfx.is_poweron = false;
-
-	return ret;
-}
-
 static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -3037,8 +3005,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.baco_is_support = smu_v13_0_baco_is_support,
 	.baco_get_state = smu_v13_0_baco_get_state,
 	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_0_baco_enter,
-	.baco_exit = smu_v13_0_0_baco_exit,
+	.baco_enter = smu_v13_0_baco_enter,
+	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_0_mode1_reset,
 	.mode2_reset = smu_v13_0_0_mode2_reset,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 81eafed76045e..f5596f031d00c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2515,38 +2515,6 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
 	return ret;
 }
 
-static int smu_v13_0_7_baco_enter(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-	struct amdgpu_device *adev = smu->adev;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
-		return smu_v13_0_baco_set_armd3_sequence(smu,
-				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
-					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
-	else
-		return smu_v13_0_baco_enter(smu);
-}
-
-static int smu_v13_0_7_baco_exit(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
-		/* Wait for PMFW handling for the Dstate change */
-		usleep_range(10000, 11000);
-		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
-	} else {
-		ret = smu_v13_0_baco_exit(smu);
-	}
-
-	if (!ret)
-		adev->gfx.is_poweron = false;
-
-	return ret;
-}
-
 static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -2628,8 +2596,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.baco_is_support = smu_v13_0_baco_is_support,
 	.baco_get_state = smu_v13_0_baco_get_state,
 	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_7_baco_enter,
-	.baco_exit = smu_v13_0_7_baco_exit,
+	.baco_enter = smu_v13_0_baco_enter,
+	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_mode1_reset,
 	.set_mp1_state = smu_v13_0_7_set_mp1_state,
-- 
GitLab


From 5ce8eccd53a357f91f2c2fe29918f9c65a1fe970 Mon Sep 17 00:00:00 2001
From: Ma Jun <Jun.Ma2@amd.com>
Date: Wed, 8 Nov 2023 16:00:30 +0800
Subject: [PATCH 0291/2340] drm/amd/pm: Make
 smu_v13_0_baco_set_armd3_sequence() static

smu_v13_0_baco_set_armd3_sequence is not used by other files, so
make it as static type.

Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   | 3 ---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 95cb919718aeb..48f5926d81535 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -210,9 +210,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu);
 int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
 					       struct pp_smu_nv_clock_table *max_clocks);
 
-int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
-				      enum smu_baco_seq baco_seq);
-
 bool smu_v13_0_baco_is_support(struct smu_context *smu);
 
 enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index b446f860baf68..86fc7273d5888 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2199,7 +2199,7 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
 	return ret;
 }
 
-int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
+static int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
 				      enum smu_baco_seq baco_seq)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
-- 
GitLab


From 8a1de314d1890793bbf9e77542574ceda007564e Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Wed, 2 Aug 2023 07:49:14 +0530
Subject: [PATCH 0292/2340] drm/amdgpu: Refactor 'amdgpu_connector_dvi_detect'
 in amdgpu_connectors.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the below:

WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
WARNING: Missing a blank line after declarations
WARNING: Too many leading tabs - consider code refactoring
+                                               if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
WARNING: Too many leading tabs - consider code refactoring
+                                                       if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {

Cc: Guchun Chen <guchun.chen@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Acked-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_connectors.c    | 69 +++++++++++--------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 7473a42f7d454..96f63fd39b9e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -103,7 +103,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 	struct amdgpu_connector_atom_dig *dig_connector;
 	int bpc = 8;
-	unsigned mode_clock, max_tmds_clock;
+	unsigned int mode_clock, max_tmds_clock;
 
 	switch (connector->connector_type) {
 	case DRM_MODE_CONNECTOR_DVII:
@@ -255,6 +255,7 @@ struct edid *amdgpu_connector_edid(struct drm_connector *connector)
 		return amdgpu_connector->edid;
 	} else if (edid_blob) {
 		struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL);
+
 		if (edid)
 			amdgpu_connector->edid = edid;
 	}
@@ -581,6 +582,7 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
 			amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
 		} else {
 			const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
 			amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
 		}
 
@@ -797,6 +799,7 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
 		amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
 	else {
 		const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
 		amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
 	}
 
@@ -979,6 +982,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
 	return false;
 }
 
+static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
+					struct drm_connector *connector,
+					struct amdgpu_connector *amdgpu_connector)
+{
+	struct drm_connector *list_connector;
+	struct drm_connector_list_iter iter;
+	struct amdgpu_connector *list_amdgpu_connector;
+	struct drm_device *dev = connector->dev;
+	struct amdgpu_device *adev = drm_to_adev(dev);
+
+	if (amdgpu_connector->shared_ddc && *status == connector_status_connected) {
+		drm_connector_list_iter_begin(dev, &iter);
+		drm_for_each_connector_iter(list_connector,
+					    &iter) {
+			if (connector == list_connector)
+				continue;
+			list_amdgpu_connector = to_amdgpu_connector(list_connector);
+			if (list_amdgpu_connector->shared_ddc &&
+			    list_amdgpu_connector->ddc_bus->rec.i2c_id ==
+			     amdgpu_connector->ddc_bus->rec.i2c_id) {
+				/* cases where both connectors are digital */
+				if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+					/* hpd is our only option in this case */
+					if (!amdgpu_display_hpd_sense(adev,
+								      amdgpu_connector->hpd.hpd)) {
+						amdgpu_connector_free_edid(connector);
+						*status = connector_status_disconnected;
+					}
+				}
+			}
+		}
+		drm_connector_list_iter_end(&iter);
+	}
+}
+
 /*
  * DVI is complicated
  * Do a DDC probe, if DDC probe passes, get the full EDID so
@@ -1065,32 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 			 * DDC line.  The latter is more complex because with DVI<->HDMI adapters
 			 * you don't really know what's connected to which port as both are digital.
 			 */
-			if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
-				struct drm_connector *list_connector;
-				struct drm_connector_list_iter iter;
-				struct amdgpu_connector *list_amdgpu_connector;
-
-				drm_connector_list_iter_begin(dev, &iter);
-				drm_for_each_connector_iter(list_connector,
-							    &iter) {
-					if (connector == list_connector)
-						continue;
-					list_amdgpu_connector = to_amdgpu_connector(list_connector);
-					if (list_amdgpu_connector->shared_ddc &&
-					    (list_amdgpu_connector->ddc_bus->rec.i2c_id ==
-					     amdgpu_connector->ddc_bus->rec.i2c_id)) {
-						/* cases where both connectors are digital */
-						if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
-							/* hpd is our only option in this case */
-							if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
-								amdgpu_connector_free_edid(connector);
-								ret = connector_status_disconnected;
-							}
-						}
-					}
-				}
-				drm_connector_list_iter_end(&iter);
-			}
+			 amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
 		}
 	}
 
@@ -1192,6 +1205,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 static void amdgpu_connector_dvi_force(struct drm_connector *connector)
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
 	if (connector->force == DRM_FORCE_ON)
 		amdgpu_connector->use_digital = false;
 	if (connector->force == DRM_FORCE_ON_DIGITAL)
@@ -1426,6 +1440,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 				ret = connector_status_connected;
 			else if (amdgpu_connector->dac_load_detect) { /* try load detection */
 				const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
 				ret = encoder_funcs->detect(encoder, connector);
 			}
 		}
-- 
GitLab


From f4fac4163c2f99aada9cc60292f2ea377afe6c71 Mon Sep 17 00:00:00 2001
From: Laurent Morichetti <laurent.morichetti@amd.com>
Date: Wed, 8 Nov 2023 16:16:18 -0800
Subject: [PATCH 0293/2340] drm/amdkfd: Clear the VALU exception state in the
 trap handler

The trap handler could be entered with pending VALU exceptions, so
clear the exception state before issuing vector instructions.

Reviewed-by: Jay Cornwall <jay.cornwall@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Laurent Morichetti <laurent.morichetti@amd.com>
Tested-by: Lancelot Six <lancelot.six@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 664 +++++++++---------
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    |   6 +
 2 files changed, 338 insertions(+), 332 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d7cd5fa313ff8..df75863393fcb 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -2069,7 +2069,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx10_hex[] = {
-	0xbf820001, 0xbf820220,
+	0xbf820001, 0xbf820221,
 	0xb0804004, 0xb978f802,
 	0x8a78ff78, 0x00020006,
 	0xb97bf803, 0x876eff78,
@@ -2118,391 +2118,391 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbf900004, 0xbf8cc07f,
 	0x877aff7f, 0x04000000,
 	0x8f7a857a, 0x886d7a6d,
-	0xbefa037e, 0x877bff7f,
-	0x0000ffff, 0xbefe03c1,
-	0xbeff03c1, 0xdc5f8000,
-	0x007a0000, 0x7e000280,
-	0xbefe037a, 0xbeff037b,
-	0xb97b02dc, 0x8f7b997b,
-	0xb97a3a05, 0x807a817a,
-	0xbf0d997b, 0xbf850002,
-	0x8f7a897a, 0xbf820001,
-	0x8f7a8a7a, 0xb97b1e06,
-	0x8f7b8a7b, 0x807a7b7a,
+	0x7e008200, 0xbefa037e,
 	0x877bff7f, 0x0000ffff,
-	0x807aff7a, 0x00000200,
-	0x807a7e7a, 0x827b807b,
-	0xd7610000, 0x00010870,
-	0xd7610000, 0x00010a71,
-	0xd7610000, 0x00010c72,
-	0xd7610000, 0x00010e73,
-	0xd7610000, 0x00011074,
-	0xd7610000, 0x00011275,
-	0xd7610000, 0x00011476,
-	0xd7610000, 0x00011677,
-	0xd7610000, 0x00011a79,
-	0xd7610000, 0x00011c7e,
-	0xd7610000, 0x00011e7f,
-	0xbefe03ff, 0x00003fff,
-	0xbeff0380, 0xdc5f8040,
-	0x007a0000, 0xd760007a,
-	0x00011d00, 0xd760007b,
-	0x00011f00, 0xbefe037a,
-	0xbeff037b, 0xbef4037e,
-	0x8775ff7f, 0x0000ffff,
-	0x8875ff75, 0x00040000,
-	0xbef60380, 0xbef703ff,
-	0x10807fac, 0xbef1037c,
-	0xbef00380, 0xb97302dc,
-	0x8f739973, 0xbefe03c1,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820002,
-	0xbeff03c1, 0xbf820009,
+	0xbefe03c1, 0xbeff03c1,
+	0xdc5f8000, 0x007a0000,
+	0x7e000280, 0xbefe037a,
+	0xbeff037b, 0xb97b02dc,
+	0x8f7b997b, 0xb97a3a05,
+	0x807a817a, 0xbf0d997b,
+	0xbf850002, 0x8f7a897a,
+	0xbf820001, 0x8f7a8a7a,
+	0xb97b1e06, 0x8f7b8a7b,
+	0x807a7b7a, 0x877bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xd7610000,
+	0x00010870, 0xd7610000,
+	0x00010a71, 0xd7610000,
+	0x00010c72, 0xd7610000,
+	0x00010e73, 0xd7610000,
+	0x00011074, 0xd7610000,
+	0x00011275, 0xd7610000,
+	0x00011476, 0xd7610000,
+	0x00011677, 0xd7610000,
+	0x00011a79, 0xd7610000,
+	0x00011c7e, 0xd7610000,
+	0x00011e7f, 0xbefe03ff,
+	0x00003fff, 0xbeff0380,
+	0xdc5f8040, 0x007a0000,
+	0xd760007a, 0x00011d00,
+	0xd760007b, 0x00011f00,
+	0xbefe037a, 0xbeff037b,
+	0xbef4037e, 0x8775ff7f,
+	0x0000ffff, 0x8875ff75,
+	0x00040000, 0xbef60380,
+	0xbef703ff, 0x10807fac,
+	0xbef1037c, 0xbef00380,
+	0xb97302dc, 0x8f739973,
+	0xbefe03c1, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbf850002, 0xbeff0380,
+	0xbf820002, 0xbeff03c1,
+	0xbf820009, 0xbef603ff,
+	0x01000000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
+	0x705d0300, 0xbf820008,
 	0xbef603ff, 0x01000000,
-	0xe0704080, 0x705d0100,
-	0xe0704100, 0x705d0200,
-	0xe0704180, 0x705d0300,
-	0xbf820008, 0xbef603ff,
-	0x01000000, 0xe0704100,
-	0x705d0100, 0xe0704200,
-	0x705d0200, 0xe0704300,
-	0x705d0300, 0xb9703a05,
-	0x80708170, 0xbf0d9973,
-	0xbf850002, 0x8f708970,
-	0xbf820001, 0x8f708a70,
-	0xb97a1e06, 0x8f7a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0xbef603ff,
-	0x01000000, 0x7e000280,
-	0x7e020280, 0x7e040280,
-	0xbefc0380, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xd7610002, 0x0000f86c,
-	0x807c817c, 0x8a7aff6d,
-	0x80000000, 0xd7610002,
-	0x0000f87a, 0x807c817c,
-	0xd7610002, 0x0000f86e,
-	0x807c817c, 0xd7610002,
-	0x0000f86f, 0x807c817c,
-	0xd7610002, 0x0000f878,
-	0x807c817c, 0xb97af803,
-	0xd7610002, 0x0000f87a,
-	0x807c817c, 0xd7610002,
-	0x0000f87b, 0x807c817c,
-	0xb971f801, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xb971f814, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xb971f815, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xbefe03ff, 0x0000ffff,
-	0xbeff0380, 0xe0704000,
-	0x705d0200, 0xbefe03c1,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
 	0xb9703a05, 0x80708170,
 	0xbf0d9973, 0xbf850002,
 	0x8f708970, 0xbf820001,
 	0x8f708a70, 0xb97a1e06,
 	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
 	0xbef603ff, 0x01000000,
-	0xbef90380, 0xbefc0380,
-	0xbf800000, 0xbe802f00,
-	0xbe822f02, 0xbe842f04,
-	0xbe862f06, 0xbe882f08,
-	0xbe8a2f0a, 0xbe8c2f0c,
-	0xbe8e2f0e, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x7e000280, 0x7e020280,
+	0x7e040280, 0xbefc0380,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xd7610002,
+	0x0000f86c, 0x807c817c,
+	0x8a7aff6d, 0x80000000,
+	0xd7610002, 0x0000f87a,
+	0x807c817c, 0xd7610002,
+	0x0000f86e, 0x807c817c,
+	0xd7610002, 0x0000f86f,
+	0x807c817c, 0xd7610002,
+	0x0000f878, 0x807c817c,
+	0xb97af803, 0xd7610002,
+	0x0000f87a, 0x807c817c,
+	0xd7610002, 0x0000f87b,
+	0x807c817c, 0xb971f801,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xb971f814,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xb971f815,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xbefe03ff,
+	0x0000ffff, 0xbeff0380,
+	0xe0704000, 0x705d0200,
+	0xbefe03c1, 0xb9703a05,
+	0x80708170, 0xbf0d9973,
+	0xbf850002, 0x8f708970,
+	0xbf820001, 0x8f708a70,
+	0xb97a1e06, 0x8f7a8a7a,
+	0x80707a70, 0xbef603ff,
+	0x01000000, 0xbef90380,
+	0xbefc0380, 0xbf800000,
+	0xbe802f00, 0xbe822f02,
+	0xbe842f04, 0xbe862f06,
+	0xbe882f08, 0xbe8a2f0a,
+	0xbe8c2f0c, 0xbe8e2f0e,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
 	0x80798179, 0xd7610002,
-	0x0000f20c, 0x80798179,
-	0xd7610002, 0x0000f20d,
+	0x0000f20b, 0x80798179,
+	0xd7610002, 0x0000f20c,
 	0x80798179, 0xd7610002,
-	0x0000f20e, 0x80798179,
-	0xd7610002, 0x0000f20f,
-	0x80798179, 0xbf06a079,
-	0xbf840006, 0xe0704000,
-	0x705d0200, 0x8070ff70,
-	0x00000080, 0xbef90380,
-	0x7e040280, 0x807c907c,
-	0xbf0aff7c, 0x00000060,
-	0xbf85ffbc, 0xbe802f00,
-	0xbe822f02, 0xbe842f04,
-	0xbe862f06, 0xbe882f08,
-	0xbe8a2f0a, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x0000f20d, 0x80798179,
+	0xd7610002, 0x0000f20e,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f20f, 0x80798179,
+	0xbf06a079, 0xbf840006,
+	0xe0704000, 0x705d0200,
+	0x8070ff70, 0x00000080,
+	0xbef90380, 0x7e040280,
+	0x807c907c, 0xbf0aff7c,
+	0x00000060, 0xbf85ffbc,
+	0xbe802f00, 0xbe822f02,
+	0xbe842f04, 0xbe862f06,
+	0xbe882f08, 0xbe8a2f0a,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
-	0x80798179, 0xe0704000,
-	0x705d0200, 0xbefe03c1,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb97b4306,
-	0x877bc17b, 0xbf840044,
-	0xbf8a0000, 0x877aff6d,
-	0x80000000, 0xbf840040,
-	0x8f7b867b, 0x8f7b827b,
-	0xbef6037b, 0xb9703a05,
-	0x80708170, 0xbf0d9973,
-	0xbf850002, 0x8f708970,
-	0xbf820001, 0x8f708a70,
-	0xb97a1e06, 0x8f7a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0x8070ff70,
-	0x00000080, 0xbef603ff,
-	0x01000000, 0xd7650000,
-	0x000100c1, 0xd7660000,
-	0x000200c1, 0x16000084,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbefc0380,
-	0xbf850012, 0xbe8303ff,
-	0x00000080, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf8c0000, 0xe0704000,
-	0x705d0100, 0x807c037c,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000080,
-	0xbf0a7b7c, 0xbf85fff4,
-	0xbf820011, 0xbe8303ff,
-	0x00000100, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf8c0000, 0xe0704000,
-	0x705d0100, 0x807c037c,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000100,
-	0xbf0a7b7c, 0xbf85fff4,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xe0704000, 0x705d0200,
 	0xbefe03c1, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbf850004, 0xbef003ff,
-	0x00000200, 0xbeff0380,
-	0xbf820003, 0xbef003ff,
-	0x00000400, 0xbeff03c1,
-	0xb97b3a05, 0x807b817b,
-	0x8f7b827b, 0x907c9973,
+	0xbf850002, 0xbeff0380,
+	0xbf820001, 0xbeff03c1,
+	0xb97b4306, 0x877bc17b,
+	0xbf840044, 0xbf8a0000,
+	0x877aff6d, 0x80000000,
+	0xbf840040, 0x8f7b867b,
+	0x8f7b827b, 0xbef6037b,
+	0xb9703a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0xd7650000, 0x000100c1,
+	0xd7660000, 0x000200c1,
+	0x16000084, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbf850017, 0xbef603ff,
-	0x01000000, 0xbefc0384,
-	0xbf0a7b7c, 0xbf840037,
-	0x7e008700, 0x7e028701,
-	0x7e048702, 0x7e068703,
-	0xe0704000, 0x705d0000,
-	0xe0704080, 0x705d0100,
-	0xe0704100, 0x705d0200,
-	0xe0704180, 0x705d0300,
-	0x807c847c, 0x8070ff70,
-	0x00000200, 0xbf0a7b7c,
-	0xbf85ffef, 0xbf820025,
+	0xbefc0380, 0xbf850012,
+	0xbe8303ff, 0x00000080,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000080, 0xbf0a7b7c,
+	0xbf85fff4, 0xbf820011,
+	0xbe8303ff, 0x00000100,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000100, 0xbf0a7b7c,
+	0xbf85fff4, 0xbefe03c1,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850004,
+	0xbef003ff, 0x00000200,
+	0xbeff0380, 0xbf820003,
+	0xbef003ff, 0x00000400,
+	0xbeff03c1, 0xb97b3a05,
+	0x807b817b, 0x8f7b827b,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850017,
 	0xbef603ff, 0x01000000,
 	0xbefc0384, 0xbf0a7b7c,
-	0xbf840011, 0x7e008700,
+	0xbf840037, 0x7e008700,
 	0x7e028701, 0x7e048702,
 	0x7e068703, 0xe0704000,
-	0x705d0000, 0xe0704100,
-	0x705d0100, 0xe0704200,
-	0x705d0200, 0xe0704300,
+	0x705d0000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
 	0x705d0300, 0x807c847c,
-	0x8070ff70, 0x00000400,
+	0x8070ff70, 0x00000200,
 	0xbf0a7b7c, 0xbf85ffef,
-	0xb97b1e06, 0x877bc17b,
-	0xbf84000c, 0x8f7b837b,
-	0x807b7c7b, 0xbefe03c1,
-	0xbeff0380, 0x7e008700,
+	0xbf820025, 0xbef603ff,
+	0x01000000, 0xbefc0384,
+	0xbf0a7b7c, 0xbf840011,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
 	0xe0704000, 0x705d0000,
-	0x807c817c, 0x8070ff70,
-	0x00000080, 0xbf0a7b7c,
-	0xbf85fff8, 0xbf82013b,
-	0xbef4037e, 0x8775ff7f,
-	0x0000ffff, 0x8875ff75,
-	0x00040000, 0xbef60380,
-	0xbef703ff, 0x10807fac,
-	0xb97202dc, 0x8f729972,
-	0x876eff7f, 0x04000000,
-	0xbf840034, 0xbefe03c1,
-	0x907c9972, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb96f4306,
-	0x876fc16f, 0xbf840029,
-	0x8f6f866f, 0x8f6f826f,
-	0xbef6036f, 0xb9783a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x8078ff78,
-	0x00000080, 0xbef603ff,
-	0x01000000, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbefc0380, 0xbf850009,
-	0xe0310000, 0x781d0000,
-	0x807cff7c, 0x00000080,
-	0x8078ff78, 0x00000080,
-	0xbf0a6f7c, 0xbf85fff8,
-	0xbf820008, 0xe0310000,
-	0x781d0000, 0x807cff7c,
-	0x00000100, 0x8078ff78,
-	0x00000100, 0xbf0a6f7c,
-	0xbf85fff8, 0xbef80380,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000400, 0xbf0a7b7c,
+	0xbf85ffef, 0xb97b1e06,
+	0x877bc17b, 0xbf84000c,
+	0x8f7b837b, 0x807b7c7b,
+	0xbefe03c1, 0xbeff0380,
+	0x7e008700, 0xe0704000,
+	0x705d0000, 0x807c817c,
+	0x8070ff70, 0x00000080,
+	0xbf0a7b7c, 0xbf85fff8,
+	0xbf82013b, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xb97202dc,
+	0x8f729972, 0x876eff7f,
+	0x04000000, 0xbf840034,
 	0xbefe03c1, 0x907c9972,
 	0x877c817c, 0xbf06817c,
 	0xbf850002, 0xbeff0380,
 	0xbf820001, 0xbeff03c1,
-	0xb96f3a05, 0x806f816f,
-	0x8f6f826f, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbf850024, 0xbef603ff,
-	0x01000000, 0xbeee0378,
+	0xb96f4306, 0x876fc16f,
+	0xbf840029, 0x8f6f866f,
+	0x8f6f826f, 0xbef6036f,
+	0xb9783a05, 0x80788178,
+	0xbf0d9972, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb96e1e06,
+	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
-	0xbefc0384, 0xbf0a6f7c,
-	0xbf840050, 0xe0304000,
-	0x785d0000, 0xe0304080,
-	0x785d0100, 0xe0304100,
-	0x785d0200, 0xe0304180,
-	0x785d0300, 0xbf8c3f70,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807c847c, 0x8078ff78,
-	0x00000200, 0xbf0a6f7c,
-	0xbf85ffee, 0xe0304000,
-	0x6e5d0000, 0xe0304080,
-	0x6e5d0100, 0xe0304100,
-	0x6e5d0200, 0xe0304180,
-	0x6e5d0300, 0xbf8c3f70,
-	0xbf820034, 0xbef603ff,
-	0x01000000, 0xbeee0378,
-	0x8078ff78, 0x00000400,
-	0xbefc0384, 0xbf0a6f7c,
-	0xbf840012, 0xe0304000,
-	0x785d0000, 0xe0304100,
-	0x785d0100, 0xe0304200,
-	0x785d0200, 0xe0304300,
-	0x785d0300, 0xbf8c3f70,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807c847c, 0x8078ff78,
-	0x00000400, 0xbf0a6f7c,
-	0xbf85ffee, 0xb96f1e06,
-	0x876fc16f, 0xbf84000e,
-	0x8f6f836f, 0x806f7c6f,
-	0xbefe03c1, 0xbeff0380,
+	0x8078ff78, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbefc0380,
+	0xbf850009, 0xe0310000,
+	0x781d0000, 0x807cff7c,
+	0x00000080, 0x8078ff78,
+	0x00000080, 0xbf0a6f7c,
+	0xbf85fff8, 0xbf820008,
+	0xe0310000, 0x781d0000,
+	0x807cff7c, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7c, 0xbf85fff8,
+	0xbef80380, 0xbefe03c1,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb96f3a05,
+	0x806f816f, 0x8f6f826f,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850024,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000200, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840050,
 	0xe0304000, 0x785d0000,
+	0xe0304080, 0x785d0100,
+	0xe0304100, 0x785d0200,
+	0xe0304180, 0x785d0300,
 	0xbf8c3f70, 0x7e008500,
-	0x807c817c, 0x8078ff78,
-	0x00000080, 0xbf0a6f7c,
-	0xbf85fff7, 0xbeff03c1,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7c, 0xbf85ffee,
 	0xe0304000, 0x6e5d0000,
-	0xe0304100, 0x6e5d0100,
-	0xe0304200, 0x6e5d0200,
-	0xe0304300, 0x6e5d0300,
-	0xbf8c3f70, 0xb9783a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x80f8ff78,
-	0x00000050, 0xbef603ff,
-	0x01000000, 0xbefc03ff,
-	0x0000006c, 0x80f89078,
-	0xf429003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc847c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0x80f8a078,
-	0xf42d003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc887c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0x80f8c078,
-	0xf431003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc907c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0xbe883108,
-	0xbe8a310a, 0xbe8c310c,
-	0xbe8e310e, 0xbf06807c,
-	0xbf84fff0, 0xba80f801,
-	0x00000000, 0xbf8a0000,
+	0xe0304080, 0x6e5d0100,
+	0xe0304100, 0x6e5d0200,
+	0xe0304180, 0x6e5d0300,
+	0xbf8c3f70, 0xbf820034,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000400, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840012,
+	0xe0304000, 0x785d0000,
+	0xe0304100, 0x785d0100,
+	0xe0304200, 0x785d0200,
+	0xe0304300, 0x785d0300,
+	0xbf8c3f70, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xb96f1e06, 0x876fc16f,
+	0xbf84000e, 0x8f6f836f,
+	0x806f7c6f, 0xbefe03c1,
+	0xbeff0380, 0xe0304000,
+	0x785d0000, 0xbf8c3f70,
+	0x7e008500, 0x807c817c,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7c, 0xbf85fff7,
+	0xbeff03c1, 0xe0304000,
+	0x6e5d0000, 0xe0304100,
+	0x6e5d0100, 0xe0304200,
+	0x6e5d0200, 0xe0304300,
+	0x6e5d0300, 0xbf8c3f70,
 	0xb9783a05, 0x80788178,
 	0xbf0d9972, 0xbf850002,
 	0x8f788978, 0xbf820001,
 	0x8f788a78, 0xb96e1e06,
 	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
 	0xbef603ff, 0x01000000,
-	0xf4211bfa, 0xf0000000,
-	0x80788478, 0xf4211b3a,
+	0xbefc03ff, 0x0000006c,
+	0x80f89078, 0xf429003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc847c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0x80f8a078, 0xf42d003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc887c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0x80f8c078, 0xf431003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc907c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0xbe883108, 0xbe8a310a,
+	0xbe8c310c, 0xbe8e310e,
+	0xbf06807c, 0xbf84fff0,
+	0xba80f801, 0x00000000,
+	0xbf8a0000, 0xb9783a05,
+	0x80788178, 0xbf0d9972,
+	0xbf850002, 0x8f788978,
+	0xbf820001, 0x8f788a78,
+	0xb96e1e06, 0x8f6e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0xbef603ff,
+	0x01000000, 0xf4211bfa,
 	0xf0000000, 0x80788478,
-	0xf4211b7a, 0xf0000000,
-	0x80788478, 0xf4211c3a,
+	0xf4211b3a, 0xf0000000,
+	0x80788478, 0xf4211b7a,
 	0xf0000000, 0x80788478,
-	0xf4211c7a, 0xf0000000,
-	0x80788478, 0xf4211eba,
+	0xf4211c3a, 0xf0000000,
+	0x80788478, 0xf4211c7a,
 	0xf0000000, 0x80788478,
-	0xf4211efa, 0xf0000000,
-	0x80788478, 0xf4211e7a,
+	0xf4211eba, 0xf0000000,
+	0x80788478, 0xf4211efa,
 	0xf0000000, 0x80788478,
-	0xf4211cfa, 0xf0000000,
-	0x80788478, 0xf4211bba,
+	0xf4211e7a, 0xf0000000,
+	0x80788478, 0xf4211cfa,
 	0xf0000000, 0x80788478,
-	0xbf8cc07f, 0xb9eef814,
 	0xf4211bba, 0xf0000000,
 	0x80788478, 0xbf8cc07f,
-	0xb9eef815, 0xbefc036f,
-	0xbefe0370, 0xbeff0371,
-	0x876f7bff, 0x000003ff,
-	0xb9ef4803, 0x876f7bff,
-	0xfffff800, 0x906f8b6f,
-	0xb9efa2c3, 0xb9f3f801,
-	0xb96e3a05, 0x806e816e,
-	0xbf0d9972, 0xbf850002,
-	0x8f6e896e, 0xbf820001,
-	0x8f6e8a6e, 0xb96f1e06,
-	0x8f6f8a6f, 0x806e6f6e,
-	0x806eff6e, 0x00000200,
-	0x806e746e, 0x826f8075,
-	0x876fff6f, 0x0000ffff,
-	0xf4091c37, 0xfa000050,
-	0xf4091d37, 0xfa000060,
-	0xf4011e77, 0xfa000074,
-	0xbf8cc07f, 0x876dff6d,
-	0x0000ffff, 0x87fe7e7e,
-	0x87ea6a6a, 0xb9faf802,
-	0xbe80226c, 0xbf810000,
+	0xb9eef814, 0xf4211bba,
+	0xf0000000, 0x80788478,
+	0xbf8cc07f, 0xb9eef815,
+	0xbefc036f, 0xbefe0370,
+	0xbeff0371, 0x876f7bff,
+	0x000003ff, 0xb9ef4803,
+	0x876f7bff, 0xfffff800,
+	0x906f8b6f, 0xb9efa2c3,
+	0xb9f3f801, 0xb96e3a05,
+	0x806e816e, 0xbf0d9972,
+	0xbf850002, 0x8f6e896e,
+	0xbf820001, 0x8f6e8a6e,
+	0xb96f1e06, 0x8f6f8a6f,
+	0x806e6f6e, 0x806eff6e,
+	0x00000200, 0x806e746e,
+	0x826f8075, 0x876fff6f,
+	0x0000ffff, 0xf4091c37,
+	0xfa000050, 0xf4091d37,
+	0xfa000060, 0xf4011e77,
+	0xfa000074, 0xbf8cc07f,
+	0x876dff6d, 0x0000ffff,
+	0x87fe7e7e, 0x87ea6a6a,
+	0xb9faf802, 0xbe80226c,
+	0xbf810000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
-	0xbf9f0000, 0x00000000,
 };
 
 static const uint32_t cwsr_trap_gfx11_hex[] = {
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index fdab646244225..e0140df0b0ec8 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -369,6 +369,12 @@ L_SLEEP:
 	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
 
 #if NO_SQC_STORE
+#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID
+	// gfx10: If there was a VALU exception, the exception state must be
+	// cleared before executing the VALU instructions below.
+	v_clrexcp
+#endif
+
 	// Trap temporaries must be saved via VGPR but all VGPRs are in use.
 	// There is no ttmp space to hold the resource constant for VGPR save.
 	// Save v0 by itself since it requires only two SGPRs.
-- 
GitLab


From efb91fea652a42fcc037d2a9ef4ecd1ffc5ff4b7 Mon Sep 17 00:00:00 2001
From: Aurabindo Pillai <aurabindo.pillai@amd.com>
Date: Fri, 27 Oct 2023 15:59:48 -0400
Subject: [PATCH 0294/2340] drm/amd/display: Fix a debugfs null pointer error

[WHY & HOW]
Check whether get_subvp_en() callback exists before calling it.

Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Reviewed-by: Alex Hung <alex.hung@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 13a177d343762..45c972f2630d8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -3647,12 +3647,16 @@ static int capabilities_show(struct seq_file *m, void *unused)
 	bool mall_supported = dc->caps.mall_size_total;
 	bool subvp_supported = dc->caps.subvp_fw_processing_delay_us;
 	unsigned int mall_in_use = false;
-	unsigned int subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+	unsigned int subvp_in_use = false;
+
 	struct hubbub *hubbub = dc->res_pool->hubbub;
 
 	if (hubbub->funcs->get_mall_en)
 		hubbub->funcs->get_mall_en(hubbub, &mall_in_use);
 
+	if (dc->cap_funcs.get_subvp_en)
+		subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+
 	seq_printf(m, "mall supported: %s, enabled: %s\n",
 			   mall_supported ? "yes" : "no", mall_in_use ? "yes" : "no");
 	seq_printf(m, "sub-viewport supported: %s, enabled: %s\n",
-- 
GitLab


From ef71bb4119c786f6f1d132b8863698874321798b Mon Sep 17 00:00:00 2001
From: Yang Wang <kevinyang.wang@amd.com>
Date: Fri, 10 Nov 2023 13:14:41 +0800
Subject: [PATCH 0295/2340] drm/amdgpu: correct mca ipid die/socket/addr decode

correct mca ipid die/socket/addr decode

v2: squash in fix from Yang

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c   | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 0e5a77c3c2e21..f723a4190ee52 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2392,8 +2392,8 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT
 
 static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info)
 {
-	uint64_t ipid = entry->regs[MCA_REG_IDX_IPID];
-	uint32_t insthi;
+	u64 ipid = entry->regs[MCA_REG_IDX_IPID];
+	u32 instidhi, instid;
 
 	/* NOTE: All MCA IPID register share the same format,
 	 * so the driver can share the MCMP1 register header file.
@@ -2402,9 +2402,15 @@ static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_
 	info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
 	info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
 
-	insthi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
-	info->aid = ((insthi >> 2) & 0x03);
-	info->socket_id = insthi & 0x03;
+	/*
+	 * Unfied DieID Format: SAASS. A:AID, S:Socket.
+	 * Unfied DieID[4] = InstanceId[0]
+	 * Unfied DieID[0:3] = InstanceIdHi[0:3]
+	 */
+	instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
+	instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo);
+	info->aid = ((instidhi >> 2) & 0x03);
+	info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03);
 }
 
 static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
@@ -2578,6 +2584,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct
 	uint32_t instlo;
 
 	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+	instlo &= GENMASK(31, 1);
 	switch (instlo) {
 	case 0x36430400: /* SMNAID XCD 0 */
 	case 0x38430400: /* SMNAID XCD 1 */
@@ -2596,6 +2603,7 @@ static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amd
 	uint32_t errcode, instlo;
 
 	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+	instlo &= GENMASK(31, 1);
 	if (instlo != 0x03b30400)
 		return false;
 
-- 
GitLab


From fcfc6ceec3ebb725a0d6381a1120e7cd546e1df4 Mon Sep 17 00:00:00 2001
From: Yihan Zhu <yihan.zhu@amd.com>
Date: Mon, 30 Oct 2023 13:29:51 -0400
Subject: [PATCH 0296/2340] drm/amd/display: Enable CM low mem power
 optimization

[WHY & HOW]
MPC MCM low mem power optimization still causes color distortion on
first SCE enablement, only forces light sleep for it.

DPP low memory power optimization still needs this bit to save power.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Yihan Zhu <yihan.zhu@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c    | 13 ++++++++-----
 .../gpu/drm/amd/display/dc/dcn35/dcn35_resource.c   |  2 +-
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index 994b21ed272f1..1a2adb3547182 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -71,21 +71,24 @@ void mpc32_power_on_blnd_lut(
 {
 	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
 
+/*
 	if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) {
 		if (power_on) {
 			REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0);
 			REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5);
 		} else if (!mpc->ctx->dc->debug.disable_mem_low_power) {
-			ASSERT(false);
-			/* TODO: change to mpc
-			 *  dpp_base->ctx->dc->optimized_required = true;
-			 *  dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
-			 */
+			//TODO: change to mpc
+			dpp_base->ctx->dc->optimized_required = true;
+			dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
 		}
 	} else {
 		REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0,
 				MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 0 : 1);
 	}
+*/
+
+	REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0,
+			MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 0 : 1);
 }
 
 static enum dc_lut_mode mpc32_get_post1dlut_current(struct mpc *mpc, uint32_t mpcc_id)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
index c7e011d26d417..7a3faf2b1f068 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
@@ -724,7 +724,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 			.i2c = true,
 			.dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
 			.dscl = true,
-			.cm = false,
+			.cm = true,
 			.mpc = true,
 			.optc = true,
 			.vpg = true,
-- 
GitLab


From c41028a2a16303e5a59e11338d6ef5475945c79d Mon Sep 17 00:00:00 2001
From: Hamza Mahfooz <hamza.mahfooz@amd.com>
Date: Thu, 9 Nov 2023 16:42:32 -0500
Subject: [PATCH 0297/2340] drm/amd/display: add a debugfs interface for the
 DMUB trace mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For features that are implemented primarily in DMUB (e.g. PSR), it is
useful to be able to trace them at a DMUB level from the kernel,
especially when debugging issues. So, introduce a debugfs interface that
is able to read and set the DMUB trace mask dynamically at runtime and
document how to use it.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 Documentation/gpu/amdgpu/display/dc-debug.rst |  41 +++++++
 .../gpu/amdgpu/display/trace-groups-table.csv |  29 +++++
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 101 ++++++++++++++++++
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  40 ++++++-
 4 files changed, 209 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/gpu/amdgpu/display/trace-groups-table.csv

diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst
index 40c55a6189187..817631b1dbf33 100644
--- a/Documentation/gpu/amdgpu/display/dc-debug.rst
+++ b/Documentation/gpu/amdgpu/display/dc-debug.rst
@@ -75,3 +75,44 @@ change in real-time by using something like::
 
 When reporting a bug related to DC, consider attaching this log before and
 after you reproduce the bug.
+
+DMUB Firmware Debug
+===================
+
+Sometimes, dmesg logs aren't enough. This is especially true if a feature is
+implemented primarily in DMUB firmware. In such cases, all we see in dmesg when
+an issue arises is some generic timeout error. So, to get more relevant
+information, we can trace DMUB commands by enabling the relevant bits in
+`amdgpu_dm_dmub_trace_mask`.
+
+Currently, we support the tracing of the following groups:
+
+Trace Groups
+------------
+
+.. csv-table::
+   :header-rows: 1
+   :widths: 1, 1
+   :file: ./trace-groups-table.csv
+
+**Note: Not all ASICs support all of the listed trace groups**
+
+So, to enable just PSR tracing you can use the following command::
+
+  # echo 0x8020 > /sys/kernel/debug/dri/0/amdgpu_dm_dmub_trace_mask
+
+Then, you need to enable logging trace events to the buffer, which you can do
+using the following::
+
+  # echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+
+Lastly, after you are able to reproduce the issue you are trying to debug,
+you can disable tracing and read the trace log by using the following::
+
+  # echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+  # cat /sys/kernel/debug/dri/0/amdgpu_dm_dmub_tracebuffer
+
+So, when reporting bugs related to features such as PSR and ABM, consider
+enabling the relevant bits in the mask before reproducing the issue and
+attach the log that you obtain from the trace buffer in any bug reports that you
+create.
diff --git a/Documentation/gpu/amdgpu/display/trace-groups-table.csv b/Documentation/gpu/amdgpu/display/trace-groups-table.csv
new file mode 100644
index 0000000000000..3f6a50d1d8831
--- /dev/null
+++ b/Documentation/gpu/amdgpu/display/trace-groups-table.csv
@@ -0,0 +1,29 @@
+Name, Mask Value
+INFO, 0x1
+IRQ SVC, 0x2
+VBIOS, 0x4
+REGISTER, 0x8
+PHY DBG, 0x10
+PSR, 0x20
+AUX, 0x40
+SMU, 0x80
+MALL, 0x100
+ABM, 0x200
+ALPM, 0x400
+TIMER, 0x800
+HW LOCK MGR, 0x1000
+INBOX1, 0x2000
+PHY SEQ, 0x4000
+PSR STATE, 0x8000
+ZSTATE, 0x10000
+TRANSMITTER CTL, 0x20000
+PANEL CNTL, 0x40000
+FAMS, 0x80000
+DPIA, 0x100000
+SUBVP, 0x200000
+INBOX0, 0x400000
+SDP, 0x4000000
+REPLAY, 0x8000000
+REPLAY RESIDENCY, 0x20000000
+CURSOR INFO, 0x80000000
+IPS, 0x100000000
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 45c972f2630d8..98b41ec7288e8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -2971,6 +2971,104 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val)
 	return 0;
 }
 
+static int dmub_trace_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = data;
+	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+	enum dmub_gpint_command cmd;
+	enum dmub_status status;
+	u64 mask = 0xffff;
+	u8 shift = 0;
+	u32 res;
+	int i;
+
+	if (!srv->fw_version)
+		return -EINVAL;
+
+	for (i = 0;  i < 4; i++) {
+		res = (val & mask) >> shift;
+
+		switch (i) {
+		case 0:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0;
+			break;
+		case 1:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1;
+			break;
+		case 2:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2;
+			break;
+		case 3:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3;
+			break;
+		}
+
+		status = dmub_srv_send_gpint_command(srv, cmd, res, 30);
+
+		if (status == DMUB_STATUS_TIMEOUT)
+			return -ETIMEDOUT;
+		else if (status == DMUB_STATUS_INVALID)
+			return -EINVAL;
+		else if (status != DMUB_STATUS_OK)
+			return -EIO;
+
+		usleep_range(100, 1000);
+
+		mask <<= 16;
+		shift += 16;
+	}
+
+	return 0;
+}
+
+static int dmub_trace_mask_show(void *data, u64 *val)
+{
+	enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0;
+	struct amdgpu_device *adev = data;
+	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+	enum dmub_status status;
+	u8 shift = 0;
+	u64 raw = 0;
+	u64 res = 0;
+	int i = 0;
+
+	if (!srv->fw_version)
+		return -EINVAL;
+
+	while (i < 4) {
+		status = dmub_srv_send_gpint_command(srv, cmd, 0, 30);
+
+		if (status == DMUB_STATUS_OK) {
+			status = dmub_srv_get_gpint_response(srv, (u32 *) &raw);
+
+			if (status == DMUB_STATUS_INVALID)
+				return -EINVAL;
+			else if (status != DMUB_STATUS_OK)
+				return -EIO;
+		} else if (status == DMUB_STATUS_TIMEOUT) {
+			return -ETIMEDOUT;
+		} else if (status == DMUB_STATUS_INVALID) {
+			return -EINVAL;
+		} else {
+			return -EIO;
+		}
+
+		usleep_range(100, 1000);
+
+		cmd++;
+		res |= (raw << shift);
+		shift += 16;
+		i++;
+	}
+
+	*val = res;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show,
+			 dmub_trace_mask_set, "0x%llx\n");
+
 /*
  * Set dmcub trace event IRQ enable or disable.
  * Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
@@ -3884,6 +3982,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
 	debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root,
 				   adev, &force_timing_sync_ops);
 
+	debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root,
+				   adev, &dmub_trace_mask_fops);
+
 	debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root,
 				   adev, &dmcub_trace_event_state_fops);
 
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index ed4379c047151..aa6e6923afede 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -818,18 +818,54 @@ enum dmub_gpint_command {
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK = 101,
+
 	/**
-	 * DESC: Updates the trace buffer lower 32-bit mask.
+	 * DESC: Updates the trace buffer mask bit0~bit15.
 	 * ARGS: The new mask
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0 = 102,
+
 	/**
-	 * DESC: Updates the trace buffer mask bi0~bit15.
+	 * DESC: Updates the trace buffer mask bit16~bit31.
 	 * ARGS: The new mask
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1 = 103,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit32~bit47.
+	 * ARGS: The new mask
+	 * RETURN: Lower 32-bit mask.
+	 */
+	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2 = 114,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit48~bit63.
+	 * ARGS: The new mask
+	 * RETURN: Lower 32-bit mask.
+	 */
+	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3 = 115,
+
+	/**
+	 * DESC: Read the trace buffer mask bi0~bit15.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0 = 116,
+
+	/**
+	 * DESC: Read the trace buffer mask bit16~bit31.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD1 = 117,
+
+	/**
+	 * DESC: Read the trace buffer mask bi32~bit47.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD2 = 118,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit32~bit63.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119,
 };
 
 /**
-- 
GitLab


From 8b8eed05a1c650c27e78bc47d07f7d6c9ba779e8 Mon Sep 17 00:00:00 2001
From: Mounika Adhuri <moadhuri@amd.com>
Date: Fri, 6 Oct 2023 15:05:42 +0530
Subject: [PATCH 0298/2340] drm/amd/display: Refactor resource into component
 directory

[WHY]
Move all resource files to unique folder resource.

[HOW]
Created resource folder in dc, moved the
dcnxx_resource.c and dcnxx_resource.h files into
corresponding new folders inside the resource and
made appropriate changes for compilation in Makefiles.

Reviewed-by: Martin Leung <martin.leung@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Mounika Adhuri <moadhuri@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/Makefile          |   1 +
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |   2 +-
 drivers/gpu/drm/amd/display/dc/Makefile       |   5 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c |   4 +-
 .../gpu/drm/amd/display/dc/dce100/Makefile    |  46 ----
 .../gpu/drm/amd/display/dc/dce110/Makefile    |   4 +-
 .../gpu/drm/amd/display/dc/dce112/Makefile    |   3 +-
 .../gpu/drm/amd/display/dc/dce120/Makefile    |   2 +-
 drivers/gpu/drm/amd/display/dc/dce80/Makefile |   3 +-
 drivers/gpu/drm/amd/display/dc/dcn10/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn20/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/dcn201/Makefile    |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn21/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile |   1 -
 .../gpu/drm/amd/display/dc/dcn301/Makefile    |   2 +-
 .../gpu/drm/amd/display/dc/dcn302/Makefile    |   2 +-
 .../gpu/drm/amd/display/dc/dcn303/Makefile    |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/dcn314/Makefile    |   2 +-
 .../gpu/drm/amd/display/dc/dcn315/Makefile    |  30 ---
 .../gpu/drm/amd/display/dc/dcn316/Makefile    |  30 ---
 drivers/gpu/drm/amd/display/dc/dcn32/Makefile |   2 +-
 .../display/dc/dcn32/dcn32_resource_helpers.c |   2 +-
 .../gpu/drm/amd/display/dc/dcn321/Makefile    |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn35/Makefile |   2 +-
 .../drm/amd/display/dc/dml/calcs/dcn_calcs.c  |   2 +-
 .../gpu/drm/amd/display/dc/resource/Makefile  | 199 ++++++++++++++++++
 .../{ => resource}/dce100/dce100_resource.c   |   0
 .../{ => resource}/dce100/dce100_resource.h   |   0
 .../{ => resource}/dce110/dce110_resource.c   |   0
 .../{ => resource}/dce110/dce110_resource.h   |   0
 .../{ => resource}/dce112/dce112_resource.c   |   0
 .../{ => resource}/dce112/dce112_resource.h   |   0
 .../{ => resource}/dce120/dce120_resource.c   |   2 +-
 .../{ => resource}/dce120/dce120_resource.h   |   0
 .../display/dc/resource/dce80/CMakeLists.txt  |   4 +
 .../dc/{ => resource}/dce80/dce80_resource.c  |   0
 .../dc/{ => resource}/dce80/dce80_resource.h  |   0
 .../dc/{ => resource}/dcn10/dcn10_resource.c  |  25 ++-
 .../dc/{ => resource}/dcn10/dcn10_resource.h  |   0
 .../dc/{ => resource}/dcn20/dcn20_resource.c  |  24 +--
 .../dc/{ => resource}/dcn20/dcn20_resource.h  |   0
 .../{ => resource}/dcn201/dcn201_resource.c   |  14 +-
 .../{ => resource}/dcn201/dcn201_resource.h   |   0
 .../dc/{ => resource}/dcn21/dcn21_resource.c  |   6 +-
 .../dc/{ => resource}/dcn21/dcn21_resource.h  |   0
 .../dc/{ => resource}/dcn30/dcn30_resource.c  |   2 +-
 .../dc/{ => resource}/dcn30/dcn30_resource.h  |   0
 .../{ => resource}/dcn301/dcn301_resource.c   |   4 +-
 .../{ => resource}/dcn301/dcn301_resource.h   |   0
 .../{ => resource}/dcn302/dcn302_resource.c   |   4 +-
 .../{ => resource}/dcn302/dcn302_resource.h   |   0
 .../{ => resource}/dcn303/dcn303_resource.c   |   4 +-
 .../{ => resource}/dcn303/dcn303_resource.h   |   0
 .../dc/{ => resource}/dcn31/dcn31_resource.c  |   2 +-
 .../dc/{ => resource}/dcn31/dcn31_resource.h  |   0
 .../{ => resource}/dcn314/dcn314_resource.c   |   0
 .../{ => resource}/dcn314/dcn314_resource.h   |   0
 .../{ => resource}/dcn315/dcn315_resource.c   |   0
 .../{ => resource}/dcn315/dcn315_resource.h   |   0
 .../{ => resource}/dcn316/dcn316_resource.c   |   0
 .../{ => resource}/dcn316/dcn316_resource.h   |   0
 .../dc/{ => resource}/dcn32/dcn32_resource.c  |   4 +-
 .../dc/{ => resource}/dcn32/dcn32_resource.h  |   0
 .../{ => resource}/dcn321/dcn321_resource.c   |   2 +-
 .../{ => resource}/dcn321/dcn321_resource.h   |   0
 .../dc/{ => resource}/dcn35/dcn35_resource.c  |   2 +-
 .../dc/{ => resource}/dcn35/dcn35_resource.h  |   0
 68 files changed, 276 insertions(+), 181 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dce100/Makefile
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn315/Makefile
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn316/Makefile
 create mode 100644 drivers/gpu/drm/amd/display/dc/resource/Makefile
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce100/dce100_resource.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce100/dce100_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce110/dce110_resource.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce110/dce110_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce112/dce112_resource.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce112/dce112_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce120/dce120_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce120/dce120_resource.h (100%)
 create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce80/dce80_resource.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dce80/dce80_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn10/dcn10_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn10/dcn10_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn20/dcn20_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn20/dcn20_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn201/dcn201_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn201/dcn201_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn21/dcn21_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn21/dcn21_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn30/dcn30_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn30/dcn30_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn301/dcn301_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn301/dcn301_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn302/dcn302_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn302/dcn302_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn303/dcn303_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn303/dcn303_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn31/dcn31_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn31/dcn31_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn314/dcn314_resource.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn314/dcn314_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn315/dcn315_resource.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn315/dcn315_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn316/dcn316_resource.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn316/dcn316_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn32/dcn32_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn32/dcn32_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn321/dcn321_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn321/dcn321_resource.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn35/dcn35_resource.c (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => resource}/dcn35/dcn35_resource.h (100%)

diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index af17ab8027dfd..71192fc81a20e 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -30,6 +30,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 11da0eebee6c4..ec87e31c6fe88 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -44,7 +44,7 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 
-#include "dc/dcn20/dcn20_resource.h"
+#include "dc/resource/dcn20/dcn20_resource.h"
 
 #define PEAK_FACTOR_X1000 1006
 
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 3a169b78e7e43..ab51a065cf0e5 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -22,7 +22,7 @@
 #
 # Makefile for Display Core (dc) component.
 
-DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc
+DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource
 
 ifdef CONFIG_DRM_AMD_DC_FP
 
@@ -38,8 +38,6 @@ DC_LIBS += dcn302
 DC_LIBS += dcn303
 DC_LIBS += dcn31
 DC_LIBS += dcn314
-DC_LIBS += dcn315
-DC_LIBS += dcn316
 DC_LIBS += dcn32
 DC_LIBS += dcn321
 DC_LIBS += dcn35
@@ -51,7 +49,6 @@ DC_LIBS += dce120
 
 DC_LIBS += dce112
 DC_LIBS += dce110
-DC_LIBS += dce100
 DC_LIBS += dce80
 
 ifdef CONFIG_DRM_AMD_DC_SI
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index a1f1d10039927..6159d819c5c57 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -69,8 +69,8 @@
 #include "dcn314/dcn314_resource.h"
 #include "dcn315/dcn315_resource.h"
 #include "dcn316/dcn316_resource.h"
-#include "../dcn32/dcn32_resource.h"
-#include "../dcn321/dcn321_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
 #include "dcn35/dcn35_resource.h"
 
 #define VISUAL_CONFIRM_BASE_DEFAULT 3
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile
deleted file mode 100644
index 0d2f6bbf75585..0000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Copyright 2017 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#
-# Makefile for the 'controller' sub-component of DAL.
-# It provides the control and status of HW CRTC block.
-
-CFLAGS_$(AMDDALPATH)/dc/dce100/dce100_resource.o = $(call cc-disable-warning, override-init)
-
-DCE100 = dce100_resource.o
-
-AMD_DAL_DCE100 = $(addprefix $(AMDDALPATH)/dc/dce100/,$(DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCE100)
-
-
-###############################################################################
-# DCE 10x
-###############################################################################
-ifdef 0#CONFIG_DRM_AMD_DC_DCE11_0
-TG_DCE100 = dce100_resource.o
-
-AMD_DAL_TG_DCE100 = $(addprefix \
-	$(AMDDALPATH)/dc/dce100/,$(TG_DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_TG_DCE100)
-endif
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index 695a50ed5ad26..f0777d61c2cbb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -26,8 +26,8 @@
 CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
 
 DCE110 = dce110_timing_generator.o \
-dce110_compressor.o dce110_resource.o \
-dce110_opp_regamma_v.o dce110_opp_csc_v.o dce110_timing_generator_v.o \
+dce110_compressor.o dce110_opp_regamma_v.o \
+dce110_opp_csc_v.o dce110_timing_generator_v.o \
 dce110_mem_input_v.o dce110_opp_v.o dce110_transform_v.o
 
 AMD_DAL_DCE110 = $(addprefix $(AMDDALPATH)/dc/dce110/,$(DCE110))
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index e846ef58cab3f..7e92effec8944 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -25,8 +25,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
 
-DCE112 = dce112_compressor.o \
-dce112_resource.o
+DCE112 = dce112_compressor.o
 
 AMD_DAL_DCE112 = $(addprefix $(AMDDALPATH)/dc/dce112/,$(DCE112))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index 097cf407a15d1..1e3ef68a452a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -26,7 +26,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
 
-DCE120 = dce120_resource.o dce120_timing_generator.o \
+DCE120 = dce120_timing_generator.o
 
 AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index 93dd68c31275b..7eefffbdc9253 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -25,8 +25,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
 
-DCE80 = dce80_timing_generator.o \
-	dce80_resource.o
+DCE80 = dce80_timing_generator.o
 
 AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 2d2007c3e2b61..1eb7418ced3aa 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -22,7 +22,7 @@
 #
 # Makefile for DCN.
 
-DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o \
+DCN10 = dcn10_init.o dcn10_ipp.o \
 		dcn10_hw_sequencer_debug.o \
 		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
 		dcn10_hubp.o dcn10_mpc.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index d7dc9696a8c82..1cac1eca8111e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for DCN.
 
-DCN20 = dcn20_resource.o dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
+DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
 		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \
 		dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
 		dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index 3a41a97b0729c..c069a894db925 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 #
 # Makefile for DCN.
-DCN201 = dcn201_init.o dcn201_resource.o \
+DCN201 = dcn201_init.o \
 	dcn201_hubbub.o\
 	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \
 	dcn201_dccg.o dcn201_link_encoder.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index ce1be0afae4a9..dd1eea7212f47 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for DCN21.
 
-DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \
+DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o \
 	 dcn21_link_encoder.o dcn21_dccg.o
 
 AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index af4d2065d2c13..9dcf06c0954d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -38,7 +38,6 @@ DCN30 := \
 	dcn30_dwb_cm.o \
 	dcn30_cm_common.o \
 	dcn30_mmhubbub.o \
-	dcn30_resource.o \
 	dcn30_dio_link_encoder.o
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 30fbc5e06dcaa..cbf59d7e78c4f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn30.
 
-DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
+DCN301 = dcn301_init.o dcn301_dccg.o \
 		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o \
 		dcn301_optc.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
index 95b66baf39e91..0fcd03569d74e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
@@ -5,7 +5,7 @@
 #
 # Makefile for dcn302.
 
-DCN3_02 = dcn302_init.o dcn302_resource.o
+DCN3_02 = dcn302_init.o
 
 AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
index d7b3ad780e5db..a954e316aca25 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
@@ -6,7 +6,7 @@
 #
 # Makefile for dcn303.
 
-DCN3_03 = dcn303_init.o dcn303_resource.o
+DCN3_03 = dcn303_init.o
 
 AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index 96e45c9efb461..212287008c0a6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn31.
 
-DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \
+DCN31 = dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \
 	dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
 	dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
 	dcn31_afmt.o dcn31_vpg.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index 72456debb99f9..6ea47e00d62d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn314.
 
-DCN314 = dcn314_resource.o dcn314_init.o \
+DCN314 = dcn314_init.o \
 		dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
 
 AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
deleted file mode 100644
index 59381d24800b3..0000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Copyright © 2021 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# Authors: AMD
-#
-# Makefile for dcn315.
-
-DCN315 = dcn315_resource.o
-
-AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
deleted file mode 100644
index 819d44a9439b0..0000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Copyright 2021 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# Authors: AMD
-#
-# Makefile for dcn316.
-
-DCN316 = dcn316_resource.o
-
-AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
index 8bb251307247e..3bb17dd01e4ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn32.
 
-DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \
+DCN32 = dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \
 		dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \
 		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \
 		dcn32_resource_helpers.o dcn32_mpc.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index bc5f0db23d0c3..ef0a2b01734d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -24,7 +24,7 @@
  */
 
 // header file of functions being implemented
-#include "dcn32_resource.h"
+#include "dcn32/dcn32_resource.h"
 #include "dcn20/dcn20_resource.h"
 #include "dml/dcn32/display_mode_vba_util_32.h"
 #include "dml/dcn32/dcn32_fpu.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
index 0a199c83bb5b3..c195c47f58b46 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn321.
 
-DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o
+DCN321 = dcn321_dio_link_encoder.o
 
 AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
index 20d0eef1a13bf..719afb5a3b127 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for DCN35.
 
-DCN35 = dcn35_resource.o dcn35_init.o dcn35_dio_stream_encoder.o \
+DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \
 	dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \
 	dcn35_dsc.o dcn35_hubp.o dcn35_hubbub.o \
 	dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
index 50b0434354f8f..0c4a8fe8e5ca6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
@@ -30,7 +30,7 @@
 #include "dcn_calc_auto.h"
 #include "dal_asic_id.h"
 #include "resource.h"
-#include "dcn10/dcn10_resource.h"
+#include "resource/dcn10/dcn10_resource.h"
 #include "dcn10/dcn10_hubbub.h"
 #include "dml/dml1_display_rq_dlg_calc.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile
new file mode 100644
index 0000000000000..0a75ed8962a55
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile
@@ -0,0 +1,199 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'resource' sub-component of DAL.
+#
+
+
+###############################################################################
+#  DCE
+###############################################################################
+
+RESOURCE_DCE100 = dce100_resource.o
+
+AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE100)
+
+###############################################################################
+
+RESOURCE_DCE110 = dce110_resource.o
+
+AMD_DAL_RESOURCE_DCE110 = $(addprefix $(AMDDALPATH)/dc/resource/dce110/,$(RESOURCE_DCE110))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE110)
+
+###############################################################################
+
+RESOURCE_DCE112 = dce112_resource.o
+
+AMD_DAL_RESOURCE_DCE112 = $(addprefix $(AMDDALPATH)/dc/resource/dce112/,$(RESOURCE_DCE112))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE112)
+
+###############################################################################
+
+RESOURCE_DCE120 = dce120_resource.o
+
+AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOURCE_DCE120))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120)
+
+###############################################################################
+
+RESOURCE_DCE80 = dce80_resource.o
+
+AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80)
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+RESOURCE_DCN10 = dcn10_resource.o
+
+AMD_DAL_RESOURCE_DCN10 = $(addprefix $(AMDDALPATH)/dc/resource/dcn10/,$(RESOURCE_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN10)
+
+###############################################################################
+
+RESOURCE_DCN20 = dcn20_resource.o
+
+AMD_DAL_RESOURCE_DCN20 = $(addprefix $(AMDDALPATH)/dc/resource/dcn20/,$(RESOURCE_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN20)
+
+###############################################################################
+
+RESOURCE_DCN201 = dcn201_resource.o
+
+AMD_DAL_RESOURCE_DCN201 = $(addprefix $(AMDDALPATH)/dc/resource/dcn201/,$(RESOURCE_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN201)
+
+###############################################################################
+
+RESOURCE_DCN21 = dcn21_resource.o
+
+AMD_DAL_RESOURCE_DCN21 = $(addprefix $(AMDDALPATH)/dc/resource/dcn21/,$(RESOURCE_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN21)
+
+###############################################################################
+
+###############################################################################
+
+###############################################################################
+
+RESOURCE_DCN30 = dcn30_resource.o
+
+AMD_DAL_RESOURCE_DCN30 = $(addprefix $(AMDDALPATH)/dc/resource/dcn30/,$(RESOURCE_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN30)
+
+###############################################################################
+
+RESOURCE_DCN301 = dcn301_resource.o
+
+AMD_DAL_RESOURCE_DCN301 = $(addprefix $(AMDDALPATH)/dc/resource/dcn301/,$(RESOURCE_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN301)
+
+###############################################################################
+
+RESOURCE_DCN302 = dcn302_resource.o
+
+AMD_DAL_RESOURCE_DCN302 = $(addprefix $(AMDDALPATH)/dc/resource/dcn302/,$(RESOURCE_DCN302))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN302)
+
+###############################################################################
+
+RESOURCE_DCN303 = dcn303_resource.o
+
+AMD_DAL_RESOURCE_DCN303 = $(addprefix $(AMDDALPATH)/dc/resource/dcn303/,$(RESOURCE_DCN303))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN303)
+
+###############################################################################
+
+RESOURCE_DCN31 = dcn31_resource.o
+
+AMD_DAL_RESOURCE_DCN31 = $(addprefix $(AMDDALPATH)/dc/resource/dcn31/,$(RESOURCE_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN31)
+
+###############################################################################
+
+RESOURCE_DCN314 = dcn314_resource.o
+
+AMD_DAL_RESOURCE_DCN314 = $(addprefix $(AMDDALPATH)/dc/resource/dcn314/,$(RESOURCE_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN314)
+
+###############################################################################
+
+RESOURCE_DCN315 = dcn315_resource.o
+
+AMD_DAL_RESOURCE_DCN315 = $(addprefix $(AMDDALPATH)/dc/resource/dcn315/,$(RESOURCE_DCN315))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN315)
+
+###############################################################################
+
+RESOURCE_DCN316 = dcn316_resource.o
+
+AMD_DAL_RESOURCE_DCN316 = $(addprefix $(AMDDALPATH)/dc/resource/dcn316/,$(RESOURCE_DCN316))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN316)
+
+###############################################################################
+
+RESOURCE_DCN32 = dcn32_resource.o
+
+AMD_DAL_RESOURCE_DCN32 = $(addprefix $(AMDDALPATH)/dc/resource/dcn32/,$(RESOURCE_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN32)
+
+###############################################################################
+
+RESOURCE_DCN321 = dcn321_resource.o
+
+AMD_DAL_RESOURCE_DCN321 = $(addprefix $(AMDDALPATH)/dc/resource/dcn321/,$(RESOURCE_DCN321))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN321)
+
+###############################################################################
+
+RESOURCE_DCN35 = dcn35_resource.o
+
+AMD_DAL_RESOURCE_DCN35 = $(addprefix $(AMDDALPATH)/dc/resource/dcn35/,$(RESOURCE_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN35)
+
+###############################################################################
+
+###############################################################################
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
index 962de79be169a..20662edd0ae45 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
@@ -36,7 +36,7 @@
 
 #include "dce110/dce110_resource.h"
 #include "virtual/virtual_stream_encoder.h"
-#include "dce120_timing_generator.h"
+#include "dce120/dce120_timing_generator.h"
 #include "irq/dce120/irq_service_dce120.h"
 #include "dce/dce_opp.h"
 #include "dce/dce_clock_source.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt
new file mode 100644
index 0000000000000..19dd73bc9ab03
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt
@@ -0,0 +1,4 @@
+dal3_subdirectory_sources(
+  dce80_resource.c
+  dce80_resource.h
+  )
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
index b94c5c97eee77..af1b31f4e69a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
@@ -26,29 +26,32 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn10_init.h"
+#include "dcn10/dcn10_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
-#include "dcn10_resource.h"
-#include "dcn10_ipp.h"
-#include "dcn10_mpc.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn10/dcn10_ipp.h"
+#include "dcn10/dcn10_mpc.h"
+
+#include "dcn10/dcn10_dwb.h"
+
 #include "irq/dcn10/irq_service_dcn10.h"
-#include "dcn10_dpp.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_dpp.h"
+#include "dcn10/dcn10_optc.h"
 #include "dcn10/dcn10_hwseq.h"
 #include "dce110/dce110_hwseq.h"
-#include "dcn10_opp.h"
-#include "dcn10_link_encoder.h"
-#include "dcn10_stream_encoder.h"
+#include "dcn10/dcn10_opp.h"
+#include "dcn10/dcn10_link_encoder.h"
+#include "dcn10/dcn10_stream_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dce112/dce112_resource.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
+#include "dcn10/dcn10_hubp.h"
+#include "dcn10/dcn10_hubbub.h"
 #include "dce/dce_panel_cntl.h"
 
 #include "soc15_hw_ip.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
index 0a422fbb14bc8..f04bb5b1471dd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -29,7 +29,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn20_init.h"
+#include "dcn20/dcn20_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -39,29 +39,29 @@
 
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
-#include "dcn20_hubbub.h"
-#include "dcn20_mpc.h"
-#include "dcn20_hubp.h"
+#include "dcn20/dcn20_hubbub.h"
+#include "dcn20/dcn20_mpc.h"
+#include "dcn20/dcn20_hubp.h"
 #include "irq/dcn20/irq_service_dcn20.h"
-#include "dcn20_dpp.h"
-#include "dcn20_optc.h"
+#include "dcn20/dcn20_dpp.h"
+#include "dcn20/dcn20_optc.h"
 #include "dcn20/dcn20_hwseq.h"
 #include "dce110/dce110_hwseq.h"
 #include "dcn10/dcn10_resource.h"
-#include "dcn20_opp.h"
+#include "dcn20/dcn20_opp.h"
 
-#include "dcn20_dsc.h"
+#include "dcn20/dcn20_dsc.h"
 
-#include "dcn20_link_encoder.h"
-#include "dcn20_stream_encoder.h"
+#include "dcn20/dcn20_link_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
-#include "dcn20_dccg.h"
-#include "dcn20_vmid.h"
+#include "dcn20/dcn20_dccg.h"
+#include "dcn20/dcn20_vmid.h"
 #include "dce/dce_panel_cntl.h"
 
 #include "navi10_ip_offset.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
index bca22d8676969..914b234d7f6b7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
@@ -26,7 +26,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn201_init.h"
+#include "dcn201/dcn201_init.h"
 #include "dml/dcn20/dcn20_fpu.h"
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -36,16 +36,16 @@
 
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
-#include "dcn201_mpc.h"
-#include "dcn201_hubp.h"
+#include "dcn201/dcn201_mpc.h"
+#include "dcn201/dcn201_hubp.h"
 #include "irq/dcn201/irq_service_dcn201.h"
 #include "dcn201/dcn201_dpp.h"
 #include "dcn201/dcn201_hubbub.h"
-#include "dcn201_dccg.h"
-#include "dcn201_optc.h"
+#include "dcn201/dcn201_dccg.h"
+#include "dcn201/dcn201_optc.h"
 #include "dcn201/dcn201_hwseq.h"
 #include "dce110/dce110_hwseq.h"
-#include "dcn201_opp.h"
+#include "dcn201/dcn201_opp.h"
 #include "dcn201/dcn201_link_encoder.h"
 #include "dcn20/dcn20_stream_encoder.h"
 #include "dce/dce_clock_source.h"
@@ -55,7 +55,7 @@
 #include "dce110/dce110_resource.h"
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
-#include "dcn201_hubbub.h"
+#include "dcn201/dcn201_hubbub.h"
 #include "dcn10/dcn10_resource.h"
 
 #include "cyan_skillfish_ip_offset.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
index 42277b280586e..c07da45e1e2c6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
@@ -29,7 +29,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn21_init.h"
+#include "dcn21/dcn21_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -44,7 +44,7 @@
 #include "dcn20/dcn20_hubbub.h"
 #include "dcn20/dcn20_mpc.h"
 #include "dcn20/dcn20_hubp.h"
-#include "dcn21_hubp.h"
+#include "dcn21/dcn21_hubp.h"
 #include "irq/dcn21/irq_service_dcn21.h"
 #include "dcn20/dcn20_dpp.h"
 #include "dcn20/dcn20_optc.h"
@@ -61,7 +61,7 @@
 #include "dml/display_mode_vba.h"
 #include "dcn20/dcn20_dccg.h"
 #include "dcn21/dcn21_dccg.h"
-#include "dcn21_hubbub.h"
+#include "dcn21/dcn21_hubbub.h"
 #include "dcn10/dcn10_resource.h"
 #include "dce/dce_panel_cntl.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index 7b259cb5f4187..2b6dcb489b901 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn30_init.h"
+#include "dcn30/dcn30_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index f3b75f283aa25..511ff6b5b9856 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn301_init.h"
+#include "dcn301/dcn301_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -61,7 +61,7 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn30/dcn30_dio_stream_encoder.h"
 #include "dcn301/dcn301_dio_link_encoder.h"
-#include "dcn301_panel_cntl.h"
+#include "dcn301/dcn301_panel_cntl.h"
 
 #include "vangogh_ip_offset.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
index 63ac984a04f7e..5791b5cc28752 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
@@ -23,9 +23,9 @@
  *
  */
 
-#include "dcn302_init.h"
+#include "dcn302/dcn302_init.h"
 #include "dcn302_resource.h"
-#include "dcn302_dccg.h"
+#include "dcn302/dcn302_dccg.h"
 #include "irq/dcn302/irq_service_dcn302.h"
 
 #include "dcn30/dcn30_dio_link_encoder.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
index 49cb7fde416a4..25cd6236b054e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -23,9 +23,9 @@
  * Authors: AMD
  */
 
-#include "dcn303_init.h"
+#include "dcn303/dcn303_init.h"
 #include "dcn303_resource.h"
-#include "dcn303_dccg.h"
+#include "dcn303/dcn303_dccg.h"
 #include "irq/dcn303/irq_service_dcn303.h"
 
 #include "dcn30/dcn30_dio_link_encoder.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index 79416cfb22f09..31035fc3d8686 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -70,7 +70,7 @@
 #include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
-#include "dcn31_panel_cntl.h"
+#include "dcn31/dcn31_panel_cntl.h"
 
 #include "dcn30/dcn30_dwb.h"
 #include "dcn30/dcn30_mmhubbub.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 89b072447dba9..36e4c7bef403c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn32_init.h"
+#include "dcn32/dcn32_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -41,7 +41,7 @@
 #include "dcn31/dcn31_hubbub.h"
 #include "dcn32/dcn32_hubbub.h"
 #include "dcn32/dcn32_mpc.h"
-#include "dcn32_hubp.h"
+#include "dcn32/dcn32_hubp.h"
 #include "irq/dcn32/irq_service_dcn32.h"
 #include "dcn32/dcn32_dpp.h"
 #include "dcn32/dcn32_optc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index f7de3eca1225a..bedb70b98162b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -63,7 +63,7 @@
 #include "dcn31/dcn31_apg.h"
 #include "dcn31/dcn31_dio_link_encoder.h"
 #include "dcn32/dcn32_dio_link_encoder.h"
-#include "dcn321_dio_link_encoder.h"
+#include "dcn321/dcn321_dio_link_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
rename to drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 7a3faf2b1f068..a4027bf053c5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -78,7 +78,7 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
 #include "dcn35/dcn35_hwseq.h"
-#include "dcn35_dio_link_encoder.h"
+#include "dcn35/dcn35_dio_link_encoder.h"
 #include "dml/dcn31/dcn31_fpu.h" /*todo*/
 #include "dml/dcn35/dcn35_fpu.h"
 #include "dcn35/dcn35_dwb.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h
rename to drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
-- 
GitLab


From 68cfc5d8e459f50e5f46dca3b0f3c97a75f39975 Mon Sep 17 00:00:00 2001
From: Wenjing Liu <wenjing.liu@amd.com>
Date: Fri, 20 Oct 2023 16:26:19 -0400
Subject: [PATCH 0299/2340] drm/amd/display: Try to acquire a free OTG master
 not used in cur ctx first

[WHY & HOW]
The current otg master pipe allocation logic is not optimized based
current resource context. We should try to acquire a free OTG master not
used in cur cts first to avoid unnecessary pipe switch from current
state.

Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 6159d819c5c57..49f8c90f0d2bf 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -3586,6 +3586,7 @@ static void mark_seamless_boot_stream(
  *       |________|_______________|___________|_____________|
  */
 static bool acquire_otg_master_pipe_for_stream(
+		const struct dc_state *cur_ctx,
 		struct dc_state *new_ctx,
 		const struct resource_pool *pool,
 		struct dc_stream_state *stream)
@@ -3599,7 +3600,10 @@ static bool acquire_otg_master_pipe_for_stream(
 	int pipe_idx;
 	struct pipe_ctx *pipe_ctx = NULL;
 
-	pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
+	pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+			&cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+	if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+		pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
 	if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) {
 		pipe_ctx = &new_ctx->res_ctx.pipe_ctx[pipe_idx];
 		memset(pipe_ctx, 0, sizeof(*pipe_ctx));
@@ -3659,7 +3663,7 @@ enum dc_status resource_map_pool_resources(
 
 	if (!acquired)
 		/* acquire new resources */
-		acquired = acquire_otg_master_pipe_for_stream(
+		acquired = acquire_otg_master_pipe_for_stream(dc->current_state,
 				context, pool, stream);
 
 	pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
-- 
GitLab


From 673d6d73eba79a1205ac403b68ef63da1c823da2 Mon Sep 17 00:00:00 2001
From: Wenjing Liu <wenjing.liu@amd.com>
Date: Mon, 30 Oct 2023 15:21:12 -0400
Subject: [PATCH 0300/2340] drm/amd/display: Prefer currently used OTG master
 when acquiring free pipe

[WHY & HOW]
When acquiring an OTG master pipe we should prefer currently enabled OTG
master pipes first. If there are no free pipes used as current OTG
master pipe then we will try to acquire a currently unused free pipe as
new OTG master instead of tearing down current secondary pipes from ODM
or MPC combine.

Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Reviewed-by: Dillon Varone <dillon.varone@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/core/dc_resource.c | 37 ++++++++++++++++++-
 drivers/gpu/drm/amd/display/dc/inc/resource.h | 12 ++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 49f8c90f0d2bf..42a9277107430 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1764,6 +1764,29 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
 	return free_pipe_idx;
 }
 
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND;
+	const struct pipe_ctx *new_pipe, *cur_pipe;
+	int i;
+
+	for (i = 0; i < pool->pipe_count; i++) {
+		cur_pipe = &cur_res_ctx->pipe_ctx[i];
+		new_pipe = &new_res_ctx->pipe_ctx[i];
+
+		if (resource_is_pipe_type(cur_pipe, OTG_MASTER) &&
+				resource_is_pipe_type(new_pipe, FREE_PIPE)) {
+			free_pipe_idx = i;
+			break;
+		}
+	}
+
+	return free_pipe_idx;
+}
+
 int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine(
 		const struct resource_context *cur_res_ctx,
 		struct resource_context *new_res_ctx,
@@ -3600,8 +3623,20 @@ static bool acquire_otg_master_pipe_for_stream(
 	int pipe_idx;
 	struct pipe_ctx *pipe_ctx = NULL;
 
-	pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+	/*
+	 * Upper level code is responsible to optimize unnecessary addition and
+	 * removal for unchanged streams. So unchanged stream will keep the same
+	 * OTG master instance allocated. When current stream is removed and a
+	 * new stream is added, we want to reuse the OTG instance made available
+	 * by the removed stream first. If not found, we try to avoid of using
+	 * any free pipes already used in current context as this could tear
+	 * down exiting ODM/MPC/MPO configuration unnecessarily.
+	 */
+	pipe_idx = recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
 			&cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+	if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+		pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+				&cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
 	if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
 		pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
 	if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 06ca8bfb91e7d..0458d2d749f47 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -500,6 +500,18 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
 		struct resource_context *new_res_ctx,
 		const struct resource_pool *pool);
 
+/*
+ * Look for a free pipe in new resource context that is used in current resource
+ * context as an OTG master pipe.
+ *
+ * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
+ * pipe idx of the free pipe
+ */
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool);
+
 /*
  * Look for a free pipe in new resource context that is used as a secondary DPP
  * pipe in any MPCC combine in current resource context.
-- 
GitLab


From 5f70d4ff8095a2ad362d2a00eb8d9f7e20f3daa1 Mon Sep 17 00:00:00 2001
From: Daniel Miess <daniel.miess@amd.com>
Date: Thu, 12 Oct 2023 12:55:47 -0400
Subject: [PATCH 0301/2340] drm/amd/display: Enable DCN clock gating for DCN35

[WHY & HOW]
Enable DCN clock gating for DCN35.
Disable DTBCLK gate before link training
and re-enable afterwards

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Daniel Miess <daniel.miess@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h |  6 ++-
 .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 30 +++++++++++++
 .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h |  7 +++-
 .../drm/amd/display/dc/dcn35/dcn35_pg_cntl.c  | 10 +----
 .../drm/amd/display/dc/dcn35/dcn35_pg_cntl.h  |  1 -
 .../drm/amd/display/dc/hwss/dce/dce_hwseq.h   | 15 ++++++-
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 42 +++++++++++++------
 .../gpu/drm/amd/display/dc/inc/hw/pg_cntl.h   |  2 -
 .../dc/resource/dcn35/dcn35_resource.c        | 16 ++++++-
 .../dc/resource/dcn35/dcn35_resource.h        |  1 +
 .../include/asic_reg/dcn/dcn_3_5_0_sh_mask.h  |  8 ++++
 11 files changed, 108 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
index ab6d09c6fe348..76da59d8caaf5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
@@ -291,7 +291,11 @@
 	type SYMCLKB_FE_SRC_SEL;\
 	type SYMCLKC_FE_SRC_SEL;\
 	type SYMCLKD_FE_SRC_SEL;\
-	type SYMCLKE_FE_SRC_SEL;
+	type SYMCLKE_FE_SRC_SEL;\
+	type DTBCLK_P0_GATE_DISABLE;\
+	type DTBCLK_P1_GATE_DISABLE;\
+	type DTBCLK_P2_GATE_DISABLE;\
+	type DTBCLK_P3_GATE_DISABLE;\
 
 struct dccg_shift {
 	DCCG_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
index 479f3683c0b70..142efd390d862 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
@@ -256,6 +256,21 @@ static void dccg35_set_dtbclk_dto(
 	if (params->ref_dtbclk_khz && req_dtbclk_khz) {
 		uint32_t modulo, phase;
 
+		switch (params->otg_inst) {
+		case 0:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1);
+			break;
+		case 1:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1);
+			break;
+		case 2:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1);
+			break;
+		case 3:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1);
+			break;
+		}
+
 		// phase / modulo = dtbclk / dtbclk ref
 		modulo = params->ref_dtbclk_khz * 1000;
 		phase = req_dtbclk_khz * 1000;
@@ -280,6 +295,21 @@ static void dccg35_set_dtbclk_dto(
 		REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
 				PIPE_DTO_SRC_SEL[params->otg_inst], 2);
 	} else {
+		switch (params->otg_inst) {
+		case 0:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0);
+			break;
+		case 1:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 0);
+			break;
+		case 2:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 0);
+			break;
+		case 3:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 0);
+			break;
+		}
+
 		REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
 				DTBCLK_DTO_ENABLE[params->otg_inst], 0,
 				PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
index 423feb4c2f3f5..bde48bee0119b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
@@ -34,6 +34,7 @@
 #define DCCG_REG_LIST_DCN35() \
 	DCCG_REG_LIST_DCN314(),\
 	SR(DPPCLK_CTRL),\
+	SR(DCCG_GATE_DISABLE_CNTL5),\
 	SR(DCCG_GATE_DISABLE_CNTL6),\
 	SR(DCCG_GLOBAL_FGCG_REP_CNTL),\
 	SR(SYMCLKA_CLOCK_ENABLE),\
@@ -174,7 +175,11 @@
 	DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\
 	DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\
 	DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\
-	DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh)
+	DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
 
 struct dccg *dccg35_create(
 		struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
index 46f71ff08fd17..0f60c40e1fc50 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
@@ -332,13 +332,6 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	pg_cntl->pg_res_enable[PG_DCIO] = power_on;
 }
 
-void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on)
-{
-	struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
-
-	REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 1 : 0);
-}
-
 static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl)
 {
 	struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
@@ -508,8 +501,7 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = {
 	.mpcc_pg_control = pg_cntl35_mpcc_pg_control,
 	.opp_pg_control = pg_cntl35_opp_pg_control,
 	.optc_pg_control = pg_cntl35_optc_pg_control,
-	.dwb_pg_control = pg_cntl35_dwb_pg_control,
-	.set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22
+	.dwb_pg_control = pg_cntl35_dwb_pg_control
 };
 
 struct pg_cntl *pg_cntl35_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
index 069dae08e2224..3de240884d22f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
@@ -183,7 +183,6 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl,
 	unsigned int optc_inst, bool power_on);
 void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on);
 void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl);
-void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on);
 
 struct pg_cntl *pg_cntl35_create(
 	struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
index 44b4df6469d1a..52f045cfd52a9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
@@ -682,6 +682,7 @@ struct dce_hwseq_registers {
 	uint32_t DCHUBBUB_ARB_HOSTVM_CNTL;
 	uint32_t HPO_TOP_HW_CONTROL;
 	uint32_t DMU_CLK_CNTL;
+	uint32_t DCCG_GATE_DISABLE_CNTL4;
 	uint32_t DCCG_GATE_DISABLE_CNTL5;
 };
  /* set field name */
@@ -1199,7 +1200,19 @@ struct dce_hwseq_registers {
 	type PHYBSYMCLK_ROOT_GATE_DISABLE;\
 	type PHYCSYMCLK_ROOT_GATE_DISABLE;\
 	type PHYDSYMCLK_ROOT_GATE_DISABLE;\
-	type PHYESYMCLK_ROOT_GATE_DISABLE;
+	type PHYESYMCLK_ROOT_GATE_DISABLE;\
+	type DTBCLK_P0_GATE_DISABLE;\
+	type DTBCLK_P1_GATE_DISABLE;\
+	type DTBCLK_P2_GATE_DISABLE;\
+	type DTBCLK_P3_GATE_DISABLE;\
+	type DPSTREAMCLK0_GATE_DISABLE;\
+	type DPSTREAMCLK1_GATE_DISABLE;\
+	type DPSTREAMCLK2_GATE_DISABLE;\
+	type DPSTREAMCLK3_GATE_DISABLE;\
+	type DPIASYMCLK0_GATE_DISABLE;\
+	type DPIASYMCLK1_GATE_DISABLE;\
+	type DPIASYMCLK2_GATE_DISABLE;\
+	type DPIASYMCLK3_GATE_DISABLE;
 
 struct dce_hwseq_shift {
 	HWSEQ_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 5a8258287438e..39260371beb9d 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -145,17 +145,36 @@ void dcn35_init_hw(struct dc *dc)
 		hws->funcs.bios_golden_init(dc);
 	}
 
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL2,  0);
-
-	/* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */
-	REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYBSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYCSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYDSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYESYMCLK_ROOT_GATE_DISABLE, 1);
+	if (!dc->debug.disable_clock_gate) {
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL2,  0);
+
+		/* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */
+		REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYBSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYCSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYDSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYESYMCLK_ROOT_GATE_DISABLE, 1);
+
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL4,
+				DPIASYMCLK0_GATE_DISABLE, 0,
+				DPIASYMCLK1_GATE_DISABLE, 0,
+				DPIASYMCLK2_GATE_DISABLE, 0,
+				DPIASYMCLK3_GATE_DISABLE, 0);
+
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0xFFFFFFFF);
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5,
+				DTBCLK_P0_GATE_DISABLE, 0,
+				DTBCLK_P1_GATE_DISABLE, 0,
+				DTBCLK_P2_GATE_DISABLE, 0,
+				DTBCLK_P3_GATE_DISABLE, 0);
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK0_GATE_DISABLE, 0,
+				DPSTREAMCLK1_GATE_DISABLE, 0,
+				DPSTREAMCLK2_GATE_DISABLE, 0,
+				DPSTREAMCLK3_GATE_DISABLE, 0);
 
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf);
+	}
 
 	// Initialize the dccg
 	if (res_pool->dccg->funcs->dccg_init)
@@ -332,9 +351,6 @@ void dcn35_init_hw(struct dc *dc)
 	if (dc->res_pool->pg_cntl) {
 		if (dc->res_pool->pg_cntl->funcs->init_pg_status)
 			dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl);
-
-		if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22)
-			dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
index b9812afb886be..00ea3864dd4df 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
@@ -47,8 +47,6 @@ struct pg_cntl_funcs {
 	void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on);
 	void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
 	void (*init_pg_status)(struct pg_cntl *pg_cntl);
-
-	void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on);
 };
 
 #endif //__DC_PG_CNTL_H__
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index a4027bf053c5f..5c935d94a95cd 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -626,7 +626,19 @@ static struct dce_hwseq_registers hwseq_reg;
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
-	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh)
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh)
 
 static const struct dce_hwseq_shift hwseq_shift = {
 		HWSEQ_DCN35_MASK_SH_LIST(__SHIFT)
@@ -705,7 +717,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.disable_dcc = DCC_ENABLE,
 	.disable_dpp_power_gate = true,
 	.disable_hubp_power_gate = true,
-	.disable_clock_gate = true,
+	.disable_clock_gate = false,
 	.disable_dsc_power_gate = true,
 	.vsr_support = true,
 	.performance_trace = false,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
index 99aea102e3f74..a51c4a9eaafe5 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
@@ -166,6 +166,7 @@ struct resource_pool *dcn35_create_resource_pool(
 	SR(MMHUBBUB_MEM_PWR_CNTL), \
 	SR(DCCG_GATE_DISABLE_CNTL), \
 	SR(DCCG_GATE_DISABLE_CNTL2), \
+	SR(DCCG_GATE_DISABLE_CNTL4), \
 	SR(DCCG_GATE_DISABLE_CNTL5), \
 	SR(DCFCLK_CNTL),\
 	SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
index b646648792113..fca72e2ec9294 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
@@ -6220,12 +6220,20 @@
 #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE__SHIFT                                         0x3
 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE__SHIFT                                         0x4
 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE__SHIFT                                        0x11
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE__SHIFT                                              0x17
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE__SHIFT                                              0x18
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE__SHIFT                                              0x19
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE__SHIFT                                              0x1a
 #define DCCG_GATE_DISABLE_CNTL4__PHYA_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000001L
 #define DCCG_GATE_DISABLE_CNTL4__PHYB_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000002L
 #define DCCG_GATE_DISABLE_CNTL4__PHYC_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000004L
 #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000008L
 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000010L
 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE_MASK                                          0x00020000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE_MASK                                                0x00800000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE_MASK                                                0x01000000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE_MASK                                                0x02000000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE_MASK                                                0x04000000L
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_SRC_SEL__SHIFT                                                         0x0
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_EN__SHIFT                                                              0x3
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK1_SRC_SEL__SHIFT                                                         0x4
-- 
GitLab


From cc6201b773f12388c234aa10145322ccc429959e Mon Sep 17 00:00:00 2001
From: Duncan Ma <duncan.ma@amd.com>
Date: Tue, 31 Oct 2023 16:57:36 -0400
Subject: [PATCH 0302/2340] drm/amd/display: Add disable timeout option

[WHY]
Driver continues running whenever there is
is timeout from smu or dmcub.

It is difficult to track failure state
when dcn, dc or dmcub changes on root failure.

[HOW]
Add disable_timeout option to halt driver
whenever there is a failure in response.

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Duncan Ma <duncan.ma@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/dc/clk_mgr/dcn35/dcn35_smu.c  |  3 ++
 drivers/gpu/drm/amd/display/dc/dc.h           |  1 +
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 29 +++++++++++++++----
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
index b6b8c3ca1572c..af0a0f2925950 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -116,6 +116,9 @@ static uint32_t dcn35_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
 			msleep(delay_us/1000);
 		else if (delay_us > 0)
 			udelay(delay_us);
+
+		if (clk_mgr->base.ctx->dc->debug.disable_timeout)
+			max_retries++;
 	} while (max_retries--);
 
 	return res_val;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 9316b737a8ba8..ae54ef6837f98 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -978,6 +978,7 @@ struct dc_debug_options {
 	bool psp_disabled_wa;
 	unsigned int ips2_eval_delay_us;
 	unsigned int ips2_entry_delay_us;
+	bool disable_timeout;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 0e07699c1e835..53400cc05b5b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -241,7 +241,12 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
 
 	// Wait for DMUB to process command
 	if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) {
-		status = dmub_srv_wait_for_idle(dmub, 100000);
+		if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+			do {
+				status = dmub_srv_wait_for_idle(dmub, 100000);
+			} while (status != DMUB_STATUS_OK);
+		} else
+			status = dmub_srv_wait_for_idle(dmub, 100000);
 
 		if (status != DMUB_STATUS_OK) {
 			DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status);
@@ -1147,10 +1152,16 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
 		return true;
 
 	if (wait) {
-		status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
-		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
-			return false;
+		if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+			do {
+				status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+			} while (status != DMUB_STATUS_OK);
+		} else {
+			status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+			if (status != DMUB_STATUS_OK) {
+				DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
+				return false;
+			}
 		}
 	} else
 		return dmub_srv_is_hw_pwr_up(dc_dmub_srv->dmub);
@@ -1187,7 +1198,7 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 	const uint32_t max_num_polls = 10000;
 	uint32_t allow_state = 0;
 	uint32_t commit_state = 0;
-	uint32_t i;
+	int i;
 
 	if (dc->debug.dmcub_emulation)
 		return;
@@ -1220,6 +1231,9 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 						break;
 
 					udelay(1);
+
+					if (dc->debug.disable_timeout)
+						i--;
 				}
 				ASSERT(i < max_num_polls);
 
@@ -1242,6 +1256,9 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 					break;
 
 				udelay(1);
+
+				if (dc->debug.disable_timeout)
+					i--;
 			}
 			ASSERT(i < max_num_polls);
 		}
-- 
GitLab


From c21a764a98cb59d673cad3da64f35f4dec951951 Mon Sep 17 00:00:00 2001
From: Krunoslav Kovac <krunoslav.kovac@amd.com>
Date: Wed, 1 Nov 2023 15:59:56 -0400
Subject: [PATCH 0303/2340] drm/amd/display: Send PQ bit in AMD VSIF

[WHY & HOW]
PB9 bit 5 was added to signal PQ EOTF in AMD vendor specific infoframe.
This change sets it when appropriate.

Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Krunoslav Kovac <krunoslav.kovac@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index ccecddafeb05c..47296d155c3a5 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -693,10 +693,12 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
 	if (app_tf != TRANSFER_FUNC_UNKNOWN) {
 		infopacket->valid = true;
 
-		if (app_tf != TRANSFER_FUNC_PQ2084) {
+		if (app_tf == TRANSFER_FUNC_PQ2084)
+			infopacket->sb[9] |= 0x20; // PB9 = [Bit 5 = PQ EOTF Active]
+		else {
 			infopacket->sb[6] |= 0x08;  // PB6 = [Bit 3 = Native Color Active]
 			if (app_tf == TRANSFER_FUNC_GAMMA_22)
-				infopacket->sb[9] |= 0x04;  // PB6 = [Bit 2 = Gamma 2.2 EOTF Active]
+				infopacket->sb[9] |= 0x04;  // PB9 = [Bit 2 = Gamma 2.2 EOTF Active]
 		}
 	}
 }
-- 
GitLab


From c29085d29562990559163302d9e28d1e88223d90 Mon Sep 17 00:00:00 2001
From: Fangzhi Zuo <jerry.zuo@amd.com>
Date: Mon, 30 Oct 2023 16:23:08 -0400
Subject: [PATCH 0304/2340] drm/amd/display: Enable DSC Flag in MST Mode
 Validation

[WHY & HOW]
When dsc is possible, MST mode validation includes:
1. if maximum dsc compression cannot fit into end to end bw, mode pruned
2. if native bw cannot fit into end to end bw, try to enabled dsc to see
   whether a feasible dsc config can be found
3. if native bw can fit into end to end bw, mode supported

Reviewed-by: Wayne Lin <wayne.lin@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Fangzhi Zuo <jerry.zuo@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 44 +++++++++++--------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index ec87e31c6fe88..8d7d4024f2859 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1602,9 +1602,8 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	struct dc_link_settings cur_link_settings;
 	unsigned int end_to_end_bw_in_kbps = 0;
 	unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
-	unsigned int max_compressed_bw_in_kbps = 0;
 	struct dc_dsc_bw_range bw_range = {0};
-	uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
+	struct dc_dsc_config_options dsc_options = {0};
 
 	/*
 	 * Consider the case with the depth of the mst topology tree is equal or less than 2
@@ -1620,30 +1619,39 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	   (aconnector->mst_output_port->passthrough_aux ||
 	    aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
 		cur_link_settings = stream->link->verified_link_cap;
+		upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, &cur_link_settings);
+		down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
 
-		upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
-							       &cur_link_settings);
-		down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
-
-		/* pick the bottleneck */
-		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
-					    down_link_bw_in_kbps);
-
-		/*
-		 * use the maximum dsc compression bandwidth as the required
-		 * bandwidth for the mode
-		 */
-		max_compressed_bw_in_kbps = bw_range.min_kbps;
+		/* pick the end to end bw bottleneck */
+		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps);
 
-		if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) {
-			DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n");
+		if (end_to_end_bw_in_kbps < bw_range.min_kbps) {
+			DRM_DEBUG_DRIVER("maximum dsc compression cannot fit into end-to-end bandwidth\n");
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 		}
+
+		if (end_to_end_bw_in_kbps < bw_range.stream_kbps) {
+			dc_dsc_get_default_config_option(stream->link->dc, &dsc_options);
+			dsc_options.max_target_bpp_limit_override_x16 = aconnector->base.display_info.max_dsc_bpp * 16;
+			if (dc_dsc_compute_config(stream->sink->ctx->dc->res_pool->dscs[0],
+					&stream->sink->dsc_caps.dsc_dec_caps,
+					&dsc_options,
+					end_to_end_bw_in_kbps,
+					&stream->timing,
+					dc_link_get_highest_encoding_format(stream->link),
+					&stream->timing.dsc_cfg)) {
+				stream->timing.flags.DSC = 1;
+				DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc and dsc config found\n");
+			} else {
+				DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc but dsc config not found\n");
+				return DC_FAIL_BANDWIDTH_VALIDATE;
+			}
+		}
 	} else {
 		/* check if mode could be supported within full_pbn */
 		bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
 		pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
-		if (pbn > full_pbn)
+		if (pbn > aconnector->mst_output_port->full_pbn)
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 	}
 
-- 
GitLab


From ed4ae8f77f2c4ff05244db99330d1eff828d9f7d Mon Sep 17 00:00:00 2001
From: Anthony Koo <anthony.koo@amd.com>
Date: Sat, 4 Nov 2023 01:53:13 -0400
Subject: [PATCH 0305/2340] drm/amd/display: Add new command to disable replay
 timing resync

[WHY & HOW]
Add new command to disable replay timing resync

Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Anthony Koo <anthony.koo@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index aa6e6923afede..55573083bc31c 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -2876,6 +2876,10 @@ enum dmub_cmd_replay_type {
 	 * Set power opt and coasting vtotal.
 	 */
 	DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL	= 4,
+	/**
+	 * Set disabled iiming sync.
+	 */
+	DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED	= 5,
 };
 
 /**
@@ -3038,6 +3042,27 @@ struct dmub_cmd_replay_set_power_opt_data {
 	uint32_t power_opt;
 };
 
+/**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_cmd_replay_set_timing_sync_data {
+	/**
+	 * Panel Instance.
+	 * Panel isntance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+
+	/**
+	 * Explicit padding to 4 byte boundary.
+	 */
+	uint8_t pad[3];
+	/**
+	 * REPLAY set_timing_sync
+	 */
+	bool timing_sync_supported;
+};
+
 /**
  * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
  */
@@ -3104,6 +3129,20 @@ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal {
 	struct dmub_cmd_replay_set_coasting_vtotal_data replay_set_coasting_vtotal_data;
 };
 
+/**
+ * Definition of a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_rb_cmd_replay_set_timing_sync {
+	/**
+	 * Command header.
+	 */
+	struct dmub_cmd_header header;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+	 */
+	struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data;
+};
+
 /**
  * Set of HW components that can be locked.
  *
@@ -4237,6 +4276,8 @@ union dmub_rb_cmd {
 	 * Definition of a DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command.
 	 */
 	struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal;
+
+	struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync;
 };
 
 /**
-- 
GitLab


From 8a2553d5c7ade00d1b508bbd418d5c4803c12fdd Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Date: Mon, 6 Nov 2023 19:30:51 -0700
Subject: [PATCH 0306/2340] drm/amd/display: Add missing chips for HDCP

[WHAT]
Add missing HDCP ID in the message id enum.

Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/include/hdcp_msg_types.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
index 42229b4effdce..eced9ad91f1d5 100644
--- a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
+++ b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
@@ -69,6 +69,11 @@ enum hdcp_message_id {
 	HDCP_MESSAGE_ID_READ_RXSTATUS,
 	HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE,
 
+	/* PS175 chip */
+
+	HDCP_MESSAGE_ID_WRITE_PS175_CMD,
+	HDCP_MESSAGE_ID_READ_PS175_RSP,
+
 	HDCP_MESSAGE_ID_MAX
 };
 
-- 
GitLab


From 466a7d115326ece682c2b60d1c77d1d0b9010b4f Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 10 Nov 2023 16:34:51 -0600
Subject: [PATCH 0307/2340] drm/amd: Use the first non-dGPU PCI device for BW
 limits

When bandwidth limits are looked up using pcie_bandwidth_available()
virtual links such as USB4 are analyzed which might not represent the
real speed. Furthermore devices may change speeds autonomously which
may introduce conditional variation to the results reported in the
status registers.

Instead look at the capabilities of first PCI device outside of
dGPU to decide upper limits that the dGPU will work at.

For eGPU this effectively means that it will use the speed of the link
partner.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925#note_2145860
Link: https://www.usb.org/document-library/usb4r-specification-v20
      USB4 V2 with Errata and ECN through June 2023
      Section 11.2.1
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 ++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 151f8b950b726..ec3bf949ee2ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5737,6 +5737,39 @@ recover_end:
 	return r;
 }
 
+/**
+ * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * first physical partner to an AMD dGPU.
+ * This will exclude any virtual switches and links.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+					    enum pci_bus_speed *speed,
+					    enum pcie_link_width *width)
+{
+	struct pci_dev *parent = adev->pdev;
+
+	if (!speed || !width)
+		return;
+
+	*speed = PCI_SPEED_UNKNOWN;
+	*width = PCIE_LNK_WIDTH_UNKNOWN;
+
+	while ((parent = pci_upstream_bridge(parent))) {
+		/* skip upstream/downstream switches internal to dGPU*/
+		if (parent->vendor == PCI_VENDOR_ID_ATI)
+			continue;
+		*speed = pcie_get_speed_cap(parent);
+		*width = pcie_get_width_cap(parent);
+		break;
+	}
+}
+
 /**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
@@ -5770,8 +5803,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
 		return;
 
-	pcie_bandwidth_available(adev->pdev, NULL,
-				 &platform_speed_cap, &platform_link_width);
+	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+					&platform_link_width);
 
 	if (adev->pm.pcie_gen_mask == 0) {
 		/* asic caps */
-- 
GitLab


From d9b3a066dfcd3fe50b4dc561d8510c43c0ad8863 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 10 Nov 2023 16:34:52 -0600
Subject: [PATCH 0308/2340] drm/amd: Exclude dGPUs in eGPU enclosures from DPM
 quirks

The PCIe speed capabilities advertised by a USB4 or TBT3 link are
limited to PCIe gen 1 per the USB4 spec. In reality the speed will
change dynamically based on fabric conditions and other traffic.

DPM is disabled when dGPUs are connected directly to Intel hosts
since the PCIe root port isn't able to handle dynamic speed
switching.

As this limitation is specifically for PCIe root ports in the SoC,
don't apply it when connected to an eGPU enclosure connected to an
Intel host.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2885
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ec3bf949ee2ff..b5edf40b5d03f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1551,11 +1551,15 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
  */
-static bool amdgpu_device_pcie_dynamic_switching_supported(void)
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
 {
 #if IS_ENABLED(CONFIG_X86)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 
+	/* eGPU change speeds based on USB4 fabric conditions */
+	if (dev_is_removable(adev->dev))
+		return true;
+
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		return false;
 #endif
@@ -2395,7 +2399,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
-	if (!amdgpu_device_pcie_dynamic_switching_supported())
+	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
 
 	total = true;
-- 
GitLab


From 59e4db5375f587954eb779ac9c7888a6c81c306b Mon Sep 17 00:00:00 2001
From: Aric Cyr <aric.cyr@amd.com>
Date: Sun, 5 Nov 2023 20:22:06 -0500
Subject: [PATCH 0309/2340] drm/amd/display: Promote DC to 3.2.260

- Add missing chips for HDCP
- Add new command to disable replay timing resync
- Fix encoder disable logic
- Enable DSC Flag in MST Mode Validation
- Change the DMCUB mailbox memory location from FB to inbox
- Add disable timeout option
- Negate IPS allow and commit bits
- Enable DCN clock gating for DCN35
- Prefer currently used OTG master when acquiring free pipe
- Try to acquire a free OTG master not used in cur ctx first
- Clear dpcd_sink_ext_caps if not set
- Enable fast plane updates on DCN3.2 and above
- Add null checks for 8K60 lightup
- Refactor resource into component directory
- Fix DSC not Enabled on Direct MST Sink
- Guard against invalid RPTR/WPTR being set
- Enable CM low mem power optimization
- Fix a debugfs null pointer error

Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Aric Cyr <aric.cyr@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index ae54ef6837f98..541e781267b92 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -49,7 +49,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.259"
+#define DC_VER "3.2.260"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
GitLab


From 75fb313c55fa102f973c440f55dc63ffc61f3b54 Mon Sep 17 00:00:00 2001
From: Shiwu Zhang <shiwu.zhang@amd.com>
Date: Tue, 31 Oct 2023 12:32:54 +0800
Subject: [PATCH 0310/2340] drm/amdgpu: expose the connected port num info
 through sysfs

By catting the xgmi_port_num sysfs node, it prints out the info in the
format of <src node id>:<src port num> -> <dst node id>:<dst port num>
for one xgmi link.

For example, in case of 4 sockets fully and evenly connected setup, it
would be like as below for the first node in the hive.
01:02 -> 02:03
01:03 -> 02:02
01:07 -> 03:04
01:04 -> 03:07
01:06 -> 04:05
01:05 -> 04:06
Based on the fact that there is two xgmi links between each socket pair,
"01:02 -> 02:03" means that the current socket in question use the port 2
to connect with port 3 of the second node in the hive and so on.

v2: print out the src/dst node id for each xgmi link (lijo)
v3: replace the current_node++ with +1 to align with dst node (le)
    and use the dev_err instead of pr_err (lijo)
v4: fix checkpatch warning (alex)

Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com>
Acked-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 44 ++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index bd20cb3b98198..44d8c1a11e1b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -413,6 +413,38 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
 	return sysfs_emit(buf, "%s\n", buf);
 }
 
+static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i, j, size = 0;
+	int current_node;
+	/*
+	 * get the node id in the sysfs for the current socket and show
+	 * it in the port num info output in the sysfs for easy reading.
+	 * it is NOT the one retrieved from xgmi ta.
+	 */
+	for (i = 0; i < top->num_nodes; i++) {
+		if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) {
+			current_node = i;
+			break;
+		}
+	}
+
+	for (i = 0; i < top->num_nodes; i++) {
+		for (j = 0; j < top->nodes[i].num_links; j++)
+			/* node id in sysfs starts from 1 rather than 0 so +1 here */
+			size += sysfs_emit_at(buf, size, "%02x:%02x ->  %02x:%02x\n", current_node + 1,
+					      top->nodes[i].port_num[j].src_xgmi_port_num, i + 1,
+					      top->nodes[i].port_num[j].dst_xgmi_port_num);
+	}
+
+	return size;
+}
+
 #define AMDGPU_XGMI_SET_FICAA(o)	((o) | 0x456801)
 static ssize_t amdgpu_xgmi_show_error(struct device *dev,
 				      struct device_attribute *attr,
@@ -452,6 +484,7 @@ static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL);
 static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
 static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
 static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL);
 
 static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 					 struct amdgpu_hive_info *hive)
@@ -487,6 +520,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 	if (ret)
 		pr_err("failed to create xgmi_num_links\n");
 
+	/* Create xgmi port num file if supported */
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+		ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num);
+		if (ret)
+			dev_err(adev->dev, "failed to create xgmi_port_num\n");
+	}
+
 	/* Create sysfs link to hive info folder on the first device */
 	if (hive->kobj.parent != (&adev->dev->kobj)) {
 		ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,
@@ -517,6 +557,8 @@ remove_file:
 	device_remove_file(adev->dev, &dev_attr_xgmi_error);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+		device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
 
 success:
 	return ret;
@@ -533,6 +575,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
 	device_remove_file(adev->dev, &dev_attr_xgmi_error);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+		device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
 
 	if (hive->kobj.parent != (&adev->dev->kobj))
 		sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
-- 
GitLab


From a3cc7dbe9957f856b84a504687a85e22e02a49db Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 28 Sep 2023 15:18:16 +0530
Subject: [PATCH 0311/2340] drm/amdgpu: add pm metrics structure definition

Define the pm metrics structures to be exposed via sysfs.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index b14231f470dc5..f9c438d16c565 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -1127,4 +1127,19 @@ struct gpu_metrics_v3_0 {
 	/* Metrics table alpha filter time constant [us] */
 	uint32_t			time_filter_alphavalue;
 };
+
+struct amdgpu_pmmetrics_header {
+	uint16_t structure_size;
+	uint16_t pad;
+	uint32_t mp1_ip_discovery_version;
+	uint32_t pmfw_version;
+	uint32_t pmmetrics_version;
+};
+
+struct amdgpu_pm_metrics {
+	struct amdgpu_pmmetrics_header common_header;
+
+	uint8_t data[];
+};
+
 #endif
-- 
GitLab


From 425285d39afddaf4a9dab36045b816af0cc3e400 Mon Sep 17 00:00:00 2001
From: Prike Liang <Prike.Liang@amd.com>
Date: Thu, 9 Nov 2023 11:37:18 +0800
Subject: [PATCH 0312/2340] drm/amdgpu: add amdgpu runpm usage trace for
 separate funcs

Add trace for amdgpu runpm separate funcs usage and this will
help debugging on the case of runpm usage missed to dereference.
In the normal case the runpm usage count referred by one kind
of functionality pairwise and usage should be changed from 1 to 0,
otherwise there will be an issue in the amdgpu runpm usage
dereference.

Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h   | 15 +++++++++++++++
 3 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e7e87a3b2601e..decbbe3d4f06e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,6 +42,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/pci-p2pdma.h>
 #include <linux/pm_runtime.h>
+#include "amdgpu_trace.h"
 
 /**
  * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -63,6 +64,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 		attach->peer2peer = false;
 
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(1, __func__);
 	if (r < 0)
 		goto out;
 
@@ -70,6 +72,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 
 out:
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(0, __func__);
 	return r;
 }
 
@@ -90,6 +93,7 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
 
 	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(0, __func__);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index dc230212746a2..70bff8cecfda7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -183,6 +183,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(1, __func__);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
 		struct dma_fence *old;
@@ -310,6 +311,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 		dma_fence_put(fence);
 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		trace_amdgpu_runpm_reference_dumps(0, __func__);
 	} while (last_seq != seq);
 
 	return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 2fd1bfb35916f..f539b1d002343 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -554,6 +554,21 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
 		      __entry->value)
 );
 
+TRACE_EVENT(amdgpu_runpm_reference_dumps,
+	    TP_PROTO(uint32_t index, const char *func),
+	    TP_ARGS(index, func),
+	    TP_STRUCT__entry(
+			     __field(uint32_t, index)
+			     __string(func, func)
+			     ),
+	    TP_fast_assign(
+			   __entry->index = index;
+			   __assign_str(func, func);
+			   ),
+	    TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n",
+		      __entry->index,
+		      __get_str(func))
+);
 #undef AMDGPU_JOB_GET_TIMELINE_NAME
 #endif
 
-- 
GitLab


From 699d392903c3cebb7d2a2a3505ec9047c419dcd7 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Sun, 12 Nov 2023 10:09:44 +0530
Subject: [PATCH 0313/2340] drm/amdgpu: Add function parameter 'xcc_mask' not
 described in 'amdgpu_vm_flush_compute_tlb'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the below:

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1373: warning: Function parameter or member 'xcc_mask' not described in 'amdgpu_vm_flush_compute_tlb'

Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d59154ddaaed3..7da71b6a9dc6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1443,6 +1443,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  * @flush_type: flush type
+ * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
  *
  * Flush TLB if needed for a compute VM.
  *
-- 
GitLab


From 47fbee5f27ed9ef43340db03ba2b58a673a6e72b Mon Sep 17 00:00:00 2001
From: Jeffrey Hugo <quic_jhugo@quicinc.com>
Date: Fri, 3 Nov 2023 09:33:02 -0600
Subject: [PATCH 0314/2340] accel/qaic: Update MAX_ORDER use to be inclusive

MAX_ORDER was redefined so that valid allocations to the page allocator
are in the range of 0..MAX_ORDER, inclusive in the commit
23baf831a32c ("mm, treewide: redefine MAX_ORDER sanely").

We are treating MAX_ORDER as an exclusive value, and thus could be
requesting larger allocations.  Update our use to match the redefinition
of MAX_ORDER.

Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103153302.20642-1-quic_jhugo@quicinc.com
---
 drivers/accel/qaic/qaic_data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 8da81768f2abb..8998c28e566e1 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -452,7 +452,7 @@ static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 s
 		 * later
 		 */
 		buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE;
-		max_order = min(MAX_ORDER - 1, get_order(size));
+		max_order = min(MAX_ORDER, get_order(size));
 	} else {
 		/* allocate a single page for book keeping */
 		nr_pages = 1;
-- 
GitLab


From 89a410b2e416f2216b29183d6b8537abeccc7abb Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 15 Nov 2023 10:21:18 -0800
Subject: [PATCH 0315/2340] drm/i915/dg2: Wa_18028616096 now applies to all DG2

The workaround database was just updated to extend this workaround to
DG2-G11 (whereas previously it applied only to G10 and G12).

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115182117.2551522-2-matthew.d.roper@intel.com
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 63205edfea509..9bc0654efdc0c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2937,6 +2937,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 		 * Wa_22015475538:dg2
 		 */
 		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+		/* Wa_18028616096 */
+		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
 	}
 
 	if (IS_DG2_G11(i915)) {
@@ -2965,11 +2968,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 			   true);
 	}
 
-	if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) {
-		/* Wa_18028616096 */
-		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
-	}
-
 	if (IS_XEHPSDV(i915)) {
 		/* Wa_1409954639 */
 		wa_mcr_masked_en(wal,
-- 
GitLab


From 3c7a5eb700661e8905ab4e50c2d09c6568125280 Mon Sep 17 00:00:00 2001
From: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Date: Thu, 16 Nov 2023 13:25:11 -0800
Subject: [PATCH 0316/2340] drm/i915/mtl: Update Wa_22018931422

Commit 78cc55e0b64c ("drm/i915/mcr: Hold GT forcewake during steering
operations") introduced the workaround which was in early stages. With a
valid lineage number update Workaround for future tracking.

Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116212511.1760446-1-radhakrishna.sripada@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 34913912d8ae2..e253750a51c56 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -388,8 +388,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
 		 * registers.  This wakeref will be released in the unlock
 		 * routine.
 		 *
-		 * This is expected to become a formally documented/numbered
-		 * workaround soon.
+		 * Wa_22018931422
 		 */
 		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT);
 
-- 
GitLab


From e04d24c4e8062b5ed0bee7a871423a454d24ffed Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Thu, 16 Nov 2023 22:44:53 -0500
Subject: [PATCH 0317/2340] drm/print: Handle NULL drm device in __drm_printk()

drm_{err,warn,...}() use __drm_printk() which takes a drm device pointer and
uses the embedded device pointer to print the device. This facility handles
NULL device pointer, but not NULL drm device pointer. This patch makes
__drm_printk() also handle a NULL drm device pointer. The printed output is
identical to if drm->dev had been NULL.

Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Reviewed-by: Jani Nikula <jani.nikula@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117035427.68125-2-ltuikov89@gmail.com
---
 include/drm/drm_print.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index a93a387f8a1a1..dd4883df876a6 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -453,7 +453,7 @@ void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev,
 
 /* Helper for struct drm_device based logging. */
 #define __drm_printk(drm, level, type, fmt, ...)			\
-	dev_##level##type((drm)->dev, "[drm] " fmt, ##__VA_ARGS__)
+	dev_##level##type((drm) ? (drm)->dev : NULL, "[drm] " fmt, ##__VA_ARGS__)
 
 
 #define drm_info(drm, fmt, ...)					\
-- 
GitLab


From 8bcac1be55e188e5bac3353b550c9cddb70fbbed Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 14:25:31 -0600
Subject: [PATCH 0318/2340] dt-bindings: display: nv3051d: Update NewVision
 NV3051D compatibles

Update the NewVision NV3051D compatible strings by adding a new panel,
the powkiddy,rk2023-panel, and removing another entry, the
anbernic,rg353v-panel.

The rk2023-panel is similar to the rg353p-panel but has slightly
different timings so it needs a new string.

The rg353v-panel is duplicate to the rg353p-panel, so remove it. No
current devices use it and changes to the driver mean it is no longer
valid as a compatible string.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20231117202536.1387815-2-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117202536.1387815-2-macroalpha82@gmail.com
---
 .../devicetree/bindings/display/panel/newvision,nv3051d.yaml    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml b/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml
index cce775a87f871..7a634fbc465e0 100644
--- a/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml
+++ b/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml
@@ -21,7 +21,7 @@ properties:
       - enum:
           - anbernic,rg351v-panel
           - anbernic,rg353p-panel
-          - anbernic,rg353v-panel
+          - powkiddy,rk2023-panel
       - const: newvision,nv3051d
 
   reg: true
-- 
GitLab


From 697ebc319b942403a6fee894607fd2cd47cca069 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 14:25:32 -0600
Subject: [PATCH 0319/2340] drm/panel: nv3051d: Hold panel in reset for
 unprepare

Improve the panel's ability to restore from suspend by holding the
panel in suspend after unprepare.

Fixes: b1d39f0f4264 ("drm/panel: Add NewVision NV3051D MIPI-DSI LCD panel")
Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Link: https://lore.kernel.org/r/20231117202536.1387815-3-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117202536.1387815-3-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-newvision-nv3051d.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3051d.c b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
index 79de6c886292b..c44c6945662f2 100644
--- a/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
@@ -261,6 +261,8 @@ static int panel_nv3051d_unprepare(struct drm_panel *panel)
 
 	usleep_range(10000, 15000);
 
+	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+
 	regulator_disable(ctx->vdd);
 
 	return 0;
-- 
GitLab


From 0aa1cfa3d287930cbecc52cd2b38683a4bf98463 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 14:25:33 -0600
Subject: [PATCH 0320/2340] drm/panel: nv3051d: Add Powkiddy RK2023 Panel
 Support

Refactor the driver to add support for the powkiddy,rk2023-panel
panel. This panel is extremely similar to the rg353p-panel but
requires a smaller vertical back porch and isn't as tolerant of
higher speeds. Note that while all of these panels are identical in
size (70x57) it is possible future panels may not be.

Tested on my RG351V, RG353P, RG353V, and RK2023.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Link: https://lore.kernel.org/r/20231117202536.1387815-4-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117202536.1387815-4-macroalpha82@gmail.com
---
 .../gpu/drm/panel/panel-newvision-nv3051d.c   | 55 +++++++++++++++----
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3051d.c b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
index c44c6945662f2..94d89ffd596be 100644
--- a/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
@@ -28,6 +28,7 @@ struct nv3051d_panel_info {
 	unsigned int num_modes;
 	u16 width_mm, height_mm;
 	u32 bus_flags;
+	u32 mode_flags;
 };
 
 struct panel_nv3051d {
@@ -387,15 +388,7 @@ static int panel_nv3051d_probe(struct mipi_dsi_device *dsi)
 
 	dsi->lanes = 4;
 	dsi->format = MIPI_DSI_FMT_RGB888;
-	dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
-			  MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET;
-
-	/*
-	 * The panel in the RG351V is identical to the 353P, except it
-	 * requires MIPI_DSI_CLOCK_NON_CONTINUOUS to operate correctly.
-	 */
-	if (of_device_is_compatible(dev->of_node, "anbernic,rg351v-panel"))
-		dsi->mode_flags |= MIPI_DSI_CLOCK_NON_CONTINUOUS;
+	dsi->mode_flags = ctx->panel_info->mode_flags;
 
 	drm_panel_init(&ctx->panel, &dsi->dev, &panel_nv3051d_funcs,
 		       DRM_MODE_CONNECTOR_DSI);
@@ -483,16 +476,56 @@ static const struct drm_display_mode nv3051d_rgxx3_modes[] = {
 	},
 };
 
-static const struct nv3051d_panel_info nv3051d_rgxx3_info = {
+static const struct drm_display_mode nv3051d_rk2023_modes[] = {
+	{
+		.hdisplay       = 640,
+		.hsync_start    = 640 + 40,
+		.hsync_end      = 640 + 40 + 2,
+		.htotal         = 640 + 40 + 2 + 80,
+		.vdisplay       = 480,
+		.vsync_start    = 480 + 18,
+		.vsync_end      = 480 + 18 + 2,
+		.vtotal         = 480 + 18 + 2 + 4,
+		.clock          = 24150,
+		.flags          = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	},
+};
+
+static const struct nv3051d_panel_info nv3051d_rg351v_info = {
 	.display_modes = nv3051d_rgxx3_modes,
 	.num_modes = ARRAY_SIZE(nv3051d_rgxx3_modes),
 	.width_mm = 70,
 	.height_mm = 57,
 	.bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+	.mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+		      MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET |
+		      MIPI_DSI_CLOCK_NON_CONTINUOUS,
+};
+
+static const struct nv3051d_panel_info nv3051d_rg353p_info = {
+	.display_modes = nv3051d_rgxx3_modes,
+	.num_modes = ARRAY_SIZE(nv3051d_rgxx3_modes),
+	.width_mm = 70,
+	.height_mm = 57,
+	.bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+	.mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+		      MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET,
+};
+
+static const struct nv3051d_panel_info nv3051d_rk2023_info = {
+	.display_modes = nv3051d_rk2023_modes,
+	.num_modes = ARRAY_SIZE(nv3051d_rk2023_modes),
+	.width_mm = 70,
+	.height_mm = 57,
+	.bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+	.mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+		      MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET,
 };
 
 static const struct of_device_id newvision_nv3051d_of_match[] = {
-	{ .compatible = "newvision,nv3051d", .data = &nv3051d_rgxx3_info },
+	{ .compatible = "anbernic,rg351v-panel", .data = &nv3051d_rg351v_info },
+	{ .compatible = "anbernic,rg353p-panel", .data = &nv3051d_rg353p_info },
+	{ .compatible = "powkiddy,rk2023-panel", .data = &nv3051d_rk2023_info },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, newvision_nv3051d_of_match);
-- 
GitLab


From c18b1b49764a1db824ed74286338b6283b619286 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 13:44:01 -0600
Subject: [PATCH 0321/2340] drm/panel-elida-kd35t133: trival: update panel size
 from 5.5 to 3.5

The comments at the top of the driver state the panel size incorrectly
as 5.5" instead of 3.5".

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Link: https://lore.kernel.org/r/20231117194405.1386265-2-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117194405.1386265-2-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-elida-kd35t133.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
index e7be15b681021..6cd8536c09ffd 100644
--- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c
+++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Elida kd35t133 5.5" MIPI-DSI panel driver
+ * Elida kd35t133 3.5" MIPI-DSI panel driver
  * Copyright (C) 2020 Theobroma Systems Design und Consulting GmbH
  *
  * based on
-- 
GitLab


From 03c5b2a5f6c39fe4e090346536cf1c14ee18b61e Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 13:44:02 -0600
Subject: [PATCH 0322/2340] drm/panel-elida-kd35t133: hold panel in reset for
 unprepare

For devices like the Anbernic RG351M and RG351P the panel is wired to
an always on regulator. When the device suspends and wakes up, there
are some slight artifacts on the screen that go away over time. If
instead we hold the panel in reset status after it is unprepared,
this does not happen.

Fixes: 5b6603360c12 ("drm/panel: add panel driver for Elida KD35T133 panels")
Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Link: https://lore.kernel.org/r/20231117194405.1386265-3-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117194405.1386265-3-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-elida-kd35t133.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
index 6cd8536c09ffd..f1fc4a26f4476 100644
--- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c
+++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
@@ -104,6 +104,8 @@ static int kd35t133_unprepare(struct drm_panel *panel)
 		return ret;
 	}
 
+	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+
 	regulator_disable(ctx->iovcc);
 	regulator_disable(ctx->vdd);
 
-- 
GitLab


From 3fc828b8ce2362982237f46a7cd46677f9094a8e Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 13:44:03 -0600
Subject: [PATCH 0323/2340] drm/panel-elida-kd35t133: drop
 drm_connector_set_orientation_from_panel

Stop calling drm_connector_set_orientation_from_panel() as its now
called by the panel bridge directly when it is initialized.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Link: https://lore.kernel.org/r/20231117194405.1386265-4-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117194405.1386265-4-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-elida-kd35t133.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
index f1fc4a26f4476..29b4ee63d83b5 100644
--- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c
+++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
@@ -211,11 +211,6 @@ static int kd35t133_get_modes(struct drm_panel *panel,
 	connector->display_info.width_mm = mode->width_mm;
 	connector->display_info.height_mm = mode->height_mm;
 	drm_mode_probed_add(connector, mode);
-	/*
-	 * TODO: Remove once all drm drivers call
-	 * drm_connector_set_orientation_from_panel()
-	 */
-	drm_connector_set_panel_orientation(connector, ctx->orientation);
 
 	return 1;
 }
-- 
GitLab


From 5dea0c3fedee65413271a5700e653eff633e9a7f Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 13:44:04 -0600
Subject: [PATCH 0324/2340] drm/panel-elida-kd35t133: Drop shutdown logic

The driver shutdown is duplicate as it calls drm_unprepare and
drm_disable which are called anyway when associated drivers are
shutdown/removed.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231117194405.1386265-5-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117194405.1386265-5-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-elida-kd35t133.c | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
index 29b4ee63d83b5..fea3d9e849051 100644
--- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c
+++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
@@ -296,27 +296,11 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi)
 	return 0;
 }
 
-static void kd35t133_shutdown(struct mipi_dsi_device *dsi)
-{
-	struct kd35t133 *ctx = mipi_dsi_get_drvdata(dsi);
-	int ret;
-
-	ret = drm_panel_unprepare(&ctx->panel);
-	if (ret < 0)
-		dev_err(&dsi->dev, "Failed to unprepare panel: %d\n", ret);
-
-	ret = drm_panel_disable(&ctx->panel);
-	if (ret < 0)
-		dev_err(&dsi->dev, "Failed to disable panel: %d\n", ret);
-}
-
 static void kd35t133_remove(struct mipi_dsi_device *dsi)
 {
 	struct kd35t133 *ctx = mipi_dsi_get_drvdata(dsi);
 	int ret;
 
-	kd35t133_shutdown(dsi);
-
 	ret = mipi_dsi_detach(dsi);
 	if (ret < 0)
 		dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret);
@@ -337,7 +321,6 @@ static struct mipi_dsi_driver kd35t133_driver = {
 	},
 	.probe	= kd35t133_probe,
 	.remove = kd35t133_remove,
-	.shutdown = kd35t133_shutdown,
 };
 module_mipi_dsi_driver(kd35t133_driver);
 
-- 
GitLab


From 9f5ac1969df6dc0c2282454b147138c32d065b41 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 17 Nov 2023 13:44:05 -0600
Subject: [PATCH 0325/2340] drm/panel-elida-kd35t133: Drop prepare/unprepare
 logic

Drop the prepare/unprepare logic, as this is now tracked elsewhere
since this commit [1].

[1] commit d2aacaf07395 ("drm/panel: Check for already prepared/enabled in drm_panel")

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231117194405.1386265-6-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117194405.1386265-6-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-elida-kd35t133.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
index fea3d9e849051..00791ea81e90c 100644
--- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c
+++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c
@@ -43,7 +43,6 @@ struct kd35t133 {
 	struct regulator *vdd;
 	struct regulator *iovcc;
 	enum drm_panel_orientation orientation;
-	bool prepared;
 };
 
 static inline struct kd35t133 *panel_to_kd35t133(struct drm_panel *panel)
@@ -91,9 +90,6 @@ static int kd35t133_unprepare(struct drm_panel *panel)
 	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
 	int ret;
 
-	if (!ctx->prepared)
-		return 0;
-
 	ret = mipi_dsi_dcs_set_display_off(dsi);
 	if (ret < 0)
 		dev_err(ctx->dev, "failed to set display off: %d\n", ret);
@@ -109,8 +105,6 @@ static int kd35t133_unprepare(struct drm_panel *panel)
 	regulator_disable(ctx->iovcc);
 	regulator_disable(ctx->vdd);
 
-	ctx->prepared = false;
-
 	return 0;
 }
 
@@ -120,9 +114,6 @@ static int kd35t133_prepare(struct drm_panel *panel)
 	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
 	int ret;
 
-	if (ctx->prepared)
-		return 0;
-
 	dev_dbg(ctx->dev, "Resetting the panel\n");
 	ret = regulator_enable(ctx->vdd);
 	if (ret < 0) {
@@ -166,8 +157,6 @@ static int kd35t133_prepare(struct drm_panel *panel)
 
 	msleep(50);
 
-	ctx->prepared = true;
-
 	return 0;
 
 disable_iovcc:
-- 
GitLab


From 44eea8d08078bbce4d0f76c16706ab57ec38da62 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Mon, 13 Nov 2023 08:54:56 +0000
Subject: [PATCH 0326/2340] drm/i915: Remove return type from
 i915_drm_client_remove_object

There is no need to return anything in the version which was merged and
also the implementation of the !CONFIG_PROC_FS wasn't returning anything,
causing a build failure there.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: e4ae85e364fc ("drm/i915: Add ability for tracking buffer objects per client")
Cc: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311110104.8TlHVxUI-lkp@intel.com/
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113085457.199053-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drm_client.c | 6 ++----
 drivers/gpu/drm/i915/i915_drm_client.h | 5 +++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 7efffdaa508da..be9acfd9410e7 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -191,22 +191,20 @@ void i915_drm_client_add_object(struct i915_drm_client *client,
 	spin_unlock_irqrestore(&client->objects_lock, flags);
 }
 
-bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+void i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
 {
 	struct i915_drm_client *client = fetch_and_zero(&obj->client);
 	unsigned long flags;
 
 	/* Object may not be associated with a client. */
 	if (!client)
-		return false;
+		return;
 
 	spin_lock_irqsave(&client->objects_lock, flags);
 	list_del_rcu(&obj->client_link);
 	spin_unlock_irqrestore(&client->objects_lock, flags);
 
 	i915_drm_client_put(client);
-
-	return true;
 }
 
 void i915_drm_client_add_context_objects(struct i915_drm_client *client,
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 69cedfcd3d69e..a439dd7899366 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -70,7 +70,7 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
 #ifdef CONFIG_PROC_FS
 void i915_drm_client_add_object(struct i915_drm_client *client,
 				struct drm_i915_gem_object *obj);
-bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
+void i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
 void i915_drm_client_add_context_objects(struct i915_drm_client *client,
 					 struct intel_context *ce);
 #else
@@ -79,7 +79,8 @@ static inline void i915_drm_client_add_object(struct i915_drm_client *client,
 {
 }
 
-static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+static inline void
+i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
 {
 }
 
-- 
GitLab


From e31b380741bfa27d274a9f9610fd732e1204ea24 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Mon, 13 Nov 2023 08:54:57 +0000
Subject: [PATCH 0327/2340] drm/i915: Add __rcu annotation to cursor when
 iterating client objects

__rcu annotation is needed to avoid the sparse warnings such as:

  .../i915_drm_client.c:92:9: sparse: sparse: incompatible types in comparison expression (different address spaces):
  .../i915_drm_client.c:92:9: sparse:    struct list_head [noderef] __rcu *
  .../i915_drm_client.c:92:9: sparse:    struct list_head *

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 968853033d8a ("drm/i915: Implement fdinfo memory stats printing")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311110610.h0m6ydI5-lkp@intel.com/
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113085457.199053-2-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drm_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index be9acfd9410e7..fa6852713beed 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -78,7 +78,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 	struct drm_i915_private *i915 = fpriv->i915;
 	struct drm_i915_gem_object *obj;
 	struct intel_memory_region *mr;
-	struct list_head *pos;
+	struct list_head __rcu *pos;
 	unsigned int id;
 
 	/* Public objects. */
-- 
GitLab


From dfed6b58d54f3a5d7e6bc1fb060e2c936330eba2 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 16 Nov 2023 08:44:56 +0000
Subject: [PATCH 0328/2340] drm/i915/gsc: Mark internal GSC engine with
 reserved uabi class

The GSC CS is not exposed to the user, so we skipped assigning a uabi
class number for it. However, the trace logs use the uabi class and
instance to identify the engine, so leaving uabi class unset makes the
GSC CS show up as the RCS in those logs.

Given that the engine is not exposed to the user, we can't add a new
case in the uabi enum, so we insted internally define a kernel
internal class as -1.

At the same time remove special handling for the name and complete
the uabi_classes array so internal class is automatically correctly
assigned.

Engine will show as 65535:0 other0 in the logs/traces which should
be unique enough.

v2:
 * Fix uabi class u8 vs u16 type confusion.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 194babe26bdc ("drm/i915/mtl: don't expose GSC command streamer to the user")
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116084456.291533-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 39 ++++++++++++---------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 118164ddbb2e8..833987015b8bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -41,12 +41,15 @@ void intel_engine_add_user(struct intel_engine_cs *engine)
 	llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist);
 }
 
-static const u8 uabi_classes[] = {
+#define I915_NO_UABI_CLASS ((u16)(-1))
+
+static const u16 uabi_classes[] = {
 	[RENDER_CLASS] = I915_ENGINE_CLASS_RENDER,
 	[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
 	[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
 	[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
 	[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
+	[OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. */
 };
 
 static int engine_cmp(void *priv, const struct list_head *A,
@@ -200,6 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16
 
 void intel_engines_driver_register(struct drm_i915_private *i915)
 {
+	u16 name_instance, other_instance = 0;
 	struct legacy_ring ring = {};
 	struct list_head *it, *next;
 	struct rb_node **p, *prev;
@@ -216,27 +220,28 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
 		if (intel_gt_has_unrecoverable_error(engine->gt))
 			continue; /* ignore incomplete engines */
 
-		/*
-		 * We don't want to expose the GSC engine to the users, but we
-		 * still rename it so it is easier to identify in the debug logs
-		 */
-		if (engine->id == GSC0) {
-			engine_rename(engine, "gsc", 0);
-			continue;
-		}
-
 		GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
 		engine->uabi_class = uabi_classes[engine->class];
+		if (engine->uabi_class == I915_NO_UABI_CLASS) {
+			name_instance = other_instance++;
+		} else {
+			GEM_BUG_ON(engine->uabi_class >=
+				   ARRAY_SIZE(i915->engine_uabi_class_count));
+			name_instance =
+				i915->engine_uabi_class_count[engine->uabi_class]++;
+		}
+		engine->uabi_instance = name_instance;
 
-		GEM_BUG_ON(engine->uabi_class >=
-			   ARRAY_SIZE(i915->engine_uabi_class_count));
-		engine->uabi_instance =
-			i915->engine_uabi_class_count[engine->uabi_class]++;
-
-		/* Replace the internal name with the final user facing name */
+		/*
+		 * Replace the internal name with the final user and log facing
+		 * name.
+		 */
 		engine_rename(engine,
 			      intel_engine_class_repr(engine->class),
-			      engine->uabi_instance);
+			      name_instance);
+
+		if (engine->uabi_class == I915_NO_UABI_CLASS)
+			continue;
 
 		rb_link_node(&engine->uabi_node, prev, p);
 		rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
-- 
GitLab


From b49e894c3fd83f67aae2a4778b98ea3838e41020 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Mon, 30 Oct 2023 18:40:12 +0100
Subject: [PATCH 0329/2340] drm/i915: Replace custom intel runtime_pm tracker
 with ref_tracker library

Beside reusing existing code, the main advantage of ref_tracker is
tracking per instance of wakeref. It allows also to catch double
put.
On the other side we lose information about the first acquire and
the last release, but the advantages outweigh it.

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030-ref_tracker_i915-v1-1-006fe6b96421@intel.com
---
 drivers/gpu/drm/i915/Kconfig.debug            |   4 +
 .../drm/i915/display/intel_display_power.c    |   2 +-
 drivers/gpu/drm/i915/i915_driver.c            |   2 +-
 drivers/gpu/drm/i915/intel_runtime_pm.c       | 221 ++----------------
 drivers/gpu/drm/i915/intel_runtime_pm.h       |  11 +-
 drivers/gpu/drm/i915/intel_wakeref.c          |  28 +++
 drivers/gpu/drm/i915/intel_wakeref.h          |  35 ++-
 7 files changed, 86 insertions(+), 217 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 2d21930d55015..8d82d7a1a9c5f 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -24,7 +24,9 @@ config DRM_I915_DEBUG
 	select DEBUG_FS
 	select PREEMPT_COUNT
 	select I2C_CHARDEV
+	select REF_TRACKER
 	select STACKDEPOT
+	select STACKTRACE
 	select DRM_DP_AUX_CHARDEV
 	select X86_MSR # used by igt/pm_rpm
 	select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
@@ -231,7 +233,9 @@ config DRM_I915_DEBUG_RUNTIME_PM
 	bool "Enable extra state checking for runtime PM"
 	depends on DRM_I915
 	default n
+	select REF_TRACKER
 	select STACKDEPOT
+	select STACKTRACE
 	help
 	  Choose this option to turn on extra state checking for the
 	  runtime PM functionality. This may introduce overhead during
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index e25785ae1c20c..f78bfcf2ce000 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -405,7 +405,7 @@ print_async_put_domains_state(struct i915_power_domains *power_domains)
 						     struct drm_i915_private,
 						     display.power.domains);
 
-	drm_dbg(&i915->drm, "async_put_wakeref %u\n",
+	drm_dbg(&i915->drm, "async_put_wakeref %lu\n",
 		power_domains->async_put_wakeref);
 
 	print_power_domains(power_domains, "async_put_domains[0]",
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 802de2c6decb7..db6f7c30adde1 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1037,7 +1037,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915)
 	intel_power_domains_driver_remove(i915);
 	enable_rpm_wakeref_asserts(&i915->runtime_pm);
 
-	intel_runtime_pm_driver_release(&i915->runtime_pm);
+	intel_runtime_pm_driver_last_release(&i915->runtime_pm);
 }
 
 static bool suspend_to_idle(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 8743153fad878..91491111dbd5a 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -52,182 +52,37 @@
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 
-#include <linux/sort.h>
-
-#define STACKDEPTH 8
-
-static noinline depot_stack_handle_t __save_depot_stack(void)
-{
-	unsigned long entries[STACKDEPTH];
-	unsigned int n;
-
-	n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
-	return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
-}
-
 static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	spin_lock_init(&rpm->debug.lock);
-	stack_depot_init();
+	ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev));
 }
 
-static noinline depot_stack_handle_t
+static intel_wakeref_t
 track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	depot_stack_handle_t stack, *stacks;
-	unsigned long flags;
-
-	if (rpm->no_wakeref_tracking)
-		return -1;
-
-	stack = __save_depot_stack();
-	if (!stack)
+	if (!rpm->available || rpm->no_wakeref_tracking)
 		return -1;
 
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-
-	if (!rpm->debug.count)
-		rpm->debug.last_acquire = stack;
-
-	stacks = krealloc(rpm->debug.owners,
-			  (rpm->debug.count + 1) * sizeof(*stacks),
-			  GFP_NOWAIT | __GFP_NOWARN);
-	if (stacks) {
-		stacks[rpm->debug.count++] = stack;
-		rpm->debug.owners = stacks;
-	} else {
-		stack = -1;
-	}
-
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	return stack;
+	return intel_ref_tracker_alloc(&rpm->debug);
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-					     depot_stack_handle_t stack)
+					     intel_wakeref_t wakeref)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
-	unsigned long flags, n;
-	bool found = false;
-
-	if (unlikely(stack == -1))
+	if (!rpm->available || rpm->no_wakeref_tracking)
 		return;
 
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-	for (n = rpm->debug.count; n--; ) {
-		if (rpm->debug.owners[n] == stack) {
-			memmove(rpm->debug.owners + n,
-				rpm->debug.owners + n + 1,
-				(--rpm->debug.count - n) * sizeof(stack));
-			found = true;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	if (drm_WARN(&i915->drm, !found,
-		     "Unmatched wakeref (tracking %lu), count %u\n",
-		     rpm->debug.count, atomic_read(&rpm->wakeref_count))) {
-		char *buf;
-
-		buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-		if (!buf)
-			return;
-
-		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-		DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
-
-		stack = READ_ONCE(rpm->debug.last_release);
-		if (stack) {
-			stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-			DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
-		}
-
-		kfree(buf);
-	}
+	intel_ref_tracker_free(&rpm->debug, wakeref);
 }
 
-static int cmphandle(const void *_a, const void *_b)
+static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
 {
-	const depot_stack_handle_t * const a = _a, * const b = _b;
-
-	if (*a < *b)
-		return -1;
-	else if (*a > *b)
-		return 1;
-	else
-		return 0;
-}
-
-static void
-__print_intel_runtime_pm_wakeref(struct drm_printer *p,
-				 const struct intel_runtime_pm_debug *dbg)
-{
-	unsigned long i;
-	char *buf;
-
-	buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-	if (!buf)
-		return;
-
-	if (dbg->last_acquire) {
-		stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref last acquired:\n%s", buf);
-	}
-
-	if (dbg->last_release) {
-		stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref last released:\n%s", buf);
-	}
-
-	drm_printf(p, "Wakeref count: %lu\n", dbg->count);
-
-	sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL);
-
-	for (i = 0; i < dbg->count; i++) {
-		depot_stack_handle_t stack = dbg->owners[i];
-		unsigned long rep;
-
-		rep = 1;
-		while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
-			rep++, i++;
-		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
-	}
-
-	kfree(buf);
-}
-
-static noinline void
-__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
-		       struct intel_runtime_pm_debug *saved)
-{
-	*saved = *debug;
-
-	debug->owners = NULL;
-	debug->count = 0;
-	debug->last_release = __save_depot_stack();
-}
-
-static void
-dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
-{
-	if (debug->count) {
-		struct drm_printer p = drm_debug_printer("i915");
-
-		__print_intel_runtime_pm_wakeref(&p, debug);
-	}
-
-	kfree(debug->owners);
+	ref_tracker_dir_exit(&rpm->debug);
 }
 
 static noinline void
 __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
 {
-	struct intel_runtime_pm_debug dbg = {};
 	unsigned long flags;
 
 	if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
@@ -235,60 +90,14 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
 					 flags))
 		return;
 
-	__untrack_all_wakerefs(&rpm->debug, &dbg);
+	ref_tracker_dir_print_locked(&rpm->debug, INTEL_REFTRACK_PRINT_LIMIT);
 	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	dump_and_free_wakeref_tracking(&dbg);
-}
-
-static noinline void
-untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
-{
-	struct intel_runtime_pm_debug dbg = {};
-	unsigned long flags;
-
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-	__untrack_all_wakerefs(&rpm->debug, &dbg);
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	dump_and_free_wakeref_tracking(&dbg);
 }
 
 void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
 				    struct drm_printer *p)
 {
-	struct intel_runtime_pm_debug dbg = {};
-
-	do {
-		unsigned long alloc = dbg.count;
-		depot_stack_handle_t *s;
-
-		spin_lock_irq(&rpm->debug.lock);
-		dbg.count = rpm->debug.count;
-		if (dbg.count <= alloc) {
-			memcpy(dbg.owners,
-			       rpm->debug.owners,
-			       dbg.count * sizeof(*s));
-		}
-		dbg.last_acquire = rpm->debug.last_acquire;
-		dbg.last_release = rpm->debug.last_release;
-		spin_unlock_irq(&rpm->debug.lock);
-		if (dbg.count <= alloc)
-			break;
-
-		s = krealloc(dbg.owners,
-			     dbg.count * sizeof(*s),
-			     GFP_NOWAIT | __GFP_NOWARN);
-		if (!s)
-			goto out;
-
-		dbg.owners = s;
-	} while (1);
-
-	__print_intel_runtime_pm_wakeref(p, &dbg);
-
-out:
-	kfree(dbg.owners);
+	intel_ref_tracker_show(&rpm->debug, p);
 }
 
 #else
@@ -297,14 +106,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 }
 
-static depot_stack_handle_t
+static intel_wakeref_t
 track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 	return -1;
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-					     intel_wakeref_t wref)
+					     intel_wakeref_t wakeref)
 {
 }
 
@@ -639,7 +448,11 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
 		 "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n",
 		 intel_rpm_raw_wakeref_count(count),
 		 intel_rpm_wakelock_count(count));
+}
 
+void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm)
+{
+	intel_runtime_pm_driver_release(rpm);
 	untrack_all_intel_runtime_pm_wakerefs(rpm);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index f79cda7a2503d..f3cb7d4c7000f 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -77,15 +77,7 @@ struct intel_runtime_pm {
 	 * paired rpm_put) we can remove corresponding pairs of and keep
 	 * the array trimmed to active wakerefs.
 	 */
-	struct intel_runtime_pm_debug {
-		spinlock_t lock;
-
-		depot_stack_handle_t last_acquire;
-		depot_stack_handle_t last_release;
-
-		depot_stack_handle_t *owners;
-		unsigned long count;
-	} debug;
+	struct ref_tracker_dir debug;
 #endif
 };
 
@@ -189,6 +181,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_enable(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_disable(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm);
+void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm);
 
 intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
 intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 623a690893868..2401b88b55a40 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -191,3 +191,31 @@ void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf)
 	intel_wakeref_auto(wf, 0);
 	INTEL_WAKEREF_BUG_ON(wf->wakeref);
 }
+
+void intel_ref_tracker_show(struct ref_tracker_dir *dir,
+			    struct drm_printer *p)
+{
+	const size_t buf_size = PAGE_SIZE;
+	char *buf, *sb, *se;
+	size_t count;
+
+	buf = kmalloc(buf_size, GFP_NOWAIT);
+	if (!buf)
+		return;
+
+	count = ref_tracker_dir_snprint(dir, buf, buf_size);
+	if (!count)
+		goto free;
+	/* printk does not like big buffers, so we split it */
+	for (sb = buf; *sb; sb = se + 1) {
+		se = strchrnul(sb, '\n');
+		drm_printf(p, "%.*s", (int)(se - sb + 1), sb);
+		if (!*se)
+			break;
+	}
+	if (count >= buf_size)
+		drm_printf(p, "\n...dropped %zd extra bytes of leak report.\n",
+			   count + 1 - buf_size);
+free:
+	kfree(buf);
+}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index ec881b0973689..909cad13ac1f7 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -7,16 +7,25 @@
 #ifndef INTEL_WAKEREF_H
 #define INTEL_WAKEREF_H
 
+#include <drm/drm_print.h>
+
 #include <linux/atomic.h>
 #include <linux/bitfield.h>
 #include <linux/bits.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/refcount.h>
+#include <linux/ref_tracker.h>
+#include <linux/slab.h>
 #include <linux/stackdepot.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 
+typedef unsigned long intel_wakeref_t;
+
+#define INTEL_REFTRACK_DEAD_COUNT 16
+#define INTEL_REFTRACK_PRINT_LIMIT 16
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
 #define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr)
 #else
@@ -26,8 +35,6 @@
 struct intel_runtime_pm;
 struct intel_wakeref;
 
-typedef depot_stack_handle_t intel_wakeref_t;
-
 struct intel_wakeref_ops {
 	int (*get)(struct intel_wakeref *wf);
 	int (*put)(struct intel_wakeref *wf);
@@ -261,6 +268,30 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf)
  */
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf);
 
+#define INTEL_WAKEREF_DEF ((intel_wakeref_t)(-1))
+
+static inline intel_wakeref_t intel_ref_tracker_alloc(struct ref_tracker_dir *dir)
+{
+	struct ref_tracker *user = NULL;
+
+	ref_tracker_alloc(dir, &user, GFP_NOWAIT);
+
+	return (intel_wakeref_t)user ?: INTEL_WAKEREF_DEF;
+}
+
+static inline void intel_ref_tracker_free(struct ref_tracker_dir *dir,
+					  intel_wakeref_t handle)
+{
+	struct ref_tracker *user;
+
+	user = (handle == INTEL_WAKEREF_DEF) ? NULL : (void *)handle;
+
+	ref_tracker_free(dir, &user);
+}
+
+void intel_ref_tracker_show(struct ref_tracker_dir *dir,
+			    struct drm_printer *p);
+
 struct intel_wakeref_auto {
 	struct drm_i915_private *i915;
 	struct timer_list timer;
-- 
GitLab


From 5e4e06e4087eb91b0e5405ed42e792415d055e45 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Mon, 30 Oct 2023 18:40:13 +0100
Subject: [PATCH 0330/2340] drm/i915: Track gt pm wakerefs

Track every intel_gt_pm_get() until its corresponding release in
intel_gt_pm_put() by returning a cookie to the caller for acquire that
must be passed by on released. When there is an imbalance, we can see who
either tried to free a stale wakeref, or who forgot to free theirs.

v2: track recently added calls in gen8_ggtt_bind_get_ce and
    destroyed_worker_func

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030-ref_tracker_i915-v1-2-006fe6b96421@intel.com
---
 drivers/gpu/drm/i915/Kconfig.debug            | 14 +++++++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 14 ++++---
 .../i915/gem/selftests/i915_gem_coherency.c   | 10 +++--
 .../drm/i915/gem/selftests/i915_gem_mman.c    | 14 ++++---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 13 +++++--
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  3 +-
 drivers/gpu/drm/i915/gt/intel_context.h       |  4 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  7 ++--
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  2 +
 .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c          | 16 ++++----
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         | 12 +++---
 drivers/gpu/drm/i915/gt/intel_gt_pm.h         | 38 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_engine_cs.c  | 20 ++++++----
 drivers/gpu/drm/i915/gt/selftest_gt_pm.c      |  5 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c      | 10 +++--
 drivers/gpu/drm/i915/gt/selftest_rps.c        | 17 +++++----
 drivers/gpu/drm/i915/gt/selftest_slpc.c       |  5 ++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 14 ++++---
 drivers/gpu/drm/i915/i915_pmu.c               | 16 ++++----
 drivers/gpu/drm/i915/intel_wakeref.c          |  7 +++-
 drivers/gpu/drm/i915/intel_wakeref.h          | 38 +++++++++++++++++--
 24 files changed, 197 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 8d82d7a1a9c5f..5b7162076850c 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -40,6 +40,7 @@ config DRM_I915_DEBUG
 	select DRM_I915_DEBUG_GEM_ONCE
 	select DRM_I915_DEBUG_MMIO
 	select DRM_I915_DEBUG_RUNTIME_PM
+	select DRM_I915_DEBUG_WAKEREF
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
 	select DRM_I915_SELFTEST
 	default n
@@ -244,3 +245,16 @@ config DRM_I915_DEBUG_RUNTIME_PM
 	  Recommended for driver developers only.
 
 	  If in doubt, say "N"
+
+config DRM_I915_DEBUG_WAKEREF
+	bool "Enable extra tracking for wakerefs"
+	depends on DRM_I915
+	select REF_TRACKER
+	select STACKDEPOT
+	select STACKTRACE
+	help
+	  Choose this option to turn on extra state checking and usage
+	  tracking for the wakerefPM functionality. This may introduce
+	  overhead during driver runtime.
+
+	  If in doubt, say "N"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 45b9d9e34b8b8..b1aa62dfb155d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -253,6 +253,8 @@ struct i915_execbuffer {
 	struct intel_gt *gt; /* gt for the execbuf */
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
+	intel_wakeref_t wakeref;
+	intel_wakeref_t wakeref_gt0;
 
 	/** our requests to build */
 	struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -2719,13 +2721,13 @@ eb_select_engine(struct i915_execbuffer *eb)
 
 	for_each_child(ce, child)
 		intel_context_get(child);
-	intel_gt_pm_get(gt);
+	eb->wakeref = intel_gt_pm_get(ce->engine->gt);
 	/*
 	 * Keep GT0 active on MTL so that i915_vma_parked() doesn't
 	 * free VMAs while execbuf ioctl is validating VMAs.
 	 */
 	if (gt->info.id)
-		intel_gt_pm_get(to_gt(gt->i915));
+		eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
 
 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
 		err = intel_context_alloc_state(ce);
@@ -2765,9 +2767,9 @@ eb_select_engine(struct i915_execbuffer *eb)
 
 err:
 	if (gt->info.id)
-		intel_gt_pm_put(to_gt(gt->i915));
+		intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
 
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(ce->engine->gt, eb->wakeref);
 	for_each_child(ce, child)
 		intel_context_put(child);
 	intel_context_put(ce);
@@ -2785,8 +2787,8 @@ eb_put_engine(struct i915_execbuffer *eb)
 	 * i915_vma_parked() from interfering while execbuf validates vmas.
 	 */
 	if (eb->gt->info.id)
-		intel_gt_pm_put(to_gt(eb->gt->i915));
-	intel_gt_pm_put(eb->gt);
+		intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
+	intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
 	for_each_child(eb->context, child)
 		intel_context_put(child);
 	intel_context_put(eb->context);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3bef1beec7cbb..3fd68a099a85e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -85,6 +85,7 @@ out:
 
 static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	u32 __iomem *map;
 	int err = 0;
@@ -99,7 +100,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
@@ -112,12 +113,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	i915_vma_unpin_iomap(vma);
 
 out_rpm:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
 static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	u32 __iomem *map;
 	int err = 0;
@@ -132,7 +134,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
@@ -145,7 +147,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	i915_vma_unpin_iomap(vma);
 
 out_rpm:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 72957a36a36be..2c51a2c452fc5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -630,14 +630,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
-	intel_gt_pm_get(to_gt(i915));
+	intel_gt_pm_get_untracked(to_gt(i915));
 	cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
 	igt_flush_test(i915);
-	intel_gt_pm_put(to_gt(i915));
+	intel_gt_pm_put_untracked(to_gt(i915));
 	i915_gem_driver_register__shrinker(i915);
 }
 
@@ -778,6 +778,7 @@ err_obj:
 
 static int gtt_set(struct drm_i915_gem_object *obj)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void __iomem *map;
 	int err = 0;
@@ -786,7 +787,7 @@ static int gtt_set(struct drm_i915_gem_object *obj)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
@@ -798,12 +799,13 @@ static int gtt_set(struct drm_i915_gem_object *obj)
 	i915_vma_unpin_iomap(vma);
 
 out:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
 static int gtt_check(struct drm_i915_gem_object *obj)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void __iomem *map;
 	int err = 0;
@@ -812,7 +814,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
@@ -828,7 +830,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
 	i915_vma_unpin_iomap(vma);
 
 out:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index ecc990ec1b952..d650beb8ed22f 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b)
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
+	intel_wakeref_t wakeref;
+
 	/*
 	 * Since we are waiting on a request, the GPU should be busy
 	 * and should have its own rpm reference.
 	 */
-	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
+	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
+	if (GEM_WARN_ON(!wakeref))
 		return;
 
 	/*
@@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	 * which we can add a new waiter and avoid the cost of re-enabling
 	 * the irq.
 	 */
-	WRITE_ONCE(b->irq_armed, true);
+	WRITE_ONCE(b->irq_armed, wakeref);
 
 	/* Requests may have completed before we could enable the interrupt. */
 	if (!b->irq_enabled++ && b->irq_enable(b))
@@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 
 static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 {
+	intel_wakeref_t wakeref = b->irq_armed;
+
 	GEM_BUG_ON(!b->irq_enabled);
 	if (!--b->irq_enabled)
 		b->irq_disable(b);
 
-	WRITE_ONCE(b->irq_armed, false);
-	intel_gt_pm_put_async(b->irq_engine->gt);
+	WRITE_ONCE(b->irq_armed, 0);
+	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
 }
 
 static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 72dfd3748c4c3..bdf09fd67b6e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "intel_engine_types.h"
+#include "intel_wakeref.h"
 
 /*
  * Rather than have every client wait upon all user interrupts,
@@ -43,7 +44,7 @@ struct intel_breadcrumbs {
 	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
 	struct irq_work irq_work; /* for use from inside irq_lock */
 	unsigned int irq_enabled;
-	bool irq_armed;
+	intel_wakeref_t irq_armed;
 
 	/* Not all breadcrumbs are attached to physical HW */
 	intel_engine_mask_t	engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index a80e3b7c24ff9..25564c01507e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce)
 		return;
 
 	ce->ops->enter(ce);
-	intel_gt_pm_get(ce->vm->gt);
+	ce->wakeref = intel_gt_pm_get(ce->vm->gt);
 }
 
 static inline void intel_context_mark_active(struct intel_context *ce)
@@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce)
 	if (--ce->active_count)
 		return;
 
-	intel_gt_pm_put_async(ce->vm->gt);
+	intel_gt_pm_put_async(ce->vm->gt, ce->wakeref);
 	ce->ops->exit(ce);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index aceaac28a33eb..7eccbd70d89fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -17,6 +17,7 @@
 #include "i915_utils.h"
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
+#include "intel_wakeref.h"
 
 #include "uc/intel_guc_fwif.h"
 
@@ -112,6 +113,7 @@ struct intel_context {
 	u32 ring_size;
 	struct intel_ring *ring;
 	struct intel_timeline *timeline;
+	intel_wakeref_t wakeref;
 
 	unsigned long flags;
 #define CONTEXT_BARRIER_BIT		0
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e91fc881dbf18..96bdb93a948d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 
 	ENGINE_TRACE(engine, "\n");
 
-	intel_gt_pm_get(engine->gt);
+	engine->wakeref_track = intel_gt_pm_get(engine->gt);
 
 	/* Discard stale context state from across idling */
 	ce = engine->kernel_context;
@@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq,
 	 */
 	GEM_BUG_ON(rq->context->active_count != 1);
 	__intel_gt_pm_get(engine->gt);
+	rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref);
 
 	/*
 	 * We have to serialise all potential retirement paths with our
@@ -285,7 +286,7 @@ static int __engine_park(struct intel_wakeref *wf)
 		engine->park(engine);
 
 	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
-	intel_gt_pm_put_async(engine->gt);
+	intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
 	return 0;
 }
 
@@ -296,7 +297,7 @@ static const struct intel_wakeref_ops wf_ops = {
 
 void intel_engine_init__pm(struct intel_engine_cs *engine)
 {
-	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops);
+	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
 	intel_engine_init_heartbeat(engine);
 
 	intel_gsc_idle_msg_enable(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8769760257fd9..960e6be2042fe 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -446,7 +446,9 @@ struct intel_engine_cs {
 	unsigned long serial;
 
 	unsigned long wakeref_serial;
+	intel_wakeref_t wakeref_track;
 	struct intel_wakeref wakeref;
+
 	struct file *default_state;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e8f42ec6b1b47..42aade0faf2d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	if (engine->fw_domain && !--engine->fw_active)
 		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
-	intel_gt_pm_put_async(engine->gt);
+	intel_gt_pm_put_async_untracked(engine->gt);
 
 	/*
 	 * If this is part of a virtual engine, its next request may
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 76b0ee2b906b8..21a7e3191c182 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -296,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
 	return intel_gt_is_bind_context_ready(gt);
 }
 
-static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
 {
 	struct intel_context *ce;
 	struct intel_gt *gt = ggtt->vm.gt;
@@ -313,7 +313,8 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
 	 * would conflict with fs_reclaim trying to allocate memory while
 	 * doing rpm_resume().
 	 */
-	if (!intel_gt_pm_get_if_awake(gt))
+	*wakeref = intel_gt_pm_get_if_awake(gt);
+	if (!*wakeref)
 		return NULL;
 
 	intel_engine_pm_get(ce->engine);
@@ -321,10 +322,10 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
 	return ce;
 }
 
-static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
 {
 	intel_engine_pm_put(ce->engine);
-	intel_gt_pm_put(ce->engine->gt);
+	intel_gt_pm_put(ce->engine->gt, wakeref);
 }
 
 static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
@@ -337,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
 	struct sgt_iter iter;
 	struct i915_request *rq;
 	struct intel_context *ce;
+	intel_wakeref_t wakeref;
 	u32 *cs;
 
 	if (!num_entries)
 		return true;
 
-	ce = gen8_ggtt_bind_get_ce(ggtt);
+	ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
 	if (!ce)
 		return false;
 
@@ -418,13 +420,13 @@ queue_err_rq:
 		offset += n_ptes;
 	}
 
-	gen8_ggtt_bind_put_ce(ce);
+	gen8_ggtt_bind_put_ce(ce, wakeref);
 	return true;
 
 err_rq:
 	i915_request_put(rq);
 put_ce:
-	gen8_ggtt_bind_put_ce(ce);
+	gen8_ggtt_bind_put_ce(ce, wakeref);
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 471b7cdc10ba0..220ac4f92edfc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -28,19 +28,20 @@
 static void user_forcewake(struct intel_gt *gt, bool suspend)
 {
 	int count = atomic_read(&gt->user_wakeref);
+	intel_wakeref_t wakeref;
 
 	/* Inside suspend/resume so single threaded, no races to worry about. */
 	if (likely(!count))
 		return;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	if (suspend) {
 		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
 		atomic_sub(count, &gt->wakeref.count);
 	} else {
 		atomic_add(count, &gt->wakeref.count);
 	}
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 }
 
 static void runtime_begin(struct intel_gt *gt)
@@ -138,7 +139,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt)
 	 * runtime_pm is per-device rather than per-tile, so this is still the
 	 * correct structure.
 	 */
-	intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops);
+	intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops, "GT");
 	seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
 }
 
@@ -236,6 +237,7 @@ int intel_gt_resume(struct intel_gt *gt)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err;
 
 	err = intel_gt_has_unrecoverable_error(gt);
@@ -252,7 +254,7 @@ int intel_gt_resume(struct intel_gt *gt)
 	 */
 	gt_sanitize(gt, true);
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 	intel_rc6_sanitize(&gt->rc6);
@@ -295,7 +297,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 out_fw:
 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 	intel_gt_bind_context_set_ready(gt);
 	return err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b1eeb5b33918c..911fd0160221b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
 	return intel_wakeref_is_active(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_get(struct intel_gt *gt)
+static inline void intel_gt_pm_get_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_get(&gt->wakeref);
 }
 
+static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt)
+{
+	intel_gt_pm_get_untracked(gt);
+	return intel_wakeref_track(&gt->wakeref);
+}
+
 static inline void __intel_gt_pm_get(struct intel_gt *gt)
 {
 	__intel_wakeref_get(&gt->wakeref);
 }
 
-static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
+static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
 {
-	return intel_wakeref_get_if_active(&gt->wakeref);
+	if (!intel_wakeref_get_if_active(&gt->wakeref))
+		return 0;
+
+	return intel_wakeref_track(&gt->wakeref);
 }
 
 static inline void intel_gt_pm_might_get(struct intel_gt *gt)
@@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt)
 	intel_wakeref_might_get(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_put(struct intel_gt *gt)
+static inline void intel_gt_pm_put_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_put(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle)
+{
+	intel_wakeref_untrack(&gt->wakeref, handle);
+	intel_gt_pm_put_untracked(gt);
+}
+
+static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_put_async(&gt->wakeref);
 }
@@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
 	intel_wakeref_might_put(&gt->wakeref);
 }
 
-#define with_intel_gt_pm(gt, tmp) \
-	for (tmp = 1, intel_gt_pm_get(gt); tmp; \
-	     intel_gt_pm_put(gt), tmp = 0)
+static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle)
+{
+	intel_wakeref_untrack(&gt->wakeref, handle);
+	intel_gt_pm_put_async_untracked(gt);
+}
+
+#define with_intel_gt_pm(gt, wf) \
+	for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0)
 
 /**
  * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
  * @wf: pointer to a temporary wakeref.
  */
 #define with_intel_gt_pm_if_awake(gt, wf) \
-	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0)
 
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index f900cc68d6d98..7114c116e9284 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -27,7 +27,7 @@
 void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
 {
 	atomic_inc(&gt->user_wakeref);
-	intel_gt_pm_get(gt);
+	intel_gt_pm_get_untracked(gt);
 	if (GRAPHICS_VER(gt->i915) >= 6)
 		intel_uncore_forcewake_user_get(gt->uncore);
 }
@@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
 {
 	if (GRAPHICS_VER(gt->i915) >= 6)
 		intel_uncore_forcewake_user_put(gt->uncore);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put_untracked(gt);
 	atomic_dec(&gt->user_wakeref);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 86cecf7a11054..5ffa5e30f4192 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B)
 	return *a - *b;
 }
 
-static void perf_begin(struct intel_gt *gt)
+static intel_wakeref_t perf_begin(struct intel_gt *gt)
 {
-	intel_gt_pm_get(gt);
+	intel_wakeref_t wakeref = intel_gt_pm_get(gt);
 
 	/* Boost gpufreq to max [waitboost] and keep it fixed */
 	atomic_inc(&gt->rps.num_waiters);
 	queue_work(gt->i915->unordered_wq, &gt->rps.work);
 	flush_work(&gt->rps.work);
+
+	return wakeref;
 }
 
-static int perf_end(struct intel_gt *gt)
+static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref)
 {
 	atomic_dec(&gt->rps.num_waiters);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return igt_flush_test(gt->i915);
 }
@@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	perf_begin(gt);
+	wakeref = perf_begin(gt);
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce = engine->kernel_context;
 		struct i915_vma *batch;
@@ -207,7 +210,7 @@ out:
 		pr_info("%s: MI_BB_START cycles: %u\n",
 			engine->name, trifilter(cycles));
 	}
-	if (perf_end(gt))
+	if (perf_end(gt, wakeref))
 		err = -EIO;
 
 	return err;
@@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	perf_begin(gt);
+	wakeref = perf_begin(gt);
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce = engine->kernel_context;
 		struct i915_vma *base, *nop;
@@ -364,7 +368,7 @@ out:
 		pr_info("%s: 16K MI_NOOP cycles: %u\n",
 			engine->name, trifilter(cycles));
 	}
-	if (perf_end(gt))
+	if (perf_end(gt, wakeref))
 		err = -EIO;
 
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 0971241707ce8..33351deeea4f0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (!gt->clock_frequency) { /* unknown */
@@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg)
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 
 	for_each_engine(engine, gt, id) {
@@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg)
 	}
 
 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 79aa6ac66ad2f..f40de408cd3a9 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -261,11 +261,12 @@ static int igt_atomic_reset(void *arg)
 {
 	struct intel_gt *gt = arg;
 	const typeof(*igt_atomic_phases) *p;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that the resets are usable from atomic context */
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	igt_global_reset_lock(gt);
 
 	/* Flush any requests before we get started and check basics */
@@ -296,7 +297,7 @@ static int igt_atomic_reset(void *arg)
 
 unlock:
 	igt_global_reset_unlock(gt);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
@@ -307,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg)
 	const typeof(*igt_atomic_phases) *p;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that the resets are usable from atomic context */
@@ -317,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg)
 	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	igt_global_reset_lock(gt);
 
 	/* Flush any requests before we get started and check basics */
@@ -365,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg)
 
 out_unlock:
 	igt_global_reset_unlock(gt);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index fb30f733b0366..dcef8d4989197 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -224,6 +224,7 @@ int live_rps_clock_interval(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
@@ -236,7 +237,7 @@ int live_rps_clock_interval(void *arg)
 	saved_work = rps->work.func;
 	rps->work.func = dummy_rps_work;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	intel_rps_disable(&gt->rps);
 
 	intel_gt_check_clock_frequency(gt);
@@ -355,7 +356,7 @@ int live_rps_clock_interval(void *arg)
 	}
 
 	intel_rps_enable(&gt->rps);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	igt_spinner_fini(&spin);
 
@@ -376,6 +377,7 @@ int live_rps_control(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/*
@@ -398,7 +400,7 @@ int live_rps_control(void *arg)
 	saved_work = rps->work.func;
 	rps->work.func = dummy_rps_work;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 		ktime_t min_dt, max_dt;
@@ -488,7 +490,7 @@ int live_rps_control(void *arg)
 			break;
 		}
 	}
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	igt_spinner_fini(&spin);
 
@@ -1023,6 +1025,7 @@ int live_rps_interrupt(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	u32 pm_events;
 	int err = 0;
 
@@ -1033,9 +1036,9 @@ int live_rps_interrupt(void *arg)
 	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
 		return 0;
 
-	intel_gt_pm_get(gt);
-	pm_events = rps->pm_events;
-	intel_gt_pm_put(gt);
+	pm_events = 0;
+	with_intel_gt_pm(gt, wakeref)
+		pm_events = rps->pm_events;
 	if (!pm_events) {
 		pr_err("No RPS PM events registered, but RPS is enabled?\n");
 		return -ENODEV;
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 952c8d52d68a2..302d0540295d7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -266,6 +266,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	struct intel_rps *rps = &gt->rps;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	struct igt_spinner spin;
 	u32 slpc_min_freq, slpc_max_freq;
 	int err = 0;
@@ -311,7 +312,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	}
 
 	intel_gt_pm_wait_for_idle(gt);
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 		u32 max_act_freq;
@@ -397,7 +398,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 	igt_spinner_fini(&spin);
 	intel_gt_pm_wait_for_idle(gt);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index d37698bd6b91a..04f8377fd7a39 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1107,7 +1107,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 			if (deregister)
 				guc_signal_context_fence(ce);
 			if (destroyed) {
-				intel_gt_pm_put_async(guc_to_gt(guc));
+				intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 				release_guc_id(guc, ce);
 				__guc_context_destroy(ce);
 			}
@@ -1303,6 +1303,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	unsigned long flags;
 	u32 reset_count;
 	bool in_reset;
+	intel_wakeref_t wakeref;
 
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
 
@@ -1325,7 +1326,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	 * start_gt_clk is derived from GuC state. To get a consistent
 	 * view of activity, we query the GuC state only if gt is awake.
 	 */
-	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+	wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+	if (wakeref) {
 		stats_saved = *stats;
 		gt_stamp_saved = guc->timestamp.gt_stamp;
 		/*
@@ -1334,7 +1336,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 		 */
 		guc_update_engine_gt_clks(engine);
 		guc_update_pm_timestamp(guc, now);
-		intel_gt_pm_put_async(gt);
+		intel_gt_pm_put_async(gt, wakeref);
 		if (i915_reset_count(gpu_error) != reset_count) {
 			*stats = stats_saved;
 			guc->timestamp.gt_stamp = gt_stamp_saved;
@@ -3385,9 +3387,9 @@ static void destroyed_worker_func(struct work_struct *w)
 	struct intel_guc *guc = container_of(w, struct intel_guc,
 					     submission_state.destroyed_worker);
 	struct intel_gt *gt = guc_to_gt(guc);
-	int tmp;
+	intel_wakeref_t wakeref;
 
-	with_intel_gt_pm(gt, tmp)
+	with_intel_gt_pm(gt, wakeref)
 		deregister_destroyed_contexts(guc);
 }
 
@@ -4894,7 +4896,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
 		intel_context_put(ce);
 	} else if (context_destroyed(ce)) {
 		/* Context has been destroyed */
-		intel_gt_pm_put_async(guc_to_gt(guc));
+		intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
 	}
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 878a27e1c8eff..21eb0c5b320d5 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -223,19 +223,19 @@ static u64 get_rc6(struct intel_gt *gt)
 	struct drm_i915_private *i915 = gt->i915;
 	const unsigned int gt_id = gt->info.id;
 	struct i915_pmu *pmu = &i915->pmu;
+	intel_wakeref_t wakeref;
 	unsigned long flags;
-	bool awake = false;
 	u64 val;
 
-	if (intel_gt_pm_get_if_awake(gt)) {
+	wakeref = intel_gt_pm_get_if_awake(gt);
+	if (wakeref) {
 		val = __get_rc6(gt);
-		intel_gt_pm_put_async(gt);
-		awake = true;
+		intel_gt_pm_put_async(gt, wakeref);
 	}
 
 	spin_lock_irqsave(&pmu->lock, flags);
 
-	if (awake) {
+	if (wakeref) {
 		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
 	} else {
 		/*
@@ -439,12 +439,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 	const unsigned int gt_id = gt->info.id;
 	struct i915_pmu *pmu = &i915->pmu;
 	struct intel_rps *rps = &gt->rps;
+	intel_wakeref_t wakeref;
 
 	if (!frequency_sampling_enabled(pmu, gt_id))
 		return;
 
 	/* Report 0/0 (actual/requested) frequency while parked. */
-	if (!intel_gt_pm_get_if_awake(gt))
+	wakeref = intel_gt_pm_get_if_awake(gt);
+	if (!wakeref)
 		return;
 
 	if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
@@ -473,7 +475,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 				period_ns / 1000);
 	}
 
-	intel_gt_pm_put_async(gt);
+	intel_gt_pm_put_async(gt, wakeref);
 }
 
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 2401b88b55a40..dea2f63184f89 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -99,7 +99,8 @@ static void __intel_wakeref_put_work(struct work_struct *wrk)
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct drm_i915_private *i915,
 			  const struct intel_wakeref_ops *ops,
-			  struct intel_wakeref_lockclass *key)
+			  struct intel_wakeref_lockclass *key,
+			  const char *name)
 {
 	wf->i915 = i915;
 	wf->ops = ops;
@@ -111,6 +112,10 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
 	INIT_DELAYED_WORK(&wf->work, __intel_wakeref_put_work);
 	lockdep_init_map(&wf->work.work.lockdep_map,
 			 "wakeref.work", &key->work, 0);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+	ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, name);
+#endif
 }
 
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 909cad13ac1f7..68aa3be482515 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -50,6 +50,10 @@ struct intel_wakeref {
 	const struct intel_wakeref_ops *ops;
 
 	struct delayed_work work;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+	struct ref_tracker_dir debug;
+#endif
 };
 
 struct intel_wakeref_lockclass {
@@ -60,11 +64,12 @@ struct intel_wakeref_lockclass {
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct drm_i915_private *i915,
 			  const struct intel_wakeref_ops *ops,
-			  struct intel_wakeref_lockclass *key);
-#define intel_wakeref_init(wf, i915, ops) do {				\
+			  struct intel_wakeref_lockclass *key,
+			  const char *name);
+#define intel_wakeref_init(wf, i915, ops, name) do {			\
 	static struct intel_wakeref_lockclass __key;			\
 									\
-	__intel_wakeref_init((wf), (i915), (ops), &__key);		\
+	__intel_wakeref_init((wf), (i915), (ops), &__key, name);	\
 } while (0)
 
 int __intel_wakeref_get_first(struct intel_wakeref *wf);
@@ -292,6 +297,33 @@ static inline void intel_ref_tracker_free(struct ref_tracker_dir *dir,
 void intel_ref_tracker_show(struct ref_tracker_dir *dir,
 			    struct drm_printer *p);
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+	return intel_ref_tracker_alloc(&wf->debug);
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+					 intel_wakeref_t handle)
+{
+	intel_ref_tracker_free(&wf->debug, handle);
+}
+
+#else
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+	return -1;
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+					 intel_wakeref_t handle)
+{
+}
+
+#endif
+
 struct intel_wakeref_auto {
 	struct drm_i915_private *i915;
 	struct timer_list timer;
-- 
GitLab


From fcd479a79120bf0cd507d85f898297a3b868dda6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 14 Nov 2023 16:23:33 +0200
Subject: [PATCH 0331/2340] drm/i915: Also check for VGA converter in eDP probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unfortunately even the HPD based detection added in
commit cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe")
fails to detect that the VBT's eDP/DDI-A is a ghost on
Asus B360M-A (CFL+CNP). On that board eDP/DDI-A has its HPD
asserted despite nothing being actually connected there :(
The straps/fuses also indicate that the eDP port is present.

So if one boots with a VGA monitor connected the eDP probe will
mistake the DP->VGA converter hooked to DDI-E for an eDP panel
on DDI-A.

As a last resort check what kind of DP device we've detected,
and if it looks like a DP->VGA converter then conclude that
the eDP port should be ignored.

Cc: stable@vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9636
Fixes: cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114142333.15799-1-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index effb3da291e15..3867a0d9e0043 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -6270,8 +6270,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	 * (eg. Acer Chromebook C710), so we'll check it only if multiple
 	 * ports are attempting to use the same AUX CH, according to VBT.
 	 */
-	if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) &&
-	    !intel_digital_port_connected(encoder)) {
+	if (intel_bios_dp_has_shared_aux_ch(encoder->devdata)) {
 		/*
 		 * If this fails, presume the DPCD answer came
 		 * from some other port using the same AUX CH.
@@ -6279,10 +6278,27 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 		 * FIXME maybe cleaner to check this before the
 		 * DPCD read? Would need sort out the VDD handling...
 		 */
-		drm_info(&dev_priv->drm,
-			 "[ENCODER:%d:%s] HPD is down, disabling eDP\n",
-			 encoder->base.base.id, encoder->base.name);
-		goto out_vdd_off;
+		if (!intel_digital_port_connected(encoder)) {
+			drm_info(&dev_priv->drm,
+				 "[ENCODER:%d:%s] HPD is down, disabling eDP\n",
+				 encoder->base.base.id, encoder->base.name);
+			goto out_vdd_off;
+		}
+
+		/*
+		 * Unfortunately even the HPD based detection fails on
+		 * eg. Asus B360M-A (CFL+CNP), so as a last resort fall
+		 * back to checking for a VGA branch device. Only do this
+		 * on known affected platforms to minimize false positives.
+		 */
+		if (DISPLAY_VER(dev_priv) == 9 && drm_dp_is_branch(intel_dp->dpcd) &&
+		    (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_TYPE_MASK) ==
+		    DP_DWN_STRM_PORT_TYPE_ANALOG) {
+			drm_info(&dev_priv->drm,
+				 "[ENCODER:%d:%s] VGA converter detected, disabling eDP\n",
+				 encoder->base.base.id, encoder->base.name);
+			goto out_vdd_off;
+		}
 	}
 
 	mutex_lock(&dev_priv->drm.mode_config.mutex);
-- 
GitLab


From 7521c8a657ba5c48ccd39cde7102a001fb0d9c70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 17 Nov 2023 19:18:31 +0200
Subject: [PATCH 0332/2340] drm/i915/fbc: Split plane size vs. surface size
 checks apart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do separate checks for the visible plane size vs. the surface
size (which I take to mean offset+size). For now both use the
same max w/h, but we can relax the surface size limits as
a followup.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117171833.25816-1-ville.syrjala@linux.intel.com
Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 32 +++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index b73cf1c5ba33d..fe21371db38c7 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1050,6 +1050,31 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *
 	return effective_w <= max_w && effective_h <= max_h;
 }
 
+static bool intel_fbc_plane_size_valid(const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
+	unsigned int w, h, max_w, max_h;
+
+	if (DISPLAY_VER(i915) >= 10) {
+		max_w = 5120;
+		max_h = 4096;
+	} else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) {
+		max_w = 4096;
+		max_h = 4096;
+	} else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) {
+		max_w = 4096;
+		max_h = 2048;
+	} else {
+		max_w = 2048;
+		max_h = 1536;
+	}
+
+	w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	h = drm_rect_height(&plane_state->uapi.src) >> 16;
+
+	return w <= max_w && h <= max_h;
+}
+
 static bool i8xx_fbc_tiling_valid(const struct intel_plane_state *plane_state)
 {
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
@@ -1247,11 +1272,16 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
 		return 0;
 	}
 
-	if (!intel_fbc_hw_tracking_covers_screen(plane_state)) {
+	if (!intel_fbc_plane_size_valid(plane_state)) {
 		plane_state->no_fbc_reason = "plane size too big";
 		return 0;
 	}
 
+	if (!intel_fbc_hw_tracking_covers_screen(plane_state)) {
+		plane_state->no_fbc_reason = "surface size too big";
+		return 0;
+	}
+
 	/*
 	 * Work around a problem on GEN9+ HW, where enabling FBC on a plane
 	 * having a Y offset that isn't divisible by 4 causes FIFO underrun
-- 
GitLab


From 5c38280cb73ef351c4f92ea06e0fa65847f87185 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 17 Nov 2023 19:18:32 +0200
Subject: [PATCH 0333/2340] drm/i915/fbc: Bump max surface size to 8kx4k on
 icl+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FBC on icl+ should supposedly be fine with surface sizes up to
8kx4k. Bump up the limit.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117171833.25816-2-ville.syrjala@linux.intel.com
Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index fe21371db38c7..0ac222eaddd2a 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1028,7 +1028,10 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *
 	struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
 	unsigned int effective_w, effective_h, max_w, max_h;
 
-	if (DISPLAY_VER(i915) >= 10) {
+	if (DISPLAY_VER(i915) >= 11) {
+		max_w = 8192;
+		max_h = 4096;
+	} else if (DISPLAY_VER(i915) >= 10) {
 		max_w = 5120;
 		max_h = 4096;
 	} else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) {
-- 
GitLab


From f1dfb517cc5731b10aab3309629bfe80596a0d49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 17 Nov 2023 19:18:33 +0200
Subject: [PATCH 0334/2340] drm/i915/fbc: Bump ivb FBC max surface size to
 4kx4k
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

IVB Bspec says:
"Frame Buffer Compression is only supported with memory surfaces of 4096 lines
or less and pipe source sizes of 4096 pixels by 2048 lines or less. "

so seems like we should be able to bump the offset+size limit to
at least 4kx4k. Make it so.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117171833.25816-3-ville.syrjala@linux.intel.com
Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 0ac222eaddd2a..63f389a1707d9 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1034,7 +1034,7 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *
 	} else if (DISPLAY_VER(i915) >= 10) {
 		max_w = 5120;
 		max_h = 4096;
-	} else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) {
+	} else if (DISPLAY_VER(i915) >= 7) {
 		max_w = 4096;
 		max_h = 4096;
 	} else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) {
-- 
GitLab


From a32324280474b8279ac28aee672f45de6ab755a5 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 16 Nov 2023 11:14:10 -0800
Subject: [PATCH 0335/2340] dma-buf: Replace strlcpy() with strscpy()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

strlcpy() reads the entire source buffer first. This read may exceed
the destination size limit. This is both inefficient and can lead
to linear read overflows if a source string is not NUL-terminated[1].
Additionally, it returns the size of the source string, not the
resulting size of the destination string. In an effort to remove strlcpy()
completely[2], replace strlcpy() here with strscpy().

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [1]
Link: https://github.com/KSPP/linux/issues/89 [2]
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Azeem Shaikh <azeemshaikh38@gmail.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linaro-mm-sig@lists.linaro.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Acked-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116191409.work.634-kees@kernel.org
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 21916bba77d58..8fe5aa67b1678 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -46,12 +46,12 @@ static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
 	struct dma_buf *dmabuf;
 	char name[DMA_BUF_NAME_LEN];
-	size_t ret = 0;
+	ssize_t ret = 0;
 
 	dmabuf = dentry->d_fsdata;
 	spin_lock(&dmabuf->name_lock);
 	if (dmabuf->name)
-		ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN);
+		ret = strscpy(name, dmabuf->name, sizeof(name));
 	spin_unlock(&dmabuf->name_lock);
 
 	return dynamic_dname(buffer, buflen, "/%s:%s",
-- 
GitLab


From 5fc6aa7db080fd90ef00846aac04e8a211088132 Mon Sep 17 00:00:00 2001
From: Jonas Karlman <jonas@kwiboo.se>
Date: Wed, 25 Oct 2023 21:32:46 +0000
Subject: [PATCH 0336/2340] drm/rockchip: vop2: Add NV20 and NV30 support

Add support for the 10-bit 4:2:2 and 4:4:4 formats NV20 and NV30.

These formats can be tested using modetest [1]:

  modetest -P <plane_id>@<crtc_id>:1920x1080@<format>

e.g. on a ROCK 3 Model A (rk3568):

  modetest -P 43@67:1920x1080@NV20 -F tiles,tiles
  modetest -P 43@67:1920x1080@NV30 -F smpte,smpte

[1] https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/329

Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
Reviewed-by: Christopher Obbard <chris.obbard@collabora.com>
Tested-by: Christopher Obbard <chris.obbard@collabora.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231025213248.2641962-1-jonas@kwiboo.se
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 5 +++++
 drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 6862fb146ace9..312da57833620 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -325,11 +325,14 @@ static enum vop2_data_format vop2_convert_format(u32 format)
 	case DRM_FORMAT_NV16:
 	case DRM_FORMAT_NV61:
 		return VOP2_FMT_YUV422SP;
+	case DRM_FORMAT_NV20:
 	case DRM_FORMAT_Y210:
 		return VOP2_FMT_YUV422SP_10;
 	case DRM_FORMAT_NV24:
 	case DRM_FORMAT_NV42:
 		return VOP2_FMT_YUV444SP;
+	case DRM_FORMAT_NV30:
+		return VOP2_FMT_YUV444SP_10;
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_YVYU:
 		return VOP2_FMT_VYUY422;
@@ -414,6 +417,8 @@ static bool vop2_win_uv_swap(u32 format)
 	case DRM_FORMAT_NV16:
 	case DRM_FORMAT_NV24:
 	case DRM_FORMAT_NV15:
+	case DRM_FORMAT_NV20:
+	case DRM_FORMAT_NV30:
 	case DRM_FORMAT_YUYV:
 	case DRM_FORMAT_UYVY:
 		return true;
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
index 22288ad7f3264..2c45d81983a5a 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -48,8 +48,10 @@ static const uint32_t formats_rk356x_esmart[] = {
 	DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */
 	DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */
 	DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */
 	DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */
 	DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */
 	DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */
 	DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */
 };
-- 
GitLab


From 1044f4a31734eef000f42cdaaf35bb2f76286be5 Mon Sep 17 00:00:00 2001
From: Johan Jonker <jbx6244@gmail.com>
Date: Thu, 2 Nov 2023 14:41:48 +0100
Subject: [PATCH 0337/2340] drm/rockchip: rk3066_hdmi: Remove useless
 mode_fixup

The mode_fixup implementation doesn't do anything, so we can simply
remove it.

Signed-off-by: Johan Jonker <jbx6244@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/5649ac03-db92-42a9-d86a-76dfa1af7c64@gmail.com
---
 drivers/gpu/drm/rockchip/rk3066_hdmi.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index fa6e592e0276c..5c269081c6919 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -434,14 +434,6 @@ static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder)
 	rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_A);
 }
 
-static bool
-rk3066_hdmi_encoder_mode_fixup(struct drm_encoder *encoder,
-			       const struct drm_display_mode *mode,
-			       struct drm_display_mode *adj_mode)
-{
-	return true;
-}
-
 static int
 rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
 				 struct drm_crtc_state *crtc_state,
@@ -459,7 +451,6 @@ static const
 struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = {
 	.enable       = rk3066_hdmi_encoder_enable,
 	.disable      = rk3066_hdmi_encoder_disable,
-	.mode_fixup   = rk3066_hdmi_encoder_mode_fixup,
 	.mode_set     = rk3066_hdmi_encoder_mode_set,
 	.atomic_check = rk3066_hdmi_encoder_atomic_check,
 };
-- 
GitLab


From ae3436a5e7c2ef4f92938133bd99f92fc47ea34e Mon Sep 17 00:00:00 2001
From: Johan Jonker <jbx6244@gmail.com>
Date: Thu, 2 Nov 2023 14:42:04 +0100
Subject: [PATCH 0338/2340] drm/rockchip: rk3066_hdmi: Switch encoder hooks to
 atomic

The rk3066_hdmi encoder still uses the non atomic variants
of enable and disable. Convert to their atomic equivalents.
In atomic mode there is no need to save the adjusted mode,
so remove the mode_set function.

Signed-off-by: Johan Jonker <jbx6244@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/034c3446-d619-f4c3-3aaa-ab51dc19d07f@gmail.com
---
 drivers/gpu/drm/rockchip/rk3066_hdmi.c | 35 +++++++++++++-------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index 5c269081c6919..0e7aae3419608 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -55,7 +55,6 @@ struct rk3066_hdmi {
 	unsigned int tmdsclk;
 
 	struct hdmi_data_info hdmi_data;
-	struct drm_display_mode previous_mode;
 };
 
 static struct rk3066_hdmi *encoder_to_rk3066_hdmi(struct drm_encoder *encoder)
@@ -387,21 +386,21 @@ static int rk3066_hdmi_setup(struct rk3066_hdmi *hdmi,
 	return 0;
 }
 
-static void
-rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder,
-			     struct drm_display_mode *mode,
-			     struct drm_display_mode *adj_mode)
+static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder,
+				       struct drm_atomic_state *state)
 {
 	struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder);
+	struct drm_connector_state *conn_state;
+	struct drm_crtc_state *crtc_state;
+	int mux, val;
 
-	/* Store the display mode for plugin/DPMS poweron events. */
-	drm_mode_copy(&hdmi->previous_mode, adj_mode);
-}
+	conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector);
+	if (WARN_ON(!conn_state))
+		return;
 
-static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder)
-{
-	struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder);
-	int mux, val;
+	crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
+	if (WARN_ON(!crtc_state))
+		return;
 
 	mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder);
 	if (mux)
@@ -414,10 +413,11 @@ static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder)
 	DRM_DEV_DEBUG(hdmi->dev, "hdmi encoder enable select: vop%s\n",
 		      (mux) ? "1" : "0");
 
-	rk3066_hdmi_setup(hdmi, &hdmi->previous_mode);
+	rk3066_hdmi_setup(hdmi, &crtc_state->adjusted_mode);
 }
 
-static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder)
+static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder,
+					struct drm_atomic_state *state)
 {
 	struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder);
 
@@ -449,10 +449,9 @@ rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
 
 static const
 struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = {
-	.enable       = rk3066_hdmi_encoder_enable,
-	.disable      = rk3066_hdmi_encoder_disable,
-	.mode_set     = rk3066_hdmi_encoder_mode_set,
-	.atomic_check = rk3066_hdmi_encoder_atomic_check,
+	.atomic_check   = rk3066_hdmi_encoder_atomic_check,
+	.atomic_enable  = rk3066_hdmi_encoder_enable,
+	.atomic_disable = rk3066_hdmi_encoder_disable,
 };
 
 static enum drm_connector_status
-- 
GitLab


From 5f35a624c1e30b5bae5023b3c256e94e0ad4f806 Mon Sep 17 00:00:00 2001
From: Abhinav Singh <singhabhinav9051571833@gmail.com>
Date: Tue, 14 Nov 2023 00:43:03 +0530
Subject: [PATCH 0339/2340] drm/nouveau/fence:: fix warning directly
 dereferencing a rcu pointer

Fix a sparse warning with this message
"warning:dereference of noderef expression". In this context it means we
are dereferencing a __rcu tagged pointer directly.

We should not be directly dereferencing a rcu pointer. To get a normal
(non __rcu tagged pointer) from a __rcu tagged pointer we are using the
function unrcu_pointer(...). The non __rcu tagged pointer then can be
dereferenced just like a normal pointer.

I tested with qemu with this command
qemu-system-x86_64 \
	-m 2G \
	-smp 2 \
	-kernel bzImage \
	-append "console=ttyS0 root=/dev/sda earlyprintk=serial net.ifnames=0" \
	-drive file=bullseye.img,format=raw \
	-net user,host=10.0.2.10,hostfwd=tcp:127.0.0.1:10021-:22 \
	-net nic,model=e1000 \
	-enable-kvm \
	-nographic \
	-pidfile vm.pid \
	2>&1 | tee vm.log
with lockdep enabled.

Fixes: 0ec5f02f0e2c ("drm/nouveau: prevent stale fence->channel pointers, and protect with rcu")
Signed-off-by: Abhinav Singh <singhabhinav9051571833@gmail.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113191303.3277733-1-singhabhinav9051571833@gmail.com
---
 drivers/gpu/drm/nouveau/nv04_fence.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index 5b71a5a5cd85c..cdbc75e3d1f66 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -39,7 +39,7 @@ struct nv04_fence_priv {
 static int
 nv04_fence_emit(struct nouveau_fence *fence)
 {
-	struct nvif_push *push = fence->channel->chan.push;
+	struct nvif_push *push = unrcu_pointer(fence->channel)->chan.push;
 	int ret = PUSH_WAIT(push, 2);
 	if (ret == 0) {
 		PUSH_NVSQ(push, NV_SW, 0x0150, fence->base.seqno);
-- 
GitLab


From 548b61a8ce18dec8757fcc112eac5bd125161408 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Wed, 15 Nov 2023 14:44:09 -0800
Subject: [PATCH 0340/2340] drm/msm/gpu: Move gpu devcore's to gpu device

The dpu devcore's are already associated with the dpu device.  So we
should associate the gpu devcore's with the gpu device, for easier
classification.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/567738/
---
 drivers/gpu/drm/msm/msm_gpu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 7f64c66673002..2b7c9db3ded3f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -292,8 +292,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
 	/* Set the active crash state to be dumped on failure */
 	gpu->crashstate = state;
 
-	/* FIXME: Release the crashstate if this errors out? */
-	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
+	dev_coredumpm(&gpu->pdev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
 		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
 }
 #else
-- 
GitLab


From 4bea53b9c7c72fd12a0ceebe88a71723c0a514b8 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Fri, 17 Nov 2023 07:14:19 -0800
Subject: [PATCH 0341/2340] drm/msm: Reduce fallout of fence signaling vs
 reclaim hangs

Until various PM devfreq/QoS and interconnect patches land, we could
potentially trigger reclaim from gpu scheduler thread, and under enough
memory pressure that could trigger a sort of deadlock.  Eventually the
wait will timeout and we'll move on to consider other GEM objects.  But
given that there is still a potential for deadlock/stalling, we should
reduce the timeout to contain the damage.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/568031/
---
 drivers/gpu/drm/msm/msm_gem_shrinker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 5a7d48c02c4b9..07ca4ddfe4e37 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -75,7 +75,7 @@ static bool
 wait_for_idle(struct drm_gem_object *obj)
 {
 	enum dma_resv_usage usage = dma_resv_usage_rw(true);
-	return dma_resv_wait_timeout(obj->resv, usage, false, 1000) > 0;
+	return dma_resv_wait_timeout(obj->resv, usage, false, 10) > 0;
 }
 
 static bool
-- 
GitLab


From 12578c075f89d6bd1b8af21751fbc2e1f78d2ce0 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Fri, 17 Nov 2023 07:24:28 -0800
Subject: [PATCH 0342/2340] drm/msm/gpu: Skip retired submits in recover worker

If we somehow raced with submit retiring, either while waiting for
worker to have a chance to run or acquiring the gpu lock, then the
recover worker should just bail.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/568034/
---
 drivers/gpu/drm/msm/msm_gpu.c | 41 +++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 2b7c9db3ded3f..095390774f22b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -365,29 +365,31 @@ static void recover_worker(struct kthread_work *work)
 	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
 
 	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
-	if (submit) {
-		/* Increment the fault counts */
-		submit->queue->faults++;
-		if (submit->aspace)
-			submit->aspace->faults++;
 
-		get_comm_cmdline(submit, &comm, &cmd);
+	/*
+	 * If the submit retired while we were waiting for the worker to run,
+	 * or waiting to acquire the gpu lock, then nothing more to do.
+	 */
+	if (!submit)
+		goto out_unlock;
 
-		if (comm && cmd) {
-			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
-				gpu->name, comm, cmd);
+	/* Increment the fault counts */
+	submit->queue->faults++;
+	if (submit->aspace)
+		submit->aspace->faults++;
 
-			msm_rd_dump_submit(priv->hangrd, submit,
-				"offending task: %s (%s)", comm, cmd);
-		} else {
-			msm_rd_dump_submit(priv->hangrd, submit, NULL);
-		}
+	get_comm_cmdline(submit, &comm, &cmd);
+
+	if (comm && cmd) {
+		DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
+			      gpu->name, comm, cmd);
+
+		msm_rd_dump_submit(priv->hangrd, submit,
+				   "offending task: %s (%s)", comm, cmd);
 	} else {
-		/*
-		 * We couldn't attribute this fault to any particular context,
-		 * so increment the global fault count instead.
-		 */
-		gpu->global_faults++;
+		DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name);
+
+		msm_rd_dump_submit(priv->hangrd, submit, NULL);
 	}
 
 	/* Record the crash state */
@@ -440,6 +442,7 @@ static void recover_worker(struct kthread_work *work)
 
 	pm_runtime_put(&gpu->pdev->dev);
 
+out_unlock:
 	mutex_unlock(&gpu->lock);
 
 	msm_gpu_retire(gpu);
-- 
GitLab


From f6afe4f09f6605b725b39670b3568ab2927c9b43 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 10 Oct 2023 08:54:25 +0300
Subject: [PATCH 0343/2340] drm/msm: don't create GPU-related debugfs files
 with no GPU present

If there is no GPU present, skip creation of the GPU-related debugfs
files, making the MSM's debugfs more usable.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/561742/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_debugfs.c | 41 +++++++++++++++++++------------
 drivers/gpu/drm/msm/msm_rd.c      |  3 +++
 2 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 04d304eed2237..4494f6d1c7cbb 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -304,36 +304,21 @@ int msm_debugfs_late_init(struct drm_device *dev)
 	return ret;
 }
 
-void msm_debugfs_init(struct drm_minor *minor)
+static void msm_debugfs_gpu_init(struct drm_minor *minor)
 {
 	struct drm_device *dev = minor->dev;
 	struct msm_drm_private *priv = dev->dev_private;
 	struct dentry *gpu_devfreq;
 
-	drm_debugfs_create_files(msm_debugfs_list,
-				 ARRAY_SIZE(msm_debugfs_list),
-				 minor->debugfs_root, minor);
-
 	debugfs_create_file("gpu", S_IRUSR, minor->debugfs_root,
 		dev, &msm_gpu_fops);
 
-	if (priv->kms) {
-		drm_debugfs_create_files(msm_kms_debugfs_list,
-					 ARRAY_SIZE(msm_kms_debugfs_list),
-					 minor->debugfs_root, minor);
-		debugfs_create_file("kms", S_IRUSR, minor->debugfs_root,
-				    dev, &msm_kms_fops);
-	}
-
 	debugfs_create_u32("hangcheck_period_ms", 0600, minor->debugfs_root,
 		&priv->hangcheck_period);
 
 	debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root,
 		&priv->disable_err_irq);
 
-	debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root,
-		dev, &shrink_fops);
-
 	gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root);
 
 	debugfs_create_bool("idle_clamp",0600, gpu_devfreq,
@@ -344,6 +329,30 @@ void msm_debugfs_init(struct drm_minor *minor)
 
 	debugfs_create_u32("downdifferential",0600, gpu_devfreq,
 			   &priv->gpu_devfreq_config.downdifferential);
+}
+
+void msm_debugfs_init(struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+
+	drm_debugfs_create_files(msm_debugfs_list,
+				 ARRAY_SIZE(msm_debugfs_list),
+				 minor->debugfs_root, minor);
+
+	if (priv->gpu_pdev)
+		msm_debugfs_gpu_init(minor);
+
+	if (priv->kms) {
+		drm_debugfs_create_files(msm_kms_debugfs_list,
+					 ARRAY_SIZE(msm_kms_debugfs_list),
+					 minor->debugfs_root, minor);
+		debugfs_create_file("kms", S_IRUSR, minor->debugfs_root,
+				    dev, &msm_kms_fops);
+	}
+
+	debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root,
+		dev, &shrink_fops);
 
 	if (priv->kms && priv->kms->funcs->debugfs_init)
 		priv->kms->funcs->debugfs_init(priv->kms, minor);
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 5adc51f7ab592..ca44fd291c5ba 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -270,6 +270,9 @@ int msm_rd_debugfs_init(struct drm_minor *minor)
 	struct msm_rd_state *rd;
 	int ret;
 
+	if (!priv->gpu_pdev)
+		return 0;
+
 	/* only create on first minor: */
 	if (priv->rd)
 		return 0;
-- 
GitLab


From d1912f6972b857e63577e8094f5f1242d9fe3c7d Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 6 Nov 2023 10:50:25 -0800
Subject: [PATCH 0344/2340] drm/msm: Small uabi fixes

Correct the minor version exposed and error return value for
MSM_INFO_GET_NAME.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/566155/
---
 drivers/gpu/drm/msm/msm_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 2aae7d107f335..d751d2023f842 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -39,7 +39,7 @@
  * - 1.11.0 - Add wait boost (MSM_WAIT_FENCE_BOOST, MSM_PREP_BOOST)
  */
 #define MSM_VERSION_MAJOR	1
-#define MSM_VERSION_MINOR	10
+#define MSM_VERSION_MINOR	11
 #define MSM_VERSION_PATCHLEVEL	0
 
 static void msm_deinit_vram(struct drm_device *ddev);
@@ -620,7 +620,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		break;
 	case MSM_INFO_GET_NAME:
 		if (args->value && (args->len < strlen(msm_obj->name))) {
-			ret = -EINVAL;
+			ret = -ETOOSMALL;
 			break;
 		}
 		args->len = strlen(msm_obj->name);
-- 
GitLab


From 9902cb999e4e913d98e8afe4b36c08e4a793e1ce Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 6 Nov 2023 10:50:26 -0800
Subject: [PATCH 0345/2340] drm/msm/gem: Add metadata

The EXT_external_objects extension is a bit awkward as it doesn't pass
explicit modifiers, leaving the importer to guess with incomplete
information.  In the case of vk (turnip) exporting and gl (freedreno)
importing, the "OPTIMAL_TILING_EXT" layout depends on VkImageCreateInfo
flags (among other things), which the importer does not know.  Which
unfortunately leaves us with the need for a metadata back-channel.

The contents of the metadata are defined by userspace.  The
EXT_external_objects extension is only required to work between
compatible versions of gl and vk drivers, as defined by device and
driver UUIDs.

v2: add missing metadata kfree
v3: Rework to move copy_from/to_user out from under gem obj lock
    to avoid angering lockdep about deadlocks against fs-reclaim

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/566157/
---
 drivers/gpu/drm/msm/msm_drv.c | 92 ++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/msm/msm_gem.c |  1 +
 drivers/gpu/drm/msm/msm_gem.h |  4 ++
 include/uapi/drm/msm_drm.h    |  2 +
 4 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index d751d2023f842..75d60aab57809 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -37,9 +37,10 @@
  * - 1.9.0 - Add MSM_SUBMIT_FENCE_SN_IN
  * - 1.10.0 - Add MSM_SUBMIT_BO_NO_IMPLICIT
  * - 1.11.0 - Add wait boost (MSM_WAIT_FENCE_BOOST, MSM_PREP_BOOST)
+ * - 1.12.0 - Add MSM_INFO_SET_METADATA and MSM_INFO_GET_METADATA
  */
 #define MSM_VERSION_MAJOR	1
-#define MSM_VERSION_MINOR	11
+#define MSM_VERSION_MINOR	12
 #define MSM_VERSION_PATCHLEVEL	0
 
 static void msm_deinit_vram(struct drm_device *ddev);
@@ -546,6 +547,85 @@ static int msm_ioctl_gem_info_set_iova(struct drm_device *dev,
 	return msm_gem_set_iova(obj, ctx->aspace, iova);
 }
 
+static int msm_ioctl_gem_info_set_metadata(struct drm_gem_object *obj,
+					   __user void *metadata,
+					   u32 metadata_size)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	void *buf;
+	int ret;
+
+	/* Impose a moderate upper bound on metadata size: */
+	if (metadata_size > 128) {
+		return -EOVERFLOW;
+	}
+
+	/* Use a temporary buf to keep copy_from_user() outside of gem obj lock: */
+	buf = memdup_user(metadata, metadata_size);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	ret = msm_gem_lock_interruptible(obj);
+	if (ret)
+		goto out;
+
+	msm_obj->metadata =
+		krealloc(msm_obj->metadata, metadata_size, GFP_KERNEL);
+	msm_obj->metadata_size = metadata_size;
+	memcpy(msm_obj->metadata, buf, metadata_size);
+
+	msm_gem_unlock(obj);
+
+out:
+	kfree(buf);
+
+	return ret;
+}
+
+static int msm_ioctl_gem_info_get_metadata(struct drm_gem_object *obj,
+					   __user void *metadata,
+					   u32 *metadata_size)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	void *buf;
+	int ret, len;
+
+	if (!metadata) {
+		/*
+		 * Querying the size is inherently racey, but
+		 * EXT_external_objects expects the app to confirm
+		 * via device and driver UUIDs that the exporter and
+		 * importer versions match.  All we can do from the
+		 * kernel side is check the length under obj lock
+		 * when userspace tries to retrieve the metadata
+		 */
+		*metadata_size = msm_obj->metadata_size;
+		return 0;
+	}
+
+	ret = msm_gem_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
+	/* Avoid copy_to_user() under gem obj lock: */
+	len = msm_obj->metadata_size;
+	buf = kmemdup(msm_obj->metadata, len, GFP_KERNEL);
+
+	msm_gem_unlock(obj);
+
+	if (*metadata_size < len) {
+		ret = -ETOOSMALL;
+	} else if (copy_to_user(metadata, buf, len)) {
+		ret = -EFAULT;
+	} else {
+		*metadata_size = len;
+	}
+
+	kfree(buf);
+
+	return 0;
+}
+
 static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
@@ -568,6 +648,8 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		break;
 	case MSM_INFO_SET_NAME:
 	case MSM_INFO_GET_NAME:
+	case MSM_INFO_SET_METADATA:
+	case MSM_INFO_GET_METADATA:
 		break;
 	default:
 		return -EINVAL;
@@ -630,6 +712,14 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 				ret = -EFAULT;
 		}
 		break;
+	case MSM_INFO_SET_METADATA:
+		ret = msm_ioctl_gem_info_set_metadata(
+			obj, u64_to_user_ptr(args->value), args->len);
+		break;
+	case MSM_INFO_GET_METADATA:
+		ret = msm_ioctl_gem_info_get_metadata(
+			obj, u64_to_user_ptr(args->value), &args->len);
+		break;
 	}
 
 	drm_gem_object_put(obj);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index db1e748daa753..32303cc8e646a 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -1058,6 +1058,7 @@ static void msm_gem_free_object(struct drm_gem_object *obj)
 
 	drm_gem_object_release(obj);
 
+	kfree(msm_obj->metadata);
 	kfree(msm_obj);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 8ddef54431408..6352bd3cc5ed6 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -108,6 +108,10 @@ struct msm_gem_object {
 
 	char name[32]; /* Identifier to print for the debugfs files */
 
+	/* userspace metadata backchannel */
+	void *metadata;
+	u32 metadata_size;
+
 	/**
 	 * pin_count: Number of times the pages are pinned
 	 *
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 6c34272a13fd6..6f2a7ad04aa40 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -139,6 +139,8 @@ struct drm_msm_gem_new {
 #define MSM_INFO_GET_NAME	0x03   /* get debug name, returned by pointer */
 #define MSM_INFO_SET_IOVA	0x04   /* set the iova, passed by value */
 #define MSM_INFO_GET_FLAGS	0x05   /* get the MSM_BO_x flags */
+#define MSM_INFO_SET_METADATA	0x06   /* set userspace metadata */
+#define MSM_INFO_GET_METADATA	0x07   /* get userspace metadata */
 
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-- 
GitLab


From b2acb89af1a400be721bcb14f137aa22b509caba Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Tue, 24 Oct 2023 10:08:04 -0700
Subject: [PATCH 0346/2340] drm/msm/gem: Demote userspace errors to
 DRM_UT_DRIVER

Error messages resulting from incorrect usage of the kernel uabi should
not spam dmesg by default.  But it is useful to enable them to debug
userspace.  So demote to DRM_UT_DRIVER.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/564189/
---
 drivers/gpu/drm/msm/msm_gem.c        |  6 ++---
 drivers/gpu/drm/msm/msm_gem_submit.c | 36 ++++++++++++++++------------
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 32303cc8e646a..175ee4ab8a6f7 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -226,9 +226,9 @@ static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj,
 
 	msm_gem_assert_locked(obj);
 
-	if (GEM_WARN_ON(msm_obj->madv > madv)) {
-		DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n",
-			msm_obj->madv, madv);
+	if (msm_obj->madv > madv) {
+		DRM_DEV_DEBUG_DRIVER(obj->dev->dev, "Invalid madv state: %u vs %u\n",
+				     msm_obj->madv, madv);
 		return ERR_PTR(-EBUSY);
 	}
 
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 99744de6c05a1..207b6ba1565d8 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -17,6 +17,12 @@
 #include "msm_gem.h"
 #include "msm_gpu_trace.h"
 
+/* For userspace errors, use DRM_UT_DRIVER.. so that userspace can enable
+ * error msgs for debugging, but we don't spam dmesg by default
+ */
+#define SUBMIT_ERROR(submit, fmt, ...) \
+	DRM_DEV_DEBUG_DRIVER((submit)->dev->dev, fmt, ##__VA_ARGS__)
+
 /*
  * Cmdstream submission:
  */
@@ -136,7 +142,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 
 		if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) ||
 			!(submit_bo.flags & MANDATORY_FLAGS)) {
-			DRM_ERROR("invalid flags: %x\n", submit_bo.flags);
+			SUBMIT_ERROR(submit, "invalid flags: %x\n", submit_bo.flags);
 			ret = -EINVAL;
 			i = 0;
 			goto out;
@@ -158,7 +164,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 		 */
 		obj = idr_find(&file->object_idr, submit->bos[i].handle);
 		if (!obj) {
-			DRM_ERROR("invalid handle %u at index %u\n", submit->bos[i].handle, i);
+			SUBMIT_ERROR(submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i);
 			ret = -EINVAL;
 			goto out_unlock;
 		}
@@ -202,13 +208,13 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 			break;
 		default:
-			DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
+			SUBMIT_ERROR(submit, "invalid type: %08x\n", submit_cmd.type);
 			return -EINVAL;
 		}
 
 		if (submit_cmd.size % 4) {
-			DRM_ERROR("non-aligned cmdstream buffer size: %u\n",
-					submit_cmd.size);
+			SUBMIT_ERROR(submit, "non-aligned cmdstream buffer size: %u\n",
+				     submit_cmd.size);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -306,8 +312,8 @@ retry:
 
 fail:
 	if (ret == -EALREADY) {
-		DRM_ERROR("handle %u at index %u already on submit list\n",
-				submit->bos[i].handle, i);
+		SUBMIT_ERROR(submit, "handle %u at index %u already on submit list\n",
+			     submit->bos[i].handle, i);
 		ret = -EINVAL;
 	}
 
@@ -448,8 +454,8 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
 		struct drm_gem_object **obj, uint64_t *iova, bool *valid)
 {
 	if (idx >= submit->nr_bos) {
-		DRM_ERROR("invalid buffer index: %u (out of %u)\n",
-				idx, submit->nr_bos);
+		SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n",
+			     idx, submit->nr_bos);
 		return -EINVAL;
 	}
 
@@ -475,7 +481,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 		return 0;
 
 	if (offset % 4) {
-		DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset);
+		SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset);
 		return -EINVAL;
 	}
 
@@ -497,8 +503,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 		bool valid;
 
 		if (submit_reloc.submit_offset % 4) {
-			DRM_ERROR("non-aligned reloc offset: %u\n",
-					submit_reloc.submit_offset);
+			SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n",
+				     submit_reloc.submit_offset);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -508,7 +514,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 
 		if ((off >= (obj->size / 4)) ||
 				(off < last_offset)) {
-			DRM_ERROR("invalid offset %u at reloc %u\n", off, i);
+			SUBMIT_ERROR(submit, "invalid offset %u at reloc %u\n", off, i);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -881,7 +887,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		if (!submit->cmd[i].size ||
 			((submit->cmd[i].size + submit->cmd[i].offset) >
 				obj->size / 4)) {
-			DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
+			SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -893,7 +899,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 		if (!gpu->allow_relocs) {
 			if (submit->cmd[i].nr_relocs) {
-				DRM_ERROR("relocs not allowed\n");
+				SUBMIT_ERROR(submit, "relocs not allowed\n");
 				ret = -EINVAL;
 				goto out;
 			}
-- 
GitLab


From 6c15808d9b7640c3209d53cd2d8d56cfbf9f7175 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Tue, 24 Oct 2023 10:08:05 -0700
Subject: [PATCH 0347/2340] drm/msm/gem: Demote allocations to __GFP_NOWARN

For allocations with userspace controlled size, we should not warn on
allocation failure.  Fixes KASAN splat:

   WARNING: CPU: 6 PID: 29557 at mm/page_alloc.c:5398 __alloc_pages+0x160c/0x2204
   Modules linked in: bridge stp llc hci_vhci tun veth xt_cgroup uinput xt_MASQUERADE rfcomm ip6table_nat fuse 8021q r8153_ecm cdc_ether usbnet r8152 mii venus_enc venus_dec uvcvideo algif_hash algif_skcipher af_alg qcom_spmi_adc_tm5 qcom_spmi_adc5 qcom_vadc_common qcom_spmi_temp_alarm cros_ec_typec typec hci_uart btqca qcom_stats snd_soc_sc7180 venus_core ath10k_snoc ath10k_core ath coresight_tmc coresight_replicator coresight_etm4x coresight_funnel snd_soc_lpass_sc7180 mac80211 coresight bluetooth ecdh_generic ecc cfg80211 cros_ec_sensorhub lzo_rle lzo_compress zram joydev
   CPU: 6 PID: 29557 Comm: syz-executor Not tainted 5.15.110-lockdep-19320-g89d010b0a9df #1 45bdd400697a78353f2927c116615abba810e5dd
   Hardware name: Google Kingoftown (DT)
   pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
   pc : __alloc_pages+0x160c/0x2204
   lr : __alloc_pages+0x58/0x2204
   sp : ffffffc0214176c0
   x29: ffffffc0214178a0 x28: ffffff801f7b4000 x27: 0000000000000000
   x26: ffffff808a4fa000 x25: 1ffffff011290781 x24: ffffff808a59c000
   x23: 0000000000000010 x22: ffffffc0080e6980 x21: 0000000000000010
   x20: 0000000000000000 x19: 00000000080001f8 x18: 0000000000000000
   x17: 0000000000000000 x16: 0000000000000000 x15: 0000000020000500
   x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000001
   x11: 0000000000000000 x10: 1ffffff804282f06 x9 : 0000000000000000
   x8 : ffffffc021417848 x7 : 0000000000000000 x6 : ffffffc0082ac788
   x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000010
   x2 : 0000000000000008 x1 : 0000000000000000 x0 : ffffffc021417830
   Call trace:
   __alloc_pages+0x160c/0x2204
   kmalloc_order+0x50/0xf4
   kmalloc_order_trace+0x38/0x18c
   __kmalloc+0x300/0x45c
   msm_ioctl_gem_submit+0x284/0x5988
   drm_ioctl_kernel+0x270/0x418
   drm_ioctl+0x5e0/0xbf8
   __arm64_sys_ioctl+0x154/0x1d0
   invoke_syscall+0x98/0x278
   el0_svc_common+0x214/0x274
   do_el0_svc+0x9c/0x19c
   el0_svc+0x5c/0xc0
   el0t_64_sync_handler+0x78/0x108
   el0t_64_sync+0x1a4/0x1a8

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/564191/
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 207b6ba1565d8..30d72191cee68 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -43,7 +43,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 	if (sz > SIZE_MAX)
 		return ERR_PTR(-ENOMEM);
 
-	submit = kzalloc(sz, GFP_KERNEL);
+	submit = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
 	if (!submit)
 		return ERR_PTR(-ENOMEM);
 
@@ -234,7 +234,7 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
 			ret = -ENOMEM;
 			goto out;
 		}
-		submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL);
+		submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
 		if (!submit->cmd[i].relocs) {
 			ret = -ENOMEM;
 			goto out;
-- 
GitLab


From ce3e112e7ae854249d8755906acc5f27e1542114 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:42 +0100
Subject: [PATCH 0348/2340] drm/bridge: tpd12s015: Drop buggy __exit annotation
 for remove function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With tpd12s015_remove() marked with __exit this function is discarded
when the driver is compiled as a built-in. The result is that when the
driver unbinds there is no cleanup done which results in resource
leakage or worse.

Fixes: cff5e6f7e83f ("drm/bridge: Add driver for the TI TPD12S015 HDMI level shifter")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-19-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/bridge/ti-tpd12s015.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-tpd12s015.c b/drivers/gpu/drm/bridge/ti-tpd12s015.c
index e0e015243a602..b588fea12502d 100644
--- a/drivers/gpu/drm/bridge/ti-tpd12s015.c
+++ b/drivers/gpu/drm/bridge/ti-tpd12s015.c
@@ -179,7 +179,7 @@ static int tpd12s015_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __exit tpd12s015_remove(struct platform_device *pdev)
+static int tpd12s015_remove(struct platform_device *pdev)
 {
 	struct tpd12s015_device *tpd = platform_get_drvdata(pdev);
 
@@ -197,7 +197,7 @@ MODULE_DEVICE_TABLE(of, tpd12s015_of_match);
 
 static struct platform_driver tpd12s015_driver = {
 	.probe	= tpd12s015_probe,
-	.remove	= __exit_p(tpd12s015_remove),
+	.remove = tpd12s015_remove,
 	.driver	= {
 		.name	= "tpd12s015",
 		.of_match_table = tpd12s015_of_match,
-- 
GitLab


From da20c383de2aa6bfa4c36ed4311e16051aaeab43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:43 +0100
Subject: [PATCH 0349/2340] drm/arcpgu: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jyri Sarha <jyri.sarha@iki.fi>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-20-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/tiny/arcpgu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tiny/arcpgu.c b/drivers/gpu/drm/tiny/arcpgu.c
index e5b10e41554a2..4f8f3172379e3 100644
--- a/drivers/gpu/drm/tiny/arcpgu.c
+++ b/drivers/gpu/drm/tiny/arcpgu.c
@@ -404,14 +404,12 @@ err_unload:
 	return ret;
 }
 
-static int arcpgu_remove(struct platform_device *pdev)
+static void arcpgu_remove(struct platform_device *pdev)
 {
 	struct drm_device *drm = platform_get_drvdata(pdev);
 
 	drm_dev_unregister(drm);
 	arcpgu_unload(drm);
-
-	return 0;
 }
 
 static const struct of_device_id arcpgu_of_table[] = {
@@ -423,7 +421,7 @@ MODULE_DEVICE_TABLE(of, arcpgu_of_table);
 
 static struct platform_driver arcpgu_platform_driver = {
 	.probe = arcpgu_probe,
-	.remove = arcpgu_remove,
+	.remove_new = arcpgu_remove,
 	.driver = {
 		   .name = "arcpgu",
 		   .of_match_table = arcpgu_of_table,
-- 
GitLab


From b47914741a80f59483cba0b59c43a92c01bc4b2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:44 +0100
Subject: [PATCH 0350/2340] drm/armada: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert the armada drm drivers from always returning zero in
the remove callback to the void returning variant.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jyri Sarha <jyri.sarha@iki.fi>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-21-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/armada/armada_crtc.c | 5 ++---
 drivers/gpu/drm/armada/armada_drv.c  | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c
index 15dd667aa2e76..52d2c942d3d2a 100644
--- a/drivers/gpu/drm/armada/armada_crtc.c
+++ b/drivers/gpu/drm/armada/armada_crtc.c
@@ -1066,10 +1066,9 @@ static int armada_lcd_probe(struct platform_device *pdev)
 	return component_add(&pdev->dev, &armada_lcd_ops);
 }
 
-static int armada_lcd_remove(struct platform_device *pdev)
+static void armada_lcd_remove(struct platform_device *pdev)
 {
 	component_del(&pdev->dev, &armada_lcd_ops);
-	return 0;
 }
 
 static const struct of_device_id armada_lcd_of_match[] = {
@@ -1095,7 +1094,7 @@ MODULE_DEVICE_TABLE(platform, armada_lcd_platform_ids);
 
 struct platform_driver armada_lcd_platform_driver = {
 	.probe	= armada_lcd_probe,
-	.remove	= armada_lcd_remove,
+	.remove_new = armada_lcd_remove,
 	.driver = {
 		.name	= "armada-lcd",
 		.owner	=  THIS_MODULE,
diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index fa1c67598706d..e51ecc4f7ef44 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -226,10 +226,9 @@ static int armada_drm_probe(struct platform_device *pdev)
 					       match);
 }
 
-static int armada_drm_remove(struct platform_device *pdev)
+static void armada_drm_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &armada_master_ops);
-	return 0;
 }
 
 static void armada_drm_shutdown(struct platform_device *pdev)
@@ -249,7 +248,7 @@ MODULE_DEVICE_TABLE(platform, armada_drm_platform_ids);
 
 static struct platform_driver armada_drm_platform_driver = {
 	.probe	= armada_drm_probe,
-	.remove	= armada_drm_remove,
+	.remove_new = armada_drm_remove,
 	.shutdown = armada_drm_shutdown,
 	.driver	= {
 		.name	= "armada-drm",
-- 
GitLab


From be79252e7c83885bd0043168954b8400a42153ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:45 +0100
Subject: [PATCH 0351/2340] drm/bridge: cdns-mhdp8546: Improve error reporting
 in remove callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the generic error message issued by the driver core when the remove
callback returns non-zero ("remove callback returned a non-zero value. This
will be ignored.") by a message that tells the actual problem.

Also simplify a bit by checking the return value of wait_event_timeout a
bit later.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-22-u.kleine-koenig@pengutronix.de
---
 .../drm/bridge/cadence/cdns-mhdp8546-core.c    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
index 6af565ac307ae..66c87d36ce51e 100644
--- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
+++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
@@ -2600,7 +2600,6 @@ static int cdns_mhdp_remove(struct platform_device *pdev)
 {
 	struct cdns_mhdp_device *mhdp = platform_get_drvdata(pdev);
 	unsigned long timeout = msecs_to_jiffies(100);
-	bool stop_fw = false;
 	int ret;
 
 	drm_bridge_remove(&mhdp->bridge);
@@ -2608,18 +2607,19 @@ static int cdns_mhdp_remove(struct platform_device *pdev)
 	ret = wait_event_timeout(mhdp->fw_load_wq,
 				 mhdp->hw_state == MHDP_HW_READY,
 				 timeout);
-	if (ret == 0)
-		dev_err(mhdp->dev, "%s: Timeout waiting for fw loading\n",
-			__func__);
-	else
-		stop_fw = true;
-
 	spin_lock(&mhdp->start_lock);
 	mhdp->hw_state = MHDP_HW_STOPPED;
 	spin_unlock(&mhdp->start_lock);
 
-	if (stop_fw)
+	if (ret == 0) {
+		dev_err(mhdp->dev, "%s: Timeout waiting for fw loading\n",
+			__func__);
+	} else {
 		ret = cdns_mhdp_set_firmware_active(mhdp, false);
+		if (ret)
+			dev_err(mhdp->dev, "Failed to stop firmware (%pe)\n",
+				ERR_PTR(ret));
+	}
 
 	phy_exit(mhdp->phy);
 
@@ -2635,7 +2635,7 @@ static int cdns_mhdp_remove(struct platform_device *pdev)
 
 	clk_disable_unprepare(mhdp->clk);
 
-	return ret;
+	return 0;
 }
 
 static const struct of_device_id mhdp_ids[] = {
-- 
GitLab


From 3438cf177ae51f11255d36a94b17939b06ce1717 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:46 +0100
Subject: [PATCH 0352/2340] drm/bridge: cdns-mhdp8546: Convert to platform
 remove callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-23-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
index 66c87d36ce51e..7d470527455b1 100644
--- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
+++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
@@ -2596,7 +2596,7 @@ clk_disable:
 	return ret;
 }
 
-static int cdns_mhdp_remove(struct platform_device *pdev)
+static void cdns_mhdp_remove(struct platform_device *pdev)
 {
 	struct cdns_mhdp_device *mhdp = platform_get_drvdata(pdev);
 	unsigned long timeout = msecs_to_jiffies(100);
@@ -2634,8 +2634,6 @@ static int cdns_mhdp_remove(struct platform_device *pdev)
 	/* Ignoring mhdp->hdcp.check_work and mhdp->hdcp.prop_work here. */
 
 	clk_disable_unprepare(mhdp->clk);
-
-	return 0;
 }
 
 static const struct of_device_id mhdp_ids[] = {
@@ -2658,7 +2656,7 @@ static struct platform_driver mhdp_driver = {
 		.of_match_table	= mhdp_ids,
 	},
 	.probe	= cdns_mhdp_probe,
-	.remove	= cdns_mhdp_remove,
+	.remove_new = cdns_mhdp_remove,
 };
 module_platform_driver(mhdp_driver);
 
-- 
GitLab


From 3cdbe59868ef5228b561bb30bde13cc1021ee8a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:47 +0100
Subject: [PATCH 0353/2340] drm/bridge: tpd12s015: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.

To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new(), which already returns void. Eventually after all drivers
are converted, .remove_new() will be renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-24-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/bridge/ti-tpd12s015.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-tpd12s015.c b/drivers/gpu/drm/bridge/ti-tpd12s015.c
index b588fea12502d..f9fb35683a273 100644
--- a/drivers/gpu/drm/bridge/ti-tpd12s015.c
+++ b/drivers/gpu/drm/bridge/ti-tpd12s015.c
@@ -179,13 +179,11 @@ static int tpd12s015_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int tpd12s015_remove(struct platform_device *pdev)
+static void tpd12s015_remove(struct platform_device *pdev)
 {
 	struct tpd12s015_device *tpd = platform_get_drvdata(pdev);
 
 	drm_bridge_remove(&tpd->bridge);
-
-	return 0;
 }
 
 static const struct of_device_id tpd12s015_of_match[] = {
@@ -197,7 +195,7 @@ MODULE_DEVICE_TABLE(of, tpd12s015_of_match);
 
 static struct platform_driver tpd12s015_driver = {
 	.probe	= tpd12s015_probe,
-	.remove = tpd12s015_remove,
+	.remove_new = tpd12s015_remove,
 	.driver	= {
 		.name	= "tpd12s015",
 		.of_match_table = tpd12s015_of_match,
-- 
GitLab


From d437dab5b06e3dc73f6a58a6c9fd0b13d1ed80e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:48 +0100
Subject: [PATCH 0354/2340] drm/etnaviv: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert the etnaviv drm driver from always returning zero in
the remove callback to the void returning variant.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jyri Sarha <jyri.sarha@iki.fi>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Christian Gmeiner <cgmeiner@igalia.com>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-25-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/etnaviv/etnaviv_drv.c | 6 ++----
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 5 ++---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index a8d3fa81e4ec5..6228ce6032482 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -640,16 +640,14 @@ static int etnaviv_pdev_probe(struct platform_device *pdev)
 	return component_master_add_with_match(dev, &etnaviv_master_ops, match);
 }
 
-static int etnaviv_pdev_remove(struct platform_device *pdev)
+static void etnaviv_pdev_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &etnaviv_master_ops);
-
-	return 0;
 }
 
 static struct platform_driver etnaviv_platform_driver = {
 	.probe      = etnaviv_pdev_probe,
-	.remove     = etnaviv_pdev_remove,
+	.remove_new = etnaviv_pdev_remove,
 	.driver     = {
 		.name   = "etnaviv",
 	},
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 5105d290e72e2..9b8445d2a128f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1904,11 +1904,10 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int etnaviv_gpu_platform_remove(struct platform_device *pdev)
+static void etnaviv_gpu_platform_remove(struct platform_device *pdev)
 {
 	component_del(&pdev->dev, &gpu_ops);
 	pm_runtime_disable(&pdev->dev);
-	return 0;
 }
 
 static int etnaviv_gpu_rpm_suspend(struct device *dev)
@@ -1970,6 +1969,6 @@ struct platform_driver etnaviv_gpu_driver = {
 		.of_match_table = etnaviv_gpu_match,
 	},
 	.probe = etnaviv_gpu_platform_probe,
-	.remove = etnaviv_gpu_platform_remove,
+	.remove_new = etnaviv_gpu_platform_remove,
 	.id_table = gpu_ids,
 };
-- 
GitLab


From 60096f0a77600ac2cc92b82fee279d1905576950 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:50 +0100
Subject: [PATCH 0355/2340] drm/imx/dcss: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jyri Sarha <jyri.sarha@iki.fi>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-27-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/imx/dcss/dcss-drv.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/imx/dcss/dcss-drv.c b/drivers/gpu/drm/imx/dcss/dcss-drv.c
index b61cec0cc79d9..ad5f29ea8f6a1 100644
--- a/drivers/gpu/drm/imx/dcss/dcss-drv.c
+++ b/drivers/gpu/drm/imx/dcss/dcss-drv.c
@@ -80,7 +80,7 @@ err:
 	return err;
 }
 
-static int dcss_drv_platform_remove(struct platform_device *pdev)
+static void dcss_drv_platform_remove(struct platform_device *pdev)
 {
 	struct dcss_drv *mdrv = dev_get_drvdata(&pdev->dev);
 
@@ -88,8 +88,6 @@ static int dcss_drv_platform_remove(struct platform_device *pdev)
 	dcss_dev_destroy(mdrv->dcss);
 
 	kfree(mdrv);
-
-	return 0;
 }
 
 static void dcss_drv_platform_shutdown(struct platform_device *pdev)
@@ -120,7 +118,7 @@ MODULE_DEVICE_TABLE(of, dcss_of_match);
 
 static struct platform_driver dcss_platform_driver = {
 	.probe	= dcss_drv_platform_probe,
-	.remove	= dcss_drv_platform_remove,
+	.remove_new = dcss_drv_platform_remove,
 	.shutdown = dcss_drv_platform_shutdown,
 	.driver	= {
 		.name = "imx-dcss",
-- 
GitLab


From a7e43c0a1a70e0d71fdda5d315927e1b3bc7e8d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:51 +0100
Subject: [PATCH 0356/2340] drm/imx: lcdc: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.

To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new(), which already returns void. Eventually after all drivers
are converted, .remove_new() will be renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-28-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/imx/lcdc/imx-lcdc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c
index 22b65f4a0e303..0902983374d0b 100644
--- a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c
+++ b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c
@@ -515,14 +515,12 @@ static int imx_lcdc_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int imx_lcdc_remove(struct platform_device *pdev)
+static void imx_lcdc_remove(struct platform_device *pdev)
 {
 	struct drm_device *drm = platform_get_drvdata(pdev);
 
 	drm_dev_unregister(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void imx_lcdc_shutdown(struct platform_device *pdev)
@@ -536,7 +534,7 @@ static struct platform_driver imx_lcdc_driver = {
 		.of_match_table = imx_lcdc_of_dev_id,
 	},
 	.probe = imx_lcdc_probe,
-	.remove = imx_lcdc_remove,
+	.remove_new = imx_lcdc_remove,
 	.shutdown = imx_lcdc_shutdown,
 };
 module_platform_driver(imx_lcdc_driver);
-- 
GitLab


From 8c67c9a4e4582c30408308cfdfd8719180075f9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:52 +0100
Subject: [PATCH 0357/2340] drm/kmb: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jyri Sarha <jyri.sarha@iki.fi>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-29-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/kmb/kmb_drv.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index 24035b53441cc..169b83987ce25 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -448,7 +448,7 @@ static const struct drm_driver kmb_driver = {
 	.minor = DRIVER_MINOR,
 };
 
-static int kmb_remove(struct platform_device *pdev)
+static void kmb_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct drm_device *drm = dev_get_drvdata(dev);
@@ -473,7 +473,6 @@ static int kmb_remove(struct platform_device *pdev)
 	/* Unregister DSI host */
 	kmb_dsi_host_unregister(kmb->kmb_dsi);
 	drm_atomic_helper_shutdown(drm);
-	return 0;
 }
 
 static int kmb_probe(struct platform_device *pdev)
@@ -621,7 +620,7 @@ static SIMPLE_DEV_PM_OPS(kmb_pm_ops, kmb_pm_suspend, kmb_pm_resume);
 
 static struct platform_driver kmb_platform_driver = {
 	.probe = kmb_probe,
-	.remove = kmb_remove,
+	.remove_new = kmb_remove,
 	.driver = {
 		.name = "kmb-drm",
 		.pm = &kmb_pm_ops,
-- 
GitLab


From 30b749adb13db8e672d1a53266876732b8a2edb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:53 +0100
Subject: [PATCH 0358/2340] drm/mediatek: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.

To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new(), which already returns void. Eventually after all drivers
are converted, .remove_new() will be renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-30-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 5 ++---
 drivers/gpu/drm/mediatek/mtk_ethdr.c            | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 6bf6367853fba..3fdef3ad4ffde 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -531,16 +531,15 @@ static int mtk_disp_ovl_adaptor_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mtk_disp_ovl_adaptor_remove(struct platform_device *pdev)
+static void mtk_disp_ovl_adaptor_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &mtk_disp_ovl_adaptor_master_ops);
 	pm_runtime_disable(&pdev->dev);
-	return 0;
 }
 
 struct platform_driver mtk_disp_ovl_adaptor_driver = {
 	.probe		= mtk_disp_ovl_adaptor_probe,
-	.remove		= mtk_disp_ovl_adaptor_remove,
+	.remove_new	= mtk_disp_ovl_adaptor_remove,
 	.driver		= {
 		.name	= "mediatek-disp-ovl-adaptor",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/mediatek/mtk_ethdr.c b/drivers/gpu/drm/mediatek/mtk_ethdr.c
index db7ac666ec5e1..6a5d0c345aab9 100644
--- a/drivers/gpu/drm/mediatek/mtk_ethdr.c
+++ b/drivers/gpu/drm/mediatek/mtk_ethdr.c
@@ -346,10 +346,9 @@ static int mtk_ethdr_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mtk_ethdr_remove(struct platform_device *pdev)
+static void mtk_ethdr_remove(struct platform_device *pdev)
 {
 	component_del(&pdev->dev, &mtk_ethdr_component_ops);
-	return 0;
 }
 
 static const struct of_device_id mtk_ethdr_driver_dt_match[] = {
@@ -361,7 +360,7 @@ MODULE_DEVICE_TABLE(of, mtk_ethdr_driver_dt_match);
 
 struct platform_driver mtk_ethdr_driver = {
 	.probe		= mtk_ethdr_probe,
-	.remove		= mtk_ethdr_remove,
+	.remove_new	= mtk_ethdr_remove,
 	.driver		= {
 		.name	= "mediatek-disp-ethdr",
 		.owner	= THIS_MODULE,
-- 
GitLab


From ac96555768099bf3e87b59abe9f156bb6fffbbd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:54 +0100
Subject: [PATCH 0359/2340] drm/meson: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.

To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new(), which already returns void. Eventually after all drivers
are converted, .remove_new() will be renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-31-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/meson/meson_dw_mipi_dsi.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c b/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c
index e5fe4e994f43b..a6bc1bdb3d0d8 100644
--- a/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c
+++ b/drivers/gpu/drm/meson/meson_dw_mipi_dsi.c
@@ -323,13 +323,11 @@ static int meson_dw_mipi_dsi_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int meson_dw_mipi_dsi_remove(struct platform_device *pdev)
+static void meson_dw_mipi_dsi_remove(struct platform_device *pdev)
 {
 	struct meson_dw_mipi_dsi *mipi_dsi = platform_get_drvdata(pdev);
 
 	dw_mipi_dsi_remove(mipi_dsi->dmd);
-
-	return 0;
 }
 
 static const struct of_device_id meson_dw_mipi_dsi_of_table[] = {
@@ -340,7 +338,7 @@ MODULE_DEVICE_TABLE(of, meson_dw_mipi_dsi_of_table);
 
 static struct platform_driver meson_dw_mipi_dsi_platform_driver = {
 	.probe		= meson_dw_mipi_dsi_probe,
-	.remove		= meson_dw_mipi_dsi_remove,
+	.remove_new	= meson_dw_mipi_dsi_remove,
 	.driver		= {
 		.name		= DRIVER_NAME,
 		.of_match_table	= meson_dw_mipi_dsi_of_table,
-- 
GitLab


From 0fa2db3bc7498d7b88e6742571cb832f749d625f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:55 +0100
Subject: [PATCH 0360/2340] drm/nouveau: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jyri Sarha <jyri.sarha@iki.fi>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-32-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/nouveau/nouveau_platform.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 23cd43a7fd19a..bf2dc7567ea40 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -43,11 +43,10 @@ static int nouveau_platform_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int nouveau_platform_remove(struct platform_device *pdev)
+static void nouveau_platform_remove(struct platform_device *pdev)
 {
 	struct drm_device *dev = platform_get_drvdata(pdev);
 	nouveau_drm_device_remove(dev);
-	return 0;
 }
 
 #if IS_ENABLED(CONFIG_OF)
@@ -93,5 +92,5 @@ struct platform_driver nouveau_platform_driver = {
 		.of_match_table = of_match_ptr(nouveau_platform_match),
 	},
 	.probe = nouveau_platform_probe,
-	.remove = nouveau_platform_remove,
+	.remove_new = nouveau_platform_remove,
 };
-- 
GitLab


From 16b01df3c5db447e05cff60c2f612d76c0cd7baf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:56 +0100
Subject: [PATCH 0361/2340] drm/sprd: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert the sprd drm drivers from always returning zero in the
remove callback to the void returning variant.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jyri Sarha <jyri.sarha@iki.fi>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-33-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/sprd/sprd_dpu.c | 6 ++----
 drivers/gpu/drm/sprd/sprd_drm.c | 5 ++---
 drivers/gpu/drm/sprd/sprd_dsi.c | 6 ++----
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/sprd/sprd_dpu.c b/drivers/gpu/drm/sprd/sprd_dpu.c
index 48183bbd0590d..deb3bb96e2a8a 100644
--- a/drivers/gpu/drm/sprd/sprd_dpu.c
+++ b/drivers/gpu/drm/sprd/sprd_dpu.c
@@ -859,16 +859,14 @@ static int sprd_dpu_probe(struct platform_device *pdev)
 	return component_add(&pdev->dev, &dpu_component_ops);
 }
 
-static int sprd_dpu_remove(struct platform_device *pdev)
+static void sprd_dpu_remove(struct platform_device *pdev)
 {
 	component_del(&pdev->dev, &dpu_component_ops);
-
-	return 0;
 }
 
 struct platform_driver sprd_dpu_driver = {
 	.probe = sprd_dpu_probe,
-	.remove = sprd_dpu_remove,
+	.remove_new = sprd_dpu_remove,
 	.driver = {
 		.name = "sprd-dpu-drv",
 		.of_match_table = dpu_match_table,
diff --git a/drivers/gpu/drm/sprd/sprd_drm.c b/drivers/gpu/drm/sprd/sprd_drm.c
index 0aa39156f2fab..a74cd0caf6450 100644
--- a/drivers/gpu/drm/sprd/sprd_drm.c
+++ b/drivers/gpu/drm/sprd/sprd_drm.c
@@ -138,10 +138,9 @@ static int sprd_drm_probe(struct platform_device *pdev)
 	return drm_of_component_probe(&pdev->dev, component_compare_of, &drm_component_ops);
 }
 
-static int sprd_drm_remove(struct platform_device *pdev)
+static void sprd_drm_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &drm_component_ops);
-	return 0;
 }
 
 static void sprd_drm_shutdown(struct platform_device *pdev)
@@ -164,7 +163,7 @@ MODULE_DEVICE_TABLE(of, drm_match_table);
 
 static struct platform_driver sprd_drm_driver = {
 	.probe = sprd_drm_probe,
-	.remove = sprd_drm_remove,
+	.remove_new = sprd_drm_remove,
 	.shutdown = sprd_drm_shutdown,
 	.driver = {
 		.name = "sprd-drm-drv",
diff --git a/drivers/gpu/drm/sprd/sprd_dsi.c b/drivers/gpu/drm/sprd/sprd_dsi.c
index d7b143a756015..0b69c140eab36 100644
--- a/drivers/gpu/drm/sprd/sprd_dsi.c
+++ b/drivers/gpu/drm/sprd/sprd_dsi.c
@@ -1051,18 +1051,16 @@ static int sprd_dsi_probe(struct platform_device *pdev)
 	return mipi_dsi_host_register(&dsi->host);
 }
 
-static int sprd_dsi_remove(struct platform_device *pdev)
+static void sprd_dsi_remove(struct platform_device *pdev)
 {
 	struct sprd_dsi *dsi = dev_get_drvdata(&pdev->dev);
 
 	mipi_dsi_host_unregister(&dsi->host);
-
-	return 0;
 }
 
 struct platform_driver sprd_dsi_driver = {
 	.probe = sprd_dsi_probe,
-	.remove = sprd_dsi_remove,
+	.remove_new = sprd_dsi_remove,
 	.driver = {
 		.name = "sprd-dsi-drv",
 		.of_match_table = dsi_match_table,
-- 
GitLab


From 38360bf96d816e175bc602c4ee76953cd303b71d Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Tue, 19 Sep 2023 10:12:50 +0300
Subject: [PATCH 0362/2340] drm/tilcdc: Fix irq free on unload

The driver only frees the reserved irq if priv->irq_enabled is set to
true. However, the driver mistakenly sets priv->irq_enabled to false,
instead of true, in tilcdc_irq_install(), and thus the driver never
frees the irq, causing issues on loading the driver a second time.

Fixes: b6366814fa77 ("drm/tilcdc: Convert to Linux IRQ interfaces")
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230919-lcdc-v1-1-ba60da7421e1@ideasonboard.com
---
 drivers/gpu/drm/tilcdc/tilcdc_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 8ebd7134ee21b..2f6eaac7f659b 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -138,7 +138,7 @@ static int tilcdc_irq_install(struct drm_device *dev, unsigned int irq)
 	if (ret)
 		return ret;
 
-	priv->irq_enabled = false;
+	priv->irq_enabled = true;
 
 	return 0;
 }
-- 
GitLab


From a4dea9a06f72c7885f8d4dccedec7e477878d798 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 14 Nov 2023 17:14:06 +0200
Subject: [PATCH 0363/2340] drm/edid/firmware: drop
 drm_kms_helper.edid_firmware backward compat

Since the edid_firmware module parameter was moved from
drm_kms_helper.ko to drm.ko in v4.15, we've had a backwards
compatibility helper in place, with a DRM_NOTE() suggesting to migrate
to drm.edid_firmware. This was added in commit ac6c35a4d8c7 ("drm: add
backwards compatibility support for drm_kms_helper.edid_firmware").

More than five years and 30+ kernel releases later, drop the backward
compatibility.

v2: Drop the warnings too

Acked-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114151406.61230-1-jani.nikula@intel.com
---
 drivers/gpu/drm/drm_edid_load.c         | 16 -------------
 drivers/gpu/drm/drm_kms_helper_common.c | 32 -------------------------
 include/drm/drm_edid.h                  |  5 ----
 3 files changed, 53 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index 5d9ef267ebb3c..60fcb80bce61e 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c
@@ -23,22 +23,6 @@ module_param_string(edid_firmware, edid_firmware, sizeof(edid_firmware), 0644);
 MODULE_PARM_DESC(edid_firmware, "Do not probe monitor, use specified EDID blob "
 	"from built-in data or /lib/firmware instead. ");
 
-/* Use only for backward compatibility with drm_kms_helper.edid_firmware */
-int __drm_set_edid_firmware_path(const char *path)
-{
-	scnprintf(edid_firmware, sizeof(edid_firmware), "%s", path);
-
-	return 0;
-}
-EXPORT_SYMBOL(__drm_set_edid_firmware_path);
-
-/* Use only for backward compatibility with drm_kms_helper.edid_firmware */
-int __drm_get_edid_firmware_path(char *buf, size_t bufsize)
-{
-	return scnprintf(buf, bufsize, "%s", edid_firmware);
-}
-EXPORT_SYMBOL(__drm_get_edid_firmware_path);
-
 #define GENERIC_EDIDS 6
 static const char * const generic_edid_name[GENERIC_EDIDS] = {
 	"edid/800x600.bin",
diff --git a/drivers/gpu/drm/drm_kms_helper_common.c b/drivers/gpu/drm/drm_kms_helper_common.c
index 0bf0fc1abf54c..0c7550c0462b5 100644
--- a/drivers/gpu/drm/drm_kms_helper_common.c
+++ b/drivers/gpu/drm/drm_kms_helper_common.c
@@ -27,38 +27,6 @@
 
 #include <linux/module.h>
 
-#include <drm/drm_edid.h>
-#include <drm/drm_print.h>
-
-#include "drm_crtc_helper_internal.h"
-
 MODULE_AUTHOR("David Airlie, Jesse Barnes");
 MODULE_DESCRIPTION("DRM KMS helper");
 MODULE_LICENSE("GPL and additional rights");
-
-#if IS_ENABLED(CONFIG_DRM_LOAD_EDID_FIRMWARE)
-
-/* Backward compatibility for drm_kms_helper.edid_firmware */
-static int edid_firmware_set(const char *val, const struct kernel_param *kp)
-{
-	DRM_NOTE("drm_kms_helper.edid_firmware is deprecated, please use drm.edid_firmware instead.\n");
-
-	return __drm_set_edid_firmware_path(val);
-}
-
-static int edid_firmware_get(char *buffer, const struct kernel_param *kp)
-{
-	return __drm_get_edid_firmware_path(buffer, PAGE_SIZE);
-}
-
-static const struct kernel_param_ops edid_firmware_ops = {
-	.set = edid_firmware_set,
-	.get = edid_firmware_get,
-};
-
-module_param_cb(edid_firmware, &edid_firmware_ops, NULL, 0644);
-__MODULE_PARM_TYPE(edid_firmware, "charp");
-MODULE_PARM_DESC(edid_firmware,
-		 "DEPRECATED. Use drm.edid_firmware module parameter instead.");
-
-#endif
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index e98aa68187007..518d1b8106c76 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -329,11 +329,6 @@ int drm_edid_to_speaker_allocation(const struct edid *edid, u8 **sadb);
 int drm_av_sync_delay(struct drm_connector *connector,
 		      const struct drm_display_mode *mode);
 
-#ifdef CONFIG_DRM_LOAD_EDID_FIRMWARE
-int __drm_set_edid_firmware_path(const char *path);
-int __drm_get_edid_firmware_path(char *buf, size_t bufsize);
-#endif
-
 bool drm_edid_are_equal(const struct edid *edid1, const struct edid *edid2);
 
 int
-- 
GitLab


From 191dc43935d1ece82bc6c9653463b3b1cd8198fb Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:31 +0200
Subject: [PATCH 0364/2340] drm/dp_mst: Store the MST PBN divider value in
 fixed point format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On UHBR links the PBN divider is a fractional number, accordingly store
it in fixed point format. For now drm_dp_get_vc_payload_bw() always
returns a whole number and all callers will use only the integer part of
it which should preserve the current behavior. The next patch will fix
drm_dp_get_vc_payload_bw() for UHBR rates returning a fractional number
for those (also accounting for the channel coding efficiency correctly).

Cc: Lyude Paul <lyude@redhat.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Wayne Lin <wayne.lin@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
[Rebased changes in dm_helpers_construct_old_payload() on drm-intel-next]
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-2-imre.deak@intel.com
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  5 +++--
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |  4 +++-
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |  5 +++--
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 22 +++++++++++++------
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  3 ++-
 drivers/gpu/drm/nouveau/dispnv50/disp.c       |  6 +++--
 include/drm/display/drm_dp_mst_helper.h       |  7 +++---
 7 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index df3c42bb92742..2253e8fd2d184 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -85,6 +85,7 @@
 #include <drm/drm_atomic_uapi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_blend.h>
+#include <drm/drm_fixed.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_vblank.h>
@@ -6863,8 +6864,8 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
 	if (IS_ERR(mst_state))
 		return PTR_ERR(mst_state);
 
-	if (!mst_state->pbn_div)
-		mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link);
+	if (!mst_state->pbn_div.full)
+		mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
 
 	if (!state->duplicated) {
 		int max_bpc = conn_state->max_requested_bpc;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index baf7e5254fb3e..53e323b71d263 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -31,6 +31,7 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_fixed.h>
 
 #include "dm_services.h"
 #include "amdgpu.h"
@@ -205,13 +206,14 @@ void dm_helpers_dp_update_branch_info(
 
 static void dm_helpers_construct_old_payload(
 			struct dc_link *link,
-			int pbn_per_slot,
+			fixed20_12 pbn_per_slot_fp,
 			struct drm_dp_mst_atomic_payload *new_payload,
 			struct drm_dp_mst_atomic_payload *old_payload)
 {
 	struct link_mst_stream_allocation_table current_link_table =
 									link->mst_stream_alloc_table;
 	struct link_mst_stream_allocation *dc_alloc;
+	int pbn_per_slot = dfixed_trunc(pbn_per_slot_fp);
 	int i;
 
 	*old_payload = *new_payload;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 2afd1bc74978d..d80f19e442bfb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -27,6 +27,7 @@
 #include <drm/display/drm_dp_mst_helper.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_fixed.h>
 #include "dm_services.h"
 #include "amdgpu.h"
 #include "amdgpu_dm.h"
@@ -935,10 +936,10 @@ static int increase_dsc_bpp(struct drm_atomic_state *state,
 		link_timeslots_used = 0;
 
 		for (i = 0; i < count; i++)
-			link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, mst_state->pbn_div);
+			link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, dfixed_trunc(mst_state->pbn_div));
 
 		fair_pbn_alloc =
-			(63 - link_timeslots_used) / remaining_to_increase * mst_state->pbn_div;
+			(63 - link_timeslots_used) / remaining_to_increase * dfixed_trunc(mst_state->pbn_div);
 
 		if (initial_slack[next_index] > fair_pbn_alloc) {
 			vars[next_index].pbn += fair_pbn_alloc;
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 0854fe428f173..7efbd8ef664f0 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -43,6 +43,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_fixed.h>
 #include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
@@ -3578,16 +3579,22 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
  * value is in units of PBNs/(timeslots/1 MTP). This value can be used to
  * convert the number of PBNs required for a given stream to the number of
  * timeslots this stream requires in each MTP.
+ *
+ * Returns the BW / timeslot value in 20.12 fixed point format.
  */
-int drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr,
-			     int link_rate, int link_lane_count)
+fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr,
+				    int link_rate, int link_lane_count)
 {
+	fixed20_12 ret;
+
 	if (link_rate == 0 || link_lane_count == 0)
 		drm_dbg_kms(mgr->dev, "invalid link rate/lane count: (%d / %d)\n",
 			    link_rate, link_lane_count);
 
 	/* See DP v2.0 2.6.4.2, VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */
-	return link_rate * link_lane_count / 54000;
+	ret.full = dfixed_const(link_rate * link_lane_count / 54000);
+
+	return ret;
 }
 EXPORT_SYMBOL(drm_dp_get_vc_payload_bw);
 
@@ -4335,7 +4342,7 @@ int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state,
 		}
 	}
 
-	req_slots = DIV_ROUND_UP(pbn, topology_state->pbn_div);
+	req_slots = DIV_ROUND_UP(pbn, dfixed_trunc(topology_state->pbn_div));
 
 	drm_dbg_atomic(mgr->dev, "[CONNECTOR:%d:%s] [MST PORT:%p] TU %d -> %d\n",
 		       port->connector->base.id, port->connector->name,
@@ -4872,7 +4879,8 @@ void drm_dp_mst_dump_topology(struct seq_file *m,
 	state = to_drm_dp_mst_topology_state(mgr->base.state);
 	seq_printf(m, "\n*** Atomic state info ***\n");
 	seq_printf(m, "payload_mask: %x, max_payloads: %d, start_slot: %u, pbn_div: %d\n",
-		   state->payload_mask, mgr->max_payloads, state->start_slot, state->pbn_div);
+		   state->payload_mask, mgr->max_payloads, state->start_slot,
+		   dfixed_trunc(state->pbn_div));
 
 	seq_printf(m, "\n| idx | port | vcpi | slots | pbn | dsc | status |     sink name     |\n");
 	for (i = 0; i < mgr->max_payloads; i++) {
@@ -5330,10 +5338,10 @@ drm_dp_mst_atomic_check_payload_alloc_limits(struct drm_dp_mst_topology_mgr *mgr
 	}
 
 	if (!payload_count)
-		mst_state->pbn_div = 0;
+		mst_state->pbn_div.full = dfixed_const(0);
 
 	drm_dbg_atomic(mgr->dev, "[MST MGR:%p] mst state %p TU pbn_div=%d avail=%d used=%d\n",
-		       mgr, mst_state, mst_state->pbn_div, avail_slots,
+		       mgr, mst_state, dfixed_trunc(mst_state->pbn_div), avail_slots,
 		       mst_state->total_avail_slots - avail_slots);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 0cb9405f59eaa..e5d6b811c22ef 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -26,6 +26,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_fixed.h>
 #include <drm/drm_probe_helper.h>
 
 #include "i915_drv.h"
@@ -202,7 +203,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 		 */
 		drm_WARN_ON(&i915->drm, remote_m_n.tu < crtc_state->dp_m_n.tu);
 		crtc_state->dp_m_n.tu = remote_m_n.tu;
-		crtc_state->pbn = remote_m_n.tu * mst_state->pbn_div;
+		crtc_state->pbn = remote_m_n.tu * dfixed_trunc(mst_state->pbn_div);
 
 		slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr,
 						      connector->port,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 3d390f5029f65..95fb097e5453b 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -39,6 +39,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_fixed.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
 
@@ -945,7 +946,8 @@ nv50_msto_prepare(struct drm_atomic_state *state,
 	if (ret == 0) {
 		nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index,
 				      payload->vc_start_slot, payload->time_slots,
-				      payload->pbn, payload->time_slots * mst_state->pbn_div);
+				      payload->pbn,
+				      payload->time_slots * dfixed_trunc(mst_state->pbn_div));
 	} else {
 		nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
 	}
@@ -989,7 +991,7 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
 	if (IS_ERR(mst_state))
 		return PTR_ERR(mst_state);
 
-	if (!mst_state->pbn_div) {
+	if (!mst_state->pbn_div.full) {
 		struct nouveau_encoder *outp = mstc->mstm->outp;
 
 		mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr,
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index a4aad6df71f18..9b19d8bd520af 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -25,6 +25,7 @@
 #include <linux/types.h>
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_fixed.h>
 
 #if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
 #include <linux/stackdepot.h>
@@ -617,7 +618,7 @@ struct drm_dp_mst_topology_state {
 	 * @pbn_div: The current PBN divisor for this topology. The driver is expected to fill this
 	 * out itself.
 	 */
-	int pbn_div;
+	fixed20_12 pbn_div;
 };
 
 #define to_dp_mst_topology_mgr(x) container_of(x, struct drm_dp_mst_topology_mgr, base)
@@ -839,8 +840,8 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector,
 				 struct drm_dp_mst_topology_mgr *mgr,
 				 struct drm_dp_mst_port *port);
 
-int drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr,
-			     int link_rate, int link_lane_count);
+fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr,
+				    int link_rate, int link_lane_count);
 
 int drm_dp_calc_pbn_mode(int clock, int bpp);
 
-- 
GitLab


From d389989ed530b3d8944974b7ee866b089720bc9c Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 17 Nov 2023 17:09:27 +0200
Subject: [PATCH 0365/2340] drm/dp_mst: Fix PBN divider calculation for UHBR
 rates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current way of calculating the pbn_div value, the link BW per each
MTP slot, worked only for DP 1.4 link rates. Fix things up for UHBR
rates calculating with the correct channel coding efficiency based on
the link rate.

v2:
- Return the fractional pbn_div value from drm_dp_get_vc_payload_bw().
v3:
- Fix rounding up quotient while calculating req_slots. (Ville)

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117150929.1767227-1-imre.deak@intel.com
---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 10 +++++++---
 include/drm/display/drm_dp_helper.h           | 13 +++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 7efbd8ef664f0..ab39c7ad9b899 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -3585,14 +3585,18 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
 fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr,
 				    int link_rate, int link_lane_count)
 {
+	int ch_coding_efficiency =
+		drm_dp_bw_channel_coding_efficiency(drm_dp_is_uhbr_rate(link_rate));
 	fixed20_12 ret;
 
 	if (link_rate == 0 || link_lane_count == 0)
 		drm_dbg_kms(mgr->dev, "invalid link rate/lane count: (%d / %d)\n",
 			    link_rate, link_lane_count);
 
-	/* See DP v2.0 2.6.4.2, VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */
-	ret.full = dfixed_const(link_rate * link_lane_count / 54000);
+	/* See DP v2.0 2.6.4.2, 2.7.6.3 VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */
+	ret.full = DIV_ROUND_DOWN_ULL(mul_u32_u32(link_rate * link_lane_count,
+						  ch_coding_efficiency),
+				      (1000000ULL * 8 * 5400) >> 12);
 
 	return ret;
 }
@@ -4342,7 +4346,7 @@ int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state,
 		}
 	}
 
-	req_slots = DIV_ROUND_UP(pbn, dfixed_trunc(topology_state->pbn_div));
+	req_slots = DIV_ROUND_UP(dfixed_const(pbn), topology_state->pbn_div.full);
 
 	drm_dbg_atomic(mgr->dev, "[CONNECTOR:%d:%s] [MST PORT:%p] TU %d -> %d\n",
 		       port->connector->base.id, port->connector->name,
diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h
index 194715083399e..b88cc53425e0b 100644
--- a/include/drm/display/drm_dp_helper.h
+++ b/include/drm/display/drm_dp_helper.h
@@ -252,6 +252,19 @@ drm_edp_backlight_supported(const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE])
 	return !!(edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP);
 }
 
+/**
+ * drm_dp_is_uhbr_rate - Determine if a link rate is UHBR
+ * @link_rate: link rate in 10kbits/s units
+ *
+ * Determine if the provided link rate is an UHBR rate.
+ *
+ * Returns: %True if @link_rate is an UHBR rate.
+ */
+static inline bool drm_dp_is_uhbr_rate(int link_rate)
+{
+	return link_rate >= 1000000;
+}
+
 /*
  * DisplayPort AUX channel
  */
-- 
GitLab


From 94c80946ee27c9c56eb4ba3e6c024ba13ad06b9e Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Mon, 20 Nov 2023 14:52:56 +0200
Subject: [PATCH 0366/2340] drm/dp_mst: Add kunit tests for
 drm_dp_get_vc_payload_bw()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add kunit test cases for drm_dp_get_vc_payload_bw() with all the DP1.4
and UHBR link configurations.

v2:
- List test cases in decreasing rate,lane count order matching the
  corresponding DP Standard tables. (Ville)
- Add references to the DP Standard tables.
v3:
- Sort the testcases properly.
v4:
- Avoid 'stack frame size x exceeds limit y in
  drm_test_dp_mst_calc_pbn_div()' compiler warn. (LKP)

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: dri-devel@lists.freedesktop.org
Cc: kernel test robot <lkp@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> (v3)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231120125256.2433782-1-imre.deak@intel.com
---
 .../gpu/drm/tests/drm_dp_mst_helper_test.c    | 160 ++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c
index e3c818dfc0e6d..d916e548fcb12 100644
--- a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c
+++ b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c
@@ -68,6 +68,152 @@ static void dp_mst_calc_pbn_mode_desc(const struct drm_dp_mst_calc_pbn_mode_test
 KUNIT_ARRAY_PARAM(drm_dp_mst_calc_pbn_mode, drm_dp_mst_calc_pbn_mode_cases,
 		  dp_mst_calc_pbn_mode_desc);
 
+struct drm_dp_mst_calc_pbn_div_test {
+	int link_rate;
+	int lane_count;
+	fixed20_12 expected;
+};
+
+#define fp_init(__int, __frac) { \
+	.full = (__int) * (1 << 12) + \
+		(__frac) * (1 << 12) / 100000 \
+}
+
+static const struct drm_dp_mst_calc_pbn_div_test drm_dp_mst_calc_pbn_div_dp1_4_cases[] = {
+	/*
+	 * UHBR rates (DP Standard v2.1 2.7.6.3, specifying the rounded to
+	 *             closest value to 2 decimal places):
+	 * .expected = .link_rate * .lane_count * 0.9671 / 8 / 54 / 100
+	 * DP1.4 rates (DP Standard v2.1 2.6.4.2):
+	 * .expected = .link_rate * .lane_count * 0.8000 / 8 / 54 / 100
+	 *
+	 * truncated to 5 decimal places.
+	 */
+	{
+		.link_rate = 2000000,
+		.lane_count = 4,
+		.expected = fp_init(179,  9259),  /* 179.09259 */
+	},
+	{
+		.link_rate = 2000000,
+		.lane_count = 2,
+		.expected = fp_init(89, 54629),
+	},
+	{
+		.link_rate = 2000000,
+		.lane_count = 1,
+		.expected = fp_init(44, 77314),
+	},
+	{
+		.link_rate = 1350000,
+		.lane_count = 4,
+		.expected = fp_init(120, 88750),
+	},
+	{
+		.link_rate = 1350000,
+		.lane_count = 2,
+		.expected = fp_init(60, 44375),
+	},
+	{
+		.link_rate = 1350000,
+		.lane_count = 1,
+		.expected = fp_init(30, 22187),
+	},
+	{
+		.link_rate = 1000000,
+		.lane_count = 4,
+		.expected = fp_init(89, 54629),
+	},
+	{
+		.link_rate = 1000000,
+		.lane_count = 2,
+		.expected = fp_init(44, 77314),
+	},
+	{
+		.link_rate = 1000000,
+		.lane_count = 1,
+		.expected = fp_init(22, 38657),
+	},
+	{
+		.link_rate = 810000,
+		.lane_count = 4,
+		.expected = fp_init(60, 0),
+	},
+	{
+		.link_rate = 810000,
+		.lane_count = 2,
+		.expected = fp_init(30, 0),
+	},
+	{
+		.link_rate = 810000,
+		.lane_count = 1,
+		.expected = fp_init(15, 0),
+	},
+	{
+		.link_rate = 540000,
+		.lane_count = 4,
+		.expected = fp_init(40, 0),
+	},
+	{
+		.link_rate = 540000,
+		.lane_count = 2,
+		.expected = fp_init(20, 0),
+	},
+	{
+		.link_rate = 540000,
+		.lane_count = 1,
+		.expected = fp_init(10, 0),
+	},
+	{
+		.link_rate = 270000,
+		.lane_count = 4,
+		.expected = fp_init(20, 0),
+	},
+	{
+		.link_rate = 270000,
+		.lane_count = 2,
+		.expected = fp_init(10, 0),
+	},
+	{
+		.link_rate = 270000,
+		.lane_count = 1,
+		.expected = fp_init(5, 0),
+	},
+	{
+		.link_rate = 162000,
+		.lane_count = 4,
+		.expected = fp_init(12, 0),
+	},
+	{
+		.link_rate = 162000,
+		.lane_count = 2,
+		.expected = fp_init(6, 0),
+	},
+	{
+		.link_rate = 162000,
+		.lane_count = 1,
+		.expected = fp_init(3, 0),
+	},
+};
+
+static void drm_test_dp_mst_calc_pbn_div(struct kunit *test)
+{
+	const struct drm_dp_mst_calc_pbn_div_test *params = test->param_value;
+	/* mgr->dev is only needed by drm_dbg_kms(), but it's not called for the test cases. */
+	struct drm_dp_mst_topology_mgr *mgr = test->priv;
+
+	KUNIT_EXPECT_EQ(test, drm_dp_get_vc_payload_bw(mgr, params->link_rate, params->lane_count).full,
+			params->expected.full);
+}
+
+static void dp_mst_calc_pbn_div_desc(const struct drm_dp_mst_calc_pbn_div_test *t, char *desc)
+{
+	sprintf(desc, "Link rate %d lane count %d", t->link_rate, t->lane_count);
+}
+
+KUNIT_ARRAY_PARAM(drm_dp_mst_calc_pbn_div, drm_dp_mst_calc_pbn_div_dp1_4_cases,
+		  dp_mst_calc_pbn_div_desc);
+
 static u8 data[] = { 0xff, 0x00, 0xdd };
 
 struct drm_dp_mst_sideband_msg_req_test {
@@ -416,13 +562,27 @@ KUNIT_ARRAY_PARAM(drm_dp_mst_sideband_msg_req, drm_dp_mst_sideband_msg_req_cases
 
 static struct kunit_case drm_dp_mst_helper_tests[] = {
 	KUNIT_CASE_PARAM(drm_test_dp_mst_calc_pbn_mode, drm_dp_mst_calc_pbn_mode_gen_params),
+	KUNIT_CASE_PARAM(drm_test_dp_mst_calc_pbn_div, drm_dp_mst_calc_pbn_div_gen_params),
 	KUNIT_CASE_PARAM(drm_test_dp_mst_sideband_msg_req_decode,
 			 drm_dp_mst_sideband_msg_req_gen_params),
 	{ }
 };
 
+static int drm_dp_mst_helper_tests_init(struct kunit *test)
+{
+	struct drm_dp_mst_topology_mgr *mgr;
+
+	mgr = kunit_kzalloc(test, sizeof(*mgr), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, mgr);
+
+	test->priv = mgr;
+
+	return 0;
+}
+
 static struct kunit_suite drm_dp_mst_helper_test_suite = {
 	.name = "drm_dp_mst_helper",
+	.init = drm_dp_mst_helper_tests_init,
 	.test_cases = drm_dp_mst_helper_tests,
 };
 
-- 
GitLab


From 3c460872d2a3e6915a475e6c04cb30fcb2b87115 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:34 +0200
Subject: [PATCH 0367/2340] drm/i915/dp: Replace intel_dp_is_uhbr_rate() with
 drm_dp_is_uhbr_rate()

Replace intel_dp_is_uhbr_rate() with the recently added
drm_dp_is_uhbr_rate().

Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-5-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 2 +-
 drivers/gpu/drm/i915/display/intel_dp.c      | 7 +------
 drivers/gpu/drm/i915/display/intel_dp.h      | 1 -
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 959db3f61e844..625b1ba93229d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2393,7 +2393,7 @@ add_bw_alloc_overhead(int link_clock, int bw_overhead,
 		      int pixel_data_rate, int link_data_rate,
 		      u32 *data_m, u32 *data_n)
 {
-	bool is_uhbr = intel_dp_is_uhbr_rate(link_clock);
+	bool is_uhbr = drm_dp_is_uhbr_rate(link_clock);
 	int ch_coding_efficiency =
 		drm_dp_bw_channel_coding_efficiency(is_uhbr);
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 3867a0d9e0043..fb15579127918 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -121,15 +121,10 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp)
 
 static void intel_dp_unset_edid(struct intel_dp *intel_dp);
 
-bool intel_dp_is_uhbr_rate(int rate)
-{
-	return rate >= 1000000;
-}
-
 /* Is link rate UHBR and thus 128b/132b? */
 bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state)
 {
-	return intel_dp_is_uhbr_rate(crtc_state->port_clock);
+	return drm_dp_is_uhbr_rate(crtc_state->port_clock);
 }
 
 static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp)
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index e80da67554196..664fa93bbf534 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -80,7 +80,6 @@ void intel_dp_audio_compute_config(struct intel_encoder *encoder,
 				   struct drm_connector_state *conn_state);
 bool intel_dp_has_hdmi_sink(struct intel_dp *intel_dp);
 bool intel_dp_is_edp(struct intel_dp *intel_dp);
-bool intel_dp_is_uhbr_rate(int rate);
 bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state);
 bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
 enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *dig_port,
-- 
GitLab


From c7ae0978f71222641059c20b2b025de0d8e989c7 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:35 +0200
Subject: [PATCH 0368/2340] drm/i915/dp: Account for channel coding efficiency
 on UHBR links

Apply the correct BW allocation overhead and channel coding efficiency
on UHBR link rates, similarly to DP1.4 link rates.

Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-6-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 625b1ba93229d..dd8ac647fe0ea 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2397,16 +2397,6 @@ add_bw_alloc_overhead(int link_clock, int bw_overhead,
 	int ch_coding_efficiency =
 		drm_dp_bw_channel_coding_efficiency(is_uhbr);
 
-	/*
-	 * TODO: adjust for actual UHBR channel coding efficiency and BW
-	 * overhead.
-	 */
-	if (is_uhbr) {
-		*data_m = pixel_data_rate;
-		*data_n = link_data_rate * 8 / 10;
-		return;
-	}
-
 	*data_m = DIV_ROUND_UP_ULL(mul_u32_u32(pixel_data_rate, bw_overhead),
 				   1000000);
 	*data_n = DIV_ROUND_DOWN_ULL(mul_u32_u32(link_data_rate, ch_coding_efficiency),
-- 
GitLab


From 7e17537719107e7b3b942d76919d020f8c779271 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:36 +0200
Subject: [PATCH 0369/2340] drm/i915/dp: Fix UHBR link M/N values

The link M/N ratio is the data rate / link symbol clock rate, fix things
up accordingly. On DP 1.4 this ratio was correct as the link symbol clock
rate in that case matched the link data rate (in bytes/sec units, the
symbol size being 8 bits), however it wasn't correct for UHBR rates
where the symbol size is 32 bits.

Kudos to Arun noticing in Bspec the incorrect use of link data rate in
the ratio's N value.

Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-7-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 16 ++++++++-----
 drivers/gpu/drm/i915/display/intel_dp.c      | 24 ++++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_dp.h      |  2 ++
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index dd8ac647fe0ea..c6a2e217a7dc7 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2410,6 +2410,7 @@ intel_link_compute_m_n(u16 bits_per_pixel_x16, int nlanes,
 		       struct intel_link_m_n *m_n)
 {
 	u32 data_clock = DIV_ROUND_UP(bits_per_pixel_x16 * pixel_clock, 16);
+	u32 link_symbol_clock = intel_dp_link_symbol_clock(link_clock);
 	u32 data_m;
 	u32 data_n;
 
@@ -2430,7 +2431,7 @@ intel_link_compute_m_n(u16 bits_per_pixel_x16, int nlanes,
 		    0x8000000);
 
 	compute_m_n(&m_n->link_m, &m_n->link_n,
-		    pixel_clock, link_clock,
+		    pixel_clock, link_symbol_clock,
 		    0x80000);
 }
 
@@ -3772,20 +3773,23 @@ int intel_dotclock_calculate(int link_freq,
 			     const struct intel_link_m_n *m_n)
 {
 	/*
-	 * The calculation for the data clock is:
+	 * The calculation for the data clock -> pixel clock is:
 	 * pixel_clock = ((m/n)*(link_clock * nr_lanes))/bpp
 	 * But we want to avoid losing precison if possible, so:
 	 * pixel_clock = ((m * link_clock * nr_lanes)/(n*bpp))
 	 *
-	 * and the link clock is simpler:
-	 * link_clock = (m * link_clock) / n
+	 * and for link freq (10kbs units) -> pixel clock it is:
+	 * link_symbol_clock = link_freq * 10 / link_symbol_size
+	 * pixel_clock = (m * link_symbol_clock) / n
+	 *    or for more precision:
+	 * pixel_clock = (m * link_freq * 10) / (n * link_symbol_size)
 	 */
 
 	if (!m_n->link_n)
 		return 0;
 
-	return DIV_ROUND_UP_ULL(mul_u32_u32(m_n->link_m, link_freq),
-				m_n->link_n);
+	return DIV_ROUND_UP_ULL(mul_u32_u32(m_n->link_m, link_freq * 10),
+				m_n->link_n * intel_dp_link_symbol_size(link_freq));
 }
 
 int intel_crtc_dotclock(const struct intel_crtc_state *pipe_config)
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index fb15579127918..d58acc36e3be3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -127,6 +127,30 @@ bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state)
 	return drm_dp_is_uhbr_rate(crtc_state->port_clock);
 }
 
+/**
+ * intel_dp_link_symbol_size - get the link symbol size for a given link rate
+ * @rate: link rate in 10kbit/s units
+ *
+ * Returns the link symbol size in bits/symbol units depending on the link
+ * rate -> channel coding.
+ */
+int intel_dp_link_symbol_size(int rate)
+{
+	return drm_dp_is_uhbr_rate(rate) ? 32 : 10;
+}
+
+/**
+ * intel_dp_link_symbol_clock - convert link rate to link symbol clock
+ * @rate: link rate in 10kbit/s units
+ *
+ * Returns the link symbol clock frequency in kHz units depending on the
+ * link rate and channel coding.
+ */
+int intel_dp_link_symbol_clock(int rate)
+{
+	return DIV_ROUND_CLOSEST(rate * 10, intel_dp_link_symbol_size(rate));
+}
+
 static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp)
 {
 	intel_dp->sink_rates[0] = 162000;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 664fa93bbf534..777aa858f899c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -81,6 +81,8 @@ void intel_dp_audio_compute_config(struct intel_encoder *encoder,
 bool intel_dp_has_hdmi_sink(struct intel_dp *intel_dp);
 bool intel_dp_is_edp(struct intel_dp *intel_dp);
 bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state);
+int intel_dp_link_symbol_size(int rate);
+int intel_dp_link_symbol_clock(int rate);
 bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
 enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *dig_port,
 				  bool long_hpd);
-- 
GitLab


From e86fb4dcfb3c4e9da8855312ada0f22629423b00 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:37 +0200
Subject: [PATCH 0370/2340] drm/i915/dp_mst: Calculate the BW overhead in
 intel_dp_mst_find_vcpi_slots_for_bpp()

The next patch will calculate the PBN value directly from the pixel data
rate and the BW allocation overhead, not requiring the data, link M/N
and TU values for this. To prepare for that move the calculation of BW
overheads from intel_dp_mst_compute_m_n() to
intel_dp_mst_find_vcpi_slots_for_bpp().

While at it store link_bpp in a .4 fixed point format.

Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-8-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 27 ++++++++++++++-------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index e5d6b811c22ef..a5ddd1781969d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -108,15 +108,12 @@ static int intel_dp_mst_bw_overhead(const struct intel_crtc_state *crtc_state,
 
 static void intel_dp_mst_compute_m_n(const struct intel_crtc_state *crtc_state,
 				     const struct intel_connector *connector,
-				     bool ssc, bool dsc,
+				     int overhead,
 				     int bpp_x16,
 				     struct intel_link_m_n *m_n)
 {
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
-	int overhead = intel_dp_mst_bw_overhead(crtc_state,
-						connector,
-						ssc, dsc, bpp_x16);
 
 	intel_link_compute_m_n(bpp_x16, crtc_state->lane_count,
 			       adjusted_mode->crtc_clock,
@@ -171,7 +168,9 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 
 	for (bpp = max_bpp; bpp >= min_bpp; bpp -= step) {
 		struct intel_link_m_n remote_m_n;
-		int link_bpp;
+		int local_bw_overhead;
+		int remote_bw_overhead;
+		int link_bpp_x16;
 
 		drm_dbg_kms(&i915->drm, "Trying bpp %d\n", bpp);
 
@@ -179,12 +178,22 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 		if (ret)
 			continue;
 
-		link_bpp = dsc ? bpp :
-			intel_dp_output_bpp(crtc_state->output_format, bpp);
+		link_bpp_x16 = to_bpp_x16(dsc ? bpp :
+					  intel_dp_output_bpp(crtc_state->output_format, bpp));
 
-		intel_dp_mst_compute_m_n(crtc_state, connector, false, dsc, to_bpp_x16(link_bpp),
+		local_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector,
+							     false, dsc, link_bpp_x16);
+		remote_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector,
+							      true, dsc, link_bpp_x16);
+
+		intel_dp_mst_compute_m_n(crtc_state, connector,
+					 local_bw_overhead,
+					 link_bpp_x16,
 					 &crtc_state->dp_m_n);
-		intel_dp_mst_compute_m_n(crtc_state, connector, true, dsc, to_bpp_x16(link_bpp),
+
+		intel_dp_mst_compute_m_n(crtc_state, connector,
+					 remote_bw_overhead,
+					 link_bpp_x16,
 					 &remote_m_n);
 
 		/*
-- 
GitLab


From 9069b77545ca5afc222effa994c65a64ac5e6462 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 17 Nov 2023 17:09:29 +0200
Subject: [PATCH 0371/2340] drm/i915/dp_mst: Fix PBN / MTP_TU size calculation
 for UHBR rates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Atm the allocated MST PBN value is calculated from the TU size (number
of allocated MTP slots) as

  PBN = TU * pbn_div

pbn_div being the link BW for each MTP slot. For DP 1.4 link rates this
worked, as pbn_div there is guraranteed to be an integer number, however
on UHBR this isn't the case. To get a PBN, TU pair where TU is a
properly rounded-up value covering all the BW corresponding to PBN,
calculate first PBN and from PBN the TU value.

Calculate PBN directly from the effective pixel data rate, instead of
calculating it indirectly from the corresponding TU and pbn_div values
(which are in turn derived from the pixel data rate and BW overhead).

Add a helper function to calculate the effective data rate, also adding
a note that callers of intel_dp_link_required() may also need to check
the effective data rate (vs. the data rate w/o the BW overhead).

While at it add a note to check if WA#14013163432 is applicable.

v2:
- Fix PBN calculation, deriving it from the effective data rate directly
  instead of using the indirect TU and pbn_div values for this.
- Add a note about WA#14013163432. (Arun)
v3:
- Fix rounding up quotient while calculating remote_tu. (Ville)

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com> (v1)
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117150929.1767227-3-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c     | 19 ++++++++++++
 drivers/gpu/drm/i915/display/intel_dp.h     |  2 ++
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 33 ++++++++++++++-------
 3 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index d58acc36e3be3..e7502a8240470 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -355,6 +355,9 @@ int intel_dp_max_lane_count(struct intel_dp *intel_dp)
 /*
  * The required data bandwidth for a mode with given pixel clock and bpp. This
  * is the required net bandwidth independent of the data bandwidth efficiency.
+ *
+ * TODO: check if callers of this functions should use
+ * intel_dp_effective_data_rate() instead.
  */
 int
 intel_dp_link_required(int pixel_clock, int bpp)
@@ -363,6 +366,22 @@ intel_dp_link_required(int pixel_clock, int bpp)
 	return DIV_ROUND_UP(pixel_clock * bpp, 8);
 }
 
+/**
+ * intel_dp_effective_data_rate - Return the pixel data rate accounting for BW allocation overhead
+ * @pixel_clock: pixel clock in kHz
+ * @bpp_x16: bits per pixel .4 fixed point format
+ * @bw_overhead: BW allocation overhead in 1ppm units
+ *
+ * Return the effective pixel data rate in kB/sec units taking into account
+ * the provided SSC, FEC, DSC BW allocation overhead.
+ */
+int intel_dp_effective_data_rate(int pixel_clock, int bpp_x16,
+				 int bw_overhead)
+{
+	return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_clock * bpp_x16, bw_overhead),
+				1000000 * 16 * 8);
+}
+
 /*
  * Given a link rate and lanes, get the data bandwidth.
  *
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 777aa858f899c..05db46b111f21 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -103,6 +103,8 @@ bool intel_dp_source_supports_tps4(struct drm_i915_private *i915);
 
 bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp);
 int intel_dp_link_required(int pixel_clock, int bpp);
+int intel_dp_effective_data_rate(int pixel_clock, int bpp_x16,
+				 int bw_overhead);
 int intel_dp_max_data_rate(int max_link_rate, int max_lanes);
 bool intel_dp_can_bigjoiner(struct intel_dp *intel_dp);
 bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index a5ddd1781969d..d3d53e1b44891 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -115,6 +115,7 @@ static void intel_dp_mst_compute_m_n(const struct intel_crtc_state *crtc_state,
 	const struct drm_display_mode *adjusted_mode =
 		&crtc_state->hw.adjusted_mode;
 
+	/* TODO: Check WA 14013163432 to set data M/N for full BW utilization. */
 	intel_link_compute_m_n(bpp_x16, crtc_state->lane_count,
 			       adjusted_mode->crtc_clock,
 			       crtc_state->port_clock,
@@ -124,6 +125,18 @@ static void intel_dp_mst_compute_m_n(const struct intel_crtc_state *crtc_state,
 	m_n->tu = DIV_ROUND_UP_ULL(mul_u32_u32(m_n->data_m, 64), m_n->data_n);
 }
 
+static int intel_dp_mst_calc_pbn(int pixel_clock, int bpp_x16, int bw_overhead)
+{
+	int effective_data_rate =
+		intel_dp_effective_data_rate(pixel_clock, bpp_x16, bw_overhead);
+
+	/*
+	 * TODO: Use drm_dp_calc_pbn_mode() instead, once it's converted
+	 * to calculate PBN with the BW overhead passed to it.
+	 */
+	return DIV_ROUND_UP(effective_data_rate * 64, 54 * 1000);
+}
+
 static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 						struct intel_crtc_state *crtc_state,
 						int max_bpp,
@@ -167,10 +180,10 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 		    min_bpp, max_bpp);
 
 	for (bpp = max_bpp; bpp >= min_bpp; bpp -= step) {
-		struct intel_link_m_n remote_m_n;
 		int local_bw_overhead;
 		int remote_bw_overhead;
 		int link_bpp_x16;
+		int remote_tu;
 
 		drm_dbg_kms(&i915->drm, "Trying bpp %d\n", bpp);
 
@@ -191,11 +204,6 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 					 link_bpp_x16,
 					 &crtc_state->dp_m_n);
 
-		intel_dp_mst_compute_m_n(crtc_state, connector,
-					 remote_bw_overhead,
-					 link_bpp_x16,
-					 &remote_m_n);
-
 		/*
 		 * The TU size programmed to the HW determines which slots in
 		 * an MTP frame are used for this stream, which needs to match
@@ -210,9 +218,14 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 		 * crtc_state->dp_m_n.tu), provided that the driver doesn't
 		 * enable SSC on the corresponding link.
 		 */
-		drm_WARN_ON(&i915->drm, remote_m_n.tu < crtc_state->dp_m_n.tu);
-		crtc_state->dp_m_n.tu = remote_m_n.tu;
-		crtc_state->pbn = remote_m_n.tu * dfixed_trunc(mst_state->pbn_div);
+		crtc_state->pbn = intel_dp_mst_calc_pbn(adjusted_mode->crtc_clock,
+							link_bpp_x16,
+							remote_bw_overhead);
+
+		remote_tu = DIV_ROUND_UP(dfixed_const(crtc_state->pbn), mst_state->pbn_div.full);
+
+		drm_WARN_ON(&i915->drm, remote_tu < crtc_state->dp_m_n.tu);
+		crtc_state->dp_m_n.tu = remote_tu;
 
 		slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr,
 						      connector->port,
@@ -221,7 +234,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder,
 			return slots;
 
 		if (slots >= 0) {
-			drm_WARN_ON(&i915->drm, slots != remote_m_n.tu);
+			drm_WARN_ON(&i915->drm, slots != crtc_state->dp_m_n.tu);
 
 			break;
 		}
-- 
GitLab


From 5ee4badb4b195bd871ba6d5a2d43aac03587230a Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:39 +0200
Subject: [PATCH 0372/2340] drm/i915/dp: Report a rounded-down value as the
 maximum data rate

Callers of intel_dp_max_data_rate() use the return value as an upper
bound for the BW a given mode requires. As such the rounding shouldn't
result in a bigger value than the actual upper bound. Use round-down
instead of -closest accordingly.

Cc: Jani Nikula <jani.nikula@linux.intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-10-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index e7502a8240470..7fd883b954a6f 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -413,7 +413,7 @@ intel_dp_max_data_rate(int max_link_rate, int max_lanes)
 		 */
 		int max_link_rate_kbps = max_link_rate * 10;
 
-		max_link_rate_kbps = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(max_link_rate_kbps, 9671), 10000);
+		max_link_rate_kbps = DIV_ROUND_DOWN_ULL(mul_u32_u32(max_link_rate_kbps, 9671), 10000);
 		max_link_rate = max_link_rate_kbps / 8;
 	}
 
@@ -423,7 +423,7 @@ intel_dp_max_data_rate(int max_link_rate, int max_lanes)
 	 * out to be a nop by coincidence, and can be skipped:
 	 *
 	 *	int max_link_rate_kbps = max_link_rate * 10;
-	 *	max_link_rate_kbps = DIV_ROUND_CLOSEST_ULL(max_link_rate_kbps * 8, 10);
+	 *	max_link_rate_kbps = DIV_ROUND_DOWN_ULL(max_link_rate_kbps * 8, 10);
 	 *	max_link_rate = max_link_rate_kbps / 8;
 	 */
 
-- 
GitLab


From b9de01d85a62ddc4fce8f28eeba64b5682431158 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:40 +0200
Subject: [PATCH 0373/2340] drm/i915/dp: Simplify intel_dp_max_data_rate()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Simplify intel_dp_max_data_rate() using
drm_dp_bw_channel_coding_efficiency() to calculate the max data rate for
both DP1.4 and UHBR link rates. This trades a redundant multiply/divide
for readability.

Cc: Jani Nikula <jani.nikula@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-11-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 26 ++++++++++++-------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 7fd883b954a6f..6d5a794b6b717 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -405,29 +405,27 @@ int intel_dp_effective_data_rate(int pixel_clock, int bpp_x16,
 int
 intel_dp_max_data_rate(int max_link_rate, int max_lanes)
 {
-	if (max_link_rate >= 1000000) {
-		/*
-		 * UHBR rates always use 128b/132b channel encoding, and have
-		 * 97.71% data bandwidth efficiency. Consider max_link_rate the
-		 * link bit rate in units of 10000 bps.
-		 */
-		int max_link_rate_kbps = max_link_rate * 10;
-
-		max_link_rate_kbps = DIV_ROUND_DOWN_ULL(mul_u32_u32(max_link_rate_kbps, 9671), 10000);
-		max_link_rate = max_link_rate_kbps / 8;
-	}
+	int ch_coding_efficiency =
+		drm_dp_bw_channel_coding_efficiency(drm_dp_is_uhbr_rate(max_link_rate));
+	int max_link_rate_kbps = max_link_rate * 10;
 
+	/*
+	 * UHBR rates always use 128b/132b channel encoding, and have
+	 * 97.71% data bandwidth efficiency. Consider max_link_rate the
+	 * link bit rate in units of 10000 bps.
+	 */
 	/*
 	 * Lower than UHBR rates always use 8b/10b channel encoding, and have
 	 * 80% data bandwidth efficiency for SST non-FEC. However, this turns
-	 * out to be a nop by coincidence, and can be skipped:
+	 * out to be a nop by coincidence:
 	 *
 	 *	int max_link_rate_kbps = max_link_rate * 10;
 	 *	max_link_rate_kbps = DIV_ROUND_DOWN_ULL(max_link_rate_kbps * 8, 10);
 	 *	max_link_rate = max_link_rate_kbps / 8;
 	 */
-
-	return max_link_rate * max_lanes;
+	return DIV_ROUND_DOWN_ULL(mul_u32_u32(max_link_rate_kbps * max_lanes,
+					      ch_coding_efficiency),
+				  1000000 * 8);
 }
 
 bool intel_dp_can_bigjoiner(struct intel_dp *intel_dp)
-- 
GitLab


From 297c76d94c8911b5d7b58afc51cfde715dd155fe Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 16 Nov 2023 15:18:41 +0200
Subject: [PATCH 0374/2340] drm/i915/dp: Reuse
 intel_dp_{max,effective}_data_rate in intel_link_compute_m_n()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reuse intel_dp_max_data_rate() and intel_dp_effective_data_rate() in
intel_link_compute_m_n(), instead of open-coding the equivalent. Note
the kbit/sec -> kByte/sec unit change in the M/N values, but this not
reducing the precision, as the link rate value is based anyway on a less
precise 10 kbit/sec value.

Suggested-by: Jani Nikula <jani.nikula@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-12-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 25 +++-----------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c6a2e217a7dc7..f8503d917c3d0 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2388,36 +2388,17 @@ static void compute_m_n(u32 *ret_m, u32 *ret_n,
 	intel_reduce_m_n_ratio(ret_m, ret_n);
 }
 
-static void
-add_bw_alloc_overhead(int link_clock, int bw_overhead,
-		      int pixel_data_rate, int link_data_rate,
-		      u32 *data_m, u32 *data_n)
-{
-	bool is_uhbr = drm_dp_is_uhbr_rate(link_clock);
-	int ch_coding_efficiency =
-		drm_dp_bw_channel_coding_efficiency(is_uhbr);
-
-	*data_m = DIV_ROUND_UP_ULL(mul_u32_u32(pixel_data_rate, bw_overhead),
-				   1000000);
-	*data_n = DIV_ROUND_DOWN_ULL(mul_u32_u32(link_data_rate, ch_coding_efficiency),
-				     1000000);
-}
-
 void
 intel_link_compute_m_n(u16 bits_per_pixel_x16, int nlanes,
 		       int pixel_clock, int link_clock,
 		       int bw_overhead,
 		       struct intel_link_m_n *m_n)
 {
-	u32 data_clock = DIV_ROUND_UP(bits_per_pixel_x16 * pixel_clock, 16);
 	u32 link_symbol_clock = intel_dp_link_symbol_clock(link_clock);
-	u32 data_m;
-	u32 data_n;
+	u32 data_m = intel_dp_effective_data_rate(pixel_clock, bits_per_pixel_x16,
+						  bw_overhead);
+	u32 data_n = intel_dp_max_data_rate(link_clock, nlanes);
 
-	add_bw_alloc_overhead(link_clock, bw_overhead,
-			      data_clock,
-			      link_clock * 10 * nlanes,
-			      &data_m, &data_n);
 	/*
 	 * Windows/BIOS uses fixed M/N values always. Follow suit.
 	 *
-- 
GitLab


From 5032c607e886e0c40749a05d37b835c1757d38ff Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Mon, 20 Nov 2023 17:07:31 +0530
Subject: [PATCH 0375/2340] drm/i915: ATS-M device ID update

ATS-M device ID update.

BSpec: 44477

Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231120113731.1570589-1-haridhar.kalvala@intel.com
---
 include/drm/i915_pciids.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 1c9ea6ab3eb90..fcf1849aa47ca 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -738,7 +738,8 @@
 	INTEL_DG2_G12_IDS(info)
 
 #define INTEL_ATS_M150_IDS(info) \
-	INTEL_VGA_DEVICE(0x56C0, info)
+	INTEL_VGA_DEVICE(0x56C0, info), \
+	INTEL_VGA_DEVICE(0x56C2, info)
 
 #define INTEL_ATS_M75_IDS(info) \
 	INTEL_VGA_DEVICE(0x56C1, info)
-- 
GitLab


From 9c6894320f49c146270623bb37cd700db7cf13df Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Tue, 21 Nov 2023 19:54:59 -0500
Subject: [PATCH 0376/2340] fbdev/simplefb: Suppress error on missing power
 domains

When the power domains are missing, the call to of_count_phandle_with_args
fails with -ENOENT. The power domains are not required and there are
some device trees that do not specify them. Suppress this error to fix
devices without power domains attached to simplefb.

Fixes: 92a511a568e4 ("fbdev/simplefb: Add support for generic power-domains")
Closes: https://lore.kernel.org/linux-fbdev/ZVwFNfkqjrvhFHM0@radian
Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122005457.330066-3-mailingradian@gmail.com
---
 drivers/video/fbdev/simplefb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index fe682af63827b..6f58ee276ad1b 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -466,6 +466,10 @@ static int simplefb_attach_genpds(struct simplefb_par *par,
 	err = of_count_phandle_with_args(dev->of_node, "power-domains",
 					 "#power-domain-cells");
 	if (err < 0) {
+		/* Nothing wrong if optional PDs are missing */
+		if (err == -ENOENT)
+			return 0;
+
 		dev_info(dev, "failed to parse power-domains: %d\n", err);
 		return err;
 	}
-- 
GitLab


From 211ed0b3ac9a29aa228d3cbb5f2a4d6c7ddadcaf Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 3 Nov 2023 22:18:16 +0200
Subject: [PATCH 0377/2340] drm/i915/dsi: assume BXT gpio works for non-native
 GPIO

Purely a guess. Drop the nop function.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-2-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 24b2cbcfc1eff..b2c0cc11f8c17 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -372,14 +372,6 @@ static void bxt_exec_gpio(struct intel_connector *connector,
 	gpiod_set_value(gpio_desc, value);
 }
 
-static void icl_exec_gpio(struct intel_connector *connector,
-			  u8 gpio_source, u8 gpio_index, bool value)
-{
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-
-	drm_dbg_kms(&dev_priv->drm, "Skipping ICL GPIO element execution\n");
-}
-
 enum {
 	MIPI_RESET_1 = 0,
 	MIPI_AVDD_EN_1,
@@ -491,7 +483,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	if (native)
 		icl_native_gpio_set_value(dev_priv, gpio_number, value);
 	else if (DISPLAY_VER(dev_priv) >= 11)
-		icl_exec_gpio(connector, gpio_source, gpio_index, value);
+		bxt_exec_gpio(connector, gpio_source, gpio_index, value);
 	else if (IS_VALLEYVIEW(dev_priv))
 		vlv_exec_gpio(connector, gpio_source, gpio_number, value);
 	else if (IS_CHERRYVIEW(dev_priv))
-- 
GitLab


From 03930e3d97565b6640a3a552d2b41252aae33f25 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 3 Nov 2023 22:18:17 +0200
Subject: [PATCH 0378/2340] drm/i915/dsi: switch mipi_exec_gpio() from dev_priv
 to i915

Follow the contemporary conventions.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-3-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index b2c0cc11f8c17..8b962f2ac475a 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -454,11 +454,11 @@ static void icl_native_gpio_set_value(struct drm_i915_private *dev_priv,
 static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 {
 	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct intel_connector *connector = intel_dsi->attached_connector;
 	u8 gpio_source, gpio_index = 0, gpio_number;
 	bool value;
-	bool native = DISPLAY_VER(dev_priv) >= 11;
+	bool native = DISPLAY_VER(i915) >= 11;
 
 	if (connector->panel.vbt.dsi.seq_version >= 3)
 		gpio_index = *data++;
@@ -477,16 +477,16 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	/* pull up/down */
 	value = *data++ & 1;
 
-	drm_dbg_kms(&dev_priv->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n",
+	drm_dbg_kms(&i915->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n",
 		    gpio_index, gpio_number, gpio_source, str_yes_no(native), str_on_off(value));
 
 	if (native)
-		icl_native_gpio_set_value(dev_priv, gpio_number, value);
-	else if (DISPLAY_VER(dev_priv) >= 11)
+		icl_native_gpio_set_value(i915, gpio_number, value);
+	else if (DISPLAY_VER(i915) >= 11)
 		bxt_exec_gpio(connector, gpio_source, gpio_index, value);
-	else if (IS_VALLEYVIEW(dev_priv))
+	else if (IS_VALLEYVIEW(i915))
 		vlv_exec_gpio(connector, gpio_source, gpio_number, value);
-	else if (IS_CHERRYVIEW(dev_priv))
+	else if (IS_CHERRYVIEW(i915))
 		chv_exec_gpio(connector, gpio_source, gpio_number, value);
 	else
 		bxt_exec_gpio(connector, gpio_source, gpio_index, value);
-- 
GitLab


From 703a7d2b77f74e5f53545a6d0788cd1b9d0167d6 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 3 Nov 2023 22:18:18 +0200
Subject: [PATCH 0379/2340] drm/i915/dsi: clarify GPIO exec sequence

With the various sequence versions and pointer increments interleaved,
it's a bit hard to decipher what's going on. Add separate paths for
different sequence versions.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-4-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 31 +++++++++++---------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 8b962f2ac475a..11073efe26c04 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -456,26 +456,29 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	struct drm_device *dev = intel_dsi->base.base.dev;
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct intel_connector *connector = intel_dsi->attached_connector;
-	u8 gpio_source, gpio_index = 0, gpio_number;
+	u8 gpio_source = 0, gpio_index = 0, gpio_number;
 	bool value;
+	int size;
 	bool native = DISPLAY_VER(i915) >= 11;
 
-	if (connector->panel.vbt.dsi.seq_version >= 3)
-		gpio_index = *data++;
+	if (connector->panel.vbt.dsi.seq_version >= 3) {
+		size = 3;
 
-	gpio_number = *data++;
+		gpio_index = data[0];
+		gpio_number = data[1];
+		value = data[2] & BIT(0);
 
-	/* gpio source in sequence v2 only */
-	if (connector->panel.vbt.dsi.seq_version == 2)
-		gpio_source = (*data >> 1) & 3;
-	else
-		gpio_source = 0;
+		if (connector->panel.vbt.dsi.seq_version >= 4 && data[2] & BIT(1))
+			native = false;
+	} else {
+		size = 2;
 
-	if (connector->panel.vbt.dsi.seq_version >= 4 && *data & BIT(1))
-		native = false;
+		gpio_number = data[0];
+		value = data[1] & BIT(0);
 
-	/* pull up/down */
-	value = *data++ & 1;
+		if (connector->panel.vbt.dsi.seq_version == 2)
+			gpio_source = (data[1] >> 1) & 3;
+	}
 
 	drm_dbg_kms(&i915->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n",
 		    gpio_index, gpio_number, gpio_source, str_yes_no(native), str_on_off(value));
@@ -491,7 +494,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	else
 		bxt_exec_gpio(connector, gpio_source, gpio_index, value);
 
-	return data;
+	return data + size;
 }
 
 #ifdef CONFIG_ACPI
-- 
GitLab


From ba24d15859e0277f036266bacdde031625c2dd8a Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 3 Nov 2023 22:18:19 +0200
Subject: [PATCH 0380/2340] drm/i915/dsi: rename platform specific
 *_exec_gpio() to *_gpio_set_value()

The lowest level functions are about setting GPIO values, not about
executing any sequences anymore.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-5-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 11073efe26c04..f977d63a0ad48 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -243,8 +243,8 @@ static const u8 *mipi_exec_delay(struct intel_dsi *intel_dsi, const u8 *data)
 	return data;
 }
 
-static void vlv_exec_gpio(struct intel_connector *connector,
-			  u8 gpio_source, u8 gpio_index, bool value)
+static void vlv_gpio_set_value(struct intel_connector *connector,
+			       u8 gpio_source, u8 gpio_index, bool value)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct gpio_map *map;
@@ -291,8 +291,8 @@ static void vlv_exec_gpio(struct intel_connector *connector,
 	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
-static void chv_exec_gpio(struct intel_connector *connector,
-			  u8 gpio_source, u8 gpio_index, bool value)
+static void chv_gpio_set_value(struct intel_connector *connector,
+			       u8 gpio_source, u8 gpio_index, bool value)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	u16 cfg0, cfg1;
@@ -345,8 +345,8 @@ static void chv_exec_gpio(struct intel_connector *connector,
 	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
-static void bxt_exec_gpio(struct intel_connector *connector,
-			  u8 gpio_source, u8 gpio_index, bool value)
+static void bxt_gpio_set_value(struct intel_connector *connector,
+			       u8 gpio_source, u8 gpio_index, bool value)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	/* XXX: this table is a quick ugly hack. */
@@ -486,13 +486,13 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	if (native)
 		icl_native_gpio_set_value(i915, gpio_number, value);
 	else if (DISPLAY_VER(i915) >= 11)
-		bxt_exec_gpio(connector, gpio_source, gpio_index, value);
+		bxt_gpio_set_value(connector, gpio_source, gpio_index, value);
 	else if (IS_VALLEYVIEW(i915))
-		vlv_exec_gpio(connector, gpio_source, gpio_number, value);
+		vlv_gpio_set_value(connector, gpio_source, gpio_number, value);
 	else if (IS_CHERRYVIEW(i915))
-		chv_exec_gpio(connector, gpio_source, gpio_number, value);
+		chv_gpio_set_value(connector, gpio_source, gpio_number, value);
 	else
-		bxt_exec_gpio(connector, gpio_source, gpio_index, value);
+		bxt_gpio_set_value(connector, gpio_source, gpio_index, value);
 
 	return data + size;
 }
-- 
GitLab


From acc06840fb9e22e3f7febec1ec1a976a04929cde Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 3 Nov 2023 22:18:20 +0200
Subject: [PATCH 0381/2340] drm/i915/dsi: bxt/icl GPIO set value do not need
 gpio source

Drop the unused parameter.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-6-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f977d63a0ad48..4af43cf3cee00 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -346,7 +346,7 @@ static void chv_gpio_set_value(struct intel_connector *connector,
 }
 
 static void bxt_gpio_set_value(struct intel_connector *connector,
-			       u8 gpio_source, u8 gpio_index, bool value)
+			       u8 gpio_index, bool value)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	/* XXX: this table is a quick ugly hack. */
@@ -486,13 +486,13 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	if (native)
 		icl_native_gpio_set_value(i915, gpio_number, value);
 	else if (DISPLAY_VER(i915) >= 11)
-		bxt_gpio_set_value(connector, gpio_source, gpio_index, value);
+		bxt_gpio_set_value(connector, gpio_index, value);
 	else if (IS_VALLEYVIEW(i915))
 		vlv_gpio_set_value(connector, gpio_source, gpio_number, value);
 	else if (IS_CHERRYVIEW(i915))
 		chv_gpio_set_value(connector, gpio_source, gpio_number, value);
 	else
-		bxt_gpio_set_value(connector, gpio_source, gpio_index, value);
+		bxt_gpio_set_value(connector, gpio_index, value);
 
 	return data + size;
 }
-- 
GitLab


From 1c8953b27d11269c9a9fa2d1bbd62bf3415749c0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:21 +0200
Subject: [PATCH 0382/2340] drm/i915/dsi: Replace while(1) with one with clear
 exit condition

Move existing condition to while(), so it will be clear on what
circumstances the loop is successfully finishing.

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-7-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 4af43cf3cee00..290a112f1b630 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -702,13 +702,10 @@ static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi,
 	if (connector->panel.vbt.dsi.seq_version >= 3)
 		data += 4;
 
-	while (1) {
+	while (*data != MIPI_SEQ_ELEM_END) {
 		u8 operation_byte = *data++;
 		u8 operation_size = 0;
 
-		if (operation_byte == MIPI_SEQ_ELEM_END)
-			break;
-
 		if (operation_byte < ARRAY_SIZE(exec_elem))
 			mipi_elem_exec = exec_elem[operation_byte];
 		else
-- 
GitLab


From a1f763fe869c6875a6649bb0c145e589e08087a0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:22 +0200
Subject: [PATCH 0383/2340] drm/i915/dsi: Get rid of redundant 'else'

In the snippets like the following

	if (...)
		return / goto / break / continue ...;
	else
		...

the 'else' is redundant. Get rid of it.

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-8-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 56 ++++++++++----------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 290a112f1b630..4ed5ede9ec5bc 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -142,7 +142,7 @@ static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
 	if (seq_port) {
 		if (intel_dsi->ports & BIT(PORT_B))
 			return PORT_B;
-		else if (intel_dsi->ports & BIT(PORT_C))
+		if (intel_dsi->ports & BIT(PORT_C))
 			return PORT_C;
 	}
 
@@ -670,8 +670,8 @@ static const char *sequence_name(enum mipi_seq seq_id)
 {
 	if (seq_id < ARRAY_SIZE(seq_name) && seq_name[seq_id])
 		return seq_name[seq_id];
-	else
-		return "(unknown)";
+
+	return "(unknown)";
 }
 
 static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi,
@@ -865,36 +865,34 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 	 * multiply by 100 to preserve remainder
 	 */
 	if (intel_dsi->video_mode == BURST_MODE) {
-		if (mipi_config->target_burst_mode_freq) {
-			u32 bitrate = intel_dsi_bitrate(intel_dsi);
-
-			/*
-			 * Sometimes the VBT contains a slightly lower clock,
-			 * then the bitrate we have calculated, in this case
-			 * just replace it with the calculated bitrate.
-			 */
-			if (mipi_config->target_burst_mode_freq < bitrate &&
-			    intel_fuzzy_clock_check(
-					mipi_config->target_burst_mode_freq,
-					bitrate))
-				mipi_config->target_burst_mode_freq = bitrate;
-
-			if (mipi_config->target_burst_mode_freq < bitrate) {
-				drm_err(&dev_priv->drm,
-					"Burst mode freq is less than computed\n");
-				return false;
-			}
+		u32 bitrate;
 
-			burst_mode_ratio = DIV_ROUND_UP(
-				mipi_config->target_burst_mode_freq * 100,
-				bitrate);
+		if (mipi_config->target_burst_mode_freq == 0) {
+			drm_err(&dev_priv->drm, "Burst mode target is not set\n");
+			return false;
+		}
 
-			intel_dsi->pclk = DIV_ROUND_UP(intel_dsi->pclk * burst_mode_ratio, 100);
-		} else {
-			drm_err(&dev_priv->drm,
-				"Burst mode target is not set\n");
+		bitrate = intel_dsi_bitrate(intel_dsi);
+
+		/*
+		 * Sometimes the VBT contains a slightly lower clock, then
+		 * the bitrate we have calculated, in this case just replace it
+		 * with the calculated bitrate.
+		 */
+		if (mipi_config->target_burst_mode_freq < bitrate &&
+		    intel_fuzzy_clock_check(mipi_config->target_burst_mode_freq,
+					    bitrate))
+			mipi_config->target_burst_mode_freq = bitrate;
+
+		if (mipi_config->target_burst_mode_freq < bitrate) {
+			drm_err(&dev_priv->drm, "Burst mode freq is less than computed\n");
 			return false;
 		}
+
+		burst_mode_ratio =
+			DIV_ROUND_UP(mipi_config->target_burst_mode_freq * 100, bitrate);
+
+		intel_dsi->pclk = DIV_ROUND_UP(intel_dsi->pclk * burst_mode_ratio, 100);
 	} else
 		burst_mode_ratio = 100;
 
-- 
GitLab


From 246bcae104475136cd3eb87793726b5cc4320ad1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:23 +0200
Subject: [PATCH 0384/2340] drm/i915/dsi: Replace check with a (missing) MIPI
 sequence name

Names of the MIPI sequence steps are sequential and defined, no
need to check for the gaps. However in seq_name the MIPI_SEQ_END
is missing. Add it there, and drop unneeded NULL check in
sequence_name().

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-9-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 4ed5ede9ec5bc..d270437217b3d 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -653,6 +653,7 @@ static const fn_mipi_elem_exec exec_elem[] = {
  */
 
 static const char * const seq_name[] = {
+	[MIPI_SEQ_END] = "MIPI_SEQ_END",
 	[MIPI_SEQ_DEASSERT_RESET] = "MIPI_SEQ_DEASSERT_RESET",
 	[MIPI_SEQ_INIT_OTP] = "MIPI_SEQ_INIT_OTP",
 	[MIPI_SEQ_DISPLAY_ON] = "MIPI_SEQ_DISPLAY_ON",
@@ -668,7 +669,7 @@ static const char * const seq_name[] = {
 
 static const char *sequence_name(enum mipi_seq seq_id)
 {
-	if (seq_id < ARRAY_SIZE(seq_name) && seq_name[seq_id])
+	if (seq_id < ARRAY_SIZE(seq_name))
 		return seq_name[seq_id];
 
 	return "(unknown)";
-- 
GitLab


From e2a97a08ce179ee2ac33a0e24b890fb0638ac3f5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 3 Nov 2023 22:18:24 +0200
Subject: [PATCH 0385/2340] drm/i915/dsi: Remove GPIO lookup table at the end
 of intel_dsi_vbt_gpio_init()

To properly deal with GPIOs used in MIPI panel sequences a temporary
GPIO lookup will be used. Since there can only be 1 GPIO lookup table
for the "0000:00:02.0" device this will not work if the GPIO lookup
table used by intel_dsi_vbt_gpio_init() is still registered.

After getting the "backlight" and "panel" GPIOs the lookup table
registered by intel_dsi_vbt_gpio_init() is no longer necessary,
remove it so that another temporary lookup-table for the "0000:00:02.0"
device can be added.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-10-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 25 +++++++-------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index d270437217b3d..8e6beef90e5ed 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -955,6 +955,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 	struct intel_connector *connector = intel_dsi->attached_connector;
 	struct mipi_config *mipi_config = connector->panel.vbt.dsi.config;
 	enum gpiod_flags flags = panel_is_on ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+	struct gpiod_lookup_table *gpiod_lookup_table = NULL;
 	bool want_backlight_gpio = false;
 	bool want_panel_gpio = false;
 	struct pinctrl *pinctrl;
@@ -962,12 +963,12 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 
 	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
 	    mipi_config->pwm_blc == PPS_BLC_PMIC) {
-		gpiod_add_lookup_table(&pmic_panel_gpio_table);
+		gpiod_lookup_table = &pmic_panel_gpio_table;
 		want_panel_gpio = true;
 	}
 
 	if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) {
-		gpiod_add_lookup_table(&soc_panel_gpio_table);
+		gpiod_lookup_table = &soc_panel_gpio_table;
 		want_panel_gpio = true;
 		want_backlight_gpio = true;
 
@@ -984,6 +985,9 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 				"Failed to set pinmux to PWM\n");
 	}
 
+	if (gpiod_lookup_table)
+		gpiod_add_lookup_table(gpiod_lookup_table);
+
 	if (want_panel_gpio) {
 		intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags);
 		if (IS_ERR(intel_dsi->gpio_panel)) {
@@ -1002,15 +1006,13 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 			intel_dsi->gpio_backlight = NULL;
 		}
 	}
+
+	if (gpiod_lookup_table)
+		gpiod_remove_lookup_table(gpiod_lookup_table);
 }
 
 void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi)
 {
-	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_connector *connector = intel_dsi->attached_connector;
-	struct mipi_config *mipi_config = connector->panel.vbt.dsi.config;
-
 	if (intel_dsi->gpio_panel) {
 		gpiod_put(intel_dsi->gpio_panel);
 		intel_dsi->gpio_panel = NULL;
@@ -1020,13 +1022,4 @@ void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi)
 		gpiod_put(intel_dsi->gpio_backlight);
 		intel_dsi->gpio_backlight = NULL;
 	}
-
-	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
-	    mipi_config->pwm_blc == PPS_BLC_PMIC)
-		gpiod_remove_lookup_table(&pmic_panel_gpio_table);
-
-	if (IS_VALLEYVIEW(dev_priv) && mipi_config->pwm_blc == PPS_BLC_SOC) {
-		pinctrl_unregister_mappings(soc_pwm_pinctrl_map);
-		gpiod_remove_lookup_table(&soc_panel_gpio_table);
-	}
 }
-- 
GitLab


From 61442d610f771ec4c45c3882c006644bee2cf38c Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 3 Nov 2023 22:18:25 +0200
Subject: [PATCH 0386/2340] drm/i915/dsi: Fix wrong initial value for GPIOs in
 bxt_gpio_set_value()

Fix wrong initial value for GPIOs in bxt_gpio_set_value().

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-11-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 8e6beef90e5ed..0f9da0168a7b4 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -356,9 +356,7 @@ static void bxt_gpio_set_value(struct intel_connector *connector,
 	if (!gpio_desc) {
 		gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev,
 						 NULL, gpio_index,
-						 value ? GPIOD_OUT_LOW :
-						 GPIOD_OUT_HIGH);
-
+						 value ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW);
 		if (IS_ERR_OR_NULL(gpio_desc)) {
 			drm_err(&dev_priv->drm,
 				"GPIO index %u request failed (%ld)\n",
-- 
GitLab


From 47ab0203946a57e3451b4b3e2b23634b27e32440 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:26 +0200
Subject: [PATCH 0387/2340] drm/i915/dsi: Extract common soc_gpio_set_value()
 helper

Extract a common soc_gpio_set_value() helper that may be used by a few
SoCs.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-12-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 46 +++++++++++---------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 0f9da0168a7b4..9847a92fdfc3d 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -243,6 +243,31 @@ static const u8 *mipi_exec_delay(struct intel_dsi *intel_dsi, const u8 *data)
 	return data;
 }
 
+static void soc_gpio_set_value(struct intel_connector *connector, const char *con_id,
+			       u8 gpio_index, bool value)
+{
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+	/* XXX: this table is a quick ugly hack. */
+	static struct gpio_desc *soc_gpio_table[U8_MAX + 1];
+	struct gpio_desc *gpio_desc = soc_gpio_table[gpio_index];
+
+	if (gpio_desc) {
+		gpiod_set_value(gpio_desc, value);
+	} else {
+		gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev,
+						 con_id, gpio_index,
+						 value ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW);
+		if (IS_ERR(gpio_desc)) {
+			drm_err(&dev_priv->drm,
+				"GPIO index %u request failed (%pe)\n",
+				gpio_index, gpio_desc);
+			return;
+		}
+
+		soc_gpio_table[gpio_index] = gpio_desc;
+	}
+}
+
 static void vlv_gpio_set_value(struct intel_connector *connector,
 			       u8 gpio_source, u8 gpio_index, bool value)
 {
@@ -348,26 +373,7 @@ static void chv_gpio_set_value(struct intel_connector *connector,
 static void bxt_gpio_set_value(struct intel_connector *connector,
 			       u8 gpio_index, bool value)
 {
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	/* XXX: this table is a quick ugly hack. */
-	static struct gpio_desc *bxt_gpio_table[U8_MAX + 1];
-	struct gpio_desc *gpio_desc = bxt_gpio_table[gpio_index];
-
-	if (!gpio_desc) {
-		gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev,
-						 NULL, gpio_index,
-						 value ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW);
-		if (IS_ERR_OR_NULL(gpio_desc)) {
-			drm_err(&dev_priv->drm,
-				"GPIO index %u request failed (%ld)\n",
-				gpio_index, PTR_ERR(gpio_desc));
-			return;
-		}
-
-		bxt_gpio_table[gpio_index] = gpio_desc;
-	}
-
-	gpiod_set_value(gpio_desc, value);
+	soc_gpio_set_value(connector, NULL, gpio_index, value);
 }
 
 enum {
-- 
GitLab


From 8241b55f1ded100295ea95d72fd2e95e69776923 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:27 +0200
Subject: [PATCH 0388/2340] drm/i915/dsi: Replace poking of VLV GPIOs behind
 the driver's back

It's a dirty hack in the driver that pokes GPIO registers behind
the driver's back. Moreoever it might be problematic as simultaneous
I/O may hang the system, see the commit 40ecab551232 ("pinctrl:
baytrail: Really serialize all register accesses") for the details.
Taking all this into consideration replace the hack with proper
GPIO APIs being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-13-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 98 ++++++--------------
 1 file changed, 28 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 9847a92fdfc3d..552bc6564d79b 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -55,43 +55,6 @@
 #define MIPI_VIRTUAL_CHANNEL_SHIFT	1
 #define MIPI_PORT_SHIFT			3
 
-/* base offsets for gpio pads */
-#define VLV_GPIO_NC_0_HV_DDI0_HPD	0x4130
-#define VLV_GPIO_NC_1_HV_DDI0_DDC_SDA	0x4120
-#define VLV_GPIO_NC_2_HV_DDI0_DDC_SCL	0x4110
-#define VLV_GPIO_NC_3_PANEL0_VDDEN	0x4140
-#define VLV_GPIO_NC_4_PANEL0_BKLTEN	0x4150
-#define VLV_GPIO_NC_5_PANEL0_BKLTCTL	0x4160
-#define VLV_GPIO_NC_6_HV_DDI1_HPD	0x4180
-#define VLV_GPIO_NC_7_HV_DDI1_DDC_SDA	0x4190
-#define VLV_GPIO_NC_8_HV_DDI1_DDC_SCL	0x4170
-#define VLV_GPIO_NC_9_PANEL1_VDDEN	0x4100
-#define VLV_GPIO_NC_10_PANEL1_BKLTEN	0x40E0
-#define VLV_GPIO_NC_11_PANEL1_BKLTCTL	0x40F0
-
-#define VLV_GPIO_PCONF0(base_offset)	(base_offset)
-#define VLV_GPIO_PAD_VAL(base_offset)	((base_offset) + 8)
-
-struct gpio_map {
-	u16 base_offset;
-	bool init;
-};
-
-static struct gpio_map vlv_gpio_table[] = {
-	{ VLV_GPIO_NC_0_HV_DDI0_HPD },
-	{ VLV_GPIO_NC_1_HV_DDI0_DDC_SDA },
-	{ VLV_GPIO_NC_2_HV_DDI0_DDC_SCL },
-	{ VLV_GPIO_NC_3_PANEL0_VDDEN },
-	{ VLV_GPIO_NC_4_PANEL0_BKLTEN },
-	{ VLV_GPIO_NC_5_PANEL0_BKLTCTL },
-	{ VLV_GPIO_NC_6_HV_DDI1_HPD },
-	{ VLV_GPIO_NC_7_HV_DDI1_DDC_SDA },
-	{ VLV_GPIO_NC_8_HV_DDI1_DDC_SCL },
-	{ VLV_GPIO_NC_9_PANEL1_VDDEN },
-	{ VLV_GPIO_NC_10_PANEL1_BKLTEN },
-	{ VLV_GPIO_NC_11_PANEL1_BKLTCTL },
-};
-
 struct i2c_adapter_lookup {
 	u16 slave_addr;
 	struct intel_dsi *intel_dsi;
@@ -268,52 +231,47 @@ static void soc_gpio_set_value(struct intel_connector *connector, const char *co
 	}
 }
 
-static void vlv_gpio_set_value(struct intel_connector *connector,
-			       u8 gpio_source, u8 gpio_index, bool value)
+static void soc_opaque_gpio_set_value(struct intel_connector *connector,
+				      const char *chip, const char *con_id,
+				      u8 gpio_index, bool value)
 {
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	struct gpio_map *map;
-	u16 pconf0, padval;
-	u32 tmp;
-	u8 port;
+	struct gpiod_lookup_table *lookup;
 
-	if (gpio_index >= ARRAY_SIZE(vlv_gpio_table)) {
-		drm_dbg_kms(&dev_priv->drm, "unknown gpio index %u\n",
-			    gpio_index);
+	lookup = kzalloc(struct_size(lookup, table, 2), GFP_KERNEL);
+	if (!lookup)
 		return;
-	}
 
-	map = &vlv_gpio_table[gpio_index];
+	lookup->dev_id = "0000:00:02.0";
+	lookup->table[0] =
+		GPIO_LOOKUP_IDX(chip, gpio_index, con_id, gpio_index, GPIO_ACTIVE_HIGH);
 
-	if (connector->panel.vbt.dsi.seq_version >= 3) {
-		/* XXX: this assumes vlv_gpio_table only has NC GPIOs. */
-		port = IOSF_PORT_GPIO_NC;
-	} else {
-		if (gpio_source == 0) {
-			port = IOSF_PORT_GPIO_NC;
-		} else if (gpio_source == 1) {
+	gpiod_add_lookup_table(lookup);
+
+	soc_gpio_set_value(connector, con_id, gpio_index, value);
+
+	gpiod_remove_lookup_table(lookup);
+	kfree(lookup);
+}
+
+static void vlv_gpio_set_value(struct intel_connector *connector,
+			       u8 gpio_source, u8 gpio_index, bool value)
+{
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+
+	/* XXX: this assumes vlv_gpio_table only has NC GPIOs. */
+	if (connector->panel.vbt.dsi.seq_version < 3) {
+		if (gpio_source == 1) {
 			drm_dbg_kms(&dev_priv->drm, "SC gpio not supported\n");
 			return;
-		} else {
+		}
+		if (gpio_source > 1) {
 			drm_dbg_kms(&dev_priv->drm,
 				    "unknown gpio source %u\n", gpio_source);
 			return;
 		}
 	}
 
-	pconf0 = VLV_GPIO_PCONF0(map->base_offset);
-	padval = VLV_GPIO_PAD_VAL(map->base_offset);
-
-	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
-	if (!map->init) {
-		/* FIXME: remove constant below */
-		vlv_iosf_sb_write(dev_priv, port, pconf0, 0x2000CC00);
-		map->init = true;
-	}
-
-	tmp = 0x4 | value;
-	vlv_iosf_sb_write(dev_priv, port, padval, tmp);
-	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
+	soc_opaque_gpio_set_value(connector, "INT33FC:01", "Panel N", gpio_index, value);
 }
 
 static void chv_gpio_set_value(struct intel_connector *connector,
-- 
GitLab


From bd079b19b417d835a671649a27271918700f2fd9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:28 +0200
Subject: [PATCH 0389/2340] drm/i915/dsi: Prepare soc_gpio_set_value() to
 distinguish GPIO communities

Currently soc_gpio_set_value() supports only a single indexing for GPIO
pin.  For CHV case, for example, we will need to distinguish community
based index from the one that VBT is using. Introduce an additional
parameter to soc_gpio_set_value() and its callers.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-14-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 552bc6564d79b..b1736c1301ea0 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -206,8 +206,8 @@ static const u8 *mipi_exec_delay(struct intel_dsi *intel_dsi, const u8 *data)
 	return data;
 }
 
-static void soc_gpio_set_value(struct intel_connector *connector, const char *con_id,
-			       u8 gpio_index, bool value)
+static void soc_gpio_set_value(struct intel_connector *connector, u8 gpio_index,
+			       const char *con_id, u8 idx, bool value)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	/* XXX: this table is a quick ugly hack. */
@@ -217,8 +217,7 @@ static void soc_gpio_set_value(struct intel_connector *connector, const char *co
 	if (gpio_desc) {
 		gpiod_set_value(gpio_desc, value);
 	} else {
-		gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev,
-						 con_id, gpio_index,
+		gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev, con_id, idx,
 						 value ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW);
 		if (IS_ERR(gpio_desc)) {
 			drm_err(&dev_priv->drm,
@@ -232,8 +231,8 @@ static void soc_gpio_set_value(struct intel_connector *connector, const char *co
 }
 
 static void soc_opaque_gpio_set_value(struct intel_connector *connector,
-				      const char *chip, const char *con_id,
-				      u8 gpio_index, bool value)
+				      u8 gpio_index, const char *chip,
+				      const char *con_id, u8 idx, bool value)
 {
 	struct gpiod_lookup_table *lookup;
 
@@ -243,11 +242,11 @@ static void soc_opaque_gpio_set_value(struct intel_connector *connector,
 
 	lookup->dev_id = "0000:00:02.0";
 	lookup->table[0] =
-		GPIO_LOOKUP_IDX(chip, gpio_index, con_id, gpio_index, GPIO_ACTIVE_HIGH);
+		GPIO_LOOKUP_IDX(chip, idx, con_id, idx, GPIO_ACTIVE_HIGH);
 
 	gpiod_add_lookup_table(lookup);
 
-	soc_gpio_set_value(connector, con_id, gpio_index, value);
+	soc_gpio_set_value(connector, gpio_index, con_id, idx, value);
 
 	gpiod_remove_lookup_table(lookup);
 	kfree(lookup);
@@ -271,7 +270,8 @@ static void vlv_gpio_set_value(struct intel_connector *connector,
 		}
 	}
 
-	soc_opaque_gpio_set_value(connector, "INT33FC:01", "Panel N", gpio_index, value);
+	soc_opaque_gpio_set_value(connector, gpio_index,
+				  "INT33FC:01", "Panel N", gpio_index, value);
 }
 
 static void chv_gpio_set_value(struct intel_connector *connector,
@@ -331,7 +331,7 @@ static void chv_gpio_set_value(struct intel_connector *connector,
 static void bxt_gpio_set_value(struct intel_connector *connector,
 			       u8 gpio_index, bool value)
 {
-	soc_gpio_set_value(connector, NULL, gpio_index, value);
+	soc_gpio_set_value(connector, gpio_index, NULL, gpio_index, value);
 }
 
 enum {
-- 
GitLab


From a23e60938a7dfdac11bbacf1f5da4a99c46432e1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:29 +0200
Subject: [PATCH 0390/2340] drm/i915/dsi: Replace poking of CHV GPIOs behind
 the driver's back

It's a dirty hack in the driver that pokes GPIO registers behind
the driver's back. Moreoever it might be problematic as simultaneous
I/O may hang the system, see the commit 0bd50d719b00 ("pinctrl:
cherryview: prevent concurrent access to GPIO controllers") for
the details. Taking all this into consideration replace the hack
with proper GPIO APIs being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-15-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 47 +++++---------------
 1 file changed, 10 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index b1736c1301ea0..9c6946ccb193e 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -66,19 +66,6 @@ struct i2c_adapter_lookup {
 #define CHV_GPIO_IDX_START_SW		100
 #define CHV_GPIO_IDX_START_SE		198
 
-#define CHV_VBT_MAX_PINS_PER_FMLY	15
-
-#define CHV_GPIO_PAD_CFG0(f, i)		(0x4400 + (f) * 0x400 + (i) * 8)
-#define  CHV_GPIO_GPIOEN		(1 << 15)
-#define  CHV_GPIO_GPIOCFG_GPIO		(0 << 8)
-#define  CHV_GPIO_GPIOCFG_GPO		(1 << 8)
-#define  CHV_GPIO_GPIOCFG_GPI		(2 << 8)
-#define  CHV_GPIO_GPIOCFG_HIZ		(3 << 8)
-#define  CHV_GPIO_GPIOTXSTATE(state)	((!!(state)) << 1)
-
-#define CHV_GPIO_PAD_CFG1(f, i)		(0x4400 + (f) * 0x400 + (i) * 8 + 4)
-#define  CHV_GPIO_CFGLOCK		(1 << 31)
-
 /* ICL DSI Display GPIO Pins */
 #define  ICL_GPIO_DDSP_HPD_A		0
 #define  ICL_GPIO_L_VDDEN_1		1
@@ -278,23 +265,21 @@ static void chv_gpio_set_value(struct intel_connector *connector,
 			       u8 gpio_source, u8 gpio_index, bool value)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	u16 cfg0, cfg1;
-	u16 family_num;
-	u8 port;
 
 	if (connector->panel.vbt.dsi.seq_version >= 3) {
 		if (gpio_index >= CHV_GPIO_IDX_START_SE) {
 			/* XXX: it's unclear whether 255->57 is part of SE. */
-			gpio_index -= CHV_GPIO_IDX_START_SE;
-			port = CHV_IOSF_PORT_GPIO_SE;
+			soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:03", "Panel SE",
+						  gpio_index - CHV_GPIO_IDX_START_SE, value);
 		} else if (gpio_index >= CHV_GPIO_IDX_START_SW) {
-			gpio_index -= CHV_GPIO_IDX_START_SW;
-			port = CHV_IOSF_PORT_GPIO_SW;
+			soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:00", "Panel SW",
+						  gpio_index - CHV_GPIO_IDX_START_SW, value);
 		} else if (gpio_index >= CHV_GPIO_IDX_START_E) {
-			gpio_index -= CHV_GPIO_IDX_START_E;
-			port = CHV_IOSF_PORT_GPIO_E;
+			soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:02", "Panel E",
+						  gpio_index - CHV_GPIO_IDX_START_E, value);
 		} else {
-			port = CHV_IOSF_PORT_GPIO_N;
+			soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:01", "Panel N",
+						  gpio_index - CHV_GPIO_IDX_START_N, value);
 		}
 	} else {
 		/* XXX: The spec is unclear about CHV GPIO on seq v2 */
@@ -311,21 +296,9 @@ static void chv_gpio_set_value(struct intel_connector *connector,
 			return;
 		}
 
-		port = CHV_IOSF_PORT_GPIO_N;
+		soc_opaque_gpio_set_value(connector, gpio_index, "INT33FF:01", "Panel N",
+					  gpio_index - CHV_GPIO_IDX_START_N, value);
 	}
-
-	family_num = gpio_index / CHV_VBT_MAX_PINS_PER_FMLY;
-	gpio_index = gpio_index % CHV_VBT_MAX_PINS_PER_FMLY;
-
-	cfg0 = CHV_GPIO_PAD_CFG0(family_num, gpio_index);
-	cfg1 = CHV_GPIO_PAD_CFG1(family_num, gpio_index);
-
-	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
-	vlv_iosf_sb_write(dev_priv, port, cfg1, 0);
-	vlv_iosf_sb_write(dev_priv, port, cfg0,
-			  CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO |
-			  CHV_GPIO_GPIOTXSTATE(value));
-	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
 static void bxt_gpio_set_value(struct intel_connector *connector,
-- 
GitLab


From 08c3d1f91f41d930f7cca3672d9aa1eec68e2c4b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:30 +0200
Subject: [PATCH 0391/2340] drm/i915/dsi: Combine checks in mipi_exec_gpio()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For a couple of cases the branches call the same bxt_gpio_set_value().
As Ville suggested they can be combined by dropping the DISPLAY_VER()
check from Gen 11 to Gen 9. Do it that way.

Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-16-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 9c6946ccb193e..275d0218394c1 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -420,14 +420,12 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 
 	if (native)
 		icl_native_gpio_set_value(i915, gpio_number, value);
-	else if (DISPLAY_VER(i915) >= 11)
+	else if (DISPLAY_VER(i915) >= 9)
 		bxt_gpio_set_value(connector, gpio_index, value);
 	else if (IS_VALLEYVIEW(i915))
 		vlv_gpio_set_value(connector, gpio_source, gpio_number, value);
 	else if (IS_CHERRYVIEW(i915))
 		chv_gpio_set_value(connector, gpio_source, gpio_number, value);
-	else
-		bxt_gpio_set_value(connector, gpio_index, value);
 
 	return data + size;
 }
-- 
GitLab


From f52ffea0745943bb6af674f30f4243b3721b7cd6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 Nov 2023 22:18:31 +0200
Subject: [PATCH 0392/2340] drm/i915/iosf: Drop unused APIs

Drop unused vlv_iosf_sb_read() and vlv_iosf_sb_write().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103201831.1037416-17-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/vlv_sideband.c | 17 -----------------
 drivers/gpu/drm/i915/vlv_sideband.h |  3 ---
 2 files changed, 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/vlv_sideband.c b/drivers/gpu/drm/i915/vlv_sideband.c
index 63a7958763095..ffa195560d0dd 100644
--- a/drivers/gpu/drm/i915/vlv_sideband.c
+++ b/drivers/gpu/drm/i915/vlv_sideband.c
@@ -166,23 +166,6 @@ u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr)
 	return val;
 }
 
-u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg)
-{
-	u32 val = 0;
-
-	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port,
-			SB_CRRDDA_NP, reg, &val);
-
-	return val;
-}
-
-void vlv_iosf_sb_write(struct drm_i915_private *i915,
-		       u8 port, u32 reg, u32 val)
-{
-	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port,
-			SB_CRWRDA_NP, reg, &val);
-}
-
 u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg)
 {
 	u32 val = 0;
diff --git a/drivers/gpu/drm/i915/vlv_sideband.h b/drivers/gpu/drm/i915/vlv_sideband.h
index 4073966860e2a..c20cf41b2d39b 100644
--- a/drivers/gpu/drm/i915/vlv_sideband.h
+++ b/drivers/gpu/drm/i915/vlv_sideband.h
@@ -26,9 +26,6 @@ enum {
 };
 
 void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports);
-u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg);
-void vlv_iosf_sb_write(struct drm_i915_private *i915,
-		       u8 port, u32 reg, u32 val);
 void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports);
 
 static inline void vlv_bunit_get(struct drm_i915_private *i915)
-- 
GitLab


From b3c5a7de9aeb51cb19160f3f61343ed87487abde Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 22 Nov 2023 08:49:26 +0800
Subject: [PATCH 0393/2340] drm/nouveau/fifo: Remove duplicated include in
 chan.c

./drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c: chid.h is included more than once.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7603
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Lyude Paul <lyude@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122004926.84933-1-yang.lee@linux.alibaba.com
---
 drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index 87a62d4ff4bda..7d4716dcd5129 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -24,7 +24,6 @@
 #include "chan.h"
 #include "chid.h"
 #include "cgrp.h"
-#include "chid.h"
 #include "runl.h"
 #include "priv.h"
 
-- 
GitLab


From f4814c20d14ca168382e8887c768f290e4a2a861 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Nov 2023 23:18:29 +0100
Subject: [PATCH 0394/2340] drm/rockchip: rk3066_hdmi: include drm/drm_atomic.h

Without this header, the newly added code fails to build:

drivers/gpu/drm/rockchip/rk3066_hdmi.c: In function 'rk3066_hdmi_encoder_enable':
drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:22: error: implicit declaration of function 'drm_atomic_get_new_connector_state'; did you mean 'drm_atomic_helper_connector_reset'? [-Werror=implicit-function-declaration]
  397 |         conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector);
      |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |                      drm_atomic_helper_connector_reset
drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:20: error: assignment to 'struct drm_connector_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion]
  397 |         conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector);
      |                    ^
drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:22: error: implicit declaration of function 'drm_atomic_get_new_crtc_state'; did you mean 'drm_atomic_helper_swap_state'? [-Werror=implicit-function-declaration]
  401 |         crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
      |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |                      drm_atomic_helper_swap_state
drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:20: error: assignment to 'struct drm_crtc_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion]
  401 |         crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
      |                    ^

Fixes: ae3436a5e7c2 ("drm/rockchip: rk3066_hdmi: Switch encoder hooks to atomic")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122221838.3164349-1-arnd@kernel.org
---
 drivers/gpu/drm/rockchip/rk3066_hdmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index 0e7aae3419608..7d561c5a650fc 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -4,6 +4,7 @@
  *    Zheng Yang <zhengyang@rock-chips.com>
  */
 
+#include <drm/drm_atomic.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_of.h>
 #include <drm/drm_probe_helper.h>
-- 
GitLab


From 2bb7a27bd7c311c4928d6a8b5edf4b2aaa948ea8 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Thu, 16 Nov 2023 11:05:12 +0200
Subject: [PATCH 0395/2340] drm/i915/display: Use int type for
 entry_setup_frames

entry_setup_frames variable is defined as u8. However, the
function call intel_psr_entry_setup_frames() can return
negative error code. There is a type mismatch here, so let's
switch to use int here as well.

Fixes: 2b981d57e480 ("drm/i915/display: Support PSR entry VSC packet to be transmitted one frame earlier")

Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Reviewed-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116090512.480373-1-mika.kahola@intel.com
---
 drivers/gpu/drm/i915/display/intel_psr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 8d180132a74b1..204da50e3f286 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1319,7 +1319,7 @@ static bool _psr_compute_config(struct intel_dp *intel_dp,
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
-	u8 entry_setup_frames;
+	int entry_setup_frames;
 
 	/*
 	 * Current PSR panels don't work reliably with VRR enabled
-- 
GitLab


From 8a9fd9ecc4f1f72839c94cc2ec6846d6d9a71987 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Mon, 20 Nov 2023 15:02:14 +0200
Subject: [PATCH 0396/2340] drm/i915/display: Do not check psr2 if psr/panel
 replay is not supported
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do not continue to psr2 checks if psr or panel replay is not supported.

Cc: Animesh Manna <animesh.manna@intel.com>

Fixes: b8cf5b5d266e ("drm/i915/panelreplay: Initializaton and compute config for panel replay")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9670
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Animesh Manna <animesh.manna@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231120130214.3332726-1-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_psr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 204da50e3f286..15c1804dcd59e 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1373,6 +1373,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
 	else
 		crtc_state->has_psr = _psr_compute_config(intel_dp, crtc_state);
 
+	if (!(crtc_state->has_panel_replay || crtc_state->has_psr))
+		return;
+
 	crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state);
 
 	crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC);
-- 
GitLab


From 66b73e9a402d822723b3af5263eb12d735628eac Mon Sep 17 00:00:00 2001
From: Matt Coster <matt.coster@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:22 +0000
Subject: [PATCH 0397/2340] sizes.h: Add entries between SZ_32G and SZ_64T

sizes.h has a gap in defines between SZ_32G and SZ_64T. Add the missing
defines so they can be used in drivers.

Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/58b227d96f27859b453caf0ceaaac81a6616304b.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/linux/sizes.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index 84aa448d8bb32..c3a00b967d18a 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -47,8 +47,17 @@
 #define SZ_8G				_AC(0x200000000, ULL)
 #define SZ_16G				_AC(0x400000000, ULL)
 #define SZ_32G				_AC(0x800000000, ULL)
+#define SZ_64G				_AC(0x1000000000, ULL)
+#define SZ_128G				_AC(0x2000000000, ULL)
+#define SZ_256G				_AC(0x4000000000, ULL)
+#define SZ_512G				_AC(0x8000000000, ULL)
 
 #define SZ_1T				_AC(0x10000000000, ULL)
+#define SZ_2T				_AC(0x20000000000, ULL)
+#define SZ_4T				_AC(0x40000000000, ULL)
+#define SZ_8T				_AC(0x80000000000, ULL)
+#define SZ_16T				_AC(0x100000000000, ULL)
+#define SZ_32T				_AC(0x200000000000, ULL)
 #define SZ_64T				_AC(0x400000000000, ULL)
 
 #endif /* __LINUX_SIZES_H__ */
-- 
GitLab


From a191f73d85484f804284674c14f2d9f572c18adb Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:23 +0000
Subject: [PATCH 0398/2340] drm/gpuvm: Helper to get range of unmap from a
 remap op.

Determining the start and range of the unmap stage of a remap op is a
common piece of code currently implemented by multiple drivers. Add a
helper for this.

Changes since v7:
- Renamed helper to drm_gpuva_op_remap_to_unmap_range()
- Improved documentation

Changes since v6:
- Remove use of __always_inline

Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Link: https://lore.kernel.org/r/8a0a5b5eeec459d3c60fcdaa5a638ad14a18a59e.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_gpuvm.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 8ca10461d8ac8..f94fec9a8517b 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -1213,4 +1213,32 @@ void drm_gpuva_remap(struct drm_gpuva *prev,
 
 void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op);
 
+/**
+ * drm_gpuva_op_remap_to_unmap_range() - Helper to get the start and range of
+ * the unmap stage of a remap op.
+ * @op: Remap op.
+ * @start_addr: Output pointer for the start of the required unmap.
+ * @range: Output pointer for the length of the required unmap.
+ *
+ * The given start address and range will be set such that they represent the
+ * range of the address space that was previously covered by the mapping being
+ * re-mapped, but is now empty.
+ */
+static inline void
+drm_gpuva_op_remap_to_unmap_range(const struct drm_gpuva_op_remap *op,
+				  u64 *start_addr, u64 *range)
+{
+	const u64 va_start = op->prev ?
+			     op->prev->va.addr + op->prev->va.range :
+			     op->unmap->va->va.addr;
+	const u64 va_end = op->next ?
+			   op->next->va.addr :
+			   op->unmap->va->va.addr + op->unmap->va->va.range;
+
+	if (start_addr)
+		*start_addr = va_start;
+	if (range)
+		*range = va_end - va_start;
+}
+
 #endif /* __DRM_GPUVM_H__ */
-- 
GitLab


From 6a85c3b14728f0d5cb59ca0d38894db2a9839fce Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:24 +0000
Subject: [PATCH 0399/2340] dt-bindings: gpu: Add Imagination Technologies
 PowerVR/IMG GPU

Add the device tree binding documentation for the IMG AXE GPU used in
TI AM62 SoCs.

Co-developed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/deb0a4659423a3b8a74addee7178b6df7679575d.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 .../devicetree/bindings/gpu/img,powervr.yaml  | 73 +++++++++++++++++++
 MAINTAINERS                                   |  7 ++
 2 files changed, 80 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpu/img,powervr.yaml

diff --git a/Documentation/devicetree/bindings/gpu/img,powervr.yaml b/Documentation/devicetree/bindings/gpu/img,powervr.yaml
new file mode 100644
index 0000000000000..a13298f1a1827
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/img,powervr.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2023 Imagination Technologies Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpu/img,powervr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Imagination Technologies PowerVR and IMG GPU
+
+maintainers:
+  - Frank Binns <frank.binns@imgtec.com>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - ti,am62-gpu
+      - const: img,img-axe # IMG AXE GPU model/revision is fully discoverable
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    items:
+      - const: core
+      - const: mem
+      - const: sys
+    minItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,am62-gpu
+    then:
+      properties:
+        clocks:
+          maxItems: 1
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+    gpu@fd00000 {
+        compatible = "ti,am62-gpu", "img,img-axe";
+        reg = <0x0fd00000 0x20000>;
+        clocks = <&k3_clks 187 0>;
+        clock-names = "core";
+        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+        power-domains = <&k3_pds 187 TI_SCI_PD_EXCLUSIVE>;
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 8a70be8f08eee..59c60abf341ec 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10392,6 +10392,13 @@ IMGTEC IR DECODER DRIVER
 S:	Orphan
 F:	drivers/media/rc/img-ir/
 
+IMGTEC POWERVR DRM DRIVER
+M:	Frank Binns <frank.binns@imgtec.com>
+M:	Donald Robson <donald.robson@imgtec.com>
+M:	Matt Coster <matt.coster@imgtec.com>
+S:	Supported
+F:	Documentation/devicetree/bindings/gpu/img,powervr.yaml
+
 IMON SOUNDGRAPH USB IR RECEIVER
 M:	Sean Young <sean@mess.org>
 L:	linux-media@vger.kernel.org
-- 
GitLab


From 1088d89e551530a9f5128770d74a1516090f1e41 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:25 +0000
Subject: [PATCH 0400/2340] drm/imagination/uapi: Add PowerVR driver UAPI

Add the UAPI implementation for the PowerVR driver.

Changes from v8:
- Fixed documentation for unmapping, which previously suggested the
  size was not used
- Corrected license identifier

Changes from v7:
- Remove prefixes from DRM_PVR_BO_* flags
- Improve struct drm_pvr_ioctl_create_hwrt_dataset_args documentation
- Remove references to static area carveouts
- CREATE_BO ioctl now returns an error if provided size isn't page aligned
- Clarify documentation for DRM_PVR_STATIC_DATA_AREA_EOT

Changes from v6:
- Add padding to struct drm_pvr_dev_query_gpu_info
- Improve BYPASS_CACHE flag documentation
- Add SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE flag

Changes from v4:
- Remove CREATE_ZEROED flag for BO creation (all buffers are now zeroed)

Co-developed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Frank Binns <frank.binns@imgtec.com>
Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Link: https://lore.kernel.org/r/8c95a3a1d685e2b44d361b95a19eae5a478fb9d1.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 MAINTAINERS                |    1 +
 include/uapi/drm/pvr_drm.h | 1297 ++++++++++++++++++++++++++++++++++++
 2 files changed, 1298 insertions(+)
 create mode 100644 include/uapi/drm/pvr_drm.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 59c60abf341ec..37642011c319d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10398,6 +10398,7 @@ M:	Donald Robson <donald.robson@imgtec.com>
 M:	Matt Coster <matt.coster@imgtec.com>
 S:	Supported
 F:	Documentation/devicetree/bindings/gpu/img,powervr.yaml
+F:	include/uapi/drm/pvr_drm.h
 
 IMON SOUNDGRAPH USB IR RECEIVER
 M:	Sean Young <sean@mess.org>
diff --git a/include/uapi/drm/pvr_drm.h b/include/uapi/drm/pvr_drm.h
new file mode 100644
index 0000000000000..1834375390c45
--- /dev/null
+++ b/include/uapi/drm/pvr_drm.h
@@ -0,0 +1,1297 @@
+/* SPDX-License-Identifier: (GPL-2.0-only WITH Linux-syscall-note) OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_DRM_UAPI_H
+#define PVR_DRM_UAPI_H
+
+#include "drm.h"
+
+#include <linux/const.h>
+#include <linux/types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/**
+ * DOC: PowerVR UAPI
+ *
+ * The PowerVR IOCTL argument structs have a few limitations in place, in
+ * addition to the standard kernel restrictions:
+ *
+ *  - All members must be type-aligned.
+ *  - The overall struct must be padded to 64-bit alignment.
+ *  - Explicit padding is almost always required. This takes the form of
+ *    ``_padding_[x]`` members of sufficient size to pad to the next power-of-two
+ *    alignment, where [x] is the offset into the struct in hexadecimal. Arrays
+ *    are never used for alignment. Padding fields must be zeroed; this is
+ *    always checked.
+ *  - Unions may only appear as the last member of a struct.
+ *  - Individual union members may grow in the future. The space between the
+ *    end of a union member and the end of its containing union is considered
+ *    "implicit padding" and must be zeroed. This is always checked.
+ *
+ * In addition to the IOCTL argument structs, the PowerVR UAPI makes use of
+ * DEV_QUERY argument structs. These are used to fetch information about the
+ * device and runtime. These structs are subject to the same rules set out
+ * above.
+ */
+
+/**
+ * struct drm_pvr_obj_array - Container used to pass arrays of objects
+ *
+ * It is not unusual to have to extend objects to pass new parameters, and the DRM
+ * ioctl infrastructure is supporting that by padding ioctl arguments with zeros
+ * when the data passed by userspace is smaller than the struct defined in the
+ * drm_ioctl_desc, thus keeping things backward compatible. This type is just
+ * applying the same concepts to indirect objects passed through arrays referenced
+ * from the main ioctl arguments structure: the stride basically defines the size
+ * of the object passed by userspace, which allows the kernel driver to pad with
+ * zeros when it's smaller than the size of the object it expects.
+ *
+ * Use ``DRM_PVR_OBJ_ARRAY()`` to fill object array fields, unless you
+ * have a very good reason not to.
+ */
+struct drm_pvr_obj_array {
+	/** @stride: Stride of object struct. Used for versioning. */
+	__u32 stride;
+
+	/** @count: Number of objects in the array. */
+	__u32 count;
+
+	/** @array: User pointer to an array of objects. */
+	__u64 array;
+};
+
+/**
+ * DRM_PVR_OBJ_ARRAY() - Helper macro for filling &struct drm_pvr_obj_array.
+ * @cnt: Number of elements pointed to py @ptr.
+ * @ptr: Pointer to start of a C array.
+ *
+ * Return: Literal of type &struct drm_pvr_obj_array.
+ */
+#define DRM_PVR_OBJ_ARRAY(cnt, ptr) \
+	{ .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) }
+
+/**
+ * DOC: PowerVR IOCTL interface
+ */
+
+/**
+ * PVR_IOCTL() - Build a PowerVR IOCTL number
+ * @_ioctl: An incrementing id for this IOCTL. Added to %DRM_COMMAND_BASE.
+ * @_mode: Must be one of %DRM_IOR, %DRM_IOW or %DRM_IOWR.
+ * @_data: The type of the args struct passed by this IOCTL.
+ *
+ * The struct referred to by @_data must have a ``drm_pvr_ioctl_`` prefix and an
+ * ``_args suffix``. They are therefore omitted from @_data.
+ *
+ * This should only be used to build the constants described below; it should
+ * never be used to call an IOCTL directly.
+ *
+ * Return: An IOCTL number to be passed to ioctl() from userspace.
+ */
+#define PVR_IOCTL(_ioctl, _mode, _data) \
+	_mode(DRM_COMMAND_BASE + (_ioctl), struct drm_pvr_ioctl_##_data##_args)
+
+#define DRM_IOCTL_PVR_DEV_QUERY PVR_IOCTL(0x00, DRM_IOWR, dev_query)
+#define DRM_IOCTL_PVR_CREATE_BO PVR_IOCTL(0x01, DRM_IOWR, create_bo)
+#define DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET PVR_IOCTL(0x02, DRM_IOWR, get_bo_mmap_offset)
+#define DRM_IOCTL_PVR_CREATE_VM_CONTEXT PVR_IOCTL(0x03, DRM_IOWR, create_vm_context)
+#define DRM_IOCTL_PVR_DESTROY_VM_CONTEXT PVR_IOCTL(0x04, DRM_IOW, destroy_vm_context)
+#define DRM_IOCTL_PVR_VM_MAP PVR_IOCTL(0x05, DRM_IOW, vm_map)
+#define DRM_IOCTL_PVR_VM_UNMAP PVR_IOCTL(0x06, DRM_IOW, vm_unmap)
+#define DRM_IOCTL_PVR_CREATE_CONTEXT PVR_IOCTL(0x07, DRM_IOWR, create_context)
+#define DRM_IOCTL_PVR_DESTROY_CONTEXT PVR_IOCTL(0x08, DRM_IOW, destroy_context)
+#define DRM_IOCTL_PVR_CREATE_FREE_LIST PVR_IOCTL(0x09, DRM_IOWR, create_free_list)
+#define DRM_IOCTL_PVR_DESTROY_FREE_LIST PVR_IOCTL(0x0a, DRM_IOW, destroy_free_list)
+#define DRM_IOCTL_PVR_CREATE_HWRT_DATASET PVR_IOCTL(0x0b, DRM_IOWR, create_hwrt_dataset)
+#define DRM_IOCTL_PVR_DESTROY_HWRT_DATASET PVR_IOCTL(0x0c, DRM_IOW, destroy_hwrt_dataset)
+#define DRM_IOCTL_PVR_SUBMIT_JOBS PVR_IOCTL(0x0d, DRM_IOW, submit_jobs)
+
+/**
+ * DOC: PowerVR IOCTL DEV_QUERY interface
+ */
+
+/**
+ * struct drm_pvr_dev_query_gpu_info - Container used to fetch information about
+ * the graphics processor.
+ *
+ * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set
+ * to %DRM_PVR_DEV_QUERY_GPU_INFO_GET.
+ */
+struct drm_pvr_dev_query_gpu_info {
+	/**
+	 * @gpu_id: GPU identifier.
+	 *
+	 * For all currently supported GPUs this is the BVNC encoded as a 64-bit
+	 * value as follows:
+	 *
+	 *    +--------+--------+--------+-------+
+	 *    | 63..48 | 47..32 | 31..16 | 15..0 |
+	 *    +========+========+========+=======+
+	 *    | B      | V      | N      | C     |
+	 *    +--------+--------+--------+-------+
+	 */
+	__u64 gpu_id;
+
+	/**
+	 * @num_phantoms: Number of Phantoms present.
+	 */
+	__u32 num_phantoms;
+
+	/** @_padding_c: Reserved. This field must be zeroed. */
+	__u32 _padding_c;
+};
+
+/**
+ * struct drm_pvr_dev_query_runtime_info - Container used to fetch information
+ * about the graphics runtime.
+ *
+ * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set
+ * to %DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET.
+ */
+struct drm_pvr_dev_query_runtime_info {
+	/**
+	 * @free_list_min_pages: Minimum allowed free list size,
+	 * in PM physical pages.
+	 */
+	__u64 free_list_min_pages;
+
+	/**
+	 * @free_list_max_pages: Maximum allowed free list size,
+	 * in PM physical pages.
+	 */
+	__u64 free_list_max_pages;
+
+	/**
+	 * @common_store_alloc_region_size: Size of the Allocation
+	 * Region within the Common Store used for coefficient and shared
+	 * registers, in dwords.
+	 */
+	__u32 common_store_alloc_region_size;
+
+	/**
+	 * @common_store_partition_space_size: Size of the
+	 * Partition Space within the Common Store for output buffers, in
+	 * dwords.
+	 */
+	__u32 common_store_partition_space_size;
+
+	/**
+	 * @max_coeffs: Maximum coefficients, in dwords.
+	 */
+	__u32 max_coeffs;
+
+	/**
+	 * @cdm_max_local_mem_size_regs: Maximum amount of local
+	 * memory available to a compute kernel, in dwords.
+	 */
+	__u32 cdm_max_local_mem_size_regs;
+};
+
+/**
+ * struct drm_pvr_dev_query_quirks - Container used to fetch information about
+ * hardware fixes for which the device may require support in the user mode
+ * driver.
+ *
+ * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set
+ * to %DRM_PVR_DEV_QUERY_QUIRKS_GET.
+ */
+struct drm_pvr_dev_query_quirks {
+	/**
+	 * @quirks: A userspace address for the hardware quirks __u32 array.
+	 *
+	 * The first @musthave_count items in the list are quirks that the
+	 * client must support for this device. If userspace does not support
+	 * all these quirks then functionality is not guaranteed and client
+	 * initialisation must fail.
+	 * The remaining quirks in the list affect userspace and the kernel or
+	 * firmware. They are disabled by default and require userspace to
+	 * opt-in. The opt-in mechanism depends on the quirk.
+	 */
+	__u64 quirks;
+
+	/** @count: Length of @quirks (number of __u32). */
+	__u16 count;
+
+	/**
+	 * @musthave_count: The number of entries in @quirks that are
+	 * mandatory, starting at index 0.
+	 */
+	__u16 musthave_count;
+
+	/** @_padding_c: Reserved. This field must be zeroed. */
+	__u32 _padding_c;
+};
+
+/**
+ * struct drm_pvr_dev_query_enhancements - Container used to fetch information
+ * about optional enhancements supported by the device that require support in
+ * the user mode driver.
+ *
+ * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set
+ * to %DRM_PVR_DEV_ENHANCEMENTS_GET.
+ */
+struct drm_pvr_dev_query_enhancements {
+	/**
+	 * @enhancements: A userspace address for the hardware enhancements
+	 * __u32 array.
+	 *
+	 * These enhancements affect userspace and the kernel or firmware. They
+	 * are disabled by default and require userspace to opt-in. The opt-in
+	 * mechanism depends on the enhancement.
+	 */
+	__u64 enhancements;
+
+	/** @count: Length of @enhancements (number of __u32). */
+	__u16 count;
+
+	/** @_padding_a: Reserved. This field must be zeroed. */
+	__u16 _padding_a;
+
+	/** @_padding_c: Reserved. This field must be zeroed. */
+	__u32 _padding_c;
+};
+
+/**
+ * enum drm_pvr_heap_id - Array index for heap info data returned by
+ * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
+ *
+ * For compatibility reasons all indices will be present in the returned array,
+ * however some heaps may not be present. These are indicated where
+ * &struct drm_pvr_heap.size is set to zero.
+ */
+enum drm_pvr_heap_id {
+	/** @DRM_PVR_HEAP_GENERAL: General purpose heap. */
+	DRM_PVR_HEAP_GENERAL = 0,
+	/** @DRM_PVR_HEAP_PDS_CODE_DATA: PDS code and data heap. */
+	DRM_PVR_HEAP_PDS_CODE_DATA,
+	/** @DRM_PVR_HEAP_USC_CODE: USC code heap. */
+	DRM_PVR_HEAP_USC_CODE,
+	/** @DRM_PVR_HEAP_RGNHDR: Region header heap. Only used if GPU has BRN63142. */
+	DRM_PVR_HEAP_RGNHDR,
+	/** @DRM_PVR_HEAP_VIS_TEST: Visibility test heap. */
+	DRM_PVR_HEAP_VIS_TEST,
+	/** @DRM_PVR_HEAP_TRANSFER_FRAG: Transfer fragment heap. */
+	DRM_PVR_HEAP_TRANSFER_FRAG,
+
+	/**
+	 * @DRM_PVR_HEAP_COUNT: The number of heaps returned by
+	 * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
+	 *
+	 * More heaps may be added, so this also serves as the copy limit when
+	 * sent by the caller.
+	 */
+	DRM_PVR_HEAP_COUNT
+	/* Please only add additional heaps above DRM_PVR_HEAP_COUNT! */
+};
+
+/**
+ * struct drm_pvr_heap - Container holding information about a single heap.
+ *
+ * This will always be fetched as an array.
+ */
+struct drm_pvr_heap {
+	/** @base: Base address of heap. */
+	__u64 base;
+
+	/** @size: Size of heap, in bytes. Will be 0 if the heap is not present. */
+	__u64 size;
+
+	/** @flags: Flags for this heap. Currently always 0. */
+	__u32 flags;
+
+	/** @page_size_log2: Log2 of page size. */
+	__u32 page_size_log2;
+};
+
+/**
+ * struct drm_pvr_dev_query_heap_info - Container used to fetch information
+ * about heaps supported by the device driver.
+ *
+ * Please note all driver-supported heaps will be returned up to &heaps.count.
+ * Some heaps will not be present in all devices, which will be indicated by
+ * &struct drm_pvr_heap.size being set to zero.
+ *
+ * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set
+ * to %DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
+ */
+struct drm_pvr_dev_query_heap_info {
+	/**
+	 * @heaps: Array of &struct drm_pvr_heap. If pointer is NULL, the count
+	 * and stride will be updated with those known to the driver version, to
+	 * facilitate allocation by the caller.
+	 */
+	struct drm_pvr_obj_array heaps;
+};
+
+/**
+ * enum drm_pvr_static_data_area_usage - Array index for static data area info
+ * returned by %DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET.
+ *
+ * For compatibility reasons all indices will be present in the returned array,
+ * however some areas may not be present. These are indicated where
+ * &struct drm_pvr_static_data_area.size is set to zero.
+ */
+enum drm_pvr_static_data_area_usage {
+	/**
+	 * @DRM_PVR_STATIC_DATA_AREA_EOT: End of Tile PDS program code segment.
+	 *
+	 * The End of Tile PDS task runs at completion of a tile during a fragment job, and is
+	 * responsible for emitting the tile to the Pixel Back End.
+	 */
+	DRM_PVR_STATIC_DATA_AREA_EOT = 0,
+
+	/**
+	 * @DRM_PVR_STATIC_DATA_AREA_FENCE: MCU fence area, used during cache flush and
+	 * invalidation.
+	 *
+	 * This must point to valid physical memory but the contents otherwise are not used.
+	 */
+	DRM_PVR_STATIC_DATA_AREA_FENCE,
+
+	/**
+	 * @DRM_PVR_STATIC_DATA_AREA_VDM_SYNC: VDM sync program.
+	 *
+	 * The VDM sync program is used to synchronise multiple areas of the GPU hardware.
+	 */
+	DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
+
+	/**
+	 * @DRM_PVR_STATIC_DATA_AREA_YUV_CSC: YUV coefficients.
+	 *
+	 * Area contains up to 16 slots with stride of 64 bytes. Each is a 3x4 matrix of u16 fixed
+	 * point numbers, with 1 sign bit, 2 integer bits and 13 fractional bits.
+	 *
+	 * The slots are :
+	 * 0 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY_KHR
+	 * 1 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY_KHR (full range)
+	 * 2 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY_KHR (conformant range)
+	 * 3 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR (full range)
+	 * 4 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR (conformant range)
+	 * 5 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR (full range)
+	 * 6 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR (conformant range)
+	 * 7 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR (full range)
+	 * 8 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR (conformant range)
+	 * 9 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR (conformant range, 10 bit)
+	 * 10 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR (conformant range, 10 bit)
+	 * 11 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR (conformant range, 10 bit)
+	 * 14 = Identity (biased)
+	 * 15 = Identity
+	 */
+	DRM_PVR_STATIC_DATA_AREA_YUV_CSC,
+};
+
+/**
+ * struct drm_pvr_static_data_area - Container holding information about a
+ * single static data area.
+ *
+ * This will always be fetched as an array.
+ */
+struct drm_pvr_static_data_area {
+	/**
+	 * @area_usage: Usage of static data area.
+	 * See &enum drm_pvr_static_data_area_usage.
+	 */
+	__u16 area_usage;
+
+	/**
+	 * @location_heap_id: Array index of heap where this of static data
+	 * area is located. This array is fetched using
+	 * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
+	 */
+	__u16 location_heap_id;
+
+	/** @size: Size of static data area. Not present if set to zero. */
+	__u32 size;
+
+	/** @offset: Offset of static data area from start of heap. */
+	__u64 offset;
+};
+
+/**
+ * struct drm_pvr_dev_query_static_data_areas - Container used to fetch
+ * information about the static data areas in heaps supported by the device
+ * driver.
+ *
+ * Please note all driver-supported static data areas will be returned up to
+ * &static_data_areas.count. Some will not be present for all devices which,
+ * will be indicated by &struct drm_pvr_static_data_area.size being set to zero.
+ *
+ * Further, some heaps will not be present either. See &struct
+ * drm_pvr_dev_query_heap_info.
+ *
+ * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set
+ * to %DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET.
+ */
+struct drm_pvr_dev_query_static_data_areas {
+	/**
+	 * @static_data_areas: Array of &struct drm_pvr_static_data_area. If
+	 * pointer is NULL, the count and stride will be updated with those
+	 * known to the driver version, to facilitate allocation by the caller.
+	 */
+	struct drm_pvr_obj_array static_data_areas;
+};
+
+/**
+ * enum drm_pvr_dev_query - For use with &drm_pvr_ioctl_dev_query_args.type to
+ * indicate the type of the receiving container.
+ *
+ * Append only. Do not reorder.
+ */
+enum drm_pvr_dev_query {
+	/**
+	 * @DRM_PVR_DEV_QUERY_GPU_INFO_GET: The dev query args contain a pointer
+	 * to &struct drm_pvr_dev_query_gpu_info.
+	 */
+	DRM_PVR_DEV_QUERY_GPU_INFO_GET = 0,
+
+	/**
+	 * @DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET: The dev query args contain a
+	 * pointer to &struct drm_pvr_dev_query_runtime_info.
+	 */
+	DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET,
+
+	/**
+	 * @DRM_PVR_DEV_QUERY_QUIRKS_GET: The dev query args contain a pointer
+	 * to &struct drm_pvr_dev_query_quirks.
+	 */
+	DRM_PVR_DEV_QUERY_QUIRKS_GET,
+
+	/**
+	 * @DRM_PVR_DEV_QUERY_ENHANCEMENTS_GET: The dev query args contain a
+	 * pointer to &struct drm_pvr_dev_query_enhancements.
+	 */
+	DRM_PVR_DEV_QUERY_ENHANCEMENTS_GET,
+
+	/**
+	 * @DRM_PVR_DEV_QUERY_HEAP_INFO_GET: The dev query args contain a
+	 * pointer to &struct drm_pvr_dev_query_heap_info.
+	 */
+	DRM_PVR_DEV_QUERY_HEAP_INFO_GET,
+
+	/**
+	 * @DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET: The dev query args contain
+	 * a pointer to &struct drm_pvr_dev_query_static_data_areas.
+	 */
+	DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET,
+};
+
+/**
+ * struct drm_pvr_ioctl_dev_query_args - Arguments for %DRM_IOCTL_PVR_DEV_QUERY.
+ */
+struct drm_pvr_ioctl_dev_query_args {
+	/**
+	 * @type: Type of query and output struct. See &enum drm_pvr_dev_query.
+	 */
+	__u32 type;
+
+	/**
+	 * @size: Size of the receiving struct, see @type.
+	 *
+	 * After a successful call this will be updated to the written byte
+	 * length.
+	 * Can also be used to get the minimum byte length (see @pointer).
+	 * This allows additional fields to be appended to the structs in
+	 * future.
+	 */
+	__u32 size;
+
+	/**
+	 * @pointer: Pointer to struct @type.
+	 *
+	 * Must be large enough to contain @size bytes.
+	 * If pointer is NULL, the expected size will be returned in the @size
+	 * field, but no other data will be written.
+	 */
+	__u64 pointer;
+};
+
+/**
+ * DOC: PowerVR IOCTL CREATE_BO interface
+ */
+
+/**
+ * DOC: Flags for CREATE_BO
+ *
+ * We use "device" to refer to the GPU here because of the ambiguity between CPU and GPU in some
+ * fonts.
+ *
+ * Device mapping options
+ *    :DRM_PVR_BO_BYPASS_DEVICE_CACHE: Specify that device accesses to this memory will bypass the
+ *       cache. This is used for buffers that will either be regularly updated by the CPU (eg free
+ *       lists) or will be accessed only once and therefore isn't worth caching (eg partial render
+ *       buffers).
+ *       By default, the device flushes its memory caches after every job, so this is not normally
+ *       required for coherency.
+ *    :DRM_PVR_BO_PM_FW_PROTECT: Specify that only the Parameter Manager (PM) and/or firmware
+ *       processor should be allowed to access this memory when mapped to the device. It is not
+ *       valid to specify this flag with DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS.
+ *
+ * CPU mapping options
+ *    :DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS: Allow userspace to map and access the contents of this
+ *       memory. It is not valid to specify this flag with DRM_PVR_BO_PM_FW_PROTECT.
+ */
+#define DRM_PVR_BO_BYPASS_DEVICE_CACHE _BITULL(0)
+#define DRM_PVR_BO_PM_FW_PROTECT _BITULL(1)
+#define DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS _BITULL(2)
+/* Bits 3..63 are reserved. */
+
+#define DRM_PVR_BO_FLAGS_MASK (DRM_PVR_BO_BYPASS_DEVICE_CACHE | DRM_PVR_BO_PM_FW_PROTECT | \
+			       DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS)
+
+/**
+ * struct drm_pvr_ioctl_create_bo_args - Arguments for %DRM_IOCTL_PVR_CREATE_BO
+ */
+struct drm_pvr_ioctl_create_bo_args {
+	/**
+	 * @size: [IN] Size of buffer object to create. This must be page size
+	 * aligned.
+	 */
+	__u64 size;
+
+	/**
+	 * @handle: [OUT] GEM handle of the new buffer object for use in
+	 * userspace.
+	 */
+	__u32 handle;
+
+	/** @_padding_c: Reserved. This field must be zeroed. */
+	__u32 _padding_c;
+
+	/**
+	 * @flags: [IN] Options which will affect the behaviour of this
+	 * creation operation and future mapping operations on the created
+	 * object. This field must be a valid combination of ``DRM_PVR_BO_*``
+	 * values, with all bits marked as reserved set to zero.
+	 */
+	__u64 flags;
+};
+
+/**
+ * DOC: PowerVR IOCTL GET_BO_MMAP_OFFSET interface
+ */
+
+/**
+ * struct drm_pvr_ioctl_get_bo_mmap_offset_args - Arguments for
+ * %DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET
+ *
+ * Like other DRM drivers, the "mmap" IOCTL doesn't actually map any memory.
+ * Instead, it allocates a fake offset which refers to the specified buffer
+ * object. This offset can be used with a real mmap call on the DRM device
+ * itself.
+ */
+struct drm_pvr_ioctl_get_bo_mmap_offset_args {
+	/** @handle: [IN] GEM handle of the buffer object to be mapped. */
+	__u32 handle;
+
+	/** @_padding_4: Reserved. This field must be zeroed. */
+	__u32 _padding_4;
+
+	/** @offset: [OUT] Fake offset to use in the real mmap call. */
+	__u64 offset;
+};
+
+/**
+ * DOC: PowerVR IOCTL CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT interfaces
+ */
+
+/**
+ * struct drm_pvr_ioctl_create_vm_context_args - Arguments for
+ * %DRM_IOCTL_PVR_CREATE_VM_CONTEXT
+ */
+struct drm_pvr_ioctl_create_vm_context_args {
+	/** @handle: [OUT] Handle for new VM context. */
+	__u32 handle;
+
+	/** @_padding_4: Reserved. This field must be zeroed. */
+	__u32 _padding_4;
+};
+
+/**
+ * struct drm_pvr_ioctl_destroy_vm_context_args - Arguments for
+ * %DRM_IOCTL_PVR_DESTROY_VM_CONTEXT
+ */
+struct drm_pvr_ioctl_destroy_vm_context_args {
+	/**
+	 * @handle: [IN] Handle for VM context to be destroyed.
+	 */
+	__u32 handle;
+
+	/** @_padding_4: Reserved. This field must be zeroed. */
+	__u32 _padding_4;
+};
+
+/**
+ * DOC: PowerVR IOCTL VM_MAP and VM_UNMAP interfaces
+ *
+ * The VM UAPI allows userspace to create buffer object mappings in GPU virtual address space.
+ *
+ * The client is responsible for managing GPU address space. It should allocate mappings within
+ * the heaps returned by %DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
+ *
+ * %DRM_IOCTL_PVR_VM_MAP creates a new mapping. The client provides the target virtual address for
+ * the mapping. Size and offset within the mapped buffer object can be specified, so the client can
+ * partially map a buffer.
+ *
+ * %DRM_IOCTL_PVR_VM_UNMAP removes a mapping. The entire mapping will be removed from GPU address
+ * space only if the size of the mapping matches that known to the driver.
+ */
+
+/**
+ * struct drm_pvr_ioctl_vm_map_args - Arguments for %DRM_IOCTL_PVR_VM_MAP.
+ */
+struct drm_pvr_ioctl_vm_map_args {
+	/**
+	 * @vm_context_handle: [IN] Handle for VM context for this mapping to
+	 * exist in.
+	 */
+	__u32 vm_context_handle;
+
+	/** @flags: [IN] Flags which affect this mapping. Currently always 0. */
+	__u32 flags;
+
+	/**
+	 * @device_addr: [IN] Requested device-virtual address for the mapping.
+	 * This must be non-zero and aligned to the device page size for the
+	 * heap containing the requested address. It is an error to specify an
+	 * address which is not contained within one of the heaps returned by
+	 * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
+	 */
+	__u64 device_addr;
+
+	/**
+	 * @handle: [IN] Handle of the target buffer object. This must be a
+	 * valid handle returned by %DRM_IOCTL_PVR_CREATE_BO.
+	 */
+	__u32 handle;
+
+	/** @_padding_14: Reserved. This field must be zeroed. */
+	__u32 _padding_14;
+
+	/**
+	 * @offset: [IN] Offset into the target bo from which to begin the
+	 * mapping.
+	 */
+	__u64 offset;
+
+	/**
+	 * @size: [IN] Size of the requested mapping. Must be aligned to
+	 * the device page size for the heap containing the requested address,
+	 * as well as the host page size. When added to @device_addr, the
+	 * result must not overflow the heap which contains @device_addr (i.e.
+	 * the range specified by @device_addr and @size must be completely
+	 * contained within a single heap specified by
+	 * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET).
+	 */
+	__u64 size;
+};
+
+/**
+ * struct drm_pvr_ioctl_vm_unmap_args - Arguments for %DRM_IOCTL_PVR_VM_UNMAP.
+ */
+struct drm_pvr_ioctl_vm_unmap_args {
+	/**
+	 * @vm_context_handle: [IN] Handle for VM context that this mapping
+	 * exists in.
+	 */
+	__u32 vm_context_handle;
+
+	/** @_padding_4: Reserved. This field must be zeroed. */
+	__u32 _padding_4;
+
+	/**
+	 * @device_addr: [IN] Device-virtual address at the start of the target
+	 * mapping. This must be non-zero.
+	 */
+	__u64 device_addr;
+
+	/**
+	 * @size: Size in bytes of the target mapping. This must be non-zero.
+	 */
+	__u64 size;
+};
+
+/**
+ * DOC: PowerVR IOCTL CREATE_CONTEXT and DESTROY_CONTEXT interfaces
+ */
+
+/**
+ * enum drm_pvr_ctx_priority - Arguments for
+ * &drm_pvr_ioctl_create_context_args.priority
+ */
+enum drm_pvr_ctx_priority {
+	/** @DRM_PVR_CTX_PRIORITY_LOW: Priority below normal. */
+	DRM_PVR_CTX_PRIORITY_LOW = -512,
+
+	/** @DRM_PVR_CTX_PRIORITY_NORMAL: Normal priority. */
+	DRM_PVR_CTX_PRIORITY_NORMAL = 0,
+
+	/**
+	 * @DRM_PVR_CTX_PRIORITY_HIGH: Priority above normal.
+	 * Note this requires ``CAP_SYS_NICE`` or ``DRM_MASTER``.
+	 */
+	DRM_PVR_CTX_PRIORITY_HIGH = 512,
+};
+
+/**
+ * enum drm_pvr_ctx_type - Arguments for
+ * &struct drm_pvr_ioctl_create_context_args.type
+ */
+enum drm_pvr_ctx_type {
+	/**
+	 * @DRM_PVR_CTX_TYPE_RENDER: Render context. Use &struct
+	 * drm_pvr_ioctl_create_render_context_args for context creation arguments.
+	 */
+	DRM_PVR_CTX_TYPE_RENDER = 0,
+
+	/**
+	 * @DRM_PVR_CTX_TYPE_COMPUTE: Compute context. Use &struct
+	 * drm_pvr_ioctl_create_compute_context_args for context creation arguments.
+	 */
+	DRM_PVR_CTX_TYPE_COMPUTE,
+
+	/**
+	 * @DRM_PVR_CTX_TYPE_TRANSFER_FRAG: Transfer context for fragment data masters. Use
+	 * &struct drm_pvr_ioctl_create_transfer_context_args for context creation arguments.
+	 */
+	DRM_PVR_CTX_TYPE_TRANSFER_FRAG,
+};
+
+/**
+ * struct drm_pvr_ioctl_create_context_args - Arguments for
+ * %DRM_IOCTL_PVR_CREATE_CONTEXT
+ */
+struct drm_pvr_ioctl_create_context_args {
+	/**
+	 * @type: [IN] Type of context to create.
+	 *
+	 * This must be one of the values defined by &enum drm_pvr_ctx_type.
+	 */
+	__u32 type;
+
+	/** @flags: [IN] Flags for context. */
+	__u32 flags;
+
+	/**
+	 * @priority: [IN] Priority of new context.
+	 *
+	 * This must be one of the values defined by &enum drm_pvr_ctx_priority.
+	 */
+	__s32 priority;
+
+	/** @handle: [OUT] Handle for new context. */
+	__u32 handle;
+
+	/**
+	 * @static_context_state: [IN] Pointer to static context state stream.
+	 */
+	__u64 static_context_state;
+
+	/**
+	 * @static_context_state_len: [IN] Length of static context state, in bytes.
+	 */
+	__u32 static_context_state_len;
+
+	/**
+	 * @vm_context_handle: [IN] Handle for VM context that this context is
+	 * associated with.
+	 */
+	__u32 vm_context_handle;
+
+	/**
+	 * @callstack_addr: [IN] Address for initial call stack pointer. Only valid
+	 * if @type is %DRM_PVR_CTX_TYPE_RENDER, otherwise must be 0.
+	 */
+	__u64 callstack_addr;
+};
+
+/**
+ * struct drm_pvr_ioctl_destroy_context_args - Arguments for
+ * %DRM_IOCTL_PVR_DESTROY_CONTEXT
+ */
+struct drm_pvr_ioctl_destroy_context_args {
+	/**
+	 * @handle: [IN] Handle for context to be destroyed.
+	 */
+	__u32 handle;
+
+	/** @_padding_4: Reserved. This field must be zeroed. */
+	__u32 _padding_4;
+};
+
+/**
+ * DOC: PowerVR IOCTL CREATE_FREE_LIST and DESTROY_FREE_LIST interfaces
+ */
+
+/**
+ * struct drm_pvr_ioctl_create_free_list_args - Arguments for
+ * %DRM_IOCTL_PVR_CREATE_FREE_LIST
+ *
+ * Free list arguments have the following constraints :
+ *
+ * - @max_num_pages must be greater than zero.
+ * - @grow_threshold must be between 0 and 100.
+ * - @grow_num_pages must be less than or equal to &max_num_pages.
+ * - @initial_num_pages, @max_num_pages and @grow_num_pages must be multiples
+ *   of 4.
+ * - When &grow_num_pages is 0, @initial_num_pages must be equal to
+ *   @max_num_pages.
+ * - When &grow_num_pages is non-zero, @initial_num_pages must be less than
+ *   @max_num_pages.
+ */
+struct drm_pvr_ioctl_create_free_list_args {
+	/**
+	 * @free_list_gpu_addr: [IN] Address of GPU mapping of buffer object
+	 * containing memory to be used by free list.
+	 *
+	 * The mapped region of the buffer object must be at least
+	 * @max_num_pages * ``sizeof(__u32)``.
+	 *
+	 * The buffer object must have been created with
+	 * %DRM_PVR_BO_DEVICE_PM_FW_PROTECT set and
+	 * %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS not set.
+	 */
+	__u64 free_list_gpu_addr;
+
+	/** @initial_num_pages: [IN] Pages initially allocated to free list. */
+	__u32 initial_num_pages;
+
+	/** @max_num_pages: [IN] Maximum number of pages in free list. */
+	__u32 max_num_pages;
+
+	/** @grow_num_pages: [IN] Pages to grow free list by per request. */
+	__u32 grow_num_pages;
+
+	/**
+	 * @grow_threshold: [IN] Percentage of FL memory used that should
+	 * trigger a new grow request.
+	 */
+	__u32 grow_threshold;
+
+	/**
+	 * @vm_context_handle: [IN] Handle for VM context that the free list buffer
+	 * object is mapped in.
+	 */
+	__u32 vm_context_handle;
+
+	/**
+	 * @handle: [OUT] Handle for created free list.
+	 */
+	__u32 handle;
+};
+
+/**
+ * struct drm_pvr_ioctl_destroy_free_list_args - Arguments for
+ * %DRM_IOCTL_PVR_DESTROY_FREE_LIST
+ */
+struct drm_pvr_ioctl_destroy_free_list_args {
+	/**
+	 * @handle: [IN] Handle for free list to be destroyed.
+	 */
+	__u32 handle;
+
+	/** @_padding_4: Reserved. This field must be zeroed. */
+	__u32 _padding_4;
+};
+
+/**
+ * DOC: PowerVR IOCTL CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET interfaces
+ */
+
+/**
+ * struct drm_pvr_create_hwrt_geom_data_args - Geometry data arguments used for
+ * &struct drm_pvr_ioctl_create_hwrt_dataset_args.geom_data_args.
+ */
+struct drm_pvr_create_hwrt_geom_data_args {
+	/** @tpc_dev_addr: [IN] Tail pointer cache GPU virtual address. */
+	__u64 tpc_dev_addr;
+
+	/** @tpc_size: [IN] Size of TPC, in bytes. */
+	__u32 tpc_size;
+
+	/** @tpc_stride: [IN] Stride between layers in TPC, in pages */
+	__u32 tpc_stride;
+
+	/** @vheap_table_dev_addr: [IN] VHEAP table GPU virtual address. */
+	__u64 vheap_table_dev_addr;
+
+	/** @rtc_dev_addr: [IN] Render Target Cache virtual address. */
+	__u64 rtc_dev_addr;
+};
+
+/**
+ * struct drm_pvr_create_hwrt_rt_data_args - Render target arguments used for
+ * &struct drm_pvr_ioctl_create_hwrt_dataset_args.rt_data_args.
+ */
+struct drm_pvr_create_hwrt_rt_data_args {
+	/** @pm_mlist_dev_addr: [IN] PM MLIST GPU virtual address. */
+	__u64 pm_mlist_dev_addr;
+
+	/** @macrotile_array_dev_addr: [IN] Macrotile array GPU virtual address. */
+	__u64 macrotile_array_dev_addr;
+
+	/** @region_header_dev_addr: [IN] Region header array GPU virtual address. */
+	__u64 region_header_dev_addr;
+};
+
+#define PVR_DRM_HWRT_FREE_LIST_LOCAL 0
+#define PVR_DRM_HWRT_FREE_LIST_GLOBAL 1U
+
+/**
+ * struct drm_pvr_ioctl_create_hwrt_dataset_args - Arguments for
+ * %DRM_IOCTL_PVR_CREATE_HWRT_DATASET
+ */
+struct drm_pvr_ioctl_create_hwrt_dataset_args {
+	/** @geom_data_args: [IN] Geometry data arguments. */
+	struct drm_pvr_create_hwrt_geom_data_args geom_data_args;
+
+	/**
+	 * @rt_data_args: [IN] Array of render target arguments.
+	 *
+	 * Each entry in this array represents a render target in a double buffered
+	 * setup.
+	 */
+	struct drm_pvr_create_hwrt_rt_data_args rt_data_args[2];
+
+	/**
+	 * @free_list_handles: [IN] Array of free list handles.
+	 *
+	 * free_list_handles[PVR_DRM_HWRT_FREE_LIST_LOCAL] must have initial
+	 * size of at least that reported by
+	 * &drm_pvr_dev_query_runtime_info.free_list_min_pages.
+	 */
+	__u32 free_list_handles[2];
+
+	/** @width: [IN] Width in pixels. */
+	__u32 width;
+
+	/** @height: [IN] Height in pixels. */
+	__u32 height;
+
+	/** @samples: [IN] Number of samples. */
+	__u32 samples;
+
+	/** @layers: [IN] Number of layers. */
+	__u32 layers;
+
+	/** @isp_merge_lower_x: [IN] Lower X coefficient for triangle merging. */
+	__u32 isp_merge_lower_x;
+
+	/** @isp_merge_lower_y: [IN] Lower Y coefficient for triangle merging. */
+	__u32 isp_merge_lower_y;
+
+	/** @isp_merge_scale_x: [IN] Scale X coefficient for triangle merging. */
+	__u32 isp_merge_scale_x;
+
+	/** @isp_merge_scale_y: [IN] Scale Y coefficient for triangle merging. */
+	__u32 isp_merge_scale_y;
+
+	/** @isp_merge_upper_x: [IN] Upper X coefficient for triangle merging. */
+	__u32 isp_merge_upper_x;
+
+	/** @isp_merge_upper_y: [IN] Upper Y coefficient for triangle merging. */
+	__u32 isp_merge_upper_y;
+
+	/**
+	 * @region_header_size: [IN] Size of region header array. This common field is used by
+	 * both render targets in this data set.
+	 *
+	 * The units for this field differ depending on what version of the simple internal
+	 * parameter format the device uses. If format 2 is in use then this is interpreted as the
+	 * number of region headers. For other formats it is interpreted as the size in dwords.
+	 */
+	__u32 region_header_size;
+
+	/**
+	 * @handle: [OUT] Handle for created HWRT dataset.
+	 */
+	__u32 handle;
+};
+
+/**
+ * struct drm_pvr_ioctl_destroy_hwrt_dataset_args - Arguments for
+ * %DRM_IOCTL_PVR_DESTROY_HWRT_DATASET
+ */
+struct drm_pvr_ioctl_destroy_hwrt_dataset_args {
+	/**
+	 * @handle: [IN] Handle for HWRT dataset to be destroyed.
+	 */
+	__u32 handle;
+
+	/** @_padding_4: Reserved. This field must be zeroed. */
+	__u32 _padding_4;
+};
+
+/**
+ * DOC: PowerVR IOCTL SUBMIT_JOBS interface
+ */
+
+/**
+ * DOC: Flags for the drm_pvr_sync_op object.
+ *
+ * .. c:macro:: DRM_PVR_SYNC_OP_HANDLE_TYPE_MASK
+ *
+ *    Handle type mask for the drm_pvr_sync_op::flags field.
+ *
+ * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ
+ *
+ *    Indicates the handle passed in drm_pvr_sync_op::handle is a syncobj handle.
+ *    This is the default type.
+ *
+ * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ
+ *
+ *    Indicates the handle passed in drm_pvr_sync_op::handle is a timeline syncobj handle.
+ *
+ * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_SIGNAL
+ *
+ *    Signal operation requested. The out-fence bound to the job will be attached to
+ *    the syncobj whose handle is passed in drm_pvr_sync_op::handle.
+ *
+ * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_WAIT
+ *
+ *    Wait operation requested. The job will wait for this particular syncobj or syncobj
+ *    point to be signaled before being started.
+ *    This is the default operation.
+ */
+#define DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK 0xf
+#define DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ 0
+#define DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ 1
+#define DRM_PVR_SYNC_OP_FLAG_SIGNAL _BITULL(31)
+#define DRM_PVR_SYNC_OP_FLAG_WAIT 0
+
+#define DRM_PVR_SYNC_OP_FLAGS_MASK (DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK | \
+				    DRM_PVR_SYNC_OP_FLAG_SIGNAL)
+
+/**
+ * struct drm_pvr_sync_op - Object describing a sync operation
+ */
+struct drm_pvr_sync_op {
+	/** @handle: Handle of sync object. */
+	__u32 handle;
+
+	/** @flags: Combination of ``DRM_PVR_SYNC_OP_FLAG_`` flags. */
+	__u32 flags;
+
+	/** @value: Timeline value for this drm_syncobj. MBZ for a binary syncobj. */
+	__u64 value;
+};
+
+/**
+ * DOC: Flags for SUBMIT_JOB ioctl geometry command.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST
+ *
+ *    Indicates if this the first command to be issued for a render.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST
+ *
+ *    Indicates if this the last command to be issued for a render.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE
+ *
+ *    Forces to use single core in a multi core device.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK
+ *
+ *    Logical OR of all the geometry cmd flags.
+ */
+#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST _BITULL(0)
+#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST _BITULL(1)
+#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE _BITULL(2)
+#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK                                 \
+	(DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST |                                   \
+	 DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST |                                    \
+	 DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE)
+
+/**
+ * DOC: Flags for SUBMIT_JOB ioctl fragment command.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE
+ *
+ *    Use single core in a multi core setup.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER
+ *
+ *    Indicates whether a depth buffer is present.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER
+ *
+ *    Indicates whether a stencil buffer is present.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP
+ *
+ *    Disallow compute overlapped with this render.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS
+ *
+ *    Indicates whether this render produces visibility results.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER
+ *
+ *    Indicates whether partial renders write to a scratch buffer instead of
+ *    the final surface. It also forces the full screen copy expected to be
+ *    present on the last render after all partial renders have completed.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE
+ *
+ *    Disable pixel merging for this render.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK
+ *
+ *    Logical OR of all the fragment cmd flags.
+ */
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE _BITULL(0)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER _BITULL(1)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER _BITULL(2)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP _BITULL(3)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER _BITULL(4)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS _BITULL(5)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER _BITULL(6)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE _BITULL(7)
+#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK                                 \
+	(DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE |                             \
+	 DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER |                             \
+	 DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER |                           \
+	 DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP |                     \
+	 DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER |                           \
+	 DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS |                         \
+	 DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER |                          \
+	 DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE)
+
+/**
+ * DOC: Flags for SUBMIT_JOB ioctl compute command.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP
+ *
+ *    Disallow other jobs overlapped with this compute.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE
+ *
+ *    Forces to use single core in a multi core device.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK
+ *
+ *    Logical OR of all the compute cmd flags.
+ */
+#define DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP _BITULL(0)
+#define DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE _BITULL(1)
+#define DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK         \
+	(DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP | \
+	 DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE)
+
+/**
+ * DOC: Flags for SUBMIT_JOB ioctl transfer command.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE
+ *
+ *    Forces job to use a single core in a multi core device.
+ *
+ * .. c:macro:: DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK
+ *
+ *    Logical OR of all the transfer cmd flags.
+ */
+#define DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE _BITULL(0)
+
+#define DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK \
+	DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE
+
+/**
+ * enum drm_pvr_job_type - Arguments for &struct drm_pvr_job.job_type
+ */
+enum drm_pvr_job_type {
+	/** @DRM_PVR_JOB_TYPE_GEOMETRY: Job type is geometry. */
+	DRM_PVR_JOB_TYPE_GEOMETRY = 0,
+
+	/** @DRM_PVR_JOB_TYPE_FRAGMENT: Job type is fragment. */
+	DRM_PVR_JOB_TYPE_FRAGMENT,
+
+	/** @DRM_PVR_JOB_TYPE_COMPUTE: Job type is compute. */
+	DRM_PVR_JOB_TYPE_COMPUTE,
+
+	/** @DRM_PVR_JOB_TYPE_TRANSFER_FRAG: Job type is a fragment transfer. */
+	DRM_PVR_JOB_TYPE_TRANSFER_FRAG,
+};
+
+/**
+ * struct drm_pvr_hwrt_data_ref - Reference HWRT data
+ */
+struct drm_pvr_hwrt_data_ref {
+	/** @set_handle: HWRT data set handle. */
+	__u32 set_handle;
+
+	/** @data_index: Index of the HWRT data inside the data set. */
+	__u32 data_index;
+};
+
+/**
+ * struct drm_pvr_job - Job arguments passed to the %DRM_IOCTL_PVR_SUBMIT_JOBS ioctl
+ */
+struct drm_pvr_job {
+	/**
+	 * @type: [IN] Type of job being submitted
+	 *
+	 * This must be one of the values defined by &enum drm_pvr_job_type.
+	 */
+	__u32 type;
+
+	/**
+	 * @context_handle: [IN] Context handle.
+	 *
+	 * When @job_type is %DRM_PVR_JOB_TYPE_RENDER, %DRM_PVR_JOB_TYPE_COMPUTE or
+	 * %DRM_PVR_JOB_TYPE_TRANSFER_FRAG, this must be a valid handle returned by
+	 * %DRM_IOCTL_PVR_CREATE_CONTEXT. The type of context must be compatible
+	 * with the type of job being submitted.
+	 *
+	 * When @job_type is %DRM_PVR_JOB_TYPE_NULL, this must be zero.
+	 */
+	__u32 context_handle;
+
+	/**
+	 * @flags: [IN] Flags for command.
+	 *
+	 * Those are job-dependent. See all ``DRM_PVR_SUBMIT_JOB_*``.
+	 */
+	__u32 flags;
+
+	/**
+	 * @cmd_stream_len: [IN] Length of command stream, in bytes.
+	 */
+	__u32 cmd_stream_len;
+
+	/**
+	 * @cmd_stream: [IN] Pointer to command stream for command.
+	 *
+	 * The command stream must be u64-aligned.
+	 */
+	__u64 cmd_stream;
+
+	/** @sync_ops: [IN] Fragment sync operations. */
+	struct drm_pvr_obj_array sync_ops;
+
+	/**
+	 * @hwrt: [IN] HWRT data used by render jobs (geometry or fragment).
+	 *
+	 * Must be zero for non-render jobs.
+	 */
+	struct drm_pvr_hwrt_data_ref hwrt;
+};
+
+/**
+ * struct drm_pvr_ioctl_submit_jobs_args - Arguments for %DRM_IOCTL_PVR_SUBMIT_JOB
+ *
+ * If the syscall returns an error it is important to check the value of
+ * @jobs.count. This indicates the index into @jobs.array where the
+ * error occurred.
+ */
+struct drm_pvr_ioctl_submit_jobs_args {
+	/** @jobs: [IN] Array of jobs to submit. */
+	struct drm_pvr_obj_array jobs;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* PVR_DRM_UAPI_H */
-- 
GitLab


From 4babef0708656c54e67ee0ee3994ee98898f51d1 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:26 +0000
Subject: [PATCH 0401/2340] drm/imagination: Add skeleton PowerVR driver

This adds the basic skeleton of the driver. The driver registers
itself with DRM on probe. Ioctl handlers are currently implemented
as stubs.

Changes since v8:
- Corrected license identifiers

Changes since v5:
- Update compatible string & description to match marketing name
- Checkpatch fixes in to/from_pvr_device/file macros

Changes since v3:
- Clarify supported GPU generations in driver description
- Use drm_dev_unplug() when removing device
- Change from_* and to_* functions to macros
- Fix IS_PTR/PTR_ERR confusion in pvr_probe()
- Remove err_out labels in favour of direct returning
- Remove specific am62 compatible match string
- Drop MODULE_FIRMWARE()

Co-developed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Frank Binns <frank.binns@imgtec.com>
Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/fed8a77e29620a61aed2684f802339759082cf1b.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 MAINTAINERS                              |   1 +
 drivers/gpu/drm/Kconfig                  |   2 +
 drivers/gpu/drm/Makefile                 |   1 +
 drivers/gpu/drm/imagination/Kconfig      |  15 +
 drivers/gpu/drm/imagination/Makefile     |   9 +
 drivers/gpu/drm/imagination/pvr_device.h | 153 +++++++
 drivers/gpu/drm/imagination/pvr_drv.c    | 509 +++++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_drv.h    |  22 +
 8 files changed, 712 insertions(+)
 create mode 100644 drivers/gpu/drm/imagination/Kconfig
 create mode 100644 drivers/gpu/drm/imagination/Makefile
 create mode 100644 drivers/gpu/drm/imagination/pvr_device.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_drv.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_drv.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 37642011c319d..debbc1956b8e8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10398,6 +10398,7 @@ M:	Donald Robson <donald.robson@imgtec.com>
 M:	Matt Coster <matt.coster@imgtec.com>
 S:	Supported
 F:	Documentation/devicetree/bindings/gpu/img,powervr.yaml
+F:	drivers/gpu/drm/imagination/
 F:	include/uapi/drm/pvr_drm.h
 
 IMON SOUNDGRAPH USB IR RECEIVER
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index cdbc56e076498..740c1c0bd0680 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -394,6 +394,8 @@ source "drivers/gpu/drm/solomon/Kconfig"
 
 source "drivers/gpu/drm/sprd/Kconfig"
 
+source "drivers/gpu/drm/imagination/Kconfig"
+
 config DRM_HYPERV
 	tristate "DRM Support for Hyper-V synthetic video device"
 	depends on DRM && PCI && MMU && HYPERV
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index cdbe91ac0bfc8..b4cb0835620af 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -199,3 +199,4 @@ obj-$(CONFIG_DRM_HYPERV) += hyperv/
 obj-y			+= solomon/
 obj-$(CONFIG_DRM_SPRD) += sprd/
 obj-$(CONFIG_DRM_LOONGSON) += loongson/
+obj-$(CONFIG_DRM_POWERVR) += imagination/
diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig
new file mode 100644
index 0000000000000..03d0a5031f5cc
--- /dev/null
+++ b/drivers/gpu/drm/imagination/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (c) 2023 Imagination Technologies Ltd.
+
+config DRM_POWERVR
+	tristate "Imagination Technologies PowerVR (Series 6 and later) & IMG Graphics"
+	depends on ARM64
+	depends on DRM
+	select DRM_GEM_SHMEM_HELPER
+	select DRM_SCHED
+	select FW_LOADER
+	help
+	  Choose this option if you have a system that has an Imagination
+	  Technologies PowerVR (Series 6 or later) or IMG GPU.
+
+	  If "M" is selected, the module will be called powervr.
diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
new file mode 100644
index 0000000000000..f12a06ada9ec2
--- /dev/null
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (c) 2023 Imagination Technologies Ltd.
+
+subdir-ccflags-y := -I$(srctree)/$(src)
+
+powervr-y := \
+	pvr_drv.o \
+
+obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
new file mode 100644
index 0000000000000..d3629164a629f
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_DEVICE_H
+#define PVR_DEVICE_H
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_mm.h>
+
+#include <linux/bits.h>
+#include <linux/compiler_attributes.h>
+#include <linux/compiler_types.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+/**
+ * struct pvr_device - powervr-specific wrapper for &struct drm_device
+ */
+struct pvr_device {
+	/**
+	 * @base: The underlying &struct drm_device.
+	 *
+	 * Do not access this member directly, instead call
+	 * from_pvr_device().
+	 */
+	struct drm_device base;
+};
+
+/**
+ * struct pvr_file - powervr-specific data to be assigned to &struct
+ * drm_file.driver_priv
+ */
+struct pvr_file {
+	/**
+	 * @file: A reference to the parent &struct drm_file.
+	 *
+	 * Do not access this member directly, instead call from_pvr_file().
+	 */
+	struct drm_file *file;
+
+	/**
+	 * @pvr_dev: A reference to the powervr-specific wrapper for the
+	 *           associated device. Saves on repeated calls to
+	 *           to_pvr_device().
+	 */
+	struct pvr_device *pvr_dev;
+};
+
+#define from_pvr_device(pvr_dev) (&(pvr_dev)->base)
+
+#define to_pvr_device(drm_dev) container_of_const(drm_dev, struct pvr_device, base)
+
+#define from_pvr_file(pvr_file) ((pvr_file)->file)
+
+#define to_pvr_file(file) ((file)->driver_priv)
+
+/**
+ * DOC: IOCTL validation helpers
+ *
+ * To validate the constraints imposed on IOCTL argument structs, a collection
+ * of macros and helper functions exist in ``pvr_device.h``.
+ *
+ * Of the current helpers, it should only be necessary to call
+ * PVR_IOCTL_UNION_PADDING_CHECK() directly. This macro should be used once in
+ * every code path which extracts a union member from a struct passed from
+ * userspace.
+ */
+
+/**
+ * pvr_ioctl_union_padding_check() - Validate that the implicit padding between
+ * the end of a union member and the end of the union itself is zeroed.
+ * @instance: Pointer to the instance of the struct to validate.
+ * @union_offset: Offset into the type of @instance of the target union. Must
+ * be 64-bit aligned.
+ * @union_size: Size of the target union in the type of @instance. Must be
+ * 64-bit aligned.
+ * @member_size: Size of the target member in the target union specified by
+ * @union_offset and @union_size. It is assumed that the offset of the target
+ * member is zero relative to @union_offset. Must be 64-bit aligned.
+ *
+ * You probably want to use PVR_IOCTL_UNION_PADDING_CHECK() instead of calling
+ * this function directly, since that macro abstracts away much of the setup,
+ * and also provides some static validation. See its docs for details.
+ *
+ * Return:
+ *  * %true if every byte between the end of the used member of the union and
+ *    the end of that union is zeroed, or
+ *  * %false otherwise.
+ */
+static __always_inline bool
+pvr_ioctl_union_padding_check(void *instance, size_t union_offset,
+			      size_t union_size, size_t member_size)
+{
+	/*
+	 * void pointer arithmetic is technically illegal - cast to a byte
+	 * pointer so this addition works safely.
+	 */
+	void *padding_start = ((u8 *)instance) + union_offset + member_size;
+	size_t padding_size = union_size - member_size;
+
+	return !memchr_inv(padding_start, 0, padding_size);
+}
+
+/**
+ * PVR_STATIC_ASSERT_64BIT_ALIGNED() - Inline assertion for 64-bit alignment.
+ * @static_expr_: Target expression to evaluate.
+ *
+ * If @static_expr_ does not evaluate to a constant integer which would be a
+ * 64-bit aligned address (i.e. a multiple of 8), compilation will fail.
+ *
+ * Return:
+ * The value of @static_expr_.
+ */
+#define PVR_STATIC_ASSERT_64BIT_ALIGNED(static_expr_)                     \
+	({                                                                \
+		static_assert(((static_expr_) & (sizeof(u64) - 1)) == 0); \
+		(static_expr_);                                           \
+	})
+
+/**
+ * PVR_IOCTL_UNION_PADDING_CHECK() - Validate that the implicit padding between
+ * the end of a union member and the end of the union itself is zeroed.
+ * @struct_instance_: An expression which evaluates to a pointer to a UAPI data
+ * struct.
+ * @union_: The name of the union member of @struct_instance_ to check. If the
+ * union member is nested within the type of @struct_instance_, this may
+ * contain the member access operator (".").
+ * @member_: The name of the member of @union_ to assess.
+ *
+ * This is a wrapper around pvr_ioctl_union_padding_check() which performs
+ * alignment checks and simplifies things for the caller.
+ *
+ * Return:
+ *  * %true if every byte in @struct_instance_ between the end of @member_ and
+ *    the end of @union_ is zeroed, or
+ *  * %false otherwise.
+ */
+#define PVR_IOCTL_UNION_PADDING_CHECK(struct_instance_, union_, member_)     \
+	({                                                                   \
+		typeof(struct_instance_) __instance = (struct_instance_);    \
+		size_t __union_offset = PVR_STATIC_ASSERT_64BIT_ALIGNED(     \
+			offsetof(typeof(*__instance), union_));              \
+		size_t __union_size = PVR_STATIC_ASSERT_64BIT_ALIGNED(       \
+			sizeof(__instance->union_));                         \
+		size_t __member_size = PVR_STATIC_ASSERT_64BIT_ALIGNED(      \
+			sizeof(__instance->union_.member_));                 \
+		pvr_ioctl_union_padding_check(__instance, __union_offset,    \
+					      __union_size, __member_size);  \
+	})
+
+#endif /* PVR_DEVICE_H */
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
new file mode 100644
index 0000000000000..3d4f2ba2c0159
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -0,0 +1,509 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_drv.h"
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+/**
+ * DOC: PowerVR (Series 6 and later) and IMG Graphics Driver
+ *
+ * This driver supports the following PowerVR/IMG graphics cores from Imagination Technologies:
+ *
+ * * AXE-1-16M (found in Texas Instruments AM62)
+ */
+
+/**
+ * pvr_ioctl_create_bo() - IOCTL to create a GEM buffer object.
+ * @drm_dev: [IN] Target DRM device.
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ * &struct drm_pvr_ioctl_create_bo_args.
+ * @file: [IN] DRM file-private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_CREATE_BO.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if the value of &drm_pvr_ioctl_create_bo_args.size is zero
+ *    or wider than &typedef size_t,
+ *  * -%EINVAL if any bits in &drm_pvr_ioctl_create_bo_args.flags that are
+ *    reserved or undefined are set,
+ *  * -%EINVAL if any padding fields in &drm_pvr_ioctl_create_bo_args are not
+ *    zero,
+ *  * Any error encountered while creating the object (see
+ *    pvr_gem_object_create()), or
+ *  * Any error encountered while transferring ownership of the object into a
+ *    userspace-accessible handle (see pvr_gem_object_into_handle()).
+ */
+static int
+pvr_ioctl_create_bo(struct drm_device *drm_dev, void *raw_args,
+		    struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_get_bo_mmap_offset() - IOCTL to generate a "fake" offset to be
+ * used when calling mmap() from userspace to map the given GEM buffer object
+ * @drm_dev: [IN] DRM device (unused).
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ *                     &struct drm_pvr_ioctl_get_bo_mmap_offset_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET.
+ *
+ * This IOCTL does *not* perform an mmap. See the docs on
+ * &struct drm_pvr_ioctl_get_bo_mmap_offset_args for details.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%ENOENT if the handle does not reference a valid GEM buffer object,
+ *  * -%EINVAL if any padding fields in &struct
+ *    drm_pvr_ioctl_get_bo_mmap_offset_args are not zero, or
+ *  * Any error returned by drm_gem_create_mmap_offset().
+ */
+static int
+pvr_ioctl_get_bo_mmap_offset(struct drm_device *drm_dev, void *raw_args,
+			     struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_dev_query() - IOCTL to copy information about a device
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ *                     &struct drm_pvr_ioctl_dev_query_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_DEV_QUERY.
+ * If the given receiving struct pointer is NULL, or the indicated size is too
+ * small, the expected size of the struct type will be returned in the size
+ * argument field.
+ *
+ * Return:
+ *  * 0 on success or when fetching the size with args->pointer == NULL, or
+ *  * -%E2BIG if the indicated size of the receiving struct is less than is
+ *    required to contain the copied data, or
+ *  * -%EINVAL if the indicated struct type is unknown, or
+ *  * -%ENOMEM if local memory could not be allocated, or
+ *  * -%EFAULT if local memory could not be copied to userspace.
+ */
+static int
+pvr_ioctl_dev_query(struct drm_device *drm_dev, void *raw_args,
+		    struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_create_context() - IOCTL to create a context
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ *                     &struct drm_pvr_ioctl_create_context_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_CREATE_CONTEXT.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EINVAL if provided arguments are invalid, or
+ *  * -%EFAULT if arguments can't be copied from userspace, or
+ *  * Any error returned by pvr_create_render_context().
+ */
+static int
+pvr_ioctl_create_context(struct drm_device *drm_dev, void *raw_args,
+			 struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_destroy_context() - IOCTL to destroy a context
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ *                     &struct drm_pvr_ioctl_destroy_context_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_DESTROY_CONTEXT.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EINVAL if context not in context list.
+ */
+static int
+pvr_ioctl_destroy_context(struct drm_device *drm_dev, void *raw_args,
+			  struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_create_free_list() - IOCTL to create a free list
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ *                     &struct drm_pvr_ioctl_create_free_list_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_CREATE_FREE_LIST.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_free_list_create().
+ */
+static int
+pvr_ioctl_create_free_list(struct drm_device *drm_dev, void *raw_args,
+			   struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_destroy_free_list() - IOCTL to destroy a free list
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type
+ *                 &struct drm_pvr_ioctl_destroy_free_list_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_DESTROY_FREE_LIST.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EINVAL if free list not in object list.
+ */
+static int
+pvr_ioctl_destroy_free_list(struct drm_device *drm_dev, void *raw_args,
+			    struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_create_hwrt_dataset() - IOCTL to create a HWRT dataset
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ *                     &struct drm_pvr_ioctl_create_hwrt_dataset_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_CREATE_HWRT_DATASET.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_hwrt_dataset_create().
+ */
+static int
+pvr_ioctl_create_hwrt_dataset(struct drm_device *drm_dev, void *raw_args,
+			      struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_destroy_hwrt_dataset() - IOCTL to destroy a HWRT dataset
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type
+ *                 &struct drm_pvr_ioctl_destroy_hwrt_dataset_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_DESTROY_HWRT_DATASET.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EINVAL if HWRT dataset not in object list.
+ */
+static int
+pvr_ioctl_destroy_hwrt_dataset(struct drm_device *drm_dev, void *raw_args,
+			       struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_create_vm_context() - IOCTL to create a VM context
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN/OUT] Arguments passed to this IOCTL. This must be of type
+ *                     &struct drm_pvr_ioctl_create_vm_context_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_CREATE_VM_CONTEXT.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_vm_create_context().
+ */
+static int
+pvr_ioctl_create_vm_context(struct drm_device *drm_dev, void *raw_args,
+			    struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_destroy_vm_context() - IOCTL to destroy a VM context
+￼* @drm_dev: [IN] DRM device.
+￼* @raw_args: [IN] Arguments passed to this IOCTL. This must be of type
+￼*                 &struct drm_pvr_ioctl_destroy_vm_context_args.
+￼* @file: [IN] DRM file private data.
+￼*
+￼* Called from userspace with %DRM_IOCTL_PVR_DESTROY_VM_CONTEXT.
+￼*
+￼* Return:
+￼*  * 0 on success, or
+￼*  * -%EINVAL if object not in object list.
+ */
+static int
+pvr_ioctl_destroy_vm_context(struct drm_device *drm_dev, void *raw_args,
+			     struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_vm_map() - IOCTL to map buffer to GPU address space.
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type
+ *                 &struct drm_pvr_ioctl_vm_map_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_VM_MAP.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if &drm_pvr_ioctl_vm_op_map_args.flags is not zero,
+ *  * -%EINVAL if the bounds specified by &drm_pvr_ioctl_vm_op_map_args.offset
+ *    and &drm_pvr_ioctl_vm_op_map_args.size are not valid or do not fall
+ *    within the buffer object specified by
+ *    &drm_pvr_ioctl_vm_op_map_args.handle,
+ *  * -%EINVAL if the bounds specified by
+ *    &drm_pvr_ioctl_vm_op_map_args.device_addr and
+ *    &drm_pvr_ioctl_vm_op_map_args.size do not form a valid device-virtual
+ *    address range which falls entirely within a single heap, or
+ *  * -%ENOENT if &drm_pvr_ioctl_vm_op_map_args.handle does not refer to a
+ *    valid PowerVR buffer object.
+ */
+static int
+pvr_ioctl_vm_map(struct drm_device *drm_dev, void *raw_args,
+		 struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/**
+ * pvr_ioctl_vm_unmap() - IOCTL to unmap buffer from GPU address space.
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type
+ *                 &struct drm_pvr_ioctl_vm_unmap_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_VM_UNMAP.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if &drm_pvr_ioctl_vm_op_unmap_args.device_addr is not a valid
+ *    device page-aligned device-virtual address, or
+ *  * -%ENOENT if there is currently no PowerVR buffer object mapped at
+ *    &drm_pvr_ioctl_vm_op_unmap_args.device_addr.
+ */
+static int
+pvr_ioctl_vm_unmap(struct drm_device *drm_dev, void *raw_args,
+		   struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+/*
+ * pvr_ioctl_submit_job() - IOCTL to submit a job to the GPU
+ * @drm_dev: [IN] DRM device.
+ * @raw_args: [IN] Arguments passed to this IOCTL. This must be of type
+ *                 &struct drm_pvr_ioctl_submit_job_args.
+ * @file: [IN] DRM file private data.
+ *
+ * Called from userspace with %DRM_IOCTL_PVR_SUBMIT_JOB.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EINVAL if arguments are invalid.
+ */
+static int
+pvr_ioctl_submit_jobs(struct drm_device *drm_dev, void *raw_args,
+		      struct drm_file *file)
+{
+	return -ENOTTY;
+}
+
+#define DRM_PVR_IOCTL(_name, _func, _flags) \
+	DRM_IOCTL_DEF_DRV(PVR_##_name, pvr_ioctl_##_func, _flags)
+
+/* clang-format off */
+
+static const struct drm_ioctl_desc pvr_drm_driver_ioctls[] = {
+	DRM_PVR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(CREATE_BO, create_bo, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(GET_BO_MMAP_OFFSET, get_bo_mmap_offset, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(CREATE_VM_CONTEXT, create_vm_context, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(DESTROY_VM_CONTEXT, destroy_vm_context, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(VM_MAP, vm_map, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(VM_UNMAP, vm_unmap, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(CREATE_CONTEXT, create_context, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(DESTROY_CONTEXT, destroy_context, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(CREATE_FREE_LIST, create_free_list, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(DESTROY_FREE_LIST, destroy_free_list, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(CREATE_HWRT_DATASET, create_hwrt_dataset, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(DESTROY_HWRT_DATASET, destroy_hwrt_dataset, DRM_RENDER_ALLOW),
+	DRM_PVR_IOCTL(SUBMIT_JOBS, submit_jobs, DRM_RENDER_ALLOW),
+};
+
+/* clang-format on */
+
+#undef DRM_PVR_IOCTL
+
+/**
+ * pvr_drm_driver_open() - Driver callback when a new &struct drm_file is opened
+ * @drm_dev: [IN] DRM device.
+ * @file: [IN] DRM file private data.
+ *
+ * Allocates powervr-specific file private data (&struct pvr_file).
+ *
+ * Registered in &pvr_drm_driver.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%ENOMEM if the allocation of a &struct ipvr_file fails, or
+ *  * Any error returned by pvr_memory_context_init().
+ */
+static int
+pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file)
+{
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct pvr_file *pvr_file;
+
+	pvr_file = kzalloc(sizeof(*pvr_file), GFP_KERNEL);
+	if (!pvr_file)
+		return -ENOMEM;
+
+	/*
+	 * Store reference to base DRM file private data for use by
+	 * from_pvr_file.
+	 */
+	pvr_file->file = file;
+
+	/*
+	 * Store reference to powervr-specific outer device struct in file
+	 * private data for convenient access.
+	 */
+	pvr_file->pvr_dev = pvr_dev;
+
+	/*
+	 * Store reference to powervr-specific file private data in DRM file
+	 * private data.
+	 */
+	file->driver_priv = pvr_file;
+
+	return 0;
+}
+
+/**
+ * pvr_drm_driver_postclose() - One of the driver callbacks when a &struct
+ * drm_file is closed.
+ * @drm_dev: [IN] DRM device (unused).
+ * @file: [IN] DRM file private data.
+ *
+ * Frees powervr-specific file private data (&struct pvr_file).
+ *
+ * Registered in &pvr_drm_driver.
+ */
+static void
+pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
+			 struct drm_file *file)
+{
+	struct pvr_file *pvr_file = to_pvr_file(file);
+
+	kfree(pvr_file);
+	file->driver_priv = NULL;
+}
+
+DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops);
+
+static struct drm_driver pvr_drm_driver = {
+	.driver_features = DRIVER_RENDER,
+	.open = pvr_drm_driver_open,
+	.postclose = pvr_drm_driver_postclose,
+	.ioctls = pvr_drm_driver_ioctls,
+	.num_ioctls = ARRAY_SIZE(pvr_drm_driver_ioctls),
+	.fops = &pvr_drm_driver_fops,
+
+	.name = PVR_DRIVER_NAME,
+	.desc = PVR_DRIVER_DESC,
+	.date = PVR_DRIVER_DATE,
+	.major = PVR_DRIVER_MAJOR,
+	.minor = PVR_DRIVER_MINOR,
+	.patchlevel = PVR_DRIVER_PATCHLEVEL,
+
+};
+
+static int
+pvr_probe(struct platform_device *plat_dev)
+{
+	struct pvr_device *pvr_dev;
+	struct drm_device *drm_dev;
+
+	pvr_dev = devm_drm_dev_alloc(&plat_dev->dev, &pvr_drm_driver,
+				     struct pvr_device, base);
+	if (IS_ERR(pvr_dev))
+		return PTR_ERR(pvr_dev);
+
+	drm_dev = &pvr_dev->base;
+
+	platform_set_drvdata(plat_dev, drm_dev);
+
+	return drm_dev_register(drm_dev, 0);
+}
+
+static int
+pvr_remove(struct platform_device *plat_dev)
+{
+	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+
+	drm_dev_unplug(drm_dev);
+
+	return 0;
+}
+
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "img,img-axe", .data = NULL },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static struct platform_driver pvr_driver = {
+	.probe = pvr_probe,
+	.remove = pvr_remove,
+	.driver = {
+		.name = PVR_DRIVER_NAME,
+		.of_match_table = dt_match,
+	},
+};
+module_platform_driver(pvr_driver);
+
+MODULE_AUTHOR("Imagination Technologies Ltd.");
+MODULE_DESCRIPTION(PVR_DRIVER_DESC);
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_IMPORT_NS(DMA_BUF);
diff --git a/drivers/gpu/drm/imagination/pvr_drv.h b/drivers/gpu/drm/imagination/pvr_drv.h
new file mode 100644
index 0000000000000..c29d11af88fe9
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_drv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_DRV_H
+#define PVR_DRV_H
+
+#include "linux/compiler_attributes.h"
+#include <uapi/drm/pvr_drm.h>
+
+#define PVR_DRIVER_NAME "powervr"
+#define PVR_DRIVER_DESC "Imagination PowerVR (Series 6 and later) & IMG Graphics"
+#define PVR_DRIVER_DATE "20230904"
+
+/*
+ * Driver interface version:
+ *  - 1.0: Initial interface
+ */
+#define PVR_DRIVER_MAJOR 1
+#define PVR_DRIVER_MINOR 0
+#define PVR_DRIVER_PATCHLEVEL 0
+
+#endif /* PVR_DRV_H */
-- 
GitLab


From 1f88f017e6499261f46d3468befac7b1cdc96e52 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:27 +0000
Subject: [PATCH 0402/2340] drm/imagination: Get GPU resources

Acquire clock and register resources, and enable/map as appropriate.

Changes since v8:
- Corrected license identifiers

Changes since v3:
- Remove regulator resource (not used on supported platform)
- Use devm helpers
- Use devm_clk_get_optional() for optional clocks
- Don't prepare clocks on resource acquisition
- Drop pvr_device_clk_core_get_freq() helper
- Drop pvr_device_reg_fini()
- Drop NULLing of clocks in pvr_device_clk_init()
- Use dev_err_probe() on clock acquisition failure
- Remove PVR_CR_READ/WRITE helper macros
- Improve documentation for GPU clocks
- Remove regs resource (not used in this commit)

Co-developed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Frank Binns <frank.binns@imgtec.com>
Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/579027bd5be4eb3218c9784050ded2326ecbc352.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Makefile     |   1 +
 drivers/gpu/drm/imagination/pvr_device.c | 147 ++++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_device.h | 152 +++++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_drv.c    |  18 ++-
 4 files changed, 317 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_device.c

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index f12a06ada9ec2..d36007f2825cf 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -4,6 +4,7 @@
 subdir-ccflags-y := -I$(srctree)/$(src)
 
 powervr-y := \
+	pvr_device.o \
 	pvr_drv.o \
 
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
new file mode 100644
index 0000000000000..abcdf733f57b8
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+
+#include <drm/drm_print.h>
+
+#include <linux/clk.h>
+#include <linux/compiler_attributes.h>
+#include <linux/compiler_types.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+/**
+ * pvr_device_reg_init() - Initialize kernel access to a PowerVR device's
+ * control registers.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Sets struct pvr_device->regs.
+ *
+ * This method of mapping the device control registers into memory ensures that
+ * they are unmapped when the driver is detached (i.e. no explicit cleanup is
+ * required).
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by devm_platform_ioremap_resource().
+ */
+static int
+pvr_device_reg_init(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct platform_device *plat_dev = to_platform_device(drm_dev->dev);
+	void __iomem *regs;
+
+	pvr_dev->regs = NULL;
+
+	regs = devm_platform_ioremap_resource(plat_dev, 0);
+	if (IS_ERR(regs))
+		return dev_err_probe(drm_dev->dev, PTR_ERR(regs),
+				     "failed to ioremap gpu registers\n");
+
+	pvr_dev->regs = regs;
+
+	return 0;
+}
+
+/**
+ * pvr_device_clk_init() - Initialize clocks required by a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Sets struct pvr_device->core_clk, struct pvr_device->sys_clk and
+ * struct pvr_device->mem_clk.
+ *
+ * Three clocks are required by the PowerVR device: core, sys and mem. On
+ * return, this function guarantees that the clocks are in one of the following
+ * states:
+ *
+ *  * All successfully initialized,
+ *  * Core errored, sys and mem uninitialized,
+ *  * Core deinitialized, sys errored, mem uninitialized, or
+ *  * Core and sys deinitialized, mem errored.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by devm_clk_get(), or
+ *  * Any error returned by devm_clk_get_optional().
+ */
+static int pvr_device_clk_init(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct clk *core_clk;
+	struct clk *sys_clk;
+	struct clk *mem_clk;
+
+	core_clk = devm_clk_get(drm_dev->dev, "core");
+	if (IS_ERR(core_clk))
+		return dev_err_probe(drm_dev->dev, PTR_ERR(core_clk),
+				     "failed to get core clock\n");
+
+	sys_clk = devm_clk_get_optional(drm_dev->dev, "sys");
+	if (IS_ERR(sys_clk))
+		return dev_err_probe(drm_dev->dev, PTR_ERR(core_clk),
+				     "failed to get sys clock\n");
+
+	mem_clk = devm_clk_get_optional(drm_dev->dev, "mem");
+	if (IS_ERR(mem_clk))
+		return dev_err_probe(drm_dev->dev, PTR_ERR(core_clk),
+				     "failed to get mem clock\n");
+
+	pvr_dev->core_clk = core_clk;
+	pvr_dev->sys_clk = sys_clk;
+	pvr_dev->mem_clk = mem_clk;
+
+	return 0;
+}
+
+/**
+ * pvr_device_init() - Initialize a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ *
+ * If this function returns successfully, the device will have been fully
+ * initialized. Otherwise, any parts of the device initialized before an error
+ * occurs will be de-initialized before returning.
+ *
+ * NOTE: The initialization steps currently taken are the bare minimum required
+ *       to read from the control registers. The device is unlikely to function
+ *       until further initialization steps are added. [This note should be
+ *       removed when that happens.]
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by pvr_device_reg_init(),
+ *  * Any error returned by pvr_device_clk_init(), or
+ *  * Any error returned by pvr_device_gpu_init().
+ */
+int
+pvr_device_init(struct pvr_device *pvr_dev)
+{
+	int err;
+
+	/* Enable and initialize clocks required for the device to operate. */
+	err = pvr_device_clk_init(pvr_dev);
+	if (err)
+		return err;
+
+	/* Map the control registers into memory. */
+	return pvr_device_reg_init(pvr_dev);
+}
+
+/**
+ * pvr_device_fini() - Deinitialize a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ */
+void
+pvr_device_fini(struct pvr_device *pvr_dev)
+{
+	/*
+	 * Deinitialization stages are performed in reverse order compared to
+	 * the initialization stages in pvr_device_init().
+	 */
+}
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index d3629164a629f..d9d2804f44543 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -11,9 +11,22 @@
 #include <linux/bits.h>
 #include <linux/compiler_attributes.h>
 #include <linux/compiler_types.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+
+/* Forward declaration from <linux/clk.h>. */
+struct clk;
+
+/* Forward declaration from <linux/firmware.h>. */
+struct firmware;
 
 /**
  * struct pvr_device - powervr-specific wrapper for &struct drm_device
@@ -26,6 +39,37 @@ struct pvr_device {
 	 * from_pvr_device().
 	 */
 	struct drm_device base;
+
+	/**
+	 * @regs: Device control registers.
+	 *
+	 * These are mapped into memory when the device is initialized; that
+	 * location is where this pointer points.
+	 */
+	void __iomem *regs;
+
+	/**
+	 * @core_clk: General core clock.
+	 *
+	 * This is the primary clock used by the entire GPU core.
+	 */
+	struct clk *core_clk;
+
+	/**
+	 * @sys_clk: Optional system bus clock.
+	 *
+	 * This may be used on some platforms to provide an independent clock to the SoC Interface
+	 * (SOCIF). If present, this needs to be enabled/disabled together with @core_clk.
+	 */
+	struct clk *sys_clk;
+
+	/**
+	 * @mem_clk: Optional memory clock.
+	 *
+	 * This may be used on some platforms to provide an independent clock to the Memory
+	 * Interface (MEMIF). If present, this needs to be enabled/disabled together with @core_clk.
+	 */
+	struct clk *mem_clk;
 };
 
 /**
@@ -56,6 +100,114 @@ struct pvr_file {
 
 #define to_pvr_file(file) ((file)->driver_priv)
 
+int pvr_device_init(struct pvr_device *pvr_dev);
+void pvr_device_fini(struct pvr_device *pvr_dev);
+
+/**
+ * PVR_CR_FIELD_GET() - Extract a single field from a PowerVR control register
+ * @val: Value of the target register.
+ * @field: Field specifier, as defined in "pvr_rogue_cr_defs.h".
+ *
+ * Return: The extracted field.
+ */
+#define PVR_CR_FIELD_GET(val, field) FIELD_GET(~ROGUE_CR_##field##_CLRMSK, val)
+
+/**
+ * pvr_cr_read32() - Read a 32-bit register from a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ * @reg: Target register.
+ *
+ * Return: The value of the requested register.
+ */
+static __always_inline u32
+pvr_cr_read32(struct pvr_device *pvr_dev, u32 reg)
+{
+	return ioread32(pvr_dev->regs + reg);
+}
+
+/**
+ * pvr_cr_read64() - Read a 64-bit register from a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ * @reg: Target register.
+ *
+ * Return: The value of the requested register.
+ */
+static __always_inline u64
+pvr_cr_read64(struct pvr_device *pvr_dev, u32 reg)
+{
+	return ioread64(pvr_dev->regs + reg);
+}
+
+/**
+ * pvr_cr_write32() - Write to a 32-bit register in a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ * @reg: Target register.
+ * @val: Value to write.
+ */
+static __always_inline void
+pvr_cr_write32(struct pvr_device *pvr_dev, u32 reg, u32 val)
+{
+	iowrite32(val, pvr_dev->regs + reg);
+}
+
+/**
+ * pvr_cr_write64() - Write to a 64-bit register in a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ * @reg: Target register.
+ * @val: Value to write.
+ */
+static __always_inline void
+pvr_cr_write64(struct pvr_device *pvr_dev, u32 reg, u64 val)
+{
+	iowrite64(val, pvr_dev->regs + reg);
+}
+
+/**
+ * pvr_cr_poll_reg32() - Wait for a 32-bit register to match a given value by
+ *                       polling
+ * @pvr_dev: Target PowerVR device.
+ * @reg_addr: Address of register.
+ * @reg_value: Expected register value (after masking).
+ * @reg_mask: Mask of bits valid for comparison with @reg_value.
+ * @timeout_usec: Timeout length, in us.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%ETIMEDOUT on timeout.
+ */
+static __always_inline int
+pvr_cr_poll_reg32(struct pvr_device *pvr_dev, u32 reg_addr, u32 reg_value,
+		  u32 reg_mask, u64 timeout_usec)
+{
+	u32 value;
+
+	return readl_poll_timeout(pvr_dev->regs + reg_addr, value,
+		(value & reg_mask) == reg_value, 0, timeout_usec);
+}
+
+/**
+ * pvr_cr_poll_reg64() - Wait for a 64-bit register to match a given value by
+ *                       polling
+ * @pvr_dev: Target PowerVR device.
+ * @reg_addr: Address of register.
+ * @reg_value: Expected register value (after masking).
+ * @reg_mask: Mask of bits valid for comparison with @reg_value.
+ * @timeout_usec: Timeout length, in us.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%ETIMEDOUT on timeout.
+ */
+static __always_inline int
+pvr_cr_poll_reg64(struct pvr_device *pvr_dev, u32 reg_addr, u64 reg_value,
+		  u64 reg_mask, u64 timeout_usec)
+{
+	u64 value;
+
+	return readq_poll_timeout(pvr_dev->regs + reg_addr, value,
+		(value & reg_mask) == reg_value, 0, timeout_usec);
+}
+
 /**
  * DOC: IOCTL validation helpers
  *
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index 3d4f2ba2c0159..597188b31f965 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -464,6 +464,7 @@ pvr_probe(struct platform_device *plat_dev)
 {
 	struct pvr_device *pvr_dev;
 	struct drm_device *drm_dev;
+	int err;
 
 	pvr_dev = devm_drm_dev_alloc(&plat_dev->dev, &pvr_drm_driver,
 				     struct pvr_device, base);
@@ -474,14 +475,29 @@ pvr_probe(struct platform_device *plat_dev)
 
 	platform_set_drvdata(plat_dev, drm_dev);
 
-	return drm_dev_register(drm_dev, 0);
+	err = pvr_device_init(pvr_dev);
+	if (err)
+		return err;
+
+	err = drm_dev_register(drm_dev, 0);
+	if (err)
+		goto err_device_fini;
+
+	return 0;
+
+err_device_fini:
+	pvr_device_fini(pvr_dev);
+
+	return err;
 }
 
 static int
 pvr_remove(struct platform_device *plat_dev)
 {
 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
 
+	pvr_device_fini(pvr_dev);
 	drm_dev_unplug(drm_dev);
 
 	return 0;
-- 
GitLab


From b41ae495207eaab1363ac3d424e67f3f354ca2ce Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:28 +0000
Subject: [PATCH 0403/2340] drm/imagination: Add GPU register headers

Changes since v8:
- Corrected license identifiers

Changes since v5:
- Split up header commit due to size

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/9f1fbf6c18e9644ac10712e05893701f06aee6ae.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 .../gpu/drm/imagination/pvr_rogue_cr_defs.h   | 6193 +++++++++++++++++
 .../imagination/pvr_rogue_cr_defs_client.h    |  159 +
 drivers/gpu/drm/imagination/pvr_rogue_defs.h  |  179 +
 3 files changed, 6531 insertions(+)
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_defs.h

diff --git a/drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h
new file mode 100644
index 0000000000000..2a90d02796d3e
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h
@@ -0,0 +1,6193 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+/*  *** Autogenerated C -- do not edit ***  */
+
+#ifndef PVR_ROGUE_CR_DEFS_H
+#define PVR_ROGUE_CR_DEFS_H
+
+/* clang-format off */
+
+#define ROGUE_CR_DEFS_REVISION 1
+
+/* Register ROGUE_CR_RASTERISATION_INDIRECT */
+#define ROGUE_CR_RASTERISATION_INDIRECT 0x8238U
+#define ROGUE_CR_RASTERISATION_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_RASTERISATION_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_RASTERISATION_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_PBE_INDIRECT */
+#define ROGUE_CR_PBE_INDIRECT 0x83E0U
+#define ROGUE_CR_PBE_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_PBE_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_PBE_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_PBE_PERF_INDIRECT */
+#define ROGUE_CR_PBE_PERF_INDIRECT 0x83D8U
+#define ROGUE_CR_PBE_PERF_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_PBE_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_PBE_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_TPU_PERF_INDIRECT */
+#define ROGUE_CR_TPU_PERF_INDIRECT 0x83F0U
+#define ROGUE_CR_TPU_PERF_INDIRECT_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_TPU_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_TPU_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF8U
+
+/* Register ROGUE_CR_RASTERISATION_PERF_INDIRECT */
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT 0x8318U
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_RASTERISATION_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT */
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT 0x8028U
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_TPU_MCU_L0_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF8U
+
+/* Register ROGUE_CR_USC_PERF_INDIRECT */
+#define ROGUE_CR_USC_PERF_INDIRECT 0x8030U
+#define ROGUE_CR_USC_PERF_INDIRECT_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_USC_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_USC_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_BLACKPEARL_INDIRECT */
+#define ROGUE_CR_BLACKPEARL_INDIRECT 0x8388U
+#define ROGUE_CR_BLACKPEARL_INDIRECT_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_BLACKPEARL_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BLACKPEARL_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU
+
+/* Register ROGUE_CR_BLACKPEARL_PERF_INDIRECT */
+#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT 0x83F8U
+#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BLACKPEARL_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU
+
+/* Register ROGUE_CR_TEXAS3_PERF_INDIRECT */
+#define ROGUE_CR_TEXAS3_PERF_INDIRECT 0x83D0U
+#define ROGUE_CR_TEXAS3_PERF_INDIRECT_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_TEXAS3_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_TEXAS3_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFF8U
+
+/* Register ROGUE_CR_TEXAS_PERF_INDIRECT */
+#define ROGUE_CR_TEXAS_PERF_INDIRECT 0x8288U
+#define ROGUE_CR_TEXAS_PERF_INDIRECT_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_TEXAS_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_TEXAS_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU
+
+/* Register ROGUE_CR_BX_TU_PERF_INDIRECT */
+#define ROGUE_CR_BX_TU_PERF_INDIRECT 0xC900U
+#define ROGUE_CR_BX_TU_PERF_INDIRECT_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_BX_TU_PERF_INDIRECT_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BX_TU_PERF_INDIRECT_ADDRESS_CLRMSK 0xFFFFFFFCU
+
+/* Register ROGUE_CR_CLK_CTRL */
+#define ROGUE_CR_CLK_CTRL 0x0000U
+#define ROGUE_CR_CLK_CTRL__PBE2_XE__MASKFULL 0xFFFFFF003F3FFFFFULL
+#define ROGUE_CR_CLK_CTRL__S7_TOP__MASKFULL 0xCFCF03000F3F3F0FULL
+#define ROGUE_CR_CLK_CTRL_MASKFULL 0xFFFFFF003F3FFFFFULL
+#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_SHIFT 62U
+#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_CLRMSK 0x3FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_ON 0x4000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_TEXAS_AUTO 0x8000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_IPP_SHIFT 60U
+#define ROGUE_CR_CLK_CTRL_IPP_CLRMSK 0xCFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_IPP_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_IPP_ON 0x1000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_IPP_AUTO 0x2000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FBC_SHIFT 58U
+#define ROGUE_CR_CLK_CTRL_FBC_CLRMSK 0xF3FFFFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_FBC_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FBC_ON 0x0400000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FBC_AUTO 0x0800000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FBDC_SHIFT 56U
+#define ROGUE_CR_CLK_CTRL_FBDC_CLRMSK 0xFCFFFFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_FBDC_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FBDC_ON 0x0100000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FBDC_AUTO 0x0200000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_SHIFT 54U
+#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_CLRMSK 0xFF3FFFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_ON 0x0040000000000000ULL
+#define ROGUE_CR_CLK_CTRL_FB_TLCACHE_AUTO 0x0080000000000000ULL
+#define ROGUE_CR_CLK_CTRL_USCS_SHIFT 52U
+#define ROGUE_CR_CLK_CTRL_USCS_CLRMSK 0xFFCFFFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_USCS_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_USCS_ON 0x0010000000000000ULL
+#define ROGUE_CR_CLK_CTRL_USCS_AUTO 0x0020000000000000ULL
+#define ROGUE_CR_CLK_CTRL_PBE_SHIFT 50U
+#define ROGUE_CR_CLK_CTRL_PBE_CLRMSK 0xFFF3FFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_PBE_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_PBE_ON 0x0004000000000000ULL
+#define ROGUE_CR_CLK_CTRL_PBE_AUTO 0x0008000000000000ULL
+#define ROGUE_CR_CLK_CTRL_MCU_L1_SHIFT 48U
+#define ROGUE_CR_CLK_CTRL_MCU_L1_CLRMSK 0xFFFCFFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_MCU_L1_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_MCU_L1_ON 0x0001000000000000ULL
+#define ROGUE_CR_CLK_CTRL_MCU_L1_AUTO 0x0002000000000000ULL
+#define ROGUE_CR_CLK_CTRL_CDM_SHIFT 46U
+#define ROGUE_CR_CLK_CTRL_CDM_CLRMSK 0xFFFF3FFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_CDM_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_CDM_ON 0x0000400000000000ULL
+#define ROGUE_CR_CLK_CTRL_CDM_AUTO 0x0000800000000000ULL
+#define ROGUE_CR_CLK_CTRL_SIDEKICK_SHIFT 44U
+#define ROGUE_CR_CLK_CTRL_SIDEKICK_CLRMSK 0xFFFFCFFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_SIDEKICK_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_SIDEKICK_ON 0x0000100000000000ULL
+#define ROGUE_CR_CLK_CTRL_SIDEKICK_AUTO 0x0000200000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_SHIFT 42U
+#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_CLRMSK 0xFFFFF3FFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_ON 0x0000040000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_SIDEKICK_AUTO 0x0000080000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_SHIFT 40U
+#define ROGUE_CR_CLK_CTRL_BIF_CLRMSK 0xFFFFFCFFFFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_BIF_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_ON 0x0000010000000000ULL
+#define ROGUE_CR_CLK_CTRL_BIF_AUTO 0x0000020000000000ULL
+#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_SHIFT 28U
+#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_CLRMSK 0xFFFFFFFFCFFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_ON 0x0000000010000000ULL
+#define ROGUE_CR_CLK_CTRL_TPU_MCU_DEMUX_AUTO 0x0000000020000000ULL
+#define ROGUE_CR_CLK_CTRL_MCU_L0_SHIFT 26U
+#define ROGUE_CR_CLK_CTRL_MCU_L0_CLRMSK 0xFFFFFFFFF3FFFFFFULL
+#define ROGUE_CR_CLK_CTRL_MCU_L0_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_MCU_L0_ON 0x0000000004000000ULL
+#define ROGUE_CR_CLK_CTRL_MCU_L0_AUTO 0x0000000008000000ULL
+#define ROGUE_CR_CLK_CTRL_TPU_SHIFT 24U
+#define ROGUE_CR_CLK_CTRL_TPU_CLRMSK 0xFFFFFFFFFCFFFFFFULL
+#define ROGUE_CR_CLK_CTRL_TPU_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_TPU_ON 0x0000000001000000ULL
+#define ROGUE_CR_CLK_CTRL_TPU_AUTO 0x0000000002000000ULL
+#define ROGUE_CR_CLK_CTRL_USC_SHIFT 20U
+#define ROGUE_CR_CLK_CTRL_USC_CLRMSK 0xFFFFFFFFFFCFFFFFULL
+#define ROGUE_CR_CLK_CTRL_USC_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_USC_ON 0x0000000000100000ULL
+#define ROGUE_CR_CLK_CTRL_USC_AUTO 0x0000000000200000ULL
+#define ROGUE_CR_CLK_CTRL_TLA_SHIFT 18U
+#define ROGUE_CR_CLK_CTRL_TLA_CLRMSK 0xFFFFFFFFFFF3FFFFULL
+#define ROGUE_CR_CLK_CTRL_TLA_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_TLA_ON 0x0000000000040000ULL
+#define ROGUE_CR_CLK_CTRL_TLA_AUTO 0x0000000000080000ULL
+#define ROGUE_CR_CLK_CTRL_SLC_SHIFT 16U
+#define ROGUE_CR_CLK_CTRL_SLC_CLRMSK 0xFFFFFFFFFFFCFFFFULL
+#define ROGUE_CR_CLK_CTRL_SLC_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_SLC_ON 0x0000000000010000ULL
+#define ROGUE_CR_CLK_CTRL_SLC_AUTO 0x0000000000020000ULL
+#define ROGUE_CR_CLK_CTRL_UVS_SHIFT 14U
+#define ROGUE_CR_CLK_CTRL_UVS_CLRMSK 0xFFFFFFFFFFFF3FFFULL
+#define ROGUE_CR_CLK_CTRL_UVS_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_UVS_ON 0x0000000000004000ULL
+#define ROGUE_CR_CLK_CTRL_UVS_AUTO 0x0000000000008000ULL
+#define ROGUE_CR_CLK_CTRL_PDS_SHIFT 12U
+#define ROGUE_CR_CLK_CTRL_PDS_CLRMSK 0xFFFFFFFFFFFFCFFFULL
+#define ROGUE_CR_CLK_CTRL_PDS_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_PDS_ON 0x0000000000001000ULL
+#define ROGUE_CR_CLK_CTRL_PDS_AUTO 0x0000000000002000ULL
+#define ROGUE_CR_CLK_CTRL_VDM_SHIFT 10U
+#define ROGUE_CR_CLK_CTRL_VDM_CLRMSK 0xFFFFFFFFFFFFF3FFULL
+#define ROGUE_CR_CLK_CTRL_VDM_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_VDM_ON 0x0000000000000400ULL
+#define ROGUE_CR_CLK_CTRL_VDM_AUTO 0x0000000000000800ULL
+#define ROGUE_CR_CLK_CTRL_PM_SHIFT 8U
+#define ROGUE_CR_CLK_CTRL_PM_CLRMSK 0xFFFFFFFFFFFFFCFFULL
+#define ROGUE_CR_CLK_CTRL_PM_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_PM_ON 0x0000000000000100ULL
+#define ROGUE_CR_CLK_CTRL_PM_AUTO 0x0000000000000200ULL
+#define ROGUE_CR_CLK_CTRL_GPP_SHIFT 6U
+#define ROGUE_CR_CLK_CTRL_GPP_CLRMSK 0xFFFFFFFFFFFFFF3FULL
+#define ROGUE_CR_CLK_CTRL_GPP_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_GPP_ON 0x0000000000000040ULL
+#define ROGUE_CR_CLK_CTRL_GPP_AUTO 0x0000000000000080ULL
+#define ROGUE_CR_CLK_CTRL_TE_SHIFT 4U
+#define ROGUE_CR_CLK_CTRL_TE_CLRMSK 0xFFFFFFFFFFFFFFCFULL
+#define ROGUE_CR_CLK_CTRL_TE_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_TE_ON 0x0000000000000010ULL
+#define ROGUE_CR_CLK_CTRL_TE_AUTO 0x0000000000000020ULL
+#define ROGUE_CR_CLK_CTRL_TSP_SHIFT 2U
+#define ROGUE_CR_CLK_CTRL_TSP_CLRMSK 0xFFFFFFFFFFFFFFF3ULL
+#define ROGUE_CR_CLK_CTRL_TSP_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_TSP_ON 0x0000000000000004ULL
+#define ROGUE_CR_CLK_CTRL_TSP_AUTO 0x0000000000000008ULL
+#define ROGUE_CR_CLK_CTRL_ISP_SHIFT 0U
+#define ROGUE_CR_CLK_CTRL_ISP_CLRMSK 0xFFFFFFFFFFFFFFFCULL
+#define ROGUE_CR_CLK_CTRL_ISP_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL_ISP_ON 0x0000000000000001ULL
+#define ROGUE_CR_CLK_CTRL_ISP_AUTO 0x0000000000000002ULL
+
+/* Register ROGUE_CR_CLK_STATUS */
+#define ROGUE_CR_CLK_STATUS 0x0008U
+#define ROGUE_CR_CLK_STATUS__PBE2_XE__MASKFULL 0x00000001FFF077FFULL
+#define ROGUE_CR_CLK_STATUS__S7_TOP__MASKFULL 0x00000001B3101773ULL
+#define ROGUE_CR_CLK_STATUS_MASKFULL 0x00000001FFF077FFULL
+#define ROGUE_CR_CLK_STATUS_MCU_FBTC_SHIFT 32U
+#define ROGUE_CR_CLK_STATUS_MCU_FBTC_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_MCU_FBTC_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_MCU_FBTC_RUNNING 0x0000000100000000ULL
+#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_SHIFT 31U
+#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_CLRMSK 0xFFFFFFFF7FFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_BIF_TEXAS_RUNNING 0x0000000080000000ULL
+#define ROGUE_CR_CLK_STATUS_IPP_SHIFT 30U
+#define ROGUE_CR_CLK_STATUS_IPP_CLRMSK 0xFFFFFFFFBFFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_IPP_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_IPP_RUNNING 0x0000000040000000ULL
+#define ROGUE_CR_CLK_STATUS_FBC_SHIFT 29U
+#define ROGUE_CR_CLK_STATUS_FBC_CLRMSK 0xFFFFFFFFDFFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_FBC_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_FBC_RUNNING 0x0000000020000000ULL
+#define ROGUE_CR_CLK_STATUS_FBDC_SHIFT 28U
+#define ROGUE_CR_CLK_STATUS_FBDC_CLRMSK 0xFFFFFFFFEFFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_FBDC_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_FBDC_RUNNING 0x0000000010000000ULL
+#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_SHIFT 27U
+#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_CLRMSK 0xFFFFFFFFF7FFFFFFULL
+#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_FB_TLCACHE_RUNNING 0x0000000008000000ULL
+#define ROGUE_CR_CLK_STATUS_USCS_SHIFT 26U
+#define ROGUE_CR_CLK_STATUS_USCS_CLRMSK 0xFFFFFFFFFBFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_USCS_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_USCS_RUNNING 0x0000000004000000ULL
+#define ROGUE_CR_CLK_STATUS_PBE_SHIFT 25U
+#define ROGUE_CR_CLK_STATUS_PBE_CLRMSK 0xFFFFFFFFFDFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_PBE_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_PBE_RUNNING 0x0000000002000000ULL
+#define ROGUE_CR_CLK_STATUS_MCU_L1_SHIFT 24U
+#define ROGUE_CR_CLK_STATUS_MCU_L1_CLRMSK 0xFFFFFFFFFEFFFFFFULL
+#define ROGUE_CR_CLK_STATUS_MCU_L1_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_MCU_L1_RUNNING 0x0000000001000000ULL
+#define ROGUE_CR_CLK_STATUS_CDM_SHIFT 23U
+#define ROGUE_CR_CLK_STATUS_CDM_CLRMSK 0xFFFFFFFFFF7FFFFFULL
+#define ROGUE_CR_CLK_STATUS_CDM_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_CDM_RUNNING 0x0000000000800000ULL
+#define ROGUE_CR_CLK_STATUS_SIDEKICK_SHIFT 22U
+#define ROGUE_CR_CLK_STATUS_SIDEKICK_CLRMSK 0xFFFFFFFFFFBFFFFFULL
+#define ROGUE_CR_CLK_STATUS_SIDEKICK_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_SIDEKICK_RUNNING 0x0000000000400000ULL
+#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_SHIFT 21U
+#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_BIF_SIDEKICK_RUNNING 0x0000000000200000ULL
+#define ROGUE_CR_CLK_STATUS_BIF_SHIFT 20U
+#define ROGUE_CR_CLK_STATUS_BIF_CLRMSK 0xFFFFFFFFFFEFFFFFULL
+#define ROGUE_CR_CLK_STATUS_BIF_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_BIF_RUNNING 0x0000000000100000ULL
+#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_SHIFT 14U
+#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_CLRMSK 0xFFFFFFFFFFFFBFFFULL
+#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_TPU_MCU_DEMUX_RUNNING 0x0000000000004000ULL
+#define ROGUE_CR_CLK_STATUS_MCU_L0_SHIFT 13U
+#define ROGUE_CR_CLK_STATUS_MCU_L0_CLRMSK 0xFFFFFFFFFFFFDFFFULL
+#define ROGUE_CR_CLK_STATUS_MCU_L0_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_MCU_L0_RUNNING 0x0000000000002000ULL
+#define ROGUE_CR_CLK_STATUS_TPU_SHIFT 12U
+#define ROGUE_CR_CLK_STATUS_TPU_CLRMSK 0xFFFFFFFFFFFFEFFFULL
+#define ROGUE_CR_CLK_STATUS_TPU_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_TPU_RUNNING 0x0000000000001000ULL
+#define ROGUE_CR_CLK_STATUS_USC_SHIFT 10U
+#define ROGUE_CR_CLK_STATUS_USC_CLRMSK 0xFFFFFFFFFFFFFBFFULL
+#define ROGUE_CR_CLK_STATUS_USC_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_USC_RUNNING 0x0000000000000400ULL
+#define ROGUE_CR_CLK_STATUS_TLA_SHIFT 9U
+#define ROGUE_CR_CLK_STATUS_TLA_CLRMSK 0xFFFFFFFFFFFFFDFFULL
+#define ROGUE_CR_CLK_STATUS_TLA_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_TLA_RUNNING 0x0000000000000200ULL
+#define ROGUE_CR_CLK_STATUS_SLC_SHIFT 8U
+#define ROGUE_CR_CLK_STATUS_SLC_CLRMSK 0xFFFFFFFFFFFFFEFFULL
+#define ROGUE_CR_CLK_STATUS_SLC_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_SLC_RUNNING 0x0000000000000100ULL
+#define ROGUE_CR_CLK_STATUS_UVS_SHIFT 7U
+#define ROGUE_CR_CLK_STATUS_UVS_CLRMSK 0xFFFFFFFFFFFFFF7FULL
+#define ROGUE_CR_CLK_STATUS_UVS_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_UVS_RUNNING 0x0000000000000080ULL
+#define ROGUE_CR_CLK_STATUS_PDS_SHIFT 6U
+#define ROGUE_CR_CLK_STATUS_PDS_CLRMSK 0xFFFFFFFFFFFFFFBFULL
+#define ROGUE_CR_CLK_STATUS_PDS_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_PDS_RUNNING 0x0000000000000040ULL
+#define ROGUE_CR_CLK_STATUS_VDM_SHIFT 5U
+#define ROGUE_CR_CLK_STATUS_VDM_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_CLK_STATUS_VDM_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_VDM_RUNNING 0x0000000000000020ULL
+#define ROGUE_CR_CLK_STATUS_PM_SHIFT 4U
+#define ROGUE_CR_CLK_STATUS_PM_CLRMSK 0xFFFFFFFFFFFFFFEFULL
+#define ROGUE_CR_CLK_STATUS_PM_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_PM_RUNNING 0x0000000000000010ULL
+#define ROGUE_CR_CLK_STATUS_GPP_SHIFT 3U
+#define ROGUE_CR_CLK_STATUS_GPP_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_CLK_STATUS_GPP_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_GPP_RUNNING 0x0000000000000008ULL
+#define ROGUE_CR_CLK_STATUS_TE_SHIFT 2U
+#define ROGUE_CR_CLK_STATUS_TE_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_CLK_STATUS_TE_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_TE_RUNNING 0x0000000000000004ULL
+#define ROGUE_CR_CLK_STATUS_TSP_SHIFT 1U
+#define ROGUE_CR_CLK_STATUS_TSP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_CLK_STATUS_TSP_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_TSP_RUNNING 0x0000000000000002ULL
+#define ROGUE_CR_CLK_STATUS_ISP_SHIFT 0U
+#define ROGUE_CR_CLK_STATUS_ISP_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_CLK_STATUS_ISP_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS_ISP_RUNNING 0x0000000000000001ULL
+
+/* Register ROGUE_CR_CORE_ID */
+#define ROGUE_CR_CORE_ID__PBVNC 0x0020U
+#define ROGUE_CR_CORE_ID__PBVNC__MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_CORE_ID__PBVNC__BRANCH_ID_SHIFT 48U
+#define ROGUE_CR_CORE_ID__PBVNC__BRANCH_ID_CLRMSK 0x0000FFFFFFFFFFFFULL
+#define ROGUE_CR_CORE_ID__PBVNC__VERSION_ID_SHIFT 32U
+#define ROGUE_CR_CORE_ID__PBVNC__VERSION_ID_CLRMSK 0xFFFF0000FFFFFFFFULL
+#define ROGUE_CR_CORE_ID__PBVNC__NUMBER_OF_SCALABLE_UNITS_SHIFT 16U
+#define ROGUE_CR_CORE_ID__PBVNC__NUMBER_OF_SCALABLE_UNITS_CLRMSK 0xFFFFFFFF0000FFFFULL
+#define ROGUE_CR_CORE_ID__PBVNC__CONFIG_ID_SHIFT 0U
+#define ROGUE_CR_CORE_ID__PBVNC__CONFIG_ID_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_CORE_ID */
+#define ROGUE_CR_CORE_ID 0x0018U
+#define ROGUE_CR_CORE_ID_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_CORE_ID_ID_SHIFT 16U
+#define ROGUE_CR_CORE_ID_ID_CLRMSK 0x0000FFFFU
+#define ROGUE_CR_CORE_ID_CONFIG_SHIFT 0U
+#define ROGUE_CR_CORE_ID_CONFIG_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_CORE_REVISION */
+#define ROGUE_CR_CORE_REVISION 0x0020U
+#define ROGUE_CR_CORE_REVISION_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_CORE_REVISION_DESIGNER_SHIFT 24U
+#define ROGUE_CR_CORE_REVISION_DESIGNER_CLRMSK 0x00FFFFFFU
+#define ROGUE_CR_CORE_REVISION_MAJOR_SHIFT 16U
+#define ROGUE_CR_CORE_REVISION_MAJOR_CLRMSK 0xFF00FFFFU
+#define ROGUE_CR_CORE_REVISION_MINOR_SHIFT 8U
+#define ROGUE_CR_CORE_REVISION_MINOR_CLRMSK 0xFFFF00FFU
+#define ROGUE_CR_CORE_REVISION_MAINTENANCE_SHIFT 0U
+#define ROGUE_CR_CORE_REVISION_MAINTENANCE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_DESIGNER_REV_FIELD1 */
+#define ROGUE_CR_DESIGNER_REV_FIELD1 0x0028U
+#define ROGUE_CR_DESIGNER_REV_FIELD1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_DESIGNER_REV_FIELD1_DESIGNER_REV_FIELD1_SHIFT 0U
+#define ROGUE_CR_DESIGNER_REV_FIELD1_DESIGNER_REV_FIELD1_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_DESIGNER_REV_FIELD2 */
+#define ROGUE_CR_DESIGNER_REV_FIELD2 0x0030U
+#define ROGUE_CR_DESIGNER_REV_FIELD2_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_DESIGNER_REV_FIELD2_DESIGNER_REV_FIELD2_SHIFT 0U
+#define ROGUE_CR_DESIGNER_REV_FIELD2_DESIGNER_REV_FIELD2_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_CHANGESET_NUMBER */
+#define ROGUE_CR_CHANGESET_NUMBER 0x0040U
+#define ROGUE_CR_CHANGESET_NUMBER_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_CHANGESET_NUMBER_CHANGESET_NUMBER_SHIFT 0U
+#define ROGUE_CR_CHANGESET_NUMBER_CHANGESET_NUMBER_CLRMSK 0x0000000000000000ULL
+
+/* Register ROGUE_CR_CLK_XTPLUS_CTRL */
+#define ROGUE_CR_CLK_XTPLUS_CTRL 0x0080U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_MASKFULL 0x0000003FFFFF0000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_SHIFT 36U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_CLRMSK 0xFFFFFFCFFFFFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_ON 0x0000001000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_TDM_AUTO 0x0000002000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_SHIFT 34U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_CLRMSK 0xFFFFFFF3FFFFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_ON 0x0000000400000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_ASTC_AUTO 0x0000000800000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_SHIFT 32U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_CLRMSK 0xFFFFFFFCFFFFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_ON 0x0000000100000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_IPF_AUTO 0x0000000200000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_SHIFT 30U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_CLRMSK 0xFFFFFFFF3FFFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_ON 0x0000000040000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_COMPUTE_AUTO 0x0000000080000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_SHIFT 28U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_CLRMSK 0xFFFFFFFFCFFFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_ON 0x0000000010000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PIXEL_AUTO 0x0000000020000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_SHIFT 26U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_CLRMSK 0xFFFFFFFFF3FFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_ON 0x0000000004000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_VERTEX_AUTO 0x0000000008000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_SHIFT 24U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_CLRMSK 0xFFFFFFFFFCFFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_ON 0x0000000001000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USCPS_AUTO 0x0000000002000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_SHIFT 22U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_CLRMSK 0xFFFFFFFFFF3FFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_ON 0x0000000000400000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_PDS_SHARED_AUTO 0x0000000000800000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_SHIFT 20U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_CLRMSK 0xFFFFFFFFFFCFFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_ON 0x0000000000100000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_BIF_BLACKPEARL_AUTO 0x0000000000200000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_SHIFT 18U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_CLRMSK 0xFFFFFFFFFFF3FFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_ON 0x0000000000040000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_USC_SHARED_AUTO 0x0000000000080000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_SHIFT 16U
+#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_CLRMSK 0xFFFFFFFFFFFCFFFFULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_ON 0x0000000000010000ULL
+#define ROGUE_CR_CLK_XTPLUS_CTRL_GEOMETRY_AUTO 0x0000000000020000ULL
+
+/* Register ROGUE_CR_CLK_XTPLUS_STATUS */
+#define ROGUE_CR_CLK_XTPLUS_STATUS 0x0088U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_MASKFULL 0x00000000000007FFULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_SHIFT 10U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_CLRMSK 0xFFFFFFFFFFFFFBFFULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_TDM_RUNNING 0x0000000000000400ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_SHIFT 9U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_CLRMSK 0xFFFFFFFFFFFFFDFFULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_IPF_RUNNING 0x0000000000000200ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_SHIFT 8U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_CLRMSK 0xFFFFFFFFFFFFFEFFULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_COMPUTE_RUNNING 0x0000000000000100ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_SHIFT 7U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_CLRMSK 0xFFFFFFFFFFFFFF7FULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_ASTC_RUNNING 0x0000000000000080ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_SHIFT 6U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_CLRMSK 0xFFFFFFFFFFFFFFBFULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PIXEL_RUNNING 0x0000000000000040ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_SHIFT 5U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_VERTEX_RUNNING 0x0000000000000020ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_SHIFT 4U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_CLRMSK 0xFFFFFFFFFFFFFFEFULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USCPS_RUNNING 0x0000000000000010ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_SHIFT 3U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_PDS_SHARED_RUNNING 0x0000000000000008ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_SHIFT 2U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_BIF_BLACKPEARL_RUNNING 0x0000000000000004ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_SHIFT 1U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_USC_SHARED_RUNNING 0x0000000000000002ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_SHIFT 0U
+#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_XTPLUS_STATUS_GEOMETRY_RUNNING 0x0000000000000001ULL
+
+/* Register ROGUE_CR_SOFT_RESET */
+#define ROGUE_CR_SOFT_RESET 0x0100U
+#define ROGUE_CR_SOFT_RESET__PBE2_XE__MASKFULL 0xFFEFFFFFFFFFFC3DULL
+#define ROGUE_CR_SOFT_RESET_MASKFULL 0x00E7FFFFFFFFFC3DULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM3_CORE_SHIFT 63U
+#define ROGUE_CR_SOFT_RESET_PHANTOM3_CORE_CLRMSK 0x7FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM3_CORE_EN 0x8000000000000000ULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM2_CORE_SHIFT 62U
+#define ROGUE_CR_SOFT_RESET_PHANTOM2_CORE_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM2_CORE_EN 0x4000000000000000ULL
+#define ROGUE_CR_SOFT_RESET_BERNADO2_CORE_SHIFT 61U
+#define ROGUE_CR_SOFT_RESET_BERNADO2_CORE_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_BERNADO2_CORE_EN 0x2000000000000000ULL
+#define ROGUE_CR_SOFT_RESET_JONES_CORE_SHIFT 60U
+#define ROGUE_CR_SOFT_RESET_JONES_CORE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_JONES_CORE_EN 0x1000000000000000ULL
+#define ROGUE_CR_SOFT_RESET_TILING_CORE_SHIFT 59U
+#define ROGUE_CR_SOFT_RESET_TILING_CORE_CLRMSK 0xF7FFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_TILING_CORE_EN 0x0800000000000000ULL
+#define ROGUE_CR_SOFT_RESET_TE3_SHIFT 58U
+#define ROGUE_CR_SOFT_RESET_TE3_CLRMSK 0xFBFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_TE3_EN 0x0400000000000000ULL
+#define ROGUE_CR_SOFT_RESET_VCE_SHIFT 57U
+#define ROGUE_CR_SOFT_RESET_VCE_CLRMSK 0xFDFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_VCE_EN 0x0200000000000000ULL
+#define ROGUE_CR_SOFT_RESET_VBS_SHIFT 56U
+#define ROGUE_CR_SOFT_RESET_VBS_CLRMSK 0xFEFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_VBS_EN 0x0100000000000000ULL
+#define ROGUE_CR_SOFT_RESET_DPX1_CORE_SHIFT 55U
+#define ROGUE_CR_SOFT_RESET_DPX1_CORE_CLRMSK 0xFF7FFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DPX1_CORE_EN 0x0080000000000000ULL
+#define ROGUE_CR_SOFT_RESET_DPX0_CORE_SHIFT 54U
+#define ROGUE_CR_SOFT_RESET_DPX0_CORE_CLRMSK 0xFFBFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DPX0_CORE_EN 0x0040000000000000ULL
+#define ROGUE_CR_SOFT_RESET_FBA_SHIFT 53U
+#define ROGUE_CR_SOFT_RESET_FBA_CLRMSK 0xFFDFFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_FBA_EN 0x0020000000000000ULL
+#define ROGUE_CR_SOFT_RESET_FB_CDC_SHIFT 51U
+#define ROGUE_CR_SOFT_RESET_FB_CDC_CLRMSK 0xFFF7FFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_FB_CDC_EN 0x0008000000000000ULL
+#define ROGUE_CR_SOFT_RESET_SH_SHIFT 50U
+#define ROGUE_CR_SOFT_RESET_SH_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_SH_EN 0x0004000000000000ULL
+#define ROGUE_CR_SOFT_RESET_VRDM_SHIFT 49U
+#define ROGUE_CR_SOFT_RESET_VRDM_CLRMSK 0xFFFDFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_VRDM_EN 0x0002000000000000ULL
+#define ROGUE_CR_SOFT_RESET_MCU_FBTC_SHIFT 48U
+#define ROGUE_CR_SOFT_RESET_MCU_FBTC_CLRMSK 0xFFFEFFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_MCU_FBTC_EN 0x0001000000000000ULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM1_CORE_SHIFT 47U
+#define ROGUE_CR_SOFT_RESET_PHANTOM1_CORE_CLRMSK 0xFFFF7FFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM1_CORE_EN 0x0000800000000000ULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM0_CORE_SHIFT 46U
+#define ROGUE_CR_SOFT_RESET_PHANTOM0_CORE_CLRMSK 0xFFFFBFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_PHANTOM0_CORE_EN 0x0000400000000000ULL
+#define ROGUE_CR_SOFT_RESET_BERNADO1_CORE_SHIFT 45U
+#define ROGUE_CR_SOFT_RESET_BERNADO1_CORE_CLRMSK 0xFFFFDFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_BERNADO1_CORE_EN 0x0000200000000000ULL
+#define ROGUE_CR_SOFT_RESET_BERNADO0_CORE_SHIFT 44U
+#define ROGUE_CR_SOFT_RESET_BERNADO0_CORE_CLRMSK 0xFFFFEFFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_BERNADO0_CORE_EN 0x0000100000000000ULL
+#define ROGUE_CR_SOFT_RESET_IPP_SHIFT 43U
+#define ROGUE_CR_SOFT_RESET_IPP_CLRMSK 0xFFFFF7FFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_IPP_EN 0x0000080000000000ULL
+#define ROGUE_CR_SOFT_RESET_BIF_TEXAS_SHIFT 42U
+#define ROGUE_CR_SOFT_RESET_BIF_TEXAS_CLRMSK 0xFFFFFBFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_BIF_TEXAS_EN 0x0000040000000000ULL
+#define ROGUE_CR_SOFT_RESET_TORNADO_CORE_SHIFT 41U
+#define ROGUE_CR_SOFT_RESET_TORNADO_CORE_CLRMSK 0xFFFFFDFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_TORNADO_CORE_EN 0x0000020000000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_H_CORE_SHIFT 40U
+#define ROGUE_CR_SOFT_RESET_DUST_H_CORE_CLRMSK 0xFFFFFEFFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_H_CORE_EN 0x0000010000000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_G_CORE_SHIFT 39U
+#define ROGUE_CR_SOFT_RESET_DUST_G_CORE_CLRMSK 0xFFFFFF7FFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_G_CORE_EN 0x0000008000000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_F_CORE_SHIFT 38U
+#define ROGUE_CR_SOFT_RESET_DUST_F_CORE_CLRMSK 0xFFFFFFBFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_F_CORE_EN 0x0000004000000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_E_CORE_SHIFT 37U
+#define ROGUE_CR_SOFT_RESET_DUST_E_CORE_CLRMSK 0xFFFFFFDFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_E_CORE_EN 0x0000002000000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_D_CORE_SHIFT 36U
+#define ROGUE_CR_SOFT_RESET_DUST_D_CORE_CLRMSK 0xFFFFFFEFFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_D_CORE_EN 0x0000001000000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_C_CORE_SHIFT 35U
+#define ROGUE_CR_SOFT_RESET_DUST_C_CORE_CLRMSK 0xFFFFFFF7FFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_C_CORE_EN 0x0000000800000000ULL
+#define ROGUE_CR_SOFT_RESET_MMU_SHIFT 34U
+#define ROGUE_CR_SOFT_RESET_MMU_CLRMSK 0xFFFFFFFBFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_MMU_EN 0x0000000400000000ULL
+#define ROGUE_CR_SOFT_RESET_BIF1_SHIFT 33U
+#define ROGUE_CR_SOFT_RESET_BIF1_CLRMSK 0xFFFFFFFDFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_BIF1_EN 0x0000000200000000ULL
+#define ROGUE_CR_SOFT_RESET_GARTEN_SHIFT 32U
+#define ROGUE_CR_SOFT_RESET_GARTEN_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_GARTEN_EN 0x0000000100000000ULL
+#define ROGUE_CR_SOFT_RESET_CPU_SHIFT 32U
+#define ROGUE_CR_SOFT_RESET_CPU_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_CPU_EN 0x0000000100000000ULL
+#define ROGUE_CR_SOFT_RESET_RASCAL_CORE_SHIFT 31U
+#define ROGUE_CR_SOFT_RESET_RASCAL_CORE_CLRMSK 0xFFFFFFFF7FFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_RASCAL_CORE_EN 0x0000000080000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_B_CORE_SHIFT 30U
+#define ROGUE_CR_SOFT_RESET_DUST_B_CORE_CLRMSK 0xFFFFFFFFBFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_B_CORE_EN 0x0000000040000000ULL
+#define ROGUE_CR_SOFT_RESET_DUST_A_CORE_SHIFT 29U
+#define ROGUE_CR_SOFT_RESET_DUST_A_CORE_CLRMSK 0xFFFFFFFFDFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_DUST_A_CORE_EN 0x0000000020000000ULL
+#define ROGUE_CR_SOFT_RESET_FB_TLCACHE_SHIFT 28U
+#define ROGUE_CR_SOFT_RESET_FB_TLCACHE_CLRMSK 0xFFFFFFFFEFFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_FB_TLCACHE_EN 0x0000000010000000ULL
+#define ROGUE_CR_SOFT_RESET_SLC_SHIFT 27U
+#define ROGUE_CR_SOFT_RESET_SLC_CLRMSK 0xFFFFFFFFF7FFFFFFULL
+#define ROGUE_CR_SOFT_RESET_SLC_EN 0x0000000008000000ULL
+#define ROGUE_CR_SOFT_RESET_TLA_SHIFT 26U
+#define ROGUE_CR_SOFT_RESET_TLA_CLRMSK 0xFFFFFFFFFBFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_TLA_EN 0x0000000004000000ULL
+#define ROGUE_CR_SOFT_RESET_UVS_SHIFT 25U
+#define ROGUE_CR_SOFT_RESET_UVS_CLRMSK 0xFFFFFFFFFDFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_UVS_EN 0x0000000002000000ULL
+#define ROGUE_CR_SOFT_RESET_TE_SHIFT 24U
+#define ROGUE_CR_SOFT_RESET_TE_CLRMSK 0xFFFFFFFFFEFFFFFFULL
+#define ROGUE_CR_SOFT_RESET_TE_EN 0x0000000001000000ULL
+#define ROGUE_CR_SOFT_RESET_GPP_SHIFT 23U
+#define ROGUE_CR_SOFT_RESET_GPP_CLRMSK 0xFFFFFFFFFF7FFFFFULL
+#define ROGUE_CR_SOFT_RESET_GPP_EN 0x0000000000800000ULL
+#define ROGUE_CR_SOFT_RESET_FBDC_SHIFT 22U
+#define ROGUE_CR_SOFT_RESET_FBDC_CLRMSK 0xFFFFFFFFFFBFFFFFULL
+#define ROGUE_CR_SOFT_RESET_FBDC_EN 0x0000000000400000ULL
+#define ROGUE_CR_SOFT_RESET_FBC_SHIFT 21U
+#define ROGUE_CR_SOFT_RESET_FBC_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_SOFT_RESET_FBC_EN 0x0000000000200000ULL
+#define ROGUE_CR_SOFT_RESET_PM_SHIFT 20U
+#define ROGUE_CR_SOFT_RESET_PM_CLRMSK 0xFFFFFFFFFFEFFFFFULL
+#define ROGUE_CR_SOFT_RESET_PM_EN 0x0000000000100000ULL
+#define ROGUE_CR_SOFT_RESET_PBE_SHIFT 19U
+#define ROGUE_CR_SOFT_RESET_PBE_CLRMSK 0xFFFFFFFFFFF7FFFFULL
+#define ROGUE_CR_SOFT_RESET_PBE_EN 0x0000000000080000ULL
+#define ROGUE_CR_SOFT_RESET_USC_SHARED_SHIFT 18U
+#define ROGUE_CR_SOFT_RESET_USC_SHARED_CLRMSK 0xFFFFFFFFFFFBFFFFULL
+#define ROGUE_CR_SOFT_RESET_USC_SHARED_EN 0x0000000000040000ULL
+#define ROGUE_CR_SOFT_RESET_MCU_L1_SHIFT 17U
+#define ROGUE_CR_SOFT_RESET_MCU_L1_CLRMSK 0xFFFFFFFFFFFDFFFFULL
+#define ROGUE_CR_SOFT_RESET_MCU_L1_EN 0x0000000000020000ULL
+#define ROGUE_CR_SOFT_RESET_BIF_SHIFT 16U
+#define ROGUE_CR_SOFT_RESET_BIF_CLRMSK 0xFFFFFFFFFFFEFFFFULL
+#define ROGUE_CR_SOFT_RESET_BIF_EN 0x0000000000010000ULL
+#define ROGUE_CR_SOFT_RESET_CDM_SHIFT 15U
+#define ROGUE_CR_SOFT_RESET_CDM_CLRMSK 0xFFFFFFFFFFFF7FFFULL
+#define ROGUE_CR_SOFT_RESET_CDM_EN 0x0000000000008000ULL
+#define ROGUE_CR_SOFT_RESET_VDM_SHIFT 14U
+#define ROGUE_CR_SOFT_RESET_VDM_CLRMSK 0xFFFFFFFFFFFFBFFFULL
+#define ROGUE_CR_SOFT_RESET_VDM_EN 0x0000000000004000ULL
+#define ROGUE_CR_SOFT_RESET_TESS_SHIFT 13U
+#define ROGUE_CR_SOFT_RESET_TESS_CLRMSK 0xFFFFFFFFFFFFDFFFULL
+#define ROGUE_CR_SOFT_RESET_TESS_EN 0x0000000000002000ULL
+#define ROGUE_CR_SOFT_RESET_PDS_SHIFT 12U
+#define ROGUE_CR_SOFT_RESET_PDS_CLRMSK 0xFFFFFFFFFFFFEFFFULL
+#define ROGUE_CR_SOFT_RESET_PDS_EN 0x0000000000001000ULL
+#define ROGUE_CR_SOFT_RESET_ISP_SHIFT 11U
+#define ROGUE_CR_SOFT_RESET_ISP_CLRMSK 0xFFFFFFFFFFFFF7FFULL
+#define ROGUE_CR_SOFT_RESET_ISP_EN 0x0000000000000800ULL
+#define ROGUE_CR_SOFT_RESET_TSP_SHIFT 10U
+#define ROGUE_CR_SOFT_RESET_TSP_CLRMSK 0xFFFFFFFFFFFFFBFFULL
+#define ROGUE_CR_SOFT_RESET_TSP_EN 0x0000000000000400ULL
+#define ROGUE_CR_SOFT_RESET_SYSARB_SHIFT 5U
+#define ROGUE_CR_SOFT_RESET_SYSARB_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_SOFT_RESET_SYSARB_EN 0x0000000000000020ULL
+#define ROGUE_CR_SOFT_RESET_TPU_MCU_DEMUX_SHIFT 4U
+#define ROGUE_CR_SOFT_RESET_TPU_MCU_DEMUX_CLRMSK 0xFFFFFFFFFFFFFFEFULL
+#define ROGUE_CR_SOFT_RESET_TPU_MCU_DEMUX_EN 0x0000000000000010ULL
+#define ROGUE_CR_SOFT_RESET_MCU_L0_SHIFT 3U
+#define ROGUE_CR_SOFT_RESET_MCU_L0_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_SOFT_RESET_MCU_L0_EN 0x0000000000000008ULL
+#define ROGUE_CR_SOFT_RESET_TPU_SHIFT 2U
+#define ROGUE_CR_SOFT_RESET_TPU_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_SOFT_RESET_TPU_EN 0x0000000000000004ULL
+#define ROGUE_CR_SOFT_RESET_USC_SHIFT 0U
+#define ROGUE_CR_SOFT_RESET_USC_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_SOFT_RESET_USC_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_SOFT_RESET2 */
+#define ROGUE_CR_SOFT_RESET2 0x0108U
+#define ROGUE_CR_SOFT_RESET2_MASKFULL 0x00000000001FFFFFULL
+#define ROGUE_CR_SOFT_RESET2_SPFILTER_SHIFT 12U
+#define ROGUE_CR_SOFT_RESET2_SPFILTER_CLRMSK 0xFFE00FFFU
+#define ROGUE_CR_SOFT_RESET2_TDM_SHIFT 11U
+#define ROGUE_CR_SOFT_RESET2_TDM_CLRMSK 0xFFFFF7FFU
+#define ROGUE_CR_SOFT_RESET2_TDM_EN 0x00000800U
+#define ROGUE_CR_SOFT_RESET2_ASTC_SHIFT 10U
+#define ROGUE_CR_SOFT_RESET2_ASTC_CLRMSK 0xFFFFFBFFU
+#define ROGUE_CR_SOFT_RESET2_ASTC_EN 0x00000400U
+#define ROGUE_CR_SOFT_RESET2_BLACKPEARL_SHIFT 9U
+#define ROGUE_CR_SOFT_RESET2_BLACKPEARL_CLRMSK 0xFFFFFDFFU
+#define ROGUE_CR_SOFT_RESET2_BLACKPEARL_EN 0x00000200U
+#define ROGUE_CR_SOFT_RESET2_USCPS_SHIFT 8U
+#define ROGUE_CR_SOFT_RESET2_USCPS_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_SOFT_RESET2_USCPS_EN 0x00000100U
+#define ROGUE_CR_SOFT_RESET2_IPF_SHIFT 7U
+#define ROGUE_CR_SOFT_RESET2_IPF_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_SOFT_RESET2_IPF_EN 0x00000080U
+#define ROGUE_CR_SOFT_RESET2_GEOMETRY_SHIFT 6U
+#define ROGUE_CR_SOFT_RESET2_GEOMETRY_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_SOFT_RESET2_GEOMETRY_EN 0x00000040U
+#define ROGUE_CR_SOFT_RESET2_USC_SHARED_SHIFT 5U
+#define ROGUE_CR_SOFT_RESET2_USC_SHARED_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_SOFT_RESET2_USC_SHARED_EN 0x00000020U
+#define ROGUE_CR_SOFT_RESET2_PDS_SHARED_SHIFT 4U
+#define ROGUE_CR_SOFT_RESET2_PDS_SHARED_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SOFT_RESET2_PDS_SHARED_EN 0x00000010U
+#define ROGUE_CR_SOFT_RESET2_BIF_BLACKPEARL_SHIFT 3U
+#define ROGUE_CR_SOFT_RESET2_BIF_BLACKPEARL_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SOFT_RESET2_BIF_BLACKPEARL_EN 0x00000008U
+#define ROGUE_CR_SOFT_RESET2_PIXEL_SHIFT 2U
+#define ROGUE_CR_SOFT_RESET2_PIXEL_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SOFT_RESET2_PIXEL_EN 0x00000004U
+#define ROGUE_CR_SOFT_RESET2_CDM_SHIFT 1U
+#define ROGUE_CR_SOFT_RESET2_CDM_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SOFT_RESET2_CDM_EN 0x00000002U
+#define ROGUE_CR_SOFT_RESET2_VERTEX_SHIFT 0U
+#define ROGUE_CR_SOFT_RESET2_VERTEX_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SOFT_RESET2_VERTEX_EN 0x00000001U
+
+/* Register ROGUE_CR_EVENT_STATUS */
+#define ROGUE_CR_EVENT_STATUS 0x0130U
+#define ROGUE_CR_EVENT_STATUS__ROGUEXE__MASKFULL 0x00000000E01DFFFFULL
+#define ROGUE_CR_EVENT_STATUS__SIGNALS__MASKFULL 0x00000000E007FFFFULL
+#define ROGUE_CR_EVENT_STATUS_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_EVENT_STATUS_TDM_FENCE_FINISHED_SHIFT 31U
+#define ROGUE_CR_EVENT_STATUS_TDM_FENCE_FINISHED_CLRMSK 0x7FFFFFFFU
+#define ROGUE_CR_EVENT_STATUS_TDM_FENCE_FINISHED_EN 0x80000000U
+#define ROGUE_CR_EVENT_STATUS_TDM_BUFFER_STALL_SHIFT 30U
+#define ROGUE_CR_EVENT_STATUS_TDM_BUFFER_STALL_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_EVENT_STATUS_TDM_BUFFER_STALL_EN 0x40000000U
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_SIGNAL_FAILURE_SHIFT 29U
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_SIGNAL_FAILURE_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_SIGNAL_FAILURE_EN 0x20000000U
+#define ROGUE_CR_EVENT_STATUS_DPX_OUT_OF_MEMORY_SHIFT 28U
+#define ROGUE_CR_EVENT_STATUS_DPX_OUT_OF_MEMORY_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_EVENT_STATUS_DPX_OUT_OF_MEMORY_EN 0x10000000U
+#define ROGUE_CR_EVENT_STATUS_DPX_MMU_PAGE_FAULT_SHIFT 27U
+#define ROGUE_CR_EVENT_STATUS_DPX_MMU_PAGE_FAULT_CLRMSK 0xF7FFFFFFU
+#define ROGUE_CR_EVENT_STATUS_DPX_MMU_PAGE_FAULT_EN 0x08000000U
+#define ROGUE_CR_EVENT_STATUS_RPM_OUT_OF_MEMORY_SHIFT 26U
+#define ROGUE_CR_EVENT_STATUS_RPM_OUT_OF_MEMORY_CLRMSK 0xFBFFFFFFU
+#define ROGUE_CR_EVENT_STATUS_RPM_OUT_OF_MEMORY_EN 0x04000000U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC3_FINISHED_SHIFT 25U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC3_FINISHED_CLRMSK 0xFDFFFFFFU
+#define ROGUE_CR_EVENT_STATUS_FBA_FC3_FINISHED_EN 0x02000000U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC2_FINISHED_SHIFT 24U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC2_FINISHED_CLRMSK 0xFEFFFFFFU
+#define ROGUE_CR_EVENT_STATUS_FBA_FC2_FINISHED_EN 0x01000000U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC1_FINISHED_SHIFT 23U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC1_FINISHED_CLRMSK 0xFF7FFFFFU
+#define ROGUE_CR_EVENT_STATUS_FBA_FC1_FINISHED_EN 0x00800000U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC0_FINISHED_SHIFT 22U
+#define ROGUE_CR_EVENT_STATUS_FBA_FC0_FINISHED_CLRMSK 0xFFBFFFFFU
+#define ROGUE_CR_EVENT_STATUS_FBA_FC0_FINISHED_EN 0x00400000U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC3_FINISHED_SHIFT 21U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC3_FINISHED_CLRMSK 0xFFDFFFFFU
+#define ROGUE_CR_EVENT_STATUS_RDM_FC3_FINISHED_EN 0x00200000U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC2_FINISHED_SHIFT 20U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC2_FINISHED_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_EVENT_STATUS_RDM_FC2_FINISHED_EN 0x00100000U
+#define ROGUE_CR_EVENT_STATUS_SAFETY_SHIFT 20U
+#define ROGUE_CR_EVENT_STATUS_SAFETY_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_EVENT_STATUS_SAFETY_EN 0x00100000U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC1_FINISHED_SHIFT 19U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC1_FINISHED_CLRMSK 0xFFF7FFFFU
+#define ROGUE_CR_EVENT_STATUS_RDM_FC1_FINISHED_EN 0x00080000U
+#define ROGUE_CR_EVENT_STATUS_SLAVE_REQ_SHIFT 19U
+#define ROGUE_CR_EVENT_STATUS_SLAVE_REQ_CLRMSK 0xFFF7FFFFU
+#define ROGUE_CR_EVENT_STATUS_SLAVE_REQ_EN 0x00080000U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC0_FINISHED_SHIFT 18U
+#define ROGUE_CR_EVENT_STATUS_RDM_FC0_FINISHED_CLRMSK 0xFFFBFFFFU
+#define ROGUE_CR_EVENT_STATUS_RDM_FC0_FINISHED_EN 0x00040000U
+#define ROGUE_CR_EVENT_STATUS_TDM_CONTEXT_STORE_FINISHED_SHIFT 18U
+#define ROGUE_CR_EVENT_STATUS_TDM_CONTEXT_STORE_FINISHED_CLRMSK 0xFFFBFFFFU
+#define ROGUE_CR_EVENT_STATUS_TDM_CONTEXT_STORE_FINISHED_EN 0x00040000U
+#define ROGUE_CR_EVENT_STATUS_SHG_FINISHED_SHIFT 17U
+#define ROGUE_CR_EVENT_STATUS_SHG_FINISHED_CLRMSK 0xFFFDFFFFU
+#define ROGUE_CR_EVENT_STATUS_SHG_FINISHED_EN 0x00020000U
+#define ROGUE_CR_EVENT_STATUS_SPFILTER_SIGNAL_UPDATE_SHIFT 17U
+#define ROGUE_CR_EVENT_STATUS_SPFILTER_SIGNAL_UPDATE_CLRMSK 0xFFFDFFFFU
+#define ROGUE_CR_EVENT_STATUS_SPFILTER_SIGNAL_UPDATE_EN 0x00020000U
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_BUFFER_STALL_SHIFT 16U
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_BUFFER_STALL_CLRMSK 0xFFFEFFFFU
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_BUFFER_STALL_EN 0x00010000U
+#define ROGUE_CR_EVENT_STATUS_USC_TRIGGER_SHIFT 15U
+#define ROGUE_CR_EVENT_STATUS_USC_TRIGGER_CLRMSK 0xFFFF7FFFU
+#define ROGUE_CR_EVENT_STATUS_USC_TRIGGER_EN 0x00008000U
+#define ROGUE_CR_EVENT_STATUS_ZLS_FINISHED_SHIFT 14U
+#define ROGUE_CR_EVENT_STATUS_ZLS_FINISHED_CLRMSK 0xFFFFBFFFU
+#define ROGUE_CR_EVENT_STATUS_ZLS_FINISHED_EN 0x00004000U
+#define ROGUE_CR_EVENT_STATUS_GPIO_ACK_SHIFT 13U
+#define ROGUE_CR_EVENT_STATUS_GPIO_ACK_CLRMSK 0xFFFFDFFFU
+#define ROGUE_CR_EVENT_STATUS_GPIO_ACK_EN 0x00002000U
+#define ROGUE_CR_EVENT_STATUS_GPIO_REQ_SHIFT 12U
+#define ROGUE_CR_EVENT_STATUS_GPIO_REQ_CLRMSK 0xFFFFEFFFU
+#define ROGUE_CR_EVENT_STATUS_GPIO_REQ_EN 0x00001000U
+#define ROGUE_CR_EVENT_STATUS_POWER_ABORT_SHIFT 11U
+#define ROGUE_CR_EVENT_STATUS_POWER_ABORT_CLRMSK 0xFFFFF7FFU
+#define ROGUE_CR_EVENT_STATUS_POWER_ABORT_EN 0x00000800U
+#define ROGUE_CR_EVENT_STATUS_POWER_COMPLETE_SHIFT 10U
+#define ROGUE_CR_EVENT_STATUS_POWER_COMPLETE_CLRMSK 0xFFFFFBFFU
+#define ROGUE_CR_EVENT_STATUS_POWER_COMPLETE_EN 0x00000400U
+#define ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT_SHIFT 9U
+#define ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT_CLRMSK 0xFFFFFDFFU
+#define ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT_EN 0x00000200U
+#define ROGUE_CR_EVENT_STATUS_PM_3D_MEM_FREE_SHIFT 8U
+#define ROGUE_CR_EVENT_STATUS_PM_3D_MEM_FREE_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_EVENT_STATUS_PM_3D_MEM_FREE_EN 0x00000100U
+#define ROGUE_CR_EVENT_STATUS_PM_OUT_OF_MEMORY_SHIFT 7U
+#define ROGUE_CR_EVENT_STATUS_PM_OUT_OF_MEMORY_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_EVENT_STATUS_PM_OUT_OF_MEMORY_EN 0x00000080U
+#define ROGUE_CR_EVENT_STATUS_TA_TERMINATE_SHIFT 6U
+#define ROGUE_CR_EVENT_STATUS_TA_TERMINATE_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_EVENT_STATUS_TA_TERMINATE_EN 0x00000040U
+#define ROGUE_CR_EVENT_STATUS_TA_FINISHED_SHIFT 5U
+#define ROGUE_CR_EVENT_STATUS_TA_FINISHED_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_EVENT_STATUS_TA_FINISHED_EN 0x00000020U
+#define ROGUE_CR_EVENT_STATUS_ISP_END_MACROTILE_SHIFT 4U
+#define ROGUE_CR_EVENT_STATUS_ISP_END_MACROTILE_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_EVENT_STATUS_ISP_END_MACROTILE_EN 0x00000010U
+#define ROGUE_CR_EVENT_STATUS_PIXELBE_END_RENDER_SHIFT 3U
+#define ROGUE_CR_EVENT_STATUS_PIXELBE_END_RENDER_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_EVENT_STATUS_PIXELBE_END_RENDER_EN 0x00000008U
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_FINISHED_SHIFT 2U
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_FINISHED_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_EVENT_STATUS_COMPUTE_FINISHED_EN 0x00000004U
+#define ROGUE_CR_EVENT_STATUS_KERNEL_FINISHED_SHIFT 1U
+#define ROGUE_CR_EVENT_STATUS_KERNEL_FINISHED_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_EVENT_STATUS_KERNEL_FINISHED_EN 0x00000002U
+#define ROGUE_CR_EVENT_STATUS_TLA_COMPLETE_SHIFT 0U
+#define ROGUE_CR_EVENT_STATUS_TLA_COMPLETE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_EVENT_STATUS_TLA_COMPLETE_EN 0x00000001U
+
+/* Register ROGUE_CR_TIMER */
+#define ROGUE_CR_TIMER 0x0160U
+#define ROGUE_CR_TIMER_MASKFULL 0x8000FFFFFFFFFFFFULL
+#define ROGUE_CR_TIMER_BIT31_SHIFT 63U
+#define ROGUE_CR_TIMER_BIT31_CLRMSK 0x7FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_TIMER_BIT31_EN 0x8000000000000000ULL
+#define ROGUE_CR_TIMER_VALUE_SHIFT 0U
+#define ROGUE_CR_TIMER_VALUE_CLRMSK 0xFFFF000000000000ULL
+
+/* Register ROGUE_CR_TLA_STATUS */
+#define ROGUE_CR_TLA_STATUS 0x0178U
+#define ROGUE_CR_TLA_STATUS_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_TLA_STATUS_BLIT_COUNT_SHIFT 39U
+#define ROGUE_CR_TLA_STATUS_BLIT_COUNT_CLRMSK 0x0000007FFFFFFFFFULL
+#define ROGUE_CR_TLA_STATUS_REQUEST_SHIFT 7U
+#define ROGUE_CR_TLA_STATUS_REQUEST_CLRMSK 0xFFFFFF800000007FULL
+#define ROGUE_CR_TLA_STATUS_FIFO_FULLNESS_SHIFT 1U
+#define ROGUE_CR_TLA_STATUS_FIFO_FULLNESS_CLRMSK 0xFFFFFFFFFFFFFF81ULL
+#define ROGUE_CR_TLA_STATUS_BUSY_SHIFT 0U
+#define ROGUE_CR_TLA_STATUS_BUSY_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_TLA_STATUS_BUSY_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_PM_PARTIAL_RENDER_ENABLE */
+#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE 0x0338U
+#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_OP_SHIFT 0U
+#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_OP_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_PM_PARTIAL_RENDER_ENABLE_OP_EN 0x00000001U
+
+/* Register ROGUE_CR_SIDEKICK_IDLE */
+#define ROGUE_CR_SIDEKICK_IDLE 0x03C8U
+#define ROGUE_CR_SIDEKICK_IDLE_MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_SIDEKICK_IDLE_FB_CDC_SHIFT 6U
+#define ROGUE_CR_SIDEKICK_IDLE_FB_CDC_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_SIDEKICK_IDLE_FB_CDC_EN 0x00000040U
+#define ROGUE_CR_SIDEKICK_IDLE_MMU_SHIFT 5U
+#define ROGUE_CR_SIDEKICK_IDLE_MMU_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_SIDEKICK_IDLE_MMU_EN 0x00000020U
+#define ROGUE_CR_SIDEKICK_IDLE_BIF128_SHIFT 4U
+#define ROGUE_CR_SIDEKICK_IDLE_BIF128_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SIDEKICK_IDLE_BIF128_EN 0x00000010U
+#define ROGUE_CR_SIDEKICK_IDLE_TLA_SHIFT 3U
+#define ROGUE_CR_SIDEKICK_IDLE_TLA_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SIDEKICK_IDLE_TLA_EN 0x00000008U
+#define ROGUE_CR_SIDEKICK_IDLE_GARTEN_SHIFT 2U
+#define ROGUE_CR_SIDEKICK_IDLE_GARTEN_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN 0x00000004U
+#define ROGUE_CR_SIDEKICK_IDLE_HOSTIF_SHIFT 1U
+#define ROGUE_CR_SIDEKICK_IDLE_HOSTIF_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SIDEKICK_IDLE_HOSTIF_EN 0x00000002U
+#define ROGUE_CR_SIDEKICK_IDLE_SOCIF_SHIFT 0U
+#define ROGUE_CR_SIDEKICK_IDLE_SOCIF_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SIDEKICK_IDLE_SOCIF_EN 0x00000001U
+
+/* Register ROGUE_CR_MARS_IDLE */
+#define ROGUE_CR_MARS_IDLE 0x08F8U
+#define ROGUE_CR_MARS_IDLE_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_MARS_IDLE_MH_SYSARB0_SHIFT 2U
+#define ROGUE_CR_MARS_IDLE_MH_SYSARB0_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_MARS_IDLE_MH_SYSARB0_EN 0x00000004U
+#define ROGUE_CR_MARS_IDLE_CPU_SHIFT 1U
+#define ROGUE_CR_MARS_IDLE_CPU_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_MARS_IDLE_CPU_EN 0x00000002U
+#define ROGUE_CR_MARS_IDLE_SOCIF_SHIFT 0U
+#define ROGUE_CR_MARS_IDLE_SOCIF_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MARS_IDLE_SOCIF_EN 0x00000001U
+
+/* Register ROGUE_CR_VDM_CONTEXT_STORE_STATUS */
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS 0x0430U
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_MASKFULL 0x00000000000000F3ULL
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_LAST_PIPE_SHIFT 4U
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_LAST_PIPE_CLRMSK 0xFFFFFF0FU
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_NEED_RESUME_SHIFT 1U
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_NEED_RESUME_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_NEED_RESUME_EN 0x00000002U
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_COMPLETE_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_COMPLETE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_VDM_CONTEXT_STORE_STATUS_COMPLETE_EN 0x00000001U
+
+/* Register ROGUE_CR_VDM_CONTEXT_STORE_TASK0 */
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0 0x0438U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE1_SHIFT 32U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE1_CLRMSK 0x00000000FFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE0_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK0_PDS_STATE0_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register ROGUE_CR_VDM_CONTEXT_STORE_TASK1 */
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1 0x0440U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1_PDS_STATE2_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK1_PDS_STATE2_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_VDM_CONTEXT_STORE_TASK2 */
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2 0x0448U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT2_SHIFT 32U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT2_CLRMSK 0x00000000FFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT1_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_STORE_TASK2_STREAM_OUT1_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register ROGUE_CR_VDM_CONTEXT_RESUME_TASK0 */
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0 0x0450U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE1_SHIFT 32U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE1_CLRMSK 0x00000000FFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE0_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK0_PDS_STATE0_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register ROGUE_CR_VDM_CONTEXT_RESUME_TASK1 */
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1 0x0458U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1_PDS_STATE2_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK1_PDS_STATE2_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_VDM_CONTEXT_RESUME_TASK2 */
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2 0x0460U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT2_SHIFT 32U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT2_CLRMSK 0x00000000FFFFFFFFULL
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT1_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_RESUME_TASK2_STREAM_OUT1_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register ROGUE_CR_CDM_CONTEXT_STORE_STATUS */
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS 0x04A0U
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_NEED_RESUME_SHIFT 1U
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_NEED_RESUME_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_NEED_RESUME_EN 0x00000002U
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_COMPLETE_SHIFT 0U
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_COMPLETE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_CDM_CONTEXT_STORE_STATUS_COMPLETE_EN 0x00000001U
+
+/* Register ROGUE_CR_CDM_CONTEXT_PDS0 */
+#define ROGUE_CR_CDM_CONTEXT_PDS0 0x04A8U
+#define ROGUE_CR_CDM_CONTEXT_PDS0_MASKFULL 0xFFFFFFF0FFFFFFF0ULL
+#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_SHIFT 36U
+#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_CLRMSK 0x0000000FFFFFFFFFULL
+#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNSHIFT 4U
+#define ROGUE_CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNSIZE 16U
+#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_SHIFT 4U
+#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_CLRMSK 0xFFFFFFFF0000000FULL
+#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNSHIFT 4U
+#define ROGUE_CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_CDM_CONTEXT_PDS1 */
+#define ROGUE_CR_CDM_CONTEXT_PDS1 0x04B0U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__MASKFULL 0x000000007FFFFFFFULL
+#define ROGUE_CR_CDM_CONTEXT_PDS1_MASKFULL 0x000000003FFFFFFFULL
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__PDS_SEQ_DEP_SHIFT 30U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__PDS_SEQ_DEP_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__PDS_SEQ_DEP_EN 0x40000000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_PDS_SEQ_DEP_SHIFT 29U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_PDS_SEQ_DEP_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_PDS_SEQ_DEP_EN 0x20000000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__USC_SEQ_DEP_SHIFT 29U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__USC_SEQ_DEP_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__USC_SEQ_DEP_EN 0x20000000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_USC_SEQ_DEP_SHIFT 28U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_USC_SEQ_DEP_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_USC_SEQ_DEP_EN 0x10000000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TARGET_SHIFT 28U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TARGET_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TARGET_EN 0x10000000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_TARGET_SHIFT 27U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_TARGET_CLRMSK 0xF7FFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_TARGET_EN 0x08000000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__UNIFIED_SIZE_SHIFT 22U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__UNIFIED_SIZE_CLRMSK 0xF03FFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_UNIFIED_SIZE_SHIFT 21U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_UNIFIED_SIZE_CLRMSK 0xF81FFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SHARED_SHIFT 21U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SHARED_CLRMSK 0xFFDFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SHARED_EN 0x00200000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SHARED_SHIFT 20U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SHARED_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SHARED_EN 0x00100000U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SIZE_SHIFT 12U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__COMMON_SIZE_CLRMSK 0xFFE00FFFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SIZE_SHIFT 11U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_COMMON_SIZE_CLRMSK 0xFFF007FFU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_TEMP_SIZE_SHIFT 7U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_TEMP_SIZE_CLRMSK 0xFFFFF87FU
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TEMP_SIZE_SHIFT 7U
+#define ROGUE_CR_CDM_CONTEXT_PDS1__TEMPSIZE8__TEMP_SIZE_CLRMSK 0xFFFFF07FU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_DATA_SIZE_SHIFT 1U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_DATA_SIZE_CLRMSK 0xFFFFFF81U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_FENCE_SHIFT 0U
+#define ROGUE_CR_CDM_CONTEXT_PDS1_FENCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_CDM_CONTEXT_PDS1_FENCE_EN 0x00000001U
+
+/* Register ROGUE_CR_CDM_TERMINATE_PDS */
+#define ROGUE_CR_CDM_TERMINATE_PDS 0x04B8U
+#define ROGUE_CR_CDM_TERMINATE_PDS_MASKFULL 0xFFFFFFF0FFFFFFF0ULL
+#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_SHIFT 36U
+#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_CLRMSK 0x0000000FFFFFFFFFULL
+#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNSHIFT 4U
+#define ROGUE_CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNSIZE 16U
+#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_SHIFT 4U
+#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_CLRMSK 0xFFFFFFFF0000000FULL
+#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNSHIFT 4U
+#define ROGUE_CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_CDM_TERMINATE_PDS1 */
+#define ROGUE_CR_CDM_TERMINATE_PDS1 0x04C0U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__MASKFULL 0x000000007FFFFFFFULL
+#define ROGUE_CR_CDM_TERMINATE_PDS1_MASKFULL 0x000000003FFFFFFFULL
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__PDS_SEQ_DEP_SHIFT 30U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__PDS_SEQ_DEP_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__PDS_SEQ_DEP_EN 0x40000000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_PDS_SEQ_DEP_SHIFT 29U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_PDS_SEQ_DEP_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_PDS_SEQ_DEP_EN 0x20000000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__USC_SEQ_DEP_SHIFT 29U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__USC_SEQ_DEP_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__USC_SEQ_DEP_EN 0x20000000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_USC_SEQ_DEP_SHIFT 28U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_USC_SEQ_DEP_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_USC_SEQ_DEP_EN 0x10000000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TARGET_SHIFT 28U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TARGET_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TARGET_EN 0x10000000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_TARGET_SHIFT 27U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_TARGET_CLRMSK 0xF7FFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_TARGET_EN 0x08000000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__UNIFIED_SIZE_SHIFT 22U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__UNIFIED_SIZE_CLRMSK 0xF03FFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_UNIFIED_SIZE_SHIFT 21U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_UNIFIED_SIZE_CLRMSK 0xF81FFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SHARED_SHIFT 21U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SHARED_CLRMSK 0xFFDFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SHARED_EN 0x00200000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SHARED_SHIFT 20U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SHARED_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SHARED_EN 0x00100000U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SIZE_SHIFT 12U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__COMMON_SIZE_CLRMSK 0xFFE00FFFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SIZE_SHIFT 11U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_COMMON_SIZE_CLRMSK 0xFFF007FFU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_TEMP_SIZE_SHIFT 7U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_TEMP_SIZE_CLRMSK 0xFFFFF87FU
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TEMP_SIZE_SHIFT 7U
+#define ROGUE_CR_CDM_TERMINATE_PDS1__TEMPSIZE8__TEMP_SIZE_CLRMSK 0xFFFFF07FU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_DATA_SIZE_SHIFT 1U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_DATA_SIZE_CLRMSK 0xFFFFFF81U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_FENCE_SHIFT 0U
+#define ROGUE_CR_CDM_TERMINATE_PDS1_FENCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_CDM_TERMINATE_PDS1_FENCE_EN 0x00000001U
+
+/* Register ROGUE_CR_CDM_CONTEXT_LOAD_PDS0 */
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0 0x04D8U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_MASKFULL 0xFFFFFFF0FFFFFFF0ULL
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_SHIFT 36U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_CLRMSK 0x0000000FFFFFFFFFULL
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNSHIFT 4U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNSIZE 16U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_SHIFT 4U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_CLRMSK 0xFFFFFFFF0000000FULL
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNSHIFT 4U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_CDM_CONTEXT_LOAD_PDS1 */
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1 0x04E0U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__MASKFULL 0x000000007FFFFFFFULL
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_MASKFULL 0x000000003FFFFFFFULL
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__PDS_SEQ_DEP_SHIFT 30U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__PDS_SEQ_DEP_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__PDS_SEQ_DEP_EN 0x40000000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_PDS_SEQ_DEP_SHIFT 29U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_PDS_SEQ_DEP_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_PDS_SEQ_DEP_EN 0x20000000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__USC_SEQ_DEP_SHIFT 29U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__USC_SEQ_DEP_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__USC_SEQ_DEP_EN 0x20000000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_USC_SEQ_DEP_SHIFT 28U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_USC_SEQ_DEP_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_USC_SEQ_DEP_EN 0x10000000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TARGET_SHIFT 28U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TARGET_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TARGET_EN 0x10000000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TARGET_SHIFT 27U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TARGET_CLRMSK 0xF7FFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TARGET_EN 0x08000000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__UNIFIED_SIZE_SHIFT 22U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__UNIFIED_SIZE_CLRMSK 0xF03FFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_UNIFIED_SIZE_SHIFT 21U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_UNIFIED_SIZE_CLRMSK 0xF81FFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SHARED_SHIFT 21U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SHARED_CLRMSK 0xFFDFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SHARED_EN 0x00200000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SHARED_SHIFT 20U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SHARED_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SHARED_EN 0x00100000U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SIZE_SHIFT 12U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__COMMON_SIZE_CLRMSK 0xFFE00FFFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SIZE_SHIFT 11U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_COMMON_SIZE_CLRMSK 0xFFF007FFU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TEMP_SIZE_SHIFT 7U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_TEMP_SIZE_CLRMSK 0xFFFFF87FU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TEMP_SIZE_SHIFT 7U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1__TEMPSIZE8__TEMP_SIZE_CLRMSK 0xFFFFF07FU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_DATA_SIZE_SHIFT 1U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_DATA_SIZE_CLRMSK 0xFFFFFF81U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_FENCE_SHIFT 0U
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_FENCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_CDM_CONTEXT_LOAD_PDS1_FENCE_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_WRAPPER_CONFIG */
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG 0x0810U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_MASKFULL 0x000001030F01FFFFULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_FW_IDLE_ENABLE_SHIFT 40U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_FW_IDLE_ENABLE_CLRMSK 0xFFFFFEFFFFFFFFFFULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_FW_IDLE_ENABLE_EN 0x0000010000000000ULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_DISABLE_BOOT_SHIFT 33U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_DISABLE_BOOT_CLRMSK 0xFFFFFFFDFFFFFFFFULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_DISABLE_BOOT_EN 0x0000000200000000ULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_L2_CACHE_OFF_SHIFT 32U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_L2_CACHE_OFF_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_L2_CACHE_OFF_EN 0x0000000100000000ULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_OS_ID_SHIFT 25U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_OS_ID_CLRMSK 0xFFFFFFFFF1FFFFFFULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_TRUSTED_SHIFT 24U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_TRUSTED_CLRMSK 0xFFFFFFFFFEFFFFFFULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_TRUSTED_EN 0x0000000001000000ULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_SHIFT 16U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_CLRMSK 0xFFFFFFFFFFFEFFFFULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_MIPS32 0x0000000000000000ULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_MICROMIPS 0x0000000000010000ULL
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_REGBANK_BASE_ADDR_SHIFT 0U
+#define ROGUE_CR_MIPS_WRAPPER_CONFIG_REGBANK_BASE_ADDR_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1 */
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1 0x0818U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MASKFULL 0x00000000FFFFF001ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_BASE_ADDR_IN_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2 */
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2 0x0820U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_ADDR_OUT_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_OS_ID_SHIFT 6U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_TRUSTED_SHIFT 5U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_TRUSTED_EN 0x0000000000000020ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_REGION_SIZE_POW2_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1 */
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1 0x0828U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MASKFULL 0x00000000FFFFF001ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_BASE_ADDR_IN_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2 */
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2 0x0830U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_ADDR_OUT_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_OS_ID_SHIFT 6U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_TRUSTED_SHIFT 5U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_TRUSTED_EN 0x0000000000000020ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_REGION_SIZE_POW2_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1 */
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1 0x0838U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MASKFULL 0x00000000FFFFF001ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_BASE_ADDR_IN_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2 */
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2 0x0840U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_ADDR_OUT_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_OS_ID_SHIFT 6U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_TRUSTED_SHIFT 5U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_TRUSTED_EN 0x0000000000000020ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_REGION_SIZE_POW2_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1 */
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1 0x0848U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MASKFULL 0x00000000FFFFF001ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_BASE_ADDR_IN_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MODE_ENABLE_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2 */
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2 0x0850U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_ADDR_OUT_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_OS_ID_SHIFT 6U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_TRUSTED_SHIFT 5U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_TRUSTED_EN 0x0000000000000020ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_REGION_SIZE_POW2_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP4_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1 */
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1 0x0858U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MASKFULL 0x00000000FFFFF001ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_BASE_ADDR_IN_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2 */
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2 0x0860U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_MASKFULL 0x000000FFFFFFF1FFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_ADDR_OUT_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_ADDR_OUT_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_OS_ID_SHIFT 6U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_OS_ID_CLRMSK 0xFFFFFFFFFFFFFE3FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_TRUSTED_SHIFT 5U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_TRUSTED_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_TRUSTED_EN 0x0000000000000020ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_REGION_SIZE_POW2_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_REGION_SIZE_POW2_CLRMSK 0xFFFFFFFFFFFFFFE0ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS */
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS 0x0868U
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_MASKFULL 0x00000001FFFFFFFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_EVENT_SHIFT 32U
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_EVENT_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_EVENT_EN 0x0000000100000000ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_ADDRESS_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_STATUS_ADDRESS_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR */
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR 0x0870U
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_EVENT_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_EVENT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_ADDR_REMAP_UNMAPPED_CLEAR_EVENT_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG */
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG 0x0878U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MASKFULL 0xFFFFFFF7FFFFFFBFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ADDR_OUT_SHIFT 36U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ADDR_OUT_CLRMSK 0x0000000FFFFFFFFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_OS_ID_SHIFT 32U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_OS_ID_CLRMSK 0xFFFFFFF8FFFFFFFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_BASE_ADDR_IN_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_TRUSTED_SHIFT 11U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_TRUSTED_CLRMSK 0xFFFFFFFFFFFFF7FFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_TRUSTED_EN 0x0000000000000800ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_SHIFT 7U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_CLRMSK 0xFFFFFFFFFFFFF87FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_4KB 0x0000000000000000ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_16KB 0x0000000000000080ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_64KB 0x0000000000000100ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_256KB 0x0000000000000180ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_1MB 0x0000000000000200ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_4MB 0x0000000000000280ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_16MB 0x0000000000000300ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_64MB 0x0000000000000380ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_REGION_SIZE_256MB 0x0000000000000400ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ENTRY_SHIFT 1U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_ENTRY_CLRMSK 0xFFFFFFFFFFFFFFC1ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MODE_ENABLE_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_CONFIG_MODE_ENABLE_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ */
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ 0x0880U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_MASKFULL 0x000000000000003FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_ENTRY_SHIFT 1U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_ENTRY_CLRMSK 0xFFFFFFC1U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_REQUEST_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_REQUEST_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_READ_REQUEST_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA */
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA 0x0888U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MASKFULL 0xFFFFFFF7FFFFFF81ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_ADDR_OUT_SHIFT 36U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_ADDR_OUT_CLRMSK 0x0000000FFFFFFFFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_OS_ID_SHIFT 32U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_OS_ID_CLRMSK 0xFFFFFFF8FFFFFFFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_BASE_ADDR_IN_SHIFT 12U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_BASE_ADDR_IN_CLRMSK 0xFFFFFFFF00000FFFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_TRUSTED_SHIFT 11U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_TRUSTED_CLRMSK 0xFFFFFFFFFFFFF7FFULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_TRUSTED_EN 0x0000000000000800ULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_REGION_SIZE_SHIFT 7U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_REGION_SIZE_CLRMSK 0xFFFFFFFFFFFFF87FULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MODE_ENABLE_SHIFT 0U
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MODE_ENABLE_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MIPS_ADDR_REMAP_RANGE_DATA_MODE_ENABLE_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE */
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE 0x08A0U
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_EVENT_SHIFT 0U
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_EVENT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE_EVENT_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS */
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS 0x08A8U
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_SHIFT 0U
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR */
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR 0x08B0U
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_SHIFT 0U
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE */
+#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE 0x08B8U
+#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_EVENT_SHIFT 0U
+#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_EVENT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_WRAPPER_NMI_ENABLE_EVENT_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_WRAPPER_NMI_EVENT */
+#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT 0x08C0U
+#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_TRIGGER_SHIFT 0U
+#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_TRIGGER_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_WRAPPER_NMI_EVENT_TRIGGER_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_DEBUG_CONFIG */
+#define ROGUE_CR_MIPS_DEBUG_CONFIG 0x08C8U
+#define ROGUE_CR_MIPS_DEBUG_CONFIG_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_MIPS_DEBUG_CONFIG_DISABLE_PROBE_DEBUG_SHIFT 0U
+#define ROGUE_CR_MIPS_DEBUG_CONFIG_DISABLE_PROBE_DEBUG_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_DEBUG_CONFIG_DISABLE_PROBE_DEBUG_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_EXCEPTION_STATUS */
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS 0x08D0U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_MASKFULL 0x000000000000003FULL
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_SLEEP_SHIFT 5U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_SLEEP_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_SLEEP_EN 0x00000020U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NMI_TAKEN_SHIFT 4U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NMI_TAKEN_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NMI_TAKEN_EN 0x00000010U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_EXL_SHIFT 3U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_EXL_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_EXL_EN 0x00000008U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_ERL_SHIFT 2U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_ERL_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_NEST_ERL_EN 0x00000004U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_EXL_SHIFT 1U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_EXL_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_EXL_EN 0x00000002U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_ERL_SHIFT 0U
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_ERL_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MIPS_EXCEPTION_STATUS_SI_ERL_EN 0x00000001U
+
+/* Register ROGUE_CR_MIPS_WRAPPER_STATUS */
+#define ROGUE_CR_MIPS_WRAPPER_STATUS 0x08E8U
+#define ROGUE_CR_MIPS_WRAPPER_STATUS_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_MIPS_WRAPPER_STATUS_OUTSTANDING_REQUESTS_SHIFT 0U
+#define ROGUE_CR_MIPS_WRAPPER_STATUS_OUTSTANDING_REQUESTS_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_XPU_BROADCAST */
+#define ROGUE_CR_XPU_BROADCAST 0x0890U
+#define ROGUE_CR_XPU_BROADCAST_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_XPU_BROADCAST_MASK_SHIFT 0U
+#define ROGUE_CR_XPU_BROADCAST_MASK_CLRMSK 0xFFFFFE00U
+
+/* Register ROGUE_CR_META_SP_MSLVDATAX */
+#define ROGUE_CR_META_SP_MSLVDATAX 0x0A00U
+#define ROGUE_CR_META_SP_MSLVDATAX_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_META_SP_MSLVDATAX_MSLVDATAX_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVDATAX_MSLVDATAX_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_META_SP_MSLVDATAT */
+#define ROGUE_CR_META_SP_MSLVDATAT 0x0A08U
+#define ROGUE_CR_META_SP_MSLVDATAT_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_META_SP_MSLVDATAT_MSLVDATAT_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVDATAT_MSLVDATAT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_META_SP_MSLVCTRL0 */
+#define ROGUE_CR_META_SP_MSLVCTRL0 0x0A10U
+#define ROGUE_CR_META_SP_MSLVCTRL0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_META_SP_MSLVCTRL0_ADDR_SHIFT 2U
+#define ROGUE_CR_META_SP_MSLVCTRL0_ADDR_CLRMSK 0x00000003U
+#define ROGUE_CR_META_SP_MSLVCTRL0_AUTOINCR_SHIFT 1U
+#define ROGUE_CR_META_SP_MSLVCTRL0_AUTOINCR_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_META_SP_MSLVCTRL0_AUTOINCR_EN 0x00000002U
+#define ROGUE_CR_META_SP_MSLVCTRL0_RD_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVCTRL0_RD_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_META_SP_MSLVCTRL0_RD_EN 0x00000001U
+
+/* Register ROGUE_CR_META_SP_MSLVCTRL1 */
+#define ROGUE_CR_META_SP_MSLVCTRL1 0x0A18U
+#define ROGUE_CR_META_SP_MSLVCTRL1_MASKFULL 0x00000000F7F4003FULL
+#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRTHREAD_SHIFT 30U
+#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRTHREAD_CLRMSK 0x3FFFFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_LOCK2_INTERLOCK_SHIFT 29U
+#define ROGUE_CR_META_SP_MSLVCTRL1_LOCK2_INTERLOCK_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_LOCK2_INTERLOCK_EN 0x20000000U
+#define ROGUE_CR_META_SP_MSLVCTRL1_ATOMIC_INTERLOCK_SHIFT 28U
+#define ROGUE_CR_META_SP_MSLVCTRL1_ATOMIC_INTERLOCK_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_ATOMIC_INTERLOCK_EN 0x10000000U
+#define ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_SHIFT 26U
+#define ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_CLRMSK 0xFBFFFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN 0x04000000U
+#define ROGUE_CR_META_SP_MSLVCTRL1_COREMEM_IDLE_SHIFT 25U
+#define ROGUE_CR_META_SP_MSLVCTRL1_COREMEM_IDLE_CLRMSK 0xFDFFFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_COREMEM_IDLE_EN 0x02000000U
+#define ROGUE_CR_META_SP_MSLVCTRL1_READY_SHIFT 24U
+#define ROGUE_CR_META_SP_MSLVCTRL1_READY_CLRMSK 0xFEFFFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_READY_EN 0x01000000U
+#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRID_SHIFT 21U
+#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERRID_CLRMSK 0xFF1FFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERR_SHIFT 20U
+#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERR_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_DEFERR_EN 0x00100000U
+#define ROGUE_CR_META_SP_MSLVCTRL1_WR_ACTIVE_SHIFT 18U
+#define ROGUE_CR_META_SP_MSLVCTRL1_WR_ACTIVE_CLRMSK 0xFFFBFFFFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_WR_ACTIVE_EN 0x00040000U
+#define ROGUE_CR_META_SP_MSLVCTRL1_THREAD_SHIFT 4U
+#define ROGUE_CR_META_SP_MSLVCTRL1_THREAD_CLRMSK 0xFFFFFFCFU
+#define ROGUE_CR_META_SP_MSLVCTRL1_TRANS_SIZE_SHIFT 2U
+#define ROGUE_CR_META_SP_MSLVCTRL1_TRANS_SIZE_CLRMSK 0xFFFFFFF3U
+#define ROGUE_CR_META_SP_MSLVCTRL1_BYTE_ROUND_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVCTRL1_BYTE_ROUND_CLRMSK 0xFFFFFFFCU
+
+/* Register ROGUE_CR_META_SP_MSLVHANDSHKE */
+#define ROGUE_CR_META_SP_MSLVHANDSHKE 0x0A50U
+#define ROGUE_CR_META_SP_MSLVHANDSHKE_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_META_SP_MSLVHANDSHKE_INPUT_SHIFT 2U
+#define ROGUE_CR_META_SP_MSLVHANDSHKE_INPUT_CLRMSK 0xFFFFFFF3U
+#define ROGUE_CR_META_SP_MSLVHANDSHKE_OUTPUT_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVHANDSHKE_OUTPUT_CLRMSK 0xFFFFFFFCU
+
+/* Register ROGUE_CR_META_SP_MSLVT0KICK */
+#define ROGUE_CR_META_SP_MSLVT0KICK 0x0A80U
+#define ROGUE_CR_META_SP_MSLVT0KICK_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT0KICK_MSLVT0KICK_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT0KICK_MSLVT0KICK_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVT0KICKI */
+#define ROGUE_CR_META_SP_MSLVT0KICKI 0x0A88U
+#define ROGUE_CR_META_SP_MSLVT0KICKI_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT0KICKI_MSLVT0KICKI_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT0KICKI_MSLVT0KICKI_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVT1KICK */
+#define ROGUE_CR_META_SP_MSLVT1KICK 0x0A90U
+#define ROGUE_CR_META_SP_MSLVT1KICK_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT1KICK_MSLVT1KICK_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT1KICK_MSLVT1KICK_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVT1KICKI */
+#define ROGUE_CR_META_SP_MSLVT1KICKI 0x0A98U
+#define ROGUE_CR_META_SP_MSLVT1KICKI_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT1KICKI_MSLVT1KICKI_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT1KICKI_MSLVT1KICKI_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVT2KICK */
+#define ROGUE_CR_META_SP_MSLVT2KICK 0x0AA0U
+#define ROGUE_CR_META_SP_MSLVT2KICK_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT2KICK_MSLVT2KICK_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT2KICK_MSLVT2KICK_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVT2KICKI */
+#define ROGUE_CR_META_SP_MSLVT2KICKI 0x0AA8U
+#define ROGUE_CR_META_SP_MSLVT2KICKI_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT2KICKI_MSLVT2KICKI_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT2KICKI_MSLVT2KICKI_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVT3KICK */
+#define ROGUE_CR_META_SP_MSLVT3KICK 0x0AB0U
+#define ROGUE_CR_META_SP_MSLVT3KICK_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT3KICK_MSLVT3KICK_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT3KICK_MSLVT3KICK_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVT3KICKI */
+#define ROGUE_CR_META_SP_MSLVT3KICKI 0x0AB8U
+#define ROGUE_CR_META_SP_MSLVT3KICKI_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_META_SP_MSLVT3KICKI_MSLVT3KICKI_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVT3KICKI_MSLVT3KICKI_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_META_SP_MSLVRST */
+#define ROGUE_CR_META_SP_MSLVRST 0x0AC0U
+#define ROGUE_CR_META_SP_MSLVRST_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_META_SP_MSLVRST_SOFTRESET_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVRST_SOFTRESET_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_META_SP_MSLVRST_SOFTRESET_EN 0x00000001U
+
+/* Register ROGUE_CR_META_SP_MSLVIRQSTATUS */
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS 0x0AC8U
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS_MASKFULL 0x000000000000000CULL
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT3_SHIFT 3U
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT3_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT3_EN 0x00000008U
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_SHIFT 2U
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_EN 0x00000004U
+
+/* Register ROGUE_CR_META_SP_MSLVIRQENABLE */
+#define ROGUE_CR_META_SP_MSLVIRQENABLE 0x0AD0U
+#define ROGUE_CR_META_SP_MSLVIRQENABLE_MASKFULL 0x000000000000000CULL
+#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT1_SHIFT 3U
+#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT1_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT1_EN 0x00000008U
+#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT0_SHIFT 2U
+#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT0_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_META_SP_MSLVIRQENABLE_EVENT0_EN 0x00000004U
+
+/* Register ROGUE_CR_META_SP_MSLVIRQLEVEL */
+#define ROGUE_CR_META_SP_MSLVIRQLEVEL 0x0AD8U
+#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MODE_SHIFT 0U
+#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MODE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_META_SP_MSLVIRQLEVEL_MODE_EN 0x00000001U
+
+/* Register ROGUE_CR_MTS_SCHEDULE */
+#define ROGUE_CR_MTS_SCHEDULE 0x0B00U
+#define ROGUE_CR_MTS_SCHEDULE_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE1 */
+#define ROGUE_CR_MTS_SCHEDULE1 0x10B00U
+#define ROGUE_CR_MTS_SCHEDULE1_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE1_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE1_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE1_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE1_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE1_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE1_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE1_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE1_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE1_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE1_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE1_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE2 */
+#define ROGUE_CR_MTS_SCHEDULE2 0x20B00U
+#define ROGUE_CR_MTS_SCHEDULE2_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE2_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE2_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE2_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE2_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE2_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE2_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE2_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE2_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE2_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE2_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE2_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE3 */
+#define ROGUE_CR_MTS_SCHEDULE3 0x30B00U
+#define ROGUE_CR_MTS_SCHEDULE3_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE3_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE3_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE3_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE3_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE3_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE3_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE3_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE3_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE3_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE3_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE3_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE4 */
+#define ROGUE_CR_MTS_SCHEDULE4 0x40B00U
+#define ROGUE_CR_MTS_SCHEDULE4_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE4_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE4_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE4_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE4_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE4_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE4_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE4_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE4_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE4_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE4_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE4_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE4_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE4_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE4_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE5 */
+#define ROGUE_CR_MTS_SCHEDULE5 0x50B00U
+#define ROGUE_CR_MTS_SCHEDULE5_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE5_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE5_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE5_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE5_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE5_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE5_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE5_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE5_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE5_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE5_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE5_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE6 */
+#define ROGUE_CR_MTS_SCHEDULE6 0x60B00U
+#define ROGUE_CR_MTS_SCHEDULE6_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE6_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE6_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_SCHEDULE7 */
+#define ROGUE_CR_MTS_SCHEDULE7 0x70B00U
+#define ROGUE_CR_MTS_SCHEDULE7_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_SHIFT 8U
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_BG_TIMER 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_HOST_HOST 0x00000100U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_SHIFT 6U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT1 0x00000040U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT2 0x00000080U
+#define ROGUE_CR_MTS_SCHEDULE7_PRIORITY_PRT3 0x000000C0U
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_SHIFT 5U
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_BGCTX 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_CONTEXT_INTCTX 0x00000020U
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_SHIFT 4U
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_NON_COUNTED 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_TASK_COUNTED 0x00000010U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_CLRMSK 0xFFFFFFF0U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM0 0x00000000U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM1 0x00000001U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM2 0x00000002U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM3 0x00000003U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM4 0x00000004U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM5 0x00000005U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM6 0x00000006U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM7 0x00000007U
+#define ROGUE_CR_MTS_SCHEDULE7_DM_DM_ALL 0x0000000FU
+
+/* Register ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC */
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC 0x0B30U
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC */
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC 0x0B38U
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC */
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC 0x0B40U
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC */
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC 0x0B48U
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_DM_ASSOC_SHIFT 0U
+#define ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG */
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG 0x0B50U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG__S7_TOP__MASKFULL 0x000FF0FFFFFFF701ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_MASKFULL 0x0000FFFFFFFFF001ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_SHIFT 44U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_CLRMSK 0xFFFF0FFFFFFFFFFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG__S7_TOP__FENCE_PC_BASE_SHIFT 44U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG__S7_TOP__FENCE_PC_BASE_CLRMSK 0xFFF00FFFFFFFFFFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_SHIFT 40U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_ADDR_SHIFT 12U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PERSISTENCE_SHIFT 9U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PERSISTENCE_CLRMSK 0xFFFFFFFFFFFFF9FFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_SLC_COHERENT_SHIFT 8U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_SLC_COHERENT_CLRMSK 0xFFFFFFFFFFFFFEFFULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_SLC_COHERENT_EN 0x0000000000000100ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_SHIFT 0U
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_META 0x0000000000000000ULL
+#define ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_MTS 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE 0x0B58U
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM0_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE 0x0B60U
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM1_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE 0x0B68U
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM2_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE 0x0B70U
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM3_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE 0x0B78U
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM4_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE */
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE 0x0B80U
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE_INT_ENABLE_SHIFT 0U
+#define ROGUE_CR_MTS_DM5_INTERRUPT_ENABLE_INT_ENABLE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_INTCTX */
+#define ROGUE_CR_MTS_INTCTX 0x0B98U
+#define ROGUE_CR_MTS_INTCTX_MASKFULL 0x000000003FFFFFFFULL
+#define ROGUE_CR_MTS_INTCTX_DM_HOST_SCHEDULE_SHIFT 22U
+#define ROGUE_CR_MTS_INTCTX_DM_HOST_SCHEDULE_CLRMSK 0xC03FFFFFU
+#define ROGUE_CR_MTS_INTCTX_DM_PTR_SHIFT 18U
+#define ROGUE_CR_MTS_INTCTX_DM_PTR_CLRMSK 0xFFC3FFFFU
+#define ROGUE_CR_MTS_INTCTX_THREAD_ACTIVE_SHIFT 16U
+#define ROGUE_CR_MTS_INTCTX_THREAD_ACTIVE_CLRMSK 0xFFFCFFFFU
+#define ROGUE_CR_MTS_INTCTX_DM_TIMER_SCHEDULE_SHIFT 8U
+#define ROGUE_CR_MTS_INTCTX_DM_TIMER_SCHEDULE_CLRMSK 0xFFFF00FFU
+#define ROGUE_CR_MTS_INTCTX_DM_INTERRUPT_SCHEDULE_SHIFT 0U
+#define ROGUE_CR_MTS_INTCTX_DM_INTERRUPT_SCHEDULE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_MTS_BGCTX */
+#define ROGUE_CR_MTS_BGCTX 0x0BA0U
+#define ROGUE_CR_MTS_BGCTX_MASKFULL 0x0000000000003FFFULL
+#define ROGUE_CR_MTS_BGCTX_DM_PTR_SHIFT 10U
+#define ROGUE_CR_MTS_BGCTX_DM_PTR_CLRMSK 0xFFFFC3FFU
+#define ROGUE_CR_MTS_BGCTX_THREAD_ACTIVE_SHIFT 8U
+#define ROGUE_CR_MTS_BGCTX_THREAD_ACTIVE_CLRMSK 0xFFFFFCFFU
+#define ROGUE_CR_MTS_BGCTX_DM_NONCOUNTED_SCHEDULE_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_DM_NONCOUNTED_SCHEDULE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE */
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE 0x0BA8U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM7_SHIFT 56U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM7_CLRMSK 0x00FFFFFFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM6_SHIFT 48U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM6_CLRMSK 0xFF00FFFFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM5_SHIFT 40U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM5_CLRMSK 0xFFFF00FFFFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM4_SHIFT 32U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM4_CLRMSK 0xFFFFFF00FFFFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM3_SHIFT 24U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM3_CLRMSK 0xFFFFFFFF00FFFFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM2_SHIFT 16U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM2_CLRMSK 0xFFFFFFFFFF00FFFFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM1_SHIFT 8U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM1_CLRMSK 0xFFFFFFFFFFFF00FFULL
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM0_SHIFT 0U
+#define ROGUE_CR_MTS_BGCTX_COUNTED_SCHEDULE_DM0_CLRMSK 0xFFFFFFFFFFFFFF00ULL
+
+/* Register ROGUE_CR_MTS_GPU_INT_STATUS */
+#define ROGUE_CR_MTS_GPU_INT_STATUS 0x0BB0U
+#define ROGUE_CR_MTS_GPU_INT_STATUS_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MTS_GPU_INT_STATUS_STATUS_SHIFT 0U
+#define ROGUE_CR_MTS_GPU_INT_STATUS_STATUS_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_MTS_SCHEDULE_ENABLE */
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE 0x0BC8U
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE_MASK_SHIFT 0U
+#define ROGUE_CR_MTS_SCHEDULE_ENABLE_MASK_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_IRQ_OS0_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS 0x0BD8U
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS0_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS0_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR 0x0BE8U
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS0_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS1_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS 0x10BD8U
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS1_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS1_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR 0x10BE8U
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS1_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS2_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS 0x20BD8U
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS2_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS2_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR 0x20BE8U
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS2_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS3_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS 0x30BD8U
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS3_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS3_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR 0x30BE8U
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS3_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS4_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS 0x40BD8U
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS4_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS4_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR 0x40BE8U
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS4_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS5_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS 0x50BD8U
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS5_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS5_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR 0x50BE8U
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS5_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS6_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS 0x60BD8U
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS6_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS6_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR 0x60BE8U
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS6_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS7_EVENT_STATUS */
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS 0x70BD8U
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS7_EVENT_STATUS_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_IRQ_OS7_EVENT_CLEAR */
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR 0x70BE8U
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_SOURCE_SHIFT 0U
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_SOURCE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_IRQ_OS7_EVENT_CLEAR_SOURCE_EN 0x00000001U
+
+/* Register ROGUE_CR_META_BOOT */
+#define ROGUE_CR_META_BOOT 0x0BF8U
+#define ROGUE_CR_META_BOOT_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_META_BOOT_MODE_SHIFT 0U
+#define ROGUE_CR_META_BOOT_MODE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_META_BOOT_MODE_EN 0x00000001U
+
+/* Register ROGUE_CR_GARTEN_SLC */
+#define ROGUE_CR_GARTEN_SLC 0x0BB8U
+#define ROGUE_CR_GARTEN_SLC_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_GARTEN_SLC_FORCE_COHERENCY_SHIFT 0U
+#define ROGUE_CR_GARTEN_SLC_FORCE_COHERENCY_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_GARTEN_SLC_FORCE_COHERENCY_EN 0x00000001U
+
+/* Register ROGUE_CR_PPP */
+#define ROGUE_CR_PPP 0x0CD0U
+#define ROGUE_CR_PPP_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PPP_CHECKSUM_SHIFT 0U
+#define ROGUE_CR_PPP_CHECKSUM_CLRMSK 0x00000000U
+
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_MASK 0x00000003U
+/* Top-left to bottom-right */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_TL2BR 0x00000000U
+/* Top-right to bottom-left */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_TR2BL 0x00000001U
+/* Bottom-left to top-right */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_BL2TR 0x00000002U
+/* Bottom-right to top-left */
+#define ROGUE_CR_ISP_RENDER_DIR_TYPE_BR2TL 0x00000003U
+
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_MASK 0x00000003U
+/* Normal render */
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_NORM 0x00000000U
+/* Fast 2D render */
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_2D 0x00000002U
+/* Fast scale render */
+#define ROGUE_CR_ISP_RENDER_MODE_TYPE_FAST_SCALE 0x00000003U
+
+/* Register ROGUE_CR_ISP_RENDER */
+#define ROGUE_CR_ISP_RENDER 0x0F08U
+#define ROGUE_CR_ISP_RENDER_MASKFULL 0x00000000000001FFULL
+#define ROGUE_CR_ISP_RENDER_FAST_RENDER_FORCE_PROTECT_SHIFT 8U
+#define ROGUE_CR_ISP_RENDER_FAST_RENDER_FORCE_PROTECT_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_ISP_RENDER_FAST_RENDER_FORCE_PROTECT_EN 0x00000100U
+#define ROGUE_CR_ISP_RENDER_PROCESS_PROTECTED_TILES_SHIFT 7U
+#define ROGUE_CR_ISP_RENDER_PROCESS_PROTECTED_TILES_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_ISP_RENDER_PROCESS_PROTECTED_TILES_EN 0x00000080U
+#define ROGUE_CR_ISP_RENDER_PROCESS_UNPROTECTED_TILES_SHIFT 6U
+#define ROGUE_CR_ISP_RENDER_PROCESS_UNPROTECTED_TILES_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_ISP_RENDER_PROCESS_UNPROTECTED_TILES_EN 0x00000040U
+#define ROGUE_CR_ISP_RENDER_DISABLE_EOMT_SHIFT 5U
+#define ROGUE_CR_ISP_RENDER_DISABLE_EOMT_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_ISP_RENDER_DISABLE_EOMT_EN 0x00000020U
+#define ROGUE_CR_ISP_RENDER_RESUME_SHIFT 4U
+#define ROGUE_CR_ISP_RENDER_RESUME_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_ISP_RENDER_RESUME_EN 0x00000010U
+#define ROGUE_CR_ISP_RENDER_DIR_SHIFT 2U
+#define ROGUE_CR_ISP_RENDER_DIR_CLRMSK 0xFFFFFFF3U
+#define ROGUE_CR_ISP_RENDER_DIR_TL2BR 0x00000000U
+#define ROGUE_CR_ISP_RENDER_DIR_TR2BL 0x00000004U
+#define ROGUE_CR_ISP_RENDER_DIR_BL2TR 0x00000008U
+#define ROGUE_CR_ISP_RENDER_DIR_BR2TL 0x0000000CU
+#define ROGUE_CR_ISP_RENDER_MODE_SHIFT 0U
+#define ROGUE_CR_ISP_RENDER_MODE_CLRMSK 0xFFFFFFFCU
+#define ROGUE_CR_ISP_RENDER_MODE_NORM 0x00000000U
+#define ROGUE_CR_ISP_RENDER_MODE_FAST_2D 0x00000002U
+#define ROGUE_CR_ISP_RENDER_MODE_FAST_SCALE 0x00000003U
+
+/* Register ROGUE_CR_ISP_CTL */
+#define ROGUE_CR_ISP_CTL 0x0F38U
+#define ROGUE_CR_ISP_CTL_MASKFULL 0x00000000FFFFF3FFULL
+#define ROGUE_CR_ISP_CTL_SKIP_INIT_HDRS_SHIFT 31U
+#define ROGUE_CR_ISP_CTL_SKIP_INIT_HDRS_CLRMSK 0x7FFFFFFFU
+#define ROGUE_CR_ISP_CTL_SKIP_INIT_HDRS_EN 0x80000000U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_SHIFT 30U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_EN 0x40000000U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_PIX_SHIFT 29U
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_PIX_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_ISP_CTL_LINE_STYLE_PIX_EN 0x20000000U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_VERT_SHIFT 28U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_VERT_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_VERT_EN 0x10000000U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_SHIFT 27U
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_CLRMSK 0xF7FFFFFFU
+#define ROGUE_CR_ISP_CTL_PAIR_TILES_EN 0x08000000U
+#define ROGUE_CR_ISP_CTL_CREQ_BUF_EN_SHIFT 26U
+#define ROGUE_CR_ISP_CTL_CREQ_BUF_EN_CLRMSK 0xFBFFFFFFU
+#define ROGUE_CR_ISP_CTL_CREQ_BUF_EN_EN 0x04000000U
+#define ROGUE_CR_ISP_CTL_TILE_AGE_EN_SHIFT 25U
+#define ROGUE_CR_ISP_CTL_TILE_AGE_EN_CLRMSK 0xFDFFFFFFU
+#define ROGUE_CR_ISP_CTL_TILE_AGE_EN_EN 0x02000000U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_SHIFT 23U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_CLRMSK 0xFE7FFFFFU
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_DX9 0x00000000U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_DX10 0x00800000U
+#define ROGUE_CR_ISP_CTL_ISP_SAMPLE_POS_MODE_OGL 0x01000000U
+#define ROGUE_CR_ISP_CTL_NUM_TILES_PER_USC_SHIFT 21U
+#define ROGUE_CR_ISP_CTL_NUM_TILES_PER_USC_CLRMSK 0xFF9FFFFFU
+#define ROGUE_CR_ISP_CTL_DBIAS_IS_INT_SHIFT 20U
+#define ROGUE_CR_ISP_CTL_DBIAS_IS_INT_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_ISP_CTL_DBIAS_IS_INT_EN 0x00100000U
+#define ROGUE_CR_ISP_CTL_OVERLAP_CHECK_MODE_SHIFT 19U
+#define ROGUE_CR_ISP_CTL_OVERLAP_CHECK_MODE_CLRMSK 0xFFF7FFFFU
+#define ROGUE_CR_ISP_CTL_OVERLAP_CHECK_MODE_EN 0x00080000U
+#define ROGUE_CR_ISP_CTL_PT_UPFRONT_DEPTH_DISABLE_SHIFT 18U
+#define ROGUE_CR_ISP_CTL_PT_UPFRONT_DEPTH_DISABLE_CLRMSK 0xFFFBFFFFU
+#define ROGUE_CR_ISP_CTL_PT_UPFRONT_DEPTH_DISABLE_EN 0x00040000U
+#define ROGUE_CR_ISP_CTL_PROCESS_EMPTY_TILES_SHIFT 17U
+#define ROGUE_CR_ISP_CTL_PROCESS_EMPTY_TILES_CLRMSK 0xFFFDFFFFU
+#define ROGUE_CR_ISP_CTL_PROCESS_EMPTY_TILES_EN 0x00020000U
+#define ROGUE_CR_ISP_CTL_SAMPLE_POS_SHIFT 16U
+#define ROGUE_CR_ISP_CTL_SAMPLE_POS_CLRMSK 0xFFFEFFFFU
+#define ROGUE_CR_ISP_CTL_SAMPLE_POS_EN 0x00010000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_SHIFT 12U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_ONE 0x00000000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_TWO 0x00001000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_THREE 0x00002000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FOUR 0x00003000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FIVE 0x00004000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_SIX 0x00005000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_SEVEN 0x00006000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_EIGHT 0x00007000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_NINE 0x00008000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_TEN 0x00009000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_ELEVEN 0x0000A000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_TWELVE 0x0000B000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_THIRTEEN 0x0000C000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FOURTEEN 0x0000D000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_FIFTEEN 0x0000E000U
+#define ROGUE_CR_ISP_CTL_PIPE_ENABLE_PIPE_SIXTEEN 0x0000F000U
+#define ROGUE_CR_ISP_CTL_VALID_ID_SHIFT 4U
+#define ROGUE_CR_ISP_CTL_VALID_ID_CLRMSK 0xFFFFFC0FU
+#define ROGUE_CR_ISP_CTL_UPASS_START_SHIFT 0U
+#define ROGUE_CR_ISP_CTL_UPASS_START_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_ISP_STATUS */
+#define ROGUE_CR_ISP_STATUS 0x1038U
+#define ROGUE_CR_ISP_STATUS_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_ISP_STATUS_SPLIT_MAX_SHIFT 2U
+#define ROGUE_CR_ISP_STATUS_SPLIT_MAX_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_ISP_STATUS_SPLIT_MAX_EN 0x00000004U
+#define ROGUE_CR_ISP_STATUS_ACTIVE_SHIFT 1U
+#define ROGUE_CR_ISP_STATUS_ACTIVE_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_ISP_STATUS_ACTIVE_EN 0x00000002U
+#define ROGUE_CR_ISP_STATUS_EOR_SHIFT 0U
+#define ROGUE_CR_ISP_STATUS_EOR_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_ISP_STATUS_EOR_EN 0x00000001U
+
+/* Register group: ROGUE_CR_ISP_XTP_RESUME, with 64 repeats */
+#define ROGUE_CR_ISP_XTP_RESUME_REPEATCOUNT 64U
+/* Register ROGUE_CR_ISP_XTP_RESUME0 */
+#define ROGUE_CR_ISP_XTP_RESUME0 0x3A00U
+#define ROGUE_CR_ISP_XTP_RESUME0_MASKFULL 0x00000000003FF3FFULL
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_X_SHIFT 12U
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_X_CLRMSK 0xFFC00FFFU
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_Y_SHIFT 0U
+#define ROGUE_CR_ISP_XTP_RESUME0_TILE_Y_CLRMSK 0xFFFFFC00U
+
+/* Register group: ROGUE_CR_ISP_XTP_STORE, with 32 repeats */
+#define ROGUE_CR_ISP_XTP_STORE_REPEATCOUNT 32U
+/* Register ROGUE_CR_ISP_XTP_STORE0 */
+#define ROGUE_CR_ISP_XTP_STORE0 0x3C00U
+#define ROGUE_CR_ISP_XTP_STORE0_MASKFULL 0x000000007F3FF3FFULL
+#define ROGUE_CR_ISP_XTP_STORE0_ACTIVE_SHIFT 30U
+#define ROGUE_CR_ISP_XTP_STORE0_ACTIVE_CLRMSK 0xBFFFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_ACTIVE_EN 0x40000000U
+#define ROGUE_CR_ISP_XTP_STORE0_EOR_SHIFT 29U
+#define ROGUE_CR_ISP_XTP_STORE0_EOR_CLRMSK 0xDFFFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_EOR_EN 0x20000000U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_LAST_SHIFT 28U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_LAST_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_LAST_EN 0x10000000U
+#define ROGUE_CR_ISP_XTP_STORE0_MT_SHIFT 24U
+#define ROGUE_CR_ISP_XTP_STORE0_MT_CLRMSK 0xF0FFFFFFU
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_X_SHIFT 12U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_X_CLRMSK 0xFFC00FFFU
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_Y_SHIFT 0U
+#define ROGUE_CR_ISP_XTP_STORE0_TILE_Y_CLRMSK 0xFFFFFC00U
+
+/* Register group: ROGUE_CR_BIF_CAT_BASE, with 8 repeats */
+#define ROGUE_CR_BIF_CAT_BASE_REPEATCOUNT 8U
+/* Register ROGUE_CR_BIF_CAT_BASE0 */
+#define ROGUE_CR_BIF_CAT_BASE0 0x1200U
+#define ROGUE_CR_BIF_CAT_BASE0_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE0_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE1 */
+#define ROGUE_CR_BIF_CAT_BASE1 0x1208U
+#define ROGUE_CR_BIF_CAT_BASE1_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE1_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE2 */
+#define ROGUE_CR_BIF_CAT_BASE2 0x1210U
+#define ROGUE_CR_BIF_CAT_BASE2_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE2_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE3 */
+#define ROGUE_CR_BIF_CAT_BASE3 0x1218U
+#define ROGUE_CR_BIF_CAT_BASE3_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE3_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE4 */
+#define ROGUE_CR_BIF_CAT_BASE4 0x1220U
+#define ROGUE_CR_BIF_CAT_BASE4_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE4_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE5 */
+#define ROGUE_CR_BIF_CAT_BASE5 0x1228U
+#define ROGUE_CR_BIF_CAT_BASE5_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE5_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE6 */
+#define ROGUE_CR_BIF_CAT_BASE6 0x1230U
+#define ROGUE_CR_BIF_CAT_BASE6_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE6_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE7 */
+#define ROGUE_CR_BIF_CAT_BASE7 0x1238U
+#define ROGUE_CR_BIF_CAT_BASE7_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_CAT_BASE7_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_CAT_BASE_INDEX */
+#define ROGUE_CR_BIF_CAT_BASE_INDEX 0x1240U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_MASKFULL 0x00070707073F0707ULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RVTX_SHIFT 48U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RVTX_CLRMSK 0xFFF8FFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RAY_SHIFT 40U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_RAY_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_HOST_SHIFT 32U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_HOST_CLRMSK 0xFFFFFFF8FFFFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TLA_SHIFT 24U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TLA_CLRMSK 0xFFFFFFFFF8FFFFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TDM_SHIFT 19U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TDM_CLRMSK 0xFFFFFFFFFFC7FFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_CDM_SHIFT 16U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_CDM_CLRMSK 0xFFFFFFFFFFF8FFFFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_PIXEL_SHIFT 8U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_PIXEL_CLRMSK 0xFFFFFFFFFFFFF8FFULL
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TA_SHIFT 0U
+#define ROGUE_CR_BIF_CAT_BASE_INDEX_TA_CLRMSK 0xFFFFFFFFFFFFFFF8ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_VCE0 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0 0x1248U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE0_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_TE0 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0 0x1250U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE0_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_ALIST0 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0 0x1260U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST0_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_VCE1 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1 0x1268U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_VCE1_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_TE1 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1 0x1270U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_TE1_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_PM_CAT_BASE_ALIST1 */
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1 0x1280U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_MASKFULL 0x0FFFFFFFFFFFF003ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_INIT_PAGE_SHIFT 40U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_INIT_PAGE_CLRMSK 0xF00000FFFFFFFFFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_ADDR_SHIFT 12U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_WRAP_SHIFT 1U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_WRAP_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_WRAP_EN 0x0000000000000002ULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_VALID_SHIFT 0U
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_VALID_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_BIF_PM_CAT_BASE_ALIST1_VALID_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_BIF_MMU_ENTRY_STATUS */
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS 0x1288U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_MASKFULL 0x000000FFFFFFF0F3ULL
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_ADDRESS_SHIFT 12U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_ADDRESS_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_CAT_BASE_SHIFT 4U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_CAT_BASE_CLRMSK 0xFFFFFFFFFFFFFF0FULL
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_DATA_TYPE_SHIFT 0U
+#define ROGUE_CR_BIF_MMU_ENTRY_STATUS_DATA_TYPE_CLRMSK 0xFFFFFFFFFFFFFFFCULL
+
+/* Register ROGUE_CR_BIF_MMU_ENTRY */
+#define ROGUE_CR_BIF_MMU_ENTRY 0x1290U
+#define ROGUE_CR_BIF_MMU_ENTRY_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_BIF_MMU_ENTRY_ENABLE_SHIFT 1U
+#define ROGUE_CR_BIF_MMU_ENTRY_ENABLE_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_MMU_ENTRY_ENABLE_EN 0x00000002U
+#define ROGUE_CR_BIF_MMU_ENTRY_PENDING_SHIFT 0U
+#define ROGUE_CR_BIF_MMU_ENTRY_PENDING_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_MMU_ENTRY_PENDING_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_CTRL_INVAL */
+#define ROGUE_CR_BIF_CTRL_INVAL 0x12A0U
+#define ROGUE_CR_BIF_CTRL_INVAL_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_BIF_CTRL_INVAL_TLB1_SHIFT 3U
+#define ROGUE_CR_BIF_CTRL_INVAL_TLB1_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_BIF_CTRL_INVAL_TLB1_EN 0x00000008U
+#define ROGUE_CR_BIF_CTRL_INVAL_PC_SHIFT 2U
+#define ROGUE_CR_BIF_CTRL_INVAL_PC_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_CTRL_INVAL_PC_EN 0x00000004U
+#define ROGUE_CR_BIF_CTRL_INVAL_PD_SHIFT 1U
+#define ROGUE_CR_BIF_CTRL_INVAL_PD_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_CTRL_INVAL_PD_EN 0x00000002U
+#define ROGUE_CR_BIF_CTRL_INVAL_PT_SHIFT 0U
+#define ROGUE_CR_BIF_CTRL_INVAL_PT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_CTRL_INVAL_PT_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_CTRL */
+#define ROGUE_CR_BIF_CTRL 0x12A8U
+#define ROGUE_CR_BIF_CTRL__XE_MEM__MASKFULL 0x000000000000033FULL
+#define ROGUE_CR_BIF_CTRL_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_CPU_SHIFT 9U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_CPU_CLRMSK 0xFFFFFDFFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_CPU_EN 0x00000200U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF4_SHIFT 8U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF4_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF4_EN 0x00000100U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_QUEUE_BYPASS_SHIFT 7U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_QUEUE_BYPASS_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_QUEUE_BYPASS_EN 0x00000080U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_AUTO_PREFETCH_SHIFT 6U
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_AUTO_PREFETCH_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_BIF_CTRL_ENABLE_MMU_AUTO_PREFETCH_EN 0x00000040U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF3_SHIFT 5U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF3_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF3_EN 0x00000020U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF2_SHIFT 4U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF2_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF2_EN 0x00000010U
+#define ROGUE_CR_BIF_CTRL_PAUSE_BIF1_SHIFT 3U
+#define ROGUE_CR_BIF_CTRL_PAUSE_BIF1_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_BIF_CTRL_PAUSE_BIF1_EN 0x00000008U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_PM_SHIFT 2U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_PM_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_PM_EN 0x00000004U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF1_SHIFT 1U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF1_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF1_EN 0x00000002U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF0_SHIFT 0U
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF0_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_CTRL_PAUSE_MMU_BIF0_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS 0x12B0U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_MASKFULL 0x000000000000F775ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_FAULT_BANK0_MMU_STATUS_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS 0x12B8U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__MASKFULL 0x001FFFFFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_MASKFULL 0x0007FFFFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__RNW_SHIFT 52U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__RNW_CLRMSK 0xFFEFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__RNW_EN 0x0010000000000000ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_RNW_SHIFT 50U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_RNW_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_RNW_EN 0x0004000000000000ULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_SB_SHIFT 46U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_SB_CLRMSK 0xFFF03FFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_SHIFT 44U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_CLRMSK 0xFFFC0FFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_SHIFT 40U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_ID_SHIFT 40U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS__XE_MEM__TAG_ID_CLRMSK 0xFFFFC0FFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS 0x12C0U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_MASKFULL 0x000000000000F775ULL
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_FAULT_BANK1_MMU_STATUS_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS */
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS 0x12C8U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_MASKFULL 0x0007FFFFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_RNW_SHIFT 50U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_RNW_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_RNW_EN 0x0004000000000000ULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_SB_SHIFT 44U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_SB_CLRMSK 0xFFFC0FFFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_ID_SHIFT 40U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_BIF_FAULT_BANK1_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_BIF_MMU_STATUS */
+#define ROGUE_CR_BIF_MMU_STATUS 0x12D0U
+#define ROGUE_CR_BIF_MMU_STATUS__XE_MEM__MASKFULL 0x000000001FFFFFF7ULL
+#define ROGUE_CR_BIF_MMU_STATUS_MASKFULL 0x000000001FFFFFF7ULL
+#define ROGUE_CR_BIF_MMU_STATUS_PM_FAULT_SHIFT 28U
+#define ROGUE_CR_BIF_MMU_STATUS_PM_FAULT_CLRMSK 0xEFFFFFFFU
+#define ROGUE_CR_BIF_MMU_STATUS_PM_FAULT_EN 0x10000000U
+#define ROGUE_CR_BIF_MMU_STATUS_PC_DATA_SHIFT 20U
+#define ROGUE_CR_BIF_MMU_STATUS_PC_DATA_CLRMSK 0xF00FFFFFU
+#define ROGUE_CR_BIF_MMU_STATUS_PD_DATA_SHIFT 12U
+#define ROGUE_CR_BIF_MMU_STATUS_PD_DATA_CLRMSK 0xFFF00FFFU
+#define ROGUE_CR_BIF_MMU_STATUS_PT_DATA_SHIFT 4U
+#define ROGUE_CR_BIF_MMU_STATUS_PT_DATA_CLRMSK 0xFFFFF00FU
+#define ROGUE_CR_BIF_MMU_STATUS_STALLED_SHIFT 2U
+#define ROGUE_CR_BIF_MMU_STATUS_STALLED_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_MMU_STATUS_STALLED_EN 0x00000004U
+#define ROGUE_CR_BIF_MMU_STATUS_PAUSED_SHIFT 1U
+#define ROGUE_CR_BIF_MMU_STATUS_PAUSED_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_MMU_STATUS_PAUSED_EN 0x00000002U
+#define ROGUE_CR_BIF_MMU_STATUS_BUSY_SHIFT 0U
+#define ROGUE_CR_BIF_MMU_STATUS_BUSY_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_MMU_STATUS_BUSY_EN 0x00000001U
+
+/* Register group: ROGUE_CR_BIF_TILING_CFG, with 8 repeats */
+#define ROGUE_CR_BIF_TILING_CFG_REPEATCOUNT 8U
+/* Register ROGUE_CR_BIF_TILING_CFG0 */
+#define ROGUE_CR_BIF_TILING_CFG0 0x12D8U
+#define ROGUE_CR_BIF_TILING_CFG0_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG0_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG0_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG0_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG0_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG1 */
+#define ROGUE_CR_BIF_TILING_CFG1 0x12E0U
+#define ROGUE_CR_BIF_TILING_CFG1_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG1_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG1_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG1_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG1_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG2 */
+#define ROGUE_CR_BIF_TILING_CFG2 0x12E8U
+#define ROGUE_CR_BIF_TILING_CFG2_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG2_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG2_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG2_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG2_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG3 */
+#define ROGUE_CR_BIF_TILING_CFG3 0x12F0U
+#define ROGUE_CR_BIF_TILING_CFG3_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG3_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG3_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG3_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG3_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG4 */
+#define ROGUE_CR_BIF_TILING_CFG4 0x12F8U
+#define ROGUE_CR_BIF_TILING_CFG4_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG4_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG4_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG4_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG4_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG5 */
+#define ROGUE_CR_BIF_TILING_CFG5 0x1300U
+#define ROGUE_CR_BIF_TILING_CFG5_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG5_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG5_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG5_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG5_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG6 */
+#define ROGUE_CR_BIF_TILING_CFG6 0x1308U
+#define ROGUE_CR_BIF_TILING_CFG6_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG6_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG6_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG6_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG6_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_TILING_CFG7 */
+#define ROGUE_CR_BIF_TILING_CFG7 0x1310U
+#define ROGUE_CR_BIF_TILING_CFG7_MASKFULL 0xFFFFFFFF0FFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_XSTRIDE_SHIFT 61U
+#define ROGUE_CR_BIF_TILING_CFG7_XSTRIDE_CLRMSK 0x1FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_ENABLE_SHIFT 60U
+#define ROGUE_CR_BIF_TILING_CFG7_ENABLE_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_ENABLE_EN 0x1000000000000000ULL
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_SHIFT 32U
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_CLRMSK 0xF0000000FFFFFFFFULL
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG7_MAX_ADDRESS_ALIGNSIZE 4096U
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_SHIFT 0U
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_CLRMSK 0xFFFFFFFFF0000000ULL
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_ALIGNSHIFT 12U
+#define ROGUE_CR_BIF_TILING_CFG7_MIN_ADDRESS_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_BIF_READS_EXT_STATUS */
+#define ROGUE_CR_BIF_READS_EXT_STATUS 0x1320U
+#define ROGUE_CR_BIF_READS_EXT_STATUS_MASKFULL 0x000000000FFFFFFFULL
+#define ROGUE_CR_BIF_READS_EXT_STATUS_MMU_SHIFT 16U
+#define ROGUE_CR_BIF_READS_EXT_STATUS_MMU_CLRMSK 0xF000FFFFU
+#define ROGUE_CR_BIF_READS_EXT_STATUS_BANK1_SHIFT 0U
+#define ROGUE_CR_BIF_READS_EXT_STATUS_BANK1_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIF_READS_INT_STATUS */
+#define ROGUE_CR_BIF_READS_INT_STATUS 0x1328U
+#define ROGUE_CR_BIF_READS_INT_STATUS_MASKFULL 0x0000000007FFFFFFULL
+#define ROGUE_CR_BIF_READS_INT_STATUS_MMU_SHIFT 16U
+#define ROGUE_CR_BIF_READS_INT_STATUS_MMU_CLRMSK 0xF800FFFFU
+#define ROGUE_CR_BIF_READS_INT_STATUS_BANK1_SHIFT 0U
+#define ROGUE_CR_BIF_READS_INT_STATUS_BANK1_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIFPM_READS_INT_STATUS */
+#define ROGUE_CR_BIFPM_READS_INT_STATUS 0x1330U
+#define ROGUE_CR_BIFPM_READS_INT_STATUS_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_BIFPM_READS_INT_STATUS_BANK0_SHIFT 0U
+#define ROGUE_CR_BIFPM_READS_INT_STATUS_BANK0_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIFPM_READS_EXT_STATUS */
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS 0x1338U
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS_BANK0_SHIFT 0U
+#define ROGUE_CR_BIFPM_READS_EXT_STATUS_BANK0_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIFPM_STATUS_MMU */
+#define ROGUE_CR_BIFPM_STATUS_MMU 0x1350U
+#define ROGUE_CR_BIFPM_STATUS_MMU_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_BIFPM_STATUS_MMU_REQUESTS_SHIFT 0U
+#define ROGUE_CR_BIFPM_STATUS_MMU_REQUESTS_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_BIF_STATUS_MMU */
+#define ROGUE_CR_BIF_STATUS_MMU 0x1358U
+#define ROGUE_CR_BIF_STATUS_MMU_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_BIF_STATUS_MMU_REQUESTS_SHIFT 0U
+#define ROGUE_CR_BIF_STATUS_MMU_REQUESTS_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_BIF_FAULT_READ */
+#define ROGUE_CR_BIF_FAULT_READ 0x13E0U
+#define ROGUE_CR_BIF_FAULT_READ_MASKFULL 0x000000FFFFFFFFF0ULL
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_SHIFT 4U
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_BIF_FAULT_READ_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS */
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS 0x1430U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_MASKFULL 0x000000000000F775ULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_MMU_STATUS_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS */
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS 0x1438U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_MASKFULL 0x0007FFFFFFFFFFF0ULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_RNW_SHIFT 50U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_RNW_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_RNW_EN 0x0004000000000000ULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_SHIFT 44U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_SB_CLRMSK 0xFFFC0FFFFFFFFFFFULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_SHIFT 40U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_SHIFT 4U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_TEXAS_BIF_FAULT_BANK0_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_MCU_FENCE */
+#define ROGUE_CR_MCU_FENCE 0x1740U
+#define ROGUE_CR_MCU_FENCE_MASKFULL 0x000007FFFFFFFFE0ULL
+#define ROGUE_CR_MCU_FENCE_DM_SHIFT 40U
+#define ROGUE_CR_MCU_FENCE_DM_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_MCU_FENCE_DM_VERTEX 0x0000000000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_PIXEL 0x0000010000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_COMPUTE 0x0000020000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_RAY_VERTEX 0x0000030000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_RAY 0x0000040000000000ULL
+#define ROGUE_CR_MCU_FENCE_DM_FASTRENDER 0x0000050000000000ULL
+#define ROGUE_CR_MCU_FENCE_ADDR_SHIFT 5U
+#define ROGUE_CR_MCU_FENCE_ADDR_CLRMSK 0xFFFFFF000000001FULL
+#define ROGUE_CR_MCU_FENCE_ADDR_ALIGNSHIFT 5U
+#define ROGUE_CR_MCU_FENCE_ADDR_ALIGNSIZE 32U
+
+/* Register group: ROGUE_CR_SCRATCH, with 16 repeats */
+#define ROGUE_CR_SCRATCH_REPEATCOUNT 16U
+/* Register ROGUE_CR_SCRATCH0 */
+#define ROGUE_CR_SCRATCH0 0x1A00U
+#define ROGUE_CR_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH1 */
+#define ROGUE_CR_SCRATCH1 0x1A08U
+#define ROGUE_CR_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH2 */
+#define ROGUE_CR_SCRATCH2 0x1A10U
+#define ROGUE_CR_SCRATCH2_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH2_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH3 */
+#define ROGUE_CR_SCRATCH3 0x1A18U
+#define ROGUE_CR_SCRATCH3_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH3_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH4 */
+#define ROGUE_CR_SCRATCH4 0x1A20U
+#define ROGUE_CR_SCRATCH4_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH4_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH4_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH5 */
+#define ROGUE_CR_SCRATCH5 0x1A28U
+#define ROGUE_CR_SCRATCH5_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH5_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH5_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH6 */
+#define ROGUE_CR_SCRATCH6 0x1A30U
+#define ROGUE_CR_SCRATCH6_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH6_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH6_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH7 */
+#define ROGUE_CR_SCRATCH7 0x1A38U
+#define ROGUE_CR_SCRATCH7_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH7_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH7_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH8 */
+#define ROGUE_CR_SCRATCH8 0x1A40U
+#define ROGUE_CR_SCRATCH8_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH8_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH8_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH9 */
+#define ROGUE_CR_SCRATCH9 0x1A48U
+#define ROGUE_CR_SCRATCH9_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH9_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH9_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH10 */
+#define ROGUE_CR_SCRATCH10 0x1A50U
+#define ROGUE_CR_SCRATCH10_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH10_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH10_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH11 */
+#define ROGUE_CR_SCRATCH11 0x1A58U
+#define ROGUE_CR_SCRATCH11_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH11_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH11_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH12 */
+#define ROGUE_CR_SCRATCH12 0x1A60U
+#define ROGUE_CR_SCRATCH12_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH12_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH12_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH13 */
+#define ROGUE_CR_SCRATCH13 0x1A68U
+#define ROGUE_CR_SCRATCH13_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH13_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH13_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH14 */
+#define ROGUE_CR_SCRATCH14 0x1A70U
+#define ROGUE_CR_SCRATCH14_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH14_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH14_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SCRATCH15 */
+#define ROGUE_CR_SCRATCH15 0x1A78U
+#define ROGUE_CR_SCRATCH15_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SCRATCH15_DATA_SHIFT 0U
+#define ROGUE_CR_SCRATCH15_DATA_CLRMSK 0x00000000U
+
+/* Register group: ROGUE_CR_OS0_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS0_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS0_SCRATCH0 */
+#define ROGUE_CR_OS0_SCRATCH0 0x1A80U
+#define ROGUE_CR_OS0_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS0_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS0_SCRATCH1 */
+#define ROGUE_CR_OS0_SCRATCH1 0x1A88U
+#define ROGUE_CR_OS0_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS0_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS0_SCRATCH2 */
+#define ROGUE_CR_OS0_SCRATCH2 0x1A90U
+#define ROGUE_CR_OS0_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS0_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS0_SCRATCH3 */
+#define ROGUE_CR_OS0_SCRATCH3 0x1A98U
+#define ROGUE_CR_OS0_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS0_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS0_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS1_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS1_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS1_SCRATCH0 */
+#define ROGUE_CR_OS1_SCRATCH0 0x11A80U
+#define ROGUE_CR_OS1_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS1_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS1_SCRATCH1 */
+#define ROGUE_CR_OS1_SCRATCH1 0x11A88U
+#define ROGUE_CR_OS1_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS1_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS1_SCRATCH2 */
+#define ROGUE_CR_OS1_SCRATCH2 0x11A90U
+#define ROGUE_CR_OS1_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS1_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS1_SCRATCH3 */
+#define ROGUE_CR_OS1_SCRATCH3 0x11A98U
+#define ROGUE_CR_OS1_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS1_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS1_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS2_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS2_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS2_SCRATCH0 */
+#define ROGUE_CR_OS2_SCRATCH0 0x21A80U
+#define ROGUE_CR_OS2_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS2_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS2_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS2_SCRATCH1 */
+#define ROGUE_CR_OS2_SCRATCH1 0x21A88U
+#define ROGUE_CR_OS2_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS2_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS2_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS2_SCRATCH2 */
+#define ROGUE_CR_OS2_SCRATCH2 0x21A90U
+#define ROGUE_CR_OS2_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS2_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS2_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS2_SCRATCH3 */
+#define ROGUE_CR_OS2_SCRATCH3 0x21A98U
+#define ROGUE_CR_OS2_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS2_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS2_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS3_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS3_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS3_SCRATCH0 */
+#define ROGUE_CR_OS3_SCRATCH0 0x31A80U
+#define ROGUE_CR_OS3_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS3_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS3_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS3_SCRATCH1 */
+#define ROGUE_CR_OS3_SCRATCH1 0x31A88U
+#define ROGUE_CR_OS3_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS3_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS3_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS3_SCRATCH2 */
+#define ROGUE_CR_OS3_SCRATCH2 0x31A90U
+#define ROGUE_CR_OS3_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS3_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS3_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS3_SCRATCH3 */
+#define ROGUE_CR_OS3_SCRATCH3 0x31A98U
+#define ROGUE_CR_OS3_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS3_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS3_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS4_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS4_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS4_SCRATCH0 */
+#define ROGUE_CR_OS4_SCRATCH0 0x41A80U
+#define ROGUE_CR_OS4_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS4_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS4_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS4_SCRATCH1 */
+#define ROGUE_CR_OS4_SCRATCH1 0x41A88U
+#define ROGUE_CR_OS4_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS4_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS4_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS4_SCRATCH2 */
+#define ROGUE_CR_OS4_SCRATCH2 0x41A90U
+#define ROGUE_CR_OS4_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS4_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS4_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS4_SCRATCH3 */
+#define ROGUE_CR_OS4_SCRATCH3 0x41A98U
+#define ROGUE_CR_OS4_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS4_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS4_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS5_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS5_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS5_SCRATCH0 */
+#define ROGUE_CR_OS5_SCRATCH0 0x51A80U
+#define ROGUE_CR_OS5_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS5_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS5_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS5_SCRATCH1 */
+#define ROGUE_CR_OS5_SCRATCH1 0x51A88U
+#define ROGUE_CR_OS5_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS5_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS5_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS5_SCRATCH2 */
+#define ROGUE_CR_OS5_SCRATCH2 0x51A90U
+#define ROGUE_CR_OS5_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS5_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS5_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS5_SCRATCH3 */
+#define ROGUE_CR_OS5_SCRATCH3 0x51A98U
+#define ROGUE_CR_OS5_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS5_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS5_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS6_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS6_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS6_SCRATCH0 */
+#define ROGUE_CR_OS6_SCRATCH0 0x61A80U
+#define ROGUE_CR_OS6_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS6_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS6_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS6_SCRATCH1 */
+#define ROGUE_CR_OS6_SCRATCH1 0x61A88U
+#define ROGUE_CR_OS6_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS6_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS6_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS6_SCRATCH2 */
+#define ROGUE_CR_OS6_SCRATCH2 0x61A90U
+#define ROGUE_CR_OS6_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS6_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS6_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS6_SCRATCH3 */
+#define ROGUE_CR_OS6_SCRATCH3 0x61A98U
+#define ROGUE_CR_OS6_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS6_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS6_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register group: ROGUE_CR_OS7_SCRATCH, with 2 repeats */
+#define ROGUE_CR_OS7_SCRATCH_REPEATCOUNT 2U
+/* Register ROGUE_CR_OS7_SCRATCH0 */
+#define ROGUE_CR_OS7_SCRATCH0 0x71A80U
+#define ROGUE_CR_OS7_SCRATCH0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS7_SCRATCH0_DATA_SHIFT 0U
+#define ROGUE_CR_OS7_SCRATCH0_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS7_SCRATCH1 */
+#define ROGUE_CR_OS7_SCRATCH1 0x71A88U
+#define ROGUE_CR_OS7_SCRATCH1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_OS7_SCRATCH1_DATA_SHIFT 0U
+#define ROGUE_CR_OS7_SCRATCH1_DATA_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OS7_SCRATCH2 */
+#define ROGUE_CR_OS7_SCRATCH2 0x71A90U
+#define ROGUE_CR_OS7_SCRATCH2_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS7_SCRATCH2_DATA_SHIFT 0U
+#define ROGUE_CR_OS7_SCRATCH2_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_OS7_SCRATCH3 */
+#define ROGUE_CR_OS7_SCRATCH3 0x71A98U
+#define ROGUE_CR_OS7_SCRATCH3_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_OS7_SCRATCH3_DATA_SHIFT 0U
+#define ROGUE_CR_OS7_SCRATCH3_DATA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_SPFILTER_SIGNAL_DESCR */
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR 0x2700U
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_SHIFT 0U
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_CLRMSK 0xFFFF0000U
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_ALIGNSHIFT 4U
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_SIZE_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN */
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN 0x2708U
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_MASKFULL 0x000000FFFFFFFFF0ULL
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_SHIFT 4U
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_ALIGNSHIFT 4U
+#define ROGUE_CR_SPFILTER_SIGNAL_DESCR_MIN_ADDR_ALIGNSIZE 16U
+
+/* Register group: ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG, with 16 repeats */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG_REPEATCOUNT 16U
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0 0x3000U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1 0x3008U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG1_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2 0x3010U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG2_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3 0x3018U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG3_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4 0x3020U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG4_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5 0x3028U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG5_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6 0x3030U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG6_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7 0x3038U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG7_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8 0x3040U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG8_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9 0x3048U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG9_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10 0x3050U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG10_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11 0x3058U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG11_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12 0x3060U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG12_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13 0x3068U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG13_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14 0x3070U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG14_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15 */
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15 0x3078U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_MASKFULL 0x7FFFF7FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_TRUSTED_SHIFT 62U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_TRUSTED_CLRMSK 0xBFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_TRUSTED_EN 0x4000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_LOAD_STORE_EN_SHIFT 61U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_LOAD_STORE_EN_CLRMSK 0xDFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_LOAD_STORE_EN_EN 0x2000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_FETCH_EN_SHIFT 60U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_FETCH_EN_CLRMSK 0xEFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_FETCH_EN_EN 0x1000000000000000ULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_SIZE_SHIFT 44U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_SIZE_CLRMSK 0xF0000FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_CBASE_SHIFT 40U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_CBASE_CLRMSK 0xFFFFF8FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG15_DEVVADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_BOOT */
+#define ROGUE_CR_FWCORE_BOOT 0x3090U
+#define ROGUE_CR_FWCORE_BOOT_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_FWCORE_BOOT_ENABLE_SHIFT 0U
+#define ROGUE_CR_FWCORE_BOOT_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_BOOT_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_FWCORE_RESET_ADDR */
+#define ROGUE_CR_FWCORE_RESET_ADDR 0x3098U
+#define ROGUE_CR_FWCORE_RESET_ADDR_MASKFULL 0x00000000FFFFFFFEULL
+#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_SHIFT 1U
+#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_CLRMSK 0x00000001U
+#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_ALIGNSHIFT 1U
+#define ROGUE_CR_FWCORE_RESET_ADDR_ADDR_ALIGNSIZE 2U
+
+/* Register ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR */
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR 0x30A0U
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_MASKFULL 0x00000000FFFFFFFEULL
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_SHIFT 1U
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_CLRMSK 0x00000001U
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_ALIGNSHIFT 1U
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_ADDR_ADDR_ALIGNSIZE 2U
+
+/* Register ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT */
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT 0x30A8U
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_TRIGGER_EN_SHIFT 0U
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_TRIGGER_EN_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_WRAPPER_NMI_EVENT_TRIGGER_EN_EN 0x00000001U
+
+/* Register ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS */
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS 0x30B0U
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_MASKFULL 0x000000000000F771ULL
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_MEM_FAULT_MMU_STATUS_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS */
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS 0x30B8U
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_MASKFULL 0x001FFFFFFFFFFFF0ULL
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_RNW_SHIFT 52U
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_RNW_CLRMSK 0xFFEFFFFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_RNW_EN 0x0010000000000000ULL
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_SB_SHIFT 46U
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_SB_CLRMSK 0xFFF03FFFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_ID_SHIFT 40U
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFC0FFFFFFFFFFULL
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_SHIFT 4U
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define ROGUE_CR_FWCORE_MEM_FAULT_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
+/* Register ROGUE_CR_FWCORE_MEM_CTRL_INVAL */
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL 0x30C0U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_TLB_SHIFT 3U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_TLB_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_TLB_EN 0x00000008U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PC_SHIFT 2U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PC_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PC_EN 0x00000004U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PD_SHIFT 1U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PD_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PD_EN 0x00000002U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PT_SHIFT 0U
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_MEM_CTRL_INVAL_PT_EN 0x00000001U
+
+/* Register ROGUE_CR_FWCORE_MEM_MMU_STATUS */
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS 0x30C8U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_MASKFULL 0x000000000FFFFFF7ULL
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PC_DATA_SHIFT 20U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PC_DATA_CLRMSK 0xF00FFFFFU
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PD_DATA_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PD_DATA_CLRMSK 0xFFF00FFFU
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PT_DATA_SHIFT 4U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PT_DATA_CLRMSK 0xFFFFF00FU
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_STALLED_SHIFT 2U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_STALLED_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_STALLED_EN 0x00000004U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PAUSED_SHIFT 1U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PAUSED_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_PAUSED_EN 0x00000002U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_BUSY_SHIFT 0U
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_BUSY_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_MEM_MMU_STATUS_BUSY_EN 0x00000001U
+
+/* Register ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS */
+#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS 0x30D8U
+#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS_MASKFULL 0x0000000000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS_MMU_SHIFT 0U
+#define ROGUE_CR_FWCORE_MEM_READS_EXT_STATUS_MMU_CLRMSK 0xFFFFF000U
+
+/* Register ROGUE_CR_FWCORE_MEM_READS_INT_STATUS */
+#define ROGUE_CR_FWCORE_MEM_READS_INT_STATUS 0x30E0U
+#define ROGUE_CR_FWCORE_MEM_READS_INT_STATUS_MASKFULL 0x00000000000007FFULL
+#define ROGUE_CR_FWCORE_MEM_READS_INT_STATUS_MMU_SHIFT 0U
+#define ROGUE_CR_FWCORE_MEM_READS_INT_STATUS_MMU_CLRMSK 0xFFFFF800U
+
+/* Register ROGUE_CR_FWCORE_WRAPPER_FENCE */
+#define ROGUE_CR_FWCORE_WRAPPER_FENCE 0x30E8U
+#define ROGUE_CR_FWCORE_WRAPPER_FENCE_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_FWCORE_WRAPPER_FENCE_ID_SHIFT 0U
+#define ROGUE_CR_FWCORE_WRAPPER_FENCE_ID_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_WRAPPER_FENCE_ID_EN 0x00000001U
+
+/* Register group: ROGUE_CR_FWCORE_MEM_CAT_BASE, with 8 repeats */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE_REPEATCOUNT 8U
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE0 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE0 0x30F0U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE0_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE1 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE1 0x30F8U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE1_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE2 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE2 0x3100U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE2_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE3 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE3 0x3108U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE3_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE4 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE4 0x3110U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE4_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE5 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE5 0x3118U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE5_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE6 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE6 0x3120U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE6_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_MEM_CAT_BASE7 */
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE7 0x3128U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_MASKFULL 0x000000FFFFFFF000ULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_SHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_CLRMSK 0xFFFFFF0000000FFFULL
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_FWCORE_MEM_CAT_BASE7_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_FWCORE_WDT_RESET */
+#define ROGUE_CR_FWCORE_WDT_RESET 0x3130U
+#define ROGUE_CR_FWCORE_WDT_RESET_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_FWCORE_WDT_RESET_EN_SHIFT 0U
+#define ROGUE_CR_FWCORE_WDT_RESET_EN_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_WDT_RESET_EN_EN 0x00000001U
+
+/* Register ROGUE_CR_FWCORE_WDT_CTRL */
+#define ROGUE_CR_FWCORE_WDT_CTRL 0x3138U
+#define ROGUE_CR_FWCORE_WDT_CTRL_MASKFULL 0x00000000FFFF1F01ULL
+#define ROGUE_CR_FWCORE_WDT_CTRL_PROT_SHIFT 16U
+#define ROGUE_CR_FWCORE_WDT_CTRL_PROT_CLRMSK 0x0000FFFFU
+#define ROGUE_CR_FWCORE_WDT_CTRL_THRESHOLD_SHIFT 8U
+#define ROGUE_CR_FWCORE_WDT_CTRL_THRESHOLD_CLRMSK 0xFFFFE0FFU
+#define ROGUE_CR_FWCORE_WDT_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_FWCORE_WDT_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_FWCORE_WDT_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_FWCORE_WDT_COUNT */
+#define ROGUE_CR_FWCORE_WDT_COUNT 0x3140U
+#define ROGUE_CR_FWCORE_WDT_COUNT_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_FWCORE_WDT_COUNT_VALUE_SHIFT 0U
+#define ROGUE_CR_FWCORE_WDT_COUNT_VALUE_CLRMSK 0x00000000U
+
+/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED0, with 4 repeats */
+#define ROGUE_CR_FWCORE_DMI_RESERVED0_REPEATCOUNT 4U
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED00 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED00 0x3400U
+#define ROGUE_CR_FWCORE_DMI_RESERVED00_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED01 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED01 0x3408U
+#define ROGUE_CR_FWCORE_DMI_RESERVED01_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED02 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED02 0x3410U
+#define ROGUE_CR_FWCORE_DMI_RESERVED02_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED03 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED03 0x3418U
+#define ROGUE_CR_FWCORE_DMI_RESERVED03_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_DATA0 */
+#define ROGUE_CR_FWCORE_DMI_DATA0 0x3420U
+#define ROGUE_CR_FWCORE_DMI_DATA0_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_DATA1 */
+#define ROGUE_CR_FWCORE_DMI_DATA1 0x3428U
+#define ROGUE_CR_FWCORE_DMI_DATA1_MASKFULL 0x0000000000000000ULL
+
+/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED1, with 5 repeats */
+#define ROGUE_CR_FWCORE_DMI_RESERVED1_REPEATCOUNT 5U
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED10 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED10 0x3430U
+#define ROGUE_CR_FWCORE_DMI_RESERVED10_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED11 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED11 0x3438U
+#define ROGUE_CR_FWCORE_DMI_RESERVED11_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED12 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED12 0x3440U
+#define ROGUE_CR_FWCORE_DMI_RESERVED12_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED13 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED13 0x3448U
+#define ROGUE_CR_FWCORE_DMI_RESERVED13_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED14 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED14 0x3450U
+#define ROGUE_CR_FWCORE_DMI_RESERVED14_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_DMCONTROL */
+#define ROGUE_CR_FWCORE_DMI_DMCONTROL 0x3480U
+#define ROGUE_CR_FWCORE_DMI_DMCONTROL_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_DMSTATUS */
+#define ROGUE_CR_FWCORE_DMI_DMSTATUS 0x3488U
+#define ROGUE_CR_FWCORE_DMI_DMSTATUS_MASKFULL 0x0000000000000000ULL
+
+/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED2, with 4 repeats */
+#define ROGUE_CR_FWCORE_DMI_RESERVED2_REPEATCOUNT 4U
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED20 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED20 0x3490U
+#define ROGUE_CR_FWCORE_DMI_RESERVED20_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED21 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED21 0x3498U
+#define ROGUE_CR_FWCORE_DMI_RESERVED21_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED22 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED22 0x34A0U
+#define ROGUE_CR_FWCORE_DMI_RESERVED22_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED23 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED23 0x34A8U
+#define ROGUE_CR_FWCORE_DMI_RESERVED23_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_ABSTRACTCS */
+#define ROGUE_CR_FWCORE_DMI_ABSTRACTCS 0x34B0U
+#define ROGUE_CR_FWCORE_DMI_ABSTRACTCS_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_COMMAND */
+#define ROGUE_CR_FWCORE_DMI_COMMAND 0x34B8U
+#define ROGUE_CR_FWCORE_DMI_COMMAND_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_SBCS */
+#define ROGUE_CR_FWCORE_DMI_SBCS 0x35C0U
+#define ROGUE_CR_FWCORE_DMI_SBCS_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_SBADDRESS0 */
+#define ROGUE_CR_FWCORE_DMI_SBADDRESS0 0x35C8U
+#define ROGUE_CR_FWCORE_DMI_SBADDRESS0_MASKFULL 0x0000000000000000ULL
+
+/* Register group: ROGUE_CR_FWCORE_DMI_RESERVED3, with 2 repeats */
+#define ROGUE_CR_FWCORE_DMI_RESERVED3_REPEATCOUNT 2U
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED30 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED30 0x34D0U
+#define ROGUE_CR_FWCORE_DMI_RESERVED30_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_RESERVED31 */
+#define ROGUE_CR_FWCORE_DMI_RESERVED31 0x34D8U
+#define ROGUE_CR_FWCORE_DMI_RESERVED31_MASKFULL 0x0000000000000000ULL
+
+/* Register group: ROGUE_CR_FWCORE_DMI_SBDATA, with 4 repeats */
+#define ROGUE_CR_FWCORE_DMI_SBDATA_REPEATCOUNT 4U
+/* Register ROGUE_CR_FWCORE_DMI_SBDATA0 */
+#define ROGUE_CR_FWCORE_DMI_SBDATA0 0x35E0U
+#define ROGUE_CR_FWCORE_DMI_SBDATA0_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_SBDATA1 */
+#define ROGUE_CR_FWCORE_DMI_SBDATA1 0x35E8U
+#define ROGUE_CR_FWCORE_DMI_SBDATA1_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_SBDATA2 */
+#define ROGUE_CR_FWCORE_DMI_SBDATA2 0x35F0U
+#define ROGUE_CR_FWCORE_DMI_SBDATA2_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_SBDATA3 */
+#define ROGUE_CR_FWCORE_DMI_SBDATA3 0x35F8U
+#define ROGUE_CR_FWCORE_DMI_SBDATA3_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_FWCORE_DMI_HALTSUM0 */
+#define ROGUE_CR_FWCORE_DMI_HALTSUM0 0x3600U
+#define ROGUE_CR_FWCORE_DMI_HALTSUM0_MASKFULL 0x0000000000000000ULL
+
+/* Register ROGUE_CR_SLC_CTRL_MISC */
+#define ROGUE_CR_SLC_CTRL_MISC 0x3800U
+#define ROGUE_CR_SLC_CTRL_MISC_MASKFULL 0xFFFFFFFF01FF010FULL
+#define ROGUE_CR_SLC_CTRL_MISC_SCRAMBLE_BITS_SHIFT 32U
+#define ROGUE_CR_SLC_CTRL_MISC_SCRAMBLE_BITS_CLRMSK 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_SHIFT 24U
+#define ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_CLRMSK 0xFFFFFFFFFEFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_EN 0x0000000001000000ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_SHIFT 16U
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_CLRMSK 0xFFFFFFFFFF00FFFFULL
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_INTERLEAVED_64_BYTE 0x0000000000000000ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_INTERLEAVED_128_BYTE 0x0000000000010000ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_SIMPLE_HASH1 0x0000000000100000ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_SIMPLE_HASH2 0x0000000000110000ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_PVR_HASH1 0x0000000000200000ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_PVR_HASH2_SCRAMBLE 0x0000000000210000ULL
+#define ROGUE_CR_SLC_CTRL_MISC_PAUSE_SHIFT 8U
+#define ROGUE_CR_SLC_CTRL_MISC_PAUSE_CLRMSK 0xFFFFFFFFFFFFFEFFULL
+#define ROGUE_CR_SLC_CTRL_MISC_PAUSE_EN 0x0000000000000100ULL
+#define ROGUE_CR_SLC_CTRL_MISC_RESP_PRIORITY_SHIFT 3U
+#define ROGUE_CR_SLC_CTRL_MISC_RESP_PRIORITY_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_SLC_CTRL_MISC_RESP_PRIORITY_EN 0x0000000000000008ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_LINE_USE_LIMIT_SHIFT 2U
+#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_LINE_USE_LIMIT_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_LINE_USE_LIMIT_EN 0x0000000000000004ULL
+#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_SHIFT 1U
+#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_EN 0x0000000000000002ULL
+#define ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_SHIFT 0U
+#define ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_SLC_CTRL_FLUSH_INVAL */
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL 0x3818U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_MASKFULL 0x0000000080000FFFULL
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_LAZY_SHIFT 31U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_LAZY_CLRMSK 0x7FFFFFFFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_LAZY_EN 0x80000000U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FASTRENDER_SHIFT 11U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FASTRENDER_CLRMSK 0xFFFFF7FFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FASTRENDER_EN 0x00000800U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_VERTEX_SHIFT 10U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_VERTEX_CLRMSK 0xFFFFFBFFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_VERTEX_EN 0x00000400U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_SHIFT 9U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_CLRMSK 0xFFFFFDFFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_RAY_EN 0x00000200U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FRC_SHIFT 8U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FRC_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_FRC_EN 0x00000100U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXE_SHIFT 7U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXE_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXE_EN 0x00000080U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXD_SHIFT 6U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXD_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_VXD_EN 0x00000040U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_HOST_META_SHIFT 5U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_HOST_META_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_HOST_META_EN 0x00000020U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_MMU_SHIFT 4U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_MMU_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_MMU_EN 0x00000010U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_COMPUTE_SHIFT 3U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_COMPUTE_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_COMPUTE_EN 0x00000008U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_PIXEL_SHIFT 2U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_PIXEL_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_PIXEL_EN 0x00000004U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_TA_SHIFT 1U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_TA_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_DM_TA_EN 0x00000002U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_ALL_SHIFT 0U
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_ALL_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SLC_CTRL_FLUSH_INVAL_ALL_EN 0x00000001U
+
+/* Register ROGUE_CR_SLC_STATUS0 */
+#define ROGUE_CR_SLC_STATUS0 0x3820U
+#define ROGUE_CR_SLC_STATUS0_MASKFULL 0x0000000000000007ULL
+#define ROGUE_CR_SLC_STATUS0_FLUSH_INVAL_PENDING_SHIFT 2U
+#define ROGUE_CR_SLC_STATUS0_FLUSH_INVAL_PENDING_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SLC_STATUS0_FLUSH_INVAL_PENDING_EN 0x00000004U
+#define ROGUE_CR_SLC_STATUS0_INVAL_PENDING_SHIFT 1U
+#define ROGUE_CR_SLC_STATUS0_INVAL_PENDING_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SLC_STATUS0_INVAL_PENDING_EN 0x00000002U
+#define ROGUE_CR_SLC_STATUS0_FLUSH_PENDING_SHIFT 0U
+#define ROGUE_CR_SLC_STATUS0_FLUSH_PENDING_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SLC_STATUS0_FLUSH_PENDING_EN 0x00000001U
+
+/* Register ROGUE_CR_SLC_CTRL_BYPASS */
+#define ROGUE_CR_SLC_CTRL_BYPASS 0x3828U
+#define ROGUE_CR_SLC_CTRL_BYPASS__XE_MEM__MASKFULL 0x0FFFFFFFFFFF7FFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_MASKFULL 0x000000000FFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_ZLS_SHIFT 59U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_ZLS_CLRMSK 0xF7FFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_ZLS_EN 0x0800000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_HEADER_SHIFT 58U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_HEADER_CLRMSK 0xFBFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_HEADER_EN 0x0400000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_HEADER_SHIFT 57U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_HEADER_CLRMSK 0xFDFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_HEADER_EN 0x0200000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_DATA_SHIFT 56U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_DATA_CLRMSK 0xFEFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_ZLS_DATA_EN 0x0100000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_DATA_SHIFT 55U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_DATA_CLRMSK 0xFF7FFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_DECOMP_TCU_DATA_EN 0x0080000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_PBE_SHIFT 54U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_PBE_CLRMSK 0xFFBFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TFBC_COMP_PBE_EN 0x0040000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_DM_COMPUTE_SHIFT 53U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_DM_COMPUTE_CLRMSK 0xFFDFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_DM_COMPUTE_EN 0x0020000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_PDSRW_NOLINEFILL_SHIFT 52U
+#define ROGUE_CR_SLC_CTRL_BYPASS_PDSRW_NOLINEFILL_CLRMSK 0xFFEFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_PDSRW_NOLINEFILL_EN 0x0010000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_PBE_NOLINEFILL_SHIFT 51U
+#define ROGUE_CR_SLC_CTRL_BYPASS_PBE_NOLINEFILL_CLRMSK 0xFFF7FFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_PBE_NOLINEFILL_EN 0x0008000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBC_SHIFT 50U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBC_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBC_EN 0x0004000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_RREQ_SHIFT 49U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_RREQ_CLRMSK 0xFFFDFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_RREQ_EN 0x0002000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CREQ_SHIFT 48U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CREQ_CLRMSK 0xFFFEFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CREQ_EN 0x0001000000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_PREQ_SHIFT 47U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_PREQ_CLRMSK 0xFFFF7FFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_PREQ_EN 0x0000800000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_DBSC_SHIFT 46U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_DBSC_CLRMSK 0xFFFFBFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_DBSC_EN 0x0000400000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_SHIFT 45U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_CLRMSK 0xFFFFDFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TCU_EN 0x0000200000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PBE_SHIFT 44U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PBE_CLRMSK 0xFFFFEFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PBE_EN 0x0000100000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_ISP_SHIFT 43U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_ISP_CLRMSK 0xFFFFF7FFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_ISP_EN 0x0000080000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PM_SHIFT 42U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PM_CLRMSK 0xFFFFFBFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PM_EN 0x0000040000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TDM_SHIFT 41U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TDM_CLRMSK 0xFFFFFDFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TDM_EN 0x0000020000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_CDM_SHIFT 40U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_CDM_CLRMSK 0xFFFFFEFFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_CDM_EN 0x0000010000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_PDS_STATE_SHIFT 39U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_PDS_STATE_CLRMSK 0xFFFFFF7FFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_PDS_STATE_EN 0x0000008000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_DB_SHIFT 38U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_DB_CLRMSK 0xFFFFFFBFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_DB_EN 0x0000004000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_VTX_VAR_SHIFT 37U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_VTX_VAR_CLRMSK 0xFFFFFFDFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TSPF_VTX_VAR_EN 0x0000002000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_VDM_SHIFT 36U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_VDM_CLRMSK 0xFFFFFFEFFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_VDM_EN 0x0000001000000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_STREAM_SHIFT 35U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_STREAM_CLRMSK 0xFFFFFFF7FFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_STREAM_EN 0x0000000800000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_REGION_SHIFT 34U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_REGION_CLRMSK 0xFFFFFFFBFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PSG_REGION_EN 0x0000000400000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_VCE_SHIFT 33U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_VCE_CLRMSK 0xFFFFFFFDFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_VCE_EN 0x0000000200000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PPP_SHIFT 32U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PPP_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_PPP_EN 0x0000000100000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FASTRENDER_SHIFT 31U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FASTRENDER_CLRMSK 0xFFFFFFFF7FFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FASTRENDER_EN 0x0000000080000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PM_ALIST_SHIFT 30U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PM_ALIST_CLRMSK 0xFFFFFFFFBFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PM_ALIST_EN 0x0000000040000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_TE_SHIFT 29U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_TE_CLRMSK 0xFFFFFFFFDFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_TE_EN 0x0000000020000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_VCE_SHIFT 28U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_VCE_CLRMSK 0xFFFFFFFFEFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PB_VCE_EN 0x0000000010000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_VERTEX_SHIFT 27U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_VERTEX_CLRMSK 0xFFFFFFFFF7FFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_VERTEX_EN 0x0000000008000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_SHIFT 26U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_CLRMSK 0xFFFFFFFFFBFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_RAY_EN 0x0000000004000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CPF_SHIFT 25U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CPF_CLRMSK 0xFFFFFFFFFDFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_CPF_EN 0x0000000002000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPU_SHIFT 24U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPU_CLRMSK 0xFFFFFFFFFEFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPU_EN 0x0000000001000000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBDC_SHIFT 23U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBDC_CLRMSK 0xFFFFFFFFFF7FFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_FBDC_EN 0x0000000000800000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TLA_SHIFT 22U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TLA_CLRMSK 0xFFFFFFFFFFBFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TLA_EN 0x0000000000400000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_N_SHIFT 21U
+#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_N_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_N_EN 0x0000000000200000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_SHIFT 20U
+#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_CLRMSK 0xFFFFFFFFFFEFFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_BYP_CC_EN 0x0000000000100000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MCU_SHIFT 19U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MCU_CLRMSK 0xFFFFFFFFFFF7FFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MCU_EN 0x0000000000080000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PDS_SHIFT 18U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PDS_CLRMSK 0xFFFFFFFFFFFBFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_PDS_EN 0x0000000000040000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPF_SHIFT 17U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPF_CLRMSK 0xFFFFFFFFFFFDFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TPF_EN 0x0000000000020000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_TPC_SHIFT 16U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_TPC_CLRMSK 0xFFFFFFFFFFFEFFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_TA_TPC_EN 0x0000000000010000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_OBJ_SHIFT 15U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_OBJ_CLRMSK 0xFFFFFFFFFFFF7FFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_IPF_OBJ_EN 0x0000000000008000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_USC_SHIFT 14U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_USC_CLRMSK 0xFFFFFFFFFFFFBFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_USC_EN 0x0000000000004000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_META_SHIFT 13U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_META_CLRMSK 0xFFFFFFFFFFFFDFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_META_EN 0x0000000000002000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_HOST_SHIFT 12U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_HOST_CLRMSK 0xFFFFFFFFFFFFEFFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_HOST_EN 0x0000000000001000ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PT_SHIFT 11U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PT_CLRMSK 0xFFFFFFFFFFFFF7FFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PT_EN 0x0000000000000800ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PD_SHIFT 10U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PD_CLRMSK 0xFFFFFFFFFFFFFBFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PD_EN 0x0000000000000400ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PC_SHIFT 9U
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PC_CLRMSK 0xFFFFFFFFFFFFFDFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_REQ_MMU_PC_EN 0x0000000000000200ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FRC_SHIFT 8U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FRC_CLRMSK 0xFFFFFFFFFFFFFEFFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_FRC_EN 0x0000000000000100ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXE_SHIFT 7U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXE_CLRMSK 0xFFFFFFFFFFFFFF7FULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXE_EN 0x0000000000000080ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXD_SHIFT 6U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXD_CLRMSK 0xFFFFFFFFFFFFFFBFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_VXD_EN 0x0000000000000040ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_HOST_META_SHIFT 5U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_HOST_META_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_HOST_META_EN 0x0000000000000020ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_MMU_SHIFT 4U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_MMU_CLRMSK 0xFFFFFFFFFFFFFFEFULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_MMU_EN 0x0000000000000010ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_COMPUTE_SHIFT 3U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_COMPUTE_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_COMPUTE_EN 0x0000000000000008ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PIXEL_SHIFT 2U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PIXEL_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_PIXEL_EN 0x0000000000000004ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_TA_SHIFT 1U
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_TA_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_DM_TA_EN 0x0000000000000002ULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_ALL_SHIFT 0U
+#define ROGUE_CR_SLC_CTRL_BYPASS_ALL_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_SLC_CTRL_BYPASS_ALL_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_SLC_STATUS1 */
+#define ROGUE_CR_SLC_STATUS1 0x3870U
+#define ROGUE_CR_SLC_STATUS1_MASKFULL 0x800003FF03FFFFFFULL
+#define ROGUE_CR_SLC_STATUS1_PAUSED_SHIFT 63U
+#define ROGUE_CR_SLC_STATUS1_PAUSED_CLRMSK 0x7FFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC_STATUS1_PAUSED_EN 0x8000000000000000ULL
+#define ROGUE_CR_SLC_STATUS1_READS1_SHIFT 32U
+#define ROGUE_CR_SLC_STATUS1_READS1_CLRMSK 0xFFFFFC00FFFFFFFFULL
+#define ROGUE_CR_SLC_STATUS1_READS0_SHIFT 16U
+#define ROGUE_CR_SLC_STATUS1_READS0_CLRMSK 0xFFFFFFFFFC00FFFFULL
+#define ROGUE_CR_SLC_STATUS1_READS1_EXT_SHIFT 8U
+#define ROGUE_CR_SLC_STATUS1_READS1_EXT_CLRMSK 0xFFFFFFFFFFFF00FFULL
+#define ROGUE_CR_SLC_STATUS1_READS0_EXT_SHIFT 0U
+#define ROGUE_CR_SLC_STATUS1_READS0_EXT_CLRMSK 0xFFFFFFFFFFFFFF00ULL
+
+/* Register ROGUE_CR_SLC_IDLE */
+#define ROGUE_CR_SLC_IDLE 0x3898U
+#define ROGUE_CR_SLC_IDLE__XE_MEM__MASKFULL 0x00000000000003FFULL
+#define ROGUE_CR_SLC_IDLE_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_SLC_IDLE_MH_SYSARB1_SHIFT 9U
+#define ROGUE_CR_SLC_IDLE_MH_SYSARB1_CLRMSK 0xFFFFFDFFU
+#define ROGUE_CR_SLC_IDLE_MH_SYSARB1_EN 0x00000200U
+#define ROGUE_CR_SLC_IDLE_MH_SYSARB0_SHIFT 8U
+#define ROGUE_CR_SLC_IDLE_MH_SYSARB0_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_SLC_IDLE_MH_SYSARB0_EN 0x00000100U
+#define ROGUE_CR_SLC_IDLE_IMGBV4_SHIFT 7U
+#define ROGUE_CR_SLC_IDLE_IMGBV4_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_SLC_IDLE_IMGBV4_EN 0x00000080U
+#define ROGUE_CR_SLC_IDLE_CACHE_BANKS_SHIFT 6U
+#define ROGUE_CR_SLC_IDLE_CACHE_BANKS_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_SLC_IDLE_CACHE_BANKS_EN 0x00000040U
+#define ROGUE_CR_SLC_IDLE_RBOFIFO_SHIFT 5U
+#define ROGUE_CR_SLC_IDLE_RBOFIFO_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_SLC_IDLE_RBOFIFO_EN 0x00000020U
+#define ROGUE_CR_SLC_IDLE_FRC_CONV_SHIFT 4U
+#define ROGUE_CR_SLC_IDLE_FRC_CONV_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SLC_IDLE_FRC_CONV_EN 0x00000010U
+#define ROGUE_CR_SLC_IDLE_VXE_CONV_SHIFT 3U
+#define ROGUE_CR_SLC_IDLE_VXE_CONV_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SLC_IDLE_VXE_CONV_EN 0x00000008U
+#define ROGUE_CR_SLC_IDLE_VXD_CONV_SHIFT 2U
+#define ROGUE_CR_SLC_IDLE_VXD_CONV_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SLC_IDLE_VXD_CONV_EN 0x00000004U
+#define ROGUE_CR_SLC_IDLE_BIF1_CONV_SHIFT 1U
+#define ROGUE_CR_SLC_IDLE_BIF1_CONV_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SLC_IDLE_BIF1_CONV_EN 0x00000002U
+#define ROGUE_CR_SLC_IDLE_CBAR_SHIFT 0U
+#define ROGUE_CR_SLC_IDLE_CBAR_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SLC_IDLE_CBAR_EN 0x00000001U
+
+/* Register ROGUE_CR_SLC_STATUS2 */
+#define ROGUE_CR_SLC_STATUS2 0x3908U
+#define ROGUE_CR_SLC_STATUS2_MASKFULL 0x000003FF03FFFFFFULL
+#define ROGUE_CR_SLC_STATUS2_READS3_SHIFT 32U
+#define ROGUE_CR_SLC_STATUS2_READS3_CLRMSK 0xFFFFFC00FFFFFFFFULL
+#define ROGUE_CR_SLC_STATUS2_READS2_SHIFT 16U
+#define ROGUE_CR_SLC_STATUS2_READS2_CLRMSK 0xFFFFFFFFFC00FFFFULL
+#define ROGUE_CR_SLC_STATUS2_READS3_EXT_SHIFT 8U
+#define ROGUE_CR_SLC_STATUS2_READS3_EXT_CLRMSK 0xFFFFFFFFFFFF00FFULL
+#define ROGUE_CR_SLC_STATUS2_READS2_EXT_SHIFT 0U
+#define ROGUE_CR_SLC_STATUS2_READS2_EXT_CLRMSK 0xFFFFFFFFFFFFFF00ULL
+
+/* Register ROGUE_CR_SLC_CTRL_MISC2 */
+#define ROGUE_CR_SLC_CTRL_MISC2 0x3930U
+#define ROGUE_CR_SLC_CTRL_MISC2_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SLC_CTRL_MISC2_SCRAMBLE_BITS_SHIFT 0U
+#define ROGUE_CR_SLC_CTRL_MISC2_SCRAMBLE_BITS_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE */
+#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE 0x3938U
+#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_BYPASS_SHIFT 0U
+#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_BYPASS_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SLC_CROSSBAR_LOAD_BALANCE_BYPASS_EN 0x00000001U
+
+/* Register ROGUE_CR_USC_UVS0_CHECKSUM */
+#define ROGUE_CR_USC_UVS0_CHECKSUM 0x5000U
+#define ROGUE_CR_USC_UVS0_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_UVS0_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_USC_UVS0_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_USC_UVS1_CHECKSUM */
+#define ROGUE_CR_USC_UVS1_CHECKSUM 0x5008U
+#define ROGUE_CR_USC_UVS1_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_UVS1_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_USC_UVS1_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_USC_UVS2_CHECKSUM */
+#define ROGUE_CR_USC_UVS2_CHECKSUM 0x5010U
+#define ROGUE_CR_USC_UVS2_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_UVS2_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_USC_UVS2_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_USC_UVS3_CHECKSUM */
+#define ROGUE_CR_USC_UVS3_CHECKSUM 0x5018U
+#define ROGUE_CR_USC_UVS3_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_UVS3_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_USC_UVS3_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PPP_SIGNATURE */
+#define ROGUE_CR_PPP_SIGNATURE 0x5020U
+#define ROGUE_CR_PPP_SIGNATURE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PPP_SIGNATURE_VALUE_SHIFT 0U
+#define ROGUE_CR_PPP_SIGNATURE_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TE_SIGNATURE */
+#define ROGUE_CR_TE_SIGNATURE 0x5028U
+#define ROGUE_CR_TE_SIGNATURE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TE_SIGNATURE_VALUE_SHIFT 0U
+#define ROGUE_CR_TE_SIGNATURE_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TE_CHECKSUM */
+#define ROGUE_CR_TE_CHECKSUM 0x5110U
+#define ROGUE_CR_TE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TE_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_TE_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_USC_UVB_CHECKSUM */
+#define ROGUE_CR_USC_UVB_CHECKSUM 0x5118U
+#define ROGUE_CR_USC_UVB_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_UVB_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_USC_UVB_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_VCE_CHECKSUM */
+#define ROGUE_CR_VCE_CHECKSUM 0x5030U
+#define ROGUE_CR_VCE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_VCE_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_VCE_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_ISP_PDS_CHECKSUM */
+#define ROGUE_CR_ISP_PDS_CHECKSUM 0x5038U
+#define ROGUE_CR_ISP_PDS_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_ISP_PDS_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_ISP_PDS_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_ISP_TPF_CHECKSUM */
+#define ROGUE_CR_ISP_TPF_CHECKSUM 0x5040U
+#define ROGUE_CR_ISP_TPF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_ISP_TPF_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_ISP_TPF_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TFPU_PLANE0_CHECKSUM */
+#define ROGUE_CR_TFPU_PLANE0_CHECKSUM 0x5048U
+#define ROGUE_CR_TFPU_PLANE0_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TFPU_PLANE0_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_TFPU_PLANE0_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TFPU_PLANE1_CHECKSUM */
+#define ROGUE_CR_TFPU_PLANE1_CHECKSUM 0x5050U
+#define ROGUE_CR_TFPU_PLANE1_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TFPU_PLANE1_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_TFPU_PLANE1_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PBE_CHECKSUM */
+#define ROGUE_CR_PBE_CHECKSUM 0x5058U
+#define ROGUE_CR_PBE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PBE_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_PBE_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PDS_DOUTM_STM_SIGNATURE */
+#define ROGUE_CR_PDS_DOUTM_STM_SIGNATURE 0x5060U
+#define ROGUE_CR_PDS_DOUTM_STM_SIGNATURE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PDS_DOUTM_STM_SIGNATURE_VALUE_SHIFT 0U
+#define ROGUE_CR_PDS_DOUTM_STM_SIGNATURE_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_IFPU_ISP_CHECKSUM */
+#define ROGUE_CR_IFPU_ISP_CHECKSUM 0x5068U
+#define ROGUE_CR_IFPU_ISP_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_IFPU_ISP_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_IFPU_ISP_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_USC_UVS4_CHECKSUM */
+#define ROGUE_CR_USC_UVS4_CHECKSUM 0x5100U
+#define ROGUE_CR_USC_UVS4_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_UVS4_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_USC_UVS4_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_USC_UVS5_CHECKSUM */
+#define ROGUE_CR_USC_UVS5_CHECKSUM 0x5108U
+#define ROGUE_CR_USC_UVS5_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_UVS5_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_USC_UVS5_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PPP_CLIP_CHECKSUM */
+#define ROGUE_CR_PPP_CLIP_CHECKSUM 0x5120U
+#define ROGUE_CR_PPP_CLIP_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PPP_CLIP_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_PPP_CLIP_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_TA_PHASE */
+#define ROGUE_CR_PERF_TA_PHASE 0x6008U
+#define ROGUE_CR_PERF_TA_PHASE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_TA_PHASE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_TA_PHASE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_3D_PHASE */
+#define ROGUE_CR_PERF_3D_PHASE 0x6010U
+#define ROGUE_CR_PERF_3D_PHASE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_3D_PHASE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_3D_PHASE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_COMPUTE_PHASE */
+#define ROGUE_CR_PERF_COMPUTE_PHASE 0x6018U
+#define ROGUE_CR_PERF_COMPUTE_PHASE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_COMPUTE_PHASE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_COMPUTE_PHASE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_TA_CYCLE */
+#define ROGUE_CR_PERF_TA_CYCLE 0x6020U
+#define ROGUE_CR_PERF_TA_CYCLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_TA_CYCLE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_TA_CYCLE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_3D_CYCLE */
+#define ROGUE_CR_PERF_3D_CYCLE 0x6028U
+#define ROGUE_CR_PERF_3D_CYCLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_3D_CYCLE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_3D_CYCLE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_COMPUTE_CYCLE */
+#define ROGUE_CR_PERF_COMPUTE_CYCLE 0x6030U
+#define ROGUE_CR_PERF_COMPUTE_CYCLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_COMPUTE_CYCLE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_COMPUTE_CYCLE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_TA_OR_3D_CYCLE */
+#define ROGUE_CR_PERF_TA_OR_3D_CYCLE 0x6038U
+#define ROGUE_CR_PERF_TA_OR_3D_CYCLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_TA_OR_3D_CYCLE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_TA_OR_3D_CYCLE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_INITIAL_TA_CYCLE */
+#define ROGUE_CR_PERF_INITIAL_TA_CYCLE 0x6040U
+#define ROGUE_CR_PERF_INITIAL_TA_CYCLE_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_INITIAL_TA_CYCLE_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_INITIAL_TA_CYCLE_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC0_READ_STALL */
+#define ROGUE_CR_PERF_SLC0_READ_STALL 0x60B8U
+#define ROGUE_CR_PERF_SLC0_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC0_READ_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC0_READ_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC0_WRITE_STALL */
+#define ROGUE_CR_PERF_SLC0_WRITE_STALL 0x60C0U
+#define ROGUE_CR_PERF_SLC0_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC0_WRITE_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC0_WRITE_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC1_READ_STALL */
+#define ROGUE_CR_PERF_SLC1_READ_STALL 0x60E0U
+#define ROGUE_CR_PERF_SLC1_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC1_READ_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC1_READ_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC1_WRITE_STALL */
+#define ROGUE_CR_PERF_SLC1_WRITE_STALL 0x60E8U
+#define ROGUE_CR_PERF_SLC1_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC1_WRITE_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC1_WRITE_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC2_READ_STALL */
+#define ROGUE_CR_PERF_SLC2_READ_STALL 0x6158U
+#define ROGUE_CR_PERF_SLC2_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC2_READ_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC2_READ_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC2_WRITE_STALL */
+#define ROGUE_CR_PERF_SLC2_WRITE_STALL 0x6160U
+#define ROGUE_CR_PERF_SLC2_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC2_WRITE_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC2_WRITE_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC3_READ_STALL */
+#define ROGUE_CR_PERF_SLC3_READ_STALL 0x6180U
+#define ROGUE_CR_PERF_SLC3_READ_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC3_READ_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC3_READ_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_SLC3_WRITE_STALL */
+#define ROGUE_CR_PERF_SLC3_WRITE_STALL 0x6188U
+#define ROGUE_CR_PERF_SLC3_WRITE_STALL_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_SLC3_WRITE_STALL_COUNT_SHIFT 0U
+#define ROGUE_CR_PERF_SLC3_WRITE_STALL_COUNT_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PERF_3D_SPINUP */
+#define ROGUE_CR_PERF_3D_SPINUP 0x6220U
+#define ROGUE_CR_PERF_3D_SPINUP_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PERF_3D_SPINUP_CYCLES_SHIFT 0U
+#define ROGUE_CR_PERF_3D_SPINUP_CYCLES_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_AXI_ACE_LITE_CONFIGURATION */
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION 0x38C0U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_MASKFULL 0x00003FFFFFFFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ENABLE_FENCE_OUT_SHIFT 45U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ENABLE_FENCE_OUT_CLRMSK 0xFFFFDFFFFFFFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ENABLE_FENCE_OUT_EN 0x0000200000000000ULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_OSID_SECURITY_SHIFT 37U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_OSID_SECURITY_CLRMSK 0xFFFFE01FFFFFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITELINEUNIQUE_SHIFT 36U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITELINEUNIQUE_CLRMSK \
+	0xFFFFFFEFFFFFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITELINEUNIQUE_EN \
+	0x0000001000000000ULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITE_SHIFT 35U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITE_CLRMSK 0xFFFFFFF7FFFFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_WRITE_EN 0x0000000800000000ULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_READ_SHIFT 34U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_READ_CLRMSK 0xFFFFFFFBFFFFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_DISABLE_COHERENT_READ_EN 0x0000000400000000ULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_CACHE_MAINTENANCE_SHIFT 30U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_CACHE_MAINTENANCE_CLRMSK 0xFFFFFFFC3FFFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_COHERENT_SHIFT 26U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_COHERENT_CLRMSK 0xFFFFFFFFC3FFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_COHERENT_SHIFT 22U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_COHERENT_CLRMSK 0xFFFFFFFFFC3FFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_BARRIER_SHIFT 20U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_BARRIER_CLRMSK 0xFFFFFFFFFFCFFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_BARRIER_SHIFT 18U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_BARRIER_CLRMSK 0xFFFFFFFFFFF3FFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_CACHE_MAINTENANCE_SHIFT 16U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_CACHE_MAINTENANCE_CLRMSK 0xFFFFFFFFFFFCFFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_COHERENT_SHIFT 14U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_COHERENT_CLRMSK 0xFFFFFFFFFFFF3FFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_COHERENT_SHIFT 12U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_COHERENT_CLRMSK 0xFFFFFFFFFFFFCFFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_NON_SNOOPING_SHIFT 10U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFF3FFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_NON_SNOOPING_SHIFT 8U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFFCFFULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_NON_SNOOPING_SHIFT 4U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFFF0FULL
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_NON_SNOOPING_SHIFT 0U
+#define ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_NON_SNOOPING_CLRMSK 0xFFFFFFFFFFFFFFF0ULL
+
+/* Register ROGUE_CR_POWER_ESTIMATE_RESULT */
+#define ROGUE_CR_POWER_ESTIMATE_RESULT 0x6328U
+#define ROGUE_CR_POWER_ESTIMATE_RESULT_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_POWER_ESTIMATE_RESULT_VALUE_SHIFT 0U
+#define ROGUE_CR_POWER_ESTIMATE_RESULT_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TA_PERF */
+#define ROGUE_CR_TA_PERF 0x7600U
+#define ROGUE_CR_TA_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_TA_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_TA_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_TA_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_TA_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_TA_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_TA_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_TA_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_TA_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_TA_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_TA_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_TA_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_TA_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_TA_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_TA_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_TA_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_TA_PERF_SELECT0 */
+#define ROGUE_CR_TA_PERF_SELECT0 0x7608U
+#define ROGUE_CR_TA_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_TA_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_TA_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_TA_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_TA_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_TA_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_TA_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TA_PERF_SELECT1 */
+#define ROGUE_CR_TA_PERF_SELECT1 0x7610U
+#define ROGUE_CR_TA_PERF_SELECT1_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_TA_PERF_SELECT1_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT1_MODE_SHIFT 21U
+#define ROGUE_CR_TA_PERF_SELECT1_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT1_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_TA_PERF_SELECT1_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_TA_PERF_SELECT1_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_TA_PERF_SELECT1_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_TA_PERF_SELECT1_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TA_PERF_SELECT2 */
+#define ROGUE_CR_TA_PERF_SELECT2 0x7618U
+#define ROGUE_CR_TA_PERF_SELECT2_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_TA_PERF_SELECT2_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT2_MODE_SHIFT 21U
+#define ROGUE_CR_TA_PERF_SELECT2_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT2_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_TA_PERF_SELECT2_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_TA_PERF_SELECT2_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_TA_PERF_SELECT2_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_TA_PERF_SELECT2_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TA_PERF_SELECT3 */
+#define ROGUE_CR_TA_PERF_SELECT3 0x7620U
+#define ROGUE_CR_TA_PERF_SELECT3_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_TA_PERF_SELECT3_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT3_MODE_SHIFT 21U
+#define ROGUE_CR_TA_PERF_SELECT3_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECT3_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_TA_PERF_SELECT3_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_TA_PERF_SELECT3_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_TA_PERF_SELECT3_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_TA_PERF_SELECT3_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TA_PERF_SELECTED_BITS */
+#define ROGUE_CR_TA_PERF_SELECTED_BITS 0x7648U
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG3_SHIFT 48U
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG3_CLRMSK 0x0000FFFFFFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG2_SHIFT 32U
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG2_CLRMSK 0xFFFF0000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG1_SHIFT 16U
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG1_CLRMSK 0xFFFFFFFF0000FFFFULL
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG0_SHIFT 0U
+#define ROGUE_CR_TA_PERF_SELECTED_BITS_REG0_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TA_PERF_COUNTER_0 */
+#define ROGUE_CR_TA_PERF_COUNTER_0 0x7650U
+#define ROGUE_CR_TA_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_TA_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TA_PERF_COUNTER_1 */
+#define ROGUE_CR_TA_PERF_COUNTER_1 0x7658U
+#define ROGUE_CR_TA_PERF_COUNTER_1_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_COUNTER_1_REG_SHIFT 0U
+#define ROGUE_CR_TA_PERF_COUNTER_1_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TA_PERF_COUNTER_2 */
+#define ROGUE_CR_TA_PERF_COUNTER_2 0x7660U
+#define ROGUE_CR_TA_PERF_COUNTER_2_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_COUNTER_2_REG_SHIFT 0U
+#define ROGUE_CR_TA_PERF_COUNTER_2_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TA_PERF_COUNTER_3 */
+#define ROGUE_CR_TA_PERF_COUNTER_3 0x7668U
+#define ROGUE_CR_TA_PERF_COUNTER_3_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TA_PERF_COUNTER_3_REG_SHIFT 0U
+#define ROGUE_CR_TA_PERF_COUNTER_3_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_RASTERISATION_PERF */
+#define ROGUE_CR_RASTERISATION_PERF 0x7700U
+#define ROGUE_CR_RASTERISATION_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_RASTERISATION_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_RASTERISATION_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_RASTERISATION_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_RASTERISATION_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_RASTERISATION_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_RASTERISATION_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_RASTERISATION_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_RASTERISATION_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_RASTERISATION_PERF_SELECT0 */
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0 0x7708U
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_RASTERISATION_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_RASTERISATION_PERF_COUNTER_0 */
+#define ROGUE_CR_RASTERISATION_PERF_COUNTER_0 0x7750U
+#define ROGUE_CR_RASTERISATION_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_RASTERISATION_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_RASTERISATION_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_HUB_BIFPMCACHE_PERF */
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF 0x7800U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0 */
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0 0x7808U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0 */
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0 0x7850U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_HUB_BIFPMCACHE_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TPU_MCU_L0_PERF */
+#define ROGUE_CR_TPU_MCU_L0_PERF 0x7900U
+#define ROGUE_CR_TPU_MCU_L0_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_TPU_MCU_L0_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_TPU_MCU_L0_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_TPU_MCU_L0_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_TPU_MCU_L0_PERF_SELECT0 */
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0 0x7908U
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_TPU_MCU_L0_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0 */
+#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0 0x7950U
+#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_TPU_MCU_L0_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_USC_PERF */
+#define ROGUE_CR_USC_PERF 0x8100U
+#define ROGUE_CR_USC_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_USC_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_USC_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_USC_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_USC_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_USC_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_USC_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_USC_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_USC_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_USC_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_USC_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_USC_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_USC_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_USC_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_USC_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_USC_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_USC_PERF_SELECT0 */
+#define ROGUE_CR_USC_PERF_SELECT0 0x8108U
+#define ROGUE_CR_USC_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_USC_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_USC_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_USC_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_USC_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_USC_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_USC_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_USC_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_USC_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_USC_PERF_COUNTER_0 */
+#define ROGUE_CR_USC_PERF_COUNTER_0 0x8150U
+#define ROGUE_CR_USC_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_USC_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_USC_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_JONES_IDLE */
+#define ROGUE_CR_JONES_IDLE 0x8328U
+#define ROGUE_CR_JONES_IDLE_MASKFULL 0x0000000000007FFFULL
+#define ROGUE_CR_JONES_IDLE_TDM_SHIFT 14U
+#define ROGUE_CR_JONES_IDLE_TDM_CLRMSK 0xFFFFBFFFU
+#define ROGUE_CR_JONES_IDLE_TDM_EN 0x00004000U
+#define ROGUE_CR_JONES_IDLE_FB_CDC_TLA_SHIFT 13U
+#define ROGUE_CR_JONES_IDLE_FB_CDC_TLA_CLRMSK 0xFFFFDFFFU
+#define ROGUE_CR_JONES_IDLE_FB_CDC_TLA_EN 0x00002000U
+#define ROGUE_CR_JONES_IDLE_FB_CDC_SHIFT 12U
+#define ROGUE_CR_JONES_IDLE_FB_CDC_CLRMSK 0xFFFFEFFFU
+#define ROGUE_CR_JONES_IDLE_FB_CDC_EN 0x00001000U
+#define ROGUE_CR_JONES_IDLE_MMU_SHIFT 11U
+#define ROGUE_CR_JONES_IDLE_MMU_CLRMSK 0xFFFFF7FFU
+#define ROGUE_CR_JONES_IDLE_MMU_EN 0x00000800U
+#define ROGUE_CR_JONES_IDLE_TLA_SHIFT 10U
+#define ROGUE_CR_JONES_IDLE_TLA_CLRMSK 0xFFFFFBFFU
+#define ROGUE_CR_JONES_IDLE_TLA_EN 0x00000400U
+#define ROGUE_CR_JONES_IDLE_GARTEN_SHIFT 9U
+#define ROGUE_CR_JONES_IDLE_GARTEN_CLRMSK 0xFFFFFDFFU
+#define ROGUE_CR_JONES_IDLE_GARTEN_EN 0x00000200U
+#define ROGUE_CR_JONES_IDLE_HOSTIF_SHIFT 8U
+#define ROGUE_CR_JONES_IDLE_HOSTIF_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_JONES_IDLE_HOSTIF_EN 0x00000100U
+#define ROGUE_CR_JONES_IDLE_SOCIF_SHIFT 7U
+#define ROGUE_CR_JONES_IDLE_SOCIF_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_JONES_IDLE_SOCIF_EN 0x00000080U
+#define ROGUE_CR_JONES_IDLE_TILING_SHIFT 6U
+#define ROGUE_CR_JONES_IDLE_TILING_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_JONES_IDLE_TILING_EN 0x00000040U
+#define ROGUE_CR_JONES_IDLE_IPP_SHIFT 5U
+#define ROGUE_CR_JONES_IDLE_IPP_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_JONES_IDLE_IPP_EN 0x00000020U
+#define ROGUE_CR_JONES_IDLE_USCS_SHIFT 4U
+#define ROGUE_CR_JONES_IDLE_USCS_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_JONES_IDLE_USCS_EN 0x00000010U
+#define ROGUE_CR_JONES_IDLE_PM_SHIFT 3U
+#define ROGUE_CR_JONES_IDLE_PM_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_JONES_IDLE_PM_EN 0x00000008U
+#define ROGUE_CR_JONES_IDLE_CDM_SHIFT 2U
+#define ROGUE_CR_JONES_IDLE_CDM_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_JONES_IDLE_CDM_EN 0x00000004U
+#define ROGUE_CR_JONES_IDLE_VDM_SHIFT 1U
+#define ROGUE_CR_JONES_IDLE_VDM_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_JONES_IDLE_VDM_EN 0x00000002U
+#define ROGUE_CR_JONES_IDLE_BIF_SHIFT 0U
+#define ROGUE_CR_JONES_IDLE_BIF_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_JONES_IDLE_BIF_EN 0x00000001U
+
+/* Register ROGUE_CR_TORNADO_PERF */
+#define ROGUE_CR_TORNADO_PERF 0x8228U
+#define ROGUE_CR_TORNADO_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_TORNADO_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_TORNADO_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_TORNADO_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_TORNADO_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_TORNADO_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_TORNADO_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_TORNADO_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_TORNADO_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_TORNADO_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_TORNADO_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_TORNADO_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_TORNADO_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_TORNADO_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_TORNADO_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_TORNADO_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_TORNADO_PERF_SELECT0 */
+#define ROGUE_CR_TORNADO_PERF_SELECT0 0x8230U
+#define ROGUE_CR_TORNADO_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_TORNADO_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_TORNADO_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_TORNADO_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_TORNADO_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_TORNADO_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_TORNADO_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_TORNADO_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_TORNADO_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TORNADO_PERF_COUNTER_0 */
+#define ROGUE_CR_TORNADO_PERF_COUNTER_0 0x8268U
+#define ROGUE_CR_TORNADO_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TORNADO_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_TORNADO_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_TEXAS_PERF */
+#define ROGUE_CR_TEXAS_PERF 0x8290U
+#define ROGUE_CR_TEXAS_PERF_MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_TEXAS_PERF_CLR_5_SHIFT 6U
+#define ROGUE_CR_TEXAS_PERF_CLR_5_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_TEXAS_PERF_CLR_5_EN 0x00000040U
+#define ROGUE_CR_TEXAS_PERF_CLR_4_SHIFT 5U
+#define ROGUE_CR_TEXAS_PERF_CLR_4_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_TEXAS_PERF_CLR_4_EN 0x00000020U
+#define ROGUE_CR_TEXAS_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_TEXAS_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_TEXAS_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_TEXAS_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_TEXAS_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_TEXAS_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_TEXAS_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_TEXAS_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_TEXAS_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_TEXAS_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_TEXAS_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_TEXAS_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_TEXAS_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_TEXAS_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_TEXAS_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_TEXAS_PERF_SELECT0 */
+#define ROGUE_CR_TEXAS_PERF_SELECT0 0x8298U
+#define ROGUE_CR_TEXAS_PERF_SELECT0_MASKFULL 0x3FFF3FFF803FFFFFULL
+#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_TEXAS_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_TEXAS_PERF_SELECT0_MODE_SHIFT 31U
+#define ROGUE_CR_TEXAS_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFF7FFFFFFFULL
+#define ROGUE_CR_TEXAS_PERF_SELECT0_MODE_EN 0x0000000080000000ULL
+#define ROGUE_CR_TEXAS_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_TEXAS_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFC0FFFFULL
+#define ROGUE_CR_TEXAS_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_TEXAS_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_TEXAS_PERF_COUNTER_0 */
+#define ROGUE_CR_TEXAS_PERF_COUNTER_0 0x82D8U
+#define ROGUE_CR_TEXAS_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_TEXAS_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_TEXAS_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_JONES_PERF */
+#define ROGUE_CR_JONES_PERF 0x8330U
+#define ROGUE_CR_JONES_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_JONES_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_JONES_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_JONES_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_JONES_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_JONES_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_JONES_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_JONES_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_JONES_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_JONES_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_JONES_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_JONES_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_JONES_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_JONES_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_JONES_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_JONES_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_JONES_PERF_SELECT0 */
+#define ROGUE_CR_JONES_PERF_SELECT0 0x8338U
+#define ROGUE_CR_JONES_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_JONES_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_JONES_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_JONES_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_JONES_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_JONES_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_JONES_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_JONES_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_JONES_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_JONES_PERF_COUNTER_0 */
+#define ROGUE_CR_JONES_PERF_COUNTER_0 0x8368U
+#define ROGUE_CR_JONES_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_JONES_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_JONES_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_BLACKPEARL_PERF */
+#define ROGUE_CR_BLACKPEARL_PERF 0x8400U
+#define ROGUE_CR_BLACKPEARL_PERF_MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_5_SHIFT 6U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_5_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_5_EN 0x00000040U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_4_SHIFT 5U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_4_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_4_EN 0x00000020U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BLACKPEARL_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_BLACKPEARL_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_BLACKPEARL_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BLACKPEARL_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_BLACKPEARL_PERF_SELECT0 */
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0 0x8408U
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_MASKFULL 0x3FFF3FFF803FFFFFULL
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_MODE_SHIFT 31U
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFF7FFFFFFFULL
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_MODE_EN 0x0000000080000000ULL
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFC0FFFFULL
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_BLACKPEARL_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_BLACKPEARL_PERF_COUNTER_0 */
+#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0 0x8448U
+#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_BLACKPEARL_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_PBE_PERF */
+#define ROGUE_CR_PBE_PERF 0x8478U
+#define ROGUE_CR_PBE_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_PBE_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_PBE_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_PBE_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_PBE_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_PBE_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_PBE_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_PBE_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_PBE_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_PBE_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_PBE_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_PBE_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_PBE_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_PBE_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_PBE_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_PBE_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_PBE_PERF_SELECT0 */
+#define ROGUE_CR_PBE_PERF_SELECT0 0x8480U
+#define ROGUE_CR_PBE_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_PBE_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_PBE_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_PBE_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_PBE_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_PBE_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_PBE_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_PBE_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_PBE_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_PBE_PERF_COUNTER_0 */
+#define ROGUE_CR_PBE_PERF_COUNTER_0 0x84B0U
+#define ROGUE_CR_PBE_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_PBE_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_PBE_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_OCP_REVINFO */
+#define ROGUE_CR_OCP_REVINFO 0x9000U
+#define ROGUE_CR_OCP_REVINFO_MASKFULL 0x00000007FFFFFFFFULL
+#define ROGUE_CR_OCP_REVINFO_HWINFO_SYSBUS_SHIFT 33U
+#define ROGUE_CR_OCP_REVINFO_HWINFO_SYSBUS_CLRMSK 0xFFFFFFF9FFFFFFFFULL
+#define ROGUE_CR_OCP_REVINFO_HWINFO_MEMBUS_SHIFT 32U
+#define ROGUE_CR_OCP_REVINFO_HWINFO_MEMBUS_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_OCP_REVINFO_HWINFO_MEMBUS_EN 0x0000000100000000ULL
+#define ROGUE_CR_OCP_REVINFO_REVISION_SHIFT 0U
+#define ROGUE_CR_OCP_REVINFO_REVISION_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register ROGUE_CR_OCP_SYSCONFIG */
+#define ROGUE_CR_OCP_SYSCONFIG 0x9010U
+#define ROGUE_CR_OCP_SYSCONFIG_MASKFULL 0x0000000000000FFFULL
+#define ROGUE_CR_OCP_SYSCONFIG_DUST2_STANDBY_MODE_SHIFT 10U
+#define ROGUE_CR_OCP_SYSCONFIG_DUST2_STANDBY_MODE_CLRMSK 0xFFFFF3FFU
+#define ROGUE_CR_OCP_SYSCONFIG_DUST1_STANDBY_MODE_SHIFT 8U
+#define ROGUE_CR_OCP_SYSCONFIG_DUST1_STANDBY_MODE_CLRMSK 0xFFFFFCFFU
+#define ROGUE_CR_OCP_SYSCONFIG_DUST0_STANDBY_MODE_SHIFT 6U
+#define ROGUE_CR_OCP_SYSCONFIG_DUST0_STANDBY_MODE_CLRMSK 0xFFFFFF3FU
+#define ROGUE_CR_OCP_SYSCONFIG_RASCAL_STANDBYMODE_SHIFT 4U
+#define ROGUE_CR_OCP_SYSCONFIG_RASCAL_STANDBYMODE_CLRMSK 0xFFFFFFCFU
+#define ROGUE_CR_OCP_SYSCONFIG_STANDBY_MODE_SHIFT 2U
+#define ROGUE_CR_OCP_SYSCONFIG_STANDBY_MODE_CLRMSK 0xFFFFFFF3U
+#define ROGUE_CR_OCP_SYSCONFIG_IDLE_MODE_SHIFT 0U
+#define ROGUE_CR_OCP_SYSCONFIG_IDLE_MODE_CLRMSK 0xFFFFFFFCU
+
+/* Register ROGUE_CR_OCP_IRQSTATUS_RAW_0 */
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_0 0x9020U
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_INIT_MINTERRUPT_RAW_SHIFT 0U
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_INIT_MINTERRUPT_RAW_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_0_INIT_MINTERRUPT_RAW_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQSTATUS_RAW_1 */
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_1 0x9028U
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_TARGET_SINTERRUPT_RAW_SHIFT 0U
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_TARGET_SINTERRUPT_RAW_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_1_TARGET_SINTERRUPT_RAW_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQSTATUS_RAW_2 */
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_2 0x9030U
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_RGX_IRQ_RAW_SHIFT 0U
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_RGX_IRQ_RAW_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQSTATUS_RAW_2_RGX_IRQ_RAW_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQSTATUS_0 */
+#define ROGUE_CR_OCP_IRQSTATUS_0 0x9038U
+#define ROGUE_CR_OCP_IRQSTATUS_0_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQSTATUS_0_INIT_MINTERRUPT_STATUS_SHIFT 0U
+#define ROGUE_CR_OCP_IRQSTATUS_0_INIT_MINTERRUPT_STATUS_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQSTATUS_0_INIT_MINTERRUPT_STATUS_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQSTATUS_1 */
+#define ROGUE_CR_OCP_IRQSTATUS_1 0x9040U
+#define ROGUE_CR_OCP_IRQSTATUS_1_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQSTATUS_1_TARGET_SINTERRUPT_STATUS_SHIFT 0U
+#define ROGUE_CR_OCP_IRQSTATUS_1_TARGET_SINTERRUPT_STATUS_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQSTATUS_1_TARGET_SINTERRUPT_STATUS_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQSTATUS_2 */
+#define ROGUE_CR_OCP_IRQSTATUS_2 0x9048U
+#define ROGUE_CR_OCP_IRQSTATUS_2_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQSTATUS_2_RGX_IRQ_STATUS_SHIFT 0U
+#define ROGUE_CR_OCP_IRQSTATUS_2_RGX_IRQ_STATUS_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQSTATUS_2_RGX_IRQ_STATUS_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQENABLE_SET_0 */
+#define ROGUE_CR_OCP_IRQENABLE_SET_0 0x9050U
+#define ROGUE_CR_OCP_IRQENABLE_SET_0_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQENABLE_SET_0_INIT_MINTERRUPT_ENABLE_SHIFT 0U
+#define ROGUE_CR_OCP_IRQENABLE_SET_0_INIT_MINTERRUPT_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQENABLE_SET_0_INIT_MINTERRUPT_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQENABLE_SET_1 */
+#define ROGUE_CR_OCP_IRQENABLE_SET_1 0x9058U
+#define ROGUE_CR_OCP_IRQENABLE_SET_1_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQENABLE_SET_1_TARGET_SINTERRUPT_ENABLE_SHIFT 0U
+#define ROGUE_CR_OCP_IRQENABLE_SET_1_TARGET_SINTERRUPT_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQENABLE_SET_1_TARGET_SINTERRUPT_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQENABLE_SET_2 */
+#define ROGUE_CR_OCP_IRQENABLE_SET_2 0x9060U
+#define ROGUE_CR_OCP_IRQENABLE_SET_2_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQENABLE_SET_2_RGX_IRQ_ENABLE_SHIFT 0U
+#define ROGUE_CR_OCP_IRQENABLE_SET_2_RGX_IRQ_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQENABLE_SET_2_RGX_IRQ_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQENABLE_CLR_0 */
+#define ROGUE_CR_OCP_IRQENABLE_CLR_0 0x9068U
+#define ROGUE_CR_OCP_IRQENABLE_CLR_0_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQENABLE_CLR_0_INIT_MINTERRUPT_DISABLE_SHIFT 0U
+#define ROGUE_CR_OCP_IRQENABLE_CLR_0_INIT_MINTERRUPT_DISABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQENABLE_CLR_0_INIT_MINTERRUPT_DISABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQENABLE_CLR_1 */
+#define ROGUE_CR_OCP_IRQENABLE_CLR_1 0x9070U
+#define ROGUE_CR_OCP_IRQENABLE_CLR_1_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQENABLE_CLR_1_TARGET_SINTERRUPT_DISABLE_SHIFT 0U
+#define ROGUE_CR_OCP_IRQENABLE_CLR_1_TARGET_SINTERRUPT_DISABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQENABLE_CLR_1_TARGET_SINTERRUPT_DISABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQENABLE_CLR_2 */
+#define ROGUE_CR_OCP_IRQENABLE_CLR_2 0x9078U
+#define ROGUE_CR_OCP_IRQENABLE_CLR_2_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_IRQENABLE_CLR_2_RGX_IRQ_DISABLE_SHIFT 0U
+#define ROGUE_CR_OCP_IRQENABLE_CLR_2_RGX_IRQ_DISABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_IRQENABLE_CLR_2_RGX_IRQ_DISABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_IRQ_EVENT */
+#define ROGUE_CR_OCP_IRQ_EVENT 0x9080U
+#define ROGUE_CR_OCP_IRQ_EVENT_MASKFULL 0x00000000000FFFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNEXPECTED_RDATA_SHIFT 19U
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNEXPECTED_RDATA_CLRMSK 0xFFFFFFFFFFF7FFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNEXPECTED_RDATA_EN 0x0000000000080000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNSUPPORTED_MCMD_SHIFT 18U
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNSUPPORTED_MCMD_CLRMSK 0xFFFFFFFFFFFBFFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETH_RCVD_UNSUPPORTED_MCMD_EN 0x0000000000040000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNEXPECTED_RDATA_SHIFT 17U
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNEXPECTED_RDATA_CLRMSK 0xFFFFFFFFFFFDFFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNEXPECTED_RDATA_EN 0x0000000000020000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNSUPPORTED_MCMD_SHIFT 16U
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNSUPPORTED_MCMD_CLRMSK 0xFFFFFFFFFFFEFFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_TARGETS_RCVD_UNSUPPORTED_MCMD_EN 0x0000000000010000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_IMG_PAGE_BOUNDARY_CROSS_SHIFT 15U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFF7FFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000008000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_RESP_ERR_FAIL_SHIFT 14U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFBFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_RESP_ERR_FAIL_EN 0x0000000000004000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_UNUSED_TAGID_SHIFT 13U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFDFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RCVD_UNUSED_TAGID_EN 0x0000000000002000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RDATA_FIFO_OVERFILL_SHIFT 12U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFEFFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT3_RDATA_FIFO_OVERFILL_EN 0x0000000000001000ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_IMG_PAGE_BOUNDARY_CROSS_SHIFT 11U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFFF7FFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000000800ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_RESP_ERR_FAIL_SHIFT 10U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFFBFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_RESP_ERR_FAIL_EN 0x0000000000000400ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_UNUSED_TAGID_SHIFT 9U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFFDFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RCVD_UNUSED_TAGID_EN 0x0000000000000200ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RDATA_FIFO_OVERFILL_SHIFT 8U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFFEFFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT2_RDATA_FIFO_OVERFILL_EN 0x0000000000000100ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_IMG_PAGE_BOUNDARY_CROSS_SHIFT 7U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFFFF7FULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000000080ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_RESP_ERR_FAIL_SHIFT 6U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFFFBFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_RESP_ERR_FAIL_EN 0x0000000000000040ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_UNUSED_TAGID_SHIFT 5U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RCVD_UNUSED_TAGID_EN 0x0000000000000020ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RDATA_FIFO_OVERFILL_SHIFT 4U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFFFEFULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT1_RDATA_FIFO_OVERFILL_EN 0x0000000000000010ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_IMG_PAGE_BOUNDARY_CROSS_SHIFT 3U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_IMG_PAGE_BOUNDARY_CROSS_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_IMG_PAGE_BOUNDARY_CROSS_EN 0x0000000000000008ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_RESP_ERR_FAIL_SHIFT 2U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_RESP_ERR_FAIL_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_RESP_ERR_FAIL_EN 0x0000000000000004ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_UNUSED_TAGID_SHIFT 1U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_UNUSED_TAGID_CLRMSK 0xFFFFFFFFFFFFFFFDULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RCVD_UNUSED_TAGID_EN 0x0000000000000002ULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RDATA_FIFO_OVERFILL_SHIFT 0U
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RDATA_FIFO_OVERFILL_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_OCP_IRQ_EVENT_INIT0_RDATA_FIFO_OVERFILL_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_OCP_DEBUG_CONFIG */
+#define ROGUE_CR_OCP_DEBUG_CONFIG 0x9088U
+#define ROGUE_CR_OCP_DEBUG_CONFIG_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_OCP_DEBUG_CONFIG_REG_SHIFT 0U
+#define ROGUE_CR_OCP_DEBUG_CONFIG_REG_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_OCP_DEBUG_CONFIG_REG_EN 0x00000001U
+
+/* Register ROGUE_CR_OCP_DEBUG_STATUS */
+#define ROGUE_CR_OCP_DEBUG_STATUS 0x9090U
+#define ROGUE_CR_OCP_DEBUG_STATUS_MASKFULL 0x001F1F77FFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SDISCACK_SHIFT 51U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SDISCACK_CLRMSK 0xFFE7FFFFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SCONNECT_SHIFT 50U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SCONNECT_CLRMSK 0xFFFBFFFFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SCONNECT_EN 0x0004000000000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_MCONNECT_SHIFT 48U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_MCONNECT_CLRMSK 0xFFFCFFFFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SDISCACK_SHIFT 43U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SDISCACK_CLRMSK 0xFFFFE7FFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SCONNECT_SHIFT 42U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SCONNECT_CLRMSK 0xFFFFFBFFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SCONNECT_EN 0x0000040000000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_MCONNECT_SHIFT 40U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_MCONNECT_CLRMSK 0xFFFFFCFFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_BUSY_SHIFT 38U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_BUSY_CLRMSK 0xFFFFFFBFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_BUSY_EN 0x0000004000000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_CMD_FIFO_FULL_SHIFT 37U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_CMD_FIFO_FULL_CLRMSK 0xFFFFFFDFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_CMD_FIFO_FULL_EN 0x0000002000000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SRESP_ERROR_SHIFT 36U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SRESP_ERROR_CLRMSK 0xFFFFFFEFFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETH_SRESP_ERROR_EN 0x0000001000000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_BUSY_SHIFT 34U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_BUSY_CLRMSK 0xFFFFFFFBFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_BUSY_EN 0x0000000400000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_CMD_FIFO_FULL_SHIFT 33U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_CMD_FIFO_FULL_CLRMSK 0xFFFFFFFDFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_CMD_FIFO_FULL_EN 0x0000000200000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SRESP_ERROR_SHIFT 32U
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SRESP_ERROR_CLRMSK 0xFFFFFFFEFFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_TARGETS_SRESP_ERROR_EN 0x0000000100000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_RESERVED_SHIFT 31U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_RESERVED_CLRMSK 0xFFFFFFFF7FFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_RESERVED_EN 0x0000000080000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SWAIT_SHIFT 30U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SWAIT_CLRMSK 0xFFFFFFFFBFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SWAIT_EN 0x0000000040000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCREQ_SHIFT 29U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCREQ_CLRMSK 0xFFFFFFFFDFFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCREQ_EN 0x0000000020000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCACK_SHIFT 27U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MDISCACK_CLRMSK 0xFFFFFFFFE7FFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SCONNECT_SHIFT 26U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SCONNECT_CLRMSK 0xFFFFFFFFFBFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_SCONNECT_EN 0x0000000004000000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MCONNECT_SHIFT 24U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT3_MCONNECT_CLRMSK 0xFFFFFFFFFCFFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_RESERVED_SHIFT 23U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_RESERVED_CLRMSK 0xFFFFFFFFFF7FFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_RESERVED_EN 0x0000000000800000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SWAIT_SHIFT 22U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SWAIT_CLRMSK 0xFFFFFFFFFFBFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SWAIT_EN 0x0000000000400000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCREQ_SHIFT 21U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCREQ_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCREQ_EN 0x0000000000200000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCACK_SHIFT 19U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MDISCACK_CLRMSK 0xFFFFFFFFFFE7FFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SCONNECT_SHIFT 18U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SCONNECT_CLRMSK 0xFFFFFFFFFFFBFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_SCONNECT_EN 0x0000000000040000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MCONNECT_SHIFT 16U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT2_MCONNECT_CLRMSK 0xFFFFFFFFFFFCFFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_RESERVED_SHIFT 15U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_RESERVED_CLRMSK 0xFFFFFFFFFFFF7FFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_RESERVED_EN 0x0000000000008000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SWAIT_SHIFT 14U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SWAIT_CLRMSK 0xFFFFFFFFFFFFBFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SWAIT_EN 0x0000000000004000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCREQ_SHIFT 13U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCREQ_CLRMSK 0xFFFFFFFFFFFFDFFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCREQ_EN 0x0000000000002000ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCACK_SHIFT 11U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MDISCACK_CLRMSK 0xFFFFFFFFFFFFE7FFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SCONNECT_SHIFT 10U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SCONNECT_CLRMSK 0xFFFFFFFFFFFFFBFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_SCONNECT_EN 0x0000000000000400ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MCONNECT_SHIFT 8U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT1_MCONNECT_CLRMSK 0xFFFFFFFFFFFFFCFFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_RESERVED_SHIFT 7U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_RESERVED_CLRMSK 0xFFFFFFFFFFFFFF7FULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_RESERVED_EN 0x0000000000000080ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SWAIT_SHIFT 6U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SWAIT_CLRMSK 0xFFFFFFFFFFFFFFBFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SWAIT_EN 0x0000000000000040ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCREQ_SHIFT 5U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCREQ_CLRMSK 0xFFFFFFFFFFFFFFDFULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCREQ_EN 0x0000000000000020ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCACK_SHIFT 3U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MDISCACK_CLRMSK 0xFFFFFFFFFFFFFFE7ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SCONNECT_SHIFT 2U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SCONNECT_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_SCONNECT_EN 0x0000000000000004ULL
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MCONNECT_SHIFT 0U
+#define ROGUE_CR_OCP_DEBUG_STATUS_INIT0_MCONNECT_CLRMSK 0xFFFFFFFFFFFFFFFCULL
+
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PM_ALIST_SHIFT 6U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PM_ALIST_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PM_ALIST_EN 0x00000040U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_HOST_SHIFT 5U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_HOST_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_HOST_EN 0x00000020U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_META_SHIFT 4U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_META_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_META_EN 0x00000010U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_ZLS_SHIFT 3U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_ZLS_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_ZLS_EN 0x00000008U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_TE_SHIFT 2U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_TE_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_TE_EN 0x00000004U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_VCE_SHIFT 1U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_VCE_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_PB_VCE_EN 0x00000002U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_TLA_SHIFT 0U
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_TLA_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_TRUST_DM_TYPE_TLA_EN 0x00000001U
+
+#define ROGUE_CR_BIF_TRUST_DM_MASK 0x0000007FU
+
+/* Register ROGUE_CR_BIF_TRUST */
+#define ROGUE_CR_BIF_TRUST 0xA000U
+#define ROGUE_CR_BIF_TRUST_MASKFULL 0x00000000001FFFFFULL
+#define ROGUE_CR_BIF_TRUST_OTHER_RAY_VERTEX_DM_TRUSTED_SHIFT 20U
+#define ROGUE_CR_BIF_TRUST_OTHER_RAY_VERTEX_DM_TRUSTED_CLRMSK 0xFFEFFFFFU
+#define ROGUE_CR_BIF_TRUST_OTHER_RAY_VERTEX_DM_TRUSTED_EN 0x00100000U
+#define ROGUE_CR_BIF_TRUST_MCU_RAY_VERTEX_DM_TRUSTED_SHIFT 19U
+#define ROGUE_CR_BIF_TRUST_MCU_RAY_VERTEX_DM_TRUSTED_CLRMSK 0xFFF7FFFFU
+#define ROGUE_CR_BIF_TRUST_MCU_RAY_VERTEX_DM_TRUSTED_EN 0x00080000U
+#define ROGUE_CR_BIF_TRUST_OTHER_RAY_DM_TRUSTED_SHIFT 18U
+#define ROGUE_CR_BIF_TRUST_OTHER_RAY_DM_TRUSTED_CLRMSK 0xFFFBFFFFU
+#define ROGUE_CR_BIF_TRUST_OTHER_RAY_DM_TRUSTED_EN 0x00040000U
+#define ROGUE_CR_BIF_TRUST_MCU_RAY_DM_TRUSTED_SHIFT 17U
+#define ROGUE_CR_BIF_TRUST_MCU_RAY_DM_TRUSTED_CLRMSK 0xFFFDFFFFU
+#define ROGUE_CR_BIF_TRUST_MCU_RAY_DM_TRUSTED_EN 0x00020000U
+#define ROGUE_CR_BIF_TRUST_ENABLE_SHIFT 16U
+#define ROGUE_CR_BIF_TRUST_ENABLE_CLRMSK 0xFFFEFFFFU
+#define ROGUE_CR_BIF_TRUST_ENABLE_EN 0x00010000U
+#define ROGUE_CR_BIF_TRUST_DM_TRUSTED_SHIFT 9U
+#define ROGUE_CR_BIF_TRUST_DM_TRUSTED_CLRMSK 0xFFFF01FFU
+#define ROGUE_CR_BIF_TRUST_OTHER_COMPUTE_DM_TRUSTED_SHIFT 8U
+#define ROGUE_CR_BIF_TRUST_OTHER_COMPUTE_DM_TRUSTED_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_BIF_TRUST_OTHER_COMPUTE_DM_TRUSTED_EN 0x00000100U
+#define ROGUE_CR_BIF_TRUST_MCU_COMPUTE_DM_TRUSTED_SHIFT 7U
+#define ROGUE_CR_BIF_TRUST_MCU_COMPUTE_DM_TRUSTED_CLRMSK 0xFFFFFF7FU
+#define ROGUE_CR_BIF_TRUST_MCU_COMPUTE_DM_TRUSTED_EN 0x00000080U
+#define ROGUE_CR_BIF_TRUST_PBE_COMPUTE_DM_TRUSTED_SHIFT 6U
+#define ROGUE_CR_BIF_TRUST_PBE_COMPUTE_DM_TRUSTED_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_BIF_TRUST_PBE_COMPUTE_DM_TRUSTED_EN 0x00000040U
+#define ROGUE_CR_BIF_TRUST_OTHER_PIXEL_DM_TRUSTED_SHIFT 5U
+#define ROGUE_CR_BIF_TRUST_OTHER_PIXEL_DM_TRUSTED_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_BIF_TRUST_OTHER_PIXEL_DM_TRUSTED_EN 0x00000020U
+#define ROGUE_CR_BIF_TRUST_MCU_PIXEL_DM_TRUSTED_SHIFT 4U
+#define ROGUE_CR_BIF_TRUST_MCU_PIXEL_DM_TRUSTED_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_BIF_TRUST_MCU_PIXEL_DM_TRUSTED_EN 0x00000010U
+#define ROGUE_CR_BIF_TRUST_PBE_PIXEL_DM_TRUSTED_SHIFT 3U
+#define ROGUE_CR_BIF_TRUST_PBE_PIXEL_DM_TRUSTED_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_BIF_TRUST_PBE_PIXEL_DM_TRUSTED_EN 0x00000008U
+#define ROGUE_CR_BIF_TRUST_OTHER_VERTEX_DM_TRUSTED_SHIFT 2U
+#define ROGUE_CR_BIF_TRUST_OTHER_VERTEX_DM_TRUSTED_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_BIF_TRUST_OTHER_VERTEX_DM_TRUSTED_EN 0x00000004U
+#define ROGUE_CR_BIF_TRUST_MCU_VERTEX_DM_TRUSTED_SHIFT 1U
+#define ROGUE_CR_BIF_TRUST_MCU_VERTEX_DM_TRUSTED_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_BIF_TRUST_MCU_VERTEX_DM_TRUSTED_EN 0x00000002U
+#define ROGUE_CR_BIF_TRUST_PBE_VERTEX_DM_TRUSTED_SHIFT 0U
+#define ROGUE_CR_BIF_TRUST_PBE_VERTEX_DM_TRUSTED_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_BIF_TRUST_PBE_VERTEX_DM_TRUSTED_EN 0x00000001U
+
+/* Register ROGUE_CR_SYS_BUS_SECURE */
+#define ROGUE_CR_SYS_BUS_SECURE 0xA100U
+#define ROGUE_CR_SYS_BUS_SECURE__SECR__MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_SYS_BUS_SECURE_MASKFULL 0x0000000000000001ULL
+#define ROGUE_CR_SYS_BUS_SECURE_ENABLE_SHIFT 0U
+#define ROGUE_CR_SYS_BUS_SECURE_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SYS_BUS_SECURE_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_FBA_FC0_CHECKSUM */
+#define ROGUE_CR_FBA_FC0_CHECKSUM 0xD170U
+#define ROGUE_CR_FBA_FC0_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_FBA_FC0_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_FBA_FC0_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_FBA_FC1_CHECKSUM */
+#define ROGUE_CR_FBA_FC1_CHECKSUM 0xD178U
+#define ROGUE_CR_FBA_FC1_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_FBA_FC1_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_FBA_FC1_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_FBA_FC2_CHECKSUM */
+#define ROGUE_CR_FBA_FC2_CHECKSUM 0xD180U
+#define ROGUE_CR_FBA_FC2_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_FBA_FC2_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_FBA_FC2_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_FBA_FC3_CHECKSUM */
+#define ROGUE_CR_FBA_FC3_CHECKSUM 0xD188U
+#define ROGUE_CR_FBA_FC3_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_FBA_FC3_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_FBA_FC3_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_CLK_CTRL2 */
+#define ROGUE_CR_CLK_CTRL2 0xD200U
+#define ROGUE_CR_CLK_CTRL2_MASKFULL 0x0000000000000F33ULL
+#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_SHIFT 10U
+#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_CLRMSK 0xFFFFFFFFFFFFF3FFULL
+#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_ON 0x0000000000000400ULL
+#define ROGUE_CR_CLK_CTRL2_MCU_FBTC_AUTO 0x0000000000000800ULL
+#define ROGUE_CR_CLK_CTRL2_VRDM_SHIFT 8U
+#define ROGUE_CR_CLK_CTRL2_VRDM_CLRMSK 0xFFFFFFFFFFFFFCFFULL
+#define ROGUE_CR_CLK_CTRL2_VRDM_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL2_VRDM_ON 0x0000000000000100ULL
+#define ROGUE_CR_CLK_CTRL2_VRDM_AUTO 0x0000000000000200ULL
+#define ROGUE_CR_CLK_CTRL2_SH_SHIFT 4U
+#define ROGUE_CR_CLK_CTRL2_SH_CLRMSK 0xFFFFFFFFFFFFFFCFULL
+#define ROGUE_CR_CLK_CTRL2_SH_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL2_SH_ON 0x0000000000000010ULL
+#define ROGUE_CR_CLK_CTRL2_SH_AUTO 0x0000000000000020ULL
+#define ROGUE_CR_CLK_CTRL2_FBA_SHIFT 0U
+#define ROGUE_CR_CLK_CTRL2_FBA_CLRMSK 0xFFFFFFFFFFFFFFFCULL
+#define ROGUE_CR_CLK_CTRL2_FBA_OFF 0x0000000000000000ULL
+#define ROGUE_CR_CLK_CTRL2_FBA_ON 0x0000000000000001ULL
+#define ROGUE_CR_CLK_CTRL2_FBA_AUTO 0x0000000000000002ULL
+
+/* Register ROGUE_CR_CLK_STATUS2 */
+#define ROGUE_CR_CLK_STATUS2 0xD208U
+#define ROGUE_CR_CLK_STATUS2_MASKFULL 0x0000000000000015ULL
+#define ROGUE_CR_CLK_STATUS2_VRDM_SHIFT 4U
+#define ROGUE_CR_CLK_STATUS2_VRDM_CLRMSK 0xFFFFFFFFFFFFFFEFULL
+#define ROGUE_CR_CLK_STATUS2_VRDM_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS2_VRDM_RUNNING 0x0000000000000010ULL
+#define ROGUE_CR_CLK_STATUS2_SH_SHIFT 2U
+#define ROGUE_CR_CLK_STATUS2_SH_CLRMSK 0xFFFFFFFFFFFFFFFBULL
+#define ROGUE_CR_CLK_STATUS2_SH_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS2_SH_RUNNING 0x0000000000000004ULL
+#define ROGUE_CR_CLK_STATUS2_FBA_SHIFT 0U
+#define ROGUE_CR_CLK_STATUS2_FBA_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_CLK_STATUS2_FBA_GATED 0x0000000000000000ULL
+#define ROGUE_CR_CLK_STATUS2_FBA_RUNNING 0x0000000000000001ULL
+
+/* Register ROGUE_CR_RPM_SHF_FPL */
+#define ROGUE_CR_RPM_SHF_FPL 0xD520U
+#define ROGUE_CR_RPM_SHF_FPL_MASKFULL 0x3FFFFFFFFFFFFFFCULL
+#define ROGUE_CR_RPM_SHF_FPL_SIZE_SHIFT 40U
+#define ROGUE_CR_RPM_SHF_FPL_SIZE_CLRMSK 0xC00000FFFFFFFFFFULL
+#define ROGUE_CR_RPM_SHF_FPL_BASE_SHIFT 2U
+#define ROGUE_CR_RPM_SHF_FPL_BASE_CLRMSK 0xFFFFFF0000000003ULL
+#define ROGUE_CR_RPM_SHF_FPL_BASE_ALIGNSHIFT 2U
+#define ROGUE_CR_RPM_SHF_FPL_BASE_ALIGNSIZE 4U
+
+/* Register ROGUE_CR_RPM_SHF_FPL_READ */
+#define ROGUE_CR_RPM_SHF_FPL_READ 0xD528U
+#define ROGUE_CR_RPM_SHF_FPL_READ_MASKFULL 0x00000000007FFFFFULL
+#define ROGUE_CR_RPM_SHF_FPL_READ_TOGGLE_SHIFT 22U
+#define ROGUE_CR_RPM_SHF_FPL_READ_TOGGLE_CLRMSK 0xFFBFFFFFU
+#define ROGUE_CR_RPM_SHF_FPL_READ_TOGGLE_EN 0x00400000U
+#define ROGUE_CR_RPM_SHF_FPL_READ_OFFSET_SHIFT 0U
+#define ROGUE_CR_RPM_SHF_FPL_READ_OFFSET_CLRMSK 0xFFC00000U
+
+/* Register ROGUE_CR_RPM_SHF_FPL_WRITE */
+#define ROGUE_CR_RPM_SHF_FPL_WRITE 0xD530U
+#define ROGUE_CR_RPM_SHF_FPL_WRITE_MASKFULL 0x00000000007FFFFFULL
+#define ROGUE_CR_RPM_SHF_FPL_WRITE_TOGGLE_SHIFT 22U
+#define ROGUE_CR_RPM_SHF_FPL_WRITE_TOGGLE_CLRMSK 0xFFBFFFFFU
+#define ROGUE_CR_RPM_SHF_FPL_WRITE_TOGGLE_EN 0x00400000U
+#define ROGUE_CR_RPM_SHF_FPL_WRITE_OFFSET_SHIFT 0U
+#define ROGUE_CR_RPM_SHF_FPL_WRITE_OFFSET_CLRMSK 0xFFC00000U
+
+/* Register ROGUE_CR_RPM_SHG_FPL */
+#define ROGUE_CR_RPM_SHG_FPL 0xD538U
+#define ROGUE_CR_RPM_SHG_FPL_MASKFULL 0x3FFFFFFFFFFFFFFCULL
+#define ROGUE_CR_RPM_SHG_FPL_SIZE_SHIFT 40U
+#define ROGUE_CR_RPM_SHG_FPL_SIZE_CLRMSK 0xC00000FFFFFFFFFFULL
+#define ROGUE_CR_RPM_SHG_FPL_BASE_SHIFT 2U
+#define ROGUE_CR_RPM_SHG_FPL_BASE_CLRMSK 0xFFFFFF0000000003ULL
+#define ROGUE_CR_RPM_SHG_FPL_BASE_ALIGNSHIFT 2U
+#define ROGUE_CR_RPM_SHG_FPL_BASE_ALIGNSIZE 4U
+
+/* Register ROGUE_CR_RPM_SHG_FPL_READ */
+#define ROGUE_CR_RPM_SHG_FPL_READ 0xD540U
+#define ROGUE_CR_RPM_SHG_FPL_READ_MASKFULL 0x00000000007FFFFFULL
+#define ROGUE_CR_RPM_SHG_FPL_READ_TOGGLE_SHIFT 22U
+#define ROGUE_CR_RPM_SHG_FPL_READ_TOGGLE_CLRMSK 0xFFBFFFFFU
+#define ROGUE_CR_RPM_SHG_FPL_READ_TOGGLE_EN 0x00400000U
+#define ROGUE_CR_RPM_SHG_FPL_READ_OFFSET_SHIFT 0U
+#define ROGUE_CR_RPM_SHG_FPL_READ_OFFSET_CLRMSK 0xFFC00000U
+
+/* Register ROGUE_CR_RPM_SHG_FPL_WRITE */
+#define ROGUE_CR_RPM_SHG_FPL_WRITE 0xD548U
+#define ROGUE_CR_RPM_SHG_FPL_WRITE_MASKFULL 0x00000000007FFFFFULL
+#define ROGUE_CR_RPM_SHG_FPL_WRITE_TOGGLE_SHIFT 22U
+#define ROGUE_CR_RPM_SHG_FPL_WRITE_TOGGLE_CLRMSK 0xFFBFFFFFU
+#define ROGUE_CR_RPM_SHG_FPL_WRITE_TOGGLE_EN 0x00400000U
+#define ROGUE_CR_RPM_SHG_FPL_WRITE_OFFSET_SHIFT 0U
+#define ROGUE_CR_RPM_SHG_FPL_WRITE_OFFSET_CLRMSK 0xFFC00000U
+
+/* Register ROGUE_CR_SH_PERF */
+#define ROGUE_CR_SH_PERF 0xD5F8U
+#define ROGUE_CR_SH_PERF_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_SH_PERF_CLR_3_SHIFT 4U
+#define ROGUE_CR_SH_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SH_PERF_CLR_3_EN 0x00000010U
+#define ROGUE_CR_SH_PERF_CLR_2_SHIFT 3U
+#define ROGUE_CR_SH_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SH_PERF_CLR_2_EN 0x00000008U
+#define ROGUE_CR_SH_PERF_CLR_1_SHIFT 2U
+#define ROGUE_CR_SH_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SH_PERF_CLR_1_EN 0x00000004U
+#define ROGUE_CR_SH_PERF_CLR_0_SHIFT 1U
+#define ROGUE_CR_SH_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SH_PERF_CLR_0_EN 0x00000002U
+#define ROGUE_CR_SH_PERF_CTRL_ENABLE_SHIFT 0U
+#define ROGUE_CR_SH_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SH_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register ROGUE_CR_SH_PERF_SELECT0 */
+#define ROGUE_CR_SH_PERF_SELECT0 0xD600U
+#define ROGUE_CR_SH_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define ROGUE_CR_SH_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define ROGUE_CR_SH_PERF_SELECT0_MODE_SHIFT 21U
+#define ROGUE_CR_SH_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define ROGUE_CR_SH_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define ROGUE_CR_SH_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define ROGUE_CR_SH_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define ROGUE_CR_SH_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define ROGUE_CR_SH_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_SH_PERF_COUNTER_0 */
+#define ROGUE_CR_SH_PERF_COUNTER_0 0xD628U
+#define ROGUE_CR_SH_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SH_PERF_COUNTER_0_REG_SHIFT 0U
+#define ROGUE_CR_SH_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SHF_SHG_CHECKSUM */
+#define ROGUE_CR_SHF_SHG_CHECKSUM 0xD1C0U
+#define ROGUE_CR_SHF_SHG_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SHF_SHG_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_SHF_SHG_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM */
+#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM 0xD1C8U
+#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_SHF_VERTEX_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SHF_VARY_BIF_CHECKSUM */
+#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM 0xD1D0U
+#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_SHF_VARY_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_RPM_BIF_CHECKSUM */
+#define ROGUE_CR_RPM_BIF_CHECKSUM 0xD1D8U
+#define ROGUE_CR_RPM_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_RPM_BIF_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_RPM_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SHG_BIF_CHECKSUM */
+#define ROGUE_CR_SHG_BIF_CHECKSUM 0xD1E0U
+#define ROGUE_CR_SHG_BIF_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SHG_BIF_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_SHG_BIF_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register ROGUE_CR_SHG_FE_BE_CHECKSUM */
+#define ROGUE_CR_SHG_FE_BE_CHECKSUM 0xD1E8U
+#define ROGUE_CR_SHG_FE_BE_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_SHG_FE_BE_CHECKSUM_VALUE_SHIFT 0U
+#define ROGUE_CR_SHG_FE_BE_CHECKSUM_VALUE_CLRMSK 0x00000000U
+
+/* Register DPX_CR_BF_PERF */
+#define DPX_CR_BF_PERF 0xC458U
+#define DPX_CR_BF_PERF_MASKFULL 0x000000000000001FULL
+#define DPX_CR_BF_PERF_CLR_3_SHIFT 4U
+#define DPX_CR_BF_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define DPX_CR_BF_PERF_CLR_3_EN 0x00000010U
+#define DPX_CR_BF_PERF_CLR_2_SHIFT 3U
+#define DPX_CR_BF_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define DPX_CR_BF_PERF_CLR_2_EN 0x00000008U
+#define DPX_CR_BF_PERF_CLR_1_SHIFT 2U
+#define DPX_CR_BF_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define DPX_CR_BF_PERF_CLR_1_EN 0x00000004U
+#define DPX_CR_BF_PERF_CLR_0_SHIFT 1U
+#define DPX_CR_BF_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define DPX_CR_BF_PERF_CLR_0_EN 0x00000002U
+#define DPX_CR_BF_PERF_CTRL_ENABLE_SHIFT 0U
+#define DPX_CR_BF_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define DPX_CR_BF_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register DPX_CR_BF_PERF_SELECT0 */
+#define DPX_CR_BF_PERF_SELECT0 0xC460U
+#define DPX_CR_BF_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define DPX_CR_BF_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define DPX_CR_BF_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define DPX_CR_BF_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define DPX_CR_BF_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define DPX_CR_BF_PERF_SELECT0_MODE_SHIFT 21U
+#define DPX_CR_BF_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define DPX_CR_BF_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define DPX_CR_BF_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define DPX_CR_BF_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define DPX_CR_BF_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define DPX_CR_BF_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register DPX_CR_BF_PERF_COUNTER_0 */
+#define DPX_CR_BF_PERF_COUNTER_0 0xC488U
+#define DPX_CR_BF_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define DPX_CR_BF_PERF_COUNTER_0_REG_SHIFT 0U
+#define DPX_CR_BF_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register DPX_CR_BT_PERF */
+#define DPX_CR_BT_PERF 0xC3D0U
+#define DPX_CR_BT_PERF_MASKFULL 0x000000000000001FULL
+#define DPX_CR_BT_PERF_CLR_3_SHIFT 4U
+#define DPX_CR_BT_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define DPX_CR_BT_PERF_CLR_3_EN 0x00000010U
+#define DPX_CR_BT_PERF_CLR_2_SHIFT 3U
+#define DPX_CR_BT_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define DPX_CR_BT_PERF_CLR_2_EN 0x00000008U
+#define DPX_CR_BT_PERF_CLR_1_SHIFT 2U
+#define DPX_CR_BT_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define DPX_CR_BT_PERF_CLR_1_EN 0x00000004U
+#define DPX_CR_BT_PERF_CLR_0_SHIFT 1U
+#define DPX_CR_BT_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define DPX_CR_BT_PERF_CLR_0_EN 0x00000002U
+#define DPX_CR_BT_PERF_CTRL_ENABLE_SHIFT 0U
+#define DPX_CR_BT_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define DPX_CR_BT_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register DPX_CR_BT_PERF_SELECT0 */
+#define DPX_CR_BT_PERF_SELECT0 0xC3D8U
+#define DPX_CR_BT_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define DPX_CR_BT_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define DPX_CR_BT_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define DPX_CR_BT_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define DPX_CR_BT_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define DPX_CR_BT_PERF_SELECT0_MODE_SHIFT 21U
+#define DPX_CR_BT_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define DPX_CR_BT_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define DPX_CR_BT_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define DPX_CR_BT_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define DPX_CR_BT_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define DPX_CR_BT_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register DPX_CR_BT_PERF_COUNTER_0 */
+#define DPX_CR_BT_PERF_COUNTER_0 0xC420U
+#define DPX_CR_BT_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define DPX_CR_BT_PERF_COUNTER_0_REG_SHIFT 0U
+#define DPX_CR_BT_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register DPX_CR_RQ_USC_DEBUG */
+#define DPX_CR_RQ_USC_DEBUG 0xC110U
+#define DPX_CR_RQ_USC_DEBUG_MASKFULL 0x00000000FFFFFFFFULL
+#define DPX_CR_RQ_USC_DEBUG_CHECKSUM_SHIFT 0U
+#define DPX_CR_RQ_USC_DEBUG_CHECKSUM_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register DPX_CR_BIF_FAULT_BANK_MMU_STATUS */
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS 0xC5C8U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_MASKFULL 0x000000000000F775ULL
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_CAT_BASE_SHIFT 12U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_CAT_BASE_CLRMSK 0xFFFF0FFFU
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_PAGE_SIZE_SHIFT 8U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_PAGE_SIZE_CLRMSK 0xFFFFF8FFU
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_DATA_TYPE_SHIFT 5U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_DATA_TYPE_CLRMSK 0xFFFFFF9FU
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_RO_SHIFT 4U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_RO_CLRMSK 0xFFFFFFEFU
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_RO_EN 0x00000010U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_PM_META_RO_SHIFT 2U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_PM_META_RO_CLRMSK 0xFFFFFFFBU
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_PM_META_RO_EN 0x00000004U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_SHIFT 0U
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_CLRMSK 0xFFFFFFFEU
+#define DPX_CR_BIF_FAULT_BANK_MMU_STATUS_FAULT_EN 0x00000001U
+
+/* Register DPX_CR_BIF_FAULT_BANK_REQ_STATUS */
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS 0xC5D0U
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_MASKFULL 0x03FFFFFFFFFFFFF0ULL
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_RNW_SHIFT 57U
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_RNW_CLRMSK 0xFDFFFFFFFFFFFFFFULL
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_RNW_EN 0x0200000000000000ULL
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_SB_SHIFT 44U
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_SB_CLRMSK 0xFE000FFFFFFFFFFFULL
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_ID_SHIFT 40U
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_TAG_ID_CLRMSK 0xFFFFF0FFFFFFFFFFULL
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_SHIFT 4U
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_CLRMSK 0xFFFFFF000000000FULL
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_ALIGNSHIFT 4U
+#define DPX_CR_BIF_FAULT_BANK_REQ_STATUS_ADDRESS_ALIGNSIZE 16U
+
+/* Register DPX_CR_BIF_MMU_STATUS */
+#define DPX_CR_BIF_MMU_STATUS 0xC5D8U
+#define DPX_CR_BIF_MMU_STATUS_MASKFULL 0x000000000FFFFFF7ULL
+#define DPX_CR_BIF_MMU_STATUS_PC_DATA_SHIFT 20U
+#define DPX_CR_BIF_MMU_STATUS_PC_DATA_CLRMSK 0xF00FFFFFU
+#define DPX_CR_BIF_MMU_STATUS_PD_DATA_SHIFT 12U
+#define DPX_CR_BIF_MMU_STATUS_PD_DATA_CLRMSK 0xFFF00FFFU
+#define DPX_CR_BIF_MMU_STATUS_PT_DATA_SHIFT 4U
+#define DPX_CR_BIF_MMU_STATUS_PT_DATA_CLRMSK 0xFFFFF00FU
+#define DPX_CR_BIF_MMU_STATUS_STALLED_SHIFT 2U
+#define DPX_CR_BIF_MMU_STATUS_STALLED_CLRMSK 0xFFFFFFFBU
+#define DPX_CR_BIF_MMU_STATUS_STALLED_EN 0x00000004U
+#define DPX_CR_BIF_MMU_STATUS_PAUSED_SHIFT 1U
+#define DPX_CR_BIF_MMU_STATUS_PAUSED_CLRMSK 0xFFFFFFFDU
+#define DPX_CR_BIF_MMU_STATUS_PAUSED_EN 0x00000002U
+#define DPX_CR_BIF_MMU_STATUS_BUSY_SHIFT 0U
+#define DPX_CR_BIF_MMU_STATUS_BUSY_CLRMSK 0xFFFFFFFEU
+#define DPX_CR_BIF_MMU_STATUS_BUSY_EN 0x00000001U
+
+/* Register DPX_CR_RT_PERF */
+#define DPX_CR_RT_PERF 0xC700U
+#define DPX_CR_RT_PERF_MASKFULL 0x000000000000001FULL
+#define DPX_CR_RT_PERF_CLR_3_SHIFT 4U
+#define DPX_CR_RT_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define DPX_CR_RT_PERF_CLR_3_EN 0x00000010U
+#define DPX_CR_RT_PERF_CLR_2_SHIFT 3U
+#define DPX_CR_RT_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define DPX_CR_RT_PERF_CLR_2_EN 0x00000008U
+#define DPX_CR_RT_PERF_CLR_1_SHIFT 2U
+#define DPX_CR_RT_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define DPX_CR_RT_PERF_CLR_1_EN 0x00000004U
+#define DPX_CR_RT_PERF_CLR_0_SHIFT 1U
+#define DPX_CR_RT_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define DPX_CR_RT_PERF_CLR_0_EN 0x00000002U
+#define DPX_CR_RT_PERF_CTRL_ENABLE_SHIFT 0U
+#define DPX_CR_RT_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define DPX_CR_RT_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register DPX_CR_RT_PERF_SELECT0 */
+#define DPX_CR_RT_PERF_SELECT0 0xC708U
+#define DPX_CR_RT_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define DPX_CR_RT_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define DPX_CR_RT_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define DPX_CR_RT_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define DPX_CR_RT_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define DPX_CR_RT_PERF_SELECT0_MODE_SHIFT 21U
+#define DPX_CR_RT_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define DPX_CR_RT_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define DPX_CR_RT_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define DPX_CR_RT_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define DPX_CR_RT_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define DPX_CR_RT_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register DPX_CR_RT_PERF_COUNTER_0 */
+#define DPX_CR_RT_PERF_COUNTER_0 0xC730U
+#define DPX_CR_RT_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define DPX_CR_RT_PERF_COUNTER_0_REG_SHIFT 0U
+#define DPX_CR_RT_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register DPX_CR_BX_TU_PERF */
+#define DPX_CR_BX_TU_PERF 0xC908U
+#define DPX_CR_BX_TU_PERF_MASKFULL 0x000000000000001FULL
+#define DPX_CR_BX_TU_PERF_CLR_3_SHIFT 4U
+#define DPX_CR_BX_TU_PERF_CLR_3_CLRMSK 0xFFFFFFEFU
+#define DPX_CR_BX_TU_PERF_CLR_3_EN 0x00000010U
+#define DPX_CR_BX_TU_PERF_CLR_2_SHIFT 3U
+#define DPX_CR_BX_TU_PERF_CLR_2_CLRMSK 0xFFFFFFF7U
+#define DPX_CR_BX_TU_PERF_CLR_2_EN 0x00000008U
+#define DPX_CR_BX_TU_PERF_CLR_1_SHIFT 2U
+#define DPX_CR_BX_TU_PERF_CLR_1_CLRMSK 0xFFFFFFFBU
+#define DPX_CR_BX_TU_PERF_CLR_1_EN 0x00000004U
+#define DPX_CR_BX_TU_PERF_CLR_0_SHIFT 1U
+#define DPX_CR_BX_TU_PERF_CLR_0_CLRMSK 0xFFFFFFFDU
+#define DPX_CR_BX_TU_PERF_CLR_0_EN 0x00000002U
+#define DPX_CR_BX_TU_PERF_CTRL_ENABLE_SHIFT 0U
+#define DPX_CR_BX_TU_PERF_CTRL_ENABLE_CLRMSK 0xFFFFFFFEU
+#define DPX_CR_BX_TU_PERF_CTRL_ENABLE_EN 0x00000001U
+
+/* Register DPX_CR_BX_TU_PERF_SELECT0 */
+#define DPX_CR_BX_TU_PERF_SELECT0 0xC910U
+#define DPX_CR_BX_TU_PERF_SELECT0_MASKFULL 0x3FFF3FFF003FFFFFULL
+#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MAX_SHIFT 48U
+#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MAX_CLRMSK 0xC000FFFFFFFFFFFFULL
+#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MIN_SHIFT 32U
+#define DPX_CR_BX_TU_PERF_SELECT0_BATCH_MIN_CLRMSK 0xFFFFC000FFFFFFFFULL
+#define DPX_CR_BX_TU_PERF_SELECT0_MODE_SHIFT 21U
+#define DPX_CR_BX_TU_PERF_SELECT0_MODE_CLRMSK 0xFFFFFFFFFFDFFFFFULL
+#define DPX_CR_BX_TU_PERF_SELECT0_MODE_EN 0x0000000000200000ULL
+#define DPX_CR_BX_TU_PERF_SELECT0_GROUP_SELECT_SHIFT 16U
+#define DPX_CR_BX_TU_PERF_SELECT0_GROUP_SELECT_CLRMSK 0xFFFFFFFFFFE0FFFFULL
+#define DPX_CR_BX_TU_PERF_SELECT0_BIT_SELECT_SHIFT 0U
+#define DPX_CR_BX_TU_PERF_SELECT0_BIT_SELECT_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register DPX_CR_BX_TU_PERF_COUNTER_0 */
+#define DPX_CR_BX_TU_PERF_COUNTER_0 0xC938U
+#define DPX_CR_BX_TU_PERF_COUNTER_0_MASKFULL 0x00000000FFFFFFFFULL
+#define DPX_CR_BX_TU_PERF_COUNTER_0_REG_SHIFT 0U
+#define DPX_CR_BX_TU_PERF_COUNTER_0_REG_CLRMSK 0x00000000U
+
+/* Register DPX_CR_RS_PDS_RR_CHECKSUM */
+#define DPX_CR_RS_PDS_RR_CHECKSUM 0xC0F0U
+#define DPX_CR_RS_PDS_RR_CHECKSUM_MASKFULL 0x00000000FFFFFFFFULL
+#define DPX_CR_RS_PDS_RR_CHECKSUM_VALUE_SHIFT 0U
+#define DPX_CR_RS_PDS_RR_CHECKSUM_VALUE_CLRMSK 0xFFFFFFFF00000000ULL
+
+/* Register ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT */
+#define ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT 0xE140U
+#define ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT_MASKFULL 0x00000000000000FFULL
+#define ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT_ID_SHIFT 0U
+#define ROGUE_CR_MMU_CBASE_MAPPING_CONTEXT_ID_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_MMU_CBASE_MAPPING */
+#define ROGUE_CR_MMU_CBASE_MAPPING 0xE148U
+#define ROGUE_CR_MMU_CBASE_MAPPING_MASKFULL 0x000000000FFFFFFFULL
+#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_SHIFT 0U
+#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_CLRMSK 0xF0000000U
+#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_ALIGNSHIFT 12U
+#define ROGUE_CR_MMU_CBASE_MAPPING_BASE_ADDR_ALIGNSIZE 4096U
+
+/* Register ROGUE_CR_MMU_FAULT_STATUS */
+#define ROGUE_CR_MMU_FAULT_STATUS 0xE150U
+#define ROGUE_CR_MMU_FAULT_STATUS_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_ADDRESS_SHIFT 28U
+#define ROGUE_CR_MMU_FAULT_STATUS_ADDRESS_CLRMSK 0x000000000FFFFFFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_CONTEXT_SHIFT 20U
+#define ROGUE_CR_MMU_FAULT_STATUS_CONTEXT_CLRMSK 0xFFFFFFFFF00FFFFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_TAG_SB_SHIFT 12U
+#define ROGUE_CR_MMU_FAULT_STATUS_TAG_SB_CLRMSK 0xFFFFFFFFFFF00FFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_REQ_ID_SHIFT 6U
+#define ROGUE_CR_MMU_FAULT_STATUS_REQ_ID_CLRMSK 0xFFFFFFFFFFFFF03FULL
+#define ROGUE_CR_MMU_FAULT_STATUS_LEVEL_SHIFT 4U
+#define ROGUE_CR_MMU_FAULT_STATUS_LEVEL_CLRMSK 0xFFFFFFFFFFFFFFCFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_RNW_SHIFT 3U
+#define ROGUE_CR_MMU_FAULT_STATUS_RNW_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_MMU_FAULT_STATUS_RNW_EN 0x0000000000000008ULL
+#define ROGUE_CR_MMU_FAULT_STATUS_TYPE_SHIFT 1U
+#define ROGUE_CR_MMU_FAULT_STATUS_TYPE_CLRMSK 0xFFFFFFFFFFFFFFF9ULL
+#define ROGUE_CR_MMU_FAULT_STATUS_FAULT_SHIFT 0U
+#define ROGUE_CR_MMU_FAULT_STATUS_FAULT_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MMU_FAULT_STATUS_FAULT_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_MMU_FAULT_STATUS_META */
+#define ROGUE_CR_MMU_FAULT_STATUS_META 0xE158U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_ADDRESS_SHIFT 28U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_ADDRESS_CLRMSK 0x000000000FFFFFFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_CONTEXT_SHIFT 20U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_CONTEXT_CLRMSK 0xFFFFFFFFF00FFFFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_TAG_SB_SHIFT 12U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_TAG_SB_CLRMSK 0xFFFFFFFFFFF00FFFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_REQ_ID_SHIFT 6U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_REQ_ID_CLRMSK 0xFFFFFFFFFFFFF03FULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_LEVEL_SHIFT 4U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_LEVEL_CLRMSK 0xFFFFFFFFFFFFFFCFULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_RNW_SHIFT 3U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_RNW_CLRMSK 0xFFFFFFFFFFFFFFF7ULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_RNW_EN 0x0000000000000008ULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_TYPE_SHIFT 1U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_TYPE_CLRMSK 0xFFFFFFFFFFFFFFF9ULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_FAULT_SHIFT 0U
+#define ROGUE_CR_MMU_FAULT_STATUS_META_FAULT_CLRMSK 0xFFFFFFFFFFFFFFFEULL
+#define ROGUE_CR_MMU_FAULT_STATUS_META_FAULT_EN 0x0000000000000001ULL
+
+/* Register ROGUE_CR_SLC3_CTRL_MISC */
+#define ROGUE_CR_SLC3_CTRL_MISC 0xE200U
+#define ROGUE_CR_SLC3_CTRL_MISC_MASKFULL 0x0000000000000107ULL
+#define ROGUE_CR_SLC3_CTRL_MISC_WRITE_COMBINER_SHIFT 8U
+#define ROGUE_CR_SLC3_CTRL_MISC_WRITE_COMBINER_CLRMSK 0xFFFFFEFFU
+#define ROGUE_CR_SLC3_CTRL_MISC_WRITE_COMBINER_EN 0x00000100U
+#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_SHIFT 0U
+#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_CLRMSK 0xFFFFFFF8U
+#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_LINEAR 0x00000000U
+#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_IN_PAGE_HASH 0x00000001U
+#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_FIXED_PVR_HASH 0x00000002U
+#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_SCRAMBLE_PVR_HASH 0x00000003U
+#define ROGUE_CR_SLC3_CTRL_MISC_ADDR_DECODE_MODE_WEAVED_HASH 0x00000004U
+
+/* Register ROGUE_CR_SLC3_SCRAMBLE */
+#define ROGUE_CR_SLC3_SCRAMBLE 0xE208U
+#define ROGUE_CR_SLC3_SCRAMBLE_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC3_SCRAMBLE_BITS_SHIFT 0U
+#define ROGUE_CR_SLC3_SCRAMBLE_BITS_CLRMSK 0x0000000000000000ULL
+
+/* Register ROGUE_CR_SLC3_SCRAMBLE2 */
+#define ROGUE_CR_SLC3_SCRAMBLE2 0xE210U
+#define ROGUE_CR_SLC3_SCRAMBLE2_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC3_SCRAMBLE2_BITS_SHIFT 0U
+#define ROGUE_CR_SLC3_SCRAMBLE2_BITS_CLRMSK 0x0000000000000000ULL
+
+/* Register ROGUE_CR_SLC3_SCRAMBLE3 */
+#define ROGUE_CR_SLC3_SCRAMBLE3 0xE218U
+#define ROGUE_CR_SLC3_SCRAMBLE3_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC3_SCRAMBLE3_BITS_SHIFT 0U
+#define ROGUE_CR_SLC3_SCRAMBLE3_BITS_CLRMSK 0x0000000000000000ULL
+
+/* Register ROGUE_CR_SLC3_SCRAMBLE4 */
+#define ROGUE_CR_SLC3_SCRAMBLE4 0xE260U
+#define ROGUE_CR_SLC3_SCRAMBLE4_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC3_SCRAMBLE4_BITS_SHIFT 0U
+#define ROGUE_CR_SLC3_SCRAMBLE4_BITS_CLRMSK 0x0000000000000000ULL
+
+/* Register ROGUE_CR_SLC3_STATUS */
+#define ROGUE_CR_SLC3_STATUS 0xE220U
+#define ROGUE_CR_SLC3_STATUS_MASKFULL 0xFFFFFFFFFFFFFFFFULL
+#define ROGUE_CR_SLC3_STATUS_WRITES1_SHIFT 48U
+#define ROGUE_CR_SLC3_STATUS_WRITES1_CLRMSK 0x0000FFFFFFFFFFFFULL
+#define ROGUE_CR_SLC3_STATUS_WRITES0_SHIFT 32U
+#define ROGUE_CR_SLC3_STATUS_WRITES0_CLRMSK 0xFFFF0000FFFFFFFFULL
+#define ROGUE_CR_SLC3_STATUS_READS1_SHIFT 16U
+#define ROGUE_CR_SLC3_STATUS_READS1_CLRMSK 0xFFFFFFFF0000FFFFULL
+#define ROGUE_CR_SLC3_STATUS_READS0_SHIFT 0U
+#define ROGUE_CR_SLC3_STATUS_READS0_CLRMSK 0xFFFFFFFFFFFF0000ULL
+
+/* Register ROGUE_CR_SLC3_IDLE */
+#define ROGUE_CR_SLC3_IDLE 0xE228U
+#define ROGUE_CR_SLC3_IDLE_MASKFULL 0x00000000000FFFFFULL
+#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST2_SHIFT 18U
+#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST2_CLRMSK 0xFFF3FFFFU
+#define ROGUE_CR_SLC3_IDLE_MMU_SHIFT 17U
+#define ROGUE_CR_SLC3_IDLE_MMU_CLRMSK 0xFFFDFFFFU
+#define ROGUE_CR_SLC3_IDLE_MMU_EN 0x00020000U
+#define ROGUE_CR_SLC3_IDLE_RDI_SHIFT 16U
+#define ROGUE_CR_SLC3_IDLE_RDI_CLRMSK 0xFFFEFFFFU
+#define ROGUE_CR_SLC3_IDLE_RDI_EN 0x00010000U
+#define ROGUE_CR_SLC3_IDLE_IMGBV4_SHIFT 12U
+#define ROGUE_CR_SLC3_IDLE_IMGBV4_CLRMSK 0xFFFF0FFFU
+#define ROGUE_CR_SLC3_IDLE_CACHE_BANKS_SHIFT 4U
+#define ROGUE_CR_SLC3_IDLE_CACHE_BANKS_CLRMSK 0xFFFFF00FU
+#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST_SHIFT 2U
+#define ROGUE_CR_SLC3_IDLE_ORDERQ_DUST_CLRMSK 0xFFFFFFF3U
+#define ROGUE_CR_SLC3_IDLE_ORDERQ_JONES_SHIFT 1U
+#define ROGUE_CR_SLC3_IDLE_ORDERQ_JONES_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SLC3_IDLE_ORDERQ_JONES_EN 0x00000002U
+#define ROGUE_CR_SLC3_IDLE_XBAR_SHIFT 0U
+#define ROGUE_CR_SLC3_IDLE_XBAR_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SLC3_IDLE_XBAR_EN 0x00000001U
+
+/* Register ROGUE_CR_SLC3_FAULT_STOP_STATUS */
+#define ROGUE_CR_SLC3_FAULT_STOP_STATUS 0xE248U
+#define ROGUE_CR_SLC3_FAULT_STOP_STATUS_MASKFULL 0x0000000000001FFFULL
+#define ROGUE_CR_SLC3_FAULT_STOP_STATUS_BIF_SHIFT 0U
+#define ROGUE_CR_SLC3_FAULT_STOP_STATUS_BIF_CLRMSK 0xFFFFE000U
+
+/* Register ROGUE_CR_VDM_CONTEXT_STORE_MODE */
+#define ROGUE_CR_VDM_CONTEXT_STORE_MODE 0xF048U
+#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MASKFULL 0x0000000000000003ULL
+#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_SHIFT 0U
+#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_CLRMSK 0xFFFFFFFCU
+#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_INDEX 0x00000000U
+#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_INSTANCE 0x00000001U
+#define ROGUE_CR_VDM_CONTEXT_STORE_MODE_MODE_LIST 0x00000002U
+
+/* Register ROGUE_CR_CONTEXT_MAPPING0 */
+#define ROGUE_CR_CONTEXT_MAPPING0 0xF078U
+#define ROGUE_CR_CONTEXT_MAPPING0_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING0_2D_SHIFT 24U
+#define ROGUE_CR_CONTEXT_MAPPING0_2D_CLRMSK 0x00FFFFFFU
+#define ROGUE_CR_CONTEXT_MAPPING0_CDM_SHIFT 16U
+#define ROGUE_CR_CONTEXT_MAPPING0_CDM_CLRMSK 0xFF00FFFFU
+#define ROGUE_CR_CONTEXT_MAPPING0_3D_SHIFT 8U
+#define ROGUE_CR_CONTEXT_MAPPING0_3D_CLRMSK 0xFFFF00FFU
+#define ROGUE_CR_CONTEXT_MAPPING0_TA_SHIFT 0U
+#define ROGUE_CR_CONTEXT_MAPPING0_TA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_CONTEXT_MAPPING1 */
+#define ROGUE_CR_CONTEXT_MAPPING1 0xF080U
+#define ROGUE_CR_CONTEXT_MAPPING1_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING1_HOST_SHIFT 8U
+#define ROGUE_CR_CONTEXT_MAPPING1_HOST_CLRMSK 0xFFFF00FFU
+#define ROGUE_CR_CONTEXT_MAPPING1_TLA_SHIFT 0U
+#define ROGUE_CR_CONTEXT_MAPPING1_TLA_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_CONTEXT_MAPPING2 */
+#define ROGUE_CR_CONTEXT_MAPPING2 0xF088U
+#define ROGUE_CR_CONTEXT_MAPPING2_MASKFULL 0x0000000000FFFFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING2_ALIST0_SHIFT 16U
+#define ROGUE_CR_CONTEXT_MAPPING2_ALIST0_CLRMSK 0xFF00FFFFU
+#define ROGUE_CR_CONTEXT_MAPPING2_TE0_SHIFT 8U
+#define ROGUE_CR_CONTEXT_MAPPING2_TE0_CLRMSK 0xFFFF00FFU
+#define ROGUE_CR_CONTEXT_MAPPING2_VCE0_SHIFT 0U
+#define ROGUE_CR_CONTEXT_MAPPING2_VCE0_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_CONTEXT_MAPPING3 */
+#define ROGUE_CR_CONTEXT_MAPPING3 0xF090U
+#define ROGUE_CR_CONTEXT_MAPPING3_MASKFULL 0x0000000000FFFFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING3_ALIST1_SHIFT 16U
+#define ROGUE_CR_CONTEXT_MAPPING3_ALIST1_CLRMSK 0xFF00FFFFU
+#define ROGUE_CR_CONTEXT_MAPPING3_TE1_SHIFT 8U
+#define ROGUE_CR_CONTEXT_MAPPING3_TE1_CLRMSK 0xFFFF00FFU
+#define ROGUE_CR_CONTEXT_MAPPING3_VCE1_SHIFT 0U
+#define ROGUE_CR_CONTEXT_MAPPING3_VCE1_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_BIF_JONES_OUTSTANDING_READ */
+#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ 0xF098U
+#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ_COUNTER_SHIFT 0U
+#define ROGUE_CR_BIF_JONES_OUTSTANDING_READ_COUNTER_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ */
+#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ 0xF0A0U
+#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ_COUNTER_SHIFT 0U
+#define ROGUE_CR_BIF_BLACKPEARL_OUTSTANDING_READ_COUNTER_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_BIF_DUST_OUTSTANDING_READ */
+#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ 0xF0A8U
+#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ_MASKFULL 0x000000000000FFFFULL
+#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ_COUNTER_SHIFT 0U
+#define ROGUE_CR_BIF_DUST_OUTSTANDING_READ_COUNTER_CLRMSK 0xFFFF0000U
+
+/* Register ROGUE_CR_CONTEXT_MAPPING4 */
+#define ROGUE_CR_CONTEXT_MAPPING4 0xF210U
+#define ROGUE_CR_CONTEXT_MAPPING4_MASKFULL 0x0000FFFFFFFFFFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING4_3D_MMU_STACK_SHIFT 40U
+#define ROGUE_CR_CONTEXT_MAPPING4_3D_MMU_STACK_CLRMSK 0xFFFF00FFFFFFFFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING4_3D_UFSTACK_SHIFT 32U
+#define ROGUE_CR_CONTEXT_MAPPING4_3D_UFSTACK_CLRMSK 0xFFFFFF00FFFFFFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING4_3D_FSTACK_SHIFT 24U
+#define ROGUE_CR_CONTEXT_MAPPING4_3D_FSTACK_CLRMSK 0xFFFFFFFF00FFFFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING4_TA_MMU_STACK_SHIFT 16U
+#define ROGUE_CR_CONTEXT_MAPPING4_TA_MMU_STACK_CLRMSK 0xFFFFFFFFFF00FFFFULL
+#define ROGUE_CR_CONTEXT_MAPPING4_TA_UFSTACK_SHIFT 8U
+#define ROGUE_CR_CONTEXT_MAPPING4_TA_UFSTACK_CLRMSK 0xFFFFFFFFFFFF00FFULL
+#define ROGUE_CR_CONTEXT_MAPPING4_TA_FSTACK_SHIFT 0U
+#define ROGUE_CR_CONTEXT_MAPPING4_TA_FSTACK_CLRMSK 0xFFFFFFFFFFFFFF00ULL
+
+/* Register ROGUE_CR_MULTICORE_GPU */
+#define ROGUE_CR_MULTICORE_GPU 0xF300U
+#define ROGUE_CR_MULTICORE_GPU_MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_FRAGMENT_SHIFT 6U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_FRAGMENT_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_FRAGMENT_EN 0x00000040U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_GEOMETRY_SHIFT 5U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_GEOMETRY_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_GEOMETRY_EN 0x00000020U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_COMPUTE_SHIFT 4U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_COMPUTE_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_COMPUTE_EN 0x00000010U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_PRIMARY_SHIFT 3U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_PRIMARY_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_MULTICORE_GPU_CAPABILITY_PRIMARY_EN 0x00000008U
+#define ROGUE_CR_MULTICORE_GPU_ID_SHIFT 0U
+#define ROGUE_CR_MULTICORE_GPU_ID_CLRMSK 0xFFFFFFF8U
+
+/* Register ROGUE_CR_MULTICORE_SYSTEM */
+#define ROGUE_CR_MULTICORE_SYSTEM 0xF308U
+#define ROGUE_CR_MULTICORE_SYSTEM_MASKFULL 0x000000000000000FULL
+#define ROGUE_CR_MULTICORE_SYSTEM_GPU_COUNT_SHIFT 0U
+#define ROGUE_CR_MULTICORE_SYSTEM_GPU_COUNT_CLRMSK 0xFFFFFFF0U
+
+/* Register ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON */
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON 0xF310U
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_TYPE_SHIFT 30U
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_TYPE_CLRMSK 0x3FFFFFFFU
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_SHIFT 8U
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_CLRMSK 0xC00000FFU
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_GPU_ENABLE_SHIFT 0U
+#define ROGUE_CR_MULTICORE_FRAGMENT_CTRL_COMMON_GPU_ENABLE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON */
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON 0xF320U
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_TYPE_SHIFT 30U
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_TYPE_CLRMSK 0x3FFFFFFFU
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_SHIFT 8U
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_CLRMSK 0xC00000FFU
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_GPU_ENABLE_SHIFT 0U
+#define ROGUE_CR_MULTICORE_GEOMETRY_CTRL_COMMON_GPU_ENABLE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON */
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON 0xF330U
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_MASKFULL 0x00000000FFFFFFFFULL
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_TYPE_SHIFT 30U
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_TYPE_CLRMSK 0x3FFFFFFFU
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_SHIFT 8U
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_WORKLOAD_EXECUTE_COUNT_CLRMSK 0xC00000FFU
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_GPU_ENABLE_SHIFT 0U
+#define ROGUE_CR_MULTICORE_COMPUTE_CTRL_COMMON_GPU_ENABLE_CLRMSK 0xFFFFFF00U
+
+/* Register ROGUE_CR_ECC_RAM_ERR_INJ */
+#define ROGUE_CR_ECC_RAM_ERR_INJ 0xF340U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_ECC_RAM_ERR_INJ_SLC_SIDEKICK_SHIFT 4U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_SLC_SIDEKICK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_ECC_RAM_ERR_INJ_SLC_SIDEKICK_EN 0x00000010U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_USC_SHIFT 3U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_USC_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_USC_EN 0x00000008U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_TPU_MCU_L0_SHIFT 2U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_TPU_MCU_L0_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_ECC_RAM_ERR_INJ_TPU_MCU_L0_EN 0x00000004U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_RASCAL_SHIFT 1U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_RASCAL_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_ECC_RAM_ERR_INJ_RASCAL_EN 0x00000002U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_MARS_SHIFT 0U
+#define ROGUE_CR_ECC_RAM_ERR_INJ_MARS_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_ECC_RAM_ERR_INJ_MARS_EN 0x00000001U
+
+/* Register ROGUE_CR_ECC_RAM_INIT_KICK */
+#define ROGUE_CR_ECC_RAM_INIT_KICK 0xF348U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_ECC_RAM_INIT_KICK_SLC_SIDEKICK_SHIFT 4U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_SLC_SIDEKICK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_ECC_RAM_INIT_KICK_SLC_SIDEKICK_EN 0x00000010U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_USC_SHIFT 3U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_USC_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_USC_EN 0x00000008U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_TPU_MCU_L0_SHIFT 2U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_TPU_MCU_L0_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_ECC_RAM_INIT_KICK_TPU_MCU_L0_EN 0x00000004U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_RASCAL_SHIFT 1U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_RASCAL_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_ECC_RAM_INIT_KICK_RASCAL_EN 0x00000002U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_MARS_SHIFT 0U
+#define ROGUE_CR_ECC_RAM_INIT_KICK_MARS_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_ECC_RAM_INIT_KICK_MARS_EN 0x00000001U
+
+/* Register ROGUE_CR_ECC_RAM_INIT_DONE */
+#define ROGUE_CR_ECC_RAM_INIT_DONE 0xF350U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_MASKFULL 0x000000000000001FULL
+#define ROGUE_CR_ECC_RAM_INIT_DONE_SLC_SIDEKICK_SHIFT 4U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_SLC_SIDEKICK_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_ECC_RAM_INIT_DONE_SLC_SIDEKICK_EN 0x00000010U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_USC_SHIFT 3U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_USC_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_USC_EN 0x00000008U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_TPU_MCU_L0_SHIFT 2U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_TPU_MCU_L0_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_ECC_RAM_INIT_DONE_TPU_MCU_L0_EN 0x00000004U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_RASCAL_SHIFT 1U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_RASCAL_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_ECC_RAM_INIT_DONE_RASCAL_EN 0x00000002U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_MARS_SHIFT 0U
+#define ROGUE_CR_ECC_RAM_INIT_DONE_MARS_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_ECC_RAM_INIT_DONE_MARS_EN 0x00000001U
+
+/* Register ROGUE_CR_SAFETY_EVENT_ENABLE */
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE 0xF390U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_SHIFT 3U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_EN 0x00000008U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_SHIFT 2U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_EN 0x00000004U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_SHIFT 1U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_EN 0x00000002U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_SAFETY_EVENT_STATUS */
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE 0xF398U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__TRP_FAIL_SHIFT 3U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__TRP_FAIL_EN 0x00000008U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_FW_SHIFT 2U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_FW_EN 0x00000004U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_GPU_SHIFT 1U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__FAULT_GPU_EN 0x00000002U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SAFETY_EVENT_STATUS__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_SAFETY_EVENT_CLEAR */
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE 0xF3A0U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__TRP_FAIL_SHIFT 3U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__TRP_FAIL_EN 0x00000008U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_FW_SHIFT 2U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_FW_EN 0x00000004U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_GPU_SHIFT 1U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__FAULT_GPU_EN 0x00000002U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_SAFETY_EVENT_CLEAR__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U
+
+/* Register ROGUE_CR_MTS_SAFETY_EVENT_ENABLE */
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE 0xF3D8U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__MASKFULL 0x000000000000007FULL
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_SHIFT 6U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_CLRMSK 0xFFFFFFBFU
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__CPU_PAGE_FAULT_EN 0x00000040U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_SHIFT 5U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_CLRMSK 0xFFFFFFDFU
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__SAFE_COMPUTE_FAIL_EN 0x00000020U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_SHIFT 4U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_CLRMSK 0xFFFFFFEFU
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__WATCHDOG_TIMEOUT_EN 0x00000010U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_SHIFT 3U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_CLRMSK 0xFFFFFFF7U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__TRP_FAIL_EN 0x00000008U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_SHIFT 2U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_CLRMSK 0xFFFFFFFBU
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_FW_EN 0x00000004U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_SHIFT 1U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_CLRMSK 0xFFFFFFFDU
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__FAULT_GPU_EN 0x00000002U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_SHIFT 0U
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_CLRMSK 0xFFFFFFFEU
+#define ROGUE_CR_MTS_SAFETY_EVENT_ENABLE__ROGUEXE__GPU_PAGE_FAULT_EN 0x00000001U
+
+/* clang-format on */
+
+#endif /* PVR_ROGUE_CR_DEFS_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h
new file mode 100644
index 0000000000000..46186b56effc9
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_CR_DEFS_CLIENT_H
+#define PVR_ROGUE_CR_DEFS_CLIENT_H
+
+/* clang-format off */
+
+/*
+ * This register controls the anti-aliasing mode of the Tiling Co-Processor, independent control is
+ * provided in both X & Y axis.
+ * This register needs to be set based on the ISP Samples Per Pixel a core supports.
+ *
+ * When ISP Samples Per Pixel = 1:
+ * 2xmsaa is achieved by enabling Y - TE does AA on Y plane only
+ * 4xmsaa is achieved by enabling Y and X - TE does AA on X and Y plane
+ * 8xmsaa not supported by XE cores
+ *
+ * When ISP Samples Per Pixel = 2:
+ * 2xmsaa is achieved by enabling X2 - does not affect TE
+ * 4xmsaa is achieved by enabling Y and X2 - TE does AA on Y plane only
+ * 8xmsaa is achieved by enabling Y, X and X2 - TE does AA on X and Y plane
+ * 8xmsaa not supported by XE cores
+ *
+ * When ISP Samples Per Pixel = 4:
+ * 2xmsaa is achieved by enabling X2 - does not affect TE
+ * 4xmsaa is achieved by enabling Y2 and X2 - TE does AA on Y plane only
+ * 8xmsaa not supported by XE cores
+ */
+/* Register ROGUE_CR_TE_AA */
+#define ROGUE_CR_TE_AA 0x0C00U
+#define ROGUE_CR_TE_AA_MASKFULL 0x000000000000000Full
+/* Y2
+ * Indicates 4xmsaa when X2 and Y2 are set to 1. This does not affect TE and is only used within
+ * TPW.
+ */
+#define ROGUE_CR_TE_AA_Y2_SHIFT 3
+#define ROGUE_CR_TE_AA_Y2_CLRMSK 0xFFFFFFF7
+#define ROGUE_CR_TE_AA_Y2_EN 0x00000008
+/* Y
+ * Anti-Aliasing in Y Plane Enabled
+ */
+#define ROGUE_CR_TE_AA_Y_SHIFT 2
+#define ROGUE_CR_TE_AA_Y_CLRMSK 0xFFFFFFFB
+#define ROGUE_CR_TE_AA_Y_EN 0x00000004
+/* X
+ * Anti-Aliasing in X Plane Enabled
+ */
+#define ROGUE_CR_TE_AA_X_SHIFT 1
+#define ROGUE_CR_TE_AA_X_CLRMSK 0xFFFFFFFD
+#define ROGUE_CR_TE_AA_X_EN 0x00000002
+/* X2
+ * 2x Anti-Aliasing Enabled, affects PPP only
+ */
+#define ROGUE_CR_TE_AA_X2_SHIFT                             (0U)
+#define ROGUE_CR_TE_AA_X2_CLRMSK                            (0xFFFFFFFEU)
+#define ROGUE_CR_TE_AA_X2_EN                                (0x00000001U)
+
+/* MacroTile Boundaries X Plane */
+/* Register ROGUE_CR_TE_MTILE1 */
+#define ROGUE_CR_TE_MTILE1 0x0C08
+#define ROGUE_CR_TE_MTILE1_MASKFULL 0x0000000007FFFFFFull
+/* X1 default: 0x00000004
+ * X1 MacroTile boundary, left tile X for second column of macrotiles (16MT mode) - 32 pixels across
+ * tile
+ */
+#define ROGUE_CR_TE_MTILE1_X1_SHIFT 18
+#define ROGUE_CR_TE_MTILE1_X1_CLRMSK 0xF803FFFF
+/* X2 default: 0x00000008
+ * X2 MacroTile boundary, left tile X for third(16MT) column of macrotiles - 32 pixels across tile
+ */
+#define ROGUE_CR_TE_MTILE1_X2_SHIFT 9U
+#define ROGUE_CR_TE_MTILE1_X2_CLRMSK 0xFFFC01FF
+/* X3 default: 0x0000000c
+ * X3 MacroTile boundary, left tile X for fourth column of macrotiles (16MT) - 32 pixels across tile
+ */
+#define ROGUE_CR_TE_MTILE1_X3_SHIFT 0
+#define ROGUE_CR_TE_MTILE1_X3_CLRMSK 0xFFFFFE00
+
+/* MacroTile Boundaries Y Plane. */
+/* Register ROGUE_CR_TE_MTILE2 */
+#define ROGUE_CR_TE_MTILE2 0x0C10
+#define ROGUE_CR_TE_MTILE2_MASKFULL 0x0000000007FFFFFFull
+/* Y1 default: 0x00000004
+ * X1 MacroTile boundary, ltop tile Y for second column of macrotiles (16MT mode) - 32 pixels tile
+ * height
+ */
+#define ROGUE_CR_TE_MTILE2_Y1_SHIFT 18
+#define ROGUE_CR_TE_MTILE2_Y1_CLRMSK 0xF803FFFF
+/* Y2 default: 0x00000008
+ * X2 MacroTile boundary, top tile Y for third(16MT) column of macrotiles - 32 pixels tile height
+ */
+#define ROGUE_CR_TE_MTILE2_Y2_SHIFT 9
+#define ROGUE_CR_TE_MTILE2_Y2_CLRMSK 0xFFFC01FF
+/* Y3 default: 0x0000000c
+ * X3 MacroTile boundary, top tile Y for fourth column of macrotiles (16MT) - 32 pixels tile height
+ */
+#define ROGUE_CR_TE_MTILE2_Y3_SHIFT 0
+#define ROGUE_CR_TE_MTILE2_Y3_CLRMSK 0xFFFFFE00
+
+/*
+ * In order to perform the tiling operation and generate the display list the maximum screen size
+ * must be configured in terms of the number of tiles in X & Y axis.
+ */
+
+/* Register ROGUE_CR_TE_SCREEN */
+#define ROGUE_CR_TE_SCREEN 0x0C18U
+#define ROGUE_CR_TE_SCREEN_MASKFULL 0x00000000001FF1FFull
+/* YMAX default: 0x00000010
+ * Maximum Y tile address visible on screen, 32 pixel tile height, 16Kx16K max screen size
+ */
+#define ROGUE_CR_TE_SCREEN_YMAX_SHIFT 12
+#define ROGUE_CR_TE_SCREEN_YMAX_CLRMSK 0xFFE00FFF
+/* XMAX default: 0x00000010
+ * Maximum X tile address visible on screen, 32 pixel tile width, 16Kx16K max screen size
+ */
+#define ROGUE_CR_TE_SCREEN_XMAX_SHIFT 0
+#define ROGUE_CR_TE_SCREEN_XMAX_CLRMSK 0xFFFFFE00
+
+/*
+ * In order to perform the tiling operation and generate the display list the maximum screen size
+ * must be configured in terms of the number of pixels in X & Y axis since this may not be the same
+ * as the number of tiles defined in the RGX_CR_TE_SCREEN register.
+ */
+/* Register ROGUE_CR_PPP_SCREEN */
+#define ROGUE_CR_PPP_SCREEN 0x0C98
+#define ROGUE_CR_PPP_SCREEN_MASKFULL 0x000000007FFF7FFFull
+/* PIXYMAX
+ * Screen height in pixels. (16K x 16K max screen size)
+ */
+#define ROGUE_CR_PPP_SCREEN_PIXYMAX_SHIFT 16
+#define ROGUE_CR_PPP_SCREEN_PIXYMAX_CLRMSK 0x8000FFFF
+/* PIXXMAX
+ * Screen width in pixels.(16K x 16K max screen size)
+ */
+#define ROGUE_CR_PPP_SCREEN_PIXXMAX_SHIFT 0
+#define ROGUE_CR_PPP_SCREEN_PIXXMAX_CLRMSK 0xFFFF8000
+
+/* Register ROGUE_CR_ISP_MTILE_SIZE */
+#define ROGUE_CR_ISP_MTILE_SIZE 0x0F18
+#define ROGUE_CR_ISP_MTILE_SIZE_MASKFULL 0x0000000003FF03FFull
+/* X
+ * Macrotile width, in tiles. A value of zero corresponds to the maximum size
+ */
+#define ROGUE_CR_ISP_MTILE_SIZE_X_SHIFT 16
+#define ROGUE_CR_ISP_MTILE_SIZE_X_CLRMSK 0xFC00FFFF
+#define ROGUE_CR_ISP_MTILE_SIZE_X_ALIGNSHIFT 0
+#define ROGUE_CR_ISP_MTILE_SIZE_X_ALIGNSIZE 1
+/* Y
+ * Macrotile height, in tiles. A value of zero corresponds to the maximum size
+ */
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_SHIFT 0
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_CLRMSK 0xFFFFFC00
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_ALIGNSHIFT 0
+#define ROGUE_CR_ISP_MTILE_SIZE_Y_ALIGNSIZE 1
+
+/* clang-format on */
+
+#endif /* PVR_ROGUE_CR_DEFS_CLIENT_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_defs.h b/drivers/gpu/drm/imagination/pvr_rogue_defs.h
new file mode 100644
index 0000000000000..932b016860089
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_defs.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_DEFS_H
+#define PVR_ROGUE_DEFS_H
+
+#include "pvr_rogue_cr_defs.h"
+
+#include <linux/bits.h>
+
+/*
+ ******************************************************************************
+ * ROGUE Defines
+ ******************************************************************************
+ */
+
+#define ROGUE_FW_MAX_NUM_OS (8U)
+#define ROGUE_FW_HOST_OS (0U)
+#define ROGUE_FW_GUEST_OSID_START (1U)
+
+#define ROGUE_FW_THREAD_0 (0U)
+#define ROGUE_FW_THREAD_1 (1U)
+
+#define GET_ROGUE_CACHE_LINE_SIZE(x) ((((s32)(x)) > 0) ? ((x) / 8) : (0))
+
+#define MAX_HW_GEOM_FRAG_CONTEXTS 2U
+
+#define ROGUE_CR_CLK_CTRL_ALL_ON \
+	(0x5555555555555555ull & ROGUE_CR_CLK_CTRL_MASKFULL)
+#define ROGUE_CR_CLK_CTRL_ALL_AUTO \
+	(0xaaaaaaaaaaaaaaaaull & ROGUE_CR_CLK_CTRL_MASKFULL)
+#define ROGUE_CR_CLK_CTRL2_ALL_ON \
+	(0x5555555555555555ull & ROGUE_CR_CLK_CTRL2_MASKFULL)
+#define ROGUE_CR_CLK_CTRL2_ALL_AUTO \
+	(0xaaaaaaaaaaaaaaaaull & ROGUE_CR_CLK_CTRL2_MASKFULL)
+
+#define ROGUE_CR_SOFT_RESET_DUST_n_CORE_EN    \
+	(ROGUE_CR_SOFT_RESET_DUST_A_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_B_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_C_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_D_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_E_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_F_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_G_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_H_CORE_EN)
+
+/* SOFT_RESET Rascal and DUSTs bits */
+#define ROGUE_CR_SOFT_RESET_RASCALDUSTS_EN    \
+	(ROGUE_CR_SOFT_RESET_RASCAL_CORE_EN | \
+	 ROGUE_CR_SOFT_RESET_DUST_n_CORE_EN)
+
+/* SOFT_RESET steps as defined in the TRM */
+#define ROGUE_S7_SOFT_RESET_DUSTS (ROGUE_CR_SOFT_RESET_DUST_n_CORE_EN)
+
+#define ROGUE_S7_SOFT_RESET_JONES                                 \
+	(ROGUE_CR_SOFT_RESET_PM_EN | ROGUE_CR_SOFT_RESET_VDM_EN | \
+	 ROGUE_CR_SOFT_RESET_ISP_EN)
+
+#define ROGUE_S7_SOFT_RESET_JONES_ALL                             \
+	(ROGUE_S7_SOFT_RESET_JONES | ROGUE_CR_SOFT_RESET_BIF_EN | \
+	 ROGUE_CR_SOFT_RESET_SLC_EN | ROGUE_CR_SOFT_RESET_GARTEN_EN)
+
+#define ROGUE_S7_SOFT_RESET2                                                  \
+	(ROGUE_CR_SOFT_RESET2_BLACKPEARL_EN | ROGUE_CR_SOFT_RESET2_PIXEL_EN | \
+	 ROGUE_CR_SOFT_RESET2_CDM_EN | ROGUE_CR_SOFT_RESET2_VERTEX_EN)
+
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT (12U)
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE \
+	BIT(ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT)
+
+#define ROGUE_BIF_PM_VIRTUAL_PAGE_ALIGNSHIFT (14U)
+#define ROGUE_BIF_PM_VIRTUAL_PAGE_SIZE BIT(ROGUE_BIF_PM_VIRTUAL_PAGE_ALIGNSHIFT)
+
+#define ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE (16U)
+
+/*
+ * To get the number of required Dusts, divide the number of
+ * clusters by 2 and round up
+ */
+#define ROGUE_REQ_NUM_DUSTS(CLUSTERS) (((CLUSTERS) + 1U) / 2U)
+
+/*
+ * To get the number of required Bernado/Phantom(s), divide
+ * the number of clusters by 4 and round up
+ */
+#define ROGUE_REQ_NUM_PHANTOMS(CLUSTERS) (((CLUSTERS) + 3U) / 4U)
+#define ROGUE_REQ_NUM_BERNADOS(CLUSTERS) (((CLUSTERS) + 3U) / 4U)
+#define ROGUE_REQ_NUM_BLACKPEARLS(CLUSTERS) (((CLUSTERS) + 3U) / 4U)
+
+/*
+ * FW MMU contexts
+ */
+#define MMU_CONTEXT_MAPPING_FWPRIV (0x0) /* FW code/private data */
+#define MMU_CONTEXT_MAPPING_FWIF (0x0) /* Host/FW data */
+
+/*
+ * Utility macros to calculate CAT_BASE register addresses
+ */
+#define BIF_CAT_BASEX(n)          \
+	(ROGUE_CR_BIF_CAT_BASE0 + \
+	 (n) * (ROGUE_CR_BIF_CAT_BASE1 - ROGUE_CR_BIF_CAT_BASE0))
+
+#define FWCORE_MEM_CAT_BASEX(n)                 \
+	(ROGUE_CR_FWCORE_MEM_CAT_BASE0 +        \
+	 (n) * (ROGUE_CR_FWCORE_MEM_CAT_BASE1 - \
+		ROGUE_CR_FWCORE_MEM_CAT_BASE0))
+
+/*
+ * FWCORE wrapper register defines
+ */
+#define FWCORE_ADDR_REMAP_CONFIG0_MMU_CONTEXT_SHIFT \
+	ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_SHIFT
+#define FWCORE_ADDR_REMAP_CONFIG0_MMU_CONTEXT_CLRMSK \
+	ROGUE_CR_FWCORE_ADDR_REMAP_CONFIG0_CBASE_CLRMSK
+#define FWCORE_ADDR_REMAP_CONFIG0_SIZE_ALIGNSHIFT (12U)
+
+#define ROGUE_MAX_COMPUTE_SHARED_REGISTERS (2 * 1024)
+#define ROGUE_MAX_VERTEX_SHARED_REGISTERS 1024
+#define ROGUE_MAX_PIXEL_SHARED_REGISTERS 1024
+#define ROGUE_CSRM_LINE_SIZE_IN_DWORDS (64 * 4 * 4)
+
+#define ROGUE_CDMCTRL_USC_COMMON_SIZE_ALIGNSIZE 64
+#define ROGUE_CDMCTRL_USC_COMMON_SIZE_UPPER 256
+
+/*
+ * The maximum amount of local memory which can be allocated by a single kernel
+ * (in dwords/32-bit registers).
+ *
+ * ROGUE_CDMCTRL_USC_COMMON_SIZE_ALIGNSIZE is in bytes so we divide by four.
+ */
+#define ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS ((ROGUE_CDMCTRL_USC_COMMON_SIZE_ALIGNSIZE * \
+						   ROGUE_CDMCTRL_USC_COMMON_SIZE_UPPER) >> 2)
+
+/*
+ ******************************************************************************
+ * WA HWBRNs
+ ******************************************************************************
+ */
+
+/* GPU CR timer tick in GPU cycles */
+#define ROGUE_CRTIME_TICK_IN_CYCLES (256U)
+
+/* for nohw multicore return max cores possible to client */
+#define ROGUE_MULTICORE_MAX_NOHW_CORES (4U)
+
+/*
+ * If the size of the SLC is less than this value then the TPU bypasses the SLC.
+ */
+#define ROGUE_TPU_CACHED_SLC_SIZE_THRESHOLD (128U * 1024U)
+
+/*
+ * If the size of the SLC is bigger than this value then the TCU must not be
+ * bypassed in the SLC.
+ * In XE_MEMORY_HIERARCHY cores, the TCU is bypassed by default.
+ */
+#define ROGUE_TCU_CACHED_SLC_SIZE_THRESHOLD (32U * 1024U)
+
+/*
+ * Register used by the FW to track the current boot stage (not used in MIPS)
+ */
+#define ROGUE_FW_BOOT_STAGE_REGISTER (ROGUE_CR_POWER_ESTIMATE_RESULT)
+
+/*
+ * Virtualisation definitions
+ */
+#define ROGUE_VIRTUALISATION_REG_SIZE_PER_OS \
+	(ROGUE_CR_MTS_SCHEDULE1 - ROGUE_CR_MTS_SCHEDULE)
+
+/*
+ * Macro used to indicate which version of HWPerf is active
+ */
+#define ROGUE_FEATURE_HWPERF_ROGUE
+
+/*
+ * Maximum number of cores supported by TRP
+ */
+#define ROGUE_TRP_MAX_NUM_CORES (4U)
+
+#endif /* PVR_ROGUE_DEFS_H */
-- 
GitLab


From 7900e00434eda5ebe7e0c6c995f8528929a8182c Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:29 +0000
Subject: [PATCH 0404/2340] drm/imagination: Add firmware and MMU related
 headers

Changes since v8:
- Corrected license identifiers

Changes since v5:
- Split up header commit due to size

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/23ee233dfbe6f2239328f8201fd6d8c1017cea58.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 .../drm/imagination/pvr_rogue_heap_config.h   | 113 ++++++
 drivers/gpu/drm/imagination/pvr_rogue_meta.h  | 356 ++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_rogue_mips.h  | 335 ++++++++++++++++
 .../drm/imagination/pvr_rogue_mips_check.h    |  58 +++
 .../gpu/drm/imagination/pvr_rogue_mmu_defs.h  | 136 +++++++
 5 files changed, 998 insertions(+)
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_heap_config.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_meta.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_mips.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_mips_check.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h

diff --git a/drivers/gpu/drm/imagination/pvr_rogue_heap_config.h b/drivers/gpu/drm/imagination/pvr_rogue_heap_config.h
new file mode 100644
index 0000000000000..6847660067033
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_heap_config.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_HEAP_CONFIG_H
+#define PVR_ROGUE_HEAP_CONFIG_H
+
+#include <linux/sizes.h>
+
+/*
+ * ROGUE Device Virtual Address Space Definitions
+ *
+ * This file defines the ROGUE virtual address heaps that are used in
+ * application memory contexts. It also shows where the Firmware memory heap
+ * fits into this, but the firmware heap is only ever created in the
+ * kernel driver and never exposed to userspace.
+ *
+ * ROGUE_PDSCODEDATA_HEAP_BASE and ROGUE_USCCODE_HEAP_BASE will be programmed,
+ * on a global basis, into ROGUE_CR_PDS_EXEC_BASE and ROGUE_CR_USC_CODE_BASE_*
+ * respectively. Therefore if client drivers use multiple configs they must
+ * still be consistent with their definitions for these heaps.
+ *
+ * Base addresses have to be a multiple of 4MiB.
+ * Heaps must not start at 0x0000000000, as this is reserved for internal
+ * use within the driver.
+ * Range comments, those starting in column 0 below are a section heading of
+ * sorts and are above the heaps in that range. Often this is the reserved
+ * size of the heap within the range.
+ */
+
+/* 0x00_0000_0000 ************************************************************/
+
+/* 0x00_0000_0000 - 0x00_0040_0000 */
+/* 0 MiB to 4 MiB, size of 4 MiB : RESERVED */
+
+/* 0x00_0040_0000 - 0x7F_FFC0_0000 **/
+/* 4 MiB to 512 GiB, size of 512 GiB less 4 MiB : RESERVED **/
+
+/* 0x80_0000_0000 ************************************************************/
+
+/* 0x80_0000_0000 - 0x9F_FFFF_FFFF **/
+/* 512 GiB to 640 GiB, size of 128 GiB : GENERAL_HEAP **/
+#define ROGUE_GENERAL_HEAP_BASE 0x8000000000ull
+#define ROGUE_GENERAL_HEAP_SIZE SZ_128G
+
+/* 0xA0_0000_0000 - 0xAF_FFFF_FFFF */
+/* 640 GiB to 704 GiB, size of 64 GiB : FREE */
+
+/* B0_0000_0000 - 0xB7_FFFF_FFFF */
+/* 704 GiB to 736 GiB, size of 32 GiB : FREE */
+
+/* 0xB8_0000_0000 - 0xBF_FFFF_FFFF */
+/* 736 GiB to 768 GiB, size of 32 GiB : RESERVED */
+
+/* 0xC0_0000_0000 ************************************************************/
+
+/* 0xC0_0000_0000 - 0xD9_FFFF_FFFF */
+/* 768 GiB to 872 GiB, size of 104 GiB : FREE */
+
+/* 0xDA_0000_0000 - 0xDA_FFFF_FFFF */
+/* 872 GiB to 876 GiB, size of 4 GiB : PDSCODEDATA_HEAP */
+#define ROGUE_PDSCODEDATA_HEAP_BASE 0xDA00000000ull
+#define ROGUE_PDSCODEDATA_HEAP_SIZE SZ_4G
+
+/* 0xDB_0000_0000 - 0xDB_FFFF_FFFF */
+/* 876 GiB to 880 GiB, size of 256 MiB (reserved 4GiB) : BRN **/
+/*
+ * The BRN63142 quirk workaround requires Region Header memory to be at the top
+ * of a 16GiB aligned range. This is so when masked with 0x03FFFFFFFF the
+ * address will avoid aliasing PB addresses. Start at 879.75GiB. Size of 256MiB.
+ */
+#define ROGUE_RGNHDR_HEAP_BASE 0xDBF0000000ull
+#define ROGUE_RGNHDR_HEAP_SIZE SZ_256M
+
+/* 0xDC_0000_0000 - 0xDF_FFFF_FFFF */
+/* 880 GiB to 896 GiB, size of 16 GiB : FREE */
+
+/* 0xE0_0000_0000 - 0xE0_FFFF_FFFF */
+/* 896 GiB to 900 GiB, size of 4 GiB : USCCODE_HEAP */
+#define ROGUE_USCCODE_HEAP_BASE 0xE000000000ull
+#define ROGUE_USCCODE_HEAP_SIZE SZ_4G
+
+/* 0xE1_0000_0000 - 0xE1_BFFF_FFFF */
+/* 900 GiB to 903 GiB, size of 3 GiB : RESERVED */
+
+/* 0xE1_C000_000 - 0xE1_FFFF_FFFF */
+/* 903 GiB to 904 GiB, reserved 1 GiB, : FIRMWARE_HEAP */
+#define ROGUE_FW_HEAP_BASE 0xE1C0000000ull
+
+/* 0xE2_0000_0000 - 0xE3_FFFF_FFFF */
+/* 904 GiB to 912 GiB, size of 8 GiB : FREE */
+
+/* 0xE4_0000_0000 - 0xE7_FFFF_FFFF */
+/* 912 GiB to 968 GiB, size of 16 GiB : TRANSFER_FRAG */
+#define ROGUE_TRANSFER_FRAG_HEAP_BASE 0xE400000000ull
+#define ROGUE_TRANSFER_FRAG_HEAP_SIZE SZ_16G
+
+/* 0xE8_0000_0000 - 0xF1_FFFF_FFFF */
+/* 928 GiB to 968 GiB, size of 40 GiB : RESERVED */
+
+/* 0xF2_0000_0000 - 0xF2_001F_FFFF **/
+/* 968 GiB to 969 GiB, size of 2 MiB : VISTEST_HEAP */
+#define ROGUE_VISTEST_HEAP_BASE 0xF200000000ull
+#define ROGUE_VISTEST_HEAP_SIZE SZ_2M
+
+/* 0xF2_4000_0000 - 0xF2_FFFF_FFFF */
+/* 969 GiB to 972 GiB, size of 3 GiB : FREE */
+
+/* 0xF3_0000_0000 - 0xFF_FFFF_FFFF */
+/* 972 GiB to 1024 GiB, size of 52 GiB : FREE */
+
+/* 0xFF_FFFF_FFFF ************************************************************/
+
+#endif /* PVR_ROGUE_HEAP_CONFIG_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_meta.h b/drivers/gpu/drm/imagination/pvr_rogue_meta.h
new file mode 100644
index 0000000000000..3020e6582daab
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_meta.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_META_H
+#define PVR_ROGUE_META_H
+
+/***** The META HW register definitions in the file are updated manually *****/
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+/*
+ ******************************************************************************
+ * META registers and MACROS
+ *****************************************************************************
+ */
+#define META_CR_CTRLREG_BASE(t) (0x04800000U + (0x1000U * (t)))
+
+#define META_CR_TXPRIVEXT (0x048000E8)
+#define META_CR_TXPRIVEXT_MINIM_EN BIT(7)
+
+#define META_CR_SYSC_JTAG_THREAD (0x04830030)
+#define META_CR_SYSC_JTAG_THREAD_PRIV_EN (0x00000004)
+
+#define META_CR_PERF_COUNT0 (0x0480FFE0)
+#define META_CR_PERF_COUNT1 (0x0480FFE8)
+#define META_CR_PERF_COUNT_CTRL_SHIFT (28)
+#define META_CR_PERF_COUNT_CTRL_MASK (0xF0000000)
+#define META_CR_PERF_COUNT_CTRL_DCACHEHITS (8 << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_CTRL_ICACHEHITS (9 << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_CTRL_ICACHEMISS \
+	(0xA << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_CTRL_ICORE (0xD << META_CR_PERF_COUNT_CTRL_SHIFT)
+#define META_CR_PERF_COUNT_THR_SHIFT (24)
+#define META_CR_PERF_COUNT_THR_MASK (0x0F000000)
+#define META_CR_PERF_COUNT_THR_0 (0x1 << META_CR_PERF_COUNT_THR_SHIFT)
+#define META_CR_PERF_COUNT_THR_1 (0x2 << META_CR_PERF_COUNT_THR_1)
+
+#define META_CR_TxVECINT_BHALT (0x04820500)
+#define META_CR_PERF_ICORE0 (0x0480FFD0)
+#define META_CR_PERF_ICORE1 (0x0480FFD8)
+#define META_CR_PERF_ICORE_DCACHEMISS (0x8)
+
+#define META_CR_PERF_COUNT(ctrl, thr)                                        \
+	((META_CR_PERF_COUNT_CTRL_##ctrl << META_CR_PERF_COUNT_CTRL_SHIFT) | \
+	 ((thr) << META_CR_PERF_COUNT_THR_SHIFT))
+
+#define META_CR_TXUXXRXDT_OFFSET (META_CR_CTRLREG_BASE(0U) + 0x0000FFF0U)
+#define META_CR_TXUXXRXRQ_OFFSET (META_CR_CTRLREG_BASE(0U) + 0x0000FFF8U)
+
+/* Poll for done. */
+#define META_CR_TXUXXRXRQ_DREADY_BIT (0x80000000U)
+/* Set for read. */
+#define META_CR_TXUXXRXRQ_RDnWR_BIT (0x00010000U)
+#define META_CR_TXUXXRXRQ_TX_S (12)
+#define META_CR_TXUXXRXRQ_RX_S (4)
+#define META_CR_TXUXXRXRQ_UXX_S (0)
+
+/* Internal ctrl regs. */
+#define META_CR_TXUIN_ID (0x0)
+/* Data unit regs. */
+#define META_CR_TXUD0_ID (0x1)
+/* Data unit regs. */
+#define META_CR_TXUD1_ID (0x2)
+/* Address unit regs. */
+#define META_CR_TXUA0_ID (0x3)
+/* Address unit regs. */
+#define META_CR_TXUA1_ID (0x4)
+/* PC registers. */
+#define META_CR_TXUPC_ID (0x5)
+
+/* Macros to calculate register access values. */
+#define META_CR_CORE_REG(thr, reg_num, unit)          \
+	(((u32)(thr) << META_CR_TXUXXRXRQ_TX_S) |     \
+	 ((u32)(reg_num) << META_CR_TXUXXRXRQ_RX_S) | \
+	 ((u32)(unit) << META_CR_TXUXXRXRQ_UXX_S))
+
+#define META_CR_THR0_PC META_CR_CORE_REG(0, 0, META_CR_TXUPC_ID)
+#define META_CR_THR0_PCX META_CR_CORE_REG(0, 1, META_CR_TXUPC_ID)
+#define META_CR_THR0_SP META_CR_CORE_REG(0, 0, META_CR_TXUA0_ID)
+
+#define META_CR_THR1_PC META_CR_CORE_REG(1, 0, META_CR_TXUPC_ID)
+#define META_CR_THR1_PCX META_CR_CORE_REG(1, 1, META_CR_TXUPC_ID)
+#define META_CR_THR1_SP META_CR_CORE_REG(1, 0, META_CR_TXUA0_ID)
+
+#define SP_ACCESS(thread) META_CR_CORE_REG(thread, 0, META_CR_TXUA0_ID)
+#define PC_ACCESS(thread) META_CR_CORE_REG(thread, 0, META_CR_TXUPC_ID)
+
+#define META_CR_COREREG_ENABLE (0x0000000U)
+#define META_CR_COREREG_STATUS (0x0000010U)
+#define META_CR_COREREG_DEFR (0x00000A0U)
+#define META_CR_COREREG_PRIVEXT (0x00000E8U)
+
+#define META_CR_T0ENABLE_OFFSET \
+	(META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_ENABLE)
+#define META_CR_T0STATUS_OFFSET \
+	(META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_STATUS)
+#define META_CR_T0DEFR_OFFSET (META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_DEFR)
+#define META_CR_T0PRIVEXT_OFFSET \
+	(META_CR_CTRLREG_BASE(0U) + META_CR_COREREG_PRIVEXT)
+
+#define META_CR_T1ENABLE_OFFSET \
+	(META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_ENABLE)
+#define META_CR_T1STATUS_OFFSET \
+	(META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_STATUS)
+#define META_CR_T1DEFR_OFFSET (META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_DEFR)
+#define META_CR_T1PRIVEXT_OFFSET \
+	(META_CR_CTRLREG_BASE(1U) + META_CR_COREREG_PRIVEXT)
+
+#define META_CR_TXENABLE_ENABLE_BIT (0x00000001U) /* Set if running */
+#define META_CR_TXSTATUS_PRIV (0x00020000U)
+#define META_CR_TXPRIVEXT_MINIM (0x00000080U)
+
+#define META_MEM_GLOBAL_RANGE_BIT (0x80000000U)
+
+#define META_CR_TXCLKCTRL (0x048000B0)
+#define META_CR_TXCLKCTRL_ALL_ON (0x55111111)
+#define META_CR_TXCLKCTRL_ALL_AUTO (0xAA222222)
+
+#define META_CR_MMCU_LOCAL_EBCTRL (0x04830600)
+#define META_CR_MMCU_LOCAL_EBCTRL_ICWIN (0x3 << 14)
+#define META_CR_MMCU_LOCAL_EBCTRL_DCWIN (0x3 << 6)
+#define META_CR_SYSC_DCPART(n) (0x04830200 + (n) * 0x8)
+#define META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE (0x1 << 31)
+#define META_CR_SYSC_ICPART(n) (0x04830220 + (n) * 0x8)
+#define META_CR_SYSC_XCPARTX_LOCAL_ADDR_OFFSET_TOP_HALF (0x8 << 16)
+#define META_CR_SYSC_XCPARTX_LOCAL_ADDR_FULL_CACHE (0xF)
+#define META_CR_SYSC_XCPARTX_LOCAL_ADDR_HALF_CACHE (0x7)
+#define META_CR_MMCU_DCACHE_CTRL (0x04830018)
+#define META_CR_MMCU_ICACHE_CTRL (0x04830020)
+#define META_CR_MMCU_XCACHE_CTRL_CACHE_HITS_EN (0x1)
+
+/*
+ ******************************************************************************
+ * META LDR Format
+ ******************************************************************************
+ */
+/* Block header structure. */
+struct rogue_meta_ldr_block_hdr {
+	u32 dev_id;
+	u32 sl_code;
+	u32 sl_data;
+	u16 pc_ctrl;
+	u16 crc;
+};
+
+/* High level data stream block structure. */
+struct rogue_meta_ldr_l1_data_blk {
+	u16 cmd;
+	u16 length;
+	u32 next;
+	u32 cmd_data[4];
+};
+
+/* High level data stream block structure. */
+struct rogue_meta_ldr_l2_data_blk {
+	u16 tag;
+	u16 length;
+	u32 block_data[4];
+};
+
+/* Config command structure. */
+struct rogue_meta_ldr_cfg_blk {
+	u32 type;
+	u32 block_data[4];
+};
+
+/* Block type definitions */
+#define ROGUE_META_LDR_COMMENT_TYPE_MASK (0x0010U)
+#define ROGUE_META_LDR_BLK_IS_COMMENT(x) (((x) & ROGUE_META_LDR_COMMENT_TYPE_MASK) != 0U)
+
+/*
+ * Command definitions
+ *  Value   Name            Description
+ *  0       LoadMem         Load memory with binary data.
+ *  1       LoadCore        Load a set of core registers.
+ *  2       LoadMMReg       Load a set of memory mapped registers.
+ *  3       StartThreads    Set each thread PC and SP, then enable threads.
+ *  4       ZeroMem         Zeros a memory region.
+ *  5       Config          Perform a configuration command.
+ */
+#define ROGUE_META_LDR_CMD_MASK (0x000FU)
+
+#define ROGUE_META_LDR_CMD_LOADMEM (0x0000U)
+#define ROGUE_META_LDR_CMD_LOADCORE (0x0001U)
+#define ROGUE_META_LDR_CMD_LOADMMREG (0x0002U)
+#define ROGUE_META_LDR_CMD_START_THREADS (0x0003U)
+#define ROGUE_META_LDR_CMD_ZEROMEM (0x0004U)
+#define ROGUE_META_LDR_CMD_CONFIG (0x0005U)
+
+/*
+ * Config Command definitions
+ *  Value   Name        Description
+ *  0       Pause       Pause for x times 100 instructions
+ *  1       Read        Read a value from register - No value return needed.
+ *                      Utilises effects of issuing reads to certain registers
+ *  2       Write       Write to mem location
+ *  3       MemSet      Set mem to value
+ *  4       MemCheck    check mem for specific value.
+ */
+#define ROGUE_META_LDR_CFG_PAUSE (0x0000)
+#define ROGUE_META_LDR_CFG_READ (0x0001)
+#define ROGUE_META_LDR_CFG_WRITE (0x0002)
+#define ROGUE_META_LDR_CFG_MEMSET (0x0003)
+#define ROGUE_META_LDR_CFG_MEMCHECK (0x0004)
+
+/*
+ ******************************************************************************
+ * ROGUE FW segmented MMU definitions
+ ******************************************************************************
+ */
+/* All threads can access the segment. */
+#define ROGUE_FW_SEGMMU_ALLTHRS (0xf << 8U)
+/* Writable. */
+#define ROGUE_FW_SEGMMU_WRITEABLE (0x1U << 1U)
+/* All threads can access and writable. */
+#define ROGUE_FW_SEGMMU_ALLTHRS_WRITEABLE \
+	(ROGUE_FW_SEGMMU_ALLTHRS | ROGUE_FW_SEGMMU_WRITEABLE)
+
+/* Direct map region 10 used for mapping GPU memory - max 8MB. */
+#define ROGUE_FW_SEGMMU_DMAP_GPU_ID (10U)
+#define ROGUE_FW_SEGMMU_DMAP_GPU_ADDR_START (0x07000000U)
+#define ROGUE_FW_SEGMMU_DMAP_GPU_MAX_SIZE (0x00800000U)
+
+/* Segment IDs. */
+#define ROGUE_FW_SEGMMU_DATA_ID (1U)
+#define ROGUE_FW_SEGMMU_BOOTLDR_ID (2U)
+#define ROGUE_FW_SEGMMU_TEXT_ID (ROGUE_FW_SEGMMU_BOOTLDR_ID)
+
+/*
+ * SLC caching strategy in S7 and volcanic is emitted through the segment MMU.
+ * All the segments configured through the macro ROGUE_FW_SEGMMU_OUTADDR_TOP are
+ * CACHED in the SLC.
+ * The interface has been kept the same to simplify the code changes.
+ * The bifdm argument is ignored (no longer relevant) in S7 and volcanic.
+ */
+#define ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC(pers, slc_policy, mmu_ctx)  \
+	((((u64)((pers) & 0x3)) << 52) | (((u64)((mmu_ctx) & 0xFF)) << 44) | \
+	 (((u64)((slc_policy) & 0x1)) << 40))
+#define ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC_CACHED(mmu_ctx) \
+	ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC(0x3, 0x0, mmu_ctx)
+#define ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC_UNCACHED(mmu_ctx) \
+	ROGUE_FW_SEGMMU_OUTADDR_TOP_VIVT_SLC(0x0, 0x1, mmu_ctx)
+
+/*
+ * To configure the Page Catalog and BIF-DM fed into the BIF for Garten
+ * accesses through this segment.
+ */
+#define ROGUE_FW_SEGMMU_OUTADDR_TOP_SLC(pc, bifdm) \
+	(((u64)((u64)(pc) & 0xFU) << 44U) | ((u64)((u64)(bifdm) & 0xFU) << 40U))
+
+#define ROGUE_FW_SEGMMU_META_BIFDM_ID (0x7U)
+
+/* META segments have 4kB minimum size. */
+#define ROGUE_FW_SEGMMU_ALIGN (0x1000U)
+
+/* Segmented MMU registers (n = segment id). */
+#define META_CR_MMCU_SEGMENT_N_BASE(n) (0x04850000U + ((n) * 0x10U))
+#define META_CR_MMCU_SEGMENT_N_LIMIT(n) (0x04850004U + ((n) * 0x10U))
+#define META_CR_MMCU_SEGMENT_N_OUTA0(n) (0x04850008U + ((n) * 0x10U))
+#define META_CR_MMCU_SEGMENT_N_OUTA1(n) (0x0485000CU + ((n) * 0x10U))
+
+/*
+ * The following defines must be recalculated if the Meta MMU segments used
+ * to access Host-FW data are changed
+ * Current combinations are:
+ * - SLC uncached, META cached,   FW base address 0x70000000
+ * - SLC uncached, META uncached, FW base address 0xF0000000
+ * - SLC cached,   META cached,   FW base address 0x10000000
+ * - SLC cached,   META uncached, FW base address 0x90000000
+ */
+#define ROGUE_FW_SEGMMU_DATA_BASE_ADDRESS (0x10000000U)
+#define ROGUE_FW_SEGMMU_DATA_META_CACHED (0x0U)
+#define ROGUE_FW_SEGMMU_DATA_META_UNCACHED (META_MEM_GLOBAL_RANGE_BIT)
+#define ROGUE_FW_SEGMMU_DATA_META_CACHE_MASK (META_MEM_GLOBAL_RANGE_BIT)
+/*
+ * For non-VIVT SLCs the cacheability of the FW data in the SLC is selected in
+ * the PTEs for the FW data, not in the Meta Segment MMU, which means these
+ * defines have no real effect in those cases.
+ */
+#define ROGUE_FW_SEGMMU_DATA_VIVT_SLC_CACHED (0x0U)
+#define ROGUE_FW_SEGMMU_DATA_VIVT_SLC_UNCACHED (0x60000000U)
+#define ROGUE_FW_SEGMMU_DATA_VIVT_SLC_CACHE_MASK (0x60000000U)
+
+/*
+ ******************************************************************************
+ * ROGUE FW Bootloader defaults
+ ******************************************************************************
+ */
+#define ROGUE_FW_BOOTLDR_META_ADDR (0x40000000U)
+#define ROGUE_FW_BOOTLDR_DEVV_ADDR_0 (0xC0000000U)
+#define ROGUE_FW_BOOTLDR_DEVV_ADDR_1 (0x000000E1)
+#define ROGUE_FW_BOOTLDR_DEVV_ADDR                     \
+	((((u64)ROGUE_FW_BOOTLDR_DEVV_ADDR_1) << 32) | \
+	 ROGUE_FW_BOOTLDR_DEVV_ADDR_0)
+#define ROGUE_FW_BOOTLDR_LIMIT (0x1FFFF000)
+#define ROGUE_FW_MAX_BOOTLDR_OFFSET (0x1000)
+
+/* Bootloader configuration offset is in dwords (512 bytes) */
+#define ROGUE_FW_BOOTLDR_CONF_OFFSET (0x80)
+
+/*
+ ******************************************************************************
+ * ROGUE META Stack
+ ******************************************************************************
+ */
+#define ROGUE_META_STACK_SIZE (0x1000U)
+
+/*
+ ******************************************************************************
+ * ROGUE META Core memory
+ ******************************************************************************
+ */
+/* Code and data both map to the same physical memory. */
+#define ROGUE_META_COREMEM_CODE_ADDR (0x80000000U)
+#define ROGUE_META_COREMEM_DATA_ADDR (0x82000000U)
+#define ROGUE_META_COREMEM_OFFSET_MASK (0x01ffffffU)
+
+#define ROGUE_META_IS_COREMEM_CODE(a, b)                                \
+	({                                                              \
+		u32 _a = (a), _b = (b);                                 \
+		((_a) >= ROGUE_META_COREMEM_CODE_ADDR) &&               \
+			((_a) < (ROGUE_META_COREMEM_CODE_ADDR + (_b))); \
+	})
+#define ROGUE_META_IS_COREMEM_DATA(a, b)                                \
+	({                                                              \
+		u32 _a = (a), _b = (b);                                 \
+		((_a) >= ROGUE_META_COREMEM_DATA_ADDR) &&               \
+			((_a) < (ROGUE_META_COREMEM_DATA_ADDR + (_b))); \
+	})
+/*
+ ******************************************************************************
+ * 2nd thread
+ ******************************************************************************
+ */
+#define ROGUE_FW_THR1_PC (0x18930000)
+#define ROGUE_FW_THR1_SP (0x78890000)
+
+/*
+ ******************************************************************************
+ * META compatibility
+ ******************************************************************************
+ */
+
+#define META_CR_CORE_ID (0x04831000)
+#define META_CR_CORE_ID_VER_SHIFT (16U)
+#define META_CR_CORE_ID_VER_CLRMSK (0XFF00FFFFU)
+
+#define ROGUE_CR_META_MTP218_CORE_ID_VALUE 0x19
+#define ROGUE_CR_META_MTP219_CORE_ID_VALUE 0x1E
+#define ROGUE_CR_META_LTP218_CORE_ID_VALUE 0x1C
+#define ROGUE_CR_META_LTP217_CORE_ID_VALUE 0x1F
+
+#define ROGUE_FW_PROCESSOR_META "META"
+
+#endif /* PVR_ROGUE_META_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_mips.h b/drivers/gpu/drm/imagination/pvr_rogue_mips.h
new file mode 100644
index 0000000000000..41ed618fda3f9
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_mips.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_MIPS_H
+#define PVR_ROGUE_MIPS_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+/* Utility defines for memory management. */
+#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K (12)
+#define ROGUE_MIPSFW_PAGE_SIZE_4K (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K)
+#define ROGUE_MIPSFW_PAGE_MASK_4K (ROGUE_MIPSFW_PAGE_SIZE_4K - 1)
+#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_64K (16)
+#define ROGUE_MIPSFW_PAGE_SIZE_64K (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_64K)
+#define ROGUE_MIPSFW_PAGE_MASK_64K (ROGUE_MIPSFW_PAGE_SIZE_64K - 1)
+#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_256K (18)
+#define ROGUE_MIPSFW_PAGE_SIZE_256K (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_256K)
+#define ROGUE_MIPSFW_PAGE_MASK_256K (ROGUE_MIPSFW_PAGE_SIZE_256K - 1)
+#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_1MB (20)
+#define ROGUE_MIPSFW_PAGE_SIZE_1MB (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_1MB)
+#define ROGUE_MIPSFW_PAGE_MASK_1MB (ROGUE_MIPSFW_PAGE_SIZE_1MB - 1)
+#define ROGUE_MIPSFW_LOG2_PAGE_SIZE_4MB (22)
+#define ROGUE_MIPSFW_PAGE_SIZE_4MB (0x1 << ROGUE_MIPSFW_LOG2_PAGE_SIZE_4MB)
+#define ROGUE_MIPSFW_PAGE_MASK_4MB (ROGUE_MIPSFW_PAGE_SIZE_4MB - 1)
+#define ROGUE_MIPSFW_LOG2_PTE_ENTRY_SIZE (2)
+/* log2 page table sizes dependent on FW heap size and page size (for each OS). */
+#define ROGUE_MIPSFW_LOG2_PAGETABLE_SIZE_4K(pvr_dev) ((pvr_dev)->fw_dev.fw_heap_info.log2_size - \
+						      ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K +    \
+						      ROGUE_MIPSFW_LOG2_PTE_ENTRY_SIZE)
+#define ROGUE_MIPSFW_LOG2_PAGETABLE_SIZE_64K(pvr_dev) ((pvr_dev)->fw_dev.fw_heap_info.log2_size - \
+						       ROGUE_MIPSFW_LOG2_PAGE_SIZE_64K +   \
+						       ROGUE_MIPSFW_LOG2_PTE_ENTRY_SIZE)
+/* Maximum number of page table pages (both Host and MIPS pages). */
+#define ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES (4)
+/* Total number of TLB entries. */
+#define ROGUE_MIPSFW_NUMBER_OF_TLB_ENTRIES (16)
+/* "Uncached" caching policy. */
+#define ROGUE_MIPSFW_UNCACHED_CACHE_POLICY (2)
+/* "Write-back write-allocate" caching policy. */
+#define ROGUE_MIPSFW_WRITEBACK_CACHE_POLICY (3)
+/* "Write-through no write-allocate" caching policy. */
+#define ROGUE_MIPSFW_WRITETHROUGH_CACHE_POLICY (1)
+/* Cached policy used by MIPS in case of physical bus on 32 bit. */
+#define ROGUE_MIPSFW_CACHED_POLICY (ROGUE_MIPSFW_WRITEBACK_CACHE_POLICY)
+/* Cached policy used by MIPS in case of physical bus on more than 32 bit. */
+#define ROGUE_MIPSFW_CACHED_POLICY_ABOVE_32BIT (ROGUE_MIPSFW_WRITETHROUGH_CACHE_POLICY)
+/* Total number of Remap entries. */
+#define ROGUE_MIPSFW_NUMBER_OF_REMAP_ENTRIES (2 * ROGUE_MIPSFW_NUMBER_OF_TLB_ENTRIES)
+
+/* MIPS EntryLo/PTE format. */
+
+#define ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_SHIFT (31U)
+#define ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_CLRMSK (0X7FFFFFFF)
+#define ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_EN (0X80000000)
+
+#define ROGUE_MIPSFW_ENTRYLO_EXEC_INHIBIT_SHIFT (30U)
+#define ROGUE_MIPSFW_ENTRYLO_EXEC_INHIBIT_CLRMSK (0XBFFFFFFF)
+#define ROGUE_MIPSFW_ENTRYLO_EXEC_INHIBIT_EN (0X40000000)
+
+/* Page Frame Number */
+#define ROGUE_MIPSFW_ENTRYLO_PFN_SHIFT (6)
+#define ROGUE_MIPSFW_ENTRYLO_PFN_ALIGNSHIFT (12)
+/* Mask used for the MIPS Page Table in case of physical bus on 32 bit. */
+#define ROGUE_MIPSFW_ENTRYLO_PFN_MASK (0x03FFFFC0)
+#define ROGUE_MIPSFW_ENTRYLO_PFN_SIZE (20)
+/* Mask used for the MIPS Page Table in case of physical bus on more than 32 bit. */
+#define ROGUE_MIPSFW_ENTRYLO_PFN_MASK_ABOVE_32BIT (0x3FFFFFC0)
+#define ROGUE_MIPSFW_ENTRYLO_PFN_SIZE_ABOVE_32BIT (24)
+#define ROGUE_MIPSFW_ADDR_TO_ENTRYLO_PFN_RSHIFT (ROGUE_MIPSFW_ENTRYLO_PFN_ALIGNSHIFT - \
+						 ROGUE_MIPSFW_ENTRYLO_PFN_SHIFT)
+
+#define ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_SHIFT (3U)
+#define ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_CLRMSK (0XFFFFFFC7)
+
+#define ROGUE_MIPSFW_ENTRYLO_DIRTY_SHIFT (2U)
+#define ROGUE_MIPSFW_ENTRYLO_DIRTY_CLRMSK (0XFFFFFFFB)
+#define ROGUE_MIPSFW_ENTRYLO_DIRTY_EN (0X00000004)
+
+#define ROGUE_MIPSFW_ENTRYLO_VALID_SHIFT (1U)
+#define ROGUE_MIPSFW_ENTRYLO_VALID_CLRMSK (0XFFFFFFFD)
+#define ROGUE_MIPSFW_ENTRYLO_VALID_EN (0X00000002)
+
+#define ROGUE_MIPSFW_ENTRYLO_GLOBAL_SHIFT (0U)
+#define ROGUE_MIPSFW_ENTRYLO_GLOBAL_CLRMSK (0XFFFFFFFE)
+#define ROGUE_MIPSFW_ENTRYLO_GLOBAL_EN (0X00000001)
+
+#define ROGUE_MIPSFW_ENTRYLO_DVG (ROGUE_MIPSFW_ENTRYLO_DIRTY_EN | \
+				  ROGUE_MIPSFW_ENTRYLO_VALID_EN | \
+				  ROGUE_MIPSFW_ENTRYLO_GLOBAL_EN)
+#define ROGUE_MIPSFW_ENTRYLO_UNCACHED (ROGUE_MIPSFW_UNCACHED_CACHE_POLICY << \
+				       ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_SHIFT)
+#define ROGUE_MIPSFW_ENTRYLO_DVG_UNCACHED (ROGUE_MIPSFW_ENTRYLO_DVG | \
+					   ROGUE_MIPSFW_ENTRYLO_UNCACHED)
+
+/* Remap Range Config Addr Out. */
+/* These defines refer to the upper half of the Remap Range Config register. */
+#define ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_MASK (0x0FFFFFF0)
+#define ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_SHIFT (4) /* wrt upper half of the register. */
+#define ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_ALIGNSHIFT (12)
+#define ROGUE_MIPSFW_ADDR_TO_RR_ADDR_OUT_RSHIFT (ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_ALIGNSHIFT - \
+						 ROGUE_MIPSFW_REMAP_RANGE_ADDR_OUT_SHIFT)
+
+/*
+ * Pages to trampoline problematic physical addresses:
+ *   - ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN : 0x1FC0_0000
+ *   - ROGUE_MIPSFW_DATA_REMAP_PHYS_ADDR_IN : 0x1FC0_1000
+ *   - ROGUE_MIPSFW_CODE_REMAP_PHYS_ADDR_IN : 0x1FC0_2000
+ *   - (benign trampoline)               : 0x1FC0_3000
+ * that would otherwise be erroneously remapped by the MIPS wrapper.
+ * (see "Firmware virtual layout and remap configuration" section below)
+ */
+
+#define ROGUE_MIPSFW_TRAMPOLINE_LOG2_NUMPAGES (2)
+#define ROGUE_MIPSFW_TRAMPOLINE_NUMPAGES BIT(ROGUE_MIPSFW_TRAMPOLINE_LOG2_NUMPAGES)
+#define ROGUE_MIPSFW_TRAMPOLINE_SIZE (ROGUE_MIPSFW_TRAMPOLINE_NUMPAGES << \
+				      ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K)
+#define ROGUE_MIPSFW_TRAMPOLINE_LOG2_SEGMENT_SIZE (ROGUE_MIPSFW_TRAMPOLINE_LOG2_NUMPAGES + \
+						   ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K)
+
+#define ROGUE_MIPSFW_TRAMPOLINE_TARGET_PHYS_ADDR (ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN)
+#define ROGUE_MIPSFW_TRAMPOLINE_OFFSET(a) ((a) - ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN)
+
+#define ROGUE_MIPSFW_SENSITIVE_ADDR(a) (ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN == \
+					(~((1 << ROGUE_MIPSFW_TRAMPOLINE_LOG2_SEGMENT_SIZE) - 1) \
+					 & (a)))
+
+/* Firmware virtual layout and remap configuration. */
+/*
+ * For each remap region we define:
+ * - the virtual base used by the Firmware to access code/data through that region
+ * - the microAptivAP physical address correspondent to the virtual base address,
+ *   used as input address and remapped to the actual physical address
+ * - log2 of size of the region remapped by the MIPS wrapper, i.e. number of bits from
+ *   the bottom of the base input address that survive onto the output address
+ *   (this defines both the alignment and the maximum size of the remapped region)
+ * - one or more code/data segments within the remapped region.
+ */
+
+/* Boot remap setup. */
+#define ROGUE_MIPSFW_BOOT_REMAP_VIRTUAL_BASE (0xBFC00000)
+#define ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN (0x1FC00000)
+#define ROGUE_MIPSFW_BOOT_REMAP_LOG2_SEGMENT_SIZE (12)
+#define ROGUE_MIPSFW_BOOT_NMI_CODE_VIRTUAL_BASE (ROGUE_MIPSFW_BOOT_REMAP_VIRTUAL_BASE)
+
+/* Data remap setup. */
+#define ROGUE_MIPSFW_DATA_REMAP_VIRTUAL_BASE (0xBFC01000)
+#define ROGUE_MIPSFW_DATA_CACHED_REMAP_VIRTUAL_BASE (0x9FC01000)
+#define ROGUE_MIPSFW_DATA_REMAP_PHYS_ADDR_IN (0x1FC01000)
+#define ROGUE_MIPSFW_DATA_REMAP_LOG2_SEGMENT_SIZE (12)
+#define ROGUE_MIPSFW_BOOT_NMI_DATA_VIRTUAL_BASE (ROGUE_MIPSFW_DATA_REMAP_VIRTUAL_BASE)
+
+/* Code remap setup. */
+#define ROGUE_MIPSFW_CODE_REMAP_VIRTUAL_BASE (0x9FC02000)
+#define ROGUE_MIPSFW_CODE_REMAP_PHYS_ADDR_IN (0x1FC02000)
+#define ROGUE_MIPSFW_CODE_REMAP_LOG2_SEGMENT_SIZE (12)
+#define ROGUE_MIPSFW_EXCEPTIONS_VIRTUAL_BASE (ROGUE_MIPSFW_CODE_REMAP_VIRTUAL_BASE)
+
+/* Permanent mappings setup. */
+#define ROGUE_MIPSFW_PT_VIRTUAL_BASE (0xCF000000)
+#define ROGUE_MIPSFW_REGISTERS_VIRTUAL_BASE (0xCF800000)
+#define ROGUE_MIPSFW_STACK_VIRTUAL_BASE (0xCF600000)
+
+/* Bootloader configuration data. */
+/*
+ * Bootloader configuration offset (where ROGUE_MIPSFW_BOOT_DATA lives)
+ * within the bootloader/NMI data page.
+ */
+#define ROGUE_MIPSFW_BOOTLDR_CONF_OFFSET (0x0)
+
+/* NMI shared data. */
+/* Base address of the shared data within the bootloader/NMI data page. */
+#define ROGUE_MIPSFW_NMI_SHARED_DATA_BASE (0x100)
+/* Size used by Debug dump data. */
+#define ROGUE_MIPSFW_NMI_SHARED_SIZE (0x2B0)
+/* Offsets in the NMI shared area in 32-bit words. */
+#define ROGUE_MIPSFW_NMI_SYNC_FLAG_OFFSET (0x0)
+#define ROGUE_MIPSFW_NMI_STATE_OFFSET (0x1)
+#define ROGUE_MIPSFW_NMI_ERROR_STATE_SET (0x1)
+
+/* MIPS boot stage. */
+#define ROGUE_MIPSFW_BOOT_STAGE_OFFSET (0x400)
+
+/*
+ * MIPS private data in the bootloader data page.
+ * Memory below this offset is used by the FW only, no interface data allowed.
+ */
+#define ROGUE_MIPSFW_PRIVATE_DATA_OFFSET (0x800)
+
+struct rogue_mipsfw_boot_data {
+	u64 stack_phys_addr;
+	u64 reg_base;
+	u64 pt_phys_addr[ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES];
+	u32 pt_log2_page_size;
+	u32 pt_num_pages;
+	u32 reserved1;
+	u32 reserved2;
+};
+
+#define ROGUE_MIPSFW_GET_OFFSET_IN_DWORDS(offset) ((offset) / sizeof(u32))
+#define ROGUE_MIPSFW_GET_OFFSET_IN_QWORDS(offset) ((offset) / sizeof(u64))
+
+/* Used for compatibility checks. */
+#define ROGUE_MIPSFW_ARCHTYPE_VER_CLRMSK (0xFFFFE3FFU)
+#define ROGUE_MIPSFW_ARCHTYPE_VER_SHIFT (10U)
+#define ROGUE_MIPSFW_CORE_ID_VALUE (0x001U)
+#define ROGUE_FW_PROCESSOR_MIPS "MIPS"
+
+/* microAptivAP cache line size. */
+#define ROGUE_MIPSFW_MICROAPTIVEAP_CACHELINE_SIZE (16U)
+
+/*
+ * The SOCIF transactions are identified with the top 16 bits of the physical address emitted by
+ * the MIPS.
+ */
+#define ROGUE_MIPSFW_WRAPPER_CONFIG_REGBANK_ADDR_ALIGN (16U)
+
+/* Values to put in the MIPS selectors for performance counters. */
+/* Icache accesses in COUNTER0. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_ICACHE_ACCESSES_C0 (9U)
+/* Icache misses in COUNTER1. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_ICACHE_MISSES_C1 (9U)
+
+/* Dcache accesses in COUNTER0. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_DCACHE_ACCESSES_C0 (10U)
+/* Dcache misses in COUNTER1. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_DCACHE_MISSES_C1 (11U)
+
+/* ITLB instruction accesses in COUNTER0. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_ITLB_INSTR_ACCESSES_C0 (5U)
+/* JTLB instruction accesses misses in COUNTER1. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_JTLB_INSTR_MISSES_C1 (7U)
+
+  /* Instructions completed in COUNTER0. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_INSTR_COMPLETED_C0 (1U)
+/* JTLB data misses in COUNTER1. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_JTLB_DATA_MISSES_C1 (8U)
+
+/* Shift for the Event field in the MIPS perf ctrl registers. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_EVENT_SHIFT (5U)
+
+/* Additional flags for performance counters. See MIPS manual for further reference. */
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_COUNT_USER_MODE (8U)
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_COUNT_KERNEL_MODE (2U)
+#define ROGUE_MIPSFW_PERF_COUNT_CTRL_COUNT_EXL (1U)
+
+#define ROGUE_MIPSFW_C0_NBHWIRQ	8
+
+/* Macros to decode C0_Cause register. */
+#define ROGUE_MIPSFW_C0_CAUSE_EXCCODE(cause) (((cause) & 0x7c) >> 2)
+#define ROGUE_MIPSFW_C0_CAUSE_EXCCODE_FWERROR 9
+/* Use only when Coprocessor Unusable exception. */
+#define ROGUE_MIPSFW_C0_CAUSE_UNUSABLE_UNIT(cause) (((cause) >> 28) & 0x3)
+#define ROGUE_MIPSFW_C0_CAUSE_PENDING_HWIRQ(cause) (((cause) & 0x3fc00) >> 10)
+#define ROGUE_MIPSFW_C0_CAUSE_FDCIPENDING BIT(21)
+#define ROGUE_MIPSFW_C0_CAUSE_IV BIT(23)
+#define ROGUE_MIPSFW_C0_CAUSE_IC BIT(25)
+#define ROGUE_MIPSFW_C0_CAUSE_PCIPENDING BIT(26)
+#define ROGUE_MIPSFW_C0_CAUSE_TIPENDING BIT(30)
+#define ROGUE_MIPSFW_C0_CAUSE_BRANCH_DELAY BIT(31)
+
+/* Macros to decode C0_Debug register. */
+#define ROGUE_MIPSFW_C0_DEBUG_EXCCODE(debug) (((debug) >> 10) & 0x1f)
+#define ROGUE_MIPSFW_C0_DEBUG_DSS BIT(0)
+#define ROGUE_MIPSFW_C0_DEBUG_DBP BIT(1)
+#define ROGUE_MIPSFW_C0_DEBUG_DDBL BIT(2)
+#define ROGUE_MIPSFW_C0_DEBUG_DDBS BIT(3)
+#define ROGUE_MIPSFW_C0_DEBUG_DIB BIT(4)
+#define ROGUE_MIPSFW_C0_DEBUG_DINT BIT(5)
+#define ROGUE_MIPSFW_C0_DEBUG_DIBIMPR BIT(6)
+#define ROGUE_MIPSFW_C0_DEBUG_DDBLIMPR BIT(18)
+#define ROGUE_MIPSFW_C0_DEBUG_DDBSIMPR BIT(19)
+#define ROGUE_MIPSFW_C0_DEBUG_IEXI BIT(20)
+#define ROGUE_MIPSFW_C0_DEBUG_DBUSEP BIT(21)
+#define ROGUE_MIPSFW_C0_DEBUG_CACHEEP BIT(22)
+#define ROGUE_MIPSFW_C0_DEBUG_MCHECKP BIT(23)
+#define ROGUE_MIPSFW_C0_DEBUG_IBUSEP BIT(24)
+#define ROGUE_MIPSFW_C0_DEBUG_DM BIT(30)
+#define ROGUE_MIPSFW_C0_DEBUG_DBD BIT(31)
+
+/* Macros to decode TLB entries. */
+#define ROGUE_MIPSFW_TLB_GET_MASK(page_mask) (((page_mask) >> 13) & 0XFFFFU)
+/* Page size in KB. */
+#define ROGUE_MIPSFW_TLB_GET_PAGE_SIZE(page_mask) ((((page_mask) | 0x1FFF) + 1) >> 11)
+/* Page size in KB. */
+#define ROGUE_MIPSFW_TLB_GET_PAGE_MASK(page_size) ((((page_size) << 11) - 1) & ~0x7FF)
+#define ROGUE_MIPSFW_TLB_GET_VPN2(entry_hi) ((entry_hi) >> 13)
+#define ROGUE_MIPSFW_TLB_GET_COHERENCY(entry_lo) (((entry_lo) >> 3) & 0x7U)
+#define ROGUE_MIPSFW_TLB_GET_PFN(entry_lo) (((entry_lo) >> 6) & 0XFFFFFU)
+/* GET_PA uses a non-standard PFN mask for 36 bit addresses. */
+#define ROGUE_MIPSFW_TLB_GET_PA(entry_lo) (((u64)(entry_lo) & \
+					    ROGUE_MIPSFW_ENTRYLO_PFN_MASK_ABOVE_32BIT) << 6)
+#define ROGUE_MIPSFW_TLB_GET_INHIBIT(entry_lo) (((entry_lo) >> 30) & 0x3U)
+#define ROGUE_MIPSFW_TLB_GET_DGV(entry_lo) ((entry_lo) & 0x7U)
+#define ROGUE_MIPSFW_TLB_GLOBAL BIT(0)
+#define ROGUE_MIPSFW_TLB_VALID BIT(1)
+#define ROGUE_MIPSFW_TLB_DIRTY BIT(2)
+#define ROGUE_MIPSFW_TLB_XI BIT(30)
+#define ROGUE_MIPSFW_TLB_RI BIT(31)
+
+#define ROGUE_MIPSFW_REMAP_GET_REGION_SIZE(region_size_encoding) (1 << (((region_size_encoding) \
+									+ 1) << 1))
+
+struct rogue_mips_tlb_entry {
+	u32 tlb_page_mask;
+	u32 tlb_hi;
+	u32 tlb_lo0;
+	u32 tlb_lo1;
+};
+
+struct rogue_mips_remap_entry {
+	u32 remap_addr_in;  /* Always 4k aligned. */
+	u32 remap_addr_out; /* Always 4k aligned. */
+	u32 remap_region_size;
+};
+
+struct rogue_mips_state {
+	u32 error_state; /* This must come first in the structure. */
+	u32 error_epc;
+	u32 status_register;
+	u32 cause_register;
+	u32 bad_register;
+	u32 epc;
+	u32 sp;
+	u32 debug;
+	u32 depc;
+	u32 bad_instr;
+	u32 unmapped_address;
+	struct rogue_mips_tlb_entry tlb[ROGUE_MIPSFW_NUMBER_OF_TLB_ENTRIES];
+	struct rogue_mips_remap_entry remap[ROGUE_MIPSFW_NUMBER_OF_REMAP_ENTRIES];
+};
+
+#include "pvr_rogue_mips_check.h"
+
+#endif /* PVR_ROGUE_MIPS_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_mips_check.h b/drivers/gpu/drm/imagination/pvr_rogue_mips_check.h
new file mode 100644
index 0000000000000..824b4bf33ac14
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_mips_check.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_MIPS_CHECK_H
+#define PVR_ROGUE_MIPS_CHECK_H
+
+#include <linux/build_bug.h>
+
+static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_page_mask) == 0,
+	      "offsetof(struct rogue_mips_tlb_entry, tlb_page_mask) incorrect");
+static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_hi) == 4,
+	      "offsetof(struct rogue_mips_tlb_entry, tlb_hi) incorrect");
+static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_lo0) == 8,
+	      "offsetof(struct rogue_mips_tlb_entry, tlb_lo0) incorrect");
+static_assert(offsetof(struct rogue_mips_tlb_entry, tlb_lo1) == 12,
+	      "offsetof(struct rogue_mips_tlb_entry, tlb_lo1) incorrect");
+static_assert(sizeof(struct rogue_mips_tlb_entry) == 16,
+	      "struct rogue_mips_tlb_entry is incorrect size");
+
+static_assert(offsetof(struct rogue_mips_remap_entry, remap_addr_in) == 0,
+	      "offsetof(struct rogue_mips_remap_entry, remap_addr_in) incorrect");
+static_assert(offsetof(struct rogue_mips_remap_entry, remap_addr_out) == 4,
+	      "offsetof(struct rogue_mips_remap_entry, remap_addr_out) incorrect");
+static_assert(offsetof(struct rogue_mips_remap_entry, remap_region_size) == 8,
+	      "offsetof(struct rogue_mips_remap_entry, remap_region_size) incorrect");
+static_assert(sizeof(struct rogue_mips_remap_entry) == 12,
+	      "struct rogue_mips_remap_entry is incorrect size");
+
+static_assert(offsetof(struct rogue_mips_state, error_state) == 0,
+	      "offsetof(struct rogue_mips_state, error_state) incorrect");
+static_assert(offsetof(struct rogue_mips_state, error_epc) == 4,
+	      "offsetof(struct rogue_mips_state, error_epc) incorrect");
+static_assert(offsetof(struct rogue_mips_state, status_register) == 8,
+	      "offsetof(struct rogue_mips_state, status_register) incorrect");
+static_assert(offsetof(struct rogue_mips_state, cause_register) == 12,
+	      "offsetof(struct rogue_mips_state, cause_register) incorrect");
+static_assert(offsetof(struct rogue_mips_state, bad_register) == 16,
+	      "offsetof(struct rogue_mips_state, bad_register) incorrect");
+static_assert(offsetof(struct rogue_mips_state, epc) == 20,
+	      "offsetof(struct rogue_mips_state, epc) incorrect");
+static_assert(offsetof(struct rogue_mips_state, sp) == 24,
+	      "offsetof(struct rogue_mips_state, sp) incorrect");
+static_assert(offsetof(struct rogue_mips_state, debug) == 28,
+	      "offsetof(struct rogue_mips_state, debug) incorrect");
+static_assert(offsetof(struct rogue_mips_state, depc) == 32,
+	      "offsetof(struct rogue_mips_state, depc) incorrect");
+static_assert(offsetof(struct rogue_mips_state, bad_instr) == 36,
+	      "offsetof(struct rogue_mips_state, bad_instr) incorrect");
+static_assert(offsetof(struct rogue_mips_state, unmapped_address) == 40,
+	      "offsetof(struct rogue_mips_state, unmapped_address) incorrect");
+static_assert(offsetof(struct rogue_mips_state, tlb) == 44,
+	      "offsetof(struct rogue_mips_state, tlb) incorrect");
+static_assert(offsetof(struct rogue_mips_state, remap) == 300,
+	      "offsetof(struct rogue_mips_state, remap) incorrect");
+static_assert(sizeof(struct rogue_mips_state) == 684,
+	      "struct rogue_mips_state is incorrect size");
+
+#endif /* PVR_ROGUE_MIPS_CHECK_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h b/drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h
new file mode 100644
index 0000000000000..f361ccdd54051
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+/*  *** Autogenerated C -- do not edit ***  */
+
+#ifndef PVR_ROGUE_MMU_DEFS_H
+#define PVR_ROGUE_MMU_DEFS_H
+
+#define ROGUE_MMU_DEFS_REVISION 0
+
+#define ROGUE_BIF_DM_ENCODING_VERTEX (0x00000000U)
+#define ROGUE_BIF_DM_ENCODING_PIXEL (0x00000001U)
+#define ROGUE_BIF_DM_ENCODING_COMPUTE (0x00000002U)
+#define ROGUE_BIF_DM_ENCODING_TLA (0x00000003U)
+#define ROGUE_BIF_DM_ENCODING_PB_VCE (0x00000004U)
+#define ROGUE_BIF_DM_ENCODING_PB_TE (0x00000005U)
+#define ROGUE_BIF_DM_ENCODING_META (0x00000007U)
+#define ROGUE_BIF_DM_ENCODING_HOST (0x00000008U)
+#define ROGUE_BIF_DM_ENCODING_PM_ALIST (0x00000009U)
+
+#define ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT (30U)
+#define ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK (0xFFFFFF003FFFFFFFULL)
+#define ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT (21U)
+#define ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK (0xFFFFFFFFC01FFFFFULL)
+#define ROGUE_MMUCTRL_VADDR_PT_INDEX_SHIFT (12U)
+#define ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK (0xFFFFFFFFFFE00FFFULL)
+
+#define ROGUE_MMUCTRL_ENTRIES_PC_VALUE (0x00000400U)
+#define ROGUE_MMUCTRL_ENTRIES_PD_VALUE (0x00000200U)
+#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE (0x00000200U)
+
+#define ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE (0x00000020U)
+#define ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE (0x00000040U)
+#define ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE (0x00000040U)
+
+#define ROGUE_MMUCTRL_PAGE_SIZE_MASK (0x00000007U)
+#define ROGUE_MMUCTRL_PAGE_SIZE_4KB (0x00000000U)
+#define ROGUE_MMUCTRL_PAGE_SIZE_16KB (0x00000001U)
+#define ROGUE_MMUCTRL_PAGE_SIZE_64KB (0x00000002U)
+#define ROGUE_MMUCTRL_PAGE_SIZE_256KB (0x00000003U)
+#define ROGUE_MMUCTRL_PAGE_SIZE_1MB (0x00000004U)
+#define ROGUE_MMUCTRL_PAGE_SIZE_2MB (0x00000005U)
+
+#define ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT (12U)
+#define ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK (0xFFFFFF0000000FFFULL)
+
+#define ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT (14U)
+#define ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK (0xFFFFFF0000003FFFULL)
+
+#define ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT (16U)
+#define ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK (0xFFFFFF000000FFFFULL)
+
+#define ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT (18U)
+#define ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK (0xFFFFFF000003FFFFULL)
+
+#define ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT (20U)
+#define ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK (0xFFFFFF00000FFFFFULL)
+
+#define ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT (21U)
+#define ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK (0xFFFFFF00001FFFFFULL)
+
+#define ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_SHIFT (12U)
+#define ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK (0xFFFFFF0000000FFFULL)
+
+#define ROGUE_MMUCTRL_PT_BASE_16KB_RANGE_SHIFT (10U)
+#define ROGUE_MMUCTRL_PT_BASE_16KB_RANGE_CLRMSK (0xFFFFFF00000003FFULL)
+
+#define ROGUE_MMUCTRL_PT_BASE_64KB_RANGE_SHIFT (8U)
+#define ROGUE_MMUCTRL_PT_BASE_64KB_RANGE_CLRMSK (0xFFFFFF00000000FFULL)
+
+#define ROGUE_MMUCTRL_PT_BASE_256KB_RANGE_SHIFT (6U)
+#define ROGUE_MMUCTRL_PT_BASE_256KB_RANGE_CLRMSK (0xFFFFFF000000003FULL)
+
+#define ROGUE_MMUCTRL_PT_BASE_1MB_RANGE_SHIFT (5U)
+#define ROGUE_MMUCTRL_PT_BASE_1MB_RANGE_CLRMSK (0xFFFFFF000000001FULL)
+
+#define ROGUE_MMUCTRL_PT_BASE_2MB_RANGE_SHIFT (5U)
+#define ROGUE_MMUCTRL_PT_BASE_2MB_RANGE_CLRMSK (0xFFFFFF000000001FULL)
+
+#define ROGUE_MMUCTRL_PT_DATA_PM_META_PROTECT_SHIFT (62U)
+#define ROGUE_MMUCTRL_PT_DATA_PM_META_PROTECT_CLRMSK (0xBFFFFFFFFFFFFFFFULL)
+#define ROGUE_MMUCTRL_PT_DATA_PM_META_PROTECT_EN (0x4000000000000000ULL)
+#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_HI_SHIFT (40U)
+#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_HI_CLRMSK (0xC00000FFFFFFFFFFULL)
+#define ROGUE_MMUCTRL_PT_DATA_PAGE_SHIFT (12U)
+#define ROGUE_MMUCTRL_PT_DATA_PAGE_CLRMSK (0xFFFFFF0000000FFFULL)
+#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_LO_SHIFT (6U)
+#define ROGUE_MMUCTRL_PT_DATA_VP_PAGE_LO_CLRMSK (0xFFFFFFFFFFFFF03FULL)
+#define ROGUE_MMUCTRL_PT_DATA_ENTRY_PENDING_SHIFT (5U)
+#define ROGUE_MMUCTRL_PT_DATA_ENTRY_PENDING_CLRMSK (0xFFFFFFFFFFFFFFDFULL)
+#define ROGUE_MMUCTRL_PT_DATA_ENTRY_PENDING_EN (0x0000000000000020ULL)
+#define ROGUE_MMUCTRL_PT_DATA_PM_SRC_SHIFT (4U)
+#define ROGUE_MMUCTRL_PT_DATA_PM_SRC_CLRMSK (0xFFFFFFFFFFFFFFEFULL)
+#define ROGUE_MMUCTRL_PT_DATA_PM_SRC_EN (0x0000000000000010ULL)
+#define ROGUE_MMUCTRL_PT_DATA_SLC_BYPASS_CTRL_SHIFT (3U)
+#define ROGUE_MMUCTRL_PT_DATA_SLC_BYPASS_CTRL_CLRMSK (0xFFFFFFFFFFFFFFF7ULL)
+#define ROGUE_MMUCTRL_PT_DATA_SLC_BYPASS_CTRL_EN (0x0000000000000008ULL)
+#define ROGUE_MMUCTRL_PT_DATA_CC_SHIFT (2U)
+#define ROGUE_MMUCTRL_PT_DATA_CC_CLRMSK (0xFFFFFFFFFFFFFFFBULL)
+#define ROGUE_MMUCTRL_PT_DATA_CC_EN (0x0000000000000004ULL)
+#define ROGUE_MMUCTRL_PT_DATA_READ_ONLY_SHIFT (1U)
+#define ROGUE_MMUCTRL_PT_DATA_READ_ONLY_CLRMSK (0xFFFFFFFFFFFFFFFDULL)
+#define ROGUE_MMUCTRL_PT_DATA_READ_ONLY_EN (0x0000000000000002ULL)
+#define ROGUE_MMUCTRL_PT_DATA_VALID_SHIFT (0U)
+#define ROGUE_MMUCTRL_PT_DATA_VALID_CLRMSK (0xFFFFFFFFFFFFFFFEULL)
+#define ROGUE_MMUCTRL_PT_DATA_VALID_EN (0x0000000000000001ULL)
+
+#define ROGUE_MMUCTRL_PD_DATA_ENTRY_PENDING_SHIFT (40U)
+#define ROGUE_MMUCTRL_PD_DATA_ENTRY_PENDING_CLRMSK (0xFFFFFEFFFFFFFFFFULL)
+#define ROGUE_MMUCTRL_PD_DATA_ENTRY_PENDING_EN (0x0000010000000000ULL)
+#define ROGUE_MMUCTRL_PD_DATA_PT_BASE_SHIFT (5U)
+#define ROGUE_MMUCTRL_PD_DATA_PT_BASE_CLRMSK (0xFFFFFF000000001FULL)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_SHIFT (1U)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_CLRMSK (0xFFFFFFFFFFFFFFF1ULL)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_4KB (0x0000000000000000ULL)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_16KB (0x0000000000000002ULL)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_64KB (0x0000000000000004ULL)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_256KB (0x0000000000000006ULL)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_1MB (0x0000000000000008ULL)
+#define ROGUE_MMUCTRL_PD_DATA_PAGE_SIZE_2MB (0x000000000000000aULL)
+#define ROGUE_MMUCTRL_PD_DATA_VALID_SHIFT (0U)
+#define ROGUE_MMUCTRL_PD_DATA_VALID_CLRMSK (0xFFFFFFFFFFFFFFFEULL)
+#define ROGUE_MMUCTRL_PD_DATA_VALID_EN (0x0000000000000001ULL)
+
+#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_SHIFT (4U)
+#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_CLRMSK (0x0000000FU)
+#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT (12U)
+#define ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE (4096U)
+#define ROGUE_MMUCTRL_PC_DATA_ENTRY_PENDING_SHIFT (1U)
+#define ROGUE_MMUCTRL_PC_DATA_ENTRY_PENDING_CLRMSK (0xFFFFFFFDU)
+#define ROGUE_MMUCTRL_PC_DATA_ENTRY_PENDING_EN (0x00000002U)
+#define ROGUE_MMUCTRL_PC_DATA_VALID_SHIFT (0U)
+#define ROGUE_MMUCTRL_PC_DATA_VALID_CLRMSK (0xFFFFFFFEU)
+#define ROGUE_MMUCTRL_PC_DATA_VALID_EN (0x00000001U)
+
+#endif /* PVR_ROGUE_MMU_DEFS_H */
-- 
GitLab


From a26f067feac1f6142c3ccbaeaee8f84078bca9d4 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:30 +0000
Subject: [PATCH 0405/2340] drm/imagination: Add FWIF headers

Changes since v8:
- Corrected license identifiers

Changes since v7:
- Add padding to struct rogue_fwif_ccb_ctl to place read and write offsets
  in different cache lines

Changes since v5:
- Split up header commit due to size
- Add BRN 71242 to device info

Changes since v4:
- Add FW header device info

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/aa681533a02bd2d46af17a6a6010f4d6048fbb0a.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/pvr_rogue_fwif.h  | 2188 +++++++++++++++++
 .../drm/imagination/pvr_rogue_fwif_check.h    |  493 ++++
 .../drm/imagination/pvr_rogue_fwif_client.h   |  373 +++
 .../imagination/pvr_rogue_fwif_client_check.h |  133 +
 .../drm/imagination/pvr_rogue_fwif_common.h   |   60 +
 .../drm/imagination/pvr_rogue_fwif_dev_info.h |  113 +
 .../pvr_rogue_fwif_resetframework.h           |   28 +
 .../drm/imagination/pvr_rogue_fwif_shared.h   |  258 ++
 .../imagination/pvr_rogue_fwif_shared_check.h |  108 +
 .../drm/imagination/pvr_rogue_fwif_stream.h   |   78 +
 10 files changed, 3832 insertions(+)
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h

diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif.h
new file mode 100644
index 0000000000000..172886be4c820
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif.h
@@ -0,0 +1,2188 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_H
+#define PVR_ROGUE_FWIF_H
+
+#include <linux/bits.h>
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "pvr_rogue_defs.h"
+#include "pvr_rogue_fwif_common.h"
+#include "pvr_rogue_fwif_shared.h"
+
+/*
+ ****************************************************************************
+ * Logging type
+ ****************************************************************************
+ */
+#define ROGUE_FWIF_LOG_TYPE_NONE 0x00000000U
+#define ROGUE_FWIF_LOG_TYPE_TRACE 0x00000001U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_MAIN 0x00000002U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_MTS 0x00000004U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_CLEANUP 0x00000008U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_CSW 0x00000010U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_BIF 0x00000020U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_PM 0x00000040U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_RTD 0x00000080U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_SPM 0x00000100U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_POW 0x00000200U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_HWR 0x00000400U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_HWP 0x00000800U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_RPM 0x00001000U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_DMA 0x00002000U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_MISC 0x00004000U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_DEBUG 0x80000000U
+#define ROGUE_FWIF_LOG_TYPE_GROUP_MASK 0x80007FFEU
+#define ROGUE_FWIF_LOG_TYPE_MASK 0x80007FFFU
+
+/* String used in pvrdebug -h output */
+#define ROGUE_FWIF_LOG_GROUPS_STRING_LIST \
+	"main,mts,cleanup,csw,bif,pm,rtd,spm,pow,hwr,hwp,rpm,dma,misc,debug"
+
+/* Table entry to map log group strings to log type value */
+struct rogue_fwif_log_group_map_entry {
+	const char *log_group_name;
+	u32 log_group_type;
+};
+
+/*
+ ****************************************************************************
+ * ROGUE FW signature checks
+ ****************************************************************************
+ */
+#define ROGUE_FW_SIG_BUFFER_SIZE_MIN (8192)
+
+#define ROGUE_FWIF_TIMEDIFF_ID ((0x1UL << 28) | ROGUE_CR_TIMER)
+
+/*
+ ****************************************************************************
+ * Trace Buffer
+ ****************************************************************************
+ */
+
+/* Default size of ROGUE_FWIF_TRACEBUF_SPACE in DWords */
+#define ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS 12000U
+#define ROGUE_FW_TRACE_BUFFER_ASSERT_SIZE 200U
+#define ROGUE_FW_THREAD_NUM 1U
+#define ROGUE_FW_THREAD_MAX 2U
+
+#define ROGUE_FW_POLL_TYPE_SET 0x80000000U
+
+struct rogue_fwif_file_info_buf {
+	char path[ROGUE_FW_TRACE_BUFFER_ASSERT_SIZE];
+	char info[ROGUE_FW_TRACE_BUFFER_ASSERT_SIZE];
+	u32 line_num;
+	u32 padding;
+} __aligned(8);
+
+struct rogue_fwif_tracebuf_space {
+	u32 trace_pointer;
+
+	u32 trace_buffer_fw_addr;
+
+	/* To be used by host when reading from trace buffer */
+	u32 *trace_buffer;
+
+	struct rogue_fwif_file_info_buf assert_buf;
+} __aligned(8);
+
+/* Total number of FW fault logs stored */
+#define ROGUE_FWIF_FWFAULTINFO_MAX (8U)
+
+struct rogue_fw_fault_info {
+	aligned_u64 cr_timer;
+	aligned_u64 os_timer;
+
+	u32 data __aligned(8);
+	u32 reserved;
+	struct rogue_fwif_file_info_buf fault_buf;
+} __aligned(8);
+
+enum rogue_fwif_pow_state {
+	ROGUE_FWIF_POW_OFF, /* idle and ready to full power down */
+	ROGUE_FWIF_POW_ON, /* running HW commands */
+	ROGUE_FWIF_POW_FORCED_IDLE, /* forced idle */
+	ROGUE_FWIF_POW_IDLE, /* idle waiting for host handshake */
+};
+
+/* Firmware HWR states */
+/* The HW state is ok or locked up */
+#define ROGUE_FWIF_HWR_HARDWARE_OK BIT(0)
+/* Tells if a HWR reset is in progress */
+#define ROGUE_FWIF_HWR_RESET_IN_PROGRESS BIT(1)
+/* A DM unrelated lockup has been detected */
+#define ROGUE_FWIF_HWR_GENERAL_LOCKUP BIT(3)
+/* At least one DM is running without being close to a lockup */
+#define ROGUE_FWIF_HWR_DM_RUNNING_OK BIT(4)
+/* At least one DM is close to lockup */
+#define ROGUE_FWIF_HWR_DM_STALLING BIT(5)
+/* The FW has faulted and needs to restart */
+#define ROGUE_FWIF_HWR_FW_FAULT BIT(6)
+/* The FW has requested the host to restart it */
+#define ROGUE_FWIF_HWR_RESTART_REQUESTED BIT(7)
+
+#define ROGUE_FWIF_PHR_STATE_SHIFT (8U)
+/* The FW has requested the host to restart it, per PHR configuration */
+#define ROGUE_FWIF_PHR_RESTART_REQUESTED ((1) << ROGUE_FWIF_PHR_STATE_SHIFT)
+/* A PHR triggered GPU reset has just finished */
+#define ROGUE_FWIF_PHR_RESTART_FINISHED ((2) << ROGUE_FWIF_PHR_STATE_SHIFT)
+#define ROGUE_FWIF_PHR_RESTART_MASK \
+	(ROGUE_FWIF_PHR_RESTART_REQUESTED | ROGUE_FWIF_PHR_RESTART_FINISHED)
+
+#define ROGUE_FWIF_PHR_MODE_OFF (0UL)
+#define ROGUE_FWIF_PHR_MODE_RD_RESET (1UL)
+#define ROGUE_FWIF_PHR_MODE_FULL_RESET (2UL)
+
+/* Firmware per-DM HWR states */
+/* DM is working if all flags are cleared */
+#define ROGUE_FWIF_DM_STATE_WORKING (0)
+/* DM is idle and ready for HWR */
+#define ROGUE_FWIF_DM_STATE_READY_FOR_HWR BIT(0)
+/* DM need to skip to next cmd before resuming processing */
+#define ROGUE_FWIF_DM_STATE_NEEDS_SKIP BIT(2)
+/* DM need partial render cleanup before resuming processing */
+#define ROGUE_FWIF_DM_STATE_NEEDS_PR_CLEANUP BIT(3)
+/* DM need to increment Recovery Count once fully recovered */
+#define ROGUE_FWIF_DM_STATE_NEEDS_TRACE_CLEAR BIT(4)
+/* DM was identified as locking up and causing HWR */
+#define ROGUE_FWIF_DM_STATE_GUILTY_LOCKUP BIT(5)
+/* DM was innocently affected by another lockup which caused HWR */
+#define ROGUE_FWIF_DM_STATE_INNOCENT_LOCKUP BIT(6)
+/* DM was identified as over-running and causing HWR */
+#define ROGUE_FWIF_DM_STATE_GUILTY_OVERRUNING BIT(7)
+/* DM was innocently affected by another DM over-running which caused HWR */
+#define ROGUE_FWIF_DM_STATE_INNOCENT_OVERRUNING BIT(8)
+/* DM was forced into HWR as it delayed more important workloads */
+#define ROGUE_FWIF_DM_STATE_HARD_CONTEXT_SWITCH BIT(9)
+/* DM was forced into HWR due to an uncorrected GPU ECC error */
+#define ROGUE_FWIF_DM_STATE_GPU_ECC_HWR BIT(10)
+
+/* Firmware's connection state */
+enum rogue_fwif_connection_fw_state {
+	/* Firmware is offline */
+	ROGUE_FW_CONNECTION_FW_OFFLINE = 0,
+	/* Firmware is initialised */
+	ROGUE_FW_CONNECTION_FW_READY,
+	/* Firmware connection is fully established */
+	ROGUE_FW_CONNECTION_FW_ACTIVE,
+	/* Firmware is clearing up connection data*/
+	ROGUE_FW_CONNECTION_FW_OFFLOADING,
+	ROGUE_FW_CONNECTION_FW_STATE_COUNT
+};
+
+/* OS' connection state */
+enum rogue_fwif_connection_os_state {
+	/* OS is offline */
+	ROGUE_FW_CONNECTION_OS_OFFLINE = 0,
+	/* OS's KM driver is setup and waiting */
+	ROGUE_FW_CONNECTION_OS_READY,
+	/* OS connection is fully established */
+	ROGUE_FW_CONNECTION_OS_ACTIVE,
+	ROGUE_FW_CONNECTION_OS_STATE_COUNT
+};
+
+struct rogue_fwif_os_runtime_flags {
+	unsigned int os_state : 3;
+	unsigned int fl_ok : 1;
+	unsigned int fl_grow_pending : 1;
+	unsigned int isolated_os : 1;
+	unsigned int reserved : 26;
+};
+
+#define PVR_SLR_LOG_ENTRIES 10
+/* MAX_CLIENT_CCB_NAME not visible to this header */
+#define PVR_SLR_LOG_STRLEN 30
+
+struct rogue_fwif_slr_entry {
+	aligned_u64 timestamp;
+	u32 fw_ctx_addr;
+	u32 num_ufos;
+	char ccb_name[PVR_SLR_LOG_STRLEN];
+	char padding[2];
+} __aligned(8);
+
+#define MAX_THREAD_NUM 2
+
+/* firmware trace control data */
+struct rogue_fwif_tracebuf {
+	u32 log_type;
+	struct rogue_fwif_tracebuf_space tracebuf[MAX_THREAD_NUM];
+	/*
+	 * Member initialised only when sTraceBuf is actually allocated (in
+	 * ROGUETraceBufferInitOnDemandResources)
+	 */
+	u32 tracebuf_size_in_dwords;
+	/* Compatibility and other flags */
+	u32 tracebuf_flags;
+} __aligned(8);
+
+/* firmware system data shared with the Host driver */
+struct rogue_fwif_sysdata {
+	/* Configuration flags from host */
+	u32 config_flags;
+	/* Extended configuration flags from host */
+	u32 config_flags_ext;
+	enum rogue_fwif_pow_state pow_state;
+	u32 hw_perf_ridx;
+	u32 hw_perf_widx;
+	u32 hw_perf_wrap_count;
+	/* Constant after setup, needed in FW */
+	u32 hw_perf_size;
+	/* The number of times the FW drops a packet due to buffer full */
+	u32 hw_perf_drop_count;
+
+	/*
+	 * ui32HWPerfUt, ui32FirstDropOrdinal, ui32LastDropOrdinal only valid
+	 * when FW is built with ROGUE_HWPERF_UTILIZATION &
+	 * ROGUE_HWPERF_DROP_TRACKING defined in rogue_fw_hwperf.c
+	 */
+	/* Buffer utilisation, high watermark of bytes in use */
+	u32 hw_perf_ut;
+	/* The ordinal of the first packet the FW dropped */
+	u32 first_drop_ordinal;
+	/* The ordinal of the last packet the FW dropped */
+	u32 last_drop_ordinal;
+	/* State flags for each Operating System mirrored from Fw coremem */
+	struct rogue_fwif_os_runtime_flags
+		os_runtime_flags_mirror[ROGUE_FW_MAX_NUM_OS];
+
+	struct rogue_fw_fault_info fault_info[ROGUE_FWIF_FWFAULTINFO_MAX];
+	u32 fw_faults;
+	u32 cr_poll_addr[MAX_THREAD_NUM];
+	u32 cr_poll_mask[MAX_THREAD_NUM];
+	u32 cr_poll_count[MAX_THREAD_NUM];
+	aligned_u64 start_idle_time;
+
+#if defined(SUPPORT_ROGUE_FW_STATS_FRAMEWORK)
+#	define ROGUE_FWIF_STATS_FRAMEWORK_LINESIZE (8)
+#	define ROGUE_FWIF_STATS_FRAMEWORK_MAX \
+		(2048 * ROGUE_FWIF_STATS_FRAMEWORK_LINESIZE)
+	u32 fw_stats_buf[ROGUE_FWIF_STATS_FRAMEWORK_MAX] __aligned(8);
+#endif
+	u32 hwr_state_flags;
+	u32 hwr_recovery_flags[PVR_FWIF_DM_MAX];
+	/* Compatibility and other flags */
+	u32 fw_sys_data_flags;
+	/* Identify whether MC config is P-P or P-S */
+	u32 mc_config;
+} __aligned(8);
+
+/* per-os firmware shared data */
+struct rogue_fwif_osdata {
+	/* Configuration flags from an OS */
+	u32 fw_os_config_flags;
+	/* Markers to signal that the host should perform a full sync check */
+	u32 fw_sync_check_mark;
+	u32 host_sync_check_mark;
+
+	u32 forced_updates_requested;
+	u8 slr_log_wp;
+	struct rogue_fwif_slr_entry slr_log_first;
+	struct rogue_fwif_slr_entry slr_log[PVR_SLR_LOG_ENTRIES];
+	aligned_u64 last_forced_update_time;
+
+	/* Interrupt count from Threads > */
+	u32 interrupt_count[MAX_THREAD_NUM];
+	u32 kccb_cmds_executed;
+	u32 power_sync_fw_addr;
+	/* Compatibility and other flags */
+	u32 fw_os_data_flags;
+	u32 padding;
+} __aligned(8);
+
+/* Firmware trace time-stamp field breakup */
+
+/* ROGUE_CR_TIMER register read (48 bits) value*/
+#define ROGUE_FWT_TIMESTAMP_TIME_SHIFT (0U)
+#define ROGUE_FWT_TIMESTAMP_TIME_CLRMSK (0xFFFF000000000000ull)
+
+/* Extra debug-info (16 bits) */
+#define ROGUE_FWT_TIMESTAMP_DEBUG_INFO_SHIFT (48U)
+#define ROGUE_FWT_TIMESTAMP_DEBUG_INFO_CLRMSK ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK
+
+/* Debug-info sub-fields */
+/*
+ * Bit 0: ROGUE_CR_EVENT_STATUS_MMU_PAGE_FAULT bit from ROGUE_CR_EVENT_STATUS
+ * register
+ */
+#define ROGUE_FWT_DEBUG_INFO_MMU_PAGE_FAULT_SHIFT (0U)
+#define ROGUE_FWT_DEBUG_INFO_MMU_PAGE_FAULT_SET \
+	BIT(ROGUE_FWT_DEBUG_INFO_MMU_PAGE_FAULT_SHIFT)
+
+/* Bit 1: ROGUE_CR_BIF_MMU_ENTRY_PENDING bit from ROGUE_CR_BIF_MMU_ENTRY register */
+#define ROGUE_FWT_DEBUG_INFO_MMU_ENTRY_PENDING_SHIFT (1U)
+#define ROGUE_FWT_DEBUG_INFO_MMU_ENTRY_PENDING_SET \
+	BIT(ROGUE_FWT_DEBUG_INFO_MMU_ENTRY_PENDING_SHIFT)
+
+/* Bit 2: ROGUE_CR_SLAVE_EVENT register is non-zero */
+#define ROGUE_FWT_DEBUG_INFO_SLAVE_EVENTS_SHIFT (2U)
+#define ROGUE_FWT_DEBUG_INFO_SLAVE_EVENTS_SET \
+	BIT(ROGUE_FWT_DEBUG_INFO_SLAVE_EVENTS_SHIFT)
+
+/* Bit 3-15: Unused bits */
+
+#define ROGUE_FWT_DEBUG_INFO_STR_MAXLEN 64
+#define ROGUE_FWT_DEBUG_INFO_STR_PREPEND " (debug info: "
+#define ROGUE_FWT_DEBUG_INFO_STR_APPEND ")"
+
+/*
+ ******************************************************************************
+ * HWR Data
+ ******************************************************************************
+ */
+enum rogue_hwrtype {
+	ROGUE_HWRTYPE_UNKNOWNFAILURE = 0,
+	ROGUE_HWRTYPE_OVERRUN = 1,
+	ROGUE_HWRTYPE_POLLFAILURE = 2,
+	ROGUE_HWRTYPE_BIF0FAULT = 3,
+	ROGUE_HWRTYPE_BIF1FAULT = 4,
+	ROGUE_HWRTYPE_TEXASBIF0FAULT = 5,
+	ROGUE_HWRTYPE_MMUFAULT = 6,
+	ROGUE_HWRTYPE_MMUMETAFAULT = 7,
+	ROGUE_HWRTYPE_MIPSTLBFAULT = 8,
+	ROGUE_HWRTYPE_ECCFAULT = 9,
+	ROGUE_HWRTYPE_MMURISCVFAULT = 10,
+};
+
+#define ROGUE_FWIF_HWRTYPE_BIF_BANK_GET(hwr_type) \
+	(((hwr_type) == ROGUE_HWRTYPE_BIF0FAULT) ? 0 : 1)
+
+#define ROGUE_FWIF_HWRTYPE_PAGE_FAULT_GET(hwr_type)       \
+	((((hwr_type) == ROGUE_HWRTYPE_BIF0FAULT) ||      \
+	  ((hwr_type) == ROGUE_HWRTYPE_BIF1FAULT) ||      \
+	  ((hwr_type) == ROGUE_HWRTYPE_TEXASBIF0FAULT) || \
+	  ((hwr_type) == ROGUE_HWRTYPE_MMUFAULT) ||       \
+	  ((hwr_type) == ROGUE_HWRTYPE_MMUMETAFAULT) ||   \
+	  ((hwr_type) == ROGUE_HWRTYPE_MIPSTLBFAULT) ||   \
+	  ((hwr_type) == ROGUE_HWRTYPE_MMURISCVFAULT))    \
+		 ? true                                   \
+		 : false)
+
+struct rogue_bifinfo {
+	aligned_u64 bif_req_status;
+	aligned_u64 bif_mmu_status;
+	aligned_u64 pc_address; /* phys address of the page catalogue */
+	aligned_u64 reserved;
+};
+
+struct rogue_eccinfo {
+	u32 fault_gpu;
+};
+
+struct rogue_mmuinfo {
+	aligned_u64 mmu_status[2];
+	aligned_u64 pc_address; /* phys address of the page catalogue */
+	aligned_u64 reserved;
+};
+
+struct rogue_pollinfo {
+	u32 thread_num;
+	u32 cr_poll_addr;
+	u32 cr_poll_mask;
+	u32 cr_poll_last_value;
+	aligned_u64 reserved;
+} __aligned(8);
+
+struct rogue_tlbinfo {
+	u32 bad_addr;
+	u32 entry_lo;
+};
+
+struct rogue_hwrinfo {
+	union {
+		struct rogue_bifinfo bif_info;
+		struct rogue_mmuinfo mmu_info;
+		struct rogue_pollinfo poll_info;
+		struct rogue_tlbinfo tlb_info;
+		struct rogue_eccinfo ecc_info;
+	} hwr_data;
+
+	aligned_u64 cr_timer;
+	aligned_u64 os_timer;
+	u32 frame_num;
+	u32 pid;
+	u32 active_hwrt_data;
+	u32 hwr_number;
+	u32 event_status;
+	u32 hwr_recovery_flags;
+	enum rogue_hwrtype hwr_type;
+	u32 dm;
+	u32 core_id;
+	aligned_u64 cr_time_of_kick;
+	aligned_u64 cr_time_hw_reset_start;
+	aligned_u64 cr_time_hw_reset_finish;
+	aligned_u64 cr_time_freelist_ready;
+	aligned_u64 reserved[2];
+} __aligned(8);
+
+/* Number of first HWR logs recorded (never overwritten by newer logs) */
+#define ROGUE_FWIF_HWINFO_MAX_FIRST 8U
+/* Number of latest HWR logs (older logs are overwritten by newer logs) */
+#define ROGUE_FWIF_HWINFO_MAX_LAST 8U
+/* Total number of HWR logs stored in a buffer */
+#define ROGUE_FWIF_HWINFO_MAX \
+	(ROGUE_FWIF_HWINFO_MAX_FIRST + ROGUE_FWIF_HWINFO_MAX_LAST)
+/* Index of the last log in the HWR log buffer */
+#define ROGUE_FWIF_HWINFO_LAST_INDEX (ROGUE_FWIF_HWINFO_MAX - 1U)
+
+struct rogue_fwif_hwrinfobuf {
+	struct rogue_hwrinfo hwr_info[ROGUE_FWIF_HWINFO_MAX];
+	u32 hwr_counter;
+	u32 write_index;
+	u32 dd_req_count;
+	u32 hwr_info_buf_flags; /* Compatibility and other flags */
+	u32 hwr_dm_locked_up_count[PVR_FWIF_DM_MAX];
+	u32 hwr_dm_overran_count[PVR_FWIF_DM_MAX];
+	u32 hwr_dm_recovered_count[PVR_FWIF_DM_MAX];
+	u32 hwr_dm_false_detect_count[PVR_FWIF_DM_MAX];
+} __aligned(8);
+
+#define ROGUE_FWIF_CTXSWITCH_PROFILE_FAST_EN (1)
+#define ROGUE_FWIF_CTXSWITCH_PROFILE_MEDIUM_EN (2)
+#define ROGUE_FWIF_CTXSWITCH_PROFILE_SLOW_EN (3)
+#define ROGUE_FWIF_CTXSWITCH_PROFILE_NODELAY_EN (4)
+
+#define ROGUE_FWIF_CDM_ARBITRATION_TASK_DEMAND_EN (1)
+#define ROGUE_FWIF_CDM_ARBITRATION_ROUND_ROBIN_EN (2)
+
+#define ROGUE_FWIF_ISP_SCHEDMODE_VER1_IPP (1)
+#define ROGUE_FWIF_ISP_SCHEDMODE_VER2_ISP (2)
+/*
+ ******************************************************************************
+ * ROGUE firmware Init Config Data
+ ******************************************************************************
+ */
+
+/* Flag definitions affecting the firmware globally */
+#define ROGUE_FWIF_INICFG_CTXSWITCH_MODE_RAND BIT(0)
+#define ROGUE_FWIF_INICFG_CTXSWITCH_SRESET_EN BIT(1)
+#define ROGUE_FWIF_INICFG_HWPERF_EN BIT(2)
+#define ROGUE_FWIF_INICFG_DM_KILL_MODE_RAND_EN BIT(3)
+#define ROGUE_FWIF_INICFG_POW_RASCALDUST BIT(4)
+/* Bit 5 is reserved. */
+#define ROGUE_FWIF_INICFG_FBCDC_V3_1_EN BIT(6)
+#define ROGUE_FWIF_INICFG_CHECK_MLIST_EN BIT(7)
+#define ROGUE_FWIF_INICFG_DISABLE_CLKGATING_EN BIT(8)
+/* Bit 9 is reserved. */
+/* Bit 10 is reserved. */
+/* Bit 11 is reserved. */
+#define ROGUE_FWIF_INICFG_REGCONFIG_EN BIT(12)
+#define ROGUE_FWIF_INICFG_ASSERT_ON_OUTOFMEMORY BIT(13)
+#define ROGUE_FWIF_INICFG_HWP_DISABLE_FILTER BIT(14)
+/* Bit 15 is reserved. */
+#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT (16)
+#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_FAST \
+	(ROGUE_FWIF_CTXSWITCH_PROFILE_FAST_EN    \
+	 << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT)
+#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_MEDIUM \
+	(ROGUE_FWIF_CTXSWITCH_PROFILE_MEDIUM_EN    \
+	 << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT)
+#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SLOW \
+	(ROGUE_FWIF_CTXSWITCH_PROFILE_SLOW_EN    \
+	 << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT)
+#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_NODELAY \
+	(ROGUE_FWIF_CTXSWITCH_PROFILE_NODELAY_EN    \
+	 << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT)
+#define ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_MASK \
+	(7 << ROGUE_FWIF_INICFG_CTXSWITCH_PROFILE_SHIFT)
+#define ROGUE_FWIF_INICFG_DISABLE_DM_OVERLAP BIT(19)
+#define ROGUE_FWIF_INICFG_ASSERT_ON_HWR_TRIGGER BIT(20)
+#define ROGUE_FWIF_INICFG_FABRIC_COHERENCY_ENABLED BIT(21)
+#define ROGUE_FWIF_INICFG_VALIDATE_IRQ BIT(22)
+#define ROGUE_FWIF_INICFG_DISABLE_PDP_EN BIT(23)
+#define ROGUE_FWIF_INICFG_SPU_POWER_STATE_MASK_CHANGE_EN BIT(24)
+#define ROGUE_FWIF_INICFG_WORKEST BIT(25)
+#define ROGUE_FWIF_INICFG_PDVFS BIT(26)
+#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT (27)
+#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_TASK_DEMAND \
+	(ROGUE_FWIF_CDM_ARBITRATION_TASK_DEMAND_EN    \
+	 << ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT)
+#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_ROUND_ROBIN \
+	(ROGUE_FWIF_CDM_ARBITRATION_ROUND_ROBIN_EN    \
+	 << ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT)
+#define ROGUE_FWIF_INICFG_CDM_ARBITRATION_MASK \
+	(3 << ROGUE_FWIF_INICFG_CDM_ARBITRATION_SHIFT)
+#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_SHIFT (29)
+#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_NONE (0)
+#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER1_IPP \
+	(ROGUE_FWIF_ISP_SCHEDMODE_VER1_IPP      \
+	 << ROGUE_FWIF_INICFG_ISPSCHEDMODE_SHIFT)
+#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER2_ISP \
+	(ROGUE_FWIF_ISP_SCHEDMODE_VER2_ISP      \
+	 << ROGUE_FWIF_INICFG_ISPSCHEDMODE_SHIFT)
+#define ROGUE_FWIF_INICFG_ISPSCHEDMODE_MASK        \
+	(ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER1_IPP | \
+	 ROGUE_FWIF_INICFG_ISPSCHEDMODE_VER2_ISP)
+#define ROGUE_FWIF_INICFG_VALIDATE_SOCUSC_TIMER BIT(31)
+
+#define ROGUE_FWIF_INICFG_ALL (0xFFFFFFFFU)
+
+/* Extended Flag definitions affecting the firmware globally */
+#define ROGUE_FWIF_INICFG_EXT_TFBC_CONTROL_SHIFT (0)
+/* [7]   YUV10 override
+ * [6:4] Quality
+ * [3]   Quality enable
+ * [2:1] Compression scheme
+ * [0]   Lossy group
+ */
+#define ROGUE_FWIF_INICFG_EXT_TFBC_CONTROL_MASK (0xFF)
+#define ROGUE_FWIF_INICFG_EXT_ALL (ROGUE_FWIF_INICFG_EXT_TFBC_CONTROL_MASK)
+
+/* Flag definitions affecting only workloads submitted by a particular OS */
+#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_TDM_EN BIT(0)
+#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_GEOM_EN BIT(1)
+#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_FRAG_EN BIT(2)
+#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_CDM_EN BIT(3)
+
+#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_TDM BIT(4)
+#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_GEOM BIT(5)
+#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_FRAG BIT(6)
+#define ROGUE_FWIF_INICFG_OS_LOW_PRIO_CS_CDM BIT(7)
+
+#define ROGUE_FWIF_INICFG_OS_ALL (0xFF)
+
+#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_DM_ALL     \
+	(ROGUE_FWIF_INICFG_OS_CTXSWITCH_TDM_EN |  \
+	 ROGUE_FWIF_INICFG_OS_CTXSWITCH_GEOM_EN | \
+	 ROGUE_FWIF_INICFG_OS_CTXSWITCH_FRAG_EN |   \
+	 ROGUE_FWIF_INICFG_OS_CTXSWITCH_CDM_EN)
+
+#define ROGUE_FWIF_INICFG_OS_CTXSWITCH_CLRMSK \
+	~(ROGUE_FWIF_INICFG_OS_CTXSWITCH_DM_ALL)
+
+#define ROGUE_FWIF_FILTCFG_TRUNCATE_HALF BIT(3)
+#define ROGUE_FWIF_FILTCFG_TRUNCATE_INT BIT(2)
+#define ROGUE_FWIF_FILTCFG_NEW_FILTER_MODE BIT(1)
+
+enum rogue_activepm_conf {
+	ROGUE_ACTIVEPM_FORCE_OFF = 0,
+	ROGUE_ACTIVEPM_FORCE_ON = 1,
+	ROGUE_ACTIVEPM_DEFAULT = 2
+};
+
+enum rogue_rd_power_island_conf {
+	ROGUE_RD_POWER_ISLAND_FORCE_OFF = 0,
+	ROGUE_RD_POWER_ISLAND_FORCE_ON = 1,
+	ROGUE_RD_POWER_ISLAND_DEFAULT = 2
+};
+
+struct rogue_fw_register_list {
+	/* Register number */
+	u16 reg_num;
+	/* Indirect register number (or 0 if not used) */
+	u16 indirect_reg_num;
+	/* Start value for indirect register */
+	u16 indirect_start_val;
+	/* End value for indirect register */
+	u16 indirect_end_val;
+};
+
+struct rogue_fwif_dllist_node {
+	u32 p;
+	u32 n;
+};
+
+/*
+ * This number is used to represent an invalid page catalogue physical address
+ */
+#define ROGUE_FWIF_INVALID_PC_PHYADDR 0xFFFFFFFFFFFFFFFFLLU
+
+/* This number is used to represent unallocated page catalog base register */
+#define ROGUE_FW_BIF_INVALID_PCSET 0xFFFFFFFFU
+
+/* Firmware memory context. */
+struct rogue_fwif_fwmemcontext {
+	/* device physical address of context's page catalogue */
+	aligned_u64 pc_dev_paddr;
+	/*
+	 * associated page catalog base register (ROGUE_FW_BIF_INVALID_PCSET ==
+	 * unallocated)
+	 */
+	u32 page_cat_base_reg_set;
+	/* breakpoint address */
+	u32 breakpoint_addr;
+	/* breakpoint handler address */
+	u32 bp_handler_addr;
+	/* DM and enable control for BP */
+	u32 breakpoint_ctl;
+	/* Compatibility and other flags */
+	u32 fw_mem_ctx_flags;
+	u32 padding;
+} __aligned(8);
+
+/*
+ * FW context state flags
+ */
+#define ROGUE_FWIF_CONTEXT_FLAGS_NEED_RESUME (0x00000001U)
+#define ROGUE_FWIF_CONTEXT_FLAGS_MC_NEED_RESUME_MASKFULL (0x000000FFU)
+#define ROGUE_FWIF_CONTEXT_FLAGS_TDM_HEADER_STALE (0x00000100U)
+#define ROGUE_FWIF_CONTEXT_FLAGS_LAST_KICK_SECURE (0x00000200U)
+
+#define ROGUE_NUM_GEOM_CORES_MAX 4
+
+/*
+ * FW-accessible TA state which must be written out to memory on context store
+ */
+struct rogue_fwif_geom_ctx_state_per_geom {
+	/* To store in mid-TA */
+	aligned_u64 geom_reg_vdm_call_stack_pointer;
+	/* Initial value (in case is 'lost' due to a lock-up */
+	aligned_u64 geom_reg_vdm_call_stack_pointer_init;
+	u32 geom_reg_vbs_so_prim[4];
+	u16 geom_current_idx;
+	u16 padding[3];
+} __aligned(8);
+
+struct rogue_fwif_geom_ctx_state {
+	/* FW-accessible TA state which must be written out to memory on context store */
+	struct rogue_fwif_geom_ctx_state_per_geom geom_core[ROGUE_NUM_GEOM_CORES_MAX];
+} __aligned(8);
+
+/*
+ * FW-accessible ISP state which must be written out to memory on context store
+ */
+struct rogue_fwif_frag_ctx_state {
+	u32 frag_reg_pm_deallocated_mask_status;
+	u32 frag_reg_dm_pds_mtilefree_status;
+	/* Compatibility and other flags */
+	u32 ctx_state_flags;
+	/*
+	 * frag_reg_isp_store should be the last element of the structure as this
+	 * is an array whose size is determined at runtime after detecting the
+	 * ROGUE core
+	 */
+	u32 frag_reg_isp_store[];
+} __aligned(8);
+
+#define ROGUE_FWIF_CTX_USING_BUFFER_A (0)
+#define ROGUE_FWIF_CTX_USING_BUFFER_B (1U)
+
+struct rogue_fwif_compute_ctx_state {
+	u32 ctx_state_flags; /* Target buffer and other flags */
+};
+
+struct rogue_fwif_fwcommoncontext {
+	/* CCB details for this firmware context */
+	u32 ccbctl_fw_addr; /* CCB control */
+	u32 ccb_fw_addr; /* CCB base */
+	struct rogue_fwif_dma_addr ccb_meta_dma_addr;
+
+	/* Context suspend state */
+	/* geom/frag context suspend state, read/written by FW */
+	u32 context_state_addr __aligned(8);
+
+	/* Flags e.g. for context switching */
+	u32 fw_com_ctx_flags;
+	u32 priority;
+	u32 priority_seq_num;
+
+	/* Framework state */
+	/* Register updates for Framework */
+	u32 rf_cmd_addr __aligned(8);
+
+	/* Statistic updates waiting to be passed back to the host... */
+	/* True when some stats are pending */
+	bool stats_pending __aligned(4);
+	/* Number of stores on this context since last update */
+	s32 stats_num_stores;
+	/* Number of OOMs on this context since last update */
+	s32 stats_num_out_of_memory;
+	/* Number of PRs on this context since last update */
+	s32 stats_num_partial_renders;
+	/* Data Master type */
+	u32 dm;
+	/* Device Virtual Address of the signal the context is waiting on */
+	aligned_u64 wait_signal_address;
+	/* List entry for the wait-signal list */
+	struct rogue_fwif_dllist_node wait_signal_node __aligned(8);
+	/* List entry for the buffer stalled list */
+	struct rogue_fwif_dllist_node buf_stalled_node __aligned(8);
+	/* Address of the circular buffer queue pointers */
+	aligned_u64 cbuf_queue_ctrl_addr;
+
+	aligned_u64 robustness_address;
+	/* Max HWR deadline limit in ms */
+	u32 max_deadline_ms;
+	/* Following HWR circular buffer read-offset needs resetting */
+	bool read_offset_needs_reset;
+
+	/* List entry for the waiting list */
+	struct rogue_fwif_dllist_node waiting_node __aligned(8);
+	/* List entry for the run list */
+	struct rogue_fwif_dllist_node run_node __aligned(8);
+	/* UFO that last failed (or NULL) */
+	struct rogue_fwif_ufo last_failed_ufo;
+
+	/* Memory context */
+	u32 fw_mem_context_fw_addr;
+
+	/* References to the host side originators */
+	/* the Server Common Context */
+	u32 server_common_context_id;
+	/* associated process ID */
+	u32 pid;
+
+	/* True when Geom DM OOM is not allowed */
+	bool geom_oom_disabled __aligned(4);
+} __aligned(8);
+
+/* Firmware render context. */
+struct rogue_fwif_fwrendercontext {
+	/* Geometry firmware context. */
+	struct rogue_fwif_fwcommoncontext geom_context;
+	/* Fragment firmware context. */
+	struct rogue_fwif_fwcommoncontext frag_context;
+
+	struct rogue_fwif_static_rendercontext_state static_render_context_state;
+
+	/* Number of commands submitted to the WorkEst FW CCB */
+	u32 work_est_ccb_submitted;
+
+	/* Compatibility and other flags */
+	u32 fw_render_ctx_flags;
+} __aligned(8);
+
+/* Firmware compute context. */
+struct rogue_fwif_fwcomputecontext {
+	/* Firmware context for the CDM */
+	struct rogue_fwif_fwcommoncontext cdm_context;
+
+	struct rogue_fwif_static_computecontext_state
+		static_compute_context_state;
+
+	/* Number of commands submitted to the WorkEst FW CCB */
+	u32 work_est_ccb_submitted;
+
+	/* Compatibility and other flags */
+	u32 compute_ctx_flags;
+
+	u32 wgp_state;
+	u32 wgp_checksum;
+	u32 core_mask_a;
+	u32 core_mask_b;
+} __aligned(8);
+
+/* Firmware TDM context. */
+struct rogue_fwif_fwtdmcontext {
+	/* Firmware context for the TDM */
+	struct rogue_fwif_fwcommoncontext tdm_context;
+
+	/* Number of commands submitted to the WorkEst FW CCB */
+	u32 work_est_ccb_submitted;
+} __aligned(8);
+
+/* Firmware TQ3D context. */
+struct rogue_fwif_fwtransfercontext {
+	/* Firmware context for TQ3D. */
+	struct rogue_fwif_fwcommoncontext tq_context;
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * Defines for CMD_TYPE corruption detection and forward compatibility check
+ ******************************************************************************
+ */
+
+/*
+ * CMD_TYPE 32bit contains:
+ * 31:16	Reserved for magic value to detect corruption (16 bits)
+ * 15		Reserved for ROGUE_CCB_TYPE_TASK (1 bit)
+ * 14:0		Bits available for CMD_TYPEs (15 bits)
+ */
+
+/* Magic value to detect corruption */
+#define ROGUE_CMD_MAGIC_DWORD (0x2ABC)
+#define ROGUE_CMD_MAGIC_DWORD_MASK (0xFFFF0000U)
+#define ROGUE_CMD_MAGIC_DWORD_SHIFT (16U)
+#define ROGUE_CMD_MAGIC_DWORD_SHIFTED \
+	(ROGUE_CMD_MAGIC_DWORD << ROGUE_CMD_MAGIC_DWORD_SHIFT)
+
+/* Kernel CCB control for ROGUE */
+struct rogue_fwif_ccb_ctl {
+	/* write offset into array of commands (MUST be aligned to 16 bytes!) */
+	u32 write_offset;
+	/* Padding to ensure read and write offsets are in separate cache lines. */
+	u8 padding[128 - sizeof(u32)];
+	/* read offset into array of commands */
+	u32 read_offset;
+	/* Offset wrapping mask (Total capacity of the CCB - 1) */
+	u32 wrap_mask;
+	/* size of each command in bytes */
+	u32 cmd_size;
+	u32 padding2;
+} __aligned(8);
+
+/* Kernel CCB command structure for ROGUE */
+
+#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT (0x1U) /* MMU_CTRL_INVAL_PT_EN */
+#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD (0x2U) /* MMU_CTRL_INVAL_PD_EN */
+#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC (0x4U) /* MMU_CTRL_INVAL_PC_EN */
+
+/*
+ * can't use PM_TLB0 bit from BIFPM_CTRL reg because it collides with PT
+ * bit from BIF_CTRL reg
+ */
+#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_PMTLB (0x10)
+/* BIF_CTRL_INVAL_TLB1_EN */
+#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB \
+	(ROGUE_FWIF_MMUCACHEDATA_FLAGS_PMTLB | 0x8)
+/* MMU_CTRL_INVAL_ALL_CONTEXTS_EN */
+#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_CTX_ALL (0x800)
+
+/* indicates FW should interrupt the host */
+#define ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT (0x4000000U)
+
+struct rogue_fwif_mmucachedata {
+	u32 cache_flags;
+	u32 mmu_cache_sync_fw_addr;
+	u32 mmu_cache_sync_update_value;
+};
+
+#define ROGUE_FWIF_BPDATA_FLAGS_ENABLE BIT(0)
+#define ROGUE_FWIF_BPDATA_FLAGS_WRITE BIT(1)
+#define ROGUE_FWIF_BPDATA_FLAGS_CTL BIT(2)
+#define ROGUE_FWIF_BPDATA_FLAGS_REGS BIT(3)
+
+struct rogue_fwif_bpdata {
+	/* Memory context */
+	u32 fw_mem_context_fw_addr;
+	/* Breakpoint address */
+	u32 bp_addr;
+	/* Breakpoint handler */
+	u32 bp_handler_addr;
+	/* Breakpoint control */
+	u32 bp_dm;
+	u32 bp_data_flags;
+	/* Number of temporary registers to overallocate */
+	u32 temp_regs;
+	/* Number of shared registers to overallocate */
+	u32 shared_regs;
+	/* DM associated with the breakpoint */
+	u32 dm;
+};
+
+#define ROGUE_FWIF_KCCB_CMD_KICK_DATA_MAX_NUM_CLEANUP_CTLS \
+	(ROGUE_FWIF_PRBUFFER_MAXSUPPORTED + 1U) /* +1 is RTDATASET cleanup */
+
+struct rogue_fwif_kccb_cmd_kick_data {
+	/* address of the firmware context */
+	u32 context_fw_addr;
+	/* Client CCB woff update */
+	u32 client_woff_update;
+	/* Client CCB wrap mask update after CCCB growth */
+	u32 client_wrap_mask_update;
+	/* number of CleanupCtl pointers attached */
+	u32 num_cleanup_ctl;
+	/* CleanupCtl structures associated with command */
+	u32 cleanup_ctl_fw_addr
+		[ROGUE_FWIF_KCCB_CMD_KICK_DATA_MAX_NUM_CLEANUP_CTLS];
+	/*
+	 * offset to the CmdHeader which houses the workload estimation kick
+	 * data.
+	 */
+	u32 work_est_cmd_header_offset;
+};
+
+struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data {
+	struct rogue_fwif_kccb_cmd_kick_data geom_cmd_kick_data;
+	struct rogue_fwif_kccb_cmd_kick_data frag_cmd_kick_data;
+};
+
+struct rogue_fwif_kccb_cmd_force_update_data {
+	/* address of the firmware context */
+	u32 context_fw_addr;
+	/* Client CCB fence offset */
+	u32 ccb_fence_offset;
+};
+
+enum rogue_fwif_cleanup_type {
+	/* FW common context cleanup */
+	ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
+	/* FW HW RT data cleanup */
+	ROGUE_FWIF_CLEANUP_HWRTDATA,
+	/* FW freelist cleanup */
+	ROGUE_FWIF_CLEANUP_FREELIST,
+	/* FW ZS Buffer cleanup */
+	ROGUE_FWIF_CLEANUP_ZSBUFFER,
+};
+
+struct rogue_fwif_cleanup_request {
+	/* Cleanup type */
+	enum rogue_fwif_cleanup_type cleanup_type;
+	union {
+		/* FW common context to cleanup */
+		u32 context_fw_addr;
+		/* HW RT to cleanup */
+		u32 hwrt_data_fw_addr;
+		/* Freelist to cleanup */
+		u32 freelist_fw_addr;
+		/* ZS Buffer to cleanup */
+		u32 zs_buffer_fw_addr;
+	} cleanup_data;
+};
+
+enum rogue_fwif_power_type {
+	ROGUE_FWIF_POW_OFF_REQ = 1,
+	ROGUE_FWIF_POW_FORCED_IDLE_REQ,
+	ROGUE_FWIF_POW_NUM_UNITS_CHANGE,
+	ROGUE_FWIF_POW_APM_LATENCY_CHANGE
+};
+
+enum rogue_fwif_power_force_idle_type {
+	ROGUE_FWIF_POWER_FORCE_IDLE = 1,
+	ROGUE_FWIF_POWER_CANCEL_FORCED_IDLE,
+	ROGUE_FWIF_POWER_HOST_TIMEOUT,
+};
+
+struct rogue_fwif_power_request {
+	/* Type of power request */
+	enum rogue_fwif_power_type pow_type;
+	union {
+		/* Number of active Dusts */
+		u32 num_of_dusts;
+		/* If the operation is mandatory */
+		bool forced __aligned(4);
+		/*
+		 * Type of Request. Consolidating Force Idle, Cancel Forced
+		 * Idle, Host Timeout
+		 */
+		enum rogue_fwif_power_force_idle_type pow_request_type;
+	} power_req_data;
+};
+
+struct rogue_fwif_slcflushinvaldata {
+	/* Context to fence on (only useful when bDMContext == TRUE) */
+	u32 context_fw_addr;
+	/* Invalidate the cache as well as flushing */
+	bool inval __aligned(4);
+	/* The data to flush/invalidate belongs to a specific DM context */
+	bool dm_context __aligned(4);
+	/* Optional address of range (only useful when bDMContext == FALSE) */
+	aligned_u64 address;
+	/* Optional size of range (only useful when bDMContext == FALSE) */
+	aligned_u64 size;
+};
+
+enum rogue_fwif_hwperf_update_config {
+	ROGUE_FWIF_HWPERF_CTRL_TOGGLE = 0,
+	ROGUE_FWIF_HWPERF_CTRL_SET = 1,
+	ROGUE_FWIF_HWPERF_CTRL_EMIT_FEATURES_EV = 2
+};
+
+struct rogue_fwif_hwperf_ctrl {
+	enum rogue_fwif_hwperf_update_config opcode; /* Control operation code */
+	aligned_u64 mask; /* Mask of events to toggle */
+};
+
+struct rogue_fwif_hwperf_config_enable_blks {
+	/* Number of ROGUE_HWPERF_CONFIG_MUX_CNTBLK in the array */
+	u32 num_blocks;
+	/* Address of the ROGUE_HWPERF_CONFIG_MUX_CNTBLK array */
+	u32 block_configs_fw_addr;
+};
+
+struct rogue_fwif_hwperf_config_da_blks {
+	/* Number of ROGUE_HWPERF_CONFIG_CNTBLK in the array */
+	u32 num_blocks;
+	/* Address of the ROGUE_HWPERF_CONFIG_CNTBLK array */
+	u32 block_configs_fw_addr;
+};
+
+struct rogue_fwif_coreclkspeedchange_data {
+	u32 new_clock_speed; /* New clock speed */
+};
+
+#define ROGUE_FWIF_HWPERF_CTRL_BLKS_MAX 16
+
+struct rogue_fwif_hwperf_ctrl_blks {
+	bool enable;
+	/* Number of block IDs in the array */
+	u32 num_blocks;
+	/* Array of ROGUE_HWPERF_CNTBLK_ID values */
+	u16 block_ids[ROGUE_FWIF_HWPERF_CTRL_BLKS_MAX];
+};
+
+struct rogue_fwif_hwperf_select_custom_cntrs {
+	u16 custom_block;
+	u16 num_counters;
+	u32 custom_counter_ids_fw_addr;
+};
+
+struct rogue_fwif_zsbuffer_backing_data {
+	u32 zs_buffer_fw_addr; /* ZS-Buffer FW address */
+
+	bool done __aligned(4); /* action backing/unbacking succeeded */
+};
+
+struct rogue_fwif_freelist_gs_data {
+	/* Freelist FW address */
+	u32 freelist_fw_addr;
+	/* Amount of the Freelist change */
+	u32 delta_pages;
+	/* New amount of pages on the freelist (including ready pages) */
+	u32 new_pages;
+	/* Number of ready pages to be held in reserve until OOM */
+	u32 ready_pages;
+};
+
+#define MAX_FREELISTS_SIZE 3
+#define MAX_HW_GEOM_FRAG_CONTEXTS_SIZE 3
+
+#define ROGUE_FWIF_MAX_FREELISTS_TO_RECONSTRUCT \
+	(MAX_HW_GEOM_FRAG_CONTEXTS_SIZE * MAX_FREELISTS_SIZE * 2U)
+#define ROGUE_FWIF_FREELISTS_RECONSTRUCTION_FAILED_FLAG 0x80000000U
+
+struct rogue_fwif_freelists_reconstruction_data {
+	u32 freelist_count;
+	u32 freelist_ids[ROGUE_FWIF_MAX_FREELISTS_TO_RECONSTRUCT];
+};
+
+struct rogue_fwif_write_offset_update_data {
+	/*
+	 * Context to that may need to be resumed following write offset update
+	 */
+	u32 context_fw_addr;
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * Proactive DVFS Structures
+ ******************************************************************************
+ */
+#define NUM_OPP_VALUES 16
+
+struct pdvfs_opp {
+	u32 volt; /* V  */
+	u32 freq; /* Hz */
+} __aligned(8);
+
+struct rogue_fwif_pdvfs_opp {
+	struct pdvfs_opp opp_values[NUM_OPP_VALUES];
+	u32 min_opp_point;
+	u32 max_opp_point;
+} __aligned(8);
+
+struct rogue_fwif_pdvfs_max_freq_data {
+	u32 max_opp_point;
+} __aligned(8);
+
+struct rogue_fwif_pdvfs_min_freq_data {
+	u32 min_opp_point;
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * Register configuration structures
+ ******************************************************************************
+ */
+
+#define ROGUE_FWIF_REG_CFG_MAX_SIZE 512
+
+enum rogue_fwif_regdata_cmd_type {
+	ROGUE_FWIF_REGCFG_CMD_ADD = 101,
+	ROGUE_FWIF_REGCFG_CMD_CLEAR = 102,
+	ROGUE_FWIF_REGCFG_CMD_ENABLE = 103,
+	ROGUE_FWIF_REGCFG_CMD_DISABLE = 104
+};
+
+enum rogue_fwif_reg_cfg_type {
+	/* Sidekick power event */
+	ROGUE_FWIF_REG_CFG_TYPE_PWR_ON = 0,
+	/* Rascal / dust power event */
+	ROGUE_FWIF_REG_CFG_TYPE_DUST_CHANGE,
+	/* Geometry kick */
+	ROGUE_FWIF_REG_CFG_TYPE_GEOM,
+	/* Fragment kick */
+	ROGUE_FWIF_REG_CFG_TYPE_FRAG,
+	/* Compute kick */
+	ROGUE_FWIF_REG_CFG_TYPE_CDM,
+	/* TLA kick */
+	ROGUE_FWIF_REG_CFG_TYPE_TLA,
+	/* TDM kick */
+	ROGUE_FWIF_REG_CFG_TYPE_TDM,
+	/* Applies to all types. Keep as last element */
+	ROGUE_FWIF_REG_CFG_TYPE_ALL
+};
+
+struct rogue_fwif_reg_cfg_rec {
+	u64 sddr;
+	u64 mask;
+	u64 value;
+};
+
+struct rogue_fwif_regconfig_data {
+	enum rogue_fwif_regdata_cmd_type cmd_type;
+	enum rogue_fwif_reg_cfg_type reg_config_type;
+	struct rogue_fwif_reg_cfg_rec reg_config __aligned(8);
+};
+
+struct rogue_fwif_reg_cfg {
+	/*
+	 * PDump WRW command write granularity is 32 bits.
+	 * Add padding to ensure array size is 32 bit granular.
+	 */
+	u8 num_regs_type[ALIGN((u32)ROGUE_FWIF_REG_CFG_TYPE_ALL,
+			       sizeof(u32))] __aligned(8);
+	struct rogue_fwif_reg_cfg_rec
+		reg_configs[ROGUE_FWIF_REG_CFG_MAX_SIZE] __aligned(8);
+} __aligned(8);
+
+enum rogue_fwif_os_state_change {
+	ROGUE_FWIF_OS_ONLINE = 1,
+	ROGUE_FWIF_OS_OFFLINE
+};
+
+struct rogue_fwif_os_state_change_data {
+	u32 osid;
+	enum rogue_fwif_os_state_change new_os_state;
+} __aligned(8);
+
+enum rogue_fwif_counter_dump_request {
+	ROGUE_FWIF_PWR_COUNTER_DUMP_START = 1,
+	ROGUE_FWIF_PWR_COUNTER_DUMP_STOP,
+	ROGUE_FWIF_PWR_COUNTER_DUMP_SAMPLE,
+};
+
+struct rogue_fwif_counter_dump_data {
+	enum rogue_fwif_counter_dump_request counter_dump_request;
+} __aligned(8);
+
+enum rogue_fwif_kccb_cmd_type {
+	/* Common commands */
+	ROGUE_FWIF_KCCB_CMD_KICK = 101U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	ROGUE_FWIF_KCCB_CMD_MMUCACHE = 102U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	ROGUE_FWIF_KCCB_CMD_BP = 103U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* SLC flush and invalidation request */
+	ROGUE_FWIF_KCCB_CMD_SLCFLUSHINVAL = 105U |
+					    ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/*
+	 * Requests cleanup of a FW resource (type specified in the command
+	 * data)
+	 */
+	ROGUE_FWIF_KCCB_CMD_CLEANUP = 106U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Power request */
+	ROGUE_FWIF_KCCB_CMD_POW = 107U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Backing for on-demand ZS-Buffer done */
+	ROGUE_FWIF_KCCB_CMD_ZSBUFFER_BACKING_UPDATE =
+		108U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Unbacking for on-demand ZS-Buffer done */
+	ROGUE_FWIF_KCCB_CMD_ZSBUFFER_UNBACKING_UPDATE =
+		109U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Freelist Grow done */
+	ROGUE_FWIF_KCCB_CMD_FREELIST_GROW_UPDATE =
+		110U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Freelists Reconstruction done */
+	ROGUE_FWIF_KCCB_CMD_FREELISTS_RECONSTRUCTION_UPDATE =
+		112U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/*
+	 * Informs the firmware that the host has added more data to a CDM2
+	 * Circular Buffer
+	 */
+	ROGUE_FWIF_KCCB_CMD_NOTIFY_WRITE_OFFSET_UPDATE =
+		114U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Health check request */
+	ROGUE_FWIF_KCCB_CMD_HEALTH_CHECK = 115U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Forcing signalling of all unmet UFOs for a given CCB offset */
+	ROGUE_FWIF_KCCB_CMD_FORCE_UPDATE = 116U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* There is a geometry and a fragment command in this single kick */
+	ROGUE_FWIF_KCCB_CMD_COMBINED_GEOM_FRAG_KICK = 117U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Informs the FW that a Guest OS has come online / offline. */
+	ROGUE_FWIF_KCCB_CMD_OS_ONLINE_STATE_CONFIGURE	= 118U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* Commands only permitted to the native or host OS */
+	ROGUE_FWIF_KCCB_CMD_REGCONFIG = 200U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* Configure HWPerf events (to be generated) and HWPerf buffer address (if required) */
+	ROGUE_FWIF_KCCB_CMD_HWPERF_UPDATE_CONFIG = 201U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* Enable or disable multiple HWPerf blocks (reusing existing configuration) */
+	ROGUE_FWIF_KCCB_CMD_HWPERF_CTRL_BLKS = 203U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Core clock speed change event */
+	ROGUE_FWIF_KCCB_CMD_CORECLKSPEEDCHANGE = 204U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/*
+	 * Ask the firmware to update its cached ui32LogType value from the (shared)
+	 * tracebuf control structure
+	 */
+	ROGUE_FWIF_KCCB_CMD_LOGTYPE_UPDATE = 206U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Set a maximum frequency/OPP point */
+	ROGUE_FWIF_KCCB_CMD_PDVFS_LIMIT_MAX_FREQ = 207U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/*
+	 * Changes the relative scheduling priority for a particular OSid. It can
+	 * only be serviced for the Host DDK
+	 */
+	ROGUE_FWIF_KCCB_CMD_OSID_PRIORITY_CHANGE = 208U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Set or clear firmware state flags */
+	ROGUE_FWIF_KCCB_CMD_STATEFLAGS_CTRL = 209U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* Set a minimum frequency/OPP point */
+	ROGUE_FWIF_KCCB_CMD_PDVFS_LIMIT_MIN_FREQ = 212U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Configure Periodic Hardware Reset behaviour */
+	ROGUE_FWIF_KCCB_CMD_PHR_CFG = 213U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* Configure Safety Firmware Watchdog */
+	ROGUE_FWIF_KCCB_CMD_WDG_CFG = 215U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Controls counter dumping in the FW */
+	ROGUE_FWIF_KCCB_CMD_COUNTER_DUMP = 216U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Configure, clear and enable multiple HWPerf blocks */
+	ROGUE_FWIF_KCCB_CMD_HWPERF_CONFIG_ENABLE_BLKS = 217U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Configure the custom counters for HWPerf */
+	ROGUE_FWIF_KCCB_CMD_HWPERF_SELECT_CUSTOM_CNTRS = 218U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* Configure directly addressable counters for HWPerf */
+	ROGUE_FWIF_KCCB_CMD_HWPERF_CONFIG_BLKS = 220U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+};
+
+#define ROGUE_FWIF_LAST_ALLOWED_GUEST_KCCB_CMD \
+	(ROGUE_FWIF_KCCB_CMD_REGCONFIG - 1)
+
+/* Kernel CCB command packet */
+struct rogue_fwif_kccb_cmd {
+	/* Command type */
+	enum rogue_fwif_kccb_cmd_type cmd_type;
+	/* Compatibility and other flags */
+	u32 kccb_flags;
+
+	/*
+	 * NOTE: Make sure that uCmdData is the last member of this struct
+	 * This is to calculate actual command size for device mem copy.
+	 * (Refer ROGUEGetCmdMemCopySize())
+	 */
+	union {
+		/* Data for Kick command */
+		struct rogue_fwif_kccb_cmd_kick_data cmd_kick_data;
+		/* Data for combined geom/frag Kick command */
+		struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data
+			combined_geom_frag_cmd_kick_data;
+		/* Data for MMU cache command */
+		struct rogue_fwif_mmucachedata mmu_cache_data;
+		/* Data for Breakpoint Commands */
+		struct rogue_fwif_bpdata bp_data;
+		/* Data for SLC Flush/Inval commands */
+		struct rogue_fwif_slcflushinvaldata slc_flush_inval_data;
+		/* Data for cleanup commands */
+		struct rogue_fwif_cleanup_request cleanup_data;
+		/* Data for power request commands */
+		struct rogue_fwif_power_request pow_data;
+		/* Data for HWPerf control command */
+		struct rogue_fwif_hwperf_ctrl hw_perf_ctrl;
+		/*
+		 * Data for HWPerf configure, clear and enable performance
+		 * counter block command
+		 */
+		struct rogue_fwif_hwperf_config_enable_blks
+			hw_perf_cfg_enable_blks;
+		/*
+		 * Data for HWPerf enable or disable performance counter block
+		 * commands
+		 */
+		struct rogue_fwif_hwperf_ctrl_blks hw_perf_ctrl_blks;
+		/* Data for HWPerf configure the custom counters to read */
+		struct rogue_fwif_hwperf_select_custom_cntrs
+			hw_perf_select_cstm_cntrs;
+		/* Data for HWPerf configure Directly Addressable blocks */
+		struct rogue_fwif_hwperf_config_da_blks hw_perf_cfg_da_blks;
+		/* Data for core clock speed change */
+		struct rogue_fwif_coreclkspeedchange_data
+			core_clk_speed_change_data;
+		/* Feedback for Z/S Buffer backing/unbacking */
+		struct rogue_fwif_zsbuffer_backing_data zs_buffer_backing_data;
+		/* Feedback for Freelist grow/shrink */
+		struct rogue_fwif_freelist_gs_data free_list_gs_data;
+		/* Feedback for Freelists reconstruction*/
+		struct rogue_fwif_freelists_reconstruction_data
+			free_lists_reconstruction_data;
+		/* Data for custom register configuration */
+		struct rogue_fwif_regconfig_data reg_config_data;
+		/* Data for informing the FW about the write offset update */
+		struct rogue_fwif_write_offset_update_data
+			write_offset_update_data;
+		/* Data for setting the max frequency/OPP */
+		struct rogue_fwif_pdvfs_max_freq_data pdvfs_max_freq_data;
+		/* Data for setting the min frequency/OPP */
+		struct rogue_fwif_pdvfs_min_freq_data pdvfs_min_freq_data;
+		/* Data for updating the Guest Online states */
+		struct rogue_fwif_os_state_change_data cmd_os_online_state_data;
+		/* Dev address for TBI buffer allocated on demand */
+		u32 tbi_buffer_fw_addr;
+		/* Data for dumping of register ranges */
+		struct rogue_fwif_counter_dump_data counter_dump_config_data;
+		/* Data for signalling all unmet fences for a given CCB */
+		struct rogue_fwif_kccb_cmd_force_update_data force_update_data;
+	} cmd_data __aligned(8);
+} __aligned(8);
+
+PVR_FW_STRUCT_SIZE_ASSERT(struct rogue_fwif_kccb_cmd);
+
+/*
+ ******************************************************************************
+ * Firmware CCB command structure for ROGUE
+ ******************************************************************************
+ */
+
+struct rogue_fwif_fwccb_cmd_zsbuffer_backing_data {
+	u32 zs_buffer_id;
+};
+
+struct rogue_fwif_fwccb_cmd_freelist_gs_data {
+	u32 freelist_id;
+};
+
+struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data {
+	u32 freelist_count;
+	u32 hwr_counter;
+	u32 freelist_ids[ROGUE_FWIF_MAX_FREELISTS_TO_RECONSTRUCT];
+};
+
+/* 1 if a page fault happened */
+#define ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG_PF BIT(0)
+/* 1 if applicable to all contexts */
+#define ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG_ALL_CTXS BIT(1)
+
+struct rogue_fwif_fwccb_cmd_context_reset_data {
+	/* Context affected by the reset */
+	u32 server_common_context_id;
+	/* Reason for reset */
+	enum rogue_context_reset_reason reset_reason;
+	/* Data Master affected by the reset */
+	u32 dm;
+	/* Job ref running at the time of reset */
+	u32 reset_job_ref;
+	/* ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_FLAG bitfield */
+	u32 flags;
+	/* At what page catalog address */
+	aligned_u64 pc_address;
+	/* Page fault address (only when applicable) */
+	aligned_u64 fault_address;
+};
+
+struct rogue_fwif_fwccb_cmd_fw_pagefault_data {
+	/* Page fault address */
+	u64 fw_fault_addr;
+};
+
+enum rogue_fwif_fwccb_cmd_type {
+	/* Requests ZSBuffer to be backed with physical pages */
+	ROGUE_FWIF_FWCCB_CMD_ZSBUFFER_BACKING = 101U |
+						ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Requests ZSBuffer to be unbacked */
+	ROGUE_FWIF_FWCCB_CMD_ZSBUFFER_UNBACKING = 102U |
+						  ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Requests an on-demand freelist grow/shrink */
+	ROGUE_FWIF_FWCCB_CMD_FREELIST_GROW = 103U |
+					     ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Requests freelists reconstruction */
+	ROGUE_FWIF_FWCCB_CMD_FREELISTS_RECONSTRUCTION =
+		104U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Notifies host of a HWR event on a context */
+	ROGUE_FWIF_FWCCB_CMD_CONTEXT_RESET_NOTIFICATION =
+		105U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Requests an on-demand debug dump */
+	ROGUE_FWIF_FWCCB_CMD_DEBUG_DUMP = 106U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	/* Requests an on-demand update on process stats */
+	ROGUE_FWIF_FWCCB_CMD_UPDATE_STATS = 107U |
+					    ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	ROGUE_FWIF_FWCCB_CMD_CORE_CLK_RATE_CHANGE =
+		108U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+	ROGUE_FWIF_FWCCB_CMD_REQUEST_GPU_RESTART =
+		109U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+
+	/* Notifies host of a FW pagefault */
+	ROGUE_FWIF_FWCCB_CMD_CONTEXT_FW_PF_NOTIFICATION =
+		112U | ROGUE_CMD_MAGIC_DWORD_SHIFTED,
+};
+
+enum rogue_fwif_fwccb_cmd_update_stats_type {
+	/*
+	 * PVRSRVStatsUpdateRenderContextStats should increase the value of the
+	 * ui32TotalNumPartialRenders stat
+	 */
+	ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_PARTIAL_RENDERS = 1,
+	/*
+	 * PVRSRVStatsUpdateRenderContextStats should increase the value of the
+	 * ui32TotalNumOutOfMemory stat
+	 */
+	ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_OUT_OF_MEMORY,
+	/*
+	 * PVRSRVStatsUpdateRenderContextStats should increase the value of the
+	 * ui32NumGeomStores stat
+	 */
+	ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_GEOM_STORES,
+	/*
+	 * PVRSRVStatsUpdateRenderContextStats should increase the value of the
+	 * ui32NumFragStores stat
+	 */
+	ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_FRAG_STORES,
+	/*
+	 * PVRSRVStatsUpdateRenderContextStats should increase the value of the
+	 * ui32NumCDMStores stat
+	 */
+	ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_CDM_STORES,
+	/*
+	 * PVRSRVStatsUpdateRenderContextStats should increase the value of the
+	 * ui32NumTDMStores stat
+	 */
+	ROGUE_FWIF_FWCCB_CMD_UPDATE_NUM_TDM_STORES
+};
+
+struct rogue_fwif_fwccb_cmd_update_stats_data {
+	/* Element to update */
+	enum rogue_fwif_fwccb_cmd_update_stats_type element_to_update;
+	/* The pid of the process whose stats are being updated */
+	u32 pid_owner;
+	/* Adjustment to be made to the statistic */
+	s32 adjustment_value;
+};
+
+struct rogue_fwif_fwccb_cmd_core_clk_rate_change_data {
+	u32 core_clk_rate;
+} __aligned(8);
+
+struct rogue_fwif_fwccb_cmd {
+	/* Command type */
+	enum rogue_fwif_fwccb_cmd_type cmd_type;
+	/* Compatibility and other flags */
+	u32 fwccb_flags;
+
+	union {
+		/* Data for Z/S-Buffer on-demand (un)backing*/
+		struct rogue_fwif_fwccb_cmd_zsbuffer_backing_data
+			cmd_zs_buffer_backing;
+		/* Data for on-demand freelist grow/shrink */
+		struct rogue_fwif_fwccb_cmd_freelist_gs_data cmd_free_list_gs;
+		/* Data for freelists reconstruction */
+		struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data
+			cmd_freelists_reconstruction;
+		/* Data for context reset notification */
+		struct rogue_fwif_fwccb_cmd_context_reset_data
+			cmd_context_reset_notification;
+		/* Data for updating process stats */
+		struct rogue_fwif_fwccb_cmd_update_stats_data
+			cmd_update_stats_data;
+		struct rogue_fwif_fwccb_cmd_core_clk_rate_change_data
+			cmd_core_clk_rate_change;
+		struct rogue_fwif_fwccb_cmd_fw_pagefault_data cmd_fw_pagefault;
+	} cmd_data __aligned(8);
+} __aligned(8);
+
+PVR_FW_STRUCT_SIZE_ASSERT(struct rogue_fwif_fwccb_cmd);
+
+/*
+ ******************************************************************************
+ * Workload estimation Firmware CCB command structure for ROGUE
+ ******************************************************************************
+ */
+struct rogue_fwif_workest_fwccb_cmd {
+	/* Index for return data array */
+	u16 return_data_index;
+	/* The cycles the workload took on the hardware */
+	u32 cycles_taken;
+};
+
+/*
+ ******************************************************************************
+ * Client CCB commands for ROGUE
+ ******************************************************************************
+ */
+
+/*
+ * Required memory alignment for 64-bit variables accessible by Meta
+ * (The gcc meta aligns 64-bit variables to 64-bit; therefore, memory shared
+ * between the host and meta that contains 64-bit variables has to maintain
+ * this alignment)
+ */
+#define ROGUE_FWIF_FWALLOC_ALIGN sizeof(u64)
+
+#define ROGUE_CCB_TYPE_TASK BIT(15)
+#define ROGUE_CCB_FWALLOC_ALIGN(size)                \
+	(((size) + (ROGUE_FWIF_FWALLOC_ALIGN - 1)) & \
+	 ~(ROGUE_FWIF_FWALLOC_ALIGN - 1))
+
+#define ROGUE_FWIF_CCB_CMD_TYPE_GEOM \
+	(201U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_TQ_3D \
+	(202U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_FRAG \
+	(203U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR \
+	(204U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_CDM \
+	(205U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_TQ_TDM \
+	(206U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_FBSC_INVALIDATE \
+	(207U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_TQ_2D \
+	(208U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_PRE_TIMESTAMP \
+	(209U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_NULL \
+	(210U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+#define ROGUE_FWIF_CCB_CMD_TYPE_ABORT \
+	(211U | ROGUE_CMD_MAGIC_DWORD_SHIFTED | ROGUE_CCB_TYPE_TASK)
+
+/* Leave a gap between CCB specific commands and generic commands */
+#define ROGUE_FWIF_CCB_CMD_TYPE_FENCE (212U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+#define ROGUE_FWIF_CCB_CMD_TYPE_UPDATE (213U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+#define ROGUE_FWIF_CCB_CMD_TYPE_RMW_UPDATE \
+	(214U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+#define ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR (215U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+#define ROGUE_FWIF_CCB_CMD_TYPE_PRIORITY (216U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+/*
+ * Pre and Post timestamp commands are supposed to sandwich the DM cmd. The
+ * padding code with the CCB wrap upsets the FW if we don't have the task type
+ * bit cleared for POST_TIMESTAMPs. That's why we have 2 different cmd types.
+ */
+#define ROGUE_FWIF_CCB_CMD_TYPE_POST_TIMESTAMP \
+	(217U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+#define ROGUE_FWIF_CCB_CMD_TYPE_UNFENCED_UPDATE \
+	(218U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+#define ROGUE_FWIF_CCB_CMD_TYPE_UNFENCED_RMW_UPDATE \
+	(219U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+
+#define ROGUE_FWIF_CCB_CMD_TYPE_PADDING (221U | ROGUE_CMD_MAGIC_DWORD_SHIFTED)
+
+struct rogue_fwif_workest_kick_data {
+	/* Index for the KM Workload estimation return data array */
+	u16 return_data_index __aligned(8);
+	/* Predicted time taken to do the work in cycles */
+	u32 cycles_prediction __aligned(8);
+	/* Deadline for the workload */
+	aligned_u64 deadline;
+};
+
+struct rogue_fwif_ccb_cmd_header {
+	u32 cmd_type;
+	u32 cmd_size;
+	/*
+	 * external job reference - provided by client and used in debug for
+	 * tracking submitted work
+	 */
+	u32 ext_job_ref;
+	/*
+	 * internal job reference - generated by services and used in debug for
+	 * tracking submitted work
+	 */
+	u32 int_job_ref;
+	/* Workload Estimation - Workload Estimation Data */
+	struct rogue_fwif_workest_kick_data work_est_kick_data __aligned(8);
+};
+
+/*
+ ******************************************************************************
+ * Client CCB commands which are only required by the kernel
+ ******************************************************************************
+ */
+struct rogue_fwif_cmd_priority {
+	s32 priority;
+};
+
+/*
+ ******************************************************************************
+ * Signature and Checksums Buffer
+ ******************************************************************************
+ */
+struct rogue_fwif_sigbuf_ctl {
+	/* Ptr to Signature Buffer memory */
+	u32 buffer_fw_addr;
+	/* Amount of space left for storing regs in the buffer */
+	u32 left_size_in_regs;
+} __aligned(8);
+
+struct rogue_fwif_counter_dump_ctl {
+	/* Ptr to counter dump buffer */
+	u32 buffer_fw_addr;
+	/* Amount of space for storing in the buffer */
+	u32 size_in_dwords;
+} __aligned(8);
+
+struct rogue_fwif_firmware_gcov_ctl {
+	/* Ptr to firmware gcov buffer */
+	u32 buffer_fw_addr;
+	/* Amount of space for storing in the buffer */
+	u32 size;
+} __aligned(8);
+
+/*
+ *****************************************************************************
+ * ROGUE Compatibility checks
+ *****************************************************************************
+ */
+
+/*
+ * WARNING: Whenever the layout of ROGUE_FWIF_COMPCHECKS_BVNC changes, the
+ * following define should be increased by 1 to indicate to the compatibility
+ * logic that layout has changed.
+ */
+#define ROGUE_FWIF_COMPCHECKS_LAYOUT_VERSION 3
+
+struct rogue_fwif_compchecks_bvnc {
+	/* WARNING: This field must be defined as first one in this structure */
+	u32 layout_version;
+	aligned_u64 bvnc;
+} __aligned(8);
+
+struct rogue_fwif_init_options {
+	u8 os_count_support;
+	u8 padding[7];
+} __aligned(8);
+
+#define ROGUE_FWIF_COMPCHECKS_BVNC_DECLARE_AND_INIT(name) \
+	struct rogue_fwif_compchecks_bvnc(name) = {       \
+		ROGUE_FWIF_COMPCHECKS_LAYOUT_VERSION,     \
+		0,                                        \
+	}
+
+static inline void rogue_fwif_compchecks_bvnc_init(struct rogue_fwif_compchecks_bvnc *compchecks)
+{
+	compchecks->layout_version = ROGUE_FWIF_COMPCHECKS_LAYOUT_VERSION;
+	compchecks->bvnc = 0;
+}
+
+struct rogue_fwif_compchecks {
+	/* hardware BVNC (from the ROGUE registers) */
+	struct rogue_fwif_compchecks_bvnc hw_bvnc;
+	/* firmware BVNC */
+	struct rogue_fwif_compchecks_bvnc fw_bvnc;
+	/* identifier of the FW processor version */
+	u32 fw_processor_version;
+	/* software DDK version */
+	u32 ddk_version;
+	/* software DDK build no. */
+	u32 ddk_build;
+	/* build options bit-field */
+	u32 build_options;
+	/* initialisation options bit-field */
+	struct rogue_fwif_init_options init_options;
+	/* Information is valid */
+	bool updated __aligned(4);
+	u32 padding;
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * Updated configuration post FW data init.
+ ******************************************************************************
+ */
+struct rogue_fwif_runtime_cfg {
+	/* APM latency in ms before signalling IDLE to the host */
+	u32 active_pm_latency_ms;
+	/* Compatibility and other flags */
+	u32 runtime_cfg_flags;
+	/*
+	 * If set, APM latency does not reset to system default each GPU power
+	 * transition
+	 */
+	bool active_pm_latency_persistant __aligned(4);
+	/* Core clock speed, currently only used to calculate timer ticks */
+	u32 core_clock_speed;
+	/* Last number of dusts change requested by the host */
+	u32 default_dusts_num_init;
+	/* Periodic Hardware Reset configuration values */
+	u32 phr_mode;
+	/* New number of milliseconds C/S is allowed to last */
+	u32 hcs_deadline_ms;
+	/* The watchdog period in microseconds */
+	u32 wdg_period_us;
+	/* Array of priorities per OS */
+	u32 osid_priority[ROGUE_FW_MAX_NUM_OS];
+	/* On-demand allocated HWPerf buffer address, to be passed to the FW */
+	u32 hwperf_buf_fw_addr;
+
+	bool padding __aligned(4);
+};
+
+/*
+ *****************************************************************************
+ * Control data for ROGUE
+ *****************************************************************************
+ */
+
+#define ROGUE_FWIF_HWR_DEBUG_DUMP_ALL (99999U)
+
+enum rogue_fwif_tpu_dm {
+	ROGUE_FWIF_TPU_DM_PDM = 0,
+	ROGUE_FWIF_TPU_DM_VDM = 1,
+	ROGUE_FWIF_TPU_DM_CDM = 2,
+	ROGUE_FWIF_TPU_DM_TDM = 3,
+	ROGUE_FWIF_TPU_DM_LAST
+};
+
+enum rogue_fwif_gpio_val_mode {
+	/* No GPIO validation */
+	ROGUE_FWIF_GPIO_VAL_OFF = 0,
+	/*
+	 * Simple test case that initiates by sending data via the GPIO and then
+	 * sends back any data received over the GPIO
+	 */
+	ROGUE_FWIF_GPIO_VAL_GENERAL = 1,
+	/*
+	 * More complex test case that writes and reads data across the entire
+	 * GPIO AP address range.
+	 */
+	ROGUE_FWIF_GPIO_VAL_AP = 2,
+	/* Validates the GPIO Testbench. */
+	ROGUE_FWIF_GPIO_VAL_TESTBENCH = 5,
+	/* Send and then receive each byte in the range 0-255. */
+	ROGUE_FWIF_GPIO_VAL_LOOPBACK = 6,
+	/* Send and then receive each power-of-2 byte in the range 0-255. */
+	ROGUE_FWIF_GPIO_VAL_LOOPBACK_LITE = 7,
+	ROGUE_FWIF_GPIO_VAL_LAST
+};
+
+enum fw_perf_conf {
+	FW_PERF_CONF_NONE = 0,
+	FW_PERF_CONF_ICACHE = 1,
+	FW_PERF_CONF_DCACHE = 2,
+	FW_PERF_CONF_JTLB_INSTR = 5,
+	FW_PERF_CONF_INSTRUCTIONS = 6
+};
+
+enum fw_boot_stage {
+	FW_BOOT_STAGE_TLB_INIT_FAILURE = -2,
+	FW_BOOT_STAGE_NOT_AVAILABLE = -1,
+	FW_BOOT_NOT_STARTED = 0,
+	FW_BOOT_BLDR_STARTED = 1,
+	FW_BOOT_CACHE_DONE,
+	FW_BOOT_TLB_DONE,
+	FW_BOOT_MAIN_STARTED,
+	FW_BOOT_ALIGNCHECKS_DONE,
+	FW_BOOT_INIT_DONE,
+};
+
+/*
+ * Kernel CCB return slot responses. Usage of bit-fields instead of bare
+ * integers allows FW to possibly pack-in several responses for each single kCCB
+ * command.
+ */
+/* Command executed (return status from FW) */
+#define ROGUE_FWIF_KCCB_RTN_SLOT_CMD_EXECUTED BIT(0)
+/* A cleanup was requested but resource busy */
+#define ROGUE_FWIF_KCCB_RTN_SLOT_CLEANUP_BUSY BIT(1)
+/* Poll failed in FW for a HW operation to complete */
+#define ROGUE_FWIF_KCCB_RTN_SLOT_POLL_FAILURE BIT(2)
+/* Reset value of a kCCB return slot (set by host) */
+#define ROGUE_FWIF_KCCB_RTN_SLOT_NO_RESPONSE 0x0U
+
+struct rogue_fwif_connection_ctl {
+	/* Fw-Os connection states */
+	enum rogue_fwif_connection_fw_state connection_fw_state;
+	enum rogue_fwif_connection_os_state connection_os_state;
+	u32 alive_fw_token;
+	u32 alive_os_token;
+} __aligned(8);
+
+struct rogue_fwif_osinit {
+	/* Kernel CCB */
+	u32 kernel_ccbctl_fw_addr;
+	u32 kernel_ccb_fw_addr;
+	u32 kernel_ccb_rtn_slots_fw_addr;
+
+	/* Firmware CCB */
+	u32 firmware_ccbctl_fw_addr;
+	u32 firmware_ccb_fw_addr;
+
+	/* Workload Estimation Firmware CCB */
+	u32 work_est_firmware_ccbctl_fw_addr;
+	u32 work_est_firmware_ccb_fw_addr;
+
+	u32 rogue_fwif_hwr_info_buf_ctl_fw_addr;
+
+	u32 hwr_debug_dump_limit;
+
+	u32 fw_os_data_fw_addr;
+
+	/* Compatibility checks to be populated by the Firmware */
+	struct rogue_fwif_compchecks rogue_comp_checks;
+} __aligned(8);
+
+/* BVNC Features */
+struct rogue_hwperf_bvnc_block {
+	/* Counter block ID, see ROGUE_HWPERF_CNTBLK_ID */
+	u16 block_id;
+
+	/* Number of counters in this block type */
+	u16 num_counters;
+
+	/* Number of blocks of this type */
+	u16 num_blocks;
+
+	u16 reserved;
+};
+
+#define ROGUE_HWPERF_MAX_BVNC_LEN (24)
+
+#define ROGUE_HWPERF_MAX_BVNC_BLOCK_LEN (16U)
+
+/* BVNC Features */
+struct rogue_hwperf_bvnc {
+	/* BVNC string */
+	char bvnc_string[ROGUE_HWPERF_MAX_BVNC_LEN];
+	/* See ROGUE_HWPERF_FEATURE_FLAGS */
+	u32 bvnc_km_feature_flags;
+	/* Number of blocks described in aBvncBlocks */
+	u16 num_bvnc_blocks;
+	/* Number of GPU cores present */
+	u16 bvnc_gpu_cores;
+	/* Supported Performance Blocks for BVNC */
+	struct rogue_hwperf_bvnc_block
+		bvnc_blocks[ROGUE_HWPERF_MAX_BVNC_BLOCK_LEN];
+};
+
+PVR_FW_STRUCT_SIZE_ASSERT(struct rogue_hwperf_bvnc);
+
+struct rogue_fwif_sysinit {
+	/* Fault read address */
+	aligned_u64 fault_phys_addr;
+
+	/* PDS execution base */
+	aligned_u64 pds_exec_base;
+	/* UCS execution base */
+	aligned_u64 usc_exec_base;
+	/* FBCDC bindless texture state table base */
+	aligned_u64 fbcdc_state_table_base;
+	aligned_u64 fbcdc_large_state_table_base;
+	/* Texture state base */
+	aligned_u64 texture_heap_base;
+
+	/* Event filter for Firmware events */
+	u64 hw_perf_filter;
+
+	aligned_u64 slc3_fence_dev_addr;
+
+	u32 tpu_trilinear_frac_mask[ROGUE_FWIF_TPU_DM_LAST] __aligned(8);
+
+	/* Signature and Checksum Buffers for DMs */
+	struct rogue_fwif_sigbuf_ctl sigbuf_ctl[PVR_FWIF_DM_MAX];
+
+	struct rogue_fwif_pdvfs_opp pdvfs_opp_info;
+
+	struct rogue_fwif_dma_addr coremem_data_store;
+
+	struct rogue_fwif_counter_dump_ctl counter_dump_ctl;
+
+	u32 filter_flags;
+
+	u32 runtime_cfg_fw_addr;
+
+	u32 trace_buf_ctl_fw_addr;
+	u32 fw_sys_data_fw_addr;
+
+	u32 gpu_util_fw_cb_ctl_fw_addr;
+	u32 reg_cfg_fw_addr;
+	u32 hwperf_ctl_fw_addr;
+
+	u32 align_checks;
+
+	/* Core clock speed at FW boot time */
+	u32 initial_core_clock_speed;
+
+	/* APM latency in ms before signalling IDLE to the host */
+	u32 active_pm_latency_ms;
+
+	/* Flag to be set by the Firmware after successful start */
+	bool firmware_started __aligned(4);
+
+	/* Host/FW Trace synchronisation Partition Marker */
+	u32 marker_val;
+
+	/* Firmware initialization complete time */
+	u32 firmware_started_timestamp;
+
+	u32 jones_disable_mask;
+
+	/* Firmware performance counter config */
+	enum fw_perf_conf firmware_perf;
+
+	/*
+	 * FW Pointer to memory containing core clock rate in Hz.
+	 * Firmware (PDVFS) updates the memory when running on non primary FW
+	 * thread to communicate to host driver.
+	 */
+	u32 core_clock_rate_fw_addr;
+
+	enum rogue_fwif_gpio_val_mode gpio_validation_mode;
+
+	/* Used in HWPerf for decoding BVNC Features */
+	struct rogue_hwperf_bvnc bvnc_km_feature_flags;
+
+	/* Value to write into ROGUE_CR_TFBC_COMPRESSION_CONTROL */
+	u32 tfbc_compression_control;
+} __aligned(8);
+
+/*
+ *****************************************************************************
+ * Timer correlation shared data and defines
+ *****************************************************************************
+ */
+
+struct rogue_fwif_time_corr {
+	aligned_u64 os_timestamp;
+	aligned_u64 os_mono_timestamp;
+	aligned_u64 cr_timestamp;
+
+	/*
+	 * Utility variable used to convert CR timer deltas to OS timer deltas
+	 * (nS), where the deltas are relative to the timestamps above:
+	 * deltaOS = (deltaCR * K) >> decimal_shift, see full explanation below
+	 */
+	aligned_u64 cr_delta_to_os_delta_kns;
+
+	u32 core_clock_speed;
+	u32 reserved;
+} __aligned(8);
+
+/*
+ * The following macros are used to help converting FW timestamps to the Host
+ * time domain. On the FW the ROGUE_CR_TIMER counter is used to keep track of
+ * time; it increments by 1 every 256 GPU clock ticks, so the general
+ * formula to perform the conversion is:
+ *
+ * [ GPU clock speed in Hz, if (scale == 10^9) then deltaOS is in nS,
+ *   otherwise if (scale == 10^6) then deltaOS is in uS ]
+ *
+ *             deltaCR * 256                                   256 * scale
+ *  deltaOS = --------------- * scale = deltaCR * K    [ K = --------------- ]
+ *             GPUclockspeed                                  GPUclockspeed
+ *
+ * The actual K is multiplied by 2^20 (and deltaCR * K is divided by 2^20)
+ * to get some better accuracy and to avoid returning 0 in the integer
+ * division 256000000/GPUfreq if GPUfreq is greater than 256MHz.
+ * This is the same as keeping K as a decimal number.
+ *
+ * The maximum deltaOS is slightly more than 5hrs for all GPU frequencies
+ * (deltaCR * K is more or less a constant), and it's relative to the base
+ * OS timestamp sampled as a part of the timer correlation data.
+ * This base is refreshed on GPU power-on, DVFS transition and periodic
+ * frequency calibration (executed every few seconds if the FW is doing
+ * some work), so as long as the GPU is doing something and one of these
+ * events is triggered then deltaCR * K will not overflow and deltaOS will be
+ * correct.
+ */
+
+#define ROGUE_FWIF_CRDELTA_TO_OSDELTA_ACCURACY_SHIFT (20)
+
+#define ROGUE_FWIF_GET_DELTA_OSTIME_NS(delta_cr, k) \
+	(((delta_cr) * (k)) >> ROGUE_FWIF_CRDELTA_TO_OSDELTA_ACCURACY_SHIFT)
+
+/*
+ ******************************************************************************
+ * GPU Utilisation
+ ******************************************************************************
+ */
+
+/* See rogue_common.h for a list of GPU states */
+#define ROGUE_FWIF_GPU_UTIL_TIME_MASK \
+	(0xFFFFFFFFFFFFFFFFull & ~ROGUE_FWIF_GPU_UTIL_STATE_MASK)
+
+#define ROGUE_FWIF_GPU_UTIL_GET_TIME(word) \
+	((word)(&ROGUE_FWIF_GPU_UTIL_TIME_MASK))
+#define ROGUE_FWIF_GPU_UTIL_GET_STATE(word) \
+	((word)(&ROGUE_FWIF_GPU_UTIL_STATE_MASK))
+
+/*
+ * The OS timestamps computed by the FW are approximations of the real time,
+ * which means they could be slightly behind or ahead the real timer on the
+ * Host. In some cases we can perform subtractions between FW approximated
+ * timestamps and real OS timestamps, so we need a form of protection against
+ * negative results if for instance the FW one is a bit ahead of time.
+ */
+#define ROGUE_FWIF_GPU_UTIL_GET_PERIOD(newtime, oldtime) \
+	(((newtime) > (oldtime)) ? ((newtime) - (oldtime)) : 0U)
+
+#define ROGUE_FWIF_GPU_UTIL_MAKE_WORD(time, state) \
+	(ROGUE_FWIF_GPU_UTIL_GET_TIME(time) |      \
+	 ROGUE_FWIF_GPU_UTIL_GET_STATE(state))
+
+/*
+ * The timer correlation array must be big enough to ensure old entries won't be
+ * overwritten before all the HWPerf events linked to those entries are
+ * processed by the MISR. The update frequency of this array depends on how fast
+ * the system can change state (basically how small the APM latency is) and
+ * perform DVFS transitions.
+ *
+ * The minimum size is 2 (not 1) to avoid race conditions between the FW reading
+ * an entry while the Host is updating it. With 2 entries in the worst case the
+ * FW will read old data, which is still quite ok if the Host is updating the
+ * timer correlation at that time.
+ */
+#define ROGUE_FWIF_TIME_CORR_ARRAY_SIZE 256U
+#define ROGUE_FWIF_TIME_CORR_CURR_INDEX(seqcount) \
+	((seqcount) % ROGUE_FWIF_TIME_CORR_ARRAY_SIZE)
+
+/* Make sure the timer correlation array size is a power of 2 */
+static_assert((ROGUE_FWIF_TIME_CORR_ARRAY_SIZE &
+	       (ROGUE_FWIF_TIME_CORR_ARRAY_SIZE - 1U)) == 0U,
+	      "ROGUE_FWIF_TIME_CORR_ARRAY_SIZE must be a power of two");
+
+struct rogue_fwif_gpu_util_fwcb {
+	struct rogue_fwif_time_corr time_corr[ROGUE_FWIF_TIME_CORR_ARRAY_SIZE];
+	u32 time_corr_seq_count;
+
+	/* Compatibility and other flags */
+	u32 gpu_util_flags;
+
+	/* Last GPU state + OS time of the last state update */
+	aligned_u64 last_word;
+
+	/* Counters for the amount of time the GPU was active/idle/blocked */
+	aligned_u64 stats_counters[PVR_FWIF_GPU_UTIL_STATE_NUM];
+} __aligned(8);
+
+struct rogue_fwif_rta_ctl {
+	/* Render number */
+	u32 render_target_index;
+	/* index in RTA */
+	u32 current_render_target;
+	/* total active RTs */
+	u32 active_render_targets;
+	/* total active RTs from the first TA kick, for OOM */
+	u32 cumul_active_render_targets;
+	/* Array of valid RT indices */
+	u32 valid_render_targets_fw_addr;
+	/* Array of number of occurred partial renders per render target */
+	u32 rta_num_partial_renders_fw_addr;
+	/* Number of render targets in the array */
+	u32 max_rts;
+	/* Compatibility and other flags */
+	u32 rta_ctl_flags;
+} __aligned(8);
+
+struct rogue_fwif_freelist {
+	aligned_u64 freelist_dev_addr;
+	aligned_u64 current_dev_addr;
+	u32 current_stack_top;
+	u32 max_pages;
+	u32 grow_pages;
+	/* HW pages */
+	u32 current_pages;
+	u32 allocated_page_count;
+	u32 allocated_mmu_page_count;
+	u32 freelist_id;
+
+	bool grow_pending __aligned(4);
+	/* Pages that should be used only when OOM is reached */
+	u32 ready_pages;
+	/* Compatibility and other flags */
+	u32 freelist_flags;
+	/* PM Global PB on which Freelist is loaded */
+	u32 pm_global_pb;
+	u32 padding;
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * HWRTData
+ ******************************************************************************
+ */
+
+/* HWRTData flags */
+/* Deprecated flags 1:0 */
+#define HWRTDATA_HAS_LAST_GEOM BIT(2)
+#define HWRTDATA_PARTIAL_RENDERED BIT(3)
+#define HWRTDATA_DISABLE_TILE_REORDERING BIT(4)
+#define HWRTDATA_NEED_BRN65101_BLIT BIT(5)
+#define HWRTDATA_FIRST_BRN65101_STRIP BIT(6)
+#define HWRTDATA_NEED_BRN67182_2ND_RENDER BIT(7)
+
+enum rogue_fwif_rtdata_state {
+	ROGUE_FWIF_RTDATA_STATE_NONE = 0,
+	ROGUE_FWIF_RTDATA_STATE_KICK_GEOM,
+	ROGUE_FWIF_RTDATA_STATE_KICK_GEOM_FIRST,
+	ROGUE_FWIF_RTDATA_STATE_GEOM_FINISHED,
+	ROGUE_FWIF_RTDATA_STATE_KICK_FRAG,
+	ROGUE_FWIF_RTDATA_STATE_FRAG_FINISHED,
+	ROGUE_FWIF_RTDATA_STATE_FRAG_CONTEXT_STORED,
+	ROGUE_FWIF_RTDATA_STATE_GEOM_OUTOFMEM,
+	ROGUE_FWIF_RTDATA_STATE_PARTIALRENDERFINISHED,
+	/*
+	 * In case of HWR, we can't set the RTDATA state to NONE, as this will
+	 * cause any TA to become a first TA. To ensure all related TA's are
+	 * skipped, we use the HWR state
+	 */
+	ROGUE_FWIF_RTDATA_STATE_HWR,
+	ROGUE_FWIF_RTDATA_STATE_UNKNOWN = 0x7FFFFFFFU
+};
+
+struct rogue_fwif_hwrtdata_common {
+	bool geom_caches_need_zeroing __aligned(4);
+
+	u32 screen_pixel_max;
+	aligned_u64 multi_sample_ctl;
+	u64 flipped_multi_sample_ctl;
+	u32 tpc_stride;
+	u32 tpc_size;
+	u32 te_screen;
+	u32 mtile_stride;
+	u32 teaa;
+	u32 te_mtile1;
+	u32 te_mtile2;
+	u32 isp_merge_lower_x;
+	u32 isp_merge_lower_y;
+	u32 isp_merge_upper_x;
+	u32 isp_merge_upper_y;
+	u32 isp_merge_scale_x;
+	u32 isp_merge_scale_y;
+	u32 rgn_header_size;
+	u32 isp_mtile_size;
+	u32 padding;
+} __aligned(8);
+
+struct rogue_fwif_hwrtdata {
+	/* MList Data Store */
+	aligned_u64 pm_mlist_dev_addr;
+
+	aligned_u64 vce_cat_base[4];
+	aligned_u64 vce_last_cat_base[4];
+	aligned_u64 te_cat_base[4];
+	aligned_u64 te_last_cat_base[4];
+	aligned_u64 alist_cat_base;
+	aligned_u64 alist_last_cat_base;
+
+	aligned_u64 pm_alist_stack_pointer;
+	u32 pm_mlist_stack_pointer;
+
+	u32 hwrt_data_common_fw_addr;
+
+	u32 hwrt_data_flags;
+	enum rogue_fwif_rtdata_state state;
+
+	u32 freelists_fw_addr[MAX_FREELISTS_SIZE] __aligned(8);
+	u32 freelist_hwr_snapshot[MAX_FREELISTS_SIZE];
+
+	aligned_u64 vheap_table_dev_addr;
+
+	struct rogue_fwif_rta_ctl rta_ctl;
+
+	aligned_u64 tail_ptrs_dev_addr;
+	aligned_u64 macrotile_array_dev_addr;
+	aligned_u64 rgn_header_dev_addr;
+	aligned_u64 rtc_dev_addr;
+
+	u32 owner_geom_not_used_by_host __aligned(8);
+
+	bool geom_caches_need_zeroing __aligned(4);
+
+	struct rogue_fwif_cleanup_ctl cleanup_state __aligned(64);
+} __aligned(8);
+
+/*
+ ******************************************************************************
+ * Sync checkpoints
+ ******************************************************************************
+ */
+
+#define PVR_SYNC_CHECKPOINT_UNDEF 0x000
+#define PVR_SYNC_CHECKPOINT_ACTIVE 0xac1     /* Checkpoint has not signaled. */
+#define PVR_SYNC_CHECKPOINT_SIGNALED 0x519   /* Checkpoint has signaled. */
+#define PVR_SYNC_CHECKPOINT_ERRORED 0xeff    /* Checkpoint has been errored. */
+
+#include "pvr_rogue_fwif_check.h"
+
+#endif /* PVR_ROGUE_FWIF_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h
new file mode 100644
index 0000000000000..51dc37e78f41d
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_CHECK_H
+#define PVR_ROGUE_FWIF_CHECK_H
+
+#include <linux/build_bug.h>
+
+#define OFFSET_CHECK(type, member, offset) \
+	static_assert(offsetof(type, member) == (offset), \
+		      "offsetof(" #type ", " #member ") incorrect")
+
+#define SIZE_CHECK(type, size) \
+	static_assert(sizeof(type) == (size), #type " is incorrect size")
+
+OFFSET_CHECK(struct rogue_fwif_file_info_buf, path, 0);
+OFFSET_CHECK(struct rogue_fwif_file_info_buf, info, 200);
+OFFSET_CHECK(struct rogue_fwif_file_info_buf, line_num, 400);
+SIZE_CHECK(struct rogue_fwif_file_info_buf, 408);
+
+OFFSET_CHECK(struct rogue_fwif_tracebuf_space, trace_pointer, 0);
+OFFSET_CHECK(struct rogue_fwif_tracebuf_space, trace_buffer_fw_addr, 4);
+OFFSET_CHECK(struct rogue_fwif_tracebuf_space, trace_buffer, 8);
+OFFSET_CHECK(struct rogue_fwif_tracebuf_space, assert_buf, 16);
+SIZE_CHECK(struct rogue_fwif_tracebuf_space, 424);
+
+OFFSET_CHECK(struct rogue_fwif_tracebuf, log_type, 0);
+OFFSET_CHECK(struct rogue_fwif_tracebuf, tracebuf, 8);
+OFFSET_CHECK(struct rogue_fwif_tracebuf, tracebuf_size_in_dwords, 856);
+OFFSET_CHECK(struct rogue_fwif_tracebuf, tracebuf_flags, 860);
+SIZE_CHECK(struct rogue_fwif_tracebuf, 864);
+
+OFFSET_CHECK(struct rogue_fw_fault_info, cr_timer, 0);
+OFFSET_CHECK(struct rogue_fw_fault_info, os_timer, 8);
+OFFSET_CHECK(struct rogue_fw_fault_info, data, 16);
+OFFSET_CHECK(struct rogue_fw_fault_info, reserved, 20);
+OFFSET_CHECK(struct rogue_fw_fault_info, fault_buf, 24);
+SIZE_CHECK(struct rogue_fw_fault_info, 432);
+
+OFFSET_CHECK(struct rogue_fwif_sysdata, config_flags, 0);
+OFFSET_CHECK(struct rogue_fwif_sysdata, config_flags_ext, 4);
+OFFSET_CHECK(struct rogue_fwif_sysdata, pow_state, 8);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_ridx, 12);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_widx, 16);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_wrap_count, 20);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_size, 24);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_drop_count, 28);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hw_perf_ut, 32);
+OFFSET_CHECK(struct rogue_fwif_sysdata, first_drop_ordinal, 36);
+OFFSET_CHECK(struct rogue_fwif_sysdata, last_drop_ordinal, 40);
+OFFSET_CHECK(struct rogue_fwif_sysdata, os_runtime_flags_mirror, 44);
+OFFSET_CHECK(struct rogue_fwif_sysdata, fault_info, 80);
+OFFSET_CHECK(struct rogue_fwif_sysdata, fw_faults, 3536);
+OFFSET_CHECK(struct rogue_fwif_sysdata, cr_poll_addr, 3540);
+OFFSET_CHECK(struct rogue_fwif_sysdata, cr_poll_mask, 3548);
+OFFSET_CHECK(struct rogue_fwif_sysdata, cr_poll_count, 3556);
+OFFSET_CHECK(struct rogue_fwif_sysdata, start_idle_time, 3568);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hwr_state_flags, 3576);
+OFFSET_CHECK(struct rogue_fwif_sysdata, hwr_recovery_flags, 3580);
+OFFSET_CHECK(struct rogue_fwif_sysdata, fw_sys_data_flags, 3616);
+OFFSET_CHECK(struct rogue_fwif_sysdata, mc_config, 3620);
+SIZE_CHECK(struct rogue_fwif_sysdata, 3624);
+
+OFFSET_CHECK(struct rogue_fwif_slr_entry, timestamp, 0);
+OFFSET_CHECK(struct rogue_fwif_slr_entry, fw_ctx_addr, 8);
+OFFSET_CHECK(struct rogue_fwif_slr_entry, num_ufos, 12);
+OFFSET_CHECK(struct rogue_fwif_slr_entry, ccb_name, 16);
+SIZE_CHECK(struct rogue_fwif_slr_entry, 48);
+
+OFFSET_CHECK(struct rogue_fwif_osdata, fw_os_config_flags, 0);
+OFFSET_CHECK(struct rogue_fwif_osdata, fw_sync_check_mark, 4);
+OFFSET_CHECK(struct rogue_fwif_osdata, host_sync_check_mark, 8);
+OFFSET_CHECK(struct rogue_fwif_osdata, forced_updates_requested, 12);
+OFFSET_CHECK(struct rogue_fwif_osdata, slr_log_wp, 16);
+OFFSET_CHECK(struct rogue_fwif_osdata, slr_log_first, 24);
+OFFSET_CHECK(struct rogue_fwif_osdata, slr_log, 72);
+OFFSET_CHECK(struct rogue_fwif_osdata, last_forced_update_time, 552);
+OFFSET_CHECK(struct rogue_fwif_osdata, interrupt_count, 560);
+OFFSET_CHECK(struct rogue_fwif_osdata, kccb_cmds_executed, 568);
+OFFSET_CHECK(struct rogue_fwif_osdata, power_sync_fw_addr, 572);
+OFFSET_CHECK(struct rogue_fwif_osdata, fw_os_data_flags, 576);
+SIZE_CHECK(struct rogue_fwif_osdata, 584);
+
+OFFSET_CHECK(struct rogue_bifinfo, bif_req_status, 0);
+OFFSET_CHECK(struct rogue_bifinfo, bif_mmu_status, 8);
+OFFSET_CHECK(struct rogue_bifinfo, pc_address, 16);
+OFFSET_CHECK(struct rogue_bifinfo, reserved, 24);
+SIZE_CHECK(struct rogue_bifinfo, 32);
+
+OFFSET_CHECK(struct rogue_eccinfo, fault_gpu, 0);
+SIZE_CHECK(struct rogue_eccinfo, 4);
+
+OFFSET_CHECK(struct rogue_mmuinfo, mmu_status, 0);
+OFFSET_CHECK(struct rogue_mmuinfo, pc_address, 16);
+OFFSET_CHECK(struct rogue_mmuinfo, reserved, 24);
+SIZE_CHECK(struct rogue_mmuinfo, 32);
+
+OFFSET_CHECK(struct rogue_pollinfo, thread_num, 0);
+OFFSET_CHECK(struct rogue_pollinfo, cr_poll_addr, 4);
+OFFSET_CHECK(struct rogue_pollinfo, cr_poll_mask, 8);
+OFFSET_CHECK(struct rogue_pollinfo, cr_poll_last_value, 12);
+OFFSET_CHECK(struct rogue_pollinfo, reserved, 16);
+SIZE_CHECK(struct rogue_pollinfo, 24);
+
+OFFSET_CHECK(struct rogue_tlbinfo, bad_addr, 0);
+OFFSET_CHECK(struct rogue_tlbinfo, entry_lo, 4);
+SIZE_CHECK(struct rogue_tlbinfo, 8);
+
+OFFSET_CHECK(struct rogue_hwrinfo, hwr_data, 0);
+OFFSET_CHECK(struct rogue_hwrinfo, cr_timer, 32);
+OFFSET_CHECK(struct rogue_hwrinfo, os_timer, 40);
+OFFSET_CHECK(struct rogue_hwrinfo, frame_num, 48);
+OFFSET_CHECK(struct rogue_hwrinfo, pid, 52);
+OFFSET_CHECK(struct rogue_hwrinfo, active_hwrt_data, 56);
+OFFSET_CHECK(struct rogue_hwrinfo, hwr_number, 60);
+OFFSET_CHECK(struct rogue_hwrinfo, event_status, 64);
+OFFSET_CHECK(struct rogue_hwrinfo, hwr_recovery_flags, 68);
+OFFSET_CHECK(struct rogue_hwrinfo, hwr_type, 72);
+OFFSET_CHECK(struct rogue_hwrinfo, dm, 76);
+OFFSET_CHECK(struct rogue_hwrinfo, core_id, 80);
+OFFSET_CHECK(struct rogue_hwrinfo, cr_time_of_kick, 88);
+OFFSET_CHECK(struct rogue_hwrinfo, cr_time_hw_reset_start, 96);
+OFFSET_CHECK(struct rogue_hwrinfo, cr_time_hw_reset_finish, 104);
+OFFSET_CHECK(struct rogue_hwrinfo, cr_time_freelist_ready, 112);
+OFFSET_CHECK(struct rogue_hwrinfo, reserved, 120);
+SIZE_CHECK(struct rogue_hwrinfo, 136);
+
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_info, 0);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_counter, 2176);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, write_index, 2180);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, dd_req_count, 2184);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_info_buf_flags, 2188);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_locked_up_count, 2192);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_overran_count, 2228);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_recovered_count, 2264);
+OFFSET_CHECK(struct rogue_fwif_hwrinfobuf, hwr_dm_false_detect_count, 2300);
+SIZE_CHECK(struct rogue_fwif_hwrinfobuf, 2336);
+
+OFFSET_CHECK(struct rogue_fwif_fwmemcontext, pc_dev_paddr, 0);
+OFFSET_CHECK(struct rogue_fwif_fwmemcontext, page_cat_base_reg_set, 8);
+OFFSET_CHECK(struct rogue_fwif_fwmemcontext, breakpoint_addr, 12);
+OFFSET_CHECK(struct rogue_fwif_fwmemcontext, bp_handler_addr, 16);
+OFFSET_CHECK(struct rogue_fwif_fwmemcontext, breakpoint_ctl, 20);
+OFFSET_CHECK(struct rogue_fwif_fwmemcontext, fw_mem_ctx_flags, 24);
+SIZE_CHECK(struct rogue_fwif_fwmemcontext, 32);
+
+OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_reg_vdm_call_stack_pointer, 0);
+OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_reg_vdm_call_stack_pointer_init, 8);
+OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_reg_vbs_so_prim, 16);
+OFFSET_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, geom_current_idx, 32);
+SIZE_CHECK(struct rogue_fwif_geom_ctx_state_per_geom, 40);
+
+OFFSET_CHECK(struct rogue_fwif_geom_ctx_state, geom_core, 0);
+SIZE_CHECK(struct rogue_fwif_geom_ctx_state, 160);
+
+OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, frag_reg_pm_deallocated_mask_status, 0);
+OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, frag_reg_dm_pds_mtilefree_status, 4);
+OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, ctx_state_flags, 8);
+OFFSET_CHECK(struct rogue_fwif_frag_ctx_state, frag_reg_isp_store, 12);
+SIZE_CHECK(struct rogue_fwif_frag_ctx_state, 16);
+
+OFFSET_CHECK(struct rogue_fwif_compute_ctx_state, ctx_state_flags, 0);
+SIZE_CHECK(struct rogue_fwif_compute_ctx_state, 4);
+
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, ccbctl_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, ccb_fw_addr, 4);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, ccb_meta_dma_addr, 8);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, context_state_addr, 24);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, fw_com_ctx_flags, 28);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, priority, 32);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, priority_seq_num, 36);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, rf_cmd_addr, 40);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_pending, 44);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_num_stores, 48);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_num_out_of_memory, 52);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, stats_num_partial_renders, 56);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, dm, 60);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, wait_signal_address, 64);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, wait_signal_node, 72);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, buf_stalled_node, 80);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, cbuf_queue_ctrl_addr, 88);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, robustness_address, 96);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, max_deadline_ms, 104);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, read_offset_needs_reset, 108);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, waiting_node, 112);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, run_node, 120);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, last_failed_ufo, 128);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, fw_mem_context_fw_addr, 136);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, server_common_context_id, 140);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, pid, 144);
+OFFSET_CHECK(struct rogue_fwif_fwcommoncontext, geom_oom_disabled, 148);
+SIZE_CHECK(struct rogue_fwif_fwcommoncontext, 152);
+
+OFFSET_CHECK(struct rogue_fwif_ccb_ctl, write_offset, 0);
+OFFSET_CHECK(struct rogue_fwif_ccb_ctl, padding, 4);
+OFFSET_CHECK(struct rogue_fwif_ccb_ctl, read_offset, 128);
+OFFSET_CHECK(struct rogue_fwif_ccb_ctl, wrap_mask, 132);
+OFFSET_CHECK(struct rogue_fwif_ccb_ctl, cmd_size, 136);
+OFFSET_CHECK(struct rogue_fwif_ccb_ctl, padding2, 140);
+SIZE_CHECK(struct rogue_fwif_ccb_ctl, 144);
+
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, context_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, client_woff_update, 4);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, client_wrap_mask_update, 8);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, num_cleanup_ctl, 12);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, cleanup_ctl_fw_addr, 16);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_kick_data, work_est_cmd_header_offset, 28);
+SIZE_CHECK(struct rogue_fwif_kccb_cmd_kick_data, 32);
+
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data, geom_cmd_kick_data, 0);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data, frag_cmd_kick_data, 32);
+SIZE_CHECK(struct rogue_fwif_kccb_cmd_combined_geom_frag_kick_data, 64);
+
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_force_update_data, context_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd_force_update_data, ccb_fence_offset, 4);
+SIZE_CHECK(struct rogue_fwif_kccb_cmd_force_update_data, 8);
+
+OFFSET_CHECK(struct rogue_fwif_cleanup_request, cleanup_type, 0);
+OFFSET_CHECK(struct rogue_fwif_cleanup_request, cleanup_data, 4);
+SIZE_CHECK(struct rogue_fwif_cleanup_request, 8);
+
+OFFSET_CHECK(struct rogue_fwif_power_request, pow_type, 0);
+OFFSET_CHECK(struct rogue_fwif_power_request, power_req_data, 4);
+SIZE_CHECK(struct rogue_fwif_power_request, 8);
+
+OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, context_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, inval, 4);
+OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, dm_context, 8);
+OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, address, 16);
+OFFSET_CHECK(struct rogue_fwif_slcflushinvaldata, size, 24);
+SIZE_CHECK(struct rogue_fwif_slcflushinvaldata, 32);
+
+OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl, opcode, 0);
+OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl, mask, 8);
+SIZE_CHECK(struct rogue_fwif_hwperf_ctrl, 16);
+
+OFFSET_CHECK(struct rogue_fwif_hwperf_config_enable_blks, num_blocks, 0);
+OFFSET_CHECK(struct rogue_fwif_hwperf_config_enable_blks, block_configs_fw_addr, 4);
+SIZE_CHECK(struct rogue_fwif_hwperf_config_enable_blks, 8);
+
+OFFSET_CHECK(struct rogue_fwif_hwperf_config_da_blks, num_blocks, 0);
+OFFSET_CHECK(struct rogue_fwif_hwperf_config_da_blks, block_configs_fw_addr, 4);
+SIZE_CHECK(struct rogue_fwif_hwperf_config_da_blks, 8);
+
+OFFSET_CHECK(struct rogue_fwif_coreclkspeedchange_data, new_clock_speed, 0);
+SIZE_CHECK(struct rogue_fwif_coreclkspeedchange_data, 4);
+
+OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl_blks, enable, 0);
+OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl_blks, num_blocks, 4);
+OFFSET_CHECK(struct rogue_fwif_hwperf_ctrl_blks, block_ids, 8);
+SIZE_CHECK(struct rogue_fwif_hwperf_ctrl_blks, 40);
+
+OFFSET_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, custom_block, 0);
+OFFSET_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, num_counters, 2);
+OFFSET_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, custom_counter_ids_fw_addr, 4);
+SIZE_CHECK(struct rogue_fwif_hwperf_select_custom_cntrs, 8);
+
+OFFSET_CHECK(struct rogue_fwif_zsbuffer_backing_data, zs_buffer_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_zsbuffer_backing_data, done, 4);
+SIZE_CHECK(struct rogue_fwif_zsbuffer_backing_data, 8);
+
+OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, freelist_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, delta_pages, 4);
+OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, new_pages, 8);
+OFFSET_CHECK(struct rogue_fwif_freelist_gs_data, ready_pages, 12);
+SIZE_CHECK(struct rogue_fwif_freelist_gs_data, 16);
+
+OFFSET_CHECK(struct rogue_fwif_freelists_reconstruction_data, freelist_count, 0);
+OFFSET_CHECK(struct rogue_fwif_freelists_reconstruction_data, freelist_ids, 4);
+SIZE_CHECK(struct rogue_fwif_freelists_reconstruction_data, 76);
+
+OFFSET_CHECK(struct rogue_fwif_write_offset_update_data, context_fw_addr, 0);
+SIZE_CHECK(struct rogue_fwif_write_offset_update_data, 8);
+
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd, cmd_type, 0);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd, kccb_flags, 4);
+OFFSET_CHECK(struct rogue_fwif_kccb_cmd, cmd_data, 8);
+SIZE_CHECK(struct rogue_fwif_kccb_cmd, 88);
+
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, server_common_context_id, 0);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, reset_reason, 4);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, dm, 8);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, reset_job_ref, 12);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, flags, 16);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, pc_address, 24);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, fault_address, 32);
+SIZE_CHECK(struct rogue_fwif_fwccb_cmd_context_reset_data, 40);
+
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd_fw_pagefault_data, fw_fault_addr, 0);
+SIZE_CHECK(struct rogue_fwif_fwccb_cmd_fw_pagefault_data, 8);
+
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd, cmd_type, 0);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd, fwccb_flags, 4);
+OFFSET_CHECK(struct rogue_fwif_fwccb_cmd, cmd_data, 8);
+SIZE_CHECK(struct rogue_fwif_fwccb_cmd, 88);
+
+OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, cmd_type, 0);
+OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, cmd_size, 4);
+OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, ext_job_ref, 8);
+OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, int_job_ref, 12);
+OFFSET_CHECK(struct rogue_fwif_ccb_cmd_header, work_est_kick_data, 16);
+SIZE_CHECK(struct rogue_fwif_ccb_cmd_header, 40);
+
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, active_pm_latency_ms, 0);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, runtime_cfg_flags, 4);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, active_pm_latency_persistant, 8);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, core_clock_speed, 12);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, default_dusts_num_init, 16);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, phr_mode, 20);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, hcs_deadline_ms, 24);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, wdg_period_us, 28);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, osid_priority, 32);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, hwperf_buf_fw_addr, 64);
+OFFSET_CHECK(struct rogue_fwif_runtime_cfg, padding, 68);
+SIZE_CHECK(struct rogue_fwif_runtime_cfg, 72);
+
+OFFSET_CHECK(struct rogue_fwif_connection_ctl, connection_fw_state, 0);
+OFFSET_CHECK(struct rogue_fwif_connection_ctl, connection_os_state, 4);
+OFFSET_CHECK(struct rogue_fwif_connection_ctl, alive_fw_token, 8);
+OFFSET_CHECK(struct rogue_fwif_connection_ctl, alive_os_token, 12);
+SIZE_CHECK(struct rogue_fwif_connection_ctl, 16);
+
+OFFSET_CHECK(struct rogue_fwif_compchecks_bvnc, layout_version, 0);
+OFFSET_CHECK(struct rogue_fwif_compchecks_bvnc, bvnc, 8);
+SIZE_CHECK(struct rogue_fwif_compchecks_bvnc, 16);
+
+OFFSET_CHECK(struct rogue_fwif_init_options, os_count_support, 0);
+SIZE_CHECK(struct rogue_fwif_init_options, 8);
+
+OFFSET_CHECK(struct rogue_fwif_compchecks, hw_bvnc, 0);
+OFFSET_CHECK(struct rogue_fwif_compchecks, fw_bvnc, 16);
+OFFSET_CHECK(struct rogue_fwif_compchecks, fw_processor_version, 32);
+OFFSET_CHECK(struct rogue_fwif_compchecks, ddk_version, 36);
+OFFSET_CHECK(struct rogue_fwif_compchecks, ddk_build, 40);
+OFFSET_CHECK(struct rogue_fwif_compchecks, build_options, 44);
+OFFSET_CHECK(struct rogue_fwif_compchecks, init_options, 48);
+OFFSET_CHECK(struct rogue_fwif_compchecks, updated, 56);
+SIZE_CHECK(struct rogue_fwif_compchecks, 64);
+
+OFFSET_CHECK(struct rogue_fwif_osinit, kernel_ccbctl_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_osinit, kernel_ccb_fw_addr, 4);
+OFFSET_CHECK(struct rogue_fwif_osinit, kernel_ccb_rtn_slots_fw_addr, 8);
+OFFSET_CHECK(struct rogue_fwif_osinit, firmware_ccbctl_fw_addr, 12);
+OFFSET_CHECK(struct rogue_fwif_osinit, firmware_ccb_fw_addr, 16);
+OFFSET_CHECK(struct rogue_fwif_osinit, work_est_firmware_ccbctl_fw_addr, 20);
+OFFSET_CHECK(struct rogue_fwif_osinit, work_est_firmware_ccb_fw_addr, 24);
+OFFSET_CHECK(struct rogue_fwif_osinit, rogue_fwif_hwr_info_buf_ctl_fw_addr, 28);
+OFFSET_CHECK(struct rogue_fwif_osinit, hwr_debug_dump_limit, 32);
+OFFSET_CHECK(struct rogue_fwif_osinit, fw_os_data_fw_addr, 36);
+OFFSET_CHECK(struct rogue_fwif_osinit, rogue_comp_checks, 40);
+SIZE_CHECK(struct rogue_fwif_osinit, 104);
+
+OFFSET_CHECK(struct rogue_fwif_sigbuf_ctl, buffer_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_sigbuf_ctl, left_size_in_regs, 4);
+SIZE_CHECK(struct rogue_fwif_sigbuf_ctl, 8);
+
+OFFSET_CHECK(struct pdvfs_opp, volt, 0);
+OFFSET_CHECK(struct pdvfs_opp, freq, 4);
+SIZE_CHECK(struct pdvfs_opp, 8);
+
+OFFSET_CHECK(struct rogue_fwif_pdvfs_opp, opp_values, 0);
+OFFSET_CHECK(struct rogue_fwif_pdvfs_opp, min_opp_point, 128);
+OFFSET_CHECK(struct rogue_fwif_pdvfs_opp, max_opp_point, 132);
+SIZE_CHECK(struct rogue_fwif_pdvfs_opp, 136);
+
+OFFSET_CHECK(struct rogue_fwif_counter_dump_ctl, buffer_fw_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_counter_dump_ctl, size_in_dwords, 4);
+SIZE_CHECK(struct rogue_fwif_counter_dump_ctl, 8);
+
+OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_string, 0);
+OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_km_feature_flags, 24);
+OFFSET_CHECK(struct rogue_hwperf_bvnc, num_bvnc_blocks, 28);
+OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_gpu_cores, 30);
+OFFSET_CHECK(struct rogue_hwperf_bvnc, bvnc_blocks, 32);
+SIZE_CHECK(struct rogue_hwperf_bvnc, 160);
+
+OFFSET_CHECK(struct rogue_fwif_sysinit, fault_phys_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_sysinit, pds_exec_base, 8);
+OFFSET_CHECK(struct rogue_fwif_sysinit, usc_exec_base, 16);
+OFFSET_CHECK(struct rogue_fwif_sysinit, fbcdc_state_table_base, 24);
+OFFSET_CHECK(struct rogue_fwif_sysinit, fbcdc_large_state_table_base, 32);
+OFFSET_CHECK(struct rogue_fwif_sysinit, texture_heap_base, 40);
+OFFSET_CHECK(struct rogue_fwif_sysinit, hw_perf_filter, 48);
+OFFSET_CHECK(struct rogue_fwif_sysinit, slc3_fence_dev_addr, 56);
+OFFSET_CHECK(struct rogue_fwif_sysinit, tpu_trilinear_frac_mask, 64);
+OFFSET_CHECK(struct rogue_fwif_sysinit, sigbuf_ctl, 80);
+OFFSET_CHECK(struct rogue_fwif_sysinit, pdvfs_opp_info, 152);
+OFFSET_CHECK(struct rogue_fwif_sysinit, coremem_data_store, 288);
+OFFSET_CHECK(struct rogue_fwif_sysinit, counter_dump_ctl, 304);
+OFFSET_CHECK(struct rogue_fwif_sysinit, filter_flags, 312);
+OFFSET_CHECK(struct rogue_fwif_sysinit, runtime_cfg_fw_addr, 316);
+OFFSET_CHECK(struct rogue_fwif_sysinit, trace_buf_ctl_fw_addr, 320);
+OFFSET_CHECK(struct rogue_fwif_sysinit, fw_sys_data_fw_addr, 324);
+OFFSET_CHECK(struct rogue_fwif_sysinit, gpu_util_fw_cb_ctl_fw_addr, 328);
+OFFSET_CHECK(struct rogue_fwif_sysinit, reg_cfg_fw_addr, 332);
+OFFSET_CHECK(struct rogue_fwif_sysinit, hwperf_ctl_fw_addr, 336);
+OFFSET_CHECK(struct rogue_fwif_sysinit, align_checks, 340);
+OFFSET_CHECK(struct rogue_fwif_sysinit, initial_core_clock_speed, 344);
+OFFSET_CHECK(struct rogue_fwif_sysinit, active_pm_latency_ms, 348);
+OFFSET_CHECK(struct rogue_fwif_sysinit, firmware_started, 352);
+OFFSET_CHECK(struct rogue_fwif_sysinit, marker_val, 356);
+OFFSET_CHECK(struct rogue_fwif_sysinit, firmware_started_timestamp, 360);
+OFFSET_CHECK(struct rogue_fwif_sysinit, jones_disable_mask, 364);
+OFFSET_CHECK(struct rogue_fwif_sysinit, firmware_perf, 368);
+OFFSET_CHECK(struct rogue_fwif_sysinit, core_clock_rate_fw_addr, 372);
+OFFSET_CHECK(struct rogue_fwif_sysinit, gpio_validation_mode, 376);
+OFFSET_CHECK(struct rogue_fwif_sysinit, bvnc_km_feature_flags, 380);
+OFFSET_CHECK(struct rogue_fwif_sysinit, tfbc_compression_control, 540);
+SIZE_CHECK(struct rogue_fwif_sysinit, 544);
+
+OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, time_corr, 0);
+OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, time_corr_seq_count, 10240);
+OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, gpu_util_flags, 10244);
+OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, last_word, 10248);
+OFFSET_CHECK(struct rogue_fwif_gpu_util_fwcb, stats_counters, 10256);
+SIZE_CHECK(struct rogue_fwif_gpu_util_fwcb, 10280);
+
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, render_target_index, 0);
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, current_render_target, 4);
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, active_render_targets, 8);
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, cumul_active_render_targets, 12);
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, valid_render_targets_fw_addr, 16);
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, rta_num_partial_renders_fw_addr, 20);
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, max_rts, 24);
+OFFSET_CHECK(struct rogue_fwif_rta_ctl, rta_ctl_flags, 28);
+SIZE_CHECK(struct rogue_fwif_rta_ctl, 32);
+
+OFFSET_CHECK(struct rogue_fwif_freelist, freelist_dev_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_freelist, current_dev_addr, 8);
+OFFSET_CHECK(struct rogue_fwif_freelist, current_stack_top, 16);
+OFFSET_CHECK(struct rogue_fwif_freelist, max_pages, 20);
+OFFSET_CHECK(struct rogue_fwif_freelist, grow_pages, 24);
+OFFSET_CHECK(struct rogue_fwif_freelist, current_pages, 28);
+OFFSET_CHECK(struct rogue_fwif_freelist, allocated_page_count, 32);
+OFFSET_CHECK(struct rogue_fwif_freelist, allocated_mmu_page_count, 36);
+OFFSET_CHECK(struct rogue_fwif_freelist, freelist_id, 40);
+OFFSET_CHECK(struct rogue_fwif_freelist, grow_pending, 44);
+OFFSET_CHECK(struct rogue_fwif_freelist, ready_pages, 48);
+OFFSET_CHECK(struct rogue_fwif_freelist, freelist_flags, 52);
+OFFSET_CHECK(struct rogue_fwif_freelist, pm_global_pb, 56);
+SIZE_CHECK(struct rogue_fwif_freelist, 64);
+
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, geom_caches_need_zeroing, 0);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, screen_pixel_max, 4);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, multi_sample_ctl, 8);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, flipped_multi_sample_ctl, 16);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, tpc_stride, 24);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, tpc_size, 28);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, te_screen, 32);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, mtile_stride, 36);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, teaa, 40);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, te_mtile1, 44);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, te_mtile2, 48);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_lower_x, 52);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_lower_y, 56);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_upper_x, 60);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_upper_y, 64);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_scale_x, 68);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_merge_scale_y, 72);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, rgn_header_size, 76);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata_common, isp_mtile_size, 80);
+SIZE_CHECK(struct rogue_fwif_hwrtdata_common, 88);
+
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, pm_mlist_dev_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, vce_cat_base, 8);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, vce_last_cat_base, 40);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, te_cat_base, 72);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, te_last_cat_base, 104);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, alist_cat_base, 136);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, alist_last_cat_base, 144);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, pm_alist_stack_pointer, 152);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, pm_mlist_stack_pointer, 160);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, hwrt_data_common_fw_addr, 164);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, hwrt_data_flags, 168);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, state, 172);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, freelists_fw_addr, 176);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, freelist_hwr_snapshot, 188);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, vheap_table_dev_addr, 200);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, rta_ctl, 208);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, tail_ptrs_dev_addr, 240);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, macrotile_array_dev_addr, 248);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, rgn_header_dev_addr, 256);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, rtc_dev_addr, 264);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, owner_geom_not_used_by_host, 272);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, geom_caches_need_zeroing, 276);
+OFFSET_CHECK(struct rogue_fwif_hwrtdata, cleanup_state, 320);
+SIZE_CHECK(struct rogue_fwif_hwrtdata, 384);
+
+OFFSET_CHECK(struct rogue_fwif_sync_checkpoint, state, 0);
+OFFSET_CHECK(struct rogue_fwif_sync_checkpoint, fw_ref_count, 4);
+SIZE_CHECK(struct rogue_fwif_sync_checkpoint, 8);
+
+#endif /* PVR_ROGUE_FWIF_CHECK_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h
new file mode 100644
index 0000000000000..6e224400083a2
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h
@@ -0,0 +1,373 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_CLIENT_H
+#define PVR_ROGUE_FWIF_CLIENT_H
+
+#include <linux/bits.h>
+#include <linux/kernel.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+/*
+ * Page size used for Parameter Management.
+ */
+#define ROGUE_PM_PAGE_SIZE SZ_4K
+
+/*
+ * Minimum/Maximum PB size.
+ *
+ * Base page size is dependent on core:
+ *   S6/S6XT/S7               = 50 pages
+ *   S8XE                     = 40 pages
+ *   S8XE with BRN66011 fixed = 25 pages
+ *
+ * Minimum PB = Base Pages + (NUM_TE_PIPES-1)*16K + (NUM_VCE_PIPES-1)*64K +
+ *              IF_PM_PREALLOC(NUM_TE_PIPES*16K + NUM_VCE_PIPES*16K)
+ *
+ * Maximum PB size must ensure that no PM address space can be fully used,
+ * because if the full address space was used it would wrap and corrupt itself.
+ * Since there are two freelists (local is always minimum sized) this can be
+ * described as following three conditions being met:
+ *
+ *   (Minimum PB + Maximum PB)  <  ALIST PM address space size (16GB)
+ *   (Minimum PB + Maximum PB)  <  TE PM address space size (16GB) / NUM_TE_PIPES
+ *   (Minimum PB + Maximum PB)  <  VCE PM address space size (16GB) / NUM_VCE_PIPES
+ *
+ * Since the max of NUM_TE_PIPES and NUM_VCE_PIPES is 4, we have a hard limit
+ * of 4GB minus the Minimum PB. For convenience we take the smaller power-of-2
+ * value of 2GB. This is far more than any current applications use.
+ */
+#define ROGUE_PM_MAX_FREELIST_SIZE SZ_2G
+
+/*
+ * Flags supported by the geometry DM command i.e. &struct rogue_fwif_cmd_geom.
+ */
+
+#define ROGUE_GEOM_FLAGS_FIRSTKICK BIT_MASK(0)
+#define ROGUE_GEOM_FLAGS_LASTKICK BIT_MASK(1)
+/* Use single core in a multi core setup. */
+#define ROGUE_GEOM_FLAGS_SINGLE_CORE BIT_MASK(3)
+
+/*
+ * Flags supported by the fragment DM command i.e. &struct rogue_fwif_cmd_frag.
+ */
+
+/* Use single core in a multi core setup. */
+#define ROGUE_FRAG_FLAGS_SINGLE_CORE BIT_MASK(3)
+/* Indicates whether this render produces visibility results. */
+#define ROGUE_FRAG_FLAGS_GET_VIS_RESULTS BIT_MASK(5)
+/* Indicates whether a depth buffer is present. */
+#define ROGUE_FRAG_FLAGS_DEPTHBUFFER BIT_MASK(7)
+/* Indicates whether a stencil buffer is present. */
+#define ROGUE_FRAG_FLAGS_STENCILBUFFER BIT_MASK(8)
+/* Disable pixel merging for this render. */
+#define ROGUE_FRAG_FLAGS_DISABLE_PIXELMERGE BIT_MASK(15)
+/* Indicates whether a scratch buffer is present. */
+#define ROGUE_FRAG_FLAGS_SCRATCHBUFFER BIT_MASK(19)
+/* Disallow compute overlapped with this render. */
+#define ROGUE_FRAG_FLAGS_PREVENT_CDM_OVERLAP BIT_MASK(26)
+
+/*
+ * Flags supported by the compute DM command i.e. &struct rogue_fwif_cmd_compute.
+ */
+
+#define ROGUE_COMPUTE_FLAG_PREVENT_ALL_OVERLAP BIT_MASK(2)
+/*!< Use single core in a multi core setup. */
+#define ROGUE_COMPUTE_FLAG_SINGLE_CORE BIT_MASK(5)
+
+/*
+ * Flags supported by the transfer DM command i.e. &struct rogue_fwif_cmd_transfer.
+ */
+
+/*!< Use single core in a multi core setup. */
+#define ROGUE_TRANSFER_FLAGS_SINGLE_CORE BIT_MASK(1)
+
+/*
+ ************************************************
+ * Parameter/HWRTData control structures.
+ ************************************************
+ */
+
+/*
+ * Configuration registers which need to be loaded by the firmware before a geometry
+ * job can be started.
+ */
+struct rogue_fwif_geom_regs {
+	u64 vdm_ctrl_stream_base;
+	u64 tpu_border_colour_table;
+
+	/* Only used when feature VDM_DRAWINDIRECT present. */
+	u64 vdm_draw_indirect0;
+	/* Only used when feature VDM_DRAWINDIRECT present. */
+	u32 vdm_draw_indirect1;
+
+	u32 ppp_ctrl;
+	u32 te_psg;
+	/* Only used when BRN 49927 present. */
+	u32 tpu;
+
+	u32 vdm_context_resume_task0_size;
+	/* Only used when feature VDM_OBJECT_LEVEL_LLS present. */
+	u32 vdm_context_resume_task3_size;
+
+	/* Only used when BRN 56279 or BRN 67381 present. */
+	u32 pds_ctrl;
+
+	u32 view_idx;
+
+	/* Only used when feature TESSELLATION present */
+	u32 pds_coeff_free_prog;
+
+	u32 padding;
+};
+
+/* Only used when BRN 44455 or BRN 63027 present. */
+struct rogue_fwif_dummy_rgnhdr_init_geom_regs {
+	u64 te_psgregion_addr;
+};
+
+/*
+ * Represents a geometry command that can be used to tile a whole scene's objects as
+ * per TA behavior.
+ */
+struct rogue_fwif_cmd_geom {
+	/*
+	 * rogue_fwif_cmd_geom_frag_shared field must always be at the beginning of the
+	 * struct.
+	 *
+	 * The command struct (rogue_fwif_cmd_geom) is shared between Client and
+	 * Firmware. Kernel is unable to perform read/write operations on the
+	 * command struct, the SHARED region is the only exception from this rule.
+	 * This region must be the first member so that Kernel can easily access it.
+	 * For more info, see rogue_fwif_cmd_geom_frag_shared definition.
+	 */
+	struct rogue_fwif_cmd_geom_frag_shared cmd_shared;
+
+	struct rogue_fwif_geom_regs regs __aligned(8);
+	u32 flags __aligned(8);
+
+	/*
+	 * Holds the geometry/fragment fence value to allow the fragment partial render command
+	 * to go through.
+	 */
+	struct rogue_fwif_ufo partial_render_geom_frag_fence;
+
+	/* Only used when BRN 44455 or BRN 63027 present. */
+	struct rogue_fwif_dummy_rgnhdr_init_geom_regs dummy_rgnhdr_init_geom_regs __aligned(8);
+
+	/* Only used when BRN 61484 or BRN 66333 present. */
+	u32 brn61484_66333_live_rt;
+
+	u32 padding;
+};
+
+/*
+ * Configuration registers which need to be loaded by the firmware before ISP
+ * can be started.
+ */
+struct rogue_fwif_frag_regs {
+	u32 usc_pixel_output_ctrl;
+
+#define ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL 8U
+	u32 usc_clear_register[ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL];
+
+	u32 isp_bgobjdepth;
+	u32 isp_bgobjvals;
+	u32 isp_aa;
+	/* Only used when feature S7_TOP_INFRASTRUCTURE present. */
+	u32 isp_xtp_pipe_enable;
+
+	u32 isp_ctl;
+
+	/* Only used when BRN 49927 present. */
+	u32 tpu;
+
+	u32 event_pixel_pds_info;
+
+	/* Only used when feature CLUSTER_GROUPING present. */
+	u32 pixel_phantom;
+
+	u32 view_idx;
+
+	u32 event_pixel_pds_data;
+
+	/* Only used when BRN 65101 present. */
+	u32 brn65101_event_pixel_pds_data;
+
+	/* Only used when feature GPU_MULTICORE_SUPPORT or BRN 47217 present. */
+	u32 isp_oclqry_stride;
+
+	/* Only used when feature ZLS_SUBTILE present. */
+	u32 isp_zls_pixels;
+
+	/* Only used when feature ISP_ZLS_D24_S8_PACKING_OGL_MODE present. */
+	u32 rgx_cr_blackpearl_fix;
+
+	/* All values below the ALIGN(8) must be 64 bit. */
+	aligned_u64 isp_scissor_base;
+	u64 isp_dbias_base;
+	u64 isp_oclqry_base;
+	u64 isp_zlsctl;
+	u64 isp_zload_store_base;
+	u64 isp_stencil_load_store_base;
+
+	/*
+	 * Only used when feature FBCDC_ALGORITHM present and value < 3 or feature
+	 * FB_CDC_V4 present. Additionally, BRNs 48754, 60227, 72310 and 72311 must
+	 * not be present.
+	 */
+	u64 fb_cdc_zls;
+
+#define ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS 3U
+	u64 pbe_word[8U][ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS];
+	u64 tpu_border_colour_table;
+	u64 pds_bgnd[3U];
+
+	/* Only used when BRN 65101 present. */
+	u64 pds_bgnd_brn65101[3U];
+
+	u64 pds_pr_bgnd[3U];
+
+	/* Only used when BRN 62850 or 62865 present. */
+	u64 isp_dummy_stencil_store_base;
+
+	/* Only used when BRN 66193 present. */
+	u64 isp_dummy_depth_store_base;
+
+	/* Only used when BRN 67182 present. */
+	u32 rgnhdr_single_rt_size;
+	/* Only used when BRN 67182 present. */
+	u32 rgnhdr_scratch_offset;
+};
+
+struct rogue_fwif_cmd_frag {
+	struct rogue_fwif_cmd_geom_frag_shared cmd_shared __aligned(8);
+
+	struct rogue_fwif_frag_regs regs __aligned(8);
+	/* command control flags. */
+	u32 flags;
+	/* Stride IN BYTES for Z-Buffer in case of RTAs. */
+	u32 zls_stride;
+	/* Stride IN BYTES for S-Buffer in case of RTAs. */
+	u32 sls_stride;
+
+	/* Only used if feature GPU_MULTICORE_SUPPORT present. */
+	u32 execute_count;
+};
+
+/*
+ * Configuration registers which need to be loaded by the firmware before CDM
+ * can be started.
+ */
+struct rogue_fwif_compute_regs {
+	u64 tpu_border_colour_table;
+
+	/* Only used when feature CDM_USER_MODE_QUEUE present. */
+	u64 cdm_cb_queue;
+
+	/* Only used when feature CDM_USER_MODE_QUEUE present. */
+	u64 cdm_cb_base;
+	/* Only used when feature CDM_USER_MODE_QUEUE present. */
+	u64 cdm_cb;
+
+	/* Only used when feature CDM_USER_MODE_QUEUE is not present. */
+	u64 cdm_ctrl_stream_base;
+
+	u64 cdm_context_state_base_addr;
+
+	/* Only used when BRN 49927 is present. */
+	u32 tpu;
+	u32 cdm_resume_pds1;
+
+	/* Only used when feature COMPUTE_MORTON_CAPABLE present. */
+	u32 cdm_item;
+
+	/* Only used when feature CLUSTER_GROUPING present. */
+	u32 compute_cluster;
+
+	/* Only used when feature TPU_DM_GLOBAL_REGISTERS present. */
+	u32 tpu_tag_cdm_ctrl;
+
+	u32 padding;
+};
+
+struct rogue_fwif_cmd_compute {
+	/* Common command attributes */
+	struct rogue_fwif_cmd_common common __aligned(8);
+
+	/* CDM registers */
+	struct rogue_fwif_compute_regs regs;
+
+	/* Control flags */
+	u32 flags __aligned(8);
+
+	/* Only used when feature UNIFIED_STORE_VIRTUAL_PARTITIONING present. */
+	u32 num_temp_regions;
+
+	/* Only used when feature CDM_USER_MODE_QUEUE present. */
+	u32 stream_start_offset;
+
+	/* Only used when feature GPU_MULTICORE_SUPPORT present. */
+	u32 execute_count;
+};
+
+struct rogue_fwif_transfer_regs {
+	/*
+	 * All 32 bit values should be added in the top section. This then requires only a
+	 * single RGXFW_ALIGN to align all the 64 bit values in the second section.
+	 */
+	u32 isp_bgobjvals;
+
+	u32 usc_pixel_output_ctrl;
+	u32 usc_clear_register0;
+	u32 usc_clear_register1;
+	u32 usc_clear_register2;
+	u32 usc_clear_register3;
+
+	u32 isp_mtile_size;
+	u32 isp_render_origin;
+	u32 isp_ctl;
+
+	/* Only used when feature S7_TOP_INFRASTRUCTURE present. */
+	u32 isp_xtp_pipe_enable;
+	u32 isp_aa;
+
+	u32 event_pixel_pds_info;
+
+	u32 event_pixel_pds_code;
+	u32 event_pixel_pds_data;
+
+	u32 isp_render;
+	u32 isp_rgn;
+
+	/* Only used when feature GPU_MULTICORE_SUPPORT present. */
+	u32 frag_screen;
+
+	/* All values below the aligned_u64 must be 64 bit. */
+	aligned_u64 pds_bgnd0_base;
+	u64 pds_bgnd1_base;
+	u64 pds_bgnd3_sizeinfo;
+
+	u64 isp_mtile_base;
+#define ROGUE_PBE_WORDS_REQUIRED_FOR_TQS 3
+	/* TQ_MAX_RENDER_TARGETS * PBE_STATE_SIZE */
+	u64 pbe_wordx_mrty[3U * ROGUE_PBE_WORDS_REQUIRED_FOR_TQS];
+};
+
+struct rogue_fwif_cmd_transfer {
+	/* Common command attributes */
+	struct rogue_fwif_cmd_common common __aligned(8);
+
+	struct rogue_fwif_transfer_regs regs __aligned(8);
+
+	u32 flags;
+
+	u32 padding;
+};
+
+#include "pvr_rogue_fwif_client_check.h"
+
+#endif /* PVR_ROGUE_FWIF_CLIENT_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h
new file mode 100644
index 0000000000000..54aa4474163e6
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_CLIENT_CHECK_H
+#define PVR_ROGUE_FWIF_CLIENT_CHECK_H
+
+#include <linux/build_bug.h>
+
+#define OFFSET_CHECK(type, member, offset) \
+	static_assert(offsetof(type, member) == (offset), \
+		      "offsetof(" #type ", " #member ") incorrect")
+
+#define SIZE_CHECK(type, size) \
+	static_assert(sizeof(type) == (size), #type " is incorrect size")
+
+OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_ctrl_stream_base, 0);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, tpu_border_colour_table, 8);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_draw_indirect0, 16);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_draw_indirect1, 24);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, ppp_ctrl, 28);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, te_psg, 32);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, tpu, 36);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_context_resume_task0_size, 40);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, vdm_context_resume_task3_size, 44);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, pds_ctrl, 48);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, view_idx, 52);
+OFFSET_CHECK(struct rogue_fwif_geom_regs, pds_coeff_free_prog, 56);
+SIZE_CHECK(struct rogue_fwif_geom_regs, 64);
+
+OFFSET_CHECK(struct rogue_fwif_dummy_rgnhdr_init_geom_regs, te_psgregion_addr, 0);
+SIZE_CHECK(struct rogue_fwif_dummy_rgnhdr_init_geom_regs, 8);
+
+OFFSET_CHECK(struct rogue_fwif_cmd_geom, cmd_shared, 0);
+OFFSET_CHECK(struct rogue_fwif_cmd_geom, regs, 16);
+OFFSET_CHECK(struct rogue_fwif_cmd_geom, flags, 80);
+OFFSET_CHECK(struct rogue_fwif_cmd_geom, partial_render_geom_frag_fence, 84);
+OFFSET_CHECK(struct rogue_fwif_cmd_geom, dummy_rgnhdr_init_geom_regs, 96);
+OFFSET_CHECK(struct rogue_fwif_cmd_geom, brn61484_66333_live_rt, 104);
+SIZE_CHECK(struct rogue_fwif_cmd_geom, 112);
+
+OFFSET_CHECK(struct rogue_fwif_frag_regs, usc_pixel_output_ctrl, 0);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, usc_clear_register, 4);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_bgobjdepth, 36);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_bgobjvals, 40);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_aa, 44);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_xtp_pipe_enable, 48);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_ctl, 52);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, tpu, 56);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, event_pixel_pds_info, 60);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, pixel_phantom, 64);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, view_idx, 68);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, event_pixel_pds_data, 72);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, brn65101_event_pixel_pds_data, 76);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_oclqry_stride, 80);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_zls_pixels, 84);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, rgx_cr_blackpearl_fix, 88);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_scissor_base, 96);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_dbias_base, 104);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_oclqry_base, 112);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_zlsctl, 120);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_zload_store_base, 128);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_stencil_load_store_base, 136);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, fb_cdc_zls, 144);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, pbe_word, 152);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, tpu_border_colour_table, 344);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, pds_bgnd, 352);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, pds_bgnd_brn65101, 376);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, pds_pr_bgnd, 400);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_dummy_stencil_store_base, 424);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, isp_dummy_depth_store_base, 432);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, rgnhdr_single_rt_size, 440);
+OFFSET_CHECK(struct rogue_fwif_frag_regs, rgnhdr_scratch_offset, 444);
+SIZE_CHECK(struct rogue_fwif_frag_regs, 448);
+
+OFFSET_CHECK(struct rogue_fwif_cmd_frag, cmd_shared, 0);
+OFFSET_CHECK(struct rogue_fwif_cmd_frag, regs, 16);
+OFFSET_CHECK(struct rogue_fwif_cmd_frag, flags, 464);
+OFFSET_CHECK(struct rogue_fwif_cmd_frag, zls_stride, 468);
+OFFSET_CHECK(struct rogue_fwif_cmd_frag, sls_stride, 472);
+OFFSET_CHECK(struct rogue_fwif_cmd_frag, execute_count, 476);
+SIZE_CHECK(struct rogue_fwif_cmd_frag, 480);
+
+OFFSET_CHECK(struct rogue_fwif_compute_regs, tpu_border_colour_table, 0);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_cb_queue, 8);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_cb_base, 16);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_cb, 24);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_ctrl_stream_base, 32);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_context_state_base_addr, 40);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, tpu, 48);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_resume_pds1, 52);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, cdm_item, 56);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, compute_cluster, 60);
+OFFSET_CHECK(struct rogue_fwif_compute_regs, tpu_tag_cdm_ctrl, 64);
+SIZE_CHECK(struct rogue_fwif_compute_regs, 72);
+
+OFFSET_CHECK(struct rogue_fwif_cmd_compute, common, 0);
+OFFSET_CHECK(struct rogue_fwif_cmd_compute, regs, 8);
+OFFSET_CHECK(struct rogue_fwif_cmd_compute, flags, 80);
+OFFSET_CHECK(struct rogue_fwif_cmd_compute, num_temp_regions, 84);
+OFFSET_CHECK(struct rogue_fwif_cmd_compute, stream_start_offset, 88);
+OFFSET_CHECK(struct rogue_fwif_cmd_compute, execute_count, 92);
+SIZE_CHECK(struct rogue_fwif_cmd_compute, 96);
+
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_bgobjvals, 0);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_pixel_output_ctrl, 4);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register0, 8);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register1, 12);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register2, 16);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, usc_clear_register3, 20);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_mtile_size, 24);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_render_origin, 28);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_ctl, 32);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_xtp_pipe_enable, 36);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_aa, 40);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, event_pixel_pds_info, 44);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, event_pixel_pds_code, 48);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, event_pixel_pds_data, 52);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_render, 56);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_rgn, 60);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, frag_screen, 64);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, pds_bgnd0_base, 72);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, pds_bgnd1_base, 80);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, pds_bgnd3_sizeinfo, 88);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, isp_mtile_base, 96);
+OFFSET_CHECK(struct rogue_fwif_transfer_regs, pbe_wordx_mrty, 104);
+SIZE_CHECK(struct rogue_fwif_transfer_regs, 176);
+
+OFFSET_CHECK(struct rogue_fwif_cmd_transfer, common, 0);
+OFFSET_CHECK(struct rogue_fwif_cmd_transfer, regs, 8);
+OFFSET_CHECK(struct rogue_fwif_cmd_transfer, flags, 184);
+SIZE_CHECK(struct rogue_fwif_cmd_transfer, 192);
+
+#endif /* PVR_ROGUE_FWIF_CLIENT_CHECK_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h
new file mode 100644
index 0000000000000..6ebb95ba98a60
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_COMMON_H
+#define PVR_ROGUE_FWIF_COMMON_H
+
+#include <linux/build_bug.h>
+
+/*
+ * This macro represents a mask of LSBs that must be zero on data structure
+ * sizes and offsets to ensure they are 8-byte granular on types shared between
+ * the FW and host driver.
+ */
+#define PVR_FW_ALIGNMENT_LSB 7U
+
+/* Macro to test structure size alignment. */
+#define PVR_FW_STRUCT_SIZE_ASSERT(_a)                            \
+	static_assert((sizeof(_a) & PVR_FW_ALIGNMENT_LSB) == 0U, \
+		      "Size of " #_a " is not properly aligned")
+
+/* The master definition for data masters known to the firmware. */
+
+#define PVR_FWIF_DM_GP (0)
+/* Either TDM or 2D DM is present. */
+/* When the 'tla' feature is present in the hw (as per @pvr_device_features). */
+#define PVR_FWIF_DM_2D (1)
+/*
+ * When the 'fastrender_dm' feature is present in the hw (as per
+ * @pvr_device_features).
+ */
+#define PVR_FWIF_DM_TDM (1)
+
+#define PVR_FWIF_DM_GEOM (2)
+#define PVR_FWIF_DM_FRAG (3)
+#define PVR_FWIF_DM_CDM (4)
+#define PVR_FWIF_DM_RAY (5)
+#define PVR_FWIF_DM_GEOM2 (6)
+#define PVR_FWIF_DM_GEOM3 (7)
+#define PVR_FWIF_DM_GEOM4 (8)
+
+#define PVR_FWIF_DM_LAST PVR_FWIF_DM_GEOM4
+
+/* Maximum number of DM in use: GP, 2D/TDM, GEOM, 3D, CDM, RAY, GEOM2, GEOM3, GEOM4 */
+#define PVR_FWIF_DM_MAX (PVR_FWIF_DM_LAST + 1U)
+
+/* GPU Utilisation states */
+#define PVR_FWIF_GPU_UTIL_STATE_IDLE 0U
+#define PVR_FWIF_GPU_UTIL_STATE_ACTIVE 1U
+#define PVR_FWIF_GPU_UTIL_STATE_BLOCKED 2U
+#define PVR_FWIF_GPU_UTIL_STATE_NUM 3U
+#define PVR_FWIF_GPU_UTIL_STATE_MASK 0x3ULL
+
+/*
+ * Maximum amount of register writes that can be done by the register
+ * programmer (FW or META DMA). This is not a HW limitation, it is only
+ * a protection against malformed inputs to the register programmer.
+ */
+#define PVR_MAX_NUM_REGISTER_PROGRAMMER_WRITES 128U
+
+#endif /* PVR_ROGUE_FWIF_COMMON_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h
new file mode 100644
index 0000000000000..168277bce9485
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef __PVR_ROGUE_FWIF_DEV_INFO_H__
+#define __PVR_ROGUE_FWIF_DEV_INFO_H__
+
+enum {
+	PVR_FW_HAS_BRN_44079 = 0,
+	PVR_FW_HAS_BRN_47217,
+	PVR_FW_HAS_BRN_48492,
+	PVR_FW_HAS_BRN_48545,
+	PVR_FW_HAS_BRN_49927,
+	PVR_FW_HAS_BRN_50767,
+	PVR_FW_HAS_BRN_51764,
+	PVR_FW_HAS_BRN_62269,
+	PVR_FW_HAS_BRN_63142,
+	PVR_FW_HAS_BRN_63553,
+	PVR_FW_HAS_BRN_66011,
+	PVR_FW_HAS_BRN_71242,
+
+	PVR_FW_HAS_BRN_MAX
+};
+
+enum {
+	PVR_FW_HAS_ERN_35421 = 0,
+	PVR_FW_HAS_ERN_38020,
+	PVR_FW_HAS_ERN_38748,
+	PVR_FW_HAS_ERN_42064,
+	PVR_FW_HAS_ERN_42290,
+	PVR_FW_HAS_ERN_42606,
+	PVR_FW_HAS_ERN_47025,
+	PVR_FW_HAS_ERN_57596,
+
+	PVR_FW_HAS_ERN_MAX
+};
+
+enum {
+	PVR_FW_HAS_FEATURE_AXI_ACELITE = 0,
+	PVR_FW_HAS_FEATURE_CDM_CONTROL_STREAM_FORMAT,
+	PVR_FW_HAS_FEATURE_CLUSTER_GROUPING,
+	PVR_FW_HAS_FEATURE_COMMON_STORE_SIZE_IN_DWORDS,
+	PVR_FW_HAS_FEATURE_COMPUTE,
+	PVR_FW_HAS_FEATURE_COMPUTE_MORTON_CAPABLE,
+	PVR_FW_HAS_FEATURE_COMPUTE_OVERLAP,
+	PVR_FW_HAS_FEATURE_COREID_PER_OS,
+	PVR_FW_HAS_FEATURE_DYNAMIC_DUST_POWER,
+	PVR_FW_HAS_FEATURE_ECC_RAMS,
+	PVR_FW_HAS_FEATURE_FBCDC,
+	PVR_FW_HAS_FEATURE_FBCDC_ALGORITHM,
+	PVR_FW_HAS_FEATURE_FBCDC_ARCHITECTURE,
+	PVR_FW_HAS_FEATURE_FBC_MAX_DEFAULT_DESCRIPTORS,
+	PVR_FW_HAS_FEATURE_FBC_MAX_LARGE_DESCRIPTORS,
+	PVR_FW_HAS_FEATURE_FB_CDC_V4,
+	PVR_FW_HAS_FEATURE_GPU_MULTICORE_SUPPORT,
+	PVR_FW_HAS_FEATURE_GPU_VIRTUALISATION,
+	PVR_FW_HAS_FEATURE_GS_RTA_SUPPORT,
+	PVR_FW_HAS_FEATURE_IRQ_PER_OS,
+	PVR_FW_HAS_FEATURE_ISP_MAX_TILES_IN_FLIGHT,
+	PVR_FW_HAS_FEATURE_ISP_SAMPLES_PER_PIXEL,
+	PVR_FW_HAS_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE,
+	PVR_FW_HAS_FEATURE_LAYOUT_MARS,
+	PVR_FW_HAS_FEATURE_MAX_PARTITIONS,
+	PVR_FW_HAS_FEATURE_META,
+	PVR_FW_HAS_FEATURE_META_COREMEM_SIZE,
+	PVR_FW_HAS_FEATURE_MIPS,
+	PVR_FW_HAS_FEATURE_NUM_CLUSTERS,
+	PVR_FW_HAS_FEATURE_NUM_ISP_IPP_PIPES,
+	PVR_FW_HAS_FEATURE_NUM_OSIDS,
+	PVR_FW_HAS_FEATURE_NUM_RASTER_PIPES,
+	PVR_FW_HAS_FEATURE_PBE2_IN_XE,
+	PVR_FW_HAS_FEATURE_PBVNC_COREID_REG,
+	PVR_FW_HAS_FEATURE_PERFBUS,
+	PVR_FW_HAS_FEATURE_PERF_COUNTER_BATCH,
+	PVR_FW_HAS_FEATURE_PHYS_BUS_WIDTH,
+	PVR_FW_HAS_FEATURE_RISCV_FW_PROCESSOR,
+	PVR_FW_HAS_FEATURE_ROGUEXE,
+	PVR_FW_HAS_FEATURE_S7_TOP_INFRASTRUCTURE,
+	PVR_FW_HAS_FEATURE_SIMPLE_INTERNAL_PARAMETER_FORMAT,
+	PVR_FW_HAS_FEATURE_SIMPLE_INTERNAL_PARAMETER_FORMAT_V2,
+	PVR_FW_HAS_FEATURE_SIMPLE_PARAMETER_FORMAT_VERSION,
+	PVR_FW_HAS_FEATURE_SLC_BANKS,
+	PVR_FW_HAS_FEATURE_SLC_CACHE_LINE_SIZE_BITS,
+	PVR_FW_HAS_FEATURE_SLC_SIZE_CONFIGURABLE,
+	PVR_FW_HAS_FEATURE_SLC_SIZE_IN_KILOBYTES,
+	PVR_FW_HAS_FEATURE_SOC_TIMER,
+	PVR_FW_HAS_FEATURE_SYS_BUS_SECURE_RESET,
+	PVR_FW_HAS_FEATURE_TESSELLATION,
+	PVR_FW_HAS_FEATURE_TILE_REGION_PROTECTION,
+	PVR_FW_HAS_FEATURE_TILE_SIZE_X,
+	PVR_FW_HAS_FEATURE_TILE_SIZE_Y,
+	PVR_FW_HAS_FEATURE_TLA,
+	PVR_FW_HAS_FEATURE_TPU_CEM_DATAMASTER_GLOBAL_REGISTERS,
+	PVR_FW_HAS_FEATURE_TPU_DM_GLOBAL_REGISTERS,
+	PVR_FW_HAS_FEATURE_TPU_FILTERING_MODE_CONTROL,
+	PVR_FW_HAS_FEATURE_USC_MIN_OUTPUT_REGISTERS_PER_PIX,
+	PVR_FW_HAS_FEATURE_VDM_DRAWINDIRECT,
+	PVR_FW_HAS_FEATURE_VDM_OBJECT_LEVEL_LLS,
+	PVR_FW_HAS_FEATURE_VIRTUAL_ADDRESS_SPACE_BITS,
+	PVR_FW_HAS_FEATURE_WATCHDOG_TIMER,
+	PVR_FW_HAS_FEATURE_WORKGROUP_PROTECTION,
+	PVR_FW_HAS_FEATURE_XE_ARCHITECTURE,
+	PVR_FW_HAS_FEATURE_XE_MEMORY_HIERARCHY,
+	PVR_FW_HAS_FEATURE_XE_TPU2,
+	PVR_FW_HAS_FEATURE_XPU_MAX_REGBANKS_ADDR_WIDTH,
+	PVR_FW_HAS_FEATURE_XPU_MAX_SLAVES,
+	PVR_FW_HAS_FEATURE_XPU_REGISTER_BROADCAST,
+	PVR_FW_HAS_FEATURE_XT_TOP_INFRASTRUCTURE,
+	PVR_FW_HAS_FEATURE_ZLS_SUBTILE,
+
+	PVR_FW_HAS_FEATURE_MAX
+};
+
+#endif /* __PVR_ROGUE_FWIF_DEV_INFO_H__ */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h
new file mode 100644
index 0000000000000..1db1f4c532bc6
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_RESETFRAMEWORK_H
+#define PVR_ROGUE_FWIF_RESETFRAMEWORK_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+struct rogue_fwif_rf_registers {
+	union {
+		u64 cdmreg_cdm_cb_base;
+		u64 cdmreg_cdm_ctrl_stream_base;
+	};
+	u64 cdmreg_cdm_cb_queue;
+	u64 cdmreg_cdm_cb;
+};
+
+struct rogue_fwif_rf_cmd {
+	/* THIS MUST BE THE LAST MEMBER OF THE CONTAINING STRUCTURE */
+	struct rogue_fwif_rf_registers fw_registers __aligned(8);
+};
+
+#define ROGUE_FWIF_RF_CMD_SIZE sizeof(struct rogue_fwif_rf_cmd)
+
+#endif /* PVR_ROGUE_FWIF_RESETFRAMEWORK_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h
new file mode 100644
index 0000000000000..6c09c15bf9bd8
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_SHARED_H
+#define PVR_ROGUE_FWIF_SHARED_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define ROGUE_FWIF_NUM_RTDATAS 2U
+#define ROGUE_FWIF_NUM_GEOMDATAS 1U
+#define ROGUE_FWIF_NUM_RTDATA_FREELISTS 2U
+#define ROGUE_NUM_GEOM_CORES 1U
+
+#define ROGUE_NUM_GEOM_CORES_SIZE 2U
+
+/*
+ * Maximum number of UFOs in a CCB command.
+ * The number is based on having 32 sync prims (as originally), plus 32 sync
+ * checkpoints.
+ * Once the use of sync prims is no longer supported, we will retain
+ * the same total (64) as the number of sync checkpoints which may be
+ * supporting a fence is not visible to the client driver and has to
+ * allow for the number of different timelines involved in fence merges.
+ */
+#define ROGUE_FWIF_CCB_CMD_MAX_UFOS (32U + 32U)
+
+/*
+ * This is a generic limit imposed on any DM (GEOMETRY,FRAGMENT,CDM,TDM,2D,TRANSFER)
+ * command passed through the bridge.
+ * Just across the bridge in the server, any incoming kick command size is
+ * checked against this maximum limit.
+ * In case the incoming command size is larger than the specified limit,
+ * the bridge call is retired with error.
+ */
+#define ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE (1024U)
+
+#define ROGUE_FWIF_PRBUFFER_START (0)
+#define ROGUE_FWIF_PRBUFFER_ZSBUFFER (0)
+#define ROGUE_FWIF_PRBUFFER_MSAABUFFER (1)
+#define ROGUE_FWIF_PRBUFFER_MAXSUPPORTED (2)
+
+struct rogue_fwif_dma_addr {
+	aligned_u64 dev_addr;
+	u32 fw_addr;
+	u32 padding;
+} __aligned(8);
+
+struct rogue_fwif_ufo {
+	u32 addr;
+	u32 value;
+};
+
+#define ROGUE_FWIF_UFO_ADDR_IS_SYNC_CHECKPOINT (1)
+
+struct rogue_fwif_sync_checkpoint {
+	u32 state;
+	u32 fw_ref_count;
+};
+
+struct rogue_fwif_cleanup_ctl {
+	/* Number of commands received by the FW */
+	u32 submitted_commands;
+	/* Number of commands executed by the FW */
+	u32 executed_commands;
+} __aligned(8);
+
+/*
+ * Used to share frame numbers across UM-KM-FW,
+ * frame number is set in UM,
+ * frame number is required in both KM for HTB and FW for FW trace.
+ *
+ * May be used to house Kick flags in the future.
+ */
+struct rogue_fwif_cmd_common {
+	/* associated frame number */
+	u32 frame_num;
+};
+
+/*
+ * Geometry and fragment commands require set of firmware addresses that are stored in the Kernel.
+ * Client has handle(s) to Kernel containers storing these addresses, instead of raw addresses. We
+ * have to patch/write these addresses in KM to prevent UM from controlling FW addresses directly.
+ * Typedefs for geometry and fragment commands are shared between Client and Firmware (both
+ * single-BVNC). Kernel is implemented in a multi-BVNC manner, so it can't use geometry|fragment
+ * CMD type definitions directly. Therefore we have a SHARED block that is shared between UM-KM-FW
+ * across all BVNC configurations.
+ */
+struct rogue_fwif_cmd_geom_frag_shared {
+	/* Common command attributes */
+	struct rogue_fwif_cmd_common cmn;
+
+	/*
+	 * RTData associated with this command, this is used for context
+	 * selection and for storing out HW-context, when TA is switched out for
+	 * continuing later
+	 */
+	u32 hwrt_data_fw_addr;
+
+	/* Supported PR Buffers like Z/S/MSAA Scratch */
+	u32 pr_buffer_fw_addr[ROGUE_FWIF_PRBUFFER_MAXSUPPORTED];
+};
+
+/*
+ * Client Circular Command Buffer (CCCB) control structure.
+ * This is shared between the Server and the Firmware and holds byte offsets
+ * into the CCCB as well as the wrapping mask to aid wrap around. A given
+ * snapshot of this queue with Cmd 1 running on the GPU might be:
+ *
+ *          Roff                           Doff                 Woff
+ * [..........|-1----------|=2===|=3===|=4===|~5~~~~|~6~~~~|~7~~~~|..........]
+ *            <      runnable commands       ><   !ready to run   >
+ *
+ * Cmd 1    : Currently executing on the GPU data master.
+ * Cmd 2,3,4: Fence dependencies met, commands runnable.
+ * Cmd 5... : Fence dependency not met yet.
+ */
+struct rogue_fwif_cccb_ctl {
+	/* Host write offset into CCB. This must be aligned to 16 bytes. */
+	u32 write_offset;
+	/*
+	 * Firmware read offset into CCB. Points to the command that is runnable
+	 * on GPU, if R!=W
+	 */
+	u32 read_offset;
+	/*
+	 * Firmware fence dependency offset. Points to commands not ready, i.e.
+	 * fence dependencies are not met.
+	 */
+	u32 dep_offset;
+	/* Offset wrapping mask, total capacity in bytes of the CCB-1 */
+	u32 wrap_mask;
+
+	/* Only used if SUPPORT_AGP is present. */
+	u32 read_offset2;
+
+	/* Only used if SUPPORT_AGP4 is present. */
+	u32 read_offset3;
+	/* Only used if SUPPORT_AGP4 is present. */
+	u32 read_offset4;
+
+	u32 padding;
+} __aligned(8);
+
+#define ROGUE_FW_LOCAL_FREELIST (0)
+#define ROGUE_FW_GLOBAL_FREELIST (1)
+#define ROGUE_FW_FREELIST_TYPE_LAST ROGUE_FW_GLOBAL_FREELIST
+#define ROGUE_FW_MAX_FREELISTS (ROGUE_FW_FREELIST_TYPE_LAST + 1U)
+
+struct rogue_fwif_geom_registers_caswitch {
+	u64 geom_reg_vdm_context_state_base_addr;
+	u64 geom_reg_vdm_context_state_resume_addr;
+	u64 geom_reg_ta_context_state_base_addr;
+
+	struct {
+		u64 geom_reg_vdm_context_store_task0;
+		u64 geom_reg_vdm_context_store_task1;
+		u64 geom_reg_vdm_context_store_task2;
+
+		/* VDM resume state update controls */
+		u64 geom_reg_vdm_context_resume_task0;
+		u64 geom_reg_vdm_context_resume_task1;
+		u64 geom_reg_vdm_context_resume_task2;
+
+		u64 geom_reg_vdm_context_store_task3;
+		u64 geom_reg_vdm_context_store_task4;
+
+		u64 geom_reg_vdm_context_resume_task3;
+		u64 geom_reg_vdm_context_resume_task4;
+	} geom_state[2];
+};
+
+#define ROGUE_FWIF_GEOM_REGISTERS_CSWITCH_SIZE \
+	sizeof(struct rogue_fwif_geom_registers_caswitch)
+
+struct rogue_fwif_cdm_registers_cswitch {
+	u64 cdmreg_cdm_context_pds0;
+	u64 cdmreg_cdm_context_pds1;
+	u64 cdmreg_cdm_terminate_pds;
+	u64 cdmreg_cdm_terminate_pds1;
+
+	/* CDM resume controls */
+	u64 cdmreg_cdm_resume_pds0;
+	u64 cdmreg_cdm_context_pds0_b;
+	u64 cdmreg_cdm_resume_pds0_b;
+};
+
+struct rogue_fwif_static_rendercontext_state {
+	/* Geom registers for ctx switch */
+	struct rogue_fwif_geom_registers_caswitch ctxswitch_regs[ROGUE_NUM_GEOM_CORES_SIZE]
+		__aligned(8);
+};
+
+#define ROGUE_FWIF_STATIC_RENDERCONTEXT_SIZE \
+	sizeof(struct rogue_fwif_static_rendercontext_state)
+
+struct rogue_fwif_static_computecontext_state {
+	/* CDM registers for ctx switch */
+	struct rogue_fwif_cdm_registers_cswitch ctxswitch_regs __aligned(8);
+};
+
+#define ROGUE_FWIF_STATIC_COMPUTECONTEXT_SIZE \
+	sizeof(struct rogue_fwif_static_computecontext_state)
+
+enum rogue_fwif_prbuffer_state {
+	ROGUE_FWIF_PRBUFFER_UNBACKED = 0,
+	ROGUE_FWIF_PRBUFFER_BACKED,
+	ROGUE_FWIF_PRBUFFER_BACKING_PENDING,
+	ROGUE_FWIF_PRBUFFER_UNBACKING_PENDING,
+};
+
+struct rogue_fwif_prbuffer {
+	/* Buffer ID*/
+	u32 buffer_id;
+	/* Needs On-demand Z/S/MSAA Buffer allocation */
+	bool on_demand __aligned(4);
+	/* Z/S/MSAA -Buffer state */
+	enum rogue_fwif_prbuffer_state state;
+	/* Cleanup state */
+	struct rogue_fwif_cleanup_ctl cleanup_sate;
+	/* Compatibility and other flags */
+	u32 prbuffer_flags;
+} __aligned(8);
+
+/* Last reset reason for a context. */
+enum rogue_context_reset_reason {
+	/* No reset reason recorded */
+	ROGUE_CONTEXT_RESET_REASON_NONE = 0,
+	/* Caused a reset due to locking up */
+	ROGUE_CONTEXT_RESET_REASON_GUILTY_LOCKUP = 1,
+	/* Affected by another context locking up */
+	ROGUE_CONTEXT_RESET_REASON_INNOCENT_LOCKUP = 2,
+	/* Overran the global deadline */
+	ROGUE_CONTEXT_RESET_REASON_GUILTY_OVERRUNING = 3,
+	/* Affected by another context overrunning */
+	ROGUE_CONTEXT_RESET_REASON_INNOCENT_OVERRUNING = 4,
+	/* Forced reset to ensure scheduling requirements */
+	ROGUE_CONTEXT_RESET_REASON_HARD_CONTEXT_SWITCH = 5,
+	/* FW Safety watchdog triggered */
+	ROGUE_CONTEXT_RESET_REASON_FW_WATCHDOG = 12,
+	/* FW page fault (no HWR) */
+	ROGUE_CONTEXT_RESET_REASON_FW_PAGEFAULT = 13,
+	/* FW execution error (GPU reset requested) */
+	ROGUE_CONTEXT_RESET_REASON_FW_EXEC_ERR = 14,
+	/* Host watchdog detected FW error */
+	ROGUE_CONTEXT_RESET_REASON_HOST_WDG_FW_ERR = 15,
+	/* Geometry DM OOM event is not allowed */
+	ROGUE_CONTEXT_GEOM_OOM_DISABLED = 16,
+};
+
+struct rogue_context_reset_reason_data {
+	enum rogue_context_reset_reason reset_reason;
+	u32 reset_ext_job_ref;
+};
+
+#include "pvr_rogue_fwif_shared_check.h"
+
+#endif /* PVR_ROGUE_FWIF_SHARED_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h
new file mode 100644
index 0000000000000..597ed54bbd3af
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_SHARED_CHECK_H
+#define PVR_ROGUE_FWIF_SHARED_CHECK_H
+
+#include <linux/build_bug.h>
+
+#define OFFSET_CHECK(type, member, offset) \
+	static_assert(offsetof(type, member) == (offset), \
+		      "offsetof(" #type ", " #member ") incorrect")
+
+#define SIZE_CHECK(type, size) \
+	static_assert(sizeof(type) == (size), #type " is incorrect size")
+
+OFFSET_CHECK(struct rogue_fwif_dma_addr, dev_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_dma_addr, fw_addr, 8);
+SIZE_CHECK(struct rogue_fwif_dma_addr, 16);
+
+OFFSET_CHECK(struct rogue_fwif_ufo, addr, 0);
+OFFSET_CHECK(struct rogue_fwif_ufo, value, 4);
+SIZE_CHECK(struct rogue_fwif_ufo, 8);
+
+OFFSET_CHECK(struct rogue_fwif_cleanup_ctl, submitted_commands, 0);
+OFFSET_CHECK(struct rogue_fwif_cleanup_ctl, executed_commands, 4);
+SIZE_CHECK(struct rogue_fwif_cleanup_ctl, 8);
+
+OFFSET_CHECK(struct rogue_fwif_cccb_ctl, write_offset, 0);
+OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset, 4);
+OFFSET_CHECK(struct rogue_fwif_cccb_ctl, dep_offset, 8);
+OFFSET_CHECK(struct rogue_fwif_cccb_ctl, wrap_mask, 12);
+OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset2, 16);
+OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset3, 20);
+OFFSET_CHECK(struct rogue_fwif_cccb_ctl, read_offset4, 24);
+SIZE_CHECK(struct rogue_fwif_cccb_ctl, 32);
+
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_reg_vdm_context_state_base_addr, 0);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_reg_vdm_context_state_resume_addr, 8);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_reg_ta_context_state_base_addr, 16);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_store_task0, 24);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_store_task1, 32);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_store_task2, 40);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_resume_task0, 48);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_resume_task1, 56);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_resume_task2, 64);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_store_task3, 72);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_store_task4, 80);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_resume_task3, 88);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[0].geom_reg_vdm_context_resume_task4, 96);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_store_task0, 104);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_store_task1, 112);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_store_task2, 120);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_resume_task0, 128);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_resume_task1, 136);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_resume_task2, 144);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_store_task3, 152);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_store_task4, 160);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_resume_task3, 168);
+OFFSET_CHECK(struct rogue_fwif_geom_registers_caswitch,
+	     geom_state[1].geom_reg_vdm_context_resume_task4, 176);
+SIZE_CHECK(struct rogue_fwif_geom_registers_caswitch, 184);
+
+OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0, 0);
+OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds1, 8);
+OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds, 16);
+OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds1, 24);
+OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0, 32);
+OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0_b, 40);
+OFFSET_CHECK(struct rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0_b, 48);
+SIZE_CHECK(struct rogue_fwif_cdm_registers_cswitch, 56);
+
+OFFSET_CHECK(struct rogue_fwif_static_rendercontext_state, ctxswitch_regs, 0);
+SIZE_CHECK(struct rogue_fwif_static_rendercontext_state, 368);
+
+OFFSET_CHECK(struct rogue_fwif_static_computecontext_state, ctxswitch_regs, 0);
+SIZE_CHECK(struct rogue_fwif_static_computecontext_state, 56);
+
+OFFSET_CHECK(struct rogue_fwif_cmd_common, frame_num, 0);
+SIZE_CHECK(struct rogue_fwif_cmd_common, 4);
+
+OFFSET_CHECK(struct rogue_fwif_cmd_geom_frag_shared, cmn, 0);
+OFFSET_CHECK(struct rogue_fwif_cmd_geom_frag_shared, hwrt_data_fw_addr, 4);
+OFFSET_CHECK(struct rogue_fwif_cmd_geom_frag_shared, pr_buffer_fw_addr, 8);
+SIZE_CHECK(struct rogue_fwif_cmd_geom_frag_shared, 16);
+
+#endif /* PVR_ROGUE_FWIF_SHARED_CHECK_H */
diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h
new file mode 100644
index 0000000000000..1c2c4ebedc257
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_STREAM_H
+#define PVR_ROGUE_FWIF_STREAM_H
+
+/**
+ * DOC: Streams
+ *
+ * Commands are submitted to the kernel driver in the form of streams.
+ *
+ * A command stream has the following layout :
+ *  - A 64-bit header containing:
+ *    * A u32 containing the length of the main stream inclusive of the length of the header.
+ *    * A u32 for padding.
+ *  - The main stream data.
+ *  - The extension stream (optional), which is composed of:
+ *    * One or more headers.
+ *    * The extension stream data, corresponding to the extension headers.
+ *
+ * The main stream provides the base command data. This has a fixed layout based on the features
+ * supported by a given GPU.
+ *
+ * The extension stream provides the command parameters that are required for BRNs & ERNs for the
+ * current GPU. This stream is comprised of one or more headers, followed by data for each given
+ * BRN/ERN.
+ *
+ * Each header is a u32 containing a bitmask of quirks & enhancements in the extension stream, a
+ * "type" field determining the set of quirks & enhancements the bitmask represents, and a
+ * continuation bit determining whether any more headers are present. The headers are then followed
+ * by command data; this is specific to each quirk/enhancement. All unused / reserved bits in the
+ * header must be set to 0.
+ *
+ * All parameters and headers in the main and extension streams must be naturally aligned.
+ *
+ * If a parameter appears in both the main and extension streams, then the extension parameter is
+ * used.
+ */
+
+/*
+ * Stream extension header definition
+ */
+#define PVR_STREAM_EXTHDR_TYPE_SHIFT 29U
+#define PVR_STREAM_EXTHDR_TYPE_MASK (7U << PVR_STREAM_EXTHDR_TYPE_SHIFT)
+#define PVR_STREAM_EXTHDR_TYPE_MAX 8U
+#define PVR_STREAM_EXTHDR_CONTINUATION BIT(28U)
+
+#define PVR_STREAM_EXTHDR_DATA_MASK ~(PVR_STREAM_EXTHDR_TYPE_MASK | PVR_STREAM_EXTHDR_CONTINUATION)
+
+/*
+ * Stream extension header - Geometry 0
+ */
+#define PVR_STREAM_EXTHDR_TYPE_GEOM0 0U
+
+#define PVR_STREAM_EXTHDR_GEOM0_BRN49927 BIT(0U)
+
+#define PVR_STREAM_EXTHDR_GEOM0_VALID PVR_STREAM_EXTHDR_GEOM0_BRN49927
+
+/*
+ * Stream extension header - Fragment 0
+ */
+#define PVR_STREAM_EXTHDR_TYPE_FRAG0 0U
+
+#define PVR_STREAM_EXTHDR_FRAG0_BRN47217 BIT(0U)
+#define PVR_STREAM_EXTHDR_FRAG0_BRN49927 BIT(1U)
+
+#define PVR_STREAM_EXTHDR_FRAG0_VALID PVR_STREAM_EXTHDR_FRAG0_BRN49927
+
+/*
+ * Stream extension header - Compute 0
+ */
+#define PVR_STREAM_EXTHDR_TYPE_COMPUTE0 0U
+
+#define PVR_STREAM_EXTHDR_COMPUTE0_BRN49927 BIT(0U)
+
+#define PVR_STREAM_EXTHDR_COMPUTE0_VALID PVR_STREAM_EXTHDR_COMPUTE0_BRN49927
+
+#endif /* PVR_ROGUE_FWIF_STREAM_H */
-- 
GitLab


From f99f5f3ea7efd54ba0529c4f2d7c72712918a522 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:31 +0000
Subject: [PATCH 0406/2340] drm/imagination: Add GPU ID parsing and firmware
 loading

Read the GPU ID register at probe time and select the correct
features/quirks/enhancements. Use the GPU ID to form the firmware
file name and load the firmware.

The features/quirks/enhancements arrays are currently hardcoded in
the driver for the supported GPUs. We are looking at moving this
information to the firmware image.

Changes since v8:
- Corrected license identifiers

Changes since v7:
- Fix kerneldoc for pvr_device_info_set_enhancements()

Changes since v5:
- Add BRN 71242 to device info

Changes since v4:
- Retrieve device information from firmware header
- Pull forward firmware header parsing from FW infrastructure patch
- Use devm_add_action_or_reset to release firmware

Changes since v3:
- Use drm_dev_{enter,exit}

Co-developed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Frank Binns <frank.binns@imgtec.com>
Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Link: https://lore.kernel.org/r/1ff76f7a5b45c742279c78910f8491b8a5e7f6e6.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Makefile          |   2 +
 drivers/gpu/drm/imagination/pvr_device.c      | 323 ++++++++++-
 drivers/gpu/drm/imagination/pvr_device.h      | 220 ++++++++
 drivers/gpu/drm/imagination/pvr_device_info.c | 254 +++++++++
 drivers/gpu/drm/imagination/pvr_device_info.h | 186 +++++++
 drivers/gpu/drm/imagination/pvr_drv.c         | 521 +++++++++++++++++-
 drivers/gpu/drm/imagination/pvr_drv.h         | 107 ++++
 drivers/gpu/drm/imagination/pvr_fw.c          | 145 +++++
 drivers/gpu/drm/imagination/pvr_fw.h          |  34 ++
 drivers/gpu/drm/imagination/pvr_fw_info.h     | 135 +++++
 10 files changed, 1925 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_device_info.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_device_info.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_info.h

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index d36007f2825cf..0b863e9f51b86 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -5,6 +5,8 @@ subdir-ccflags-y := -I$(srctree)/$(src)
 
 powervr-y := \
 	pvr_device.o \
+	pvr_device_info.o \
 	pvr_drv.o \
+	pvr_fw.o
 
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index abcdf733f57b8..05e382cacb277 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -2,19 +2,31 @@
 /* Copyright (c) 2023 Imagination Technologies Ltd. */
 
 #include "pvr_device.h"
+#include "pvr_device_info.h"
+
+#include "pvr_fw.h"
+#include "pvr_rogue_cr_defs.h"
 
 #include <drm/drm_print.h>
 
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/compiler_attributes.h>
 #include <linux/compiler_types.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
+#include <linux/firmware.h>
 #include <linux/gfp.h>
+#include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
+
+/* Major number for the supported version of the firmware. */
+#define PVR_FW_VERSION_MAJOR 1
 
 /**
  * pvr_device_reg_init() - Initialize kernel access to a PowerVR device's
@@ -100,6 +112,209 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev)
 	return 0;
 }
 
+/**
+ * pvr_build_firmware_filename() - Construct a PowerVR firmware filename
+ * @pvr_dev: Target PowerVR device.
+ * @base: First part of the filename.
+ * @major: Major version number.
+ *
+ * A PowerVR firmware filename consists of three parts separated by underscores
+ * (``'_'``) along with a '.fw' file suffix. The first part is the exact value
+ * of @base, the second part is the hardware version string derived from @pvr_fw
+ * and the final part is the firmware version number constructed from @major with
+ * a 'v' prefix, e.g. powervr/rogue_4.40.2.51_v1.fw.
+ *
+ * The returned string will have been slab allocated and must be freed with
+ * kfree().
+ *
+ * Return:
+ *  * The constructed filename on success, or
+ *  * Any error returned by kasprintf().
+ */
+static char *
+pvr_build_firmware_filename(struct pvr_device *pvr_dev, const char *base,
+			    u8 major)
+{
+	struct pvr_gpu_id *gpu_id = &pvr_dev->gpu_id;
+
+	return kasprintf(GFP_KERNEL, "%s_%d.%d.%d.%d_v%d.fw", base, gpu_id->b,
+			 gpu_id->v, gpu_id->n, gpu_id->c, major);
+}
+
+static void
+pvr_release_firmware(void *data)
+{
+	struct pvr_device *pvr_dev = data;
+
+	release_firmware(pvr_dev->fw_dev.firmware);
+}
+
+/**
+ * pvr_request_firmware() - Load firmware for a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ *
+ * See pvr_build_firmware_filename() for details on firmware file naming.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by pvr_build_firmware_filename(), or
+ *  * Any error returned by request_firmware().
+ */
+static int
+pvr_request_firmware(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = &pvr_dev->base;
+	char *filename;
+	const struct firmware *fw;
+	int err;
+
+	filename = pvr_build_firmware_filename(pvr_dev, "powervr/rogue",
+					       PVR_FW_VERSION_MAJOR);
+	if (IS_ERR(filename))
+		return PTR_ERR(filename);
+
+	/*
+	 * This function takes a copy of &filename, meaning we can free our
+	 * instance before returning.
+	 */
+	err = request_firmware(&fw, filename, pvr_dev->base.dev);
+	if (err) {
+		drm_err(drm_dev, "failed to load firmware %s (err=%d)\n",
+			filename, err);
+		goto err_free_filename;
+	}
+
+	drm_info(drm_dev, "loaded firmware %s\n", filename);
+	kfree(filename);
+
+	pvr_dev->fw_dev.firmware = fw;
+
+	return devm_add_action_or_reset(drm_dev->dev, pvr_release_firmware, pvr_dev);
+
+err_free_filename:
+	kfree(filename);
+
+	return err;
+}
+
+/**
+ * pvr_load_gpu_id() - Load a PowerVR device's GPU ID (BVNC) from control registers.
+ *
+ * Sets struct pvr_dev.gpu_id.
+ *
+ * @pvr_dev: Target PowerVR device.
+ */
+static void
+pvr_load_gpu_id(struct pvr_device *pvr_dev)
+{
+	struct pvr_gpu_id *gpu_id = &pvr_dev->gpu_id;
+	u64 bvnc;
+
+	/*
+	 * Try reading the BVNC using the newer (cleaner) method first. If the
+	 * B value is zero, fall back to the older method.
+	 */
+	bvnc = pvr_cr_read64(pvr_dev, ROGUE_CR_CORE_ID__PBVNC);
+
+	gpu_id->b = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__BRANCH_ID);
+	if (gpu_id->b != 0) {
+		gpu_id->v = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__VERSION_ID);
+		gpu_id->n = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__NUMBER_OF_SCALABLE_UNITS);
+		gpu_id->c = PVR_CR_FIELD_GET(bvnc, CORE_ID__PBVNC__CONFIG_ID);
+	} else {
+		u32 core_rev = pvr_cr_read32(pvr_dev, ROGUE_CR_CORE_REVISION);
+		u32 core_id = pvr_cr_read32(pvr_dev, ROGUE_CR_CORE_ID);
+		u16 core_id_config = PVR_CR_FIELD_GET(core_id, CORE_ID_CONFIG);
+
+		gpu_id->b = PVR_CR_FIELD_GET(core_rev, CORE_REVISION_MAJOR);
+		gpu_id->v = PVR_CR_FIELD_GET(core_rev, CORE_REVISION_MINOR);
+		gpu_id->n = FIELD_GET(0xFF00, core_id_config);
+		gpu_id->c = FIELD_GET(0x00FF, core_id_config);
+	}
+}
+
+/**
+ * pvr_set_dma_info() - Set PowerVR device DMA information
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Sets the DMA mask and max segment size for the PowerVR device.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by PVR_FEATURE_VALUE(), or
+ *  * Any error returned by dma_set_mask().
+ */
+
+static int
+pvr_set_dma_info(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	u16 phys_bus_width;
+	int err;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, phys_bus_width, &phys_bus_width);
+	if (err) {
+		drm_err(drm_dev, "Failed to get device physical bus width\n");
+		return err;
+	}
+
+	err = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(phys_bus_width));
+	if (err) {
+		drm_err(drm_dev, "Failed to set DMA mask (err=%d)\n", err);
+		return err;
+	}
+
+	dma_set_max_seg_size(drm_dev->dev, UINT_MAX);
+
+	return 0;
+}
+
+/**
+ * pvr_device_gpu_init() - GPU-specific initialization for a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ *
+ * The following steps are taken to ensure the device is ready:
+ *
+ *  1. Read the hardware version information from control registers,
+ *  2. Initialise the hardware feature information,
+ *  3. Setup the device DMA information,
+ *  4. Setup the device-scoped memory context, and
+ *  5. Load firmware into the device.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%ENODEV if the GPU is not supported,
+ *  * Any error returned by pvr_set_dma_info(),
+ *  * Any error returned by pvr_memory_context_init(), or
+ *  * Any error returned by pvr_request_firmware().
+ */
+static int
+pvr_device_gpu_init(struct pvr_device *pvr_dev)
+{
+	int err;
+
+	pvr_load_gpu_id(pvr_dev);
+
+	err = pvr_request_firmware(pvr_dev);
+	if (err)
+		return err;
+
+	err = pvr_fw_validate_init_device_info(pvr_dev);
+	if (err)
+		return err;
+
+	if (PVR_HAS_FEATURE(pvr_dev, meta))
+		pvr_dev->fw_dev.processor_type = PVR_FW_PROCESSOR_TYPE_META;
+	else if (PVR_HAS_FEATURE(pvr_dev, mips))
+		pvr_dev->fw_dev.processor_type = PVR_FW_PROCESSOR_TYPE_MIPS;
+	else if (PVR_HAS_FEATURE(pvr_dev, riscv_fw_processor))
+		pvr_dev->fw_dev.processor_type = PVR_FW_PROCESSOR_TYPE_RISCV;
+	else
+		return -EINVAL;
+
+	return pvr_set_dma_info(pvr_dev);
+}
+
 /**
  * pvr_device_init() - Initialize a PowerVR device
  * @pvr_dev: Target PowerVR device.
@@ -130,7 +345,12 @@ pvr_device_init(struct pvr_device *pvr_dev)
 		return err;
 
 	/* Map the control registers into memory. */
-	return pvr_device_reg_init(pvr_dev);
+	err = pvr_device_reg_init(pvr_dev);
+	if (err)
+		return err;
+
+	/* Perform GPU-specific initialization steps. */
+	return pvr_device_gpu_init(pvr_dev);
 }
 
 /**
@@ -145,3 +365,104 @@ pvr_device_fini(struct pvr_device *pvr_dev)
 	 * the initialization stages in pvr_device_init().
 	 */
 }
+
+bool
+pvr_device_has_uapi_quirk(struct pvr_device *pvr_dev, u32 quirk)
+{
+	switch (quirk) {
+	case 47217:
+		return PVR_HAS_QUIRK(pvr_dev, 47217);
+	case 48545:
+		return PVR_HAS_QUIRK(pvr_dev, 48545);
+	case 49927:
+		return PVR_HAS_QUIRK(pvr_dev, 49927);
+	case 51764:
+		return PVR_HAS_QUIRK(pvr_dev, 51764);
+	case 62269:
+		return PVR_HAS_QUIRK(pvr_dev, 62269);
+	default:
+		return false;
+	};
+}
+
+bool
+pvr_device_has_uapi_enhancement(struct pvr_device *pvr_dev, u32 enhancement)
+{
+	switch (enhancement) {
+	case 35421:
+		return PVR_HAS_ENHANCEMENT(pvr_dev, 35421);
+	case 42064:
+		return PVR_HAS_ENHANCEMENT(pvr_dev, 42064);
+	default:
+		return false;
+	};
+}
+
+/**
+ * pvr_device_has_feature() - Look up device feature based on feature definition
+ * @pvr_dev: Device pointer.
+ * @feature: Feature to look up. Should be one of %PVR_FEATURE_*.
+ *
+ * Returns:
+ *  * %true if feature is present on device, or
+ *  * %false if feature is not present on device.
+ */
+bool
+pvr_device_has_feature(struct pvr_device *pvr_dev, u32 feature)
+{
+	switch (feature) {
+	case PVR_FEATURE_CLUSTER_GROUPING:
+		return PVR_HAS_FEATURE(pvr_dev, cluster_grouping);
+
+	case PVR_FEATURE_COMPUTE_MORTON_CAPABLE:
+		return PVR_HAS_FEATURE(pvr_dev, compute_morton_capable);
+
+	case PVR_FEATURE_FB_CDC_V4:
+		return PVR_HAS_FEATURE(pvr_dev, fb_cdc_v4);
+
+	case PVR_FEATURE_GPU_MULTICORE_SUPPORT:
+		return PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support);
+
+	case PVR_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE:
+		return PVR_HAS_FEATURE(pvr_dev, isp_zls_d24_s8_packing_ogl_mode);
+
+	case PVR_FEATURE_S7_TOP_INFRASTRUCTURE:
+		return PVR_HAS_FEATURE(pvr_dev, s7_top_infrastructure);
+
+	case PVR_FEATURE_TESSELLATION:
+		return PVR_HAS_FEATURE(pvr_dev, tessellation);
+
+	case PVR_FEATURE_TPU_DM_GLOBAL_REGISTERS:
+		return PVR_HAS_FEATURE(pvr_dev, tpu_dm_global_registers);
+
+	case PVR_FEATURE_VDM_DRAWINDIRECT:
+		return PVR_HAS_FEATURE(pvr_dev, vdm_drawindirect);
+
+	case PVR_FEATURE_VDM_OBJECT_LEVEL_LLS:
+		return PVR_HAS_FEATURE(pvr_dev, vdm_object_level_lls);
+
+	case PVR_FEATURE_ZLS_SUBTILE:
+		return PVR_HAS_FEATURE(pvr_dev, zls_subtile);
+
+	/* Derived features. */
+	case PVR_FEATURE_CDM_USER_MODE_QUEUE: {
+		u8 cdm_control_stream_format = 0;
+
+		PVR_FEATURE_VALUE(pvr_dev, cdm_control_stream_format, &cdm_control_stream_format);
+		return (cdm_control_stream_format >= 2 && cdm_control_stream_format <= 4);
+	}
+
+	case PVR_FEATURE_REQUIRES_FB_CDC_ZLS_SETUP:
+		if (PVR_HAS_FEATURE(pvr_dev, fbcdc_algorithm)) {
+			u8 fbcdc_algorithm = 0;
+
+			PVR_FEATURE_VALUE(pvr_dev, fbcdc_algorithm, &fbcdc_algorithm);
+			return (fbcdc_algorithm < 3 || PVR_HAS_FEATURE(pvr_dev, fb_cdc_v4));
+		}
+		return false;
+
+	default:
+		WARN(true, "Looking up undefined feature %u\n", feature);
+		return false;
+	}
+}
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index d9d2804f44543..36fe67bed27ff 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -4,6 +4,9 @@
 #ifndef PVR_DEVICE_H
 #define PVR_DEVICE_H
 
+#include "pvr_device_info.h"
+#include "pvr_fw.h"
+
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
 #include <drm/drm_mm.h>
@@ -28,6 +31,26 @@ struct clk;
 /* Forward declaration from <linux/firmware.h>. */
 struct firmware;
 
+/**
+ * struct pvr_gpu_id - Hardware GPU ID information for a PowerVR device
+ * @b: Branch ID.
+ * @v: Version ID.
+ * @n: Number of scalable units.
+ * @c: Config ID.
+ */
+struct pvr_gpu_id {
+	u16 b, v, n, c;
+};
+
+/**
+ * struct pvr_fw_version - Firmware version information
+ * @major: Major version number.
+ * @minor: Minor version number.
+ */
+struct pvr_fw_version {
+	u16 major, minor;
+};
+
 /**
  * struct pvr_device - powervr-specific wrapper for &struct drm_device
  */
@@ -40,6 +63,35 @@ struct pvr_device {
 	 */
 	struct drm_device base;
 
+	/** @gpu_id: GPU ID detected at runtime. */
+	struct pvr_gpu_id gpu_id;
+
+	/**
+	 * @features: Hardware feature information.
+	 *
+	 * Do not access this member directly, instead use PVR_HAS_FEATURE()
+	 * or PVR_FEATURE_VALUE() macros.
+	 */
+	struct pvr_device_features features;
+
+	/**
+	 * @quirks: Hardware quirk information.
+	 *
+	 * Do not access this member directly, instead use PVR_HAS_QUIRK().
+	 */
+	struct pvr_device_quirks quirks;
+
+	/**
+	 * @enhancements: Hardware enhancement information.
+	 *
+	 * Do not access this member directly, instead use
+	 * PVR_HAS_ENHANCEMENT().
+	 */
+	struct pvr_device_enhancements enhancements;
+
+	/** @fw_version: Firmware version detected at runtime. */
+	struct pvr_fw_version fw_version;
+
 	/**
 	 * @regs: Device control registers.
 	 *
@@ -70,6 +122,9 @@ struct pvr_device {
 	 * Interface (MEMIF). If present, this needs to be enabled/disabled together with @core_clk.
 	 */
 	struct clk *mem_clk;
+
+	/** @fw_dev: Firmware related data. */
+	struct pvr_fw_device fw_dev;
 };
 
 /**
@@ -92,6 +147,76 @@ struct pvr_file {
 	struct pvr_device *pvr_dev;
 };
 
+/**
+ * PVR_HAS_FEATURE() - Tests whether a PowerVR device has a given feature
+ * @pvr_dev: [IN] Target PowerVR device.
+ * @feature: [IN] Hardware feature name.
+ *
+ * Feature names are derived from those found in &struct pvr_device_features by
+ * dropping the 'has_' prefix, which is applied by this macro.
+ *
+ * Return:
+ *  * true if the named feature is present in the hardware
+ *  * false if the named feature is not present in the hardware
+ */
+#define PVR_HAS_FEATURE(pvr_dev, feature) ((pvr_dev)->features.has_##feature)
+
+/**
+ * PVR_FEATURE_VALUE() - Gets a PowerVR device feature value
+ * @pvr_dev: [IN] Target PowerVR device.
+ * @feature: [IN] Feature name.
+ * @value_out: [OUT] Feature value.
+ *
+ * This macro will get a feature value for those features that have values.
+ * If the feature is not present, nothing will be stored to @value_out.
+ *
+ * Feature names are derived from those found in &struct pvr_device_features by
+ * dropping the 'has_' prefix.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EINVAL if the named feature is not present in the hardware
+ */
+#define PVR_FEATURE_VALUE(pvr_dev, feature, value_out)             \
+	({                                                         \
+		struct pvr_device *_pvr_dev = pvr_dev;             \
+		int _ret = -EINVAL;                                \
+		if (_pvr_dev->features.has_##feature) {            \
+			*(value_out) = _pvr_dev->features.feature; \
+			_ret = 0;                                  \
+		}                                                  \
+		_ret;                                              \
+	})
+
+/**
+ * PVR_HAS_QUIRK() - Tests whether a physical device has a given quirk
+ * @pvr_dev: [IN] Target PowerVR device.
+ * @quirk: [IN] Hardware quirk name.
+ *
+ * Quirk numbers are derived from those found in #pvr_device_quirks by
+ * dropping the 'has_brn' prefix, which is applied by this macro.
+ *
+ * Returns
+ *  * true if the quirk is present in the hardware, or
+ *  * false if the quirk is not present in the hardware.
+ */
+#define PVR_HAS_QUIRK(pvr_dev, quirk) ((pvr_dev)->quirks.has_brn##quirk)
+
+/**
+ * PVR_HAS_ENHANCEMENT() - Tests whether a physical device has a given
+ *                         enhancement
+ * @pvr_dev: [IN] Target PowerVR device.
+ * @enhancement: [IN] Hardware enhancement name.
+ *
+ * Enhancement numbers are derived from those found in #pvr_device_enhancements
+ * by dropping the 'has_ern' prefix, which is applied by this macro.
+ *
+ * Returns
+ *  * true if the enhancement is present in the hardware, or
+ *  * false if the enhancement is not present in the hardware.
+ */
+#define PVR_HAS_ENHANCEMENT(pvr_dev, enhancement) ((pvr_dev)->enhancements.has_ern##enhancement)
+
 #define from_pvr_device(pvr_dev) (&(pvr_dev)->base)
 
 #define to_pvr_device(drm_dev) container_of_const(drm_dev, struct pvr_device, base)
@@ -100,9 +225,77 @@ struct pvr_file {
 
 #define to_pvr_file(file) ((file)->driver_priv)
 
+/**
+ * PVR_PACKED_BVNC() - Packs B, V, N and C values into a 64-bit unsigned integer
+ * @b: Branch ID.
+ * @v: Version ID.
+ * @n: Number of scalable units.
+ * @c: Config ID.
+ *
+ * The packed layout is as follows:
+ *
+ *    +--------+--------+--------+-------+
+ *    | 63..48 | 47..32 | 31..16 | 15..0 |
+ *    +========+========+========+=======+
+ *    | B      | V      | N      | C     |
+ *    +--------+--------+--------+-------+
+ *
+ * pvr_gpu_id_to_packed_bvnc() should be used instead of this macro when a
+ * &struct pvr_gpu_id is available in order to ensure proper type checking.
+ *
+ * Return: Packed BVNC.
+ */
+/* clang-format off */
+#define PVR_PACKED_BVNC(b, v, n, c) \
+	((((u64)(b) & GENMASK_ULL(15, 0)) << 48) | \
+	 (((u64)(v) & GENMASK_ULL(15, 0)) << 32) | \
+	 (((u64)(n) & GENMASK_ULL(15, 0)) << 16) | \
+	 (((u64)(c) & GENMASK_ULL(15, 0)) <<  0))
+/* clang-format on */
+
+/**
+ * pvr_gpu_id_to_packed_bvnc() - Packs B, V, N and C values into a 64-bit
+ * unsigned integer
+ * @gpu_id: GPU ID.
+ *
+ * The packed layout is as follows:
+ *
+ *    +--------+--------+--------+-------+
+ *    | 63..48 | 47..32 | 31..16 | 15..0 |
+ *    +========+========+========+=======+
+ *    | B      | V      | N      | C     |
+ *    +--------+--------+--------+-------+
+ *
+ * This should be used in preference to PVR_PACKED_BVNC() when a &struct
+ * pvr_gpu_id is available in order to ensure proper type checking.
+ *
+ * Return: Packed BVNC.
+ */
+static __always_inline u64
+pvr_gpu_id_to_packed_bvnc(struct pvr_gpu_id *gpu_id)
+{
+	return PVR_PACKED_BVNC(gpu_id->b, gpu_id->v, gpu_id->n, gpu_id->c);
+}
+
+static __always_inline void
+packed_bvnc_to_pvr_gpu_id(u64 bvnc, struct pvr_gpu_id *gpu_id)
+{
+	gpu_id->b = (bvnc & GENMASK_ULL(63, 48)) >> 48;
+	gpu_id->v = (bvnc & GENMASK_ULL(47, 32)) >> 32;
+	gpu_id->n = (bvnc & GENMASK_ULL(31, 16)) >> 16;
+	gpu_id->c = bvnc & GENMASK_ULL(15, 0);
+}
+
 int pvr_device_init(struct pvr_device *pvr_dev);
 void pvr_device_fini(struct pvr_device *pvr_dev);
 
+bool
+pvr_device_has_uapi_quirk(struct pvr_device *pvr_dev, u32 quirk);
+bool
+pvr_device_has_uapi_enhancement(struct pvr_device *pvr_dev, u32 enhancement);
+bool
+pvr_device_has_feature(struct pvr_device *pvr_dev, u32 feature);
+
 /**
  * PVR_CR_FIELD_GET() - Extract a single field from a PowerVR control register
  * @val: Value of the target register.
@@ -208,6 +401,29 @@ pvr_cr_poll_reg64(struct pvr_device *pvr_dev, u32 reg_addr, u64 reg_value,
 		(value & reg_mask) == reg_value, 0, timeout_usec);
 }
 
+/**
+ * pvr_round_up_to_cacheline_size() - Round up a provided size to be cacheline
+ *                                    aligned
+ * @pvr_dev: Target PowerVR device.
+ * @size: Initial size, in bytes.
+ *
+ * Returns:
+ *  * Size aligned to cacheline size.
+ */
+static __always_inline size_t
+pvr_round_up_to_cacheline_size(struct pvr_device *pvr_dev, size_t size)
+{
+	u16 slc_cacheline_size_bits = 0;
+	u16 slc_cacheline_size_bytes;
+
+	WARN_ON(!PVR_HAS_FEATURE(pvr_dev, slc_cache_line_size_bits));
+	PVR_FEATURE_VALUE(pvr_dev, slc_cache_line_size_bits,
+			  &slc_cacheline_size_bits);
+	slc_cacheline_size_bytes = slc_cacheline_size_bits / 8;
+
+	return round_up(size, slc_cacheline_size_bytes);
+}
+
 /**
  * DOC: IOCTL validation helpers
  *
@@ -302,4 +518,8 @@ pvr_ioctl_union_padding_check(void *instance, size_t union_offset,
 					      __union_size, __member_size);  \
 	})
 
+#define PVR_FW_PROCESSOR_TYPE_META  0
+#define PVR_FW_PROCESSOR_TYPE_MIPS  1
+#define PVR_FW_PROCESSOR_TYPE_RISCV 2
+
 #endif /* PVR_DEVICE_H */
diff --git a/drivers/gpu/drm/imagination/pvr_device_info.c b/drivers/gpu/drm/imagination/pvr_device_info.c
new file mode 100644
index 0000000000000..11e6bef52ecd0
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_device_info.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_device_info.h"
+#include "pvr_rogue_fwif_dev_info.h"
+
+#include <drm/drm_print.h>
+
+#include <linux/bits.h>
+#include <linux/minmax.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+#define QUIRK_MAPPING(quirk) \
+	[PVR_FW_HAS_BRN_##quirk] = offsetof(struct pvr_device, quirks.has_brn##quirk)
+
+static const uintptr_t quirks_mapping[] = {
+	QUIRK_MAPPING(44079),
+	QUIRK_MAPPING(47217),
+	QUIRK_MAPPING(48492),
+	QUIRK_MAPPING(48545),
+	QUIRK_MAPPING(49927),
+	QUIRK_MAPPING(50767),
+	QUIRK_MAPPING(51764),
+	QUIRK_MAPPING(62269),
+	QUIRK_MAPPING(63142),
+	QUIRK_MAPPING(63553),
+	QUIRK_MAPPING(66011),
+	QUIRK_MAPPING(71242),
+};
+
+#undef QUIRK_MAPPING
+
+#define ENHANCEMENT_MAPPING(enhancement)                             \
+	[PVR_FW_HAS_ERN_##enhancement] = offsetof(struct pvr_device, \
+						  enhancements.has_ern##enhancement)
+
+static const uintptr_t enhancements_mapping[] = {
+	ENHANCEMENT_MAPPING(35421),
+	ENHANCEMENT_MAPPING(38020),
+	ENHANCEMENT_MAPPING(38748),
+	ENHANCEMENT_MAPPING(42064),
+	ENHANCEMENT_MAPPING(42290),
+	ENHANCEMENT_MAPPING(42606),
+	ENHANCEMENT_MAPPING(47025),
+	ENHANCEMENT_MAPPING(57596),
+};
+
+#undef ENHANCEMENT_MAPPING
+
+static void pvr_device_info_set_common(struct pvr_device *pvr_dev, const u64 *bitmask,
+				       u32 bitmask_size, const uintptr_t *mapping, u32 mapping_max)
+{
+	const u32 mapping_max_size = (mapping_max + 63) >> 6;
+	const u32 nr_bits = min(bitmask_size * 64, mapping_max);
+
+	/* Warn if any unsupported values in the bitmask. */
+	if (bitmask_size > mapping_max_size) {
+		if (mapping == quirks_mapping)
+			drm_warn(from_pvr_device(pvr_dev), "Unsupported quirks in firmware image");
+		else
+			drm_warn(from_pvr_device(pvr_dev),
+				 "Unsupported enhancements in firmware image");
+	} else if (bitmask_size == mapping_max_size && (mapping_max & 63)) {
+		u64 invalid_mask = ~0ull << (mapping_max & 63);
+
+		if (bitmask[bitmask_size - 1] & invalid_mask) {
+			if (mapping == quirks_mapping)
+				drm_warn(from_pvr_device(pvr_dev),
+					 "Unsupported quirks in firmware image");
+			else
+				drm_warn(from_pvr_device(pvr_dev),
+					 "Unsupported enhancements in firmware image");
+		}
+	}
+
+	for (u32 i = 0; i < nr_bits; i++) {
+		if (bitmask[i >> 6] & BIT_ULL(i & 63))
+			*(bool *)((u8 *)pvr_dev + mapping[i]) = true;
+	}
+}
+
+/**
+ * pvr_device_info_set_quirks() - Set device quirks from device information in firmware
+ * @pvr_dev: Device pointer.
+ * @quirks: Pointer to quirks mask in device information.
+ * @quirks_size: Size of quirks mask, in u64s.
+ */
+void pvr_device_info_set_quirks(struct pvr_device *pvr_dev, const u64 *quirks, u32 quirks_size)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(quirks_mapping) != PVR_FW_HAS_BRN_MAX);
+
+	pvr_device_info_set_common(pvr_dev, quirks, quirks_size, quirks_mapping,
+				   ARRAY_SIZE(quirks_mapping));
+}
+
+/**
+ * pvr_device_info_set_enhancements() - Set device enhancements from device information in firmware
+ * @pvr_dev: Device pointer.
+ * @enhancements: Pointer to enhancements mask in device information.
+ * @enhancements_size: Size of enhancements mask, in u64s.
+ */
+void pvr_device_info_set_enhancements(struct pvr_device *pvr_dev, const u64 *enhancements,
+				      u32 enhancements_size)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(enhancements_mapping) != PVR_FW_HAS_ERN_MAX);
+
+	pvr_device_info_set_common(pvr_dev, enhancements, enhancements_size,
+				   enhancements_mapping, ARRAY_SIZE(enhancements_mapping));
+}
+
+#define FEATURE_MAPPING(fw_feature, feature)                                        \
+	[PVR_FW_HAS_FEATURE_##fw_feature] = {                                       \
+		.flag_offset = offsetof(struct pvr_device, features.has_##feature), \
+		.value_offset = 0                                                   \
+	}
+
+#define FEATURE_MAPPING_VALUE(fw_feature, feature)                                  \
+	[PVR_FW_HAS_FEATURE_##fw_feature] = {                                       \
+		.flag_offset = offsetof(struct pvr_device, features.has_##feature), \
+		.value_offset = offsetof(struct pvr_device, features.feature)       \
+	}
+
+static const struct {
+	uintptr_t flag_offset;
+	uintptr_t value_offset;
+} features_mapping[] = {
+	FEATURE_MAPPING(AXI_ACELITE, axi_acelite),
+	FEATURE_MAPPING_VALUE(CDM_CONTROL_STREAM_FORMAT, cdm_control_stream_format),
+	FEATURE_MAPPING(CLUSTER_GROUPING, cluster_grouping),
+	FEATURE_MAPPING_VALUE(COMMON_STORE_SIZE_IN_DWORDS, common_store_size_in_dwords),
+	FEATURE_MAPPING(COMPUTE, compute),
+	FEATURE_MAPPING(COMPUTE_MORTON_CAPABLE, compute_morton_capable),
+	FEATURE_MAPPING(COMPUTE_OVERLAP, compute_overlap),
+	FEATURE_MAPPING(COREID_PER_OS, coreid_per_os),
+	FEATURE_MAPPING(DYNAMIC_DUST_POWER, dynamic_dust_power),
+	FEATURE_MAPPING_VALUE(ECC_RAMS, ecc_rams),
+	FEATURE_MAPPING_VALUE(FBCDC, fbcdc),
+	FEATURE_MAPPING_VALUE(FBCDC_ALGORITHM, fbcdc_algorithm),
+	FEATURE_MAPPING_VALUE(FBCDC_ARCHITECTURE, fbcdc_architecture),
+	FEATURE_MAPPING_VALUE(FBC_MAX_DEFAULT_DESCRIPTORS, fbc_max_default_descriptors),
+	FEATURE_MAPPING_VALUE(FBC_MAX_LARGE_DESCRIPTORS, fbc_max_large_descriptors),
+	FEATURE_MAPPING(FB_CDC_V4, fb_cdc_v4),
+	FEATURE_MAPPING(GPU_MULTICORE_SUPPORT, gpu_multicore_support),
+	FEATURE_MAPPING(GPU_VIRTUALISATION, gpu_virtualisation),
+	FEATURE_MAPPING(GS_RTA_SUPPORT, gs_rta_support),
+	FEATURE_MAPPING(IRQ_PER_OS, irq_per_os),
+	FEATURE_MAPPING_VALUE(ISP_MAX_TILES_IN_FLIGHT, isp_max_tiles_in_flight),
+	FEATURE_MAPPING_VALUE(ISP_SAMPLES_PER_PIXEL, isp_samples_per_pixel),
+	FEATURE_MAPPING(ISP_ZLS_D24_S8_PACKING_OGL_MODE, isp_zls_d24_s8_packing_ogl_mode),
+	FEATURE_MAPPING_VALUE(LAYOUT_MARS, layout_mars),
+	FEATURE_MAPPING_VALUE(MAX_PARTITIONS, max_partitions),
+	FEATURE_MAPPING_VALUE(META, meta),
+	FEATURE_MAPPING_VALUE(META_COREMEM_SIZE, meta_coremem_size),
+	FEATURE_MAPPING(MIPS, mips),
+	FEATURE_MAPPING_VALUE(NUM_CLUSTERS, num_clusters),
+	FEATURE_MAPPING_VALUE(NUM_ISP_IPP_PIPES, num_isp_ipp_pipes),
+	FEATURE_MAPPING_VALUE(NUM_OSIDS, num_osids),
+	FEATURE_MAPPING_VALUE(NUM_RASTER_PIPES, num_raster_pipes),
+	FEATURE_MAPPING(PBE2_IN_XE, pbe2_in_xe),
+	FEATURE_MAPPING(PBVNC_COREID_REG, pbvnc_coreid_reg),
+	FEATURE_MAPPING(PERFBUS, perfbus),
+	FEATURE_MAPPING(PERF_COUNTER_BATCH, perf_counter_batch),
+	FEATURE_MAPPING_VALUE(PHYS_BUS_WIDTH, phys_bus_width),
+	FEATURE_MAPPING(RISCV_FW_PROCESSOR, riscv_fw_processor),
+	FEATURE_MAPPING(ROGUEXE, roguexe),
+	FEATURE_MAPPING(S7_TOP_INFRASTRUCTURE, s7_top_infrastructure),
+	FEATURE_MAPPING(SIMPLE_INTERNAL_PARAMETER_FORMAT, simple_internal_parameter_format),
+	FEATURE_MAPPING(SIMPLE_INTERNAL_PARAMETER_FORMAT_V2, simple_internal_parameter_format_v2),
+	FEATURE_MAPPING_VALUE(SIMPLE_PARAMETER_FORMAT_VERSION, simple_parameter_format_version),
+	FEATURE_MAPPING_VALUE(SLC_BANKS, slc_banks),
+	FEATURE_MAPPING_VALUE(SLC_CACHE_LINE_SIZE_BITS, slc_cache_line_size_bits),
+	FEATURE_MAPPING(SLC_SIZE_CONFIGURABLE, slc_size_configurable),
+	FEATURE_MAPPING_VALUE(SLC_SIZE_IN_KILOBYTES, slc_size_in_kilobytes),
+	FEATURE_MAPPING(SOC_TIMER, soc_timer),
+	FEATURE_MAPPING(SYS_BUS_SECURE_RESET, sys_bus_secure_reset),
+	FEATURE_MAPPING(TESSELLATION, tessellation),
+	FEATURE_MAPPING(TILE_REGION_PROTECTION, tile_region_protection),
+	FEATURE_MAPPING_VALUE(TILE_SIZE_X, tile_size_x),
+	FEATURE_MAPPING_VALUE(TILE_SIZE_Y, tile_size_y),
+	FEATURE_MAPPING(TLA, tla),
+	FEATURE_MAPPING(TPU_CEM_DATAMASTER_GLOBAL_REGISTERS, tpu_cem_datamaster_global_registers),
+	FEATURE_MAPPING(TPU_DM_GLOBAL_REGISTERS, tpu_dm_global_registers),
+	FEATURE_MAPPING(TPU_FILTERING_MODE_CONTROL, tpu_filtering_mode_control),
+	FEATURE_MAPPING_VALUE(USC_MIN_OUTPUT_REGISTERS_PER_PIX, usc_min_output_registers_per_pix),
+	FEATURE_MAPPING(VDM_DRAWINDIRECT, vdm_drawindirect),
+	FEATURE_MAPPING(VDM_OBJECT_LEVEL_LLS, vdm_object_level_lls),
+	FEATURE_MAPPING_VALUE(VIRTUAL_ADDRESS_SPACE_BITS, virtual_address_space_bits),
+	FEATURE_MAPPING(WATCHDOG_TIMER, watchdog_timer),
+	FEATURE_MAPPING(WORKGROUP_PROTECTION, workgroup_protection),
+	FEATURE_MAPPING_VALUE(XE_ARCHITECTURE, xe_architecture),
+	FEATURE_MAPPING(XE_MEMORY_HIERARCHY, xe_memory_hierarchy),
+	FEATURE_MAPPING(XE_TPU2, xe_tpu2),
+	FEATURE_MAPPING_VALUE(XPU_MAX_REGBANKS_ADDR_WIDTH, xpu_max_regbanks_addr_width),
+	FEATURE_MAPPING_VALUE(XPU_MAX_SLAVES, xpu_max_slaves),
+	FEATURE_MAPPING_VALUE(XPU_REGISTER_BROADCAST, xpu_register_broadcast),
+	FEATURE_MAPPING(XT_TOP_INFRASTRUCTURE, xt_top_infrastructure),
+	FEATURE_MAPPING(ZLS_SUBTILE, zls_subtile),
+};
+
+#undef FEATURE_MAPPING_VALUE
+#undef FEATURE_MAPPING
+
+/**
+ * pvr_device_info_set_features() - Set device features from device information in firmware
+ * @pvr_dev: Device pointer.
+ * @features: Pointer to features mask in device information.
+ * @features_size: Size of features mask, in u64s.
+ * @feature_param_size: Size of feature parameters, in u64s.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL on malformed stream.
+ */
+int pvr_device_info_set_features(struct pvr_device *pvr_dev, const u64 *features, u32 features_size,
+				 u32 feature_param_size)
+{
+	const u32 mapping_max = ARRAY_SIZE(features_mapping);
+	const u32 mapping_max_size = (mapping_max + 63) >> 6;
+	const u32 nr_bits = min(features_size * 64, mapping_max);
+	const u64 *feature_params = features + features_size;
+	u32 param_idx = 0;
+
+	BUILD_BUG_ON(ARRAY_SIZE(features_mapping) != PVR_FW_HAS_FEATURE_MAX);
+
+	/* Verify no unsupported values in the bitmask. */
+	if (features_size > mapping_max_size) {
+		drm_warn(from_pvr_device(pvr_dev), "Unsupported features in firmware image");
+	} else if (features_size == mapping_max_size && (mapping_max & 63)) {
+		u64 invalid_mask = ~0ull << (mapping_max & 63);
+
+		if (features[features_size - 1] & invalid_mask)
+			drm_warn(from_pvr_device(pvr_dev),
+				 "Unsupported features in firmware image");
+	}
+
+	for (u32 i = 0; i < nr_bits; i++) {
+		if (features[i >> 6] & BIT_ULL(i & 63)) {
+			*(bool *)((u8 *)pvr_dev + features_mapping[i].flag_offset) = true;
+
+			if (features_mapping[i].value_offset) {
+				if (param_idx >= feature_param_size)
+					return -EINVAL;
+
+				*(u64 *)((u8 *)pvr_dev + features_mapping[i].value_offset) =
+					feature_params[param_idx];
+				param_idx++;
+			}
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_device_info.h b/drivers/gpu/drm/imagination/pvr_device_info.h
new file mode 100644
index 0000000000000..f61fb988b5539
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_device_info.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_DEVICE_INFO_H
+#define PVR_DEVICE_INFO_H
+
+#include <linux/types.h>
+
+struct pvr_device;
+
+/*
+ * struct pvr_device_features - Hardware feature information
+ */
+struct pvr_device_features {
+	bool has_axi_acelite;
+	bool has_cdm_control_stream_format;
+	bool has_cluster_grouping;
+	bool has_common_store_size_in_dwords;
+	bool has_compute;
+	bool has_compute_morton_capable;
+	bool has_compute_overlap;
+	bool has_coreid_per_os;
+	bool has_dynamic_dust_power;
+	bool has_ecc_rams;
+	bool has_fb_cdc_v4;
+	bool has_fbc_max_default_descriptors;
+	bool has_fbc_max_large_descriptors;
+	bool has_fbcdc;
+	bool has_fbcdc_algorithm;
+	bool has_fbcdc_architecture;
+	bool has_gpu_multicore_support;
+	bool has_gpu_virtualisation;
+	bool has_gs_rta_support;
+	bool has_irq_per_os;
+	bool has_isp_max_tiles_in_flight;
+	bool has_isp_samples_per_pixel;
+	bool has_isp_zls_d24_s8_packing_ogl_mode;
+	bool has_layout_mars;
+	bool has_max_partitions;
+	bool has_meta;
+	bool has_meta_coremem_size;
+	bool has_mips;
+	bool has_num_clusters;
+	bool has_num_isp_ipp_pipes;
+	bool has_num_osids;
+	bool has_num_raster_pipes;
+	bool has_pbe2_in_xe;
+	bool has_pbvnc_coreid_reg;
+	bool has_perfbus;
+	bool has_perf_counter_batch;
+	bool has_phys_bus_width;
+	bool has_riscv_fw_processor;
+	bool has_roguexe;
+	bool has_s7_top_infrastructure;
+	bool has_simple_internal_parameter_format;
+	bool has_simple_internal_parameter_format_v2;
+	bool has_simple_parameter_format_version;
+	bool has_slc_banks;
+	bool has_slc_cache_line_size_bits;
+	bool has_slc_size_configurable;
+	bool has_slc_size_in_kilobytes;
+	bool has_soc_timer;
+	bool has_sys_bus_secure_reset;
+	bool has_tessellation;
+	bool has_tile_region_protection;
+	bool has_tile_size_x;
+	bool has_tile_size_y;
+	bool has_tla;
+	bool has_tpu_cem_datamaster_global_registers;
+	bool has_tpu_dm_global_registers;
+	bool has_tpu_filtering_mode_control;
+	bool has_usc_min_output_registers_per_pix;
+	bool has_vdm_drawindirect;
+	bool has_vdm_object_level_lls;
+	bool has_virtual_address_space_bits;
+	bool has_watchdog_timer;
+	bool has_workgroup_protection;
+	bool has_xe_architecture;
+	bool has_xe_memory_hierarchy;
+	bool has_xe_tpu2;
+	bool has_xpu_max_regbanks_addr_width;
+	bool has_xpu_max_slaves;
+	bool has_xpu_register_broadcast;
+	bool has_xt_top_infrastructure;
+	bool has_zls_subtile;
+
+	u64 cdm_control_stream_format;
+	u64 common_store_size_in_dwords;
+	u64 ecc_rams;
+	u64 fbc_max_default_descriptors;
+	u64 fbc_max_large_descriptors;
+	u64 fbcdc;
+	u64 fbcdc_algorithm;
+	u64 fbcdc_architecture;
+	u64 isp_max_tiles_in_flight;
+	u64 isp_samples_per_pixel;
+	u64 layout_mars;
+	u64 max_partitions;
+	u64 meta;
+	u64 meta_coremem_size;
+	u64 num_clusters;
+	u64 num_isp_ipp_pipes;
+	u64 num_osids;
+	u64 num_raster_pipes;
+	u64 phys_bus_width;
+	u64 simple_parameter_format_version;
+	u64 slc_banks;
+	u64 slc_cache_line_size_bits;
+	u64 slc_size_in_kilobytes;
+	u64 tile_size_x;
+	u64 tile_size_y;
+	u64 usc_min_output_registers_per_pix;
+	u64 virtual_address_space_bits;
+	u64 xe_architecture;
+	u64 xpu_max_regbanks_addr_width;
+	u64 xpu_max_slaves;
+	u64 xpu_register_broadcast;
+};
+
+/*
+ * struct pvr_device_quirks - Hardware quirk information
+ */
+struct pvr_device_quirks {
+	bool has_brn44079;
+	bool has_brn47217;
+	bool has_brn48492;
+	bool has_brn48545;
+	bool has_brn49927;
+	bool has_brn50767;
+	bool has_brn51764;
+	bool has_brn62269;
+	bool has_brn63142;
+	bool has_brn63553;
+	bool has_brn66011;
+	bool has_brn71242;
+};
+
+/*
+ * struct pvr_device_enhancements - Hardware enhancement information
+ */
+struct pvr_device_enhancements {
+	bool has_ern35421;
+	bool has_ern38020;
+	bool has_ern38748;
+	bool has_ern42064;
+	bool has_ern42290;
+	bool has_ern42606;
+	bool has_ern47025;
+	bool has_ern57596;
+};
+
+void pvr_device_info_set_quirks(struct pvr_device *pvr_dev, const u64 *bitmask,
+				u32 bitmask_len);
+void pvr_device_info_set_enhancements(struct pvr_device *pvr_dev, const u64 *bitmask,
+				      u32 bitmask_len);
+int pvr_device_info_set_features(struct pvr_device *pvr_dev, const u64 *features, u32 features_size,
+				 u32 feature_param_size);
+
+/*
+ * Meta cores
+ *
+ * These are the values for the 'meta' feature when the feature is present
+ * (as per &struct pvr_device_features)/
+ */
+#define PVR_META_MTP218 (1)
+#define PVR_META_MTP219 (2)
+#define PVR_META_LTP218 (3)
+#define PVR_META_LTP217 (4)
+
+enum {
+	PVR_FEATURE_CDM_USER_MODE_QUEUE,
+	PVR_FEATURE_CLUSTER_GROUPING,
+	PVR_FEATURE_COMPUTE_MORTON_CAPABLE,
+	PVR_FEATURE_FB_CDC_V4,
+	PVR_FEATURE_GPU_MULTICORE_SUPPORT,
+	PVR_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE,
+	PVR_FEATURE_REQUIRES_FB_CDC_ZLS_SETUP,
+	PVR_FEATURE_S7_TOP_INFRASTRUCTURE,
+	PVR_FEATURE_TESSELLATION,
+	PVR_FEATURE_TPU_DM_GLOBAL_REGISTERS,
+	PVR_FEATURE_VDM_DRAWINDIRECT,
+	PVR_FEATURE_VDM_OBJECT_LEVEL_LLS,
+	PVR_FEATURE_ZLS_SUBTILE,
+};
+
+#endif /* PVR_DEVICE_INFO_H */
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index 597188b31f965..1e4408f33e94e 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -3,6 +3,9 @@
 
 #include "pvr_device.h"
 #include "pvr_drv.h"
+#include "pvr_rogue_defs.h"
+#include "pvr_rogue_fwif_client.h"
+#include "pvr_rogue_fwif_shared.h"
 
 #include <uapi/drm/pvr_drm.h>
 
@@ -87,6 +90,382 @@ pvr_ioctl_get_bo_mmap_offset(struct drm_device *drm_dev, void *raw_args,
 	return -ENOTTY;
 }
 
+static __always_inline u64
+pvr_fw_version_packed(u32 major, u32 minor)
+{
+	return ((u64)major << 32) | minor;
+}
+
+static u32
+rogue_get_common_store_partition_space_size(struct pvr_device *pvr_dev)
+{
+	u32 max_partitions = 0;
+	u32 tile_size_x = 0;
+	u32 tile_size_y = 0;
+
+	PVR_FEATURE_VALUE(pvr_dev, tile_size_x, &tile_size_x);
+	PVR_FEATURE_VALUE(pvr_dev, tile_size_y, &tile_size_y);
+	PVR_FEATURE_VALUE(pvr_dev, max_partitions, &max_partitions);
+
+	if (tile_size_x == 16 && tile_size_y == 16) {
+		u32 usc_min_output_registers_per_pix = 0;
+
+		PVR_FEATURE_VALUE(pvr_dev, usc_min_output_registers_per_pix,
+				  &usc_min_output_registers_per_pix);
+
+		return tile_size_x * tile_size_y * max_partitions *
+		       usc_min_output_registers_per_pix;
+	}
+
+	return max_partitions * 1024;
+}
+
+static u32
+rogue_get_common_store_alloc_region_size(struct pvr_device *pvr_dev)
+{
+	u32 common_store_size_in_dwords = 512 * 4 * 4;
+	u32 alloc_region_size;
+
+	PVR_FEATURE_VALUE(pvr_dev, common_store_size_in_dwords, &common_store_size_in_dwords);
+
+	alloc_region_size = common_store_size_in_dwords - (256U * 4U) -
+			    rogue_get_common_store_partition_space_size(pvr_dev);
+
+	if (PVR_HAS_QUIRK(pvr_dev, 44079)) {
+		u32 common_store_split_point = (768U * 4U * 4U);
+
+		return min(common_store_split_point - (256U * 4U), alloc_region_size);
+	}
+
+	return alloc_region_size;
+}
+
+static inline u32
+rogue_get_num_phantoms(struct pvr_device *pvr_dev)
+{
+	u32 num_clusters = 1;
+
+	PVR_FEATURE_VALUE(pvr_dev, num_clusters, &num_clusters);
+
+	return ROGUE_REQ_NUM_PHANTOMS(num_clusters);
+}
+
+static inline u32
+rogue_get_max_coeffs(struct pvr_device *pvr_dev)
+{
+	u32 max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS;
+	u32 pending_allocation_shared_regs = 2U * 1024U;
+	u32 pending_allocation_coeff_regs = 0U;
+	u32 num_phantoms = rogue_get_num_phantoms(pvr_dev);
+	u32 tiles_in_flight = 0;
+	u32 max_coeff_pixel_portion;
+
+	PVR_FEATURE_VALUE(pvr_dev, isp_max_tiles_in_flight, &tiles_in_flight);
+	max_coeff_pixel_portion = DIV_ROUND_UP(tiles_in_flight, num_phantoms);
+	max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS;
+
+	/*
+	 * Compute tasks on cores with BRN48492 and without compute overlap may lock
+	 * up without two additional lines of coeffs.
+	 */
+	if (PVR_HAS_QUIRK(pvr_dev, 48492) && !PVR_HAS_FEATURE(pvr_dev, compute_overlap))
+		pending_allocation_coeff_regs = 2U * 1024U;
+
+	if (PVR_HAS_ENHANCEMENT(pvr_dev, 38748))
+		pending_allocation_shared_regs = 0;
+
+	if (PVR_HAS_ENHANCEMENT(pvr_dev, 38020))
+		max_coeff_additional_portion += ROGUE_MAX_COMPUTE_SHARED_REGISTERS;
+
+	return rogue_get_common_store_alloc_region_size(pvr_dev) + pending_allocation_coeff_regs -
+		(max_coeff_pixel_portion + max_coeff_additional_portion +
+		 pending_allocation_shared_regs);
+}
+
+static inline u32
+rogue_get_cdm_max_local_mem_size_regs(struct pvr_device *pvr_dev)
+{
+	u32 available_coeffs_in_dwords = rogue_get_max_coeffs(pvr_dev);
+
+	if (PVR_HAS_QUIRK(pvr_dev, 48492) && PVR_HAS_FEATURE(pvr_dev, roguexe) &&
+	    !PVR_HAS_FEATURE(pvr_dev, compute_overlap)) {
+		/* Driver must not use the 2 reserved lines. */
+		available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2;
+	}
+
+	/*
+	 * The maximum amount of local memory available to a kernel is the minimum
+	 * of the total number of coefficient registers available and the max common
+	 * store allocation size which can be made by the CDM.
+	 *
+	 * If any coeff lines are reserved for tessellation or pixel then we need to
+	 * subtract those too.
+	 */
+	return min(available_coeffs_in_dwords, (u32)ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS);
+}
+
+/**
+ * pvr_dev_query_gpu_info_get()
+ * @pvr_dev: Device pointer.
+ * @args: [IN] Device query arguments containing a pointer to a userspace
+ *        struct drm_pvr_dev_query_gpu_info.
+ *
+ * If the query object pointer is NULL, the size field is updated with the
+ * expected size of the query object.
+ *
+ * Returns:
+ *  * 0 on success, or if size is requested using a NULL pointer, or
+ *  * -%E2BIG if the indicated length of the allocation is less than is
+ *    required to contain the copied data, or
+ *  * -%EFAULT if local memory could not be copied to userspace.
+ */
+static int
+pvr_dev_query_gpu_info_get(struct pvr_device *pvr_dev,
+			   struct drm_pvr_ioctl_dev_query_args *args)
+{
+	struct drm_pvr_dev_query_gpu_info gpu_info = {0};
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_gpu_info);
+		return 0;
+	}
+
+	gpu_info.gpu_id =
+		pvr_gpu_id_to_packed_bvnc(&pvr_dev->gpu_id);
+	gpu_info.num_phantoms = rogue_get_num_phantoms(pvr_dev);
+
+	err = PVR_UOBJ_SET(args->pointer, args->size, gpu_info);
+	if (err < 0)
+		return err;
+
+	if (args->size > sizeof(gpu_info))
+		args->size = sizeof(gpu_info);
+	return 0;
+}
+
+/**
+ * pvr_dev_query_runtime_info_get()
+ * @pvr_dev: Device pointer.
+ * @args: [IN] Device query arguments containing a pointer to a userspace
+ *        struct drm_pvr_dev_query_runtime_info.
+ *
+ * If the query object pointer is NULL, the size field is updated with the
+ * expected size of the query object.
+ *
+ * Returns:
+ *  * 0 on success, or if size is requested using a NULL pointer, or
+ *  * -%E2BIG if the indicated length of the allocation is less than is
+ *    required to contain the copied data, or
+ *  * -%EFAULT if local memory could not be copied to userspace.
+ */
+static int
+pvr_dev_query_runtime_info_get(struct pvr_device *pvr_dev,
+			       struct drm_pvr_ioctl_dev_query_args *args)
+{
+	struct drm_pvr_dev_query_runtime_info runtime_info = {0};
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_runtime_info);
+		return 0;
+	}
+
+	runtime_info.free_list_min_pages = 0; /* FIXME */
+	runtime_info.free_list_max_pages =
+		ROGUE_PM_MAX_FREELIST_SIZE / ROGUE_PM_PAGE_SIZE;
+	runtime_info.common_store_alloc_region_size =
+		rogue_get_common_store_alloc_region_size(pvr_dev);
+	runtime_info.common_store_partition_space_size =
+		rogue_get_common_store_partition_space_size(pvr_dev);
+	runtime_info.max_coeffs = rogue_get_max_coeffs(pvr_dev);
+	runtime_info.cdm_max_local_mem_size_regs =
+		rogue_get_cdm_max_local_mem_size_regs(pvr_dev);
+
+	err = PVR_UOBJ_SET(args->pointer, args->size, runtime_info);
+	if (err < 0)
+		return err;
+
+	if (args->size > sizeof(runtime_info))
+		args->size = sizeof(runtime_info);
+	return 0;
+}
+
+/**
+ * pvr_dev_query_quirks_get() - Unpack array of quirks at the address given
+ * in a struct drm_pvr_dev_query_quirks, or gets the amount of space required
+ * for it.
+ * @pvr_dev: Device pointer.
+ * @args: [IN] Device query arguments containing a pointer to a userspace
+ *        struct drm_pvr_dev_query_query_quirks.
+ *
+ * If the query object pointer is NULL, the size field is updated with the
+ * expected size of the query object.
+ * If the userspace pointer in the query object is NULL, or the count is
+ * short, no data is copied.
+ * The count field will be updated to that copied, or if either pointer is
+ * NULL, that which would have been copied.
+ * The size field in the query object will be updated to the size copied.
+ *
+ * Returns:
+ *  * 0 on success, or if size/count is requested using a NULL pointer, or
+ *  * -%EINVAL if args contained non-zero reserved fields, or
+ *  * -%E2BIG if the indicated length of the allocation is less than is
+ *    required to contain the copied data, or
+ *  * -%EFAULT if local memory could not be copied to userspace.
+ */
+static int
+pvr_dev_query_quirks_get(struct pvr_device *pvr_dev,
+			 struct drm_pvr_ioctl_dev_query_args *args)
+{
+	/*
+	 * @FIXME - hardcoding of numbers here is intended as an
+	 * intermediate step so the UAPI can be fixed, but requires a
+	 * a refactor in the future to store them in a more appropriate
+	 * location
+	 */
+	static const u32 umd_quirks_musthave[] = {
+		47217,
+		49927,
+		62269,
+	};
+	static const u32 umd_quirks[] = {
+		48545,
+		51764,
+	};
+	struct drm_pvr_dev_query_quirks query;
+	u32 out[ARRAY_SIZE(umd_quirks_musthave) + ARRAY_SIZE(umd_quirks)];
+	size_t out_musthave_count = 0;
+	size_t out_count = 0;
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_quirks);
+		return 0;
+	}
+
+	err = PVR_UOBJ_GET(query, args->size, args->pointer);
+
+	if (err < 0)
+		return err;
+	if (query._padding_c)
+		return -EINVAL;
+
+	for (int i = 0; i < ARRAY_SIZE(umd_quirks_musthave); i++) {
+		if (pvr_device_has_uapi_quirk(pvr_dev, umd_quirks_musthave[i])) {
+			out[out_count++] = umd_quirks_musthave[i];
+			out_musthave_count++;
+		}
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(umd_quirks); i++) {
+		if (pvr_device_has_uapi_quirk(pvr_dev, umd_quirks[i]))
+			out[out_count++] = umd_quirks[i];
+	}
+
+	if (!query.quirks)
+		goto copy_out;
+	if (query.count < out_count)
+		return -E2BIG;
+
+	if (copy_to_user(u64_to_user_ptr(query.quirks), out,
+			 out_count * sizeof(u32))) {
+		return -EFAULT;
+	}
+
+	query.musthave_count = out_musthave_count;
+
+copy_out:
+	query.count = out_count;
+	err = PVR_UOBJ_SET(args->pointer, args->size, query);
+	if (err < 0)
+		return err;
+
+	args->size = sizeof(query);
+	return 0;
+}
+
+/**
+ * pvr_dev_query_enhancements_get() - Unpack array of enhancements at the
+ * address given in a struct drm_pvr_dev_query_enhancements, or gets the amount
+ * of space required for it.
+ * @pvr_dev: Device pointer.
+ * @args: [IN] Device query arguments containing a pointer to a userspace
+ *        struct drm_pvr_dev_query_enhancements.
+ *
+ * If the query object pointer is NULL, the size field is updated with the
+ * expected size of the query object.
+ * If the userspace pointer in the query object is NULL, or the count is
+ * short, no data is copied.
+ * The count field will be updated to that copied, or if either pointer is
+ * NULL, that which would have been copied.
+ * The size field in the query object will be updated to the size copied.
+ *
+ * Returns:
+ *  * 0 on success, or if size/count is requested using a NULL pointer, or
+ *  * -%EINVAL if args contained non-zero reserved fields, or
+ *  * -%E2BIG if the indicated length of the allocation is less than is
+ *    required to contain the copied data, or
+ *  * -%EFAULT if local memory could not be copied to userspace.
+ */
+static int
+pvr_dev_query_enhancements_get(struct pvr_device *pvr_dev,
+			       struct drm_pvr_ioctl_dev_query_args *args)
+{
+	/*
+	 * @FIXME - hardcoding of numbers here is intended as an
+	 * intermediate step so the UAPI can be fixed, but requires a
+	 * a refactor in the future to store them in a more appropriate
+	 * location
+	 */
+	const u32 umd_enhancements[] = {
+		35421,
+		42064,
+	};
+	struct drm_pvr_dev_query_enhancements query;
+	u32 out[ARRAY_SIZE(umd_enhancements)];
+	size_t out_idx = 0;
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_enhancements);
+		return 0;
+	}
+
+	err = PVR_UOBJ_GET(query, args->size, args->pointer);
+
+	if (err < 0)
+		return err;
+	if (query._padding_a)
+		return -EINVAL;
+	if (query._padding_c)
+		return -EINVAL;
+
+	for (int i = 0; i < ARRAY_SIZE(umd_enhancements); i++) {
+		if (pvr_device_has_uapi_enhancement(pvr_dev, umd_enhancements[i]))
+			out[out_idx++] = umd_enhancements[i];
+	}
+
+	if (!query.enhancements)
+		goto copy_out;
+	if (query.count < out_idx)
+		return -E2BIG;
+
+	if (copy_to_user(u64_to_user_ptr(query.enhancements), out,
+			 out_idx * sizeof(u32))) {
+		return -EFAULT;
+	}
+
+copy_out:
+	query.count = out_idx;
+	err = PVR_UOBJ_SET(args->pointer, args->size, query);
+	if (err < 0)
+		return err;
+
+	args->size = sizeof(query);
+	return 0;
+}
+
 /**
  * pvr_ioctl_dev_query() - IOCTL to copy information about a device
  * @drm_dev: [IN] DRM device.
@@ -111,7 +490,41 @@ static int
 pvr_ioctl_dev_query(struct drm_device *drm_dev, void *raw_args,
 		    struct drm_file *file)
 {
-	return -ENOTTY;
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct drm_pvr_ioctl_dev_query_args *args = raw_args;
+	int idx;
+	int ret = -EINVAL;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	switch ((enum drm_pvr_dev_query)args->type) {
+	case DRM_PVR_DEV_QUERY_GPU_INFO_GET:
+		ret = pvr_dev_query_gpu_info_get(pvr_dev, args);
+		break;
+
+	case DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET:
+		ret = pvr_dev_query_runtime_info_get(pvr_dev, args);
+		break;
+
+	case DRM_PVR_DEV_QUERY_QUIRKS_GET:
+		ret = pvr_dev_query_quirks_get(pvr_dev, args);
+		break;
+
+	case DRM_PVR_DEV_QUERY_ENHANCEMENTS_GET:
+		ret = pvr_dev_query_enhancements_get(pvr_dev, args);
+		break;
+
+	case DRM_PVR_DEV_QUERY_HEAP_INFO_GET:
+		return -EINVAL;
+
+	case DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET:
+		return -EINVAL;
+	}
+
+	drm_dev_exit(idx);
+
+	return ret;
 }
 
 /**
@@ -349,6 +762,112 @@ pvr_ioctl_submit_jobs(struct drm_device *drm_dev, void *raw_args,
 	return -ENOTTY;
 }
 
+int
+pvr_get_uobj(u64 usr_ptr, u32 usr_stride, u32 min_stride, u32 obj_size, void *out)
+{
+	if (usr_stride < min_stride)
+		return -EINVAL;
+
+	return copy_struct_from_user(out, obj_size, u64_to_user_ptr(usr_ptr), usr_stride);
+}
+
+int
+pvr_set_uobj(u64 usr_ptr, u32 usr_stride, u32 min_stride, u32 obj_size, const void *in)
+{
+	if (usr_stride < min_stride)
+		return -EINVAL;
+
+	if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_stride, obj_size)))
+		return -EFAULT;
+
+	if (usr_stride > obj_size &&
+	    clear_user(u64_to_user_ptr(usr_ptr + obj_size), usr_stride - obj_size)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int
+pvr_get_uobj_array(const struct drm_pvr_obj_array *in, u32 min_stride, u32 obj_size, void **out)
+{
+	int ret = 0;
+	void *out_alloc;
+
+	if (in->stride < min_stride)
+		return -EINVAL;
+
+	if (!in->count)
+		return 0;
+
+	out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL);
+	if (!out_alloc)
+		return -ENOMEM;
+
+	if (obj_size == in->stride) {
+		if (copy_from_user(out_alloc, u64_to_user_ptr(in->array),
+				   (unsigned long)obj_size * in->count))
+			ret = -EFAULT;
+	} else {
+		void __user *in_ptr = u64_to_user_ptr(in->array);
+		void *out_ptr = out_alloc;
+
+		for (u32 i = 0; i < in->count; i++) {
+			ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride);
+			if (ret)
+				break;
+
+			out_ptr += obj_size;
+			in_ptr += in->stride;
+		}
+	}
+
+	if (ret) {
+		kvfree(out_alloc);
+		return ret;
+	}
+
+	*out = out_alloc;
+	return 0;
+}
+
+int
+pvr_set_uobj_array(const struct drm_pvr_obj_array *out, u32 min_stride, u32 obj_size,
+		   const void *in)
+{
+	if (out->stride < min_stride)
+		return -EINVAL;
+
+	if (!out->count)
+		return 0;
+
+	if (obj_size == out->stride) {
+		if (copy_to_user(u64_to_user_ptr(out->array), in,
+				 (unsigned long)obj_size * out->count))
+			return -EFAULT;
+	} else {
+		u32 cpy_elem_size = min_t(u32, out->stride, obj_size);
+		void __user *out_ptr = u64_to_user_ptr(out->array);
+		const void *in_ptr = in;
+
+		for (u32 i = 0; i < out->count; i++) {
+			if (copy_to_user(out_ptr, in_ptr, cpy_elem_size))
+				return -EFAULT;
+
+			out_ptr += obj_size;
+			in_ptr += out->stride;
+		}
+
+		if (out->stride > obj_size &&
+		    clear_user(u64_to_user_ptr(out->array + obj_size),
+			       out->stride - obj_size)) {
+			return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
 #define DRM_PVR_IOCTL(_name, _func, _flags) \
 	DRM_IOCTL_DEF_DRV(PVR_##_name, pvr_ioctl_##_func, _flags)
 
diff --git a/drivers/gpu/drm/imagination/pvr_drv.h b/drivers/gpu/drm/imagination/pvr_drv.h
index c29d11af88fe9..378fe477b7596 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.h
+++ b/drivers/gpu/drm/imagination/pvr_drv.h
@@ -19,4 +19,111 @@
 #define PVR_DRIVER_MINOR 0
 #define PVR_DRIVER_PATCHLEVEL 0
 
+int pvr_get_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 obj_size, void *out);
+int pvr_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 obj_size, const void *in);
+int pvr_get_uobj_array(const struct drm_pvr_obj_array *in, u32 min_stride, u32 obj_size,
+		       void **out);
+int pvr_set_uobj_array(const struct drm_pvr_obj_array *out, u32 min_stride, u32 obj_size,
+		       const void *in);
+
+#define PVR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \
+	(offsetof(_typename, _last_mandatory_field) + \
+	 sizeof(((_typename *)NULL)->_last_mandatory_field))
+
+/* NOLINTBEGIN(bugprone-macro-parentheses) */
+#define PVR_UOBJ_DECL(_typename, _last_mandatory_field) \
+	, _typename : PVR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field)
+/* NOLINTEND(bugprone-macro-parentheses) */
+
+/**
+ * DOC: PVR user objects.
+ *
+ * Macros used to aid copying structured and array data to and from
+ * userspace. Objects can differ in size, provided the minimum size
+ * allowed is specified (using the last mandatory field in the struct).
+ * All types used with PVR_UOBJ_GET/SET macros must be listed here under
+ * PVR_UOBJ_MIN_SIZE, with the last mandatory struct field specified.
+ */
+
+/**
+ * PVR_UOBJ_MIN_SIZE() - Fetch the minimum copy size of a compatible type object.
+ * @_obj_name: The name of the object. Cannot be a typename - this is deduced.
+ *
+ * This cannot fail. Using the macro with an incompatible type will result in a
+ * compiler error.
+ *
+ * To add compatibility for a type, list it within the macro in an orderly
+ * fashion. The second argument is the name of the last mandatory field of the
+ * struct type, which is used to calculate the size. See also PVR_UOBJ_DECL().
+ *
+ * Return: The minimum copy size.
+ */
+#define PVR_UOBJ_MIN_SIZE(_obj_name) _Generic(_obj_name \
+	PVR_UOBJ_DECL(struct drm_pvr_job, hwrt) \
+	PVR_UOBJ_DECL(struct drm_pvr_sync_op, value) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_gpu_info, num_phantoms) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_runtime_info, cdm_max_local_mem_size_regs) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_quirks, _padding_c) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_enhancements, _padding_c) \
+	PVR_UOBJ_DECL(struct drm_pvr_heap, page_size_log2) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_heap_info, heaps) \
+	PVR_UOBJ_DECL(struct drm_pvr_static_data_area, offset) \
+	PVR_UOBJ_DECL(struct drm_pvr_dev_query_static_data_areas, static_data_areas) \
+	)
+
+/**
+ * PVR_UOBJ_GET() - Copies from _src_usr_ptr to &_dest_obj.
+ * @_dest_obj: The destination container object in kernel space.
+ * @_usr_size: The size of the source container in user space.
+ * @_src_usr_ptr: __u64 raw pointer to the source container in user space.
+ *
+ * Return: Error code. See pvr_get_uobj().
+ */
+#define PVR_UOBJ_GET(_dest_obj, _usr_size, _src_usr_ptr) \
+	pvr_get_uobj(_src_usr_ptr, _usr_size, \
+		     PVR_UOBJ_MIN_SIZE(_dest_obj), \
+		     sizeof(_dest_obj), &(_dest_obj))
+
+/**
+ * PVR_UOBJ_SET() - Copies from &_src_obj to _dest_usr_ptr.
+ * @_dest_usr_ptr: __u64 raw pointer to the destination container in user space.
+ * @_usr_size: The size of the destination container in user space.
+ * @_src_obj: The source container object in kernel space.
+ *
+ * Return: Error code. See pvr_set_uobj().
+ */
+#define PVR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \
+	pvr_set_uobj(_dest_usr_ptr, _usr_size, \
+		     PVR_UOBJ_MIN_SIZE(_src_obj), \
+		     sizeof(_src_obj), &(_src_obj))
+
+/**
+ * PVR_UOBJ_GET_ARRAY() - Copies from @_src_drm_pvr_obj_array.array to
+ * alloced memory and returns a pointer in _dest_array.
+ * @_dest_array: The destination C array object in kernel space.
+ * @_src_drm_pvr_obj_array: The &struct drm_pvr_obj_array containing a __u64 raw
+ * pointer to the source C array in user space and the size of each array
+ * element in user space (the 'stride').
+ *
+ * Return: Error code. See pvr_get_uobj_array().
+ */
+#define PVR_UOBJ_GET_ARRAY(_dest_array, _src_drm_pvr_obj_array) \
+	pvr_get_uobj_array(_src_drm_pvr_obj_array, \
+			   PVR_UOBJ_MIN_SIZE((_dest_array)[0]), \
+			   sizeof((_dest_array)[0]), (void **)&(_dest_array))
+
+/**
+ * PVR_UOBJ_SET_ARRAY() - Copies from _src_array to @_dest_drm_pvr_obj_array.array.
+ * @_dest_drm_pvr_obj_array: The &struct drm_pvr_obj_array containing a __u64 raw
+ * pointer to the destination C array in user space and the size of each array
+ * element in user space (the 'stride').
+ * @_src_array: The source C array object in kernel space.
+ *
+ * Return: Error code. See pvr_set_uobj_array().
+ */
+#define PVR_UOBJ_SET_ARRAY(_dest_drm_pvr_obj_array, _src_array) \
+	pvr_set_uobj_array(_dest_drm_pvr_obj_array, \
+			   PVR_UOBJ_MIN_SIZE((_src_array)[0]), \
+			   sizeof((_src_array)[0]), _src_array)
+
 #endif /* PVR_DRV_H */
diff --git a/drivers/gpu/drm/imagination/pvr_fw.c b/drivers/gpu/drm/imagination/pvr_fw.c
new file mode 100644
index 0000000000000..a9377c735c9cd
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_device_info.h"
+#include "pvr_fw.h"
+
+#include <drm/drm_drv.h>
+#include <linux/firmware.h>
+#include <linux/sizes.h>
+
+#define FW_MAX_SUPPORTED_MAJOR_VERSION 1
+
+/**
+ * pvr_fw_validate() - Parse firmware header and check compatibility
+ * @pvr_dev: Device pointer.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -EINVAL if firmware is incompatible.
+ */
+static int
+pvr_fw_validate(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	const struct firmware *firmware = pvr_dev->fw_dev.firmware;
+	const struct pvr_fw_layout_entry *layout_entries;
+	const struct pvr_fw_info_header *header;
+	const u8 *fw = firmware->data;
+	u32 fw_offset = firmware->size - SZ_4K;
+	u32 layout_table_size;
+	u32 entry;
+
+	if (firmware->size < SZ_4K || (firmware->size % FW_BLOCK_SIZE))
+		return -EINVAL;
+
+	header = (const struct pvr_fw_info_header *)&fw[fw_offset];
+
+	if (header->info_version != PVR_FW_INFO_VERSION) {
+		drm_err(drm_dev, "Unsupported fw info version %u\n",
+			header->info_version);
+		return -EINVAL;
+	}
+
+	if (header->header_len != sizeof(struct pvr_fw_info_header) ||
+	    header->layout_entry_size != sizeof(struct pvr_fw_layout_entry) ||
+	    header->layout_entry_num > PVR_FW_INFO_MAX_NUM_ENTRIES) {
+		drm_err(drm_dev, "FW info format mismatch\n");
+		return -EINVAL;
+	}
+
+	if (!(header->flags & PVR_FW_FLAGS_OPEN_SOURCE) ||
+	    header->fw_version_major > FW_MAX_SUPPORTED_MAJOR_VERSION ||
+	    header->fw_version_major == 0) {
+		drm_err(drm_dev, "Unsupported FW version %u.%u (build: %u%s)\n",
+			header->fw_version_major, header->fw_version_minor,
+			header->fw_version_build,
+			(header->flags & PVR_FW_FLAGS_OPEN_SOURCE) ? " OS" : "");
+		return -EINVAL;
+	}
+
+	if (pvr_gpu_id_to_packed_bvnc(&pvr_dev->gpu_id) != header->bvnc) {
+		struct pvr_gpu_id fw_gpu_id;
+
+		packed_bvnc_to_pvr_gpu_id(header->bvnc, &fw_gpu_id);
+		drm_err(drm_dev, "FW built for incorrect GPU ID %i.%i.%i.%i (expected %i.%i.%i.%i)\n",
+			fw_gpu_id.b, fw_gpu_id.v, fw_gpu_id.n, fw_gpu_id.c,
+			pvr_dev->gpu_id.b, pvr_dev->gpu_id.v, pvr_dev->gpu_id.n, pvr_dev->gpu_id.c);
+		return -EINVAL;
+	}
+
+	fw_offset += header->header_len;
+	layout_table_size =
+		header->layout_entry_size * header->layout_entry_num;
+	if ((fw_offset + layout_table_size) > firmware->size)
+		return -EINVAL;
+
+	layout_entries = (const struct pvr_fw_layout_entry *)&fw[fw_offset];
+	for (entry = 0; entry < header->layout_entry_num; entry++) {
+		u32 start_addr = layout_entries[entry].base_addr;
+		u32 end_addr = start_addr + layout_entries[entry].alloc_size;
+
+		if (start_addr >= end_addr)
+			return -EINVAL;
+	}
+
+	fw_offset = (firmware->size - SZ_4K) - header->device_info_size;
+
+	drm_info(drm_dev, "FW version v%u.%u (build %u OS)\n", header->fw_version_major,
+		 header->fw_version_minor, header->fw_version_build);
+
+	pvr_dev->fw_version.major = header->fw_version_major;
+	pvr_dev->fw_version.minor = header->fw_version_minor;
+
+	pvr_dev->fw_dev.header = header;
+	pvr_dev->fw_dev.layout_entries = layout_entries;
+
+	return 0;
+}
+
+static int
+pvr_fw_get_device_info(struct pvr_device *pvr_dev)
+{
+	const struct firmware *firmware = pvr_dev->fw_dev.firmware;
+	struct pvr_fw_device_info_header *header;
+	const u8 *fw = firmware->data;
+	const u64 *dev_info;
+	u32 fw_offset;
+
+	fw_offset = (firmware->size - SZ_4K) - pvr_dev->fw_dev.header->device_info_size;
+
+	header = (struct pvr_fw_device_info_header *)&fw[fw_offset];
+	dev_info = (u64 *)(header + 1);
+
+	pvr_device_info_set_quirks(pvr_dev, dev_info, header->brn_mask_size);
+	dev_info += header->brn_mask_size;
+
+	pvr_device_info_set_enhancements(pvr_dev, dev_info, header->ern_mask_size);
+	dev_info += header->ern_mask_size;
+
+	return pvr_device_info_set_features(pvr_dev, dev_info, header->feature_mask_size,
+					    header->feature_param_size);
+}
+
+/**
+ * pvr_fw_validate_init_device_info() - Validate firmware and initialise device information
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function must be called before querying device information.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if firmware validation fails.
+ */
+int
+pvr_fw_validate_init_device_info(struct pvr_device *pvr_dev)
+{
+	int err;
+
+	err = pvr_fw_validate(pvr_dev);
+	if (err)
+		return err;
+
+	return pvr_fw_get_device_info(pvr_dev);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_fw.h b/drivers/gpu/drm/imagination/pvr_fw.h
new file mode 100644
index 0000000000000..8331344e83ea6
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FW_H
+#define PVR_FW_H
+
+#include "pvr_fw_info.h"
+
+#include <linux/types.h>
+
+/* Forward declarations from "pvr_device.h". */
+struct pvr_device;
+struct pvr_file;
+
+struct pvr_fw_device {
+	/** @firmware: Handle to the firmware loaded into the device. */
+	const struct firmware *firmware;
+
+	/** @header: Pointer to firmware header. */
+	const struct pvr_fw_info_header *header;
+
+	/** @layout_entries: Pointer to firmware layout. */
+	const struct pvr_fw_layout_entry *layout_entries;
+
+	/**
+	 * @processor_type: FW processor type for this device. Must be one of
+	 *                  %PVR_FW_PROCESSOR_TYPE_*.
+	 */
+	u16 processor_type;
+};
+
+int pvr_fw_validate_init_device_info(struct pvr_device *pvr_dev);
+
+#endif /* PVR_FW_H */
diff --git a/drivers/gpu/drm/imagination/pvr_fw_info.h b/drivers/gpu/drm/imagination/pvr_fw_info.h
new file mode 100644
index 0000000000000..ad5d44a3067ac
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_info.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FW_INFO_H
+#define PVR_FW_INFO_H
+
+#include <linux/bits.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/*
+ * Firmware binary block unit in bytes.
+ * Raw data stored in FW binary will be aligned to this size.
+ */
+#define FW_BLOCK_SIZE SZ_4K
+
+/* Maximum number of entries in firmware layout table. */
+#define PVR_FW_INFO_MAX_NUM_ENTRIES 8
+
+enum pvr_fw_section_id {
+	META_CODE = 0,
+	META_PRIVATE_DATA,
+	META_COREMEM_CODE,
+	META_COREMEM_DATA,
+	MIPS_CODE,
+	MIPS_EXCEPTIONS_CODE,
+	MIPS_BOOT_CODE,
+	MIPS_PRIVATE_DATA,
+	MIPS_BOOT_DATA,
+	MIPS_STACK,
+	RISCV_UNCACHED_CODE,
+	RISCV_CACHED_CODE,
+	RISCV_PRIVATE_DATA,
+	RISCV_COREMEM_CODE,
+	RISCV_COREMEM_DATA,
+};
+
+enum pvr_fw_section_type {
+	NONE = 0,
+	FW_CODE,
+	FW_DATA,
+	FW_COREMEM_CODE,
+	FW_COREMEM_DATA,
+};
+
+/*
+ * FW binary format with FW info attached:
+ *
+ *          Contents        Offset
+ *     +-----------------+
+ *     |                 |    0
+ *     |                 |
+ *     | Original binary |
+ *     |      file       |
+ *     |   (.ldr/.elf)   |
+ *     |                 |
+ *     |                 |
+ *     +-----------------+
+ *     |   Device info   |  FILE_SIZE - 4K - device_info_size
+ *     +-----------------+
+ *     | FW info header  |  FILE_SIZE - 4K
+ *     +-----------------+
+ *     |                 |
+ *     | FW layout table |
+ *     |                 |
+ *     +-----------------+
+ *                          FILE_SIZE
+ */
+
+#define PVR_FW_INFO_VERSION 3
+
+#define PVR_FW_FLAGS_OPEN_SOURCE BIT(0)
+
+/** struct pvr_fw_info_header - Firmware header */
+struct pvr_fw_info_header {
+	/** @info_version: FW info header version. */
+	u32 info_version;
+	/** @header_len: Header length. */
+	u32 header_len;
+	/** @layout_entry_num: Number of entries in the layout table. */
+	u32 layout_entry_num;
+	/** @layout_entry_size: Size of an entry in the layout table. */
+	u32 layout_entry_size;
+	/** @bvnc: GPU ID supported by firmware. */
+	aligned_u64 bvnc;
+	/** @fw_page_size: Page size of processor on which firmware executes. */
+	u32 fw_page_size;
+	/** @flags: Compatibility flags. */
+	u32 flags;
+	/** @fw_version_major: Firmware major version number. */
+	u16 fw_version_major;
+	/** @fw_version_minor: Firmware minor version number. */
+	u16 fw_version_minor;
+	/** @fw_version_build: Firmware build number. */
+	u32 fw_version_build;
+	/** @device_info_size: Size of device info structure. */
+	u32 device_info_size;
+	/** @padding: Padding. */
+	u32 padding;
+};
+
+/**
+ * struct pvr_fw_layout_entry - Entry in firmware layout table, describing a
+ *                              section of the firmware image
+ */
+struct pvr_fw_layout_entry {
+	/** @id: Section ID. */
+	enum pvr_fw_section_id id;
+	/** @type: Section type. */
+	enum pvr_fw_section_type type;
+	/** @base_addr: Base address of section in FW address space. */
+	u32 base_addr;
+	/** @max_size: Maximum size of section, in bytes. */
+	u32 max_size;
+	/** @alloc_size: Allocation size of section, in bytes. */
+	u32 alloc_size;
+	/** @alloc_offset: Allocation offset of section. */
+	u32 alloc_offset;
+};
+
+/**
+ * struct pvr_fw_device_info_header - Device information header.
+ */
+struct pvr_fw_device_info_header {
+	/* BRN Mask size (in u64s). */
+	u64 brn_mask_size;
+	/* ERN Mask size (in u64s). */
+	u64 ern_mask_size;
+	/* Feature Mask size (in u64s). */
+	u64 feature_mask_size;
+	/* Feature Parameter size (in u64s). */
+	u64 feature_param_size;
+};
+
+#endif /* PVR_FW_INFO_H */
-- 
GitLab


From ff5f643de0bf27874c4033cd57a0bd034b5c7d11 Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:32 +0000
Subject: [PATCH 0407/2340] drm/imagination: Add GEM and VM related code

Add a GEM implementation based on drm_gem_shmem, and support code for the
PowerVR GPU MMU. The GPU VA manager is used for address space management.

Changes since v8:
- Updated for changes to drm_gpuvm
- Switched to dma_resv locking for vm ops
- Removed linked lists for collecting BOs in vm_context and for freeing
  after ops. This is now done internally in drm_gpuvm
- Corrected license identifiers

Changes since v7:
- kernel-doc fixes
- Remove prefixes from DRM_PVR_BO_* flags
- CREATE_BO ioctl now returns an error if provided size isn't page aligned
- Optimised MMU flushes

Changes since v6:
- Don't initialise kernel_vm_ctx when using MIPS firmware processor
- Rename drm_gpuva_manager uses to drm_gpuvm
- Sync GEM object to device on creation

Changes since v5:
- Use WRITE_ONCE() when writing to page tables
- Add memory barriers to page table insertion
- Fixed double backing page alloc on page table objects
- Fix BO mask checks in DRM_IOCTL_PVR_CREATE_BO handler
- Document use of pvr_page_table_*_idx when preallocing page table objs
- Remove pvr_vm_gpuva_mapping_init()
- Remove NULL check for unmap op in remap function
- Protect gem object with mutex during drm_gpuva_link/unlink
- Defer free or release of page table pages until after TLB flush
- Use drm_gpuva_op_remap_get_unmap_range() helper

Changes since v4:
- Correct sync function in vmap/vunmap function documentation
- Update for upstream GPU VA manager
- Fix missing frees when unmapping drm_gpuva objects
- Always zero GEM BOs on creation

Changes since v3:
- Split MMU and VM code
- Register page table allocations with kmemleak
- Use drm_dev_{enter,exit}

Changes since v2:
- Use GPU VA manager
- Use drm_gem_shmem

Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Link: https://lore.kernel.org/r/3c96dd170efe759b73897e3675d7310a7c4b06d0.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Kconfig      |    1 +
 drivers/gpu/drm/imagination/Makefile     |    5 +-
 drivers/gpu/drm/imagination/pvr_device.c |   27 +-
 drivers/gpu/drm/imagination/pvr_device.h |   24 +
 drivers/gpu/drm/imagination/pvr_drv.c    |  289 ++-
 drivers/gpu/drm/imagination/pvr_gem.c    |  414 ++++
 drivers/gpu/drm/imagination/pvr_gem.h    |  170 ++
 drivers/gpu/drm/imagination/pvr_mmu.c    | 2573 ++++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_mmu.h    |  108 +
 drivers/gpu/drm/imagination/pvr_vm.c     | 1091 +++++++++
 drivers/gpu/drm/imagination/pvr_vm.h     |   65 +
 11 files changed, 4756 insertions(+), 11 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_gem.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_gem.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_vm.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_vm.h

diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig
index 03d0a5031f5cc..6f82edf89144c 100644
--- a/drivers/gpu/drm/imagination/Kconfig
+++ b/drivers/gpu/drm/imagination/Kconfig
@@ -7,6 +7,7 @@ config DRM_POWERVR
 	depends on DRM
 	select DRM_GEM_SHMEM_HELPER
 	select DRM_SCHED
+	select DRM_GPUVM
 	select FW_LOADER
 	help
 	  Choose this option if you have a system that has an Imagination
diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 0b863e9f51b86..678c3dbb43260 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -7,6 +7,9 @@ powervr-y := \
 	pvr_device.o \
 	pvr_device_info.o \
 	pvr_drv.o \
-	pvr_fw.o
+	pvr_fw.o \
+	pvr_gem.o \
+	pvr_mmu.o \
+	pvr_vm.o
 
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 05e382cacb277..201ae780494f8 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -6,6 +6,7 @@
 
 #include "pvr_fw.h"
 #include "pvr_rogue_cr_defs.h"
+#include "pvr_vm.h"
 
 #include <drm/drm_print.h>
 
@@ -312,7 +313,30 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev)
 	else
 		return -EINVAL;
 
-	return pvr_set_dma_info(pvr_dev);
+	err = pvr_set_dma_info(pvr_dev);
+	if (err)
+		return err;
+
+	if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) {
+		pvr_dev->kernel_vm_ctx = pvr_vm_create_context(pvr_dev, false);
+		if (IS_ERR(pvr_dev->kernel_vm_ctx))
+			return PTR_ERR(pvr_dev->kernel_vm_ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_device_gpu_fini() - GPU-specific deinitialization for a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ */
+static void
+pvr_device_gpu_fini(struct pvr_device *pvr_dev)
+{
+	if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) {
+		WARN_ON(!pvr_vm_context_put(pvr_dev->kernel_vm_ctx));
+		pvr_dev->kernel_vm_ctx = NULL;
+	}
 }
 
 /**
@@ -364,6 +388,7 @@ pvr_device_fini(struct pvr_device *pvr_dev)
 	 * Deinitialization stages are performed in reverse order compared to
 	 * the initialization stages in pvr_device_init().
 	 */
+	pvr_device_gpu_fini(pvr_dev);
 }
 
 bool
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 36fe67bed27ff..bfc853ffd58fb 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -123,8 +123,24 @@ struct pvr_device {
 	 */
 	struct clk *mem_clk;
 
+	/**
+	 * @kernel_vm_ctx: Virtual memory context used for kernel mappings.
+	 *
+	 * This is used for mappings in the firmware address region when a META firmware processor
+	 * is in use.
+	 *
+	 * When a MIPS firmware processor is in use, this will be %NULL.
+	 */
+	struct pvr_vm_context *kernel_vm_ctx;
+
 	/** @fw_dev: Firmware related data. */
 	struct pvr_fw_device fw_dev;
+
+	/**
+	 * @mmu_flush_cache_flags: Records which MMU caches require flushing
+	 * before submitting the next job.
+	 */
+	atomic_t mmu_flush_cache_flags;
 };
 
 /**
@@ -145,6 +161,14 @@ struct pvr_file {
 	 *           to_pvr_device().
 	 */
 	struct pvr_device *pvr_dev;
+
+	/**
+	 * @vm_ctx_handles: Array of VM contexts belonging to this file. Array
+	 * members are of type "struct pvr_vm_context *".
+	 *
+	 * This array is used to allocate handles returned to userspace.
+	 */
+	struct xarray vm_ctx_handles;
 };
 
 /**
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index 1e4408f33e94e..6d3bc886e1c3b 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -3,9 +3,12 @@
 
 #include "pvr_device.h"
 #include "pvr_drv.h"
+#include "pvr_gem.h"
+#include "pvr_mmu.h"
 #include "pvr_rogue_defs.h"
 #include "pvr_rogue_fwif_client.h"
 #include "pvr_rogue_fwif_shared.h"
+#include "pvr_vm.h"
 
 #include <uapi/drm/pvr_drm.h>
 
@@ -60,7 +63,72 @@ static int
 pvr_ioctl_create_bo(struct drm_device *drm_dev, void *raw_args,
 		    struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_create_bo_args *args = raw_args;
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct pvr_file *pvr_file = to_pvr_file(file);
+
+	struct pvr_gem_object *pvr_obj;
+	size_t sanitized_size;
+
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	/* All padding fields must be zeroed. */
+	if (args->_padding_c != 0) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	/*
+	 * On 64-bit platforms (our primary target), size_t is a u64. However,
+	 * on other architectures we have to check for overflow when casting
+	 * down to size_t from u64.
+	 *
+	 * We also disallow zero-sized allocations, and reserved (kernel-only)
+	 * flags.
+	 */
+	if (args->size > SIZE_MAX || args->size == 0 || args->flags &
+	    ~DRM_PVR_BO_FLAGS_MASK || args->size & (PVR_DEVICE_PAGE_SIZE - 1)) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	sanitized_size = (size_t)args->size;
+
+	/*
+	 * Create a buffer object and transfer ownership to a userspace-
+	 * accessible handle.
+	 */
+	pvr_obj = pvr_gem_object_create(pvr_dev, sanitized_size, args->flags);
+	if (IS_ERR(pvr_obj)) {
+		err = PTR_ERR(pvr_obj);
+		goto err_drm_dev_exit;
+	}
+
+	/* This function will not modify &args->handle unless it succeeds. */
+	err = pvr_gem_object_into_handle(pvr_obj, pvr_file, &args->handle);
+	if (err)
+		goto err_destroy_obj;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_destroy_obj:
+	/*
+	 * GEM objects are refcounted, so there is no explicit destructor
+	 * function. Instead, we release the singular reference we currently
+	 * hold on the object and let GEM take care of the rest.
+	 */
+	pvr_gem_object_put(pvr_obj);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -87,7 +155,61 @@ static int
 pvr_ioctl_get_bo_mmap_offset(struct drm_device *drm_dev, void *raw_args,
 			     struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_get_bo_mmap_offset_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_gem_object *pvr_obj;
+	struct drm_gem_object *gem_obj;
+	int idx;
+	int ret;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	/* All padding fields must be zeroed. */
+	if (args->_padding_4 != 0) {
+		ret = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	/*
+	 * Obtain a kernel reference to the buffer object. This reference is
+	 * counted and must be manually dropped before returning. If a buffer
+	 * object cannot be found for the specified handle, return -%ENOENT (No
+	 * such file or directory).
+	 */
+	pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle);
+	if (!pvr_obj) {
+		ret = -ENOENT;
+		goto err_drm_dev_exit;
+	}
+
+	gem_obj = gem_from_pvr_gem(pvr_obj);
+
+	/*
+	 * Allocate a fake offset which can be used in userspace calls to mmap
+	 * on the DRM device file. If this fails, return the error code. This
+	 * operation is idempotent.
+	 */
+	ret = drm_gem_create_mmap_offset(gem_obj);
+	if (ret != 0) {
+		/* Drop our reference to the buffer object. */
+		drm_gem_object_put(gem_obj);
+		goto err_drm_dev_exit;
+	}
+
+	/*
+	 * Read out the fake offset allocated by the earlier call to
+	 * drm_gem_create_mmap_offset.
+	 */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	/* Drop our reference to the buffer object. */
+	pvr_gem_object_put(pvr_obj);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return ret;
 }
 
 static __always_inline u64
@@ -516,10 +638,12 @@ pvr_ioctl_dev_query(struct drm_device *drm_dev, void *raw_args,
 		break;
 
 	case DRM_PVR_DEV_QUERY_HEAP_INFO_GET:
-		return -EINVAL;
+		ret = pvr_heap_info_get(pvr_dev, args);
+		break;
 
 	case DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET:
-		return -EINVAL;
+		ret = pvr_static_data_areas_get(pvr_dev, args);
+		break;
 	}
 
 	drm_dev_exit(idx);
@@ -666,7 +790,46 @@ static int
 pvr_ioctl_create_vm_context(struct drm_device *drm_dev, void *raw_args,
 			    struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_create_vm_context_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	if (args->_padding_4) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	vm_ctx = pvr_vm_create_context(pvr_file->pvr_dev, true);
+	if (IS_ERR(vm_ctx)) {
+		err = PTR_ERR(vm_ctx);
+		goto err_drm_dev_exit;
+	}
+
+	/* Allocate object handle for userspace. */
+	err = xa_alloc(&pvr_file->vm_ctx_handles,
+		       &args->handle,
+		       vm_ctx,
+		       xa_limit_32b,
+		       GFP_KERNEL);
+	if (err < 0)
+		goto err_cleanup;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_cleanup:
+	pvr_vm_context_put(vm_ctx);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -686,7 +849,19 @@ static int
 pvr_ioctl_destroy_vm_context(struct drm_device *drm_dev, void *raw_args,
 			     struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_destroy_vm_context_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+
+	if (args->_padding_4)
+		return -EINVAL;
+
+	vm_ctx = xa_erase(&pvr_file->vm_ctx_handles, args->handle);
+	if (!vm_ctx)
+		return -EINVAL;
+
+	pvr_vm_context_put(vm_ctx);
+	return 0;
 }
 
 /**
@@ -716,7 +891,79 @@ static int
 pvr_ioctl_vm_map(struct drm_device *drm_dev, void *raw_args,
 		 struct drm_file *file)
 {
-	return -ENOTTY;
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct drm_pvr_ioctl_vm_map_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+
+	struct pvr_gem_object *pvr_obj;
+	size_t pvr_obj_size;
+
+	u64 offset_plus_size;
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	/* Initial validation of args. */
+	if (args->_padding_14) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	if (args->flags != 0 ||
+	    check_add_overflow(args->offset, args->size, &offset_plus_size) ||
+	    !pvr_find_heap_containing(pvr_dev, args->device_addr, args->size)) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (!vm_ctx) {
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle);
+	if (!pvr_obj) {
+		err = -ENOENT;
+		goto err_put_vm_context;
+	}
+
+	pvr_obj_size = pvr_gem_object_size(pvr_obj);
+
+	/*
+	 * Validate offset and size args. The alignment of these will be
+	 * checked when mapping; for now just check that they're within valid
+	 * bounds
+	 */
+	if (args->offset >= pvr_obj_size || offset_plus_size > pvr_obj_size) {
+		err = -EINVAL;
+		goto err_put_pvr_object;
+	}
+
+	err = pvr_vm_map(vm_ctx, pvr_obj, args->offset,
+			 args->device_addr, args->size);
+	if (err)
+		goto err_put_pvr_object;
+
+	/*
+	 * In order to set up the mapping, we needed a reference to &pvr_obj.
+	 * However, pvr_vm_map() obtains and stores its own reference, so we
+	 * must release ours before returning.
+	 */
+
+err_put_pvr_object:
+	pvr_gem_object_put(pvr_obj);
+
+err_put_vm_context:
+	pvr_vm_context_put(vm_ctx);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -739,7 +986,24 @@ static int
 pvr_ioctl_vm_unmap(struct drm_device *drm_dev, void *raw_args,
 		   struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_vm_unmap_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_vm_context *vm_ctx;
+	int err;
+
+	/* Initial validation of args. */
+	if (args->_padding_4)
+		return -EINVAL;
+
+	vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (!vm_ctx)
+		return -EINVAL;
+
+	err = pvr_vm_unmap(vm_ctx, args->device_addr, args->size);
+
+	pvr_vm_context_put(vm_ctx);
+
+	return err;
 }
 
 /*
@@ -930,6 +1194,8 @@ pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file)
 	 */
 	pvr_file->pvr_dev = pvr_dev;
 
+	xa_init_flags(&pvr_file->vm_ctx_handles, XA_FLAGS_ALLOC1);
+
 	/*
 	 * Store reference to powervr-specific file private data in DRM file
 	 * private data.
@@ -955,6 +1221,9 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 {
 	struct pvr_file *pvr_file = to_pvr_file(file);
 
+	/* Drop references on any remaining objects. */
+	pvr_destroy_vm_contexts_for_file(pvr_file);
+
 	kfree(pvr_file);
 	file->driver_priv = NULL;
 }
@@ -962,7 +1231,7 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops);
 
 static struct drm_driver pvr_drm_driver = {
-	.driver_features = DRIVER_RENDER,
+	.driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER,
 	.open = pvr_drm_driver_open,
 	.postclose = pvr_drm_driver_postclose,
 	.ioctls = pvr_drm_driver_ioctls,
@@ -976,6 +1245,8 @@ static struct drm_driver pvr_drm_driver = {
 	.minor = PVR_DRIVER_MINOR,
 	.patchlevel = PVR_DRIVER_PATCHLEVEL,
 
+	.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
+	.gem_create_object = pvr_gem_create_object,
 };
 
 static int
diff --git a/drivers/gpu/drm/imagination/pvr_gem.c b/drivers/gpu/drm/imagination/pvr_gem.c
new file mode 100644
index 0000000000000..6a8c81fe8c1e8
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_gem.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_gem.h"
+#include "pvr_vm.h"
+
+#include <drm/drm_gem.h>
+#include <drm/drm_prime.h>
+
+#include <linux/compiler.h>
+#include <linux/compiler_attributes.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/iosys-map.h>
+#include <linux/log2.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+
+static void pvr_gem_object_free(struct drm_gem_object *obj)
+{
+	drm_gem_shmem_object_free(obj);
+}
+
+static int pvr_gem_mmap(struct drm_gem_object *gem_obj, struct vm_area_struct *vma)
+{
+	struct pvr_gem_object *pvr_obj = gem_to_pvr_gem(gem_obj);
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+
+	if (!(pvr_obj->flags & DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS))
+		return -EINVAL;
+
+	return drm_gem_shmem_mmap(shmem_obj, vma);
+}
+
+static const struct drm_gem_object_funcs pvr_gem_object_funcs = {
+	.free = pvr_gem_object_free,
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = drm_gem_shmem_object_pin,
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = pvr_gem_mmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+/**
+ * pvr_gem_object_flags_validate() - Verify that a collection of PowerVR GEM
+ * mapping and/or creation flags form a valid combination.
+ * @flags: PowerVR GEM mapping/creation flags to validate.
+ *
+ * This function explicitly allows kernel-only flags. All ioctl entrypoints
+ * should do their own validation as well as relying on this function.
+ *
+ * Return:
+ *  * %true if @flags contains valid mapping and/or creation flags, or
+ *  * %false otherwise.
+ */
+static bool
+pvr_gem_object_flags_validate(u64 flags)
+{
+	static const u64 invalid_combinations[] = {
+		/*
+		 * Memory flagged as PM/FW-protected cannot be mapped to
+		 * userspace. To make this explicit, we require that the two
+		 * flags allowing each of these respective features are never
+		 * specified together.
+		 */
+		(DRM_PVR_BO_PM_FW_PROTECT |
+		 DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS),
+	};
+
+	int i;
+
+	/*
+	 * Check for bits set in undefined regions. Reserved regions refer to
+	 * options that can only be set by the kernel. These are explicitly
+	 * allowed in most cases, and must be checked specifically in IOCTL
+	 * callback code.
+	 */
+	if ((flags & PVR_BO_UNDEFINED_MASK) != 0)
+		return false;
+
+	/*
+	 * Check for all combinations of flags marked as invalid in the array
+	 * above.
+	 */
+	for (i = 0; i < ARRAY_SIZE(invalid_combinations); ++i) {
+		u64 combo = invalid_combinations[i];
+
+		if ((flags & combo) == combo)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * pvr_gem_object_into_handle() - Convert a reference to an object into a
+ * userspace-accessible handle.
+ * @pvr_obj: [IN] Target PowerVR-specific object.
+ * @pvr_file: [IN] File to associate the handle with.
+ * @handle: [OUT] Pointer to store the created handle in. Remains unmodified if
+ * an error is encountered.
+ *
+ * If an error is encountered, ownership of @pvr_obj will not have been
+ * transferred. If this function succeeds, however, further use of @pvr_obj is
+ * considered undefined behaviour unless another reference to it is explicitly
+ * held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while attempting to allocate a handle on @pvr_file.
+ */
+int
+pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj,
+			   struct pvr_file *pvr_file, u32 *handle)
+{
+	struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj);
+	struct drm_file *file = from_pvr_file(pvr_file);
+
+	u32 new_handle;
+	int err;
+
+	err = drm_gem_handle_create(file, gem_obj, &new_handle);
+	if (err)
+		return err;
+
+	/*
+	 * Release our reference to @pvr_obj, effectively transferring
+	 * ownership to the handle.
+	 */
+	pvr_gem_object_put(pvr_obj);
+
+	/*
+	 * Do not store the new handle in @handle until no more errors can
+	 * occur.
+	 */
+	*handle = new_handle;
+
+	return 0;
+}
+
+/**
+ * pvr_gem_object_from_handle() - Obtain a reference to an object from a
+ * userspace handle.
+ * @pvr_file: PowerVR-specific file to which @handle is associated.
+ * @handle: Userspace handle referencing the target object.
+ *
+ * On return, @handle always maintains its reference to the requested object
+ * (if it had one in the first place). If this function succeeds, the returned
+ * object will hold an additional reference. When the caller is finished with
+ * the returned object, they should call pvr_gem_object_put() on it to release
+ * this reference.
+ *
+ * Return:
+ *  * A pointer to the requested PowerVR-specific object on success, or
+ *  * %NULL otherwise.
+ */
+struct pvr_gem_object *
+pvr_gem_object_from_handle(struct pvr_file *pvr_file, u32 handle)
+{
+	struct drm_file *file = from_pvr_file(pvr_file);
+	struct drm_gem_object *gem_obj;
+
+	gem_obj = drm_gem_object_lookup(file, handle);
+	if (!gem_obj)
+		return NULL;
+
+	return gem_to_pvr_gem(gem_obj);
+}
+
+/**
+ * pvr_gem_object_vmap() - Map a PowerVR GEM object into CPU virtual address
+ * space.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * Once the caller is finished with the CPU mapping, they must call
+ * pvr_gem_object_vunmap() on @pvr_obj.
+ *
+ * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_cpu() is called to make
+ * sure the CPU mapping is consistent.
+ *
+ * Return:
+ *  * A pointer to the CPU mapping on success,
+ *  * -%ENOMEM if the mapping fails, or
+ *  * Any error encountered while attempting to acquire a reference to the
+ *    backing pages for @pvr_obj.
+ */
+void *
+pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj)
+{
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
+	struct iosys_map map;
+	int err;
+
+	dma_resv_lock(obj->resv, NULL);
+
+	err = drm_gem_shmem_vmap(shmem_obj, &map);
+	if (err)
+		goto err_unlock;
+
+	if (pvr_obj->flags & PVR_BO_CPU_CACHED) {
+		struct device *dev = shmem_obj->base.dev->dev;
+
+		/* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped
+		 * in GPU space yet.
+		 */
+		if (shmem_obj->sgt)
+			dma_sync_sgtable_for_cpu(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+	}
+
+	dma_resv_unlock(obj->resv);
+
+	return map.vaddr;
+
+err_unlock:
+	dma_resv_unlock(obj->resv);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_gem_object_vunmap() - Unmap a PowerVR memory object from CPU virtual
+ * address space.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_device() is called to make
+ * sure the GPU mapping is consistent.
+ */
+void
+pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj)
+{
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(shmem_obj->vaddr);
+	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
+
+	if (WARN_ON(!map.vaddr))
+		return;
+
+	dma_resv_lock(obj->resv, NULL);
+
+	if (pvr_obj->flags & PVR_BO_CPU_CACHED) {
+		struct device *dev = shmem_obj->base.dev->dev;
+
+		/* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped
+		 * in GPU space yet.
+		 */
+		if (shmem_obj->sgt)
+			dma_sync_sgtable_for_device(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+	}
+
+	drm_gem_shmem_vunmap(shmem_obj, &map);
+
+	dma_resv_unlock(obj->resv);
+}
+
+/**
+ * pvr_gem_object_zero() - Zeroes the physical memory behind an object.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while attempting to map @pvr_obj to the CPU (see
+ *    pvr_gem_object_vmap()).
+ */
+static int
+pvr_gem_object_zero(struct pvr_gem_object *pvr_obj)
+{
+	void *cpu_ptr;
+
+	cpu_ptr = pvr_gem_object_vmap(pvr_obj);
+	if (IS_ERR(cpu_ptr))
+		return PTR_ERR(cpu_ptr);
+
+	memset(cpu_ptr, 0, pvr_gem_object_size(pvr_obj));
+
+	/* Make sure the zero-ing is done before vumap-ing the object. */
+	wmb();
+
+	pvr_gem_object_vunmap(pvr_obj);
+
+	return 0;
+}
+
+/**
+ * pvr_gem_create_object() - Allocate and pre-initializes a pvr_gem_object
+ * @drm_dev: DRM device creating this object.
+ * @size: Size of the object to allocate in bytes.
+ *
+ * Return:
+ *  * The new pre-initialized GEM object on success,
+ *  * -ENOMEM if the allocation failed.
+ */
+struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size)
+{
+	struct drm_gem_object *gem_obj;
+	struct pvr_gem_object *pvr_obj;
+
+	pvr_obj = kzalloc(sizeof(*pvr_obj), GFP_KERNEL);
+	if (!pvr_obj)
+		return ERR_PTR(-ENOMEM);
+
+	gem_obj = gem_from_pvr_gem(pvr_obj);
+	gem_obj->funcs = &pvr_gem_object_funcs;
+
+	return gem_obj;
+}
+
+/**
+ * pvr_gem_object_create() - Creates a PowerVR-specific buffer object.
+ * @pvr_dev: Target PowerVR device.
+ * @size: Size of the object to allocate in bytes. Must be greater than zero.
+ * Any value which is not an exact multiple of the system page size will be
+ * rounded up to satisfy this condition.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ *
+ * The created object may be larger than @size, but can never be smaller. To
+ * get the exact size, call pvr_gem_object_size() on the returned pointer.
+ *
+ * Return:
+ *  * The newly-minted PowerVR-specific buffer object on success,
+ *  * -%EINVAL if @size is zero or @flags is not valid,
+ *  * -%ENOMEM if sufficient physical memory cannot be allocated, or
+ *  * Any other error returned by drm_gem_create_mmap_offset().
+ */
+struct pvr_gem_object *
+pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
+{
+	struct drm_gem_shmem_object *shmem_obj;
+	struct pvr_gem_object *pvr_obj;
+	struct sg_table *sgt;
+	int err;
+
+	/* Verify @size and @flags before continuing. */
+	if (size == 0 || !pvr_gem_object_flags_validate(flags))
+		return ERR_PTR(-EINVAL);
+
+	shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size);
+	if (IS_ERR(shmem_obj))
+		return ERR_CAST(shmem_obj);
+
+	shmem_obj->pages_mark_dirty_on_put = true;
+	shmem_obj->map_wc = !(flags & PVR_BO_CPU_CACHED);
+	pvr_obj = shmem_gem_to_pvr_gem(shmem_obj);
+	pvr_obj->flags = flags;
+
+	sgt = drm_gem_shmem_get_pages_sgt(shmem_obj);
+	if (IS_ERR(sgt)) {
+		err = PTR_ERR(sgt);
+		goto err_shmem_object_free;
+	}
+
+	dma_sync_sgtable_for_device(shmem_obj->base.dev->dev, sgt,
+				    DMA_BIDIRECTIONAL);
+
+	/*
+	 * Do this last because pvr_gem_object_zero() requires a fully
+	 * configured instance of struct pvr_gem_object.
+	 */
+	pvr_gem_object_zero(pvr_obj);
+
+	return pvr_obj;
+
+err_shmem_object_free:
+	drm_gem_shmem_free(shmem_obj);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_gem_get_dma_addr() - Get DMA address for given offset in object
+ * @pvr_obj: Pointer to object to lookup address in.
+ * @offset: Offset within object to lookup address at.
+ * @dma_addr_out: Pointer to location to store DMA address.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if object is not currently backed, or if @offset is out of valid
+ *    range for this object.
+ */
+int
+pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset,
+		     dma_addr_t *dma_addr_out)
+{
+	struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+	u32 accumulated_offset = 0;
+	struct scatterlist *sgl;
+	unsigned int sgt_idx;
+
+	WARN_ON(!shmem_obj->sgt);
+	for_each_sgtable_dma_sg(shmem_obj->sgt, sgl, sgt_idx) {
+		u32 new_offset = accumulated_offset + sg_dma_len(sgl);
+
+		if (offset >= accumulated_offset && offset < new_offset) {
+			*dma_addr_out = sg_dma_address(sgl) +
+					(offset - accumulated_offset);
+			return 0;
+		}
+
+		accumulated_offset = new_offset;
+	}
+
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_gem.h b/drivers/gpu/drm/imagination/pvr_gem.h
new file mode 100644
index 0000000000000..e0e5ea509a2e8
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_gem.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_GEM_H
+#define PVR_GEM_H
+
+#include "pvr_rogue_heap_config.h"
+#include "pvr_rogue_meta.h"
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_mm.h>
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/const.h>
+#include <linux/compiler_attributes.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h". */
+struct pvr_device;
+struct pvr_file;
+
+/**
+ * DOC: Flags for DRM_IOCTL_PVR_CREATE_BO (kernel-only)
+ *
+ * Kernel-only values allowed in &pvr_gem_object->flags. The majority of options
+ * for this field are specified in the UAPI header "pvr_drm.h" with a
+ * DRM_PVR_BO_ prefix. To distinguish these internal options (which must exist
+ * in ranges marked as "reserved" in the UAPI header), we drop the DRM prefix.
+ * The public options should be used directly, DRM prefix and all.
+ *
+ * To avoid potentially confusing gaps in the UAPI options, these kernel-only
+ * options are specified "in reverse", starting at bit 63.
+ *
+ * We use "reserved" to refer to bits defined here and not exposed in the UAPI.
+ * Bits not defined anywhere are "undefined".
+ *
+ * CPU mapping options
+ *    :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set this
+ *       flag to override this behaviour and map the object cached.
+ *
+ * Firmware options
+ *    :PVR_BO_FW_NO_CLEAR_ON_RESET: By default, all FW objects are cleared and reinitialised on hard
+ *       reset. Set this flag to override this behaviour and preserve buffer contents on reset.
+ */
+#define PVR_BO_CPU_CACHED BIT_ULL(63)
+
+#define PVR_BO_FW_NO_CLEAR_ON_RESET BIT_ULL(62)
+
+#define PVR_BO_KERNEL_FLAGS_MASK (PVR_BO_CPU_CACHED | PVR_BO_FW_NO_CLEAR_ON_RESET)
+
+/* Bits 61..3 are undefined. */
+/* Bits 2..0 are defined in the UAPI. */
+
+/* Other utilities. */
+#define PVR_BO_UNDEFINED_MASK ~(PVR_BO_KERNEL_FLAGS_MASK | DRM_PVR_BO_FLAGS_MASK)
+
+/*
+ * All firmware-mapped memory uses (mostly) the same flags. Specifically,
+ * firmware-mapped memory should be:
+ *  * Read/write on the device,
+ *  * Read/write on the CPU, and
+ *  * Write-combined on the CPU.
+ *
+ * The only variation is in caching on the device.
+ */
+#define PVR_BO_FW_FLAGS_DEVICE_CACHED (ULL(0))
+#define PVR_BO_FW_FLAGS_DEVICE_UNCACHED DRM_PVR_BO_BYPASS_DEVICE_CACHE
+
+/**
+ * struct pvr_gem_object - powervr-specific wrapper for &struct drm_gem_object
+ */
+struct pvr_gem_object {
+	/**
+	 * @base: The underlying &struct drm_gem_shmem_object.
+	 *
+	 * Do not access this member directly, instead call
+	 * shem_gem_from_pvr_gem().
+	 */
+	struct drm_gem_shmem_object base;
+
+	/**
+	 * @flags: Options set at creation-time. Some of these options apply to
+	 * the creation operation itself (which are stored here for reference)
+	 * with the remainder used for mapping options to both the device and
+	 * CPU. These are used every time this object is mapped, but may be
+	 * changed after creation.
+	 *
+	 * Must be a combination of DRM_PVR_BO_* and/or PVR_BO_* flags.
+	 *
+	 * .. note::
+	 *
+	 *    This member is declared const to indicate that none of these
+	 *    options may change or be changed throughout the object's
+	 *    lifetime.
+	 */
+	u64 flags;
+
+};
+
+static_assert(offsetof(struct pvr_gem_object, base) == 0,
+	      "offsetof(struct pvr_gem_object, base) not zero");
+
+#define shmem_gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base)
+
+#define shmem_gem_to_pvr_gem(shmem_obj) container_of_const(shmem_obj, struct pvr_gem_object, base)
+
+#define gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base.base)
+
+#define gem_to_pvr_gem(gem_obj) container_of_const(gem_obj, struct pvr_gem_object, base.base)
+
+/* Functions defined in pvr_gem.c */
+
+struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size);
+
+struct pvr_gem_object *pvr_gem_object_create(struct pvr_device *pvr_dev,
+					     size_t size, u64 flags);
+
+int pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj,
+			       struct pvr_file *pvr_file, u32 *handle);
+struct pvr_gem_object *pvr_gem_object_from_handle(struct pvr_file *pvr_file,
+						  u32 handle);
+
+static __always_inline struct sg_table *
+pvr_gem_object_get_pages_sgt(struct pvr_gem_object *pvr_obj)
+{
+	return drm_gem_shmem_get_pages_sgt(shmem_gem_from_pvr_gem(pvr_obj));
+}
+
+void *pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj);
+void pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj);
+
+int pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset,
+			 dma_addr_t *dma_addr_out);
+
+/**
+ * pvr_gem_object_get() - Acquire reference on pvr_gem_object
+ * @pvr_obj: Pointer to object to acquire reference on.
+ */
+static __always_inline void
+pvr_gem_object_get(struct pvr_gem_object *pvr_obj)
+{
+	drm_gem_object_get(gem_from_pvr_gem(pvr_obj));
+}
+
+/**
+ * pvr_gem_object_put() - Release reference on pvr_gem_object
+ * @pvr_obj: Pointer to object to release reference on.
+ */
+static __always_inline void
+pvr_gem_object_put(struct pvr_gem_object *pvr_obj)
+{
+	drm_gem_object_put(gem_from_pvr_gem(pvr_obj));
+}
+
+static __always_inline size_t
+pvr_gem_object_size(struct pvr_gem_object *pvr_obj)
+{
+	return gem_from_pvr_gem(pvr_obj)->size;
+}
+
+#endif /* PVR_GEM_H */
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c
new file mode 100644
index 0000000000000..7f77e03df9125
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_mmu.c
@@ -0,0 +1,2573 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_mmu.h"
+
+#include "pvr_device.h"
+#include "pvr_fw.h"
+#include "pvr_gem.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_mmu_defs.h"
+
+#include <drm/drm_drv.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+#include <linux/kmemleak.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+
+#define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_))
+#define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1)))
+
+/*
+ * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently
+ * pegged to the host page size (%PAGE_SIZE). This chunk of macro goodness both
+ * ensures that the selected host page size corresponds to a valid device page
+ * size and sets up values needed by the MMU code below.
+ */
+#if (PVR_DEVICE_PAGE_SIZE == SZ_4K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_16K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_64K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_256K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_1M)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_2M)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK
+#else
+# error Unsupported device page size PVR_DEVICE_PAGE_SIZE
+#endif
+
+#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X   \
+	(ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \
+	 (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K)))
+
+enum pvr_mmu_sync_level {
+	PVR_MMU_SYNC_LEVEL_NONE = -1,
+	PVR_MMU_SYNC_LEVEL_0 = 0,
+	PVR_MMU_SYNC_LEVEL_1 = 1,
+	PVR_MMU_SYNC_LEVEL_2 = 2,
+};
+
+#define PVR_MMU_SYNC_LEVEL_0_FLAGS (ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT | \
+				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT | \
+				    ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB)
+#define PVR_MMU_SYNC_LEVEL_1_FLAGS (PVR_MMU_SYNC_LEVEL_0_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD)
+#define PVR_MMU_SYNC_LEVEL_2_FLAGS (PVR_MMU_SYNC_LEVEL_1_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC)
+
+/**
+ * pvr_mmu_set_flush_flags() - Set MMU cache flush flags for next call to
+ *                             pvr_mmu_flush_exec().
+ * @pvr_dev: Target PowerVR device.
+ * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
+ *
+ * This function must be called following any possible change to the MMU page
+ * tables.
+ */
+static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags)
+{
+	atomic_fetch_or(flags, &pvr_dev->mmu_flush_cache_flags);
+}
+
+/**
+ * pvr_mmu_flush_request_all() - Request flush of all MMU caches when
+ * subsequently calling pvr_mmu_flush_exec().
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function must be called following any possible change to the MMU page
+ * tables.
+ */
+void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
+{
+	/* TODO: implement */
+}
+
+/**
+ * pvr_mmu_flush_exec() - Execute a flush of all MMU caches previously
+ * requested.
+ * @pvr_dev: Target PowerVR device.
+ * @wait: Do not return until the flush is completed.
+ *
+ * This function must be called prior to submitting any new GPU job. The flush
+ * will complete before the jobs are scheduled, so this can be called once after
+ * a series of maps. However, a single unmap should always be immediately
+ * followed by a flush and it should be explicitly waited by setting @wait.
+ *
+ * As a failure to flush the MMU caches could risk memory corruption, if the
+ * flush fails (implying the firmware is not responding) then the GPU device is
+ * marked as lost.
+ *
+ * Returns:
+ *  * 0 on success when @wait is true, or
+ *  * -%EIO if the device is unavailable, or
+ *  * Any error encountered while submitting the flush command via the KCCB.
+ */
+int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait)
+{
+	/* TODO: implement */
+	return -ENODEV;
+}
+
+/**
+ * DOC: PowerVR Virtual Memory Handling
+ */
+/**
+ * DOC: PowerVR Virtual Memory Handling (constants)
+ *
+ * .. c:macro:: PVR_IDX_INVALID
+ *
+ *    Default value for a u16-based index.
+ *
+ *    This value cannot be zero, since zero is a valid index value.
+ */
+#define PVR_IDX_INVALID ((u16)(-1))
+
+/**
+ * DOC: MMU backing pages
+ */
+/**
+ * DOC: MMU backing pages (constants)
+ *
+ * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE
+ *
+ *    Page size of a PowerVR device's integrated MMU. The CPU page size must be
+ *    at least as large as this value for the current implementation; this is
+ *    checked at compile-time.
+ */
+#define PVR_MMU_BACKING_PAGE_SIZE SZ_4K
+static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_mmu_backing_page - Represents a single page used to back a page
+ *                              table of any level.
+ * @dma_addr: DMA address of this page.
+ * @host_ptr: CPU address of this page.
+ * @pvr_dev: The PowerVR device to which this page is associated. **For
+ *           internal use only.**
+ */
+struct pvr_mmu_backing_page {
+	dma_addr_t dma_addr;
+	void *host_ptr;
+/* private: internal use only */
+	struct page *raw_page;
+	struct pvr_device *pvr_dev;
+};
+
+/**
+ * pvr_mmu_backing_page_init() - Initialize a MMU backing page.
+ * @page: Target backing page.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function performs three distinct operations:
+ *
+ * 1. Allocate a single page,
+ * 2. Map the page to the CPU, and
+ * 3. Map the page to DMA-space.
+ *
+ * It is expected that @page be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%ENOMEM if allocation of the backing page or mapping of the backing
+ *    page to DMA fails.
+ */
+static int
+pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
+			  struct pvr_device *pvr_dev)
+{
+	struct device *dev = from_pvr_device(pvr_dev)->dev;
+
+	struct page *raw_page;
+	int err;
+
+	dma_addr_t dma_addr;
+	void *host_ptr;
+
+	raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL);
+	if (!raw_page)
+		return -ENOMEM;
+
+	host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	if (!host_ptr) {
+		err = -ENOMEM;
+		goto err_free_page;
+	}
+
+	dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE,
+				DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_addr)) {
+		err = -ENOMEM;
+		goto err_unmap_page;
+	}
+
+	page->dma_addr = dma_addr;
+	page->host_ptr = host_ptr;
+	page->pvr_dev = pvr_dev;
+	page->raw_page = raw_page;
+	kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL);
+
+	return 0;
+
+err_unmap_page:
+	vunmap(host_ptr);
+
+err_free_page:
+	__free_page(raw_page);
+
+	return err;
+}
+
+/**
+ * pvr_mmu_backing_page_fini() - Teardown a MMU backing page.
+ * @page: Target backing page.
+ *
+ * This function performs the mirror operations to pvr_mmu_backing_page_init(),
+ * in reverse order:
+ *
+ * 1. Unmap the page from DMA-space,
+ * 2. Unmap the page from the CPU, and
+ * 3. Free the page.
+ *
+ * It also zeros @page.
+ *
+ * It is a no-op to call this function a second (or further) time on any @page.
+ */
+static void
+pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
+{
+	struct device *dev = from_pvr_device(page->pvr_dev)->dev;
+
+	/* Do nothing if no allocation is present. */
+	if (!page->pvr_dev)
+		return;
+
+	dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
+		       DMA_TO_DEVICE);
+
+	kmemleak_free(page->host_ptr);
+	vunmap(page->host_ptr);
+
+	__free_page(page->raw_page);
+
+	memset(page, 0, sizeof(*page));
+}
+
+/**
+ * pvr_mmu_backing_page_sync() - Flush a MMU backing page from the CPU to the
+ *                              device.
+ * @page: Target backing page.
+ *
+ * .. caution::
+ *
+ *    **This is potentially an expensive function call.** Only call
+ *    pvr_mmu_backing_page_sync() once you're sure you have no more changes to
+ *    make to the backing page in the immediate future.
+ */
+static void
+pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page, u32 flags)
+{
+	struct pvr_device *pvr_dev = page->pvr_dev;
+	struct device *dev;
+
+	/*
+	 * Do nothing if no allocation is present. This may be the case if
+	 * we are unmapping pages.
+	 */
+	if (!pvr_dev)
+		return;
+
+	dev = from_pvr_device(pvr_dev)->dev;
+
+	dma_sync_single_for_device(dev, page->dma_addr,
+				   PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE);
+
+	pvr_mmu_set_flush_flags(pvr_dev, flags);
+}
+
+/**
+ * DOC: Raw page tables
+ */
+
+#define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \
+	typeof_member(struct pvr_page_table_l##level_##_entry_raw, val)
+
+#define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_)           \
+	(((entry_).val &                                           \
+	  ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \
+	 ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT)
+
+#define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_)            \
+	((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_))            \
+	  << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \
+	 ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK)
+
+/**
+ * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 31..4
+ *      - **Level 1 Page Table Base Address:** Bits 39..12 of the L1
+ *        page table base address, which is 4KiB aligned.
+ *
+ *    * - 3..2
+ *      - *(reserved)*
+ *
+ *    * - 1
+ *      - **Pending:** When valid bit is not set, indicates that a valid
+ *        entry is pending and the MMU should wait for the driver to map
+ *        the entry. This is used to support page demand mapping of
+ *        memory.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid L1 page
+ *        table. If the valid bit is not set, then an attempted use of
+ *        the page would result in a page fault.
+ */
+struct pvr_page_table_l2_entry_raw {
+	u32 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 ==
+	      ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE);
+
+static bool
+pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry)
+{
+	return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
+ *                                     page table.
+ * @entry: Target raw level 2 page table entry.
+ * @child_table_dma_addr: DMA address of the level 1 page table to be
+ *                        associated with @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
+ */
+static void
+pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
+				dma_addr_t child_table_dma_addr)
+{
+	child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
+
+	WRITE_ONCE(entry->val,
+		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
+		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
+		   PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr));
+}
+
+static void
+pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry)
+{
+	WRITE_ONCE(entry->val, 0);
+}
+
+/**
+ * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 63..41
+ *      - *(reserved)*
+ *
+ *    * - 40
+ *      - **Pending:** When valid bit is not set, indicates that a valid entry
+ *        is pending and the MMU should wait for the driver to map the entry.
+ *        This is used to support page demand mapping of memory.
+ *
+ *    * - 39..5
+ *      - **Level 0 Page Table Base Address:** The way this value is
+ *        interpreted depends on the page size. Bits not specified in the
+ *        table below (e.g. bits 11..5 for page size 4KiB) should be
+ *        considered reserved.
+ *
+ *        This table shows the bits used in an L1 page table entry to
+ *        represent the Physical Table Base Address for a given Page Size.
+ *        Since each L1 page table entry covers 2MiB of address space, the
+ *        maximum page size is 2MiB.
+ *
+ *        .. flat-table::
+ *           :widths: 1 1 1 1
+ *           :header-rows: 1
+ *           :stub-columns: 1
+ *
+ *           * - Page size
+ *             - L0 page table base address bits
+ *             - Number of L0 page table entries
+ *             - Size of L0 page table
+ *
+ *           * - 4KiB
+ *             - 39..12
+ *             - 512
+ *             - 4KiB
+ *
+ *           * - 16KiB
+ *             - 39..10
+ *             - 128
+ *             - 1KiB
+ *
+ *           * - 64KiB
+ *             - 39..8
+ *             - 32
+ *             - 256B
+ *
+ *           * - 256KiB
+ *             - 39..6
+ *             - 8
+ *             - 64B
+ *
+ *           * - 1MiB
+ *             - 39..5 (4 = '0')
+ *             - 2
+ *             - 16B
+ *
+ *           * - 2MiB
+ *             - 39..5 (4..3 = '00')
+ *             - 1
+ *             - 8B
+ *
+ *    * - 4
+ *      - *(reserved)*
+ *
+ *    * - 3..1
+ *      - **Page Size:** Sets the page size, from 4KiB to 2MiB.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid L0 page table.
+ *        If the valid bit is not set, then an attempted use of the page would
+ *        result in a page fault.
+ */
+struct pvr_page_table_l1_entry_raw {
+	u64 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 ==
+	      ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE);
+
+static bool
+pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry)
+{
+	return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1
+ *                                     page table.
+ * @entry: Target raw level 1 page table entry.
+ * @child_table_dma_addr: DMA address of the level 0 page table to be
+ *                        associated with @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of 4 KiB.
+ */
+static void
+pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry,
+				dma_addr_t child_table_dma_addr)
+{
+	WRITE_ONCE(entry->val,
+		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) |
+		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) |
+		   PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) |
+		   /*
+		    * The use of a 4K-specific macro here is correct. It is
+		    * a future optimization to allocate sub-host-page-sized
+		    * blocks for individual tables, so the condition that any
+		    * page table address is aligned to the size of the
+		    * largest (a 4KB) table currently holds.
+		    */
+		   (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK));
+}
+
+static void
+pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry)
+{
+	WRITE_ONCE(entry->val, 0);
+}
+
+/**
+ * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 63
+ *      - *(reserved)*
+ *
+ *    * - 62
+ *      - **PM/FW Protect:** Indicates a protected region which only the
+ *        Parameter Manager (PM) or firmware processor can write to.
+ *
+ *    * - 61..40
+ *      - **VP Page (High):** Virtual-physical page used for Parameter Manager
+ *        (PM) memory. This field is only used if the additional level of PB
+ *        virtualization is enabled. The VP Page field is needed by the PM in
+ *        order to correctly reconstitute the free lists after render
+ *        completion. This (High) field holds bits 39..18 of the value; the
+ *        Low field holds bits 17..12. Bits 11..0 are always zero because the
+ *        value is always aligned to the 4KiB page size.
+ *
+ *    * - 39..12
+ *      - **Physical Page Address:** The way this value is interpreted depends
+ *        on the page size. Bits not specified in the table below (e.g. bits
+ *        20..12 for page size 2MiB) should be considered reserved.
+ *
+ *        This table shows the bits used in an L0 page table entry to represent
+ *        the Physical Page Address for a given page size (as defined in the
+ *        associated L1 page table entry).
+ *
+ *        .. flat-table::
+ *           :widths: 1 1
+ *           :header-rows: 1
+ *           :stub-columns: 1
+ *
+ *           * - Page size
+ *             - Physical address bits
+ *
+ *           * - 4KiB
+ *             - 39..12
+ *
+ *           * - 16KiB
+ *             - 39..14
+ *
+ *           * - 64KiB
+ *             - 39..16
+ *
+ *           * - 256KiB
+ *             - 39..18
+ *
+ *           * - 1MiB
+ *             - 39..20
+ *
+ *           * - 2MiB
+ *             - 39..21
+ *
+ *    * - 11..6
+ *      - **VP Page (Low):** Continuation of VP Page (High).
+ *
+ *    * - 5
+ *      - **Pending:** When valid bit is not set, indicates that a valid entry
+ *        is pending and the MMU should wait for the driver to map the entry.
+ *        This is used to support page demand mapping of memory.
+ *
+ *    * - 4
+ *      - **PM Src:** Set on Parameter Manager (PM) allocated page table
+ *        entries when indicated by the PM. Note that this bit will only be set
+ *        by the PM, not by the device driver.
+ *
+ *    * - 3
+ *      - **SLC Bypass Control:** Specifies requests to this page should bypass
+ *        the System Level Cache (SLC), if enabled in SLC configuration.
+ *
+ *    * - 2
+ *      - **Cache Coherency:** Indicates that the page is coherent (i.e. it
+ *        does not require a cache flush between operations on the CPU and the
+ *        device).
+ *
+ *    * - 1
+ *      - **Read Only:** If set, this bit indicates that the page is read only.
+ *        An attempted write to this page would result in a write-protection
+ *        fault.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid page. If the
+ *        valid bit is not set, then an attempted use of the page would result
+ *        in a page fault.
+ */
+struct pvr_page_table_l0_entry_raw {
+	u64 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 ==
+	      ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE);
+
+/**
+ * struct pvr_page_flags_raw - The configurable flags from a single entry in a
+ *                             level 0 page table.
+ * @val: The raw value of these flags. Since these are a strict subset of
+ *       &struct pvr_page_table_l0_entry_raw; use that type for our member here.
+ *
+ * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache
+ * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively).
+ *
+ * This type should never be instantiated directly; instead use
+ * pvr_page_flags_raw_create() to ensure only valid bits of @val are set.
+ */
+struct pvr_page_flags_raw {
+	struct pvr_page_table_l0_entry_raw val;
+} __packed;
+static_assert(sizeof(struct pvr_page_flags_raw) ==
+	      sizeof(struct pvr_page_table_l0_entry_raw));
+
+static bool
+pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
+{
+	return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
+ *                                     page table.
+ * @entry: Target raw level 0 page table entry.
+ * @dma_addr: DMA address of the physical page to be associated with @entry.
+ * @flags: Options to be set on @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ * The @flags parameter is directly assigned into @entry. It is the callers
+ * responsibility to ensure that only bits specified in
+ * &struct pvr_page_flags_raw are set in @flags.
+ */
+static void
+pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
+				dma_addr_t dma_addr,
+				struct pvr_page_flags_raw flags)
+{
+	WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
+			       PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
+			       (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
+			       flags.val.val);
+}
+
+static void
+pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
+{
+	WRITE_ONCE(entry->val, 0);
+}
+
+/**
+ * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
+ *                               table entry.
+ * @read_only: This page is read-only (see: Read Only).
+ * @cache_coherent: This page does not require cache flushes (see: Cache
+ *                  Coherency).
+ * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
+ * @pm_fw_protect: This page is only for use by the firmware or Parameter
+ *                 Manager (see PM/FW Protect).
+ *
+ * For more details on the use of these four options, see their respective
+ * entries in the table under &struct pvr_page_table_l0_entry_raw.
+ *
+ * Return:
+ * A new &struct pvr_page_flags_raw instance which can be passed directly to
+ * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
+ */
+static struct pvr_page_flags_raw
+pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
+			  bool pm_fw_protect)
+{
+	struct pvr_page_flags_raw flags;
+
+	flags.val.val =
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
+		PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
+
+	return flags;
+}
+
+/**
+ * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l2_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l2_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l1_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l1_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ *
+ * .. caution::
+ *
+ *    The size of level 0 page tables is variable depending on the page size
+ *    specified in the associated level 1 page table entry. Since the device
+ *    page size in use is pegged to the host page size, it cannot vary at
+ *    runtime. This structure is therefore only defined to contain the required
+ *    number of entries for the current device page size. **You should never
+ *    read or write beyond the last supported entry.**
+ */
+struct pvr_page_table_l0_raw {
+	/** @entries: The raw values of this table. */
+	struct pvr_page_table_l0_entry_raw
+		entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * DOC: Mirror page tables
+ */
+
+/*
+ * We pre-declare these types because they cross-depend on pointers to each
+ * other.
+ */
+struct pvr_page_table_l1;
+struct pvr_page_table_l0;
+
+/**
+ * struct pvr_page_table_l2 - A wrapped level 2 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l2_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l2_get_entry_raw().
+ *
+ * A level 2 page table forms the root of the page table tree structure, so
+ * this type has no &parent or &parent_idx members.
+ */
+struct pvr_page_table_l2 {
+	/**
+	 * @entries: The children of this node in the page table tree
+	 * structure. These are also mirror tables. The indexing of this array
+	 * is identical to that of the raw equivalent
+	 * (&pvr_page_table_l1_raw.entries).
+	 */
+	struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l2_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l2_init() - Initialize a level 2 page table.
+ * @table: Target level 2 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while intializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
+		       struct pvr_device *pvr_dev)
+{
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l2_fini() - Teardown a level 2 page table.
+ * @table: Target level 2 page table.
+ *
+ * It is an error to attempt to use @table after calling this function.
+ */
+static void
+pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+}
+
+/**
+ * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
+ *                            device.
+ * @table: Target level 2 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l2_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 1 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l1_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_2_FLAGS);
+}
+
+/**
+ * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
+ *                               page table.
+ * @table: Target level 2 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l2_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l2_raw *
+pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 2 page table.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 2 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 2 page table entry.
+ */
+static struct pvr_page_table_l2_entry_raw *
+pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
+{
+	return &pvr_page_table_l2_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
+{
+	struct pvr_page_table_l2_entry_raw entry_raw =
+		*pvr_page_table_l2_get_entry_raw(table, idx);
+
+	return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l1 - A wrapped level 1 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l1_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l1_get_entry_raw().
+ */
+struct pvr_page_table_l1 {
+	/**
+	 * @entries: The children of this node in the page table tree
+	 * structure. These are also mirror tables. The indexing of this array
+	 * is identical to that of the raw equivalent
+	 * (&pvr_page_table_l0_raw.entries).
+	 */
+	struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	union {
+		/**
+		 * @parent: The parent of this node in the page table tree structure.
+		 *
+		 * This is also a mirror table.
+		 *
+		 * Only valid when the L1 page table is active. When the L1 page table
+		 * has been removed and queued for destruction, the next_free field
+		 * should be used instead.
+		 */
+		struct pvr_page_table_l2 *parent;
+
+		/**
+		 * @next_free: Pointer to the next L1 page table to take/free.
+		 *
+		 * Used to form a linked list of L1 page tables. This is used
+		 * when preallocating tables and when the page table has been
+		 * removed and queued for destruction.
+		 */
+		struct pvr_page_table_l1 *next_free;
+	};
+
+	/**
+	 * @parent_idx: The index of the entry in the parent table (see
+	 * @parent) which corresponds to this table.
+	 */
+	u16 parent_idx;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l1_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l1_init() - Initialize a level 1 page table.
+ * @table: Target level 1 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l2_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while intializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
+		       struct pvr_device *pvr_dev)
+{
+	table->parent_idx = PVR_IDX_INVALID;
+
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l1_free() - Teardown a level 1 page table.
+ * @table: Target level 1 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l2_remove(), which must
+ * be called *before* pvr_page_table_l1_free().
+ */
+static void
+pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+	kfree(table);
+}
+
+/**
+ * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
+ *                            device.
+ * @table: Target level 1 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l1_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 0 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l0_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_1_FLAGS);
+}
+
+/**
+ * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
+ *                               page table.
+ * @table: Target level 1 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l1_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l1_raw *
+pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 1 page table.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 1 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 1 page table entry.
+ */
+static struct pvr_page_table_l1_entry_raw *
+pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
+{
+	return &pvr_page_table_l1_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
+{
+	struct pvr_page_table_l1_entry_raw entry_raw =
+		*pvr_page_table_l1_get_entry_raw(table, idx);
+
+	return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l0 - A wrapped level 0 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l0_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l0_get_entry_raw().
+ *
+ * There is no mirror representation of an individual page, so this type has no
+ * &entries member.
+ */
+struct pvr_page_table_l0 {
+	/**
+	 * @backing_page: A handle to the memory which holds the raw
+	 * equivalent of this table. **For internal use only.**
+	 */
+	struct pvr_mmu_backing_page backing_page;
+
+	union {
+		/**
+		 * @parent: The parent of this node in the page table tree structure.
+		 *
+		 * This is also a mirror table.
+		 *
+		 * Only valid when the L0 page table is active. When the L0 page table
+		 * has been removed and queued for destruction, the next_free field
+		 * should be used instead.
+		 */
+		struct pvr_page_table_l1 *parent;
+
+		/**
+		 * @next_free: Pointer to the next L0 page table to take/free.
+		 *
+		 * Used to form a linked list of L0 page tables. This is used
+		 * when preallocating tables and when the page table has been
+		 * removed and queued for destruction.
+		 */
+		struct pvr_page_table_l0 *next_free;
+	};
+
+	/**
+	 * @parent_idx: The index of the entry in the parent table (see
+	 * @parent) which corresponds to this table.
+	 */
+	u16 parent_idx;
+
+	/**
+	 * @entry_count: The current number of valid entries (that we know of)
+	 * in this table. This value is essentially a refcount - the table is
+	 * destroyed when this value is decremented to zero by
+	 * pvr_page_table_l0_remove().
+	 */
+	u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l0_init() - Initialize a level 0 page table.
+ * @table: Target level 0 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l1_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while intializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
+		       struct pvr_device *pvr_dev)
+{
+	table->parent_idx = PVR_IDX_INVALID;
+
+	return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l0_free() - Teardown a level 0 page table.
+ * @table: Target level 0 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l1_remove(), which must
+ * be called *before* pvr_page_table_l0_free().
+ */
+static void
+pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
+{
+	pvr_mmu_backing_page_fini(&table->backing_page);
+	kfree(table);
+}
+
+/**
+ * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
+ *                            device.
+ * @table: Target level 0 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l0_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child pages of @table also need to be flushed, this should be done first
+ * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
+ * this function.
+ */
+static void
+pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
+{
+	pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
+}
+
+/**
+ * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
+ *                               page table.
+ * @table: Target level 0 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l0_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l0_raw *
+pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
+{
+	return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 0 page table.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 0 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer. This is espcially important for level 0 page tables, which
+ * can have a variable number of entries.
+ *
+ * Return:
+ * A pointer to the requested raw level 0 page table entry.
+ */
+static struct pvr_page_table_l0_entry_raw *
+pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
+{
+	return &pvr_page_table_l0_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
+{
+	struct pvr_page_table_l0_entry_raw entry_raw =
+		*pvr_page_table_l0_get_entry_raw(table, idx);
+
+	return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_mmu_context - context holding data for operations at page
+ * catalogue level, intended for use with a VM context.
+ */
+struct pvr_mmu_context {
+	/** @pvr_dev: The PVR device associated with the owning VM context. */
+	struct pvr_device *pvr_dev;
+
+	/** @page_table_l2: The MMU table root. */
+	struct pvr_page_table_l2 page_table_l2;
+};
+
+/**
+ * struct pvr_page_table_ptr - A reference to a single physical page as indexed
+ * by the page table structure.
+ *
+ * Intended for embedding in a &struct pvr_mmu_op_context.
+ */
+struct pvr_page_table_ptr {
+	/**
+	 * @l1_table: A cached handle to the level 1 page table the
+	 * context is currently traversing.
+	 */
+	struct pvr_page_table_l1 *l1_table;
+
+	/**
+	 * @l0_table: A cached handle to the level 0 page table the
+	 * context is currently traversing.
+	 */
+	struct pvr_page_table_l0 *l0_table;
+
+	/**
+	 * @l2_idx: Index into the level 2 page table the context is
+	 * currently referencing.
+	 */
+	u16 l2_idx;
+
+	/**
+	 * @l1_idx: Index into the level 1 page table the context is
+	 * currently referencing.
+	 */
+	u16 l1_idx;
+
+	/**
+	 * @l0_idx: Index into the level 0 page table the context is
+	 * currently referencing.
+	 */
+	u16 l0_idx;
+};
+
+/**
+ * struct pvr_mmu_op_context - context holding data for individual
+ * device-virtual mapping operations. Intended for use with a VM bind operation.
+ */
+struct pvr_mmu_op_context {
+	/** @mmu_ctx: The MMU context associated with the owning VM context. */
+	struct pvr_mmu_context *mmu_ctx;
+
+	/** @map: Data specifically for map operations. */
+	struct {
+		/**
+		 * @sgt: Scatter gather table containing pages pinned for use by
+		 * this context - these are currently pinned when initialising
+		 * the VM bind operation.
+		 */
+		struct sg_table *sgt;
+
+		/** @sgt_offset: Start address of the device-virtual mapping. */
+		u64 sgt_offset;
+
+		/**
+		 * @l1_prealloc_tables: Preallocated l1 page table objects
+		 * use by this context when creating a page mapping. Linked list
+		 * fully created during initialisation.
+		 */
+		struct pvr_page_table_l1 *l1_prealloc_tables;
+
+		/**
+		 * @l0_prealloc_tables: Preallocated l0 page table objects
+		 * use by this context when creating a page mapping. Linked list
+		 * fully created during initialisation.
+		 */
+		struct pvr_page_table_l0 *l0_prealloc_tables;
+	} map;
+
+	/** @unmap: Data specifically for unmap operations. */
+	struct {
+		/**
+		 * @l1_free_tables: Collects page table objects freed by unmap
+		 * ops. Linked list empty at creation.
+		 */
+		struct pvr_page_table_l1 *l1_free_tables;
+
+		/**
+		 * @l0_free_tables: Collects page table objects freed by unmap
+		 * ops. Linked list empty at creation.
+		 */
+		struct pvr_page_table_l0 *l0_free_tables;
+	} unmap;
+
+	/**
+	 * @curr_page: A reference to a single physical page as indexed by the
+	 * page table structure.
+	 */
+	struct pvr_page_table_ptr curr_page;
+
+	/**
+	 * @sync_level_required: The maximum level of the page table tree
+	 * structure which has (possibly) been modified since it was last
+	 * flushed to the device.
+	 *
+	 * This field should only be set with pvr_mmu_op_context_require_sync()
+	 * or indirectly by pvr_mmu_op_context_sync_partial().
+	 */
+	enum pvr_mmu_sync_level sync_level_required;
+};
+
+/**
+ * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
+ * table into a level 2 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
+ * table into.
+ * @child_table: Target level 1 page table to be referenced by the new entry.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ *
+ * It is the caller's responsibility to execute any memory barries to ensure
+ * that the creation of @child_table is ordered before the L2 entry is inserted.
+ */
+static void
+pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
+			 struct pvr_page_table_l1 *child_table)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l2_entry_raw *entry_raw =
+		pvr_page_table_l2_get_entry_raw(l2_table,
+						op_ctx->curr_page.l2_idx);
+
+	pvr_page_table_l2_entry_raw_set(entry_raw,
+					child_table->backing_page.dma_addr);
+
+	child_table->parent = l2_table;
+	child_table->parent_idx = op_ctx->curr_page.l2_idx;
+	l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
+	++l2_table->entry_count;
+	op_ctx->curr_page.l1_table = child_table;
+}
+
+/**
+ * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
+ * table.
+ * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ */
+static void
+pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l2_entry_raw *entry_raw =
+		pvr_page_table_l2_get_entry_raw(l2_table,
+						op_ctx->curr_page.l1_table->parent_idx);
+
+	WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
+
+	pvr_page_table_l2_entry_raw_clear(entry_raw);
+
+	l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
+	op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
+	op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables;
+	op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table;
+	op_ctx->curr_page.l1_table = NULL;
+
+	--l2_table->entry_count;
+}
+
+/**
+ * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
+ * table into a level 1 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
+ * table into.
+ * @child_table: L0 page table to insert.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L1 entry.
+ *
+ * It is the caller's responsibility to execute any memory barries to ensure
+ * that the creation of @child_table is ordered before the L1 entry is inserted.
+ */
+static void
+pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx,
+			 struct pvr_page_table_l0 *child_table)
+{
+	struct pvr_page_table_l1_entry_raw *entry_raw =
+		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table,
+						op_ctx->curr_page.l1_idx);
+
+	pvr_page_table_l1_entry_raw_set(entry_raw,
+					child_table->backing_page.dma_addr);
+
+	child_table->parent = op_ctx->curr_page.l1_table;
+	child_table->parent_idx = op_ctx->curr_page.l1_idx;
+	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table;
+	++op_ctx->curr_page.l1_table->entry_count;
+	op_ctx->curr_page.l0_table = child_table;
+}
+
+/**
+ * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page
+ *                              table.
+ * @op_ctx: Target MMU op context pointing at the L1 entry to remove.
+ *
+ * If this function results in the L1 table becoming empty, it will be removed
+ * from its parent level 2 page table and destroyed.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L1 entry.
+ */
+static void
+pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx)
+{
+	struct pvr_page_table_l1_entry_raw *entry_raw =
+		pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent,
+						op_ctx->curr_page.l0_table->parent_idx);
+
+	WARN_ON(op_ctx->curr_page.l0_table->parent !=
+		op_ctx->curr_page.l1_table);
+
+	pvr_page_table_l1_entry_raw_clear(entry_raw);
+
+	op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL;
+	op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID;
+	op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables;
+	op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table;
+	op_ctx->curr_page.l0_table = NULL;
+
+	if (--op_ctx->curr_page.l1_table->entry_count == 0) {
+		/* Clear the parent L2 page table entry. */
+		if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID)
+			pvr_page_table_l2_remove(op_ctx);
+	}
+}
+
+/**
+ * pvr_page_table_l0_insert() - Insert an entry referring to a physical page
+ * into a level 0 page table.
+ * @op_ctx: Target MMU op context pointing at the L0 entry to insert.
+ * @dma_addr: Target DMA address to be referenced by the new entry.
+ * @flags: Page options to be stored in the new entry.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L0 entry.
+ */
+static void
+pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx,
+			 dma_addr_t dma_addr, struct pvr_page_flags_raw flags)
+{
+	struct pvr_page_table_l0_entry_raw *entry_raw =
+		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
+						op_ctx->curr_page.l0_idx);
+
+	pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags);
+
+	/*
+	 * There is no entry to set here - we don't keep a mirror of
+	 * individual pages.
+	 */
+
+	++op_ctx->curr_page.l0_table->entry_count;
+}
+
+/**
+ * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page
+ * table.
+ * @op_ctx: Target MMU op context pointing at the L0 entry to remove.
+ *
+ * If this function results in the L0 table becoming empty, it will be removed
+ * from its parent L1 page table and destroyed.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L0 entry.
+ */
+static void
+pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx)
+{
+	struct pvr_page_table_l0_entry_raw *entry_raw =
+		pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
+						op_ctx->curr_page.l0_idx);
+
+	pvr_page_table_l0_entry_raw_clear(entry_raw);
+
+	/*
+	 * There is no entry to clear here - we don't keep a mirror of
+	 * individual pages.
+	 */
+
+	if (--op_ctx->curr_page.l0_table->entry_count == 0) {
+		/* Clear the parent L1 page table entry. */
+		if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID)
+			pvr_page_table_l1_remove(op_ctx);
+	}
+}
+
+/**
+ * DOC: Page table index utilities
+ */
+
+/**
+ * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 2 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l2_idx(u64 device_addr)
+{
+	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >>
+	       ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT;
+}
+
+/**
+ * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 1 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l1_idx(u64 device_addr)
+{
+	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >>
+	       ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT;
+}
+
+/**
+ * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 0 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l0_idx(u64 device_addr)
+{
+	return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >>
+	       ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT;
+}
+
+/**
+ * DOC: High-level page table operations
+ */
+
+/**
+ * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
+ * necessary) a level 1 page table from the specified level 2 page table entry.
+ * @op_ctx: Target MMU op context.
+ * @should_insert: [IN] Specifies whether new page tables should be inserted
+ * when empty page table entries are encountered during traversal.
+ *
+ * Return:
+ *  * 0 on success, or
+ *
+ *    If @should_insert is %false:
+ *     * -%ENXIO if a level 1 page table would have been inserted.
+ *
+ *    If @should_insert is %true:
+ *     * Any error encountered while inserting the level 1 page table.
+ */
+static int
+pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
+				bool should_insert)
+{
+	struct pvr_page_table_l2 *l2_table =
+		&op_ctx->mmu_ctx->page_table_l2;
+	struct pvr_page_table_l1 *table;
+
+	if (pvr_page_table_l2_entry_is_valid(l2_table,
+					     op_ctx->curr_page.l2_idx)) {
+		op_ctx->curr_page.l1_table =
+			l2_table->entries[op_ctx->curr_page.l2_idx];
+		return 0;
+	}
+
+	if (!should_insert)
+		return -ENXIO;
+
+	/* Take a prealloced table. */
+	table = op_ctx->map.l1_prealloc_tables;
+	if (!table)
+		return -ENOMEM;
+
+	/* Pop */
+	op_ctx->map.l1_prealloc_tables = table->next_free;
+	table->next_free = NULL;
+
+	/* Ensure new table is fully written out before adding to L2 page table. */
+	wmb();
+
+	pvr_page_table_l2_insert(op_ctx, table);
+
+	return 0;
+}
+
+/**
+ * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
+ * necessary) a level 0 page table from the specified level 1 page table entry.
+ * @op_ctx: Target MMU op context.
+ * @should_insert: [IN] Specifies whether new page tables should be inserted
+ * when empty page table entries are encountered during traversal.
+ *
+ * Return:
+ *  * 0 on success,
+ *
+ *    If @should_insert is %false:
+ *     * -%ENXIO if a level 0 page table would have been inserted.
+ *
+ *    If @should_insert is %true:
+ *     * Any error encountered while inserting the level 0 page table.
+ */
+static int
+pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
+				bool should_insert)
+{
+	struct pvr_page_table_l0 *table;
+
+	if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
+					     op_ctx->curr_page.l1_idx)) {
+		op_ctx->curr_page.l0_table =
+			op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
+		return 0;
+	}
+
+	if (!should_insert)
+		return -ENXIO;
+
+	/* Take a prealloced table. */
+	table = op_ctx->map.l0_prealloc_tables;
+	if (!table)
+		return -ENOMEM;
+
+	/* Pop */
+	op_ctx->map.l0_prealloc_tables = table->next_free;
+	table->next_free = NULL;
+
+	/* Ensure new table is fully written out before adding to L1 page table. */
+	wmb();
+
+	pvr_page_table_l1_insert(op_ctx, table);
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_context_create() - Create an MMU context.
+ * @pvr_dev: PVR device associated with owning VM context.
+ *
+ * Returns:
+ *  * Newly created MMU context object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l2_init().
+ */
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
+{
+	struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	int err;
+
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
+	if (err)
+		return ERR_PTR(err);
+
+	ctx->pvr_dev = pvr_dev;
+
+	return ctx;
+}
+
+/**
+ * pvr_mmu_context_destroy() - Destroy an MMU context.
+ * @ctx: Target MMU context.
+ */
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
+{
+	pvr_page_table_l2_fini(&ctx->page_table_l2);
+	kfree(ctx);
+}
+
+/**
+ * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
+ * page table structure behind a VM context.
+ * @ctx: Target MMU context.
+ */
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
+{
+	return ctx->page_table_l2.backing_page.dma_addr;
+}
+
+/**
+ * pvr_page_table_l1_alloc() - Allocate a l1 page_table object.
+ * @ctx: MMU context of owning VM context.
+ *
+ * Returns:
+ *  * Newly created page table object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l1_init().
+ */
+static struct pvr_page_table_l1 *
+pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
+{
+	int err;
+
+	struct pvr_page_table_l1 *table =
+		kzalloc(sizeof(*table), GFP_KERNEL);
+
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l1_init(table, ctx->pvr_dev);
+	if (err) {
+		kfree(table);
+		return ERR_PTR(err);
+	}
+
+	return table;
+}
+
+/**
+ * pvr_page_table_l0_alloc() - Allocate a l0 page_table object.
+ * @ctx: MMU context of owning VM context.
+ *
+ * Returns:
+ *  * Newly created page table object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l0_init().
+ */
+static struct pvr_page_table_l0 *
+pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx)
+{
+	int err;
+
+	struct pvr_page_table_l0 *table =
+		kzalloc(sizeof(*table), GFP_KERNEL);
+
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	err = pvr_page_table_l0_init(table, ctx->pvr_dev);
+	if (err) {
+		kfree(table);
+		return ERR_PTR(err);
+	}
+
+	return table;
+}
+
+/**
+ * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a
+ * sync operation for the referenced page tables up to a specified level.
+ * @op_ctx: Target MMU op context.
+ * @level: Maximum page table level for which a sync is required.
+ */
+static void
+pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx,
+				enum pvr_mmu_sync_level level)
+{
+	if (op_ctx->sync_level_required < level)
+		op_ctx->sync_level_required = level;
+}
+
+/**
+ * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the
+ * page tables referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @level: Maximum page table level to sync.
+ *
+ * Do not call this function directly. Instead use
+ * pvr_mmu_op_context_sync_partial() which is checked against the current
+ * value of &op_ctx->sync_level_required as set by
+ * pvr_mmu_op_context_require_sync().
+ */
+static void
+pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx,
+			       enum pvr_mmu_sync_level level)
+{
+	/*
+	 * We sync the page table levels in ascending order (starting from the
+	 * leaf node) to ensure consistency.
+	 */
+
+	WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE);
+
+	if (level <= PVR_MMU_SYNC_LEVEL_NONE)
+		return;
+
+	if (op_ctx->curr_page.l0_table)
+		pvr_page_table_l0_sync(op_ctx->curr_page.l0_table);
+
+	if (level < PVR_MMU_SYNC_LEVEL_1)
+		return;
+
+	if (op_ctx->curr_page.l1_table)
+		pvr_page_table_l1_sync(op_ctx->curr_page.l1_table);
+
+	if (level < PVR_MMU_SYNC_LEVEL_2)
+		return;
+
+	pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2);
+}
+
+/**
+ * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the
+ * page tables referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @level: Requested page table level to sync up to (inclusive).
+ *
+ * If @level is greater than the maximum level recorded by @op_ctx as requiring
+ * a sync operation, only the previously recorded maximum will be used.
+ *
+ * Additionally, if @level is greater than or equal to the maximum level
+ * recorded by @op_ctx as requiring a sync operation, that maximum level will be
+ * reset as a full sync will be performed. This is equivalent to calling
+ * pvr_mmu_op_context_sync().
+ */
+static void
+pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx,
+				enum pvr_mmu_sync_level level)
+{
+	/*
+	 * If the requested sync level is greater than or equal to the
+	 * currently required sync level, we do two things:
+	 *  * Don't waste time syncing levels we haven't previously marked as
+	 *    requiring a sync, and
+	 *  * Reset the required sync level since we are about to sync
+	 *    everything that was previously marked as requiring a sync.
+	 */
+	if (level >= op_ctx->sync_level_required) {
+		level = op_ctx->sync_level_required;
+		op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+	}
+
+	pvr_mmu_op_context_sync_manual(op_ctx, level);
+}
+
+/**
+ * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
+ * a MMU op context.
+ * @op_ctx: Target MMU op context.
+ *
+ * The maximum level marked internally as requiring a sync will be reset so
+ * that subsequent calls to this function will be no-ops unless @op_ctx is
+ * otherwise updated.
+ */
+static void
+pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
+{
+	pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
+
+	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+}
+
+/**
+ * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
+ * the page table tree structure needed to reference the physical page
+ * referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specifies whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ * @load_level_required: Maximum page table level to load.
+ *
+ * If @should_create is %true, this function may modify the stored required
+ * sync level of @op_ctx as new page tables are created and inserted into their
+ * respective parents.
+ *
+ * Since there is only one root page table, it is technically incorrect to call
+ * this function with a value of @load_level_required greater than or equal to
+ * the root level number. However, this is not explicitly disallowed here.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by pvr_page_table_l1_get_or_create() if
+ *    @load_level_required >= 1 except -%ENXIO, or
+ *  * Any error returned by pvr_page_table_l0_get_or_create() if
+ *    @load_level_required >= 0 except -%ENXIO.
+ */
+static int
+pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
+			       bool should_create,
+			       enum pvr_mmu_sync_level load_level_required)
+{
+	const struct pvr_page_table_l1 *l1_head_before =
+		op_ctx->map.l1_prealloc_tables;
+	const struct pvr_page_table_l0 *l0_head_before =
+		op_ctx->map.l0_prealloc_tables;
+	int err;
+
+	/* Clear tables we're about to fetch in case of error states. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
+		op_ctx->curr_page.l1_table = NULL;
+
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
+		op_ctx->curr_page.l0_table = NULL;
+
+	/* Get or create L1 page table. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
+		err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
+		if (err) {
+			/*
+			 * If @should_create is %false and no L1 page table was
+			 * found, return early but without an error. Since
+			 * pvr_page_table_l1_get_or_create() can only return
+			 * -%ENXIO if @should_create is %false, there is no
+			 * need to check it here.
+			 */
+			if (err == -ENXIO)
+				err = 0;
+
+			return err;
+		}
+	}
+
+	/* Get or create L0 page table. */
+	if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
+		err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
+		if (err) {
+			/*
+			 * If @should_create is %false and no L0 page table was
+			 * found, return early but without an error. Since
+			 * pvr_page_table_l0_get_or_insert() can only return
+			 * -%ENXIO if @should_create is %false, there is no
+			 * need to check it here.
+			 */
+			if (err == -ENXIO)
+				err = 0;
+
+			/*
+			 * At this point, an L1 page table could have been
+			 * inserted but is now empty due to the failed attempt
+			 * at inserting an L0 page table. In this instance, we
+			 * must remove the empty L1 page table ourselves as
+			 * pvr_page_table_l1_remove() is never called as part
+			 * of the error path in
+			 * pvr_page_table_l0_get_or_insert().
+			 */
+			if (l1_head_before != op_ctx->map.l1_prealloc_tables) {
+				pvr_page_table_l2_remove(op_ctx);
+				pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+			}
+
+			return err;
+		}
+	}
+
+	/*
+	 * A sync is only needed if table objects were inserted. This can be
+	 * inferred by checking if the pointer at the head of the linked list
+	 * has changed.
+	 */
+	if (l1_head_before != op_ctx->map.l1_prealloc_tables)
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+	else if (l0_head_before != op_ctx->map.l0_prealloc_tables)
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
+ * context, syncing any page tables previously assigned to it which are no
+ * longer relevant.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: New pointer target.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * This function performs a full sync on the pointer, regardless of which
+ * levels are modified.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_op_context_load_tables().
+ */
+static int
+pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
+				 u64 device_addr, bool should_create)
+{
+	pvr_mmu_op_context_sync(op_ctx);
+
+	op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
+	op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
+	op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
+	op_ctx->curr_page.l1_table = NULL;
+	op_ctx->curr_page.l0_table = NULL;
+
+	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
+					      PVR_MMU_SYNC_LEVEL_1);
+}
+
+/**
+ * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
+ * context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * If @should_create is %false, it is the caller's responsibility to verify that
+ * the state of the table references in @op_ctx is valid on return. If -%ENXIO
+ * is returned, at least one of the table references is invalid. It should be
+ * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
+ * returned, unlike other error codes. The caller should check which references
+ * are invalid by comparing them to %NULL. Only &@ptr->l2_table is guaranteed
+ * to be valid, since it represents the root of the page table tree structure.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EPERM if the operation would wrap at the top of the page table
+ *    hierarchy,
+ *  * -%ENXIO if @should_create is %false and a page table of any level would
+ *    have otherwise been created, or
+ *  * Any error returned while attempting to create missing page tables if
+ *    @should_create is %true.
+ */
+static int
+pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx,
+			     bool should_create)
+{
+	s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+
+	if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X)
+		goto load_tables;
+
+	op_ctx->curr_page.l0_idx = 0;
+	load_level_required = PVR_MMU_SYNC_LEVEL_0;
+
+	if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE)
+		goto load_tables;
+
+	op_ctx->curr_page.l1_idx = 0;
+	load_level_required = PVR_MMU_SYNC_LEVEL_1;
+
+	if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE)
+		goto load_tables;
+
+	/*
+	 * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to
+	 * zero here. However, that would wrap the top layer of the page table
+	 * hierarchy which is not a valid operation. Instead, we warn and return
+	 * an error.
+	 */
+	WARN(true,
+	     "%s(%p) attempted to loop the top of the page table hierarchy",
+	     __func__, op_ctx);
+	return -EPERM;
+
+	/* If indices have wrapped, we need to load new tables. */
+load_tables:
+	/* First, flush tables which will be unloaded. */
+	pvr_mmu_op_context_sync_partial(op_ctx, load_level_required);
+
+	/* Then load tables from the required level down. */
+	return pvr_mmu_op_context_load_tables(op_ctx, should_create,
+					      load_level_required);
+}
+
+/**
+ * DOC: Single page operations
+ */
+
+/**
+ * pvr_page_create() - Create a device-virtual memory page and insert it into
+ * a level 0 page table.
+ * @op_ctx: Target MMU op context pointing at the device-virtual address of the
+ * target page.
+ * @dma_addr: DMA address of the physical page backing the created page.
+ * @flags: Page options saved on the level 0 page table entry for reading by
+ *         the device.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EEXIST if the requested page already exists.
+ */
+static int
+pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr,
+		struct pvr_page_flags_raw flags)
+{
+	/* Do not create a new page if one already exists. */
+	if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
+					     op_ctx->curr_page.l0_idx)) {
+		return -EEXIST;
+	}
+
+	pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
+
+	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+
+	return 0;
+}
+
+/**
+ * pvr_page_destroy() - Destroy a device page after removing it from its
+ * parent level 0 page table.
+ * @op_ctx: Target MMU op context.
+ */
+static void
+pvr_page_destroy(struct pvr_mmu_op_context *op_ctx)
+{
+	/* Do nothing if the page does not exist. */
+	if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
+					      op_ctx->curr_page.l0_idx)) {
+		return;
+	}
+
+	/* Clear the parent L0 page table entry. */
+	pvr_page_table_l0_remove(op_ctx);
+
+	pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+}
+
+/**
+ * pvr_mmu_op_context_destroy() - Destroy an MMU op context.
+ * @op_ctx: Target MMU op context.
+ */
+void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx)
+{
+	const bool flush_caches =
+		op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE;
+
+	pvr_mmu_op_context_sync(op_ctx);
+
+	/* Unmaps should be flushed immediately. Map flushes can be deferred. */
+	if (flush_caches && !op_ctx->map.sgt)
+		pvr_mmu_flush_exec(op_ctx->mmu_ctx->pvr_dev, true);
+
+	while (op_ctx->map.l0_prealloc_tables) {
+		struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables;
+
+		op_ctx->map.l0_prealloc_tables =
+			op_ctx->map.l0_prealloc_tables->next_free;
+		pvr_page_table_l0_free(tmp);
+	}
+
+	while (op_ctx->map.l1_prealloc_tables) {
+		struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables;
+
+		op_ctx->map.l1_prealloc_tables =
+			op_ctx->map.l1_prealloc_tables->next_free;
+		pvr_page_table_l1_free(tmp);
+	}
+
+	while (op_ctx->unmap.l0_free_tables) {
+		struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables;
+
+		op_ctx->unmap.l0_free_tables =
+			op_ctx->unmap.l0_free_tables->next_free;
+		pvr_page_table_l0_free(tmp);
+	}
+
+	while (op_ctx->unmap.l1_free_tables) {
+		struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables;
+
+		op_ctx->unmap.l1_free_tables =
+			op_ctx->unmap.l1_free_tables->next_free;
+		pvr_page_table_l1_free(tmp);
+	}
+
+	kfree(op_ctx);
+}
+
+/**
+ * pvr_mmu_op_context_create() - Create an MMU op context.
+ * @ctx: MMU context associated with owning VM context.
+ * @sgt: Scatter gather table containing pages pinned for use by this context.
+ * @sgt_offset: Start offset of the requested device-virtual memory mapping.
+ * @size: Size in bytes of the requested device-virtual memory mapping. For an
+ * unmapping, this should be zero so that no page tables are allocated.
+ *
+ * Returns:
+ *  * Newly created MMU op context object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l2_init().
+ */
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
+			  u64 sgt_offset, u64 size)
+{
+	int err;
+
+	struct pvr_mmu_op_context *op_ctx =
+		kzalloc(sizeof(*op_ctx), GFP_KERNEL);
+
+	if (!op_ctx)
+		return ERR_PTR(-ENOMEM);
+
+	op_ctx->mmu_ctx = ctx;
+	op_ctx->map.sgt = sgt;
+	op_ctx->map.sgt_offset = sgt_offset;
+	op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+
+	if (size) {
+		/*
+		 * The number of page table objects we need to prealloc is
+		 * indicated by the mapping size, start offset and the sizes
+		 * of the areas mapped per PT or PD. The range calculation is
+		 * identical to that for the index into a table for a device
+		 * address, so we reuse those functions here.
+		 */
+		const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
+		const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
+		const u32 l1_count = l1_end_idx - l1_start_idx + 1;
+		const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
+		const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
+		const u32 l0_count = l0_end_idx - l0_start_idx + 1;
+
+		/*
+		 * Alloc and push page table entries until we have enough of
+		 * each type, ending with linked lists of l0 and l1 entries in
+		 * reverse order.
+		 */
+		for (int i = 0; i < l1_count; i++) {
+			struct pvr_page_table_l1 *l1_tmp =
+				pvr_page_table_l1_alloc(ctx);
+
+			err = PTR_ERR_OR_ZERO(l1_tmp);
+			if (err)
+				goto err_cleanup;
+
+			l1_tmp->next_free = op_ctx->map.l1_prealloc_tables;
+			op_ctx->map.l1_prealloc_tables = l1_tmp;
+		}
+
+		for (int i = 0; i < l0_count; i++) {
+			struct pvr_page_table_l0 *l0_tmp =
+				pvr_page_table_l0_alloc(ctx);
+
+			err = PTR_ERR_OR_ZERO(l0_tmp);
+			if (err)
+				goto err_cleanup;
+
+			l0_tmp->next_free = op_ctx->map.l0_prealloc_tables;
+			op_ctx->map.l0_prealloc_tables = l0_tmp;
+		}
+	}
+
+	return op_ctx;
+
+err_cleanup:
+	pvr_mmu_op_context_destroy(op_ctx);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
+ * starting from the current page of an MMU op context.
+ * @op_ctx: Target MMU op context pointing at the first page to unmap.
+ * @nr_pages: Number of pages to unmap.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page() (except -%ENXIO).
+ */
+static int
+pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
+				   u64 nr_pages)
+{
+	int err;
+
+	if (nr_pages == 0)
+		return 0;
+
+	/*
+	 * Destroy first page outside loop, as it doesn't require a page
+	 * advance beforehand. If the L0 page table reference in
+	 * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
+	 * @op_ctx.curr_page (so skip ahead).
+	 */
+	if (op_ctx->curr_page.l0_table)
+		pvr_page_destroy(op_ctx);
+
+	for (u64 page = 1; page < nr_pages; ++page) {
+		err = pvr_mmu_op_context_next_page(op_ctx, false);
+		/*
+		 * If the page table tree structure at @op_ctx.curr_page is
+		 * incomplete, skip ahead. We don't care about unmapping pages
+		 * that cannot exist.
+		 *
+		 * FIXME: This could be made more efficient by jumping ahead
+		 * using pvr_mmu_op_context_set_curr_page().
+		 */
+		if (err == -ENXIO)
+			continue;
+		else if (err)
+			return err;
+
+		pvr_page_destroy(op_ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_mmu_unmap() - Unmap pages from a memory context.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: First device-virtual address to unmap.
+ * @size: Size in bytes to unmap.
+ *
+ * The total amount of device-virtual memory unmapped is
+ * @nr_pages * %PVR_DEVICE_PAGE_SIZE.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_page_table_ptr_init(), or
+ *  * Any error code returned by pvr_page_table_ptr_unmap().
+ */
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
+{
+	int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
+
+	if (err)
+		return err;
+
+	return pvr_mmu_op_context_unmap_curr_page(op_ctx,
+						  size >> PVR_DEVICE_PAGE_SHIFT);
+}
+
+/**
+ * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
+ * device-virtual memory.
+ * @op_ctx: Target MMU op context pointing to the first page that should be
+ * mapped.
+ * @sgl: Target scatter-gather table entry.
+ * @offset: Offset into @sgl to map from. Must result in a starting address
+ * from @sgl which is CPU page-aligned.
+ * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @page_flags: Page options to be applied to every device-virtual memory page
+ * in the created mapping.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if the range specified by @offset and @size is not completely
+ *    within @sgl, or
+ *  * Any error encountered while creating a page with pvr_page_create(), or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page().
+ */
+static int
+pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
+		u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
+{
+	const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
+	dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
+	const unsigned int dma_len = sg_dma_len(sgl);
+	struct pvr_page_table_ptr ptr_copy;
+	unsigned int page;
+	int err;
+
+	if (size > dma_len || offset > dma_len - size)
+		return -EINVAL;
+
+	/*
+	 * Before progressing, save a copy of the start pointer so we can use
+	 * it again if we enter an error state and have to destroy pages.
+	 */
+	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+	/*
+	 * Create first page outside loop, as it doesn't require a page advance
+	 * beforehand.
+	 */
+	err = pvr_page_create(op_ctx, dma_addr, page_flags);
+	if (err)
+		return err;
+
+	for (page = 1; page < pages; ++page) {
+		err = pvr_mmu_op_context_next_page(op_ctx, true);
+		if (err)
+			goto err_destroy_pages;
+
+		dma_addr += PVR_DEVICE_PAGE_SIZE;
+
+		err = pvr_page_create(op_ctx, dma_addr, page_flags);
+		if (err)
+			goto err_destroy_pages;
+	}
+
+	return 0;
+
+err_destroy_pages:
+	memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
+	err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
+
+	return err;
+}
+
+/**
+ * pvr_mmu_map() - Map an object's virtual memory to physical memory.
+ * @op_ctx: Target MMU op context.
+ * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @flags: Flags from pvr_gem_object associated with the mapping.
+ * @device_addr: Virtual device address to map to. Must be device page-aligned.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_page_table_ptr_init(), or
+ *  * Any error code returned by pvr_mmu_map_sgl(), or
+ *  * Any error code returned by pvr_page_table_ptr_next_page().
+ */
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+		u64 device_addr)
+{
+	struct pvr_page_table_ptr ptr_copy;
+	struct pvr_page_flags_raw flags_raw;
+	struct scatterlist *sgl;
+	u64 mapped_size = 0;
+	unsigned int count;
+	int err;
+
+	if (!size)
+		return 0;
+
+	if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
+		return -EINVAL;
+
+	err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
+	if (err)
+		return -EINVAL;
+
+	memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+	flags_raw = pvr_page_flags_raw_create(false, false,
+					      flags & DRM_PVR_BO_BYPASS_DEVICE_CACHE,
+					      flags & DRM_PVR_BO_PM_FW_PROTECT);
+
+	/* Map scatter gather table */
+	for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
+		const size_t sgl_len = sg_dma_len(sgl);
+		u64 sgl_offset, map_sgl_len;
+
+		if (sgl_len <= op_ctx->map.sgt_offset) {
+			op_ctx->map.sgt_offset -= sgl_len;
+			continue;
+		}
+
+		sgl_offset = op_ctx->map.sgt_offset;
+		map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
+
+		err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
+				      flags_raw);
+		if (err)
+			break;
+
+		/*
+		 * Flag the L0 page table as requiring a flush when the MMU op
+		 * context is destroyed.
+		 */
+		pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+
+		op_ctx->map.sgt_offset = 0;
+		mapped_size += map_sgl_len;
+
+		if (mapped_size >= size)
+			break;
+
+		err = pvr_mmu_op_context_next_page(op_ctx, true);
+		if (err)
+			break;
+	}
+
+	if (err && mapped_size) {
+		memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
+		pvr_mmu_op_context_unmap_curr_page(op_ctx,
+						   mapped_size >> PVR_DEVICE_PAGE_SHIFT);
+	}
+
+	return err;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.h b/drivers/gpu/drm/imagination/pvr_mmu.h
new file mode 100644
index 0000000000000..a8ecd460168dc
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_mmu.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_MMU_H
+#define PVR_MMU_H
+
+#include <linux/memory.h>
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h" */
+struct pvr_device;
+
+/* Forward declaration from "pvr_mmu.c" */
+struct pvr_mmu_context;
+struct pvr_mmu_op_context;
+
+/* Forward declaration from "pvr_vm.c" */
+struct pvr_vm_context;
+
+/* Forward declaration from <linux/scatterlist.h> */
+struct sg_table;
+
+/**
+ * DOC: Public API (constants)
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_SIZE
+ *
+ *    Fixed page size referenced by leaf nodes in the page table tree
+ *    structure. In the current implementation, this value is pegged to the
+ *    CPU page size (%PAGE_SIZE). It is therefore an error to specify a CPU
+ *    page size which is not also a supported device page size. The supported
+ *    device page sizes are: 4KiB, 16KiB, 64KiB, 256KiB, 1MiB and 2MiB.
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_SHIFT
+ *
+ *    Shift value used to efficiently multiply or divide by
+ *    %PVR_DEVICE_PAGE_SIZE.
+ *
+ *    This value is derived from %PVR_DEVICE_PAGE_SIZE.
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_MASK
+ *
+ *    Mask used to round a value down to the nearest multiple of
+ *    %PVR_DEVICE_PAGE_SIZE. When bitwise negated, it will indicate whether a
+ *    value is already a multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ *    This value is derived from %PVR_DEVICE_PAGE_SIZE.
+ */
+
+/* PVR_DEVICE_PAGE_SIZE determines the page size */
+#define PVR_DEVICE_PAGE_SIZE (PAGE_SIZE)
+#define PVR_DEVICE_PAGE_SHIFT (PAGE_SHIFT)
+#define PVR_DEVICE_PAGE_MASK (PAGE_MASK)
+
+/**
+ * DOC: Page table index utilities (constants)
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_SPACE_SIZE
+ *
+ *    Size of device-virtual address space which can be represented in the page
+ *    table structure.
+ *
+ *    This value is checked at runtime against
+ *    &pvr_device_features.virtual_address_space_bits by
+ *    pvr_vm_create_context(), which will return an error if the feature value
+ *    does not match this constant.
+ *
+ *    .. admonition:: Future work
+ *
+ *       It should be possible to support other values of
+ *       &pvr_device_features.virtual_address_space_bits, but so far no
+ *       hardware has been created which advertises an unsupported value.
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_BITS
+ *
+ *    Number of bits needed to represent any value less than
+ *    %PVR_PAGE_TABLE_ADDR_SPACE_SIZE exactly.
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_MASK
+ *
+ *    Bitmask of device-virtual addresses which are valid in the page table
+ *    structure.
+ *
+ *    This value is derived from %PVR_PAGE_TABLE_ADDR_SPACE_SIZE, so the same
+ *    notes on that constant apply here.
+ */
+#define PVR_PAGE_TABLE_ADDR_SPACE_SIZE SZ_1T
+#define PVR_PAGE_TABLE_ADDR_BITS __ffs(PVR_PAGE_TABLE_ADDR_SPACE_SIZE)
+#define PVR_PAGE_TABLE_ADDR_MASK (PVR_PAGE_TABLE_ADDR_SPACE_SIZE - 1)
+
+void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev);
+int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait);
+
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev);
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx);
+
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx);
+
+void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx);
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx,
+			  struct sg_table *sgt, u64 sgt_offset, u64 size);
+
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+		u64 device_addr);
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size);
+
+#endif /* PVR_MMU_H */
diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
new file mode 100644
index 0000000000000..4c48a208c390b
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -0,0 +1,1091 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_vm.h"
+
+#include "pvr_device.h"
+#include "pvr_drv.h"
+#include "pvr_gem.h"
+#include "pvr_mmu.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_heap_config.h"
+
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gpuvm.h>
+
+#include <linux/container_of.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gfp_types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/stddef.h>
+
+/**
+ * DOC: Memory context
+ *
+ * This is the "top level" datatype in the VM code. It's exposed in the public
+ * API as an opaque handle.
+ */
+
+/**
+ * struct pvr_vm_context - Context type used to represent a single VM.
+ */
+struct pvr_vm_context {
+	/**
+	 * @pvr_dev: The PowerVR device to which this context is bound.
+	 * This binding is immutable for the life of the context.
+	 */
+	struct pvr_device *pvr_dev;
+
+	/** @mmu_ctx: The context for binding to physical memory. */
+	struct pvr_mmu_context *mmu_ctx;
+
+	/** @gpuva_mgr: GPUVA manager object associated with this context. */
+	struct drm_gpuvm gpuvm_mgr;
+
+	/** @lock: Global lock on this VM. */
+	struct mutex lock;
+
+	/**
+	 * @fw_mem_ctx_obj: Firmware object representing firmware memory
+	 * context.
+	 */
+	struct pvr_fw_object *fw_mem_ctx_obj;
+
+	/** @ref_count: Reference count of object. */
+	struct kref ref_count;
+
+	/**
+	 * @dummy_gem: GEM object to enable VM reservation. All private BOs
+	 * should use the @dummy_gem.resv and not their own _resv field.
+	 */
+	struct drm_gem_object dummy_gem;
+};
+
+struct pvr_vm_context *pvr_vm_context_get(struct pvr_vm_context *vm_ctx)
+{
+	if (vm_ctx)
+		kref_get(&vm_ctx->ref_count);
+
+	return vm_ctx;
+}
+
+/**
+ * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the
+ *                                     page table structure behind a VM context.
+ * @vm_ctx: Target VM context.
+ */
+dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx)
+{
+	return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx);
+}
+
+/**
+ * pvr_vm_get_dma_resv() - Expose the dma_resv owned by the VM context.
+ * @vm_ctx: Target VM context.
+ *
+ * This is used to allow private BOs to share a dma_resv for faster fence
+ * updates.
+ *
+ * Returns: The dma_resv pointer.
+ */
+struct dma_resv *pvr_vm_get_dma_resv(struct pvr_vm_context *vm_ctx)
+{
+	return vm_ctx->dummy_gem.resv;
+}
+
+/**
+ * DOC: Memory mappings
+ */
+
+/**
+ * struct pvr_vm_gpuva - Wrapper type representing a single VM mapping.
+ */
+struct pvr_vm_gpuva {
+	/** @base: The wrapped drm_gpuva object. */
+	struct drm_gpuva base;
+};
+
+static __always_inline
+struct pvr_vm_gpuva *to_pvr_vm_gpuva(struct drm_gpuva *gpuva)
+{
+	return container_of(gpuva, struct pvr_vm_gpuva, base);
+}
+
+enum pvr_vm_bind_type {
+	PVR_VM_BIND_TYPE_MAP,
+	PVR_VM_BIND_TYPE_UNMAP,
+};
+
+/**
+ * struct pvr_vm_bind_op - Context of a map/unmap operation.
+ */
+struct pvr_vm_bind_op {
+	/** @type: Map or unmap. */
+	enum pvr_vm_bind_type type;
+
+	/** @pvr_obj: Object associated with mapping (map only). */
+	struct pvr_gem_object *pvr_obj;
+
+	/**
+	 * @vm_ctx: VM context where the mapping will be created or destroyed.
+	 */
+	struct pvr_vm_context *vm_ctx;
+
+	/** @mmu_op_ctx: MMU op context. */
+	struct pvr_mmu_op_context *mmu_op_ctx;
+
+	/** @gpuvm_bo: Prealloced wrapped BO for attaching to the gpuvm. */
+	struct drm_gpuvm_bo *gpuvm_bo;
+
+	/**
+	 * @new_va: Prealloced VA mapping object (init in callback).
+	 * Used when creating a mapping.
+	 */
+	struct pvr_vm_gpuva *new_va;
+
+	/**
+	 * @prev_va: Prealloced VA mapping object (init in callback).
+	 * Used when a mapping or unmapping operation overlaps an existing
+	 * mapping and splits away the beginning into a new mapping.
+	 */
+	struct pvr_vm_gpuva *prev_va;
+
+	/**
+	 * @next_va: Prealloced VA mapping object (init in callback).
+	 * Used when a mapping or unmapping operation overlaps an existing
+	 * mapping and splits away the end into a new mapping.
+	 */
+	struct pvr_vm_gpuva *next_va;
+
+	/** @offset: Offset into @pvr_obj to begin mapping from. */
+	u64 offset;
+
+	/** @device_addr: Device-virtual address at the start of the mapping. */
+	u64 device_addr;
+
+	/** @size: Size of the desired mapping. */
+	u64 size;
+};
+
+/**
+ * pvr_vm_bind_op_exec() - Execute a single bind op.
+ * @bind_op: Bind op context.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * Any error code returned by drm_gpuva_sm_map(), drm_gpuva_sm_unmap(), or
+ *    a callback function.
+ */
+static int pvr_vm_bind_op_exec(struct pvr_vm_bind_op *bind_op)
+{
+	switch (bind_op->type) {
+	case PVR_VM_BIND_TYPE_MAP:
+		return drm_gpuvm_sm_map(&bind_op->vm_ctx->gpuvm_mgr,
+					bind_op, bind_op->device_addr,
+					bind_op->size,
+					gem_from_pvr_gem(bind_op->pvr_obj),
+					bind_op->offset);
+
+	case PVR_VM_BIND_TYPE_UNMAP:
+		return drm_gpuvm_sm_unmap(&bind_op->vm_ctx->gpuvm_mgr,
+					  bind_op, bind_op->device_addr,
+					  bind_op->size);
+	}
+
+	/*
+	 * This shouldn't happen unless something went wrong
+	 * in drm_sched.
+	 */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static void pvr_vm_bind_op_fini(struct pvr_vm_bind_op *bind_op)
+{
+	drm_gpuvm_bo_put(bind_op->gpuvm_bo);
+
+	kfree(bind_op->new_va);
+	kfree(bind_op->prev_va);
+	kfree(bind_op->next_va);
+
+	if (bind_op->pvr_obj)
+		pvr_gem_object_put(bind_op->pvr_obj);
+
+	if (bind_op->mmu_op_ctx)
+		pvr_mmu_op_context_destroy(bind_op->mmu_op_ctx);
+}
+
+static int
+pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op,
+			struct pvr_vm_context *vm_ctx,
+			struct pvr_gem_object *pvr_obj, u64 offset,
+			u64 device_addr, u64 size)
+{
+	const bool is_user = vm_ctx == vm_ctx->pvr_dev->kernel_vm_ctx;
+	const u64 pvr_obj_size = pvr_gem_object_size(pvr_obj);
+	struct sg_table *sgt;
+	u64 offset_plus_size;
+	int err;
+
+	if (check_add_overflow(offset, size, &offset_plus_size))
+		return -EINVAL;
+
+	if (is_user &&
+	    !pvr_find_heap_containing(vm_ctx->pvr_dev, device_addr, size)) {
+		return -EINVAL;
+	}
+
+	if (!pvr_device_addr_and_size_are_valid(device_addr, size) ||
+	    offset & ~PAGE_MASK || size & ~PAGE_MASK ||
+	    offset >= pvr_obj_size || offset_plus_size > pvr_obj_size)
+		return -EINVAL;
+
+	bind_op->type = PVR_VM_BIND_TYPE_MAP;
+
+	bind_op->gpuvm_bo = drm_gpuvm_bo_create(&vm_ctx->gpuvm_mgr,
+						gem_from_pvr_gem(pvr_obj));
+	if (!bind_op->gpuvm_bo)
+		return -ENOMEM;
+
+	bind_op->new_va = kzalloc(sizeof(*bind_op->new_va), GFP_KERNEL);
+	bind_op->prev_va = kzalloc(sizeof(*bind_op->prev_va), GFP_KERNEL);
+	bind_op->next_va = kzalloc(sizeof(*bind_op->next_va), GFP_KERNEL);
+	if (!bind_op->new_va || !bind_op->prev_va || !bind_op->next_va) {
+		err = -ENOMEM;
+		goto err_bind_op_fini;
+	}
+
+	/* Pin pages so they're ready for use. */
+	sgt = pvr_gem_object_get_pages_sgt(pvr_obj);
+	err = PTR_ERR_OR_ZERO(sgt);
+	if (err)
+		goto err_bind_op_fini;
+
+	bind_op->mmu_op_ctx =
+		pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt, offset, size);
+	err = PTR_ERR_OR_ZERO(bind_op->mmu_op_ctx);
+	if (err) {
+		bind_op->mmu_op_ctx = NULL;
+		goto err_bind_op_fini;
+	}
+
+	bind_op->pvr_obj = pvr_obj;
+	bind_op->vm_ctx = vm_ctx;
+	bind_op->device_addr = device_addr;
+	bind_op->size = size;
+	bind_op->offset = offset;
+
+	return 0;
+
+err_bind_op_fini:
+	pvr_vm_bind_op_fini(bind_op);
+
+	return err;
+}
+
+static int
+pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op,
+			  struct pvr_vm_context *vm_ctx, u64 device_addr,
+			  u64 size)
+{
+	int err;
+
+	if (!pvr_device_addr_and_size_are_valid(device_addr, size))
+		return -EINVAL;
+
+	bind_op->type = PVR_VM_BIND_TYPE_UNMAP;
+
+	bind_op->prev_va = kzalloc(sizeof(*bind_op->prev_va), GFP_KERNEL);
+	bind_op->next_va = kzalloc(sizeof(*bind_op->next_va), GFP_KERNEL);
+	if (!bind_op->prev_va || !bind_op->next_va) {
+		err = -ENOMEM;
+		goto err_bind_op_fini;
+	}
+
+	bind_op->mmu_op_ctx =
+		pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0);
+	err = PTR_ERR_OR_ZERO(bind_op->mmu_op_ctx);
+	if (err) {
+		bind_op->mmu_op_ctx = NULL;
+		goto err_bind_op_fini;
+	}
+
+	bind_op->vm_ctx = vm_ctx;
+	bind_op->device_addr = device_addr;
+	bind_op->size = size;
+
+	return 0;
+
+err_bind_op_fini:
+	pvr_vm_bind_op_fini(bind_op);
+
+	return err;
+}
+
+static int
+pvr_vm_bind_op_lock_resvs(struct drm_exec *exec, struct pvr_vm_bind_op *bind_op)
+{
+	drm_exec_until_all_locked(exec) {
+		struct drm_gem_object *r_obj = &bind_op->vm_ctx->dummy_gem;
+		struct drm_gpuvm *gpuvm = &bind_op->vm_ctx->gpuvm_mgr;
+		struct pvr_gem_object *pvr_obj = bind_op->pvr_obj;
+		struct drm_gpuvm_bo *gpuvm_bo;
+
+		/* Acquire lock on the vm_context's reserve object. */
+		int err = drm_exec_lock_obj(exec, r_obj);
+
+		drm_exec_retry_on_contention(exec);
+		if (err)
+			return err;
+
+		/* Acquire lock on all BOs in the context. */
+		list_for_each_entry(gpuvm_bo, &gpuvm->extobj.list,
+				    list.entry.extobj) {
+			err = drm_exec_lock_obj(exec, gpuvm_bo->obj);
+
+			drm_exec_retry_on_contention(exec);
+			if (err)
+				return err;
+		}
+
+		/* Unmap operations don't have an object to lock. */
+		if (!pvr_obj)
+			break;
+
+		/* Acquire lock on the GEM being mapped. */
+		err = drm_exec_lock_obj(exec,
+					gem_from_pvr_gem(bind_op->pvr_obj));
+
+		drm_exec_retry_on_contention(exec);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_vm_gpuva_map() - Insert a mapping into a memory context.
+ * @op: gpuva op containing the remap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by drm_gpuvm_sm_map following a successful mapping while
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_map().
+ */
+static int
+pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx)
+{
+	struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj);
+	struct pvr_vm_bind_op *ctx = op_ctx;
+	int err;
+
+	if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK)
+		return -EINVAL;
+
+	err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags,
+			  op->map.va.addr);
+	if (err)
+		return err;
+
+	drm_gpuva_map(&ctx->vm_ctx->gpuvm_mgr, &ctx->new_va->base, &op->map);
+	drm_gpuva_link(&ctx->new_va->base, ctx->gpuvm_bo);
+	ctx->new_va = NULL;
+
+	return 0;
+}
+
+/**
+ * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context.
+ * @op: gpuva op containing the unmap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by drm_gpuvm_sm_unmap following a successful unmapping while
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_unmap().
+ */
+static int
+pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx)
+{
+	struct pvr_vm_bind_op *ctx = op_ctx;
+
+	int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr,
+				op->unmap.va->va.range);
+
+	if (err)
+		return err;
+
+	drm_gpuva_unmap(&op->unmap);
+	drm_gpuva_unlink(op->unmap.va);
+
+	return 0;
+}
+
+/**
+ * pvr_vm_gpuva_remap() - Remap a mapping within a memory context.
+ * @op: gpuva op containing the remap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by either drm_gpuvm_sm_map or drm_gpuvm_sm_unmap when a
+ * mapping or unmapping operation causes a region to be split. The
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_vm_gpuva_unmap() or pvr_vm_gpuva_unmap().
+ */
+static int
+pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx)
+{
+	struct pvr_vm_bind_op *ctx = op_ctx;
+	u64 va_start = 0, va_range = 0;
+	int err;
+
+	drm_gpuva_op_remap_to_unmap_range(&op->remap, &va_start, &va_range);
+	err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start, va_range);
+	if (err)
+		return err;
+
+	/* No actual remap required: the page table tree depth is fixed to 3,
+	 * and we use 4k page table entries only for now.
+	 */
+	drm_gpuva_remap(&ctx->prev_va->base, &ctx->next_va->base, &op->remap);
+
+	if (op->remap.prev) {
+		pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->base.gem.obj));
+		drm_gpuva_link(&ctx->prev_va->base, ctx->gpuvm_bo);
+		ctx->prev_va = NULL;
+	}
+
+	if (op->remap.next) {
+		pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->base.gem.obj));
+		drm_gpuva_link(&ctx->next_va->base, ctx->gpuvm_bo);
+		ctx->next_va = NULL;
+	}
+
+	drm_gpuva_unlink(op->remap.unmap->va);
+
+	return 0;
+}
+
+/*
+ * Public API
+ *
+ * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h".
+ */
+
+/**
+ * pvr_device_addr_is_valid() - Tests whether a device-virtual address
+ *                              is valid.
+ * @device_addr: Virtual device address to test.
+ *
+ * Return:
+ *  * %true if @device_addr is within the valid range for a device page
+ *    table and is aligned to the device page size, or
+ *  * %false otherwise.
+ */
+bool
+pvr_device_addr_is_valid(u64 device_addr)
+{
+	return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 &&
+	       (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0;
+}
+
+/**
+ * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual
+ * address and associated size are both valid.
+ * @device_addr: Virtual device address to test.
+ * @size: Size of the range based at @device_addr to test.
+ *
+ * Calling pvr_device_addr_is_valid() twice (once on @size, and again on
+ * @device_addr + @size) to verify a device-virtual address range initially
+ * seems intuitive, but it produces a false-negative when the address range
+ * is right at the end of device-virtual address space.
+ *
+ * This function catches that corner case, as well as checking that
+ * @size is non-zero.
+ *
+ * Return:
+ *  * %true if @device_addr is device page aligned; @size is device page
+ *    aligned; the range specified by @device_addr and @size is within the
+ *    bounds of the device-virtual address space, and @size is non-zero, or
+ *  * %false otherwise.
+ */
+bool
+pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size)
+{
+	return pvr_device_addr_is_valid(device_addr) &&
+	       size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 &&
+	       (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
+}
+
+void pvr_gpuvm_free(struct drm_gpuvm *gpuvm)
+{
+
+}
+
+static const struct drm_gpuvm_ops pvr_vm_gpuva_ops = {
+	.vm_free = pvr_gpuvm_free,
+	.sm_step_map = pvr_vm_gpuva_map,
+	.sm_step_remap = pvr_vm_gpuva_remap,
+	.sm_step_unmap = pvr_vm_gpuva_unmap,
+};
+
+/**
+ * pvr_vm_create_context() - Create a new VM context.
+ * @pvr_dev: Target PowerVR device.
+ * @is_userspace_context: %true if this context is for userspace. This will
+ *                        create a firmware memory context for the VM context
+ *                        and disable warnings when tearing down mappings.
+ *
+ * Return:
+ *  * A handle to the newly-minted VM context on success,
+ *  * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is
+ *    missing or has an unsupported value,
+ *  * -%ENOMEM if allocation of the structure behind the opaque handle fails,
+ *    or
+ *  * Any error encountered while setting up internal structures.
+ */
+struct pvr_vm_context *
+pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+
+	struct pvr_vm_context *vm_ctx;
+	u16 device_addr_bits;
+
+	int err;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits,
+				&device_addr_bits);
+	if (err) {
+		drm_err(drm_dev,
+			"Failed to get device virtual address space bits\n");
+		return ERR_PTR(err);
+	}
+
+	if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) {
+		drm_err(drm_dev,
+			"Device has unsupported virtual address space size\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL);
+	if (!vm_ctx)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0);
+
+	vm_ctx->pvr_dev = pvr_dev;
+	kref_init(&vm_ctx->ref_count);
+	mutex_init(&vm_ctx->lock);
+
+	drm_gpuvm_init(&vm_ctx->gpuvm_mgr,
+		       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
+		       0, &pvr_dev->base, &vm_ctx->dummy_gem,
+		       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
+
+	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
+	err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx);
+	if (err) {
+		vm_ctx->mmu_ctx = NULL;
+		goto err_put_ctx;
+	}
+
+	if (is_userspace_context) {
+		/* TODO: Create FW mem context */
+		err = -ENODEV;
+		goto err_put_ctx;
+	}
+
+	return vm_ctx;
+
+err_put_ctx:
+	pvr_vm_context_put(vm_ctx);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_vm_context_release() - Teardown a VM context.
+ * @ref_count: Pointer to reference counter of the VM context.
+ *
+ * This function ensures that no mappings are left dangling by unmapping them
+ * all in order of ascending device-virtual address.
+ */
+static void
+pvr_vm_context_release(struct kref *ref_count)
+{
+	struct pvr_vm_context *vm_ctx =
+		container_of(ref_count, struct pvr_vm_context, ref_count);
+
+	/* TODO: Destroy FW mem context */
+	WARN_ON(vm_ctx->fw_mem_ctx_obj);
+
+	WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start,
+			     vm_ctx->gpuvm_mgr.mm_range));
+
+	drm_gpuvm_put(&vm_ctx->gpuvm_mgr);
+	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
+	drm_gem_private_object_fini(&vm_ctx->dummy_gem);
+	mutex_destroy(&vm_ctx->lock);
+
+	kfree(vm_ctx);
+}
+
+/**
+ * pvr_vm_context_lookup() - Look up VM context from handle
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Object handle.
+ *
+ * Takes reference on VM context object. Call pvr_vm_context_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, or is not a VM context)
+ */
+struct pvr_vm_context *
+pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_vm_context *vm_ctx;
+
+	xa_lock(&pvr_file->vm_ctx_handles);
+	vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle);
+	if (vm_ctx)
+		kref_get(&vm_ctx->ref_count);
+
+	xa_unlock(&pvr_file->vm_ctx_handles);
+
+	return vm_ctx;
+}
+
+/**
+ * pvr_vm_context_put() - Release a reference on a VM context
+ * @vm_ctx: Target VM context.
+ *
+ * Returns:
+ *  * %true if the VM context was destroyed, or
+ *  * %false if there are any references still remaining.
+ */
+bool
+pvr_vm_context_put(struct pvr_vm_context *vm_ctx)
+{
+	if (vm_ctx)
+		return kref_put(&vm_ctx->ref_count, pvr_vm_context_release);
+
+	return true;
+}
+
+/**
+ * pvr_destroy_vm_contexts_for_file: Destroy any VM contexts associated with the
+ * given file.
+ * @pvr_file: Pointer to pvr_file structure.
+ *
+ * Removes all vm_contexts associated with @pvr_file from the device VM context
+ * list and drops initial references. vm_contexts will then be destroyed once
+ * all outstanding references are dropped.
+ */
+void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file)
+{
+	struct pvr_vm_context *vm_ctx;
+	unsigned long handle;
+
+	xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) {
+		/* vm_ctx is not used here because that would create a race with xa_erase */
+		pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle));
+	}
+}
+
+/**
+ * pvr_vm_map() - Map a section of physical memory into a section of
+ * device-virtual memory.
+ * @vm_ctx: Target VM context.
+ * @pvr_obj: Target PowerVR memory object.
+ * @pvr_obj_offset: Offset into @pvr_obj to map from.
+ * @device_addr: Virtual device address at the start of the requested mapping.
+ * @size: Size of the requested mapping.
+ *
+ * No handle is returned to represent the mapping. Instead, callers should
+ * remember @device_addr and use that as a handle.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
+ *    address; the region specified by @pvr_obj_offset and @size does not fall
+ *    entirely within @pvr_obj, or any part of the specified region of @pvr_obj
+ *    is not device-virtual page-aligned,
+ *  * Any error encountered while performing internal operations required to
+ *    destroy the mapping (returned from pvr_vm_gpuva_map or
+ *    pvr_vm_gpuva_remap).
+ */
+int
+pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj,
+	   u64 pvr_obj_offset, u64 device_addr, u64 size)
+{
+	struct pvr_vm_bind_op bind_op = {0};
+	struct drm_exec exec;
+
+	int err = pvr_vm_bind_op_map_init(&bind_op, vm_ctx, pvr_obj,
+					  pvr_obj_offset, device_addr,
+					  size);
+
+	if (err)
+		return err;
+
+	drm_exec_init(&exec,
+		      DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
+
+	pvr_gem_object_get(pvr_obj);
+
+	err = pvr_vm_bind_op_lock_resvs(&exec, &bind_op);
+	if (err)
+		goto err_cleanup;
+
+	err = pvr_vm_bind_op_exec(&bind_op);
+
+	drm_exec_fini(&exec);
+
+err_cleanup:
+	pvr_vm_bind_op_fini(&bind_op);
+
+	return err;
+}
+
+/**
+ * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory.
+ * @vm_ctx: Target VM context.
+ * @device_addr: Virtual device address at the start of the target mapping.
+ * @size: Size of the target mapping.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
+ *    address,
+ *  * Any error encountered while performing internal operations required to
+ *    destroy the mapping (returned from pvr_vm_gpuva_unmap or
+ *    pvr_vm_gpuva_remap).
+ */
+int
+pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size)
+{
+	struct pvr_vm_bind_op bind_op = {0};
+	struct drm_exec exec;
+
+	int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr,
+					    size);
+
+	if (err)
+		return err;
+
+	drm_exec_init(&exec,
+		      DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
+
+	err = pvr_vm_bind_op_lock_resvs(&exec, &bind_op);
+	if (err)
+		goto err_cleanup;
+
+	err = pvr_vm_bind_op_exec(&bind_op);
+
+	drm_exec_fini(&exec);
+
+err_cleanup:
+	pvr_vm_bind_op_fini(&bind_op);
+
+	return err;
+}
+
+/* Static data areas are determined by firmware. */
+static const struct drm_pvr_static_data_area static_data_areas[] = {
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE,
+		.location_heap_id = DRM_PVR_HEAP_GENERAL,
+		.offset = 0,
+		.size = 128,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC,
+		.location_heap_id = DRM_PVR_HEAP_GENERAL,
+		.offset = 128,
+		.size = 1024,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
+		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
+		.offset = 0,
+		.size = 128,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_EOT,
+		.location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
+		.offset = 128,
+		.size = 128,
+	},
+	{
+		.area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
+		.location_heap_id = DRM_PVR_HEAP_USC_CODE,
+		.offset = 0,
+		.size = 128,
+	},
+};
+
+#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE)
+
+/*
+ * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding
+ * static data area for each heap.
+ */
+static const struct drm_pvr_heap pvr_heaps[] = {
+	[DRM_PVR_HEAP_GENERAL] = {
+		.base = ROGUE_GENERAL_HEAP_BASE,
+		.size = ROGUE_GENERAL_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_PDS_CODE_DATA] = {
+		.base = ROGUE_PDSCODEDATA_HEAP_BASE,
+		.size = ROGUE_PDSCODEDATA_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_USC_CODE] = {
+		.base = ROGUE_USCCODE_HEAP_BASE,
+		.size = ROGUE_USCCODE_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_RGNHDR] = {
+		.base = ROGUE_RGNHDR_HEAP_BASE,
+		.size = ROGUE_RGNHDR_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_VIS_TEST] = {
+		.base = ROGUE_VISTEST_HEAP_BASE,
+		.size = ROGUE_VISTEST_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+	[DRM_PVR_HEAP_TRANSFER_FRAG] = {
+		.base = ROGUE_TRANSFER_FRAG_HEAP_BASE,
+		.size = ROGUE_TRANSFER_FRAG_HEAP_SIZE,
+		.flags = 0,
+		.page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+	},
+};
+
+int
+pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
+			  struct drm_pvr_ioctl_dev_query_args *args)
+{
+	struct drm_pvr_dev_query_static_data_areas query = {0};
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_static_data_areas);
+		return 0;
+	}
+
+	err = PVR_UOBJ_GET(query, args->size, args->pointer);
+	if (err < 0)
+		return err;
+
+	if (!query.static_data_areas.array) {
+		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
+		query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area);
+		goto copy_out;
+	}
+
+	if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas))
+		query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
+
+	err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas);
+	if (err < 0)
+		return err;
+
+copy_out:
+	err = PVR_UOBJ_SET(args->pointer, args->size, query);
+	if (err < 0)
+		return err;
+
+	args->size = sizeof(query);
+	return 0;
+}
+
+int
+pvr_heap_info_get(const struct pvr_device *pvr_dev,
+		  struct drm_pvr_ioctl_dev_query_args *args)
+{
+	struct drm_pvr_dev_query_heap_info query = {0};
+	u64 dest;
+	int err;
+
+	if (!args->pointer) {
+		args->size = sizeof(struct drm_pvr_dev_query_heap_info);
+		return 0;
+	}
+
+	err = PVR_UOBJ_GET(query, args->size, args->pointer);
+	if (err < 0)
+		return err;
+
+	if (!query.heaps.array) {
+		query.heaps.count = ARRAY_SIZE(pvr_heaps);
+		query.heaps.stride = sizeof(struct drm_pvr_heap);
+		goto copy_out;
+	}
+
+	if (query.heaps.count > ARRAY_SIZE(pvr_heaps))
+		query.heaps.count = ARRAY_SIZE(pvr_heaps);
+
+	/* Region header heap is only present if BRN63142 is present. */
+	dest = query.heaps.array;
+	for (size_t i = 0; i < query.heaps.count; i++) {
+		struct drm_pvr_heap heap = pvr_heaps[i];
+
+		if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142))
+			heap.size = 0;
+
+		err = PVR_UOBJ_SET(dest, query.heaps.stride, heap);
+		if (err < 0)
+			return err;
+
+		dest += query.heaps.stride;
+	}
+
+copy_out:
+	err = PVR_UOBJ_SET(args->pointer, args->size, query);
+	if (err < 0)
+		return err;
+
+	args->size = sizeof(query);
+	return 0;
+}
+
+/**
+ * pvr_heap_contains_range() - Determine if a given heap contains the specified
+ *                             device-virtual address range.
+ * @pvr_heap: Target heap.
+ * @start: Inclusive start of the target range.
+ * @end: Inclusive end of the target range.
+ *
+ * It is an error to call this function with values of @start and @end that do
+ * not satisfy the condition @start <= @end.
+ */
+static __always_inline bool
+pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end)
+{
+	return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size;
+}
+
+/**
+ * pvr_find_heap_containing() - Find a heap which contains the specified
+ *                              device-virtual address range.
+ * @pvr_dev: Target PowerVR device.
+ * @start: Start of the target range.
+ * @size: Size of the target range.
+ *
+ * Return:
+ *  * A pointer to a constant instance of struct drm_pvr_heap representing the
+ *    heap containing the entire range specified by @start and @size on
+ *    success, or
+ *  * %NULL if no such heap exists.
+ */
+const struct drm_pvr_heap *
+pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size)
+{
+	u64 end;
+
+	if (check_add_overflow(start, size - 1, &end))
+		return NULL;
+
+	/*
+	 * There are no guarantees about the order of address ranges in
+	 * &pvr_heaps, so iterate over the entire array for a heap whose
+	 * range completely encompasses the given range.
+	 */
+	for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) {
+		/* Filter heaps that present only with an associated quirk */
+		if (heap_id == DRM_PVR_HEAP_RGNHDR &&
+		    !PVR_HAS_QUIRK(pvr_dev, 63142)) {
+			continue;
+		}
+
+		if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end))
+			return &pvr_heaps[heap_id];
+	}
+
+	return NULL;
+}
+
+/**
+ * pvr_vm_find_gem_object() - Look up a buffer object from a given
+ *                            device-virtual address.
+ * @vm_ctx: [IN] Target VM context.
+ * @device_addr: [IN] Virtual device address at the start of the required
+ *               object.
+ * @mapped_offset_out: [OUT] Pointer to location to write offset of the start
+ *                     of the mapped region within the buffer object. May be
+ *                     %NULL if this information is not required.
+ * @mapped_size_out: [OUT] Pointer to location to write size of the mapped
+ *                   region. May be %NULL if this information is not required.
+ *
+ * If successful, a reference will be taken on the buffer object. The caller
+ * must drop the reference with pvr_gem_object_put().
+ *
+ * Return:
+ *  * The PowerVR buffer object mapped at @device_addr if one exists, or
+ *  * %NULL otherwise.
+ */
+struct pvr_gem_object *
+pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr,
+		       u64 *mapped_offset_out, u64 *mapped_size_out)
+{
+	struct pvr_gem_object *pvr_obj;
+	struct drm_gpuva *va;
+
+	mutex_lock(&vm_ctx->lock);
+
+	va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, device_addr, 1);
+	if (!va)
+		goto err_unlock;
+
+	pvr_obj = gem_to_pvr_gem(va->gem.obj);
+	pvr_gem_object_get(pvr_obj);
+
+	if (mapped_offset_out)
+		*mapped_offset_out = va->gem.offset;
+	if (mapped_size_out)
+		*mapped_size_out = va->va.range;
+
+	mutex_unlock(&vm_ctx->lock);
+
+	return pvr_obj;
+
+err_unlock:
+	mutex_unlock(&vm_ctx->lock);
+
+	return NULL;
+}
+
+/**
+ * pvr_vm_get_fw_mem_context: Get object representing firmware memory context
+ * @vm_ctx: Target VM context.
+ *
+ * Returns:
+ *  * FW object representing firmware memory context, or
+ *  * %NULL if this VM context does not have a firmware memory context.
+ */
+struct pvr_fw_object *
+pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx)
+{
+	return vm_ctx->fw_mem_ctx_obj;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h
new file mode 100644
index 0000000000000..cf8b97553dc85
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_vm.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_VM_H
+#define PVR_VM_H
+
+#include "pvr_rogue_mmu_defs.h"
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h" */
+struct pvr_device;
+struct pvr_file;
+
+/* Forward declaration from "pvr_gem.h" */
+struct pvr_gem_object;
+
+/* Forward declaration from "pvr_vm.c" */
+struct pvr_vm_context;
+
+/* Forward declaration from <uapi/drm/pvr_drm.h> */
+struct drm_pvr_ioctl_get_heap_info_args;
+
+/* Forward declaration from <drm/drm_exec.h> */
+struct drm_exec;
+
+/* Functions defined in pvr_vm.c */
+
+bool pvr_device_addr_is_valid(u64 device_addr);
+bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size);
+
+struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev,
+					     bool is_userspace_context);
+
+int pvr_vm_map(struct pvr_vm_context *vm_ctx,
+	       struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
+	       u64 device_addr, u64 size);
+int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size);
+
+dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx);
+struct dma_resv *pvr_vm_get_dma_resv(struct pvr_vm_context *vm_ctx);
+
+int pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
+			      struct drm_pvr_ioctl_dev_query_args *args);
+int pvr_heap_info_get(const struct pvr_device *pvr_dev,
+		      struct drm_pvr_ioctl_dev_query_args *args);
+const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev,
+						    u64 addr, u64 size);
+
+struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx,
+					      u64 device_addr,
+					      u64 *mapped_offset_out,
+					      u64 *mapped_size_out);
+
+struct pvr_fw_object *
+pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx);
+
+struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle);
+struct pvr_vm_context *pvr_vm_context_get(struct pvr_vm_context *vm_ctx);
+bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx);
+void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file);
+
+#endif /* PVR_VM_H */
-- 
GitLab


From 727538a4bbff07736ecfd704efd7e21718fca3e4 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:33 +0000
Subject: [PATCH 0408/2340] drm/imagination: Implement power management

Add power management to the driver, using runtime pm. The power off
sequence depends on firmware commands which are not implemented in this
patch.

Changes since v8:
- Corrected license identifiers

Changes since v5:
- Use RUNTIME_PM_OPS() to declare PM callbacks
- Add Kconfig dependency on CONFIG_PM

Changes since v4:
- Suspend runtime PM before unplugging device on rmmod

Changes since v3:
- Don't power device when calling pvr_device_gpu_fini()
- Documentation for pvr_dev->lost has been improved
- pvr_power_init() renamed to pvr_watchdog_init()
- Use drm_dev_{enter,exit}

Changes since v2:
- Use runtime PM
- Implement watchdog

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/e09af4ef1ff514e1d6d7f97c7c5032c643c56f9c.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Kconfig      |   1 +
 drivers/gpu/drm/imagination/Makefile     |   1 +
 drivers/gpu/drm/imagination/pvr_device.c |  23 +-
 drivers/gpu/drm/imagination/pvr_device.h |  22 ++
 drivers/gpu/drm/imagination/pvr_drv.c    |  20 +-
 drivers/gpu/drm/imagination/pvr_power.c  | 271 +++++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_power.h  |  39 ++++
 7 files changed, 374 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_power.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_power.h

diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig
index 6f82edf89144c..0abd1b9bf3be3 100644
--- a/drivers/gpu/drm/imagination/Kconfig
+++ b/drivers/gpu/drm/imagination/Kconfig
@@ -5,6 +5,7 @@ config DRM_POWERVR
 	tristate "Imagination Technologies PowerVR (Series 6 and later) & IMG Graphics"
 	depends on ARM64
 	depends on DRM
+	depends on PM
 	select DRM_GEM_SHMEM_HELPER
 	select DRM_SCHED
 	select DRM_GPUVM
diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 678c3dbb43260..d9e00a0db6b2a 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -10,6 +10,7 @@ powervr-y := \
 	pvr_fw.o \
 	pvr_gem.o \
 	pvr_mmu.o \
+	pvr_power.o \
 	pvr_vm.o
 
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 201ae780494f8..e16282325178e 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -5,6 +5,7 @@
 #include "pvr_device_info.h"
 
 #include "pvr_fw.h"
+#include "pvr_power.h"
 #include "pvr_rogue_cr_defs.h"
 #include "pvr_vm.h"
 
@@ -361,6 +362,8 @@ pvr_device_gpu_fini(struct pvr_device *pvr_dev)
 int
 pvr_device_init(struct pvr_device *pvr_dev)
 {
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct device *dev = drm_dev->dev;
 	int err;
 
 	/* Enable and initialize clocks required for the device to operate. */
@@ -368,13 +371,29 @@ pvr_device_init(struct pvr_device *pvr_dev)
 	if (err)
 		return err;
 
+	/* Explicitly power the GPU so we can access control registers before the FW is booted. */
+	err = pm_runtime_resume_and_get(dev);
+	if (err)
+		return err;
+
 	/* Map the control registers into memory. */
 	err = pvr_device_reg_init(pvr_dev);
 	if (err)
-		return err;
+		goto err_pm_runtime_put;
 
 	/* Perform GPU-specific initialization steps. */
-	return pvr_device_gpu_init(pvr_dev);
+	err = pvr_device_gpu_init(pvr_dev);
+	if (err)
+		goto err_pm_runtime_put;
+
+	pm_runtime_put(dev);
+
+	return 0;
+
+err_pm_runtime_put:
+	pm_runtime_put_sync_suspend(dev);
+
+	return err;
 }
 
 /**
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index bfc853ffd58fb..771ba879f02dd 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -141,6 +141,28 @@ struct pvr_device {
 	 * before submitting the next job.
 	 */
 	atomic_t mmu_flush_cache_flags;
+
+	struct {
+		/** @work: Work item for watchdog callback. */
+		struct delayed_work work;
+
+		/** @old_kccb_cmds_executed: KCCB command execution count at last watchdog poll. */
+		u32 old_kccb_cmds_executed;
+
+		/** @kccb_stall_count: Number of watchdog polls KCCB has been stalled for. */
+		u32 kccb_stall_count;
+	} watchdog;
+
+	/**
+	 * @lost: %true if the device has been lost.
+	 *
+	 * This variable is set if the device has become irretrievably unavailable, e.g. if the
+	 * firmware processor has stopped responding and can not be revived via a hard reset.
+	 */
+	bool lost;
+
+	/** @sched_wq: Workqueue for schedulers. */
+	struct workqueue_struct *sched_wq;
 };
 
 /**
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index 6d3bc886e1c3b..12e136217c456 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -5,6 +5,7 @@
 #include "pvr_drv.h"
 #include "pvr_gem.h"
 #include "pvr_mmu.h"
+#include "pvr_power.h"
 #include "pvr_rogue_defs.h"
 #include "pvr_rogue_fwif_client.h"
 #include "pvr_rogue_fwif_shared.h"
@@ -1265,9 +1266,16 @@ pvr_probe(struct platform_device *plat_dev)
 
 	platform_set_drvdata(plat_dev, drm_dev);
 
+	devm_pm_runtime_enable(&plat_dev->dev);
+	pm_runtime_mark_last_busy(&plat_dev->dev);
+
+	pm_runtime_set_autosuspend_delay(&plat_dev->dev, 50);
+	pm_runtime_use_autosuspend(&plat_dev->dev);
+	pvr_watchdog_init(pvr_dev);
+
 	err = pvr_device_init(pvr_dev);
 	if (err)
-		return err;
+		goto err_watchdog_fini;
 
 	err = drm_dev_register(drm_dev, 0);
 	if (err)
@@ -1278,6 +1286,9 @@ pvr_probe(struct platform_device *plat_dev)
 err_device_fini:
 	pvr_device_fini(pvr_dev);
 
+err_watchdog_fini:
+	pvr_watchdog_fini(pvr_dev);
+
 	return err;
 }
 
@@ -1287,8 +1298,10 @@ pvr_remove(struct platform_device *plat_dev)
 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
 	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
 
+	pm_runtime_suspend(drm_dev->dev);
 	pvr_device_fini(pvr_dev);
 	drm_dev_unplug(drm_dev);
+	pvr_watchdog_fini(pvr_dev);
 
 	return 0;
 }
@@ -1299,11 +1312,16 @@ static const struct of_device_id dt_match[] = {
 };
 MODULE_DEVICE_TABLE(of, dt_match);
 
+static const struct dev_pm_ops pvr_pm_ops = {
+	RUNTIME_PM_OPS(pvr_power_device_suspend, pvr_power_device_resume, pvr_power_device_idle)
+};
+
 static struct platform_driver pvr_driver = {
 	.probe = pvr_probe,
 	.remove = pvr_remove,
 	.driver = {
 		.name = PVR_DRIVER_NAME,
+		.pm = &pvr_pm_ops,
 		.of_match_table = dt_match,
 	},
 };
diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c
new file mode 100644
index 0000000000000..88f14a4d31aba
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_fw.h"
+#include "pvr_power.h"
+#include "pvr_rogue_fwif.h"
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#define POWER_SYNC_TIMEOUT_US (1000000) /* 1s */
+
+#define WATCHDOG_TIME_MS (500)
+
+static int
+pvr_power_send_command(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *pow_cmd)
+{
+	/* TODO: implement */
+	return -ENODEV;
+}
+
+static int
+pvr_power_request_idle(struct pvr_device *pvr_dev)
+{
+	struct rogue_fwif_kccb_cmd pow_cmd;
+
+	/* Send FORCED_IDLE request to FW. */
+	pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW;
+	pow_cmd.cmd_data.pow_data.pow_type = ROGUE_FWIF_POW_FORCED_IDLE_REQ;
+	pow_cmd.cmd_data.pow_data.power_req_data.pow_request_type = ROGUE_FWIF_POWER_FORCE_IDLE;
+
+	return pvr_power_send_command(pvr_dev, &pow_cmd);
+}
+
+static int
+pvr_power_request_pwr_off(struct pvr_device *pvr_dev)
+{
+	struct rogue_fwif_kccb_cmd pow_cmd;
+
+	/* Send POW_OFF request to firmware. */
+	pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW;
+	pow_cmd.cmd_data.pow_data.pow_type = ROGUE_FWIF_POW_OFF_REQ;
+	pow_cmd.cmd_data.pow_data.power_req_data.forced = true;
+
+	return pvr_power_send_command(pvr_dev, &pow_cmd);
+}
+
+static int
+pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset)
+{
+	if (!hard_reset) {
+		int err;
+
+		cancel_delayed_work_sync(&pvr_dev->watchdog.work);
+
+		err = pvr_power_request_idle(pvr_dev);
+		if (err)
+			return err;
+
+		err = pvr_power_request_pwr_off(pvr_dev);
+		if (err)
+			return err;
+	}
+
+	/* TODO: stop firmware */
+	return -ENODEV;
+}
+
+static int
+pvr_power_fw_enable(struct pvr_device *pvr_dev)
+{
+	int err;
+
+	/* TODO: start firmware */
+	err = -ENODEV;
+	if (err)
+		return err;
+
+	queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work,
+			   msecs_to_jiffies(WATCHDOG_TIME_MS));
+
+	return 0;
+}
+
+bool
+pvr_power_is_idle(struct pvr_device *pvr_dev)
+{
+	/* TODO: implement */
+	return true;
+}
+
+static bool
+pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
+{
+	/* TODO: implement */
+	return false;
+}
+
+static void
+pvr_watchdog_worker(struct work_struct *work)
+{
+	struct pvr_device *pvr_dev = container_of(work, struct pvr_device,
+						  watchdog.work.work);
+	bool stalled;
+
+	if (pvr_dev->lost)
+		return;
+
+	if (pm_runtime_get_if_in_use(from_pvr_device(pvr_dev)->dev) <= 0)
+		goto out_requeue;
+
+	stalled = pvr_watchdog_kccb_stalled(pvr_dev);
+
+	if (stalled) {
+		drm_err(from_pvr_device(pvr_dev), "FW stalled, trying hard reset");
+
+		pvr_power_reset(pvr_dev, true);
+		/* Device may be lost at this point. */
+	}
+
+	pm_runtime_put(from_pvr_device(pvr_dev)->dev);
+
+out_requeue:
+	if (!pvr_dev->lost) {
+		queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work,
+				   msecs_to_jiffies(WATCHDOG_TIME_MS));
+	}
+}
+
+/**
+ * pvr_watchdog_init() - Initialise watchdog for device
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%ENOMEM on out of memory.
+ */
+int
+pvr_watchdog_init(struct pvr_device *pvr_dev)
+{
+	INIT_DELAYED_WORK(&pvr_dev->watchdog.work, pvr_watchdog_worker);
+
+	return 0;
+}
+
+int
+pvr_power_device_suspend(struct device *dev)
+{
+	struct platform_device *plat_dev = to_platform_device(dev);
+	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	int idx;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	clk_disable_unprepare(pvr_dev->mem_clk);
+	clk_disable_unprepare(pvr_dev->sys_clk);
+	clk_disable_unprepare(pvr_dev->core_clk);
+
+	drm_dev_exit(idx);
+
+	return 0;
+}
+
+int
+pvr_power_device_resume(struct device *dev)
+{
+	struct platform_device *plat_dev = to_platform_device(dev);
+	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	err = clk_prepare_enable(pvr_dev->core_clk);
+	if (err)
+		goto err_drm_dev_exit;
+
+	err = clk_prepare_enable(pvr_dev->sys_clk);
+	if (err)
+		goto err_core_clk_disable;
+
+	err = clk_prepare_enable(pvr_dev->mem_clk);
+	if (err)
+		goto err_sys_clk_disable;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_sys_clk_disable:
+	clk_disable_unprepare(pvr_dev->sys_clk);
+
+err_core_clk_disable:
+	clk_disable_unprepare(pvr_dev->core_clk);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
+}
+
+int
+pvr_power_device_idle(struct device *dev)
+{
+	struct platform_device *plat_dev = to_platform_device(dev);
+	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+
+	return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY;
+}
+
+/**
+ * pvr_power_reset() - Reset the GPU
+ * @pvr_dev: Device pointer
+ * @hard_reset: %true for hard reset, %false for soft reset
+ *
+ * If @hard_reset is %false and the FW processor fails to respond during the reset process, this
+ * function will attempt a hard reset.
+ *
+ * If a hard reset fails then the GPU device is reported as lost.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_power_get, pvr_power_fw_disable or pvr_power_fw_enable().
+ */
+int
+pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
+{
+	/* TODO: Implement hard reset. */
+	int err;
+
+	/*
+	 * Take a power reference during the reset. This should prevent any interference with the
+	 * power state during reset.
+	 */
+	WARN_ON(pvr_power_get(pvr_dev));
+
+	err = pvr_power_fw_disable(pvr_dev, false);
+	if (err)
+		goto err_power_put;
+
+	err = pvr_power_fw_enable(pvr_dev);
+
+err_power_put:
+	pvr_power_put(pvr_dev);
+
+	return err;
+}
+
+/**
+ * pvr_watchdog_fini() - Shutdown watchdog for device
+ * @pvr_dev: Target PowerVR device.
+ */
+void
+pvr_watchdog_fini(struct pvr_device *pvr_dev)
+{
+	cancel_delayed_work_sync(&pvr_dev->watchdog.work);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_power.h b/drivers/gpu/drm/imagination/pvr_power.h
new file mode 100644
index 0000000000000..360980f454d74
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_power.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_POWER_H
+#define PVR_POWER_H
+
+#include "pvr_device.h"
+
+#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
+
+int pvr_watchdog_init(struct pvr_device *pvr_dev);
+void pvr_watchdog_fini(struct pvr_device *pvr_dev);
+
+bool pvr_power_is_idle(struct pvr_device *pvr_dev);
+
+int pvr_power_device_suspend(struct device *dev);
+int pvr_power_device_resume(struct device *dev);
+int pvr_power_device_idle(struct device *dev);
+
+int pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset);
+
+static __always_inline int
+pvr_power_get(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+
+	return pm_runtime_resume_and_get(drm_dev->dev);
+}
+
+static __always_inline int
+pvr_power_put(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+
+	return pm_runtime_put(drm_dev->dev);
+}
+
+#endif /* PVR_POWER_H */
-- 
GitLab


From cc1aeedb98ad347c06ff59e991b2f94dfb4c565d Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:34 +0000
Subject: [PATCH 0409/2340] drm/imagination: Implement firmware infrastructure
 and META FW support

The infrastructure includes parsing of the firmware image, initialising
FW-side structures, handling the kernel and firmware command
ringbuffers and starting & stopping the firmware processor.

This patch also adds the necessary support code for the META firmware
processor.

Changes since v8:
- Fix documentation for pvr_fwccb_process()
- Corrected license identifiers

Changes since v6:
- Add a minimum retry count to pvr_kccb_reserve_slot_sync()

Changes since v5:
- Add workaround for BRN 71242
- Attempt to recover GPU on MMU flush command failure

Changes since v4:
- Remove use of drm_gem_shmem_get_pages()
- Remove interrupt resource name

Changes since v3:
- Hard reset FW processor on watchdog timeout
- Switch to threaded IRQ
- Rework FW object creation/initialisation to aid hard reset
- Added MODULE_FIRMWARE()
- Use drm_dev_{enter,exit}

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Link: https://lore.kernel.org/r/bb52a8dc84f296b37dc6668dfe8fbaf2ba551139.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Makefile          |    4 +
 drivers/gpu/drm/imagination/pvr_ccb.c         |  635 ++++++++
 drivers/gpu/drm/imagination/pvr_ccb.h         |   71 +
 drivers/gpu/drm/imagination/pvr_device.c      |  103 ++
 drivers/gpu/drm/imagination/pvr_device.h      |   60 +
 drivers/gpu/drm/imagination/pvr_drv.c         |    1 +
 drivers/gpu/drm/imagination/pvr_fw.c          | 1342 +++++++++++++++++
 drivers/gpu/drm/imagination/pvr_fw.h          |  474 ++++++
 drivers/gpu/drm/imagination/pvr_fw_meta.c     |  554 +++++++
 drivers/gpu/drm/imagination/pvr_fw_meta.h     |   14 +
 .../gpu/drm/imagination/pvr_fw_startstop.c    |  306 ++++
 .../gpu/drm/imagination/pvr_fw_startstop.h    |   13 +
 drivers/gpu/drm/imagination/pvr_fw_trace.c    |  120 ++
 drivers/gpu/drm/imagination/pvr_fw_trace.h    |   78 +
 drivers/gpu/drm/imagination/pvr_mmu.c         |   70 +-
 drivers/gpu/drm/imagination/pvr_power.c       |  166 +-
 drivers/gpu/drm/imagination/pvr_power.h       |    2 +
 drivers/gpu/drm/imagination/pvr_vm.c          |   26 +-
 18 files changed, 4015 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_ccb.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_ccb.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_meta.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_meta.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_startstop.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_startstop.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_trace.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_trace.h

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index d9e00a0db6b2a..5c1c918af7a33 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -4,10 +4,14 @@
 subdir-ccflags-y := -I$(srctree)/$(src)
 
 powervr-y := \
+	pvr_ccb.o \
 	pvr_device.o \
 	pvr_device_info.o \
 	pvr_drv.o \
 	pvr_fw.o \
+	pvr_fw_meta.o \
+	pvr_fw_startstop.o \
+	pvr_fw_trace.o \
 	pvr_gem.o \
 	pvr_mmu.o \
 	pvr_power.o \
diff --git a/drivers/gpu/drm/imagination/pvr_ccb.c b/drivers/gpu/drm/imagination/pvr_ccb.c
new file mode 100644
index 0000000000000..48f06f58f3f12
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_ccb.c
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_ccb.h"
+#include "pvr_device.h"
+#include "pvr_drv.h"
+#include "pvr_fw.h"
+#include "pvr_gem.h"
+#include "pvr_power.h"
+
+#include <drm/drm_managed.h>
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#define RESERVE_SLOT_TIMEOUT (1 * HZ) /* 1s */
+#define RESERVE_SLOT_MIN_RETRIES 10
+
+static void
+ccb_ctrl_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_ccb_ctl *ctrl = cpu_ptr;
+	struct pvr_ccb *pvr_ccb = priv;
+
+	ctrl->write_offset = 0;
+	ctrl->read_offset = 0;
+	ctrl->wrap_mask = pvr_ccb->num_cmds - 1;
+	ctrl->cmd_size = pvr_ccb->cmd_size;
+}
+
+/**
+ * pvr_ccb_init() - Initialise a CCB
+ * @pvr_dev: Device pointer.
+ * @pvr_ccb: Pointer to CCB structure to initialise.
+ * @num_cmds_log2: Log2 of number of commands in this CCB.
+ * @cmd_size: Command size for this CCB.
+ *
+ * Return:
+ *  * Zero on success, or
+ *  * Any error code returned by pvr_fw_object_create_and_map().
+ */
+static int
+pvr_ccb_init(struct pvr_device *pvr_dev, struct pvr_ccb *pvr_ccb,
+	     u32 num_cmds_log2, size_t cmd_size)
+{
+	u32 num_cmds = 1 << num_cmds_log2;
+	u32 ccb_size = num_cmds * cmd_size;
+	int err;
+
+	pvr_ccb->num_cmds = num_cmds;
+	pvr_ccb->cmd_size = cmd_size;
+
+	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_ccb->lock);
+	if (err)
+		return err;
+
+	/*
+	 * Map CCB and control structure as uncached, so we don't have to flush
+	 * CPU cache repeatedly when polling for space.
+	 */
+	pvr_ccb->ctrl = pvr_fw_object_create_and_map(pvr_dev, sizeof(*pvr_ccb->ctrl),
+						     PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						     ccb_ctrl_init, pvr_ccb, &pvr_ccb->ctrl_obj);
+	if (IS_ERR(pvr_ccb->ctrl))
+		return PTR_ERR(pvr_ccb->ctrl);
+
+	pvr_ccb->ccb = pvr_fw_object_create_and_map(pvr_dev, ccb_size,
+						    PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						    NULL, NULL, &pvr_ccb->ccb_obj);
+	if (IS_ERR(pvr_ccb->ccb)) {
+		err = PTR_ERR(pvr_ccb->ccb);
+		goto err_free_ctrl;
+	}
+
+	pvr_fw_object_get_fw_addr(pvr_ccb->ctrl_obj, &pvr_ccb->ctrl_fw_addr);
+	pvr_fw_object_get_fw_addr(pvr_ccb->ccb_obj, &pvr_ccb->ccb_fw_addr);
+
+	WRITE_ONCE(pvr_ccb->ctrl->write_offset, 0);
+	WRITE_ONCE(pvr_ccb->ctrl->read_offset, 0);
+	WRITE_ONCE(pvr_ccb->ctrl->wrap_mask, num_cmds - 1);
+	WRITE_ONCE(pvr_ccb->ctrl->cmd_size, cmd_size);
+
+	return 0;
+
+err_free_ctrl:
+	pvr_fw_object_unmap_and_destroy(pvr_ccb->ctrl_obj);
+
+	return err;
+}
+
+/**
+ * pvr_ccb_fini() - Release CCB structure
+ * @pvr_ccb: CCB to release.
+ */
+void
+pvr_ccb_fini(struct pvr_ccb *pvr_ccb)
+{
+	pvr_fw_object_unmap_and_destroy(pvr_ccb->ccb_obj);
+	pvr_fw_object_unmap_and_destroy(pvr_ccb->ctrl_obj);
+}
+
+/**
+ * pvr_ccb_slot_available_locked() - Test whether any slots are available in CCB
+ * @pvr_ccb: CCB to test.
+ * @write_offset: Address to store number of next available slot. May be %NULL.
+ *
+ * Caller must hold @pvr_ccb->lock.
+ *
+ * Return:
+ *  * %true if a slot is available, or
+ *  * %false if no slot is available.
+ */
+static __always_inline bool
+pvr_ccb_slot_available_locked(struct pvr_ccb *pvr_ccb, u32 *write_offset)
+{
+	struct rogue_fwif_ccb_ctl *ctrl = pvr_ccb->ctrl;
+	u32 next_write_offset = (READ_ONCE(ctrl->write_offset) + 1) & READ_ONCE(ctrl->wrap_mask);
+
+	lockdep_assert_held(&pvr_ccb->lock);
+
+	if (READ_ONCE(ctrl->read_offset) != next_write_offset) {
+		if (write_offset)
+			*write_offset = next_write_offset;
+		return true;
+	}
+
+	return false;
+}
+
+static void
+process_fwccb_command(struct pvr_device *pvr_dev, struct rogue_fwif_fwccb_cmd *cmd)
+{
+	switch (cmd->cmd_type) {
+	case ROGUE_FWIF_FWCCB_CMD_REQUEST_GPU_RESTART:
+		pvr_power_reset(pvr_dev, false);
+		break;
+
+	default:
+		drm_info(from_pvr_device(pvr_dev), "Received unknown FWCCB command %x\n",
+			 cmd->cmd_type);
+		break;
+	}
+}
+
+/**
+ * pvr_fwccb_process() - Process any pending FWCCB commands
+ * @pvr_dev: Target PowerVR device
+ */
+void pvr_fwccb_process(struct pvr_device *pvr_dev)
+{
+	struct rogue_fwif_fwccb_cmd *fwccb = pvr_dev->fwccb.ccb;
+	struct rogue_fwif_ccb_ctl *ctrl = pvr_dev->fwccb.ctrl;
+	u32 read_offset;
+
+	mutex_lock(&pvr_dev->fwccb.lock);
+
+	while ((read_offset = READ_ONCE(ctrl->read_offset)) != READ_ONCE(ctrl->write_offset)) {
+		struct rogue_fwif_fwccb_cmd cmd = fwccb[read_offset];
+
+		WRITE_ONCE(ctrl->read_offset, (read_offset + 1) & READ_ONCE(ctrl->wrap_mask));
+
+		/* Drop FWCCB lock while we process command. */
+		mutex_unlock(&pvr_dev->fwccb.lock);
+
+		process_fwccb_command(pvr_dev, &cmd);
+
+		mutex_lock(&pvr_dev->fwccb.lock);
+	}
+
+	mutex_unlock(&pvr_dev->fwccb.lock);
+}
+
+/**
+ * pvr_kccb_capacity() - Returns the maximum number of usable KCCB slots.
+ * @pvr_dev: Target PowerVR device
+ *
+ * Return:
+ *  * The maximum number of active slots.
+ */
+static u32 pvr_kccb_capacity(struct pvr_device *pvr_dev)
+{
+	/* Capacity is the number of slot minus one to cope with the wrapping
+	 * mechanisms. If we were to use all slots, we might end up with
+	 * read_offset == write_offset, which the FW considers as a KCCB-is-empty
+	 * condition.
+	 */
+	return pvr_dev->kccb.slot_count - 1;
+}
+
+/**
+ * pvr_kccb_used_slot_count_locked() - Get the number of used slots
+ * @pvr_dev: Device pointer.
+ *
+ * KCCB lock must be held.
+ *
+ * Return:
+ *  * The number of slots currently used.
+ */
+static u32
+pvr_kccb_used_slot_count_locked(struct pvr_device *pvr_dev)
+{
+	struct pvr_ccb *pvr_ccb = &pvr_dev->kccb.ccb;
+	struct rogue_fwif_ccb_ctl *ctrl = pvr_ccb->ctrl;
+	u32 wr_offset = READ_ONCE(ctrl->write_offset);
+	u32 rd_offset = READ_ONCE(ctrl->read_offset);
+	u32 used_count;
+
+	lockdep_assert_held(&pvr_ccb->lock);
+
+	if (wr_offset >= rd_offset)
+		used_count = wr_offset - rd_offset;
+	else
+		used_count = wr_offset + pvr_dev->kccb.slot_count - rd_offset;
+
+	return used_count;
+}
+
+/**
+ * pvr_kccb_send_cmd_reserved_powered() - Send command to the KCCB, with the PM ref
+ * held and a slot pre-reserved
+ * @pvr_dev: Device pointer.
+ * @cmd: Command to sent.
+ * @kccb_slot: Address to store the KCCB slot for this command. May be %NULL.
+ */
+void
+pvr_kccb_send_cmd_reserved_powered(struct pvr_device *pvr_dev,
+				   struct rogue_fwif_kccb_cmd *cmd,
+				   u32 *kccb_slot)
+{
+	struct pvr_ccb *pvr_ccb = &pvr_dev->kccb.ccb;
+	struct rogue_fwif_kccb_cmd *kccb = pvr_ccb->ccb;
+	struct rogue_fwif_ccb_ctl *ctrl = pvr_ccb->ctrl;
+	u32 old_write_offset;
+	u32 new_write_offset;
+
+	WARN_ON(pvr_dev->lost);
+
+	mutex_lock(&pvr_ccb->lock);
+
+	if (WARN_ON(!pvr_dev->kccb.reserved_count))
+		goto out_unlock;
+
+	old_write_offset = READ_ONCE(ctrl->write_offset);
+
+	/* We reserved the slot, we should have one available. */
+	if (WARN_ON(!pvr_ccb_slot_available_locked(pvr_ccb, &new_write_offset)))
+		goto out_unlock;
+
+	memcpy(&kccb[old_write_offset], cmd,
+	       sizeof(struct rogue_fwif_kccb_cmd));
+	if (kccb_slot) {
+		*kccb_slot = old_write_offset;
+		/* Clear return status for this slot. */
+		WRITE_ONCE(pvr_dev->kccb.rtn[old_write_offset],
+			   ROGUE_FWIF_KCCB_RTN_SLOT_NO_RESPONSE);
+	}
+	mb(); /* memory barrier */
+	WRITE_ONCE(ctrl->write_offset, new_write_offset);
+	pvr_dev->kccb.reserved_count--;
+
+	/* Kick MTS */
+	pvr_fw_mts_schedule(pvr_dev,
+			    PVR_FWIF_DM_GP & ~ROGUE_CR_MTS_SCHEDULE_DM_CLRMSK);
+
+out_unlock:
+	mutex_unlock(&pvr_ccb->lock);
+}
+
+/**
+ * pvr_kccb_try_reserve_slot() - Try to reserve a KCCB slot
+ * @pvr_dev: Device pointer.
+ *
+ * Return:
+ *  * true if a KCCB slot was reserved, or
+ *  * false otherwise.
+ */
+static bool pvr_kccb_try_reserve_slot(struct pvr_device *pvr_dev)
+{
+	bool reserved = false;
+	u32 used_count;
+
+	mutex_lock(&pvr_dev->kccb.ccb.lock);
+
+	used_count = pvr_kccb_used_slot_count_locked(pvr_dev);
+	if (pvr_dev->kccb.reserved_count < pvr_kccb_capacity(pvr_dev) - used_count) {
+		pvr_dev->kccb.reserved_count++;
+		reserved = true;
+	}
+
+	mutex_unlock(&pvr_dev->kccb.ccb.lock);
+
+	return reserved;
+}
+
+/**
+ * pvr_kccb_reserve_slot_sync() - Try to reserve a slot synchronously
+ * @pvr_dev: Device pointer.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -EBUSY if no slots were reserved after %RESERVE_SLOT_TIMEOUT, with a minimum of
+ *    %RESERVE_SLOT_MIN_RETRIES retries.
+ */
+static int pvr_kccb_reserve_slot_sync(struct pvr_device *pvr_dev)
+{
+	unsigned long start_timestamp = jiffies;
+	bool reserved = false;
+	u32 retries = 0;
+
+	while ((jiffies - start_timestamp) < (u32)RESERVE_SLOT_TIMEOUT ||
+	       retries < RESERVE_SLOT_MIN_RETRIES) {
+		reserved = pvr_kccb_try_reserve_slot(pvr_dev);
+		if (reserved)
+			break;
+
+		usleep_range(1, 50);
+
+		if (retries < U32_MAX)
+			retries++;
+	}
+
+	return reserved ? 0 : -EBUSY;
+}
+
+/**
+ * pvr_kccb_send_cmd_powered() - Send command to the KCCB, with a PM ref held
+ * @pvr_dev: Device pointer.
+ * @cmd: Command to sent.
+ * @kccb_slot: Address to store the KCCB slot for this command. May be %NULL.
+ *
+ * Returns:
+ *  * Zero on success, or
+ *  * -EBUSY if timeout while waiting for a free KCCB slot.
+ */
+int
+pvr_kccb_send_cmd_powered(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *cmd,
+			  u32 *kccb_slot)
+{
+	int err;
+
+	err = pvr_kccb_reserve_slot_sync(pvr_dev);
+	if (err)
+		return err;
+
+	pvr_kccb_send_cmd_reserved_powered(pvr_dev, cmd, kccb_slot);
+	return 0;
+}
+
+/**
+ * pvr_kccb_send_cmd() - Send command to the KCCB
+ * @pvr_dev: Device pointer.
+ * @cmd: Command to sent.
+ * @kccb_slot: Address to store the KCCB slot for this command. May be %NULL.
+ *
+ * Returns:
+ *  * Zero on success, or
+ *  * -EBUSY if timeout while waiting for a free KCCB slot.
+ */
+int
+pvr_kccb_send_cmd(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *cmd,
+		  u32 *kccb_slot)
+{
+	int err;
+
+	err = pvr_power_get(pvr_dev);
+	if (err)
+		return err;
+
+	err = pvr_kccb_send_cmd_powered(pvr_dev, cmd, kccb_slot);
+
+	pvr_power_put(pvr_dev);
+
+	return err;
+}
+
+/**
+ * pvr_kccb_wait_for_completion() - Wait for a KCCB command to complete
+ * @pvr_dev: Device pointer.
+ * @slot_nr: KCCB slot to wait on.
+ * @timeout: Timeout length (in jiffies).
+ * @rtn_out: Location to store KCCB command result. May be %NULL.
+ *
+ * Returns:
+ *  * Zero on success, or
+ *  * -ETIMEDOUT on timeout.
+ */
+int
+pvr_kccb_wait_for_completion(struct pvr_device *pvr_dev, u32 slot_nr,
+			     u32 timeout, u32 *rtn_out)
+{
+	int ret = wait_event_timeout(pvr_dev->kccb.rtn_q, READ_ONCE(pvr_dev->kccb.rtn[slot_nr]) &
+				     ROGUE_FWIF_KCCB_RTN_SLOT_CMD_EXECUTED, timeout);
+
+	if (ret && rtn_out)
+		*rtn_out = READ_ONCE(pvr_dev->kccb.rtn[slot_nr]);
+
+	return ret ? 0 : -ETIMEDOUT;
+}
+
+/**
+ * pvr_kccb_is_idle() - Returns whether the device's KCCB is idle
+ * @pvr_dev: Device pointer
+ *
+ * Returns:
+ *  * %true if the KCCB is idle (contains no commands), or
+ *  * %false if the KCCB contains pending commands.
+ */
+bool
+pvr_kccb_is_idle(struct pvr_device *pvr_dev)
+{
+	struct rogue_fwif_ccb_ctl *ctrl = pvr_dev->kccb.ccb.ctrl;
+	bool idle;
+
+	mutex_lock(&pvr_dev->kccb.ccb.lock);
+
+	idle = (READ_ONCE(ctrl->write_offset) == READ_ONCE(ctrl->read_offset));
+
+	mutex_unlock(&pvr_dev->kccb.ccb.lock);
+
+	return idle;
+}
+
+static const char *
+pvr_kccb_fence_get_driver_name(struct dma_fence *f)
+{
+	return PVR_DRIVER_NAME;
+}
+
+static const char *
+pvr_kccb_fence_get_timeline_name(struct dma_fence *f)
+{
+	return "kccb";
+}
+
+static const struct dma_fence_ops pvr_kccb_fence_ops = {
+	.get_driver_name = pvr_kccb_fence_get_driver_name,
+	.get_timeline_name = pvr_kccb_fence_get_timeline_name,
+};
+
+/**
+ * struct pvr_kccb_fence - Fence object used to wait for a KCCB slot
+ */
+struct pvr_kccb_fence {
+	/** @base: Base dma_fence object. */
+	struct dma_fence base;
+
+	/** @node: Node used to insert the fence in the pvr_device::kccb::waiters list. */
+	struct list_head node;
+};
+
+/**
+ * pvr_kccb_wake_up_waiters() - Check the KCCB waiters
+ * @pvr_dev: Target PowerVR device
+ *
+ * Signal as many KCCB fences as we have slots available.
+ */
+void pvr_kccb_wake_up_waiters(struct pvr_device *pvr_dev)
+{
+	struct pvr_kccb_fence *fence, *tmp_fence;
+	u32 used_count, available_count;
+
+	/* Wake up those waiting for KCCB slot execution. */
+	wake_up_all(&pvr_dev->kccb.rtn_q);
+
+	/* Then iterate over all KCCB fences and signal as many as we can. */
+	mutex_lock(&pvr_dev->kccb.ccb.lock);
+	used_count = pvr_kccb_used_slot_count_locked(pvr_dev);
+
+	if (WARN_ON(used_count + pvr_dev->kccb.reserved_count > pvr_kccb_capacity(pvr_dev)))
+		goto out_unlock;
+
+	available_count = pvr_kccb_capacity(pvr_dev) - used_count - pvr_dev->kccb.reserved_count;
+	list_for_each_entry_safe(fence, tmp_fence, &pvr_dev->kccb.waiters, node) {
+		if (!available_count)
+			break;
+
+		list_del(&fence->node);
+		pvr_dev->kccb.reserved_count++;
+		available_count--;
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+
+out_unlock:
+	mutex_unlock(&pvr_dev->kccb.ccb.lock);
+}
+
+/**
+ * pvr_kccb_fini() - Cleanup device KCCB
+ * @pvr_dev: Target PowerVR device
+ */
+void pvr_kccb_fini(struct pvr_device *pvr_dev)
+{
+	pvr_ccb_fini(&pvr_dev->kccb.ccb);
+	WARN_ON(!list_empty(&pvr_dev->kccb.waiters));
+	WARN_ON(pvr_dev->kccb.reserved_count);
+}
+
+/**
+ * pvr_kccb_init() - Initialise device KCCB
+ * @pvr_dev: Target PowerVR device
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_ccb_init().
+ */
+int
+pvr_kccb_init(struct pvr_device *pvr_dev)
+{
+	pvr_dev->kccb.slot_count = 1 << ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT;
+	INIT_LIST_HEAD(&pvr_dev->kccb.waiters);
+	pvr_dev->kccb.fence_ctx.id = dma_fence_context_alloc(1);
+	spin_lock_init(&pvr_dev->kccb.fence_ctx.lock);
+
+	return pvr_ccb_init(pvr_dev, &pvr_dev->kccb.ccb,
+			    ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT,
+			    sizeof(struct rogue_fwif_kccb_cmd));
+}
+
+/**
+ * pvr_kccb_fence_alloc() - Allocate a pvr_kccb_fence object
+ *
+ * Return:
+ *  * NULL if the allocation fails, or
+ *  * A valid dma_fence pointer otherwise.
+ */
+struct dma_fence *pvr_kccb_fence_alloc(void)
+{
+	struct pvr_kccb_fence *kccb_fence;
+
+	kccb_fence = kzalloc(sizeof(*kccb_fence), GFP_KERNEL);
+	if (!kccb_fence)
+		return NULL;
+
+	return &kccb_fence->base;
+}
+
+/**
+ * pvr_kccb_fence_put() - Drop a KCCB fence reference
+ * @fence: The fence to drop the reference on.
+ *
+ * If the fence hasn't been initialized yet, dma_fence_free() is called. This
+ * way we have a single function taking care of both cases.
+ */
+void pvr_kccb_fence_put(struct dma_fence *fence)
+{
+	if (!fence)
+		return;
+
+	if (!fence->ops) {
+		dma_fence_free(fence);
+	} else {
+		WARN_ON(fence->ops != &pvr_kccb_fence_ops);
+		dma_fence_put(fence);
+	}
+}
+
+/**
+ * pvr_kccb_reserve_slot() - Reserve a KCCB slot for later use
+ * @pvr_dev: Target PowerVR device
+ * @f: KCCB fence object previously allocated with pvr_kccb_fence_alloc()
+ *
+ * Try to reserve a KCCB slot, and if there's no slot available,
+ * initializes the fence object and queue it to the waiters list.
+ *
+ * If NULL is returned, that means the slot is reserved. In that case,
+ * the @f is freed and shouldn't be accessed after that point.
+ *
+ * Return:
+ *  * NULL if a slot was available directly, or
+ *  * A valid dma_fence object to wait on if no slot was available.
+ */
+struct dma_fence *
+pvr_kccb_reserve_slot(struct pvr_device *pvr_dev, struct dma_fence *f)
+{
+	struct pvr_kccb_fence *fence = container_of(f, struct pvr_kccb_fence, base);
+	struct dma_fence *out_fence = NULL;
+	u32 used_count;
+
+	mutex_lock(&pvr_dev->kccb.ccb.lock);
+
+	used_count = pvr_kccb_used_slot_count_locked(pvr_dev);
+	if (pvr_dev->kccb.reserved_count >= pvr_kccb_capacity(pvr_dev) - used_count) {
+		dma_fence_init(&fence->base, &pvr_kccb_fence_ops,
+			       &pvr_dev->kccb.fence_ctx.lock,
+			       pvr_dev->kccb.fence_ctx.id,
+			       atomic_inc_return(&pvr_dev->kccb.fence_ctx.seqno));
+		out_fence = dma_fence_get(&fence->base);
+		list_add_tail(&fence->node, &pvr_dev->kccb.waiters);
+	} else {
+		pvr_kccb_fence_put(f);
+		pvr_dev->kccb.reserved_count++;
+	}
+
+	mutex_unlock(&pvr_dev->kccb.ccb.lock);
+
+	return out_fence;
+}
+
+/**
+ * pvr_kccb_release_slot() - Release a KCCB slot reserved with
+ * pvr_kccb_reserve_slot()
+ * @pvr_dev: Target PowerVR device
+ *
+ * Should only be called if something failed after the
+ * pvr_kccb_reserve_slot() call and you know you won't call
+ * pvr_kccb_send_cmd_reserved().
+ */
+void pvr_kccb_release_slot(struct pvr_device *pvr_dev)
+{
+	mutex_lock(&pvr_dev->kccb.ccb.lock);
+	if (!WARN_ON(!pvr_dev->kccb.reserved_count))
+		pvr_dev->kccb.reserved_count--;
+	mutex_unlock(&pvr_dev->kccb.ccb.lock);
+}
+
+/**
+ * pvr_fwccb_init() - Initialise device FWCCB
+ * @pvr_dev: Target PowerVR device
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_ccb_init().
+ */
+int
+pvr_fwccb_init(struct pvr_device *pvr_dev)
+{
+	return pvr_ccb_init(pvr_dev, &pvr_dev->fwccb,
+			    ROGUE_FWIF_FWCCB_NUMCMDS_LOG2,
+			    sizeof(struct rogue_fwif_fwccb_cmd));
+}
diff --git a/drivers/gpu/drm/imagination/pvr_ccb.h b/drivers/gpu/drm/imagination/pvr_ccb.h
new file mode 100644
index 0000000000000..4c8aef31eeb04
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_ccb.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_CCB_H
+#define PVR_CCB_H
+
+#include "pvr_rogue_fwif.h"
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/* Forward declaration from pvr_device.h. */
+struct pvr_device;
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+struct pvr_ccb {
+	/** @ctrl_obj: FW object representing CCB control structure. */
+	struct pvr_fw_object *ctrl_obj;
+	/** @ccb_obj: FW object representing CCB. */
+	struct pvr_fw_object *ccb_obj;
+
+	/** @ctrl_fw_addr: FW virtual address of CCB control structure. */
+	u32 ctrl_fw_addr;
+	/** @ccb_fw_addr: FW virtual address of CCB. */
+	u32 ccb_fw_addr;
+
+	/** @num_cmds: Number of commands in this CCB. */
+	u32 num_cmds;
+
+	/** @cmd_size: Size of each command in this CCB, in bytes. */
+	u32 cmd_size;
+
+	/** @lock: Mutex protecting @ctrl and @ccb. */
+	struct mutex lock;
+	/**
+	 * @ctrl: Kernel mapping of CCB control structure. @lock must be held
+	 *        when accessing.
+	 */
+	struct rogue_fwif_ccb_ctl *ctrl;
+	/** @ccb: Kernel mapping of CCB. @lock must be held when accessing. */
+	void *ccb;
+};
+
+int pvr_kccb_init(struct pvr_device *pvr_dev);
+void pvr_kccb_fini(struct pvr_device *pvr_dev);
+int pvr_fwccb_init(struct pvr_device *pvr_dev);
+void pvr_ccb_fini(struct pvr_ccb *ccb);
+
+void pvr_fwccb_process(struct pvr_device *pvr_dev);
+
+struct dma_fence *pvr_kccb_fence_alloc(void);
+void pvr_kccb_fence_put(struct dma_fence *fence);
+struct dma_fence *
+pvr_kccb_reserve_slot(struct pvr_device *pvr_dev, struct dma_fence *f);
+void pvr_kccb_release_slot(struct pvr_device *pvr_dev);
+int pvr_kccb_send_cmd(struct pvr_device *pvr_dev,
+		      struct rogue_fwif_kccb_cmd *cmd, u32 *kccb_slot);
+int pvr_kccb_send_cmd_powered(struct pvr_device *pvr_dev,
+			      struct rogue_fwif_kccb_cmd *cmd,
+			      u32 *kccb_slot);
+void pvr_kccb_send_cmd_reserved_powered(struct pvr_device *pvr_dev,
+					struct rogue_fwif_kccb_cmd *cmd,
+					u32 *kccb_slot);
+int pvr_kccb_wait_for_completion(struct pvr_device *pvr_dev, u32 slot_nr, u32 timeout,
+				 u32 *rtn_out);
+bool pvr_kccb_is_idle(struct pvr_device *pvr_dev);
+void pvr_kccb_wake_up_waiters(struct pvr_device *pvr_dev);
+
+#endif /* PVR_CCB_H */
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index e16282325178e..1be14cdbdace1 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -114,6 +114,87 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev)
 	return 0;
 }
 
+static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data)
+{
+	struct pvr_device *pvr_dev = data;
+	irqreturn_t ret = IRQ_NONE;
+
+	/* We are in the threaded handler, we can keep dequeuing events until we
+	 * don't see any. This should allow us to reduce the number of interrupts
+	 * when the GPU is receiving a massive amount of short jobs.
+	 */
+	while (pvr_fw_irq_pending(pvr_dev)) {
+		pvr_fw_irq_clear(pvr_dev);
+
+		if (pvr_dev->fw_dev.booted) {
+			pvr_fwccb_process(pvr_dev);
+			pvr_kccb_wake_up_waiters(pvr_dev);
+		}
+
+		pm_runtime_mark_last_busy(from_pvr_device(pvr_dev)->dev);
+
+		ret = IRQ_HANDLED;
+	}
+
+	/* Unmask FW irqs before returning, so new interrupts can be received. */
+	pvr_fw_irq_enable(pvr_dev);
+	return ret;
+}
+
+static irqreturn_t pvr_device_irq_handler(int irq, void *data)
+{
+	struct pvr_device *pvr_dev = data;
+
+	if (!pvr_fw_irq_pending(pvr_dev))
+		return IRQ_NONE; /* Spurious IRQ - ignore. */
+
+	/* Mask the FW interrupts before waking up the thread. Will be unmasked
+	 * when the thread handler is done processing events.
+	 */
+	pvr_fw_irq_disable(pvr_dev);
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * pvr_device_irq_init() - Initialise IRQ required by a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * Any error returned by platform_get_irq_byname(), or
+ *  * Any error returned by request_irq().
+ */
+static int
+pvr_device_irq_init(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct platform_device *plat_dev = to_platform_device(drm_dev->dev);
+
+	init_waitqueue_head(&pvr_dev->kccb.rtn_q);
+
+	pvr_dev->irq = platform_get_irq(plat_dev, 0);
+	if (pvr_dev->irq < 0)
+		return pvr_dev->irq;
+
+	/* Clear any pending events before requesting the IRQ line. */
+	pvr_fw_irq_clear(pvr_dev);
+	pvr_fw_irq_enable(pvr_dev);
+
+	return request_threaded_irq(pvr_dev->irq, pvr_device_irq_handler,
+				    pvr_device_irq_thread_handler,
+				    IRQF_SHARED, "gpu", pvr_dev);
+}
+
+/**
+ * pvr_device_irq_fini() - Deinitialise IRQ required by a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ */
+static void
+pvr_device_irq_fini(struct pvr_device *pvr_dev)
+{
+	free_irq(pvr_dev->irq, pvr_dev);
+}
+
 /**
  * pvr_build_firmware_filename() - Construct a PowerVR firmware filename
  * @pvr_dev: Target PowerVR device.
@@ -324,7 +405,19 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev)
 			return PTR_ERR(pvr_dev->kernel_vm_ctx);
 	}
 
+	err = pvr_fw_init(pvr_dev);
+	if (err)
+		goto err_vm_ctx_put;
+
 	return 0;
+
+err_vm_ctx_put:
+	if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) {
+		pvr_vm_context_put(pvr_dev->kernel_vm_ctx);
+		pvr_dev->kernel_vm_ctx = NULL;
+	}
+
+	return err;
 }
 
 /**
@@ -334,6 +427,8 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev)
 static void
 pvr_device_gpu_fini(struct pvr_device *pvr_dev)
 {
+	pvr_fw_fini(pvr_dev);
+
 	if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) {
 		WARN_ON(!pvr_vm_context_put(pvr_dev->kernel_vm_ctx));
 		pvr_dev->kernel_vm_ctx = NULL;
@@ -386,10 +481,17 @@ pvr_device_init(struct pvr_device *pvr_dev)
 	if (err)
 		goto err_pm_runtime_put;
 
+	err = pvr_device_irq_init(pvr_dev);
+	if (err)
+		goto err_device_gpu_fini;
+
 	pm_runtime_put(dev);
 
 	return 0;
 
+err_device_gpu_fini:
+	pvr_device_gpu_fini(pvr_dev);
+
 err_pm_runtime_put:
 	pm_runtime_put_sync_suspend(dev);
 
@@ -407,6 +509,7 @@ pvr_device_fini(struct pvr_device *pvr_dev)
 	 * Deinitialization stages are performed in reverse order compared to
 	 * the initialization stages in pvr_device_init().
 	 */
+	pvr_device_irq_fini(pvr_dev);
 	pvr_device_gpu_fini(pvr_dev);
 }
 
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 771ba879f02dd..39347595c6f07 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -4,6 +4,7 @@
 #ifndef PVR_DEVICE_H
 #define PVR_DEVICE_H
 
+#include "pvr_ccb.h"
 #include "pvr_device_info.h"
 #include "pvr_fw.h"
 
@@ -123,6 +124,12 @@ struct pvr_device {
 	 */
 	struct clk *mem_clk;
 
+	/** @irq: IRQ number. */
+	int irq;
+
+	/** @fwccb: Firmware CCB. */
+	struct pvr_ccb fwccb;
+
 	/**
 	 * @kernel_vm_ctx: Virtual memory context used for kernel mappings.
 	 *
@@ -153,6 +160,49 @@ struct pvr_device {
 		u32 kccb_stall_count;
 	} watchdog;
 
+	struct {
+		/** @ccb: Kernel CCB. */
+		struct pvr_ccb ccb;
+
+		/** @rtn_q: Waitqueue for KCCB command return waiters. */
+		wait_queue_head_t rtn_q;
+
+		/** @rtn_obj: Object representing KCCB return slots. */
+		struct pvr_fw_object *rtn_obj;
+
+		/**
+		 * @rtn: Pointer to CPU mapping of KCCB return slots. Must be accessed by
+		 *       READ_ONCE()/WRITE_ONCE().
+		 */
+		u32 *rtn;
+
+		/** @slot_count: Total number of KCCB slots available. */
+		u32 slot_count;
+
+		/** @reserved_count: Number of KCCB slots reserved for future use. */
+		u32 reserved_count;
+
+		/**
+		 * @waiters: List of KCCB slot waiters.
+		 */
+		struct list_head waiters;
+
+		/** @fence_ctx: KCCB fence context. */
+		struct {
+			/** @id: KCCB fence context ID allocated with dma_fence_context_alloc(). */
+			u64 id;
+
+			/** @seqno: Sequence number incremented each time a fence is created. */
+			atomic_t seqno;
+
+			/**
+			 * @lock: Lock used to synchronize access to fences allocated by this
+			 * context.
+			 */
+			spinlock_t lock;
+		} fence_ctx;
+	} kccb;
+
 	/**
 	 * @lost: %true if the device has been lost.
 	 *
@@ -161,6 +211,16 @@ struct pvr_device {
 	 */
 	bool lost;
 
+	/**
+	 * @reset_sem: Reset semaphore.
+	 *
+	 * GPU reset code will lock this for writing. Any code that submits commands to the firmware
+	 * that isn't in an IRQ handler or on the scheduler workqueue must lock this for reading.
+	 * Once this has been successfully locked, &pvr_dev->lost _must_ be checked, and -%EIO must
+	 * be returned if it is set.
+	 */
+	struct rw_semaphore reset_sem;
+
 	/** @sched_wq: Workqueue for schedulers. */
 	struct workqueue_struct *sched_wq;
 };
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index 12e136217c456..33b38c0d79c8e 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -1331,3 +1331,4 @@ MODULE_AUTHOR("Imagination Technologies Ltd.");
 MODULE_DESCRIPTION(PVR_DRIVER_DESC);
 MODULE_LICENSE("Dual MIT/GPL");
 MODULE_IMPORT_NS(DMA_BUF);
+MODULE_FIRMWARE("powervr/rogue_33.15.11.3_v1.fw");
diff --git a/drivers/gpu/drm/imagination/pvr_fw.c b/drivers/gpu/drm/imagination/pvr_fw.c
index a9377c735c9cd..f8ed981f18077 100644
--- a/drivers/gpu/drm/imagination/pvr_fw.c
+++ b/drivers/gpu/drm/imagination/pvr_fw.c
@@ -1,16 +1,84 @@
 // SPDX-License-Identifier: GPL-2.0-only OR MIT
 /* Copyright (c) 2023 Imagination Technologies Ltd. */
 
+#include "pvr_ccb.h"
 #include "pvr_device.h"
 #include "pvr_device_info.h"
 #include "pvr_fw.h"
+#include "pvr_fw_info.h"
+#include "pvr_fw_startstop.h"
+#include "pvr_fw_trace.h"
+#include "pvr_gem.h"
+#include "pvr_power.h"
+#include "pvr_rogue_fwif_dev_info.h"
+#include "pvr_rogue_heap_config.h"
+#include "pvr_vm.h"
 
 #include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_mm.h>
+#include <linux/clk.h>
 #include <linux/firmware.h>
+#include <linux/math.h>
+#include <linux/minmax.h>
 #include <linux/sizes.h>
 
 #define FW_MAX_SUPPORTED_MAJOR_VERSION 1
 
+#define FW_BOOT_TIMEOUT_USEC 5000000
+
+/* Config heap occupies top 192k of the firmware heap. */
+#define PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY SZ_64K
+#define PVR_ROGUE_FW_CONFIG_HEAP_SIZE (3 * PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY)
+
+/* Main firmware allocations should come from the remainder of the heap. */
+#define PVR_ROGUE_FW_MAIN_HEAP_BASE ROGUE_FW_HEAP_BASE
+
+/* Offsets from start of configuration area of FW heap. */
+#define PVR_ROGUE_FWIF_CONNECTION_CTL_OFFSET 0
+#define PVR_ROGUE_FWIF_OSINIT_OFFSET \
+	(PVR_ROGUE_FWIF_CONNECTION_CTL_OFFSET + PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY)
+#define PVR_ROGUE_FWIF_SYSINIT_OFFSET \
+	(PVR_ROGUE_FWIF_OSINIT_OFFSET + PVR_ROGUE_FW_CONFIG_HEAP_GRANULARITY)
+
+#define PVR_ROGUE_FAULT_PAGE_SIZE SZ_4K
+
+#define PVR_SYNC_OBJ_SIZE sizeof(u32)
+
+const struct pvr_fw_layout_entry *
+pvr_fw_find_layout_entry(struct pvr_device *pvr_dev, enum pvr_fw_section_id id)
+{
+	const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries;
+	u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num;
+	u32 entry;
+
+	for (entry = 0; entry < num_layout_entries; entry++) {
+		if (layout_entries[entry].id == id)
+			return &layout_entries[entry];
+	}
+
+	return NULL;
+}
+
+static const struct pvr_fw_layout_entry *
+pvr_fw_find_private_data(struct pvr_device *pvr_dev)
+{
+	const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries;
+	u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num;
+	u32 entry;
+
+	for (entry = 0; entry < num_layout_entries; entry++) {
+		if (layout_entries[entry].id == META_PRIVATE_DATA ||
+		    layout_entries[entry].id == MIPS_PRIVATE_DATA ||
+		    layout_entries[entry].id == RISCV_PRIVATE_DATA)
+			return &layout_entries[entry];
+	}
+
+	return NULL;
+}
+
+#define DEV_INFO_MASK_SIZE(x) DIV_ROUND_UP(x, 64)
+
 /**
  * pvr_fw_validate() - Parse firmware header and check compatibility
  * @pvr_dev: Device pointer.
@@ -122,6 +190,704 @@ pvr_fw_get_device_info(struct pvr_device *pvr_dev)
 					    header->feature_param_size);
 }
 
+static void
+layout_get_sizes(struct pvr_device *pvr_dev)
+{
+	const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries;
+	u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num;
+	struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem;
+
+	fw_mem->code_alloc_size = 0;
+	fw_mem->data_alloc_size = 0;
+	fw_mem->core_code_alloc_size = 0;
+	fw_mem->core_data_alloc_size = 0;
+
+	/* Extract section sizes from FW layout table. */
+	for (u32 entry = 0; entry < num_layout_entries; entry++) {
+		switch (layout_entries[entry].type) {
+		case FW_CODE:
+			fw_mem->code_alloc_size += layout_entries[entry].alloc_size;
+			break;
+		case FW_DATA:
+			fw_mem->data_alloc_size += layout_entries[entry].alloc_size;
+			break;
+		case FW_COREMEM_CODE:
+			fw_mem->core_code_alloc_size +=
+				layout_entries[entry].alloc_size;
+			break;
+		case FW_COREMEM_DATA:
+			fw_mem->core_data_alloc_size +=
+				layout_entries[entry].alloc_size;
+			break;
+		case NONE:
+			break;
+		}
+	}
+}
+
+int
+pvr_fw_find_mmu_segment(struct pvr_device *pvr_dev, u32 addr, u32 size, void *fw_code_ptr,
+			void *fw_data_ptr, void *fw_core_code_ptr, void *fw_core_data_ptr,
+			void **host_addr_out)
+{
+	const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries;
+	u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num;
+	u32 end_addr = addr + size;
+	int entry = 0;
+
+	/* Ensure requested range is not zero, and size is not causing addr to overflow. */
+	if (end_addr <= addr)
+		return -EINVAL;
+
+	for (entry = 0; entry < num_layout_entries; entry++) {
+		u32 entry_start_addr = layout_entries[entry].base_addr;
+		u32 entry_end_addr = entry_start_addr + layout_entries[entry].alloc_size;
+
+		if (addr >= entry_start_addr && addr < entry_end_addr &&
+		    end_addr > entry_start_addr && end_addr <= entry_end_addr) {
+			switch (layout_entries[entry].type) {
+			case FW_CODE:
+				*host_addr_out = fw_code_ptr;
+				break;
+
+			case FW_DATA:
+				*host_addr_out = fw_data_ptr;
+				break;
+
+			case FW_COREMEM_CODE:
+				*host_addr_out = fw_core_code_ptr;
+				break;
+
+			case FW_COREMEM_DATA:
+				*host_addr_out = fw_core_data_ptr;
+				break;
+
+			default:
+				return -EINVAL;
+			}
+			/* Direct Mem write to mapped memory */
+			addr -= layout_entries[entry].base_addr;
+			addr += layout_entries[entry].alloc_offset;
+
+			/*
+			 * Add offset to pointer to FW allocation only if that
+			 * allocation is available
+			 */
+			*(u8 **)host_addr_out += addr;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int
+pvr_fw_create_fwif_connection_ctl(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	fw_dev->fwif_connection_ctl =
+		pvr_fw_object_create_and_map_offset(pvr_dev,
+						    fw_dev->fw_heap_info.config_offset +
+						    PVR_ROGUE_FWIF_CONNECTION_CTL_OFFSET,
+						    sizeof(*fw_dev->fwif_connection_ctl),
+						    PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						    NULL, NULL,
+						    &fw_dev->mem.fwif_connection_ctl_obj);
+	if (IS_ERR(fw_dev->fwif_connection_ctl)) {
+		drm_err(drm_dev,
+			"Unable to allocate FWIF connection control memory\n");
+		return PTR_ERR(fw_dev->fwif_connection_ctl);
+	}
+
+	return 0;
+}
+
+static void
+pvr_fw_fini_fwif_connection_ctl(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	pvr_fw_object_unmap_and_destroy(fw_dev->mem.fwif_connection_ctl_obj);
+}
+
+static void
+fw_osinit_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_osinit *fwif_osinit = cpu_ptr;
+	struct pvr_device *pvr_dev = priv;
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mem *fw_mem = &fw_dev->mem;
+
+	fwif_osinit->kernel_ccbctl_fw_addr = pvr_dev->kccb.ccb.ctrl_fw_addr;
+	fwif_osinit->kernel_ccb_fw_addr = pvr_dev->kccb.ccb.ccb_fw_addr;
+	pvr_fw_object_get_fw_addr(pvr_dev->kccb.rtn_obj,
+				  &fwif_osinit->kernel_ccb_rtn_slots_fw_addr);
+
+	fwif_osinit->firmware_ccbctl_fw_addr = pvr_dev->fwccb.ctrl_fw_addr;
+	fwif_osinit->firmware_ccb_fw_addr = pvr_dev->fwccb.ccb_fw_addr;
+
+	fwif_osinit->work_est_firmware_ccbctl_fw_addr = 0;
+	fwif_osinit->work_est_firmware_ccb_fw_addr = 0;
+
+	pvr_fw_object_get_fw_addr(fw_mem->hwrinfobuf_obj,
+				  &fwif_osinit->rogue_fwif_hwr_info_buf_ctl_fw_addr);
+	pvr_fw_object_get_fw_addr(fw_mem->osdata_obj, &fwif_osinit->fw_os_data_fw_addr);
+
+	fwif_osinit->hwr_debug_dump_limit = 0;
+
+	rogue_fwif_compchecks_bvnc_init(&fwif_osinit->rogue_comp_checks.hw_bvnc);
+	rogue_fwif_compchecks_bvnc_init(&fwif_osinit->rogue_comp_checks.fw_bvnc);
+}
+
+static void
+fw_osdata_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_osdata *fwif_osdata = cpu_ptr;
+	struct pvr_device *pvr_dev = priv;
+	struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem;
+
+	pvr_fw_object_get_fw_addr(fw_mem->power_sync_obj, &fwif_osdata->power_sync_fw_addr);
+}
+
+static void
+fw_fault_page_init(void *cpu_ptr, void *priv)
+{
+	u32 *fault_page = cpu_ptr;
+
+	for (int i = 0; i < PVR_ROGUE_FAULT_PAGE_SIZE / sizeof(*fault_page); i++)
+		fault_page[i] = 0xdeadbee0;
+}
+
+static void
+fw_sysinit_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_sysinit *fwif_sysinit = cpu_ptr;
+	struct pvr_device *pvr_dev = priv;
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mem *fw_mem = &fw_dev->mem;
+	dma_addr_t fault_dma_addr = 0;
+	u32 clock_speed_hz = clk_get_rate(pvr_dev->core_clk);
+
+	WARN_ON(!clock_speed_hz);
+
+	WARN_ON(pvr_fw_object_get_dma_addr(fw_mem->fault_page_obj, 0, &fault_dma_addr));
+	fwif_sysinit->fault_phys_addr = (u64)fault_dma_addr;
+
+	fwif_sysinit->pds_exec_base = ROGUE_PDSCODEDATA_HEAP_BASE;
+	fwif_sysinit->usc_exec_base = ROGUE_USCCODE_HEAP_BASE;
+
+	pvr_fw_object_get_fw_addr(fw_mem->runtime_cfg_obj, &fwif_sysinit->runtime_cfg_fw_addr);
+	pvr_fw_object_get_fw_addr(fw_dev->fw_trace.tracebuf_ctrl_obj,
+				  &fwif_sysinit->trace_buf_ctl_fw_addr);
+	pvr_fw_object_get_fw_addr(fw_mem->sysdata_obj, &fwif_sysinit->fw_sys_data_fw_addr);
+	pvr_fw_object_get_fw_addr(fw_mem->gpu_util_fwcb_obj,
+				  &fwif_sysinit->gpu_util_fw_cb_ctl_fw_addr);
+	if (fw_mem->core_data_obj) {
+		pvr_fw_object_get_fw_addr(fw_mem->core_data_obj,
+					  &fwif_sysinit->coremem_data_store.fw_addr);
+	}
+
+	/* Currently unsupported. */
+	fwif_sysinit->counter_dump_ctl.buffer_fw_addr = 0;
+	fwif_sysinit->counter_dump_ctl.size_in_dwords = 0;
+
+	/* Skip alignment checks. */
+	fwif_sysinit->align_checks = 0;
+
+	fwif_sysinit->filter_flags = 0;
+	fwif_sysinit->hw_perf_filter = 0;
+	fwif_sysinit->firmware_perf = FW_PERF_CONF_NONE;
+	fwif_sysinit->initial_core_clock_speed = clock_speed_hz;
+	fwif_sysinit->active_pm_latency_ms = 0;
+	fwif_sysinit->gpio_validation_mode = ROGUE_FWIF_GPIO_VAL_OFF;
+	fwif_sysinit->firmware_started = false;
+	fwif_sysinit->marker_val = 1;
+
+	memset(&fwif_sysinit->bvnc_km_feature_flags, 0,
+	       sizeof(fwif_sysinit->bvnc_km_feature_flags));
+}
+
+#define ROGUE_FWIF_SLC_MIN_SIZE_FOR_DM_OVERLAP_KB 4
+
+static void
+fw_sysdata_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_sysdata *fwif_sysdata = cpu_ptr;
+	struct pvr_device *pvr_dev = priv;
+	u32 slc_size_in_kilobytes = 0;
+	u32 config_flags = 0;
+
+	WARN_ON(PVR_FEATURE_VALUE(pvr_dev, slc_size_in_kilobytes, &slc_size_in_kilobytes));
+
+	if (slc_size_in_kilobytes < ROGUE_FWIF_SLC_MIN_SIZE_FOR_DM_OVERLAP_KB)
+		config_flags |= ROGUE_FWIF_INICFG_DISABLE_DM_OVERLAP;
+
+	fwif_sysdata->config_flags = config_flags;
+}
+
+static void
+fw_runtime_cfg_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_runtime_cfg *runtime_cfg = cpu_ptr;
+	struct pvr_device *pvr_dev = priv;
+	u32 clock_speed_hz = clk_get_rate(pvr_dev->core_clk);
+
+	WARN_ON(!clock_speed_hz);
+
+	runtime_cfg->core_clock_speed = clock_speed_hz;
+	runtime_cfg->active_pm_latency_ms = 0;
+	runtime_cfg->active_pm_latency_persistant = true;
+	WARN_ON(PVR_FEATURE_VALUE(pvr_dev, num_clusters,
+				  &runtime_cfg->default_dusts_num_init) != 0);
+}
+
+static void
+fw_gpu_util_fwcb_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_gpu_util_fwcb *gpu_util_fwcb = cpu_ptr;
+
+	gpu_util_fwcb->last_word = PVR_FWIF_GPU_UTIL_STATE_IDLE;
+}
+
+static int
+pvr_fw_create_structures(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mem *fw_mem = &fw_dev->mem;
+	int err;
+
+	fw_dev->power_sync = pvr_fw_object_create_and_map(pvr_dev, sizeof(*fw_dev->power_sync),
+							  PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							  NULL, NULL, &fw_mem->power_sync_obj);
+	if (IS_ERR(fw_dev->power_sync)) {
+		drm_err(drm_dev, "Unable to allocate FW power_sync structure\n");
+		return PTR_ERR(fw_dev->power_sync);
+	}
+
+	fw_dev->hwrinfobuf = pvr_fw_object_create_and_map(pvr_dev, sizeof(*fw_dev->hwrinfobuf),
+							  PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							  NULL, NULL, &fw_mem->hwrinfobuf_obj);
+	if (IS_ERR(fw_dev->hwrinfobuf)) {
+		drm_err(drm_dev,
+			"Unable to allocate FW hwrinfobuf structure\n");
+		err = PTR_ERR(fw_dev->hwrinfobuf);
+		goto err_release_power_sync;
+	}
+
+	err = pvr_fw_object_create(pvr_dev, PVR_SYNC_OBJ_SIZE,
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   NULL, NULL, &fw_mem->mmucache_sync_obj);
+	if (err) {
+		drm_err(drm_dev,
+			"Unable to allocate MMU cache sync object\n");
+		goto err_release_hwrinfobuf;
+	}
+
+	fw_dev->fwif_sysdata = pvr_fw_object_create_and_map(pvr_dev,
+							    sizeof(*fw_dev->fwif_sysdata),
+							    PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							    fw_sysdata_init, pvr_dev,
+							    &fw_mem->sysdata_obj);
+	if (IS_ERR(fw_dev->fwif_sysdata)) {
+		drm_err(drm_dev, "Unable to allocate FW SYSDATA structure\n");
+		err = PTR_ERR(fw_dev->fwif_sysdata);
+		goto err_release_mmucache_sync_obj;
+	}
+
+	err = pvr_fw_object_create(pvr_dev, PVR_ROGUE_FAULT_PAGE_SIZE,
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   fw_fault_page_init, NULL, &fw_mem->fault_page_obj);
+	if (err) {
+		drm_err(drm_dev, "Unable to allocate FW fault page\n");
+		goto err_release_sysdata;
+	}
+
+	err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_gpu_util_fwcb),
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   fw_gpu_util_fwcb_init, pvr_dev, &fw_mem->gpu_util_fwcb_obj);
+	if (err) {
+		drm_err(drm_dev, "Unable to allocate GPU util FWCB\n");
+		goto err_release_fault_page;
+	}
+
+	err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_runtime_cfg),
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   fw_runtime_cfg_init, pvr_dev, &fw_mem->runtime_cfg_obj);
+	if (err) {
+		drm_err(drm_dev, "Unable to allocate FW runtime config\n");
+		goto err_release_gpu_util_fwcb;
+	}
+
+	err = pvr_fw_trace_init(pvr_dev);
+	if (err)
+		goto err_release_runtime_cfg;
+
+	fw_dev->fwif_osdata = pvr_fw_object_create_and_map(pvr_dev,
+							   sizeof(*fw_dev->fwif_osdata),
+							   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							   fw_osdata_init, pvr_dev,
+							   &fw_mem->osdata_obj);
+	if (IS_ERR(fw_dev->fwif_osdata)) {
+		drm_err(drm_dev, "Unable to allocate FW OSDATA structure\n");
+		err = PTR_ERR(fw_dev->fwif_osdata);
+		goto err_fw_trace_fini;
+	}
+
+	fw_dev->fwif_osinit =
+		pvr_fw_object_create_and_map_offset(pvr_dev,
+						    fw_dev->fw_heap_info.config_offset +
+						    PVR_ROGUE_FWIF_OSINIT_OFFSET,
+						    sizeof(*fw_dev->fwif_osinit),
+						    PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						    fw_osinit_init, pvr_dev, &fw_mem->osinit_obj);
+	if (IS_ERR(fw_dev->fwif_osinit)) {
+		drm_err(drm_dev, "Unable to allocate FW OSINIT structure\n");
+		err = PTR_ERR(fw_dev->fwif_osinit);
+		goto err_release_osdata;
+	}
+
+	fw_dev->fwif_sysinit =
+		pvr_fw_object_create_and_map_offset(pvr_dev,
+						    fw_dev->fw_heap_info.config_offset +
+						    PVR_ROGUE_FWIF_SYSINIT_OFFSET,
+						    sizeof(*fw_dev->fwif_sysinit),
+						    PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						    fw_sysinit_init, pvr_dev, &fw_mem->sysinit_obj);
+	if (IS_ERR(fw_dev->fwif_sysinit)) {
+		drm_err(drm_dev, "Unable to allocate FW SYSINIT structure\n");
+		err = PTR_ERR(fw_dev->fwif_sysinit);
+		goto err_release_osinit;
+	}
+
+	return 0;
+
+err_release_osinit:
+	pvr_fw_object_unmap_and_destroy(fw_mem->osinit_obj);
+
+err_release_osdata:
+	pvr_fw_object_unmap_and_destroy(fw_mem->osdata_obj);
+
+err_fw_trace_fini:
+	pvr_fw_trace_fini(pvr_dev);
+
+err_release_runtime_cfg:
+	pvr_fw_object_destroy(fw_mem->runtime_cfg_obj);
+
+err_release_gpu_util_fwcb:
+	pvr_fw_object_destroy(fw_mem->gpu_util_fwcb_obj);
+
+err_release_fault_page:
+	pvr_fw_object_destroy(fw_mem->fault_page_obj);
+
+err_release_sysdata:
+	pvr_fw_object_unmap_and_destroy(fw_mem->sysdata_obj);
+
+err_release_mmucache_sync_obj:
+	pvr_fw_object_destroy(fw_mem->mmucache_sync_obj);
+
+err_release_hwrinfobuf:
+	pvr_fw_object_unmap_and_destroy(fw_mem->hwrinfobuf_obj);
+
+err_release_power_sync:
+	pvr_fw_object_unmap_and_destroy(fw_mem->power_sync_obj);
+
+	return err;
+}
+
+static void
+pvr_fw_destroy_structures(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mem *fw_mem = &fw_dev->mem;
+
+	pvr_fw_trace_fini(pvr_dev);
+	pvr_fw_object_destroy(fw_mem->runtime_cfg_obj);
+	pvr_fw_object_destroy(fw_mem->gpu_util_fwcb_obj);
+	pvr_fw_object_destroy(fw_mem->fault_page_obj);
+	pvr_fw_object_unmap_and_destroy(fw_mem->sysdata_obj);
+	pvr_fw_object_unmap_and_destroy(fw_mem->sysinit_obj);
+
+	pvr_fw_object_destroy(fw_mem->mmucache_sync_obj);
+	pvr_fw_object_unmap_and_destroy(fw_mem->hwrinfobuf_obj);
+	pvr_fw_object_unmap_and_destroy(fw_mem->power_sync_obj);
+	pvr_fw_object_unmap_and_destroy(fw_mem->osdata_obj);
+	pvr_fw_object_unmap_and_destroy(fw_mem->osinit_obj);
+}
+
+/**
+ * pvr_fw_process() - Process firmware image, allocate FW memory and create boot
+ *                    arguments
+ * @pvr_dev: Device pointer.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_fw_object_create_and_map_offset(), or
+ *  * Any error returned by pvr_fw_object_create_and_map().
+ */
+static int
+pvr_fw_process(struct pvr_device *pvr_dev)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem;
+	const u8 *fw = pvr_dev->fw_dev.firmware->data;
+	const struct pvr_fw_layout_entry *private_data;
+	u8 *fw_code_ptr;
+	u8 *fw_data_ptr;
+	u8 *fw_core_code_ptr;
+	u8 *fw_core_data_ptr;
+	int err;
+
+	layout_get_sizes(pvr_dev);
+
+	private_data = pvr_fw_find_private_data(pvr_dev);
+	if (!private_data)
+		return -EINVAL;
+
+	/* Allocate and map memory for firmware sections. */
+
+	/*
+	 * Code allocation must be at the start of the firmware heap, otherwise
+	 * firmware processor will be unable to boot.
+	 *
+	 * This has the useful side-effect that for every other object in the
+	 * driver, a firmware address of 0 is invalid.
+	 */
+	fw_code_ptr = pvr_fw_object_create_and_map_offset(pvr_dev, 0, fw_mem->code_alloc_size,
+							  PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							  NULL, NULL, &fw_mem->code_obj);
+	if (IS_ERR(fw_code_ptr)) {
+		drm_err(drm_dev, "Unable to allocate FW code memory\n");
+		return PTR_ERR(fw_code_ptr);
+	}
+
+	if (pvr_dev->fw_dev.defs->has_fixed_data_addr()) {
+		u32 base_addr = private_data->base_addr & pvr_dev->fw_dev.fw_heap_info.offset_mask;
+
+		fw_data_ptr =
+			pvr_fw_object_create_and_map_offset(pvr_dev, base_addr,
+							    fw_mem->data_alloc_size,
+							    PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							    NULL, NULL, &fw_mem->data_obj);
+	} else {
+		fw_data_ptr = pvr_fw_object_create_and_map(pvr_dev, fw_mem->data_alloc_size,
+							   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							   NULL, NULL, &fw_mem->data_obj);
+	}
+	if (IS_ERR(fw_data_ptr)) {
+		drm_err(drm_dev, "Unable to allocate FW data memory\n");
+		err = PTR_ERR(fw_data_ptr);
+		goto err_free_fw_code_obj;
+	}
+
+	/* Core code and data sections are optional. */
+	if (fw_mem->core_code_alloc_size) {
+		fw_core_code_ptr =
+			pvr_fw_object_create_and_map(pvr_dev, fw_mem->core_code_alloc_size,
+						     PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						     NULL, NULL, &fw_mem->core_code_obj);
+		if (IS_ERR(fw_core_code_ptr)) {
+			drm_err(drm_dev,
+				"Unable to allocate FW core code memory\n");
+			err = PTR_ERR(fw_core_code_ptr);
+			goto err_free_fw_data_obj;
+		}
+	} else {
+		fw_core_code_ptr = NULL;
+	}
+
+	if (fw_mem->core_data_alloc_size) {
+		fw_core_data_ptr =
+			pvr_fw_object_create_and_map(pvr_dev, fw_mem->core_data_alloc_size,
+						     PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						     NULL, NULL, &fw_mem->core_data_obj);
+		if (IS_ERR(fw_core_data_ptr)) {
+			drm_err(drm_dev,
+				"Unable to allocate FW core data memory\n");
+			err = PTR_ERR(fw_core_data_ptr);
+			goto err_free_fw_core_code_obj;
+		}
+	} else {
+		fw_core_data_ptr = NULL;
+	}
+
+	fw_mem->code = kzalloc(fw_mem->code_alloc_size, GFP_KERNEL);
+	fw_mem->data = kzalloc(fw_mem->data_alloc_size, GFP_KERNEL);
+	if (fw_mem->core_code_alloc_size)
+		fw_mem->core_code = kzalloc(fw_mem->core_code_alloc_size, GFP_KERNEL);
+	if (fw_mem->core_data_alloc_size)
+		fw_mem->core_data = kzalloc(fw_mem->core_data_alloc_size, GFP_KERNEL);
+
+	if (!fw_mem->code || !fw_mem->data ||
+	    (!fw_mem->core_code && fw_mem->core_code_alloc_size) ||
+	    (!fw_mem->core_data && fw_mem->core_data_alloc_size)) {
+		err = -ENOMEM;
+		goto err_free_kdata;
+	}
+
+	err = pvr_dev->fw_dev.defs->fw_process(pvr_dev, fw,
+					       fw_mem->code, fw_mem->data, fw_mem->core_code,
+					       fw_mem->core_data, fw_mem->core_code_alloc_size);
+
+	if (err)
+		goto err_free_fw_core_data_obj;
+
+	memcpy(fw_code_ptr, fw_mem->code, fw_mem->code_alloc_size);
+	memcpy(fw_data_ptr, fw_mem->data, fw_mem->data_alloc_size);
+	if (fw_mem->core_code)
+		memcpy(fw_core_code_ptr, fw_mem->core_code, fw_mem->core_code_alloc_size);
+	if (fw_mem->core_data)
+		memcpy(fw_core_data_ptr, fw_mem->core_data, fw_mem->core_data_alloc_size);
+
+	/* We're finished with the firmware section memory on the CPU, unmap. */
+	if (fw_core_data_ptr)
+		pvr_fw_object_vunmap(fw_mem->core_data_obj);
+	if (fw_core_code_ptr)
+		pvr_fw_object_vunmap(fw_mem->core_code_obj);
+	pvr_fw_object_vunmap(fw_mem->data_obj);
+	fw_data_ptr = NULL;
+	pvr_fw_object_vunmap(fw_mem->code_obj);
+	fw_code_ptr = NULL;
+
+	err = pvr_fw_create_fwif_connection_ctl(pvr_dev);
+	if (err)
+		goto err_free_fw_core_data_obj;
+
+	return 0;
+
+err_free_kdata:
+	kfree(fw_mem->core_data);
+	kfree(fw_mem->core_code);
+	kfree(fw_mem->data);
+	kfree(fw_mem->code);
+
+err_free_fw_core_data_obj:
+	if (fw_core_data_ptr)
+		pvr_fw_object_unmap_and_destroy(fw_mem->core_data_obj);
+
+err_free_fw_core_code_obj:
+	if (fw_core_code_ptr)
+		pvr_fw_object_unmap_and_destroy(fw_mem->core_code_obj);
+
+err_free_fw_data_obj:
+	if (fw_data_ptr)
+		pvr_fw_object_vunmap(fw_mem->data_obj);
+	pvr_fw_object_destroy(fw_mem->data_obj);
+
+err_free_fw_code_obj:
+	if (fw_code_ptr)
+		pvr_fw_object_vunmap(fw_mem->code_obj);
+	pvr_fw_object_destroy(fw_mem->code_obj);
+
+	return err;
+}
+
+static int
+pvr_copy_to_fw(struct pvr_fw_object *dest_obj, u8 *src_ptr, u32 size)
+{
+	u8 *dest_ptr = pvr_fw_object_vmap(dest_obj);
+
+	if (IS_ERR(dest_ptr))
+		return PTR_ERR(dest_ptr);
+
+	memcpy(dest_ptr, src_ptr, size);
+
+	pvr_fw_object_vunmap(dest_obj);
+
+	return 0;
+}
+
+static int
+pvr_fw_reinit_code_data(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem;
+	int err;
+
+	err = pvr_copy_to_fw(fw_mem->code_obj, fw_mem->code, fw_mem->code_alloc_size);
+	if (err)
+		return err;
+
+	err = pvr_copy_to_fw(fw_mem->data_obj, fw_mem->data, fw_mem->data_alloc_size);
+	if (err)
+		return err;
+
+	if (fw_mem->core_code) {
+		err = pvr_copy_to_fw(fw_mem->core_code_obj, fw_mem->core_code,
+				     fw_mem->core_code_alloc_size);
+		if (err)
+			return err;
+	}
+
+	if (fw_mem->core_data) {
+		err = pvr_copy_to_fw(fw_mem->core_data_obj, fw_mem->core_data,
+				     fw_mem->core_data_alloc_size);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+pvr_fw_cleanup(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem;
+
+	pvr_fw_fini_fwif_connection_ctl(pvr_dev);
+	if (fw_mem->core_code_obj)
+		pvr_fw_object_destroy(fw_mem->core_code_obj);
+	if (fw_mem->core_data_obj)
+		pvr_fw_object_destroy(fw_mem->core_data_obj);
+	pvr_fw_object_destroy(fw_mem->code_obj);
+	pvr_fw_object_destroy(fw_mem->data_obj);
+}
+
+/**
+ * pvr_wait_for_fw_boot() - Wait for firmware to finish booting
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%ETIMEDOUT if firmware fails to boot within timeout.
+ */
+int
+pvr_wait_for_fw_boot(struct pvr_device *pvr_dev)
+{
+	ktime_t deadline = ktime_add_us(ktime_get(), FW_BOOT_TIMEOUT_USEC);
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	while (ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) {
+		if (READ_ONCE(fw_dev->fwif_sysinit->firmware_started))
+			return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * pvr_fw_heap_info_init() - Calculate size and masks for FW heap
+ * @pvr_dev: Target PowerVR device.
+ * @log2_size: Log2 of raw heap size.
+ * @reserved_size: Size of reserved area of heap, in bytes. May be zero.
+ */
+void
+pvr_fw_heap_info_init(struct pvr_device *pvr_dev, u32 log2_size, u32 reserved_size)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	fw_dev->fw_heap_info.gpu_addr = PVR_ROGUE_FW_MAIN_HEAP_BASE;
+	fw_dev->fw_heap_info.log2_size = log2_size;
+	fw_dev->fw_heap_info.reserved_size = reserved_size;
+	fw_dev->fw_heap_info.raw_size = 1 << fw_dev->fw_heap_info.log2_size;
+	fw_dev->fw_heap_info.offset_mask = fw_dev->fw_heap_info.raw_size - 1;
+	fw_dev->fw_heap_info.config_offset = fw_dev->fw_heap_info.raw_size -
+					     PVR_ROGUE_FW_CONFIG_HEAP_SIZE;
+	fw_dev->fw_heap_info.size = fw_dev->fw_heap_info.raw_size -
+				    (PVR_ROGUE_FW_CONFIG_HEAP_SIZE + reserved_size);
+}
+
 /**
  * pvr_fw_validate_init_device_info() - Validate firmware and initialise device information
  * @pvr_dev: Target PowerVR device.
@@ -143,3 +909,579 @@ pvr_fw_validate_init_device_info(struct pvr_device *pvr_dev)
 
 	return pvr_fw_get_device_info(pvr_dev);
 }
+
+/**
+ * pvr_fw_init() - Initialise and boot firmware
+ * @pvr_dev: Target PowerVR device
+ *
+ * On successful completion of the function the PowerVR device will be
+ * initialised and ready to use.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * -%EINVAL on invalid firmware image,
+ *  * -%ENOMEM on out of memory, or
+ *  * -%ETIMEDOUT if firmware processor fails to boot or on register poll timeout.
+ */
+int
+pvr_fw_init(struct pvr_device *pvr_dev)
+{
+	u32 kccb_size_log2 = ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT;
+	u32 kccb_rtn_size = (1 << kccb_size_log2) * sizeof(*pvr_dev->kccb.rtn);
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	int err;
+
+	if (fw_dev->processor_type == PVR_FW_PROCESSOR_TYPE_META)
+		fw_dev->defs = &pvr_fw_defs_meta;
+	else
+		return -EINVAL;
+
+	err = fw_dev->defs->init(pvr_dev);
+	if (err)
+		return err;
+
+	drm_mm_init(&fw_dev->fw_mm, ROGUE_FW_HEAP_BASE, fw_dev->fw_heap_info.raw_size);
+	fw_dev->fw_mm_base = ROGUE_FW_HEAP_BASE;
+	spin_lock_init(&fw_dev->fw_mm_lock);
+
+	INIT_LIST_HEAD(&fw_dev->fw_objs.list);
+	err = drmm_mutex_init(from_pvr_device(pvr_dev), &fw_dev->fw_objs.lock);
+	if (err)
+		goto err_mm_takedown;
+
+	err = pvr_fw_process(pvr_dev);
+	if (err)
+		goto err_mm_takedown;
+
+	/* Initialise KCCB and FWCCB. */
+	err = pvr_kccb_init(pvr_dev);
+	if (err)
+		goto err_fw_cleanup;
+
+	err = pvr_fwccb_init(pvr_dev);
+	if (err)
+		goto err_kccb_fini;
+
+	/* Allocate memory for KCCB return slots. */
+	pvr_dev->kccb.rtn = pvr_fw_object_create_and_map(pvr_dev, kccb_rtn_size,
+							 PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							 NULL, NULL, &pvr_dev->kccb.rtn_obj);
+	if (IS_ERR(pvr_dev->kccb.rtn)) {
+		err = PTR_ERR(pvr_dev->kccb.rtn);
+		goto err_fwccb_fini;
+	}
+
+	err = pvr_fw_create_structures(pvr_dev);
+	if (err)
+		goto err_kccb_rtn_release;
+
+	err = pvr_fw_start(pvr_dev);
+	if (err)
+		goto err_destroy_structures;
+
+	err = pvr_wait_for_fw_boot(pvr_dev);
+	if (err) {
+		drm_err(from_pvr_device(pvr_dev), "Firmware failed to boot\n");
+		goto err_fw_stop;
+	}
+
+	fw_dev->booted = true;
+
+	return 0;
+
+err_fw_stop:
+	pvr_fw_stop(pvr_dev);
+
+err_destroy_structures:
+	pvr_fw_destroy_structures(pvr_dev);
+
+err_kccb_rtn_release:
+	pvr_fw_object_unmap_and_destroy(pvr_dev->kccb.rtn_obj);
+
+err_fwccb_fini:
+	pvr_ccb_fini(&pvr_dev->fwccb);
+
+err_kccb_fini:
+	pvr_kccb_fini(pvr_dev);
+
+err_fw_cleanup:
+	pvr_fw_cleanup(pvr_dev);
+
+err_mm_takedown:
+	drm_mm_takedown(&fw_dev->fw_mm);
+
+	if (fw_dev->defs->fini)
+		fw_dev->defs->fini(pvr_dev);
+
+	return err;
+}
+
+/**
+ * pvr_fw_fini() - Shutdown firmware processor and free associated memory
+ * @pvr_dev: Target PowerVR device
+ */
+void
+pvr_fw_fini(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	fw_dev->booted = false;
+
+	pvr_fw_destroy_structures(pvr_dev);
+	pvr_fw_object_unmap_and_destroy(pvr_dev->kccb.rtn_obj);
+
+	/*
+	 * Ensure FWCCB worker has finished executing before destroying FWCCB. The IRQ handler has
+	 * been unregistered at this point so no new work should be being submitted.
+	 */
+	pvr_ccb_fini(&pvr_dev->fwccb);
+	pvr_kccb_fini(pvr_dev);
+	pvr_fw_cleanup(pvr_dev);
+
+	mutex_lock(&pvr_dev->fw_dev.fw_objs.lock);
+	WARN_ON(!list_empty(&pvr_dev->fw_dev.fw_objs.list));
+	mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock);
+
+	drm_mm_takedown(&fw_dev->fw_mm);
+
+	if (fw_dev->defs->fini)
+		fw_dev->defs->fini(pvr_dev);
+}
+
+/**
+ * pvr_fw_mts_schedule() - Schedule work via an MTS kick
+ * @pvr_dev: Target PowerVR device
+ * @val: Kick mask. Should be a combination of %ROGUE_CR_MTS_SCHEDULE_*
+ */
+void
+pvr_fw_mts_schedule(struct pvr_device *pvr_dev, u32 val)
+{
+	/* Ensure memory is flushed before kicking MTS. */
+	wmb();
+
+	pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_SCHEDULE, val);
+
+	/* Ensure the MTS kick goes through before continuing. */
+	mb();
+}
+
+/**
+ * pvr_fw_structure_cleanup() - Send FW cleanup request for an object
+ * @pvr_dev: Target PowerVR device.
+ * @type: Type of object to cleanup. Must be one of &enum rogue_fwif_cleanup_type.
+ * @fw_obj: Pointer to FW object containing object to cleanup.
+ * @offset: Offset within FW object of object to cleanup.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * -EBUSY if object is busy,
+ *  * -ETIMEDOUT on timeout, or
+ *  * -EIO if device is lost.
+ */
+int
+pvr_fw_structure_cleanup(struct pvr_device *pvr_dev, u32 type, struct pvr_fw_object *fw_obj,
+			 u32 offset)
+{
+	struct rogue_fwif_kccb_cmd cmd;
+	int slot_nr;
+	int idx;
+	int err;
+	u32 rtn;
+
+	struct rogue_fwif_cleanup_request *cleanup_req = &cmd.cmd_data.cleanup_data;
+
+	down_read(&pvr_dev->reset_sem);
+
+	if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx)) {
+		err = -EIO;
+		goto err_up_read;
+	}
+
+	cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_CLEANUP;
+	cmd.kccb_flags = 0;
+	cleanup_req->cleanup_type = type;
+
+	switch (type) {
+	case ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT:
+		pvr_fw_object_get_fw_addr_offset(fw_obj, offset,
+						 &cleanup_req->cleanup_data.context_fw_addr);
+		break;
+	case ROGUE_FWIF_CLEANUP_HWRTDATA:
+		pvr_fw_object_get_fw_addr_offset(fw_obj, offset,
+						 &cleanup_req->cleanup_data.hwrt_data_fw_addr);
+		break;
+	case ROGUE_FWIF_CLEANUP_FREELIST:
+		pvr_fw_object_get_fw_addr_offset(fw_obj, offset,
+						 &cleanup_req->cleanup_data.freelist_fw_addr);
+		break;
+	default:
+		err = -EINVAL;
+		goto err_drm_dev_exit;
+	}
+
+	err = pvr_kccb_send_cmd(pvr_dev, &cmd, &slot_nr);
+	if (err)
+		goto err_drm_dev_exit;
+
+	err = pvr_kccb_wait_for_completion(pvr_dev, slot_nr, HZ, &rtn);
+	if (err)
+		goto err_drm_dev_exit;
+
+	if (rtn & ROGUE_FWIF_KCCB_RTN_SLOT_CLEANUP_BUSY)
+		err = -EBUSY;
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+err_up_read:
+	up_read(&pvr_dev->reset_sem);
+
+	return err;
+}
+
+/**
+ * pvr_fw_object_fw_map() - Map a FW object in firmware address space
+ * @pvr_dev: Device pointer.
+ * @fw_obj: FW object to map.
+ * @dev_addr: Desired address in device space, if a specific address is
+ *            required. 0 otherwise.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if @fw_obj is already mapped but has no references, or
+ *  * Any error returned by DRM.
+ */
+static int
+pvr_fw_object_fw_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj, u64 dev_addr)
+{
+	struct pvr_gem_object *pvr_obj = fw_obj->gem;
+	struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj);
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	int err;
+
+	spin_lock(&fw_dev->fw_mm_lock);
+
+	if (drm_mm_node_allocated(&fw_obj->fw_mm_node)) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (!dev_addr) {
+		/*
+		 * Allocate from the main heap only (firmware heap minus
+		 * config space).
+		 */
+		err = drm_mm_insert_node_in_range(&fw_dev->fw_mm, &fw_obj->fw_mm_node,
+						  gem_obj->size, 0, 0,
+						  fw_dev->fw_heap_info.gpu_addr,
+						  fw_dev->fw_heap_info.gpu_addr +
+						  fw_dev->fw_heap_info.size, 0);
+		if (err)
+			goto err_unlock;
+	} else {
+		fw_obj->fw_mm_node.start = dev_addr;
+		fw_obj->fw_mm_node.size = gem_obj->size;
+		err = drm_mm_reserve_node(&fw_dev->fw_mm, &fw_obj->fw_mm_node);
+		if (err)
+			goto err_unlock;
+	}
+
+	spin_unlock(&fw_dev->fw_mm_lock);
+
+	/* Map object on GPU. */
+	err = fw_dev->defs->vm_map(pvr_dev, fw_obj);
+	if (err)
+		goto err_remove_node;
+
+	fw_obj->fw_addr_offset = (u32)(fw_obj->fw_mm_node.start - fw_dev->fw_mm_base);
+
+	return 0;
+
+err_remove_node:
+	spin_lock(&fw_dev->fw_mm_lock);
+	drm_mm_remove_node(&fw_obj->fw_mm_node);
+
+err_unlock:
+	spin_unlock(&fw_dev->fw_mm_lock);
+
+	return err;
+}
+
+/**
+ * pvr_fw_object_fw_unmap() - Unmap a previously mapped FW object
+ * @fw_obj: FW object to unmap.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if object is not currently mapped.
+ */
+static int
+pvr_fw_object_fw_unmap(struct pvr_fw_object *fw_obj)
+{
+	struct pvr_gem_object *pvr_obj = fw_obj->gem;
+	struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj);
+	struct pvr_device *pvr_dev = to_pvr_device(gem_obj->dev);
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	fw_dev->defs->vm_unmap(pvr_dev, fw_obj);
+
+	spin_lock(&fw_dev->fw_mm_lock);
+
+	if (!drm_mm_node_allocated(&fw_obj->fw_mm_node)) {
+		spin_unlock(&fw_dev->fw_mm_lock);
+		return -EINVAL;
+	}
+
+	drm_mm_remove_node(&fw_obj->fw_mm_node);
+
+	spin_unlock(&fw_dev->fw_mm_lock);
+
+	return 0;
+}
+
+static void *
+pvr_fw_object_create_and_map_common(struct pvr_device *pvr_dev, size_t size,
+				    u64 flags, u64 dev_addr,
+				    void (*init)(void *cpu_ptr, void *priv),
+				    void *init_priv, struct pvr_fw_object **fw_obj_out)
+{
+	struct pvr_fw_object *fw_obj;
+	void *cpu_ptr;
+	int err;
+
+	/* %DRM_PVR_BO_PM_FW_PROTECT is implicit for FW objects. */
+	flags |= DRM_PVR_BO_PM_FW_PROTECT;
+
+	fw_obj = kzalloc(sizeof(*fw_obj), GFP_KERNEL);
+	if (!fw_obj)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&fw_obj->node);
+	fw_obj->init = init;
+	fw_obj->init_priv = init_priv;
+
+	fw_obj->gem = pvr_gem_object_create(pvr_dev, size, flags);
+	if (IS_ERR(fw_obj->gem)) {
+		err = PTR_ERR(fw_obj->gem);
+		fw_obj->gem = NULL;
+		goto err_put_object;
+	}
+
+	err = pvr_fw_object_fw_map(pvr_dev, fw_obj, dev_addr);
+	if (err)
+		goto err_put_object;
+
+	cpu_ptr = pvr_fw_object_vmap(fw_obj);
+	if (IS_ERR(cpu_ptr)) {
+		err = PTR_ERR(cpu_ptr);
+		goto err_put_object;
+	}
+
+	*fw_obj_out = fw_obj;
+
+	if (fw_obj->init)
+		fw_obj->init(cpu_ptr, fw_obj->init_priv);
+
+	mutex_lock(&pvr_dev->fw_dev.fw_objs.lock);
+	list_add_tail(&fw_obj->node, &pvr_dev->fw_dev.fw_objs.list);
+	mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock);
+
+	return cpu_ptr;
+
+err_put_object:
+	pvr_fw_object_destroy(fw_obj);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_fw_object_create() - Create a FW object and map to firmware
+ * @pvr_dev: PowerVR device pointer.
+ * @size: Size of object, in bytes.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ * @init: Initialisation callback.
+ * @init_priv: Private pointer to pass to initialisation callback.
+ * @fw_obj_out: Pointer to location to store created object pointer.
+ *
+ * %DRM_PVR_BO_DEVICE_PM_FW_PROTECT is implied for all FW objects. Consequently,
+ * this function will fail if @flags has %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS
+ * set.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_fw_object_create_common().
+ */
+int
+pvr_fw_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags,
+		     void (*init)(void *cpu_ptr, void *priv), void *init_priv,
+		     struct pvr_fw_object **fw_obj_out)
+{
+	void *cpu_ptr;
+
+	cpu_ptr = pvr_fw_object_create_and_map_common(pvr_dev, size, flags, 0, init, init_priv,
+						      fw_obj_out);
+	if (IS_ERR(cpu_ptr))
+		return PTR_ERR(cpu_ptr);
+
+	pvr_fw_object_vunmap(*fw_obj_out);
+
+	return 0;
+}
+
+/**
+ * pvr_fw_object_create_and_map() - Create a FW object and map to firmware and CPU
+ * @pvr_dev: PowerVR device pointer.
+ * @size: Size of object, in bytes.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ * @init: Initialisation callback.
+ * @init_priv: Private pointer to pass to initialisation callback.
+ * @fw_obj_out: Pointer to location to store created object pointer.
+ *
+ * %DRM_PVR_BO_DEVICE_PM_FW_PROTECT is implied for all FW objects. Consequently,
+ * this function will fail if @flags has %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS
+ * set.
+ *
+ * Caller is responsible for calling pvr_fw_object_vunmap() to release the CPU
+ * mapping.
+ *
+ * Returns:
+ *  * Pointer to CPU mapping of newly created object, or
+ *  * Any error returned by pvr_fw_object_create(), or
+ *  * Any error returned by pvr_fw_object_vmap().
+ */
+void *
+pvr_fw_object_create_and_map(struct pvr_device *pvr_dev, size_t size, u64 flags,
+			     void (*init)(void *cpu_ptr, void *priv),
+			     void *init_priv, struct pvr_fw_object **fw_obj_out)
+{
+	return pvr_fw_object_create_and_map_common(pvr_dev, size, flags, 0, init, init_priv,
+						   fw_obj_out);
+}
+
+/**
+ * pvr_fw_object_create_and_map_offset() - Create a FW object and map to
+ * firmware at the provided offset and to the CPU.
+ * @pvr_dev: PowerVR device pointer.
+ * @dev_offset: Base address of desired FW mapping, offset from start of FW heap.
+ * @size: Size of object, in bytes.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ * @init: Initialisation callback.
+ * @init_priv: Private pointer to pass to initialisation callback.
+ * @fw_obj_out: Pointer to location to store created object pointer.
+ *
+ * %DRM_PVR_BO_DEVICE_PM_FW_PROTECT is implied for all FW objects. Consequently,
+ * this function will fail if @flags has %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS
+ * set.
+ *
+ * Caller is responsible for calling pvr_fw_object_vunmap() to release the CPU
+ * mapping.
+ *
+ * Returns:
+ *  * Pointer to CPU mapping of newly created object, or
+ *  * Any error returned by pvr_fw_object_create(), or
+ *  * Any error returned by pvr_fw_object_vmap().
+ */
+void *
+pvr_fw_object_create_and_map_offset(struct pvr_device *pvr_dev,
+				    u32 dev_offset, size_t size, u64 flags,
+				    void (*init)(void *cpu_ptr, void *priv),
+				    void *init_priv, struct pvr_fw_object **fw_obj_out)
+{
+	u64 dev_addr = pvr_dev->fw_dev.fw_mm_base + dev_offset;
+
+	return pvr_fw_object_create_and_map_common(pvr_dev, size, flags, dev_addr, init, init_priv,
+						   fw_obj_out);
+}
+
+/**
+ * pvr_fw_object_destroy() - Destroy a pvr_fw_object
+ * @fw_obj: Pointer to object to destroy.
+ */
+void pvr_fw_object_destroy(struct pvr_fw_object *fw_obj)
+{
+	struct pvr_gem_object *pvr_obj = fw_obj->gem;
+	struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj);
+	struct pvr_device *pvr_dev = to_pvr_device(gem_obj->dev);
+
+	mutex_lock(&pvr_dev->fw_dev.fw_objs.lock);
+	list_del(&fw_obj->node);
+	mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock);
+
+	if (drm_mm_node_allocated(&fw_obj->fw_mm_node)) {
+		/* If we can't unmap, leak the memory. */
+		if (WARN_ON(pvr_fw_object_fw_unmap(fw_obj)))
+			return;
+	}
+
+	if (fw_obj->gem)
+		pvr_gem_object_put(fw_obj->gem);
+
+	kfree(fw_obj);
+}
+
+/**
+ * pvr_fw_object_get_fw_addr_offset() - Return address of object in firmware address space, with
+ * given offset.
+ * @fw_obj: Pointer to object.
+ * @offset: Desired offset from start of object.
+ * @fw_addr_out: Location to store address to.
+ */
+void pvr_fw_object_get_fw_addr_offset(struct pvr_fw_object *fw_obj, u32 offset, u32 *fw_addr_out)
+{
+	struct pvr_gem_object *pvr_obj = fw_obj->gem;
+	struct pvr_device *pvr_dev = to_pvr_device(gem_from_pvr_gem(pvr_obj)->dev);
+
+	*fw_addr_out = pvr_dev->fw_dev.defs->get_fw_addr_with_offset(fw_obj, offset);
+}
+
+/*
+ * pvr_fw_hard_reset() - Re-initialise the FW code and data segments, and reset all global FW
+ *                       structures
+ * @pvr_dev: Device pointer
+ *
+ * If this function returns an error then the caller must regard the device as lost.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_fw_init_dev_structures() or pvr_fw_reset_all().
+ */
+int
+pvr_fw_hard_reset(struct pvr_device *pvr_dev)
+{
+	struct list_head *pos;
+	int err;
+
+	/* Reset all FW objects */
+	mutex_lock(&pvr_dev->fw_dev.fw_objs.lock);
+
+	list_for_each(pos, &pvr_dev->fw_dev.fw_objs.list) {
+		struct pvr_fw_object *fw_obj = container_of(pos, struct pvr_fw_object, node);
+		void *cpu_ptr = pvr_fw_object_vmap(fw_obj);
+
+		WARN_ON(IS_ERR(cpu_ptr));
+
+		if (!(fw_obj->gem->flags & PVR_BO_FW_NO_CLEAR_ON_RESET)) {
+			memset(cpu_ptr, 0, pvr_gem_object_size(fw_obj->gem));
+
+			if (fw_obj->init)
+				fw_obj->init(cpu_ptr, fw_obj->init_priv);
+		}
+
+		pvr_fw_object_vunmap(fw_obj);
+	}
+
+	mutex_unlock(&pvr_dev->fw_dev.fw_objs.lock);
+
+	err = pvr_fw_reinit_code_data(pvr_dev);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_fw.h b/drivers/gpu/drm/imagination/pvr_fw.h
index 8331344e83ea6..5cd3ef08d82b4 100644
--- a/drivers/gpu/drm/imagination/pvr_fw.h
+++ b/drivers/gpu/drm/imagination/pvr_fw.h
@@ -5,6 +5,10 @@
 #define PVR_FW_H
 
 #include "pvr_fw_info.h"
+#include "pvr_fw_trace.h"
+#include "pvr_gem.h"
+
+#include <drm/drm_mm.h>
 
 #include <linux/types.h>
 
@@ -12,6 +16,289 @@
 struct pvr_device;
 struct pvr_file;
 
+/* Forward declaration from "pvr_vm.h". */
+struct pvr_vm_context;
+
+#define ROGUE_FWIF_FWCCB_NUMCMDS_LOG2 5
+
+#define ROGUE_FWIF_KCCB_NUMCMDS_LOG2_DEFAULT 7
+
+/**
+ * struct pvr_fw_object - container for firmware memory allocations
+ */
+struct pvr_fw_object {
+	/** @ref_count: FW object reference counter. */
+	struct kref ref_count;
+
+	/** @gem: GEM object backing the FW object. */
+	struct pvr_gem_object *gem;
+
+	/**
+	 * @fw_mm_node: Node representing mapping in FW address space. @pvr_obj->lock must
+	 *              be held when writing.
+	 */
+	struct drm_mm_node fw_mm_node;
+
+	/**
+	 * @fw_addr_offset: Virtual address offset of firmware mapping. Only
+	 *                  valid if @flags has %PVR_GEM_OBJECT_FLAGS_FW_MAPPED
+	 *                  set.
+	 */
+	u32 fw_addr_offset;
+
+	/**
+	 * @init: Initialisation callback. Will be called on object creation and FW hard reset.
+	 *        Object will have been zeroed before this is called.
+	 */
+	void (*init)(void *cpu_ptr, void *priv);
+
+	/** @init_priv: Private data for initialisation callback. */
+	void *init_priv;
+
+	/** @node: Node for firmware object list. */
+	struct list_head node;
+};
+
+/**
+ * struct pvr_fw_defs - FW processor function table and static definitions
+ */
+struct pvr_fw_defs {
+	/**
+	 * @init:
+	 *
+	 * FW processor specific initialisation.
+	 * @pvr_dev: Target PowerVR device.
+	 *
+	 * This function must call pvr_fw_heap_calculate() to initialise the firmware heap for this
+	 * FW processor.
+	 *
+	 * This function is mandatory.
+	 *
+	 * Returns:
+	 *  * 0 on success, or
+	 *  * Any appropriate error on failure.
+	 */
+	int (*init)(struct pvr_device *pvr_dev);
+
+	/**
+	 * @fini:
+	 *
+	 * FW processor specific finalisation.
+	 * @pvr_dev: Target PowerVR device.
+	 *
+	 * This function is optional.
+	 */
+	void (*fini)(struct pvr_device *pvr_dev);
+
+	/**
+	 * @fw_process:
+	 *
+	 * Load and process firmware image.
+	 * @pvr_dev: Target PowerVR device.
+	 * @fw: Pointer to firmware image.
+	 * @fw_code_ptr: Pointer to firmware code section.
+	 * @fw_data_ptr: Pointer to firmware data section.
+	 * @fw_core_code_ptr: Pointer to firmware core code section. May be %NULL.
+	 * @fw_core_data_ptr: Pointer to firmware core data section. May be %NULL.
+	 * @core_code_alloc_size: Total allocation size of core code section.
+	 *
+	 * This function is mandatory.
+	 *
+	 * Returns:
+	 *  * 0 on success, or
+	 *  * Any appropriate error on failure.
+	 */
+	int (*fw_process)(struct pvr_device *pvr_dev, const u8 *fw,
+			  u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr,
+			  u8 *fw_core_data_ptr, u32 core_code_alloc_size);
+
+	/**
+	 * @vm_map:
+	 *
+	 * Map FW object into FW processor address space.
+	 * @pvr_dev: Target PowerVR device.
+	 * @fw_obj: FW object to map.
+	 *
+	 * This function is mandatory.
+	 *
+	 * Returns:
+	 *  * 0 on success, or
+	 *  * Any appropriate error on failure.
+	 */
+	int (*vm_map)(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj);
+
+	/**
+	 * @vm_unmap:
+	 *
+	 * Unmap FW object from FW processor address space.
+	 * @pvr_dev: Target PowerVR device.
+	 * @fw_obj: FW object to map.
+	 *
+	 * This function is mandatory.
+	 */
+	void (*vm_unmap)(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj);
+
+	/**
+	 * @get_fw_addr_with_offset:
+	 *
+	 * Called to get address of object in firmware address space, with offset.
+	 * @fw_obj: Pointer to object.
+	 * @offset: Desired offset from start of object.
+	 *
+	 * This function is mandatory.
+	 *
+	 * Returns:
+	 *  * Address in firmware address space.
+	 */
+	u32 (*get_fw_addr_with_offset)(struct pvr_fw_object *fw_obj, u32 offset);
+
+	/**
+	 * @wrapper_init:
+	 *
+	 * Called to initialise FW wrapper.
+	 * @pvr_dev: Target PowerVR device.
+	 *
+	 * This function is mandatory.
+	 *
+	 * Returns:
+	 *  * 0 on success.
+	 *  * Any appropriate error on failure.
+	 */
+	int (*wrapper_init)(struct pvr_device *pvr_dev);
+
+	/**
+	 * @has_fixed_data_addr:
+	 *
+	 * Called to check if firmware fixed data must be loaded at the address given by the
+	 * firmware layout table.
+	 *
+	 * This function is mandatory.
+	 *
+	 * Returns:
+	 *  * %true if firmware fixed data must be loaded at the address given by the firmware
+	 *    layout table.
+	 *  * %false otherwise.
+	 */
+	bool (*has_fixed_data_addr)(void);
+
+	/**
+	 * @irq: FW Interrupt information.
+	 *
+	 * Those are processor dependent, and should be initialized by the
+	 * processor backend in pvr_fw_funcs::init().
+	 */
+	struct {
+		/** @enable_reg: FW interrupt enable register. */
+		u32 enable_reg;
+
+		/** @status_reg: FW interrupt status register. */
+		u32 status_reg;
+
+		/**
+		 * @clear_reg: FW interrupt clear register.
+		 *
+		 * If @status_reg == @clear_reg, we clear by write a bit to zero,
+		 * otherwise we clear by writing a bit to one.
+		 */
+		u32 clear_reg;
+
+		/** @event_mask: Bitmask of events to listen for. */
+		u32 event_mask;
+
+		/** @clear_mask: Value to write to the clear_reg in order to clear FW IRQs. */
+		u32 clear_mask;
+	} irq;
+};
+
+/**
+ * struct pvr_fw_mem - FW memory allocations
+ */
+struct pvr_fw_mem {
+	/** @code_obj: Object representing firmware code. */
+	struct pvr_fw_object *code_obj;
+
+	/** @data_obj: Object representing firmware data. */
+	struct pvr_fw_object *data_obj;
+
+	/**
+	 * @core_code_obj: Object representing firmware core code. May be
+	 *                 %NULL if firmware does not contain this section.
+	 */
+	struct pvr_fw_object *core_code_obj;
+
+	/**
+	 * @core_data_obj: Object representing firmware core data. May be
+	 *                 %NULL if firmware does not contain this section.
+	 */
+	struct pvr_fw_object *core_data_obj;
+
+	/** @code: Driver-side copy of firmware code. */
+	u8 *code;
+
+	/** @data: Driver-side copy of firmware data. */
+	u8 *data;
+
+	/**
+	 * @core_code: Driver-side copy of firmware core code. May be %NULL if firmware does not
+	 *             contain this section.
+	 */
+	u8 *core_code;
+
+	/**
+	 * @core_data: Driver-side copy of firmware core data. May be %NULL if firmware does not
+	 *             contain this section.
+	 */
+	u8 *core_data;
+
+	/** @code_alloc_size: Allocation size of firmware code section. */
+	u32 code_alloc_size;
+
+	/** @data_alloc_size: Allocation size of firmware data section. */
+	u32 data_alloc_size;
+
+	/** @core_code_alloc_size: Allocation size of firmware core code section. */
+	u32 core_code_alloc_size;
+
+	/** @core_data_alloc_size: Allocation size of firmware core data section. */
+	u32 core_data_alloc_size;
+
+	/**
+	 * @fwif_connection_ctl_obj: Object representing FWIF connection control
+	 *                           structure.
+	 */
+	struct pvr_fw_object *fwif_connection_ctl_obj;
+
+	/** @osinit_obj: Object representing FW OSINIT structure. */
+	struct pvr_fw_object *osinit_obj;
+
+	/** @sysinit_obj: Object representing FW SYSINIT structure. */
+	struct pvr_fw_object *sysinit_obj;
+
+	/** @osdata_obj: Object representing FW OSDATA structure. */
+	struct pvr_fw_object *osdata_obj;
+
+	/** @hwrinfobuf_obj: Object representing FW hwrinfobuf structure. */
+	struct pvr_fw_object *hwrinfobuf_obj;
+
+	/** @sysdata_obj: Object representing FW SYSDATA structure. */
+	struct pvr_fw_object *sysdata_obj;
+
+	/** @power_sync_obj: Object representing power sync state. */
+	struct pvr_fw_object *power_sync_obj;
+
+	/** @fault_page_obj: Object representing FW fault page. */
+	struct pvr_fw_object *fault_page_obj;
+
+	/** @gpu_util_fwcb_obj: Object representing FW GPU utilisation control structure. */
+	struct pvr_fw_object *gpu_util_fwcb_obj;
+
+	/** @runtime_cfg_obj: Object representing FW runtime config structure. */
+	struct pvr_fw_object *runtime_cfg_obj;
+
+	/** @mmucache_sync_obj: Object used as the sync parameter in an MMU cache operation. */
+	struct pvr_fw_object *mmucache_sync_obj;
+};
+
 struct pvr_fw_device {
 	/** @firmware: Handle to the firmware loaded into the device. */
 	const struct firmware *firmware;
@@ -22,13 +309,200 @@ struct pvr_fw_device {
 	/** @layout_entries: Pointer to firmware layout. */
 	const struct pvr_fw_layout_entry *layout_entries;
 
+	/** @mem: Structure containing objects representing firmware memory allocations. */
+	struct pvr_fw_mem mem;
+
+	/** @booted: %true if the firmware has been booted, %false otherwise. */
+	bool booted;
+
 	/**
 	 * @processor_type: FW processor type for this device. Must be one of
 	 *                  %PVR_FW_PROCESSOR_TYPE_*.
 	 */
 	u16 processor_type;
+
+	/** @funcs: Function table for the FW processor used by this device. */
+	const struct pvr_fw_defs *defs;
+
+	/** @processor_data: Pointer to data specific to FW processor. */
+	union {
+		/** @mips_data: Pointer to MIPS-specific data. */
+		struct pvr_fw_mips_data *mips_data;
+	} processor_data;
+
+	/** @fw_heap_info: Firmware heap information. */
+	struct {
+		/** @gpu_addr: Base address of firmware heap in GPU address space. */
+		u64 gpu_addr;
+
+		/** @size: Size of main area of heap. */
+		u32 size;
+
+		/** @offset_mask: Mask for offsets within FW heap. */
+		u32 offset_mask;
+
+		/** @raw_size: Raw size of heap, including reserved areas. */
+		u32 raw_size;
+
+		/** @log2_size: Log2 of raw size of heap. */
+		u32 log2_size;
+
+		/** @config_offset: Offset of config area within heap. */
+		u32 config_offset;
+
+		/** @reserved_size: Size of reserved area in heap. */
+		u32 reserved_size;
+	} fw_heap_info;
+
+	/** @fw_mm: Firmware address space allocator. */
+	struct drm_mm fw_mm;
+
+	/** @fw_mm_lock: Lock protecting access to &fw_mm. */
+	spinlock_t fw_mm_lock;
+
+	/** @fw_mm_base: Base address of address space managed by @fw_mm. */
+	u64 fw_mm_base;
+
+	/**
+	 * @fwif_connection_ctl: Pointer to CPU mapping of FWIF connection
+	 *                       control structure.
+	 */
+	struct rogue_fwif_connection_ctl *fwif_connection_ctl;
+
+	/** @fwif_sysinit: Pointer to CPU mapping of FW SYSINIT structure. */
+	struct rogue_fwif_sysinit *fwif_sysinit;
+
+	/** @fwif_sysdata: Pointer to CPU mapping of FW SYSDATA structure. */
+	struct rogue_fwif_sysdata *fwif_sysdata;
+
+	/** @fwif_osinit: Pointer to CPU mapping of FW OSINIT structure. */
+	struct rogue_fwif_osinit *fwif_osinit;
+
+	/** @fwif_osdata: Pointer to CPU mapping of FW OSDATA structure. */
+	struct rogue_fwif_osdata *fwif_osdata;
+
+	/** @power_sync: Pointer to CPU mapping of power sync state. */
+	u32 *power_sync;
+
+	/** @hwrinfobuf: Pointer to CPU mapping of FW HWR info buffer. */
+	struct rogue_fwif_hwrinfobuf *hwrinfobuf;
+
+	/** @fw_trace: Device firmware trace buffer state. */
+	struct pvr_fw_trace fw_trace;
+
+	/** @fw_objs: Structure tracking FW objects. */
+	struct {
+		/** @list: Head of FW object list. */
+		struct list_head list;
+
+		/** @lock: Lock protecting access to FW object list. */
+		struct mutex lock;
+	} fw_objs;
 };
 
+#define pvr_fw_irq_read_reg(pvr_dev, name) \
+	pvr_cr_read32((pvr_dev), (pvr_dev)->fw_dev.defs->irq.name ## _reg)
+
+#define pvr_fw_irq_write_reg(pvr_dev, name, value) \
+	pvr_cr_write32((pvr_dev), (pvr_dev)->fw_dev.defs->irq.name ## _reg, value)
+
+#define pvr_fw_irq_pending(pvr_dev) \
+	(pvr_fw_irq_read_reg(pvr_dev, status) & (pvr_dev)->fw_dev.defs->irq.event_mask)
+
+#define pvr_fw_irq_clear(pvr_dev) \
+	pvr_fw_irq_write_reg(pvr_dev, clear, (pvr_dev)->fw_dev.defs->irq.clear_mask)
+
+#define pvr_fw_irq_enable(pvr_dev) \
+	pvr_fw_irq_write_reg(pvr_dev, enable, (pvr_dev)->fw_dev.defs->irq.event_mask)
+
+#define pvr_fw_irq_disable(pvr_dev) \
+	pvr_fw_irq_write_reg(pvr_dev, enable, 0)
+
+extern const struct pvr_fw_defs pvr_fw_defs_meta;
+extern const struct pvr_fw_defs pvr_fw_defs_mips;
+
 int pvr_fw_validate_init_device_info(struct pvr_device *pvr_dev);
+int pvr_fw_init(struct pvr_device *pvr_dev);
+void pvr_fw_fini(struct pvr_device *pvr_dev);
+
+int pvr_wait_for_fw_boot(struct pvr_device *pvr_dev);
+
+int
+pvr_fw_hard_reset(struct pvr_device *pvr_dev);
+
+void pvr_fw_mts_schedule(struct pvr_device *pvr_dev, u32 val);
+
+void
+pvr_fw_heap_info_init(struct pvr_device *pvr_dev, u32 log2_size, u32 reserved_size);
+
+const struct pvr_fw_layout_entry *
+pvr_fw_find_layout_entry(struct pvr_device *pvr_dev, enum pvr_fw_section_id id);
+int
+pvr_fw_find_mmu_segment(struct pvr_device *pvr_dev, u32 addr, u32 size, void *fw_code_ptr,
+			void *fw_data_ptr, void *fw_core_code_ptr, void *fw_core_data_ptr,
+			void **host_addr_out);
+
+int
+pvr_fw_structure_cleanup(struct pvr_device *pvr_dev, u32 type, struct pvr_fw_object *fw_obj,
+			 u32 offset);
+
+int pvr_fw_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags,
+			 void (*init)(void *cpu_ptr, void *priv), void *init_priv,
+			 struct pvr_fw_object **pvr_obj_out);
+
+void *pvr_fw_object_create_and_map(struct pvr_device *pvr_dev, size_t size, u64 flags,
+				   void (*init)(void *cpu_ptr, void *priv),
+				   void *init_priv, struct pvr_fw_object **pvr_obj_out);
+
+void *
+pvr_fw_object_create_and_map_offset(struct pvr_device *pvr_dev, u32 dev_offset, size_t size,
+				    u64 flags, void (*init)(void *cpu_ptr, void *priv),
+				    void *init_priv, struct pvr_fw_object **pvr_obj_out);
+
+static __always_inline void *
+pvr_fw_object_vmap(struct pvr_fw_object *fw_obj)
+{
+	return pvr_gem_object_vmap(fw_obj->gem);
+}
+
+static __always_inline void
+pvr_fw_object_vunmap(struct pvr_fw_object *fw_obj)
+{
+	pvr_gem_object_vunmap(fw_obj->gem);
+}
+
+void pvr_fw_object_destroy(struct pvr_fw_object *fw_obj);
+
+static __always_inline void
+pvr_fw_object_unmap_and_destroy(struct pvr_fw_object *fw_obj)
+{
+	pvr_fw_object_vunmap(fw_obj);
+	pvr_fw_object_destroy(fw_obj);
+}
+
+/**
+ * pvr_fw_get_dma_addr() - Get DMA address for given offset in firmware object
+ * @fw_obj: Pointer to object to lookup address in.
+ * @offset: Offset within object to lookup address at.
+ * @dma_addr_out: Pointer to location to store DMA address.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if object is not currently backed, or if @offset is out of valid
+ *    range for this object.
+ */
+static __always_inline int
+pvr_fw_object_get_dma_addr(struct pvr_fw_object *fw_obj, u32 offset, dma_addr_t *dma_addr_out)
+{
+	return pvr_gem_get_dma_addr(fw_obj->gem, offset, dma_addr_out);
+}
+
+void pvr_fw_object_get_fw_addr_offset(struct pvr_fw_object *fw_obj, u32 offset, u32 *fw_addr_out);
+
+static __always_inline void
+pvr_fw_object_get_fw_addr(struct pvr_fw_object *fw_obj, u32 *fw_addr_out)
+{
+	pvr_fw_object_get_fw_addr_offset(fw_obj, 0, fw_addr_out);
+}
 
 #endif /* PVR_FW_H */
diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.c b/drivers/gpu/drm/imagination/pvr_fw_meta.c
new file mode 100644
index 0000000000000..119934c36184a
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_meta.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_fw.h"
+#include "pvr_fw_info.h"
+#include "pvr_gem.h"
+#include "pvr_rogue_cr_defs.h"
+#include "pvr_rogue_meta.h"
+#include "pvr_vm.h"
+
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/ktime.h>
+#include <linux/types.h>
+
+#define ROGUE_FW_HEAP_META_SHIFT 25 /* 32 MB */
+
+#define POLL_TIMEOUT_USEC 1000000
+
+/**
+ * pvr_meta_cr_read32() - Read a META register via the Slave Port
+ * @pvr_dev: Device pointer.
+ * @reg_addr: Address of register to read.
+ * @reg_value_out: Pointer to location to store register value.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_cr_poll_reg32().
+ */
+int
+pvr_meta_cr_read32(struct pvr_device *pvr_dev, u32 reg_addr, u32 *reg_value_out)
+{
+	int err;
+
+	/* Wait for Slave Port to be Ready. */
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL1,
+				ROGUE_CR_META_SP_MSLVCTRL1_READY_EN |
+					ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN,
+				ROGUE_CR_META_SP_MSLVCTRL1_READY_EN |
+					ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN,
+				POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	/* Issue a Read. */
+	pvr_cr_write32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL0,
+		       reg_addr | ROGUE_CR_META_SP_MSLVCTRL0_RD_EN);
+	(void)pvr_cr_read32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL0); /* Fence write. */
+
+	/* Wait for Slave Port to be Ready. */
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_META_SP_MSLVCTRL1,
+				ROGUE_CR_META_SP_MSLVCTRL1_READY_EN |
+					ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN,
+				ROGUE_CR_META_SP_MSLVCTRL1_READY_EN |
+					ROGUE_CR_META_SP_MSLVCTRL1_GBLPORT_IDLE_EN,
+				POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	*reg_value_out = pvr_cr_read32(pvr_dev, ROGUE_CR_META_SP_MSLVDATAX);
+
+	return 0;
+}
+
+static int
+pvr_meta_wrapper_init(struct pvr_device *pvr_dev)
+{
+	u64 garten_config;
+
+	/* Configure META to Master boot. */
+	pvr_cr_write64(pvr_dev, ROGUE_CR_META_BOOT, ROGUE_CR_META_BOOT_MODE_EN);
+
+	/* Set Garten IDLE to META idle and Set the Garten Wrapper BIF Fence address. */
+
+	/* Garten IDLE bit controlled by META. */
+	garten_config = ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_META;
+
+	/* The fence addr is set during the fw init sequence. */
+
+	/* Set PC = 0 for fences. */
+	garten_config &=
+		ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_CLRMSK;
+	garten_config |=
+		(u64)MMU_CONTEXT_MAPPING_FWPRIV
+		<< ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_PC_BASE_SHIFT;
+
+	/* Set SLC DM=META. */
+	garten_config |= ((u64)ROGUE_FW_SEGMMU_META_BIFDM_ID)
+			 << ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_FENCE_DM_SHIFT;
+
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG, garten_config);
+
+	return 0;
+}
+
+static __always_inline void
+add_boot_arg(u32 **boot_conf, u32 param, u32 data)
+{
+	*(*boot_conf)++ = param;
+	*(*boot_conf)++ = data;
+}
+
+static int
+meta_ldr_cmd_loadmem(struct drm_device *drm_dev, const u8 *fw,
+		     struct rogue_meta_ldr_l1_data_blk *l1_data, u32 coremem_size, u8 *fw_code_ptr,
+		     u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr, const u32 fw_size)
+{
+	struct rogue_meta_ldr_l2_data_blk *l2_block =
+		(struct rogue_meta_ldr_l2_data_blk *)(fw +
+						      l1_data->cmd_data[1]);
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	u32 offset = l1_data->cmd_data[0];
+	u32 data_size;
+	void *write_addr;
+	int err;
+
+	/* Verify header is within bounds. */
+	if (((u8 *)l2_block - fw) >= fw_size || ((u8 *)(l2_block + 1) - fw) >= fw_size)
+		return -EINVAL;
+
+	data_size = l2_block->length - 6 /* L2 Tag length and checksum */;
+
+	/* Verify data is within bounds. */
+	if (((u8 *)l2_block->block_data - fw) >= fw_size ||
+	    ((((u8 *)l2_block->block_data) + data_size) - fw) >= fw_size)
+		return -EINVAL;
+
+	if (!ROGUE_META_IS_COREMEM_CODE(offset, coremem_size) &&
+	    !ROGUE_META_IS_COREMEM_DATA(offset, coremem_size)) {
+		/* Global range is aliased to local range */
+		offset &= ~META_MEM_GLOBAL_RANGE_BIT;
+	}
+
+	err = pvr_fw_find_mmu_segment(pvr_dev, offset, data_size, fw_code_ptr, fw_data_ptr,
+				      fw_core_code_ptr, fw_core_data_ptr, &write_addr);
+	if (err) {
+		drm_err(drm_dev,
+			"Addr 0x%x (size: %d) not found in any firmware segment",
+			offset, data_size);
+		return err;
+	}
+
+	memcpy(write_addr, l2_block->block_data, data_size);
+
+	return 0;
+}
+
+static int
+meta_ldr_cmd_zeromem(struct drm_device *drm_dev,
+		     struct rogue_meta_ldr_l1_data_blk *l1_data, u32 coremem_size,
+		     u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr)
+{
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	u32 offset = l1_data->cmd_data[0];
+	u32 byte_count = l1_data->cmd_data[1];
+	void *write_addr;
+	int err;
+
+	if (ROGUE_META_IS_COREMEM_DATA(offset, coremem_size)) {
+		/* cannot zero coremem directly */
+		return 0;
+	}
+
+	/* Global range is aliased to local range */
+	offset &= ~META_MEM_GLOBAL_RANGE_BIT;
+
+	err = pvr_fw_find_mmu_segment(pvr_dev, offset, byte_count, fw_code_ptr, fw_data_ptr,
+				      fw_core_code_ptr, fw_core_data_ptr, &write_addr);
+	if (err) {
+		drm_err(drm_dev,
+			"Addr 0x%x (size: %d) not found in any firmware segment",
+			offset, byte_count);
+		return err;
+	}
+
+	memset(write_addr, 0, byte_count);
+
+	return 0;
+}
+
+static int
+meta_ldr_cmd_config(struct drm_device *drm_dev, const u8 *fw,
+		    struct rogue_meta_ldr_l1_data_blk *l1_data,
+		    const u32 fw_size, u32 **boot_conf_ptr)
+{
+	struct rogue_meta_ldr_l2_data_blk *l2_block =
+		(struct rogue_meta_ldr_l2_data_blk *)(fw +
+						      l1_data->cmd_data[0]);
+	struct rogue_meta_ldr_cfg_blk *config_command;
+	u32 l2_block_size;
+	u32 curr_block_size = 0;
+	u32 *boot_conf = boot_conf_ptr ? *boot_conf_ptr : NULL;
+
+	/* Verify block header is within bounds. */
+	if (((u8 *)l2_block - fw) >= fw_size || ((u8 *)(l2_block + 1) - fw) >= fw_size)
+		return -EINVAL;
+
+	l2_block_size = l2_block->length - 6 /* L2 Tag length and checksum */;
+	config_command = (struct rogue_meta_ldr_cfg_blk *)l2_block->block_data;
+
+	if (((u8 *)config_command - fw) >= fw_size ||
+	    ((((u8 *)config_command) + l2_block_size) - fw) >= fw_size)
+		return -EINVAL;
+
+	while (l2_block_size >= 12) {
+		if (config_command->type != ROGUE_META_LDR_CFG_WRITE)
+			return -EINVAL;
+
+		/*
+		 * Only write to bootloader if we got a valid pointer to the FW
+		 * code allocation.
+		 */
+		if (boot_conf) {
+			u32 register_offset = config_command->block_data[0];
+			u32 register_value = config_command->block_data[1];
+
+			/* Do register write */
+			add_boot_arg(&boot_conf, register_offset,
+				     register_value);
+		}
+
+		curr_block_size = 12;
+		l2_block_size -= curr_block_size;
+		config_command = (struct rogue_meta_ldr_cfg_blk
+					  *)((uintptr_t)config_command +
+					     curr_block_size);
+	}
+
+	if (boot_conf_ptr)
+		*boot_conf_ptr = boot_conf;
+
+	return 0;
+}
+
+/**
+ * process_ldr_command_stream() - Process LDR firmware image and populate
+ *                                firmware sections
+ * @pvr_dev: Device pointer.
+ * @fw: Pointer to firmware image.
+ * @fw_code_ptr: Pointer to FW code section.
+ * @fw_data_ptr: Pointer to FW data section.
+ * @fw_core_code_ptr: Pointer to FW coremem code section.
+ * @fw_core_data_ptr: Pointer to FW coremem data section.
+ * @boot_conf_ptr: Pointer to boot config argument pointer.
+ *
+ * Returns :
+ *  * 0 on success, or
+ *  * -EINVAL on any error in LDR command stream.
+ */
+static int
+process_ldr_command_stream(struct pvr_device *pvr_dev, const u8 *fw, u8 *fw_code_ptr,
+			   u8 *fw_data_ptr, u8 *fw_core_code_ptr,
+			   u8 *fw_core_data_ptr, u32 **boot_conf_ptr)
+{
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	struct rogue_meta_ldr_block_hdr *ldr_header =
+		(struct rogue_meta_ldr_block_hdr *)fw;
+	struct rogue_meta_ldr_l1_data_blk *l1_data =
+		(struct rogue_meta_ldr_l1_data_blk *)(fw + ldr_header->sl_data);
+	const u32 fw_size = pvr_dev->fw_dev.firmware->size;
+	int err;
+
+	u32 *boot_conf = boot_conf_ptr ? *boot_conf_ptr : NULL;
+	u32 coremem_size;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, meta_coremem_size, &coremem_size);
+	if (err)
+		return err;
+
+	coremem_size *= SZ_1K;
+
+	while (l1_data) {
+		/* Verify block header is within bounds. */
+		if (((u8 *)l1_data - fw) >= fw_size || ((u8 *)(l1_data + 1) - fw) >= fw_size)
+			return -EINVAL;
+
+		if (ROGUE_META_LDR_BLK_IS_COMMENT(l1_data->cmd)) {
+			/* Don't process comment blocks */
+			goto next_block;
+		}
+
+		switch (l1_data->cmd & ROGUE_META_LDR_CMD_MASK)
+		case ROGUE_META_LDR_CMD_LOADMEM: {
+			err = meta_ldr_cmd_loadmem(drm_dev, fw, l1_data,
+						   coremem_size,
+						   fw_code_ptr, fw_data_ptr,
+						   fw_core_code_ptr,
+						   fw_core_data_ptr, fw_size);
+			if (err)
+				return err;
+			break;
+
+		case ROGUE_META_LDR_CMD_START_THREADS:
+			/* Don't process this block */
+			break;
+
+		case ROGUE_META_LDR_CMD_ZEROMEM:
+			err = meta_ldr_cmd_zeromem(drm_dev, l1_data,
+						   coremem_size,
+						   fw_code_ptr, fw_data_ptr,
+						   fw_core_code_ptr,
+						   fw_core_data_ptr);
+			if (err)
+				return err;
+			break;
+
+		case ROGUE_META_LDR_CMD_CONFIG:
+			err = meta_ldr_cmd_config(drm_dev, fw, l1_data, fw_size,
+						  &boot_conf);
+			if (err)
+				return err;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+next_block:
+		if (l1_data->next == 0xFFFFFFFF)
+			break;
+
+		l1_data = (struct rogue_meta_ldr_l1_data_blk *)(fw +
+								l1_data->next);
+	}
+
+	if (boot_conf_ptr)
+		*boot_conf_ptr = boot_conf;
+
+	return 0;
+}
+
+static void
+configure_seg_id(u64 seg_out_addr, u32 seg_base, u32 seg_limit, u32 seg_id,
+		 u32 **boot_conf_ptr)
+{
+	u32 seg_out_addr0 = seg_out_addr & 0x00000000FFFFFFFFUL;
+	u32 seg_out_addr1 = (seg_out_addr >> 32) & 0x00000000FFFFFFFFUL;
+	u32 *boot_conf = *boot_conf_ptr;
+
+	/* META segments have a minimum size. */
+	u32 limit_off = max(seg_limit, ROGUE_FW_SEGMMU_ALIGN);
+
+	/* The limit is an offset, therefore off = size - 1. */
+	limit_off -= 1;
+
+	seg_base |= ROGUE_FW_SEGMMU_ALLTHRS_WRITEABLE;
+
+	add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_BASE(seg_id), seg_base);
+	add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_LIMIT(seg_id), limit_off);
+	add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_OUTA0(seg_id), seg_out_addr0);
+	add_boot_arg(&boot_conf, META_CR_MMCU_SEGMENT_N_OUTA1(seg_id), seg_out_addr1);
+
+	*boot_conf_ptr = boot_conf;
+}
+
+static u64 get_fw_obj_gpu_addr(struct pvr_fw_object *fw_obj)
+{
+	struct pvr_device *pvr_dev = to_pvr_device(gem_from_pvr_gem(fw_obj->gem)->dev);
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+
+	return fw_obj->fw_addr_offset + fw_dev->fw_heap_info.gpu_addr;
+}
+
+static void
+configure_seg_mmu(struct pvr_device *pvr_dev, u32 **boot_conf_ptr)
+{
+	const struct pvr_fw_layout_entry *layout_entries = pvr_dev->fw_dev.layout_entries;
+	u32 num_layout_entries = pvr_dev->fw_dev.header->layout_entry_num;
+	u64 seg_out_addr_top;
+	u32 i;
+
+	seg_out_addr_top =
+		ROGUE_FW_SEGMMU_OUTADDR_TOP_SLC(MMU_CONTEXT_MAPPING_FWPRIV,
+						ROGUE_FW_SEGMMU_META_BIFDM_ID);
+
+	for (i = 0; i < num_layout_entries; i++) {
+		/*
+		 * FW code is using the bootloader segment which is already
+		 * configured on boot. FW coremem code and data don't use the
+		 * segment MMU. Only the FW data segment needs to be configured.
+		 */
+		if (layout_entries[i].type == FW_DATA) {
+			u32 seg_id = ROGUE_FW_SEGMMU_DATA_ID;
+			u64 seg_out_addr = get_fw_obj_gpu_addr(pvr_dev->fw_dev.mem.data_obj);
+
+			seg_out_addr += layout_entries[i].alloc_offset;
+			seg_out_addr |= seg_out_addr_top;
+
+			/* Write the sequence to the bootldr. */
+			configure_seg_id(seg_out_addr,
+					 layout_entries[i].base_addr,
+					 layout_entries[i].alloc_size, seg_id,
+					 boot_conf_ptr);
+
+			break;
+		}
+	}
+}
+
+static void
+configure_meta_caches(u32 **boot_conf_ptr)
+{
+	u32 *boot_conf = *boot_conf_ptr;
+	u32 d_cache_t0, i_cache_t0;
+	u32 d_cache_t1, i_cache_t1;
+	u32 d_cache_t2, i_cache_t2;
+	u32 d_cache_t3, i_cache_t3;
+
+	/* Initialise I/Dcache settings */
+	d_cache_t0 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE;
+	d_cache_t1 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE;
+	d_cache_t2 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE;
+	d_cache_t3 = META_CR_SYSC_DCPARTX_CACHED_WRITE_ENABLE;
+	i_cache_t0 = 0;
+	i_cache_t1 = 0;
+	i_cache_t2 = 0;
+	i_cache_t3 = 0;
+
+	d_cache_t0 |= META_CR_SYSC_XCPARTX_LOCAL_ADDR_FULL_CACHE;
+	i_cache_t0 |= META_CR_SYSC_XCPARTX_LOCAL_ADDR_FULL_CACHE;
+
+	/* Local region MMU enhanced bypass: WIN-3 mode for code and data caches */
+	add_boot_arg(&boot_conf, META_CR_MMCU_LOCAL_EBCTRL,
+		     META_CR_MMCU_LOCAL_EBCTRL_ICWIN |
+			     META_CR_MMCU_LOCAL_EBCTRL_DCWIN);
+
+	/* Data cache partitioning thread 0 to 3 */
+	add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(0), d_cache_t0);
+	add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(1), d_cache_t1);
+	add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(2), d_cache_t2);
+	add_boot_arg(&boot_conf, META_CR_SYSC_DCPART(3), d_cache_t3);
+
+	/* Enable data cache hits */
+	add_boot_arg(&boot_conf, META_CR_MMCU_DCACHE_CTRL,
+		     META_CR_MMCU_XCACHE_CTRL_CACHE_HITS_EN);
+
+	/* Instruction cache partitioning thread 0 to 3 */
+	add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(0), i_cache_t0);
+	add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(1), i_cache_t1);
+	add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(2), i_cache_t2);
+	add_boot_arg(&boot_conf, META_CR_SYSC_ICPART(3), i_cache_t3);
+
+	/* Enable instruction cache hits */
+	add_boot_arg(&boot_conf, META_CR_MMCU_ICACHE_CTRL,
+		     META_CR_MMCU_XCACHE_CTRL_CACHE_HITS_EN);
+
+	add_boot_arg(&boot_conf, 0x040000C0, 0);
+
+	*boot_conf_ptr = boot_conf;
+}
+
+static int
+pvr_meta_fw_process(struct pvr_device *pvr_dev, const u8 *fw,
+		    u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr,
+		    u32 core_code_alloc_size)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	u32 *boot_conf;
+	int err;
+
+	boot_conf = ((u32 *)fw_code_ptr) + ROGUE_FW_BOOTLDR_CONF_OFFSET;
+
+	/* Slave port and JTAG accesses are privileged. */
+	add_boot_arg(&boot_conf, META_CR_SYSC_JTAG_THREAD,
+		     META_CR_SYSC_JTAG_THREAD_PRIV_EN);
+
+	configure_seg_mmu(pvr_dev, &boot_conf);
+
+	/* Populate FW sections from LDR image. */
+	err = process_ldr_command_stream(pvr_dev, fw, fw_code_ptr, fw_data_ptr, fw_core_code_ptr,
+					 fw_core_data_ptr, &boot_conf);
+	if (err)
+		return err;
+
+	configure_meta_caches(&boot_conf);
+
+	/* End argument list. */
+	add_boot_arg(&boot_conf, 0, 0);
+
+	if (fw_dev->mem.core_code_obj) {
+		u32 core_code_fw_addr;
+
+		pvr_fw_object_get_fw_addr(fw_dev->mem.core_code_obj, &core_code_fw_addr);
+		add_boot_arg(&boot_conf, core_code_fw_addr, core_code_alloc_size);
+	} else {
+		add_boot_arg(&boot_conf, 0, 0);
+	}
+	/* None of the cores supported by this driver have META DMA. */
+	add_boot_arg(&boot_conf, 0, 0);
+
+	return 0;
+}
+
+static int
+pvr_meta_init(struct pvr_device *pvr_dev)
+{
+	pvr_fw_heap_info_init(pvr_dev, ROGUE_FW_HEAP_META_SHIFT, 0);
+
+	return 0;
+}
+
+static u32
+pvr_meta_get_fw_addr_with_offset(struct pvr_fw_object *fw_obj, u32 offset)
+{
+	u32 fw_addr = fw_obj->fw_addr_offset + offset + ROGUE_FW_SEGMMU_DATA_BASE_ADDRESS;
+
+	/* META cacheability is determined by address. */
+	if (fw_obj->gem->flags & PVR_BO_FW_FLAGS_DEVICE_UNCACHED)
+		fw_addr |= ROGUE_FW_SEGMMU_DATA_META_UNCACHED |
+			   ROGUE_FW_SEGMMU_DATA_VIVT_SLC_UNCACHED;
+
+	return fw_addr;
+}
+
+static int
+pvr_meta_vm_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
+{
+	struct pvr_gem_object *pvr_obj = fw_obj->gem;
+
+	return pvr_vm_map(pvr_dev->kernel_vm_ctx, pvr_obj, 0, fw_obj->fw_mm_node.start,
+			  pvr_gem_object_size(pvr_obj));
+}
+
+static void
+pvr_meta_vm_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
+{
+	pvr_vm_unmap(pvr_dev->kernel_vm_ctx, fw_obj->fw_mm_node.start,
+		     fw_obj->fw_mm_node.size);
+}
+
+static bool
+pvr_meta_has_fixed_data_addr(void)
+{
+	return false;
+}
+
+const struct pvr_fw_defs pvr_fw_defs_meta = {
+	.init = pvr_meta_init,
+	.fw_process = pvr_meta_fw_process,
+	.vm_map = pvr_meta_vm_map,
+	.vm_unmap = pvr_meta_vm_unmap,
+	.get_fw_addr_with_offset = pvr_meta_get_fw_addr_with_offset,
+	.wrapper_init = pvr_meta_wrapper_init,
+	.has_fixed_data_addr = pvr_meta_has_fixed_data_addr,
+	.irq = {
+		.enable_reg = ROGUE_CR_META_SP_MSLVIRQENABLE,
+		.status_reg = ROGUE_CR_META_SP_MSLVIRQSTATUS,
+		.clear_reg = ROGUE_CR_META_SP_MSLVIRQSTATUS,
+		.event_mask = ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_EN,
+		.clear_mask = ROGUE_CR_META_SP_MSLVIRQSTATUS_TRIGVECT2_CLRMSK,
+	},
+};
diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.h b/drivers/gpu/drm/imagination/pvr_fw_meta.h
new file mode 100644
index 0000000000000..911ad700cba66
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_meta.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FW_META_H
+#define PVR_FW_META_H
+
+#include <linux/types.h>
+
+/* Forward declaration from pvr_device.h */
+struct pvr_device;
+
+int pvr_meta_cr_read32(struct pvr_device *pvr_dev, u32 reg_addr, u32 *reg_value_out);
+
+#endif /* PVR_FW_META_H */
diff --git a/drivers/gpu/drm/imagination/pvr_fw_startstop.c b/drivers/gpu/drm/imagination/pvr_fw_startstop.c
new file mode 100644
index 0000000000000..36cec227cfe3c
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_startstop.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_fw.h"
+#include "pvr_fw_meta.h"
+#include "pvr_fw_startstop.h"
+#include "pvr_rogue_cr_defs.h"
+#include "pvr_rogue_meta.h"
+#include "pvr_vm.h"
+
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/ktime.h>
+#include <linux/types.h>
+
+#define POLL_TIMEOUT_USEC 1000000
+
+static void
+rogue_axi_ace_list_init(struct pvr_device *pvr_dev)
+{
+	/* Setup AXI-ACE config. Set everything to outer cache. */
+	u64 reg_val =
+		(3U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_NON_SNOOPING_SHIFT) |
+		(3U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_NON_SNOOPING_SHIFT) |
+		(2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_CACHE_MAINTENANCE_SHIFT) |
+		(2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWDOMAIN_COHERENT_SHIFT) |
+		(2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARDOMAIN_COHERENT_SHIFT) |
+		(2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_AWCACHE_COHERENT_SHIFT) |
+		(2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_COHERENT_SHIFT) |
+		(2U << ROGUE_CR_AXI_ACE_LITE_CONFIGURATION_ARCACHE_CACHE_MAINTENANCE_SHIFT);
+
+	pvr_cr_write64(pvr_dev, ROGUE_CR_AXI_ACE_LITE_CONFIGURATION, reg_val);
+}
+
+static void
+rogue_bif_init(struct pvr_device *pvr_dev)
+{
+	dma_addr_t pc_dma_addr;
+	u64 pc_addr;
+
+	/* Acquire the address of the Kernel Page Catalogue. */
+	pc_dma_addr = pvr_vm_get_page_table_root_addr(pvr_dev->kernel_vm_ctx);
+
+	/* Write the kernel catalogue base. */
+	pc_addr = ((((u64)pc_dma_addr >> ROGUE_CR_BIF_CAT_BASE0_ADDR_ALIGNSHIFT)
+		    << ROGUE_CR_BIF_CAT_BASE0_ADDR_SHIFT) &
+		   ~ROGUE_CR_BIF_CAT_BASE0_ADDR_CLRMSK);
+
+	pvr_cr_write64(pvr_dev, BIF_CAT_BASEX(MMU_CONTEXT_MAPPING_FWPRIV),
+		       pc_addr);
+}
+
+static int
+rogue_slc_init(struct pvr_device *pvr_dev)
+{
+	u16 slc_cache_line_size_bits;
+	u32 reg_val;
+	int err;
+
+	/*
+	 * SLC Misc control.
+	 *
+	 * Note: This is a 64bit register and we set only the lower 32bits
+	 *       leaving the top 32bits (ROGUE_CR_SLC_CTRL_MISC_SCRAMBLE_BITS)
+	 *       unchanged from the HW default.
+	 */
+	reg_val = (pvr_cr_read32(pvr_dev, ROGUE_CR_SLC_CTRL_MISC) &
+		      ROGUE_CR_SLC_CTRL_MISC_ENABLE_PSG_HAZARD_CHECK_EN) |
+		     ROGUE_CR_SLC_CTRL_MISC_ADDR_DECODE_MODE_PVR_HASH1;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, slc_cache_line_size_bits, &slc_cache_line_size_bits);
+	if (err)
+		return err;
+
+	/* Bypass burst combiner if SLC line size is smaller than 1024 bits. */
+	if (slc_cache_line_size_bits < 1024)
+		reg_val |= ROGUE_CR_SLC_CTRL_MISC_BYPASS_BURST_COMBINER_EN;
+
+	if (PVR_HAS_QUIRK(pvr_dev, 71242) && !PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support))
+		reg_val |= ROGUE_CR_SLC_CTRL_MISC_LAZYWB_OVERRIDE_EN;
+
+	pvr_cr_write32(pvr_dev, ROGUE_CR_SLC_CTRL_MISC, reg_val);
+
+	return 0;
+}
+
+/**
+ * pvr_fw_start() - Start FW processor and boot firmware
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by rogue_slc_init().
+ */
+int
+pvr_fw_start(struct pvr_device *pvr_dev)
+{
+	bool has_reset2 = PVR_HAS_FEATURE(pvr_dev, xe_tpu2);
+	u64 soft_reset_mask;
+	int err;
+
+	if (PVR_HAS_FEATURE(pvr_dev, pbe2_in_xe))
+		soft_reset_mask = ROGUE_CR_SOFT_RESET__PBE2_XE__MASKFULL;
+	else
+		soft_reset_mask = ROGUE_CR_SOFT_RESET_MASKFULL;
+
+	if (PVR_HAS_FEATURE(pvr_dev, sys_bus_secure_reset)) {
+		/*
+		 * Disable the default sys_bus_secure protection to perform
+		 * minimal setup.
+		 */
+		pvr_cr_write32(pvr_dev, ROGUE_CR_SYS_BUS_SECURE, 0);
+		(void)pvr_cr_read32(pvr_dev, ROGUE_CR_SYS_BUS_SECURE); /* Fence write */
+	}
+
+	/* Set Rogue in soft-reset. */
+	pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, soft_reset_mask);
+	if (has_reset2)
+		pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET2, ROGUE_CR_SOFT_RESET2_MASKFULL);
+
+	/* Read soft-reset to fence previous write in order to clear the SOCIF pipeline. */
+	(void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET);
+	if (has_reset2)
+		(void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET2);
+
+	/* Take Rascal and Dust out of reset. */
+	pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET,
+		       soft_reset_mask ^ ROGUE_CR_SOFT_RESET_RASCALDUSTS_EN);
+	if (has_reset2)
+		pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET2, 0);
+
+	(void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET);
+	if (has_reset2)
+		(void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET2);
+
+	/* Take everything out of reset but the FW processor. */
+	pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, ROGUE_CR_SOFT_RESET_GARTEN_EN);
+	if (has_reset2)
+		pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET2, 0);
+
+	(void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET);
+	if (has_reset2)
+		(void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET2);
+
+	err = rogue_slc_init(pvr_dev);
+	if (err)
+		goto err_reset;
+
+	/* Initialise Firmware wrapper. */
+	pvr_dev->fw_dev.defs->wrapper_init(pvr_dev);
+
+	/* We must init the AXI-ACE interface before first BIF transaction. */
+	rogue_axi_ace_list_init(pvr_dev);
+
+	if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) {
+		/* Initialise BIF. */
+		rogue_bif_init(pvr_dev);
+	}
+
+	/* Need to wait for at least 16 cycles before taking the FW processor out of reset ... */
+	udelay(3);
+
+	pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, 0x0);
+	(void)pvr_cr_read64(pvr_dev, ROGUE_CR_SOFT_RESET);
+
+	/* ... and afterwards. */
+	udelay(3);
+
+	return 0;
+
+err_reset:
+	/* Put everything back into soft-reset. */
+	pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, soft_reset_mask);
+
+	return err;
+}
+
+/**
+ * pvr_fw_stop() - Stop FW processor
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_cr_poll_reg32().
+ */
+int
+pvr_fw_stop(struct pvr_device *pvr_dev)
+{
+	const u32 sidekick_idle_mask = ROGUE_CR_SIDEKICK_IDLE_MASKFULL &
+				       ~(ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN |
+					 ROGUE_CR_SIDEKICK_IDLE_SOCIF_EN |
+					 ROGUE_CR_SIDEKICK_IDLE_HOSTIF_EN);
+	bool skip_garten_idle = false;
+	u32 reg_value;
+	int err;
+
+	/*
+	 * Wait for Sidekick/Jones to signal IDLE except for the Garten Wrapper.
+	 * For cores with the LAYOUT_MARS feature, SIDEKICK would have been
+	 * powered down by the FW.
+	 */
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SIDEKICK_IDLE, sidekick_idle_mask,
+				sidekick_idle_mask, POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	/* Unset MTS DM association with threads. */
+	pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC,
+		       ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_MASKFULL &
+		       ROGUE_CR_MTS_INTCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK);
+	pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC,
+		       ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_MASKFULL &
+		       ROGUE_CR_MTS_BGCTX_THREAD0_DM_ASSOC_DM_ASSOC_CLRMSK);
+	pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC,
+		       ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_MASKFULL &
+		       ROGUE_CR_MTS_INTCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK);
+	pvr_cr_write32(pvr_dev, ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC,
+		       ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_MASKFULL &
+		       ROGUE_CR_MTS_BGCTX_THREAD1_DM_ASSOC_DM_ASSOC_CLRMSK);
+
+	/* Extra Idle checks. */
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIF_STATUS_MMU, 0,
+				ROGUE_CR_BIF_STATUS_MMU_MASKFULL,
+				POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIFPM_STATUS_MMU, 0,
+				ROGUE_CR_BIFPM_STATUS_MMU_MASKFULL,
+				POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	if (!PVR_HAS_FEATURE(pvr_dev, xt_top_infrastructure)) {
+		err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIF_READS_EXT_STATUS, 0,
+					ROGUE_CR_BIF_READS_EXT_STATUS_MASKFULL,
+					POLL_TIMEOUT_USEC);
+		if (err)
+			return err;
+	}
+
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_BIFPM_READS_EXT_STATUS, 0,
+				ROGUE_CR_BIFPM_READS_EXT_STATUS_MASKFULL,
+				POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	err = pvr_cr_poll_reg64(pvr_dev, ROGUE_CR_SLC_STATUS1, 0,
+				ROGUE_CR_SLC_STATUS1_MASKFULL,
+				POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	/*
+	 * Wait for SLC to signal IDLE.
+	 * For cores with the LAYOUT_MARS feature, SLC would have been powered
+	 * down by the FW.
+	 */
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SLC_IDLE,
+				ROGUE_CR_SLC_IDLE_MASKFULL,
+				ROGUE_CR_SLC_IDLE_MASKFULL, POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	/*
+	 * Wait for Sidekick/Jones to signal IDLE except for the Garten Wrapper.
+	 * For cores with the LAYOUT_MARS feature, SIDEKICK would have been powered
+	 * down by the FW.
+	 */
+	err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SIDEKICK_IDLE, sidekick_idle_mask,
+				sidekick_idle_mask, POLL_TIMEOUT_USEC);
+	if (err)
+		return err;
+
+	if (pvr_dev->fw_dev.processor_type == PVR_FW_PROCESSOR_TYPE_META) {
+		err = pvr_meta_cr_read32(pvr_dev, META_CR_TxVECINT_BHALT, &reg_value);
+		if (err)
+			return err;
+
+		/*
+		 * Wait for Sidekick/Jones to signal IDLE including the Garten
+		 * Wrapper if there is no debugger attached (TxVECINT_BHALT =
+		 * 0x0).
+		 */
+		if (reg_value)
+			skip_garten_idle = true;
+	}
+
+	if (!skip_garten_idle) {
+		err = pvr_cr_poll_reg32(pvr_dev, ROGUE_CR_SIDEKICK_IDLE,
+					ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN,
+					ROGUE_CR_SIDEKICK_IDLE_GARTEN_EN,
+					POLL_TIMEOUT_USEC);
+		if (err)
+			return err;
+	}
+
+	if (PVR_HAS_FEATURE(pvr_dev, pbe2_in_xe))
+		pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET,
+			       ROGUE_CR_SOFT_RESET__PBE2_XE__MASKFULL);
+	else
+		pvr_cr_write64(pvr_dev, ROGUE_CR_SOFT_RESET, ROGUE_CR_SOFT_RESET_MASKFULL);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_fw_startstop.h b/drivers/gpu/drm/imagination/pvr_fw_startstop.h
new file mode 100644
index 0000000000000..a3cef061bd601
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_startstop.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FW_STARTSTOP_H
+#define PVR_FW_STARTSTOP_H
+
+/* Forward declaration from pvr_device.h. */
+struct pvr_device;
+
+int pvr_fw_start(struct pvr_device *pvr_dev);
+int pvr_fw_stop(struct pvr_device *pvr_dev);
+
+#endif /* PVR_FW_STARTSTOP_H */
diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c
new file mode 100644
index 0000000000000..0a8340369f1ac
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_gem.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_fw_trace.h"
+
+#include <drm/drm_file.h>
+
+#include <linux/build_bug.h>
+#include <linux/dcache.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+static void
+tracebuf_ctrl_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_tracebuf *tracebuf_ctrl = cpu_ptr;
+	struct pvr_fw_trace *fw_trace = priv;
+	u32 thread_nr;
+
+	tracebuf_ctrl->tracebuf_size_in_dwords = ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS;
+	tracebuf_ctrl->tracebuf_flags = 0;
+
+	if (fw_trace->group_mask)
+		tracebuf_ctrl->log_type = fw_trace->group_mask | ROGUE_FWIF_LOG_TYPE_TRACE;
+	else
+		tracebuf_ctrl->log_type = ROGUE_FWIF_LOG_TYPE_NONE;
+
+	for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) {
+		struct rogue_fwif_tracebuf_space *tracebuf_space =
+			&tracebuf_ctrl->tracebuf[thread_nr];
+		struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr];
+
+		pvr_fw_object_get_fw_addr(trace_buffer->buf_obj,
+					  &tracebuf_space->trace_buffer_fw_addr);
+
+		tracebuf_space->trace_buffer = trace_buffer->buf;
+		tracebuf_space->trace_pointer = 0;
+	}
+}
+
+int pvr_fw_trace_init(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace;
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	u32 thread_nr;
+	int err;
+
+	for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) {
+		struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr];
+
+		trace_buffer->buf =
+			pvr_fw_object_create_and_map(pvr_dev,
+						     ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS *
+						     sizeof(*trace_buffer->buf),
+						     PVR_BO_FW_FLAGS_DEVICE_UNCACHED |
+						     PVR_BO_FW_NO_CLEAR_ON_RESET,
+						     NULL, NULL, &trace_buffer->buf_obj);
+		if (IS_ERR(trace_buffer->buf)) {
+			drm_err(drm_dev, "Unable to allocate trace buffer\n");
+			err = PTR_ERR(trace_buffer->buf);
+			trace_buffer->buf = NULL;
+			goto err_free_buf;
+		}
+	}
+
+	/* TODO: Provide control of group mask. */
+	fw_trace->group_mask = 0;
+
+	fw_trace->tracebuf_ctrl =
+		pvr_fw_object_create_and_map(pvr_dev,
+					     sizeof(*fw_trace->tracebuf_ctrl),
+					     PVR_BO_FW_FLAGS_DEVICE_UNCACHED |
+					     PVR_BO_FW_NO_CLEAR_ON_RESET,
+					     tracebuf_ctrl_init, fw_trace,
+					     &fw_trace->tracebuf_ctrl_obj);
+	if (IS_ERR(fw_trace->tracebuf_ctrl)) {
+		drm_err(drm_dev, "Unable to allocate trace buffer control structure\n");
+		err = PTR_ERR(fw_trace->tracebuf_ctrl);
+		goto err_free_buf;
+	}
+
+	BUILD_BUG_ON(ARRAY_SIZE(fw_trace->tracebuf_ctrl->tracebuf) !=
+		     ARRAY_SIZE(fw_trace->buffers));
+
+	for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) {
+		struct rogue_fwif_tracebuf_space *tracebuf_space =
+			&fw_trace->tracebuf_ctrl->tracebuf[thread_nr];
+		struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr];
+
+		trace_buffer->tracebuf_space = tracebuf_space;
+	}
+
+	return 0;
+
+err_free_buf:
+	for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) {
+		struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr];
+
+		if (trace_buffer->buf)
+			pvr_fw_object_unmap_and_destroy(trace_buffer->buf_obj);
+	}
+
+	return err;
+}
+
+void pvr_fw_trace_fini(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace;
+	u32 thread_nr;
+
+	for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); thread_nr++) {
+		struct pvr_fw_trace_buffer *trace_buffer = &fw_trace->buffers[thread_nr];
+
+		pvr_fw_object_unmap_and_destroy(trace_buffer->buf_obj);
+	}
+	pvr_fw_object_unmap_and_destroy(fw_trace->tracebuf_ctrl_obj);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.h b/drivers/gpu/drm/imagination/pvr_fw_trace.h
new file mode 100644
index 0000000000000..0074d2b18da0b
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_trace.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FW_TRACE_H
+#define PVR_FW_TRACE_H
+
+#include <drm/drm_file.h>
+#include <linux/types.h>
+
+#include "pvr_rogue_fwif.h"
+
+/* Forward declaration from pvr_device.h. */
+struct pvr_device;
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+/* Forward declarations from pvr_rogue_fwif.h */
+struct rogue_fwif_tracebuf;
+struct rogue_fwif_tracebuf_space;
+
+/**
+ * struct pvr_fw_trace_buffer - Structure representing a trace buffer
+ */
+struct pvr_fw_trace_buffer {
+	/** @buf_obj: FW buffer object representing trace buffer. */
+	struct pvr_fw_object *buf_obj;
+
+	/** @buf: Pointer to CPU mapping of trace buffer. */
+	u32 *buf;
+
+	/**
+	 * @tracebuf_space: Pointer to FW tracebuf_space structure for this
+	 *                  trace buffer.
+	 */
+	struct rogue_fwif_tracebuf_space *tracebuf_space;
+};
+
+/**
+ * struct pvr_fw_trace - Device firmware trace data
+ */
+struct pvr_fw_trace {
+	/**
+	 * @tracebuf_ctrl_obj: Object representing FW trace buffer control
+	 *                     structure.
+	 */
+	struct pvr_fw_object *tracebuf_ctrl_obj;
+
+	/**
+	 * @tracebuf_ctrl: Pointer to CPU mapping of FW trace buffer control
+	 *                 structure.
+	 */
+	struct rogue_fwif_tracebuf *tracebuf_ctrl;
+
+	/**
+	 * @buffers: Array representing the actual trace buffers owned by this
+	 *           device.
+	 */
+	struct pvr_fw_trace_buffer buffers[ROGUE_FW_THREAD_MAX];
+
+	/** @group_mask: Mask of enabled trace groups. */
+	u32 group_mask;
+};
+
+int pvr_fw_trace_init(struct pvr_device *pvr_dev);
+void pvr_fw_trace_fini(struct pvr_device *pvr_dev);
+
+#if defined(CONFIG_DEBUG_FS)
+/* Forward declaration from <linux/dcache.h>. */
+struct dentry;
+
+void pvr_fw_trace_mask_update(struct pvr_device *pvr_dev, u32 old_mask,
+			      u32 new_mask);
+
+void pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir);
+#endif /* defined(CONFIG_DEBUG_FS) */
+
+#endif /* PVR_FW_TRACE_H */
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c
index 7f77e03df9125..b71d30e5f3805 100644
--- a/drivers/gpu/drm/imagination/pvr_mmu.c
+++ b/drivers/gpu/drm/imagination/pvr_mmu.c
@@ -3,9 +3,11 @@
 
 #include "pvr_mmu.h"
 
+#include "pvr_ccb.h"
 #include "pvr_device.h"
 #include "pvr_fw.h"
 #include "pvr_gem.h"
+#include "pvr_power.h"
 #include "pvr_rogue_fwif.h"
 #include "pvr_rogue_mmu_defs.h"
 
@@ -95,7 +97,7 @@ static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags)
  */
 void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
 {
-	/* TODO: implement */
+	pvr_mmu_set_flush_flags(pvr_dev, PVR_MMU_SYNC_LEVEL_2_FLAGS);
 }
 
 /**
@@ -120,8 +122,70 @@ void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
  */
 int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait)
 {
-	/* TODO: implement */
-	return -ENODEV;
+	struct rogue_fwif_kccb_cmd cmd_mmu_cache = {};
+	struct rogue_fwif_mmucachedata *cmd_mmu_cache_data =
+		&cmd_mmu_cache.cmd_data.mmu_cache_data;
+	int err = 0;
+	u32 slot;
+	int idx;
+
+	if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx))
+		return -EIO;
+
+	/* Can't flush MMU if the firmware hasn't booted yet. */
+	if (!pvr_dev->fw_dev.booted)
+		goto err_drm_dev_exit;
+
+	cmd_mmu_cache_data->cache_flags =
+		atomic_xchg(&pvr_dev->mmu_flush_cache_flags, 0);
+
+	if (!cmd_mmu_cache_data->cache_flags)
+		goto err_drm_dev_exit;
+
+	cmd_mmu_cache.cmd_type = ROGUE_FWIF_KCCB_CMD_MMUCACHE;
+
+	pvr_fw_object_get_fw_addr(pvr_dev->fw_dev.mem.mmucache_sync_obj,
+				  &cmd_mmu_cache_data->mmu_cache_sync_fw_addr);
+	cmd_mmu_cache_data->mmu_cache_sync_update_value = 0;
+
+	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
+	if (err)
+		goto err_reset_and_retry;
+
+	err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
+	if (err)
+		goto err_reset_and_retry;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_reset_and_retry:
+	/*
+	 * Flush command failure is most likely the result of a firmware lockup. Hard
+	 * reset the GPU and retry.
+	 */
+	err = pvr_power_reset(pvr_dev, true);
+	if (err)
+		goto err_drm_dev_exit; /* Device is lost. */
+
+	/* Retry sending flush request. */
+	err = pvr_kccb_send_cmd(pvr_dev, &cmd_mmu_cache, &slot);
+	if (err) {
+		pvr_device_lost(pvr_dev);
+		goto err_drm_dev_exit;
+	}
+
+	if (wait) {
+		err = pvr_kccb_wait_for_completion(pvr_dev, slot, HZ, NULL);
+		if (err)
+			pvr_device_lost(pvr_dev);
+	}
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c
index 88f14a4d31aba..5f7cb7beb8792 100644
--- a/drivers/gpu/drm/imagination/pvr_power.c
+++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -3,6 +3,7 @@
 
 #include "pvr_device.h"
 #include "pvr_fw.h"
+#include "pvr_fw_startstop.h"
 #include "pvr_power.h"
 #include "pvr_rogue_fwif.h"
 
@@ -21,11 +22,38 @@
 
 #define WATCHDOG_TIME_MS (500)
 
+/**
+ * pvr_device_lost() - Mark GPU device as lost
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This will cause the DRM device to be unplugged.
+ */
+void
+pvr_device_lost(struct pvr_device *pvr_dev)
+{
+	if (!pvr_dev->lost) {
+		pvr_dev->lost = true;
+		drm_dev_unplug(from_pvr_device(pvr_dev));
+	}
+}
+
 static int
 pvr_power_send_command(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *pow_cmd)
 {
-	/* TODO: implement */
-	return -ENODEV;
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	u32 slot_nr;
+	u32 value;
+	int err;
+
+	WRITE_ONCE(*fw_dev->power_sync, 0);
+
+	err = pvr_kccb_send_cmd_powered(pvr_dev, pow_cmd, &slot_nr);
+	if (err)
+		return err;
+
+	/* Wait for FW to acknowledge. */
+	return readl_poll_timeout(pvr_dev->fw_dev.power_sync, value, value != 0, 100,
+				  POWER_SYNC_TIMEOUT_US);
 }
 
 static int
@@ -71,8 +99,7 @@ pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset)
 			return err;
 	}
 
-	/* TODO: stop firmware */
-	return -ENODEV;
+	return pvr_fw_stop(pvr_dev);
 }
 
 static int
@@ -80,11 +107,17 @@ pvr_power_fw_enable(struct pvr_device *pvr_dev)
 {
 	int err;
 
-	/* TODO: start firmware */
-	err = -ENODEV;
+	err = pvr_fw_start(pvr_dev);
 	if (err)
 		return err;
 
+	err = pvr_wait_for_fw_boot(pvr_dev);
+	if (err) {
+		drm_err(from_pvr_device(pvr_dev), "Firmware failed to boot\n");
+		pvr_fw_stop(pvr_dev);
+		return err;
+	}
+
 	queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work,
 			   msecs_to_jiffies(WATCHDOG_TIME_MS));
 
@@ -94,14 +127,39 @@ pvr_power_fw_enable(struct pvr_device *pvr_dev)
 bool
 pvr_power_is_idle(struct pvr_device *pvr_dev)
 {
-	/* TODO: implement */
-	return true;
+	/*
+	 * FW power state can be out of date if a KCCB command has been submitted but the FW hasn't
+	 * started processing it yet. So also check the KCCB status.
+	 */
+	enum rogue_fwif_pow_state pow_state = READ_ONCE(pvr_dev->fw_dev.fwif_sysdata->pow_state);
+	bool kccb_idle = pvr_kccb_is_idle(pvr_dev);
+
+	return (pow_state == ROGUE_FWIF_POW_IDLE) && kccb_idle;
 }
 
 static bool
 pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
 {
-	/* TODO: implement */
+	/* Check KCCB commands are progressing. */
+	u32 kccb_cmds_executed = pvr_dev->fw_dev.fwif_osdata->kccb_cmds_executed;
+	bool kccb_is_idle = pvr_kccb_is_idle(pvr_dev);
+
+	if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed && !kccb_is_idle) {
+		pvr_dev->watchdog.kccb_stall_count++;
+
+		/*
+		 * If we have commands pending with no progress for 2 consecutive polls then
+		 * consider KCCB command processing stalled.
+		 */
+		if (pvr_dev->watchdog.kccb_stall_count == 2) {
+			pvr_dev->watchdog.kccb_stall_count = 0;
+			return true;
+		}
+	} else {
+		pvr_dev->watchdog.old_kccb_cmds_executed = kccb_cmds_executed;
+		pvr_dev->watchdog.kccb_stall_count = 0;
+	}
+
 	return false;
 }
 
@@ -118,6 +176,9 @@ pvr_watchdog_worker(struct work_struct *work)
 	if (pm_runtime_get_if_in_use(from_pvr_device(pvr_dev)->dev) <= 0)
 		goto out_requeue;
 
+	if (!pvr_dev->fw_dev.booted)
+		goto out_pm_runtime_put;
+
 	stalled = pvr_watchdog_kccb_stalled(pvr_dev);
 
 	if (stalled) {
@@ -127,6 +188,7 @@ pvr_watchdog_worker(struct work_struct *work)
 		/* Device may be lost at this point. */
 	}
 
+out_pm_runtime_put:
 	pm_runtime_put(from_pvr_device(pvr_dev)->dev);
 
 out_requeue:
@@ -158,18 +220,26 @@ pvr_power_device_suspend(struct device *dev)
 	struct platform_device *plat_dev = to_platform_device(dev);
 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
 	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	int err = 0;
 	int idx;
 
 	if (!drm_dev_enter(drm_dev, &idx))
 		return -EIO;
 
+	if (pvr_dev->fw_dev.booted) {
+		err = pvr_power_fw_disable(pvr_dev, false);
+		if (err)
+			goto err_drm_dev_exit;
+	}
+
 	clk_disable_unprepare(pvr_dev->mem_clk);
 	clk_disable_unprepare(pvr_dev->sys_clk);
 	clk_disable_unprepare(pvr_dev->core_clk);
 
+err_drm_dev_exit:
 	drm_dev_exit(idx);
 
-	return 0;
+	return err;
 }
 
 int
@@ -196,10 +266,19 @@ pvr_power_device_resume(struct device *dev)
 	if (err)
 		goto err_sys_clk_disable;
 
+	if (pvr_dev->fw_dev.booted) {
+		err = pvr_power_fw_enable(pvr_dev);
+		if (err)
+			goto err_mem_clk_disable;
+	}
+
 	drm_dev_exit(idx);
 
 	return 0;
 
+err_mem_clk_disable:
+	clk_disable_unprepare(pvr_dev->mem_clk);
+
 err_sys_clk_disable:
 	clk_disable_unprepare(pvr_dev->sys_clk);
 
@@ -239,7 +318,6 @@ pvr_power_device_idle(struct device *dev)
 int
 pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
 {
-	/* TODO: Implement hard reset. */
 	int err;
 
 	/*
@@ -248,13 +326,69 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
 	 */
 	WARN_ON(pvr_power_get(pvr_dev));
 
-	err = pvr_power_fw_disable(pvr_dev, false);
-	if (err)
-		goto err_power_put;
+	down_write(&pvr_dev->reset_sem);
+
+	if (pvr_dev->lost) {
+		err = -EIO;
+		goto err_up_write;
+	}
+
+	/* Disable IRQs for the duration of the reset. */
+	disable_irq(pvr_dev->irq);
+
+	do {
+		err = pvr_power_fw_disable(pvr_dev, hard_reset);
+		if (!err) {
+			if (hard_reset) {
+				pvr_dev->fw_dev.booted = false;
+				WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev));
+
+				err = pvr_fw_hard_reset(pvr_dev);
+				if (err)
+					goto err_device_lost;
+
+				err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev);
+				pvr_dev->fw_dev.booted = true;
+				if (err)
+					goto err_device_lost;
+			} else {
+				/* Clear the FW faulted flags. */
+				pvr_dev->fw_dev.fwif_sysdata->hwr_state_flags &=
+					~(ROGUE_FWIF_HWR_FW_FAULT |
+					  ROGUE_FWIF_HWR_RESTART_REQUESTED);
+			}
+
+			pvr_fw_irq_clear(pvr_dev);
+
+			err = pvr_power_fw_enable(pvr_dev);
+		}
+
+		if (err && hard_reset)
+			goto err_device_lost;
+
+		if (err && !hard_reset) {
+			drm_err(from_pvr_device(pvr_dev), "FW stalled, trying hard reset");
+			hard_reset = true;
+		}
+	} while (err);
+
+	enable_irq(pvr_dev->irq);
+
+	up_write(&pvr_dev->reset_sem);
+
+	pvr_power_put(pvr_dev);
+
+	return 0;
+
+err_device_lost:
+	drm_err(from_pvr_device(pvr_dev), "GPU device lost");
+	pvr_device_lost(pvr_dev);
+
+	/* Leave IRQs disabled if the device is lost. */
 
-	err = pvr_power_fw_enable(pvr_dev);
+err_up_write:
+	up_write(&pvr_dev->reset_sem);
 
-err_power_put:
 	pvr_power_put(pvr_dev);
 
 	return err;
diff --git a/drivers/gpu/drm/imagination/pvr_power.h b/drivers/gpu/drm/imagination/pvr_power.h
index 360980f454d74..9a9312dcb2dab 100644
--- a/drivers/gpu/drm/imagination/pvr_power.h
+++ b/drivers/gpu/drm/imagination/pvr_power.h
@@ -12,6 +12,8 @@
 int pvr_watchdog_init(struct pvr_device *pvr_dev);
 void pvr_watchdog_fini(struct pvr_device *pvr_dev);
 
+void pvr_device_lost(struct pvr_device *pvr_dev);
+
 bool pvr_power_is_idle(struct pvr_device *pvr_dev);
 
 int pvr_power_device_suspend(struct device *dev);
diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 4c48a208c390b..3ad1366294b93 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -540,6 +540,16 @@ static const struct drm_gpuvm_ops pvr_vm_gpuva_ops = {
 	.sm_step_unmap = pvr_vm_gpuva_unmap,
 };
 
+static void
+fw_mem_context_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_fwmemcontext *fw_mem_ctx = cpu_ptr;
+	struct pvr_vm_context *vm_ctx = priv;
+
+	fw_mem_ctx->pc_dev_paddr = pvr_vm_get_page_table_root_addr(vm_ctx);
+	fw_mem_ctx->page_cat_base_reg_set = ROGUE_FW_BIF_INVALID_PCSET;
+}
+
 /**
  * pvr_vm_create_context() - Create a new VM context.
  * @pvr_dev: Target PowerVR device.
@@ -602,13 +612,19 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
 	}
 
 	if (is_userspace_context) {
-		/* TODO: Create FW mem context */
-		err = -ENODEV;
-		goto err_put_ctx;
+		err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_fwmemcontext),
+					   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+					   fw_mem_context_init, vm_ctx, &vm_ctx->fw_mem_ctx_obj);
+
+		if (err)
+			goto err_page_table_destroy;
 	}
 
 	return vm_ctx;
 
+err_page_table_destroy:
+	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
+
 err_put_ctx:
 	pvr_vm_context_put(vm_ctx);
 
@@ -628,8 +644,8 @@ pvr_vm_context_release(struct kref *ref_count)
 	struct pvr_vm_context *vm_ctx =
 		container_of(ref_count, struct pvr_vm_context, ref_count);
 
-	/* TODO: Destroy FW mem context */
-	WARN_ON(vm_ctx->fw_mem_ctx_obj);
+	if (vm_ctx->fw_mem_ctx_obj)
+		pvr_fw_object_destroy(vm_ctx->fw_mem_ctx_obj);
 
 	WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start,
 			     vm_ctx->gpuvm_mgr.mm_range));
-- 
GitLab


From 927f3e0253c11276f0237ca1a14e77c48957c069 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:35 +0000
Subject: [PATCH 0410/2340] drm/imagination: Implement MIPS firmware processor
 and MMU support

Add support for the MIPS firmware processor, used in the Series AXE GPU.
The MIPS firmware processor uses a separate MMU to the rest of the GPU, so
this patch adds support for that as well.

Changes since v8:
- Corrected license identifiers

Changes since v6:
- Fix integer overflow in VM map error path

Changes since v5:
- Use alloc_page() when allocating MIPS pagetable

Changes since v3:
- Get regs resource (removed from GPU resources commit)

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Link: https://lore.kernel.org/r/a114f7b3e97cb07460c7f2842901716a9207b0c4.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Makefile      |   4 +-
 drivers/gpu/drm/imagination/pvr_device.c  |   5 +-
 drivers/gpu/drm/imagination/pvr_device.h  |   3 +
 drivers/gpu/drm/imagination/pvr_fw.c      |   2 +
 drivers/gpu/drm/imagination/pvr_fw_mips.c | 252 ++++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_fw_mips.h |  48 +++++
 drivers/gpu/drm/imagination/pvr_vm_mips.c | 238 ++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_vm_mips.h |  22 ++
 8 files changed, 572 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_mips.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_fw_mips.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_vm_mips.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_vm_mips.h

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 5c1c918af7a33..71dc36cc6b9df 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -10,11 +10,13 @@ powervr-y := \
 	pvr_drv.o \
 	pvr_fw.o \
 	pvr_fw_meta.o \
+	pvr_fw_mips.o \
 	pvr_fw_startstop.o \
 	pvr_fw_trace.o \
 	pvr_gem.o \
 	pvr_mmu.o \
 	pvr_power.o \
-	pvr_vm.o
+	pvr_vm.o \
+	pvr_vm_mips.o
 
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 1be14cdbdace1..2d6db4715f850 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -50,16 +50,19 @@ pvr_device_reg_init(struct pvr_device *pvr_dev)
 {
 	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
 	struct platform_device *plat_dev = to_platform_device(drm_dev->dev);
+	struct resource *regs_resource;
 	void __iomem *regs;
 
+	pvr_dev->regs_resource = NULL;
 	pvr_dev->regs = NULL;
 
-	regs = devm_platform_ioremap_resource(plat_dev, 0);
+	regs = devm_platform_get_and_ioremap_resource(plat_dev, 0, &regs_resource);
 	if (IS_ERR(regs))
 		return dev_err_probe(drm_dev->dev, PTR_ERR(regs),
 				     "failed to ioremap gpu registers\n");
 
 	pvr_dev->regs = regs;
+	pvr_dev->regs_resource = regs_resource;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 39347595c6f07..8853249f48840 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -93,6 +93,9 @@ struct pvr_device {
 	/** @fw_version: Firmware version detected at runtime. */
 	struct pvr_fw_version fw_version;
 
+	/** @regs_resource: Resource representing device control registers. */
+	struct resource *regs_resource;
+
 	/**
 	 * @regs: Device control registers.
 	 *
diff --git a/drivers/gpu/drm/imagination/pvr_fw.c b/drivers/gpu/drm/imagination/pvr_fw.c
index f8ed981f18077..3debc9870a82a 100644
--- a/drivers/gpu/drm/imagination/pvr_fw.c
+++ b/drivers/gpu/drm/imagination/pvr_fw.c
@@ -933,6 +933,8 @@ pvr_fw_init(struct pvr_device *pvr_dev)
 
 	if (fw_dev->processor_type == PVR_FW_PROCESSOR_TYPE_META)
 		fw_dev->defs = &pvr_fw_defs_meta;
+	else if (fw_dev->processor_type == PVR_FW_PROCESSOR_TYPE_MIPS)
+		fw_dev->defs = &pvr_fw_defs_mips;
 	else
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/imagination/pvr_fw_mips.c b/drivers/gpu/drm/imagination/pvr_fw_mips.c
new file mode 100644
index 0000000000000..0bed0257e2ab7
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_mips.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_fw.h"
+#include "pvr_fw_mips.h"
+#include "pvr_gem.h"
+#include "pvr_rogue_mips.h"
+#include "pvr_vm_mips.h"
+
+#include <linux/elf.h>
+#include <linux/err.h>
+#include <linux/types.h>
+
+#define ROGUE_FW_HEAP_MIPS_BASE 0xC0000000
+#define ROGUE_FW_HEAP_MIPS_SHIFT 24 /* 16 MB */
+#define ROGUE_FW_HEAP_MIPS_RESERVED_SIZE SZ_1M
+
+/**
+ * process_elf_command_stream() - Process ELF firmware image and populate
+ *                                firmware sections
+ * @pvr_dev: Device pointer.
+ * @fw: Pointer to firmware image.
+ * @fw_code_ptr: Pointer to FW code section.
+ * @fw_data_ptr: Pointer to FW data section.
+ * @fw_core_code_ptr: Pointer to FW coremem code section.
+ * @fw_core_data_ptr: Pointer to FW coremem data section.
+ *
+ * Returns :
+ *  * 0 on success, or
+ *  * -EINVAL on any error in ELF command stream.
+ */
+static int
+process_elf_command_stream(struct pvr_device *pvr_dev, const u8 *fw, u8 *fw_code_ptr,
+			   u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr)
+{
+	struct elf32_hdr *header = (struct elf32_hdr *)fw;
+	struct elf32_phdr *program_header = (struct elf32_phdr *)(fw + header->e_phoff);
+	struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+	u32 entry;
+	int err;
+
+	for (entry = 0; entry < header->e_phnum; entry++, program_header++) {
+		void *write_addr;
+
+		/* Only consider loadable entries in the ELF segment table */
+		if (program_header->p_type != PT_LOAD)
+			continue;
+
+		err = pvr_fw_find_mmu_segment(pvr_dev, program_header->p_vaddr,
+					      program_header->p_memsz, fw_code_ptr, fw_data_ptr,
+					      fw_core_code_ptr, fw_core_data_ptr, &write_addr);
+		if (err) {
+			drm_err(drm_dev,
+				"Addr 0x%x (size: %d) not found in any firmware segment",
+				program_header->p_vaddr, program_header->p_memsz);
+			return err;
+		}
+
+		/* Write to FW allocation only if available */
+		if (write_addr) {
+			memcpy(write_addr, fw + program_header->p_offset,
+			       program_header->p_filesz);
+
+			memset((u8 *)write_addr + program_header->p_filesz, 0,
+			       program_header->p_memsz - program_header->p_filesz);
+		}
+	}
+
+	return 0;
+}
+
+static int
+pvr_mips_init(struct pvr_device *pvr_dev)
+{
+	pvr_fw_heap_info_init(pvr_dev, ROGUE_FW_HEAP_MIPS_SHIFT, ROGUE_FW_HEAP_MIPS_RESERVED_SIZE);
+
+	return pvr_vm_mips_init(pvr_dev);
+}
+
+static void
+pvr_mips_fini(struct pvr_device *pvr_dev)
+{
+	pvr_vm_mips_fini(pvr_dev);
+}
+
+static int
+pvr_mips_fw_process(struct pvr_device *pvr_dev, const u8 *fw,
+		    u8 *fw_code_ptr, u8 *fw_data_ptr, u8 *fw_core_code_ptr, u8 *fw_core_data_ptr,
+		    u32 core_code_alloc_size)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data;
+	const struct pvr_fw_layout_entry *boot_code_entry;
+	const struct pvr_fw_layout_entry *boot_data_entry;
+	const struct pvr_fw_layout_entry *exception_code_entry;
+	const struct pvr_fw_layout_entry *stack_entry;
+	struct rogue_mipsfw_boot_data *boot_data;
+	dma_addr_t dma_addr;
+	u32 page_nr;
+	int err;
+
+	err = process_elf_command_stream(pvr_dev, fw, fw_code_ptr, fw_data_ptr, fw_core_code_ptr,
+					 fw_core_data_ptr);
+	if (err)
+		return err;
+
+	boot_code_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_BOOT_CODE);
+	boot_data_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_BOOT_DATA);
+	exception_code_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_EXCEPTIONS_CODE);
+	if (!boot_code_entry || !boot_data_entry || !exception_code_entry)
+		return -EINVAL;
+
+	WARN_ON(pvr_gem_get_dma_addr(fw_dev->mem.code_obj->gem, boot_code_entry->alloc_offset,
+				     &mips_data->boot_code_dma_addr));
+	WARN_ON(pvr_gem_get_dma_addr(fw_dev->mem.data_obj->gem, boot_data_entry->alloc_offset,
+				     &mips_data->boot_data_dma_addr));
+	WARN_ON(pvr_gem_get_dma_addr(fw_dev->mem.code_obj->gem,
+				     exception_code_entry->alloc_offset,
+				     &mips_data->exception_code_dma_addr));
+
+	stack_entry = pvr_fw_find_layout_entry(pvr_dev, MIPS_STACK);
+	if (!stack_entry)
+		return -EINVAL;
+
+	boot_data = (struct rogue_mipsfw_boot_data *)(fw_data_ptr + boot_data_entry->alloc_offset +
+						      ROGUE_MIPSFW_BOOTLDR_CONF_OFFSET);
+
+	WARN_ON(pvr_fw_object_get_dma_addr(fw_dev->mem.data_obj, stack_entry->alloc_offset,
+					   &dma_addr));
+	boot_data->stack_phys_addr = dma_addr;
+
+	boot_data->reg_base = pvr_dev->regs_resource->start;
+
+	for (page_nr = 0; page_nr < ARRAY_SIZE(boot_data->pt_phys_addr); page_nr++) {
+		/* Firmware expects 4k pages, but host page size might be different. */
+		u32 src_page_nr = (page_nr * ROGUE_MIPSFW_PAGE_SIZE_4K) >> PAGE_SHIFT;
+		u32 page_offset = (page_nr * ROGUE_MIPSFW_PAGE_SIZE_4K) & ~PAGE_MASK;
+
+		boot_data->pt_phys_addr[page_nr] = mips_data->pt_dma_addr[src_page_nr] +
+						   page_offset;
+	}
+
+	boot_data->pt_log2_page_size = ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K;
+	boot_data->pt_num_pages = ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES;
+	boot_data->reserved1 = 0;
+	boot_data->reserved2 = 0;
+
+	return 0;
+}
+
+static int
+pvr_mips_wrapper_init(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_mips_data *mips_data = pvr_dev->fw_dev.processor_data.mips_data;
+	const u64 remap_settings = ROGUE_MIPSFW_BOOT_REMAP_LOG2_SEGMENT_SIZE;
+	u32 phys_bus_width;
+
+	int err = PVR_FEATURE_VALUE(pvr_dev, phys_bus_width, &phys_bus_width);
+
+	if (WARN_ON(err))
+		return err;
+
+	/* Currently MIPS FW only supported with physical bus width > 32 bits. */
+	if (WARN_ON(phys_bus_width <= 32))
+		return -EINVAL;
+
+	pvr_cr_write32(pvr_dev, ROGUE_CR_MIPS_WRAPPER_CONFIG,
+		       (ROGUE_MIPSFW_REGISTERS_VIRTUAL_BASE >>
+			ROGUE_MIPSFW_WRAPPER_CONFIG_REGBANK_ADDR_ALIGN) |
+		       ROGUE_CR_MIPS_WRAPPER_CONFIG_BOOT_ISA_MODE_MICROMIPS);
+
+	/* Configure remap for boot code, boot data and exceptions code areas. */
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1,
+		       ROGUE_MIPSFW_BOOT_REMAP_PHYS_ADDR_IN |
+		       ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG1_MODE_ENABLE_EN);
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2,
+		       (mips_data->boot_code_dma_addr &
+			~ROGUE_CR_MIPS_ADDR_REMAP1_CONFIG2_ADDR_OUT_CLRMSK) | remap_settings);
+
+	if (PVR_HAS_QUIRK(pvr_dev, 63553)) {
+		/*
+		 * WA always required on 36 bit cores, to avoid continuous unmapped memory accesses
+		 * to address 0x0.
+		 */
+		WARN_ON(phys_bus_width != 36);
+
+		pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1,
+			       ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG1_MODE_ENABLE_EN);
+		pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2,
+			       (mips_data->boot_code_dma_addr &
+				~ROGUE_CR_MIPS_ADDR_REMAP5_CONFIG2_ADDR_OUT_CLRMSK) |
+			       remap_settings);
+	}
+
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1,
+		       ROGUE_MIPSFW_DATA_REMAP_PHYS_ADDR_IN |
+		       ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG1_MODE_ENABLE_EN);
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2,
+		       (mips_data->boot_data_dma_addr &
+			~ROGUE_CR_MIPS_ADDR_REMAP2_CONFIG2_ADDR_OUT_CLRMSK) | remap_settings);
+
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1,
+		       ROGUE_MIPSFW_CODE_REMAP_PHYS_ADDR_IN |
+		       ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG1_MODE_ENABLE_EN);
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2,
+		       (mips_data->exception_code_dma_addr &
+			~ROGUE_CR_MIPS_ADDR_REMAP3_CONFIG2_ADDR_OUT_CLRMSK) | remap_settings);
+
+	/* Garten IDLE bit controlled by MIPS. */
+	pvr_cr_write64(pvr_dev, ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG,
+		       ROGUE_CR_MTS_GARTEN_WRAPPER_CONFIG_IDLE_CTRL_META);
+
+	/* Turn on the EJTAG probe. */
+	pvr_cr_write32(pvr_dev, ROGUE_CR_MIPS_DEBUG_CONFIG, 0);
+
+	return 0;
+}
+
+static u32
+pvr_mips_get_fw_addr_with_offset(struct pvr_fw_object *fw_obj, u32 offset)
+{
+	struct pvr_device *pvr_dev = to_pvr_device(gem_from_pvr_gem(fw_obj->gem)->dev);
+
+	/* MIPS cacheability is determined by page table. */
+	return ((fw_obj->fw_addr_offset + offset) & pvr_dev->fw_dev.fw_heap_info.offset_mask) |
+	       ROGUE_FW_HEAP_MIPS_BASE;
+}
+
+static bool
+pvr_mips_has_fixed_data_addr(void)
+{
+	return true;
+}
+
+const struct pvr_fw_defs pvr_fw_defs_mips = {
+	.init = pvr_mips_init,
+	.fini = pvr_mips_fini,
+	.fw_process = pvr_mips_fw_process,
+	.vm_map = pvr_vm_mips_map,
+	.vm_unmap = pvr_vm_mips_unmap,
+	.get_fw_addr_with_offset = pvr_mips_get_fw_addr_with_offset,
+	.wrapper_init = pvr_mips_wrapper_init,
+	.has_fixed_data_addr = pvr_mips_has_fixed_data_addr,
+	.irq = {
+		.enable_reg = ROGUE_CR_MIPS_WRAPPER_IRQ_ENABLE,
+		.status_reg = ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS,
+		.clear_reg = ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR,
+		.event_mask = ROGUE_CR_MIPS_WRAPPER_IRQ_STATUS_EVENT_EN,
+		.clear_mask = ROGUE_CR_MIPS_WRAPPER_IRQ_CLEAR_EVENT_EN,
+	},
+};
diff --git a/drivers/gpu/drm/imagination/pvr_fw_mips.h b/drivers/gpu/drm/imagination/pvr_fw_mips.h
new file mode 100644
index 0000000000000..408dbe63a90cf
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_fw_mips.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FW_MIPS_H
+#define PVR_FW_MIPS_H
+
+#include "pvr_rogue_mips.h"
+
+#include <asm/page.h>
+#include <linux/types.h>
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_gem_object;
+
+#define PVR_MIPS_PT_PAGE_COUNT ((ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * ROGUE_MIPSFW_PAGE_SIZE_4K) \
+				>> PAGE_SHIFT)
+/**
+ * struct pvr_fw_mips_data - MIPS-specific data
+ */
+struct pvr_fw_mips_data {
+	/**
+	 * @pt_pages: Pages containing MIPS pagetable.
+	 */
+	struct page *pt_pages[PVR_MIPS_PT_PAGE_COUNT];
+
+	/** @pt: Pointer to CPU mapping of MIPS pagetable. */
+	u32 *pt;
+
+	/** @pt_dma_addr: DMA mappings of MIPS pagetable. */
+	dma_addr_t pt_dma_addr[PVR_MIPS_PT_PAGE_COUNT];
+
+	/** @boot_code_dma_addr: DMA address of MIPS boot code. */
+	dma_addr_t boot_code_dma_addr;
+
+	/** @boot_data_dma_addr: DMA address of MIPS boot data. */
+	dma_addr_t boot_data_dma_addr;
+
+	/** @exception_code_dma_addr: DMA address of MIPS exception code. */
+	dma_addr_t exception_code_dma_addr;
+
+	/** @cache_policy: Cache policy for this processor. */
+	u32 cache_policy;
+
+	/** @pfn_mask: PFN mask for MIPS pagetable. */
+	u32 pfn_mask;
+};
+
+#endif /* PVR_FW_MIPS_H */
diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c
new file mode 100644
index 0000000000000..7268cf6e630be
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_fw_mips.h"
+#include "pvr_gem.h"
+#include "pvr_mmu.h"
+#include "pvr_rogue_mips.h"
+#include "pvr_vm.h"
+#include "pvr_vm_mips.h"
+
+#include <drm/drm_managed.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+/**
+ * pvr_vm_mips_init() - Initialise MIPS FW pagetable
+ * @pvr_dev: Target PowerVR device.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * -%EINVAL,
+ *  * Any error returned by pvr_gem_object_create(), or
+ *  * And error returned by pvr_gem_object_vmap().
+ */
+int
+pvr_vm_mips_init(struct pvr_device *pvr_dev)
+{
+	u32 pt_size = 1 << ROGUE_MIPSFW_LOG2_PAGETABLE_SIZE_4K(pvr_dev);
+	struct device *dev = from_pvr_device(pvr_dev)->dev;
+	struct pvr_fw_mips_data *mips_data;
+	u32 phys_bus_width;
+	int page_nr;
+	int err;
+
+	/* Page table size must be at most ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * 4k pages. */
+	if (pt_size > ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * SZ_4K)
+		return -EINVAL;
+
+	if (PVR_FEATURE_VALUE(pvr_dev, phys_bus_width, &phys_bus_width))
+		return -EINVAL;
+
+	mips_data = drmm_kzalloc(from_pvr_device(pvr_dev), sizeof(*mips_data), GFP_KERNEL);
+	if (!mips_data)
+		return -ENOMEM;
+
+	for (page_nr = 0; page_nr < ARRAY_SIZE(mips_data->pt_pages); page_nr++) {
+		mips_data->pt_pages[page_nr] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!mips_data->pt_pages[page_nr]) {
+			err = -ENOMEM;
+			goto err_free_pages;
+		}
+
+		mips_data->pt_dma_addr[page_nr] = dma_map_page(dev, mips_data->pt_pages[page_nr], 0,
+							       PAGE_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, mips_data->pt_dma_addr[page_nr])) {
+			err = -ENOMEM;
+			goto err_free_pages;
+		}
+	}
+
+	mips_data->pt = vmap(mips_data->pt_pages, pt_size >> PAGE_SHIFT, VM_MAP,
+			     pgprot_writecombine(PAGE_KERNEL));
+	if (!mips_data->pt) {
+		err = -ENOMEM;
+		goto err_free_pages;
+	}
+
+	mips_data->pfn_mask = (phys_bus_width > 32) ? ROGUE_MIPSFW_ENTRYLO_PFN_MASK_ABOVE_32BIT :
+						      ROGUE_MIPSFW_ENTRYLO_PFN_MASK;
+
+	mips_data->cache_policy = (phys_bus_width > 32) ? ROGUE_MIPSFW_CACHED_POLICY_ABOVE_32BIT :
+							  ROGUE_MIPSFW_CACHED_POLICY;
+
+	pvr_dev->fw_dev.processor_data.mips_data = mips_data;
+
+	return 0;
+
+err_free_pages:
+	for (; page_nr >= 0; page_nr--) {
+		if (mips_data->pt_dma_addr[page_nr])
+			dma_unmap_page(from_pvr_device(pvr_dev)->dev,
+				       mips_data->pt_dma_addr[page_nr], PAGE_SIZE, DMA_TO_DEVICE);
+
+		if (mips_data->pt_pages[page_nr])
+			__free_page(mips_data->pt_pages[page_nr]);
+	}
+
+	return err;
+}
+
+/**
+ * pvr_vm_mips_fini() - Release MIPS FW pagetable
+ * @pvr_dev: Target PowerVR device.
+ */
+void
+pvr_vm_mips_fini(struct pvr_device *pvr_dev)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data;
+	int page_nr;
+
+	vunmap(mips_data->pt);
+	for (page_nr = ARRAY_SIZE(mips_data->pt_pages) - 1; page_nr >= 0; page_nr--) {
+		dma_unmap_page(from_pvr_device(pvr_dev)->dev,
+			       mips_data->pt_dma_addr[page_nr], PAGE_SIZE, DMA_TO_DEVICE);
+
+		__free_page(mips_data->pt_pages[page_nr]);
+	}
+
+	fw_dev->processor_data.mips_data = NULL;
+}
+
+static u32
+get_mips_pte_flags(bool read, bool write, u32 cache_policy)
+{
+	u32 flags = 0;
+
+	if (read && write) /* Read/write. */
+		flags |= ROGUE_MIPSFW_ENTRYLO_DIRTY_EN;
+	else if (write)    /* Write only. */
+		flags |= ROGUE_MIPSFW_ENTRYLO_READ_INHIBIT_EN;
+	else
+		WARN_ON(!read);
+
+	flags |= cache_policy << ROGUE_MIPSFW_ENTRYLO_CACHE_POLICY_SHIFT;
+
+	flags |= ROGUE_MIPSFW_ENTRYLO_VALID_EN | ROGUE_MIPSFW_ENTRYLO_GLOBAL_EN;
+
+	return flags;
+}
+
+/**
+ * pvr_vm_mips_map() - Map a FW object into MIPS address space
+ * @pvr_dev: Target PowerVR device.
+ * @fw_obj: FW object to map.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * -%EINVAL if object does not reside within FW address space, or
+ *  * Any error returned by pvr_fw_object_get_dma_addr().
+ */
+int
+pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data;
+	struct pvr_gem_object *pvr_obj = fw_obj->gem;
+	const u64 start = fw_obj->fw_mm_node.start;
+	const u64 size = fw_obj->fw_mm_node.size;
+	u64 end;
+	u32 cache_policy;
+	u32 pte_flags;
+	u32 start_pfn;
+	u32 end_pfn;
+	s32 pfn;
+	int err;
+
+	if (check_add_overflow(start, size - 1, &end))
+		return -EINVAL;
+
+	if (start < ROGUE_FW_HEAP_BASE ||
+	    start >= ROGUE_FW_HEAP_BASE + fw_dev->fw_heap_info.raw_size ||
+	    end < ROGUE_FW_HEAP_BASE ||
+	    end >= ROGUE_FW_HEAP_BASE + fw_dev->fw_heap_info.raw_size ||
+	    (start & ROGUE_MIPSFW_PAGE_MASK_4K) ||
+	    ((end + 1) & ROGUE_MIPSFW_PAGE_MASK_4K))
+		return -EINVAL;
+
+	start_pfn = (start & fw_dev->fw_heap_info.offset_mask) >> ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K;
+	end_pfn = (end & fw_dev->fw_heap_info.offset_mask) >> ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K;
+
+	if (pvr_obj->flags & PVR_BO_FW_FLAGS_DEVICE_UNCACHED)
+		cache_policy = ROGUE_MIPSFW_UNCACHED_CACHE_POLICY;
+	else
+		cache_policy = mips_data->cache_policy;
+
+	pte_flags = get_mips_pte_flags(true, true, cache_policy);
+
+	for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
+		dma_addr_t dma_addr;
+		u32 pte;
+
+		err = pvr_fw_object_get_dma_addr(fw_obj,
+						 (pfn - start_pfn) <<
+						 ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K,
+						 &dma_addr);
+		if (err)
+			goto err_unmap_pages;
+
+		pte = ((dma_addr >> ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K)
+		       << ROGUE_MIPSFW_ENTRYLO_PFN_SHIFT) & mips_data->pfn_mask;
+		pte |= pte_flags;
+
+		WRITE_ONCE(mips_data->pt[pfn], pte);
+	}
+
+	pvr_mmu_flush_request_all(pvr_dev);
+
+	return 0;
+
+err_unmap_pages:
+	for (; pfn >= start_pfn; pfn--)
+		WRITE_ONCE(mips_data->pt[pfn], 0);
+
+	pvr_mmu_flush_request_all(pvr_dev);
+	WARN_ON(pvr_mmu_flush_exec(pvr_dev, true));
+
+	return err;
+}
+
+/**
+ * pvr_vm_mips_unmap() - Unmap a FW object into MIPS address space
+ * @pvr_dev: Target PowerVR device.
+ * @fw_obj: FW object to unmap.
+ */
+void
+pvr_vm_mips_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
+{
+	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
+	struct pvr_fw_mips_data *mips_data = fw_dev->processor_data.mips_data;
+	const u64 start = fw_obj->fw_mm_node.start;
+	const u64 size = fw_obj->fw_mm_node.size;
+	const u64 end = start + size;
+
+	const u32 start_pfn = (start & fw_dev->fw_heap_info.offset_mask) >>
+			      ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K;
+	const u32 end_pfn = (end & fw_dev->fw_heap_info.offset_mask) >>
+			    ROGUE_MIPSFW_LOG2_PAGE_SIZE_4K;
+
+	for (u32 pfn = start_pfn; pfn < end_pfn; pfn++)
+		WRITE_ONCE(mips_data->pt[pfn], 0);
+
+	pvr_mmu_flush_request_all(pvr_dev);
+	WARN_ON(pvr_mmu_flush_exec(pvr_dev, true));
+}
diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.h b/drivers/gpu/drm/imagination/pvr_vm_mips.h
new file mode 100644
index 0000000000000..0fd59f68fb5b8
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_vm_mips.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_VM_MIPS_H
+#define PVR_VM_MIPS_H
+
+/* Forward declaration from pvr_device.h. */
+struct pvr_device;
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+int
+pvr_vm_mips_init(struct pvr_device *pvr_dev);
+void
+pvr_vm_mips_fini(struct pvr_device *pvr_dev);
+int
+pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj);
+void
+pvr_vm_mips_unmap(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj);
+
+#endif /* PVR_VM_MIPS_H */
-- 
GitLab


From 6eedddab733b350886571f98b810108b13bf74ae Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:36 +0000
Subject: [PATCH 0411/2340] drm/imagination: Implement free list and HWRT
 create and destroy ioctls

Implement ioctls to create and destroy free lists and HWRT datasets. Free
lists are used for GPU-side memory allocation during geometry processing.
HWRT datasets are the FW-side structures representing render targets.

Changes since v8:
- Corrected license identifiers

Changes since v6:
- Fix out-of-bounds shift in get_cr_multisamplectl_val()

Changes since v4:
- Remove use of drm_gem_shmem_get_pages()

Changes since v3:
- Support free list grow requests from FW
- Use drm_dev_{enter,exit}

Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Link: https://lore.kernel.org/r/919358c5887a7628da588c455a5bb7e3ea4b47ae.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Makefile        |   2 +
 drivers/gpu/drm/imagination/pvr_ccb.c       |  10 +
 drivers/gpu/drm/imagination/pvr_device.h    |  24 +
 drivers/gpu/drm/imagination/pvr_drv.c       | 112 +++-
 drivers/gpu/drm/imagination/pvr_free_list.c | 625 ++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_free_list.h | 195 ++++++
 drivers/gpu/drm/imagination/pvr_hwrt.c      | 549 +++++++++++++++++
 drivers/gpu/drm/imagination/pvr_hwrt.h      | 165 ++++++
 8 files changed, 1678 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_free_list.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_free_list.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_hwrt.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_hwrt.h

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 71dc36cc6b9df..2bd501018d5d5 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -8,12 +8,14 @@ powervr-y := \
 	pvr_device.o \
 	pvr_device_info.o \
 	pvr_drv.o \
+	pvr_free_list.o \
 	pvr_fw.o \
 	pvr_fw_meta.o \
 	pvr_fw_mips.o \
 	pvr_fw_startstop.o \
 	pvr_fw_trace.o \
 	pvr_gem.o \
+	pvr_hwrt.o \
 	pvr_mmu.o \
 	pvr_power.o \
 	pvr_vm.o \
diff --git a/drivers/gpu/drm/imagination/pvr_ccb.c b/drivers/gpu/drm/imagination/pvr_ccb.c
index 48f06f58f3f12..4deeac7ed40a4 100644
--- a/drivers/gpu/drm/imagination/pvr_ccb.c
+++ b/drivers/gpu/drm/imagination/pvr_ccb.c
@@ -4,6 +4,7 @@
 #include "pvr_ccb.h"
 #include "pvr_device.h"
 #include "pvr_drv.h"
+#include "pvr_free_list.h"
 #include "pvr_fw.h"
 #include "pvr_gem.h"
 #include "pvr_power.h"
@@ -139,6 +140,15 @@ process_fwccb_command(struct pvr_device *pvr_dev, struct rogue_fwif_fwccb_cmd *c
 		pvr_power_reset(pvr_dev, false);
 		break;
 
+	case ROGUE_FWIF_FWCCB_CMD_FREELISTS_RECONSTRUCTION:
+		pvr_free_list_process_reconstruct_req(pvr_dev,
+						      &cmd->cmd_data.cmd_freelists_reconstruction);
+		break;
+
+	case ROGUE_FWIF_FWCCB_CMD_FREELIST_GROW:
+		pvr_free_list_process_grow_req(pvr_dev, &cmd->cmd_data.cmd_free_list_gs);
+		break;
+
 	default:
 		drm_info(from_pvr_device(pvr_dev), "Received unknown FWCCB command %x\n",
 			 cmd->cmd_type);
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 8853249f48840..f5b82b7935665 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -152,6 +152,14 @@ struct pvr_device {
 	 */
 	atomic_t mmu_flush_cache_flags;
 
+	/**
+	 * @free_list_ids: Array of free lists belonging to this device. Array members
+	 *                 are of type "struct pvr_free_list *".
+	 *
+	 * This array is used to allocate IDs used by the firmware.
+	 */
+	struct xarray free_list_ids;
+
 	struct {
 		/** @work: Work item for watchdog callback. */
 		struct delayed_work work;
@@ -247,6 +255,22 @@ struct pvr_file {
 	 */
 	struct pvr_device *pvr_dev;
 
+	/**
+	 * @free_list_handles: Array of free lists belonging to this file. Array
+	 * members are of type "struct pvr_free_list *".
+	 *
+	 * This array is used to allocate handles returned to userspace.
+	 */
+	struct xarray free_list_handles;
+
+	/**
+	 * @hwrt_handles: Array of HWRT datasets belonging to this file. Array
+	 * members are of type "struct pvr_hwrt_dataset *".
+	 *
+	 * This array is used to allocate handles returned to userspace.
+	 */
+	struct xarray hwrt_handles;
+
 	/**
 	 * @vm_ctx_handles: Array of VM contexts belonging to this file. Array
 	 * members are of type "struct pvr_vm_context *".
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index 33b38c0d79c8e..f485e2cc60f96 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -3,7 +3,9 @@
 
 #include "pvr_device.h"
 #include "pvr_drv.h"
+#include "pvr_free_list.h"
 #include "pvr_gem.h"
+#include "pvr_hwrt.h"
 #include "pvr_mmu.h"
 #include "pvr_power.h"
 #include "pvr_rogue_defs.h"
@@ -711,7 +713,41 @@ static int
 pvr_ioctl_create_free_list(struct drm_device *drm_dev, void *raw_args,
 			   struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_create_free_list_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_free_list *free_list;
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	free_list = pvr_free_list_create(pvr_file, args);
+	if (IS_ERR(free_list)) {
+		err = PTR_ERR(free_list);
+		goto err_drm_dev_exit;
+	}
+
+	/* Allocate object handle for userspace. */
+	err = xa_alloc(&pvr_file->free_list_handles,
+		       &args->handle,
+		       free_list,
+		       xa_limit_32b,
+		       GFP_KERNEL);
+	if (err < 0)
+		goto err_cleanup;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_cleanup:
+	pvr_free_list_put(free_list);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -731,7 +767,19 @@ static int
 pvr_ioctl_destroy_free_list(struct drm_device *drm_dev, void *raw_args,
 			    struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_destroy_free_list_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_free_list *free_list;
+
+	if (args->_padding_4)
+		return -EINVAL;
+
+	free_list = xa_erase(&pvr_file->free_list_handles, args->handle);
+	if (!free_list)
+		return -EINVAL;
+
+	pvr_free_list_put(free_list);
+	return 0;
 }
 
 /**
@@ -751,7 +799,41 @@ static int
 pvr_ioctl_create_hwrt_dataset(struct drm_device *drm_dev, void *raw_args,
 			      struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_create_hwrt_dataset_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_hwrt_dataset *hwrt;
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	hwrt = pvr_hwrt_dataset_create(pvr_file, args);
+	if (IS_ERR(hwrt)) {
+		err = PTR_ERR(hwrt);
+		goto err_drm_dev_exit;
+	}
+
+	/* Allocate object handle for userspace. */
+	err = xa_alloc(&pvr_file->hwrt_handles,
+		       &args->handle,
+		       hwrt,
+		       xa_limit_32b,
+		       GFP_KERNEL);
+	if (err < 0)
+		goto err_cleanup;
+
+	drm_dev_exit(idx);
+
+	return 0;
+
+err_cleanup:
+	pvr_hwrt_dataset_put(hwrt);
+
+err_drm_dev_exit:
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 /**
@@ -771,7 +853,19 @@ static int
 pvr_ioctl_destroy_hwrt_dataset(struct drm_device *drm_dev, void *raw_args,
 			       struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_destroy_hwrt_dataset_args *args = raw_args;
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	struct pvr_hwrt_dataset *hwrt;
+
+	if (args->_padding_4)
+		return -EINVAL;
+
+	hwrt = xa_erase(&pvr_file->hwrt_handles, args->handle);
+	if (!hwrt)
+		return -EINVAL;
+
+	pvr_hwrt_dataset_put(hwrt);
+	return 0;
 }
 
 /**
@@ -1195,6 +1289,8 @@ pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file)
 	 */
 	pvr_file->pvr_dev = pvr_dev;
 
+	xa_init_flags(&pvr_file->free_list_handles, XA_FLAGS_ALLOC1);
+	xa_init_flags(&pvr_file->hwrt_handles, XA_FLAGS_ALLOC1);
 	xa_init_flags(&pvr_file->vm_ctx_handles, XA_FLAGS_ALLOC1);
 
 	/*
@@ -1223,6 +1319,8 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 	struct pvr_file *pvr_file = to_pvr_file(file);
 
 	/* Drop references on any remaining objects. */
+	pvr_destroy_free_lists_for_file(pvr_file);
+	pvr_destroy_hwrt_datasets_for_file(pvr_file);
 	pvr_destroy_vm_contexts_for_file(pvr_file);
 
 	kfree(pvr_file);
@@ -1281,6 +1379,8 @@ pvr_probe(struct platform_device *plat_dev)
 	if (err)
 		goto err_device_fini;
 
+	xa_init_flags(&pvr_dev->free_list_ids, XA_FLAGS_ALLOC1);
+
 	return 0;
 
 err_device_fini:
@@ -1298,6 +1398,10 @@ pvr_remove(struct platform_device *plat_dev)
 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
 	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
 
+	WARN_ON(!xa_empty(&pvr_dev->free_list_ids));
+
+	xa_destroy(&pvr_dev->free_list_ids);
+
 	pm_runtime_suspend(drm_dev->dev);
 	pvr_device_fini(pvr_dev);
 	drm_dev_unplug(drm_dev);
diff --git a/drivers/gpu/drm/imagination/pvr_free_list.c b/drivers/gpu/drm/imagination/pvr_free_list.c
new file mode 100644
index 0000000000000..c61fd417edcb7
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_free_list.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_free_list.h"
+#include "pvr_gem.h"
+#include "pvr_hwrt.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_vm.h"
+
+#include <drm/drm_gem.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <uapi/drm/pvr_drm.h>
+
+#define FREE_LIST_ENTRY_SIZE sizeof(u32)
+
+#define FREE_LIST_ALIGNMENT \
+	((ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE / FREE_LIST_ENTRY_SIZE) - 1)
+
+#define FREE_LIST_MIN_PAGES 50
+#define FREE_LIST_MIN_PAGES_BRN66011 40
+#define FREE_LIST_MIN_PAGES_ROGUEXE 25
+
+/**
+ * pvr_get_free_list_min_pages() - Get minimum free list size for this device
+ * @pvr_dev: Device pointer.
+ *
+ * Returns:
+ *  * Minimum free list size, in PM physical pages.
+ */
+u32
+pvr_get_free_list_min_pages(struct pvr_device *pvr_dev)
+{
+	u32 value;
+
+	if (PVR_HAS_FEATURE(pvr_dev, roguexe)) {
+		if (PVR_HAS_QUIRK(pvr_dev, 66011))
+			value = FREE_LIST_MIN_PAGES_BRN66011;
+		else
+			value = FREE_LIST_MIN_PAGES_ROGUEXE;
+	} else {
+		value = FREE_LIST_MIN_PAGES;
+	}
+
+	return value;
+}
+
+static int
+free_list_create_kernel_structure(struct pvr_file *pvr_file,
+				  struct drm_pvr_ioctl_create_free_list_args *args,
+				  struct pvr_free_list *free_list)
+{
+	struct pvr_gem_object *free_list_obj;
+	struct pvr_vm_context *vm_ctx;
+	u64 free_list_size;
+	int err;
+
+	if (args->grow_threshold > 100 ||
+	    args->initial_num_pages > args->max_num_pages ||
+	    args->grow_num_pages > args->max_num_pages ||
+	    args->max_num_pages == 0 ||
+	    (args->initial_num_pages < args->max_num_pages && !args->grow_num_pages) ||
+	    (args->initial_num_pages == args->max_num_pages && args->grow_num_pages))
+		return -EINVAL;
+
+	if ((args->initial_num_pages & FREE_LIST_ALIGNMENT) ||
+	    (args->max_num_pages & FREE_LIST_ALIGNMENT) ||
+	    (args->grow_num_pages & FREE_LIST_ALIGNMENT))
+		return -EINVAL;
+
+	vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (!vm_ctx)
+		return -EINVAL;
+
+	free_list_obj = pvr_vm_find_gem_object(vm_ctx, args->free_list_gpu_addr,
+					       NULL, &free_list_size);
+	if (!free_list_obj) {
+		err = -EINVAL;
+		goto err_put_vm_context;
+	}
+
+	if ((free_list_obj->flags & DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS) ||
+	    !(free_list_obj->flags & DRM_PVR_BO_PM_FW_PROTECT) ||
+	    free_list_size < (args->max_num_pages * FREE_LIST_ENTRY_SIZE)) {
+		err = -EINVAL;
+		goto err_put_free_list_obj;
+	}
+
+	free_list->pvr_dev = pvr_file->pvr_dev;
+	free_list->current_pages = 0;
+	free_list->max_pages = args->max_num_pages;
+	free_list->grow_pages = args->grow_num_pages;
+	free_list->grow_threshold = args->grow_threshold;
+	free_list->obj = free_list_obj;
+	free_list->free_list_gpu_addr = args->free_list_gpu_addr;
+	free_list->initial_num_pages = args->initial_num_pages;
+
+	pvr_vm_context_put(vm_ctx);
+
+	return 0;
+
+err_put_free_list_obj:
+	pvr_gem_object_put(free_list_obj);
+
+err_put_vm_context:
+	pvr_vm_context_put(vm_ctx);
+
+	return err;
+}
+
+static void
+free_list_destroy_kernel_structure(struct pvr_free_list *free_list)
+{
+	WARN_ON(!list_empty(&free_list->hwrt_list));
+
+	pvr_gem_object_put(free_list->obj);
+}
+
+/**
+ * calculate_free_list_ready_pages_locked() - Function to work out the number of free
+ *                                            list pages to reserve for growing within
+ *                                            the FW without having to wait for the
+ *                                            host to progress a grow request
+ * @free_list: Pointer to free list.
+ * @pages: Total pages currently in free list.
+ *
+ * If the threshold or grow size means less than the alignment size (4 pages on
+ * Rogue), then the feature is not used.
+ *
+ * Caller must hold &free_list->lock.
+ *
+ * Return: number of pages to reserve.
+ */
+static u32
+calculate_free_list_ready_pages_locked(struct pvr_free_list *free_list, u32 pages)
+{
+	u32 ready_pages;
+
+	lockdep_assert_held(&free_list->lock);
+
+	ready_pages = ((pages * free_list->grow_threshold) / 100);
+
+	/* The number of pages must be less than the grow size. */
+	ready_pages = min(ready_pages, free_list->grow_pages);
+
+	/*
+	 * The number of pages must be a multiple of the free list align size.
+	 */
+	ready_pages &= ~FREE_LIST_ALIGNMENT;
+
+	return ready_pages;
+}
+
+static u32
+calculate_free_list_ready_pages(struct pvr_free_list *free_list, u32 pages)
+{
+	u32 ret;
+
+	mutex_lock(&free_list->lock);
+
+	ret = calculate_free_list_ready_pages_locked(free_list, pages);
+
+	mutex_unlock(&free_list->lock);
+
+	return ret;
+}
+
+static void
+free_list_fw_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_freelist *fw_data = cpu_ptr;
+	struct pvr_free_list *free_list = priv;
+	u32 ready_pages;
+
+	/* Fill out FW structure */
+	ready_pages = calculate_free_list_ready_pages(free_list,
+						      free_list->initial_num_pages);
+
+	fw_data->max_pages = free_list->max_pages;
+	fw_data->current_pages = free_list->initial_num_pages - ready_pages;
+	fw_data->grow_pages = free_list->grow_pages;
+	fw_data->ready_pages = ready_pages;
+	fw_data->freelist_id = free_list->fw_id;
+	fw_data->grow_pending = false;
+	fw_data->current_stack_top = fw_data->current_pages - 1;
+	fw_data->freelist_dev_addr = free_list->free_list_gpu_addr;
+	fw_data->current_dev_addr = (fw_data->freelist_dev_addr +
+				     ((fw_data->max_pages - fw_data->current_pages) *
+				      FREE_LIST_ENTRY_SIZE)) &
+				    ~((u64)ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE - 1);
+}
+
+static int
+free_list_create_fw_structure(struct pvr_file *pvr_file,
+			      struct drm_pvr_ioctl_create_free_list_args *args,
+			      struct pvr_free_list *free_list)
+{
+	struct pvr_device *pvr_dev = pvr_file->pvr_dev;
+
+	/*
+	 * Create and map the FW structure so we can initialise it. This is not
+	 * accessed on the CPU side post-initialisation so the mapping lifetime
+	 * is only for this function.
+	 */
+	free_list->fw_data = pvr_fw_object_create_and_map(pvr_dev, sizeof(*free_list->fw_data),
+							  PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+							  free_list_fw_init, free_list,
+							  &free_list->fw_obj);
+	if (IS_ERR(free_list->fw_data))
+		return PTR_ERR(free_list->fw_data);
+
+	return 0;
+}
+
+static void
+free_list_destroy_fw_structure(struct pvr_free_list *free_list)
+{
+	pvr_fw_object_unmap_and_destroy(free_list->fw_obj);
+}
+
+static int
+pvr_free_list_insert_pages_locked(struct pvr_free_list *free_list,
+				  struct sg_table *sgt, u32 offset, u32 num_pages)
+{
+	struct sg_dma_page_iter dma_iter;
+	u32 *page_list;
+
+	lockdep_assert_held(&free_list->lock);
+
+	page_list = pvr_gem_object_vmap(free_list->obj);
+	if (IS_ERR(page_list))
+		return PTR_ERR(page_list);
+
+	offset /= FREE_LIST_ENTRY_SIZE;
+	/* clang-format off */
+	for_each_sgtable_dma_page(sgt, &dma_iter, 0) {
+		dma_addr_t dma_addr = sg_page_iter_dma_address(&dma_iter);
+		u64 dma_pfn = dma_addr >>
+			       ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT;
+		u32 dma_addr_offset;
+
+		BUILD_BUG_ON(ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE > PAGE_SIZE);
+
+		for (dma_addr_offset = 0; dma_addr_offset < PAGE_SIZE;
+		     dma_addr_offset += ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE) {
+			WARN_ON_ONCE(dma_pfn >> 32);
+
+			page_list[offset++] = (u32)dma_pfn;
+			dma_pfn++;
+
+			num_pages--;
+			if (!num_pages)
+				break;
+		}
+
+		if (!num_pages)
+			break;
+	};
+	/* clang-format on */
+
+	/* Make sure our free_list update is flushed. */
+	wmb();
+
+	pvr_gem_object_vunmap(free_list->obj);
+
+	return 0;
+}
+
+static int
+pvr_free_list_insert_node_locked(struct pvr_free_list_node *free_list_node)
+{
+	struct pvr_free_list *free_list = free_list_node->free_list;
+	struct sg_table *sgt;
+	u32 start_page;
+	u32 offset;
+	int err;
+
+	lockdep_assert_held(&free_list->lock);
+
+	start_page = free_list->max_pages - free_list->current_pages -
+		     free_list_node->num_pages;
+	offset = (start_page * FREE_LIST_ENTRY_SIZE) &
+		  ~((u64)ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE - 1);
+
+	sgt = drm_gem_shmem_get_pages_sgt(&free_list_node->mem_obj->base);
+	if (WARN_ON(IS_ERR(sgt)))
+		return PTR_ERR(sgt);
+
+	err = pvr_free_list_insert_pages_locked(free_list, sgt,
+						offset, free_list_node->num_pages);
+	if (!err)
+		free_list->current_pages += free_list_node->num_pages;
+
+	return err;
+}
+
+static int
+pvr_free_list_grow(struct pvr_free_list *free_list, u32 num_pages)
+{
+	struct pvr_device *pvr_dev = free_list->pvr_dev;
+	struct pvr_free_list_node *free_list_node;
+	int err;
+
+	mutex_lock(&free_list->lock);
+
+	if (num_pages & FREE_LIST_ALIGNMENT) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
+	free_list_node = kzalloc(sizeof(*free_list_node), GFP_KERNEL);
+	if (!free_list_node) {
+		err = -ENOMEM;
+		goto err_unlock;
+	}
+
+	free_list_node->num_pages = num_pages;
+	free_list_node->free_list = free_list;
+
+	free_list_node->mem_obj = pvr_gem_object_create(pvr_dev,
+							num_pages <<
+							ROGUE_BIF_PM_PHYSICAL_PAGE_ALIGNSHIFT,
+							PVR_BO_FW_FLAGS_DEVICE_CACHED);
+	if (IS_ERR(free_list_node->mem_obj)) {
+		err = PTR_ERR(free_list_node->mem_obj);
+		goto err_free;
+	}
+
+	err = pvr_free_list_insert_node_locked(free_list_node);
+	if (err)
+		goto err_destroy_gem_object;
+
+	list_add_tail(&free_list_node->node, &free_list->mem_block_list);
+
+	/*
+	 * Reserve a number ready pages to allow the FW to process OOM quickly
+	 * and asynchronously request a grow.
+	 */
+	free_list->ready_pages =
+		calculate_free_list_ready_pages_locked(free_list,
+						       free_list->current_pages);
+	free_list->current_pages -= free_list->ready_pages;
+
+	mutex_unlock(&free_list->lock);
+
+	return 0;
+
+err_destroy_gem_object:
+	pvr_gem_object_put(free_list_node->mem_obj);
+
+err_free:
+	kfree(free_list_node);
+
+err_unlock:
+	mutex_unlock(&free_list->lock);
+
+	return err;
+}
+
+void pvr_free_list_process_grow_req(struct pvr_device *pvr_dev,
+				    struct rogue_fwif_fwccb_cmd_freelist_gs_data *req)
+{
+	struct pvr_free_list *free_list = pvr_free_list_lookup_id(pvr_dev, req->freelist_id);
+	struct rogue_fwif_kccb_cmd resp_cmd = {
+		.cmd_type = ROGUE_FWIF_KCCB_CMD_FREELIST_GROW_UPDATE,
+	};
+	struct rogue_fwif_freelist_gs_data *resp = &resp_cmd.cmd_data.free_list_gs_data;
+	u32 grow_pages = 0;
+
+	/* If we don't have a freelist registered for this ID, we can't do much. */
+	if (WARN_ON(!free_list))
+		return;
+
+	/* Since the FW made the request, it has already consumed the ready pages,
+	 * update the host struct.
+	 */
+	free_list->current_pages += free_list->ready_pages;
+	free_list->ready_pages = 0;
+
+	/* If the grow succeeds, update the grow_pages argument. */
+	if (!pvr_free_list_grow(free_list, free_list->grow_pages))
+		grow_pages = free_list->grow_pages;
+
+	/* Now prepare the response and send it back to the FW. */
+	pvr_fw_object_get_fw_addr(free_list->fw_obj, &resp->freelist_fw_addr);
+	resp->delta_pages = grow_pages;
+	resp->new_pages = free_list->current_pages + free_list->ready_pages;
+	resp->ready_pages = free_list->ready_pages;
+	pvr_free_list_put(free_list);
+
+	WARN_ON(pvr_kccb_send_cmd(pvr_dev, &resp_cmd, NULL));
+}
+
+static void
+pvr_free_list_free_node(struct pvr_free_list_node *free_list_node)
+{
+	pvr_gem_object_put(free_list_node->mem_obj);
+
+	kfree(free_list_node);
+}
+
+/**
+ * pvr_free_list_create() - Create a new free list and return an object pointer
+ * @pvr_file: Pointer to pvr_file structure.
+ * @args: Creation arguments from userspace.
+ *
+ * Return:
+ *  * Pointer to new free_list, or
+ *  * ERR_PTR(-%ENOMEM) on out of memory.
+ */
+struct pvr_free_list *
+pvr_free_list_create(struct pvr_file *pvr_file,
+		     struct drm_pvr_ioctl_create_free_list_args *args)
+{
+	struct pvr_free_list *free_list;
+	int err;
+
+	/* Create and fill out the kernel structure */
+	free_list = kzalloc(sizeof(*free_list), GFP_KERNEL);
+
+	if (!free_list)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&free_list->ref_count);
+	INIT_LIST_HEAD(&free_list->mem_block_list);
+	INIT_LIST_HEAD(&free_list->hwrt_list);
+	mutex_init(&free_list->lock);
+
+	err = free_list_create_kernel_structure(pvr_file, args, free_list);
+	if (err < 0)
+		goto err_free;
+
+	/* Allocate global object ID for firmware. */
+	err = xa_alloc(&pvr_file->pvr_dev->free_list_ids,
+		       &free_list->fw_id,
+		       free_list,
+		       xa_limit_32b,
+		       GFP_KERNEL);
+	if (err)
+		goto err_destroy_kernel_structure;
+
+	err = free_list_create_fw_structure(pvr_file, args, free_list);
+	if (err < 0)
+		goto err_free_fw_id;
+
+	err = pvr_free_list_grow(free_list, args->initial_num_pages);
+	if (err < 0)
+		goto err_fw_struct_cleanup;
+
+	return free_list;
+
+err_fw_struct_cleanup:
+	WARN_ON(pvr_fw_structure_cleanup(free_list->pvr_dev,
+					 ROGUE_FWIF_CLEANUP_FREELIST,
+					 free_list->fw_obj, 0));
+
+err_free_fw_id:
+	xa_erase(&free_list->pvr_dev->free_list_ids, free_list->fw_id);
+
+err_destroy_kernel_structure:
+	free_list_destroy_kernel_structure(free_list);
+
+err_free:
+	mutex_destroy(&free_list->lock);
+	kfree(free_list);
+
+	return ERR_PTR(err);
+}
+
+static void
+pvr_free_list_release(struct kref *ref_count)
+{
+	struct pvr_free_list *free_list =
+		container_of(ref_count, struct pvr_free_list, ref_count);
+	struct list_head *pos, *n;
+	int err;
+
+	xa_erase(&free_list->pvr_dev->free_list_ids, free_list->fw_id);
+
+	err = pvr_fw_structure_cleanup(free_list->pvr_dev,
+				       ROGUE_FWIF_CLEANUP_FREELIST,
+				       free_list->fw_obj, 0);
+	if (err == -EBUSY) {
+		/* Flush the FWCCB to process any HWR or freelist reconstruction
+		 * request that might keep the freelist busy, and try again.
+		 */
+		pvr_fwccb_process(free_list->pvr_dev);
+		err = pvr_fw_structure_cleanup(free_list->pvr_dev,
+					       ROGUE_FWIF_CLEANUP_FREELIST,
+					       free_list->fw_obj, 0);
+	}
+
+	WARN_ON(err);
+
+	/* clang-format off */
+	list_for_each_safe(pos, n, &free_list->mem_block_list) {
+		struct pvr_free_list_node *free_list_node =
+			container_of(pos, struct pvr_free_list_node, node);
+
+		list_del(pos);
+		pvr_free_list_free_node(free_list_node);
+	}
+	/* clang-format on */
+
+	free_list_destroy_kernel_structure(free_list);
+	free_list_destroy_fw_structure(free_list);
+	mutex_destroy(&free_list->lock);
+	kfree(free_list);
+}
+
+/**
+ * pvr_destroy_free_lists_for_file: Destroy any free lists associated with the
+ * given file.
+ * @pvr_file: Pointer to pvr_file structure.
+ *
+ * Removes all free lists associated with @pvr_file from the device free_list
+ * list and drops initial references. Free lists will then be destroyed once
+ * all outstanding references are dropped.
+ */
+void pvr_destroy_free_lists_for_file(struct pvr_file *pvr_file)
+{
+	struct pvr_free_list *free_list;
+	unsigned long handle;
+
+	xa_for_each(&pvr_file->free_list_handles, handle, free_list) {
+		(void)free_list;
+		pvr_free_list_put(xa_erase(&pvr_file->free_list_handles, handle));
+	}
+}
+
+/**
+ * pvr_free_list_put() - Release reference on free list
+ * @free_list: Pointer to list to release reference on
+ */
+void
+pvr_free_list_put(struct pvr_free_list *free_list)
+{
+	if (free_list)
+		kref_put(&free_list->ref_count, pvr_free_list_release);
+}
+
+void pvr_free_list_add_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data)
+{
+	mutex_lock(&free_list->lock);
+
+	list_add_tail(&hwrt_data->freelist_node, &free_list->hwrt_list);
+
+	mutex_unlock(&free_list->lock);
+}
+
+void pvr_free_list_remove_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data)
+{
+	mutex_lock(&free_list->lock);
+
+	list_del(&hwrt_data->freelist_node);
+
+	mutex_unlock(&free_list->lock);
+}
+
+static void
+pvr_free_list_reconstruct(struct pvr_device *pvr_dev, u32 freelist_id)
+{
+	struct pvr_free_list *free_list = pvr_free_list_lookup_id(pvr_dev, freelist_id);
+	struct pvr_free_list_node *free_list_node;
+	struct rogue_fwif_freelist *fw_data;
+	struct pvr_hwrt_data *hwrt_data;
+
+	if (!free_list)
+		return;
+
+	mutex_lock(&free_list->lock);
+
+	/* Rebuild the free list based on the memory block list. */
+	free_list->current_pages = 0;
+
+	list_for_each_entry(free_list_node, &free_list->mem_block_list, node)
+		WARN_ON(pvr_free_list_insert_node_locked(free_list_node));
+
+	/*
+	 * Remove the ready pages, which are reserved to allow the FW to process OOM quickly and
+	 * asynchronously request a grow.
+	 */
+	free_list->current_pages -= free_list->ready_pages;
+
+	fw_data = free_list->fw_data;
+	fw_data->current_stack_top = fw_data->current_pages - 1;
+	fw_data->allocated_page_count = 0;
+	fw_data->allocated_mmu_page_count = 0;
+
+	/* Reset the state of any associated HWRTs. */
+	list_for_each_entry(hwrt_data, &free_list->hwrt_list, freelist_node) {
+		struct rogue_fwif_hwrtdata *hwrt_fw_data = pvr_fw_object_vmap(hwrt_data->fw_obj);
+
+		if (!WARN_ON(IS_ERR(hwrt_fw_data))) {
+			hwrt_fw_data->state = ROGUE_FWIF_RTDATA_STATE_HWR;
+			hwrt_fw_data->hwrt_data_flags &= ~HWRTDATA_HAS_LAST_GEOM;
+		}
+
+		pvr_fw_object_vunmap(hwrt_data->fw_obj);
+	}
+
+	mutex_unlock(&free_list->lock);
+
+	pvr_free_list_put(free_list);
+}
+
+void
+pvr_free_list_process_reconstruct_req(struct pvr_device *pvr_dev,
+				struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data *req)
+{
+	struct rogue_fwif_kccb_cmd resp_cmd = {
+		.cmd_type = ROGUE_FWIF_KCCB_CMD_FREELISTS_RECONSTRUCTION_UPDATE,
+	};
+	struct rogue_fwif_freelists_reconstruction_data *resp =
+		&resp_cmd.cmd_data.free_lists_reconstruction_data;
+
+	for (u32 i = 0; i < req->freelist_count; i++)
+		pvr_free_list_reconstruct(pvr_dev, req->freelist_ids[i]);
+
+	resp->freelist_count = req->freelist_count;
+	memcpy(resp->freelist_ids, req->freelist_ids,
+	       req->freelist_count * sizeof(resp->freelist_ids[0]));
+
+	WARN_ON(pvr_kccb_send_cmd(pvr_dev, &resp_cmd, NULL));
+}
diff --git a/drivers/gpu/drm/imagination/pvr_free_list.h b/drivers/gpu/drm/imagination/pvr_free_list.h
new file mode 100644
index 0000000000000..bfb4f5fc622c9
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_free_list.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_FREE_LIST_H
+#define PVR_FREE_LIST_H
+
+#include <linux/compiler_attributes.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <uapi/drm/pvr_drm.h>
+
+#include "pvr_device.h"
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_gem_object;
+
+/* Forward declaration from pvr_hwrt.h. */
+struct pvr_hwrt_data;
+
+/**
+ * struct pvr_free_list_node - structure representing an allocation in the free
+ *                             list
+ */
+struct pvr_free_list_node {
+	/** @node: List node for &pvr_free_list.mem_block_list. */
+	struct list_head node;
+
+	/** @free_list: Pointer to owning free list. */
+	struct pvr_free_list *free_list;
+
+	/** @num_pages: Number of pages in this node. */
+	u32 num_pages;
+
+	/** @mem_obj: GEM object representing the pages in this node. */
+	struct pvr_gem_object *mem_obj;
+};
+
+/**
+ * struct pvr_free_list - structure representing a free list
+ */
+struct pvr_free_list {
+	/** @ref_count: Reference count of object. */
+	struct kref ref_count;
+
+	/** @pvr_dev: Pointer to device that owns this object. */
+	struct pvr_device *pvr_dev;
+
+	/** @obj: GEM object representing the free list. */
+	struct pvr_gem_object *obj;
+
+	/** @fw_obj: FW object representing the FW-side structure. */
+	struct pvr_fw_object *fw_obj;
+
+	/** @fw_data: Pointer to CPU mapping of the FW-side structure. */
+	struct rogue_fwif_freelist *fw_data;
+
+	/**
+	 * @lock: Mutex protecting modification of the free list. Must be held when accessing any
+	 *        of the members below.
+	 */
+	struct mutex lock;
+
+	/** @fw_id: Firmware ID for this object. */
+	u32 fw_id;
+
+	/** @current_pages: Current number of pages in free list. */
+	u32 current_pages;
+
+	/** @max_pages: Maximum number of pages in free list. */
+	u32 max_pages;
+
+	/** @grow_pages: Pages to grow free list by per request. */
+	u32 grow_pages;
+
+	/**
+	 * @grow_threshold: Percentage of FL memory used that should trigger a
+	 *                  new grow request.
+	 */
+	u32 grow_threshold;
+
+	/**
+	 * @ready_pages: Number of pages reserved for FW to use while a grow
+	 *               request is being processed.
+	 */
+	u32 ready_pages;
+
+	/** @mem_block_list: List of memory blocks in this free list. */
+	struct list_head mem_block_list;
+
+	/** @hwrt_list: List of HWRTs using this free list. */
+	struct list_head hwrt_list;
+
+	/** @initial_num_pages: Initial number of pages in free list. */
+	u32 initial_num_pages;
+
+	/** @free_list_gpu_addr: Address of free list in GPU address space. */
+	u64 free_list_gpu_addr;
+};
+
+struct pvr_free_list *
+pvr_free_list_create(struct pvr_file *pvr_file,
+		     struct drm_pvr_ioctl_create_free_list_args *args);
+
+void
+pvr_destroy_free_lists_for_file(struct pvr_file *pvr_file);
+
+u32
+pvr_get_free_list_min_pages(struct pvr_device *pvr_dev);
+
+static __always_inline struct pvr_free_list *
+pvr_free_list_get(struct pvr_free_list *free_list)
+{
+	if (free_list)
+		kref_get(&free_list->ref_count);
+
+	return free_list;
+}
+
+/**
+ * pvr_free_list_lookup() - Lookup free list pointer from handle and file
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Object handle.
+ *
+ * Takes reference on free list object. Call pvr_free_list_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, is not a free list, or
+ *    does not belong to @pvr_file)
+ */
+static __always_inline struct pvr_free_list *
+pvr_free_list_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_free_list *free_list;
+
+	xa_lock(&pvr_file->free_list_handles);
+	free_list = pvr_free_list_get(xa_load(&pvr_file->free_list_handles, handle));
+	xa_unlock(&pvr_file->free_list_handles);
+
+	return free_list;
+}
+
+/**
+ * pvr_free_list_lookup_id() - Lookup free list pointer from FW ID
+ * @pvr_dev: Device pointer.
+ * @id: FW object ID.
+ *
+ * Takes reference on free list object. Call pvr_free_list_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, or is not a free list)
+ */
+static __always_inline struct pvr_free_list *
+pvr_free_list_lookup_id(struct pvr_device *pvr_dev, u32 id)
+{
+	struct pvr_free_list *free_list;
+
+	xa_lock(&pvr_dev->free_list_ids);
+
+	/* Contexts are removed from the ctx_ids set in the context release path,
+	 * meaning the ref_count reached zero before they get removed. We need
+	 * to make sure we're not trying to acquire a context that's being
+	 * destroyed.
+	 */
+	free_list = xa_load(&pvr_dev->free_list_ids, id);
+	if (free_list && !kref_get_unless_zero(&free_list->ref_count))
+		free_list = NULL;
+	xa_unlock(&pvr_dev->free_list_ids);
+
+	return free_list;
+}
+
+void
+pvr_free_list_put(struct pvr_free_list *free_list);
+
+void
+pvr_free_list_add_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data);
+void
+pvr_free_list_remove_hwrt(struct pvr_free_list *free_list, struct pvr_hwrt_data *hwrt_data);
+
+void pvr_free_list_process_grow_req(struct pvr_device *pvr_dev,
+				    struct rogue_fwif_fwccb_cmd_freelist_gs_data *req);
+
+void
+pvr_free_list_process_reconstruct_req(struct pvr_device *pvr_dev,
+				struct rogue_fwif_fwccb_cmd_freelists_reconstruction_data *req);
+
+#endif /* PVR_FREE_LIST_H */
diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.c b/drivers/gpu/drm/imagination/pvr_hwrt.c
new file mode 100644
index 0000000000000..c4213c18489e6
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_hwrt.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_free_list.h"
+#include "pvr_hwrt.h"
+#include "pvr_gem.h"
+#include "pvr_rogue_cr_defs_client.h"
+#include "pvr_rogue_fwif.h"
+
+#include <drm/drm_gem.h>
+#include <linux/bitops.h>
+#include <linux/math.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <uapi/drm/pvr_drm.h>
+
+static_assert(ROGUE_FWIF_NUM_RTDATAS == 2);
+static_assert(ROGUE_FWIF_NUM_GEOMDATAS == 1);
+static_assert(ROGUE_FWIF_NUM_RTDATA_FREELISTS == 2);
+
+/*
+ * struct pvr_rt_mtile_info - Render target macrotile information
+ */
+struct pvr_rt_mtile_info {
+	u32 mtile_x[3];
+	u32 mtile_y[3];
+	u32 tile_max_x;
+	u32 tile_max_y;
+	u32 tile_size_x;
+	u32 tile_size_y;
+	u32 num_tiles_x;
+	u32 num_tiles_y;
+};
+
+/* Size of Shadow Render Target Cache entry */
+#define SRTC_ENTRY_SIZE sizeof(u32)
+/* Size of Renders Accumulation Array entry */
+#define RAA_ENTRY_SIZE sizeof(u32)
+
+static int
+hwrt_init_kernel_structure(struct pvr_file *pvr_file,
+			   struct drm_pvr_ioctl_create_hwrt_dataset_args *args,
+			   struct pvr_hwrt_dataset *hwrt)
+{
+	struct pvr_device *pvr_dev = pvr_file->pvr_dev;
+	int err;
+	int i;
+
+	hwrt->pvr_dev = pvr_dev;
+	hwrt->max_rts = args->layers;
+
+	/* Get pointers to the free lists */
+	for (i = 0; i < ARRAY_SIZE(hwrt->free_lists); i++) {
+		hwrt->free_lists[i] = pvr_free_list_lookup(pvr_file,  args->free_list_handles[i]);
+		if (!hwrt->free_lists[i]) {
+			err = -EINVAL;
+			goto err_put_free_lists;
+		}
+	}
+
+	if (hwrt->free_lists[ROGUE_FW_LOCAL_FREELIST]->current_pages <
+	    pvr_get_free_list_min_pages(pvr_dev)) {
+		err = -EINVAL;
+		goto err_put_free_lists;
+	}
+
+	return 0;
+
+err_put_free_lists:
+	for (i = 0; i < ARRAY_SIZE(hwrt->free_lists); i++) {
+		pvr_free_list_put(hwrt->free_lists[i]);
+		hwrt->free_lists[i] = NULL;
+	}
+
+	return err;
+}
+
+static void
+hwrt_fini_kernel_structure(struct pvr_hwrt_dataset *hwrt)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hwrt->free_lists); i++) {
+		pvr_free_list_put(hwrt->free_lists[i]);
+		hwrt->free_lists[i] = NULL;
+	}
+}
+
+static void
+hwrt_fini_common_fw_structure(struct pvr_hwrt_dataset *hwrt)
+{
+	pvr_fw_object_destroy(hwrt->common_fw_obj);
+}
+
+static int
+get_cr_isp_mtile_size_val(struct pvr_device *pvr_dev, u32 samples,
+			  struct pvr_rt_mtile_info *info, u32 *value_out)
+{
+	u32 x = info->mtile_x[0];
+	u32 y = info->mtile_y[0];
+	u32 samples_per_pixel;
+	int err;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, isp_samples_per_pixel, &samples_per_pixel);
+	if (err)
+		return err;
+
+	if (samples_per_pixel == 1) {
+		if (samples >= 4)
+			x <<= 1;
+		if (samples >= 2)
+			y <<= 1;
+	} else if (samples_per_pixel == 2) {
+		if (samples >= 8)
+			x <<= 1;
+		if (samples >= 4)
+			y <<= 1;
+	} else if (samples_per_pixel == 4) {
+		if (samples >= 8)
+			y <<= 1;
+	} else {
+		WARN(true, "Unsupported ISP samples per pixel value");
+		return -EINVAL;
+	}
+
+	*value_out = ((x << ROGUE_CR_ISP_MTILE_SIZE_X_SHIFT) & ~ROGUE_CR_ISP_MTILE_SIZE_X_CLRMSK) |
+		     ((y << ROGUE_CR_ISP_MTILE_SIZE_Y_SHIFT) & ~ROGUE_CR_ISP_MTILE_SIZE_Y_CLRMSK);
+
+	return 0;
+}
+
+static int
+get_cr_multisamplectl_val(u32 samples, bool y_flip, u64 *value_out)
+{
+	static const struct {
+		u8 x[8];
+		u8 y[8];
+	} sample_positions[4] = {
+		/* 1 sample */
+		{
+			.x = { 8 },
+			.y = { 8 },
+		},
+		/* 2 samples */
+		{
+			.x = { 12, 4 },
+			.y = { 12, 4 },
+		},
+		/* 4 samples */
+		{
+			.x = { 6, 14, 2, 10 },
+			.y = { 2, 6, 10, 14 },
+		},
+		/* 8 samples */
+		{
+			.x = { 9, 7, 13, 5, 3, 1, 11, 15 },
+			.y = { 5, 11, 9, 3, 13, 7, 15, 1 },
+		},
+	};
+	const int idx = fls(samples) - 1;
+	u64 value = 0;
+
+	if (idx < 0 || idx > 3)
+		return -EINVAL;
+
+	for (u32 i = 0; i < 8; i++) {
+		value |= ((u64)sample_positions[idx].x[i]) << (i * 8);
+		if (y_flip)
+			value |= (((u64)(16 - sample_positions[idx].y[i]) & 0xf)) << (i * 8 + 4);
+		else
+			value |= ((u64)sample_positions[idx].y[i]) << (i * 8 + 4);
+	}
+
+	*value_out = value;
+
+	return 0;
+}
+
+static int
+get_cr_te_aa_val(struct pvr_device *pvr_dev, u32 samples, u32 *value_out)
+{
+	u32 samples_per_pixel;
+	u32 value = 0;
+	int err = 0;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, isp_samples_per_pixel, &samples_per_pixel);
+	if (err)
+		return err;
+
+	switch (samples_per_pixel) {
+	case 1:
+		if (samples >= 2)
+			value |= ROGUE_CR_TE_AA_Y_EN;
+		if (samples >= 4)
+			value |= ROGUE_CR_TE_AA_X_EN;
+		break;
+	case 2:
+		if (samples >= 2)
+			value |= ROGUE_CR_TE_AA_X2_EN;
+		if (samples >= 4)
+			value |= ROGUE_CR_TE_AA_Y_EN;
+		if (samples >= 8)
+			value |= ROGUE_CR_TE_AA_X_EN;
+		break;
+	case 4:
+		if (samples >= 2)
+			value |= ROGUE_CR_TE_AA_X2_EN;
+		if (samples >= 4)
+			value |= ROGUE_CR_TE_AA_Y2_EN;
+		if (samples >= 8)
+			value |= ROGUE_CR_TE_AA_Y_EN;
+		break;
+	default:
+		WARN(true, "Unsupported ISP samples per pixel value");
+		return -EINVAL;
+	}
+
+	*value_out = value;
+
+	return 0;
+}
+
+static void
+hwrtdata_common_init(void *cpu_ptr, void *priv)
+{
+	struct pvr_hwrt_dataset *hwrt = priv;
+
+	memcpy(cpu_ptr, &hwrt->common, sizeof(hwrt->common));
+}
+
+static int
+hwrt_init_common_fw_structure(struct pvr_file *pvr_file,
+			      struct drm_pvr_ioctl_create_hwrt_dataset_args *args,
+			      struct pvr_hwrt_dataset *hwrt)
+{
+	struct drm_pvr_create_hwrt_geom_data_args *geom_data_args = &args->geom_data_args;
+	struct pvr_device *pvr_dev = pvr_file->pvr_dev;
+	struct pvr_rt_mtile_info info;
+	int err;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, tile_size_x, &info.tile_size_x);
+	if (WARN_ON(err))
+		return err;
+
+	err = PVR_FEATURE_VALUE(pvr_dev, tile_size_y, &info.tile_size_y);
+	if (WARN_ON(err))
+		return err;
+
+	info.num_tiles_x = DIV_ROUND_UP(args->width, info.tile_size_x);
+	info.num_tiles_y = DIV_ROUND_UP(args->height, info.tile_size_y);
+
+	if (PVR_HAS_FEATURE(pvr_dev, simple_parameter_format_version)) {
+		u32 parameter_format;
+
+		err = PVR_FEATURE_VALUE(pvr_dev, simple_parameter_format_version,
+					&parameter_format);
+		if (WARN_ON(err))
+			return err;
+
+		WARN_ON(parameter_format != 2);
+
+		/*
+		 * Set up 16 macrotiles with a multiple of 2x2 tiles per macrotile, which is
+		 * aligned to a tile group.
+		 */
+		info.mtile_x[0] = DIV_ROUND_UP(info.num_tiles_x, 8) * 2;
+		info.mtile_y[0] = DIV_ROUND_UP(info.num_tiles_y, 8) * 2;
+		info.mtile_x[1] = 0;
+		info.mtile_y[1] = 0;
+		info.mtile_x[2] = 0;
+		info.mtile_y[2] = 0;
+		info.tile_max_x = round_up(info.num_tiles_x, 2) - 1;
+		info.tile_max_y = round_up(info.num_tiles_y, 2) - 1;
+	} else {
+		/* Set up 16 macrotiles with a multiple of 4x4 tiles per macrotile. */
+		info.mtile_x[0] = round_up(DIV_ROUND_UP(info.num_tiles_x, 4), 4);
+		info.mtile_y[0] = round_up(DIV_ROUND_UP(info.num_tiles_y, 4), 4);
+		info.mtile_x[1] = info.mtile_x[0] * 2;
+		info.mtile_y[1] = info.mtile_y[0] * 2;
+		info.mtile_x[2] = info.mtile_x[0] * 3;
+		info.mtile_y[2] = info.mtile_y[0] * 3;
+		info.tile_max_x = info.num_tiles_x - 1;
+		info.tile_max_y = info.num_tiles_y - 1;
+	}
+
+	hwrt->common.geom_caches_need_zeroing = false;
+
+	hwrt->common.isp_merge_lower_x = args->isp_merge_lower_x;
+	hwrt->common.isp_merge_lower_y = args->isp_merge_lower_y;
+	hwrt->common.isp_merge_upper_x = args->isp_merge_upper_x;
+	hwrt->common.isp_merge_upper_y = args->isp_merge_upper_y;
+	hwrt->common.isp_merge_scale_x = args->isp_merge_scale_x;
+	hwrt->common.isp_merge_scale_y = args->isp_merge_scale_y;
+
+	err = get_cr_multisamplectl_val(args->samples, false,
+					&hwrt->common.multi_sample_ctl);
+	if (err)
+		return err;
+
+	err = get_cr_multisamplectl_val(args->samples, true,
+					&hwrt->common.flipped_multi_sample_ctl);
+	if (err)
+		return err;
+
+	hwrt->common.mtile_stride = info.mtile_x[0] * info.mtile_y[0];
+
+	err = get_cr_te_aa_val(pvr_dev, args->samples, &hwrt->common.teaa);
+	if (err)
+		return err;
+
+	hwrt->common.screen_pixel_max =
+		(((args->width - 1) << ROGUE_CR_PPP_SCREEN_PIXXMAX_SHIFT) &
+		 ~ROGUE_CR_PPP_SCREEN_PIXXMAX_CLRMSK) |
+		(((args->height - 1) << ROGUE_CR_PPP_SCREEN_PIXYMAX_SHIFT) &
+		 ~ROGUE_CR_PPP_SCREEN_PIXYMAX_CLRMSK);
+
+	hwrt->common.te_screen =
+		((info.tile_max_x << ROGUE_CR_TE_SCREEN_XMAX_SHIFT) &
+		 ~ROGUE_CR_TE_SCREEN_XMAX_CLRMSK) |
+		((info.tile_max_y << ROGUE_CR_TE_SCREEN_YMAX_SHIFT) &
+		 ~ROGUE_CR_TE_SCREEN_YMAX_CLRMSK);
+	hwrt->common.te_mtile1 =
+		((info.mtile_x[0] << ROGUE_CR_TE_MTILE1_X1_SHIFT) & ~ROGUE_CR_TE_MTILE1_X1_CLRMSK) |
+		((info.mtile_x[1] << ROGUE_CR_TE_MTILE1_X2_SHIFT) & ~ROGUE_CR_TE_MTILE1_X2_CLRMSK) |
+		((info.mtile_x[2] << ROGUE_CR_TE_MTILE1_X3_SHIFT) & ~ROGUE_CR_TE_MTILE1_X3_CLRMSK);
+	hwrt->common.te_mtile2 =
+		((info.mtile_y[0] << ROGUE_CR_TE_MTILE2_Y1_SHIFT) & ~ROGUE_CR_TE_MTILE2_Y1_CLRMSK) |
+		((info.mtile_y[1] << ROGUE_CR_TE_MTILE2_Y2_SHIFT) & ~ROGUE_CR_TE_MTILE2_Y2_CLRMSK) |
+		((info.mtile_y[2] << ROGUE_CR_TE_MTILE2_Y3_SHIFT) & ~ROGUE_CR_TE_MTILE2_Y3_CLRMSK);
+
+	err = get_cr_isp_mtile_size_val(pvr_dev, args->samples, &info,
+					&hwrt->common.isp_mtile_size);
+	if (err)
+		return err;
+
+	hwrt->common.tpc_stride = geom_data_args->tpc_stride;
+	hwrt->common.tpc_size = geom_data_args->tpc_size;
+
+	hwrt->common.rgn_header_size = args->region_header_size;
+
+	err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_hwrtdata_common),
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED, hwrtdata_common_init, hwrt,
+				   &hwrt->common_fw_obj);
+
+	return err;
+}
+
+static void
+hwrt_fw_data_init(void *cpu_ptr, void *priv)
+{
+	struct pvr_hwrt_data *hwrt_data = priv;
+
+	memcpy(cpu_ptr, &hwrt_data->data, sizeof(hwrt_data->data));
+}
+
+static int
+hwrt_data_init_fw_structure(struct pvr_file *pvr_file,
+			    struct pvr_hwrt_dataset *hwrt,
+			    struct drm_pvr_ioctl_create_hwrt_dataset_args *args,
+			    struct drm_pvr_create_hwrt_rt_data_args *rt_data_args,
+			    struct pvr_hwrt_data *hwrt_data)
+{
+	struct drm_pvr_create_hwrt_geom_data_args *geom_data_args = &args->geom_data_args;
+	struct pvr_device *pvr_dev = pvr_file->pvr_dev;
+	struct rogue_fwif_rta_ctl *rta_ctl;
+	int free_list_i;
+	int err;
+
+	pvr_fw_object_get_fw_addr(hwrt->common_fw_obj,
+				  &hwrt_data->data.hwrt_data_common_fw_addr);
+
+	for (free_list_i = 0; free_list_i < ARRAY_SIZE(hwrt->free_lists); free_list_i++) {
+		pvr_fw_object_get_fw_addr(hwrt->free_lists[free_list_i]->fw_obj,
+					  &hwrt_data->data.freelists_fw_addr[free_list_i]);
+	}
+
+	hwrt_data->data.tail_ptrs_dev_addr = geom_data_args->tpc_dev_addr;
+	hwrt_data->data.vheap_table_dev_addr = geom_data_args->vheap_table_dev_addr;
+	hwrt_data->data.rtc_dev_addr = geom_data_args->rtc_dev_addr;
+
+	hwrt_data->data.pm_mlist_dev_addr = rt_data_args->pm_mlist_dev_addr;
+	hwrt_data->data.macrotile_array_dev_addr = rt_data_args->macrotile_array_dev_addr;
+	hwrt_data->data.rgn_header_dev_addr = rt_data_args->region_header_dev_addr;
+
+	rta_ctl = &hwrt_data->data.rta_ctl;
+
+	rta_ctl->render_target_index = 0;
+	rta_ctl->active_render_targets = 0;
+	rta_ctl->valid_render_targets_fw_addr = 0;
+	rta_ctl->rta_num_partial_renders_fw_addr = 0;
+	rta_ctl->max_rts = args->layers;
+
+	if (args->layers > 1) {
+		err = pvr_fw_object_create(pvr_dev, args->layers * SRTC_ENTRY_SIZE,
+					   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+					   NULL, NULL, &hwrt_data->srtc_obj);
+		if (err)
+			return err;
+		pvr_fw_object_get_fw_addr(hwrt_data->srtc_obj,
+					  &rta_ctl->valid_render_targets_fw_addr);
+
+		err = pvr_fw_object_create(pvr_dev, args->layers * RAA_ENTRY_SIZE,
+					   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+					   NULL, NULL, &hwrt_data->raa_obj);
+		if (err)
+			goto err_put_shadow_rt_cache;
+		pvr_fw_object_get_fw_addr(hwrt_data->raa_obj,
+					  &rta_ctl->rta_num_partial_renders_fw_addr);
+	}
+
+	err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_hwrtdata),
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   hwrt_fw_data_init, hwrt_data, &hwrt_data->fw_obj);
+	if (err)
+		goto err_put_raa_obj;
+
+	pvr_free_list_add_hwrt(hwrt->free_lists[0], hwrt_data);
+
+	return 0;
+
+err_put_raa_obj:
+	if (args->layers > 1)
+		pvr_fw_object_destroy(hwrt_data->raa_obj);
+
+err_put_shadow_rt_cache:
+	if (args->layers > 1)
+		pvr_fw_object_destroy(hwrt_data->srtc_obj);
+
+	return err;
+}
+
+static void
+hwrt_data_fini_fw_structure(struct pvr_hwrt_dataset *hwrt, int hwrt_nr)
+{
+	struct pvr_hwrt_data *hwrt_data = &hwrt->data[hwrt_nr];
+
+	pvr_free_list_remove_hwrt(hwrt->free_lists[0], hwrt_data);
+
+	if (hwrt->max_rts > 1) {
+		pvr_fw_object_destroy(hwrt_data->raa_obj);
+		pvr_fw_object_destroy(hwrt_data->srtc_obj);
+	}
+
+	pvr_fw_object_destroy(hwrt_data->fw_obj);
+}
+
+/**
+ * pvr_hwrt_dataset_create() - Create a new HWRT dataset
+ * @pvr_file: Pointer to pvr_file structure.
+ * @args: Creation arguments from userspace.
+ *
+ * Return:
+ *  * Pointer to new HWRT, or
+ *  * ERR_PTR(-%ENOMEM) on out of memory.
+ */
+struct pvr_hwrt_dataset *
+pvr_hwrt_dataset_create(struct pvr_file *pvr_file,
+			struct drm_pvr_ioctl_create_hwrt_dataset_args *args)
+{
+	struct pvr_hwrt_dataset *hwrt;
+	int err;
+
+	/* Create and fill out the kernel structure */
+	hwrt = kzalloc(sizeof(*hwrt), GFP_KERNEL);
+
+	if (!hwrt)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&hwrt->ref_count);
+
+	err = hwrt_init_kernel_structure(pvr_file, args, hwrt);
+	if (err < 0)
+		goto err_free;
+
+	err = hwrt_init_common_fw_structure(pvr_file, args, hwrt);
+	if (err < 0)
+		goto err_free;
+
+	for (int i = 0; i < ARRAY_SIZE(hwrt->data); i++) {
+		err = hwrt_data_init_fw_structure(pvr_file, hwrt, args,
+						  &args->rt_data_args[i],
+						  &hwrt->data[i]);
+		if (err < 0) {
+			i--;
+			/* Destroy already created structures. */
+			for (; i >= 0; i--)
+				hwrt_data_fini_fw_structure(hwrt, i);
+			goto err_free;
+		}
+
+		hwrt->data[i].hwrt_dataset = hwrt;
+	}
+
+	return hwrt;
+
+err_free:
+	pvr_hwrt_dataset_put(hwrt);
+
+	return ERR_PTR(err);
+}
+
+static void
+pvr_hwrt_dataset_release(struct kref *ref_count)
+{
+	struct pvr_hwrt_dataset *hwrt =
+		container_of(ref_count, struct pvr_hwrt_dataset, ref_count);
+
+	for (int i = ARRAY_SIZE(hwrt->data) - 1; i >= 0; i--) {
+		WARN_ON(pvr_fw_structure_cleanup(hwrt->pvr_dev, ROGUE_FWIF_CLEANUP_HWRTDATA,
+						 hwrt->data[i].fw_obj, 0));
+		hwrt_data_fini_fw_structure(hwrt, i);
+	}
+
+	hwrt_fini_common_fw_structure(hwrt);
+	hwrt_fini_kernel_structure(hwrt);
+
+	kfree(hwrt);
+}
+
+/**
+ * pvr_destroy_hwrt_datasets_for_file: Destroy any HWRT datasets associated
+ * with the given file.
+ * @pvr_file: Pointer to pvr_file structure.
+ *
+ * Removes all HWRT datasets associated with @pvr_file from the device
+ * hwrt_dataset list and drops initial references. HWRT datasets will then be
+ * destroyed once all outstanding references are dropped.
+ */
+void pvr_destroy_hwrt_datasets_for_file(struct pvr_file *pvr_file)
+{
+	struct pvr_hwrt_dataset *hwrt;
+	unsigned long handle;
+
+	xa_for_each(&pvr_file->hwrt_handles, handle, hwrt) {
+		(void)hwrt;
+		pvr_hwrt_dataset_put(xa_erase(&pvr_file->hwrt_handles, handle));
+	}
+}
+
+/**
+ * pvr_hwrt_dataset_put() - Release reference on HWRT dataset
+ * @hwrt: Pointer to HWRT dataset to release reference on
+ */
+void
+pvr_hwrt_dataset_put(struct pvr_hwrt_dataset *hwrt)
+{
+	if (hwrt)
+		kref_put(&hwrt->ref_count, pvr_hwrt_dataset_release);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.h b/drivers/gpu/drm/imagination/pvr_hwrt.h
new file mode 100644
index 0000000000000..76992948d0472
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_hwrt.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_HWRT_H
+#define PVR_HWRT_H
+
+#include <linux/compiler_attributes.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <uapi/drm/pvr_drm.h>
+
+#include "pvr_device.h"
+#include "pvr_rogue_fwif_shared.h"
+
+/* Forward declaration from pvr_free_list.h. */
+struct pvr_free_list;
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+/**
+ * struct pvr_hwrt_data - structure representing HWRT data
+ */
+struct pvr_hwrt_data {
+	/** @fw_obj: FW object representing the FW-side structure. */
+	struct pvr_fw_object *fw_obj;
+
+	/** @data: Local copy of FW-side structure. */
+	struct rogue_fwif_hwrtdata data;
+
+	/** @freelist_node: List node connecting this HWRT to the local freelist. */
+	struct list_head freelist_node;
+
+	/**
+	 * @srtc_obj: FW object representing shadow render target cache.
+	 *
+	 * Only valid if @max_rts > 1.
+	 */
+	struct pvr_fw_object *srtc_obj;
+
+	/**
+	 * @raa_obj: FW object representing renders accumulation array.
+	 *
+	 * Only valid if @max_rts > 1.
+	 */
+	struct pvr_fw_object *raa_obj;
+
+	/** @hwrt_dataset: Back pointer to owning HWRT dataset. */
+	struct pvr_hwrt_dataset *hwrt_dataset;
+};
+
+/**
+ * struct pvr_hwrt_dataset - structure representing a HWRT data set.
+ */
+struct pvr_hwrt_dataset {
+	/** @ref_count: Reference count of object. */
+	struct kref ref_count;
+
+	/** @pvr_dev: Pointer to device that owns this object. */
+	struct pvr_device *pvr_dev;
+
+	/** @common_fw_obj: FW object representing common FW-side structure. */
+	struct pvr_fw_object *common_fw_obj;
+
+	struct rogue_fwif_hwrtdata_common common;
+
+	/** @data: HWRT data structures belonging to this set. */
+	struct pvr_hwrt_data data[ROGUE_FWIF_NUM_RTDATAS];
+
+	/** @free_lists: Free lists used by HWRT data set. */
+	struct pvr_free_list *free_lists[ROGUE_FWIF_NUM_RTDATA_FREELISTS];
+
+	/** @max_rts: Maximum render targets for this HWRT data set. */
+	u16 max_rts;
+};
+
+struct pvr_hwrt_dataset *
+pvr_hwrt_dataset_create(struct pvr_file *pvr_file,
+			struct drm_pvr_ioctl_create_hwrt_dataset_args *args);
+
+void
+pvr_destroy_hwrt_datasets_for_file(struct pvr_file *pvr_file);
+
+/**
+ * pvr_hwrt_dataset_lookup() - Lookup HWRT dataset pointer from handle
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Object handle.
+ *
+ * Takes reference on dataset object. Call pvr_hwrt_dataset_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, or is not a HWRT
+ *    dataset)
+ */
+static __always_inline struct pvr_hwrt_dataset *
+pvr_hwrt_dataset_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_hwrt_dataset *hwrt;
+
+	xa_lock(&pvr_file->hwrt_handles);
+	hwrt = xa_load(&pvr_file->hwrt_handles, handle);
+
+	if (hwrt)
+		kref_get(&hwrt->ref_count);
+
+	xa_unlock(&pvr_file->hwrt_handles);
+
+	return hwrt;
+}
+
+void
+pvr_hwrt_dataset_put(struct pvr_hwrt_dataset *hwrt);
+
+/**
+ * pvr_hwrt_data_lookup() - Lookup HWRT data pointer from handle and index
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Object handle.
+ * @index: Index of RT data within dataset.
+ *
+ * Takes reference on dataset object. Call pvr_hwrt_data_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, or is not a HWRT
+ *    dataset, or index is out of range)
+ */
+static __always_inline struct pvr_hwrt_data *
+pvr_hwrt_data_lookup(struct pvr_file *pvr_file, u32 handle, u32 index)
+{
+	struct pvr_hwrt_dataset *hwrt_dataset = pvr_hwrt_dataset_lookup(pvr_file, handle);
+
+	if (hwrt_dataset) {
+		if (index < ARRAY_SIZE(hwrt_dataset->data))
+			return &hwrt_dataset->data[index];
+
+		pvr_hwrt_dataset_put(hwrt_dataset);
+	}
+
+	return NULL;
+}
+
+/**
+ * pvr_hwrt_data_put() - Release reference on HWRT data
+ * @hwrt: Pointer to HWRT data to release reference on
+ */
+static __always_inline void
+pvr_hwrt_data_put(struct pvr_hwrt_data *hwrt)
+{
+	if (hwrt)
+		pvr_hwrt_dataset_put(hwrt->hwrt_dataset);
+}
+
+static __always_inline struct pvr_hwrt_data *
+pvr_hwrt_data_get(struct pvr_hwrt_data *hwrt)
+{
+	if (hwrt)
+		kref_get(&hwrt->hwrt_dataset->ref_count);
+
+	return hwrt;
+}
+
+#endif /* PVR_HWRT_H */
-- 
GitLab


From d2d79d29bb98a32c511f7339a8e93b47544fdeac Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:37 +0000
Subject: [PATCH 0412/2340] drm/imagination: Implement context
 creation/destruction ioctls

Implement ioctls for the creation and destruction of contexts. Contexts are
used for job submission and each is associated with a particular job type.

Changes since v8:
- Fixed one error path in pvr_stream_process_1()
- Corrected license identifiers

Changes since v5:
- Fix context release in final error path in pvr_context_create()

Changes since v3:
- Use drm_dev_{enter,exit}

Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Link: https://lore.kernel.org/r/ac474a1f7dda2582d290798e4837140a2989aa2a.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Makefile          |   4 +
 drivers/gpu/drm/imagination/pvr_cccb.c        | 267 ++++++++++++++
 drivers/gpu/drm/imagination/pvr_cccb.h        | 109 ++++++
 drivers/gpu/drm/imagination/pvr_context.c     | 341 ++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_context.h     | 161 +++++++++
 drivers/gpu/drm/imagination/pvr_device.h      |  21 ++
 drivers/gpu/drm/imagination/pvr_drv.c         |  29 +-
 drivers/gpu/drm/imagination/pvr_stream.c      | 285 +++++++++++++++
 drivers/gpu/drm/imagination/pvr_stream.h      |  75 ++++
 drivers/gpu/drm/imagination/pvr_stream_defs.c | 125 +++++++
 drivers/gpu/drm/imagination/pvr_stream_defs.h |  16 +
 11 files changed, 1431 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_cccb.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_cccb.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_context.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_context.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_stream.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_stream.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_stream_defs.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_stream_defs.h

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 2bd501018d5d5..7f7bea8c60c4f 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -5,6 +5,8 @@ subdir-ccflags-y := -I$(srctree)/$(src)
 
 powervr-y := \
 	pvr_ccb.o \
+	pvr_cccb.o \
+	pvr_context.o \
 	pvr_device.o \
 	pvr_device_info.o \
 	pvr_drv.o \
@@ -18,6 +20,8 @@ powervr-y := \
 	pvr_hwrt.o \
 	pvr_mmu.o \
 	pvr_power.o \
+	pvr_stream.o \
+	pvr_stream_defs.o \
 	pvr_vm.o \
 	pvr_vm_mips.o
 
diff --git a/drivers/gpu/drm/imagination/pvr_cccb.c b/drivers/gpu/drm/imagination/pvr_cccb.c
new file mode 100644
index 0000000000000..4fabab41bea79
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_cccb.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_ccb.h"
+#include "pvr_cccb.h"
+#include "pvr_device.h"
+#include "pvr_gem.h"
+#include "pvr_hwrt.h"
+
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+static __always_inline u32
+get_ccb_space(u32 w_off, u32 r_off, u32 ccb_size)
+{
+	return (((r_off) - (w_off)) + ((ccb_size) - 1)) & ((ccb_size) - 1);
+}
+
+static void
+cccb_ctrl_init(void *cpu_ptr, void *priv)
+{
+	struct rogue_fwif_cccb_ctl *ctrl = cpu_ptr;
+	struct pvr_cccb *pvr_cccb = priv;
+
+	WRITE_ONCE(ctrl->write_offset, 0);
+	WRITE_ONCE(ctrl->read_offset, 0);
+	WRITE_ONCE(ctrl->dep_offset, 0);
+	WRITE_ONCE(ctrl->wrap_mask, pvr_cccb->wrap_mask);
+}
+
+/**
+ * pvr_cccb_init() - Initialise a Client CCB
+ * @pvr_dev: Device pointer.
+ * @pvr_cccb: Pointer to Client CCB structure to initialise.
+ * @size_log2: Log2 size of Client CCB in bytes.
+ * @name: Name of owner of Client CCB. Used for fence context.
+ *
+ * Return:
+ *  * Zero on success, or
+ *  * Any error code returned by pvr_fw_object_create_and_map().
+ */
+int
+pvr_cccb_init(struct pvr_device *pvr_dev, struct pvr_cccb *pvr_cccb,
+	      u32 size_log2, const char *name)
+{
+	size_t size = 1 << size_log2;
+	int err;
+
+	pvr_cccb->size = size;
+	pvr_cccb->write_offset = 0;
+	pvr_cccb->wrap_mask = size - 1;
+
+	/*
+	 * Map CCCB and control structure as uncached, so we don't have to flush
+	 * CPU cache repeatedly when polling for space.
+	 */
+	pvr_cccb->ctrl = pvr_fw_object_create_and_map(pvr_dev, sizeof(*pvr_cccb->ctrl),
+						      PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						      cccb_ctrl_init, pvr_cccb,
+						      &pvr_cccb->ctrl_obj);
+	if (IS_ERR(pvr_cccb->ctrl))
+		return PTR_ERR(pvr_cccb->ctrl);
+
+	pvr_cccb->cccb = pvr_fw_object_create_and_map(pvr_dev, size,
+						      PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+						      NULL, NULL, &pvr_cccb->cccb_obj);
+	if (IS_ERR(pvr_cccb->cccb)) {
+		err = PTR_ERR(pvr_cccb->cccb);
+		goto err_free_ctrl;
+	}
+
+	pvr_fw_object_get_fw_addr(pvr_cccb->ctrl_obj, &pvr_cccb->ctrl_fw_addr);
+	pvr_fw_object_get_fw_addr(pvr_cccb->cccb_obj, &pvr_cccb->cccb_fw_addr);
+
+	return 0;
+
+err_free_ctrl:
+	pvr_fw_object_unmap_and_destroy(pvr_cccb->ctrl_obj);
+
+	return err;
+}
+
+/**
+ * pvr_cccb_fini() - Release Client CCB structure
+ * @pvr_cccb: Client CCB to release.
+ */
+void
+pvr_cccb_fini(struct pvr_cccb *pvr_cccb)
+{
+	pvr_fw_object_unmap_and_destroy(pvr_cccb->cccb_obj);
+	pvr_fw_object_unmap_and_destroy(pvr_cccb->ctrl_obj);
+}
+
+/**
+ * pvr_cccb_cmdseq_fits() - Check if a command sequence fits in the CCCB
+ * @pvr_cccb: Target Client CCB.
+ * @size: Size of the command sequence.
+ *
+ * Check if a command sequence fits in the CCCB we have at hand.
+ *
+ * Return:
+ *  * true if the command sequence fits in the CCCB, or
+ *  * false otherwise.
+ */
+bool pvr_cccb_cmdseq_fits(struct pvr_cccb *pvr_cccb, size_t size)
+{
+	struct rogue_fwif_cccb_ctl *ctrl = pvr_cccb->ctrl;
+	u32 read_offset, remaining;
+	bool fits = false;
+
+	read_offset = READ_ONCE(ctrl->read_offset);
+	remaining = pvr_cccb->size - pvr_cccb->write_offset;
+
+	/* Always ensure we have enough room for a padding command at the end of the CCCB.
+	 * If our command sequence does not fit, reserve the remaining space for a padding
+	 * command.
+	 */
+	if (size + PADDING_COMMAND_SIZE > remaining)
+		size += remaining;
+
+	if (get_ccb_space(pvr_cccb->write_offset, read_offset, pvr_cccb->size) >= size)
+		fits = true;
+
+	return fits;
+}
+
+/**
+ * pvr_cccb_write_command_with_header() - Write a command + command header to a
+ *                                        Client CCB
+ * @pvr_cccb: Target Client CCB.
+ * @cmd_type: Client CCB command type. Must be one of %ROGUE_FWIF_CCB_CMD_TYPE_*.
+ * @cmd_size: Size of command in bytes.
+ * @cmd_data: Pointer to command to write.
+ * @ext_job_ref: External job reference.
+ * @int_job_ref: Internal job reference.
+ *
+ * Caller must make sure there's enough space in CCCB to queue this command. This
+ * can be done by calling pvr_cccb_cmdseq_fits().
+ *
+ * This function is not protected by any lock. The caller must ensure there's
+ * no concurrent caller, which should be guaranteed by the drm_sched model (job
+ * submission is serialized in drm_sched_main()).
+ */
+void
+pvr_cccb_write_command_with_header(struct pvr_cccb *pvr_cccb, u32 cmd_type, u32 cmd_size,
+				   void *cmd_data, u32 ext_job_ref, u32 int_job_ref)
+{
+	u32 sz_with_hdr = pvr_cccb_get_size_of_cmd_with_hdr(cmd_size);
+	struct rogue_fwif_ccb_cmd_header cmd_header = {
+		.cmd_type = cmd_type,
+		.cmd_size = ALIGN(cmd_size, 8),
+		.ext_job_ref = ext_job_ref,
+		.int_job_ref = int_job_ref,
+	};
+	struct rogue_fwif_cccb_ctl *ctrl = pvr_cccb->ctrl;
+	u32 remaining = pvr_cccb->size - pvr_cccb->write_offset;
+	u32 required_size, cccb_space, read_offset;
+
+	/*
+	 * Always ensure we have enough room for a padding command at the end of
+	 * the CCCB.
+	 */
+	if (remaining < sz_with_hdr + PADDING_COMMAND_SIZE) {
+		/*
+		 * Command would need to wrap, so we need to pad the remainder
+		 * of the CCCB.
+		 */
+		required_size = sz_with_hdr + remaining;
+	} else {
+		required_size = sz_with_hdr;
+	}
+
+	read_offset = READ_ONCE(ctrl->read_offset);
+	cccb_space = get_ccb_space(pvr_cccb->write_offset, read_offset, pvr_cccb->size);
+	if (WARN_ON(cccb_space < required_size))
+		return;
+
+	if (required_size != sz_with_hdr) {
+		/* Add padding command */
+		struct rogue_fwif_ccb_cmd_header pad_cmd = {
+			.cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_PADDING,
+			.cmd_size = remaining - sizeof(pad_cmd),
+		};
+
+		memcpy(&pvr_cccb->cccb[pvr_cccb->write_offset], &pad_cmd, sizeof(pad_cmd));
+		pvr_cccb->write_offset = 0;
+	}
+
+	memcpy(&pvr_cccb->cccb[pvr_cccb->write_offset], &cmd_header, sizeof(cmd_header));
+	memcpy(&pvr_cccb->cccb[pvr_cccb->write_offset + sizeof(cmd_header)], cmd_data, cmd_size);
+	pvr_cccb->write_offset += sz_with_hdr;
+}
+
+static void fill_cmd_kick_data(struct pvr_cccb *cccb, u32 ctx_fw_addr,
+			       struct pvr_hwrt_data *hwrt,
+			       struct rogue_fwif_kccb_cmd_kick_data *k)
+{
+	k->context_fw_addr = ctx_fw_addr;
+	k->client_woff_update = cccb->write_offset;
+	k->client_wrap_mask_update = cccb->wrap_mask;
+
+	if (hwrt) {
+		u32 cleanup_state_offset = offsetof(struct rogue_fwif_hwrtdata, cleanup_state);
+
+		pvr_fw_object_get_fw_addr_offset(hwrt->fw_obj, cleanup_state_offset,
+						 &k->cleanup_ctl_fw_addr[k->num_cleanup_ctl++]);
+	}
+}
+
+/**
+ * pvr_cccb_send_kccb_kick: Send KCCB kick to trigger command processing
+ * @pvr_dev: Device pointer.
+ * @pvr_cccb: Pointer to CCCB to process.
+ * @cctx_fw_addr: FW virtual address for context owning this Client CCB.
+ * @hwrt: HWRT data set associated with this kick. May be %NULL.
+ *
+ * You must call pvr_kccb_reserve_slot() and wait for the returned fence to
+ * signal (if this function didn't return NULL) before calling
+ * pvr_cccb_send_kccb_kick().
+ */
+void
+pvr_cccb_send_kccb_kick(struct pvr_device *pvr_dev,
+			struct pvr_cccb *pvr_cccb, u32 cctx_fw_addr,
+			struct pvr_hwrt_data *hwrt)
+{
+	struct rogue_fwif_kccb_cmd cmd_kick = {
+		.cmd_type = ROGUE_FWIF_KCCB_CMD_KICK,
+	};
+
+	fill_cmd_kick_data(pvr_cccb, cctx_fw_addr, hwrt, &cmd_kick.cmd_data.cmd_kick_data);
+
+	/* Make sure the writes to the CCCB are flushed before sending the KICK. */
+	wmb();
+
+	pvr_kccb_send_cmd_reserved_powered(pvr_dev, &cmd_kick, NULL);
+}
+
+void
+pvr_cccb_send_kccb_combined_kick(struct pvr_device *pvr_dev,
+				 struct pvr_cccb *geom_cccb,
+				 struct pvr_cccb *frag_cccb,
+				 u32 geom_ctx_fw_addr,
+				 u32 frag_ctx_fw_addr,
+				 struct pvr_hwrt_data *hwrt,
+				 bool frag_is_pr)
+{
+	struct rogue_fwif_kccb_cmd cmd_kick = {
+		.cmd_type = ROGUE_FWIF_KCCB_CMD_COMBINED_GEOM_FRAG_KICK,
+	};
+
+	fill_cmd_kick_data(geom_cccb, geom_ctx_fw_addr, hwrt,
+			   &cmd_kick.cmd_data.combined_geom_frag_cmd_kick_data.geom_cmd_kick_data);
+
+	/* If this is a partial-render job, we don't attach resources to cleanup-ctl array,
+	 * because the resources are already retained by the geometry job.
+	 */
+	fill_cmd_kick_data(frag_cccb, frag_ctx_fw_addr, frag_is_pr ? NULL : hwrt,
+			   &cmd_kick.cmd_data.combined_geom_frag_cmd_kick_data.frag_cmd_kick_data);
+
+	/* Make sure the writes to the CCCB are flushed before sending the KICK. */
+	wmb();
+
+	pvr_kccb_send_cmd_reserved_powered(pvr_dev, &cmd_kick, NULL);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_cccb.h b/drivers/gpu/drm/imagination/pvr_cccb.h
new file mode 100644
index 0000000000000..f35b3d4c9575b
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_cccb.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_CCCB_H
+#define PVR_CCCB_H
+
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_fwif_shared.h"
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#define PADDING_COMMAND_SIZE sizeof(struct rogue_fwif_ccb_cmd_header)
+
+/* Forward declaration from pvr_device.h. */
+struct pvr_device;
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+/* Forward declaration from pvr_hwrt.h. */
+struct pvr_hwrt_data;
+
+struct pvr_cccb {
+	/** @ctrl_obj: FW object representing CCCB control structure. */
+	struct pvr_fw_object *ctrl_obj;
+
+	/** @ccb_obj: FW object representing CCCB. */
+	struct pvr_fw_object *cccb_obj;
+
+	/**
+	 * @ctrl: Kernel mapping of CCCB control structure. @lock must be held
+	 *        when accessing.
+	 */
+	struct rogue_fwif_cccb_ctl *ctrl;
+
+	/** @cccb: Kernel mapping of CCCB. @lock must be held when accessing.*/
+	u8 *cccb;
+
+	/** @ctrl_fw_addr: FW virtual address of CCCB control structure. */
+	u32 ctrl_fw_addr;
+	/** @ccb_fw_addr: FW virtual address of CCCB. */
+	u32 cccb_fw_addr;
+
+	/** @size: Size of CCCB in bytes. */
+	size_t size;
+
+	/** @write_offset: CCCB write offset. */
+	u32 write_offset;
+
+	/** @wrap_mask: CCCB wrap mask. */
+	u32 wrap_mask;
+};
+
+int pvr_cccb_init(struct pvr_device *pvr_dev, struct pvr_cccb *cccb,
+		  u32 size_log2, const char *name);
+void pvr_cccb_fini(struct pvr_cccb *cccb);
+
+void pvr_cccb_write_command_with_header(struct pvr_cccb *pvr_cccb,
+					u32 cmd_type, u32 cmd_size, void *cmd_data,
+					u32 ext_job_ref, u32 int_job_ref);
+void pvr_cccb_send_kccb_kick(struct pvr_device *pvr_dev,
+			     struct pvr_cccb *pvr_cccb, u32 cctx_fw_addr,
+			     struct pvr_hwrt_data *hwrt);
+void pvr_cccb_send_kccb_combined_kick(struct pvr_device *pvr_dev,
+				      struct pvr_cccb *geom_cccb,
+				      struct pvr_cccb *frag_cccb,
+				      u32 geom_ctx_fw_addr,
+				      u32 frag_ctx_fw_addr,
+				      struct pvr_hwrt_data *hwrt,
+				      bool frag_is_pr);
+bool pvr_cccb_cmdseq_fits(struct pvr_cccb *pvr_cccb, size_t size);
+
+/**
+ * pvr_cccb_get_size_of_cmd_with_hdr() - Get the size of a command and its header.
+ * @cmd_size: Command size.
+ *
+ * Returns the size of the command and its header.
+ */
+static __always_inline u32
+pvr_cccb_get_size_of_cmd_with_hdr(u32 cmd_size)
+{
+	WARN_ON(!IS_ALIGNED(cmd_size, 8));
+	return sizeof(struct rogue_fwif_ccb_cmd_header) + ALIGN(cmd_size, 8);
+}
+
+/**
+ * pvr_cccb_cmdseq_can_fit() - Check if a command sequence can fit in the CCCB.
+ * @size: Command sequence size.
+ *
+ * Returns:
+ *  * true it the CCCB is big enough to contain a command sequence, or
+ *  * false otherwise.
+ */
+static __always_inline bool
+pvr_cccb_cmdseq_can_fit(struct pvr_cccb *pvr_cccb, size_t size)
+{
+	/* We divide the capacity by two to simplify our CCCB fencing logic:
+	 * we want to be sure that, no matter what we had queued before, we
+	 * are able to either queue our command sequence at the end or add a
+	 * padding command and queue the command sequence at the beginning
+	 * of the CCCB. If the command sequence size is bigger than half the
+	 * CCCB capacity, we'd have to queue the padding command and make sure
+	 * the FW is done processing it before queueing our command sequence.
+	 */
+	return size + PADDING_COMMAND_SIZE <= pvr_cccb->size / 2;
+}
+
+#endif /* PVR_CCCB_H */
diff --git a/drivers/gpu/drm/imagination/pvr_context.c b/drivers/gpu/drm/imagination/pvr_context.c
new file mode 100644
index 0000000000000..6cec5aa5a7598
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_context.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_cccb.h"
+#include "pvr_context.h"
+#include "pvr_device.h"
+#include "pvr_drv.h"
+#include "pvr_gem.h"
+#include "pvr_power.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_fwif_common.h"
+#include "pvr_rogue_fwif_resetframework.h"
+#include "pvr_stream_defs.h"
+#include "pvr_vm.h"
+
+#include <drm/drm_auth.h>
+#include <drm/drm_managed.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+static int
+remap_priority(struct pvr_file *pvr_file, s32 uapi_priority,
+	       enum pvr_context_priority *priority_out)
+{
+	switch (uapi_priority) {
+	case DRM_PVR_CTX_PRIORITY_LOW:
+		*priority_out = PVR_CTX_PRIORITY_LOW;
+		break;
+	case DRM_PVR_CTX_PRIORITY_NORMAL:
+		*priority_out = PVR_CTX_PRIORITY_MEDIUM;
+		break;
+	case DRM_PVR_CTX_PRIORITY_HIGH:
+		if (!capable(CAP_SYS_NICE) && !drm_is_current_master(from_pvr_file(pvr_file)))
+			return -EACCES;
+		*priority_out = PVR_CTX_PRIORITY_HIGH;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int get_fw_obj_size(enum drm_pvr_ctx_type type)
+{
+	switch (type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		return sizeof(struct rogue_fwif_fwrendercontext);
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		return sizeof(struct rogue_fwif_fwcomputecontext);
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		return sizeof(struct rogue_fwif_fwtransfercontext);
+	}
+
+	return -EINVAL;
+}
+
+static int
+process_static_context_state(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs,
+			     u64 stream_user_ptr, u32 stream_size, void *dest)
+{
+	void *stream;
+	int err;
+
+	stream = kzalloc(stream_size, GFP_KERNEL);
+	if (!stream)
+		return -ENOMEM;
+
+	if (copy_from_user(stream, u64_to_user_ptr(stream_user_ptr), stream_size)) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	err = pvr_stream_process(pvr_dev, cmd_defs, stream, stream_size, dest);
+	if (err)
+		goto err_free;
+
+	kfree(stream);
+
+	return 0;
+
+err_free:
+	kfree(stream);
+
+	return err;
+}
+
+static int init_render_fw_objs(struct pvr_context *ctx,
+			       struct drm_pvr_ioctl_create_context_args *args,
+			       void *fw_ctx_map)
+{
+	struct rogue_fwif_static_rendercontext_state *static_rendercontext_state;
+	struct rogue_fwif_fwrendercontext *fw_render_context = fw_ctx_map;
+
+	if (!args->static_context_state_len)
+		return -EINVAL;
+
+	static_rendercontext_state = &fw_render_context->static_render_context_state;
+
+	/* Copy static render context state from userspace. */
+	return process_static_context_state(ctx->pvr_dev,
+					    &pvr_static_render_context_state_stream,
+					    args->static_context_state,
+					    args->static_context_state_len,
+					    &static_rendercontext_state->ctxswitch_regs[0]);
+}
+
+static int init_compute_fw_objs(struct pvr_context *ctx,
+				struct drm_pvr_ioctl_create_context_args *args,
+				void *fw_ctx_map)
+{
+	struct rogue_fwif_fwcomputecontext *fw_compute_context = fw_ctx_map;
+	struct rogue_fwif_cdm_registers_cswitch *ctxswitch_regs;
+
+	if (!args->static_context_state_len)
+		return -EINVAL;
+
+	ctxswitch_regs = &fw_compute_context->static_compute_context_state.ctxswitch_regs;
+
+	/* Copy static render context state from userspace. */
+	return process_static_context_state(ctx->pvr_dev,
+					    &pvr_static_compute_context_state_stream,
+					    args->static_context_state,
+					    args->static_context_state_len,
+					    ctxswitch_regs);
+}
+
+static int init_transfer_fw_objs(struct pvr_context *ctx,
+				 struct drm_pvr_ioctl_create_context_args *args,
+				 void *fw_ctx_map)
+{
+	if (args->static_context_state_len)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int init_fw_objs(struct pvr_context *ctx,
+			struct drm_pvr_ioctl_create_context_args *args,
+			void *fw_ctx_map)
+{
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		return init_render_fw_objs(ctx, args, fw_ctx_map);
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		return init_compute_fw_objs(ctx, args, fw_ctx_map);
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		return init_transfer_fw_objs(ctx, args, fw_ctx_map);
+	}
+
+	return -EINVAL;
+}
+
+static void
+ctx_fw_data_init(void *cpu_ptr, void *priv)
+{
+	struct pvr_context *ctx = priv;
+
+	memcpy(cpu_ptr, ctx->data, ctx->data_size);
+}
+
+/**
+ * pvr_context_create() - Create a context.
+ * @pvr_file: File to attach the created context to.
+ * @args: Context creation arguments.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * A negative error code on failure.
+ */
+int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_context_args *args)
+{
+	struct pvr_device *pvr_dev = pvr_file->pvr_dev;
+	struct pvr_context *ctx;
+	int ctx_size;
+	int err;
+
+	/* Context creation flags are currently unused and must be zero. */
+	if (args->flags)
+		return -EINVAL;
+
+	ctx_size = get_fw_obj_size(args->type);
+	if (ctx_size < 0)
+		return ctx_size;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->data_size = ctx_size;
+	ctx->type = args->type;
+	ctx->flags = args->flags;
+	ctx->pvr_dev = pvr_dev;
+	kref_init(&ctx->ref_count);
+
+	err = remap_priority(pvr_file, args->priority, &ctx->priority);
+	if (err)
+		goto err_free_ctx;
+
+	ctx->vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+	if (IS_ERR(ctx->vm_ctx)) {
+		err = PTR_ERR(ctx->vm_ctx);
+		goto err_free_ctx;
+	}
+
+	ctx->data = kzalloc(ctx_size, GFP_KERNEL);
+	if (!ctx->data) {
+		err = -ENOMEM;
+		goto err_put_vm;
+	}
+
+	err = init_fw_objs(ctx, args, ctx->data);
+	if (err)
+		goto err_free_ctx_data;
+
+	err = pvr_fw_object_create(pvr_dev, ctx_size, PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   ctx_fw_data_init, ctx, &ctx->fw_obj);
+	if (err)
+		goto err_free_ctx_data;
+
+	err = xa_alloc(&pvr_dev->ctx_ids, &ctx->ctx_id, ctx, xa_limit_32b, GFP_KERNEL);
+	if (err)
+		goto err_destroy_fw_obj;
+
+	err = xa_alloc(&pvr_file->ctx_handles, &args->handle, ctx, xa_limit_32b, GFP_KERNEL);
+	if (err) {
+		/*
+		 * It's possible that another thread could have taken a reference on the context at
+		 * this point as it is in the ctx_ids xarray. Therefore instead of directly
+		 * destroying the context, drop a reference instead.
+		 */
+		pvr_context_put(ctx);
+		return err;
+	}
+
+	return 0;
+
+err_destroy_fw_obj:
+	pvr_fw_object_destroy(ctx->fw_obj);
+
+err_free_ctx_data:
+	kfree(ctx->data);
+
+err_put_vm:
+	pvr_vm_context_put(ctx->vm_ctx);
+
+err_free_ctx:
+	kfree(ctx);
+	return err;
+}
+
+static void
+pvr_context_release(struct kref *ref_count)
+{
+	struct pvr_context *ctx =
+		container_of(ref_count, struct pvr_context, ref_count);
+	struct pvr_device *pvr_dev = ctx->pvr_dev;
+
+	xa_erase(&pvr_dev->ctx_ids, ctx->ctx_id);
+	pvr_fw_object_destroy(ctx->fw_obj);
+	kfree(ctx->data);
+	pvr_vm_context_put(ctx->vm_ctx);
+	kfree(ctx);
+}
+
+/**
+ * pvr_context_put() - Release reference on context
+ * @ctx: Target context.
+ */
+void
+pvr_context_put(struct pvr_context *ctx)
+{
+	if (ctx)
+		kref_put(&ctx->ref_count, pvr_context_release);
+}
+
+/**
+ * pvr_context_destroy() - Destroy context
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Userspace context handle.
+ *
+ * Removes context from context list and drops initial reference. Context will
+ * then be destroyed once all outstanding references are dropped.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EINVAL if context not in context list.
+ */
+int
+pvr_context_destroy(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_context *ctx = xa_erase(&pvr_file->ctx_handles, handle);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* Release the reference held by the handle set. */
+	pvr_context_put(ctx);
+
+	return 0;
+}
+
+/**
+ * pvr_destroy_contexts_for_file: Destroy any contexts associated with the given file
+ * @pvr_file: Pointer to pvr_file structure.
+ *
+ * Removes all contexts associated with @pvr_file from the device context list and drops initial
+ * references. Contexts will then be destroyed once all outstanding references are dropped.
+ */
+void pvr_destroy_contexts_for_file(struct pvr_file *pvr_file)
+{
+	struct pvr_context *ctx;
+	unsigned long handle;
+
+	xa_for_each(&pvr_file->ctx_handles, handle, ctx)
+		pvr_context_destroy(pvr_file, handle);
+}
+
+/**
+ * pvr_context_device_init() - Device level initialization for queue related resources.
+ * @pvr_dev: The device to initialize.
+ */
+void pvr_context_device_init(struct pvr_device *pvr_dev)
+{
+	xa_init_flags(&pvr_dev->ctx_ids, XA_FLAGS_ALLOC1);
+}
+
+/**
+ * pvr_context_device_fini() - Device level cleanup for queue related resources.
+ * @pvr_dev: The device to cleanup.
+ */
+void pvr_context_device_fini(struct pvr_device *pvr_dev)
+{
+	WARN_ON(!xa_empty(&pvr_dev->ctx_ids));
+	xa_destroy(&pvr_dev->ctx_ids);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_context.h b/drivers/gpu/drm/imagination/pvr_context.h
new file mode 100644
index 0000000000000..502dfad113e44
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_context.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_CONTEXT_H
+#define PVR_CONTEXT_H
+
+#include <drm/gpu_scheduler.h>
+
+#include <linux/compiler_attributes.h>
+#include <linux/dma-fence.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <uapi/drm/pvr_drm.h>
+
+#include "pvr_cccb.h"
+#include "pvr_device.h"
+
+/* Forward declaration from pvr_gem.h. */
+struct pvr_fw_object;
+
+enum pvr_context_priority {
+	PVR_CTX_PRIORITY_LOW = 0,
+	PVR_CTX_PRIORITY_MEDIUM,
+	PVR_CTX_PRIORITY_HIGH,
+};
+
+/**
+ * struct pvr_context - Context data
+ */
+struct pvr_context {
+	/** @ref_count: Refcount for context. */
+	struct kref ref_count;
+
+	/** @pvr_dev: Pointer to owning device. */
+	struct pvr_device *pvr_dev;
+
+	/** @vm_ctx: Pointer to associated VM context. */
+	struct pvr_vm_context *vm_ctx;
+
+	/** @type: Type of context. */
+	enum drm_pvr_ctx_type type;
+
+	/** @flags: Context flags. */
+	u32 flags;
+
+	/** @priority: Context priority*/
+	enum pvr_context_priority priority;
+
+	/** @fw_obj: FW object representing FW-side context data. */
+	struct pvr_fw_object *fw_obj;
+
+	/** @data: Pointer to local copy of FW context data. */
+	void *data;
+
+	/** @data_size: Size of FW context data, in bytes. */
+	u32 data_size;
+
+	/** @ctx_id: FW context ID. */
+	u32 ctx_id;
+};
+
+/**
+ * pvr_context_get() - Take additional reference on context.
+ * @ctx: Context pointer.
+ *
+ * Call pvr_context_put() to release.
+ *
+ * Returns:
+ *  * The requested context on success, or
+ *  * %NULL if no context pointer passed.
+ */
+static __always_inline struct pvr_context *
+pvr_context_get(struct pvr_context *ctx)
+{
+	if (ctx)
+		kref_get(&ctx->ref_count);
+
+	return ctx;
+}
+
+/**
+ * pvr_context_lookup() - Lookup context pointer from handle and file.
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Context handle.
+ *
+ * Takes reference on context. Call pvr_context_put() to release.
+ *
+ * Return:
+ *  * The requested context on success, or
+ *  * %NULL on failure (context does not exist, or does not belong to @pvr_file).
+ */
+static __always_inline struct pvr_context *
+pvr_context_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+	struct pvr_context *ctx;
+
+	/* Take the array lock to protect against context removal.  */
+	xa_lock(&pvr_file->ctx_handles);
+	ctx = pvr_context_get(xa_load(&pvr_file->ctx_handles, handle));
+	xa_unlock(&pvr_file->ctx_handles);
+
+	return ctx;
+}
+
+/**
+ * pvr_context_lookup_id() - Lookup context pointer from ID.
+ * @pvr_dev: Device pointer.
+ * @id: FW context ID.
+ *
+ * Takes reference on context. Call pvr_context_put() to release.
+ *
+ * Return:
+ *  * The requested context on success, or
+ *  * %NULL on failure (context does not exist).
+ */
+static __always_inline struct pvr_context *
+pvr_context_lookup_id(struct pvr_device *pvr_dev, u32 id)
+{
+	struct pvr_context *ctx;
+
+	/* Take the array lock to protect against context removal.  */
+	xa_lock(&pvr_dev->ctx_ids);
+
+	/* Contexts are removed from the ctx_ids set in the context release path,
+	 * meaning the ref_count reached zero before they get removed. We need
+	 * to make sure we're not trying to acquire a context that's being
+	 * destroyed.
+	 */
+	ctx = xa_load(&pvr_dev->ctx_ids, id);
+	if (!kref_get_unless_zero(&ctx->ref_count))
+		ctx = NULL;
+
+	xa_unlock(&pvr_dev->ctx_ids);
+
+	return ctx;
+}
+
+static __always_inline u32
+pvr_context_get_fw_addr(struct pvr_context *ctx)
+{
+	u32 ctx_fw_addr = 0;
+
+	pvr_fw_object_get_fw_addr(ctx->fw_obj, &ctx_fw_addr);
+
+	return ctx_fw_addr;
+}
+
+void pvr_context_put(struct pvr_context *ctx);
+
+int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_context_args *args);
+
+int pvr_context_destroy(struct pvr_file *pvr_file, u32 handle);
+
+void pvr_destroy_contexts_for_file(struct pvr_file *pvr_file);
+
+void pvr_context_device_init(struct pvr_device *pvr_dev);
+
+void pvr_context_device_fini(struct pvr_device *pvr_dev);
+
+#endif /* PVR_CONTEXT_H */
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index f5b82b7935665..299f3e022c826 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -7,6 +7,8 @@
 #include "pvr_ccb.h"
 #include "pvr_device_info.h"
 #include "pvr_fw.h"
+#include "pvr_rogue_fwif_stream.h"
+#include "pvr_stream.h"
 
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
@@ -146,12 +148,23 @@ struct pvr_device {
 	/** @fw_dev: Firmware related data. */
 	struct pvr_fw_device fw_dev;
 
+	/** @stream_musthave_quirks: Bit array of "must-have" quirks for stream commands. */
+	u32 stream_musthave_quirks[PVR_STREAM_TYPE_MAX][PVR_STREAM_EXTHDR_TYPE_MAX];
+
 	/**
 	 * @mmu_flush_cache_flags: Records which MMU caches require flushing
 	 * before submitting the next job.
 	 */
 	atomic_t mmu_flush_cache_flags;
 
+	/**
+	 * @ctx_ids: Array of contexts belonging to this device. Array members
+	 *           are of type "struct pvr_context *".
+	 *
+	 * This array is used to allocate IDs used by the firmware.
+	 */
+	struct xarray ctx_ids;
+
 	/**
 	 * @free_list_ids: Array of free lists belonging to this device. Array members
 	 *                 are of type "struct pvr_free_list *".
@@ -255,6 +268,14 @@ struct pvr_file {
 	 */
 	struct pvr_device *pvr_dev;
 
+	/**
+	 * @ctx_handles: Array of contexts belonging to this file. Array members
+	 *               are of type "struct pvr_context *".
+	 *
+	 * This array is used to allocate handles returned to userspace.
+	 */
+	struct xarray ctx_handles;
+
 	/**
 	 * @free_list_handles: Array of free lists belonging to this file. Array
 	 * members are of type "struct pvr_free_list *".
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index f485e2cc60f96..e76363f7dbf78 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only OR MIT
 /* Copyright (c) 2023 Imagination Technologies Ltd. */
 
+#include "pvr_context.h"
 #include "pvr_device.h"
 #include "pvr_drv.h"
 #include "pvr_free_list.h"
@@ -673,7 +674,19 @@ static int
 pvr_ioctl_create_context(struct drm_device *drm_dev, void *raw_args,
 			 struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_create_context_args *args = raw_args;
+	struct pvr_file *pvr_file = file->driver_priv;
+	int idx;
+	int ret;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	ret = pvr_context_create(pvr_file, args);
+
+	drm_dev_exit(idx);
+
+	return ret;
 }
 
 /**
@@ -693,7 +706,13 @@ static int
 pvr_ioctl_destroy_context(struct drm_device *drm_dev, void *raw_args,
 			  struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_destroy_context_args *args = raw_args;
+	struct pvr_file *pvr_file = file->driver_priv;
+
+	if (args->_padding_4)
+		return -EINVAL;
+
+	return pvr_context_destroy(pvr_file, args->handle);
 }
 
 /**
@@ -1289,6 +1308,7 @@ pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file)
 	 */
 	pvr_file->pvr_dev = pvr_dev;
 
+	xa_init_flags(&pvr_file->ctx_handles, XA_FLAGS_ALLOC1);
 	xa_init_flags(&pvr_file->free_list_handles, XA_FLAGS_ALLOC1);
 	xa_init_flags(&pvr_file->hwrt_handles, XA_FLAGS_ALLOC1);
 	xa_init_flags(&pvr_file->vm_ctx_handles, XA_FLAGS_ALLOC1);
@@ -1318,6 +1338,9 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 {
 	struct pvr_file *pvr_file = to_pvr_file(file);
 
+	/* Kill remaining contexts. */
+	pvr_destroy_contexts_for_file(pvr_file);
+
 	/* Drop references on any remaining objects. */
 	pvr_destroy_free_lists_for_file(pvr_file);
 	pvr_destroy_hwrt_datasets_for_file(pvr_file);
@@ -1363,6 +1386,7 @@ pvr_probe(struct platform_device *plat_dev)
 	drm_dev = &pvr_dev->base;
 
 	platform_set_drvdata(plat_dev, drm_dev);
+	pvr_context_device_init(pvr_dev);
 
 	devm_pm_runtime_enable(&plat_dev->dev);
 	pm_runtime_mark_last_busy(&plat_dev->dev);
@@ -1406,6 +1430,7 @@ pvr_remove(struct platform_device *plat_dev)
 	pvr_device_fini(pvr_dev);
 	drm_dev_unplug(drm_dev);
 	pvr_watchdog_fini(pvr_dev);
+	pvr_context_device_fini(pvr_dev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/imagination/pvr_stream.c b/drivers/gpu/drm/imagination/pvr_stream.c
new file mode 100644
index 0000000000000..975336a4facfd
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_stream.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_rogue_fwif_stream.h"
+#include "pvr_stream.h"
+
+#include <linux/align.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <uapi/drm/pvr_drm.h>
+
+static __always_inline bool
+stream_def_is_supported(struct pvr_device *pvr_dev, const struct pvr_stream_def *stream_def)
+{
+	if (stream_def->feature == PVR_FEATURE_NONE)
+		return true;
+
+	if (!(stream_def->feature & PVR_FEATURE_NOT) &&
+	    pvr_device_has_feature(pvr_dev, stream_def->feature)) {
+		return true;
+	}
+
+	if ((stream_def->feature & PVR_FEATURE_NOT) &&
+	    !pvr_device_has_feature(pvr_dev, stream_def->feature & ~PVR_FEATURE_NOT)) {
+		return true;
+	}
+
+	return false;
+}
+
+static int
+pvr_stream_get_data(u8 *stream, u32 *stream_offset, u32 stream_size, u32 data_size, u32 align_size,
+		    void *dest)
+{
+	*stream_offset = ALIGN(*stream_offset, align_size);
+
+	if ((*stream_offset + data_size) > stream_size)
+		return -EINVAL;
+
+	memcpy(dest, stream + *stream_offset, data_size);
+
+	(*stream_offset) += data_size;
+
+	return 0;
+}
+
+/**
+ * pvr_stream_process_1() - Process a single stream and fill destination structure
+ * @pvr_dev: Device pointer.
+ * @stream_def: Stream definition.
+ * @nr_entries: Number of entries in &stream_def.
+ * @stream: Pointer to stream.
+ * @stream_offset: Starting offset within stream.
+ * @stream_size: Size of input stream, in bytes.
+ * @dest: Pointer to destination structure.
+ * @dest_size: Size of destination structure.
+ * @stream_offset_out: Pointer to variable to write updated stream offset to. May be NULL.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL on malformed stream.
+ */
+static int
+pvr_stream_process_1(struct pvr_device *pvr_dev, const struct pvr_stream_def *stream_def,
+		     u32 nr_entries, u8 *stream, u32 stream_offset, u32 stream_size,
+		     u8 *dest, u32 dest_size, u32 *stream_offset_out)
+{
+	int err = 0;
+	u32 i;
+
+	for (i = 0; i < nr_entries; i++) {
+		if (stream_def[i].offset >= dest_size) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (!stream_def_is_supported(pvr_dev, &stream_def[i]))
+			continue;
+
+		switch (stream_def[i].size) {
+		case PVR_STREAM_SIZE_8:
+			err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u8),
+						  sizeof(u8), dest + stream_def[i].offset);
+			if (err)
+				return err;
+			break;
+
+		case PVR_STREAM_SIZE_16:
+			err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u16),
+						  sizeof(u16), dest + stream_def[i].offset);
+			if (err)
+				return err;
+			break;
+
+		case PVR_STREAM_SIZE_32:
+			err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u32),
+						  sizeof(u32), dest + stream_def[i].offset);
+			if (err)
+				return err;
+			break;
+
+		case PVR_STREAM_SIZE_64:
+			err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u64),
+						  sizeof(u64), dest + stream_def[i].offset);
+			if (err)
+				return err;
+			break;
+
+		case PVR_STREAM_SIZE_ARRAY:
+			err = pvr_stream_get_data(stream, &stream_offset, stream_size,
+						  stream_def[i].array_size, sizeof(u64),
+						  dest + stream_def[i].offset);
+			if (err)
+				return err;
+			break;
+		}
+	}
+
+	if (stream_offset_out)
+		*stream_offset_out = stream_offset;
+
+	return err;
+}
+
+static int
+pvr_stream_process_ext_stream(struct pvr_device *pvr_dev,
+			      const struct pvr_stream_cmd_defs *cmd_defs, void *ext_stream,
+			      u32 stream_offset, u32 ext_stream_size, void *dest)
+{
+	u32 musthave_masks[PVR_STREAM_EXTHDR_TYPE_MAX];
+	u32 ext_header;
+	int err = 0;
+	u32 i;
+
+	/* Copy "must have" mask from device. We clear this as we process the stream. */
+	memcpy(musthave_masks, pvr_dev->stream_musthave_quirks[cmd_defs->type],
+	       sizeof(musthave_masks));
+
+	do {
+		const struct pvr_stream_ext_header *header;
+		u32 type;
+		u32 data;
+
+		err = pvr_stream_get_data(ext_stream, &stream_offset, ext_stream_size, sizeof(u32),
+					  sizeof(ext_header), &ext_header);
+		if (err)
+			return err;
+
+		type = (ext_header & PVR_STREAM_EXTHDR_TYPE_MASK) >> PVR_STREAM_EXTHDR_TYPE_SHIFT;
+		data = ext_header & PVR_STREAM_EXTHDR_DATA_MASK;
+
+		if (type >= cmd_defs->ext_nr_headers)
+			return -EINVAL;
+
+		header = &cmd_defs->ext_headers[type];
+		if (data & ~header->valid_mask)
+			return -EINVAL;
+
+		musthave_masks[type] &= ~data;
+
+		for (i = 0; i < header->ext_streams_num; i++) {
+			const struct pvr_stream_ext_def *ext_def = &header->ext_streams[i];
+
+			if (!(ext_header & ext_def->header_mask))
+				continue;
+
+			if (!pvr_device_has_uapi_quirk(pvr_dev, ext_def->quirk))
+				return -EINVAL;
+
+			err = pvr_stream_process_1(pvr_dev, ext_def->stream, ext_def->stream_len,
+						   ext_stream, stream_offset,
+						   ext_stream_size, dest,
+						   cmd_defs->dest_size, &stream_offset);
+			if (err)
+				return err;
+		}
+	} while (ext_header & PVR_STREAM_EXTHDR_CONTINUATION);
+
+	/*
+	 * Verify that "must have" mask is now zero. If it isn't then one of the "must have" quirks
+	 * for this command was not present.
+	 */
+	for (i = 0; i < cmd_defs->ext_nr_headers; i++) {
+		if (musthave_masks[i])
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_stream_process() - Build FW structure from stream
+ * @pvr_dev: Device pointer.
+ * @cmd_defs: Stream definition.
+ * @stream: Pointer to command stream.
+ * @stream_size: Size of command stream, in bytes.
+ * @dest_out: Pointer to destination buffer.
+ *
+ * Caller is responsible for freeing the output structure.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * -%ENOMEM on out of memory, or
+ *  * -%EINVAL on malformed stream.
+ */
+int
+pvr_stream_process(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs,
+		   void *stream, u32 stream_size, void *dest_out)
+{
+	u32 stream_offset = 0;
+	u32 main_stream_len;
+	u32 padding;
+	int err;
+
+	if (!stream || !stream_size)
+		return -EINVAL;
+
+	err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u32),
+				  sizeof(u32), &main_stream_len);
+	if (err)
+		return err;
+
+	/*
+	 * u32 after stream length is padding to ensure u64 alignment, but may be used for expansion
+	 * in the future. Verify it's zero.
+	 */
+	err = pvr_stream_get_data(stream, &stream_offset, stream_size, sizeof(u32),
+				  sizeof(u32), &padding);
+	if (err)
+		return err;
+
+	if (main_stream_len < stream_offset || main_stream_len > stream_size || padding)
+		return -EINVAL;
+
+	err = pvr_stream_process_1(pvr_dev, cmd_defs->main_stream, cmd_defs->main_stream_len,
+				   stream, stream_offset, main_stream_len, dest_out,
+				   cmd_defs->dest_size, &stream_offset);
+	if (err)
+		return err;
+
+	if (stream_offset < stream_size) {
+		err = pvr_stream_process_ext_stream(pvr_dev, cmd_defs, stream, stream_offset,
+						    stream_size, dest_out);
+		if (err)
+			return err;
+	} else {
+		u32 i;
+
+		/*
+		 * If we don't have an extension stream then there must not be any "must have"
+		 * quirks for this command.
+		 */
+		for (i = 0; i < cmd_defs->ext_nr_headers; i++) {
+			if (pvr_dev->stream_musthave_quirks[cmd_defs->type][i])
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_stream_create_musthave_masks() - Create "must have" masks for streams based on current device
+ *                                      quirks
+ * @pvr_dev: Device pointer.
+ */
+void
+pvr_stream_create_musthave_masks(struct pvr_device *pvr_dev)
+{
+	memset(pvr_dev->stream_musthave_quirks, 0, sizeof(pvr_dev->stream_musthave_quirks));
+
+	if (pvr_device_has_uapi_quirk(pvr_dev, 47217))
+		pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_FRAG][0] |=
+			PVR_STREAM_EXTHDR_FRAG0_BRN47217;
+
+	if (pvr_device_has_uapi_quirk(pvr_dev, 49927)) {
+		pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_GEOM][0] |=
+			PVR_STREAM_EXTHDR_GEOM0_BRN49927;
+		pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_FRAG][0] |=
+			PVR_STREAM_EXTHDR_FRAG0_BRN49927;
+		pvr_dev->stream_musthave_quirks[PVR_STREAM_TYPE_COMPUTE][0] |=
+			PVR_STREAM_EXTHDR_COMPUTE0_BRN49927;
+	}
+}
diff --git a/drivers/gpu/drm/imagination/pvr_stream.h b/drivers/gpu/drm/imagination/pvr_stream.h
new file mode 100644
index 0000000000000..d92acb3a61d7f
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_stream.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_STREAM_H
+#define PVR_STREAM_H
+
+#include <linux/bits.h>
+#include <linux/limits.h>
+#include <linux/types.h>
+
+struct pvr_device;
+
+struct pvr_job;
+
+enum pvr_stream_type {
+	PVR_STREAM_TYPE_GEOM = 0,
+	PVR_STREAM_TYPE_FRAG,
+	PVR_STREAM_TYPE_COMPUTE,
+	PVR_STREAM_TYPE_TRANSFER,
+	PVR_STREAM_TYPE_STATIC_RENDER_CONTEXT,
+	PVR_STREAM_TYPE_STATIC_COMPUTE_CONTEXT,
+
+	PVR_STREAM_TYPE_MAX
+};
+
+enum pvr_stream_size {
+	PVR_STREAM_SIZE_8 = 0,
+	PVR_STREAM_SIZE_16,
+	PVR_STREAM_SIZE_32,
+	PVR_STREAM_SIZE_64,
+	PVR_STREAM_SIZE_ARRAY,
+};
+
+#define PVR_FEATURE_NOT  BIT(31)
+#define PVR_FEATURE_NONE U32_MAX
+
+struct pvr_stream_def {
+	u32 offset;
+	enum pvr_stream_size size;
+	u32 array_size;
+	u32 feature;
+};
+
+struct pvr_stream_ext_def {
+	const struct pvr_stream_def *stream;
+	u32 stream_len;
+	u32 header_mask;
+	u32 quirk;
+};
+
+struct pvr_stream_ext_header {
+	const struct pvr_stream_ext_def *ext_streams;
+	u32 ext_streams_num;
+	u32 valid_mask;
+};
+
+struct pvr_stream_cmd_defs {
+	enum pvr_stream_type type;
+
+	const struct pvr_stream_def *main_stream;
+	u32 main_stream_len;
+
+	u32 ext_nr_headers;
+	const struct pvr_stream_ext_header *ext_headers;
+
+	size_t dest_size;
+};
+
+int
+pvr_stream_process(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs,
+		   void *stream, u32 stream_size, void *dest_out);
+void
+pvr_stream_create_musthave_masks(struct pvr_device *pvr_dev);
+
+#endif /* PVR_STREAM_H */
diff --git a/drivers/gpu/drm/imagination/pvr_stream_defs.c b/drivers/gpu/drm/imagination/pvr_stream_defs.c
new file mode 100644
index 0000000000000..59265a7accce7
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_stream_defs.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device_info.h"
+#include "pvr_rogue_fwif_client.h"
+#include "pvr_rogue_fwif_stream.h"
+#include "pvr_stream.h"
+#include "pvr_stream_defs.h"
+
+#include <linux/stddef.h>
+#include <uapi/drm/pvr_drm.h>
+
+#define PVR_STREAM_DEF_SET(owner, member, _size, _array_size, _feature) \
+	{ .offset = offsetof(struct owner, member), \
+	  .size = (_size),  \
+	  .array_size = (_array_size), \
+	  .feature = (_feature) }
+
+#define PVR_STREAM_DEF(owner, member, member_size)  \
+	PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ ## member_size, 0, PVR_FEATURE_NONE)
+
+#define PVR_STREAM_DEF_FEATURE(owner, member, member_size, feature) \
+	PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ ## member_size, 0, feature)
+
+#define PVR_STREAM_DEF_NOT_FEATURE(owner, member, member_size, feature)       \
+	PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ ## member_size, 0, \
+			   (feature) | PVR_FEATURE_NOT)
+
+#define PVR_STREAM_DEF_ARRAY(owner, member)                                       \
+	PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ARRAY,                  \
+			   sizeof(((struct owner *)0)->member), PVR_FEATURE_NONE)
+
+#define PVR_STREAM_DEF_ARRAY_FEATURE(owner, member, feature)            \
+	PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ARRAY,         \
+			   sizeof(((struct owner *)0)->member), feature)
+
+#define PVR_STREAM_DEF_ARRAY_NOT_FEATURE(owner, member, feature)                             \
+	PVR_STREAM_DEF_SET(owner, member, PVR_STREAM_SIZE_ARRAY,                             \
+			   sizeof(((struct owner *)0)->member), (feature) | PVR_FEATURE_NOT)
+
+/*
+ * When adding new parameters to the stream definition, the new parameters must go after the
+ * existing parameters, to preserve order. As parameters are naturally aligned, care must be taken
+ * with respect to implicit padding in the stream; padding should be minimised as much as possible.
+ */
+static const struct pvr_stream_def rogue_fwif_static_render_context_state_stream[] = {
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_reg_vdm_context_state_base_addr, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_reg_vdm_context_state_resume_addr, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_reg_ta_context_state_base_addr, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_store_task0, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_store_task1, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_store_task2, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_store_task3, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_store_task4, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_resume_task0, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_resume_task1, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_resume_task2, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_resume_task3, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[0].geom_reg_vdm_context_resume_task4, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_store_task0, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_store_task1, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_store_task2, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_store_task3, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_store_task4, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_resume_task0, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_resume_task1, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_resume_task2, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_resume_task3, 64),
+	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
+		       geom_state[1].geom_reg_vdm_context_resume_task4, 64),
+};
+
+const struct pvr_stream_cmd_defs pvr_static_render_context_state_stream = {
+	.type = PVR_STREAM_TYPE_STATIC_RENDER_CONTEXT,
+
+	.main_stream = rogue_fwif_static_render_context_state_stream,
+	.main_stream_len = ARRAY_SIZE(rogue_fwif_static_render_context_state_stream),
+
+	.ext_nr_headers = 0,
+
+	.dest_size = sizeof(struct rogue_fwif_geom_registers_caswitch),
+};
+
+static const struct pvr_stream_def rogue_fwif_static_compute_context_state_stream[] = {
+	PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0, 64),
+	PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds1, 64),
+	PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds, 64),
+	PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_terminate_pds1, 64),
+	PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0, 64),
+	PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_context_pds0_b, 64),
+	PVR_STREAM_DEF(rogue_fwif_cdm_registers_cswitch, cdmreg_cdm_resume_pds0_b, 64),
+};
+
+const struct pvr_stream_cmd_defs pvr_static_compute_context_state_stream = {
+	.type = PVR_STREAM_TYPE_STATIC_COMPUTE_CONTEXT,
+
+	.main_stream = rogue_fwif_static_compute_context_state_stream,
+	.main_stream_len = ARRAY_SIZE(rogue_fwif_static_compute_context_state_stream),
+
+	.ext_nr_headers = 0,
+
+	.dest_size = sizeof(struct rogue_fwif_cdm_registers_cswitch),
+};
diff --git a/drivers/gpu/drm/imagination/pvr_stream_defs.h b/drivers/gpu/drm/imagination/pvr_stream_defs.h
new file mode 100644
index 0000000000000..f33b821658334
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_stream_defs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_STREAM_DEFS_H
+#define PVR_STREAM_DEFS_H
+
+#include "pvr_stream.h"
+
+extern const struct pvr_stream_cmd_defs pvr_cmd_geom_stream;
+extern const struct pvr_stream_cmd_defs pvr_cmd_frag_stream;
+extern const struct pvr_stream_cmd_defs pvr_cmd_compute_stream;
+extern const struct pvr_stream_cmd_defs pvr_cmd_transfer_stream;
+extern const struct pvr_stream_cmd_defs pvr_static_render_context_state_stream;
+extern const struct pvr_stream_cmd_defs pvr_static_compute_context_state_stream;
+
+#endif /* PVR_STREAM_DEFS_H */
-- 
GitLab


From eaf01ee5ba28b97f96a3d3eec4c5fbfb37ee4cde Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:38 +0000
Subject: [PATCH 0413/2340] drm/imagination: Implement job submission and
 scheduling

Implement job submission ioctl. Job scheduling is implemented using
drm_sched.

Jobs are submitted in a stream format. This is intended to allow the UAPI
data format to be independent of the actual FWIF structures in use, which
vary depending on the GPU in use.

The stream formats are documented at:
https://gitlab.freedesktop.org/mesa/mesa/-/blob/f8d2b42ae65c2f16f36a43e0ae39d288431e4263/src/imagination/csbgen/rogue_kmd_stream.xml

Changes since v8:
- Updated for upstreamed DRM scheduler changes
- Removed workaround code for the pending_list previously being updated
  after run_job() returned
- Fixed null deref in pvr_queue_cleanup_fw_context() for bad stream ptr
  given to create_context ioctl
- Corrected license identifiers

Changes since v7:
- Updated for v8 "DRM scheduler changes for XE" patchset

Changes since v6:
- Fix fence handling in pvr_sync_signal_array_add()
- Add handling for SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE flag
- Fix missing dma_resv locking in job submit path

Changes since v5:
- Fix leak in job creation error path

Changes since v4:
- Use a regular workqueue for job scheduling

Changes since v3:
- Support partial render jobs
- Add job timeout handler
- Split sync handling out of job code
- Use drm_dev_{enter,exit}

Changes since v2:
- Use drm_sched for job scheduling

Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Link: https://lore.kernel.org/r/c98dab7a5f5fb891fbed7e4990d19b5d13964365.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Kconfig           |    1 +
 drivers/gpu/drm/imagination/Makefile          |    3 +
 drivers/gpu/drm/imagination/pvr_context.c     |  125 +-
 drivers/gpu/drm/imagination/pvr_context.h     |   44 +
 drivers/gpu/drm/imagination/pvr_device.c      |   31 +
 drivers/gpu/drm/imagination/pvr_device.h      |   21 +
 drivers/gpu/drm/imagination/pvr_drv.c         |   40 +-
 drivers/gpu/drm/imagination/pvr_job.c         |  788 +++++++++
 drivers/gpu/drm/imagination/pvr_job.h         |  161 ++
 drivers/gpu/drm/imagination/pvr_power.c       |   28 +
 drivers/gpu/drm/imagination/pvr_queue.c       | 1432 +++++++++++++++++
 drivers/gpu/drm/imagination/pvr_queue.h       |  169 ++
 drivers/gpu/drm/imagination/pvr_stream_defs.c |  226 +++
 drivers/gpu/drm/imagination/pvr_sync.c        |  289 ++++
 drivers/gpu/drm/imagination/pvr_sync.h        |   84 +
 15 files changed, 3438 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/imagination/pvr_job.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_job.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_queue.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_queue.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_sync.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_sync.h

diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig
index 0abd1b9bf3be3..3bfa2ac212dcc 100644
--- a/drivers/gpu/drm/imagination/Kconfig
+++ b/drivers/gpu/drm/imagination/Kconfig
@@ -6,6 +6,7 @@ config DRM_POWERVR
 	depends on ARM64
 	depends on DRM
 	depends on PM
+	select DRM_EXEC
 	select DRM_GEM_SHMEM_HELPER
 	select DRM_SCHED
 	select DRM_GPUVM
diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 7f7bea8c60c4f..7e3515c99caa9 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -18,10 +18,13 @@ powervr-y := \
 	pvr_fw_trace.o \
 	pvr_gem.o \
 	pvr_hwrt.o \
+	pvr_job.o \
 	pvr_mmu.o \
 	pvr_power.o \
+	pvr_queue.o \
 	pvr_stream.o \
 	pvr_stream_defs.o \
+	pvr_sync.o \
 	pvr_vm.o \
 	pvr_vm_mips.o
 
diff --git a/drivers/gpu/drm/imagination/pvr_context.c b/drivers/gpu/drm/imagination/pvr_context.c
index 6cec5aa5a7598..eded5e955cc0a 100644
--- a/drivers/gpu/drm/imagination/pvr_context.c
+++ b/drivers/gpu/drm/imagination/pvr_context.c
@@ -6,10 +6,12 @@
 #include "pvr_device.h"
 #include "pvr_drv.h"
 #include "pvr_gem.h"
+#include "pvr_job.h"
 #include "pvr_power.h"
 #include "pvr_rogue_fwif.h"
 #include "pvr_rogue_fwif_common.h"
 #include "pvr_rogue_fwif_resetframework.h"
+#include "pvr_stream.h"
 #include "pvr_stream_defs.h"
 #include "pvr_vm.h"
 
@@ -164,6 +166,116 @@ ctx_fw_data_init(void *cpu_ptr, void *priv)
 	memcpy(cpu_ptr, ctx->data, ctx->data_size);
 }
 
+/**
+ * pvr_context_destroy_queues() - Destroy all queues attached to a context.
+ * @ctx: Context to destroy queues on.
+ *
+ * Should be called when the last reference to a context object is dropped.
+ * It releases all resources attached to the queues bound to this context.
+ */
+static void pvr_context_destroy_queues(struct pvr_context *ctx)
+{
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		pvr_queue_destroy(ctx->queues.fragment);
+		pvr_queue_destroy(ctx->queues.geometry);
+		break;
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		pvr_queue_destroy(ctx->queues.compute);
+		break;
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		pvr_queue_destroy(ctx->queues.transfer);
+		break;
+	}
+}
+
+/**
+ * pvr_context_create_queues() - Create all queues attached to a context.
+ * @ctx: Context to create queues on.
+ * @args: Context creation arguments passed by userspace.
+ * @fw_ctx_map: CPU mapping of the FW context object.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * A negative error code otherwise.
+ */
+static int pvr_context_create_queues(struct pvr_context *ctx,
+				     struct drm_pvr_ioctl_create_context_args *args,
+				     void *fw_ctx_map)
+{
+	int err;
+
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		ctx->queues.geometry = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_GEOMETRY,
+							args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.geometry)) {
+			err = PTR_ERR(ctx->queues.geometry);
+			ctx->queues.geometry = NULL;
+			goto err_destroy_queues;
+		}
+
+		ctx->queues.fragment = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_FRAGMENT,
+							args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.fragment)) {
+			err = PTR_ERR(ctx->queues.fragment);
+			ctx->queues.fragment = NULL;
+			goto err_destroy_queues;
+		}
+		return 0;
+
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		ctx->queues.compute = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_COMPUTE,
+						       args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.compute)) {
+			err = PTR_ERR(ctx->queues.compute);
+			ctx->queues.compute = NULL;
+			goto err_destroy_queues;
+		}
+		return 0;
+
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		ctx->queues.transfer = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_TRANSFER_FRAG,
+							args, fw_ctx_map);
+		if (IS_ERR(ctx->queues.transfer)) {
+			err = PTR_ERR(ctx->queues.transfer);
+			ctx->queues.transfer = NULL;
+			goto err_destroy_queues;
+		}
+		return 0;
+	}
+
+	return -EINVAL;
+
+err_destroy_queues:
+	pvr_context_destroy_queues(ctx);
+	return err;
+}
+
+/**
+ * pvr_context_kill_queues() - Kill queues attached to context.
+ * @ctx: Context to kill queues on.
+ *
+ * Killing the queues implies making them unusable for future jobs, while still
+ * letting the currently submitted jobs a chance to finish. Queue resources will
+ * stay around until pvr_context_destroy_queues() is called.
+ */
+static void pvr_context_kill_queues(struct pvr_context *ctx)
+{
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		pvr_queue_kill(ctx->queues.fragment);
+		pvr_queue_kill(ctx->queues.geometry);
+		break;
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		pvr_queue_kill(ctx->queues.compute);
+		break;
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		pvr_queue_kill(ctx->queues.transfer);
+		break;
+	}
+}
+
 /**
  * pvr_context_create() - Create a context.
  * @pvr_file: File to attach the created context to.
@@ -214,10 +326,14 @@ int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_co
 		goto err_put_vm;
 	}
 
-	err = init_fw_objs(ctx, args, ctx->data);
+	err = pvr_context_create_queues(ctx, args, ctx->data);
 	if (err)
 		goto err_free_ctx_data;
 
+	err = init_fw_objs(ctx, args, ctx->data);
+	if (err)
+		goto err_destroy_queues;
+
 	err = pvr_fw_object_create(pvr_dev, ctx_size, PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
 				   ctx_fw_data_init, ctx, &ctx->fw_obj);
 	if (err)
@@ -243,6 +359,9 @@ int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_co
 err_destroy_fw_obj:
 	pvr_fw_object_destroy(ctx->fw_obj);
 
+err_destroy_queues:
+	pvr_context_destroy_queues(ctx);
+
 err_free_ctx_data:
 	kfree(ctx->data);
 
@@ -262,6 +381,7 @@ pvr_context_release(struct kref *ref_count)
 	struct pvr_device *pvr_dev = ctx->pvr_dev;
 
 	xa_erase(&pvr_dev->ctx_ids, ctx->ctx_id);
+	pvr_context_destroy_queues(ctx);
 	pvr_fw_object_destroy(ctx->fw_obj);
 	kfree(ctx->data);
 	pvr_vm_context_put(ctx->vm_ctx);
@@ -299,6 +419,9 @@ pvr_context_destroy(struct pvr_file *pvr_file, u32 handle)
 	if (!ctx)
 		return -EINVAL;
 
+	/* Make sure nothing can be queued to the queues after that point. */
+	pvr_context_kill_queues(ctx);
+
 	/* Release the reference held by the handle set. */
 	pvr_context_put(ctx);
 
diff --git a/drivers/gpu/drm/imagination/pvr_context.h b/drivers/gpu/drm/imagination/pvr_context.h
index 502dfad113e44..0c7b97dfa6baf 100644
--- a/drivers/gpu/drm/imagination/pvr_context.h
+++ b/drivers/gpu/drm/imagination/pvr_context.h
@@ -15,6 +15,7 @@
 
 #include "pvr_cccb.h"
 #include "pvr_device.h"
+#include "pvr_queue.h"
 
 /* Forward declaration from pvr_gem.h. */
 struct pvr_fw_object;
@@ -58,8 +59,51 @@ struct pvr_context {
 
 	/** @ctx_id: FW context ID. */
 	u32 ctx_id;
+
+	/**
+	 * @faulty: Set to 1 when the context queues had unfinished job when
+	 * a GPU reset happened.
+	 *
+	 * In that case, the context is in an inconsistent state and can't be
+	 * used anymore.
+	 */
+	atomic_t faulty;
+
+	/** @queues: Union containing all kind of queues. */
+	union {
+		struct {
+			/** @geometry: Geometry queue. */
+			struct pvr_queue *geometry;
+
+			/** @fragment: Fragment queue. */
+			struct pvr_queue *fragment;
+		};
+
+		/** @compute: Compute queue. */
+		struct pvr_queue *compute;
+
+		/** @compute: Transfer queue. */
+		struct pvr_queue *transfer;
+	} queues;
 };
 
+static __always_inline struct pvr_queue *
+pvr_context_get_queue_for_job(struct pvr_context *ctx, enum drm_pvr_job_type type)
+{
+	switch (type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.geometry : NULL;
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.fragment : NULL;
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return ctx->type == DRM_PVR_CTX_TYPE_COMPUTE ? ctx->queues.compute : NULL;
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+		return ctx->type == DRM_PVR_CTX_TYPE_TRANSFER_FRAG ? ctx->queues.transfer : NULL;
+	}
+
+	return NULL;
+}
+
 /**
  * pvr_context_get() - Take additional reference on context.
  * @ctx: Context pointer.
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 2d6db4715f850..199f812d1ee71 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -6,7 +6,9 @@
 
 #include "pvr_fw.h"
 #include "pvr_power.h"
+#include "pvr_queue.h"
 #include "pvr_rogue_cr_defs.h"
+#include "pvr_stream.h"
 #include "pvr_vm.h"
 
 #include <drm/drm_print.h>
@@ -117,6 +119,32 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev)
 	return 0;
 }
 
+/**
+ * pvr_device_process_active_queues() - Process all queue related events.
+ * @pvr_dev: PowerVR device to check
+ *
+ * This is called any time we receive a FW event. It iterates over all
+ * active queues and calls pvr_queue_process() on them.
+ */
+void pvr_device_process_active_queues(struct pvr_device *pvr_dev)
+{
+	struct pvr_queue *queue, *tmp_queue;
+	LIST_HEAD(active_queues);
+
+	mutex_lock(&pvr_dev->queues.lock);
+
+	/* Move all active queues to a temporary list. Queues that remain
+	 * active after we're done processing them are re-inserted to
+	 * the queues.active list by pvr_queue_process().
+	 */
+	list_splice_init(&pvr_dev->queues.active, &active_queues);
+
+	list_for_each_entry_safe(queue, tmp_queue, &active_queues, node)
+		pvr_queue_process(queue);
+
+	mutex_unlock(&pvr_dev->queues.lock);
+}
+
 static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data)
 {
 	struct pvr_device *pvr_dev = data;
@@ -132,6 +160,7 @@ static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data)
 		if (pvr_dev->fw_dev.booted) {
 			pvr_fwccb_process(pvr_dev);
 			pvr_kccb_wake_up_waiters(pvr_dev);
+			pvr_device_process_active_queues(pvr_dev);
 		}
 
 		pm_runtime_mark_last_busy(from_pvr_device(pvr_dev)->dev);
@@ -398,6 +427,8 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev)
 	else
 		return -EINVAL;
 
+	pvr_stream_create_musthave_masks(pvr_dev);
+
 	err = pvr_set_dma_info(pvr_dev);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 299f3e022c826..b6e9a7e8072ee 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -173,6 +173,26 @@ struct pvr_device {
 	 */
 	struct xarray free_list_ids;
 
+	/**
+	 * @job_ids: Array of jobs belonging to this device. Array members
+	 *           are of type "struct pvr_job *".
+	 */
+	struct xarray job_ids;
+
+	/**
+	 * @queues: Queue-related fields.
+	 */
+	struct {
+		/** @active: Active queue list. */
+		struct list_head active;
+
+		/** @idle: Idle queue list. */
+		struct list_head idle;
+
+		/** @lock: Lock protecting access to the active/idle lists. */
+		struct mutex lock;
+	} queues;
+
 	struct {
 		/** @work: Work item for watchdog callback. */
 		struct delayed_work work;
@@ -442,6 +462,7 @@ packed_bvnc_to_pvr_gpu_id(u64 bvnc, struct pvr_gpu_id *gpu_id)
 
 int pvr_device_init(struct pvr_device *pvr_dev);
 void pvr_device_fini(struct pvr_device *pvr_dev);
+void pvr_device_reset(struct pvr_device *pvr_dev);
 
 bool
 pvr_device_has_uapi_quirk(struct pvr_device *pvr_dev, u32 quirk);
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index e76363f7dbf78..a6f1d52aaebb4 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -7,6 +7,7 @@
 #include "pvr_free_list.h"
 #include "pvr_gem.h"
 #include "pvr_hwrt.h"
+#include "pvr_job.h"
 #include "pvr_mmu.h"
 #include "pvr_power.h"
 #include "pvr_rogue_defs.h"
@@ -32,6 +33,8 @@
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/xarray.h>
 
 /**
  * DOC: PowerVR (Series 6 and later) and IMG Graphics Driver
@@ -397,7 +400,8 @@ pvr_dev_query_runtime_info_get(struct pvr_device *pvr_dev,
 		return 0;
 	}
 
-	runtime_info.free_list_min_pages = 0; /* FIXME */
+	runtime_info.free_list_min_pages =
+		pvr_get_free_list_min_pages(pvr_dev);
 	runtime_info.free_list_max_pages =
 		ROGUE_PM_MAX_FREELIST_SIZE / ROGUE_PM_PAGE_SIZE;
 	runtime_info.common_store_alloc_region_size =
@@ -1137,7 +1141,20 @@ static int
 pvr_ioctl_submit_jobs(struct drm_device *drm_dev, void *raw_args,
 		      struct drm_file *file)
 {
-	return -ENOTTY;
+	struct drm_pvr_ioctl_submit_jobs_args *args = raw_args;
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct pvr_file *pvr_file = to_pvr_file(file);
+	int idx;
+	int err;
+
+	if (!drm_dev_enter(drm_dev, &idx))
+		return -EIO;
+
+	err = pvr_submit_jobs(pvr_dev, pvr_file, args);
+
+	drm_dev_exit(idx);
+
+	return err;
 }
 
 int
@@ -1353,7 +1370,8 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops);
 
 static struct drm_driver pvr_drm_driver = {
-	.driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER,
+	.driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER |
+			   DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
 	.open = pvr_drm_driver_open,
 	.postclose = pvr_drm_driver_postclose,
 	.ioctls = pvr_drm_driver_ioctls,
@@ -1386,8 +1404,15 @@ pvr_probe(struct platform_device *plat_dev)
 	drm_dev = &pvr_dev->base;
 
 	platform_set_drvdata(plat_dev, drm_dev);
+
+	init_rwsem(&pvr_dev->reset_sem);
+
 	pvr_context_device_init(pvr_dev);
 
+	err = pvr_queue_device_init(pvr_dev);
+	if (err)
+		goto err_context_fini;
+
 	devm_pm_runtime_enable(&plat_dev->dev);
 	pm_runtime_mark_last_busy(&plat_dev->dev);
 
@@ -1404,6 +1429,7 @@ pvr_probe(struct platform_device *plat_dev)
 		goto err_device_fini;
 
 	xa_init_flags(&pvr_dev->free_list_ids, XA_FLAGS_ALLOC1);
+	xa_init_flags(&pvr_dev->job_ids, XA_FLAGS_ALLOC1);
 
 	return 0;
 
@@ -1413,6 +1439,11 @@ err_device_fini:
 err_watchdog_fini:
 	pvr_watchdog_fini(pvr_dev);
 
+	pvr_queue_device_fini(pvr_dev);
+
+err_context_fini:
+	pvr_context_device_fini(pvr_dev);
+
 	return err;
 }
 
@@ -1422,14 +1453,17 @@ pvr_remove(struct platform_device *plat_dev)
 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
 	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
 
+	WARN_ON(!xa_empty(&pvr_dev->job_ids));
 	WARN_ON(!xa_empty(&pvr_dev->free_list_ids));
 
+	xa_destroy(&pvr_dev->job_ids);
 	xa_destroy(&pvr_dev->free_list_ids);
 
 	pm_runtime_suspend(drm_dev->dev);
 	pvr_device_fini(pvr_dev);
 	drm_dev_unplug(drm_dev);
 	pvr_watchdog_fini(pvr_dev);
+	pvr_queue_device_fini(pvr_dev);
 	pvr_context_device_fini(pvr_dev);
 
 	return 0;
diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c
new file mode 100644
index 0000000000000..9d0812710295c
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_job.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_context.h"
+#include "pvr_device.h"
+#include "pvr_drv.h"
+#include "pvr_gem.h"
+#include "pvr_hwrt.h"
+#include "pvr_job.h"
+#include "pvr_mmu.h"
+#include "pvr_power.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_fwif_client.h"
+#include "pvr_stream.h"
+#include "pvr_stream_defs.h"
+#include "pvr_sync.h"
+
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+#include <linux/types.h>
+#include <uapi/drm/pvr_drm.h>
+
+static void pvr_job_release(struct kref *kref)
+{
+	struct pvr_job *job = container_of(kref, struct pvr_job, ref_count);
+
+	xa_erase(&job->pvr_dev->job_ids, job->id);
+
+	pvr_hwrt_data_put(job->hwrt);
+	pvr_context_put(job->ctx);
+
+	WARN_ON(job->paired_job);
+
+	pvr_queue_job_cleanup(job);
+	pvr_job_release_pm_ref(job);
+
+	kfree(job->cmd);
+	kfree(job);
+}
+
+/**
+ * pvr_job_put() - Release reference on job
+ * @job: Target job.
+ */
+void
+pvr_job_put(struct pvr_job *job)
+{
+	if (job)
+		kref_put(&job->ref_count, pvr_job_release);
+}
+
+/**
+ * pvr_job_process_stream() - Build job FW structure from stream
+ * @pvr_dev: Device pointer.
+ * @cmd_defs: Stream definition.
+ * @stream: Pointer to command stream.
+ * @stream_size: Size of command stream, in bytes.
+ * @job: Pointer to job.
+ *
+ * Caller is responsible for freeing the output structure.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * -%ENOMEM on out of memory, or
+ *  * -%EINVAL on malformed stream.
+ */
+static int
+pvr_job_process_stream(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs,
+		       void *stream, u32 stream_size, struct pvr_job *job)
+{
+	int err;
+
+	job->cmd = kzalloc(cmd_defs->dest_size, GFP_KERNEL);
+	if (!job->cmd)
+		return -ENOMEM;
+
+	job->cmd_len = cmd_defs->dest_size;
+
+	err = pvr_stream_process(pvr_dev, cmd_defs, stream, stream_size, job->cmd);
+	if (err)
+		kfree(job->cmd);
+
+	return err;
+}
+
+static int pvr_fw_cmd_init(struct pvr_device *pvr_dev, struct pvr_job *job,
+			   const struct pvr_stream_cmd_defs *stream_def,
+			   u64 stream_userptr, u32 stream_len)
+{
+	void *stream;
+	int err;
+
+	stream = kzalloc(stream_len, GFP_KERNEL);
+	if (!stream)
+		return -ENOMEM;
+
+	if (copy_from_user(stream, u64_to_user_ptr(stream_userptr), stream_len)) {
+		err = -EFAULT;
+		goto err_free_stream;
+	}
+
+	err = pvr_job_process_stream(pvr_dev, stream_def, stream, stream_len, job);
+
+err_free_stream:
+	kfree(stream);
+
+	return err;
+}
+
+static u32
+convert_geom_flags(u32 in_flags)
+{
+	u32 out_flags = 0;
+
+	if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST)
+		out_flags |= ROGUE_GEOM_FLAGS_FIRSTKICK;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST)
+		out_flags |= ROGUE_GEOM_FLAGS_LASTKICK;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE)
+		out_flags |= ROGUE_GEOM_FLAGS_SINGLE_CORE;
+
+	return out_flags;
+}
+
+static u32
+convert_frag_flags(u32 in_flags)
+{
+	u32 out_flags = 0;
+
+	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE)
+		out_flags |= ROGUE_FRAG_FLAGS_SINGLE_CORE;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER)
+		out_flags |= ROGUE_FRAG_FLAGS_DEPTHBUFFER;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER)
+		out_flags |= ROGUE_FRAG_FLAGS_STENCILBUFFER;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP)
+		out_flags |= ROGUE_FRAG_FLAGS_PREVENT_CDM_OVERLAP;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER)
+		out_flags |= ROGUE_FRAG_FLAGS_SCRATCHBUFFER;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS)
+		out_flags |= ROGUE_FRAG_FLAGS_GET_VIS_RESULTS;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE)
+		out_flags |= ROGUE_FRAG_FLAGS_DISABLE_PIXELMERGE;
+
+	return out_flags;
+}
+
+static int
+pvr_geom_job_fw_cmd_init(struct pvr_job *job,
+			 struct drm_pvr_job *args)
+{
+	struct rogue_fwif_cmd_geom *cmd;
+	int err;
+
+	if (args->flags & ~DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK)
+		return -EINVAL;
+
+	if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER)
+		return -EINVAL;
+
+	if (!job->hwrt)
+		return -EINVAL;
+
+	job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_GEOM;
+	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_geom_stream,
+			      args->cmd_stream, args->cmd_stream_len);
+	if (err)
+		return err;
+
+	cmd = job->cmd;
+	cmd->cmd_shared.cmn.frame_num = 0;
+	cmd->flags = convert_geom_flags(args->flags);
+	pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr);
+	return 0;
+}
+
+static int
+pvr_frag_job_fw_cmd_init(struct pvr_job *job,
+			 struct drm_pvr_job *args)
+{
+	struct rogue_fwif_cmd_frag *cmd;
+	int err;
+
+	if (args->flags & ~DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK)
+		return -EINVAL;
+
+	if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER)
+		return -EINVAL;
+
+	if (!job->hwrt)
+		return -EINVAL;
+
+	job->fw_ccb_cmd_type = (args->flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER) ?
+			       ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR :
+			       ROGUE_FWIF_CCB_CMD_TYPE_FRAG;
+	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_frag_stream,
+			      args->cmd_stream, args->cmd_stream_len);
+	if (err)
+		return err;
+
+	cmd = job->cmd;
+	cmd->cmd_shared.cmn.frame_num = 0;
+	cmd->flags = convert_frag_flags(args->flags);
+	pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr);
+	return 0;
+}
+
+static u32
+convert_compute_flags(u32 in_flags)
+{
+	u32 out_flags = 0;
+
+	if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP)
+		out_flags |= ROGUE_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
+	if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE)
+		out_flags |= ROGUE_COMPUTE_FLAG_SINGLE_CORE;
+
+	return out_flags;
+}
+
+static int
+pvr_compute_job_fw_cmd_init(struct pvr_job *job,
+			    struct drm_pvr_job *args)
+{
+	struct rogue_fwif_cmd_compute *cmd;
+	int err;
+
+	if (args->flags & ~DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK)
+		return -EINVAL;
+
+	if (job->ctx->type != DRM_PVR_CTX_TYPE_COMPUTE)
+		return -EINVAL;
+
+	job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_CDM;
+	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_compute_stream,
+			      args->cmd_stream, args->cmd_stream_len);
+	if (err)
+		return err;
+
+	cmd = job->cmd;
+	cmd->common.frame_num = 0;
+	cmd->flags = convert_compute_flags(args->flags);
+	return 0;
+}
+
+static u32
+convert_transfer_flags(u32 in_flags)
+{
+	u32 out_flags = 0;
+
+	if (in_flags & DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE)
+		out_flags |= ROGUE_TRANSFER_FLAGS_SINGLE_CORE;
+
+	return out_flags;
+}
+
+static int
+pvr_transfer_job_fw_cmd_init(struct pvr_job *job,
+			     struct drm_pvr_job *args)
+{
+	struct rogue_fwif_cmd_transfer *cmd;
+	int err;
+
+	if (args->flags & ~DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK)
+		return -EINVAL;
+
+	if (job->ctx->type != DRM_PVR_CTX_TYPE_TRANSFER_FRAG)
+		return -EINVAL;
+
+	job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_TQ_3D;
+	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_transfer_stream,
+			      args->cmd_stream, args->cmd_stream_len);
+	if (err)
+		return err;
+
+	cmd = job->cmd;
+	cmd->common.frame_num = 0;
+	cmd->flags = convert_transfer_flags(args->flags);
+	return 0;
+}
+
+static int
+pvr_job_fw_cmd_init(struct pvr_job *job,
+		    struct drm_pvr_job *args)
+{
+	switch (args->type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return pvr_geom_job_fw_cmd_init(job, args);
+
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return pvr_frag_job_fw_cmd_init(job, args);
+
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return pvr_compute_job_fw_cmd_init(job, args);
+
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+		return pvr_transfer_job_fw_cmd_init(job, args);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * struct pvr_job_data - Helper container for pairing jobs with the
+ * sync_ops supplied for them by the user.
+ */
+struct pvr_job_data {
+	/** @job: Pointer to the job. */
+	struct pvr_job *job;
+
+	/** @sync_ops: Pointer to the sync_ops associated with @job. */
+	struct drm_pvr_sync_op *sync_ops;
+
+	/** @sync_op_count: Number of members of @sync_ops. */
+	u32 sync_op_count;
+};
+
+/**
+ * prepare_job_syncs() - Prepare all sync objects for a single job.
+ * @pvr_file: PowerVR file.
+ * @job_data: Precreated job and sync_ops array.
+ * @signal_array: xarray to receive signal sync objects.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_sync_signal_array_collect_ops(),
+ *    pvr_sync_add_deps_to_job(), drm_sched_job_add_resv_dependencies() or
+ *    pvr_sync_signal_array_update_fences().
+ */
+static int
+prepare_job_syncs(struct pvr_file *pvr_file,
+		  struct pvr_job_data *job_data,
+		  struct xarray *signal_array)
+{
+	struct dma_fence *done_fence;
+	int err = pvr_sync_signal_array_collect_ops(signal_array,
+						    from_pvr_file(pvr_file),
+						    job_data->sync_op_count,
+						    job_data->sync_ops);
+
+	if (err)
+		return err;
+
+	err = pvr_sync_add_deps_to_job(pvr_file, &job_data->job->base,
+				       job_data->sync_op_count,
+				       job_data->sync_ops, signal_array);
+	if (err)
+		return err;
+
+	if (job_data->job->hwrt) {
+		/* The geometry job writes the HWRT region headers, which are
+		 * then read by the fragment job.
+		 */
+		struct drm_gem_object *obj =
+			gem_from_pvr_gem(job_data->job->hwrt->fw_obj->gem);
+		enum dma_resv_usage usage =
+			dma_resv_usage_rw(job_data->job->type ==
+					  DRM_PVR_JOB_TYPE_GEOMETRY);
+
+		dma_resv_lock(obj->resv, NULL);
+		err = drm_sched_job_add_resv_dependencies(&job_data->job->base,
+							  obj->resv, usage);
+		dma_resv_unlock(obj->resv);
+		if (err)
+			return err;
+	}
+
+	/* We need to arm the job to get the job done fence. */
+	done_fence = pvr_queue_job_arm(job_data->job);
+
+	err = pvr_sync_signal_array_update_fences(signal_array,
+						  job_data->sync_op_count,
+						  job_data->sync_ops,
+						  done_fence);
+	return err;
+}
+
+/**
+ * prepare_job_syncs_for_each() - Prepare all sync objects for an array of jobs.
+ * @file: PowerVR file.
+ * @job_data: Array of precreated jobs and their sync_ops.
+ * @job_count: Number of jobs.
+ * @signal_array: xarray to receive signal sync objects.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_vm_bind_job_prepare_syncs().
+ */
+static int
+prepare_job_syncs_for_each(struct pvr_file *pvr_file,
+			   struct pvr_job_data *job_data,
+			   u32 *job_count,
+			   struct xarray *signal_array)
+{
+	for (u32 i = 0; i < *job_count; i++) {
+		int err = prepare_job_syncs(pvr_file, &job_data[i],
+					    signal_array);
+
+		if (err) {
+			*job_count = i;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static struct pvr_job *
+create_job(struct pvr_device *pvr_dev,
+	   struct pvr_file *pvr_file,
+	   struct drm_pvr_job *args)
+{
+	struct pvr_job *job = NULL;
+	int err;
+
+	if (!args->cmd_stream || !args->cmd_stream_len)
+		return ERR_PTR(-EINVAL);
+
+	if (args->type != DRM_PVR_JOB_TYPE_GEOMETRY &&
+	    args->type != DRM_PVR_JOB_TYPE_FRAGMENT &&
+	    (args->hwrt.set_handle || args->hwrt.data_index))
+		return ERR_PTR(-EINVAL);
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&job->ref_count);
+	job->type = args->type;
+	job->pvr_dev = pvr_dev;
+
+	err = xa_alloc(&pvr_dev->job_ids, &job->id, job, xa_limit_32b, GFP_KERNEL);
+	if (err)
+		goto err_put_job;
+
+	job->ctx = pvr_context_lookup(pvr_file, args->context_handle);
+	if (!job->ctx) {
+		err = -EINVAL;
+		goto err_put_job;
+	}
+
+	if (args->hwrt.set_handle) {
+		job->hwrt = pvr_hwrt_data_lookup(pvr_file, args->hwrt.set_handle,
+						 args->hwrt.data_index);
+		if (!job->hwrt) {
+			err = -EINVAL;
+			goto err_put_job;
+		}
+	}
+
+	err = pvr_job_fw_cmd_init(job, args);
+	if (err)
+		goto err_put_job;
+
+	err = pvr_queue_job_init(job);
+	if (err)
+		goto err_put_job;
+
+	return job;
+
+err_put_job:
+	pvr_job_put(job);
+	return ERR_PTR(err);
+}
+
+/**
+ * pvr_job_data_fini() - Cleanup all allocs used to set up job submission.
+ * @job_data: Job data array.
+ * @job_count: Number of members of @job_data.
+ */
+static void
+pvr_job_data_fini(struct pvr_job_data *job_data, u32 job_count)
+{
+	for (u32 i = 0; i < job_count; i++) {
+		pvr_job_put(job_data[i].job);
+		kvfree(job_data[i].sync_ops);
+	}
+}
+
+/**
+ * pvr_job_data_init() - Init an array of created jobs, associating them with
+ * the appropriate sync_ops args, which will be copied in.
+ * @pvr_dev: Target PowerVR device.
+ * @pvr_file: Pointer to PowerVR file structure.
+ * @job_args: Job args array copied from user.
+ * @job_count: Number of members of @job_args.
+ * @job_data_out: Job data array.
+ */
+static int pvr_job_data_init(struct pvr_device *pvr_dev,
+			     struct pvr_file *pvr_file,
+			     struct drm_pvr_job *job_args,
+			     u32 *job_count,
+			     struct pvr_job_data *job_data_out)
+{
+	int err = 0, i = 0;
+
+	for (; i < *job_count; i++) {
+		job_data_out[i].job =
+			create_job(pvr_dev, pvr_file, &job_args[i]);
+		err = PTR_ERR_OR_ZERO(job_data_out[i].job);
+
+		if (err) {
+			*job_count = i;
+			job_data_out[i].job = NULL;
+			goto err_cleanup;
+		}
+
+		err = PVR_UOBJ_GET_ARRAY(job_data_out[i].sync_ops,
+					 &job_args[i].sync_ops);
+		if (err) {
+			*job_count = i;
+
+			/* Ensure the job created above is also cleaned up. */
+			i++;
+			goto err_cleanup;
+		}
+
+		job_data_out[i].sync_op_count = job_args[i].sync_ops.count;
+	}
+
+	return 0;
+
+err_cleanup:
+	pvr_job_data_fini(job_data_out, i);
+
+	return err;
+}
+
+static void
+push_jobs(struct pvr_job_data *job_data, u32 job_count)
+{
+	for (u32 i = 0; i < job_count; i++)
+		pvr_queue_job_push(job_data[i].job);
+}
+
+static int
+prepare_fw_obj_resv(struct drm_exec *exec, struct pvr_fw_object *fw_obj)
+{
+	return drm_exec_prepare_obj(exec, gem_from_pvr_gem(fw_obj->gem), 1);
+}
+
+static int
+jobs_lock_all_objs(struct drm_exec *exec, struct pvr_job_data *job_data,
+		   u32 job_count)
+{
+	for (u32 i = 0; i < job_count; i++) {
+		struct pvr_job *job = job_data[i].job;
+
+		/* Grab a lock on a the context, to guard against
+		 * concurrent submission to the same queue.
+		 */
+		int err = drm_exec_lock_obj(exec,
+					    gem_from_pvr_gem(job->ctx->fw_obj->gem));
+
+		if (err)
+			return err;
+
+		if (job->hwrt) {
+			err = prepare_fw_obj_resv(exec,
+						  job->hwrt->fw_obj);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int
+prepare_job_resvs_for_each(struct drm_exec *exec, struct pvr_job_data *job_data,
+			   u32 job_count)
+{
+	drm_exec_until_all_locked(exec) {
+		int err = jobs_lock_all_objs(exec, job_data, job_count);
+
+		drm_exec_retry_on_contention(exec);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+update_job_resvs(struct pvr_job *job)
+{
+	if (job->hwrt) {
+		enum dma_resv_usage usage = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
+					    DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ;
+		struct drm_gem_object *obj = gem_from_pvr_gem(job->hwrt->fw_obj->gem);
+
+		dma_resv_add_fence(obj->resv, &job->base.s_fence->finished, usage);
+	}
+}
+
+static void
+update_job_resvs_for_each(struct pvr_job_data *job_data, u32 job_count)
+{
+	for (u32 i = 0; i < job_count; i++)
+		update_job_resvs(job_data[i].job);
+}
+
+static bool can_combine_jobs(struct pvr_job *a, struct pvr_job *b)
+{
+	struct pvr_job *geom_job = a, *frag_job = b;
+	struct dma_fence *fence;
+	unsigned long index;
+
+	/* Geometry and fragment jobs can be combined if they are queued to the
+	 * same context and targeting the same HWRT.
+	 */
+	if (a->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
+	    b->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
+	    a->ctx != b->ctx ||
+	    a->hwrt != b->hwrt)
+		return false;
+
+	xa_for_each(&frag_job->base.dependencies, index, fence) {
+		/* We combine when we see an explicit geom -> frag dep. */
+		if (&geom_job->base.s_fence->scheduled == fence)
+			return true;
+	}
+
+	return false;
+}
+
+static struct dma_fence *
+get_last_queued_job_scheduled_fence(struct pvr_queue *queue,
+				    struct pvr_job_data *job_data,
+				    u32 cur_job_pos)
+{
+	/* We iterate over the current job array in reverse order to grab the
+	 * last to-be-queued job targeting the same queue.
+	 */
+	for (u32 i = cur_job_pos; i > 0; i--) {
+		struct pvr_job *job = job_data[i - 1].job;
+
+		if (job->ctx == queue->ctx && job->type == queue->type)
+			return dma_fence_get(&job->base.s_fence->scheduled);
+	}
+
+	/* If we didn't find any, we just return the last queued job scheduled
+	 * fence attached to the queue.
+	 */
+	return dma_fence_get(queue->last_queued_job_scheduled_fence);
+}
+
+static int
+pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count)
+{
+	for (u32 i = 0; i < *job_count - 1; i++) {
+		struct pvr_job *geom_job = job_data[i].job;
+		struct pvr_job *frag_job = job_data[i + 1].job;
+		struct pvr_queue *frag_queue;
+		struct dma_fence *f;
+
+		if (!can_combine_jobs(job_data[i].job, job_data[i + 1].job))
+			continue;
+
+		/* The fragment job will be submitted by the geometry queue. We
+		 * need to make sure it comes after all the other fragment jobs
+		 * queued before it.
+		 */
+		frag_queue = pvr_context_get_queue_for_job(frag_job->ctx,
+							   frag_job->type);
+		f = get_last_queued_job_scheduled_fence(frag_queue, job_data,
+							i);
+		if (f) {
+			int err = drm_sched_job_add_dependency(&geom_job->base,
+							       f);
+			if (err) {
+				*job_count = i;
+				return err;
+			}
+		}
+
+		/* The KCCB slot will be reserved by the geometry job, so we can
+		 * drop the KCCB fence on the fragment job.
+		 */
+		pvr_kccb_fence_put(frag_job->kccb_fence);
+		frag_job->kccb_fence = NULL;
+
+		geom_job->paired_job = frag_job;
+		frag_job->paired_job = geom_job;
+
+		/* Skip the fragment job we just paired to the geometry job. */
+		i++;
+	}
+
+	return 0;
+}
+
+/**
+ * pvr_submit_jobs() - Submit jobs to the GPU
+ * @pvr_dev: Target PowerVR device.
+ * @pvr_file: Pointer to PowerVR file structure.
+ * @args: Ioctl args.
+ * @job_count: Number of jobs in @jobs_args. On error this will be updated
+ * with the index into @jobs_args where the error occurred.
+ *
+ * This initial implementation is entirely synchronous; on return the GPU will
+ * be idle. This will not be the case for future implementations.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * -%EFAULT if arguments can not be copied from user space, or
+ *  * -%EINVAL on invalid arguments, or
+ *  * Any other error.
+ */
+int
+pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
+		struct drm_pvr_ioctl_submit_jobs_args *args)
+{
+	struct pvr_job_data *job_data = NULL;
+	struct drm_pvr_job *job_args;
+	struct xarray signal_array;
+	u32 jobs_alloced = 0;
+	struct drm_exec exec;
+	int err;
+
+	if (!args->jobs.count)
+		return -EINVAL;
+
+	err = PVR_UOBJ_GET_ARRAY(job_args, &args->jobs);
+	if (err)
+		return err;
+
+	job_data = kvmalloc_array(args->jobs.count, sizeof(*job_data),
+				  GFP_KERNEL | __GFP_ZERO);
+	if (!job_data) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	err = pvr_job_data_init(pvr_dev, pvr_file, job_args, &args->jobs.count,
+				job_data);
+	if (err)
+		goto out_free;
+
+	jobs_alloced = args->jobs.count;
+
+	/*
+	 * Flush MMU if needed - this has been deferred until now to avoid
+	 * overuse of this expensive operation.
+	 */
+	err = pvr_mmu_flush_exec(pvr_dev, false);
+	if (err)
+		goto out_job_data_cleanup;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
+
+	xa_init_flags(&signal_array, XA_FLAGS_ALLOC);
+
+	err = prepare_job_syncs_for_each(pvr_file, job_data, &args->jobs.count,
+					 &signal_array);
+	if (err)
+		goto out_exec_fini;
+
+	err = prepare_job_resvs_for_each(&exec, job_data, args->jobs.count);
+	if (err)
+		goto out_exec_fini;
+
+	err = pvr_jobs_link_geom_frag(job_data, &args->jobs.count);
+	if (err)
+		goto out_exec_fini;
+
+	/* Anything after that point must succeed because we start exposing job
+	 * finished fences to the outside world.
+	 */
+	update_job_resvs_for_each(job_data, args->jobs.count);
+	push_jobs(job_data, args->jobs.count);
+	pvr_sync_signal_array_push_fences(&signal_array);
+	err = 0;
+
+out_exec_fini:
+	drm_exec_fini(&exec);
+	pvr_sync_signal_array_cleanup(&signal_array);
+
+out_job_data_cleanup:
+	pvr_job_data_fini(job_data, jobs_alloced);
+
+out_free:
+	kvfree(job_data);
+	kvfree(job_args);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_job.h b/drivers/gpu/drm/imagination/pvr_job.h
new file mode 100644
index 0000000000000..0ca003c5c4751
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_job.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_JOB_H
+#define PVR_JOB_H
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <linux/kref.h>
+#include <linux/types.h>
+
+#include <drm/drm_gem.h>
+#include <drm/gpu_scheduler.h>
+
+#include "pvr_power.h"
+
+/* Forward declaration from "pvr_context.h". */
+struct pvr_context;
+
+/* Forward declarations from "pvr_device.h". */
+struct pvr_device;
+struct pvr_file;
+
+/* Forward declarations from "pvr_hwrt.h". */
+struct pvr_hwrt_data;
+
+/* Forward declaration from "pvr_queue.h". */
+struct pvr_queue;
+
+struct pvr_job {
+	/** @base: drm_sched_job object. */
+	struct drm_sched_job base;
+
+	/** @ref_count: Refcount for job. */
+	struct kref ref_count;
+
+	/** @type: Type of job. */
+	enum drm_pvr_job_type type;
+
+	/** @id: Job ID number. */
+	u32 id;
+
+	/**
+	 * @paired_job: Job paired to this job.
+	 *
+	 * This field is only meaningful for geometry and fragment jobs.
+	 *
+	 * Paired jobs are executed on the same context, and need to be submitted
+	 * atomically to the FW, to make sure the partial render logic has a
+	 * fragment job to execute when the Parameter Manager runs out of memory.
+	 *
+	 * The geometry job should point to the fragment job it's paired with,
+	 * and the fragment job should point to the geometry job it's paired with.
+	 */
+	struct pvr_job *paired_job;
+
+	/** @cccb_fence: Fence used to wait for CCCB space. */
+	struct dma_fence *cccb_fence;
+
+	/** @kccb_fence: Fence used to wait for KCCB space. */
+	struct dma_fence *kccb_fence;
+
+	/** @done_fence: Fence to signal when the job is done. */
+	struct dma_fence *done_fence;
+
+	/** @pvr_dev: Device pointer. */
+	struct pvr_device *pvr_dev;
+
+	/** @ctx: Pointer to owning context. */
+	struct pvr_context *ctx;
+
+	/** @cmd: Command data. Format depends on @type. */
+	void *cmd;
+
+	/** @cmd_len: Length of command data, in bytes. */
+	u32 cmd_len;
+
+	/**
+	 * @fw_ccb_cmd_type: Firmware CCB command type. Must be one of %ROGUE_FWIF_CCB_CMD_TYPE_*.
+	 */
+	u32 fw_ccb_cmd_type;
+
+	/** @hwrt: HWRT object. Will be NULL for compute and transfer jobs. */
+	struct pvr_hwrt_data *hwrt;
+
+	/**
+	 * @has_pm_ref: True if the job has a power ref, thus forcing the GPU to stay on until
+	 * the job is done.
+	 */
+	bool has_pm_ref;
+};
+
+/**
+ * pvr_job_get() - Take additional reference on job.
+ * @job: Job pointer.
+ *
+ * Call pvr_job_put() to release.
+ *
+ * Returns:
+ *  * The requested job on success, or
+ *  * %NULL if no job pointer passed.
+ */
+static __always_inline struct pvr_job *
+pvr_job_get(struct pvr_job *job)
+{
+	if (job)
+		kref_get(&job->ref_count);
+
+	return job;
+}
+
+void pvr_job_put(struct pvr_job *job);
+
+/**
+ * pvr_job_release_pm_ref() - Release the PM ref if the job acquired it.
+ * @job: The job to release the PM ref on.
+ */
+static __always_inline void
+pvr_job_release_pm_ref(struct pvr_job *job)
+{
+	if (job->has_pm_ref) {
+		pvr_power_put(job->pvr_dev);
+		job->has_pm_ref = false;
+	}
+}
+
+/**
+ * pvr_job_get_pm_ref() - Get a PM ref and attach it to the job.
+ * @job: The job to attach the PM ref to.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_power_get() otherwise.
+ */
+static __always_inline int
+pvr_job_get_pm_ref(struct pvr_job *job)
+{
+	int err;
+
+	if (job->has_pm_ref)
+		return 0;
+
+	err = pvr_power_get(job->pvr_dev);
+	if (!err)
+		job->has_pm_ref = true;
+
+	return err;
+}
+
+int pvr_job_wait_first_non_signaled_native_dep(struct pvr_job *job);
+
+bool pvr_job_non_native_deps_done(struct pvr_job *job);
+
+int pvr_job_fits_in_cccb(struct pvr_job *job, unsigned long native_dep_count);
+
+void pvr_job_submit(struct pvr_job *job);
+
+int pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
+		    struct drm_pvr_ioctl_submit_jobs_args *args);
+
+#endif /* PVR_JOB_H */
diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c
index 5f7cb7beb8792..ba7816fd28ec7 100644
--- a/drivers/gpu/drm/imagination/pvr_power.c
+++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -5,6 +5,7 @@
 #include "pvr_fw.h"
 #include "pvr_fw_startstop.h"
 #include "pvr_power.h"
+#include "pvr_queue.h"
 #include "pvr_rogue_fwif.h"
 
 #include <drm/drm_drv.h>
@@ -155,6 +156,21 @@ pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
 			pvr_dev->watchdog.kccb_stall_count = 0;
 			return true;
 		}
+	} else if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed) {
+		bool has_active_contexts;
+
+		mutex_lock(&pvr_dev->queues.lock);
+		has_active_contexts = list_empty(&pvr_dev->queues.active);
+		mutex_unlock(&pvr_dev->queues.lock);
+
+		if (has_active_contexts) {
+			/* Send a HEALTH_CHECK command so we can verify FW is still alive. */
+			struct rogue_fwif_kccb_cmd health_check_cmd;
+
+			health_check_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_HEALTH_CHECK;
+
+			pvr_kccb_send_cmd_powered(pvr_dev, &health_check_cmd, NULL);
+		}
 	} else {
 		pvr_dev->watchdog.old_kccb_cmds_executed = kccb_cmds_executed;
 		pvr_dev->watchdog.kccb_stall_count = 0;
@@ -318,6 +334,7 @@ pvr_power_device_idle(struct device *dev)
 int
 pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
 {
+	bool queues_disabled = false;
 	int err;
 
 	/*
@@ -337,6 +354,11 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
 	disable_irq(pvr_dev->irq);
 
 	do {
+		if (hard_reset) {
+			pvr_queue_device_pre_reset(pvr_dev);
+			queues_disabled = true;
+		}
+
 		err = pvr_power_fw_disable(pvr_dev, hard_reset);
 		if (!err) {
 			if (hard_reset) {
@@ -372,6 +394,9 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
 		}
 	} while (err);
 
+	if (queues_disabled)
+		pvr_queue_device_post_reset(pvr_dev);
+
 	enable_irq(pvr_dev->irq);
 
 	up_write(&pvr_dev->reset_sem);
@@ -386,6 +411,9 @@ err_device_lost:
 
 	/* Leave IRQs disabled if the device is lost. */
 
+	if (queues_disabled)
+		pvr_queue_device_post_reset(pvr_dev);
+
 err_up_write:
 	up_write(&pvr_dev->reset_sem);
 
diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c
new file mode 100644
index 0000000000000..d65c3fbedf5ac
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_queue.c
@@ -0,0 +1,1432 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+
+#include "pvr_cccb.h"
+#include "pvr_context.h"
+#include "pvr_device.h"
+#include "pvr_drv.h"
+#include "pvr_job.h"
+#include "pvr_queue.h"
+#include "pvr_vm.h"
+
+#include "pvr_rogue_fwif_client.h"
+
+#define MAX_DEADLINE_MS 30000
+
+#define CTX_COMPUTE_CCCB_SIZE_LOG2 15
+#define CTX_FRAG_CCCB_SIZE_LOG2 15
+#define CTX_GEOM_CCCB_SIZE_LOG2 15
+#define CTX_TRANSFER_CCCB_SIZE_LOG2 15
+
+static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
+{
+	u32 num_isp_store_registers;
+
+	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
+		num_isp_store_registers = 1;
+	} else {
+		int err;
+
+		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
+		if (WARN_ON(err))
+			return err;
+	}
+
+	return sizeof(struct rogue_fwif_frag_ctx_state) +
+	       (num_isp_store_registers *
+		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
+}
+
+static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
+{
+	u32 num_isp_store_registers;
+	int err;
+
+	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
+		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
+		if (WARN_ON(err))
+			return err;
+
+		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
+			u32 xpu_max_slaves;
+
+			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
+			if (WARN_ON(err))
+				return err;
+
+			num_isp_store_registers *= (1 + xpu_max_slaves);
+		}
+	} else {
+		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
+		if (WARN_ON(err))
+			return err;
+	}
+
+	return sizeof(struct rogue_fwif_frag_ctx_state) +
+	       (num_isp_store_registers *
+		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
+}
+
+static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
+{
+	switch (type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return sizeof(struct rogue_fwif_geom_ctx_state);
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return get_frag_ctx_state_size(pvr_dev);
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return sizeof(struct rogue_fwif_compute_ctx_state);
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+		return get_xfer_ctx_state_size(pvr_dev);
+	}
+
+	WARN(1, "Invalid queue type");
+	return -EINVAL;
+}
+
+static u32 get_ctx_offset(enum drm_pvr_job_type type)
+{
+	switch (type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
+	}
+
+	return 0;
+}
+
+static const char *
+pvr_queue_fence_get_driver_name(struct dma_fence *f)
+{
+	return PVR_DRIVER_NAME;
+}
+
+static void pvr_queue_fence_release(struct dma_fence *f)
+{
+	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
+
+	pvr_context_put(fence->queue->ctx);
+	dma_fence_free(f);
+}
+
+static const char *
+pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
+{
+	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
+
+	switch (fence->queue->type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return "geometry";
+
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return "fragment";
+
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return "compute";
+
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+		return "transfer";
+	}
+
+	WARN(1, "Invalid queue type");
+	return "invalid";
+}
+
+static const char *
+pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
+{
+	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
+
+	switch (fence->queue->type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return "geometry-cccb";
+
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return "fragment-cccb";
+
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return "compute-cccb";
+
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+		return "transfer-cccb";
+	}
+
+	WARN(1, "Invalid queue type");
+	return "invalid";
+}
+
+static const struct dma_fence_ops pvr_queue_job_fence_ops = {
+	.get_driver_name = pvr_queue_fence_get_driver_name,
+	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
+	.release = pvr_queue_fence_release,
+};
+
+/**
+ * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
+ * backed by a UFO.
+ * @f: The dma_fence to turn into a pvr_queue_fence.
+ *
+ * Return:
+ *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
+ *  * NULL otherwise.
+ */
+static struct pvr_queue_fence *
+to_pvr_queue_job_fence(struct dma_fence *f)
+{
+	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);
+
+	if (sched_fence)
+		f = sched_fence->parent;
+
+	if (f && f->ops == &pvr_queue_job_fence_ops)
+		return container_of(f, struct pvr_queue_fence, base);
+
+	return NULL;
+}
+
+static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
+	.get_driver_name = pvr_queue_fence_get_driver_name,
+	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
+	.release = pvr_queue_fence_release,
+};
+
+/**
+ * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
+ * @f: The dma_fence object to put.
+ *
+ * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
+ * otherwise we free the object with dma_fence_free(). This allows us
+ * to do the right thing before and after pvr_queue_fence_init() had been
+ * called.
+ */
+static void pvr_queue_fence_put(struct dma_fence *f)
+{
+	if (!f)
+		return;
+
+	if (WARN_ON(f->ops &&
+		    f->ops != &pvr_queue_cccb_fence_ops &&
+		    f->ops != &pvr_queue_job_fence_ops))
+		return;
+
+	/* If the fence hasn't been initialized yet, free the object directly. */
+	if (f->ops)
+		dma_fence_put(f);
+	else
+		dma_fence_free(f);
+}
+
+/**
+ * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
+ *
+ * Call this function to allocate job CCCB and done fences. This only
+ * allocates the objects. Initialization happens when the underlying
+ * dma_fence object is to be returned to drm_sched (in prepare_job() or
+ * run_job()).
+ *
+ * Return:
+ *  * A valid pointer if the allocation succeeds, or
+ *  * NULL if the allocation fails.
+ */
+static struct dma_fence *
+pvr_queue_fence_alloc(void)
+{
+	struct pvr_queue_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	return &fence->base;
+}
+
+/**
+ * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
+ * @f: The fence to initialize
+ * @queue: The queue this fence belongs to.
+ * @fence_ops: The fence operations.
+ * @fence_ctx: The fence context.
+ *
+ * Wrapper around dma_fence_init() that takes care of initializing the
+ * pvr_queue_fence::queue field too.
+ */
+static void
+pvr_queue_fence_init(struct dma_fence *f,
+		     struct pvr_queue *queue,
+		     const struct dma_fence_ops *fence_ops,
+		     struct pvr_queue_fence_ctx *fence_ctx)
+{
+	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
+
+	pvr_context_get(queue->ctx);
+	fence->queue = queue;
+	dma_fence_init(&fence->base, fence_ops,
+		       &fence_ctx->lock, fence_ctx->id,
+		       atomic_inc_return(&fence_ctx->seqno));
+}
+
+/**
+ * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
+ * @fence: The fence to initialize.
+ * @queue: The queue this fence belongs to.
+ *
+ * Initializes a fence that can be used to wait for CCCB space.
+ *
+ * Should be called in the ::prepare_job() path, so the fence returned to
+ * drm_sched is valid.
+ */
+static void
+pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
+{
+	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
+			     &queue->cccb_fence_ctx.base);
+}
+
+/**
+ * pvr_queue_job_fence_init() - Initializes a job done fence object.
+ * @fence: The fence to initialize.
+ * @queue: The queue this fence belongs to.
+ *
+ * Initializes a fence that will be signaled when the GPU is done executing
+ * a job.
+ *
+ * Should be called *before* the ::run_job() path, so the fence is initialised
+ * before being placed in the pending_list.
+ */
+static void
+pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
+{
+	pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
+			     &queue->job_fence_ctx);
+}
+
+/**
+ * pvr_queue_fence_ctx_init() - Queue fence context initialization.
+ * @fence_ctx: The context to initialize
+ */
+static void
+pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
+{
+	spin_lock_init(&fence_ctx->lock);
+	fence_ctx->id = dma_fence_context_alloc(1);
+	atomic_set(&fence_ctx->seqno, 0);
+}
+
+static u32 ufo_cmds_size(u32 elem_count)
+{
+	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
+	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
+	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
+	u32 size = full_cmd_count *
+		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
+						     sizeof(struct rogue_fwif_ufo));
+
+	if (remaining_elems) {
+		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
+							  sizeof(struct rogue_fwif_ufo));
+	}
+
+	return size;
+}
+
+static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
+{
+	/* One UFO cmd for the fence signaling, one UFO cmd per native fence native,
+	 * and a command for the job itself.
+	 */
+	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
+	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
+}
+
+/**
+ * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
+ * @job: Job to operate on.
+ *
+ * Returns: Number of non-signaled native deps remaining.
+ */
+static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
+{
+	unsigned long remaining_count = 0;
+	struct dma_fence *fence = NULL;
+	unsigned long index;
+
+	xa_for_each(&job->base.dependencies, index, fence) {
+		struct pvr_queue_fence *jfence;
+
+		jfence = to_pvr_queue_job_fence(fence);
+		if (!jfence)
+			continue;
+
+		if (!dma_fence_is_signaled(&jfence->base))
+			remaining_count++;
+	}
+
+	return remaining_count;
+}
+
+/**
+ * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
+ * @queue: The queue this job will be submitted to.
+ * @job: The job to get the CCCB fence on.
+ *
+ * The CCCB fence is a synchronization primitive allowing us to delay job
+ * submission until there's enough space in the CCCB to submit the job.
+ *
+ * Return:
+ *  * NULL if there's enough space in the CCCB to submit this job, or
+ *  * A valid dma_fence object otherwise.
+ */
+static struct dma_fence *
+pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
+{
+	struct pvr_queue_fence *cccb_fence;
+	unsigned int native_deps_remaining;
+
+	/* If the fence is NULL, that means we already checked that we had
+	 * enough space in the cccb for our job.
+	 */
+	if (!job->cccb_fence)
+		return NULL;
+
+	mutex_lock(&queue->cccb_fence_ctx.job_lock);
+
+	/* Count remaining native dependencies and check if the job fits in the CCCB. */
+	native_deps_remaining = job_count_remaining_native_deps(job);
+	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
+		pvr_queue_fence_put(job->cccb_fence);
+		job->cccb_fence = NULL;
+		goto out_unlock;
+	}
+
+	/* There should be no job attached to the CCCB fence context:
+	 * drm_sched_entity guarantees that jobs are submitted one at a time.
+	 */
+	if (WARN_ON(queue->cccb_fence_ctx.job))
+		pvr_job_put(queue->cccb_fence_ctx.job);
+
+	queue->cccb_fence_ctx.job = pvr_job_get(job);
+
+	/* Initialize the fence before returning it. */
+	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
+	if (!WARN_ON(cccb_fence->queue))
+		pvr_queue_cccb_fence_init(job->cccb_fence, queue);
+
+out_unlock:
+	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
+
+	return dma_fence_get(job->cccb_fence);
+}
+
+/**
+ * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
+ * @queue: The queue this job will be submitted to.
+ * @job: The job to get the KCCB fence on.
+ *
+ * The KCCB fence is a synchronization primitive allowing us to delay job
+ * submission until there's enough space in the KCCB to submit the job.
+ *
+ * Return:
+ *  * NULL if there's enough space in the KCCB to submit this job, or
+ *  * A valid dma_fence object otherwise.
+ */
+static struct dma_fence *
+pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
+{
+	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
+	struct dma_fence *kccb_fence = NULL;
+
+	/* If the fence is NULL, that means we already checked that we had
+	 * enough space in the KCCB for our job.
+	 */
+	if (!job->kccb_fence)
+		return NULL;
+
+	if (!WARN_ON(job->kccb_fence->ops)) {
+		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
+		job->kccb_fence = NULL;
+	}
+
+	return kccb_fence;
+}
+
+static struct dma_fence *
+pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
+{
+	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
+				   job->paired_job : NULL;
+	struct dma_fence *f;
+	unsigned long index;
+
+	if (!frag_job)
+		return NULL;
+
+	xa_for_each(&frag_job->base.dependencies, index, f) {
+		/* Skip already signaled fences. */
+		if (dma_fence_is_signaled(f))
+			continue;
+
+		/* Skip our own fence. */
+		if (f == &job->base.s_fence->scheduled)
+			continue;
+
+		return dma_fence_get(f);
+	}
+
+	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
+}
+
+/**
+ * pvr_queue_prepare_job() - Return the next internal dependencies expressed as a dma_fence.
+ * @sched_job: The job to query the next internal dependency on
+ * @s_entity: The entity this job is queue on.
+ *
+ * After iterating over drm_sched_job::dependencies, drm_sched let the driver return
+ * its own internal dependencies. We use this function to return our internal dependencies.
+ */
+static struct dma_fence *
+pvr_queue_prepare_job(struct drm_sched_job *sched_job,
+		      struct drm_sched_entity *s_entity)
+{
+	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
+	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
+	struct dma_fence *internal_dep = NULL;
+
+	/*
+	 * Initialize the done_fence, so we can signal it. This must be done
+	 * here because otherwise by the time of run_job() the job will end up
+	 * in the pending list without a valid fence.
+	 */
+	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
+		/*
+		 * This will be called on a paired fragment job after being
+		 * submitted to firmware. We can tell if this is the case and
+		 * bail early from whether run_job() has been called on the
+		 * geometry job, which would issue a pm ref.
+		 */
+		if (job->paired_job->has_pm_ref)
+			return NULL;
+
+		/*
+		 * In this case we need to use the job's own ctx to initialise
+		 * the done_fence.  The other steps are done in the ctx of the
+		 * paired geometry job.
+		 */
+		pvr_queue_job_fence_init(job->done_fence,
+					 job->ctx->queues.fragment);
+	} else {
+		pvr_queue_job_fence_init(job->done_fence, queue);
+	}
+
+	/* CCCB fence is used to make sure we have enough space in the CCCB to
+	 * submit our commands.
+	 */
+	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);
+
+	/* KCCB fence is used to make sure we have a KCCB slot to queue our
+	 * CMD_KICK.
+	 */
+	if (!internal_dep)
+		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);
+
+	/* Any extra internal dependency should be added here, using the following
+	 * pattern:
+	 *
+	 *	if (!internal_dep)
+	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
+	 */
+
+	/* The paired job fence should come last, when everything else is ready. */
+	if (!internal_dep)
+		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);
+
+	return internal_dep;
+}
+
+/**
+ * pvr_queue_update_active_state_locked() - Update the queue active state.
+ * @queue: Queue to update the state on.
+ *
+ * Locked version of pvr_queue_update_active_state(). Must be called with
+ * pvr_device::queue::lock held.
+ */
+static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
+{
+	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
+
+	lockdep_assert_held(&pvr_dev->queues.lock);
+
+	/* The queue is temporary out of any list when it's being reset,
+	 * we don't want a call to pvr_queue_update_active_state_locked()
+	 * to re-insert it behind our back.
+	 */
+	if (list_empty(&queue->node))
+		return;
+
+	if (!atomic_read(&queue->in_flight_job_count))
+		list_move_tail(&queue->node, &pvr_dev->queues.idle);
+	else
+		list_move_tail(&queue->node, &pvr_dev->queues.active);
+}
+
+/**
+ * pvr_queue_update_active_state() - Update the queue active state.
+ * @queue: Queue to update the state on.
+ *
+ * Active state is based on the in_flight_job_count value.
+ *
+ * Updating the active state implies moving the queue in or out of the
+ * active queue list, which also defines whether the queue is checked
+ * or not when a FW event is received.
+ *
+ * This function should be called any time a job is submitted or it done
+ * fence is signaled.
+ */
+static void pvr_queue_update_active_state(struct pvr_queue *queue)
+{
+	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
+
+	mutex_lock(&pvr_dev->queues.lock);
+	pvr_queue_update_active_state_locked(queue);
+	mutex_unlock(&pvr_dev->queues.lock);
+}
+
+static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
+{
+	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
+	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
+	struct pvr_cccb *cccb = &queue->cccb;
+	struct pvr_queue_fence *jfence;
+	struct dma_fence *fence;
+	unsigned long index;
+	u32 ufo_count = 0;
+
+	/* We need to add the queue to the active list before updating the CCCB,
+	 * otherwise we might miss the FW event informing us that something
+	 * happened on this queue.
+	 */
+	atomic_inc(&queue->in_flight_job_count);
+	pvr_queue_update_active_state(queue);
+
+	xa_for_each(&job->base.dependencies, index, fence) {
+		jfence = to_pvr_queue_job_fence(fence);
+		if (!jfence)
+			continue;
+
+		/* Skip the partial render fence, we will place it at the end. */
+		if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job &&
+		    &job->paired_job->base.s_fence->scheduled == fence)
+			continue;
+
+		if (dma_fence_is_signaled(&jfence->base))
+			continue;
+
+		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
+					  &ufos[ufo_count].addr);
+		ufos[ufo_count++].value = jfence->base.seqno;
+
+		if (ufo_count == ARRAY_SIZE(ufos)) {
+			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
+							   sizeof(ufos), ufos, 0, 0);
+			ufo_count = 0;
+		}
+	}
+
+	/* Partial render fence goes last. */
+	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
+		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
+		if (!WARN_ON(!jfence)) {
+			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
+						  &ufos[ufo_count].addr);
+			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
+		}
+	}
+
+	if (ufo_count) {
+		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
+						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
+	}
+
+	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
+		struct rogue_fwif_cmd_geom *cmd = job->cmd;
+
+		/* Reference value for the partial render test is the current queue fence
+		 * seqno minus one.
+		 */
+		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
+					  &cmd->partial_render_geom_frag_fence.addr);
+		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
+	}
+
+	/* Submit job to FW */
+	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
+					   job->id, job->id);
+
+	/* Signal the job fence. */
+	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
+	ufos[0].value = job->done_fence->seqno;
+	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
+					   sizeof(ufos[0]), ufos, 0, 0);
+}
+
+/**
+ * pvr_queue_run_job() - Submit a job to the FW.
+ * @sched_job: The job to submit.
+ *
+ * This function is called when all non-native dependencies have been met and
+ * when the commands resulting from this job are guaranteed to fit in the CCCB.
+ */
+static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
+{
+	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
+	struct pvr_device *pvr_dev = job->pvr_dev;
+	int err;
+
+	/* The fragment job is issued along the geometry job when we use combined
+	 * geom+frag kicks. When we get there, we should simply return the
+	 * done_fence that's been initialized earlier.
+	 */
+	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
+	    job->done_fence->ops) {
+		return dma_fence_get(job->done_fence);
+	}
+
+	/* The only kind of jobs that can be paired are geometry and fragment, and
+	 * we bail out early if we see a fragment job that's paired with a geomtry
+	 * job.
+	 * Paired jobs must also target the same context and point to the same
+	 * HWRT.
+	 */
+	if (WARN_ON(job->paired_job &&
+		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
+		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
+		     job->hwrt != job->paired_job->hwrt ||
+		     job->ctx != job->paired_job->ctx)))
+		return ERR_PTR(-EINVAL);
+
+	err = pvr_job_get_pm_ref(job);
+	if (WARN_ON(err))
+		return ERR_PTR(err);
+
+	if (job->paired_job) {
+		err = pvr_job_get_pm_ref(job->paired_job);
+		if (WARN_ON(err))
+			return ERR_PTR(err);
+	}
+
+	/* Submit our job to the CCCB */
+	pvr_queue_submit_job_to_cccb(job);
+
+	if (job->paired_job) {
+		struct pvr_job *geom_job = job;
+		struct pvr_job *frag_job = job->paired_job;
+		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
+		struct pvr_queue *frag_queue = job->ctx->queues.fragment;
+
+		/* Submit the fragment job along the geometry job and send a combined kick. */
+		pvr_queue_submit_job_to_cccb(frag_job);
+		pvr_cccb_send_kccb_combined_kick(pvr_dev,
+						 &geom_queue->cccb, &frag_queue->cccb,
+						 pvr_context_get_fw_addr(geom_job->ctx) +
+						 geom_queue->ctx_offset,
+						 pvr_context_get_fw_addr(frag_job->ctx) +
+						 frag_queue->ctx_offset,
+						 job->hwrt,
+						 frag_job->fw_ccb_cmd_type ==
+						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
+	} else {
+		struct pvr_queue *queue = container_of(job->base.sched,
+						       struct pvr_queue, scheduler);
+
+		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
+					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
+					job->hwrt);
+	}
+
+	return dma_fence_get(job->done_fence);
+}
+
+static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
+{
+	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
+}
+
+static void pvr_queue_start(struct pvr_queue *queue)
+{
+	struct pvr_job *job;
+
+	/* Make sure we CPU-signal the UFO object, so other queues don't get
+	 * blocked waiting on it.
+	 */
+	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);
+
+	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
+		if (dma_fence_is_signaled(job->done_fence)) {
+			/* Jobs might have completed after drm_sched_stop() was called.
+			 * In that case, re-assign the parent field to the done_fence.
+			 */
+			WARN_ON(job->base.s_fence->parent);
+			job->base.s_fence->parent = dma_fence_get(job->done_fence);
+		} else {
+			/* If we had unfinished jobs, flag the entity as guilty so no
+			 * new job can be submitted.
+			 */
+			atomic_set(&queue->ctx->faulty, 1);
+		}
+	}
+
+	drm_sched_start(&queue->scheduler, true);
+}
+
+/**
+ * pvr_queue_timedout_job() - Handle a job timeout event.
+ * @s_job: The job this timeout occurred on.
+ *
+ * FIXME: We don't do anything here to unblock the situation, we just stop+start
+ * the scheduler, and re-assign parent fences in the middle.
+ *
+ * Return:
+ *  * DRM_GPU_SCHED_STAT_NOMINAL.
+ */
+static enum drm_gpu_sched_stat
+pvr_queue_timedout_job(struct drm_sched_job *s_job)
+{
+	struct drm_gpu_scheduler *sched = s_job->sched;
+	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
+	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
+	struct pvr_job *job;
+	u32 job_count = 0;
+
+	dev_err(sched->dev, "Job timeout\n");
+
+	/* Before we stop the scheduler, make sure the queue is out of any list, so
+	 * any call to pvr_queue_update_active_state_locked() that might happen
+	 * until the scheduler is really stopped doesn't end up re-inserting the
+	 * queue in the active list. This would cause
+	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
+	 * other when accessing the pending_list, since drm_sched_stop() doesn't
+	 * grab the job_list_lock when modifying the list (it's assuming the
+	 * only other accessor is the scheduler, and it's safe to not grab the
+	 * lock since it's stopped).
+	 */
+	mutex_lock(&pvr_dev->queues.lock);
+	list_del_init(&queue->node);
+	mutex_unlock(&pvr_dev->queues.lock);
+
+	drm_sched_stop(sched, s_job);
+
+	/* Re-assign job parent fences. */
+	list_for_each_entry(job, &sched->pending_list, base.list) {
+		job->base.s_fence->parent = dma_fence_get(job->done_fence);
+		job_count++;
+	}
+	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);
+
+	/* Re-insert the queue in the proper list, and kick a queue processing
+	 * operation if there were jobs pending.
+	 */
+	mutex_lock(&pvr_dev->queues.lock);
+	if (!job_count) {
+		list_move_tail(&queue->node, &pvr_dev->queues.idle);
+	} else {
+		atomic_set(&queue->in_flight_job_count, job_count);
+		list_move_tail(&queue->node, &pvr_dev->queues.active);
+		pvr_queue_process(queue);
+	}
+	mutex_unlock(&pvr_dev->queues.lock);
+
+	drm_sched_start(sched, true);
+
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+/**
+ * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
+ * @sched_job: Job object to free.
+ */
+static void pvr_queue_free_job(struct drm_sched_job *sched_job)
+{
+	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
+
+	drm_sched_job_cleanup(sched_job);
+	job->paired_job = NULL;
+	pvr_job_put(job);
+}
+
+static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
+	.prepare_job = pvr_queue_prepare_job,
+	.run_job = pvr_queue_run_job,
+	.timedout_job = pvr_queue_timedout_job,
+	.free_job = pvr_queue_free_job,
+};
+
+/**
+ * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object
+ * @f: Fence to test.
+ *
+ * A UFO-backed fence is a fence that can be signaled or waited upon FW-side.
+ * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue
+ * they are pushed to, but those fences are not directly exposed to the outside
+ * world, so we also need to check if the fence we're being passed is a
+ * drm_sched_fence that was coming from our driver.
+ */
+bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
+{
+	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;
+
+	if (sched_fence &&
+	    sched_fence->sched->ops == &pvr_queue_sched_ops)
+		return true;
+
+	if (f && f->ops == &pvr_queue_job_fence_ops)
+		return true;
+
+	return false;
+}
+
+/**
+ * pvr_queue_signal_done_fences() - Signal done fences.
+ * @queue: Queue to check.
+ *
+ * Signal done fences of jobs whose seqno is less than the current value of
+ * the UFO object attached to the queue.
+ */
+static void
+pvr_queue_signal_done_fences(struct pvr_queue *queue)
+{
+	struct pvr_job *job, *tmp_job;
+	u32 cur_seqno;
+
+	spin_lock(&queue->scheduler.job_list_lock);
+	cur_seqno = *queue->timeline_ufo.value;
+	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
+		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
+			break;
+
+		if (!dma_fence_is_signaled(job->done_fence)) {
+			dma_fence_signal(job->done_fence);
+			pvr_job_release_pm_ref(job);
+			atomic_dec(&queue->in_flight_job_count);
+		}
+	}
+	spin_unlock(&queue->scheduler.job_list_lock);
+}
+
+/**
+ * pvr_queue_check_job_waiting_for_cccb_space() - Check if the job waiting for CCCB space
+ * can be unblocked
+ * pushed to the CCCB
+ * @queue: Queue to check
+ *
+ * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal
+ * its CCCB fence, which should kick drm_sched.
+ */
+static void
+pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
+{
+	struct pvr_queue_fence *cccb_fence;
+	u32 native_deps_remaining;
+	struct pvr_job *job;
+
+	mutex_lock(&queue->cccb_fence_ctx.job_lock);
+	job = queue->cccb_fence_ctx.job;
+	if (!job)
+		goto out_unlock;
+
+	/* If we have a job attached to the CCCB fence context, its CCCB fence
+	 * shouldn't be NULL.
+	 */
+	if (WARN_ON(!job->cccb_fence)) {
+		job = NULL;
+		goto out_unlock;
+	}
+
+	/* If we get there, CCCB fence has to be initialized. */
+	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
+	if (WARN_ON(!cccb_fence->queue)) {
+		job = NULL;
+		goto out_unlock;
+	}
+
+	/* Evict signaled dependencies before checking for CCCB space.
+	 * If the job fits, signal the CCCB fence, this should unblock
+	 * the drm_sched_entity.
+	 */
+	native_deps_remaining = job_count_remaining_native_deps(job);
+	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
+		job = NULL;
+		goto out_unlock;
+	}
+
+	dma_fence_signal(job->cccb_fence);
+	pvr_queue_fence_put(job->cccb_fence);
+	job->cccb_fence = NULL;
+	queue->cccb_fence_ctx.job = NULL;
+
+out_unlock:
+	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
+
+	pvr_job_put(job);
+}
+
+/**
+ * pvr_queue_process() - Process events that happened on a queue.
+ * @queue: Queue to check
+ *
+ * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
+ */
+void pvr_queue_process(struct pvr_queue *queue)
+{
+	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);
+
+	pvr_queue_check_job_waiting_for_cccb_space(queue);
+	pvr_queue_signal_done_fences(queue);
+	pvr_queue_update_active_state_locked(queue);
+}
+
+static u32 get_dm_type(struct pvr_queue *queue)
+{
+	switch (queue->type) {
+	case DRM_PVR_JOB_TYPE_GEOMETRY:
+		return PVR_FWIF_DM_GEOM;
+	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
+	case DRM_PVR_JOB_TYPE_FRAGMENT:
+		return PVR_FWIF_DM_FRAG;
+	case DRM_PVR_JOB_TYPE_COMPUTE:
+		return PVR_FWIF_DM_CDM;
+	}
+
+	return ~0;
+}
+
+/**
+ * init_fw_context() - Initializes the queue part of a FW context.
+ * @queue: Queue object to initialize the FW context for.
+ * @fw_ctx_map: The FW context CPU mapping.
+ *
+ * FW contexts are containing various states, one of them being a per-queue state
+ * that needs to be initialized for each queue being exposed by a context. This
+ * function takes care of that.
+ */
+static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
+{
+	struct pvr_context *ctx = queue->ctx;
+	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
+	struct rogue_fwif_fwcommoncontext *cctx_fw;
+	struct pvr_cccb *cccb = &queue->cccb;
+
+	cctx_fw = fw_ctx_map + queue->ctx_offset;
+	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
+	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;
+
+	cctx_fw->dm = get_dm_type(queue);
+	cctx_fw->priority = ctx->priority;
+	cctx_fw->priority_seq_num = 0;
+	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
+	cctx_fw->pid = task_tgid_nr(current);
+	cctx_fw->server_common_context_id = ctx->ctx_id;
+
+	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);
+
+	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
+}
+
+/**
+ * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
+ * @queue: Queue on FW context to clean up.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
+ */
+static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
+{
+	if (!queue->ctx->fw_obj)
+		return 0;
+
+	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
+					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
+					queue->ctx->fw_obj, queue->ctx_offset);
+}
+
+/**
+ * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
+ * @job: The job to initialize.
+ *
+ * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
+ * and pvr_queue_job_push() can't fail. We also make sure the context type is
+ * valid and the job can fit in the CCCB.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * An error code if something failed.
+ */
+int pvr_queue_job_init(struct pvr_job *job)
+{
+	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
+	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
+	struct pvr_queue *queue;
+	int err;
+
+	if (atomic_read(&job->ctx->faulty))
+		return -EIO;
+
+	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
+	if (!queue)
+		return -EINVAL;
+
+	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
+		return -E2BIG;
+
+	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE);
+	if (err)
+		return err;
+
+	job->cccb_fence = pvr_queue_fence_alloc();
+	job->kccb_fence = pvr_kccb_fence_alloc();
+	job->done_fence = pvr_queue_fence_alloc();
+	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * pvr_queue_job_arm() - Arm a job object.
+ * @job: The job to arm.
+ *
+ * Initializes fences and return the drm_sched finished fence so it can
+ * be exposed to the outside world. Once this function is called, you should
+ * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
+ * no one grabbed a reference to the returned fence. The latter can happen if
+ * we do multi-job submission, and something failed when creating/initializing
+ * a job. In that case, we know the fence didn't leave the driver, and we
+ * can thus guarantee nobody will wait on an dead fence object.
+ *
+ * Return:
+ *  * A dma_fence object.
+ */
+struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
+{
+	drm_sched_job_arm(&job->base);
+
+	return &job->base.s_fence->finished;
+}
+
+/**
+ * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
+ * @job: The job to cleanup.
+ *
+ * Should be called in the job release path.
+ */
+void pvr_queue_job_cleanup(struct pvr_job *job)
+{
+	pvr_queue_fence_put(job->done_fence);
+	pvr_queue_fence_put(job->cccb_fence);
+	pvr_kccb_fence_put(job->kccb_fence);
+
+	if (job->base.s_fence)
+		drm_sched_job_cleanup(&job->base);
+}
+
+/**
+ * pvr_queue_job_push() - Push a job to its queue.
+ * @job: The job to push.
+ *
+ * Must be called after pvr_queue_job_init() and after all dependencies
+ * have been added to the job. This will effectively queue the job to
+ * the drm_sched_entity attached to the queue. We grab a reference on
+ * the job object, so the caller is free to drop its reference when it's
+ * done accessing the job object.
+ */
+void pvr_queue_job_push(struct pvr_job *job)
+{
+	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
+
+	/* Keep track of the last queued job scheduled fence for combined submit. */
+	dma_fence_put(queue->last_queued_job_scheduled_fence);
+	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);
+
+	pvr_job_get(job);
+	drm_sched_entity_push_job(&job->base);
+}
+
+static void reg_state_init(void *cpu_ptr, void *priv)
+{
+	struct pvr_queue *queue = priv;
+
+	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
+		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;
+
+		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
+			queue->callstack_addr;
+	}
+}
+
+/**
+ * pvr_queue_create() - Create a queue object.
+ * @ctx: The context this queue will be attached to.
+ * @type: The type of jobs being pushed to this queue.
+ * @args: The arguments passed to the context creation function.
+ * @fw_ctx_map: CPU mapping of the FW context object.
+ *
+ * Create a queue object that will be used to queue and track jobs.
+ *
+ * Return:
+ *  * A valid pointer to a pvr_queue object, or
+ *  * An error pointer if the creation/initialization failed.
+ */
+struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
+				   enum drm_pvr_job_type type,
+				   struct drm_pvr_ioctl_create_context_args *args,
+				   void *fw_ctx_map)
+{
+	static const struct {
+		u32 cccb_size;
+		const char *name;
+	} props[] = {
+		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
+			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
+			.name = "geometry",
+		},
+		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
+			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
+			.name = "fragment"
+		},
+		[DRM_PVR_JOB_TYPE_COMPUTE] = {
+			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
+			.name = "compute"
+		},
+		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
+			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
+			.name = "transfer_frag"
+		},
+	};
+	struct pvr_device *pvr_dev = ctx->pvr_dev;
+	struct drm_gpu_scheduler *sched;
+	struct pvr_queue *queue;
+	int ctx_state_size, err;
+	void *cpu_map;
+
+	if (WARN_ON(type >= sizeof(props)))
+		return ERR_PTR(-EINVAL);
+
+	switch (ctx->type) {
+	case DRM_PVR_CTX_TYPE_RENDER:
+		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
+		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
+			return ERR_PTR(-EINVAL);
+		break;
+	case DRM_PVR_CTX_TYPE_COMPUTE:
+		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
+			return ERR_PTR(-EINVAL);
+		break;
+	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
+		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
+			return ERR_PTR(-EINVAL);
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	ctx_state_size = get_ctx_state_size(pvr_dev, type);
+	if (ctx_state_size < 0)
+		return ERR_PTR(ctx_state_size);
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return ERR_PTR(-ENOMEM);
+
+	queue->type = type;
+	queue->ctx_offset = get_ctx_offset(type);
+	queue->ctx = ctx;
+	queue->callstack_addr = args->callstack_addr;
+	sched = &queue->scheduler;
+	INIT_LIST_HEAD(&queue->node);
+	mutex_init(&queue->cccb_fence_ctx.job_lock);
+	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
+	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);
+
+	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
+	if (err)
+		goto err_free_queue;
+
+	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
+				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+				   reg_state_init, queue, &queue->reg_state_obj);
+	if (err)
+		goto err_cccb_fini;
+
+	init_fw_context(queue, fw_ctx_map);
+
+	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
+	    args->callstack_addr) {
+		err = -EINVAL;
+		goto err_release_reg_state;
+	}
+
+	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
+					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
+					       NULL, NULL, &queue->timeline_ufo.fw_obj);
+	if (IS_ERR(cpu_map)) {
+		err = PTR_ERR(cpu_map);
+		goto err_release_reg_state;
+	}
+
+	queue->timeline_ufo.value = cpu_map;
+
+	err = drm_sched_init(&queue->scheduler,
+			     &pvr_queue_sched_ops,
+			     pvr_dev->sched_wq, 1, 64 * 1024, 1,
+			     msecs_to_jiffies(500),
+			     pvr_dev->sched_wq, NULL, "pvr-queue",
+			     pvr_dev->base.dev);
+	if (err)
+		goto err_release_ufo;
+
+	err = drm_sched_entity_init(&queue->entity,
+				    DRM_SCHED_PRIORITY_MIN,
+				    &sched, 1, &ctx->faulty);
+	if (err)
+		goto err_sched_fini;
+
+	mutex_lock(&pvr_dev->queues.lock);
+	list_add_tail(&queue->node, &pvr_dev->queues.idle);
+	mutex_unlock(&pvr_dev->queues.lock);
+
+	return queue;
+
+err_sched_fini:
+	drm_sched_fini(&queue->scheduler);
+
+err_release_ufo:
+	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
+
+err_release_reg_state:
+	pvr_fw_object_destroy(queue->reg_state_obj);
+
+err_cccb_fini:
+	pvr_cccb_fini(&queue->cccb);
+
+err_free_queue:
+	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
+	kfree(queue);
+
+	return ERR_PTR(err);
+}
+
+void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
+{
+	struct pvr_queue *queue;
+
+	mutex_lock(&pvr_dev->queues.lock);
+	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
+		pvr_queue_stop(queue, NULL);
+	list_for_each_entry(queue, &pvr_dev->queues.active, node)
+		pvr_queue_stop(queue, NULL);
+	mutex_unlock(&pvr_dev->queues.lock);
+}
+
+void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
+{
+	struct pvr_queue *queue;
+
+	mutex_lock(&pvr_dev->queues.lock);
+	list_for_each_entry(queue, &pvr_dev->queues.active, node)
+		pvr_queue_start(queue);
+	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
+		pvr_queue_start(queue);
+	mutex_unlock(&pvr_dev->queues.lock);
+}
+
+/**
+ * pvr_queue_kill() - Kill a queue.
+ * @queue: The queue to kill.
+ *
+ * Kill the queue so no new jobs can be pushed. Should be called when the
+ * context handle is destroyed. The queue object might last longer if jobs
+ * are still in flight and holding a reference to the context this queue
+ * belongs to.
+ */
+void pvr_queue_kill(struct pvr_queue *queue)
+{
+	drm_sched_entity_destroy(&queue->entity);
+	dma_fence_put(queue->last_queued_job_scheduled_fence);
+	queue->last_queued_job_scheduled_fence = NULL;
+}
+
+/**
+ * pvr_queue_destroy() - Destroy a queue.
+ * @queue: The queue to destroy.
+ *
+ * Cleanup the queue and free the resources attached to it. Should be
+ * called from the context release function.
+ */
+void pvr_queue_destroy(struct pvr_queue *queue)
+{
+	if (!queue)
+		return;
+
+	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
+	list_del_init(&queue->node);
+	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
+
+	drm_sched_fini(&queue->scheduler);
+	drm_sched_entity_fini(&queue->entity);
+
+	if (WARN_ON(queue->last_queued_job_scheduled_fence))
+		dma_fence_put(queue->last_queued_job_scheduled_fence);
+
+	pvr_queue_cleanup_fw_context(queue);
+
+	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
+	pvr_fw_object_destroy(queue->reg_state_obj);
+	pvr_cccb_fini(&queue->cccb);
+	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
+	kfree(queue);
+}
+
+/**
+ * pvr_queue_device_init() - Device-level initialization of queue related fields.
+ * @pvr_dev: The device to initialize.
+ *
+ * Initializes all fields related to queue management in pvr_device.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * An error code on failure.
+ */
+int pvr_queue_device_init(struct pvr_device *pvr_dev)
+{
+	int err;
+
+	INIT_LIST_HEAD(&pvr_dev->queues.active);
+	INIT_LIST_HEAD(&pvr_dev->queues.idle);
+	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
+	if (err)
+		return err;
+
+	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
+	if (!pvr_dev->sched_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
+ * @pvr_dev: The device to cleanup.
+ *
+ * Cleanup/free all queue-related resources attached to a pvr_device object.
+ */
+void pvr_queue_device_fini(struct pvr_device *pvr_dev)
+{
+	destroy_workqueue(pvr_dev->sched_wq);
+}
diff --git a/drivers/gpu/drm/imagination/pvr_queue.h b/drivers/gpu/drm/imagination/pvr_queue.h
new file mode 100644
index 0000000000000..b5ce2c742150b
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_queue.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_QUEUE_H
+#define PVR_QUEUE_H
+
+#include <drm/gpu_scheduler.h>
+
+#include "pvr_cccb.h"
+#include "pvr_device.h"
+
+struct pvr_context;
+struct pvr_queue;
+
+/**
+ * struct pvr_queue_fence_ctx - Queue fence context
+ *
+ * Used to implement dma_fence_ops for pvr_job::{done,cccb}_fence.
+ */
+struct pvr_queue_fence_ctx {
+	/** @id: Fence context ID allocated with dma_fence_context_alloc(). */
+	u64 id;
+
+	/** @seqno: Sequence number incremented each time a fence is created. */
+	atomic_t seqno;
+
+	/** @lock: Lock used to synchronize access to fences allocated by this context. */
+	spinlock_t lock;
+};
+
+/**
+ * struct pvr_queue_cccb_fence_ctx - CCCB fence context
+ *
+ * Context used to manage fences controlling access to the CCCB. No fences are
+ * issued if there's enough space in the CCCB to push job commands.
+ */
+struct pvr_queue_cccb_fence_ctx {
+	/** @base: Base queue fence context. */
+	struct pvr_queue_fence_ctx base;
+
+	/**
+	 * @job: Job waiting for CCCB space.
+	 *
+	 * Thanks to the serializationg done at the drm_sched_entity level,
+	 * there's no more than one job waiting for CCCB at a given time.
+	 *
+	 * This field is NULL if no jobs are currently waiting for CCCB space.
+	 *
+	 * Must be accessed with @job_lock held.
+	 */
+	struct pvr_job *job;
+
+	/** @lock: Lock protecting access to the job object. */
+	struct mutex job_lock;
+};
+
+/**
+ * struct pvr_queue_fence - Queue fence object
+ */
+struct pvr_queue_fence {
+	/** @base: Base dma_fence. */
+	struct dma_fence base;
+
+	/** @queue: Queue that created this fence. */
+	struct pvr_queue *queue;
+};
+
+/**
+ * struct pvr_queue - Job queue
+ *
+ * Used to queue and track execution of pvr_job objects.
+ */
+struct pvr_queue {
+	/** @scheduler: Single entity scheduler use to push jobs to this queue. */
+	struct drm_gpu_scheduler scheduler;
+
+	/** @entity: Scheduling entity backing this queue. */
+	struct drm_sched_entity entity;
+
+	/** @type: Type of jobs queued to this queue. */
+	enum drm_pvr_job_type type;
+
+	/** @ctx: Context object this queue is bound to. */
+	struct pvr_context *ctx;
+
+	/** @node: Used to add the queue to the active/idle queue list. */
+	struct list_head node;
+
+	/**
+	 * @in_flight_job_count: Number of jobs submitted to the CCCB that
+	 * have not been processed yet.
+	 */
+	atomic_t in_flight_job_count;
+
+	/**
+	 * @cccb_fence_ctx: CCCB fence context.
+	 *
+	 * Used to control access to the CCCB is full, such that we don't
+	 * end up trying to push commands to the CCCB if there's not enough
+	 * space to receive all commands needed for a job to complete.
+	 */
+	struct pvr_queue_cccb_fence_ctx cccb_fence_ctx;
+
+	/** @job_fence_ctx: Job fence context object. */
+	struct pvr_queue_fence_ctx job_fence_ctx;
+
+	/** @timeline_ufo: Timeline UFO for the context queue. */
+	struct {
+		/** @fw_obj: FW object representing the UFO value. */
+		struct pvr_fw_object *fw_obj;
+
+		/** @value: CPU mapping of the UFO value. */
+		u32 *value;
+	} timeline_ufo;
+
+	/**
+	 * last_queued_job_scheduled_fence: The scheduled fence of the last
+	 * job queued to this queue.
+	 *
+	 * We use it to insert frag -> geom dependencies when issuing combined
+	 * geom+frag jobs, to guarantee that the fragment job that's part of
+	 * the combined operation comes after all fragment jobs that were queued
+	 * before it.
+	 */
+	struct dma_fence *last_queued_job_scheduled_fence;
+
+	/** @cccb: Client Circular Command Buffer. */
+	struct pvr_cccb cccb;
+
+	/** @reg_state_obj: FW object representing the register state of this queue. */
+	struct pvr_fw_object *reg_state_obj;
+
+	/** @ctx_offset: Offset of the queue context in the FW context object. */
+	u32 ctx_offset;
+
+	/** @callstack_addr: Initial call stack address for register state object. */
+	u64 callstack_addr;
+};
+
+bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f);
+
+int pvr_queue_job_init(struct pvr_job *job);
+
+void pvr_queue_job_cleanup(struct pvr_job *job);
+
+void pvr_queue_job_push(struct pvr_job *job);
+
+struct dma_fence *pvr_queue_job_arm(struct pvr_job *job);
+
+struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
+				   enum drm_pvr_job_type type,
+				   struct drm_pvr_ioctl_create_context_args *args,
+				   void *fw_ctx_map);
+
+void pvr_queue_kill(struct pvr_queue *queue);
+
+void pvr_queue_destroy(struct pvr_queue *queue);
+
+void pvr_queue_process(struct pvr_queue *queue);
+
+void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev);
+
+void pvr_queue_device_post_reset(struct pvr_device *pvr_dev);
+
+int pvr_queue_device_init(struct pvr_device *pvr_dev);
+
+void pvr_queue_device_fini(struct pvr_device *pvr_dev);
+
+#endif /* PVR_QUEUE_H */
diff --git a/drivers/gpu/drm/imagination/pvr_stream_defs.c b/drivers/gpu/drm/imagination/pvr_stream_defs.c
index 59265a7accce7..f8bd1a8c01db4 100644
--- a/drivers/gpu/drm/imagination/pvr_stream_defs.c
+++ b/drivers/gpu/drm/imagination/pvr_stream_defs.c
@@ -43,6 +43,232 @@
  * existing parameters, to preserve order. As parameters are naturally aligned, care must be taken
  * with respect to implicit padding in the stream; padding should be minimised as much as possible.
  */
+static const struct pvr_stream_def rogue_fwif_cmd_geom_stream[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.vdm_ctrl_stream_base, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.tpu_border_colour_table, 64),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_draw_indirect0, 64,
+			       PVR_FEATURE_VDM_DRAWINDIRECT),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_draw_indirect1, 32,
+			       PVR_FEATURE_VDM_DRAWINDIRECT),
+	PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.ppp_ctrl, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.te_psg, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.vdm_context_resume_task0_size, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_context_resume_task3_size, 32,
+			       PVR_FEATURE_VDM_OBJECT_LEVEL_LLS),
+	PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.view_idx, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.pds_coeff_free_prog, 32,
+			       PVR_FEATURE_TESSELLATION),
+};
+
+static const struct pvr_stream_def rogue_fwif_cmd_geom_stream_brn49927[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.tpu, 32),
+};
+
+static const struct pvr_stream_ext_def cmd_geom_ext_streams_0[] = {
+	{
+		.stream = rogue_fwif_cmd_geom_stream_brn49927,
+		.stream_len = ARRAY_SIZE(rogue_fwif_cmd_geom_stream_brn49927),
+		.header_mask = PVR_STREAM_EXTHDR_GEOM0_BRN49927,
+		.quirk = 49927,
+	},
+};
+
+static const struct pvr_stream_ext_header cmd_geom_ext_headers[] = {
+	{
+		.ext_streams = cmd_geom_ext_streams_0,
+		.ext_streams_num = ARRAY_SIZE(cmd_geom_ext_streams_0),
+		.valid_mask = PVR_STREAM_EXTHDR_GEOM0_VALID,
+	},
+};
+
+const struct pvr_stream_cmd_defs pvr_cmd_geom_stream = {
+	.type = PVR_STREAM_TYPE_GEOM,
+
+	.main_stream = rogue_fwif_cmd_geom_stream,
+	.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_geom_stream),
+
+	.ext_nr_headers = ARRAY_SIZE(cmd_geom_ext_headers),
+	.ext_headers = cmd_geom_ext_headers,
+
+	.dest_size = sizeof(struct rogue_fwif_cmd_geom),
+};
+
+static const struct pvr_stream_def rogue_fwif_cmd_frag_stream[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_scissor_base, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_dbias_base, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_oclqry_base, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_zlsctl, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_zload_store_base, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_stencil_load_store_base, 64),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.fb_cdc_zls, 64,
+			       PVR_FEATURE_REQUIRES_FB_CDC_ZLS_SETUP),
+	PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pbe_word),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.tpu_border_colour_table, 64),
+	PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pds_bgnd),
+	PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pds_pr_bgnd),
+	PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.usc_clear_register),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.usc_pixel_output_ctrl, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_bgobjdepth, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_bgobjvals, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_aa, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_xtp_pipe_enable, 32,
+			       PVR_FEATURE_S7_TOP_INFRASTRUCTURE),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_ctl, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.event_pixel_pds_info, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.pixel_phantom, 32,
+			       PVR_FEATURE_CLUSTER_GROUPING),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.view_idx, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.event_pixel_pds_data, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_oclqry_stride, 32,
+			       PVR_FEATURE_GPU_MULTICORE_SUPPORT),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_zls_pixels, 32,
+			       PVR_FEATURE_ZLS_SUBTILE),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.rgx_cr_blackpearl_fix, 32,
+			       PVR_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, zls_stride, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, sls_stride, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, execute_count, 32,
+			       PVR_FEATURE_GPU_MULTICORE_SUPPORT),
+};
+
+static const struct pvr_stream_def rogue_fwif_cmd_frag_stream_brn47217[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_oclqry_stride, 32),
+};
+
+static const struct pvr_stream_def rogue_fwif_cmd_frag_stream_brn49927[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.tpu, 32),
+};
+
+static const struct pvr_stream_ext_def cmd_frag_ext_streams_0[] = {
+	{
+		.stream = rogue_fwif_cmd_frag_stream_brn47217,
+		.stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream_brn47217),
+		.header_mask = PVR_STREAM_EXTHDR_FRAG0_BRN47217,
+		.quirk = 47217,
+	},
+	{
+		.stream = rogue_fwif_cmd_frag_stream_brn49927,
+		.stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream_brn49927),
+		.header_mask = PVR_STREAM_EXTHDR_FRAG0_BRN49927,
+		.quirk = 49927,
+	},
+};
+
+static const struct pvr_stream_ext_header cmd_frag_ext_headers[] = {
+	{
+		.ext_streams = cmd_frag_ext_streams_0,
+		.ext_streams_num = ARRAY_SIZE(cmd_frag_ext_streams_0),
+		.valid_mask = PVR_STREAM_EXTHDR_FRAG0_VALID,
+	},
+};
+
+const struct pvr_stream_cmd_defs pvr_cmd_frag_stream = {
+	.type = PVR_STREAM_TYPE_FRAG,
+
+	.main_stream = rogue_fwif_cmd_frag_stream,
+	.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream),
+
+	.ext_nr_headers = ARRAY_SIZE(cmd_frag_ext_headers),
+	.ext_headers = cmd_frag_ext_headers,
+
+	.dest_size = sizeof(struct rogue_fwif_cmd_frag),
+};
+
+static const struct pvr_stream_def rogue_fwif_cmd_compute_stream[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.tpu_border_colour_table, 64),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb_queue, 64,
+			       PVR_FEATURE_CDM_USER_MODE_QUEUE),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb_base, 64,
+			       PVR_FEATURE_CDM_USER_MODE_QUEUE),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb, 64,
+			       PVR_FEATURE_CDM_USER_MODE_QUEUE),
+	PVR_STREAM_DEF_NOT_FEATURE(rogue_fwif_cmd_compute, regs.cdm_ctrl_stream_base, 64,
+				   PVR_FEATURE_CDM_USER_MODE_QUEUE),
+	PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.cdm_context_state_base_addr, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.cdm_resume_pds1, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_item, 32,
+			       PVR_FEATURE_COMPUTE_MORTON_CAPABLE),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.compute_cluster, 32,
+			       PVR_FEATURE_CLUSTER_GROUPING),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.tpu_tag_cdm_ctrl, 32,
+			       PVR_FEATURE_TPU_DM_GLOBAL_REGISTERS),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, stream_start_offset, 32,
+			       PVR_FEATURE_CDM_USER_MODE_QUEUE),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, execute_count, 32,
+			       PVR_FEATURE_GPU_MULTICORE_SUPPORT),
+};
+
+static const struct pvr_stream_def rogue_fwif_cmd_compute_stream_brn49927[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.tpu, 32),
+};
+
+static const struct pvr_stream_ext_def cmd_compute_ext_streams_0[] = {
+	{
+		.stream = rogue_fwif_cmd_compute_stream_brn49927,
+		.stream_len = ARRAY_SIZE(rogue_fwif_cmd_compute_stream_brn49927),
+		.header_mask = PVR_STREAM_EXTHDR_COMPUTE0_BRN49927,
+		.quirk = 49927,
+	},
+};
+
+static const struct pvr_stream_ext_header cmd_compute_ext_headers[] = {
+	{
+		.ext_streams = cmd_compute_ext_streams_0,
+		.ext_streams_num = ARRAY_SIZE(cmd_compute_ext_streams_0),
+		.valid_mask = PVR_STREAM_EXTHDR_COMPUTE0_VALID,
+	},
+};
+
+const struct pvr_stream_cmd_defs pvr_cmd_compute_stream = {
+	.type = PVR_STREAM_TYPE_COMPUTE,
+
+	.main_stream = rogue_fwif_cmd_compute_stream,
+	.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_compute_stream),
+
+	.ext_nr_headers = ARRAY_SIZE(cmd_compute_ext_headers),
+	.ext_headers = cmd_compute_ext_headers,
+
+	.dest_size = sizeof(struct rogue_fwif_cmd_compute),
+};
+
+static const struct pvr_stream_def rogue_fwif_cmd_transfer_stream[] = {
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd0_base, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd1_base, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd3_sizeinfo, 64),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_mtile_base, 64),
+	PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_transfer, regs.pbe_wordx_mrty),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_bgobjvals, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_pixel_output_ctrl, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register0, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register1, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register2, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register3, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_mtile_size, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_render_origin, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_ctl, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_aa, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_info, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_code, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_data, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_render, 32),
+	PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_rgn, 32),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_transfer, regs.isp_xtp_pipe_enable, 32,
+			       PVR_FEATURE_S7_TOP_INFRASTRUCTURE),
+	PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_transfer, regs.frag_screen, 32,
+			       PVR_FEATURE_GPU_MULTICORE_SUPPORT),
+};
+
+const struct pvr_stream_cmd_defs pvr_cmd_transfer_stream = {
+	.type = PVR_STREAM_TYPE_TRANSFER,
+
+	.main_stream = rogue_fwif_cmd_transfer_stream,
+	.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_transfer_stream),
+
+	.ext_nr_headers = 0,
+
+	.dest_size = sizeof(struct rogue_fwif_cmd_transfer),
+};
+
 static const struct pvr_stream_def rogue_fwif_static_render_context_state_stream[] = {
 	PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
 		       geom_reg_vdm_context_state_base_addr, 64),
diff --git a/drivers/gpu/drm/imagination/pvr_sync.c b/drivers/gpu/drm/imagination/pvr_sync.c
new file mode 100644
index 0000000000000..129f646d14ba1
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_sync.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <drm/drm_syncobj.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/xarray.h>
+#include <linux/dma-fence-unwrap.h>
+
+#include "pvr_device.h"
+#include "pvr_queue.h"
+#include "pvr_sync.h"
+
+static int
+pvr_check_sync_op(const struct drm_pvr_sync_op *sync_op)
+{
+	u8 handle_type;
+
+	if (sync_op->flags & ~DRM_PVR_SYNC_OP_FLAGS_MASK)
+		return -EINVAL;
+
+	handle_type = sync_op->flags & DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK;
+	if (handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ &&
+	    handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ)
+		return -EINVAL;
+
+	if (handle_type == DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ &&
+	    sync_op->value != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void
+pvr_sync_signal_free(struct pvr_sync_signal *sig_sync)
+{
+	if (!sig_sync)
+		return;
+
+	drm_syncobj_put(sig_sync->syncobj);
+	dma_fence_chain_free(sig_sync->chain);
+	dma_fence_put(sig_sync->fence);
+	kfree(sig_sync);
+}
+
+void
+pvr_sync_signal_array_cleanup(struct xarray *array)
+{
+	struct pvr_sync_signal *sig_sync;
+	unsigned long i;
+
+	xa_for_each(array, i, sig_sync)
+		pvr_sync_signal_free(sig_sync);
+
+	xa_destroy(array);
+}
+
+static struct pvr_sync_signal *
+pvr_sync_signal_array_add(struct xarray *array, struct drm_file *file, u32 handle, u64 point)
+{
+	struct pvr_sync_signal *sig_sync;
+	struct dma_fence *cur_fence;
+	int err;
+	u32 id;
+
+	sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL);
+	if (!sig_sync)
+		return ERR_PTR(-ENOMEM);
+
+	sig_sync->handle = handle;
+	sig_sync->point = point;
+
+	if (point > 0) {
+		sig_sync->chain = dma_fence_chain_alloc();
+		if (!sig_sync->chain) {
+			err = -ENOMEM;
+			goto err_free_sig_sync;
+		}
+	}
+
+	sig_sync->syncobj = drm_syncobj_find(file, handle);
+	if (!sig_sync->syncobj) {
+		err = -EINVAL;
+		goto err_free_sig_sync;
+	}
+
+	/* Retrieve the current fence attached to that point. It's
+	 * perfectly fine to get a NULL fence here, it just means there's
+	 * no fence attached to that point yet.
+	 */
+	if (!drm_syncobj_find_fence(file, handle, point, 0, &cur_fence))
+		sig_sync->fence = cur_fence;
+
+	err = xa_alloc(array, &id, sig_sync, xa_limit_32b, GFP_KERNEL);
+	if (err)
+		goto err_free_sig_sync;
+
+	return sig_sync;
+
+err_free_sig_sync:
+	pvr_sync_signal_free(sig_sync);
+	return ERR_PTR(err);
+}
+
+static struct pvr_sync_signal *
+pvr_sync_signal_array_search(struct xarray *array, u32 handle, u64 point)
+{
+	struct pvr_sync_signal *sig_sync;
+	unsigned long i;
+
+	xa_for_each(array, i, sig_sync) {
+		if (handle == sig_sync->handle && point == sig_sync->point)
+			return sig_sync;
+	}
+
+	return NULL;
+}
+
+static struct pvr_sync_signal *
+pvr_sync_signal_array_get(struct xarray *array, struct drm_file *file, u32 handle, u64 point)
+{
+	struct pvr_sync_signal *sig_sync;
+
+	sig_sync = pvr_sync_signal_array_search(array, handle, point);
+	if (sig_sync)
+		return sig_sync;
+
+	return pvr_sync_signal_array_add(array, file, handle, point);
+}
+
+int
+pvr_sync_signal_array_collect_ops(struct xarray *array,
+				  struct drm_file *file,
+				  u32 sync_op_count,
+				  const struct drm_pvr_sync_op *sync_ops)
+{
+	for (u32 i = 0; i < sync_op_count; i++) {
+		struct pvr_sync_signal *sig_sync;
+		int ret;
+
+		if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL))
+			continue;
+
+		ret = pvr_check_sync_op(&sync_ops[i]);
+		if (ret)
+			return ret;
+
+		sig_sync = pvr_sync_signal_array_get(array, file,
+						     sync_ops[i].handle,
+						     sync_ops[i].value);
+		if (IS_ERR(sig_sync))
+			return PTR_ERR(sig_sync);
+	}
+
+	return 0;
+}
+
+int
+pvr_sync_signal_array_update_fences(struct xarray *array,
+				    u32 sync_op_count,
+				    const struct drm_pvr_sync_op *sync_ops,
+				    struct dma_fence *done_fence)
+{
+	for (u32 i = 0; i < sync_op_count; i++) {
+		struct dma_fence *old_fence;
+		struct pvr_sync_signal *sig_sync;
+
+		if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL))
+			continue;
+
+		sig_sync = pvr_sync_signal_array_search(array, sync_ops[i].handle,
+							sync_ops[i].value);
+		if (WARN_ON(!sig_sync))
+			return -EINVAL;
+
+		old_fence = sig_sync->fence;
+		sig_sync->fence = dma_fence_get(done_fence);
+		dma_fence_put(old_fence);
+
+		if (WARN_ON(!sig_sync->fence))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+void
+pvr_sync_signal_array_push_fences(struct xarray *array)
+{
+	struct pvr_sync_signal *sig_sync;
+	unsigned long i;
+
+	xa_for_each(array, i, sig_sync) {
+		if (sig_sync->chain) {
+			drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain,
+					      sig_sync->fence, sig_sync->point);
+			sig_sync->chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence);
+		}
+	}
+}
+
+static int
+pvr_sync_add_dep_to_job(struct drm_sched_job *job, struct dma_fence *f)
+{
+	struct dma_fence_unwrap iter;
+	u32 native_fence_count = 0;
+	struct dma_fence *uf;
+	int err = 0;
+
+	dma_fence_unwrap_for_each(uf, &iter, f) {
+		if (pvr_queue_fence_is_ufo_backed(uf))
+			native_fence_count++;
+	}
+
+	/* No need to unwrap the fence if it's fully non-native. */
+	if (!native_fence_count)
+		return drm_sched_job_add_dependency(job, f);
+
+	dma_fence_unwrap_for_each(uf, &iter, f) {
+		/* There's no dma_fence_unwrap_stop() helper cleaning up the refs
+		 * owned by dma_fence_unwrap(), so let's just iterate over all
+		 * entries without doing anything when something failed.
+		 */
+		if (err)
+			continue;
+
+		if (pvr_queue_fence_is_ufo_backed(uf)) {
+			struct drm_sched_fence *s_fence = to_drm_sched_fence(uf);
+
+			/* If this is a native dependency, we wait for the scheduled fence,
+			 * and we will let pvr_queue_run_job() issue FW waits.
+			 */
+			err = drm_sched_job_add_dependency(job,
+							   dma_fence_get(&s_fence->scheduled));
+		} else {
+			err = drm_sched_job_add_dependency(job, dma_fence_get(uf));
+		}
+	}
+
+	dma_fence_put(f);
+	return err;
+}
+
+int
+pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job,
+			 u32 sync_op_count,
+			 const struct drm_pvr_sync_op *sync_ops,
+			 struct xarray *signal_array)
+{
+	int err = 0;
+
+	if (!sync_op_count)
+		return 0;
+
+	for (u32 i = 0; i < sync_op_count; i++) {
+		struct pvr_sync_signal *sig_sync;
+		struct dma_fence *fence;
+
+		if (sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL)
+			continue;
+
+		err = pvr_check_sync_op(&sync_ops[i]);
+		if (err)
+			return err;
+
+		sig_sync = pvr_sync_signal_array_search(signal_array, sync_ops[i].handle,
+							sync_ops[i].value);
+		if (sig_sync) {
+			if (WARN_ON(!sig_sync->fence))
+				return -EINVAL;
+
+			fence = dma_fence_get(sig_sync->fence);
+		} else {
+			err = drm_syncobj_find_fence(from_pvr_file(pvr_file), sync_ops[i].handle,
+						     sync_ops[i].value, 0, &fence);
+			if (err)
+				return err;
+		}
+
+		err = pvr_sync_add_dep_to_job(job, fence);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_sync.h b/drivers/gpu/drm/imagination/pvr_sync.h
new file mode 100644
index 0000000000000..db6ccfda104a3
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_sync.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_SYNC_H
+#define PVR_SYNC_H
+
+#include <uapi/drm/pvr_drm.h>
+
+/* Forward declaration from <linux/xarray.h>. */
+struct xarray;
+
+/* Forward declaration from <drm/drm_file.h>. */
+struct drm_file;
+
+/* Forward declaration from <drm/gpu_scheduler.h>. */
+struct drm_sched_job;
+
+/* Forward declaration from "pvr_device.h". */
+struct pvr_file;
+
+/**
+ * struct pvr_sync_signal - Object encoding a syncobj signal operation
+ *
+ * The job submission logic collects all signal operations in an array of
+ * pvr_sync_signal objects. This array also serves as a cache to get the
+ * latest dma_fence when multiple jobs are submitted at once, and one job
+ * signals a syncobj point that's later waited on by a subsequent job.
+ */
+struct pvr_sync_signal {
+	/** @handle: Handle of the syncobj to signal. */
+	u32 handle;
+
+	/**
+	 * @point: Point to signal in the syncobj.
+	 *
+	 * Only relevant for timeline syncobjs.
+	 */
+	u64 point;
+
+	/** @syncobj: Syncobj retrieved from the handle. */
+	struct drm_syncobj *syncobj;
+
+	/**
+	 * @chain: Chain object used to link the new fence with the
+	 *	   existing timeline syncobj.
+	 *
+	 * Should be zero when manipulating a regular syncobj.
+	 */
+	struct dma_fence_chain *chain;
+
+	/**
+	 * @fence: New fence object to attach to the syncobj.
+	 *
+	 * This pointer starts with the current fence bound to
+	 * the <handle,point> pair.
+	 */
+	struct dma_fence *fence;
+};
+
+void
+pvr_sync_signal_array_cleanup(struct xarray *array);
+
+int
+pvr_sync_signal_array_collect_ops(struct xarray *array,
+				  struct drm_file *file,
+				  u32 sync_op_count,
+				  const struct drm_pvr_sync_op *sync_ops);
+
+int
+pvr_sync_signal_array_update_fences(struct xarray *array,
+				    u32 sync_op_count,
+				    const struct drm_pvr_sync_op *sync_ops,
+				    struct dma_fence *done_fence);
+
+void
+pvr_sync_signal_array_push_fences(struct xarray *array);
+
+int
+pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job,
+			 u32 sync_op_count,
+			 const struct drm_pvr_sync_op *sync_ops,
+			 struct xarray *signal_array);
+
+#endif /* PVR_SYNC_H */
-- 
GitLab


From 6b17baabf6d306f85021b9a081dcd0a1a5c6f846 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:39 +0000
Subject: [PATCH 0414/2340] drm/imagination: Add firmware trace header

Changes since v8:
- Corrected license identifiers

Changes since v5:
- Split up header commit due to size

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Link: https://lore.kernel.org/r/c0fc28b2df394584afe3cc1b320140b01e17b322.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 .../gpu/drm/imagination/pvr_rogue_fwif_sf.h   | 1648 +++++++++++++++++
 1 file changed, 1648 insertions(+)
 create mode 100644 drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h

diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h
new file mode 100644
index 0000000000000..571954182f33d
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h
@@ -0,0 +1,1648 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_ROGUE_FWIF_SF_H
+#define PVR_ROGUE_FWIF_SF_H
+
+/*
+ ******************************************************************************
+ * *DO*NOT* rearrange or delete lines in rogue_fw_log_sfgroups or stid_fmts
+ *           WILL BREAK fw tracing message compatibility with previous
+ *           fw versions. Only add new ones, if so required.
+ ******************************************************************************
+ */
+
+/* Available log groups. */
+enum rogue_fw_log_sfgroups {
+	ROGUE_FW_GROUP_NULL,
+	ROGUE_FW_GROUP_MAIN,
+	ROGUE_FW_GROUP_CLEANUP,
+	ROGUE_FW_GROUP_CSW,
+	ROGUE_FW_GROUP_PM,
+	ROGUE_FW_GROUP_RTD,
+	ROGUE_FW_GROUP_SPM,
+	ROGUE_FW_GROUP_MTS,
+	ROGUE_FW_GROUP_BIF,
+	ROGUE_FW_GROUP_MISC,
+	ROGUE_FW_GROUP_POW,
+	ROGUE_FW_GROUP_HWR,
+	ROGUE_FW_GROUP_HWP,
+	ROGUE_FW_GROUP_RPM,
+	ROGUE_FW_GROUP_DMA,
+	ROGUE_FW_GROUP_DBG,
+};
+
+#define PVR_SF_STRING_MAX_SIZE 256U
+
+/* pair of string format id and string formats */
+struct rogue_fw_stid_fmt {
+	u32 id;
+	char name[PVR_SF_STRING_MAX_SIZE];
+};
+
+/*
+ *  The symbolic names found in the table above are assigned an u32 value of
+ *  the following format:
+ *  31 30 28 27       20   19  16    15  12      11            0   bits
+ *  -   ---   ---- ----     ----      ----        ---- ---- ----
+ *     0-11: id number
+ *    12-15: group id number
+ *    16-19: number of parameters
+ *    20-27: unused
+ *    28-30: active: identify SF packet, otherwise regular int32
+ *       31: reserved for signed/unsigned compatibility
+ *
+ *   The following macro assigns those values to the enum generated SF ids list.
+ */
+#define ROGUE_FW_LOG_IDMARKER (0x70000000U)
+#define ROGUE_FW_LOG_CREATESFID(a, b, e) ((u32)(a) | ((u32)(b) << 12) | ((u32)(e) << 16) | \
+					  ROGUE_FW_LOG_IDMARKER)
+
+#define ROGUE_FW_LOG_IDMASK (0xFFF00000)
+#define ROGUE_FW_LOG_VALIDID(I) (((I) & ROGUE_FW_LOG_IDMASK) == ROGUE_FW_LOG_IDMARKER)
+
+/* Return the group id that the given (enum generated) id belongs to */
+#define ROGUE_FW_SF_GID(x) (((u32)(x) >> 12) & 0xfU)
+/* Returns how many arguments the SF(string format) for the given (enum generated) id requires */
+#define ROGUE_FW_SF_PARAMNUM(x) (((u32)(x) >> 16) & 0xfU)
+
+/* pair of string format id and string formats */
+struct rogue_km_stid_fmt {
+	u32 id;
+	const char *name;
+};
+
+static const struct rogue_km_stid_fmt stid_fmts[] = {
+	{ ROGUE_FW_LOG_CREATESFID(0, ROGUE_FW_GROUP_NULL, 0),
+	  "You should not use this string" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_MAIN, 6),
+	  "Kick 3D: FWCtx 0x%08.8x @ %d, RTD 0x%08x. Partial render:%d, CSW resume:%d, prio:%d" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_MAIN, 2),
+	  "3D finished, HWRTData0State=%x, HWRTData1State=%x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_MAIN, 4),
+	  "Kick 3D TQ: FWCtx 0x%08.8x @ %d, CSW resume:%d, prio: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_MAIN, 0),
+	  "3D Transfer finished" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_MAIN, 3),
+	  "Kick Compute: FWCtx 0x%08.8x @ %d, prio: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_MAIN, 0),
+	  "Compute finished" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_MAIN, 7),
+	  "Kick TA: FWCtx 0x%08.8x @ %d, RTD 0x%08x. First kick:%d, Last kick:%d, CSW resume:%d, prio:%d" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_MAIN, 0),
+	  "TA finished" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_MAIN, 0),
+	  "Restart TA after partial render" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_MAIN, 0),
+	  "Resume TA without partial render" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_MAIN, 2),
+	  "Out of memory! Context 0x%08x, HWRTData 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_MAIN, 3),
+	  "Kick TLA: FWCtx 0x%08.8x @ %d, prio:%d" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_MAIN, 0),
+	  "TLA finished" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_MAIN, 3),
+	  "cCCB Woff update = %d, DM = %d, FWCtx = 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_MAIN, 2),
+	  "UFO Checks for FWCtx 0x%08.8x @ %d" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_MAIN, 3),
+	  "UFO Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_MAIN, 0),
+	  "UFO Checks succeeded" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_MAIN, 3),
+	  "UFO PR-Check: [0x%08.8x] is 0x%08.8x requires >= 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_MAIN, 1),
+	  "UFO SPM PR-Checks for FWCtx 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_MAIN, 4),
+	  "UFO SPM special PR-Check: [0x%08.8x] is 0x%08.8x requires >= ????????, [0x%08.8x] is ???????? requires 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_MAIN, 2),
+	  "UFO Updates for FWCtx 0x%08.8x @ %d" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_MAIN, 2),
+	  "UFO Update: [0x%08.8x] = 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_MAIN, 1),
+	  "ASSERT Failed: line %d of:" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_MAIN, 2),
+	  "HWR: Lockup detected on DM%d, FWCtx: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_MAIN, 3),
+	  "HWR: Reset fw state for DM%d, FWCtx: 0x%08.8x, MemCtx: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_MAIN, 0),
+	  "HWR: Reset HW" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_MAIN, 0),
+	  "HWR: Lockup recovered." },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_MAIN, 1),
+	  "HWR: False lockup detected for DM%u" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_MAIN, 3),
+	  "Alignment check %d failed: host = 0x%x, fw = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_MAIN, 0),
+	  "GP USC triggered" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_MAIN, 2),
+	  "Overallocating %u temporary registers and %u shared registers for breakpoint handler" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_MAIN, 1),
+	  "Setting breakpoint: Addr 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_MAIN, 0),
+	  "Store breakpoint state" },
+	{ ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_MAIN, 0),
+	  "Unsetting BP Registers" },
+	{ ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_MAIN, 1),
+	  "Active RTs expected to be zero, actually %u" },
+	{ ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_MAIN, 1),
+	  "RTC present, %u active render targets" },
+	{ ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_MAIN, 1),
+	  "Estimated Power 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_MAIN, 1),
+	  "RTA render target %u" },
+	{ ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_MAIN, 2),
+	  "Kick RTA render %u of %u" },
+	{ ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_MAIN, 3),
+	  "HWR sizes check %d failed: addresses = %d, sizes = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_MAIN, 1),
+	  "Pow: DUSTS_ENABLE = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_MAIN, 2),
+	  "Pow: On(1)/Off(0): %d, Units: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_MAIN, 2),
+	  "Pow: Changing number of dusts from %d to %d" },
+	{ ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_MAIN, 0),
+	  "Pow: Sidekick ready to be powered down" },
+	{ ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_MAIN, 2),
+	  "Pow: Request to change num of dusts to %d (bPowRascalDust=%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_MAIN, 0),
+	  "No ZS Buffer used for partial render (store)" },
+	{ ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_MAIN, 0),
+	  "No Depth/Stencil Buffer used for partial render (load)" },
+	{ ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_MAIN, 2),
+	  "HWR: Lock-up DM%d FWCtx: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_MAIN, 7),
+	  "MLIST%d checker: CatBase TE=0x%08x (%d Pages), VCE=0x%08x (%d Pages), ALIST=0x%08x, IsTA=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_MAIN, 3),
+	  "MLIST%d checker: MList[%d] = 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_MAIN, 1),
+	  "MLIST%d OK" },
+	{ ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_MAIN, 1),
+	  "MLIST%d is empty" },
+	{ ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_MAIN, 8),
+	  "MLIST%d checker: CatBase TE=0x%08x%08x, VCE=0x%08x%08x, ALIST=0x%08x%08x, IsTA=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(55, ROGUE_FW_GROUP_MAIN, 0),
+	  "3D OQ flush kick" },
+	{ ROGUE_FW_LOG_CREATESFID(56, ROGUE_FW_GROUP_MAIN, 1),
+	  "HWPerf block ID (0x%x) unsupported by device" },
+	{ ROGUE_FW_LOG_CREATESFID(57, ROGUE_FW_GROUP_MAIN, 2),
+	  "Setting breakpoint: Addr 0x%08.8x DM%u" },
+	{ ROGUE_FW_LOG_CREATESFID(58, ROGUE_FW_GROUP_MAIN, 3),
+	  "Kick RTU: FWCtx 0x%08.8x @ %d, prio: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(59, ROGUE_FW_GROUP_MAIN, 1),
+	  "RDM finished on context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(60, ROGUE_FW_GROUP_MAIN, 3),
+	  "Kick SHG: FWCtx 0x%08.8x @ %d, prio: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(61, ROGUE_FW_GROUP_MAIN, 0),
+	  "SHG finished" },
+	{ ROGUE_FW_LOG_CREATESFID(62, ROGUE_FW_GROUP_MAIN, 1),
+	  "FBA finished on context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(63, ROGUE_FW_GROUP_MAIN, 0),
+	  "UFO Checks failed" },
+	{ ROGUE_FW_LOG_CREATESFID(64, ROGUE_FW_GROUP_MAIN, 1),
+	  "Kill DM%d start" },
+	{ ROGUE_FW_LOG_CREATESFID(65, ROGUE_FW_GROUP_MAIN, 1),
+	  "Kill DM%d complete" },
+	{ ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_MAIN, 2),
+	  "FC%u cCCB Woff update = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_MAIN, 4),
+	  "Kick RTU: FWCtx 0x%08.8x @ %d, prio: %d, Frame Context: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_MAIN, 0),
+	  "GPU init" },
+	{ ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_MAIN, 1),
+	  "GPU Units init (# mask: 0x%x)" },
+	{ ROGUE_FW_LOG_CREATESFID(70, ROGUE_FW_GROUP_MAIN, 3),
+	  "Register access cycles: read: %d cycles, write: %d cycles, iterations: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(71, ROGUE_FW_GROUP_MAIN, 3),
+	  "Register configuration added. Address: 0x%x Value: 0x%x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(72, ROGUE_FW_GROUP_MAIN, 1),
+	  "Register configuration applied to type %d. (0:pow on, 1:Rascal/dust init, 2-5: TA,3D,CDM,TLA, 6:All)" },
+	{ ROGUE_FW_LOG_CREATESFID(73, ROGUE_FW_GROUP_MAIN, 0),
+	  "Perform TPC flush." },
+	{ ROGUE_FW_LOG_CREATESFID(74, ROGUE_FW_GROUP_MAIN, 0),
+	  "GPU has locked up (see HWR logs for more info)" },
+	{ ROGUE_FW_LOG_CREATESFID(75, ROGUE_FW_GROUP_MAIN, 0),
+	  "HWR has been triggered - GPU has overrun its deadline (see HWR logs)" },
+	{ ROGUE_FW_LOG_CREATESFID(76, ROGUE_FW_GROUP_MAIN, 0),
+	  "HWR has been triggered - GPU has failed a poll (see HWR logs)" },
+	{ ROGUE_FW_LOG_CREATESFID(77, ROGUE_FW_GROUP_MAIN, 1),
+	  "Doppler out of memory event for FC %u" },
+	{ ROGUE_FW_LOG_CREATESFID(78, ROGUE_FW_GROUP_MAIN, 3),
+	  "UFO SPM special PR-Check: [0x%08.8x] is 0x%08.8x requires >= 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(79, ROGUE_FW_GROUP_MAIN, 3),
+	  "UFO SPM special PR-Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(80, ROGUE_FW_GROUP_MAIN, 1),
+	  "TIMESTAMP -> [0x%08.8x]" },
+	{ ROGUE_FW_LOG_CREATESFID(81, ROGUE_FW_GROUP_MAIN, 2),
+	  "UFO RMW Updates for FWCtx 0x%08.8x @ %d" },
+	{ ROGUE_FW_LOG_CREATESFID(82, ROGUE_FW_GROUP_MAIN, 2),
+	  "UFO Update: [0x%08.8x] = 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(83, ROGUE_FW_GROUP_MAIN, 2),
+	  "Kick Null cmd: FWCtx 0x%08.8x @ %d" },
+	{ ROGUE_FW_LOG_CREATESFID(84, ROGUE_FW_GROUP_MAIN, 2),
+	  "RPM Out of memory! Context 0x%08x, SH requestor %d" },
+	{ ROGUE_FW_LOG_CREATESFID(85, ROGUE_FW_GROUP_MAIN, 4),
+	  "Discard RTU due to RPM abort: FWCtx 0x%08.8x @ %d, prio: %d, Frame Context: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(86, ROGUE_FW_GROUP_MAIN, 4),
+	  "Deferring DM%u from running context 0x%08x @ %d (deferred DMs = 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(87, ROGUE_FW_GROUP_MAIN, 4),
+	  "Deferring DM%u from running context 0x%08x @ %d to let other deferred DMs run (deferred DMs = 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(88, ROGUE_FW_GROUP_MAIN, 4),
+	  "No longer deferring DM%u from running context = 0x%08x @ %d (deferred DMs = 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(89, ROGUE_FW_GROUP_MAIN, 3),
+	  "FWCCB for DM%u is full, we will have to wait for space! (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(90, ROGUE_FW_GROUP_MAIN, 3),
+	  "FWCCB for OSid %u is full, we will have to wait for space! (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(91, ROGUE_FW_GROUP_MAIN, 1),
+	  "Host Sync Partition marker: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(92, ROGUE_FW_GROUP_MAIN, 1),
+	  "Host Sync Partition repeat: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(93, ROGUE_FW_GROUP_MAIN, 1),
+	  "Core clock set to %d Hz" },
+	{ ROGUE_FW_LOG_CREATESFID(94, ROGUE_FW_GROUP_MAIN, 7),
+	  "Compute Queue: FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(95, ROGUE_FW_GROUP_MAIN, 3),
+	  "Signal check failed, Required Data: 0x%x, Address: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(96, ROGUE_FW_GROUP_MAIN, 5),
+	  "Signal update, Snoop Filter: %u, MMU Ctx: %u, Signal Id: %u, Signals Base: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(97, ROGUE_FW_GROUP_MAIN, 4),
+	  "Signalled the previously waiting FWCtx: 0x%08.8x, OSId: %u, Signal Address: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(98, ROGUE_FW_GROUP_MAIN, 0),
+	  "Compute stalled" },
+	{ ROGUE_FW_LOG_CREATESFID(99, ROGUE_FW_GROUP_MAIN, 3),
+	  "Compute stalled (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(100, ROGUE_FW_GROUP_MAIN, 3),
+	  "Compute resumed (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(101, ROGUE_FW_GROUP_MAIN, 4),
+	  "Signal update notification from the host, PC Physical Address: 0x%08x%08x, Signal Virtual Address: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(102, ROGUE_FW_GROUP_MAIN, 4),
+	  "Signal update from DM: %u, OSId: %u, PC Physical Address: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(103, ROGUE_FW_GROUP_MAIN, 1),
+	  "DM: %u signal check failed" },
+	{ ROGUE_FW_LOG_CREATESFID(104, ROGUE_FW_GROUP_MAIN, 3),
+	  "Kick TDM: FWCtx 0x%08.8x @ %d, prio:%d" },
+	{ ROGUE_FW_LOG_CREATESFID(105, ROGUE_FW_GROUP_MAIN, 0),
+	  "TDM finished" },
+	{ ROGUE_FW_LOG_CREATESFID(106, ROGUE_FW_GROUP_MAIN, 4),
+	  "MMU_PM_CAT_BASE_TE[%d]_PIPE[%d]:  0x%08x 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(107, ROGUE_FW_GROUP_MAIN, 0),
+	  "BRN 54141 HIT" },
+	{ ROGUE_FW_LOG_CREATESFID(108, ROGUE_FW_GROUP_MAIN, 0),
+	  "BRN 54141 Dummy TA kicked" },
+	{ ROGUE_FW_LOG_CREATESFID(109, ROGUE_FW_GROUP_MAIN, 0),
+	  "BRN 54141 resume TA" },
+	{ ROGUE_FW_LOG_CREATESFID(110, ROGUE_FW_GROUP_MAIN, 0),
+	  "BRN 54141 double hit after applying WA" },
+	{ ROGUE_FW_LOG_CREATESFID(111, ROGUE_FW_GROUP_MAIN, 2),
+	  "BRN 54141 Dummy TA VDM base address: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(112, ROGUE_FW_GROUP_MAIN, 4),
+	  "Signal check failed, Required Data: 0x%x, Current Data: 0x%x, Address: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(113, ROGUE_FW_GROUP_MAIN, 2),
+	  "TDM stalled (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(114, ROGUE_FW_GROUP_MAIN, 1),
+	  "Write Offset update notification for stalled FWCtx 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(115, ROGUE_FW_GROUP_MAIN, 3),
+	  "Changing OSid %d's priority from %u to %u" },
+	{ ROGUE_FW_LOG_CREATESFID(116, ROGUE_FW_GROUP_MAIN, 0),
+	  "Compute resumed" },
+	{ ROGUE_FW_LOG_CREATESFID(117, ROGUE_FW_GROUP_MAIN, 7),
+	  "Kick TLA: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(118, ROGUE_FW_GROUP_MAIN, 7),
+	  "Kick TDM: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(119, ROGUE_FW_GROUP_MAIN, 11),
+	  "Kick TA: FWCtx 0x%08.8x @ %d, RTD 0x%08x, First kick:%d, Last kick:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(120, ROGUE_FW_GROUP_MAIN, 10),
+	  "Kick 3D: FWCtx 0x%08.8x @ %d, RTD 0x%08x, Partial render:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(121, ROGUE_FW_GROUP_MAIN, 8),
+	  "Kick 3D TQ: FWCtx 0x%08.8x @ %d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(122, ROGUE_FW_GROUP_MAIN, 6),
+	  "Kick Compute: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(123, ROGUE_FW_GROUP_MAIN, 8),
+	  "Kick RTU: FWCtx 0x%08.8x @ %d, Frame Context:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(124, ROGUE_FW_GROUP_MAIN, 7),
+	  "Kick SHG: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(125, ROGUE_FW_GROUP_MAIN, 1),
+	  "Reconfigure CSRM: special coeff support enable %d." },
+	{ ROGUE_FW_LOG_CREATESFID(127, ROGUE_FW_GROUP_MAIN, 1),
+	  "TA requires max coeff mode, deferring: %d." },
+	{ ROGUE_FW_LOG_CREATESFID(128, ROGUE_FW_GROUP_MAIN, 1),
+	  "3D requires max coeff mode, deferring: %d." },
+	{ ROGUE_FW_LOG_CREATESFID(129, ROGUE_FW_GROUP_MAIN, 1),
+	  "Kill DM%d failed" },
+	{ ROGUE_FW_LOG_CREATESFID(130, ROGUE_FW_GROUP_MAIN, 2),
+	  "Thread Queue is full, we will have to wait for space! (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(131, ROGUE_FW_GROUP_MAIN, 3),
+	  "Thread Queue is fencing, we are waiting for Roff = %d (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(132, ROGUE_FW_GROUP_MAIN, 1),
+	  "DM %d failed to Context Switch on time. Triggered HCS (see HWR logs)." },
+	{ ROGUE_FW_LOG_CREATESFID(133, ROGUE_FW_GROUP_MAIN, 1),
+	  "HCS changed to %d ms" },
+	{ ROGUE_FW_LOG_CREATESFID(134, ROGUE_FW_GROUP_MAIN, 4),
+	  "Updating Tiles In Flight (Dusts=%d, PartitionMask=0x%08x, ISPCtl=0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(135, ROGUE_FW_GROUP_MAIN, 2),
+	  "  Phantom %d: USCTiles=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(136, ROGUE_FW_GROUP_MAIN, 0),
+	  "Isolation grouping is disabled" },
+	{ ROGUE_FW_LOG_CREATESFID(137, ROGUE_FW_GROUP_MAIN, 1),
+	  "Isolation group configured with a priority threshold of %d" },
+	{ ROGUE_FW_LOG_CREATESFID(138, ROGUE_FW_GROUP_MAIN, 1),
+	  "OS %d has come online" },
+	{ ROGUE_FW_LOG_CREATESFID(139, ROGUE_FW_GROUP_MAIN, 1),
+	  "OS %d has gone offline" },
+	{ ROGUE_FW_LOG_CREATESFID(140, ROGUE_FW_GROUP_MAIN, 4),
+	  "Signalled the previously stalled FWCtx: 0x%08.8x, OSId: %u, Signal Address: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(141, ROGUE_FW_GROUP_MAIN, 7),
+	  "TDM Queue: FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(142, ROGUE_FW_GROUP_MAIN, 6),
+	  "Reset TDM Queue Read Offset: FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u becomes 0, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(143, ROGUE_FW_GROUP_MAIN, 5),
+	  "User Mode Queue mismatched stream start: FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u, StreamStartOffset = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(144, ROGUE_FW_GROUP_MAIN, 0),
+	  "GPU deinit" },
+	{ ROGUE_FW_LOG_CREATESFID(145, ROGUE_FW_GROUP_MAIN, 0),
+	  "GPU units deinit" },
+	{ ROGUE_FW_LOG_CREATESFID(146, ROGUE_FW_GROUP_MAIN, 2),
+	  "Initialised OS %d with config flags 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(147, ROGUE_FW_GROUP_MAIN, 2),
+	  "UFO limit exceeded %d/%d" },
+	{ ROGUE_FW_LOG_CREATESFID(148, ROGUE_FW_GROUP_MAIN, 0),
+	  "3D Dummy stencil store" },
+	{ ROGUE_FW_LOG_CREATESFID(149, ROGUE_FW_GROUP_MAIN, 3),
+	  "Initialised OS %d with config flags 0x%08x and extended config flags 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(150, ROGUE_FW_GROUP_MAIN, 1),
+	  "Unknown Command (eCmdType=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(151, ROGUE_FW_GROUP_MAIN, 4),
+	  "UFO forced update: FWCtx 0x%08.8x @ %d [0x%08.8x] = 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(152, ROGUE_FW_GROUP_MAIN, 5),
+	  "UFO forced update NOP: FWCtx 0x%08.8x @ %d [0x%08.8x] = 0x%08.8x, reason %d" },
+	{ ROGUE_FW_LOG_CREATESFID(153, ROGUE_FW_GROUP_MAIN, 3),
+	  "TDM context switch check: Roff %u points to 0x%08x, Match=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(154, ROGUE_FW_GROUP_MAIN, 6),
+	  "OSid %d CCB init status: %d (1-ok 0-fail): kCCBCtl@0x%x kCCB@0x%x fwCCBCtl@0x%x fwCCB@0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(155, ROGUE_FW_GROUP_MAIN, 2),
+	  "FW IRQ # %u @ %u" },
+	{ ROGUE_FW_LOG_CREATESFID(156, ROGUE_FW_GROUP_MAIN, 3),
+	  "Setting breakpoint: Addr 0x%08.8x DM%u usc_breakpoint_ctrl_dm = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(157, ROGUE_FW_GROUP_MAIN, 3),
+	  "Invalid KCCB setup for OSid %u: KCCB 0x%08x, KCCB Ctrl 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(158, ROGUE_FW_GROUP_MAIN, 3),
+	  "Invalid KCCB cmd (%u) for OSid %u @ KCCB 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(159, ROGUE_FW_GROUP_MAIN, 4),
+	  "FW FAULT: At line %d in file 0x%08x%08x, additional data=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(160, ROGUE_FW_GROUP_MAIN, 4),
+	  "Invalid breakpoint: MemCtx 0x%08x Addr 0x%08.8x DM%u usc_breakpoint_ctrl_dm = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(161, ROGUE_FW_GROUP_MAIN, 3),
+	  "Discarding invalid SLC flushinval command for OSid %u: DM %u, FWCtx 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(162, ROGUE_FW_GROUP_MAIN, 4),
+	  "Invalid Write Offset update notification from OSid %u to DM %u: FWCtx 0x%08x, MemCtx 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(163, ROGUE_FW_GROUP_MAIN, 4),
+	  "Null FWCtx in KCCB kick cmd for OSid %u: KCCB 0x%08x, ROff %u, WOff %u" },
+	{ ROGUE_FW_LOG_CREATESFID(164, ROGUE_FW_GROUP_MAIN, 3),
+	  "Checkpoint CCB for OSid %u is full, signalling host for full check state (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(165, ROGUE_FW_GROUP_MAIN, 8),
+	  "OSid %d CCB init status: %d (1-ok 0-fail): kCCBCtl@0x%x kCCB@0x%x fwCCBCtl@0x%x fwCCB@0x%x chptCCBCtl@0x%x chptCCB@0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(166, ROGUE_FW_GROUP_MAIN, 4),
+	  "OSid %d fw state transition request: from %d to %d (0-offline 1-ready 2-active 3-offloading). Status %d (1-ok 0-fail)" },
+	{ ROGUE_FW_LOG_CREATESFID(167, ROGUE_FW_GROUP_MAIN, 2),
+	  "OSid %u has %u stale commands in its KCCB" },
+	{ ROGUE_FW_LOG_CREATESFID(168, ROGUE_FW_GROUP_MAIN, 0),
+	  "Applying VCE pause" },
+	{ ROGUE_FW_LOG_CREATESFID(169, ROGUE_FW_GROUP_MAIN, 3),
+	  "OSid %u KCCB slot %u value updated to %u" },
+	{ ROGUE_FW_LOG_CREATESFID(170, ROGUE_FW_GROUP_MAIN, 7),
+	  "Unknown KCCB Command: KCCBCtl=0x%08x, KCCB=0x%08x, Roff=%u, Woff=%u, Wrap=%u, Cmd=0x%08x, CmdType=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(171, ROGUE_FW_GROUP_MAIN, 10),
+	  "Unknown Client CCB Command processing fences: FWCtx=0x%08x, CCBCtl=0x%08x, CCB=0x%08x, Roff=%u, Doff=%u, Woff=%u, Wrap=%u, CmdHdr=0x%08x, CmdType=0x%08x, CmdSize=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(172, ROGUE_FW_GROUP_MAIN, 10),
+	  "Unknown Client CCB Command executing kick: FWCtx=0x%08x, CCBCtl=0x%08x, CCB=0x%08x, Roff=%u, Doff=%u, Woff=%u, Wrap=%u, CmdHdr=0x%08x, CmdType=0x%08x, CmdSize=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(173, ROGUE_FW_GROUP_MAIN, 2),
+	  "Null FWCtx in KCCB kick cmd for OSid %u with WOff %u" },
+	{ ROGUE_FW_LOG_CREATESFID(174, ROGUE_FW_GROUP_MAIN, 2),
+	  "Discarding invalid SLC flushinval command for OSid %u, FWCtx 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(175, ROGUE_FW_GROUP_MAIN, 3),
+	  "Invalid Write Offset update notification from OSid %u: FWCtx 0x%08x, MemCtx 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(176, ROGUE_FW_GROUP_MAIN, 2),
+	  "Initialised Firmware with config flags 0x%08x and extended config flags 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(177, ROGUE_FW_GROUP_MAIN, 1),
+	  "Set Periodic Hardware Reset Mode: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(179, ROGUE_FW_GROUP_MAIN, 3),
+	  "PHR mode %d, FW state: 0x%08x, HWR flags: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(180, ROGUE_FW_GROUP_MAIN, 1),
+	  "PHR mode %d triggered a reset" },
+	{ ROGUE_FW_LOG_CREATESFID(181, ROGUE_FW_GROUP_MAIN, 2),
+	  "Signal update, Snoop Filter: %u, Signal Id: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(182, ROGUE_FW_GROUP_MAIN, 1),
+	  "WARNING: Skipping FW KCCB Cmd type %d which is not yet supported on Series8." },
+	{ ROGUE_FW_LOG_CREATESFID(183, ROGUE_FW_GROUP_MAIN, 4),
+	  "MMU context cache data NULL, but cache flags=0x%x (sync counter=%u, update value=%u) OSId=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(184, ROGUE_FW_GROUP_MAIN, 5),
+	  "SLC range based flush: Context=%u VAddr=0x%02x%08x, Size=0x%08x, Invalidate=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(185, ROGUE_FW_GROUP_MAIN, 3),
+	  "FBSC invalidate for Context Set [0x%08x]: Entry mask 0x%08x%08x." },
+	{ ROGUE_FW_LOG_CREATESFID(186, ROGUE_FW_GROUP_MAIN, 3),
+	  "TDM context switch check: Roff %u was not valid for kick starting at %u, moving back to %u" },
+	{ ROGUE_FW_LOG_CREATESFID(187, ROGUE_FW_GROUP_MAIN, 2),
+	  "Signal updates: FIFO: %u, Signals: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(188, ROGUE_FW_GROUP_MAIN, 2),
+	  "Invalid FBSC cmd: FWCtx 0x%08x, MemCtx 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(189, ROGUE_FW_GROUP_MAIN, 0),
+	  "Insert BRN68497 WA blit after TDM Context store." },
+	{ ROGUE_FW_LOG_CREATESFID(190, ROGUE_FW_GROUP_MAIN, 1),
+	  "UFO Updates for previously finished FWCtx 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(191, ROGUE_FW_GROUP_MAIN, 1),
+	  "RTC with RTA present, %u active render targets" },
+	{ ROGUE_FW_LOG_CREATESFID(192, ROGUE_FW_GROUP_MAIN, 0),
+	  "Invalid RTA Set-up. The ValidRenderTargets array in RTACtl is Null!" },
+	{ ROGUE_FW_LOG_CREATESFID(193, ROGUE_FW_GROUP_MAIN, 2),
+	  "Block 0x%x / Counter 0x%x INVALID and ignored" },
+	{ ROGUE_FW_LOG_CREATESFID(194, ROGUE_FW_GROUP_MAIN, 2),
+	  "ECC fault GPU=0x%08x FW=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(195, ROGUE_FW_GROUP_MAIN, 1),
+	  "Processing XPU event on DM = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(196, ROGUE_FW_GROUP_MAIN, 2),
+	  "OSid %u failed to respond to the virtualisation watchdog in time. Timestamp of its last input = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(197, ROGUE_FW_GROUP_MAIN, 1),
+	  "GPU-%u has locked up (see HWR logs for more info)" },
+	{ ROGUE_FW_LOG_CREATESFID(198, ROGUE_FW_GROUP_MAIN, 3),
+	  "Updating Tiles In Flight (Dusts=%d, PartitionMask=0x%08x, ISPCtl=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(199, ROGUE_FW_GROUP_MAIN, 0),
+	  "GPU has locked up (see HWR logs for more info)" },
+	{ ROGUE_FW_LOG_CREATESFID(200, ROGUE_FW_GROUP_MAIN, 1),
+	  "Reprocessing outstanding XPU events from cores 0x%02x" },
+	{ ROGUE_FW_LOG_CREATESFID(201, ROGUE_FW_GROUP_MAIN, 3),
+	  "Secondary XPU event on DM=%d, CoreMask=0x%02x, Raised=0x%02x" },
+	{ ROGUE_FW_LOG_CREATESFID(202, ROGUE_FW_GROUP_MAIN, 8),
+	  "TDM Queue: Core %u, FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(203, ROGUE_FW_GROUP_MAIN, 3),
+	  "TDM stalled Core %u (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(204, ROGUE_FW_GROUP_MAIN, 8),
+	  "Compute Queue: Core %u, FWCtx 0x%08.8x, prio: %d, queue: 0x%08x%08x (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(205, ROGUE_FW_GROUP_MAIN, 4),
+	  "Compute stalled core %u (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(206, ROGUE_FW_GROUP_MAIN, 6),
+	  "User Mode Queue mismatched stream start: Core %u, FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u, StreamStartOffset = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(207, ROGUE_FW_GROUP_MAIN, 3),
+	  "TDM resumed core %u (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(208, ROGUE_FW_GROUP_MAIN, 4),
+	  "Compute resumed core %u (Roff = %u, Woff = %u, Size = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(209, ROGUE_FW_GROUP_MAIN, 2),
+	  " Updated permission for OSid %u to perform MTS kicks: %u (1 = allowed, 0 = not allowed)" },
+	{ ROGUE_FW_LOG_CREATESFID(210, ROGUE_FW_GROUP_MAIN, 2),
+	  "Mask = 0x%X, mask2 = 0x%X" },
+	{ ROGUE_FW_LOG_CREATESFID(211, ROGUE_FW_GROUP_MAIN, 3),
+	  "  core %u, reg = %u, mask = 0x%X)" },
+	{ ROGUE_FW_LOG_CREATESFID(212, ROGUE_FW_GROUP_MAIN, 1),
+	  "ECC fault received from safety bus: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(213, ROGUE_FW_GROUP_MAIN, 1),
+	  "Safety Watchdog threshold period set to 0x%x clock cycles" },
+	{ ROGUE_FW_LOG_CREATESFID(214, ROGUE_FW_GROUP_MAIN, 0),
+	  "MTS Safety Event trigged by the safety watchdog." },
+	{ ROGUE_FW_LOG_CREATESFID(215, ROGUE_FW_GROUP_MAIN, 3),
+	  "DM%d USC tasks range limit 0 - %d, stride %d" },
+	{ ROGUE_FW_LOG_CREATESFID(216, ROGUE_FW_GROUP_MAIN, 1),
+	  "ECC fault GPU=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(217, ROGUE_FW_GROUP_MAIN, 0),
+	  "GPU Hardware units reset to prevent transient faults." },
+	{ ROGUE_FW_LOG_CREATESFID(218, ROGUE_FW_GROUP_MAIN, 2),
+	  "Kick Abort cmd: FWCtx 0x%08.8x @ %d" },
+	{ ROGUE_FW_LOG_CREATESFID(219, ROGUE_FW_GROUP_MAIN, 7),
+	  "Kick Ray: FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(220, ROGUE_FW_GROUP_MAIN, 0),
+	  "Ray finished" },
+	{ ROGUE_FW_LOG_CREATESFID(221, ROGUE_FW_GROUP_MAIN, 2),
+	  "State of firmware's private data at boot time: %d (0 = uninitialised, 1 = initialised); Fw State Flags = 0x%08X" },
+	{ ROGUE_FW_LOG_CREATESFID(222, ROGUE_FW_GROUP_MAIN, 2),
+	  "CFI Timeout detected (%d increasing to %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(223, ROGUE_FW_GROUP_MAIN, 2),
+	  "CFI Timeout detected for FBM (%d increasing to %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(224, ROGUE_FW_GROUP_MAIN, 0),
+	  "Geom OOM event not allowed" },
+	{ ROGUE_FW_LOG_CREATESFID(225, ROGUE_FW_GROUP_MAIN, 4),
+	  "Changing OSid %d's priority from %u to %u; Isolation = %u (0 = off; 1 = on)" },
+	{ ROGUE_FW_LOG_CREATESFID(226, ROGUE_FW_GROUP_MAIN, 2),
+	  "Skipping already executed TA FWCtx 0x%08.8x @ %d" },
+	{ ROGUE_FW_LOG_CREATESFID(227, ROGUE_FW_GROUP_MAIN, 2),
+	  "Attempt to execute TA FWCtx 0x%08.8x @ %d ahead of time on other GEOM" },
+	{ ROGUE_FW_LOG_CREATESFID(228, ROGUE_FW_GROUP_MAIN, 8),
+	  "Kick TDM: Kick ID %u FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(229, ROGUE_FW_GROUP_MAIN, 12),
+	  "Kick TA: Kick ID %u FWCtx 0x%08.8x @ %d, RTD 0x%08x, First kick:%d, Last kick:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(230, ROGUE_FW_GROUP_MAIN, 11),
+	  "Kick 3D: Kick ID %u FWCtx 0x%08.8x @ %d, RTD 0x%08x, Partial render:%d, CSW resume:%d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(231, ROGUE_FW_GROUP_MAIN, 7),
+	  "Kick Compute: Kick ID %u FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(232, ROGUE_FW_GROUP_MAIN, 1),
+	  "TDM finished: Kick ID %u " },
+	{ ROGUE_FW_LOG_CREATESFID(233, ROGUE_FW_GROUP_MAIN, 1),
+	  "TA finished: Kick ID %u " },
+	{ ROGUE_FW_LOG_CREATESFID(234, ROGUE_FW_GROUP_MAIN, 3),
+	  "3D finished: Kick ID %u , HWRTData0State=%x, HWRTData1State=%x" },
+	{ ROGUE_FW_LOG_CREATESFID(235, ROGUE_FW_GROUP_MAIN, 1),
+	  "Compute finished: Kick ID %u " },
+	{ ROGUE_FW_LOG_CREATESFID(236, ROGUE_FW_GROUP_MAIN, 10),
+	  "Kick TDM: Kick ID %u FWCtx 0x%08.8x @ %d, Base 0x%08x%08x. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(237, ROGUE_FW_GROUP_MAIN, 8),
+	  "Kick Ray: Kick ID %u FWCtx 0x%08.8x @ %d. (PID:%d, prio:%d, frame:%d, ext:0x%08x, int:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(238, ROGUE_FW_GROUP_MAIN, 1),
+	  "Ray finished: Kick ID %u " },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_MTS, 2),
+	  "Bg Task DM = %u, counted = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_MTS, 1),
+	  "Bg Task complete DM = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_MTS, 3),
+	  "Irq Task DM = %u, Breq = %d, SBIrq = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_MTS, 1),
+	  "Irq Task complete DM = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_MTS, 0),
+	  "Kick MTS Bg task DM=All" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_MTS, 1),
+	  "Kick MTS Irq task DM=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_MTS, 2),
+	  "Ready queue debug DM = %u, celltype = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_MTS, 2),
+	  "Ready-to-run debug DM = %u, item = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_MTS, 3),
+	  "Client command header DM = %u, client CCB = 0x%x, cmd = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_MTS, 3),
+	  "Ready-to-run debug OSid = %u, DM = %u, item = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_MTS, 3),
+	  "Ready queue debug DM = %u, celltype = %d, OSid = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_MTS, 3),
+	  "Bg Task DM = %u, counted = %d, OSid = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_MTS, 1),
+	  "Bg Task complete DM Bitfield: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_MTS, 0),
+	  "Irq Task complete." },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_MTS, 7),
+	  "Discarded Command Type: %d OS ID = %d PID = %d context = 0x%08x cccb ROff = 0x%x, due to USC breakpoint hit by OS ID = %d PID = %d." },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_MTS, 4),
+	  "KCCB Slot %u: DM=%u, Cmd=0x%08x, OSid=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_MTS, 2),
+	  "KCCB Slot %u: Return value %u" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_MTS, 1),
+	  "Bg Task OSid = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_MTS, 3),
+	  "KCCB Slot %u: Cmd=0x%08x, OSid=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_MTS, 1),
+	  "Irq Task (EVENT_STATUS=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_MTS, 2),
+	  "VZ sideband test, kicked with OSid=%u from MTS, OSid for test=%u" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_CLEANUP, 1),
+	  "FwCommonContext [0x%08x] cleaned" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_CLEANUP, 3),
+	  "FwCommonContext [0x%08x] is busy: ReadOffset = %d, WriteOffset = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_CLEANUP, 2),
+	  "HWRTData [0x%08x] for DM=%d, received cleanup request" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_CLEANUP, 3),
+	  "HWRTData [0x%08x] HW Context cleaned for DM%u, executed commands = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_CLEANUP, 2),
+	  "HWRTData [0x%08x] HW Context for DM%u is busy" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_CLEANUP, 2),
+	  "HWRTData [0x%08x] HW Context %u cleaned" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_CLEANUP, 1),
+	  "Freelist [0x%08x] cleaned" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_CLEANUP, 1),
+	  "ZSBuffer [0x%08x] cleaned" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_CLEANUP, 3),
+	  "ZSBuffer [0x%08x] is busy: submitted = %d, executed = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_CLEANUP, 4),
+	  "HWRTData [0x%08x] HW Context for DM%u is busy: submitted = %d, executed = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_CLEANUP, 2),
+	  "HW Ray Frame data [0x%08x] for DM=%d, received cleanup request" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_CLEANUP, 3),
+	  "HW Ray Frame Data [0x%08x] cleaned for DM%u, executed commands = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_CLEANUP, 4),
+	  "HW Ray Frame Data [0x%08x] for DM%u is busy: submitted = %d, executed = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_CLEANUP, 2),
+	  "HW Ray Frame Data [0x%08x] HW Context %u cleaned" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_CLEANUP, 1),
+	  "Discarding invalid cleanup request of type 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_CLEANUP, 1),
+	  "Received cleanup request for HWRTData [0x%08x]" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_CLEANUP, 3),
+	  "HWRTData [0x%08x] HW Context is busy: submitted = %d, executed = %d" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_CLEANUP, 3),
+	  "HWRTData [0x%08x] HW Context %u cleaned, executed commands = %d" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_CSW, 1),
+	  "CDM FWCtx 0x%08.8x needs resume" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_CSW, 3),
+	  "*** CDM FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_CSW, 1),
+	  "CDM FWCtx shared alloc size load 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_CSW, 0),
+	  "*** CDM FWCtx store complete" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_CSW, 0),
+	  "*** CDM FWCtx store start" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_CSW, 0),
+	  "CDM Soft Reset" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_CSW, 1),
+	  "3D FWCtx 0x%08.8x needs resume" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_CSW, 1),
+	  "*** 3D FWCtx 0x%08.8x resume" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_CSW, 0),
+	  "*** 3D context store complete" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_CSW, 3),
+	  "3D context store pipe state: 0x%08.8x 0x%08.8x 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_CSW, 0),
+	  "*** 3D context store start" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_CSW, 1),
+	  "*** 3D TQ FWCtx 0x%08.8x resume" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_CSW, 1),
+	  "TA FWCtx 0x%08.8x needs resume" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_CSW, 3),
+	  "*** TA FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_CSW, 2),
+	  "TA context shared alloc size store 0x%x, load 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_CSW, 0),
+	  "*** TA context store complete" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_CSW, 0),
+	  "*** TA context store start" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_CSW, 3),
+	  "Higher priority context scheduled for DM %u, old prio:%d, new prio:%d" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_CSW, 2),
+	  "Set FWCtx 0x%x priority to %u" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_CSW, 2),
+	  "3D context store pipe%d state: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_CSW, 2),
+	  "3D context resume pipe%d state: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_CSW, 1),
+	  "SHG FWCtx 0x%08.8x needs resume" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_CSW, 3),
+	  "*** SHG FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_CSW, 2),
+	  "SHG context shared alloc size store 0x%x, load 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_CSW, 0),
+	  "*** SHG context store complete" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_CSW, 0),
+	  "*** SHG context store start" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_CSW, 1),
+	  "Performing TA indirection, last used pipe %d" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_CSW, 0),
+	  "CDM context store hit ctrl stream terminate. Skip resume." },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_CSW, 4),
+	  "*** CDM FWCtx 0x%08.8x resume from snapshot buffer 0x%08x%08x, shader state %u" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_CSW, 2),
+	  "TA PDS/USC state buffer flip (%d->%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_CSW, 0),
+	  "TA context store hit BRN 52563: vertex store tasks outstanding" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_CSW, 1),
+	  "TA USC poll failed (USC vertex task count: %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_CSW, 0),
+	  "TA context store deferred due to BRN 54141." },
+	{ ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_CSW, 7),
+	  "Higher priority context scheduled for DM %u. Prios (OSid, OSid Prio, Context Prio): Current: %u, %u, %u New: %u, %u, %u" },
+	{ ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_CSW, 0),
+	  "*** TDM context store start" },
+	{ ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_CSW, 0),
+	  "*** TDM context store complete" },
+	{ ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_CSW, 2),
+	  "TDM context needs resume, header [0x%08.8x, 0x%08.8x]" },
+	{ ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_CSW, 8),
+	  "Higher priority context scheduled for DM %u. Prios (OSid, OSid Prio, Context Prio): Current: %u, %u, %u New: %u, %u, %u. Hard Context Switching: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_CSW, 3),
+	  "3D context store pipe %2d (%2d) state: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_CSW, 3),
+	  "3D context resume pipe %2d (%2d) state: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_CSW, 1),
+	  "*** 3D context store start version %d (1=IPP_TILE, 2=ISP_TILE)" },
+	{ ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_CSW, 3),
+	  "3D context store pipe%d state: 0x%08.8x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_CSW, 3),
+	  "3D context resume pipe%d state: 0x%08.8x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_CSW, 2),
+	  "3D context resume IPP state: 0x%08.8x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_CSW, 1),
+	  "All 3D pipes empty after ISP tile mode store! IPP_status: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_CSW, 3),
+	  "TDM context resume pipe%d state: 0x%08.8x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_CSW, 0),
+	  "*** 3D context store start version 4" },
+	{ ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_CSW, 2),
+	  "Multicore context resume on DM%d active core mask 0x%04.4x" },
+	{ ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_CSW, 2),
+	  "Multicore context store on DM%d active core mask 0x%04.4x" },
+	{ ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_CSW, 5),
+	  "TDM context resume Core %d, pipe%d state: 0x%08.8x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_CSW, 0),
+	  "*** RDM FWCtx store complete" },
+	{ ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_CSW, 0),
+	  "*** RDM FWCtx store start" },
+	{ ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_CSW, 1),
+	  "RDM FWCtx 0x%08.8x needs resume" },
+	{ ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_CSW, 1),
+	  "RDM FWCtx 0x%08.8x resume" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_BIF, 3),
+	  "Activate MemCtx=0x%08x BIFreq=%d secure=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_BIF, 1),
+	  "Deactivate MemCtx=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_BIF, 1),
+	  "Alloc PC reg %d" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_BIF, 2),
+	  "Grab reg set %d refcount now %d" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_BIF, 2),
+	  "Ungrab reg set %d refcount now %d" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_BIF, 6),
+	  "Setup reg=%d BIFreq=%d, expect=0x%08x%08x, actual=0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_BIF, 2),
+	  "Trust enabled:%d, for BIFreq=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_BIF, 9),
+	  "BIF Tiling Cfg %d base 0x%08x%08x len 0x%08x%08x enable %d stride %d --> 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_BIF, 4),
+	  "Wrote the Value %d to OSID0, Cat Base %d, Register's contents are now 0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_BIF, 3),
+	  "Wrote the Value %d to OSID1, Context  %d, Register's contents are now 0x%04x" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_BIF, 7),
+	  "ui32OSid = %u, Catbase = %u, Reg Address = 0x%x, Reg index = %u, Bitshift index = %u, Val = 0x%08x%08x" }, \
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_BIF, 5),
+	  "Map GPU memory DevVAddr 0x%x%08x, Size %u, Context ID %u, BIFREQ %u" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_BIF, 1),
+	  "Unmap GPU memory (event status 0x%x)" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_BIF, 3),
+	  "Activate MemCtx=0x%08x DM=%d secure=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_BIF, 6),
+	  "Setup reg=%d DM=%d, expect=0x%08x%08x, actual=0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_BIF, 4),
+	  "Map GPU memory DevVAddr 0x%x%08x, Size %u, Context ID %u" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_BIF, 2),
+	  "Trust enabled:%d, for DM=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_BIF, 5),
+	  "Map GPU memory DevVAddr 0x%x%08x, Size %u, Context ID %u, DM %u" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_BIF, 6),
+	  "Setup register set=%d DM=%d, PC address=0x%08x%08x, OSid=%u, NewPCRegRequired=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_BIF, 3),
+	  "Alloc PC set %d as register range [%u - %u]" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_MISC, 1),
+	  "GPIO write 0x%02x" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_MISC, 1),
+	  "GPIO read 0x%02x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_MISC, 0),
+	  "GPIO enabled" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_MISC, 0),
+	  "GPIO disabled" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_MISC, 1),
+	  "GPIO status=%d (0=OK, 1=Disabled)" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_MISC, 2),
+	  "GPIO_AP: Read address=0x%02x (%d byte(s))" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_MISC, 2),
+	  "GPIO_AP: Write address=0x%02x (%d byte(s))" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_MISC, 0),
+	  "GPIO_AP timeout!" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_MISC, 1),
+	  "GPIO_AP error. GPIO status=%d (0=OK, 1=Disabled)" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_MISC, 1),
+	  "GPIO already read 0x%02x" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_MISC, 2),
+	  "SR: Check buffer %d available returned %d" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Waiting for buffer %d" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_MISC, 2),
+	  "SR: Timeout waiting for buffer %d (after %d ticks)" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_MISC, 2),
+	  "SR: Skip frame check for strip %d returned %d (0=No skip, 1=Skip frame)" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Skip remaining strip %d in frame" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Inform HW that strip %d is a new frame" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Timeout waiting for INTERRUPT_FRAME_SKIP (after %d ticks)" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Strip mode is %d" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Strip Render start (strip %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Strip Render complete (buffer %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_MISC, 1),
+	  "SR: Strip Render fault (buffer %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_MISC, 1),
+	  "TRP state: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_MISC, 1),
+	  "TRP failure: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_MISC, 1),
+	  "SW TRP State: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_MISC, 1),
+	  "SW TRP failure: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_MISC, 1),
+	  "HW kick event (%u)" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_MISC, 4),
+	  "GPU core (%u/%u): checksum 0x%08x vs. 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_MISC, 6),
+	  "GPU core (%u/%u), unit (%u,%u): checksum 0x%08x vs. 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_MISC, 6),
+	  "HWR: Core%u, Register=0x%08x, OldValue=0x%08x%08x, CurrValue=0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_MISC, 4),
+	  "HWR: USC Core%u, ui32TotalSlotsUsedByDM=0x%08x, psDMHWCtl->ui32USCSlotsUsedByDM=0x%08x, bHWRNeeded=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_MISC, 6),
+	  "HWR: USC Core%u, Register=0x%08x, OldValue=0x%08x%08x, CurrValue=0x%08x%08x" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_PM, 10),
+	  "ALIST%d SP = %u, MLIST%d SP = %u (VCE 0x%08x%08x, TE 0x%08x%08x, ALIST 0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_PM, 8),
+	  "Is TA: %d, finished: %d on HW %u (HWRTData = 0x%08x, MemCtx = 0x%08x). FL different between TA/3D: global:%d, local:%d, mmu:%d" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_PM, 14),
+	  "UFL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), MFL-3D-Base: 0x%08x%08x (SP = %u, 4PT = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_PM, 14),
+	  "UFL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), MFL-TA-Base: 0x%08x%08x (SP = %u, 4PT = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_PM, 5),
+	  "Freelist grow completed [0x%08x]: added pages 0x%08x, total pages 0x%08x, new DevVirtAddr 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_PM, 1),
+	  "Grow for freelist ID=0x%08x denied by host" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_PM, 5),
+	  "Freelist update completed [0x%08x]: old total pages 0x%08x, new total pages 0x%08x, new DevVirtAddr 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_PM, 1),
+	  "Reconstruction of freelist ID=0x%08x failed" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_PM, 2),
+	  "Ignored attempt to pause or unpause the DM while there is no relevant operation in progress (0-TA,1-3D): %d, operation(0-unpause, 1-pause): %d" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_PM, 2),
+	  "Force free 3D Context memory, FWCtx: 0x%08x, status(1:success, 0:fail): %d" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_PM, 1),
+	  "PM pause TA ALLOC: PM_PAGE_MANAGEOP set to 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_PM, 1),
+	  "PM unpause TA ALLOC: PM_PAGE_MANAGEOP set to 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_PM, 1),
+	  "PM pause 3D DALLOC: PM_PAGE_MANAGEOP set to 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_PM, 1),
+	  "PM unpause 3D DALLOC: PM_PAGE_MANAGEOP set to 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_PM, 1),
+	  "PM ALLOC/DALLOC change was not actioned: PM_PAGE_MANAGEOP_STATUS=0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_PM, 7),
+	  "Is TA: %d, finished: %d on HW %u (HWRTData = 0x%08x, MemCtx = 0x%08x). FL different between TA/3D: global:%d, local:%d" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_PM, 10),
+	  "UFL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-3D-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_PM, 10),
+	  "UFL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-TA-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_PM, 7),
+	  "Freelist update completed [0x%08x / FL State 0x%08x%08x]: old total pages 0x%08x, new total pages 0x%08x, new DevVirtAddr 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_PM, 7),
+	  "Freelist update failed [0x%08x / FL State 0x%08x%08x]: old total pages 0x%08x, new total pages 0x%08x, new DevVirtAddr 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_PM, 10),
+	  "UFL-3D-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-3D-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_PM, 10),
+	  "UFL-TA-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u), FL-TA-State-Base: 0x%08x%08x (SP = %u, 4PB = %u, 4PT = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_PM, 5),
+	  "Freelist 0x%08x base address from HW: 0x%02x%08x (expected value: 0x%02x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_PM, 5),
+	  "Analysis of FL grow: Pause=(%u,%u) Paused+Valid(%u,%u) PMStateBuffer=0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_PM, 5),
+	  "Attempt FL grow for FL: 0x%08x, new dev address: 0x%02x%08x, new page count: %u, new ready count: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_PM, 5),
+	  "Deferring FL grow for non-loaded FL: 0x%08x, new dev address: 0x%02x%08x, new page count: %u, new ready count: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_PM, 4),
+	  "Is GEOM: %d, finished: %d (HWRTData = 0x%08x, MemCtx = 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_PM, 1),
+	  "3D Timeout Now for FWCtx 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_PM, 1),
+	  "GEOM PM Recycle for FWCtx 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_PM, 1),
+	  "PM running primary config (Core %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_PM, 1),
+	  "PM running secondary config (Core %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_PM, 1),
+	  "PM running tertiary config (Core %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_PM, 1),
+	  "PM running quaternary config (Core %d)" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_RPM, 3),
+	  "Global link list dynamic page count: vertex 0x%x, varying 0x%x, node 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_RPM, 3),
+	  "Global link list static page count: vertex 0x%x, varying 0x%x, node 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_RPM, 0),
+	  "RPM request failed. Waiting for freelist grow." },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_RPM, 0),
+	  "RPM request failed. Aborting the current frame." },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_RPM, 1),
+	  "RPM waiting for pending grow on freelist 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_RPM, 3),
+	  "Request freelist grow [0x%08x] current pages %d, grow size %d" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_RPM, 2),
+	  "Freelist load: SHF = 0x%08x, SHG = 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_RPM, 2),
+	  "SHF FPL register: 0x%08x.0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_RPM, 2),
+	  "SHG FPL register: 0x%08x.0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_RPM, 5),
+	  "Kernel requested RPM grow on freelist (type %d) at 0x%08x from current size %d to new size %d, RPM restart: %d (1=Yes)" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_RPM, 0),
+	  "Restarting SHG" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_RPM, 0),
+	  "Grow failed, aborting the current frame." },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_RPM, 1),
+	  "RPM abort complete on HWFrameData [0x%08x]." },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_RPM, 1),
+	  "RPM freelist cleanup [0x%08x] requires abort to proceed." },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_RPM, 2),
+	  "RPM page table base register: 0x%08x.0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_RPM, 0),
+	  "Issuing RPM abort." },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_RPM, 0),
+	  "RPM OOM received but toggle bits indicate free pages available" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_RPM, 0),
+	  "RPM hardware timeout. Unable to process OOM event." },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_RPM, 5),
+	  "SHF FL (0x%08x) load, FPL: 0x%08x.0x%08x, roff: 0x%08x, woff: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_RPM, 5),
+	  "SHG FL (0x%08x) load, FPL: 0x%08x.0x%08x, roff: 0x%08x, woff: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_RPM, 3),
+	  "SHF FL (0x%08x) store, roff: 0x%08x, woff: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_RPM, 3),
+	  "SHG FL (0x%08x) store, roff: 0x%08x, woff: 0x%08x" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_RTD, 2),
+	  "3D RTData 0x%08x finished on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_RTD, 2),
+	  "3D RTData 0x%08x ready on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_RTD, 4),
+	  "CONTEXT_PB_BASE set to 0x%x, FL different between TA/3D: local: %d, global: %d, mmu: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_RTD, 2),
+	  "Loading VFP table 0x%08x%08x for 3D" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_RTD, 2),
+	  "Loading VFP table 0x%08x%08x for TA" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_RTD, 10),
+	  "Load Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: TotalPMPages = %d, FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_RTD, 0),
+	  "Perform VHEAP table store" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_RTD, 2),
+	  "RTData 0x%08x: found match in Context=%d: Load=No, Store=No" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_RTD, 2),
+	  "RTData 0x%08x: found NULL in Context=%d: Load=Yes, Store=No" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_RTD, 3),
+	  "RTData 0x%08x: found state 3D finished (0x%08x) in Context=%d: Load=Yes, Store=Yes" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_RTD, 3),
+	  "RTData 0x%08x: found state TA finished (0x%08x) in Context=%d: Load=Yes, Store=Yes" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_RTD, 5),
+	  "Loading stack-pointers for %d (0:MidTA,1:3D) on context %d, MLIST = 0x%08x, ALIST = 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_RTD, 10),
+	  "Store Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: TotalPMPages = %d, FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_RTD, 2),
+	  "TA RTData 0x%08x finished on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_RTD, 2),
+	  "TA RTData 0x%08x loaded on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_RTD, 12),
+	  "Store Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_RTD, 12),
+	  "Load  Freelist 0x%x type: %d (0:local,1:global,2:mmu) for DM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_RTD, 1),
+	  "Freelist 0x%x RESET!!!!!!!!" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_RTD, 5),
+	  "Freelist 0x%x stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_RTD, 3),
+	  "Request reconstruction of Freelist 0x%x type: %d (0:local,1:global,2:mmu) on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_RTD, 1),
+	  "Freelist reconstruction ACK from host (HWR state :%u)" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_RTD, 0),
+	  "Freelist reconstruction completed" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_RTD, 3),
+	  "TA RTData 0x%08x loaded on HW context %u HWRTDataNeedsLoading=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_RTD, 3),
+	  "TE Region headers base 0x%08x%08x (RGNHDR Init: %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_RTD, 8),
+	  "TA Buffers: FWCtx 0x%08x, RT 0x%08x, RTData 0x%08x, VHeap 0x%08x%08x, TPC 0x%08x%08x (MemCtx 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_RTD, 2),
+	  "3D RTData 0x%08x loaded on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_RTD, 4),
+	  "3D Buffers: FWCtx 0x%08x, RT 0x%08x, RTData 0x%08x (MemCtx 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_RTD, 2),
+	  "Restarting TA after partial render, HWRTData0State=0x%x, HWRTData1State=0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_RTD, 3),
+	  "CONTEXT_PB_BASE set to 0x%x, FL different between TA/3D: local: %d, global: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_RTD, 12),
+	  "Store Freelist 0x%x type: %d (0:local,1:global) for PMDM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_RTD, 12),
+	  "Load  Freelist 0x%x type: %d (0:local,1:global) for PMDM%d: FL Total Pages %u (max=%u,grow size=%u), FL-addr = 0x%08x%08x, stacktop = 0x%08x%08x, Alloc Page Count = %u, Alloc MMU Page Count = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_RTD, 5),
+	  "3D Buffers: FWCtx 0x%08x, parent RT 0x%08x, RTData 0x%08x on ctx %d, (MemCtx 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_RTD, 7),
+	  "TA Buffers: FWCtx 0x%08x, RTData 0x%08x, VHeap 0x%08x%08x, TPC 0x%08x%08x (MemCtx 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_RTD, 4),
+	  "3D Buffers: FWCtx 0x%08x, RTData 0x%08x on ctx %d, (MemCtx 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_RTD, 6),
+	  "Load  Freelist 0x%x type: %d (0:local,1:global) for PMDM%d: FL Total Pages %u (max=%u,grow size=%u)" },
+	{ ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_RTD, 1),
+	  "TA RTData 0x%08x marked as killed." },
+	{ ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_RTD, 1),
+	  "3D RTData 0x%08x marked as killed." },
+	{ ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_RTD, 1),
+	  "RTData 0x%08x will be killed after TA restart." },
+	{ ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_RTD, 3),
+	  "RTData 0x%08x Render State Buffer 0x%02x%08x will be reset." },
+	{ ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_RTD, 3),
+	  "GEOM RTData 0x%08x using Render State Buffer 0x%02x%08x." },
+	{ ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_RTD, 3),
+	  "FRAG RTData 0x%08x using Render State Buffer 0x%02x%08x." },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_SPM, 0),
+	  "Force Z-Load for partial render" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_SPM, 0),
+	  "Force Z-Store for partial render" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_SPM, 1),
+	  "3D MemFree: Local FL 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_SPM, 1),
+	  "3D MemFree: MMU FL 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_SPM, 1),
+	  "3D MemFree: Global FL 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_SPM, 6),
+	  "OOM TA/3D PR Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x, HardwareSync Fence [0x%08.8x] is 0x%08.8x requires 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_SPM, 3),
+	  "OOM TA_cmd=0x%08x, U-FL 0x%08x, N-FL 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_SPM, 5),
+	  "OOM TA_cmd=0x%08x, OOM MMU:%d, U-FL 0x%08x, N-FL 0x%08x, MMU-FL 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_SPM, 0),
+	  "Partial render avoided" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_SPM, 0),
+	  "Partial render discarded" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_SPM, 0),
+	  "Partial Render finished" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM Owner = 3D-BG" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM Owner = 3D-IRQ" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM Owner = NONE" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM Owner = TA-BG" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM Owner = TA-IRQ" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_SPM, 2),
+	  "ZStore address 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_SPM, 2),
+	  "SStore address 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_SPM, 2),
+	  "ZLoad address 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_SPM, 2),
+	  "SLoad address 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_SPM, 0),
+	  "No deferred ZS Buffer provided" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_SPM, 1),
+	  "ZS Buffer successfully populated (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_SPM, 1),
+	  "No need to populate ZS Buffer (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_SPM, 1),
+	  "ZS Buffer successfully unpopulated (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_SPM, 1),
+	  "No need to unpopulate ZS Buffer (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_SPM, 1),
+	  "Send ZS-Buffer backing request to host (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_SPM, 1),
+	  "Send ZS-Buffer unbacking request to host (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_SPM, 1),
+	  "Don't send ZS-Buffer backing request. Previous request still pending (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_SPM, 1),
+	  "Don't send ZS-Buffer unbacking request. Previous request still pending (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_SPM, 1),
+	  "Partial Render waiting for ZBuffer to be backed (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_SPM, 1),
+	  "Partial Render waiting for SBuffer to be backed (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = none" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = PR blocked" },
+	{ ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = wait for grow" },
+	{ ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = wait for HW" },
+	{ ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = PR running" },
+	{ ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = PR avoided" },
+	{ ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = PR executed" },
+	{ ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_SPM, 2),
+	  "3DMemFree matches freelist 0x%08x (FL type = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_SPM, 0),
+	  "Raise the 3DMemFreeDedected flag" },
+	{ ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_SPM, 1),
+	  "Wait for pending grow on Freelist 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_SPM, 1),
+	  "ZS Buffer failed to be populated (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_SPM, 5),
+	  "Grow update inconsistency: FL addr: 0x%02x%08x, curr pages: %u, ready: %u, new: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_SPM, 4),
+	  "OOM: Resumed TA with ready pages, FL addr: 0x%02x%08x, current pages: %u, SP : %u" },
+	{ ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_SPM, 5),
+	  "Received grow update, FL addr: 0x%02x%08x, current pages: %u, ready pages: %u, threshold: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_SPM, 1),
+	  "No deferred partial render FW (Type=%d) Buffer provided" },
+	{ ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_SPM, 1),
+	  "No need to populate PR Buffer (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_SPM, 1),
+	  "No need to unpopulate PR Buffer (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_SPM, 1),
+	  "Send PR Buffer backing request to host (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_SPM, 1),
+	  "Send PR Buffer unbacking request to host (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_SPM, 1),
+	  "Don't send PR Buffer backing request. Previous request still pending (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_SPM, 1),
+	  "Don't send PR Buffer unbacking request. Previous request still pending (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_SPM, 2),
+	  "Partial Render waiting for Buffer %d type to be backed (ID=0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_SPM, 4),
+	  "Received grow update, FL addr: 0x%02x%08x, new pages: %u, ready pages: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_SPM, 3),
+	  "OOM TA/3D PR Check: [0x%08.8x] is 0x%08.8x requires 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_SPM, 3),
+	  "OOM: Resumed TA with ready pages, FL addr: 0x%02x%08x, current pages: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_SPM, 3),
+	  "OOM TA/3D PR deadlock unblocked reordering DM%d runlist head from Context 0x%08x to 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_SPM, 0),
+	  "SPM State = PR force free" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_POW, 4),
+	  "Check Pow state DM%d int: 0x%x, ext: 0x%x, pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_POW, 3),
+	  "GPU idle (might be powered down). Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_POW, 3),
+	  "OS requested pow off (forced = %d), DM%d, pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_POW, 4),
+	  "Initiate powoff query. Inactive DMs: %d %d %d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_POW, 2),
+	  "Any RD-DM pending? %d, Any RD-DM Active? %d" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_POW, 3),
+	  "GPU ready to be powered down. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_POW, 2),
+	  "HW Request On(1)/Off(0): %d, Units: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_POW, 2),
+	  "Request to change num of dusts to %d (Power flags=%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_POW, 2),
+	  "Changing number of dusts from %d to %d" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_POW, 0),
+	  "Sidekick init" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_POW, 1),
+	  "Rascal+Dusts init (# dusts mask: 0x%x)" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_POW, 0),
+	  "Initiate powoff query for RD-DMs." },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_POW, 0),
+	  "Initiate powoff query for TLA-DM." },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_POW, 2),
+	  "Any RD-DM pending? %d, Any RD-DM Active? %d" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_POW, 2),
+	  "TLA-DM pending? %d, TLA-DM Active? %d" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_POW, 1),
+	  "Request power up due to BRN37270. Pow stat int: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_POW, 3),
+	  "Cancel power off request int: 0x%x, ext: 0x%x, pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_POW, 1),
+	  "OS requested forced IDLE, pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_POW, 1),
+	  "OS cancelled forced IDLE, pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_POW, 3),
+	  "Idle timer start. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_POW, 3),
+	  "Cancel idle timer. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_POW, 2),
+	  "Active PM latency set to %dms. Core clock: %d Hz" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_POW, 2),
+	  "Compute cluster mask change to 0x%x, %d dusts powered." },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_POW, 0),
+	  "Null command executed, repeating initiate powoff query for RD-DMs." },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_POW, 1),
+	  "Power monitor: Estimate of dynamic energy %u" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_POW, 3),
+	  "Check Pow state: Int: 0x%x, Ext: 0x%x, Pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_POW, 2),
+	  "Proactive DVFS: New deadline, time = 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_POW, 2),
+	  "Proactive DVFS: New workload, cycles = 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: Proactive frequency calculated = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: Reactive utilisation = %u percent" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_POW, 2),
+	  "Proactive DVFS: Reactive frequency calculated = %u.%u" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: OPP Point Sent = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_POW, 2),
+	  "Proactive DVFS: Deadline removed = 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_POW, 2),
+	  "Proactive DVFS: Workload removed = 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: Throttle to a maximum = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_POW, 0),
+	  "Proactive DVFS: Failed to pass OPP point via GPIO." },
+	{ ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_POW, 0),
+	  "Proactive DVFS: Invalid node passed to function." },
+	{ ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: Guest OS attempted to do a privileged action. OSid = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: Unprofiled work started. Total unprofiled work present: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: Unprofiled work finished. Total unprofiled work present: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_POW, 0),
+	  "Proactive DVFS: Disabled: Not enabled by host." },
+	{ ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_POW, 2),
+	  "HW Request Completed(1)/Aborted(0): %d, Ticks: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_POW, 1),
+	  "Allowed number of dusts is %d due to BRN59042." },
+	{ ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_POW, 3),
+	  "Host timed out while waiting for a forced idle state. Pow state int: 0x%x, ext: 0x%x, flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_POW, 5),
+	  "Check Pow state: Int: 0x%x, Ext: 0x%x, Pow flags: 0x%x, Fence Counters: Check: %u - Update: %u" },
+	{ ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_POW, 2),
+	  "Proactive DVFS: OPP Point Sent = 0x%x, Success = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_POW, 0),
+	  "Proactive DVFS: GPU transitioned to idle" },
+	{ ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_POW, 0),
+	  "Proactive DVFS: GPU transitioned to active" },
+	{ ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_POW, 1),
+	  "Power counter dumping: Data truncated writing register %u. Buffer too small." },
+	{ ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_POW, 0),
+	  "Power controller returned ABORT for last request so retrying." },
+	{ ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_POW, 2),
+	  "Discarding invalid power request: type 0x%x, DM %u" },
+	{ ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_POW, 2),
+	  "Detected attempt to cancel forced idle while not forced idle (pow state 0x%x, pow flags 0x%x)" },
+	{ ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_POW, 2),
+	  "Detected attempt to force power off while not forced idle (pow state 0x%x, pow flags 0x%x)" },
+	{ ROGUE_FW_LOG_CREATESFID(55, ROGUE_FW_GROUP_POW, 1),
+	  "Detected attempt to change dust count while not forced idle (pow state 0x%x)" },
+	{ ROGUE_FW_LOG_CREATESFID(56, ROGUE_FW_GROUP_POW, 3),
+	  "Power monitor: Type = %d (0 = power, 1 = energy), Estimate result = 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(57, ROGUE_FW_GROUP_POW, 2),
+	  "Conflicting clock frequency range: OPP min = %u, max = %u" },
+	{ ROGUE_FW_LOG_CREATESFID(58, ROGUE_FW_GROUP_POW, 1),
+	  "Proactive DVFS: Set floor to a minimum = 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(59, ROGUE_FW_GROUP_POW, 2),
+	  "OS requested pow off (forced = %d), pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(60, ROGUE_FW_GROUP_POW, 1),
+	  "Discarding invalid power request: type 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(61, ROGUE_FW_GROUP_POW, 3),
+	  "Request to change SPU power state mask from 0x%x to 0x%x. Pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(62, ROGUE_FW_GROUP_POW, 2),
+	  "Changing SPU power state mask from 0x%x to 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(63, ROGUE_FW_GROUP_POW, 1),
+	  "Detected attempt to change SPU power state mask while not forced idle (pow state 0x%x)" },
+	{ ROGUE_FW_LOG_CREATESFID(64, ROGUE_FW_GROUP_POW, 1),
+	  "Invalid SPU power mask 0x%x! Changing to 1" },
+	{ ROGUE_FW_LOG_CREATESFID(65, ROGUE_FW_GROUP_POW, 2),
+	  "Proactive DVFS: Send OPP %u with clock divider value %u" },
+	{ ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_POW, 0),
+	  "PPA block started in perf validation mode." },
+	{ ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_POW, 1),
+	  "Reset PPA block state %u (1=reset, 0=recalculate)." },
+	{ ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_POW, 1),
+	  "Power controller returned ABORT for Core-%d last request so retrying." },
+	{ ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_POW, 3),
+	  "HW Request On(1)/Off(0): %d, Units: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(70, ROGUE_FW_GROUP_POW, 5),
+	  "Request to change SPU power state mask from 0x%x to 0x%x and RAC from 0x%x to 0x%x. Pow flags: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(71, ROGUE_FW_GROUP_POW, 4),
+	  "Changing SPU power state mask from 0x%x to 0x%x and RAC from 0x%x to 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(72, ROGUE_FW_GROUP_POW, 2),
+	  "RAC pending? %d, RAC Active? %d" },
+	{ ROGUE_FW_LOG_CREATESFID(73, ROGUE_FW_GROUP_POW, 0),
+	  "Initiate powoff query for RAC." },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_HWR, 2),
+	  "Lockup detected on DM%d, FWCtx: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_HWR, 3),
+	  "Reset fw state for DM%d, FWCtx: 0x%08.8x, MemCtx: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_HWR, 0),
+	  "Reset HW" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_HWR, 0),
+	  "Lockup recovered." },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_HWR, 2),
+	  "Lock-up DM%d FWCtx: 0x%08.8x" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_HWR, 4),
+	  "Lockup detected: GLB(%d->%d), PER-DM(0x%08x->0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_HWR, 3),
+	  "Early fault detection: GLB(%d->%d), PER-DM(0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_HWR, 3),
+	  "Hold scheduling due lockup: GLB(%d), PER-DM(0x%08x->0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_HWR, 4),
+	  "False lockup detected: GLB(%d->%d), PER-DM(0x%08x->0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_HWR, 4),
+	  "BRN37729: GLB(%d->%d), PER-DM(0x%08x->0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_HWR, 3),
+	  "Freelists reconstructed: GLB(%d->%d), PER-DM(0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_HWR, 4),
+	  "Reconstructing freelists: %u (0-No, 1-Yes): GLB(%d->%d), PER-DM(0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_HWR, 3),
+	  "HW poll %u (0-Unset 1-Set) failed (reg:0x%08x val:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_HWR, 2),
+	  "Discarded cmd on DM%u FWCtx=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_HWR, 6),
+	  "Discarded cmd on DM%u (reason=%u) HWRTData=0x%08x (st: %d), FWCtx 0x%08x @ %d" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_HWR, 2),
+	  "PM fence WA could not be applied, Valid TA Setup: %d, RD powered off: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_HWR, 5),
+	  "FL snapshot RTD 0x%08.8x - local (0x%08.8x): %d, global (0x%08.8x): %d" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_HWR, 8),
+	  "FL check RTD 0x%08.8x, discard: %d - local (0x%08.8x): s%d?=c%d, global (0x%08.8x): s%d?=c%d" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_HWR, 2),
+	  "FL reconstruction 0x%08.8x c%d" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_HWR, 3),
+	  "3D check: missing TA FWCtx 0x%08.8x @ %d, RTD 0x%08x." },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_HWR, 2),
+	  "Reset HW (mmu:%d, extmem: %d)" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_HWR, 4),
+	  "Zero TA caches for FWCtx: 0x%08.8x (TPC addr: 0x%08x%08x, size: %d bytes)" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_HWR, 2),
+	  "Recovery DM%u: Freelists reconstructed. New R-Flags=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_HWR, 5),
+	  "Recovery DM%u: FWCtx 0x%08x skipped to command @ %u. PR=%u. New R-Flags=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_HWR, 1),
+	  "Recovery DM%u: DM fully recovered" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_HWR, 2),
+	  "DM%u: Hold scheduling due to R-Flag = 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_HWR, 0),
+	  "Analysis: Need freelist reconstruction" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_HWR, 2),
+	  "Analysis DM%u: Lockup FWCtx: 0x%08.8x. Need to skip to next command" },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_HWR, 2),
+	  "Analysis DM%u: Lockup while TA is OOM FWCtx: 0x%08.8x. Need to skip to next command" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_HWR, 2),
+	  "Analysis DM%u: Lockup while partial render FWCtx: 0x%08.8x. Need PR cleanup" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_HWR, 0),
+	  "GPU has locked up" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_HWR, 1),
+	  "DM%u ready for HWR" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_HWR, 2),
+	  "Recovery DM%u: Updated Recovery counter. New R-Flags=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_HWR, 1),
+	  "Analysis: BRN37729 detected, reset TA and re-kicked 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_HWR, 1),
+	  "DM%u timed out" },
+	{ ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_HWR, 1),
+	  "RGX_CR_EVENT_STATUS=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_HWR, 2),
+	  "DM%u lockup falsely detected, R-Flags=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(38, ROGUE_FW_GROUP_HWR, 0),
+	  "GPU has overrun its deadline" },
+	{ ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_HWR, 0),
+	  "GPU has failed a poll" },
+	{ ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_HWR, 2),
+	  "RGX DM%u phase count=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_HWR, 2),
+	  "Reset HW (loop:%d, poll failures: 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_HWR, 1),
+	  "MMU fault event: 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(43, ROGUE_FW_GROUP_HWR, 1),
+	  "BIF1 page fault detected (Bank1 MMU Status: 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(44, ROGUE_FW_GROUP_HWR, 1),
+	  "Fast CRC Failed. Proceeding to full register checking (DM: %u)." },
+	{ ROGUE_FW_LOG_CREATESFID(45, ROGUE_FW_GROUP_HWR, 2),
+	  "Meta MMU page fault detected (Meta MMU Status: 0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(46, ROGUE_FW_GROUP_HWR, 2),
+	  "Fast CRC Check result for DM%u is HWRNeeded=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(47, ROGUE_FW_GROUP_HWR, 2),
+	  "Full Signature Check result for DM%u is HWRNeeded=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(48, ROGUE_FW_GROUP_HWR, 3),
+	  "Final result for DM%u is HWRNeeded=%u with HWRChecksToGo=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(49, ROGUE_FW_GROUP_HWR, 3),
+	  "USC Slots result for DM%u is HWRNeeded=%u USCSlotsUsedByDM=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(50, ROGUE_FW_GROUP_HWR, 2),
+	  "Deadline counter for DM%u is HWRDeadline=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(51, ROGUE_FW_GROUP_HWR, 1),
+	  "Holding Scheduling on OSid %u due to pending freelist reconstruction" },
+	{ ROGUE_FW_LOG_CREATESFID(52, ROGUE_FW_GROUP_HWR, 2),
+	  "Requesting reconstruction for freelist 0x%x (ID=%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(53, ROGUE_FW_GROUP_HWR, 1),
+	  "Reconstruction of freelist ID=%d complete" },
+	{ ROGUE_FW_LOG_CREATESFID(54, ROGUE_FW_GROUP_HWR, 4),
+	  "Reconstruction needed for freelist 0x%x (ID=%d) type: %d (0:local,1:global,2:mmu) on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(55, ROGUE_FW_GROUP_HWR, 1),
+	  "Reconstruction of freelist ID=%d failed" },
+	{ ROGUE_FW_LOG_CREATESFID(56, ROGUE_FW_GROUP_HWR, 4),
+	  "Restricting PDS Tasks to help other stalling DMs (RunningMask=0x%02x, StallingMask=0x%02x, PDS_CTRL=0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(57, ROGUE_FW_GROUP_HWR, 4),
+	  "Unrestricting PDS Tasks again (RunningMask=0x%02x, StallingMask=0x%02x, PDS_CTRL=0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(58, ROGUE_FW_GROUP_HWR, 2),
+	  "USC slots: %u used by DM%u" },
+	{ ROGUE_FW_LOG_CREATESFID(59, ROGUE_FW_GROUP_HWR, 1),
+	  "USC slots: %u empty" },
+	{ ROGUE_FW_LOG_CREATESFID(60, ROGUE_FW_GROUP_HWR, 5),
+	  "HCS DM%d's Context Switch failed to meet deadline. Current time: 0x%08x%08x, deadline: 0x%08x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(61, ROGUE_FW_GROUP_HWR, 1),
+	  "Begin hardware reset (HWR Counter=%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(62, ROGUE_FW_GROUP_HWR, 1),
+	  "Finished hardware reset (HWR Counter=%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(63, ROGUE_FW_GROUP_HWR, 2),
+	  "Holding Scheduling on DM %u for OSid %u due to pending freelist reconstruction" },
+	{ ROGUE_FW_LOG_CREATESFID(64, ROGUE_FW_GROUP_HWR, 5),
+	  "User Mode Queue ROff reset: FWCtx 0x%08.8x, queue: 0x%08x%08x (Roff = %u becomes StreamStartOffset = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(65, ROGUE_FW_GROUP_HWR, 4),
+	  "Reconstruction needed for freelist 0x%x (ID=%d) type: %d (0:local,1:global) on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(66, ROGUE_FW_GROUP_HWR, 3),
+	  "Mips page fault detected (BadVAddr: 0x%08x, EntryLo0: 0x%08x, EntryLo1: 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(67, ROGUE_FW_GROUP_HWR, 1),
+	  "At least one other DM is running okay so DM%u will get another chance" },
+	{ ROGUE_FW_LOG_CREATESFID(68, ROGUE_FW_GROUP_HWR, 2),
+	  "Reconstructing in FW, FL: 0x%x (ID=%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(69, ROGUE_FW_GROUP_HWR, 4),
+	  "Zero RTC for FWCtx: 0x%08.8x (RTC addr: 0x%08x%08x, size: %d bytes)" },
+	{ ROGUE_FW_LOG_CREATESFID(70, ROGUE_FW_GROUP_HWR, 5),
+	  "Reconstruction needed for freelist 0x%x (ID=%d) type: %d (0:local,1:global) phase: %d (0:TA, 1:3D) on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(71, ROGUE_FW_GROUP_HWR, 3),
+	  "Start long HW poll %u (0-Unset 1-Set) for (reg:0x%08x val:0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(72, ROGUE_FW_GROUP_HWR, 1),
+	  "End long HW poll (result=%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(73, ROGUE_FW_GROUP_HWR, 3),
+	  "DM%u has taken %d ticks and deadline is %d ticks" },
+	{ ROGUE_FW_LOG_CREATESFID(74, ROGUE_FW_GROUP_HWR, 5),
+	  "USC Watchdog result for DM%u is HWRNeeded=%u Status=%u USCs={0x%x} with HWRChecksToGo=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(75, ROGUE_FW_GROUP_HWR, 6),
+	  "Reconstruction needed for freelist 0x%x (ID=%d) OSid: %d type: %d (0:local,1:global) phase: %d (0:TA, 1:3D) on HW context %u" },
+	{ ROGUE_FW_LOG_CREATESFID(76, ROGUE_FW_GROUP_HWR, 1),
+	  "GPU-%u has locked up" },
+	{ ROGUE_FW_LOG_CREATESFID(77, ROGUE_FW_GROUP_HWR, 1),
+	  "DM%u has locked up" },
+	{ ROGUE_FW_LOG_CREATESFID(78, ROGUE_FW_GROUP_HWR, 2),
+	  "Core %d RGX_CR_EVENT_STATUS=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(79, ROGUE_FW_GROUP_HWR, 2),
+	  "RGX_CR_MULTICORE_EVENT_STATUS%u=0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(80, ROGUE_FW_GROUP_HWR, 5),
+	  "BIF0 page fault detected (Core %d MMU Status: 0x%08x%08x Req Status: 0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(81, ROGUE_FW_GROUP_HWR, 3),
+	  "MMU page fault detected (Core %d MMU Status: 0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(82, ROGUE_FW_GROUP_HWR, 4),
+	  "MMU page fault detected (Core %d MMU Status: 0x%08x%08x 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(83, ROGUE_FW_GROUP_HWR, 4),
+	  "Reset HW (core:%d of %d, loop:%d, poll failures: 0x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(84, ROGUE_FW_GROUP_HWR, 3),
+	  "Fast CRC Check result for Core%u, DM%u is HWRNeeded=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(85, ROGUE_FW_GROUP_HWR, 3),
+	  "Full Signature Check result for Core%u, DM%u is HWRNeeded=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(86, ROGUE_FW_GROUP_HWR, 4),
+	  "USC Slots result for Core%u, DM%u is HWRNeeded=%u USCSlotsUsedByDM=%d" },
+	{ ROGUE_FW_LOG_CREATESFID(87, ROGUE_FW_GROUP_HWR, 6),
+	  "USC Watchdog result for Core%u DM%u is HWRNeeded=%u Status=%u USCs={0x%x} with HWRChecksToGo=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(88, ROGUE_FW_GROUP_HWR, 3),
+	  "RISC-V MMU page fault detected (FWCORE MMU Status 0x%08x Req Status 0x%08x%08x)" },
+	{ ROGUE_FW_LOG_CREATESFID(89, ROGUE_FW_GROUP_HWR, 2),
+	  "TEXAS1_PFS poll failed on core %d with value 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(90, ROGUE_FW_GROUP_HWR, 2),
+	  "BIF_PFS poll failed on core %d with value 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(91, ROGUE_FW_GROUP_HWR, 2),
+	  "MMU_ABORT_PM_STATUS set poll failed on core %d with value 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(92, ROGUE_FW_GROUP_HWR, 2),
+	  "MMU_ABORT_PM_STATUS unset poll failed on core %d with value 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(93, ROGUE_FW_GROUP_HWR, 2),
+	  "MMU_CTRL_INVAL poll (all but fw) failed on core %d with value 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(94, ROGUE_FW_GROUP_HWR, 2),
+	  "MMU_CTRL_INVAL poll (all) failed on core %d with value 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(95, ROGUE_FW_GROUP_HWR, 3),
+	  "TEXAS%d_PFS poll failed on core %d with value 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(96, ROGUE_FW_GROUP_HWR, 3),
+	  "Extra Registers Check result for Core%u, DM%u is HWRNeeded=%u" },
+	{ ROGUE_FW_LOG_CREATESFID(97, ROGUE_FW_GROUP_HWR, 1),
+	  "FW attempted to write to read-only GPU address 0x%08x" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_HWP, 2),
+	  "Block 0x%x mapped to Config Idx %u" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_HWP, 1),
+	  "Block 0x%x omitted from event - not enabled in HW" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_HWP, 1),
+	  "Block 0x%x included in event - enabled in HW" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_HWP, 2),
+	  "Select register state hi_0x%x lo_0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_HWP, 1),
+	  "Counter stream block header word 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_HWP, 1),
+	  "Counter register offset 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_HWP, 1),
+	  "Block 0x%x config unset, skipping" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_HWP, 1),
+	  "Accessing Indirect block 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_HWP, 1),
+	  "Accessing Direct block 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_HWP, 1),
+	  "Programmed counter select register at offset 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_HWP, 2),
+	  "Block register offset 0x%x and value 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_HWP, 1),
+	  "Reading config block from driver 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_HWP, 2),
+	  "Reading block range 0x%x to 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_HWP, 1),
+	  "Recording block 0x%x config from driver" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_HWP, 0),
+	  "Finished reading config block from driver" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_HWP, 2),
+	  "Custom Counter offset: 0x%x  value: 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_HWP, 2),
+	  "Select counter n:%u  ID:0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_HWP, 3),
+	  "The counter ID 0x%x is not allowed. The package [b:%u, n:%u] will be discarded" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_HWP, 1),
+	  "Custom Counters filter status %d" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_HWP, 2),
+	  "The Custom block %d is not allowed. Use only blocks lower than %d. The package will be discarded" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_HWP, 2),
+	  "The package will be discarded because it contains %d counters IDs while the upper limit is %d" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_HWP, 2),
+	  "Check Filter 0x%x is 0x%x ?" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_HWP, 1),
+	  "The custom block %u is reset" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_HWP, 1),
+	  "Encountered an invalid command (%d)" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_HWP, 2),
+	  "HWPerf Queue is full, we will have to wait for space! (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(26, ROGUE_FW_GROUP_HWP, 3),
+	  "HWPerf Queue is fencing, we are waiting for Roff = %d (Roff = %u, Woff = %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(27, ROGUE_FW_GROUP_HWP, 1),
+	  "Custom Counter block: %d" },
+	{ ROGUE_FW_LOG_CREATESFID(28, ROGUE_FW_GROUP_HWP, 1),
+	  "Block 0x%x ENABLED" },
+	{ ROGUE_FW_LOG_CREATESFID(29, ROGUE_FW_GROUP_HWP, 1),
+	  "Block 0x%x DISABLED" },
+	{ ROGUE_FW_LOG_CREATESFID(30, ROGUE_FW_GROUP_HWP, 2),
+	  "Accessing Indirect block 0x%x, instance %u" },
+	{ ROGUE_FW_LOG_CREATESFID(31, ROGUE_FW_GROUP_HWP, 2),
+	  "Counter register 0x%x, Value 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(32, ROGUE_FW_GROUP_HWP, 1),
+	  "Counters filter status %d" },
+	{ ROGUE_FW_LOG_CREATESFID(33, ROGUE_FW_GROUP_HWP, 2),
+	  "Block 0x%x mapped to Ctl Idx %u" },
+	{ ROGUE_FW_LOG_CREATESFID(34, ROGUE_FW_GROUP_HWP, 0),
+	  "Block(s) in use for workload estimation." },
+	{ ROGUE_FW_LOG_CREATESFID(35, ROGUE_FW_GROUP_HWP, 3),
+	  "GPU %u Cycle counter 0x%x, Value 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(36, ROGUE_FW_GROUP_HWP, 3),
+	  "GPU Mask 0x%x Cycle counter 0x%x, Value 0x%x" },
+	{ ROGUE_FW_LOG_CREATESFID(37, ROGUE_FW_GROUP_HWP, 1),
+	  "Blocks IGNORED for GPU %u" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_DMA, 5),
+	  "Transfer 0x%02x request: 0x%02x%08x -> 0x%08x, size %u" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_DMA, 4),
+	  "Transfer of type 0x%02x expected on channel %u, 0x%02x found, status %u" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_DMA, 1),
+	  "DMA Interrupt register 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_DMA, 1),
+	  "Waiting for transfer of type 0x%02x completion..." },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_DMA, 3),
+	  "Loading of cCCB data from FW common context 0x%08x (offset: %u, size: %u) failed" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_DMA, 3),
+	  "Invalid load of cCCB data from FW common context 0x%08x (offset: %u, size: %u)" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_DMA, 1),
+	  "Transfer 0x%02x request poll failure" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_DMA, 2),
+	  "Boot transfer(s) failed (code? %u, data? %u), used slower memcpy instead" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_DMA, 7),
+	  "Transfer 0x%02x request on ch. %u: system 0x%02x%08x, coremem 0x%08x, flags 0x%x, size %u" },
+
+	{ ROGUE_FW_LOG_CREATESFID(1, ROGUE_FW_GROUP_DBG, 2),
+	  "0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(2, ROGUE_FW_GROUP_DBG, 1),
+	  "0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(3, ROGUE_FW_GROUP_DBG, 2),
+	  "0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(4, ROGUE_FW_GROUP_DBG, 3),
+	  "0x%08x 0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(5, ROGUE_FW_GROUP_DBG, 4),
+	  "0x%08x 0x%08x 0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(6, ROGUE_FW_GROUP_DBG, 5),
+	  "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(7, ROGUE_FW_GROUP_DBG, 6),
+	  "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(8, ROGUE_FW_GROUP_DBG, 7),
+	  "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(9, ROGUE_FW_GROUP_DBG, 8),
+	  "0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x" },
+	{ ROGUE_FW_LOG_CREATESFID(10, ROGUE_FW_GROUP_DBG, 1),
+	  "%d" },
+	{ ROGUE_FW_LOG_CREATESFID(11, ROGUE_FW_GROUP_DBG, 2),
+	  "%d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(12, ROGUE_FW_GROUP_DBG, 3),
+	  "%d %d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(13, ROGUE_FW_GROUP_DBG, 4),
+	  "%d %d %d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(14, ROGUE_FW_GROUP_DBG, 5),
+	  "%d %d %d %d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(15, ROGUE_FW_GROUP_DBG, 6),
+	  "%d %d %d %d %d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(16, ROGUE_FW_GROUP_DBG, 7),
+	  "%d %d %d %d %d %d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(17, ROGUE_FW_GROUP_DBG, 8),
+	  "%d %d %d %d %d %d %d %d" },
+	{ ROGUE_FW_LOG_CREATESFID(18, ROGUE_FW_GROUP_DBG, 1),
+	  "%u" },
+	{ ROGUE_FW_LOG_CREATESFID(19, ROGUE_FW_GROUP_DBG, 2),
+	  "%u %u" },
+	{ ROGUE_FW_LOG_CREATESFID(20, ROGUE_FW_GROUP_DBG, 3),
+	  "%u %u %u" },
+	{ ROGUE_FW_LOG_CREATESFID(21, ROGUE_FW_GROUP_DBG, 4),
+	  "%u %u %u %u" },
+	{ ROGUE_FW_LOG_CREATESFID(22, ROGUE_FW_GROUP_DBG, 5),
+	  "%u %u %u %u %u" },
+	{ ROGUE_FW_LOG_CREATESFID(23, ROGUE_FW_GROUP_DBG, 6),
+	  "%u %u %u %u %u %u" },
+	{ ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_DBG, 7),
+	  "%u %u %u %u %u %u %u" },
+	{ ROGUE_FW_LOG_CREATESFID(25, ROGUE_FW_GROUP_DBG, 8),
+	  "%u %u %u %u %u %u %u %u" },
+
+	{ ROGUE_FW_LOG_CREATESFID(65535, ROGUE_FW_GROUP_NULL, 15),
+	  "You should not use this string" },
+};
+
+#define ROGUE_FW_SF_FIRST ROGUE_FW_LOG_CREATESFID(0, ROGUE_FW_GROUP_NULL, 0)
+#define ROGUE_FW_SF_MAIN_ASSERT_FAILED ROGUE_FW_LOG_CREATESFID(24, ROGUE_FW_GROUP_MAIN, 1)
+#define ROGUE_FW_SF_LAST ROGUE_FW_LOG_CREATESFID(65535, ROGUE_FW_GROUP_NULL, 15)
+
+#endif /* PVR_ROGUE_FWIF_SF_H */
-- 
GitLab


From cb56cd61086645e46cc54d1837de803b1c471df6 Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:40 +0000
Subject: [PATCH 0415/2340] drm/imagination: Add firmware trace to debugfs

Firmware trace is exposed at /sys/debug/dri/<dev_nr>/pvr_fw/trace_0.
Trace is enabled via the group mask at
/sys/debug/dri/<dev_nr>/pvr_params/fw_trace_mask.

Changes since v8:
- Corrected license identifiers

Changes since v3:
- Use drm_dev_{enter,exit}

Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Link: https://lore.kernel.org/r/009cf9fee347fa96c8a665dc368fc54a5ffceff0.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/Makefile       |   4 +
 drivers/gpu/drm/imagination/pvr_debugfs.c  |  53 +++
 drivers/gpu/drm/imagination/pvr_debugfs.h  |  29 ++
 drivers/gpu/drm/imagination/pvr_device.c   |   9 +
 drivers/gpu/drm/imagination/pvr_device.h   |  10 +
 drivers/gpu/drm/imagination/pvr_drv.c      |   4 +
 drivers/gpu/drm/imagination/pvr_fw_trace.c | 395 +++++++++++++++++++++
 drivers/gpu/drm/imagination/pvr_params.c   | 147 ++++++++
 drivers/gpu/drm/imagination/pvr_params.h   |  72 ++++
 9 files changed, 723 insertions(+)
 create mode 100644 drivers/gpu/drm/imagination/pvr_debugfs.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_debugfs.h
 create mode 100644 drivers/gpu/drm/imagination/pvr_params.c
 create mode 100644 drivers/gpu/drm/imagination/pvr_params.h

diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile
index 7e3515c99caa9..ec6db8e9b4037 100644
--- a/drivers/gpu/drm/imagination/Makefile
+++ b/drivers/gpu/drm/imagination/Makefile
@@ -20,6 +20,7 @@ powervr-y := \
 	pvr_hwrt.o \
 	pvr_job.o \
 	pvr_mmu.o \
+	pvr_params.o \
 	pvr_power.o \
 	pvr_queue.o \
 	pvr_stream.o \
@@ -28,4 +29,7 @@ powervr-y := \
 	pvr_vm.o \
 	pvr_vm_mips.o
 
+powervr-$(CONFIG_DEBUG_FS) += \
+	pvr_debugfs.o
+
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
diff --git a/drivers/gpu/drm/imagination/pvr_debugfs.c b/drivers/gpu/drm/imagination/pvr_debugfs.c
new file mode 100644
index 0000000000000..6b77c9b4bde88
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_debugfs.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_debugfs.h"
+
+#include "pvr_device.h"
+#include "pvr_fw_trace.h"
+#include "pvr_params.h"
+
+#include <linux/dcache.h>
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
+
+static const struct pvr_debugfs_entry pvr_debugfs_entries[] = {
+	{"pvr_params", pvr_params_debugfs_init},
+	{"pvr_fw", pvr_fw_trace_debugfs_init},
+};
+
+void
+pvr_debugfs_init(struct drm_minor *minor)
+{
+	struct drm_device *drm_dev = minor->dev;
+	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+	struct dentry *root = minor->debugfs_root;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(pvr_debugfs_entries); ++i) {
+		const struct pvr_debugfs_entry *entry = &pvr_debugfs_entries[i];
+		struct dentry *dir;
+
+		dir = debugfs_create_dir(entry->name, root);
+		if (IS_ERR(dir)) {
+			drm_warn(drm_dev,
+				 "failed to create debugfs dir '%s' (err=%d)",
+				 entry->name, (int)PTR_ERR(dir));
+			continue;
+		}
+
+		entry->init(pvr_dev, dir);
+	}
+}
+
+/*
+ * Since all entries are created under &drm_minor->debugfs_root, there's no
+ * need for a pvr_debugfs_fini() as DRM will clean up everything under its root
+ * automatically.
+ */
diff --git a/drivers/gpu/drm/imagination/pvr_debugfs.h b/drivers/gpu/drm/imagination/pvr_debugfs.h
new file mode 100644
index 0000000000000..ebacbd13b84a0
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_debugfs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_DEBUGFS_H
+#define PVR_DEBUGFS_H
+
+/* Forward declaration from <drm/drm_drv.h>. */
+struct drm_minor;
+
+#if defined(CONFIG_DEBUG_FS)
+/* Forward declaration from "pvr_device.h". */
+struct pvr_device;
+
+/* Forward declaration from <linux/dcache.h>. */
+struct dentry;
+
+struct pvr_debugfs_entry {
+	const char *name;
+	void (*init)(struct pvr_device *pvr_dev, struct dentry *dir);
+};
+
+void pvr_debugfs_init(struct drm_minor *minor);
+#else /* defined(CONFIG_DEBUG_FS) */
+#include <linux/compiler_attributes.h>
+
+static __always_inline void pvr_debugfs_init(struct drm_minor *minor) {}
+#endif /* defined(CONFIG_DEBUG_FS) */
+
+#endif /* PVR_DEBUGFS_H */
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 199f812d1ee71..8499becf4fbba 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -5,6 +5,7 @@
 #include "pvr_device_info.h"
 
 #include "pvr_fw.h"
+#include "pvr_params.h"
 #include "pvr_power.h"
 #include "pvr_queue.h"
 #include "pvr_rogue_cr_defs.h"
@@ -495,6 +496,14 @@ pvr_device_init(struct pvr_device *pvr_dev)
 	struct device *dev = drm_dev->dev;
 	int err;
 
+	/*
+	 * Setup device parameters. We do this first in case other steps
+	 * depend on them.
+	 */
+	err = pvr_device_params_init(&pvr_dev->params);
+	if (err)
+		return err;
+
 	/* Enable and initialize clocks required for the device to operate. */
 	err = pvr_device_clk_init(pvr_dev);
 	if (err)
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index b6e9a7e8072ee..e07655fc65e8c 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -7,6 +7,7 @@
 #include "pvr_ccb.h"
 #include "pvr_device_info.h"
 #include "pvr_fw.h"
+#include "pvr_params.h"
 #include "pvr_rogue_fwif_stream.h"
 #include "pvr_stream.h"
 
@@ -148,6 +149,15 @@ struct pvr_device {
 	/** @fw_dev: Firmware related data. */
 	struct pvr_fw_device fw_dev;
 
+	/**
+	 * @params: Device-specific parameters.
+	 *
+	 *          The values of these parameters are initialized from the
+	 *          defaults specified as module parameters. They may be
+	 *          modified at runtime via debugfs (if enabled).
+	 */
+	struct pvr_device_params params;
+
 	/** @stream_musthave_quirks: Bit array of "must-have" quirks for stream commands. */
 	u32 stream_musthave_quirks[PVR_STREAM_TYPE_MAX][PVR_STREAM_EXTHDR_TYPE_MAX];
 
diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c
index a6f1d52aaebb4..5c3b2d58d766b 100644
--- a/drivers/gpu/drm/imagination/pvr_drv.c
+++ b/drivers/gpu/drm/imagination/pvr_drv.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2023 Imagination Technologies Ltd. */
 
 #include "pvr_context.h"
+#include "pvr_debugfs.h"
 #include "pvr_device.h"
 #include "pvr_drv.h"
 #include "pvr_free_list.h"
@@ -1377,6 +1378,9 @@ static struct drm_driver pvr_drm_driver = {
 	.ioctls = pvr_drm_driver_ioctls,
 	.num_ioctls = ARRAY_SIZE(pvr_drm_driver_ioctls),
 	.fops = &pvr_drm_driver_fops,
+#if defined(CONFIG_DEBUG_FS)
+	.debugfs_init = pvr_debugfs_init,
+#endif
 
 	.name = PVR_DRIVER_NAME,
 	.desc = PVR_DRIVER_DESC,
diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c
index 0a8340369f1ac..87a42fb6ace68 100644
--- a/drivers/gpu/drm/imagination/pvr_fw_trace.c
+++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c
@@ -4,8 +4,10 @@
 #include "pvr_device.h"
 #include "pvr_gem.h"
 #include "pvr_rogue_fwif.h"
+#include "pvr_rogue_fwif_sf.h"
 #include "pvr_fw_trace.h"
 
+#include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 
 #include <linux/build_bug.h>
@@ -118,3 +120,396 @@ void pvr_fw_trace_fini(struct pvr_device *pvr_dev)
 	}
 	pvr_fw_object_unmap_and_destroy(fw_trace->tracebuf_ctrl_obj);
 }
+
+/**
+ * update_logtype() - Send KCCB command to trigger FW to update logtype
+ * @pvr_dev: Target PowerVR device
+ * @group_mask: New log group mask.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * Any error returned by pvr_kccb_send_cmd(), or
+ *  * -%EIO if the device is lost.
+ */
+static int
+update_logtype(struct pvr_device *pvr_dev, u32 group_mask)
+{
+	struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace;
+	struct rogue_fwif_kccb_cmd cmd;
+	int idx;
+	int err;
+
+	if (group_mask)
+		fw_trace->tracebuf_ctrl->log_type = ROGUE_FWIF_LOG_TYPE_TRACE | group_mask;
+	else
+		fw_trace->tracebuf_ctrl->log_type = ROGUE_FWIF_LOG_TYPE_NONE;
+
+	fw_trace->group_mask = group_mask;
+
+	down_read(&pvr_dev->reset_sem);
+	if (!drm_dev_enter(from_pvr_device(pvr_dev), &idx)) {
+		err = -EIO;
+		goto err_up_read;
+	}
+
+	cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_LOGTYPE_UPDATE;
+	cmd.kccb_flags = 0;
+
+	err = pvr_kccb_send_cmd(pvr_dev, &cmd, NULL);
+
+	drm_dev_exit(idx);
+
+err_up_read:
+	up_read(&pvr_dev->reset_sem);
+
+	return err;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int fw_trace_group_mask_show(struct seq_file *m, void *data)
+{
+	struct pvr_device *pvr_dev = m->private;
+
+	seq_printf(m, "%08x\n", pvr_dev->fw_dev.fw_trace.group_mask);
+
+	return 0;
+}
+
+static int fw_trace_group_mask_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, fw_trace_group_mask_show, inode->i_private);
+}
+
+static ssize_t fw_trace_group_mask_write(struct file *file, const char __user *ubuf, size_t len,
+					 loff_t *offp)
+{
+	struct seq_file *m = file->private_data;
+	struct pvr_device *pvr_dev = m->private;
+	u32 new_group_mask;
+	int err;
+
+	err = kstrtouint_from_user(ubuf, len, 0, &new_group_mask);
+	if (err)
+		return err;
+
+	err = update_logtype(pvr_dev, new_group_mask);
+	if (err)
+		return err;
+
+	pvr_dev->fw_dev.fw_trace.group_mask = new_group_mask;
+
+	return (ssize_t)len;
+}
+
+static const struct file_operations pvr_fw_trace_group_mask_fops = {
+	.owner = THIS_MODULE,
+	.open = fw_trace_group_mask_open,
+	.read = seq_read,
+	.write = fw_trace_group_mask_write,
+	.llseek = default_llseek,
+	.release = single_release,
+};
+
+struct pvr_fw_trace_seq_data {
+	/** @buffer: Pointer to copy of trace data. */
+	u32 *buffer;
+
+	/** @start_offset: Starting offset in trace data, as reported by FW. */
+	u32 start_offset;
+
+	/** @idx: Current index into trace data. */
+	u32 idx;
+
+	/** @assert_buf: Trace assert buffer, as reported by FW. */
+	struct rogue_fwif_file_info_buf assert_buf;
+};
+
+static u32 find_sfid(u32 id)
+{
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(stid_fmts); i++) {
+		if (stid_fmts[i].id == id)
+			return i;
+	}
+
+	return ROGUE_FW_SF_LAST;
+}
+
+static u32 read_fw_trace(struct pvr_fw_trace_seq_data *trace_seq_data, u32 offset)
+{
+	u32 idx;
+
+	idx = trace_seq_data->idx + offset;
+	if (idx >= ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS)
+		return 0;
+
+	idx = (idx + trace_seq_data->start_offset) % ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS;
+	return trace_seq_data->buffer[idx];
+}
+
+/**
+ * fw_trace_get_next() - Advance trace index to next entry
+ * @trace_seq_data: Trace sequence data.
+ *
+ * Returns:
+ *  * %true if trace index is now pointing to a valid entry, or
+ *  * %false if trace index is pointing to an invalid entry, or has hit the end
+ *    of the trace.
+ */
+static bool fw_trace_get_next(struct pvr_fw_trace_seq_data *trace_seq_data)
+{
+	u32 id, sf_id;
+
+	while (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) {
+		id = read_fw_trace(trace_seq_data, 0);
+		trace_seq_data->idx++;
+		if (!ROGUE_FW_LOG_VALIDID(id))
+			continue;
+		if (id == ROGUE_FW_SF_MAIN_ASSERT_FAILED) {
+			/* Assertion failure marks the end of the trace. */
+			return false;
+		}
+
+		sf_id = find_sfid(id);
+		if (sf_id == ROGUE_FW_SF_FIRST)
+			continue;
+		if (sf_id == ROGUE_FW_SF_LAST) {
+			/*
+			 * Could not match with an ID in the SF table, trace is
+			 * most likely corrupt from this point.
+			 */
+			return false;
+		}
+
+		/* Skip over the timestamp, and any parameters. */
+		trace_seq_data->idx += 2 + ROGUE_FW_SF_PARAMNUM(id);
+
+		/* Ensure index is now pointing to a valid trace entry. */
+		id = read_fw_trace(trace_seq_data, 0);
+		if (!ROGUE_FW_LOG_VALIDID(id))
+			continue;
+
+		return true;
+	};
+
+	/* Hit end of trace data. */
+	return false;
+}
+
+/**
+ * fw_trace_get_first() - Find first valid entry in trace
+ * @trace_seq_data: Trace sequence data.
+ *
+ * Skips over invalid (usually zero) and ROGUE_FW_SF_FIRST entries.
+ *
+ * If the trace has no valid entries, this function will exit with the trace
+ * index pointing to the end of the trace. trace_seq_show() will return an error
+ * in this state.
+ */
+static void fw_trace_get_first(struct pvr_fw_trace_seq_data *trace_seq_data)
+{
+	trace_seq_data->idx = 0;
+
+	while (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) {
+		u32 id = read_fw_trace(trace_seq_data, 0);
+
+		if (ROGUE_FW_LOG_VALIDID(id)) {
+			u32 sf_id = find_sfid(id);
+
+			if (sf_id != ROGUE_FW_SF_FIRST)
+				break;
+		}
+		trace_seq_data->idx++;
+	}
+}
+
+static void *fw_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct pvr_fw_trace_seq_data *trace_seq_data = s->private;
+	u32 i;
+
+	/* Reset trace index, then advance to *pos. */
+	fw_trace_get_first(trace_seq_data);
+
+	for (i = 0; i < *pos; i++) {
+		if (!fw_trace_get_next(trace_seq_data))
+			return NULL;
+	}
+
+	return (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) ? pos : NULL;
+}
+
+static void *fw_trace_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct pvr_fw_trace_seq_data *trace_seq_data = s->private;
+
+	(*pos)++;
+	if (!fw_trace_get_next(trace_seq_data))
+		return NULL;
+
+	return (trace_seq_data->idx < ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS) ? pos : NULL;
+}
+
+static void fw_trace_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int fw_trace_seq_show(struct seq_file *s, void *v)
+{
+	struct pvr_fw_trace_seq_data *trace_seq_data = s->private;
+	u64 timestamp;
+	u32 id;
+	u32 sf_id;
+
+	if (trace_seq_data->idx >= ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS)
+		return -EINVAL;
+
+	id = read_fw_trace(trace_seq_data, 0);
+	/* Index is not pointing at a valid entry. */
+	if (!ROGUE_FW_LOG_VALIDID(id))
+		return -EINVAL;
+
+	sf_id = find_sfid(id);
+	/* Index is not pointing at a valid entry. */
+	if (sf_id == ROGUE_FW_SF_LAST)
+		return -EINVAL;
+
+	timestamp = read_fw_trace(trace_seq_data, 1) |
+		((u64)read_fw_trace(trace_seq_data, 2) << 32);
+	timestamp = (timestamp & ~ROGUE_FWT_TIMESTAMP_TIME_CLRMSK) >>
+		ROGUE_FWT_TIMESTAMP_TIME_SHIFT;
+
+	seq_printf(s, "[%llu] : ", timestamp);
+	if (id == ROGUE_FW_SF_MAIN_ASSERT_FAILED) {
+		seq_printf(s, "ASSERTION %s failed at %s:%u",
+			   trace_seq_data->assert_buf.info,
+			   trace_seq_data->assert_buf.path,
+			   trace_seq_data->assert_buf.line_num);
+	} else {
+		seq_printf(s, stid_fmts[sf_id].name,
+			   read_fw_trace(trace_seq_data, 3),
+			   read_fw_trace(trace_seq_data, 4),
+			   read_fw_trace(trace_seq_data, 5),
+			   read_fw_trace(trace_seq_data, 6),
+			   read_fw_trace(trace_seq_data, 7),
+			   read_fw_trace(trace_seq_data, 8),
+			   read_fw_trace(trace_seq_data, 9),
+			   read_fw_trace(trace_seq_data, 10),
+			   read_fw_trace(trace_seq_data, 11),
+			   read_fw_trace(trace_seq_data, 12),
+			   read_fw_trace(trace_seq_data, 13),
+			   read_fw_trace(trace_seq_data, 14),
+			   read_fw_trace(trace_seq_data, 15),
+			   read_fw_trace(trace_seq_data, 16),
+			   read_fw_trace(trace_seq_data, 17),
+			   read_fw_trace(trace_seq_data, 18),
+			   read_fw_trace(trace_seq_data, 19),
+			   read_fw_trace(trace_seq_data, 20),
+			   read_fw_trace(trace_seq_data, 21),
+			   read_fw_trace(trace_seq_data, 22));
+	}
+	seq_puts(s, "\n");
+	return 0;
+}
+
+static const struct seq_operations pvr_fw_trace_seq_ops = {
+	.start = fw_trace_seq_start,
+	.next = fw_trace_seq_next,
+	.stop = fw_trace_seq_stop,
+	.show = fw_trace_seq_show
+};
+
+static int fw_trace_open(struct inode *inode, struct file *file)
+{
+	struct pvr_fw_trace_buffer *trace_buffer = inode->i_private;
+	struct rogue_fwif_tracebuf_space *tracebuf_space =
+		trace_buffer->tracebuf_space;
+	struct pvr_fw_trace_seq_data *trace_seq_data;
+	int err;
+
+	trace_seq_data = kzalloc(sizeof(*trace_seq_data), GFP_KERNEL);
+	if (!trace_seq_data)
+		return -ENOMEM;
+
+	trace_seq_data->buffer = kcalloc(ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS,
+					 sizeof(*trace_seq_data->buffer), GFP_KERNEL);
+	if (!trace_seq_data->buffer) {
+		err = -ENOMEM;
+		goto err_free_data;
+	}
+
+	/*
+	 * Take a local copy of the trace buffer, as firmware may still be
+	 * writing to it. This will exist as long as this file is open.
+	 */
+	memcpy(trace_seq_data->buffer, trace_buffer->buf,
+	       ROGUE_FW_TRACE_BUF_DEFAULT_SIZE_IN_DWORDS * sizeof(u32));
+	trace_seq_data->start_offset = READ_ONCE(tracebuf_space->trace_pointer);
+	trace_seq_data->assert_buf = tracebuf_space->assert_buf;
+	fw_trace_get_first(trace_seq_data);
+
+	err = seq_open(file, &pvr_fw_trace_seq_ops);
+	if (err)
+		goto err_free_buffer;
+
+	((struct seq_file *)file->private_data)->private = trace_seq_data;
+
+	return 0;
+
+err_free_buffer:
+	kfree(trace_seq_data->buffer);
+
+err_free_data:
+	kfree(trace_seq_data);
+
+	return err;
+}
+
+static int fw_trace_release(struct inode *inode, struct file *file)
+{
+	struct pvr_fw_trace_seq_data *trace_seq_data =
+		((struct seq_file *)file->private_data)->private;
+
+	seq_release(inode, file);
+	kfree(trace_seq_data->buffer);
+	kfree(trace_seq_data);
+
+	return 0;
+}
+
+static const struct file_operations pvr_fw_trace_fops = {
+	.owner = THIS_MODULE,
+	.open = fw_trace_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = fw_trace_release,
+};
+
+void
+pvr_fw_trace_mask_update(struct pvr_device *pvr_dev, u32 old_mask, u32 new_mask)
+{
+	if (old_mask != new_mask)
+		update_logtype(pvr_dev, new_mask);
+}
+
+void
+pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir)
+{
+	struct pvr_fw_trace *fw_trace = &pvr_dev->fw_dev.fw_trace;
+	u32 thread_nr;
+
+	static_assert(ARRAY_SIZE(fw_trace->buffers) <= 10,
+		      "The filename buffer is only large enough for a single-digit thread count");
+
+	for (thread_nr = 0; thread_nr < ARRAY_SIZE(fw_trace->buffers); ++thread_nr) {
+		char filename[8];
+
+		snprintf(filename, ARRAY_SIZE(filename), "trace_%u", thread_nr);
+		debugfs_create_file(filename, 0400, dir,
+				    &fw_trace->buffers[thread_nr],
+				    &pvr_fw_trace_fops);
+	}
+}
+#endif
diff --git a/drivers/gpu/drm/imagination/pvr_params.c b/drivers/gpu/drm/imagination/pvr_params.c
new file mode 100644
index 0000000000000..b91759f362c57
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_params.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_params.h"
+
+#include <linux/cache.h>
+#include <linux/moduleparam.h>
+
+static struct pvr_device_params pvr_device_param_defaults __read_mostly = {
+#define X(type_, name_, value_, desc_, ...) .name_ = (value_),
+	PVR_DEVICE_PARAMS
+#undef X
+};
+
+#define PVR_DEVICE_PARAM_NAMED(name_, type_, desc_) \
+	module_param_named(name_, pvr_device_param_defaults.name_, type_, \
+			   0400);                                         \
+	MODULE_PARM_DESC(name_, desc_);
+
+/*
+ * This list of defines must contain every type specified in "pvr_params.h" as
+ * ``PVR_PARAM_TYPE_*_C``.
+ */
+#define PVR_PARAM_TYPE_X32_MODPARAM uint
+
+#define X(type_, name_, value_, desc_, ...) \
+	PVR_DEVICE_PARAM_NAMED(name_, PVR_PARAM_TYPE_##type_##_MODPARAM, desc_);
+PVR_DEVICE_PARAMS
+#undef X
+
+int
+pvr_device_params_init(struct pvr_device_params *params)
+{
+	/*
+	 * If heap-allocated parameters are added in the future (e.g.
+	 * modparam's charp type), they must be handled specially here (via
+	 * kstrdup() in the case of charp). Since that's not necessary yet,
+	 * a straight copy will do for now. This change will also require a
+	 * pvr_device_params_fini() function to free any heap-allocated copies.
+	 */
+
+	*params = pvr_device_param_defaults;
+
+	return 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+#include "pvr_device.h"
+
+#include <linux/dcache.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/stddef.h>
+
+/*
+ * This list of defines must contain every type specified in "pvr_params.h" as
+ * ``PVR_PARAM_TYPE_*_C``.
+ */
+#define PVR_PARAM_TYPE_X32_FMT "0x%08llx"
+
+#define X_SET(name_, mode_) X_SET_##mode_(name_)
+#define X_SET_DEF(name_, update_, mode_) X_SET_DEF_##mode_(name_, update_)
+
+#define X_SET_RO(name_) NULL
+#define X_SET_RW(name_) __pvr_device_param_##name_##set
+
+#define X_SET_DEF_RO(name_, update_)
+#define X_SET_DEF_RW(name_, update_)                                    \
+	static int                                                      \
+	X_SET_RW(name_)(void *data, u64 val)                            \
+	{                                                               \
+		struct pvr_device *pvr_dev = data;                      \
+		/* This is not just (update_) to suppress -Waddress. */ \
+		if ((void *)(update_) != NULL)                          \
+			(update_)(pvr_dev, pvr_dev->params.name_, val); \
+		pvr_dev->params.name_ = val;                            \
+		return 0;                                               \
+	}
+
+#define X(type_, name_, value_, desc_, mode_, update_)                     \
+	static int                                                         \
+	__pvr_device_param_##name_##_get(void *data, u64 *val)             \
+	{                                                                  \
+		struct pvr_device *pvr_dev = data;                         \
+		*val = pvr_dev->params.name_;                              \
+		return 0;                                                  \
+	}                                                                  \
+	X_SET_DEF(name_, update_, mode_)                                   \
+	static int                                                         \
+	__pvr_device_param_##name_##_open(struct inode *inode,             \
+					  struct file *file)               \
+	{                                                                  \
+		__simple_attr_check_format(PVR_PARAM_TYPE_##type_##_FMT,   \
+					   0ull);                          \
+		return simple_attr_open(inode, file,                       \
+					__pvr_device_param_##name_##_get,  \
+					X_SET(name_, mode_),               \
+					PVR_PARAM_TYPE_##type_##_FMT);     \
+	}
+PVR_DEVICE_PARAMS
+#undef X
+
+#undef X_SET
+#undef X_SET_RO
+#undef X_SET_RW
+#undef X_SET_DEF
+#undef X_SET_DEF_RO
+#undef X_SET_DEF_RW
+
+static struct {
+#define X(type_, name_, value_, desc_, mode_, update_) \
+	const struct file_operations name_;
+	PVR_DEVICE_PARAMS
+#undef X
+} pvr_device_param_debugfs_fops = {
+#define X(type_, name_, value_, desc_, mode_, update_)     \
+	.name_ = {                                         \
+		.owner = THIS_MODULE,                      \
+		.open = __pvr_device_param_##name_##_open, \
+		.release = simple_attr_release,            \
+		.read = simple_attr_read,                  \
+		.write = simple_attr_write,                \
+		.llseek = generic_file_llseek,             \
+	},
+	PVR_DEVICE_PARAMS
+#undef X
+};
+
+void
+pvr_params_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir)
+{
+#define X_MODE(mode_) X_MODE_##mode_
+#define X_MODE_RO 0400
+#define X_MODE_RW 0600
+
+#define X(type_, name_, value_, desc_, mode_, update_)             \
+	debugfs_create_file(#name_, X_MODE(mode_), dir, pvr_dev,   \
+			    &pvr_device_param_debugfs_fops.name_);
+	PVR_DEVICE_PARAMS
+#undef X
+
+#undef X_MODE
+#undef X_MODE_RO
+#undef X_MODE_RW
+}
+#endif
diff --git a/drivers/gpu/drm/imagination/pvr_params.h b/drivers/gpu/drm/imagination/pvr_params.h
new file mode 100644
index 0000000000000..5807915b456bf
--- /dev/null
+++ b/drivers/gpu/drm/imagination/pvr_params.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_PARAMS_H
+#define PVR_PARAMS_H
+
+#include "pvr_rogue_fwif.h"
+
+#include <linux/cache.h>
+#include <linux/compiler_attributes.h>
+
+/*
+ * This is the definitive list of types allowed in the definition of
+ * %PVR_DEVICE_PARAMS.
+ */
+#define PVR_PARAM_TYPE_X32_C u32
+
+/*
+ * This macro defines all device-specific parameters; that is parameters which
+ * are set independently per device.
+ *
+ * The X-macro accepts the following arguments. Arguments marked with [debugfs]
+ * are ignored when debugfs is disabled; values used for these arguments may
+ * safely be gated behind CONFIG_DEBUG_FS.
+ *
+ * @type_: The definitive list of allowed values is PVR_PARAM_TYPE_*_C.
+ * @name_: Name of the parameter. This is used both as the field name in C and
+ *         stringified as the parameter name.
+ * @value_: Initial/default value.
+ * @desc_: String literal used as help text to describe the usage of this
+ *         parameter.
+ * @mode_: [debugfs] One of {RO,RW}. The access mode of the debugfs entry for
+ *         this parameter.
+ * @update_: [debugfs] When debugfs support is enabled, parameters may be
+ *           updated at runtime. When this happens, this function will be
+ *           called to allow changes to propagate. The signature of this
+ *           function is:
+ *
+ *              void (*)(struct pvr_device *pvr_dev, T old_val, T new_val)
+ *
+ *           Where T is the C type associated with @type_.
+ *
+ *           If @mode_ does not allow write access, this function will never be
+ *           called. In this case, or if no update callback is required, you
+ *           should specify NULL for this argument.
+ */
+#define PVR_DEVICE_PARAMS                                                    \
+	X(X32, fw_trace_mask, ROGUE_FWIF_LOG_TYPE_NONE,                      \
+	  "Enable FW trace for the specified groups. Specifying 0 disables " \
+	  "all FW tracing.",                                                 \
+	  RW, pvr_fw_trace_mask_update)
+
+struct pvr_device_params {
+#define X(type_, name_, value_, desc_, ...) \
+	PVR_PARAM_TYPE_##type_##_C name_;
+	PVR_DEVICE_PARAMS
+#undef X
+};
+
+int pvr_device_params_init(struct pvr_device_params *params);
+
+#if defined(CONFIG_DEBUG_FS)
+/* Forward declaration from "pvr_device.h". */
+struct pvr_device;
+
+/* Forward declaration from <linux/dcache.h>. */
+struct dentry;
+
+void pvr_params_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir);
+#endif /* defined(CONFIG_DEBUG_FS) */
+
+#endif /* PVR_PARAMS_H */
-- 
GitLab


From 815d8b0425ad1164e45953ac3d56a9f6f63792cc Mon Sep 17 00:00:00 2001
From: Sarah Walker <sarah.walker@imgtec.com>
Date: Wed, 22 Nov 2023 16:34:41 +0000
Subject: [PATCH 0416/2340] drm/imagination: Add driver documentation

Add documentation for the UAPI.

Changes since v5:
- Remove obsolete VM documentation

Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/76a7b18cfbe93066efcee3311ae795176ce7c65d.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 Documentation/gpu/drivers.rst           |   2 +
 Documentation/gpu/imagination/index.rst |  13 ++
 Documentation/gpu/imagination/uapi.rst  | 174 ++++++++++++++++++++++++
 MAINTAINERS                             |   1 +
 4 files changed, 190 insertions(+)
 create mode 100644 Documentation/gpu/imagination/index.rst
 create mode 100644 Documentation/gpu/imagination/uapi.rst

diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index 45a12e552091b..cc6535f5f28c3 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -3,9 +3,11 @@ GPU Driver Documentation
 ========================
 
 .. toctree::
+   :maxdepth: 3
 
    amdgpu/index
    i915
+   imagination/index
    mcde
    meson
    pl111
diff --git a/Documentation/gpu/imagination/index.rst b/Documentation/gpu/imagination/index.rst
new file mode 100644
index 0000000000000..dc9579e758c3e
--- /dev/null
+++ b/Documentation/gpu/imagination/index.rst
@@ -0,0 +1,13 @@
+=======================================
+drm/imagination PowerVR Graphics Driver
+=======================================
+
+.. kernel-doc:: drivers/gpu/drm/imagination/pvr_drv.c
+   :doc: PowerVR Graphics Driver
+
+Contents
+========
+.. toctree::
+   :maxdepth: 2
+
+   uapi
diff --git a/Documentation/gpu/imagination/uapi.rst b/Documentation/gpu/imagination/uapi.rst
new file mode 100644
index 0000000000000..2227ea7e6222c
--- /dev/null
+++ b/Documentation/gpu/imagination/uapi.rst
@@ -0,0 +1,174 @@
+====
+UAPI
+====
+The sources associated with this section can be found in ``pvr_drm.h``.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR UAPI
+
+OBJECT ARRAYS
+=============
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_obj_array
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: DRM_PVR_OBJ_ARRAY
+
+IOCTLS
+======
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: PVR_IOCTL
+
+DEV_QUERY
+---------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL DEV_QUERY interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_dev_query
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_dev_query_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_dev_query_gpu_info
+                 drm_pvr_dev_query_runtime_info
+                 drm_pvr_dev_query_hwrt_info
+                 drm_pvr_dev_query_quirks
+                 drm_pvr_dev_query_enhancements
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_heap_id
+                 drm_pvr_heap
+                 drm_pvr_dev_query_heap_info
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: Flags for DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_static_data_area_usage
+                 drm_pvr_static_data_area
+                 drm_pvr_dev_query_static_data_areas
+
+CREATE_BO
+---------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL CREATE_BO interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_create_bo_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: Flags for CREATE_BO
+
+GET_BO_MMAP_OFFSET
+------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL GET_BO_MMAP_OFFSET interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_get_bo_mmap_offset_args
+
+CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT
+----------------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_create_vm_context_args
+                 drm_pvr_ioctl_destroy_vm_context_args
+
+VM_MAP and VM_UNMAP
+-------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL VM_MAP and VM_UNMAP interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_vm_map_args
+                 drm_pvr_ioctl_vm_unmap_args
+
+CREATE_CONTEXT and DESTROY_CONTEXT
+----------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL CREATE_CONTEXT and DESTROY_CONTEXT interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_create_context_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ctx_priority
+                 drm_pvr_ctx_type
+                 drm_pvr_static_render_context_state
+                 drm_pvr_static_render_context_state_format
+                 drm_pvr_reset_framework
+                 drm_pvr_reset_framework_format
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_destroy_context_args
+
+CREATE_FREE_LIST and DESTROY_FREE_LIST
+--------------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL CREATE_FREE_LIST and DESTROY_FREE_LIST interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_create_free_list_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_destroy_free_list_args
+
+CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET
+--------------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_create_hwrt_dataset_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_create_hwrt_geom_data_args
+                 drm_pvr_create_hwrt_rt_data_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_destroy_hwrt_dataset_args
+
+SUBMIT_JOBS
+-----------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: PowerVR IOCTL SUBMIT_JOBS interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: Flags for the drm_pvr_sync_op object.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_ioctl_submit_jobs_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: Flags for SUBMIT_JOB ioctl geometry command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: Flags for SUBMIT_JOB ioctl fragment command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: Flags for SUBMIT_JOB ioctl compute command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :doc: Flags for SUBMIT_JOB ioctl transfer command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+   :identifiers: drm_pvr_sync_op
+                 drm_pvr_job_type
+                 drm_pvr_hwrt_data_ref
+                 drm_pvr_job
+
+Internal notes
+==============
+.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h
+   :doc: IOCTL validation helpers
+
+.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h
+   :identifiers: PVR_STATIC_ASSERT_64BIT_ALIGNED PVR_IOCTL_UNION_PADDING_CHECK
+                 pvr_ioctl_union_padding_check
diff --git a/MAINTAINERS b/MAINTAINERS
index debbc1956b8e8..a6d8a15a23dd8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10398,6 +10398,7 @@ M:	Donald Robson <donald.robson@imgtec.com>
 M:	Matt Coster <matt.coster@imgtec.com>
 S:	Supported
 F:	Documentation/devicetree/bindings/gpu/img,powervr.yaml
+F:	Documentation/gpu/imagination/
 F:	drivers/gpu/drm/imagination/
 F:	include/uapi/drm/pvr_drm.h
 
-- 
GitLab


From bc53c4d56eb24dbe56cd2c66ef4e9fc9393b1533 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:14 +0200
Subject: [PATCH 0417/2340] drm/i915: Check pipe active state in
 {planes,vrr}_{enabling,disabling}()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

{planes,vrr}_{enabling,disabling}() are supposed to indicate
whether the specific hardware feature is supposed to be enabling
or disabling. That can only makes sense if the pipe is active
overall. So check for that before we go poking at the hardware.

I think we're semi-safe currently on due to:
- intel_pre_plane_update() doesn't get called when the pipe
  was not-active prior to the commit, but this is actually a bug.
  This saves vrr_disabling(), and vrr_enabling() is called from
  deeper down where we have already checked hw.active.
- active_planes mirrors the crtc's hw.active

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-2-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index f8503d917c3d0..78d8e9e19cfb9 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -901,12 +901,18 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state)
 static bool planes_enabling(const struct intel_crtc_state *old_crtc_state,
 			    const struct intel_crtc_state *new_crtc_state)
 {
+	if (!new_crtc_state->hw.active)
+		return false;
+
 	return is_enabling(active_planes, old_crtc_state, new_crtc_state);
 }
 
 static bool planes_disabling(const struct intel_crtc_state *old_crtc_state,
 			     const struct intel_crtc_state *new_crtc_state)
 {
+	if (!old_crtc_state->hw.active)
+		return false;
+
 	return is_disabling(active_planes, old_crtc_state, new_crtc_state);
 }
 
@@ -923,6 +929,9 @@ static bool vrr_params_changed(const struct intel_crtc_state *old_crtc_state,
 static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state,
 			 const struct intel_crtc_state *new_crtc_state)
 {
+	if (!new_crtc_state->hw.active)
+		return false;
+
 	return is_enabling(vrr.enable, old_crtc_state, new_crtc_state) ||
 		(new_crtc_state->vrr.enable &&
 		 (new_crtc_state->update_m_n || new_crtc_state->update_lrr ||
@@ -932,6 +941,9 @@ static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state,
 static bool vrr_disabling(const struct intel_crtc_state *old_crtc_state,
 			  const struct intel_crtc_state *new_crtc_state)
 {
+	if (!old_crtc_state->hw.active)
+		return false;
+
 	return is_disabling(vrr.enable, old_crtc_state, new_crtc_state) ||
 		(old_crtc_state->vrr.enable &&
 		 (new_crtc_state->update_m_n || new_crtc_state->update_lrr ||
-- 
GitLab


From e0d5ce11ed0a21bb2bf328ad82fd261783c7ad88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:15 +0200
Subject: [PATCH 0418/2340] drm/i915: Call intel_pre_plane_updates() also for
 pipes getting enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We used to call intel_pre_plane_updates() for any pipe going through
a modeset whether the pipe was previously enabled or not. This in
fact needed to apply all the necessary clock gating workarounds/etc.
Restore the correct behaviour.

Fixes: 39919997322f ("drm/i915: Disable all planes before modesetting any pipes")
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-3-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 78d8e9e19cfb9..6adc32e2f4774 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6694,10 +6694,11 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state)
 		if (!intel_crtc_needs_modeset(new_crtc_state))
 			continue;
 
+		intel_pre_plane_update(state, crtc);
+
 		if (!old_crtc_state->hw.active)
 			continue;
 
-		intel_pre_plane_update(state, crtc);
 		intel_crtc_disable_planes(state, crtc);
 	}
 
-- 
GitLab


From e4fb7f894ed48f6fb5b1ca61ade44a92c425444b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:16 +0200
Subject: [PATCH 0419/2340] drm/i915: Polish some RMWs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Doing the if-else around RMWs is kinda silly. Just set/clear the
apporiate bits with a single RMW.

Also unify the coding style a bit icl_wa_cursorclkgating() while at it.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-4-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 6adc32e2f4774..98ea3591665fd 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -193,12 +193,9 @@ static bool is_hdr_mode(const struct intel_crtc_state *crtc_state)
 static void
 skl_wa_827(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable)
 {
-	if (enable)
-		intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe),
-			     0, DUPS1_GATING_DIS | DUPS2_GATING_DIS);
-	else
-		intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe),
-			     DUPS1_GATING_DIS | DUPS2_GATING_DIS, 0);
+	intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe),
+		     DUPS1_GATING_DIS | DUPS2_GATING_DIS,
+		     enable ? DUPS1_GATING_DIS | DUPS2_GATING_DIS : 0);
 }
 
 /* Wa_2006604312:icl,ehl */
@@ -206,10 +203,9 @@ static void
 icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe,
 		       bool enable)
 {
-	if (enable)
-		intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), 0, DPFR_GATING_DIS);
-	else
-		intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), DPFR_GATING_DIS, 0);
+	intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe),
+		     DPFR_GATING_DIS,
+		     enable ? DPFR_GATING_DIS : 0);
 }
 
 /* Wa_1604331009:icl,jsl,ehl */
@@ -217,7 +213,8 @@ static void
 icl_wa_cursorclkgating(struct drm_i915_private *dev_priv, enum pipe pipe,
 		       bool enable)
 {
-	intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe), CURSOR_GATING_DIS,
+	intel_de_rmw(dev_priv, CLKGATE_DIS_PSL(pipe),
+		     CURSOR_GATING_DIS,
 		     enable ? CURSOR_GATING_DIS : 0);
 }
 
-- 
GitLab


From 7966a93a27cfea1d9ceae3be1298be06184f5afe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:17 +0200
Subject: [PATCH 0420/2340] drm/i915: Push audio enable/disable further out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Push the audio enable/disable to be the last/first thing
respectively that is done in the encoder enable/disable hooks.
The goal is to move it further out of these encoder hooks entirely.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-5-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/g4x_dp.c       |  8 ++++----
 drivers/gpu/drm/i915/display/intel_ddi.c    | 12 ++++--------
 drivers/gpu/drm/i915/display/intel_dp_mst.c |  8 ++++----
 3 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index e8ee0a08947e8..79ef2b435bebd 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -482,10 +482,10 @@ static void intel_disable_dp(struct intel_atomic_state *state,
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
-	intel_dp->link_trained = false;
-
 	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
 
+	intel_dp->link_trained = false;
+
 	/*
 	 * Make sure the panel is off before trying to change the mode.
 	 * But also ensure that we have vdd while we switch off the panel.
@@ -686,8 +686,8 @@ static void g4x_enable_dp(struct intel_atomic_state *state,
 			  const struct drm_connector_state *conn_state)
 {
 	intel_enable_dp(state, encoder, pipe_config, conn_state);
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 	intel_edp_backlight_on(pipe_config, conn_state);
+	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 }
 
 static void vlv_enable_dp(struct intel_atomic_state *state,
@@ -695,8 +695,8 @@ static void vlv_enable_dp(struct intel_atomic_state *state,
 			  const struct intel_crtc_state *pipe_config,
 			  const struct drm_connector_state *conn_state)
 {
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 	intel_edp_backlight_on(pipe_config, conn_state);
+	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 }
 
 static void g4x_pre_enable_dp(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index f70af660dfcfa..ca7ca3951de78 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3201,8 +3201,6 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state,
 	if (!dig_port->lspcon.active || intel_dp_has_hdmi_sink(&dig_port->dp))
 		intel_dp_set_infoframes(encoder, true, crtc_state, conn_state);
 
-	intel_audio_codec_enable(encoder, crtc_state, conn_state);
-
 	trans_port_sync_stop_link_train(state, encoder, crtc_state);
 }
 
@@ -3333,8 +3331,6 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state,
 	intel_de_write(dev_priv, DDI_BUF_CTL(port), buf_ctl);
 
 	intel_wait_ddi_buf_active(dev_priv, port);
-
-	intel_audio_codec_enable(encoder, crtc_state, conn_state);
 }
 
 static void intel_enable_ddi(struct intel_atomic_state *state,
@@ -3362,6 +3358,8 @@ static void intel_enable_ddi(struct intel_atomic_state *state,
 		intel_enable_ddi_dp(state, encoder, crtc_state, conn_state);
 
 	intel_hdcp_enable(state, encoder, crtc_state, conn_state);
+
+	intel_audio_codec_enable(encoder, crtc_state, conn_state);
 }
 
 static void intel_disable_ddi_dp(struct intel_atomic_state *state,
@@ -3375,8 +3373,6 @@ static void intel_disable_ddi_dp(struct intel_atomic_state *state,
 
 	intel_dp->link_trained = false;
 
-	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
-
 	intel_psr_disable(intel_dp, old_crtc_state);
 	intel_edp_backlight_off(old_conn_state);
 	/* Disable the decompression in DP Sink */
@@ -3395,8 +3391,6 @@ static void intel_disable_ddi_hdmi(struct intel_atomic_state *state,
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	struct drm_connector *connector = old_conn_state->connector;
 
-	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
-
 	if (!intel_hdmi_handle_sink_scrambling(encoder, connector,
 					       false, false))
 		drm_dbg_kms(&i915->drm,
@@ -3409,6 +3403,8 @@ static void intel_disable_ddi(struct intel_atomic_state *state,
 			      const struct intel_crtc_state *old_crtc_state,
 			      const struct drm_connector_state *old_conn_state)
 {
+	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+
 	intel_tc_port_link_cancel_reset_work(enc_to_dig_port(encoder));
 
 	intel_hdcp_disable(to_intel_connector(old_conn_state->connector));
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index d3d53e1b44891..7f6533c99e2c0 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -918,10 +918,10 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 	drm_dbg_kms(&i915->drm, "active links %d\n",
 		    intel_dp->active_mst_links);
 
-	intel_hdcp_disable(intel_mst->connector);
-
 	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
 
+	intel_hdcp_disable(intel_mst->connector);
+
 	intel_dp_sink_disable_decompression(state, connector, old_crtc_state);
 }
 
@@ -1164,9 +1164,9 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 
 	intel_crtc_vblank_on(pipe_config);
 
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
-
 	intel_hdcp_enable(state, encoder, pipe_config, conn_state);
+
+	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 }
 
 static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
-- 
GitLab


From ceb53adad7e3cb4806d5fadcd583eade32a6b915 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:18 +0200
Subject: [PATCH 0421/2340] drm/i915: Wrap g4x+ DP/HDMI audio enable/disable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Put a wrapper around the intel_audio_codec_{enable,disable}()
calls in the g4x+ DP/HDMI code. We shall move the presence
detect enable/disable into the wrappers later.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-6-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/g4x_dp.c   | 26 ++++++++++++--
 drivers/gpu/drm/i915/display/g4x_hdmi.c | 48 +++++++++++++++----------
 2 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index 79ef2b435bebd..ecc2ec8664245 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -475,6 +475,26 @@ intel_dp_link_down(struct intel_encoder *encoder,
 	}
 }
 
+static void g4x_dp_audio_enable(struct intel_encoder *encoder,
+				const struct intel_crtc_state *crtc_state,
+				const struct drm_connector_state *conn_state)
+{
+	if (!crtc_state->has_audio)
+		return;
+
+	intel_audio_codec_enable(encoder, crtc_state, conn_state);
+}
+
+static void g4x_dp_audio_disable(struct intel_encoder *encoder,
+				 const struct intel_crtc_state *old_crtc_state,
+				 const struct drm_connector_state *old_conn_state)
+{
+	if (!old_crtc_state->has_audio)
+		return;
+
+	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+}
+
 static void intel_disable_dp(struct intel_atomic_state *state,
 			     struct intel_encoder *encoder,
 			     const struct intel_crtc_state *old_crtc_state,
@@ -482,7 +502,7 @@ static void intel_disable_dp(struct intel_atomic_state *state,
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
-	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+	g4x_dp_audio_disable(encoder, old_crtc_state, old_conn_state);
 
 	intel_dp->link_trained = false;
 
@@ -687,7 +707,7 @@ static void g4x_enable_dp(struct intel_atomic_state *state,
 {
 	intel_enable_dp(state, encoder, pipe_config, conn_state);
 	intel_edp_backlight_on(pipe_config, conn_state);
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
+	g4x_dp_audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void vlv_enable_dp(struct intel_atomic_state *state,
@@ -696,7 +716,7 @@ static void vlv_enable_dp(struct intel_atomic_state *state,
 			  const struct drm_connector_state *conn_state)
 {
 	intel_edp_backlight_on(pipe_config, conn_state);
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
+	g4x_dp_audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void g4x_pre_enable_dp(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c
index 45e044b4a88db..9c70245d8b0a4 100644
--- a/drivers/gpu/drm/i915/display/g4x_hdmi.c
+++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c
@@ -235,18 +235,38 @@ static void g4x_hdmi_enable_port(struct intel_encoder *encoder,
 	intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg);
 }
 
+static void g4x_hdmi_audio_enable(struct intel_encoder *encoder,
+				  const struct intel_crtc_state *crtc_state,
+				  const struct drm_connector_state *conn_state)
+{
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+
+	if (!crtc_state->has_audio)
+		return;
+
+	drm_WARN_ON(&i915->drm, !crtc_state->has_hdmi_sink);
+
+	intel_audio_codec_enable(encoder, crtc_state, conn_state);
+}
+
+static void g4x_hdmi_audio_disable(struct intel_encoder *encoder,
+				   const struct intel_crtc_state *old_crtc_state,
+				   const struct drm_connector_state *old_conn_state)
+{
+	if (!old_crtc_state->has_audio)
+		return;
+
+	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+}
+
 static void g4x_enable_hdmi(struct intel_atomic_state *state,
 			    struct intel_encoder *encoder,
 			    const struct intel_crtc_state *pipe_config,
 			    const struct drm_connector_state *conn_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-
 	g4x_hdmi_enable_port(encoder, pipe_config);
 
-	drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio &&
-		    !pipe_config->has_hdmi_sink);
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
+	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void ibx_enable_hdmi(struct intel_atomic_state *state,
@@ -297,9 +317,7 @@ static void ibx_enable_hdmi(struct intel_atomic_state *state,
 		intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg);
 	}
 
-	drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio &&
-		    !pipe_config->has_hdmi_sink);
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
+	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void cpt_enable_hdmi(struct intel_atomic_state *state,
@@ -352,9 +370,7 @@ static void cpt_enable_hdmi(struct intel_atomic_state *state,
 			     TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE, 0);
 	}
 
-	drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio &&
-		    !pipe_config->has_hdmi_sink);
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
+	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void vlv_enable_hdmi(struct intel_atomic_state *state,
@@ -362,11 +378,7 @@ static void vlv_enable_hdmi(struct intel_atomic_state *state,
 			    const struct intel_crtc_state *pipe_config,
 			    const struct drm_connector_state *conn_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-
-	drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio &&
-		    !pipe_config->has_hdmi_sink);
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
+	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void intel_disable_hdmi(struct intel_atomic_state *state,
@@ -433,7 +445,7 @@ static void g4x_disable_hdmi(struct intel_atomic_state *state,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+	g4x_hdmi_audio_disable(encoder, old_crtc_state, old_conn_state);
 
 	intel_disable_hdmi(state, encoder, old_crtc_state, old_conn_state);
 }
@@ -443,7 +455,7 @@ static void pch_disable_hdmi(struct intel_atomic_state *state,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+	g4x_hdmi_audio_disable(encoder, old_crtc_state, old_conn_state);
 }
 
 static void pch_post_disable_hdmi(struct intel_atomic_state *state,
-- 
GitLab


From 0195e381b14fc8b16f359cbf45193bcdaaf5cd27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:19 +0200
Subject: [PATCH 0422/2340] drm/i915: Split g4x+ DP audio presence detect from
 port enable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow the hsw+ approach toggle the audio presence detect
when we set up the ELD, instead of doing it when turning the
port on/off.

This will facilitate audio enable/disable to happen during
fastsets instead of requiring a full modeset.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-7-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/g4x_dp.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index ecc2ec8664245..266cb594d7939 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -432,7 +432,7 @@ intel_dp_link_down(struct intel_encoder *encoder,
 	intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
 	intel_de_posting_read(dev_priv, intel_dp->output_reg);
 
-	intel_dp->DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE);
+	intel_dp->DP &= ~DP_PORT_EN;
 	intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
 	intel_de_posting_read(dev_priv, intel_dp->output_reg);
 
@@ -479,9 +479,16 @@ static void g4x_dp_audio_enable(struct intel_encoder *encoder,
 				const struct intel_crtc_state *crtc_state,
 				const struct drm_connector_state *conn_state)
 {
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
 	if (!crtc_state->has_audio)
 		return;
 
+	/* Enable audio presence detect */
+	intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
+	intel_de_write(i915, intel_dp->output_reg, intel_dp->DP);
+
 	intel_audio_codec_enable(encoder, crtc_state, conn_state);
 }
 
@@ -489,10 +496,17 @@ static void g4x_dp_audio_disable(struct intel_encoder *encoder,
 				 const struct intel_crtc_state *old_crtc_state,
 				 const struct drm_connector_state *old_conn_state)
 {
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
 	if (!old_crtc_state->has_audio)
 		return;
 
 	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+
+	/* Disable audio presence detect */
+	intel_dp->DP &= ~DP_AUDIO_OUTPUT_ENABLE;
+	intel_de_write(i915, intel_dp->output_reg, intel_dp->DP);
 }
 
 static void intel_disable_dp(struct intel_atomic_state *state,
@@ -651,8 +665,6 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp,
 	 * fail when the power sequencer is freshly used for this port.
 	 */
 	intel_dp->DP |= DP_PORT_EN;
-	if (crtc_state->has_audio)
-		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
 
 	intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
 	intel_de_posting_read(dev_priv, intel_dp->output_reg);
-- 
GitLab


From 4645e8980479a0cbfa99bdd07c562cec1597e9cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:20 +0200
Subject: [PATCH 0423/2340] drm/i915: Split g4x+ HDMI audio presence detect
 from port enable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow the hsw+ approach toggle the audio presence detect
when we set up the ELD, instead of doing it when turning the
port on/off.

This will facilitate audio enable/disable to happen during
fastsets instead of requiring a full modeset.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-8-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/g4x_hdmi.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c
index 9c70245d8b0a4..beda6b480bf1c 100644
--- a/drivers/gpu/drm/i915/display/g4x_hdmi.c
+++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c
@@ -228,8 +228,6 @@ static void g4x_hdmi_enable_port(struct intel_encoder *encoder,
 	temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg);
 
 	temp |= SDVO_ENABLE;
-	if (pipe_config->has_audio)
-		temp |= HDMI_AUDIO_ENABLE;
 
 	intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp);
 	intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg);
@@ -240,12 +238,16 @@ static void g4x_hdmi_audio_enable(struct intel_encoder *encoder,
 				  const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder);
 
 	if (!crtc_state->has_audio)
 		return;
 
 	drm_WARN_ON(&i915->drm, !crtc_state->has_hdmi_sink);
 
+	/* Enable audio presence detect */
+	intel_de_rmw(i915, hdmi->hdmi_reg, 0, HDMI_AUDIO_ENABLE);
+
 	intel_audio_codec_enable(encoder, crtc_state, conn_state);
 }
 
@@ -253,10 +255,16 @@ static void g4x_hdmi_audio_disable(struct intel_encoder *encoder,
 				   const struct intel_crtc_state *old_crtc_state,
 				   const struct drm_connector_state *old_conn_state)
 {
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct intel_hdmi *hdmi = enc_to_intel_hdmi(encoder);
+
 	if (!old_crtc_state->has_audio)
 		return;
 
 	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+
+	/* Disable audio presence detect */
+	intel_de_rmw(i915, hdmi->hdmi_reg, HDMI_AUDIO_ENABLE, 0);
 }
 
 static void g4x_enable_hdmi(struct intel_atomic_state *state,
@@ -282,8 +290,6 @@ static void ibx_enable_hdmi(struct intel_atomic_state *state,
 	temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg);
 
 	temp |= SDVO_ENABLE;
-	if (pipe_config->has_audio)
-		temp |= HDMI_AUDIO_ENABLE;
 
 	/*
 	 * HW workaround, need to write this twice for issue
@@ -335,8 +341,6 @@ static void cpt_enable_hdmi(struct intel_atomic_state *state,
 	temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg);
 
 	temp |= SDVO_ENABLE;
-	if (pipe_config->has_audio)
-		temp |= HDMI_AUDIO_ENABLE;
 
 	/*
 	 * WaEnableHDMI8bpcBefore12bpc:snb,ivb
@@ -396,7 +400,7 @@ static void intel_disable_hdmi(struct intel_atomic_state *state,
 
 	temp = intel_de_read(dev_priv, intel_hdmi->hdmi_reg);
 
-	temp &= ~(SDVO_ENABLE | HDMI_AUDIO_ENABLE);
+	temp &= ~SDVO_ENABLE;
 	intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp);
 	intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg);
 
-- 
GitLab


From 3654a48ab16c243519c40849a61b617828a4a61e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:21 +0200
Subject: [PATCH 0424/2340] drm/i915: Convert audio enable/disable into encoder
 vfuncs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add encoder vfuncs for audio enable/disable. This will allow
audio to be enabled/disabled during fastsets. An encoder hook
is necessary as on pre-hsw platforms different encoder types
implement audio in different ways.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-9-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/g4x_dp.c         |  8 ++++---
 drivers/gpu/drm/i915/display/g4x_hdmi.c       | 14 ++++++-----
 drivers/gpu/drm/i915/display/intel_ddi.c      |  6 +++--
 .../drm/i915/display/intel_display_types.h    |  6 +++++
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  6 +++--
 drivers/gpu/drm/i915/display/intel_sdvo.c     | 23 ++++++++++++++-----
 6 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index 266cb594d7939..96232af42db23 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -516,7 +516,7 @@ static void intel_disable_dp(struct intel_atomic_state *state,
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
-	g4x_dp_audio_disable(encoder, old_crtc_state, old_conn_state);
+	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
 
 	intel_dp->link_trained = false;
 
@@ -719,7 +719,7 @@ static void g4x_enable_dp(struct intel_atomic_state *state,
 {
 	intel_enable_dp(state, encoder, pipe_config, conn_state);
 	intel_edp_backlight_on(pipe_config, conn_state);
-	g4x_dp_audio_enable(encoder, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void vlv_enable_dp(struct intel_atomic_state *state,
@@ -728,7 +728,7 @@ static void vlv_enable_dp(struct intel_atomic_state *state,
 			  const struct drm_connector_state *conn_state)
 {
 	intel_edp_backlight_on(pipe_config, conn_state);
-	g4x_dp_audio_enable(encoder, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void g4x_pre_enable_dp(struct intel_atomic_state *state,
@@ -1357,6 +1357,8 @@ bool g4x_dp_init(struct drm_i915_private *dev_priv,
 		intel_encoder->disable = g4x_disable_dp;
 		intel_encoder->post_disable = g4x_post_disable_dp;
 	}
+	intel_encoder->audio_enable = g4x_dp_audio_enable;
+	intel_encoder->audio_disable = g4x_dp_audio_disable;
 
 	if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) ||
 	    (HAS_PCH_CPT(dev_priv) && port != PORT_A))
diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c
index beda6b480bf1c..26a0981102ffd 100644
--- a/drivers/gpu/drm/i915/display/g4x_hdmi.c
+++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c
@@ -274,7 +274,7 @@ static void g4x_enable_hdmi(struct intel_atomic_state *state,
 {
 	g4x_hdmi_enable_port(encoder, pipe_config);
 
-	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void ibx_enable_hdmi(struct intel_atomic_state *state,
@@ -323,7 +323,7 @@ static void ibx_enable_hdmi(struct intel_atomic_state *state,
 		intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg);
 	}
 
-	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void cpt_enable_hdmi(struct intel_atomic_state *state,
@@ -374,7 +374,7 @@ static void cpt_enable_hdmi(struct intel_atomic_state *state,
 			     TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE, 0);
 	}
 
-	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void vlv_enable_hdmi(struct intel_atomic_state *state,
@@ -382,7 +382,7 @@ static void vlv_enable_hdmi(struct intel_atomic_state *state,
 			    const struct intel_crtc_state *pipe_config,
 			    const struct drm_connector_state *conn_state)
 {
-	g4x_hdmi_audio_enable(encoder, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void intel_disable_hdmi(struct intel_atomic_state *state,
@@ -449,7 +449,7 @@ static void g4x_disable_hdmi(struct intel_atomic_state *state,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	g4x_hdmi_audio_disable(encoder, old_crtc_state, old_conn_state);
+	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
 
 	intel_disable_hdmi(state, encoder, old_crtc_state, old_conn_state);
 }
@@ -459,7 +459,7 @@ static void pch_disable_hdmi(struct intel_atomic_state *state,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	g4x_hdmi_audio_disable(encoder, old_crtc_state, old_conn_state);
+	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
 }
 
 static void pch_post_disable_hdmi(struct intel_atomic_state *state,
@@ -766,6 +766,8 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv,
 		else
 			intel_encoder->enable = g4x_enable_hdmi;
 	}
+	intel_encoder->audio_enable = g4x_hdmi_audio_enable;
+	intel_encoder->audio_disable = g4x_hdmi_audio_disable;
 	intel_encoder->shutdown = intel_hdmi_encoder_shutdown;
 
 	intel_encoder->type = INTEL_OUTPUT_HDMI;
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index ca7ca3951de78..06558fd8d1f18 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3359,7 +3359,7 @@ static void intel_enable_ddi(struct intel_atomic_state *state,
 
 	intel_hdcp_enable(state, encoder, crtc_state, conn_state);
 
-	intel_audio_codec_enable(encoder, crtc_state, conn_state);
+	encoder->audio_enable(encoder, crtc_state, conn_state);
 }
 
 static void intel_disable_ddi_dp(struct intel_atomic_state *state,
@@ -3403,7 +3403,7 @@ static void intel_disable_ddi(struct intel_atomic_state *state,
 			      const struct intel_crtc_state *old_crtc_state,
 			      const struct drm_connector_state *old_conn_state)
 {
-	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
 
 	intel_tc_port_link_cancel_reset_work(enc_to_dig_port(encoder));
 
@@ -4936,6 +4936,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv,
 	encoder->post_pll_disable = intel_ddi_post_pll_disable;
 	encoder->post_disable = intel_ddi_post_disable;
 	encoder->update_pipe = intel_ddi_update_pipe;
+	encoder->audio_enable = intel_audio_codec_enable;
+	encoder->audio_disable = intel_audio_codec_disable;
 	encoder->get_hw_state = intel_ddi_get_hw_state;
 	encoder->sync_state = intel_ddi_sync_state;
 	encoder->initial_fastset_check = intel_ddi_initial_fastset_check;
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 9a44350ba05dd..b3e942f2eeb0f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -198,6 +198,12 @@ struct intel_encoder {
 			    struct intel_encoder *,
 			    const struct intel_crtc_state *,
 			    const struct drm_connector_state *);
+	void (*audio_enable)(struct intel_encoder *encoder,
+			     const struct intel_crtc_state *crtc_state,
+			     const struct drm_connector_state *conn_state);
+	void (*audio_disable)(struct intel_encoder *encoder,
+			      const struct intel_crtc_state *old_crtc_state,
+			      const struct drm_connector_state *old_conn_state);
 	/* Read out the current hw state of this connector, returning true if
 	 * the encoder is active. If the encoder is enabled it also set the pipe
 	 * it is connected to in the pipe parameter. */
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 7f6533c99e2c0..ebd72c56b124e 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -918,7 +918,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 	drm_dbg_kms(&i915->drm, "active links %d\n",
 		    intel_dp->active_mst_links);
 
-	intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
+	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
 
 	intel_hdcp_disable(intel_mst->connector);
 
@@ -1166,7 +1166,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 
 	intel_hdcp_enable(state, encoder, pipe_config, conn_state);
 
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
@@ -1626,6 +1626,8 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe
 	intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp;
 	intel_encoder->pre_enable = intel_mst_pre_enable_dp;
 	intel_encoder->enable = intel_mst_enable_dp;
+	intel_encoder->audio_enable = intel_audio_codec_enable;
+	intel_encoder->audio_disable = intel_audio_codec_disable;
 	intel_encoder->get_hw_state = intel_dp_mst_enc_get_hw_state;
 	intel_encoder->get_config = intel_dp_mst_enc_get_config;
 	intel_encoder->initial_fastset_check = intel_dp_mst_initial_fastset_check;
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index a636f42ceae55..c7af7e0461885 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1787,17 +1787,28 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,
 	intel_sdvo_get_eld(intel_sdvo, pipe_config);
 }
 
-static void intel_sdvo_disable_audio(struct intel_sdvo *intel_sdvo)
+static void intel_sdvo_disable_audio(struct intel_encoder *encoder,
+				     const struct intel_crtc_state *old_crtc_state,
+				     const struct drm_connector_state *old_conn_state)
 {
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
+
+	if (!old_crtc_state->has_audio)
+		return;
+
 	intel_sdvo_set_audio_state(intel_sdvo, 0);
 }
 
-static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo,
+static void intel_sdvo_enable_audio(struct intel_encoder *encoder,
 				    const struct intel_crtc_state *crtc_state,
 				    const struct drm_connector_state *conn_state)
 {
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
 	const u8 *eld = crtc_state->eld;
 
+	if (!crtc_state->has_audio)
+		return;
+
 	intel_sdvo_set_audio_state(intel_sdvo, 0);
 
 	intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_ELD,
@@ -1818,8 +1829,7 @@ static void intel_disable_sdvo(struct intel_atomic_state *state,
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
 	u32 temp;
 
-	if (old_crtc_state->has_audio)
-		intel_sdvo_disable_audio(intel_sdvo);
+	encoder->audio_disable(encoder, old_crtc_state, conn_state);
 
 	intel_sdvo_set_active_outputs(intel_sdvo, 0);
 	if (0)
@@ -1913,8 +1923,7 @@ static void intel_enable_sdvo(struct intel_atomic_state *state,
 						   DRM_MODE_DPMS_ON);
 	intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag);
 
-	if (pipe_config->has_audio)
-		intel_sdvo_enable_audio(intel_sdvo, pipe_config, conn_state);
+	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static enum drm_mode_status
@@ -3390,6 +3399,8 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv,
 	}
 	intel_encoder->pre_enable = intel_sdvo_pre_enable;
 	intel_encoder->enable = intel_enable_sdvo;
+	intel_encoder->audio_enable = intel_sdvo_enable_audio;
+	intel_encoder->audio_disable = intel_sdvo_disable_audio;
 	intel_encoder->get_hw_state = intel_sdvo_get_hw_state;
 	intel_encoder->get_config = intel_sdvo_get_config;
 
-- 
GitLab


From cff742cc6851f469ae1192877a308884a6439005 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:22 +0200
Subject: [PATCH 0425/2340] drm/i915: Hoist the
 encoder->audio_{enable,disable}() calls higher up
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Push the encoder->audio_{enable,disable}() calls out from the
encoder->{enable,disable}() hooks. Moving towards audio fastset.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-10-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/g4x_dp.c        |  2 -
 drivers/gpu/drm/i915/display/g4x_hdmi.c      | 10 ----
 drivers/gpu/drm/i915/display/intel_ddi.c     |  3 --
 drivers/gpu/drm/i915/display/intel_display.c | 49 ++++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_dp_mst.c  |  4 --
 5 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index 96232af42db23..dfe0b07a122d1 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -516,8 +516,6 @@ static void intel_disable_dp(struct intel_atomic_state *state,
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
-	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
-
 	intel_dp->link_trained = false;
 
 	/*
diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c
index 26a0981102ffd..8096492b3fade 100644
--- a/drivers/gpu/drm/i915/display/g4x_hdmi.c
+++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c
@@ -273,8 +273,6 @@ static void g4x_enable_hdmi(struct intel_atomic_state *state,
 			    const struct drm_connector_state *conn_state)
 {
 	g4x_hdmi_enable_port(encoder, pipe_config);
-
-	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void ibx_enable_hdmi(struct intel_atomic_state *state,
@@ -322,8 +320,6 @@ static void ibx_enable_hdmi(struct intel_atomic_state *state,
 		intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp);
 		intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg);
 	}
-
-	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void cpt_enable_hdmi(struct intel_atomic_state *state,
@@ -373,8 +369,6 @@ static void cpt_enable_hdmi(struct intel_atomic_state *state,
 		intel_de_rmw(dev_priv, TRANS_CHICKEN1(pipe),
 			     TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE, 0);
 	}
-
-	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void vlv_enable_hdmi(struct intel_atomic_state *state,
@@ -382,7 +376,6 @@ static void vlv_enable_hdmi(struct intel_atomic_state *state,
 			    const struct intel_crtc_state *pipe_config,
 			    const struct drm_connector_state *conn_state)
 {
-	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static void intel_disable_hdmi(struct intel_atomic_state *state,
@@ -449,8 +442,6 @@ static void g4x_disable_hdmi(struct intel_atomic_state *state,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
-
 	intel_disable_hdmi(state, encoder, old_crtc_state, old_conn_state);
 }
 
@@ -459,7 +450,6 @@ static void pch_disable_hdmi(struct intel_atomic_state *state,
 			     const struct intel_crtc_state *old_crtc_state,
 			     const struct drm_connector_state *old_conn_state)
 {
-	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
 }
 
 static void pch_post_disable_hdmi(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 06558fd8d1f18..38f28c480b38b 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3359,7 +3359,6 @@ static void intel_enable_ddi(struct intel_atomic_state *state,
 
 	intel_hdcp_enable(state, encoder, crtc_state, conn_state);
 
-	encoder->audio_enable(encoder, crtc_state, conn_state);
 }
 
 static void intel_disable_ddi_dp(struct intel_atomic_state *state,
@@ -3403,8 +3402,6 @@ static void intel_disable_ddi(struct intel_atomic_state *state,
 			      const struct intel_crtc_state *old_crtc_state,
 			      const struct drm_connector_state *old_conn_state)
 {
-	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
-
 	intel_tc_port_link_cancel_reset_work(enc_to_dig_port(encoder));
 
 	intel_hdcp_disable(to_intel_connector(old_conn_state->connector));
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 98ea3591665fd..61594690b1a94 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -888,6 +888,48 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state)
 		(DISPLAY_VER(i915) == 9 || IS_BROADWELL(i915) || IS_HASWELL(i915));
 }
 
+static void intel_encoders_audio_enable(struct intel_atomic_state *state,
+					struct intel_crtc *crtc)
+{
+	const struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	const struct drm_connector_state *conn_state;
+	struct drm_connector *conn;
+	int i;
+
+	for_each_new_connector_in_state(&state->base, conn, conn_state, i) {
+		struct intel_encoder *encoder =
+			to_intel_encoder(conn_state->best_encoder);
+
+		if (conn_state->crtc != &crtc->base)
+			continue;
+
+		if (encoder->audio_enable)
+			encoder->audio_enable(encoder, crtc_state, conn_state);
+	}
+}
+
+static void intel_encoders_audio_disable(struct intel_atomic_state *state,
+					 struct intel_crtc *crtc)
+{
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct drm_connector_state *old_conn_state;
+	struct drm_connector *conn;
+	int i;
+
+	for_each_old_connector_in_state(&state->base, conn, old_conn_state, i) {
+		struct intel_encoder *encoder =
+			to_intel_encoder(old_conn_state->best_encoder);
+
+		if (old_conn_state->crtc != &crtc->base)
+			continue;
+
+		if (encoder->audio_disable)
+			encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
+	}
+}
+
 #define is_enabling(feature, old_crtc_state, new_crtc_state) \
 	((!(old_crtc_state)->feature || intel_crtc_needs_modeset(new_crtc_state)) && \
 	 (new_crtc_state)->feature)
@@ -1460,6 +1502,7 @@ static void ilk_crtc_enable(struct intel_atomic_state *state,
 	intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
+	intel_encoders_audio_enable(state, crtc);
 
 	if (HAS_PCH_CPT(dev_priv))
 		intel_wait_for_pipe_scanline_moving(crtc);
@@ -1633,6 +1676,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state,
 		intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
+	intel_encoders_audio_enable(state, crtc);
 
 	if (psl_clkgate_wa) {
 		intel_crtc_wait_for_next_vblank(crtc);
@@ -1684,6 +1728,7 @@ static void ilk_crtc_disable(struct intel_atomic_state *state,
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
 
+	intel_encoders_audio_disable(state, crtc);
 	intel_encoders_disable(state, crtc);
 
 	intel_crtc_vblank_off(old_crtc_state);
@@ -1718,6 +1763,7 @@ static void hsw_crtc_disable(struct intel_atomic_state *state,
 	 * Need care with mst->ddi interactions.
 	 */
 	if (!intel_crtc_is_bigjoiner_slave(old_crtc_state)) {
+		intel_encoders_audio_disable(state, crtc);
 		intel_encoders_disable(state, crtc);
 		intel_encoders_post_disable(state, crtc);
 	}
@@ -1987,6 +2033,7 @@ static void valleyview_crtc_enable(struct intel_atomic_state *state,
 	intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
+	intel_encoders_audio_enable(state, crtc);
 }
 
 static void i9xx_crtc_enable(struct intel_atomic_state *state,
@@ -2028,6 +2075,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state *state,
 	intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
+	intel_encoders_audio_enable(state, crtc);
 
 	/* prevents spurious underruns */
 	if (DISPLAY_VER(dev_priv) == 2)
@@ -2064,6 +2112,7 @@ static void i9xx_crtc_disable(struct intel_atomic_state *state,
 	if (DISPLAY_VER(dev_priv) == 2)
 		intel_crtc_wait_for_next_vblank(crtc);
 
+	intel_encoders_audio_disable(state, crtc);
 	intel_encoders_disable(state, crtc);
 
 	intel_crtc_vblank_off(old_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index ebd72c56b124e..63364c9602efb 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -918,8 +918,6 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 	drm_dbg_kms(&i915->drm, "active links %d\n",
 		    intel_dp->active_mst_links);
 
-	encoder->audio_disable(encoder, old_crtc_state, old_conn_state);
-
 	intel_hdcp_disable(intel_mst->connector);
 
 	intel_dp_sink_disable_decompression(state, connector, old_crtc_state);
@@ -1165,8 +1163,6 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state,
 	intel_crtc_vblank_on(pipe_config);
 
 	intel_hdcp_enable(state, encoder, pipe_config, conn_state);
-
-	encoder->audio_enable(encoder, pipe_config, conn_state);
 }
 
 static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
-- 
GitLab


From 109e1e898abd2c68ceb02058c56db7cf6b9c18d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:23 +0200
Subject: [PATCH 0426/2340] drm/i915: Push audio_{enable,disable}() to the
 pre/post pane update stage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Relocate the audio enable/disable from the full modeset hooks into
the common pre/post plane update stage of the commit. Audio fastset
is within easy reach now.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-11-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 31 +++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 61594690b1a94..e923ff634d109 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -989,6 +989,24 @@ static bool vrr_disabling(const struct intel_crtc_state *old_crtc_state,
 		  vrr_params_changed(old_crtc_state, new_crtc_state)));
 }
 
+static bool audio_enabling(const struct intel_crtc_state *old_crtc_state,
+			   const struct intel_crtc_state *new_crtc_state)
+{
+	if (!new_crtc_state->hw.active)
+		return false;
+
+	return is_enabling(has_audio, old_crtc_state, new_crtc_state);
+}
+
+static bool audio_disabling(const struct intel_crtc_state *old_crtc_state,
+			    const struct intel_crtc_state *new_crtc_state)
+{
+	if (!old_crtc_state->hw.active)
+		return false;
+
+	return is_disabling(has_audio, old_crtc_state, new_crtc_state);
+}
+
 #undef is_disabling
 #undef is_enabling
 
@@ -1029,6 +1047,9 @@ static void intel_post_plane_update(struct intel_atomic_state *state,
 
 	if (intel_crtc_needs_color_update(new_crtc_state))
 		intel_color_post_update(new_crtc_state);
+
+	if (audio_enabling(old_crtc_state, new_crtc_state))
+		intel_encoders_audio_enable(state, crtc);
 }
 
 static void intel_crtc_enable_flip_done(struct intel_atomic_state *state,
@@ -1112,6 +1133,9 @@ static void intel_pre_plane_update(struct intel_atomic_state *state,
 		intel_crtc_update_active_timings(old_crtc_state, false);
 	}
 
+	if (audio_disabling(old_crtc_state, new_crtc_state))
+		intel_encoders_audio_disable(state, crtc);
+
 	intel_drrs_deactivate(old_crtc_state);
 
 	intel_psr_pre_plane_update(state, crtc);
@@ -1502,7 +1526,6 @@ static void ilk_crtc_enable(struct intel_atomic_state *state,
 	intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
-	intel_encoders_audio_enable(state, crtc);
 
 	if (HAS_PCH_CPT(dev_priv))
 		intel_wait_for_pipe_scanline_moving(crtc);
@@ -1676,7 +1699,6 @@ static void hsw_crtc_enable(struct intel_atomic_state *state,
 		intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
-	intel_encoders_audio_enable(state, crtc);
 
 	if (psl_clkgate_wa) {
 		intel_crtc_wait_for_next_vblank(crtc);
@@ -1728,7 +1750,6 @@ static void ilk_crtc_disable(struct intel_atomic_state *state,
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
 
-	intel_encoders_audio_disable(state, crtc);
 	intel_encoders_disable(state, crtc);
 
 	intel_crtc_vblank_off(old_crtc_state);
@@ -1763,7 +1784,6 @@ static void hsw_crtc_disable(struct intel_atomic_state *state,
 	 * Need care with mst->ddi interactions.
 	 */
 	if (!intel_crtc_is_bigjoiner_slave(old_crtc_state)) {
-		intel_encoders_audio_disable(state, crtc);
 		intel_encoders_disable(state, crtc);
 		intel_encoders_post_disable(state, crtc);
 	}
@@ -2033,7 +2053,6 @@ static void valleyview_crtc_enable(struct intel_atomic_state *state,
 	intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
-	intel_encoders_audio_enable(state, crtc);
 }
 
 static void i9xx_crtc_enable(struct intel_atomic_state *state,
@@ -2075,7 +2094,6 @@ static void i9xx_crtc_enable(struct intel_atomic_state *state,
 	intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
-	intel_encoders_audio_enable(state, crtc);
 
 	/* prevents spurious underruns */
 	if (DISPLAY_VER(dev_priv) == 2)
@@ -2112,7 +2130,6 @@ static void i9xx_crtc_disable(struct intel_atomic_state *state,
 	if (DISPLAY_VER(dev_priv) == 2)
 		intel_crtc_wait_for_next_vblank(crtc);
 
-	intel_encoders_audio_disable(state, crtc);
 	intel_encoders_disable(state, crtc);
 
 	intel_crtc_vblank_off(old_crtc_state);
-- 
GitLab


From 07e823c0fd991565106eff6f03892c5d645cd690 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 21 Nov 2023 07:43:24 +0200
Subject: [PATCH 0427/2340] drm/i915: Implement audio fastset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's no real reason why we'd need a full modeset for audio
changes. So let's allow audio to be toggled during fastset.
In case the ELD changes while has_audio isn't changing state
we force both audio disable and enable so the new ELD gets
propagated to the audio driver.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-12-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 32 ++++++--------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index e923ff634d109..cf338ac71229c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -995,7 +995,9 @@ static bool audio_enabling(const struct intel_crtc_state *old_crtc_state,
 	if (!new_crtc_state->hw.active)
 		return false;
 
-	return is_enabling(has_audio, old_crtc_state, new_crtc_state);
+	return is_enabling(has_audio, old_crtc_state, new_crtc_state) ||
+		(new_crtc_state->has_audio &&
+		 memcmp(old_crtc_state->eld, new_crtc_state->eld, MAX_ELD_BYTES) != 0);
 }
 
 static bool audio_disabling(const struct intel_crtc_state *old_crtc_state,
@@ -1004,7 +1006,9 @@ static bool audio_disabling(const struct intel_crtc_state *old_crtc_state,
 	if (!old_crtc_state->hw.active)
 		return false;
 
-	return is_disabling(has_audio, old_crtc_state, new_crtc_state);
+	return is_disabling(has_audio, old_crtc_state, new_crtc_state) ||
+		(old_crtc_state->has_audio &&
+		 memcmp(old_crtc_state->eld, new_crtc_state->eld, MAX_ELD_BYTES) != 0);
 }
 
 #undef is_disabling
@@ -4956,23 +4960,6 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	} \
 } while (0)
 
-/*
- * Checks state where we only read out the enabling, but not the entire
- * state itself (like full infoframes or ELD for audio). These states
- * require a full modeset on bootup to fix up.
- */
-#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \
-	if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \
-		PIPE_CONF_CHECK_BOOL(name); \
-	} else { \
-		pipe_config_mismatch(fastset, crtc, __stringify(name), \
-				     "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \
-				     str_yes_no(current_config->name), \
-				     str_yes_no(pipe_config->name)); \
-		ret = false; \
-	} \
-} while (0)
-
 #define PIPE_CONF_CHECK_P(name) do { \
 	if (current_config->name != pipe_config->name) { \
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
@@ -5160,8 +5147,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_BOOL(enhanced_framing);
 	PIPE_CONF_CHECK_BOOL(fec_enable);
 
-	PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio);
-	PIPE_CONF_CHECK_BUFFER(eld, MAX_ELD_BYTES);
+	if (!fastset) {
+		PIPE_CONF_CHECK_BOOL(has_audio);
+		PIPE_CONF_CHECK_BUFFER(eld, MAX_ELD_BYTES);
+	}
 
 	PIPE_CONF_CHECK_X(gmch_pfit.control);
 	/* pfit ratios are autocomputed by the hw on gen4+ */
@@ -5331,7 +5320,6 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 #undef PIPE_CONF_CHECK_X
 #undef PIPE_CONF_CHECK_I
 #undef PIPE_CONF_CHECK_BOOL
-#undef PIPE_CONF_CHECK_BOOL_INCOMPLETE
 #undef PIPE_CONF_CHECK_P
 #undef PIPE_CONF_CHECK_FLAGS
 #undef PIPE_CONF_CHECK_COLOR_LUT
-- 
GitLab


From 1aba67132cbc46856dfa8f904cd7021a75b1806d Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Date: Thu, 16 Nov 2023 17:02:23 +0200
Subject: [PATCH 0428/2340] drm/i915/display: Separate xe and i915 common dpt
 code into own file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Here created intel_dpt_common.c to hold intel_dpt_configure which is
needed for both xe and i915.

Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116150225.204233-1-juhapekka.heikkila@gmail.com
---
 drivers/gpu/drm/i915/Makefile                 |  1 +
 drivers/gpu/drm/i915/display/intel_display.c  |  1 +
 drivers/gpu/drm/i915/display/intel_dpt.c      | 26 --------------
 drivers/gpu/drm/i915/display/intel_dpt.h      |  2 --
 .../gpu/drm/i915/display/intel_dpt_common.c   | 34 +++++++++++++++++++
 .../gpu/drm/i915/display/intel_dpt_common.h   | 13 +++++++
 6 files changed, 49 insertions(+), 28 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/display/intel_dpt_common.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_dpt_common.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 7e5d6a39d4503..65e984242089c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -275,6 +275,7 @@ i915-y += \
 	display/intel_dpll.o \
 	display/intel_dpll_mgr.o \
 	display/intel_dpt.o \
+	display/intel_dpt_common.o \
 	display/intel_drrs.o \
 	display/intel_dsb.o \
 	display/intel_dsb_buffer.o \
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index cf338ac71229c..5cf162628b95e 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -76,6 +76,7 @@
 #include "intel_dpll.h"
 #include "intel_dpll_mgr.h"
 #include "intel_dpt.h"
+#include "intel_dpt_common.h"
 #include "intel_drrs.h"
 #include "intel_dsb.h"
 #include "intel_dsi.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 2b067cb952f00..b29bceff73f23 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -9,8 +9,6 @@
 #include "gt/gen8_ppgtt.h"
 
 #include "i915_drv.h"
-#include "i915_reg.h"
-#include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_dpt.h"
 #include "intel_fb.h"
@@ -318,27 +316,3 @@ void intel_dpt_destroy(struct i915_address_space *vm)
 	i915_vm_put(&dpt->vm);
 }
 
-void intel_dpt_configure(struct intel_crtc *crtc)
-{
-	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
-
-	if (DISPLAY_VER(i915) == 14) {
-		enum pipe pipe = crtc->pipe;
-		enum plane_id plane_id;
-
-		for_each_plane_id_on_crtc(crtc, plane_id) {
-			if (plane_id == PLANE_CURSOR)
-				continue;
-
-			intel_de_rmw(i915, PLANE_CHICKEN(pipe, plane_id),
-				     PLANE_CHICKEN_DISABLE_DPT,
-				     i915->display.params.enable_dpt ? 0 :
-				     PLANE_CHICKEN_DISABLE_DPT);
-		}
-	} else if (DISPLAY_VER(i915) == 13) {
-		intel_de_rmw(i915, CHICKEN_MISC_2,
-			     CHICKEN_MISC_DISABLE_DPT,
-			     i915->display.params.enable_dpt ? 0 :
-			     CHICKEN_MISC_DISABLE_DPT);
-	}
-}
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h
index d9a1665501851..e18a9f767b112 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.h
+++ b/drivers/gpu/drm/i915/display/intel_dpt.h
@@ -10,7 +10,6 @@ struct drm_i915_private;
 
 struct i915_address_space;
 struct i915_vma;
-struct intel_crtc;
 struct intel_framebuffer;
 
 void intel_dpt_destroy(struct i915_address_space *vm);
@@ -20,6 +19,5 @@ void intel_dpt_suspend(struct drm_i915_private *i915);
 void intel_dpt_resume(struct drm_i915_private *i915);
 struct i915_address_space *
 intel_dpt_create(struct intel_framebuffer *fb);
-void intel_dpt_configure(struct intel_crtc *crtc);
 
 #endif /* __INTEL_DPT_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_dpt_common.c b/drivers/gpu/drm/i915/display/intel_dpt_common.c
new file mode 100644
index 0000000000000..cdba47165c04c
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dpt_common.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_reg.h"
+#include "intel_de.h"
+#include "intel_display_types.h"
+#include "intel_dpt_common.h"
+
+void intel_dpt_configure(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+
+	if (DISPLAY_VER(i915) == 14) {
+		enum pipe pipe = crtc->pipe;
+		enum plane_id plane_id;
+
+		for_each_plane_id_on_crtc(crtc, plane_id) {
+			if (plane_id == PLANE_CURSOR)
+				continue;
+
+			intel_de_rmw(i915, PLANE_CHICKEN(pipe, plane_id),
+				     PLANE_CHICKEN_DISABLE_DPT,
+				     i915->display.params.enable_dpt ? 0 :
+				     PLANE_CHICKEN_DISABLE_DPT);
+		}
+	} else if (DISPLAY_VER(i915) == 13) {
+		intel_de_rmw(i915, CHICKEN_MISC_2,
+			     CHICKEN_MISC_DISABLE_DPT,
+			     i915->display.params.enable_dpt ? 0 :
+			     CHICKEN_MISC_DISABLE_DPT);
+	}
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dpt_common.h b/drivers/gpu/drm/i915/display/intel_dpt_common.h
new file mode 100644
index 0000000000000..6d7de405126a5
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dpt_common.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_DPT_COMMON_H__
+#define __INTEL_DPT_COMMON_H__
+
+struct intel_crtc;
+
+void intel_dpt_configure(struct intel_crtc *crtc);
+
+#endif /* __INTEL_DPT_COMMON_H__ */
-- 
GitLab


From 185b24883e278ba298c073164d1e1abacc986d9f Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Date: Thu, 16 Nov 2023 17:02:24 +0200
Subject: [PATCH 0429/2340] drm/i915/display: in skl_surf_address check for
 dpt-vma
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

touch dpt_vma->node only if dpt-vma is not NULL

Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116150225.204233-2-juhapekka.heikkila@gmail.com
---
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 39499a0ec6c01..f5c77a018e104 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -1006,7 +1006,8 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state,
 		 * The DPT object contains only one vma, so the VMA's offset
 		 * within the DPT is always 0.
 		 */
-		drm_WARN_ON(&i915->drm, plane_state->dpt_vma->node.start);
+		drm_WARN_ON(&i915->drm, plane_state->dpt_vma &&
+			    plane_state->dpt_vma->node.start);
 		drm_WARN_ON(&i915->drm, offset & 0x1fffff);
 		return offset >> 9;
 	} else {
-- 
GitLab


From 12b7142e679f8184b42de6750e44a4fc67ebc4e4 Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Date: Thu, 16 Nov 2023 17:02:25 +0200
Subject: [PATCH 0430/2340] drm/i915/display: In intel_framebuffer_init switch
 to use intel_bo_to_drm_bo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are preparing for Xe driver. I915 and Xe object implementation are
differing. Use intel_bo_to_drm_bo instead of &obj->base.

Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116150225.204233-3-juhapekka.heikkila@gmail.com
---
 drivers/gpu/drm/i915/display/intel_fb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index e1db7a3afd1ac..77226ec00cb1c 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -2093,7 +2093,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			}
 		}
 
-		fb->obj[i] = &obj->base;
+		fb->obj[i] = intel_bo_to_drm_bo(obj);
 	}
 
 	ret = intel_fill_fb_info(dev_priv, intel_fb);
-- 
GitLab


From c6fbb6bca10838485b820e8a26c23996f77ce580 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 13 Oct 2023 16:13:59 +0300
Subject: [PATCH 0431/2340] drm: Fix color LUT rounding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current implementation of drm_color_lut_extract()
generates weird results. Eg. if we go through all the
values for 16->8bpc conversion we see the following pattern:

in            out (count)
   0 -   7f ->  0 (128)
  80 -  17f ->  1 (256)
 180 -  27f ->  2 (256)
 280 -  37f ->  3 (256)
...
fb80 - fc7f -> fc (256)
fc80 - fd7f -> fd (256)
fd80 - fe7f -> fe (256)
fe80 - ffff -> ff (384)

So less values map to 0 and more values map 0xff, which
doesn't seem particularly great.

To get just the same number of input values to map to
the same output values we'd just need to drop the rounding
entrirely. But perhaps a better idea would be to follow the
OpenGL int<->float conversion rules, in which case we get
the following results:

in            out (count)
   0 -   80 ->  0 (129)
  81 -  181 ->  1 (257)
 182 -  282 ->  2 (257)
 283 -  383 ->  3 (257)
...
fc7c - fd7c -> fc (257)
fd7d - fe7d -> fd (257)
fe7e - ff7e -> fe (257)
ff7f - ffff -> ff (129)

Note that since the divisor is constant the compiler
is able to optimize away the integer division in most
cases. The only exception is the _ULL() case on 32bit
architectures since that gets emitted as inline asm
via do_div() and thus the compiler doesn't get to
optimize it.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013131402.24072-2-ville.syrjala@linux.intel.com
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_color_mgmt.h | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h
index 81c298488b0c8..54b2b2467bfd7 100644
--- a/include/drm/drm_color_mgmt.h
+++ b/include/drm/drm_color_mgmt.h
@@ -36,20 +36,17 @@ struct drm_plane;
  *
  * Extract a degamma/gamma LUT value provided by user (in the form of
  * &drm_color_lut entries) and round it to the precision supported by the
- * hardware.
+ * hardware, following OpenGL int<->float conversion rules
+ * (see eg. OpenGL 4.6 specification - 2.3.5 Fixed-Point Data Conversions).
  */
 static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision)
 {
-	u32 val = user_input;
-	u32 max = 0xffff >> (16 - bit_precision);
-
-	/* Round only if we're not using full precision. */
-	if (bit_precision < 16) {
-		val += 1UL << (16 - bit_precision - 1);
-		val >>= 16 - bit_precision;
-	}
-
-	return clamp_val(val, 0, max);
+	if (bit_precision > 16)
+		return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(user_input, (1 << bit_precision) - 1),
+					     (1 << 16) - 1);
+	else
+		return DIV_ROUND_CLOSEST(user_input * ((1 << bit_precision) - 1),
+					 (1 << 16) - 1);
 }
 
 u64 drm_color_ctm_s31_32_to_qm_n(u64 user_input, u32 m, u32 n);
-- 
GitLab


From edc2b74a535a87110a70757ff535aaa47c34e66d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 13 Oct 2023 16:14:00 +0300
Subject: [PATCH 0432/2340] drm/i915: Adjust LUT rounding rules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drm_color_lut_extract() rounding was changed to follow the
OpenGL int<->float conversion rules. Adjust intel_color_lut_pack()
to match.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013131402.24072-3-ville.syrjala@linux.intel.com
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_color.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index 1d26be54ddfc2..1e734e9329170 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -785,14 +785,12 @@ static void chv_assign_csc(struct intel_crtc_state *crtc_state)
 /* convert hw value with given bit_precision to lut property val */
 static u32 intel_color_lut_pack(u32 val, int bit_precision)
 {
-	u32 max = 0xffff >> (16 - bit_precision);
-
-	val = clamp_val(val, 0, max);
-
-	if (bit_precision < 16)
-		val <<= 16 - bit_precision;
-
-	return val;
+	if (bit_precision > 16)
+		return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(val, (1 << 16) - 1),
+					     (1 << bit_precision) - 1);
+	else
+		return DIV_ROUND_CLOSEST(val * ((1 << 16) - 1),
+					 (1 << bit_precision) - 1);
 }
 
 static u32 i9xx_lut_8(const struct drm_color_lut *color)
-- 
GitLab


From 5d76c8163f09cfee7dbc1870a1154c2ca443528b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 13 Oct 2023 16:14:01 +0300
Subject: [PATCH 0433/2340] drm/i915: s/clamp()/min()/ in
 i965_lut_11p6_max_pack()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use min() instead of clamp() since the color values
involved are unsigned. No functional changes.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013131402.24072-4-ville.syrjala@linux.intel.com
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_color.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index 1e734e9329170..91f92844e2134 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -909,7 +909,7 @@ static void i965_lut_10p6_pack(struct drm_color_lut *entry, u32 ldw, u32 udw)
 static u16 i965_lut_11p6_max_pack(u32 val)
 {
 	/* PIPEGCMAX is 11.6, clamp to 10.6 */
-	return clamp_val(val, 0, 0xffff);
+	return min(val, 0xffffu);
 }
 
 static u32 ilk_lut_10(const struct drm_color_lut *color)
-- 
GitLab


From deac453244d309ad7a94d0501eb5e0f9d8d1f1df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 13 Oct 2023 16:14:02 +0300
Subject: [PATCH 0434/2340] drm/i915: Fix glk+ degamma LUT conversions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current implementation of change_lut_val_precision() is just
a convoluted way of shifting by 8. Implement the proper rounding
by just using drm_color_lut_extract() and intel_color_lut_pack()
like everyone else does.

And as the uapi can't handle >=1.0 values but the hardware
can we need to clamp the results appropriately in the readout
path.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013131402.24072-5-ville.syrjala@linux.intel.com
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_color.c | 54 +++++++++++-----------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index 91f92844e2134..c5092b7e87d52 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -1526,14 +1526,27 @@ static int glk_degamma_lut_size(struct drm_i915_private *i915)
 		return 35;
 }
 
-/*
- * change_lut_val_precision: helper function to upscale or downscale lut values.
- * Parameters 'to' and 'from' needs to be less than 32. This should be sufficient
- * as currently there are no lut values exceeding 32 bit.
- */
-static u32 change_lut_val_precision(u32 lut_val, int to, int from)
+static u32 glk_degamma_lut(const struct drm_color_lut *color)
+{
+	return color->green;
+}
+
+static void glk_degamma_lut_pack(struct drm_color_lut *entry, u32 val)
+{
+	/* PRE_CSC_GAMC_DATA is 3.16, clamp to 0.16 */
+	entry->red = entry->green = entry->blue = min(val, 0xffffu);
+}
+
+static u32 mtl_degamma_lut(const struct drm_color_lut *color)
+{
+	return drm_color_lut_extract(color->green, 24);
+}
+
+static void mtl_degamma_lut_pack(struct drm_color_lut *entry, u32 val)
 {
-	return mul_u32_u32(lut_val, (1 << to)) / (1 << from);
+	/* PRE_CSC_GAMC_DATA is 3.24, clamp to 0.16 */
+	entry->red = entry->green = entry->blue =
+		intel_color_lut_pack(min(val, 0xffffffu), 24);
 }
 
 static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
@@ -1570,20 +1583,16 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
 		 * ToDo: Extend to max 7.0. Enable 32 bit input value
 		 * as compared to just 16 to achieve this.
 		 */
-		u32 lut_val;
-
-		if (DISPLAY_VER(i915) >= 14)
-			lut_val = change_lut_val_precision(lut[i].green, 24, 16);
-		else
-			lut_val = lut[i].green;
-
 		ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
-			      lut_val);
+			      DISPLAY_VER(i915) >= 14 ?
+			      mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i]));
 	}
 
 	/* Clamp values > 1.0. */
 	while (i++ < glk_degamma_lut_size(i915))
-		ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), 1 << 16);
+		ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
+			      DISPLAY_VER(i915) >= 14 ?
+			      1 << 24 : 1 << 16);
 
 	ilk_lut_write(crtc_state, PRE_CSC_GAMC_INDEX(pipe), 0);
 }
@@ -3570,17 +3579,10 @@ static struct drm_property_blob *glk_read_degamma_lut(struct intel_crtc *crtc)
 	for (i = 0; i < lut_size; i++) {
 		u32 val = intel_de_read_fw(dev_priv, PRE_CSC_GAMC_DATA(pipe));
 
-		/*
-		 * For MTL and beyond, convert back the 24 bit lut values
-		 * read from HW to 16 bit values to maintain parity with
-		 * userspace values
-		 */
 		if (DISPLAY_VER(dev_priv) >= 14)
-			val = change_lut_val_precision(val, 16, 24);
-
-		lut[i].red = val;
-		lut[i].green = val;
-		lut[i].blue = val;
+			mtl_degamma_lut_pack(&lut[i], val);
+		else
+			glk_degamma_lut_pack(&lut[i], val);
 	}
 
 	intel_de_write_fw(dev_priv, PRE_CSC_GAMC_INDEX(pipe),
-- 
GitLab


From 0e26cc72c71cb98e951716a6596060cd04b0ba6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Wed, 22 Nov 2023 13:19:38 -0300
Subject: [PATCH 0435/2340] drm: Refuse to async flip with atomic prop changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Given that prop changes may lead to modesetting, which would defeat the
fast path of the async flip, refuse any atomic prop change for async
flips in atomic API. The only exception is the framebuffer ID to flip
to. Currently the only plane type supported is the primary one.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Reviewed-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Simon Ser <contact@emersion.fr>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122161941.320564-2-andrealmeid@igalia.com
---
 drivers/gpu/drm/drm_atomic_uapi.c   | 52 +++++++++++++++++++++++++++--
 drivers/gpu/drm/drm_crtc_internal.h |  2 +-
 drivers/gpu/drm/drm_mode_object.c   |  2 +-
 3 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 98d3b10c08ae1..52494642b095c 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1006,13 +1006,28 @@ out:
 	return ret;
 }
 
+static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t prop_value,
+					 struct drm_property *prop)
+{
+	if (ret != 0 || old_val != prop_value) {
+		drm_dbg_atomic(prop->dev,
+			       "[PROP:%d:%s] No prop can be changed during async flip\n",
+			       prop->base.id, prop->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int drm_atomic_set_property(struct drm_atomic_state *state,
 			    struct drm_file *file_priv,
 			    struct drm_mode_object *obj,
 			    struct drm_property *prop,
-			    uint64_t prop_value)
+			    u64 prop_value,
+			    bool async_flip)
 {
 	struct drm_mode_object *ref;
+	u64 old_val;
 	int ret;
 
 	if (!drm_property_change_valid_get(prop, prop_value, &ref))
@@ -1029,6 +1044,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
 			break;
 		}
 
+		if (async_flip) {
+			ret = drm_atomic_connector_get_property(connector, connector_state,
+								prop, &old_val);
+			ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
+			break;
+		}
+
 		ret = drm_atomic_connector_set_property(connector,
 				connector_state, file_priv,
 				prop, prop_value);
@@ -1044,6 +1066,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
 			break;
 		}
 
+		if (async_flip) {
+			ret = drm_atomic_crtc_get_property(crtc, crtc_state,
+							   prop, &old_val);
+			ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
+			break;
+		}
+
 		ret = drm_atomic_crtc_set_property(crtc,
 				crtc_state, prop, prop_value);
 		break;
@@ -1051,6 +1080,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
 	case DRM_MODE_OBJECT_PLANE: {
 		struct drm_plane *plane = obj_to_plane(obj);
 		struct drm_plane_state *plane_state;
+		struct drm_mode_config *config = &plane->dev->mode_config;
 
 		plane_state = drm_atomic_get_plane_state(state, plane);
 		if (IS_ERR(plane_state)) {
@@ -1058,6 +1088,21 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
 			break;
 		}
 
+		if (async_flip && prop != config->prop_fb_id) {
+			ret = drm_atomic_plane_get_property(plane, plane_state,
+							    prop, &old_val);
+			ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
+			break;
+		}
+
+		if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) {
+			drm_dbg_atomic(prop->dev,
+				       "[OBJECT:%d] Only primary planes can be changed during async flip\n",
+				       obj->id);
+			ret = -EINVAL;
+			break;
+		}
+
 		ret = drm_atomic_plane_set_property(plane,
 				plane_state, file_priv,
 				prop, prop_value);
@@ -1337,6 +1382,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
 	struct drm_out_fence_state *fence_state;
 	int ret = 0;
 	unsigned int i, j, num_fences;
+	bool async_flip = false;
 
 	/* disallow for drivers not supporting atomic: */
 	if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
@@ -1450,8 +1496,8 @@ retry:
 				goto out;
 			}
 
-			ret = drm_atomic_set_property(state, file_priv,
-						      obj, prop, prop_value);
+			ret = drm_atomic_set_property(state, file_priv, obj,
+						      prop, prop_value, async_flip);
 			if (ret) {
 				drm_mode_object_put(obj);
 				goto out;
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 6b646e0783be9..a514d5207e419 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -253,7 +253,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
 			    struct drm_file *file_priv,
 			    struct drm_mode_object *obj,
 			    struct drm_property *prop,
-			    uint64_t prop_value);
+			    u64 prop_value, bool async_flip);
 int drm_atomic_get_property(struct drm_mode_object *obj,
 			    struct drm_property *property, uint64_t *val);
 
diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
index ac0d2ce3f8704..0e8355063eee3 100644
--- a/drivers/gpu/drm/drm_mode_object.c
+++ b/drivers/gpu/drm/drm_mode_object.c
@@ -538,7 +538,7 @@ retry:
 						       obj_to_connector(obj),
 						       prop_value);
 	} else {
-		ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value);
+		ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value, false);
 		if (ret)
 			goto out;
 		ret = drm_atomic_commit(state);
-- 
GitLab


From 4b4af74ab9719d17538a97f43137e93296ec7437 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Wed, 22 Nov 2023 13:19:39 -0300
Subject: [PATCH 0436/2340] drm: allow DRM_MODE_PAGE_FLIP_ASYNC for atomic
 commits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the driver supports it, allow user-space to supply the
DRM_MODE_PAGE_FLIP_ASYNC flag to request an async page-flip.
Set drm_crtc_state.async_flip accordingly.

Document that drivers will reject atomic commits if an async
flip isn't possible. This allows user-space to fall back to
something else. For instance, Xorg falls back to a blit.
Another option is to wait as close to the next vblank as
possible before performing the page-flip to reduce latency.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Co-developed-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122161941.320564-3-andrealmeid@igalia.com
---
 drivers/gpu/drm/drm_atomic_uapi.c | 25 ++++++++++++++++++++++---
 include/uapi/drm/drm_mode.h       |  9 +++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 52494642b095c..1164070022696 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1368,6 +1368,18 @@ static void complete_signaling(struct drm_device *dev,
 	kfree(fence_state);
 }
 
+static void
+set_async_flip(struct drm_atomic_state *state)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	int i;
+
+	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+		crtc_state->async_flip = true;
+	}
+}
+
 int drm_mode_atomic_ioctl(struct drm_device *dev,
 			  void *data, struct drm_file *file_priv)
 {
@@ -1409,9 +1421,13 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
 	}
 
 	if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) {
-		drm_dbg_atomic(dev,
-			       "commit failed: invalid flag DRM_MODE_PAGE_FLIP_ASYNC\n");
-		return -EINVAL;
+		if (!dev->mode_config.async_page_flip) {
+			drm_dbg_atomic(dev,
+				       "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported\n");
+			return -EINVAL;
+		}
+
+		async_flip = true;
 	}
 
 	/* can't test and expect an event at the same time. */
@@ -1514,6 +1530,9 @@ retry:
 	if (ret)
 		goto out;
 
+	if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC)
+		set_async_flip(state);
+
 	if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) {
 		ret = drm_atomic_check_only(state);
 	} else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) {
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 09e7a471ee30b..95630f1701102 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -957,6 +957,15 @@ struct hdr_output_metadata {
  * Request that the page-flip is performed as soon as possible, ie. with no
  * delay due to waiting for vblank. This may cause tearing to be visible on
  * the screen.
+ *
+ * When used with atomic uAPI, the driver will return an error if the hardware
+ * doesn't support performing an asynchronous page-flip for this update.
+ * User-space should handle this, e.g. by falling back to a regular page-flip.
+ *
+ * Note, some hardware might need to perform one last synchronous page-flip
+ * before being able to switch to asynchronous page-flips. As an exception,
+ * the driver will return success even though that first page-flip is not
+ * asynchronous.
  */
 #define DRM_MODE_PAGE_FLIP_ASYNC 0x02
 #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
-- 
GitLab


From e4d983acffff270ccee417445a69b9ed198658b1 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Wed, 22 Nov 2023 13:19:40 -0300
Subject: [PATCH 0437/2340] drm: introduce DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This new kernel capability indicates whether async page-flips are
supported via the atomic uAPI. DRM clients can use it to check
for support before feeding DRM_MODE_PAGE_FLIP_ASYNC to the kernel.

Make it clear that DRM_CAP_ASYNC_PAGE_FLIP is for legacy uAPI only.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: André Almeida <andrealmeid@igalia.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: Simon Ser <contact@emersion.fr>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122161941.320564-4-andrealmeid@igalia.com
---
 drivers/gpu/drm/drm_ioctl.c |  4 ++++
 include/uapi/drm/drm.h      | 10 +++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 44fda68c28aeb..f461ed862480e 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -301,6 +301,10 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_
 	case DRM_CAP_CRTC_IN_VBLANK_EVENT:
 		req->value = 1;
 		break;
+	case DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP:
+		req->value = drm_core_check_feature(dev, DRIVER_ATOMIC) &&
+			     dev->mode_config.async_page_flip;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 8662b5aeea0c8..796de831f4a04 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -713,7 +713,8 @@ struct drm_gem_open {
 /**
  * DRM_CAP_ASYNC_PAGE_FLIP
  *
- * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
+ * page-flips.
  */
 #define DRM_CAP_ASYNC_PAGE_FLIP		0x7
 /**
@@ -773,6 +774,13 @@ struct drm_gem_open {
  * :ref:`drm_sync_objects`.
  */
 #define DRM_CAP_SYNCOBJ_TIMELINE	0x14
+/**
+ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
+ * commits.
+ */
+#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP	0x15
 
 /* DRM_IOCTL_GET_CAP ioctl argument type */
 struct drm_get_cap {
-- 
GitLab


From 58046e6cf811464b8a6f269dc6a40a8cb91a8a68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 22 Nov 2023 11:31:36 +0200
Subject: [PATCH 0438/2340] drm/i915: Stop printing pipe name as hex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Print the pipe name in ascii rather than hex.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122093137.1509-3-ville.syrjala@linux.intel.com
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 7958d0bd851e1..ef57dad1a9cb7 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -4537,7 +4537,7 @@ void intel_shared_dpll_state_verify(struct intel_atomic_state *state,
 				"pll active mismatch (didn't expect pipe %c in active mask (0x%x))\n",
 				pipe_name(crtc->pipe), pll->active_mask);
 		I915_STATE_WARN(i915, pll->state.pipe_mask & pipe_mask,
-				"pll enabled crtcs mismatch (found %x in enabled mask (0x%x))\n",
+				"pll enabled crtcs mismatch (found pipe %c in enabled mask (0x%x))\n",
 				pipe_name(crtc->pipe), pll->state.pipe_mask);
 	}
 }
-- 
GitLab


From b90fccfb5cde406365c33aa21ee87da83bbfca02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 22 Nov 2023 11:31:37 +0200
Subject: [PATCH 0439/2340] drm/i915: Move the SDP split debug spew to the
 correct place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding ad-hoc debug prints all over the place is not good.
Move the SDP split debug spew into the proper place (state
dumper).

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122093137.1509-4-ville.syrjala@linux.intel.com
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
---
 drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 3 +++
 drivers/gpu/drm/i915/display/intel_dp.c              | 7 -------
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
index 66fe880af8f3f..30336453041d1 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
@@ -261,6 +261,9 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config,
 		drm_dbg_kms(&i915->drm, "fec: %s, enhanced framing: %s\n",
 			    str_enabled_disabled(pipe_config->fec_enable),
 			    str_enabled_disabled(pipe_config->enhanced_framing));
+
+		drm_dbg_kms(&i915->drm, "sdp split: %s\n",
+			    str_enabled_disabled(pipe_config->sdp_split_enable));
 	}
 
 	drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n",
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 6d5a794b6b717..f9b5fc2fdbcb9 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2844,19 +2844,12 @@ intel_dp_audio_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state)
 {
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
-	struct drm_connector *connector = conn_state->connector;
-
 	pipe_config->has_audio =
 		intel_dp_has_audio(encoder, pipe_config, conn_state) &&
 		intel_audio_compute_config(encoder, pipe_config, conn_state);
 
 	pipe_config->sdp_split_enable = pipe_config->has_audio &&
 					intel_dp_is_uhbr(pipe_config);
-
-	drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] SDP split enable: %s\n",
-		    connector->base.id, connector->name,
-		    str_yes_no(pipe_config->sdp_split_enable));
 }
 
 int
-- 
GitLab


From 4e3b70da64a53784683cfcbac2deda5d6e540407 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:05 +0200
Subject: [PATCH 0440/2340] drm: Disable the cursor plane on atomic contexts
 with virtualized drivers

Cursor planes on virtualized drivers have special meaning and require
that the clients handle them in specific ways, e.g. the cursor plane
should react to the mouse movement the way a mouse cursor would be
expected to and the client is required to set hotspot properties on it
in order for the mouse events to be routed correctly.

This breaks the contract as specified by the "universal planes". Fix it
by disabling the cursor planes on virtualized drivers while adding
a foundation on top of which it's possible to special case mouse cursor
planes for clients that want it.

Disabling the cursor planes makes some kms compositors which were broken,
e.g. Weston, fallback to software cursor which works fine or at least
better than currently while having no effect on others, e.g. gnome-shell
or kwin, which put virtualized drivers on a deny-list when running in
atomic context to make them fallback to legacy kms and avoid this issue.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Fixes: 681e7ec73044 ("drm: Allow userspace to ask for universal plane list (v2)")
Cc: <stable@vger.kernel.org> # v5.4+
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Gurchetan Singh <gurchetansingh@chromium.org>
Cc: Chia-I Wu <olvaffe@gmail.com>
Cc: dri-devel@lists.freedesktop.org
Cc: virtualization@lists.linux-foundation.org
Cc: spice-devel@lists.freedesktop.org
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-2-aesteve@redhat.com
---
 drivers/gpu/drm/drm_plane.c          | 13 +++++++++++++
 drivers/gpu/drm/qxl/qxl_drv.c        |  2 +-
 drivers/gpu/drm/vboxvideo/vbox_drv.c |  2 +-
 drivers/gpu/drm/virtio/virtgpu_drv.c |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c  |  2 +-
 include/drm/drm_drv.h                |  9 +++++++++
 include/drm/drm_file.h               | 12 ++++++++++++
 7 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 24e7998d17313..c6bbb0c209f47 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -678,6 +678,19 @@ int drm_mode_getplane_res(struct drm_device *dev, void *data,
 		    !file_priv->universal_planes)
 			continue;
 
+		/*
+		 * If we're running on a virtualized driver then,
+		 * unless userspace advertizes support for the
+		 * virtualized cursor plane, disable cursor planes
+		 * because they'll be broken due to missing cursor
+		 * hotspot info.
+		 */
+		if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		    drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) &&
+		    file_priv->atomic &&
+		    !file_priv->supports_virtualized_cursor_plane)
+			continue;
+
 		if (drm_lease_held(file_priv, plane->base.id)) {
 			if (count < plane_resp->count_planes &&
 			    put_user(plane->base.id, plane_ptr + count))
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 46de4f171970b..beee5563031aa 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -285,7 +285,7 @@ static const struct drm_ioctl_desc qxl_ioctls[] = {
 };
 
 static struct drm_driver qxl_driver = {
-	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
+	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT,
 
 	.dumb_create = qxl_mode_dumb_create,
 	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c
index 047b958123341..cd9e66a06596a 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_drv.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c
@@ -182,7 +182,7 @@ DEFINE_DRM_GEM_FOPS(vbox_fops);
 
 static const struct drm_driver driver = {
 	.driver_features =
-	    DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
+	    DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT,
 
 	.fops = &vbox_fops,
 	.name = DRIVER_NAME,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 4334c76084084..f8e9abe647b92 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -177,7 +177,7 @@ static const struct drm_driver driver = {
 	 * out via drm_device::driver_features:
 	 */
 	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC |
-			   DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
+			   DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE | DRIVER_CURSOR_HOTSPOT,
 	.open = virtio_gpu_driver_open,
 	.postclose = virtio_gpu_driver_postclose,
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 8b24ecf60e3ec..d3e308fdfd5be 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1611,7 +1611,7 @@ static const struct file_operations vmwgfx_driver_fops = {
 
 static const struct drm_driver driver = {
 	.driver_features =
-	DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM,
+	DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM | DRIVER_CURSOR_HOTSPOT,
 	.ioctls = vmw_ioctls,
 	.num_ioctls = ARRAY_SIZE(vmw_ioctls),
 	.master_set = vmw_master_set,
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index e2640dc64e081..ea36aa79dca25 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -110,6 +110,15 @@ enum drm_driver_feature {
 	 * Driver supports user defined GPU VA bindings for GEM objects.
 	 */
 	DRIVER_GEM_GPUVA		= BIT(8),
+	/**
+	 * @DRIVER_CURSOR_HOTSPOT:
+	 *
+	 * Driver supports and requires cursor hotspot information in the
+	 * cursor plane (e.g. cursor plane has to actually track the mouse
+	 * cursor and the clients are required to set hotspot in order for
+	 * the cursor planes to work correctly).
+	 */
+	DRIVER_CURSOR_HOTSPOT           = BIT(9),
 
 	/* IMPORTANT: Below are all the legacy flags, add new ones above. */
 
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index e1b5b4282f75d..8f35dcea82d3b 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -226,6 +226,18 @@ struct drm_file {
 	 */
 	bool is_master;
 
+	/**
+	 * @supports_virtualized_cursor_plane:
+	 *
+	 * This client is capable of handling the cursor plane with the
+	 * restrictions imposed on it by the virtualized drivers.
+	 *
+	 * This implies that the cursor plane has to behave like a cursor
+	 * i.e. track cursor movement. It also requires setting of the
+	 * hotspot properties by the client on the cursor plane.
+	 */
+	bool supports_virtualized_cursor_plane;
+
 	/**
 	 * @master:
 	 *
-- 
GitLab


From 8f7179a1027d89bf949b0b80c388a544a5e096f2 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:06 +0200
Subject: [PATCH 0441/2340] drm/atomic: Add support for mouse hotspots

Atomic modesetting code lacked support for specifying mouse cursor
hotspots. The legacy kms DRM_IOCTL_MODE_CURSOR2 had support for setting
the hotspot but the functionality was not implemented in the new atomic
paths.

Due to the lack of hotspots in the atomic paths userspace compositors
completely disable atomic modesetting for drivers that require it (i.e.
all paravirtualized drivers).

This change adds hotspot properties to the atomic codepaths throughtout
the DRM core and will allow enabling atomic modesetting for virtualized
drivers in the userspace.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-3-aesteve@redhat.com
---
 drivers/gpu/drm/drm_atomic_state_helper.c | 14 +++++++
 drivers/gpu/drm/drm_atomic_uapi.c         | 20 +++++++++
 drivers/gpu/drm/drm_plane.c               | 50 +++++++++++++++++++++++
 include/drm/drm_plane.h                   | 14 +++++++
 4 files changed, 98 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 784e63d70a421..54975de44a0e3 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -275,6 +275,20 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state,
 			plane_state->normalized_zpos = val;
 		}
 	}
+
+	if (plane->hotspot_x_property) {
+		if (!drm_object_property_get_default_value(&plane->base,
+							   plane->hotspot_x_property,
+							   &val))
+			plane_state->hotspot_x = val;
+	}
+
+	if (plane->hotspot_y_property) {
+		if (!drm_object_property_get_default_value(&plane->base,
+							   plane->hotspot_y_property,
+							   &val))
+			plane_state->hotspot_y = val;
+	}
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_state_reset);
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 1164070022696..aee4a65d49591 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -593,6 +593,22 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 	} else if (plane->funcs->atomic_set_property) {
 		return plane->funcs->atomic_set_property(plane, state,
 				property, val);
+	} else if (property == plane->hotspot_x_property) {
+		if (plane->type != DRM_PLANE_TYPE_CURSOR) {
+			drm_dbg_atomic(plane->dev,
+				       "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n",
+				       plane->base.id, plane->name, val);
+			return -EINVAL;
+		}
+		state->hotspot_x = val;
+	} else if (property == plane->hotspot_y_property) {
+		if (plane->type != DRM_PLANE_TYPE_CURSOR) {
+			drm_dbg_atomic(plane->dev,
+				       "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n",
+				       plane->base.id, plane->name, val);
+			return -EINVAL;
+		}
+		state->hotspot_y = val;
 	} else {
 		drm_dbg_atomic(plane->dev,
 			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n",
@@ -653,6 +669,10 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = state->scaling_filter;
 	} else if (plane->funcs->atomic_get_property) {
 		return plane->funcs->atomic_get_property(plane, state, property, val);
+	} else if (property == plane->hotspot_x_property) {
+		*val = state->hotspot_x;
+	} else if (property == plane->hotspot_y_property) {
+		*val = state->hotspot_y;
 	} else {
 		drm_dbg_atomic(dev,
 			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n",
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index c6bbb0c209f47..eaca367bdc7e7 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -230,6 +230,47 @@ static int create_in_format_blob(struct drm_device *dev, struct drm_plane *plane
 	return 0;
 }
 
+/**
+ * drm_plane_create_hotspot_properties - creates the mouse hotspot
+ * properties and attaches them to the given cursor plane
+ *
+ * @plane: drm cursor plane
+ *
+ * This function enables the mouse hotspot property on a given
+ * cursor plane.
+ *
+ * RETURNS:
+ * Zero for success or -errno
+ */
+static int drm_plane_create_hotspot_properties(struct drm_plane *plane)
+{
+	struct drm_property *prop_x;
+	struct drm_property *prop_y;
+
+	drm_WARN_ON(plane->dev,
+		    !drm_core_check_feature(plane->dev,
+					    DRIVER_CURSOR_HOTSPOT));
+
+	prop_x = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_X",
+						  INT_MIN, INT_MAX);
+	if (IS_ERR(prop_x))
+		return PTR_ERR(prop_x);
+
+	prop_y = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_Y",
+						  INT_MIN, INT_MAX);
+	if (IS_ERR(prop_y)) {
+		drm_property_destroy(plane->dev, prop_x);
+		return PTR_ERR(prop_y);
+	}
+
+	drm_object_attach_property(&plane->base, prop_x, 0);
+	drm_object_attach_property(&plane->base, prop_y, 0);
+	plane->hotspot_x_property = prop_x;
+	plane->hotspot_y_property = prop_y;
+
+	return 0;
+}
+
 __printf(9, 0)
 static int __drm_universal_plane_init(struct drm_device *dev,
 				      struct drm_plane *plane,
@@ -348,6 +389,10 @@ static int __drm_universal_plane_init(struct drm_device *dev,
 		drm_object_attach_property(&plane->base, config->prop_src_w, 0);
 		drm_object_attach_property(&plane->base, config->prop_src_h, 0);
 	}
+	if (drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) &&
+	    type == DRM_PLANE_TYPE_CURSOR) {
+		drm_plane_create_hotspot_properties(plane);
+	}
 
 	if (format_modifier_count)
 		create_in_format_blob(dev, plane);
@@ -1067,6 +1112,11 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
 
 			fb->hot_x = req->hot_x;
 			fb->hot_y = req->hot_y;
+
+			if (plane->hotspot_x_property && plane->state)
+				plane->state->hotspot_x = req->hot_x;
+			if (plane->hotspot_y_property && plane->state)
+				plane->state->hotspot_y = req->hot_y;
 		} else {
 			fb = NULL;
 		}
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 79d62856defbf..e2c671585775b 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -116,6 +116,10 @@ struct drm_plane_state {
 	/** @src_h: height of visible portion of plane (in 16.16) */
 	uint32_t src_h, src_w;
 
+	/** @hotspot_x: x offset to mouse cursor hotspot */
+	/** @hotspot_y: y offset to mouse cursor hotspot */
+	int32_t hotspot_x, hotspot_y;
+
 	/**
 	 * @alpha:
 	 * Opacity of the plane with 0 as completely transparent and 0xffff as
@@ -748,6 +752,16 @@ struct drm_plane {
 	 * scaling.
 	 */
 	struct drm_property *scaling_filter_property;
+
+	/**
+	 * @hotspot_x_property: property to set mouse hotspot x offset.
+	 */
+	struct drm_property *hotspot_x_property;
+
+	/**
+	 * @hotspot_y_property: property to set mouse hotspot y offset.
+	 */
+	struct drm_property *hotspot_y_property;
 };
 
 #define obj_to_plane(x) container_of(x, struct drm_plane, base)
-- 
GitLab


From cd5499429237b7ba3f5bfd3efb488688886c82fe Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:07 +0200
Subject: [PATCH 0442/2340] drm/vmwgfx: Use the hotspot properties from cursor
 planes

Atomic modesetting got support for mouse hotspots via the hotspot
properties. Port the legacy kms hotspot handling to the new properties
on cursor planes.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Cc: Maaz Mombasawala <mombasawalam@vmware.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-4-aesteve@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 818b7f109f538..bea0abc3d4188 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -768,13 +768,8 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 	struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state);
 	s32 hotspot_x, hotspot_y;
 
-	hotspot_x = du->hotspot_x;
-	hotspot_y = du->hotspot_y;
-
-	if (new_state->fb) {
-		hotspot_x += new_state->fb->hot_x;
-		hotspot_y += new_state->fb->hot_y;
-	}
+	hotspot_x = du->hotspot_x + new_state->hotspot_x;
+	hotspot_y = du->hotspot_y + new_state->hotspot_y;
 
 	du->cursor_surface = vps->surf;
 	du->cursor_bo = vps->bo;
-- 
GitLab


From 305b391d8f84a46119b5554a7a7af775266ce382 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:08 +0200
Subject: [PATCH 0443/2340] drm/qxl: Use the hotspot properties from cursor
 planes

Atomic modesetting got support for mouse hotspots via the hotspot
properties. Port the legacy kms hotspot handling to the new properties
on cursor planes.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: virtualization@lists.linux-foundation.org
Cc: spice-devel@lists.freedesktop.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-5-aesteve@redhat.com
---
 drivers/gpu/drm/qxl/qxl_display.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 404b0483bb7cb..c6d35c33d5d63 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -485,7 +485,6 @@ static int qxl_primary_atomic_check(struct drm_plane *plane,
 static int qxl_primary_apply_cursor(struct qxl_device *qdev,
 				    struct drm_plane_state *plane_state)
 {
-	struct drm_framebuffer *fb = plane_state->fb;
 	struct qxl_crtc *qcrtc = to_qxl_crtc(plane_state->crtc);
 	struct qxl_cursor_cmd *cmd;
 	struct qxl_release *release;
@@ -510,8 +509,8 @@ static int qxl_primary_apply_cursor(struct qxl_device *qdev,
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_SET;
-	cmd->u.set.position.x = plane_state->crtc_x + fb->hot_x;
-	cmd->u.set.position.y = plane_state->crtc_y + fb->hot_y;
+	cmd->u.set.position.x = plane_state->crtc_x + plane_state->hotspot_x;
+	cmd->u.set.position.y = plane_state->crtc_y + plane_state->hotspot_y;
 
 	cmd->u.set.shape = qxl_bo_physical_address(qdev, qcrtc->cursor_bo, 0);
 
@@ -531,7 +530,6 @@ out_free_release:
 static int qxl_primary_move_cursor(struct qxl_device *qdev,
 				   struct drm_plane_state *plane_state)
 {
-	struct drm_framebuffer *fb = plane_state->fb;
 	struct qxl_crtc *qcrtc = to_qxl_crtc(plane_state->crtc);
 	struct qxl_cursor_cmd *cmd;
 	struct qxl_release *release;
@@ -554,8 +552,8 @@ static int qxl_primary_move_cursor(struct qxl_device *qdev,
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_MOVE;
-	cmd->u.position.x = plane_state->crtc_x + fb->hot_x;
-	cmd->u.position.y = plane_state->crtc_y + fb->hot_y;
+	cmd->u.position.x = plane_state->crtc_x + plane_state->hotspot_x;
+	cmd->u.position.y = plane_state->crtc_y + plane_state->hotspot_y;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
 	qxl_release_fence_buffer_objects(release);
@@ -851,8 +849,8 @@ static int qxl_plane_prepare_fb(struct drm_plane *plane,
 		struct qxl_bo *old_cursor_bo = qcrtc->cursor_bo;
 
 		qcrtc->cursor_bo = qxl_create_cursor(qdev, user_bo,
-						     new_state->fb->hot_x,
-						     new_state->fb->hot_y);
+						     new_state->hotspot_x,
+						     new_state->hotspot_y);
 		qxl_free_cursor(old_cursor_bo);
 	}
 
-- 
GitLab


From 44d877a1de912fa24d1af8f76433a914e6816057 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:09 +0200
Subject: [PATCH 0444/2340] drm/vboxvideo: Use the hotspot properties from
 cursor planes

Atomic modesetting got support for mouse hotspots via the hotspot
properties. Port the legacy kms hotspot handling to the new properties
on cursor planes.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-6-aesteve@redhat.com
---
 drivers/gpu/drm/vboxvideo/vbox_mode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vboxvideo/vbox_mode.c b/drivers/gpu/drm/vboxvideo/vbox_mode.c
index 341edd982cb3b..9ff3bade97957 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_mode.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_mode.c
@@ -429,8 +429,8 @@ static void vbox_cursor_atomic_update(struct drm_plane *plane,
 	flags = VBOX_MOUSE_POINTER_VISIBLE | VBOX_MOUSE_POINTER_SHAPE |
 		VBOX_MOUSE_POINTER_ALPHA;
 	hgsmi_update_pointer_shape(vbox->guest_pool, flags,
-				   min_t(u32, max(fb->hot_x, 0), width),
-				   min_t(u32, max(fb->hot_y, 0), height),
+				   min_t(u32, max(new_state->hotspot_x, 0), width),
+				   min_t(u32, max(new_state->hotspot_y, 0), height),
 				   width, height, vbox->cursor_data, data_size);
 
 	mutex_unlock(&vbox->hw_mutex);
-- 
GitLab


From cc6c535967ed07fd75f54a26a70091826daf691e Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:10 +0200
Subject: [PATCH 0445/2340] drm/virtio: Use the hotspot properties from cursor
 planes

Atomic modesetting got support for mouse hotspots via the hotspot
properties. Port the legacy kms hotspot handling to the new properties
on cursor planes.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Gurchetan Singh <gurchetansingh@chromium.org>
Cc: Chia-I Wu <olvaffe@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: virtualization@lists.linux-foundation.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-7-aesteve@redhat.com
---
 drivers/gpu/drm/virtio/virtgpu_plane.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index a2e045f3a0004..20de599658c1f 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -323,16 +323,16 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
 		DRM_DEBUG("update, handle %d, pos +%d+%d, hot %d,%d\n", handle,
 			  plane->state->crtc_x,
 			  plane->state->crtc_y,
-			  plane->state->fb ? plane->state->fb->hot_x : 0,
-			  plane->state->fb ? plane->state->fb->hot_y : 0);
+			  plane->state->hotspot_x,
+			  plane->state->hotspot_y);
 		output->cursor.hdr.type =
 			cpu_to_le32(VIRTIO_GPU_CMD_UPDATE_CURSOR);
 		output->cursor.resource_id = cpu_to_le32(handle);
 		if (plane->state->fb) {
 			output->cursor.hot_x =
-				cpu_to_le32(plane->state->fb->hot_x);
+				cpu_to_le32(plane->state->hotspot_x);
 			output->cursor.hot_y =
-				cpu_to_le32(plane->state->fb->hot_y);
+				cpu_to_le32(plane->state->hotspot_y);
 		} else {
 			output->cursor.hot_x = cpu_to_le32(0);
 			output->cursor.hot_y = cpu_to_le32(0);
-- 
GitLab


From bce3dab7eb6ee596388699e8a052a7d58954c472 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:11 +0200
Subject: [PATCH 0446/2340] drm: Remove legacy cursor hotspot code

Atomic modesetting supports mouse cursor offsets via the hotspot
properties that are created on cursor planes. All drivers which
support hotspots are atomic and the legacy code has been implemented
in terms of the atomic properties as well.

Due to the above the lagacy cursor hotspot code is no longer used or
needed and can be removed.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-8-aesteve@redhat.com
---
 drivers/gpu/drm/drm_plane.c   |  3 ---
 include/drm/drm_framebuffer.h | 12 ------------
 2 files changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index eaca367bdc7e7..1dc00ad4c33c3 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -1110,9 +1110,6 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
 				return PTR_ERR(fb);
 			}
 
-			fb->hot_x = req->hot_x;
-			fb->hot_y = req->hot_y;
-
 			if (plane->hotspot_x_property && plane->state)
 				plane->state->hotspot_x = req->hot_x;
 			if (plane->hotspot_y_property && plane->state)
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index 80ece7b6dd9b5..668077009fced 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -188,18 +188,6 @@ struct drm_framebuffer {
 	 * DRM_MODE_FB_MODIFIERS.
 	 */
 	int flags;
-	/**
-	 * @hot_x: X coordinate of the cursor hotspot. Used by the legacy cursor
-	 * IOCTL when the driver supports cursor through a DRM_PLANE_TYPE_CURSOR
-	 * universal plane.
-	 */
-	int hot_x;
-	/**
-	 * @hot_y: Y coordinate of the cursor hotspot. Used by the legacy cursor
-	 * IOCTL when the driver supports cursor through a DRM_PLANE_TYPE_CURSOR
-	 * universal plane.
-	 */
-	int hot_y;
 	/**
 	 * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock.
 	 */
-- 
GitLab


From 9724ed6c1b1212d138e63f5e80647dc8b6b86696 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 23 Oct 2023 09:46:12 +0200
Subject: [PATCH 0447/2340] drm: Introduce DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT

Virtualized drivers place additional restrictions on the cursor plane
which breaks the contract of universal planes. To allow atomic
modesettings with virtualized drivers the clients need to advertise
that they're capable of dealing with those extra restrictions.

To do that introduce DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT which
lets DRM know that the client is aware of and capable of dealing with
the extra restrictions on the virtual cursor plane.

Setting this option to true makes DRM expose the cursor plane on
virtualized drivers. The userspace is expected to set the hotspots
and handle mouse events on that plane.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-9-aesteve@redhat.com
---
 drivers/gpu/drm/drm_ioctl.c |  9 +++++++++
 include/uapi/drm/drm.h      | 25 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f461ed862480e..ea303386e6886 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -365,6 +365,15 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 			return -EINVAL;
 		file_priv->writeback_connectors = req->value;
 		break;
+	case DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT:
+		if (!drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT))
+			return -EOPNOTSUPP;
+		if (!file_priv->atomic)
+			return -EINVAL;
+		if (req->value > 1)
+			return -EINVAL;
+		file_priv->supports_virtualized_cursor_plane = req->value;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 796de831f4a04..91943e5ce0105 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -850,6 +850,31 @@ struct drm_get_cap {
  */
 #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
 
+/**
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
+ *
+ * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
+ * virtualbox) have additional restrictions for cursor planes (thus
+ * making cursor planes on those drivers not truly universal,) e.g.
+ * they need cursor planes to act like one would expect from a mouse
+ * cursor and have correctly set hotspot properties.
+ * If this client cap is not set the DRM core will hide cursor plane on
+ * those virtualized drivers because not setting it implies that the
+ * client is not capable of dealing with those extra restictions.
+ * Clients which do set cursor hotspot and treat the cursor plane
+ * like a mouse cursor should set this property.
+ * The client must enable &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * Setting this property on drivers which do not special case
+ * cursor planes (i.e. non-virtualized drivers) will return
+ * EOPNOTSUPP, which can be used by userspace to gauge
+ * requirements of the hardware/drivers they're running on.
+ *
+ * This capability is always supported for atomic-capable virtualized
+ * drivers starting from kernel version 6.6.
+ */
+#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT	6
+
 /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
-- 
GitLab


From 4653f9d014117f78813cae7b022c15b899c77d7b Mon Sep 17 00:00:00 2001
From: Michael Banack <banackm@vmware.com>
Date: Mon, 23 Oct 2023 09:46:13 +0200
Subject: [PATCH 0448/2340] drm: Introduce documentation for hotspot properties

To clarify the intent and reasoning behind the hotspot properties
introduce userspace documentation that goes over cursor handling
in para-virtualized environments.

The documentation is generic enough to not special case for any
specific hypervisor and should apply equally to all.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Michael Banack <banackm@vmware.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-10-aesteve@redhat.com
---
 Documentation/gpu/drm-kms.rst |  6 ++++
 drivers/gpu/drm/drm_plane.c   | 58 ++++++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 270d320407c7c..4a112b8cc7fbe 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -579,6 +579,12 @@ Variable Refresh Properties
 .. kernel-doc:: drivers/gpu/drm/drm_connector.c
    :doc: Variable refresh properties
 
+Cursor Hotspot Properties
+---------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane.c
+   :doc: hotspot properties
+
 Existing KMS Properties
 -----------------------
 
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 1dc00ad4c33c3..f3f2eae83cca8 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -230,6 +230,61 @@ static int create_in_format_blob(struct drm_device *dev, struct drm_plane *plane
 	return 0;
 }
 
+/**
+ * DOC: hotspot properties
+ *
+ * HOTSPOT_X: property to set mouse hotspot x offset.
+ * HOTSPOT_Y: property to set mouse hotspot y offset.
+ *
+ * When the plane is being used as a cursor image to display a mouse pointer,
+ * the "hotspot" is the offset within the cursor image where mouse events
+ * are expected to go.
+ *
+ * Positive values move the hotspot from the top-left corner of the cursor
+ * plane towards the right and bottom.
+ *
+ * Most display drivers do not need this information because the
+ * hotspot is not actually connected to anything visible on screen.
+ * However, this is necessary for display drivers like the para-virtualized
+ * drivers (eg qxl, vbox, virtio, vmwgfx), that are attached to a user console
+ * with a mouse pointer.  Since these consoles are often being remoted over a
+ * network, they would otherwise have to wait to display the pointer movement to
+ * the user until a full network round-trip has occurred.  New mouse events have
+ * to be sent from the user's console, over the network to the virtual input
+ * devices, forwarded to the desktop for processing, and then the cursor plane's
+ * position can be updated and sent back to the user's console over the network.
+ * Instead, with the hotspot information, the console can anticipate the new
+ * location, and draw the mouse cursor there before the confirmation comes in.
+ * To do that correctly, the user's console must be able predict how the
+ * desktop will process mouse events, which normally requires the desktop's
+ * mouse topology information, ie where each CRTC sits in the mouse coordinate
+ * space.  This is typically sent to the para-virtualized drivers using some
+ * driver-specific method, and the driver then forwards it to the console by
+ * way of the virtual display device or hypervisor.
+ *
+ * The assumption is generally made that there is only one cursor plane being
+ * used this way at a time, and that the desktop is feeding all mouse devices
+ * into the same global pointer.  Para-virtualized drivers that require this
+ * should only be exposing a single cursor plane, or find some other way
+ * to coordinate with a userspace desktop that supports multiple pointers.
+ * If the hotspot properties are set, the cursor plane is therefore assumed to be
+ * used only for displaying a mouse cursor image, and the position of the combined
+ * cursor plane + offset can therefore be used for coordinating with input from a
+ * mouse device.
+ *
+ * The cursor will then be drawn either at the location of the plane in the CRTC
+ * console, or as a free-floating cursor plane on the user's console
+ * corresponding to their desktop mouse position.
+ *
+ * DRM clients which would like to work correctly on drivers which expose
+ * hotspot properties should advertise DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT.
+ * Setting this property on drivers which do not special case
+ * cursor planes will return EOPNOTSUPP, which can be used by userspace to
+ * gauge requirements of the hardware/drivers they're running on. Advertising
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT implies that the userspace client will be
+ * correctly setting the hotspot properties.
+ */
+
 /**
  * drm_plane_create_hotspot_properties - creates the mouse hotspot
  * properties and attaches them to the given cursor plane
@@ -237,7 +292,8 @@ static int create_in_format_blob(struct drm_device *dev, struct drm_plane *plane
  * @plane: drm cursor plane
  *
  * This function enables the mouse hotspot property on a given
- * cursor plane.
+ * cursor plane. Look at the documentation for hotspot properties
+ * to get a better understanding for what they're used for.
  *
  * RETURNS:
  * Zero for success or -errno
-- 
GitLab


From 35ed38d58257336c1df26b14fd5110b026e2adde Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 23 Nov 2023 23:13:00 +0100
Subject: [PATCH 0449/2340] drm: Allow drivers to indicate the damage helpers
 to ignore damage clips

It allows drivers to set a struct drm_plane_state .ignore_damage_clips in
their plane's .atomic_check callback, as an indication to damage helpers
such as drm_atomic_helper_damage_iter_init() that the damage clips should
be ignored.

To be used by drivers that do per-buffer (e.g: virtio-gpu) uploads (rather
than per-plane uploads), since these type of drivers need to handle buffer
damages instead of frame damages.

That way, these drivers could force a full plane update if the framebuffer
attached to a plane's state has changed since the last update (page-flip).

Fixes: 01f05940a9a7 ("drm/virtio: Enable fb damage clips property for the primary plane")
Cc: <stable@vger.kernel.org> # v6.4+
Reported-by: nerdopolis <bluescreen_avenger@verizon.net>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218115
Suggested-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Zack Rusin <zackr@vmware.com>
Acked-by: Sima Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123221315.3579454-2-javierm@redhat.com
---
 Documentation/gpu/drm-kms.rst       |  2 ++
 drivers/gpu/drm/drm_damage_helper.c |  3 ++-
 include/drm/drm_plane.h             | 10 ++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 4a112b8cc7fbe..13d3627d8bc08 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -548,6 +548,8 @@ Plane Composition Properties
 .. kernel-doc:: drivers/gpu/drm/drm_blend.c
    :doc: overview
 
+.. _damage_tracking_properties:
+
 Damage Tracking Properties
 --------------------------
 
diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c
index d8b2955e88fd0..afb02aae707b4 100644
--- a/drivers/gpu/drm/drm_damage_helper.c
+++ b/drivers/gpu/drm/drm_damage_helper.c
@@ -241,7 +241,8 @@ drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter,
 	iter->plane_src.x2 = (src.x2 >> 16) + !!(src.x2 & 0xFFFF);
 	iter->plane_src.y2 = (src.y2 >> 16) + !!(src.y2 & 0xFFFF);
 
-	if (!iter->clips || !drm_rect_equals(&state->src, &old_state->src)) {
+	if (!iter->clips || state->ignore_damage_clips ||
+	    !drm_rect_equals(&state->src, &old_state->src)) {
 		iter->clips = NULL;
 		iter->num_clips = 0;
 		iter->full_update = true;
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index e2c671585775b..c6565a6f9324c 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -194,6 +194,16 @@ struct drm_plane_state {
 	 */
 	struct drm_property_blob *fb_damage_clips;
 
+	/**
+	 * @ignore_damage_clips:
+	 *
+	 * Set by drivers to indicate the drm_atomic_helper_damage_iter_init()
+	 * helper that the @fb_damage_clips blob property should be ignored.
+	 *
+	 * See :ref:`damage_tracking_properties` for more information.
+	 */
+	bool ignore_damage_clips;
+
 	/**
 	 * @src:
 	 *
-- 
GitLab


From 0240db231dfe5ee5b7a3a03cba96f0844b7a673d Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 23 Nov 2023 23:13:01 +0100
Subject: [PATCH 0450/2340] drm/virtio: Disable damage clipping if FB changed
 since last page-flip

The driver does per-buffer uploads and needs to force a full plane update
if the plane's attached framebuffer has change since the last page-flip.

Fixes: 01f05940a9a7 ("drm/virtio: Enable fb damage clips property for the primary plane")
Cc: <stable@vger.kernel.org> # v6.4+
Reported-by: nerdopolis <bluescreen_avenger@verizon.net>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218115
Suggested-by: Sima Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Zack Rusin <zackr@vmware.com>
Acked-by: Sima Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123221315.3579454-3-javierm@redhat.com
---
 drivers/gpu/drm/virtio/virtgpu_plane.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index 20de599658c1f..a72a2dbda031c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -79,6 +79,8 @@ static int virtio_gpu_plane_atomic_check(struct drm_plane *plane,
 {
 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
 										 plane);
+	struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state,
+										 plane);
 	bool is_cursor = plane->type == DRM_PLANE_TYPE_CURSOR;
 	struct drm_crtc_state *crtc_state;
 	int ret;
@@ -86,6 +88,14 @@ static int virtio_gpu_plane_atomic_check(struct drm_plane *plane,
 	if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc))
 		return 0;
 
+	/*
+	 * Ignore damage clips if the framebuffer attached to the plane's state
+	 * has changed since the last plane update (page-flip). In this case, a
+	 * full plane update should happen because uploads are done per-buffer.
+	 */
+	if (old_plane_state->fb != new_plane_state->fb)
+		new_plane_state->ignore_damage_clips = true;
+
 	crtc_state = drm_atomic_get_crtc_state(state,
 					       new_plane_state->crtc);
 	if (IS_ERR(crtc_state))
-- 
GitLab


From b83b2a80d662cc8ba9d78db64fb70fbb5a481d9c Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 23 Nov 2023 23:13:02 +0100
Subject: [PATCH 0451/2340] drm/vmwgfx: Disable damage clipping if FB changed
 since last page-flip

The driver does per-buffer uploads and needs to force a full plane update
if the plane's attached framebuffer has change since the last page-flip.

Suggested-by: Sima Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Zack Rusin <zackr@vmware.com>
Acked-by: Sima Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123221315.3579454-4-javierm@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index bea0abc3d4188..5fd0ccaa0b41b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -832,10 +832,21 @@ int vmw_du_primary_plane_atomic_check(struct drm_plane *plane,
 {
 	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
 									   plane);
+	struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state,
+									   plane);
 	struct drm_crtc_state *crtc_state = NULL;
 	struct drm_framebuffer *new_fb = new_state->fb;
+	struct drm_framebuffer *old_fb = old_state->fb;
 	int ret;
 
+	/*
+	 * Ignore damage clips if the framebuffer attached to the plane's state
+	 * has changed since the last plane update (page-flip). In this case, a
+	 * full plane update should happen because uploads are done per-buffer.
+	 */
+	if (old_fb != new_fb)
+		new_state->ignore_damage_clips = true;
+
 	if (new_state->crtc)
 		crtc_state = drm_atomic_get_new_crtc_state(state,
 							   new_state->crtc);
-- 
GitLab


From 017bdf8fa20175b9cccbc746122256432a599845 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 23 Nov 2023 23:13:03 +0100
Subject: [PATCH 0452/2340] drm/plane: Extend damage tracking kernel-doc

The "Damage Tracking Properties" section in the documentation doesn't have
info about the two type of damage handling: frame damage vs buffer damage.

Add it to the section and mention that helpers only support frame damage,
and how drivers handling buffer damage can indicate that the damage clips
should be ignored.

Also add references to further documentation about the two damage types.

Suggested-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Zack Rusin <zackr@vmware.com>
Acked-by: Sima Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123221315.3579454-5-javierm@redhat.com
---
 drivers/gpu/drm/drm_plane.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index f3f2eae83cca8..9e8e4c60983d6 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -1558,6 +1558,36 @@ out:
  * Drivers implementing damage can use drm_atomic_helper_damage_iter_init() and
  * drm_atomic_helper_damage_iter_next() helper iterator function to get damage
  * rectangles clipped to &drm_plane_state.src.
+ *
+ * Note that there are two types of damage handling: frame damage and buffer
+ * damage, the type of damage handling implemented depends on a driver's upload
+ * target. Drivers implementing a per-plane or per-CRTC upload target need to
+ * handle frame damage, while drivers implementing a per-buffer upload target
+ * need to handle buffer damage.
+ *
+ * The existing damage helpers only support the frame damage type, there is no
+ * buffer age support or similar damage accumulation algorithm implemented yet.
+ *
+ * Only drivers handling frame damage can use the mentioned damage helpers to
+ * iterate over the damaged regions. Drivers that handle buffer damage, must set
+ * &drm_plane_state.ignore_damage_clips for drm_atomic_helper_damage_iter_init()
+ * to know that damage clips should be ignored and return &drm_plane_state.src
+ * as the damage rectangle, to force a full plane update.
+ *
+ * Drivers with a per-buffer upload target could compare the &drm_plane_state.fb
+ * of the old and new plane states to determine if the framebuffer attached to a
+ * plane has changed or not since the last plane update. If &drm_plane_state.fb
+ * has changed, then &drm_plane_state.ignore_damage_clips must be set to true.
+ *
+ * That is because drivers with a per-plane upload target, expect the backing
+ * storage buffer to not change for a given plane. If the upload buffer changes
+ * between page flips, the new upload buffer has to be updated as a whole. This
+ * can be improved in the future if support for frame damage is added to the DRM
+ * damage helpers, similarly to how user-space already handle this case as it is
+ * explained in the following documents:
+ *
+ *     https://registry.khronos.org/EGL/extensions/KHR/EGL_KHR_swap_buffers_with_damage.txt
+ *     https://emersion.fr/blog/2019/intro-to-damage-tracking/
  */
 
 /**
-- 
GitLab


From 6c18005d8fabe8a6835fc0f96102d4269199e05b Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 23 Nov 2023 23:13:04 +0100
Subject: [PATCH 0453/2340] drm/todo: Add entry about implementing buffer age
 for damage tracking

Currently, only damage tracking for frame damage is supported. If a driver
needs to do buffer damage (e.g: the framebuffer attached to plane's state
has changed since the last page-flip), the damage helpers just fallback to
a full plane update.

Add en entry in the TODO about implementing buffer age or any other damage
accumulation algorithm for buffer damage handling.

Suggested-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Zack Rusin <zackr@vmware.com>
Acked-by: Sima Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123221315.3579454-6-javierm@redhat.com
---
 Documentation/gpu/todo.rst | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index b62c7fa0c2bcc..503d57c752159 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -782,6 +782,29 @@ Contact: Hans de Goede
 
 Level: Advanced
 
+Buffer age or other damage accumulation algorithm for buffer damage
+===================================================================
+
+Drivers that do per-buffer uploads, need a buffer damage handling (rather than
+frame damage like drivers that do per-plane or per-CRTC uploads), but there is
+no support to get the buffer age or any other damage accumulation algorithm.
+
+For this reason, the damage helpers just fallback to a full plane update if the
+framebuffer attached to a plane has changed since the last page-flip. Drivers
+set &drm_plane_state.ignore_damage_clips to true as indication to
+drm_atomic_helper_damage_iter_init() and drm_atomic_helper_damage_iter_next()
+helpers that the damage clips should be ignored.
+
+This should be improved to get damage tracking properly working on drivers that
+do per-buffer uploads.
+
+More information about damage tracking and references to learning materials can
+be found in :ref:`damage_tracking_properties`.
+
+Contact: Javier Martinez Canillas <javierm@redhat.com>
+
+Level: Advanced
+
 Outside DRM
 ===========
 
-- 
GitLab


From 014f831abcb82738e57c0b00db66dfef0798ed67 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Mon, 13 Nov 2023 23:12:00 +0100
Subject: [PATCH 0454/2340] drm/nouveau: use GPUVM common infrastructure

GPUVM provides common infrastructure to track external and evicted GEM
objects as well as locking and validation helpers.

Especially external and evicted object tracking is a huge improvement
compared to the current brute force approach of iterating all mappings
in order to lock and validate the GPUVM's GEM objects. Hence, make us of
it.

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113221202.7203-1-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c    |   4 +-
 drivers/gpu/drm/nouveau/nouveau_exec.c  |  57 +++-------
 drivers/gpu/drm/nouveau/nouveau_exec.h  |   4 -
 drivers/gpu/drm/nouveau/nouveau_sched.c |   9 +-
 drivers/gpu/drm/nouveau/nouveau_sched.h |   7 +-
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  | 134 +++++++++++++-----------
 6 files changed, 100 insertions(+), 115 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7afad86da64b7..b7dda486a7eac 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1061,17 +1061,18 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
+	struct drm_gem_object *obj = &bo->base;
 	struct ttm_resource *old_reg = bo->resource;
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
-
 	if (new_reg->mem_type == TTM_PL_TT) {
 		ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg);
 		if (ret)
 			return ret;
 	}
 
+	drm_gpuvm_bo_gem_evict(obj, evict);
 	nouveau_bo_move_ntfy(bo, new_reg);
 	ret = ttm_bo_wait_ctx(bo, ctx);
 	if (ret)
@@ -1136,6 +1137,7 @@ out:
 out_ntfy:
 	if (ret) {
 		nouveau_bo_move_ntfy(bo, bo->resource);
+		drm_gpuvm_bo_gem_evict(obj, !evict);
 	}
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index 9a5ef574744b3..f8ef7004d1338 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: MIT
 
-#include <drm/drm_exec.h>
-
 #include "nouveau_drv.h"
 #include "nouveau_gem.h"
 #include "nouveau_mem.h"
@@ -86,14 +84,12 @@
  */
 
 static int
-nouveau_exec_job_submit(struct nouveau_job *job)
+nouveau_exec_job_submit(struct nouveau_job *job,
+			struct drm_gpuvm_exec *vme)
 {
 	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 	struct nouveau_cli *cli = job->cli;
 	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
-	struct drm_exec *exec = &job->exec;
-	struct drm_gem_object *obj;
-	unsigned long index;
 	int ret;
 
 	/* Create a new fence, but do not emit yet. */
@@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_job *job)
 		return ret;
 
 	nouveau_uvmm_lock(uvmm);
-	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
-			    DRM_EXEC_IGNORE_DUPLICATES);
-	drm_exec_until_all_locked(exec) {
-		struct drm_gpuva *va;
-
-		drm_gpuvm_for_each_va(va, &uvmm->base) {
-			if (unlikely(va == &uvmm->base.kernel_alloc_node))
-				continue;
-
-			ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
-			drm_exec_retry_on_contention(exec);
-			if (ret)
-				goto err_uvmm_unlock;
-		}
+	ret = drm_gpuvm_exec_lock(vme);
+	if (ret) {
+		nouveau_uvmm_unlock(uvmm);
+		return ret;
 	}
 	nouveau_uvmm_unlock(uvmm);
 
-	drm_exec_for_each_locked_object(exec, index, obj) {
-		struct nouveau_bo *nvbo = nouveau_gem_object(obj);
-
-		ret = nouveau_bo_validate(nvbo, true, false);
-		if (ret)
-			goto err_exec_fini;
+	ret = drm_gpuvm_exec_validate(vme);
+	if (ret) {
+		drm_gpuvm_exec_unlock(vme);
+		return ret;
 	}
 
 	return 0;
-
-err_uvmm_unlock:
-	nouveau_uvmm_unlock(uvmm);
-err_exec_fini:
-	drm_exec_fini(exec);
-	return ret;
-
 }
 
 static void
-nouveau_exec_job_armed_submit(struct nouveau_job *job)
+nouveau_exec_job_armed_submit(struct nouveau_job *job,
+			      struct drm_gpuvm_exec *vme)
 {
-	struct drm_exec *exec = &job->exec;
-	struct drm_gem_object *obj;
-	unsigned long index;
-
-	drm_exec_for_each_locked_object(exec, index, obj)
-		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
-	drm_exec_fini(exec);
+	drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+				      job->resv_usage, job->resv_usage);
+	drm_gpuvm_exec_unlock(vme);
 }
 
 static struct dma_fence *
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h
index 5488d337bcc0e..5a8fe1208a444 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.h
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.h
@@ -3,16 +3,12 @@
 #ifndef __NOUVEAU_EXEC_H__
 #define __NOUVEAU_EXEC_H__
 
-#include <drm/drm_exec.h>
-
 #include "nouveau_drv.h"
 #include "nouveau_sched.h"
 
 struct nouveau_exec_job_args {
 	struct drm_file *file_priv;
 	struct nouveau_sched_entity *sched_entity;
-
-	struct drm_exec exec;
 	struct nouveau_channel *chan;
 
 	struct {
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index e03fd2bc8a11b..f69f4cbf251be 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -263,6 +263,11 @@ nouveau_job_submit(struct nouveau_job *job)
 {
 	struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
 	struct dma_fence *done_fence = NULL;
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &nouveau_cli_uvmm(job->cli)->base,
+		.flags = DRM_EXEC_IGNORE_DUPLICATES,
+		.num_fences = 1,
+	};
 	int ret;
 
 	ret = nouveau_job_add_deps(job);
@@ -282,7 +287,7 @@ nouveau_job_submit(struct nouveau_job *job)
 	 * successfully.
 	 */
 	if (job->ops->submit) {
-		ret = job->ops->submit(job);
+		ret = job->ops->submit(job, &vm_exec);
 		if (ret)
 			goto err_cleanup;
 	}
@@ -315,7 +320,7 @@ nouveau_job_submit(struct nouveau_job *job)
 	set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);
 
 	if (job->ops->armed_submit)
-		job->ops->armed_submit(job);
+		job->ops->armed_submit(job, &vm_exec);
 
 	nouveau_job_fence_attach(job);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index 27ac19792597b..0f87697dbc9ee 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -5,7 +5,7 @@
 
 #include <linux/types.h>
 
-#include <drm/drm_exec.h>
+#include <drm/drm_gpuvm.h>
 #include <drm/gpu_scheduler.h>
 
 #include "nouveau_drv.h"
@@ -54,7 +54,6 @@ struct nouveau_job {
 	struct drm_file *file_priv;
 	struct nouveau_cli *cli;
 
-	struct drm_exec exec;
 	enum dma_resv_usage resv_usage;
 	struct dma_fence *done_fence;
 
@@ -76,8 +75,8 @@ struct nouveau_job {
 		/* If .submit() returns without any error, it is guaranteed that
 		 * armed_submit() is called.
 		 */
-		int (*submit)(struct nouveau_job *);
-		void (*armed_submit)(struct nouveau_job *);
+		int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
+		void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
 		struct dma_fence *(*run)(struct nouveau_job *);
 		void (*free)(struct nouveau_job *);
 		enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index eda7bb8624f11..90e6732ee9b5e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -438,8 +438,9 @@ nouveau_uvma_region_complete(struct nouveau_uvma_region *reg)
 static void
 op_map_prepare_unwind(struct nouveau_uvma *uvma)
 {
+	struct drm_gpuva *va = &uvma->va;
 	nouveau_uvma_gem_put(uvma);
-	drm_gpuva_remove(&uvma->va);
+	drm_gpuva_remove(va);
 	nouveau_uvma_free(uvma);
 }
 
@@ -468,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm,
 			break;
 		case DRM_GPUVA_OP_REMAP: {
 			struct drm_gpuva_op_remap *r = &op->remap;
+			struct drm_gpuva *va = r->unmap->va;
 
 			if (r->next)
 				op_map_prepare_unwind(new->next);
@@ -475,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm,
 			if (r->prev)
 				op_map_prepare_unwind(new->prev);
 
-			op_unmap_prepare_unwind(r->unmap->va);
+			op_unmap_prepare_unwind(va);
 			break;
 		}
 		case DRM_GPUVA_OP_UNMAP:
@@ -634,6 +636,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
 					goto unwind;
 				}
 			}
+
 			break;
 		}
 		case DRM_GPUVA_OP_REMAP: {
@@ -1135,12 +1138,53 @@ bind_link_gpuvas(struct bind_job_op *bop)
 }
 
 static int
-nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
+bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec,
+		   unsigned int num_fences)
+{
+	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+	struct bind_job_op *op;
+	int ret;
+
+	list_for_each_op(op, &bind_job->ops) {
+		struct drm_gpuva_op *va_op;
+
+		if (!op->ops)
+			continue;
+
+		drm_gpuva_for_each_op(va_op, op->ops) {
+			struct drm_gem_object *obj = op_gem_obj(va_op);
+
+			if (unlikely(!obj))
+				continue;
+
+			ret = drm_exec_prepare_obj(exec, obj, num_fences);
+			if (ret)
+				return ret;
+
+			/* Don't validate GEMs backing mappings we're about to
+			 * unmap, it's not worth the effort.
+			 */
+			if (va_op->op == DRM_GPUVA_OP_UNMAP)
+				continue;
+
+			ret = nouveau_bo_validate(nouveau_gem_object(obj),
+						  true, false);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int
+nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
+			     struct drm_gpuvm_exec *vme)
 {
 	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
 	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
 	struct nouveau_sched_entity *entity = job->entity;
-	struct drm_exec *exec = &job->exec;
+	struct drm_exec *exec = &vme->exec;
 	struct bind_job_op *op;
 	int ret;
 
@@ -1157,6 +1201,8 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
 			dma_resv_unlock(obj->resv);
 			if (IS_ERR(op->vm_bo))
 				return PTR_ERR(op->vm_bo);
+
+			drm_gpuvm_bo_extobj_add(op->vm_bo);
 		}
 
 		ret = bind_validate_op(job, op);
@@ -1179,6 +1225,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
 	 * unwind all GPU VA space changes on failure.
 	 */
 	nouveau_uvmm_lock(uvmm);
+
 	list_for_each_op(op, &bind_job->ops) {
 		switch (op->op) {
 		case OP_MAP_SPARSE:
@@ -1290,55 +1337,13 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
 		}
 	}
 
-	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
-			    DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_init(exec, vme->flags);
 	drm_exec_until_all_locked(exec) {
-		list_for_each_op(op, &bind_job->ops) {
-			struct drm_gpuva_op *va_op;
-
-			if (IS_ERR_OR_NULL(op->ops))
-				continue;
-
-			drm_gpuva_for_each_op(va_op, op->ops) {
-				struct drm_gem_object *obj = op_gem_obj(va_op);
-
-				if (unlikely(!obj))
-					continue;
-
-				ret = drm_exec_prepare_obj(exec, obj, 1);
-				drm_exec_retry_on_contention(exec);
-				if (ret) {
-					op = list_last_op(&bind_job->ops);
-					goto unwind;
-				}
-			}
-		}
-	}
-
-	list_for_each_op(op, &bind_job->ops) {
-		struct drm_gpuva_op *va_op;
-
-		if (IS_ERR_OR_NULL(op->ops))
-			continue;
-
-		drm_gpuva_for_each_op(va_op, op->ops) {
-			struct drm_gem_object *obj = op_gem_obj(va_op);
-
-			if (unlikely(!obj))
-				continue;
-
-			/* Don't validate GEMs backing mappings we're about to
-			 * unmap, it's not worth the effort.
-			 */
-			if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP))
-				continue;
-
-			ret = nouveau_bo_validate(nouveau_gem_object(obj),
-						  true, false);
-			if (ret) {
-				op = list_last_op(&bind_job->ops);
-				goto unwind;
-			}
+		ret = bind_lock_validate(job, exec, vme->num_fences);
+		drm_exec_retry_on_contention(exec);
+		if (ret) {
+			op = list_last_op(&bind_job->ops);
+			goto unwind;
 		}
 	}
 
@@ -1413,21 +1418,17 @@ unwind:
 	}
 
 	nouveau_uvmm_unlock(uvmm);
-	drm_exec_fini(exec);
+	drm_gpuvm_exec_unlock(vme);
 	return ret;
 }
 
 static void
-nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job)
+nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job,
+				   struct drm_gpuvm_exec *vme)
 {
-	struct drm_exec *exec = &job->exec;
-	struct drm_gem_object *obj;
-	unsigned long index;
-
-	drm_exec_for_each_locked_object(exec, index, obj)
-		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
-	drm_exec_fini(exec);
+	drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+				      job->resv_usage, job->resv_usage);
+	drm_gpuvm_exec_unlock(vme);
 }
 
 static struct dma_fence *
@@ -1815,8 +1816,17 @@ nouveau_uvmm_free(struct drm_gpuvm *gpuvm)
 	kfree(uvmm);
 }
 
+static int
+nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+	struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj);
+
+	return nouveau_bo_validate(nvbo, true, false);
+}
+
 static const struct drm_gpuvm_ops gpuvm_ops = {
 	.vm_free = nouveau_uvmm_free,
+	.vm_bo_validate = nouveau_uvmm_bo_validate,
 };
 
 int
-- 
GitLab


From 5f03a507b29e44a848f315c7240c19894dd8be4f Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Tue, 14 Nov 2023 01:27:24 +0100
Subject: [PATCH 0455/2340] drm/nouveau: implement 1:1 scheduler - entity
 relationship

Recent patches to the DRM scheduler [1][2] allow for a variable number
of run-queues and add support for (shared) workqueues rather than
dedicated kthreads per scheduler. This allows us to create a 1:1
relationship between a GPU scheduler and a scheduler entity, in order to
properly support firmware schedulers being able to handle an arbitrary
amount of dynamically allocated command ring buffers. This perfectly
matches Nouveau's needs, hence make use of it.

Topology wise we create one scheduler instance per client (handling
VM_BIND jobs) and one scheduler instance per channel (handling EXEC
jobs).

All channel scheduler instances share a workqueue, but every client
scheduler instance has a dedicated workqueue. The latter is required to
ensure that for VM_BIND job's free_job() work and run_job() work can
always run concurrently and hence, free_job() work can never stall
run_job() work. For EXEC jobs we don't have this requirement, since EXEC
job's free_job() does not require to take any locks which indirectly or
directly are held for allocations elsewhere.

[1] https://lore.kernel.org/all/8f53f7ef-7621-4f0b-bdef-d8d20bc497ff@redhat.com/T/
[2] https://lore.kernel.org/all/20231031032439.1558703-1-matthew.brost@intel.com/T/

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114002728.3491-1-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_abi16.c |  18 +--
 drivers/gpu/drm/nouveau/nouveau_abi16.h |   2 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c   |  31 ++--
 drivers/gpu/drm/nouveau/nouveau_drv.h   |   9 +-
 drivers/gpu/drm/nouveau/nouveau_exec.c  |   7 +-
 drivers/gpu/drm/nouveau/nouveau_exec.h  |   2 +-
 drivers/gpu/drm/nouveau/nouveau_sched.c | 197 +++++++++++++-----------
 drivers/gpu/drm/nouveau/nouveau_sched.h |  35 ++---
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  |  69 +++------
 drivers/gpu/drm/nouveau/nouveau_uvmm.h  |   4 +-
 10 files changed, 179 insertions(+), 195 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 2edd7bb13faea..7542e17871831 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -127,21 +127,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 {
 	struct nouveau_abi16_ntfy *ntfy, *temp;
 
-	/* When a client exits without waiting for it's queued up jobs to
-	 * finish it might happen that we fault the channel. This is due to
-	 * drm_file_free() calling drm_gem_release() before the postclose()
-	 * callback. Hence, we can't tear down this scheduler entity before
-	 * uvmm mappings are unmapped. Currently, we can't detect this case.
-	 *
-	 * However, this should be rare and harmless, since the channel isn't
-	 * needed anymore.
-	 */
-	nouveau_sched_entity_fini(&chan->sched_entity);
+	/* Cancel all jobs from the entity's queue. */
+	drm_sched_entity_fini(&chan->sched.entity);
 
-	/* wait for all activity to stop before cleaning up */
 	if (chan->chan)
 		nouveau_channel_idle(chan->chan);
 
+	nouveau_sched_fini(&chan->sched);
+
 	/* cleanup notifier state */
 	list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
 		nouveau_abi16_ntfy_fini(chan, ntfy);
@@ -344,8 +337,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	if (ret)
 		goto done;
 
-	ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched,
-					drm->sched_wq);
+	ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq);
 	if (ret)
 		goto done;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 9f538486c10e3..1f5e243c0c759 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
 	struct nouveau_bo *ntfy;
 	struct nouveau_vma *ntfy_vma;
 	struct nvkm_mm  heap;
-	struct nouveau_sched_entity sched_entity;
+	struct nouveau_sched sched;
 };
 
 struct nouveau_abi16 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index f603eaef15608..7e5f19153829e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -201,9 +201,9 @@ nouveau_cli_fini(struct nouveau_cli *cli)
 	WARN_ON(!list_empty(&cli->worker));
 
 	usif_client_fini(cli);
+	nouveau_sched_fini(&cli->sched);
 	if (uvmm)
 		nouveau_uvmm_fini(uvmm);
-	nouveau_sched_entity_fini(&cli->sched_entity);
 	nouveau_vmm_fini(&cli->svm);
 	nouveau_vmm_fini(&cli->vmm);
 	nvif_mmu_dtor(&cli->mmu);
@@ -310,8 +310,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
 
 	cli->mem = &mems[ret];
 
-	ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched,
-					drm->sched_wq);
+	/* Don't pass in the (shared) sched_wq in order to let
+	 * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+	 *
+	 * This is required to ensure that for VM_BIND jobs free_job() work and
+	 * run_job() work can always run concurrently and hence, free_job() work
+	 * can never stall run_job() work. For EXEC jobs we don't have this
+	 * requirement, since EXEC job's free_job() does not require to take any
+	 * locks which indirectly or directly are held for allocations
+	 * elsewhere.
+	 */
+	ret = nouveau_sched_init(&cli->sched, drm, NULL);
 	if (ret)
 		goto done;
 
@@ -582,13 +591,16 @@ nouveau_drm_device_init(struct drm_device *dev)
 	nvif_parent_ctor(&nouveau_parent, &drm->parent);
 	drm->master.base.object.parent = &drm->parent;
 
-	ret = nouveau_sched_init(drm);
-	if (ret)
+	drm->sched_wq = alloc_workqueue("nouveau_sched_wq_shared", 0,
+					WQ_MAX_ACTIVE);
+	if (!drm->sched_wq) {
+		ret = -ENOMEM;
 		goto fail_alloc;
+	}
 
 	ret = nouveau_cli_init(drm, "DRM-master", &drm->master);
 	if (ret)
-		goto fail_sched;
+		goto fail_wq;
 
 	ret = nouveau_cli_init(drm, "DRM", &drm->client);
 	if (ret)
@@ -658,8 +670,8 @@ fail_ttm:
 	nouveau_cli_fini(&drm->client);
 fail_master:
 	nouveau_cli_fini(&drm->master);
-fail_sched:
-	nouveau_sched_fini(drm);
+fail_wq:
+	destroy_workqueue(drm->sched_wq);
 fail_alloc:
 	nvif_parent_dtor(&drm->parent);
 	kfree(drm);
@@ -711,10 +723,9 @@ nouveau_drm_device_fini(struct drm_device *dev)
 	}
 	mutex_unlock(&drm->clients_lock);
 
-	nouveau_sched_fini(drm);
-
 	nouveau_cli_fini(&drm->client);
 	nouveau_cli_fini(&drm->master);
+	destroy_workqueue(drm->sched_wq);
 	nvif_parent_dtor(&drm->parent);
 	mutex_destroy(&drm->clients_lock);
 	kfree(drm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 7f7051df84a6d..8a6d94c8b1631 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -98,7 +98,7 @@ struct nouveau_cli {
 		bool disabled;
 	} uvmm;
 
-	struct nouveau_sched_entity sched_entity;
+	struct nouveau_sched sched;
 
 	const struct nvif_mclass *mem;
 
@@ -258,6 +258,9 @@ struct nouveau_drm {
 		u64 context_base;
 	} *runl;
 
+	/* Workqueue used for channel schedulers. */
+	struct workqueue_struct *sched_wq;
+
 	/* context for accelerated drm-internal operations */
 	struct nouveau_channel *cechan;
 	struct nouveau_channel *channel;
@@ -298,10 +301,6 @@ struct nouveau_drm {
 		struct mutex lock;
 		bool component_registered;
 	} audio;
-
-	struct drm_gpu_scheduler sched;
-	struct workqueue_struct *sched_wq;
-
 };
 
 static inline struct nouveau_drm *
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index f8ef7004d1338..a179eeb6df093 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -165,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job)
 {
 	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 
+	nouveau_job_done(job);
 	nouveau_job_free(job);
 
 	kfree(exec_job->fence);
@@ -184,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job)
 	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
 		  chan->chid);
 
-	nouveau_sched_entity_fini(job->entity);
-
 	return DRM_GPU_SCHED_STAT_NOMINAL;
 }
 
@@ -234,7 +233,7 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob,
 
 	job->chan = __args->chan;
 
-	args.sched_entity = __args->sched_entity;
+	args.sched = __args->sched;
 	args.file_priv = __args->file_priv;
 
 	args.in_sync.count = __args->in_sync.count;
@@ -388,7 +387,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
 	if (ret)
 		goto out;
 
-	args.sched_entity = &chan16->sched_entity;
+	args.sched = &chan16->sched;
 	args.file_priv = file_priv;
 	args.chan = chan;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h
index 5a8fe1208a444..9b3b151facfd8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.h
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.h
@@ -8,7 +8,7 @@
 
 struct nouveau_exec_job_args {
 	struct drm_file *file_priv;
-	struct nouveau_sched_entity *sched_entity;
+	struct nouveau_sched *sched;
 	struct nouveau_channel *chan;
 
 	struct {
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index f69f4cbf251be..0f4583f6fcad3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -12,30 +12,29 @@
 #include "nouveau_abi16.h"
 #include "nouveau_sched.h"
 
-/* FIXME
- *
- * We want to make sure that jobs currently executing can't be deferred by
- * other jobs competing for the hardware. Otherwise we might end up with job
- * timeouts just because of too many clients submitting too many jobs. We don't
- * want jobs to time out because of system load, but because of the job being
- * too bulky.
- *
- * For now allow for up to 16 concurrent jobs in flight until we know how many
- * rings the hardware can process in parallel.
- */
-#define NOUVEAU_SCHED_HW_SUBMISSIONS		16
+#define NOUVEAU_SCHED_HW_SUBMISSIONS		1
 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS		10000
 
+/* Starts at 0, since the DRM scheduler interprets those parameters as (initial)
+ * index to the run-queue array.
+ */
+enum nouveau_sched_priority {
+	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_MIN,
+	NOUVEAU_SCHED_PRIORITY_COUNT,
+};
+
 int
 nouveau_job_init(struct nouveau_job *job,
 		 struct nouveau_job_args *args)
 {
-	struct nouveau_sched_entity *entity = args->sched_entity;
+	struct nouveau_sched *sched = args->sched;
 	int ret;
 
+	INIT_LIST_HEAD(&job->entry);
+
 	job->file_priv = args->file_priv;
 	job->cli = nouveau_cli(args->file_priv);
-	job->entity = entity;
+	job->sched = sched;
 
 	job->sync = args->sync;
 	job->resv_usage = args->resv_usage;
@@ -89,7 +88,7 @@ nouveau_job_init(struct nouveau_job *job,
 
 	}
 
-	ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL);
+	ret = drm_sched_job_init(&job->base, &sched->entity, 1, NULL);
 	if (ret)
 		goto err_free_chains;
 
@@ -108,6 +107,27 @@ err_free_in_sync:
 return ret;
 }
 
+void
+nouveau_job_fini(struct nouveau_job *job)
+{
+	dma_fence_put(job->done_fence);
+	drm_sched_job_cleanup(&job->base);
+
+	job->ops->free(job);
+}
+
+void
+nouveau_job_done(struct nouveau_job *job)
+{
+	struct nouveau_sched *sched = job->sched;
+
+	spin_lock(&sched->job.list.lock);
+	list_del(&job->entry);
+	spin_unlock(&sched->job.list.lock);
+
+	wake_up(&sched->job.wq);
+}
+
 void
 nouveau_job_free(struct nouveau_job *job)
 {
@@ -117,13 +137,6 @@ nouveau_job_free(struct nouveau_job *job)
 	kfree(job->out_sync.chains);
 }
 
-void nouveau_job_fini(struct nouveau_job *job)
-{
-	dma_fence_put(job->done_fence);
-	drm_sched_job_cleanup(&job->base);
-	job->ops->free(job);
-}
-
 static int
 sync_find_fence(struct nouveau_job *job,
 		struct drm_nouveau_sync *sync,
@@ -261,7 +274,7 @@ nouveau_job_fence_attach(struct nouveau_job *job)
 int
 nouveau_job_submit(struct nouveau_job *job)
 {
-	struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
+	struct nouveau_sched *sched = job->sched;
 	struct dma_fence *done_fence = NULL;
 	struct drm_gpuvm_exec vm_exec = {
 		.vm = &nouveau_cli_uvmm(job->cli)->base,
@@ -281,7 +294,7 @@ nouveau_job_submit(struct nouveau_job *job)
 	/* Make sure the job appears on the sched_entity's queue in the same
 	 * order as it was submitted.
 	 */
-	mutex_lock(&entity->mutex);
+	mutex_lock(&sched->mutex);
 
 	/* Guarantee we won't fail after the submit() callback returned
 	 * successfully.
@@ -292,33 +305,16 @@ nouveau_job_submit(struct nouveau_job *job)
 			goto err_cleanup;
 	}
 
+	/* Submit was successful; add the job to the schedulers job list. */
+	spin_lock(&sched->job.list.lock);
+	list_add(&job->entry, &sched->job.list.head);
+	spin_unlock(&sched->job.list.lock);
+
 	drm_sched_job_arm(&job->base);
 	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
 	if (job->sync)
 		done_fence = dma_fence_get(job->done_fence);
 
-	/* If a sched job depends on a dma-fence from a job from the same GPU
-	 * scheduler instance, but a different scheduler entity, the GPU
-	 * scheduler does only wait for the particular job to be scheduled,
-	 * rather than for the job to fully complete. This is due to the GPU
-	 * scheduler assuming that there is a scheduler instance per ring.
-	 * However, the current implementation, in order to avoid arbitrary
-	 * amounts of kthreads, has a single scheduler instance while scheduler
-	 * entities represent rings.
-	 *
-	 * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all
-	 * out-fences in order to force the scheduler to wait for full job
-	 * completion for dependent jobs from different entities and same
-	 * scheduler instance.
-	 *
-	 * There is some work in progress [1] to address the issues of firmware
-	 * schedulers; once it is in-tree the scheduler topology in Nouveau
-	 * should be re-worked accordingly.
-	 *
-	 * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
-	 */
-	set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);
-
 	if (job->ops->armed_submit)
 		job->ops->armed_submit(job, &vm_exec);
 
@@ -331,7 +327,7 @@ nouveau_job_submit(struct nouveau_job *job)
 
 	drm_sched_entity_push_job(&job->base);
 
-	mutex_unlock(&entity->mutex);
+	mutex_unlock(&sched->mutex);
 
 	if (done_fence) {
 		dma_fence_wait(done_fence, true);
@@ -341,20 +337,13 @@ nouveau_job_submit(struct nouveau_job *job)
 	return 0;
 
 err_cleanup:
-	mutex_unlock(&entity->mutex);
+	mutex_unlock(&sched->mutex);
 	nouveau_job_fence_attach_cleanup(job);
 err:
 	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
 	return ret;
 }
 
-bool
-nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
-			   struct work_struct *work)
-{
-	return queue_work(entity->sched_wq, work);
-}
-
 static struct dma_fence *
 nouveau_job_run(struct nouveau_job *job)
 {
@@ -404,50 +393,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job)
 	nouveau_job_fini(job);
 }
 
-int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
-			      struct drm_gpu_scheduler *sched,
-			      struct workqueue_struct *sched_wq)
-{
-	mutex_init(&entity->mutex);
-	spin_lock_init(&entity->job.list.lock);
-	INIT_LIST_HEAD(&entity->job.list.head);
-	init_waitqueue_head(&entity->job.wq);
-
-	entity->sched_wq = sched_wq;
-	return drm_sched_entity_init(&entity->base,
-				     DRM_SCHED_PRIORITY_NORMAL,
-				     &sched, 1, NULL);
-}
-
-void
-nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
-{
-	drm_sched_entity_destroy(&entity->base);
-}
-
 static const struct drm_sched_backend_ops nouveau_sched_ops = {
 	.run_job = nouveau_sched_run_job,
 	.timedout_job = nouveau_sched_timedout_job,
 	.free_job = nouveau_sched_free_job,
 };
 
-int nouveau_sched_init(struct nouveau_drm *drm)
+int
+nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
+		   struct workqueue_struct *wq)
 {
-	struct drm_gpu_scheduler *sched = &drm->sched;
+	struct drm_gpu_scheduler *drm_sched = &sched->base;
+	struct drm_sched_entity *entity = &sched->entity;
 	long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
+	int ret;
 
-	drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
-	if (!drm->sched_wq)
-		return -ENOMEM;
+	if (!wq) {
+		wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
+				     current->pid);
+		if (!wq)
+			return -ENOMEM;
+
+		sched->wq = wq;
+	}
 
-	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
-			      DRM_SCHED_PRIORITY_COUNT,
-			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
-			      NULL, NULL, "nouveau_sched", drm->dev->dev);
+	ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
+			     NOUVEAU_SCHED_PRIORITY_COUNT,
+			     NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
+			     NULL, NULL, "nouveau_sched", drm->dev->dev);
+	if (ret)
+		goto fail_wq;
+
+	/* Using DRM_SCHED_PRIORITY_MIN, since that's what we're required to use
+	 * when we want to have a single run-queue only.
+	 *
+	 * It's not documented, but one will find out when trying to use any
+	 * other priority running into faults, because the scheduler uses the
+	 * priority as array index.
+	 *
+	 * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
+	 * matching the enum type used in drm_sched_entity_init().
+	 */
+	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_MIN,
+				    &drm_sched, 1, NULL);
+	if (ret)
+		goto fail_sched;
+
+	mutex_init(&sched->mutex);
+	spin_lock_init(&sched->job.list.lock);
+	INIT_LIST_HEAD(&sched->job.list.head);
+	init_waitqueue_head(&sched->job.wq);
+
+	return 0;
+
+fail_sched:
+	drm_sched_fini(drm_sched);
+fail_wq:
+	if (sched->wq)
+		destroy_workqueue(sched->wq);
+	return ret;
 }
 
-void nouveau_sched_fini(struct nouveau_drm *drm)
+void
+nouveau_sched_fini(struct nouveau_sched *sched)
 {
-	destroy_workqueue(drm->sched_wq);
-	drm_sched_fini(&drm->sched);
+	struct drm_gpu_scheduler *drm_sched = &sched->base;
+	struct drm_sched_entity *entity = &sched->entity;
+
+	rmb(); /* for list_empty to work without lock */
+	wait_event(sched->job.wq, list_empty(&sched->job.list.head));
+
+	drm_sched_entity_fini(entity);
+	drm_sched_fini(drm_sched);
+
+	/* Destroy workqueue after scheduler tear down, otherwise it might still
+	 * be in use.
+	 */
+	if (sched->wq)
+		destroy_workqueue(sched->wq);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index 0f87697dbc9ee..23eff4b0f5b2c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -26,7 +26,7 @@ enum nouveau_job_state {
 
 struct nouveau_job_args {
 	struct drm_file *file_priv;
-	struct nouveau_sched_entity *sched_entity;
+	struct nouveau_sched *sched;
 
 	enum dma_resv_usage resv_usage;
 	bool sync;
@@ -49,7 +49,8 @@ struct nouveau_job {
 
 	enum nouveau_job_state state;
 
-	struct nouveau_sched_entity *entity;
+	struct nouveau_sched *sched;
+	struct list_head entry;
 
 	struct drm_file *file_priv;
 	struct nouveau_cli *cli;
@@ -89,20 +90,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args,
 
 int nouveau_job_init(struct nouveau_job *job,
 		     struct nouveau_job_args *args);
-void nouveau_job_free(struct nouveau_job *job);
-
-int nouveau_job_submit(struct nouveau_job *job);
 void nouveau_job_fini(struct nouveau_job *job);
+int nouveau_job_submit(struct nouveau_job *job);
+void nouveau_job_done(struct nouveau_job *job);
+void nouveau_job_free(struct nouveau_job *job);
 
-#define to_nouveau_sched_entity(entity)		\
-		container_of((entity), struct nouveau_sched_entity, base)
-
-struct nouveau_sched_entity {
-	struct drm_sched_entity base;
+struct nouveau_sched {
+	struct drm_gpu_scheduler base;
+	struct drm_sched_entity entity;
+	struct workqueue_struct *wq;
 	struct mutex mutex;
 
-	struct workqueue_struct *sched_wq;
-
 	struct {
 		struct {
 			struct list_head head;
@@ -112,15 +110,8 @@ struct nouveau_sched_entity {
 	} job;
 };
 
-int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
-			      struct drm_gpu_scheduler *sched,
-			      struct workqueue_struct *sched_wq);
-void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity);
-
-bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
-				struct work_struct *work);
-
-int nouveau_sched_init(struct nouveau_drm *drm);
-void nouveau_sched_fini(struct nouveau_drm *drm);
+int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
+		       struct workqueue_struct *wq);
+void nouveau_sched_fini(struct nouveau_sched *sched);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 90e6732ee9b5e..46493fcb82fd2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -963,6 +963,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref)
 {
 	struct nouveau_uvmm_bind_job *job =
 		container_of(kref, struct nouveau_uvmm_bind_job, kref);
+	struct bind_job_op *op, *next;
+
+	list_for_each_op_safe(op, next, &job->ops) {
+		list_del(&op->entry);
+		kfree(op);
+	}
 
 	nouveau_job_free(&job->base);
 	kfree(job);
@@ -1004,14 +1010,16 @@ bind_validate_op(struct nouveau_job *job,
 static void
 bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
 {
-	struct nouveau_uvmm_bind_job *bind_job;
-	struct nouveau_sched_entity *entity = job->entity;
+	struct nouveau_sched *sched = job->sched;
+	struct nouveau_job *__job;
 	struct bind_job_op *op;
 	u64 end = addr + range;
 
 again:
-	spin_lock(&entity->job.list.lock);
-	list_for_each_entry(bind_job, &entity->job.list.head, entry) {
+	spin_lock(&sched->job.list.lock);
+	list_for_each_entry(__job, &sched->job.list.head, entry) {
+		struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
+
 		list_for_each_op(op, &bind_job->ops) {
 			if (op->op == OP_UNMAP) {
 				u64 op_addr = op->va.addr;
@@ -1019,7 +1027,7 @@ again:
 
 				if (!(end <= op_addr || addr >= op_end)) {
 					nouveau_uvmm_bind_job_get(bind_job);
-					spin_unlock(&entity->job.list.lock);
+					spin_unlock(&sched->job.list.lock);
 					wait_for_completion(&bind_job->complete);
 					nouveau_uvmm_bind_job_put(bind_job);
 					goto again;
@@ -1027,7 +1035,7 @@ again:
 			}
 		}
 	}
-	spin_unlock(&entity->job.list.lock);
+	spin_unlock(&sched->job.list.lock);
 }
 
 static int
@@ -1183,7 +1191,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
 {
 	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
 	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
-	struct nouveau_sched_entity *entity = job->entity;
 	struct drm_exec *exec = &vme->exec;
 	struct bind_job_op *op;
 	int ret;
@@ -1380,10 +1387,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
 	}
 	nouveau_uvmm_unlock(uvmm);
 
-	spin_lock(&entity->job.list.lock);
-	list_add(&bind_job->entry, &entity->job.list.head);
-	spin_unlock(&entity->job.list.lock);
-
 	return 0;
 
 unwind_continue:
@@ -1466,14 +1469,11 @@ out:
 }
 
 static void
-nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
+nouveau_uvmm_bind_job_cleanup(struct nouveau_job *job)
 {
-	struct nouveau_uvmm_bind_job *bind_job =
-		container_of(work, struct nouveau_uvmm_bind_job, work);
-	struct nouveau_job *job = &bind_job->base;
+	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
 	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
-	struct nouveau_sched_entity *entity = job->entity;
-	struct bind_job_op *op, *next;
+	struct bind_job_op *op;
 
 	list_for_each_op(op, &bind_job->ops) {
 		struct drm_gem_object *obj = op->gem.obj;
@@ -1525,38 +1525,17 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
 			drm_gem_object_put(obj);
 	}
 
-	spin_lock(&entity->job.list.lock);
-	list_del(&bind_job->entry);
-	spin_unlock(&entity->job.list.lock);
-
+	nouveau_job_done(job);
 	complete_all(&bind_job->complete);
-	wake_up(&entity->job.wq);
-
-	/* Remove and free ops after removing the bind job from the job list to
-	 * avoid races against bind_validate_map_sparse().
-	 */
-	list_for_each_op_safe(op, next, &bind_job->ops) {
-		list_del(&op->entry);
-		kfree(op);
-	}
 
 	nouveau_uvmm_bind_job_put(bind_job);
 }
 
-static void
-nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job)
-{
-	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
-	struct nouveau_sched_entity *entity = job->entity;
-
-	nouveau_sched_entity_qwork(entity, &bind_job->work);
-}
-
 static struct nouveau_job_ops nouveau_bind_job_ops = {
 	.submit = nouveau_uvmm_bind_job_submit,
 	.armed_submit = nouveau_uvmm_bind_job_armed_submit,
 	.run = nouveau_uvmm_bind_job_run,
-	.free = nouveau_uvmm_bind_job_free_qwork,
+	.free = nouveau_uvmm_bind_job_cleanup,
 };
 
 static int
@@ -1617,7 +1596,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
 		return ret;
 
 	INIT_LIST_HEAD(&job->ops);
-	INIT_LIST_HEAD(&job->entry);
 
 	for (i = 0; i < __args->op.count; i++) {
 		ret = bind_job_op_from_uop(&op, &__args->op.s[i]);
@@ -1628,9 +1606,8 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
 	}
 
 	init_completion(&job->complete);
-	INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn);
 
-	args.sched_entity = __args->sched_entity;
+	args.sched = __args->sched;
 	args.file_priv = __args->file_priv;
 
 	args.in_sync.count = __args->in_sync.count;
@@ -1758,7 +1735,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	args.sched_entity = &cli->sched_entity;
+	args.sched = &cli->sched;
 	args.file_priv = file_priv;
 
 	ret = nouveau_uvmm_vm_bind(&args);
@@ -1910,12 +1887,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
 	MA_STATE(mas, &uvmm->region_mt, 0, 0);
 	struct nouveau_uvma_region *reg;
 	struct nouveau_cli *cli = uvmm->vmm.cli;
-	struct nouveau_sched_entity *entity = &cli->sched_entity;
 	struct drm_gpuva *va, *next;
 
-	rmb(); /* for list_empty to work without lock */
-	wait_event(entity->job.wq, list_empty(&entity->job.list.head));
-
 	nouveau_uvmm_lock(uvmm);
 	drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) {
 		struct nouveau_uvma *uvma = uvma_from_va(va);
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
index f0a6d98ace4fd..9d3c348581eb3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -44,8 +44,6 @@ struct nouveau_uvmm_bind_job {
 	struct nouveau_job base;
 
 	struct kref kref;
-	struct list_head entry;
-	struct work_struct work;
 	struct completion complete;
 
 	/* struct bind_job_op */
@@ -54,7 +52,7 @@ struct nouveau_uvmm_bind_job {
 
 struct nouveau_uvmm_bind_job_args {
 	struct drm_file *file_priv;
-	struct nouveau_sched_entity *sched_entity;
+	struct nouveau_sched *sched;
 
 	unsigned int flags;
 
-- 
GitLab


From 46990918f35c1bf6e367cf8e0423e7344fec9fcb Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Tue, 14 Nov 2023 01:27:25 +0100
Subject: [PATCH 0456/2340] drm/nouveau: enable dynamic job-flow control

Make use of the scheduler's credit limit and scheduler job's credit
count to account for the actual size of a job, such that we fill up the
ring efficiently.

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114002728.3491-2-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_abi16.c | 3 ++-
 drivers/gpu/drm/nouveau/nouveau_drm.c   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_exec.c  | 4 +++-
 drivers/gpu/drm/nouveau/nouveau_sched.c | 9 ++++-----
 drivers/gpu/drm/nouveau/nouveau_sched.h | 3 ++-
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  | 4 +++-
 6 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 7542e17871831..a04156ca8390b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -337,7 +337,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	if (ret)
 		goto done;
 
-	ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq);
+	ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
+				 chan->chan->dma.ib_max);
 	if (ret)
 		goto done;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 7e5f19153829e..6f6c31a9937b2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -320,7 +320,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
 	 * locks which indirectly or directly are held for allocations
 	 * elsewhere.
 	 */
-	ret = nouveau_sched_init(&cli->sched, drm, NULL);
+	ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
 	if (ret)
 		goto done;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index a179eeb6df093..bc5d71b79ab20 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -231,10 +231,12 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob,
 		}
 	}
 
+	args.file_priv = __args->file_priv;
 	job->chan = __args->chan;
 
 	args.sched = __args->sched;
-	args.file_priv = __args->file_priv;
+	/* Plus one to account for the HW fence. */
+	args.credits = job->push.count + 1;
 
 	args.in_sync.count = __args->in_sync.count;
 	args.in_sync.s = __args->in_sync.s;
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 0f4583f6fcad3..3393647bd9442 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -12,7 +12,6 @@
 #include "nouveau_abi16.h"
 #include "nouveau_sched.h"
 
-#define NOUVEAU_SCHED_HW_SUBMISSIONS		1
 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS		10000
 
 /* Starts at 0, since the DRM scheduler interprets those parameters as (initial)
@@ -85,10 +84,10 @@ nouveau_job_init(struct nouveau_job *job,
 			ret = -ENOMEM;
 			goto err_free_objs;
 		}
-
 	}
 
-	ret = drm_sched_job_init(&job->base, &sched->entity, 1, NULL);
+	ret = drm_sched_job_init(&job->base, &sched->entity,
+				 args->credits, NULL);
 	if (ret)
 		goto err_free_chains;
 
@@ -401,7 +400,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = {
 
 int
 nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
-		   struct workqueue_struct *wq)
+		   struct workqueue_struct *wq, u32 credit_limit)
 {
 	struct drm_gpu_scheduler *drm_sched = &sched->base;
 	struct drm_sched_entity *entity = &sched->entity;
@@ -419,7 +418,7 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
 
 	ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
 			     NOUVEAU_SCHED_PRIORITY_COUNT,
-			     NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
+			     credit_limit, 0, job_hang_limit,
 			     NULL, NULL, "nouveau_sched", drm->dev->dev);
 	if (ret)
 		goto fail_wq;
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index 23eff4b0f5b2c..a6528f5981e6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -27,6 +27,7 @@ enum nouveau_job_state {
 struct nouveau_job_args {
 	struct drm_file *file_priv;
 	struct nouveau_sched *sched;
+	u32 credits;
 
 	enum dma_resv_usage resv_usage;
 	bool sync;
@@ -111,7 +112,7 @@ struct nouveau_sched {
 };
 
 int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
-		       struct workqueue_struct *wq);
+		       struct workqueue_struct *wq, u32 credit_limit);
 void nouveau_sched_fini(struct nouveau_sched *sched);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 46493fcb82fd2..4b236da795094 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1607,9 +1607,11 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
 
 	init_completion(&job->complete);
 
-	args.sched = __args->sched;
 	args.file_priv = __args->file_priv;
 
+	args.sched = __args->sched;
+	args.credits = 1;
+
 	args.in_sync.count = __args->in_sync.count;
 	args.in_sync.s = __args->in_sync.s;
 
-- 
GitLab


From 2bbe6ab2be53858507f11f99f856846d04765ae3 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Wed, 22 Nov 2023 23:58:53 -0500
Subject: [PATCH 0457/2340] drm/sched: Fix bounds limiting when given a
 malformed entity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we're given a malformed entity in drm_sched_entity_init()--shouldn't
happen, but we verify--with out-of-bounds priority value, we set it to an
allowed value. Fix the expression which sets this limit.

Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Fixes: 56e449603f0ac5 ("drm/sched: Convert the GPU scheduler to variable number of run-queues")
Link: https://patchwork.freedesktop.org/patch/msgid/20231123122422.167832-2-ltuikov89@gmail.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/dbb91dbe-ef77-4d79-aaf9-2adb171c1d7a@amd.com
---
 drivers/gpu/drm/scheduler/sched_entity.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 4d42b1e4daa67..ee645d38e98d9 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -81,12 +81,15 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 		 */
 		pr_warn("%s: called with uninitialized scheduler\n", __func__);
 	} else if (num_sched_list) {
-		/* The "priority" of an entity cannot exceed the number
-		 * of run-queues of a scheduler.
+		/* The "priority" of an entity cannot exceed the number of run-queues of a
+		 * scheduler. Protect against num_rqs being 0, by converting to signed.
 		 */
-		if (entity->priority >= sched_list[0]->num_rqs)
-			entity->priority = max_t(u32, sched_list[0]->num_rqs,
-						 DRM_SCHED_PRIORITY_MIN);
+		if (entity->priority >= sched_list[0]->num_rqs) {
+			drm_err(sched_list[0], "entity with out-of-bounds priority:%u num_rqs:%u\n",
+				entity->priority, sched_list[0]->num_rqs);
+			entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1,
+						 (s32) DRM_SCHED_PRIORITY_MIN);
+		}
 		entity->rq = sched_list[0]->sched_rq[entity->priority];
 	}
 
-- 
GitLab


From fe375c74806dbd30b00ec038a80a5b7bf4653ab7 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Tue, 14 Nov 2023 20:40:58 -0500
Subject: [PATCH 0458/2340] drm/sched: Rename priority MIN to LOW
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename DRM_SCHED_PRIORITY_MIN to DRM_SCHED_PRIORITY_LOW.

This mirrors DRM_SCHED_PRIORITY_HIGH, for a list of DRM scheduler priorities
in ascending order,
  DRM_SCHED_PRIORITY_LOW,
  DRM_SCHED_PRIORITY_NORMAL,
  DRM_SCHED_PRIORITY_HIGH,
  DRM_SCHED_PRIORITY_KERNEL.

Cc: Rob Clark <robdclark@gmail.com>
Cc: Abhinav Kumar <quic_abhinavk@quicinc.com>
Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: Danilo Krummrich <dakr@redhat.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: linux-arm-msm@vger.kernel.org
Cc: freedreno@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124052752.6915-5-ltuikov89@gmail.com
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  2 +-
 drivers/gpu/drm/msm/msm_gpu.h            |  2 +-
 drivers/gpu/drm/scheduler/sched_entity.c |  2 +-
 drivers/gpu/drm/scheduler/sched_main.c   | 10 +++++-----
 include/drm/gpu_scheduler.h              |  2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index e2ae9ba147ba9..5cb33ac99f708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -73,10 +73,10 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
 		return DRM_SCHED_PRIORITY_NORMAL;
 
 	case AMDGPU_CTX_PRIORITY_VERY_LOW:
-		return DRM_SCHED_PRIORITY_MIN;
+		return DRM_SCHED_PRIORITY_LOW;
 
 	case AMDGPU_CTX_PRIORITY_LOW:
-		return DRM_SCHED_PRIORITY_MIN;
+		return DRM_SCHED_PRIORITY_LOW;
 
 	case AMDGPU_CTX_PRIORITY_NORMAL:
 		return DRM_SCHED_PRIORITY_NORMAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 62bb7fc7448ad..1a25931607c51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -325,7 +325,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
 	int i;
 
 	/* Signal all jobs not yet scheduled */
-	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) {
 		struct drm_sched_rq *rq = sched->sched_rq[i];
 		spin_lock(&rq->lock);
 		list_for_each_entry(s_entity, &rq->entities, list) {
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 4252e3839fbc8..eb0c97433e5f8 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -347,7 +347,7 @@ struct msm_gpu_perfcntr {
  * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
  * cases, so we don't use it (no need for kernel generated jobs).
  */
-#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)
+#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_LOW)
 
 /**
  * struct msm_file_private - per-drm_file context
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index ee645d38e98d9..dd2b8f777f518 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -88,7 +88,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 			drm_err(sched_list[0], "entity with out-of-bounds priority:%u num_rqs:%u\n",
 				entity->priority, sched_list[0]->num_rqs);
 			entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1,
-						 (s32) DRM_SCHED_PRIORITY_MIN);
+						 (s32) DRM_SCHED_PRIORITY_LOW);
 		}
 		entity->rq = sched_list[0]->sched_rq[entity->priority];
 	}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 044a8c4875ba6..b6d7bc49ff6ef 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1052,7 +1052,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
 	int i;
 
 	/* Kernel run queue has higher priority than normal run queue*/
-	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) {
 		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
 			drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
 			drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
@@ -1291,7 +1291,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	if (!sched->sched_rq)
 		goto Out_free;
 	sched->num_rqs = num_rqs;
-	for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) {
+	for (i = DRM_SCHED_PRIORITY_LOW; i < sched->num_rqs; i++) {
 		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
 		if (!sched->sched_rq[i])
 			goto Out_unroll;
@@ -1312,7 +1312,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	sched->ready = true;
 	return 0;
 Out_unroll:
-	for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--)
+	for (--i ; i >= DRM_SCHED_PRIORITY_LOW; i--)
 		kfree(sched->sched_rq[i]);
 Out_free:
 	kfree(sched->sched_rq);
@@ -1338,7 +1338,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 
 	drm_sched_wqueue_stop(sched);
 
-	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) {
 		struct drm_sched_rq *rq = sched->sched_rq[i];
 
 		spin_lock(&rq->lock);
@@ -1390,7 +1390,7 @@ void drm_sched_increase_karma(struct drm_sched_job *bad)
 	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
 		atomic_inc(&bad->karma);
 
-		for (i = DRM_SCHED_PRIORITY_MIN;
+		for (i = DRM_SCHED_PRIORITY_LOW;
 		     i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL);
 		     i++) {
 			struct drm_sched_rq *rq = sched->sched_rq[i];
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9a50348bd5c04..d8e2d84d9223e 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -63,7 +63,7 @@ struct drm_file;
  * to an array, and as such should start at 0.
  */
 enum drm_sched_priority {
-	DRM_SCHED_PRIORITY_MIN,
+	DRM_SCHED_PRIORITY_LOW,
 	DRM_SCHED_PRIORITY_NORMAL,
 	DRM_SCHED_PRIORITY_HIGH,
 	DRM_SCHED_PRIORITY_KERNEL,
-- 
GitLab


From 38f922a563aac3148ac73e73689805917f034cb5 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Wed, 22 Nov 2023 22:08:48 -0500
Subject: [PATCH 0459/2340] drm/sched: Reverse run-queue priority enumeration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reverse run-queue priority enumeration such that the higest priority is now 0,
and for each consecutive integer the prioirty diminishes.

Run-queues correspond to priorities. To an external observer a scheduler
created with a single run-queue, and another created with
DRM_SCHED_PRIORITY_COUNT number of run-queues, should always schedule
sched->sched_rq[0] with the same "priority", as that index run-queue exists in
both schedulers, i.e. a scheduler with one run-queue or many. This patch makes
it so.

In other words, the "priority" of sched->sched_rq[n], n >= 0, is the same for
any scheduler created with any allowable number of run-queues (priorities), 0
to DRM_SCHED_PRIORITY_COUNT.

Cc: Rob Clark <robdclark@gmail.com>
Cc: Abhinav Kumar <quic_abhinavk@quicinc.com>
Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: Danilo Krummrich <dakr@redhat.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: linux-arm-msm@vger.kernel.org
Cc: freedreno@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124052752.6915-6-ltuikov89@gmail.com
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  2 +-
 drivers/gpu/drm/msm/msm_gpu.h            |  2 +-
 drivers/gpu/drm/scheduler/sched_entity.c |  5 +++--
 drivers/gpu/drm/scheduler/sched_main.c   | 15 +++++++--------
 include/drm/gpu_scheduler.h              |  6 +++---
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1a25931607c51..71a5cf37b472d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -325,7 +325,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
 	int i;
 
 	/* Signal all jobs not yet scheduled */
-	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) {
+	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
 		struct drm_sched_rq *rq = sched->sched_rq[i];
 		spin_lock(&rq->lock);
 		list_for_each_entry(s_entity, &rq->entities, list) {
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index eb0c97433e5f8..2bfcb222e3533 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -347,7 +347,7 @@ struct msm_gpu_perfcntr {
  * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
  * cases, so we don't use it (no need for kernel generated jobs).
  */
-#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_LOW)
+#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_LOW - DRM_SCHED_PRIORITY_HIGH)
 
 /**
  * struct msm_file_private - per-drm_file context
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index dd2b8f777f518..3c4f5a392b064 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -82,13 +82,14 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 		pr_warn("%s: called with uninitialized scheduler\n", __func__);
 	} else if (num_sched_list) {
 		/* The "priority" of an entity cannot exceed the number of run-queues of a
-		 * scheduler. Protect against num_rqs being 0, by converting to signed.
+		 * scheduler. Protect against num_rqs being 0, by converting to signed. Choose
+		 * the lowest priority available.
 		 */
 		if (entity->priority >= sched_list[0]->num_rqs) {
 			drm_err(sched_list[0], "entity with out-of-bounds priority:%u num_rqs:%u\n",
 				entity->priority, sched_list[0]->num_rqs);
 			entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1,
-						 (s32) DRM_SCHED_PRIORITY_LOW);
+						 (s32) DRM_SCHED_PRIORITY_KERNEL);
 		}
 		entity->rq = sched_list[0]->sched_rq[entity->priority];
 	}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index b6d7bc49ff6ef..682aebe96db78 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1051,8 +1051,9 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
 	struct drm_sched_entity *entity;
 	int i;
 
-	/* Kernel run queue has higher priority than normal run queue*/
-	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) {
+	/* Start with the highest priority.
+	 */
+	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
 		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
 			drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
 			drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
@@ -1291,7 +1292,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	if (!sched->sched_rq)
 		goto Out_free;
 	sched->num_rqs = num_rqs;
-	for (i = DRM_SCHED_PRIORITY_LOW; i < sched->num_rqs; i++) {
+	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
 		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
 		if (!sched->sched_rq[i])
 			goto Out_unroll;
@@ -1312,7 +1313,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	sched->ready = true;
 	return 0;
 Out_unroll:
-	for (--i ; i >= DRM_SCHED_PRIORITY_LOW; i--)
+	for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--)
 		kfree(sched->sched_rq[i]);
 Out_free:
 	kfree(sched->sched_rq);
@@ -1338,7 +1339,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 
 	drm_sched_wqueue_stop(sched);
 
-	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) {
+	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
 		struct drm_sched_rq *rq = sched->sched_rq[i];
 
 		spin_lock(&rq->lock);
@@ -1390,9 +1391,7 @@ void drm_sched_increase_karma(struct drm_sched_job *bad)
 	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
 		atomic_inc(&bad->karma);
 
-		for (i = DRM_SCHED_PRIORITY_LOW;
-		     i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL);
-		     i++) {
+		for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) {
 			struct drm_sched_rq *rq = sched->sched_rq[i];
 
 			spin_lock(&rq->lock);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d8e2d84d9223e..5acc64954a883 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -63,10 +63,10 @@ struct drm_file;
  * to an array, and as such should start at 0.
  */
 enum drm_sched_priority {
-	DRM_SCHED_PRIORITY_LOW,
-	DRM_SCHED_PRIORITY_NORMAL,
-	DRM_SCHED_PRIORITY_HIGH,
 	DRM_SCHED_PRIORITY_KERNEL,
+	DRM_SCHED_PRIORITY_HIGH,
+	DRM_SCHED_PRIORITY_NORMAL,
+	DRM_SCHED_PRIORITY_LOW,
 
 	DRM_SCHED_PRIORITY_COUNT
 };
-- 
GitLab


From cf1aaa7d4a719f0bdd9c246c0fac8247cb54ddd7 Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev <dpiliaiev@igalia.com>
Date: Sat, 25 Nov 2023 11:11:50 -0800
Subject: [PATCH 0460/2340] drm/msm/a6xx: Add missing BIT(7) to
 REG_A6XX_UCHE_CLIENT_PF

Downstream always set BIT(7)

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/568930/
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 7a0220d29a235..e5558d5e0aa24 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1782,7 +1782,7 @@ static int hw_init(struct msm_gpu *gpu)
 	else
 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
 
-	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
+	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
 
 	/* Set weights for bicubic filtering */
 	if (adreno_is_a650_family(adreno_gpu)) {
-- 
GitLab


From 07e6de738aa6f0e873463e9ca88bdb7081c4bfd4 Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev <dpiliaiev@igalia.com>
Date: Sat, 25 Nov 2023 11:11:51 -0800
Subject: [PATCH 0461/2340] drm/msm/a690: Fix reg values for a690

KGSL doesn't support a690 so all reg values were the same as
on a660. Now we know the values and they are different from the
windows driver.

This fixes hangs on D3D12 games and some CTS tests.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/568931/
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index e5558d5e0aa24..723f9b97734da 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1312,6 +1312,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 
 	if (adreno_is_a650(adreno_gpu) ||
 	    adreno_is_a660(adreno_gpu) ||
+	    adreno_is_a690(adreno_gpu) ||
 	    adreno_is_a730(adreno_gpu) ||
 	    adreno_is_a740_family(adreno_gpu)) {
 		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
@@ -1321,13 +1322,6 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 		uavflagprd_inv = 2;
 	}
 
-	if (adreno_is_a690(adreno_gpu)) {
-		hbb_lo = 2;
-		amsbc = 1;
-		rgb565_predicator = 1;
-		uavflagprd_inv = 2;
-	}
-
 	if (adreno_is_7c3(adreno_gpu)) {
 		hbb_lo = 1;
 		amsbc = 1;
@@ -1741,7 +1735,9 @@ static int hw_init(struct msm_gpu *gpu)
 	/* Setting the primFifo thresholds default values,
 	 * and vccCacheSkipDis=1 bit (0x200) for A640 and newer
 	*/
-	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu) || adreno_is_a690(adreno_gpu))
+	if (adreno_is_a690(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00800200);
+	else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
 	else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
@@ -1775,6 +1771,8 @@ static int hw_init(struct msm_gpu *gpu)
 	if (adreno_is_a730(adreno_gpu) ||
 	    adreno_is_a740_family(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
+	else if (adreno_is_a690(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
 	else if (adreno_is_a619(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
 	else if (adreno_is_a610(adreno_gpu))
@@ -1808,12 +1806,17 @@ static int hw_init(struct msm_gpu *gpu)
 	a6xx_set_cp_protect(gpu);
 
 	if (adreno_is_a660_family(adreno_gpu)) {
-		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
+		if (adreno_is_a690(adreno_gpu))
+			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
+		else
+			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
 	}
 
+	if (adreno_is_a690(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
 	/* Set dualQ + disable afull for A660 GPU */
-	if (adreno_is_a660(adreno_gpu))
+	else if (adreno_is_a660(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
 	else if (adreno_is_a7xx(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
-- 
GitLab


From b1f5279b5981f9ed851163ee661692f42397982f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Mon, 20 Nov 2023 10:26:05 +0200
Subject: [PATCH 0462/2340] drm/i915/psr: Move plane sel fetch configuration
 into plane source files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently selective fetch configuration for planes is implemented in psr
code. More suitable place for this code is where everything else is
configured for planes -> move it into skl_universal_plane.c and
intel_cursor.c. This also allows us to drop hooks for cursor handling.

v3: Checkpatch warnings fixed
v2: Removed setting sel_fetch_area->y1/y2 as -1

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231120082606.3156488-2-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_cursor.c   | 33 +++++++-
 drivers/gpu/drm/i915/display/intel_psr.c      | 75 -------------------
 drivers/gpu/drm/i915/display/intel_psr.h      | 10 ---
 .../drm/i915/display/skl_universal_plane.c    | 75 ++++++++++++++++++-
 4 files changed, 102 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index b342fad180ca5..c025cafda7d90 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -21,6 +21,7 @@
 #include "intel_fb_pin.h"
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
+#include "intel_psr_regs.h"
 #include "skl_watermark.h"
 
 /* Cursor formats */
@@ -484,6 +485,32 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
 	return 0;
 }
 
+static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane,
+					     const struct intel_crtc_state *crtc_state,
+					     const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+			  plane_state->ctl);
+}
+
+static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane,
+					      const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
+}
+
 /* TODO: split into noarm+arm pair */
 static void i9xx_cursor_update_arm(struct intel_plane *plane,
 				   const struct intel_crtc_state *crtc_state,
@@ -531,10 +558,10 @@ static void i9xx_cursor_update_arm(struct intel_plane *plane,
 		skl_write_cursor_wm(plane, crtc_state);
 
 	if (plane_state)
-		intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state,
-						       plane_state);
+		i9xx_cursor_update_sel_fetch_arm(plane, crtc_state,
+						 plane_state);
 	else
-		intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state);
+		i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state);
 
 	if (plane->cursor.base != base ||
 	    plane->cursor.size != fbc_ctl ||
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 15c1804dcd59e..e7d65282fb88c 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1917,81 +1917,6 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
 	intel_de_write(dev_priv, CURSURFLIVE(intel_dp->psr.pipe), 0);
 }
 
-void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
-}
-
-void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state,
-					    const struct intel_plane_state *plane_state)
-{
-	struct drm_i915_private *i915 = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	if (plane->id == PLANE_CURSOR)
-		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-				  plane_state->ctl);
-	else
-		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-				  PLANE_SEL_FETCH_CTL_ENABLE);
-}
-
-void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane,
-					      const struct intel_crtc_state *crtc_state,
-					      const struct intel_plane_state *plane_state,
-					      int color_plane)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-	const struct drm_rect *clip;
-	u32 val;
-	int x, y;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	if (plane->id == PLANE_CURSOR)
-		return;
-
-	clip = &plane_state->psr2_sel_fetch_area;
-
-	val = (clip->y1 + plane_state->uapi.dst.y1) << 16;
-	val |= plane_state->uapi.dst.x1;
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_POS(pipe, plane->id), val);
-
-	x = plane_state->view.color_plane[color_plane].x;
-
-	/*
-	 * From Bspec: UV surface Start Y Position = half of Y plane Y
-	 * start position.
-	 */
-	if (!color_plane)
-		y = plane_state->view.color_plane[color_plane].y + clip->y1;
-	else
-		y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2;
-
-	val = y << 16 | x;
-
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_OFFSET(pipe, plane->id),
-			  val);
-
-	/* Sizes are 0 based */
-	val = (drm_rect_height(clip) - 1) << 16;
-	val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1;
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val);
-}
-
 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 6a1f4573852b1..143e0595c0977 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -55,16 +55,6 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
 int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 				struct intel_crtc *crtc);
 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state);
-void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane,
-					      const struct intel_crtc_state *crtc_state,
-					      const struct intel_plane_state *plane_state,
-					      int color_plane);
-void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state,
-					    const struct intel_plane_state *plane_state);
-
-void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state);
 void intel_psr_pause(struct intel_dp *intel_dp);
 void intel_psr_resume(struct intel_dp *intel_dp);
 
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index f5c77a018e104..0cac8be0c77be 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -18,6 +18,7 @@
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
+#include "intel_psr_regs.h"
 #include "skl_scaler.h"
 #include "skl_universal_plane.h"
 #include "skl_watermark.h"
@@ -629,6 +630,18 @@ skl_plane_disable_arm(struct intel_plane *plane,
 	intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0);
 }
 
+static void icl_plane_disable_sel_fetch_arm(struct intel_plane *plane,
+					    const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
+}
+
 static void
 icl_plane_disable_arm(struct intel_plane *plane,
 		      const struct intel_crtc_state *crtc_state)
@@ -642,7 +655,7 @@ icl_plane_disable_arm(struct intel_plane *plane,
 
 	skl_write_plane_wm(plane, crtc_state);
 
-	intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state);
+	icl_plane_disable_sel_fetch_arm(plane, crtc_state);
 	intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), 0);
 	intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0);
 }
@@ -1197,6 +1210,48 @@ skl_plane_update_arm(struct intel_plane *plane,
 			  skl_plane_surf(plane_state, 0));
 }
 
+static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane,
+					     const struct intel_crtc_state *crtc_state,
+					     const struct intel_plane_state *plane_state,
+					     int color_plane)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+	const struct drm_rect *clip;
+	u32 val;
+	int x, y;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	clip = &plane_state->psr2_sel_fetch_area;
+
+	val = (clip->y1 + plane_state->uapi.dst.y1) << 16;
+	val |= plane_state->uapi.dst.x1;
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_POS(pipe, plane->id), val);
+
+	x = plane_state->view.color_plane[color_plane].x;
+
+	/*
+	 * From Bspec: UV surface Start Y Position = half of Y plane Y
+	 * start position.
+	 */
+	if (!color_plane)
+		y = plane_state->view.color_plane[color_plane].y + clip->y1;
+	else
+		y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2;
+
+	val = y << 16 | x;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_OFFSET(pipe, plane->id),
+			  val);
+
+	/* Sizes are 0 based */
+	val = (drm_rect_height(clip) - 1) << 16;
+	val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1;
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val);
+}
+
 static void
 icl_plane_update_noarm(struct intel_plane *plane,
 		       const struct intel_crtc_state *crtc_state,
@@ -1269,7 +1324,21 @@ icl_plane_update_noarm(struct intel_plane *plane,
 	if (plane_state->force_black)
 		icl_plane_csc_load_black(plane);
 
-	intel_psr2_program_plane_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane);
+	icl_plane_update_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane);
+}
+
+static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane,
+					   const struct intel_crtc_state *crtc_state,
+					   const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+			  PLANE_SEL_FETCH_CTL_ENABLE);
 }
 
 static void
@@ -1296,7 +1365,7 @@ icl_plane_update_arm(struct intel_plane *plane,
 	if (plane_state->scaler_id >= 0)
 		skl_program_plane_scaler(plane, crtc_state, plane_state);
 
-	intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state, plane_state);
+	icl_plane_update_sel_fetch_arm(plane, crtc_state, plane_state);
 
 	/*
 	 * The control register self-arms if the plane was previously
-- 
GitLab


From a4f477e6ac171ccdea38556437493c3c5222bbe5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Mon, 20 Nov 2023 10:26:06 +0200
Subject: [PATCH 0463/2340] drm/i915/psr: Add proper handling for disabling sel
 fetch for planes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we are enabling selective fetch for all planes that are visible.
This is suboptimal as we might be fetching for memory for planes that are
not part of selective update.

Fix this by adding proper handling for disabling plane selective fetch:
If plane previously part of selective update is now not part of update:
Add it into updated planes and let the plane configuration to disable
selective fetch for it.

v3: Checkpatch warnings fixed
v2:
  - Add setting sel_fetch_area->y1/y2 to -1
  - Remove setting again local sel_fetch_area variable

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231120082606.3156488-3-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_cursor.c   | 21 +++++++++++--------
 drivers/gpu/drm/i915/display/intel_psr.c      | 13 +++++++++++-
 .../drm/i915/display/skl_universal_plane.c    |  7 +++++--
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index c025cafda7d90..a515ae2831f80 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -485,22 +485,21 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
 	return 0;
 }
 
-static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane,
-					     const struct intel_crtc_state *crtc_state,
-					     const struct intel_plane_state *plane_state)
+static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane,
+					      const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum pipe pipe = plane->pipe;
 
 	if (!crtc_state->enable_psr2_sel_fetch)
 		return;
 
-	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-			  plane_state->ctl);
+	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
 }
 
-static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane,
-					      const struct intel_crtc_state *crtc_state)
+static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane,
+					     const struct intel_crtc_state *crtc_state,
+					     const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *i915 = to_i915(plane->base.dev);
 	enum pipe pipe = plane->pipe;
@@ -508,7 +507,11 @@ static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane,
 	if (!crtc_state->enable_psr2_sel_fetch)
 		return;
 
-	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
+	if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0)
+		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+				  plane_state->ctl);
+	else
+		i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state);
 }
 
 /* TODO: split into noarm+arm pair */
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index e7d65282fb88c..36e4a1e9b98f8 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -2176,8 +2176,19 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 			continue;
 
 		inter = pipe_clip;
-		if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+		sel_fetch_area = &new_plane_state->psr2_sel_fetch_area;
+		if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) {
+			sel_fetch_area->y1 = -1;
+			sel_fetch_area->y2 = -1;
+			/*
+			 * if plane sel fetch was previously enabled ->
+			 * disable it
+			 */
+			if (drm_rect_height(&old_plane_state->psr2_sel_fetch_area) > 0)
+				crtc_state->update_planes |= BIT(plane->id);
+
 			continue;
+		}
 
 		if (!psr2_sel_fetch_plane_state_supported(new_plane_state)) {
 			full_update = true;
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 0cac8be0c77be..511dc1544854f 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -1337,8 +1337,11 @@ static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane,
 	if (!crtc_state->enable_psr2_sel_fetch)
 		return;
 
-	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-			  PLANE_SEL_FETCH_CTL_ENABLE);
+	if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0)
+		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+				  PLANE_SEL_FETCH_CTL_ENABLE);
+	else
+		icl_plane_disable_sel_fetch_arm(plane, crtc_state);
 }
 
 static void
-- 
GitLab


From b0a7ce53d494c94dfacb5a877fc0668f2a688652 Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Date: Sat, 11 Nov 2023 08:08:56 -0500
Subject: [PATCH 0464/2340] drm/ttm: Schedule delayed_delete worker closer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Try to allocate system memory on the NUMA node the device is closest to
and try to run delayed_delete workers on a CPU of this node as well.

To optimize the memory clearing operation when a TTM BO gets freed by
the delayed_delete worker, scheduling it closer to a NUMA node where the
memory was initially allocated helps avoid the cases where the worker
gets randomly scheduled on the CPU cores that are across interconnect
boundaries such as xGMI, PCIe etc.

This change helps USWC GTT allocations on NUMA systems (dGPU) and AMD
APU platforms such as GFXIP9.4.3.

Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231111130856.1168304-1-rajneesh.bhardwaj@amd.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c     | 8 +++++++-
 drivers/gpu/drm/ttm/ttm_device.c | 6 ++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e58b7e2498166..edf10618fe2b2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -370,7 +370,13 @@ static void ttm_bo_release(struct kref *kref)
 			spin_unlock(&bo->bdev->lru_lock);
 
 			INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
-			queue_work(bdev->wq, &bo->delayed_delete);
+
+			/* Schedule the worker on the closest NUMA node. This
+			 * improves performance since system memory might be
+			 * cleared on free and that is best done on a CPU core
+			 * close to it.
+			 */
+			queue_work_node(bdev->pool.nid, bdev->wq, &bo->delayed_delete);
 			return;
 		}
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index d48b39132b324..f5187b384ae9a 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -204,7 +204,8 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 	if (ret)
 		return ret;
 
-	bdev->wq = alloc_workqueue("ttm", WQ_MEM_RECLAIM | WQ_HIGHPRI, 16);
+	bdev->wq = alloc_workqueue("ttm",
+				   WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16);
 	if (!bdev->wq) {
 		ttm_global_release();
 		return -ENOMEM;
@@ -213,7 +214,8 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 	bdev->funcs = funcs;
 
 	ttm_sys_man_init(bdev);
-	ttm_pool_init(&bdev->pool, dev, NUMA_NO_NODE, use_dma_alloc, use_dma32);
+
+	ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32);
 
 	bdev->vma_manager = vma_manager;
 	spin_lock_init(&bdev->lru_lock);
-- 
GitLab


From 19b4c60ce8660a0e3a2cebd3e4dc0691928d015d Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov89@gmail.com>
Date: Sat, 25 Nov 2023 14:06:08 -0500
Subject: [PATCH 0465/2340] drm/sched: Fix compilation issues with DRM priority
 rename
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix compilation issues with DRM scheduler priority rename MIN to LOW.

Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311252109.WgbJsSkG-lkp@intel.com/
Cc: Danilo Krummrich <dakr@redhat.com>
Cc: Frank Binns <frank.binns@imgtec.com>
Cc: Donald Robson <donald.robson@imgtec.com>
Cc: Matt Coster <matt.coster@imgtec.com>
Cc: Direct Rendering Infrastructure - Development <dri-devel@lists.freedesktop.org>
Fixes: fe375c74806dbd ("drm/sched: Rename priority MIN to LOW")
Fixes: 38f922a563aac3 ("drm/sched: Reverse run-queue priority enumeration")
Fixes: 5f03a507b29e44 ("drm/nouveau: implement 1:1 scheduler - entity relationship")
Link: https://patchwork.freedesktop.org/patch/msgid/20231125192246.87268-2-ltuikov89@gmail.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/7429262c-6dea-4dcc-bf7e-54d2277dabf1@amd.com
---
 drivers/gpu/drm/imagination/pvr_queue.c | 2 +-
 drivers/gpu/drm/nouveau/nouveau_sched.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c
index d65c3fbedf5ac..5ed9c98fb599c 100644
--- a/drivers/gpu/drm/imagination/pvr_queue.c
+++ b/drivers/gpu/drm/imagination/pvr_queue.c
@@ -1292,7 +1292,7 @@ struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
 		goto err_release_ufo;
 
 	err = drm_sched_entity_init(&queue->entity,
-				    DRM_SCHED_PRIORITY_MIN,
+				    DRM_SCHED_PRIORITY_KERNEL,
 				    &sched, 1, &ctx->faulty);
 	if (err)
 		goto err_sched_fini;
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 3393647bd9442..dd98f6910f9ca 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -18,7 +18,7 @@
  * index to the run-queue array.
  */
 enum nouveau_sched_priority {
-	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_MIN,
+	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
 	NOUVEAU_SCHED_PRIORITY_COUNT,
 };
 
@@ -423,7 +423,7 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
 	if (ret)
 		goto fail_wq;
 
-	/* Using DRM_SCHED_PRIORITY_MIN, since that's what we're required to use
+	/* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use
 	 * when we want to have a single run-queue only.
 	 *
 	 * It's not documented, but one will find out when trying to use any
@@ -433,7 +433,7 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
 	 * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
 	 * matching the enum type used in drm_sched_entity_init().
 	 */
-	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_MIN,
+	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
 				    &drm_sched, 1, NULL);
 	if (ret)
 		goto fail_sched;
-- 
GitLab


From e17049148678725248a57ecbf9c21df0fde3b434 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 20 Oct 2023 07:52:13 -0500
Subject: [PATCH 0466/2340] drm: Use device_get_match_data()

Use preferred device_get_match_data() instead of of_match_device() to
get the driver match data in a single step. With this, adjust the
includes to explicitly include the correct headers. That also serves as
preparation to remove implicit includes within the DT headers
(of_device.h in particular).

Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Reviewed-by: Andrew Jeffery <andrew@codeconstruct.com.au>
Link: https://lore.kernel.org/r/20231020125214.2930329-1-robh@kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/gpu/drm/armada/armada_crtc.c    | 24 +++++++-----------------
 drivers/gpu/drm/aspeed/aspeed_gfx_drv.c | 10 ++++------
 drivers/gpu/drm/exynos/exynos_drm_gsc.c |  9 +++++----
 drivers/gpu/drm/imx/ipuv3/imx-ldb.c     |  9 ++++-----
 drivers/gpu/drm/mxsfb/mxsfb_drv.c       | 10 +++-------
 drivers/gpu/drm/omapdrm/dss/dispc.c     |  4 ++--
 drivers/gpu/drm/omapdrm/dss/dss.c       |  5 +++--
 7 files changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c
index 52d2c942d3d2a..c78687c755a86 100644
--- a/drivers/gpu/drm/armada/armada_crtc.c
+++ b/drivers/gpu/drm/armada/armada_crtc.c
@@ -7,8 +7,9 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
@@ -1012,26 +1013,17 @@ armada_lcd_bind(struct device *dev, struct device *master, void *data)
 	int irq = platform_get_irq(pdev, 0);
 	const struct armada_variant *variant;
 	struct device_node *port = NULL;
+	struct device_node *np, *parent = dev->of_node;
 
 	if (irq < 0)
 		return irq;
 
-	if (!dev->of_node) {
-		const struct platform_device_id *id;
 
-		id = platform_get_device_id(pdev);
-		if (!id)
-			return -ENXIO;
-
-		variant = (const struct armada_variant *)id->driver_data;
-	} else {
-		const struct of_device_id *match;
-		struct device_node *np, *parent = dev->of_node;
-
-		match = of_match_device(dev->driver->of_match_table, dev);
-		if (!match)
-			return -ENXIO;
+	variant = device_get_match_data(dev);
+	if (!variant)
+		return -ENXIO;
 
+	if (parent) {
 		np = of_get_child_by_name(parent, "ports");
 		if (np)
 			parent = np;
@@ -1041,8 +1033,6 @@ armada_lcd_bind(struct device *dev, struct device *master, void *data)
 			dev_err(dev, "no port node found in %pOF\n", parent);
 			return -ENXIO;
 		}
-
-		variant = match->data;
 	}
 
 	return armada_drm_crtc_create(drm, dev, res, irq, variant, port);
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
index 78122b35a0cbb..a7a6b70220eb8 100644
--- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
@@ -6,10 +6,10 @@
 #include <linux/irq.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
 
@@ -143,7 +143,6 @@ static int aspeed_gfx_load(struct drm_device *drm)
 	struct aspeed_gfx *priv = to_aspeed_gfx(drm);
 	struct device_node *np = pdev->dev.of_node;
 	const struct aspeed_gfx_config *config;
-	const struct of_device_id *match;
 	struct resource *res;
 	int ret;
 
@@ -152,10 +151,9 @@ static int aspeed_gfx_load(struct drm_device *drm)
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
-	match = of_match_device(aspeed_gfx_match, &pdev->dev);
-	if (!match)
+	config = device_get_match_data(&pdev->dev);
+	if (!config)
 		return -EINVAL;
-	config = match->data;
 
 	priv->dac_reg = config->dac_reg;
 	priv->int_clr_reg = config->int_clear_reg;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 34cdabc30b4f5..35771fb4e85d0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -11,9 +11,10 @@
 #include <linux/component.h>
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 
 #include <drm/drm_fourcc.h>
@@ -103,7 +104,7 @@ struct gsc_context {
 	unsigned int			num_formats;
 
 	void __iomem	*regs;
-	const char	**clk_names;
+	const char	*const *clk_names;
 	struct clk	*clocks[GSC_MAX_CLOCKS];
 	int		num_clocks;
 	struct gsc_scaler	sc;
@@ -1217,7 +1218,7 @@ static const unsigned int gsc_tiled_formats[] = {
 static int gsc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct gsc_driverdata *driver_data;
+	const struct gsc_driverdata *driver_data;
 	struct exynos_drm_ipp_formats *formats;
 	struct gsc_context *ctx;
 	int num_formats, ret, i, j;
@@ -1226,7 +1227,7 @@ static int gsc_probe(struct platform_device *pdev)
 	if (!ctx)
 		return -ENOMEM;
 
-	driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev);
+	driver_data = device_get_match_data(dev);
 	ctx->dev = dev;
 	ctx->num_clocks = driver_data->num_clocks;
 	ctx->clk_names = driver_data->clk_names;
diff --git a/drivers/gpu/drm/imx/ipuv3/imx-ldb.c b/drivers/gpu/drm/imx/ipuv3/imx-ldb.c
index 989eca32d3252..53840ab054c72 100644
--- a/drivers/gpu/drm/imx/ipuv3/imx-ldb.c
+++ b/drivers/gpu/drm/imx/ipuv3/imx-ldb.c
@@ -12,8 +12,10 @@
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/videodev2.h>
 
@@ -617,7 +619,6 @@ static int imx_ldb_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
-	const struct of_device_id *of_id = of_match_device(imx_ldb_dt_ids, dev);
 	struct device_node *child;
 	struct imx_ldb *imx_ldb;
 	int dual;
@@ -638,9 +639,7 @@ static int imx_ldb_probe(struct platform_device *pdev)
 	regmap_write(imx_ldb->regmap, IOMUXC_GPR2, 0);
 
 	imx_ldb->dev = dev;
-
-	if (of_id)
-		imx_ldb->lvds_mux = of_id->data;
+	imx_ldb->lvds_mux = device_get_match_data(dev);
 
 	dual = of_property_read_bool(np, "fsl,dual-channel");
 	if (dual)
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
index 625c1bfc41733..b483ef48216aa 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
@@ -11,9 +11,10 @@
 #include <linux/clk.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/pm_runtime.h>
 
 #include <drm/drm_atomic_helper.h>
@@ -346,18 +347,13 @@ MODULE_DEVICE_TABLE(of, mxsfb_dt_ids);
 static int mxsfb_probe(struct platform_device *pdev)
 {
 	struct drm_device *drm;
-	const struct of_device_id *of_id =
-			of_match_device(mxsfb_dt_ids, &pdev->dev);
 	int ret;
 
-	if (!pdev->dev.of_node)
-		return -ENODEV;
-
 	drm = drm_dev_alloc(&mxsfb_driver, &pdev->dev);
 	if (IS_ERR(drm))
 		return PTR_ERR(drm);
 
-	ret = mxsfb_load(drm, of_id->data);
+	ret = mxsfb_load(drm, device_get_match_data(&pdev->dev));
 	if (ret)
 		goto err_free;
 
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index c26aab4939fa0..993691b3cc7ea 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -22,11 +22,11 @@
 #include <linux/hardirq.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 #include <linux/sizes.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/component.h>
 #include <linux/sys_soc.h>
 #include <drm/drm_fourcc.h>
@@ -4765,7 +4765,7 @@ static int dispc_bind(struct device *dev, struct device *master, void *data)
 	if (soc)
 		dispc->feat = soc->data;
 	else
-		dispc->feat = of_match_device(dispc_of_match, &pdev->dev)->data;
+		dispc->feat = device_get_match_data(&pdev->dev);
 
 	r = dispc_errata_i734_wa_init(dispc);
 	if (r)
diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c
index 02955f9768459..988888e164d7b 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss.c
@@ -22,12 +22,13 @@
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 #include <linux/gfp.h>
 #include <linux/sizes.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
 #include <linux/of_graph.h>
 #include <linux/regulator/consumer.h>
 #include <linux/suspend.h>
@@ -1445,7 +1446,7 @@ static int dss_probe(struct platform_device *pdev)
 	if (soc)
 		dss->feat = soc->data;
 	else
-		dss->feat = of_match_device(dss_of_match, &pdev->dev)->data;
+		dss->feat = device_get_match_data(&pdev->dev);
 
 	/* Map I/O registers, get and setup clocks. */
 	dss->base = devm_platform_ioremap_resource(pdev, 0);
-- 
GitLab


From 737077b873e32254959bc6f8c3e63cc67ba1f44c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 24 Nov 2023 16:39:17 +0000
Subject: [PATCH 0467/2340] drm/imagination: Fix a couple of spelling mistakes
 in literal strings

There are a couple of spelling mistakes in literal strings in the
stid_fmts array. Fix these.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124163917.300685-1-colin.i.king@gmail.com
---
 drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h b/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h
index 571954182f33d..56e11009e1230 100644
--- a/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h
+++ b/drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h
@@ -497,7 +497,7 @@ static const struct rogue_km_stid_fmt stid_fmts[] = {
 	{ ROGUE_FW_LOG_CREATESFID(213, ROGUE_FW_GROUP_MAIN, 1),
 	  "Safety Watchdog threshold period set to 0x%x clock cycles" },
 	{ ROGUE_FW_LOG_CREATESFID(214, ROGUE_FW_GROUP_MAIN, 0),
-	  "MTS Safety Event trigged by the safety watchdog." },
+	  "MTS Safety Event triggered by the safety watchdog." },
 	{ ROGUE_FW_LOG_CREATESFID(215, ROGUE_FW_GROUP_MAIN, 3),
 	  "DM%d USC tasks range limit 0 - %d, stride %d" },
 	{ ROGUE_FW_LOG_CREATESFID(216, ROGUE_FW_GROUP_MAIN, 1),
@@ -1114,7 +1114,7 @@ static const struct rogue_km_stid_fmt stid_fmts[] = {
 	{ ROGUE_FW_LOG_CREATESFID(39, ROGUE_FW_GROUP_SPM, 2),
 	  "3DMemFree matches freelist 0x%08x (FL type = %u)" },
 	{ ROGUE_FW_LOG_CREATESFID(40, ROGUE_FW_GROUP_SPM, 0),
-	  "Raise the 3DMemFreeDedected flag" },
+	  "Raise the 3DMemFreeDetected flag" },
 	{ ROGUE_FW_LOG_CREATESFID(41, ROGUE_FW_GROUP_SPM, 1),
 	  "Wait for pending grow on Freelist 0x%08x" },
 	{ ROGUE_FW_LOG_CREATESFID(42, ROGUE_FW_GROUP_SPM, 1),
-- 
GitLab


From 3519d77293fb74786a45811fa6b600db26c1b0be Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Mon, 27 Nov 2023 09:04:30 +0800
Subject: [PATCH 0468/2340] drm/imagination: Remove unneeded semicolon

./drivers/gpu/drm/imagination/pvr_free_list.c:258:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7635
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127010430.119666-1-yang.lee@linux.alibaba.com
---
 drivers/gpu/drm/imagination/pvr_free_list.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_free_list.c b/drivers/gpu/drm/imagination/pvr_free_list.c
index c61fd417edcb7..5e51bc980751c 100644
--- a/drivers/gpu/drm/imagination/pvr_free_list.c
+++ b/drivers/gpu/drm/imagination/pvr_free_list.c
@@ -255,7 +255,7 @@ pvr_free_list_insert_pages_locked(struct pvr_free_list *free_list,
 
 		if (!num_pages)
 			break;
-	};
+	}
 	/* clang-format on */
 
 	/* Make sure our free_list update is flushed. */
-- 
GitLab


From 4aa89e8644d3b8879191911edea0b6a63ea9d6e2 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Sat, 25 Nov 2023 00:36:36 +0100
Subject: [PATCH 0469/2340] drm/imagination: vm: prevent duplicate drm_gpuvm_bo
 instances

Use drm_gpuvm_bo_obtain() instead of drm_gpuvm_bo_create(). The latter
should only be used in conjunction with drm_gpuvm_bo_obtain_prealloc().

drm_gpuvm_bo_obtain() re-uses existing instances of a given VM and BO
combination, whereas drm_gpuvm_bo_create() would always create a new
instance of struct drm_gpuvm_bo and hence leave us with duplicates.

Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Donald Robson <donald.robson@imgtec.com>
Tested-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124233650.152653-2-dakr@redhat.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 3ad1366294b93..09d481c575b0e 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -224,6 +224,7 @@ pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op,
 			struct pvr_gem_object *pvr_obj, u64 offset,
 			u64 device_addr, u64 size)
 {
+	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
 	const bool is_user = vm_ctx == vm_ctx->pvr_dev->kernel_vm_ctx;
 	const u64 pvr_obj_size = pvr_gem_object_size(pvr_obj);
 	struct sg_table *sgt;
@@ -245,10 +246,11 @@ pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op,
 
 	bind_op->type = PVR_VM_BIND_TYPE_MAP;
 
-	bind_op->gpuvm_bo = drm_gpuvm_bo_create(&vm_ctx->gpuvm_mgr,
-						gem_from_pvr_gem(pvr_obj));
-	if (!bind_op->gpuvm_bo)
-		return -ENOMEM;
+	dma_resv_lock(obj->resv, NULL);
+	bind_op->gpuvm_bo = drm_gpuvm_bo_obtain(&vm_ctx->gpuvm_mgr, obj);
+	dma_resv_unlock(obj->resv);
+	if (IS_ERR(bind_op->gpuvm_bo))
+		return PTR_ERR(bind_op->gpuvm_bo);
 
 	bind_op->new_va = kzalloc(sizeof(*bind_op->new_va), GFP_KERNEL);
 	bind_op->prev_va = kzalloc(sizeof(*bind_op->prev_va), GFP_KERNEL);
-- 
GitLab


From 4550d66d08b2257a1b2d3ce339d68ca33177f4b9 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Sat, 25 Nov 2023 00:36:37 +0100
Subject: [PATCH 0470/2340] drm/imagination: vm: check for
 drm_gpuvm_range_valid()

Extend pvr_device_addr_and_size_are_valid() by the corresponding GPUVM
sanity checks. This includes a, previously missing, overflow check for
the base address and size of the requested mapping.

Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Donald Robson <donald.robson@imgtec.com>
Tested-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124233650.152653-3-dakr@redhat.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 9 ++++++---
 drivers/gpu/drm/imagination/pvr_vm.h | 3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 09d481c575b0e..1e89092c3dcc5 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -239,7 +239,7 @@ pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op,
 		return -EINVAL;
 	}
 
-	if (!pvr_device_addr_and_size_are_valid(device_addr, size) ||
+	if (!pvr_device_addr_and_size_are_valid(vm_ctx, device_addr, size) ||
 	    offset & ~PAGE_MASK || size & ~PAGE_MASK ||
 	    offset >= pvr_obj_size || offset_plus_size > pvr_obj_size)
 		return -EINVAL;
@@ -295,7 +295,7 @@ pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op,
 {
 	int err;
 
-	if (!pvr_device_addr_and_size_are_valid(device_addr, size))
+	if (!pvr_device_addr_and_size_are_valid(vm_ctx, device_addr, size))
 		return -EINVAL;
 
 	bind_op->type = PVR_VM_BIND_TYPE_UNMAP;
@@ -505,6 +505,7 @@ pvr_device_addr_is_valid(u64 device_addr)
 /**
  * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual
  * address and associated size are both valid.
+ * @vm_ctx: Target VM context.
  * @device_addr: Virtual device address to test.
  * @size: Size of the range based at @device_addr to test.
  *
@@ -523,9 +524,11 @@ pvr_device_addr_is_valid(u64 device_addr)
  *  * %false otherwise.
  */
 bool
-pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size)
+pvr_device_addr_and_size_are_valid(struct pvr_vm_context *vm_ctx,
+				   u64 device_addr, u64 size)
 {
 	return pvr_device_addr_is_valid(device_addr) &&
+	       drm_gpuvm_range_valid(&vm_ctx->gpuvm_mgr, device_addr, size) &&
 	       size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 &&
 	       (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
 }
diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h
index cf8b97553dc85..f2a6463f2b059 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.h
+++ b/drivers/gpu/drm/imagination/pvr_vm.h
@@ -29,7 +29,8 @@ struct drm_exec;
 /* Functions defined in pvr_vm.c */
 
 bool pvr_device_addr_is_valid(u64 device_addr);
-bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size);
+bool pvr_device_addr_and_size_are_valid(struct pvr_vm_context *vm_ctx,
+					u64 device_addr, u64 size);
 
 struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev,
 					     bool is_userspace_context);
-- 
GitLab


From 0d3abd456be45369235dd75793ce26f07900044c Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Sat, 25 Nov 2023 00:36:38 +0100
Subject: [PATCH 0471/2340] drm/imagination: vm: fix drm_gpuvm reference count

The driver specific reference count indicates whether the VM should be
teared down, whereas GPUVM's reference count indicates whether the VM
structure can finally be freed.

Hence, free the VM structure in pvr_gpuvm_free() and drop the last
GPUVM reference after tearing down the VM. Generally, this prevents
lifetime issues such as the VM being freed as long as drm_gpuvm_bo
structures still hold references to the VM.

Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Donald Robson <donald.robson@imgtec.com>
Tested-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124233650.152653-4-dakr@redhat.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 1e89092c3dcc5..e0d74d9a61909 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -64,6 +64,12 @@ struct pvr_vm_context {
 	struct drm_gem_object dummy_gem;
 };
 
+static inline
+struct pvr_vm_context *to_pvr_vm_context(struct drm_gpuvm *gpuvm)
+{
+	return container_of(gpuvm, struct pvr_vm_context, gpuvm_mgr);
+}
+
 struct pvr_vm_context *pvr_vm_context_get(struct pvr_vm_context *vm_ctx)
 {
 	if (vm_ctx)
@@ -535,7 +541,7 @@ pvr_device_addr_and_size_are_valid(struct pvr_vm_context *vm_ctx,
 
 void pvr_gpuvm_free(struct drm_gpuvm *gpuvm)
 {
-
+	kfree(to_pvr_vm_context(gpuvm));
 }
 
 static const struct drm_gpuvm_ops pvr_vm_gpuva_ops = {
@@ -655,12 +661,11 @@ pvr_vm_context_release(struct kref *ref_count)
 	WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start,
 			     vm_ctx->gpuvm_mgr.mm_range));
 
-	drm_gpuvm_put(&vm_ctx->gpuvm_mgr);
 	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
 	drm_gem_private_object_fini(&vm_ctx->dummy_gem);
 	mutex_destroy(&vm_ctx->lock);
 
-	kfree(vm_ctx);
+	drm_gpuvm_put(&vm_ctx->gpuvm_mgr);
 }
 
 /**
-- 
GitLab


From c350a08ac7ec933f1dc8a143ebab60164ed4d90b Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:20 +0100
Subject: [PATCH 0472/2340] drm/ast: Turn ioregs_lock to modeset_lock

The lock for the I/O registers is only relevant during mode-setting
operations. It protects the registers from concurrent access from
reading EDID information.

Reduce lock coverage to mode setting, rename the lock and move it
entirely into the mode-setting code. No functional changes, as the
I/O lock was never used for anything else than mode setting.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-2-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_drv.h  |  3 ++-
 drivers/gpu/drm/ast/ast_main.c |  4 ----
 drivers/gpu/drm/ast/ast_mode.c | 26 +++++++++++++++-----------
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 772f3b049c169..2ec29480939d3 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -192,7 +192,6 @@ to_ast_bmc_connector(struct drm_connector *connector)
 struct ast_device {
 	struct drm_device base;
 
-	struct mutex ioregs_lock; /* Protects access to I/O registers in ioregs */
 	void __iomem *regs;
 	void __iomem *ioregs;
 	void __iomem *dp501_fw_buf;
@@ -207,6 +206,8 @@ struct ast_device {
 	unsigned long	vram_size;
 	unsigned long	vram_fb_available;
 
+	struct mutex modeset_lock; /* Protects access to modeset I/O registers in ioregs */
+
 	struct ast_plane primary_plane;
 	struct ast_plane cursor_plane;
 	struct drm_crtc crtc;
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index f4ab40e22ceac..445cf47871a43 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -440,10 +440,6 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 
 	pci_set_drvdata(pdev, dev);
 
-	ret = drmm_mutex_init(dev, &ast->ioregs_lock);
-	if (ret)
-		return ERR_PTR(ret);
-
 	ast->regs = pcim_iomap(pdev, 1, 0);
 	if (!ast->regs)
 		return ERR_PTR(-EIO);
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index c20534d0ef7c8..a718646a66b8f 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -1358,13 +1358,13 @@ static int ast_vga_connector_helper_get_modes(struct drm_connector *connector)
 	 * Protect access to I/O registers from concurrent modesetting
 	 * by acquiring the I/O-register lock.
 	 */
-	mutex_lock(&ast->ioregs_lock);
+	mutex_lock(&ast->modeset_lock);
 
 	edid = drm_get_edid(connector, &ast_vga_connector->i2c->adapter);
 	if (!edid)
 		goto err_mutex_unlock;
 
-	mutex_unlock(&ast->ioregs_lock);
+	mutex_unlock(&ast->modeset_lock);
 
 	count = drm_add_edid_modes(connector, edid);
 	kfree(edid);
@@ -1372,7 +1372,7 @@ static int ast_vga_connector_helper_get_modes(struct drm_connector *connector)
 	return count;
 
 err_mutex_unlock:
-	mutex_unlock(&ast->ioregs_lock);
+	mutex_unlock(&ast->modeset_lock);
 err_drm_connector_update_edid_property:
 	drm_connector_update_edid_property(connector, NULL);
 	return 0;
@@ -1464,13 +1464,13 @@ static int ast_sil164_connector_helper_get_modes(struct drm_connector *connector
 	 * Protect access to I/O registers from concurrent modesetting
 	 * by acquiring the I/O-register lock.
 	 */
-	mutex_lock(&ast->ioregs_lock);
+	mutex_lock(&ast->modeset_lock);
 
 	edid = drm_get_edid(connector, &ast_sil164_connector->i2c->adapter);
 	if (!edid)
 		goto err_mutex_unlock;
 
-	mutex_unlock(&ast->ioregs_lock);
+	mutex_unlock(&ast->modeset_lock);
 
 	count = drm_add_edid_modes(connector, edid);
 	kfree(edid);
@@ -1478,7 +1478,7 @@ static int ast_sil164_connector_helper_get_modes(struct drm_connector *connector
 	return count;
 
 err_mutex_unlock:
-	mutex_unlock(&ast->ioregs_lock);
+	mutex_unlock(&ast->modeset_lock);
 err_drm_connector_update_edid_property:
 	drm_connector_update_edid_property(connector, NULL);
 	return 0;
@@ -1670,13 +1670,13 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector)
 	 * Protect access to I/O registers from concurrent modesetting
 	 * by acquiring the I/O-register lock.
 	 */
-	mutex_lock(&ast->ioregs_lock);
+	mutex_lock(&ast->modeset_lock);
 
 	succ = ast_astdp_read_edid(connector->dev, edid);
 	if (succ < 0)
 		goto err_mutex_unlock;
 
-	mutex_unlock(&ast->ioregs_lock);
+	mutex_unlock(&ast->modeset_lock);
 
 	drm_connector_update_edid_property(connector, edid);
 	count = drm_add_edid_modes(connector, edid);
@@ -1685,7 +1685,7 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector)
 	return count;
 
 err_mutex_unlock:
-	mutex_unlock(&ast->ioregs_lock);
+	mutex_unlock(&ast->modeset_lock);
 	kfree(edid);
 err_drm_connector_update_edid_property:
 	drm_connector_update_edid_property(connector, NULL);
@@ -1870,9 +1870,9 @@ static void ast_mode_config_helper_atomic_commit_tail(struct drm_atomic_state *s
 	 * display modes. Protect access to I/O registers by acquiring
 	 * the I/O-register lock. Released in atomic_flush().
 	 */
-	mutex_lock(&ast->ioregs_lock);
+	mutex_lock(&ast->modeset_lock);
 	drm_atomic_helper_commit_tail_rpm(state);
-	mutex_unlock(&ast->ioregs_lock);
+	mutex_unlock(&ast->modeset_lock);
 }
 
 static const struct drm_mode_config_helper_funcs ast_mode_config_helper_funcs = {
@@ -1910,6 +1910,10 @@ int ast_mode_config_init(struct ast_device *ast)
 	struct drm_connector *physical_connector = NULL;
 	int ret;
 
+	ret = drmm_mutex_init(dev, &ast->modeset_lock);
+	if (ret)
+		return ret;
+
 	ret = drmm_mode_config_init(dev);
 	if (ret)
 		return ret;
-- 
GitLab


From 0ccaa3dde97bd30ae615c66fc20080e920ec9b4e Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:21 +0100
Subject: [PATCH 0473/2340] drm/ast: Rework I/O register setup

There are three different ways of retrieving the I/O-memory ranges
for AST devices: either from PCI BAR 1, from PCI BAR 2 or from PCI
BAR 1 by 'guessing'.

Make the respective code more readable by making each case self-
contained. Also add error checking against the length of the PCI
BARs.

v2:
	* fix I/O range length to 128 bytes
	* fix length test for PCI BAR 2

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-3-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_main.c | 40 +++++++++++++++++++++++++---------
 drivers/gpu/drm/ast/ast_reg.h  |  1 +
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 445cf47871a43..70e1871dbaf94 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -444,22 +444,42 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	if (!ast->regs)
 		return ERR_PTR(-EIO);
 
-	/*
-	 * After AST2500, MMIO is enabled by default, and it should be adopted
-	 * to be compatible with Arm.
-	 */
 	if (pdev->revision >= 0x40) {
+		/*
+		 * On AST2500 and later models, MMIO is enabled by
+		 * default. Adopt it to be compatible with ARM.
+		 */
+		resource_size_t len = pci_resource_len(pdev, 1);
+
+		if (len < AST_IO_MM_OFFSET)
+			return ERR_PTR(-EIO);
+		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
+			return ERR_PTR(-EIO);
 		ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
-	} else if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
-		drm_info(dev, "platform has no IO space, trying MMIO\n");
-		ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
-	}
+	} else if (pci_resource_flags(pdev, 2) & IORESOURCE_IO) {
+		/*
+		 * Map I/O registers if we have a PCI BAR for I/O.
+		 */
+		resource_size_t len = pci_resource_len(pdev, 2);
 
-	/* "map" IO regs if the above hasn't done so already */
-	if (!ast->ioregs) {
+		if (len < AST_IO_MM_LENGTH)
+			return -EIO;
 		ast->ioregs = pcim_iomap(pdev, 2, 0);
 		if (!ast->ioregs)
 			return ERR_PTR(-EIO);
+	} else {
+		/*
+		 * Anything else is best effort.
+		 */
+		resource_size_t len = pci_resource_len(pdev, 1);
+
+		if (len < AST_IO_MM_OFFSET)
+			return ERR_PTR(-EIO);
+		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
+			return ERR_PTR(-EIO);
+		ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
+
+		drm_info(dev, "Platform has no I/O space, using MMIO\n");
 	}
 
 	if (!ast_is_vga_enabled(dev)) {
diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h
index 555286ecf5209..05bab94a9a906 100644
--- a/drivers/gpu/drm/ast/ast_reg.h
+++ b/drivers/gpu/drm/ast/ast_reg.h
@@ -10,6 +10,7 @@
  */
 
 #define AST_IO_MM_OFFSET		(0x380)
+#define AST_IO_MM_LENGTH		(128)
 
 #define AST_IO_VGAARI_W			(0x40)
 #define AST_IO_VGAMR_W			(0x42)
-- 
GitLab


From b45efcfc94e8043d08344094a305bb4b8030c7df Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:22 +0100
Subject: [PATCH 0474/2340] drm/ast: Retrieve I/O-memory ranges without ast
 device

Read the I/O-memory ranges into local variables before setting
them in the ast device instanace. We'll later need this to split
detecting the device type from the creation of the ast device
instance.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-4-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_main.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 70e1871dbaf94..026ef893dd509 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -432,6 +432,8 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	struct ast_device *ast;
 	bool need_post = false;
 	int ret = 0;
+	void __iomem *regs;
+	void __iomem *ioregs;
 
 	ast = devm_drm_dev_alloc(&pdev->dev, drv, struct ast_device, base);
 	if (IS_ERR(ast))
@@ -440,8 +442,8 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 
 	pci_set_drvdata(pdev, dev);
 
-	ast->regs = pcim_iomap(pdev, 1, 0);
-	if (!ast->regs)
+	regs = pcim_iomap(pdev, 1, 0);
+	if (!regs)
 		return ERR_PTR(-EIO);
 
 	if (pdev->revision >= 0x40) {
@@ -455,7 +457,7 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 			return ERR_PTR(-EIO);
 		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
 			return ERR_PTR(-EIO);
-		ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
+		ioregs = regs + AST_IO_MM_OFFSET;
 	} else if (pci_resource_flags(pdev, 2) & IORESOURCE_IO) {
 		/*
 		 * Map I/O registers if we have a PCI BAR for I/O.
@@ -464,8 +466,8 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 
 		if (len < AST_IO_MM_LENGTH)
 			return -EIO;
-		ast->ioregs = pcim_iomap(pdev, 2, 0);
-		if (!ast->ioregs)
+		ioregs = pcim_iomap(pdev, 2, 0);
+		if (!ioregs)
 			return ERR_PTR(-EIO);
 	} else {
 		/*
@@ -477,11 +479,14 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 			return ERR_PTR(-EIO);
 		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
 			return ERR_PTR(-EIO);
-		ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
+		ioregs = regs + AST_IO_MM_OFFSET;
 
 		drm_info(dev, "Platform has no I/O space, using MMIO\n");
 	}
 
+	ast->regs = regs;
+	ast->ioregs = ioregs;
+
 	if (!ast_is_vga_enabled(dev)) {
 		drm_info(dev, "VGA not enabled on entry, requesting chip POST\n");
 		need_post = true;
-- 
GitLab


From cdac0cd459cf282ccdc4f28f838a2375e5cf61f7 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:23 +0100
Subject: [PATCH 0475/2340] drm/ast: Add I/O helpers without ast device

Implement I/O access in helpers that do not use an ast device
instance, but the raw pointer to the I/O memory. We'll later need
these helpers to detect the device type before allocating the ast
device instance.

v3:
	* fix typo in commit message (Sui)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-5-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_drv.h | 73 +++++++++++++++++++++++++++--------
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 2ec29480939d3..71f0e23e08cd2 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -273,55 +273,94 @@ static inline bool __ast_gen_is_eq(struct ast_device *ast, unsigned long gen)
 #define IS_AST_GEN6(__ast)	__ast_gen_is_eq(__ast, 6)
 #define IS_AST_GEN7(__ast)	__ast_gen_is_eq(__ast, 7)
 
+static inline u8 __ast_read8(const void __iomem *addr, u32 reg)
+{
+	return ioread8(addr + reg);
+}
+
+static inline u32 __ast_read32(const void __iomem *addr, u32 reg)
+{
+	return ioread32(addr + reg);
+}
+
+static inline void __ast_write8(void __iomem *addr, u32 reg, u8 val)
+{
+	iowrite8(val, addr + reg);
+}
+
+static inline void __ast_write32(void __iomem *addr, u32 reg, u32 val)
+{
+	iowrite32(val, addr + reg);
+}
+
+static inline u8 __ast_read8_i(void __iomem *addr, u32 reg, u8 index)
+{
+	__ast_write8(addr, reg, index);
+	return __ast_read8(addr, reg + 1);
+}
+
+static inline u8 __ast_read8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 read_mask)
+{
+	u8 val = __ast_read8_i(addr, reg, index);
+
+	return val & read_mask;
+}
+
+static inline void __ast_write8_i(void __iomem *addr, u32 reg, u8 index, u8 val)
+{
+	__ast_write8(addr, reg, index);
+	__ast_write8(addr, reg + 1, val);
+}
+
+static inline void __ast_write8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 read_mask,
+					 u8 val)
+{
+	u8 tmp = __ast_read8_i_masked(addr, reg, index, read_mask);
+
+	tmp |= val;
+	__ast_write8_i(addr, reg, index, tmp);
+}
+
 static inline u32 ast_read32(struct ast_device *ast, u32 reg)
 {
-	return ioread32(ast->regs + reg);
+	return __ast_read32(ast->regs, reg);
 }
 
 static inline void ast_write32(struct ast_device *ast, u32 reg, u32 val)
 {
-	iowrite32(val, ast->regs + reg);
+	__ast_write32(ast->regs, reg, val);
 }
 
 static inline u8 ast_io_read8(struct ast_device *ast, u32 reg)
 {
-	return ioread8(ast->ioregs + reg);
+	return __ast_read8(ast->ioregs, reg);
 }
 
 static inline void ast_io_write8(struct ast_device *ast, u32 reg, u8 val)
 {
-	iowrite8(val, ast->ioregs + reg);
+	__ast_write8(ast->ioregs, reg, val);
 }
 
 static inline u8 ast_get_index_reg(struct ast_device *ast, u32 base, u8 index)
 {
-	ast_io_write8(ast, base, index);
-	++base;
-	return ast_io_read8(ast, base);
+	return __ast_read8_i(ast->ioregs, base, index);
 }
 
 static inline u8 ast_get_index_reg_mask(struct ast_device *ast, u32 base, u8 index,
 					u8 preserve_mask)
 {
-	u8 val = ast_get_index_reg(ast, base, index);
-
-	return val & preserve_mask;
+	return __ast_read8_i_masked(ast->ioregs, base, index, preserve_mask);
 }
 
 static inline void ast_set_index_reg(struct ast_device *ast, u32 base, u8 index, u8 val)
 {
-	ast_io_write8(ast, base, index);
-	++base;
-	ast_io_write8(ast, base, val);
+	__ast_write8_i(ast->ioregs, base, index, val);
 }
 
 static inline void ast_set_index_reg_mask(struct ast_device *ast, u32 base, u8 index,
 					  u8 preserve_mask, u8 val)
 {
-	u8 tmp = ast_get_index_reg_mask(ast, base, index, preserve_mask);
-
-	tmp |= val;
-	ast_set_index_reg(ast, base, index, tmp);
+	__ast_write8_i_masked(ast->ioregs, base, index, preserve_mask, val);
 }
 
 #define AST_VIDMEM_SIZE_8M    0x00800000
-- 
GitLab


From 73b05bb4c0539d89111ed2f9c5a2eac1b577f83d Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:24 +0100
Subject: [PATCH 0476/2340] drm/ast: Enable VGA without ast device instance

We'll have to enable the VGA functionality for detecting the ast
device type. Make this work without an instance of the ast device.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-6-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_main.c | 29 ++++++++++++-----------------
 drivers/gpu/drm/ast/ast_reg.h  |  9 +++++++--
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 026ef893dd509..82fcee967d984 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -35,22 +35,17 @@
 
 #include "ast_drv.h"
 
-static bool ast_is_vga_enabled(struct drm_device *dev)
+static bool ast_is_vga_enabled(void __iomem *ioregs)
 {
-	struct ast_device *ast = to_ast_device(dev);
-	u8 ch;
-
-	ch = ast_io_read8(ast, AST_IO_VGAER);
+	u8 vgaer = __ast_read8(ioregs, AST_IO_VGAER);
 
-	return !!(ch & 0x01);
+	return vgaer & AST_IO_VGAER_VGA_ENABLE;
 }
 
-static void ast_enable_vga(struct drm_device *dev)
+static void ast_enable_vga(void __iomem *ioregs)
 {
-	struct ast_device *ast = to_ast_device(dev);
-
-	ast_io_write8(ast, AST_IO_VGAER, 0x01);
-	ast_io_write8(ast, AST_IO_VGAMR_W, 0x01);
+	__ast_write8(ioregs, AST_IO_VGAER, AST_IO_VGAER_VGA_ENABLE);
+	__ast_write8(ioregs, AST_IO_VGAMR_W, AST_IO_VGAMR_IOSEL);
 }
 
 /*
@@ -74,9 +69,9 @@ static int ast_enable_mmio(struct ast_device *ast)
 	return devm_add_action_or_reset(dev->dev, ast_enable_mmio_release, ast);
 }
 
-static void ast_open_key(struct ast_device *ast)
+static void ast_open_key(void __iomem *ioregs)
 {
-	ast_set_index_reg(ast, AST_IO_VGACRI, 0x80, 0xA8);
+	__ast_write8_i(ioregs, AST_IO_VGACRI, 0x80, AST_IO_VGACR80_PASSWORD);
 }
 
 static int ast_device_config_init(struct ast_device *ast)
@@ -487,7 +482,7 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	ast->regs = regs;
 	ast->ioregs = ioregs;
 
-	if (!ast_is_vga_enabled(dev)) {
+	if (!ast_is_vga_enabled(ioregs)) {
 		drm_info(dev, "VGA not enabled on entry, requesting chip POST\n");
 		need_post = true;
 	}
@@ -497,10 +492,10 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	 * access to the scratch registers will fail.
 	 */
 	if (need_post)
-		ast_enable_vga(dev);
-
+		ast_enable_vga(ioregs);
 	/* Enable extended register access */
-	ast_open_key(ast);
+	ast_open_key(ioregs);
+
 	ret = ast_enable_mmio(ast);
 	if (ret)
 		return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h
index 05bab94a9a906..5258a84ef3a6f 100644
--- a/drivers/gpu/drm/ast/ast_reg.h
+++ b/drivers/gpu/drm/ast/ast_reg.h
@@ -13,8 +13,14 @@
 #define AST_IO_MM_LENGTH		(128)
 
 #define AST_IO_VGAARI_W			(0x40)
+
 #define AST_IO_VGAMR_W			(0x42)
+#define AST_IO_VGAMR_R			(0x4c)
+#define AST_IO_VGAMR_IOSEL		BIT(0)
+
 #define AST_IO_VGAER			(0x43)
+#define AST_IO_VGAER_VGA_ENABLE		BIT(0)
+
 #define AST_IO_VGASRI			(0x44)
 #define AST_IO_VGADRR			(0x47)
 #define AST_IO_VGADWR			(0x48)
@@ -22,14 +28,13 @@
 #define AST_IO_VGAGRI			(0x4E)
 
 #define AST_IO_VGACRI			(0x54)
+#define AST_IO_VGACR80_PASSWORD		(0xa8)
 #define AST_IO_VGACRCB_HWC_16BPP	BIT(0) /* set: ARGB4444, cleared: 2bpp palette */
 #define AST_IO_VGACRCB_HWC_ENABLED	BIT(1)
 
 #define AST_IO_VGAIR1_R			(0x5A)
 #define AST_IO_VGAIR1_VREFRESH		BIT(3)
 
-#define AST_IO_VGAMR_R			(0x4C)
-
 /*
  * Display Transmitter Type
  */
-- 
GitLab


From 66f843d6703513b9ee8d3d10694a21931feb32c7 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:25 +0100
Subject: [PATCH 0477/2340] drm/ast: Enable MMIO without ast device instance

We'll have to enable the MMIO access for detecting the ast device
type. Make this work without an instance of the ast device.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-7-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_main.c | 16 +++++++++-------
 drivers/gpu/drm/ast/ast_reg.h  |  2 ++
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 82fcee967d984..0173cb44f17de 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -54,19 +54,21 @@ static void ast_enable_vga(void __iomem *ioregs)
  */
 static void ast_enable_mmio_release(void *data)
 {
-	struct ast_device *ast = data;
+	void __iomem *ioregs = (void __force __iomem *)data;
 
 	/* enable standard VGA decode */
-	ast_set_index_reg(ast, AST_IO_VGACRI, 0xa1, 0x04);
+	__ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1, AST_IO_VGACRA1_MMIO_ENABLED);
 }
 
-static int ast_enable_mmio(struct ast_device *ast)
+static int ast_enable_mmio(struct device *dev, void __iomem *ioregs)
 {
-	struct drm_device *dev = &ast->base;
+	void *data = (void __force *)ioregs;
 
-	ast_set_index_reg(ast, AST_IO_VGACRI, 0xa1, 0x06);
+	__ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1,
+		       AST_IO_VGACRA1_MMIO_ENABLED |
+		       AST_IO_VGACRA1_VGAIO_DISABLED);
 
-	return devm_add_action_or_reset(dev->dev, ast_enable_mmio_release, ast);
+	return devm_add_action_or_reset(dev, ast_enable_mmio_release, data);
 }
 
 static void ast_open_key(void __iomem *ioregs)
@@ -496,7 +498,7 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	/* Enable extended register access */
 	ast_open_key(ioregs);
 
-	ret = ast_enable_mmio(ast);
+	ret = ast_enable_mmio(&pdev->dev, ioregs);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h
index 5258a84ef3a6f..62dddbf3fe569 100644
--- a/drivers/gpu/drm/ast/ast_reg.h
+++ b/drivers/gpu/drm/ast/ast_reg.h
@@ -29,6 +29,8 @@
 
 #define AST_IO_VGACRI			(0x54)
 #define AST_IO_VGACR80_PASSWORD		(0xa8)
+#define AST_IO_VGACRA1_VGAIO_DISABLED	BIT(1)
+#define AST_IO_VGACRA1_MMIO_ENABLED	BIT(2)
 #define AST_IO_VGACRCB_HWC_16BPP	BIT(0) /* set: ARGB4444, cleared: 2bpp palette */
 #define AST_IO_VGACRCB_HWC_ENABLED	BIT(1)
 
-- 
GitLab


From 83ab91faf20c1aed982ca5949ce5d83b34b7f546 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:26 +0100
Subject: [PATCH 0478/2340] drm/ast: Partially implement POST without ast
 device instance

We'll have to do some of the GPU POSTing for detecting the ast device
type. Make this work without an instance of the ast device.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-8-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_drv.h  |  2 +-
 drivers/gpu/drm/ast/ast_main.c |  2 +-
 drivers/gpu/drm/ast/ast_post.c | 73 +++++++++++++++++++++-------------
 3 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 71f0e23e08cd2..390fbec6bc2b1 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -482,7 +482,7 @@ int ast_mm_init(struct ast_device *ast);
 void ast_post_gpu(struct drm_device *dev);
 u32 ast_mindwm(struct ast_device *ast, u32 r);
 void ast_moutdwm(struct ast_device *ast, u32 r, u32 v);
-void ast_patch_ahb_2500(struct ast_device *ast);
+void ast_patch_ahb_2500(void __iomem *regs);
 /* ast dp501 */
 void ast_set_dp501_video_output(struct drm_device *dev, u8 mode);
 bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size);
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 0173cb44f17de..19c8894e391a0 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -113,7 +113,7 @@ static int ast_device_config_init(struct ast_device *ast)
 			/* Patch AST2500/AST2510 */
 			if ((pdev->revision & 0xf0) == 0x40) {
 				if (!(jregd0 & AST_VRAM_INIT_STATUS_MASK))
-					ast_patch_ahb_2500(ast);
+					ast_patch_ahb_2500(ast->regs);
 			}
 
 			/* Double check that it's actually working */
diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
index 7a993a3843147..22f548805dfb0 100644
--- a/drivers/gpu/drm/ast/ast_post.c
+++ b/drivers/gpu/drm/ast/ast_post.c
@@ -77,28 +77,42 @@ ast_set_def_ext_reg(struct drm_device *dev)
 	ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xff, reg);
 }
 
-u32 ast_mindwm(struct ast_device *ast, u32 r)
+static u32 __ast_mindwm(void __iomem *regs, u32 r)
 {
-	uint32_t data;
+	u32 data;
 
-	ast_write32(ast, 0xf004, r & 0xffff0000);
-	ast_write32(ast, 0xf000, 0x1);
+	__ast_write32(regs, 0xf004, r & 0xffff0000);
+	__ast_write32(regs, 0xf000, 0x1);
 
 	do {
-		data = ast_read32(ast, 0xf004) & 0xffff0000;
+		data = __ast_read32(regs, 0xf004) & 0xffff0000;
 	} while (data != (r & 0xffff0000));
-	return ast_read32(ast, 0x10000 + (r & 0x0000ffff));
+
+	return __ast_read32(regs, 0x10000 + (r & 0x0000ffff));
 }
 
-void ast_moutdwm(struct ast_device *ast, u32 r, u32 v)
+static void __ast_moutdwm(void __iomem *regs, u32 r, u32 v)
 {
-	uint32_t data;
-	ast_write32(ast, 0xf004, r & 0xffff0000);
-	ast_write32(ast, 0xf000, 0x1);
+	u32 data;
+
+	__ast_write32(regs, 0xf004, r & 0xffff0000);
+	__ast_write32(regs, 0xf000, 0x1);
+
 	do {
-		data = ast_read32(ast, 0xf004) & 0xffff0000;
+		data = __ast_read32(regs, 0xf004) & 0xffff0000;
 	} while (data != (r & 0xffff0000));
-	ast_write32(ast, 0x10000 + (r & 0x0000ffff), v);
+
+	__ast_write32(regs, 0x10000 + (r & 0x0000ffff), v);
+}
+
+u32 ast_mindwm(struct ast_device *ast, u32 r)
+{
+	return __ast_mindwm(ast->regs, r);
+}
+
+void ast_moutdwm(struct ast_device *ast, u32 r, u32 v)
+{
+	__ast_moutdwm(ast->regs, r, v);
 }
 
 /*
@@ -1987,17 +2001,18 @@ static bool ast_dram_init_2500(struct ast_device *ast)
 	return true;
 }
 
-void ast_patch_ahb_2500(struct ast_device *ast)
+void ast_patch_ahb_2500(void __iomem *regs)
 {
-	u32	data;
+	u32 data;
 
 	/* Clear bus lock condition */
-	ast_moutdwm(ast, 0x1e600000, 0xAEED1A03);
-	ast_moutdwm(ast, 0x1e600084, 0x00010000);
-	ast_moutdwm(ast, 0x1e600088, 0x00000000);
-	ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8);
-	data = ast_mindwm(ast, 0x1e6e2070);
-	if (data & 0x08000000) {					/* check fast reset */
+	__ast_moutdwm(regs, 0x1e600000, 0xAEED1A03);
+	__ast_moutdwm(regs, 0x1e600084, 0x00010000);
+	__ast_moutdwm(regs, 0x1e600088, 0x00000000);
+	__ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8);
+
+	data = __ast_mindwm(regs, 0x1e6e2070);
+	if (data & 0x08000000) { /* check fast reset */
 		/*
 		 * If "Fast restet" is enabled for ARM-ICE debugger,
 		 * then WDT needs to enable, that
@@ -2009,16 +2024,18 @@ void ast_patch_ahb_2500(struct ast_device *ast)
 		 *	[1]:= 1:WDT will be cleeared and disabled after timeout occurs
 		 *	[0]:= 1:WDT enable
 		 */
-		ast_moutdwm(ast, 0x1E785004, 0x00000010);
-		ast_moutdwm(ast, 0x1E785008, 0x00004755);
-		ast_moutdwm(ast, 0x1E78500c, 0x00000033);
+		__ast_moutdwm(regs, 0x1E785004, 0x00000010);
+		__ast_moutdwm(regs, 0x1E785008, 0x00004755);
+		__ast_moutdwm(regs, 0x1E78500c, 0x00000033);
 		udelay(1000);
 	}
+
 	do {
-		ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8);
-		data = ast_mindwm(ast, 0x1e6e2000);
-	}	while (data != 1);
-	ast_moutdwm(ast, 0x1e6e207c, 0x08000000);	/* clear fast reset */
+		__ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8);
+		data = __ast_mindwm(regs, 0x1e6e2000);
+	} while (data != 1);
+
+	__ast_moutdwm(regs, 0x1e6e207c, 0x08000000); /* clear fast reset */
 }
 
 void ast_post_chip_2500(struct drm_device *dev)
@@ -2030,7 +2047,7 @@ void ast_post_chip_2500(struct drm_device *dev)
 	reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff);
 	if ((reg & AST_VRAM_INIT_STATUS_MASK) == 0) {/* vga only */
 		/* Clear bus lock condition */
-		ast_patch_ahb_2500(ast);
+		ast_patch_ahb_2500(ast->regs);
 
 		/* Disable watchdog */
 		ast_moutdwm(ast, 0x1E78502C, 0x00000000);
-- 
GitLab


From 9f3ebec843b0f48ea2c22b7e85c34040aa7c9ee8 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:27 +0100
Subject: [PATCH 0479/2340] drm/ast: Add enum ast_config_mode

The config mode used to be a field in struct ast_device. Turn it into
a named type. We'll need this for device detection.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-9-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_drv.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 390fbec6bc2b1..4c6ee163c6d92 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -98,6 +98,12 @@ enum ast_tx_chip {
 #define AST_TX_DP501_BIT	BIT(AST_TX_DP501)
 #define AST_TX_ASTDP_BIT	BIT(AST_TX_ASTDP)
 
+enum ast_config_mode {
+	ast_use_p2a,
+	ast_use_dt,
+	ast_use_defaults
+};
+
 #define AST_DRAM_512Mx16 0
 #define AST_DRAM_1Gx16   1
 #define AST_DRAM_512Mx32 2
@@ -196,7 +202,9 @@ struct ast_device {
 	void __iomem *ioregs;
 	void __iomem *dp501_fw_buf;
 
+	enum ast_config_mode config_mode;
 	enum ast_chip chip;
+
 	uint32_t dram_bus_width;
 	uint32_t dram_type;
 	uint32_t mclk;
@@ -235,11 +243,6 @@ struct ast_device {
 	} output;
 
 	bool support_wide_screen;
-	enum {
-		ast_use_p2a,
-		ast_use_dt,
-		ast_use_defaults
-	} config_mode;
 
 	unsigned long tx_chip_types;		/* bitfield of enum ast_chip_type */
 	u8 *dp501_fw_addr;
-- 
GitLab


From 51412f869337682d0e9e640c5b424ffb8295d353 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:28 +0100
Subject: [PATCH 0480/2340] drm/ast: Detect ast device type and config mode
 without ast device

Return the ast chip and config in the detection function's parameters
instead of storing them directly in the ast device instance.

v2:
	* add break statements to switch default branches (Jocelyn)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-10-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_main.c | 106 ++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 19c8894e391a0..ad5b758153de1 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -76,25 +76,27 @@ static void ast_open_key(void __iomem *ioregs)
 	__ast_write8_i(ioregs, AST_IO_VGACRI, 0x80, AST_IO_VGACR80_PASSWORD);
 }
 
-static int ast_device_config_init(struct ast_device *ast)
+static int ast_detect_chip(struct pci_dev *pdev,
+			   void __iomem *regs, void __iomem *ioregs,
+			   enum ast_chip *chip_out,
+			   enum ast_config_mode *config_mode_out)
 {
-	struct drm_device *dev = &ast->base;
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	struct device_node *np = dev->dev->of_node;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	enum ast_config_mode config_mode = ast_use_defaults;
 	uint32_t scu_rev = 0xffffffff;
+	enum ast_chip chip;
 	u32 data;
-	u8 jregd0, jregd1;
+	u8 vgacrd0, vgacrd1;
 
 	/*
 	 * Find configuration mode and read SCU revision
 	 */
 
-	ast->config_mode = ast_use_defaults;
-
 	/* Check if we have device-tree properties */
 	if (np && !of_property_read_u32(np, "aspeed,scu-revision-id", &data)) {
 		/* We do, disable P2A access */
-		ast->config_mode = ast_use_dt;
+		config_mode = ast_use_dt;
 		scu_rev = data;
 	} else if (pdev->device == PCI_CHIP_AST2000) { // Not all families have a P2A bridge
 		/*
@@ -102,9 +104,9 @@ static int ast_device_config_init(struct ast_device *ast)
 		 * is disabled. We force using P2A if VGA only mode bit
 		 * is set D[7]
 		 */
-		jregd0 = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff);
-		jregd1 = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd1, 0xff);
-		if (!(jregd0 & 0x80) || !(jregd1 & 0x10)) {
+		vgacrd0 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd0);
+		vgacrd1 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd1);
+		if (!(vgacrd0 & 0x80) || !(vgacrd1 & 0x10)) {
 
 			/*
 			 * We have a P2A bridge and it is enabled.
@@ -112,32 +114,32 @@ static int ast_device_config_init(struct ast_device *ast)
 
 			/* Patch AST2500/AST2510 */
 			if ((pdev->revision & 0xf0) == 0x40) {
-				if (!(jregd0 & AST_VRAM_INIT_STATUS_MASK))
-					ast_patch_ahb_2500(ast->regs);
+				if (!(vgacrd0 & AST_VRAM_INIT_STATUS_MASK))
+					ast_patch_ahb_2500(regs);
 			}
 
 			/* Double check that it's actually working */
-			data = ast_read32(ast, 0xf004);
+			data = __ast_read32(regs, 0xf004);
 			if ((data != 0xffffffff) && (data != 0x00)) {
-				ast->config_mode = ast_use_p2a;
+				config_mode = ast_use_p2a;
 
 				/* Read SCU7c (silicon revision register) */
-				ast_write32(ast, 0xf004, 0x1e6e0000);
-				ast_write32(ast, 0xf000, 0x1);
-				scu_rev = ast_read32(ast, 0x1207c);
+				__ast_write32(regs, 0xf004, 0x1e6e0000);
+				__ast_write32(regs, 0xf000, 0x1);
+				scu_rev = __ast_read32(regs, 0x1207c);
 			}
 		}
 	}
 
-	switch (ast->config_mode) {
+	switch (config_mode) {
 	case ast_use_defaults:
-		drm_info(dev, "Using default configuration\n");
+		dev_info(dev, "Using default configuration\n");
 		break;
 	case ast_use_dt:
-		drm_info(dev, "Using device-tree for configuration\n");
+		dev_info(dev, "Using device-tree for configuration\n");
 		break;
 	case ast_use_p2a:
-		drm_info(dev, "Using P2A bridge for configuration\n");
+		dev_info(dev, "Using P2A bridge for configuration\n");
 		break;
 	}
 
@@ -146,63 +148,68 @@ static int ast_device_config_init(struct ast_device *ast)
 	 */
 
 	if (pdev->revision >= 0x50) {
-		ast->chip = AST2600;
-		drm_info(dev, "AST 2600 detected\n");
+		chip = AST2600;
+		dev_info(dev, "AST 2600 detected\n");
 	} else if (pdev->revision >= 0x40) {
 		switch (scu_rev & 0x300) {
 		case 0x0100:
-			ast->chip = AST2510;
-			drm_info(dev, "AST 2510 detected\n");
+			chip = AST2510;
+			dev_info(dev, "AST 2510 detected\n");
 			break;
 		default:
-			ast->chip = AST2500;
-			drm_info(dev, "AST 2500 detected\n");
+			chip = AST2500;
+			dev_info(dev, "AST 2500 detected\n");
+			break;
 		}
 	} else if (pdev->revision >= 0x30) {
 		switch (scu_rev & 0x300) {
 		case 0x0100:
-			ast->chip = AST1400;
-			drm_info(dev, "AST 1400 detected\n");
+			chip = AST1400;
+			dev_info(dev, "AST 1400 detected\n");
 			break;
 		default:
-			ast->chip = AST2400;
-			drm_info(dev, "AST 2400 detected\n");
+			chip = AST2400;
+			dev_info(dev, "AST 2400 detected\n");
+			break;
 		}
 	} else if (pdev->revision >= 0x20) {
 		switch (scu_rev & 0x300) {
 		case 0x0000:
-			ast->chip = AST1300;
-			drm_info(dev, "AST 1300 detected\n");
+			chip = AST1300;
+			dev_info(dev, "AST 1300 detected\n");
 			break;
 		default:
-			ast->chip = AST2300;
-			drm_info(dev, "AST 2300 detected\n");
+			chip = AST2300;
+			dev_info(dev, "AST 2300 detected\n");
 			break;
 		}
 	} else if (pdev->revision >= 0x10) {
 		switch (scu_rev & 0x0300) {
 		case 0x0200:
-			ast->chip = AST1100;
-			drm_info(dev, "AST 1100 detected\n");
+			chip = AST1100;
+			dev_info(dev, "AST 1100 detected\n");
 			break;
 		case 0x0100:
-			ast->chip = AST2200;
-			drm_info(dev, "AST 2200 detected\n");
+			chip = AST2200;
+			dev_info(dev, "AST 2200 detected\n");
 			break;
 		case 0x0000:
-			ast->chip = AST2150;
-			drm_info(dev, "AST 2150 detected\n");
+			chip = AST2150;
+			dev_info(dev, "AST 2150 detected\n");
 			break;
 		default:
-			ast->chip = AST2100;
-			drm_info(dev, "AST 2100 detected\n");
+			chip = AST2100;
+			dev_info(dev, "AST 2100 detected\n");
 			break;
 		}
 	} else {
-		ast->chip = AST2000;
-		drm_info(dev, "AST 2000 detected\n");
+		chip = AST2000;
+		dev_info(dev, "AST 2000 detected\n");
 	}
 
+	*chip_out = chip;
+	*config_mode_out = config_mode;
+
 	return 0;
 }
 
@@ -431,6 +438,8 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	int ret = 0;
 	void __iomem *regs;
 	void __iomem *ioregs;
+	enum ast_config_mode config_mode;
+	enum ast_chip chip;
 
 	ast = devm_drm_dev_alloc(&pdev->dev, drv, struct ast_device, base);
 	if (IS_ERR(ast))
@@ -502,10 +511,13 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = ast_device_config_init(ast);
+	ret = ast_detect_chip(pdev, regs, ioregs, &chip, &config_mode);
 	if (ret)
 		return ERR_PTR(ret);
 
+	ast->chip = chip;
+	ast->config_mode = config_mode;
+
 	ast_detect_widescreen(ast);
 	ast_detect_tx_chip(ast, need_post);
 
-- 
GitLab


From 83dc1029dcf50b5b849b26679a1b3f860b85d79c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 16 Nov 2023 10:59:29 +0100
Subject: [PATCH 0481/2340] drm/ast: Move detection code into PCI probe helper

Detect device type and config mode in the PCI probe helper, but leave
DRM device initialization where it is. Structures the driver probe and
setup code into a detection and an initialization phase.

A later patch can add branching to the device-initialization code. Each
chip type can have it own initializer function, if necessary.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231116100240.22975-11-tzimmermann@suse.de
---
 drivers/gpu/drm/ast/ast_drv.c  | 263 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/ast/ast_drv.h  |  10 +-
 drivers/gpu/drm/ast/ast_main.c | 270 ++-------------------------------
 3 files changed, 274 insertions(+), 269 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index cf5b754f044c7..90bcb1eb9cd94 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -89,11 +89,194 @@ static const struct pci_device_id ast_pciidlist[] = {
 
 MODULE_DEVICE_TABLE(pci, ast_pciidlist);
 
+static bool ast_is_vga_enabled(void __iomem *ioregs)
+{
+	u8 vgaer = __ast_read8(ioregs, AST_IO_VGAER);
+
+	return vgaer & AST_IO_VGAER_VGA_ENABLE;
+}
+
+static void ast_enable_vga(void __iomem *ioregs)
+{
+	__ast_write8(ioregs, AST_IO_VGAER, AST_IO_VGAER_VGA_ENABLE);
+	__ast_write8(ioregs, AST_IO_VGAMR_W, AST_IO_VGAMR_IOSEL);
+}
+
+/*
+ * Run this function as part of the HW device cleanup; not
+ * when the DRM device gets released.
+ */
+static void ast_enable_mmio_release(void *data)
+{
+	void __iomem *ioregs = (void __force __iomem *)data;
+
+	/* enable standard VGA decode */
+	__ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1, AST_IO_VGACRA1_MMIO_ENABLED);
+}
+
+static int ast_enable_mmio(struct device *dev, void __iomem *ioregs)
+{
+	void *data = (void __force *)ioregs;
+
+	__ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1,
+		       AST_IO_VGACRA1_MMIO_ENABLED |
+		       AST_IO_VGACRA1_VGAIO_DISABLED);
+
+	return devm_add_action_or_reset(dev, ast_enable_mmio_release, data);
+}
+
+static void ast_open_key(void __iomem *ioregs)
+{
+	__ast_write8_i(ioregs, AST_IO_VGACRI, 0x80, AST_IO_VGACR80_PASSWORD);
+}
+
+static int ast_detect_chip(struct pci_dev *pdev,
+			   void __iomem *regs, void __iomem *ioregs,
+			   enum ast_chip *chip_out,
+			   enum ast_config_mode *config_mode_out)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	enum ast_config_mode config_mode = ast_use_defaults;
+	uint32_t scu_rev = 0xffffffff;
+	enum ast_chip chip;
+	u32 data;
+	u8 vgacrd0, vgacrd1;
+
+	/*
+	 * Find configuration mode and read SCU revision
+	 */
+
+	/* Check if we have device-tree properties */
+	if (np && !of_property_read_u32(np, "aspeed,scu-revision-id", &data)) {
+		/* We do, disable P2A access */
+		config_mode = ast_use_dt;
+		scu_rev = data;
+	} else if (pdev->device == PCI_CHIP_AST2000) { // Not all families have a P2A bridge
+		/*
+		 * The BMC will set SCU 0x40 D[12] to 1 if the P2 bridge
+		 * is disabled. We force using P2A if VGA only mode bit
+		 * is set D[7]
+		 */
+		vgacrd0 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd0);
+		vgacrd1 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd1);
+		if (!(vgacrd0 & 0x80) || !(vgacrd1 & 0x10)) {
+
+			/*
+			 * We have a P2A bridge and it is enabled.
+			 */
+
+			/* Patch AST2500/AST2510 */
+			if ((pdev->revision & 0xf0) == 0x40) {
+				if (!(vgacrd0 & AST_VRAM_INIT_STATUS_MASK))
+					ast_patch_ahb_2500(regs);
+			}
+
+			/* Double check that it's actually working */
+			data = __ast_read32(regs, 0xf004);
+			if ((data != 0xffffffff) && (data != 0x00)) {
+				config_mode = ast_use_p2a;
+
+				/* Read SCU7c (silicon revision register) */
+				__ast_write32(regs, 0xf004, 0x1e6e0000);
+				__ast_write32(regs, 0xf000, 0x1);
+				scu_rev = __ast_read32(regs, 0x1207c);
+			}
+		}
+	}
+
+	switch (config_mode) {
+	case ast_use_defaults:
+		dev_info(dev, "Using default configuration\n");
+		break;
+	case ast_use_dt:
+		dev_info(dev, "Using device-tree for configuration\n");
+		break;
+	case ast_use_p2a:
+		dev_info(dev, "Using P2A bridge for configuration\n");
+		break;
+	}
+
+	/*
+	 * Identify chipset
+	 */
+
+	if (pdev->revision >= 0x50) {
+		chip = AST2600;
+		dev_info(dev, "AST 2600 detected\n");
+	} else if (pdev->revision >= 0x40) {
+		switch (scu_rev & 0x300) {
+		case 0x0100:
+			chip = AST2510;
+			dev_info(dev, "AST 2510 detected\n");
+			break;
+		default:
+			chip = AST2500;
+			dev_info(dev, "AST 2500 detected\n");
+			break;
+		}
+	} else if (pdev->revision >= 0x30) {
+		switch (scu_rev & 0x300) {
+		case 0x0100:
+			chip = AST1400;
+			dev_info(dev, "AST 1400 detected\n");
+			break;
+		default:
+			chip = AST2400;
+			dev_info(dev, "AST 2400 detected\n");
+			break;
+		}
+	} else if (pdev->revision >= 0x20) {
+		switch (scu_rev & 0x300) {
+		case 0x0000:
+			chip = AST1300;
+			dev_info(dev, "AST 1300 detected\n");
+			break;
+		default:
+			chip = AST2300;
+			dev_info(dev, "AST 2300 detected\n");
+			break;
+		}
+	} else if (pdev->revision >= 0x10) {
+		switch (scu_rev & 0x0300) {
+		case 0x0200:
+			chip = AST1100;
+			dev_info(dev, "AST 1100 detected\n");
+			break;
+		case 0x0100:
+			chip = AST2200;
+			dev_info(dev, "AST 2200 detected\n");
+			break;
+		case 0x0000:
+			chip = AST2150;
+			dev_info(dev, "AST 2150 detected\n");
+			break;
+		default:
+			chip = AST2100;
+			dev_info(dev, "AST 2100 detected\n");
+			break;
+		}
+	} else {
+		chip = AST2000;
+		dev_info(dev, "AST 2000 detected\n");
+	}
+
+	*chip_out = chip;
+	*config_mode_out = config_mode;
+
+	return 0;
+}
+
 static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	struct ast_device *ast;
-	struct drm_device *dev;
+	struct device *dev = &pdev->dev;
 	int ret;
+	void __iomem *regs;
+	void __iomem *ioregs;
+	enum ast_config_mode config_mode;
+	enum ast_chip chip;
+	struct drm_device *drm;
+	bool need_post = false;
 
 	ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &ast_driver);
 	if (ret)
@@ -103,16 +286,80 @@ static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		return ret;
 
-	ast = ast_device_create(&ast_driver, pdev, ent->driver_data);
-	if (IS_ERR(ast))
-		return PTR_ERR(ast);
-	dev = &ast->base;
+	regs = pcim_iomap(pdev, 1, 0);
+	if (!regs)
+		return -EIO;
+
+	if (pdev->revision >= 0x40) {
+		/*
+		 * On AST2500 and later models, MMIO is enabled by
+		 * default. Adopt it to be compatible with ARM.
+		 */
+		resource_size_t len = pci_resource_len(pdev, 1);
+
+		if (len < AST_IO_MM_OFFSET)
+			return -EIO;
+		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
+			return -EIO;
+		ioregs = regs + AST_IO_MM_OFFSET;
+	} else if (pci_resource_flags(pdev, 2) & IORESOURCE_IO) {
+		/*
+		 * Map I/O registers if we have a PCI BAR for I/O.
+		 */
+		resource_size_t len = pci_resource_len(pdev, 2);
+
+		if (len < AST_IO_MM_LENGTH)
+			return -EIO;
+		ioregs = pcim_iomap(pdev, 2, 0);
+		if (!ioregs)
+			return -EIO;
+	} else {
+		/*
+		 * Anything else is best effort.
+		 */
+		resource_size_t len = pci_resource_len(pdev, 1);
+
+		if (len < AST_IO_MM_OFFSET)
+			return -EIO;
+		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
+			return -EIO;
+		ioregs = regs + AST_IO_MM_OFFSET;
+
+		dev_info(dev, "Platform has no I/O space, using MMIO\n");
+	}
+
+	if (!ast_is_vga_enabled(ioregs)) {
+		dev_info(dev, "VGA not enabled on entry, requesting chip POST\n");
+		need_post = true;
+	}
+
+	/*
+	 * If VGA isn't enabled, we need to enable now or subsequent
+	 * access to the scratch registers will fail.
+	 */
+	if (need_post)
+		ast_enable_vga(ioregs);
+	/* Enable extended register access */
+	ast_open_key(ioregs);
+
+	ret = ast_enable_mmio(dev, ioregs);
+	if (ret)
+		return ret;
+
+	ret = ast_detect_chip(pdev, regs, ioregs, &chip, &config_mode);
+	if (ret)
+		return ret;
+
+	drm = ast_device_create(pdev, &ast_driver, chip, config_mode, regs, ioregs, need_post);
+	if (IS_ERR(drm))
+		return PTR_ERR(drm);
+	pci_set_drvdata(pdev, drm);
 
-	ret = drm_dev_register(dev, ent->driver_data);
+	ret = drm_dev_register(drm, ent->driver_data);
 	if (ret)
 		return ret;
 
-	drm_fbdev_generic_setup(dev, 32);
+	drm_fbdev_generic_setup(drm, 32);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 4c6ee163c6d92..3be5ccf1f5f4d 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -254,9 +254,13 @@ static inline struct ast_device *to_ast_device(struct drm_device *dev)
 	return container_of(dev, struct ast_device, base);
 }
 
-struct ast_device *ast_device_create(const struct drm_driver *drv,
-				     struct pci_dev *pdev,
-				     unsigned long flags);
+struct drm_device *ast_device_create(struct pci_dev *pdev,
+				     const struct drm_driver *drv,
+				     enum ast_chip chip,
+				     enum ast_config_mode config_mode,
+				     void __iomem *regs,
+				     void __iomem *ioregs,
+				     bool need_post);
 
 static inline unsigned long __ast_gen(struct ast_device *ast)
 {
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index ad5b758153de1..2f3ad5f949fcb 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -35,184 +35,6 @@
 
 #include "ast_drv.h"
 
-static bool ast_is_vga_enabled(void __iomem *ioregs)
-{
-	u8 vgaer = __ast_read8(ioregs, AST_IO_VGAER);
-
-	return vgaer & AST_IO_VGAER_VGA_ENABLE;
-}
-
-static void ast_enable_vga(void __iomem *ioregs)
-{
-	__ast_write8(ioregs, AST_IO_VGAER, AST_IO_VGAER_VGA_ENABLE);
-	__ast_write8(ioregs, AST_IO_VGAMR_W, AST_IO_VGAMR_IOSEL);
-}
-
-/*
- * Run this function as part of the HW device cleanup; not
- * when the DRM device gets released.
- */
-static void ast_enable_mmio_release(void *data)
-{
-	void __iomem *ioregs = (void __force __iomem *)data;
-
-	/* enable standard VGA decode */
-	__ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1, AST_IO_VGACRA1_MMIO_ENABLED);
-}
-
-static int ast_enable_mmio(struct device *dev, void __iomem *ioregs)
-{
-	void *data = (void __force *)ioregs;
-
-	__ast_write8_i(ioregs, AST_IO_VGACRI, 0xa1,
-		       AST_IO_VGACRA1_MMIO_ENABLED |
-		       AST_IO_VGACRA1_VGAIO_DISABLED);
-
-	return devm_add_action_or_reset(dev, ast_enable_mmio_release, data);
-}
-
-static void ast_open_key(void __iomem *ioregs)
-{
-	__ast_write8_i(ioregs, AST_IO_VGACRI, 0x80, AST_IO_VGACR80_PASSWORD);
-}
-
-static int ast_detect_chip(struct pci_dev *pdev,
-			   void __iomem *regs, void __iomem *ioregs,
-			   enum ast_chip *chip_out,
-			   enum ast_config_mode *config_mode_out)
-{
-	struct device *dev = &pdev->dev;
-	struct device_node *np = dev->of_node;
-	enum ast_config_mode config_mode = ast_use_defaults;
-	uint32_t scu_rev = 0xffffffff;
-	enum ast_chip chip;
-	u32 data;
-	u8 vgacrd0, vgacrd1;
-
-	/*
-	 * Find configuration mode and read SCU revision
-	 */
-
-	/* Check if we have device-tree properties */
-	if (np && !of_property_read_u32(np, "aspeed,scu-revision-id", &data)) {
-		/* We do, disable P2A access */
-		config_mode = ast_use_dt;
-		scu_rev = data;
-	} else if (pdev->device == PCI_CHIP_AST2000) { // Not all families have a P2A bridge
-		/*
-		 * The BMC will set SCU 0x40 D[12] to 1 if the P2 bridge
-		 * is disabled. We force using P2A if VGA only mode bit
-		 * is set D[7]
-		 */
-		vgacrd0 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd0);
-		vgacrd1 = __ast_read8_i(ioregs, AST_IO_VGACRI, 0xd1);
-		if (!(vgacrd0 & 0x80) || !(vgacrd1 & 0x10)) {
-
-			/*
-			 * We have a P2A bridge and it is enabled.
-			 */
-
-			/* Patch AST2500/AST2510 */
-			if ((pdev->revision & 0xf0) == 0x40) {
-				if (!(vgacrd0 & AST_VRAM_INIT_STATUS_MASK))
-					ast_patch_ahb_2500(regs);
-			}
-
-			/* Double check that it's actually working */
-			data = __ast_read32(regs, 0xf004);
-			if ((data != 0xffffffff) && (data != 0x00)) {
-				config_mode = ast_use_p2a;
-
-				/* Read SCU7c (silicon revision register) */
-				__ast_write32(regs, 0xf004, 0x1e6e0000);
-				__ast_write32(regs, 0xf000, 0x1);
-				scu_rev = __ast_read32(regs, 0x1207c);
-			}
-		}
-	}
-
-	switch (config_mode) {
-	case ast_use_defaults:
-		dev_info(dev, "Using default configuration\n");
-		break;
-	case ast_use_dt:
-		dev_info(dev, "Using device-tree for configuration\n");
-		break;
-	case ast_use_p2a:
-		dev_info(dev, "Using P2A bridge for configuration\n");
-		break;
-	}
-
-	/*
-	 * Identify chipset
-	 */
-
-	if (pdev->revision >= 0x50) {
-		chip = AST2600;
-		dev_info(dev, "AST 2600 detected\n");
-	} else if (pdev->revision >= 0x40) {
-		switch (scu_rev & 0x300) {
-		case 0x0100:
-			chip = AST2510;
-			dev_info(dev, "AST 2510 detected\n");
-			break;
-		default:
-			chip = AST2500;
-			dev_info(dev, "AST 2500 detected\n");
-			break;
-		}
-	} else if (pdev->revision >= 0x30) {
-		switch (scu_rev & 0x300) {
-		case 0x0100:
-			chip = AST1400;
-			dev_info(dev, "AST 1400 detected\n");
-			break;
-		default:
-			chip = AST2400;
-			dev_info(dev, "AST 2400 detected\n");
-			break;
-		}
-	} else if (pdev->revision >= 0x20) {
-		switch (scu_rev & 0x300) {
-		case 0x0000:
-			chip = AST1300;
-			dev_info(dev, "AST 1300 detected\n");
-			break;
-		default:
-			chip = AST2300;
-			dev_info(dev, "AST 2300 detected\n");
-			break;
-		}
-	} else if (pdev->revision >= 0x10) {
-		switch (scu_rev & 0x0300) {
-		case 0x0200:
-			chip = AST1100;
-			dev_info(dev, "AST 1100 detected\n");
-			break;
-		case 0x0100:
-			chip = AST2200;
-			dev_info(dev, "AST 2200 detected\n");
-			break;
-		case 0x0000:
-			chip = AST2150;
-			dev_info(dev, "AST 2150 detected\n");
-			break;
-		default:
-			chip = AST2100;
-			dev_info(dev, "AST 2100 detected\n");
-			break;
-		}
-	} else {
-		chip = AST2000;
-		dev_info(dev, "AST 2000 detected\n");
-	}
-
-	*chip_out = chip;
-	*config_mode_out = config_mode;
-
-	return 0;
-}
-
 static void ast_detect_widescreen(struct ast_device *ast)
 {
 	u8 jreg;
@@ -428,95 +250,27 @@ static int ast_get_dram_info(struct drm_device *dev)
 	return 0;
 }
 
-struct ast_device *ast_device_create(const struct drm_driver *drv,
-				     struct pci_dev *pdev,
-				     unsigned long flags)
+struct drm_device *ast_device_create(struct pci_dev *pdev,
+				     const struct drm_driver *drv,
+				     enum ast_chip chip,
+				     enum ast_config_mode config_mode,
+				     void __iomem *regs,
+				     void __iomem *ioregs,
+				     bool need_post)
 {
 	struct drm_device *dev;
 	struct ast_device *ast;
-	bool need_post = false;
-	int ret = 0;
-	void __iomem *regs;
-	void __iomem *ioregs;
-	enum ast_config_mode config_mode;
-	enum ast_chip chip;
+	int ret;
 
 	ast = devm_drm_dev_alloc(&pdev->dev, drv, struct ast_device, base);
 	if (IS_ERR(ast))
-		return ast;
+		return ERR_CAST(ast);
 	dev = &ast->base;
 
-	pci_set_drvdata(pdev, dev);
-
-	regs = pcim_iomap(pdev, 1, 0);
-	if (!regs)
-		return ERR_PTR(-EIO);
-
-	if (pdev->revision >= 0x40) {
-		/*
-		 * On AST2500 and later models, MMIO is enabled by
-		 * default. Adopt it to be compatible with ARM.
-		 */
-		resource_size_t len = pci_resource_len(pdev, 1);
-
-		if (len < AST_IO_MM_OFFSET)
-			return ERR_PTR(-EIO);
-		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
-			return ERR_PTR(-EIO);
-		ioregs = regs + AST_IO_MM_OFFSET;
-	} else if (pci_resource_flags(pdev, 2) & IORESOURCE_IO) {
-		/*
-		 * Map I/O registers if we have a PCI BAR for I/O.
-		 */
-		resource_size_t len = pci_resource_len(pdev, 2);
-
-		if (len < AST_IO_MM_LENGTH)
-			return -EIO;
-		ioregs = pcim_iomap(pdev, 2, 0);
-		if (!ioregs)
-			return ERR_PTR(-EIO);
-	} else {
-		/*
-		 * Anything else is best effort.
-		 */
-		resource_size_t len = pci_resource_len(pdev, 1);
-
-		if (len < AST_IO_MM_OFFSET)
-			return ERR_PTR(-EIO);
-		if ((len - AST_IO_MM_OFFSET) < AST_IO_MM_LENGTH)
-			return ERR_PTR(-EIO);
-		ioregs = regs + AST_IO_MM_OFFSET;
-
-		drm_info(dev, "Platform has no I/O space, using MMIO\n");
-	}
-
-	ast->regs = regs;
-	ast->ioregs = ioregs;
-
-	if (!ast_is_vga_enabled(ioregs)) {
-		drm_info(dev, "VGA not enabled on entry, requesting chip POST\n");
-		need_post = true;
-	}
-
-	/*
-	 * If VGA isn't enabled, we need to enable now or subsequent
-	 * access to the scratch registers will fail.
-	 */
-	if (need_post)
-		ast_enable_vga(ioregs);
-	/* Enable extended register access */
-	ast_open_key(ioregs);
-
-	ret = ast_enable_mmio(&pdev->dev, ioregs);
-	if (ret)
-		return ERR_PTR(ret);
-
-	ret = ast_detect_chip(pdev, regs, ioregs, &chip, &config_mode);
-	if (ret)
-		return ERR_PTR(ret);
-
 	ast->chip = chip;
 	ast->config_mode = config_mode;
+	ast->regs = regs;
+	ast->ioregs = ioregs;
 
 	ast_detect_widescreen(ast);
 	ast_detect_tx_chip(ast, need_post);
@@ -547,5 +301,5 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
 	if (ret)
 		return ERR_PTR(ret);
 
-	return ast;
+	return dev;
 }
-- 
GitLab


From 288b039db225676e0c520c981a1b5a2562d893a3 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Date: Fri, 24 Nov 2023 10:42:30 +0100
Subject: [PATCH 0482/2340] drm/bridge: Fix typo in post_disable() description

s/singals/signals/

Fixes: 199e4e967af4 ("drm: Extract drm_bridge.h")
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124094253.658064-1-dario.binacchi@amarulasolutions.com
---
 include/drm/drm_bridge.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index cfb7dcdb66c4b..9ef461aa9b9e2 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -194,7 +194,7 @@ struct drm_bridge_funcs {
 	 * or &drm_encoder_helper_funcs.dpms hook.
 	 *
 	 * The bridge must assume that the display pipe (i.e. clocks and timing
-	 * singals) feeding it is no longer running when this callback is
+	 * signals) feeding it is no longer running when this callback is
 	 * called.
 	 *
 	 * The @post_disable callback is optional.
-- 
GitLab


From 97137bd3ffc5c5972ef3e27d145250c1750f8dc4 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 27 Nov 2023 11:00:44 -0800
Subject: [PATCH 0483/2340] drm/i915/dg2: Drop Wa_22014600077

This workaround has been dropped from all DG2 variants in the latest
workaround database update.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127190043.4099109-2-matthew.d.roper@intel.com
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 9bc0654efdc0c..4cbf9e5126459 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2357,14 +2357,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			   0, true);
 	}
 
-	if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
-		/* Wa_22014600077:dg2 */
-		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
-			   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
-			   0 /* Wa_14012342262 write-only reg, so skip verification */,
-			   true);
-	}
-
 	if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
 	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
 		/*
-- 
GitLab


From 325b71e820b67569048c621227266783442b75ed Mon Sep 17 00:00:00 2001
From: Liu Ying <victor.liu@nxp.com>
Date: Thu, 23 Nov 2023 13:18:07 +0800
Subject: [PATCH 0484/2340] drm/bridge: imx93-mipi-dsi: Fix a couple of
 building warnings

Fix a couple of building warnings on used uninitialized 'best_m' and
'best_n' local variables by initializing 'best_m' to zero and 'best_n'
to UINT_MAX.  This makes compiler happy only.  No functional change.

Fixes: ce62f8ea7e3f ("drm/bridge: imx: Add i.MX93 MIPI DSI support")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311151746.f7u7dzbZ-lkp@intel.com/
Signed-off-by: Liu Ying <victor.liu@nxp.com>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123051807.3818342-1-victor.liu@nxp.com
---
 drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c
index 3ff30ce80c5b8..2347f8dd632f9 100644
--- a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c
+++ b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c
@@ -226,8 +226,8 @@ dphy_pll_get_configure_from_opts(struct imx93_dsi *dsi,
 	unsigned long fout;
 	unsigned long best_fout = 0;
 	unsigned int fvco_div;
-	unsigned int min_n, max_n, n, best_n;
-	unsigned long m, best_m;
+	unsigned int min_n, max_n, n, best_n = UINT_MAX;
+	unsigned long m, best_m = 0;
 	unsigned long min_delta = ULONG_MAX;
 	unsigned long delta;
 	u64 tmp;
-- 
GitLab


From 3cc808e3239cf566b3d3b15cf2beee066b60f241 Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Tue, 28 Nov 2023 17:35:07 +0000
Subject: [PATCH 0485/2340] drm/imagination: Numerous documentation fixes.

Some reported by Stephen Rothwell. The rest were found by running the
kernel-doc build script.
Some indentation fixes.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311241526.Y2WZeUau-lkp@intel.com/
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128173507.95119-1-donald.robson@imgtec.com
---
 Documentation/gpu/imagination/index.rst   |  2 +-
 Documentation/gpu/imagination/uapi.rst    |  5 +----
 drivers/gpu/drm/imagination/pvr_cccb.h    |  1 +
 drivers/gpu/drm/imagination/pvr_device.h  | 25 ++++++++++++++++-------
 drivers/gpu/drm/imagination/pvr_fw.h      |  3 ++-
 drivers/gpu/drm/imagination/pvr_fw_info.h |  8 ++++----
 drivers/gpu/drm/imagination/pvr_hwrt.h    |  1 +
 drivers/gpu/drm/imagination/pvr_job.c     |  4 +---
 drivers/gpu/drm/imagination/pvr_mmu.c     |  3 ++-
 drivers/gpu/drm/imagination/pvr_queue.h   |  4 ++--
 drivers/gpu/drm/imagination/pvr_vm.c      |  2 +-
 include/uapi/drm/pvr_drm.h                | 10 ++++-----
 12 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/Documentation/gpu/imagination/index.rst b/Documentation/gpu/imagination/index.rst
index dc9579e758c3e..0c1e247cea41e 100644
--- a/Documentation/gpu/imagination/index.rst
+++ b/Documentation/gpu/imagination/index.rst
@@ -3,7 +3,7 @@ drm/imagination PowerVR Graphics Driver
 =======================================
 
 .. kernel-doc:: drivers/gpu/drm/imagination/pvr_drv.c
-   :doc: PowerVR Graphics Driver
+   :doc: PowerVR (Series 6 and later) and IMG Graphics Driver
 
 Contents
 ========
diff --git a/Documentation/gpu/imagination/uapi.rst b/Documentation/gpu/imagination/uapi.rst
index 2227ea7e6222c..7502413d0a939 100644
--- a/Documentation/gpu/imagination/uapi.rst
+++ b/Documentation/gpu/imagination/uapi.rst
@@ -45,9 +45,6 @@ DEV_QUERY
                  drm_pvr_heap
                  drm_pvr_dev_query_heap_info
 
-.. kernel-doc:: include/uapi/drm/pvr_drm.h
-   :doc: Flags for DRM_PVR_DEV_QUERY_HEAP_INFO_GET.
-
 .. kernel-doc:: include/uapi/drm/pvr_drm.h
    :identifiers: drm_pvr_static_data_area_usage
                  drm_pvr_static_data_area
@@ -121,7 +118,7 @@ CREATE_FREE_LIST and DESTROY_FREE_LIST
    :identifiers: drm_pvr_ioctl_destroy_free_list_args
 
 CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET
---------------------------------------
+--------------------------------------------
 .. kernel-doc:: include/uapi/drm/pvr_drm.h
    :doc: PowerVR IOCTL CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET interfaces
 
diff --git a/drivers/gpu/drm/imagination/pvr_cccb.h b/drivers/gpu/drm/imagination/pvr_cccb.h
index f35b3d4c9575b..943fe8f2c963b 100644
--- a/drivers/gpu/drm/imagination/pvr_cccb.h
+++ b/drivers/gpu/drm/imagination/pvr_cccb.h
@@ -86,6 +86,7 @@ pvr_cccb_get_size_of_cmd_with_hdr(u32 cmd_size)
 
 /**
  * pvr_cccb_cmdseq_can_fit() - Check if a command sequence can fit in the CCCB.
+ * @pvr_cccb: Target Client CCB.
  * @size: Command sequence size.
  *
  * Returns:
diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index e07655fc65e8c..2ca7e535799fe 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -203,17 +203,29 @@ struct pvr_device {
 		struct mutex lock;
 	} queues;
 
+	/**
+	 * @watchdog: Watchdog for communications with firmware.
+	 */
 	struct {
 		/** @work: Work item for watchdog callback. */
 		struct delayed_work work;
 
-		/** @old_kccb_cmds_executed: KCCB command execution count at last watchdog poll. */
+		/**
+		 * @old_kccb_cmds_executed: KCCB command execution count at last
+		 * watchdog poll.
+		 */
 		u32 old_kccb_cmds_executed;
 
-		/** @kccb_stall_count: Number of watchdog polls KCCB has been stalled for. */
+		/**
+		 * @kccb_stall_count: Number of watchdog polls KCCB has been
+		 * stalled for.
+		 */
 		u32 kccb_stall_count;
 	} watchdog;
 
+	/**
+	 * @kccb: Circular buffer for communications with firmware.
+	 */
 	struct {
 		/** @ccb: Kernel CCB. */
 		struct pvr_ccb ccb;
@@ -225,8 +237,8 @@ struct pvr_device {
 		struct pvr_fw_object *rtn_obj;
 
 		/**
-		 * @rtn: Pointer to CPU mapping of KCCB return slots. Must be accessed by
-		 *       READ_ONCE()/WRITE_ONCE().
+		 * @rtn: Pointer to CPU mapping of KCCB return slots. Must be
+		 * accessed by READ_ONCE()/WRITE_ONCE().
 		 */
 		u32 *rtn;
 
@@ -293,14 +305,13 @@ struct pvr_file {
 
 	/**
 	 * @pvr_dev: A reference to the powervr-specific wrapper for the
-	 *           associated device. Saves on repeated calls to
-	 *           to_pvr_device().
+	 * associated device. Saves on repeated calls to to_pvr_device().
 	 */
 	struct pvr_device *pvr_dev;
 
 	/**
 	 * @ctx_handles: Array of contexts belonging to this file. Array members
-	 *               are of type "struct pvr_context *".
+	 * are of type "struct pvr_context *".
 	 *
 	 * This array is used to allocate handles returned to userspace.
 	 */
diff --git a/drivers/gpu/drm/imagination/pvr_fw.h b/drivers/gpu/drm/imagination/pvr_fw.h
index 5cd3ef08d82b4..b7966bd574a92 100644
--- a/drivers/gpu/drm/imagination/pvr_fw.h
+++ b/drivers/gpu/drm/imagination/pvr_fw.h
@@ -481,7 +481,8 @@ pvr_fw_object_unmap_and_destroy(struct pvr_fw_object *fw_obj)
 }
 
 /**
- * pvr_fw_get_dma_addr() - Get DMA address for given offset in firmware object
+ * pvr_fw_object_get_dma_addr() - Get DMA address for given offset in firmware
+ * object.
  * @fw_obj: Pointer to object to lookup address in.
  * @offset: Offset within object to lookup address at.
  * @dma_addr_out: Pointer to location to store DMA address.
diff --git a/drivers/gpu/drm/imagination/pvr_fw_info.h b/drivers/gpu/drm/imagination/pvr_fw_info.h
index ad5d44a3067ac..c3639440610eb 100644
--- a/drivers/gpu/drm/imagination/pvr_fw_info.h
+++ b/drivers/gpu/drm/imagination/pvr_fw_info.h
@@ -122,13 +122,13 @@ struct pvr_fw_layout_entry {
  * struct pvr_fw_device_info_header - Device information header.
  */
 struct pvr_fw_device_info_header {
-	/* BRN Mask size (in u64s). */
+	/** @brn_mask_size: BRN mask size (in u64s). */
 	u64 brn_mask_size;
-	/* ERN Mask size (in u64s). */
+	/** @ern_mask_size: ERN mask size (in u64s). */
 	u64 ern_mask_size;
-	/* Feature Mask size (in u64s). */
+	/** @feature_mask_size: Feature mask size (in u64s). */
 	u64 feature_mask_size;
-	/* Feature Parameter size (in u64s). */
+	/** @feature_param_size: Feature parameter size (in u64s). */
 	u64 feature_param_size;
 };
 
diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.h b/drivers/gpu/drm/imagination/pvr_hwrt.h
index 76992948d0472..676070b20c3ba 100644
--- a/drivers/gpu/drm/imagination/pvr_hwrt.h
+++ b/drivers/gpu/drm/imagination/pvr_hwrt.h
@@ -64,6 +64,7 @@ struct pvr_hwrt_dataset {
 	/** @common_fw_obj: FW object representing common FW-side structure. */
 	struct pvr_fw_object *common_fw_obj;
 
+	/** @common: Common HWRT data. */
 	struct rogue_fwif_hwrtdata_common common;
 
 	/** @data: HWRT data structures belonging to this set. */
diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c
index 9d0812710295c..04139da6c04dc 100644
--- a/drivers/gpu/drm/imagination/pvr_job.c
+++ b/drivers/gpu/drm/imagination/pvr_job.c
@@ -378,7 +378,7 @@ prepare_job_syncs(struct pvr_file *pvr_file,
 
 /**
  * prepare_job_syncs_for_each() - Prepare all sync objects for an array of jobs.
- * @file: PowerVR file.
+ * @pvr_file: PowerVR file.
  * @job_data: Array of precreated jobs and their sync_ops.
  * @job_count: Number of jobs.
  * @signal_array: xarray to receive signal sync objects.
@@ -696,8 +696,6 @@ pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count)
  * @pvr_dev: Target PowerVR device.
  * @pvr_file: Pointer to PowerVR file structure.
  * @args: Ioctl args.
- * @job_count: Number of jobs in @jobs_args. On error this will be updated
- * with the index into @jobs_args where the error occurred.
  *
  * This initial implementation is entirely synchronous; on return the GPU will
  * be idle. This will not be the case for future implementations.
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c
index b71d30e5f3805..c8562bfc0dcda 100644
--- a/drivers/gpu/drm/imagination/pvr_mmu.c
+++ b/drivers/gpu/drm/imagination/pvr_mmu.c
@@ -335,8 +335,9 @@ pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
 
 /**
  * pvr_mmu_backing_page_sync() - Flush a MMU backing page from the CPU to the
- *                              device.
+ * device.
  * @page: Target backing page.
+ * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
  *
  * .. caution::
  *
diff --git a/drivers/gpu/drm/imagination/pvr_queue.h b/drivers/gpu/drm/imagination/pvr_queue.h
index b5ce2c742150b..e06ced69302fc 100644
--- a/drivers/gpu/drm/imagination/pvr_queue.h
+++ b/drivers/gpu/drm/imagination/pvr_queue.h
@@ -50,7 +50,7 @@ struct pvr_queue_cccb_fence_ctx {
 	 */
 	struct pvr_job *job;
 
-	/** @lock: Lock protecting access to the job object. */
+	/** @job_lock: Lock protecting access to the job object. */
 	struct mutex job_lock;
 };
 
@@ -114,7 +114,7 @@ struct pvr_queue {
 	} timeline_ufo;
 
 	/**
-	 * last_queued_job_scheduled_fence: The scheduled fence of the last
+	 * @last_queued_job_scheduled_fence: The scheduled fence of the last
 	 * job queued to this queue.
 	 *
 	 * We use it to insert frag -> geom dependencies when issuing combined
diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index e0d74d9a61909..2aab53594a773 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -42,7 +42,7 @@ struct pvr_vm_context {
 	/** @mmu_ctx: The context for binding to physical memory. */
 	struct pvr_mmu_context *mmu_ctx;
 
-	/** @gpuva_mgr: GPUVA manager object associated with this context. */
+	/** @gpuvm_mgr: GPUVM object associated with this context. */
 	struct drm_gpuvm gpuvm_mgr;
 
 	/** @lock: Global lock on this VM. */
diff --git a/include/uapi/drm/pvr_drm.h b/include/uapi/drm/pvr_drm.h
index 1834375390c45..ccf6c2112468f 100644
--- a/include/uapi/drm/pvr_drm.h
+++ b/include/uapi/drm/pvr_drm.h
@@ -741,20 +741,18 @@ enum drm_pvr_ctx_priority {
  */
 enum drm_pvr_ctx_type {
 	/**
-	 * @DRM_PVR_CTX_TYPE_RENDER: Render context. Use &struct
-	 * drm_pvr_ioctl_create_render_context_args for context creation arguments.
+	 * @DRM_PVR_CTX_TYPE_RENDER: Render context.
 	 */
 	DRM_PVR_CTX_TYPE_RENDER = 0,
 
 	/**
-	 * @DRM_PVR_CTX_TYPE_COMPUTE: Compute context. Use &struct
-	 * drm_pvr_ioctl_create_compute_context_args for context creation arguments.
+	 * @DRM_PVR_CTX_TYPE_COMPUTE: Compute context.
 	 */
 	DRM_PVR_CTX_TYPE_COMPUTE,
 
 	/**
-	 * @DRM_PVR_CTX_TYPE_TRANSFER_FRAG: Transfer context for fragment data masters. Use
-	 * &struct drm_pvr_ioctl_create_transfer_context_args for context creation arguments.
+	 * @DRM_PVR_CTX_TYPE_TRANSFER_FRAG: Transfer context for fragment data
+	 * master.
 	 */
 	DRM_PVR_CTX_TYPE_TRANSFER_FRAG,
 };
-- 
GitLab


From f92a39ae47076ea123c7980fb85e6e33313f372e Mon Sep 17 00:00:00 2001
From: Bert Karwatzki <spasswolf@web.de>
Date: Mon, 27 Nov 2023 17:09:55 +0100
Subject: [PATCH 0486/2340] drm/sched: Partial revert of "Qualify
 drm_sched_wakeup() by drm_sched_entity_is_ready()"

Commit f3123c2590005c, in combination with the use of work queues by the GPU
scheduler, leads to random lock-ups of the GUI.

This is a partial revert of of commit f3123c2590005c since drm_sched_wakeup() still
needs its entity argument to pass it to drm_sched_can_queue().

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2994
Link: https://lists.freedesktop.org/archives/dri-devel/2023-November/431606.html

Signed-off-by: Bert Karwatzki <spasswolf@web.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127160955.87879-1-spasswolf@web.de
Link: https://lore.kernel.org/r/36bece178ff5dc705065e53d1e5e41f6db6d87e4.camel@web.de
Fixes: f3123c2590005c ("drm/sched: Qualify drm_sched_wakeup() by drm_sched_entity_is_ready()")
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 682aebe96db78..550492a7a031d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1029,9 +1029,8 @@ EXPORT_SYMBOL(drm_sched_job_cleanup);
 void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
 		      struct drm_sched_entity *entity)
 {
-	if (drm_sched_entity_is_ready(entity))
-		if (drm_sched_can_queue(sched, entity))
-			drm_sched_run_job_queue(sched);
+	if (drm_sched_can_queue(sched, entity))
+		drm_sched_run_job_queue(sched);
 }
 
 /**
-- 
GitLab


From 012e3208ab8d1456379d047d88d69ab7e074a520 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Nov 2023 17:56:57 +0100
Subject: [PATCH 0487/2340] drm/tilcdc: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

There is one error path in tilcdc_pdev_remove() that potentially could
yield a non-zero return code. In this case an error message describing
the failure is emitted now instead of

	remove callback returned a non-zero value. This will be ignored.

before. Otherwise there is no difference. Also note that currently
tilcdc_get_external_components() doesn't return negative values.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jyri Sarha <jyri.sarha@iki.fi>
Link: https://patchwork.freedesktop.org/patch/msgid/20231102165640.3307820-34-u.kleine-koenig@pengutronix.de
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Tested-by: Jyri Sarha <jyri.sarha@iki.fi>
---
 drivers/gpu/drm/tilcdc/tilcdc_drv.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 2f6eaac7f659b..23bf16f596f61 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -570,19 +570,18 @@ static int tilcdc_pdev_probe(struct platform_device *pdev)
 						       match);
 }
 
-static int tilcdc_pdev_remove(struct platform_device *pdev)
+static void tilcdc_pdev_remove(struct platform_device *pdev)
 {
 	int ret;
 
 	ret = tilcdc_get_external_components(&pdev->dev, NULL);
 	if (ret < 0)
-		return ret;
+		dev_err(&pdev->dev, "tilcdc_get_external_components() failed (%pe)\n",
+			ERR_PTR(ret));
 	else if (ret == 0)
 		tilcdc_fini(platform_get_drvdata(pdev));
 	else
 		component_master_del(&pdev->dev, &tilcdc_comp_ops);
-
-	return 0;
 }
 
 static void tilcdc_pdev_shutdown(struct platform_device *pdev)
@@ -599,7 +598,7 @@ MODULE_DEVICE_TABLE(of, tilcdc_of_match);
 
 static struct platform_driver tilcdc_platform_driver = {
 	.probe      = tilcdc_pdev_probe,
-	.remove     = tilcdc_pdev_remove,
+	.remove_new = tilcdc_pdev_remove,
 	.shutdown   = tilcdc_pdev_shutdown,
 	.driver     = {
 		.name   = "tilcdc",
-- 
GitLab


From 9f7843b515811aea6c56527eb195b622e9c01f12 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Fri, 17 Nov 2023 13:46:32 -0800
Subject: [PATCH 0488/2340] drm/panel-edp: Add override_edid_mode quirk for
 generic edp

Generic edp gets mode from edid. However, some panels report incorrect
mode in this way, resulting in glitches on panel. Introduce a new quirk
additional_mode to the generic edid to pick a correct hardcoded mode.

Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117215056.1883314-2-hsinyi@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 48 +++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index f226773731715..33535f6de343c 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -203,6 +203,9 @@ struct edp_panel_entry {
 
 	/** @name: Name of this panel (for printing to logs). */
 	const char *name;
+
+	/** @override_edid_mode: Override the mode obtained by edid. */
+	const struct drm_display_mode *override_edid_mode;
 };
 
 struct panel_edp {
@@ -301,6 +304,24 @@ static unsigned int panel_edp_get_display_modes(struct panel_edp *panel,
 	return num;
 }
 
+static int panel_edp_override_edid_mode(struct panel_edp *panel,
+					struct drm_connector *connector,
+					const struct drm_display_mode *override_mode)
+{
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(connector->dev, override_mode);
+	if (!mode) {
+		dev_err(panel->base.dev, "failed to add additional mode\n");
+		return 0;
+	}
+
+	mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_set_name(mode);
+	drm_mode_probed_add(connector, mode);
+	return 1;
+}
+
 static int panel_edp_get_non_edid_modes(struct panel_edp *panel,
 					struct drm_connector *connector)
 {
@@ -568,6 +589,9 @@ static int panel_edp_get_modes(struct drm_panel *panel,
 {
 	struct panel_edp *p = to_panel_edp(panel);
 	int num = 0;
+	bool has_override_edid_mode = p->detected_panel &&
+				      p->detected_panel != ERR_PTR(-EINVAL) &&
+				      p->detected_panel->override_edid_mode;
 
 	/* probe EDID if a DDC bus is available */
 	if (p->ddc) {
@@ -575,9 +599,18 @@ static int panel_edp_get_modes(struct drm_panel *panel,
 
 		if (!p->edid)
 			p->edid = drm_get_edid(connector, p->ddc);
-
-		if (p->edid)
-			num += drm_add_edid_modes(connector, p->edid);
+		if (p->edid) {
+			if (has_override_edid_mode) {
+				/*
+				 * override_edid_mode is specified. Use
+				 * override_edid_mode instead of from edid.
+				 */
+				num += panel_edp_override_edid_mode(p, connector,
+						p->detected_panel->override_edid_mode);
+			} else {
+				num += drm_add_edid_modes(connector, p->edid);
+			}
+		}
 
 		pm_runtime_mark_last_busy(panel->dev);
 		pm_runtime_put_autosuspend(panel->dev);
@@ -1849,6 +1882,15 @@ static const struct panel_delay delay_200_150_e200 = {
 	.delay = _delay \
 }
 
+#define EDP_PANEL_ENTRY2(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name, _mode) \
+{ \
+	.name = _name, \
+	.panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \
+					     product_id), \
+	.delay = _delay, \
+	.override_edid_mode = _mode \
+}
+
 /*
  * This table is used to figure out power sequencing delays for panels that
  * are detected by EDID. Entries here may point to entries in the
-- 
GitLab


From 70e0d5550f5cec301ad116703b840a539fe985dc Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Fri, 17 Nov 2023 13:46:33 -0800
Subject: [PATCH 0489/2340] drm/panel-edp: Add auo_b116xa3_mode

Add auo_b116xa3_mode to override the original modes parsed from edid
of the panels 0x405c B116XAK01.0 and 0x615c B116XAN06.1 which result
in glitches on panel.

Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117215056.1883314-3-hsinyi@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 33535f6de343c..3e144145a6bd3 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -983,6 +983,19 @@ static const struct panel_desc auo_b101ean01 = {
 	},
 };
 
+static const struct drm_display_mode auo_b116xa3_mode = {
+	.clock = 70589,
+	.hdisplay = 1366,
+	.hsync_start = 1366 + 40,
+	.hsync_end = 1366 + 40 + 40,
+	.htotal = 1366 + 40 + 40 + 32,
+	.vdisplay = 768,
+	.vsync_start = 768 + 10,
+	.vsync_end = 768 + 10 + 12,
+	.vtotal = 768 + 10 + 12 + 6,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
 static const struct drm_display_mode auo_b116xak01_mode = {
 	.clock = 69300,
 	.hdisplay = 1366,
@@ -1908,9 +1921,11 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x239b, &delay_200_500_e50, "B116XAN06.1"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x255c, &delay_200_500_e50, "B116XTN02.5"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x403d, &delay_200_500_e50, "B140HAN04.0"),
-	EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"),
+	EDP_PANEL_ENTRY2('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0",
+			 &auo_b116xa3_mode),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"),
-	EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"),
+	EDP_PANEL_ENTRY2('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1",
+			 &auo_b116xa3_mode),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x635c, &delay_200_500_e50, "B116XAN06.3"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x639c, &delay_200_500_e50, "B140HAK02.7"),
 	EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"),
-- 
GitLab


From fb3f43d50d9b22946702085d1fa2139c8741283d Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Fri, 17 Nov 2023 13:46:34 -0800
Subject: [PATCH 0490/2340] drm/panel-edp: Avoid adding multiple preferred
 modes

If a non generic edp-panel is under aux-bus, the mode read from edid would
still be selected as preferred and results in multiple preferred modes,
which is ambiguous.

If both hard-coded mode and edid exists, only add mode from hard-coded.

Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117215056.1883314-4-hsinyi@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 3e144145a6bd3..825fa2a0d8a53 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -589,6 +589,7 @@ static int panel_edp_get_modes(struct drm_panel *panel,
 {
 	struct panel_edp *p = to_panel_edp(panel);
 	int num = 0;
+	bool has_hard_coded_modes = p->desc->num_timings || p->desc->num_modes;
 	bool has_override_edid_mode = p->detected_panel &&
 				      p->detected_panel != ERR_PTR(-EINVAL) &&
 				      p->detected_panel->override_edid_mode;
@@ -599,7 +600,11 @@ static int panel_edp_get_modes(struct drm_panel *panel,
 
 		if (!p->edid)
 			p->edid = drm_get_edid(connector, p->ddc);
-		if (p->edid) {
+		/*
+		 * If both edid and hard-coded modes exists, skip edid modes to
+		 * avoid multiple preferred modes.
+		 */
+		if (p->edid && !has_hard_coded_modes) {
 			if (has_override_edid_mode) {
 				/*
 				 * override_edid_mode is specified. Use
@@ -616,12 +621,7 @@ static int panel_edp_get_modes(struct drm_panel *panel,
 		pm_runtime_put_autosuspend(panel->dev);
 	}
 
-	/*
-	 * Add hard-coded panel modes. Don't call this if there are no timings
-	 * and no modes (the generic edp-panel case) because it will clobber
-	 * the display_info that was already set by drm_add_edid_modes().
-	 */
-	if (p->desc->num_timings || p->desc->num_modes)
+	if (has_hard_coded_modes)
 		num += panel_edp_get_non_edid_modes(p, connector);
 	else if (!num)
 		dev_warn(p->base.dev, "No display modes\n");
-- 
GitLab


From c9d99c73940e47692fa982cf7508581f5c55e363 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 23 Nov 2023 18:54:27 +0100
Subject: [PATCH 0491/2340] drm/bridge: ti-sn65dsi86: Simplify using
 pm_runtime_resume_and_get()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pm_runtime_resume_and_get() already drops the runtime PM usage counter
in the error case. So a call to pm_runtime_put_sync() can be dropped.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123175425.496956-2-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index c45c07840f645..5b8e1dfc458da 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1413,11 +1413,9 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	int ret;
 
 	if (!pdata->pwm_enabled) {
-		ret = pm_runtime_get_sync(pdata->dev);
-		if (ret < 0) {
-			pm_runtime_put_sync(pdata->dev);
+		ret = pm_runtime_resume_and_get(pdata->dev);
+		if (ret < 0)
 			return ret;
-		}
 	}
 
 	if (state->enabled) {
-- 
GitLab


From 2d2cffdbbc21586b213e5e371680f9d934d3813b Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 13 Nov 2023 12:55:08 +0100
Subject: [PATCH 0492/2340] drm/loongson: Add platform dependency

Only offer the Loongson DRM driver as an option on platforms where
it makes sense.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Cc: Sui Jingfeng <sui.jingfeng@linux.dev>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Reviewed-by: Sui Jingfeng <sui.jingfeng@linux.dev>
Signed-off-by: Sui Jingfeng <sui.jingfeng@linux.dev>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113125508.4dc755e8@endymion.delvare
---
 drivers/gpu/drm/loongson/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/loongson/Kconfig b/drivers/gpu/drm/loongson/Kconfig
index df6946d505fac..8e59753e532de 100644
--- a/drivers/gpu/drm/loongson/Kconfig
+++ b/drivers/gpu/drm/loongson/Kconfig
@@ -3,6 +3,7 @@
 config DRM_LOONGSON
 	tristate "DRM support for Loongson Graphics"
 	depends on DRM && PCI && MMU
+	depends on LOONGARCH || MIPS || COMPILE_TEST
 	select DRM_KMS_HELPER
 	select DRM_TTM
 	select I2C
-- 
GitLab


From b844c6bae2b89b4a4e102eb326e35c632308dd85 Mon Sep 17 00:00:00 2001
From: Vinod Govindapillai <vinod.govindapillai@intel.com>
Date: Fri, 10 Nov 2023 11:32:25 +0200
Subject: [PATCH 0493/2340] drm/i915/xe2lpd: remove the FBC restriction if PSR2
 is enabled

In earlier versions, FBC was restricted if PSR2 is enabled. From
xe2lpd onwards no such restrictions are needed anymore.

HSD: 14014305387
Signed-off-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110093225.39573-2-vinod.govindapillai@intel.com
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 63f389a1707d9..f17a1afb4929e 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1235,7 +1235,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
 	 * Recommendation is to keep this combination disabled
 	 * Bspec: 50422 HSD: 14010260002
 	 */
-	if (DISPLAY_VER(i915) >= 12 && crtc_state->has_psr2) {
+	if (IS_DISPLAY_VER(i915, 12, 14) && crtc_state->has_psr2) {
 		plane_state->no_fbc_reason = "PSR2 enabled";
 		return 0;
 	}
-- 
GitLab


From af3145aa142c92409d3b123ff87ff0b5fd0bf849 Mon Sep 17 00:00:00 2001
From: Xin Ji <xji@analogixsemi.com>
Date: Mon, 20 Nov 2023 17:10:36 +0800
Subject: [PATCH 0494/2340] Revert "drm/bridge: Add 200ms delay to wait FW HPD
 status stable"

This reverts commit 330140d7319fcc4ec68bd924ea212e476bf12275

200ms delay will cause panel display image later than backlight
turn on, revert this patch.

Fixes: 330140d7319fcc ("drm/bridge: Add 200ms delay to wait FW HPD status stable")
Signed-off-by: Xin Ji <xji@analogixsemi.com>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231120091038.284825-1-xji@analogixsemi.com
---
 drivers/gpu/drm/bridge/analogix/anx7625.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c
index 8f740154707db..51abe42c639e5 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -1464,9 +1464,6 @@ static int _anx7625_hpd_polling(struct anx7625_data *ctx,
 	if (ctx->pdata.intp_irq)
 		return 0;
 
-	/* Delay 200ms for FW HPD de-bounce */
-	msleep(200);
-
 	ret = readx_poll_timeout(anx7625_read_hpd_status_p0,
 				 ctx, val,
 				 ((val & HPD_STATUS) || (val < 0)),
-- 
GitLab


From e3af7053de3f685c96158373bc234b2feca1f160 Mon Sep 17 00:00:00 2001
From: Xin Ji <xji@analogixsemi.com>
Date: Mon, 20 Nov 2023 17:10:37 +0800
Subject: [PATCH 0495/2340] drm/bridge: anx7625: Fix Set HPD irq detect window
 to 2ms

Polling firmware HPD GPIO status, set HPD irq detect window to 2ms
after firmware HPD GPIO initial done

Signed-off-by: Xin Ji <xji@analogixsemi.com>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231120091038.284825-2-xji@analogixsemi.com
---
 drivers/gpu/drm/bridge/analogix/anx7625.c | 51 ++++++++++++++++-------
 drivers/gpu/drm/bridge/analogix/anx7625.h |  4 ++
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c
index 51abe42c639e5..ef31033439bc1 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -1298,10 +1298,32 @@ static void anx7625_config(struct anx7625_data *ctx)
 			  XTAL_FRQ_SEL, XTAL_FRQ_27M);
 }
 
+static int anx7625_hpd_timer_config(struct anx7625_data *ctx)
+{
+	int ret;
+
+	/* Set irq detect window to 2ms */
+	ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client,
+				HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF);
+	ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client,
+				 HPD_DET_TIMER_BIT8_15,
+				 (HPD_TIME >> 8) & 0xFF);
+	ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client,
+				 HPD_DET_TIMER_BIT16_23,
+				 (HPD_TIME >> 16) & 0xFF);
+
+	return ret;
+}
+
+static int anx7625_read_hpd_gpio_config_status(struct anx7625_data *ctx)
+{
+	return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, GPIO_CTRL_2);
+}
+
 static void anx7625_disable_pd_protocol(struct anx7625_data *ctx)
 {
 	struct device *dev = ctx->dev;
-	int ret;
+	int ret, val;
 
 	/* Reset main ocm */
 	ret = anx7625_reg_write(ctx, ctx->i2c.rx_p0_client, 0x88, 0x40);
@@ -1315,6 +1337,19 @@ static void anx7625_disable_pd_protocol(struct anx7625_data *ctx)
 		DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature fail.\n");
 	else
 		DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature succeeded.\n");
+
+	/*
+	 * Make sure the HPD GPIO already be configured after OCM release before
+	 * setting HPD detect window register. Here we poll the status register
+	 * at maximum 40ms, then config HPD irq detect window register
+	 */
+	readx_poll_timeout(anx7625_read_hpd_gpio_config_status,
+			   ctx, val,
+			   ((val & HPD_SOURCE) || (val < 0)),
+			   2000, 2000 * 20);
+
+	/* Set HPD irq detect window to 2ms */
+	anx7625_hpd_timer_config(ctx);
 }
 
 static int anx7625_ocm_loading_check(struct anx7625_data *ctx)
@@ -1437,20 +1472,6 @@ static void anx7625_start_dp_work(struct anx7625_data *ctx)
 
 static int anx7625_read_hpd_status_p0(struct anx7625_data *ctx)
 {
-	int ret;
-
-	/* Set irq detect window to 2ms */
-	ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client,
-				HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF);
-	ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client,
-				 HPD_DET_TIMER_BIT8_15,
-				 (HPD_TIME >> 8) & 0xFF);
-	ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client,
-				 HPD_DET_TIMER_BIT16_23,
-				 (HPD_TIME >> 16) & 0xFF);
-	if (ret < 0)
-		return ret;
-
 	return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, SYSTEM_STSTUS);
 }
 
diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h
index 5af819611ebce..66ebee7f3d832 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.h
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.h
@@ -259,6 +259,10 @@
 #define AP_MIPI_RX_EN BIT(5) /* 1: MIPI RX input in  0: no RX in */
 #define AP_DISABLE_PD BIT(6)
 #define AP_DISABLE_DISPLAY BIT(7)
+
+#define GPIO_CTRL_2   0x49
+#define HPD_SOURCE    BIT(6)
+
 /***************************************************************/
 /* Register definition of device address 0x84 */
 #define  MIPI_PHY_CONTROL_3            0x03
-- 
GitLab


From b48807788e7a2bd93044fe84cfe8ff64b85ec15e Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:30 +0100
Subject: [PATCH 0496/2340] fbdev/acornfb: Fix name of fb_ops initializer macro

Fix build by using the correct name for the initializer macro
for struct fb_ops.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 9037afde8b9d ("fbdev/acornfb: Use fbdev I/O helpers")
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: <stable@vger.kernel.org> # v6.6+
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-2-tzimmermann@suse.de
---
 drivers/video/fbdev/acornfb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c
index 163d2c9f951c3..f0600f6ca2548 100644
--- a/drivers/video/fbdev/acornfb.c
+++ b/drivers/video/fbdev/acornfb.c
@@ -605,7 +605,7 @@ acornfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
 
 static const struct fb_ops acornfb_ops = {
 	.owner		= THIS_MODULE,
-	FB_IOMEM_DEFAULT_OPS,
+	FB_DEFAULT_IOMEM_OPS,
 	.fb_check_var	= acornfb_check_var,
 	.fb_set_par	= acornfb_set_par,
 	.fb_setcolreg	= acornfb_setcolreg,
-- 
GitLab


From 12d55c013a09a2d490f004a324c20800e4ff35ec Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:31 +0100
Subject: [PATCH 0497/2340] fbdev/sm712fb: Use correct initializer macros for
 struct fb_ops

Only initialize mmap and draw helpers with macros; leave read/write
callbacks to driver implementations. Fixes the following warnings:

  CC [M]  drivers/video/fbdev/sm712fb.o
  sm712fb.c:1355:25: warning: initialized field overwritten [-Woverride-init]
  1355 |         .fb_fillrect  = cfb_fillrect,
       |                         ^~~~~~~~~~~~
  sm712fb.c:1355:25: note: (near initialization for 'smtcfb_ops.fb_fillrect')
  sm712fb.c:1356:25: warning: initialized field overwritten [-Woverride-init]
  1356 |         .fb_imageblit = cfb_imageblit,
       |                         ^~~~~~~~~~~~~
  sm712fb.c:1356:25: note: (near initialization for 'smtcfb_ops.fb_imageblit')
  sm712fb.c:1357:25: warning: initialized field overwritten [-Woverride-init]
  1357 |         .fb_copyarea  = cfb_copyarea,
       |                         ^~~~~~~~~~~~
  sm712fb.c:1357:25: note: (near initialization for 'smtcfb_ops.fb_copyarea')
  sm712fb.c:1358:25: warning: initialized field overwritten [-Woverride-init]
  1358 |         .fb_read      = smtcfb_read,
       |                         ^~~~~~~~~~~
  sm712fb.c:1358:25: note: (near initialization for 'smtcfb_ops.fb_read')
  sm712fb.c:1359:25: warning: initialized field overwritten [-Woverride-init]
  1359 |         .fb_write     = smtcfb_write,
       |                         ^~~~~~~~~~~~
  sm712fb.c:1359:25: note: (near initialization for 'smtcfb_ops.fb_write')

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 586132cf1d38 ("fbdev/sm712fb: Initialize fb_ops to fbdev I/O-memory helpers")
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Teddy Wang <teddy.wang@siliconmotion.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-fbdev@vger.kernel.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-3-tzimmermann@suse.de
---
 drivers/video/fbdev/sm712fb.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/sm712fb.c b/drivers/video/fbdev/sm712fb.c
index 3f8ef50e32095..104f122e0f273 100644
--- a/drivers/video/fbdev/sm712fb.c
+++ b/drivers/video/fbdev/sm712fb.c
@@ -1347,16 +1347,14 @@ static int smtc_set_par(struct fb_info *info)
 
 static const struct fb_ops smtcfb_ops = {
 	.owner        = THIS_MODULE,
-	FB_DEFAULT_IOMEM_OPS,
 	.fb_check_var = smtc_check_var,
 	.fb_set_par   = smtc_set_par,
 	.fb_setcolreg = smtc_setcolreg,
 	.fb_blank     = smtc_blank,
-	.fb_fillrect  = cfb_fillrect,
-	.fb_imageblit = cfb_imageblit,
-	.fb_copyarea  = cfb_copyarea,
+	__FB_DEFAULT_IOMEM_OPS_DRAW,
 	.fb_read      = smtcfb_read,
 	.fb_write     = smtcfb_write,
+	__FB_DEFAULT_IOMEM_OPS_MMAP,
 };
 
 /*
-- 
GitLab


From 63994d486c9fb5e780dabb0571c607c25d485421 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:32 +0100
Subject: [PATCH 0498/2340] fbdev/vfb: Set FBINFO_VIRTFB flag

The vfb driver operates on system memory. Mark the framebuffer
accordingly. Helpers operating on the framebuffer memory will test
for the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-4-tzimmermann@suse.de
---
 drivers/video/fbdev/vfb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/vfb.c b/drivers/video/fbdev/vfb.c
index 1b7c338f99560..c748b9311fb12 100644
--- a/drivers/video/fbdev/vfb.c
+++ b/drivers/video/fbdev/vfb.c
@@ -440,6 +440,7 @@ static int vfb_probe(struct platform_device *dev)
 	if (!info)
 		goto err;
 
+	info->flags |= FBINFO_VIRTFB;
 	info->screen_buffer = videomemory;
 	info->fbops = &vfb_ops;
 
-- 
GitLab


From 853767b6b94630ac6388aa8e7893d3df26be53be Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:33 +0100
Subject: [PATCH 0499/2340] fbdev/vfb: Initialize fb_ops with fbdev macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in virtual address space. Set the read/write,
draw and mmap callbacks to the correct implementation and avoid
implicit defaults. Also select the necessary helpers in Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-5-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig | 5 +----
 drivers/video/fbdev/vfb.c   | 7 ++-----
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 53693c826ebdd..63956382ffb65 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1800,10 +1800,7 @@ config FB_DA8XX
 config FB_VIRTUAL
 	tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
 	depends on FB
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
+	select FB_SYSMEM_HELPERS
 	help
 	  This is a `virtual' frame buffer device. It operates on a chunk of
 	  unswappable kernel memory instead of on the memory of a graphics
diff --git a/drivers/video/fbdev/vfb.c b/drivers/video/fbdev/vfb.c
index c748b9311fb12..f6140f247e4b0 100644
--- a/drivers/video/fbdev/vfb.c
+++ b/drivers/video/fbdev/vfb.c
@@ -80,15 +80,12 @@ static int vfb_mmap(struct fb_info *info,
 
 static const struct fb_ops vfb_ops = {
 	.owner		= THIS_MODULE,
-	.fb_read        = fb_sys_read,
-	.fb_write       = fb_sys_write,
+	__FB_DEFAULT_SYSMEM_OPS_RDWR,
 	.fb_check_var	= vfb_check_var,
 	.fb_set_par	= vfb_set_par,
 	.fb_setcolreg	= vfb_setcolreg,
 	.fb_pan_display	= vfb_pan_display,
-	.fb_fillrect	= sys_fillrect,
-	.fb_copyarea	= sys_copyarea,
-	.fb_imageblit	= sys_imageblit,
+	__FB_DEFAULT_SYSMEM_OPS_DRAW,
 	.fb_mmap	= vfb_mmap,
 };
 
-- 
GitLab


From 30b72c0bde93ae3001736b596cf94bfb47c5e159 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:34 +0100
Subject: [PATCH 0500/2340] fbdev/arcfb: Set FBINFO_VIRTFB flag

The arcfb driver operates on system memory. Mark the framebuffer
accordingly. Helpers operating on the framebuffer memory will test
for the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Jaya Kumar <jayalk@intworks.biz>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-6-tzimmermann@suse.de
---
 drivers/video/fbdev/arcfb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
index cff11cb04a551..7344e825543af 100644
--- a/drivers/video/fbdev/arcfb.c
+++ b/drivers/video/fbdev/arcfb.c
@@ -529,6 +529,7 @@ static int arcfb_probe(struct platform_device *dev)
 	if (!info)
 		goto err_fb_alloc;
 
+	info->flags |= FBINFO_VIRTFB;
 	info->screen_buffer = videomemory;
 	info->fbops = &arcfb_ops;
 
-- 
GitLab


From 28f57d03f5a73ad7c5b07fd1414d816f15f7fb1d Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:35 +0100
Subject: [PATCH 0501/2340] fbdev/arcfb: Use generator macros for deferred I/O

Implement the driver's fops with the generator macros for deferred
I/O. Only requires per-driver code for the on-scren scanout buffer.
The generated helpers implement reading, writing and drawing on top
of that. Also update the selected Kconfig tokens accordingly.

Actual support for deferred I/O is missing from the driver. So
writing to memory-mapped pages does not automatically update the
scanout buffer.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Jaya Kumar <jayalk@intworks.biz>
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-7-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig |   5 +-
 drivers/video/fbdev/arcfb.c | 113 +++++++++---------------------------
 2 files changed, 27 insertions(+), 91 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 63956382ffb65..44bf622fc8d74 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -272,10 +272,7 @@ config FB_FM2
 config FB_ARC
 	tristate "Arc Monochrome LCD board support"
 	depends on FB && (X86 || COMPILE_TEST)
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
+	select FB_SYSMEM_HELPERS_DEFERRED
 	help
 	  This enables support for the Arc Monochrome LCD board. The board
 	  is based on the KS-108 lcd controller and is typically a matrix
diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
index 7344e825543af..b2408543277ce 100644
--- a/drivers/video/fbdev/arcfb.c
+++ b/drivers/video/fbdev/arcfb.c
@@ -363,39 +363,6 @@ static void arcfb_lcd_update(struct arcfb_par *par, unsigned int dx,
 	}
 }
 
-static void arcfb_fillrect(struct fb_info *info,
-			   const struct fb_fillrect *rect)
-{
-	struct arcfb_par *par = info->par;
-
-	sys_fillrect(info, rect);
-
-	/* update the physical lcd */
-	arcfb_lcd_update(par, rect->dx, rect->dy, rect->width, rect->height);
-}
-
-static void arcfb_copyarea(struct fb_info *info,
-			   const struct fb_copyarea *area)
-{
-	struct arcfb_par *par = info->par;
-
-	sys_copyarea(info, area);
-
-	/* update the physical lcd */
-	arcfb_lcd_update(par, area->dx, area->dy, area->width, area->height);
-}
-
-static void arcfb_imageblit(struct fb_info *info, const struct fb_image *image)
-{
-	struct arcfb_par *par = info->par;
-
-	sys_imageblit(info, image);
-
-	/* update the physical lcd */
-	arcfb_lcd_update(par, image->dx, image->dy, image->width,
-				image->height);
-}
-
 static int arcfb_ioctl(struct fb_info *info,
 			  unsigned int cmd, unsigned long arg)
 {
@@ -436,76 +403,48 @@ static int arcfb_ioctl(struct fb_info *info,
 	}
 }
 
-/*
- * this is the access path from userspace. they can seek and write to
- * the fb. it's inefficient for them to do anything less than 64*8
- * writes since we update the lcd in each write() anyway.
- */
-static ssize_t arcfb_write(struct fb_info *info, const char __user *buf,
-			   size_t count, loff_t *ppos)
+static void arcfb_damage_range(struct fb_info *info, off_t off, size_t len)
 {
-	/* modded from epson 1355 */
-
-	unsigned long p;
-	int err;
-	unsigned int fbmemlength,x,y,w,h, bitppos, startpos, endpos, bitcount;
-	struct arcfb_par *par;
-	unsigned int xres;
-
-	if (!info->screen_buffer)
-		return -ENODEV;
-
-	p = *ppos;
-	par = info->par;
-	xres = info->var.xres;
-	fbmemlength = (xres * info->var.yres)/8;
-
-	if (p > fbmemlength)
-		return -ENOSPC;
-
-	err = 0;
-	if ((count + p) > fbmemlength) {
-		count = fbmemlength - p;
-		err = -ENOSPC;
-	}
-
-	if (count) {
-		char *base_addr;
-
-		base_addr = info->screen_buffer;
-		count -= copy_from_user(base_addr + p, buf, count);
-		*ppos += count;
-		err = -EFAULT;
-	}
-
+	struct arcfb_par *par = info->par;
+	unsigned int xres = info->var.xres;
+	unsigned int bitppos, startpos, endpos, bitcount;
+	unsigned int x, y, width, height;
 
-	bitppos = p*8;
+	bitppos = off * 8;
 	startpos = floorXres(bitppos, xres);
-	endpos = ceilXres((bitppos + (count*8)), xres);
+	endpos = ceilXres((bitppos + (len * 8)), xres);
 	bitcount = endpos - startpos;
 
 	x = startpos % xres;
 	y = startpos / xres;
-	w = xres;
-	h = bitcount / xres;
-	arcfb_lcd_update(par, x, y, w, h);
+	width = xres;
+	height = bitcount / xres;
+
+	arcfb_lcd_update(par, x, y, width, height);
+}
 
-	if (count)
-		return count;
-	return err;
+static void arcfb_damage_area(struct fb_info *info, u32 x, u32 y,
+			      u32 width, u32 height)
+{
+	struct arcfb_par *par = info->par;
+
+	/* update the physical lcd */
+	arcfb_lcd_update(par, x, y, width, height);
 }
 
+FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(arcfb,
+				   arcfb_damage_range,
+				   arcfb_damage_area)
+
 static const struct fb_ops arcfb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_open	= arcfb_open,
-	.fb_read        = fb_sys_read,
-	.fb_write	= arcfb_write,
+	__FB_DEFAULT_DEFERRED_OPS_RDWR(arcfb),
 	.fb_release	= arcfb_release,
 	.fb_pan_display	= arcfb_pan_display,
-	.fb_fillrect	= arcfb_fillrect,
-	.fb_copyarea	= arcfb_copyarea,
-	.fb_imageblit	= arcfb_imageblit,
+	__FB_DEFAULT_DEFERRED_OPS_DRAW(arcfb),
 	.fb_ioctl 	= arcfb_ioctl,
+	// .fb_mmap reqires deferred I/O
 };
 
 static int arcfb_probe(struct platform_device *dev)
-- 
GitLab


From eba1418968264cb3f83ff1ce78270ccc9a729c22 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:36 +0100
Subject: [PATCH 0502/2340] auxdisplay/cfag12864bfb: Set FBINFO_VIRTFB flag

The cfag12864bfb driver operates on system memory. Mark the framebuffer
accordingly. Helpers operating on the framebuffer memory will test for
the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-8-tzimmermann@suse.de
---
 drivers/auxdisplay/cfag12864bfb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c
index 729845bcc803a..c0ba693845aaf 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -72,6 +72,7 @@ static int cfag12864bfb_probe(struct platform_device *device)
 	if (!info)
 		goto none;
 
+	info->flags = FBINFO_VIRTFB;
 	info->screen_buffer = cfag12864b_buffer;
 	info->screen_size = CFAG12864B_SIZE;
 	info->fbops = &cfag12864bfb_ops;
-- 
GitLab


From 36e6cacdb095e35f2389ba8829db12603133d61d Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:37 +0100
Subject: [PATCH 0503/2340] auxdisplay/cfag12864bfb: Initialize fb_ops with
 fbdev macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in virtual address space. Set the read/write,
draw and mmap callbacks to the correct implementation and avoid
implicit defaults. Also select the necessary helpers in Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-9-tzimmermann@suse.de
---
 drivers/auxdisplay/Kconfig        | 5 +----
 drivers/auxdisplay/cfag12864bfb.c | 7 ++-----
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index 64012cda4d126..4377e53f8f572 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -112,10 +112,7 @@ config CFAG12864B
 	depends on X86
 	depends on FB
 	depends on KS0108
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
+	select FB_SYSMEM_HELPERS
 	default n
 	help
 	  If you have a Crystalfontz 128x64 2-color LCD, cfag12864b Series,
diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c
index c0ba693845aaf..ede0f9a513110 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -56,11 +56,8 @@ static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 
 static const struct fb_ops cfag12864bfb_ops = {
 	.owner = THIS_MODULE,
-	.fb_read = fb_sys_read,
-	.fb_write = fb_sys_write,
-	.fb_fillrect = sys_fillrect,
-	.fb_copyarea = sys_copyarea,
-	.fb_imageblit = sys_imageblit,
+	__FB_DEFAULT_SYSMEM_OPS_RDWR,
+	__FB_DEFAULT_SYSMEM_OPS_DRAW,
 	.fb_mmap = cfag12864bfb_mmap,
 };
 
-- 
GitLab


From 1d6796547a44fb44252c83b36609d0ae6624cd7c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:38 +0100
Subject: [PATCH 0504/2340] auxdisplay/ht16k33: Set FBINFO_VIRTFB flag

The ht16k33 driver operates on system memory. Mark the framebuffer
accordingly. Helpers operating on the framebuffer memory will test
for the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Robin van der Gracht <robin@protonic.nl>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Acked-by: Robin van der Gracht <robin@protonic.nl>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-10-tzimmermann@suse.de
---
 drivers/auxdisplay/ht16k33.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index 3a2d883872249..f1716e3ce6a92 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -640,6 +640,7 @@ static int ht16k33_fbdev_probe(struct device *dev, struct ht16k33_priv *priv,
 
 	INIT_DELAYED_WORK(&priv->work, ht16k33_fb_update);
 	fbdev->info->fbops = &ht16k33_fb_ops;
+	fbdev->info->flags |= FBINFO_VIRTFB;
 	fbdev->info->screen_buffer = fbdev->buffer;
 	fbdev->info->screen_size = HT16K33_FB_SIZE;
 	fbdev->info->fix = ht16k33_fb_fix;
-- 
GitLab


From df558d53139f8adc7058b3dc17f01ae03c18a440 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:39 +0100
Subject: [PATCH 0505/2340] auxdisplay/ht16k33: Initialize fb_ops with fbdev
 macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in virtual address space. Set the read/write,
draw and mmap callbacks to the correct implementation and avoid
implicit defaults. Also select the necessary helpers in Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Robin van der Gracht <robin@protonic.nl>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Robin van der Gracht <robin@protonic.nl>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-11-tzimmermann@suse.de
---
 drivers/auxdisplay/Kconfig   | 5 +----
 drivers/auxdisplay/ht16k33.c | 7 ++-----
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index 4377e53f8f572..d944d5298eca8 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -167,10 +167,7 @@ config IMG_ASCII_LCD
 config HT16K33
 	tristate "Holtek Ht16K33 LED controller with keyscan"
 	depends on FB && I2C && INPUT
-	select FB_SYS_FOPS
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
+	select FB_SYSMEM_HELPERS
 	select INPUT_MATRIXKMAP
 	select FB_BACKLIGHT
 	select NEW_LEDS
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index f1716e3ce6a92..2f1dc6b4e2765 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -356,12 +356,9 @@ static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma)
 
 static const struct fb_ops ht16k33_fb_ops = {
 	.owner = THIS_MODULE,
-	.fb_read = fb_sys_read,
-	.fb_write = fb_sys_write,
+	__FB_DEFAULT_SYSMEM_OPS_RDWR,
 	.fb_blank = ht16k33_blank,
-	.fb_fillrect = sys_fillrect,
-	.fb_copyarea = sys_copyarea,
-	.fb_imageblit = sys_imageblit,
+	__FB_DEFAULT_SYSMEM_OPS_DRAW,
 	.fb_mmap = ht16k33_mmap,
 };
 
-- 
GitLab


From bc4e90771c886086572c6bfabd57f7a6b492f52e Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:40 +0100
Subject: [PATCH 0506/2340] hid/picolcd_fb: Set FBINFO_VIRTFB flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The picolcd_fb driver operates on system memory. Mark the framebuffer
accordingly. Helpers operating on the framebuffer memory will test
for the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: "Bruno Prémont" <bonbons@linux-vserver.org>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-input@vger.kernel.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Bruno Prémont  <bonbons@linux-vserver.org>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-12-tzimmermann@suse.de
---
 drivers/hid/hid-picolcd_fb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c
index a4dccdcda26fc..d7dddd99d325e 100644
--- a/drivers/hid/hid-picolcd_fb.c
+++ b/drivers/hid/hid-picolcd_fb.c
@@ -505,6 +505,7 @@ int picolcd_init_framebuffer(struct picolcd_data *data)
 		dev_err(dev, "can't get a free page for framebuffer\n");
 		goto err_nomem;
 	}
+	info->flags |= FBINFO_VIRTFB;
 	info->screen_buffer = fbdata->bitmap;
 	info->fix.smem_start = (unsigned long)fbdata->bitmap;
 	memset(fbdata->vbitmap, 0xff, PICOLCDFB_SIZE);
-- 
GitLab


From 46b655ceeed09363b810712525dc63b533e5cd0b Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:41 +0100
Subject: [PATCH 0507/2340] fbdev/sh_mobile_lcdcfb: Set FBINFO_VIRTFB flag

The sh_mobile_lcdcfb driver operates on DMA-able system memory. Mark
the framebuffer accordingly. Helpers operating on the framebuffer memory
will test for the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-13-tzimmermann@suse.de
---
 drivers/video/fbdev/sh_mobile_lcdcfb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c
index 1364dafaadb1d..5c99fc8a409fd 100644
--- a/drivers/video/fbdev/sh_mobile_lcdcfb.c
+++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c
@@ -1567,6 +1567,7 @@ sh_mobile_lcdc_overlay_fb_init(struct sh_mobile_lcdc_overlay *ovl)
 
 	info->fbops = &sh_mobile_lcdc_overlay_ops;
 	info->device = priv->dev;
+	info->flags |= FBINFO_VIRTFB;
 	info->screen_buffer = ovl->fb_mem;
 	info->par = ovl;
 
@@ -2053,6 +2054,7 @@ sh_mobile_lcdc_channel_fb_init(struct sh_mobile_lcdc_chan *ch,
 
 	info->fbops = &sh_mobile_lcdc_ops;
 	info->device = priv->dev;
+	info->flags |= FBINFO_VIRTFB;
 	info->screen_buffer = ch->fb_mem;
 	info->pseudo_palette = &ch->pseudo_palette;
 	info->par = ch;
-- 
GitLab


From 01f4fbb3bd265167cdd20ed5ff8250199a0d189e Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:42 +0100
Subject: [PATCH 0508/2340] fbdev/sh_mobile_lcdcfb: Initialize fb_ops with
 fbdev macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in DMA-able virtual address space. Set the
read/write, draw and mmap callbacks to the correct implementation
and avoid implicit defaults. Also select the necessary helpers in
Kconfig.

The driver uses a mixture of DMA helpers and deferred I/O. That
probably needs fixing by a driver maintainer.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-14-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig            |  7 ++-----
 drivers/video/fbdev/sh_mobile_lcdcfb.c | 10 +++-------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 44bf622fc8d74..1eab89a07bbc5 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1634,12 +1634,9 @@ config FB_SH_MOBILE_LCDC
 	depends on FB && HAVE_CLK && HAS_IOMEM
 	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
 	depends on FB_DEVICE
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
-	select FB_DEFERRED_IO
 	select FB_BACKLIGHT
+	select FB_DEFERRED_IO
+	select FB_DMAMEM_HELPERS
 	help
 	  Frame buffer driver for the on-chip SH-Mobile LCD controller.
 
diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c
index 5c99fc8a409fd..d84628de51893 100644
--- a/drivers/video/fbdev/sh_mobile_lcdcfb.c
+++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c
@@ -1488,13 +1488,10 @@ sh_mobile_lcdc_overlay_mmap(struct fb_info *info, struct vm_area_struct *vma)
 
 static const struct fb_ops sh_mobile_lcdc_overlay_ops = {
 	.owner          = THIS_MODULE,
-	.fb_read        = fb_sys_read,
-	.fb_write       = fb_sys_write,
-	.fb_fillrect	= sys_fillrect,
-	.fb_copyarea	= sys_copyarea,
-	.fb_imageblit	= sys_imageblit,
+	__FB_DEFAULT_DMAMEM_OPS_RDWR,
 	.fb_blank	= sh_mobile_lcdc_overlay_blank,
 	.fb_pan_display = sh_mobile_lcdc_overlay_pan,
+	__FB_DEFAULT_DMAMEM_OPS_DRAW,
 	.fb_ioctl       = sh_mobile_lcdc_overlay_ioctl,
 	.fb_check_var	= sh_mobile_lcdc_overlay_check_var,
 	.fb_set_par	= sh_mobile_lcdc_overlay_set_par,
@@ -1966,8 +1963,7 @@ sh_mobile_lcdc_mmap(struct fb_info *info, struct vm_area_struct *vma)
 static const struct fb_ops sh_mobile_lcdc_ops = {
 	.owner          = THIS_MODULE,
 	.fb_setcolreg	= sh_mobile_lcdc_setcolreg,
-	.fb_read        = fb_sys_read,
-	.fb_write       = fb_sys_write,
+	__FB_DEFAULT_DMAMEM_OPS_RDWR,
 	.fb_fillrect	= sh_mobile_lcdc_fillrect,
 	.fb_copyarea	= sh_mobile_lcdc_copyarea,
 	.fb_imageblit	= sh_mobile_lcdc_imageblit,
-- 
GitLab


From 133a2ca22e11041f891e6fbbf398d0dfd8b07c0c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:43 +0100
Subject: [PATCH 0509/2340] fbdev/smscufx: Select correct helpers

The driver uses deferred I/O. Select the correct helpers via
FB_SYSMEM_HELPERS_DEFERRED in the Kconfig file.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-15-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 1eab89a07bbc5..d110d89b21d46 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1683,11 +1683,7 @@ config FB_SMSCUFX
 	tristate "SMSC UFX6000/7000 USB Framebuffer support"
 	depends on FB && USB
 	select FB_MODE_HELPERS
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
-	select FB_DEFERRED_IO
+	select FB_SYSMEM_HELPERS_DEFERRED
 	help
 	  This is a kernel framebuffer driver for SMSC UFX USB devices.
 	  Supports fbdev clients like xf86-video-fbdev, kdrive, fbi, and
-- 
GitLab


From c9496954c138b96964a2fa24bccbe5a4c3ad796f Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:44 +0100
Subject: [PATCH 0510/2340] fbdev/udlfb: Select correct helpers

The driver uses deferred I/O. Select the correct helpers via
FB_SYSMEM_HELPERS_DEFERRED in the Kconfig file.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-16-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index d110d89b21d46..de57bf59e0179 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1696,11 +1696,7 @@ config FB_UDL
 	depends on FB && USB
 	depends on FB_DEVICE
 	select FB_MODE_HELPERS
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
-	select FB_DEFERRED_IO
+	select FB_SYSMEM_HELPERS_DEFERRED
 	help
 	  This is a kernel framebuffer driver for DisplayLink USB devices.
 	  Supports fbdev clients like xf86-video-fbdev, kdrive, fbi, and
-- 
GitLab


From cb99b486a5bcc9b9a0c869774137fe40c48cf2f4 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:45 +0100
Subject: [PATCH 0511/2340] fbdev/au1200fb: Set FBINFO_VIRTFB flag

The au1200fb driver operates on DMA-able system memory. Mark the
framebuffer accordingly. Helpers operating on the framebuffer memory
will test for the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-17-tzimmermann@suse.de
---
 drivers/video/fbdev/au1200fb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index 98afd385c49cd..817c1ebb625b6 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1568,6 +1568,8 @@ static int au1200fb_init_fbinfo(struct au1200fb_device *fbdev)
 	fbi->fix.mmio_len = 0;
 	fbi->fix.accel = FB_ACCEL_NONE;
 
+	fbi->flags |= FBINFO_VIRTFB;
+
 	fbi->screen_buffer = fbdev->fb_mem;
 
 	au1200fb_update_fbinfo(fbi);
-- 
GitLab


From dfc3052256e024057ea8a6fbfa2691bb87fea343 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:46 +0100
Subject: [PATCH 0512/2340] fbdev/au1200fb: Initialize fb_ops with fbdev macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in DMA-able virtual address space. Set the
read/write, draw and mmap callbacks to the correct implementation
and avoid implicit defaults. Also select the necessary helpers in
Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-18-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig    | 5 +----
 drivers/video/fbdev/au1200fb.c | 7 ++-----
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index de57bf59e0179..4c82890887ba1 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1457,10 +1457,7 @@ config FB_AU1100
 config FB_AU1200
 	bool "Au1200/Au1300 LCD Driver"
 	depends on (FB = y) && MIPS_ALCHEMY
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
+	select FB_DMAMEM_HELPERS
 	help
 	  This is the framebuffer driver for the Au1200/Au1300 SOCs.
 	  It can drive various panels and CRTs by passing in kernel cmd line
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index 817c1ebb625b6..16ebbab500972 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1488,15 +1488,12 @@ static int au1200fb_ioctl(struct fb_info *info, unsigned int cmd,
 
 static const struct fb_ops au1200fb_fb_ops = {
 	.owner		= THIS_MODULE,
+	__FB_DEFAULT_DMAMEM_OPS_RDWR,
 	.fb_check_var	= au1200fb_fb_check_var,
 	.fb_set_par	= au1200fb_fb_set_par,
 	.fb_setcolreg	= au1200fb_fb_setcolreg,
 	.fb_blank	= au1200fb_fb_blank,
-	.fb_fillrect	= sys_fillrect,
-	.fb_copyarea	= sys_copyarea,
-	.fb_imageblit	= sys_imageblit,
-	.fb_read	= fb_sys_read,
-	.fb_write	= fb_sys_write,
+	__FB_DEFAULT_DMAMEM_OPS_DRAW,
 	.fb_sync	= NULL,
 	.fb_ioctl	= au1200fb_ioctl,
 	.fb_mmap	= au1200fb_fb_mmap,
-- 
GitLab


From cccc934a74487dcf1407918a0739d0923a3c4cc0 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:47 +0100
Subject: [PATCH 0513/2340] fbdev/ps3fb: Set FBINFO_VIRTFB flag

The ps3fb driver operates on system memory. Mark the framebuffer
accordingly. Helpers operating on the framebuffer memory will test
for the presence of this flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-19-tzimmermann@suse.de
---
 drivers/video/fbdev/ps3fb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c
index 64d291d6b1532..de81ad3a5d1ed 100644
--- a/drivers/video/fbdev/ps3fb.c
+++ b/drivers/video/fbdev/ps3fb.c
@@ -1145,7 +1145,7 @@ static int ps3fb_probe(struct ps3_system_bus_device *dev)
 	info->fix.smem_len = ps3fb_videomemory.size - GPU_FB_START;
 
 	info->pseudo_palette = par->pseudo_palette;
-	info->flags = FBINFO_READS_FAST |
+	info->flags = FBINFO_VIRTFB | FBINFO_READS_FAST |
 		      FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN;
 
 	retval = fb_alloc_cmap(&info->cmap, 256, 0);
-- 
GitLab


From 741effeab963073c257ad1bcfac6c355d0eca966 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:48 +0100
Subject: [PATCH 0514/2340] fbdev/ps3fb: Initialize fb_ops with fbdev macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in virtual address space. Set the read/write,
draw and mmap callbacks to the correct implementation and avoid
implicit defaults. Also select the necessary helpers in Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-20-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig | 5 +----
 drivers/video/fbdev/ps3fb.c | 7 ++-----
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 4c82890887ba1..f9dab4c900332 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1715,10 +1715,7 @@ config FB_IBM_GXT4500
 config FB_PS3
 	tristate "PS3 GPU framebuffer driver"
 	depends on FB && PS3_PS3AV
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
+	select FB_SYSMEM_HELPERS
 	help
 	  Include support for the virtual frame buffer in the PS3 platform.
 
diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c
index de81ad3a5d1ed..de8d78bf070a0 100644
--- a/drivers/video/fbdev/ps3fb.c
+++ b/drivers/video/fbdev/ps3fb.c
@@ -939,15 +939,12 @@ static const struct fb_ops ps3fb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_open	= ps3fb_open,
 	.fb_release	= ps3fb_release,
-	.fb_read        = fb_sys_read,
-	.fb_write       = fb_sys_write,
+	__FB_DEFAULT_SYSMEM_OPS_RDWR,
 	.fb_check_var	= ps3fb_check_var,
 	.fb_set_par	= ps3fb_set_par,
 	.fb_setcolreg	= ps3fb_setcolreg,
 	.fb_pan_display	= ps3fb_pan_display,
-	.fb_fillrect	= sys_fillrect,
-	.fb_copyarea	= sys_copyarea,
-	.fb_imageblit	= sys_imageblit,
+	__FB_DEFAULT_SYSMEM_OPS_DRAW,
 	.fb_mmap	= ps3fb_mmap,
 	.fb_blank	= ps3fb_blank,
 	.fb_ioctl	= ps3fb_ioctl,
-- 
GitLab


From bff13b8f2c5a38f7216ea9ce40aae88694a2b196 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:49 +0100
Subject: [PATCH 0515/2340] media/ivtvfb: Initialize fb_ops to fbdev I/O-memory
 helpers

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in I/O address space. This explictily sets
the read/write, draw and mmap callbacks to the correct default
implementation.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default implementation to be invoked; hence requireing the I/O
helpers to be built in any case. Setting all callbacks in all
drivers explicitly will allow to make the I/O helpers optional.
This benefits systems that do not use these functions.

Set the callbacks via macros. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Andy Walls <awalls@md.metrocast.net>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: linux-media@vger.kernel.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-21-tzimmermann@suse.de
---
 drivers/media/pci/ivtv/Kconfig  | 4 +---
 drivers/media/pci/ivtv/ivtvfb.c | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/media/pci/ivtv/Kconfig b/drivers/media/pci/ivtv/Kconfig
index 9be52101bc4f2..2498f9079b756 100644
--- a/drivers/media/pci/ivtv/Kconfig
+++ b/drivers/media/pci/ivtv/Kconfig
@@ -48,9 +48,7 @@ config VIDEO_IVTV_ALSA
 config VIDEO_FB_IVTV
 	tristate "Conexant cx23415 framebuffer support"
 	depends on VIDEO_IVTV && FB
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
+	select FB_IOMEM_HELPERS
 	help
 	  This is a framebuffer driver for the Conexant cx23415 MPEG
 	  encoder/decoder.
diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c
index 23c8c094e791b..410477e3e6216 100644
--- a/drivers/media/pci/ivtv/ivtvfb.c
+++ b/drivers/media/pci/ivtv/ivtvfb.c
@@ -927,17 +927,17 @@ static int ivtvfb_blank(int blank_mode, struct fb_info *info)
 
 static const struct fb_ops ivtvfb_ops = {
 	.owner = THIS_MODULE,
+	.fb_read        = fb_io_read,
 	.fb_write       = ivtvfb_write,
 	.fb_check_var   = ivtvfb_check_var,
 	.fb_set_par     = ivtvfb_set_par,
 	.fb_setcolreg   = ivtvfb_setcolreg,
-	.fb_fillrect    = cfb_fillrect,
-	.fb_copyarea    = cfb_copyarea,
-	.fb_imageblit   = cfb_imageblit,
+	__FB_DEFAULT_IOMEM_OPS_DRAW,
 	.fb_cursor      = NULL,
 	.fb_ioctl       = ivtvfb_ioctl,
 	.fb_pan_display = ivtvfb_pan_display,
 	.fb_blank       = ivtvfb_blank,
+	__FB_DEFAULT_IOMEM_OPS_MMAP,
 };
 
 /* Restore hardware after firmware restart */
-- 
GitLab


From dec2d60923dbda7c26a6194a0ed9fff9dc20cd69 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:50 +0100
Subject: [PATCH 0516/2340] fbdev/clps711x-fb: Initialize fb_ops with fbdev
 macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in I/O address space. Set the read/write,
draw and mmap callbacks to the correct implementation and avoid
implicit defaults. Also select the necessary helpers in Kconfig.

The driver previously selected drawing ops for system memory
although it operates on I/O memory. Fixed now.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-22-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig       | 4 +---
 drivers/video/fbdev/clps711x-fb.c | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index f9dab4c900332..6fc43b40acac3 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -146,10 +146,8 @@ config FB_ACORN
 config FB_CLPS711X
 	tristate "CLPS711X LCD support"
 	depends on FB && (ARCH_CLPS711X || COMPILE_TEST)
+	select FB_IOMEM_HELPERS
 	select FB_MODE_HELPERS
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
 	select LCD_CLASS_DEVICE
 	select VIDEOMODE_HELPERS
 	help
diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c
index e956c90efcdcc..dcfd1fbbc7e10 100644
--- a/drivers/video/fbdev/clps711x-fb.c
+++ b/drivers/video/fbdev/clps711x-fb.c
@@ -155,13 +155,11 @@ static int clps711x_fb_blank(int blank, struct fb_info *info)
 
 static const struct fb_ops clps711x_fb_ops = {
 	.owner		= THIS_MODULE,
+	FB_DEFAULT_IOMEM_OPS,
 	.fb_setcolreg	= clps711x_fb_setcolreg,
 	.fb_check_var	= clps711x_fb_check_var,
 	.fb_set_par	= clps711x_fb_set_par,
 	.fb_blank	= clps711x_fb_blank,
-	.fb_fillrect	= sys_fillrect,
-	.fb_copyarea	= sys_copyarea,
-	.fb_imageblit	= sys_imageblit,
 };
 
 static int clps711x_lcd_check_fb(struct lcd_device *lcddev, struct fb_info *fi)
-- 
GitLab


From 63a11adaceb8b77d70bcce0890197fa9462ce160 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:51 +0100
Subject: [PATCH 0517/2340] fbdev/vt8500lcdfb: Initialize fb_ops with fbdev
 macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in DMA-able virtual address space. Set the
read/write, draw and mmap callbacks to the correct implementation
and avoid implicit defaults. Also select the necessary helpers in
Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-23-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig       | 1 +
 drivers/video/fbdev/vt8500lcdfb.c | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 6fc43b40acac3..4e06e403d0217 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1467,6 +1467,7 @@ config FB_VT8500
 	select FB_SYS_FILLRECT if (!FB_WMT_GE_ROPS)
 	select FB_SYS_COPYAREA if (!FB_WMT_GE_ROPS)
 	select FB_SYS_IMAGEBLIT
+	select FB_SYS_FOPS
 	select FB_MODE_HELPERS
 	select VIDEOMODE_HELPERS
 	help
diff --git a/drivers/video/fbdev/vt8500lcdfb.c b/drivers/video/fbdev/vt8500lcdfb.c
index 42d39a9d5130f..42c25dc851976 100644
--- a/drivers/video/fbdev/vt8500lcdfb.c
+++ b/drivers/video/fbdev/vt8500lcdfb.c
@@ -241,6 +241,7 @@ static int vt8500lcd_blank(int blank, struct fb_info *info)
 
 static const struct fb_ops vt8500lcd_ops = {
 	.owner		= THIS_MODULE,
+	__FB_DEFAULT_DMAMEM_OPS_RDWR,
 	.fb_set_par	= vt8500lcd_set_par,
 	.fb_setcolreg	= vt8500lcd_setcolreg,
 	.fb_fillrect	= wmt_ge_fillrect,
@@ -250,6 +251,7 @@ static const struct fb_ops vt8500lcd_ops = {
 	.fb_ioctl	= vt8500lcd_ioctl,
 	.fb_pan_display	= vt8500lcd_pan_display,
 	.fb_blank	= vt8500lcd_blank,
+	// .fb_mmap needs DMA mmap
 };
 
 static irqreturn_t vt8500lcd_handle_irq(int irq, void *dev_id)
@@ -357,7 +359,7 @@ static int vt8500lcd_probe(struct platform_device *pdev)
 
 	fbi->fb.fix.smem_start	= fb_mem_phys;
 	fbi->fb.fix.smem_len	= fb_mem_len;
-	fbi->fb.screen_base	= fb_mem_virt;
+	fbi->fb.screen_buffer	= fb_mem_virt;
 
 	fbi->palette_size	= PAGE_ALIGN(512);
 	fbi->palette_cpu	= dma_alloc_coherent(&pdev->dev,
-- 
GitLab


From 11754a5046080a02ecf8a78bd80644e0dae56362 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:52 +0100
Subject: [PATCH 0518/2340] fbdev/wm8505fb: Initialize fb_ops to fbdev
 I/O-memory helpers

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in DMA-able address space. This explictily
sets the read/write, draw and mmap callbacks to the correct default
implementation. Also select the necessary helpers in Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default implementation to be invoked; hence requireing the I/O
helpers to be built in any case. Setting all callbacks in all
drivers explicitly will allow to make the I/O helpers optional.
This benefits systems that do not use these functions.

Set the callbacks via macros. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-24-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig    | 1 +
 drivers/video/fbdev/wm8505fb.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 4e06e403d0217..004a63cdea5a3 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1480,6 +1480,7 @@ config FB_WM8505
 	select FB_SYS_FILLRECT if (!FB_WMT_GE_ROPS)
 	select FB_SYS_COPYAREA if (!FB_WMT_GE_ROPS)
 	select FB_SYS_IMAGEBLIT
+	select FB_SYS_FOPS
 	select FB_MODE_HELPERS
 	select VIDEOMODE_HELPERS
 	help
diff --git a/drivers/video/fbdev/wm8505fb.c b/drivers/video/fbdev/wm8505fb.c
index 5833147aa43d1..00952e9c88028 100644
--- a/drivers/video/fbdev/wm8505fb.c
+++ b/drivers/video/fbdev/wm8505fb.c
@@ -248,6 +248,7 @@ static int wm8505fb_blank(int blank, struct fb_info *info)
 
 static const struct fb_ops wm8505fb_ops = {
 	.owner		= THIS_MODULE,
+	__FB_DEFAULT_DMAMEM_OPS_RDWR,
 	.fb_set_par	= wm8505fb_set_par,
 	.fb_setcolreg	= wm8505fb_setcolreg,
 	.fb_fillrect	= wmt_ge_fillrect,
@@ -256,6 +257,7 @@ static const struct fb_ops wm8505fb_ops = {
 	.fb_sync	= wmt_ge_sync,
 	.fb_pan_display	= wm8505fb_pan_display,
 	.fb_blank	= wm8505fb_blank,
+	__FB_DEFAULT_IOMEM_OPS_MMAP,
 };
 
 static int wm8505fb_probe(struct platform_device *pdev)
-- 
GitLab


From e0f05e643eb1ff9588d38c46ad12ff74efb09959 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:53 +0100
Subject: [PATCH 0519/2340] fbdev/cyber2000fb: Initialize fb_ops with fbdev
 macros

Initialize the instance of struct fb_ops with fbdev initializer
macros for framebuffers in I/O address space. Set the read/write,
draw and mmap callbacks to the correct implementation and avoid
implicit defaults. Also select the necessary helpers in Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-arm-kernel@lists.infradead.org
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-25-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig       | 5 +----
 drivers/video/fbdev/cyber2000fb.c | 9 +--------
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 004a63cdea5a3..37c8188752baf 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -178,10 +178,7 @@ config FB_IMX
 config FB_CYBER2000
 	tristate "CyberPro 2000/2010/5000 support"
 	depends on FB && PCI && (BROKEN || !SPARC64)
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
-	select FB_IOMEM_FOPS
+	select FB_IOMEM_HELPERS
 	select VIDEO_NOMODESET
 	help
 	  This enables support for the Integraphics CyberPro 20x0 and 5000
diff --git a/drivers/video/fbdev/cyber2000fb.c b/drivers/video/fbdev/cyber2000fb.c
index 52105dc1a72f3..abb87d3576db0 100644
--- a/drivers/video/fbdev/cyber2000fb.c
+++ b/drivers/video/fbdev/cyber2000fb.c
@@ -227,13 +227,6 @@ cyber2000fb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 			   CO_REG_CMD_H, cfb);
 }
 
-static void
-cyber2000fb_imageblit(struct fb_info *info, const struct fb_image *image)
-{
-	cfb_imageblit(info, image);
-	return;
-}
-
 static int cyber2000fb_sync(struct fb_info *info)
 {
 	struct cfb_info *cfb = container_of(info, struct cfb_info, fb);
@@ -1069,7 +1062,7 @@ static const struct fb_ops cyber2000fb_ops = {
 	.fb_pan_display	= cyber2000fb_pan_display,
 	.fb_fillrect	= cyber2000fb_fillrect,
 	.fb_copyarea	= cyber2000fb_copyarea,
-	.fb_imageblit	= cyber2000fb_imageblit,
+	.fb_imageblit	= cfb_imageblit,
 	.fb_sync	= cyber2000fb_sync,
 	__FB_DEFAULT_IOMEM_OPS_MMAP,
 };
-- 
GitLab


From f7c8a046577e09d8f88213c985d102604e73ed4c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:54 +0100
Subject: [PATCH 0520/2340] staging/sm750fb: Declare fb_ops as constant

Split up lynxfb_ops and declare each as constant. The fb_ops
instance used to be modified while initializing the driver. It is
now constant and the driver picks the correct instance, depending
on the settings for acceleration and cursor support.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Teddy Wang <teddy.wang@siliconmotion.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-staging@lists.linux.dev
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-26-tzimmermann@suse.de
---
 drivers/staging/sm750fb/sm750.c | 59 +++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c
index 79bcd5bd49380..35098516f53c9 100644
--- a/drivers/staging/sm750fb/sm750.c
+++ b/drivers/staging/sm750fb/sm750.c
@@ -663,16 +663,53 @@ static int sm750fb_set_drv(struct lynxfb_par *par)
 	return ret;
 }
 
-static struct fb_ops lynxfb_ops = {
+static const struct fb_ops lynxfb_ops = {
 	.owner = THIS_MODULE,
 	.fb_check_var =  lynxfb_ops_check_var,
 	.fb_set_par = lynxfb_ops_set_par,
 	.fb_setcolreg = lynxfb_ops_setcolreg,
 	.fb_blank = lynxfb_ops_blank,
+	.fb_pan_display = lynxfb_ops_pan_display,
 	.fb_fillrect = cfb_fillrect,
 	.fb_imageblit = cfb_imageblit,
 	.fb_copyarea = cfb_copyarea,
-	/* cursor */
+};
+
+static const struct fb_ops lynxfb_ops_with_cursor = {
+	.owner = THIS_MODULE,
+	.fb_check_var =  lynxfb_ops_check_var,
+	.fb_set_par = lynxfb_ops_set_par,
+	.fb_setcolreg = lynxfb_ops_setcolreg,
+	.fb_blank = lynxfb_ops_blank,
+	.fb_pan_display = lynxfb_ops_pan_display,
+	.fb_fillrect = cfb_fillrect,
+	.fb_copyarea = cfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+	.fb_cursor = lynxfb_ops_cursor,
+};
+
+static const struct fb_ops lynxfb_ops_accel = {
+	.owner = THIS_MODULE,
+	.fb_check_var =  lynxfb_ops_check_var,
+	.fb_set_par = lynxfb_ops_set_par,
+	.fb_setcolreg = lynxfb_ops_setcolreg,
+	.fb_blank = lynxfb_ops_blank,
+	.fb_pan_display = lynxfb_ops_pan_display,
+	.fb_fillrect = lynxfb_ops_fillrect,
+	.fb_copyarea = lynxfb_ops_copyarea,
+	.fb_imageblit = lynxfb_ops_imageblit,
+};
+
+static const struct fb_ops lynxfb_ops_accel_with_cursor = {
+	.owner = THIS_MODULE,
+	.fb_check_var =  lynxfb_ops_check_var,
+	.fb_set_par = lynxfb_ops_set_par,
+	.fb_setcolreg = lynxfb_ops_setcolreg,
+	.fb_blank = lynxfb_ops_blank,
+	.fb_pan_display = lynxfb_ops_pan_display,
+	.fb_fillrect = lynxfb_ops_fillrect,
+	.fb_copyarea = lynxfb_ops_copyarea,
+	.fb_imageblit = lynxfb_ops_imageblit,
 	.fb_cursor = lynxfb_ops_cursor,
 };
 
@@ -714,7 +751,6 @@ static int lynxfb_set_fbinfo(struct fb_info *info, int index)
 	par->index = index;
 	output->channel = &crtc->channel;
 	sm750fb_set_drv(par);
-	lynxfb_ops.fb_pan_display = lynxfb_ops_pan_display;
 
 	/*
 	 * set current cursor variable and proc pointer,
@@ -731,19 +767,22 @@ static int lynxfb_set_fbinfo(struct fb_info *info, int index)
 	crtc->cursor.vstart = sm750_dev->pvMem + crtc->cursor.offset;
 
 	memset_io(crtc->cursor.vstart, 0, crtc->cursor.size);
-	if (!g_hwcursor) {
-		lynxfb_ops.fb_cursor = NULL;
+	if (!g_hwcursor)
 		sm750_hw_cursor_disable(&crtc->cursor);
-	}
 
 	/* set info->fbops, must be set before fb_find_mode */
 	if (!sm750_dev->accel_off) {
 		/* use 2d acceleration */
-		lynxfb_ops.fb_fillrect = lynxfb_ops_fillrect;
-		lynxfb_ops.fb_copyarea = lynxfb_ops_copyarea;
-		lynxfb_ops.fb_imageblit = lynxfb_ops_imageblit;
+		if (!g_hwcursor)
+			info->fbops = &lynxfb_ops_accel;
+		else
+			info->fbops = &lynxfb_ops_accel_with_cursor;
+	} else {
+		if (!g_hwcursor)
+			info->fbops = &lynxfb_ops;
+		else
+			info->fbops = &lynxfb_ops_with_cursor;
 	}
-	info->fbops = &lynxfb_ops;
 
 	if (!g_fbmode[index]) {
 		g_fbmode[index] = g_def_fbmode;
-- 
GitLab


From dc0ad215e5d8524bd72180206e042d904fcc1d4f Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:55 +0100
Subject: [PATCH 0521/2340] staging/sm750fb: Initialize fb_ops with fbdev
 macros

Initialize all instances of struct fb_ops with fbdev initializer
macros for framebuffers in I/O address space. Set the read/write,
draw and mmap callbacks to the correct implementation and avoid
implicit defaults. Also select the necessary helpers in Kconfig.

Fbdev drivers sometimes rely on the callbacks being NULL for a
default I/O-memory-based implementation to be invoked; hence
requiring the I/O helpers to be built in any case. Setting all
callbacks in all drivers explicitly will allow to make the I/O
helpers optional. This benefits systems that do not use these
functions.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Teddy Wang <teddy.wang@siliconmotion.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-staging@lists.linux.dev
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-27-tzimmermann@suse.de
---
 drivers/staging/sm750fb/sm750.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c
index 35098516f53c9..04c1b32a22c5e 100644
--- a/drivers/staging/sm750fb/sm750.c
+++ b/drivers/staging/sm750fb/sm750.c
@@ -665,31 +665,28 @@ static int sm750fb_set_drv(struct lynxfb_par *par)
 
 static const struct fb_ops lynxfb_ops = {
 	.owner = THIS_MODULE,
+	FB_DEFAULT_IOMEM_OPS,
 	.fb_check_var =  lynxfb_ops_check_var,
 	.fb_set_par = lynxfb_ops_set_par,
 	.fb_setcolreg = lynxfb_ops_setcolreg,
 	.fb_blank = lynxfb_ops_blank,
 	.fb_pan_display = lynxfb_ops_pan_display,
-	.fb_fillrect = cfb_fillrect,
-	.fb_imageblit = cfb_imageblit,
-	.fb_copyarea = cfb_copyarea,
 };
 
 static const struct fb_ops lynxfb_ops_with_cursor = {
 	.owner = THIS_MODULE,
+	FB_DEFAULT_IOMEM_OPS,
 	.fb_check_var =  lynxfb_ops_check_var,
 	.fb_set_par = lynxfb_ops_set_par,
 	.fb_setcolreg = lynxfb_ops_setcolreg,
 	.fb_blank = lynxfb_ops_blank,
 	.fb_pan_display = lynxfb_ops_pan_display,
-	.fb_fillrect = cfb_fillrect,
-	.fb_copyarea = cfb_copyarea,
-	.fb_imageblit = cfb_imageblit,
 	.fb_cursor = lynxfb_ops_cursor,
 };
 
 static const struct fb_ops lynxfb_ops_accel = {
 	.owner = THIS_MODULE,
+	__FB_DEFAULT_IOMEM_OPS_RDWR,
 	.fb_check_var =  lynxfb_ops_check_var,
 	.fb_set_par = lynxfb_ops_set_par,
 	.fb_setcolreg = lynxfb_ops_setcolreg,
@@ -698,10 +695,12 @@ static const struct fb_ops lynxfb_ops_accel = {
 	.fb_fillrect = lynxfb_ops_fillrect,
 	.fb_copyarea = lynxfb_ops_copyarea,
 	.fb_imageblit = lynxfb_ops_imageblit,
+	__FB_DEFAULT_IOMEM_OPS_MMAP,
 };
 
 static const struct fb_ops lynxfb_ops_accel_with_cursor = {
 	.owner = THIS_MODULE,
+	__FB_DEFAULT_IOMEM_OPS_RDWR,
 	.fb_check_var =  lynxfb_ops_check_var,
 	.fb_set_par = lynxfb_ops_set_par,
 	.fb_setcolreg = lynxfb_ops_setcolreg,
@@ -711,6 +710,7 @@ static const struct fb_ops lynxfb_ops_accel_with_cursor = {
 	.fb_copyarea = lynxfb_ops_copyarea,
 	.fb_imageblit = lynxfb_ops_imageblit,
 	.fb_cursor = lynxfb_ops_cursor,
+	__FB_DEFAULT_IOMEM_OPS_MMAP,
 };
 
 static int lynxfb_set_fbinfo(struct fb_info *info, int index)
-- 
GitLab


From 27ad64eac10fcb25fcbfb813921f4d30b3458e13 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:56 +0100
Subject: [PATCH 0522/2340] fbdev: Rename FB_SYS_FOPS token to FB_SYSMEM_FOPS

Rename the token to harmonize naming among various helpers. For
example, I/O-memory helpers use FB_IOMEM_FOPS.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-28-tzimmermann@suse.de
---
 drivers/video/fbdev/Kconfig       | 4 ++--
 drivers/video/fbdev/core/Kconfig  | 6 +++---
 drivers/video/fbdev/core/Makefile | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 37c8188752baf..d5909a9206ffd 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1464,7 +1464,7 @@ config FB_VT8500
 	select FB_SYS_FILLRECT if (!FB_WMT_GE_ROPS)
 	select FB_SYS_COPYAREA if (!FB_WMT_GE_ROPS)
 	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
+	select FB_SYSMEM_FOPS
 	select FB_MODE_HELPERS
 	select VIDEOMODE_HELPERS
 	help
@@ -1477,7 +1477,7 @@ config FB_WM8505
 	select FB_SYS_FILLRECT if (!FB_WMT_GE_ROPS)
 	select FB_SYS_COPYAREA if (!FB_WMT_GE_ROPS)
 	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
+	select FB_SYSMEM_FOPS
 	select FB_MODE_HELPERS
 	select VIDEOMODE_HELPERS
 	help
diff --git a/drivers/video/fbdev/core/Kconfig b/drivers/video/fbdev/core/Kconfig
index 7a3ed13bed708..faab5d50cac3c 100644
--- a/drivers/video/fbdev/core/Kconfig
+++ b/drivers/video/fbdev/core/Kconfig
@@ -129,7 +129,7 @@ config FB_LITTLE_ENDIAN
 
 endchoice
 
-config FB_SYS_FOPS
+config FB_SYSMEM_FOPS
 	tristate
 	depends on FB_CORE
 
@@ -142,8 +142,8 @@ config FB_DMAMEM_HELPERS
 	depends on FB_CORE
 	select FB_SYS_COPYAREA
 	select FB_SYS_FILLRECT
-	select FB_SYS_FOPS
 	select FB_SYS_IMAGEBLIT
+	select FB_SYSMEM_FOPS
 
 config FB_IOMEM_FOPS
 	tristate
@@ -168,8 +168,8 @@ config FB_SYSMEM_HELPERS
 	depends on FB_CORE
 	select FB_SYS_COPYAREA
 	select FB_SYS_FILLRECT
-	select FB_SYS_FOPS
 	select FB_SYS_IMAGEBLIT
+	select FB_SYSMEM_FOPS
 
 config FB_SYSMEM_HELPERS_DEFERRED
 	bool
diff --git a/drivers/video/fbdev/core/Makefile b/drivers/video/fbdev/core/Makefile
index c1d657601b2be..d159747590869 100644
--- a/drivers/video/fbdev/core/Makefile
+++ b/drivers/video/fbdev/core/Makefile
@@ -32,6 +32,6 @@ obj-$(CONFIG_FB_IOMEM_FOPS)    += fb_io_fops.o
 obj-$(CONFIG_FB_SYS_FILLRECT)  += sysfillrect.o
 obj-$(CONFIG_FB_SYS_COPYAREA)  += syscopyarea.o
 obj-$(CONFIG_FB_SYS_IMAGEBLIT) += sysimgblt.o
-obj-$(CONFIG_FB_SYS_FOPS)      += fb_sys_fops.o
+obj-$(CONFIG_FB_SYSMEM_FOPS)   += fb_sys_fops.o
 obj-$(CONFIG_FB_SVGALIB)       += svgalib.o
 obj-$(CONFIG_FB_DDC)           += fb_ddc.o
-- 
GitLab


From 23dad7b95fea68426311405a0627b90c06c3fc34 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:57 +0100
Subject: [PATCH 0523/2340] fbdev: Remove trailing whitespaces

Fix coding style. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-29-tzimmermann@suse.de
---
 drivers/video/fbdev/sbuslib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c
index 21e9fd8e69e26..4d524db5c4f26 100644
--- a/drivers/video/fbdev/sbuslib.c
+++ b/drivers/video/fbdev/sbuslib.c
@@ -48,7 +48,7 @@ int sbusfb_mmap_helper(struct sbus_mmap_map *map,
 	unsigned long map_offset = 0;
 	unsigned long off;
 	int i;
-                                        
+
 	if (!(vma->vm_flags & (VM_SHARED | VM_MAYSHARE)))
 		return -EINVAL;
 
@@ -72,7 +72,7 @@ int sbusfb_mmap_helper(struct sbus_mmap_map *map,
 #define POFF_MASK	(PAGE_MASK|0x1UL)
 #else
 #define POFF_MASK	(PAGE_MASK)
-#endif				
+#endif
 				map_offset = (physbase + map[i].poff) & POFF_MASK;
 				break;
 			}
-- 
GitLab


From 76f92201b821dd2f442ebe37ec9b52b525855bac Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:58 +0100
Subject: [PATCH 0524/2340] fbdev: Push pgprot_decrypted() into mmap
 implementations

If a driver sets struct fb_ops.fb_mmap, the fbdev core automatically
calls pgprot_decrypted(). But the default fb_mmap code doesn't handle
pgprot_decrypted().

Move the call to pgprot_decrypted() into each drivers' fb_mmap function.
This only concerns fb_mmap functions for system and DMA memory. For
I/O memory, which is the default case, nothing changes. The fb_mmap
for I/O-memory can later be moved into a helper as well.

DRM's fbdev emulation handles pgprot_decrypted() internally via the
Prime helpers. Fbdev doesn't have to do anything in this case. In
cases where DRM uses deferred I/O, this patch updates fb_mmap correctly.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-30-tzimmermann@suse.de
---
 drivers/auxdisplay/cfag12864bfb.c              | 2 ++
 drivers/auxdisplay/ht16k33.c                   | 2 ++
 drivers/video/fbdev/amba-clcd.c                | 2 ++
 drivers/video/fbdev/au1100fb.c                 | 2 ++
 drivers/video/fbdev/au1200fb.c                 | 2 ++
 drivers/video/fbdev/core/fb_chrdev.c           | 5 -----
 drivers/video/fbdev/core/fb_defio.c            | 2 ++
 drivers/video/fbdev/ep93xx-fb.c                | 2 ++
 drivers/video/fbdev/gbefb.c                    | 2 ++
 drivers/video/fbdev/omap/omapfb_main.c         | 2 ++
 drivers/video/fbdev/omap2/omapfb/omapfb-main.c | 2 ++
 drivers/video/fbdev/ps3fb.c                    | 2 ++
 drivers/video/fbdev/sa1100fb.c                 | 2 ++
 drivers/video/fbdev/sbuslib.c                  | 1 +
 drivers/video/fbdev/sh_mobile_lcdcfb.c         | 4 ++++
 drivers/video/fbdev/smscufx.c                  | 2 ++
 drivers/video/fbdev/udlfb.c                    | 2 ++
 drivers/video/fbdev/vermilion/vermilion.c      | 2 ++
 drivers/video/fbdev/vfb.c                      | 2 ++
 19 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c
index ede0f9a513110..5ba19c339f088 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -51,6 +51,8 @@ static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	struct page *pages = virt_to_page(cfag12864b_buffer);
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	return vm_map_pages_zero(vma, &pages, 1);
 }
 
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index 2f1dc6b4e2765..a90430b7d07ba 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -351,6 +351,8 @@ static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct ht16k33_priv *priv = info->par;
 	struct page *pages = virt_to_page(priv->fbdev.buffer);
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	return vm_map_pages_zero(vma, &pages, 1);
 }
 
diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index 0399db369e709..47d373f04f3f0 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -829,6 +829,8 @@ static int clcdfb_of_dma_setup(struct clcd_fb *fb)
 
 static int clcdfb_of_dma_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
 			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c
index a9c8d33a6ef71..08109ce535cd4 100644
--- a/drivers/video/fbdev/au1100fb.c
+++ b/drivers/video/fbdev/au1100fb.c
@@ -342,6 +342,8 @@ int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 {
 	struct au1100fb_device *fbdev = to_au1100fb_device(fbi);
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	pgprot_val(vma->vm_page_prot) |= (6 << 9); //CCA=6
 
 	return dma_mmap_coherent(fbdev->dev, vma, fbdev->fb_mem, fbdev->fb_phys,
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index 16ebbab500972..6f20efc663d7e 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1236,6 +1236,8 @@ static int au1200fb_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	struct au1200fb_device *fbdev = info->par;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	return dma_mmap_coherent(fbdev->dev, vma,
 				 fbdev->fb_mem, fbdev->fb_phys, fbdev->fb_len);
 }
diff --git a/drivers/video/fbdev/core/fb_chrdev.c b/drivers/video/fbdev/core/fb_chrdev.c
index 32a7315b4b6dd..b73a122950a94 100644
--- a/drivers/video/fbdev/core/fb_chrdev.c
+++ b/drivers/video/fbdev/core/fb_chrdev.c
@@ -325,11 +325,6 @@ static int fb_mmap(struct file *file, struct vm_area_struct *vma)
 	if (info->fbops->fb_mmap) {
 		int res;
 
-		/*
-		 * The framebuffer needs to be accessed decrypted, be sure
-		 * SME protection is removed ahead of the call
-		 */
-		vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 		res = info->fbops->fb_mmap(info, vma);
 		mutex_unlock(&info->mm_lock);
 		return res;
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index 274f5d0fa2471..1b0b85e59e5e1 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -227,6 +227,8 @@ static const struct address_space_operations fb_deferred_io_aops = {
 
 int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	vma->vm_ops = &fb_deferred_io_vm_ops;
 	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
 	if (!(info->flags & FBINFO_VIRTFB))
diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c
index cae00deee0014..3e378874ccc79 100644
--- a/drivers/video/fbdev/ep93xx-fb.c
+++ b/drivers/video/fbdev/ep93xx-fb.c
@@ -311,6 +311,8 @@ static int ep93xxfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	unsigned int offset = vma->vm_pgoff << PAGE_SHIFT;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	if (offset < info->fix.smem_len) {
 		return dma_mmap_wc(info->device, vma, info->screen_base,
 				   info->fix.smem_start, info->fix.smem_len);
diff --git a/drivers/video/fbdev/gbefb.c b/drivers/video/fbdev/gbefb.c
index e89e5579258ef..8463de833d1e1 100644
--- a/drivers/video/fbdev/gbefb.c
+++ b/drivers/video/fbdev/gbefb.c
@@ -1000,6 +1000,8 @@ static int gbefb_mmap(struct fb_info *info,
 	unsigned long phys_addr, phys_size;
 	u16 *tile;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	/* check range */
 	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
 		return -EINVAL;
diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index 694cf6318782b..aa31c0d26e925 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -1203,6 +1203,8 @@ static int omapfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct omapfb_device *fbdev = plane->fbdev;
 	int r;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	omapfb_rqueue_lock(fbdev);
 	r = fbdev->ctrl->mmap(info, vma);
 	omapfb_rqueue_unlock(fbdev);
diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
index c9fd0ad352d7f..0db9c55fce5a2 100644
--- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
@@ -1095,6 +1095,8 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 	u32 len;
 	int r;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	rg = omapfb_get_mem_region(ofbi->region);
 
 	start = omapfb_get_region_paddr(ofbi);
diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c
index de8d78bf070a0..dbcda307f6a67 100644
--- a/drivers/video/fbdev/ps3fb.c
+++ b/drivers/video/fbdev/ps3fb.c
@@ -708,6 +708,8 @@ static int ps3fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	int r;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	r = vm_iomap_memory(vma, info->fix.smem_start, info->fix.smem_len);
 
 	dev_dbg(info->device, "ps3fb: mmap framebuffer P(%lx)->V(%lx)\n",
diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index befd3fe2f6596..0d362d2bf0e38 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c
@@ -562,6 +562,8 @@ static int sa1100fb_mmap(struct fb_info *info,
 		container_of(info, struct sa1100fb_info, fb);
 	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	if (off < info->fix.smem_len) {
 		vma->vm_pgoff += 1; /* skip over the palette */
 		return dma_mmap_wc(fbi->dev, vma, fbi->map_cpu, fbi->map_dma,
diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c
index 4d524db5c4f26..634e3d159452c 100644
--- a/drivers/video/fbdev/sbuslib.c
+++ b/drivers/video/fbdev/sbuslib.c
@@ -60,6 +60,7 @@ int sbusfb_mmap_helper(struct sbus_mmap_map *map,
 
 	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	/* Each page, see which map applies */
diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c
index d84628de51893..eb2297b37504c 100644
--- a/drivers/video/fbdev/sh_mobile_lcdcfb.c
+++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c
@@ -1482,6 +1482,8 @@ sh_mobile_lcdc_overlay_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	if (info->fbdefio)
 		return fb_deferred_io_mmap(info, vma);
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	return dma_mmap_coherent(ovl->channel->lcdc->dev, vma, ovl->fb_mem,
 				 ovl->dma_handle, ovl->fb_size);
 }
@@ -1956,6 +1958,8 @@ sh_mobile_lcdc_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	if (info->fbdefio)
 		return fb_deferred_io_mmap(info, vma);
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	return dma_mmap_coherent(ch->lcdc->dev, vma, ch->fb_mem,
 				 ch->dma_handle, ch->fb_size);
 }
diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index 90a77d19b236b..35d682b110c42 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -783,6 +783,8 @@ static int ufx_ops_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	if (info->fbdefio)
 		return fb_deferred_io_mmap(info, vma);
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
 		return -EINVAL;
 	if (size > info->fix.smem_len)
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 2460ff4ac86b4..1514ddac4cafc 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -331,6 +331,8 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	if (info->fbdefio)
 		return fb_deferred_io_mmap(info, vma);
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
 		return -EINVAL;
 	if (size > info->fix.smem_len)
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 840ead69654b8..a087b42ca652f 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -998,6 +998,8 @@ static int vmlfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	int ret;
 	unsigned long prot;
 
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	ret = vmlfb_vram_offset(vinfo, offset);
 	if (ret)
 		return -EINVAL;
diff --git a/drivers/video/fbdev/vfb.c b/drivers/video/fbdev/vfb.c
index f6140f247e4b0..f86149ba38352 100644
--- a/drivers/video/fbdev/vfb.c
+++ b/drivers/video/fbdev/vfb.c
@@ -382,6 +382,8 @@ static int vfb_pan_display(struct fb_var_screeninfo *var,
 static int vfb_mmap(struct fb_info *info,
 		    struct vm_area_struct *vma)
 {
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
 	return remap_vmalloc_range(vma, (void *)info->fix.smem_start, vma->vm_pgoff);
 }
 
-- 
GitLab


From 33253d9e01d40542f07aaf718a655893cd2949e5 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:15:59 +0100
Subject: [PATCH 0525/2340] fbdev: Move default fb_mmap code into helper
 function

Move the default fb_mmap code for I/O address spaces into the helper
function fb_io_mmap(). The helper can either be called via struct
fb_ops.fb_mmap or as the default if no fb_mmap has been set. Also
set the new helper in __FB_DEFAULT_IOMEM_OPS_MMAP.

In the mid-term, fb_io_mmap() is supposed to become optional. Fbdev
drivers will initialize their struct fb_ops.fb_mmap to the helper
and select a corresponding Kconfig token. The helper can then be made
optional at compile time.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-31-tzimmermann@suse.de
---
 drivers/video/fbdev/core/fb_chrdev.c  | 36 +++++----------------------
 drivers/video/fbdev/core/fb_io_fops.c | 27 ++++++++++++++++++++
 include/linux/fb.h                    |  3 ++-
 3 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/drivers/video/fbdev/core/fb_chrdev.c b/drivers/video/fbdev/core/fb_chrdev.c
index b73a122950a94..089441c9d810f 100644
--- a/drivers/video/fbdev/core/fb_chrdev.c
+++ b/drivers/video/fbdev/core/fb_chrdev.c
@@ -314,20 +314,16 @@ static long fb_compat_ioctl(struct file *file, unsigned int cmd,
 static int fb_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct fb_info *info = file_fb_info(file);
-	unsigned long mmio_pgoff;
-	unsigned long start;
-	u32 len;
+	int res;
 
 	if (!info)
 		return -ENODEV;
+
 	mutex_lock(&info->mm_lock);
 
 	if (info->fbops->fb_mmap) {
-		int res;
 
 		res = info->fbops->fb_mmap(info, vma);
-		mutex_unlock(&info->mm_lock);
-		return res;
 #if IS_ENABLED(CONFIG_FB_DEFERRED_IO)
 	} else if (info->fbdefio) {
 		/*
@@ -335,35 +331,15 @@ static int fb_mmap(struct file *file, struct vm_area_struct *vma)
 		 * minimum, point struct fb_ops.fb_mmap to fb_deferred_io_mmap().
 		 */
 		dev_warn_once(info->dev, "fbdev mmap not set up for deferred I/O.\n");
-		mutex_unlock(&info->mm_lock);
-		return -ENODEV;
+		res = -ENODEV;
 #endif
+	} else {
+		res = fb_io_mmap(info, vma);
 	}
 
-	/*
-	 * Ugh. This can be either the frame buffer mapping, or
-	 * if pgoff points past it, the mmio mapping.
-	 */
-	start = info->fix.smem_start;
-	len = info->fix.smem_len;
-	mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
-	if (vma->vm_pgoff >= mmio_pgoff) {
-		if (info->var.accel_flags) {
-			mutex_unlock(&info->mm_lock);
-			return -EINVAL;
-		}
-
-		vma->vm_pgoff -= mmio_pgoff;
-		start = info->fix.mmio_start;
-		len = info->fix.mmio_len;
-	}
 	mutex_unlock(&info->mm_lock);
 
-	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-	vma->vm_page_prot = pgprot_framebuffer(vma->vm_page_prot, vma->vm_start,
-					       vma->vm_end, start);
-
-	return vm_iomap_memory(vma, start, len);
+	return res;
 }
 
 static int fb_open(struct inode *inode, struct file *file)
diff --git a/drivers/video/fbdev/core/fb_io_fops.c b/drivers/video/fbdev/core/fb_io_fops.c
index 871b829521af3..60805e43914e5 100644
--- a/drivers/video/fbdev/core/fb_io_fops.c
+++ b/drivers/video/fbdev/core/fb_io_fops.c
@@ -132,5 +132,32 @@ ssize_t fb_io_write(struct fb_info *info, const char __user *buf, size_t count,
 }
 EXPORT_SYMBOL(fb_io_write);
 
+int fb_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+	unsigned long start = info->fix.smem_start;
+	u32 len = info->fix.smem_len;
+	unsigned long mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
+
+	/*
+	 * This can be either the framebuffer mapping, or if pgoff points
+	 * past it, the mmio mapping.
+	 */
+	if (vma->vm_pgoff >= mmio_pgoff) {
+		if (info->var.accel_flags)
+			return -EINVAL;
+
+		vma->vm_pgoff -= mmio_pgoff;
+		start = info->fix.mmio_start;
+		len = info->fix.mmio_len;
+	}
+
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	vma->vm_page_prot = pgprot_framebuffer(vma->vm_page_prot, vma->vm_start,
+					       vma->vm_end, start);
+
+	return vm_iomap_memory(vma, start, len);
+}
+EXPORT_SYMBOL(fb_io_mmap);
+
 MODULE_DESCRIPTION("Fbdev helpers for framebuffers in I/O memory");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 94e2c44c65699..a36d05b576b0c 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -536,6 +536,7 @@ extern ssize_t fb_io_read(struct fb_info *info, char __user *buf,
 			  size_t count, loff_t *ppos);
 extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf,
 			   size_t count, loff_t *ppos);
+int fb_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
 
 #define __FB_DEFAULT_IOMEM_OPS_RDWR \
 	.fb_read	= fb_io_read, \
@@ -547,7 +548,7 @@ extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf,
 	.fb_imageblit	= cfb_imageblit
 
 #define __FB_DEFAULT_IOMEM_OPS_MMAP \
-	.fb_mmap	= NULL /* default implementation */
+	.fb_mmap	= fb_io_mmap
 
 #define FB_DEFAULT_IOMEM_OPS \
 	__FB_DEFAULT_IOMEM_OPS_RDWR, \
-- 
GitLab


From b3e8813773c568fd2d65e9752abfda27442e502e Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:16:00 +0100
Subject: [PATCH 0526/2340] fbdev: Warn on incorrect framebuffer access

Test in framebuffer read, write and drawing helpers if FBINFO_VIRTFB
has been set correctly. Framebuffers in I/O memory should only be
accessed with the architecture's respective helpers. Framebuffers
in system memory should be accessed with the regular load and
store operations. Presumably not all drivers get this right, so we
now warn about it.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-32-tzimmermann@suse.de
---
 drivers/video/fbdev/core/cfbcopyarea.c | 3 +++
 drivers/video/fbdev/core/cfbfillrect.c | 3 +++
 drivers/video/fbdev/core/cfbimgblt.c   | 3 +++
 drivers/video/fbdev/core/fb_io_fops.c  | 9 +++++++++
 drivers/video/fbdev/core/fb_sys_fops.c | 6 ++++++
 drivers/video/fbdev/core/syscopyarea.c | 3 +++
 drivers/video/fbdev/core/sysfillrect.c | 3 +++
 drivers/video/fbdev/core/sysimgblt.c   | 3 +++
 include/linux/fb.h                     | 8 +++++++-
 9 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/core/cfbcopyarea.c b/drivers/video/fbdev/core/cfbcopyarea.c
index 5b80bf3dae504..a271f57d9c6c1 100644
--- a/drivers/video/fbdev/core/cfbcopyarea.c
+++ b/drivers/video/fbdev/core/cfbcopyarea.c
@@ -391,6 +391,9 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 	if (p->state != FBINFO_STATE_RUNNING)
 		return;
 
+	if (p->flags & FBINFO_VIRTFB)
+		fb_warn_once(p, "Framebuffer is not in I/O address space.");
+
 	/* if the beginning of the target area might overlap with the end of
 	the source area, be have to copy the area reverse. */
 	if ((dy == sy && dx > sx) || (dy > sy)) {
diff --git a/drivers/video/fbdev/core/cfbfillrect.c b/drivers/video/fbdev/core/cfbfillrect.c
index ba9f58b2a5e86..cbaa4c9e2355a 100644
--- a/drivers/video/fbdev/core/cfbfillrect.c
+++ b/drivers/video/fbdev/core/cfbfillrect.c
@@ -287,6 +287,9 @@ void cfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
 	if (p->state != FBINFO_STATE_RUNNING)
 		return;
 
+	if (p->flags & FBINFO_VIRTFB)
+		fb_warn_once(p, "Framebuffer is not in I/O address space.");
+
 	if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    p->fix.visual == FB_VISUAL_DIRECTCOLOR )
 		fg = ((u32 *) (p->pseudo_palette))[rect->color];
diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
index 9ebda4e0dc7ab..7d1d2f1a627dc 100644
--- a/drivers/video/fbdev/core/cfbimgblt.c
+++ b/drivers/video/fbdev/core/cfbimgblt.c
@@ -326,6 +326,9 @@ void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
 	if (p->state != FBINFO_STATE_RUNNING)
 		return;
 
+	if (p->flags & FBINFO_VIRTFB)
+		fb_warn_once(p, "Framebuffer is not in I/O address space.");
+
 	bitstart = (dy * p->fix.line_length * 8) + (dx * bpp);
 	start_index = bitstart & (32 - 1);
 	pitch_index = (p->fix.line_length & (bpl - 1)) * 8;
diff --git a/drivers/video/fbdev/core/fb_io_fops.c b/drivers/video/fbdev/core/fb_io_fops.c
index 60805e43914e5..3408ff1b2b7a0 100644
--- a/drivers/video/fbdev/core/fb_io_fops.c
+++ b/drivers/video/fbdev/core/fb_io_fops.c
@@ -12,6 +12,9 @@ ssize_t fb_io_read(struct fb_info *info, char __user *buf, size_t count, loff_t
 	int c, cnt = 0, err = 0;
 	unsigned long total_size, trailing;
 
+	if (info->flags & FBINFO_VIRTFB)
+		fb_warn_once(info, "Framebuffer is not in I/O address space.");
+
 	if (!info->screen_base)
 		return -ENODEV;
 
@@ -73,6 +76,9 @@ ssize_t fb_io_write(struct fb_info *info, const char __user *buf, size_t count,
 	int c, cnt = 0, err = 0;
 	unsigned long total_size, trailing;
 
+	if (info->flags & FBINFO_VIRTFB)
+		fb_warn_once(info, "Framebuffer is not in I/O address space.");
+
 	if (!info->screen_base)
 		return -ENODEV;
 
@@ -138,6 +144,9 @@ int fb_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	u32 len = info->fix.smem_len;
 	unsigned long mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
 
+	if (info->flags & FBINFO_VIRTFB)
+		fb_warn_once(info, "Framebuffer is not in I/O address space.");
+
 	/*
 	 * This can be either the framebuffer mapping, or if pgoff points
 	 * past it, the mmio mapping.
diff --git a/drivers/video/fbdev/core/fb_sys_fops.c b/drivers/video/fbdev/core/fb_sys_fops.c
index 0cb0989abda61..a9aa6519a5b30 100644
--- a/drivers/video/fbdev/core/fb_sys_fops.c
+++ b/drivers/video/fbdev/core/fb_sys_fops.c
@@ -22,6 +22,9 @@ ssize_t fb_sys_read(struct fb_info *info, char __user *buf, size_t count,
 	unsigned long total_size, c;
 	ssize_t ret;
 
+	if (!(info->flags & FBINFO_VIRTFB))
+		fb_warn_once(info, "Framebuffer is not in virtual address space.");
+
 	if (!info->screen_buffer)
 		return -ENODEV;
 
@@ -64,6 +67,9 @@ ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
 	unsigned long total_size, c;
 	size_t ret;
 
+	if (!(info->flags & FBINFO_VIRTFB))
+		fb_warn_once(info, "Framebuffer is not in virtual address space.");
+
 	if (!info->screen_buffer)
 		return -ENODEV;
 
diff --git a/drivers/video/fbdev/core/syscopyarea.c b/drivers/video/fbdev/core/syscopyarea.c
index 7b8bd3a2bedc5..75e7001e8450f 100644
--- a/drivers/video/fbdev/core/syscopyarea.c
+++ b/drivers/video/fbdev/core/syscopyarea.c
@@ -324,6 +324,9 @@ void sys_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 	if (p->state != FBINFO_STATE_RUNNING)
 		return;
 
+	if (!(p->flags & FBINFO_VIRTFB))
+		fb_warn_once(p, "Framebuffer is not in virtual address space.");
+
 	/* if the beginning of the target area might overlap with the end of
 	the source area, be have to copy the area reverse. */
 	if ((dy == sy && dx > sx) || (dy > sy)) {
diff --git a/drivers/video/fbdev/core/sysfillrect.c b/drivers/video/fbdev/core/sysfillrect.c
index bcdcaeae6538c..e49221a88ccc7 100644
--- a/drivers/video/fbdev/core/sysfillrect.c
+++ b/drivers/video/fbdev/core/sysfillrect.c
@@ -242,6 +242,9 @@ void sys_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
 	if (p->state != FBINFO_STATE_RUNNING)
 		return;
 
+	if (!(p->flags & FBINFO_VIRTFB))
+		fb_warn_once(p, "Framebuffer is not in virtual address space.");
+
 	if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    p->fix.visual == FB_VISUAL_DIRECTCOLOR )
 		fg = ((u32 *) (p->pseudo_palette))[rect->color];
diff --git a/drivers/video/fbdev/core/sysimgblt.c b/drivers/video/fbdev/core/sysimgblt.c
index 665ef7a0a2495..6949bbd51d920 100644
--- a/drivers/video/fbdev/core/sysimgblt.c
+++ b/drivers/video/fbdev/core/sysimgblt.c
@@ -296,6 +296,9 @@ void sys_imageblit(struct fb_info *p, const struct fb_image *image)
 	if (p->state != FBINFO_STATE_RUNNING)
 		return;
 
+	if (!(p->flags & FBINFO_VIRTFB))
+		fb_warn_once(p, "Framebuffer is not in virtual address space.");
+
 	bitstart = (dy * p->fix.line_length * 8) + (dx * bpp);
 	start_index = bitstart & (32 - 1);
 	pitch_index = (p->fix.line_length & (bpl - 1)) * 8;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index a36d05b576b0c..24f0ec3662352 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -849,7 +849,10 @@ static inline bool fb_modesetting_disabled(const char *drvname)
 }
 #endif
 
-/* Convenience logging macros */
+/*
+ * Convenience logging macros
+ */
+
 #define fb_err(fb_info, fmt, ...)					\
 	pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
 #define fb_notice(info, fmt, ...)					\
@@ -861,4 +864,7 @@ static inline bool fb_modesetting_disabled(const char *drvname)
 #define fb_dbg(fb_info, fmt, ...)					\
 	pr_debug("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
 
+#define fb_warn_once(fb_info, fmt, ...)					\
+	pr_warn_once("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+
 #endif /* _LINUX_FB_H */
-- 
GitLab


From 8813e86f6d82a7931446c3cbc5d596f77d0f1ba6 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 27 Nov 2023 14:16:01 +0100
Subject: [PATCH 0527/2340] fbdev: Remove default file-I/O implementations

Drop the default implementations for file read, write and mmap
operations. Each fbdev driver must now provide an implementation
and select any necessary helpers. If no implementation has been
set, fbdev returns an errno code to user space. The code is the
same as if the operation had not been set in the file_operations
struct.

This change makes the fbdev helpers for I/O memory optional. Most
systems only use system-memory framebuffers via DRM's fbdev emulation.

v2:
	* warn once if I/O callbacks are missing (Javier)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127131655.4020-33-tzimmermann@suse.de
---
 drivers/video/fbdev/core/Kconfig     |  1 -
 drivers/video/fbdev/core/fb_chrdev.c | 37 +++++++++-------------------
 include/linux/fb.h                   |  5 ++++
 3 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/drivers/video/fbdev/core/Kconfig b/drivers/video/fbdev/core/Kconfig
index faab5d50cac3c..21053bf00dc58 100644
--- a/drivers/video/fbdev/core/Kconfig
+++ b/drivers/video/fbdev/core/Kconfig
@@ -4,7 +4,6 @@
 #
 
 config FB_CORE
-	select FB_IOMEM_FOPS
 	select VIDEO_CMDLINE
 	tristate
 
diff --git a/drivers/video/fbdev/core/fb_chrdev.c b/drivers/video/fbdev/core/fb_chrdev.c
index 089441c9d810f..4ebd16b7e3b8d 100644
--- a/drivers/video/fbdev/core/fb_chrdev.c
+++ b/drivers/video/fbdev/core/fb_chrdev.c
@@ -34,13 +34,13 @@ static ssize_t fb_read(struct file *file, char __user *buf, size_t count, loff_t
 	if (!info)
 		return -ENODEV;
 
+	if (fb_WARN_ON_ONCE(info, !info->fbops->fb_read))
+		return -EINVAL;
+
 	if (info->state != FBINFO_STATE_RUNNING)
 		return -EPERM;
 
-	if (info->fbops->fb_read)
-		return info->fbops->fb_read(info, buf, count, ppos);
-
-	return fb_io_read(info, buf, count, ppos);
+	return info->fbops->fb_read(info, buf, count, ppos);
 }
 
 static ssize_t fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
@@ -50,13 +50,13 @@ static ssize_t fb_write(struct file *file, const char __user *buf, size_t count,
 	if (!info)
 		return -ENODEV;
 
+	if (fb_WARN_ON_ONCE(info, !info->fbops->fb_write))
+		return -EINVAL;
+
 	if (info->state != FBINFO_STATE_RUNNING)
 		return -EPERM;
 
-	if (info->fbops->fb_write)
-		return info->fbops->fb_write(info, buf, count, ppos);
-
-	return fb_io_write(info, buf, count, ppos);
+	return info->fbops->fb_write(info, buf, count, ppos);
 }
 
 static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
@@ -319,24 +319,11 @@ static int fb_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!info)
 		return -ENODEV;
 
-	mutex_lock(&info->mm_lock);
-
-	if (info->fbops->fb_mmap) {
-
-		res = info->fbops->fb_mmap(info, vma);
-#if IS_ENABLED(CONFIG_FB_DEFERRED_IO)
-	} else if (info->fbdefio) {
-		/*
-		 * FB deferred I/O wants you to handle mmap in your drivers. At a
-		 * minimum, point struct fb_ops.fb_mmap to fb_deferred_io_mmap().
-		 */
-		dev_warn_once(info->dev, "fbdev mmap not set up for deferred I/O.\n");
-		res = -ENODEV;
-#endif
-	} else {
-		res = fb_io_mmap(info, vma);
-	}
+	if (fb_WARN_ON_ONCE(info, !info->fbops->fb_mmap))
+		return -ENODEV;
 
+	mutex_lock(&info->mm_lock);
+	res = info->fbops->fb_mmap(info, vma);
 	mutex_unlock(&info->mm_lock);
 
 	return res;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 24f0ec3662352..05dc9624897df 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -867,4 +867,9 @@ static inline bool fb_modesetting_disabled(const char *drvname)
 #define fb_warn_once(fb_info, fmt, ...)					\
 	pr_warn_once("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
 
+#define fb_WARN_ONCE(fb_info, condition, fmt, ...) \
+	WARN_ONCE(condition, "fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+#define fb_WARN_ON_ONCE(fb_info, x) \
+	fb_WARN_ONCE(fb_info, (x), "%s", "fb_WARN_ON_ONCE(" __stringify(x) ")")
+
 #endif /* _LINUX_FB_H */
-- 
GitLab


From fcebbe2fa3443e400657d71182610219750d1c1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 24 Nov 2023 10:27:31 +0200
Subject: [PATCH 0528/2340] drm/i915/psr: Include some basic PSR information in
 the state dump
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently no one can figure out what the PSR code is doing since
we're including any of it in the basic state dump. Add at least the
bare minimum there.

v2: Also dump has_panel_replay (Jouni)

Cc: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124082735.25470-1-ville.syrjala@linux.intel.com
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
---
 drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
index 30336453041d1..30b8ddeae8cef 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
@@ -264,6 +264,12 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config,
 
 		drm_dbg_kms(&i915->drm, "sdp split: %s\n",
 			    str_enabled_disabled(pipe_config->sdp_split_enable));
+
+		drm_dbg_kms(&i915->drm, "psr: %s, psr2: %s, panel replay: %s, selective fetch: %s\n",
+			    str_enabled_disabled(pipe_config->has_psr),
+			    str_enabled_disabled(pipe_config->has_psr2),
+			    str_enabled_disabled(pipe_config->has_panel_replay),
+			    str_enabled_disabled(pipe_config->enable_psr2_sel_fetch));
 	}
 
 	drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n",
-- 
GitLab


From e0ef2daa8ca8ce4dbc2fd0959e383b753a87fd7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 27 Nov 2023 16:50:25 +0200
Subject: [PATCH 0529/2340] drm/i915: Skip some timing checks on BXT/GLK DSI
 transcoders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apparently some BXT/GLK systems have DSI panels whose timings
don't agree with the normal cpu transcoder hblank>=32 limitation.
This is perhaps fine as there are no specific hblank/etc. limits
listed for the BXT/GLK DSI transcoders.

Move those checks out from the global intel_mode_valid() into
into connector specific .mode_valid() hooks, skipping BXT/GLK
DSI connectors. We'll leave the basic [hv]display/[hv]total
checks in intel_mode_valid() as those seem like sensible upper
limits regardless of the transcoder used.

Cc: stable@vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9720
Fixes: 8f4b1068e7fc ("drm/i915: Check some transcoder timing minimum limits")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-1-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/icl_dsi.c       |  7 +++++++
 drivers/gpu/drm/i915/display/intel_crt.c     |  5 +++++
 drivers/gpu/drm/i915/display/intel_display.c | 10 ++++++++++
 drivers/gpu/drm/i915/display/intel_display.h |  3 +++
 drivers/gpu/drm/i915/display/intel_dp.c      |  4 ++++
 drivers/gpu/drm/i915/display/intel_dp_mst.c  |  4 ++++
 drivers/gpu/drm/i915/display/intel_dvo.c     |  6 ++++++
 drivers/gpu/drm/i915/display/intel_hdmi.c    |  4 ++++
 drivers/gpu/drm/i915/display/intel_lvds.c    |  5 +++++
 drivers/gpu/drm/i915/display/intel_sdvo.c    |  8 +++++++-
 drivers/gpu/drm/i915/display/intel_tv.c      |  8 +++++++-
 drivers/gpu/drm/i915/display/vlv_dsi.c       | 18 +++++++++++++++++-
 12 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 481fcb6508503..ac456a2275dba 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1440,6 +1440,13 @@ static void gen11_dsi_post_disable(struct intel_atomic_state *state,
 static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector,
 						 struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	/* FIXME: DSC? */
 	return intel_dsi_mode_valid(connector, mode);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index 0e33a0523a759..abaacea5c2cc4 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -348,8 +348,13 @@ intel_crt_mode_valid(struct drm_connector *connector,
 	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int max_dotclk = dev_priv->max_dotclk_freq;
+	enum drm_mode_status status;
 	int max_clock;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 5cf162628b95e..23b077f436146 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -7734,6 +7734,16 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev,
 	    mode->vtotal > vtotal_max)
 		return MODE_V_ILLEGAL;
 
+	return MODE_OK;
+}
+
+enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *dev_priv,
+						     const struct drm_display_mode *mode)
+{
+	/*
+	 * Additional transcoder timing limits,
+	 * excluding BXT/GLK DSI transcoders.
+	 */
 	if (DISPLAY_VER(dev_priv) >= 5) {
 		if (mode->hdisplay < 64 ||
 		    mode->htotal - mode->hdisplay < 32)
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 8548f49e39722..f4a0773f0fca8 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -402,6 +402,9 @@ enum drm_mode_status
 intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv,
 				const struct drm_display_mode *mode,
 				bool bigjoiner);
+enum drm_mode_status
+intel_cpu_transcoder_mode_valid(struct drm_i915_private *i915,
+				const struct drm_display_mode *mode);
 enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port);
 bool is_trans_port_sync_mode(const struct intel_crtc_state *state);
 bool is_trans_port_sync_master(const struct intel_crtc_state *state);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index f9b5fc2fdbcb9..68b2e69dca390 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1227,6 +1227,10 @@ intel_dp_mode_valid(struct drm_connector *_connector,
 	enum drm_mode_status status;
 	bool dsc = false, bigjoiner = false;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return MODE_H_ILLEGAL;
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 63364c9602efb..0514f825baf5e 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1282,6 +1282,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 		return 0;
 	}
 
+	*status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (*status != MODE_OK)
+		return 0;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN) {
 		*status = MODE_NO_DBLESCAN;
 		return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index 55d6743374bdd..9111e9d46486d 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -217,11 +217,17 @@ intel_dvo_mode_valid(struct drm_connector *_connector,
 		     struct drm_display_mode *mode)
 {
 	struct intel_connector *connector = to_intel_connector(_connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
 	const struct drm_display_mode *fixed_mode =
 		intel_panel_fixed_mode(connector, mode);
 	int max_dotclk = to_i915(connector->base.dev)->max_dotclk_freq;
 	int target_clock = mode->clock;
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index ab18cfc19c0a3..39e4f5f7c8171 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -1983,6 +1983,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 	bool ycbcr_420_only;
 	enum intel_output_format sink_format;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
 		clock *= 2;
 
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 4b114fde57b18..958b6fd0d7416 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -389,11 +389,16 @@ intel_lvds_mode_valid(struct drm_connector *_connector,
 		      struct drm_display_mode *mode)
 {
 	struct intel_connector *connector = to_intel_connector(_connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	const struct drm_display_mode *fixed_mode =
 		intel_panel_fixed_mode(connector, mode);
 	int max_pixclk = to_i915(connector->base.dev)->max_dotclk_freq;
 	enum drm_mode_status status;
 
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index c7af7e0461885..0362d02d70b6a 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1930,13 +1930,19 @@ static enum drm_mode_status
 intel_sdvo_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(connector);
-	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 	bool has_hdmi_sink = intel_has_hdmi_sink(intel_sdvo_connector, connector->state);
+	int max_dotclk = i915->max_dotclk_freq;
+	enum drm_mode_status status;
 	int clock = mode->clock;
 
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index 31a79fdfc8128..2ee4f0d958513 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -958,8 +958,14 @@ static enum drm_mode_status
 intel_tv_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
-	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
+	int max_dotclk = i915->max_dotclk_freq;
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index bda49734ca33d..5bda97c96810b 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -1544,9 +1544,25 @@ static const struct drm_encoder_funcs intel_dsi_funcs = {
 	.destroy = intel_dsi_encoder_destroy,
 };
 
+static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector,
+					       struct drm_display_mode *mode)
+{
+	struct drm_i915_private *i915 = to_i915(connector->dev);
+
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+		enum drm_mode_status status;
+
+		status = intel_cpu_transcoder_mode_valid(i915, mode);
+		if (status != MODE_OK)
+			return status;
+	}
+
+	return intel_dsi_mode_valid(connector, mode);
+}
+
 static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = {
 	.get_modes = intel_dsi_get_modes,
-	.mode_valid = intel_dsi_mode_valid,
+	.mode_valid = vlv_dsi_mode_valid,
 	.atomic_check = intel_digital_connector_atomic_check,
 };
 
-- 
GitLab


From c1799032d2ef6616113b733428dfaa2199a5604b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 27 Nov 2023 16:50:26 +0200
Subject: [PATCH 0530/2340] drm/i915/mst: Fix .mode_valid_ctx() return values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.mode_valid_ctx() returns an errno, not the mode status. Fix
the code to do the right thing.

Cc: stable@vger.kernel.org
Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-2-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 0514f825baf5e..0680a42f7d2a9 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1366,11 +1366,15 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 	 * Big joiner configuration needs DSC for TGL which is not true for
 	 * XE_LPD where uncompressed joiner is supported.
 	 */
-	if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc)
-		return MODE_CLOCK_HIGH;
+	if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) {
+		*status = MODE_CLOCK_HIGH;
+		return 0;
+	}
 
-	if (mode_rate > max_rate && !dsc)
-		return MODE_CLOCK_HIGH;
+	if (mode_rate > max_rate && !dsc) {
+		*status = MODE_CLOCK_HIGH;
+		return 0;
+	}
 
 	*status = intel_mode_valid_max_plane_size(dev_priv, mode, false);
 	return 0;
-- 
GitLab


From 9c058492b16f90bb772cb0dad567e8acc68e155d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 27 Nov 2023 16:50:27 +0200
Subject: [PATCH 0531/2340] drm/i915/mst: Reject modes that require the
 bigjoiner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have no bigjoiner support in the MST code, so .mode_valid()
pretending otherwise is just going to result black screens for
users. Reject any mode that needs the joiner.

Cc: stable@vger.kernel.org
Cc: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-3-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 0680a42f7d2a9..b665fe6ef871b 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1332,6 +1332,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 	if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) {
 		bigjoiner = true;
 		max_dotclk *= 2;
+
+		/* TODO: add support for bigjoiner */
+		*status = MODE_CLOCK_HIGH;
+		return 0;
 	}
 
 	if (DISPLAY_VER(dev_priv) >= 10 &&
-- 
GitLab


From 8dfce5f3095b79236b585bfa0e291b77ba4b6dbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 27 Nov 2023 16:50:28 +0200
Subject: [PATCH 0532/2340] drm/i915: Clean up some DISPLAY_VER checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the >= and < operators for the DISPLAY_VER checks everywhere.
This is what most of the code does, but especially recently random
pieces of code have started doing this differently for no good reason.

Conversion done with the following cocci:
@find@
expression i915;
constant ver;
@@
(
DISPLAY_VER(i915) <= ver
|
DISPLAY_VER(i915) > ver
)

@script:python inc@
old_ver << find.ver;
new_ver;
@@
coccinelle.new_ver = str(int(old_ver) + 1)

@@
expression find.i915;
constant find.ver;
identifier inc.new_ver;
@@
(
- DISPLAY_VER(i915) <= ver
+ DISPLAY_VER(i915) < new_ver
|
- DISPLAY_VER(i915) > ver
+ DISPLAY_VER(i915) >= new_ver
)

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-4-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/i9xx_wm.c              | 8 ++++----
 drivers/gpu/drm/i915/display/intel_bw.c             | 7 ++++---
 drivers/gpu/drm/i915/display/intel_cdclk.c          | 2 +-
 drivers/gpu/drm/i915/display/intel_display.c        | 8 ++++----
 drivers/gpu/drm/i915/display/intel_display_device.h | 2 +-
 drivers/gpu/drm/i915/display/intel_display_irq.c    | 2 +-
 drivers/gpu/drm/i915/display/intel_dp.c             | 2 +-
 drivers/gpu/drm/i915/display/intel_dp_mst.c         | 2 +-
 drivers/gpu/drm/i915/display/intel_lvds.c           | 2 +-
 drivers/gpu/drm/i915/display/intel_psr.c            | 8 ++++----
 10 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c
index b37c0d02d5000..03e8fb6caa83f 100644
--- a/drivers/gpu/drm/i915/display/i9xx_wm.c
+++ b/drivers/gpu/drm/i915/display/i9xx_wm.c
@@ -2477,7 +2477,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
 		 * FIFO size is only half of the self
 		 * refresh FIFO size on ILK/SNB.
 		 */
-		if (DISPLAY_VER(dev_priv) <= 6)
+		if (DISPLAY_VER(dev_priv) < 7)
 			fifo_size /= 2;
 	}
 
@@ -2818,7 +2818,7 @@ static int ilk_compute_pipe_wm(struct intel_atomic_state *state,
 	usable_level = dev_priv->display.wm.num_levels - 1;
 
 	/* ILK/SNB: LP2+ watermarks only w/o sprites */
-	if (DISPLAY_VER(dev_priv) <= 6 && pipe_wm->sprites_enabled)
+	if (DISPLAY_VER(dev_priv) < 7 && pipe_wm->sprites_enabled)
 		usable_level = 1;
 
 	/* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
@@ -2961,7 +2961,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv,
 	int last_enabled_level = num_levels - 1;
 
 	/* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
-	if ((DISPLAY_VER(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
+	if ((DISPLAY_VER(dev_priv) < 7 || IS_IVYBRIDGE(dev_priv)) &&
 	    config->num_pipes_active > 1)
 		last_enabled_level = 0;
 
@@ -3060,7 +3060,7 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
 		 * Always set WM_LP_SPRITE_EN when spr_val != 0, even if the
 		 * level is disabled. Doing otherwise could cause underruns.
 		 */
-		if (DISPLAY_VER(dev_priv) <= 6 && r->spr_val) {
+		if (DISPLAY_VER(dev_priv) < 7 && r->spr_val) {
 			drm_WARN_ON(&dev_priv->drm, wm_lp != 1);
 			results->wm_lp_spr[wm_lp - 1] |= WM_LP_SPRITE_ENABLE;
 		}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index bef96db62c807..7f2a50b4f4940 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -87,7 +87,8 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
 		return ret;
 
 	dclk = val & 0xffff;
-	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
+	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) >= 12 ? 500 : 0),
+				1000);
 	sp->t_rp = (val & 0xff0000) >> 16;
 	sp->t_rcd = (val & 0xff000000) >> 24;
 
@@ -480,7 +481,7 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
 	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
 		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);
 
-	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
+	if (DISPLAY_VER(dev_priv) >= 12 && num_channels > qi.max_numchannels)
 		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
 	if (qi.max_numchannels != 0)
 		num_channels = min_t(u8, num_channels, qi.max_numchannels);
@@ -897,7 +898,7 @@ static int icl_find_qgv_points(struct drm_i915_private *i915,
 		unsigned int idx;
 		unsigned int max_data_rate;
 
-		if (DISPLAY_VER(i915) > 11)
+		if (DISPLAY_VER(i915) >= 12)
 			idx = tgl_max_bw_index(i915, num_active_planes, i);
 		else
 			idx = icl_max_bw_index(i915, num_active_planes, i);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index b93d1ad7936d5..8bb6bab7c8cd4 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2597,7 +2597,7 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
 		 * Since PPC = 2 with bigjoiner
 		 * => CDCLK >= compressed_bpp * Pixel clock  / 2 * Bigjoiner Interface bits
 		 */
-		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
+		int bigjoiner_interface_bits = DISPLAY_VER(i915) >= 14 ? 36 : 24;
 		int min_cdclk_bj =
 			(to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) *
 			 pixel_clock) / (2 * bigjoiner_interface_bits);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 23b077f436146..9dc22fc8b3d3b 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2627,7 +2627,7 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta
 		crtc_vblank_start = 1;
 	}
 
-	if (DISPLAY_VER(dev_priv) > 3)
+	if (DISPLAY_VER(dev_priv) >= 4)
 		intel_de_write(dev_priv, TRANS_VSYNCSHIFT(cpu_transcoder),
 			       vsyncshift);
 
@@ -3167,7 +3167,7 @@ static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state)
 		break;
 	case 36:
 		/* Port output 12BPC defined for ADLP+ */
-		if (DISPLAY_VER(dev_priv) > 12)
+		if (DISPLAY_VER(dev_priv) >= 13)
 			val |= PIPE_MISC_BPC_12_ADLP;
 		break;
 	default:
@@ -3224,7 +3224,7 @@ int bdw_get_pipe_misc_bpp(struct intel_crtc *crtc)
 	 * MIPI DSI HW readout.
 	 */
 	case PIPE_MISC_BPC_12_ADLP:
-		if (DISPLAY_VER(dev_priv) > 12)
+		if (DISPLAY_VER(dev_priv) >= 13)
 			return 36;
 		fallthrough;
 	default:
@@ -7763,7 +7763,7 @@ enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *de
 	 * Cantiga+ cannot handle modes with a hsync front porch of 0.
 	 * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw.
 	 */
-	if ((DISPLAY_VER(dev_priv) > 4 || IS_G4X(dev_priv)) &&
+	if ((DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) &&
 	    mode->hsync_start == mode->hdisplay)
 		return MODE_H_ILLEGAL;
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index 4299cc452e050..79e9f1c3e241f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -49,7 +49,7 @@ struct drm_printer;
 #define HAS_DSC(__i915)			(DISPLAY_RUNTIME_INFO(__i915)->has_dsc)
 #define HAS_FBC(i915)			(DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0)
 #define HAS_FPGA_DBG_UNCLAIMED(i915)	(DISPLAY_INFO(i915)->has_fpga_dbg)
-#define HAS_FW_BLC(i915)		(DISPLAY_VER(i915) > 2)
+#define HAS_FW_BLC(i915)		(DISPLAY_VER(i915) >= 3)
 #define HAS_GMBUS_IRQ(i915)		(DISPLAY_VER(i915) >= 4)
 #define HAS_GMBUS_BURST_READ(i915)	(DISPLAY_VER(i915) >= 10 || IS_KABYLAKE(i915))
 #define HAS_GMCH(i915)			(DISPLAY_INFO(i915)->has_gmch)
diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c
index bff4a76310c0e..5a3e10e73e59e 100644
--- a/drivers/gpu/drm/i915/display/intel_display_irq.c
+++ b/drivers/gpu/drm/i915/display/intel_display_irq.c
@@ -1653,7 +1653,7 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 	else if (HAS_PCH_SPLIT(dev_priv))
 		ibx_irq_postinstall(dev_priv);
 
-	if (DISPLAY_VER(dev_priv) <= 10)
+	if (DISPLAY_VER(dev_priv) < 11)
 		de_misc_masked |= GEN8_DE_MISC_GSE;
 
 	if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 68b2e69dca390..0b24c0cba94e3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1890,7 +1890,7 @@ static int dsc_src_max_compressed_bpp(struct intel_dp *intel_dp)
 	 * Max Compressed bpp for Gen 13+ is 27bpp.
 	 * For earlier platform is 23bpp. (Bspec:49259).
 	 */
-	if (DISPLAY_VER(i915) <= 12)
+	if (DISPLAY_VER(i915) < 13)
 		return 23;
 	else
 		return 27;
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index b665fe6ef871b..e8940acea8ad1 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -54,7 +54,7 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp
 					  struct intel_crtc_state *crtc_state,
 					  bool dsc)
 {
-	if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) <= 13 && dsc) {
+	if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) < 14 && dsc) {
 		int output_bpp = bpp;
 		/* DisplayPort 2 128b/132b, bits per lane is always 32 */
 		int symbol_clock = crtc_state->port_clock / 32;
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 958b6fd0d7416..221f5c6c871b1 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -185,7 +185,7 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
 	/* Convert from 100ms to 100us units */
 	pps->t4 = val * 1000;
 
-	if (DISPLAY_VER(dev_priv) <= 4 &&
+	if (DISPLAY_VER(dev_priv) < 5 &&
 	    pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "Panel power timings uninitialized, "
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 36e4a1e9b98f8..6029bb71276cf 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -806,10 +806,10 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 
 	val |= EDP_PSR2_IDLE_FRAMES(psr_compute_idle_frames(intel_dp));
 
-	if (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))
+	if (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))
 		val |= EDP_SU_TRACK_ENABLE;
 
-	if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12)
+	if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) < 13)
 		val |= EDP_Y_COORDINATE_ENABLE;
 
 	val |= EDP_PSR2_FRAME_BEFORE_SU(frames_before_su_entry(intel_dp));
@@ -1094,7 +1094,7 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d
 		return true;
 
 	/* Not supported <13 / Wa_22012279113:adl-p */
-	if (DISPLAY_VER(dev_priv) <= 13 || intel_dp->edp_dpcd[0] < DP_EDP_14b)
+	if (DISPLAY_VER(dev_priv) < 14 || intel_dp->edp_dpcd[0] < DP_EDP_14b)
 		return false;
 
 	crtc_state->req_psr2_sdp_prior_scanline = true;
@@ -1221,7 +1221,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 	 * over PSR2.
 	 */
 	if (crtc_state->dsc.compression_enable &&
-	    (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))) {
+	    (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "PSR2 cannot be enabled since DSC is enabled\n");
 		return false;
-- 
GitLab


From fd2096500acb8b57a66a75ec7985049a5650cff1 Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <sergeantsagara@protonmail.com>
Date: Sun, 26 Nov 2023 21:42:01 +0000
Subject: [PATCH 0533/2340] drm/i915/irq: Improve error logging for unexpected
 DE Misc interrupts

Dump the iir value in hex when the interrupt is unexpected.

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9652#note_2178501
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rahul Rameshbabu <sergeantsagara@protonmail.com>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231126214142.102106-1-sergeantsagara@protonmail.com
---
 drivers/gpu/drm/i915/display/intel_display_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c
index 5a3e10e73e59e..f8ed53f30b2e4 100644
--- a/drivers/gpu/drm/i915/display/intel_display_irq.c
+++ b/drivers/gpu/drm/i915/display/intel_display_irq.c
@@ -896,7 +896,7 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
 	}
 
 	if (!found)
-		drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt\n");
+		drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt: 0x%08x\n", iir);
 }
 
 static void gen11_dsi_te_interrupt_handler(struct drm_i915_private *dev_priv,
-- 
GitLab


From ef32c3cc9c62252986f09e06b4e525742cd91529 Mon Sep 17 00:00:00 2001
From: heminhong <heminhong@kylinos.cn>
Date: Tue, 14 Nov 2023 10:43:41 +0800
Subject: [PATCH 0534/2340] drm/i915: correct the input parameter on
 _intel_dsb_commit()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Current, the dewake_scanline variable is defined as unsigned int,
an unsigned int variable that is always greater than or equal to 0.
when _intel_dsb_commit function is called by intel_dsb_commit function,
the dewake_scanline variable may have an int value.
So the dewake_scanline variable is necessary to defined as an int.

Fixes: f83b94d23770 ("drm/i915/dsb: Use DEwake to combat PkgC latency")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202310052201.AnVbpgPr-lkp@intel.com/
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: heminhong <heminhong@kylinos.cn>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114024341.14524-1-heminhong@kylinos.cn
---
 drivers/gpu/drm/i915/display/intel_dsb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 9598d50f68f22..482c28b5c2de5 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -341,7 +341,7 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
 }
 
 static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
-			      unsigned int dewake_scanline)
+			      int dewake_scanline)
 {
 	struct intel_crtc *crtc = dsb->crtc;
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-- 
GitLab


From 0f82a1b94862da255ac791e11f2c3610f5ad5f26 Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Tue, 28 Nov 2023 15:54:51 +0530
Subject: [PATCH 0535/2340] drm/i915/display: Fix IP version of the WAs

WAs 14011508470, 14011503030 were applied on IP versions beyond which
they are applicable. Fixed the IP version checks for these workarounds.

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128102451.825242-1-balasubramani.vivekanandan@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_power.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index e390595d73416..f23080a4368dd 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -1697,14 +1697,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv,
 	if (resume)
 		intel_dmc_load_program(dev_priv);
 
-	/* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */
-	if (DISPLAY_VER(dev_priv) >= 12)
+	/* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */
+	if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0)))
 		intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0,
 			     DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM |
 			     DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR);
 
 	/* Wa_14011503030:xelpd */
-	if (DISPLAY_VER(dev_priv) >= 13)
+	if (DISPLAY_VER(dev_priv) == 13)
 		intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0);
 }
 
-- 
GitLab


From dad19630c476f4c3d88f222c60f254f13e6245ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 29 Nov 2023 10:06:37 +0100
Subject: [PATCH 0536/2340] Documentation/gpu: VM_BIND locking document
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the first version of the VM_BIND locking document which is
intended to be part of the xe driver upstreaming agreement.

The document describes and discuss the locking used during exec-
functions, evicton and for userptr gpu-vmas. Intention is to be using the
same nomenclature as the drm-vm-bind-async.rst.

v2:
- s/gvm/gpu_vm/g (Rodrigo Vivi)
- Clarify the userptr seqlock with a pointer to mm/mmu_notifier.c
  (Rodrigo Vivi)
- Adjust commit message accordingly.
- Add SPDX license header.

v3:
- Large update to align with the drm_gpuvm manager locking
- Add "Efficient userptr gpu_vma exec function iteration" section
- Add "Locking at bind- and unbind time" section.

v4:
- Fix tabs vs space errors by untabifying (Rodrigo Vivi)
- Minor style fixes and typos (Rodrigo Vivi)
- Clarify situations where stale GPU mappings are occurring and how
  access through these mappings are blocked. (Rodrigo Vivi)
- Insert into the toctree in implementation_guidelines.rst

v5:
- Add a section about recoverable page-faults.
- Use local references to other documentation where possible
  (Bagas Sanjaya)
- General documentation fixes and typos (Danilo Krummrich and
  Boris Brezillon)
- Improve the documentation around locks that need to be grabbed from the
  dm-fence critical section (Boris Brezillon)
- Add more references to the DRM GPUVM helpers (Danilo Krummrich and
  Boriz Brezillon)
- Update the rfc/xe.rst document.

v6:
- Rework wording to improve readability (Boris Brezillon, Rodrigo Vivi,
  Bagas Sanjaya)
- Various minor fixes across the document (Boris Brezillon)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Acked-by: John Hubbard <jhubbard@nvidia.com> # Documentation/core-api/pin_user_pages.rst changes
Link: https://patchwork.freedesktop.org/patch/msgid/20231129090637.2629-1-thomas.hellstrom@linux.intel.com
---
 Documentation/core-api/pin_user_pages.rst     |   2 +
 Documentation/gpu/drm-mm.rst                  |   4 +
 Documentation/gpu/drm-vm-bind-locking.rst     | 582 ++++++++++++++++++
 .../gpu/implementation_guidelines.rst         |   1 +
 Documentation/gpu/rfc/xe.rst                  |   5 +
 5 files changed, 594 insertions(+)
 create mode 100644 Documentation/gpu/drm-vm-bind-locking.rst

diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst
index d3c1f6d8c0e0e..6b5f7e6e7155f 100644
--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -153,6 +153,8 @@ NOTE: Some pages, such as DAX pages, cannot be pinned with longterm pins. That's
 because DAX pages do not have a separate page cache, and so "pinning" implies
 locking down file system blocks, which is not (yet) supported in that way.
 
+.. _mmu-notifier-registration-case:
+
 CASE 3: MMU notifier registration, with or without page faulting hardware
 -------------------------------------------------------------------------
 Device drivers can pin pages via get_user_pages*(), and register for mmu
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index acc5901ac8408..d55751cad67cf 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -466,6 +466,8 @@ DRM MM Range Allocator Function References
 .. kernel-doc:: drivers/gpu/drm/drm_mm.c
    :export:
 
+.. _drm_gpuvm:
+
 DRM GPUVM
 =========
 
@@ -481,6 +483,8 @@ Split and Merge
 .. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
    :doc: Split and Merge
 
+.. _drm_gpuvm_locking:
+
 Locking
 -------
 
diff --git a/Documentation/gpu/drm-vm-bind-locking.rst b/Documentation/gpu/drm-vm-bind-locking.rst
new file mode 100644
index 0000000000000..a345aa513d12b
--- /dev/null
+++ b/Documentation/gpu/drm-vm-bind-locking.rst
@@ -0,0 +1,582 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+===============
+VM_BIND locking
+===============
+
+This document attempts to describe what's needed to get VM_BIND locking right,
+including the userptr mmu_notifier locking. It also discusses some
+optimizations to get rid of the looping through of all userptr mappings and
+external / shared object mappings that is needed in the simplest
+implementation. In addition, there is a section describing the VM_BIND locking
+required for implementing recoverable pagefaults.
+
+The DRM GPUVM set of helpers
+============================
+
+There is a set of helpers for drivers implementing VM_BIND, and this
+set of helpers implements much, but not all of the locking described
+in this document. In particular, it is currently lacking a userptr
+implementation. This document does not intend to describe the DRM GPUVM
+implementation in detail, but it is covered in :ref:`its own
+documentation <drm_gpuvm>`. It is highly recommended for any driver
+implementing VM_BIND to use the DRM GPUVM helpers and to extend it if
+common functionality is missing.
+
+Nomenclature
+============
+
+* ``gpu_vm``: Abstraction of a virtual GPU address space with
+  meta-data. Typically one per client (DRM file-private), or one per
+  execution context.
+* ``gpu_vma``: Abstraction of a GPU address range within a gpu_vm with
+  associated meta-data. The backing storage of a gpu_vma can either be
+  a GEM object or anonymous or page-cache pages mapped also into the CPU
+  address space for the process.
+* ``gpu_vm_bo``: Abstracts the association of a GEM object and
+  a VM. The GEM object maintains a list of gpu_vm_bos, where each gpu_vm_bo
+  maintains a list of gpu_vmas.
+* ``userptr gpu_vma or just userptr``: A gpu_vma, whose backing store
+  is anonymous or page-cache pages as described above.
+* ``revalidating``: Revalidating a gpu_vma means making the latest version
+  of the backing store resident and making sure the gpu_vma's
+  page-table entries point to that backing store.
+* ``dma_fence``: A struct dma_fence that is similar to a struct completion
+  and which tracks GPU activity. When the GPU activity is finished,
+  the dma_fence signals. Please refer to the ``DMA Fences`` section of
+  the :doc:`dma-buf doc </driver-api/dma-buf>`.
+* ``dma_resv``: A struct dma_resv (a.k.a reservation object) that is used
+  to track GPU activity in the form of multiple dma_fences on a
+  gpu_vm or a GEM object. The dma_resv contains an array / list
+  of dma_fences and a lock that needs to be held when adding
+  additional dma_fences to the dma_resv. The lock is of a type that
+  allows deadlock-safe locking of multiple dma_resvs in arbitrary
+  order. Please refer to the ``Reservation Objects`` section of the
+  :doc:`dma-buf doc </driver-api/dma-buf>`.
+* ``exec function``: An exec function is a function that revalidates all
+  affected gpu_vmas, submits a GPU command batch and registers the
+  dma_fence representing the GPU command's activity with all affected
+  dma_resvs. For completeness, although not covered by this document,
+  it's worth mentioning that an exec function may also be the
+  revalidation worker that is used by some drivers in compute /
+  long-running mode.
+* ``local object``: A GEM object which is only mapped within a
+  single VM. Local GEM objects share the gpu_vm's dma_resv.
+* ``external object``: a.k.a shared object: A GEM object which may be shared
+  by multiple gpu_vms and whose backing storage may be shared with
+  other drivers.
+
+Locks and locking order
+=======================
+
+One of the benefits of VM_BIND is that local GEM objects share the gpu_vm's
+dma_resv object and hence the dma_resv lock. So, even with a huge
+number of local GEM objects, only one lock is needed to make the exec
+sequence atomic.
+
+The following locks and locking orders are used:
+
+* The ``gpu_vm->lock`` (optionally an rwsem). Protects the gpu_vm's
+  data structure keeping track of gpu_vmas. It can also protect the
+  gpu_vm's list of userptr gpu_vmas. With a CPU mm analogy this would
+  correspond to the mmap_lock. An rwsem allows several readers to walk
+  the VM tree concurrently, but the benefit of that concurrency most
+  likely varies from driver to driver.
+* The ``userptr_seqlock``. This lock is taken in read mode for each
+  userptr gpu_vma on the gpu_vm's userptr list, and in write mode during mmu
+  notifier invalidation. This is not a real seqlock but described in
+  ``mm/mmu_notifier.c`` as a "Collision-retry read-side/write-side
+  'lock' a lot like a seqcount. However this allows multiple
+  write-sides to hold it at once...". The read side critical section
+  is enclosed by ``mmu_interval_read_begin() /
+  mmu_interval_read_retry()`` with ``mmu_interval_read_begin()``
+  sleeping if the write side is held.
+  The write side is held by the core mm while calling mmu interval
+  invalidation notifiers.
+* The ``gpu_vm->resv`` lock. Protects the gpu_vm's list of gpu_vmas needing
+  rebinding, as well as the residency state of all the gpu_vm's local
+  GEM objects.
+  Furthermore, it typically protects the gpu_vm's list of evicted and
+  external GEM objects.
+* The ``gpu_vm->userptr_notifier_lock``. This is an rwsem that is
+  taken in read mode during exec and write mode during a mmu notifier
+  invalidation. The userptr notifier lock is per gpu_vm.
+* The ``gem_object->gpuva_lock`` This lock protects the GEM object's
+  list of gpu_vm_bos. This is usually the same lock as the GEM
+  object's dma_resv, but some drivers protects this list differently,
+  see below.
+* The ``gpu_vm list spinlocks``. With some implementations they are needed
+  to be able to update the gpu_vm evicted- and external object
+  list. For those implementations, the spinlocks are grabbed when the
+  lists are manipulated. However, to avoid locking order violations
+  with the dma_resv locks, a special scheme is needed when iterating
+  over the lists.
+
+.. _gpu_vma lifetime:
+
+Protection and lifetime of gpu_vm_bos and gpu_vmas
+==================================================
+
+The GEM object's list of gpu_vm_bos, and the gpu_vm_bo's list of gpu_vmas
+is protected by the ``gem_object->gpuva_lock``, which is typically the
+same as the GEM object's dma_resv, but if the driver
+needs to access these lists from within a dma_fence signalling
+critical section, it can instead choose to protect it with a
+separate lock, which can be locked from within the dma_fence signalling
+critical section. Such drivers then need to pay additional attention
+to what locks need to be taken from within the loop when iterating
+over the gpu_vm_bo and gpu_vma lists to avoid locking-order violations.
+
+The DRM GPUVM set of helpers provide lockdep asserts that this lock is
+held in relevant situations and also provides a means of making itself
+aware of which lock is actually used: :c:func:`drm_gem_gpuva_set_lock`.
+
+Each gpu_vm_bo holds a reference counted pointer to the underlying GEM
+object, and each gpu_vma holds a reference counted pointer to the
+gpu_vm_bo. When iterating over the GEM object's list of gpu_vm_bos and
+over the gpu_vm_bo's list of gpu_vmas, the ``gem_object->gpuva_lock`` must
+not be dropped, otherwise, gpu_vmas attached to a gpu_vm_bo may
+disappear without notice since those are not reference-counted. A
+driver may implement its own scheme to allow this at the expense of
+additional complexity, but this is outside the scope of this document.
+
+In the DRM GPUVM implementation, each gpu_vm_bo and each gpu_vma
+holds a reference count on the gpu_vm itself. Due to this, and to avoid circular
+reference counting, cleanup of the gpu_vm's gpu_vmas must not be done from the
+gpu_vm's destructor. Drivers typically implements a gpu_vm close
+function for this cleanup. The gpu_vm close function will abort gpu
+execution using this VM, unmap all gpu_vmas and release page-table memory.
+
+Revalidation and eviction of local objects
+==========================================
+
+Note that in all the code examples given below we use simplified
+pseudo-code. In particular, the dma_resv deadlock avoidance algorithm
+as well as reserving memory for dma_resv fences is left out.
+
+Revalidation
+____________
+With VM_BIND, all local objects need to be resident when the gpu is
+executing using the gpu_vm, and the objects need to have valid
+gpu_vmas set up pointing to them. Typically, each gpu command buffer
+submission is therefore preceded with a re-validation section:
+
+.. code-block:: C
+
+   dma_resv_lock(gpu_vm->resv);
+
+   // Validation section starts here.
+   for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
+           validate_gem_bo(&gpu_vm_bo->gem_bo);
+
+           // The following list iteration needs the Gem object's
+           // dma_resv to be held (it protects the gpu_vm_bo's list of
+           // gpu_vmas, but since local gem objects share the gpu_vm's
+           // dma_resv, it is already held at this point.
+           for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
+                  move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
+   }
+
+   for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) {
+           rebind_gpu_vma(&gpu_vma);
+           remove_gpu_vma_from_rebind_list(&gpu_vma);
+   }
+   // Validation section ends here, and job submission starts.
+
+   add_dependencies(&gpu_job, &gpu_vm->resv);
+   job_dma_fence = gpu_submit(&gpu_job));
+
+   add_dma_fence(job_dma_fence, &gpu_vm->resv);
+   dma_resv_unlock(gpu_vm->resv);
+
+The reason for having a separate gpu_vm rebind list is that there
+might be userptr gpu_vmas that are not mapping a buffer object that
+also need rebinding.
+
+Eviction
+________
+
+Eviction of one of these local objects will then look similar to the
+following:
+
+.. code-block:: C
+
+   obj = get_object_from_lru();
+
+   dma_resv_lock(obj->resv);
+   for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo);
+           add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
+
+   add_dependencies(&eviction_job, &obj->resv);
+   job_dma_fence = gpu_submit(&eviction_job);
+   add_dma_fence(&obj->resv, job_dma_fence);
+
+   dma_resv_unlock(&obj->resv);
+   put_object(obj);
+
+Note that since the object is local to the gpu_vm, it will share the gpu_vm's
+dma_resv lock such that ``obj->resv == gpu_vm->resv``.
+The gpu_vm_bos marked for eviction are put on the gpu_vm's evict list,
+which is protected by ``gpu_vm->resv``. During eviction all local
+objects have their dma_resv locked and, due to the above equality, also
+the gpu_vm's dma_resv protecting the gpu_vm's evict list is locked.
+
+With VM_BIND, gpu_vmas don't need to be unbound before eviction,
+since the driver must ensure that the eviction blit or copy will wait
+for GPU idle or depend on all previous GPU activity. Furthermore, any
+subsequent attempt by the GPU to access freed memory through the
+gpu_vma will be preceded by a new exec function, with a revalidation
+section which will make sure all gpu_vmas are rebound. The eviction
+code holding the object's dma_resv while revalidating will ensure a
+new exec function may not race with the eviction.
+
+A driver can be implemented in such a way that, on each exec function,
+only a subset of vmas are selected for rebind.  In this case, all vmas that are
+*not* selected for rebind must be unbound before the exec
+function workload is submitted.
+
+Locking with external buffer objects
+====================================
+
+Since external buffer objects may be shared by multiple gpu_vm's they
+can't share their reservation object with a single gpu_vm. Instead
+they need to have a reservation object of their own. The external
+objects bound to a gpu_vm using one or many gpu_vmas are therefore put on a
+per-gpu_vm list which is protected by the gpu_vm's dma_resv lock or
+one of the :ref:`gpu_vm list spinlocks <Spinlock iteration>`. Once
+the gpu_vm's reservation object is locked, it is safe to traverse the
+external object list and lock the dma_resvs of all external
+objects. However, if instead a list spinlock is used, a more elaborate
+iteration scheme needs to be used.
+
+At eviction time, the gpu_vm_bos of *all* the gpu_vms an external
+object is bound to need to be put on their gpu_vm's evict list.
+However, when evicting an external object, the dma_resvs of the
+gpu_vms the object is bound to are typically not held. Only
+the object's private dma_resv can be guaranteed to be held. If there
+is a ww_acquire context at hand at eviction time we could grab those
+dma_resvs but that could cause expensive ww_mutex rollbacks. A simple
+option is to just mark the gpu_vm_bos of the evicted gem object with
+an ``evicted`` bool that is inspected before the next time the
+corresponding gpu_vm evicted list needs to be traversed. For example, when
+traversing the list of external objects and locking them. At that time,
+both the gpu_vm's dma_resv and the object's dma_resv is held, and the
+gpu_vm_bo marked evicted, can then be added to the gpu_vm's list of
+evicted gpu_vm_bos. The ``evicted`` bool is formally protected by the
+object's dma_resv.
+
+The exec function becomes
+
+.. code-block:: C
+
+   dma_resv_lock(gpu_vm->resv);
+
+   // External object list is protected by the gpu_vm->resv lock.
+   for_each_gpu_vm_bo_on_extobj_list(gpu_vm, &gpu_vm_bo) {
+           dma_resv_lock(gpu_vm_bo.gem_obj->resv);
+           if (gpu_vm_bo_marked_evicted(&gpu_vm_bo))
+                   add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
+   }
+
+   for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
+           validate_gem_bo(&gpu_vm_bo->gem_bo);
+
+           for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
+                  move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
+   }
+
+   for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) {
+           rebind_gpu_vma(&gpu_vma);
+           remove_gpu_vma_from_rebind_list(&gpu_vma);
+   }
+
+   add_dependencies(&gpu_job, &gpu_vm->resv);
+   job_dma_fence = gpu_submit(&gpu_job));
+
+   add_dma_fence(job_dma_fence, &gpu_vm->resv);
+   for_each_external_obj(gpu_vm, &obj)
+          add_dma_fence(job_dma_fence, &obj->resv);
+   dma_resv_unlock_all_resv_locks();
+
+And the corresponding shared-object aware eviction would look like:
+
+.. code-block:: C
+
+   obj = get_object_from_lru();
+
+   dma_resv_lock(obj->resv);
+   for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo)
+           if (object_is_vm_local(obj))
+                add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
+           else
+                mark_gpu_vm_bo_evicted(&gpu_vm_bo);
+
+   add_dependencies(&eviction_job, &obj->resv);
+   job_dma_fence = gpu_submit(&eviction_job);
+   add_dma_fence(&obj->resv, job_dma_fence);
+
+   dma_resv_unlock(&obj->resv);
+   put_object(obj);
+
+.. _Spinlock iteration:
+
+Accessing the gpu_vm's lists without the dma_resv lock held
+===========================================================
+
+Some drivers will hold the gpu_vm's dma_resv lock when accessing the
+gpu_vm's evict list and external objects lists. However, there are
+drivers that need to access these lists without the dma_resv lock
+held, for example due to asynchronous state updates from within the
+dma_fence signalling critical path. In such cases, a spinlock can be
+used to protect manipulation of the lists. However, since higher level
+sleeping locks need to be taken for each list item while iterating
+over the lists, the items already iterated over need to be
+temporarily moved to a private list and the spinlock released
+while processing each item:
+
+.. code block:: C
+
+    struct list_head still_in_list;
+
+    INIT_LIST_HEAD(&still_in_list);
+
+    spin_lock(&gpu_vm->list_lock);
+    do {
+            struct list_head *entry = list_first_entry_or_null(&gpu_vm->list, head);
+
+            if (!entry)
+                    break;
+
+            list_move_tail(&entry->head, &still_in_list);
+            list_entry_get_unless_zero(entry);
+            spin_unlock(&gpu_vm->list_lock);
+
+            process(entry);
+
+            spin_lock(&gpu_vm->list_lock);
+            list_entry_put(entry);
+    } while (true);
+
+    list_splice_tail(&still_in_list, &gpu_vm->list);
+    spin_unlock(&gpu_vm->list_lock);
+
+Due to the additional locking and atomic operations, drivers that *can*
+avoid accessing the gpu_vm's list outside of the dma_resv lock
+might want to avoid also this iteration scheme. Particularly, if the
+driver anticipates a large number of list items. For lists where the
+anticipated number of list items is small, where list iteration doesn't
+happen very often or if there is a significant additional cost
+associated with each iteration, the atomic operation overhead
+associated with this type of iteration is, most likely, negligible. Note that
+if this scheme is used, it is necessary to make sure this list
+iteration is protected by an outer level lock or semaphore, since list
+items are temporarily pulled off the list while iterating, and it is
+also worth mentioning that the local list ``still_in_list`` should
+also be considered protected by the ``gpu_vm->list_lock``, and it is
+thus possible that items can be removed also from the local list
+concurrently with list iteration.
+
+Please refer to the :ref:`DRM GPUVM locking section
+<drm_gpuvm_locking>` and its internal
+:c:func:`get_next_vm_bo_from_list` function.
+
+
+userptr gpu_vmas
+================
+
+A userptr gpu_vma is a gpu_vma that, instead of mapping a buffer object to a
+GPU virtual address range, directly maps a CPU mm range of anonymous-
+or file page-cache pages.
+A very simple approach would be to just pin the pages using
+pin_user_pages() at bind time and unpin them at unbind time, but this
+creates a Denial-Of-Service vector since a single user-space process
+would be able to pin down all of system memory, which is not
+desirable. (For special use-cases and assuming proper accounting pinning might
+still be a desirable feature, though). What we need to do in the
+general case is to obtain a reference to the desired pages, make sure
+we are notified using a MMU notifier just before the CPU mm unmaps the
+pages, dirty them if they are not mapped read-only to the GPU, and
+then drop the reference.
+When we are notified by the MMU notifier that CPU mm is about to drop the
+pages, we need to stop GPU access to the pages by waiting for VM idle
+in the MMU notifier and make sure that before the next time the GPU
+tries to access whatever is now present in the CPU mm range, we unmap
+the old pages from the GPU page tables and repeat the process of
+obtaining new page references. (See the :ref:`notifier example
+<Invalidation example>` below). Note that when the core mm decides to
+laundry pages, we get such an unmap MMU notification and can mark the
+pages dirty again before the next GPU access. We also get similar MMU
+notifications for NUMA accounting which the GPU driver doesn't really
+need to care about, but so far it has proven difficult to exclude
+certain notifications.
+
+Using a MMU notifier for device DMA (and other methods) is described in
+:ref:`the pin_user_pages() documentation <mmu-notifier-registration-case>`.
+
+Now, the method of obtaining struct page references using
+get_user_pages() unfortunately can't be used under a dma_resv lock
+since that would violate the locking order of the dma_resv lock vs the
+mmap_lock that is grabbed when resolving a CPU pagefault. This means
+the gpu_vm's list of userptr gpu_vmas needs to be protected by an
+outer lock, which in our example below is the ``gpu_vm->lock``.
+
+The MMU interval seqlock for a userptr gpu_vma is used in the following
+way:
+
+.. code-block:: C
+
+   // Exclusive locking mode here is strictly needed only if there are
+   // invalidated userptr gpu_vmas present, to avoid concurrent userptr
+   // revalidations of the same userptr gpu_vma.
+   down_write(&gpu_vm->lock);
+   retry:
+
+   // Note: mmu_interval_read_begin() blocks until there is no
+   // invalidation notifier running anymore.
+   seq = mmu_interval_read_begin(&gpu_vma->userptr_interval);
+   if (seq != gpu_vma->saved_seq) {
+           obtain_new_page_pointers(&gpu_vma);
+           dma_resv_lock(&gpu_vm->resv);
+           add_gpu_vma_to_revalidate_list(&gpu_vma, &gpu_vm);
+           dma_resv_unlock(&gpu_vm->resv);
+           gpu_vma->saved_seq = seq;
+   }
+
+   // The usual revalidation goes here.
+
+   // Final userptr sequence validation may not happen before the
+   // submission dma_fence is added to the gpu_vm's resv, from the POW
+   // of the MMU invalidation notifier. Hence the
+   // userptr_notifier_lock that will make them appear atomic.
+
+   add_dependencies(&gpu_job, &gpu_vm->resv);
+   down_read(&gpu_vm->userptr_notifier_lock);
+   if (mmu_interval_read_retry(&gpu_vma->userptr_interval, gpu_vma->saved_seq)) {
+          up_read(&gpu_vm->userptr_notifier_lock);
+          goto retry;
+   }
+
+   job_dma_fence = gpu_submit(&gpu_job));
+
+   add_dma_fence(job_dma_fence, &gpu_vm->resv);
+
+   for_each_external_obj(gpu_vm, &obj)
+          add_dma_fence(job_dma_fence, &obj->resv);
+
+   dma_resv_unlock_all_resv_locks();
+   up_read(&gpu_vm->userptr_notifier_lock);
+   up_write(&gpu_vm->lock);
+
+The code between ``mmu_interval_read_begin()`` and the
+``mmu_interval_read_retry()`` marks the read side critical section of
+what we call the ``userptr_seqlock``. In reality, the gpu_vm's userptr
+gpu_vma list is looped through, and the check is done for *all* of its
+userptr gpu_vmas, although we only show a single one here.
+
+The userptr gpu_vma MMU invalidation notifier might be called from
+reclaim context and, again, to avoid locking order violations, we can't
+take any dma_resv lock nor the gpu_vm->lock from within it.
+
+.. _Invalidation example:
+.. code-block:: C
+
+  bool gpu_vma_userptr_invalidate(userptr_interval, cur_seq)
+  {
+          // Make sure the exec function either sees the new sequence
+          // and backs off or we wait for the dma-fence:
+
+          down_write(&gpu_vm->userptr_notifier_lock);
+          mmu_interval_set_seq(userptr_interval, cur_seq);
+          up_write(&gpu_vm->userptr_notifier_lock);
+
+          // At this point, the exec function can't succeed in
+          // submitting a new job, because cur_seq is an invalid
+          // sequence number and will always cause a retry. When all
+          // invalidation callbacks, the mmu notifier core will flip
+          // the sequence number to a valid one. However we need to
+          // stop gpu access to the old pages here.
+
+          dma_resv_wait_timeout(&gpu_vm->resv, DMA_RESV_USAGE_BOOKKEEP,
+                                false, MAX_SCHEDULE_TIMEOUT);
+          return true;
+  }
+
+When this invalidation notifier returns, the GPU can no longer be
+accessing the old pages of the userptr gpu_vma and needs to redo the
+page-binding before a new GPU submission can succeed.
+
+Efficient userptr gpu_vma exec_function iteration
+_________________________________________________
+
+If the gpu_vm's list of userptr gpu_vmas becomes large, it's
+inefficient to iterate through the complete lists of userptrs on each
+exec function to check whether each userptr gpu_vma's saved
+sequence number is stale. A solution to this is to put all
+*invalidated* userptr gpu_vmas on a separate gpu_vm list and
+only check the gpu_vmas present on this list on each exec
+function. This list will then lend itself very-well to the spinlock
+locking scheme that is
+:ref:`described in the spinlock iteration section <Spinlock iteration>`, since
+in the mmu notifier, where we add the invalidated gpu_vmas to the
+list, it's not possible to take any outer locks like the
+``gpu_vm->lock`` or the ``gpu_vm->resv`` lock. Note that the
+``gpu_vm->lock`` still needs to be taken while iterating to ensure the list is
+complete, as also mentioned in that section.
+
+If using an invalidated userptr list like this, the retry check in the
+exec function trivially becomes a check for invalidated list empty.
+
+Locking at bind and unbind time
+===============================
+
+At bind time, assuming a GEM object backed gpu_vma, each
+gpu_vma needs to be associated with a gpu_vm_bo and that
+gpu_vm_bo in turn needs to be added to the GEM object's
+gpu_vm_bo list, and possibly to the gpu_vm's external object
+list. This is referred to as *linking* the gpu_vma, and typically
+requires that the ``gpu_vm->lock`` and the ``gem_object->gpuva_lock``
+are held. When unlinking a gpu_vma the same locks should be held,
+and that ensures that when iterating over ``gpu_vmas`, either under
+the ``gpu_vm->resv`` or the GEM object's dma_resv, that the gpu_vmas
+stay alive as long as the lock under which we iterate is not released. For
+userptr gpu_vmas it's similarly required that during vma destroy, the
+outer ``gpu_vm->lock`` is held, since otherwise when iterating over
+the invalidated userptr list as described in the previous section,
+there is nothing keeping those userptr gpu_vmas alive.
+
+Locking for recoverable page-fault page-table updates
+=====================================================
+
+There are two important things we need to ensure with locking for
+recoverable page-faults:
+
+* At the time we return pages back to the system / allocator for
+  reuse, there should be no remaining GPU mappings and any GPU TLB
+  must have been flushed.
+* The unmapping and mapping of a gpu_vma must not race.
+
+Since the unmapping (or zapping) of GPU ptes is typically taking place
+where it is hard or even impossible to take any outer level locks we
+must either introduce a new lock that is held at both mapping and
+unmapping time, or look at the locks we do hold at unmapping time and
+make sure that they are held also at mapping time. For userptr
+gpu_vmas, the ``userptr_seqlock`` is held in write mode in the mmu
+invalidation notifier where zapping happens. Hence, if the
+``userptr_seqlock`` as well as the ``gpu_vm->userptr_notifier_lock``
+is held in read mode during mapping, it will not race with the
+zapping. For GEM object backed gpu_vmas, zapping will take place under
+the GEM object's dma_resv and ensuring that the dma_resv is held also
+when populating the page-tables for any gpu_vma pointing to the GEM
+object, will similarly ensure we are race-free.
+
+If any part of the mapping is performed asynchronously
+under a dma-fence with these locks released, the zapping will need to
+wait for that dma-fence to signal under the relevant lock before
+starting to modify the page-table.
+
+Since modifying the
+page-table structure in a way that frees up page-table memory
+might also require outer level locks, the zapping of GPU ptes
+typically focuses only on zeroing page-table or page-directory entries
+and flushing TLB, whereas freeing of page-table memory is deferred to
+unbind or rebind time.
diff --git a/Documentation/gpu/implementation_guidelines.rst b/Documentation/gpu/implementation_guidelines.rst
index 138e637dcc6b2..dbccfa72f1c9d 100644
--- a/Documentation/gpu/implementation_guidelines.rst
+++ b/Documentation/gpu/implementation_guidelines.rst
@@ -7,3 +7,4 @@ Misc DRM driver uAPI- and feature implementation guidelines
 .. toctree::
 
    drm-vm-bind-async
+   drm-vm-bind-locking
diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst
index c29113a0ac301..ceb21219d52ec 100644
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@@ -123,10 +123,15 @@ Documentation should include:
 
  * O(1) complexity under VM_BIND.
 
+The document is now included in the drm documentation :doc:`here </gpu/drm-vm-bind-async>`.
+
 Some parts of userptr like mmu_notifiers should become GPUVA or DRM helpers when
 the second driver supporting VM_BIND+userptr appears. Details to be defined when
 the time comes.
 
+The DRM GPUVM helpers do not yet include the userptr parts, but discussions
+about implementing them are ongoing.
+
 Long running compute: minimal data structure/scaffolding
 --------------------------------------------------------
 The generic scheduler code needs to include the handling of endless compute
-- 
GitLab


From 613ecd6563d2716192e69624105fe1939d104663 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
Date: Fri, 10 Nov 2023 12:23:28 -0500
Subject: [PATCH 0537/2340] drm/amd: Document device reset methods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document what each amdgpu driver reset method does.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4d100f8d75bf1..ea2656bd714f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -508,6 +508,31 @@ struct amdgpu_allowed_register_entry {
 	bool grbm_indexed;
 };
 
+/**
+ * enum amd_reset_method - Methods for resetting AMD GPU devices
+ *
+ * @AMD_RESET_METHOD_NONE: The device will not be reset.
+ * @AMD_RESET_LEGACY: Method reserved for SI, CIK and VI ASICs.
+ * @AMD_RESET_MODE0: Reset the entire ASIC. Not currently available for the
+ *                   any device.
+ * @AMD_RESET_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.)
+ *                   individually. Suitable only for some discrete GPU, not
+ *                   available for all ASICs.
+ * @AMD_RESET_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs
+ *                   are reset depends on the ASIC. Notably doesn't reset IPs
+ *                   shared with the CPU on APUs or the memory controllers (so
+ *                   VRAM is not lost). Not available on all ASICs.
+ * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card
+ *                  but without powering off the PCI bus. Suitable only for
+ *                  discrete GPUs.
+ * @AMD_RESET_PCI: Does a full bus reset using core Linux subsystem PCI reset
+ *                 and does a secondary bus reset or FLR, depending on what the
+ *                 underlying hardware supports.
+ *
+ * Methods available for AMD GPU driver for resetting the device. Not all
+ * methods are suitable for every device. User can override the method using
+ * module parameter `reset_method`.
+ */
 enum amd_reset_method {
 	AMD_RESET_METHOD_NONE = -1,
 	AMD_RESET_METHOD_LEGACY = 0,
-- 
GitLab


From 534eee82356c220649dc9c2ea90099f39fb1cb62 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Sun, 12 Nov 2023 09:30:51 +0530
Subject: [PATCH 0538/2340] drm/amd/display: Remove redundant DRM device struct
 in amdgpu_dm_, mst_types.c

Declaration of 'struct drm_device *dev' is redundant, as
'connector->dev' & 'dev_get_drvdata(kdev)' can be directly passed to
'drm_to_adev', without any intermediate DRM device 'dev' variable

Cc: Roman Li <roman.li@amd.com>
Cc: Hamza Mahfooz <hamza.mahfooz@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Roman Li <roman.li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c        | 9 +++------
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c  | 3 +--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ee97814ebd994..ba3f7b232a16f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -894,8 +894,7 @@ static int dm_early_init(void *handle);
 /* Allocate memory for FBC compressed data  */
 static void amdgpu_dm_fbc_init(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct dm_compressor_info *compressor = &adev->dm.compressor;
 	struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector);
 	struct drm_display_mode *mode;
@@ -989,8 +988,7 @@ static int amdgpu_dm_audio_component_bind(struct device *kdev,
 static void amdgpu_dm_audio_component_unbind(struct device *kdev,
 					  struct device *hda_kdev, void *data)
 {
-	struct drm_device *dev = dev_get_drvdata(kdev);
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(kdev));
 	struct drm_audio_component *acomp = data;
 
 	acomp->ops = NULL;
@@ -10788,8 +10786,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 	struct dm_connector_state *dm_con_state = NULL;
 	struct dc_sink *sink;
 
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_hdmi_vsdb_info vsdb_info = {0};
 	bool freesync_capable = false;
 	enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 8d7d4024f2859..a6995688d7ad1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -424,8 +424,7 @@ dm_mst_atomic_best_encoder(struct drm_connector *connector,
 {
 	struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
 											 connector);
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc);
 
 	return &adev->dm.mst_encoders[acrtc->crtc_id].base;
-- 
GitLab


From 12c2d3b5f5bc4ecb470a4bc06424914c145e8c03 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Fri, 29 Sep 2023 10:51:02 +0530
Subject: [PATCH 0539/2340] drm/amd/pm: Add support to fetch pm metrics sample

Add API support to fetch a snapshot of power management metrics from PMFW.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/kgd_pp_interface.h |  1 +
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c            | 17 +++++++++++++++++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h        | 12 ++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c      | 15 +++++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h  | 10 ++++++++++
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h   |  3 ++-
 6 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index f9c438d16c565..6a1d31e8fdd1f 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -431,6 +431,7 @@ struct amd_pm_funcs {
 	int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
 	int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
 	ssize_t (*get_gpu_metrics)(void *handle, void **table);
+	ssize_t (*get_pm_metrics)(void *handle, void *pmmetrics, size_t size);
 	int (*set_watermarks_for_clock_ranges)(void *handle,
 					       struct pp_smu_wm_range_sets *ranges);
 	int (*display_disable_memory_clock_switch)(void *handle,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 1ae3b81548fa3..4717884a2bc4c 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1299,6 +1299,23 @@ int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table)
 	return ret;
 }
 
+ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics,
+				  size_t size)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	int ret = 0;
+
+	if (!pp_funcs->get_pm_metrics)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&adev->pm.mutex);
+	ret = pp_funcs->get_pm_metrics(adev->powerplay.pp_handle, pm_metrics,
+				       size);
+	mutex_unlock(&adev->pm.mutex);
+
+	return ret;
+}
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
 				    uint32_t *fan_mode)
 {
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index feccd2a7120dc..9b3fef5474b11 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -511,6 +511,18 @@ int amdgpu_dpm_get_power_profile_mode(struct amdgpu_device *adev,
 int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev,
 				      long *input, uint32_t size);
 int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table);
+
+/**
+ * @get_pm_metrics: Get one snapshot of power management metrics from PMFW. The
+ * sample is copied to pm_metrics buffer. It's expected to be allocated by the
+ * caller and size of the allocated buffer is passed. Max size expected for a
+ * metrics sample is 4096 bytes.
+ *
+ * Return: Actual size of the metrics sample
+ */
+ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics,
+				  size_t size);
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
 				    uint32_t *fan_mode);
 int amdgpu_dpm_set_fan_speed_pwm(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 37c2605f9b351..930e7e3532ad3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3189,6 +3189,20 @@ static ssize_t smu_sys_get_gpu_metrics(void *handle, void **table)
 	return smu->ppt_funcs->get_gpu_metrics(smu, table);
 }
 
+static ssize_t smu_sys_get_pm_metrics(void *handle, void *pm_metrics,
+				      size_t size)
+{
+	struct smu_context *smu = handle;
+
+	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+		return -EOPNOTSUPP;
+
+	if (!smu->ppt_funcs->get_pm_metrics)
+		return -EOPNOTSUPP;
+
+	return smu->ppt_funcs->get_pm_metrics(smu, pm_metrics, size);
+}
+
 static int smu_enable_mgpu_fan_boost(void *handle)
 {
 	struct smu_context *smu = handle;
@@ -3330,6 +3344,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
 	.set_df_cstate                    = smu_set_df_cstate,
 	.set_xgmi_pstate                  = smu_set_xgmi_pstate,
 	.get_gpu_metrics                  = smu_sys_get_gpu_metrics,
+	.get_pm_metrics                   = smu_sys_get_pm_metrics,
 	.set_watermarks_for_clock_ranges     = smu_set_watermarks_for_clock_ranges,
 	.display_disable_memory_clock_switch = smu_display_disable_memory_clock_switch,
 	.get_max_sustainable_clocks_by_dc    = smu_get_max_sustainable_clocks_by_dc,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 8def291b18bcd..32600ba74bf12 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -253,6 +253,7 @@ struct smu_table {
 	uint64_t mc_address;
 	void *cpu_addr;
 	struct amdgpu_bo *bo;
+	uint32_t version;
 };
 
 enum smu_perf_level_designation {
@@ -1252,6 +1253,15 @@ struct pptable_funcs {
 	 */
 	ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table);
 
+	/**
+	 * @get_pm_metrics: Get one snapshot of power management metrics from
+	 * PMFW.
+	 *
+	 * Return: Size of the metrics sample
+	 */
+	ssize_t (*get_pm_metrics)(struct smu_context *smu, void *pm_metrics,
+				  size_t size);
+
 	/**
 	 * @enable_mgpu_fan_boost: Enable multi-GPU fan boost.
 	 */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 9dd47d91093eb..ea8ea99b74368 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -259,7 +259,8 @@
 	__SMU_DUMMY_MAP(PowerUpUmsch),	\
 	__SMU_DUMMY_MAP(PowerDownUmsch),	\
 	__SMU_DUMMY_MAP(SetSoftMaxVpe),	\
-	__SMU_DUMMY_MAP(SetSoftMinVpe),
+	__SMU_DUMMY_MAP(SetSoftMinVpe), \
+	__SMU_DUMMY_MAP(GetMetricsVersion),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	SMU_MSG_##type
-- 
GitLab


From f9a45b76a1883b081fbe15466b11d0264e85d372 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Fri, 29 Sep 2023 11:35:19 +0530
Subject: [PATCH 0540/2340] drm/amd/pm: Add pm metrics support to SMU v13.0.6

Add support to fetch PM metrics sample from SMU v13.0.6

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index f723a4190ee52..8ccb8354bb496 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -120,6 +120,7 @@ struct mca_ras_info {
 #define P2S_TABLE_ID_A 0x50325341
 #define P2S_TABLE_ID_X 0x50325358
 
+// clang-format off
 static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
 	MSG_MAP(TestMessage,			     PPSMC_MSG_TestMessage,			0),
 	MSG_MAP(GetSmuVersion,			     PPSMC_MSG_GetSmuVersion,			1),
@@ -128,6 +129,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
 	MSG_MAP(DisableAllSmuFeatures,		     PPSMC_MSG_DisableAllSmuFeatures,		0),
 	MSG_MAP(RequestI2cTransaction,		     PPSMC_MSG_RequestI2cTransaction,		0),
 	MSG_MAP(GetMetricsTable,		     PPSMC_MSG_GetMetricsTable,			1),
+	MSG_MAP(GetMetricsVersion,		     PPSMC_MSG_GetMetricsVersion,		1),
 	MSG_MAP(GetEnabledSmuFeaturesHigh,	     PPSMC_MSG_GetEnabledSmuFeaturesHigh,	1),
 	MSG_MAP(GetEnabledSmuFeaturesLow,	     PPSMC_MSG_GetEnabledSmuFeaturesLow,	1),
 	MSG_MAP(SetDriverDramAddrHigh,		     PPSMC_MSG_SetDriverDramAddrHigh,		1),
@@ -171,6 +173,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
 	MSG_MAP(SelectPLPDMode,                      PPSMC_MSG_SelectPLPDMode,                  0),
 };
 
+// clang-format on
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
 	CLK_MAP(SOCCLK, PPCLK_SOCCLK),
 	CLK_MAP(FCLK, PPCLK_FCLK),
@@ -428,6 +431,41 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu,
 	return 0;
 }
 
+static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu,
+					  void *metrics, size_t max_size)
+{
+	struct smu_table_context *smu_tbl_ctxt = &smu->smu_table;
+	uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version;
+	uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size;
+	struct amdgpu_pm_metrics *pm_metrics = metrics;
+	uint32_t pmfw_version;
+	int ret;
+
+	if (!pm_metrics || !max_size)
+		return -EINVAL;
+
+	if (max_size < (table_size + sizeof(pm_metrics->common_header)))
+		return -EOVERFLOW;
+
+	/* Don't use cached metrics data */
+	ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true);
+	if (ret)
+		return ret;
+
+	smu_cmn_get_smc_version(smu, NULL, &pmfw_version);
+
+	memset(&pm_metrics->common_header, 0,
+	       sizeof(pm_metrics->common_header));
+	pm_metrics->common_header.mp1_ip_discovery_version =
+		IP_VERSION(13, 0, 6);
+	pm_metrics->common_header.pmfw_version = pmfw_version;
+	pm_metrics->common_header.pmmetrics_version = table_version;
+	pm_metrics->common_header.structure_size =
+		sizeof(pm_metrics->common_header) + table_size;
+
+	return pm_metrics->common_header.structure_size;
+}
+
 static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
@@ -435,6 +473,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 	struct PPTable_t *pptable =
 		(struct PPTable_t *)smu_table->driver_pptable;
 	int ret, i, retry = 100;
+	uint32_t table_version;
 
 	/* Store one-time values in driver PPTable */
 	if (!pptable->Init) {
@@ -453,6 +492,13 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 		if (!retry)
 			return -ETIME;
 
+		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion,
+					   &table_version);
+		if (ret)
+			return ret;
+		smu_table->tables[SMU_TABLE_SMU_METRICS].version =
+			table_version;
+
 		pptable->MaxSocketPowerLimit =
 			SMUQ10_ROUND(metrics->MaxSocketPowerLimit);
 		pptable->MaxGfxclkFrequency =
@@ -2864,6 +2910,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	.log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event,
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
 	.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
+	.get_pm_metrics = smu_v13_0_6_get_pm_metrics,
 	.get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
 	.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
 	.mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported,
-- 
GitLab


From 223aad1be34e1169ee7210bce05726cc5ef1fd66 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Fri, 29 Sep 2023 11:56:49 +0530
Subject: [PATCH 0541/2340] drm/amd/pm: Add sysfs attribute to get pm metrics

Add sysfs attribute to read power management metrics. A snapshot is
captured to the buffer when the attribute is read.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 40 ++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index ca2ece24e1e07..e1497296afee2 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1799,6 +1799,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev,
 	return count;
 }
 
+static int amdgpu_pm_metrics_attr_update(struct amdgpu_device *adev,
+					 struct amdgpu_device_attr *attr,
+					 uint32_t mask,
+					 enum amdgpu_device_attr_states *states)
+{
+	if (amdgpu_dpm_get_pm_metrics(adev, NULL, 0) == -EOPNOTSUPP)
+		*states = ATTR_STATE_UNSUPPORTED;
+
+	return 0;
+}
+
+static ssize_t amdgpu_get_pm_metrics(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	ssize_t size = 0;
+	int ret;
+
+	if (amdgpu_in_reset(adev))
+		return -EPERM;
+	if (adev->in_suspend && !adev->in_runpm)
+		return -EPERM;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0) {
+		pm_runtime_put_autosuspend(ddev->dev);
+		return ret;
+	}
+
+	size = amdgpu_dpm_get_pm_metrics(adev, buf, PAGE_SIZE);
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
+}
+
 /**
  * DOC: gpu_metrics
  *
@@ -2096,6 +2134,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
 	AMDGPU_DEVICE_ATTR_RW(smartshift_bias,				ATTR_FLAG_BASIC,
 			      .attr_update = ss_bias_attr_update),
 	AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy,				ATTR_FLAG_BASIC),
+	AMDGPU_DEVICE_ATTR_RO(pm_metrics,				ATTR_FLAG_BASIC,
+			      .attr_update = amdgpu_pm_metrics_attr_update),
 };
 
 static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
-- 
GitLab


From cee6de122461de699aaa7932b33466c6d259eabb Mon Sep 17 00:00:00 2001
From: Dennis Chan <dennis.chan@amd.com>
Date: Fri, 27 Oct 2023 11:00:36 +0800
Subject: [PATCH 0542/2340] drm/amd/display: Add new Replay command and
 Disabled Replay Timing Resync

[why]
To support dynamic switching for Replay timing sync mechanism.

Reviewed-by: ChunTao Tso <chuntao.tso@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Dennis Chan <dennis.chan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_types.h     | 10 ++++++++
 .../gpu/drm/amd/display/dc/dce/dmub_replay.h  |  3 +++
 drivers/gpu/drm/amd/display/dc/inc/link.h     |  3 +++
 .../drm/amd/display/dc/link/link_factory.c    |  1 +
 .../link/protocols/link_edp_panel_control.c   | 24 +++++++++++++++++++
 .../link/protocols/link_edp_panel_control.h   |  3 +++
 6 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index fcb825e4f1bb8..edf60c4f318cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1022,6 +1022,16 @@ enum replay_coasting_vtotal_type {
 	PR_COASTING_TYPE_NUM,
 };
 
+/*
+ * This is general Interface for Replay to
+ * set an 32 bit variable to dmub
+ * The Message_type indicates which variable
+ * passed to DMUB.
+ */
+enum replay_FW_Message_type {
+	Replay_Set_Timing_Sync_Supported,
+};
+
 union replay_error_status {
 	struct {
 		unsigned char STATE_TRANSITION_ERROR    :1;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
index e8385bbf51fc2..427bc47a676e6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -45,6 +45,9 @@ struct dmub_replay_funcs {
 		struct replay_context *replay_context, uint8_t panel_inst);
 	void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt,
 		uint8_t panel_inst);
+	void (*replay_send_cmd)(struct dmub_replay *dmub,
+		enum replay_FW_Message_type msg, unsigned int panel_inst,
+		uint32_t cmd_data);
 	void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal,
 		uint8_t panel_inst);
 	void (*replay_residency)(struct dmub_replay *dmub,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h
index d7685368140ab..dd3f53151d8b8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link.h
@@ -281,6 +281,9 @@ struct link_service {
 			const unsigned int *power_opts);
 	bool (*edp_setup_replay)(struct dc_link *link,
 			const struct dc_stream_state *stream);
+	bool (*edp_send_replay_cmd)(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			uint32_t params);
 	bool (*edp_set_coasting_vtotal)(
 			struct dc_link *link, uint16_t coasting_vtotal);
 	bool (*edp_replay_residency)(const struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 7abfc67d10a62..6b306ea58b9bc 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -213,6 +213,7 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s
 	link_srv->edp_get_replay_state = edp_get_replay_state;
 	link_srv->edp_set_replay_allow_active = edp_set_replay_allow_active;
 	link_srv->edp_setup_replay = edp_setup_replay;
+	link_srv->edp_send_replay_cmd = edp_send_replay_cmd;
 	link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal;
 	link_srv->edp_replay_residency = edp_replay_residency;
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index e32a7974a4bc6..c52b51b2b4b38 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -1007,6 +1007,30 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 	return true;
 }
 
+/*
+ * This is general Interface for Replay to set an 32 bit variable to dmub
+ * replay_FW_Message_type: Indicates which instruction or variable pass to DMUB
+ * cmd_data: Value of the config.
+ */
+bool edp_send_replay_cmd(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			uint32_t cmd_data)
+{
+	struct dc *dc = link->ctx->dc;
+	struct dmub_replay *replay = dc->res_pool->replay;
+	unsigned int panel_inst;
+
+	if (!replay)
+		return false;
+
+	if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+		return false;
+
+	replay->funcs->replay_send_cmd(replay, msg, cmd_data, panel_inst);
+
+	return true;
+}
+
 bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal)
 {
 	struct dc *dc = link->ctx->dc;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index ebf7deb63d136..6b223580ac8a7 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -57,6 +57,9 @@ bool edp_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
 	bool wait, bool force_static, const unsigned int *power_opts);
 bool edp_setup_replay(struct dc_link *link,
 		const struct dc_stream_state *stream);
+bool edp_send_replay_cmd(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			uint32_t params);
 bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal);
 bool edp_replay_residency(const struct dc_link *link,
 	unsigned int *residency, const bool is_start, const bool is_alpm);
-- 
GitLab


From 1c22d6ce53280763bcb4cb24d4f71111fff4a526 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Mon, 6 Nov 2023 11:20:15 -0500
Subject: [PATCH 0543/2340] drm/amd/display: Include udelay when waiting for
 INBOX0 ACK

When waiting for the ACK for INBOX0 message,
we have to ensure to include the udelay
for proper wait time

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 22fc4ba96defd..38360adc53d97 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -1077,6 +1077,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti
 		ack = dmub->hw_funcs.read_inbox0_ack_register(dmub);
 		if (ack)
 			return DMUB_STATUS_OK;
+		udelay(1);
 	}
 	return DMUB_STATUS_TIMEOUT;
 }
-- 
GitLab


From 3f3b08be58834339b00f28d19c20d684cdec704f Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Fri, 3 Nov 2023 10:01:01 -0400
Subject: [PATCH 0544/2340] drm/amd/display: Add z-state support policy for
 dcn35

[Why]
DML2 means that the dcn3x policy for calculating z-state support
no longer runs from validate_bandwidth.

This means we are unconditionally allowing Z8, the hardware default.

[How]
Port the policy over to DCN35, but with a few modifications:
- Don't use min_dst_y_next_start as a check for Z8/Z10 allow
- Add support for overriding the Z10 stutter period per ASIC
- Cleanup the code to make the policy assignment more clear

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h           |  1 +
 .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c  | 34 +++++++++++++++++++
 .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.h  |  2 ++
 .../dc/resource/dcn35/dcn35_resource.c        |  7 ++++
 4 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 541e781267b92..349629858205a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -874,6 +874,7 @@ struct dc_debug_options {
 	unsigned int seamless_boot_odm_combine;
 	unsigned int force_odm_combine_4to1; //bit vector based on otg inst
 	int minimum_z8_residency_time;
+	int minimum_z10_residency_time;
 	bool disable_z9_mpc;
 	unsigned int force_fclk_khz;
 	bool enable_tri_buf;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index a5fe523668e94..dee80429fc4c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -507,3 +507,37 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
 
 	return pipe_cnt;
 }
+
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW;
+	unsigned int i, plane_count = 0;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			plane_count++;
+	}
+
+	if (plane_count == 0) {
+		support = DCN_ZSTATE_SUPPORT_ALLOW;
+	} else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+		struct dc_link *link = context->streams[0]->sink->link;
+		bool is_pwrseq0 = link && link->link_index == 0;
+		bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr;
+		int minmum_z8_residency =
+			dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
+		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
+		int minmum_z10_residency =
+			dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000;
+		bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency;
+
+		if (is_pwrseq0 && allow_z10)
+			support = DCN_ZSTATE_SUPPORT_ALLOW;
+		else if (is_pwrseq0 && is_psr1)
+			support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
+		else if (allow_z8)
+			support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	}
+
+	context->bw_ctx.bw.dcn.clk.zstate_support = support;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
index e8d5a170893e3..067480fc36913 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
@@ -39,4 +39,6 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
 					      display_e2e_pipe_params_st *pipes,
 					      bool fast_validate);
 
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 5c935d94a95cd..0d5a03c6d812a 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -1724,6 +1724,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc,
 
 	out = dml2_validate(dc, context, fast_validate);
 
+	if (fast_validate)
+		return out;
+
+	DC_FP_START();
+	dcn35_decide_zstate_support(dc, context);
+	DC_FP_END();
+
 	return out;
 }
 
-- 
GitLab


From a2d3c69261178df7d4c1350d5ef67375d399acd3 Mon Sep 17 00:00:00 2001
From: David Yat Sin <David.YatSin@amd.com>
Date: Fri, 17 Nov 2023 05:16:59 +0000
Subject: [PATCH 0545/2340] drm/amdkfd: Copy HW exception data to user event

Fixes issue where user events of type KFD_EVENT_TYPE_HW_EXCEPTION do not
have valid data

Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 0f58be65132fc..739721254a5df 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -880,6 +880,10 @@ static int copy_signaled_event_data(uint32_t num_events,
 				dst = &data[i].memory_exception_data;
 				src = &event->memory_exception_data;
 				size = sizeof(struct kfd_hsa_memory_exception_data);
+			} else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+				dst = &data[i].memory_exception_data;
+				src = &event->hw_exception_data;
+				size = sizeof(struct kfd_hsa_hw_exception_data);
 			} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
 				waiter->event_age_enabled) {
 				dst = &data[i].signal_event_data.last_event_age;
-- 
GitLab


From 35c425f5cc251417ad681475dc9901ab6d3244ea Mon Sep 17 00:00:00 2001
From: Jonathan Kim <jonathan.kim@amd.com>
Date: Thu, 16 Nov 2023 13:57:07 -0500
Subject: [PATCH 0546/2340] drm/amdgpu: update xgmi num links info post gc9.4.2

GC IP 9.4.2 and up support TA reporting of the number
of xGMI links between peers.

Tested-by: Vignesh Chander <vignesh.chander@amd.com>
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 6ab17330a6ed2..2d22f7d45512d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -547,7 +547,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
 	struct amdgpu_device *adev = dst, *peer_adev;
 	int num_links;
 
-	if (adev->asic_type != CHIP_ALDEBARAN)
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))
 		return 0;
 
 	if (src)
-- 
GitLab


From 76c5d6900908439386b0045a6130150150079300 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Mon, 6 Nov 2023 17:29:33 -0500
Subject: [PATCH 0547/2340] drm/amd/display: Update DCN35 watermarks

[Why & How]
Update to the new values per HW team request. Affects both stutter
and z8.

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  | 32 +++++++++----------
 .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c  |  8 ++---
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 507a7cf56711f..3469f692d6ea1 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -443,32 +443,32 @@ static struct wm_table ddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 	}
@@ -480,32 +480,32 @@ static struct wm_table lpddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index dee80429fc4c8..30d78ad91b9cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -164,10 +164,10 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 		},
 	},
 	.num_states = 5,
-	.sr_exit_time_us = 9.0,
-	.sr_enter_plus_exit_time_us = 11.0,
-	.sr_exit_z8_time_us = 50.0, /*changed from 442.0*/
-	.sr_enter_plus_exit_z8_time_us = 50.0,/*changed from 560.0*/
+	.sr_exit_time_us = 14.0,
+	.sr_enter_plus_exit_time_us = 16.0,
+	.sr_exit_z8_time_us = 525.0,
+	.sr_enter_plus_exit_z8_time_us = 715.0,
 	.fclk_change_latency_us = 20.0,
 	.usr_retraining_latency_us = 2,
 	.writeback_latency_us = 12.0,
-- 
GitLab


From c4290449f8fbecc55013c6125b50908b5359a8fd Mon Sep 17 00:00:00 2001
From: Ian Chen <ian.chen@amd.com>
Date: Mon, 6 Nov 2023 15:46:19 +0800
Subject: [PATCH 0548/2340] drm/amd/display: add skip_implict_edp_power_control
 flag for dce110

If the link requests to skip implicit eDP power control, we should honor
that request.

Reviewed-by: Robin Chen <robin.chen@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Ian Chen <ian.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 960a55e06375b..0a331d17ee926 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -3115,7 +3115,8 @@ void dce110_disable_link_output(struct dc_link *link,
 	struct dmcu *dmcu = dc->res_pool->dmcu;
 
 	if (signal == SIGNAL_TYPE_EDP &&
-			link->dc->hwss.edp_backlight_control)
+			link->dc->hwss.edp_backlight_control &&
+			!link->skip_implict_edp_power_control)
 		link->dc->hwss.edp_backlight_control(link, false);
 	else if (dmcu != NULL && dmcu->funcs->lock_phy)
 		dmcu->funcs->lock_phy(dmcu);
-- 
GitLab


From 613a81995575889753ca44d70d33e84a1d21bae5 Mon Sep 17 00:00:00 2001
From: Wenjing Liu <wenjing.liu@amd.com>
Date: Mon, 6 Nov 2023 16:47:19 -0500
Subject: [PATCH 0549/2340] drm/amd/display: fix a pipe mapping error in
 dcn32_fpu

[why]
In dcn32 DML pipes are ordered the same as dc pipes but only for used
pipes. For example, if dc pipe 1 and 2 are used, their dml pipe indices
would be 0 and 1 respectively. However
update_pipe_slice_table_with_split_flags doesn't skip indices for free
pipes. This causes us to not reference correct dml pipe output when
building pipe topology.

[how]
Use two variables to iterate dc and dml pipes respectively and only
increment dml pipe index when current dc pipe is not free.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 9ec4172d1c2d8..44b0666e53b0b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1192,13 +1192,16 @@ static bool update_pipe_slice_table_with_split_flags(
 	 */
 	struct pipe_ctx *pipe;
 	bool odm;
-	int i;
+	int dc_pipe_idx, dml_pipe_idx = 0;
 	bool updated = false;
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		pipe = &context->res_ctx.pipe_ctx[i];
+	for (dc_pipe_idx = 0;
+			dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) {
+		pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
+		if (resource_is_pipe_type(pipe, FREE_PIPE))
+			continue;
 
-		if (merge[i]) {
+		if (merge[dc_pipe_idx]) {
 			if (resource_is_pipe_type(pipe, OPP_HEAD))
 				/* merging OPP head means reducing ODM slice
 				 * count by 1
@@ -1213,17 +1216,18 @@ static bool update_pipe_slice_table_with_split_flags(
 			updated = true;
 		}
 
-		if (split[i]) {
-			odm = vba->ODMCombineEnabled[vba->pipe_plane[i]] !=
+		if (split[dc_pipe_idx]) {
+			odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] !=
 					dm_odm_combine_mode_disabled;
 			if (odm && resource_is_pipe_type(pipe, OPP_HEAD))
 				update_slice_table_for_stream(
-						table, pipe->stream, split[i] - 1);
+						table, pipe->stream, split[dc_pipe_idx] - 1);
 			else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE))
 				update_slice_table_for_plane(table, pipe,
-						pipe->plane_state, split[i] - 1);
+						pipe->plane_state, split[dc_pipe_idx] - 1);
 			updated = true;
 		}
+		dml_pipe_idx++;
 	}
 	return updated;
 }
-- 
GitLab


From 702e2fb579e000382c219c58dacef4f733511a36 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Mon, 20 Nov 2023 11:04:45 +0800
Subject: [PATCH 0550/2340] drm/amdgpu: Retire query/reset_ras_err_status from
 gfx_v9_4_3

Not needed anymore.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 156 ------------------------
 1 file changed, 156 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 40d06d32bb745..21865668b7877 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3882,150 +3882,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
-static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t data;
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
-	}
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
-	}
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
-				regVML2_WALKER_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS,
-				0x3);
-	}
-}
-
-static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev,
-					uint32_t status, int xcc_id)
-{
-	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
-	uint32_t i, simd, wave;
-	uint32_t wave_status;
-	uint32_t wave_pc_lo, wave_pc_hi;
-	uint32_t wave_exec_lo, wave_exec_hi;
-	uint32_t wave_inst_dw0, wave_inst_dw1;
-	uint32_t wave_ib_sts;
-
-	for (i = 0; i < 32; i++) {
-		if (!((i << 1) & status))
-			continue;
-
-		simd = i / cu_info->max_waves_per_simd;
-		wave = i % cu_info->max_waves_per_simd;
-
-		wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
-		wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
-		wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
-		wave_exec_lo =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
-		wave_exec_hi =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
-		wave_inst_dw0 =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
-		wave_inst_dw1 =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
-		wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
-
-		dev_info(
-			adev->dev,
-			"\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
-			simd, wave, wave_status,
-			((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
-			((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
-			((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
-			wave_ib_sts);
-	}
-}
-
-static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t se_idx, sh_idx, cu_idx;
-	uint32_t status;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
-		for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
-			for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
-				gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
-							cu_idx, xcc_id);
-				status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						      regSQ_TIMEOUT_STATUS);
-				if (status != 0) {
-					dev_info(
-						adev->dev,
-						"GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
-						se_idx, sh_idx, cu_idx);
-					gfx_v9_4_3_log_cu_timeout_status(
-						adev, status, xcc_id);
-				}
-				/* clear old status */
-				WREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						regSQ_TIMEOUT_STATUS, 0);
-			}
-		}
-	}
-	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-			xcc_id);
-	mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
-					void *ras_error_status, int xcc_id)
-{
-	gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
-	gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
-}
-
-static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
-}
-
-static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t se_idx, sh_idx, cu_idx;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
-		for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
-			for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
-				gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
-							cu_idx, xcc_id);
-				WREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						regSQ_TIMEOUT_STATUS, 0);
-			}
-		}
-	}
-	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-			xcc_id);
-	mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
-					void *ras_error_status, int xcc_id)
-{
-	gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
-	gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
-}
-
 static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
 					void *ras_error_status, int xcc_id)
 {
@@ -4067,16 +3923,6 @@ static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
 }
 
-static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
-{
-	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
-}
-
-static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev)
-{
-	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status);
-}
-
 static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
 {
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer);
@@ -4394,8 +4240,6 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
 struct amdgpu_ras_block_hw_ops  gfx_v9_4_3_ras_ops = {
 	.query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
 	.reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
-	.query_ras_error_status = &gfx_v9_4_3_query_ras_error_status,
-	.reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status,
 };
 
 struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
-- 
GitLab


From 2e9b152325f649923b9324fa8ea5f1a5289145bb Mon Sep 17 00:00:00 2001
From: Perry Yuan <perry.yuan@amd.com>
Date: Tue, 1 Aug 2023 10:37:41 -0400
Subject: [PATCH 0551/2340] drm/amdgpu: optimize RLC powerdown notification on
 Vangogh

The smu needs to get the rlc power down message to sync the rlc state
with smu, the rlc state updating message need to be sent at while smu
begin suspend sequence , otherwise SMU will crash while RLC state is not
notified by driver, and rlc state probally changed after that
notification, so it needs to notify rlc state to smu at the end of the
suspend sequence in amdgpu_device_suspend() that can make sure the rlc
state  is correctly set to SMU.

[  101.000590] amdgpu 0000:03:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x0000001E SMN_C2PMSG_82:0x00000000
[  101.000598] amdgpu 0000:03:00.0: amdgpu: Failed to disable gfxoff!
[  110.838026] amdgpu 0000:03:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x0000001E SMN_C2PMSG_82:0x00000000
[  110.838035] amdgpu 0000:03:00.0: amdgpu: Failed to disable smu features.
[  110.838039] amdgpu 0000:03:00.0: amdgpu: Fail to disable dpm features!
[  110.838040] [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block <smu> failed -62
[  110.884394] PM: suspend of devices aborted after 21213.620 msecs
[  110.884402] PM: start suspend of devices aborted after 21213.882 msecs
[  110.884405] PM: Some devices failed to suspend, or early wake event detected

Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c     |  4 ++++
 drivers/gpu/drm/amd/include/kgd_pp_interface.h |  1 +
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c            | 18 ++++++++++++++++++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h        |  2 ++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c      | 10 ++++++++++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h  |  5 +++++
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c   |  5 ++---
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h    |  1 +
 8 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b5edf40b5d03f..5d3a83193115f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4549,6 +4549,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_release_full_gpu(adev, false);
 
+	r = amdgpu_dpm_notify_rlc_state(adev, false);
+	if (r)
+		return r;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 6a1d31e8fdd1f..8ebba87f42891 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -445,6 +445,7 @@ struct amd_pm_funcs {
 				   struct dpm_clocks *clock_table);
 	int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size);
 	void (*pm_compute_clocks)(void *handle);
+	int (*notify_rlc_state)(void *handle, bool en);
 };
 
 struct metrics_table_header {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 4717884a2bc4c..97b40c5fa1ff6 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -181,6 +181,24 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en)
+{
+	int ret = 0;
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+
+	if (pp_funcs && pp_funcs->notify_rlc_state) {
+		mutex_lock(&adev->pm.mutex);
+
+		ret = pp_funcs->notify_rlc_state(
+				adev->powerplay.pp_handle,
+				en);
+
+		mutex_unlock(&adev->pm.mutex);
+	}
+
+	return ret;
+}
+
 bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 9b3fef5474b11..d76b0a60db443 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -415,6 +415,8 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev);
 int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
 			     enum pp_mp1_state mp1_state);
 
+int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en);
+
 int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev);
 
 int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 930e7e3532ad3..f464610a959f6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1710,6 +1710,16 @@ static int smu_disable_dpms(struct smu_context *smu)
 		}
 	}
 
+	/* Notify SMU RLC is going to be off, stop RLC and SMU interaction.
+	 * otherwise SMU will hang while interacting with RLC if RLC is halted
+	 * this is a WA for Vangogh asic which fix the SMU hang issue.
+	 */
+	ret = smu_notify_rlc_state(smu, false);
+	if (ret) {
+		dev_err(adev->dev, "Fail to notify rlc status!\n");
+		return ret;
+	}
+
 	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) &&
 	    !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) &&
 	    !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 32600ba74bf12..c125253df20b8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1370,6 +1370,11 @@ struct pptable_funcs {
 	 *                       management.
 	 */
 	int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable);
+
+	/**
+	 * @notify_rlc_state: Notify RLC power state to SMU.
+	 */
+	int (*notify_rlc_state)(struct smu_context *smu, bool en);
 };
 
 typedef enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 762b31455a0b6..2ff6deedef955 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -2193,8 +2193,7 @@ static int vangogh_get_dpm_clock_table(struct smu_context *smu, struct dpm_clock
 	return 0;
 }
 
-
-static int vangogh_system_features_control(struct smu_context *smu, bool en)
+static int vangogh_notify_rlc_state(struct smu_context *smu, bool en)
 {
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
@@ -2523,7 +2522,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = {
 	.print_clk_levels = vangogh_common_print_clk_levels,
 	.set_default_dpm_table = vangogh_set_default_dpm_tables,
 	.set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters,
-	.system_features_control = vangogh_system_features_control,
+	.notify_rlc_state = vangogh_notify_rlc_state,
 	.feature_is_enabled = smu_cmn_feature_is_enabled,
 	.set_power_profile_mode = vangogh_set_power_profile_mode,
 	.get_power_profile_mode = vangogh_get_power_profile_mode,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
index 80b3c3efc006d..64766ac69c53b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
@@ -97,6 +97,7 @@
 #define smu_get_default_config_table_settings(smu, config_table)	smu_ppt_funcs(get_default_config_table_settings, -EOPNOTSUPP, smu, config_table)
 #define smu_set_config_table(smu, config_table)				smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table)
 #define smu_init_pptable_microcode(smu)					smu_ppt_funcs(init_pptable_microcode, 0, smu)
+#define smu_notify_rlc_state(smu, en)					smu_ppt_funcs(notify_rlc_state, 0, smu, en)
 
 #endif
 #endif
-- 
GitLab


From cfab803884f426b36b58dbe1f86f99742767c208 Mon Sep 17 00:00:00 2001
From: Wenjing Liu <wenjing.liu@amd.com>
Date: Thu, 2 Nov 2023 14:59:13 -0400
Subject: [PATCH 0552/2340] drm/amd/display: update pixel clock params after
 stream slice count change in context

[why]
When ODM slice count is changed, otg master pipe's pixel clock params is
no longer valid as the value is dependent on ODM slice count.

Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/core/dc_resource.c    |  9 ++++++---
 .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c |  6 +-----
 drivers/gpu/drm/amd/display/dc/inc/core_types.h  |  1 +
 .../display/dc/resource/dcn20/dcn20_resource.c   | 16 ++++++++++------
 .../display/dc/resource/dcn20/dcn20_resource.h   |  1 +
 .../display/dc/resource/dcn32/dcn32_resource.c   |  1 +
 .../display/dc/resource/dcn321/dcn321_resource.c |  1 +
 7 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 42a9277107430..84d632700949e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2256,7 +2256,7 @@ static struct pipe_ctx *get_last_dpp_pipe_in_mpcc_combine(
 }
 
 static bool update_pipe_params_after_odm_slice_count_change(
-		const struct dc_stream_state *stream,
+		struct pipe_ctx *otg_master,
 		struct dc_state *context,
 		const struct resource_pool *pool)
 {
@@ -2266,9 +2266,12 @@ static bool update_pipe_params_after_odm_slice_count_change(
 
 	for (i = 0; i < pool->pipe_count && result; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
-		if (pipe->stream == stream && pipe->plane_state)
+		if (pipe->stream == otg_master->stream && pipe->plane_state)
 			result = resource_build_scaling_params(pipe);
 	}
+
+	if (pool->funcs->build_pipe_pix_clk_params)
+		pool->funcs->build_pipe_pix_clk_params(otg_master);
 	return result;
 }
 
@@ -2951,7 +2954,7 @@ bool resource_update_pipes_for_stream_with_slice_count(
 					otg_master, new_ctx, pool);
 	if (result)
 		result = update_pipe_params_after_odm_slice_count_change(
-				otg_master->stream, new_ctx, pool);
+				otg_master, new_ctx, pool);
 	return result;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 44b0666e53b0b..e7f13e28caa38 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1237,15 +1237,11 @@ static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *contex
 {
 	int i;
 
-	for (i = 0; i < table->odm_combine_count; i++) {
+	for (i = 0; i < table->odm_combine_count; i++)
 		resource_update_pipes_for_stream_with_slice_count(context,
 				dc->current_state, dc->res_pool,
 				table->odm_combines[i].stream,
 				table->odm_combines[i].slice_count);
-		/* TODO: move this into the function above */
-		dcn20_build_mapped_resource(dc, context,
-				table->odm_combines[i].stream);
-	}
 
 	for (i = 0; i < table->mpc_combine_count; i++)
 		resource_update_pipes_for_plane_with_slice_count(context,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index bac1420b1de84..10397d4dfb075 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -205,6 +205,7 @@ struct resource_funcs {
 	void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
 	void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
 	void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
+	void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx);
 };
 
 struct audio_support{
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
index f04bb5b1471dd..f9c5bc624be30 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -1273,15 +1273,19 @@ static void build_clamping_params(struct dc_stream_state *stream)
 	stream->clamping.pixel_encoding = stream->timing.pixel_encoding;
 }
 
-static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx)
 {
-
 	get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
-
 	pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
-		pipe_ctx->clock_source,
-		&pipe_ctx->stream_res.pix_clk_params,
-		&pipe_ctx->pll_settings);
+			pipe_ctx->clock_source,
+			&pipe_ctx->stream_res.pix_clk_params,
+			&pipe_ctx->pll_settings);
+}
+
+static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+{
+
+	dcn20_build_pipe_pix_clk_params(pipe_ctx);
 
 	pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
 
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
index 37ecaccc5d128..4cee3fa11a7ff 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
@@ -165,6 +165,7 @@ enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx,
 enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state);
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx);
 
 #endif /* __DC_RESOURCE_DCN20_H__ */
 
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 36e4c7bef403c..f6cbcc9b40069 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -2041,6 +2041,7 @@ static struct resource_funcs dcn32_res_pool_funcs = {
 	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
 	.save_mall_state = dcn32_save_mall_state,
 	.restore_mall_state = dcn32_restore_mall_state,
+	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
 static uint32_t read_pipe_fuses(struct dc_context *ctx)
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index bedb70b98162b..12986fe0b2892 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1609,6 +1609,7 @@ static struct resource_funcs dcn321_res_pool_funcs = {
 	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
 	.save_mall_state = dcn32_save_mall_state,
 	.restore_mall_state = dcn32_restore_mall_state,
+	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
 static uint32_t read_pipe_fuses(struct dc_context *ctx)
-- 
GitLab


From 1290183db494641772c18d063c34e9c8f720c61c Mon Sep 17 00:00:00 2001
From: Wenjing Liu <wenjing.liu@amd.com>
Date: Thu, 2 Nov 2023 15:02:42 -0400
Subject: [PATCH 0553/2340] drm/amd/display: always use mpc factor of 2 for
 stereo timings

[why]
In the new pipe resource management logic, the special handling for
stereo timings is missing.
This commit implements the same stereo timings handling as old
pipe resource management code.

Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/dml2/dml2_dc_resource_mgmt.c   | 26 +++++++++++++------
 .../gpu/drm/amd/display/dc/dml2/dml2_utils.c  |  2 +-
 .../gpu/drm/amd/display/dc/dml2/dml2_utils.h  |  2 +-
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
index 1a2b24cc6b61d..0baf39d64a2d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
@@ -772,18 +772,29 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx,
 		const struct dc_state *state,
 		const struct dml_display_cfg_st *disp_cfg,
 		struct dml2_dml_to_dc_pipe_mapping *mapping,
-		const struct dc_stream_status *status, unsigned int stream_id,
+		const struct dc_stream_status *status,
+		const struct dc_stream_state *stream,
 		int plane_idx)
 {
 	unsigned int plane_id;
 	unsigned int cfg_idx;
+	unsigned int mpc_factor;
 
-	get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id);
+	get_plane_id(ctx, state, status->plane_states[plane_idx],
+			stream->stream_id, plane_idx, &plane_id);
 	cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
-	if (ctx->architecture == dml2_architecture_20)
-		return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx];
-	ASSERT(false);
-	return 1;
+	if (ctx->architecture == dml2_architecture_20) {
+		mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx];
+	} else {
+		mpc_factor = 1;
+		ASSERT(false);
+	}
+
+	/* For stereo timings, we need to pipe split */
+	if (dml2_is_stereo_timing(stream))
+		mpc_factor = 2;
+
+	return mpc_factor;
 }
 
 static unsigned int get_odm_factor(
@@ -820,14 +831,13 @@ static void populate_mpc_factors_for_stream(
 		unsigned int mpc_factors[MAX_PIPES])
 {
 	const struct dc_stream_status *status = &state->stream_status[stream_idx];
-	unsigned int stream_id = state->streams[stream_idx]->stream_id;
 	int i;
 
 	for (i = 0; i < status->plane_count; i++)
 		if (odm_factor == 1)
 			mpc_factors[i] = get_mpc_factor(
 					ctx, state, disp_cfg, mapping, status,
-					stream_id, i);
+					state->streams[stream_idx], i);
 		else
 			mpc_factors[i] = 1;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 2498b8341199b..33eab80e89a84 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -468,7 +468,7 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc
 	return need_recalculation;
 }
 
-bool dml2_is_stereo_timing(struct dc_stream_state *stream)
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream)
 {
 	bool is_stereo = false;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
index 23b9028337d43..5842d6d3c4b60 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
@@ -42,7 +42,7 @@ void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_st
 void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx);
 int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
 bool is_dtbclk_required(const struct dc *dc, struct dc_state *context);
-bool dml2_is_stereo_timing(struct dc_stream_state *stream);
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream);
 
 /*
  * dml2_dc_construct_pipes - This function will determine if we need additional pipes based
-- 
GitLab


From 80061d6b58a99f1fffb97a7f3592234a5fe0a3fe Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Tue, 7 Nov 2023 11:12:45 -0500
Subject: [PATCH 0554/2340] drm/amd/display: Add Z8 watermarks for DML2 bbox
 overrides

[Why]
We can override SR watermarks but not Z8 ones.

[How]
Add new parameters for Z8 matching the SR ones and feed them into the
states.

These also weren't being applied to every state, so make sure that
we loop over and update all SOC states if given an override.

Reviewed-by: Jun Lei <jun.lei@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/dml2/dml2_translation_helper.c | 47 +++++++++++++------
 .../drm/amd/display/dc/dml2/dml2_wrapper.h    |  2 +
 2 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 75171bee6f716..2b9638c6d9b00 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -341,25 +341,42 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 		break;
 	}
 
-	/* Override from passed values, mainly for debugging purposes, if available */
-	if (dml2->config.bbox_overrides.sr_exit_latency_us) {
-		p->in_states->state_array[0].sr_exit_time_us = dml2->config.bbox_overrides.sr_exit_latency_us;
-	}
+	/* Override from passed values, if available */
+	for (i = 0; i < p->in_states->num_states; i++) {
+		if (dml2->config.bbox_overrides.sr_exit_latency_us) {
+			p->in_states->state_array[i].sr_exit_time_us =
+				dml2->config.bbox_overrides.sr_exit_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) {
-		p->in_states->state_array[0].sr_enter_plus_exit_time_us = dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.urgent_latency_us) {
-		p->in_states->state_array[0].urgent_latency_pixel_data_only_us = dml2->config.bbox_overrides.urgent_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_exit_z8_time_us;
+		}
 
-	if (dml2->config.bbox_overrides.dram_clock_change_latency_us) {
-		p->in_states->state_array[0].dram_clock_change_latency_us = dml2->config.bbox_overrides.dram_clock_change_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us;
+		}
+
+		if (dml2->config.bbox_overrides.urgent_latency_us) {
+			p->in_states->state_array[i].urgent_latency_pixel_data_only_us =
+				dml2->config.bbox_overrides.urgent_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.fclk_change_latency_us) {
-		p->in_states->state_array[0].fclk_change_latency_us = dml2->config.bbox_overrides.fclk_change_latency_us;
+		if (dml2->config.bbox_overrides.dram_clock_change_latency_us) {
+			p->in_states->state_array[i].dram_clock_change_latency_us =
+				dml2->config.bbox_overrides.dram_clock_change_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.fclk_change_latency_us) {
+			p->in_states->state_array[i].fclk_change_latency_us =
+				dml2->config.bbox_overrides.fclk_change_latency_us;
+		}
 	}
 
 	/* DCFCLK stas values are project specific */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index 317f90776d972..fe15baa4bf094 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -139,6 +139,8 @@ struct dml2_soc_bbox_overrides {
 	double urgent_latency_us;
 	double sr_exit_latency_us;
 	double sr_enter_plus_exit_latency_us;
+	double sr_exit_z8_time_us;
+	double sr_enter_plus_exit_z8_time_us;
 	double dram_clock_change_latency_us;
 	double fclk_change_latency_us;
 	unsigned int dram_num_chan;
-- 
GitLab


From 4b8251e019ea17037667e6d61aa5e66d5b4f51d2 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Mon, 20 Nov 2023 10:14:21 +0800
Subject: [PATCH 0555/2340] drm/amdgpu: Do not issue gpu reset from nbio v7_9
 bif interrupt

In nbio v7_9, host driver should not issu gpu reset

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 23f26f8caad4c..25a3da83e0fb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -611,11 +611,6 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 		dev_info(adev->dev, "RAS controller interrupt triggered "
 					"by NBIF error\n");
-
-		/* ras_controller_int is dedicated for nbif ras error,
-		 * not the global interrupt for sync flood
-		 */
-		amdgpu_ras_reset_gpu(adev);
 	}
 
 	amdgpu_ras_error_data_fini(&err_data);
-- 
GitLab


From 20b07b0cb3a0a2fb3a6daf00f645925be77ec80c Mon Sep 17 00:00:00 2001
From: Alex Sierra <alex.sierra@amd.com>
Date: Mon, 20 Nov 2023 11:31:32 -0600
Subject: [PATCH 0556/2340] drm/amdgpu: Force order between a read and write to
 the same address

Setting register to force ordering to prevent read/write or write/read
hazards for un-cached modes.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c                    | 8 ++++++++
 .../gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h    | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 0c6133cc5e578..40ce123231640 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -89,6 +89,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
 
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
+
 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
 {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
@@ -304,6 +308,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
 	default:
 		break;
 	}
+	soc15_program_register_sequence(adev,
+					golden_settings_gc_11_0,
+					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
 }
 
 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
index c92c4b83253f8..4bff1ef8a9a64 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
@@ -6369,6 +6369,8 @@
 #define regTCP_INVALIDATE_BASE_IDX                                                                      1
 #define regTCP_STATUS                                                                                   0x19a1
 #define regTCP_STATUS_BASE_IDX                                                                          1
+#define regTCP_CNTL                                                                                     0x19a2
+#define regTCP_CNTL_BASE_IDX                                                                            1
 #define regTCP_CNTL2                                                                                    0x19a3
 #define regTCP_CNTL2_BASE_IDX                                                                           1
 #define regTCP_DEBUG_INDEX                                                                              0x19a5
-- 
GitLab


From 251027968a7230f18c353e25634cc7e25d9ab953 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Tue, 7 Nov 2023 11:15:16 -0500
Subject: [PATCH 0557/2340] drm/amd/display: Feed SR and Z8 watermarks into
 DML2 for DCN35

[Why]
We've updated the table but the values aren't being reflected in DML2
calculation.

[How]
Pass them into the bbox overrides.

Reviewed-by: Jun Lei <jun.lei@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 30d78ad91b9cb..21c17d3296a3c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -329,6 +329,15 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
 	/*temp till dml2 fully work without dml1*/
 	dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip,
 				DML_PROJECT_DCN31);
+
+	/* Update latency values */
+	dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us;
 }
 
 static bool is_dual_plane(enum surface_pixel_format format)
-- 
GitLab


From a953cd8cac6be69fba0b66e6fb46d1324d797af4 Mon Sep 17 00:00:00 2001
From: Ilya Bakoulin <ilya.bakoulin@amd.com>
Date: Tue, 7 Nov 2023 15:07:56 -0500
Subject: [PATCH 0558/2340] drm/amd/display: Fix MPCC 1DLUT programming

[Why]
Wrong function is used to translate LUT values to HW format, leading to
visible artifacting in some cases.

[How]
Use the correct cm3_helper function.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Krunoslav Kovac <krunoslav.kovac@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Ilya Bakoulin <ilya.bakoulin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 6a65af8c36b90..5f7f474ef51c9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -487,8 +487,7 @@ bool dcn32_set_mcm_luts(
 		if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
 			lut_params = &plane_state->blend_tf->pwl;
 		else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
-			cm_helper_translate_curve_to_hw_format(plane_state->ctx,
-					plane_state->blend_tf,
+			cm3_helper_translate_curve_to_hw_format(plane_state->blend_tf,
 					&dpp_base->regamma_params, false);
 			lut_params = &dpp_base->regamma_params;
 		}
@@ -503,8 +502,7 @@ bool dcn32_set_mcm_luts(
 		else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) {
 			// TODO: dpp_base replace
 			ASSERT(false);
-			cm_helper_translate_curve_to_hw_format(plane_state->ctx,
-					plane_state->in_shaper_func,
+			cm3_helper_translate_curve_to_hw_format(plane_state->in_shaper_func,
 					&dpp_base->shaper_params, true);
 			lut_params = &dpp_base->shaper_params;
 		}
-- 
GitLab


From 40436ce7ccfec5c616e2e48d0ec2c905637c7397 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Tue, 7 Nov 2023 17:01:49 -0500
Subject: [PATCH 0559/2340] drm/amd/display: Use DRAM speed from validation for
 dummy p-state

[Description]
When choosing which dummy p-state latency to use, we
need to use the DRAM speed from validation. The DRAMSpeed
DML variable can change because we use different input
params to DML when populating watermarks set B.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index e7f13e28caa38..92e2ddc9ab7e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2231,6 +2231,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	int i, pipe_idx, vlevel_temp = 0;
 	double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
 	double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+	double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed;
 	double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation;
 	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
 			dm_dram_clock_change_unsupported;
@@ -2418,7 +2419,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	}
 
 	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
-		min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+		min_dram_speed_mts = dram_speed_from_validation;
 		min_dram_speed_mts_margin = 160;
 
 		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
-- 
GitLab


From 6c22fb07e0c2935d97a86509f16f755ab895f2c8 Mon Sep 17 00:00:00 2001
From: Bhuvana Chandra Pinninti <bhuvanachandra.pinninti@amd.com>
Date: Wed, 18 Oct 2023 19:16:17 +0530
Subject: [PATCH 0560/2340] drm/amd/display: Refactor DSC into component folder

[why]

To refactor DSC and make DSC files unit testable.

[how]

moved the dcnxx_dsc.c and .h files
into corresponding dcn folders inside
the dsc and cleared the linkage errors.

Reviewed-by: Wenjing Liu <wenjing.liu@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Bhuvana Chandra Pinninti <bhuvanachandra.pinninti@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/Makefile          |  1 +
 drivers/gpu/drm/amd/display/dc/dcn20/Makefile |  2 --
 drivers/gpu/drm/amd/display/dc/dcn35/Makefile |  2 +-
 drivers/gpu/drm/amd/display/dc/dsc/Makefile   | 26 +++++++++++++++++++
 .../display/dc/{ => dsc}/dcn20/dcn20_dsc.c    |  0
 .../display/dc/{ => dsc}/dcn20/dcn20_dsc.h    |  0
 .../display/dc/{ => dsc}/dcn35/dcn35_dsc.c    |  0
 .../display/dc/{ => dsc}/dcn35/dcn35_dsc.h    |  0
 .../drm/amd/display/dc/{inc/hw => dsc}/dsc.h  |  0
 9 files changed, 28 insertions(+), 3 deletions(-)
 rename drivers/gpu/drm/amd/display/dc/{ => dsc}/dcn20/dcn20_dsc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => dsc}/dcn20/dcn20_dsc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => dsc}/dcn35/dcn35_dsc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => dsc}/dcn35/dcn35_dsc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{inc/hw => dsc}/dsc.h (100%)

diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index 71192fc81a20e..7b0959da2cacc 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -31,6 +31,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index 1cac1eca8111e..93ac45802e444 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -7,8 +7,6 @@ DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
 		dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
 		dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
 
-DCN20 += dcn20_dsc.o
-
 AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN20)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
index 719afb5a3b127..85a307babab9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
@@ -12,7 +12,7 @@
 
 DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \
 	dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \
-	dcn35_dsc.o dcn35_hubp.o dcn35_hubbub.o \
+	dcn35_hubp.o dcn35_hubbub.o \
 	dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o
 
 AMD_DAL_DCN35 = $(addprefix $(AMDDALPATH)/dc/dcn35/,$(DCN35))
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index a2537229ee88b..b183ba5a692ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,8 +1,34 @@
 # SPDX-License-Identifier: MIT
 #
 # Makefile for the 'dsc' sub-component of DAL.
+
+ifdef CONFIG_DRM_AMD_DC_FP
+
+###############################################################################
+# DCN20
+###############################################################################
+DSC_DCN20 = dcn20_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn20/,$(DSC_DCN20))
+
+
+
+
+###############################################################################
+# DCN35
+###############################################################################
+
+DSC_DCN35 = dcn35_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn35/,$(DSC_DCN35))
+
+
+
+endif
+
 DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
 
 AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DSC)
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
rename to drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h
rename to drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c
rename to drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h
rename to drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
rename to drivers/gpu/drm/amd/display/dc/dsc/dsc.h
-- 
GitLab


From c77b0008591094d454c1f340d1e82b5ebe2d918d Mon Sep 17 00:00:00 2001
From: Max Tseng <max.tseng@amd.com>
Date: Tue, 7 Nov 2023 15:00:03 +0800
Subject: [PATCH 0561/2340] drm/amd/display: replay: generalize the send
 command function usage

Augment the function to allow send different format data in different
use case.

Reviewed-by: Dennis Chan <dennis.chan@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Max Tseng <max.tseng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_types.h              |  1 +
 drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h       |  3 +--
 drivers/gpu/drm/amd/display/dc/inc/link.h              |  2 +-
 .../display/dc/link/protocols/link_edp_panel_control.c |  4 ++--
 .../display/dc/link/protocols/link_edp_panel_control.h |  2 +-
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h        | 10 ++++++++++
 6 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index edf60c4f318cc..6f5da510e8de9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1029,6 +1029,7 @@ enum replay_coasting_vtotal_type {
  * passed to DMUB.
  */
 enum replay_FW_Message_type {
+	Replay_Msg_Not_Support = -1,
 	Replay_Set_Timing_Sync_Supported,
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
index 427bc47a676e6..368711f763350 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -46,8 +46,7 @@ struct dmub_replay_funcs {
 	void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt,
 		uint8_t panel_inst);
 	void (*replay_send_cmd)(struct dmub_replay *dmub,
-		enum replay_FW_Message_type msg, unsigned int panel_inst,
-		uint32_t cmd_data);
+		enum replay_FW_Message_type msg, unsigned int panel_inst, union dmub_replay_cmd_set *cmd_element);
 	void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal,
 		uint8_t panel_inst);
 	void (*replay_residency)(struct dmub_replay *dmub,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h
index dd3f53151d8b8..7439865d1b50c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link.h
@@ -283,7 +283,7 @@ struct link_service {
 			const struct dc_stream_state *stream);
 	bool (*edp_send_replay_cmd)(struct dc_link *link,
 			enum replay_FW_Message_type msg,
-			uint32_t params);
+			union dmub_replay_cmd_set *cmd_data);
 	bool (*edp_set_coasting_vtotal)(
 			struct dc_link *link, uint16_t coasting_vtotal);
 	bool (*edp_replay_residency)(const struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index c52b51b2b4b38..6bc8ec47e267f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -1014,7 +1014,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
  */
 bool edp_send_replay_cmd(struct dc_link *link,
 			enum replay_FW_Message_type msg,
-			uint32_t cmd_data)
+			union dmub_replay_cmd_set *cmd_data)
 {
 	struct dc *dc = link->ctx->dc;
 	struct dmub_replay *replay = dc->res_pool->replay;
@@ -1026,7 +1026,7 @@ bool edp_send_replay_cmd(struct dc_link *link,
 	if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
 		return false;
 
-	replay->funcs->replay_send_cmd(replay, msg, cmd_data, panel_inst);
+	replay->funcs->replay_send_cmd(replay, msg, panel_inst, cmd_data);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index 6b223580ac8a7..39526bd401782 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -59,7 +59,7 @@ bool edp_setup_replay(struct dc_link *link,
 		const struct dc_stream_state *stream);
 bool edp_send_replay_cmd(struct dc_link *link,
 			enum replay_FW_Message_type msg,
-			uint32_t params);
+			union dmub_replay_cmd_set *cmd_data);
 bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal);
 bool edp_replay_residency(const struct dc_link *link,
 	unsigned int *residency, const bool is_start, const bool is_alpm);
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 55573083bc31c..5f06cf4c663f9 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -3143,6 +3143,16 @@ struct dmub_rb_cmd_replay_set_timing_sync {
 	struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data;
 };
 
+/**
+ * Definition union of replay command set
+ */
+union dmub_replay_cmd_set {
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data.
+	 */
+	struct dmub_cmd_replay_set_timing_sync_data sync_data;
+};
+
 /**
  * Set of HW components that can be locked.
  *
-- 
GitLab


From 220db802cb505e6ec3b3e0018ac0233205632a72 Mon Sep 17 00:00:00 2001
From: Michael Strauss <michael.strauss@amd.com>
Date: Fri, 27 Oct 2023 14:12:51 -0400
Subject: [PATCH 0562/2340] drm/amd/display: Do not read DPREFCLK spread info
 from LUT on DCN35

[WHY]
Currently DCN35 does not spread DPREFCLK

[HOW]
Remove hardcoded table with nonzero caps

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Michael Strauss <michael.strauss@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  | 22 -------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 3469f692d6ea1..0f3f6a9d51449 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -515,11 +515,6 @@ static DpmClocks_t_dcn35 dummy_clocks;
 
 static struct dcn35_watermarks dummy_wms = { 0 };
 
-static struct dcn35_ss_info_table ss_info_table = {
-	.ss_divider = 1000,
-	.ss_percentage = {0, 0, 375, 375, 375}
-};
-
 static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
 {
 	int i, num_valid_sets;
@@ -965,21 +960,6 @@ struct clk_mgr_funcs dcn35_fpga_funcs = {
 	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
 };
 
-static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
-{
-	uint32_t clock_source;
-	struct dc_context *ctx = clk_mgr->base.ctx;
-
-	REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
-
-	clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
-
-	if (clk_mgr->dprefclk_ss_percentage != 0) {
-		clk_mgr->ss_on_dprefclk = true;
-		clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
-	}
-}
-
 void dcn35_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_dcn35 *clk_mgr,
@@ -1052,8 +1032,6 @@ void dcn35_clk_mgr_construct(
 	dce_clock_read_ss_info(&clk_mgr->base);
 	/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
 
-	dcn35_read_ss_info_from_lut(&clk_mgr->base);
-
 	clk_mgr->base.base.bw_params = &dcn35_bw_params;
 
 	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
-- 
GitLab


From 75a3371e8ffdab2e504f4326daab60f8fb15fdf1 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Wed, 8 Nov 2023 17:16:28 -0500
Subject: [PATCH 0563/2340] drm/amd/display: Increase num voltage states to 40

[Description]
If during driver init stage there are greater than 20
intermediary voltage states while constructing the SOC
BB we could hit issues because we will index outside of the
clock_limits array and start overwriting data. Increase the
total number of states to 40 to avoid this issue.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml/dc_features.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 2cbdd75429ffd..6e669a2c5b2d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -36,7 +36,7 @@
  * Define the maximum amount of states supported by the ASIC. Every ASIC has a
  * specific number of states; this macro defines the maximum number of states.
  */
-#define DC__VOLTAGE_STATES 20
+#define DC__VOLTAGE_STATES 40
 #define DC__NUM_DPP__4 1
 #define DC__NUM_DPP__0_PRESENT 1
 #define DC__NUM_DPP__1_PRESENT 1
-- 
GitLab


From 37f4382b64a2b01109a0ed5c05f58d3f86385e10 Mon Sep 17 00:00:00 2001
From: Max Tseng <max.tseng@amd.com>
Date: Wed, 8 Nov 2023 11:31:50 +0800
Subject: [PATCH 0564/2340] drm/amd/display: replay: Augment Frameupdate
 Command

[Why]
Sending certain Frameupdate number for Replay Power Evaluation

Reviewed-by: Dennis Chan <dennis.chan@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Max Tseng <max.tseng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_types.h     |  1 +
 .../gpu/drm/amd/display/dc/dce/dmub_replay.h  |  2 +-
 .../link/protocols/link_edp_panel_control.c   | 10 +++-
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 51 +++++++++++++++++++
 4 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 6f5da510e8de9..4a60d2c476864 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1031,6 +1031,7 @@ enum replay_coasting_vtotal_type {
 enum replay_FW_Message_type {
 	Replay_Msg_Not_Support = -1,
 	Replay_Set_Timing_Sync_Supported,
+	Replay_Set_Residency_Frameupdate_Timer,
 };
 
 union replay_error_status {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
index 368711f763350..b3ee90a0b8b3d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -46,7 +46,7 @@ struct dmub_replay_funcs {
 	void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt,
 		uint8_t panel_inst);
 	void (*replay_send_cmd)(struct dmub_replay *dmub,
-		enum replay_FW_Message_type msg, unsigned int panel_inst, union dmub_replay_cmd_set *cmd_element);
+		enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element);
 	void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal,
 		uint8_t panel_inst);
 	void (*replay_residency)(struct dmub_replay *dmub,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index 6bc8ec47e267f..fdeb8dff5485a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -1023,10 +1023,16 @@ bool edp_send_replay_cmd(struct dc_link *link,
 	if (!replay)
 		return false;
 
-	if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+	DC_LOGGER_INIT(link->ctx->logger);
+
+	if (dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+		cmd_data->panel_inst = panel_inst;
+	else {
+		DC_LOG_DC("%s(): get edp panel inst fail ", __func__);
 		return false;
+	}
 
-	replay->funcs->replay_send_cmd(replay, msg, panel_inst, cmd_data);
+	replay->funcs->replay_send_cmd(replay, msg, cmd_data);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 5f06cf4c663f9..d1becbb5aa292 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -2880,6 +2880,10 @@ enum dmub_cmd_replay_type {
 	 * Set disabled iiming sync.
 	 */
 	DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED	= 5,
+	/**
+	 * Set Residency Frameupdate Timer.
+	 */
+	DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6,
 };
 
 /**
@@ -3143,14 +3147,57 @@ struct dmub_rb_cmd_replay_set_timing_sync {
 	struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data;
 };
 
+/**
+ * Data passed from driver to FW in  DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+ */
+struct dmub_cmd_replay_frameupdate_timer_data {
+	/**
+	 * Panel Instance.
+	 * Panel isntance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * Replay Frameupdate Timer Enable or not
+	 */
+	uint8_t enable;
+	/**
+	 * REPLAY force reflash frame update number
+	 */
+	uint16_t frameupdate_count;
+};
+/**
+ * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER
+ */
+struct dmub_rb_cmd_replay_set_frameupdate_timer {
+	/**
+	 * Command header.
+	 */
+	struct dmub_cmd_header header;
+	/**
+	 * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
+	 */
+	struct dmub_cmd_replay_frameupdate_timer_data data;
+};
+
 /**
  * Definition union of replay command set
  */
 union dmub_replay_cmd_set {
+	/**
+	 * Panel Instance.
+	 * Panel isntance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
 	/**
 	 * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data.
 	 */
 	struct dmub_cmd_replay_set_timing_sync_data sync_data;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data.
+	 */
+	struct dmub_cmd_replay_frameupdate_timer_data timer_data;
 };
 
 /**
@@ -4288,6 +4335,10 @@ union dmub_rb_cmd {
 	struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal;
 
 	struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync;
+	/**
+	 * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+	 */
+	struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer;
 };
 
 /**
-- 
GitLab


From 43b8ac4b34ec239bccf4a692c1227ef51a95a4d2 Mon Sep 17 00:00:00 2001
From: Camille Cho <camille.cho@amd.com>
Date: Fri, 3 Nov 2023 12:08:42 +0800
Subject: [PATCH 0565/2340] drm/amd/display: Simplify brightness initialization

[Why]
Remove the brightness cache in DC. It uses a single value to represent
the brightness for both SDR and HDR mode. This leads to flash in HDR
on/off. It also unconditionally programs brightness as in HDR mode. This
may introduce garbage on SDR mode in miniLED panel.

[How]
Simplify the initialization flow by removing the DC cache and taking
what panel has as default. Expand the mechanism for PWM to DPCD Aux to
restore cached brightness value generally.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Krunoslav Kovac <krunoslav.kovac@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Camille Cho <camille.cho@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h              |  1 -
 drivers/gpu/drm/amd/display/dc/dc_types.h        |  4 ----
 .../gpu/drm/amd/display/dc/link/link_detection.c |  2 +-
 drivers/gpu/drm/amd/display/dc/link/link_dpms.c  |  3 +--
 .../dc/link/protocols/link_edp_panel_control.c   | 16 +++-------------
 .../dc/link/protocols/link_edp_panel_control.h   |  1 -
 6 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 349629858205a..18b8d7f3f7389 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1610,7 +1610,6 @@ struct dc_link {
 	enum edp_revision edp_revision;
 	union dpcd_sink_ext_caps dpcd_sink_ext_caps;
 
-	struct backlight_settings backlight_settings;
 	struct psr_settings psr_settings;
 
 	struct replay_settings replay_settings;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 4a60d2c476864..a2f6c994a2a9c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -991,10 +991,6 @@ struct link_mst_stream_allocation_table {
 	struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
 };
 
-struct backlight_settings {
-	uint32_t backlight_millinits;
-};
-
 /* PSR feature flags */
 struct psr_settings {
 	bool psr_feature_enabled;		// PSR is supported by sink
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index f2fe523f914f1..24153b0df503d 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -879,7 +879,7 @@ static bool detect_link_and_local_sink(struct dc_link *link,
 			(link->dpcd_sink_ext_caps.bits.oled == 1)) {
 			dpcd_set_source_specific_data(link);
 			msleep(post_oui_delay);
-			set_cached_brightness_aux(link);
+			set_default_brightness_aux(link);
 		}
 
 		return true;
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 34a4a8c0e18c3..f8e01ca09d964 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -2142,8 +2142,7 @@ static enum dc_status enable_link_dp(struct dc_state *state,
 	if (link->dpcd_sink_ext_caps.bits.oled == 1 ||
 		link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 ||
 		link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) {
-		set_cached_brightness_aux(link);
-
+		set_default_brightness_aux(link);
 		if (link->dpcd_sink_ext_caps.bits.oled == 1)
 			msleep(bl_oled_enable_delay);
 		edp_backlight_enable_aux(link, true);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index fdeb8dff5485a..ac0fa88b52a07 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -170,7 +170,6 @@ bool edp_set_backlight_level_nits(struct dc_link *link,
 	*(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits;
 	*(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms;
 
-	link->backlight_settings.backlight_millinits = backlight_millinits;
 
 	if (!link->dpcd_caps.panel_luminance_control) {
 		if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL,
@@ -288,9 +287,9 @@ bool set_default_brightness_aux(struct dc_link *link)
 	if (link && link->dpcd_sink_ext_caps.bits.oled == 1) {
 		if (!read_default_bl_aux(link, &default_backlight))
 			default_backlight = 150000;
-		// if < 1 nits or > 5000, it might be wrong readback
-		if (default_backlight < 1000 || default_backlight > 5000000)
-			default_backlight = 150000; //
+		// if > 5000, it might be wrong readback
+		if (default_backlight > 5000000)
+			default_backlight = 150000;
 
 		return edp_set_backlight_level_nits(link, true,
 				default_backlight, 0);
@@ -298,15 +297,6 @@ bool set_default_brightness_aux(struct dc_link *link)
 	return false;
 }
 
-bool set_cached_brightness_aux(struct dc_link *link)
-{
-	if (link->backlight_settings.backlight_millinits)
-		return edp_set_backlight_level_nits(link, true,
-						    link->backlight_settings.backlight_millinits, 0);
-	else
-		return set_default_brightness_aux(link);
-	return false;
-}
 bool edp_is_ilr_optimization_enabled(struct dc_link *link)
 {
 	if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate)
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index 39526bd401782..b7493ff4fceef 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -30,7 +30,6 @@
 enum dp_panel_mode dp_get_panel_mode(struct dc_link *link);
 void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode);
 bool set_default_brightness_aux(struct dc_link *link);
-bool set_cached_brightness_aux(struct dc_link *link);
 void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd);
 int edp_get_backlight_level(const struct dc_link *link);
 bool edp_get_backlight_level_nits(struct dc_link *link,
-- 
GitLab


From 83a79dd6f4fb54c8cfe3ecbd378817047687a9b2 Mon Sep 17 00:00:00 2001
From: Wayne Lin <wayne.lin@amd.com>
Date: Mon, 7 Aug 2023 15:34:39 +0800
Subject: [PATCH 0566/2340] drm/amd/display: adjust flow for deallocation mst
 payload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Why]
MST relevant variables are maintained at drm side. As the result, we
still have to call drm_dp_remove_payload_part2() to update the relevant
values regardless the link is under mst mode or not. We used to have a
workaround patch to tackle this: commit 3d8fcc6740c9 ("drm/amd/display:
Extract temp drm mst deallocation wa into its own function")

Now it's time to remove the workaround and adjust the flow.

[How]
During deallocate_mst_payload(), source actually doesn't send out
ALLOCATE_PAYLOAD at the end as like the flow in allocate_mst_payload().
Call function dm_helpers_dp_mst_send_payload_allocation() at the end of
deallocate_mst_payload() is a bit confusing.

Separate dm_helpers_dp_mst_send_payload_allocation() into 2 functions.
Have a new function dm_helpers_dp_mst_update_mst_mgr_for_deallocation()
to replace dm_helpers_dp_mst_send_payload_allocation() for payload
deallocation.

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Wenjing Liu <wenjing.liu@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  17 ---
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |  53 +++++----
 drivers/gpu/drm/amd/display/dc/dc.h           |   1 -
 drivers/gpu/drm/amd/display/dc/dm_helpers.h   |  12 +-
 .../gpu/drm/amd/display/dc/link/link_dpms.c   | 105 ++----------------
 5 files changed, 50 insertions(+), 138 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ba3f7b232a16f..2be64c593c877 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1715,23 +1715,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	/* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */
 	adev->dm.dc->debug.ignore_cable_id = true;
 
-	/* TODO: There is a new drm mst change where the freedom of
-	 * vc_next_start_slot update is revoked/moved into drm, instead of in
-	 * driver. This forces us to make sure to get vc_next_start_slot updated
-	 * in drm function each time without considering if mst_state is active
-	 * or not. Otherwise, next time hotplug will give wrong start_slot
-	 * number. We are implementing a temporary solution to even notify drm
-	 * mst deallocation when link is no longer of MST type when uncommitting
-	 * the stream so we will have more time to work on a proper solution.
-	 * Ideally when dm_helpers_dp_mst_stop_top_mgr message is triggered, we
-	 * should notify drm to do a complete "reset" of its states and stop
-	 * calling further drm mst functions when link is no longer of an MST
-	 * type. This could happen when we unplug an MST hubs/displays. When
-	 * uncommit stream comes later after unplug, we should just reset
-	 * hardware states only.
-	 */
-	adev->dm.dc->debug.temp_mst_deallocation_sequence = true;
-
 	if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
 		DRM_INFO("DP-HDMI FRL PCON supported\n");
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index c7a29bb737e24..e44ba5c1c48ea 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -333,15 +333,14 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 	return ACT_SUCCESS;
 }
 
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
 		struct dc_context *ctx,
-		const struct dc_stream_state *stream,
-		bool enable)
+		const struct dc_stream_state *stream)
 {
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_dp_mst_topology_state *mst_state;
 	struct drm_dp_mst_topology_mgr *mst_mgr;
-	struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+	struct drm_dp_mst_atomic_payload *new_payload;
 	enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD;
 	enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
 	int ret = 0;
@@ -349,25 +348,13 @@ bool dm_helpers_dp_mst_send_payload_allocation(
 	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
 
 	if (!aconnector || !aconnector->mst_root)
-		return false;
+		return;
 
 	mst_mgr = &aconnector->mst_root->mst_mgr;
 	mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
-
 	new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
 
-	if (!enable) {
-		set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
-		clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
-	}
-
-	if (enable) {
-		ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload);
-	} else {
-		dm_helpers_construct_old_payload(mst_mgr, mst_state,
-						 new_payload, &old_payload);
-		drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
-	}
+	ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload);
 
 	if (ret) {
 		amdgpu_dm_set_mst_status(&aconnector->mst_status,
@@ -378,10 +365,36 @@ bool dm_helpers_dp_mst_send_payload_allocation(
 		amdgpu_dm_set_mst_status(&aconnector->mst_status,
 			clr_flag, false);
 	}
-
-	return true;
 }
 
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+		struct dc_context *ctx,
+		const struct dc_stream_state *stream)
+{
+	struct amdgpu_dm_connector *aconnector;
+	struct drm_dp_mst_topology_state *mst_state;
+	struct drm_dp_mst_topology_mgr *mst_mgr;
+	struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+	enum mst_progress_status set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
+	enum mst_progress_status clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
+
+	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+	if (!aconnector || !aconnector->mst_root)
+		return;
+
+	mst_mgr = &aconnector->mst_root->mst_mgr;
+	mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+	new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
+	dm_helpers_construct_old_payload(mst_mgr, mst_state,
+					 new_payload, &old_payload);
+
+	drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
+
+	amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, true);
+	amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false);
+ }
+
 void dm_dtn_log_begin(struct dc_context *ctx,
 	struct dc_log_buffer_ctx *log_ctx)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 18b8d7f3f7389..85771ef98cd73 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -956,7 +956,6 @@ struct dc_debug_options {
 	unsigned int min_prefetch_in_strobe_ns;
 	bool disable_unbounded_requesting;
 	bool dig_fifo_off_in_blank;
-	bool temp_mst_deallocation_sequence;
 	bool override_dispclk_programming;
 	bool otg_crc_db;
 	bool disallow_dispclk_dppclk_ds;
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index 7ce9a5b6c33ba..6d7a15dcf8a7b 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -103,10 +103,16 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 /*
  * Sends ALLOCATE_PAYLOAD message.
  */
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
 		struct dc_context *ctx,
-		const struct dc_stream_state *stream,
-		bool enable);
+		const struct dc_stream_state *stream);
+
+/*
+ * Update mst manager relevant variables
+ */
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+		struct dc_context *ctx,
+		const struct dc_stream_state *stream);
 
 bool dm_helpers_dp_mst_start_top_mgr(
 		struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index f8e01ca09d964..aa0a086aa7fc3 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -1247,86 +1247,6 @@ static void remove_stream_from_alloc_table(
 	}
 }
 
-static enum dc_status deallocate_mst_payload_with_temp_drm_wa(
-		struct pipe_ctx *pipe_ctx)
-{
-	struct dc_stream_state *stream = pipe_ctx->stream;
-	struct dc_link *link = stream->link;
-	struct dc_dp_mst_stream_allocation_table proposed_table = {0};
-	struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
-	int i;
-	bool mst_mode = (link->type == dc_connection_mst_branch);
-	/* adjust for drm changes*/
-	const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
-	const struct dc_link_settings empty_link_settings = {0};
-	DC_LOGGER_INIT(link->ctx->logger);
-
-	if (link_hwss->ext.set_throttled_vcp_size)
-		link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp);
-	if (link_hwss->ext.set_hblank_min_symbol_width)
-		link_hwss->ext.set_hblank_min_symbol_width(pipe_ctx,
-				&empty_link_settings,
-				avg_time_slots_per_mtp);
-
-	if (dm_helpers_dp_mst_write_payload_allocation_table(
-			stream->ctx,
-			stream,
-			&proposed_table,
-			false))
-		update_mst_stream_alloc_table(
-				link,
-				pipe_ctx->stream_res.stream_enc,
-				pipe_ctx->stream_res.hpo_dp_stream_enc,
-				&proposed_table);
-	else
-		DC_LOG_WARNING("Failed to update"
-				"MST allocation table for"
-				"pipe idx:%d\n",
-				pipe_ctx->pipe_idx);
-
-	DC_LOG_MST("%s"
-			"stream_count: %d: ",
-			__func__,
-			link->mst_stream_alloc_table.stream_count);
-
-	for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
-		DC_LOG_MST("stream_enc[%d]: %p      "
-		"stream[%d].hpo_dp_stream_enc: %p      "
-		"stream[%d].vcp_id: %d      "
-		"stream[%d].slot_count: %d\n",
-		i,
-		(void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
-		i,
-		(void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc,
-		i,
-		link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
-		i,
-		link->mst_stream_alloc_table.stream_allocations[i].slot_count);
-	}
-
-	if (link_hwss->ext.update_stream_allocation_table == NULL ||
-			link_dp_get_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) {
-		DC_LOG_DEBUG("Unknown encoding format\n");
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
-			&link->mst_stream_alloc_table);
-
-	if (mst_mode) {
-		dm_helpers_dp_mst_poll_for_allocation_change_trigger(
-			stream->ctx,
-			stream);
-	}
-
-	dm_helpers_dp_mst_send_payload_allocation(
-			stream->ctx,
-			stream,
-			false);
-
-	return DC_OK;
-}
-
 static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -1339,9 +1259,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	const struct dc_link_settings empty_link_settings = {0};
 	DC_LOGGER_INIT(link->ctx->logger);
 
-	if (link->dc->debug.temp_mst_deallocation_sequence)
-		return deallocate_mst_payload_with_temp_drm_wa(pipe_ctx);
-
 	/* deallocate_mst_payload is called before disable link. When mode or
 	 * disable/enable monitor, new stream is created which is not in link
 	 * stream[] yet. For this, payload is not allocated yet, so de-alloc
@@ -1414,16 +1331,14 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
 			&link->mst_stream_alloc_table);
 
-	if (mst_mode) {
+	if (mst_mode)
 		dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 			stream->ctx,
 			stream);
 
-		dm_helpers_dp_mst_send_payload_allocation(
-				stream->ctx,
-				stream,
-				false);
-	}
+	dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+			stream->ctx,
+			stream);
 
 	return DC_OK;
 }
@@ -1504,12 +1419,10 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
 			stream->ctx,
 			stream);
 
-	if (ret != ACT_LINK_LOST) {
+	if (ret != ACT_LINK_LOST)
 		dm_helpers_dp_mst_send_payload_allocation(
 				stream->ctx,
-				stream,
-				true);
-	}
+				stream);
 
 	/* slot X.Y for only current stream */
 	pbn_per_slot = get_pbn_per_slot(stream);
@@ -1769,8 +1682,7 @@ enum dc_status link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in
 	/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
 	dm_helpers_dp_mst_send_payload_allocation(
 			stream->ctx,
-			stream,
-			true);
+			stream);
 
 	/* notify immediate branch device table update */
 	if (dm_helpers_dp_mst_write_payload_allocation_table(
@@ -1899,8 +1811,7 @@ enum dc_status link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_
 		/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
 		dm_helpers_dp_mst_send_payload_allocation(
 				stream->ctx,
-				stream,
-				true);
+				stream);
 	}
 
 	/* increase throttled vcp size */
-- 
GitLab


From 5f2a404cbccec0c8d6635f0997cea2ac226d25d4 Mon Sep 17 00:00:00 2001
From: Dennis Chan <dennis.chan@amd.com>
Date: Tue, 31 Oct 2023 10:09:02 +0800
Subject: [PATCH 0567/2340] drm/amd/display: Disable Timing sync check in
 Full-Screen Video Case

[why]
If Panel max link off frame count is low, it will cause low residency
for Replay, then Disabled timing sync check in Full screen Video Case.

Reviewed-by: Robin Chen <robin.chen@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Dennis Chan <dennis.chan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_types.h     | 66 +++++++++++++------
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  9 ++-
 2 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index a2f6c994a2a9c..7313cfe69498c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1018,6 +1018,12 @@ enum replay_coasting_vtotal_type {
 	PR_COASTING_TYPE_NUM,
 };
 
+enum replay_link_off_frame_count_level {
+	PR_LINK_OFF_FRAME_COUNT_FAIL = 0x0,
+	PR_LINK_OFF_FRAME_COUNT_GOOD = 0x2,
+	PR_LINK_OFF_FRAME_COUNT_BEST = 0x6,
+};
+
 /*
  * This is general Interface for Replay to
  * set an 32 bit variable to dmub
@@ -1041,26 +1047,48 @@ union replay_error_status {
 };
 
 struct replay_config {
-	bool replay_supported;                          // Replay feature is supported
-	unsigned int replay_power_opt_supported;        // Power opt flags that are supported
-	bool replay_smu_opt_supported;                  // SMU optimization is supported
-	unsigned int replay_enable_option;              // Replay enablement option
-	uint32_t debug_flags;                           // Replay debug flags
-	bool replay_timing_sync_supported; // Replay desync is supported
-	bool force_disable_desync_error_check;             // Replay desync is supported
-	bool received_desync_error_hpd; //Replay Received Desync Error HPD.
-	union replay_error_status replay_error_status; // Replay error status
-};
-
-/* Replay feature flags */
+	/* Replay feature is supported */
+	bool replay_supported;
+	/* Power opt flags that are supported */
+	unsigned int replay_power_opt_supported;
+	/* SMU optimization is supported */
+	bool replay_smu_opt_supported;
+	/* Replay enablement option */
+	unsigned int replay_enable_option;
+	/* Replay debug flags */
+	uint32_t debug_flags;
+	/* Replay sync is supported */
+	bool replay_timing_sync_supported;
+	/* Replay Disable desync error check. */
+	bool force_disable_desync_error_check;
+	/* Replay Received Desync Error HPD. */
+	bool received_desync_error_hpd;
+	/* Replay feature is supported long vblank */
+	bool replay_support_fast_resync_in_ultra_sleep_mode;
+	/* Replay error status */
+	union replay_error_status replay_error_status;
+};
+
+/* Replay feature flags*/
 struct replay_settings {
-	struct replay_config config;            // Replay configuration
-	bool replay_feature_enabled;            // Replay feature is ready for activating
-	bool replay_allow_active;               // Replay is currently active
-	unsigned int replay_power_opt_active;   // Power opt flags that are activated currently
-	bool replay_smu_opt_enable;             // SMU optimization is enabled
-	uint16_t coasting_vtotal;               // Current Coasting vtotal
-	uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; // Coasting vtotal table
+	/* Replay configuration */
+	struct replay_config config;
+	/* Replay feature is ready for activating */
+	bool replay_feature_enabled;
+	/* Replay is currently active */
+	bool replay_allow_active;
+	/* Replay is currently active */
+	bool replay_allow_long_vblank;
+	/* Power opt flags that are activated currently */
+	unsigned int replay_power_opt_active;
+	/* SMU optimization is enabled */
+	bool replay_smu_opt_enable;
+	/* Current Coasting vtotal */
+	uint16_t coasting_vtotal;
+	/* Coasting vtotal table */
+	uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM];
+	/* Maximum link off frame count */
+	enum replay_link_off_frame_count_level link_off_frame_count_level;
 };
 
 /* To split out "global" and "per-panel" config settings.
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index d1becbb5aa292..a08073fc92aeb 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -3056,15 +3056,14 @@ struct dmub_cmd_replay_set_timing_sync_data {
 	 * Currently the support is only for 0 or 1
 	 */
 	uint8_t panel_inst;
-
 	/**
-	 * Explicit padding to 4 byte boundary.
+	 * REPLAY set_timing_sync
 	 */
-	uint8_t pad[3];
+	uint8_t timing_sync_supported;
 	/**
-	 * REPLAY set_timing_sync
+	 * Explicit padding to 4 byte boundary.
 	 */
-	bool timing_sync_supported;
+	uint8_t pad[2];
 };
 
 /**
-- 
GitLab


From 8f3656ce65d6d550247a85fdb5c54a5b65cc2252 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Thu, 9 Nov 2023 10:50:30 -0500
Subject: [PATCH 0568/2340] drm/amd/display: Enable SubVP on 1080p60 displays

[Description]
- Previously SubVP would never be selected on 1080p60 displays because
  it has too much vactive margin. However, implement a change to allow
  it like how 1440p60 is allowed.
- Add a new struct such that we have a list of allowed modes for
  enabling subvp with vactive margin (currently 1080p60 and 1440p60)
- Also ensure to block drr + vblank cases to prevent unexpected
  enablement of new display configs
- Update SW cursor fallback for these new potential cases as well

Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  2 +-
 .../display/dc/dcn32/dcn32_resource_helpers.c | 32 ++++++++++++++-
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 41 +++++++++++--------
 .../dc/resource/dcn32/dcn32_resource.h        | 10 +++++
 4 files changed, 65 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 84d632700949e..f3a9fdd2340de 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -5316,7 +5316,7 @@ bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_st
 	if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 &&
 			((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
 		return true;
-	else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 &&
+	else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 1080 &&
 			((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
 		return true;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index ef0a2b01734d7..389ac7ae1154f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -665,6 +665,30 @@ bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int widt
 	return is_native_scaling;
 }
 
+/**
+ * disallow_subvp_in_active_plus_blank() - Function to determine disallowed subvp + drr/vblank configs
+ *
+ * @pipe: subvp pipe to be used for the subvp + drr/vblank config
+ *
+ * Since subvp is being enabled on more configs (such as 1080p60), we want
+ * to explicitly block any configs that we don't want to enable. We do not
+ * want to enable any 1080p60 (SubVP) + drr / vblank configs since these
+ * are already convered by FPO.
+ *
+ * Return: True if disallowed, false otherwise
+ */
+static bool disallow_subvp_in_active_plus_blank(struct pipe_ctx *pipe)
+{
+	bool disallow = false;
+
+	if (resource_is_pipe_type(pipe, OPP_HEAD) &&
+			resource_is_pipe_type(pipe, DPP_PIPE)) {
+		if (pipe->stream->timing.v_addressable == 1080 && pipe->stream->timing.h_addressable == 1920)
+			disallow = true;
+	}
+	return disallow;
+}
+
 /**
  * dcn32_subvp_drr_admissable() - Determine if SubVP + DRR config is admissible
  *
@@ -688,6 +712,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 	bool drr_pipe_found = false;
 	bool drr_psr_capable = false;
 	uint64_t refresh_rate = 0;
+	bool subvp_disallow = false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -697,6 +722,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
 				subvp_count++;
 
+				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
 				refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 					pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
@@ -713,7 +739,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 		}
 	}
 
-	if (subvp_count == 1 && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
+	if (subvp_count == 1 && !subvp_disallow && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
 		((uint32_t)refresh_rate < 120))
 		result = true;
 
@@ -746,6 +772,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
 	bool vblank_psr_capable = false;
 	uint64_t refresh_rate = 0;
+	bool subvp_disallow = false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -755,6 +782,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
 				subvp_count++;
 
+				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
 				refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 					pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
@@ -772,7 +800,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 	}
 
 	if (subvp_count == 1 && non_subvp_pipes == 1 && !drr_pipe_found && !vblank_psr_capable &&
-		((uint32_t)refresh_rate < 120) &&
+		((uint32_t)refresh_rate < 120) && !subvp_disallow &&
 		vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp)
 		result = true;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 92e2ddc9ab7e2..c4ffb4b92b563 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -45,6 +45,14 @@ static const struct subvp_high_refresh_list subvp_high_refresh_list = {
 				{.width = 1920, .height = 1080, }},
 };
 
+static const struct subvp_active_margin_list subvp_active_margin_list = {
+			.min_refresh = 55,
+			.max_refresh = 65,
+			.res = {
+				{.width = 2560, .height = 1440, },
+				{.width = 1920, .height = 1080, }},
+};
+
 struct _vcs_dpi_ip_params_st dcn3_2_ip = {
 	.gpuvm_enable = 0,
 	.gpuvm_max_page_table_levels = 4,
@@ -3295,25 +3303,24 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe)
 {
 	bool allow = false;
 	uint32_t refresh_rate = 0;
+	uint32_t min_refresh = subvp_active_margin_list.min_refresh;
+	uint32_t max_refresh = subvp_active_margin_list.max_refresh;
+	uint32_t i;
 
-	/* Allow subvp on displays that have active margin for 2560x1440@60hz displays
-	 * only for now. There must be no scaling as well.
-	 *
-	 * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs
-	 * for p-state switching.
-	 */
-	if (pipe->stream && pipe->plane_state) {
-		refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
-						pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
-						/ (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
-		if (pipe->stream->timing.v_addressable == 1440 &&
-				pipe->stream->timing.h_addressable == 2560 &&
-				refresh_rate >= 55 && refresh_rate <= 65 &&
-				pipe->plane_state->src_rect.height == 1440 &&
-				pipe->plane_state->src_rect.width == 2560 &&
-				pipe->plane_state->dst_rect.height == 1440 &&
-				pipe->plane_state->dst_rect.width == 2560)
+	for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) {
+		uint32_t width = subvp_active_margin_list.res[i].width;
+		uint32_t height = subvp_active_margin_list.res[i].height;
+
+		refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
+			pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
+		refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
+		refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
+
+		if (refresh_rate >= min_refresh && refresh_rate <= max_refresh &&
+				dcn32_check_native_scaling_for_res(pipe, width, height)) {
 			allow = true;
+			break;
+		}
 	}
 	return allow;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index b931008114c91..b2f20e6cfb38f 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -39,6 +39,7 @@
 #define DCN3_2_MBLK_HEIGHT_8BPE 64
 #define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq
 #define SUBVP_HIGH_REFRESH_LIST_LEN 4
+#define SUBVP_ACTIVE_MARGIN_LIST_LEN 2
 #define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800
 #define DCN3_2_VMIN_DISPCLK_HZ 717000000
 
@@ -57,6 +58,15 @@ struct subvp_high_refresh_list {
 	} res[SUBVP_HIGH_REFRESH_LIST_LEN];
 };
 
+struct subvp_active_margin_list {
+	int min_refresh;
+	int max_refresh;
+	struct {
+		int width;
+		int height;
+	} res[SUBVP_ACTIVE_MARGIN_LIST_LEN];
+};
+
 struct dcn32_resource_pool {
 	struct resource_pool base;
 };
-- 
GitLab


From ee95135bfeecf67b313b5573054b03aa6dbc76f8 Mon Sep 17 00:00:00 2001
From: Li Ma <li.ma@amd.com>
Date: Tue, 21 Nov 2023 16:54:59 +0800
Subject: [PATCH 0569/2340] drm/amdgpu: add init_registers for nbio v7.11

enable init_registers callback func for nbio v7.11.

Signed-off-by: Li Ma <li.ma@amd.com>
Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c       | 18 ++++++------
 .../asic_reg/nbio/nbio_7_11_0_offset.h        |  2 ++
 .../asic_reg/nbio/nbio_7_11_0_sh_mask.h       | 29 +++++++++++++++++++
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index 676ab1d20d2f6..1f52b4b1db030 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -259,17 +259,17 @@ const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = {
 
 static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
 {
-/*	uint32_t def, data;
+	uint32_t def, data;
+
+	def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
+	data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+				CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+	data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+				CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
 
-		def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
-		data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
-			CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
-		data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
-			CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
 
-		if (def != data)
-			WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
-*/
 }
 
 static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
index ff30f04be5912..7ee3d291120d5 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
@@ -781,6 +781,8 @@
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2_BASE_IDX                                              5
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1                                             0x420187
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1_BASE_IDX                                    5
+#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3                                                  0x4201c6
+#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3_BASE_IDX                                         5
 
 
 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
index 7f131999a263d..eb8c556d9c930 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
@@ -24646,6 +24646,35 @@
 //BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1
 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK                                  0x00000001L
 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK                               0x00000008L
+//BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE__SHIFT                     0x8
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE__SHIFT                     0x9
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE__SHIFT                          0xb
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE__SHIFT                     0xd
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN__SHIFT                            0xf
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE__SHIFT                          0x10
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE__SHIFT                          0x11
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE__SHIFT                     0x14
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE__SHIFT                     0x15
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE__SHIFT                             0x18
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE__SHIFT                       0x19
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT                0x1b
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT                0x1c
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE__SHIFT                  0x1e
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE_MASK                       0x00000100L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE_MASK                       0x00000600L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE_MASK                            0x00001800L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE_MASK                       0x00006000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN_MASK                              0x00008000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE_MASK                            0x00010000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE_MASK                            0x000E0000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE_MASK                       0x00100000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE_MASK                       0x00E00000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE_MASK                               0x01000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE_MASK                         0x06000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK                  0x08000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK                  0x30000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE_MASK                    0xC0000000L
 
 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp
 //BIF_CFG_DEV0_RC0_VENDOR_ID
-- 
GitLab


From 88f4b10a793262c4d6cf2566b1d210ec76f87867 Mon Sep 17 00:00:00 2001
From: Tim Huang <Tim.Huang@amd.com>
Date: Tue, 21 Nov 2023 11:06:51 +0800
Subject: [PATCH 0570/2340] drm/amdgpu: fix memory overflow in the IB test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a memory overflow issue in the gfx IB test
for some ASICs. At least 20 bytes are needed for
the IB test packet.

v2: correct code indentation errors. (Christian)

Signed-off-by: Tim Huang <Tim.Huang@amd.com>
Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 40ce123231640..8ed4a6fb147a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -427,7 +427,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 		cpu_ptr = &adev->wb.wb[index];
 
-		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 		if (r) {
 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 			goto err1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 885ebd703260f..1943beb135c4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-					AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e3ff6e46f3f73..69c5009107460 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-					AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 21865668b7877..00b21ece081f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -297,8 +297,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-			  AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
-- 
GitLab


From c4b8394e76adba4f50a3c2696c75b214a291e24a Mon Sep 17 00:00:00 2001
From: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
Date: Thu, 9 Nov 2023 00:04:36 -0500
Subject: [PATCH 0571/2340] drm/amd/display: Fix tiled display misalignment

[Why]
When otg workaround is applied during clock update, otgs of
tiled display went out of sync.

[How]
To call dc_trigger_sync() after clock update to sync otgs again.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 76b47f1781279..eab713c0da0d7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1964,6 +1964,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		wait_for_no_pipes_pending(dc, context);
 		/* pplib is notified if disp_num changed */
 		dc->hwss.optimize_bandwidth(dc, context);
+		/* Need to do otg sync again as otg could be out of sync due to otg
+		 * workaround applied during clock update
+		 */
+		dc_trigger_sync(dc, context);
 	}
 
 	if (dc->hwss.update_dsc_pg)
-- 
GitLab


From fcd94ef1b3e78f7dc76309c9611915018d2d62a3 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Wed, 8 Nov 2023 10:55:53 -0500
Subject: [PATCH 0572/2340] drm/amd/display: Remove min_dst_y_next_start check
 for Z8

[Why]
Flickering occurs on DRR supported panels when engaged in DRR due to
min_dst_y_next becoming larger than the frame size itself.

[How]
In general, we should be able to enter Z8 when this is engaged but it
might be a net power loss even if the calculation wasn't bugged.

Don't support enabling Z8 during the DRR region.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Syed Hassan <syed.hassan@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 7fc8b18096ba8..ec77b2b41ba36 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -950,10 +950,8 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 {
 	int plane_count;
 	int i;
-	unsigned int min_dst_y_next_start_us;
 
 	plane_count = 0;
-	min_dst_y_next_start_us = 0;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (context->res_ctx.pipe_ctx[i].plane_state)
 			plane_count++;
@@ -975,26 +973,15 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 	else if (context->stream_count == 1 &&  context->streams[0]->signal == SIGNAL_TYPE_EDP) {
 		struct dc_link *link = context->streams[0]->sink->link;
 		struct dc_stream_status *stream_status = &context->stream_status[0];
-		struct dc_stream_state *current_stream = context->streams[0];
 		int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
 		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
 		bool is_pwrseq0 = link->link_index == 0;
-		bool isFreesyncVideo;
-
-		isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max;
-		isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min;
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) {
-				min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us;
-				break;
-			}
-		}
 
 		/* Don't support multi-plane configurations */
 		if (stream_status->plane_count > 1)
 			return DCN_ZSTATE_SUPPORT_DISALLOW;
 
-		if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000))
+		if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
 			return DCN_ZSTATE_SUPPORT_ALLOW;
 		else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr)
 			return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
-- 
GitLab


From d642b0100bf8c95e88e8396b7191b35807dabb4c Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Wed, 8 Nov 2023 10:59:00 -0500
Subject: [PATCH 0573/2340] drm/amd/display: Update min Z8 residency time to
 2100 for DCN314

[Why]
Some panels with residency period of 2054 exhibit flickering with
Z8 at the end of the frame.

[How]
As a workaround, increase the limit to block these panels.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Syed Hassan <syed.hassan@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index 677361d74a4ec..c97391edb5ff7 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -871,7 +871,7 @@ static const struct dc_plane_cap plane_cap = {
 static const struct dc_debug_options debug_defaults_drv = {
 	.disable_z10 = false,
 	.enable_z9_disable_interface = true,
-	.minimum_z8_residency_time = 2000,
+	.minimum_z8_residency_time = 2100,
 	.psr_skip_crtc_disable = true,
 	.replay_skip_crtc_disabled = true,
 	.disable_dmcu = true,
-- 
GitLab


From a5e90392fdda05ce842810bb749f3d210c3ffc65 Mon Sep 17 00:00:00 2001
From: Gabe Teeger <gabe.teeger@amd.com>
Date: Thu, 9 Nov 2023 17:53:49 -0500
Subject: [PATCH 0574/2340] Revert "drm/amd/display: Enable CM low mem power
 optimization"

This reverts commit fcfc6ceec3ebb725a0d6381a1120e7cd546e1df4.

[why]
Flickering observed. Regression search pointed to this being
the offending commit.

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Reviewed-by: Yihan Zhu <yihan.zhu@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Gabe Teeger <gabe.teeger@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c    | 13 +++++--------
 .../amd/display/dc/resource/dcn35/dcn35_resource.c  |  2 +-
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index 1a2adb3547182..994b21ed272f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -71,24 +71,21 @@ void mpc32_power_on_blnd_lut(
 {
 	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
 
-/*
 	if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) {
 		if (power_on) {
 			REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0);
 			REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5);
 		} else if (!mpc->ctx->dc->debug.disable_mem_low_power) {
-			//TODO: change to mpc
-			dpp_base->ctx->dc->optimized_required = true;
-			dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
+			ASSERT(false);
+			/* TODO: change to mpc
+			 *  dpp_base->ctx->dc->optimized_required = true;
+			 *  dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
+			 */
 		}
 	} else {
 		REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0,
 				MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 0 : 1);
 	}
-*/
-
-	REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0,
-			MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 0 : 1);
 }
 
 static enum dc_lut_mode mpc32_get_post1dlut_current(struct mpc *mpc, uint32_t mpcc_id)
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 0d5a03c6d812a..53eefba0b9dce 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 			.i2c = true,
 			.dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
 			.dscl = true,
-			.cm = true,
+			.cm = false,
 			.mpc = true,
 			.optc = true,
 			.vpg = true,
-- 
GitLab


From 0e6a12884ca72520b195d95f1fa1bf790678cd6c Mon Sep 17 00:00:00 2001
From: Prike Liang <Prike.Liang@amd.com>
Date: Wed, 8 Nov 2023 14:38:29 +0800
Subject: [PATCH 0575/2340] drm/amdgpu: correct the amdgpu runtime dereference
 usage count

Fix the amdgpu runpm dereference usage count.

Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 0cacd0b9f8bea..b8fbe97efe1d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
 		adev->have_disp_power_ref = true;
 		return ret;
 	}
-	/* if we have no active crtcs, then drop the power ref
-	 * we got before
+	/* if we have no active crtcs, then go to
+	 * drop the power ref we got before
 	 */
-	if (!active && adev->have_disp_power_ref) {
-		pm_runtime_put_autosuspend(dev->dev);
+	if (!active && adev->have_disp_power_ref)
 		adev->have_disp_power_ref = false;
-	}
-
 out:
 	/* drop the power reference we got coming in here */
 	pm_runtime_put_autosuspend(dev->dev);
-- 
GitLab


From f4233efedf75572e49efd08202b1a07196949b4a Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Thu, 9 Nov 2023 18:08:17 -0500
Subject: [PATCH 0576/2340] drm/amd/display: If P-State is supported try SubVP
 for smaller vlevel

[Description]
- To reduce vlevel further, we can try to apply subvp on
  configs that already support p-state since the natural
  p-state support may not allow for DPM0.
- Add code to try subvp to reduce UCLK DPM level further
  if already supported, but don't use subvp if it does not
  optimize the DPM level even lower

Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index c4ffb4b92b563..26411d4e97300 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1416,6 +1416,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 	unsigned int dc_pipe_idx = 0;
 	int i = 0;
 	bool found_supported_config = false;
+	int vlevel_temp = 0;
 
 	dc_assert_fp_enabled();
 
@@ -1448,13 +1449,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 	 */
 	if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
 	    !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) &&
-		(*vlevel == context->bw_ctx.dml.soc.num_states ||
+		(*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] &&
+				vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) ||
 	    vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
 	    dc->debug.force_subvp_mclk_switch)) {
 
 		dcn32_merge_pipes_for_subvp(dc, context);
 		memset(merge, 0, MAX_PIPES * sizeof(bool));
 
+		vlevel_temp = *vlevel;
 		/* to re-initialize viewport after the pipe merge */
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -1523,6 +1526,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 			}
 		}
 
+		if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp])
+			found_supported_config = false;
+
 		// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
 		// remove phantom pipes and repopulate dml pipes
 		if (!found_supported_config) {
-- 
GitLab


From 5a9a2cc8ae1889c4002850b00fd4fd9691dfac4e Mon Sep 17 00:00:00 2001
From: Zhongwei <zhongwei.zhang@amd.com>
Date: Wed, 8 Nov 2023 16:34:36 +0800
Subject: [PATCH 0577/2340] drm/amd/display: force toggle rate wa for first
 link training for a retimer

[WHY]
Handover from DMUB to driver does not perform link rate toggle.
It might cause link training failure for boot up.

[HOW]
Force toggle rate wa for first link train.
link->vendor_specific_lttpr_link_rate_wa should be zero then.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Michael Strauss <michael.strauss@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Zhongwei <zhongwei.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../link/protocols/link_dp_training_fixed_vs_pe_retimer.c   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
index fd8f6f1981461..68096d12f52fd 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
@@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq
 		lt_settings->cr_pattern_time = 16000;
 
 	/* Fixed VS/PE specific: Toggle link rate */
-	apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate);
+	apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0));
 	target_rate = get_dpcd_link_rate(&lt_settings->link_settings);
 	toggle_rate = (target_rate == 0x6) ? 0xA : 0x6;
 
@@ -271,7 +271,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 	/* Vendor specific: Toggle link rate */
 	toggle_rate = (rate == 0x6) ? 0xA : 0x6;
 
-	if (link->vendor_specific_lttpr_link_rate_wa == rate) {
+	if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) {
 		core_link_write_dpcd(
 				link,
 				DP_LINK_BW_SET,
@@ -617,7 +617,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 	/* Vendor specific: Toggle link rate */
 	toggle_rate = (rate == 0x6) ? 0xA : 0x6;
 
-	if (link->vendor_specific_lttpr_link_rate_wa == rate) {
+	if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) {
 		core_link_write_dpcd(
 				link,
 				DP_LINK_BW_SET,
-- 
GitLab


From 3d0fe49454652117522f60bfbefb978ba0e5300b Mon Sep 17 00:00:00 2001
From: Parandhaman K <parandhaman.k@amd.com>
Date: Thu, 9 Nov 2023 15:52:17 +0530
Subject: [PATCH 0578/2340] drm/amd/display: Refactor OPTC into component
 folder

[why]
Move all optc files to unique
folder optc.

[how]
creating optc repo in dc, and moved the dcnxx_optc.c and .h files into
corresponding new folders inside the optc and cleared the linkage
errors by adding relative paths in the Makefile.template.

Reviewed-by: Martin Leung <martin.leung@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Parandhaman K <parandhaman.k@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/Makefile          |   1 +
 drivers/gpu/drm/amd/display/dc/Makefile       |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn10/Makefile |   2 +-
 .../dc/dcn10/dcn10_hw_sequencer_debug.c       |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn20/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/dcn201/Makefile    |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile |   1 -
 .../gpu/drm/amd/display/dc/dcn301/Makefile    |   3 +-
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/dcn314/Makefile    |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn32/Makefile |   6 +-
 drivers/gpu/drm/amd/display/dc/dcn35/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/optc/Makefile  | 108 ++++++++++++++++++
 .../display/dc/{ => optc}/dcn10/dcn10_optc.c  |   0
 .../display/dc/{ => optc}/dcn10/dcn10_optc.h  |   0
 .../display/dc/{ => optc}/dcn20/dcn20_optc.c  |   0
 .../display/dc/{ => optc}/dcn20/dcn20_optc.h  |   2 +-
 .../dc/{ => optc}/dcn201/dcn201_optc.c        |   0
 .../dc/{ => optc}/dcn201/dcn201_optc.h        |   0
 .../display/dc/{ => optc}/dcn30/dcn30_optc.c  |   0
 .../display/dc/{ => optc}/dcn30/dcn30_optc.h  |   0
 .../dc/{ => optc}/dcn301/dcn301_optc.c        |   0
 .../dc/{ => optc}/dcn301/dcn301_optc.h        |   0
 .../display/dc/{ => optc}/dcn31/dcn31_optc.c  |   0
 .../display/dc/{ => optc}/dcn31/dcn31_optc.h  |   0
 .../dc/{ => optc}/dcn314/dcn314_optc.c        |   0
 .../dc/{ => optc}/dcn314/dcn314_optc.h        |   0
 .../display/dc/{ => optc}/dcn32/dcn32_optc.c  |   0
 .../display/dc/{ => optc}/dcn32/dcn32_optc.h  |   0
 .../display/dc/{ => optc}/dcn35/dcn35_optc.c  |   0
 .../display/dc/{ => optc}/dcn35/dcn35_optc.h  |   0
 31 files changed, 122 insertions(+), 15 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/optc/Makefile
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn10/dcn10_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn10/dcn10_optc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn20/dcn20_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn20/dcn20_optc.h (99%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn201/dcn201_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn201/dcn201_optc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn30/dcn30_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn30/dcn30_optc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn301/dcn301_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn301/dcn301_optc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn31/dcn31_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn31/dcn31_optc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn314/dcn314_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn314/dcn314_optc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn32/dcn32_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn32/dcn32_optc.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn35/dcn35_optc.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => optc}/dcn35/dcn35_optc.h (100%)

diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index 7b0959da2cacc..92a5c5efcf926 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -32,6 +32,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index ab51a065cf0e5..390e7a99be541 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -22,7 +22,7 @@
 #
 # Makefile for Display Core (dc) component.
 
-DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource
+DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource optc
 
 ifdef CONFIG_DRM_AMD_DC_FP
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 1eb7418ced3aa..0dd62934a18ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -24,7 +24,7 @@
 
 DCN10 = dcn10_init.o dcn10_ipp.o \
 		dcn10_hw_sequencer_debug.o \
-		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
+		dcn10_dpp.o dcn10_opp.o \
 		dcn10_hubp.o dcn10_mpc.o \
 		dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
 		dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 92fdab731f4ac..9033b39e0e0c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -32,7 +32,7 @@
 #include "dce/dce_hwseq.h"
 #include "abm.h"
 #include "dmcu.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
 #include "dcn10/dcn10_dpp.h"
 #include "dcn10/dcn10_mpc.h"
 #include "timing_generator.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index 93ac45802e444..bd760442ff893 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -3,7 +3,7 @@
 # Makefile for DCN.
 
 DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
-		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \
+		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_mmhubbub.o \
 		dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
 		dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index c069a894db925..a101e65115550 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -3,7 +3,7 @@
 # Makefile for DCN.
 DCN201 = dcn201_init.o \
 	dcn201_hubbub.o\
-	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \
+	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_dpp.o \
 	dcn201_dccg.o dcn201_link_encoder.o
 
 AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index 9dcf06c0954d5..cd95f322235e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -28,7 +28,6 @@ DCN30 := \
 	dcn30_hubbub.o \
 	dcn30_hubp.o \
 	dcn30_dpp.o \
-	dcn30_optc.o \
 	dcn30_dccg.o \
 	dcn30_mpc.o dcn30_vpg.o \
 	dcn30_afmt.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index cbf59d7e78c4f..090011300dcdb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -11,8 +11,7 @@
 # Makefile for dcn30.
 
 DCN301 = dcn301_init.o dcn301_dccg.o \
-		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o \
-		dcn301_optc.o
+		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o
 
 AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index 212287008c0a6..11a2662e58ef3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -11,7 +11,7 @@
 # Makefile for dcn31.
 
 DCN31 = dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \
-	dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
+	dcn31_dccg.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
 	dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
 	dcn31_afmt.o dcn31_vpg.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index 6ea47e00d62d4..d5c177346a3bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -11,7 +11,7 @@
 # Makefile for dcn314.
 
 DCN314 = dcn314_init.o \
-		dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
+		dcn314_dio_stream_encoder.o dcn314_dccg.o
 
 AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
index 3bb17dd01e4ce..905b74b530920 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
@@ -11,9 +11,9 @@
 # Makefile for dcn32.
 
 DCN32 = dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \
-		dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \
-		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \
-		dcn32_resource_helpers.o dcn32_mpc.o
+		dcn32_mmhubbub.o dcn32_dpp.o dcn32_hubp.o dcn32_mpc.o \
+		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_resource_helpers.o \
+		dcn32_hpo_dp_link_encoder.o
 
 AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
index 85a307babab9e..fa7ec82ae5f58 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
@@ -11,7 +11,7 @@
 # Makefile for DCN35.
 
 DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \
-	dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \
+	dcn35_dio_link_encoder.o dcn35_dccg.o \
 	dcn35_hubp.o dcn35_hubbub.o \
 	dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/optc/Makefile b/drivers/gpu/drm/amd/display/dc/optc/Makefile
new file mode 100644
index 0000000000000..bb213335fb9fd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/Makefile
@@ -0,0 +1,108 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'optc' sub-component of DAL.
+#
+
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+OPTC_DCN10 = dcn10_optc.o
+
+AMD_DAL_OPTC_DCN10 = $(addprefix $(AMDDALPATH)/dc/optc/dcn10/,$(OPTC_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN10)
+
+###############################################################################
+
+OPTC_DCN20 = dcn20_optc.o
+
+AMD_DAL_OPTC_DCN20 = $(addprefix $(AMDDALPATH)/dc/optc/dcn20/,$(OPTC_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN20)
+
+###############################################################################
+
+OPTC_DCN201 = dcn201_optc.o
+
+AMD_DAL_OPTC_DCN201 = $(addprefix $(AMDDALPATH)/dc/optc/dcn201/,$(OPTC_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN201)
+
+###############################################################################
+
+###############################################################################
+
+###############################################################################
+
+OPTC_DCN30 = dcn30_optc.o
+
+AMD_DAL_OPTC_DCN30 = $(addprefix $(AMDDALPATH)/dc/optc/dcn30/,$(OPTC_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN30)
+
+###############################################################################
+
+OPTC_DCN301 = dcn301_optc.o
+
+AMD_DAL_OPTC_DCN301 = $(addprefix $(AMDDALPATH)/dc/optc/dcn301/,$(OPTC_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN301)
+
+###############################################################################
+
+OPTC_DCN31 = dcn31_optc.o
+
+AMD_DAL_OPTC_DCN31 = $(addprefix $(AMDDALPATH)/dc/optc/dcn31/,$(OPTC_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN31)
+
+###############################################################################
+
+OPTC_DCN314 = dcn314_optc.o
+
+AMD_DAL_OPTC_DCN314 = $(addprefix $(AMDDALPATH)/dc/optc/dcn314/,$(OPTC_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN314)
+
+###############################################################################
+
+OPTC_DCN32 = dcn32_optc.o
+
+AMD_DAL_OPTC_DCN32 = $(addprefix $(AMDDALPATH)/dc/optc/dcn32/,$(OPTC_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN32)
+
+###############################################################################
+
+OPTC_DCN35 = dcn35_optc.o
+
+AMD_DAL_OPTC_DCN35 = $(addprefix $(AMDDALPATH)/dc/optc/dcn35/,$(OPTC_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN35)
+
+###############################################################################
+
+###############################################################################
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
similarity index 99%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
index f7968b9ca16eb..c2e03ced392ee 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
@@ -26,7 +26,7 @@
 #ifndef __DC_OPTC_DCN20_H__
 #define __DC_OPTC_DCN20_H__
 
-#include "../dcn10/dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
 
 #define TG_COMMON_REG_LIST_DCN2_0(inst) \
 	TG_COMMON_REG_LIST_DCN(inst),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
rename to drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h
rename to drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
-- 
GitLab


From 5fcf74e002f152db0c39a7cdafa082c952cc5640 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Fri, 3 Nov 2023 18:07:11 -0400
Subject: [PATCH 0579/2340] drm/amd/display: Update DCN35 clock table policy

[Why]
The new table doesn't have an implicit mapping between Fclk SOC voltage
and MemClk and it currently builds the table off of number of Fclk
states rather than DcfClock states.

The DML table in use is not correct for functionality or power and
does not align with our existing policies for DCN3x.

[How]
Build the table based on DcfClock with the following assumptions:

1. Raising Soc voltage is the most expensive operation, so assume that
running at max DispClock or DppClock is preferable.

2. Assume that we can run at max Fclk / MemClk at any state, but
restrict the maximum state to the very last entry in the table as the
worst case scenario.

3. Assume that Fclk always has a 2x multiplier on DcfClock unless the
table specifies something lower.

Reviewed-by: Taimur Hassan <syed.hassan@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  | 77 ++++++++++++-------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 0f3f6a9d51449..19f8d83698be7 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -648,27 +648,47 @@ static unsigned int convert_wck_ratio(uint8_t wck_ratio)
 	return 1;
 }
 
+static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry)
+{
+	return entry->UClk * convert_wck_ratio(entry->WckRatio) * 2;
+}
+
 static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
 						    struct integrated_info *bios_info,
 						    DpmClocks_t_dcn35 *clock_table)
 {
 	struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
 	struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
-	uint32_t max_pstate = 0,  max_uclk = 0, max_fclk = 0;
-	uint32_t min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+	uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+	uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0;
 	int i;
 
+	/* Determine min/max p-state values. */
 	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
-		if (is_valid_clock_value(clock_table->MemPstateTable[i].UClk) &&
-		    clock_table->MemPstateTable[i].UClk > max_uclk) {
-			max_uclk = clock_table->MemPstateTable[i].UClk;
+		uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+		if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) {
+			max_dram_speed_mts = dram_speed_mts;
 			max_pstate = i;
 		}
 	}
 
-	/* We expect the table to contain at least one valid Uclk entry. */
-	ASSERT(is_valid_clock_value(max_uclk));
+	min_dram_speed_mts = max_dram_speed_mts;
+	min_pstate = max_pstate;
 
+	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
+		uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+		if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) {
+			min_dram_speed_mts = dram_speed_mts;
+			min_pstate = i;
+		}
+	}
+
+	/* We expect the table to contain at least one valid P-state entry. */
+	ASSERT(clock_table->NumMemPstatesEnabled &&
+	       is_valid_clock_value(max_dram_speed_mts) &&
+	       is_valid_clock_value(min_dram_speed_mts));
 
 	/* dispclk and dppclk can be max at any voltage, same number of levels for both */
 	if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
@@ -678,47 +698,46 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 		max_dppclk = find_max_clk_value(clock_table->DppClocks,
 			clock_table->NumDispClkLevelsEnabled);
 	} else {
+		/* Invalid number of entries in the table from PMFW. */
 		ASSERT(0);
 	}
-	if (clock_table->NumFclkLevelsEnabled <= NUM_FCLK_DPM_LEVELS)
-		max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq,
-			clock_table->NumFclkLevelsEnabled);
 
-	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
-		uint32_t min_uclk = clock_table->MemPstateTable[0].UClk;
-		int j;
+	/* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+	ASSERT(clock_table->NumDcfClkLevelsEnabled > 0);
 
-		for (j = 1; j < clock_table->NumMemPstatesEnabled; j++) {
-			if (is_valid_clock_value(clock_table->MemPstateTable[j].UClk) &&
-			    clock_table->MemPstateTable[j].UClk < min_uclk &&
-			    clock_table->MemPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
-				min_uclk = clock_table->MemPstateTable[j].UClk;
-				min_pstate = j;
-			}
-		}
+	max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled);
 
+	for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+		int j;
+
+		/* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
 		for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
 			if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
-			break;
+				break;
 
 		bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
 		bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
 		bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
-		bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+
+		/* Now update clocks we do read */
 		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk;
 		bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage;
 		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
 		bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
 		bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
 		bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
-		bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
-			clock_table->MemPstateTable[min_pstate].WckRatio);
-		}
+		bw_params->clk_table.entries[i].wck_ratio =
+			convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio);
+
+		/* Dcfclk and Fclk are tied, but at a different ratio */
+		bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]);
+	}
 
 	/* Make sure to include at least one entry at highest pstate */
 	if (max_pstate != min_pstate || i == 0) {
 		if (i > MAX_NUM_DPM_LVL - 1)
 			i = MAX_NUM_DPM_LVL - 1;
+
 		bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
 		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk;
 		bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage;
@@ -734,6 +753,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 	}
 	bw_params->clk_table.num_entries = i--;
 
+	/* Make sure all highest clocks are included*/
 	bw_params->clk_table.entries[i].socclk_mhz =
 		find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
 	bw_params->clk_table.entries[i].dispclk_mhz =
@@ -752,6 +772,11 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 	bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled;
 	bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled;
 	bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled;
+
+	/*
+	 * Set any 0 clocks to max default setting. Not an issue for
+	 * power since we aren't doing switching in such case anyway
+	 */
 	for (i = 0; i < bw_params->clk_table.num_entries; i++) {
 		if (!bw_params->clk_table.entries[i].fclk_mhz) {
 			bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
-- 
GitLab


From f19c115d9c3c4f386c4662cc7b02ae1ffc2374af Mon Sep 17 00:00:00 2001
From: Taimur Hassan <syed.hassan@amd.com>
Date: Fri, 10 Nov 2023 10:06:09 -0500
Subject: [PATCH 0580/2340] drm/amd/display: Remove config update

[Why]
Prevent overwrite of dc->config.use_default_clock_table, as it should be
pre-configured.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Taimur Hassan <syed.hassan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 53eefba0b9dce..1a0ed1c7e2d45 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -1876,7 +1876,7 @@ static bool dcn35_resource_construct(
 
 	/* Use pipe context based otg sync logic */
 	dc->config.use_pipe_ctx_sync_logic = true;
-	dc->config.use_default_clock_table = false;
+
 	/* read VBIOS LTTPR caps */
 	{
 		if (ctx->dc_bios->funcs->get_lttpr_caps) {
-- 
GitLab


From fbd2076c31e3281dea7b475d80211b7a6f1500da Mon Sep 17 00:00:00 2001
From: Mukul Joshi <mukul.joshi@amd.com>
Date: Wed, 22 Nov 2023 15:17:22 -0500
Subject: [PATCH 0581/2340] drm/amdkfd: Use common function for IP version
 check

KFD_GC_VERSION was recently updated to use a new function
for IP version checks. As a result, use KFD_GC_VERSION as
the common function for all IP version checks in KFD.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index a40f8cfc6aa57..45366b4ca9769 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1127,7 +1127,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
 	struct kfd_dev *dev = adev->kfd.dev;
 	uint32_t i;
 
-	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3))
 		return dev->nodes[0];
 
 	for (i = 0; i < dev->num_nodes; i++)
-- 
GitLab


From 22136ff27c4e01fae81f6588033363a46c72ed8c Mon Sep 17 00:00:00 2001
From: Taimur Hassan <syed.hassan@amd.com>
Date: Fri, 10 Nov 2023 10:15:28 -0500
Subject: [PATCH 0582/2340] drm/amd/display: Fix conversions between bytes and
 KB

[Why]
There are a number of instances where we convert HostVMMinPageSize or
GPUVMMinPageSize from bytes to KB by dividing (rather than multiplying) and
vice versa.
Additionally, in some cases, a parameter is passed through DML in KB but
later checked as if it were in bytes.

Cc: stable@vger.kernel.org
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Taimur Hassan <syed.hassan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/dml2/display_mode_core.c  | 16 ++++++++--------
 .../display/dc/dml2/dml2_translation_helper.c    |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 510be909cd752..59718ee33e513 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 				mode_lib->ms.NoOfDPPThisState,
 				mode_lib->ms.dpte_group_bytes,
 				s->HostVMInefficiencyFactor,
-				mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+				mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 				mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 		s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
@@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
 						mode_lib->ms.MetaRowBytes[j][k],
 						mode_lib->ms.DPTEBytesPerRow[j][k],
@@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 		CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
@@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 		UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 		UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
@@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 	CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
@@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
 			locals->dpte_group_bytes,
 			s->HostVMInefficiencyFactor,
-			mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+			mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 			mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 	locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
@@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 			CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 			CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 			CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 			CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 			CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						locals->PDEAndMetaPTEBytesFrame[k],
 						locals->MetaRowByte[k],
 						locals->PixelPTEBytesPerRow[k],
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 2b9638c6d9b00..48caa34a5ce75 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -515,8 +515,8 @@ void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st
 	out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment;
 	out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes;
 	out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes;
-	out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes * 1024;
-	out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes * 1024;
+	out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024;
+	out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024;
 	out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes;
 	out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent;
 	out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent;
-- 
GitLab


From 2d1c884a535fcca74814553132d41c15dc9831ef Mon Sep 17 00:00:00 2001
From: Sung Joon Kim <sungkim@amd.com>
Date: Fri, 10 Nov 2023 11:33:45 -0500
Subject: [PATCH 0583/2340] drm/amd/display: Fix black screen on video playback
 with embedded panel

[why]
We have dynamic power control in driver but
should be ignored when power is forced on.

[how]
Bypass any power control when it's forced on.

Cc: stable@vger.kernel.org
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Sung Joon Kim <sungkim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c   |  1 -
 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c   | 10 ++++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 19f8d83698be7..63a0b885b6f03 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -1132,7 +1132,6 @@ void dcn35_clk_mgr_construct(
 			ctx->dc->debug.disable_dpp_power_gate = false;
 			ctx->dc->debug.disable_hubp_power_gate = false;
 			ctx->dc->debug.disable_dsc_power_gate = false;
-			ctx->dc->debug.disable_hpo_power_gate = false;
 		} else {
 			/*let's reset the config control flag*/
 			ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
index 0f60c40e1fc50..53bd0ae4bab5e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
@@ -261,6 +261,7 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	uint32_t power_gate = power_on ? 0 : 1;
 	uint32_t pwr_status = power_on ? 0 : 2;
 	uint32_t org_ip_request_cntl;
+	uint32_t power_forceon;
 	bool block_enabled;
 
 	if (pg_cntl->ctx->dc->debug.ignore_pg ||
@@ -277,6 +278,10 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 			return;
 	}
 
+	REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+	if (power_forceon)
+		return;
+
 	REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
 	if (org_ip_request_cntl == 0)
 		REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
@@ -304,6 +309,7 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	uint32_t power_gate = power_on ? 0 : 1;
 	uint32_t pwr_status = power_on ? 0 : 2;
 	uint32_t org_ip_request_cntl;
+	uint32_t power_forceon;
 	bool block_enabled;
 
 	if (pg_cntl->ctx->dc->debug.ignore_pg ||
@@ -319,6 +325,10 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 			return;
 	}
 
+	REG_GET(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+	if (power_forceon)
+		return;
+
 	REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
 	if (org_ip_request_cntl == 0)
 		REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
-- 
GitLab


From 641220b2a53c64efb8327ffbbc3bfcf96b5a613f Mon Sep 17 00:00:00 2001
From: Anthony Koo <anthony.koo@amd.com>
Date: Sat, 11 Nov 2023 22:47:50 -0500
Subject: [PATCH 0584/2340] drm/amd/display: [FW Promotion] Release 0.0.193.0

- Add a tracing framework, to measure duration, execution count and
  longest duration of main loop/vsync interrupt work
  GPINT command is used to start/stop the measurements.

Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Anthony Koo <anthony.koo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index a08073fc92aeb..b7d9360bfdc82 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -185,8 +185,7 @@ union abm_flags {
 		unsigned int disable_abm_requested : 1;
 
 		/**
-		 * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled
-		 * immediately.
+		 * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled immediately.
 		 */
 		unsigned int disable_abm_immediately : 1;
 
@@ -866,6 +865,13 @@ enum dmub_gpint_command {
 	 * DESC: Updates the trace buffer mask bit32~bit63.
 	 */
 	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119,
+
+	/**
+	 * DESC: Enable measurements for various task duration
+	 * ARGS: 0 - Disable measurement
+	 *       1 - Enable measurement
+	 */
+	DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY = 123,
 };
 
 /**
-- 
GitLab


From db4616f7667c9d1f733ec360a754a4d7fd32c28e Mon Sep 17 00:00:00 2001
From: Aric Cyr <aric.cyr@amd.com>
Date: Mon, 13 Nov 2023 09:07:22 -0500
Subject: [PATCH 0585/2340] drm/amd/display: 3.2.261

This version brings along the following:
- DCN314 fixes
- DCN32 fixes
- DCN35 fixes
- DML2 fixes
- eDP fixes
- HDR fixes
- MST fixes
- Replay fixes
- SubVP support for more configs

Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Aric Cyr <aric.cyr@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 85771ef98cd73..6074ac1b180a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -49,7 +49,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.260"
+#define DC_VER "3.2.261"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
GitLab


From 201761b5eb57c3fad810cde555795c3b5721a031 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Thu, 9 Nov 2023 10:50:38 +0530
Subject: [PATCH 0586/2340] drm/amdgpu: Move mca debug mode decision to ras

Refactor code such that ras block decides the default mca debug mode,
and not swsmu block.

By default mca debug mode is set to false.

v2: squash in uninitialized value fix (Alex)

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c            |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c            | 14 +++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h            |  2 +-
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c   | 12 ------------
 4 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index cf33eb219e257..54f2f346579ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -377,7 +377,7 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
 	int ret;
 
-	ret = amdgpu_mca_smu_set_debug_mode(adev, val ? true : false);
+	ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a3dc68e989108..f6b47ebce9d62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3132,6 +3132,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	amdgpu_ras_set_mca_debug_mode(adev, false);
+
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		if (!node->ras_obj) {
 			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
@@ -3405,12 +3407,18 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 	return 0;
 }
 
-void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	int ret = 0;
 
-	if (con)
-		con->is_mca_debug_mode = enable;
+	if (con) {
+		ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+		if (!ret)
+			con->is_mca_debug_mode = enable;
+	}
+
+	return ret;
 }
 
 bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 19161916ac46b..6a941eb8fb8fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -773,7 +773,7 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
 
 int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
 
-void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
 bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev);
 bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
 				     unsigned int *mode);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 8ccb8354bb496..dda2249c4994f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1516,7 +1516,6 @@ static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable)
 	if (smu->smc_fw_version < 0x554800)
 		return 0;
 
-	amdgpu_ras_set_mca_debug_mode(smu->adev, enable);
 	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead,
 					       enable ? 0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK,
 					       NULL);
@@ -2346,16 +2345,6 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
 	return ret;
 }
 
-static int smu_v13_0_6_post_init(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-
-	if (!amdgpu_sriov_vf(adev) && adev->ras_enabled)
-		return smu_v13_0_6_mca_set_debug_mode(smu, false);
-
-	return 0;
-}
-
 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
 {
 	struct smu_context *smu = adev->powerplay.pp_handle;
@@ -2920,7 +2909,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	.i2c_init = smu_v13_0_6_i2c_control_init,
 	.i2c_fini = smu_v13_0_6_i2c_control_fini,
 	.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
-	.post_init = smu_v13_0_6_post_init,
 };
 
 void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
-- 
GitLab


From 7a6931a476d30f0d6bf70b01a925f76f92d23940 Mon Sep 17 00:00:00 2001
From: Hamza Mahfooz <hamza.mahfooz@amd.com>
Date: Wed, 22 Nov 2023 14:50:34 -0500
Subject: [PATCH 0587/2340] drm/amd/display: fix ABM disablement

On recent versions of DMUB firmware, if we want to completely disable
ABM we have to pass ABM_LEVEL_IMMEDIATE_DISABLE as the requested ABM
level to DMUB. Otherwise, LCD eDP displays are unable to reach their
maximum brightness levels. So, to fix this whenever the user requests an
ABM level of 0 pass ABM_LEVEL_IMMEDIATE_DISABLE to DMUB instead. Also,
to keep the user's experience consistent map ABM_LEVEL_IMMEDIATE_DISABLE
to 0 when a user tries to read the requested ABM level.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2be64c593c877..39a4b47b6804c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6248,7 +6248,7 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
 		dm_new_state->underscan_enable = val;
 		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
-		dm_new_state->abm_level = val;
+		dm_new_state->abm_level = val ?: ABM_LEVEL_IMMEDIATE_DISABLE;
 		ret = 0;
 	}
 
@@ -6293,7 +6293,8 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
 		*val = dm_state->underscan_enable;
 		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
-		*val = dm_state->abm_level;
+		*val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ?
+			dm_state->abm_level : 0;
 		ret = 0;
 	}
 
@@ -6366,7 +6367,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
 		state->pbn = 0;
 
 		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-			state->abm_level = amdgpu_dm_abm_level;
+			state->abm_level = amdgpu_dm_abm_level ?:
+				ABM_LEVEL_IMMEDIATE_DISABLE;
 
 		__drm_atomic_helper_connector_reset(connector, &state->base);
 	}
-- 
GitLab


From 01a1526ac4c8d9342d3d8b703751f3fc5ce487ba Mon Sep 17 00:00:00 2001
From: Dmytro Laktyushkin <dmytro.laktyushkin@amd.com>
Date: Fri, 3 Nov 2023 14:55:37 -0400
Subject: [PATCH 0588/2340] drm/amd/display: update dcn315 lpddr pstate latency

[WHY/HOW]
Increase the pstate latency to improve ac/dc transition

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Dmytro Laktyushkin <dmytro.laktyushkin@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index b2c4f97afc8b4..8776055bbeaae 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -334,7 +334,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -342,7 +342,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -350,7 +350,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -358,7 +358,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
-- 
GitLab


From bcdbd6f607bacb51743ac73f13f40d015cb9de53 Mon Sep 17 00:00:00 2001
From: RutingZhang <u202112078@hust.edu.cn>
Date: Tue, 21 Nov 2023 12:36:20 +0800
Subject: [PATCH 0589/2340] drm/amd/display: remove unnecessary braces to fix
 coding style

checkpatch complains that:

WARNING: braces {} are not necessary for single statement blocks
+                if (pool->base.irqs != NULL) {
+                        dal_irq_service_destroy(&pool->base.irqs);
+                }

Fixed it by removing unnecessary braces to fix the coding style issue.

Signed-off-by: RutingZhang <u202112078@hust.edu.cn>
Reviewed-by: Dongliang Mu <dzm91@hust.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
index c07da45e1e2c6..65d337731f567 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
@@ -713,9 +713,8 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool)
 			pool->base.hubps[i] = NULL;
 		}
 
-		if (pool->base.irqs != NULL) {
+		if (pool->base.irqs != NULL)
 			dal_irq_service_destroy(&pool->base.irqs);
-		}
 	}
 
 	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
-- 
GitLab


From ca0b006939f9701ab2e14a08ed9ef77a8014d2c5 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 10 Nov 2023 09:39:18 -0500
Subject: [PATCH 0590/2340] drm/amdgpu: fix AGP addressing when GART is not at
 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This worked by luck if the GART aperture ended up at 0.  When
we ended up moving GART on some chips, the GART aperture ended
up offsetting the AGP address since the resource->start is
a GART offset, not an MC address.  Fix this by moving the AGP
address setup into amdgpu_bo_gpu_offset_no_check().

v2: check mem_type before checking agp
v3: check if the ttm bo has a ttm_tt allocated yet

Fixes: 67318cb84341 ("drm/amdgpu/gmc11: set gart placement GC11")
Tested-by: Mario Limonciello <mario.limonciello@amd.com>
Reported-by: Jesse Zhang <Jesse.Zhang@amd.com>
Reported-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: christian.koenig@amd.com
Cc: mario.limonciello@amd.com
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |  4 +---
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5f71414190e9a..d2f273d77e595 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -181,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 
+	if (!bo->ttm)
+		return AMDGPU_BO_INVALID_OFFSET;
+
 	if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
 		return AMDGPU_BO_INVALID_OFFSET;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924b..d79b4ca1ecfc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1527,10 +1527,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	uint64_t offset;
+	uint64_t offset = AMDGPU_BO_INVALID_OFFSET;
 
-	offset = (bo->tbo.resource->start << PAGE_SHIFT) +
-		 amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
+	if (bo->tbo.resource->mem_type == TTM_PL_TT)
+		offset = amdgpu_gmc_agp_addr(&bo->tbo);
+
+	if (offset == AMDGPU_BO_INVALID_OFFSET)
+		offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+			amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
 
 	return amdgpu_gmc_sign_extend(offset);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddbf..ab4a762aed5bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -959,10 +959,8 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 		return 0;
 
 	addr = amdgpu_gmc_agp_addr(bo);
-	if (addr != AMDGPU_BO_INVALID_OFFSET) {
-		bo->resource->start = addr >> PAGE_SHIFT;
+	if (addr != AMDGPU_BO_INVALID_OFFSET)
 		return 0;
-	}
 
 	/* allocate GART space */
 	placement.num_placement = 1;
-- 
GitLab


From b0e5c88d8a88bdcc9834409387e10a5ae1b2753e Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Thu, 23 Nov 2023 15:33:22 +0800
Subject: [PATCH 0591/2340] drm/amd/pm: fix a memleak in aldebaran_tables_init

When kzalloc() for smu_table->ecc_table fails, we should free
the previously allocated resources to prevent memleak.

Fixes: edd794208555 ("drm/amd/pm: add message smu to get ecc_table v2")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 1a6675d70a4bc..f1440869d1ce0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -257,8 +257,11 @@ static int aldebaran_tables_init(struct smu_context *smu)
 	}
 
 	smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL);
-	if (!smu_table->ecc_table)
+	if (!smu_table->ecc_table) {
+		kfree(smu_table->metrics_table);
+		kfree(smu_table->gpu_metrics_table);
 		return -ENOMEM;
+	}
 
 	return 0;
 }
-- 
GitLab


From 7b194fdccb8458779687063e582cf218a0920c29 Mon Sep 17 00:00:00 2001
From: Lu Yao <yaolu@kylinos.cn>
Date: Thu, 23 Nov 2023 09:22:34 +0800
Subject: [PATCH 0592/2340] drm/amdgpu: Fix cat debugfs amdgpu_regs_didt causes
 kernel null pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For 'AMDGPU_FAMILY_SI' family cards, in 'si_common_early_init' func, init
'didt_rreg' and 'didt_wreg' to 'NULL'. But in func
'amdgpu_debugfs_regs_didt_read/write', using 'RREG32_DIDT' 'WREG32_DIDT'
lacks of relevant judgment. And other 'amdgpu_ip_block_version' that use
these two definitions won't be added for 'AMDGPU_FAMILY_SI'.

So, add null pointer judgment before calling.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Lu Yao <yaolu@kylinos.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a53f436fa9f1a..0e61ebdb3f3e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -638,6 +638,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (!adev->didt_rreg)
+		return -EOPNOTSUPP;
+
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
 	if (r < 0) {
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -694,6 +697,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (!adev->didt_wreg)
+		return -EOPNOTSUPP;
+
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
 	if (r < 0) {
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-- 
GitLab


From bd1f6a31e7762ebc99b97f3eda5e5ea3708fa792 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 24 Nov 2023 09:56:32 -0600
Subject: [PATCH 0593/2340] drm/amd: Enable PCIe PME from D3

When dGPU is put into BOCO it may be in D3cold but still able send
PME on display hotplug event. For this to work it must be enabled
as wake source from D3.

When runpm is enabled use pci_wake_from_d3() to mark wakeup as
enabled by default.

Cc: stable@vger.kernel.org # 6.1+
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8f24cabe21554..8b33b130ea36e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2263,6 +2263,8 @@ retry_init:
 		pm_runtime_mark_last_busy(ddev->dev);
 		pm_runtime_put_autosuspend(ddev->dev);
 
+		pci_wake_from_d3(pdev, TRUE);
+
 		/*
 		 * For runpm implemented via BACO, PMFW will handle the
 		 * timing for BACO in and out:
-- 
GitLab


From 2e583200907cc43f062321bf751fe4b0960dbecf Mon Sep 17 00:00:00 2001
From: Dmytro Laktyushkin <dmytro.laktyushkin@amd.com>
Date: Mon, 13 Nov 2023 13:12:44 -0500
Subject: [PATCH 0594/2340] drm/amd/display: block dcn315 dynamic crb
 allocation when unintended

[WHY/HOW]
Limit the dynamic crb to dual stream configs that include eDP

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Dmytro Laktyushkin <dmytro.laktyushkin@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/resource/dcn315/dcn315_resource.c    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
index cb8024eee8e4d..515ba435f759c 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -1631,8 +1631,10 @@ static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context)
 	int i;
 	struct resource_context *res_ctx = &context->res_ctx;
 
-	/*Don't apply for single stream*/
-	if (context->stream_count < 2)
+	/* Only apply for dual stream scenarios with edp*/
+	if (context->stream_count != 2)
+		return false;
+	if (context->streams[0]->signal != SIGNAL_TYPE_EDP && context->streams[1]->signal != SIGNAL_TYPE_EDP)
 		return false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-- 
GitLab


From 4fc26c2f912b5d9232dc4432fb1b7bfd6f016be6 Mon Sep 17 00:00:00 2001
From: Michael Strauss <michael.strauss@amd.com>
Date: Fri, 13 Oct 2023 12:13:28 -0400
Subject: [PATCH 0595/2340] drm/amd/display: Update Fixed VS/PE Retimer
 Sequence

[WHY/HOW]
Add a new AUX sequence provided by vendor to improve
interop with specific display configurations.

Reviewed-by: Wenjing Liu <wenjing.liu@amd.com>
Acked-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Michael Strauss <michael.strauss@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../protocols/link_dp_training_fixed_vs_pe_retimer.c   | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
index 68096d12f52fd..7087cdc9e9775 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
@@ -205,6 +205,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 	const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
 	const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
 	const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
+	const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87};
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	uint8_t lane = 0;
 	union down_spread_ctrl downspread = {0};
@@ -293,6 +294,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 		DP_DOWNSPREAD_CTRL,
 		lt_settings->link_settings.link_spread);
 
+	link_configure_fixed_vs_pe_retimer(link->ddc,
+			&vendor_lttpr_write_data_dpmf[0],
+			sizeof(vendor_lttpr_write_data_dpmf));
+
 	if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
 		link_configure_fixed_vs_pe_retimer(link->ddc,
 				&vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
@@ -552,6 +557,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 	const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
 	const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
 	const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
+	const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87};
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	uint8_t lane = 0;
 	union down_spread_ctrl downspread = {0};
@@ -639,6 +645,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 		DP_DOWNSPREAD_CTRL,
 		lt_settings->link_settings.link_spread);
 
+	link_configure_fixed_vs_pe_retimer(link->ddc,
+			&vendor_lttpr_write_data_dpmf[0],
+			sizeof(vendor_lttpr_write_data_dpmf));
+
 	if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
 		link_configure_fixed_vs_pe_retimer(link->ddc,
 				&vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
-- 
GitLab


From 9a1c1339abf972477aeef4ea037e650f49c5892d Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 27 Oct 2023 18:21:55 -0400
Subject: [PATCH 0596/2340] drm/amdkfd: Run restore_workers on freezable WQs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make restore workers freezable so we don't have to explicitly flush them
in suspend and GPU reset code paths, and we don't accidentally try to
restore BOs while the GPU is suspended. Not having to flush restore_work
also helps avoid lock/fence dependencies in the GPU reset case where we're
not allowed to wait for fences.

A side effect of this is, that we can now have multiple concurrent threads
trying to signal the same eviction fence. Rework eviction fence signaling
and replacement to account for that.

The GPU reset path can no longer rely on restore_process_worker to resume
queues because evict/restore workers can run independently of it. Instead
call a new restore_process_helper directly.

This is an RFC and request for testing.

v2:
- Reworked eviction fence signaling
- Introduced restore_process_helper

v3:
- Handle unsignaled eviction fences in restore_process_bos

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Tested-by: Emily Deng <Emily.Deng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 68 +++++++++++----
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 87 +++++++++++--------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  4 +-
 3 files changed, 104 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b98c0969d3be8..73288f9ccaf85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1384,7 +1384,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 				  amdgpu_amdkfd_restore_userptr_worker);
 
 		*process_info = info;
-		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
 
 	vm->process_info = *process_info;
@@ -1415,6 +1414,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 	list_add_tail(&vm->vm_list_node,
 			&(vm->process_info->vm_list_head));
 	vm->process_info->n_vms++;
+
+	*ef = dma_fence_get(&vm->process_info->eviction_fence->base);
 	mutex_unlock(&vm->process_info->lock);
 
 	return 0;
@@ -1426,10 +1427,7 @@ validate_pd_fail:
 reserve_pd_fail:
 	vm->process_info = NULL;
 	if (info) {
-		/* Two fence references: one in info and one in *ef */
 		dma_fence_put(&info->eviction_fence->base);
-		dma_fence_put(*ef);
-		*ef = NULL;
 		*process_info = NULL;
 		put_pid(info->pid);
 create_evict_fence_fail:
@@ -1623,7 +1621,8 @@ int amdgpu_amdkfd_criu_resume(void *p)
 		goto out_unlock;
 	}
 	WRITE_ONCE(pinfo->block_mmu_notifications, false);
-	schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+	queue_delayed_work(system_freezable_wq,
+			   &pinfo->restore_userptr_work, 0);
 
 out_unlock:
 	mutex_unlock(&pinfo->lock);
@@ -2426,7 +2425,8 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
 				       KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
 		if (r)
 			pr_err("Failed to quiesce KFD\n");
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 	}
 	mutex_unlock(&process_info->notifier_lock);
@@ -2749,7 +2749,8 @@ unlock_out:
 
 	/* If validation failed, reschedule another attempt */
 	if (evicted_bos) {
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -2758,6 +2759,23 @@ unlock_out:
 	put_task_struct(usertask);
 }
 
+static void replace_eviction_fence(struct dma_fence **ef,
+				   struct dma_fence *new_ef)
+{
+	struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+		/* protected by process_info->lock */);
+
+	/* If we're replacing an unsignaled eviction fence, that fence will
+	 * never be signaled, and if anyone is still waiting on that fence,
+	 * they will hang forever. This should never happen. We should only
+	 * replace the fence in restore_work that only gets scheduled after
+	 * eviction work signaled the fence.
+	 */
+	WARN_ONCE(!dma_fence_is_signaled(old_ef),
+		  "Replacing unsignaled eviction fence");
+	dma_fence_put(old_ef);
+}
+
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  *   KFD process identified by process_info
  *
@@ -2781,7 +2799,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	struct amdkfd_process_info *process_info = info;
 	struct amdgpu_vm *peer_vm;
 	struct kgd_mem *mem;
-	struct amdgpu_amdkfd_fence *new_fence;
 	struct list_head duplicate_save;
 	struct amdgpu_sync sync_obj;
 	unsigned long failed_size = 0;
@@ -2907,22 +2924,35 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	/* Wait for validate and PT updates to finish */
 	amdgpu_sync_wait(&sync_obj, false);
 
-	/* Release old eviction fence and create new one, because fence only
-	 * goes from unsignaled to signaled, fence cannot be reused.
-	 * Use context and mm from the old fence.
+	/* The old eviction fence may be unsignaled if restore happens
+	 * after a GPU reset or suspend/resume. Keep the old fence in that
+	 * case. Otherwise release the old eviction fence and create new
+	 * one, because fence only goes from unsignaled to signaled once
+	 * and cannot be reused. Use context and mm from the old fence.
+	 *
+	 * If an old eviction fence signals after this check, that's OK.
+	 * Anyone signaling an eviction fence must stop the queues first
+	 * and schedule another restore worker.
 	 */
-	new_fence = amdgpu_amdkfd_fence_create(
+	if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+		struct amdgpu_amdkfd_fence *new_fence =
+			amdgpu_amdkfd_fence_create(
 				process_info->eviction_fence->base.context,
 				process_info->eviction_fence->mm,
 				NULL);
-	if (!new_fence) {
-		pr_err("Failed to create eviction fence\n");
-		ret = -ENOMEM;
-		goto validate_map_fail;
+
+		if (!new_fence) {
+			pr_err("Failed to create eviction fence\n");
+			ret = -ENOMEM;
+			goto validate_map_fail;
+		}
+		dma_fence_put(&process_info->eviction_fence->base);
+		process_info->eviction_fence = new_fence;
+		replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+	} else {
+		WARN_ONCE(*ef != &process_info->eviction_fence->base,
+			  "KFD eviction fence doesn't match KGD process_info");
 	}
-	dma_fence_put(&process_info->eviction_fence->base);
-	process_info->eviction_fence = new_fence;
-	*ef = dma_fence_get(&new_fence->base);
 
 	/* Attach new eviction fence to all BOs except pinned ones */
 	list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index c10d050e1a612..71df51fcc1b0d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -664,7 +664,8 @@ int kfd_process_create_wq(void)
 	if (!kfd_process_wq)
 		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
 	if (!kfd_restore_wq)
-		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq",
+							 WQ_FREEZABLE);
 
 	if (!kfd_process_wq || !kfd_restore_wq) {
 		kfd_process_destroy_wq();
@@ -1642,6 +1643,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 	struct amdgpu_fpriv *drv_priv;
 	struct amdgpu_vm *avm;
 	struct kfd_process *p;
+	struct dma_fence *ef;
 	struct kfd_node *dev;
 	int ret;
 
@@ -1661,11 +1663,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 
 	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
 						     &p->kgd_process_info,
-						     &p->ef);
+						     &ef);
 	if (ret) {
 		pr_err("Failed to create process VM object\n");
 		return ret;
 	}
+	RCU_INIT_POINTER(p->ef, ef);
 	pdd->drm_priv = drm_file->private_data;
 
 	ret = kfd_process_device_reserve_ib_mem(pdd);
@@ -1908,6 +1911,21 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
 	return -EINVAL;
 }
 
+static int signal_eviction_fence(struct kfd_process *p)
+{
+	struct dma_fence *ef;
+	int ret;
+
+	rcu_read_lock();
+	ef = dma_fence_get_rcu_safe(&p->ef);
+	rcu_read_unlock();
+
+	ret = dma_fence_signal(ef);
+	dma_fence_put(ef);
+
+	return ret;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
 	int ret;
@@ -1920,31 +1938,46 @@ static void evict_process_worker(struct work_struct *work)
 	 * lifetime of this thread, kfd_process p will be valid
 	 */
 	p = container_of(dwork, struct kfd_process, eviction_work);
-	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
-		  "Eviction fence mismatch\n");
-
-	/* Narrow window of overlap between restore and evict work
-	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
-	 * unreserves KFD BOs, it is possible to evicted again. But
-	 * restore has few more steps of finish. So lets wait for any
-	 * previous restore work to complete
-	 */
-	flush_delayed_work(&p->restore_work);
 
 	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
 	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
 	if (!ret) {
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
-		queue_delayed_work(kfd_restore_wq, &p->restore_work,
+		/* If another thread already signaled the eviction fence,
+		 * they are responsible stopping the queues and scheduling
+		 * the restore work.
+		 */
+		if (!signal_eviction_fence(p))
+			queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+		else
+			kfd_process_restore_queues(p);
 
 		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
 	} else
 		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
 }
 
+static int restore_process_helper(struct kfd_process *p)
+{
+	int ret = 0;
+
+	/* VMs may not have been acquired yet during debugging. */
+	if (p->kgd_process_info) {
+		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(
+			p->kgd_process_info, &p->ef);
+		if (ret)
+			return ret;
+	}
+
+	ret = kfd_process_restore_queues(p);
+	if (!ret)
+		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+	else
+		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+
+	return ret;
+}
+
 static void restore_process_worker(struct work_struct *work)
 {
 	struct delayed_work *dwork;
@@ -1970,24 +2003,15 @@ static void restore_process_worker(struct work_struct *work)
 	 */
 
 	p->last_restore_timestamp = get_jiffies_64();
-	/* VMs may not have been acquired yet during debugging. */
-	if (p->kgd_process_info)
-		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
-							     &p->ef);
+
+	ret = restore_process_helper(p);
 	if (ret) {
 		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
 		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
 		WARN(!ret, "reschedule restore work failed\n");
-		return;
 	}
-
-	ret = kfd_process_restore_queues(p);
-	if (!ret)
-		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
-	else
-		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
 }
 
 void kfd_suspend_all_processes(void)
@@ -1998,14 +2022,9 @@ void kfd_suspend_all_processes(void)
 
 	WARN(debug_evictions, "Evicting all processes");
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		cancel_delayed_work_sync(&p->eviction_work);
-		flush_delayed_work(&p->restore_work);
-
 		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
 			pr_err("Failed to suspend process 0x%x\n", p->pasid);
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
+		signal_eviction_fence(p);
 	}
 	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
@@ -2017,7 +2036,7 @@ int kfd_resume_all_processes(void)
 	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
+		if (restore_process_helper(p)) {
 			pr_err("Restore process %d failed during resume\n",
 			       p->pasid);
 			ret = -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2f3c338fd944..613f195102873 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1870,7 +1870,7 @@ out_reschedule:
 	/* If validation failed, reschedule another attempt */
 	if (evicted_ranges) {
 		pr_debug("reschedule to restore svm range\n");
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -1946,7 +1946,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed to quiesce KFD\n");
 
 		pr_debug("schedule to restore svm %p ranges\n", svms);
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 	} else {
 		unsigned long s, l;
-- 
GitLab


From 1919fd6bb09f61015549b9e5a5af1541b41f45d9 Mon Sep 17 00:00:00 2001
From: Anthony Koo <anthony.koo@amd.com>
Date: Sat, 18 Nov 2023 14:39:40 -0500
Subject: [PATCH 0597/2340] drm/amd/display: [FW Promotion] Release 0.0.194.0

- Add a new dmub command in enum dmub_cmd_cab_type

Reviewed-by: Tom Chung <chiahsuan.chung@amd.com>
Acked-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Anthony Koo <anthony.koo@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index b7d9360bfdc82..a365f6c096de8 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -1345,6 +1345,10 @@ enum dmub_cmd_cab_type {
 	 * Fit surfaces in CAB (i.e. CAB enable)
 	 */
 	DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB = 2,
+	/**
+	 * Do not fit surfaces in CAB (i.e. no CAB)
+	 */
+	DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB = 3,
 };
 
 /**
-- 
GitLab


From 061a5bf210cd7b941627092309ff6035a017cda3 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Tue, 14 Nov 2023 11:22:09 -0500
Subject: [PATCH 0598/2340] drm/amd/display: Allow DTBCLK disable for DCN35

[Why]
DTBCLK is enabled on idle and it will burn power.

[How]
There's a few issues here:
- Always enabling DTBCLK on clock manager init
- Setting refclk when DTBCLK is supposed to be disabled
- Not applying the correct calculated version refclk, but instead the
  base value which might be zero

On dtbclk_en change we'll message PMFW to enable or disable the clock
accordingly.

The DTBDTO will be then based on refclk, but it will be set to the
default fixed value if there was nothing calculated in DML despite the
clock being considered enabled.

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  | 27 +++++++++----------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 63a0b885b6f03..d5fde7d23fbf8 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -232,6 +232,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	if (dc->work_arounds.skip_clock_update)
 		return;
 
+	/* DTBCLK is fixed, so set a default if unspecified. */
+	if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
+		new_clocks->ref_dtbclk_khz = 600000;
+
 	/*
 	 * if it is safe to lower, but we are already in the lower state, we don't have to do anything
 	 * also if safe to lower is false, we just go in the higher state
@@ -265,8 +269,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 
 		if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
 			dcn35_smu_set_dtbclk(clk_mgr, true);
-			dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
 			clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+
+			dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+			clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
 		}
 
 		/* check that we're not already in D0 */
@@ -314,17 +320,12 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 		update_dispclk = true;
 	}
 
-	if (!new_clocks->dtbclk_en) {
-		new_clocks->ref_dtbclk_khz = 600000;
-	}
-
 	/* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
 	if (!dc->debug.disable_dtb_ref_clk_switch &&
-			should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
-		/* DCCG requires KHz precision for DTBCLK */
-		dcn35_smu_set_dtbclk(clk_mgr, true);
-
-		dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
+	    should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000,
+			     clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
+		dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+		clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
 	}
 
 	if (dpp_clock_lowered) {
@@ -1048,12 +1049,8 @@ void dcn35_clk_mgr_construct(
 	dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
 
 	clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base);
-	clk_mgr->base.base.clks.ref_dtbclk_khz = dcn35_smu_get_dtbclk(&clk_mgr->base);
-
-	if (!clk_mgr->base.base.clks.ref_dtbclk_khz)
-		dcn35_smu_set_dtbclk(&clk_mgr->base, true);
+	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
 
-	clk_mgr->base.base.clks.dtbclk_en = true;
 	dce_clock_read_ss_info(&clk_mgr->base);
 	/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
 
-- 
GitLab


From ca0ad76089a8171d7a075e50b7beecf092f8fd0c Mon Sep 17 00:00:00 2001
From: Candice Li <candice.li@amd.com>
Date: Fri, 24 Nov 2023 09:33:47 +0800
Subject: [PATCH 0599/2340] drm/amdgpu: Update EEPROM I2C address for smu
 v13_0_0

Check smu v13_0_0 SKU type to select EEPROM I2C address.

Signed-off-by: Candice Li <candice.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 65aa218380be1..2fde93b00cab3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 			control->i2c_address = EEPROM_I2C_MADDR_0;
 		return true;
 	case IP_VERSION(13, 0, 0):
+		if (strnstr(atom_ctx->vbios_pn, "D707",
+			    sizeof(atom_ctx->vbios_pn)))
+			control->i2c_address = EEPROM_I2C_MADDR_0;
+		else
+			control->i2c_address = EEPROM_I2C_MADDR_4;
+		return true;
 	case IP_VERSION(13, 0, 6):
 	case IP_VERSION(13, 0, 10):
 		control->i2c_address = EEPROM_I2C_MADDR_4;
-- 
GitLab


From d581ceab26a1be9fe94befe2604cbe99eadf1acc Mon Sep 17 00:00:00 2001
From: ZhenGuo Yin <zhenguo.yin@amd.com>
Date: Mon, 6 Nov 2023 18:07:51 +0800
Subject: [PATCH 0600/2340] drm/amdkfd: Free gang_ctx_bo and wptr_bo in
 pqm_uninit

[Why]
Memory leaks of gang_ctx_bo and wptr_bo.

[How]
Free gang_ctx_bo and wptr_bo in pqm_uninit.

v2: add a common function pqm_clean_queue_resource to
free queue's resources.
v3: reset pdd->pqd.num_gws when destorying GWS queue.

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: ZhenGuo Yin <zhenguo.yin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/amdkfd/kfd_process_queue_manager.c    | 54 +++++++++++--------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 77649392e2331..77f493262e058 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -169,16 +169,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 	return 0;
 }
 
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+				     struct process_queue_node *pqn)
+{
+	struct kfd_node *dev;
+	struct kfd_process_device *pdd;
+
+	dev = pqn->q->device;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return;
+	}
+
+	if (pqn->q->gws) {
+		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+		    !dev->kfd->shared_resources.enable_mes)
+			amdgpu_amdkfd_remove_gws_from_process(
+				pqm->process->kgd_process_info, pqn->q->gws);
+		pdd->qpd.num_gws = 0;
+	}
+
+	if (dev->kfd->shared_resources.enable_mes) {
+		amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
+		if (pqn->q->wptr_bo)
+			amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+	}
+}
+
 void pqm_uninit(struct process_queue_manager *pqm)
 {
 	struct process_queue_node *pqn, *next;
 
 	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
-		if (pqn->q && pqn->q->gws &&
-		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
-		    !pqn->q->device->kfd->shared_resources.enable_mes)
-			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
-				pqn->q->gws);
+		if (pqn->q)
+			pqm_clean_queue_resource(pqm, pqn);
+
 		kfd_procfs_del_queue(pqn->q);
 		uninit_queue(pqn->q);
 		list_del(&pqn->process_queue_list);
@@ -461,22 +488,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 				goto err_destroy_queue;
 		}
 
-		if (pqn->q->gws) {
-			if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
-			    !dev->kfd->shared_resources.enable_mes)
-				amdgpu_amdkfd_remove_gws_from_process(
-						pqm->process->kgd_process_info,
-						pqn->q->gws);
-			pdd->qpd.num_gws = 0;
-		}
-
-		if (dev->kfd->shared_resources.enable_mes) {
-			amdgpu_amdkfd_free_gtt_mem(dev->adev,
-						   pqn->q->gang_ctx_bo);
-			if (pqn->q->wptr_bo)
-				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
-
-		}
+		pqm_clean_queue_resource(pqm, pqn);
 		uninit_queue(pqn->q);
 	}
 
-- 
GitLab


From 5290ed0a8b261115fe4965a6d95a642b0742d159 Mon Sep 17 00:00:00 2001
From: Ilya Bakoulin <ilya.bakoulin@amd.com>
Date: Thu, 16 Nov 2023 15:28:53 -0500
Subject: [PATCH 0601/2340] drm/amd/display: Add DSC granular throughput
 adjustment

[Why/How]
Update DSC DPCD parsing to take granular throughput adjustment into
consideration.

Reviewed-by: Wenjing Liu <wenjing.liu@amd.com>
Acked-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Ilya Bakoulin <ilya.bakoulin@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index e8b5f17beb963..0df6c55eb3260 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -331,8 +331,9 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
 		int buff_block_size;
 		int buff_size;
 
-		if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT],
-										   &buff_block_size))
+		if (!dsc_buff_block_size_from_dpcd(
+				dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] & 0x03,
+				&buff_block_size))
 			return false;
 
 		buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1;
@@ -357,10 +358,15 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
 
 	{
 		int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT];
+		int dsc_throughput_granular_delta;
+
+		dsc_throughput_granular_delta = dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] >> 3;
+		dsc_throughput_granular_delta *= 2;
 
 		if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK,
 									  &dsc_sink_caps->throughput_mode_0_mps))
 			return false;
+		dsc_sink_caps->throughput_mode_0_mps += dsc_throughput_granular_delta;
 
 		dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT;
 		if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps))
-- 
GitLab


From 33a6e409165cd23d1dc580031cb749550ca18517 Mon Sep 17 00:00:00 2001
From: Taimur Hassan <syed.hassan@amd.com>
Date: Fri, 10 Nov 2023 10:24:20 -0500
Subject: [PATCH 0602/2340] drm/amd/display: Fix some HostVM parameters in DML

[Why]
A number of DML parameters related to HostVM were either missing or
being set incorrectly, which may cause inaccuracies in calculating
margins and determining BW limitations.

[How]
Correct these values where needed and populate the missing values.

Cc: stable@vger.kernel.org
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Taimur Hassan <syed.hassan@amd.com>
Signed-off-by: Roman Li <Roman.Li@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c  | 33 +++++++++++++++++++
 .../display/dc/dml2/dml2_translation_helper.c |  9 +++--
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 21c17d3296a3c..39cf1ae3a3e16 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -330,6 +330,39 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
 	dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip,
 				DML_PROJECT_DCN31);
 
+	/*copy to dml2, before dml2_create*/
+	if (clk_table->num_entries > 2) {
+
+		for (i = 0; i < clk_table->num_entries; i++) {
+			dc->dml2_options.bbox_overrides.clks_table.num_states =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+				clock_limits[i].dcfclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+				clock_limits[i].fabricclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+				clock_limits[i].dispclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+				clock_limits[i].dppclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+				clock_limits[i].socclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+				clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+				clk_table->num_entries;
+		}
+	}
+
 	/* Update latency values */
 	dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 48caa34a5ce75..fa8fe5bf7e575 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -1057,9 +1057,12 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 	}
 
 	//Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file
-	dml_dispcfg->plane.GPUVMEnable = true;
-	dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4;
-	dml_dispcfg->plane.HostVMEnable = false;
+	dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable;
+	dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels;
+	dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable;
+	dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels;
+	if (dml2->v20.dml_core_ctx.ip.hostvm_enable)
+		dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter;
 
 	dml2_populate_pipe_to_plane_index_mapping(dml2, context);
 
-- 
GitLab


From 70378005378a23fbfe0d4c44dac4187cad07da94 Mon Sep 17 00:00:00 2001
From: Aric Cyr <aric.cyr@amd.com>
Date: Sun, 19 Nov 2023 23:02:26 -0500
Subject: [PATCH 0603/2340] drm/amd/display: Promote DAL to 3.2.262

This version brings along following fixes:
- Add DSC granular throughput adjustment
- Allow DTBCLK disable for DCN35
- Update Fixed VS/PE Retimer Sequence
- Block dcn315 dynamic crb allocation when unintended
- Update dcn315 lpddr pstate latency
- Fix some HostVM parameters in DML

Reviewed-by: Tom Chung <chiahsuan.chung@amd.com>
Acked-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Aric Cyr <aric.cyr@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 6074ac1b180a3..e8c1599ab18a7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -49,7 +49,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.261"
+#define DC_VER "3.2.262"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
GitLab


From 9a5095e785c38ab8d9f3d91f4ee76f4f73ec4adc Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 13 Sep 2023 12:00:44 -0400
Subject: [PATCH 0604/2340] drm/amdgpu: add amdgpu_reg_state.h

This header defines the reg state structures exposed via
sysfs for umr debugging.

v2: add content type

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   5 +
 .../gpu/drm/amd/include/amdgpu_reg_state.h    | 140 ++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_reg_state.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ea2656bd714f4..0af8ac81facd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -110,6 +110,7 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_xcp.h"
 #include "amdgpu_seq64.h"
+#include "amdgpu_reg_state.h"
 
 #define MAX_GPU_INSTANCE		64
 
@@ -612,6 +613,10 @@ struct amdgpu_asic_funcs {
 				  const struct amdgpu_video_codecs **codecs);
 	/* encode "> 32bits" smn addressing */
 	u64 (*encode_ext_smn_addressing)(int ext_id);
+
+	ssize_t (*get_reg_state)(struct amdgpu_device *adev,
+				 enum amdgpu_reg_state reg_state, void *buf,
+				 size_t max_size);
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
new file mode 100644
index 0000000000000..3dbdb118db599
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_REG_STATE_H__
+#define __AMDGPU_REG_STATE_H__
+
+enum amdgpu_reg_state {
+	AMDGPU_REG_STATE_TYPE_INVALID	= 0,
+	AMDGPU_REG_STATE_TYPE_XGMI	= 1,
+	AMDGPU_REG_STATE_TYPE_WAFL	= 2,
+	AMDGPU_REG_STATE_TYPE_PCIE	= 3,
+	AMDGPU_REG_STATE_TYPE_USR	= 4,
+	AMDGPU_REG_STATE_TYPE_USR_1	= 5
+};
+
+struct amdgpu_reg_state_header {
+	uint16_t		structure_size;
+	uint8_t			format_revision;
+	uint8_t			content_revision;
+	uint8_t			state_type;
+	uint8_t			num_instances;
+	uint16_t		pad;
+};
+
+enum amdgpu_reg_inst_state {
+	AMDGPU_INST_S_OK,
+	AMDGPU_INST_S_EDISABLED,
+	AMDGPU_INST_S_EACCESS,
+};
+
+struct amdgpu_smn_reg_data {
+	uint64_t addr;
+	uint32_t value;
+	uint32_t pad;
+};
+
+struct amdgpu_reg_inst_header {
+	uint16_t	instance;
+	uint16_t	state;
+	uint16_t	num_smn_regs;
+	uint16_t	pad;
+};
+
+
+struct amdgpu_regs_xgmi_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_xgmi_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_XGMI */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_xgmi_v1_0	xgmi_state_regs[];
+};
+
+struct amdgpu_regs_wafl_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_wafl_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_WAFL */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_wafl_v1_0	wafl_state_regs[];
+};
+
+struct amdgpu_regs_pcie_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	uint16_t			device_status;
+	uint16_t			link_status;
+	uint32_t			sub_bus_number_latency;
+	uint32_t			pcie_corr_err_status;
+	uint32_t			pcie_uncorr_err_status;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_pcie_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_PCIE */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_pcie_v1_0	pci_state_regs[];
+};
+
+struct amdgpu_regs_usr_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_usr_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_USR */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_usr_v1_0	usr_state_regs[];
+};
+
+static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size,
+					 uint16_t num_regs)
+{
+	return num_inst *
+	       (inst_size + num_regs * sizeof(struct amdgpu_smn_reg_data));
+}
+
+#define amdgpu_asic_get_reg_state_supported(adev) \
+	((adev)->asic_funcs->get_reg_state ? 1 : 0)
+
+#define amdgpu_asic_get_reg_state(adev, state, buf, size)                  \
+	((adev)->asic_funcs->get_reg_state ?                               \
+		 (adev)->asic_funcs->get_reg_state((adev), (state), (buf), \
+						   (size)) :               \
+		 0)
+
+#endif
-- 
GitLab


From af39e6f4d8032b101907cc2ac12a21a778da568d Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Fri, 6 Oct 2023 13:49:03 +0530
Subject: [PATCH 0605/2340] drm/amdgpu: Add reg_state sysfs attribute

Add reg_state attribute to fetch the register snapshot of different
IPs like XGMI, WAFL,PCIE and USR. To get a snapshot for a particular IP
	1) Open the sysfs file
	2) Seek to the offset as defined in amdgpu_sysfs_reg_offset
	3) Read

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 62 +++++++++++++++++++
 .../gpu/drm/amd/include/amdgpu_reg_state.h    | 13 ++++
 2 files changed, 75 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5d3a83193115f..f29d0faf956e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -162,6 +162,65 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 static DEVICE_ATTR(pcie_replay_count, 0444,
 		amdgpu_device_get_pcie_replay_count, NULL);
 
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+					  struct bin_attribute *attr, char *buf,
+					  loff_t ppos, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	ssize_t bytes_read;
+
+	switch (ppos) {
+	case AMDGPU_SYS_REG_STATE_XGMI:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_WAFL:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_PCIE:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_USR:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_USR_1:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return bytes_read;
+}
+
+BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+	 AMDGPU_SYS_REG_STATE_END);
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (!amdgpu_asic_get_reg_state_supported(adev))
+		return 0;
+
+	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+
+	return ret;
+}
+
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+	if (!amdgpu_asic_get_reg_state_supported(adev))
+		return;
+	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
 /**
  * DOC: board_info
  *
@@ -4233,6 +4292,7 @@ fence_driver_init:
 			"Could not create amdgpu board attributes\n");
 
 	amdgpu_fru_sysfs_init(adev);
+	amdgpu_reg_state_sysfs_init(adev);
 
 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
 		r = amdgpu_pmu_init(adev);
@@ -4355,6 +4415,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
 	amdgpu_fru_sysfs_fini(adev);
 
+	amdgpu_reg_state_sysfs_fini(adev);
+
 	/* disable ras feature must before hw fini */
 	amdgpu_ras_pre_fini(adev);
 
diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
index 3dbdb118db599..be519c8edf496 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
@@ -34,6 +34,15 @@ enum amdgpu_reg_state {
 	AMDGPU_REG_STATE_TYPE_USR_1	= 5
 };
 
+enum amdgpu_sysfs_reg_offset {
+	AMDGPU_SYS_REG_STATE_XGMI	= 0x0000,
+	AMDGPU_SYS_REG_STATE_WAFL	= 0x1000,
+	AMDGPU_SYS_REG_STATE_PCIE	= 0x2000,
+	AMDGPU_SYS_REG_STATE_USR	= 0x3000,
+	AMDGPU_SYS_REG_STATE_USR_1	= 0x4000,
+	AMDGPU_SYS_REG_STATE_END	= 0x5000,
+};
+
 struct amdgpu_reg_state_header {
 	uint16_t		structure_size;
 	uint8_t			format_revision;
@@ -137,4 +146,8 @@ static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size,
 						   (size)) :               \
 		 0)
 
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev);
+
 #endif
-- 
GitLab


From 081a6eda2b25092e1466f09eb46d829488b75730 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Fri, 6 Oct 2023 14:20:45 +0530
Subject: [PATCH 0606/2340] drm/amdgpu: Read aquavanjaram PCIE register state

Add support to read aqua vanjaram PCIE register state

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 115 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/soc15.c         |   1 +
 drivers/gpu/drm/amd/amdgpu/soc15.h         |   4 +
 3 files changed, 120 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 3f715e7fe1a95..0a4598480dd55 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -24,6 +24,7 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
+#include "amdgpu_reg_state.h"
 #include "amdgpu_xcp.h"
 #include "gfx_v9_4_3.h"
 #include "gfxhub_v1_2.h"
@@ -656,3 +657,117 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
 
 	return 0;
 }
+
+static void aqua_read_smn(struct amdgpu_device *adev,
+			  struct amdgpu_smn_reg_data *regdata,
+			  uint64_t smn_addr)
+{
+	regdata->addr = smn_addr;
+	regdata->value = RREG32_PCIE(smn_addr);
+}
+
+struct aqua_reg_list {
+	uint64_t start_addr;
+	uint32_t num_regs;
+	uint32_t incrx;
+};
+
+#define DW_ADDR_INCR	4
+
+#define smnreg_0x1A340218	0x1A340218
+#define smnreg_0x1A3402E4	0x1A3402E4
+#define smnreg_0x1A340294	0x1A340294
+#define smreg_0x1A380088	0x1A380088
+
+#define NUM_PCIE_SMN_REGS	14
+
+static struct aqua_reg_list pcie_reg_addrs[] = {
+	{ smnreg_0x1A340218, 1, 0 },
+	{ smnreg_0x1A3402E4, 1, 0 },
+	{ smnreg_0x1A340294, 6, DW_ADDR_INCR },
+	{ smreg_0x1A380088, 6, DW_ADDR_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_pcie_v1_0 *pcie_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	struct pci_dev *us_pdev, *ds_pdev;
+	int aer_cap, r, n;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf;
+
+	szbuf = sizeof(*pcie_reg_state) +
+		amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS);
+	/* Only one instance of pcie regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf +
+						     sizeof(*pcie_reg_state));
+	pcie_regs->inst_header.instance = 0;
+	pcie_regs->inst_header.state = AMDGPU_INST_S_OK;
+	pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS;
+
+	reg_data = pcie_regs->smn_reg_values;
+
+	for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) {
+		start_addr = pcie_reg_addrs[r].start_addr;
+		incrx = pcie_reg_addrs[r].incrx;
+		num_regs = pcie_reg_addrs[r].num_regs;
+		for (n = 0; n < num_regs; n++) {
+			aqua_read_smn(adev, reg_data, start_addr + n * incrx);
+			++reg_data;
+		}
+	}
+
+	ds_pdev = pci_upstream_bridge(adev->pdev);
+	us_pdev = pci_upstream_bridge(ds_pdev);
+
+	pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA,
+				  &pcie_regs->device_status);
+	pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA,
+				  &pcie_regs->link_status);
+
+	aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR);
+	if (aer_cap) {
+		pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS,
+				      &pcie_regs->pcie_corr_err_status);
+		pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS,
+				      &pcie_regs->pcie_uncorr_err_status);
+	}
+
+	pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS,
+			      &pcie_regs->sub_bus_number_latency);
+
+	pcie_reg_state->common_header.structure_size = szbuf;
+	pcie_reg_state->common_header.format_revision = 1;
+	pcie_reg_state->common_header.content_revision = 0;
+	pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE;
+	pcie_reg_state->common_header.num_instances = 1;
+
+	return pcie_reg_state->common_header.structure_size;
+}
+
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size)
+{
+	ssize_t size;
+
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_PCIE:
+		size = aqua_vanjaram_read_pcie_state(adev, buf, max_size);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return size;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d4b8d62f42943..e3d41e8aac9d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -902,6 +902,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
 	.pre_asic_init = &soc15_pre_asic_init,
 	.query_video_codecs = &soc15_query_video_codecs,
 	.encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+	.get_reg_state = &aqua_vanjaram_get_reg_state,
 };
 
 static int soc15_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index eac54042c6c0e..1444b7765e4be 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,6 +27,7 @@
 #include "nbio_v6_1.h"
 #include "nbio_v7_0.h"
 #include "nbio_v7_4.h"
+#include "amdgpu_reg_state.h"
 
 extern const struct amdgpu_ip_block_version vega10_common_ip_block;
 
@@ -114,6 +115,9 @@ int aldebaran_reg_base_init(struct amdgpu_device *adev);
 void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev);
 u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
 int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size);
 
 void vega10_doorbell_index_init(struct amdgpu_device *adev);
 void vega20_doorbell_index_init(struct amdgpu_device *adev);
-- 
GitLab


From 92e508eaf337d465f0574dda18d805bb4df138bc Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Sat, 7 Oct 2023 15:41:27 +0530
Subject: [PATCH 0607/2340] drm/amdgpu: Read aquavanjaram XGMI register state

Add support to read state of XGMI links in aquavanjaram SOC.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 96 ++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 0a4598480dd55..a00b8c6f0a942 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -674,6 +674,15 @@ struct aqua_reg_list {
 
 #define DW_ADDR_INCR	4
 
+static void aqua_read_smn_ext(struct amdgpu_device *adev,
+			      struct amdgpu_smn_reg_data *regdata,
+			      uint64_t smn_addr, int i)
+{
+	regdata->addr =
+		smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+	regdata->value = RREG32_PCIE_EXT(regdata->addr);
+}
+
 #define smnreg_0x1A340218	0x1A340218
 #define smnreg_0x1A3402E4	0x1A3402E4
 #define smnreg_0x1A340294	0x1A340294
@@ -755,6 +764,90 @@ static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
 	return pcie_reg_state->common_header.structure_size;
 }
 
+#define smnreg_0x11A00050	0x11A00050
+#define smnreg_0x11A00180	0x11A00180
+#define smnreg_0x11A00070	0x11A00070
+#define smnreg_0x11A00200	0x11A00200
+#define smnreg_0x11A0020C	0x11A0020C
+#define smnreg_0x11A00210	0x11A00210
+#define smnreg_0x11A00108	0x11A00108
+
+#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_XGMI_SMN_REGS 25
+
+static struct aqua_reg_list xgmi_reg_addrs[] = {
+	{ smnreg_0x11A00050, 1, 0 },
+	{ smnreg_0x11A00180, 16, DW_ADDR_INCR },
+	{ smnreg_0x11A00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11A00200, 1, 0 },
+	{ smnreg_0x11A0020C, 1, 0 },
+	{ smnreg_0x11A00210, 1, 0 },
+	{ smnreg_0x11A00108, 1, 0 },
+};
+
+static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_xgmi_v1_0 *xgmi_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_xgmi_instances = 8;
+	int inst = 0, i, j, r, n;
+	const int xgmi_inst = 2;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf;
+
+	szbuf = sizeof(*xgmi_reg_state) +
+		amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs),
+				    NUM_XGMI_SMN_REGS);
+	/* Only one instance of pcie regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	p = &xgmi_reg_state->xgmi_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		for (j = 0; j < xgmi_inst; ++j) {
+			xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p;
+			xgmi_regs->inst_header.instance = inst++;
+
+			xgmi_regs->inst_header.state = AMDGPU_INST_S_OK;
+			xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS;
+
+			reg_data = xgmi_regs->smn_reg_values;
+
+			for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) {
+				start_addr = xgmi_reg_addrs[r].start_addr;
+				incrx = xgmi_reg_addrs[r].incrx;
+				num_regs = xgmi_reg_addrs[r].num_regs;
+
+				for (n = 0; n < num_regs; n++) {
+					aqua_read_smn_ext(
+						adev, reg_data,
+						XGMI_LINK_REG(start_addr, j) +
+							n * incrx,
+						i);
+					++reg_data;
+				}
+			}
+			p = reg_data;
+		}
+	}
+
+	xgmi_reg_state->common_header.structure_size = szbuf;
+	xgmi_reg_state->common_header.format_revision = 1;
+	xgmi_reg_state->common_header.content_revision = 0;
+	xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI;
+	xgmi_reg_state->common_header.num_instances = max_xgmi_instances;
+
+	return xgmi_reg_state->common_header.structure_size;
+}
+
 ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
 				    enum amdgpu_reg_state reg_state, void *buf,
 				    size_t max_size)
@@ -765,6 +858,9 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
 	case AMDGPU_REG_STATE_TYPE_PCIE:
 		size = aqua_vanjaram_read_pcie_state(adev, buf, max_size);
 		break;
+	case AMDGPU_REG_STATE_TYPE_XGMI:
+		size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
GitLab


From 36fd9969fa53c40e8a58192714d9a3624cbe04e3 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Tue, 28 Nov 2023 16:47:14 +0530
Subject: [PATCH 0608/2340] drm/amdgpu: Use another offset for GC 9.4.3 remap

The legacy region at 0x7F000 maps to valid registers in GC 9.4.3 SOCs.
Use 0x1A000 offset instead as MMIO register remap region.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index e3d41e8aac9d9..9ad4d6d3122b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1162,6 +1162,11 @@ static int soc15_common_early_init(void *handle)
 			AMD_PG_SUPPORT_VCN_DPG |
 			AMD_PG_SUPPORT_JPEG;
 		adev->external_rev_id = adev->rev_id + 0x46;
+		/* GC 9.4.3 uses MMIO register region hole at a different offset */
+		if (!amdgpu_sriov_vf(adev)) {
+			adev->rmmio_remap.reg_offset = 0x1A000;
+			adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000;
+		}
 		break;
 	default:
 		/* FIXME: not supported yet */
-- 
GitLab


From 562f33836f519a235e5c5e71bcc723ab1faccd2f Mon Sep 17 00:00:00 2001
From: Clint Taylor <clinton.a.taylor@intel.com>
Date: Tue, 28 Nov 2023 11:03:29 -0800
Subject: [PATCH 0609/2340] drm/i915/dgfx: DGFX uses direct VBT pin mapping

DDC pin mapping for DGFX cards uses direct VBT pin mapping

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Clint Taylor <clinton.a.taylor@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128190329.1335562-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_bios.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 2fd72b2fd109b..3e7e96acb24ab 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -2201,6 +2201,9 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin)
 	const u8 *ddc_pin_map;
 	int i, n_entries;
 
+	if (IS_DGFX(i915))
+		return vbt_pin;
+
 	if (INTEL_PCH_TYPE(i915) >= PCH_LNL || HAS_PCH_MTP(i915) ||
 	    IS_ALDERLAKE_P(i915)) {
 		ddc_pin_map = adlp_ddc_pin_map;
@@ -2208,8 +2211,6 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin)
 	} else if (IS_ALDERLAKE_S(i915)) {
 		ddc_pin_map = adls_ddc_pin_map;
 		n_entries = ARRAY_SIZE(adls_ddc_pin_map);
-	} else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) {
-		return vbt_pin;
 	} else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) {
 		ddc_pin_map = rkl_pch_tgp_ddc_pin_map;
 		n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map);
-- 
GitLab


From 0eec708ec3c2cb4076cd239605eb6d51e7c23e77 Mon Sep 17 00:00:00 2001
From: Alan Previn <alan.previn.teres.alexis@intel.com>
Date: Wed, 22 Nov 2023 11:15:23 -0800
Subject: [PATCH 0610/2340] drm/i915/pxp: Add drm_dbgs for critical PXP events.

Debugging PXP issues can't even begin without understanding precedding
sequence of important events. Add drm_dbg into the most important PXP
events.

 v5 : - rebase.
 v4 : - rebase.
 v3 : - move gt_dbg to after mutex block in function
        i915_gsc_proxy_component_bind. (Vivaik)
 v2 : - remove __func__ since drm_dbg covers that (Jani).
      - add timeout dbg of the restart from front-end (Alan).

Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122191523.58379-1-alan.previn.teres.alexis@intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c |  2 ++
 drivers/gpu/drm/i915/pxp/intel_pxp.c         | 15 ++++++++++++---
 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c     |  5 +++--
 drivers/gpu/drm/i915/pxp/intel_pxp_session.c |  6 +++++-
 drivers/gpu/drm/i915/pxp/intel_pxp_types.h   |  1 +
 5 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index 5f138de3c14f9..40817ebcca71f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev,
 	gsc->proxy.component = data;
 	gsc->proxy.component->mei_dev = mei_kdev;
 	mutex_unlock(&gsc->proxy.mutex);
+	gt_dbg(gt, "GSC proxy mei component bound\n");
 
 	return 0;
 }
@@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev,
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
 				 HECI_H_CSR_IE | HECI_H_CSR_RST, 0);
+	gt_dbg(gt, "GSC proxy mei component unbound\n");
 }
 
 static const struct component_ops i915_gsc_proxy_component_ops = {
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
index dc327cf40b5aa..e11f562b1876d 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
@@ -303,6 +303,8 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp)
 
 	if (!pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for suspend/fini");
 	/*
 	 * To ensure synchronous and coherent session teardown completion
 	 * in response to suspend or shutdown triggers, don't use a worker.
@@ -324,6 +326,8 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	if (pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for restart");
 	/*
 	 * The arb-session is currently inactive and we are doing a reset and restart
 	 * due to a runtime event. Use the worker that was designed for this.
@@ -332,8 +336,11 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	timeout = intel_pxp_get_backend_timeout_ms(pxp);
 
-	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout)))
+	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: restart backend timed out (%d ms)",
+			timeout);
 		return -ETIMEDOUT;
+	}
 
 	return 0;
 }
@@ -414,10 +421,12 @@ int intel_pxp_start(struct intel_pxp *pxp)
 	int ret = 0;
 
 	ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT);
-	if (ret < 0)
+	if (ret < 0) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail (%d)", ret);
 		return ret;
-	else if (ret > 1)
+	} else if (ret > 1) {
 		return -EIO; /* per UAPI spec, user may retry later */
+	}
 
 	mutex_lock(&pxp->arb_mutex);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
index 91e9622c07d09..d81750b9bddaa 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
@@ -40,11 +40,12 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir)
 		   GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) {
 		/* immediately mark PXP as inactive on termination */
 		intel_pxp_mark_termination_in_progress(pxp);
-		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED;
+		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED |
+				       PXP_EVENT_TYPE_IRQ;
 	}
 
 	if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT)
-		pxp->session_events |= PXP_TERMINATION_COMPLETE;
+		pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ;
 
 	if (pxp->session_events)
 		queue_work(system_unbound_wq, &pxp->session_work);
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
index 0a3e66b0265e8..091c86e03d1a5 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
@@ -137,8 +137,10 @@ void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res
 static void pxp_terminate_complete(struct intel_pxp *pxp)
 {
 	/* Re-create the arb session after teardown handle complete */
-	if (fetch_and_zero(&pxp->hw_state_invalidated))
+	if (fetch_and_zero(&pxp->hw_state_invalidated)) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation");
 		pxp_create_arb_session(pxp);
+	}
 
 	complete_all(&pxp->termination);
 }
@@ -157,6 +159,8 @@ static void pxp_session_work(struct work_struct *work)
 	if (!events)
 		return;
 
+	drm_dbg(&gt->i915->drm, "PXP: processing event-flags 0x%08x", events);
+
 	if (events & PXP_INVAL_REQUIRED)
 		intel_pxp_invalidate(pxp);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
index 7e11fa8034b24..07864b584cf4c 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
@@ -124,6 +124,7 @@ struct intel_pxp {
 #define PXP_TERMINATION_REQUEST  BIT(0)
 #define PXP_TERMINATION_COMPLETE BIT(1)
 #define PXP_INVAL_REQUIRED       BIT(2)
+#define PXP_EVENT_TYPE_IRQ       BIT(3)
 };
 
 #endif /* __INTEL_PXP_TYPES_H__ */
-- 
GitLab


From b101d08451de6eaebd1a840e4885ce7ce73656ad Mon Sep 17 00:00:00 2001
From: Yuran Pereira <yuran.pereira@hotmail.com>
Date: Fri, 17 Nov 2023 02:22:00 +0530
Subject: [PATCH 0611/2340] drm/nouveau: Removes unnecessary args check in
 nouveau_uvmm_sm_prepare

Checking `args` after calling `op_map_prepare` is unnecessary since
if `op_map_prepare` was to be called with  NULL args, it would lead
to a NULL pointer dereference, thus never hitting that check.

Hence remove the check and add a note to remind users of this function
to ensure that args != NULL when calling this function for a map
operation as it was suggested by Danilo [1].

[1] https://lore.kernel.org/lkml/6a1ebcef-bade-45a0-9bd9-c05f0226eb88@redhat.com

Suggested-by: Danilo Krummrich <dakr@redhat.com>
Signed-off-by: Yuran Pereira <yuran.pereira@hotmail.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/GV1PR10MB65637F4BAABFE2D8E261E1DCE8B0A@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM
---
 drivers/gpu/drm/nouveau/nouveau_uvmm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 4b236da795094..dae3baf707a0b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -608,6 +608,9 @@ op_unmap_prepare(struct drm_gpuva_op_unmap *u)
 	drm_gpuva_unmap(u);
 }
 
+/*
+ * Note: @args should not be NULL when calling for a map operation.
+ */
 static int
 nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
 			struct nouveau_uvma_prealloc *new,
@@ -628,7 +631,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
 			if (ret)
 				goto unwind;
 
-			if (args && vmm_get_range) {
+			if (vmm_get_range) {
 				ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start,
 							   vmm_get_range);
 				if (ret) {
-- 
GitLab


From 698e19da2914a0021a088b2b5d101d1854862315 Mon Sep 17 00:00:00 2001
From: Zhanjun Dong <zhanjun.dong@intel.com>
Date: Mon, 13 Nov 2023 14:49:53 -0800
Subject: [PATCH 0612/2340] drm/i915: Skip pxp init if gt is wedged

The gt wedged could be triggered by missing guc firmware file, HW not
working, etc. Once triggered, it means all gt usage is dead, therefore we
can't enable pxp under this fatal error condition.

v2: Updated commit message.
v3: Updated return code check.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113224953.378534-1-zhanjun.dong@intel.com
---
 drivers/gpu/drm/i915/i915_driver.c   | 4 +++-
 drivers/gpu/drm/i915/pxp/intel_pxp.c | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index db6f7c30adde1..8a17eb7f93214 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -804,7 +804,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto out_cleanup_modeset2;
 
-	intel_pxp_init(i915);
+	ret = intel_pxp_init(i915);
+	if (ret != -ENODEV)
+		drm_dbg(&i915->drm, "pxp init failed with %d\n", ret);
 
 	ret = intel_display_driver_probe(i915);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
index e11f562b1876d..75278e78ca90e 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
@@ -199,6 +199,9 @@ int intel_pxp_init(struct drm_i915_private *i915)
 	struct intel_gt *gt;
 	bool is_full_feature = false;
 
+	if (intel_gt_is_wedged(to_gt(i915)))
+		return -ENOTCONN;
+
 	/*
 	 * NOTE: Get the ctrl_gt before checking intel_pxp_is_supported since
 	 * we still need it if PXP's backend tee transport is needed.
-- 
GitLab


From 03219a3aa6c89f1cbb6624907f32d6939a1ffeb0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Thu, 30 Nov 2023 10:26:29 +0300
Subject: [PATCH 0613/2340] drm/imagination: Fix error codes in
 pvr_device_clk_init()

There is a cut and paste error so this code returns the wrong variable.

Fixes: 1f88f017e649 ("drm/imagination: Get GPU resources")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/1649c66b-3eea-40d2-9687-592124f968cf@moroto.mountain
---
 drivers/gpu/drm/imagination/pvr_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 8499becf4fbba..e1dcc4e420876 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -105,12 +105,12 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev)
 
 	sys_clk = devm_clk_get_optional(drm_dev->dev, "sys");
 	if (IS_ERR(sys_clk))
-		return dev_err_probe(drm_dev->dev, PTR_ERR(core_clk),
+		return dev_err_probe(drm_dev->dev, PTR_ERR(sys_clk),
 				     "failed to get sys clock\n");
 
 	mem_clk = devm_clk_get_optional(drm_dev->dev, "mem");
 	if (IS_ERR(mem_clk))
-		return dev_err_probe(drm_dev->dev, PTR_ERR(core_clk),
+		return dev_err_probe(drm_dev->dev, PTR_ERR(mem_clk),
 				     "failed to get mem clock\n");
 
 	pvr_dev->core_clk = core_clk;
-- 
GitLab


From 9ee33dc47772724ff583b060bb37c62b92b2d9c4 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Thu, 30 Nov 2023 10:27:01 +0300
Subject: [PATCH 0614/2340] drm/imagination: Fix IS_ERR() vs NULL bug in
 pvr_request_firmware()

The pvr_build_firmware_filename() function returns NULL on error.  It
doesn't return error pointers.

Fixes: f99f5f3ea7ef ("drm/imagination: Add GPU ID parsing and firmware loading")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/384288de-a779-46c7-869d-b3c63462e12b@moroto.mountain
---
 drivers/gpu/drm/imagination/pvr_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index e1dcc4e420876..5389aea7ff216 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -286,8 +286,8 @@ pvr_request_firmware(struct pvr_device *pvr_dev)
 
 	filename = pvr_build_firmware_filename(pvr_dev, "powervr/rogue",
 					       PVR_FW_VERSION_MAJOR);
-	if (IS_ERR(filename))
-		return PTR_ERR(filename);
+	if (!filename)
+		return -ENOMEM;
 
 	/*
 	 * This function takes a copy of &filename, meaning we can free our
-- 
GitLab


From 55b0f4a7c37680428d640aeada96d62888366c56 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Thu, 30 Nov 2023 10:27:15 +0300
Subject: [PATCH 0615/2340] drm/imagination: fix off by one in
 pvr_vm_mips_init() error handling

If the call to vmap() fails the "page_nr" is one element beyond the end
of the mips_data->pt_dma_addr[] and mips_data->pt_pages[] arrays.

The way that this is traditionally written is that we clean up the
partial loop iteration before the goto and then we can say
while (--i >= 0).  At that point we know that all the elements thus
far are initialized so we don't need to have NULL checks.

Fixes: 927f3e0253c1 ("drm/imagination: Implement MIPS firmware processor and MMU support")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/a2d3210b-290f-4397-9c3e-efdcca94d8ac@moroto.mountain
---
 drivers/gpu/drm/imagination/pvr_vm_mips.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c
index 7268cf6e630be..2bc7181a4c3eb 100644
--- a/drivers/gpu/drm/imagination/pvr_vm_mips.c
+++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c
@@ -57,6 +57,7 @@ pvr_vm_mips_init(struct pvr_device *pvr_dev)
 							       PAGE_SIZE, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, mips_data->pt_dma_addr[page_nr])) {
 			err = -ENOMEM;
+			__free_page(mips_data->pt_pages[page_nr]);
 			goto err_free_pages;
 		}
 	}
@@ -79,13 +80,11 @@ pvr_vm_mips_init(struct pvr_device *pvr_dev)
 	return 0;
 
 err_free_pages:
-	for (; page_nr >= 0; page_nr--) {
-		if (mips_data->pt_dma_addr[page_nr])
-			dma_unmap_page(from_pvr_device(pvr_dev)->dev,
-				       mips_data->pt_dma_addr[page_nr], PAGE_SIZE, DMA_TO_DEVICE);
+	while (--page_nr >= 0) {
+		dma_unmap_page(from_pvr_device(pvr_dev)->dev,
+			       mips_data->pt_dma_addr[page_nr], PAGE_SIZE, DMA_TO_DEVICE);
 
-		if (mips_data->pt_pages[page_nr])
-			__free_page(mips_data->pt_pages[page_nr]);
+		__free_page(mips_data->pt_pages[page_nr]);
 	}
 
 	return err;
-- 
GitLab


From 3d1ff9dfdc168722f570144aba0ce29d28d7f483 Mon Sep 17 00:00:00 2001
From: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Date: Wed, 22 Nov 2023 10:05:56 -0600
Subject: [PATCH 0616/2340] dma-buf: Correct the documentation of name and
 exp_name symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the documentation of struct dma_buf members name and exp_name
as to how these members are to be used and accessed.

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122160556.24948-1-Ramesh.Errabolu@amd.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/linux/dma-buf.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 3f31baa3293f9..8ff4add71f883 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -343,16 +343,19 @@ struct dma_buf {
 	/**
 	 * @exp_name:
 	 *
-	 * Name of the exporter; useful for debugging. See the
-	 * DMA_BUF_SET_NAME IOCTL.
+	 * Name of the exporter; useful for debugging. Must not be NULL
 	 */
 	const char *exp_name;
 
 	/**
 	 * @name:
 	 *
-	 * Userspace-provided name; useful for accounting and debugging,
-	 * protected by dma_resv_lock() on @resv and @name_lock for read access.
+	 * Userspace-provided name. Default value is NULL. If not NULL,
+	 * length cannot be longer than DMA_BUF_NAME_LEN, including NIL
+	 * char. Useful for accounting and debugging. Read/Write accesses
+	 * are protected by @name_lock
+	 *
+	 * See the IOCTLs DMA_BUF_SET_NAME or DMA_BUF_SET_NAME_A/B
 	 */
 	const char *name;
 
-- 
GitLab


From b7d2a4da38fb558832b70c6f45929649a9d114a3 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Mon, 13 Nov 2023 17:00:15 -0800
Subject: [PATCH 0617/2340] drm/i915/guc: Fix for potential false positives in
 GuC hang selftest

Noticed that the hangcheck selftest is submitting a non-preemptoble
spinner. That means that even if the GuC does not die, the heartbeat
will still kick in and trigger a reset. Which is rather defeating the
purpose of the test - to verify that the heartbeat will kick in if the
GuC itself has died. The test is deliberately killing the GuC, so it
should never hit the case of a non-dead GuC. But it is not impossible
that the kill might fail at some future point due to other driver
re-work.

So, make the spinner pre-emptible. That way the heartbeat can get
through if the GuC is alive and context switching. Thus a reset only
happens if the GuC dies. Thus, if the kill should stop working the
test will now fail rather than claim to pass.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114010016.234570-2-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
index 34b5d952e2bcb..26fdc392fce6c 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -74,7 +74,7 @@ static int intel_hang_guc(void *arg)
 		goto err;
 	}
 
-	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 	intel_context_put(ce);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
-- 
GitLab


From 706785c19fe92186815bdb9ae0148c4ba7262669 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Mon, 13 Nov 2023 17:00:16 -0800
Subject: [PATCH 0618/2340] drm/i915/guc: Add a selftest for FAST_REQUEST
 errors

There is a mechanism for reporting errors from fire and forget H2G
messages. This is the only way to find out about almost any error in
the GuC backend submission path. So it would be useful to know that it
is working.

v2: Fix some dumb over-complications and a couple of typos - review
feedback from Daniele.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114010016.234570-3-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h    |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   9 ++
 drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 115 ++++++++++++++++++++++
 3 files changed, 128 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b6dfe62c8f2a..e22c12ce245ad 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -297,6 +297,10 @@ struct intel_guc {
 	 * @number_guc_id_stolen: The number of guc_ids that have been stolen
 	 */
 	int number_guc_id_stolen;
+	/**
+	 * @fast_response_selftest: Backdoor to CT handler for fast response selftest
+	 */
+	u32 fast_response_selftest;
 #endif
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 89e314b3756bb..ed6ce73ef3b07 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		found = true;
 		break;
 	}
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+	if (!found && ct_to_guc(ct)->fast_response_selftest) {
+		CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
+		ct_to_guc(ct)->fast_response_selftest++;
+		found = true;
+	}
+#endif
+
 	if (!found) {
 		CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
 			 len, hxg[0], fence, ct->requests.last_fence);
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bfb72143566f6..c900aac85adba 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -286,11 +286,126 @@ err_wakeref:
 	return ret;
 }
 
+/*
+ * Send a context schedule H2G message with an invalid context id.
+ * This should generate a GUC_RESULT_INVALID_CONTEXT response.
+ */
+static int bad_h2g(struct intel_guc *guc)
+{
+	u32 action[] = {
+	   INTEL_GUC_ACTION_SCHED_CONTEXT,
+	   0x12345678,
+	};
+
+	return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
+}
+
+/*
+ * Set a spinner running to make sure the system is alive and active,
+ * then send a bad but asynchronous H2G command and wait to see if an
+ * error response is returned. If no response is received or if the
+ * spinner dies then the test will fail.
+ */
+#define FAST_RESPONSE_TIMEOUT_MS	1000
+static int intel_guc_fast_request(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	intel_wakeref_t wakeref;
+	struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
+	bool spinning = false;
+	int ret = 0;
+
+	if (!engine)
+		return 0;
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		gt_err(gt, "Failed to create spinner request: %pe\n", ce);
+		goto err_pm;
+	}
+
+	ret = igt_spinner_init(&spin, engine->gt);
+	if (ret) {
+		gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
+		goto err_pm;
+	}
+	spinning = true;
+
+	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+	intel_context_put(ce);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		gt_err(gt, "Failed to create spinner request: %pe\n", rq);
+		goto err_spin;
+	}
+
+	ret = request_add_spin(rq, &spin);
+	if (ret) {
+		gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	gt->uc.guc.fast_response_selftest = 1;
+
+	ret = bad_h2g(&gt->uc.guc);
+	if (ret) {
+		gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq),
+		       FAST_RESPONSE_TIMEOUT_MS);
+	if (ret) {
+		gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	if (i915_request_completed(rq)) {
+		gt_err(gt, "Spinner died waiting for fast request error!\n");
+		ret = -EIO;
+		goto err_rq;
+	}
+
+	if (gt->uc.guc.fast_response_selftest != 2) {
+		gt_err(gt, "Unexpected fast response count: %d\n",
+		       gt->uc.guc.fast_response_selftest);
+		goto err_rq;
+	}
+
+	igt_spinner_end(&spin);
+	spinning = false;
+
+	ret = intel_selftest_wait_for_rq(rq);
+	if (ret) {
+		gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+err_rq:
+	i915_request_put(rq);
+
+err_spin:
+	if (spinning)
+		igt_spinner_end(&spin);
+	igt_spinner_fini(&spin);
+
+err_pm:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	return ret;
+}
+
 int intel_guc_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(intel_guc_scrub_ctbs),
 		SUBTEST(intel_guc_steal_guc_ids),
+		SUBTEST(intel_guc_fast_request),
 	};
 	struct intel_gt *gt = to_gt(i915);
 
-- 
GitLab


From 04fcc3fec5dbd316b0b1fb2b9f8a39bfbe07af50 Mon Sep 17 00:00:00 2001
From: ZhenGuo Yin <zhenguo.yin@amd.com>
Date: Thu, 23 Nov 2023 16:47:33 +0800
Subject: [PATCH 0619/2340] drm/amdgpu: Skip access gfx11 golden registers
 under SRIOV

[Why]
Golden registers are PF-only registers on gfx11.
RLCG interface will return "out-of-range" under SRIOV VF.

[How]
Skip access gfx11 golden registers under SRIOV.

Reviewed-by: Horace Chen <horace.chen@amd.com>
Signed-off-by: ZhenGuo Yin <zhenguo.yin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 8ed4a6fb147a2..288e926e5cd43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -293,6 +293,9 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
 
 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
 {
+	if (amdgpu_sriov_vf(adev))
+		return;
+
 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 4):
-- 
GitLab


From 00f9d49bce844e8196e0c2ea298f9a41a11129d9 Mon Sep 17 00:00:00 2001
From: Yang Wang <kevinyang.wang@amd.com>
Date: Thu, 30 Nov 2023 12:58:14 +0800
Subject: [PATCH 0620/2340] drm/amdgpu: Fix missing mca debugfs node

Use amdgpu_ip_version() helper function to check ip version.

The ip version contains other information,
use the helper function to avoid reading wrong value.

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 54f2f346579ed..210aea590a52e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -485,7 +485,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se
 void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
 {
 #if defined(CONFIG_DEBUG_FS)
-	if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
+	if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6))
 		return;
 
 	debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);
-- 
GitLab


From 9596ffe1cc99dd699e595ea971a2c8ccd2735e21 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Wed, 29 Nov 2023 10:59:53 +0800
Subject: [PATCH 0621/2340] drm/amdgpu: distinguish rlc fw for different SKU

For some SKU, rlc firmware should use different one
compared with the normal rlc firmware.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 288e926e5cd43..c659ef0f47ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -67,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
@@ -567,7 +568,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
 	}
 
 	if (!amdgpu_sriov_vf(adev)) {
-		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+		    adev->pdev->revision == 0xCE)
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin");
+		else
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
 		if (err)
 			goto out;
-- 
GitLab


From f875f61b1fd626a4223a5bdf0339b5372c689e13 Mon Sep 17 00:00:00 2001
From: Yang Wang <kevinyang.wang@amd.com>
Date: Thu, 30 Nov 2023 18:39:03 +0800
Subject: [PATCH 0622/2340] drm/amdgpu: enable mca debug mode on APU by default

enable MCA debug mode on APU device by default.

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f6b47ebce9d62..72634d675e277 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3132,7 +3132,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	amdgpu_ras_set_mca_debug_mode(adev, false);
+	/* enable MCA debug on APU device */
+	amdgpu_ras_set_mca_debug_mode(adev, !!(adev->flags & AMD_IS_APU));
 
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		if (!node->ras_obj) {
-- 
GitLab


From 39c960bbf9d9ea862398759e75736cfb68c3446f Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Date: Wed, 29 Nov 2023 07:22:30 -0800
Subject: [PATCH 0623/2340] drm/radeon/r600_cs: Fix possible int overflows in
 r600_cs_check_reg()

While improbable, there may be a chance of hitting integer
overflow when the result of radeon_get_ib_value() gets shifted
left.

Avoid it by casting one of the operands to larger data type (u64).

Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.

Fixes: 1729dd33d20b ("drm/radeon/kms: r600 CS parser fixes")
Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r600_cs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 638f861af80fa..6cf54a747749d 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -1275,7 +1275,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		tmp = (reg - CB_COLOR0_BASE) / 4;
-		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
+		track->cb_color_bo_offset[tmp] = (u64)radeon_get_ib_value(p, idx) << 8;
 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_base_last[tmp] = ib[idx];
 		track->cb_color_bo[tmp] = reloc->robj;
@@ -1302,7 +1302,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		track->htile_offset = radeon_get_ib_value(p, idx) << 8;
+		track->htile_offset = (u64)radeon_get_ib_value(p, idx) << 8;
 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->htile_bo = reloc->robj;
 		track->db_dirty = true;
-- 
GitLab


From b5c5baa458faa5430c445acd9a17481274d77ccf Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Date: Wed, 29 Nov 2023 07:22:12 -0800
Subject: [PATCH 0624/2340] drm/radeon/r100: Fix integer overflow issues in
 r100_cs_track_check()

It may be possible, albeit unlikely, to encounter integer overflow
during the multiplication of several unsigned int variables, the
result being assigned to a variable 'size' of wider type.

Prevent this potential behaviour by converting one of the multiples
to unsigned long.

Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.

Fixes: 0242f74d29df ("drm/radeon: clean up CS functions in r100.c")
Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r100.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index affa9e0309b27..cfeca2694d5f9 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2321,7 +2321,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 	switch (prim_walk) {
 	case 1:
 		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * track->max_indx * 4;
+			size = track->arrays[i].esize * track->max_indx * 4UL;
 			if (track->arrays[i].robj == NULL) {
 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
 					  "bound\n", prim_walk, i);
@@ -2340,7 +2340,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 		break;
 	case 2:
 		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * (nverts - 1) * 4;
+			size = track->arrays[i].esize * (nverts - 1) * 4UL;
 			if (track->arrays[i].robj == NULL) {
 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
 					  "bound\n", prim_walk, i);
-- 
GitLab


From 71225e1c930942cb1e042fc08c5cc0c4ef30e95e Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Date: Tue, 8 Aug 2023 11:04:16 -0700
Subject: [PATCH 0625/2340] drm/radeon: check return value of
 radeon_ring_lock()

In the unlikely event of radeon_ring_lock() failing, its errno return
value should be processed. This patch checks said return value and
prints a debug message in case of an error.

Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.

Fixes: 48c0c902e2e6 ("drm/radeon/kms: add support for CP setup on SI")
Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/si.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index a91012447b56e..85e9cba49cecb 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3611,6 +3611,10 @@ static int si_cp_start(struct radeon_device *rdev)
 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
 		ring = &rdev->ring[i];
 		r = radeon_ring_lock(rdev, ring, 2);
+		if (r) {
+			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+			return r;
+		}
 
 		/* clear the compute context state */
 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
-- 
GitLab


From b719a9c15d52d4f56bdea8241a5d90fd9197ce99 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 28 Nov 2023 18:35:09 -0600
Subject: [PATCH 0626/2340] drm/amd/display: Fix NULL pointer dereference at
 hibernate

During hibernate sequence the source context might not have a clk_mgr.
So don't use it to look for DML2 support.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2980
Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2")
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index f3a9fdd2340de..e1c02527d04ae 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -4554,7 +4554,7 @@ void dc_resource_state_copy_construct(
 	struct dml2_context *dml2 = NULL;
 
 	// Need to preserve allocated dml2 context
-	if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
+	if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
 		dml2 = dst_ctx->bw_ctx.dml2;
 #endif
 
@@ -4562,7 +4562,7 @@ void dc_resource_state_copy_construct(
 
 #ifdef CONFIG_DRM_AMD_DC_FP
 	// Preserve allocated dml2 context
-	if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
+	if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
 		dst_ctx->bw_ctx.dml2 = dml2;
 #endif
 
-- 
GitLab


From 00cb022753e29a1c5993fa7d291378750377bd70 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 29 Nov 2023 19:33:16 +0200
Subject: [PATCH 0627/2340] drm/i915: use PIPE_CONF_CHECK_BOOL() for bool
 members

Don't treat bools as integers.

v2: Rebase

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129173317.1192269-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 9dc22fc8b3d3b..d62cdae7ab6b0 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5091,8 +5091,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 #define PIPE_CONF_QUIRK(quirk) \
 	((current_config->quirks | pipe_config->quirks) & (quirk))
 
-	PIPE_CONF_CHECK_I(hw.enable);
-	PIPE_CONF_CHECK_I(hw.active);
+	PIPE_CONF_CHECK_BOOL(hw.enable);
+	PIPE_CONF_CHECK_BOOL(hw.active);
 
 	PIPE_CONF_CHECK_I(cpu_transcoder);
 	PIPE_CONF_CHECK_I(mst_master_transcoder);
@@ -5301,8 +5301,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_I(dsc.config.second_line_bpg_offset);
 	PIPE_CONF_CHECK_I(dsc.config.nsl_bpg_offset);
 
-	PIPE_CONF_CHECK_I(dsc.compression_enable);
-	PIPE_CONF_CHECK_I(dsc.dsc_split);
+	PIPE_CONF_CHECK_BOOL(dsc.compression_enable);
+	PIPE_CONF_CHECK_BOOL(dsc.dsc_split);
 	PIPE_CONF_CHECK_I(dsc.compressed_bpp_x16);
 
 	PIPE_CONF_CHECK_BOOL(splitter.enable);
-- 
GitLab


From 9f82f1655fdbaf598a0106f7268ff99a606be434 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 29 Nov 2023 19:33:17 +0200
Subject: [PATCH 0628/2340] drm/i915: add bool type checks in PIPE_CONF_CHECK_*

Avoid bool/int mismatches in state checker macros.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129173317.1192269-2-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index d62cdae7ab6b0..7d48bcdd57974 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -4923,6 +4923,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_X(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected 0x%08x, found 0x%08x)", \
 				     current_config->name, \
@@ -4933,6 +4935,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_X_WITH_MASK(name, mask) do { \
 	if ((current_config->name & (mask)) != (pipe_config->name & (mask))) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected 0x%08x, found 0x%08x)", \
 				     current_config->name & (mask), \
@@ -4943,6 +4947,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_I(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected %i, found %i)", \
 				     current_config->name, \
@@ -4953,6 +4959,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_BOOL(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(!__same_type(current_config->name, bool), \
+				 __stringify(name) " is not bool");	\
 		pipe_config_mismatch(fastset, crtc,  __stringify(name), \
 				     "(expected %s, found %s)", \
 				     str_yes_no(current_config->name), \
-- 
GitLab


From f8cc37c59731c88c8c2c62222482dddf071a0600 Mon Sep 17 00:00:00 2001
From: Andrew Davis <afd@ti.com>
Date: Mon, 13 Nov 2023 14:55:01 -0600
Subject: [PATCH 0629/2340] drm/omapdrm: Improve check for contiguous buffers

While a scatter-gather table having only 1 entry does imply it is
contiguous, it is a logic error to assume the inverse. Tables can have
more than 1 entry and still be contiguous. Use a proper check here.

Signed-off-by: Andrew Davis <afd@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231113205501.616927-1-afd@ti.com
---
 drivers/gpu/drm/omapdrm/omap_gem.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index c48fa531ca321..3421e8389222a 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -48,7 +48,7 @@ struct omap_gem_object {
 	 *   OMAP_BO_MEM_DMA_API flag set)
 	 *
 	 * - buffers imported from dmabuf (with the OMAP_BO_MEM_DMABUF flag set)
-	 *   if they are physically contiguous (when sgt->orig_nents == 1)
+	 *   if they are physically contiguous
 	 *
 	 * - buffers mapped through the TILER when pin_cnt is not zero, in which
 	 *   case the DMA address points to the TILER aperture
@@ -148,12 +148,18 @@ u64 omap_gem_mmap_offset(struct drm_gem_object *obj)
 	return drm_vma_node_offset_addr(&obj->vma_node);
 }
 
+static bool omap_gem_sgt_is_contiguous(struct sg_table *sgt, size_t size)
+{
+	return !(drm_prime_get_contiguous_size(sgt) < size);
+}
+
 static bool omap_gem_is_contiguous(struct omap_gem_object *omap_obj)
 {
 	if (omap_obj->flags & OMAP_BO_MEM_DMA_API)
 		return true;
 
-	if ((omap_obj->flags & OMAP_BO_MEM_DMABUF) && omap_obj->sgt->nents == 1)
+	if ((omap_obj->flags & OMAP_BO_MEM_DMABUF) &&
+	    omap_gem_sgt_is_contiguous(omap_obj->sgt, omap_obj->base.size))
 		return true;
 
 	return false;
@@ -1385,7 +1391,7 @@ struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size,
 	union omap_gem_size gsize;
 
 	/* Without a DMM only physically contiguous buffers can be supported. */
-	if (sgt->orig_nents != 1 && !priv->has_dmm)
+	if (!omap_gem_sgt_is_contiguous(sgt, size) && !priv->has_dmm)
 		return ERR_PTR(-EINVAL);
 
 	gsize.bytes = PAGE_ALIGN(size);
@@ -1399,7 +1405,7 @@ struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size,
 
 	omap_obj->sgt = sgt;
 
-	if (sgt->orig_nents == 1) {
+	if (omap_gem_sgt_is_contiguous(sgt, size)) {
 		omap_obj->dma_addr = sg_dma_address(sgt->sgl);
 	} else {
 		/* Create pages list from sgt */
-- 
GitLab


From 7959ceb767e4b6c8ced7cb8aaa1c6439cfca890c Mon Sep 17 00:00:00 2001
From: Aradhya Bhatia <a-bhatia1@ti.com>
Date: Wed, 8 Nov 2023 22:46:18 +0530
Subject: [PATCH 0630/2340] dt-bindings: display: ti: Add support for am62a7
 dss

The DSS controller on TI's AM62A7 SoC is an update from that on TI's
AM625 SoC. Like the DSS in AM625, the DSS in this SoC has 2 video
pipelines, but unlike the former, the latter only has one output port on
VP2 to service DPI display sinks.

Add the new controller's compatible.

Signed-off-by: Aradhya Bhatia <a-bhatia1@ti.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://lore.kernel.org/r/20231108171619.978438-2-a-bhatia1@ti.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 .../bindings/display/ti/ti,am65x-dss.yaml          | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
index ae09cd3cbce1f..b6767ef0d24de 100644
--- a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
+++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml
@@ -23,6 +23,7 @@ properties:
   compatible:
     enum:
       - ti,am625-dss
+      - ti,am62a7,dss
       - ti,am65x-dss
 
   reg:
@@ -87,6 +88,7 @@ properties:
           For AM65x DSS, the OLDI output port node from video port 1.
           For AM625 DSS, the internal DPI output port node from video
           port 1.
+          For AM62A7 DSS, the port is tied off inside the SoC.
 
       port@1:
         $ref: /schemas/graph.yaml#/properties/port
@@ -108,6 +110,18 @@ properties:
       Input memory (from main memory to dispc) bandwidth limit in
       bytes per second
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,am62a7-dss
+    then:
+      properties:
+        ports:
+          properties:
+            port@0: false
+
 required:
   - compatible
   - reg
-- 
GitLab


From 5cc5ea7b6d7b0649b74df1e0f2d1875e683704d9 Mon Sep 17 00:00:00 2001
From: Aradhya Bhatia <a-bhatia1@ti.com>
Date: Wed, 8 Nov 2023 22:46:19 +0530
Subject: [PATCH 0631/2340] drm/tidss: Add support for AM62A7 DSS

Add support for the DSS controller on TI's AM62A7 SoC in the tidss
driver.

This controller has 2 video pipelines that can render 2 video planes on
over a screen, using the overlay managers. The output of the DSS comes
from video port 2 (VP2) in the form of RGB88 DPI signals, while the VP1
is tied off inside the SoC.

Also add and use a new type of VP, DISPC_VP_TIED_OFF, for the tied-off
VP1 of AM62A DSS.

Signed-off-by: Aradhya Bhatia <a-bhatia1@ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://lore.kernel.org/r/20231108171619.978438-3-a-bhatia1@ti.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 59 +++++++++++++++++++++++++++++
 drivers/gpu/drm/tidss/tidss_dispc.h |  3 ++
 drivers/gpu/drm/tidss/tidss_drv.c   |  1 +
 3 files changed, 63 insertions(+)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 9d9dee7abaefd..7af416457c57d 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -322,6 +322,60 @@ const struct dispc_features dispc_am625_feats = {
 	.vid_order = { 1, 0 },
 };
 
+const struct dispc_features dispc_am62a7_feats = {
+	/*
+	 * if the code reaches dispc_mode_valid with VP1,
+	 * it should return MODE_BAD.
+	 */
+	.max_pclk_khz = {
+		[DISPC_VP_TIED_OFF] = 0,
+		[DISPC_VP_DPI] = 165000,
+	},
+
+	.scaling = {
+		.in_width_max_5tap_rgb = 1280,
+		.in_width_max_3tap_rgb = 2560,
+		.in_width_max_5tap_yuv = 2560,
+		.in_width_max_3tap_yuv = 4096,
+		.upscale_limit = 16,
+		.downscale_limit_5tap = 4,
+		.downscale_limit_3tap = 2,
+		/*
+		 * The max supported pixel inc value is 255. The value
+		 * of pixel inc is calculated like this: 1+(xinc-1)*bpp.
+		 * The maximum bpp of all formats supported by the HW
+		 * is 8. So the maximum supported xinc value is 32,
+		 * because 1+(32-1)*8 < 255 < 1+(33-1)*4.
+		 */
+		.xinc_max = 32,
+	},
+
+	.subrev = DISPC_AM62A7,
+
+	.common = "common",
+	.common_regs = tidss_am65x_common_regs,
+
+	.num_vps = 2,
+	.vp_name = { "vp1", "vp2" },
+	.ovr_name = { "ovr1", "ovr2" },
+	.vpclk_name =  { "vp1", "vp2" },
+	/* VP1 of the DSS in AM62A7 SoC is tied off internally */
+	.vp_bus_type = { DISPC_VP_TIED_OFF, DISPC_VP_DPI },
+
+	.vp_feat = { .color = {
+			.has_ctm = true,
+			.gamma_size = 256,
+			.gamma_type = TIDSS_GAMMA_8BIT,
+		},
+	},
+
+	.num_planes = 2,
+	/* note: vid is plane_id 0 and vidl1 is plane_id 1 */
+	.vid_name = { "vid", "vidl1" },
+	.vid_lite = { false, true, },
+	.vid_order = { 1, 0 },
+};
+
 static const u16 *dispc_common_regmap;
 
 struct dss_vp_data {
@@ -824,6 +878,7 @@ dispc_irq_t dispc_read_and_clear_irqstatus(struct dispc_device *dispc)
 	case DISPC_K2G:
 		return dispc_k2g_read_and_clear_irqstatus(dispc);
 	case DISPC_AM625:
+	case DISPC_AM62A7:
 	case DISPC_AM65X:
 	case DISPC_J721E:
 		return dispc_k3_read_and_clear_irqstatus(dispc);
@@ -840,6 +895,7 @@ void dispc_set_irqenable(struct dispc_device *dispc, dispc_irq_t mask)
 		dispc_k2g_set_irqenable(dispc, mask);
 		break;
 	case DISPC_AM625:
+	case DISPC_AM62A7:
 	case DISPC_AM65X:
 	case DISPC_J721E:
 		dispc_k3_set_irqenable(dispc, mask);
@@ -1331,6 +1387,7 @@ void dispc_ovr_set_plane(struct dispc_device *dispc, u32 hw_plane,
 					x, y, layer);
 		break;
 	case DISPC_AM625:
+	case DISPC_AM62A7:
 	case DISPC_AM65X:
 		dispc_am65x_ovr_set_plane(dispc, hw_plane, hw_videoport,
 					  x, y, layer);
@@ -2250,6 +2307,7 @@ static void dispc_plane_init(struct dispc_device *dispc)
 		dispc_k2g_plane_init(dispc);
 		break;
 	case DISPC_AM625:
+	case DISPC_AM62A7:
 	case DISPC_AM65X:
 	case DISPC_J721E:
 		dispc_k3_plane_init(dispc);
@@ -2357,6 +2415,7 @@ static void dispc_vp_write_gamma_table(struct dispc_device *dispc,
 		dispc_k2g_vp_write_gamma_table(dispc, hw_videoport);
 		break;
 	case DISPC_AM625:
+	case DISPC_AM62A7:
 	case DISPC_AM65X:
 		dispc_am65x_vp_write_gamma_table(dispc, hw_videoport);
 		break;
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.h b/drivers/gpu/drm/tidss/tidss_dispc.h
index 33ac5ad7a423d..086327d51a903 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.h
+++ b/drivers/gpu/drm/tidss/tidss_dispc.h
@@ -54,12 +54,14 @@ enum dispc_vp_bus_type {
 	DISPC_VP_DPI,		/* DPI output */
 	DISPC_VP_OLDI,		/* OLDI (LVDS) output */
 	DISPC_VP_INTERNAL,	/* SoC internal routing */
+	DISPC_VP_TIED_OFF,	/* Tied off / Unavailable */
 	DISPC_VP_MAX_BUS_TYPE,
 };
 
 enum dispc_dss_subrevision {
 	DISPC_K2G,
 	DISPC_AM625,
+	DISPC_AM62A7,
 	DISPC_AM65X,
 	DISPC_J721E,
 };
@@ -88,6 +90,7 @@ struct dispc_features {
 
 extern const struct dispc_features dispc_k2g_feats;
 extern const struct dispc_features dispc_am625_feats;
+extern const struct dispc_features dispc_am62a7_feats;
 extern const struct dispc_features dispc_am65x_feats;
 extern const struct dispc_features dispc_j721e_feats;
 
diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c
index 4d063eb9cd0b7..edf69d0205442 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.c
+++ b/drivers/gpu/drm/tidss/tidss_drv.c
@@ -231,6 +231,7 @@ static void tidss_shutdown(struct platform_device *pdev)
 static const struct of_device_id tidss_of_table[] = {
 	{ .compatible = "ti,k2g-dss", .data = &dispc_k2g_feats, },
 	{ .compatible = "ti,am625-dss", .data = &dispc_am625_feats, },
+	{ .compatible = "ti,am62a7-dss", .data = &dispc_am62a7_feats, },
 	{ .compatible = "ti,am65x-dss", .data = &dispc_am65x_feats, },
 	{ .compatible = "ti,j721e-dss", .data = &dispc_j721e_feats, },
 	{ }
-- 
GitLab


From a0a9e7b4690b1d78f146fbc8c78a630856ed3015 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:37:54 +0200
Subject: [PATCH 0632/2340] drm/tidss: Use pm_runtime_resume_and_get()

Use pm_runtime_resume_and_get() instead of pm_runtime_get_sync(), which
will handle error situations better. Also fix the return, as there
should be no reason for the current complex return.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-1-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c
index edf69d0205442..bb12978208568 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.c
+++ b/drivers/gpu/drm/tidss/tidss_drv.c
@@ -32,9 +32,9 @@ int tidss_runtime_get(struct tidss_device *tidss)
 
 	dev_dbg(tidss->dev, "%s\n", __func__);
 
-	r = pm_runtime_get_sync(tidss->dev);
+	r = pm_runtime_resume_and_get(tidss->dev);
 	WARN_ON(r < 0);
-	return r < 0 ? r : 0;
+	return r;
 }
 
 void tidss_runtime_put(struct tidss_device *tidss)
-- 
GitLab


From 4b0bdf9383a999498b88df4c1a1e3f0910b86d53 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:37:55 +0200
Subject: [PATCH 0633/2340] drm/tidss: Use PM autosuspend

Use runtime PM autosuspend feature, with 1s timeout, to avoid
unnecessary suspend-resume cycles when, e.g. the userspace temporarily
turns off the crtcs when configuring the outputs.

Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-2-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_drv.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c
index bb12978208568..09e30de2dd484 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.c
+++ b/drivers/gpu/drm/tidss/tidss_drv.c
@@ -43,7 +43,9 @@ void tidss_runtime_put(struct tidss_device *tidss)
 
 	dev_dbg(tidss->dev, "%s\n", __func__);
 
-	r = pm_runtime_put_sync(tidss->dev);
+	pm_runtime_mark_last_busy(tidss->dev);
+
+	r = pm_runtime_put_autosuspend(tidss->dev);
 	WARN_ON(r < 0);
 }
 
@@ -144,6 +146,9 @@ static int tidss_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(dev);
 
+	pm_runtime_set_autosuspend_delay(dev, 1000);
+	pm_runtime_use_autosuspend(dev);
+
 #ifndef CONFIG_PM
 	/* If we don't have PM, we need to call resume manually */
 	dispc_runtime_resume(tidss->dispc);
@@ -192,6 +197,7 @@ err_runtime_suspend:
 #ifndef CONFIG_PM
 	dispc_runtime_suspend(tidss->dispc);
 #endif
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 
 	return ret;
@@ -215,6 +221,7 @@ static void tidss_remove(struct platform_device *pdev)
 	/* If we don't have PM, we need to call suspend manually */
 	dispc_runtime_suspend(tidss->dispc);
 #endif
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 
 	/* devm allocated dispc goes away with the dev so mark it NULL */
-- 
GitLab


From c2746e4d278be8f71ef0dbcd00fcc8ba46c678ef Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:37:56 +0200
Subject: [PATCH 0634/2340] drm/tidss: Drop useless variable init

No need to initialize the ret to 0 in dispc_softreset().

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-3-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 7af416457c57d..1dfd95a54e64e 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2764,7 +2764,7 @@ static void dispc_init_errata(struct dispc_device *dispc)
 static void dispc_softreset(struct dispc_device *dispc)
 {
 	u32 val;
-	int ret = 0;
+	int ret;
 
 	/* Soft reset */
 	REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, 1, 1);
-- 
GitLab


From 36d1e0852680aa038e2428d450673390111b165c Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:37:57 +0200
Subject: [PATCH 0635/2340] drm/tidss: Move reset to the end of dispc_init()

We do a DSS reset in the middle of the dispc_init(). While that happens
to work now, we should really make sure that e..g the fclk, which is
acquired only later in the function, is enabled when doing a reset. This
will be handled in a later patch, but for now, let's move the
dispc_softreset() call to the end of dispc_init(), which is a sensible
place for it anyway.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-4-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 1dfd95a54e64e..bce5432d26f3f 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2836,10 +2836,6 @@ int dispc_init(struct tidss_device *tidss)
 			return r;
 	}
 
-	/* K2G display controller does not support soft reset */
-	if (feat->subrev != DISPC_K2G)
-		dispc_softreset(dispc);
-
 	for (i = 0; i < dispc->feat->num_vps; i++) {
 		u32 gamma_size = dispc->feat->vp_feat.color.gamma_size;
 		u32 *gamma_table;
@@ -2888,6 +2884,10 @@ int dispc_init(struct tidss_device *tidss)
 	of_property_read_u32(dispc->dev->of_node, "max-memory-bandwidth",
 			     &dispc->memory_bandwidth_limit);
 
+	/* K2G display controller does not support soft reset */
+	if (feat->subrev != DISPC_K2G)
+		dispc_softreset(dispc);
+
 	tidss->dispc = dispc;
 
 	return 0;
-- 
GitLab


From aceafbb5035c4bfc75a321863ed1e393d644d2d2 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:37:58 +0200
Subject: [PATCH 0636/2340] drm/tidss: Return error value from from softreset

Return an error value from dispc_softreset() so that the caller can
handle the errors.

Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-5-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index bce5432d26f3f..b72f1e7d4b722 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2761,7 +2761,7 @@ static void dispc_init_errata(struct dispc_device *dispc)
 	}
 }
 
-static void dispc_softreset(struct dispc_device *dispc)
+static int dispc_softreset(struct dispc_device *dispc)
 {
 	u32 val;
 	int ret;
@@ -2771,8 +2771,12 @@ static void dispc_softreset(struct dispc_device *dispc)
 	/* Wait for reset to complete */
 	ret = readl_poll_timeout(dispc->base_common + DSS_SYSSTATUS,
 				 val, val & 1, 100, 5000);
-	if (ret)
-		dev_warn(dispc->dev, "failed to reset dispc\n");
+	if (ret) {
+		dev_err(dispc->dev, "failed to reset dispc\n");
+		return ret;
+	}
+
+	return 0;
 }
 
 int dispc_init(struct tidss_device *tidss)
@@ -2885,8 +2889,11 @@ int dispc_init(struct tidss_device *tidss)
 			     &dispc->memory_bandwidth_limit);
 
 	/* K2G display controller does not support soft reset */
-	if (feat->subrev != DISPC_K2G)
-		dispc_softreset(dispc);
+	if (feat->subrev != DISPC_K2G) {
+		r = dispc_softreset(dispc);
+		if (r)
+			return r;
+	}
 
 	tidss->dispc = dispc;
 
-- 
GitLab


From 151825150cf9c2e9fb90763d35b9dff3783628ac Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:37:59 +0200
Subject: [PATCH 0637/2340] drm/tidss: Check for K2G in in dispc_softreset()

K2G doesn't have softreset feature. Instead of having every caller of
dispc_softreset() check for K2G, move the check into dispc_softreset(),
and make dispc_softreset() return 0 in case of K2G.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-6-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index b72f1e7d4b722..f9de26cd241ad 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2766,6 +2766,10 @@ static int dispc_softreset(struct dispc_device *dispc)
 	u32 val;
 	int ret;
 
+	/* K2G display controller does not support soft reset */
+	if (dispc->feat->subrev == DISPC_K2G)
+		return 0;
+
 	/* Soft reset */
 	REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, 1, 1);
 	/* Wait for reset to complete */
@@ -2888,12 +2892,9 @@ int dispc_init(struct tidss_device *tidss)
 	of_property_read_u32(dispc->dev->of_node, "max-memory-bandwidth",
 			     &dispc->memory_bandwidth_limit);
 
-	/* K2G display controller does not support soft reset */
-	if (feat->subrev != DISPC_K2G) {
-		r = dispc_softreset(dispc);
-		if (r)
-			return r;
-	}
+	r = dispc_softreset(dispc);
+	if (r)
+		return r;
 
 	tidss->dispc = dispc;
 
-- 
GitLab


From 576d96c5c896221b5bc8feae473739469a92e144 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:38:00 +0200
Subject: [PATCH 0638/2340] drm/tidss: Add simple K2G manual reset

K2G display controller does not support soft reset, but we can do the
most important steps manually: mask the IRQs and disable the VPs.

Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-7-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index f9de26cd241ad..c3741feaafffa 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2761,14 +2761,28 @@ static void dispc_init_errata(struct dispc_device *dispc)
 	}
 }
 
+/*
+ * K2G display controller does not support soft reset, so we do a basic manual
+ * reset here: make sure the IRQs are masked and VPs are disabled.
+ */
+static void dispc_softreset_k2g(struct dispc_device *dispc)
+{
+	dispc_set_irqenable(dispc, 0);
+	dispc_read_and_clear_irqstatus(dispc);
+
+	for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
+		VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
+}
+
 static int dispc_softreset(struct dispc_device *dispc)
 {
 	u32 val;
 	int ret;
 
-	/* K2G display controller does not support soft reset */
-	if (dispc->feat->subrev == DISPC_K2G)
+	if (dispc->feat->subrev == DISPC_K2G) {
+		dispc_softreset_k2g(dispc);
 		return 0;
+	}
 
 	/* Soft reset */
 	REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, 1, 1);
-- 
GitLab


From bc288a927815efcf9d7f4a54d4d89c5df478c635 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:38:01 +0200
Subject: [PATCH 0639/2340] drm/tidss: Fix dss reset

The probe function calls dispc_softreset() before runtime PM is enabled
and without enabling any of the DSS clocks. This happens to work by
luck, and we need to make sure the DSS HW is active and the fclk is
enabled.

To fix the above, add a new function, dispc_init_hw(), which does:

- pm_runtime_set_active()
- clk_prepare_enable(fclk)
- dispc_softreset().

This ensures that the reset can be successfully accomplished.

Note that we use pm_runtime_set_active(), not the normal
pm_runtime_get(). The reason for this is that at this point we haven't
enabled the runtime PM yet and also we don't want the normal resume
callback to be called: the dispc resume callback does some initial HW
setup, and it expects that the HW was off (no video ports are
streaming). If the bootloader has enabled the DSS and has set up a
boot time splash-screen, the DSS would be enabled and streaming which
might lead to issues with the normal resume callback.

Fixes: c9b2d923befd ("drm/tidss: Soft Reset DISPC on startup")
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-8-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_dispc.c | 45 ++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index c3741feaafffa..1ad711f8d2a8b 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2797,6 +2797,49 @@ static int dispc_softreset(struct dispc_device *dispc)
 	return 0;
 }
 
+static int dispc_init_hw(struct dispc_device *dispc)
+{
+	struct device *dev = dispc->dev;
+	int ret;
+
+	ret = pm_runtime_set_active(dev);
+	if (ret) {
+		dev_err(dev, "Failed to set DSS PM to active\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(dispc->fclk);
+	if (ret) {
+		dev_err(dev, "Failed to enable DSS fclk\n");
+		goto err_runtime_suspend;
+	}
+
+	ret = dispc_softreset(dispc);
+	if (ret)
+		goto err_clk_disable;
+
+	clk_disable_unprepare(dispc->fclk);
+	ret = pm_runtime_set_suspended(dev);
+	if (ret) {
+		dev_err(dev, "Failed to set DSS PM to suspended\n");
+		return ret;
+	}
+
+	return 0;
+
+err_clk_disable:
+	clk_disable_unprepare(dispc->fclk);
+
+err_runtime_suspend:
+	ret = pm_runtime_set_suspended(dev);
+	if (ret) {
+		dev_err(dev, "Failed to set DSS PM to suspended\n");
+		return ret;
+	}
+
+	return ret;
+}
+
 int dispc_init(struct tidss_device *tidss)
 {
 	struct device *dev = tidss->dev;
@@ -2906,7 +2949,7 @@ int dispc_init(struct tidss_device *tidss)
 	of_property_read_u32(dispc->dev->of_node, "max-memory-bandwidth",
 			     &dispc->memory_bandwidth_limit);
 
-	r = dispc_softreset(dispc);
+	r = dispc_init_hw(dispc);
 	if (r)
 		return r;
 
-- 
GitLab


From d4652187367bc55983139401d0ee41d6e565b13d Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:38:02 +0200
Subject: [PATCH 0640/2340] drm/tidss: IRQ code cleanup

The IRQ setup code is overly complex. All we really need to do is
initialize the related fields in struct tidss_device, and request the
IRQ.

We can drop all the HW accesses, as they are pointless: the driver will
set the IRQs correctly when it needs any of the IRQs, and at probe time
we have done a reset, so we know that all the IRQs are masked by default
in the hardware.

Thus we can combine the tidss_irq_preinstall() and
tidss_irq_postinstall() into the tidss_irq_install() function, drop the
HW accesses, and drop the use of spinlock, as this is done at init time
and there can be no races.

We can also drop the HW access from the tidss_irq_uninstall(), as the
driver will anyway disable and suspend the hardware at remove time.

Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-9-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_drv.c |  2 ++
 drivers/gpu/drm/tidss/tidss_irq.c | 54 +++++--------------------------
 2 files changed, 10 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c
index 09e30de2dd484..d15f836dca95c 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.c
+++ b/drivers/gpu/drm/tidss/tidss_drv.c
@@ -138,6 +138,8 @@ static int tidss_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, tidss);
 
+	spin_lock_init(&tidss->wait_lock);
+
 	ret = dispc_init(tidss);
 	if (ret) {
 		dev_err(dev, "failed to initialize dispc: %d\n", ret);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 0c681c7600bcb..604334ef526a0 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -93,33 +93,21 @@ void tidss_irq_resume(struct tidss_device *tidss)
 	spin_unlock_irqrestore(&tidss->wait_lock, flags);
 }
 
-static void tidss_irq_preinstall(struct drm_device *ddev)
-{
-	struct tidss_device *tidss = to_tidss(ddev);
-
-	spin_lock_init(&tidss->wait_lock);
-
-	tidss_runtime_get(tidss);
-
-	dispc_set_irqenable(tidss->dispc, 0);
-	dispc_read_and_clear_irqstatus(tidss->dispc);
-
-	tidss_runtime_put(tidss);
-}
-
-static void tidss_irq_postinstall(struct drm_device *ddev)
+int tidss_irq_install(struct drm_device *ddev, unsigned int irq)
 {
 	struct tidss_device *tidss = to_tidss(ddev);
-	unsigned long flags;
-	unsigned int i;
+	int ret;
 
-	tidss_runtime_get(tidss);
+	if (irq == IRQ_NOTCONNECTED)
+		return -ENOTCONN;
 
-	spin_lock_irqsave(&tidss->wait_lock, flags);
+	ret = request_irq(irq, tidss_irq_handler, 0, ddev->driver->name, ddev);
+	if (ret)
+		return ret;
 
 	tidss->irq_mask = DSS_IRQ_DEVICE_OCP_ERR;
 
-	for (i = 0; i < tidss->num_crtcs; ++i) {
+	for (unsigned int i = 0; i < tidss->num_crtcs; ++i) {
 		struct tidss_crtc *tcrtc = to_tidss_crtc(tidss->crtcs[i]);
 
 		tidss->irq_mask |= DSS_IRQ_VP_SYNC_LOST(tcrtc->hw_videoport);
@@ -127,28 +115,6 @@ static void tidss_irq_postinstall(struct drm_device *ddev)
 		tidss->irq_mask |= DSS_IRQ_VP_FRAME_DONE(tcrtc->hw_videoport);
 	}
 
-	tidss_irq_update(tidss);
-
-	spin_unlock_irqrestore(&tidss->wait_lock, flags);
-
-	tidss_runtime_put(tidss);
-}
-
-int tidss_irq_install(struct drm_device *ddev, unsigned int irq)
-{
-	int ret;
-
-	if (irq == IRQ_NOTCONNECTED)
-		return -ENOTCONN;
-
-	tidss_irq_preinstall(ddev);
-
-	ret = request_irq(irq, tidss_irq_handler, 0, ddev->driver->name, ddev);
-	if (ret)
-		return ret;
-
-	tidss_irq_postinstall(ddev);
-
 	return 0;
 }
 
@@ -156,9 +122,5 @@ void tidss_irq_uninstall(struct drm_device *ddev)
 {
 	struct tidss_device *tidss = to_tidss(ddev);
 
-	tidss_runtime_get(tidss);
-	dispc_set_irqenable(tidss->dispc, 0);
-	tidss_runtime_put(tidss);
-
 	free_irq(tidss->irq, ddev);
 }
-- 
GitLab


From 95d4b471953411854f9c80b568da7fcf753f3801 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:38:03 +0200
Subject: [PATCH 0641/2340] drm/tidss: Fix atomic_flush check

tidss_crtc_atomic_flush() checks if the crtc is enabled, and if not,
returns immediately as there's no reason to do any register changes.

However, the code checks for 'crtc->state->enable', which does not
reflect the actual HW state. We should instead look at the
'crtc->state->active' flag.

This causes the tidss_crtc_atomic_flush() to proceed with the flush even
if the active state is false, which then causes us to hit the
WARN_ON(!crtc->state->event) check.

Fix this by checking the active flag, and while at it, fix the related
debug print which had "active" and "needs modeset" wrong way.

Cc:  <stable@vger.kernel.org>
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-10-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_crtc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c
index 5e5e466f35d10..7c78c074e3a2e 100644
--- a/drivers/gpu/drm/tidss/tidss_crtc.c
+++ b/drivers/gpu/drm/tidss/tidss_crtc.c
@@ -169,13 +169,13 @@ static void tidss_crtc_atomic_flush(struct drm_crtc *crtc,
 	struct tidss_device *tidss = to_tidss(ddev);
 	unsigned long flags;
 
-	dev_dbg(ddev->dev,
-		"%s: %s enabled %d, needs modeset %d, event %p\n", __func__,
-		crtc->name, drm_atomic_crtc_needs_modeset(crtc->state),
-		crtc->state->enable, crtc->state->event);
+	dev_dbg(ddev->dev, "%s: %s is %sactive, %s modeset, event %p\n",
+		__func__, crtc->name, crtc->state->active ? "" : "not ",
+		drm_atomic_crtc_needs_modeset(crtc->state) ? "needs" : "doesn't need",
+		crtc->state->event);
 
 	/* There is nothing to do if CRTC is not going to be enabled. */
-	if (!crtc->state->enable)
+	if (!crtc->state->active)
 		return;
 
 	/*
-- 
GitLab


From ca89b69734f92021e25899dff3e433b1904f5832 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 9 Nov 2023 09:38:04 +0200
Subject: [PATCH 0642/2340] drm/tidss: Use DRM_PLANE_COMMIT_ACTIVE_ONLY

At the moment the driver does not use DRM_PLANE_COMMIT_ACTIVE_ONLY, but
still checks for crtc->state->active in tidss_crtc_atomic_flush(), and
skips the flush if the crtc is not active.

The exact reason why DRM_PLANE_COMMIT_ACTIVE_ONLY is not used has been
lost in history. DRM_PLANE_COMMIT_ACTIVE_ONLY does also affect the plane
updates, and I think the issue was related to multi-display systems and
moving planes between the displays. However, it is possible the issue
was only present on the older DSS hardware, handled by the omapdrm
driver (on which the tidss driver is loosely based).

Reviewing the code related to DRM_PLANE_COMMIT_ACTIVE_ONLY does not show
any issues, and testing on J7 EVM with two displays works fine.

Change the driver to use DRM_PLANE_COMMIT_ACTIVE_ONLY.

Reviewed-by: Aradhya Bhatia <a-bhatia1@ti.com>
Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-11-ac91b5ea35c0@ideasonboard.com
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
 drivers/gpu/drm/tidss/tidss_crtc.c | 4 ----
 drivers/gpu/drm/tidss/tidss_kms.c  | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c
index 7c78c074e3a2e..5f838980c7a11 100644
--- a/drivers/gpu/drm/tidss/tidss_crtc.c
+++ b/drivers/gpu/drm/tidss/tidss_crtc.c
@@ -174,10 +174,6 @@ static void tidss_crtc_atomic_flush(struct drm_crtc *crtc,
 		drm_atomic_crtc_needs_modeset(crtc->state) ? "needs" : "doesn't need",
 		crtc->state->event);
 
-	/* There is nothing to do if CRTC is not going to be enabled. */
-	if (!crtc->state->active)
-		return;
-
 	/*
 	 * Flush CRTC changes with go bit only if new modeset is not
 	 * coming, so CRTC is enabled trough out the commit.
diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c
index d096d8d2bc8f8..a0e494c806a96 100644
--- a/drivers/gpu/drm/tidss/tidss_kms.c
+++ b/drivers/gpu/drm/tidss/tidss_kms.c
@@ -29,7 +29,7 @@ static void tidss_atomic_commit_tail(struct drm_atomic_state *old_state)
 	tidss_runtime_get(tidss);
 
 	drm_atomic_helper_commit_modeset_disables(ddev, old_state);
-	drm_atomic_helper_commit_planes(ddev, old_state, 0);
+	drm_atomic_helper_commit_planes(ddev, old_state, DRM_PLANE_COMMIT_ACTIVE_ONLY);
 	drm_atomic_helper_commit_modeset_enables(ddev, old_state);
 
 	drm_atomic_helper_commit_hw_done(old_state);
-- 
GitLab


From 561322c3bc14bb59f26120a9135eabc140284f86 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Wed, 29 Nov 2023 14:22:21 +0200
Subject: [PATCH 0643/2340] drm/i915/display: Skip state verification with
 TBT-ALT mode

With TBT-ALT mode we are not programming C20 chip PLL's and
hence we don't need to check state verification. We don't
need to program DP link signal levels i.e.pre-emphasis and
voltage swing either.

This patch fixes dmesg errors like this one

"[drm] ERROR PHY F Write 0c06 failed after 3 retries."

Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129122221.1109084-1-mika.kahola@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index a8fa765808020..5fbec5784b835 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -415,9 +415,15 @@ void intel_cx0_phy_set_signal_levels(struct intel_encoder *encoder,
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	const struct intel_ddi_buf_trans *trans;
 	enum phy phy = intel_port_to_phy(i915, encoder->port);
-	u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder);
+	u8 owned_lane_mask;
 	intel_wakeref_t wakeref;
 	int n_entries, ln;
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+
+	if (intel_tc_port_in_tbt_alt_mode(dig_port))
+		return;
+
+	owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder);
 
 	wakeref = intel_cx0_phy_transaction_begin(encoder);
 
@@ -3136,6 +3142,9 @@ void intel_cx0pll_state_verify(struct intel_atomic_state *state,
 	encoder = intel_get_crtc_new_encoder(state, new_crtc_state);
 	phy = intel_port_to_phy(i915, encoder->port);
 
+	if (intel_tc_port_in_tbt_alt_mode(enc_to_dig_port(encoder)))
+		return;
+
 	intel_cx0pll_readout_hw_state(encoder, &mpll_hw_state);
 
 	if (intel_is_c10phy(i915, phy))
-- 
GitLab


From 780b9463ce66a9efb18e3b5d35cd011fb918d741 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 30 Nov 2023 13:40:24 -0300
Subject: [PATCH 0644/2340] drm/v3d: Remove unused function header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v3d_mmu_get_offset header was added but the function was never defined.
Just remove it.

Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-3-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 4c59fefaa0b4b..d8b392494eba5 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -420,8 +420,6 @@ void v3d_irq_disable(struct v3d_dev *v3d);
 void v3d_irq_reset(struct v3d_dev *v3d);
 
 /* v3d_mmu.c */
-int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo,
-		       u32 *offset);
 int v3d_mmu_set_page_table(struct v3d_dev *v3d);
 void v3d_mmu_insert_ptes(struct v3d_bo *bo);
 void v3d_mmu_remove_ptes(struct v3d_bo *bo);
-- 
GitLab


From a8ad9d63a160f6b93f6958a5a0ded1a6abb15815 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 30 Nov 2023 13:40:25 -0300
Subject: [PATCH 0645/2340] drm/v3d: Move wait BO ioctl to the v3d_bo file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

IOCTLs related to BO operations reside on the file v3d_bo.c. The wait BO
ioctl is the only IOCTL regarding BOs that is placed in a different file.
So, move it to the v3d_bo.c file.

Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-4-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_bo.c  | 33 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_drv.h |  4 ++--
 drivers/gpu/drm/v3d/v3d_gem.c | 33 ---------------------------------
 3 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index 8b3229a37c6df..357a0da7e16aa 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -233,3 +233,36 @@ int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
 	drm_gem_object_put(gem_obj);
 	return 0;
 }
+
+int
+v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	int ret;
+	struct drm_v3d_wait_bo *args = data;
+	ktime_t start = ktime_get();
+	u64 delta_ns;
+	unsigned long timeout_jiffies =
+		nsecs_to_jiffies_timeout(args->timeout_ns);
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	ret = drm_gem_dma_resv_wait(file_priv, args->handle,
+				    true, timeout_jiffies);
+
+	/* Decrement the user's timeout, in case we got interrupted
+	 * such that the ioctl will be restarted.
+	 */
+	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
+	if (delta_ns < args->timeout_ns)
+		args->timeout_ns -= delta_ns;
+	else
+		args->timeout_ns = 0;
+
+	/* Asked to wait beyond the jiffie/scheduler precision? */
+	if (ret == -ETIME && args->timeout_ns)
+		ret = -EAGAIN;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index d8b392494eba5..fc04372d5cbd3 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -385,6 +385,8 @@ int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
 int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_priv);
+int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
 struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
 						 struct dma_buf_attachment *attach,
 						 struct sg_table *sgt);
@@ -405,8 +407,6 @@ int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
-int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv);
 void v3d_job_cleanup(struct v3d_job *job);
 void v3d_job_put(struct v3d_job *job);
 void v3d_reset(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 9d2ac23c29e33..26f62a48d2260 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -363,39 +363,6 @@ void v3d_job_put(struct v3d_job *job)
 	kref_put(&job->refcount, job->free);
 }
 
-int
-v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
-		  struct drm_file *file_priv)
-{
-	int ret;
-	struct drm_v3d_wait_bo *args = data;
-	ktime_t start = ktime_get();
-	u64 delta_ns;
-	unsigned long timeout_jiffies =
-		nsecs_to_jiffies_timeout(args->timeout_ns);
-
-	if (args->pad != 0)
-		return -EINVAL;
-
-	ret = drm_gem_dma_resv_wait(file_priv, args->handle,
-				    true, timeout_jiffies);
-
-	/* Decrement the user's timeout, in case we got interrupted
-	 * such that the ioctl will be restarted.
-	 */
-	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
-	if (delta_ns < args->timeout_ns)
-		args->timeout_ns -= delta_ns;
-	else
-		args->timeout_ns = 0;
-
-	/* Asked to wait beyond the jiffie/scheduler precision? */
-	if (ret == -ETIME && args->timeout_ns)
-		ret = -EAGAIN;
-
-	return ret;
-}
-
 static int
 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	     void **container, size_t size, void (*free)(struct kref *ref),
-- 
GitLab


From 9032d5f633ed7b5c726971dc7e2372045bf27f40 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 30 Nov 2023 13:40:26 -0300
Subject: [PATCH 0646/2340] drm/v3d: Detach job submissions IOCTLs to a new
 specific file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will include a new job submission type, the CPU job submission. For
readability and maintability, separate the job submission IOCTLs and
related operations from v3d_gem.c.

Minor fix in the CSD submission kernel doc:
CSD (texture formatting) -> CSD (compute shader).

Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-5-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/Makefile     |   3 +-
 drivers/gpu/drm/v3d/v3d_drv.h    |  12 +-
 drivers/gpu/drm/v3d/v3d_gem.c    | 735 ------------------------------
 drivers/gpu/drm/v3d/v3d_submit.c | 744 +++++++++++++++++++++++++++++++
 4 files changed, 753 insertions(+), 741 deletions(-)
 create mode 100644 drivers/gpu/drm/v3d/v3d_submit.c

diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile
index 4b21b20e49981..b7d673f1153be 100644
--- a/drivers/gpu/drm/v3d/Makefile
+++ b/drivers/gpu/drm/v3d/Makefile
@@ -12,7 +12,8 @@ v3d-y := \
 	v3d_perfmon.o \
 	v3d_trace_points.o \
 	v3d_sched.o \
-	v3d_sysfs.o
+	v3d_sysfs.o \
+	v3d_submit.o
 
 v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
 
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index fc04372d5cbd3..4db9ace660240 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -401,17 +401,19 @@ struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue);
 /* v3d_gem.c */
 int v3d_gem_init(struct drm_device *dev);
 void v3d_gem_destroy(struct drm_device *dev);
+void v3d_reset(struct v3d_dev *v3d);
+void v3d_invalidate_caches(struct v3d_dev *v3d);
+void v3d_clean_caches(struct v3d_dev *v3d);
+
+/* v3d_submit.c */
+void v3d_job_cleanup(struct v3d_job *job);
+void v3d_job_put(struct v3d_job *job);
 int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
-void v3d_job_cleanup(struct v3d_job *job);
-void v3d_job_put(struct v3d_job *job);
-void v3d_reset(struct v3d_dev *v3d);
-void v3d_invalidate_caches(struct v3d_dev *v3d);
-void v3d_clean_caches(struct v3d_dev *v3d);
 
 /* v3d_irq.c */
 int v3d_irq_init(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 26f62a48d2260..afc565078c785 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -11,8 +11,6 @@
 #include <linux/uaccess.h>
 
 #include <drm/drm_managed.h>
-#include <drm/drm_syncobj.h>
-#include <uapi/drm/v3d_drm.h>
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
@@ -241,739 +239,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d)
 	v3d_invalidate_slices(v3d, 0);
 }
 
-/* Takes the reservation lock on all the BOs being referenced, so that
- * at queue submit time we can update the reservations.
- *
- * We don't lock the RCL the tile alloc/state BOs, or overflow memory
- * (all of which are on exec->unref_list).  They're entirely private
- * to v3d, so we don't attach dma-buf fences to them.
- */
-static int
-v3d_lock_bo_reservations(struct v3d_job *job,
-			 struct ww_acquire_ctx *acquire_ctx)
-{
-	int i, ret;
-
-	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < job->bo_count; i++) {
-		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
-		if (ret)
-			goto fail;
-
-		ret = drm_sched_job_add_implicit_dependencies(&job->base,
-							      job->bo[i], true);
-		if (ret)
-			goto fail;
-	}
-
-	return 0;
-
-fail:
-	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
-	return ret;
-}
-
-/**
- * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
- * referenced by the job.
- * @dev: DRM device
- * @file_priv: DRM file for this fd
- * @job: V3D job being set up
- * @bo_handles: GEM handles
- * @bo_count: Number of GEM handles passed in
- *
- * The command validator needs to reference BOs by their index within
- * the submitted job's BO list.  This does the validation of the job's
- * BO list and reference counting for the lifetime of the job.
- *
- * Note that this function doesn't need to unreference the BOs on
- * failure, because that will happen at v3d_exec_cleanup() time.
- */
-static int
-v3d_lookup_bos(struct drm_device *dev,
-	       struct drm_file *file_priv,
-	       struct v3d_job *job,
-	       u64 bo_handles,
-	       u32 bo_count)
-{
-	job->bo_count = bo_count;
-
-	if (!job->bo_count) {
-		/* See comment on bo_index for why we have to check
-		 * this.
-		 */
-		DRM_DEBUG("Rendering requires BOs\n");
-		return -EINVAL;
-	}
-
-	return drm_gem_objects_lookup(file_priv,
-				      (void __user *)(uintptr_t)bo_handles,
-				      job->bo_count, &job->bo);
-}
-
-static void
-v3d_job_free(struct kref *ref)
-{
-	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
-	int i;
-
-	if (job->bo) {
-		for (i = 0; i < job->bo_count; i++)
-			drm_gem_object_put(job->bo[i]);
-		kvfree(job->bo);
-	}
-
-	dma_fence_put(job->irq_fence);
-	dma_fence_put(job->done_fence);
-
-	if (job->perfmon)
-		v3d_perfmon_put(job->perfmon);
-
-	kfree(job);
-}
-
-static void
-v3d_render_job_free(struct kref *ref)
-{
-	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
-						  base.refcount);
-	struct v3d_bo *bo, *save;
-
-	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
-		drm_gem_object_put(&bo->base.base);
-	}
-
-	v3d_job_free(ref);
-}
-
-void v3d_job_cleanup(struct v3d_job *job)
-{
-	if (!job)
-		return;
-
-	drm_sched_job_cleanup(&job->base);
-	v3d_job_put(job);
-}
-
-void v3d_job_put(struct v3d_job *job)
-{
-	kref_put(&job->refcount, job->free);
-}
-
-static int
-v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
-	     void **container, size_t size, void (*free)(struct kref *ref),
-	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
-{
-	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
-	struct v3d_job *job;
-	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
-	int ret, i;
-
-	*container = kcalloc(1, size, GFP_KERNEL);
-	if (!*container) {
-		DRM_ERROR("Cannot allocate memory for v3d job.");
-		return -ENOMEM;
-	}
-
-	job = *container;
-	job->v3d = v3d;
-	job->free = free;
-	job->file = file_priv;
-
-	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
-				 1, v3d_priv);
-	if (ret)
-		goto fail;
-
-	if (has_multisync) {
-		if (se->in_sync_count && se->wait_stage == queue) {
-			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);
-
-			for (i = 0; i < se->in_sync_count; i++) {
-				struct drm_v3d_sem in;
-
-				if (copy_from_user(&in, handle++, sizeof(in))) {
-					ret = -EFAULT;
-					DRM_DEBUG("Failed to copy wait dep handle.\n");
-					goto fail_deps;
-				}
-				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);
-
-				// TODO: Investigate why this was filtered out for the IOCTL.
-				if (ret && ret != -ENOENT)
-					goto fail_deps;
-			}
-		}
-	} else {
-		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);
-
-		// TODO: Investigate why this was filtered out for the IOCTL.
-		if (ret && ret != -ENOENT)
-			goto fail_deps;
-	}
-
-	kref_init(&job->refcount);
-
-	return 0;
-
-fail_deps:
-	drm_sched_job_cleanup(&job->base);
-fail:
-	kfree(*container);
-	*container = NULL;
-
-	return ret;
-}
-
-static void
-v3d_push_job(struct v3d_job *job)
-{
-	drm_sched_job_arm(&job->base);
-
-	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
-
-	/* put by scheduler job completion */
-	kref_get(&job->refcount);
-
-	drm_sched_entity_push_job(&job->base);
-}
-
-static void
-v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
-					 struct v3d_job *job,
-					 struct ww_acquire_ctx *acquire_ctx,
-					 u32 out_sync,
-					 struct v3d_submit_ext *se,
-					 struct dma_fence *done_fence)
-{
-	struct drm_syncobj *sync_out;
-	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
-	int i;
-
-	for (i = 0; i < job->bo_count; i++) {
-		/* XXX: Use shared fences for read-only objects. */
-		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
-				   DMA_RESV_USAGE_WRITE);
-	}
-
-	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
-
-	/* Update the return sync object for the job */
-	/* If it only supports a single signal semaphore*/
-	if (!has_multisync) {
-		sync_out = drm_syncobj_find(file_priv, out_sync);
-		if (sync_out) {
-			drm_syncobj_replace_fence(sync_out, done_fence);
-			drm_syncobj_put(sync_out);
-		}
-		return;
-	}
-
-	/* If multiple semaphores extension is supported */
-	if (se->out_sync_count) {
-		for (i = 0; i < se->out_sync_count; i++) {
-			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
-						  done_fence);
-			drm_syncobj_put(se->out_syncs[i].syncobj);
-		}
-		kvfree(se->out_syncs);
-	}
-}
-
-static void
-v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
-{
-	unsigned int i;
-
-	if (!(se && se->out_sync_count))
-		return;
-
-	for (i = 0; i < se->out_sync_count; i++)
-		drm_syncobj_put(se->out_syncs[i].syncobj);
-	kvfree(se->out_syncs);
-}
-
-static int
-v3d_get_multisync_post_deps(struct drm_file *file_priv,
-			    struct v3d_submit_ext *se,
-			    u32 count, u64 handles)
-{
-	struct drm_v3d_sem __user *post_deps;
-	int i, ret;
-
-	if (!count)
-		return 0;
-
-	se->out_syncs = (struct v3d_submit_outsync *)
-			kvmalloc_array(count,
-				       sizeof(struct v3d_submit_outsync),
-				       GFP_KERNEL);
-	if (!se->out_syncs)
-		return -ENOMEM;
-
-	post_deps = u64_to_user_ptr(handles);
-
-	for (i = 0; i < count; i++) {
-		struct drm_v3d_sem out;
-
-		if (copy_from_user(&out, post_deps++, sizeof(out))) {
-			ret = -EFAULT;
-			DRM_DEBUG("Failed to copy post dep handles\n");
-			goto fail;
-		}
-
-		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
-							    out.handle);
-		if (!se->out_syncs[i].syncobj) {
-			ret = -EINVAL;
-			goto fail;
-		}
-	}
-	se->out_sync_count = count;
-
-	return 0;
-
-fail:
-	for (i--; i >= 0; i--)
-		drm_syncobj_put(se->out_syncs[i].syncobj);
-	kvfree(se->out_syncs);
-
-	return ret;
-}
-
-/* Get data for multiple binary semaphores synchronization. Parse syncobj
- * to be signaled when job completes (out_sync).
- */
-static int
-v3d_get_multisync_submit_deps(struct drm_file *file_priv,
-			      struct drm_v3d_extension __user *ext,
-			      void *data)
-{
-	struct drm_v3d_multi_sync multisync;
-	struct v3d_submit_ext *se = data;
-	int ret;
-
-	if (copy_from_user(&multisync, ext, sizeof(multisync)))
-		return -EFAULT;
-
-	if (multisync.pad)
-		return -EINVAL;
-
-	ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count,
-					  multisync.out_syncs);
-	if (ret)
-		return ret;
-
-	se->in_sync_count = multisync.in_sync_count;
-	se->in_syncs = multisync.in_syncs;
-	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
-	se->wait_stage = multisync.wait_stage;
-
-	return 0;
-}
-
-/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
- * according to the extension id (name).
- */
-static int
-v3d_get_extensions(struct drm_file *file_priv,
-		   u64 ext_handles,
-		   void *data)
-{
-	struct drm_v3d_extension __user *user_ext;
-	int ret;
-
-	user_ext = u64_to_user_ptr(ext_handles);
-	while (user_ext) {
-		struct drm_v3d_extension ext;
-
-		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
-			DRM_DEBUG("Failed to copy submit extension\n");
-			return -EFAULT;
-		}
-
-		switch (ext.id) {
-		case DRM_V3D_EXT_ID_MULTI_SYNC:
-			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data);
-			if (ret)
-				return ret;
-			break;
-		default:
-			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
-			return -EINVAL;
-		}
-
-		user_ext = u64_to_user_ptr(ext.next);
-	}
-
-	return 0;
-}
-
-/**
- * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
- * @dev: DRM device
- * @data: ioctl argument
- * @file_priv: DRM file for this fd
- *
- * This is the main entrypoint for userspace to submit a 3D frame to
- * the GPU.  Userspace provides the binner command list (if
- * applicable), and the kernel sets up the render command list to draw
- * to the framebuffer described in the ioctl, using the command lists
- * that the 3D engine's binner will produce.
- */
-int
-v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file_priv)
-{
-	struct v3d_dev *v3d = to_v3d_dev(dev);
-	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
-	struct drm_v3d_submit_cl *args = data;
-	struct v3d_submit_ext se = {0};
-	struct v3d_bin_job *bin = NULL;
-	struct v3d_render_job *render = NULL;
-	struct v3d_job *clean_job = NULL;
-	struct v3d_job *last_job;
-	struct ww_acquire_ctx acquire_ctx;
-	int ret = 0;
-
-	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
-
-	if (args->pad)
-		return -EINVAL;
-
-	if (args->flags &&
-	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
-			    DRM_V3D_SUBMIT_EXTENSION)) {
-		DRM_INFO("invalid flags: %d\n", args->flags);
-		return -EINVAL;
-	}
-
-	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
-		ret = v3d_get_extensions(file_priv, args->extensions, &se);
-		if (ret) {
-			DRM_DEBUG("Failed to get extensions.\n");
-			return ret;
-		}
-	}
-
-	ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render),
-			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
-	if (ret)
-		goto fail;
-
-	render->start = args->rcl_start;
-	render->end = args->rcl_end;
-	INIT_LIST_HEAD(&render->unref_list);
-
-	if (args->bcl_start != args->bcl_end) {
-		ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin),
-				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
-		if (ret)
-			goto fail;
-
-		bin->start = args->bcl_start;
-		bin->end = args->bcl_end;
-		bin->qma = args->qma;
-		bin->qms = args->qms;
-		bin->qts = args->qts;
-		bin->render = render;
-	}
-
-	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
-		ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
-				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
-		if (ret)
-			goto fail;
-
-		last_job = clean_job;
-	} else {
-		last_job = &render->base;
-	}
-
-	ret = v3d_lookup_bos(dev, file_priv, last_job,
-			     args->bo_handles, args->bo_handle_count);
-	if (ret)
-		goto fail;
-
-	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
-	if (ret)
-		goto fail;
-
-	if (args->perfmon_id) {
-		render->base.perfmon = v3d_perfmon_find(v3d_priv,
-							args->perfmon_id);
-
-		if (!render->base.perfmon) {
-			ret = -ENOENT;
-			goto fail_perfmon;
-		}
-	}
-
-	mutex_lock(&v3d->sched_lock);
-	if (bin) {
-		bin->base.perfmon = render->base.perfmon;
-		v3d_perfmon_get(bin->base.perfmon);
-		v3d_push_job(&bin->base);
-
-		ret = drm_sched_job_add_dependency(&render->base.base,
-						   dma_fence_get(bin->base.done_fence));
-		if (ret)
-			goto fail_unreserve;
-	}
-
-	v3d_push_job(&render->base);
-
-	if (clean_job) {
-		struct dma_fence *render_fence =
-			dma_fence_get(render->base.done_fence);
-		ret = drm_sched_job_add_dependency(&clean_job->base,
-						   render_fence);
-		if (ret)
-			goto fail_unreserve;
-		clean_job->perfmon = render->base.perfmon;
-		v3d_perfmon_get(clean_job->perfmon);
-		v3d_push_job(clean_job);
-	}
-
-	mutex_unlock(&v3d->sched_lock);
-
-	v3d_attach_fences_and_unlock_reservation(file_priv,
-						 last_job,
-						 &acquire_ctx,
-						 args->out_sync,
-						 &se,
-						 last_job->done_fence);
-
-	if (bin)
-		v3d_job_put(&bin->base);
-	v3d_job_put(&render->base);
-	if (clean_job)
-		v3d_job_put(clean_job);
-
-	return 0;
-
-fail_unreserve:
-	mutex_unlock(&v3d->sched_lock);
-fail_perfmon:
-	drm_gem_unlock_reservations(last_job->bo,
-				    last_job->bo_count, &acquire_ctx);
-fail:
-	v3d_job_cleanup((void *)bin);
-	v3d_job_cleanup((void *)render);
-	v3d_job_cleanup(clean_job);
-	v3d_put_multisync_post_deps(&se);
-
-	return ret;
-}
-
-/**
- * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
- * @dev: DRM device
- * @data: ioctl argument
- * @file_priv: DRM file for this fd
- *
- * Userspace provides the register setup for the TFU, which we don't
- * need to validate since the TFU is behind the MMU.
- */
-int
-v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
-		     struct drm_file *file_priv)
-{
-	struct v3d_dev *v3d = to_v3d_dev(dev);
-	struct drm_v3d_submit_tfu *args = data;
-	struct v3d_submit_ext se = {0};
-	struct v3d_tfu_job *job = NULL;
-	struct ww_acquire_ctx acquire_ctx;
-	int ret = 0;
-
-	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
-
-	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
-		DRM_DEBUG("invalid flags: %d\n", args->flags);
-		return -EINVAL;
-	}
-
-	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
-		ret = v3d_get_extensions(file_priv, args->extensions, &se);
-		if (ret) {
-			DRM_DEBUG("Failed to get extensions.\n");
-			return ret;
-		}
-	}
-
-	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
-			   v3d_job_free, args->in_sync, &se, V3D_TFU);
-	if (ret)
-		goto fail;
-
-	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
-			       sizeof(*job->base.bo), GFP_KERNEL);
-	if (!job->base.bo) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	job->args = *args;
-
-	for (job->base.bo_count = 0;
-	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
-	     job->base.bo_count++) {
-		struct drm_gem_object *bo;
-
-		if (!args->bo_handles[job->base.bo_count])
-			break;
-
-		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
-		if (!bo) {
-			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
-				  job->base.bo_count,
-				  args->bo_handles[job->base.bo_count]);
-			ret = -ENOENT;
-			goto fail;
-		}
-		job->base.bo[job->base.bo_count] = bo;
-	}
-
-	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
-	if (ret)
-		goto fail;
-
-	mutex_lock(&v3d->sched_lock);
-	v3d_push_job(&job->base);
-	mutex_unlock(&v3d->sched_lock);
-
-	v3d_attach_fences_and_unlock_reservation(file_priv,
-						 &job->base, &acquire_ctx,
-						 args->out_sync,
-						 &se,
-						 job->base.done_fence);
-
-	v3d_job_put(&job->base);
-
-	return 0;
-
-fail:
-	v3d_job_cleanup((void *)job);
-	v3d_put_multisync_post_deps(&se);
-
-	return ret;
-}
-
-/**
- * v3d_submit_csd_ioctl() - Submits a CSD (texture formatting) job to the V3D.
- * @dev: DRM device
- * @data: ioctl argument
- * @file_priv: DRM file for this fd
- *
- * Userspace provides the register setup for the CSD, which we don't
- * need to validate since the CSD is behind the MMU.
- */
-int
-v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
-		     struct drm_file *file_priv)
-{
-	struct v3d_dev *v3d = to_v3d_dev(dev);
-	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
-	struct drm_v3d_submit_csd *args = data;
-	struct v3d_submit_ext se = {0};
-	struct v3d_csd_job *job = NULL;
-	struct v3d_job *clean_job = NULL;
-	struct ww_acquire_ctx acquire_ctx;
-	int ret;
-
-	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);
-
-	if (args->pad)
-		return -EINVAL;
-
-	if (!v3d_has_csd(v3d)) {
-		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
-		return -EINVAL;
-	}
-
-	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
-		DRM_INFO("invalid flags: %d\n", args->flags);
-		return -EINVAL;
-	}
-
-	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
-		ret = v3d_get_extensions(file_priv, args->extensions, &se);
-		if (ret) {
-			DRM_DEBUG("Failed to get extensions.\n");
-			return ret;
-		}
-	}
-
-	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
-			   v3d_job_free, args->in_sync, &se, V3D_CSD);
-	if (ret)
-		goto fail;
-
-	ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
-			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
-	if (ret)
-		goto fail;
-
-	job->args = *args;
-
-	ret = v3d_lookup_bos(dev, file_priv, clean_job,
-			     args->bo_handles, args->bo_handle_count);
-	if (ret)
-		goto fail;
-
-	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
-	if (ret)
-		goto fail;
-
-	if (args->perfmon_id) {
-		job->base.perfmon = v3d_perfmon_find(v3d_priv,
-						     args->perfmon_id);
-		if (!job->base.perfmon) {
-			ret = -ENOENT;
-			goto fail_perfmon;
-		}
-	}
-
-	mutex_lock(&v3d->sched_lock);
-	v3d_push_job(&job->base);
-
-	ret = drm_sched_job_add_dependency(&clean_job->base,
-					   dma_fence_get(job->base.done_fence));
-	if (ret)
-		goto fail_unreserve;
-
-	v3d_push_job(clean_job);
-	mutex_unlock(&v3d->sched_lock);
-
-	v3d_attach_fences_and_unlock_reservation(file_priv,
-						 clean_job,
-						 &acquire_ctx,
-						 args->out_sync,
-						 &se,
-						 clean_job->done_fence);
-
-	v3d_job_put(&job->base);
-	v3d_job_put(clean_job);
-
-	return 0;
-
-fail_unreserve:
-	mutex_unlock(&v3d->sched_lock);
-fail_perfmon:
-	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
-				    &acquire_ctx);
-fail:
-	v3d_job_cleanup((void *)job);
-	v3d_job_cleanup(clean_job);
-	v3d_put_multisync_post_deps(&se);
-
-	return ret;
-}
-
 int
 v3d_gem_init(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
new file mode 100644
index 0000000000000..f36214002f37c
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2014-2018 Broadcom
+ * Copyright (C) 2023 Raspberry Pi
+ */
+
+#include <drm/drm_syncobj.h>
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+#include "v3d_trace.h"
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list).  They're entirely private
+ * to v3d, so we don't attach dma-buf fences to them.
+ */
+static int
+v3d_lock_bo_reservations(struct v3d_job *job,
+			 struct ww_acquire_ctx *acquire_ctx)
+{
+	int i, ret;
+
+	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < job->bo_count; i++) {
+		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
+		if (ret)
+			goto fail;
+
+		ret = drm_sched_job_add_implicit_dependencies(&job->base,
+							      job->bo[i], true);
+		if (ret)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
+	return ret;
+}
+
+/**
+ * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @job: V3D job being set up
+ * @bo_handles: GEM handles
+ * @bo_count: Number of GEM handles passed in
+ *
+ * The command validator needs to reference BOs by their index within
+ * the submitted job's BO list.  This does the validation of the job's
+ * BO list and reference counting for the lifetime of the job.
+ *
+ * Note that this function doesn't need to unreference the BOs on
+ * failure, because that will happen at v3d_exec_cleanup() time.
+ */
+static int
+v3d_lookup_bos(struct drm_device *dev,
+	       struct drm_file *file_priv,
+	       struct v3d_job *job,
+	       u64 bo_handles,
+	       u32 bo_count)
+{
+	job->bo_count = bo_count;
+
+	if (!job->bo_count) {
+		/* See comment on bo_index for why we have to check
+		 * this.
+		 */
+		DRM_DEBUG("Rendering requires BOs\n");
+		return -EINVAL;
+	}
+
+	return drm_gem_objects_lookup(file_priv,
+				      (void __user *)(uintptr_t)bo_handles,
+				      job->bo_count, &job->bo);
+}
+
+static void
+v3d_job_free(struct kref *ref)
+{
+	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
+	int i;
+
+	if (job->bo) {
+		for (i = 0; i < job->bo_count; i++)
+			drm_gem_object_put(job->bo[i]);
+		kvfree(job->bo);
+	}
+
+	dma_fence_put(job->irq_fence);
+	dma_fence_put(job->done_fence);
+
+	if (job->perfmon)
+		v3d_perfmon_put(job->perfmon);
+
+	kfree(job);
+}
+
+static void
+v3d_render_job_free(struct kref *ref)
+{
+	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
+						  base.refcount);
+	struct v3d_bo *bo, *save;
+
+	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
+		drm_gem_object_put(&bo->base.base);
+	}
+
+	v3d_job_free(ref);
+}
+
+void v3d_job_cleanup(struct v3d_job *job)
+{
+	if (!job)
+		return;
+
+	drm_sched_job_cleanup(&job->base);
+	v3d_job_put(job);
+}
+
+void v3d_job_put(struct v3d_job *job)
+{
+	kref_put(&job->refcount, job->free);
+}
+
+static int
+v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
+	     void **container, size_t size, void (*free)(struct kref *ref),
+	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
+{
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct v3d_job *job;
+	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
+	int ret, i;
+
+	*container = kcalloc(1, size, GFP_KERNEL);
+	if (!*container) {
+		DRM_ERROR("Cannot allocate memory for v3d job.");
+		return -ENOMEM;
+	}
+
+	job = *container;
+	job->v3d = v3d;
+	job->free = free;
+	job->file = file_priv;
+
+	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
+				 1, v3d_priv);
+	if (ret)
+		goto fail;
+
+	if (has_multisync) {
+		if (se->in_sync_count && se->wait_stage == queue) {
+			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);
+
+			for (i = 0; i < se->in_sync_count; i++) {
+				struct drm_v3d_sem in;
+
+				if (copy_from_user(&in, handle++, sizeof(in))) {
+					ret = -EFAULT;
+					DRM_DEBUG("Failed to copy wait dep handle.\n");
+					goto fail_deps;
+				}
+				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);
+
+				// TODO: Investigate why this was filtered out for the IOCTL.
+				if (ret && ret != -ENOENT)
+					goto fail_deps;
+			}
+		}
+	} else {
+		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);
+
+		// TODO: Investigate why this was filtered out for the IOCTL.
+		if (ret && ret != -ENOENT)
+			goto fail_deps;
+	}
+
+	kref_init(&job->refcount);
+
+	return 0;
+
+fail_deps:
+	drm_sched_job_cleanup(&job->base);
+fail:
+	kfree(*container);
+	*container = NULL;
+
+	return ret;
+}
+
+static void
+v3d_push_job(struct v3d_job *job)
+{
+	drm_sched_job_arm(&job->base);
+
+	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
+
+	/* put by scheduler job completion */
+	kref_get(&job->refcount);
+
+	drm_sched_entity_push_job(&job->base);
+}
+
+static void
+v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
+					 struct v3d_job *job,
+					 struct ww_acquire_ctx *acquire_ctx,
+					 u32 out_sync,
+					 struct v3d_submit_ext *se,
+					 struct dma_fence *done_fence)
+{
+	struct drm_syncobj *sync_out;
+	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
+	int i;
+
+	for (i = 0; i < job->bo_count; i++) {
+		/* XXX: Use shared fences for read-only objects. */
+		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
+				   DMA_RESV_USAGE_WRITE);
+	}
+
+	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
+
+	/* Update the return sync object for the job */
+	/* If it only supports a single signal semaphore*/
+	if (!has_multisync) {
+		sync_out = drm_syncobj_find(file_priv, out_sync);
+		if (sync_out) {
+			drm_syncobj_replace_fence(sync_out, done_fence);
+			drm_syncobj_put(sync_out);
+		}
+		return;
+	}
+
+	/* If multiple semaphores extension is supported */
+	if (se->out_sync_count) {
+		for (i = 0; i < se->out_sync_count; i++) {
+			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
+						  done_fence);
+			drm_syncobj_put(se->out_syncs[i].syncobj);
+		}
+		kvfree(se->out_syncs);
+	}
+}
+
+static void
+v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
+{
+	unsigned int i;
+
+	if (!(se && se->out_sync_count))
+		return;
+
+	for (i = 0; i < se->out_sync_count; i++)
+		drm_syncobj_put(se->out_syncs[i].syncobj);
+	kvfree(se->out_syncs);
+}
+
+static int
+v3d_get_multisync_post_deps(struct drm_file *file_priv,
+			    struct v3d_submit_ext *se,
+			    u32 count, u64 handles)
+{
+	struct drm_v3d_sem __user *post_deps;
+	int i, ret;
+
+	if (!count)
+		return 0;
+
+	se->out_syncs = (struct v3d_submit_outsync *)
+			kvmalloc_array(count,
+				       sizeof(struct v3d_submit_outsync),
+				       GFP_KERNEL);
+	if (!se->out_syncs)
+		return -ENOMEM;
+
+	post_deps = u64_to_user_ptr(handles);
+
+	for (i = 0; i < count; i++) {
+		struct drm_v3d_sem out;
+
+		if (copy_from_user(&out, post_deps++, sizeof(out))) {
+			ret = -EFAULT;
+			DRM_DEBUG("Failed to copy post dep handles\n");
+			goto fail;
+		}
+
+		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
+							    out.handle);
+		if (!se->out_syncs[i].syncobj) {
+			ret = -EINVAL;
+			goto fail;
+		}
+	}
+	se->out_sync_count = count;
+
+	return 0;
+
+fail:
+	for (i--; i >= 0; i--)
+		drm_syncobj_put(se->out_syncs[i].syncobj);
+	kvfree(se->out_syncs);
+
+	return ret;
+}
+
+/* Get data for multiple binary semaphores synchronization. Parse syncobj
+ * to be signaled when job completes (out_sync).
+ */
+static int
+v3d_get_multisync_submit_deps(struct drm_file *file_priv,
+			      struct drm_v3d_extension __user *ext,
+			      void *data)
+{
+	struct drm_v3d_multi_sync multisync;
+	struct v3d_submit_ext *se = data;
+	int ret;
+
+	if (copy_from_user(&multisync, ext, sizeof(multisync)))
+		return -EFAULT;
+
+	if (multisync.pad)
+		return -EINVAL;
+
+	ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count,
+					  multisync.out_syncs);
+	if (ret)
+		return ret;
+
+	se->in_sync_count = multisync.in_sync_count;
+	se->in_syncs = multisync.in_syncs;
+	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
+	se->wait_stage = multisync.wait_stage;
+
+	return 0;
+}
+
+/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
+ * according to the extension id (name).
+ */
+static int
+v3d_get_extensions(struct drm_file *file_priv,
+		   u64 ext_handles,
+		   void *data)
+{
+	struct drm_v3d_extension __user *user_ext;
+	int ret;
+
+	user_ext = u64_to_user_ptr(ext_handles);
+	while (user_ext) {
+		struct drm_v3d_extension ext;
+
+		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
+			DRM_DEBUG("Failed to copy submit extension\n");
+			return -EFAULT;
+		}
+
+		switch (ext.id) {
+		case DRM_V3D_EXT_ID_MULTI_SYNC:
+			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data);
+			if (ret)
+				return ret;
+			break;
+		default:
+			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
+			return -EINVAL;
+		}
+
+		user_ext = u64_to_user_ptr(ext.next);
+	}
+
+	return 0;
+}
+
+/**
+ * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * This is the main entrypoint for userspace to submit a 3D frame to
+ * the GPU.  Userspace provides the binner command list (if
+ * applicable), and the kernel sets up the render command list to draw
+ * to the framebuffer described in the ioctl, using the command lists
+ * that the 3D engine's binner will produce.
+ */
+int
+v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct drm_v3d_submit_cl *args = data;
+	struct v3d_submit_ext se = {0};
+	struct v3d_bin_job *bin = NULL;
+	struct v3d_render_job *render = NULL;
+	struct v3d_job *clean_job = NULL;
+	struct v3d_job *last_job;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret = 0;
+
+	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
+
+	if (args->pad)
+		return -EINVAL;
+
+	if (args->flags &&
+	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
+			    DRM_V3D_SUBMIT_EXTENSION)) {
+		DRM_INFO("invalid flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
+		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		if (ret) {
+			DRM_DEBUG("Failed to get extensions.\n");
+			return ret;
+		}
+	}
+
+	ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render),
+			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
+	if (ret)
+		goto fail;
+
+	render->start = args->rcl_start;
+	render->end = args->rcl_end;
+	INIT_LIST_HEAD(&render->unref_list);
+
+	if (args->bcl_start != args->bcl_end) {
+		ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin),
+				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
+		if (ret)
+			goto fail;
+
+		bin->start = args->bcl_start;
+		bin->end = args->bcl_end;
+		bin->qma = args->qma;
+		bin->qms = args->qms;
+		bin->qts = args->qts;
+		bin->render = render;
+	}
+
+	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
+		ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
+				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
+		if (ret)
+			goto fail;
+
+		last_job = clean_job;
+	} else {
+		last_job = &render->base;
+	}
+
+	ret = v3d_lookup_bos(dev, file_priv, last_job,
+			     args->bo_handles, args->bo_handle_count);
+	if (ret)
+		goto fail;
+
+	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
+	if (ret)
+		goto fail;
+
+	if (args->perfmon_id) {
+		render->base.perfmon = v3d_perfmon_find(v3d_priv,
+							args->perfmon_id);
+
+		if (!render->base.perfmon) {
+			ret = -ENOENT;
+			goto fail_perfmon;
+		}
+	}
+
+	mutex_lock(&v3d->sched_lock);
+	if (bin) {
+		bin->base.perfmon = render->base.perfmon;
+		v3d_perfmon_get(bin->base.perfmon);
+		v3d_push_job(&bin->base);
+
+		ret = drm_sched_job_add_dependency(&render->base.base,
+						   dma_fence_get(bin->base.done_fence));
+		if (ret)
+			goto fail_unreserve;
+	}
+
+	v3d_push_job(&render->base);
+
+	if (clean_job) {
+		struct dma_fence *render_fence =
+			dma_fence_get(render->base.done_fence);
+		ret = drm_sched_job_add_dependency(&clean_job->base,
+						   render_fence);
+		if (ret)
+			goto fail_unreserve;
+		clean_job->perfmon = render->base.perfmon;
+		v3d_perfmon_get(clean_job->perfmon);
+		v3d_push_job(clean_job);
+	}
+
+	mutex_unlock(&v3d->sched_lock);
+
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 last_job,
+						 &acquire_ctx,
+						 args->out_sync,
+						 &se,
+						 last_job->done_fence);
+
+	if (bin)
+		v3d_job_put(&bin->base);
+	v3d_job_put(&render->base);
+	if (clean_job)
+		v3d_job_put(clean_job);
+
+	return 0;
+
+fail_unreserve:
+	mutex_unlock(&v3d->sched_lock);
+fail_perfmon:
+	drm_gem_unlock_reservations(last_job->bo,
+				    last_job->bo_count, &acquire_ctx);
+fail:
+	v3d_job_cleanup((void *)bin);
+	v3d_job_cleanup((void *)render);
+	v3d_job_cleanup(clean_job);
+	v3d_put_multisync_post_deps(&se);
+
+	return ret;
+}
+
+/**
+ * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * Userspace provides the register setup for the TFU, which we don't
+ * need to validate since the TFU is behind the MMU.
+ */
+int
+v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct drm_v3d_submit_tfu *args = data;
+	struct v3d_submit_ext se = {0};
+	struct v3d_tfu_job *job = NULL;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret = 0;
+
+	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
+
+	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
+		DRM_DEBUG("invalid flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
+		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		if (ret) {
+			DRM_DEBUG("Failed to get extensions.\n");
+			return ret;
+		}
+	}
+
+	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
+			   v3d_job_free, args->in_sync, &se, V3D_TFU);
+	if (ret)
+		goto fail;
+
+	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
+			       sizeof(*job->base.bo), GFP_KERNEL);
+	if (!job->base.bo) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	job->args = *args;
+
+	for (job->base.bo_count = 0;
+	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
+	     job->base.bo_count++) {
+		struct drm_gem_object *bo;
+
+		if (!args->bo_handles[job->base.bo_count])
+			break;
+
+		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
+		if (!bo) {
+			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+				  job->base.bo_count,
+				  args->bo_handles[job->base.bo_count]);
+			ret = -ENOENT;
+			goto fail;
+		}
+		job->base.bo[job->base.bo_count] = bo;
+	}
+
+	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
+	if (ret)
+		goto fail;
+
+	mutex_lock(&v3d->sched_lock);
+	v3d_push_job(&job->base);
+	mutex_unlock(&v3d->sched_lock);
+
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 &job->base, &acquire_ctx,
+						 args->out_sync,
+						 &se,
+						 job->base.done_fence);
+
+	v3d_job_put(&job->base);
+
+	return 0;
+
+fail:
+	v3d_job_cleanup((void *)job);
+	v3d_put_multisync_post_deps(&se);
+
+	return ret;
+}
+
+/**
+ * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * Userspace provides the register setup for the CSD, which we don't
+ * need to validate since the CSD is behind the MMU.
+ */
+int
+v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct drm_v3d_submit_csd *args = data;
+	struct v3d_submit_ext se = {0};
+	struct v3d_csd_job *job = NULL;
+	struct v3d_job *clean_job = NULL;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret;
+
+	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);
+
+	if (args->pad)
+		return -EINVAL;
+
+	if (!v3d_has_csd(v3d)) {
+		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
+		return -EINVAL;
+	}
+
+	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
+		DRM_INFO("invalid flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
+		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		if (ret) {
+			DRM_DEBUG("Failed to get extensions.\n");
+			return ret;
+		}
+	}
+
+	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
+			   v3d_job_free, args->in_sync, &se, V3D_CSD);
+	if (ret)
+		goto fail;
+
+	ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
+			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
+	if (ret)
+		goto fail;
+
+	job->args = *args;
+
+	ret = v3d_lookup_bos(dev, file_priv, clean_job,
+			     args->bo_handles, args->bo_handle_count);
+	if (ret)
+		goto fail;
+
+	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
+	if (ret)
+		goto fail;
+
+	if (args->perfmon_id) {
+		job->base.perfmon = v3d_perfmon_find(v3d_priv,
+						     args->perfmon_id);
+		if (!job->base.perfmon) {
+			ret = -ENOENT;
+			goto fail_perfmon;
+		}
+	}
+
+	mutex_lock(&v3d->sched_lock);
+	v3d_push_job(&job->base);
+
+	ret = drm_sched_job_add_dependency(&clean_job->base,
+					   dma_fence_get(job->base.done_fence));
+	if (ret)
+		goto fail_unreserve;
+
+	v3d_push_job(clean_job);
+	mutex_unlock(&v3d->sched_lock);
+
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 clean_job,
+						 &acquire_ctx,
+						 args->out_sync,
+						 &se,
+						 clean_job->done_fence);
+
+	v3d_job_put(&job->base);
+	v3d_job_put(clean_job);
+
+	return 0;
+
+fail_unreserve:
+	mutex_unlock(&v3d->sched_lock);
+fail_perfmon:
+	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
+				    &acquire_ctx);
+fail:
+	v3d_job_cleanup((void *)job);
+	v3d_job_cleanup(clean_job);
+	v3d_put_multisync_post_deps(&se);
+
+	return ret;
+}
-- 
GitLab


From 8288faaa8b3817c2fcdbacc720527bb8df2b57b1 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 30 Nov 2023 13:40:27 -0300
Subject: [PATCH 0647/2340] drm/v3d: Simplify job refcount handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of checking if the job is NULL every time we call the function,
check it inside the function.

Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-6-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_submit.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index f36214002f37c..a0caf9c499bbb 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -129,6 +129,9 @@ void v3d_job_cleanup(struct v3d_job *job)
 
 void v3d_job_put(struct v3d_job *job)
 {
+	if (!job)
+		return;
+
 	kref_put(&job->refcount, job->free);
 }
 
@@ -517,11 +520,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 						 &se,
 						 last_job->done_fence);
 
-	if (bin)
-		v3d_job_put(&bin->base);
+	v3d_job_put(&bin->base);
 	v3d_job_put(&render->base);
-	if (clean_job)
-		v3d_job_put(clean_job);
+	v3d_job_put(clean_job);
 
 	return 0;
 
-- 
GitLab


From 6893deb881ab7da1691bd05045ffcc0c806319b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:28 -0300
Subject: [PATCH 0648/2340] drm/v3d: Don't allow two multisync extensions in
 the same job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, two multisync extensions can be added to the same job and
only the last multisync extension will be used. To avoid this
vulnerability, don't allow two multisync extensions in the same job.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-7-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_submit.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index a0caf9c499bbb..10141dc2820af 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -329,6 +329,11 @@ v3d_get_multisync_submit_deps(struct drm_file *file_priv,
 	struct v3d_submit_ext *se = data;
 	int ret;
 
+	if (se->in_sync_count || se->out_sync_count) {
+		DRM_DEBUG("Two multisync extensions were added to the same job.");
+		return -EINVAL;
+	}
+
 	if (copy_from_user(&multisync, ext, sizeof(multisync)))
 		return -EFAULT;
 
-- 
GitLab


From 464c61e76de851a216e667c91332172c68ffed54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:29 -0300
Subject: [PATCH 0649/2340] drm/v3d: Decouple job allocation from job
 initiation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We want to allow the IOCTLs to allocate the job without initiating it.
This will be useful for the CPU job submission IOCTL, as the CPU job has
the need to use information from the user extensions. Currently, the
user extensions are parsed before the job allocation, making it
impossible to fill the CPU job when parsing the user extensions.
Therefore, decouple the job allocation from the job initiation.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-8-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_submit.c | 64 ++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 10141dc2820af..148900283c2a7 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -135,23 +135,27 @@ void v3d_job_put(struct v3d_job *job)
 	kref_put(&job->refcount, job->free);
 }
 
+static int
+v3d_job_allocate(void **container, size_t size)
+{
+	*container = kcalloc(1, size, GFP_KERNEL);
+	if (!*container) {
+		DRM_ERROR("Cannot allocate memory for V3D job.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static int
 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
-	     void **container, size_t size, void (*free)(struct kref *ref),
+	     struct v3d_job *job, void (*free)(struct kref *ref),
 	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
 {
 	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
-	struct v3d_job *job;
 	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
 	int ret, i;
 
-	*container = kcalloc(1, size, GFP_KERNEL);
-	if (!*container) {
-		DRM_ERROR("Cannot allocate memory for v3d job.");
-		return -ENOMEM;
-	}
-
-	job = *container;
 	job->v3d = v3d;
 	job->free = free;
 	job->file = file_priv;
@@ -159,7 +163,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 				 1, v3d_priv);
 	if (ret)
-		goto fail;
+		return ret;
 
 	if (has_multisync) {
 		if (se->in_sync_count && se->wait_stage == queue) {
@@ -194,10 +198,6 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 
 fail_deps:
 	drm_sched_job_cleanup(&job->base);
-fail:
-	kfree(*container);
-	*container = NULL;
-
 	return ret;
 }
 
@@ -437,7 +437,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render),
+	ret = v3d_job_allocate((void *)&render, sizeof(*render));
+	if (ret)
+		return ret;
+
+	ret = v3d_job_init(v3d, file_priv, &render->base,
 			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
 	if (ret)
 		goto fail;
@@ -447,7 +451,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	INIT_LIST_HEAD(&render->unref_list);
 
 	if (args->bcl_start != args->bcl_end) {
-		ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin),
+		ret = v3d_job_allocate((void *)&bin, sizeof(*bin));
+		if (ret)
+			goto fail;
+
+		ret = v3d_job_init(v3d, file_priv, &bin->base,
 				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
 		if (ret)
 			goto fail;
@@ -461,7 +469,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
-		ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
+		ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
+		if (ret)
+			goto fail;
+
+		ret = v3d_job_init(v3d, file_priv, clean_job,
 				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
 		if (ret)
 			goto fail;
@@ -580,7 +592,11 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
+	ret = v3d_job_allocate((void *)&job, sizeof(*job));
+	if (ret)
+		return ret;
+
+	ret = v3d_job_init(v3d, file_priv, &job->base,
 			   v3d_job_free, args->in_sync, &se, V3D_TFU);
 	if (ret)
 		goto fail;
@@ -683,12 +699,20 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
+	ret = v3d_job_allocate((void *)&job, sizeof(*job));
+	if (ret)
+		return ret;
+
+	ret = v3d_job_init(v3d, file_priv, &job->base,
 			   v3d_job_free, args->in_sync, &se, V3D_CSD);
 	if (ret)
 		goto fail;
 
-	ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
+	ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
+	if (ret)
+		goto fail;
+
+	ret = v3d_job_init(v3d, file_priv, clean_job,
 			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
 	if (ret)
 		goto fail;
-- 
GitLab


From aafc1a2bea67460c41a289e8bb1e4dc6d016fe11 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 30 Nov 2023 13:40:30 -0300
Subject: [PATCH 0650/2340] drm/v3d: Add a CPU job submission
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a new type of job, a CPU job. A CPU job is a type of job that
performs operations that requires CPU intervention. The overall idea is
to use user extensions to enable different types of CPU job, allowing the
CPU job to perform different operations according to the type of user
extension. The user extension ID identify the type of CPU job that must
be dealt.

Having a CPU job is interesting for synchronization purposes as a CPU
job has a queue like any other V3D job and can be synchoronized by the
multisync extension.

Signed-off-by: Melissa Wen <mwen@igalia.com>
Co-developed-by: Maíra Canal <mcanal@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-9-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.c    |  4 ++
 drivers/gpu/drm/v3d/v3d_drv.h    | 16 +++++-
 drivers/gpu/drm/v3d/v3d_sched.c  | 57 +++++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_submit.c | 86 ++++++++++++++++++++++++++++++++
 include/uapi/drm/v3d_drm.h       | 17 +++++++
 5 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 44a1ca57d6a44..3debf37e7d9ba 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -91,6 +91,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
 		args->value = 1;
 		return 0;
+	case DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE:
+		args->value = 1;
+		return 0;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", args->param);
 		return -EINVAL;
@@ -189,6 +192,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE, v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
 };
 
 static const struct drm_driver v3d_drm_driver = {
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 4db9ace660240..2246a0e29955a 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -19,7 +19,7 @@ struct reset_control;
 
 #define GMP_GRANULARITY (128 * 1024)
 
-#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
+#define V3D_MAX_QUEUES (V3D_CPU + 1)
 
 static inline char *v3d_queue_to_string(enum v3d_queue queue)
 {
@@ -29,6 +29,7 @@ static inline char *v3d_queue_to_string(enum v3d_queue queue)
 	case V3D_TFU: return "tfu";
 	case V3D_CSD: return "csd";
 	case V3D_CACHE_CLEAN: return "cache_clean";
+	case V3D_CPU: return "cpu";
 	}
 	return "UNKNOWN";
 }
@@ -122,6 +123,7 @@ struct v3d_dev {
 	struct v3d_render_job *render_job;
 	struct v3d_tfu_job *tfu_job;
 	struct v3d_csd_job *csd_job;
+	struct v3d_cpu_job *cpu_job;
 
 	struct v3d_queue_state queue[V3D_MAX_QUEUES];
 
@@ -312,6 +314,16 @@ struct v3d_csd_job {
 	struct drm_v3d_submit_csd args;
 };
 
+enum v3d_cpu_job_type {};
+
+struct v3d_cpu_job {
+	struct v3d_job base;
+
+	enum v3d_cpu_job_type job_type;
+};
+
+typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
+
 struct v3d_submit_outsync {
 	struct drm_syncobj *syncobj;
 };
@@ -414,6 +426,8 @@ int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
+int v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
 
 /* v3d_irq.c */
 int v3d_irq_init(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index fccbea2a5f2eb..c89e92fc614cf 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -55,6 +55,12 @@ to_csd_job(struct drm_sched_job *sched_job)
 	return container_of(sched_job, struct v3d_csd_job, base.base);
 }
 
+static struct v3d_cpu_job *
+to_cpu_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_cpu_job, base.base);
+}
+
 static void
 v3d_sched_job_free(struct drm_sched_job *sched_job)
 {
@@ -262,6 +268,42 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 	return fence;
 }
 
+static const v3d_cpu_job_fn cpu_job_function[] = { };
+
+static struct dma_fence *
+v3d_cpu_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_cpu_job *job = to_cpu_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct v3d_file_priv *file = job->base.file->driver_priv;
+	u64 runtime;
+
+	v3d->cpu_job = job;
+
+	if (job->job_type >= ARRAY_SIZE(cpu_job_function)) {
+		DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job->job_type);
+		return NULL;
+	}
+
+	file->start_ns[V3D_CPU] = local_clock();
+	v3d->queue[V3D_CPU].start_ns = file->start_ns[V3D_CPU];
+
+	cpu_job_function[job->job_type](job);
+
+	runtime = local_clock() - file->start_ns[V3D_CPU];
+
+	file->enabled_ns[V3D_CPU] += runtime;
+	v3d->queue[V3D_CPU].enabled_ns += runtime;
+
+	file->jobs_sent[V3D_CPU]++;
+	v3d->queue[V3D_CPU].jobs_sent++;
+
+	file->start_ns[V3D_CPU] = 0;
+	v3d->queue[V3D_CPU].start_ns = 0;
+
+	return NULL;
+}
+
 static struct dma_fence *
 v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 {
@@ -416,6 +458,12 @@ static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
 	.free_job = v3d_sched_job_free
 };
 
+static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
+	.run_job = v3d_cpu_job_run,
+	.timedout_job = v3d_generic_job_timedout,
+	.free_job = v3d_sched_job_free
+};
+
 int
 v3d_sched_init(struct v3d_dev *v3d)
 {
@@ -471,6 +519,15 @@ v3d_sched_init(struct v3d_dev *v3d)
 			goto fail;
 	}
 
+	ret = drm_sched_init(&v3d->queue[V3D_CPU].sched,
+			     &v3d_cpu_sched_ops, NULL,
+			     DRM_SCHED_PRIORITY_COUNT,
+			     1, job_hang_limit,
+			     msecs_to_jiffies(hang_limit_ms), NULL,
+			     NULL, "v3d_cpu", v3d->drm.dev);
+	if (ret)
+		goto fail;
+
 	return 0;
 
 fail:
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 148900283c2a7..918e28b1d00a1 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -772,3 +772,89 @@ fail:
 
 	return ret;
 }
+
+static const unsigned int cpu_job_bo_handle_count[] = { };
+
+/**
+ * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * Userspace specifies the CPU job type and data required to perform its
+ * operations through the drm_v3d_extension struct.
+ */
+int
+v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct drm_v3d_submit_cpu *args = data;
+	struct v3d_submit_ext se = {0};
+	struct v3d_cpu_job *cpu_job = NULL;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret;
+
+	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
+		DRM_INFO("Invalid flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job));
+	if (ret)
+		return ret;
+
+	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
+		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		if (ret) {
+			DRM_DEBUG("Failed to get extensions.\n");
+			goto fail;
+		}
+	}
+
+	/* Every CPU job must have a CPU job user extension */
+	if (!cpu_job->job_type) {
+		DRM_DEBUG("CPU job must have a CPU job user extension.\n");
+		goto fail;
+	}
+
+	if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) {
+		DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n");
+		goto fail;
+	}
+
+	ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
+			   v3d_job_free, 0, &se, V3D_CPU);
+	if (ret)
+		goto fail;
+
+	if (args->bo_handle_count) {
+		ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
+				     args->bo_handles, args->bo_handle_count);
+		if (ret)
+			goto fail;
+
+		ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx);
+		if (ret)
+			goto fail;
+	}
+
+	mutex_lock(&v3d->sched_lock);
+	v3d_push_job(&cpu_job->base);
+	mutex_unlock(&v3d->sched_lock);
+
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 &cpu_job->base,
+						 &acquire_ctx, 0,
+						 NULL, cpu_job->base.done_fence);
+
+	v3d_job_put(&cpu_job->base);
+
+	return 0;
+
+fail:
+	v3d_job_cleanup((void *)cpu_job);
+	v3d_put_multisync_post_deps(&se);
+
+	return ret;
+}
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 1a7d7a689de38..00abef9d0db73 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -41,6 +41,7 @@ extern "C" {
 #define DRM_V3D_PERFMON_CREATE                    0x08
 #define DRM_V3D_PERFMON_DESTROY                   0x09
 #define DRM_V3D_PERFMON_GET_VALUES                0x0a
+#define DRM_V3D_SUBMIT_CPU                        0x0b
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -56,6 +57,7 @@ extern "C" {
 						   struct drm_v3d_perfmon_destroy)
 #define DRM_IOCTL_V3D_PERFMON_GET_VALUES  DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \
 						   struct drm_v3d_perfmon_get_values)
+#define DRM_IOCTL_V3D_SUBMIT_CPU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu)
 
 #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE             0x01
 #define DRM_V3D_SUBMIT_EXTENSION		  0x02
@@ -93,6 +95,7 @@ enum v3d_queue {
 	V3D_TFU,
 	V3D_CSD,
 	V3D_CACHE_CLEAN,
+	V3D_CPU,
 };
 
 /**
@@ -276,6 +279,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
 	DRM_V3D_PARAM_SUPPORTS_PERFMON,
 	DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT,
+	DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE,
 };
 
 struct drm_v3d_get_param {
@@ -361,6 +365,19 @@ struct drm_v3d_submit_csd {
 	__u32 pad;
 };
 
+struct drm_v3d_submit_cpu {
+	/* Pointer to a u32 array of the BOs that are referenced by the job. */
+	__u64 bo_handles;
+
+	/* Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	__u32 flags;
+
+	/* Pointer to an array of ioctl extensions*/
+	__u64 extensions;
+};
+
 enum {
 	V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
 	V3D_PERFCNT_FEP_VALID_PRIMS,
-- 
GitLab


From c5195d001f4c122032a9ce90c6b88d772673fa35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:31 -0300
Subject: [PATCH 0651/2340] drm/v3d: Use v3d_get_extensions() to parse CPU job
 data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, v3d_get_extensions() only parses multisync data and assigns
it to the `struct v3d_submit_ext`. But, to implement the CPU job with
user extensions, we want v3d_get_extensions() to be able to parse CPU
job data and assign it to the `struct v3d_cpu_job`.

Therefore, allow the function v3d_get_extensions() to use `struct v3d_cpu_job *`
as a parameter. If the `struct v3d_cpu_job *` is assigned to NULL, it means
that the job is a GPU job and CPU job extensions should be rejected.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-10-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_submit.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 918e28b1d00a1..124935547f179 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -323,10 +323,9 @@ fail:
 static int
 v3d_get_multisync_submit_deps(struct drm_file *file_priv,
 			      struct drm_v3d_extension __user *ext,
-			      void *data)
+			      struct v3d_submit_ext *se)
 {
 	struct drm_v3d_multi_sync multisync;
-	struct v3d_submit_ext *se = data;
 	int ret;
 
 	if (se->in_sync_count || se->out_sync_count) {
@@ -340,7 +339,7 @@ v3d_get_multisync_submit_deps(struct drm_file *file_priv,
 	if (multisync.pad)
 		return -EINVAL;
 
-	ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count,
+	ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count,
 					  multisync.out_syncs);
 	if (ret)
 		return ret;
@@ -359,7 +358,8 @@ v3d_get_multisync_submit_deps(struct drm_file *file_priv,
 static int
 v3d_get_extensions(struct drm_file *file_priv,
 		   u64 ext_handles,
-		   void *data)
+		   struct v3d_submit_ext *se,
+		   struct v3d_cpu_job *job)
 {
 	struct drm_v3d_extension __user *user_ext;
 	int ret;
@@ -375,15 +375,16 @@ v3d_get_extensions(struct drm_file *file_priv,
 
 		switch (ext.id) {
 		case DRM_V3D_EXT_ID_MULTI_SYNC:
-			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data);
-			if (ret)
-				return ret;
+			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
 			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
 		}
 
+		if (ret)
+			return ret;
+
 		user_ext = u64_to_user_ptr(ext.next);
 	}
 
@@ -430,7 +431,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
-		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
 		if (ret) {
 			DRM_DEBUG("Failed to get extensions.\n");
 			return ret;
@@ -585,7 +586,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	}
 
 	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
-		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
 		if (ret) {
 			DRM_DEBUG("Failed to get extensions.\n");
 			return ret;
@@ -692,7 +693,7 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 	}
 
 	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
-		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
 		if (ret) {
 			DRM_DEBUG("Failed to get extensions.\n");
 			return ret;
@@ -805,7 +806,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
 		return ret;
 
 	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
-		ret = v3d_get_extensions(file_priv, args->extensions, &se);
+		ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job);
 		if (ret) {
 			DRM_DEBUG("Failed to get extensions.\n");
 			goto fail;
-- 
GitLab


From 1fe0879efc8f623816c7a825d853d2140c88cb2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:32 -0300
Subject: [PATCH 0652/2340] drm/v3d: Create tracepoints to track the CPU job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create tracepoints to track the three major events of a CPU job
lifetime:
	1. Submission of a `v3d_submit_cpu` IOCTL
	2. Beginning of the execution of a CPU job
	3. Ending of the execution of a CPU job

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-11-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_sched.c  |  4 +++
 drivers/gpu/drm/v3d/v3d_submit.c |  2 ++
 drivers/gpu/drm/v3d/v3d_trace.h  | 57 ++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index c89e92fc614cf..ebbd00840a73c 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -288,8 +288,12 @@ v3d_cpu_job_run(struct drm_sched_job *sched_job)
 	file->start_ns[V3D_CPU] = local_clock();
 	v3d->queue[V3D_CPU].start_ns = file->start_ns[V3D_CPU];
 
+	trace_v3d_cpu_job_begin(&v3d->drm, job->job_type);
+
 	cpu_job_function[job->job_type](job);
 
+	trace_v3d_cpu_job_end(&v3d->drm, job->job_type);
+
 	runtime = local_clock() - file->start_ns[V3D_CPU];
 
 	file->enabled_ns[V3D_CPU] += runtime;
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 124935547f179..c134b113b1812 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -824,6 +824,8 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
 		goto fail;
 	}
 
+	trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);
+
 	ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
 			   v3d_job_free, 0, &se, V3D_CPU);
 	if (ret)
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
index 7aa8dc356e54c..5917b94148f5a 100644
--- a/drivers/gpu/drm/v3d/v3d_trace.h
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -225,6 +225,63 @@ TRACE_EVENT(v3d_submit_csd,
 		      __entry->seqno)
 );
 
+TRACE_EVENT(v3d_submit_cpu_ioctl,
+	    TP_PROTO(struct drm_device *dev, enum v3d_cpu_job_type job_type),
+	    TP_ARGS(dev, job_type),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(enum v3d_cpu_job_type, job_type)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->job_type = job_type;
+			   ),
+
+	    TP_printk("dev=%u, job_type=%d",
+		      __entry->dev,
+		      __entry->job_type)
+);
+
+TRACE_EVENT(v3d_cpu_job_begin,
+	    TP_PROTO(struct drm_device *dev, enum v3d_cpu_job_type job_type),
+	    TP_ARGS(dev, job_type),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(enum v3d_cpu_job_type, job_type)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->job_type = job_type;
+			   ),
+
+	    TP_printk("dev=%u, job_type=%d",
+		      __entry->dev,
+		      __entry->job_type)
+);
+
+TRACE_EVENT(v3d_cpu_job_end,
+	    TP_PROTO(struct drm_device *dev, enum v3d_cpu_job_type job_type),
+	    TP_ARGS(dev, job_type),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(enum v3d_cpu_job_type, job_type)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->job_type = job_type;
+			   ),
+
+	    TP_printk("dev=%u, job_type=%d",
+		      __entry->dev,
+		      __entry->job_type)
+);
+
 TRACE_EVENT(v3d_cache_clean_begin,
 	    TP_PROTO(struct drm_device *dev),
 	    TP_ARGS(dev),
-- 
GitLab


From 369b05961731925e4a43608ea1e7884df200f0bd Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 30 Nov 2023 13:40:33 -0300
Subject: [PATCH 0653/2340] drm/v3d: Detach the CSD job BO setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Detach CSD job setup from CSD submission ioctl to reuse it in CPU
submission ioctl for indirect CSD job.

Signed-off-by: Melissa Wen <mwen@igalia.com>
Co-developed-by: Maíra Canal <mcanal@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-12-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_submit.c | 68 ++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index c134b113b1812..eb26fe1e27e3a 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -256,6 +256,45 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
 	}
 }
 
+static int
+v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
+			   struct v3d_dev *v3d,
+			   struct drm_v3d_submit_csd *args,
+			   struct v3d_csd_job **job,
+			   struct v3d_job **clean_job,
+			   struct v3d_submit_ext *se,
+			   struct ww_acquire_ctx *acquire_ctx)
+{
+	int ret;
+
+	ret = v3d_job_allocate((void *)job, sizeof(**job));
+	if (ret)
+		return ret;
+
+	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
+			   v3d_job_free, args->in_sync, se, V3D_CSD);
+	if (ret)
+		return ret;
+
+	ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
+	if (ret)
+		return ret;
+
+	ret = v3d_job_init(v3d, file_priv, *clean_job,
+			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
+	if (ret)
+		return ret;
+
+	(*job)->args = *args;
+
+	ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job,
+			     args->bo_handles, args->bo_handle_count);
+	if (ret)
+		return ret;
+
+	return v3d_lock_bo_reservations(*clean_job, acquire_ctx);
+}
+
 static void
 v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
 {
@@ -700,32 +739,9 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	ret = v3d_job_allocate((void *)&job, sizeof(*job));
-	if (ret)
-		return ret;
-
-	ret = v3d_job_init(v3d, file_priv, &job->base,
-			   v3d_job_free, args->in_sync, &se, V3D_CSD);
-	if (ret)
-		goto fail;
-
-	ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
-	if (ret)
-		goto fail;
-
-	ret = v3d_job_init(v3d, file_priv, clean_job,
-			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
-	if (ret)
-		goto fail;
-
-	job->args = *args;
-
-	ret = v3d_lookup_bos(dev, file_priv, clean_job,
-			     args->bo_handles, args->bo_handle_count);
-	if (ret)
-		goto fail;
-
-	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
+	ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args,
+					 &job, &clean_job, &se,
+					 &acquire_ctx);
 	if (ret)
 		goto fail;
 
-- 
GitLab


From 7c13132c4073628b5fe23b5188ac583a2882a6b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:34 -0300
Subject: [PATCH 0654/2340] drm/v3d: Enable BO mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For the indirect CSD CPU job, we will need to access the internal
contents of the BO with the dispatch parameters. Therefore, create
methods to allow the mapping and unmapping of the BO.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-13-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_bo.c  | 18 ++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_drv.h |  4 ++++
 2 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index 357a0da7e16aa..1bdfac8beafdf 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -33,6 +33,9 @@ void v3d_free_object(struct drm_gem_object *obj)
 	struct v3d_dev *v3d = to_v3d_dev(obj->dev);
 	struct v3d_bo *bo = to_v3d_bo(obj);
 
+	if (bo->vaddr)
+		v3d_put_bo_vaddr(bo);
+
 	v3d_mmu_remove_ptes(bo);
 
 	mutex_lock(&v3d->bo_lock);
@@ -134,6 +137,7 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
 	if (IS_ERR(shmem_obj))
 		return ERR_CAST(shmem_obj);
 	bo = to_v3d_bo(&shmem_obj->base);
+	bo->vaddr = NULL;
 
 	ret = v3d_bo_create_finish(&shmem_obj->base);
 	if (ret)
@@ -167,6 +171,20 @@ v3d_prime_import_sg_table(struct drm_device *dev,
 	return obj;
 }
 
+void v3d_get_bo_vaddr(struct v3d_bo *bo)
+{
+	struct drm_gem_shmem_object *obj = &bo->base;
+
+	bo->vaddr = vmap(obj->pages, obj->base.size >> PAGE_SHIFT, VM_MAP,
+			 pgprot_writecombine(PAGE_KERNEL));
+}
+
+void v3d_put_bo_vaddr(struct v3d_bo *bo)
+{
+	vunmap(bo->vaddr);
+	bo->vaddr = NULL;
+}
+
 int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 2246a0e29955a..39d62915cdd62 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -202,6 +202,8 @@ struct v3d_bo {
 	 * v3d_render_job->unref_list
 	 */
 	struct list_head unref_head;
+
+	void *vaddr;
 };
 
 static inline struct v3d_bo *
@@ -391,6 +393,8 @@ struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size);
 void v3d_free_object(struct drm_gem_object *gem_obj);
 struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
 			     size_t size);
+void v3d_get_bo_vaddr(struct v3d_bo *bo);
+void v3d_put_bo_vaddr(struct v3d_bo *bo);
 int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
-- 
GitLab


From 18b8413b25b7070fa2e55858a2c808e6909581d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:35 -0300
Subject: [PATCH 0655/2340] drm/v3d: Create a CPU job extension for a indirect
 CSD job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CPU job is a type of job that performs operations that requires CPU
intervention. An indirect CSD job is a job that, when executed in the
queue, will map the indirect buffer, read the dispatch parameters, and
submit a regular dispatch. Therefore, it is a job that needs CPU
intervention.

So, create a user extension for the CPU job that enables the creation
of an indirect CSD. This user extension will allow the creation of a CSD
job linked to a CPU job. The CPU job will wait for the indirect CSD job
dependencies and, once they are signaled, it will update the CSD job
parameters.

Co-developed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-14-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.h    |  31 ++++++++-
 drivers/gpu/drm/v3d/v3d_sched.c  |  41 +++++++++++-
 drivers/gpu/drm/v3d/v3d_submit.c | 104 ++++++++++++++++++++++++++++++-
 include/uapi/drm/v3d_drm.h       |  43 ++++++++++++-
 4 files changed, 213 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 39d62915cdd62..202c0d4b04a5f 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -316,12 +316,41 @@ struct v3d_csd_job {
 	struct drm_v3d_submit_csd args;
 };
 
-enum v3d_cpu_job_type {};
+enum v3d_cpu_job_type {
+	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
+};
+
+struct v3d_indirect_csd_info {
+	/* Indirect CSD */
+	struct v3d_csd_job *job;
+
+	/* Clean cache job associated to the Indirect CSD job */
+	struct v3d_job *clean_job;
+
+	/* Offset within the BO where the workgroup counts are stored */
+	u32 offset;
+
+	/* Workgroups size */
+	u32 wg_size;
+
+	/* Indices of the uniforms with the workgroup dispatch counts
+	 * in the uniform stream.
+	 */
+	u32 wg_uniform_offsets[3];
+
+	/* Indirect BO */
+	struct drm_gem_object *indirect;
+
+	/* Context of the Indirect CSD job */
+	struct ww_acquire_ctx acquire_ctx;
+};
 
 struct v3d_cpu_job {
 	struct v3d_job base;
 
 	enum v3d_cpu_job_type job_type;
+
+	struct v3d_indirect_csd_info indirect_csd;
 };
 
 typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index ebbd00840a73c..307257ab0e4a2 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -25,6 +25,8 @@
 #include "v3d_regs.h"
 #include "v3d_trace.h"
 
+#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
+
 static struct v3d_job *
 to_v3d_job(struct drm_sched_job *sched_job)
 {
@@ -268,7 +270,44 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 	return fence;
 }
 
-static const v3d_cpu_job_fn cpu_job_function[] = { };
+static void
+v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
+{
+	struct v3d_indirect_csd_info *indirect_csd = &job->indirect_csd;
+	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
+	struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect);
+	struct drm_v3d_submit_csd *args = &indirect_csd->job->args;
+	u32 *wg_counts;
+
+	v3d_get_bo_vaddr(bo);
+	v3d_get_bo_vaddr(indirect);
+
+	wg_counts = (uint32_t *)(bo->vaddr + indirect_csd->offset);
+
+	if (wg_counts[0] == 0 || wg_counts[1] == 0 || wg_counts[2] == 0)
+		return;
+
+	args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
+	args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
+	args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
+	args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
+		       (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
+
+	for (int i = 0; i < 3; i++) {
+		/* 0xffffffff indicates that the uniform rewrite is not needed */
+		if (indirect_csd->wg_uniform_offsets[i] != 0xffffffff) {
+			u32 uniform_idx = indirect_csd->wg_uniform_offsets[i];
+			((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i];
+		}
+	}
+
+	v3d_put_bo_vaddr(indirect);
+	v3d_put_bo_vaddr(bo);
+}
+
+static const v3d_cpu_job_fn cpu_job_function[] = {
+	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
+};
 
 static struct dma_fence *
 v3d_cpu_job_run(struct drm_sched_job *sched_job)
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index eb26fe1e27e3a..0320695b941b7 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -391,6 +391,48 @@ v3d_get_multisync_submit_deps(struct drm_file *file_priv,
 	return 0;
 }
 
+/* Get data for the indirect CSD job submission. */
+static int
+v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
+				struct drm_v3d_extension __user *ext,
+				struct v3d_cpu_job *job)
+{
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct v3d_dev *v3d = v3d_priv->v3d;
+	struct drm_v3d_indirect_csd indirect_csd;
+	struct v3d_indirect_csd_info *info = &job->indirect_csd;
+
+	if (!job) {
+		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+		return -EINVAL;
+	}
+
+	if (job->job_type) {
+		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd)))
+		return -EFAULT;
+
+	if (!v3d_has_csd(v3d)) {
+		DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n");
+		return -EINVAL;
+	}
+
+	job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD;
+	info->offset = indirect_csd.offset;
+	info->wg_size = indirect_csd.wg_size;
+	memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets,
+	       sizeof(indirect_csd.wg_uniform_offsets));
+
+	info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect);
+
+	return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit,
+					  &info->job, &info->clean_job,
+					  NULL, &info->acquire_ctx);
+}
+
 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
  * according to the extension id (name).
  */
@@ -416,6 +458,9 @@ v3d_get_extensions(struct drm_file *file_priv,
 		case DRM_V3D_EXT_ID_MULTI_SYNC:
 			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
 			break;
+		case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
+			ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
+			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
@@ -790,7 +835,9 @@ fail:
 	return ret;
 }
 
-static const unsigned int cpu_job_bo_handle_count[] = { };
+static const unsigned int cpu_job_bo_handle_count[] = {
+	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
+};
 
 /**
  * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
@@ -808,7 +855,10 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
 	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct drm_v3d_submit_cpu *args = data;
 	struct v3d_submit_ext se = {0};
+	struct v3d_submit_ext *out_se = NULL;
 	struct v3d_cpu_job *cpu_job = NULL;
+	struct v3d_csd_job *csd_job = NULL;
+	struct v3d_job *clean_job = NULL;
 	struct ww_acquire_ctx acquire_ctx;
 	int ret;
 
@@ -847,6 +897,9 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	clean_job = cpu_job->indirect_csd.clean_job;
+	csd_job = cpu_job->indirect_csd.job;
+
 	if (args->bo_handle_count) {
 		ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
 				     args->bo_handles, args->bo_handle_count);
@@ -860,19 +913,66 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
 
 	mutex_lock(&v3d->sched_lock);
 	v3d_push_job(&cpu_job->base);
+
+	switch (cpu_job->job_type) {
+	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
+		ret = drm_sched_job_add_dependency(&csd_job->base.base,
+						   dma_fence_get(cpu_job->base.done_fence));
+		if (ret)
+			goto fail_unreserve;
+
+		v3d_push_job(&csd_job->base);
+
+		ret = drm_sched_job_add_dependency(&clean_job->base,
+						   dma_fence_get(csd_job->base.done_fence));
+		if (ret)
+			goto fail_unreserve;
+
+		v3d_push_job(clean_job);
+
+		break;
+	default:
+		break;
+	}
 	mutex_unlock(&v3d->sched_lock);
 
+	out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ? NULL : &se;
+
 	v3d_attach_fences_and_unlock_reservation(file_priv,
 						 &cpu_job->base,
 						 &acquire_ctx, 0,
-						 NULL, cpu_job->base.done_fence);
+						 out_se, cpu_job->base.done_fence);
+
+	switch (cpu_job->job_type) {
+	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
+		v3d_attach_fences_and_unlock_reservation(file_priv,
+							 clean_job,
+							 &cpu_job->indirect_csd.acquire_ctx,
+							 0, &se, clean_job->done_fence);
+		break;
+	default:
+		break;
+	}
 
 	v3d_job_put(&cpu_job->base);
+	v3d_job_put(&csd_job->base);
+	v3d_job_put(clean_job);
 
 	return 0;
 
+fail_unreserve:
+	mutex_unlock(&v3d->sched_lock);
+
+	drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count,
+				    &acquire_ctx);
+
+	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
+				    &cpu_job->indirect_csd.acquire_ctx);
+
 fail:
 	v3d_job_cleanup((void *)cpu_job);
+	v3d_job_cleanup((void *)csd_job);
+	v3d_job_cleanup(clean_job);
 	v3d_put_multisync_post_deps(&se);
 
 	return ret;
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 00abef9d0db73..0c0f477825282 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -71,7 +71,8 @@ extern "C" {
 struct drm_v3d_extension {
 	__u64 next;
 	__u32 id;
-#define DRM_V3D_EXT_ID_MULTI_SYNC		0x01
+#define DRM_V3D_EXT_ID_MULTI_SYNC			0x01
+#define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD		0x02
 	__u32 flags; /* mbz */
 };
 
@@ -365,8 +366,46 @@ struct drm_v3d_submit_csd {
 	__u32 pad;
 };
 
+/**
+ * struct drm_v3d_indirect_csd - ioctl extension for the CPU job to create an
+ * indirect CSD
+ *
+ * When an extension of DRM_V3D_EXT_ID_CPU_INDIRECT_CSD id is defined, it
+ * points to this extension to define a indirect CSD submission. It creates a
+ * CPU job linked to a CSD job. The CPU job waits for the indirect CSD
+ * dependencies and, once they are signaled, it updates the CSD job config
+ * before allowing the CSD job execution.
+ */
+struct drm_v3d_indirect_csd {
+	struct drm_v3d_extension base;
+
+	/* Indirect CSD */
+	struct drm_v3d_submit_csd submit;
+
+	/* Handle of the indirect BO, that should be also attached to the
+	 * indirect CSD.
+	 */
+	__u32 indirect;
+
+	/* Offset within the BO where the workgroup counts are stored */
+	__u32 offset;
+
+	/* Workgroups size */
+	__u32 wg_size;
+
+	/* Indices of the uniforms with the workgroup dispatch counts
+	 * in the uniform stream. If the uniform rewrite is not needed,
+	 * the offset must be 0xffffffff.
+	 */
+	__u32 wg_uniform_offsets[3];
+};
+
 struct drm_v3d_submit_cpu {
-	/* Pointer to a u32 array of the BOs that are referenced by the job. */
+	/* Pointer to a u32 array of the BOs that are referenced by the job.
+	 *
+	 * For DRM_V3D_EXT_ID_CPU_INDIRECT_CSD, it must contain only one BO,
+	 * that contains the workgroup counts.
+	 */
 	__u64 bo_handles;
 
 	/* Number of BO handles passed in (size is that times 4). */
-- 
GitLab


From 9ba0ff3e083f6a4a0b6698f06bfff74805fefa5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:36 -0300
Subject: [PATCH 0656/2340] drm/v3d: Create a CPU job extension for the
 timestamp query job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CPU job is a type of job that performs operations that requires CPU
intervention. A timestamp query job is a job that calculates the
query timestamp and updates the query availability by signaling a
syncobj. As V3D doesn't provide any mechanism to obtain a timestamp
from the GPU, it is a job that needs CPU intervention.

So, create a user extension for the CPU job that enables the creation
of a timestamp query job. This user extension will allow the creation of
a CPU job that performs the timestamp query calculation and updates the
timestamp BO with the proper value.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-15-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.h    | 17 +++++++++
 drivers/gpu/drm/v3d/v3d_sched.c  | 40 +++++++++++++++++++-
 drivers/gpu/drm/v3d/v3d_submit.c | 63 ++++++++++++++++++++++++++++++++
 include/uapi/drm/v3d_drm.h       | 30 +++++++++++++++
 4 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 202c0d4b04a5f..dd86e745c2608 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -318,6 +318,15 @@ struct v3d_csd_job {
 
 enum v3d_cpu_job_type {
 	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
+	V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY,
+};
+
+struct v3d_timestamp_query {
+	/* Offset of this query in the timestamp BO for its value. */
+	u32 offset;
+
+	/* Syncobj that indicates the timestamp availability */
+	struct drm_syncobj *syncobj;
 };
 
 struct v3d_indirect_csd_info {
@@ -345,12 +354,20 @@ struct v3d_indirect_csd_info {
 	struct ww_acquire_ctx acquire_ctx;
 };
 
+struct v3d_timestamp_query_info {
+	struct v3d_timestamp_query *queries;
+
+	u32 count;
+};
+
 struct v3d_cpu_job {
 	struct v3d_job base;
 
 	enum v3d_cpu_job_type job_type;
 
 	struct v3d_indirect_csd_info indirect_csd;
+
+	struct v3d_timestamp_query_info timestamp_query;
 };
 
 typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 307257ab0e4a2..5e692f0871050 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -21,6 +21,8 @@
 #include <linux/sched/clock.h>
 #include <linux/kthread.h>
 
+#include <drm/drm_syncobj.h>
+
 #include "v3d_drv.h"
 #include "v3d_regs.h"
 #include "v3d_trace.h"
@@ -71,6 +73,21 @@ v3d_sched_job_free(struct drm_sched_job *sched_job)
 	v3d_job_cleanup(job);
 }
 
+static void
+v3d_cpu_job_free(struct drm_sched_job *sched_job)
+{
+	struct v3d_cpu_job *job = to_cpu_job(sched_job);
+	struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
+
+	if (timestamp_query->queries) {
+		for (int i = 0; i < timestamp_query->count; i++)
+			drm_syncobj_put(timestamp_query->queries[i].syncobj);
+		kvfree(timestamp_query->queries);
+	}
+
+	v3d_job_cleanup(&job->base);
+}
+
 static void
 v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
 {
@@ -305,8 +322,29 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
 	v3d_put_bo_vaddr(bo);
 }
 
+static void
+v3d_timestamp_query(struct v3d_cpu_job *job)
+{
+	struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
+	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
+	u8 *value_addr;
+
+	v3d_get_bo_vaddr(bo);
+
+	for (int i = 0; i < timestamp_query->count; i++) {
+		value_addr = ((u8 *)bo->vaddr) + timestamp_query->queries[i].offset;
+		*((u64 *)value_addr) = i == 0 ? ktime_get_ns() : 0ull;
+
+		drm_syncobj_replace_fence(timestamp_query->queries[i].syncobj,
+					  job->base.done_fence);
+	}
+
+	v3d_put_bo_vaddr(bo);
+}
+
 static const v3d_cpu_job_fn cpu_job_function[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
+	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
 };
 
 static struct dma_fence *
@@ -504,7 +542,7 @@ static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
 static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
 	.run_job = v3d_cpu_job_run,
 	.timedout_job = v3d_generic_job_timedout,
-	.free_job = v3d_sched_job_free
+	.free_job = v3d_cpu_job_free
 };
 
 int
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 0320695b941b7..83e029e786eaf 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -433,6 +433,64 @@ v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
 					  NULL, &info->acquire_ctx);
 }
 
+/* Get data for the query timestamp job submission. */
+static int
+v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
+				   struct drm_v3d_extension __user *ext,
+				   struct v3d_cpu_job *job)
+{
+	u32 __user *offsets, *syncs;
+	struct drm_v3d_timestamp_query timestamp;
+
+	if (!job) {
+		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+		return -EINVAL;
+	}
+
+	if (job->job_type) {
+		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&timestamp, ext, sizeof(timestamp)))
+		return -EFAULT;
+
+	if (timestamp.pad)
+		return -EINVAL;
+
+	job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY;
+
+	job->timestamp_query.queries = kvmalloc_array(timestamp.count,
+						      sizeof(struct v3d_timestamp_query),
+						      GFP_KERNEL);
+	if (!job->timestamp_query.queries)
+		return -ENOMEM;
+
+	offsets = u64_to_user_ptr(timestamp.offsets);
+	syncs = u64_to_user_ptr(timestamp.syncs);
+
+	for (int i = 0; i < timestamp.count; i++) {
+		u32 offset, sync;
+
+		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
+			kvfree(job->timestamp_query.queries);
+			return -EFAULT;
+		}
+
+		job->timestamp_query.queries[i].offset = offset;
+
+		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
+			kvfree(job->timestamp_query.queries);
+			return -EFAULT;
+		}
+
+		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+	}
+	job->timestamp_query.count = timestamp.count;
+
+	return 0;
+}
+
 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
  * according to the extension id (name).
  */
@@ -461,6 +519,9 @@ v3d_get_extensions(struct drm_file *file_priv,
 		case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
 			ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
 			break;
+		case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY:
+			ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job);
+			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
@@ -837,6 +898,7 @@ fail:
 
 static const unsigned int cpu_job_bo_handle_count[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
+	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
 };
 
 /**
@@ -974,6 +1036,7 @@ fail:
 	v3d_job_cleanup((void *)csd_job);
 	v3d_job_cleanup(clean_job);
 	v3d_put_multisync_post_deps(&se);
+	kvfree(cpu_job->timestamp_query.queries);
 
 	return ret;
 }
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 0c0f477825282..239801b5e1174 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -73,6 +73,7 @@ struct drm_v3d_extension {
 	__u32 id;
 #define DRM_V3D_EXT_ID_MULTI_SYNC			0x01
 #define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD		0x02
+#define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY		0x03
 	__u32 flags; /* mbz */
 };
 
@@ -400,11 +401,40 @@ struct drm_v3d_indirect_csd {
 	__u32 wg_uniform_offsets[3];
 };
 
+/**
+ * struct drm_v3d_timestamp_query - ioctl extension for the CPU job to calculate
+ * a timestamp query
+ *
+ * When an extension DRM_V3D_EXT_ID_TIMESTAMP_QUERY is defined, it points to
+ * this extension to define a timestamp query submission. This CPU job will
+ * calculate the timestamp query and update the query value within the
+ * timestamp BO. Moreover, it will signal the timestamp syncobj to indicate
+ * query availability.
+ */
+struct drm_v3d_timestamp_query {
+	struct drm_v3d_extension base;
+
+	/* Array of queries' offsets within the timestamp BO for their value */
+	__u64 offsets;
+
+	/* Array of timestamp's syncobjs to indicate its availability */
+	__u64 syncs;
+
+	/* Number of queries */
+	__u32 count;
+
+	/* mbz */
+	__u32 pad;
+};
+
 struct drm_v3d_submit_cpu {
 	/* Pointer to a u32 array of the BOs that are referenced by the job.
 	 *
 	 * For DRM_V3D_EXT_ID_CPU_INDIRECT_CSD, it must contain only one BO,
 	 * that contains the workgroup counts.
+	 *
+	 * For DRM_V3D_EXT_ID_TIMESTAMP_QUERY, it must contain only one BO,
+	 * that will contain the timestamp.
 	 */
 	__u64 bo_handles;
 
-- 
GitLab


From 34a101e64296c736b14ce27e647fcebd70cb7bf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:37 -0300
Subject: [PATCH 0657/2340] drm/v3d: Create a CPU job extension for the reset
 timestamp job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CPU job is a type of job that performs operations that requires CPU
intervention. A reset timestamp job is a job that resets the timestamp
queries based on the value offset of the first query. As V3D doesn't
provide any mechanism to obtain a timestamp from the GPU, it is a job
that needs CPU intervention.

So, create a user extension for the CPU job that enables the creation
of a reset timestamp job. This user extension will allow the creation of
a CPU job that resets the timestamp value in the timestamp BO and resets
the availability syncobj.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-16-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.h    |  1 +
 drivers/gpu/drm/v3d/v3d_sched.c  | 21 +++++++++++++
 drivers/gpu/drm/v3d/v3d_submit.c | 52 ++++++++++++++++++++++++++++++++
 include/uapi/drm/v3d_drm.h       | 27 +++++++++++++++++
 4 files changed, 101 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index dd86e745c2608..3988407635ed8 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -319,6 +319,7 @@ struct v3d_csd_job {
 enum v3d_cpu_job_type {
 	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
 	V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY,
+	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
 };
 
 struct v3d_timestamp_query {
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 5e692f0871050..e287f42d3621d 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -342,9 +342,30 @@ v3d_timestamp_query(struct v3d_cpu_job *job)
 	v3d_put_bo_vaddr(bo);
 }
 
+static void
+v3d_reset_timestamp_queries(struct v3d_cpu_job *job)
+{
+	struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
+	struct v3d_timestamp_query *queries = timestamp_query->queries;
+	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
+	u8 *value_addr;
+
+	v3d_get_bo_vaddr(bo);
+
+	for (int i = 0; i < timestamp_query->count; i++) {
+		value_addr = ((u8 *)bo->vaddr) + queries[i].offset;
+		*((u64 *)value_addr) = 0;
+
+		drm_syncobj_replace_fence(queries[i].syncobj, NULL);
+	}
+
+	v3d_put_bo_vaddr(bo);
+}
+
 static const v3d_cpu_job_fn cpu_job_function[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
+	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
 };
 
 static struct dma_fence *
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 83e029e786eaf..1c719416e26a5 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -491,6 +491,54 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
 	return 0;
 }
 
+static int
+v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
+				   struct drm_v3d_extension __user *ext,
+				   struct v3d_cpu_job *job)
+{
+	u32 __user *syncs;
+	struct drm_v3d_reset_timestamp_query reset;
+
+	if (!job) {
+		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+		return -EINVAL;
+	}
+
+	if (job->job_type) {
+		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&reset, ext, sizeof(reset)))
+		return -EFAULT;
+
+	job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY;
+
+	job->timestamp_query.queries = kvmalloc_array(reset.count,
+						      sizeof(struct v3d_timestamp_query),
+						      GFP_KERNEL);
+	if (!job->timestamp_query.queries)
+		return -ENOMEM;
+
+	syncs = u64_to_user_ptr(reset.syncs);
+
+	for (int i = 0; i < reset.count; i++) {
+		u32 sync;
+
+		job->timestamp_query.queries[i].offset = reset.offset + 8 * i;
+
+		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
+			kvfree(job->timestamp_query.queries);
+			return -EFAULT;
+		}
+
+		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+	}
+	job->timestamp_query.count = reset.count;
+
+	return 0;
+}
+
 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
  * according to the extension id (name).
  */
@@ -522,6 +570,9 @@ v3d_get_extensions(struct drm_file *file_priv,
 		case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY:
 			ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job);
 			break;
+		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
+			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
+			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
@@ -899,6 +950,7 @@ fail:
 static const unsigned int cpu_job_bo_handle_count[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
+	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
 };
 
 /**
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 239801b5e1174..930f8d07f088e 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -74,6 +74,7 @@ struct drm_v3d_extension {
 #define DRM_V3D_EXT_ID_MULTI_SYNC			0x01
 #define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD		0x02
 #define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY		0x03
+#define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY	0x04
 	__u32 flags; /* mbz */
 };
 
@@ -427,6 +428,29 @@ struct drm_v3d_timestamp_query {
 	__u32 pad;
 };
 
+/**
+ * struct drm_v3d_reset_timestamp_query - ioctl extension for the CPU job to
+ * reset timestamp queries
+ *
+ * When an extension DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY is defined, it
+ * points to this extension to define a reset timestamp submission. This CPU
+ * job will reset the timestamp queries based on value offset of the first
+ * query. Moreover, it will reset the timestamp syncobj to reset query
+ * availability.
+ */
+struct drm_v3d_reset_timestamp_query {
+	struct drm_v3d_extension base;
+
+	/* Array of timestamp's syncobjs to indicate its availability */
+	__u64 syncs;
+
+	/* Offset of the first query within the timestamp BO for its value */
+	__u32 offset;
+
+	/* Number of queries */
+	__u32 count;
+};
+
 struct drm_v3d_submit_cpu {
 	/* Pointer to a u32 array of the BOs that are referenced by the job.
 	 *
@@ -435,6 +459,9 @@ struct drm_v3d_submit_cpu {
 	 *
 	 * For DRM_V3D_EXT_ID_TIMESTAMP_QUERY, it must contain only one BO,
 	 * that will contain the timestamp.
+	 *
+	 * For DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY, it must contain only
+	 * one BO, that contains the timestamp.
 	 */
 	__u64 bo_handles;
 
-- 
GitLab


From 6745f3e44a20ac18e7e5a40a3c7f62225983d544 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:38 -0300
Subject: [PATCH 0658/2340] drm/v3d: Create a CPU job extension to copy
 timestamp query to a buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CPU job is a type of job that performs operations that requires CPU
intervention. A copy timestamp query job is a job that copy the complete
or partial result of a query to a buffer. As V3D doesn't provide any
mechanism to obtain a timestamp from the GPU, it is a job that needs
CPU intervention.

So, create a user extension for the CPU job that enables the creation
of a copy timestamp query job. This user extension will allow the creation
of a CPU job that copy the results of a timestamp query to a BO with the
possibility to indicate the timestamp availability with a availability bit.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-17-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.h    | 20 +++++++++
 drivers/gpu/drm/v3d/v3d_sched.c  | 56 ++++++++++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_submit.c | 69 ++++++++++++++++++++++++++++++++
 include/uapi/drm/v3d_drm.h       | 45 +++++++++++++++++++++
 4 files changed, 190 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 3988407635ed8..5058a354fffda 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -320,6 +320,7 @@ enum v3d_cpu_job_type {
 	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
 	V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY,
 	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
+	V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
 };
 
 struct v3d_timestamp_query {
@@ -361,6 +362,23 @@ struct v3d_timestamp_query_info {
 	u32 count;
 };
 
+struct v3d_copy_query_results_info {
+	/* Define if should write to buffer using 64 or 32 bits */
+	bool do_64bit;
+
+	/* Define if it can write to buffer even if the query is not available */
+	bool do_partial;
+
+	/* Define if it should write availability bit to buffer */
+	bool availability_bit;
+
+	/* Offset of the copy buffer in the BO */
+	u32 offset;
+
+	/* Stride of the copy buffer in the BO */
+	u32 stride;
+};
+
 struct v3d_cpu_job {
 	struct v3d_job base;
 
@@ -369,6 +387,8 @@ struct v3d_cpu_job {
 	struct v3d_indirect_csd_info indirect_csd;
 
 	struct v3d_timestamp_query_info timestamp_query;
+
+	struct v3d_copy_query_results_info copy;
 };
 
 typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index e287f42d3621d..b1662d32a929a 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -362,10 +362,66 @@ v3d_reset_timestamp_queries(struct v3d_cpu_job *job)
 	v3d_put_bo_vaddr(bo);
 }
 
+static void
+write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value)
+{
+	if (do_64bit) {
+		u64 *dst64 = (u64 *)dst;
+
+		dst64[idx] = value;
+	} else {
+		u32 *dst32 = (u32 *)dst;
+
+		dst32[idx] = (u32)value;
+	}
+}
+
+static void
+v3d_copy_query_results(struct v3d_cpu_job *job)
+{
+	struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
+	struct v3d_timestamp_query *queries = timestamp_query->queries;
+	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
+	struct v3d_bo *timestamp = to_v3d_bo(job->base.bo[1]);
+	struct v3d_copy_query_results_info *copy = &job->copy;
+	struct dma_fence *fence;
+	u8 *query_addr;
+	bool available, write_result;
+	u8 *data;
+	int i;
+
+	v3d_get_bo_vaddr(bo);
+	v3d_get_bo_vaddr(timestamp);
+
+	data = ((u8 *)bo->vaddr) + copy->offset;
+
+	for (i = 0; i < timestamp_query->count; i++) {
+		fence = drm_syncobj_fence_get(queries[i].syncobj);
+		available = fence ? dma_fence_is_signaled(fence) : false;
+
+		write_result = available || copy->do_partial;
+		if (write_result) {
+			query_addr = ((u8 *)timestamp->vaddr) + queries[i].offset;
+			write_to_buffer(data, 0, copy->do_64bit, *((u64 *)query_addr));
+		}
+
+		if (copy->availability_bit)
+			write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u);
+
+		data += copy->stride;
+
+		dma_fence_put(fence);
+	}
+
+	v3d_put_bo_vaddr(timestamp);
+	v3d_put_bo_vaddr(bo);
+}
+
 static const v3d_cpu_job_fn cpu_job_function[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
+	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
 };
 
 static struct dma_fence *
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 1c719416e26a5..bafd49c6440cb 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -539,6 +539,71 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
 	return 0;
 }
 
+/* Get data for the copy timestamp query results job submission. */
+static int
+v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
+				      struct drm_v3d_extension __user *ext,
+				      struct v3d_cpu_job *job)
+{
+	u32 __user *offsets, *syncs;
+	struct drm_v3d_copy_timestamp_query copy;
+	int i;
+
+	if (!job) {
+		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+		return -EINVAL;
+	}
+
+	if (job->job_type) {
+		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&copy, ext, sizeof(copy)))
+		return -EFAULT;
+
+	if (copy.pad)
+		return -EINVAL;
+
+	job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY;
+
+	job->timestamp_query.queries = kvmalloc_array(copy.count,
+						      sizeof(struct v3d_timestamp_query),
+						      GFP_KERNEL);
+	if (!job->timestamp_query.queries)
+		return -ENOMEM;
+
+	offsets = u64_to_user_ptr(copy.offsets);
+	syncs = u64_to_user_ptr(copy.syncs);
+
+	for (i = 0; i < copy.count; i++) {
+		u32 offset, sync;
+
+		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
+			kvfree(job->timestamp_query.queries);
+			return -EFAULT;
+		}
+
+		job->timestamp_query.queries[i].offset = offset;
+
+		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
+			kvfree(job->timestamp_query.queries);
+			return -EFAULT;
+		}
+
+		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+	}
+	job->timestamp_query.count = copy.count;
+
+	job->copy.do_64bit = copy.do_64bit;
+	job->copy.do_partial = copy.do_partial;
+	job->copy.availability_bit = copy.availability_bit;
+	job->copy.offset = copy.offset;
+	job->copy.stride = copy.stride;
+
+	return 0;
+}
+
 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
  * according to the extension id (name).
  */
@@ -573,6 +638,9 @@ v3d_get_extensions(struct drm_file *file_priv,
 		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
 			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
 			break;
+		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
+			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
+			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
@@ -951,6 +1019,7 @@ static const unsigned int cpu_job_bo_handle_count[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
+	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
 };
 
 /**
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 930f8d07f088e..f7defbf3d0436 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -75,6 +75,7 @@ struct drm_v3d_extension {
 #define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD		0x02
 #define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY		0x03
 #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY	0x04
+#define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY	0x05
 	__u32 flags; /* mbz */
 };
 
@@ -451,6 +452,46 @@ struct drm_v3d_reset_timestamp_query {
 	__u32 count;
 };
 
+/**
+ * struct drm_v3d_copy_timestamp_query - ioctl extension for the CPU job to copy
+ * query results to a buffer
+ *
+ * When an extension DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY is defined, it
+ * points to this extension to define a copy timestamp query submission. This
+ * CPU job will copy the timestamp queries results to a BO with the offset
+ * and stride defined in the extension.
+ */
+struct drm_v3d_copy_timestamp_query {
+	struct drm_v3d_extension base;
+
+	/* Define if should write to buffer using 64 or 32 bits */
+	__u8 do_64bit;
+
+	/* Define if it can write to buffer even if the query is not available */
+	__u8 do_partial;
+
+	/* Define if it should write availability bit to buffer */
+	__u8 availability_bit;
+
+	/* mbz */
+	__u8 pad;
+
+	/* Offset of the buffer in the BO */
+	__u32 offset;
+
+	/* Stride of the buffer in the BO */
+	__u32 stride;
+
+	/* Number of queries */
+	__u32 count;
+
+	/* Array of queries' offsets within the timestamp BO for their value */
+	__u64 offsets;
+
+	/* Array of timestamp's syncobjs to indicate its availability */
+	__u64 syncs;
+};
+
 struct drm_v3d_submit_cpu {
 	/* Pointer to a u32 array of the BOs that are referenced by the job.
 	 *
@@ -462,6 +503,10 @@ struct drm_v3d_submit_cpu {
 	 *
 	 * For DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY, it must contain only
 	 * one BO, that contains the timestamp.
+	 *
+	 * For DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY, it must contain two
+	 * BOs. The first is the BO where the timestamp queries will be written
+	 * to. The second is the BO that contains the timestamp.
 	 */
 	__u64 bo_handles;
 
-- 
GitLab


From bae7cb5d68001a8d4ceec5964dda74bb9aab7220 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:39 -0300
Subject: [PATCH 0659/2340] drm/v3d: Create a CPU job extension for the reset
 performance query job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CPU job is a type of job that performs operations that requires CPU
intervention. A reset performance query job is a job that resets the
performance queries by resetting the values of the perfmons. Moreover,
we also reset the syncobjs related to the availability of the query.

So, create a user extension for the CPU job that enables the creation
of a reset performance job. This user extension will allow the creation of
a CPU job that resets the perfmons values and resets the availability syncobj.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-18-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.h    | 28 ++++++++++++
 drivers/gpu/drm/v3d/v3d_sched.c  | 36 ++++++++++++++++
 drivers/gpu/drm/v3d/v3d_submit.c | 73 ++++++++++++++++++++++++++++++++
 include/uapi/drm/v3d_drm.h       | 30 +++++++++++++
 4 files changed, 167 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 5058a354fffda..0f7f80ad8d88d 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -321,6 +321,7 @@ enum v3d_cpu_job_type {
 	V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY,
 	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
 	V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
+	V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY,
 };
 
 struct v3d_timestamp_query {
@@ -331,6 +332,18 @@ struct v3d_timestamp_query {
 	struct drm_syncobj *syncobj;
 };
 
+/* Number of perfmons required to handle all supported performance counters */
+#define V3D_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \
+				      DRM_V3D_MAX_PERF_COUNTERS)
+
+struct v3d_performance_query {
+	/* Performance monitor IDs for this query */
+	u32 kperfmon_ids[V3D_MAX_PERFMONS];
+
+	/* Syncobj that indicates the query availability */
+	struct drm_syncobj *syncobj;
+};
+
 struct v3d_indirect_csd_info {
 	/* Indirect CSD */
 	struct v3d_csd_job *job;
@@ -362,6 +375,19 @@ struct v3d_timestamp_query_info {
 	u32 count;
 };
 
+struct v3d_performance_query_info {
+	struct v3d_performance_query *queries;
+
+	/* Number of performance queries */
+	u32 count;
+
+	/* Number of performance monitors related to that query pool */
+	u32 nperfmons;
+
+	/* Number of performance counters related to that query pool */
+	u32 ncounters;
+};
+
 struct v3d_copy_query_results_info {
 	/* Define if should write to buffer using 64 or 32 bits */
 	bool do_64bit;
@@ -389,6 +415,8 @@ struct v3d_cpu_job {
 	struct v3d_timestamp_query_info timestamp_query;
 
 	struct v3d_copy_query_results_info copy;
+
+	struct v3d_performance_query_info performance_query;
 };
 
 typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index b1662d32a929a..e83d2907bc83b 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -78,6 +78,7 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job)
 {
 	struct v3d_cpu_job *job = to_cpu_job(sched_job);
 	struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
+	struct v3d_performance_query_info *performance_query = &job->performance_query;
 
 	if (timestamp_query->queries) {
 		for (int i = 0; i < timestamp_query->count; i++)
@@ -85,6 +86,12 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job)
 		kvfree(timestamp_query->queries);
 	}
 
+	if (performance_query->queries) {
+		for (int i = 0; i < performance_query->count; i++)
+			drm_syncobj_put(performance_query->queries[i].syncobj);
+		kvfree(performance_query->queries);
+	}
+
 	v3d_job_cleanup(&job->base);
 }
 
@@ -417,11 +424,40 @@ v3d_copy_query_results(struct v3d_cpu_job *job)
 	v3d_put_bo_vaddr(bo);
 }
 
+static void
+v3d_reset_performance_queries(struct v3d_cpu_job *job)
+{
+	struct v3d_performance_query_info *performance_query = &job->performance_query;
+	struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
+	struct v3d_dev *v3d = job->base.v3d;
+	struct v3d_perfmon *perfmon;
+
+	for (int i = 0; i < performance_query->count; i++) {
+		for (int j = 0; j < performance_query->nperfmons; j++) {
+			perfmon = v3d_perfmon_find(v3d_priv,
+						   performance_query->queries[i].kperfmon_ids[j]);
+			if (!perfmon) {
+				DRM_DEBUG("Failed to find perfmon.");
+				continue;
+			}
+
+			v3d_perfmon_stop(v3d, perfmon, false);
+
+			memset(perfmon->values, 0, perfmon->ncounters * sizeof(u64));
+
+			v3d_perfmon_put(perfmon);
+		}
+
+		drm_syncobj_replace_fence(performance_query->queries[i].syncobj, NULL);
+	}
+}
+
 static const v3d_cpu_job_fn cpu_job_function[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
 	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
+	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries,
 };
 
 static struct dma_fence *
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index bafd49c6440cb..20af8ae14831d 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -604,6 +604,74 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
 	return 0;
 }
 
+static int
+v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
+				     struct drm_v3d_extension __user *ext,
+				     struct v3d_cpu_job *job)
+{
+	u32 __user *syncs;
+	u64 __user *kperfmon_ids;
+	struct drm_v3d_reset_performance_query reset;
+
+	if (!job) {
+		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+		return -EINVAL;
+	}
+
+	if (job->job_type) {
+		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&reset, ext, sizeof(reset)))
+		return -EFAULT;
+
+	job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY;
+
+	job->performance_query.queries = kvmalloc_array(reset.count,
+							sizeof(struct v3d_performance_query),
+							GFP_KERNEL);
+	if (!job->performance_query.queries)
+		return -ENOMEM;
+
+	syncs = u64_to_user_ptr(reset.syncs);
+	kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids);
+
+	for (int i = 0; i < reset.count; i++) {
+		u32 sync;
+		u64 ids;
+		u32 __user *ids_pointer;
+		u32 id;
+
+		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
+			kvfree(job->performance_query.queries);
+			return -EFAULT;
+		}
+
+		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+
+		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
+			kvfree(job->performance_query.queries);
+			return -EFAULT;
+		}
+
+		ids_pointer = u64_to_user_ptr(ids);
+
+		for (int j = 0; j < reset.nperfmons; j++) {
+			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
+				kvfree(job->performance_query.queries);
+				return -EFAULT;
+			}
+
+			job->performance_query.queries[i].kperfmon_ids[j] = id;
+		}
+	}
+	job->performance_query.count = reset.count;
+	job->performance_query.nperfmons = reset.nperfmons;
+
+	return 0;
+}
+
 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
  * according to the extension id (name).
  */
@@ -641,6 +709,9 @@ v3d_get_extensions(struct drm_file *file_priv,
 		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
 			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
 			break;
+		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
+			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
+			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
@@ -1020,6 +1091,7 @@ static const unsigned int cpu_job_bo_handle_count[] = {
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
 	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
+	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
 };
 
 /**
@@ -1158,6 +1230,7 @@ fail:
 	v3d_job_cleanup(clean_job);
 	v3d_put_multisync_post_deps(&se);
 	kvfree(cpu_job->timestamp_query.queries);
+	kvfree(cpu_job->performance_query.queries);
 
 	return ret;
 }
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index f7defbf3d0436..d609bdf29fd6e 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -76,6 +76,7 @@ struct drm_v3d_extension {
 #define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY		0x03
 #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY	0x04
 #define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY	0x05
+#define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY	0x06
 	__u32 flags; /* mbz */
 };
 
@@ -492,6 +493,32 @@ struct drm_v3d_copy_timestamp_query {
 	__u64 syncs;
 };
 
+/**
+ * struct drm_v3d_reset_performance_query - ioctl extension for the CPU job to
+ * reset performance queries
+ *
+ * When an extension DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY is defined, it
+ * points to this extension to define a reset performance submission. This CPU
+ * job will reset the performance queries by resetting the values of the
+ * performance monitors. Moreover, it will reset the syncobj to reset query
+ * availability.
+ */
+struct drm_v3d_reset_performance_query {
+	struct drm_v3d_extension base;
+
+	/* Array of performance queries's syncobjs to indicate its availability */
+	__u64 syncs;
+
+	/* Number of queries */
+	__u32 count;
+
+	/* Number of performance monitors */
+	__u32 nperfmons;
+
+	/* Array of u64 user-pointers that point to an array of kperfmon_ids */
+	__u64 kperfmon_ids;
+};
+
 struct drm_v3d_submit_cpu {
 	/* Pointer to a u32 array of the BOs that are referenced by the job.
 	 *
@@ -507,6 +534,9 @@ struct drm_v3d_submit_cpu {
 	 * For DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY, it must contain two
 	 * BOs. The first is the BO where the timestamp queries will be written
 	 * to. The second is the BO that contains the timestamp.
+	 *
+	 * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no
+	 * BOs.
 	 */
 	__u64 bo_handles;
 
-- 
GitLab


From 209e8d2695ee7a67a5b0487bbd1aa75e290d0f41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Thu, 30 Nov 2023 13:40:40 -0300
Subject: [PATCH 0660/2340] drm/v3d: Create a CPU job extension for the copy
 performance query job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CPU job is a type of job that performs operations that requires CPU
intervention. A copy performance query job is a job that copy the complete
or partial result of a query to a buffer. In order to copy the result of
a performance query to a buffer, we need to get the values from the
performance monitors.

So, create a user extension for the CPU job that enables the creation
of a copy performance query job. This user extension will allow the creation
of a CPU job that copy the results of a performance query to a BO with the
possibility to indicate the availability with a availability bit.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-19-mcanal@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.h    |  1 +
 drivers/gpu/drm/v3d/v3d_sched.c  | 65 +++++++++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_submit.c | 82 ++++++++++++++++++++++++++++++++
 include/uapi/drm/v3d_drm.h       | 50 +++++++++++++++++++
 4 files changed, 198 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 0f7f80ad8d88d..3c7d588665704 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -322,6 +322,7 @@ enum v3d_cpu_job_type {
 	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
 	V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
 	V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY,
+	V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY,
 };
 
 struct v3d_timestamp_query {
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index e83d2907bc83b..54015ad765c75 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -452,12 +452,77 @@ v3d_reset_performance_queries(struct v3d_cpu_job *job)
 	}
 }
 
+static void
+v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query)
+{
+	struct v3d_performance_query_info *performance_query = &job->performance_query;
+	struct v3d_copy_query_results_info *copy = &job->copy;
+	struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
+	struct v3d_dev *v3d = job->base.v3d;
+	struct v3d_perfmon *perfmon;
+	u64 counter_values[V3D_PERFCNT_NUM];
+
+	for (int i = 0; i < performance_query->nperfmons; i++) {
+		perfmon = v3d_perfmon_find(v3d_priv,
+					   performance_query->queries[query].kperfmon_ids[i]);
+		if (!perfmon) {
+			DRM_DEBUG("Failed to find perfmon.");
+			continue;
+		}
+
+		v3d_perfmon_stop(v3d, perfmon, true);
+
+		memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values,
+		       perfmon->ncounters * sizeof(u64));
+
+		v3d_perfmon_put(perfmon);
+	}
+
+	for (int i = 0; i < performance_query->ncounters; i++)
+		write_to_buffer(data, i, copy->do_64bit, counter_values[i]);
+}
+
+static void
+v3d_copy_performance_query(struct v3d_cpu_job *job)
+{
+	struct v3d_performance_query_info *performance_query = &job->performance_query;
+	struct v3d_copy_query_results_info *copy = &job->copy;
+	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
+	struct dma_fence *fence;
+	bool available, write_result;
+	u8 *data;
+
+	v3d_get_bo_vaddr(bo);
+
+	data = ((u8 *)bo->vaddr) + copy->offset;
+
+	for (int i = 0; i < performance_query->count; i++) {
+		fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj);
+		available = fence ? dma_fence_is_signaled(fence) : false;
+
+		write_result = available || copy->do_partial;
+		if (write_result)
+			v3d_write_performance_query_result(job, data, i);
+
+		if (copy->availability_bit)
+			write_to_buffer(data, performance_query->ncounters,
+					copy->do_64bit, available ? 1u : 0u);
+
+		data += copy->stride;
+
+		dma_fence_put(fence);
+	}
+
+	v3d_put_bo_vaddr(bo);
+}
+
 static const v3d_cpu_job_fn cpu_job_function[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
 	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
 	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries,
+	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query,
 };
 
 static struct dma_fence *
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 20af8ae14831d..d7a9da2484fd8 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -672,6 +672,84 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
 	return 0;
 }
 
+static int
+v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
+					  struct drm_v3d_extension __user *ext,
+					  struct v3d_cpu_job *job)
+{
+	u32 __user *syncs;
+	u64 __user *kperfmon_ids;
+	struct drm_v3d_copy_performance_query copy;
+
+	if (!job) {
+		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+		return -EINVAL;
+	}
+
+	if (job->job_type) {
+		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&copy, ext, sizeof(copy)))
+		return -EFAULT;
+
+	if (copy.pad)
+		return -EINVAL;
+
+	job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;
+
+	job->performance_query.queries = kvmalloc_array(copy.count,
+							sizeof(struct v3d_performance_query),
+							GFP_KERNEL);
+	if (!job->performance_query.queries)
+		return -ENOMEM;
+
+	syncs = u64_to_user_ptr(copy.syncs);
+	kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids);
+
+	for (int i = 0; i < copy.count; i++) {
+		u32 sync;
+		u64 ids;
+		u32 __user *ids_pointer;
+		u32 id;
+
+		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
+			kvfree(job->performance_query.queries);
+			return -EFAULT;
+		}
+
+		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+
+		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
+			kvfree(job->performance_query.queries);
+			return -EFAULT;
+		}
+
+		ids_pointer = u64_to_user_ptr(ids);
+
+		for (int j = 0; j < copy.nperfmons; j++) {
+			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
+				kvfree(job->performance_query.queries);
+				return -EFAULT;
+			}
+
+			job->performance_query.queries[i].kperfmon_ids[j] = id;
+		}
+	}
+	job->performance_query.count = copy.count;
+	job->performance_query.nperfmons = copy.nperfmons;
+	job->performance_query.ncounters = copy.ncounters;
+
+	job->copy.do_64bit = copy.do_64bit;
+	job->copy.do_partial = copy.do_partial;
+	job->copy.availability_bit = copy.availability_bit;
+	job->copy.offset = copy.offset;
+	job->copy.stride = copy.stride;
+
+	return 0;
+}
+
 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
  * according to the extension id (name).
  */
@@ -712,6 +790,9 @@ v3d_get_extensions(struct drm_file *file_priv,
 		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
 			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
 			break;
+		case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
+			ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
+			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
@@ -1092,6 +1173,7 @@ static const unsigned int cpu_job_bo_handle_count[] = {
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
 	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
 	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
+	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
 };
 
 /**
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index d609bdf29fd6e..dce1835eced49 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -77,6 +77,7 @@ struct drm_v3d_extension {
 #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY	0x04
 #define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY	0x05
 #define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY	0x06
+#define DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY	0x07
 	__u32 flags; /* mbz */
 };
 
@@ -519,6 +520,52 @@ struct drm_v3d_reset_performance_query {
 	__u64 kperfmon_ids;
 };
 
+/**
+ * struct drm_v3d_copy_performance_query - ioctl extension for the CPU job to copy
+ * performance query results to a buffer
+ *
+ * When an extension DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY is defined, it
+ * points to this extension to define a copy performance query submission. This
+ * CPU job will copy the performance queries results to a BO with the offset
+ * and stride defined in the extension.
+ */
+struct drm_v3d_copy_performance_query {
+	struct drm_v3d_extension base;
+
+	/* Define if should write to buffer using 64 or 32 bits */
+	__u8 do_64bit;
+
+	/* Define if it can write to buffer even if the query is not available */
+	__u8 do_partial;
+
+	/* Define if it should write availability bit to buffer */
+	__u8 availability_bit;
+
+	/* mbz */
+	__u8 pad;
+
+	/* Offset of the buffer in the BO */
+	__u32 offset;
+
+	/* Stride of the buffer in the BO */
+	__u32 stride;
+
+	/* Number of performance monitors */
+	__u32 nperfmons;
+
+	/* Number of performance counters related to this query pool */
+	__u32 ncounters;
+
+	/* Number of queries */
+	__u32 count;
+
+	/* Array of performance queries's syncobjs to indicate its availability */
+	__u64 syncs;
+
+	/* Array of u64 user-pointers that point to an array of kperfmon_ids */
+	__u64 kperfmon_ids;
+};
+
 struct drm_v3d_submit_cpu {
 	/* Pointer to a u32 array of the BOs that are referenced by the job.
 	 *
@@ -537,6 +584,9 @@ struct drm_v3d_submit_cpu {
 	 *
 	 * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no
 	 * BOs.
+	 *
+	 * For DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, it must contain one
+	 * BO, where the performance queries will be written.
 	 */
 	__u64 bo_handles;
 
-- 
GitLab


From ff3670877e7c73d06c2a835d9abb62efcae0145c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 6 Jul 2023 11:27:31 +0200
Subject: [PATCH 0661/2340] drm/imx/lcdc: Fix double-free of driver data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The struct imx_lcdc driver data is allocated using devm_drm_dev_alloc()
so it must not be explicitly kfree()d.

Also drm_kms_helper_poll_fini() should not be called as there is no
matching drm_kms_helper_poll_init(). So drop the release function
completely.

Fixes: c87e859cdeb5 ("drm/imx/lcdc: Implement DRM driver for imx25")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230706092731.2630232-1-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/imx/lcdc/imx-lcdc.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c
index 0902983374d0b..43ddf3a9810b6 100644
--- a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c
+++ b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c
@@ -342,21 +342,12 @@ static const struct drm_mode_config_helper_funcs imx_lcdc_mode_config_helpers =
 	.atomic_commit_tail = drm_atomic_helper_commit_tail_rpm,
 };
 
-static void imx_lcdc_release(struct drm_device *drm)
-{
-	struct imx_lcdc *lcdc = imx_lcdc_from_drmdev(drm);
-
-	drm_kms_helper_poll_fini(drm);
-	kfree(lcdc);
-}
-
 DEFINE_DRM_GEM_DMA_FOPS(imx_lcdc_drm_fops);
 
 static struct drm_driver imx_lcdc_drm_driver = {
 	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
 	.fops = &imx_lcdc_drm_fops,
 	DRM_GEM_DMA_DRIVER_OPS_VMAP,
-	.release = imx_lcdc_release,
 	.name = "imx-lcdc",
 	.desc = "i.MX LCDC driver",
 	.date = "20200716",
-- 
GitLab


From 80d20fd99124800749d605c733911a8d9da78e2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 15 Nov 2023 11:07:18 +0200
Subject: [PATCH 0662/2340] drm/i915/display: split i915 specific code from
 intel_fbdev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split out code from intel_fbdev that can not be share between i915 and
xe. Create new i915 specific source/header file intel_fbdev_fb.[ch] which
contains this code.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115090719.3210079-2-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/Makefile                 |   3 +-
 drivers/gpu/drm/i915/display/intel_fbdev.c    | 108 ++--------------
 drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 115 ++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_fbdev_fb.h |  21 ++++
 4 files changed, 146 insertions(+), 101 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/display/intel_fbdev_fb.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_fbdev_fb.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 65e984242089c..eadc9e612962a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -318,7 +318,8 @@ i915-$(CONFIG_ACPI) += \
 	display/intel_acpi.o \
 	display/intel_opregion.o
 i915-$(CONFIG_DRM_FBDEV_EMULATION) += \
-	display/intel_fbdev.o
+	display/intel_fbdev.o \
+	display/intel_fbdev_fb.o
 i915-$(CONFIG_DEBUG_FS) += \
 	display/intel_display_debugfs.o \
 	display/intel_display_debugfs_params.o \
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 31d0d695d5671..b7e4b7830e454 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -43,7 +43,6 @@
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 
-#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_mman.h"
 
 #include "i915_drv.h"
@@ -51,6 +50,7 @@
 #include "intel_fb.h"
 #include "intel_fb_pin.h"
 #include "intel_fbdev.h"
+#include "intel_fbdev_fb.h"
 #include "intel_frontbuffer.h"
 
 struct intel_fbdev {
@@ -146,65 +146,6 @@ static const struct fb_ops intelfb_ops = {
 	.fb_mmap = intel_fbdev_mmap,
 };
 
-static int intelfb_alloc(struct drm_fb_helper *helper,
-			 struct drm_fb_helper_surface_size *sizes)
-{
-	struct intel_fbdev *ifbdev = to_intel_fbdev(helper);
-	struct drm_framebuffer *fb;
-	struct drm_device *dev = helper->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_mode_fb_cmd2 mode_cmd = {};
-	struct drm_i915_gem_object *obj;
-	int size;
-
-	/* we don't do packed 24bpp */
-	if (sizes->surface_bpp == 24)
-		sizes->surface_bpp = 32;
-
-	mode_cmd.width = sizes->surface_width;
-	mode_cmd.height = sizes->surface_height;
-
-	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
-				    DIV_ROUND_UP(sizes->surface_bpp, 8), 64);
-	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
-							  sizes->surface_depth);
-
-	size = mode_cmd.pitches[0] * mode_cmd.height;
-	size = PAGE_ALIGN(size);
-
-	obj = ERR_PTR(-ENODEV);
-	if (HAS_LMEM(dev_priv)) {
-		obj = i915_gem_object_create_lmem(dev_priv, size,
-						  I915_BO_ALLOC_CONTIGUOUS |
-						  I915_BO_ALLOC_USER);
-	} else {
-		/*
-		 * If the FB is too big, just don't use it since fbdev is not very
-		 * important and we should probably use that space with FBC or other
-		 * features.
-		 *
-		 * Also skip stolen on MTL as Wa_22018444074 mitigation.
-		 */
-		if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size)
-			obj = i915_gem_object_create_stolen(dev_priv, size);
-		if (IS_ERR(obj))
-			obj = i915_gem_object_create_shmem(dev_priv, size);
-	}
-
-	if (IS_ERR(obj)) {
-		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
-		return PTR_ERR(obj);
-	}
-
-	fb = intel_framebuffer_create(obj, &mode_cmd);
-	i915_gem_object_put(obj);
-	if (IS_ERR(fb))
-		return PTR_ERR(fb);
-
-	ifbdev->fb = to_intel_framebuffer(fb);
-	return 0;
-}
-
 static int intelfb_create(struct drm_fb_helper *helper,
 			  struct drm_fb_helper_surface_size *sizes)
 {
@@ -213,7 +154,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct drm_device *dev = helper->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-	struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt;
 	const struct i915_gtt_view view = {
 		.type = I915_GTT_VIEW_NORMAL,
 	};
@@ -222,9 +162,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct i915_vma *vma;
 	unsigned long flags = 0;
 	bool prealloc = false;
-	void __iomem *vaddr;
 	struct drm_i915_gem_object *obj;
-	struct i915_gem_ww_ctx ww;
 	int ret;
 
 	mutex_lock(&ifbdev->hpd_lock);
@@ -245,12 +183,13 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		intel_fb = ifbdev->fb = NULL;
 	}
 	if (!intel_fb || drm_WARN_ON(dev, !intel_fb_obj(&intel_fb->base))) {
+		struct drm_framebuffer *fb;
 		drm_dbg_kms(&dev_priv->drm,
 			    "no BIOS fb, allocating a new one\n");
-		ret = intelfb_alloc(helper, sizes);
-		if (ret)
-			return ret;
-		intel_fb = ifbdev->fb;
+		fb = intel_fbdev_fb_alloc(helper, sizes);
+		if (IS_ERR(fb))
+			return PTR_ERR(fb);
+		intel_fb = ifbdev->fb = to_intel_framebuffer(fb);
 	} else {
 		drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n");
 		prealloc = true;
@@ -283,49 +222,18 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	info->fbops = &intelfb_ops;
 
 	obj = intel_fb_obj(&intel_fb->base);
-	if (i915_gem_object_is_lmem(obj)) {
-		struct intel_memory_region *mem = obj->mm.region;
-
-		/* Use fbdev's framebuffer from lmem for discrete */
-		info->fix.smem_start =
-			(unsigned long)(mem->io_start +
-					i915_gem_object_get_dma_address(obj, 0));
-		info->fix.smem_len = obj->base.size;
-	} else {
-		/* Our framebuffer is the entirety of fbdev's system memory */
-		info->fix.smem_start =
-			(unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
-		info->fix.smem_len = vma->size;
-	}
-
-	for_i915_gem_ww(&ww, ret, false) {
-		ret = i915_gem_object_lock(vma->obj, &ww);
-
-		if (ret)
-			continue;
-
-		vaddr = i915_vma_pin_iomap(vma);
-		if (IS_ERR(vaddr)) {
-			drm_err(&dev_priv->drm,
-				"Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
-			ret = PTR_ERR(vaddr);
-			continue;
-		}
-	}
 
+	ret = intel_fbdev_fb_fill_info(dev_priv, info, obj, vma);
 	if (ret)
 		goto out_unpin;
 
-	info->screen_base = vaddr;
-	info->screen_size = vma->size;
-
 	drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
 
 	/* If the object is shmemfs backed, it will have given us zeroed pages.
 	 * If the object is stolen however, it will be full of whatever
 	 * garbage was left in there.
 	 */
-	if (!i915_gem_object_is_shmem(vma->obj) && !prealloc)
+	if (!i915_gem_object_is_shmem(obj) && !prealloc)
 		memset_io(info->screen_base, 0, info->screen_size);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c
new file mode 100644
index 0000000000000..717c3a3237c48
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_fb_helper.h>
+
+#include "gem/i915_gem_lmem.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_fbdev_fb.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+					     struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_framebuffer *fb;
+	struct drm_device *dev = helper->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct drm_i915_gem_object *obj;
+	int size;
+
+	/* we don't do packed 24bpp */
+	if (sizes->surface_bpp == 24)
+		sizes->surface_bpp = 32;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+				    DIV_ROUND_UP(sizes->surface_bpp, 8), 64);
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	size = PAGE_ALIGN(size);
+
+	obj = ERR_PTR(-ENODEV);
+	if (HAS_LMEM(dev_priv)) {
+		obj = i915_gem_object_create_lmem(dev_priv, size,
+						  I915_BO_ALLOC_CONTIGUOUS |
+						  I915_BO_ALLOC_USER);
+	} else {
+		/*
+		 * If the FB is too big, just don't use it since fbdev is not very
+		 * important and we should probably use that space with FBC or other
+		 * features.
+		 *
+		 * Also skip stolen on MTL as Wa_22018444074 mitigation.
+		 */
+		if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size)
+			obj = i915_gem_object_create_stolen(dev_priv, size);
+		if (IS_ERR(obj))
+			obj = i915_gem_object_create_shmem(dev_priv, size);
+	}
+
+	if (IS_ERR(obj)) {
+		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fb = intel_framebuffer_create(obj, &mode_cmd);
+	i915_gem_object_put(obj);
+
+	return fb;
+}
+
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			     struct drm_i915_gem_object *obj, struct i915_vma *vma)
+{
+	struct i915_gem_ww_ctx ww;
+	void __iomem *vaddr;
+	int ret;
+
+	if (i915_gem_object_is_lmem(obj)) {
+		struct intel_memory_region *mem = obj->mm.region;
+
+		/* Use fbdev's framebuffer from lmem for discrete */
+		info->fix.smem_start =
+			(unsigned long)(mem->io_start +
+					i915_gem_object_get_dma_address(obj, 0));
+		info->fix.smem_len = obj->base.size;
+	} else {
+		struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+		/* Our framebuffer is the entirety of fbdev's system memory */
+		info->fix.smem_start =
+			(unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
+		info->fix.smem_len = vma->size;
+	}
+
+	for_i915_gem_ww(&ww, ret, false) {
+		ret = i915_gem_object_lock(vma->obj, &ww);
+
+		if (ret)
+			continue;
+
+		vaddr = i915_vma_pin_iomap(vma);
+		if (IS_ERR(vaddr)) {
+			drm_err(&i915->drm,
+				"Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
+			ret = PTR_ERR(vaddr);
+			continue;
+		}
+	}
+
+	if (ret)
+		return ret;
+
+	info->screen_base = vaddr;
+	info->screen_size = intel_bo_to_drm_bo(obj)->size;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h
new file mode 100644
index 0000000000000..a395b2c65d33b
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_FBDEV_FB_H__
+#define __INTEL_FBDEV_FB_H__
+
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct fb_info;
+struct i915_vma;
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+					     struct drm_fb_helper_surface_size *sizes);
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			     struct drm_i915_gem_object *obj, struct i915_vma *vma);
+
+#endif
-- 
GitLab


From c952bf11ace50b03fce14dbc15a092fdc9a6d2c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 15 Nov 2023 11:07:19 +0200
Subject: [PATCH 0663/2340] drm/i915/display: use intel_bo_to_drm_bo in
 intel_fbdev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are preparing for Xe driver. I915 and Xe object implementation are
differing. Do not use  i915_gem_object->base directly. Instead use
intel_bo_to_drm_bo.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115090719.3210079-3-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_fbdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index b7e4b7830e454..99894a855ef05 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -332,12 +332,12 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
 			continue;
 		}
 
-		if (obj->base.size > max_size) {
+		if (intel_bo_to_drm_bo(obj)->size > max_size) {
 			drm_dbg_kms(&i915->drm,
 				    "found possible fb from [PLANE:%d:%s]\n",
 				    plane->base.base.id, plane->base.name);
 			fb = to_intel_framebuffer(plane_state->uapi.fb);
-			max_size = obj->base.size;
+			max_size = intel_bo_to_drm_bo(obj)->size;
 		}
 	}
 
-- 
GitLab


From 44df9a2a13211955ae98f8650e4dc04065e8ec0b Mon Sep 17 00:00:00 2001
From: Carl Vanderlip <quic_carlv@quicinc.com>
Date: Fri, 17 Nov 2023 10:43:36 -0700
Subject: [PATCH 0664/2340] accel/qaic: Increase number of in_reset states

'in_reset' holds the state of the device. As part of bringup, the device
needs to be queried to check if it's in a valid state. Add a new state
that indicates that the device is coming up, but not ready for users
yet. Rename to 'dev_state' to better describe the variable.

Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117174337.20174-2-quic_jhugo@quicinc.com
---
 drivers/accel/qaic/qaic.h         | 13 +++++++++++--
 drivers/accel/qaic/qaic_control.c |  5 +++--
 drivers/accel/qaic/qaic_data.c    | 16 ++++++++--------
 drivers/accel/qaic/qaic_drv.c     | 12 ++++++------
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index bc40d52dc0104..cb04f7f9cdb3d 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -31,6 +31,15 @@
 #define to_drm(qddev) (&(qddev)->drm)
 #define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */
 
+enum __packed dev_states {
+	/* Device is offline or will be very soon */
+	QAIC_OFFLINE,
+	/* Device is booting, not clear if it's in a usable state */
+	QAIC_BOOT,
+	/* Device is fully operational */
+	QAIC_ONLINE,
+};
+
 extern bool datapath_polling;
 
 struct qaic_user {
@@ -121,8 +130,8 @@ struct qaic_device {
 	struct workqueue_struct	*cntl_wq;
 	/* Synchronizes all the users of device during cleanup */
 	struct srcu_struct	dev_lock;
-	/* true: Device under reset; false: Device not under reset */
-	bool			in_reset;
+	/* Track the state of the device during resets */
+	enum dev_states		dev_state;
 	/* true: single MSI is used to operate device */
 	bool			single_msi;
 	/*
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index 84915824be543..9e8a8cbadf6bb 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -1022,7 +1022,8 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
 	int xfer_count = 0;
 	int retry_count;
 
-	if (qdev->in_reset) {
+	/* Allow QAIC_BOOT state since we need to check control protocol version */
+	if (qdev->dev_state == QAIC_OFFLINE) {
 		mutex_unlock(&qdev->cntl_mutex);
 		return ERR_PTR(-ENODEV);
 	}
@@ -1306,7 +1307,7 @@ int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_
 	qdev = usr->qddev->qdev;
 
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
 		srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
 		return -ENODEV;
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 8998c28e566e1..cf2898eda7ae3 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -690,7 +690,7 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -749,7 +749,7 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -970,7 +970,7 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1341,7 +1341,7 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1497,7 +1497,7 @@ void irq_polling_work(struct work_struct *work)
 	rcu_id = srcu_read_lock(&dbc->ch_lock);
 
 	while (1) {
-		if (dbc->qdev->in_reset) {
+		if (dbc->qdev->dev_state != QAIC_ONLINE) {
 			srcu_read_unlock(&dbc->ch_lock, rcu_id);
 			return;
 		}
@@ -1687,7 +1687,7 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1756,7 +1756,7 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1847,7 +1847,7 @@ int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index b12226385003d..24b8680b2dbd2 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -64,7 +64,7 @@ static int qaic_open(struct drm_device *dev, struct drm_file *file)
 	int ret;
 
 	rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto dev_unlock;
 	}
@@ -121,7 +121,7 @@ static void qaic_postclose(struct drm_device *dev, struct drm_file *file)
 	if (qddev) {
 		qdev = qddev->qdev;
 		qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-		if (!qdev->in_reset) {
+		if (qdev->dev_state == QAIC_ONLINE) {
 			qaic_release_usr(qdev, usr);
 			for (i = 0; i < qdev->num_dbc; ++i)
 				if (qdev->dbc[i].usr && qdev->dbc[i].usr->handle == usr->handle)
@@ -254,7 +254,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
 
 	qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
 
-	qdev->in_reset = false;
+	qdev->dev_state = QAIC_ONLINE;
 
 	dev_set_drvdata(&mhi_dev->dev, qdev);
 	qdev->cntl_ch = mhi_dev;
@@ -291,7 +291,7 @@ static void qaic_notify_reset(struct qaic_device *qdev)
 {
 	int i;
 
-	qdev->in_reset = true;
+	qdev->dev_state = QAIC_OFFLINE;
 	/* wake up any waiters to avoid waiting for timeouts at sync */
 	wake_all_cntl(qdev);
 	for (i = 0; i < qdev->num_dbc; ++i)
@@ -313,7 +313,7 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
 		release_dbc(qdev, i);
 
 	if (exit_reset)
-		qdev->in_reset = false;
+		qdev->dev_state = QAIC_ONLINE;
 }
 
 static void cleanup_qdev(struct qaic_device *qdev)
@@ -550,7 +550,7 @@ static void qaic_pci_reset_done(struct pci_dev *pdev)
 {
 	struct qaic_device *qdev = pci_get_drvdata(pdev);
 
-	qdev->in_reset = false;
+	qdev->dev_state = QAIC_ONLINE;
 	qaic_mhi_reset_done(qdev->mhi_cntrl);
 }
 
-- 
GitLab


From 5f0a0ebca2b98eeda10715433c7d9418ef2e6d01 Mon Sep 17 00:00:00 2001
From: Carl Vanderlip <quic_carlv@quicinc.com>
Date: Fri, 17 Nov 2023 10:43:37 -0700
Subject: [PATCH 0665/2340] accel/qaic: Expand DRM device lifecycle

Currently the QAIC DRM device registers itself when the MHI QAIC_CONTROL
channel becomes available. This is when the device is able to process
workloads. However, the DRM driver also provides the debugfs interface
bootlog for the device. If the device fails to boot to the QSM (which
brings up the MHI QAIC_CONTROL channel), the bootlog won't be available for
debugging why it failed to boot.

Change when the DRM device registers itself from when QAIC_CONTROL is
available to when the card is first probed on the PCI bus. Additionally,
make the DRM driver persist through reset/error cases so the driver
doesn't have to be reloaded to access the card again. Send
KOBJ_ONLINE/OFFLINE uevents so userspace can know when DRM device is
ready to handle requests.

Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231117174337.20174-3-quic_jhugo@quicinc.com
---
 Documentation/accel/qaic/qaic.rst   |  9 +++++-
 drivers/accel/qaic/mhi_controller.c |  2 +-
 drivers/accel/qaic/qaic.h           |  2 +-
 drivers/accel/qaic/qaic_drv.c       | 44 +++++++++++------------------
 4 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
index f81020736ebfb..efb7771273bbc 100644
--- a/Documentation/accel/qaic/qaic.rst
+++ b/Documentation/accel/qaic/qaic.rst
@@ -93,8 +93,15 @@ commands (does not impact QAIC).
 uAPI
 ====
 
+QAIC creates an accel device per phsyical PCIe device. This accel device exists
+for as long as the PCIe device is known to Linux.
+
+The PCIe device may not be in the state to accept requests from userspace at
+all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the
+device can accept requests (ONLINE) and when the device is no longer accepting
+requests (OFFLINE) because of a reset or other state transition.
+
 QAIC defines a number of driver specific IOCTLs as part of the userspace API.
-This section describes those APIs.
 
 DRM_IOCTL_QAIC_MANAGE
   This IOCTL allows userspace to send a NNC request to the QSM. The call will
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 5d3cc30009cce..832464f2833ad 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -469,7 +469,7 @@ static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback re
 		pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n");
 	/* this event occurs in non-atomic context */
 	if (reason == MHI_CB_SYS_ERROR)
-		qaic_dev_reset_clean_local_state(qdev, true);
+		qaic_dev_reset_clean_local_state(qdev);
 }
 
 static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index cb04f7f9cdb3d..582836f9538f9 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -283,7 +283,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id);
 void release_dbc(struct qaic_device *qdev, u32 dbc_id);
 
 void wake_all_cntl(struct qaic_device *qdev);
-void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset);
+void qaic_dev_reset_clean_local_state(struct qaic_device *qdev);
 
 struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
 
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 24b8680b2dbd2..2a313eb69b121 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -8,6 +8,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
+#include <linux/kobject.h>
 #include <linux/kref.h>
 #include <linux/mhi.h>
 #include <linux/module.h>
@@ -43,9 +44,6 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
 static bool link_up;
 static DEFINE_IDA(qaic_usrs);
 
-static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id);
-static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id);
-
 static void free_usr(struct kref *kref)
 {
 	struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
@@ -183,13 +181,6 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
 
 	qddev->partition_id = partition_id;
 
-	/*
-	 * drm_dev_unregister() sets the driver data to NULL and
-	 * drm_dev_register() does not update the driver data. During a SOC
-	 * reset drm dev is unregistered and registered again leaving the
-	 * driver data to NULL.
-	 */
-	dev_set_drvdata(to_accel_kdev(qddev), drm->accel);
 	ret = drm_dev_register(drm, 0);
 	if (ret)
 		pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);
@@ -203,7 +194,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 	struct drm_device *drm = to_drm(qddev);
 	struct qaic_user *usr;
 
-	drm_dev_get(drm);
 	drm_dev_unregister(drm);
 	qddev->partition_id = 0;
 	/*
@@ -232,7 +222,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 		mutex_lock(&qddev->users_mutex);
 	}
 	mutex_unlock(&qddev->users_mutex);
-	drm_dev_put(drm);
 }
 
 static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
@@ -254,8 +243,6 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
 
 	qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
 
-	qdev->dev_state = QAIC_ONLINE;
-
 	dev_set_drvdata(&mhi_dev->dev, qdev);
 	qdev->cntl_ch = mhi_dev;
 
@@ -265,6 +252,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
 		return ret;
 	}
 
+	qdev->dev_state = QAIC_BOOT;
 	ret = get_cntl_version(qdev, NULL, &major, &minor);
 	if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) {
 		pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n",
@@ -272,8 +260,8 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
 		ret = -EINVAL;
 		goto close_control;
 	}
-
-	ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
+	qdev->dev_state = QAIC_ONLINE;
+	kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE);
 
 	return ret;
 
@@ -291,6 +279,7 @@ static void qaic_notify_reset(struct qaic_device *qdev)
 {
 	int i;
 
+	kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE);
 	qdev->dev_state = QAIC_OFFLINE;
 	/* wake up any waiters to avoid waiting for timeouts at sync */
 	wake_all_cntl(qdev);
@@ -299,21 +288,15 @@ static void qaic_notify_reset(struct qaic_device *qdev)
 	synchronize_srcu(&qdev->dev_lock);
 }
 
-void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
+void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
 {
 	int i;
 
 	qaic_notify_reset(qdev);
 
-	/* remove drmdevs to prevent new users from coming in */
-	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
-
 	/* start tearing things down */
 	for (i = 0; i < qdev->num_dbc; ++i)
 		release_dbc(qdev, i);
-
-	if (exit_reset)
-		qdev->dev_state = QAIC_ONLINE;
 }
 
 static void cleanup_qdev(struct qaic_device *qdev)
@@ -338,6 +321,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
 	if (!qdev)
 		return NULL;
 
+	qdev->dev_state = QAIC_OFFLINE;
 	if (id->device == PCI_DEV_AIC100) {
 		qdev->num_dbc = 16;
 		qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
@@ -499,15 +483,21 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto cleanup_qdev;
 	}
 
+	ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
+	if (ret)
+		goto cleanup_qdev;
+
 	qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
 						       qdev->single_msi);
 	if (IS_ERR(qdev->mhi_cntrl)) {
 		ret = PTR_ERR(qdev->mhi_cntrl);
-		goto cleanup_qdev;
+		goto cleanup_drm_dev;
 	}
 
 	return 0;
 
+cleanup_drm_dev:
+	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
 cleanup_qdev:
 	cleanup_qdev(qdev);
 	return ret;
@@ -520,7 +510,8 @@ static void qaic_pci_remove(struct pci_dev *pdev)
 	if (!qdev)
 		return;
 
-	qaic_dev_reset_clean_local_state(qdev, false);
+	qaic_dev_reset_clean_local_state(qdev);
+	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
 	qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
 	cleanup_qdev(qdev);
 }
@@ -543,14 +534,13 @@ static void qaic_pci_reset_prepare(struct pci_dev *pdev)
 
 	qaic_notify_reset(qdev);
 	qaic_mhi_start_reset(qdev->mhi_cntrl);
-	qaic_dev_reset_clean_local_state(qdev, false);
+	qaic_dev_reset_clean_local_state(qdev);
 }
 
 static void qaic_pci_reset_done(struct pci_dev *pdev)
 {
 	struct qaic_device *qdev = pci_get_drvdata(pdev);
 
-	qdev->dev_state = QAIC_ONLINE;
 	qaic_mhi_reset_done(qdev->mhi_cntrl);
 }
 
-- 
GitLab


From 8570c27932e132d2663e8120311891deb2a853de Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Wed, 23 Aug 2023 14:54:54 -0700
Subject: [PATCH 0666/2340] drm/syncobj: Add deadline support for syncobj waits

Add a new flag to let userspace provide a deadline as a hint for syncobj
and timeline waits.  This gives a hint to the driver signaling the
backing fences about how soon userspace needs it to compete work, so it
can adjust GPU frequency accordingly.  An immediate deadline can be
given to provide something equivalent to i915 "wait boost".

v2: Use absolute u64 ns value for deadline hint, drop cap and driver
    feature flag in favor of allowing count_handles==0 as a way for
    userspace to probe kernel for support of new flag
v3: More verbose comments about UAPI
v4: Fix negative zero, s/deadline_ns/deadline_nsec/ for consistency with
    existing ioctl struct fields
v5: Comment/description typo fixes

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
[DB: fixed checkpatch warnings]
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230823215458.203366-2-robdclark@gmail.com
---
 drivers/gpu/drm/drm_syncobj.c | 64 ++++++++++++++++++++++++++++-------
 include/uapi/drm/drm.h        | 17 ++++++++++
 2 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 01da6789d0440..cbb65b7ba4259 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -126,6 +126,11 @@
  * synchronize between the two.
  * This requirement is inherited from the Vulkan fence API.
  *
+ * If &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE is set, the ioctl will also set
+ * a fence deadline hint on the backing fences before waiting, to provide the
+ * fence signaler with an appropriate sense of urgency.  The deadline is
+ * specified as an absolute &CLOCK_MONOTONIC value in units of ns.
+ *
  * Similarly, &DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT takes an array of syncobj
  * handles as well as an array of u64 points and does a host-side wait on all
  * of syncobj fences at the given points simultaneously.
@@ -1027,7 +1032,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 						  uint32_t count,
 						  uint32_t flags,
 						  signed long timeout,
-						  uint32_t *idx)
+						  uint32_t *idx,
+						  ktime_t *deadline)
 {
 	struct syncobj_wait_entry *entries;
 	struct dma_fence *fence;
@@ -1108,6 +1114,15 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 			drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
 	}
 
+	if (deadline) {
+		for (i = 0; i < count; ++i) {
+			fence = entries[i].fence;
+			if (!fence)
+				continue;
+			dma_fence_set_deadline(fence, *deadline);
+		}
+	}
+
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1206,7 +1221,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
 				  struct drm_file *file_private,
 				  struct drm_syncobj_wait *wait,
 				  struct drm_syncobj_timeline_wait *timeline_wait,
-				  struct drm_syncobj **syncobjs, bool timeline)
+				  struct drm_syncobj **syncobjs, bool timeline,
+				  ktime_t *deadline)
 {
 	signed long timeout = 0;
 	uint32_t first = ~0;
@@ -1217,7 +1233,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
 							 NULL,
 							 wait->count_handles,
 							 wait->flags,
-							 timeout, &first);
+							 timeout, &first,
+							 deadline);
 		if (timeout < 0)
 			return timeout;
 		wait->first_signaled = first;
@@ -1227,7 +1244,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
 							 u64_to_user_ptr(timeline_wait->points),
 							 timeline_wait->count_handles,
 							 timeline_wait->flags,
-							 timeout, &first);
+							 timeout, &first,
+							 deadline);
 		if (timeout < 0)
 			return timeout;
 		timeline_wait->first_signaled = first;
@@ -1298,17 +1316,22 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_syncobj_wait *args = data;
 	struct drm_syncobj **syncobjs;
+	unsigned int possible_flags;
+	ktime_t t, *tp = NULL;
 	int ret = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
 		return -EOPNOTSUPP;
 
-	if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
-			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+	possible_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+			 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+			 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE;
+
+	if (args->flags & ~possible_flags)
 		return -EINVAL;
 
 	if (args->count_handles == 0)
-		return -EINVAL;
+		return 0;
 
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
@@ -1317,8 +1340,13 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 	if (ret < 0)
 		return ret;
 
+	if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE) {
+		t = ns_to_ktime(args->deadline_nsec);
+		tp = &t;
+	}
+
 	ret = drm_syncobj_array_wait(dev, file_private,
-				     args, NULL, syncobjs, false);
+				     args, NULL, syncobjs, false, tp);
 
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
@@ -1331,18 +1359,23 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_syncobj_timeline_wait *args = data;
 	struct drm_syncobj **syncobjs;
+	unsigned int possible_flags;
+	ktime_t t, *tp = NULL;
 	int ret = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
 		return -EOPNOTSUPP;
 
-	if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
-			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
-			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE))
+	possible_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+			 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+			 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE |
+			 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE;
+
+	if (args->flags & ~possible_flags)
 		return -EINVAL;
 
 	if (args->count_handles == 0)
-		return -EINVAL;
+		return 0;
 
 	ret = drm_syncobj_array_find(file_private,
 				     u64_to_user_ptr(args->handles),
@@ -1351,8 +1384,13 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 	if (ret < 0)
 		return ret;
 
+	if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE) {
+		t = ns_to_ktime(args->deadline_nsec);
+		tp = &t;
+	}
+
 	ret = drm_syncobj_array_wait(dev, file_private,
-				     NULL, args, syncobjs, true);
+				     NULL, args, syncobjs, true, tp);
 
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 91943e5ce0105..16122819edfef 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -926,6 +926,7 @@ struct drm_syncobj_transfer {
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
 struct drm_syncobj_wait {
 	__u64 handles;
 	/* absolute timeout */
@@ -934,6 +935,14 @@ struct drm_syncobj_wait {
 	__u32 flags;
 	__u32 first_signaled; /* only valid when not waiting all */
 	__u32 pad;
+	/**
+	 * @deadline_nsec - fence deadline hint
+	 *
+	 * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+	 * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+	 * set.
+	 */
+	__u64 deadline_nsec;
 };
 
 struct drm_syncobj_timeline_wait {
@@ -946,6 +955,14 @@ struct drm_syncobj_timeline_wait {
 	__u32 flags;
 	__u32 first_signaled; /* only valid when not waiting all */
 	__u32 pad;
+	/**
+	 * @deadline_nsec - fence deadline hint
+	 *
+	 * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+	 * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+	 * set.
+	 */
+	__u64 deadline_nsec;
 };
 
 /**
-- 
GitLab


From 63ee44540205d993854f143a5ab1d7d9e63ffcf1 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Wed, 23 Aug 2023 14:54:55 -0700
Subject: [PATCH 0667/2340] dma-buf/sync_file: Add SET_DEADLINE ioctl

The initial purpose is for igt tests, but this would also be useful for
compositors that wait until close to vblank deadline to make decisions
about which frame to show.

The igt tests can be found at:

https://gitlab.freedesktop.org/robclark/igt-gpu-tools/-/commits/fence-deadline

v2: Clarify the timebase, add link to igt tests
v3: Use u64 value in ns to express deadline.
v4: More doc

Signed-off-by: Rob Clark <robdclark@chromium.org>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230823215458.203366-3-robdclark@gmail.com
---
 drivers/dma-buf/dma-fence.c    |  3 ++-
 drivers/dma-buf/sync_file.c    | 19 +++++++++++++++++++
 include/uapi/linux/sync_file.h | 22 ++++++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 8aa8f8cb7071e..e0fd99e61a2d4 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -934,7 +934,8 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout);
  *   the GPU's devfreq to reduce frequency, when in fact the opposite is what is
  *   needed.
  *
- * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline.
+ * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline
+ * (or indirectly via userspace facing ioctls like &sync_set_deadline).
  * The deadline hint provides a way for the waiting driver, or userspace, to
  * convey an appropriate sense of urgency to the signaling driver.
  *
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2e9a316c596a3..d9b1c1b2a72b2 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -347,6 +347,22 @@ out:
 	return ret;
 }
 
+static int sync_file_ioctl_set_deadline(struct sync_file *sync_file,
+					unsigned long arg)
+{
+	struct sync_set_deadline ts;
+
+	if (copy_from_user(&ts, (void __user *)arg, sizeof(ts)))
+		return -EFAULT;
+
+	if (ts.pad)
+		return -EINVAL;
+
+	dma_fence_set_deadline(sync_file->fence, ns_to_ktime(ts.deadline_ns));
+
+	return 0;
+}
+
 static long sync_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
@@ -359,6 +375,9 @@ static long sync_file_ioctl(struct file *file, unsigned int cmd,
 	case SYNC_IOC_FILE_INFO:
 		return sync_file_ioctl_fence_info(sync_file, arg);
 
+	case SYNC_IOC_SET_DEADLINE:
+		return sync_file_ioctl_set_deadline(sync_file, arg);
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
index ff0a931833e25..ff1f38889dcfb 100644
--- a/include/uapi/linux/sync_file.h
+++ b/include/uapi/linux/sync_file.h
@@ -76,6 +76,27 @@ struct sync_file_info {
 	__u64	sync_fence_info;
 };
 
+/**
+ * struct sync_set_deadline - SYNC_IOC_SET_DEADLINE - set a deadline hint on a fence
+ * @deadline_ns: absolute time of the deadline
+ * @pad:	must be zero
+ *
+ * Allows userspace to set a deadline on a fence, see &dma_fence_set_deadline
+ *
+ * The timebase for the deadline is CLOCK_MONOTONIC (same as vblank).  For
+ * example
+ *
+ *     clock_gettime(CLOCK_MONOTONIC, &t);
+ *     deadline_ns = (t.tv_sec * 1000000000L) + t.tv_nsec + ns_until_deadline
+ */
+struct sync_set_deadline {
+	__u64	deadline_ns;
+	/* Not strictly needed for alignment but gives some possibility
+	 * for future extension:
+	 */
+	__u64	pad;
+};
+
 #define SYNC_IOC_MAGIC		'>'
 
 /*
@@ -87,5 +108,6 @@ struct sync_file_info {
 
 #define SYNC_IOC_MERGE		_IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
 #define SYNC_IOC_FILE_INFO	_IOWR(SYNC_IOC_MAGIC, 4, struct sync_file_info)
+#define SYNC_IOC_SET_DEADLINE	_IOW(SYNC_IOC_MAGIC, 5, struct sync_set_deadline)
 
 #endif /* _UAPI_LINUX_SYNC_H */
-- 
GitLab


From 70e67aaec2f4706df0006423eebca813b00f5840 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Wed, 23 Aug 2023 14:54:56 -0700
Subject: [PATCH 0668/2340] dma-buf/sw_sync: Add fence deadline support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This consists of simply storing the most recent deadline, and adding an
ioctl to retrieve the deadline.  This can be used in conjunction with
the SET_DEADLINE ioctl on a fence fd for testing.  Ie. create various
sw_sync fences, merge them into a fence-array, set deadline on the
fence-array and confirm that it is propagated properly to each fence.

v2: Switch UABI to express deadline as u64
v3: More verbose UAPI docs, show how to convert from timespec
v4: Better comments, track the soonest deadline, as a normal fence
    implementation would, return an error if no deadline set.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230823215458.203366-4-robdclark@gmail.com
---
 drivers/dma-buf/sw_sync.c    | 82 ++++++++++++++++++++++++++++++++++++
 drivers/dma-buf/sync_debug.h |  2 +
 2 files changed, 84 insertions(+)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index f0a35277fd844..c353029789cf1 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -52,12 +52,33 @@ struct sw_sync_create_fence_data {
 	__s32	fence; /* fd of new fence */
 };
 
+/**
+ * struct sw_sync_get_deadline - get the deadline hint of a sw_sync fence
+ * @deadline_ns: absolute time of the deadline
+ * @pad:	must be zero
+ * @fence_fd:	the sw_sync fence fd (in)
+ *
+ * Return the earliest deadline set on the fence.  The timebase for the
+ * deadline is CLOCK_MONOTONIC (same as vblank).  If there is no deadline
+ * set on the fence, this ioctl will return -ENOENT.
+ */
+struct sw_sync_get_deadline {
+	__u64	deadline_ns;
+	__u32	pad;
+	__s32	fence_fd;
+};
+
 #define SW_SYNC_IOC_MAGIC	'W'
 
 #define SW_SYNC_IOC_CREATE_FENCE	_IOWR(SW_SYNC_IOC_MAGIC, 0,\
 		struct sw_sync_create_fence_data)
 
 #define SW_SYNC_IOC_INC			_IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
+#define SW_SYNC_GET_DEADLINE		_IOWR(SW_SYNC_IOC_MAGIC, 2, \
+		struct sw_sync_get_deadline)
+
+
+#define SW_SYNC_HAS_DEADLINE_BIT	DMA_FENCE_FLAG_USER_BITS
 
 static const struct dma_fence_ops timeline_fence_ops;
 
@@ -171,6 +192,22 @@ static void timeline_fence_timeline_value_str(struct dma_fence *fence,
 	snprintf(str, size, "%d", parent->value);
 }
 
+static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+	struct sync_pt *pt = dma_fence_to_sync_pt(fence);
+	unsigned long flags;
+
+	spin_lock_irqsave(fence->lock, flags);
+	if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) {
+		if (ktime_before(deadline, pt->deadline))
+			pt->deadline = deadline;
+	} else {
+		pt->deadline = deadline;
+		__set_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags);
+	}
+	spin_unlock_irqrestore(fence->lock, flags);
+}
+
 static const struct dma_fence_ops timeline_fence_ops = {
 	.get_driver_name = timeline_fence_get_driver_name,
 	.get_timeline_name = timeline_fence_get_timeline_name,
@@ -179,6 +216,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
 	.release = timeline_fence_release,
 	.fence_value_str = timeline_fence_value_str,
 	.timeline_value_str = timeline_fence_timeline_value_str,
+	.set_deadline = timeline_fence_set_deadline,
 };
 
 /**
@@ -387,6 +425,47 @@ static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg)
 	return 0;
 }
 
+static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long arg)
+{
+	struct sw_sync_get_deadline data;
+	struct dma_fence *fence;
+	unsigned long flags;
+	struct sync_pt *pt;
+	int ret = 0;
+
+	if (copy_from_user(&data, (void __user *)arg, sizeof(data)))
+		return -EFAULT;
+
+	if (data.deadline_ns || data.pad)
+		return -EINVAL;
+
+	fence = sync_file_get_fence(data.fence_fd);
+	if (!fence)
+		return -EINVAL;
+
+	pt = dma_fence_to_sync_pt(fence);
+	if (!pt)
+		return -EINVAL;
+
+	spin_lock_irqsave(fence->lock, flags);
+	if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) {
+		data.deadline_ns = ktime_to_ns(pt->deadline);
+	} else {
+		ret = -ENOENT;
+	}
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	dma_fence_put(fence);
+
+	if (ret)
+		return ret;
+
+	if (copy_to_user((void __user *)arg, &data, sizeof(data)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static long sw_sync_ioctl(struct file *file, unsigned int cmd,
 			  unsigned long arg)
 {
@@ -399,6 +478,9 @@ static long sw_sync_ioctl(struct file *file, unsigned int cmd,
 	case SW_SYNC_IOC_INC:
 		return sw_sync_ioctl_inc(obj, arg);
 
+	case SW_SYNC_GET_DEADLINE:
+		return sw_sync_ioctl_get_deadline(obj, arg);
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index 6176e52ba2d74..a1bdd62efccd8 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -55,11 +55,13 @@ static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence)
  * @base: base fence object
  * @link: link on the sync timeline's list
  * @node: node in the sync timeline's tree
+ * @deadline: the earliest fence deadline hint
  */
 struct sync_pt {
 	struct dma_fence base;
 	struct list_head link;
 	struct rb_node node;
+	ktime_t deadline;
 };
 
 extern const struct file_operations sw_sync_debugfs_fops;
-- 
GitLab


From e6c0de5f445091d250b75dabc4c60dd2643b8c98 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Mon, 11 Sep 2023 15:16:27 -0700
Subject: [PATCH 0669/2340] drm/msm/dpu: try multirect based on mdp clock
 limits

It's certainly possible that for large resolutions a single DPU SSPP
cannot process the image without exceeding the MDP clock limits but
it can still process it in multirect mode because the source rectangles
will get divided and can fall within the MDP clock limits.

If the SSPP cannot process the image even in multirect mode, then it
will be rejected in dpu_plane_atomic_check_pipe().

Hence try using multirect for resolutions which cannot be processed
by a single SSPP without exceeding the MDP clock limits.

changes in v2:
	- use crtc_state's adjusted_mode instead of mode
	- fix the UBWC condition to check maxlinewidth

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/556817/
Link: https://lore.kernel.org/r/20230911221627.9569-2-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 3eef5e025e12e..1e0da38c6f2a5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -824,6 +824,8 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
 										 plane);
 	int ret = 0, min_scale;
 	struct dpu_plane *pdpu = to_dpu_plane(plane);
+	struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base);
+	u64 max_mdp_clk_rate = kms->perf.max_core_clk_rate;
 	struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state);
 	struct dpu_sw_pipe *pipe = &pstate->pipe;
 	struct dpu_sw_pipe *r_pipe = &pstate->r_pipe;
@@ -892,14 +894,16 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
 
 	max_linewidth = pdpu->catalog->caps->max_linewidth;
 
-	if (drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) {
+	if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) ||
+	     _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) {
 		/*
 		 * In parallel multirect case only the half of the usual width
 		 * is supported for tiled formats. If we are here, we know that
 		 * full width is more than max_linewidth, thus each rect is
 		 * wider than allowed.
 		 */
-		if (DPU_FORMAT_IS_UBWC(fmt)) {
+		if (DPU_FORMAT_IS_UBWC(fmt) &&
+		    drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) {
 			DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, tiled format\n",
 					DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth);
 			return -E2BIG;
-- 
GitLab


From a9bd555de5e9042fdf8ab8d6080b86f45c68ddf6 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 9 Oct 2023 19:56:27 +0300
Subject: [PATCH 0670/2340] drm/msm/dpu: enable SmartDMA on SM8450

Enable the SmartDMA / multirect support on the SM8450 platform to
support higher resoltion modes.

Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/561590/
Link: https://lore.kernel.org/r/20231009165627.2691015-1-dmitry.baryshkov@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index 7742f52be859b..d18145c226dac 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
@@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
@@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
@@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_3,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
@@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x32c,
-		.features = DMA_SDM845_MASK,
+		.features = DMA_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_0,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
@@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x32c,
-		.features = DMA_SDM845_MASK,
+		.features = DMA_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_1,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
@@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x32c,
-		.features = DMA_CURSOR_SDM845_MASK,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_2,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
@@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x32c,
-		.features = DMA_CURSOR_SDM845_MASK,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_3,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
-- 
GitLab


From 921e32bf6c0c86f774226577cb743d654f9bcfd9 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Fri, 8 Sep 2023 12:33:13 -0700
Subject: [PATCH 0671/2340] drm/msm/dpu: enable smartdma on sm8350

To support high resolutions on sm8350, enable smartdma
in its catalog.

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/556561/
Link: https://lore.kernel.org/r/20230908193314.27008-1-quic_abhinavk@quicinc.com
[DB: rebased on top of msm-next]
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index 1709ba57f3840..5aaa24281906e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
@@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
@@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
@@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SC7180_MASK_SDMA,
 		.sblk = &sm8250_vig_sblk_3,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
@@ -106,7 +106,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
-		.features = DMA_SDM845_MASK,
+		.features = DMA_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_0,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
@@ -114,7 +114,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
-		.features = DMA_SDM845_MASK,
+		.features = DMA_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_1,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
@@ -122,7 +122,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
-		.features = DMA_CURSOR_SDM845_MASK,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_2,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
@@ -130,7 +130,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f8,
-		.features = DMA_CURSOR_SDM845_MASK,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
 		.sblk = &sdm845_dma_sblk_3,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
-- 
GitLab


From 96ab215b2d5edc39310c00f50b33d8ab72ac3fe3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Oct 2023 22:04:03 +0200
Subject: [PATCH 0672/2340] drm/msm/a6xx: add QMP dependency

When QMP is in a loadable module, the A6xx GPU driver fails to link
as built-in:

x86_64-linux-ld: drivers/gpu/drm/msm/adreno/a6xx_gmu.o: in function `a6xx_gmu_resume':
a6xx_gmu.c:(.text+0xd62): undefined reference to `qmp_send'

Add the usual dependency that still allows compiling without QMP but
otherwise avoids the broken combination of options.

Fixes: 88a0997f2f949 ("drm/msm/a6xx: Send ACD state to QMP at GMU resume")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/562945/
Link: https://lore.kernel.org/r/20231016200415.791090-1-arnd@kernel.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 6309a857ca312..ad70b611b44f0 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -6,6 +6,7 @@ config DRM_MSM
 	depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST
 	depends on COMMON_CLK
 	depends on IOMMU_SUPPORT
+	depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
 	depends on QCOM_OCMEM || QCOM_OCMEM=n
 	depends on QCOM_LLCC || QCOM_LLCC=n
 	depends on QCOM_COMMAND_DB || QCOM_COMMAND_DB=n
-- 
GitLab


From e50e5fed41c7eed2db4119645bf3480ec43fec11 Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Fri, 27 Oct 2023 15:32:51 -0700
Subject: [PATCH 0673/2340] drm: Introduce pixel_source DRM plane property

Add support for pixel_source property to drm_plane and related
documentation. In addition, force pixel_source to
DRM_PLANE_PIXEL_SOURCE_FB in DRM_IOCTL_MODE_SETPLANE as to not break
legacy userspace.

This enum property will allow user to specify a pixel source for the
plane. Possible pixel sources will be defined in the
drm_plane_pixel_source enum.

Currently, the only pixel sources are DRM_PLANE_PIXEL_SOURCE_FB (the
default value) and DRM_PLANE_PIXEL_SOURCE_NONE.

Acked-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Sebastian Wick <sebastian@sebastianwick.net>
Acked-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-1-780188bfa7b2@quicinc.com
---
 drivers/gpu/drm/drm_atomic_state_helper.c |  1 +
 drivers/gpu/drm/drm_atomic_uapi.c         |  4 +
 drivers/gpu/drm/drm_blend.c               | 94 +++++++++++++++++++++++
 drivers/gpu/drm/drm_plane.c               | 19 ++++-
 include/drm/drm_blend.h                   |  2 +
 include/drm/drm_plane.h                   | 21 +++++
 6 files changed, 137 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 54975de44a0e3..311b04edf742c 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -252,6 +252,7 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state,
 
 	plane_state->alpha = DRM_BLEND_ALPHA_OPAQUE;
 	plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
+	plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB;
 
 	if (plane->color_encoding_property) {
 		if (!drm_object_property_get_default_value(&plane->base,
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index aee4a65d49591..bd71405319488 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -562,6 +562,8 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 		state->src_w = val;
 	} else if (property == config->prop_src_h) {
 		state->src_h = val;
+	} else if (property == plane->pixel_source_property) {
+		state->pixel_source = val;
 	} else if (property == plane->alpha_property) {
 		state->alpha = val;
 	} else if (property == plane->blend_mode_property) {
@@ -650,6 +652,8 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = state->src_w;
 	} else if (property == config->prop_src_h) {
 		*val = state->src_h;
+	} else if (property == plane->pixel_source_property) {
+		*val = state->pixel_source;
 	} else if (property == plane->alpha_property) {
 		*val = state->alpha;
 	} else if (property == plane->blend_mode_property) {
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 6e74de8334663..fce734cdb85ba 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -185,6 +185,25 @@
  *		 plane does not expose the "alpha" property, then this is
  *		 assumed to be 1.0
  *
+ * pixel_source:
+ *	pixel_source is set up with drm_plane_create_pixel_source_property().
+ *	It is used to toggle the active source of pixel data for the plane.
+ *	The plane will only display data from the set pixel_source -- any
+ *	data from other sources will be ignored.
+ *
+ *	For non-framebuffer sources, if pixel_source is set to a non-framebuffer
+ *	and non-NONE source, and the corresponding source property is NULL, then
+ *	the atomic commit should return an error.
+ *
+ *	Possible values:
+ *
+ *	"NONE":
+ *		No active pixel source.
+ *		Committing with a NONE pixel source will disable the plane.
+ *
+ *	"FB":
+ *		Framebuffer source set by the "FB_ID" property.
+ *
  * Note that all the property extensions described here apply either to the
  * plane or the CRTC (e.g. for the background color, which currently is not
  * exposed and assumed to be black).
@@ -615,3 +634,78 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane,
 	return 0;
 }
 EXPORT_SYMBOL(drm_plane_create_blend_mode_property);
+
+static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
+	{ DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" },
+	{ DRM_PLANE_PIXEL_SOURCE_FB, "FB" },
+};
+
+/**
+ * drm_plane_create_pixel_source_property - create a new pixel source property
+ * @plane: DRM plane
+ * @extra_sources: Bitmask of additional supported pixel_sources for the driver.
+ *		   DRM_PLANE_PIXEL_SOURCE_FB and DRM_PLANE_PIXEL_SOURCE_NONE will
+ *		   always be enabled as supported sources.
+ *
+ * This creates a new property describing the current source of pixel data for the
+ * plane. The pixel_source will be initialized as DRM_PLANE_PIXEL_SOURCE_FB by default.
+ *
+ * Drivers can set a custom default source by overriding the pixel_source value in
+ * drm_plane_funcs.reset()
+ *
+ * The property is exposed to userspace as an enumeration property called
+ * "pixel_source" and has the following enumeration values:
+ *
+ * "NONE":
+ *	No active pixel source
+ *
+ * "FB":
+ *	Framebuffer pixel source
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int drm_plane_create_pixel_source_property(struct drm_plane *plane,
+					   unsigned long extra_sources)
+{
+	struct drm_device *dev = plane->dev;
+	struct drm_property *prop;
+	static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) |
+						      BIT(DRM_PLANE_PIXEL_SOURCE_NONE);
+	int i;
+
+	/* FB is supported by default */
+	unsigned long supported_sources = extra_sources |
+					  BIT(DRM_PLANE_PIXEL_SOURCE_FB) |
+					  BIT(DRM_PLANE_PIXEL_SOURCE_NONE);
+
+	if (WARN_ON(supported_sources & ~valid_source_mask))
+		return -EINVAL;
+
+	prop = drm_property_create(dev, DRM_MODE_PROP_ENUM | DRM_MODE_PROP_ATOMIC, "pixel_source",
+			hweight32(supported_sources));
+
+	if (!prop)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(drm_pixel_source_enum_list); i++) {
+		int ret;
+
+		if (!test_bit(drm_pixel_source_enum_list[i].type, &supported_sources))
+			continue;
+
+		ret = drm_property_add_enum(prop, drm_pixel_source_enum_list[i].type,
+				drm_pixel_source_enum_list[i].name);
+		if (ret) {
+			drm_property_destroy(dev, prop);
+
+			return ret;
+		}
+	}
+
+	drm_object_attach_property(&plane->base, prop, DRM_PLANE_PIXEL_SOURCE_FB);
+	plane->pixel_source_property = prop;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_plane_create_pixel_source_property);
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 9e8e4c60983d6..c8dbe5ae60cc6 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -953,6 +953,14 @@ bool drm_any_plane_has_format(struct drm_device *dev,
 }
 EXPORT_SYMBOL(drm_any_plane_has_format);
 
+static bool drm_plane_needs_disable(struct drm_plane_state *state, struct drm_framebuffer *fb)
+{
+	if (state->pixel_source == DRM_PLANE_PIXEL_SOURCE_NONE)
+		return true;
+
+	return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && fb == NULL;
+}
+
 /*
  * __setplane_internal - setplane handler for internal callers
  *
@@ -975,8 +983,8 @@ static int __setplane_internal(struct drm_plane *plane,
 
 	WARN_ON(drm_drv_uses_atomic_modeset(plane->dev));
 
-	/* No fb means shut it down */
-	if (!fb) {
+	/* No visible data means shut it down */
+	if (drm_plane_needs_disable(plane->state, fb)) {
 		plane->old_fb = plane->fb;
 		ret = plane->funcs->disable_plane(plane, ctx);
 		if (!ret) {
@@ -1027,8 +1035,8 @@ static int __setplane_atomic(struct drm_plane *plane,
 
 	WARN_ON(!drm_drv_uses_atomic_modeset(plane->dev));
 
-	/* No fb means shut it down */
-	if (!fb)
+	/* No visible data means shut it down */
+	if (drm_plane_needs_disable(plane->state, fb))
 		return plane->funcs->disable_plane(plane, ctx);
 
 	/*
@@ -1101,6 +1109,9 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 
+	if (plane->state)
+		plane->state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB;
+
 	if (plane_req->fb_id) {
 		fb = drm_framebuffer_lookup(dev, file_priv, plane_req->fb_id);
 		if (!fb) {
diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h
index 88bdfec3bd884..122bbfbaae33c 100644
--- a/include/drm/drm_blend.h
+++ b/include/drm/drm_blend.h
@@ -58,4 +58,6 @@ int drm_atomic_normalize_zpos(struct drm_device *dev,
 			      struct drm_atomic_state *state);
 int drm_plane_create_blend_mode_property(struct drm_plane *plane,
 					 unsigned int supported_modes);
+int drm_plane_create_pixel_source_property(struct drm_plane *plane,
+					   unsigned long extra_sources);
 #endif
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index c6565a6f9324c..bc0176ba25be1 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -40,6 +40,12 @@ enum drm_scaling_filter {
 	DRM_SCALING_FILTER_NEAREST_NEIGHBOR,
 };
 
+enum drm_plane_pixel_source {
+	DRM_PLANE_PIXEL_SOURCE_NONE,
+	DRM_PLANE_PIXEL_SOURCE_FB,
+	DRM_PLANE_PIXEL_SOURCE_MAX
+};
+
 /**
  * struct drm_plane_state - mutable plane state
  *
@@ -120,6 +126,14 @@ struct drm_plane_state {
 	/** @hotspot_y: y offset to mouse cursor hotspot */
 	int32_t hotspot_x, hotspot_y;
 
+	/**
+	 * @pixel_source:
+	 *
+	 * Source of pixel information for the plane. See
+	 * drm_plane_create_pixel_source_property() for more details.
+	 */
+	enum drm_plane_pixel_source pixel_source;
+
 	/**
 	 * @alpha:
 	 * Opacity of the plane with 0 as completely transparent and 0xffff as
@@ -713,6 +727,13 @@ struct drm_plane {
 	 */
 	struct drm_plane_state *state;
 
+	/*
+	 * @pixel_source_property:
+	 * Optional pixel_source property for this plane. See
+	 * drm_plane_create_pixel_source_property().
+	 */
+	struct drm_property *pixel_source_property;
+
 	/**
 	 * @alpha_property:
 	 * Optional alpha property for this plane. See
-- 
GitLab


From 85863a4e16e77079ee14865905ddc3ef9483a640 Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Fri, 27 Oct 2023 15:32:52 -0700
Subject: [PATCH 0674/2340] drm: Introduce solid fill DRM plane property

Document and add support for solid_fill property to drm_plane. In
addition, add support for setting and getting the values for solid_fill.

To enable solid fill planes, userspace must assign a property blob to
the "solid_fill" plane property containing the following information:

struct drm_mode_solid_fill {
	u32 r, g, b, pad;
};

Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Sebastian Wick <sebastian@sebastianwick.net>
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-2-780188bfa7b2@quicinc.com
---
 drivers/gpu/drm/drm_atomic_state_helper.c |  9 ++++++
 drivers/gpu/drm/drm_atomic_uapi.c         | 26 ++++++++++++++++
 drivers/gpu/drm/drm_blend.c               | 30 +++++++++++++++++++
 include/drm/drm_blend.h                   |  1 +
 include/drm/drm_plane.h                   | 36 +++++++++++++++++++++++
 include/uapi/drm/drm_mode.h               | 24 +++++++++++++++
 6 files changed, 126 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 311b04edf742c..f6c76cea8be49 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -254,6 +254,11 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state,
 	plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
 	plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB;
 
+	if (plane_state->solid_fill_blob) {
+		drm_property_blob_put(plane_state->solid_fill_blob);
+		plane_state->solid_fill_blob = NULL;
+	}
+
 	if (plane->color_encoding_property) {
 		if (!drm_object_property_get_default_value(&plane->base,
 							   plane->color_encoding_property,
@@ -350,6 +355,9 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane,
 	if (state->fb)
 		drm_framebuffer_get(state->fb);
 
+	if (state->solid_fill_blob)
+		drm_property_blob_get(state->solid_fill_blob);
+
 	state->fence = NULL;
 	state->commit = NULL;
 	state->fb_damage_clips = NULL;
@@ -399,6 +407,7 @@ void __drm_atomic_helper_plane_destroy_state(struct drm_plane_state *state)
 		drm_crtc_commit_put(state->commit);
 
 	drm_property_blob_put(state->fb_damage_clips);
+	drm_property_blob_put(state->solid_fill_blob);
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_destroy_state);
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index bd71405319488..d7ae8e2c02651 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -316,6 +316,20 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state,
 }
 EXPORT_SYMBOL(drm_atomic_set_crtc_for_connector);
 
+static void drm_atomic_set_solid_fill_prop(struct drm_plane_state *state)
+{
+	struct drm_mode_solid_fill *user_info;
+
+	if (!state->solid_fill_blob)
+		return;
+
+	user_info = (struct drm_mode_solid_fill *)state->solid_fill_blob->data;
+
+	state->solid_fill.r = user_info->r;
+	state->solid_fill.g = user_info->g;
+	state->solid_fill.b = user_info->b;
+}
+
 static void set_out_fence_for_crtc(struct drm_atomic_state *state,
 				   struct drm_crtc *crtc, s32 __user *fence_ptr)
 {
@@ -564,6 +578,15 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 		state->src_h = val;
 	} else if (property == plane->pixel_source_property) {
 		state->pixel_source = val;
+	} else if (property == plane->solid_fill_property) {
+		ret = drm_atomic_replace_property_blob_from_id(dev,
+				&state->solid_fill_blob,
+				val, sizeof(struct drm_mode_solid_fill),
+				-1, &replaced);
+		if (ret)
+			return ret;
+
+		drm_atomic_set_solid_fill_prop(state);
 	} else if (property == plane->alpha_property) {
 		state->alpha = val;
 	} else if (property == plane->blend_mode_property) {
@@ -654,6 +677,9 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = state->src_h;
 	} else if (property == plane->pixel_source_property) {
 		*val = state->pixel_source;
+	} else if (property == plane->solid_fill_property) {
+		*val = state->solid_fill_blob ?
+			state->solid_fill_blob->base.id : 0;
 	} else if (property == plane->alpha_property) {
 		*val = state->alpha;
 	} else if (property == plane->blend_mode_property) {
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index fce734cdb85ba..665c5d9b056f8 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -204,6 +204,10 @@
  *	"FB":
  *		Framebuffer source set by the "FB_ID" property.
  *
+ * solid_fill:
+ *	solid_fill is set up with drm_plane_create_solid_fill_property(). It
+ *	contains pixel data that drivers can use to fill a plane.
+ *
  * Note that all the property extensions described here apply either to the
  * plane or the CRTC (e.g. for the background color, which currently is not
  * exposed and assumed to be black).
@@ -709,3 +713,29 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane,
 	return 0;
 }
 EXPORT_SYMBOL(drm_plane_create_pixel_source_property);
+
+/**
+ * drm_plane_create_solid_fill_property - create a new solid_fill property
+ * @plane: drm plane
+ *
+ * This creates a new property blob that holds pixel data for solid fill planes.
+ * The property is exposed to userspace as a property blob called "solid_fill".
+ *
+ * For information on what the blob contains, see `drm_mode_solid_fill`.
+ */
+int drm_plane_create_solid_fill_property(struct drm_plane *plane)
+{
+	struct drm_property *prop;
+
+	prop = drm_property_create(plane->dev,
+			DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB,
+			"solid_fill", 0);
+	if (!prop)
+		return -ENOMEM;
+
+	drm_object_attach_property(&plane->base, prop, 0);
+	plane->solid_fill_property = prop;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_plane_create_solid_fill_property);
diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h
index 122bbfbaae33c..e7158fbee3897 100644
--- a/include/drm/drm_blend.h
+++ b/include/drm/drm_blend.h
@@ -60,4 +60,5 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane,
 					 unsigned int supported_modes);
 int drm_plane_create_pixel_source_property(struct drm_plane *plane,
 					   unsigned long extra_sources);
+int drm_plane_create_solid_fill_property(struct drm_plane *plane);
 #endif
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index bc0176ba25be1..5bac644d4cc34 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -46,6 +46,18 @@ enum drm_plane_pixel_source {
 	DRM_PLANE_PIXEL_SOURCE_MAX
 };
 
+/**
+ * struct solid_fill_property - RGB values for solid fill plane
+ *
+ * TODO: Add solid fill source and corresponding pixel source
+ *       that supports RGBA color
+ */
+struct drm_solid_fill {
+	uint32_t r;
+	uint32_t g;
+	uint32_t b;
+};
+
 /**
  * struct drm_plane_state - mutable plane state
  *
@@ -134,6 +146,23 @@ struct drm_plane_state {
 	 */
 	enum drm_plane_pixel_source pixel_source;
 
+	/**
+	 * @solid_fill_blob:
+	 *
+	 * Blob containing relevant information for a solid fill plane
+	 * including RGB color values. See
+	 * drm_plane_create_solid_fill_property() for more details.
+	 */
+	struct drm_property_blob *solid_fill_blob;
+
+	/**
+	 * @solid_fill:
+	 *
+	 * Pixel data for solid fill planes. See
+	 * drm_plane_create_solid_fill_property() for more details.
+	 */
+	struct drm_solid_fill solid_fill;
+
 	/**
 	 * @alpha:
 	 * Opacity of the plane with 0 as completely transparent and 0xffff as
@@ -734,6 +763,13 @@ struct drm_plane {
 	 */
 	struct drm_property *pixel_source_property;
 
+	/**
+	 * @solid_fill_property:
+	 * Optional solid_fill property for this plane. See
+	 * drm_plane_create_solid_fill_property().
+	 */
+	struct drm_property *solid_fill_property;
+
 	/**
 	 * @alpha_property:
 	 * Optional alpha property for this plane. See
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 95630f1701102..cbd943a8c88b1 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -259,6 +259,30 @@ struct drm_mode_modeinfo {
 	char name[DRM_DISPLAY_MODE_LEN];
 };
 
+/**
+ * struct drm_mode_solid_fill - User info for solid fill planes
+ *
+ * This is the userspace API solid fill information structure.
+ *
+ * Userspace can enable solid fill planes by assigning the plane "solid_fill"
+ * property to a blob containing a single drm_mode_solid_fill struct populated with an RGB323232
+ * color and setting the pixel source to "SOLID_FILL".
+ *
+ * For information on the plane property, see drm_plane_create_solid_fill_property()
+ *
+ * @r: Red color value of single pixel
+ * @g: Green color value of single pixel
+ * @b: Blue color value of single pixel
+ * @pad: padding, must be zero
+ */
+struct drm_mode_solid_fill {
+	__u32 r;
+	__u32 g;
+	__u32 b;
+	__u32 pad;
+};
+
+
 struct drm_mode_card_res {
 	__u64 fb_id_ptr;
 	__u64 crtc_id_ptr;
-- 
GitLab


From 4b64167042927531f4cfaf035b8f88c2f7a05f06 Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Fri, 27 Oct 2023 15:32:53 -0700
Subject: [PATCH 0675/2340] drm: Add solid fill pixel source

Add "SOLID_FILL" as a valid pixel source. If the pixel_source property is
set to "SOLID_FILL", it will display data from the drm_plane "solid_fill"
blob property.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Sebastian Wick <sebastian@sebastianwick.net>
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-3-780188bfa7b2@quicinc.com
---
 drivers/gpu/drm/drm_blend.c | 10 +++++++++-
 include/drm/drm_plane.h     |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 665c5d9b056f8..37b31b7e5ce5d 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -204,6 +204,9 @@
  *	"FB":
  *		Framebuffer source set by the "FB_ID" property.
  *
+ *	"SOLID_FILL":
+ *		Solid fill color source set by the "solid_fill" property.
+ *
  * solid_fill:
  *	solid_fill is set up with drm_plane_create_solid_fill_property(). It
  *	contains pixel data that drivers can use to fill a plane.
@@ -642,6 +645,7 @@ EXPORT_SYMBOL(drm_plane_create_blend_mode_property);
 static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
 	{ DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" },
 	{ DRM_PLANE_PIXEL_SOURCE_FB, "FB" },
+	{ DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, "SOLID_FILL" },
 };
 
 /**
@@ -666,6 +670,9 @@ static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
  * "FB":
  *	Framebuffer pixel source
  *
+ * "SOLID_FILL":
+ * 	Solid fill color pixel source
+ *
  * Returns:
  * Zero on success, negative errno on failure.
  */
@@ -675,7 +682,8 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane,
 	struct drm_device *dev = plane->dev;
 	struct drm_property *prop;
 	static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) |
-						      BIT(DRM_PLANE_PIXEL_SOURCE_NONE);
+						      BIT(DRM_PLANE_PIXEL_SOURCE_NONE) |
+						      BIT(DRM_PLANE_PIXEL_SOURCE_SOLID_FILL);
 	int i;
 
 	/* FB is supported by default */
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 5bac644d4cc34..4b7af4381bbe4 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -43,6 +43,7 @@ enum drm_scaling_filter {
 enum drm_plane_pixel_source {
 	DRM_PLANE_PIXEL_SOURCE_NONE,
 	DRM_PLANE_PIXEL_SOURCE_FB,
+	DRM_PLANE_PIXEL_SOURCE_SOLID_FILL,
 	DRM_PLANE_PIXEL_SOURCE_MAX
 };
 
-- 
GitLab


From 8283ac7871a959848e09fc6593b8c12b8febfee6 Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Fri, 27 Oct 2023 15:32:54 -0700
Subject: [PATCH 0676/2340] drm/atomic: Add pixel source to plane state dump

Add pixel source to the atomic plane state dump

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Sebastian Wick <sebastian@sebastianwick.net>
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-4-780188bfa7b2@quicinc.com
---
 drivers/gpu/drm/drm_atomic.c        | 1 +
 drivers/gpu/drm/drm_blend.c         | 1 +
 drivers/gpu/drm/drm_crtc_internal.h | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index f1a503aafe5aa..02aa7df832cc0 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -722,6 +722,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 
 	drm_printf(p, "plane[%u]: %s\n", plane->base.id, plane->name);
 	drm_printf(p, "\tcrtc=%s\n", state->crtc ? state->crtc->name : "(null)");
+	drm_printf(p, "\tpixel-source=%s\n", drm_get_pixel_source_name(state->pixel_source));
 	drm_printf(p, "\tfb=%u\n", state->fb ? state->fb->base.id : 0);
 	if (state->fb)
 		drm_framebuffer_print_info(p, 2, state->fb);
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 37b31b7e5ce5d..9c1608f7c1df7 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -647,6 +647,7 @@ static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
 	{ DRM_PLANE_PIXEL_SOURCE_FB, "FB" },
 	{ DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, "SOLID_FILL" },
 };
+DRM_ENUM_NAME_FN(drm_get_pixel_source_name, drm_pixel_source_enum_list);
 
 /**
  * drm_plane_create_pixel_source_property - create a new pixel source property
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index a514d5207e419..4114675c07282 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -269,6 +269,7 @@ int drm_plane_check_pixel_format(struct drm_plane *plane,
 				 u32 format, u64 modifier);
 struct drm_mode_rect *
 __drm_plane_get_damage_clips(const struct drm_plane_state *state);
+const char *drm_get_pixel_source_name(int val);
 
 /* drm_bridge.c */
 void drm_bridge_detach(struct drm_bridge *bridge);
-- 
GitLab


From e86413f5442ee094e66b3e75f2d3419ed0df9520 Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Fri, 27 Oct 2023 15:32:55 -0700
Subject: [PATCH 0677/2340] drm/atomic: Add solid fill data to plane state dump

Add solid_fill property data to the atomic plane state dump.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Sebastian Wick <sebastian@sebastianwick.net>
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-5-780188bfa7b2@quicinc.com
---
 drivers/gpu/drm/drm_atomic.c | 4 ++++
 drivers/gpu/drm/drm_plane.c  | 8 ++++++++
 include/drm/drm_plane.h      | 3 +++
 3 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 02aa7df832cc0..1339785fbe80d 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -726,6 +726,10 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 	drm_printf(p, "\tfb=%u\n", state->fb ? state->fb->base.id : 0);
 	if (state->fb)
 		drm_framebuffer_print_info(p, 2, state->fb);
+	drm_printf(p, "\tsolid_fill=%u\n",
+			state->solid_fill_blob ? state->solid_fill_blob->base.id : 0);
+	if (state->solid_fill_blob)
+		drm_plane_solid_fill_print_info(p, 2, state);
 	drm_printf(p, "\tcrtc-pos=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&dest));
 	drm_printf(p, "\tsrc-pos=" DRM_RECT_FP_FMT "\n", DRM_RECT_FP_ARG(&src));
 	drm_printf(p, "\trotation=%x\n", state->rotation);
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index c8dbe5ae60cc6..c65c72701dd29 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -1641,6 +1641,14 @@ __drm_plane_get_damage_clips(const struct drm_plane_state *state)
 					state->fb_damage_clips->data : NULL);
 }
 
+void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent,
+				     const struct drm_plane_state *state)
+{
+	drm_printf_indent(p, indent, "r=0x%08x\n", state->solid_fill.r);
+	drm_printf_indent(p, indent, "g=0x%08x\n", state->solid_fill.g);
+	drm_printf_indent(p, indent, "b=0x%08x\n", state->solid_fill.b);
+}
+
 /**
  * drm_plane_get_damage_clips - Returns damage clips.
  * @state: Plane state.
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 4b7af4381bbe4..d14e2f1db2343 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -1025,6 +1025,9 @@ drm_plane_get_damage_clips_count(const struct drm_plane_state *state);
 struct drm_mode_rect *
 drm_plane_get_damage_clips(const struct drm_plane_state *state);
 
+void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent,
+				     const struct drm_plane_state *state);
+
 int drm_plane_create_scaling_filter_property(struct drm_plane *plane,
 					     unsigned int supported_filters);
 
-- 
GitLab


From 4ba6b7a646321e740c7f2d80c90505019c4e8fce Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Fri, 27 Oct 2023 15:32:56 -0700
Subject: [PATCH 0678/2340] drm/atomic: Move framebuffer checks to helper

Currently framebuffer checks happen directly in
drm_atomic_plane_check(). Move these checks into their own helper
method.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Sebastian Wick <sebastian@sebastianwick.net>
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-6-780188bfa7b2@quicinc.com
---
 drivers/gpu/drm/drm_atomic.c | 130 ++++++++++++++++++++---------------
 1 file changed, 73 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 1339785fbe80d..c6f2b86c48ae2 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -589,6 +589,76 @@ plane_switching_crtc(const struct drm_plane_state *old_plane_state,
 	return true;
 }
 
+static int drm_atomic_plane_check_fb(const struct drm_plane_state *state)
+{
+	struct drm_plane *plane = state->plane;
+	const struct drm_framebuffer *fb = state->fb;
+	struct drm_mode_rect *clips;
+
+	uint32_t num_clips;
+	unsigned int fb_width, fb_height;
+	int ret;
+
+	/* Check whether this plane supports the fb pixel format. */
+	ret = drm_plane_check_pixel_format(plane, fb->format->format,
+					   fb->modifier);
+
+	if (ret) {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] invalid pixel format %p4cc, modifier 0x%llx\n",
+			       plane->base.id, plane->name,
+			       &fb->format->format, fb->modifier);
+		return ret;
+	}
+
+	fb_width = fb->width << 16;
+	fb_height = fb->height << 16;
+
+	/* Make sure source coordinates are inside the fb. */
+	if (state->src_w > fb_width ||
+	    state->src_x > fb_width - state->src_w ||
+	    state->src_h > fb_height ||
+	    state->src_y > fb_height - state->src_h) {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] invalid source coordinates "
+			       "%u.%06ux%u.%06u+%u.%06u+%u.%06u (fb %ux%u)\n",
+			       plane->base.id, plane->name,
+			       state->src_w >> 16,
+			       ((state->src_w & 0xffff) * 15625) >> 10,
+			       state->src_h >> 16,
+			       ((state->src_h & 0xffff) * 15625) >> 10,
+			       state->src_x >> 16,
+			       ((state->src_x & 0xffff) * 15625) >> 10,
+			       state->src_y >> 16,
+			       ((state->src_y & 0xffff) * 15625) >> 10,
+			       fb->width, fb->height);
+		return -ENOSPC;
+	}
+
+	clips = __drm_plane_get_damage_clips(state);
+	num_clips = drm_plane_get_damage_clips_count(state);
+
+	/* Make sure damage clips are valid and inside the fb. */
+	while (num_clips > 0) {
+		if (clips->x1 >= clips->x2 ||
+		    clips->y1 >= clips->y2 ||
+		    clips->x1 < 0 ||
+		    clips->y1 < 0 ||
+		    clips->x2 > fb_width ||
+		    clips->y2 > fb_height) {
+			drm_dbg_atomic(plane->dev,
+				       "[PLANE:%d:%s] invalid damage clip %d %d %d %d\n",
+				       plane->base.id, plane->name, clips->x1,
+				       clips->y1, clips->x2, clips->y2);
+			return -EINVAL;
+		}
+		clips++;
+		num_clips--;
+	}
+
+	return 0;
+}
+
 /**
  * drm_atomic_plane_check - check plane state
  * @old_plane_state: old plane state to check
@@ -605,9 +675,6 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 	struct drm_plane *plane = new_plane_state->plane;
 	struct drm_crtc *crtc = new_plane_state->crtc;
 	const struct drm_framebuffer *fb = new_plane_state->fb;
-	unsigned int fb_width, fb_height;
-	struct drm_mode_rect *clips;
-	uint32_t num_clips;
 	int ret;
 
 	/* either *both* CRTC and FB must be set, or neither */
@@ -634,17 +701,6 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 		return -EINVAL;
 	}
 
-	/* Check whether this plane supports the fb pixel format. */
-	ret = drm_plane_check_pixel_format(plane, fb->format->format,
-					   fb->modifier);
-	if (ret) {
-		drm_dbg_atomic(plane->dev,
-			       "[PLANE:%d:%s] invalid pixel format %p4cc, modifier 0x%llx\n",
-			       plane->base.id, plane->name,
-			       &fb->format->format, fb->modifier);
-		return ret;
-	}
-
 	/* Give drivers some help against integer overflows */
 	if (new_plane_state->crtc_w > INT_MAX ||
 	    new_plane_state->crtc_x > INT_MAX - (int32_t) new_plane_state->crtc_w ||
@@ -658,50 +714,10 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 		return -ERANGE;
 	}
 
-	fb_width = fb->width << 16;
-	fb_height = fb->height << 16;
 
-	/* Make sure source coordinates are inside the fb. */
-	if (new_plane_state->src_w > fb_width ||
-	    new_plane_state->src_x > fb_width - new_plane_state->src_w ||
-	    new_plane_state->src_h > fb_height ||
-	    new_plane_state->src_y > fb_height - new_plane_state->src_h) {
-		drm_dbg_atomic(plane->dev,
-			       "[PLANE:%d:%s] invalid source coordinates "
-			       "%u.%06ux%u.%06u+%u.%06u+%u.%06u (fb %ux%u)\n",
-			       plane->base.id, plane->name,
-			       new_plane_state->src_w >> 16,
-			       ((new_plane_state->src_w & 0xffff) * 15625) >> 10,
-			       new_plane_state->src_h >> 16,
-			       ((new_plane_state->src_h & 0xffff) * 15625) >> 10,
-			       new_plane_state->src_x >> 16,
-			       ((new_plane_state->src_x & 0xffff) * 15625) >> 10,
-			       new_plane_state->src_y >> 16,
-			       ((new_plane_state->src_y & 0xffff) * 15625) >> 10,
-			       fb->width, fb->height);
-		return -ENOSPC;
-	}
-
-	clips = __drm_plane_get_damage_clips(new_plane_state);
-	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
-
-	/* Make sure damage clips are valid and inside the fb. */
-	while (num_clips > 0) {
-		if (clips->x1 >= clips->x2 ||
-		    clips->y1 >= clips->y2 ||
-		    clips->x1 < 0 ||
-		    clips->y1 < 0 ||
-		    clips->x2 > fb_width ||
-		    clips->y2 > fb_height) {
-			drm_dbg_atomic(plane->dev,
-				       "[PLANE:%d:%s] invalid damage clip %d %d %d %d\n",
-				       plane->base.id, plane->name, clips->x1,
-				       clips->y1, clips->x2, clips->y2);
-			return -EINVAL;
-		}
-		clips++;
-		num_clips--;
-	}
+	ret = drm_atomic_plane_check_fb(new_plane_state);
+	if (ret)
+		return ret;
 
 	if (plane_switching_crtc(old_plane_state, new_plane_state)) {
 		drm_dbg_atomic(plane->dev,
-- 
GitLab


From f1e75da5364e780905d9cd6043f9c74cdcf84073 Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Fri, 27 Oct 2023 15:32:57 -0700
Subject: [PATCH 0679/2340] drm/atomic: Loosen FB atomic checks

Loosen the requirements for atomic and legacy commit so that, in cases
where pixel_source != FB, the commit can still go through.

This includes adding framebuffer NULL checks in other areas to account for
FB being NULL when non-FB pixel sources are enabled.

To disable a plane, the pixel_source must be NONE or the FB must be NULL
if pixel_source == FB.

Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-7-780188bfa7b2@quicinc.com
---
 drivers/gpu/drm/drm_atomic.c        | 21 ++++++++--------
 drivers/gpu/drm/drm_atomic_helper.c | 39 ++++++++++++++++-------------
 include/drm/drm_atomic_helper.h     |  4 +--
 include/drm/drm_plane.h             | 29 +++++++++++++++++++++
 4 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c6f2b86c48ae2..aed0a694c74c4 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -674,17 +674,16 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 {
 	struct drm_plane *plane = new_plane_state->plane;
 	struct drm_crtc *crtc = new_plane_state->crtc;
-	const struct drm_framebuffer *fb = new_plane_state->fb;
 	int ret;
 
-	/* either *both* CRTC and FB must be set, or neither */
-	if (crtc && !fb) {
-		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no FB\n",
+	/* either *both* CRTC and pixel source must be set, or neither */
+	if (crtc && !drm_plane_has_visible_data(new_plane_state)) {
+		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no visible data\n",
 			       plane->base.id, plane->name);
 		return -EINVAL;
-	} else if (fb && !crtc) {
-		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] FB set but no CRTC\n",
-			       plane->base.id, plane->name);
+	} else if (drm_plane_has_visible_data(new_plane_state) && !crtc) {
+		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] Source %d has visible data but no CRTC\n",
+			       plane->base.id, plane->name, new_plane_state->pixel_source);
 		return -EINVAL;
 	}
 
@@ -715,9 +714,11 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 	}
 
 
-	ret = drm_atomic_plane_check_fb(new_plane_state);
-	if (ret)
-		return ret;
+	if (new_plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && new_plane_state->fb) {
+		ret = drm_atomic_plane_check_fb(new_plane_state);
+		if (ret)
+			return ret;
+	}
 
 	if (plane_switching_crtc(old_plane_state, new_plane_state)) {
 		drm_dbg_atomic(plane->dev,
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index c3f677130def0..dc048988e3f3c 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -861,7 +861,6 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
 					bool can_position,
 					bool can_update_disabled)
 {
-	struct drm_framebuffer *fb = plane_state->fb;
 	struct drm_rect *src = &plane_state->src;
 	struct drm_rect *dst = &plane_state->dst;
 	unsigned int rotation = plane_state->rotation;
@@ -873,7 +872,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
 	*src = drm_plane_state_src(plane_state);
 	*dst = drm_plane_state_dest(plane_state);
 
-	if (!fb) {
+	if (!drm_plane_has_visible_data(plane_state)) {
 		plane_state->visible = false;
 		return 0;
 	}
@@ -890,25 +889,31 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
 		return -EINVAL;
 	}
 
-	drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation);
+	/* Check that selected pixel source is valid */
+	if (plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && plane_state->fb) {
+		struct drm_framebuffer *fb = plane_state->fb;
+		drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation);
 
-	/* Check scaling */
-	hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
-	vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
-	if (hscale < 0 || vscale < 0) {
-		drm_dbg_kms(plane_state->plane->dev,
-			    "Invalid scaling of plane\n");
-		drm_rect_debug_print("src: ", &plane_state->src, true);
-		drm_rect_debug_print("dst: ", &plane_state->dst, false);
-		return -ERANGE;
-	}
+		/* Check scaling */
+		hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
+		vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
 
-	if (crtc_state->enable)
-		drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2);
+		if (hscale < 0 || vscale < 0) {
+			drm_dbg_kms(plane_state->plane->dev,
+					"Invalid scaling of plane\n");
+			drm_rect_debug_print("src: ", &plane_state->src, true);
+			drm_rect_debug_print("dst: ", &plane_state->dst, false);
+			return -ERANGE;
+		}
 
-	plane_state->visible = drm_rect_clip_scaled(src, dst, &clip);
+		if (crtc_state->enable)
+			drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2);
 
-	drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation);
+		plane_state->visible = drm_rect_clip_scaled(src, dst, &clip);
+		drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation);
+	} else if (drm_plane_solid_fill_enabled(plane_state)) {
+		plane_state->visible = true;
+	}
 
 	if (!plane_state->visible)
 		/*
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 536a0b0091c3a..6d97f38ac1f67 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -256,8 +256,8 @@ drm_atomic_plane_disabling(struct drm_plane_state *old_plane_state,
 	 * Anything else should be considered a bug in the atomic core, so we
 	 * gently warn about it.
 	 */
-	WARN_ON((new_plane_state->crtc == NULL && new_plane_state->fb != NULL) ||
-		(new_plane_state->crtc != NULL && new_plane_state->fb == NULL));
+	WARN_ON((new_plane_state->crtc == NULL && drm_plane_has_visible_data(new_plane_state)) ||
+		(new_plane_state->crtc != NULL && !drm_plane_has_visible_data(new_plane_state)));
 
 	return old_plane_state->crtc && !new_plane_state->crtc;
 }
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index d14e2f1db2343..3b187f3f54664 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -1016,6 +1016,35 @@ static inline struct drm_plane *drm_plane_find(struct drm_device *dev,
 #define drm_for_each_plane(plane, dev) \
 	list_for_each_entry(plane, &(dev)->mode_config.plane_list, head)
 
+/**
+ * drm_plane_solid_fill_enabled - Check if solid fill is enabled on plane
+ * @state: plane state
+ *
+ * Returns:
+ * Whether the plane has been assigned a solid_fill_blob
+ */
+static inline bool drm_plane_solid_fill_enabled(struct drm_plane_state *state)
+{
+	if (!state)
+		return false;
+	return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_SOLID_FILL && state->solid_fill_blob;
+}
+
+static inline bool drm_plane_has_visible_data(const struct drm_plane_state *state)
+{
+	switch (state->pixel_source) {
+	case DRM_PLANE_PIXEL_SOURCE_NONE:
+		return false;
+	case DRM_PLANE_PIXEL_SOURCE_SOLID_FILL:
+		return state->solid_fill_blob != NULL;
+	case DRM_PLANE_PIXEL_SOURCE_FB:
+	default:
+		WARN_ON(state->pixel_source != DRM_PLANE_PIXEL_SOURCE_FB);
+	}
+
+	return state->fb != NULL;
+}
+
 bool drm_any_plane_has_format(struct drm_device *dev,
 			      u32 format, u64 modifier);
 
-- 
GitLab


From 5d86c15c3171c3ecebd84d53e30d9812b5591c84 Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Sat, 2 Dec 2023 13:51:42 +0100
Subject: [PATCH 0680/2340] dt-bindings: gpu: mali-utgard: Add Rockchip RK3128
 compatible

Rockchip RK312x SoC family has a Mali400 MP2.
Add a compatible for it.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231202125144.66052-4-knaerzche@gmail.com
---
 Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml
index 0fae1ef013be8..abd4aa335fbce 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml
@@ -29,6 +29,7 @@ properties:
               - allwinner,sun50i-a64-mali
               - rockchip,rk3036-mali
               - rockchip,rk3066-mali
+              - rockchip,rk3128-mali
               - rockchip,rk3188-mali
               - rockchip,rk3228-mali
               - samsung,exynos4210-mali
-- 
GitLab


From ab8420418c2e67c81549ce483ff6a472c6dc2833 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 19 Oct 2023 13:44:19 +0300
Subject: [PATCH 0681/2340] drm/msm/dp: cleanup debugfs handling

Currently there are two subdirs for DP debugfs files, e.g. DP-1, created
by the drm core for the connector, and the msm_dp-DP-1, created by the
DP driver itself. Merge those two, so that there are no extraneous
connector-related subdirs.

Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/563523/
Link: https://lore.kernel.org/r/20231019104419.1032329-1-dmitry.baryshkov@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 11 ----
 drivers/gpu/drm/msm/dp/dp_debug.c       | 69 ++++++-------------------
 drivers/gpu/drm/msm/dp/dp_debug.h       | 23 +++------
 drivers/gpu/drm/msm/dp/dp_display.c     |  5 +-
 drivers/gpu/drm/msm/dp/dp_display.h     |  1 +
 drivers/gpu/drm/msm/dp/dp_drm.c         | 16 ++++++
 drivers/gpu/drm/msm/msm_drv.h           |  6 ---
 7 files changed, 42 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index fe7267b3bff53..babec724c76a5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -274,9 +274,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
 	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
 	void *p = dpu_hw_util_get_log_mask_ptr();
 	struct dentry *entry;
-	struct drm_device *dev;
-	struct msm_drm_private *priv;
-	int i;
 
 	if (!p)
 		return -EINVAL;
@@ -285,9 +282,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
 	if (minor->type != DRM_MINOR_PRIMARY)
 		return 0;
 
-	dev = dpu_kms->dev;
-	priv = dev->dev_private;
-
 	entry = debugfs_create_dir("debug", minor->debugfs_root);
 
 	debugfs_create_x32(DPU_DEBUGFS_HWMASKNAME, 0600, entry, p);
@@ -297,11 +291,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
 	dpu_debugfs_core_irq_init(dpu_kms, entry);
 	dpu_debugfs_sspp_init(dpu_kms, entry);
 
-	for (i = 0; i < ARRAY_SIZE(priv->dp); i++) {
-		if (priv->dp[i])
-			msm_dp_debugfs_init(priv->dp[i], minor);
-	}
-
 	return dpu_core_perf_debugfs_init(dpu_kms, entry);
 }
 #endif
diff --git a/drivers/gpu/drm/msm/dp/dp_debug.c b/drivers/gpu/drm/msm/dp/dp_debug.c
index 3bba901afe335..6c281dc095b9a 100644
--- a/drivers/gpu/drm/msm/dp/dp_debug.c
+++ b/drivers/gpu/drm/msm/dp/dp_debug.c
@@ -19,13 +19,9 @@
 #define DEBUG_NAME "msm_dp"
 
 struct dp_debug_private {
-	struct dentry *root;
-
 	struct dp_link *link;
 	struct dp_panel *panel;
 	struct drm_connector *connector;
-	struct device *dev;
-	struct drm_device *drm_dev;
 
 	struct dp_debug dp_debug;
 };
@@ -204,35 +200,33 @@ static const struct file_operations test_active_fops = {
 	.write = dp_test_active_write
 };
 
-static void dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor)
+static void dp_debug_init(struct dp_debug *dp_debug, struct dentry *root, bool is_edp)
 {
-	char path[64];
 	struct dp_debug_private *debug = container_of(dp_debug,
 			struct dp_debug_private, dp_debug);
 
-	snprintf(path, sizeof(path), "msm_dp-%s", debug->connector->name);
-
-	debug->root = debugfs_create_dir(path, minor->debugfs_root);
-
-	debugfs_create_file("dp_debug", 0444, debug->root,
+	debugfs_create_file("dp_debug", 0444, root,
 			debug, &dp_debug_fops);
 
-	debugfs_create_file("msm_dp_test_active", 0444,
-			debug->root,
-			debug, &test_active_fops);
+	if (!is_edp) {
+		debugfs_create_file("msm_dp_test_active", 0444,
+				    root,
+				    debug, &test_active_fops);
 
-	debugfs_create_file("msm_dp_test_data", 0444,
-			debug->root,
-			debug, &dp_test_data_fops);
+		debugfs_create_file("msm_dp_test_data", 0444,
+				    root,
+				    debug, &dp_test_data_fops);
 
-	debugfs_create_file("msm_dp_test_type", 0444,
-			debug->root,
-			debug, &dp_test_type_fops);
+		debugfs_create_file("msm_dp_test_type", 0444,
+				    root,
+				    debug, &dp_test_type_fops);
+	}
 }
 
 struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 		struct dp_link *link,
-		struct drm_connector *connector, struct drm_minor *minor)
+		struct drm_connector *connector,
+		struct dentry *root, bool is_edp)
 {
 	struct dp_debug_private *debug;
 	struct dp_debug *dp_debug;
@@ -253,46 +247,15 @@ struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 	debug->dp_debug.debug_en = false;
 	debug->link = link;
 	debug->panel = panel;
-	debug->dev = dev;
-	debug->drm_dev = minor->dev;
-	debug->connector = connector;
 
 	dp_debug = &debug->dp_debug;
 	dp_debug->vdisplay = 0;
 	dp_debug->hdisplay = 0;
 	dp_debug->vrefresh = 0;
 
-	dp_debug_init(dp_debug, minor);
+	dp_debug_init(dp_debug, root, is_edp);
 
 	return dp_debug;
  error:
 	return ERR_PTR(rc);
 }
-
-static int dp_debug_deinit(struct dp_debug *dp_debug)
-{
-	struct dp_debug_private *debug;
-
-	if (!dp_debug)
-		return -EINVAL;
-
-	debug = container_of(dp_debug, struct dp_debug_private, dp_debug);
-
-	debugfs_remove_recursive(debug->root);
-
-	return 0;
-}
-
-void dp_debug_put(struct dp_debug *dp_debug)
-{
-	struct dp_debug_private *debug;
-
-	if (!dp_debug)
-		return;
-
-	debug = container_of(dp_debug, struct dp_debug_private, dp_debug);
-
-	dp_debug_deinit(dp_debug);
-
-	devm_kfree(debug->dev, debug);
-}
diff --git a/drivers/gpu/drm/msm/dp/dp_debug.h b/drivers/gpu/drm/msm/dp/dp_debug.h
index 124227873d58c..9b3b2e702f655 100644
--- a/drivers/gpu/drm/msm/dp/dp_debug.h
+++ b/drivers/gpu/drm/msm/dp/dp_debug.h
@@ -34,7 +34,8 @@ struct dp_debug {
  * @panel: instance of panel module
  * @link: instance of link module
  * @connector: double pointer to display connector
- * @minor: pointer to drm minor number after device registration
+ * @root: connector's debugfs root
+ * @is_edp: set for eDP connectors / panels
  * return: pointer to allocated debug module data
  *
  * This function sets up the debug module and provides a way
@@ -43,31 +44,21 @@ struct dp_debug {
 struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 		struct dp_link *link,
 		struct drm_connector *connector,
-		struct drm_minor *minor);
-
-/**
- * dp_debug_put()
- *
- * Cleans up dp_debug instance
- *
- * @dp_debug: instance of dp_debug
- */
-void dp_debug_put(struct dp_debug *dp_debug);
+		struct dentry *root,
+		bool is_edp);
 
 #else
 
 static inline
 struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 		struct dp_link *link,
-		struct drm_connector *connector, struct drm_minor *minor)
+		struct drm_connector *connector,
+		struct dentry *root,
+		bool is_edp)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline void dp_debug_put(struct dp_debug *dp_debug)
-{
-}
-
 #endif /* defined(CONFIG_DEBUG_FS) */
 
 #endif /* _DP_DEBUG_H_ */
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index e329e03e068d5..2ac9d61501c75 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -715,7 +715,6 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
 
 static void dp_display_deinit_sub_modules(struct dp_display_private *dp)
 {
-	dp_debug_put(dp->debug);
 	dp_audio_put(dp->audio);
 	dp_panel_put(dp->panel);
 	dp_aux_put(dp->aux);
@@ -1451,7 +1450,7 @@ bool msm_dp_wide_bus_available(const struct msm_dp *dp_display)
 	return dp->wide_bus_en;
 }
 
-void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor)
+void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *root, bool is_edp)
 {
 	struct dp_display_private *dp;
 	struct device *dev;
@@ -1462,7 +1461,7 @@ void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor)
 
 	dp->debug = dp_debug_get(dev, dp->panel,
 					dp->link, dp->dp_display.connector,
-					minor);
+					root, is_edp);
 	if (IS_ERR(dp->debug)) {
 		rc = PTR_ERR(dp->debug);
 		DRM_ERROR("failed to initialize debug, rc = %d\n", rc);
diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h
index f66cdbc357856..5e2fbd8318e93 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.h
+++ b/drivers/gpu/drm/msm/dp/dp_display.h
@@ -42,5 +42,6 @@ int dp_display_get_test_bpp(struct msm_dp *dp_display);
 void dp_display_signal_audio_start(struct msm_dp *dp_display);
 void dp_display_signal_audio_complete(struct msm_dp *dp_display);
 void dp_display_set_psr(struct msm_dp *dp, bool enter);
+void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *dentry, bool is_edp);
 
 #endif /* _DP_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c
index 40e7344180e3e..21850a384b10b 100644
--- a/drivers/gpu/drm/msm/dp/dp_drm.c
+++ b/drivers/gpu/drm/msm/dp/dp_drm.c
@@ -90,6 +90,13 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *
 	return rc;
 }
 
+static void dp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root)
+{
+	struct msm_dp *dp = to_dp_bridge(bridge)->dp_display;
+
+	dp_display_debugfs_init(dp, root, false);
+}
+
 static const struct drm_bridge_funcs dp_bridge_ops = {
 	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
 	.atomic_destroy_state   = drm_atomic_helper_bridge_destroy_state,
@@ -105,6 +112,7 @@ static const struct drm_bridge_funcs dp_bridge_ops = {
 	.hpd_enable   = dp_bridge_hpd_enable,
 	.hpd_disable  = dp_bridge_hpd_disable,
 	.hpd_notify   = dp_bridge_hpd_notify,
+	.debugfs_init = dp_bridge_debugfs_init,
 };
 
 static int edp_bridge_atomic_check(struct drm_bridge *drm_bridge,
@@ -260,6 +268,13 @@ static enum drm_mode_status edp_bridge_mode_valid(struct drm_bridge *bridge,
 	return MODE_OK;
 }
 
+static void edp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root)
+{
+	struct msm_dp *dp = to_dp_bridge(bridge)->dp_display;
+
+	dp_display_debugfs_init(dp, root, true);
+}
+
 static const struct drm_bridge_funcs edp_bridge_ops = {
 	.atomic_enable = edp_bridge_atomic_enable,
 	.atomic_disable = edp_bridge_atomic_disable,
@@ -270,6 +285,7 @@ static const struct drm_bridge_funcs edp_bridge_ops = {
 	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
 	.atomic_check = edp_bridge_atomic_check,
+	.debugfs_init = edp_bridge_debugfs_init,
 };
 
 int dp_bridge_init(struct msm_dp *dp_display, struct drm_device *dev,
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index cd5bf658df669..628ef3e663ea4 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -389,7 +389,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
 void msm_dp_irq_postinstall(struct msm_dp *dp_display);
 void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display);
 
-void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor);
 bool msm_dp_wide_bus_available(const struct msm_dp *dp_display);
 
 #else
@@ -415,11 +414,6 @@ static inline void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm
 {
 }
 
-static inline void msm_dp_debugfs_init(struct msm_dp *dp_display,
-		struct drm_minor *minor)
-{
-}
-
 static inline bool msm_dp_wide_bus_available(const struct msm_dp *dp_display)
 {
 	return false;
-- 
GitLab


From 062aeadeba1d3822b1704235de9a0a0024a78683 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:52 +0300
Subject: [PATCH 0682/2340] drm/msm/mdp5: use devres-managed allocation for
 configuration data

Use devm_kzalloc to create configuration data structure. This allows us
to remove corresponding kfree and drop mdp5_cfg_destroy() function.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546156/
Link: https://lore.kernel.org/r/20230708010407.3871346-3-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c | 24 +++++-------------------
 drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h |  1 -
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c |  2 --
 3 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
index 694d543413374..c5179e4c393ca 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
@@ -1350,23 +1350,17 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_handler)
 	return cfg_handler->revision;
 }
 
-void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_handler)
-{
-	kfree(cfg_handler);
-}
-
 struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 		uint32_t major, uint32_t minor)
 {
 	struct drm_device *dev = mdp5_kms->dev;
 	struct mdp5_cfg_handler *cfg_handler;
 	const struct mdp5_cfg_handler *cfg_handlers;
-	int i, ret = 0, num_handlers;
+	int i, num_handlers;
 
-	cfg_handler = kzalloc(sizeof(*cfg_handler), GFP_KERNEL);
+	cfg_handler = devm_kzalloc(dev->dev, sizeof(*cfg_handler), GFP_KERNEL);
 	if (unlikely(!cfg_handler)) {
-		ret = -ENOMEM;
-		goto fail;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	switch (major) {
@@ -1381,8 +1375,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 	default:
 		DRM_DEV_ERROR(dev->dev, "unexpected MDP major version: v%d.%d\n",
 				major, minor);
-		ret = -ENXIO;
-		goto fail;
+		return ERR_PTR(-ENXIO);
 	}
 
 	/* only after mdp5_cfg global pointer's init can we access the hw */
@@ -1396,8 +1389,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 	if (unlikely(!mdp5_cfg)) {
 		DRM_DEV_ERROR(dev->dev, "unexpected MDP minor revision: v%d.%d\n",
 				major, minor);
-		ret = -ENXIO;
-		goto fail;
+		return ERR_PTR(-ENXIO);
 	}
 
 	cfg_handler->revision = minor;
@@ -1406,10 +1398,4 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 	DBG("MDP5: %s hw config selected", mdp5_cfg->name);
 
 	return cfg_handler;
-
-fail:
-	if (cfg_handler)
-		mdp5_cfg_destroy(cfg_handler);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h
index c2502cc338646..26c5d8b4ab463 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h
@@ -121,6 +121,5 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_hnd);
 
 struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 		uint32_t major, uint32_t minor);
-void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_hnd);
 
 #endif /* __MDP5_CFG_H__ */
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 11d9fc2c6bf5e..9f0467c8f1728 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -629,8 +629,6 @@ static void mdp5_destroy(struct mdp5_kms *mdp5_kms)
 		mdp5_ctlm_destroy(mdp5_kms->ctlm);
 	if (mdp5_kms->smp)
 		mdp5_smp_destroy(mdp5_kms->smp);
-	if (mdp5_kms->cfg)
-		mdp5_cfg_destroy(mdp5_kms->cfg);
 
 	for (i = 0; i < mdp5_kms->num_intfs; i++)
 		kfree(mdp5_kms->intfs[i]);
-- 
GitLab


From 4c1f4c1f1b43dbe399b93cf712ce31cc0b5181e6 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:53 +0300
Subject: [PATCH 0683/2340] drm/msm/mdp5: use devres-managed allocation for CTL
 manager data

Use devm_kzalloc to create CTL manager data structure. This allows us
to remove corresponding kfree and drop mdp5_ctlm_destroy() function.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546159/
Link: https://lore.kernel.org/r/20230708010407.3871346-4-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c | 21 ++++-----------------
 drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h |  1 -
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c |  2 --
 3 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
index 1220f2b20e05c..666de99a46a5b 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
@@ -681,11 +681,6 @@ void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctl_mgr)
 	}
 }
 
-void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctl_mgr)
-{
-	kfree(ctl_mgr);
-}
-
 struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 		void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd)
 {
@@ -697,18 +692,16 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 	unsigned long flags;
 	int c, ret;
 
-	ctl_mgr = kzalloc(sizeof(*ctl_mgr), GFP_KERNEL);
+	ctl_mgr = devm_kzalloc(dev->dev, sizeof(*ctl_mgr), GFP_KERNEL);
 	if (!ctl_mgr) {
 		DRM_DEV_ERROR(dev->dev, "failed to allocate CTL manager\n");
-		ret = -ENOMEM;
-		goto fail;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	if (WARN_ON(ctl_cfg->count > MAX_CTL)) {
 		DRM_DEV_ERROR(dev->dev, "Increase static pool size to at least %d\n",
 				ctl_cfg->count);
-		ret = -ENOSPC;
-		goto fail;
+		return ERR_PTR(-ENOSPC);
 	}
 
 	/* initialize the CTL manager: */
@@ -727,7 +720,7 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 			DRM_DEV_ERROR(dev->dev, "CTL_%d: base is null!\n", c);
 			ret = -EINVAL;
 			spin_unlock_irqrestore(&ctl_mgr->pool_lock, flags);
-			goto fail;
+			return ERR_PTR(ret);
 		}
 		ctl->ctlm = ctl_mgr;
 		ctl->id = c;
@@ -755,10 +748,4 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 	DBG("Pool of %d CTLs created.", ctl_mgr->nctl);
 
 	return ctl_mgr;
-
-fail:
-	if (ctl_mgr)
-		mdp5_ctlm_destroy(ctl_mgr);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
index c2af68aa77aed..9020e8efc4e4b 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
@@ -17,7 +17,6 @@ struct mdp5_ctl_manager;
 struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 		void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd);
 void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctlm);
-void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctlm);
 
 /*
  * CTL prototypes:
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 9f0467c8f1728..d2bd14e8dadf6 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -625,8 +625,6 @@ static void mdp5_destroy(struct mdp5_kms *mdp5_kms)
 {
 	int i;
 
-	if (mdp5_kms->ctlm)
-		mdp5_ctlm_destroy(mdp5_kms->ctlm);
 	if (mdp5_kms->smp)
 		mdp5_smp_destroy(mdp5_kms->smp);
 
-- 
GitLab


From 1ad175c2c884a8ee56fb669648ef655a5d3f22fe Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:54 +0300
Subject: [PATCH 0684/2340] drm/msm/mdp5: use devres-managed allocation for
 mixer data

Use devm_kzalloc to create mixer data structure. This allows us
to remove corresponding kfree and drop mdp5_mixer_destroy() function.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546166/
Link: https://lore.kernel.org/r/20230708010407.3871346-5-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c   |  5 +----
 drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c | 10 +++-------
 drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h |  4 ++--
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index d2bd14e8dadf6..3189b3b0a7432 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -211,9 +211,6 @@ static void mdp5_kms_destroy(struct msm_kms *kms)
 	struct msm_gem_address_space *aspace = kms->aspace;
 	int i;
 
-	for (i = 0; i < mdp5_kms->num_hwmixers; i++)
-		mdp5_mixer_destroy(mdp5_kms->hwmixers[i]);
-
 	for (i = 0; i < mdp5_kms->num_hwpipes; i++)
 		mdp5_pipe_destroy(mdp5_kms->hwpipes[i]);
 
@@ -720,7 +717,7 @@ static int hwmixer_init(struct mdp5_kms *mdp5_kms)
 	for (i = 0; i < hw_cfg->lm.count; i++) {
 		struct mdp5_hw_mixer *mixer;
 
-		mixer = mdp5_mixer_init(&hw_cfg->lm.instances[i]);
+		mixer = mdp5_mixer_init(dev, &hw_cfg->lm.instances[i]);
 		if (IS_ERR(mixer)) {
 			ret = PTR_ERR(mixer);
 			DRM_DEV_ERROR(dev->dev, "failed to construct LM%d (%d)\n",
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
index 2536def2a0005..2822b533f8077 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
@@ -140,20 +140,16 @@ int mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer)
 	return 0;
 }
 
-void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer)
-{
-	kfree(mixer);
-}
-
 static const char * const mixer_names[] = {
 	"LM0", "LM1", "LM2", "LM3", "LM4", "LM5",
 };
 
-struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm)
+struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev,
+				      const struct mdp5_lm_instance *lm)
 {
 	struct mdp5_hw_mixer *mixer;
 
-	mixer = kzalloc(sizeof(*mixer), GFP_KERNEL);
+	mixer = devm_kzalloc(dev->dev, sizeof(*mixer), GFP_KERNEL);
 	if (!mixer)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
index 545ee223b9d74..2bedd75835bcd 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
@@ -25,8 +25,8 @@ struct mdp5_hw_mixer_state {
 	struct drm_crtc *hwmixer_to_crtc[8];
 };
 
-struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm);
-void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm);
+struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev,
+				      const struct mdp5_lm_instance *lm);
 int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc,
 		      uint32_t caps, struct mdp5_hw_mixer **mixer,
 		      struct mdp5_hw_mixer **r_mixer);
-- 
GitLab


From 323e9a18d6e195f44eaaa3d3fe92fce9073fe298 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:55 +0300
Subject: [PATCH 0685/2340] drm/msm/mdp5: use devres-managed allocation for
 pipe data

Use devm_kzalloc to create pipe data structure. This allows us
to remove corresponding kfree and drop mdp5_pipe_destroy() function.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546171/
Link: https://lore.kernel.org/r/20230708010407.3871346-6-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c  |  6 +-----
 drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c | 10 +++-------
 drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h |  4 ++--
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 3189b3b0a7432..b918c9dc0afb6 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -209,10 +209,6 @@ static void mdp5_kms_destroy(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	struct msm_gem_address_space *aspace = kms->aspace;
-	int i;
-
-	for (i = 0; i < mdp5_kms->num_hwpipes; i++)
-		mdp5_pipe_destroy(mdp5_kms->hwpipes[i]);
 
 	if (aspace) {
 		aspace->mmu->funcs->detach(aspace->mmu);
@@ -645,7 +641,7 @@ static int construct_pipes(struct mdp5_kms *mdp5_kms, int cnt,
 	for (i = 0; i < cnt; i++) {
 		struct mdp5_hw_pipe *hwpipe;
 
-		hwpipe = mdp5_pipe_init(pipes[i], offsets[i], caps);
+		hwpipe = mdp5_pipe_init(dev, pipes[i], offsets[i], caps);
 		if (IS_ERR(hwpipe)) {
 			ret = PTR_ERR(hwpipe);
 			DRM_DEV_ERROR(dev->dev, "failed to construct pipe for %s (%d)\n",
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
index e4b8a789835a4..99b2c30b1d486 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
@@ -151,17 +151,13 @@ int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe)
 	return 0;
 }
 
-void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe)
-{
-	kfree(hwpipe);
-}
-
-struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe,
+struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev,
+		enum mdp5_pipe pipe,
 		uint32_t reg_offset, uint32_t caps)
 {
 	struct mdp5_hw_pipe *hwpipe;
 
-	hwpipe = kzalloc(sizeof(*hwpipe), GFP_KERNEL);
+	hwpipe = devm_kzalloc(dev->dev, sizeof(*hwpipe), GFP_KERNEL);
 	if (!hwpipe)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
index cca67938cab21..452138821f600 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
@@ -39,8 +39,8 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane,
 		     struct mdp5_hw_pipe **r_hwpipe);
 int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe);
 
-struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe,
+struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev,
+		enum mdp5_pipe pipe,
 		uint32_t reg_offset, uint32_t caps);
-void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe);
 
 #endif /* __MDP5_PIPE_H__ */
-- 
GitLab


From 531d5313d934325c10721e1d22e352dc4c4a236e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:56 +0300
Subject: [PATCH 0686/2340] drm/msm/mdp5: use devres-managed allocation for SMP
 data

Use devm_kzalloc to create SMP data structure. This allows us
to remove corresponding kfree and drop mdp5_smp_destroy() function.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546172/
Link: https://lore.kernel.org/r/20230708010407.3871346-7-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c |  3 ---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c | 19 ++++---------------
 drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h |  1 -
 3 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index b918c9dc0afb6..4cfb1a8e903ed 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -618,9 +618,6 @@ static void mdp5_destroy(struct mdp5_kms *mdp5_kms)
 {
 	int i;
 
-	if (mdp5_kms->smp)
-		mdp5_smp_destroy(mdp5_kms->smp);
-
 	for (i = 0; i < mdp5_kms->num_intfs; i++)
 		kfree(mdp5_kms->intfs[i]);
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
index b68682c1b5bcf..8b59562e29e28 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
@@ -370,23 +370,17 @@ void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p)
 		drm_modeset_unlock(&mdp5_kms->glob_state_lock);
 }
 
-void mdp5_smp_destroy(struct mdp5_smp *smp)
-{
-	kfree(smp);
-}
 
 struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_block *cfg)
 {
+	struct drm_device *dev = mdp5_kms->dev;
 	struct mdp5_smp_state *state;
 	struct mdp5_global_state *global_state;
 	struct mdp5_smp *smp;
-	int ret;
 
-	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
-	if (unlikely(!smp)) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	smp = devm_kzalloc(dev->dev, sizeof(*smp), GFP_KERNEL);
+	if (unlikely(!smp))
+		return ERR_PTR(-ENOMEM);
 
 	smp->dev = mdp5_kms->dev;
 	smp->blk_cnt = cfg->mmb_count;
@@ -400,9 +394,4 @@ struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_
 	memcpy(smp->reserved, cfg->reserved, sizeof(smp->reserved));
 
 	return smp;
-fail:
-	if (smp)
-		mdp5_smp_destroy(smp);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h
index ba5618e136c3d..d8b6a11413d93 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h
@@ -68,7 +68,6 @@ struct mdp5_smp;
 
 struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms,
 		const struct mdp5_smp_block *cfg);
-void  mdp5_smp_destroy(struct mdp5_smp *smp);
 
 void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p);
 
-- 
GitLab


From 6de8288bf668ef4eba1258a523faf32a8d406d9e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:57 +0300
Subject: [PATCH 0687/2340] drm/msm/mdp5: use devres-managed allocation for
 INTF data

Use devm_kzalloc to create INTF data structure. This allows us
to remove corresponding kfree() call.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546163/
Link: https://lore.kernel.org/r/20230708010407.3871346-8-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 4cfb1a8e903ed..48f447f4e1833 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -616,11 +616,6 @@ fail:
 
 static void mdp5_destroy(struct mdp5_kms *mdp5_kms)
 {
-	int i;
-
-	for (i = 0; i < mdp5_kms->num_intfs; i++)
-		kfree(mdp5_kms->intfs[i]);
-
 	if (mdp5_kms->rpm_enabled)
 		pm_runtime_disable(&mdp5_kms->pdev->dev);
 
@@ -741,7 +736,7 @@ static int interface_init(struct mdp5_kms *mdp5_kms)
 		if (intf_types[i] == INTF_DISABLED)
 			continue;
 
-		intf = kzalloc(sizeof(*intf), GFP_KERNEL);
+		intf = devm_kzalloc(dev->dev, sizeof(*intf), GFP_KERNEL);
 		if (!intf) {
 			DRM_DEV_ERROR(dev->dev, "failed to construct INTF%d\n", i);
 			return -ENOMEM;
-- 
GitLab


From 6f235e3d6b1894a808cefdf32e45998a6459794f Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:58 +0300
Subject: [PATCH 0688/2340] drm/msm/mdp5: use drmm-managed allocation for
 mdp5_crtc

Change struct mdp5_crtc allocation to use drmm_crtc_alloc(). This
removes the need to perform any actions on CRTC destruction.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546169/
Link: https://lore.kernel.org/r/20230708010407.3871346-9-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 30 +++++++++++------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index 86036dd4e1e82..4a3db2ea16895 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -13,6 +13,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_flip_work.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_managed.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
 
@@ -172,14 +173,11 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val)
 	drm_gem_object_put(val);
 }
 
-static void mdp5_crtc_destroy(struct drm_crtc *crtc)
+static void mdp5_crtc_flip_cleanup(struct drm_device *dev, void *ptr)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc *mdp5_crtc = ptr;
 
-	drm_crtc_cleanup(crtc);
 	drm_flip_work_cleanup(&mdp5_crtc->unref_cursor_work);
-
-	kfree(mdp5_crtc);
 }
 
 static inline u32 mdp5_lm_use_fg_alpha_mask(enum mdp_mixer_stage_id stage)
@@ -1147,7 +1145,6 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc)
 
 static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = mdp5_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.reset = mdp5_crtc_reset,
 	.atomic_duplicate_state = mdp5_crtc_duplicate_state,
@@ -1161,7 +1158,6 @@ static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
 
 static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = mdp5_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.reset = mdp5_crtc_reset,
 	.atomic_duplicate_state = mdp5_crtc_duplicate_state,
@@ -1327,10 +1323,16 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 {
 	struct drm_crtc *crtc = NULL;
 	struct mdp5_crtc *mdp5_crtc;
+	int ret;
 
-	mdp5_crtc = kzalloc(sizeof(*mdp5_crtc), GFP_KERNEL);
-	if (!mdp5_crtc)
-		return ERR_PTR(-ENOMEM);
+	mdp5_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp5_crtc, base,
+						plane, cursor_plane,
+						cursor_plane ?
+						&mdp5_crtc_no_lm_cursor_funcs :
+						&mdp5_crtc_funcs,
+						NULL);
+	if (IS_ERR(mdp5_crtc))
+		return ERR_CAST(mdp5_crtc);
 
 	crtc = &mdp5_crtc->base;
 
@@ -1346,13 +1348,11 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 
 	mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true;
 
-	drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane,
-				  cursor_plane ?
-				  &mdp5_crtc_no_lm_cursor_funcs :
-				  &mdp5_crtc_funcs, NULL);
-
 	drm_flip_work_init(&mdp5_crtc->unref_cursor_work,
 			"unref cursor", unref_cursor_worker);
+	ret = drmm_add_action_or_reset(dev, mdp5_crtc_flip_cleanup, mdp5_crtc);
+	if (ret)
+		return ERR_PTR(ret);
 
 	drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs);
 
-- 
GitLab


From 669afee4a17ebb758889b1470b993efe9c66d5eb Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:03:59 +0300
Subject: [PATCH 0689/2340] drm/msm/mdp5: use drmm-managed allocation for
 mdp5_encoder

Change struct mdp5_encoder allocation to use drmm_encoder_alloc(). This
removes the need to perform any actions on encoder destruction.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546164/
Link: https://lore.kernel.org/r/20230708010407.3871346-10-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c | 29 +++-----------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
index 79d67c495780f..8db97083e14db 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
@@ -16,17 +16,6 @@ static struct mdp5_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp5_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp5_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(mdp5_encoder);
-}
-
-static const struct drm_encoder_funcs mdp5_encoder_funcs = {
-	.destroy = mdp5_encoder_destroy,
-};
-
 static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder,
 				      struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode)
@@ -342,13 +331,11 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
 	struct mdp5_encoder *mdp5_encoder;
 	int enc_type = (intf->type == INTF_DSI) ?
 		DRM_MODE_ENCODER_DSI : DRM_MODE_ENCODER_TMDS;
-	int ret;
 
-	mdp5_encoder = kzalloc(sizeof(*mdp5_encoder), GFP_KERNEL);
-	if (!mdp5_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp5_encoder = drmm_encoder_alloc(dev, struct mdp5_encoder, base,
+					  NULL, enc_type, NULL);
+	if (IS_ERR(mdp5_encoder))
+		return ERR_CAST(mdp5_encoder);
 
 	encoder = &mdp5_encoder->base;
 	mdp5_encoder->ctl = ctl;
@@ -356,15 +343,7 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
 
 	spin_lock_init(&mdp5_encoder->intf_lock);
 
-	drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type, NULL);
-
 	drm_encoder_helper_add(encoder, &mdp5_encoder_helper_funcs);
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp5_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
-- 
GitLab


From 54f1fbcb47d45bbd9737cd0115e44fd80acf4073 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:04:01 +0300
Subject: [PATCH 0690/2340] drm/msm/mdp4: use bulk regulators API for LCDC
 encoder

Switch mdp4_lcdc_encoder to using regulator_bulk_* API instead of
enumerating regulators by hand.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546173/
Link: https://lore.kernel.org/r/20230708010407.3871346-12-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c | 51 ++++++-------------
 1 file changed, 15 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
index 10eb3e5b218ef..67c118bb30cad 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
@@ -18,7 +18,7 @@ struct mdp4_lcdc_encoder {
 	struct drm_panel *panel;
 	struct clk *lcdc_clk;
 	unsigned long int pixclock;
-	struct regulator *regs[3];
+	struct regulator_bulk_data regs[3];
 	bool enabled;
 	uint32_t bsc;
 };
@@ -271,12 +271,10 @@ static void mdp4_lcdc_encoder_mode_set(struct drm_encoder *encoder,
 
 static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder)
 {
-	struct drm_device *dev = encoder->dev;
 	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
 			to_mdp4_lcdc_encoder(encoder);
 	struct mdp4_kms *mdp4_kms = get_kms(encoder);
 	struct drm_panel *panel;
-	int i, ret;
 
 	if (WARN_ON(!mdp4_lcdc_encoder->enabled))
 		return;
@@ -301,11 +299,8 @@ static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder)
 
 	clk_disable_unprepare(mdp4_lcdc_encoder->lcdc_clk);
 
-	for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) {
-		ret = regulator_disable(mdp4_lcdc_encoder->regs[i]);
-		if (ret)
-			DRM_DEV_ERROR(dev->dev, "failed to disable regulator: %d\n", ret);
-	}
+	regulator_bulk_disable(ARRAY_SIZE(mdp4_lcdc_encoder->regs),
+			       mdp4_lcdc_encoder->regs);
 
 	mdp4_lcdc_encoder->enabled = false;
 }
@@ -319,7 +314,7 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder)
 	struct mdp4_kms *mdp4_kms = get_kms(encoder);
 	struct drm_panel *panel;
 	uint32_t config;
-	int i, ret;
+	int ret;
 
 	if (WARN_ON(mdp4_lcdc_encoder->enabled))
 		return;
@@ -339,11 +334,10 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder)
 	mdp4_crtc_set_config(encoder->crtc, config);
 	mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV, 0);
 
-	for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) {
-		ret = regulator_enable(mdp4_lcdc_encoder->regs[i]);
-		if (ret)
-			DRM_DEV_ERROR(dev->dev, "failed to enable regulator: %d\n", ret);
-	}
+	ret = regulator_bulk_enable(ARRAY_SIZE(mdp4_lcdc_encoder->regs),
+				    mdp4_lcdc_encoder->regs);
+	if (ret)
+		DRM_DEV_ERROR(dev->dev, "failed to enable regulators: %d\n", ret);
 
 	DBG("setting lcdc_clk=%lu", pc);
 	ret = clk_set_rate(mdp4_lcdc_encoder->lcdc_clk, pc);
@@ -385,7 +379,6 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
 {
 	struct drm_encoder *encoder = NULL;
 	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder;
-	struct regulator *reg;
 	int ret;
 
 	mdp4_lcdc_encoder = kzalloc(sizeof(*mdp4_lcdc_encoder), GFP_KERNEL);
@@ -411,29 +404,15 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
 	}
 
 	/* TODO: different regulators in other cases? */
-	reg = devm_regulator_get(dev->dev, "lvds-vccs-3p3v");
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		DRM_DEV_ERROR(dev->dev, "failed to get lvds-vccs-3p3v: %d\n", ret);
-		goto fail;
-	}
-	mdp4_lcdc_encoder->regs[0] = reg;
-
-	reg = devm_regulator_get(dev->dev, "lvds-pll-vdda");
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		DRM_DEV_ERROR(dev->dev, "failed to get lvds-pll-vdda: %d\n", ret);
-		goto fail;
-	}
-	mdp4_lcdc_encoder->regs[1] = reg;
+	mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v";
+	mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v";
+	mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda";
 
-	reg = devm_regulator_get(dev->dev, "lvds-vdda");
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		DRM_DEV_ERROR(dev->dev, "failed to get lvds-vdda: %d\n", ret);
+	ret = devm_regulator_bulk_get(dev->dev,
+				      ARRAY_SIZE(mdp4_lcdc_encoder->regs),
+				      mdp4_lcdc_encoder->regs);
+	if (ret)
 		goto fail;
-	}
-	mdp4_lcdc_encoder->regs[2] = reg;
 
 	return encoder;
 
-- 
GitLab


From 783ad6e6312fe28a005a3378128cdbbaab211527 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:04:02 +0300
Subject: [PATCH 0691/2340] drm/msm/mdp4: use drmm-managed allocation for
 mdp4_crtc

Change struct mdp4_crtc allocation to use drmm_crtc_alloc(). This
removes the need to perform any actions on CRTC destruction.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546184/
Link: https://lore.kernel.org/r/20230708010407.3871346-13-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 33 ++++++++++++-----------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
index 169f9de4a12a7..5e5c31b44a8a3 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
@@ -6,6 +6,7 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_flip_work.h>
+#include <drm/drm_managed.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
@@ -123,16 +124,6 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val)
 	drm_gem_object_put(val);
 }
 
-static void mdp4_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-
-	drm_crtc_cleanup(crtc);
-	drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work);
-
-	kfree(mdp4_crtc);
-}
-
 /* statically (for now) map planes to mixer stage (z-order): */
 static const int idxs[] = {
 		[VG1]  = 1,
@@ -475,7 +466,6 @@ static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 
 static const struct drm_crtc_funcs mdp4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = mdp4_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.cursor_set = mdp4_crtc_cursor_set,
 	.cursor_move = mdp4_crtc_cursor_move,
@@ -616,6 +606,13 @@ static const char *dma_names[] = {
 		"DMA_P", "DMA_S", "DMA_E",
 };
 
+static void mdp4_crtc_flip_cleanup(struct drm_device *dev, void *ptr)
+{
+	struct mdp4_crtc *mdp4_crtc = ptr;
+
+	drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work);
+}
+
 /* initialize crtc */
 struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 		struct drm_plane *plane, int id, int ovlp_id,
@@ -623,10 +620,13 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 {
 	struct drm_crtc *crtc = NULL;
 	struct mdp4_crtc *mdp4_crtc;
+	int ret;
 
-	mdp4_crtc = kzalloc(sizeof(*mdp4_crtc), GFP_KERNEL);
-	if (!mdp4_crtc)
-		return ERR_PTR(-ENOMEM);
+	mdp4_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp4_crtc, base,
+						plane, NULL,
+						&mdp4_crtc_funcs, NULL);
+	if (IS_ERR(mdp4_crtc))
+		return ERR_CAST(mdp4_crtc);
 
 	crtc = &mdp4_crtc->base;
 
@@ -648,9 +648,10 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 
 	drm_flip_work_init(&mdp4_crtc->unref_cursor_work,
 			"unref cursor", unref_cursor_worker);
+	ret = drmm_add_action_or_reset(dev, mdp4_crtc_flip_cleanup, mdp4_crtc);
+	if (ret)
+		return ERR_PTR(ret);
 
-	drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs,
-				  NULL);
 	drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs);
 
 	return crtc;
-- 
GitLab


From e79571e8708bdcbd12c85ec0fdef3c6b99e34e0f Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:04:03 +0300
Subject: [PATCH 0692/2340] drm/msm/mdp4: use drmm-managed allocation for
 mdp4_dsi_encoder

Change struct mdp4_dsi_encoder allocation to use drmm_encoder_alloc().
This removes the need to perform any actions on this encoder
destruction.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546180/
Link: https://lore.kernel.org/r/20230708010407.3871346-14-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c  | 32 +++----------------
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
index 39b8fe53c29de..74dafe7106be0 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
@@ -26,18 +26,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp4_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp4_dsi_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder);
-
-	drm_encoder_cleanup(encoder);
-	kfree(mdp4_dsi_encoder);
-}
-
-static const struct drm_encoder_funcs mdp4_dsi_encoder_funcs = {
-	.destroy = mdp4_dsi_encoder_destroy,
-};
-
 static void mdp4_dsi_encoder_mode_set(struct drm_encoder *encoder,
 				      struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode)
@@ -148,28 +136,18 @@ static const struct drm_encoder_helper_funcs mdp4_dsi_encoder_helper_funcs = {
 /* initialize encoder */
 struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev)
 {
-	struct drm_encoder *encoder = NULL;
+	struct drm_encoder *encoder;
 	struct mdp4_dsi_encoder *mdp4_dsi_encoder;
-	int ret;
 
-	mdp4_dsi_encoder = kzalloc(sizeof(*mdp4_dsi_encoder), GFP_KERNEL);
-	if (!mdp4_dsi_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp4_dsi_encoder = drmm_encoder_alloc(dev, struct mdp4_dsi_encoder, base,
+					      NULL, DRM_MODE_ENCODER_DSI, NULL);
+	if (IS_ERR(mdp4_dsi_encoder))
+		return ERR_CAST(mdp4_dsi_encoder);
 
 	encoder = &mdp4_dsi_encoder->base;
 
-	drm_encoder_init(dev, encoder, &mdp4_dsi_encoder_funcs,
-			 DRM_MODE_ENCODER_DSI, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_dsi_encoder_helper_funcs);
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp4_dsi_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
 #endif /* CONFIG_DRM_MSM_DSI */
-- 
GitLab


From 93d6e1b82b93df1a7e58cd511663197f1208a8b0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:04:04 +0300
Subject: [PATCH 0693/2340] drm/msm/mdp4: use drmm-managed allocation for
 mdp4_dtv_encoder

Change struct mdp4_dtv_encoder allocation to use drmm_encoder_alloc().
This removes the need to perform any actions on this encoder
destruction.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546178/
Link: https://lore.kernel.org/r/20230708010407.3871346-15-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c  | 37 ++++---------------
 1 file changed, 7 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c
index 88645dbc3785b..3b70764b48c47 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c
@@ -25,17 +25,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp4_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp4_dtv_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(mdp4_dtv_encoder);
-}
-
-static const struct drm_encoder_funcs mdp4_dtv_encoder_funcs = {
-	.destroy = mdp4_dtv_encoder_destroy,
-};
-
 static void mdp4_dtv_encoder_mode_set(struct drm_encoder *encoder,
 		struct drm_display_mode *mode,
 		struct drm_display_mode *adjusted_mode)
@@ -173,41 +162,29 @@ long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
 /* initialize encoder */
 struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev)
 {
-	struct drm_encoder *encoder = NULL;
+	struct drm_encoder *encoder;
 	struct mdp4_dtv_encoder *mdp4_dtv_encoder;
-	int ret;
 
-	mdp4_dtv_encoder = kzalloc(sizeof(*mdp4_dtv_encoder), GFP_KERNEL);
-	if (!mdp4_dtv_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp4_dtv_encoder = drmm_encoder_alloc(dev, struct mdp4_dtv_encoder, base,
+					      NULL, DRM_MODE_ENCODER_TMDS, NULL);
+	if (IS_ERR(mdp4_dtv_encoder))
+		return ERR_CAST(mdp4_dtv_encoder);
 
 	encoder = &mdp4_dtv_encoder->base;
 
-	drm_encoder_init(dev, encoder, &mdp4_dtv_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs);
 
 	mdp4_dtv_encoder->hdmi_clk = devm_clk_get(dev->dev, "hdmi_clk");
 	if (IS_ERR(mdp4_dtv_encoder->hdmi_clk)) {
 		DRM_DEV_ERROR(dev->dev, "failed to get hdmi_clk\n");
-		ret = PTR_ERR(mdp4_dtv_encoder->hdmi_clk);
-		goto fail;
+		return ERR_CAST(mdp4_dtv_encoder->hdmi_clk);
 	}
 
 	mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "tv_clk");
 	if (IS_ERR(mdp4_dtv_encoder->mdp_clk)) {
 		DRM_DEV_ERROR(dev->dev, "failed to get tv_clk\n");
-		ret = PTR_ERR(mdp4_dtv_encoder->mdp_clk);
-		goto fail;
+		return ERR_CAST(mdp4_dtv_encoder->mdp_clk);
 	}
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp4_dtv_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
-- 
GitLab


From 2c24668cc068fc69bf3ffb32234c7e6e91184a82 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 8 Jul 2023 04:04:05 +0300
Subject: [PATCH 0694/2340] drm/msm/mdp4: use drmm-managed allocation for
 mdp4_lcdc_encoder

Change struct mdp4_lcdc_encoder allocation to use drmm_encoder_alloc().
This removes the need to perform any actions on this encoder
destruction.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/546176/
Link: https://lore.kernel.org/r/20230708010407.3871346-16-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c | 36 ++++---------------
 1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
index 67c118bb30cad..576995ddce37e 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
@@ -30,18 +30,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp4_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp4_lcdc_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
-			to_mdp4_lcdc_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(mdp4_lcdc_encoder);
-}
-
-static const struct drm_encoder_funcs mdp4_lcdc_encoder_funcs = {
-	.destroy = mdp4_lcdc_encoder_destroy,
-};
-
 /* this should probably be a helper: */
 static struct drm_connector *get_connector(struct drm_encoder *encoder)
 {
@@ -377,30 +365,26 @@ long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
 struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
 		struct device_node *panel_node)
 {
-	struct drm_encoder *encoder = NULL;
+	struct drm_encoder *encoder;
 	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder;
 	int ret;
 
-	mdp4_lcdc_encoder = kzalloc(sizeof(*mdp4_lcdc_encoder), GFP_KERNEL);
-	if (!mdp4_lcdc_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp4_lcdc_encoder = drmm_encoder_alloc(dev, struct mdp4_lcdc_encoder, base,
+					       NULL, DRM_MODE_ENCODER_LVDS, NULL);
+	if (IS_ERR(mdp4_lcdc_encoder))
+		return ERR_CAST(mdp4_lcdc_encoder);
 
 	mdp4_lcdc_encoder->panel_node = panel_node;
 
 	encoder = &mdp4_lcdc_encoder->base;
 
-	drm_encoder_init(dev, encoder, &mdp4_lcdc_encoder_funcs,
-			 DRM_MODE_ENCODER_LVDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs);
 
 	/* TODO: do we need different pll in other cases? */
 	mdp4_lcdc_encoder->lcdc_clk = mpd4_lvds_pll_init(dev);
 	if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) {
 		DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n");
-		ret = PTR_ERR(mdp4_lcdc_encoder->lcdc_clk);
-		goto fail;
+		return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk);
 	}
 
 	/* TODO: different regulators in other cases? */
@@ -412,13 +396,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
 				      ARRAY_SIZE(mdp4_lcdc_encoder->regs),
 				      mdp4_lcdc_encoder->regs);
 	if (ret)
-		goto fail;
+		return ERR_PTR(ret);
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp4_lcdc_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
-- 
GitLab


From 52e36770b174948126416ecddbcb0da817bc3cfc Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 7 Nov 2023 11:36:00 +0100
Subject: [PATCH 0695/2340] dt-bindings: display/msm: qcom, sm8250-mdss: add
 DisplayPort controller node

Document the DisplayPort controller node in MDSS binding, already used
in DTS:

  sm8250-xiaomi-elish-boe.dtb: display-subsystem@ae00000: Unevaluated properties are not allowed ('displayport-controller@ae90000' was unexpected)

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Patchwork: https://patchwork.freedesktop.org/patch/566297/
Link: https://lore.kernel.org/r/20231107103600.27424-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../bindings/display/msm/qcom,sm8250-mdss.yaml         | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml
index 994975909fea5..51368cda7b2fe 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml
@@ -52,6 +52,16 @@ patternProperties:
       compatible:
         const: qcom,sm8250-dpu
 
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        items:
+          - const: qcom,sm8250-dp
+          - const: qcom,sm8350-dp
+
   "^dsi@[0-9a-f]+$":
     type: object
     additionalProperties: true
-- 
GitLab


From 1cd83dfe9a584a0db2f9a7dc617d710123169feb Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Sat, 11 Nov 2023 15:20:17 +0100
Subject: [PATCH 0696/2340] dt-bindings: display/msm: qcom, sm8150-mdss:
 correct DSI PHY compatible

Qualcomm SM8150 MDSS comes with a bit different 7nm DSI PHY with its own
compatible.  DTS already use it:

  sa8155p-adp.dtb: display-subsystem@ae00000: phy@ae94400:compatible:0: 'qcom,dsi-phy-7nm' was expected

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Patchwork: https://patchwork.freedesktop.org/patch/567178/
Link: https://lore.kernel.org/r/20231111142017.51922-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
index a2a8be7f64a9c..c0d6a4fdff97e 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
@@ -69,7 +69,7 @@ patternProperties:
 
     properties:
       compatible:
-        const: qcom,dsi-phy-7nm
+        const: qcom,dsi-phy-7nm-8150
 
 unevaluatedProperties: false
 
@@ -247,7 +247,7 @@ examples:
         };
 
         dsi0_phy: phy@ae94400 {
-            compatible = "qcom,dsi-phy-7nm";
+            compatible = "qcom,dsi-phy-7nm-8150";
             reg = <0x0ae94400 0x200>,
                   <0x0ae94600 0x280>,
                   <0x0ae94900 0x260>;
@@ -318,7 +318,7 @@ examples:
         };
 
         dsi1_phy: phy@ae96400 {
-            compatible = "qcom,dsi-phy-7nm";
+            compatible = "qcom,dsi-phy-7nm-8150";
             reg = <0x0ae96400 0x200>,
                   <0x0ae96600 0x280>,
                   <0x0ae96900 0x260>;
-- 
GitLab


From c6721b3c6423d8a348ae885a0f4c85e14f9bf85c Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 28 Nov 2023 00:54:01 +0300
Subject: [PATCH 0697/2340] drm/msm/mdp4: flush vblank event on disable

Flush queued events when disabling the crtc. This avoids timeouts when
we come back and wait for dependencies (like the previous frame's
flip_done).

Fixes: c8afe684c95c ("drm/msm: basic KMS driver for snapdragon")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/569127/
Link: https://lore.kernel.org/r/20231127215401.4064128-1-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
index 5e5c31b44a8a3..75f93e3462825 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
@@ -260,6 +260,7 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
 	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	unsigned long flags;
 
 	DBG("%s", mdp4_crtc->name);
 
@@ -272,6 +273,14 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc,
 	mdp_irq_unregister(&mdp4_kms->base, &mdp4_crtc->err);
 	mdp4_disable(mdp4_kms);
 
+	if (crtc->state->event && !crtc->state->active) {
+		WARN_ON(mdp4_crtc->event);
+		spin_lock_irqsave(&mdp4_kms->dev->event_lock, flags);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+		spin_unlock_irqrestore(&mdp4_kms->dev->event_lock, flags);
+	}
+
 	mdp4_crtc->enabled = false;
 }
 
-- 
GitLab


From 25daacc60394d1c82b14b13ebfb87544e14a5f36 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Wed, 29 Nov 2023 15:43:58 +0100
Subject: [PATCH 0698/2340] dt-bindings: display: msm: qcm2290-mdss: Use the
 non-deprecated DSI compat

The "qcom,dsi-ctrl-6g-qcm2290" has been deprecated in
commit 0c0f65c6dd44 ("dt-bindings: msm: dsi-controller-main: Add
compatible strings for every current SoC"), but the example hasn't been
updated to reflect that.

Fix that.

Fixes: 0c0f65c6dd44 ("dt-bindings: msm: dsi-controller-main: Add compatible strings for every current SoC")
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/569478/
Link: https://lore.kernel.org/r/20231125-topic-rb1_feat-v3-1-4cbb567743bb@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
index 5ad155612b6cf..d71a8e09a798e 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
@@ -56,7 +56,9 @@ patternProperties:
 
     properties:
       compatible:
-        const: qcom,dsi-ctrl-6g-qcm2290
+        items:
+          - const: qcom,qcm2290-dsi-ctrl
+          - const: qcom,mdss-dsi-ctrl
 
   "^phy@[0-9a-f]+$":
     type: object
@@ -136,7 +138,8 @@ examples:
         };
 
         dsi@5e94000 {
-            compatible = "qcom,dsi-ctrl-6g-qcm2290";
+            compatible = "qcom,qcm2290-dsi-ctrl",
+                         "qcom,mdss-dsi-ctrl";
             reg = <0x05e94000 0x400>;
             reg-names = "dsi_ctrl";
 
-- 
GitLab


From a1ed5860efd39cb6c7766a94badff0c7616df6f5 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Wed, 29 Nov 2023 15:43:59 +0100
Subject: [PATCH 0699/2340] dt-bindings: display: msm: Add reg bus and rotator
 interconnects

Apart from the already handled data bus (MAS_MDP_Pn<->DDR), there are
other connection paths:
- a path that connects rotator block to the DDR.
- a path that needs to be handled to ensure MDSS register access
  functions properly, namely the "reg bus", a.k.a the CPU-MDSS CFG
  interconnect.

Describe these paths to allow using them in device trees and in the
driver.

[Konrad: rework for one vs two MDP paths, update examples]
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/569480/
Link: https://lore.kernel.org/r/20231125-topic-rb1_feat-v3-2-4cbb567743bb@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../bindings/display/msm/mdss-common.yaml      | 18 ++++++++++++++----
 .../display/msm/qcom,qcm2290-mdss.yaml         | 14 ++++++++++----
 .../bindings/display/msm/qcom,sc7180-mdss.yaml | 14 ++++++++++----
 .../bindings/display/msm/qcom,sc7280-mdss.yaml | 14 ++++++++++----
 .../bindings/display/msm/qcom,sm6115-mdss.yaml | 10 ++++++++++
 .../bindings/display/msm/qcom,sm6125-mdss.yaml |  8 ++++++--
 .../bindings/display/msm/qcom,sm6350-mdss.yaml |  8 ++++++--
 .../bindings/display/msm/qcom,sm6375-mdss.yaml |  8 ++++++--
 .../bindings/display/msm/qcom,sm8450-mdss.yaml | 13 ++++++++-----
 9 files changed, 80 insertions(+), 27 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/msm/mdss-common.yaml b/Documentation/devicetree/bindings/display/msm/mdss-common.yaml
index f69196e4cc765..c6305a6e03346 100644
--- a/Documentation/devicetree/bindings/display/msm/mdss-common.yaml
+++ b/Documentation/devicetree/bindings/display/msm/mdss-common.yaml
@@ -61,17 +61,27 @@ properties:
 
   ranges: true
 
+  # This is not a perfect description, but it's impossible to discern and match
+  # the entries like we do with interconnect-names
   interconnects:
     minItems: 1
     items:
       - description: Interconnect path from mdp0 (or a single mdp) port to the data bus
       - description: Interconnect path from mdp1 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    minItems: 1
-    items:
-      - const: mdp0-mem
-      - const: mdp1-mem
+    oneOf:
+      - minItems: 1
+        items:
+          - const: mdp0-mem
+          - const: cpu-cfg
+
+      - minItems: 2
+        items:
+          - const: mdp0-mem
+          - const: mdp1-mem
+          - const: cpu-cfg
 
   resets:
     items:
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
index d71a8e09a798e..f0cdb54226885 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
@@ -36,10 +36,14 @@ properties:
     maxItems: 2
 
   interconnects:
-    maxItems: 1
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 1
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -98,8 +102,10 @@ examples:
         interrupt-controller;
         #interrupt-cells = <1>;
 
-        interconnects = <&mmrt_virt MASTER_MDP0 &bimc SLAVE_EBI1>;
-        interconnect-names = "mdp0-mem";
+        interconnects = <&mmrt_virt MASTER_MDP0 &bimc SLAVE_EBI1>,
+                        <&bimc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "cpu-cfg";
 
         iommus = <&apps_smmu 0x420 0x2>,
                  <&apps_smmu 0x421 0x0>;
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml
index 3432a2407caa6..7a0555b15ddf1 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml
@@ -36,10 +36,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 1
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 1
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -106,8 +110,10 @@ examples:
         interrupt-controller;
         #interrupt-cells = <1>;
 
-        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>;
-        interconnect-names = "mdp0-mem";
+        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>,
+                        <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "cpu-cfg";
 
         iommus = <&apps_smmu 0x800 0x2>;
         ranges;
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml
index bbb727831fcab..2947f27e05852 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml
@@ -36,10 +36,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 1
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 1
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -118,8 +122,10 @@ examples:
         interrupt-controller;
         #interrupt-cells = <1>;
 
-        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>;
-        interconnect-names = "mdp0-mem";
+        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>,
+                        <&gem_noc MASTER_APPSS_PROC &cnoc2 SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "cpu-cfg";
 
         iommus = <&apps_smmu 0x900 0x402>;
         ranges;
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
index dde5c2acead5d..309de1953c88f 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
@@ -29,6 +29,16 @@ properties:
   iommus:
     maxItems: 2
 
+  interconnects:
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
+
+  interconnect-names:
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
+
 patternProperties:
   "^display-controller@[0-9a-f]+$":
     type: object
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml
index 671c2c2aa8965..3deb9dc81c9c3 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml
@@ -35,10 +35,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 2
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml
index e1dcb453762ec..c9ba1fae80425 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml
@@ -35,10 +35,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 2
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml
index b15c3950f09db..8e8a288d318c3 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml
@@ -35,10 +35,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 2
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml
index 001b26e653012..747a2e9665f4f 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml
@@ -30,10 +30,10 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    maxItems: 3
 
   interconnect-names:
-    maxItems: 2
+    maxItems: 3
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -91,9 +91,12 @@ examples:
         reg = <0x0ae00000 0x1000>;
         reg-names = "mdss";
 
-        interconnects = <&mmss_noc MASTER_MDP_DISP 0 &mc_virt SLAVE_EBI1_DISP 0>,
-                        <&mmss_noc MASTER_MDP_DISP 0 &mc_virt SLAVE_EBI1_DISP 0>;
-        interconnect-names = "mdp0-mem", "mdp1-mem";
+        interconnects = <&mmss_noc MASTER_MDP_DISP &mc_virt SLAVE_EBI1_DISP>,
+                        <&mmss_noc MASTER_MDP_DISP &mc_virt SLAVE_EBI1_DISP>,
+                        <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "mdp1-mem",
+                             "cpu-cfg";
 
         resets = <&dispcc DISP_CC_MDSS_CORE_BCR>;
 
-- 
GitLab


From 82c2a5751227056a643455d080a389a4b0bfe184 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <quic_khsieh@quicinc.com>
Date: Fri, 1 Dec 2023 15:19:43 -0800
Subject: [PATCH 0700/2340] drm/msm/dp: tie dp_display_irq_handler() with dp
 driver

Currently the dp_display_request_irq() is executed at
msm_dp_modeset_init() which ties irq registering to the DPU device's
life cycle, while depending on resources that are released as the DP
device is torn down. Move register DP driver irq handler to
dp_display_probe() to have dp_display_irq_handler() IRQ tied with DP
device. In addition, use platform_get_irq() to retrieve irq number
from platform device directly.

Changes in v5:
-- reworded commit text as review comments at change #4
-- tear down component if failed at dp_display_request_irq()

Changes in v4:
-- delete dp->irq check at dp_display_request_irq()

Changes in v3:
-- move calling dp_display_irq_handler() to probe

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570069/
Link: https://lore.kernel.org/r/1701472789-25951-2-git-send-email-quic_khsieh@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 32 ++++++++++++-----------------
 drivers/gpu/drm/msm/dp/dp_display.h |  1 -
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 2ac9d61501c75..36bdba19467ad 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -1183,26 +1183,18 @@ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id)
 	return ret;
 }
 
-int dp_display_request_irq(struct msm_dp *dp_display)
+static int dp_display_request_irq(struct dp_display_private *dp)
 {
 	int rc = 0;
-	struct dp_display_private *dp;
-
-	if (!dp_display) {
-		DRM_ERROR("invalid input\n");
-		return -EINVAL;
-	}
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
+	struct platform_device *pdev = dp->dp_display.pdev;
 
-	dp->irq = irq_of_parse_and_map(dp->dp_display.pdev->dev.of_node, 0);
+	dp->irq = platform_get_irq(pdev, 0);
 	if (!dp->irq) {
 		DRM_ERROR("failed to get irq\n");
 		return -EINVAL;
 	}
 
-	rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq,
-			dp_display_irq_handler,
+	rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler,
 			IRQF_TRIGGER_HIGH, "dp_display_isr", dp);
 	if (rc < 0) {
 		DRM_ERROR("failed to request IRQ%u: %d\n",
@@ -1277,13 +1269,21 @@ static int dp_display_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, &dp->dp_display);
 
+	rc = dp_display_request_irq(dp);
+	if (rc)
+		goto err;
+
 	rc = component_add(&pdev->dev, &dp_display_comp_ops);
 	if (rc) {
 		DRM_ERROR("component add failed, rc=%d\n", rc);
-		dp_display_deinit_sub_modules(dp);
+		goto err;
 	}
 
 	return rc;
+
+err:
+	dp_display_deinit_sub_modules(dp);
+	return rc;
 }
 
 static void dp_display_remove(struct platform_device *pdev)
@@ -1536,12 +1536,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
 
 	dp_priv = container_of(dp_display, struct dp_display_private, dp_display);
 
-	ret = dp_display_request_irq(dp_display);
-	if (ret) {
-		DRM_ERROR("request_irq failed, ret=%d\n", ret);
-		return ret;
-	}
-
 	ret = dp_display_get_next_bridge(dp_display);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h
index 5e2fbd8318e93..b4a8be0abd9a6 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.h
+++ b/drivers/gpu/drm/msm/dp/dp_display.h
@@ -36,7 +36,6 @@ struct msm_dp {
 int dp_display_set_plugged_cb(struct msm_dp *dp_display,
 		hdmi_codec_plugged_cb fn, struct device *codec_dev);
 int dp_display_get_modes(struct msm_dp *dp_display);
-int dp_display_request_irq(struct msm_dp *dp_display);
 bool dp_display_check_video_test(struct msm_dp *dp_display);
 int dp_display_get_test_bpp(struct msm_dp *dp_display);
 void dp_display_signal_audio_start(struct msm_dp *dp_display);
-- 
GitLab


From aa1131204e587535b9b48ed6d1b9187942bc939e Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <quic_khsieh@quicinc.com>
Date: Fri, 1 Dec 2023 15:19:44 -0800
Subject: [PATCH 0701/2340] drm/msm/dp: rename is_connected with link_ready

The is_connected flag is set to true after DP mainlink successfully
finishes link training to enter into ST_MAINLINK_READY state rather
than being set after the DP dongle is connected. Rename the
is_connected flag with link_ready flag to match the state of DP
driver's state machine.

Changes in v5:
-- reworded commit text according to review comments from change #4

Changes in v4:
-- reworded commit text

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570063/
Link: https://lore.kernel.org/r/1701472789-25951-3-git-send-email-quic_khsieh@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 19 +++++++++----------
 drivers/gpu/drm/msm/dp/dp_display.h |  2 +-
 drivers/gpu/drm/msm/dp/dp_drm.c     | 14 +++++++-------
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 36bdba19467ad..a3d7cb1943e2d 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -351,12 +351,11 @@ static void dp_display_send_hpd_event(struct msm_dp *dp_display)
 	drm_helper_hpd_irq_event(connector->dev);
 }
 
-
 static int dp_display_send_hpd_notification(struct dp_display_private *dp,
 					    bool hpd)
 {
-	if ((hpd && dp->dp_display.is_connected) ||
-			(!hpd && !dp->dp_display.is_connected)) {
+	if ((hpd && dp->dp_display.link_ready) ||
+			(!hpd && !dp->dp_display.link_ready)) {
 		drm_dbg_dp(dp->drm_dev, "HPD already %s\n",
 				(hpd ? "on" : "off"));
 		return 0;
@@ -370,7 +369,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp,
 						 dp->panel->dpcd, dp->panel->downstream_ports);
 	}
 
-	dp->dp_display.is_connected = hpd;
+	dp->dp_display.link_ready = hpd;
 
 	drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n",
 			dp->dp_display.connector_type, hpd);
@@ -912,7 +911,7 @@ int dp_display_set_plugged_cb(struct msm_dp *dp_display,
 
 	dp_display->plugged_cb = fn;
 	dp_display->codec_dev = codec_dev;
-	plugged = dp_display->is_connected;
+	plugged = dp_display->link_ready;
 	dp_display_handle_plugged_change(dp_display, plugged);
 
 	return 0;
@@ -1343,16 +1342,16 @@ static int dp_pm_resume(struct device *dev)
 	 * also only signal audio when disconnected
 	 */
 	if (dp->link->sink_count) {
-		dp->dp_display.is_connected = true;
+		dp->dp_display.link_ready = true;
 	} else {
-		dp->dp_display.is_connected = false;
+		dp->dp_display.link_ready = false;
 		dp_display_handle_plugged_change(dp_display, false);
 	}
 
 	drm_dbg_dp(dp->drm_dev,
 		"After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n",
 		dp->dp_display.connector_type, dp->link->sink_count,
-		dp->dp_display.is_connected, dp->core_initialized,
+		dp->dp_display.link_ready, dp->core_initialized,
 		dp->phy_initialized, dp_display->power_on);
 
 	mutex_unlock(&dp->event_mutex);
@@ -1740,8 +1739,8 @@ void dp_bridge_hpd_notify(struct drm_bridge *bridge,
 		return;
 	}
 
-	if (!dp_display->is_connected && status == connector_status_connected)
+	if (!dp_display->link_ready && status == connector_status_connected)
 		dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0);
-	else if (dp_display->is_connected && status == connector_status_disconnected)
+	else if (dp_display->link_ready && status == connector_status_disconnected)
 		dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0);
 }
diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h
index b4a8be0abd9a6..102f3507d8247 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.h
+++ b/drivers/gpu/drm/msm/dp/dp_display.h
@@ -17,7 +17,7 @@ struct msm_dp {
 	struct drm_bridge *bridge;
 	struct drm_connector *connector;
 	struct drm_bridge *next_bridge;
-	bool is_connected;
+	bool link_ready;
 	bool audio_enabled;
 	bool power_on;
 	unsigned int connector_type;
diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c
index 21850a384b10b..0efb501490cce 100644
--- a/drivers/gpu/drm/msm/dp/dp_drm.c
+++ b/drivers/gpu/drm/msm/dp/dp_drm.c
@@ -24,10 +24,10 @@ static enum drm_connector_status dp_bridge_detect(struct drm_bridge *bridge)
 
 	dp = to_dp_bridge(bridge)->dp_display;
 
-	drm_dbg_dp(dp->drm_dev, "is_connected = %s\n",
-		(dp->is_connected) ? "true" : "false");
+	drm_dbg_dp(dp->drm_dev, "link_ready = %s\n",
+		(dp->link_ready) ? "true" : "false");
 
-	return (dp->is_connected) ? connector_status_connected :
+	return (dp->link_ready) ? connector_status_connected :
 					connector_status_disconnected;
 }
 
@@ -40,8 +40,8 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge,
 
 	dp = to_dp_bridge(bridge)->dp_display;
 
-	drm_dbg_dp(dp->drm_dev, "is_connected = %s\n",
-		(dp->is_connected) ? "true" : "false");
+	drm_dbg_dp(dp->drm_dev, "link_ready = %s\n",
+		(dp->link_ready) ? "true" : "false");
 
 	/*
 	 * There is no protection in the DRM framework to check if the display
@@ -55,7 +55,7 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge,
 	 * After that this piece of code can be removed.
 	 */
 	if (bridge->ops & DRM_BRIDGE_OP_HPD)
-		return (dp->is_connected) ? 0 : -ENOTCONN;
+		return (dp->link_ready) ? 0 : -ENOTCONN;
 
 	return 0;
 }
@@ -78,7 +78,7 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *
 	dp = to_dp_bridge(bridge)->dp_display;
 
 	/* pluggable case assumes EDID is read when HPD */
-	if (dp->is_connected) {
+	if (dp->link_ready) {
 		rc = dp_display_get_modes(dp);
 		if (rc <= 0) {
 			DRM_ERROR("failed to get DP sink modes, rc=%d\n", rc);
-- 
GitLab


From e467e0bde881a667ccde9c7963f1042c01818059 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <quic_khsieh@quicinc.com>
Date: Fri, 1 Dec 2023 15:19:45 -0800
Subject: [PATCH 0702/2340] drm/msm/dp: use drm_bridge_hpd_notify() to report
 HPD status changes

Currently DP driver use drm_helper_hpd_irq_event(), bypassing drm bridge
framework, to report HPD status changes to user space frame work.
Replace it with drm_bridge_hpd_notify() since DP driver is part of drm
bridge.

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570067/
Link: https://lore.kernel.org/r/1701472789-25951-4-git-send-email-quic_khsieh@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index a3d7cb1943e2d..148ac1629ef22 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -340,26 +340,10 @@ static const struct component_ops dp_display_comp_ops = {
 	.unbind = dp_display_unbind,
 };
 
-static void dp_display_send_hpd_event(struct msm_dp *dp_display)
-{
-	struct dp_display_private *dp;
-	struct drm_connector *connector;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	connector = dp->dp_display.connector;
-	drm_helper_hpd_irq_event(connector->dev);
-}
-
 static int dp_display_send_hpd_notification(struct dp_display_private *dp,
 					    bool hpd)
 {
-	if ((hpd && dp->dp_display.link_ready) ||
-			(!hpd && !dp->dp_display.link_ready)) {
-		drm_dbg_dp(dp->drm_dev, "HPD already %s\n",
-				(hpd ? "on" : "off"));
-		return 0;
-	}
+	struct drm_bridge *bridge = dp->dp_display.bridge;
 
 	/* reset video pattern flag on disconnect */
 	if (!hpd) {
@@ -373,7 +357,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp,
 
 	drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n",
 			dp->dp_display.connector_type, hpd);
-	dp_display_send_hpd_event(&dp->dp_display);
+	drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready);
 
 	return 0;
 }
-- 
GitLab


From 9179fd9596a47de3ff2947101751315ea00bd7ef Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <quic_khsieh@quicinc.com>
Date: Fri, 1 Dec 2023 15:19:46 -0800
Subject: [PATCH 0703/2340] drm/msm/dp: move parser->parse() and
 dp_power_client_init() to probe

Original both parser->parse() and dp_power_client_init() are done at
dp_display_bind() since eDP population is done at binding time.
In the preparation of having eDP population done at probe() time,
move both function from dp_display_bind() to dp_display_probe().

Changes in v6:
-- move dp_power_client_deinit() to remove()

Changes in v5:
-- explain why parser->parse() and dp_power_client_init() are moved to
   probe time
-- tear down sub modules if failed

Changes in v4:
-- split this patch out of "incorporate pm_runtime framework into DP
   driver" patch

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570065/
Link: https://lore.kernel.org/r/1701472789-25951-5-git-send-email-quic_khsieh@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 148ac1629ef22..bae37e459d34c 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -275,11 +275,6 @@ static int dp_display_bind(struct device *dev, struct device *master,
 	dp->dp_display.drm_dev = drm;
 	priv->dp[dp->id] = &dp->dp_display;
 
-	rc = dp->parser->parse(dp->parser);
-	if (rc) {
-		DRM_ERROR("device tree parsing failed\n");
-		goto end;
-	}
 
 
 	dp->drm_dev = drm;
@@ -290,11 +285,6 @@ static int dp_display_bind(struct device *dev, struct device *master,
 		goto end;
 	}
 
-	rc = dp_power_client_init(dp->power);
-	if (rc) {
-		DRM_ERROR("Power client create failed\n");
-		goto end;
-	}
 
 	rc = dp_register_audio_driver(dev, dp->audio);
 	if (rc) {
@@ -327,7 +317,6 @@ static void dp_display_unbind(struct device *dev, struct device *master,
 
 	of_dp_aux_depopulate_bus(dp->aux);
 
-	dp_power_client_deinit(dp->power);
 	dp_unregister_audio_driver(dev, dp->audio);
 	dp_aux_unregister(dp->aux);
 	dp->drm_dev = NULL;
@@ -1240,6 +1229,18 @@ static int dp_display_probe(struct platform_device *pdev)
 		return -EPROBE_DEFER;
 	}
 
+	rc = dp->parser->parse(dp->parser);
+	if (rc) {
+		DRM_ERROR("device tree parsing failed\n");
+		goto err;
+	}
+
+	rc = dp_power_client_init(dp->power);
+	if (rc) {
+		DRM_ERROR("Power client create failed\n");
+		goto err;
+	}
+
 	/* setup event q */
 	mutex_init(&dp->event_mutex);
 	init_waitqueue_head(&dp->event_q);
@@ -1274,6 +1275,7 @@ static void dp_display_remove(struct platform_device *pdev)
 	struct dp_display_private *dp = dev_get_dp_display_private(&pdev->dev);
 
 	component_del(&pdev->dev, &dp_display_comp_ops);
+	dp_power_client_deinit(dp->power);
 	dp_display_deinit_sub_modules(dp);
 
 	platform_set_drvdata(pdev, NULL);
-- 
GitLab


From 5814b8bf086a1402a7fab4021aa58d4b84246db5 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <quic_khsieh@quicinc.com>
Date: Fri, 1 Dec 2023 15:19:47 -0800
Subject: [PATCH 0704/2340] drm/msm/dp: incorporate pm_runtime framework into
 DP driver

Currently DP driver is executed independent of PM runtime framework.
This leads msm eDP panel can not being detected by edp_panel driver
during generic_edp_panel_probe() due to AUX DPCD read failed at
edp panel driver. Incorporate PM runtime framework into DP driver so
that host controller's power and clocks are enable/disable through
PM runtime mechanism.  Once PM runtime framework is incorporated into
DP driver, waking up device from power up path is not necessary. Hence
remove it.

After incorporating pm_runtime framework into eDP/DP driver,
dp_pm_suspend() to handle power off both DP phy and controller during
suspend and dp_pm_resume() to handle power on both DP phy and controller
during resume are not necessary. Therefore both dp_pm_suspend() and
dp_pm_resume() are dropped and replace with dp_pm_runtime_suspend() and
dp_pm_runtime_resume() respectively.

Changes in v9:
-- silent compiler warning message at dp_power_init() and dp_power_deinit()
   with W1 flag

Changes in v7:
-- add comments to dp_pm_runtime_resume()
-- add comments to dp_bridge_hpd_enable()
-- delete dp->hpd_state = ST_DISCONNECTED from dp_bridge_hpd_notify()

Changes in v6:
-- delete dp_power_client_deinit(dp->power);
-- remove if (!dp->dp_display.is_edp) condition checkout at plug_handle()
-- remove if (!dp->dp_display.is_edp) condition checkout at unplug_handle()
-- add IRQF_NO_AUTOEN to devm_request_irq()
-- add enable_irq() and disable_irq() to pm_runtime_resume()/suspend()
-- del dp->hpd_state = ST_DISCONNECTED from dp_bridge_hpd_disable()

Changes in v5:
-- remove pm_runtime_put_autosuspend feature, use pm_runtime_put_sync()
-- squash add pm_runtime_force_suspend()/resume() patch into this patch

Changes in v4:
-- reworded commit text to explain why pm_framework is required for
   edp panel
-- reworded commit text to explain autosuspend is choiced
-- delete EV_POWER_PM_GET and PM_EV_POWER_PUT from changes #3
-- delete dp_display_pm_get() and dp_display_pm_Put() from changes #3
-- return value from pm_runtime_resume_and_get() directly
-- check return value of devm_pm_runtime_enable()
-- delete pm_runtime_xxx from dp_display_remove()
-- drop dp_display_host_init() from EV_HPD_INIT_SETUP
-- drop both dp_pm_prepare() and dp_pm_compete() from this change
-- delete ST_SUSPENDED state
-- rewording commit text to add more details regrading the purpose
   of this change

Changes in v3:
-- incorporate removing pm_runtime_xx() from dp_pwer.c to this patch
-- use pm_runtime_resume_and_get() instead of pm_runtime_get()
-- error checking pm_runtime_resume_and_get() return value
-- add EV_POWER_PM_GET and PM_EV_POWER_PUT to handle HPD_GPIO case
-- replace dp_pm_suspend() with pm_runtime_force_suspend()
-- replace dp_pm_resume() with pm_runtime_force_resume()

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570073/
Link: https://lore.kernel.org/r/1701472789-25951-6-git-send-email-quic_khsieh@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_aux.c     |   5 +
 drivers/gpu/drm/msm/dp/dp_display.c | 181 +++++++++++-----------------
 drivers/gpu/drm/msm/dp/dp_power.c   |  32 +----
 drivers/gpu/drm/msm/dp/dp_power.h   |  11 --
 4 files changed, 77 insertions(+), 152 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
index 8e3b677f35e64..10b6eeb7bebf0 100644
--- a/drivers/gpu/drm/msm/dp/dp_aux.c
+++ b/drivers/gpu/drm/msm/dp/dp_aux.c
@@ -291,6 +291,10 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
 		return -EINVAL;
 	}
 
+	ret = pm_runtime_resume_and_get(dp_aux->dev);
+	if (ret)
+		return  ret;
+
 	mutex_lock(&aux->mutex);
 	if (!aux->initted) {
 		ret = -EIO;
@@ -364,6 +368,7 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
 
 exit:
 	mutex_unlock(&aux->mutex);
+	pm_runtime_put_sync(dp_aux->dev);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index bae37e459d34c..05143ab9c6a43 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -49,7 +49,6 @@ enum {
 	ST_CONNECTED,
 	ST_DISCONNECT_PENDING,
 	ST_DISPLAY_OFF,
-	ST_SUSPENDED,
 };
 
 enum {
@@ -309,10 +308,6 @@ static void dp_display_unbind(struct device *dev, struct device *master,
 	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 	struct msm_drm_private *priv = dev_get_drvdata(master);
 
-	/* disable all HPD interrupts */
-	if (dp->core_initialized)
-		dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false);
-
 	kthread_stop(dp->ev_tsk);
 
 	of_dp_aux_depopulate_bus(dp->aux);
@@ -542,6 +537,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 {
 	u32 state;
 	int ret;
+	struct platform_device *pdev = dp->dp_display.pdev;
 
 	mutex_lock(&dp->event_mutex);
 
@@ -549,7 +545,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 	drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n",
 			dp->dp_display.connector_type, state);
 
-	if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) {
+	if (state == ST_DISPLAY_OFF) {
 		mutex_unlock(&dp->event_mutex);
 		return 0;
 	}
@@ -566,7 +562,13 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 		return 0;
 	}
 
-	ret = dp_display_usbpd_configure_cb(&dp->dp_display.pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
+	if (ret) {
+		DRM_ERROR("failed to pm_runtime_resume\n");
+		return ret;
+	}
+
+	ret = dp_display_usbpd_configure_cb(&pdev->dev);
 	if (ret) {	/* link train failed */
 		dp->hpd_state = ST_DISCONNECTED;
 	} else {
@@ -598,6 +600,7 @@ static void dp_display_handle_plugged_change(struct msm_dp *dp_display,
 static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
 {
 	u32 state;
+	struct platform_device *pdev = dp->dp_display.pdev;
 
 	mutex_lock(&dp->event_mutex);
 
@@ -648,6 +651,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
 			dp->dp_display.connector_type, state);
 
 	/* uevent will complete disconnection part */
+	pm_runtime_put_sync(&pdev->dev);
 	mutex_unlock(&dp->event_mutex);
 	return 0;
 }
@@ -663,7 +667,7 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
 	drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n",
 			dp->dp_display.connector_type, state);
 
-	if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) {
+	if (state == ST_DISPLAY_OFF) {
 		mutex_unlock(&dp->event_mutex);
 		return 0;
 	}
@@ -1075,7 +1079,6 @@ static int hpd_event_thread(void *data)
 
 		switch (todo->event_id) {
 		case EV_HPD_INIT_SETUP:
-			dp_display_host_init(dp_priv);
 			break;
 		case EV_HPD_PLUG_INT:
 			dp_hpd_plug_handle(dp_priv, todo->data);
@@ -1167,7 +1170,9 @@ static int dp_display_request_irq(struct dp_display_private *dp)
 	}
 
 	rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler,
-			IRQF_TRIGGER_HIGH, "dp_display_isr", dp);
+			      IRQF_TRIGGER_HIGH|IRQF_NO_AUTOEN,
+			      "dp_display_isr", dp);
+
 	if (rc < 0) {
 		DRM_ERROR("failed to request IRQ%u: %d\n",
 				dp->irq, rc);
@@ -1253,6 +1258,10 @@ static int dp_display_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, &dp->dp_display);
 
+	rc = devm_pm_runtime_enable(&pdev->dev);
+	if (rc)
+		goto err;
+
 	rc = dp_display_request_irq(dp);
 	if (rc)
 		goto err;
@@ -1275,115 +1284,52 @@ static void dp_display_remove(struct platform_device *pdev)
 	struct dp_display_private *dp = dev_get_dp_display_private(&pdev->dev);
 
 	component_del(&pdev->dev, &dp_display_comp_ops);
-	dp_power_client_deinit(dp->power);
 	dp_display_deinit_sub_modules(dp);
 
 	platform_set_drvdata(pdev, NULL);
 }
 
-static int dp_pm_resume(struct device *dev)
+static int dp_pm_runtime_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct msm_dp *dp_display = platform_get_drvdata(pdev);
-	struct dp_display_private *dp;
-	int sink_count = 0;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	mutex_lock(&dp->event_mutex);
-
-	drm_dbg_dp(dp->drm_dev,
-		"Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
-	/* start from disconnected state */
-	dp->hpd_state = ST_DISCONNECTED;
-
-	/* turn on dp ctrl/phy */
-	dp_display_host_init(dp);
-
-	if (dp_display->is_edp)
-		dp_catalog_ctrl_hpd_enable(dp->catalog);
+	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 
-	if (dp_catalog_link_is_connected(dp->catalog)) {
-		/*
-		 * set sink to normal operation mode -- D0
-		 * before dpcd read
-		 */
-		dp_display_host_phy_init(dp);
-		dp_link_psm_config(dp->link, &dp->panel->link_info, false);
-		sink_count = drm_dp_read_sink_count(dp->aux);
-		if (sink_count < 0)
-			sink_count = 0;
+	disable_irq(dp->irq);
 
+	if (dp->dp_display.is_edp) {
 		dp_display_host_phy_exit(dp);
+		dp_catalog_ctrl_hpd_disable(dp->catalog);
 	}
-
-	dp->link->sink_count = sink_count;
-	/*
-	 * can not declared display is connected unless
-	 * HDMI cable is plugged in and sink_count of
-	 * dongle become 1
-	 * also only signal audio when disconnected
-	 */
-	if (dp->link->sink_count) {
-		dp->dp_display.link_ready = true;
-	} else {
-		dp->dp_display.link_ready = false;
-		dp_display_handle_plugged_change(dp_display, false);
-	}
-
-	drm_dbg_dp(dp->drm_dev,
-		"After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n",
-		dp->dp_display.connector_type, dp->link->sink_count,
-		dp->dp_display.link_ready, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
-	mutex_unlock(&dp->event_mutex);
+	dp_display_host_deinit(dp);
 
 	return 0;
 }
 
-static int dp_pm_suspend(struct device *dev)
+static int dp_pm_runtime_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct msm_dp *dp_display = platform_get_drvdata(pdev);
-	struct dp_display_private *dp;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	mutex_lock(&dp->event_mutex);
-
-	drm_dbg_dp(dp->drm_dev,
-		"Before, type=%d core_inited=%d  phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
-	/* mainlink enabled */
-	if (dp_power_clk_status(dp->power, DP_CTRL_PM))
-		dp_ctrl_off_link_stream(dp->ctrl);
-
-	dp_display_host_phy_exit(dp);
-
-	/* host_init will be called at pm_resume */
-	dp_display_host_deinit(dp);
-
-	dp->hpd_state = ST_SUSPENDED;
-
-	drm_dbg_dp(dp->drm_dev,
-		"After, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
+	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 
-	mutex_unlock(&dp->event_mutex);
+	/*
+	 * for eDP, host cotroller, HPD block and PHY are enabled here
+	 * but with HPD irq disabled
+	 *
+	 * for DP, only host controller is enabled here.
+	 * HPD block is enabled at dp_bridge_hpd_enable()
+	 * PHY will be enabled at plugin handler later
+	 */
+	dp_display_host_init(dp);
+	if (dp->dp_display.is_edp) {
+		dp_catalog_ctrl_hpd_enable(dp->catalog);
+		dp_display_host_phy_init(dp);
+	}
 
+	enable_irq(dp->irq);
 	return 0;
 }
 
 static const struct dev_pm_ops dp_pm_ops = {
-	.suspend = dp_pm_suspend,
-	.resume =  dp_pm_resume,
+	SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 };
 
 static struct platform_driver dp_display_driver = {
@@ -1466,10 +1412,6 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 	aux_bus = of_get_child_by_name(dev->of_node, "aux-bus");
 
 	if (aux_bus && dp->is_edp) {
-		dp_display_host_init(dp_priv);
-		dp_catalog_ctrl_hpd_enable(dp_priv->catalog);
-		dp_display_host_phy_init(dp_priv);
-
 		/*
 		 * The code below assumes that the panel will finish probing
 		 * by the time devm_of_dp_aux_populate_ep_devices() returns.
@@ -1566,6 +1508,11 @@ void dp_bridge_atomic_enable(struct drm_bridge *drm_bridge,
 		dp_hpd_plug_handle(dp_display, 0);
 
 	mutex_lock(&dp_display->event_mutex);
+	if (pm_runtime_resume_and_get(&dp->pdev->dev)) {
+		DRM_ERROR("failed to pm_runtime_resume\n");
+		mutex_unlock(&dp_display->event_mutex);
+		return;
+	}
 
 	state = dp_display->hpd_state;
 	if (state != ST_DISPLAY_OFF && state != ST_MAINLINK_READY) {
@@ -1630,10 +1577,9 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
 	mutex_lock(&dp_display->event_mutex);
 
 	state = dp_display->hpd_state;
-	if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED) {
-		mutex_unlock(&dp_display->event_mutex);
-		return;
-	}
+	if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED)
+		drm_dbg_dp(dp->drm_dev, "type=%d wrong hpd_state=%d\n",
+			   dp->connector_type, state);
 
 	dp_display_disable(dp_display);
 
@@ -1646,6 +1592,8 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
 	}
 
 	drm_dbg_dp(dp->drm_dev, "type=%d Done\n", dp->connector_type);
+
+	pm_runtime_put_sync(&dp->pdev->dev);
 	mutex_unlock(&dp_display->event_mutex);
 }
 
@@ -1684,7 +1632,21 @@ void dp_bridge_hpd_enable(struct drm_bridge *bridge)
 	struct msm_dp *dp_display = dp_bridge->dp_display;
 	struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display);
 
+	/*
+	 * this is for external DP with hpd irq enabled case,
+	 * step-1: dp_pm_runtime_resume() enable dp host only
+	 * step-2: enable hdp block and have hpd irq enabled here
+	 * step-3: waiting for plugin irq while phy is not initialized
+	 * step-4: DP PHY is initialized at plugin handler before link training
+	 *
+	 */
 	mutex_lock(&dp->event_mutex);
+	if (pm_runtime_resume_and_get(&dp_display->pdev->dev)) {
+		DRM_ERROR("failed to resume power\n");
+		mutex_unlock(&dp->event_mutex);
+		return;
+	}
+
 	dp_catalog_ctrl_hpd_enable(dp->catalog);
 
 	/* enable HDP interrupts */
@@ -1706,6 +1668,8 @@ void dp_bridge_hpd_disable(struct drm_bridge *bridge)
 	dp_catalog_ctrl_hpd_disable(dp->catalog);
 
 	dp_display->internal_hpd = false;
+
+	pm_runtime_put_sync(&dp_display->pdev->dev);
 	mutex_unlock(&dp->event_mutex);
 }
 
@@ -1720,11 +1684,6 @@ void dp_bridge_hpd_notify(struct drm_bridge *bridge,
 	if (dp_display->internal_hpd)
 		return;
 
-	if (!dp->core_initialized) {
-		drm_dbg_dp(dp->drm_dev, "not initialized\n");
-		return;
-	}
-
 	if (!dp_display->link_ready && status == connector_status_connected)
 		dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0);
 	else if (dp_display->link_ready && status == connector_status_disconnected)
diff --git a/drivers/gpu/drm/msm/dp/dp_power.c b/drivers/gpu/drm/msm/dp/dp_power.c
index 5cb84ca40e9ed..c4843dd69f479 100644
--- a/drivers/gpu/drm/msm/dp/dp_power.c
+++ b/drivers/gpu/drm/msm/dp/dp_power.c
@@ -152,45 +152,17 @@ int dp_power_client_init(struct dp_power *dp_power)
 
 	power = container_of(dp_power, struct dp_power_private, dp_power);
 
-	pm_runtime_enable(power->dev);
-
 	return dp_power_clk_init(power);
 }
 
-void dp_power_client_deinit(struct dp_power *dp_power)
-{
-	struct dp_power_private *power;
-
-	power = container_of(dp_power, struct dp_power_private, dp_power);
-
-	pm_runtime_disable(power->dev);
-}
-
 int dp_power_init(struct dp_power *dp_power)
 {
-	int rc = 0;
-	struct dp_power_private *power = NULL;
-
-	power = container_of(dp_power, struct dp_power_private, dp_power);
-
-	pm_runtime_get_sync(power->dev);
-
-	rc = dp_power_clk_enable(dp_power, DP_CORE_PM, true);
-	if (rc)
-		pm_runtime_put_sync(power->dev);
-
-	return rc;
+	return dp_power_clk_enable(dp_power, DP_CORE_PM, true);
 }
 
 int dp_power_deinit(struct dp_power *dp_power)
 {
-	struct dp_power_private *power;
-
-	power = container_of(dp_power, struct dp_power_private, dp_power);
-
-	dp_power_clk_enable(dp_power, DP_CORE_PM, false);
-	pm_runtime_put_sync(power->dev);
-	return 0;
+	return dp_power_clk_enable(dp_power, DP_CORE_PM, false);
 }
 
 struct dp_power *dp_power_get(struct device *dev, struct dp_parser *parser)
diff --git a/drivers/gpu/drm/msm/dp/dp_power.h b/drivers/gpu/drm/msm/dp/dp_power.h
index a3dec200785e5..55ada51edb57b 100644
--- a/drivers/gpu/drm/msm/dp/dp_power.h
+++ b/drivers/gpu/drm/msm/dp/dp_power.h
@@ -80,17 +80,6 @@ int dp_power_clk_enable(struct dp_power *power, enum dp_pm_type pm_type,
  */
 int dp_power_client_init(struct dp_power *power);
 
-/**
- * dp_power_clinet_deinit() - de-initialize clock and regulator modules
- *
- * @power: instance of power module
- * return: 0 for success, error for failure.
- *
- * This API will de-initialize the DisplayPort's clocks and regulator
- * modules.
- */
-void dp_power_client_deinit(struct dp_power *power);
-
 /**
  * dp_power_get() - configure and get the DisplayPort power module data
  *
-- 
GitLab


From 2b3aabc9caa2452653ef22bdfd15af65f0dff164 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <quic_khsieh@quicinc.com>
Date: Fri, 1 Dec 2023 15:19:48 -0800
Subject: [PATCH 0705/2340] drm/msm/dp: delete EV_HPD_INIT_SETUP

EV_HPD_INIT_SETUP flag is used to trigger the initialization of external
DP host controller. Since external DP host controller initialization had
been incorporated into pm_runtime_resume(), this flag became obsolete.
msm_dp_irq_postinstall() which triggers EV_HPD_INIT_SETUP event is
obsoleted accordingly.

Changes in v4:
-- reworded commit text
-- drop EV_HPD_INIT_SETUP
-- drop msm_dp_irq_postinstall()

Changes in v3:
-- drop EV_HPD_INIT_SETUP and msm_dp_irq_postinstall()

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570075/
Link: https://lore.kernel.org/r/1701472789-25951-7-git-send-email-quic_khsieh@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c |  4 ----
 drivers/gpu/drm/msm/dp/dp_display.c     | 16 ----------------
 drivers/gpu/drm/msm/msm_drv.h           |  5 -----
 3 files changed, 25 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index babec724c76a5..9b2efd7acc872 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -845,7 +845,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms)
 {
 	struct msm_drm_private *priv;
 	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
-	int i;
 
 	if (!dpu_kms || !dpu_kms->dev)
 		return -EINVAL;
@@ -854,9 +853,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms)
 	if (!priv)
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(priv->dp); i++)
-		msm_dp_irq_postinstall(priv->dp[i]);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 05143ab9c6a43..50054f28e5b2f 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -54,7 +54,6 @@ enum {
 enum {
 	EV_NO_EVENT,
 	/* hpd events */
-	EV_HPD_INIT_SETUP,
 	EV_HPD_PLUG_INT,
 	EV_IRQ_HPD_INT,
 	EV_HPD_UNPLUG_INT,
@@ -1078,8 +1077,6 @@ static int hpd_event_thread(void *data)
 		spin_unlock_irqrestore(&dp_priv->event_lock, flag);
 
 		switch (todo->event_id) {
-		case EV_HPD_INIT_SETUP:
-			break;
 		case EV_HPD_PLUG_INT:
 			dp_hpd_plug_handle(dp_priv, todo->data);
 			break;
@@ -1359,19 +1356,6 @@ void __exit msm_dp_unregister(void)
 	platform_driver_unregister(&dp_display_driver);
 }
 
-void msm_dp_irq_postinstall(struct msm_dp *dp_display)
-{
-	struct dp_display_private *dp;
-
-	if (!dp_display)
-		return;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	if (!dp_display->is_edp)
-		dp_add_event(dp, EV_HPD_INIT_SETUP, 0, 0);
-}
-
 bool msm_dp_wide_bus_available(const struct msm_dp *dp_display)
 {
 	struct dp_display_private *dp;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 628ef3e663ea4..a205127ccc932 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -386,7 +386,6 @@ int __init msm_dp_register(void);
 void __exit msm_dp_unregister(void);
 int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
 			 struct drm_encoder *encoder);
-void msm_dp_irq_postinstall(struct msm_dp *dp_display);
 void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display);
 
 bool msm_dp_wide_bus_available(const struct msm_dp *dp_display);
@@ -406,10 +405,6 @@ static inline int msm_dp_modeset_init(struct msm_dp *dp_display,
 	return -EINVAL;
 }
 
-static inline void msm_dp_irq_postinstall(struct msm_dp *dp_display)
-{
-}
-
 static inline void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display)
 {
 }
-- 
GitLab


From e2969ee302522b0b29506a9d17cebb5b02b5ce5c Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <quic_khsieh@quicinc.com>
Date: Fri, 1 Dec 2023 15:19:49 -0800
Subject: [PATCH 0706/2340] drm/msm/dp: move of_dp_aux_populate_bus() to eDP
 probe()

Currently eDP population is done at msm_dp_modeset_init() which happen
at binding time. Move eDP population to be done at display probe time
so that probe deferral cases can be handled effectively.
wait_for_hpd_asserted callback is added during drm_dp_aux_init()
to ensure eDP's HPD is up before proceeding eDP population.

Changes in v5:
-- inline dp_display_auxbus_population() and delete it

Changes in v4:
-- delete duplicate initialize code to dp_aux before drm_dp_aux_register()
-- delete of_get_child_by_name(dev->of_node, "aux-bus") and inline the
   function
-- not initialize rc = 0

Changes in v3:
-- add done_probing callback into devm_of_dp_aux_populate_bus()

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570074/
Link: https://lore.kernel.org/r/1701472789-25951-8-git-send-email-quic_khsieh@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_aux.c     | 34 +++++++++++++----
 drivers/gpu/drm/msm/dp/dp_display.c | 59 ++++++++++++-----------------
 2 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
index 10b6eeb7bebf0..03f4951c49f42 100644
--- a/drivers/gpu/drm/msm/dp/dp_aux.c
+++ b/drivers/gpu/drm/msm/dp/dp_aux.c
@@ -479,7 +479,6 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux)
 
 int dp_aux_register(struct drm_dp_aux *dp_aux)
 {
-	struct dp_aux_private *aux;
 	int ret;
 
 	if (!dp_aux) {
@@ -487,12 +486,7 @@ int dp_aux_register(struct drm_dp_aux *dp_aux)
 		return -EINVAL;
 	}
 
-	aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
-
-	aux->dp_aux.name = "dpu_dp_aux";
-	aux->dp_aux.dev = aux->dev;
-	aux->dp_aux.transfer = dp_aux_transfer;
-	ret = drm_dp_aux_register(&aux->dp_aux);
+	ret = drm_dp_aux_register(dp_aux);
 	if (ret) {
 		DRM_ERROR("%s: failed to register drm aux: %d\n", __func__,
 				ret);
@@ -507,6 +501,21 @@ void dp_aux_unregister(struct drm_dp_aux *dp_aux)
 	drm_dp_aux_unregister(dp_aux);
 }
 
+static int dp_wait_hpd_asserted(struct drm_dp_aux *dp_aux,
+				 unsigned long wait_us)
+{
+	int ret;
+	struct dp_aux_private *aux;
+
+	aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
+
+	pm_runtime_get_sync(aux->dev);
+	ret = dp_catalog_aux_wait_for_hpd_connect_state(aux->catalog);
+	pm_runtime_put_sync(aux->dev);
+
+	return ret;
+}
+
 struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog,
 			      bool is_edp)
 {
@@ -530,6 +539,17 @@ struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog,
 	aux->catalog = catalog;
 	aux->retry_cnt = 0;
 
+	/*
+	 * Use the drm_dp_aux_init() to use the aux adapter
+	 * before registering AUX with the DRM device so that
+	 * msm eDP panel can be detected by generic_dep_panel_probe().
+	 */
+	aux->dp_aux.name = "dpu_dp_aux";
+	aux->dp_aux.dev = dev;
+	aux->dp_aux.transfer = dp_aux_transfer;
+	aux->dp_aux.wait_hpd_asserted = dp_wait_hpd_asserted;
+	drm_dp_aux_init(&aux->dp_aux);
+
 	return &aux->dp_aux;
 }
 
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 50054f28e5b2f..3a360c87e2dc4 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -1198,6 +1198,17 @@ static const struct msm_dp_desc *dp_display_get_desc(struct platform_device *pde
 	return NULL;
 }
 
+static int dp_auxbus_done_probe(struct drm_dp_aux *aux)
+{
+	int rc;
+
+	rc = component_add(aux->dev, &dp_display_comp_ops);
+	if (rc)
+		DRM_ERROR("eDP component add failed, rc=%d\n", rc);
+
+	return rc;
+}
+
 static int dp_display_probe(struct platform_device *pdev)
 {
 	int rc = 0;
@@ -1263,10 +1274,18 @@ static int dp_display_probe(struct platform_device *pdev)
 	if (rc)
 		goto err;
 
-	rc = component_add(&pdev->dev, &dp_display_comp_ops);
-	if (rc) {
-		DRM_ERROR("component add failed, rc=%d\n", rc);
-		goto err;
+	if (dp->dp_display.is_edp) {
+		rc = devm_of_dp_aux_populate_bus(dp->aux, dp_auxbus_done_probe);
+		if (rc) {
+			DRM_ERROR("eDP auxbus population failed, rc=%d\n", rc);
+			goto err;
+		}
+	} else {
+		rc = component_add(&pdev->dev, &dp_display_comp_ops);
+		if (rc) {
+			DRM_ERROR("component add failed, rc=%d\n", rc);
+			goto err;
+		}
 	}
 
 	return rc;
@@ -1282,7 +1301,6 @@ static void dp_display_remove(struct platform_device *pdev)
 
 	component_del(&pdev->dev, &dp_display_comp_ops);
 	dp_display_deinit_sub_modules(dp);
-
 	platform_set_drvdata(pdev, NULL);
 }
 
@@ -1388,29 +1406,8 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 {
 	int rc;
 	struct dp_display_private *dp_priv;
-	struct device_node *aux_bus;
-	struct device *dev;
 
 	dp_priv = container_of(dp, struct dp_display_private, dp_display);
-	dev = &dp_priv->dp_display.pdev->dev;
-	aux_bus = of_get_child_by_name(dev->of_node, "aux-bus");
-
-	if (aux_bus && dp->is_edp) {
-		/*
-		 * The code below assumes that the panel will finish probing
-		 * by the time devm_of_dp_aux_populate_ep_devices() returns.
-		 * This isn't a great assumption since it will fail if the
-		 * panel driver is probed asynchronously but is the best we
-		 * can do without a bigger driver reorganization.
-		 */
-		rc = of_dp_aux_populate_bus(dp_priv->aux, NULL);
-		of_node_put(aux_bus);
-		if (rc)
-			goto error;
-	} else if (dp->is_edp) {
-		DRM_ERROR("eDP aux_bus not found\n");
-		return -ENODEV;
-	}
 
 	/*
 	 * External bridges are mandatory for eDP interfaces: one has to
@@ -1423,17 +1420,9 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 	if (!dp->is_edp && rc == -ENODEV)
 		return 0;
 
-	if (!rc) {
+	if (!rc)
 		dp->next_bridge = dp_priv->parser->next_bridge;
-		return 0;
-	}
 
-error:
-	if (dp->is_edp) {
-		of_dp_aux_depopulate_bus(dp_priv->aux);
-		dp_display_host_phy_exit(dp_priv);
-		dp_display_host_deinit(dp_priv);
-	}
 	return rc;
 }
 
-- 
GitLab


From 0b414c731432917c83353c446e60ee838c9a9cfd Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Thu, 30 Nov 2023 11:21:18 -0800
Subject: [PATCH 0707/2340] drm/msm/dpu: Correct UBWC settings for sc8280xp

The UBWC settings need to match between the display and GPU.  When we
updated the GPU settings, we forgot to make the corresponding update on
the display side.

Reported-by: Steev Klimaszewski <steev@kali.org>
Fixes: 07e6de738aa6 ("drm/msm/a690: Fix reg values for a690")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Tested-by: Steev Klimaszewski <steev@kali.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/569817/
Link: https://lore.kernel.org/r/20231130192119.32538-1-robdclark@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/msm_mdss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 6865db1e3ce89..29bb38f0bb2ce 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -545,7 +545,7 @@ static const struct msm_mdss_data sc8280xp_data = {
 	.ubwc_dec_version = UBWC_4_0,
 	.ubwc_swizzle = 6,
 	.ubwc_static = 1,
-	.highest_bank_bit = 2,
+	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
 };
 
-- 
GitLab


From 9cad81143ef000695b32e92048d8ec0481e5ea3d Mon Sep 17 00:00:00 2001
From: Paloma Arellano <quic_parellan@quicinc.com>
Date: Thu, 30 Nov 2023 14:47:37 -0800
Subject: [PATCH 0708/2340] drm/msm/dpu: Capture dpu snapshot when
 frame_done_timer timeouts

Trigger a devcoredump to dump dpu registers and capture the drm atomic
state when the frame_done_timer timeouts.

v2: Optimize the format in which frame_done_timeout_cnt is incremented
v3: Describe parameter frame_done_timeout_cnt in dpu_encoder_virt

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202312010225.2OJWLKmA-lkp@intel.com/
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Paloma Arellano <quic_parellan@quicinc.com>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/569834/
Link: https://lore.kernel.org/r/20231130224740.24383-1-quic_parellan@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 1cf7ff6caff4e..1c3560e1c6252 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -151,6 +151,8 @@ enum dpu_enc_rc_states {
  * @crtc_frame_event_cb:	callback handler for frame event
  * @crtc_frame_event_cb_data:	callback handler private data
  * @frame_done_timeout_ms:	frame done timeout in ms
+ * @frame_done_timeout_cnt:	atomic counter tracking the number of frame
+ * 				done timeouts
  * @frame_done_timer:		watchdog timer for frame done event
  * @disp_info:			local copy of msm_display_info struct
  * @idle_pc_supported:		indicate if idle power collaps is supported
@@ -191,6 +193,7 @@ struct dpu_encoder_virt {
 	void *crtc_frame_event_cb_data;
 
 	atomic_t frame_done_timeout_ms;
+	atomic_t frame_done_timeout_cnt;
 	struct timer_list frame_done_timer;
 
 	struct msm_display_info disp_info;
@@ -1204,6 +1207,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc,
 
 	dpu_enc->dsc = dpu_encoder_get_dsc_config(drm_enc);
 
+	atomic_set(&dpu_enc->frame_done_timeout_cnt, 0);
+
 	if (disp_info->intf_type == INTF_DP)
 		dpu_enc->wide_bus_en = msm_dp_wide_bus_available(priv->dp[index]);
 	else if (disp_info->intf_type == INTF_DSI)
@@ -2115,11 +2120,12 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data)
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		seq_printf(s, "intf:%d  wb:%d  vsync:%8d     underrun:%8d    ",
+		seq_printf(s, "intf:%d  wb:%d  vsync:%8d     underrun:%8d    frame_done_cnt:%d",
 				phys->hw_intf ? phys->hw_intf->idx - INTF_0 : -1,
 				phys->hw_wb ? phys->hw_wb->idx - WB_0 : -1,
 				atomic_read(&phys->vsync_cnt),
-				atomic_read(&phys->underrun_cnt));
+				atomic_read(&phys->underrun_cnt),
+				atomic_read(&dpu_enc->frame_done_timeout_cnt));
 
 		seq_printf(s, "mode: %s\n", dpu_encoder_helper_get_intf_type(phys->intf_mode));
 	}
@@ -2341,6 +2347,9 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t)
 
 	DPU_ERROR_ENC(dpu_enc, "frame done timeout\n");
 
+	if (atomic_inc_return(&dpu_enc->frame_done_timeout_cnt) == 1)
+		msm_disp_snapshot_state(drm_enc->dev);
+
 	event = DPU_ENCODER_FRAME_EVENT_ERROR;
 	trace_dpu_enc_frame_done_timeout(DRMID(drm_enc), event);
 	dpu_enc->crtc_frame_event_cb(dpu_enc->crtc_frame_event_cb_data, event);
@@ -2392,6 +2401,7 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 		goto fail;
 
 	atomic_set(&dpu_enc->frame_done_timeout_ms, 0);
+	atomic_set(&dpu_enc->frame_done_timeout_cnt, 0);
 	timer_setup(&dpu_enc->frame_done_timer,
 			dpu_encoder_frame_done_timeout, 0);
 
-- 
GitLab


From 7cc2621f16b644bb7af37987cb471311641a9e56 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 30 Nov 2023 16:35:01 -0800
Subject: [PATCH 0709/2340] drm/msm/dpu: Add missing safe_lut_tbl in sc8180x
 catalog

Similar to SC8280XP, the misconfigured SAFE logic causes rather
significant delays in __arm_smmu_tlb_sync(), resulting in poor
performance for things such as USB.

Introduce appropriate SAFE values for SC8180X to correct this.

Fixes: f3af2d6ee9ab ("drm/msm/dpu: Add SC8180x to hw catalog")
Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Reported-by: Anton Bambura <jenneron@postmarketos.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/569840/
Link: https://lore.kernel.org/r/20231130-sc8180x-dpu-safe-lut-v1-1-a8a6bbac36b8@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
index e07f4c8c25b9f..9ffc8804a6fca 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
@@ -367,6 +367,7 @@ static const struct dpu_perf_cfg sc8180x_perf_data = {
 	.min_llcc_ib = 800000,
 	.min_dram_ib = 800000,
 	.danger_lut_tbl = {0xf, 0xffff, 0x0},
+	.safe_lut_tbl = {0xfff0, 0xf000, 0xffff},
 	.qos_lut_tbl = {
 		{.nentry = ARRAY_SIZE(sc7180_qos_linear),
 		.entries = sc7180_qos_linear
-- 
GitLab


From 3d07a411b4faaf2b498760ccf12888f8de529de0 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Tue, 20 Jun 2023 13:43:20 +0200
Subject: [PATCH 0710/2340] drm/msm/dsi: Use pm_runtime_resume_and_get to
 prevent refcnt leaks

This helper has been introduced to avoid programmer errors (missing
_put calls leading to dangling refcnt) when using pm_runtime_get, use it.

While at it, start checking the return value.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Fixes: 5c8290284402 ("drm/msm/dsi: Split PHY drivers to separate files")
Patchwork: https://patchwork.freedesktop.org/patch/543350/
Link: https://lore.kernel.org/r/20230620-topic-dsiphy_rpm-v2-1-a11a751f34f0@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index 05621e5e7d634..b6314bb66d2fd 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -516,7 +516,9 @@ static int dsi_phy_enable_resource(struct msm_dsi_phy *phy)
 	struct device *dev = &phy->pdev->dev;
 	int ret;
 
-	pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
+	if (ret)
+		return ret;
 
 	ret = clk_prepare_enable(phy->ahb_clk);
 	if (ret) {
-- 
GitLab


From 6ab502bc1cf3147ea1d8540d04b83a7a4cb6d1f1 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Tue, 20 Jun 2023 13:43:21 +0200
Subject: [PATCH 0711/2340] drm/msm/dsi: Enable runtime PM

Some devices power the DSI PHY/PLL through a power rail that we model
as a GENPD. Enable runtime PM to make it suspendable.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/543352/
Link: https://lore.kernel.org/r/20230620-topic-dsiphy_rpm-v2-2-a11a751f34f0@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index b6314bb66d2fd..e49ebd9f6326f 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -691,6 +691,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev)
 		return dev_err_probe(dev, PTR_ERR(phy->ahb_clk),
 				     "Unable to get ahb clk\n");
 
+	ret = devm_pm_runtime_enable(&pdev->dev);
+	if (ret)
+		return ret;
+
 	/* PLL init will call into clk_register which requires
 	 * register access, so we need to enable power and ahb clock.
 	 */
-- 
GitLab


From a5b2dcb96d6acb286459612a142371b0d74543bf Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Wed, 20 Sep 2023 13:13:58 -0700
Subject: [PATCH 0712/2340] drm: improve the documentation of connector hpd ops

While making the changes in [1], it was noted that the documentation
of the enable_hpd() and disable_hpd() does not make it clear that
these ops should not try to do hpd state maintenance and should only
enable/disable hpd related hardware for the connector.

The state management of these calls to make sure these calls are
balanced is handled by the DRM core and we should keep it that way
to minimize the overhead in the drivers which implement these ops.

[1]: https://patchwork.freedesktop.org/patch/558387/

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Suggested-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230920201358.27597-1-quic_abhinavk@quicinc.com
---
 include/drm/drm_modeset_helper_vtables.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index e3c3ac6159094..a33cf74887375 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -1154,6 +1154,11 @@ struct drm_connector_helper_funcs {
 	 * This operation is optional.
 	 *
 	 * This callback is used by the drm_kms_helper_poll_enable() helpers.
+	 *
+	 * This operation does not need to perform any hpd state tracking as
+	 * the DRM core handles that maintenance and ensures the calls to enable
+	 * and disable hpd are balanced.
+	 *
 	 */
 	void (*enable_hpd)(struct drm_connector *connector);
 
@@ -1165,6 +1170,11 @@ struct drm_connector_helper_funcs {
 	 * This operation is optional.
 	 *
 	 * This callback is used by the drm_kms_helper_poll_disable() helpers.
+	 *
+	 * This operation does not need to perform any hpd state tracking as
+	 * the DRM core handles that maintenance and ensures the calls to enable
+	 * and disable hpd are balanced.
+	 *
 	 */
 	void (*disable_hpd)(struct drm_connector *connector);
 };
-- 
GitLab


From f730e7adfd69d7ac859d8fe4d67e980cbad1e445 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 19 Sep 2023 10:48:12 -0700
Subject: [PATCH 0713/2340] drm: remove drm_bridge_hpd_disable() from
 drm_bridge_connector_destroy()

drm_bridge_hpd_enable()/drm_bridge_hpd_disable() callbacks call into
the respective driver's hpd_enable()/hpd_disable() ops. These ops control
the HPD enable/disable logic which in some cases like MSM can be a
dedicate hardware block to control the HPD.

During probe_defer cases, a connector can be initialized and then later
destroyed till the probe is retried. During connector destroy in these
cases, the hpd_disable() callback gets called without a corresponding
hpd_enable() leading to an unbalanced state potentially causing even
a crash.

This can be avoided by the respective drivers maintaining their own
state logic to ensure that a hpd_disable() without a corresponding
hpd_enable() just returns without doing anything.

However, to have a generic fix it would be better to avoid the
hpd_disable() callback from the connector destroy path and let
the hpd_enable() / hpd_disable() balance be maintained by the
corresponding drm_bridge_connector_enable_hpd() /
drm_bridge_connector_disable_hpd() APIs which should get called by
drm_kms_helper_disable_hpd().

changes in v2:
	- minor change in commit text (Dmitry)

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230919174813.26958-1-quic_abhinavk@quicinc.com
---
 drivers/gpu/drm/drm_bridge_connector.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c
index 8239ad43aed58..3acd67021ec62 100644
--- a/drivers/gpu/drm/drm_bridge_connector.c
+++ b/drivers/gpu/drm/drm_bridge_connector.c
@@ -198,12 +198,6 @@ static void drm_bridge_connector_destroy(struct drm_connector *connector)
 	struct drm_bridge_connector *bridge_connector =
 		to_drm_bridge_connector(connector);
 
-	if (bridge_connector->bridge_hpd) {
-		struct drm_bridge *hpd = bridge_connector->bridge_hpd;
-
-		drm_bridge_hpd_disable(hpd);
-	}
-
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 
-- 
GitLab


From 72207699ff76d4392244c8d9850aaef0160dc6b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Sun, 3 Dec 2023 13:48:37 +0200
Subject: [PATCH 0714/2340] drm/i915/display: use intel_bo_to_drm_bo in
 intel_fb.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are preparing for Xe driver. I915 and Xe object implementation are
differing. Do not use  i915_gem_object->base directly. Instead use
intel_bo_to_drm_bo.

Also use drm_gem_object_put instead of i915_gem_object_put. This should be
ok as i915_gem_object_put is really just doing 	__drm_gem_object_put.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-2-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_fb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 77226ec00cb1c..b17600aba5772 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -1657,10 +1657,10 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *
 		max_size = max(max_size, offset + size);
 	}
 
-	if (mul_u32_u32(max_size, tile_size) > obj->base.size) {
+	if (mul_u32_u32(max_size, tile_size) > intel_bo_to_drm_bo(obj)->size) {
 		drm_dbg_kms(&i915->drm,
 			    "fb too big for bo (need %llu bytes, have %zu bytes)\n",
-			    mul_u32_u32(max_size, tile_size), obj->base.size);
+			    mul_u32_u32(max_size, tile_size), intel_bo_to_drm_bo(obj)->size);
 		return -EINVAL;
 	}
 
@@ -1889,7 +1889,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 						unsigned int *handle)
 {
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct drm_i915_private *i915 = to_i915(intel_bo_to_drm_bo(obj)->dev);
 
 	if (i915_gem_object_is_userptr(obj)) {
 		drm_dbg(&i915->drm,
@@ -1897,7 +1897,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 		return -EINVAL;
 	}
 
-	return drm_gem_handle_create(file, &obj->base, handle);
+	return drm_gem_handle_create(file, intel_bo_to_drm_bo(obj), handle);
 }
 
 struct frontbuffer_fence_cb {
@@ -1975,7 +1975,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			   struct drm_i915_gem_object *obj,
 			   struct drm_mode_fb_cmd2 *mode_cmd)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct drm_i915_private *dev_priv = to_i915(intel_bo_to_drm_bo(obj)->dev);
 	struct drm_framebuffer *fb = &intel_fb->base;
 	u32 max_stride;
 	unsigned int tiling, stride;
@@ -2153,7 +2153,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
 	}
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
-	i915_gem_object_put(obj);
+	drm_gem_object_put(intel_bo_to_drm_bo(obj));
 
 	return fb;
 }
-- 
GitLab


From 6383f69bd2ccd4765b22d60f12576891daa36c1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Sun, 3 Dec 2023 13:48:38 +0200
Subject: [PATCH 0715/2340] drm/i915/display: Convert
 intel_fb_modifier_to_tiling as non-static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are about to split i915 specific code from intel_fb.c. Convert
intel_fb_modifier_to_tiling as non-static to allow calling it from split
code.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-3-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_fb.c | 40 ++++++++++++-------------
 drivers/gpu/drm/i915/display/intel_fb.h |  2 ++
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index b17600aba5772..f42a693f96c17 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -301,6 +301,26 @@ lookup_format_info(const struct drm_format_info formats[],
 	return NULL;
 }
 
+unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
+{
+	u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps &
+			 INTEL_PLANE_CAP_TILING_MASK;
+
+	switch (tiling_caps) {
+	case INTEL_PLANE_CAP_TILING_Y:
+		return I915_TILING_Y;
+	case INTEL_PLANE_CAP_TILING_X:
+		return I915_TILING_X;
+	case INTEL_PLANE_CAP_TILING_4:
+	case INTEL_PLANE_CAP_TILING_Yf:
+	case INTEL_PLANE_CAP_TILING_NONE:
+		return I915_TILING_NONE;
+	default:
+		MISSING_CASE(tiling_caps);
+		return I915_TILING_NONE;
+	}
+}
+
 /**
  * intel_fb_get_format_info: Get a modifier specific format information
  * @cmd: FB add command structure
@@ -737,26 +757,6 @@ intel_fb_align_height(const struct drm_framebuffer *fb,
 	return ALIGN(height, tile_height);
 }
 
-static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
-{
-	u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps &
-			 INTEL_PLANE_CAP_TILING_MASK;
-
-	switch (tiling_caps) {
-	case INTEL_PLANE_CAP_TILING_Y:
-		return I915_TILING_Y;
-	case INTEL_PLANE_CAP_TILING_X:
-		return I915_TILING_X;
-	case INTEL_PLANE_CAP_TILING_4:
-	case INTEL_PLANE_CAP_TILING_Yf:
-	case INTEL_PLANE_CAP_TILING_NONE:
-		return I915_TILING_NONE;
-	default:
-		MISSING_CASE(tiling_caps);
-		return I915_TILING_NONE;
-	}
-}
-
 bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier)
 {
 	return HAS_DPT(i915) && modifier != DRM_FORMAT_MOD_LINEAR;
diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h
index e85167d6bc347..23db6628f53e7 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.h
+++ b/drivers/gpu/drm/i915/display/intel_fb.h
@@ -95,4 +95,6 @@ intel_user_framebuffer_create(struct drm_device *dev,
 bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier);
 bool intel_fb_uses_dpt(const struct drm_framebuffer *fb);
 
+unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier);
+
 #endif /* __INTEL_FB_H__ */
-- 
GitLab


From ae424921a5ca763fef4be46f900065db0b0870ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Sun, 3 Dec 2023 13:48:39 +0200
Subject: [PATCH 0716/2340] drm/i915/display: Handle invalid fb_modifier in
 intel_fb_modifier_to_tiling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lookup_modifier is returning INTEL_PLANE_CAP_TILING_4 on invalid
fb_modifier value. Use lookup_modifier_or_null in
intel_fb_modifier_to_tiling and return I915_TILING_NONE in case
lookup_modifier_or_null returns null.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-4-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_fb.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index f42a693f96c17..20d58a021a42e 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -303,7 +303,14 @@ lookup_format_info(const struct drm_format_info formats[],
 
 unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
 {
-	u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps &
+	const struct intel_modifier_desc *md;
+	u8 tiling_caps;
+
+	md = lookup_modifier_or_null(fb_modifier);
+	if (!md)
+		return I915_TILING_NONE;
+
+	tiling_caps = lookup_modifier_or_null(fb_modifier)->plane_caps &
 			 INTEL_PLANE_CAP_TILING_MASK;
 
 	switch (tiling_caps) {
-- 
GitLab


From 5f449ed05da8bb2a470b17962978f0347ba399d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Sun, 3 Dec 2023 13:48:40 +0200
Subject: [PATCH 0717/2340] drm/i915/display: Split i915 specific code away
 from intel_fb.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are preparing for Xe driver. Backing object implementation is differing
between i915 and Xe. Split i915 specific code into separate source file
built only for i915.

v9:
  - Use ERR_CAST
v8:
  - return original error code from intel_fb_bo_lookup_valid_bo on failure
v7:
  - drop #include <drm/drm_plane.h>
  - s/user_mode_cmd/mode_cmd/
  - Use passed i915 pointer instead of to_i915(obj->base.dev)
v6: Add missing intel_fb_bo.[ch]
v5:
  - Keep drm_any_plane_has_format check in intel_fb.c
  - Use mode_cmd instead of user_mode_cmd for intel_fb_bo_lookup_valid_bo
v4: Move drm_any_plane_has_format check into intel_fb_bo.c
v3: Fix failure handling in intel_framebuffer_init
v2: Couple of fixes to error value handling

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-5-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/Makefile              |  1 +
 drivers/gpu/drm/i915/display/intel_fb.c    | 73 +++--------------
 drivers/gpu/drm/i915/display/intel_fb_bo.c | 92 ++++++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_fb_bo.h | 24 ++++++
 4 files changed, 127 insertions(+), 63 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/display/intel_fb_bo.c
 create mode 100644 drivers/gpu/drm/i915/display/intel_fb_bo.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index eadc9e612962a..e777686190ca2 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -280,6 +280,7 @@ i915-y += \
 	display/intel_dsb.o \
 	display/intel_dsb_buffer.o \
 	display/intel_fb.o \
+	display/intel_fb_bo.o \
 	display/intel_fb_pin.o \
 	display/intel_fbc.o \
 	display/intel_fdi.o \
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 20d58a021a42e..96663f9ac431d 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -4,7 +4,6 @@
  */
 
 #include <drm/drm_blend.h>
-#include <drm/drm_framebuffer.h>
 #include <drm/drm_modeset_helper.h>
 
 #include <linux/dma-fence.h>
@@ -15,6 +14,7 @@
 #include "intel_display_types.h"
 #include "intel_dpt.h"
 #include "intel_fb.h"
+#include "intel_fb_bo.h"
 #include "intel_frontbuffer.h"
 
 #define check_array_bounds(i915, a, i) drm_WARN_ON(&(i915)->drm, (i) >= ARRAY_SIZE(a))
@@ -1985,7 +1985,6 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	struct drm_i915_private *dev_priv = to_i915(intel_bo_to_drm_bo(obj)->dev);
 	struct drm_framebuffer *fb = &intel_fb->base;
 	u32 max_stride;
-	unsigned int tiling, stride;
 	int ret = -EINVAL;
 	int i;
 
@@ -1993,32 +1992,11 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	if (!intel_fb->frontbuffer)
 		return -ENOMEM;
 
-	i915_gem_object_lock(obj, NULL);
-	tiling = i915_gem_object_get_tiling(obj);
-	stride = i915_gem_object_get_stride(obj);
-	i915_gem_object_unlock(obj);
-
-	if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
-		/*
-		 * If there's a fence, enforce that
-		 * the fb modifier and tiling mode match.
-		 */
-		if (tiling != I915_TILING_NONE &&
-		    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
-			drm_dbg_kms(&dev_priv->drm,
-				    "tiling_mode doesn't match fb modifier\n");
-			goto err;
-		}
-	} else {
-		if (tiling == I915_TILING_X) {
-			mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
-		} else if (tiling == I915_TILING_Y) {
-			drm_dbg_kms(&dev_priv->drm,
-				    "No Y tiling for legacy addfb\n");
-			goto err;
-		}
-	}
+	ret = intel_fb_bo_framebuffer_init(intel_fb, obj, mode_cmd);
+	if (ret)
+		goto err;
 
+	ret = -EINVAL;
 	if (!drm_any_plane_has_format(&dev_priv->drm,
 				      mode_cmd->pixel_format,
 				      mode_cmd->modifier[0])) {
@@ -2028,17 +2006,6 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		goto err;
 	}
 
-	/*
-	 * gen2/3 display engine uses the fence if present,
-	 * so the tiling mode must match the fb modifier exactly.
-	 */
-	if (DISPLAY_VER(dev_priv) < 4 &&
-	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "tiling_mode must match fb modifier exactly on gen2/3\n");
-		goto err;
-	}
-
 	max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format,
 					 mode_cmd->modifier[0]);
 	if (mode_cmd->pitches[0] > max_stride) {
@@ -2050,17 +2017,6 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		goto err;
 	}
 
-	/*
-	 * If there's a fence, enforce that
-	 * the fb pitch and fence stride match.
-	 */
-	if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "pitch (%d) must match tiling stride (%d)\n",
-			    mode_cmd->pitches[0], stride);
-		goto err;
-	}
-
 	/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
 	if (mode_cmd->offsets[0] != 0) {
 		drm_dbg_kms(&dev_priv->drm,
@@ -2144,20 +2100,11 @@ intel_user_framebuffer_create(struct drm_device *dev,
 	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
-	struct drm_i915_private *i915;
-
-	obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
-	if (!obj)
-		return ERR_PTR(-ENOENT);
-
-	/* object is backed with LMEM for discrete */
-	i915 = to_i915(obj->base.dev);
-	if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
-		/* object is "remote", not in local memory */
-		i915_gem_object_put(obj);
-		drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n");
-		return ERR_PTR(-EREMOTE);
-	}
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	obj = intel_fb_bo_lookup_valid_bo(i915, filp, &mode_cmd);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
 	drm_gem_object_put(intel_bo_to_drm_bo(obj));
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c
new file mode 100644
index 0000000000000..ce86eff7b226a
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_framebuffer.h>
+
+#include "gem/i915_gem_object.h"
+
+#include "i915_drv.h"
+#include "intel_fb.h"
+#include "intel_fb_bo.h"
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned int tiling, stride;
+
+	i915_gem_object_lock(obj, NULL);
+	tiling = i915_gem_object_get_tiling(obj);
+	stride = i915_gem_object_get_stride(obj);
+	i915_gem_object_unlock(obj);
+
+	if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
+		/*
+		 * If there's a fence, enforce that
+		 * the fb modifier and tiling mode match.
+		 */
+		if (tiling != I915_TILING_NONE &&
+		    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+			drm_dbg_kms(&i915->drm,
+				    "tiling_mode doesn't match fb modifier\n");
+			return -EINVAL;
+		}
+	} else {
+		if (tiling == I915_TILING_X) {
+			mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
+		} else if (tiling == I915_TILING_Y) {
+			drm_dbg_kms(&i915->drm,
+				    "No Y tiling for legacy addfb\n");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * gen2/3 display engine uses the fence if present,
+	 * so the tiling mode must match the fb modifier exactly.
+	 */
+	if (DISPLAY_VER(i915) < 4 &&
+	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+		drm_dbg_kms(&i915->drm,
+			    "tiling_mode must match fb modifier exactly on gen2/3\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * If there's a fence, enforce that
+	 * the fb pitch and fence stride match.
+	 */
+	if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) {
+		drm_dbg_kms(&i915->drm,
+			    "pitch (%d) must match tiling stride (%d)\n",
+			    mode_cmd->pitches[0], stride);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+struct drm_i915_gem_object *
+intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+			    struct drm_file *filp,
+			    const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = i915_gem_object_lookup(filp, mode_cmd->handles[0]);
+	if (!obj)
+		return ERR_PTR(-ENOENT);
+
+	/* object is backed with LMEM for discrete */
+	if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
+		/* object is "remote", not in local memory */
+		i915_gem_object_put(obj);
+		drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n");
+		return ERR_PTR(-EREMOTE);
+	}
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h
new file mode 100644
index 0000000000000..dd06ceec8601b
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct intel_framebuffer;
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct drm_i915_gem_object *
+intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+			    struct drm_file *filp,
+			    const struct drm_mode_fb_cmd2 *user_mode_cmd);
+
+#endif
-- 
GitLab


From 93032ae634d409e621c68a2fb7d6930e7eebb1d9 Mon Sep 17 00:00:00 2001
From: Marco Pagani <marpagan@redhat.com>
Date: Thu, 30 Nov 2023 18:14:16 +0100
Subject: [PATCH 0718/2340] drm/test: add a test suite for GEM objects backed
 by shmem

This patch introduces an initial KUnit test suite for GEM objects
backed by shmem buffers.

Suggested-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Marco Pagani <marpagan@redhat.com>

v5:
- using __drm_kunit_helper_alloc_drm_device() to avoid local struct
v4:
- Add missing MMU dependency for DRM_GEM_SHMEM_HELPER (kernel test robot)
v3:
- Explicitly cast pointers in the helpers
- Removed unused pointer to parent dev in struct fake_dev
- Test entries reordering in Kconfig and Makefile sent as a separate patch
v2:
- Improved description of test cases
- Cleaner error handling using KUnit actions
- Alphabetical order in Kconfig and Makefile

Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130171417.74162-1-marpagan@redhat.com
---
 drivers/gpu/drm/Kconfig                    |   3 +-
 drivers/gpu/drm/tests/Makefile             |   1 +
 drivers/gpu/drm/tests/drm_gem_shmem_test.c | 383 +++++++++++++++++++++
 3 files changed, 386 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tests/drm_gem_shmem_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 740c1c0bd0680..b7abd436455f0 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -74,12 +74,13 @@ config DRM_KUNIT_TEST_HELPERS
 
 config DRM_KUNIT_TEST
 	tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS
-	depends on DRM && KUNIT
+	depends on DRM && KUNIT && MMU
 	select DRM_BUDDY
 	select DRM_DISPLAY_DP_HELPER
 	select DRM_DISPLAY_HELPER
 	select DRM_EXEC
 	select DRM_EXPORT_FOR_TESTS if m
+	select DRM_GEM_SHMEM_HELPER
 	select DRM_KMS_HELPER
 	select DRM_KUNIT_TEST_HELPERS
 	select DRM_LIB_RANDOM
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index 2645af241ff0b..d6183b3d76880 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_format_helper_test.o \
 	drm_format_test.o \
 	drm_framebuffer_test.o \
+	drm_gem_shmem_test.o \
 	drm_managed_test.o \
 	drm_mm_test.o \
 	drm_modes_test.o \
diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c
new file mode 100644
index 0000000000000..91202e40cde94
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test suite for GEM objects backed by shmem buffers
+ *
+ * Copyright (C) 2023 Red Hat, Inc.
+ *
+ * Author: Marco Pagani <marpagan@redhat.com>
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/iosys-map.h>
+#include <linux/sizes.h>
+
+#include <kunit/test.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_kunit_helpers.h>
+
+#define TEST_SIZE		SZ_1M
+#define TEST_BYTE		0xae
+
+/*
+ * Wrappers to avoid an explicit type casting when passing action
+ * functions to kunit_add_action().
+ */
+static void kfree_wrapper(void *ptr)
+{
+	const void *obj = ptr;
+
+	kfree(obj);
+}
+
+static void sg_free_table_wrapper(void *ptr)
+{
+	struct sg_table *sgt = ptr;
+
+	sg_free_table(sgt);
+}
+
+static void drm_gem_shmem_free_wrapper(void *ptr)
+{
+	struct drm_gem_shmem_object *shmem = ptr;
+
+	drm_gem_shmem_free(shmem);
+}
+
+/*
+ * Test creating a shmem GEM object backed by shmem buffer. The test
+ * case succeeds if the GEM object is successfully allocated with the
+ * shmem file node and object functions attributes set, and the size
+ * attribute is equal to the correct size.
+ */
+static void drm_gem_shmem_test_obj_create(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+
+	shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem);
+	KUNIT_EXPECT_EQ(test, shmem->base.size, TEST_SIZE);
+	KUNIT_EXPECT_NOT_NULL(test, shmem->base.filp);
+	KUNIT_EXPECT_NOT_NULL(test, shmem->base.funcs);
+
+	drm_gem_shmem_free(shmem);
+}
+
+/*
+ * Test creating a shmem GEM object from a scatter/gather table exported
+ * via a DMA-BUF. The test case succeed if the GEM object is successfully
+ * created with the shmem file node attribute equal to NULL and the sgt
+ * attribute pointing to the scatter/gather table that has been imported.
+ */
+static void drm_gem_shmem_test_obj_create_private(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+	struct drm_gem_object *gem_obj;
+	struct dma_buf buf_mock;
+	struct dma_buf_attachment attach_mock;
+	struct sg_table *sgt;
+	char *buf;
+	int ret;
+
+	/* Create a mock scatter/gather table */
+	buf = kunit_kzalloc(test, TEST_SIZE, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, buf);
+
+	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, sgt);
+
+	ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	sg_init_one(sgt->sgl, buf, TEST_SIZE);
+
+	/* Init a mock DMA-BUF */
+	buf_mock.size = TEST_SIZE;
+	attach_mock.dmabuf = &buf_mock;
+
+	gem_obj = drm_gem_shmem_prime_import_sg_table(drm_dev, &attach_mock, sgt);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gem_obj);
+	KUNIT_EXPECT_EQ(test, gem_obj->size, TEST_SIZE);
+	KUNIT_EXPECT_NULL(test, gem_obj->filp);
+	KUNIT_EXPECT_NOT_NULL(test, gem_obj->funcs);
+
+	/* The scatter/gather table will be freed by drm_gem_shmem_free */
+	kunit_remove_action(test, sg_free_table_wrapper, sgt);
+	kunit_remove_action(test, kfree_wrapper, sgt);
+
+	shmem = to_drm_gem_shmem_obj(gem_obj);
+	KUNIT_EXPECT_PTR_EQ(test, shmem->sgt, sgt);
+
+	drm_gem_shmem_free(shmem);
+}
+
+/*
+ * Test pinning backing pages for a shmem GEM object. The test case
+ * succeeds if a suitable number of backing pages are allocated, and
+ * the pages table counter attribute is increased by one.
+ */
+static void drm_gem_shmem_test_pin_pages(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+	int i, ret;
+
+	shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem);
+	KUNIT_EXPECT_NULL(test, shmem->pages);
+	KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 0);
+
+	ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ret = drm_gem_shmem_pin(shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+	KUNIT_ASSERT_NOT_NULL(test, shmem->pages);
+	KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 1);
+
+	for (i = 0; i < (shmem->base.size >> PAGE_SHIFT); i++)
+		KUNIT_ASSERT_NOT_NULL(test, shmem->pages[i]);
+
+	drm_gem_shmem_unpin(shmem);
+	KUNIT_EXPECT_NULL(test, shmem->pages);
+	KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 0);
+}
+
+/*
+ * Test creating a virtual mapping for a shmem GEM object. The test
+ * case succeeds if the backing memory is mapped and the reference
+ * counter for virtual mapping is increased by one. Moreover, the test
+ * case writes and then reads a test pattern over the mapped memory.
+ */
+static void drm_gem_shmem_test_vmap(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+	struct iosys_map map;
+	int ret, i;
+
+	shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem);
+	KUNIT_EXPECT_NULL(test, shmem->vaddr);
+	KUNIT_EXPECT_EQ(test, shmem->vmap_use_count, 0);
+
+	ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ret = drm_gem_shmem_vmap(shmem, &map);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+	KUNIT_ASSERT_NOT_NULL(test, shmem->vaddr);
+	KUNIT_ASSERT_FALSE(test, iosys_map_is_null(&map));
+	KUNIT_EXPECT_EQ(test, shmem->vmap_use_count, 1);
+
+	iosys_map_memset(&map, 0, TEST_BYTE, TEST_SIZE);
+	for (i = 0; i < TEST_SIZE; i++)
+		KUNIT_EXPECT_EQ(test, iosys_map_rd(&map, i, u8), TEST_BYTE);
+
+	drm_gem_shmem_vunmap(shmem, &map);
+	KUNIT_EXPECT_NULL(test, shmem->vaddr);
+	KUNIT_EXPECT_EQ(test, shmem->vmap_use_count, 0);
+}
+
+/*
+ * Test exporting a scatter/gather table of pinned pages suitable for
+ * PRIME usage from a shmem GEM object. The test case succeeds if a
+ * scatter/gather table large enough to accommodate the backing memory
+ * is successfully exported.
+ */
+static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int si, len = 0;
+	int ret;
+
+	shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem);
+
+	ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ret = drm_gem_shmem_pin(shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	sgt = drm_gem_shmem_get_sg_table(shmem);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt);
+	KUNIT_EXPECT_NULL(test, shmem->sgt);
+
+	ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	for_each_sgtable_sg(sgt, sg, si) {
+		KUNIT_EXPECT_NOT_NULL(test, sg);
+		len += sg->length;
+	}
+
+	KUNIT_EXPECT_GE(test, len, TEST_SIZE);
+}
+
+/*
+ * Test pinning pages and exporting a scatter/gather table suitable for
+ * driver usage from a shmem GEM object. The test case succeeds if the
+ * backing pages are pinned and a scatter/gather table large enough to
+ * accommodate the backing memory is successfully exported.
+ */
+static void drm_gem_shmem_test_get_sg_table(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int si, ret, len = 0;
+
+	shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem);
+
+	ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	/* The scatter/gather table will be freed by drm_gem_shmem_free */
+	sgt = drm_gem_shmem_get_pages_sgt(shmem);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt);
+	KUNIT_ASSERT_NOT_NULL(test, shmem->pages);
+	KUNIT_EXPECT_EQ(test, shmem->pages_use_count, 1);
+	KUNIT_EXPECT_PTR_EQ(test, sgt, shmem->sgt);
+
+	for_each_sgtable_sg(sgt, sg, si) {
+		KUNIT_EXPECT_NOT_NULL(test, sg);
+		len += sg->length;
+	}
+
+	KUNIT_EXPECT_GE(test, len, TEST_SIZE);
+}
+
+/*
+ * Test updating the madvise state of a shmem GEM object. The test
+ * case checks that the function for setting madv updates it only if
+ * its current value is greater or equal than zero and returns false
+ * if it has a negative value.
+ */
+static void drm_gem_shmem_test_madvise(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+	int ret;
+
+	shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem);
+	KUNIT_ASSERT_EQ(test, shmem->madv, 0);
+
+	ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ret = drm_gem_shmem_madvise(shmem, 1);
+	KUNIT_EXPECT_TRUE(test, ret);
+	KUNIT_ASSERT_EQ(test, shmem->madv, 1);
+
+	/* Set madv to a negative value */
+	ret = drm_gem_shmem_madvise(shmem, -1);
+	KUNIT_EXPECT_FALSE(test, ret);
+	KUNIT_ASSERT_EQ(test, shmem->madv, -1);
+
+	/* Check that madv cannot be set back to a positive value */
+	ret = drm_gem_shmem_madvise(shmem, 0);
+	KUNIT_EXPECT_FALSE(test, ret);
+	KUNIT_ASSERT_EQ(test, shmem->madv, -1);
+}
+
+/*
+ * Test purging a shmem GEM object. First, assert that a newly created
+ * shmem GEM object is not purgeable. Then, set madvise to a positive
+ * value and call drm_gem_shmem_get_pages_sgt() to pin and dma-map the
+ * backing pages. Finally, assert that the shmem GEM object is now
+ * purgeable and purge it.
+ */
+static void drm_gem_shmem_test_purge(struct kunit *test)
+{
+	struct drm_device *drm_dev = test->priv;
+	struct drm_gem_shmem_object *shmem;
+	struct sg_table *sgt;
+	int ret;
+
+	shmem = drm_gem_shmem_create(drm_dev, TEST_SIZE);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, shmem);
+
+	ret = kunit_add_action_or_reset(test, drm_gem_shmem_free_wrapper, shmem);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ret = drm_gem_shmem_is_purgeable(shmem);
+	KUNIT_EXPECT_FALSE(test, ret);
+
+	ret = drm_gem_shmem_madvise(shmem, 1);
+	KUNIT_EXPECT_TRUE(test, ret);
+
+	/* The scatter/gather table will be freed by drm_gem_shmem_free */
+	sgt = drm_gem_shmem_get_pages_sgt(shmem);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt);
+
+	ret = drm_gem_shmem_is_purgeable(shmem);
+	KUNIT_EXPECT_TRUE(test, ret);
+
+	drm_gem_shmem_purge(shmem);
+	KUNIT_EXPECT_NULL(test, shmem->pages);
+	KUNIT_EXPECT_NULL(test, shmem->sgt);
+	KUNIT_EXPECT_EQ(test, shmem->madv, -1);
+}
+
+static int drm_gem_shmem_test_init(struct kunit *test)
+{
+	struct device *dev;
+	struct drm_device *drm_dev;
+
+	/* Allocate a parent device */
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	/*
+	 * The DRM core will automatically initialize the GEM core and create
+	 * a DRM Memory Manager object which provides an address space pool
+	 * for GEM objects allocation.
+	 */
+	drm_dev = __drm_kunit_helper_alloc_drm_device(test, dev, sizeof(*drm_dev),
+						      0, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, drm_dev);
+
+	test->priv = drm_dev;
+
+	return 0;
+}
+
+static struct kunit_case drm_gem_shmem_test_cases[] = {
+	KUNIT_CASE(drm_gem_shmem_test_obj_create),
+	KUNIT_CASE(drm_gem_shmem_test_obj_create_private),
+	KUNIT_CASE(drm_gem_shmem_test_pin_pages),
+	KUNIT_CASE(drm_gem_shmem_test_vmap),
+	KUNIT_CASE(drm_gem_shmem_test_get_pages_sgt),
+	KUNIT_CASE(drm_gem_shmem_test_get_sg_table),
+	KUNIT_CASE(drm_gem_shmem_test_madvise),
+	KUNIT_CASE(drm_gem_shmem_test_purge),
+	{}
+};
+
+static struct kunit_suite drm_gem_shmem_suite = {
+	.name = "drm_gem_shmem",
+	.init = drm_gem_shmem_test_init,
+	.test_cases = drm_gem_shmem_test_cases
+};
+
+kunit_test_suite(drm_gem_shmem_suite);
+
+MODULE_LICENSE("GPL");
-- 
GitLab


From 51097ef14d4e555c532ae535d24f97cc19c8c5a6 Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Thu, 30 Nov 2023 16:00:13 +0000
Subject: [PATCH 0719/2340] drm/imagination: Fixed warning due to implicit cast
 to bool

This line appears to confuse the compiler and had been noticed previously in
clang-tidy output. There isn't anything fundamentally wrong that I can see.
I suspect that it just looks like a mistake - hence the first note.  By making
the second operand an actual bool result, const correctness can be preserved
while silencing the warning.

>> drivers/gpu/drm/imagination/pvr_device_info.c:230:47: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand]
     230 |         } else if (features_size == mapping_max_size && (mapping_max & 63)) {
         |                                                      ^  ~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/imagination/pvr_device_info.c:230:47: note: use '&' for a bitwise operation
     230 |         } else if (features_size == mapping_max_size && (mapping_max & 63)) {
         |                                                      ^~
         |                                                      &
   drivers/gpu/drm/imagination/pvr_device_info.c:230:47: note: remove constant to silence this warning
     230 |         } else if (features_size == mapping_max_size && (mapping_max & 63)) {
         |                                                     ~^~~~~~~~~~~~~~~~~~~~~
   1 warning generated.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311241752.3iLyyFcA-lkp@intel.com/
Fixes: f99f5f3ea7ef ("drm/imagination: Add GPU ID parsing and firmware loading")
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130160017.259902-1-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_device_info.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_device_info.c b/drivers/gpu/drm/imagination/pvr_device_info.c
index 11e6bef52ecd0..d3301cde7d119 100644
--- a/drivers/gpu/drm/imagination/pvr_device_info.c
+++ b/drivers/gpu/drm/imagination/pvr_device_info.c
@@ -227,7 +227,8 @@ int pvr_device_info_set_features(struct pvr_device *pvr_dev, const u64 *features
 	/* Verify no unsupported values in the bitmask. */
 	if (features_size > mapping_max_size) {
 		drm_warn(from_pvr_device(pvr_dev), "Unsupported features in firmware image");
-	} else if (features_size == mapping_max_size && (mapping_max & 63)) {
+	} else if (features_size == mapping_max_size &&
+		   ((mapping_max & 63) != 0)) {
 		u64 invalid_mask = ~0ull << (mapping_max & 63);
 
 		if (features[features_size - 1] & invalid_mask)
-- 
GitLab


From 0ffe9eb826f1391d52089ba8056a3778688da57d Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Thu, 30 Nov 2023 16:00:14 +0000
Subject: [PATCH 0720/2340] drm/imagination: Fixed missing header in
 pvr_fw_meta

A missing header causes the compiler to warn that the function below is not
forward declared.

>> drivers/gpu/drm/imagination/pvr_fw_meta.c:33:1: warning: no previous prototype for function 'pvr_meta_cr_read32' [-Wmissing-prototypes]
      33 | pvr_meta_cr_read32(struct pvr_device *pvr_dev, u32 reg_addr, u32 *reg_value_out)
         | ^
   drivers/gpu/drm/imagination/pvr_fw_meta.c:32:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
      32 | int
         | ^
         | static
   1 warning generated.

Include the correct header.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311250226.Da2yiSKp-lkp@intel.com/
Fixes: cc1aeedb98ad ("drm/imagination: Implement firmware infrastructure and META FW support")
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130160017.259902-2-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_fw_meta.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/imagination/pvr_fw_meta.c b/drivers/gpu/drm/imagination/pvr_fw_meta.c
index 119934c36184a..c39beb70c3173 100644
--- a/drivers/gpu/drm/imagination/pvr_fw_meta.c
+++ b/drivers/gpu/drm/imagination/pvr_fw_meta.c
@@ -4,6 +4,7 @@
 #include "pvr_device.h"
 #include "pvr_fw.h"
 #include "pvr_fw_info.h"
+#include "pvr_fw_meta.h"
 #include "pvr_gem.h"
 #include "pvr_rogue_cr_defs.h"
 #include "pvr_rogue_meta.h"
-- 
GitLab


From 7620c6bd76b1076b104926b78da8d6ff17cfef5d Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Thu, 30 Nov 2023 16:00:15 +0000
Subject: [PATCH 0721/2340] drm/imagination: pvr_device_process_active_queues
 now static

The function below is used only within this source file, but is not static.

>> drivers/gpu/drm/imagination/pvr_device.c:129:6: warning: no previous prototype for function 'pvr_device_process_active_queues' [-Wmissing-prototypes]
     129 | void pvr_device_process_active_queues(struct pvr_device *pvr_dev)
         |      ^
   drivers/gpu/drm/imagination/pvr_device.c:129:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     129 | void pvr_device_process_active_queues(struct pvr_device *pvr_dev)
         | ^
         | static
   1 warning generated.

Make it static.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311250632.giVEx7MU-lkp@intel.com/
Fixes: eaf01ee5ba28 ("drm/imagination: Implement job submission and scheduling")
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130160017.259902-3-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c
index 5389aea7ff216..1704c0268589b 100644
--- a/drivers/gpu/drm/imagination/pvr_device.c
+++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -127,7 +127,7 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev)
  * This is called any time we receive a FW event. It iterates over all
  * active queues and calls pvr_queue_process() on them.
  */
-void pvr_device_process_active_queues(struct pvr_device *pvr_dev)
+static void pvr_device_process_active_queues(struct pvr_device *pvr_dev)
 {
 	struct pvr_queue *queue, *tmp_queue;
 	LIST_HEAD(active_queues);
-- 
GitLab


From e8878b8043a25a19d0b405a29652a0cb94f56cdb Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Thu, 30 Nov 2023 16:00:16 +0000
Subject: [PATCH 0722/2340] drm/imagination: pvr_gpuvm_free() now static

The function below is used only within this source file, but is not static.

drivers/gpu/drm/imagination/pvr_vm.c:542:6: error: no previous prototype for 'pvr_gpuvm_free' [-Werror=missing-prototypes]
  542 | void pvr_gpuvm_free(struct drm_gpuvm *gpuvm)

Make it static.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311242159.hh8MWiAm-lkp@intel.com/
Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Link: https://lore.kernel.org/r/20231130160017.259902-4-donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/pvr_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 2aab53594a773..30ecd7d7052e5 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -539,7 +539,7 @@ pvr_device_addr_and_size_are_valid(struct pvr_vm_context *vm_ctx,
 	       (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
 }
 
-void pvr_gpuvm_free(struct drm_gpuvm *gpuvm)
+static void pvr_gpuvm_free(struct drm_gpuvm *gpuvm)
 {
 	kfree(to_pvr_vm_context(gpuvm));
 }
-- 
GitLab


From 72ef65ab246e55847097d68e0964fbcdfff4366c Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Thu, 30 Nov 2023 16:00:17 +0000
Subject: [PATCH 0723/2340] drm/imagination: Removed unused function
 to_pvr_vm_gpuva()

This function is now unused, hence it causes a compiler warning.

   drivers/gpu/drm/imagination/pvr_vm.c:112:22: warning: unused function 'to_pvr_vm_gpuva' [-Wunused-function]
     112 | struct pvr_vm_gpuva *to_pvr_vm_gpuva(struct drm_gpuva *gpuva)
         |                      ^

Remove the function for now.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311242159.hh8MWiAm-lkp@intel.com/
Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130160017.259902-5-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 30ecd7d7052e5..6f4e07effad23 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -114,12 +114,6 @@ struct pvr_vm_gpuva {
 	struct drm_gpuva base;
 };
 
-static __always_inline
-struct pvr_vm_gpuva *to_pvr_vm_gpuva(struct drm_gpuva *gpuva)
-{
-	return container_of(gpuva, struct pvr_vm_gpuva, base);
-}
-
 enum pvr_vm_bind_type {
 	PVR_VM_BIND_TYPE_MAP,
 	PVR_VM_BIND_TYPE_UNMAP,
-- 
GitLab


From 5f8dec200923a76dc57187965fd59c1136f5d085 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 01:55:52 +0300
Subject: [PATCH 0724/2340] drm/drv: propagate errors from
 drm_modeset_register_all()

In case the drm_modeset_register_all() function fails, its error code
will be ignored. Instead make the drm_dev_register() bail out in case of
such an error.

Fixes: 79190ea2658a ("drm: Add callbacks for late registering")
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231202225552.1283638-1-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_drv.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 535f16e7882e7..3c835c99daadd 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -949,8 +949,11 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 			goto err_minors;
 	}
 
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		drm_modeset_register_all(dev);
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_modeset_register_all(dev);
+		if (ret)
+			goto err_unload;
+	}
 
 	DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
 		 driver->name, driver->major, driver->minor,
@@ -960,6 +963,9 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 
 	goto out_unlock;
 
+err_unload:
+	if (dev->driver->unload)
+		dev->driver->unload(dev);
 err_minors:
 	remove_compat_control_link(dev);
 	drm_minor_unregister(dev, DRM_MINOR_ACCEL);
-- 
GitLab


From b1dba0b13c0aa93d22f8ef8cb082a4f32e5ab1f6 Mon Sep 17 00:00:00 2001
From: heminhong <heminhong@kylinos.cn>
Date: Fri, 10 Nov 2023 13:50:31 +0800
Subject: [PATCH 0725/2340] drm/qxl: remove unused declaration

Some functions are never used by the driver,
removing the functions declaration, it can be reducing program size,
and improving code readability and maintainability.

Signed-off-by: heminhong <heminhong@kylinos.cn>
Link: https://lore.kernel.org/r/20231110055031.57360-1-heminhong@kylinos.cn
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/qxl/qxl_drv.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 307a890fde133..32069acd93f87 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -119,7 +119,6 @@ struct qxl_output {
 
 #define to_qxl_crtc(x) container_of(x, struct qxl_crtc, base)
 #define drm_connector_to_qxl_output(x) container_of(x, struct qxl_output, base)
-#define drm_encoder_to_qxl_output(x) container_of(x, struct qxl_output, enc)
 
 struct qxl_mman {
 	struct ttm_device		bdev;
@@ -256,8 +255,6 @@ struct qxl_device {
 
 #define to_qxl(dev) container_of(dev, struct qxl_device, ddev)
 
-int qxl_debugfs_fence_init(struct qxl_device *rdev);
-
 int qxl_device_init(struct qxl_device *qdev, struct pci_dev *pdev);
 void qxl_device_fini(struct qxl_device *qdev);
 
@@ -344,8 +341,6 @@ qxl_image_alloc_objects(struct qxl_device *qdev,
 			int height, int stride);
 void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage);
 
-void qxl_update_screen(struct qxl_device *qxl);
-
 /* qxl io operations (qxl_cmd.c) */
 
 void qxl_io_create_primary(struct qxl_device *qdev,
@@ -445,8 +440,6 @@ int qxl_hw_surface_dealloc(struct qxl_device *qdev,
 
 int qxl_bo_check_id(struct qxl_device *qdev, struct qxl_bo *bo);
 
-struct qxl_drv_surface *
-qxl_surface_lookup(struct drm_device *dev, int surface_id);
 void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freeing);
 
 /* qxl_ioctl.c */
-- 
GitLab


From 687eb09b1d76d01401dd9b22efb34931c3f1e21d Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 29 Nov 2023 19:35:06 +0200
Subject: [PATCH 0726/2340] drm/i915/syncmap: squelch a sparse warning

The code is fine, really, but tweak it to get rid of the sparse warning:

drivers/gpu/drm/i915/selftests/i915_syncmap.c:80:54: warning: dubious: x | !y

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129173506.1194437-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/selftests/i915_syncmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
index 47f4ae18a1ef0..88fa845e9f4a0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_syncmap.c
+++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
@@ -77,7 +77,7 @@ __sync_print(struct i915_syncmap *p,
 		for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
 			buf = __sync_print(__sync_child(p)[i], buf, sz,
 					   depth + 1,
-					   last << 1 | !!(p->bitmap >> (i + 1)),
+					   last << 1 | ((p->bitmap >> (i + 1)) ? 1 : 0),
 					   i);
 		}
 	}
-- 
GitLab


From 1116efbff3b106ec131e833f0e78f35c923d0104 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 29 Nov 2023 16:01:28 +0200
Subject: [PATCH 0727/2340] drm/i915/display: Don't use "proxy" headers

The driver uses math.h and not util_macros.h.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129140129.699767-1-andriy.shevchenko@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_snps_phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index ce5a73a4cc893..bc61e736f9b32 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -3,7 +3,7 @@
  * Copyright © 2019 Intel Corporation
  */
 
-#include <linux/util_macros.h>
+#include <linux/math.h>
 
 #include "i915_reg.h"
 #include "intel_ddi.h"
-- 
GitLab


From e4256751df4a0a3860f181588ee730dd19cb0c30 Mon Sep 17 00:00:00 2001
From: Khaled Almahallawy <khaled.almahallawy@intel.com>
Date: Thu, 30 Nov 2023 15:15:10 -0800
Subject: [PATCH 0728/2340] drm/display/dp: Add the remaining Square PHY
 patterns DPCD register definitions

DP2.1 Specs added new DPCDs definitions for square pattern configs[1]
These new definitions are used for UHBR Source Transmitter
Equalizations tests[2]. Add the 3 new values for square pattern.

v2: rebase

[1]: DP2.1 Specs - 2.12.3.6.5 Square Pattern
[2]: DP2.1 PHY CTS specs - 4.3 UHBR Source Transmitter Equalization

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Lee Shawn C <shawn.c.lee@intel.com>
Signed-off-by: Khaled Almahallawy <khaled.almahallawy@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130231510.221143-1-khaled.almahallawy@intel.com
---
 include/drm/display/drm_dp.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index 83d2039c018b4..3731828825bdf 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -651,6 +651,9 @@
 # define DP_LINK_QUAL_PATTERN_PRSBS31       0x38
 # define DP_LINK_QUAL_PATTERN_CUSTOM        0x40
 # define DP_LINK_QUAL_PATTERN_SQUARE        0x48
+# define DP_LINK_QUAL_PATTERN_SQUARE_PRESHOOT_DISABLED                   0x49
+# define DP_LINK_QUAL_PATTERN_SQUARE_DEEMPHASIS_DISABLED                 0x4a
+# define DP_LINK_QUAL_PATTERN_SQUARE_PRESHOOT_DEEMPHASIS_DISABLED        0x4b
 
 #define DP_TRAINING_LANE0_1_SET2	    0x10f
 #define DP_TRAINING_LANE2_3_SET2	    0x110
-- 
GitLab


From 0b82a2b70f890e8dd7a46dfbfcce00bd7e434762 Mon Sep 17 00:00:00 2001
From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Date: Wed, 15 Nov 2023 13:13:36 +0100
Subject: [PATCH 0729/2340] drm/bridge: lt8912b: Add suspend/resume support

Add support for suspend and resume. The lt8912b will power off when
going into suspend and power on when resuming.

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115121338.22959-2-francesco@dolcini.it
---
 drivers/gpu/drm/bridge/lontium-lt8912b.c | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c
index 03532efb893bb..097ab04234b73 100644
--- a/drivers/gpu/drm/bridge/lontium-lt8912b.c
+++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c
@@ -634,6 +634,33 @@ static const struct drm_bridge_funcs lt8912_bridge_funcs = {
 	.get_edid = lt8912_bridge_get_edid,
 };
 
+static int lt8912_bridge_resume(struct device *dev)
+{
+	struct lt8912 *lt = dev_get_drvdata(dev);
+	int ret;
+
+	ret = lt8912_hard_power_on(lt);
+	if (ret)
+		return ret;
+
+	ret = lt8912_soft_power_on(lt);
+	if (ret)
+		return ret;
+
+	return lt8912_video_on(lt);
+}
+
+static int lt8912_bridge_suspend(struct device *dev)
+{
+	struct lt8912 *lt = dev_get_drvdata(dev);
+
+	lt8912_hard_power_off(lt);
+
+	return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(lt8912_bridge_pm_ops, lt8912_bridge_suspend, lt8912_bridge_resume);
+
 static int lt8912_parse_dt(struct lt8912 *lt)
 {
 	struct gpio_desc *gp_reset;
@@ -770,6 +797,7 @@ static struct i2c_driver lt8912_i2c_driver = {
 	.driver = {
 		.name = "lt8912",
 		.of_match_table = lt8912_dt_match,
+		.pm = pm_sleep_ptr(&lt8912_bridge_pm_ops),
 	},
 	.probe = lt8912_probe,
 	.remove = lt8912_remove,
-- 
GitLab


From f168c7f7d1a0cb12a4888af9f3f907139372f137 Mon Sep 17 00:00:00 2001
From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Date: Wed, 15 Nov 2023 13:13:37 +0100
Subject: [PATCH 0730/2340] dt-bindings: display: bridge: lt8912b: Add power
 supplies

Add Lontium lt8912b power supplies.

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115121338.22959-3-francesco@dolcini.it
---
 .../display/bridge/lontium,lt8912b.yaml       | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml b/Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml
index f201ae4af4fbd..2cef252157985 100644
--- a/Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/lontium,lt8912b.yaml
@@ -55,6 +55,27 @@ properties:
       - port@0
       - port@1
 
+  vcchdmipll-supply:
+    description: A 1.8V supply that powers the HDMI PLL.
+
+  vcchdmitx-supply:
+    description: A 1.8V supply that powers the HDMI TX part.
+
+  vcclvdspll-supply:
+    description: A 1.8V supply that powers the LVDS PLL.
+
+  vcclvdstx-supply:
+    description: A 1.8V supply that powers the LVDS TX part.
+
+  vccmipirx-supply:
+    description: A 1.8V supply that powers the MIPI RX part.
+
+  vccsysclk-supply:
+    description: A 1.8V supply that powers the SYSCLK.
+
+  vdd-supply:
+    description: A 1.8V supply that powers the digital part.
+
 required:
   - compatible
   - reg
-- 
GitLab


From f6d8a80f1d10ff01cff3ac26e242165a270bbbad Mon Sep 17 00:00:00 2001
From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Date: Wed, 15 Nov 2023 13:13:38 +0100
Subject: [PATCH 0731/2340] drm/bridge: lt8912b: Add power supplies

Add supplies to the driver that can be used to turn the Lontium lt8912b
on and off. It can have up to 7 independent supplies, we add them all
and enable/disable them with bulk_enable/disable.

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231115121338.22959-4-francesco@dolcini.it
---
 drivers/gpu/drm/bridge/lontium-lt8912b.c | 30 ++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c
index 097ab04234b73..273157428c827 100644
--- a/drivers/gpu/drm/bridge/lontium-lt8912b.c
+++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c
@@ -43,6 +43,8 @@ struct lt8912 {
 
 	struct videomode mode;
 
+	struct regulator_bulk_data supplies[7];
+
 	u8 data_lanes;
 	bool is_power_on;
 };
@@ -257,6 +259,12 @@ static int lt8912_free_i2c(struct lt8912 *lt)
 
 static int lt8912_hard_power_on(struct lt8912 *lt)
 {
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(lt->supplies), lt->supplies);
+	if (ret)
+		return ret;
+
 	gpiod_set_value_cansleep(lt->gp_reset, 0);
 	msleep(20);
 
@@ -267,6 +275,9 @@ static void lt8912_hard_power_off(struct lt8912 *lt)
 {
 	gpiod_set_value_cansleep(lt->gp_reset, 1);
 	msleep(20);
+
+	regulator_bulk_disable(ARRAY_SIZE(lt->supplies), lt->supplies);
+
 	lt->is_power_on = false;
 }
 
@@ -661,6 +672,21 @@ static int lt8912_bridge_suspend(struct device *dev)
 
 static DEFINE_SIMPLE_DEV_PM_OPS(lt8912_bridge_pm_ops, lt8912_bridge_suspend, lt8912_bridge_resume);
 
+static int lt8912_get_regulators(struct lt8912 *lt)
+{
+	unsigned int i;
+	const char * const supply_names[] = {
+		"vdd", "vccmipirx", "vccsysclk", "vcclvdstx",
+		"vcchdmitx", "vcclvdspll", "vcchdmipll"
+	};
+
+	for (i = 0; i < ARRAY_SIZE(lt->supplies); i++)
+		lt->supplies[i].supply = supply_names[i];
+
+	return devm_regulator_bulk_get(lt->dev, ARRAY_SIZE(lt->supplies),
+				       lt->supplies);
+}
+
 static int lt8912_parse_dt(struct lt8912 *lt)
 {
 	struct gpio_desc *gp_reset;
@@ -712,6 +738,10 @@ static int lt8912_parse_dt(struct lt8912 *lt)
 		goto err_free_host_node;
 	}
 
+	ret = lt8912_get_regulators(lt);
+	if (ret)
+		goto err_free_host_node;
+
 	of_node_put(port_node);
 	return 0;
 
-- 
GitLab


From 914437992876838662c968cb416f832110fb1093 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Mon, 4 Dec 2023 15:29:00 +0300
Subject: [PATCH 0732/2340] drm/bridge: nxp-ptn3460: fix i2c_master_send()
 error checking

The i2c_master_send/recv() functions return negative error codes or the
number of bytes that were able to be sent/received.  This code has
two problems.  1)  Instead of checking if all the bytes were sent or
received, it checks that at least one byte was sent or received.
2) If there was a partial send/receive then we should return a negative
error code but this code returns success.

Fixes: a9fe713d7d45 ("drm/bridge: Add PTN3460 bridge driver")
Cc: stable@vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/0cdc2dce-ca89-451a-9774-1482ab2f4762@moroto.mountain
---
 drivers/gpu/drm/bridge/nxp-ptn3460.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c
index d81920227a8ae..9b7eb8c669c15 100644
--- a/drivers/gpu/drm/bridge/nxp-ptn3460.c
+++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c
@@ -56,13 +56,13 @@ static int ptn3460_read_bytes(struct ptn3460_bridge *ptn_bridge, char addr,
 	ret = i2c_master_send(ptn_bridge->client, &addr, 1);
 	if (ret <= 0) {
 		DRM_ERROR("Failed to send i2c command, ret=%d\n", ret);
-		return ret;
+		return ret ?: -EIO;
 	}
 
 	ret = i2c_master_recv(ptn_bridge->client, buf, len);
-	if (ret <= 0) {
+	if (ret != len) {
 		DRM_ERROR("Failed to recv i2c data, ret=%d\n", ret);
-		return ret;
+		return ret < 0 ? ret : -EIO;
 	}
 
 	return 0;
@@ -78,9 +78,9 @@ static int ptn3460_write_byte(struct ptn3460_bridge *ptn_bridge, char addr,
 	buf[1] = val;
 
 	ret = i2c_master_send(ptn_bridge->client, buf, ARRAY_SIZE(buf));
-	if (ret <= 0) {
+	if (ret != ARRAY_SIZE(buf)) {
 		DRM_ERROR("Failed to send i2c command, ret=%d\n", ret);
-		return ret;
+		return ret < 0 ? ret : -EIO;
 	}
 
 	return 0;
-- 
GitLab


From aa041111311d35cb311543f43bc60b825b8cd109 Mon Sep 17 00:00:00 2001
From: Frank Binns <frank.binns@imgtec.com>
Date: Mon, 4 Dec 2023 13:28:47 +0000
Subject: [PATCH 0733/2340] MAINTAINERS: Document Imagination PowerVR driver
 patches go via drm-misc

This is the tree used by nearly all other DRM drivers, so use it for the
PowerVR driver as well.

Signed-off-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204132847.1307340-1-frank.binns@imgtec.com
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0ba904b46efe2..d4b46b3db022f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10397,6 +10397,7 @@ M:	Frank Binns <frank.binns@imgtec.com>
 M:	Donald Robson <donald.robson@imgtec.com>
 M:	Matt Coster <matt.coster@imgtec.com>
 S:	Supported
+T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/gpu/img,powervr.yaml
 F:	Documentation/gpu/imagination/
 F:	drivers/gpu/drm/imagination/
-- 
GitLab


From 2a04739139b2b2761571e18937e2400e71eff664 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:43:28 +0300
Subject: [PATCH 0734/2340] drm/bridge: add transparent bridge helper

Define a helper for creating simple transparent bridges which serve the
only purpose of linking devices into the bridge chain up to the last
bridge representing the connector. This is especially useful for
DP/USB-C bridge chains, which can span across several devices, but do
not require any additional functionality from the intermediate bridges.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-2-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/bridge/Kconfig      |   9 ++
 drivers/gpu/drm/bridge/Makefile     |   1 +
 drivers/gpu/drm/bridge/aux-bridge.c | 140 ++++++++++++++++++++++++++++
 include/drm/bridge/aux-bridge.h     |  19 ++++
 4 files changed, 169 insertions(+)
 create mode 100644 drivers/gpu/drm/bridge/aux-bridge.c
 create mode 100644 include/drm/bridge/aux-bridge.h

diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index ba82a1142adf7..f12eab62799f2 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -12,6 +12,15 @@ config DRM_PANEL_BRIDGE
 	help
 	  DRM bridge wrapper of DRM panels
 
+config DRM_AUX_BRIDGE
+	tristate
+	depends on DRM_BRIDGE && OF
+	select AUXILIARY_BUS
+	select DRM_PANEL_BRIDGE
+	help
+	  Simple transparent bridge that is used by several non-DRM drivers to
+	  build bridges chain.
+
 menu "Display Interface Bridges"
 	depends on DRM && DRM_BRIDGE
 
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index 2b892b7ed59e8..918e3bfff079c 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DRM_AUX_BRIDGE) += aux-bridge.o
 obj-$(CONFIG_DRM_CHIPONE_ICN6211) += chipone-icn6211.o
 obj-$(CONFIG_DRM_CHRONTEL_CH7033) += chrontel-ch7033.o
 obj-$(CONFIG_DRM_CROS_EC_ANX7688) += cros-ec-anx7688.o
diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c
new file mode 100644
index 0000000000000..49d7c2ab1ecc3
--- /dev/null
+++ b/drivers/gpu/drm/bridge/aux-bridge.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2023 Linaro Ltd.
+ *
+ * Author: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+ */
+#include <linux/auxiliary_bus.h>
+#include <linux/module.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/bridge/aux-bridge.h>
+
+static DEFINE_IDA(drm_aux_bridge_ida);
+
+static void drm_aux_bridge_release(struct device *dev)
+{
+	struct auxiliary_device *adev = to_auxiliary_dev(dev);
+
+	ida_free(&drm_aux_bridge_ida, adev->id);
+
+	kfree(adev);
+}
+
+static void drm_aux_bridge_unregister_adev(void *_adev)
+{
+	struct auxiliary_device *adev = _adev;
+
+	auxiliary_device_delete(adev);
+	auxiliary_device_uninit(adev);
+}
+
+/**
+ * drm_aux_bridge_register - Create a simple bridge device to link the chain
+ * @parent: device instance providing this bridge
+ *
+ * Creates a simple DRM bridge that doesn't implement any drm_bridge
+ * operations. Such bridges merely fill a place in the bridge chain linking
+ * surrounding DRM bridges.
+ *
+ * Return: zero on success, negative error code on failure
+ */
+int drm_aux_bridge_register(struct device *parent)
+{
+	struct auxiliary_device *adev;
+	int ret;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+
+	ret = ida_alloc(&drm_aux_bridge_ida, GFP_KERNEL);
+	if (ret < 0) {
+		kfree(adev);
+		return ret;
+	}
+
+	adev->id = ret;
+	adev->name = "aux_bridge";
+	adev->dev.parent = parent;
+	adev->dev.of_node = parent->of_node;
+	adev->dev.release = drm_aux_bridge_release;
+
+	ret = auxiliary_device_init(adev);
+	if (ret) {
+		ida_free(&drm_aux_bridge_ida, adev->id);
+		kfree(adev);
+		return ret;
+	}
+
+	ret = auxiliary_device_add(adev);
+	if (ret) {
+		auxiliary_device_uninit(adev);
+		return ret;
+	}
+
+	return devm_add_action_or_reset(parent, drm_aux_bridge_unregister_adev, adev);
+}
+EXPORT_SYMBOL_GPL(drm_aux_bridge_register);
+
+struct drm_aux_bridge_data {
+	struct drm_bridge bridge;
+	struct drm_bridge *next_bridge;
+	struct device *dev;
+};
+
+static int drm_aux_bridge_attach(struct drm_bridge *bridge,
+				 enum drm_bridge_attach_flags flags)
+{
+	struct drm_aux_bridge_data *data;
+
+	if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR))
+		return -EINVAL;
+
+	data = container_of(bridge, struct drm_aux_bridge_data, bridge);
+
+	return drm_bridge_attach(bridge->encoder, data->next_bridge, bridge,
+				 DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+}
+
+static const struct drm_bridge_funcs drm_aux_bridge_funcs = {
+	.attach	= drm_aux_bridge_attach,
+};
+
+static int drm_aux_bridge_probe(struct auxiliary_device *auxdev,
+				const struct auxiliary_device_id *id)
+{
+	struct drm_aux_bridge_data *data;
+
+	data = devm_kzalloc(&auxdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->dev = &auxdev->dev;
+	data->next_bridge = devm_drm_of_get_bridge(&auxdev->dev, auxdev->dev.of_node, 0, 0);
+	if (IS_ERR(data->next_bridge))
+		return dev_err_probe(&auxdev->dev, PTR_ERR(data->next_bridge),
+				     "failed to acquire drm_bridge\n");
+
+	data->bridge.funcs = &drm_aux_bridge_funcs;
+	data->bridge.of_node = data->dev->of_node;
+
+	return devm_drm_bridge_add(data->dev, &data->bridge);
+}
+
+static const struct auxiliary_device_id drm_aux_bridge_table[] = {
+	{ .name = KBUILD_MODNAME ".aux_bridge" },
+	{},
+};
+MODULE_DEVICE_TABLE(auxiliary, drm_aux_bridge_table);
+
+static struct auxiliary_driver drm_aux_bridge_drv = {
+	.name = "aux_bridge",
+	.id_table = drm_aux_bridge_table,
+	.probe = drm_aux_bridge_probe,
+};
+module_auxiliary_driver(drm_aux_bridge_drv);
+
+MODULE_AUTHOR("Dmitry Baryshkov <dmitry.baryshkov@linaro.org>");
+MODULE_DESCRIPTION("DRM transparent bridge");
+MODULE_LICENSE("GPL");
diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h
new file mode 100644
index 0000000000000..b3a9cc9c862fd
--- /dev/null
+++ b/include/drm/bridge/aux-bridge.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2023 Linaro Ltd.
+ *
+ * Author: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+ */
+#ifndef DRM_AUX_BRIDGE_H
+#define DRM_AUX_BRIDGE_H
+
+#if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE)
+int drm_aux_bridge_register(struct device *parent);
+#else
+static inline int drm_aux_bridge_register(struct device *parent)
+{
+	return 0;
+}
+#endif
+
+#endif
-- 
GitLab


From 35921910bbd0b6ab595cead16d0c8faadbf2fd94 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:43:29 +0300
Subject: [PATCH 0735/2340] phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE

Switch to using the new DRM_AUX_BRIDGE helper to create the
transparent DRM bridge device instead of handcoding corresponding
functionality.

Acked-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-3-dmitry.baryshkov@linaro.org
---
 drivers/phy/qualcomm/Kconfig              |  2 +-
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 44 ++---------------------
 2 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig
index d891058b7c39a..846f8c99547fd 100644
--- a/drivers/phy/qualcomm/Kconfig
+++ b/drivers/phy/qualcomm/Kconfig
@@ -63,7 +63,7 @@ config PHY_QCOM_QMP_COMBO
 	depends on DRM || DRM=n
 	select GENERIC_PHY
 	select MFD_SYSCON
-	select DRM_PANEL_BRIDGE if DRM
+	select DRM_AUX_BRIDGE if DRM_BRIDGE
 	help
 	  Enable this to support the QMP Combo PHY transceiver that is used
 	  with USB3 and DisplayPort controllers on Qualcomm chips.
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 9c87845c78ec2..f6c727249104f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -21,7 +21,7 @@
 #include <linux/usb/typec.h>
 #include <linux/usb/typec_mux.h>
 
-#include <drm/drm_bridge.h>
+#include <drm/bridge/aux-bridge.h>
 
 #include <dt-bindings/phy/phy-qcom-qmp.h>
 
@@ -1419,8 +1419,6 @@ struct qmp_combo {
 	struct clk_hw dp_link_hw;
 	struct clk_hw dp_pixel_hw;
 
-	struct drm_bridge bridge;
-
 	struct typec_switch_dev *sw;
 	enum typec_orientation orientation;
 };
@@ -3191,44 +3189,6 @@ static int qmp_combo_typec_switch_register(struct qmp_combo *qmp)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_DRM)
-static int qmp_combo_bridge_attach(struct drm_bridge *bridge,
-				   enum drm_bridge_attach_flags flags)
-{
-	struct qmp_combo *qmp = container_of(bridge, struct qmp_combo, bridge);
-	struct drm_bridge *next_bridge;
-
-	if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR))
-		return -EINVAL;
-
-	next_bridge = devm_drm_of_get_bridge(qmp->dev, qmp->dev->of_node, 0, 0);
-	if (IS_ERR(next_bridge)) {
-		dev_err(qmp->dev, "failed to acquire drm_bridge: %pe\n", next_bridge);
-		return PTR_ERR(next_bridge);
-	}
-
-	return drm_bridge_attach(bridge->encoder, next_bridge, bridge,
-				 DRM_BRIDGE_ATTACH_NO_CONNECTOR);
-}
-
-static const struct drm_bridge_funcs qmp_combo_bridge_funcs = {
-	.attach	= qmp_combo_bridge_attach,
-};
-
-static int qmp_combo_dp_register_bridge(struct qmp_combo *qmp)
-{
-	qmp->bridge.funcs = &qmp_combo_bridge_funcs;
-	qmp->bridge.of_node = qmp->dev->of_node;
-
-	return devm_drm_bridge_add(qmp->dev, &qmp->bridge);
-}
-#else
-static int qmp_combo_dp_register_bridge(struct qmp_combo *qmp)
-{
-	return 0;
-}
-#endif
-
 static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np)
 {
 	struct device *dev = qmp->dev;
@@ -3440,7 +3400,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	ret = qmp_combo_dp_register_bridge(qmp);
+	ret = drm_aux_bridge_register(dev);
 	if (ret)
 		return ret;
 
-- 
GitLab


From c5d296bad640b190c52ef7508114d70e971a4bba Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:43:30 +0300
Subject: [PATCH 0736/2340] usb: typec: nb7vpq904m: switch to DRM_AUX_BRIDGE

Switch to using the new DRM_AUX_BRIDGE helper to create the
transparent DRM bridge device instead of handcoding corresponding
functionality.

Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-4-dmitry.baryshkov@linaro.org
---
 drivers/usb/typec/mux/Kconfig      |  2 +-
 drivers/usb/typec/mux/nb7vpq904m.c | 44 ++----------------------------
 2 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/drivers/usb/typec/mux/Kconfig b/drivers/usb/typec/mux/Kconfig
index 816b9bd08355f..5120942f309d4 100644
--- a/drivers/usb/typec/mux/Kconfig
+++ b/drivers/usb/typec/mux/Kconfig
@@ -40,7 +40,7 @@ config TYPEC_MUX_NB7VPQ904M
 	tristate "On Semiconductor NB7VPQ904M Type-C redriver driver"
 	depends on I2C
 	depends on DRM || DRM=n
-	select DRM_PANEL_BRIDGE if DRM
+	select DRM_AUX_BRIDGE if DRM_BRIDGE
 	select REGMAP_I2C
 	help
 	  Say Y or M if your system has a On Semiconductor NB7VPQ904M Type-C
diff --git a/drivers/usb/typec/mux/nb7vpq904m.c b/drivers/usb/typec/mux/nb7vpq904m.c
index cda206cf0c387..b17826713753a 100644
--- a/drivers/usb/typec/mux/nb7vpq904m.c
+++ b/drivers/usb/typec/mux/nb7vpq904m.c
@@ -11,7 +11,7 @@
 #include <linux/regmap.h>
 #include <linux/bitfield.h>
 #include <linux/of_graph.h>
-#include <drm/drm_bridge.h>
+#include <drm/bridge/aux-bridge.h>
 #include <linux/usb/typec_dp.h>
 #include <linux/usb/typec_mux.h>
 #include <linux/usb/typec_retimer.h>
@@ -70,8 +70,6 @@ struct nb7vpq904m {
 	bool swap_data_lanes;
 	struct typec_switch *typec_switch;
 
-	struct drm_bridge bridge;
-
 	struct mutex lock; /* protect non-concurrent retimer & switch */
 
 	enum typec_orientation orientation;
@@ -297,44 +295,6 @@ static int nb7vpq904m_retimer_set(struct typec_retimer *retimer, struct typec_re
 	return ret;
 }
 
-#if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_DRM_PANEL_BRIDGE)
-static int nb7vpq904m_bridge_attach(struct drm_bridge *bridge,
-				    enum drm_bridge_attach_flags flags)
-{
-	struct nb7vpq904m *nb7 = container_of(bridge, struct nb7vpq904m, bridge);
-	struct drm_bridge *next_bridge;
-
-	if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR))
-		return -EINVAL;
-
-	next_bridge = devm_drm_of_get_bridge(&nb7->client->dev, nb7->client->dev.of_node, 0, 0);
-	if (IS_ERR(next_bridge)) {
-		dev_err(&nb7->client->dev, "failed to acquire drm_bridge: %pe\n", next_bridge);
-		return PTR_ERR(next_bridge);
-	}
-
-	return drm_bridge_attach(bridge->encoder, next_bridge, bridge,
-				 DRM_BRIDGE_ATTACH_NO_CONNECTOR);
-}
-
-static const struct drm_bridge_funcs nb7vpq904m_bridge_funcs = {
-	.attach	= nb7vpq904m_bridge_attach,
-};
-
-static int nb7vpq904m_register_bridge(struct nb7vpq904m *nb7)
-{
-	nb7->bridge.funcs = &nb7vpq904m_bridge_funcs;
-	nb7->bridge.of_node = nb7->client->dev.of_node;
-
-	return devm_drm_bridge_add(&nb7->client->dev, &nb7->bridge);
-}
-#else
-static int nb7vpq904m_register_bridge(struct nb7vpq904m *nb7)
-{
-	return 0;
-}
-#endif
-
 static const struct regmap_config nb7_regmap = {
 	.max_register = 0x1f,
 	.reg_bits = 8,
@@ -461,7 +421,7 @@ static int nb7vpq904m_probe(struct i2c_client *client)
 
 	gpiod_set_value(nb7->enable_gpio, 1);
 
-	ret = nb7vpq904m_register_bridge(nb7);
+	ret = drm_aux_bridge_register(dev);
 	if (ret)
 		goto err_disable_gpio;
 
-- 
GitLab


From e560518a6c2e60f1566473c146fddcff3281f617 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:43:31 +0300
Subject: [PATCH 0737/2340] drm/bridge: implement generic DP HPD bridge

Several USB-C controllers implement a pretty simple DRM bridge which
implements just the HPD notification operations. Add special helper
for creating such simple bridges.

Acked-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-5-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/bridge/Kconfig          |   8 ++
 drivers/gpu/drm/bridge/Makefile         |   1 +
 drivers/gpu/drm/bridge/aux-hpd-bridge.c | 163 ++++++++++++++++++++++++
 include/drm/bridge/aux-bridge.h         |  18 +++
 4 files changed, 190 insertions(+)
 create mode 100644 drivers/gpu/drm/bridge/aux-hpd-bridge.c

diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index f12eab62799f2..19d2dc05c3970 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -21,6 +21,14 @@ config DRM_AUX_BRIDGE
 	  Simple transparent bridge that is used by several non-DRM drivers to
 	  build bridges chain.
 
+config DRM_AUX_HPD_BRIDGE
+	tristate
+	depends on DRM_BRIDGE && OF
+	select AUXILIARY_BUS
+	help
+	  Simple bridge that terminates the bridge chain and provides HPD
+	  support.
+
 menu "Display Interface Bridges"
 	depends on DRM && DRM_BRIDGE
 
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index 918e3bfff079c..017b5832733b2 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DRM_AUX_BRIDGE) += aux-bridge.o
+obj-$(CONFIG_DRM_AUX_HPD_BRIDGE) += aux-hpd-bridge.o
 obj-$(CONFIG_DRM_CHIPONE_ICN6211) += chipone-icn6211.o
 obj-$(CONFIG_DRM_CHRONTEL_CH7033) += chrontel-ch7033.o
 obj-$(CONFIG_DRM_CROS_EC_ANX7688) += cros-ec-anx7688.o
diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
new file mode 100644
index 0000000000000..5d2ab3a715f9c
--- /dev/null
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2023 Linaro Ltd.
+ *
+ * Author: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+ */
+#include <linux/auxiliary_bus.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/bridge/aux-bridge.h>
+
+static DEFINE_IDA(drm_aux_hpd_bridge_ida);
+
+struct drm_aux_hpd_bridge_data {
+	struct drm_bridge bridge;
+	struct device *dev;
+};
+
+static void drm_aux_hpd_bridge_release(struct device *dev)
+{
+	struct auxiliary_device *adev = to_auxiliary_dev(dev);
+
+	ida_free(&drm_aux_hpd_bridge_ida, adev->id);
+
+	of_node_put(adev->dev.platform_data);
+
+	kfree(adev);
+}
+
+static void drm_aux_hpd_bridge_unregister_adev(void *_adev)
+{
+	struct auxiliary_device *adev = _adev;
+
+	auxiliary_device_delete(adev);
+	auxiliary_device_uninit(adev);
+}
+
+/**
+ * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge
+ * @parent: device instance providing this bridge
+ * @np: device node pointer corresponding to this bridge instance
+ *
+ * Creates a simple DRM bridge with the type set to
+ * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is
+ * able to send the HPD events.
+ *
+ * Return: device instance that will handle created bridge or an error code
+ * encoded into the pointer.
+ */
+struct device *drm_dp_hpd_bridge_register(struct device *parent,
+					  struct device_node *np)
+{
+	struct auxiliary_device *adev;
+	int ret;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return ERR_PTR(-ENOMEM);
+
+	ret = ida_alloc(&drm_aux_hpd_bridge_ida, GFP_KERNEL);
+	if (ret < 0) {
+		kfree(adev);
+		return ERR_PTR(ret);
+	}
+
+	adev->id = ret;
+	adev->name = "dp_hpd_bridge";
+	adev->dev.parent = parent;
+	adev->dev.of_node = parent->of_node;
+	adev->dev.release = drm_aux_hpd_bridge_release;
+	adev->dev.platform_data = np;
+
+	ret = auxiliary_device_init(adev);
+	if (ret) {
+		ida_free(&drm_aux_hpd_bridge_ida, adev->id);
+		kfree(adev);
+		return ERR_PTR(ret);
+	}
+
+	ret = auxiliary_device_add(adev);
+	if (ret) {
+		auxiliary_device_uninit(adev);
+		return ERR_PTR(ret);
+	}
+
+	ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &adev->dev;
+}
+EXPORT_SYMBOL_GPL(drm_dp_hpd_bridge_register);
+
+/**
+ * drm_aux_hpd_bridge_notify - notify hot plug detection events
+ * @dev: device created for the HPD bridge
+ * @status: output connection status
+ *
+ * A wrapper around drm_bridge_hpd_notify() that is used to report hot plug
+ * detection events for bridges created via drm_dp_hpd_bridge_register().
+ *
+ * This function shall be called in a context that can sleep.
+ */
+void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status)
+{
+	struct auxiliary_device *adev = to_auxiliary_dev(dev);
+	struct drm_aux_hpd_bridge_data *data = auxiliary_get_drvdata(adev);
+
+	if (!data)
+		return;
+
+	drm_bridge_hpd_notify(&data->bridge, status);
+}
+EXPORT_SYMBOL_GPL(drm_aux_hpd_bridge_notify);
+
+static int drm_aux_hpd_bridge_attach(struct drm_bridge *bridge,
+				     enum drm_bridge_attach_flags flags)
+{
+	return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 0 : -EINVAL;
+}
+
+static const struct drm_bridge_funcs drm_aux_hpd_bridge_funcs = {
+	.attach	= drm_aux_hpd_bridge_attach,
+};
+
+static int drm_aux_hpd_bridge_probe(struct auxiliary_device *auxdev,
+				    const struct auxiliary_device_id *id)
+{
+	struct drm_aux_hpd_bridge_data *data;
+
+	data = devm_kzalloc(&auxdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->dev = &auxdev->dev;
+	data->bridge.funcs = &drm_aux_hpd_bridge_funcs;
+	data->bridge.of_node = dev_get_platdata(data->dev);
+	data->bridge.ops = DRM_BRIDGE_OP_HPD;
+	data->bridge.type = id->driver_data;
+
+	auxiliary_set_drvdata(auxdev, data);
+
+	return devm_drm_bridge_add(data->dev, &data->bridge);
+}
+
+static const struct auxiliary_device_id drm_aux_hpd_bridge_table[] = {
+	{ .name = KBUILD_MODNAME ".dp_hpd_bridge", .driver_data = DRM_MODE_CONNECTOR_DisplayPort, },
+	{},
+};
+MODULE_DEVICE_TABLE(auxiliary, drm_aux_hpd_bridge_table);
+
+static struct auxiliary_driver drm_aux_hpd_bridge_drv = {
+	.name = "aux_hpd_bridge",
+	.id_table = drm_aux_hpd_bridge_table,
+	.probe = drm_aux_hpd_bridge_probe,
+};
+module_auxiliary_driver(drm_aux_hpd_bridge_drv);
+
+MODULE_AUTHOR("Dmitry Baryshkov <dmitry.baryshkov@linaro.org>");
+MODULE_DESCRIPTION("DRM HPD bridge");
+MODULE_LICENSE("GPL");
diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h
index b3a9cc9c862fd..66249ff0858e7 100644
--- a/include/drm/bridge/aux-bridge.h
+++ b/include/drm/bridge/aux-bridge.h
@@ -7,6 +7,8 @@
 #ifndef DRM_AUX_BRIDGE_H
 #define DRM_AUX_BRIDGE_H
 
+#include <drm/drm_connector.h>
+
 #if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE)
 int drm_aux_bridge_register(struct device *parent);
 #else
@@ -16,4 +18,20 @@ static inline int drm_aux_bridge_register(struct device *parent)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE)
+struct device *drm_dp_hpd_bridge_register(struct device *parent,
+					  struct device_node *np);
+void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status);
+#else
+static inline struct device *drm_dp_hpd_bridge_register(struct device *parent,
+							struct device_node *np)
+{
+	return 0;
+}
+
+static inline void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status)
+{
+}
+#endif
+
 #endif
-- 
GitLab


From 2bcca96abfbf89d26fc10fc92e40532bb2ae8891 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:43:32 +0300
Subject: [PATCH 0738/2340] soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE

Use the freshly defined DRM_AUX_HPD_BRIDGE instead of open-coding the
same functionality for the DRM bridge chain termination.

Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Acked-by: Bjorn Andersson <andersson@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-6-dmitry.baryshkov@linaro.org
---
 drivers/soc/qcom/Kconfig              |  1 +
 drivers/soc/qcom/pmic_glink_altmode.c | 33 ++++++++-------------------
 2 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index b3634e10f6f5e..c954001ae79ec 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -86,6 +86,7 @@ config QCOM_PMIC_GLINK
 	depends on OF
 	select AUXILIARY_BUS
 	select QCOM_PDR_HELPERS
+	select DRM_AUX_HPD_BRIDGE
 	help
 	  The Qualcomm PMIC GLINK driver provides access, over GLINK, to the
 	  USB and battery firmware running on one of the coprocessors in
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c
index b78279e2f54cf..053b7393e26a8 100644
--- a/drivers/soc/qcom/pmic_glink_altmode.c
+++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -11,7 +11,7 @@
 #include <linux/mutex.h>
 #include <linux/property.h>
 #include <linux/soc/qcom/pdr.h>
-#include <drm/drm_bridge.h>
+#include <drm/bridge/aux-bridge.h>
 
 #include <linux/usb/typec_altmode.h>
 #include <linux/usb/typec_dp.h>
@@ -76,7 +76,7 @@ struct pmic_glink_altmode_port {
 
 	struct work_struct work;
 
-	struct drm_bridge bridge;
+	struct device *bridge;
 
 	enum typec_orientation orientation;
 	u16 svid;
@@ -230,10 +230,10 @@ static void pmic_glink_altmode_worker(struct work_struct *work)
 	else
 		pmic_glink_altmode_enable_usb(altmode, alt_port);
 
-	if (alt_port->hpd_state)
-		drm_bridge_hpd_notify(&alt_port->bridge, connector_status_connected);
-	else
-		drm_bridge_hpd_notify(&alt_port->bridge, connector_status_disconnected);
+	drm_aux_hpd_bridge_notify(alt_port->bridge,
+				  alt_port->hpd_state ?
+				  connector_status_connected :
+				  connector_status_disconnected);
 
 	pmic_glink_altmode_request(altmode, ALTMODE_PAN_ACK, alt_port->index);
 };
@@ -365,16 +365,6 @@ static void pmic_glink_altmode_callback(const void *data, size_t len, void *priv
 	}
 }
 
-static int pmic_glink_altmode_attach(struct drm_bridge *bridge,
-				     enum drm_bridge_attach_flags flags)
-{
-	return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 0 : -EINVAL;
-}
-
-static const struct drm_bridge_funcs pmic_glink_altmode_bridge_funcs = {
-	.attach = pmic_glink_altmode_attach,
-};
-
 static void pmic_glink_altmode_put_retimer(void *data)
 {
 	typec_retimer_put(data);
@@ -464,15 +454,10 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
 		alt_port->index = port;
 		INIT_WORK(&alt_port->work, pmic_glink_altmode_worker);
 
-		alt_port->bridge.funcs = &pmic_glink_altmode_bridge_funcs;
-		alt_port->bridge.of_node = to_of_node(fwnode);
-		alt_port->bridge.ops = DRM_BRIDGE_OP_HPD;
-		alt_port->bridge.type = DRM_MODE_CONNECTOR_DisplayPort;
-
-		ret = devm_drm_bridge_add(dev, &alt_port->bridge);
-		if (ret) {
+		alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode));
+		if (IS_ERR(alt_port->bridge)) {
 			fwnode_handle_put(fwnode);
-			return ret;
+			return PTR_ERR(alt_port->bridge);
 		}
 
 		alt_port->dp_alt.svid = USB_TYPEC_DP_SID;
-- 
GitLab


From 7d9f1b72b29698e3030c2b163522cf4aa91b47e9 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:43:33 +0300
Subject: [PATCH 0739/2340] usb: typec: qcom-pmic-typec: switch to
 DRM_AUX_HPD_BRIDGE

Use the freshly defined DRM_AUX_HPD_BRIDGE instead of open-coding the
same functionality for the DRM bridge chain termination.

Acked-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-7-dmitry.baryshkov@linaro.org
---
 drivers/usb/typec/tcpm/Kconfig                |  1 +
 drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c | 41 +++----------------
 2 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig
index 0b2993fef564b..64d5421c69e69 100644
--- a/drivers/usb/typec/tcpm/Kconfig
+++ b/drivers/usb/typec/tcpm/Kconfig
@@ -80,6 +80,7 @@ config TYPEC_QCOM_PMIC
 	tristate "Qualcomm PMIC USB Type-C Port Controller Manager driver"
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on DRM || DRM=n
+	select DRM_AUX_HPD_BRIDGE if DRM_BRIDGE
 	help
 	  A Type-C port and Power Delivery driver which aggregates two
 	  discrete pieces of silicon in the PM8150b PMIC block: the
diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
index 581199d37b49d..1a2b4bddaa97e 100644
--- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
+++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
@@ -18,7 +18,7 @@
 #include <linux/usb/tcpm.h>
 #include <linux/usb/typec_mux.h>
 
-#include <drm/drm_bridge.h>
+#include <drm/bridge/aux-bridge.h>
 
 #include "qcom_pmic_typec_pdphy.h"
 #include "qcom_pmic_typec_port.h"
@@ -36,7 +36,6 @@ struct pmic_typec {
 	struct pmic_typec_port	*pmic_typec_port;
 	bool			vbus_enabled;
 	struct mutex		lock;		/* VBUS state serialization */
-	struct drm_bridge	bridge;
 };
 
 #define tcpc_to_tcpm(_tcpc_) container_of(_tcpc_, struct pmic_typec, tcpc)
@@ -150,35 +149,6 @@ static int qcom_pmic_typec_init(struct tcpc_dev *tcpc)
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_DRM)
-static int qcom_pmic_typec_attach(struct drm_bridge *bridge,
-				     enum drm_bridge_attach_flags flags)
-{
-	return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 0 : -EINVAL;
-}
-
-static const struct drm_bridge_funcs qcom_pmic_typec_bridge_funcs = {
-	.attach = qcom_pmic_typec_attach,
-};
-
-static int qcom_pmic_typec_init_drm(struct pmic_typec *tcpm)
-{
-	tcpm->bridge.funcs = &qcom_pmic_typec_bridge_funcs;
-#ifdef CONFIG_OF
-	tcpm->bridge.of_node = of_get_child_by_name(tcpm->dev->of_node, "connector");
-#endif
-	tcpm->bridge.ops = DRM_BRIDGE_OP_HPD;
-	tcpm->bridge.type = DRM_MODE_CONNECTOR_DisplayPort;
-
-	return devm_drm_bridge_add(tcpm->dev, &tcpm->bridge);
-}
-#else
-static int qcom_pmic_typec_init_drm(struct pmic_typec *tcpm)
-{
-	return 0;
-}
-#endif
-
 static int qcom_pmic_typec_probe(struct platform_device *pdev)
 {
 	struct pmic_typec *tcpm;
@@ -186,6 +156,7 @@ static int qcom_pmic_typec_probe(struct platform_device *pdev)
 	struct device_node *np = dev->of_node;
 	const struct pmic_typec_resources *res;
 	struct regmap *regmap;
+	struct device *bridge_dev;
 	u32 base[2];
 	int ret;
 
@@ -241,14 +212,14 @@ static int qcom_pmic_typec_probe(struct platform_device *pdev)
 	mutex_init(&tcpm->lock);
 	platform_set_drvdata(pdev, tcpm);
 
-	ret = qcom_pmic_typec_init_drm(tcpm);
-	if (ret)
-		return ret;
-
 	tcpm->tcpc.fwnode = device_get_named_child_node(tcpm->dev, "connector");
 	if (!tcpm->tcpc.fwnode)
 		return -EINVAL;
 
+	bridge_dev = drm_dp_hpd_bridge_register(tcpm->dev, to_of_node(tcpm->tcpc.fwnode));
+	if (IS_ERR(bridge_dev))
+		return PTR_ERR(bridge_dev);
+
 	tcpm->tcpm_port = tcpm_register_port(tcpm->dev, &tcpm->tcpc);
 	if (IS_ERR(tcpm->tcpm_port)) {
 		ret = PTR_ERR(tcpm->tcpm_port);
-- 
GitLab


From caf525ed45b4960b450cbd4e811d9b247bc2586c Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:53:13 +0300
Subject: [PATCH 0740/2340] drm/encoder: register per-encoder debugfs dir

Each of connectors and CRTCs used by the DRM device provides debugfs
directory, which is used by several standard debugfs files and can
further be extended by the driver. Add such generic debugfs directories
for encoder.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203115315.1306124-2-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_debugfs.c  | 25 +++++++++++++++++++++++++
 drivers/gpu/drm/drm_encoder.c  |  4 ++++
 drivers/gpu/drm/drm_internal.h | 10 ++++++++++
 include/drm/drm_encoder.h      | 16 +++++++++++++++-
 4 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index f291fb4b359fd..00406b4f3235c 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -589,4 +589,29 @@ void drm_debugfs_crtc_remove(struct drm_crtc *crtc)
 	crtc->debugfs_entry = NULL;
 }
 
+void drm_debugfs_encoder_add(struct drm_encoder *encoder)
+{
+	struct drm_minor *minor = encoder->dev->primary;
+	struct dentry *root;
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "encoder-%d", encoder->index);
+	if (!name)
+		return;
+
+	root = debugfs_create_dir(name, minor->debugfs_root);
+	kfree(name);
+
+	encoder->debugfs_entry = root;
+
+	if (encoder->funcs->debugfs_init)
+		encoder->funcs->debugfs_init(encoder, root);
+}
+
+void drm_debugfs_encoder_remove(struct drm_encoder *encoder)
+{
+	debugfs_remove_recursive(encoder->debugfs_entry);
+	encoder->debugfs_entry = NULL;
+}
+
 #endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/drm/drm_encoder.c b/drivers/gpu/drm/drm_encoder.c
index 1143bc7f32522..8f2bc6a284822 100644
--- a/drivers/gpu/drm/drm_encoder.c
+++ b/drivers/gpu/drm/drm_encoder.c
@@ -30,6 +30,7 @@
 #include <drm/drm_print.h>
 
 #include "drm_crtc_internal.h"
+#include "drm_internal.h"
 
 /**
  * DOC: overview
@@ -74,6 +75,8 @@ int drm_encoder_register_all(struct drm_device *dev)
 	int ret = 0;
 
 	drm_for_each_encoder(encoder, dev) {
+		drm_debugfs_encoder_add(encoder);
+
 		if (encoder->funcs && encoder->funcs->late_register)
 			ret = encoder->funcs->late_register(encoder);
 		if (ret)
@@ -90,6 +93,7 @@ void drm_encoder_unregister_all(struct drm_device *dev)
 	drm_for_each_encoder(encoder, dev) {
 		if (encoder->funcs && encoder->funcs->early_unregister)
 			encoder->funcs->early_unregister(encoder);
+		drm_debugfs_encoder_remove(encoder);
 	}
 }
 
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index b12c463bc4605..b7a311efa2b18 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -194,6 +194,8 @@ void drm_debugfs_connector_remove(struct drm_connector *connector);
 void drm_debugfs_crtc_add(struct drm_crtc *crtc);
 void drm_debugfs_crtc_remove(struct drm_crtc *crtc);
 void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc);
+void drm_debugfs_encoder_add(struct drm_encoder *encoder);
+void drm_debugfs_encoder_remove(struct drm_encoder *encoder);
 #else
 static inline void drm_debugfs_dev_fini(struct drm_device *dev)
 {
@@ -231,6 +233,14 @@ static inline void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc)
 {
 }
 
+static inline void drm_debugfs_encoder_add(struct drm_encoder *encoder)
+{
+}
+
+static inline void drm_debugfs_encoder_remove(struct drm_encoder *encoder)
+{
+}
+
 #endif
 
 drm_ioctl_t drm_version;
diff --git a/include/drm/drm_encoder.h b/include/drm/drm_encoder.h
index 3a09682af685a..977a9381c8ba9 100644
--- a/include/drm/drm_encoder.h
+++ b/include/drm/drm_encoder.h
@@ -60,7 +60,7 @@ struct drm_encoder_funcs {
 	 * @late_register:
 	 *
 	 * This optional hook can be used to register additional userspace
-	 * interfaces attached to the encoder like debugfs interfaces.
+	 * interfaces attached to the encoder.
 	 * It is called late in the driver load sequence from drm_dev_register().
 	 * Everything added from this callback should be unregistered in
 	 * the early_unregister callback.
@@ -81,6 +81,13 @@ struct drm_encoder_funcs {
 	 * before data structures are torndown.
 	 */
 	void (*early_unregister)(struct drm_encoder *encoder);
+
+	/**
+	 * @debugfs_init:
+	 *
+	 * Allows encoders to create encoder-specific debugfs files.
+	 */
+	void (*debugfs_init)(struct drm_encoder *encoder, struct dentry *root);
 };
 
 /**
@@ -184,6 +191,13 @@ struct drm_encoder {
 
 	const struct drm_encoder_funcs *funcs;
 	const struct drm_encoder_helper_funcs *helper_private;
+
+	/**
+	 * @debugfs_entry:
+	 *
+	 * Debugfs directory for this CRTC.
+	 */
+	struct dentry *debugfs_entry;
 };
 
 #define obj_to_encoder(x) container_of(x, struct drm_encoder, base)
-- 
GitLab


From d0b3c318e04cc6c4e2a3c30ee0f6f619aa8d0db5 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:53:14 +0300
Subject: [PATCH 0741/2340] drm/bridge: migrate bridge_chains to per-encoder
 file

Instead of having a single file with all bridge chains, list bridges
under a corresponding per-encoder debugfs directory.

While we are at it, also slightly improve the formatting of the bridge
data: split a single line entry into multiple lines, include the symbol
name of the bridge funcs and add the textual representation of the
bridge ops.

Example of the listing:

$ cat /sys/kernel/debug/dri/0/encoder-0/bridges
bridge[0]: dsi_mgr_bridge_funcs
	type: [0] Unknown
	ops: [0]
bridge[1]: lt9611uxc_bridge_funcs
	type: [11] HDMI-A
	OF: /soc@0/geniqup@9c0000/i2c@994000/hdmi-bridge@2b:lontium,lt9611uxc
	ops: [7] detect edid hpd

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203115315.1306124-3-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_bridge.c  | 44 -----------------------------------
 drivers/gpu/drm/drm_debugfs.c | 40 ++++++++++++++++++++++++++++---
 include/drm/drm_bridge.h      |  2 --
 3 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
index 30d66bee0ec6a..cee3188adf3d5 100644
--- a/drivers/gpu/drm/drm_bridge.c
+++ b/drivers/gpu/drm/drm_bridge.c
@@ -1347,50 +1347,6 @@ struct drm_bridge *of_drm_find_bridge(struct device_node *np)
 EXPORT_SYMBOL(of_drm_find_bridge);
 #endif
 
-#ifdef CONFIG_DEBUG_FS
-static int drm_bridge_chains_info(struct seq_file *m, void *data)
-{
-	struct drm_debugfs_entry *entry = m->private;
-	struct drm_device *dev = entry->dev;
-	struct drm_printer p = drm_seq_file_printer(m);
-	struct drm_mode_config *config = &dev->mode_config;
-	struct drm_encoder *encoder;
-	unsigned int bridge_idx = 0;
-
-	list_for_each_entry(encoder, &config->encoder_list, head) {
-		struct drm_bridge *bridge;
-
-		drm_printf(&p, "encoder[%u]\n", encoder->base.id);
-
-		drm_for_each_bridge_in_chain(encoder, bridge) {
-			drm_printf(&p, "\tbridge[%u] type: %u, ops: %#x",
-				   bridge_idx, bridge->type, bridge->ops);
-
-#ifdef CONFIG_OF
-			if (bridge->of_node)
-				drm_printf(&p, ", OF: %pOFfc", bridge->of_node);
-#endif
-
-			drm_printf(&p, "\n");
-
-			bridge_idx++;
-		}
-	}
-
-	return 0;
-}
-
-static const struct drm_debugfs_info drm_bridge_debugfs_list[] = {
-	{ "bridge_chains", drm_bridge_chains_info, 0 },
-};
-
-void drm_bridge_debugfs_init(struct drm_device *dev)
-{
-	drm_debugfs_add_files(dev, drm_bridge_debugfs_list,
-			      ARRAY_SIZE(drm_bridge_debugfs_list));
-}
-#endif
-
 MODULE_AUTHOR("Ajay Kumar <ajaykumar.rs@samsung.com>");
 MODULE_DESCRIPTION("DRM bridge infrastructure");
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 00406b4f3235c..02e7481758c0a 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -314,10 +314,8 @@ void drm_debugfs_dev_register(struct drm_device *dev)
 		drm_framebuffer_debugfs_init(dev);
 		drm_client_debugfs_init(dev);
 	}
-	if (drm_drv_uses_atomic_modeset(dev)) {
+	if (drm_drv_uses_atomic_modeset(dev))
 		drm_atomic_debugfs_init(dev);
-		drm_bridge_debugfs_init(dev);
-	}
 }
 
 int drm_debugfs_register(struct drm_minor *minor, int minor_id,
@@ -589,6 +587,38 @@ void drm_debugfs_crtc_remove(struct drm_crtc *crtc)
 	crtc->debugfs_entry = NULL;
 }
 
+static int bridges_show(struct seq_file *m, void *data)
+{
+	struct drm_encoder *encoder = m->private;
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct drm_bridge *bridge;
+	unsigned int idx = 0;
+
+	drm_for_each_bridge_in_chain(encoder, bridge) {
+		drm_printf(&p, "bridge[%d]: %ps\n", idx++, bridge->funcs);
+		drm_printf(&p, "\ttype: [%d] %s\n",
+			   bridge->type,
+			   drm_get_connector_type_name(bridge->type));
+#ifdef CONFIG_OF
+		if (bridge->of_node)
+			drm_printf(&p, "\tOF: %pOFfc\n", bridge->of_node);
+#endif
+		drm_printf(&p, "\tops: [0x%x]", bridge->ops);
+		if (bridge->ops & DRM_BRIDGE_OP_DETECT)
+			drm_puts(&p, " detect");
+		if (bridge->ops & DRM_BRIDGE_OP_EDID)
+			drm_puts(&p, " edid");
+		if (bridge->ops & DRM_BRIDGE_OP_HPD)
+			drm_puts(&p, " hpd");
+		if (bridge->ops & DRM_BRIDGE_OP_MODES)
+			drm_puts(&p, " modes");
+		drm_puts(&p, "\n");
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(bridges);
+
 void drm_debugfs_encoder_add(struct drm_encoder *encoder)
 {
 	struct drm_minor *minor = encoder->dev->primary;
@@ -604,6 +634,10 @@ void drm_debugfs_encoder_add(struct drm_encoder *encoder)
 
 	encoder->debugfs_entry = root;
 
+	/* bridges list */
+	debugfs_create_file("bridges", 0444, root, encoder,
+			    &bridges_fops);
+
 	if (encoder->funcs->debugfs_init)
 		encoder->funcs->debugfs_init(encoder, root);
 }
diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 9ef461aa9b9e2..e39da5807ba71 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -950,6 +950,4 @@ static inline struct drm_bridge *drmm_of_get_bridge(struct drm_device *drm,
 }
 #endif
 
-void drm_bridge_debugfs_init(struct drm_device *dev);
-
 #endif
-- 
GitLab


From 4de77156a2acdec0014fa89fc1766a7410d726ff Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 1 Dec 2023 17:11:30 +0100
Subject: [PATCH 0742/2340] drm/i915/dsi: Use devm_gpiod_get() for all GPIOs

soc_gpio_set_value() already uses devm_gpiod_get(), lets be consistent
and use devm_gpiod_get() for all GPIOs.

This allows removing the intel_dsi_vbt_gpio_cleanup() function,
which only function was to put the GPIO-descriptors.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201161130.23976-1-hdegoede@redhat.com
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 17 ++---------------
 drivers/gpu/drm/i915/display/intel_dsi_vbt.h |  1 -
 drivers/gpu/drm/i915/display/vlv_dsi.c       | 10 +---------
 3 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 275d0218394c1..a5d7fc8418c97 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -922,7 +922,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 		gpiod_add_lookup_table(gpiod_lookup_table);
 
 	if (want_panel_gpio) {
-		intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags);
+		intel_dsi->gpio_panel = devm_gpiod_get(dev->dev, "panel", flags);
 		if (IS_ERR(intel_dsi->gpio_panel)) {
 			drm_err(&dev_priv->drm,
 				"Failed to own gpio for panel control\n");
@@ -932,7 +932,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 
 	if (want_backlight_gpio) {
 		intel_dsi->gpio_backlight =
-			gpiod_get(dev->dev, "backlight", flags);
+			devm_gpiod_get(dev->dev, "backlight", flags);
 		if (IS_ERR(intel_dsi->gpio_backlight)) {
 			drm_err(&dev_priv->drm,
 				"Failed to own gpio for backlight control\n");
@@ -943,16 +943,3 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 	if (gpiod_lookup_table)
 		gpiod_remove_lookup_table(gpiod_lookup_table);
 }
-
-void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi)
-{
-	if (intel_dsi->gpio_panel) {
-		gpiod_put(intel_dsi->gpio_panel);
-		intel_dsi->gpio_panel = NULL;
-	}
-
-	if (intel_dsi->gpio_backlight) {
-		gpiod_put(intel_dsi->gpio_backlight);
-		intel_dsi->gpio_backlight = NULL;
-	}
-}
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
index 468d873fab1ae..3462fcc760e64 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
@@ -13,7 +13,6 @@ struct intel_dsi;
 
 bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id);
 void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on);
-void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi);
 void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 				 enum mipi_seq seq_id);
 void intel_dsi_log_params(struct intel_dsi *intel_dsi);
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index 5bda97c96810b..9b33b8a74d643 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -1532,16 +1532,8 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder)
 	}
 }
 
-static void intel_dsi_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
-
-	intel_dsi_vbt_gpio_cleanup(intel_dsi);
-	intel_encoder_destroy(encoder);
-}
-
 static const struct drm_encoder_funcs intel_dsi_funcs = {
-	.destroy = intel_dsi_encoder_destroy,
+	.destroy = intel_encoder_destroy,
 };
 
 static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector,
-- 
GitLab


From 1c0a80f160965c88f16e73ff69015db2f044c486 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 4 Dec 2023 15:13:48 +0200
Subject: [PATCH 0743/2340] Revert "drm/atomic: Loosen FB atomic checks"

This reverts commit f1e75da5364e780905d9cd6043f9c74cdcf84073.

Although the Solid Fill planes patchset got all reviews and
acknowledgements, it doesn't fulfill requirements for the new uABI. It
has neither corresponding open-source userspace implementation nor the
IGT tests coverage. Reverting this patchset until userspace obligations
are fulfilled.

Acked-by: Simon Ser <contact@emersion.fr>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-2-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic.c        | 21 ++++++++--------
 drivers/gpu/drm/drm_atomic_helper.c | 39 +++++++++++++----------------
 include/drm/drm_atomic_helper.h     |  4 +--
 include/drm/drm_plane.h             | 29 ---------------------
 4 files changed, 29 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index aed0a694c74c4..c6f2b86c48ae2 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -674,16 +674,17 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 {
 	struct drm_plane *plane = new_plane_state->plane;
 	struct drm_crtc *crtc = new_plane_state->crtc;
+	const struct drm_framebuffer *fb = new_plane_state->fb;
 	int ret;
 
-	/* either *both* CRTC and pixel source must be set, or neither */
-	if (crtc && !drm_plane_has_visible_data(new_plane_state)) {
-		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no visible data\n",
+	/* either *both* CRTC and FB must be set, or neither */
+	if (crtc && !fb) {
+		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no FB\n",
 			       plane->base.id, plane->name);
 		return -EINVAL;
-	} else if (drm_plane_has_visible_data(new_plane_state) && !crtc) {
-		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] Source %d has visible data but no CRTC\n",
-			       plane->base.id, plane->name, new_plane_state->pixel_source);
+	} else if (fb && !crtc) {
+		drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] FB set but no CRTC\n",
+			       plane->base.id, plane->name);
 		return -EINVAL;
 	}
 
@@ -714,11 +715,9 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 	}
 
 
-	if (new_plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && new_plane_state->fb) {
-		ret = drm_atomic_plane_check_fb(new_plane_state);
-		if (ret)
-			return ret;
-	}
+	ret = drm_atomic_plane_check_fb(new_plane_state);
+	if (ret)
+		return ret;
 
 	if (plane_switching_crtc(old_plane_state, new_plane_state)) {
 		drm_dbg_atomic(plane->dev,
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index dc048988e3f3c..c3f677130def0 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -861,6 +861,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
 					bool can_position,
 					bool can_update_disabled)
 {
+	struct drm_framebuffer *fb = plane_state->fb;
 	struct drm_rect *src = &plane_state->src;
 	struct drm_rect *dst = &plane_state->dst;
 	unsigned int rotation = plane_state->rotation;
@@ -872,7 +873,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
 	*src = drm_plane_state_src(plane_state);
 	*dst = drm_plane_state_dest(plane_state);
 
-	if (!drm_plane_has_visible_data(plane_state)) {
+	if (!fb) {
 		plane_state->visible = false;
 		return 0;
 	}
@@ -889,31 +890,25 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
 		return -EINVAL;
 	}
 
-	/* Check that selected pixel source is valid */
-	if (plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && plane_state->fb) {
-		struct drm_framebuffer *fb = plane_state->fb;
-		drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation);
+	drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation);
 
-		/* Check scaling */
-		hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
-		vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
+	/* Check scaling */
+	hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
+	vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
+	if (hscale < 0 || vscale < 0) {
+		drm_dbg_kms(plane_state->plane->dev,
+			    "Invalid scaling of plane\n");
+		drm_rect_debug_print("src: ", &plane_state->src, true);
+		drm_rect_debug_print("dst: ", &plane_state->dst, false);
+		return -ERANGE;
+	}
 
-		if (hscale < 0 || vscale < 0) {
-			drm_dbg_kms(plane_state->plane->dev,
-					"Invalid scaling of plane\n");
-			drm_rect_debug_print("src: ", &plane_state->src, true);
-			drm_rect_debug_print("dst: ", &plane_state->dst, false);
-			return -ERANGE;
-		}
+	if (crtc_state->enable)
+		drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2);
 
-		if (crtc_state->enable)
-			drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2);
+	plane_state->visible = drm_rect_clip_scaled(src, dst, &clip);
 
-		plane_state->visible = drm_rect_clip_scaled(src, dst, &clip);
-		drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation);
-	} else if (drm_plane_solid_fill_enabled(plane_state)) {
-		plane_state->visible = true;
-	}
+	drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation);
 
 	if (!plane_state->visible)
 		/*
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 6d97f38ac1f67..536a0b0091c3a 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -256,8 +256,8 @@ drm_atomic_plane_disabling(struct drm_plane_state *old_plane_state,
 	 * Anything else should be considered a bug in the atomic core, so we
 	 * gently warn about it.
 	 */
-	WARN_ON((new_plane_state->crtc == NULL && drm_plane_has_visible_data(new_plane_state)) ||
-		(new_plane_state->crtc != NULL && !drm_plane_has_visible_data(new_plane_state)));
+	WARN_ON((new_plane_state->crtc == NULL && new_plane_state->fb != NULL) ||
+		(new_plane_state->crtc != NULL && new_plane_state->fb == NULL));
 
 	return old_plane_state->crtc && !new_plane_state->crtc;
 }
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 3b187f3f54664..d14e2f1db2343 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -1016,35 +1016,6 @@ static inline struct drm_plane *drm_plane_find(struct drm_device *dev,
 #define drm_for_each_plane(plane, dev) \
 	list_for_each_entry(plane, &(dev)->mode_config.plane_list, head)
 
-/**
- * drm_plane_solid_fill_enabled - Check if solid fill is enabled on plane
- * @state: plane state
- *
- * Returns:
- * Whether the plane has been assigned a solid_fill_blob
- */
-static inline bool drm_plane_solid_fill_enabled(struct drm_plane_state *state)
-{
-	if (!state)
-		return false;
-	return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_SOLID_FILL && state->solid_fill_blob;
-}
-
-static inline bool drm_plane_has_visible_data(const struct drm_plane_state *state)
-{
-	switch (state->pixel_source) {
-	case DRM_PLANE_PIXEL_SOURCE_NONE:
-		return false;
-	case DRM_PLANE_PIXEL_SOURCE_SOLID_FILL:
-		return state->solid_fill_blob != NULL;
-	case DRM_PLANE_PIXEL_SOURCE_FB:
-	default:
-		WARN_ON(state->pixel_source != DRM_PLANE_PIXEL_SOURCE_FB);
-	}
-
-	return state->fb != NULL;
-}
-
 bool drm_any_plane_has_format(struct drm_device *dev,
 			      u32 format, u64 modifier);
 
-- 
GitLab


From b881ba8faa5c7689eb1cb487ad891c46dbbed0e8 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 4 Dec 2023 15:13:49 +0200
Subject: [PATCH 0744/2340] Revert "drm/atomic: Move framebuffer checks to
 helper"

This reverts commit 4ba6b7a646321e740c7f2d80c90505019c4e8fce.

Although the Solid Fill planes patchset got all reviews and
acknowledgements, it doesn't fulfill requirements for the new uABI. It
has neither corresponding open-source userspace implementation nor the
IGT tests coverage. Reverting this patchset until userspace obligations
are fulfilled.

Acked-by: Simon Ser <contact@emersion.fr>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-3-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic.c | 130 +++++++++++++++--------------------
 1 file changed, 57 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c6f2b86c48ae2..1339785fbe80d 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -589,76 +589,6 @@ plane_switching_crtc(const struct drm_plane_state *old_plane_state,
 	return true;
 }
 
-static int drm_atomic_plane_check_fb(const struct drm_plane_state *state)
-{
-	struct drm_plane *plane = state->plane;
-	const struct drm_framebuffer *fb = state->fb;
-	struct drm_mode_rect *clips;
-
-	uint32_t num_clips;
-	unsigned int fb_width, fb_height;
-	int ret;
-
-	/* Check whether this plane supports the fb pixel format. */
-	ret = drm_plane_check_pixel_format(plane, fb->format->format,
-					   fb->modifier);
-
-	if (ret) {
-		drm_dbg_atomic(plane->dev,
-			       "[PLANE:%d:%s] invalid pixel format %p4cc, modifier 0x%llx\n",
-			       plane->base.id, plane->name,
-			       &fb->format->format, fb->modifier);
-		return ret;
-	}
-
-	fb_width = fb->width << 16;
-	fb_height = fb->height << 16;
-
-	/* Make sure source coordinates are inside the fb. */
-	if (state->src_w > fb_width ||
-	    state->src_x > fb_width - state->src_w ||
-	    state->src_h > fb_height ||
-	    state->src_y > fb_height - state->src_h) {
-		drm_dbg_atomic(plane->dev,
-			       "[PLANE:%d:%s] invalid source coordinates "
-			       "%u.%06ux%u.%06u+%u.%06u+%u.%06u (fb %ux%u)\n",
-			       plane->base.id, plane->name,
-			       state->src_w >> 16,
-			       ((state->src_w & 0xffff) * 15625) >> 10,
-			       state->src_h >> 16,
-			       ((state->src_h & 0xffff) * 15625) >> 10,
-			       state->src_x >> 16,
-			       ((state->src_x & 0xffff) * 15625) >> 10,
-			       state->src_y >> 16,
-			       ((state->src_y & 0xffff) * 15625) >> 10,
-			       fb->width, fb->height);
-		return -ENOSPC;
-	}
-
-	clips = __drm_plane_get_damage_clips(state);
-	num_clips = drm_plane_get_damage_clips_count(state);
-
-	/* Make sure damage clips are valid and inside the fb. */
-	while (num_clips > 0) {
-		if (clips->x1 >= clips->x2 ||
-		    clips->y1 >= clips->y2 ||
-		    clips->x1 < 0 ||
-		    clips->y1 < 0 ||
-		    clips->x2 > fb_width ||
-		    clips->y2 > fb_height) {
-			drm_dbg_atomic(plane->dev,
-				       "[PLANE:%d:%s] invalid damage clip %d %d %d %d\n",
-				       plane->base.id, plane->name, clips->x1,
-				       clips->y1, clips->x2, clips->y2);
-			return -EINVAL;
-		}
-		clips++;
-		num_clips--;
-	}
-
-	return 0;
-}
-
 /**
  * drm_atomic_plane_check - check plane state
  * @old_plane_state: old plane state to check
@@ -675,6 +605,9 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 	struct drm_plane *plane = new_plane_state->plane;
 	struct drm_crtc *crtc = new_plane_state->crtc;
 	const struct drm_framebuffer *fb = new_plane_state->fb;
+	unsigned int fb_width, fb_height;
+	struct drm_mode_rect *clips;
+	uint32_t num_clips;
 	int ret;
 
 	/* either *both* CRTC and FB must be set, or neither */
@@ -701,6 +634,17 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 		return -EINVAL;
 	}
 
+	/* Check whether this plane supports the fb pixel format. */
+	ret = drm_plane_check_pixel_format(plane, fb->format->format,
+					   fb->modifier);
+	if (ret) {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] invalid pixel format %p4cc, modifier 0x%llx\n",
+			       plane->base.id, plane->name,
+			       &fb->format->format, fb->modifier);
+		return ret;
+	}
+
 	/* Give drivers some help against integer overflows */
 	if (new_plane_state->crtc_w > INT_MAX ||
 	    new_plane_state->crtc_x > INT_MAX - (int32_t) new_plane_state->crtc_w ||
@@ -714,10 +658,50 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state,
 		return -ERANGE;
 	}
 
+	fb_width = fb->width << 16;
+	fb_height = fb->height << 16;
 
-	ret = drm_atomic_plane_check_fb(new_plane_state);
-	if (ret)
-		return ret;
+	/* Make sure source coordinates are inside the fb. */
+	if (new_plane_state->src_w > fb_width ||
+	    new_plane_state->src_x > fb_width - new_plane_state->src_w ||
+	    new_plane_state->src_h > fb_height ||
+	    new_plane_state->src_y > fb_height - new_plane_state->src_h) {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] invalid source coordinates "
+			       "%u.%06ux%u.%06u+%u.%06u+%u.%06u (fb %ux%u)\n",
+			       plane->base.id, plane->name,
+			       new_plane_state->src_w >> 16,
+			       ((new_plane_state->src_w & 0xffff) * 15625) >> 10,
+			       new_plane_state->src_h >> 16,
+			       ((new_plane_state->src_h & 0xffff) * 15625) >> 10,
+			       new_plane_state->src_x >> 16,
+			       ((new_plane_state->src_x & 0xffff) * 15625) >> 10,
+			       new_plane_state->src_y >> 16,
+			       ((new_plane_state->src_y & 0xffff) * 15625) >> 10,
+			       fb->width, fb->height);
+		return -ENOSPC;
+	}
+
+	clips = __drm_plane_get_damage_clips(new_plane_state);
+	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
+
+	/* Make sure damage clips are valid and inside the fb. */
+	while (num_clips > 0) {
+		if (clips->x1 >= clips->x2 ||
+		    clips->y1 >= clips->y2 ||
+		    clips->x1 < 0 ||
+		    clips->y1 < 0 ||
+		    clips->x2 > fb_width ||
+		    clips->y2 > fb_height) {
+			drm_dbg_atomic(plane->dev,
+				       "[PLANE:%d:%s] invalid damage clip %d %d %d %d\n",
+				       plane->base.id, plane->name, clips->x1,
+				       clips->y1, clips->x2, clips->y2);
+			return -EINVAL;
+		}
+		clips++;
+		num_clips--;
+	}
 
 	if (plane_switching_crtc(old_plane_state, new_plane_state)) {
 		drm_dbg_atomic(plane->dev,
-- 
GitLab


From a513f095b941e9e96196f04f11f253d763310c08 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 4 Dec 2023 15:13:50 +0200
Subject: [PATCH 0745/2340] Revert "drm/atomic: Add solid fill data to plane
 state dump"

This reverts commit e86413f5442ee094e66b3e75f2d3419ed0df9520.

Although the Solid Fill planes patchset got all reviews and
acknowledgements, it doesn't fulfill requirements for the new uABI. It
has neither corresponding open-source userspace implementation nor the
IGT tests coverage. Reverting this patchset until userspace obligations
are fulfilled.

Acked-by: Simon Ser <contact@emersion.fr>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-4-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic.c | 4 ----
 drivers/gpu/drm/drm_plane.c  | 8 --------
 include/drm/drm_plane.h      | 3 ---
 3 files changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 1339785fbe80d..02aa7df832cc0 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -726,10 +726,6 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 	drm_printf(p, "\tfb=%u\n", state->fb ? state->fb->base.id : 0);
 	if (state->fb)
 		drm_framebuffer_print_info(p, 2, state->fb);
-	drm_printf(p, "\tsolid_fill=%u\n",
-			state->solid_fill_blob ? state->solid_fill_blob->base.id : 0);
-	if (state->solid_fill_blob)
-		drm_plane_solid_fill_print_info(p, 2, state);
 	drm_printf(p, "\tcrtc-pos=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&dest));
 	drm_printf(p, "\tsrc-pos=" DRM_RECT_FP_FMT "\n", DRM_RECT_FP_ARG(&src));
 	drm_printf(p, "\trotation=%x\n", state->rotation);
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index c65c72701dd29..c8dbe5ae60cc6 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -1641,14 +1641,6 @@ __drm_plane_get_damage_clips(const struct drm_plane_state *state)
 					state->fb_damage_clips->data : NULL);
 }
 
-void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent,
-				     const struct drm_plane_state *state)
-{
-	drm_printf_indent(p, indent, "r=0x%08x\n", state->solid_fill.r);
-	drm_printf_indent(p, indent, "g=0x%08x\n", state->solid_fill.g);
-	drm_printf_indent(p, indent, "b=0x%08x\n", state->solid_fill.b);
-}
-
 /**
  * drm_plane_get_damage_clips - Returns damage clips.
  * @state: Plane state.
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index d14e2f1db2343..4b7af4381bbe4 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -1025,9 +1025,6 @@ drm_plane_get_damage_clips_count(const struct drm_plane_state *state);
 struct drm_mode_rect *
 drm_plane_get_damage_clips(const struct drm_plane_state *state);
 
-void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent,
-				     const struct drm_plane_state *state);
-
 int drm_plane_create_scaling_filter_property(struct drm_plane *plane,
 					     unsigned int supported_filters);
 
-- 
GitLab


From fe28421d4fedb90cadcef4932be0e8364f79283d Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 4 Dec 2023 15:13:51 +0200
Subject: [PATCH 0746/2340] Revert "drm/atomic: Add pixel source to plane state
 dump"

This reverts commit 8283ac7871a959848e09fc6593b8c12b8febfee6.

Although the Solid Fill planes patchset got all reviews and
acknowledgements, it doesn't fulfill requirements for the new uABI. It
has neither corresponding open-source userspace implementation nor the
IGT tests coverage. Reverting this patchset until userspace obligations
are fulfilled.

Acked-by: Simon Ser <contact@emersion.fr>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-5-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic.c        | 1 -
 drivers/gpu/drm/drm_blend.c         | 1 -
 drivers/gpu/drm/drm_crtc_internal.h | 1 -
 3 files changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 02aa7df832cc0..f1a503aafe5aa 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -722,7 +722,6 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 
 	drm_printf(p, "plane[%u]: %s\n", plane->base.id, plane->name);
 	drm_printf(p, "\tcrtc=%s\n", state->crtc ? state->crtc->name : "(null)");
-	drm_printf(p, "\tpixel-source=%s\n", drm_get_pixel_source_name(state->pixel_source));
 	drm_printf(p, "\tfb=%u\n", state->fb ? state->fb->base.id : 0);
 	if (state->fb)
 		drm_framebuffer_print_info(p, 2, state->fb);
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 9c1608f7c1df7..37b31b7e5ce5d 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -647,7 +647,6 @@ static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
 	{ DRM_PLANE_PIXEL_SOURCE_FB, "FB" },
 	{ DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, "SOLID_FILL" },
 };
-DRM_ENUM_NAME_FN(drm_get_pixel_source_name, drm_pixel_source_enum_list);
 
 /**
  * drm_plane_create_pixel_source_property - create a new pixel source property
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 4114675c07282..a514d5207e419 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -269,7 +269,6 @@ int drm_plane_check_pixel_format(struct drm_plane *plane,
 				 u32 format, u64 modifier);
 struct drm_mode_rect *
 __drm_plane_get_damage_clips(const struct drm_plane_state *state);
-const char *drm_get_pixel_source_name(int val);
 
 /* drm_bridge.c */
 void drm_bridge_detach(struct drm_bridge *bridge);
-- 
GitLab


From 5fb1ad3f5725c5c4d1a0c24ba4f82f239dc6878d Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 4 Dec 2023 15:13:52 +0200
Subject: [PATCH 0747/2340] Revert "drm: Add solid fill pixel source"

This reverts commit 4b64167042927531f4cfaf035b8f88c2f7a05f06.

Although the Solid Fill planes patchset got all reviews and
acknowledgements, it doesn't fulfill requirements for the new uABI. It
has neither corresponding open-source userspace implementation nor the
IGT tests coverage. Reverting this patchset until userspace obligations
are fulfilled.

Acked-by: Simon Ser <contact@emersion.fr>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-6-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_blend.c | 10 +---------
 include/drm/drm_plane.h     |  1 -
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 37b31b7e5ce5d..665c5d9b056f8 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -204,9 +204,6 @@
  *	"FB":
  *		Framebuffer source set by the "FB_ID" property.
  *
- *	"SOLID_FILL":
- *		Solid fill color source set by the "solid_fill" property.
- *
  * solid_fill:
  *	solid_fill is set up with drm_plane_create_solid_fill_property(). It
  *	contains pixel data that drivers can use to fill a plane.
@@ -645,7 +642,6 @@ EXPORT_SYMBOL(drm_plane_create_blend_mode_property);
 static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
 	{ DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" },
 	{ DRM_PLANE_PIXEL_SOURCE_FB, "FB" },
-	{ DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, "SOLID_FILL" },
 };
 
 /**
@@ -670,9 +666,6 @@ static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
  * "FB":
  *	Framebuffer pixel source
  *
- * "SOLID_FILL":
- * 	Solid fill color pixel source
- *
  * Returns:
  * Zero on success, negative errno on failure.
  */
@@ -682,8 +675,7 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane,
 	struct drm_device *dev = plane->dev;
 	struct drm_property *prop;
 	static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) |
-						      BIT(DRM_PLANE_PIXEL_SOURCE_NONE) |
-						      BIT(DRM_PLANE_PIXEL_SOURCE_SOLID_FILL);
+						      BIT(DRM_PLANE_PIXEL_SOURCE_NONE);
 	int i;
 
 	/* FB is supported by default */
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 4b7af4381bbe4..5bac644d4cc34 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -43,7 +43,6 @@ enum drm_scaling_filter {
 enum drm_plane_pixel_source {
 	DRM_PLANE_PIXEL_SOURCE_NONE,
 	DRM_PLANE_PIXEL_SOURCE_FB,
-	DRM_PLANE_PIXEL_SOURCE_SOLID_FILL,
 	DRM_PLANE_PIXEL_SOURCE_MAX
 };
 
-- 
GitLab


From e5fba1ada1c1d676438138d815acd8f427a1eaf0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 4 Dec 2023 15:13:53 +0200
Subject: [PATCH 0748/2340] Revert "drm: Introduce solid fill DRM plane
 property"

This reverts commit 85863a4e16e77079ee14865905ddc3ef9483a640.

Although the Solid Fill planes patchset got all reviews and
acknowledgements, it doesn't fulfill requirements for the new uABI. It
has neither corresponding open-source userspace implementation nor the
IGT tests coverage. Reverting this patchset until userspace obligations
are fulfilled.

Acked-by: Simon Ser <contact@emersion.fr>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-7-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic_state_helper.c |  9 ------
 drivers/gpu/drm/drm_atomic_uapi.c         | 26 ----------------
 drivers/gpu/drm/drm_blend.c               | 30 -------------------
 include/drm/drm_blend.h                   |  1 -
 include/drm/drm_plane.h                   | 36 -----------------------
 include/uapi/drm/drm_mode.h               | 24 ---------------
 6 files changed, 126 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index f6c76cea8be49..311b04edf742c 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -254,11 +254,6 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state,
 	plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
 	plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB;
 
-	if (plane_state->solid_fill_blob) {
-		drm_property_blob_put(plane_state->solid_fill_blob);
-		plane_state->solid_fill_blob = NULL;
-	}
-
 	if (plane->color_encoding_property) {
 		if (!drm_object_property_get_default_value(&plane->base,
 							   plane->color_encoding_property,
@@ -355,9 +350,6 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane,
 	if (state->fb)
 		drm_framebuffer_get(state->fb);
 
-	if (state->solid_fill_blob)
-		drm_property_blob_get(state->solid_fill_blob);
-
 	state->fence = NULL;
 	state->commit = NULL;
 	state->fb_damage_clips = NULL;
@@ -407,7 +399,6 @@ void __drm_atomic_helper_plane_destroy_state(struct drm_plane_state *state)
 		drm_crtc_commit_put(state->commit);
 
 	drm_property_blob_put(state->fb_damage_clips);
-	drm_property_blob_put(state->solid_fill_blob);
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_destroy_state);
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index d7ae8e2c02651..bd71405319488 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -316,20 +316,6 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state,
 }
 EXPORT_SYMBOL(drm_atomic_set_crtc_for_connector);
 
-static void drm_atomic_set_solid_fill_prop(struct drm_plane_state *state)
-{
-	struct drm_mode_solid_fill *user_info;
-
-	if (!state->solid_fill_blob)
-		return;
-
-	user_info = (struct drm_mode_solid_fill *)state->solid_fill_blob->data;
-
-	state->solid_fill.r = user_info->r;
-	state->solid_fill.g = user_info->g;
-	state->solid_fill.b = user_info->b;
-}
-
 static void set_out_fence_for_crtc(struct drm_atomic_state *state,
 				   struct drm_crtc *crtc, s32 __user *fence_ptr)
 {
@@ -578,15 +564,6 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 		state->src_h = val;
 	} else if (property == plane->pixel_source_property) {
 		state->pixel_source = val;
-	} else if (property == plane->solid_fill_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
-				&state->solid_fill_blob,
-				val, sizeof(struct drm_mode_solid_fill),
-				-1, &replaced);
-		if (ret)
-			return ret;
-
-		drm_atomic_set_solid_fill_prop(state);
 	} else if (property == plane->alpha_property) {
 		state->alpha = val;
 	} else if (property == plane->blend_mode_property) {
@@ -677,9 +654,6 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = state->src_h;
 	} else if (property == plane->pixel_source_property) {
 		*val = state->pixel_source;
-	} else if (property == plane->solid_fill_property) {
-		*val = state->solid_fill_blob ?
-			state->solid_fill_blob->base.id : 0;
 	} else if (property == plane->alpha_property) {
 		*val = state->alpha;
 	} else if (property == plane->blend_mode_property) {
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 665c5d9b056f8..fce734cdb85ba 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -204,10 +204,6 @@
  *	"FB":
  *		Framebuffer source set by the "FB_ID" property.
  *
- * solid_fill:
- *	solid_fill is set up with drm_plane_create_solid_fill_property(). It
- *	contains pixel data that drivers can use to fill a plane.
- *
  * Note that all the property extensions described here apply either to the
  * plane or the CRTC (e.g. for the background color, which currently is not
  * exposed and assumed to be black).
@@ -713,29 +709,3 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane,
 	return 0;
 }
 EXPORT_SYMBOL(drm_plane_create_pixel_source_property);
-
-/**
- * drm_plane_create_solid_fill_property - create a new solid_fill property
- * @plane: drm plane
- *
- * This creates a new property blob that holds pixel data for solid fill planes.
- * The property is exposed to userspace as a property blob called "solid_fill".
- *
- * For information on what the blob contains, see `drm_mode_solid_fill`.
- */
-int drm_plane_create_solid_fill_property(struct drm_plane *plane)
-{
-	struct drm_property *prop;
-
-	prop = drm_property_create(plane->dev,
-			DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB,
-			"solid_fill", 0);
-	if (!prop)
-		return -ENOMEM;
-
-	drm_object_attach_property(&plane->base, prop, 0);
-	plane->solid_fill_property = prop;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_plane_create_solid_fill_property);
diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h
index e7158fbee3897..122bbfbaae33c 100644
--- a/include/drm/drm_blend.h
+++ b/include/drm/drm_blend.h
@@ -60,5 +60,4 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane,
 					 unsigned int supported_modes);
 int drm_plane_create_pixel_source_property(struct drm_plane *plane,
 					   unsigned long extra_sources);
-int drm_plane_create_solid_fill_property(struct drm_plane *plane);
 #endif
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 5bac644d4cc34..bc0176ba25be1 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -46,18 +46,6 @@ enum drm_plane_pixel_source {
 	DRM_PLANE_PIXEL_SOURCE_MAX
 };
 
-/**
- * struct solid_fill_property - RGB values for solid fill plane
- *
- * TODO: Add solid fill source and corresponding pixel source
- *       that supports RGBA color
- */
-struct drm_solid_fill {
-	uint32_t r;
-	uint32_t g;
-	uint32_t b;
-};
-
 /**
  * struct drm_plane_state - mutable plane state
  *
@@ -146,23 +134,6 @@ struct drm_plane_state {
 	 */
 	enum drm_plane_pixel_source pixel_source;
 
-	/**
-	 * @solid_fill_blob:
-	 *
-	 * Blob containing relevant information for a solid fill plane
-	 * including RGB color values. See
-	 * drm_plane_create_solid_fill_property() for more details.
-	 */
-	struct drm_property_blob *solid_fill_blob;
-
-	/**
-	 * @solid_fill:
-	 *
-	 * Pixel data for solid fill planes. See
-	 * drm_plane_create_solid_fill_property() for more details.
-	 */
-	struct drm_solid_fill solid_fill;
-
 	/**
 	 * @alpha:
 	 * Opacity of the plane with 0 as completely transparent and 0xffff as
@@ -763,13 +734,6 @@ struct drm_plane {
 	 */
 	struct drm_property *pixel_source_property;
 
-	/**
-	 * @solid_fill_property:
-	 * Optional solid_fill property for this plane. See
-	 * drm_plane_create_solid_fill_property().
-	 */
-	struct drm_property *solid_fill_property;
-
 	/**
 	 * @alpha_property:
 	 * Optional alpha property for this plane. See
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index cbd943a8c88b1..95630f1701102 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -259,30 +259,6 @@ struct drm_mode_modeinfo {
 	char name[DRM_DISPLAY_MODE_LEN];
 };
 
-/**
- * struct drm_mode_solid_fill - User info for solid fill planes
- *
- * This is the userspace API solid fill information structure.
- *
- * Userspace can enable solid fill planes by assigning the plane "solid_fill"
- * property to a blob containing a single drm_mode_solid_fill struct populated with an RGB323232
- * color and setting the pixel source to "SOLID_FILL".
- *
- * For information on the plane property, see drm_plane_create_solid_fill_property()
- *
- * @r: Red color value of single pixel
- * @g: Green color value of single pixel
- * @b: Blue color value of single pixel
- * @pad: padding, must be zero
- */
-struct drm_mode_solid_fill {
-	__u32 r;
-	__u32 g;
-	__u32 b;
-	__u32 pad;
-};
-
-
 struct drm_mode_card_res {
 	__u64 fb_id_ptr;
 	__u64 crtc_id_ptr;
-- 
GitLab


From 90422201f8f2b4e26ab7bd43b92786a11c1ffebf Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 4 Dec 2023 15:13:54 +0200
Subject: [PATCH 0749/2340] Revert "drm: Introduce pixel_source DRM plane
 property"

This reverts commit e50e5fed41c7eed2db4119645bf3480ec43fec11.

Although the Solid Fill planes patchset got all reviews and
acknowledgements, it doesn't fulfill requirements for the new uABI. It
has neither corresponding open-source userspace implementation nor the
IGT tests coverage. Reverting this patchset until userspace obligations
are fulfilled.

Acked-by: Simon Ser <contact@emersion.fr>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-8-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic_state_helper.c |  1 -
 drivers/gpu/drm/drm_atomic_uapi.c         |  4 -
 drivers/gpu/drm/drm_blend.c               | 94 -----------------------
 drivers/gpu/drm/drm_plane.c               | 19 +----
 include/drm/drm_blend.h                   |  2 -
 include/drm/drm_plane.h                   | 21 -----
 6 files changed, 4 insertions(+), 137 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 311b04edf742c..54975de44a0e3 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -252,7 +252,6 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state,
 
 	plane_state->alpha = DRM_BLEND_ALPHA_OPAQUE;
 	plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
-	plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB;
 
 	if (plane->color_encoding_property) {
 		if (!drm_object_property_get_default_value(&plane->base,
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index bd71405319488..aee4a65d49591 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -562,8 +562,6 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 		state->src_w = val;
 	} else if (property == config->prop_src_h) {
 		state->src_h = val;
-	} else if (property == plane->pixel_source_property) {
-		state->pixel_source = val;
 	} else if (property == plane->alpha_property) {
 		state->alpha = val;
 	} else if (property == plane->blend_mode_property) {
@@ -652,8 +650,6 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = state->src_w;
 	} else if (property == config->prop_src_h) {
 		*val = state->src_h;
-	} else if (property == plane->pixel_source_property) {
-		*val = state->pixel_source;
 	} else if (property == plane->alpha_property) {
 		*val = state->alpha;
 	} else if (property == plane->blend_mode_property) {
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index fce734cdb85ba..6e74de8334663 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -185,25 +185,6 @@
  *		 plane does not expose the "alpha" property, then this is
  *		 assumed to be 1.0
  *
- * pixel_source:
- *	pixel_source is set up with drm_plane_create_pixel_source_property().
- *	It is used to toggle the active source of pixel data for the plane.
- *	The plane will only display data from the set pixel_source -- any
- *	data from other sources will be ignored.
- *
- *	For non-framebuffer sources, if pixel_source is set to a non-framebuffer
- *	and non-NONE source, and the corresponding source property is NULL, then
- *	the atomic commit should return an error.
- *
- *	Possible values:
- *
- *	"NONE":
- *		No active pixel source.
- *		Committing with a NONE pixel source will disable the plane.
- *
- *	"FB":
- *		Framebuffer source set by the "FB_ID" property.
- *
  * Note that all the property extensions described here apply either to the
  * plane or the CRTC (e.g. for the background color, which currently is not
  * exposed and assumed to be black).
@@ -634,78 +615,3 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane,
 	return 0;
 }
 EXPORT_SYMBOL(drm_plane_create_blend_mode_property);
-
-static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = {
-	{ DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" },
-	{ DRM_PLANE_PIXEL_SOURCE_FB, "FB" },
-};
-
-/**
- * drm_plane_create_pixel_source_property - create a new pixel source property
- * @plane: DRM plane
- * @extra_sources: Bitmask of additional supported pixel_sources for the driver.
- *		   DRM_PLANE_PIXEL_SOURCE_FB and DRM_PLANE_PIXEL_SOURCE_NONE will
- *		   always be enabled as supported sources.
- *
- * This creates a new property describing the current source of pixel data for the
- * plane. The pixel_source will be initialized as DRM_PLANE_PIXEL_SOURCE_FB by default.
- *
- * Drivers can set a custom default source by overriding the pixel_source value in
- * drm_plane_funcs.reset()
- *
- * The property is exposed to userspace as an enumeration property called
- * "pixel_source" and has the following enumeration values:
- *
- * "NONE":
- *	No active pixel source
- *
- * "FB":
- *	Framebuffer pixel source
- *
- * Returns:
- * Zero on success, negative errno on failure.
- */
-int drm_plane_create_pixel_source_property(struct drm_plane *plane,
-					   unsigned long extra_sources)
-{
-	struct drm_device *dev = plane->dev;
-	struct drm_property *prop;
-	static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) |
-						      BIT(DRM_PLANE_PIXEL_SOURCE_NONE);
-	int i;
-
-	/* FB is supported by default */
-	unsigned long supported_sources = extra_sources |
-					  BIT(DRM_PLANE_PIXEL_SOURCE_FB) |
-					  BIT(DRM_PLANE_PIXEL_SOURCE_NONE);
-
-	if (WARN_ON(supported_sources & ~valid_source_mask))
-		return -EINVAL;
-
-	prop = drm_property_create(dev, DRM_MODE_PROP_ENUM | DRM_MODE_PROP_ATOMIC, "pixel_source",
-			hweight32(supported_sources));
-
-	if (!prop)
-		return -ENOMEM;
-
-	for (i = 0; i < ARRAY_SIZE(drm_pixel_source_enum_list); i++) {
-		int ret;
-
-		if (!test_bit(drm_pixel_source_enum_list[i].type, &supported_sources))
-			continue;
-
-		ret = drm_property_add_enum(prop, drm_pixel_source_enum_list[i].type,
-				drm_pixel_source_enum_list[i].name);
-		if (ret) {
-			drm_property_destroy(dev, prop);
-
-			return ret;
-		}
-	}
-
-	drm_object_attach_property(&plane->base, prop, DRM_PLANE_PIXEL_SOURCE_FB);
-	plane->pixel_source_property = prop;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_plane_create_pixel_source_property);
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index c8dbe5ae60cc6..9e8e4c60983d6 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -953,14 +953,6 @@ bool drm_any_plane_has_format(struct drm_device *dev,
 }
 EXPORT_SYMBOL(drm_any_plane_has_format);
 
-static bool drm_plane_needs_disable(struct drm_plane_state *state, struct drm_framebuffer *fb)
-{
-	if (state->pixel_source == DRM_PLANE_PIXEL_SOURCE_NONE)
-		return true;
-
-	return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && fb == NULL;
-}
-
 /*
  * __setplane_internal - setplane handler for internal callers
  *
@@ -983,8 +975,8 @@ static int __setplane_internal(struct drm_plane *plane,
 
 	WARN_ON(drm_drv_uses_atomic_modeset(plane->dev));
 
-	/* No visible data means shut it down */
-	if (drm_plane_needs_disable(plane->state, fb)) {
+	/* No fb means shut it down */
+	if (!fb) {
 		plane->old_fb = plane->fb;
 		ret = plane->funcs->disable_plane(plane, ctx);
 		if (!ret) {
@@ -1035,8 +1027,8 @@ static int __setplane_atomic(struct drm_plane *plane,
 
 	WARN_ON(!drm_drv_uses_atomic_modeset(plane->dev));
 
-	/* No visible data means shut it down */
-	if (drm_plane_needs_disable(plane->state, fb))
+	/* No fb means shut it down */
+	if (!fb)
 		return plane->funcs->disable_plane(plane, ctx);
 
 	/*
@@ -1109,9 +1101,6 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 
-	if (plane->state)
-		plane->state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB;
-
 	if (plane_req->fb_id) {
 		fb = drm_framebuffer_lookup(dev, file_priv, plane_req->fb_id);
 		if (!fb) {
diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h
index 122bbfbaae33c..88bdfec3bd884 100644
--- a/include/drm/drm_blend.h
+++ b/include/drm/drm_blend.h
@@ -58,6 +58,4 @@ int drm_atomic_normalize_zpos(struct drm_device *dev,
 			      struct drm_atomic_state *state);
 int drm_plane_create_blend_mode_property(struct drm_plane *plane,
 					 unsigned int supported_modes);
-int drm_plane_create_pixel_source_property(struct drm_plane *plane,
-					   unsigned long extra_sources);
 #endif
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index bc0176ba25be1..c6565a6f9324c 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -40,12 +40,6 @@ enum drm_scaling_filter {
 	DRM_SCALING_FILTER_NEAREST_NEIGHBOR,
 };
 
-enum drm_plane_pixel_source {
-	DRM_PLANE_PIXEL_SOURCE_NONE,
-	DRM_PLANE_PIXEL_SOURCE_FB,
-	DRM_PLANE_PIXEL_SOURCE_MAX
-};
-
 /**
  * struct drm_plane_state - mutable plane state
  *
@@ -126,14 +120,6 @@ struct drm_plane_state {
 	/** @hotspot_y: y offset to mouse cursor hotspot */
 	int32_t hotspot_x, hotspot_y;
 
-	/**
-	 * @pixel_source:
-	 *
-	 * Source of pixel information for the plane. See
-	 * drm_plane_create_pixel_source_property() for more details.
-	 */
-	enum drm_plane_pixel_source pixel_source;
-
 	/**
 	 * @alpha:
 	 * Opacity of the plane with 0 as completely transparent and 0xffff as
@@ -727,13 +713,6 @@ struct drm_plane {
 	 */
 	struct drm_plane_state *state;
 
-	/*
-	 * @pixel_source_property:
-	 * Optional pixel_source property for this plane. See
-	 * drm_plane_create_pixel_source_property().
-	 */
-	struct drm_property *pixel_source_property;
-
 	/**
 	 * @alpha_property:
 	 * Optional alpha property for this plane. See
-- 
GitLab


From 801207c18834e289960c515bc41d243aa623763b Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Mon, 4 Dec 2023 09:13:14 -0800
Subject: [PATCH 0750/2340] drm/msm/dp: add a missing unlock in
 dp_hpd_plug_handle()

When pm_runtime_resume_and_get() fails, unlock before returning.

Fixes: 5814b8bf086a ("drm/msm/dp: incorporate pm_runtime framework into DP driver")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570395/
Link: https://lore.kernel.org/r/20231204171317.192427-1-harshit.m.mogalapalli@oracle.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 3a360c87e2dc4..20c5cf7986b22 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -564,6 +564,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret) {
 		DRM_ERROR("failed to pm_runtime_resume\n");
+		mutex_unlock(&dp->event_mutex);
 		return ret;
 	}
 
-- 
GitLab


From dce94061f0d02f5ab355390a6e63d3dbea938b72 Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Mon, 4 Dec 2023 04:21:01 -0800
Subject: [PATCH 0751/2340] drm/v3d: Fix missing error code in
 v3d_submit_cpu_ioctl()

Smatch warns:
	drivers/gpu/drm/v3d/v3d_submit.c:1222 v3d_submit_cpu_ioctl()
	warn: missing error code 'ret'

When there is no job type or job is submitted with wrong number of BOs
it is an error path, ret is zero at this point which is incorrect
return.

Fix this by changing it to -EINVAL.

Fixes: aafc1a2bea67 ("drm/v3d: Add a CPU job submission")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <melissa.srw@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204122102.181298-1-harshit.m.mogalapalli@oracle.com
---
 drivers/gpu/drm/v3d/v3d_submit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index d7a9da2484fd8..fcff41dd2315b 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -1219,11 +1219,13 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
 	/* Every CPU job must have a CPU job user extension */
 	if (!cpu_job->job_type) {
 		DRM_DEBUG("CPU job must have a CPU job user extension.\n");
+		ret = -EINVAL;
 		goto fail;
 	}
 
 	if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) {
 		DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n");
+		ret = -EINVAL;
 		goto fail;
 	}
 
-- 
GitLab


From 46b1f1b839cad600de3ad7ed999bd0155c528746 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:25 +0200
Subject: [PATCH 0752/2340] drm/msm/dpu: populate SSPP scaler block version

The function _dpu_hw_sspp_setup_scaler3() passes and
dpu_hw_setup_scaler3() uses scaler_blk.version to determine in which way
the scaler (QSEED3) block should be programmed. However up to now we
were not setting this field. Set it now, splitting the vig_sblk data
which has different version fields.

Reported-by: Marijn Suijten <marijn.suijten@somainline.org>
Fixes: 9b6f4fedaac2 ("drm/msm/dpu: Add SM6125 support")
Fixes: 27f0df03f3ff ("drm/msm/dpu: Add SM6375 support")
Fixes: 3186acba5cdc ("drm/msm/dpu: Add SM6350 support")
Fixes: efcd0107727c ("drm/msm/dpu: add support for SM8550")
Fixes: 4a352c2fc15a ("drm/msm/dpu: Introduce SC8280XP")
Fixes: 0e91bcbb0016 ("drm/msm/dpu: Add SM8350 to hw catalog")
Fixes: 100d7ef6995d ("drm/msm/dpu: add support for SM8450")
Fixes: 3581b7062cec ("drm/msm/disp/dpu1: add support for display on SM6115")
Fixes: dabfdd89eaa9 ("drm/msm/disp/dpu1: add inline rotation support for sc7280")
Fixes: f3af2d6ee9ab ("drm/msm/dpu: Add SC8180x to hw catalog")
Fixes: 94391a14fc27 ("drm/msm/dpu1: Add MSM8998 to hw catalog")
Fixes: af776a3e1c30 ("drm/msm/dpu: add SM8250 to hw catalog")
Fixes: 386fced3f76f ("drm/msm/dpu: add SM8150 to hw catalog")
Fixes: b75ab05a3479 ("msm:disp:dpu1: add scaler support on SC7180 display")
Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570098/
Link: https://lore.kernel.org/r/20231201234234.2065610-2-dmitry.baryshkov@linaro.org
---
 .../msm/disp/dpu1/catalog/dpu_5_0_sm8150.h    |  8 +-
 .../msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h   |  8 +-
 .../msm/disp/dpu1/catalog/dpu_8_1_sm8450.h    |  8 +-
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    | 95 ++++++++++++++-----
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h    |  3 +-
 5 files changed, 87 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
index 9392ad2b4d3fe..c022e57864a43 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
@@ -77,7 +77,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_0,
+		.sblk = &sm8150_vig_sblk_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_1,
+		.sblk = &sm8150_vig_sblk_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -93,7 +93,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_2,
+		.sblk = &sm8150_vig_sblk_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -101,7 +101,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_3,
+		.sblk = &sm8150_vig_sblk_3,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
index 9ffc8804a6fca..cb0758f0829de 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
@@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_0,
+		.sblk = &sm8150_vig_sblk_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_1,
+		.sblk = &sm8150_vig_sblk_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_2,
+		.sblk = &sm8150_vig_sblk_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_3,
+		.sblk = &sm8150_vig_sblk_3,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index d18145c226dac..72b0f547242fe 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_0,
+		.sblk = &sm8450_vig_sblk_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_1,
+		.sblk = &sm8450_vig_sblk_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_2,
+		.sblk = &sm8450_vig_sblk_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_3,
+		.sblk = &sm8450_vig_sblk_3,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index a1aada6307808..7056c08b9957f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -249,14 +249,17 @@ static const uint32_t wb2_formats[] = {
  * SSPP sub blocks config
  *************************************************************/
 
+#define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min))
+
 /* SSPP common configuration */
-#define _VIG_SBLK(sdma_pri, qseed_ver) \
+#define _VIG_SBLK(sdma_pri, qseed_ver, scaler_ver) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
 	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
 		.id = qseed_ver, \
+		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
 	.csc_blk = {.name = "csc", \
 		.id = DPU_SSPP_CSC_10BIT, \
@@ -268,13 +271,14 @@ static const uint32_t wb2_formats[] = {
 	.rotation_cfg = NULL, \
 	}
 
-#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, rot_cfg) \
+#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, scaler_ver, rot_cfg) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
 	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
 		.id = qseed_ver, \
+		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
 	.csc_blk = {.name = "csc", \
 		.id = DPU_SSPP_CSC_10BIT, \
@@ -298,13 +302,17 @@ static const uint32_t wb2_formats[] = {
 	}
 
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 2));
 
 static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
 	.rot_maxheight = 1088,
@@ -313,13 +321,30 @@ static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
 };
 
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3);
+				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 3));
+
+static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 =
+				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 4));
+static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 =
+				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 4));
+static const struct dpu_sspp_sub_blks sm8150_vig_sblk_2 =
+				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 4));
+static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 =
+				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3,
+					  SSPP_SCALER_VER(1, 4));
 
 static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1);
 static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(2);
@@ -327,34 +352,60 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3);
 static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4);
 
 static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 =
-				_VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 =
-			_VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4, &dpu_rot_sc7280_cfg_v2);
+			_VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4,
+				      SSPP_SCALER_VER(3, 0),
+				      &dpu_rot_sc7280_cfg_v2);
 
 static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 =
-				_VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 =
-				_VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE);
+				_VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE,
+					  SSPP_SCALER_VER(2, 4));
 
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 0));
+
+static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 =
+				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 1));
+static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 =
+				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 1));
+static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 =
+				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 1));
+static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 =
+				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 1));
 
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 =
-				_VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 =
-				_VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4);
+				_VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4,
+					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5);
 static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index df024e10d3a32..62445075306af 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -265,7 +265,8 @@ enum {
 /**
  * struct dpu_scaler_blk: Scaler information
  * @info:   HW register and features supported by this sub-blk
- * @version: qseed block revision
+ * @version: qseed block revision, on QSEED3+ platforms this is the value of
+ *           scaler_blk.base + QSEED3_HW_VERSION registers.
  */
 struct dpu_scaler_blk {
 	DPU_HW_SUBBLK_INFO;
-- 
GitLab


From 07b852c91cbe2c9985d218ab7e2dd1ba51beb9e6 Mon Sep 17 00:00:00 2001
From: Marijn Suijten <marijn.suijten@somainline.org>
Date: Sat, 2 Dec 2023 01:40:26 +0200
Subject: [PATCH 0753/2340] drm/msm/dpu: Drop unused get_scaler_ver callback
 from SSPP

This pointer callback is never used and should be removed.

Signed-off-by: Marijn Suijten <marijn.suijten@somainline.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
[DB: dropped the helpers completely, which are unused now]
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570096/
Link: https://lore.kernel.org/r/20231201234234.2065610-3-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c | 13 +------------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h |  6 ------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c |  6 ------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h |  3 ---
 4 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
index 8e3c65989c498..b408d456c123d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
@@ -395,15 +395,6 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_sspp *ctx,
 			format);
 }
 
-static u32 _dpu_hw_sspp_get_scaler3_ver(struct dpu_hw_sspp *ctx)
-{
-	if (!ctx)
-		return 0;
-
-	return dpu_hw_get_scaler3_ver(&ctx->hw,
-				      ctx->cap->sblk->scaler_blk.base);
-}
-
 /*
  * dpu_hw_sspp_setup_rects()
  */
@@ -616,10 +607,8 @@ static void _setup_layer_ops(struct dpu_hw_sspp *c,
 
 	if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) ||
 			test_bit(DPU_SSPP_SCALER_QSEED3LITE, &features) ||
-			test_bit(DPU_SSPP_SCALER_QSEED4, &features)) {
+			test_bit(DPU_SSPP_SCALER_QSEED4, &features))
 		c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3;
-		c->ops.get_scaler_ver = _dpu_hw_sspp_get_scaler3_ver;
-	}
 
 	if (test_bit(DPU_SSPP_CDP, &features))
 		c->ops.setup_cdp = dpu_hw_sspp_setup_cdp;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
index f93969fddb225..b094ea23ad325 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
@@ -296,12 +296,6 @@ struct dpu_hw_sspp_ops {
 		struct dpu_hw_scaler3_cfg *scaler3_cfg,
 		const struct dpu_format *format);
 
-	/**
-	 * get_scaler_ver - get scaler h/w version
-	 * @ctx: Pointer to pipe context
-	 */
-	u32 (*get_scaler_ver)(struct dpu_hw_sspp *ctx);
-
 	/**
 	 * setup_cdp - setup client driven prefetch
 	 * @pipe: Pointer to software pipe context
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
index 18b16b2d2bf52..0b05061e3e621 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
@@ -381,12 +381,6 @@ end:
 	DPU_REG_WRITE(c, QSEED3_OP_MODE + scaler_offset, op_mode);
 }
 
-u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c,
-			u32 scaler_offset)
-{
-	return DPU_REG_READ(c, QSEED3_HW_VERSION + scaler_offset);
-}
-
 void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map *c,
 		u32 csc_reg_off,
 		const struct dpu_csc_cfg *data, bool csc10)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index 4bea139081bcd..fe083b2e5696a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -340,9 +340,6 @@ void dpu_hw_setup_scaler3(struct dpu_hw_blk_reg_map *c,
 		u32 scaler_offset, u32 scaler_version,
 		const struct dpu_format *format);
 
-u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c,
-		u32 scaler_offset);
-
 void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map  *c,
 		u32 csc_reg_off,
 		const struct dpu_csc_cfg *data, bool csc10);
-- 
GitLab


From 88fc981f8ef232848fb60f77660a27826ea518ef Mon Sep 17 00:00:00 2001
From: Marijn Suijten <marijn.suijten@somainline.org>
Date: Sat, 2 Dec 2023 01:40:27 +0200
Subject: [PATCH 0754/2340] drm/msm/dpu: Drop unused qseed_type from catalog
 dpu_caps

The SSPP scaler subblk is responsible for reporting its version (via the
.id field, feature bits on the parent SSPP block, and since recently
also from reading a register to supersede a read-but-unset version field
in the catalog), leaving this global qseed_type field logically unused.
Remove this dead code to lighten the catalog and bringup-overhead.

Signed-off-by: Marijn Suijten <marijn.suijten@somainline.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570109/
Link: https://lore.kernel.org/r/20231201234234.2065610-4-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h  | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h  | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h   | 1 -
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h           | 2 --
 15 files changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
index aa1867943c9fd..4dcc9f804ac12 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps msm8998_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x7,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
index 38ac0c1a134bf..03159d359500f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sdm845_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
index c022e57864a43..4184c18d81f38 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8150_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
index cb0758f0829de..83fb7312bc979 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc8180x_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index 94278a3e3483c..885a2bec82584 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8250_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
index c0d88ddccb28c..c9d1f1292b3da 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc7180_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x9,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
index 57ce14c18defb..320b8f23364f1 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm6115_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_LINE_WIDTH,
 	.max_mixer_blendstages = 0x4,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = 2160,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
index 62db84bd15f24..6c6bc754aeb70 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
@@ -11,7 +11,6 @@
 static const struct dpu_caps sm6350_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x7,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
index 5a3aad364c789..77703f663e259 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
@@ -11,7 +11,6 @@
 static const struct dpu_caps sm6375_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_LINE_WIDTH,
 	.max_mixer_blendstages = 0x4,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = 2160,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index 5aaa24281906e..03181ba891d59 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8350_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 15942fa5a8e06..42bb47181ba1c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc7280_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x7,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = 2400,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
index 1ccd1edd693c5..6da122df0b888 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc8280xp_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 11,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index 72b0f547242fe..330d48b1ea028 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8450_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
index 69b80af6566a6..2b5b342ea0b25 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8550_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index 62445075306af..1bf2d2d64f2a9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -342,7 +342,6 @@ struct dpu_rotation_cfg {
  * @max_mixer_width    max layer mixer line width support.
  * @max_mixer_blendstages max layer mixer blend stages or
  *                       supported z order
- * @qseed_type         qseed2 or qseed3 support.
  * @has_src_split      source split feature status
  * @has_dim_layer      dim layer feature status
  * @has_idle_pc        indicate if idle power collapse feature is supported
@@ -355,7 +354,6 @@ struct dpu_rotation_cfg {
 struct dpu_caps {
 	u32 max_mixer_width;
 	u32 max_mixer_blendstages;
-	u32 qseed_type;
 	bool has_src_split;
 	bool has_dim_layer;
 	bool has_idle_pc;
-- 
GitLab


From 6876059d7edfcce3d8946f5a49d65ba9b38b1bab Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:28 +0200
Subject: [PATCH 0755/2340] drm/msm/dpu: drop the `id' field from
 DPU_HW_SUBBLK_INFO

The field `id' is not used for subblocks. The handling code usually
knows, which sub-block it is now looking at. Drop the field completely.

Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570100/
Link: https://lore.kernel.org/r/20231201234234.2065610-5-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    | 76 +++++++++----------
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h    |  2 -
 2 files changed, 36 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index 7056c08b9957f..eb9dfdcf610e9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -252,17 +252,15 @@ static const uint32_t wb2_formats[] = {
 #define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min))
 
 /* SSPP common configuration */
-#define _VIG_SBLK(sdma_pri, qseed_ver, scaler_ver) \
+#define _VIG_SBLK(sdma_pri, scaler_ver) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
 	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
-		.id = qseed_ver, \
 		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
 	.csc_blk = {.name = "csc", \
-		.id = DPU_SSPP_CSC_10BIT, \
 		.base = 0x1a00, .len = 0x100,}, \
 	.format_list = plane_formats_yuv, \
 	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
@@ -271,17 +269,15 @@ static const uint32_t wb2_formats[] = {
 	.rotation_cfg = NULL, \
 	}
 
-#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, scaler_ver, rot_cfg) \
+#define _VIG_SBLK_ROT(sdma_pri, scaler_ver, rot_cfg) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
 	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
-		.id = qseed_ver, \
 		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
 	.csc_blk = {.name = "csc", \
-		.id = DPU_SSPP_CSC_10BIT, \
 		.base = 0x1a00, .len = 0x100,}, \
 	.format_list = plane_formats_yuv, \
 	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
@@ -302,16 +298,16 @@ static const uint32_t wb2_formats[] = {
 	}
 
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(0,
 					  SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(0,
 					  SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(0,
 					  SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(0,
 					  SSPP_SCALER_VER(1, 2));
 
 static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
@@ -321,29 +317,29 @@ static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
 };
 
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(5,
 					  SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(6,
 					  SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(7,
 					  SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(8,
 					  SSPP_SCALER_VER(1, 3));
 
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(5,
 					  SSPP_SCALER_VER(1, 4));
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(6,
 					  SSPP_SCALER_VER(1, 4));
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(7,
 					  SSPP_SCALER_VER(1, 4));
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3,
+				_VIG_SBLK(8,
 					  SSPP_SCALER_VER(1, 4));
 
 static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1);
@@ -352,59 +348,59 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3);
 static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4);
 
 static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 =
-				_VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(4,
 					  SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 =
-			_VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4,
+			_VIG_SBLK_ROT(4,
 				      SSPP_SCALER_VER(3, 0),
 				      &dpu_rot_sc7280_cfg_v2);
 
 static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 =
-				_VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(2,
 					  SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 =
-				_VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE,
+				_VIG_SBLK(3,
 					  SSPP_SCALER_VER(2, 4));
 
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(5,
 					  SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(6,
 					  SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(7,
 					  SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(8,
 					  SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(5,
 					  SSPP_SCALER_VER(3, 1));
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(6,
 					  SSPP_SCALER_VER(3, 1));
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(7,
 					  SSPP_SCALER_VER(3, 1));
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(8,
 					  SSPP_SCALER_VER(3, 1));
 
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(7,
 					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(8,
 					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 =
-				_VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(9,
 					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 =
-				_VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4,
+				_VIG_SBLK(10,
 					  SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5);
 static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6);
@@ -473,12 +469,12 @@ static const struct dpu_lm_sub_blks qcm2290_lm_sblk = {
  * DSPP sub blocks config
  *************************************************************/
 static const struct dpu_dspp_sub_blks msm8998_dspp_sblk = {
-	.pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700,
+	.pcc = {.name = "pcc", .base = 0x1700,
 		.len = 0x90, .version = 0x10007},
 };
 
 static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = {
-	.pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700,
+	.pcc = {.name = "pcc", .base = 0x1700,
 		.len = 0x90, .version = 0x40000},
 };
 
@@ -486,19 +482,19 @@ static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = {
  * PINGPONG sub blocks config
  *************************************************************/
 static const struct dpu_pingpong_sub_blks sdm845_pp_sblk_te = {
-	.te2 = {.name = "te2", .id = DPU_PINGPONG_TE2, .base = 0x2000, .len = 0x0,
+	.te2 = {.name = "te2", .base = 0x2000, .len = 0x0,
 		.version = 0x1},
-	.dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0,
+	.dither = {.name = "dither", .base = 0x30e0,
 		.len = 0x20, .version = 0x10000},
 };
 
 static const struct dpu_pingpong_sub_blks sdm845_pp_sblk = {
-	.dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0,
+	.dither = {.name = "dither", .base = 0x30e0,
 		.len = 0x20, .version = 0x10000},
 };
 
 static const struct dpu_pingpong_sub_blks sc7280_pp_sblk = {
-	.dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0xe0,
+	.dither = {.name = "dither", .base = 0xe0,
 	.len = 0x20, .version = 0x20000},
 };
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index 1bf2d2d64f2a9..3cf7d316b4750 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -251,14 +251,12 @@ enum {
 /**
  * MACRO DPU_HW_SUBBLK_INFO - information of HW sub-block inside DPU
  * @name:              string name for debug purposes
- * @id:                enum identifying this sub-block
  * @base:              offset of this sub-block relative to the block
  *                     offset
  * @len                register block length of this sub-block
  */
 #define DPU_HW_SUBBLK_INFO \
 	char name[DPU_HW_BLK_NAME_LEN]; \
-	u32 id; \
 	u32 base; \
 	u32 len
 
-- 
GitLab


From 01fc6c012fad314290af5cc9a8de3fb612ca67c4 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:29 +0200
Subject: [PATCH 0756/2340] drm/msm/dpu: drop the `smart_dma_priority' field
 from struct dpu_sspp_sub_blks

In preparation to deduplicating SSPP subblocks, drop the (unused)
`smart_dma_priority' field from struct dpu_sspp_sub_blks. If it is
needed later (e.g. for SmartDMA v1), it should be added to the SSPP
declarations themselves.

Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570103/
Link: https://lore.kernel.org/r/20231201234234.2065610-6-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    | 112 +++++++-----------
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h    |   2 -
 2 files changed, 40 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index eb9dfdcf610e9..4179174ee4ee7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -252,11 +252,10 @@ static const uint32_t wb2_formats[] = {
 #define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min))
 
 /* SSPP common configuration */
-#define _VIG_SBLK(sdma_pri, scaler_ver) \
+#define _VIG_SBLK(scaler_ver) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
-	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
 		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
@@ -269,11 +268,10 @@ static const uint32_t wb2_formats[] = {
 	.rotation_cfg = NULL, \
 	}
 
-#define _VIG_SBLK_ROT(sdma_pri, scaler_ver, rot_cfg) \
+#define _VIG_SBLK_ROT(scaler_ver, rot_cfg) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
-	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
 		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
@@ -286,11 +284,10 @@ static const uint32_t wb2_formats[] = {
 	.rotation_cfg = rot_cfg, \
 	}
 
-#define _DMA_SBLK(sdma_pri) \
+#define _DMA_SBLK() \
 	{ \
 	.maxdwnscale = SSPP_UNITY_SCALE, \
 	.maxupscale = SSPP_UNITY_SCALE, \
-	.smart_dma_priority = sdma_pri, \
 	.format_list = plane_formats, \
 	.num_formats = ARRAY_SIZE(plane_formats), \
 	.virt_format_list = plane_formats, \
@@ -298,17 +295,13 @@ static const uint32_t wb2_formats[] = {
 	}
 
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 =
-				_VIG_SBLK(0,
-					  SSPP_SCALER_VER(1, 2));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 =
-				_VIG_SBLK(0,
-					  SSPP_SCALER_VER(1, 2));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 =
-				_VIG_SBLK(0,
-					  SSPP_SCALER_VER(1, 2));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
 static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 =
-				_VIG_SBLK(0,
-					  SSPP_SCALER_VER(1, 2));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
 
 static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
 	.rot_maxheight = 1088,
@@ -317,107 +310,82 @@ static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
 };
 
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 =
-				_VIG_SBLK(5,
-					  SSPP_SCALER_VER(1, 3));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 =
-				_VIG_SBLK(6,
-					  SSPP_SCALER_VER(1, 3));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 =
-				_VIG_SBLK(7,
-					  SSPP_SCALER_VER(1, 3));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
 static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 =
-				_VIG_SBLK(8,
-					  SSPP_SCALER_VER(1, 3));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
 
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 =
-				_VIG_SBLK(5,
-					  SSPP_SCALER_VER(1, 4));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 =
-				_VIG_SBLK(6,
-					  SSPP_SCALER_VER(1, 4));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_2 =
-				_VIG_SBLK(7,
-					  SSPP_SCALER_VER(1, 4));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
 static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 =
-				_VIG_SBLK(8,
-					  SSPP_SCALER_VER(1, 4));
+				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
 
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1);
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(2);
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3);
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4);
+static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK();
+static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK();
+static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK();
+static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK();
 
 static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 =
-				_VIG_SBLK(4,
-					  SSPP_SCALER_VER(3, 0));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 =
-			_VIG_SBLK_ROT(4,
-				      SSPP_SCALER_VER(3, 0),
+			_VIG_SBLK_ROT(SSPP_SCALER_VER(3, 0),
 				      &dpu_rot_sc7280_cfg_v2);
 
 static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 =
-				_VIG_SBLK(2,
-					  SSPP_SCALER_VER(3, 0));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 =
-				_VIG_SBLK(3,
-					  SSPP_SCALER_VER(2, 4));
+				_VIG_SBLK(SSPP_SCALER_VER(2, 4));
 
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 =
-				_VIG_SBLK(5,
-					  SSPP_SCALER_VER(3, 0));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 =
-				_VIG_SBLK(6,
-					  SSPP_SCALER_VER(3, 0));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 =
-				_VIG_SBLK(7,
-					  SSPP_SCALER_VER(3, 0));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 =
-				_VIG_SBLK(8,
-					  SSPP_SCALER_VER(3, 0));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
 
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 =
-				_VIG_SBLK(5,
-					  SSPP_SCALER_VER(3, 1));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 =
-				_VIG_SBLK(6,
-					  SSPP_SCALER_VER(3, 1));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 =
-				_VIG_SBLK(7,
-					  SSPP_SCALER_VER(3, 1));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
 static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 =
-				_VIG_SBLK(8,
-					  SSPP_SCALER_VER(3, 1));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
 
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 =
-				_VIG_SBLK(7,
-					  SSPP_SCALER_VER(3, 2));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 =
-				_VIG_SBLK(8,
-					  SSPP_SCALER_VER(3, 2));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 =
-				_VIG_SBLK(9,
-					  SSPP_SCALER_VER(3, 2));
+				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
 static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 =
-				_VIG_SBLK(10,
-					  SSPP_SCALER_VER(3, 2));
-static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5);
-static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6);
+				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
+static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK();
+static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK();
 
-#define _VIG_SBLK_NOSCALE(sdma_pri) \
+#define _VIG_SBLK_NOSCALE() \
 	{ \
 	.maxdwnscale = SSPP_UNITY_SCALE, \
 	.maxupscale = SSPP_UNITY_SCALE, \
-	.smart_dma_priority = sdma_pri, \
 	.format_list = plane_formats_yuv, \
 	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
 	.virt_format_list = plane_formats, \
 	.virt_num_formats = ARRAY_SIZE(plane_formats), \
 	}
 
-static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE(2);
-static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK(1);
+static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE();
+static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK();
 
 /*************************************************************
  * MIXER sub blocks config
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index 3cf7d316b4750..b2a9b2cf2c051 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -368,7 +368,6 @@ struct dpu_caps {
  * common: Pointer to common configurations shared by sub blocks
  * @maxdwnscale: max downscale ratio supported(without DECIMATION)
  * @maxupscale:  maxupscale ratio supported
- * @smart_dma_priority: hw priority of rect1 of multirect pipe
  * @max_per_pipe_bw: maximum allowable bandwidth of this pipe in kBps
  * @qseed_ver: qseed version
  * @scaler_blk:
@@ -382,7 +381,6 @@ struct dpu_caps {
 struct dpu_sspp_sub_blks {
 	u32 maxdwnscale;
 	u32 maxupscale;
-	u32 smart_dma_priority;
 	u32 max_per_pipe_bw;
 	u32 qseed_ver;
 	struct dpu_scaler_blk scaler_blk;
-- 
GitLab


From 0fd205412e1ee1f60f549269838119969f0883ad Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:30 +0200
Subject: [PATCH 0757/2340] drm/msm/dpu: deduplicate some (most) of SSPP
 sub-blocks

As we have dropped the variadic parts of SSPP sub-blocks declarations,
deduplicate them now, reducing memory cruft.

Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570112/
Link: https://lore.kernel.org/r/20231201234234.2065610-7-dmitry.baryshkov@linaro.org
---
 .../msm/disp/dpu1/catalog/dpu_3_0_msm8998.h   | 16 +--
 .../msm/disp/dpu1/catalog/dpu_4_0_sdm845.h    | 16 +--
 .../msm/disp/dpu1/catalog/dpu_5_0_sm8150.h    | 16 +--
 .../msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h   | 16 +--
 .../msm/disp/dpu1/catalog/dpu_5_4_sm6125.h    |  6 +-
 .../msm/disp/dpu1/catalog/dpu_6_0_sm8250.h    | 16 +--
 .../msm/disp/dpu1/catalog/dpu_6_2_sc7180.h    |  8 +-
 .../msm/disp/dpu1/catalog/dpu_6_3_sm6115.h    |  4 +-
 .../msm/disp/dpu1/catalog/dpu_6_4_sm6350.h    |  8 +-
 .../msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h   |  4 +-
 .../msm/disp/dpu1/catalog/dpu_6_9_sm6375.h    |  4 +-
 .../msm/disp/dpu1/catalog/dpu_7_0_sm8350.h    | 16 +--
 .../msm/disp/dpu1/catalog/dpu_7_2_sc7280.h    |  8 +-
 .../msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h  | 16 +--
 .../msm/disp/dpu1/catalog/dpu_8_1_sm8450.h    | 16 +--
 .../msm/disp/dpu1/catalog/dpu_9_0_sm8550.h    | 20 ++--
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    | 97 +++++--------------
 17 files changed, 120 insertions(+), 167 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
index 4dcc9f804ac12..1d3e9666c7411 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
@@ -69,7 +69,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -77,7 +77,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -93,7 +93,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -101,7 +101,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1ac,
 		.features = DMA_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -109,7 +109,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1ac,
 		.features = DMA_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -117,7 +117,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1ac,
 		.features = DMA_CURSOR_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -125,7 +125,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1ac,
 		.features = DMA_CURSOR_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
index 03159d359500f..7a23389a57327 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
@@ -67,7 +67,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1c8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1c8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1c8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1c8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
index 4184c18d81f38..145f3d5953a30 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
@@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -108,7 +108,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -116,7 +116,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -124,7 +124,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -132,7 +132,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
index 83fb7312bc979..9e3bec8bc1218 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
@@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sm8150_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
index cec7af6667dc9..3969f2925d89b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
@@ -69,7 +69,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
 		.features = VIG_SM6125_MASK,
-		.sblk = &sm6125_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_2_4,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -77,7 +77,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index 885a2bec82584..f751d63845d1f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
index c9d1f1292b3da..7627f16c5b2d8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
@@ -52,7 +52,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sc7180_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -60,7 +60,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -68,7 +68,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
index 320b8f23364f1..7aefdb7eb4948 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
@@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm6115_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -47,7 +47,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
index 6c6bc754aeb70..99df9816f1714 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
@@ -59,7 +59,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sc7180_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -67,7 +67,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
index fb36fba5171cf..3cbb2fe8aba24 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
@@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_QCM2290_MASK,
-		.sblk = &qcm2290_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_noscale,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -47,7 +47,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &qcm2290_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
index 77703f663e259..2de304d97c9b8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
@@ -40,7 +40,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm6115_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -48,7 +48,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index 03181ba891d59..ad6eb28ead49a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f8,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -106,7 +106,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -114,7 +114,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -122,7 +122,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -130,7 +130,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 42bb47181ba1c..93564e9e5fa55 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -57,7 +57,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7280_MASK_SDMA,
-		.sblk = &sc7280_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0_rot_v2,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -65,7 +65,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -73,7 +73,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -81,7 +81,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
index 6da122df0b888..c2f1acd435241 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
@@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x2ac,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x2ac,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x2ac,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x2ac,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x2ac,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x2ac,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x2ac,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x2ac,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index 330d48b1ea028..0559ef780661c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8450_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8450_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8450_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x32c,
 		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8450_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x32c,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x32c,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x32c,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x32c,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
index 2b5b342ea0b25..f393d42317af3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
@@ -67,70 +67,70 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x344,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x344,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x344,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x344,
 		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_12", .id = SSPP_DMA4,
 		.base = 0x2c000, .len = 0x344,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sm8550_dma_sblk_4,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 14,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_13", .id = SSPP_DMA5,
 		.base = 0x2e000, .len = 0x344,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sm8550_dma_sblk_5,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 15,
 		.type = SSPP_TYPE_DMA,
 	},
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index 4179174ee4ee7..b8ea33d0f364f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -284,6 +284,16 @@ static const uint32_t wb2_formats[] = {
 	.rotation_cfg = rot_cfg, \
 	}
 
+#define _VIG_SBLK_NOSCALE() \
+	{ \
+	.maxdwnscale = SSPP_UNITY_SCALE, \
+	.maxupscale = SSPP_UNITY_SCALE, \
+	.format_list = plane_formats_yuv, \
+	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
+	.virt_format_list = plane_formats, \
+	.virt_num_formats = ARRAY_SIZE(plane_formats), \
+	}
+
 #define _DMA_SBLK() \
 	{ \
 	.maxdwnscale = SSPP_UNITY_SCALE, \
@@ -294,98 +304,41 @@ static const uint32_t wb2_formats[] = {
 	.virt_num_formats = ARRAY_SIZE(plane_formats), \
 	}
 
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
-
 static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
 	.rot_maxheight = 1088,
 	.rot_num_formats = ARRAY_SIZE(rotation_v2_formats),
 	.rot_format_list = rotation_v2_formats,
 };
 
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 =
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_noscale =
+				_VIG_SBLK_NOSCALE();
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_2 =
+				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_3 =
 				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
 
-static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
-static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
-static const struct dpu_sspp_sub_blks sm8150_vig_sblk_2 =
-				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
-static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 =
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_4 =
 				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
 
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK();
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK();
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK();
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK();
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_2_4 =
+				_VIG_SBLK(SSPP_SCALER_VER(2, 4));
 
-static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 =
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0 =
 				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
 
-static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 =
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0_rot_v2 =
 			_VIG_SBLK_ROT(SSPP_SCALER_VER(3, 0),
 				      &dpu_rot_sc7280_cfg_v2);
 
-static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
-
-static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(2, 4));
-
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
-
-static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
-static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
-static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
-static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 =
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_1 =
 				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
 
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 =
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_2 =
 				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 =
-				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
-static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK();
-static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK();
-
-#define _VIG_SBLK_NOSCALE() \
-	{ \
-	.maxdwnscale = SSPP_UNITY_SCALE, \
-	.maxupscale = SSPP_UNITY_SCALE, \
-	.format_list = plane_formats_yuv, \
-	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
-	.virt_format_list = plane_formats, \
-	.virt_num_formats = ARRAY_SIZE(plane_formats), \
-	}
 
-static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE();
-static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK();
+static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK();
 
 /*************************************************************
  * MIXER sub blocks config
-- 
GitLab


From aa83fa5bf6c78f77873954e757a2fd2dd1018c30 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:31 +0200
Subject: [PATCH 0758/2340] drm/msm/dpu: drop DPU_HW_SUBBLK_INFO macro

As the subblock info is now mostly gone, inline and drop the macro
DPU_HW_SUBBLK_INFO.

Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570106/
Link: https://lore.kernel.org/r/20231201234234.2065610-8-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h    | 40 ++++++++++---------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index b2a9b2cf2c051..f9586ddbafdad 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -248,49 +248,51 @@ enum {
 	u32 len; \
 	unsigned long features
 
-/**
- * MACRO DPU_HW_SUBBLK_INFO - information of HW sub-block inside DPU
- * @name:              string name for debug purposes
- * @base:              offset of this sub-block relative to the block
- *                     offset
- * @len                register block length of this sub-block
- */
-#define DPU_HW_SUBBLK_INFO \
-	char name[DPU_HW_BLK_NAME_LEN]; \
-	u32 base; \
-	u32 len
-
 /**
  * struct dpu_scaler_blk: Scaler information
- * @info:   HW register and features supported by this sub-blk
+ * @name: string name for debug purposes
+ * @base: offset of this sub-block relative to the block offset
+ * @len: register block length of this sub-block
  * @version: qseed block revision, on QSEED3+ platforms this is the value of
  *           scaler_blk.base + QSEED3_HW_VERSION registers.
  */
 struct dpu_scaler_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 	u32 version;
 };
 
 struct dpu_csc_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 };
 
 /**
  * struct dpu_pp_blk : Pixel processing sub-blk information
- * @info:   HW register and features supported by this sub-blk
+ * @name: string name for debug purposes
+ * @base: offset of this sub-block relative to the block offset
+ * @len: register block length of this sub-block
  * @version: HW Algorithm version
  */
 struct dpu_pp_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 	u32 version;
 };
 
 /**
  * struct dpu_dsc_blk - DSC Encoder sub-blk information
- * @info:   HW register and features supported by this sub-blk
+ * @name: string name for debug purposes
+ * @base: offset of this sub-block relative to the block offset
+ * @len: register block length of this sub-block
  */
 struct dpu_dsc_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 };
 
 /**
-- 
GitLab


From 2b98aa1d65581a34919ac159cbdc9c2a1b37a258 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:32 +0200
Subject: [PATCH 0759/2340] drm/msm/dpu: rewrite scaler and CSC presense checks

In order to check whether the SSPP block has scaler and CSC subblocks
the funcion dpu_plane_atomic_check_pipe() uses macros which enumerate
all possible scaler and CSC features. Replace those checks with the
scaler and CSC subblock length checks in order to be able to drop those
two macros.

Suggested-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570113/
Link: https://lore.kernel.org/r/20231201234234.2065610-9-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h | 15 ---------------
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c   |  4 ++--
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
index b094ea23ad325..28e7d53bd1918 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
@@ -21,21 +21,6 @@ struct dpu_hw_sspp;
 #define DPU_SSPP_ROT_90			BIT(3)
 #define DPU_SSPP_SOLID_FILL		BIT(4)
 
-/**
- * Define all scaler feature bits in catalog
- */
-#define DPU_SSPP_SCALER (BIT(DPU_SSPP_SCALER_RGB) | \
-			 BIT(DPU_SSPP_SCALER_QSEED2) | \
-			 BIT(DPU_SSPP_SCALER_QSEED3) | \
-			 BIT(DPU_SSPP_SCALER_QSEED3LITE) | \
-			 BIT(DPU_SSPP_SCALER_QSEED4))
-
-/*
- * Define all CSC feature bits in catalog
- */
-#define DPU_SSPP_CSC_ANY (BIT(DPU_SSPP_CSC) | \
-			  BIT(DPU_SSPP_CSC_10BIT))
-
 /**
  * Component indices
  */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 1e0da38c6f2a5..93365332bdac2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -774,8 +774,8 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu,
 	min_src_size = DPU_FORMAT_IS_YUV(fmt) ? 2 : 1;
 
 	if (DPU_FORMAT_IS_YUV(fmt) &&
-	    (!(pipe->sspp->cap->features & DPU_SSPP_SCALER) ||
-	     !(pipe->sspp->cap->features & DPU_SSPP_CSC_ANY))) {
+	    (!pipe->sspp->cap->sblk->scaler_blk.len ||
+	     !pipe->sspp->cap->sblk->csc_blk.len)) {
 		DPU_DEBUG_PLANE(pdpu,
 				"plane doesn't have scaler/csc for yuv\n");
 		return -EINVAL;
-- 
GitLab


From 193838acc1111a7001c9fc99af0a248f284ea79d Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:33 +0200
Subject: [PATCH 0760/2340] drm/msm/dpu: merge DPU_SSPP_SCALER_QSEED3,
 QSEED3LITE, QSEED4

Three different features, DPU_SSPP_SCALER_QSEED3, QSEED3LITE and QSEED4
are all related to different versions of the same HW scaling block.
Corresponding driver parts use scaler_blk.version to identify the
correct way to program the hardware. In order to simplify the driver
codepath, merge these three feature bits into QSEED3_COMPATIBLE bin.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570114/
Link: https://lore.kernel.org/r/20231201234234.2065610-10-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 8 ++++----
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 8 ++------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c    | 9 ++-------
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c      | 3 +--
 4 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index b8ea33d0f364f..c24817b0e4e5d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -22,19 +22,19 @@
 	BIT(DPU_SSPP_CSC_10BIT))
 
 #define VIG_MSM8998_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3))
+	(VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
 
 #define VIG_SDM845_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3))
+	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
 
 #define VIG_SDM845_MASK_SDMA \
 	(VIG_SDM845_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
 
 #define VIG_SC7180_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED4))
+	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
 
 #define VIG_SM6125_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3LITE))
+	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
 
 #define VIG_SC7180_MASK_SDMA \
 	(VIG_SC7180_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index f9586ddbafdad..d6b9000b63b07 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -51,9 +51,7 @@ enum {
 /**
  * SSPP sub-blocks/features
  * @DPU_SSPP_SCALER_QSEED2,  QSEED2 algorithm support
- * @DPU_SSPP_SCALER_QSEED3,  QSEED3 alogorithm support
- * @DPU_SSPP_SCALER_QSEED3LITE,  QSEED3 Lite alogorithm support
- * @DPU_SSPP_SCALER_QSEED4,  QSEED4 algorithm support
+ * @DPU_SSPP_SCALER_QSEED3_COMPATIBLE,  QSEED3-compatible alogorithm support (includes QSEED3, QSEED3LITE and QSEED4)
  * @DPU_SSPP_SCALER_RGB,     RGB Scaler, supported by RGB pipes
  * @DPU_SSPP_CSC,            Support of Color space converion
  * @DPU_SSPP_CSC_10BIT,      Support of 10-bit Color space conversion
@@ -71,9 +69,7 @@ enum {
  */
 enum {
 	DPU_SSPP_SCALER_QSEED2 = 0x1,
-	DPU_SSPP_SCALER_QSEED3,
-	DPU_SSPP_SCALER_QSEED3LITE,
-	DPU_SSPP_SCALER_QSEED4,
+	DPU_SSPP_SCALER_QSEED3_COMPATIBLE,
 	DPU_SSPP_SCALER_RGB,
 	DPU_SSPP_CSC,
 	DPU_SSPP_CSC_10BIT,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
index b408d456c123d..26307d9fc81d0 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
@@ -605,9 +605,7 @@ static void _setup_layer_ops(struct dpu_hw_sspp *c,
 		test_bit(DPU_SSPP_SMART_DMA_V2, &c->cap->features))
 		c->ops.setup_multirect = dpu_hw_sspp_setup_multirect;
 
-	if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) ||
-			test_bit(DPU_SSPP_SCALER_QSEED3LITE, &features) ||
-			test_bit(DPU_SSPP_SCALER_QSEED4, &features))
+	if (test_bit(DPU_SSPP_SCALER_QSEED3_COMPATIBLE, &features))
 		c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3;
 
 	if (test_bit(DPU_SSPP_CDP, &features))
@@ -643,10 +641,7 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms,
 			cfg->len,
 			kms);
 
-	if (cfg->features & BIT(DPU_SSPP_SCALER_QSEED3) ||
-			cfg->features & BIT(DPU_SSPP_SCALER_QSEED3LITE) ||
-			cfg->features & BIT(DPU_SSPP_SCALER_QSEED2) ||
-			cfg->features & BIT(DPU_SSPP_SCALER_QSEED4))
+	if (sblk->scaler_blk.len)
 		dpu_debugfs_create_regset32("scaler_blk", 0400,
 				debugfs_root,
 				sblk->scaler_blk.base + cfg->base,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 93365332bdac2..4d7f3894f4d7b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -470,8 +470,7 @@ static void _dpu_plane_setup_scaler3(struct dpu_hw_sspp *pipe_hw,
 			scale_cfg->src_height[i] /= chroma_subsmpl_v;
 		}
 
-		if (pipe_hw->cap->features &
-			BIT(DPU_SSPP_SCALER_QSEED4)) {
+		if (pipe_hw->cap->sblk->scaler_blk.version >= 0x3000) {
 			scale_cfg->preload_x[i] = DPU_QSEED4_DEFAULT_PRELOAD_H;
 			scale_cfg->preload_y[i] = DPU_QSEED4_DEFAULT_PRELOAD_V;
 		} else {
-- 
GitLab


From 223fb06fbc26e42f9cdf0ca80fb575916548d74b Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 01:40:34 +0200
Subject: [PATCH 0761/2340] drm/msm/gpu: drop duplicating VIG feature masks

After folding QSEED3LITE and QSEED4 feature bits into QSEED3_COMPATIBLE
several VIG feature masks became equal. Drop these duplicates.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570107/
Link: https://lore.kernel.org/r/20231201234234.2065610-11-dmitry.baryshkov@linaro.org
---
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h    |  2 +-
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h    |  8 ++++----
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h    |  2 +-
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h    |  2 +-
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h    |  2 +-
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h    |  2 +-
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h    |  8 ++++----
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h  |  8 ++++----
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h    |  8 ++++----
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h    |  8 ++++----
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c        | 11 +----------
 11 files changed, 26 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
index 3969f2925d89b..76b2ec0d2489b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
@@ -68,7 +68,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
-		.features = VIG_SM6125_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_2_4,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index f751d63845d1f..9acf07108899b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
@@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
@@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
@@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
index 7627f16c5b2d8..783269c6ead1e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
@@ -51,7 +51,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
index 7aefdb7eb4948..43f64a005f5a8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
@@ -38,7 +38,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
index 99df9816f1714..e17a30be75253 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
@@ -58,7 +58,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
index 2de304d97c9b8..a06c8634d2d77 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
@@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index ad6eb28ead49a..bed87250e68c1 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -73,7 +73,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
@@ -81,7 +81,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
@@ -89,7 +89,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
@@ -97,7 +97,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
index c2f1acd435241..8b5c5031e2d92 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
@@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
@@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
@@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
@@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index 0559ef780661c..7a647e1f729d9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x32c,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
@@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x32c,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
@@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x32c,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
@@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x32c,
-		.features = VIG_SC7180_MASK_SDMA,
+		.features = VIG_SDM845_MASK_SDMA,
 		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
index f393d42317af3..bf56265967c0f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
@@ -66,28 +66,28 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
+		.features = VIG_SDM845_MASK,
 		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index c24817b0e4e5d..ec10f68cf0b25 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -30,15 +30,6 @@
 #define VIG_SDM845_MASK_SDMA \
 	(VIG_SDM845_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
 
-#define VIG_SC7180_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
-
-#define VIG_SM6125_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
-
-#define VIG_SC7180_MASK_SDMA \
-	(VIG_SC7180_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
-
 #define VIG_QCM2290_MASK (VIG_BASE_MASK | BIT(DPU_SSPP_QOS_8LVL))
 
 #define DMA_MSM8998_MASK \
@@ -47,7 +38,7 @@
 	BIT(DPU_SSPP_CDP) | BIT(DPU_SSPP_EXCL_RECT))
 
 #define VIG_SC7280_MASK \
-	(VIG_SC7180_MASK | BIT(DPU_SSPP_INLINE_ROTATION))
+	(VIG_SDM845_MASK | BIT(DPU_SSPP_INLINE_ROTATION))
 
 #define VIG_SC7280_MASK_SDMA \
 	(VIG_SC7280_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
-- 
GitLab


From bf5a806953118e02accfcaa6f56b1c8b488a0396 Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 16 Oct 2023 22:18:08 -0400
Subject: [PATCH 0762/2340] dt-bindings: display/msm: dsi-controller-main: add
 SDM670 compatible

The SDM670 has DSI ports. Add the compatible for the controller.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/562958/
Link: https://lore.kernel.org/r/20231017021805.1083350-10-mailingradian@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../devicetree/bindings/display/msm/dsi-controller-main.yaml     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
index c6dbab65d5f70..887c7dcaf438b 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
@@ -25,6 +25,7 @@ properties:
               - qcom,sc7180-dsi-ctrl
               - qcom,sc7280-dsi-ctrl
               - qcom,sdm660-dsi-ctrl
+              - qcom,sdm670-dsi-ctrl
               - qcom,sdm845-dsi-ctrl
               - qcom,sm6115-dsi-ctrl
               - qcom,sm6125-dsi-ctrl
-- 
GitLab


From 0e1af3ec823bdb99007aedffa41e711573378839 Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 16 Oct 2023 22:18:09 -0400
Subject: [PATCH 0763/2340] dt-bindings: display/msm: sdm845-dpu: Describe
 SDM670

The SDM670 display controller has the same requirements as the SDM845
display controller, despite having distinct properties as described in
the catalog. Add the compatible for SDM670 to the SDM845 controller.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/562959/
Link: https://lore.kernel.org/r/20231017021805.1083350-11-mailingradian@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml      | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml
index b917064bdf334..dc11fd421a27f 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml
@@ -13,7 +13,9 @@ $ref: /schemas/display/msm/dpu-common.yaml#
 
 properties:
   compatible:
-    const: qcom,sdm845-dpu
+    enum:
+      - qcom,sdm670-dpu
+      - qcom,sdm845-dpu
 
   reg:
     items:
-- 
GitLab


From c965007970ed918203a375e55bca874956c67d68 Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 16 Oct 2023 22:18:10 -0400
Subject: [PATCH 0764/2340] dt-bindings: display: msm: Add SDM670 MDSS

Add documentation for the SDM670 display subsystem, adapted from the
SDM845 and SM6125 documentation.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/562961/
Link: https://lore.kernel.org/r/20231017021805.1083350-12-mailingradian@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../display/msm/qcom,sdm670-mdss.yaml         | 292 ++++++++++++++++++
 1 file changed, 292 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml

diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml
new file mode 100644
index 0000000000000..7dc269322b8e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml
@@ -0,0 +1,292 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/qcom,sdm670-mdss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SDM670 Display MDSS
+
+maintainers:
+  - Richard Acayan <mailingradian@gmail.com>
+
+description:
+  SDM670 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks
+  like DPU display controller, DSI and DP interfaces etc.
+
+$ref: /schemas/display/msm/mdss-common.yaml#
+
+properties:
+  compatible:
+    const: qcom,sdm670-mdss
+
+  clocks:
+    items:
+      - description: Display AHB clock from gcc
+      - description: Display core clock
+
+  clock-names:
+    items:
+      - const: iface
+      - const: core
+
+  iommus:
+    maxItems: 2
+
+  interconnects:
+    maxItems: 2
+
+  interconnect-names:
+    maxItems: 2
+
+patternProperties:
+  "^display-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        const: qcom,sdm670-dpu
+
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        const: qcom,sdm670-dp
+
+  "^dsi@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        contains:
+          const: qcom,sdm670-dsi-ctrl
+
+  "^phy@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        const: qcom,dsi-phy-10nm
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,dispcc-sdm845.h>
+    #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    #include <dt-bindings/interconnect/qcom,sdm670-rpmh.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/qcom-rpmpd.h>
+
+    display-subsystem@ae00000 {
+        compatible = "qcom,sdm670-mdss";
+        reg = <0x0ae00000 0x1000>;
+        reg-names = "mdss";
+        power-domains = <&dispcc MDSS_GDSC>;
+
+        clocks = <&gcc GCC_DISP_AHB_CLK>,
+                 <&dispcc DISP_CC_MDSS_MDP_CLK>;
+        clock-names = "iface", "core";
+
+        interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-controller;
+        #interrupt-cells = <1>;
+
+        interconnects = <&mmss_noc MASTER_MDP_PORT0 0 &mem_noc SLAVE_EBI_CH0 0>,
+                        <&mmss_noc MASTER_MDP_PORT1 0 &mem_noc SLAVE_EBI_CH0 0>;
+        interconnect-names = "mdp0-mem", "mdp1-mem";
+
+        iommus = <&apps_smmu 0x880 0x8>,
+                 <&apps_smmu 0xc80 0x8>;
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        display-controller@ae01000 {
+            compatible = "qcom,sdm670-dpu";
+            reg = <0x0ae01000 0x8f000>,
+                  <0x0aeb0000 0x2008>;
+            reg-names = "mdp", "vbif";
+
+            clocks = <&gcc GCC_DISP_AXI_CLK>,
+                     <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&dispcc DISP_CC_MDSS_AXI_CLK>,
+                     <&dispcc DISP_CC_MDSS_MDP_CLK>,
+                     <&dispcc DISP_CC_MDSS_VSYNC_CLK>;
+            clock-names = "gcc-bus", "iface", "bus", "core", "vsync";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <0>;
+            power-domains = <&rpmhpd SDM670_CX>;
+            operating-points-v2 = <&mdp_opp_table>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dpu_intf1_out: endpoint {
+                        remote-endpoint = <&mdss_dsi0_in>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dpu_intf2_out: endpoint {
+                        remote-endpoint = <&mdss_dsi1_in>;
+                    };
+                };
+            };
+        };
+
+        dsi@ae94000 {
+            compatible = "qcom,sdm670-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae94000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <4>;
+
+            clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK>,
+                     <&dispcc DISP_CC_MDSS_BYTE0_INTF_CLK>,
+                     <&dispcc DISP_CC_MDSS_PCLK0_CLK>,
+                     <&dispcc DISP_CC_MDSS_ESC0_CLK>,
+                     <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&dispcc DISP_CC_MDSS_AXI_CLK>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+            assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>,
+                              <&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>;
+            assigned-clock-parents = <&mdss_dsi0_phy 0>, <&mdss_dsi0_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd SDM670_CX>;
+
+            phys = <&mdss_dsi0_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    mdss_dsi0_in: endpoint {
+                        remote-endpoint = <&dpu_intf1_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    mdss_dsi0_out: endpoint {
+                    };
+                };
+            };
+        };
+
+        mdss_dsi0_phy: phy@ae94400 {
+            compatible = "qcom,dsi-phy-10nm";
+            reg = <0x0ae94400 0x200>,
+                  <0x0ae94600 0x280>,
+                  <0x0ae94a00 0x1e0>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&rpmhcc RPMH_CXO_CLK>;
+            clock-names = "iface", "ref";
+            vdds-supply = <&vreg_dsi_phy>;
+        };
+
+        dsi@ae96000 {
+            compatible = "qcom,sdm670-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae96000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <5>;
+
+            clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK>,
+                     <&dispcc DISP_CC_MDSS_BYTE1_INTF_CLK>,
+                     <&dispcc DISP_CC_MDSS_PCLK1_CLK>,
+                     <&dispcc DISP_CC_MDSS_ESC1_CLK>,
+                     <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&dispcc DISP_CC_MDSS_AXI_CLK>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+            assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK_SRC>,
+                              <&dispcc DISP_CC_MDSS_PCLK1_CLK_SRC>;
+            assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd SDM670_CX>;
+
+            phys = <&dsi1_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    mdss_dsi1_in: endpoint {
+                        remote-endpoint = <&dpu_intf2_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    mdss_dsi1_out: endpoint {
+                    };
+                };
+            };
+        };
+
+        mdss_dsi1_phy: phy@ae96400 {
+            compatible = "qcom,dsi-phy-10nm";
+            reg = <0x0ae96400 0x200>,
+                  <0x0ae96600 0x280>,
+                  <0x0ae96a00 0x10e>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&rpmhcc RPMH_CXO_CLK>;
+            clock-names = "iface", "ref";
+            vdds-supply = <&vreg_dsi_phy>;
+        };
+    };
+...
-- 
GitLab


From 3c13a56e435348ffc094ff5a3b3133311673390e Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 16 Oct 2023 22:18:11 -0400
Subject: [PATCH 0765/2340] drm/msm: mdss: add support for SDM670

Add support for the MDSS block on the SDM670 platform.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/562963/
Link: https://lore.kernel.org/r/20231017021805.1083350-13-mailingradian@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/msm_mdss.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 29bb38f0bb2ce..6f390c8505770 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -549,6 +549,12 @@ static const struct msm_mdss_data sc8280xp_data = {
 	.macrotile_mode = 1,
 };
 
+static const struct msm_mdss_data sdm670_data = {
+	.ubwc_enc_version = UBWC_2_0,
+	.ubwc_dec_version = UBWC_2_0,
+	.highest_bank_bit = 1,
+};
+
 static const struct msm_mdss_data sdm845_data = {
 	.ubwc_enc_version = UBWC_2_0,
 	.ubwc_dec_version = UBWC_2_0,
@@ -607,6 +613,7 @@ static const struct of_device_id mdss_dt_match[] = {
 	{ .compatible = "qcom,mdss" },
 	{ .compatible = "qcom,msm8998-mdss", .data = &msm8998_data },
 	{ .compatible = "qcom,qcm2290-mdss", .data = &qcm2290_data },
+	{ .compatible = "qcom,sdm670-mdss", .data = &sdm670_data },
 	{ .compatible = "qcom,sdm845-mdss", .data = &sdm845_data },
 	{ .compatible = "qcom,sc7180-mdss", .data = &sc7180_data },
 	{ .compatible = "qcom,sc7280-mdss", .data = &sc7280_data },
-- 
GitLab


From e140b7e496b73c116454b0a0265bec4cacc9ca40 Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 16 Oct 2023 22:18:12 -0400
Subject: [PATCH 0766/2340] drm/msm/dpu: Add hw revision 4.1 (SDM670)

The Snapdragon 670 uses similar clocks (with one frequency added) to the
Snapdragon 845 but reports DPU revision 4.1. Add support for this DPU
with configuration from the Pixel 3a downstream kernel.

Since revision 4.0 is SDM845, reuse some configuration from its catalog
entry.

Link: https://android.googlesource.com/kernel/msm/+/368478b0ae76566927a2769a2bf24dfe7f38bb78/arch/arm64/boot/dts/qcom/sdm670-sde.dtsi
Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/562965/
Link: https://lore.kernel.org/r/20231017021805.1083350-14-mailingradian@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../msm/disp/dpu1/catalog/dpu_4_1_sdm670.h    | 104 ++++++++++++++++++
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    |   1 +
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h    |   1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c       |   1 +
 4 files changed, 107 insertions(+)
 create mode 100644 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h
new file mode 100644
index 0000000000000..cbbdaebe357ec
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2023, Richard Acayan. All rights reserved.
+ */
+
+#ifndef _DPU_4_1_SDM670_H
+#define _DPU_4_1_SDM670_H
+
+static const struct dpu_mdp_cfg sdm670_mdp = {
+	.name = "top_0",
+	.base = 0x0, .len = 0x45c,
+	.features = BIT(DPU_MDP_AUDIO_SELECT),
+	.clk_ctrls = {
+		[DPU_CLK_CTRL_VIG0] = { .reg_off = 0x2ac, .bit_off = 0 },
+		[DPU_CLK_CTRL_VIG1] = { .reg_off = 0x2b4, .bit_off = 0 },
+		[DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 },
+		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
+		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
+	},
+};
+
+static const struct dpu_sspp_cfg sdm670_sspp[] = {
+	{
+		.name = "sspp_0", .id = SSPP_VIG0,
+		.base = 0x4000, .len = 0x1c8,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
+		.xin_id = 0,
+		.type = SSPP_TYPE_VIG,
+		.clk_ctrl = DPU_CLK_CTRL_VIG0,
+	}, {
+		.name = "sspp_1", .id = SSPP_VIG1,
+		.base = 0x6000, .len = 0x1c8,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
+		.xin_id = 4,
+		.type = SSPP_TYPE_VIG,
+		.clk_ctrl = DPU_CLK_CTRL_VIG0,
+	}, {
+		.name = "sspp_8", .id = SSPP_DMA0,
+		.base = 0x24000, .len = 0x1c8,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 1,
+		.type = SSPP_TYPE_DMA,
+		.clk_ctrl = DPU_CLK_CTRL_DMA0,
+	}, {
+		.name = "sspp_9", .id = SSPP_DMA1,
+		.base = 0x26000, .len = 0x1c8,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 5,
+		.type = SSPP_TYPE_DMA,
+		.clk_ctrl = DPU_CLK_CTRL_DMA1,
+	}, {
+		.name = "sspp_10", .id = SSPP_DMA2,
+		.base = 0x28000, .len = 0x1c8,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 9,
+		.type = SSPP_TYPE_DMA,
+		.clk_ctrl = DPU_CLK_CTRL_DMA2,
+	},
+};
+
+static const struct dpu_dsc_cfg sdm670_dsc[] = {
+	{
+		.name = "dsc_0", .id = DSC_0,
+		.base = 0x80000, .len = 0x140,
+	}, {
+		.name = "dsc_1", .id = DSC_1,
+		.base = 0x80400, .len = 0x140,
+	},
+};
+
+static const struct dpu_mdss_version sdm670_mdss_ver = {
+	.core_major_ver = 4,
+	.core_minor_ver = 1,
+};
+
+const struct dpu_mdss_cfg dpu_sdm670_cfg = {
+	.mdss_ver = &sdm670_mdss_ver,
+	.caps = &sdm845_dpu_caps,
+	.mdp = &sdm670_mdp,
+	.ctl_count = ARRAY_SIZE(sdm845_ctl),
+	.ctl = sdm845_ctl,
+	.sspp_count = ARRAY_SIZE(sdm670_sspp),
+	.sspp = sdm670_sspp,
+	.mixer_count = ARRAY_SIZE(sdm845_lm),
+	.mixer = sdm845_lm,
+	.pingpong_count = ARRAY_SIZE(sdm845_pp),
+	.pingpong = sdm845_pp,
+	.dsc_count = ARRAY_SIZE(sdm670_dsc),
+	.dsc = sdm670_dsc,
+	.intf_count = ARRAY_SIZE(sdm845_intf),
+	.intf = sdm845_intf,
+	.vbif_count = ARRAY_SIZE(sdm845_vbif),
+	.vbif = sdm845_vbif,
+	.perf = &sdm845_perf_data,
+};
+
+#endif
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index ec10f68cf0b25..21aaa7cb5acdb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -613,6 +613,7 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = {
 #include "catalog/dpu_3_0_msm8998.h"
 
 #include "catalog/dpu_4_0_sdm845.h"
+#include "catalog/dpu_4_1_sdm670.h"
 
 #include "catalog/dpu_5_0_sm8150.h"
 #include "catalog/dpu_5_1_sc8180x.h"
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index d6b9000b63b07..476214905e76b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -820,6 +820,7 @@ struct dpu_mdss_cfg {
 
 extern const struct dpu_mdss_cfg dpu_msm8998_cfg;
 extern const struct dpu_mdss_cfg dpu_sdm845_cfg;
+extern const struct dpu_mdss_cfg dpu_sdm670_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8150_cfg;
 extern const struct dpu_mdss_cfg dpu_sc8180x_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8250_cfg;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 9b2efd7acc872..4426033252cad 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -1334,6 +1334,7 @@ static const struct dev_pm_ops dpu_pm_ops = {
 static const struct of_device_id dpu_dt_match[] = {
 	{ .compatible = "qcom,msm8998-dpu", .data = &dpu_msm8998_cfg, },
 	{ .compatible = "qcom,qcm2290-dpu", .data = &dpu_qcm2290_cfg, },
+	{ .compatible = "qcom,sdm670-dpu", .data = &dpu_sdm670_cfg, },
 	{ .compatible = "qcom,sdm845-dpu", .data = &dpu_sdm845_cfg, },
 	{ .compatible = "qcom,sc7180-dpu", .data = &dpu_sc7180_cfg, },
 	{ .compatible = "qcom,sc7280-dpu", .data = &dpu_sc7280_cfg, },
-- 
GitLab


From 0014a4ad6c084dd3af972d44f2b0d0146b65760d Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:23 +0100
Subject: [PATCH 0767/2340] dt-bindings: display: msm-dsi-phy-7nm: document the
 SM8650 DSI PHY

Document the DSI PHY on the SM8650 Platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564970/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-1-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
index dd6619555a126..7e764eac3ef31 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
@@ -22,6 +22,7 @@ properties:
       - qcom,sm8350-dsi-phy-5nm
       - qcom,sm8450-dsi-phy-5nm
       - qcom,sm8550-dsi-phy-4nm
+      - qcom,sm8650-dsi-phy-4nm
 
   reg:
     items:
-- 
GitLab


From 8adc26fcebaedd6d9a61d7f17793569a90669142 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:24 +0100
Subject: [PATCH 0768/2340] dt-bindings: display: msm-dsi-controller-main:
 document the SM8650 DSI Controller

Document the DSI Controller on the SM8650 Platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564972/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-2-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../devicetree/bindings/display/msm/dsi-controller-main.yaml    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
index 887c7dcaf438b..4219936eda5a1 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
@@ -36,6 +36,7 @@ properties:
               - qcom,sm8350-dsi-ctrl
               - qcom,sm8450-dsi-ctrl
               - qcom,sm8550-dsi-ctrl
+              - qcom,sm8650-dsi-ctrl
           - const: qcom,mdss-dsi-ctrl
       - enum:
           - qcom,dsi-ctrl-6g-qcm2290
@@ -334,6 +335,7 @@ allOf:
               - qcom,sm8350-dsi-ctrl
               - qcom,sm8450-dsi-ctrl
               - qcom,sm8550-dsi-ctrl
+              - qcom,sm8650-dsi-ctrl
     then:
       properties:
         clocks:
-- 
GitLab


From 3e135a7700f9d3a2a606d01934c53315fcedf5ac Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:25 +0100
Subject: [PATCH 0769/2340] dt-bindings: display: msm: document the SM8650 DPU

Document the DPU Display Controller on the SM8650 Platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564974/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-3-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../bindings/display/msm/qcom,sm8650-dpu.yaml | 127 ++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml

diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml
new file mode 100644
index 0000000000000..a01d15a033176
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/qcom,sm8650-dpu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SM8650 Display DPU
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+
+$ref: /schemas/display/msm/dpu-common.yaml#
+
+properties:
+  compatible:
+    const: qcom,sm8650-dpu
+
+  reg:
+    items:
+      - description: Address offset and size for mdp register set
+      - description: Address offset and size for vbif register set
+
+  reg-names:
+    items:
+      - const: mdp
+      - const: vbif
+
+  clocks:
+    items:
+      - description: Display hf axi
+      - description: Display MDSS ahb
+      - description: Display lut
+      - description: Display core
+      - description: Display vsync
+
+  clock-names:
+    items:
+      - const: nrt_bus
+      - const: iface
+      - const: lut
+      - const: core
+      - const: vsync
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/qcom,rpmhpd.h>
+
+    display-controller@ae01000 {
+        compatible = "qcom,sm8650-dpu";
+        reg = <0x0ae01000 0x8f000>,
+              <0x0aeb0000 0x2008>;
+        reg-names = "mdp", "vbif";
+
+        clocks = <&gcc_axi_clk>,
+                 <&dispcc_ahb_clk>,
+                 <&dispcc_mdp_lut_clk>,
+                 <&dispcc_mdp_clk>,
+                 <&dispcc_vsync_clk>;
+        clock-names = "nrt_bus",
+                      "iface",
+                      "lut",
+                      "core",
+                      "vsync";
+
+        assigned-clocks = <&dispcc_vsync_clk>;
+        assigned-clock-rates = <19200000>;
+
+        operating-points-v2 = <&mdp_opp_table>;
+        power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+        interrupt-parent = <&mdss>;
+        interrupts = <0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                reg = <0>;
+                dpu_intf1_out: endpoint {
+                    remote-endpoint = <&dsi0_in>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+                dpu_intf2_out: endpoint {
+                    remote-endpoint = <&dsi1_in>;
+                };
+            };
+        };
+
+        mdp_opp_table: opp-table {
+            compatible = "operating-points-v2";
+
+            opp-200000000 {
+                opp-hz = /bits/ 64 <200000000>;
+                required-opps = <&rpmhpd_opp_low_svs>;
+            };
+
+            opp-325000000 {
+                opp-hz = /bits/ 64 <325000000>;
+                required-opps = <&rpmhpd_opp_svs>;
+            };
+
+            opp-375000000 {
+                opp-hz = /bits/ 64 <375000000>;
+                required-opps = <&rpmhpd_opp_svs_l1>;
+            };
+
+            opp-514000000 {
+                opp-hz = /bits/ 64 <514000000>;
+                required-opps = <&rpmhpd_opp_nom>;
+            };
+        };
+    };
+...
-- 
GitLab


From cbcef056fa40e7e00f470e5d0873a906d0170470 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:26 +0100
Subject: [PATCH 0770/2340] dt-bindings: display: msm: document the SM8650
 Mobile Display Subsystem

Document the Mobile Display Subsystem (MDSS) on the SM8650 Platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564982/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-4-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../display/msm/qcom,sm8650-mdss.yaml         | 322 ++++++++++++++++++
 1 file changed, 322 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml

diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml
new file mode 100644
index 0000000000000..5638c1ea692ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml
@@ -0,0 +1,322 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/qcom,sm8650-mdss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SM8650 Display MDSS
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+
+description:
+  SM8650 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like
+  DPU display controller, DSI and DP interfaces etc.
+
+$ref: /schemas/display/msm/mdss-common.yaml#
+
+properties:
+  compatible:
+    const: qcom,sm8650-mdss
+
+  clocks:
+    items:
+      - description: Display AHB
+      - description: Display hf AXI
+      - description: Display core
+
+  iommus:
+    maxItems: 1
+
+  interconnects:
+    maxItems: 2
+
+  interconnect-names:
+    maxItems: 2
+
+patternProperties:
+  "^display-controller@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        const: qcom,sm8650-dpu
+
+  "^dsi@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        items:
+          - const: qcom,sm8650-dsi-ctrl
+          - const: qcom,mdss-dsi-ctrl
+
+  "^phy@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        const: qcom,sm8650-dsi-phy-4nm
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/qcom,rpmhpd.h>
+
+    display-subsystem@ae00000 {
+        compatible = "qcom,sm8650-mdss";
+        reg = <0x0ae00000 0x1000>;
+        reg-names = "mdss";
+
+        resets = <&dispcc_core_bcr>;
+
+        power-domains = <&dispcc_gdsc>;
+
+        clocks = <&gcc_ahb_clk>,
+                 <&gcc_axi_clk>,
+                 <&dispcc_mdp_clk>;
+        clock-names = "bus", "nrt_bus", "core";
+
+        interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-controller;
+        #interrupt-cells = <1>;
+
+        iommus = <&apps_smmu 0x1c00 0x2>;
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        display-controller@ae01000 {
+            compatible = "qcom,sm8650-dpu";
+            reg = <0x0ae01000 0x8f000>,
+                  <0x0aeb0000 0x2008>;
+            reg-names = "mdp", "vbif";
+
+            clocks = <&gcc_axi_clk>,
+                     <&dispcc_ahb_clk>,
+                     <&dispcc_mdp_lut_clk>,
+                     <&dispcc_mdp_clk>,
+                     <&dispcc_mdp_vsync_clk>;
+            clock-names = "nrt_bus",
+                          "iface",
+                          "lut",
+                          "core",
+                          "vsync";
+
+            assigned-clocks = <&dispcc_mdp_vsync_clk>;
+            assigned-clock-rates = <19200000>;
+
+            operating-points-v2 = <&mdp_opp_table>;
+            power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+            interrupt-parent = <&mdss>;
+            interrupts = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dpu_intf1_out: endpoint {
+                        remote-endpoint = <&dsi0_in>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dpu_intf2_out: endpoint {
+                        remote-endpoint = <&dsi1_in>;
+                    };
+                };
+            };
+
+            mdp_opp_table: opp-table {
+                compatible = "operating-points-v2";
+
+                opp-200000000 {
+                    opp-hz = /bits/ 64 <200000000>;
+                    required-opps = <&rpmhpd_opp_low_svs>;
+                };
+
+                opp-325000000 {
+                    opp-hz = /bits/ 64 <325000000>;
+                    required-opps = <&rpmhpd_opp_svs>;
+                };
+
+                opp-375000000 {
+                    opp-hz = /bits/ 64 <375000000>;
+                    required-opps = <&rpmhpd_opp_svs_l1>;
+                };
+
+                opp-514000000 {
+                    opp-hz = /bits/ 64 <514000000>;
+                    required-opps = <&rpmhpd_opp_nom>;
+                };
+            };
+        };
+
+        dsi@ae94000 {
+            compatible = "qcom,sm8650-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae94000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <4>;
+
+            clocks = <&dispc_byte_clk>,
+                     <&dispcc_intf_clk>,
+                     <&dispcc_pclk>,
+                     <&dispcc_esc_clk>,
+                     <&dispcc_ahb_clk>,
+                     <&gcc_bus_clk>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+
+            assigned-clocks = <&dispcc_byte_clk>,
+                              <&dispcc_pclk>;
+            assigned-clock-parents = <&dsi0_phy 0>, <&dsi0_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+            phys = <&dsi0_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dsi0_in: endpoint {
+                        remote-endpoint = <&dpu_intf1_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dsi0_out: endpoint {
+                    };
+                };
+            };
+
+            dsi_opp_table: opp-table {
+                compatible = "operating-points-v2";
+
+                opp-187500000 {
+                    opp-hz = /bits/ 64 <187500000>;
+                    required-opps = <&rpmhpd_opp_low_svs>;
+                };
+
+                opp-300000000 {
+                    opp-hz = /bits/ 64 <300000000>;
+                    required-opps = <&rpmhpd_opp_svs>;
+                };
+
+                opp-358000000 {
+                    opp-hz = /bits/ 64 <358000000>;
+                    required-opps = <&rpmhpd_opp_svs_l1>;
+                };
+            };
+        };
+
+        dsi0_phy: phy@ae94400 {
+            compatible = "qcom,sm8650-dsi-phy-4nm";
+            reg = <0x0ae95000 0x200>,
+                  <0x0ae95200 0x280>,
+                  <0x0ae95500 0x400>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc_iface_clk>,
+                     <&rpmhcc_ref_clk>;
+            clock-names = "iface", "ref";
+        };
+
+        dsi@ae96000 {
+            compatible = "qcom,sm8650-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae96000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <5>;
+
+            clocks = <&dispc_byte_clk>,
+                     <&dispcc_intf_clk>,
+                     <&dispcc_pclk>,
+                     <&dispcc_esc_clk>,
+                     <&dispcc_ahb_clk>,
+                     <&gcc_bus_clk>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+
+            assigned-clocks = <&dispcc_byte_clk>,
+                              <&dispcc_pclk>;
+            assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+            phys = <&dsi1_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dsi1_in: endpoint {
+                        remote-endpoint = <&dpu_intf2_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dsi1_out: endpoint {
+                    };
+                };
+            };
+        };
+
+        dsi1_phy: phy@ae96400 {
+            compatible = "qcom,sm8650-dsi-phy-4nm";
+            reg = <0x0ae97000 0x200>,
+                  <0x0ae97200 0x280>,
+                  <0x0ae97500 0x400>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc_iface_clk>,
+                     <&rpmhcc_ref_clk>;
+            clock-names = "iface", "ref";
+        };
+    };
+...
-- 
GitLab


From b94747f7d8c77e1b261afba8b8cdc9a56e35c9a3 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:27 +0100
Subject: [PATCH 0771/2340] drm/msm/dpu: add support for SM8650 DPU

Add DPU version 10.0 support for the SM8650 platform.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564975/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-5-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../msm/disp/dpu1/catalog/dpu_10_0_sm8650.h   | 457 ++++++++++++++++++
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    |  26 +
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h    |   1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h   |   3 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c       |   1 +
 5 files changed, 488 insertions(+)
 create mode 100644 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h
new file mode 100644
index 0000000000000..04d2a73dd942d
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2015-2018, 2020 The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DPU_10_0_SM8650_H
+#define _DPU_10_0_SM8650_H
+
+static const struct dpu_caps sm8650_dpu_caps = {
+	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
+	.max_mixer_blendstages = 0xb,
+	.has_src_split = true,
+	.has_dim_layer = true,
+	.has_idle_pc = true,
+	.has_3d_merge = true,
+	.max_linewidth = 8192,
+	.pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE,
+};
+
+static const struct dpu_mdp_cfg sm8650_mdp = {
+	.name = "top_0",
+	.base = 0, .len = 0x494,
+	.features = BIT(DPU_MDP_PERIPH_0_REMOVED),
+	.clk_ctrls = {
+		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
+	},
+};
+
+/* FIXME: get rid of DPU_CTL_SPLIT_DISPLAY in favour of proper ACTIVE_CTL support */
+static const struct dpu_ctl_cfg sm8650_ctl[] = {
+	{
+		.name = "ctl_0", .id = CTL_0,
+		.base = 0x15000, .len = 0x1000,
+		.features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY),
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9),
+	}, {
+		.name = "ctl_1", .id = CTL_1,
+		.base = 0x16000, .len = 0x1000,
+		.features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY),
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10),
+	}, {
+		.name = "ctl_2", .id = CTL_2,
+		.base = 0x17000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11),
+	}, {
+		.name = "ctl_3", .id = CTL_3,
+		.base = 0x18000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 12),
+	}, {
+		.name = "ctl_4", .id = CTL_4,
+		.base = 0x19000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 13),
+	}, {
+		.name = "ctl_5", .id = CTL_5,
+		.base = 0x1a000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 23),
+	},
+};
+
+static const struct dpu_sspp_cfg sm8650_sspp[] = {
+	{
+		.name = "sspp_0", .id = SSPP_VIG0,
+		.base = 0x4000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 0,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_1", .id = SSPP_VIG1,
+		.base = 0x6000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 4,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_2", .id = SSPP_VIG2,
+		.base = 0x8000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 8,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_3", .id = SSPP_VIG3,
+		.base = 0xa000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 12,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_8", .id = SSPP_DMA0,
+		.base = 0x24000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 1,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_9", .id = SSPP_DMA1,
+		.base = 0x26000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 5,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_10", .id = SSPP_DMA2,
+		.base = 0x28000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 9,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_11", .id = SSPP_DMA3,
+		.base = 0x2a000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 13,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_12", .id = SSPP_DMA4,
+		.base = 0x2c000, .len = 0x344,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 14,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_13", .id = SSPP_DMA5,
+		.base = 0x2e000, .len = 0x344,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 15,
+		.type = SSPP_TYPE_DMA,
+	},
+};
+
+static const struct dpu_lm_cfg sm8650_lm[] = {
+	{
+		.name = "lm_0", .id = LM_0,
+		.base = 0x44000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_1,
+		.pingpong = PINGPONG_0,
+		.dspp = DSPP_0,
+	}, {
+		.name = "lm_1", .id = LM_1,
+		.base = 0x45000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_0,
+		.pingpong = PINGPONG_1,
+		.dspp = DSPP_1,
+	}, {
+		.name = "lm_2", .id = LM_2,
+		.base = 0x46000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_3,
+		.pingpong = PINGPONG_2,
+	}, {
+		.name = "lm_3", .id = LM_3,
+		.base = 0x47000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_2,
+		.pingpong = PINGPONG_3,
+	}, {
+		.name = "lm_4", .id = LM_4,
+		.base = 0x48000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_5,
+		.pingpong = PINGPONG_4,
+	}, {
+		.name = "lm_5", .id = LM_5,
+		.base = 0x49000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_4,
+		.pingpong = PINGPONG_5,
+	},
+};
+
+static const struct dpu_dspp_cfg sm8650_dspp[] = {
+	{
+		.name = "dspp_0", .id = DSPP_0,
+		.base = 0x54000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	}, {
+		.name = "dspp_1", .id = DSPP_1,
+		.base = 0x56000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	}, {
+		.name = "dspp_2", .id = DSPP_2,
+		.base = 0x58000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	}, {
+		.name = "dspp_3", .id = DSPP_3,
+		.base = 0x5a000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	},
+};
+
+static const struct dpu_pingpong_cfg sm8650_pp[] = {
+	{
+		.name = "pingpong_0", .id = PINGPONG_0,
+		.base = 0x69000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_0,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
+	}, {
+		.name = "pingpong_1", .id = PINGPONG_1,
+		.base = 0x6a000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_0,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
+	}, {
+		.name = "pingpong_2", .id = PINGPONG_2,
+		.base = 0x6b000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_1,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10),
+	}, {
+		.name = "pingpong_3", .id = PINGPONG_3,
+		.base = 0x6c000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_1,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11),
+	}, {
+		.name = "pingpong_4", .id = PINGPONG_4,
+		.base = 0x6d000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_2,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30),
+	}, {
+		.name = "pingpong_5", .id = PINGPONG_5,
+		.base = 0x6e000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_2,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31),
+	}, {
+		.name = "pingpong_6", .id = PINGPONG_6,
+		.base = 0x66000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_3,
+	}, {
+		.name = "pingpong_7", .id = PINGPONG_7,
+		.base = 0x66400, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_3,
+	}, {
+		.name = "pingpong_8", .id = PINGPONG_8,
+		.base = 0x7e000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_4,
+	}, {
+		.name = "pingpong_9", .id = PINGPONG_9,
+		.base = 0x7e400, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_4,
+	},
+};
+
+static const struct dpu_merge_3d_cfg sm8650_merge_3d[] = {
+	{
+		.name = "merge_3d_0", .id = MERGE_3D_0,
+		.base = 0x4e000, .len = 0x8,
+	}, {
+		.name = "merge_3d_1", .id = MERGE_3D_1,
+		.base = 0x4f000, .len = 0x8,
+	}, {
+		.name = "merge_3d_2", .id = MERGE_3D_2,
+		.base = 0x50000, .len = 0x8,
+	}, {
+		.name = "merge_3d_3", .id = MERGE_3D_3,
+		.base = 0x66700, .len = 0x8,
+	}, {
+		.name = "merge_3d_4", .id = MERGE_3D_4,
+		.base = 0x7e700, .len = 0x8,
+	},
+};
+
+/*
+ * NOTE: Each display compression engine (DCE) contains dual hard
+ * slice DSC encoders so both share same base address but with
+ * its own different sub block address.
+ */
+static const struct dpu_dsc_cfg sm8650_dsc[] = {
+	{
+		.name = "dce_0_0", .id = DSC_0,
+		.base = 0x80000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_0,
+	}, {
+		.name = "dce_0_1", .id = DSC_1,
+		.base = 0x80000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_1,
+	}, {
+		.name = "dce_1_0", .id = DSC_2,
+		.base = 0x81000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_0,
+	}, {
+		.name = "dce_1_1", .id = DSC_3,
+		.base = 0x81000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_1,
+	}, {
+		.name = "dce_2_0", .id = DSC_4,
+		.base = 0x82000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2),
+		.sblk = &dsc_sblk_0,
+	}, {
+		.name = "dce_2_1", .id = DSC_5,
+		.base = 0x82000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2),
+		.sblk = &dsc_sblk_1,
+	},
+};
+
+static const struct dpu_wb_cfg sm8650_wb[] = {
+	{
+		.name = "wb_2", .id = WB_2,
+		.base = 0x65000, .len = 0x2c8,
+		.features = WB_SM8250_MASK,
+		.format_list = wb2_formats,
+		.num_formats = ARRAY_SIZE(wb2_formats),
+		.xin_id = 6,
+		.vbif_idx = VBIF_RT,
+		.maxlinewidth = 4096,
+		.intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4),
+	},
+};
+
+static const struct dpu_intf_cfg sm8650_intf[] = {
+	{
+		.name = "intf_0", .id = INTF_0,
+		.base = 0x34000, .len = 0x280,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DP,
+		.controller_id = MSM_DP_CONTROLLER_0,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25),
+	}, {
+		.name = "intf_1", .id = INTF_1,
+		.base = 0x35000, .len = 0x300,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DSI,
+		.controller_id = MSM_DSI_CONTROLLER_0,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
+		.intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF1_TEAR_INTR, 2),
+	}, {
+		.name = "intf_2", .id = INTF_2,
+		.base = 0x36000, .len = 0x300,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DSI,
+		.controller_id = MSM_DSI_CONTROLLER_1,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29),
+		.intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF2_TEAR_INTR, 2),
+	}, {
+		.name = "intf_3", .id = INTF_3,
+		.base = 0x37000, .len = 0x280,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DP,
+		.controller_id = MSM_DP_CONTROLLER_1,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
+	},
+};
+
+static const struct dpu_perf_cfg sm8650_perf_data = {
+	.max_bw_low = 17000000,
+	.max_bw_high = 27000000,
+	.min_core_ib = 2500000,
+	.min_llcc_ib = 0,
+	.min_dram_ib = 800000,
+	.min_prefill_lines = 35,
+	/* FIXME: lut tables */
+	.danger_lut_tbl = {0x3ffff, 0x3ffff, 0x0},
+	.safe_lut_tbl = {0xfe00, 0xfe00, 0xffff},
+	.qos_lut_tbl = {
+		{.nentry = ARRAY_SIZE(sc7180_qos_linear),
+		.entries = sc7180_qos_linear
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_macrotile),
+		.entries = sc7180_qos_macrotile
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_nrt),
+		.entries = sc7180_qos_nrt
+		},
+		/* TODO: macrotile-qseed is different from macrotile */
+	},
+	.cdp_cfg = {
+		{.rd_enable = 1, .wr_enable = 1},
+		{.rd_enable = 1, .wr_enable = 0}
+	},
+	.clk_inefficiency_factor = 105,
+	.bw_inefficiency_factor = 120,
+};
+
+static const struct dpu_mdss_version sm8650_mdss_ver = {
+	.core_major_ver = 10,
+	.core_minor_ver = 0,
+};
+
+const struct dpu_mdss_cfg dpu_sm8650_cfg = {
+	.mdss_ver = &sm8650_mdss_ver,
+	.caps = &sm8650_dpu_caps,
+	.mdp = &sm8650_mdp,
+	.ctl_count = ARRAY_SIZE(sm8650_ctl),
+	.ctl = sm8650_ctl,
+	.sspp_count = ARRAY_SIZE(sm8650_sspp),
+	.sspp = sm8650_sspp,
+	.mixer_count = ARRAY_SIZE(sm8650_lm),
+	.mixer = sm8650_lm,
+	.dspp_count = ARRAY_SIZE(sm8650_dspp),
+	.dspp = sm8650_dspp,
+	.pingpong_count = ARRAY_SIZE(sm8650_pp),
+	.pingpong = sm8650_pp,
+	.dsc_count = ARRAY_SIZE(sm8650_dsc),
+	.dsc = sm8650_dsc,
+	.merge_3d_count = ARRAY_SIZE(sm8650_merge_3d),
+	.merge_3d = sm8650_merge_3d,
+	.wb_count = ARRAY_SIZE(sm8650_wb),
+	.wb = sm8650_wb,
+	.intf_count = ARRAY_SIZE(sm8650_intf),
+	.intf = sm8650_intf,
+	.vbif_count = ARRAY_SIZE(sm8650_vbif),
+	.vbif = sm8650_vbif,
+	.perf = &sm8650_perf_data,
+};
+
+#endif
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index 21aaa7cb5acdb..d52aae54bbd59 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -329,6 +329,9 @@ static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_1 =
 static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_2 =
 				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
 
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_3 =
+				_VIG_SBLK(SSPP_SCALER_VER(3, 3));
+
 static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK();
 
 /*************************************************************
@@ -431,6 +434,7 @@ static const u32 msm8998_rt_pri_lvl[] = {1, 2, 2, 2};
 static const u32 msm8998_nrt_pri_lvl[] = {1, 1, 1, 1};
 static const u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6};
 static const u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3};
+static const u32 sm8650_rt_pri_lvl[] = {4, 4, 5, 5, 5, 5, 5, 6};
 
 static const struct dpu_vbif_dynamic_ot_cfg msm8998_ot_rdwr_cfg[] = {
 	{
@@ -517,6 +521,26 @@ static const struct dpu_vbif_cfg sm8550_vbif[] = {
 	},
 };
 
+static const struct dpu_vbif_cfg sm8650_vbif[] = {
+	{
+	.name = "vbif_rt", .id = VBIF_RT,
+	.base = 0, .len = 0x1074,
+	.features = BIT(DPU_VBIF_QOS_REMAP),
+	.xin_halt_timeout = 0x4000,
+	.qos_rp_remap_size = 0x40,
+	.qos_rt_tbl = {
+		.npriority_lvl = ARRAY_SIZE(sm8650_rt_pri_lvl),
+		.priority_lvl = sm8650_rt_pri_lvl,
+		},
+	.qos_nrt_tbl = {
+		.npriority_lvl = ARRAY_SIZE(sdm845_nrt_pri_lvl),
+		.priority_lvl = sdm845_nrt_pri_lvl,
+		},
+	.memtype_count = 16,
+	.memtype = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
+	},
+};
+
 /*************************************************************
  * PERF data config
  *************************************************************/
@@ -633,3 +657,5 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = {
 #include "catalog/dpu_8_1_sm8450.h"
 
 #include "catalog/dpu_9_0_sm8550.h"
+
+#include "catalog/dpu_10_0_sm8650.h"
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index 476214905e76b..e3c0d007481bc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -835,5 +835,6 @@ extern const struct dpu_mdss_cfg dpu_sc7280_cfg;
 extern const struct dpu_mdss_cfg dpu_sc8280xp_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8450_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8550_cfg;
+extern const struct dpu_mdss_cfg dpu_sm8650_cfg;
 
 #endif /* _DPU_HW_CATALOG_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
index d85157acfbf8f..a6702b2bfc686 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
@@ -195,6 +195,8 @@ enum dpu_pingpong {
 	PINGPONG_5,
 	PINGPONG_6,
 	PINGPONG_7,
+	PINGPONG_8,
+	PINGPONG_9,
 	PINGPONG_S0,
 	PINGPONG_MAX
 };
@@ -204,6 +206,7 @@ enum dpu_merge_3d {
 	MERGE_3D_1,
 	MERGE_3D_2,
 	MERGE_3D_3,
+	MERGE_3D_4,
 	MERGE_3D_MAX
 };
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 4426033252cad..c1cb500adc076 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -1349,6 +1349,7 @@ static const struct of_device_id dpu_dt_match[] = {
 	{ .compatible = "qcom,sm8350-dpu", .data = &dpu_sm8350_cfg, },
 	{ .compatible = "qcom,sm8450-dpu", .data = &dpu_sm8450_cfg, },
 	{ .compatible = "qcom,sm8550-dpu", .data = &dpu_sm8550_cfg, },
+	{ .compatible = "qcom,sm8650-dpu", .data = &dpu_sm8650_cfg, },
 	{}
 };
 MODULE_DEVICE_TABLE(of, dpu_dt_match);
-- 
GitLab


From e6488c2a354103b6a3212f2fffa854e235d8a80e Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:28 +0100
Subject: [PATCH 0772/2340] drm/msm: mdss: add support for SM8650

Add Mobile Display Subsystem (MDSS) support for the SM8650 platform.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564980/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-6-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/msm_mdss.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 6f390c8505770..16b567a39b8bd 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -628,6 +628,7 @@ static const struct of_device_id mdss_dt_match[] = {
 	{ .compatible = "qcom,sm8350-mdss", .data = &sm8250_data },
 	{ .compatible = "qcom,sm8450-mdss", .data = &sm8250_data },
 	{ .compatible = "qcom,sm8550-mdss", .data = &sm8550_data },
+	{ .compatible = "qcom,sm8650-mdss", .data = &sm8550_data},
 	{}
 };
 MODULE_DEVICE_TABLE(of, mdss_dt_match);
-- 
GitLab


From 3a73e376cff31c6cbc99f0e44068db3a769684d8 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:29 +0100
Subject: [PATCH 0773/2340] drm/msm: dsi: add support for DSI-PHY on SM8650

Add DSI PHY support for the SM8650 platform.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564976/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-7-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c     |  2 ++
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.h     |  1 +
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 27 +++++++++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index e49ebd9f6326f..24a347fe29984 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -587,6 +587,8 @@ static const struct of_device_id dsi_phy_dt_match[] = {
 	  .data = &dsi_phy_5nm_8450_cfgs },
 	{ .compatible = "qcom,sm8550-dsi-phy-4nm",
 	  .data = &dsi_phy_4nm_8550_cfgs },
+	{ .compatible = "qcom,sm8650-dsi-phy-4nm",
+	  .data = &dsi_phy_4nm_8650_cfgs },
 #endif
 	{}
 };
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 8b640d1747851..e4275d3ad5819 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -62,6 +62,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_7nm_7280_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8350_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8450_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs;
+extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs;
 
 struct msm_dsi_dphy_timing {
 	u32 clk_zero;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
index 3b1ed02f644d2..c66193f2dc0d4 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
@@ -1121,6 +1121,10 @@ static const struct regulator_bulk_data dsi_phy_7nm_37750uA_regulators[] = {
 	{ .supply = "vdds", .init_load_uA = 37550 },
 };
 
+static const struct regulator_bulk_data dsi_phy_7nm_98000uA_regulators[] = {
+	{ .supply = "vdds", .init_load_uA = 98000 },
+};
+
 static const struct regulator_bulk_data dsi_phy_7nm_97800uA_regulators[] = {
 	{ .supply = "vdds", .init_load_uA = 97800 },
 };
@@ -1281,3 +1285,26 @@ const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs = {
 	.num_dsi_phy = 2,
 	.quirks = DSI_PHY_7NM_QUIRK_V5_2,
 };
+
+const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs = {
+	.has_phy_lane = true,
+	.regulator_data = dsi_phy_7nm_98000uA_regulators,
+	.num_regulators = ARRAY_SIZE(dsi_phy_7nm_98000uA_regulators),
+	.ops = {
+		.enable = dsi_7nm_phy_enable,
+		.disable = dsi_7nm_phy_disable,
+		.pll_init = dsi_pll_7nm_init,
+		.save_pll_state = dsi_7nm_pll_save_state,
+		.restore_pll_state = dsi_7nm_pll_restore_state,
+		.set_continuous_clock = dsi_7nm_set_continuous_clock,
+	},
+	.min_pll_rate = 600000000UL,
+#ifdef CONFIG_64BIT
+	.max_pll_rate = 5000000000UL,
+#else
+	.max_pll_rate = ULONG_MAX,
+#endif
+	.io_start = { 0xae95000, 0xae97000 },
+	.num_dsi_phy = 2,
+	.quirks = DSI_PHY_7NM_QUIRK_V5_2,
+};
-- 
GitLab


From fec254cc752d0449714c026f021d3043abda15e5 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 30 Oct 2023 11:36:30 +0100
Subject: [PATCH 0774/2340] drm/msm: dsi: add support for DSI 2.8.0

Add DSI Controller version 2.8.0 support for the SM8650 platform.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/564977/
Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-8-43f1887c82b8@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dsi/dsi_cfg.c | 17 +++++++++++++++++
 drivers/gpu/drm/msm/dsi/dsi_cfg.h |  1 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 1f98ff74ceb09..10ba7d153d1cf 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -190,6 +190,21 @@ static const struct msm_dsi_config sm8550_dsi_cfg = {
 	},
 };
 
+static const struct regulator_bulk_data sm8650_dsi_regulators[] = {
+	{ .supply = "vdda", .init_load_uA = 16600 },	/* 1.2 V */
+};
+
+static const struct msm_dsi_config sm8650_dsi_cfg = {
+	.io_offset = DSI_6G_REG_SHIFT,
+	.regulator_data = sm8650_dsi_regulators,
+	.num_regulators = ARRAY_SIZE(sm8650_dsi_regulators),
+	.bus_clk_names = dsi_v2_4_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_v2_4_clk_names),
+	.io_start = {
+		{ 0xae94000, 0xae96000 },
+	},
+};
+
 static const struct regulator_bulk_data sc7280_dsi_regulators[] = {
 	{ .supply = "vdda", .init_load_uA = 8350 },	/* 1.2 V */
 	{ .supply = "refgen" },
@@ -281,6 +296,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
 		&sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_7_0,
 		&sm8550_dsi_cfg, &msm_dsi_6g_v2_host_ops},
+	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_8_0,
+		&sm8650_dsi_cfg, &msm_dsi_6g_v2_host_ops},
 };
 
 const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor)
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index 43f0dd74edb65..4c9b4b37681b0 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -28,6 +28,7 @@
 #define MSM_DSI_6G_VER_MINOR_V2_5_0	0x20050000
 #define MSM_DSI_6G_VER_MINOR_V2_6_0	0x20060000
 #define MSM_DSI_6G_VER_MINOR_V2_7_0	0x20070000
+#define MSM_DSI_6G_VER_MINOR_V2_8_0	0x20080000
 
 #define MSM_DSI_V2_VER_MINOR_8064	0x0
 
-- 
GitLab


From ded61d7dc5a0f8cfe7390aba33187c862d09b177 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 01:42:44 +0300
Subject: [PATCH 0775/2340] drm/msm/mdss: switch mdss to use devm_of_icc_get()

Stop using hand-written reset function for ICC release, use
devm_of_icc_get() instead.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570161/
Link: https://lore.kernel.org/r/20231202224247.1282567-2-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/msm_mdss.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 16b567a39b8bd..92a290b36959e 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -50,14 +50,14 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
 	struct icc_path *path0;
 	struct icc_path *path1;
 
-	path0 = of_icc_get(dev, "mdp0-mem");
+	path0 = devm_of_icc_get(dev, "mdp0-mem");
 	if (IS_ERR_OR_NULL(path0))
 		return PTR_ERR_OR_ZERO(path0);
 
 	msm_mdss->path[0] = path0;
 	msm_mdss->num_paths = 1;
 
-	path1 = of_icc_get(dev, "mdp1-mem");
+	path1 = devm_of_icc_get(dev, "mdp1-mem");
 	if (!IS_ERR_OR_NULL(path1)) {
 		msm_mdss->path[1] = path1;
 		msm_mdss->num_paths++;
@@ -66,15 +66,6 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
 	return 0;
 }
 
-static void msm_mdss_put_icc_path(void *data)
-{
-	struct msm_mdss *msm_mdss = data;
-	int i;
-
-	for (i = 0; i < msm_mdss->num_paths; i++)
-		icc_put(msm_mdss->path[i]);
-}
-
 static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw)
 {
 	int i;
@@ -391,9 +382,6 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5
 	dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio);
 
 	ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss);
-	if (ret)
-		return ERR_PTR(ret);
-	ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss);
 	if (ret)
 		return ERR_PTR(ret);
 
-- 
GitLab


From fabaf176322d687b91a4acf1630c0d0a7d097faa Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Sun, 3 Dec 2023 01:42:45 +0300
Subject: [PATCH 0776/2340] drm/msm/mdss: Rename path references to mdp_path

The DPU1 driver needs to handle all MDPn<->DDR paths, as well as
CPU<->SLAVE_DISPLAY_CFG. The former ones share how their values are
calculated, but the latter one has static predefines spanning all SoCs.

In preparation for supporting the CPU<->SLAVE_DISPLAY_CFG path, rename
the path-related struct members to include "mdp_".

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570163/
Link: https://lore.kernel.org/r/20231202224247.1282567-3-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/msm_mdss.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 92a290b36959e..a1a077e2bac8e 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -40,8 +40,8 @@ struct msm_mdss {
 		struct irq_domain *domain;
 	} irq_controller;
 	const struct msm_mdss_data *mdss_data;
-	struct icc_path *path[2];
-	u32 num_paths;
+	struct icc_path *mdp_path[2];
+	u32 num_mdp_paths;
 };
 
 static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
@@ -54,13 +54,13 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
 	if (IS_ERR_OR_NULL(path0))
 		return PTR_ERR_OR_ZERO(path0);
 
-	msm_mdss->path[0] = path0;
-	msm_mdss->num_paths = 1;
+	msm_mdss->mdp_path[0] = path0;
+	msm_mdss->num_mdp_paths = 1;
 
 	path1 = devm_of_icc_get(dev, "mdp1-mem");
 	if (!IS_ERR_OR_NULL(path1)) {
-		msm_mdss->path[1] = path1;
-		msm_mdss->num_paths++;
+		msm_mdss->mdp_path[1] = path1;
+		msm_mdss->num_mdp_paths++;
 	}
 
 	return 0;
@@ -70,8 +70,8 @@ static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw)
 {
 	int i;
 
-	for (i = 0; i < msm_mdss->num_paths; i++)
-		icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw));
+	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
+		icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(bw));
 }
 
 static void msm_mdss_irq(struct irq_desc *desc)
-- 
GitLab


From 7323694e118a24ab954d3a16fe59a68b311cb462 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 01:42:46 +0300
Subject: [PATCH 0777/2340] drm/msm/mdss: inline msm_mdss_icc_request_bw()

There are just two places where we set the bandwidth: in the resume and
in the suspend paths. Drop the wrapping function
msm_mdss_icc_request_bw() and call icc_set_bw() directly.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570168/
Link: https://lore.kernel.org/r/20231202224247.1282567-4-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/msm_mdss.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index a1a077e2bac8e..776579ac69d82 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -66,14 +66,6 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
 	return 0;
 }
 
-static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw)
-{
-	int i;
-
-	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
-		icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(bw));
-}
-
 static void msm_mdss_irq(struct irq_desc *desc)
 {
 	struct msm_mdss *msm_mdss = irq_desc_get_handler_data(desc);
@@ -227,14 +219,15 @@ const struct msm_mdss_data *msm_mdss_get_mdss_data(struct device *dev)
 
 static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 {
-	int ret;
+	int ret, i;
 
 	/*
 	 * Several components have AXI clocks that can only be turned on if
 	 * the interconnect is enabled (non-zero bandwidth). Let's make sure
 	 * that the interconnects are at least at a minimum amount.
 	 */
-	msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW);
+	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
+		icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(MIN_IB_BW));
 
 	ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks);
 	if (ret) {
@@ -286,8 +279,12 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 
 static int msm_mdss_disable(struct msm_mdss *msm_mdss)
 {
+	int i;
+
 	clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks);
-	msm_mdss_icc_request_bw(msm_mdss, 0);
+
+	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
+		icc_set_bw(msm_mdss->mdp_path[i], 0, 0);
 
 	return 0;
 }
-- 
GitLab


From a55c8ff252d374acb6f78b979cadc38073ce95e8 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 01:42:47 +0300
Subject: [PATCH 0778/2340] drm/msm/mdss: Handle the reg bus ICC path

Apart from the already handled data bus (MAS_MDP_Pn<->DDR), there's
another path that needs to be handled to ensure MDSS functions properly,
namely the "reg bus", a.k.a the CPU-MDSS interconnect.

Gating that path may have a variety of effects, from none to otherwise
inexplicable DSI timeouts.

Provide a way for MDSS driver to vote on this bus.

A note regarding vote values. Newer platforms have corresponding
bandwidth values in the vendor DT files. For the older platforms there
was a static vote in the mdss_mdp and rotator drivers. I choose to be
conservative here and choose this value as a default.

Co-developed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570164/
Link: https://lore.kernel.org/r/20231202224247.1282567-5-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/msm_mdss.c | 49 +++++++++++++++++++++++++++++++---
 drivers/gpu/drm/msm/msm_mdss.h |  1 +
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 776579ac69d82..455b2e3a0cdd4 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -28,6 +28,8 @@
 
 #define MIN_IB_BW	400000000UL /* Min ib vote 400MB */
 
+#define DEFAULT_REG_BW	153600 /* Used in mdss fbdev driver */
+
 struct msm_mdss {
 	struct device *dev;
 
@@ -42,6 +44,7 @@ struct msm_mdss {
 	const struct msm_mdss_data *mdss_data;
 	struct icc_path *mdp_path[2];
 	u32 num_mdp_paths;
+	struct icc_path *reg_bus_path;
 };
 
 static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
@@ -49,6 +52,7 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
 {
 	struct icc_path *path0;
 	struct icc_path *path1;
+	struct icc_path *reg_bus_path;
 
 	path0 = devm_of_icc_get(dev, "mdp0-mem");
 	if (IS_ERR_OR_NULL(path0))
@@ -63,6 +67,10 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
 		msm_mdss->num_mdp_paths++;
 	}
 
+	reg_bus_path = of_icc_get(dev, "cpu-cfg");
+	if (!IS_ERR_OR_NULL(reg_bus_path))
+		msm_mdss->reg_bus_path = reg_bus_path;
+
 	return 0;
 }
 
@@ -229,6 +237,13 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
 		icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(MIN_IB_BW));
 
+	if (msm_mdss->mdss_data && msm_mdss->mdss_data->reg_bus_bw)
+		icc_set_bw(msm_mdss->reg_bus_path, 0,
+			   msm_mdss->mdss_data->reg_bus_bw);
+	else
+		icc_set_bw(msm_mdss->reg_bus_path, 0,
+			   DEFAULT_REG_BW);
+
 	ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks);
 	if (ret) {
 		dev_err(msm_mdss->dev, "clock enable failed, ret:%d\n", ret);
@@ -286,6 +301,9 @@ static int msm_mdss_disable(struct msm_mdss *msm_mdss)
 	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
 		icc_set_bw(msm_mdss->mdp_path[i], 0, 0);
 
+	if (msm_mdss->reg_bus_path)
+		icc_set_bw(msm_mdss->reg_bus_path, 0, 0);
+
 	return 0;
 }
 
@@ -372,6 +390,8 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5
 	if (!msm_mdss)
 		return ERR_PTR(-ENOMEM);
 
+	msm_mdss->mdss_data = of_device_get_match_data(&pdev->dev);
+
 	msm_mdss->mmio = devm_platform_ioremap_resource_byname(pdev, is_mdp5 ? "mdss_phys" : "mdss");
 	if (IS_ERR(msm_mdss->mmio))
 		return ERR_CAST(msm_mdss->mmio);
@@ -462,8 +482,6 @@ static int mdss_probe(struct platform_device *pdev)
 	if (IS_ERR(mdss))
 		return PTR_ERR(mdss);
 
-	mdss->mdss_data = of_device_get_match_data(&pdev->dev);
-
 	platform_set_drvdata(pdev, mdss);
 
 	/*
@@ -495,11 +513,13 @@ static const struct msm_mdss_data msm8998_data = {
 	.ubwc_enc_version = UBWC_1_0,
 	.ubwc_dec_version = UBWC_1_0,
 	.highest_bank_bit = 2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data qcm2290_data = {
 	/* no UBWC */
 	.highest_bank_bit = 0x2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sc7180_data = {
@@ -507,6 +527,7 @@ static const struct msm_mdss_data sc7180_data = {
 	.ubwc_dec_version = UBWC_2_0,
 	.ubwc_static = 0x1e,
 	.highest_bank_bit = 0x3,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sc7280_data = {
@@ -516,6 +537,7 @@ static const struct msm_mdss_data sc7280_data = {
 	.ubwc_static = 1,
 	.highest_bank_bit = 1,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 74000,
 };
 
 static const struct msm_mdss_data sc8180x_data = {
@@ -523,6 +545,7 @@ static const struct msm_mdss_data sc8180x_data = {
 	.ubwc_dec_version = UBWC_3_0,
 	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sc8280xp_data = {
@@ -532,6 +555,7 @@ static const struct msm_mdss_data sc8280xp_data = {
 	.ubwc_static = 1,
 	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sdm670_data = {
@@ -544,6 +568,7 @@ static const struct msm_mdss_data sdm845_data = {
 	.ubwc_enc_version = UBWC_2_0,
 	.ubwc_dec_version = UBWC_2_0,
 	.highest_bank_bit = 2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm6350_data = {
@@ -552,12 +577,14 @@ static const struct msm_mdss_data sm6350_data = {
 	.ubwc_swizzle = 6,
 	.ubwc_static = 0x1e,
 	.highest_bank_bit = 1,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm8150_data = {
 	.ubwc_enc_version = UBWC_3_0,
 	.ubwc_dec_version = UBWC_3_0,
 	.highest_bank_bit = 2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm6115_data = {
@@ -566,6 +593,7 @@ static const struct msm_mdss_data sm6115_data = {
 	.ubwc_swizzle = 7,
 	.ubwc_static = 0x11f,
 	.highest_bank_bit = 0x1,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm6125_data = {
@@ -583,6 +611,18 @@ static const struct msm_mdss_data sm8250_data = {
 	/* TODO: highest_bank_bit = 2 for LP_DDR4 */
 	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 76800,
+};
+
+static const struct msm_mdss_data sm8350_data = {
+	.ubwc_enc_version = UBWC_4_0,
+	.ubwc_dec_version = UBWC_4_0,
+	.ubwc_swizzle = 6,
+	.ubwc_static = 1,
+	/* TODO: highest_bank_bit = 2 for LP_DDR4 */
+	.highest_bank_bit = 3,
+	.macrotile_mode = 1,
+	.reg_bus_bw = 74000,
 };
 
 static const struct msm_mdss_data sm8550_data = {
@@ -593,6 +633,7 @@ static const struct msm_mdss_data sm8550_data = {
 	/* TODO: highest_bank_bit = 2 for LP_DDR4 */
 	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 57000,
 };
 static const struct of_device_id mdss_dt_match[] = {
 	{ .compatible = "qcom,mdss" },
@@ -610,8 +651,8 @@ static const struct of_device_id mdss_dt_match[] = {
 	{ .compatible = "qcom,sm6375-mdss", .data = &sm6350_data },
 	{ .compatible = "qcom,sm8150-mdss", .data = &sm8150_data },
 	{ .compatible = "qcom,sm8250-mdss", .data = &sm8250_data },
-	{ .compatible = "qcom,sm8350-mdss", .data = &sm8250_data },
-	{ .compatible = "qcom,sm8450-mdss", .data = &sm8250_data },
+	{ .compatible = "qcom,sm8350-mdss", .data = &sm8350_data },
+	{ .compatible = "qcom,sm8450-mdss", .data = &sm8350_data },
 	{ .compatible = "qcom,sm8550-mdss", .data = &sm8550_data },
 	{ .compatible = "qcom,sm8650-mdss", .data = &sm8550_data},
 	{}
diff --git a/drivers/gpu/drm/msm/msm_mdss.h b/drivers/gpu/drm/msm/msm_mdss.h
index 02bbab42adbc0..3afef4b1786d2 100644
--- a/drivers/gpu/drm/msm/msm_mdss.h
+++ b/drivers/gpu/drm/msm/msm_mdss.h
@@ -14,6 +14,7 @@ struct msm_mdss_data {
 	u32 ubwc_static;
 	u32 highest_bank_bit;
 	u32 macrotile_mode;
+	u32 reg_bus_bw;
 };
 
 #define UBWC_1_0 0x10000000
-- 
GitLab


From e759f2ca29d918d3db57a61cdf838025beb03465 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 29 Nov 2023 23:08:00 +0100
Subject: [PATCH 0779/2340] drm/gpuvm: fall back to drm_exec_lock_obj()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fall back to drm_exec_lock_obj() if num_fences is zero for the
drm_gpuvm_prepare_* function family.

Otherwise dma_resv_reserve_fences() would actually allocate slots even
though num_fences is zero.

Cc: Christian König <christian.koenig@amd.com>
Acked-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129220835.297885-2-dakr@redhat.com
---
 drivers/gpu/drm/drm_gpuvm.c | 43 ++++++++++++++++++++++++++++++++-----
 include/drm/drm_gpuvm.h     | 23 +++-----------------
 2 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 54f5e8851de5d..07a6676bc4f9b 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1085,6 +1085,37 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm)
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_put);
 
+static int
+exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj,
+		 unsigned int num_fences)
+{
+	return num_fences ? drm_exec_prepare_obj(exec, obj, num_fences) :
+			    drm_exec_lock_obj(exec, obj);
+}
+
+/**
+ * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv
+ * @gpuvm: the &drm_gpuvm
+ * @exec: the &drm_exec context
+ * @num_fences: the amount of &dma_fences to reserve
+ *
+ * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object; if
+ * @num_fences is zero drm_exec_lock_obj() is called instead.
+ *
+ * Using this function directly, it is the drivers responsibility to call
+ * drm_exec_init() and drm_exec_fini() accordingly.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int
+drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm,
+		     struct drm_exec *exec,
+		     unsigned int num_fences)
+{
+	return exec_prepare_obj(exec, gpuvm->r_obj, num_fences);
+}
+EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_vm);
+
 static int
 __drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
 			    struct drm_exec *exec,
@@ -1095,7 +1126,7 @@ __drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
 	int ret = 0;
 
 	for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) {
-		ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences);
+		ret = exec_prepare_obj(exec, vm_bo->obj, num_fences);
 		if (ret)
 			break;
 	}
@@ -1116,7 +1147,7 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm,
 
 	drm_gpuvm_resv_assert_held(gpuvm);
 	list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) {
-		ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences);
+		ret = exec_prepare_obj(exec, vm_bo->obj, num_fences);
 		if (ret)
 			break;
 
@@ -1134,7 +1165,8 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm,
  * @num_fences: the amount of &dma_fences to reserve
  *
  * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given
- * &drm_gpuvm contains mappings of.
+ * &drm_gpuvm contains mappings of; if @num_fences is zero drm_exec_lock_obj()
+ * is called instead.
  *
  * Using this function directly, it is the drivers responsibility to call
  * drm_exec_init() and drm_exec_fini() accordingly.
@@ -1171,7 +1203,8 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_objects);
  * @num_fences: the amount of &dma_fences to reserve
  *
  * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr
- * and @addr + @range.
+ * and @addr + @range; if @num_fences is zero drm_exec_lock_obj() is called
+ * instead.
  *
  * Returns: 0 on success, negative error code on failure.
  */
@@ -1186,7 +1219,7 @@ drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec,
 	drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) {
 		struct drm_gem_object *obj = va->gem.obj;
 
-		ret = drm_exec_prepare_obj(exec, obj, num_fences);
+		ret = exec_prepare_obj(exec, obj, num_fences);
 		if (ret)
 			return ret;
 	}
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index f94fec9a8517b..b3f82ec7fb17d 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -544,26 +544,9 @@ struct drm_gpuvm_exec {
 	} extra;
 };
 
-/**
- * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv
- * @gpuvm: the &drm_gpuvm
- * @exec: the &drm_exec context
- * @num_fences: the amount of &dma_fences to reserve
- *
- * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object.
- *
- * Using this function directly, it is the drivers responsibility to call
- * drm_exec_init() and drm_exec_fini() accordingly.
- *
- * Returns: 0 on success, negative error code on failure.
- */
-static inline int
-drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm,
-		     struct drm_exec *exec,
-		     unsigned int num_fences)
-{
-	return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences);
-}
+int drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm,
+			 struct drm_exec *exec,
+			 unsigned int num_fences);
 
 int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm,
 			      struct drm_exec *exec,
-- 
GitLab


From 4bc736f890cec126246a1d65d3b556763670a8d4 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 29 Nov 2023 23:08:01 +0100
Subject: [PATCH 0780/2340] drm/imagination: vm: make use of GPUVM's drm_exec
 helper

Make use of GPUVM's drm_exec helper functions preventing direct access
to GPUVM internal data structures, such as the external object list.

This is especially important to ensure following the locking rules
around the GPUVM external object list.

Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Reviewed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231129220835.297885-3-dakr@redhat.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 91 +++++++++++-----------------
 1 file changed, 36 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 6f4e07effad23..f42345fbe4bf9 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -327,48 +327,6 @@ err_bind_op_fini:
 	return err;
 }
 
-static int
-pvr_vm_bind_op_lock_resvs(struct drm_exec *exec, struct pvr_vm_bind_op *bind_op)
-{
-	drm_exec_until_all_locked(exec) {
-		struct drm_gem_object *r_obj = &bind_op->vm_ctx->dummy_gem;
-		struct drm_gpuvm *gpuvm = &bind_op->vm_ctx->gpuvm_mgr;
-		struct pvr_gem_object *pvr_obj = bind_op->pvr_obj;
-		struct drm_gpuvm_bo *gpuvm_bo;
-
-		/* Acquire lock on the vm_context's reserve object. */
-		int err = drm_exec_lock_obj(exec, r_obj);
-
-		drm_exec_retry_on_contention(exec);
-		if (err)
-			return err;
-
-		/* Acquire lock on all BOs in the context. */
-		list_for_each_entry(gpuvm_bo, &gpuvm->extobj.list,
-				    list.entry.extobj) {
-			err = drm_exec_lock_obj(exec, gpuvm_bo->obj);
-
-			drm_exec_retry_on_contention(exec);
-			if (err)
-				return err;
-		}
-
-		/* Unmap operations don't have an object to lock. */
-		if (!pvr_obj)
-			break;
-
-		/* Acquire lock on the GEM being mapped. */
-		err = drm_exec_lock_obj(exec,
-					gem_from_pvr_gem(bind_op->pvr_obj));
-
-		drm_exec_retry_on_contention(exec);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 /**
  * pvr_vm_gpuva_map() - Insert a mapping into a memory context.
  * @op: gpuva op containing the remap details.
@@ -725,6 +683,20 @@ void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file)
 	}
 }
 
+static int
+pvr_vm_lock_extra(struct drm_gpuvm_exec *vm_exec)
+{
+	struct pvr_vm_bind_op *bind_op = vm_exec->extra.priv;
+	struct pvr_gem_object *pvr_obj = bind_op->pvr_obj;
+
+	/* Unmap operations don't have an object to lock. */
+	if (!pvr_obj)
+		return 0;
+
+	/* Acquire lock on the GEM being mapped. */
+	return drm_exec_lock_obj(&vm_exec->exec, gem_from_pvr_gem(pvr_obj));
+}
+
 /**
  * pvr_vm_map() - Map a section of physical memory into a section of
  * device-virtual memory.
@@ -752,7 +724,15 @@ pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj,
 	   u64 pvr_obj_offset, u64 device_addr, u64 size)
 {
 	struct pvr_vm_bind_op bind_op = {0};
-	struct drm_exec exec;
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &vm_ctx->gpuvm_mgr,
+		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT |
+			 DRM_EXEC_IGNORE_DUPLICATES,
+		.extra = {
+			.fn = pvr_vm_lock_extra,
+			.priv = &bind_op,
+		},
+	};
 
 	int err = pvr_vm_bind_op_map_init(&bind_op, vm_ctx, pvr_obj,
 					  pvr_obj_offset, device_addr,
@@ -761,18 +741,15 @@ pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj,
 	if (err)
 		return err;
 
-	drm_exec_init(&exec,
-		      DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
-
 	pvr_gem_object_get(pvr_obj);
 
-	err = pvr_vm_bind_op_lock_resvs(&exec, &bind_op);
+	err = drm_gpuvm_exec_lock(&vm_exec);
 	if (err)
 		goto err_cleanup;
 
 	err = pvr_vm_bind_op_exec(&bind_op);
 
-	drm_exec_fini(&exec);
+	drm_gpuvm_exec_unlock(&vm_exec);
 
 err_cleanup:
 	pvr_vm_bind_op_fini(&bind_op);
@@ -798,24 +775,28 @@ int
 pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size)
 {
 	struct pvr_vm_bind_op bind_op = {0};
-	struct drm_exec exec;
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &vm_ctx->gpuvm_mgr,
+		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT |
+			 DRM_EXEC_IGNORE_DUPLICATES,
+		.extra = {
+			.fn = pvr_vm_lock_extra,
+			.priv = &bind_op,
+		},
+	};
 
 	int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr,
 					    size);
-
 	if (err)
 		return err;
 
-	drm_exec_init(&exec,
-		      DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
-
-	err = pvr_vm_bind_op_lock_resvs(&exec, &bind_op);
+	err = drm_gpuvm_exec_lock(&vm_exec);
 	if (err)
 		goto err_cleanup;
 
 	err = pvr_vm_bind_op_exec(&bind_op);
 
-	drm_exec_fini(&exec);
+	drm_gpuvm_exec_unlock(&vm_exec);
 
 err_cleanup:
 	pvr_vm_bind_op_fini(&bind_op);
-- 
GitLab


From 4777dded21717c31d2d8471bccaf7c0ff58feaa4 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 27 Nov 2023 07:15:43 +0200
Subject: [PATCH 0781/2340] dt-bindings: display: simple: Add boe,bp101wx1-100
 panel

This panel is found on Motorola mapphone tablets mz615 to mz617.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20231127051547.15023-1-tony@atomide.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127051547.15023-1-tony@atomide.com
---
 .../devicetree/bindings/display/panel/panel-simple.yaml         | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
index 3ec9ee95045fb..f00275e505316 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@@ -73,6 +73,8 @@ properties:
       - auo,t215hvn01
         # Shanghai AVIC Optoelectronics 7" 1024x600 color TFT-LCD panel
       - avic,tm070ddh03
+        # BOE BP101WX1-100 10.1" WXGA (1280x800) LVDS panel
+      - boe,bp101wx1-100
         # BOE EV121WXM-N10-1850 12.1" WXGA (1280x800) TFT LCD panel
       - boe,ev121wxm-n10-1850
         # BOE HV070WSA-100 7.01" WSVGA TFT LCD panel
-- 
GitLab


From eeaddab4c14beb02157db5ca8f9e074066759bfd Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 27 Nov 2023 07:15:44 +0200
Subject: [PATCH 0782/2340] drm/panel: simple: Add BOE BP101WX1-100 panel

This panel is found on Motorola mapphone tablets from mz615 to mz617.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231127051547.15023-2-tony@atomide.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127051547.15023-2-tony@atomide.com
---
 drivers/gpu/drm/panel/panel-simple.c | 32 ++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 9367a4572dcf6..876e1b40cfb3b 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1324,6 +1324,35 @@ static const struct panel_desc bananapi_s070wv20_ct16 = {
 	},
 };
 
+static const struct drm_display_mode boe_bp101wx1_100_mode = {
+	.clock = 78945,
+	.hdisplay = 1280,
+	.hsync_start = 1280 + 0,
+	.hsync_end = 1280 + 0 + 2,
+	.htotal = 1280 + 62 + 0 + 2,
+	.vdisplay = 800,
+	.vsync_start = 800 + 8,
+	.vsync_end = 800 + 8 + 2,
+	.vtotal = 800 + 6 + 8 + 2,
+};
+
+static const struct panel_desc boe_bp101wx1_100 = {
+	.modes = &boe_bp101wx1_100_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 217,
+		.height = 136,
+	},
+	.delay = {
+		.enable = 50,
+		.disable = 50,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH,
+	.connector_type = DRM_MODE_CONNECTOR_LVDS,
+};
+
 static const struct display_timing boe_ev121wxm_n10_1850_timing = {
 	.pixelclock = { 69922000, 71000000, 72293000 },
 	.hactive = { 1280, 1280, 1280 },
@@ -4253,6 +4282,9 @@ static const struct of_device_id platform_of_match[] = {
 	}, {
 		.compatible = "bananapi,s070wv20-ct16",
 		.data = &bananapi_s070wv20_ct16,
+	}, {
+		.compatible = "boe,bp101wx1-100",
+		.data = &boe_bp101wx1_100,
 	}, {
 		.compatible = "boe,ev121wxm-n10-1850",
 		.data = &boe_ev121wxm_n10_1850,
-- 
GitLab


From 8c2c5d1d33f0725b7995f44f87a81311d13a441d Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Mon, 4 Dec 2023 12:57:10 -0600
Subject: [PATCH 0783/2340] drm/panel: himax-hx8394: Drop prepare/unprepare
 tracking

Drop the panel specific prepare/unprepare logic. This is now tracked
by the DRM stack [1].

[1] commit d2aacaf07395 ("drm/panel: Check for already prepared/enabled in
drm_panel")

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231204185719.569021-2-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204185719.569021-2-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-himax-hx8394.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-himax-hx8394.c b/drivers/gpu/drm/panel/panel-himax-hx8394.c
index c73243d85de71..3823ff388b96b 100644
--- a/drivers/gpu/drm/panel/panel-himax-hx8394.c
+++ b/drivers/gpu/drm/panel/panel-himax-hx8394.c
@@ -68,7 +68,6 @@ struct hx8394 {
 	struct gpio_desc *reset_gpio;
 	struct regulator *vcc;
 	struct regulator *iovcc;
-	bool prepared;
 
 	const struct hx8394_panel_desc *desc;
 };
@@ -262,16 +261,11 @@ static int hx8394_unprepare(struct drm_panel *panel)
 {
 	struct hx8394 *ctx = panel_to_hx8394(panel);
 
-	if (!ctx->prepared)
-		return 0;
-
 	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
 
 	regulator_disable(ctx->iovcc);
 	regulator_disable(ctx->vcc);
 
-	ctx->prepared = false;
-
 	return 0;
 }
 
@@ -280,9 +274,6 @@ static int hx8394_prepare(struct drm_panel *panel)
 	struct hx8394 *ctx = panel_to_hx8394(panel);
 	int ret;
 
-	if (ctx->prepared)
-		return 0;
-
 	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
 
 	ret = regulator_enable(ctx->vcc);
@@ -301,8 +292,6 @@ static int hx8394_prepare(struct drm_panel *panel)
 
 	msleep(180);
 
-	ctx->prepared = true;
-
 	return 0;
 
 disable_vcc:
-- 
GitLab


From e4f53a4d921eba6187a2599cf184a3beeb604fe2 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Mon, 4 Dec 2023 12:57:11 -0600
Subject: [PATCH 0784/2340] drm/panel: himax-hx8394: Drop shutdown logic

The driver shutdown is duplicate as it calls drm_unprepare and
drm_disable which are called anyway when associated drivers are
shutdown/removed.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231204185719.569021-3-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204185719.569021-3-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-himax-hx8394.c | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-himax-hx8394.c b/drivers/gpu/drm/panel/panel-himax-hx8394.c
index 3823ff388b96b..d8e590d5e1da4 100644
--- a/drivers/gpu/drm/panel/panel-himax-hx8394.c
+++ b/drivers/gpu/drm/panel/panel-himax-hx8394.c
@@ -390,27 +390,11 @@ static int hx8394_probe(struct mipi_dsi_device *dsi)
 	return 0;
 }
 
-static void hx8394_shutdown(struct mipi_dsi_device *dsi)
-{
-	struct hx8394 *ctx = mipi_dsi_get_drvdata(dsi);
-	int ret;
-
-	ret = drm_panel_disable(&ctx->panel);
-	if (ret < 0)
-		dev_err(&dsi->dev, "Failed to disable panel: %d\n", ret);
-
-	ret = drm_panel_unprepare(&ctx->panel);
-	if (ret < 0)
-		dev_err(&dsi->dev, "Failed to unprepare panel: %d\n", ret);
-}
-
 static void hx8394_remove(struct mipi_dsi_device *dsi)
 {
 	struct hx8394 *ctx = mipi_dsi_get_drvdata(dsi);
 	int ret;
 
-	hx8394_shutdown(dsi);
-
 	ret = mipi_dsi_detach(dsi);
 	if (ret < 0)
 		dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret);
@@ -427,7 +411,6 @@ MODULE_DEVICE_TABLE(of, hx8394_of_match);
 static struct mipi_dsi_driver hx8394_driver = {
 	.probe	= hx8394_probe,
 	.remove = hx8394_remove,
-	.shutdown = hx8394_shutdown,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = hx8394_of_match,
-- 
GitLab


From be478bc7ab08127473ce9ed893378cc2a8762611 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Mon, 4 Dec 2023 12:57:12 -0600
Subject: [PATCH 0785/2340] dt-bindings: display: Document Himax HX8394 panel
 rotation

Document panel rotation for Himax HX8394 display panel.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20231204185719.569021-4-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204185719.569021-4-macroalpha82@gmail.com
---
 .../devicetree/bindings/display/panel/himax,hx8394.yaml         | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml b/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
index ffb35288ffbb4..3096debca55c4 100644
--- a/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
+++ b/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
@@ -31,6 +31,8 @@ properties:
 
   backlight: true
 
+  rotation: true
+
   port: true
 
   vcc-supply:
-- 
GitLab


From a695a5009c8fd239a98d98209489997ff5397d2b Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Mon, 4 Dec 2023 12:57:13 -0600
Subject: [PATCH 0786/2340] drm/panel: himax-hx8394: Add Panel Rotation Support

Add support for setting the rotation property for the Himax HX8394
panel.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231204185719.569021-5-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204185719.569021-5-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-himax-hx8394.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-himax-hx8394.c b/drivers/gpu/drm/panel/panel-himax-hx8394.c
index d8e590d5e1da4..b68ea09f47252 100644
--- a/drivers/gpu/drm/panel/panel-himax-hx8394.c
+++ b/drivers/gpu/drm/panel/panel-himax-hx8394.c
@@ -68,6 +68,7 @@ struct hx8394 {
 	struct gpio_desc *reset_gpio;
 	struct regulator *vcc;
 	struct regulator *iovcc;
+	enum drm_panel_orientation orientation;
 
 	const struct hx8394_panel_desc *desc;
 };
@@ -324,12 +325,20 @@ static int hx8394_get_modes(struct drm_panel *panel,
 	return 1;
 }
 
+static enum drm_panel_orientation hx8394_get_orientation(struct drm_panel *panel)
+{
+	struct hx8394 *ctx = panel_to_hx8394(panel);
+
+	return ctx->orientation;
+}
+
 static const struct drm_panel_funcs hx8394_drm_funcs = {
 	.disable   = hx8394_disable,
 	.unprepare = hx8394_unprepare,
 	.prepare   = hx8394_prepare,
 	.enable	   = hx8394_enable,
 	.get_modes = hx8394_get_modes,
+	.get_orientation = hx8394_get_orientation,
 };
 
 static int hx8394_probe(struct mipi_dsi_device *dsi)
@@ -347,6 +356,12 @@ static int hx8394_probe(struct mipi_dsi_device *dsi)
 		return dev_err_probe(dev, PTR_ERR(ctx->reset_gpio),
 				     "Failed to get reset gpio\n");
 
+	ret = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation);
+	if (ret < 0) {
+		dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, ret);
+		return ret;
+	}
+
 	mipi_dsi_set_drvdata(dsi, ctx);
 
 	ctx->dev = dev;
-- 
GitLab


From 00830a0d8f0d820335e7beb26e251069d90f2574 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Mon, 4 Dec 2023 12:57:14 -0600
Subject: [PATCH 0787/2340] dt-bindings: display: himax-hx8394: Add Powkiddy
 X55 panel

Add compatible string for the Powkiddy X55 panel.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20231204185719.569021-6-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204185719.569021-6-macroalpha82@gmail.com
---
 .../devicetree/bindings/display/panel/himax,hx8394.yaml          | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml b/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
index 3096debca55c4..916bb7f942062 100644
--- a/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
+++ b/Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
@@ -23,6 +23,7 @@ properties:
     items:
       - enum:
           - hannstar,hsd060bhw4
+          - powkiddy,x55-panel
       - const: himax,hx8394
 
   reg: true
-- 
GitLab


From 38db985966d2f0f89f7e1891253489a16936fc5e Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Mon, 4 Dec 2023 12:57:15 -0600
Subject: [PATCH 0788/2340] drm/panel: himax-hx8394: Add Support for Powkiddy
 X55 panel

Add support for the Powkiddy X55 panel as used on the Powkiddy X55
handheld gaming console. This panel uses a Himax HX8394 display
controller and requires a vendor provided init sequence. The display
resolution is 720x1280 and is 67mm by 121mm as measured with calipers.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231204185719.569021-7-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204185719.569021-7-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-himax-hx8394.c | 137 +++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-himax-hx8394.c b/drivers/gpu/drm/panel/panel-himax-hx8394.c
index b68ea09f47252..ff0dc08b98297 100644
--- a/drivers/gpu/drm/panel/panel-himax-hx8394.c
+++ b/drivers/gpu/drm/panel/panel-himax-hx8394.c
@@ -38,6 +38,7 @@
 #define HX8394_CMD_SETMIPI	  0xba
 #define HX8394_CMD_SETOTP	  0xbb
 #define HX8394_CMD_SETREGBANK	  0xbd
+#define HX8394_CMD_UNKNOWN5	  0xbf
 #define HX8394_CMD_UNKNOWN1	  0xc0
 #define HX8394_CMD_SETDGCLUT	  0xc1
 #define HX8394_CMD_SETID	  0xc3
@@ -52,6 +53,7 @@
 #define HX8394_CMD_SETGIP1	  0xd5
 #define HX8394_CMD_SETGIP2	  0xd6
 #define HX8394_CMD_SETGPO	  0xd6
+#define HX8394_CMD_UNKNOWN4	  0xd8
 #define HX8394_CMD_SETSCALING	  0xdd
 #define HX8394_CMD_SETIDLE	  0xdf
 #define HX8394_CMD_SETGAMMA	  0xe0
@@ -203,6 +205,140 @@ static const struct hx8394_panel_desc hsd060bhw4_desc = {
 	.init_sequence = hsd060bhw4_init_sequence,
 };
 
+static int powkiddy_x55_init_sequence(struct hx8394 *ctx)
+{
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+
+	/* 5.19.8 SETEXTC: Set extension command (B9h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETEXTC,
+			       0xff, 0x83, 0x94);
+
+	/* 5.19.9 SETMIPI: Set MIPI control (BAh) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETMIPI,
+			       0x63, 0x03, 0x68, 0x6b, 0xb2, 0xc0);
+
+	/* 5.19.2 SETPOWER: Set power (B1h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETPOWER,
+			       0x48, 0x12, 0x72, 0x09, 0x32, 0x54, 0x71, 0x71, 0x57, 0x47);
+
+	/* 5.19.3 SETDISP: Set display related register (B2h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETDISP,
+			       0x00, 0x80, 0x64, 0x2c, 0x16, 0x2f);
+
+	/* 5.19.4 SETCYC: Set display waveform cycles (B4h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETCYC,
+			       0x73, 0x74, 0x73, 0x74, 0x73, 0x74, 0x01, 0x0c, 0x86, 0x75,
+			       0x00, 0x3f, 0x73, 0x74, 0x73, 0x74, 0x73, 0x74, 0x01, 0x0c,
+			       0x86);
+
+	/* 5.19.5 SETVCOM: Set VCOM voltage (B6h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETVCOM,
+			       0x6e, 0x6e);
+
+	/* 5.19.19 SETGIP0: Set GIP Option0 (D3h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGIP0,
+			       0x00, 0x00, 0x07, 0x07, 0x40, 0x07, 0x0c, 0x00, 0x08, 0x10,
+			       0x08, 0x00, 0x08, 0x54, 0x15, 0x0a, 0x05, 0x0a, 0x02, 0x15,
+			       0x06, 0x05, 0x06, 0x47, 0x44, 0x0a, 0x0a, 0x4b, 0x10, 0x07,
+			       0x07, 0x0c, 0x40);
+
+	/* 5.19.20 Set GIP Option1 (D5h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGIP1,
+			       0x1c, 0x1c, 0x1d, 0x1d, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+			       0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x24, 0x25, 0x18, 0x18,
+			       0x26, 0x27, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+			       0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x20, 0x21,
+			       0x18, 0x18, 0x18, 0x18);
+
+	/* 5.19.21 Set GIP Option2 (D6h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGIP2,
+			       0x1c, 0x1c, 0x1d, 0x1d, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
+			       0x01, 0x00, 0x0b, 0x0a, 0x09, 0x08, 0x21, 0x20, 0x18, 0x18,
+			       0x27, 0x26, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+			       0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x25, 0x24,
+			       0x18, 0x18, 0x18, 0x18);
+
+	/* 5.19.25 SETGAMMA: Set gamma curve related setting (E0h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETGAMMA,
+			       0x00, 0x0a, 0x15, 0x1b, 0x1e, 0x21, 0x24, 0x22, 0x47, 0x56,
+			       0x65, 0x66, 0x6e, 0x82, 0x88, 0x8b, 0x9a, 0x9d, 0x98, 0xa8,
+			       0xb9, 0x5d, 0x5c, 0x61, 0x66, 0x6a, 0x6f, 0x7f, 0x7f, 0x00,
+			       0x0a, 0x15, 0x1b, 0x1e, 0x21, 0x24, 0x22, 0x47, 0x56, 0x65,
+			       0x65, 0x6e, 0x81, 0x87, 0x8b, 0x98, 0x9d, 0x99, 0xa8, 0xba,
+			       0x5d, 0x5d, 0x62, 0x67, 0x6b, 0x72, 0x7f, 0x7f);
+
+	/* Unknown command, not listed in the HX8394-F datasheet */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN1,
+			       0x1f, 0x31);
+
+	/* 5.19.17 SETPANEL (CCh) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETPANEL,
+			       0x0b);
+
+	/* Unknown command, not listed in the HX8394-F datasheet */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN3,
+			       0x02);
+
+	/* 5.19.11 Set register bank (BDh) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK,
+			       0x02);
+
+	/* Unknown command, not listed in the HX8394-F datasheet */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN4,
+			       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			       0xff, 0xff);
+
+	/* 5.19.11 Set register bank (BDh) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK,
+			       0x00);
+
+	/* 5.19.11 Set register bank (BDh) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK,
+			       0x01);
+
+	/* 5.19.2 SETPOWER: Set power (B1h) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETPOWER,
+			       0x00);
+
+	/* 5.19.11 Set register bank (BDh) */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_SETREGBANK,
+			       0x00);
+
+	/* Unknown command, not listed in the HX8394-F datasheet */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN5,
+			       0x40, 0x81, 0x50, 0x00, 0x1a, 0xfc, 0x01);
+
+	/* Unknown command, not listed in the HX8394-F datasheet */
+	mipi_dsi_dcs_write_seq(dsi, HX8394_CMD_UNKNOWN2,
+			       0xed);
+
+	return 0;
+}
+
+static const struct drm_display_mode powkiddy_x55_mode = {
+	.hdisplay	= 720,
+	.hsync_start	= 720 + 44,
+	.hsync_end	= 720 + 44 + 20,
+	.htotal		= 720 + 44 + 20 + 20,
+	.vdisplay	= 1280,
+	.vsync_start	= 1280 + 12,
+	.vsync_end	= 1280 + 12 + 10,
+	.vtotal		= 1280 + 12 + 10 + 10,
+	.clock		= 63290,
+	.flags		= DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	.width_mm	= 67,
+	.height_mm	= 121,
+};
+
+static const struct hx8394_panel_desc powkiddy_x55_desc = {
+	.mode = &powkiddy_x55_mode,
+	.lanes = 4,
+	.mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+		      MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET,
+	.format = MIPI_DSI_FMT_RGB888,
+	.init_sequence = powkiddy_x55_init_sequence,
+};
+
 static int hx8394_enable(struct drm_panel *panel)
 {
 	struct hx8394 *ctx = panel_to_hx8394(panel);
@@ -419,6 +555,7 @@ static void hx8394_remove(struct mipi_dsi_device *dsi)
 
 static const struct of_device_id hx8394_of_match[] = {
 	{ .compatible = "hannstar,hsd060bhw4", .data = &hsd060bhw4_desc },
+	{ .compatible = "powkiddy,x55-panel", .data = &powkiddy_x55_desc },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, hx8394_of_match);
-- 
GitLab


From 68c193c8d4a403222ce51c8b08bd1715f8b74274 Mon Sep 17 00:00:00 2001
From: Marco Felsch <m.felsch@pengutronix.de>
Date: Thu, 23 Nov 2023 18:08:04 +0100
Subject: [PATCH 0789/2340] drm/panel: ilitek-ili9881c: make use of
 prepare_prev_first

The panel.prepare() call requires an initialized MIPI-DSI host, so set
the prepare_prev_first flag to indicate that the host must be
initialized first.

Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231123-drm-panel-ili9881c-am8001280g-v1-1-fdf4d624c211@pengutronix.de
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123-drm-panel-ili9881c-am8001280g-v1-1-fdf4d624c211@pengutronix.de
---
 drivers/gpu/drm/panel/panel-ilitek-ili9881c.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c
index 7838947a1bf3c..0c911ed9141b2 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c
@@ -1094,6 +1094,8 @@ static int ili9881c_dsi_probe(struct mipi_dsi_device *dsi)
 		return ret;
 	}
 
+	ctx->panel.prepare_prev_first = true;
+
 	ret = drm_panel_of_backlight(&ctx->panel);
 	if (ret)
 		return ret;
-- 
GitLab


From 7ff02f82c3e9ddd5dd81957c8659d350261196ae Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 23 Nov 2023 18:08:05 +0100
Subject: [PATCH 0790/2340] dt-bindings: ili9881c: Add Ampire AM8001280G LCD
 panel

Document the compatible value for Ampire AM8001280G LCD panels.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20231123-drm-panel-ili9881c-am8001280g-v1-2-fdf4d624c211@pengutronix.de
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123-drm-panel-ili9881c-am8001280g-v1-2-fdf4d624c211@pengutronix.de
---
 .../devicetree/bindings/display/panel/ilitek,ili9881c.yaml       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml
index e7ab6224b52e0..b1e624be3e334 100644
--- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml
@@ -16,6 +16,7 @@ properties:
   compatible:
     items:
       - enum:
+          - ampire,am8001280g
           - bananapi,lhr050h41
           - feixin,k101-im2byl02
           - tdo,tl050hdv35
-- 
GitLab


From 2748848ceaf32671927c3b19672ba3104a1dba7e Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 23 Nov 2023 18:08:06 +0100
Subject: [PATCH 0791/2340] drm/panel: ilitek-ili9881c: Add Ampire AM8001280G
 LCD panel

Add support for Ampire AM8001280G LCD panels.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231123-drm-panel-ili9881c-am8001280g-v1-3-fdf4d624c211@pengutronix.de
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123-drm-panel-ili9881c-am8001280g-v1-3-fdf4d624c211@pengutronix.de
---
 drivers/gpu/drm/panel/panel-ilitek-ili9881c.c | 223 ++++++++++++++++++
 1 file changed, 223 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c
index 0c911ed9141b2..2ffe5f68a8903 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c
@@ -830,6 +830,203 @@ static const struct ili9881c_instr w552946ab_init[] = {
 	ILI9881C_SWITCH_PAGE_INSTR(0),
 };
 
+static const struct ili9881c_instr am8001280g_init[] = {
+	ILI9881C_SWITCH_PAGE_INSTR(3),
+	ILI9881C_COMMAND_INSTR(0x01, 0x00),
+	ILI9881C_COMMAND_INSTR(0x02, 0x00),
+	ILI9881C_COMMAND_INSTR(0x03, 0x73),
+	ILI9881C_COMMAND_INSTR(0x04, 0xD3),
+	ILI9881C_COMMAND_INSTR(0x05, 0x00),
+	ILI9881C_COMMAND_INSTR(0x06, 0x0A),
+	ILI9881C_COMMAND_INSTR(0x07, 0x0E),
+	ILI9881C_COMMAND_INSTR(0x08, 0x00),
+	ILI9881C_COMMAND_INSTR(0x09, 0x01),
+	ILI9881C_COMMAND_INSTR(0x0a, 0x01),
+	ILI9881C_COMMAND_INSTR(0x0b, 0x01),
+	ILI9881C_COMMAND_INSTR(0x0c, 0x01),
+	ILI9881C_COMMAND_INSTR(0x0d, 0x01),
+	ILI9881C_COMMAND_INSTR(0x0e, 0x01),
+	ILI9881C_COMMAND_INSTR(0x0f, 0x01),
+	ILI9881C_COMMAND_INSTR(0x10, 0x01),
+	ILI9881C_COMMAND_INSTR(0x11, 0x00),
+	ILI9881C_COMMAND_INSTR(0x12, 0x00),
+	ILI9881C_COMMAND_INSTR(0x13, 0x00),
+	ILI9881C_COMMAND_INSTR(0x14, 0x00),
+	ILI9881C_COMMAND_INSTR(0x15, 0x00),
+	ILI9881C_COMMAND_INSTR(0x16, 0x00),
+	ILI9881C_COMMAND_INSTR(0x17, 0x00),
+	ILI9881C_COMMAND_INSTR(0x18, 0x00),
+	ILI9881C_COMMAND_INSTR(0x19, 0x00),
+	ILI9881C_COMMAND_INSTR(0x1a, 0x00),
+	ILI9881C_COMMAND_INSTR(0x1b, 0x00),
+	ILI9881C_COMMAND_INSTR(0x1c, 0x00),
+	ILI9881C_COMMAND_INSTR(0x1d, 0x00),
+	ILI9881C_COMMAND_INSTR(0x1e, 0x40),
+	ILI9881C_COMMAND_INSTR(0x1f, 0x80),
+	ILI9881C_COMMAND_INSTR(0x20, 0x06),
+	ILI9881C_COMMAND_INSTR(0x21, 0x01),
+	ILI9881C_COMMAND_INSTR(0x22, 0x00),
+	ILI9881C_COMMAND_INSTR(0x23, 0x00),
+	ILI9881C_COMMAND_INSTR(0x24, 0x00),
+	ILI9881C_COMMAND_INSTR(0x25, 0x00),
+	ILI9881C_COMMAND_INSTR(0x26, 0x00),
+	ILI9881C_COMMAND_INSTR(0x27, 0x00),
+	ILI9881C_COMMAND_INSTR(0x28, 0x33),
+	ILI9881C_COMMAND_INSTR(0x29, 0x03),
+	ILI9881C_COMMAND_INSTR(0x2a, 0x00),
+	ILI9881C_COMMAND_INSTR(0x2b, 0x00),
+	ILI9881C_COMMAND_INSTR(0x2c, 0x00),
+	ILI9881C_COMMAND_INSTR(0x2d, 0x00),
+	ILI9881C_COMMAND_INSTR(0x2e, 0x00),
+	ILI9881C_COMMAND_INSTR(0x2f, 0x00),
+	ILI9881C_COMMAND_INSTR(0x30, 0x00),
+	ILI9881C_COMMAND_INSTR(0x31, 0x00),
+	ILI9881C_COMMAND_INSTR(0x32, 0x00),
+	ILI9881C_COMMAND_INSTR(0x33, 0x00),
+	ILI9881C_COMMAND_INSTR(0x34, 0x03),
+	ILI9881C_COMMAND_INSTR(0x35, 0x00),
+	ILI9881C_COMMAND_INSTR(0x36, 0x03),
+	ILI9881C_COMMAND_INSTR(0x37, 0x00),
+	ILI9881C_COMMAND_INSTR(0x38, 0x00),
+	ILI9881C_COMMAND_INSTR(0x39, 0x00),
+	ILI9881C_COMMAND_INSTR(0x3a, 0x40),
+	ILI9881C_COMMAND_INSTR(0x3b, 0x40),
+	ILI9881C_COMMAND_INSTR(0x3c, 0x00),
+	ILI9881C_COMMAND_INSTR(0x3d, 0x00),
+	ILI9881C_COMMAND_INSTR(0x3e, 0x00),
+	ILI9881C_COMMAND_INSTR(0x3f, 0x00),
+	ILI9881C_COMMAND_INSTR(0x40, 0x00),
+	ILI9881C_COMMAND_INSTR(0x41, 0x00),
+	ILI9881C_COMMAND_INSTR(0x42, 0x00),
+	ILI9881C_COMMAND_INSTR(0x43, 0x00),
+	ILI9881C_COMMAND_INSTR(0x44, 0x00),
+
+	ILI9881C_COMMAND_INSTR(0x50, 0x01),
+	ILI9881C_COMMAND_INSTR(0x51, 0x23),
+	ILI9881C_COMMAND_INSTR(0x52, 0x45),
+	ILI9881C_COMMAND_INSTR(0x53, 0x67),
+	ILI9881C_COMMAND_INSTR(0x54, 0x89),
+	ILI9881C_COMMAND_INSTR(0x55, 0xab),
+	ILI9881C_COMMAND_INSTR(0x56, 0x01),
+	ILI9881C_COMMAND_INSTR(0x57, 0x23),
+	ILI9881C_COMMAND_INSTR(0x58, 0x45),
+	ILI9881C_COMMAND_INSTR(0x59, 0x67),
+	ILI9881C_COMMAND_INSTR(0x5a, 0x89),
+	ILI9881C_COMMAND_INSTR(0x5b, 0xab),
+	ILI9881C_COMMAND_INSTR(0x5c, 0xcd),
+	ILI9881C_COMMAND_INSTR(0x5d, 0xef),
+
+	ILI9881C_COMMAND_INSTR(0x5e, 0x11),
+	ILI9881C_COMMAND_INSTR(0x5f, 0x02),
+	ILI9881C_COMMAND_INSTR(0x60, 0x00),
+	ILI9881C_COMMAND_INSTR(0x61, 0x01),
+	ILI9881C_COMMAND_INSTR(0x62, 0x0D),
+	ILI9881C_COMMAND_INSTR(0x63, 0x0C),
+	ILI9881C_COMMAND_INSTR(0x64, 0x0F),
+	ILI9881C_COMMAND_INSTR(0x65, 0x0E),
+	ILI9881C_COMMAND_INSTR(0x66, 0x06),
+	ILI9881C_COMMAND_INSTR(0x67, 0x07),
+	ILI9881C_COMMAND_INSTR(0x68, 0x02),
+	ILI9881C_COMMAND_INSTR(0x69, 0x02),
+	ILI9881C_COMMAND_INSTR(0x6a, 0x08),
+	ILI9881C_COMMAND_INSTR(0x6b, 0x02),
+	ILI9881C_COMMAND_INSTR(0x6c, 0x02),
+	ILI9881C_COMMAND_INSTR(0x6d, 0x02),
+	ILI9881C_COMMAND_INSTR(0x6e, 0x02),
+	ILI9881C_COMMAND_INSTR(0x6f, 0x02),
+	ILI9881C_COMMAND_INSTR(0x70, 0x02),
+	ILI9881C_COMMAND_INSTR(0x71, 0x02),
+	ILI9881C_COMMAND_INSTR(0x72, 0x02),
+	ILI9881C_COMMAND_INSTR(0x73, 0x02),
+	ILI9881C_COMMAND_INSTR(0x74, 0x02),
+	ILI9881C_COMMAND_INSTR(0x75, 0x02),
+	ILI9881C_COMMAND_INSTR(0x76, 0x00),
+	ILI9881C_COMMAND_INSTR(0x77, 0x01),
+	ILI9881C_COMMAND_INSTR(0x78, 0x0D),
+	ILI9881C_COMMAND_INSTR(0x79, 0x0C),
+	ILI9881C_COMMAND_INSTR(0x7a, 0x0F),
+	ILI9881C_COMMAND_INSTR(0x7b, 0x0E),
+	ILI9881C_COMMAND_INSTR(0x7c, 0x06),
+	ILI9881C_COMMAND_INSTR(0x7d, 0x07),
+	ILI9881C_COMMAND_INSTR(0x7e, 0x02),
+	ILI9881C_COMMAND_INSTR(0x7f, 0x02),
+	ILI9881C_COMMAND_INSTR(0x80, 0x08),
+	ILI9881C_COMMAND_INSTR(0x81, 0x02),
+	ILI9881C_COMMAND_INSTR(0x82, 0x02),
+	ILI9881C_COMMAND_INSTR(0x83, 0x02),
+	ILI9881C_COMMAND_INSTR(0x84, 0x02),
+	ILI9881C_COMMAND_INSTR(0x85, 0x02),
+	ILI9881C_COMMAND_INSTR(0x86, 0x02),
+	ILI9881C_COMMAND_INSTR(0x87, 0x02),
+	ILI9881C_COMMAND_INSTR(0x88, 0x02),
+	ILI9881C_COMMAND_INSTR(0x89, 0x02),
+	ILI9881C_COMMAND_INSTR(0x8A, 0x02),
+
+	ILI9881C_SWITCH_PAGE_INSTR(4),
+	ILI9881C_COMMAND_INSTR(0x6c, 0x15),
+	ILI9881C_COMMAND_INSTR(0x6e, 0x30),
+	ILI9881C_COMMAND_INSTR(0x6f, 0x33),
+	ILI9881C_COMMAND_INSTR(0x8d, 0x15),
+	ILI9881C_COMMAND_INSTR(0x3a, 0xa4),
+	ILI9881C_COMMAND_INSTR(0x87, 0xba),
+	ILI9881C_COMMAND_INSTR(0x26, 0x76),
+	ILI9881C_COMMAND_INSTR(0xb2, 0xd1),
+
+	ILI9881C_SWITCH_PAGE_INSTR(1),
+	ILI9881C_COMMAND_INSTR(0x22, 0x0A),
+	ILI9881C_COMMAND_INSTR(0x31, 0x0B),
+	ILI9881C_COMMAND_INSTR(0x50, 0xa5),
+	ILI9881C_COMMAND_INSTR(0x51, 0xa0),
+	ILI9881C_COMMAND_INSTR(0x53, 0x70),
+	ILI9881C_COMMAND_INSTR(0x55, 0x7A),
+	ILI9881C_COMMAND_INSTR(0x60, 0x14),
+
+	ILI9881C_COMMAND_INSTR(0xA0, 0x00),
+	ILI9881C_COMMAND_INSTR(0xA1, 0x53),
+	ILI9881C_COMMAND_INSTR(0xA2, 0x50),
+	ILI9881C_COMMAND_INSTR(0xA3, 0x20),
+	ILI9881C_COMMAND_INSTR(0xA4, 0x27),
+	ILI9881C_COMMAND_INSTR(0xA5, 0x33),
+	ILI9881C_COMMAND_INSTR(0xA6, 0x25),
+	ILI9881C_COMMAND_INSTR(0xA7, 0x25),
+	ILI9881C_COMMAND_INSTR(0xA8, 0xD4),
+	ILI9881C_COMMAND_INSTR(0xA9, 0x1A),
+	ILI9881C_COMMAND_INSTR(0xAA, 0x2B),
+	ILI9881C_COMMAND_INSTR(0xAB, 0xB5),
+	ILI9881C_COMMAND_INSTR(0xAC, 0x19),
+	ILI9881C_COMMAND_INSTR(0xAD, 0x18),
+	ILI9881C_COMMAND_INSTR(0xAE, 0x53),
+	ILI9881C_COMMAND_INSTR(0xAF, 0x1A),
+	ILI9881C_COMMAND_INSTR(0xB0, 0x25),
+	ILI9881C_COMMAND_INSTR(0xB1, 0x62),
+	ILI9881C_COMMAND_INSTR(0xB2, 0x6A),
+	ILI9881C_COMMAND_INSTR(0xB3, 0x31),
+
+	ILI9881C_COMMAND_INSTR(0xC0, 0x00),
+	ILI9881C_COMMAND_INSTR(0xC1, 0x53),
+	ILI9881C_COMMAND_INSTR(0xC2, 0x50),
+	ILI9881C_COMMAND_INSTR(0xC3, 0x20),
+	ILI9881C_COMMAND_INSTR(0xC4, 0x27),
+	ILI9881C_COMMAND_INSTR(0xC5, 0x33),
+	ILI9881C_COMMAND_INSTR(0xC6, 0x25),
+	ILI9881C_COMMAND_INSTR(0xC7, 0x25),
+	ILI9881C_COMMAND_INSTR(0xC8, 0xD4),
+	ILI9881C_COMMAND_INSTR(0xC9, 0x1A),
+	ILI9881C_COMMAND_INSTR(0xCA, 0x2B),
+	ILI9881C_COMMAND_INSTR(0xCB, 0xB5),
+	ILI9881C_COMMAND_INSTR(0xCC, 0x19),
+	ILI9881C_COMMAND_INSTR(0xCD, 0x18),
+	ILI9881C_COMMAND_INSTR(0xCE, 0x53),
+	ILI9881C_COMMAND_INSTR(0xCF, 0x1A),
+	ILI9881C_COMMAND_INSTR(0xD0, 0x25),
+	ILI9881C_COMMAND_INSTR(0xD1, 0x62),
+	ILI9881C_COMMAND_INSTR(0xD2, 0x6A),
+	ILI9881C_COMMAND_INSTR(0xD3, 0x31),
+	ILI9881C_SWITCH_PAGE_INSTR(0),
+	ILI9881C_COMMAND_INSTR(MIPI_DCS_WRITE_CONTROL_DISPLAY, 0x2c),
+	ILI9881C_COMMAND_INSTR(MIPI_DCS_WRITE_POWER_SAVE, 0x00),
+};
+
 static inline struct ili9881c *panel_to_ili9881c(struct drm_panel *panel)
 {
 	return container_of(panel, struct ili9881c, panel);
@@ -1014,6 +1211,23 @@ static const struct drm_display_mode w552946aba_default_mode = {
 	.height_mm	= 121,
 };
 
+static const struct drm_display_mode am8001280g_default_mode = {
+	.clock		= 67911,
+
+	.hdisplay	= 800,
+	.hsync_start	= 800 + 20,
+	.hsync_end	= 800 + 20 + 32,
+	.htotal		= 800 + 20 + 32 + 20,
+
+	.vdisplay	= 1280,
+	.vsync_start	= 1280 + 6,
+	.vsync_end	= 1280 + 6 + 8,
+	.vtotal		= 1280 + 6 + 8 + 4,
+
+	.width_mm	= 94,
+	.height_mm	= 151,
+};
+
 static int ili9881c_get_modes(struct drm_panel *panel,
 			      struct drm_connector *connector)
 {
@@ -1147,11 +1361,20 @@ static const struct ili9881c_desc w552946aba_desc = {
 		      MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET,
 };
 
+static const struct ili9881c_desc am8001280g_desc = {
+	.init = am8001280g_init,
+	.init_length = ARRAY_SIZE(am8001280g_init),
+	.mode = &am8001280g_default_mode,
+	.mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+		      MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM,
+};
+
 static const struct of_device_id ili9881c_of_match[] = {
 	{ .compatible = "bananapi,lhr050h41", .data = &lhr050h41_desc },
 	{ .compatible = "feixin,k101-im2byl02", .data = &k101_im2byl02_desc },
 	{ .compatible = "tdo,tl050hdv35", .data = &tl050hdv35_desc },
 	{ .compatible = "wanchanglong,w552946aba", .data = &w552946aba_desc },
+	{ .compatible = "ampire,am8001280g", .data = &am8001280g_desc },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, ili9881c_of_match);
-- 
GitLab


From 2a5244a04e751c8617d5e7707550d97a83b1102d Mon Sep 17 00:00:00 2001
From: Michael Walle <mwalle@kernel.org>
Date: Thu, 23 Nov 2023 11:24:03 +0100
Subject: [PATCH 0792/2340] dt-bindings: display: simple: add Evervision
 VGG644804 panel

Add Evervision VGG644804 5.7" 640x480 LVDS panel compatible string.

Signed-off-by: Michael Walle <mwalle@kernel.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20231123102404.2022201-1-mwalle@kernel.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123102404.2022201-1-mwalle@kernel.org
---
 .../devicetree/bindings/display/panel/panel-simple.yaml         | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
index f00275e505316..2021aa82871af 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@@ -146,6 +146,8 @@ properties:
       - edt,etmv570g2dhu
         # E Ink VB3300-KCA
       - eink,vb3300-kca
+        # Evervision Electronics Co. Ltd. VGG644804 5.7" VGA TFT LCD Panel
+      - evervision,vgg644804
         # Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel
       - evervision,vgg804821
         # Foxlink Group 5" WVGA TFT LCD panel
-- 
GitLab


From 1319f2178bdf1898a76ea8c4f00d57b240bbc5fd Mon Sep 17 00:00:00 2001
From: Michael Walle <mwalle@kernel.org>
Date: Thu, 23 Nov 2023 11:24:04 +0100
Subject: [PATCH 0793/2340] drm/panel-simple: add Evervision VGG644804 panel
 entry

Timings taken from the datasheet, although sometimes there are just
typical values and it's not clear if they are no min and max values or
if you must use the typical value exactly. To make things worse, there
is no back porch but only a combined sync and back porch length.

Unfortunately, there is not public datasheet. Therefore, here are the
relevant timings:
                 | min |  typ   | max |
-----------------+-----+--------+-----+
CLK frequency    |  -  | 25.175 |  -  |
HS period        |  -  |   800  |  -  |
HS pulse width   |  5  |    30  |  -  |
HS-DEN time      | 112 |   144  | 175 |
DEN pulse width  |  -  |   640  |  -  |
VS pulse width   |  1  |     3  |  5  |
VS-DEN time      |  -  |    35  |  -  |
VS period        |  -  |   525  |  -  |

Signed-off-by: Michael Walle <mwalle@kernel.org>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231123102404.2022201-2-mwalle@kernel.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123102404.2022201-2-mwalle@kernel.org
---
 drivers/gpu/drm/panel/panel-simple.c | 30 ++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 876e1b40cfb3b..8017ad33cf18d 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2002,6 +2002,33 @@ static const struct panel_desc eink_vb3300_kca = {
 	.connector_type = DRM_MODE_CONNECTOR_DPI,
 };
 
+static const struct display_timing evervision_vgg644804_timing = {
+	.pixelclock = { 25175000, 25175000, 25175000 },
+	.hactive = { 640, 640, 640 },
+	.hfront_porch = { 16, 16, 16 },
+	.hback_porch = { 82, 114, 170 },
+	.hsync_len = { 5, 30, 30 },
+	.vactive = { 480, 480, 480 },
+	.vfront_porch = { 10, 10, 10 },
+	.vback_porch = { 30, 32, 34 },
+	.vsync_len = { 1, 3, 5 },
+	.flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW |
+		 DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE |
+		 DISPLAY_FLAGS_SYNC_POSEDGE,
+};
+
+static const struct panel_desc evervision_vgg644804 = {
+	.timings = &evervision_vgg644804_timing,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 115,
+		.height = 86,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE,
+};
+
 static const struct display_timing evervision_vgg804821_timing = {
 	.pixelclock = { 27600000, 33300000, 50000000 },
 	.hactive = { 800, 800, 800 },
@@ -4366,6 +4393,9 @@ static const struct of_device_id platform_of_match[] = {
 	}, {
 		.compatible = "eink,vb3300-kca",
 		.data = &eink_vb3300_kca,
+	}, {
+		.compatible = "evervision,vgg644804",
+		.data = &evervision_vgg644804,
 	}, {
 		.compatible = "evervision,vgg804821",
 		.data = &evervision_vgg804821,
-- 
GitLab


From a4f5892914ca7709ea6d191f0edace93a5935966 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Mon, 4 Dec 2023 12:42:13 +0100
Subject: [PATCH 0794/2340] drm/panfrost: Ignore core_mask for poweroff and
 disable PWRTRANS irq

Some SoCs may be equipped with a GPU containing two core groups
and this is exactly the case of Samsung's Exynos 5422 featuring
an ARM Mali-T628 MP6 GPU: the support for this GPU in Panfrost
is partial, as this driver currently supports using only one
core group and that's reflected on all parts of it, including
the power on (and power off, previously to this patch) function.

The issue with this is that even though executing the soft reset
operation should power off all cores unconditionally, on at least
one platform we're seeing a crash that seems to be happening due
to an interrupt firing which may be because we are calling power
transition only on the first core group, leaving the second one
unchanged, or because ISR execution was pending before entering
the panfrost_gpu_power_off() function and executed after powering
off the GPU cores, or all of the above.

Finally, solve this by:
 - Avoid to enable the power transition interrupt on reset; and
 - Ignoring the core_mask and ask the GPU to poweroff both core groups

Fixes: 22aa1a209018 ("drm/panfrost: Really power off GPU cores in panfrost_gpu_power_off()")
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204114215.54575-2-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_gpu.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 09f5e1563ebd4..bd41617c5e4bd 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -78,7 +78,12 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
 	}
 
 	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
-	gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL);
+
+	/* Only enable the interrupts we care about */
+	gpu_write(pfdev, GPU_INT_MASK,
+		  GPU_IRQ_MASK_ERROR |
+		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED |
+		  GPU_IRQ_CLEAN_CACHES_COMPLETED);
 
 	/*
 	 * All in-flight jobs should have released their cycle
@@ -425,11 +430,10 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 
 void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 {
-	u64 core_mask = panfrost_get_core_mask(pfdev);
 	int ret;
 	u32 val;
 
-	gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present & core_mask);
+	gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
 					 val, !val, 1, 1000);
 	if (ret)
@@ -441,7 +445,7 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 	if (ret)
 		dev_err(pfdev->dev, "tiler power transition timeout");
 
-	gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present & core_mask);
+	gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
 	ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
 				 val, !val, 0, 1000);
 	if (ret)
-- 
GitLab


From b98e9a84d38ac88f9fd2accbcd45b656eeea7a04 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Mon, 4 Dec 2023 12:42:14 +0100
Subject: [PATCH 0795/2340] drm/panfrost: Add gpu_irq, mmu_irq to struct
 panfrost_device

In preparation for adding a IRQ synchronization mechanism for PM suspend,
add gpu_irq and mmu_irq variables to struct panfrost_device and change
functions panfrost_gpu_init() and panfrost_mmu_init() to use those.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204114215.54575-3-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_device.h |  2 ++
 drivers/gpu/drm/panfrost/panfrost_gpu.c    | 10 +++++-----
 drivers/gpu/drm/panfrost/panfrost_mmu.c    | 10 +++++-----
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 0fc558db6bfd5..54a8aad54259e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -94,6 +94,8 @@ struct panfrost_device {
 	struct device *dev;
 	struct drm_device *ddev;
 	struct platform_device *pdev;
+	int gpu_irq;
+	int mmu_irq;
 
 	void __iomem *iomem;
 	struct clk *clock;
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index bd41617c5e4bd..7adc4441fa147 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -454,7 +454,7 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 
 int panfrost_gpu_init(struct panfrost_device *pfdev)
 {
-	int err, irq;
+	int err;
 
 	err = panfrost_gpu_soft_reset(pfdev);
 	if (err)
@@ -469,11 +469,11 @@ int panfrost_gpu_init(struct panfrost_device *pfdev)
 
 	dma_set_max_seg_size(pfdev->dev, UINT_MAX);
 
-	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu");
-	if (irq < 0)
-		return irq;
+	pfdev->gpu_irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu");
+	if (pfdev->gpu_irq < 0)
+		return pfdev->gpu_irq;
 
-	err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler,
+	err = devm_request_irq(pfdev->dev, pfdev->gpu_irq, panfrost_gpu_irq_handler,
 			       IRQF_SHARED, KBUILD_MODNAME "-gpu", pfdev);
 	if (err) {
 		dev_err(pfdev->dev, "failed to request gpu irq");
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 846dd697c4106..ac4296c1e54b9 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -753,13 +753,13 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
 
 int panfrost_mmu_init(struct panfrost_device *pfdev)
 {
-	int err, irq;
+	int err;
 
-	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu");
-	if (irq < 0)
-		return irq;
+	pfdev->mmu_irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu");
+	if (pfdev->mmu_irq < 0)
+		return pfdev->mmu_irq;
 
-	err = devm_request_threaded_irq(pfdev->dev, irq,
+	err = devm_request_threaded_irq(pfdev->dev, pfdev->mmu_irq,
 					panfrost_mmu_irq_handler,
 					panfrost_mmu_irq_handler_thread,
 					IRQF_SHARED, KBUILD_MODNAME "-mmu",
-- 
GitLab


From 157ad4ccff0754d9eb57d3a4fa31264ee6e9716b Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Mon, 4 Dec 2023 12:42:15 +0100
Subject: [PATCH 0796/2340] drm/panfrost: Synchronize and disable interrupts
 before powering off

To make sure that we don't unintentionally perform any unclocked and/or
unpowered R/W operation on GPU registers, before turning off clocks and
regulators we must make sure that no GPU, JOB or MMU ISR execution is
pending: doing that requires to add a mechanism to synchronize the
interrupts on suspend.

Add functions panfrost_{gpu,job,mmu}_suspend_irq() which will perform
interrupts masking and ISR execution synchronization, and then call
those in the panfrost_device_runtime_suspend() handler in the exact
sequence of job (may require mmu!) -> mmu -> gpu.

As a side note, JOB and MMU suspend_irq functions needed some special
treatment: as their interrupt handlers will unmask interrupts, it was
necessary to add an `is_suspended` bitmap which is used to address the
possible corner case of unintentional IRQ unmasking because of ISR
execution after a call to synchronize_irq().

At resume, clear each is_suspended bit in the reset path of JOB/MMU
to allow unmasking the interrupts.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204114215.54575-4-angelogioacchino.delregno@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_device.c |  3 +++
 drivers/gpu/drm/panfrost/panfrost_device.h |  8 +++++++
 drivers/gpu/drm/panfrost/panfrost_gpu.c    | 18 +++++++++++++--
 drivers/gpu/drm/panfrost/panfrost_gpu.h    |  1 +
 drivers/gpu/drm/panfrost/panfrost_job.c    | 26 ++++++++++++++++++----
 drivers/gpu/drm/panfrost/panfrost_job.h    |  1 +
 drivers/gpu/drm/panfrost/panfrost_mmu.c    | 22 +++++++++++++++---
 drivers/gpu/drm/panfrost/panfrost_mmu.h    |  1 +
 8 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index c90ad5ee34e7a..a45e4addcc19a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -421,6 +421,9 @@ static int panfrost_device_runtime_suspend(struct device *dev)
 		return -EBUSY;
 
 	panfrost_devfreq_suspend(pfdev);
+	panfrost_job_suspend_irq(pfdev);
+	panfrost_mmu_suspend_irq(pfdev);
+	panfrost_gpu_suspend_irq(pfdev);
 	panfrost_gpu_power_off(pfdev);
 
 	return 0;
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 54a8aad54259e..62f7e35273857 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -25,6 +25,13 @@ struct panfrost_perfcnt;
 #define NUM_JOB_SLOTS 3
 #define MAX_PM_DOMAINS 5
 
+enum panfrost_drv_comp_bits {
+	PANFROST_COMP_BIT_GPU,
+	PANFROST_COMP_BIT_JOB,
+	PANFROST_COMP_BIT_MMU,
+	PANFROST_COMP_BIT_MAX
+};
+
 /**
  * enum panfrost_gpu_pm - Supported kernel power management features
  * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
@@ -109,6 +116,7 @@ struct panfrost_device {
 
 	struct panfrost_features features;
 	const struct panfrost_compatible *comp;
+	DECLARE_BITMAP(is_suspended, PANFROST_COMP_BIT_MAX);
 
 	spinlock_t as_lock;
 	unsigned long as_in_use_mask;
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 7adc4441fa147..9063ce2546422 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -22,9 +22,13 @@
 static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data)
 {
 	struct panfrost_device *pfdev = data;
-	u32 state = gpu_read(pfdev, GPU_INT_STAT);
-	u32 fault_status = gpu_read(pfdev, GPU_FAULT_STATUS);
+	u32 fault_status, state;
 
+	if (test_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended))
+		return IRQ_NONE;
+
+	fault_status = gpu_read(pfdev, GPU_FAULT_STATUS);
+	state = gpu_read(pfdev, GPU_INT_STAT);
 	if (!state)
 		return IRQ_NONE;
 
@@ -61,6 +65,8 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
 	gpu_write(pfdev, GPU_INT_MASK, 0);
 	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED);
 
+	clear_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended);
+
 	gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET);
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT,
 		val, val & GPU_IRQ_RESET_COMPLETED, 10, 10000);
@@ -452,6 +458,14 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 		dev_err(pfdev->dev, "l2 power transition timeout");
 }
 
+void panfrost_gpu_suspend_irq(struct panfrost_device *pfdev)
+{
+	set_bit(PANFROST_COMP_BIT_GPU, pfdev->is_suspended);
+
+	gpu_write(pfdev, GPU_INT_MASK, 0);
+	synchronize_irq(pfdev->gpu_irq);
+}
+
 int panfrost_gpu_init(struct panfrost_device *pfdev)
 {
 	int err;
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h
index 876fdad9f7214..d841b86504ea8 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h
@@ -15,6 +15,7 @@ u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev);
 int panfrost_gpu_soft_reset(struct panfrost_device *pfdev);
 void panfrost_gpu_power_on(struct panfrost_device *pfdev);
 void panfrost_gpu_power_off(struct panfrost_device *pfdev);
+void panfrost_gpu_suspend_irq(struct panfrost_device *pfdev);
 
 void panfrost_cycle_counter_get(struct panfrost_device *pfdev);
 void panfrost_cycle_counter_put(struct panfrost_device *pfdev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index f9446e197428d..0c2dbf6ef2a55 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -405,6 +405,8 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
 	int j;
 	u32 irq_mask = 0;
 
+	clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
+
 	for (j = 0; j < NUM_JOB_SLOTS; j++) {
 		irq_mask |= MK_JS_MASK(j);
 	}
@@ -413,6 +415,14 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
 	job_write(pfdev, JOB_INT_MASK, irq_mask);
 }
 
+void panfrost_job_suspend_irq(struct panfrost_device *pfdev)
+{
+	set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
+
+	job_write(pfdev, JOB_INT_MASK, 0);
+	synchronize_irq(pfdev->js->irq);
+}
+
 static void panfrost_job_handle_err(struct panfrost_device *pfdev,
 				    struct panfrost_job *job,
 				    unsigned int js)
@@ -792,17 +802,25 @@ static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
 	struct panfrost_device *pfdev = data;
 
 	panfrost_job_handle_irqs(pfdev);
-	job_write(pfdev, JOB_INT_MASK,
-		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
-		  GENMASK(NUM_JOB_SLOTS - 1, 0));
+
+	/* Enable interrupts only if we're not about to get suspended */
+	if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
+		job_write(pfdev, JOB_INT_MASK,
+			  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
+			  GENMASK(NUM_JOB_SLOTS - 1, 0));
+
 	return IRQ_HANDLED;
 }
 
 static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
 {
 	struct panfrost_device *pfdev = data;
-	u32 status = job_read(pfdev, JOB_INT_STAT);
+	u32 status;
+
+	if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
+		return IRQ_NONE;
 
+	status = job_read(pfdev, JOB_INT_STAT);
 	if (!status)
 		return IRQ_NONE;
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index 17ff808dba078..ec581b97852b6 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -47,6 +47,7 @@ int panfrost_job_get_slot(struct panfrost_job *job);
 int panfrost_job_push(struct panfrost_job *job);
 void panfrost_job_put(struct panfrost_job *job);
 void panfrost_job_enable_interrupts(struct panfrost_device *pfdev);
+void panfrost_job_suspend_irq(struct panfrost_device *pfdev);
 int panfrost_job_is_idle(struct panfrost_device *pfdev);
 
 #endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index ac4296c1e54b9..f38385fe76bbb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -231,6 +231,8 @@ void panfrost_mmu_reset(struct panfrost_device *pfdev)
 {
 	struct panfrost_mmu *mmu, *mmu_tmp;
 
+	clear_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended);
+
 	spin_lock(&pfdev->as_lock);
 
 	pfdev->as_alloc_mask = 0;
@@ -670,6 +672,9 @@ static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data)
 {
 	struct panfrost_device *pfdev = data;
 
+	if (test_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended))
+		return IRQ_NONE;
+
 	if (!mmu_read(pfdev, MMU_INT_STAT))
 		return IRQ_NONE;
 
@@ -744,9 +749,12 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
 			status = mmu_read(pfdev, MMU_INT_RAWSTAT) & ~pfdev->as_faulty_mask;
 	}
 
-	spin_lock(&pfdev->as_lock);
-	mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask);
-	spin_unlock(&pfdev->as_lock);
+	/* Enable interrupts only if we're not about to get suspended */
+	if (!test_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended)) {
+		spin_lock(&pfdev->as_lock);
+		mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask);
+		spin_unlock(&pfdev->as_lock);
+	}
 
 	return IRQ_HANDLED;
 };
@@ -777,3 +785,11 @@ void panfrost_mmu_fini(struct panfrost_device *pfdev)
 {
 	mmu_write(pfdev, MMU_INT_MASK, 0);
 }
+
+void panfrost_mmu_suspend_irq(struct panfrost_device *pfdev)
+{
+	set_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended);
+
+	mmu_write(pfdev, MMU_INT_MASK, 0);
+	synchronize_irq(pfdev->mmu_irq);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h
index cc2a0d307febc..022a9a74a1141 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.h
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h
@@ -14,6 +14,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping);
 int panfrost_mmu_init(struct panfrost_device *pfdev);
 void panfrost_mmu_fini(struct panfrost_device *pfdev);
 void panfrost_mmu_reset(struct panfrost_device *pfdev);
+void panfrost_mmu_suspend_irq(struct panfrost_device *pfdev);
 
 u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu);
 void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu);
-- 
GitLab


From c50a291d621aa7abaa27b05f56d450a388b64948 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Mon, 4 Dec 2023 16:14:06 +0100
Subject: [PATCH 0797/2340] drm/gpuvm: Let drm_gpuvm_bo_put() report when the
 vm_bo object is destroyed

Some users need to release resources attached to the vm_bo object when
it's destroyed. In Panthor's case, we need to release the pin ref so
BO pages can be returned to the system when all GPU mappings are gone.

This could be done through a custom drm_gpuvm::vm_bo_free() hook, but
this has all sort of locking implications that would force us to expose
a drm_gem_shmem_unpin_locked() helper, not to mention the fact that
having a ::vm_bo_free() implementation without a ::vm_bo_alloc() one
seems odd. So let's keep things simple, and extend drm_gpuvm_bo_put()
to report when the object is destroyed.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204151406.1977285-1-boris.brezillon@collabora.com
---
 drivers/gpu/drm/drm_gpuvm.c | 8 ++++++--
 include/drm/drm_gpuvm.h     | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 07a6676bc4f9b..9c0922c1a5a27 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1535,14 +1535,18 @@ drm_gpuvm_bo_destroy(struct kref *kref)
  * hold the dma-resv or driver specific GEM gpuva lock.
  *
  * This function may only be called from non-atomic context.
+ *
+ * Returns: true if vm_bo was destroyed, false otherwise.
  */
-void
+bool
 drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo)
 {
 	might_sleep();
 
 	if (vm_bo)
-		kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy);
+		return !!kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy);
+
+	return false;
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put);
 
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index b3f82ec7fb17d..6258849382e16 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -721,7 +721,7 @@ drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo)
 	return vm_bo;
 }
 
-void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo);
+bool drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo);
 
 struct drm_gpuvm_bo *
 drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm,
-- 
GitLab


From c8fa1cc07759dde17c97796f41696a0da35c6ea7 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 03:05:28 +0300
Subject: [PATCH 0798/2340] drm/atomic: add private obj state to state dump

The drm_atomic_print_new_state() already prints private object state via
drm_atomic_private_obj_print_state(). Add private object state dumping
to __drm_state_dump(), so that it is also included into drm_state_dump()
output and into debugfs/dri/N/state file.

Reviewed-by: Rob Clark <robdclark@gmail.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203000532.1290480-2-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index f1a503aafe5aa..c31fc0b48c316 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1773,6 +1773,7 @@ static void __drm_state_dump(struct drm_device *dev, struct drm_printer *p,
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
+	struct drm_private_obj *obj;
 
 	if (!drm_drv_uses_atomic_modeset(dev))
 		return;
@@ -1801,6 +1802,14 @@ static void __drm_state_dump(struct drm_device *dev, struct drm_printer *p,
 	if (take_locks)
 		drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	drm_connector_list_iter_end(&conn_iter);
+
+	list_for_each_entry(obj, &config->privobj_list, head) {
+		if (take_locks)
+			drm_modeset_lock(&obj->lock, NULL);
+		drm_atomic_private_obj_print_state(p, obj->state);
+		if (take_locks)
+			drm_modeset_unlock(&obj->lock);
+	}
 }
 
 /**
-- 
GitLab


From 01a39f1c4f1220a4e6a25729fae87ff5794cbc52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 4 Dec 2023 22:24:43 +0200
Subject: [PATCH 0799/2340] drm/i915: Fix ADL+ tiled plane stride when the POT
 stride is smaller than the original
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

plane_view_scanout_stride() currently assumes that we had to pad the
mapping stride with dummy pages in order to align it. But that is not
the case if the original fb stride exceeds the aligned stride used
to populate the remapped view, which is calculated from the user
specified framebuffer width rather than the user specified framebuffer
stride.

Ignore the original fb stride in this case and just stick to the POT
aligned stride. Getting this wrong will cause the plane to fetch the
wrong data, and can lead to fault errors if the page tables at the
bogus location aren't even populated.

TODO: figure out if this is OK for CCS, or if we should instead increase
the width of the view to cover the entire user specified fb stride
instead...

Cc: Imre Deak <imre.deak@intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204202443.31247-1-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
---
 drivers/gpu/drm/i915/display/intel_fb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 96663f9ac431d..32a7ee1c5b343 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -1381,7 +1381,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane,
 	struct drm_i915_private *i915 = to_i915(fb->base.dev);
 	unsigned int stride_tiles;
 
-	if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14)
+	if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) &&
+	    src_stride_tiles < dst_stride_tiles)
 		stride_tiles = src_stride_tiles;
 	else
 		stride_tiles = dst_stride_tiles;
-- 
GitLab


From 8ebb1fc2e69ab8b89a425e402c7bd85e053b7b01 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Mon, 4 Dec 2023 10:54:25 +0200
Subject: [PATCH 0800/2340] drm/panel-edp: Add SDC ATNA45AF01

Add support for the SDC ATNA45AF01 panel.

Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201-x1e80100-drm-panel-edp-v2-1-b0173484631a@linaro.org
---
 drivers/gpu/drm/panel/panel-edp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 825fa2a0d8a53..78565c99b54de 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1983,6 +1983,8 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('K', 'D', 'C', 0x0809, &delay_200_500_e50, "KD116N2930A15"),
 	EDP_PANEL_ENTRY('K', 'D', 'B', 0x1120, &delay_200_500_e80_d50, "116N29-30NK-C007"),
 
+	EDP_PANEL_ENTRY('S', 'D', 'C', 0x416d, &delay_100_500_e200, "ATNA45AF01"),
+
 	EDP_PANEL_ENTRY('S', 'H', 'P', 0x1511, &delay_200_500_e50, "LQ140M1JW48"),
 	EDP_PANEL_ENTRY('S', 'H', 'P', 0x1523, &sharp_lq140m1jw46.delay, "LQ140M1JW46"),
 	EDP_PANEL_ENTRY('S', 'H', 'P', 0x154c, &delay_200_500_p2e100, "LQ116M1JW10"),
-- 
GitLab


From 4900e0396e59be233cfa636369d4eec6b40dbeca Mon Sep 17 00:00:00 2001
From: Pin-yen Lin <treapking@chromium.org>
Date: Tue, 5 Dec 2023 20:35:35 +0800
Subject: [PATCH 0801/2340] drm/edp-panel: Sort the panel entries

Move the order of CMN 0x14e5 to make the list sorted.

Signed-off-by: Pin-yen Lin <treapking@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205123630.988663-3-treapking@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 78565c99b54de..c76f186c4baaa 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1967,9 +1967,9 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x142b, &delay_200_500_e80_d50, "N140HCA-EAC"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x144f, &delay_200_500_e80_d50, "N140HGA-EA1"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x1468, &delay_200_500_e80, "N140HGA-EA1"),
-	EDP_PANEL_ENTRY('C', 'M', 'N', 0x14e5, &delay_200_500_e80_d50, "N140HGA-EA1"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d4, &delay_200_500_e80_d50, "N140HCA-EAC"),
 	EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d6, &delay_200_500_e80_d50, "N140BGA-EA4"),
+	EDP_PANEL_ENTRY('C', 'M', 'N', 0x14e5, &delay_200_500_e80_d50, "N140HGA-EA1"),
 
 	EDP_PANEL_ENTRY('H', 'K', 'C', 0x2d5c, &delay_200_500_e200, "MB116AN01-2"),
 
-- 
GitLab


From 134c78c962279e423f5d699e3d3b379cbf4885c8 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:33 +0300
Subject: [PATCH 0802/2340] drm/msm/dpu: cleanup dpu_kms_hw_init error path

It was noticed that dpu_kms_hw_init()'s error path contains several
labels which point to the same code path. Replace all of them with a
single label.

Suggested-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570035/
Link: https://lore.kernel.org/r/20231201211845.1026967-2-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index c1cb500adc076..cc6e2c05e7940 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -1063,7 +1063,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	if (!dpu_kms->catalog) {
 		DPU_ERROR("device config not known!\n");
 		rc = -EINVAL;
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	/*
@@ -1073,26 +1073,26 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	rc = _dpu_kms_mmu_init(dpu_kms);
 	if (rc) {
 		DPU_ERROR("dpu_kms_mmu_init failed: %d\n", rc);
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	dpu_kms->mdss = msm_mdss_get_mdss_data(dpu_kms->pdev->dev.parent);
 	if (IS_ERR(dpu_kms->mdss)) {
 		rc = PTR_ERR(dpu_kms->mdss);
 		DPU_ERROR("failed to get MDSS data: %d\n", rc);
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	if (!dpu_kms->mdss) {
 		rc = -EINVAL;
 		DPU_ERROR("NULL MDSS data\n");
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio);
 	if (rc) {
 		DPU_ERROR("rm init failed: %d\n", rc);
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	dpu_kms->rm_init = true;
@@ -1104,7 +1104,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 		rc = PTR_ERR(dpu_kms->hw_mdp);
 		DPU_ERROR("failed to get hw_mdp: %d\n", rc);
 		dpu_kms->hw_mdp = NULL;
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	for (i = 0; i < dpu_kms->catalog->vbif_count; i++) {
@@ -1115,7 +1115,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed to init vbif %d: %d\n", vbif->id, rc);
-			goto power_error;
+			goto err_pm_put;
 		}
 
 		dpu_kms->hw_vbif[vbif->id] = hw;
@@ -1131,7 +1131,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	rc = dpu_core_perf_init(&dpu_kms->perf, dpu_kms->catalog->perf, max_core_clk_rate);
 	if (rc) {
 		DPU_ERROR("failed to init perf %d\n", rc);
-		goto perf_err;
+		goto err_pm_put;
 	}
 
 	dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog);
@@ -1139,7 +1139,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 		rc = PTR_ERR(dpu_kms->hw_intr);
 		DPU_ERROR("hw_intr init failed: %d\n", rc);
 		dpu_kms->hw_intr = NULL;
-		goto hw_intr_init_err;
+		goto err_pm_put;
 	}
 
 	dev->mode_config.min_width = 0;
@@ -1164,7 +1164,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	rc = _dpu_kms_drm_obj_init(dpu_kms);
 	if (rc) {
 		DPU_ERROR("modeset init failed: %d\n", rc);
-		goto drm_obj_init_err;
+		goto err_pm_put;
 	}
 
 	dpu_vbif_init_memtypes(dpu_kms);
@@ -1173,10 +1173,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 
 	return 0;
 
-drm_obj_init_err:
-hw_intr_init_err:
-perf_err:
-power_error:
+err_pm_put:
 	pm_runtime_put_sync(&dpu_kms->pdev->dev);
 error:
 	_dpu_kms_hw_destroy(dpu_kms);
-- 
GitLab


From b830b06f008755526646bb1d12500c8ccd8be335 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:34 +0300
Subject: [PATCH 0803/2340] drm/msm/dpu: remove IS_ERR_OR_NULL for
 dpu_hw_intr_init() error handling

Using IS_ERR_OR_NULL() together with PTR_ERR() is a typical mistake. If
the value is NULL, then the function will return 0 instead of a proper
return code. Replace IS_ERR_OR_NULL() with IS_ERR() in the
dpu_hw_intr_init() error check.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570036/
Link: https://lore.kernel.org/r/20231201211845.1026967-3-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index cc6e2c05e7940..75663f1626188 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -1135,7 +1135,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	}
 
 	dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog);
-	if (IS_ERR_OR_NULL(dpu_kms->hw_intr)) {
+	if (IS_ERR(dpu_kms->hw_intr)) {
 		rc = PTR_ERR(dpu_kms->hw_intr);
 		DPU_ERROR("hw_intr init failed: %d\n", rc);
 		dpu_kms->hw_intr = NULL;
-- 
GitLab


From b19e6f7dd2e7cc47bed565064fd7ff52f9424e52 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:35 +0300
Subject: [PATCH 0804/2340] drm/msm/dpu: use devres-managed allocation for
 interrupts data

Use devm_kzalloc to create interrupts data structure. This allows us to
remove corresponding kfree and drop dpu_hw_intr_destroy() function.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570038/
Link: https://lore.kernel.org/r/20231201211845.1026967-4-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c | 14 ++++++--------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h | 11 ++++-------
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c           |  4 +---
 3 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
index 088807db2c83d..946dd0135dffc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
@@ -6,6 +6,8 @@
 #include <linux/debugfs.h>
 #include <linux/slab.h>
 
+#include <drm/drm_managed.h>
+
 #include "dpu_core_irq.h"
 #include "dpu_kms.h"
 #include "dpu_hw_interrupts.h"
@@ -472,8 +474,9 @@ u32 dpu_core_irq_read(struct dpu_kms *dpu_kms,
 	return intr_status;
 }
 
-struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
-		const struct dpu_mdss_cfg *m)
+struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev,
+				     void __iomem *addr,
+				     const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_intr *intr;
 	unsigned int i;
@@ -481,7 +484,7 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
 	if (!addr || !m)
 		return ERR_PTR(-EINVAL);
 
-	intr = kzalloc(sizeof(*intr), GFP_KERNEL);
+	intr = drmm_kzalloc(dev, sizeof(*intr), GFP_KERNEL);
 	if (!intr)
 		return ERR_PTR(-ENOMEM);
 
@@ -512,11 +515,6 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
 	return intr;
 }
 
-void dpu_hw_intr_destroy(struct dpu_hw_intr *intr)
-{
-	kfree(intr);
-}
-
 int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms,
 				   unsigned int irq_idx,
 				   void (*irq_cb)(void *arg),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
index 53a21ebc57e8b..564b750a28fe2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
@@ -70,15 +70,12 @@ struct dpu_hw_intr {
 
 /**
  * dpu_hw_intr_init(): Initializes the interrupts hw object
+ * @dev:  Corresponding device for devres management
  * @addr: mapped register io address of MDP
  * @m:    pointer to MDSS catalog data
  */
-struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
-		const struct dpu_mdss_cfg *m);
+struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev,
+				     void __iomem *addr,
+				     const struct dpu_mdss_cfg *m);
 
-/**
- * dpu_hw_intr_destroy(): Cleanup interrutps hw object
- * @intr: pointer to interrupts hw object
- */
-void dpu_hw_intr_destroy(struct dpu_hw_intr *intr);
 #endif
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 75663f1626188..34d707db1d0cb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -795,8 +795,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms)
 {
 	int i;
 
-	if (dpu_kms->hw_intr)
-		dpu_hw_intr_destroy(dpu_kms->hw_intr);
 	dpu_kms->hw_intr = NULL;
 
 	/* safe to call these more than once during shutdown */
@@ -1134,7 +1132,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 		goto err_pm_put;
 	}
 
-	dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog);
+	dpu_kms->hw_intr = dpu_hw_intr_init(dev, dpu_kms->mmio, dpu_kms->catalog);
 	if (IS_ERR(dpu_kms->hw_intr)) {
 		rc = PTR_ERR(dpu_kms->hw_intr);
 		DPU_ERROR("hw_intr init failed: %d\n", rc);
-- 
GitLab


From bdfa47d9b17a2335666a741a98857f9e5e482dc8 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:36 +0300
Subject: [PATCH 0805/2340] drm/msm/dpu: use devres-managed allocation for VBIF
 data

Use devm_kzalloc to create VBIF data structure. This allows us to
remove corresponding kfree and drop dpu_hw_vbif_destroy() function.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570040/
Link: https://lore.kernel.org/r/20231201211845.1026967-5-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c | 14 ++++++--------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h |  8 ++++----
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c     | 11 +++--------
 3 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c
index a5121a50b2bbf..98e34afde2d25 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hw_vbif.h"
@@ -211,12 +213,13 @@ static void _setup_vbif_ops(struct dpu_hw_vbif_ops *ops,
 	ops->set_write_gather_en = dpu_hw_set_write_gather_en;
 }
 
-struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg,
-		void __iomem *addr)
+struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev,
+				     const struct dpu_vbif_cfg *cfg,
+				     void __iomem *addr)
 {
 	struct dpu_hw_vbif *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -234,8 +237,3 @@ struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif)
-{
-	kfree(vbif);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h
index 7e10d2a172b45..e2b4307500e42 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h
@@ -108,12 +108,12 @@ struct dpu_hw_vbif {
 /**
  * dpu_hw_vbif_init() - Initializes the VBIF driver for the passed
  * VBIF catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  VBIF catalog entry for which driver object is required
  * @addr: Mapped register io address of MDSS
  */
-struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg,
-		void __iomem *addr);
-
-void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif);
+struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev,
+				     const struct dpu_vbif_cfg *cfg,
+				     void __iomem *addr);
 
 #endif /*_DPU_HW_VBIF_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 34d707db1d0cb..a1753c54aaacb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -800,13 +800,8 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms)
 	/* safe to call these more than once during shutdown */
 	_dpu_kms_mmu_destroy(dpu_kms);
 
-	if (dpu_kms->catalog) {
-		for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
-			if (dpu_kms->hw_vbif[i]) {
-				dpu_hw_vbif_destroy(dpu_kms->hw_vbif[i]);
-				dpu_kms->hw_vbif[i] = NULL;
-			}
-		}
+	for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
+		dpu_kms->hw_vbif[i] = NULL;
 	}
 
 	if (dpu_kms->rm_init)
@@ -1109,7 +1104,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 		struct dpu_hw_vbif *hw;
 		const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i];
 
-		hw = dpu_hw_vbif_init(vbif, dpu_kms->vbif[vbif->id]);
+		hw = dpu_hw_vbif_init(dev, vbif, dpu_kms->vbif[vbif->id]);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed to init vbif %d: %d\n", vbif->id, rc);
-- 
GitLab


From 1e897dcc4c673b9d585c09ddcdbe7fab0934e64f Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:37 +0300
Subject: [PATCH 0806/2340] drm/msm/dpu: use devres-managed allocation for MDP
 TOP

Use devm_kzalloc to create MDP TOP structure. This allows us to remove
corresponding kfree and drop dpu_hw_mdp_destroy() function.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570047/
Link: https://lore.kernel.org/r/20231201211845.1026967-6-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c | 17 +++++++----------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h |  8 +++++---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c    |  5 ++---
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
index 24e734768a727..05e48cf4ec1d2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hw_top.h"
@@ -247,16 +249,17 @@ static void _setup_mdp_ops(struct dpu_hw_mdp_ops *ops,
 		ops->intf_audio_select = dpu_hw_intf_audio_select;
 }
 
-struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg,
-		void __iomem *addr,
-		const struct dpu_mdss_cfg *m)
+struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev,
+				      const struct dpu_mdp_cfg *cfg,
+				      void __iomem *addr,
+				      const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_mdp *mdp;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	mdp = kzalloc(sizeof(*mdp), GFP_KERNEL);
+	mdp = drmm_kzalloc(dev, sizeof(*mdp), GFP_KERNEL);
 	if (!mdp)
 		return ERR_PTR(-ENOMEM);
 
@@ -271,9 +274,3 @@ struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg,
 
 	return mdp;
 }
-
-void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp)
-{
-	kfree(mdp);
-}
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h
index 8b1463d2b2f0b..6f3dc98087dfe 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h
@@ -145,13 +145,15 @@ struct dpu_hw_mdp {
 
 /**
  * dpu_hw_mdptop_init - initializes the top driver for the passed config
+ * @dev:  Corresponding device for devres management
  * @cfg:  MDP TOP configuration from catalog
  * @addr: Mapped register io address of MDP
  * @m:    Pointer to mdss catalog data
  */
-struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg,
-		void __iomem *addr,
-		const struct dpu_mdss_cfg *m);
+struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev,
+				      const struct dpu_mdp_cfg *cfg,
+				      void __iomem *addr,
+				      const struct dpu_mdss_cfg *m);
 
 void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index a1753c54aaacb..5fcb256302b2f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -810,8 +810,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms)
 
 	dpu_kms->catalog = NULL;
 
-	if (dpu_kms->hw_mdp)
-		dpu_hw_mdp_destroy(dpu_kms->hw_mdp);
 	dpu_kms->hw_mdp = NULL;
 }
 
@@ -1090,7 +1088,8 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 
 	dpu_kms->rm_init = true;
 
-	dpu_kms->hw_mdp = dpu_hw_mdptop_init(dpu_kms->catalog->mdp,
+	dpu_kms->hw_mdp = dpu_hw_mdptop_init(dev,
+					     dpu_kms->catalog->mdp,
 					     dpu_kms->mmio,
 					     dpu_kms->catalog);
 	if (IS_ERR(dpu_kms->hw_mdp)) {
-- 
GitLab


From a106ed98af6848ef5810b12f5c9e2e0566f1d9c4 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:38 +0300
Subject: [PATCH 0807/2340] drm/msm/dpu: use devres-managed allocation for HW
 blocks

Use devm_kzalloc to create HW block structure. This allows us to remove
corresponding kfree and drop all dpu_hw_*_destroy() functions as well as
dpu_rm_destroy(), which becomes empty afterwards.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570041/
Link: https://lore.kernel.org/r/20231201211845.1026967-7-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c    | 19 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h    | 16 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c    | 12 ++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h    | 10 ++-
 .../gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c    |  7 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c   | 16 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h   | 12 +--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c   | 16 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h   | 13 ++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c     | 14 ++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h     | 12 +--
 .../gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c    | 14 ++-
 .../gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h    | 13 +--
 .../gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c   | 15 ++--
 .../gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h   | 14 ++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c   | 17 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h   | 16 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c     | 15 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h     | 13 ++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c       |  8 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h       |  1 -
 drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c        | 90 +++----------------
 drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h        | 11 +--
 23 files changed, 127 insertions(+), 247 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
index 86182c7346060..e7b680a151d6d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
@@ -4,6 +4,9 @@
  */
 
 #include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_ctl.h"
 #include "dpu_kms.h"
@@ -680,14 +683,15 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops,
 		ops->set_active_pipes = dpu_hw_ctl_set_fetch_pipe_active;
 };
 
-struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg,
-		void __iomem *addr,
-		u32 mixer_count,
-		const struct dpu_lm_cfg *mixer)
+struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev,
+				   const struct dpu_ctl_cfg *cfg,
+				   void __iomem *addr,
+				   u32 mixer_count,
+				   const struct dpu_lm_cfg *mixer)
 {
 	struct dpu_hw_ctl *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -702,8 +706,3 @@ struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx)
-{
-	kfree(ctx);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
index 1c242298ff2ee..279ebd8dfbff7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
@@ -274,20 +274,16 @@ static inline struct dpu_hw_ctl *to_dpu_hw_ctl(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_ctl_init() - Initializes the ctl_path hw driver object.
  * Should be called before accessing any ctl_path register.
+ * @dev:  Corresponding device for devres management
  * @cfg:  ctl_path catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  * @mixer_count: Number of mixers in @mixer
  * @mixer: Pointer to an array of Layer Mixers defined in the catalog
  */
-struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg,
-		void __iomem *addr,
-		u32 mixer_count,
-		const struct dpu_lm_cfg *mixer);
-
-/**
- * dpu_hw_ctl_destroy(): Destroys ctl driver context
- * should be called to free the context
- */
-void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx);
+struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev,
+				   const struct dpu_ctl_cfg *cfg,
+				   void __iomem *addr,
+				   u32 mixer_count,
+				   const struct dpu_lm_cfg *mixer);
 
 #endif /*_DPU_HW_CTL_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
index 509dbaa51d878..5e9aad1b2aa28 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
@@ -3,6 +3,8 @@
  * Copyright (c) 2020-2022, Linaro Limited
  */
 
+#include <drm/drm_managed.h>
+
 #include <drm/display/drm_dsc_helper.h>
 
 #include "dpu_kms.h"
@@ -188,12 +190,13 @@ static void _setup_dsc_ops(struct dpu_hw_dsc_ops *ops,
 		ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk;
 };
 
-struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg,
+struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev,
+				   const struct dpu_dsc_cfg *cfg,
 				   void __iomem *addr)
 {
 	struct dpu_hw_dsc *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -206,8 +209,3 @@ struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_dsc_destroy(struct dpu_hw_dsc *dsc)
-{
-	kfree(dsc);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h
index d5b597ab8c5ca..989c88d2449b6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h
@@ -64,20 +64,24 @@ struct dpu_hw_dsc {
 
 /**
  * dpu_hw_dsc_init() - Initializes the DSC hw driver object.
+ * @dev:  Corresponding device for devres management
  * @cfg:  DSC catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Return: Error code or allocated dpu_hw_dsc context
  */
-struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg,
-		void __iomem *addr);
+struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev,
+				   const struct dpu_dsc_cfg *cfg,
+				   void __iomem *addr);
 
 /**
  * dpu_hw_dsc_init_1_2() - initializes the v1.2 DSC hw driver object
+ * @dev:  Corresponding device for devres management
  * @cfg:  DSC catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Returns: Error code or allocated dpu_hw_dsc context
  */
-struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg,
+struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev,
+				       const struct dpu_dsc_cfg *cfg,
 				       void __iomem *addr);
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c
index 24fe1d98eb86a..ba193b0376fe8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved
  */
 
+#include <drm/drm_managed.h>
+
 #include <drm/display/drm_dsc_helper.h>
 
 #include "dpu_kms.h"
@@ -367,12 +369,13 @@ static void _setup_dcs_ops_1_2(struct dpu_hw_dsc_ops *ops,
 	ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk_1_2;
 }
 
-struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg,
+struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev,
+				       const struct dpu_dsc_cfg *cfg,
 				       void __iomem *addr)
 {
 	struct dpu_hw_dsc *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
index 9419b2209af88..b1da88e2935f4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hw_lm.h"
@@ -68,15 +70,16 @@ static void _setup_dspp_ops(struct dpu_hw_dspp *c,
 		c->ops.setup_pcc = dpu_setup_dspp_pcc;
 }
 
-struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg,
-			void __iomem *addr)
+struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev,
+				     const struct dpu_dspp_cfg *cfg,
+				     void __iomem *addr)
 {
 	struct dpu_hw_dspp *c;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -90,10 +93,3 @@ struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp)
-{
-	kfree(dspp);
-}
-
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h
index bea9656813301..3b435690b6ccb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h
@@ -81,18 +81,14 @@ static inline struct dpu_hw_dspp *to_dpu_hw_dspp(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_dspp_init() - Initializes the DSPP hw driver object.
  * should be called once before accessing every DSPP.
+ * @dev:  Corresponding device for devres management
  * @cfg:  DSPP catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Return: pointer to structure or ERR_PTR
  */
-struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg,
-	void __iomem *addr);
-
-/**
- * dpu_hw_dspp_destroy(): Destroys DSPP driver context
- * @dspp: Pointer to DSPP driver context
- */
-void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp);
+struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev,
+				     const struct dpu_dspp_cfg *cfg,
+				     void __iomem *addr);
 
 #endif /*_DPU_HW_DSPP_H */
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
index e8b8908d3e122..0b6a0a7dcc392 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
@@ -12,6 +12,8 @@
 
 #include <linux/iopoll.h>
 
+#include <drm/drm_managed.h>
+
 #define INTF_TIMING_ENGINE_EN           0x000
 #define INTF_CONFIG                     0x004
 #define INTF_HSYNC_CTL                  0x008
@@ -527,8 +529,10 @@ static void dpu_hw_intf_program_intf_cmd_cfg(struct dpu_hw_intf *ctx,
 	DPU_REG_WRITE(&ctx->hw, INTF_CONFIG2, intf_cfg2);
 }
 
-struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev,
+				     const struct dpu_intf_cfg *cfg,
+				     void __iomem *addr,
+				     const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_intf *c;
 
@@ -537,7 +541,7 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
 		return NULL;
 	}
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -581,9 +585,3 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_intf_destroy(struct dpu_hw_intf *intf)
-{
-	kfree(intf);
-}
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
index c539025c418bb..215401bb042eb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
@@ -131,17 +131,14 @@ struct dpu_hw_intf {
 /**
  * dpu_hw_intf_init() - Initializes the INTF driver for the passed
  * interface catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  interface catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  * @mdss_rev: dpu core's major and minor versions
  */
-struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_intf_destroy(): Destroys INTF driver context
- * @intf:   Pointer to INTF driver context
- */
-void dpu_hw_intf_destroy(struct dpu_hw_intf *intf);
+struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev,
+				     const struct dpu_intf_cfg *cfg,
+				     void __iomem *addr,
+				     const struct dpu_mdss_version *mdss_rev);
 
 #endif /*_DPU_HW_INTF_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
index d1c3bd8379ea9..25af52ab602f5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_kms.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hwio.h"
@@ -156,8 +158,9 @@ static void _setup_mixer_ops(struct dpu_hw_lm_ops *ops,
 	ops->collect_misr = dpu_hw_lm_collect_misr;
 }
 
-struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
-		void __iomem *addr)
+struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev,
+				    const struct dpu_lm_cfg *cfg,
+				    void __iomem *addr)
 {
 	struct dpu_hw_mixer *c;
 
@@ -166,7 +169,7 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
 		return NULL;
 	}
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -180,8 +183,3 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm)
-{
-	kfree(lm);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
index 36992d046a533..8835fd1064131 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
@@ -95,16 +95,12 @@ static inline struct dpu_hw_mixer *to_dpu_hw_mixer(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_lm_init() - Initializes the mixer hw driver object.
  * should be called once before accessing every mixer.
+ * @dev:  Corresponding device for devres management
  * @cfg:  mixer catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  */
-struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
-		void __iomem *addr);
-
-/**
- * dpu_hw_lm_destroy(): Destroys layer mixer driver context
- * @lm:   Pointer to LM driver context
- */
-void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm);
+struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev,
+				    const struct dpu_lm_cfg *cfg,
+				    void __iomem *addr);
 
 #endif /*_DPU_HW_LM_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c
index 90e0e05eff8d3..ddfa40a959cbd 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c
@@ -4,6 +4,8 @@
 
 #include <linux/iopoll.h>
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hw_mdss.h"
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
@@ -37,12 +39,13 @@ static void _setup_merge_3d_ops(struct dpu_hw_merge_3d *c,
 	c->ops.setup_3d_mode = dpu_hw_merge_3d_setup_3d_mode;
 };
 
-struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg,
-		void __iomem *addr)
+struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev,
+					     const struct dpu_merge_3d_cfg *cfg,
+					     void __iomem *addr)
 {
 	struct dpu_hw_merge_3d *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -55,8 +58,3 @@ struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *hw)
-{
-	kfree(hw);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h
index 19cec5e887221..c192f02ec1abc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h
@@ -48,18 +48,13 @@ static inline struct dpu_hw_merge_3d *to_dpu_hw_merge_3d(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_merge_3d_init() - Initializes the merge_3d driver for the passed
  * merge3d catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  Pingpong catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Return: Error code or allocated dpu_hw_merge_3d context
  */
-struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg,
-		void __iomem *addr);
-
-/**
- * dpu_hw_merge_3d_destroy - destroys merge_3d driver context
- *	should be called to free the context
- * @pp:   Pointer to PP driver context returned by dpu_hw_merge_3d_init
- */
-void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *pp);
+struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev,
+					     const struct dpu_merge_3d_cfg *cfg,
+					     void __iomem *addr);
 
 #endif /*_DPU_HW_MERGE3D_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
index 057cac7f5d936..2db4c6fba37ac 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
@@ -4,6 +4,8 @@
 
 #include <linux/iopoll.h>
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hw_mdss.h"
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
@@ -281,12 +283,14 @@ static int dpu_hw_pp_setup_dsc(struct dpu_hw_pingpong *pp)
 	return 0;
 }
 
-struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev,
+					     const struct dpu_pingpong_cfg *cfg,
+					     void __iomem *addr,
+					     const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_pingpong *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -317,8 +321,3 @@ struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp)
-{
-	kfree(pp);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
index 0d541ca5b0566..a48b69fd79a3b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
@@ -121,19 +121,15 @@ static inline struct dpu_hw_pingpong *to_dpu_hw_pingpong(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_pingpong_init() - initializes the pingpong driver for the passed
  * pingpong catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  Pingpong catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * @mdss_rev: dpu core's major and minor versions
  * Return: Error code or allocated dpu_hw_pingpong context
  */
-struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_pingpong_destroy - destroys pingpong driver context
- *	should be called to free the context
- * @pp:   Pointer to PP driver context returned by dpu_hw_pingpong_init
- */
-void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp);
+struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev,
+					     const struct dpu_pingpong_cfg *cfg,
+					     void __iomem *addr,
+					     const struct dpu_mdss_version *mdss_rev);
 
 #endif /*_DPU_HW_PINGPONG_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
index 26307d9fc81d0..0bf8a83e8df36 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
@@ -11,6 +11,7 @@
 #include "msm_mdss.h"
 
 #include <drm/drm_file.h>
+#include <drm/drm_managed.h>
 
 #define DPU_FETCH_CONFIG_RESET_VALUE   0x00000087
 
@@ -669,16 +670,18 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms,
 }
 #endif
 
-struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg,
-		void __iomem *addr, const struct msm_mdss_data *mdss_data,
-		const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev,
+				     const struct dpu_sspp_cfg *cfg,
+				     void __iomem *addr,
+				     const struct msm_mdss_data *mdss_data,
+				     const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_sspp *hw_pipe;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	hw_pipe = kzalloc(sizeof(*hw_pipe), GFP_KERNEL);
+	hw_pipe = drmm_kzalloc(dev, sizeof(*hw_pipe), GFP_KERNEL);
 	if (!hw_pipe)
 		return ERR_PTR(-ENOMEM);
 
@@ -693,9 +696,3 @@ struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg,
 
 	return hw_pipe;
 }
-
-void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx)
-{
-	kfree(ctx);
-}
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
index 28e7d53bd1918..b7dc52312c392 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
@@ -318,21 +318,17 @@ struct dpu_kms;
 /**
  * dpu_hw_sspp_init() - Initializes the sspp hw driver object.
  * Should be called once before accessing every pipe.
+ * @dev:  Corresponding device for devres management
  * @cfg:  Pipe catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * @mdss_data: UBWC / MDSS configuration data
  * @mdss_rev: dpu core's major and minor versions
  */
-struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg,
-		void __iomem *addr, const struct msm_mdss_data *mdss_data,
-		const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_sspp_destroy(): Destroys SSPP driver context
- * should be called during Hw pipe cleanup.
- * @ctx:  Pointer to SSPP driver context returned by dpu_hw_sspp_init
- */
-void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx);
+struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev,
+				     const struct dpu_sspp_cfg *cfg,
+				     void __iomem *addr,
+				     const struct msm_mdss_data *mdss_data,
+				     const struct dpu_mdss_version *mdss_rev);
 
 int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms,
 			      struct dentry *entry);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
index 9668fb97c0478..ed0e80616129a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
@@ -3,6 +3,8 @@
   * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved
   */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hw_mdss.h"
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
@@ -208,15 +210,17 @@ static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
 		ops->setup_clk_force_ctrl = dpu_hw_wb_setup_clk_force_ctrl;
 }
 
-struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev,
+				 const struct dpu_wb_cfg *cfg,
+				 void __iomem *addr,
+				 const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_wb *c;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -230,8 +234,3 @@ struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
-{
-	kfree(hw_wb);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
index 88792f450a927..e671796ea379c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
@@ -76,18 +76,15 @@ struct dpu_hw_wb {
 
 /**
  * dpu_hw_wb_init() - Initializes the writeback hw driver object.
+ * @dev:  Corresponding device for devres management
  * @cfg:  wb_path catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  * @mdss_rev: dpu core's major and minor versions
  * Return: Error code or allocated dpu_hw_wb context
  */
-struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
- * @hw_wb:  Pointer to writeback hw driver object
- */
-void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
+struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev,
+				 const struct dpu_wb_cfg *cfg,
+				 void __iomem *addr,
+				 const struct dpu_mdss_version *mdss_rev);
 
 #endif /*_DPU_HW_WB_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 5fcb256302b2f..dc24fe4bb3b0c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -804,10 +804,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms)
 		dpu_kms->hw_vbif[i] = NULL;
 	}
 
-	if (dpu_kms->rm_init)
-		dpu_rm_destroy(&dpu_kms->rm);
-	dpu_kms->rm_init = false;
-
 	dpu_kms->catalog = NULL;
 
 	dpu_kms->hw_mdp = NULL;
@@ -1080,14 +1076,12 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 		goto err_pm_put;
 	}
 
-	rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio);
+	rc = dpu_rm_init(dev, &dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio);
 	if (rc) {
 		DPU_ERROR("rm init failed: %d\n", rc);
 		goto err_pm_put;
 	}
 
-	dpu_kms->rm_init = true;
-
 	dpu_kms->hw_mdp = dpu_hw_mdptop_init(dev,
 					     dpu_kms->catalog->mdp,
 					     dpu_kms->mmio,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index b6f53ca6e9628..df6271017b800 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -88,7 +88,6 @@ struct dpu_kms {
 	struct drm_private_obj global_state;
 
 	struct dpu_rm rm;
-	bool rm_init;
 
 	struct dpu_hw_vbif *hw_vbif[VBIF_MAX];
 	struct dpu_hw_mdp *hw_mdp;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index 8759466e2f379..0bb28cf4a6cbc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -34,72 +34,8 @@ struct dpu_rm_requirements {
 	struct msm_display_topology topology;
 };
 
-int dpu_rm_destroy(struct dpu_rm *rm)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(rm->dspp_blks); i++) {
-		struct dpu_hw_dspp *hw;
-
-		if (rm->dspp_blks[i]) {
-			hw = to_dpu_hw_dspp(rm->dspp_blks[i]);
-			dpu_hw_dspp_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->pingpong_blks); i++) {
-		struct dpu_hw_pingpong *hw;
-
-		if (rm->pingpong_blks[i]) {
-			hw = to_dpu_hw_pingpong(rm->pingpong_blks[i]);
-			dpu_hw_pingpong_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->merge_3d_blks); i++) {
-		struct dpu_hw_merge_3d *hw;
-
-		if (rm->merge_3d_blks[i]) {
-			hw = to_dpu_hw_merge_3d(rm->merge_3d_blks[i]);
-			dpu_hw_merge_3d_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->mixer_blks); i++) {
-		struct dpu_hw_mixer *hw;
-
-		if (rm->mixer_blks[i]) {
-			hw = to_dpu_hw_mixer(rm->mixer_blks[i]);
-			dpu_hw_lm_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->ctl_blks); i++) {
-		struct dpu_hw_ctl *hw;
-
-		if (rm->ctl_blks[i]) {
-			hw = to_dpu_hw_ctl(rm->ctl_blks[i]);
-			dpu_hw_ctl_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->hw_intf); i++)
-		dpu_hw_intf_destroy(rm->hw_intf[i]);
-
-	for (i = 0; i < ARRAY_SIZE(rm->dsc_blks); i++) {
-		struct dpu_hw_dsc *hw;
-
-		if (rm->dsc_blks[i]) {
-			hw = to_dpu_hw_dsc(rm->dsc_blks[i]);
-			dpu_hw_dsc_destroy(hw);
-		}
-	}
-
-	for (i = 0; i < ARRAY_SIZE(rm->hw_wb); i++)
-		dpu_hw_wb_destroy(rm->hw_wb[i]);
-
-	for (i = 0; i < ARRAY_SIZE(rm->hw_sspp); i++)
-		dpu_hw_sspp_destroy(rm->hw_sspp[i]);
-
-	return 0;
-}
-
-int dpu_rm_init(struct dpu_rm *rm,
+int dpu_rm_init(struct drm_device *dev,
+		struct dpu_rm *rm,
 		const struct dpu_mdss_cfg *cat,
 		const struct msm_mdss_data *mdss_data,
 		void __iomem *mmio)
@@ -119,7 +55,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_mixer *hw;
 		const struct dpu_lm_cfg *lm = &cat->mixer[i];
 
-		hw = dpu_hw_lm_init(lm, mmio);
+		hw = dpu_hw_lm_init(dev, lm, mmio);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed lm object creation: err %d\n", rc);
@@ -132,7 +68,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_merge_3d *hw;
 		const struct dpu_merge_3d_cfg *merge_3d = &cat->merge_3d[i];
 
-		hw = dpu_hw_merge_3d_init(merge_3d, mmio);
+		hw = dpu_hw_merge_3d_init(dev, merge_3d, mmio);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed merge_3d object creation: err %d\n",
@@ -146,7 +82,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_pingpong *hw;
 		const struct dpu_pingpong_cfg *pp = &cat->pingpong[i];
 
-		hw = dpu_hw_pingpong_init(pp, mmio, cat->mdss_ver);
+		hw = dpu_hw_pingpong_init(dev, pp, mmio, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed pingpong object creation: err %d\n",
@@ -162,7 +98,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_intf *hw;
 		const struct dpu_intf_cfg *intf = &cat->intf[i];
 
-		hw = dpu_hw_intf_init(intf, mmio, cat->mdss_ver);
+		hw = dpu_hw_intf_init(dev, intf, mmio, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed intf object creation: err %d\n", rc);
@@ -175,7 +111,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_wb *hw;
 		const struct dpu_wb_cfg *wb = &cat->wb[i];
 
-		hw = dpu_hw_wb_init(wb, mmio, cat->mdss_ver);
+		hw = dpu_hw_wb_init(dev, wb, mmio, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed wb object creation: err %d\n", rc);
@@ -188,7 +124,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_ctl *hw;
 		const struct dpu_ctl_cfg *ctl = &cat->ctl[i];
 
-		hw = dpu_hw_ctl_init(ctl, mmio, cat->mixer_count, cat->mixer);
+		hw = dpu_hw_ctl_init(dev, ctl, mmio, cat->mixer_count, cat->mixer);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed ctl object creation: err %d\n", rc);
@@ -201,7 +137,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_dspp *hw;
 		const struct dpu_dspp_cfg *dspp = &cat->dspp[i];
 
-		hw = dpu_hw_dspp_init(dspp, mmio);
+		hw = dpu_hw_dspp_init(dev, dspp, mmio);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed dspp object creation: err %d\n", rc);
@@ -215,9 +151,9 @@ int dpu_rm_init(struct dpu_rm *rm,
 		const struct dpu_dsc_cfg *dsc = &cat->dsc[i];
 
 		if (test_bit(DPU_DSC_HW_REV_1_2, &dsc->features))
-			hw = dpu_hw_dsc_init_1_2(dsc, mmio);
+			hw = dpu_hw_dsc_init_1_2(dev, dsc, mmio);
 		else
-			hw = dpu_hw_dsc_init(dsc, mmio);
+			hw = dpu_hw_dsc_init(dev, dsc, mmio);
 
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
@@ -231,7 +167,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_sspp *hw;
 		const struct dpu_sspp_cfg *sspp = &cat->sspp[i];
 
-		hw = dpu_hw_sspp_init(sspp, mmio, mdss_data, cat->mdss_ver);
+		hw = dpu_hw_sspp_init(dev, sspp, mmio, mdss_data, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed sspp object creation: err %d\n", rc);
@@ -243,8 +179,6 @@ int dpu_rm_init(struct dpu_rm *rm,
 	return 0;
 
 fail:
-	dpu_rm_destroy(rm);
-
 	return rc ? rc : -EFAULT;
 }
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
index 2b551566cbf48..36752d837be4c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
@@ -38,24 +38,19 @@ struct dpu_rm {
 /**
  * dpu_rm_init - Read hardware catalog and create reservation tracking objects
  *	for all HW blocks.
+ * @dev:  Corresponding device for devres management
  * @rm: DPU Resource Manager handle
  * @cat: Pointer to hardware catalog
  * @mdss_data: Pointer to MDSS / UBWC configuration
  * @mmio: mapped register io address of MDP
  * @Return: 0 on Success otherwise -ERROR
  */
-int dpu_rm_init(struct dpu_rm *rm,
+int dpu_rm_init(struct drm_device *dev,
+		struct dpu_rm *rm,
 		const struct dpu_mdss_cfg *cat,
 		const struct msm_mdss_data *mdss_data,
 		void __iomem *mmio);
 
-/**
- * dpu_rm_destroy - Free all memory allocated by dpu_rm_init
- * @rm: DPU Resource Manager handle
- * @Return: 0 on Success otherwise -ERROR
- */
-int dpu_rm_destroy(struct dpu_rm *rm);
-
 /**
  * dpu_rm_reserve - Given a CRTC->Encoder->Connector display chain, analyze
  *	the use connections and user requirements, specified through related
-- 
GitLab


From b0311c1c4e069e2e15f2e2a32bccbcf628be42ec Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:39 +0300
Subject: [PATCH 0808/2340] drm/msm/dpu: drop unused dpu_plane::lock

The field dpu_plane::lock was never used for protecting any kind of
data. Drop it now.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570043/
Link: https://lore.kernel.org/r/20231201211845.1026967-8-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 4d7f3894f4d7b..97d2211de0e3a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -78,8 +78,6 @@ static const uint32_t qcom_compressed_supported_formats[] = {
 struct dpu_plane {
 	struct drm_plane base;
 
-	struct mutex lock;
-
 	enum dpu_sspp pipe;
 
 	uint32_t color_fill;
@@ -1230,8 +1228,6 @@ static void dpu_plane_destroy(struct drm_plane *plane)
 		if (pstate->r_pipe.sspp)
 			_dpu_plane_set_qos_ctrl(plane, &pstate->r_pipe, false);
 
-		mutex_destroy(&pdpu->lock);
-
 		/* this will destroy the states as well */
 		drm_plane_cleanup(plane);
 
@@ -1491,8 +1487,6 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev,
 	/* success! finalize initialization */
 	drm_plane_helper_add(plane, &dpu_plane_helper_funcs);
 
-	mutex_init(&pdpu->lock);
-
 	DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name,
 					pipe, plane->base.id);
 	return plane;
-- 
GitLab


From bcc54a4c063a823cb75b71116762673b380a1ef6 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:40 +0300
Subject: [PATCH 0809/2340] drm/msm/dpu: remove QoS teardown on plane
 destruction

There is little point in disabling QoS on plane destruction: it happens
during DPU device destruction process, after which there will be no
running planes.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570049/
Link: https://lore.kernel.org/r/20231201211845.1026967-9-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 97d2211de0e3a..dae7d57f58810 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -1217,17 +1217,10 @@ static void dpu_plane_atomic_update(struct drm_plane *plane,
 static void dpu_plane_destroy(struct drm_plane *plane)
 {
 	struct dpu_plane *pdpu = plane ? to_dpu_plane(plane) : NULL;
-	struct dpu_plane_state *pstate;
 
 	DPU_DEBUG_PLANE(pdpu, "\n");
 
 	if (pdpu) {
-		pstate = to_dpu_plane_state(plane->state);
-		_dpu_plane_set_qos_ctrl(plane, &pstate->pipe, false);
-
-		if (pstate->r_pipe.sspp)
-			_dpu_plane_set_qos_ctrl(plane, &pstate->r_pipe, false);
-
 		/* this will destroy the states as well */
 		drm_plane_cleanup(plane);
 
-- 
GitLab


From 0e00f9af95bbce1f1d2f193d08e80b446329dd57 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:41 +0300
Subject: [PATCH 0810/2340] drm/msm/dpu: use drmm-managed allocation for
 dpu_plane

Change struct dpu_plane allocation to use drmm_universal_plane_alloc().
This removes the need to perform any actions on plane destruction.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570042/
Link: https://lore.kernel.org/r/20231201211845.1026967-10-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 46 +++++------------------
 1 file changed, 10 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index dae7d57f58810..3235ab1325400 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -1214,20 +1214,6 @@ static void dpu_plane_atomic_update(struct drm_plane *plane,
 	}
 }
 
-static void dpu_plane_destroy(struct drm_plane *plane)
-{
-	struct dpu_plane *pdpu = plane ? to_dpu_plane(plane) : NULL;
-
-	DPU_DEBUG_PLANE(pdpu, "\n");
-
-	if (pdpu) {
-		/* this will destroy the states as well */
-		drm_plane_cleanup(plane);
-
-		kfree(pdpu);
-	}
-}
-
 static void dpu_plane_destroy_state(struct drm_plane *plane,
 		struct drm_plane_state *state)
 {
@@ -1397,7 +1383,6 @@ static bool dpu_plane_format_mod_supported(struct drm_plane *plane,
 static const struct drm_plane_funcs dpu_plane_funcs = {
 		.update_plane = drm_atomic_helper_update_plane,
 		.disable_plane = drm_atomic_helper_disable_plane,
-		.destroy = dpu_plane_destroy,
 		.reset = dpu_plane_reset,
 		.atomic_duplicate_state = dpu_plane_duplicate_state,
 		.atomic_destroy_state = dpu_plane_destroy_state,
@@ -1425,35 +1410,28 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev,
 	struct dpu_hw_sspp *pipe_hw;
 	uint32_t num_formats;
 	uint32_t supported_rotations;
-	int ret = -EINVAL;
-
-	/* create and zero local structure */
-	pdpu = kzalloc(sizeof(*pdpu), GFP_KERNEL);
-	if (!pdpu) {
-		DPU_ERROR("[%u]failed to allocate local plane struct\n", pipe);
-		ret = -ENOMEM;
-		return ERR_PTR(ret);
-	}
-
-	/* cache local stuff for later */
-	plane = &pdpu->base;
-	pdpu->pipe = pipe;
+	int ret;
 
 	/* initialize underlying h/w driver */
 	pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe);
 	if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) {
 		DPU_ERROR("[%u]SSPP is invalid\n", pipe);
-		goto clean_plane;
+		return ERR_PTR(-EINVAL);
 	}
 
 	format_list = pipe_hw->cap->sblk->format_list;
 	num_formats = pipe_hw->cap->sblk->num_formats;
 
-	ret = drm_universal_plane_init(dev, plane, 0xff, &dpu_plane_funcs,
+	pdpu = drmm_universal_plane_alloc(dev, struct dpu_plane, base,
+				0xff, &dpu_plane_funcs,
 				format_list, num_formats,
 				supported_format_modifiers, type, NULL);
-	if (ret)
-		goto clean_plane;
+	if (IS_ERR(pdpu))
+		return ERR_CAST(pdpu);
+
+	/* cache local stuff for later */
+	plane = &pdpu->base;
+	pdpu->pipe = pipe;
 
 	pdpu->catalog = kms->catalog;
 
@@ -1483,8 +1461,4 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev,
 	DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name,
 					pipe, plane->base.id);
 	return plane;
-
-clean_plane:
-	kfree(pdpu);
-	return ERR_PTR(ret);
 }
-- 
GitLab


From 3637af92de2b1f51a4ecd5e21ada2d62d3a5a6b5 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:42 +0300
Subject: [PATCH 0811/2340] drm/msm/dpu: use drmm-managed allocation for
 dpu_crtc

Change struct dpu_crtc allocation to use drmm_crtc_alloc_with_planes().
This removes the need to perform any actions on CRTC destruction.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570054/
Link: https://lore.kernel.org/r/20231201211845.1026967-11-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 25 +++++++-----------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index 3c475f8042b03..a798c10036e1e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -51,17 +51,6 @@ static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc)
 	return to_dpu_kms(priv->kms);
 }
 
-static void dpu_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc);
-
-	if (!crtc)
-		return;
-
-	drm_crtc_cleanup(crtc);
-	kfree(dpu_crtc);
-}
-
 static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -1435,7 +1424,6 @@ static int dpu_crtc_late_register(struct drm_crtc *crtc)
 
 static const struct drm_crtc_funcs dpu_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = dpu_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.reset = dpu_crtc_reset,
 	.atomic_duplicate_state = dpu_crtc_duplicate_state,
@@ -1469,9 +1457,13 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
 	struct dpu_crtc *dpu_crtc;
 	int i, ret;
 
-	dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL);
-	if (!dpu_crtc)
-		return ERR_PTR(-ENOMEM);
+	dpu_crtc = drmm_crtc_alloc_with_planes(dev, struct dpu_crtc, base,
+					       plane, cursor,
+					       &dpu_crtc_funcs,
+					       NULL);
+
+	if (IS_ERR(dpu_crtc))
+		return ERR_CAST(dpu_crtc);
 
 	crtc = &dpu_crtc->base;
 	crtc->dev = dev;
@@ -1491,9 +1483,6 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
 				dpu_crtc_frame_event_work);
 	}
 
-	drm_crtc_init_with_planes(dev, crtc, plane, cursor, &dpu_crtc_funcs,
-				NULL);
-
 	drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs);
 
 	if (dpu_kms->catalog->dspp_count)
-- 
GitLab


From 73169b45e1ed296b4357a694f5fa586ac0643ac1 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:43 +0300
Subject: [PATCH 0812/2340] drm/msm/dpu: use drmm-managed allocation for
 dpu_encoder_phys

Change struct allocation of encoder's phys backend data to use
drmm_kzalloc(). This removes the need to perform any actions on encoder
destruction.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570051/
Link: https://lore.kernel.org/r/20231201211845.1026967-12-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c   |  9 ++++----
 .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  8 ++++---
 .../drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c  | 15 ++++---------
 .../drm/msm/disp/dpu1/dpu_encoder_phys_vid.c  | 13 ++++--------
 .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 21 ++++---------------
 5 files changed, 22 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 1c3560e1c6252..cd2e153860b52 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2179,6 +2179,7 @@ static void dpu_encoder_early_unregister(struct drm_encoder *encoder)
 }
 
 static int dpu_encoder_virt_add_phys_encs(
+		struct drm_device *dev,
 		struct msm_display_info *disp_info,
 		struct dpu_encoder_virt *dpu_enc,
 		struct dpu_enc_phys_init_params *params)
@@ -2200,7 +2201,7 @@ static int dpu_encoder_virt_add_phys_encs(
 
 
 	if (disp_info->intf_type == INTF_WB) {
-		enc = dpu_encoder_phys_wb_init(params);
+		enc = dpu_encoder_phys_wb_init(dev, params);
 
 		if (IS_ERR(enc)) {
 			DPU_ERROR_ENC(dpu_enc, "failed to init wb enc: %ld\n",
@@ -2211,7 +2212,7 @@ static int dpu_encoder_virt_add_phys_encs(
 		dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc;
 		++dpu_enc->num_phys_encs;
 	} else if (disp_info->is_cmd_mode) {
-		enc = dpu_encoder_phys_cmd_init(params);
+		enc = dpu_encoder_phys_cmd_init(dev, params);
 
 		if (IS_ERR(enc)) {
 			DPU_ERROR_ENC(dpu_enc, "failed to init cmd enc: %ld\n",
@@ -2222,7 +2223,7 @@ static int dpu_encoder_virt_add_phys_encs(
 		dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc;
 		++dpu_enc->num_phys_encs;
 	} else {
-		enc = dpu_encoder_phys_vid_init(params);
+		enc = dpu_encoder_phys_vid_init(dev, params);
 
 		if (IS_ERR(enc)) {
 			DPU_ERROR_ENC(dpu_enc, "failed to init vid enc: %ld\n",
@@ -2311,7 +2312,7 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc,
 			break;
 		}
 
-		ret = dpu_encoder_virt_add_phys_encs(disp_info,
+		ret = dpu_encoder_virt_add_phys_encs(dpu_kms->dev, disp_info,
 				dpu_enc, &phys_params);
 		if (ret) {
 			DPU_ERROR_ENC(dpu_enc, "failed to add phys encs\n");
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
index 6f04c3d56e77c..5dc53b65040e2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
@@ -281,22 +281,24 @@ struct dpu_encoder_wait_info {
  * @p:	Pointer to init params structure
  * Return: Error code or newly allocated encoder
  */
-struct dpu_encoder_phys *dpu_encoder_phys_vid_init(
+struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p);
 
 /**
  * dpu_encoder_phys_cmd_init - Construct a new command mode physical encoder
+ * @dev:  Corresponding device for devres management
  * @p:	Pointer to init params structure
  * Return: Error code or newly allocated encoder
  */
-struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
+struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p);
 
 /**
  * dpu_encoder_phys_wb_init - initialize writeback encoder
+ * @dev:  Corresponding device for devres management
  * @init:	Pointer to init info structure with initialization params
  */
-struct dpu_encoder_phys *dpu_encoder_phys_wb_init(
+struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p);
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index be185fe69793b..d24f45d1f654f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -13,6 +13,8 @@
 #include "dpu_trace.h"
 #include "disp/msm_disp_snapshot.h"
 
+#include <drm/drm_managed.h>
+
 #define DPU_DEBUG_CMDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \
 		(e) && (e)->base.parent ? \
 		(e)->base.parent->base.id : -1, \
@@ -558,14 +560,6 @@ static void dpu_encoder_phys_cmd_disable(struct dpu_encoder_phys *phys_enc)
 	phys_enc->enable_state = DPU_ENC_DISABLED;
 }
 
-static void dpu_encoder_phys_cmd_destroy(struct dpu_encoder_phys *phys_enc)
-{
-	struct dpu_encoder_phys_cmd *cmd_enc =
-		to_dpu_encoder_phys_cmd(phys_enc);
-
-	kfree(cmd_enc);
-}
-
 static void dpu_encoder_phys_cmd_prepare_for_kickoff(
 		struct dpu_encoder_phys *phys_enc)
 {
@@ -731,7 +725,6 @@ static void dpu_encoder_phys_cmd_init_ops(
 	ops->atomic_mode_set = dpu_encoder_phys_cmd_atomic_mode_set;
 	ops->enable = dpu_encoder_phys_cmd_enable;
 	ops->disable = dpu_encoder_phys_cmd_disable;
-	ops->destroy = dpu_encoder_phys_cmd_destroy;
 	ops->control_vblank_irq = dpu_encoder_phys_cmd_control_vblank_irq;
 	ops->wait_for_commit_done = dpu_encoder_phys_cmd_wait_for_commit_done;
 	ops->prepare_for_kickoff = dpu_encoder_phys_cmd_prepare_for_kickoff;
@@ -746,7 +739,7 @@ static void dpu_encoder_phys_cmd_init_ops(
 	ops->get_line_count = dpu_encoder_phys_cmd_get_line_count;
 }
 
-struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
+struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p)
 {
 	struct dpu_encoder_phys *phys_enc = NULL;
@@ -754,7 +747,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
 
 	DPU_DEBUG("intf\n");
 
-	cmd_enc = kzalloc(sizeof(*cmd_enc), GFP_KERNEL);
+	cmd_enc = drmm_kzalloc(dev, sizeof(*cmd_enc), GFP_KERNEL);
 	if (!cmd_enc) {
 		DPU_ERROR("failed to allocate\n");
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index a01fda7118835..69bc1b2e514cc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -11,6 +11,8 @@
 #include "dpu_trace.h"
 #include "disp/msm_disp_snapshot.h"
 
+#include <drm/drm_managed.h>
+
 #define DPU_DEBUG_VIDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \
 		(e) && (e)->parent ? \
 		(e)->parent->base.id : -1, \
@@ -438,12 +440,6 @@ skip_flush:
 		phys_enc->enable_state = DPU_ENC_ENABLING;
 }
 
-static void dpu_encoder_phys_vid_destroy(struct dpu_encoder_phys *phys_enc)
-{
-	DPU_DEBUG_VIDENC(phys_enc, "\n");
-	kfree(phys_enc);
-}
-
 static int dpu_encoder_phys_vid_wait_for_vblank(
 		struct dpu_encoder_phys *phys_enc)
 {
@@ -681,7 +677,6 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->atomic_mode_set = dpu_encoder_phys_vid_atomic_mode_set;
 	ops->enable = dpu_encoder_phys_vid_enable;
 	ops->disable = dpu_encoder_phys_vid_disable;
-	ops->destroy = dpu_encoder_phys_vid_destroy;
 	ops->control_vblank_irq = dpu_encoder_phys_vid_control_vblank_irq;
 	ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_commit_done;
 	ops->wait_for_vblank = dpu_encoder_phys_vid_wait_for_vblank;
@@ -694,7 +689,7 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->get_frame_count = dpu_encoder_phys_vid_get_frame_count;
 }
 
-struct dpu_encoder_phys *dpu_encoder_phys_vid_init(
+struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p)
 {
 	struct dpu_encoder_phys *phys_enc = NULL;
@@ -704,7 +699,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init(
 		return ERR_PTR(-EINVAL);
 	}
 
-	phys_enc = kzalloc(sizeof(*phys_enc), GFP_KERNEL);
+	phys_enc = drmm_kzalloc(dev, sizeof(*phys_enc), GFP_KERNEL);
 	if (!phys_enc) {
 		DPU_ERROR("failed to create encoder due to memory allocation error\n");
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 0b6a761d68b79..bb94909caa254 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -8,6 +8,7 @@
 #include <linux/debugfs.h>
 
 #include <drm/drm_framebuffer.h>
+#include <drm/drm_managed.h>
 
 #include "dpu_encoder_phys.h"
 #include "dpu_formats.h"
@@ -580,20 +581,6 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc)
 	phys_enc->enable_state = DPU_ENC_DISABLED;
 }
 
-/**
- * dpu_encoder_phys_wb_destroy - destroy writeback encoder
- * @phys_enc:	Pointer to physical encoder
- */
-static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc)
-{
-	if (!phys_enc)
-		return;
-
-	DPU_DEBUG("[wb:%d]\n", phys_enc->hw_wb->idx - WB_0);
-
-	kfree(phys_enc);
-}
-
 static void dpu_encoder_phys_wb_prepare_wb_job(struct dpu_encoder_phys *phys_enc,
 		struct drm_writeback_job *job)
 {
@@ -689,7 +676,6 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->atomic_mode_set = dpu_encoder_phys_wb_atomic_mode_set;
 	ops->enable = dpu_encoder_phys_wb_enable;
 	ops->disable = dpu_encoder_phys_wb_disable;
-	ops->destroy = dpu_encoder_phys_wb_destroy;
 	ops->atomic_check = dpu_encoder_phys_wb_atomic_check;
 	ops->wait_for_commit_done = dpu_encoder_phys_wb_wait_for_commit_done;
 	ops->prepare_for_kickoff = dpu_encoder_phys_wb_prepare_for_kickoff;
@@ -705,9 +691,10 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops)
 
 /**
  * dpu_encoder_phys_wb_init - initialize writeback encoder
+ * @dev:  Corresponding device for devres management
  * @p:	Pointer to init info structure with initialization params
  */
-struct dpu_encoder_phys *dpu_encoder_phys_wb_init(
+struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p)
 {
 	struct dpu_encoder_phys *phys_enc = NULL;
@@ -720,7 +707,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_wb_init(
 		return ERR_PTR(-EINVAL);
 	}
 
-	wb_enc = kzalloc(sizeof(*wb_enc), GFP_KERNEL);
+	wb_enc = drmm_kzalloc(dev, sizeof(*wb_enc), GFP_KERNEL);
 	if (!wb_enc) {
 		DPU_ERROR("failed to allocate wb phys_enc enc\n");
 		return ERR_PTR(-ENOMEM);
-- 
GitLab


From 3285f4acb23c6ea611583fe32c4ad231daf15a08 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:44 +0300
Subject: [PATCH 0813/2340] drm/msm/dpu: drop dpu_encoder_phys_ops::destroy

Drop the dpu_encoder_phys_ops' destroy() callback. No phys backend
implements it anymore, so it is useless.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570058/
Link: https://lore.kernel.org/r/20231201211845.1026967-13-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c    | 18 ------------------
 .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h   |  2 --
 2 files changed, 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index cd2e153860b52..6167aa664b30e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -455,24 +455,6 @@ static void dpu_encoder_destroy(struct drm_encoder *drm_enc)
 	dpu_enc = to_dpu_encoder_virt(drm_enc);
 	DPU_DEBUG_ENC(dpu_enc, "\n");
 
-	mutex_lock(&dpu_enc->enc_lock);
-
-	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
-		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
-
-		if (phys->ops.destroy) {
-			phys->ops.destroy(phys);
-			--dpu_enc->num_phys_encs;
-			dpu_enc->phys_encs[i] = NULL;
-		}
-	}
-
-	if (dpu_enc->num_phys_encs)
-		DPU_ERROR_ENC(dpu_enc, "expected 0 num_phys_encs not %d\n",
-				dpu_enc->num_phys_encs);
-	dpu_enc->num_phys_encs = 0;
-	mutex_unlock(&dpu_enc->enc_lock);
-
 	drm_encoder_cleanup(drm_enc);
 	mutex_destroy(&dpu_enc->enc_lock);
 }
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
index 5dc53b65040e2..b6b48e2c63efa 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
@@ -72,7 +72,6 @@ struct dpu_encoder_phys;
  * @enable:			DRM Call. Enable a DRM mode.
  * @disable:			DRM Call. Disable mode.
  * @atomic_check:		DRM Call. Atomic check new DRM state.
- * @destroy:			DRM Call. Destroy and release resources.
  * @control_vblank_irq		Register/Deregister for VBLANK IRQ
  * @wait_for_commit_done:	Wait for hardware to have flushed the
  *				current pending frames to hardware
@@ -102,7 +101,6 @@ struct dpu_encoder_phys_ops {
 	int (*atomic_check)(struct dpu_encoder_phys *encoder,
 			    struct drm_crtc_state *crtc_state,
 			    struct drm_connector_state *conn_state);
-	void (*destroy)(struct dpu_encoder_phys *encoder);
 	int (*control_vblank_irq)(struct dpu_encoder_phys *enc, bool enable);
 	int (*wait_for_commit_done)(struct dpu_encoder_phys *phys_enc);
 	int (*wait_for_tx_complete)(struct dpu_encoder_phys *phys_enc);
-- 
GitLab


From cd42c56d9c0bb6007939408b4fbc62bbeccc9037 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 2 Dec 2023 00:18:45 +0300
Subject: [PATCH 0814/2340] drm/msm/dpu: use drmm-managed allocation for
 dpu_encoder_virt

It is incorrect to use devm-managed memory allocations for DRM data
structures exposed to userspace. They should use drmm_ allocations.
Change struct dpu_encoder allocation to use drmm_encoder_alloc(). This
removes the need to perform any actions on encoder destruction.

Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570053/
Link: https://lore.kernel.org/r/20231201211845.1026967-14-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 50 +++++----------------
 1 file changed, 10 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 6167aa664b30e..aa1a1646b322a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -442,23 +442,6 @@ int dpu_encoder_get_linecount(struct drm_encoder *drm_enc)
 	return linecount;
 }
 
-static void dpu_encoder_destroy(struct drm_encoder *drm_enc)
-{
-	struct dpu_encoder_virt *dpu_enc = NULL;
-	int i = 0;
-
-	if (!drm_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
-
-	dpu_enc = to_dpu_encoder_virt(drm_enc);
-	DPU_DEBUG_ENC(dpu_enc, "\n");
-
-	drm_encoder_cleanup(drm_enc);
-	mutex_destroy(&dpu_enc->enc_lock);
-}
-
 void dpu_encoder_helper_split_config(
 		struct dpu_encoder_phys *phys_enc,
 		enum dpu_intf interface)
@@ -2346,7 +2329,6 @@ static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = {
 };
 
 static const struct drm_encoder_funcs dpu_encoder_funcs = {
-		.destroy = dpu_encoder_destroy,
 		.late_register = dpu_encoder_late_register,
 		.early_unregister = dpu_encoder_early_unregister,
 };
@@ -2357,20 +2339,13 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms);
-	struct drm_encoder *drm_enc = NULL;
-	struct dpu_encoder_virt *dpu_enc = NULL;
-	int ret = 0;
+	struct dpu_encoder_virt *dpu_enc;
+	int ret;
 
-	dpu_enc = devm_kzalloc(dev->dev, sizeof(*dpu_enc), GFP_KERNEL);
-	if (!dpu_enc)
-		return ERR_PTR(-ENOMEM);
-
-	ret = drm_encoder_init(dev, &dpu_enc->base, &dpu_encoder_funcs,
-			       drm_enc_mode, NULL);
-	if (ret) {
-		devm_kfree(dev->dev, dpu_enc);
-		return ERR_PTR(ret);
-	}
+	dpu_enc = drmm_encoder_alloc(dev, struct dpu_encoder_virt, base,
+				     &dpu_encoder_funcs, drm_enc_mode, NULL);
+	if (IS_ERR(dpu_enc))
+		return ERR_CAST(dpu_enc);
 
 	drm_encoder_helper_add(&dpu_enc->base, &dpu_encoder_helper_funcs);
 
@@ -2380,8 +2355,10 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 	mutex_init(&dpu_enc->rc_lock);
 
 	ret = dpu_encoder_setup_display(dpu_enc, dpu_kms, disp_info);
-	if (ret)
-		goto fail;
+	if (ret) {
+		DPU_ERROR("failed to setup encoder\n");
+		return ERR_PTR(-ENOMEM);
+	}
 
 	atomic_set(&dpu_enc->frame_done_timeout_ms, 0);
 	atomic_set(&dpu_enc->frame_done_timeout_cnt, 0);
@@ -2397,13 +2374,6 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 	DPU_DEBUG_ENC(dpu_enc, "created\n");
 
 	return &dpu_enc->base;
-
-fail:
-	DPU_ERROR("failed to create encoder\n");
-	if (drm_enc)
-		dpu_encoder_destroy(drm_enc);
-
-	return ERR_PTR(ret);
 }
 
 int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
-- 
GitLab


From 9cf5ca1f485cae406968947a92bf304603999fa1 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:31 +0100
Subject: [PATCH 0815/2340] drm: Fix TODO list mentioning non-KMS drivers

Non-KMS drivers have been removed from DRM. Update the TODO list
accordingly.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: a276afc19eec ("drm: Remove some obsolete drm pciids(tdfx, mga, i810, savage, r128, sis, via)")
Cc: Cai Huoqing <cai.huoqing@linux.dev>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: dri-devel@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v6.3+
Cc: linux-doc@vger.kernel.org
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-3-tzimmermann@suse.de
---
 Documentation/gpu/todo.rst | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 503d57c752159..41a264bf84ce0 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -337,8 +337,8 @@ connector register/unregister fixes
 
 Level: Intermediate
 
-Remove load/unload callbacks from all non-DRIVER_LEGACY drivers
----------------------------------------------------------------
+Remove load/unload callbacks
+----------------------------
 
 The load/unload callbacks in struct &drm_driver are very much midlayers, plus
 for historical reasons they get the ordering wrong (and we can't fix that)
@@ -347,8 +347,7 @@ between setting up the &drm_driver structure and calling drm_dev_register().
 - Rework drivers to no longer use the load/unload callbacks, directly coding the
   load/unload sequence into the driver's probe function.
 
-- Once all non-DRIVER_LEGACY drivers are converted, disallow the load/unload
-  callbacks for all modern drivers.
+- Once all drivers are converted, remove the load/unload callbacks.
 
 Contact: Daniel Vetter
 
-- 
GitLab


From 972c45e892448f698047f312763eb984c0b8d7c3 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:32 +0100
Subject: [PATCH 0816/2340] drm: Include <drm/drm_auth.h>

One of the source files includes <drm/drm_auth.h> via <drm/drm_legacy.h>,
which will be removed. Include drm_auth.h directly.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-4-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 39d35fc3a43bc..67907dd427214 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -29,6 +29,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 
+#include <drm/drm_auth.h>
 #include <drm/drm.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_print.h>
-- 
GitLab


From 786b96d01919f8876187d75a6a995ac5783ed0f5 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:33 +0100
Subject: [PATCH 0817/2340] drm/i915: Include <drm/drm_auth.h>

One of the source files includes <drm/drm_auth.h> via <drm/drm_legacy.h>,
which will be removed. Include drm_auth.h directly.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-5-tzimmermann@suse.de
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 683fd8d3151c3..ccc077b74d2de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -9,6 +9,7 @@
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
+#include <drm/drm_auth.h>
 #include <drm/drm_syncobj.h>
 
 #include "display/intel_frontbuffer.h"
-- 
GitLab


From 64c39a93ef6c45447bc093e34e02afeb4c063579 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:34 +0100
Subject: [PATCH 0818/2340] accel: Include <drm/drm_auth.h>

One of the source files includes <drm/drm_auth.h> via <drm/drm_legacy.h>,
which will be removed. Include drm_auth.h directly.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-6-tzimmermann@suse.de
---
 drivers/accel/drm_accel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index 294b572a9c331..24cac4c0274bb 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -11,6 +11,7 @@
 #include <linux/idr.h>
 
 #include <drm/drm_accel.h>
+#include <drm/drm_auth.h>
 #include <drm/drm_debugfs.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
-- 
GitLab


From 9f4db4495b6fa551f18a892f32c71899a20f4923 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:35 +0100
Subject: [PATCH 0819/2340] drm: Include <drm/drm_device.h>

Include <drm/drm_device.h> in drm_ioc32.c. Resolves a depenency
on <drm/drm_legacy.h>, which will be removed.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-7-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_ioc32.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 025dc558c94ee..c0163cb076ad8 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -31,6 +31,7 @@
 #include <linux/ratelimit.h>
 #include <linux/export.h>
 
+#include <drm/drm_device.h>
 #include <drm/drm_file.h>
 #include <drm/drm_print.h>
 
-- 
GitLab


From c45a1e0a2e9d3f6b37d27e636ba905678c84a41a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:36 +0100
Subject: [PATCH 0820/2340] drm/radeon: Do not include <drm/drm_legacy.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Including <drm/drm_legacy.h> is not required by radeon.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-8-tzimmermann@suse.de
---
 drivers/gpu/drm/radeon/radeon_drv.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 34a1c73d3938f..02a65971d140d 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -33,7 +33,6 @@
 
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
-#include <drm/drm_legacy.h>
 
 #include "radeon_family.h"
 
-- 
GitLab


From 184dcdc251420929bf195f99f0b9fb6960788b6d Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:37 +0100
Subject: [PATCH 0821/2340] drm: Remove entry points for legacy ioctls

DRM drivers with user-space mode setting have been removed in Linux
v6.3. [1] Now remove the ioctl entry points for these drivers. Invoking
any of the ioctl ops will unconditionally return -EINVAL to user space.
This has already been the behavior for kernels without CONFIG_DRM_LEGACY
set.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/series/111602/ # [1]
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-9-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_ioc32.c | 610 ------------------------------------
 drivers/gpu/drm/drm_ioctl.c |  57 ----
 2 files changed, 667 deletions(-)

diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index c0163cb076ad8..910cadf14756e 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -37,7 +37,6 @@
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
-#include "drm_legacy.h"
 
 #define DRM_IOCTL_VERSION32		DRM_IOWR(0x00, drm_version32_t)
 #define DRM_IOCTL_GET_UNIQUE32		DRM_IOWR(0x01, drm_unique32_t)
@@ -164,92 +163,6 @@ static int compat_drm_setunique(struct file *file, unsigned int cmd,
 	return -EINVAL;
 }
 
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-typedef struct drm_map32 {
-	u32 offset;		/* Requested physical address (0 for SAREA) */
-	u32 size;		/* Requested physical size (bytes) */
-	enum drm_map_type type;	/* Type of memory to map */
-	enum drm_map_flags flags;	/* Flags */
-	u32 handle;		/* User-space: "Handle" to pass to mmap() */
-	int mtrr;		/* MTRR slot used */
-} drm_map32_t;
-
-static int compat_drm_getmap(struct file *file, unsigned int cmd,
-			     unsigned long arg)
-{
-	drm_map32_t __user *argp = (void __user *)arg;
-	drm_map32_t m32;
-	struct drm_map map;
-	int err;
-
-	if (copy_from_user(&m32, argp, sizeof(m32)))
-		return -EFAULT;
-
-	map.offset = m32.offset;
-	err = drm_ioctl_kernel(file, drm_legacy_getmap_ioctl, &map, 0);
-	if (err)
-		return err;
-
-	m32.offset = map.offset;
-	m32.size = map.size;
-	m32.type = map.type;
-	m32.flags = map.flags;
-	m32.handle = ptr_to_compat((void __user *)map.handle);
-	m32.mtrr = map.mtrr;
-	if (copy_to_user(argp, &m32, sizeof(m32)))
-		return -EFAULT;
-	return 0;
-
-}
-
-static int compat_drm_addmap(struct file *file, unsigned int cmd,
-			     unsigned long arg)
-{
-	drm_map32_t __user *argp = (void __user *)arg;
-	drm_map32_t m32;
-	struct drm_map map;
-	int err;
-
-	if (copy_from_user(&m32, argp, sizeof(m32)))
-		return -EFAULT;
-
-	map.offset = m32.offset;
-	map.size = m32.size;
-	map.type = m32.type;
-	map.flags = m32.flags;
-
-	err = drm_ioctl_kernel(file, drm_legacy_addmap_ioctl, &map,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-	if (err)
-		return err;
-
-	m32.offset = map.offset;
-	m32.mtrr = map.mtrr;
-	m32.handle = ptr_to_compat((void __user *)map.handle);
-	if (map.handle != compat_ptr(m32.handle))
-		pr_err_ratelimited("compat_drm_addmap truncated handle %p for type %d offset %x\n",
-				   map.handle, m32.type, m32.offset);
-
-	if (copy_to_user(argp, &m32, sizeof(m32)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int compat_drm_rmmap(struct file *file, unsigned int cmd,
-			    unsigned long arg)
-{
-	drm_map32_t __user *argp = (void __user *)arg;
-	struct drm_map map;
-	u32 handle;
-
-	if (get_user(handle, &argp->handle))
-		return -EFAULT;
-	map.handle = compat_ptr(handle);
-	return drm_ioctl_kernel(file, drm_legacy_rmmap_ioctl, &map, DRM_AUTH);
-}
-#endif
-
 typedef struct drm_client32 {
 	int idx;	/* Which client desired? */
 	int auth;	/* Is client authenticated? */
@@ -309,501 +222,6 @@ static int compat_drm_getstats(struct file *file, unsigned int cmd,
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-typedef struct drm_buf_desc32 {
-	int count;		 /* Number of buffers of this size */
-	int size;		 /* Size in bytes */
-	int low_mark;		 /* Low water mark */
-	int high_mark;		 /* High water mark */
-	int flags;
-	u32 agp_start;		 /* Start address in the AGP aperture */
-} drm_buf_desc32_t;
-
-static int compat_drm_addbufs(struct file *file, unsigned int cmd,
-			      unsigned long arg)
-{
-	drm_buf_desc32_t __user *argp = (void __user *)arg;
-	drm_buf_desc32_t desc32;
-	struct drm_buf_desc desc;
-	int err;
-
-	if (copy_from_user(&desc32, argp, sizeof(drm_buf_desc32_t)))
-		return -EFAULT;
-
-	desc = (struct drm_buf_desc){
-		desc32.count, desc32.size, desc32.low_mark, desc32.high_mark,
-		desc32.flags, desc32.agp_start
-	};
-
-	err = drm_ioctl_kernel(file, drm_legacy_addbufs, &desc,
-				   DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-	if (err)
-		return err;
-
-	desc32 = (drm_buf_desc32_t){
-		desc.count, desc.size, desc.low_mark, desc.high_mark,
-		desc.flags, desc.agp_start
-	};
-	if (copy_to_user(argp, &desc32, sizeof(drm_buf_desc32_t)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int compat_drm_markbufs(struct file *file, unsigned int cmd,
-			       unsigned long arg)
-{
-	drm_buf_desc32_t b32;
-	drm_buf_desc32_t __user *argp = (void __user *)arg;
-	struct drm_buf_desc buf;
-
-	if (copy_from_user(&b32, argp, sizeof(b32)))
-		return -EFAULT;
-
-	buf.size = b32.size;
-	buf.low_mark = b32.low_mark;
-	buf.high_mark = b32.high_mark;
-
-	return drm_ioctl_kernel(file, drm_legacy_markbufs, &buf,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-}
-
-typedef struct drm_buf_info32 {
-	int count;		/**< Entries in list */
-	u32 list;
-} drm_buf_info32_t;
-
-static int copy_one_buf32(void *data, int count, struct drm_buf_entry *from)
-{
-	drm_buf_info32_t *request = data;
-	drm_buf_desc32_t __user *to = compat_ptr(request->list);
-	drm_buf_desc32_t v = {.count = from->buf_count,
-			      .size = from->buf_size,
-			      .low_mark = from->low_mark,
-			      .high_mark = from->high_mark};
-
-	if (copy_to_user(to + count, &v, offsetof(drm_buf_desc32_t, flags)))
-		return -EFAULT;
-	return 0;
-}
-
-static int drm_legacy_infobufs32(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	drm_buf_info32_t *request = data;
-
-	return __drm_legacy_infobufs(dev, data, &request->count, copy_one_buf32);
-}
-
-static int compat_drm_infobufs(struct file *file, unsigned int cmd,
-			       unsigned long arg)
-{
-	drm_buf_info32_t req32;
-	drm_buf_info32_t __user *argp = (void __user *)arg;
-	int err;
-
-	if (copy_from_user(&req32, argp, sizeof(req32)))
-		return -EFAULT;
-
-	if (req32.count < 0)
-		req32.count = 0;
-
-	err = drm_ioctl_kernel(file, drm_legacy_infobufs32, &req32, DRM_AUTH);
-	if (err)
-		return err;
-
-	if (put_user(req32.count, &argp->count))
-		return -EFAULT;
-
-	return 0;
-}
-
-typedef struct drm_buf_pub32 {
-	int idx;		/**< Index into the master buffer list */
-	int total;		/**< Buffer size */
-	int used;		/**< Amount of buffer in use (for DMA) */
-	u32 address;		/**< Address of buffer */
-} drm_buf_pub32_t;
-
-typedef struct drm_buf_map32 {
-	int count;		/**< Length of the buffer list */
-	u32 virtual;		/**< Mmap'd area in user-virtual */
-	u32 list;		/**< Buffer information */
-} drm_buf_map32_t;
-
-static int map_one_buf32(void *data, int idx, unsigned long virtual,
-			struct drm_buf *buf)
-{
-	drm_buf_map32_t *request = data;
-	drm_buf_pub32_t __user *to = compat_ptr(request->list) + idx;
-	drm_buf_pub32_t v;
-
-	v.idx = buf->idx;
-	v.total = buf->total;
-	v.used = 0;
-	v.address = virtual + buf->offset;
-	if (copy_to_user(to, &v, sizeof(v)))
-		return -EFAULT;
-	return 0;
-}
-
-static int drm_legacy_mapbufs32(struct drm_device *dev, void *data,
-		       struct drm_file *file_priv)
-{
-	drm_buf_map32_t *request = data;
-	void __user *v;
-	int err = __drm_legacy_mapbufs(dev, data, &request->count,
-				    &v, map_one_buf32,
-				    file_priv);
-	request->virtual = ptr_to_compat(v);
-	return err;
-}
-
-static int compat_drm_mapbufs(struct file *file, unsigned int cmd,
-			      unsigned long arg)
-{
-	drm_buf_map32_t __user *argp = (void __user *)arg;
-	drm_buf_map32_t req32;
-	int err;
-
-	if (copy_from_user(&req32, argp, sizeof(req32)))
-		return -EFAULT;
-	if (req32.count < 0)
-		return -EINVAL;
-
-	err = drm_ioctl_kernel(file, drm_legacy_mapbufs32, &req32, DRM_AUTH);
-	if (err)
-		return err;
-
-	if (put_user(req32.count, &argp->count)
-	    || put_user(req32.virtual, &argp->virtual))
-		return -EFAULT;
-
-	return 0;
-}
-
-typedef struct drm_buf_free32 {
-	int count;
-	u32 list;
-} drm_buf_free32_t;
-
-static int compat_drm_freebufs(struct file *file, unsigned int cmd,
-			       unsigned long arg)
-{
-	drm_buf_free32_t req32;
-	struct drm_buf_free request;
-	drm_buf_free32_t __user *argp = (void __user *)arg;
-
-	if (copy_from_user(&req32, argp, sizeof(req32)))
-		return -EFAULT;
-
-	request.count = req32.count;
-	request.list = compat_ptr(req32.list);
-	return drm_ioctl_kernel(file, drm_legacy_freebufs, &request, DRM_AUTH);
-}
-
-typedef struct drm_ctx_priv_map32 {
-	unsigned int ctx_id;	 /**< Context requesting private mapping */
-	u32 handle;		/**< Handle of map */
-} drm_ctx_priv_map32_t;
-
-static int compat_drm_setsareactx(struct file *file, unsigned int cmd,
-				  unsigned long arg)
-{
-	drm_ctx_priv_map32_t req32;
-	struct drm_ctx_priv_map request;
-	drm_ctx_priv_map32_t __user *argp = (void __user *)arg;
-
-	if (copy_from_user(&req32, argp, sizeof(req32)))
-		return -EFAULT;
-
-	request.ctx_id = req32.ctx_id;
-	request.handle = compat_ptr(req32.handle);
-	return drm_ioctl_kernel(file, drm_legacy_setsareactx, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-}
-
-static int compat_drm_getsareactx(struct file *file, unsigned int cmd,
-				  unsigned long arg)
-{
-	struct drm_ctx_priv_map req;
-	drm_ctx_priv_map32_t req32;
-	drm_ctx_priv_map32_t __user *argp = (void __user *)arg;
-	int err;
-
-	if (copy_from_user(&req32, argp, sizeof(req32)))
-		return -EFAULT;
-
-	req.ctx_id = req32.ctx_id;
-	err = drm_ioctl_kernel(file, drm_legacy_getsareactx, &req, DRM_AUTH);
-	if (err)
-		return err;
-
-	req32.handle = ptr_to_compat((void __user *)req.handle);
-	if (copy_to_user(argp, &req32, sizeof(req32)))
-		return -EFAULT;
-
-	return 0;
-}
-
-typedef struct drm_ctx_res32 {
-	int count;
-	u32 contexts;
-} drm_ctx_res32_t;
-
-static int compat_drm_resctx(struct file *file, unsigned int cmd,
-			     unsigned long arg)
-{
-	drm_ctx_res32_t __user *argp = (void __user *)arg;
-	drm_ctx_res32_t res32;
-	struct drm_ctx_res res;
-	int err;
-
-	if (copy_from_user(&res32, argp, sizeof(res32)))
-		return -EFAULT;
-
-	res.count = res32.count;
-	res.contexts = compat_ptr(res32.contexts);
-	err = drm_ioctl_kernel(file, drm_legacy_resctx, &res, DRM_AUTH);
-	if (err)
-		return err;
-
-	res32.count = res.count;
-	if (copy_to_user(argp, &res32, sizeof(res32)))
-		return -EFAULT;
-
-	return 0;
-}
-
-typedef struct drm_dma32 {
-	int context;		  /**< Context handle */
-	int send_count;		  /**< Number of buffers to send */
-	u32 send_indices;	  /**< List of handles to buffers */
-	u32 send_sizes;		  /**< Lengths of data to send */
-	enum drm_dma_flags flags;		  /**< Flags */
-	int request_count;	  /**< Number of buffers requested */
-	int request_size;	  /**< Desired size for buffers */
-	u32 request_indices;	  /**< Buffer information */
-	u32 request_sizes;
-	int granted_count;	  /**< Number of buffers granted */
-} drm_dma32_t;
-
-static int compat_drm_dma(struct file *file, unsigned int cmd,
-			  unsigned long arg)
-{
-	drm_dma32_t d32;
-	drm_dma32_t __user *argp = (void __user *)arg;
-	struct drm_dma d;
-	int err;
-
-	if (copy_from_user(&d32, argp, sizeof(d32)))
-		return -EFAULT;
-
-	d.context = d32.context;
-	d.send_count = d32.send_count;
-	d.send_indices = compat_ptr(d32.send_indices);
-	d.send_sizes = compat_ptr(d32.send_sizes);
-	d.flags = d32.flags;
-	d.request_count = d32.request_count;
-	d.request_indices = compat_ptr(d32.request_indices);
-	d.request_sizes = compat_ptr(d32.request_sizes);
-	err = drm_ioctl_kernel(file, drm_legacy_dma_ioctl, &d, DRM_AUTH);
-	if (err)
-		return err;
-
-	if (put_user(d.request_size, &argp->request_size)
-	    || put_user(d.granted_count, &argp->granted_count))
-		return -EFAULT;
-
-	return 0;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-#if IS_ENABLED(CONFIG_AGP)
-typedef struct drm_agp_mode32 {
-	u32 mode;	/**< AGP mode */
-} drm_agp_mode32_t;
-
-static int compat_drm_agp_enable(struct file *file, unsigned int cmd,
-				 unsigned long arg)
-{
-	drm_agp_mode32_t __user *argp = (void __user *)arg;
-	struct drm_agp_mode mode;
-
-	if (get_user(mode.mode, &argp->mode))
-		return -EFAULT;
-
-	return drm_ioctl_kernel(file,  drm_legacy_agp_enable_ioctl, &mode,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-}
-
-typedef struct drm_agp_info32 {
-	int agp_version_major;
-	int agp_version_minor;
-	u32 mode;
-	u32 aperture_base;	/* physical address */
-	u32 aperture_size;	/* bytes */
-	u32 memory_allowed;	/* bytes */
-	u32 memory_used;
-
-	/* PCI information */
-	unsigned short id_vendor;
-	unsigned short id_device;
-} drm_agp_info32_t;
-
-static int compat_drm_agp_info(struct file *file, unsigned int cmd,
-			       unsigned long arg)
-{
-	drm_agp_info32_t __user *argp = (void __user *)arg;
-	drm_agp_info32_t i32;
-	struct drm_agp_info info;
-	int err;
-
-	err = drm_ioctl_kernel(file, drm_legacy_agp_info_ioctl, &info, DRM_AUTH);
-	if (err)
-		return err;
-
-	i32.agp_version_major = info.agp_version_major;
-	i32.agp_version_minor = info.agp_version_minor;
-	i32.mode = info.mode;
-	i32.aperture_base = info.aperture_base;
-	i32.aperture_size = info.aperture_size;
-	i32.memory_allowed = info.memory_allowed;
-	i32.memory_used = info.memory_used;
-	i32.id_vendor = info.id_vendor;
-	i32.id_device = info.id_device;
-	if (copy_to_user(argp, &i32, sizeof(i32)))
-		return -EFAULT;
-
-	return 0;
-}
-
-typedef struct drm_agp_buffer32 {
-	u32 size;	/**< In bytes -- will round to page boundary */
-	u32 handle;	/**< Used for binding / unbinding */
-	u32 type;	/**< Type of memory to allocate */
-	u32 physical;	/**< Physical used by i810 */
-} drm_agp_buffer32_t;
-
-static int compat_drm_agp_alloc(struct file *file, unsigned int cmd,
-				unsigned long arg)
-{
-	drm_agp_buffer32_t __user *argp = (void __user *)arg;
-	drm_agp_buffer32_t req32;
-	struct drm_agp_buffer request;
-	int err;
-
-	if (copy_from_user(&req32, argp, sizeof(req32)))
-		return -EFAULT;
-
-	request.size = req32.size;
-	request.type = req32.type;
-	err = drm_ioctl_kernel(file, drm_legacy_agp_alloc_ioctl, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-	if (err)
-		return err;
-
-	req32.handle = request.handle;
-	req32.physical = request.physical;
-	if (copy_to_user(argp, &req32, sizeof(req32))) {
-		drm_ioctl_kernel(file, drm_legacy_agp_free_ioctl, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-		return -EFAULT;
-	}
-
-	return 0;
-}
-
-static int compat_drm_agp_free(struct file *file, unsigned int cmd,
-			       unsigned long arg)
-{
-	drm_agp_buffer32_t __user *argp = (void __user *)arg;
-	struct drm_agp_buffer request;
-
-	if (get_user(request.handle, &argp->handle))
-		return -EFAULT;
-
-	return drm_ioctl_kernel(file, drm_legacy_agp_free_ioctl, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-}
-
-typedef struct drm_agp_binding32 {
-	u32 handle;	/**< From drm_agp_buffer */
-	u32 offset;	/**< In bytes -- will round to page boundary */
-} drm_agp_binding32_t;
-
-static int compat_drm_agp_bind(struct file *file, unsigned int cmd,
-			       unsigned long arg)
-{
-	drm_agp_binding32_t __user *argp = (void __user *)arg;
-	drm_agp_binding32_t req32;
-	struct drm_agp_binding request;
-
-	if (copy_from_user(&req32, argp, sizeof(req32)))
-		return -EFAULT;
-
-	request.handle = req32.handle;
-	request.offset = req32.offset;
-	return drm_ioctl_kernel(file, drm_legacy_agp_bind_ioctl, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-}
-
-static int compat_drm_agp_unbind(struct file *file, unsigned int cmd,
-				 unsigned long arg)
-{
-	drm_agp_binding32_t __user *argp = (void __user *)arg;
-	struct drm_agp_binding request;
-
-	if (get_user(request.handle, &argp->handle))
-		return -EFAULT;
-
-	return drm_ioctl_kernel(file, drm_legacy_agp_unbind_ioctl, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-}
-#endif /* CONFIG_AGP */
-
-typedef struct drm_scatter_gather32 {
-	u32 size;	/**< In bytes -- will round to page boundary */
-	u32 handle;	/**< Used for mapping / unmapping */
-} drm_scatter_gather32_t;
-
-static int compat_drm_sg_alloc(struct file *file, unsigned int cmd,
-			       unsigned long arg)
-{
-	drm_scatter_gather32_t __user *argp = (void __user *)arg;
-	struct drm_scatter_gather request;
-	int err;
-
-	if (get_user(request.size, &argp->size))
-		return -EFAULT;
-
-	err = drm_ioctl_kernel(file, drm_legacy_sg_alloc, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-	if (err)
-		return err;
-
-	/* XXX not sure about the handle conversion here... */
-	if (put_user(request.handle >> PAGE_SHIFT, &argp->handle))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int compat_drm_sg_free(struct file *file, unsigned int cmd,
-			      unsigned long arg)
-{
-	drm_scatter_gather32_t __user *argp = (void __user *)arg;
-	struct drm_scatter_gather request;
-	unsigned long x;
-
-	if (get_user(x, &argp->handle))
-		return -EFAULT;
-	request.handle = x << PAGE_SHIFT;
-	return drm_ioctl_kernel(file, drm_legacy_sg_free, &request,
-				DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY);
-}
-#endif
 #if defined(CONFIG_X86)
 typedef struct drm_update_draw32 {
 	drm_drawable_t handle;
@@ -915,37 +333,9 @@ static struct {
 #define DRM_IOCTL32_DEF(n, f) [DRM_IOCTL_NR(n##32)] = {.fn = f, .name = #n}
 	DRM_IOCTL32_DEF(DRM_IOCTL_VERSION, compat_drm_version),
 	DRM_IOCTL32_DEF(DRM_IOCTL_GET_UNIQUE, compat_drm_getunique),
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-	DRM_IOCTL32_DEF(DRM_IOCTL_GET_MAP, compat_drm_getmap),
-#endif
 	DRM_IOCTL32_DEF(DRM_IOCTL_GET_CLIENT, compat_drm_getclient),
 	DRM_IOCTL32_DEF(DRM_IOCTL_GET_STATS, compat_drm_getstats),
 	DRM_IOCTL32_DEF(DRM_IOCTL_SET_UNIQUE, compat_drm_setunique),
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-	DRM_IOCTL32_DEF(DRM_IOCTL_ADD_MAP, compat_drm_addmap),
-	DRM_IOCTL32_DEF(DRM_IOCTL_ADD_BUFS, compat_drm_addbufs),
-	DRM_IOCTL32_DEF(DRM_IOCTL_MARK_BUFS, compat_drm_markbufs),
-	DRM_IOCTL32_DEF(DRM_IOCTL_INFO_BUFS, compat_drm_infobufs),
-	DRM_IOCTL32_DEF(DRM_IOCTL_MAP_BUFS, compat_drm_mapbufs),
-	DRM_IOCTL32_DEF(DRM_IOCTL_FREE_BUFS, compat_drm_freebufs),
-	DRM_IOCTL32_DEF(DRM_IOCTL_RM_MAP, compat_drm_rmmap),
-	DRM_IOCTL32_DEF(DRM_IOCTL_SET_SAREA_CTX, compat_drm_setsareactx),
-	DRM_IOCTL32_DEF(DRM_IOCTL_GET_SAREA_CTX, compat_drm_getsareactx),
-	DRM_IOCTL32_DEF(DRM_IOCTL_RES_CTX, compat_drm_resctx),
-	DRM_IOCTL32_DEF(DRM_IOCTL_DMA, compat_drm_dma),
-#if IS_ENABLED(CONFIG_AGP)
-	DRM_IOCTL32_DEF(DRM_IOCTL_AGP_ENABLE, compat_drm_agp_enable),
-	DRM_IOCTL32_DEF(DRM_IOCTL_AGP_INFO, compat_drm_agp_info),
-	DRM_IOCTL32_DEF(DRM_IOCTL_AGP_ALLOC, compat_drm_agp_alloc),
-	DRM_IOCTL32_DEF(DRM_IOCTL_AGP_FREE, compat_drm_agp_free),
-	DRM_IOCTL32_DEF(DRM_IOCTL_AGP_BIND, compat_drm_agp_bind),
-	DRM_IOCTL32_DEF(DRM_IOCTL_AGP_UNBIND, compat_drm_agp_unbind),
-#endif
-#endif
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-	DRM_IOCTL32_DEF(DRM_IOCTL_SG_ALLOC, compat_drm_sg_alloc),
-	DRM_IOCTL32_DEF(DRM_IOCTL_SG_FREE, compat_drm_sg_free),
-#endif
 #if defined(CONFIG_X86)
 	DRM_IOCTL32_DEF(DRM_IOCTL_UPDATE_DRAW, compat_drm_update_draw),
 #endif
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ea303386e6886..93f6bf15c0ae4 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -42,7 +42,6 @@
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
-#include "drm_legacy.h"
 
 /**
  * DOC: getunique and setversion story
@@ -572,21 +571,11 @@ static int drm_ioctl_permit(u32 flags, struct drm_file *file_priv)
 		.name = #ioctl			\
 	}
 
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-#define DRM_LEGACY_IOCTL_DEF(ioctl, _func, _flags)  DRM_IOCTL_DEF(ioctl, _func, _flags)
-#else
-#define DRM_LEGACY_IOCTL_DEF(ioctl, _func, _flags) DRM_IOCTL_DEF(ioctl, drm_invalid_op, _flags)
-#endif
-
 /* Ioctl table */
 static const struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_legacy_irq_by_busid,
-			     DRM_MASTER|DRM_ROOT_ONLY),
-
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_legacy_getmap_ioctl, 0),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, 0),
@@ -599,60 +588,14 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_UNBLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_MASTER),
 
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_ADD_MAP, drm_legacy_addmap_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_RM_MAP, drm_legacy_rmmap_ioctl, DRM_AUTH),
-
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_legacy_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_legacy_getsareactx, DRM_AUTH),
-
 	DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, 0),
 
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_legacy_addctx, DRM_AUTH|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_legacy_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_MOD_CTX, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_GET_CTX, drm_legacy_getctx, DRM_AUTH),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SWITCH_CTX, drm_legacy_switchctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_legacy_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_RES_CTX, drm_legacy_resctx, DRM_AUTH),
-
 	DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_LOCK, drm_legacy_lock, DRM_AUTH),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_UNLOCK, drm_legacy_unlock, DRM_AUTH),
-
 	DRM_IOCTL_DEF(DRM_IOCTL_FINISH, drm_noop, DRM_AUTH),
 
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_ADD_BUFS, drm_legacy_addbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_MARK_BUFS, drm_legacy_markbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_INFO_BUFS, drm_legacy_infobufs, DRM_AUTH),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_MAP_BUFS, drm_legacy_mapbufs, DRM_AUTH),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_FREE_BUFS, drm_legacy_freebufs, DRM_AUTH),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_DMA, drm_legacy_dma_ioctl, DRM_AUTH),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_CONTROL, drm_legacy_irq_control, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-
-#if IS_ENABLED(CONFIG_AGP)
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_ACQUIRE, drm_legacy_agp_acquire_ioctl,
-			     DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_RELEASE, drm_legacy_agp_release_ioctl,
-			     DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_ENABLE, drm_legacy_agp_enable_ioctl,
-			     DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_INFO, drm_legacy_agp_info_ioctl, DRM_AUTH),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_ALLOC, drm_legacy_agp_alloc_ioctl,
-			     DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_FREE, drm_legacy_agp_free_ioctl,
-			     DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_BIND, drm_legacy_agp_bind_ioctl,
-			     DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_AGP_UNBIND, drm_legacy_agp_unbind_ioctl,
-			     DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-#endif
-
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SG_ALLOC, drm_legacy_sg_alloc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SG_FREE, drm_legacy_sg_free, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-
 	DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, DRM_UNLOCKED),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_legacy_modeset_ctl_ioctl, 0),
-- 
GitLab


From 6bb0814be42e109555dd63e59e6eabf968b9b016 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:38 +0100
Subject: [PATCH 0822/2340] drm: Remove the legacy DRM_IOCTL_MODESET_CTL ioctl

DRM drivers with user-space mode setting have been removed in Linux
v6.3. [1] Now remove the ioctl entry points for these drivers. Invoking
any of the ioctl ops will unconditionally return -EINVAL to user space.

Invoking DRM_IOCTL_MODESET_CTL is different from the other legacy
ioctl ops as it returns 0 even without CONFIG_DRM_LEGACY set. From the
original commit 29935554b384 ("drm: Disallow DRM_IOCTL_MODESET_CTL for
KMS drivers") it is not apparent how or why the operation differs from
the others. It is likely just an oversight in commit 61ae227032e7
("drm: allow removal of legacy codepaths (v4.1)"), which allowed
disabling leagacy ioctls in the first place. Still keep this removal
separate from the other ioctls to allow an easy revert, if necessary.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/series/111602/ # [1]
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-10-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_internal.h |  2 -
 drivers/gpu/drm/drm_ioctl.c    |  2 -
 drivers/gpu/drm/drm_vblank.c   | 82 ----------------------------------
 3 files changed, 86 deletions(-)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index b7a311efa2b18..fa08a313127e2 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -117,8 +117,6 @@ void drm_handle_vblank_works(struct drm_vblank_crtc *vblank);
 /* IOCTLS */
 int drm_wait_vblank_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
-int drm_legacy_modeset_ctl_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv);
 
 /* drm_irq.c */
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 93f6bf15c0ae4..ebba34ba734ea 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -598,8 +598,6 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 
 	DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, DRM_UNLOCKED),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_legacy_modeset_ctl_ioctl, 0),
-
 	DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index 877e2067534fa..a11f164b2384f 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -1574,88 +1574,6 @@ void drm_crtc_vblank_restore(struct drm_crtc *crtc)
 }
 EXPORT_SYMBOL(drm_crtc_vblank_restore);
 
-static void drm_legacy_vblank_pre_modeset(struct drm_device *dev,
-					  unsigned int pipe)
-{
-	struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
-
-	/* vblank is not initialized (IRQ not installed ?), or has been freed */
-	if (!drm_dev_has_vblank(dev))
-		return;
-
-	if (drm_WARN_ON(dev, pipe >= dev->num_crtcs))
-		return;
-
-	/*
-	 * To avoid all the problems that might happen if interrupts
-	 * were enabled/disabled around or between these calls, we just
-	 * have the kernel take a reference on the CRTC (just once though
-	 * to avoid corrupting the count if multiple, mismatch calls occur),
-	 * so that interrupts remain enabled in the interim.
-	 */
-	if (!vblank->inmodeset) {
-		vblank->inmodeset = 0x1;
-		if (drm_vblank_get(dev, pipe) == 0)
-			vblank->inmodeset |= 0x2;
-	}
-}
-
-static void drm_legacy_vblank_post_modeset(struct drm_device *dev,
-					   unsigned int pipe)
-{
-	struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
-
-	/* vblank is not initialized (IRQ not installed ?), or has been freed */
-	if (!drm_dev_has_vblank(dev))
-		return;
-
-	if (drm_WARN_ON(dev, pipe >= dev->num_crtcs))
-		return;
-
-	if (vblank->inmodeset) {
-		spin_lock_irq(&dev->vbl_lock);
-		drm_reset_vblank_timestamp(dev, pipe);
-		spin_unlock_irq(&dev->vbl_lock);
-
-		if (vblank->inmodeset & 0x2)
-			drm_vblank_put(dev, pipe);
-
-		vblank->inmodeset = 0;
-	}
-}
-
-int drm_legacy_modeset_ctl_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv)
-{
-	struct drm_modeset_ctl *modeset = data;
-	unsigned int pipe;
-
-	/* If drm_vblank_init() hasn't been called yet, just no-op */
-	if (!drm_dev_has_vblank(dev))
-		return 0;
-
-	/* KMS drivers handle this internally */
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return 0;
-
-	pipe = modeset->crtc;
-	if (pipe >= dev->num_crtcs)
-		return -EINVAL;
-
-	switch (modeset->cmd) {
-	case _DRM_PRE_MODESET:
-		drm_legacy_vblank_pre_modeset(dev, pipe);
-		break;
-	case _DRM_POST_MODESET:
-		drm_legacy_vblank_post_modeset(dev, pipe);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe,
 				  u64 req_seq,
 				  union drm_wait_vblank *vblwait,
-- 
GitLab


From 2722ac1ce1c1f3e6a3a0c59f0072b2f9ba136551 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:39 +0100
Subject: [PATCH 0823/2340] drm: Remove support for legacy drivers

Remove all hooks and calls into code for user-space mode setting from
the DRM core. Without the drivers and ioctl entry points, none of this
is required any longer.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-11-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_auth.c   |  8 +----
 drivers/gpu/drm/drm_drv.c    | 17 ----------
 drivers/gpu/drm/drm_file.c   | 64 ++----------------------------------
 drivers/gpu/drm/drm_vblank.c | 19 -----------
 4 files changed, 3 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 2ed2585ded378..252c105d614ff 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -37,13 +37,12 @@
 #include <drm/drm_print.h>
 
 #include "drm_internal.h"
-#include "drm_legacy.h"
 
 /**
  * DOC: master and authentication
  *
  * &struct drm_master is used to track groups of clients with open
- * primary/legacy device nodes. For every &struct drm_file which has had at
+ * primary device nodes. For every &struct drm_file which has had at
  * least once successfully became the device master (either through the
  * SET_MASTER IOCTL, or implicitly through opening the primary device node when
  * no one else is the current master that time) there exists one &drm_master.
@@ -139,7 +138,6 @@ struct drm_master *drm_master_create(struct drm_device *dev)
 		return NULL;
 
 	kref_init(&master->refcount);
-	drm_master_legacy_init(master);
 	idr_init_base(&master->magic_map, 1);
 	master->dev = dev;
 
@@ -365,8 +363,6 @@ void drm_master_release(struct drm_file *file_priv)
 	if (!drm_is_current_master_locked(file_priv))
 		goto out;
 
-	drm_legacy_lock_master_cleanup(dev, master);
-
 	if (dev->master == file_priv->master)
 		drm_drop_master(dev, file_priv);
 out:
@@ -429,8 +425,6 @@ static void drm_master_destroy(struct kref *kref)
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_lease_destroy(master);
 
-	drm_legacy_master_rmmaps(dev, master);
-
 	idr_destroy(&master->magic_map);
 	idr_destroy(&master->leases);
 	idr_destroy(&master->lessee_idr);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3c835c99daadd..243cacb3575c0 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -48,7 +48,6 @@
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
-#include "drm_legacy.h"
 
 MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl");
 MODULE_DESCRIPTION("DRM shared core routines");
@@ -585,8 +584,6 @@ static void drm_fs_inode_free(struct inode *inode)
 
 static void drm_dev_init_release(struct drm_device *dev, void *res)
 {
-	drm_legacy_ctxbitmap_cleanup(dev);
-	drm_legacy_remove_map_hash(dev);
 	drm_fs_inode_free(dev->anon_inode);
 
 	put_device(dev->dev);
@@ -597,7 +594,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
 	mutex_destroy(&dev->clientlist_mutex);
 	mutex_destroy(&dev->filelist_mutex);
 	mutex_destroy(&dev->struct_mutex);
-	drm_legacy_destroy_members(dev);
 }
 
 static int drm_dev_init(struct drm_device *dev,
@@ -632,7 +628,6 @@ static int drm_dev_init(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	drm_legacy_init_members(dev);
 	INIT_LIST_HEAD(&dev->filelist);
 	INIT_LIST_HEAD(&dev->filelist_internal);
 	INIT_LIST_HEAD(&dev->clientlist);
@@ -673,12 +668,6 @@ static int drm_dev_init(struct drm_device *dev,
 			goto err;
 	}
 
-	ret = drm_legacy_create_map_hash(dev);
-	if (ret)
-		goto err;
-
-	drm_legacy_ctxbitmap_init(dev);
-
 	if (drm_core_check_feature(dev, DRIVER_GEM)) {
 		ret = drm_gem_init(dev);
 		if (ret) {
@@ -996,9 +985,6 @@ EXPORT_SYMBOL(drm_dev_register);
  */
 void drm_dev_unregister(struct drm_device *dev)
 {
-	if (drm_core_check_feature(dev, DRIVER_LEGACY))
-		drm_lastclose(dev);
-
 	dev->registered = false;
 
 	drm_client_dev_unregister(dev);
@@ -1009,9 +995,6 @@ void drm_dev_unregister(struct drm_device *dev)
 	if (dev->driver->unload)
 		dev->driver->unload(dev);
 
-	drm_legacy_pci_agp_destroy(dev);
-	drm_legacy_rmmaps(dev);
-
 	remove_compat_control_link(dev);
 	drm_minor_unregister(dev, DRM_MINOR_ACCEL);
 	drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 5ddaffd325865..987c1a5d1773b 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -47,21 +47,12 @@
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
-#include "drm_legacy.h"
 
 /* from BKL pushdown */
 DEFINE_MUTEX(drm_global_mutex);
 
 bool drm_dev_needs_global_mutex(struct drm_device *dev)
 {
-	/*
-	 * Legacy drivers rely on all kinds of BKL locking semantics, don't
-	 * bother. They also still need BKL locking for their ioctls, so better
-	 * safe than sorry.
-	 */
-	if (drm_core_check_feature(dev, DRIVER_LEGACY))
-		return true;
-
 	/*
 	 * The deprecated ->load callback must be called after the driver is
 	 * already registered. This means such drivers rely on the BKL to make
@@ -107,9 +98,7 @@ bool drm_dev_needs_global_mutex(struct drm_device *dev)
  * drm_send_event() as the main starting points.
  *
  * The memory mapping implementation will vary depending on how the driver
- * manages memory. Legacy drivers will use the deprecated drm_legacy_mmap()
- * function, modern drivers should use one of the provided memory-manager
- * specific implementations. For GEM-based drivers this is drm_gem_mmap().
+ * manages memory. For GEM-based drivers this is drm_gem_mmap().
  *
  * No other file operations are supported by the DRM userspace API. Overall the
  * following is an example &file_operations structure::
@@ -254,18 +243,6 @@ void drm_file_free(struct drm_file *file)
 		     (long)old_encode_dev(file->minor->kdev->devt),
 		     atomic_read(&dev->open_count));
 
-#ifdef CONFIG_DRM_LEGACY
-	if (drm_core_check_feature(dev, DRIVER_LEGACY) &&
-	    dev->driver->preclose)
-		dev->driver->preclose(dev, file);
-#endif
-
-	if (drm_core_check_feature(dev, DRIVER_LEGACY))
-		drm_legacy_lock_release(dev, file->filp);
-
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA))
-		drm_legacy_reclaim_buffers(dev, file);
-
 	drm_events_release(file);
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
@@ -279,8 +256,6 @@ void drm_file_free(struct drm_file *file)
 	if (drm_core_check_feature(dev, DRIVER_GEM))
 		drm_gem_release(dev, file);
 
-	drm_legacy_ctxbitmap_flush(dev, file);
-
 	if (drm_is_primary_client(file))
 		drm_master_release(file);
 
@@ -367,29 +342,6 @@ int drm_open_helper(struct file *filp, struct drm_minor *minor)
 	list_add(&priv->lhead, &dev->filelist);
 	mutex_unlock(&dev->filelist_mutex);
 
-#ifdef CONFIG_DRM_LEGACY
-#ifdef __alpha__
-	/*
-	 * Default the hose
-	 */
-	if (!dev->hose) {
-		struct pci_dev *pci_dev;
-
-		pci_dev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, NULL);
-		if (pci_dev) {
-			dev->hose = pci_dev->sysdata;
-			pci_dev_put(pci_dev);
-		}
-		if (!dev->hose) {
-			struct pci_bus *b = list_entry(pci_root_buses.next,
-				struct pci_bus, node);
-			if (b)
-				dev->hose = b->sysdata;
-		}
-	}
-#endif
-#endif
-
 	return 0;
 }
 
@@ -411,7 +363,6 @@ int drm_open(struct inode *inode, struct file *filp)
 	struct drm_device *dev;
 	struct drm_minor *minor;
 	int retcode;
-	int need_setup = 0;
 
 	minor = drm_minor_acquire(iminor(inode));
 	if (IS_ERR(minor))
@@ -421,8 +372,7 @@ int drm_open(struct inode *inode, struct file *filp)
 	if (drm_dev_needs_global_mutex(dev))
 		mutex_lock(&drm_global_mutex);
 
-	if (!atomic_fetch_inc(&dev->open_count))
-		need_setup = 1;
+	atomic_fetch_inc(&dev->open_count);
 
 	/* share address_space across all char-devs of a single device */
 	filp->f_mapping = dev->anon_inode->i_mapping;
@@ -430,13 +380,6 @@ int drm_open(struct inode *inode, struct file *filp)
 	retcode = drm_open_helper(filp, minor);
 	if (retcode)
 		goto err_undo;
-	if (need_setup) {
-		retcode = drm_legacy_setup(dev);
-		if (retcode) {
-			drm_close_helper(filp);
-			goto err_undo;
-		}
-	}
 
 	if (drm_dev_needs_global_mutex(dev))
 		mutex_unlock(&drm_global_mutex);
@@ -460,9 +403,6 @@ void drm_lastclose(struct drm_device * dev)
 		dev->driver->lastclose(dev);
 	drm_dbg_core(dev, "driver lastclose completed\n");
 
-	if (drm_core_check_feature(dev, DRIVER_LEGACY))
-		drm_legacy_dev_reinit(dev);
-
 	drm_client_dev_restore(dev);
 }
 
diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index a11f164b2384f..702a12bc93bd9 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -210,11 +210,6 @@ static u32 __get_vblank_counter(struct drm_device *dev, unsigned int pipe)
 		if (crtc->funcs->get_vblank_counter)
 			return crtc->funcs->get_vblank_counter(crtc);
 	}
-#ifdef CONFIG_DRM_LEGACY
-	else if (dev->driver->get_vblank_counter) {
-		return dev->driver->get_vblank_counter(dev, pipe);
-	}
-#endif
 
 	return drm_vblank_no_hw_counter(dev, pipe);
 }
@@ -433,11 +428,6 @@ static void __disable_vblank(struct drm_device *dev, unsigned int pipe)
 		if (crtc->funcs->disable_vblank)
 			crtc->funcs->disable_vblank(crtc);
 	}
-#ifdef CONFIG_DRM_LEGACY
-	else {
-		dev->driver->disable_vblank(dev, pipe);
-	}
-#endif
 }
 
 /*
@@ -1151,11 +1141,6 @@ static int __enable_vblank(struct drm_device *dev, unsigned int pipe)
 		if (crtc->funcs->enable_vblank)
 			return crtc->funcs->enable_vblank(crtc);
 	}
-#ifdef CONFIG_DRM_LEGACY
-	else if (dev->driver->enable_vblank) {
-		return dev->driver->enable_vblank(dev, pipe);
-	}
-#endif
 
 	return -EINVAL;
 }
@@ -1698,10 +1683,6 @@ static void drm_wait_vblank_reply(struct drm_device *dev, unsigned int pipe,
 
 static bool drm_wait_vblank_supported(struct drm_device *dev)
 {
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-	if (unlikely(drm_core_check_feature(dev, DRIVER_LEGACY)))
-		return dev->irq_enabled;
-#endif
 	return drm_dev_has_vblank(dev);
 }
 
-- 
GitLab


From 2798ffcc1d6a788b5769b1fbcf0750dfc06ae98a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:40 +0100
Subject: [PATCH 0824/2340] drm: Remove locking for legacy ioctls and
 DRM_UNLOCKED

Modern DRM drivers acquire ioctl locks by themselves. Legacy ioctls
for user-space mode setting used to acquire drm_global_mutex. After
removing the ioctl entry points, also remove the locking code. The only
legacy ioctl without global locking was VBLANK_WAIT, which has been
removed as well. Hence remove the related DRM_UNLOCKED flag.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-12-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_ioc32.c |  2 +-
 drivers/gpu/drm/drm_ioctl.c | 23 +++++++----------------
 include/drm/drm_ioctl.h     | 11 -----------
 3 files changed, 8 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 910cadf14756e..129e2b91dbfe7 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -273,7 +273,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd,
 	req.request.type = req32.request.type;
 	req.request.sequence = req32.request.sequence;
 	req.request.signal = req32.request.signal;
-	err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED);
+	err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, 0);
 
 	req32.reply.type = req.reply.type;
 	req32.reply.sequence = req.reply.sequence;
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ebba34ba734ea..e368fc084c77d 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -596,7 +596,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 
 	DRM_IOCTL_DEF(DRM_IOCTL_FINISH, drm_noop, DRM_AUTH),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, 0),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 
@@ -729,7 +729,7 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata,
 {
 	struct drm_file *file_priv = file->private_data;
 	struct drm_device *dev = file_priv->minor->dev;
-	int retcode;
+	int ret;
 
 	/* Update drm_file owner if fd was passed along. */
 	drm_file_update_pid(file_priv);
@@ -737,20 +737,11 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata,
 	if (drm_dev_is_unplugged(dev))
 		return -ENODEV;
 
-	retcode = drm_ioctl_permit(flags, file_priv);
-	if (unlikely(retcode))
-		return retcode;
-
-	/* Enforce sane locking for modern driver ioctls. */
-	if (likely(!drm_core_check_feature(dev, DRIVER_LEGACY)) ||
-	    (flags & DRM_UNLOCKED))
-		retcode = func(dev, kdata, file_priv);
-	else {
-		mutex_lock(&drm_global_mutex);
-		retcode = func(dev, kdata, file_priv);
-		mutex_unlock(&drm_global_mutex);
-	}
-	return retcode;
+	ret = drm_ioctl_permit(flags, file_priv);
+	if (unlikely(ret))
+		return ret;
+
+	return func(dev, kdata, file_priv);
 }
 EXPORT_SYMBOL(drm_ioctl_kernel);
 
diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h
index 6ed61c371f6ce..171760b6c4a14 100644
--- a/include/drm/drm_ioctl.h
+++ b/include/drm/drm_ioctl.h
@@ -109,17 +109,6 @@ enum drm_ioctl_flags {
 	 * This is equivalent to callers with the SYSADMIN capability.
 	 */
 	DRM_ROOT_ONLY		= BIT(2),
-	/**
-	 * @DRM_UNLOCKED:
-	 *
-	 * Whether &drm_ioctl_desc.func should be called with the DRM BKL held
-	 * or not. Enforced as the default for all modern drivers, hence there
-	 * should never be a need to set this flag.
-	 *
-	 * Do not use anywhere else than for the VBLANK_WAIT IOCTL, which is the
-	 * only legacy IOCTL which needs this.
-	 */
-	DRM_UNLOCKED		= BIT(4),
 	/**
 	 * @DRM_RENDER_ALLOW:
 	 *
-- 
GitLab


From 2504c7ec728b7a2b6ca067e2a908fd1af2aad57c Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:41 +0100
Subject: [PATCH 0825/2340] drm: Remove source code for non-KMS drivers

Remove all remaining source code for non-KMS drivers. These drivers
have been removed in v6.3 and won't comeback.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-13-tzimmermann@suse.de
---
 drivers/gpu/drm/Makefile          |   12 -
 drivers/gpu/drm/drm_agpsupport.c  |  451 --------
 drivers/gpu/drm/drm_bufs.c        | 1627 -----------------------------
 drivers/gpu/drm/drm_context.c     |  513 ---------
 drivers/gpu/drm/drm_dma.c         |  178 ----
 drivers/gpu/drm/drm_hashtab.c     |  203 ----
 drivers/gpu/drm/drm_internal.h    |    5 -
 drivers/gpu/drm/drm_irq.c         |  204 ----
 drivers/gpu/drm/drm_legacy.h      |  290 -----
 drivers/gpu/drm/drm_legacy_misc.c |  105 --
 drivers/gpu/drm/drm_lock.c        |  373 -------
 drivers/gpu/drm/drm_memory.c      |  138 ---
 drivers/gpu/drm/drm_pci.c         |  203 ----
 drivers/gpu/drm/drm_scatter.c     |  220 ----
 drivers/gpu/drm/drm_vm.c          |  665 ------------
 include/drm/drm_auth.h            |   22 -
 include/drm/drm_device.h          |   71 +-
 include/drm/drm_drv.h             |   19 -
 include/drm/drm_file.h            |    5 -
 include/drm/drm_legacy.h          |  331 ------
 20 files changed, 2 insertions(+), 5633 deletions(-)
 delete mode 100644 drivers/gpu/drm/drm_agpsupport.c
 delete mode 100644 drivers/gpu/drm/drm_bufs.c
 delete mode 100644 drivers/gpu/drm/drm_context.c
 delete mode 100644 drivers/gpu/drm/drm_dma.c
 delete mode 100644 drivers/gpu/drm/drm_hashtab.c
 delete mode 100644 drivers/gpu/drm/drm_irq.c
 delete mode 100644 drivers/gpu/drm/drm_legacy.h
 delete mode 100644 drivers/gpu/drm/drm_legacy_misc.c
 delete mode 100644 drivers/gpu/drm/drm_lock.c
 delete mode 100644 drivers/gpu/drm/drm_memory.c
 delete mode 100644 drivers/gpu/drm/drm_scatter.c
 delete mode 100644 drivers/gpu/drm/drm_vm.c
 delete mode 100644 include/drm/drm_legacy.h

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index b4cb0835620af..8ac6f4b9546e6 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -47,18 +47,6 @@ drm-y := \
 	drm_vblank_work.o \
 	drm_vma_manager.o \
 	drm_writeback.o
-drm-$(CONFIG_DRM_LEGACY) += \
-	drm_agpsupport.o \
-	drm_bufs.o \
-	drm_context.o \
-	drm_dma.o \
-	drm_hashtab.o \
-	drm_irq.o \
-	drm_legacy_misc.o \
-	drm_lock.o \
-	drm_memory.o \
-	drm_scatter.o \
-	drm_vm.o
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_PANEL) += drm_panel.o
diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
deleted file mode 100644
index a4ad6fd13abcf..0000000000000
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * \file drm_agpsupport.c
- * DRM support for AGP/GART backend
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-
-#if IS_ENABLED(CONFIG_AGP)
-#include <asm/agp.h>
-#endif
-
-#include <drm/drm_device.h>
-#include <drm/drm_drv.h>
-#include <drm/drm_file.h>
-#include <drm/drm_print.h>
-
-#include "drm_legacy.h"
-
-#if IS_ENABLED(CONFIG_AGP)
-
-/*
- * Get AGP information.
- *
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device has been initialized and acquired and fills in the
- * drm_agp_info structure with the information in drm_agp_head::agp_info.
- */
-int drm_legacy_agp_info(struct drm_device *dev, struct drm_agp_info *info)
-{
-	struct agp_kern_info *kern;
-
-	if (!dev->agp || !dev->agp->acquired)
-		return -EINVAL;
-
-	kern = &dev->agp->agp_info;
-	info->agp_version_major = kern->version.major;
-	info->agp_version_minor = kern->version.minor;
-	info->mode = kern->mode;
-	info->aperture_base = kern->aper_base;
-	info->aperture_size = kern->aper_size * 1024 * 1024;
-	info->memory_allowed = kern->max_memory << PAGE_SHIFT;
-	info->memory_used = kern->current_memory << PAGE_SHIFT;
-	info->id_vendor = kern->device->vendor;
-	info->id_device = kern->device->device;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_agp_info);
-
-int drm_legacy_agp_info_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv)
-{
-	struct drm_agp_info *info = data;
-	int err;
-
-	err = drm_legacy_agp_info(dev, info);
-	if (err)
-		return err;
-
-	return 0;
-}
-
-/*
- * Acquire the AGP device.
- *
- * \param dev DRM device that is to acquire AGP.
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device hasn't been acquired before and calls
- * \c agp_backend_acquire.
- */
-int drm_legacy_agp_acquire(struct drm_device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
-
-	if (!dev->agp)
-		return -ENODEV;
-	if (dev->agp->acquired)
-		return -EBUSY;
-	dev->agp->bridge = agp_backend_acquire(pdev);
-	if (!dev->agp->bridge)
-		return -ENODEV;
-	dev->agp->acquired = 1;
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_agp_acquire);
-
-/*
- * Acquire the AGP device (ioctl).
- *
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device hasn't been acquired before and calls
- * \c agp_backend_acquire.
- */
-int drm_legacy_agp_acquire_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv)
-{
-	return drm_legacy_agp_acquire((struct drm_device *)file_priv->minor->dev);
-}
-
-/*
- * Release the AGP device.
- *
- * \param dev DRM device that is to release AGP.
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device has been acquired and calls \c agp_backend_release.
- */
-int drm_legacy_agp_release(struct drm_device *dev)
-{
-	if (!dev->agp || !dev->agp->acquired)
-		return -EINVAL;
-	agp_backend_release(dev->agp->bridge);
-	dev->agp->acquired = 0;
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_agp_release);
-
-int drm_legacy_agp_release_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv)
-{
-	return drm_legacy_agp_release(dev);
-}
-
-/*
- * Enable the AGP bus.
- *
- * \param dev DRM device that has previously acquired AGP.
- * \param mode Requested AGP mode.
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device has been acquired but not enabled, and calls
- * \c agp_enable.
- */
-int drm_legacy_agp_enable(struct drm_device *dev, struct drm_agp_mode mode)
-{
-	if (!dev->agp || !dev->agp->acquired)
-		return -EINVAL;
-
-	dev->agp->mode = mode.mode;
-	agp_enable(dev->agp->bridge, mode.mode);
-	dev->agp->enabled = 1;
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_agp_enable);
-
-int drm_legacy_agp_enable_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv)
-{
-	struct drm_agp_mode *mode = data;
-
-	return drm_legacy_agp_enable(dev, *mode);
-}
-
-/*
- * Allocate AGP memory.
- *
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device is present and has been acquired, allocates the
- * memory via agp_allocate_memory() and creates a drm_agp_mem entry for it.
- */
-int drm_legacy_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request)
-{
-	struct drm_agp_mem *entry;
-	struct agp_memory *memory;
-	unsigned long pages;
-	u32 type;
-
-	if (!dev->agp || !dev->agp->acquired)
-		return -EINVAL;
-	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-
-	pages = DIV_ROUND_UP(request->size, PAGE_SIZE);
-	type = (u32) request->type;
-	memory = agp_allocate_memory(dev->agp->bridge, pages, type);
-	if (!memory) {
-		kfree(entry);
-		return -ENOMEM;
-	}
-
-	entry->handle = (unsigned long)memory->key + 1;
-	entry->memory = memory;
-	entry->bound = 0;
-	entry->pages = pages;
-	list_add(&entry->head, &dev->agp->memory);
-
-	request->handle = entry->handle;
-	request->physical = memory->physical;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_agp_alloc);
-
-
-int drm_legacy_agp_alloc_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_agp_buffer *request = data;
-
-	return drm_legacy_agp_alloc(dev, request);
-}
-
-/*
- * Search for the AGP memory entry associated with a handle.
- *
- * \param dev DRM device structure.
- * \param handle AGP memory handle.
- * \return pointer to the drm_agp_mem structure associated with \p handle.
- *
- * Walks through drm_agp_head::memory until finding a matching handle.
- */
-static struct drm_agp_mem *drm_legacy_agp_lookup_entry(struct drm_device *dev,
-						       unsigned long handle)
-{
-	struct drm_agp_mem *entry;
-
-	list_for_each_entry(entry, &dev->agp->memory, head) {
-		if (entry->handle == handle)
-			return entry;
-	}
-	return NULL;
-}
-
-/*
- * Unbind AGP memory from the GATT (ioctl).
- *
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device is present and acquired, looks-up the AGP memory
- * entry and passes it to the unbind_agp() function.
- */
-int drm_legacy_agp_unbind(struct drm_device *dev, struct drm_agp_binding *request)
-{
-	struct drm_agp_mem *entry;
-	int ret;
-
-	if (!dev->agp || !dev->agp->acquired)
-		return -EINVAL;
-	entry = drm_legacy_agp_lookup_entry(dev, request->handle);
-	if (!entry || !entry->bound)
-		return -EINVAL;
-	ret = agp_unbind_memory(entry->memory);
-	if (ret == 0)
-		entry->bound = 0;
-	return ret;
-}
-EXPORT_SYMBOL(drm_legacy_agp_unbind);
-
-
-int drm_legacy_agp_unbind_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv)
-{
-	struct drm_agp_binding *request = data;
-
-	return drm_legacy_agp_unbind(dev, request);
-}
-
-/*
- * Bind AGP memory into the GATT (ioctl)
- *
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device is present and has been acquired and that no memory
- * is currently bound into the GATT. Looks-up the AGP memory entry and passes
- * it to bind_agp() function.
- */
-int drm_legacy_agp_bind(struct drm_device *dev, struct drm_agp_binding *request)
-{
-	struct drm_agp_mem *entry;
-	int retcode;
-	int page;
-
-	if (!dev->agp || !dev->agp->acquired)
-		return -EINVAL;
-	entry = drm_legacy_agp_lookup_entry(dev, request->handle);
-	if (!entry || entry->bound)
-		return -EINVAL;
-	page = DIV_ROUND_UP(request->offset, PAGE_SIZE);
-	retcode = agp_bind_memory(entry->memory, page);
-	if (retcode)
-		return retcode;
-	entry->bound = dev->agp->base + (page << PAGE_SHIFT);
-	DRM_DEBUG("base = 0x%lx entry->bound = 0x%lx\n",
-		  dev->agp->base, entry->bound);
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_agp_bind);
-
-
-int drm_legacy_agp_bind_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv)
-{
-	struct drm_agp_binding *request = data;
-
-	return drm_legacy_agp_bind(dev, request);
-}
-
-/*
- * Free AGP memory (ioctl).
- *
- * \return zero on success or a negative number on failure.
- *
- * Verifies the AGP device is present and has been acquired and looks up the
- * AGP memory entry. If the memory is currently bound, unbind it via
- * unbind_agp(). Frees it via free_agp() as well as the entry itself
- * and unlinks from the doubly linked list it's inserted in.
- */
-int drm_legacy_agp_free(struct drm_device *dev, struct drm_agp_buffer *request)
-{
-	struct drm_agp_mem *entry;
-
-	if (!dev->agp || !dev->agp->acquired)
-		return -EINVAL;
-	entry = drm_legacy_agp_lookup_entry(dev, request->handle);
-	if (!entry)
-		return -EINVAL;
-	if (entry->bound)
-		agp_unbind_memory(entry->memory);
-
-	list_del(&entry->head);
-
-	agp_free_memory(entry->memory);
-	kfree(entry);
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_agp_free);
-
-
-int drm_legacy_agp_free_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv)
-{
-	struct drm_agp_buffer *request = data;
-
-	return drm_legacy_agp_free(dev, request);
-}
-
-/*
- * Initialize the AGP resources.
- *
- * \return pointer to a drm_agp_head structure.
- *
- * Gets the drm_agp_t structure which is made available by the agpgart module
- * via the inter_module_* functions. Creates and initializes a drm_agp_head
- * structure.
- *
- * Note that final cleanup of the kmalloced structure is directly done in
- * drm_pci_agp_destroy.
- */
-struct drm_agp_head *drm_legacy_agp_init(struct drm_device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	struct drm_agp_head *head = NULL;
-
-	head = kzalloc(sizeof(*head), GFP_KERNEL);
-	if (!head)
-		return NULL;
-	head->bridge = agp_find_bridge(pdev);
-	if (!head->bridge) {
-		head->bridge = agp_backend_acquire(pdev);
-		if (!head->bridge) {
-			kfree(head);
-			return NULL;
-		}
-		agp_copy_info(head->bridge, &head->agp_info);
-		agp_backend_release(head->bridge);
-	} else {
-		agp_copy_info(head->bridge, &head->agp_info);
-	}
-	if (head->agp_info.chipset == NOT_SUPPORTED) {
-		kfree(head);
-		return NULL;
-	}
-	INIT_LIST_HEAD(&head->memory);
-	head->cant_use_aperture = head->agp_info.cant_use_aperture;
-	head->page_mask = head->agp_info.page_mask;
-	head->base = head->agp_info.aper_base;
-	return head;
-}
-/* Only exported for i810.ko */
-EXPORT_SYMBOL(drm_legacy_agp_init);
-
-/**
- * drm_legacy_agp_clear - Clear AGP resource list
- * @dev: DRM device
- *
- * Iterate over all AGP resources and remove them. But keep the AGP head
- * intact so it can still be used. It is safe to call this if AGP is disabled or
- * was already removed.
- *
- * Cleanup is only done for drivers who have DRIVER_LEGACY set.
- */
-void drm_legacy_agp_clear(struct drm_device *dev)
-{
-	struct drm_agp_mem *entry, *tempe;
-
-	if (!dev->agp)
-		return;
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	list_for_each_entry_safe(entry, tempe, &dev->agp->memory, head) {
-		if (entry->bound)
-			agp_unbind_memory(entry->memory);
-		agp_free_memory(entry->memory);
-		kfree(entry);
-	}
-	INIT_LIST_HEAD(&dev->agp->memory);
-
-	if (dev->agp->acquired)
-		drm_legacy_agp_release(dev);
-
-	dev->agp->acquired = 0;
-	dev->agp->enabled = 0;
-}
-
-#endif
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
deleted file mode 100644
index 86700560fea28..0000000000000
--- a/drivers/gpu/drm/drm_bufs.c
+++ /dev/null
@@ -1,1627 +0,0 @@
-/*
- * Legacy: Generic DRM Buffer Management
- *
- * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Author: Rickard E. (Rik) Faith <faith@valinux.com>
- * Author: Gareth Hughes <gareth@valinux.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/export.h>
-#include <linux/log2.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/nospec.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-
-#include <asm/shmparam.h>
-
-#include <drm/drm_device.h>
-#include <drm/drm_drv.h>
-#include <drm/drm_file.h>
-#include <drm/drm_print.h>
-
-#include "drm_legacy.h"
-
-
-static struct drm_map_list *drm_find_matching_map(struct drm_device *dev,
-						  struct drm_local_map *map)
-{
-	struct drm_map_list *entry;
-
-	list_for_each_entry(entry, &dev->maplist, head) {
-		/*
-		 * Because the kernel-userspace ABI is fixed at a 32-bit offset
-		 * while PCI resources may live above that, we only compare the
-		 * lower 32 bits of the map offset for maps of type
-		 * _DRM_FRAMEBUFFER or _DRM_REGISTERS.
-		 * It is assumed that if a driver have more than one resource
-		 * of each type, the lower 32 bits are different.
-		 */
-		if (!entry->map ||
-		    map->type != entry->map->type ||
-		    entry->master != dev->master)
-			continue;
-		switch (map->type) {
-		case _DRM_SHM:
-			if (map->flags != _DRM_CONTAINS_LOCK)
-				break;
-			return entry;
-		case _DRM_REGISTERS:
-		case _DRM_FRAME_BUFFER:
-			if ((entry->map->offset & 0xffffffff) ==
-			    (map->offset & 0xffffffff))
-				return entry;
-			break;
-		default: /* Make gcc happy */
-			break;
-		}
-		if (entry->map->offset == map->offset)
-			return entry;
-	}
-
-	return NULL;
-}
-
-static int drm_map_handle(struct drm_device *dev, struct drm_hash_item *hash,
-			  unsigned long user_token, int hashed_handle, int shm)
-{
-	int use_hashed_handle, shift;
-	unsigned long add;
-
-#if (BITS_PER_LONG == 64)
-	use_hashed_handle = ((user_token & 0xFFFFFFFF00000000UL) || hashed_handle);
-#elif (BITS_PER_LONG == 32)
-	use_hashed_handle = hashed_handle;
-#else
-#error Unsupported long size. Neither 64 nor 32 bits.
-#endif
-
-	if (!use_hashed_handle) {
-		int ret;
-
-		hash->key = user_token >> PAGE_SHIFT;
-		ret = drm_ht_insert_item(&dev->map_hash, hash);
-		if (ret != -EINVAL)
-			return ret;
-	}
-
-	shift = 0;
-	add = DRM_MAP_HASH_OFFSET >> PAGE_SHIFT;
-	if (shm && (SHMLBA > PAGE_SIZE)) {
-		int bits = ilog2(SHMLBA >> PAGE_SHIFT) + 1;
-
-		/* For shared memory, we have to preserve the SHMLBA
-		 * bits of the eventual vma->vm_pgoff value during
-		 * mmap().  Otherwise we run into cache aliasing problems
-		 * on some platforms.  On these platforms, the pgoff of
-		 * a mmap() request is used to pick a suitable virtual
-		 * address for the mmap() region such that it will not
-		 * cause cache aliasing problems.
-		 *
-		 * Therefore, make sure the SHMLBA relevant bits of the
-		 * hash value we use are equal to those in the original
-		 * kernel virtual address.
-		 */
-		shift = bits;
-		add |= ((user_token >> PAGE_SHIFT) & ((1UL << bits) - 1UL));
-	}
-
-	return drm_ht_just_insert_please(&dev->map_hash, hash,
-					 user_token, 32 - PAGE_SHIFT - 3,
-					 shift, add);
-}
-
-/*
- * Core function to create a range of memory available for mapping by a
- * non-root process.
- *
- * Adjusts the memory offset to its absolute value according to the mapping
- * type.  Adds the map to the map list drm_device::maplist. Adds MTRR's where
- * applicable and if supported by the kernel.
- */
-static int drm_addmap_core(struct drm_device *dev, resource_size_t offset,
-			   unsigned int size, enum drm_map_type type,
-			   enum drm_map_flags flags,
-			   struct drm_map_list **maplist)
-{
-	struct drm_local_map *map;
-	struct drm_map_list *list;
-	unsigned long user_token;
-	int ret;
-
-	map = kmalloc(sizeof(*map), GFP_KERNEL);
-	if (!map)
-		return -ENOMEM;
-
-	map->offset = offset;
-	map->size = size;
-	map->flags = flags;
-	map->type = type;
-
-	/* Only allow shared memory to be removable since we only keep enough
-	 * book keeping information about shared memory to allow for removal
-	 * when processes fork.
-	 */
-	if ((map->flags & _DRM_REMOVABLE) && map->type != _DRM_SHM) {
-		kfree(map);
-		return -EINVAL;
-	}
-	DRM_DEBUG("offset = 0x%08llx, size = 0x%08lx, type = %d\n",
-		  (unsigned long long)map->offset, map->size, map->type);
-
-	/* page-align _DRM_SHM maps. They are allocated here so there is no security
-	 * hole created by that and it works around various broken drivers that use
-	 * a non-aligned quantity to map the SAREA. --BenH
-	 */
-	if (map->type == _DRM_SHM)
-		map->size = PAGE_ALIGN(map->size);
-
-	if ((map->offset & (~(resource_size_t)PAGE_MASK)) || (map->size & (~PAGE_MASK))) {
-		kfree(map);
-		return -EINVAL;
-	}
-	map->mtrr = -1;
-	map->handle = NULL;
-
-	switch (map->type) {
-	case _DRM_REGISTERS:
-	case _DRM_FRAME_BUFFER:
-#if !defined(__sparc__) && !defined(__alpha__) && !defined(__ia64__) && !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__arm__)
-		if (map->offset + (map->size-1) < map->offset ||
-		    map->offset < virt_to_phys(high_memory)) {
-			kfree(map);
-			return -EINVAL;
-		}
-#endif
-		/* Some drivers preinitialize some maps, without the X Server
-		 * needing to be aware of it.  Therefore, we just return success
-		 * when the server tries to create a duplicate map.
-		 */
-		list = drm_find_matching_map(dev, map);
-		if (list != NULL) {
-			if (list->map->size != map->size) {
-				DRM_DEBUG("Matching maps of type %d with "
-					  "mismatched sizes, (%ld vs %ld)\n",
-					  map->type, map->size,
-					  list->map->size);
-				list->map->size = map->size;
-			}
-
-			kfree(map);
-			*maplist = list;
-			return 0;
-		}
-
-		if (map->type == _DRM_FRAME_BUFFER ||
-		    (map->flags & _DRM_WRITE_COMBINING)) {
-			map->mtrr =
-				arch_phys_wc_add(map->offset, map->size);
-		}
-		if (map->type == _DRM_REGISTERS) {
-			if (map->flags & _DRM_WRITE_COMBINING)
-				map->handle = ioremap_wc(map->offset,
-							 map->size);
-			else
-				map->handle = ioremap(map->offset, map->size);
-			if (!map->handle) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-
-		break;
-	case _DRM_SHM:
-		list = drm_find_matching_map(dev, map);
-		if (list != NULL) {
-			if (list->map->size != map->size) {
-				DRM_DEBUG("Matching maps of type %d with "
-					  "mismatched sizes, (%ld vs %ld)\n",
-					  map->type, map->size, list->map->size);
-				list->map->size = map->size;
-			}
-
-			kfree(map);
-			*maplist = list;
-			return 0;
-		}
-		map->handle = vmalloc_user(map->size);
-		DRM_DEBUG("%lu %d %p\n",
-			  map->size, order_base_2(map->size), map->handle);
-		if (!map->handle) {
-			kfree(map);
-			return -ENOMEM;
-		}
-		map->offset = (unsigned long)map->handle;
-		if (map->flags & _DRM_CONTAINS_LOCK) {
-			/* Prevent a 2nd X Server from creating a 2nd lock */
-			if (dev->master->lock.hw_lock != NULL) {
-				vfree(map->handle);
-				kfree(map);
-				return -EBUSY;
-			}
-			dev->sigdata.lock = dev->master->lock.hw_lock = map->handle;	/* Pointer to lock */
-		}
-		break;
-	case _DRM_AGP: {
-		struct drm_agp_mem *entry;
-		int valid = 0;
-
-		if (!dev->agp) {
-			kfree(map);
-			return -EINVAL;
-		}
-#ifdef __alpha__
-		map->offset += dev->hose->mem_space->start;
-#endif
-		/* In some cases (i810 driver), user space may have already
-		 * added the AGP base itself, because dev->agp->base previously
-		 * only got set during AGP enable.  So, only add the base
-		 * address if the map's offset isn't already within the
-		 * aperture.
-		 */
-		if (map->offset < dev->agp->base ||
-		    map->offset > dev->agp->base +
-		    dev->agp->agp_info.aper_size * 1024 * 1024 - 1) {
-			map->offset += dev->agp->base;
-		}
-		map->mtrr = dev->agp->agp_mtrr;	/* for getmap */
-
-		/* This assumes the DRM is in total control of AGP space.
-		 * It's not always the case as AGP can be in the control
-		 * of user space (i.e. i810 driver). So this loop will get
-		 * skipped and we double check that dev->agp->memory is
-		 * actually set as well as being invalid before EPERM'ing
-		 */
-		list_for_each_entry(entry, &dev->agp->memory, head) {
-			if ((map->offset >= entry->bound) &&
-			    (map->offset + map->size <= entry->bound + entry->pages * PAGE_SIZE)) {
-				valid = 1;
-				break;
-			}
-		}
-		if (!list_empty(&dev->agp->memory) && !valid) {
-			kfree(map);
-			return -EPERM;
-		}
-		DRM_DEBUG("AGP offset = 0x%08llx, size = 0x%08lx\n",
-			  (unsigned long long)map->offset, map->size);
-
-		break;
-	}
-	case _DRM_SCATTER_GATHER:
-		if (!dev->sg) {
-			kfree(map);
-			return -EINVAL;
-		}
-		map->offset += (unsigned long)dev->sg->virtual;
-		break;
-	case _DRM_CONSISTENT:
-		/* dma_addr_t is 64bit on i386 with CONFIG_HIGHMEM64G,
-		 * As we're limiting the address to 2^32-1 (or less),
-		 * casting it down to 32 bits is no problem, but we
-		 * need to point to a 64bit variable first.
-		 */
-		map->handle = dma_alloc_coherent(dev->dev,
-						 map->size,
-						 &map->offset,
-						 GFP_KERNEL);
-		if (!map->handle) {
-			kfree(map);
-			return -ENOMEM;
-		}
-		break;
-	default:
-		kfree(map);
-		return -EINVAL;
-	}
-
-	list = kzalloc(sizeof(*list), GFP_KERNEL);
-	if (!list) {
-		if (map->type == _DRM_REGISTERS)
-			iounmap(map->handle);
-		kfree(map);
-		return -EINVAL;
-	}
-	list->map = map;
-
-	mutex_lock(&dev->struct_mutex);
-	list_add(&list->head, &dev->maplist);
-
-	/* Assign a 32-bit handle */
-	/* We do it here so that dev->struct_mutex protects the increment */
-	user_token = (map->type == _DRM_SHM) ? (unsigned long)map->handle :
-		map->offset;
-	ret = drm_map_handle(dev, &list->hash, user_token, 0,
-			     (map->type == _DRM_SHM));
-	if (ret) {
-		if (map->type == _DRM_REGISTERS)
-			iounmap(map->handle);
-		kfree(map);
-		kfree(list);
-		mutex_unlock(&dev->struct_mutex);
-		return ret;
-	}
-
-	list->user_token = list->hash.key << PAGE_SHIFT;
-	mutex_unlock(&dev->struct_mutex);
-
-	if (!(map->flags & _DRM_DRIVER))
-		list->master = dev->master;
-	*maplist = list;
-	return 0;
-}
-
-int drm_legacy_addmap(struct drm_device *dev, resource_size_t offset,
-		      unsigned int size, enum drm_map_type type,
-		      enum drm_map_flags flags, struct drm_local_map **map_ptr)
-{
-	struct drm_map_list *list;
-	int rc;
-
-	rc = drm_addmap_core(dev, offset, size, type, flags, &list);
-	if (!rc)
-		*map_ptr = list->map;
-	return rc;
-}
-EXPORT_SYMBOL(drm_legacy_addmap);
-
-struct drm_local_map *drm_legacy_findmap(struct drm_device *dev,
-					 unsigned int token)
-{
-	struct drm_map_list *_entry;
-
-	list_for_each_entry(_entry, &dev->maplist, head)
-		if (_entry->user_token == token)
-			return _entry->map;
-	return NULL;
-}
-EXPORT_SYMBOL(drm_legacy_findmap);
-
-/*
- * Ioctl to specify a range of memory that is available for mapping by a
- * non-root process.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg pointer to a drm_map structure.
- * \return zero on success or a negative value on error.
- *
- */
-int drm_legacy_addmap_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv)
-{
-	struct drm_map *map = data;
-	struct drm_map_list *maplist;
-	int err;
-
-	if (!(capable(CAP_SYS_ADMIN) || map->type == _DRM_AGP || map->type == _DRM_SHM))
-		return -EPERM;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	err = drm_addmap_core(dev, map->offset, map->size, map->type,
-			      map->flags, &maplist);
-
-	if (err)
-		return err;
-
-	/* avoid a warning on 64-bit, this casting isn't very nice, but the API is set so too late */
-	map->handle = (void *)(unsigned long)maplist->user_token;
-
-	/*
-	 * It appears that there are no users of this value whatsoever --
-	 * drmAddMap just discards it.  Let's not encourage its use.
-	 * (Keeping drm_addmap_core's returned mtrr value would be wrong --
-	 *  it's not a real mtrr index anymore.)
-	 */
-	map->mtrr = -1;
-
-	return 0;
-}
-
-/*
- * Get a mapping information.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument, pointing to a drm_map structure.
- *
- * \return zero on success or a negative number on failure.
- *
- * Searches for the mapping with the specified offset and copies its information
- * into userspace
- */
-int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv)
-{
-	struct drm_map *map = data;
-	struct drm_map_list *r_list = NULL;
-	struct list_head *list;
-	int idx;
-	int i;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	idx = map->offset;
-	if (idx < 0)
-		return -EINVAL;
-
-	i = 0;
-	mutex_lock(&dev->struct_mutex);
-	list_for_each(list, &dev->maplist) {
-		if (i == idx) {
-			r_list = list_entry(list, struct drm_map_list, head);
-			break;
-		}
-		i++;
-	}
-	if (!r_list || !r_list->map) {
-		mutex_unlock(&dev->struct_mutex);
-		return -EINVAL;
-	}
-
-	map->offset = r_list->map->offset;
-	map->size = r_list->map->size;
-	map->type = r_list->map->type;
-	map->flags = r_list->map->flags;
-	map->handle = (void *)(unsigned long) r_list->user_token;
-	map->mtrr = arch_phys_wc_index(r_list->map->mtrr);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	return 0;
-}
-
-/*
- * Remove a map private from list and deallocate resources if the mapping
- * isn't in use.
- *
- * Searches the map on drm_device::maplist, removes it from the list, see if
- * it's being used, and free any associated resource (such as MTRR's) if it's not
- * being on use.
- *
- * \sa drm_legacy_addmap
- */
-int drm_legacy_rmmap_locked(struct drm_device *dev, struct drm_local_map *map)
-{
-	struct drm_map_list *r_list = NULL, *list_t;
-	int found = 0;
-	struct drm_master *master;
-
-	/* Find the list entry for the map and remove it */
-	list_for_each_entry_safe(r_list, list_t, &dev->maplist, head) {
-		if (r_list->map == map) {
-			master = r_list->master;
-			list_del(&r_list->head);
-			drm_ht_remove_key(&dev->map_hash,
-					  r_list->user_token >> PAGE_SHIFT);
-			kfree(r_list);
-			found = 1;
-			break;
-		}
-	}
-
-	if (!found)
-		return -EINVAL;
-
-	switch (map->type) {
-	case _DRM_REGISTERS:
-		iounmap(map->handle);
-		fallthrough;
-	case _DRM_FRAME_BUFFER:
-		arch_phys_wc_del(map->mtrr);
-		break;
-	case _DRM_SHM:
-		vfree(map->handle);
-		if (master) {
-			if (dev->sigdata.lock == master->lock.hw_lock)
-				dev->sigdata.lock = NULL;
-			master->lock.hw_lock = NULL;   /* SHM removed */
-			master->lock.file_priv = NULL;
-			wake_up_interruptible_all(&master->lock.lock_queue);
-		}
-		break;
-	case _DRM_AGP:
-	case _DRM_SCATTER_GATHER:
-		break;
-	case _DRM_CONSISTENT:
-		dma_free_coherent(dev->dev,
-				  map->size,
-				  map->handle,
-				  map->offset);
-		break;
-	}
-	kfree(map);
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_rmmap_locked);
-
-void drm_legacy_rmmap(struct drm_device *dev, struct drm_local_map *map)
-{
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	mutex_lock(&dev->struct_mutex);
-	drm_legacy_rmmap_locked(dev, map);
-	mutex_unlock(&dev->struct_mutex);
-}
-EXPORT_SYMBOL(drm_legacy_rmmap);
-
-void drm_legacy_master_rmmaps(struct drm_device *dev, struct drm_master *master)
-{
-	struct drm_map_list *r_list, *list_temp;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) {
-		if (r_list->master == master) {
-			drm_legacy_rmmap_locked(dev, r_list->map);
-			r_list = NULL;
-		}
-	}
-	mutex_unlock(&dev->struct_mutex);
-}
-
-void drm_legacy_rmmaps(struct drm_device *dev)
-{
-	struct drm_map_list *r_list, *list_temp;
-
-	list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head)
-		drm_legacy_rmmap(dev, r_list->map);
-}
-
-/* The rmmap ioctl appears to be unnecessary.  All mappings are torn down on
- * the last close of the device, and this is necessary for cleanup when things
- * exit uncleanly.  Therefore, having userland manually remove mappings seems
- * like a pointless exercise since they're going away anyway.
- *
- * One use case might be after addmap is allowed for normal users for SHM and
- * gets used by drivers that the server doesn't need to care about.  This seems
- * unlikely.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg pointer to a struct drm_map structure.
- * \return zero on success or a negative value on error.
- */
-int drm_legacy_rmmap_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv)
-{
-	struct drm_map *request = data;
-	struct drm_local_map *map = NULL;
-	struct drm_map_list *r_list;
-	int ret;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(r_list, &dev->maplist, head) {
-		if (r_list->map &&
-		    r_list->user_token == (unsigned long)request->handle &&
-		    r_list->map->flags & _DRM_REMOVABLE) {
-			map = r_list->map;
-			break;
-		}
-	}
-
-	/* List has wrapped around to the head pointer, or it's empty we didn't
-	 * find anything.
-	 */
-	if (list_empty(&dev->maplist) || !map) {
-		mutex_unlock(&dev->struct_mutex);
-		return -EINVAL;
-	}
-
-	/* Register and framebuffer maps are permanent */
-	if ((map->type == _DRM_REGISTERS) || (map->type == _DRM_FRAME_BUFFER)) {
-		mutex_unlock(&dev->struct_mutex);
-		return 0;
-	}
-
-	ret = drm_legacy_rmmap_locked(dev, map);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
-}
-
-/*
- * Cleanup after an error on one of the addbufs() functions.
- *
- * \param dev DRM device.
- * \param entry buffer entry where the error occurred.
- *
- * Frees any pages and buffers associated with the given entry.
- */
-static void drm_cleanup_buf_error(struct drm_device *dev,
-				  struct drm_buf_entry *entry)
-{
-	drm_dma_handle_t *dmah;
-	int i;
-
-	if (entry->seg_count) {
-		for (i = 0; i < entry->seg_count; i++) {
-			if (entry->seglist[i]) {
-				dmah = entry->seglist[i];
-				dma_free_coherent(dev->dev,
-						  dmah->size,
-						  dmah->vaddr,
-						  dmah->busaddr);
-				kfree(dmah);
-			}
-		}
-		kfree(entry->seglist);
-
-		entry->seg_count = 0;
-	}
-
-	if (entry->buf_count) {
-		for (i = 0; i < entry->buf_count; i++) {
-			kfree(entry->buflist[i].dev_private);
-		}
-		kfree(entry->buflist);
-
-		entry->buf_count = 0;
-	}
-}
-
-#if IS_ENABLED(CONFIG_AGP)
-/*
- * Add AGP buffers for DMA transfers.
- *
- * \param dev struct drm_device to which the buffers are to be added.
- * \param request pointer to a struct drm_buf_desc describing the request.
- * \return zero on success or a negative number on failure.
- *
- * After some sanity checks creates a drm_buf structure for each buffer and
- * reallocates the buffer list of the same size order to accommodate the new
- * buffers.
- */
-int drm_legacy_addbufs_agp(struct drm_device *dev,
-			   struct drm_buf_desc *request)
-{
-	struct drm_device_dma *dma = dev->dma;
-	struct drm_buf_entry *entry;
-	struct drm_agp_mem *agp_entry;
-	struct drm_buf *buf;
-	unsigned long offset;
-	unsigned long agp_offset;
-	int count;
-	int order;
-	int size;
-	int alignment;
-	int page_order;
-	int total;
-	int byte_count;
-	int i, valid;
-	struct drm_buf **temp_buflist;
-
-	if (!dma)
-		return -EINVAL;
-
-	count = request->count;
-	order = order_base_2(request->size);
-	size = 1 << order;
-
-	alignment = (request->flags & _DRM_PAGE_ALIGN)
-	    ? PAGE_ALIGN(size) : size;
-	page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0;
-	total = PAGE_SIZE << page_order;
-
-	byte_count = 0;
-	agp_offset = dev->agp->base + request->agp_start;
-
-	DRM_DEBUG("count:      %d\n", count);
-	DRM_DEBUG("order:      %d\n", order);
-	DRM_DEBUG("size:       %d\n", size);
-	DRM_DEBUG("agp_offset: %lx\n", agp_offset);
-	DRM_DEBUG("alignment:  %d\n", alignment);
-	DRM_DEBUG("page_order: %d\n", page_order);
-	DRM_DEBUG("total:      %d\n", total);
-
-	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
-		return -EINVAL;
-
-	/* Make sure buffers are located in AGP memory that we own */
-	valid = 0;
-	list_for_each_entry(agp_entry, &dev->agp->memory, head) {
-		if ((agp_offset >= agp_entry->bound) &&
-		    (agp_offset + total * count <= agp_entry->bound + agp_entry->pages * PAGE_SIZE)) {
-			valid = 1;
-			break;
-		}
-	}
-	if (!list_empty(&dev->agp->memory) && !valid) {
-		DRM_DEBUG("zone invalid\n");
-		return -EINVAL;
-	}
-	spin_lock(&dev->buf_lock);
-	if (dev->buf_use) {
-		spin_unlock(&dev->buf_lock);
-		return -EBUSY;
-	}
-	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->buf_lock);
-
-	mutex_lock(&dev->struct_mutex);
-	entry = &dma->bufs[order];
-	if (entry->buf_count) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;	/* May only call once for each order */
-	}
-
-	if (count < 0 || count > 4096) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -EINVAL;
-	}
-
-	entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL);
-	if (!entry->buflist) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-
-	entry->buf_size = size;
-	entry->page_order = page_order;
-
-	offset = 0;
-
-	while (entry->buf_count < count) {
-		buf = &entry->buflist[entry->buf_count];
-		buf->idx = dma->buf_count + entry->buf_count;
-		buf->total = alignment;
-		buf->order = order;
-		buf->used = 0;
-
-		buf->offset = (dma->byte_count + offset);
-		buf->bus_address = agp_offset + offset;
-		buf->address = (void *)(agp_offset + offset);
-		buf->next = NULL;
-		buf->waiting = 0;
-		buf->pending = 0;
-		buf->file_priv = NULL;
-
-		buf->dev_priv_size = dev->driver->dev_priv_size;
-		buf->dev_private = kzalloc(buf->dev_priv_size, GFP_KERNEL);
-		if (!buf->dev_private) {
-			/* Set count correctly so we free the proper amount. */
-			entry->buf_count = count;
-			drm_cleanup_buf_error(dev, entry);
-			mutex_unlock(&dev->struct_mutex);
-			atomic_dec(&dev->buf_alloc);
-			return -ENOMEM;
-		}
-
-		DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address);
-
-		offset += alignment;
-		entry->buf_count++;
-		byte_count += PAGE_SIZE << page_order;
-	}
-
-	DRM_DEBUG("byte_count: %d\n", byte_count);
-
-	temp_buflist = krealloc(dma->buflist,
-				(dma->buf_count + entry->buf_count) *
-				sizeof(*dma->buflist), GFP_KERNEL);
-	if (!temp_buflist) {
-		/* Free the entry because it isn't valid */
-		drm_cleanup_buf_error(dev, entry);
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-	dma->buflist = temp_buflist;
-
-	for (i = 0; i < entry->buf_count; i++) {
-		dma->buflist[i + dma->buf_count] = &entry->buflist[i];
-	}
-
-	dma->buf_count += entry->buf_count;
-	dma->seg_count += entry->seg_count;
-	dma->page_count += byte_count >> PAGE_SHIFT;
-	dma->byte_count += byte_count;
-
-	DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count);
-	DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	request->count = entry->buf_count;
-	request->size = size;
-
-	dma->flags = _DRM_DMA_USE_AGP;
-
-	atomic_dec(&dev->buf_alloc);
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_addbufs_agp);
-#endif /* CONFIG_AGP */
-
-int drm_legacy_addbufs_pci(struct drm_device *dev,
-			   struct drm_buf_desc *request)
-{
-	struct drm_device_dma *dma = dev->dma;
-	int count;
-	int order;
-	int size;
-	int total;
-	int page_order;
-	struct drm_buf_entry *entry;
-	drm_dma_handle_t *dmah;
-	struct drm_buf *buf;
-	int alignment;
-	unsigned long offset;
-	int i;
-	int byte_count;
-	int page_count;
-	unsigned long *temp_pagelist;
-	struct drm_buf **temp_buflist;
-
-	if (!drm_core_check_feature(dev, DRIVER_PCI_DMA))
-		return -EOPNOTSUPP;
-
-	if (!dma)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	count = request->count;
-	order = order_base_2(request->size);
-	size = 1 << order;
-
-	DRM_DEBUG("count=%d, size=%d (%d), order=%d\n",
-		  request->count, request->size, size, order);
-
-	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
-		return -EINVAL;
-
-	alignment = (request->flags & _DRM_PAGE_ALIGN)
-	    ? PAGE_ALIGN(size) : size;
-	page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0;
-	total = PAGE_SIZE << page_order;
-
-	spin_lock(&dev->buf_lock);
-	if (dev->buf_use) {
-		spin_unlock(&dev->buf_lock);
-		return -EBUSY;
-	}
-	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->buf_lock);
-
-	mutex_lock(&dev->struct_mutex);
-	entry = &dma->bufs[order];
-	if (entry->buf_count) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;	/* May only call once for each order */
-	}
-
-	if (count < 0 || count > 4096) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -EINVAL;
-	}
-
-	entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL);
-	if (!entry->buflist) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-
-	entry->seglist = kcalloc(count, sizeof(*entry->seglist), GFP_KERNEL);
-	if (!entry->seglist) {
-		kfree(entry->buflist);
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-
-	/* Keep the original pagelist until we know all the allocations
-	 * have succeeded
-	 */
-	temp_pagelist = kmalloc_array(dma->page_count + (count << page_order),
-				      sizeof(*dma->pagelist),
-				      GFP_KERNEL);
-	if (!temp_pagelist) {
-		kfree(entry->buflist);
-		kfree(entry->seglist);
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-	memcpy(temp_pagelist,
-	       dma->pagelist, dma->page_count * sizeof(*dma->pagelist));
-	DRM_DEBUG("pagelist: %d entries\n",
-		  dma->page_count + (count << page_order));
-
-	entry->buf_size = size;
-	entry->page_order = page_order;
-	byte_count = 0;
-	page_count = 0;
-
-	while (entry->buf_count < count) {
-		dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL);
-		if (!dmah) {
-			/* Set count correctly so we free the proper amount. */
-			entry->buf_count = count;
-			entry->seg_count = count;
-			drm_cleanup_buf_error(dev, entry);
-			kfree(temp_pagelist);
-			mutex_unlock(&dev->struct_mutex);
-			atomic_dec(&dev->buf_alloc);
-			return -ENOMEM;
-		}
-
-		dmah->size = total;
-		dmah->vaddr = dma_alloc_coherent(dev->dev,
-						 dmah->size,
-						 &dmah->busaddr,
-						 GFP_KERNEL);
-		if (!dmah->vaddr) {
-			kfree(dmah);
-
-			/* Set count correctly so we free the proper amount. */
-			entry->buf_count = count;
-			entry->seg_count = count;
-			drm_cleanup_buf_error(dev, entry);
-			kfree(temp_pagelist);
-			mutex_unlock(&dev->struct_mutex);
-			atomic_dec(&dev->buf_alloc);
-			return -ENOMEM;
-		}
-		entry->seglist[entry->seg_count++] = dmah;
-		for (i = 0; i < (1 << page_order); i++) {
-			DRM_DEBUG("page %d @ 0x%08lx\n",
-				  dma->page_count + page_count,
-				  (unsigned long)dmah->vaddr + PAGE_SIZE * i);
-			temp_pagelist[dma->page_count + page_count++]
-				= (unsigned long)dmah->vaddr + PAGE_SIZE * i;
-		}
-		for (offset = 0;
-		     offset + size <= total && entry->buf_count < count;
-		     offset += alignment, ++entry->buf_count) {
-			buf = &entry->buflist[entry->buf_count];
-			buf->idx = dma->buf_count + entry->buf_count;
-			buf->total = alignment;
-			buf->order = order;
-			buf->used = 0;
-			buf->offset = (dma->byte_count + byte_count + offset);
-			buf->address = (void *)(dmah->vaddr + offset);
-			buf->bus_address = dmah->busaddr + offset;
-			buf->next = NULL;
-			buf->waiting = 0;
-			buf->pending = 0;
-			buf->file_priv = NULL;
-
-			buf->dev_priv_size = dev->driver->dev_priv_size;
-			buf->dev_private = kzalloc(buf->dev_priv_size,
-						GFP_KERNEL);
-			if (!buf->dev_private) {
-				/* Set count correctly so we free the proper amount. */
-				entry->buf_count = count;
-				entry->seg_count = count;
-				drm_cleanup_buf_error(dev, entry);
-				kfree(temp_pagelist);
-				mutex_unlock(&dev->struct_mutex);
-				atomic_dec(&dev->buf_alloc);
-				return -ENOMEM;
-			}
-
-			DRM_DEBUG("buffer %d @ %p\n",
-				  entry->buf_count, buf->address);
-		}
-		byte_count += PAGE_SIZE << page_order;
-	}
-
-	temp_buflist = krealloc(dma->buflist,
-				(dma->buf_count + entry->buf_count) *
-				sizeof(*dma->buflist), GFP_KERNEL);
-	if (!temp_buflist) {
-		/* Free the entry because it isn't valid */
-		drm_cleanup_buf_error(dev, entry);
-		kfree(temp_pagelist);
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-	dma->buflist = temp_buflist;
-
-	for (i = 0; i < entry->buf_count; i++) {
-		dma->buflist[i + dma->buf_count] = &entry->buflist[i];
-	}
-
-	/* No allocations failed, so now we can replace the original pagelist
-	 * with the new one.
-	 */
-	if (dma->page_count) {
-		kfree(dma->pagelist);
-	}
-	dma->pagelist = temp_pagelist;
-
-	dma->buf_count += entry->buf_count;
-	dma->seg_count += entry->seg_count;
-	dma->page_count += entry->seg_count << page_order;
-	dma->byte_count += PAGE_SIZE * (entry->seg_count << page_order);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	request->count = entry->buf_count;
-	request->size = size;
-
-	if (request->flags & _DRM_PCI_BUFFER_RO)
-		dma->flags = _DRM_DMA_USE_PCI_RO;
-
-	atomic_dec(&dev->buf_alloc);
-	return 0;
-
-}
-EXPORT_SYMBOL(drm_legacy_addbufs_pci);
-
-static int drm_legacy_addbufs_sg(struct drm_device *dev,
-				 struct drm_buf_desc *request)
-{
-	struct drm_device_dma *dma = dev->dma;
-	struct drm_buf_entry *entry;
-	struct drm_buf *buf;
-	unsigned long offset;
-	unsigned long agp_offset;
-	int count;
-	int order;
-	int size;
-	int alignment;
-	int page_order;
-	int total;
-	int byte_count;
-	int i;
-	struct drm_buf **temp_buflist;
-
-	if (!drm_core_check_feature(dev, DRIVER_SG))
-		return -EOPNOTSUPP;
-
-	if (!dma)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	count = request->count;
-	order = order_base_2(request->size);
-	size = 1 << order;
-
-	alignment = (request->flags & _DRM_PAGE_ALIGN)
-	    ? PAGE_ALIGN(size) : size;
-	page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0;
-	total = PAGE_SIZE << page_order;
-
-	byte_count = 0;
-	agp_offset = request->agp_start;
-
-	DRM_DEBUG("count:      %d\n", count);
-	DRM_DEBUG("order:      %d\n", order);
-	DRM_DEBUG("size:       %d\n", size);
-	DRM_DEBUG("agp_offset: %lu\n", agp_offset);
-	DRM_DEBUG("alignment:  %d\n", alignment);
-	DRM_DEBUG("page_order: %d\n", page_order);
-	DRM_DEBUG("total:      %d\n", total);
-
-	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
-		return -EINVAL;
-
-	spin_lock(&dev->buf_lock);
-	if (dev->buf_use) {
-		spin_unlock(&dev->buf_lock);
-		return -EBUSY;
-	}
-	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->buf_lock);
-
-	mutex_lock(&dev->struct_mutex);
-	entry = &dma->bufs[order];
-	if (entry->buf_count) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;	/* May only call once for each order */
-	}
-
-	if (count < 0 || count > 4096) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -EINVAL;
-	}
-
-	entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL);
-	if (!entry->buflist) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-
-	entry->buf_size = size;
-	entry->page_order = page_order;
-
-	offset = 0;
-
-	while (entry->buf_count < count) {
-		buf = &entry->buflist[entry->buf_count];
-		buf->idx = dma->buf_count + entry->buf_count;
-		buf->total = alignment;
-		buf->order = order;
-		buf->used = 0;
-
-		buf->offset = (dma->byte_count + offset);
-		buf->bus_address = agp_offset + offset;
-		buf->address = (void *)(agp_offset + offset
-					+ (unsigned long)dev->sg->virtual);
-		buf->next = NULL;
-		buf->waiting = 0;
-		buf->pending = 0;
-		buf->file_priv = NULL;
-
-		buf->dev_priv_size = dev->driver->dev_priv_size;
-		buf->dev_private = kzalloc(buf->dev_priv_size, GFP_KERNEL);
-		if (!buf->dev_private) {
-			/* Set count correctly so we free the proper amount. */
-			entry->buf_count = count;
-			drm_cleanup_buf_error(dev, entry);
-			mutex_unlock(&dev->struct_mutex);
-			atomic_dec(&dev->buf_alloc);
-			return -ENOMEM;
-		}
-
-		DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address);
-
-		offset += alignment;
-		entry->buf_count++;
-		byte_count += PAGE_SIZE << page_order;
-	}
-
-	DRM_DEBUG("byte_count: %d\n", byte_count);
-
-	temp_buflist = krealloc(dma->buflist,
-				(dma->buf_count + entry->buf_count) *
-				sizeof(*dma->buflist), GFP_KERNEL);
-	if (!temp_buflist) {
-		/* Free the entry because it isn't valid */
-		drm_cleanup_buf_error(dev, entry);
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-	dma->buflist = temp_buflist;
-
-	for (i = 0; i < entry->buf_count; i++) {
-		dma->buflist[i + dma->buf_count] = &entry->buflist[i];
-	}
-
-	dma->buf_count += entry->buf_count;
-	dma->seg_count += entry->seg_count;
-	dma->page_count += byte_count >> PAGE_SHIFT;
-	dma->byte_count += byte_count;
-
-	DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count);
-	DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	request->count = entry->buf_count;
-	request->size = size;
-
-	dma->flags = _DRM_DMA_USE_SG;
-
-	atomic_dec(&dev->buf_alloc);
-	return 0;
-}
-
-/*
- * Add buffers for DMA transfers (ioctl).
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg pointer to a struct drm_buf_desc request.
- * \return zero on success or a negative number on failure.
- *
- * According with the memory type specified in drm_buf_desc::flags and the
- * build options, it dispatches the call either to addbufs_agp(),
- * addbufs_sg() or addbufs_pci() for AGP, scatter-gather or consistent
- * PCI memory respectively.
- */
-int drm_legacy_addbufs(struct drm_device *dev, void *data,
-		       struct drm_file *file_priv)
-{
-	struct drm_buf_desc *request = data;
-	int ret;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
-		return -EOPNOTSUPP;
-
-#if IS_ENABLED(CONFIG_AGP)
-	if (request->flags & _DRM_AGP_BUFFER)
-		ret = drm_legacy_addbufs_agp(dev, request);
-	else
-#endif
-	if (request->flags & _DRM_SG_BUFFER)
-		ret = drm_legacy_addbufs_sg(dev, request);
-	else if (request->flags & _DRM_FB_BUFFER)
-		ret = -EINVAL;
-	else
-		ret = drm_legacy_addbufs_pci(dev, request);
-
-	return ret;
-}
-
-/*
- * Get information about the buffer mappings.
- *
- * This was originally mean for debugging purposes, or by a sophisticated
- * client library to determine how best to use the available buffers (e.g.,
- * large buffers can be used for image transfer).
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg pointer to a drm_buf_info structure.
- * \return zero on success or a negative number on failure.
- *
- * Increments drm_device::buf_use while holding the drm_device::buf_lock
- * lock, preventing of allocating more buffers after this call. Information
- * about each requested buffer is then copied into user space.
- */
-int __drm_legacy_infobufs(struct drm_device *dev,
-			void *data, int *p,
-			int (*f)(void *, int, struct drm_buf_entry *))
-{
-	struct drm_device_dma *dma = dev->dma;
-	int i;
-	int count;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
-		return -EOPNOTSUPP;
-
-	if (!dma)
-		return -EINVAL;
-
-	spin_lock(&dev->buf_lock);
-	if (atomic_read(&dev->buf_alloc)) {
-		spin_unlock(&dev->buf_lock);
-		return -EBUSY;
-	}
-	++dev->buf_use;		/* Can't allocate more after this call */
-	spin_unlock(&dev->buf_lock);
-
-	for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) {
-		if (dma->bufs[i].buf_count)
-			++count;
-	}
-
-	DRM_DEBUG("count = %d\n", count);
-
-	if (*p >= count) {
-		for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) {
-			struct drm_buf_entry *from = &dma->bufs[i];
-
-			if (from->buf_count) {
-				if (f(data, count, from) < 0)
-					return -EFAULT;
-				DRM_DEBUG("%d %d %d %d %d\n",
-					  i,
-					  dma->bufs[i].buf_count,
-					  dma->bufs[i].buf_size,
-					  dma->bufs[i].low_mark,
-					  dma->bufs[i].high_mark);
-				++count;
-			}
-		}
-	}
-	*p = count;
-
-	return 0;
-}
-
-static int copy_one_buf(void *data, int count, struct drm_buf_entry *from)
-{
-	struct drm_buf_info *request = data;
-	struct drm_buf_desc __user *to = &request->list[count];
-	struct drm_buf_desc v = {.count = from->buf_count,
-				 .size = from->buf_size,
-				 .low_mark = from->low_mark,
-				 .high_mark = from->high_mark};
-
-	if (copy_to_user(to, &v, offsetof(struct drm_buf_desc, flags)))
-		return -EFAULT;
-	return 0;
-}
-
-int drm_legacy_infobufs(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_buf_info *request = data;
-
-	return __drm_legacy_infobufs(dev, data, &request->count, copy_one_buf);
-}
-
-/*
- * Specifies a low and high water mark for buffer allocation
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg a pointer to a drm_buf_desc structure.
- * \return zero on success or a negative number on failure.
- *
- * Verifies that the size order is bounded between the admissible orders and
- * updates the respective drm_device_dma::bufs entry low and high water mark.
- *
- * \note This ioctl is deprecated and mostly never used.
- */
-int drm_legacy_markbufs(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_device_dma *dma = dev->dma;
-	struct drm_buf_desc *request = data;
-	int order;
-	struct drm_buf_entry *entry;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
-		return -EOPNOTSUPP;
-
-	if (!dma)
-		return -EINVAL;
-
-	DRM_DEBUG("%d, %d, %d\n",
-		  request->size, request->low_mark, request->high_mark);
-	order = order_base_2(request->size);
-	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
-		return -EINVAL;
-	entry = &dma->bufs[order];
-
-	if (request->low_mark < 0 || request->low_mark > entry->buf_count)
-		return -EINVAL;
-	if (request->high_mark < 0 || request->high_mark > entry->buf_count)
-		return -EINVAL;
-
-	entry->low_mark = request->low_mark;
-	entry->high_mark = request->high_mark;
-
-	return 0;
-}
-
-/*
- * Unreserve the buffers in list, previously reserved using drmDMA.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg pointer to a drm_buf_free structure.
- * \return zero on success or a negative number on failure.
- *
- * Calls free_buffer() for each used buffer.
- * This function is primarily used for debugging.
- */
-int drm_legacy_freebufs(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_device_dma *dma = dev->dma;
-	struct drm_buf_free *request = data;
-	int i;
-	int idx;
-	struct drm_buf *buf;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
-		return -EOPNOTSUPP;
-
-	if (!dma)
-		return -EINVAL;
-
-	DRM_DEBUG("%d\n", request->count);
-	for (i = 0; i < request->count; i++) {
-		if (copy_from_user(&idx, &request->list[i], sizeof(idx)))
-			return -EFAULT;
-		if (idx < 0 || idx >= dma->buf_count) {
-			DRM_ERROR("Index %d (of %d max)\n",
-				  idx, dma->buf_count - 1);
-			return -EINVAL;
-		}
-		idx = array_index_nospec(idx, dma->buf_count);
-		buf = dma->buflist[idx];
-		if (buf->file_priv != file_priv) {
-			DRM_ERROR("Process %d freeing buffer not owned\n",
-				  task_pid_nr(current));
-			return -EINVAL;
-		}
-		drm_legacy_free_buffer(dev, buf);
-	}
-
-	return 0;
-}
-
-/*
- * Maps all of the DMA buffers into client-virtual space (ioctl).
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg pointer to a drm_buf_map structure.
- * \return zero on success or a negative number on failure.
- *
- * Maps the AGP, SG or PCI buffer region with vm_mmap(), and copies information
- * about each buffer into user space. For PCI buffers, it calls vm_mmap() with
- * offset equal to 0, which drm_mmap() interprets as PCI buffers and calls
- * drm_mmap_dma().
- */
-int __drm_legacy_mapbufs(struct drm_device *dev, void *data, int *p,
-			 void __user **v,
-			 int (*f)(void *, int, unsigned long,
-				 struct drm_buf *),
-				 struct drm_file *file_priv)
-{
-	struct drm_device_dma *dma = dev->dma;
-	int retcode = 0;
-	unsigned long virtual;
-	int i;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
-		return -EOPNOTSUPP;
-
-	if (!dma)
-		return -EINVAL;
-
-	spin_lock(&dev->buf_lock);
-	if (atomic_read(&dev->buf_alloc)) {
-		spin_unlock(&dev->buf_lock);
-		return -EBUSY;
-	}
-	dev->buf_use++;		/* Can't allocate more after this call */
-	spin_unlock(&dev->buf_lock);
-
-	if (*p >= dma->buf_count) {
-		if ((dev->agp && (dma->flags & _DRM_DMA_USE_AGP))
-		    || (drm_core_check_feature(dev, DRIVER_SG)
-			&& (dma->flags & _DRM_DMA_USE_SG))) {
-			struct drm_local_map *map = dev->agp_buffer_map;
-			unsigned long token = dev->agp_buffer_token;
-
-			if (!map) {
-				retcode = -EINVAL;
-				goto done;
-			}
-			virtual = vm_mmap(file_priv->filp, 0, map->size,
-					  PROT_READ | PROT_WRITE,
-					  MAP_SHARED,
-					  token);
-		} else {
-			virtual = vm_mmap(file_priv->filp, 0, dma->byte_count,
-					  PROT_READ | PROT_WRITE,
-					  MAP_SHARED, 0);
-		}
-		if (virtual > -1024UL) {
-			/* Real error */
-			retcode = (signed long)virtual;
-			goto done;
-		}
-		*v = (void __user *)virtual;
-
-		for (i = 0; i < dma->buf_count; i++) {
-			if (f(data, i, virtual, dma->buflist[i]) < 0) {
-				retcode = -EFAULT;
-				goto done;
-			}
-		}
-	}
-      done:
-	*p = dma->buf_count;
-	DRM_DEBUG("%d buffers, retcode = %d\n", *p, retcode);
-
-	return retcode;
-}
-
-static int map_one_buf(void *data, int idx, unsigned long virtual,
-			struct drm_buf *buf)
-{
-	struct drm_buf_map *request = data;
-	unsigned long address = virtual + buf->offset;	/* *** */
-
-	if (copy_to_user(&request->list[idx].idx, &buf->idx,
-			 sizeof(request->list[0].idx)))
-		return -EFAULT;
-	if (copy_to_user(&request->list[idx].total, &buf->total,
-			 sizeof(request->list[0].total)))
-		return -EFAULT;
-	if (clear_user(&request->list[idx].used, sizeof(int)))
-		return -EFAULT;
-	if (copy_to_user(&request->list[idx].address, &address,
-			 sizeof(address)))
-		return -EFAULT;
-	return 0;
-}
-
-int drm_legacy_mapbufs(struct drm_device *dev, void *data,
-		       struct drm_file *file_priv)
-{
-	struct drm_buf_map *request = data;
-
-	return __drm_legacy_mapbufs(dev, data, &request->count,
-				    &request->virtual, map_one_buf,
-				    file_priv);
-}
-
-int drm_legacy_dma_ioctl(struct drm_device *dev, void *data,
-		  struct drm_file *file_priv)
-{
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (dev->driver->dma_ioctl)
-		return dev->driver->dma_ioctl(dev, data, file_priv);
-	else
-		return -EINVAL;
-}
-
-struct drm_local_map *drm_legacy_getsarea(struct drm_device *dev)
-{
-	struct drm_map_list *entry;
-
-	list_for_each_entry(entry, &dev->maplist, head) {
-		if (entry->map && entry->map->type == _DRM_SHM &&
-		    (entry->map->flags & _DRM_CONTAINS_LOCK)) {
-			return entry->map;
-		}
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(drm_legacy_getsarea);
diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c
deleted file mode 100644
index a0fc779e5e1e8..0000000000000
--- a/drivers/gpu/drm/drm_context.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * Legacy: Generic DRM Contexts
- *
- * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Author: Rickard E. (Rik) Faith <faith@valinux.com>
- * Author: Gareth Hughes <gareth@valinux.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#include <drm/drm_drv.h>
-#include <drm/drm_file.h>
-#include <drm/drm_print.h>
-
-#include "drm_legacy.h"
-
-struct drm_ctx_list {
-	struct list_head head;
-	drm_context_t handle;
-	struct drm_file *tag;
-};
-
-/******************************************************************/
-/** \name Context bitmap support */
-/*@{*/
-
-/*
- * Free a handle from the context bitmap.
- *
- * \param dev DRM device.
- * \param ctx_handle context handle.
- *
- * Clears the bit specified by \p ctx_handle in drm_device::ctx_bitmap and the entry
- * in drm_device::ctx_idr, while holding the drm_device::struct_mutex
- * lock.
- */
-void drm_legacy_ctxbitmap_free(struct drm_device * dev, int ctx_handle)
-{
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	mutex_lock(&dev->struct_mutex);
-	idr_remove(&dev->ctx_idr, ctx_handle);
-	mutex_unlock(&dev->struct_mutex);
-}
-
-/*
- * Context bitmap allocation.
- *
- * \param dev DRM device.
- * \return (non-negative) context handle on success or a negative number on failure.
- *
- * Allocate a new idr from drm_device::ctx_idr while holding the
- * drm_device::struct_mutex lock.
- */
-static int drm_legacy_ctxbitmap_next(struct drm_device * dev)
-{
-	int ret;
-
-	mutex_lock(&dev->struct_mutex);
-	ret = idr_alloc(&dev->ctx_idr, NULL, DRM_RESERVED_CONTEXTS, 0,
-			GFP_KERNEL);
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
-}
-
-/*
- * Context bitmap initialization.
- *
- * \param dev DRM device.
- *
- * Initialise the drm_device::ctx_idr
- */
-void drm_legacy_ctxbitmap_init(struct drm_device * dev)
-{
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	idr_init(&dev->ctx_idr);
-}
-
-/*
- * Context bitmap cleanup.
- *
- * \param dev DRM device.
- *
- * Free all idr members using drm_ctx_sarea_free helper function
- * while holding the drm_device::struct_mutex lock.
- */
-void drm_legacy_ctxbitmap_cleanup(struct drm_device * dev)
-{
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	mutex_lock(&dev->struct_mutex);
-	idr_destroy(&dev->ctx_idr);
-	mutex_unlock(&dev->struct_mutex);
-}
-
-/**
- * drm_legacy_ctxbitmap_flush() - Flush all contexts owned by a file
- * @dev: DRM device to operate on
- * @file: Open file to flush contexts for
- *
- * This iterates over all contexts on @dev and drops them if they're owned by
- * @file. Note that after this call returns, new contexts might be added if
- * the file is still alive.
- */
-void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file)
-{
-	struct drm_ctx_list *pos, *tmp;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	mutex_lock(&dev->ctxlist_mutex);
-
-	list_for_each_entry_safe(pos, tmp, &dev->ctxlist, head) {
-		if (pos->tag == file &&
-		    pos->handle != DRM_KERNEL_CONTEXT) {
-			if (dev->driver->context_dtor)
-				dev->driver->context_dtor(dev, pos->handle);
-
-			drm_legacy_ctxbitmap_free(dev, pos->handle);
-			list_del(&pos->head);
-			kfree(pos);
-		}
-	}
-
-	mutex_unlock(&dev->ctxlist_mutex);
-}
-
-/*@}*/
-
-/******************************************************************/
-/** \name Per Context SAREA Support */
-/*@{*/
-
-/*
- * Get per-context SAREA.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx_priv_map structure.
- * \return zero on success or a negative number on failure.
- *
- * Gets the map from drm_device::ctx_idr with the handle specified and
- * returns its handle.
- */
-int drm_legacy_getsareactx(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv)
-{
-	struct drm_ctx_priv_map *request = data;
-	struct drm_local_map *map;
-	struct drm_map_list *_entry;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	mutex_lock(&dev->struct_mutex);
-
-	map = idr_find(&dev->ctx_idr, request->ctx_id);
-	if (!map) {
-		mutex_unlock(&dev->struct_mutex);
-		return -EINVAL;
-	}
-
-	request->handle = NULL;
-	list_for_each_entry(_entry, &dev->maplist, head) {
-		if (_entry->map == map) {
-			request->handle =
-			    (void *)(unsigned long)_entry->user_token;
-			break;
-		}
-	}
-
-	mutex_unlock(&dev->struct_mutex);
-
-	if (request->handle == NULL)
-		return -EINVAL;
-
-	return 0;
-}
-
-/*
- * Set per-context SAREA.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx_priv_map structure.
- * \return zero on success or a negative number on failure.
- *
- * Searches the mapping specified in \p arg and update the entry in
- * drm_device::ctx_idr with it.
- */
-int drm_legacy_setsareactx(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv)
-{
-	struct drm_ctx_priv_map *request = data;
-	struct drm_local_map *map = NULL;
-	struct drm_map_list *r_list = NULL;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(r_list, &dev->maplist, head) {
-		if (r_list->map
-		    && r_list->user_token == (unsigned long) request->handle)
-			goto found;
-	}
-      bad:
-	mutex_unlock(&dev->struct_mutex);
-	return -EINVAL;
-
-      found:
-	map = r_list->map;
-	if (!map)
-		goto bad;
-
-	if (IS_ERR(idr_replace(&dev->ctx_idr, map, request->ctx_id)))
-		goto bad;
-
-	mutex_unlock(&dev->struct_mutex);
-
-	return 0;
-}
-
-/*@}*/
-
-/******************************************************************/
-/** \name The actual DRM context handling routines */
-/*@{*/
-
-/*
- * Switch context.
- *
- * \param dev DRM device.
- * \param old old context handle.
- * \param new new context handle.
- * \return zero on success or a negative number on failure.
- *
- * Attempt to set drm_device::context_flag.
- */
-static int drm_context_switch(struct drm_device * dev, int old, int new)
-{
-	if (test_and_set_bit(0, &dev->context_flag)) {
-		DRM_ERROR("Reentering -- FIXME\n");
-		return -EBUSY;
-	}
-
-	DRM_DEBUG("Context switch from %d to %d\n", old, new);
-
-	if (new == dev->last_context) {
-		clear_bit(0, &dev->context_flag);
-		return 0;
-	}
-
-	return 0;
-}
-
-/*
- * Complete context switch.
- *
- * \param dev DRM device.
- * \param new new context handle.
- * \return zero on success or a negative number on failure.
- *
- * Updates drm_device::last_context and drm_device::last_switch. Verifies the
- * hardware lock is held, clears the drm_device::context_flag and wakes up
- * drm_device::context_wait.
- */
-static int drm_context_switch_complete(struct drm_device *dev,
-				       struct drm_file *file_priv, int new)
-{
-	dev->last_context = new;	/* PRE/POST: This is the _only_ writer. */
-
-	if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock)) {
-		DRM_ERROR("Lock isn't held after context switch\n");
-	}
-
-	/* If a context switch is ever initiated
-	   when the kernel holds the lock, release
-	   that lock here.
-	 */
-	clear_bit(0, &dev->context_flag);
-
-	return 0;
-}
-
-/*
- * Reserve contexts.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx_res structure.
- * \return zero on success or a negative number on failure.
- */
-int drm_legacy_resctx(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
-{
-	struct drm_ctx_res *res = data;
-	struct drm_ctx ctx;
-	int i;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (res->count >= DRM_RESERVED_CONTEXTS) {
-		memset(&ctx, 0, sizeof(ctx));
-		for (i = 0; i < DRM_RESERVED_CONTEXTS; i++) {
-			ctx.handle = i;
-			if (copy_to_user(&res->contexts[i], &ctx, sizeof(ctx)))
-				return -EFAULT;
-		}
-	}
-	res->count = DRM_RESERVED_CONTEXTS;
-
-	return 0;
-}
-
-/*
- * Add context.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx structure.
- * \return zero on success or a negative number on failure.
- *
- * Get a new handle for the context and copy to userspace.
- */
-int drm_legacy_addctx(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
-{
-	struct drm_ctx_list *ctx_entry;
-	struct drm_ctx *ctx = data;
-	int tmp_handle;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	tmp_handle = drm_legacy_ctxbitmap_next(dev);
-	if (tmp_handle == DRM_KERNEL_CONTEXT) {
-		/* Skip kernel's context and get a new one. */
-		tmp_handle = drm_legacy_ctxbitmap_next(dev);
-	}
-	DRM_DEBUG("%d\n", tmp_handle);
-	if (tmp_handle < 0) {
-		DRM_DEBUG("Not enough free contexts.\n");
-		/* Should this return -EBUSY instead? */
-		return tmp_handle;
-	}
-
-	ctx->handle = tmp_handle;
-
-	ctx_entry = kmalloc(sizeof(*ctx_entry), GFP_KERNEL);
-	if (!ctx_entry) {
-		DRM_DEBUG("out of memory\n");
-		return -ENOMEM;
-	}
-
-	INIT_LIST_HEAD(&ctx_entry->head);
-	ctx_entry->handle = ctx->handle;
-	ctx_entry->tag = file_priv;
-
-	mutex_lock(&dev->ctxlist_mutex);
-	list_add(&ctx_entry->head, &dev->ctxlist);
-	mutex_unlock(&dev->ctxlist_mutex);
-
-	return 0;
-}
-
-/*
- * Get context.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx structure.
- * \return zero on success or a negative number on failure.
- */
-int drm_legacy_getctx(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
-{
-	struct drm_ctx *ctx = data;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	/* This is 0, because we don't handle any context flags */
-	ctx->flags = 0;
-
-	return 0;
-}
-
-/*
- * Switch context.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx structure.
- * \return zero on success or a negative number on failure.
- *
- * Calls context_switch().
- */
-int drm_legacy_switchctx(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv)
-{
-	struct drm_ctx *ctx = data;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	DRM_DEBUG("%d\n", ctx->handle);
-	return drm_context_switch(dev, dev->last_context, ctx->handle);
-}
-
-/*
- * New context.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx structure.
- * \return zero on success or a negative number on failure.
- *
- * Calls context_switch_complete().
- */
-int drm_legacy_newctx(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
-{
-	struct drm_ctx *ctx = data;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	DRM_DEBUG("%d\n", ctx->handle);
-	drm_context_switch_complete(dev, file_priv, ctx->handle);
-
-	return 0;
-}
-
-/*
- * Remove context.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument pointing to a drm_ctx structure.
- * \return zero on success or a negative number on failure.
- *
- * If not the special kernel context, calls ctxbitmap_free() to free the specified context.
- */
-int drm_legacy_rmctx(struct drm_device *dev, void *data,
-		     struct drm_file *file_priv)
-{
-	struct drm_ctx *ctx = data;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	DRM_DEBUG("%d\n", ctx->handle);
-	if (ctx->handle != DRM_KERNEL_CONTEXT) {
-		if (dev->driver->context_dtor)
-			dev->driver->context_dtor(dev, ctx->handle);
-		drm_legacy_ctxbitmap_free(dev, ctx->handle);
-	}
-
-	mutex_lock(&dev->ctxlist_mutex);
-	if (!list_empty(&dev->ctxlist)) {
-		struct drm_ctx_list *pos, *n;
-
-		list_for_each_entry_safe(pos, n, &dev->ctxlist, head) {
-			if (pos->handle == ctx->handle) {
-				list_del(&pos->head);
-				kfree(pos);
-			}
-		}
-	}
-	mutex_unlock(&dev->ctxlist_mutex);
-
-	return 0;
-}
-
-/*@}*/
diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c
deleted file mode 100644
index eb6b741a6f995..0000000000000
--- a/drivers/gpu/drm/drm_dma.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * \file drm_dma.c
- * DMA IOCTL and function support
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com
- *
- * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/export.h>
-#include <linux/pci.h>
-
-#include <drm/drm_drv.h>
-#include <drm/drm_print.h>
-
-#include "drm_legacy.h"
-
-/**
- * drm_legacy_dma_setup() - Initialize the DMA data.
- *
- * @dev: DRM device.
- * Return: zero on success or a negative value on failure.
- *
- * Allocate and initialize a drm_device_dma structure.
- */
-int drm_legacy_dma_setup(struct drm_device *dev)
-{
-	int i;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) ||
-	    !drm_core_check_feature(dev, DRIVER_LEGACY))
-		return 0;
-
-	dev->buf_use = 0;
-	atomic_set(&dev->buf_alloc, 0);
-
-	dev->dma = kzalloc(sizeof(*dev->dma), GFP_KERNEL);
-	if (!dev->dma)
-		return -ENOMEM;
-
-	for (i = 0; i <= DRM_MAX_ORDER; i++)
-		memset(&dev->dma->bufs[i], 0, sizeof(dev->dma->bufs[0]));
-
-	return 0;
-}
-
-/**
- * drm_legacy_dma_takedown() - Cleanup the DMA resources.
- *
- * @dev: DRM device.
- *
- * Free all pages associated with DMA buffers, the buffers and pages lists, and
- * finally the drm_device::dma structure itself.
- */
-void drm_legacy_dma_takedown(struct drm_device *dev)
-{
-	struct drm_device_dma *dma = dev->dma;
-	drm_dma_handle_t *dmah;
-	int i, j;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) ||
-	    !drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	if (!dma)
-		return;
-
-	/* Clear dma buffers */
-	for (i = 0; i <= DRM_MAX_ORDER; i++) {
-		if (dma->bufs[i].seg_count) {
-			DRM_DEBUG("order %d: buf_count = %d,"
-				  " seg_count = %d\n",
-				  i,
-				  dma->bufs[i].buf_count,
-				  dma->bufs[i].seg_count);
-			for (j = 0; j < dma->bufs[i].seg_count; j++) {
-				if (dma->bufs[i].seglist[j]) {
-					dmah = dma->bufs[i].seglist[j];
-					dma_free_coherent(dev->dev,
-							  dmah->size,
-							  dmah->vaddr,
-							  dmah->busaddr);
-					kfree(dmah);
-				}
-			}
-			kfree(dma->bufs[i].seglist);
-		}
-		if (dma->bufs[i].buf_count) {
-			for (j = 0; j < dma->bufs[i].buf_count; j++) {
-				kfree(dma->bufs[i].buflist[j].dev_private);
-			}
-			kfree(dma->bufs[i].buflist);
-		}
-	}
-
-	kfree(dma->buflist);
-	kfree(dma->pagelist);
-	kfree(dev->dma);
-	dev->dma = NULL;
-}
-
-/**
- * drm_legacy_free_buffer() - Free a buffer.
- *
- * @dev: DRM device.
- * @buf: buffer to free.
- *
- * Resets the fields of \p buf.
- */
-void drm_legacy_free_buffer(struct drm_device *dev, struct drm_buf * buf)
-{
-	if (!buf)
-		return;
-
-	buf->waiting = 0;
-	buf->pending = 0;
-	buf->file_priv = NULL;
-	buf->used = 0;
-}
-
-/**
- * drm_legacy_reclaim_buffers() - Reclaim the buffers.
- *
- * @dev: DRM device.
- * @file_priv: DRM file private.
- *
- * Frees each buffer associated with \p file_priv not already on the hardware.
- */
-void drm_legacy_reclaim_buffers(struct drm_device *dev,
-				struct drm_file *file_priv)
-{
-	struct drm_device_dma *dma = dev->dma;
-	int i;
-
-	if (!dma)
-		return;
-	for (i = 0; i < dma->buf_count; i++) {
-		if (dma->buflist[i]->file_priv == file_priv) {
-			switch (dma->buflist[i]->list) {
-			case DRM_LIST_NONE:
-				drm_legacy_free_buffer(dev, dma->buflist[i]);
-				break;
-			case DRM_LIST_WAIT:
-				dma->buflist[i]->list = DRM_LIST_RECLAIM;
-				break;
-			default:
-				/* Buffer already on hardware. */
-				break;
-			}
-		}
-	}
-}
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
deleted file mode 100644
index 60afa18655599..0000000000000
--- a/drivers/gpu/drm/drm_hashtab.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- *
- **************************************************************************/
-/*
- * Simple open hash tab implementation.
- *
- * Authors:
- * Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- */
-
-#include <linux/hash.h>
-#include <linux/mm.h>
-#include <linux/rculist.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include <drm/drm_print.h>
-
-#include "drm_legacy.h"
-
-int drm_ht_create(struct drm_open_hash *ht, unsigned int order)
-{
-	unsigned int size = 1 << order;
-
-	ht->order = order;
-	ht->table = NULL;
-	if (size <= PAGE_SIZE / sizeof(*ht->table))
-		ht->table = kcalloc(size, sizeof(*ht->table), GFP_KERNEL);
-	else
-		ht->table = vzalloc(array_size(size, sizeof(*ht->table)));
-	if (!ht->table) {
-		DRM_ERROR("Out of memory for hash table\n");
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key)
-{
-	struct drm_hash_item *entry;
-	struct hlist_head *h_list;
-	unsigned int hashed_key;
-	int count = 0;
-
-	hashed_key = hash_long(key, ht->order);
-	DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key);
-	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry(entry, h_list, head)
-		DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key);
-}
-
-static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht,
-					  unsigned long key)
-{
-	struct drm_hash_item *entry;
-	struct hlist_head *h_list;
-	unsigned int hashed_key;
-
-	hashed_key = hash_long(key, ht->order);
-	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry(entry, h_list, head) {
-		if (entry->key == key)
-			return &entry->head;
-		if (entry->key > key)
-			break;
-	}
-	return NULL;
-}
-
-static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht,
-					      unsigned long key)
-{
-	struct drm_hash_item *entry;
-	struct hlist_head *h_list;
-	unsigned int hashed_key;
-
-	hashed_key = hash_long(key, ht->order);
-	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry_rcu(entry, h_list, head) {
-		if (entry->key == key)
-			return &entry->head;
-		if (entry->key > key)
-			break;
-	}
-	return NULL;
-}
-
-int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
-{
-	struct drm_hash_item *entry;
-	struct hlist_head *h_list;
-	struct hlist_node *parent;
-	unsigned int hashed_key;
-	unsigned long key = item->key;
-
-	hashed_key = hash_long(key, ht->order);
-	h_list = &ht->table[hashed_key];
-	parent = NULL;
-	hlist_for_each_entry(entry, h_list, head) {
-		if (entry->key == key)
-			return -EINVAL;
-		if (entry->key > key)
-			break;
-		parent = &entry->head;
-	}
-	if (parent) {
-		hlist_add_behind_rcu(&item->head, parent);
-	} else {
-		hlist_add_head_rcu(&item->head, h_list);
-	}
-	return 0;
-}
-
-/*
- * Just insert an item and return any "bits" bit key that hasn't been
- * used before.
- */
-int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item,
-			      unsigned long seed, int bits, int shift,
-			      unsigned long add)
-{
-	int ret;
-	unsigned long mask = (1UL << bits) - 1;
-	unsigned long first, unshifted_key;
-
-	unshifted_key = hash_long(seed, bits);
-	first = unshifted_key;
-	do {
-		item->key = (unshifted_key << shift) + add;
-		ret = drm_ht_insert_item(ht, item);
-		if (ret)
-			unshifted_key = (unshifted_key + 1) & mask;
-	} while(ret && (unshifted_key != first));
-
-	if (ret) {
-		DRM_ERROR("Available key bit space exhausted\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key,
-		     struct drm_hash_item **item)
-{
-	struct hlist_node *list;
-
-	list = drm_ht_find_key_rcu(ht, key);
-	if (!list)
-		return -EINVAL;
-
-	*item = hlist_entry(list, struct drm_hash_item, head);
-	return 0;
-}
-
-int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key)
-{
-	struct hlist_node *list;
-
-	list = drm_ht_find_key(ht, key);
-	if (list) {
-		hlist_del_init_rcu(list);
-		return 0;
-	}
-	return -EINVAL;
-}
-
-int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item)
-{
-	hlist_del_init_rcu(&item->head);
-	return 0;
-}
-
-void drm_ht_remove(struct drm_open_hash *ht)
-{
-	if (ht->table) {
-		kvfree(ht->table);
-		ht->table = NULL;
-	}
-}
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index fa08a313127e2..8e4faf0a28e6c 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -121,11 +121,6 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data,
 /* drm_irq.c */
 
 /* IOCTLS */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-int drm_legacy_irq_control(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv);
-#endif
-
 int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
deleted file mode 100644
index d327638e15ee7..0000000000000
--- a/drivers/gpu/drm/drm_irq.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * drm_irq.c IRQ and vblank support
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com
- *
- * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include <linux/export.h>
-#include <linux/interrupt.h>	/* For task queue support */
-#include <linux/pci.h>
-#include <linux/vgaarb.h>
-
-#include <drm/drm.h>
-#include <drm/drm_device.h>
-#include <drm/drm_drv.h>
-#include <drm/drm_legacy.h>
-#include <drm/drm_print.h>
-#include <drm/drm_vblank.h>
-
-#include "drm_internal.h"
-
-static int drm_legacy_irq_install(struct drm_device *dev, int irq)
-{
-	int ret;
-	unsigned long sh_flags = 0;
-
-	if (irq == 0)
-		return -EINVAL;
-
-	if (dev->irq_enabled)
-		return -EBUSY;
-	dev->irq_enabled = true;
-
-	DRM_DEBUG("irq=%d\n", irq);
-
-	/* Before installing handler */
-	if (dev->driver->irq_preinstall)
-		dev->driver->irq_preinstall(dev);
-
-	/* PCI devices require shared interrupts. */
-	if (dev_is_pci(dev->dev))
-		sh_flags = IRQF_SHARED;
-
-	ret = request_irq(irq, dev->driver->irq_handler,
-			  sh_flags, dev->driver->name, dev);
-
-	if (ret < 0) {
-		dev->irq_enabled = false;
-		return ret;
-	}
-
-	/* After installing handler */
-	if (dev->driver->irq_postinstall)
-		ret = dev->driver->irq_postinstall(dev);
-
-	if (ret < 0) {
-		dev->irq_enabled = false;
-		if (drm_core_check_feature(dev, DRIVER_LEGACY))
-			vga_client_unregister(to_pci_dev(dev->dev));
-		free_irq(irq, dev);
-	} else {
-		dev->irq = irq;
-	}
-
-	return ret;
-}
-
-int drm_legacy_irq_uninstall(struct drm_device *dev)
-{
-	unsigned long irqflags;
-	bool irq_enabled;
-	int i;
-
-	irq_enabled = dev->irq_enabled;
-	dev->irq_enabled = false;
-
-	/*
-	 * Wake up any waiters so they don't hang. This is just to paper over
-	 * issues for UMS drivers which aren't in full control of their
-	 * vblank/irq handling. KMS drivers must ensure that vblanks are all
-	 * disabled when uninstalling the irq handler.
-	 */
-	if (drm_dev_has_vblank(dev)) {
-		spin_lock_irqsave(&dev->vbl_lock, irqflags);
-		for (i = 0; i < dev->num_crtcs; i++) {
-			struct drm_vblank_crtc *vblank = &dev->vblank[i];
-
-			if (!vblank->enabled)
-				continue;
-
-			WARN_ON(drm_core_check_feature(dev, DRIVER_MODESET));
-
-			drm_vblank_disable_and_save(dev, i);
-			wake_up(&vblank->queue);
-		}
-		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
-	}
-
-	if (!irq_enabled)
-		return -EINVAL;
-
-	DRM_DEBUG("irq=%d\n", dev->irq);
-
-	if (drm_core_check_feature(dev, DRIVER_LEGACY))
-		vga_client_unregister(to_pci_dev(dev->dev));
-
-	if (dev->driver->irq_uninstall)
-		dev->driver->irq_uninstall(dev);
-
-	free_irq(dev->irq, dev);
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_irq_uninstall);
-
-int drm_legacy_irq_control(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv)
-{
-	struct drm_control *ctl = data;
-	int ret = 0, irq;
-	struct pci_dev *pdev;
-
-	/* if we haven't irq we fallback for compatibility reasons -
-	 * this used to be a separate function in drm_dma.h
-	 */
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-		return 0;
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return 0;
-	/* UMS was only ever supported on pci devices. */
-	if (WARN_ON(!dev_is_pci(dev->dev)))
-		return -EINVAL;
-
-	switch (ctl->func) {
-	case DRM_INST_HANDLER:
-		pdev = to_pci_dev(dev->dev);
-		irq = pdev->irq;
-
-		if (dev->if_version < DRM_IF_VERSION(1, 2) &&
-		    ctl->irq != irq)
-			return -EINVAL;
-		mutex_lock(&dev->struct_mutex);
-		ret = drm_legacy_irq_install(dev, irq);
-		mutex_unlock(&dev->struct_mutex);
-
-		return ret;
-	case DRM_UNINST_HANDLER:
-		mutex_lock(&dev->struct_mutex);
-		ret = drm_legacy_irq_uninstall(dev);
-		mutex_unlock(&dev->struct_mutex);
-
-		return ret;
-	default:
-		return -EINVAL;
-	}
-}
diff --git a/drivers/gpu/drm/drm_legacy.h b/drivers/gpu/drm/drm_legacy.h
deleted file mode 100644
index 70c9dba114a61..0000000000000
--- a/drivers/gpu/drm/drm_legacy.h
+++ /dev/null
@@ -1,290 +0,0 @@
-#ifndef __DRM_LEGACY_H__
-#define __DRM_LEGACY_H__
-
-/*
- * Copyright (c) 2014 David Herrmann <dh.herrmann@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This file contains legacy interfaces that modern drm drivers
- * should no longer be using. They cannot be removed as legacy
- * drivers use them, and removing them are API breaks.
- */
-#include <linux/list.h>
-
-#include <drm/drm.h>
-#include <drm/drm_device.h>
-#include <drm/drm_legacy.h>
-
-struct agp_memory;
-struct drm_buf_desc;
-struct drm_device;
-struct drm_file;
-struct drm_hash_item;
-struct drm_open_hash;
-
-/*
- * Hash-table Support
- */
-
-#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member)
-
-/* drm_hashtab.c */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-int drm_ht_create(struct drm_open_hash *ht, unsigned int order);
-int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item);
-int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item,
-			      unsigned long seed, int bits, int shift,
-			      unsigned long add);
-int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item);
-
-void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key);
-int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key);
-int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item);
-void drm_ht_remove(struct drm_open_hash *ht);
-#endif
-
-/*
- * RCU-safe interface
- *
- * The user of this API needs to make sure that two or more instances of the
- * hash table manipulation functions are never run simultaneously.
- * The lookup function drm_ht_find_item_rcu may, however, run simultaneously
- * with any of the manipulation functions as long as it's called from within
- * an RCU read-locked section.
- */
-#define drm_ht_insert_item_rcu drm_ht_insert_item
-#define drm_ht_just_insert_please_rcu drm_ht_just_insert_please
-#define drm_ht_remove_key_rcu drm_ht_remove_key
-#define drm_ht_remove_item_rcu drm_ht_remove_item
-#define drm_ht_find_item_rcu drm_ht_find_item
-
-/*
- * Generic DRM Contexts
- */
-
-#define DRM_KERNEL_CONTEXT		0
-#define DRM_RESERVED_CONTEXTS		1
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_ctxbitmap_init(struct drm_device *dev);
-void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev);
-void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file);
-#else
-static inline void drm_legacy_ctxbitmap_init(struct drm_device *dev) {}
-static inline void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev) {}
-static inline void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file) {}
-#endif
-
-void drm_legacy_ctxbitmap_free(struct drm_device *dev, int ctx_handle);
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-int drm_legacy_resctx(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_addctx(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_getctx(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_switchctx(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_newctx(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_rmctx(struct drm_device *d, void *v, struct drm_file *f);
-
-int drm_legacy_setsareactx(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_getsareactx(struct drm_device *d, void *v, struct drm_file *f);
-#endif
-
-/*
- * Generic Buffer Management
- */
-
-#define DRM_MAP_HASH_OFFSET 0x10000000
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-static inline int drm_legacy_create_map_hash(struct drm_device *dev)
-{
-	return drm_ht_create(&dev->map_hash, 12);
-}
-
-static inline void drm_legacy_remove_map_hash(struct drm_device *dev)
-{
-	drm_ht_remove(&dev->map_hash);
-}
-#else
-static inline int drm_legacy_create_map_hash(struct drm_device *dev)
-{
-	return 0;
-}
-
-static inline void drm_legacy_remove_map_hash(struct drm_device *dev) {}
-#endif
-
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv);
-int drm_legacy_addmap_ioctl(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_rmmap_ioctl(struct drm_device *d, void *v, struct drm_file *f);
-
-int drm_legacy_addbufs(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_infobufs(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_markbufs(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_freebufs(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_mapbufs(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_dma_ioctl(struct drm_device *d, void *v, struct drm_file *f);
-#endif
-
-int __drm_legacy_infobufs(struct drm_device *, void *, int *,
-			  int (*)(void *, int, struct drm_buf_entry *));
-int __drm_legacy_mapbufs(struct drm_device *, void *, int *,
-			  void __user **,
-			  int (*)(void *, int, unsigned long, struct drm_buf *),
-			  struct drm_file *);
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_master_rmmaps(struct drm_device *dev,
-			      struct drm_master *master);
-void drm_legacy_rmmaps(struct drm_device *dev);
-#else
-static inline void drm_legacy_master_rmmaps(struct drm_device *dev,
-					    struct drm_master *master) {}
-static inline void drm_legacy_rmmaps(struct drm_device *dev) {}
-#endif
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_vma_flush(struct drm_device *d);
-#else
-static inline void drm_legacy_vma_flush(struct drm_device *d)
-{
-	/* do nothing */
-}
-#endif
-
-/*
- * AGP Support
- */
-
-struct drm_agp_mem {
-	unsigned long handle;
-	struct agp_memory *memory;
-	unsigned long bound;
-	int pages;
-	struct list_head head;
-};
-
-/* drm_agpsupport.c */
-#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_AGP)
-void drm_legacy_agp_clear(struct drm_device *dev);
-
-int drm_legacy_agp_acquire_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv);
-int drm_legacy_agp_release_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv);
-int drm_legacy_agp_enable_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
-int drm_legacy_agp_info_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int drm_legacy_agp_alloc_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv);
-int drm_legacy_agp_free_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int drm_legacy_agp_unbind_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
-int drm_legacy_agp_bind_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-#else
-static inline void drm_legacy_agp_clear(struct drm_device *dev) {}
-#endif
-
-/* drm_lock.c */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-int drm_legacy_lock(struct drm_device *d, void *v, struct drm_file *f);
-int drm_legacy_unlock(struct drm_device *d, void *v, struct drm_file *f);
-void drm_legacy_lock_release(struct drm_device *dev, struct file *filp);
-#else
-static inline void drm_legacy_lock_release(struct drm_device *dev, struct file *filp) {}
-#endif
-
-/* DMA support */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-int drm_legacy_dma_setup(struct drm_device *dev);
-void drm_legacy_dma_takedown(struct drm_device *dev);
-#else
-static inline int drm_legacy_dma_setup(struct drm_device *dev)
-{
-	return 0;
-}
-#endif
-
-void drm_legacy_free_buffer(struct drm_device *dev,
-			    struct drm_buf * buf);
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_reclaim_buffers(struct drm_device *dev,
-				struct drm_file *filp);
-#else
-static inline void drm_legacy_reclaim_buffers(struct drm_device *dev,
-					      struct drm_file *filp) {}
-#endif
-
-/* Scatter Gather Support */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_sg_cleanup(struct drm_device *dev);
-int drm_legacy_sg_alloc(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int drm_legacy_sg_free(struct drm_device *dev, void *data,
-		       struct drm_file *file_priv);
-#endif
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_init_members(struct drm_device *dev);
-void drm_legacy_destroy_members(struct drm_device *dev);
-void drm_legacy_dev_reinit(struct drm_device *dev);
-int drm_legacy_setup(struct drm_device * dev);
-#else
-static inline void drm_legacy_init_members(struct drm_device *dev) {}
-static inline void drm_legacy_destroy_members(struct drm_device *dev) {}
-static inline void drm_legacy_dev_reinit(struct drm_device *dev) {}
-static inline int drm_legacy_setup(struct drm_device * dev) { return 0; }
-#endif
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master);
-#else
-static inline void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master) {}
-#endif
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_master_legacy_init(struct drm_master *master);
-#else
-static inline void drm_master_legacy_init(struct drm_master *master) {}
-#endif
-
-/* drm_pci.c */
-#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_PCI)
-int drm_legacy_irq_by_busid(struct drm_device *dev, void *data, struct drm_file *file_priv);
-void drm_legacy_pci_agp_destroy(struct drm_device *dev);
-#else
-static inline int drm_legacy_irq_by_busid(struct drm_device *dev, void *data,
-					  struct drm_file *file_priv)
-{
-	return -EINVAL;
-}
-
-static inline void drm_legacy_pci_agp_destroy(struct drm_device *dev) {}
-#endif
-
-#endif /* __DRM_LEGACY_H__ */
diff --git a/drivers/gpu/drm/drm_legacy_misc.c b/drivers/gpu/drm/drm_legacy_misc.c
deleted file mode 100644
index d4c5434062d7c..0000000000000
--- a/drivers/gpu/drm/drm_legacy_misc.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * \file drm_legacy_misc.c
- * Misc legacy support functions.
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Created: Tue Feb  2 08:37:54 1999 by faith@valinux.com
- *
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <drm/drm_device.h>
-#include <drm/drm_drv.h>
-#include <drm/drm_print.h>
-
-#include "drm_internal.h"
-#include "drm_legacy.h"
-
-void drm_legacy_init_members(struct drm_device *dev)
-{
-	INIT_LIST_HEAD(&dev->ctxlist);
-	INIT_LIST_HEAD(&dev->vmalist);
-	INIT_LIST_HEAD(&dev->maplist);
-	spin_lock_init(&dev->buf_lock);
-	mutex_init(&dev->ctxlist_mutex);
-}
-
-void drm_legacy_destroy_members(struct drm_device *dev)
-{
-	mutex_destroy(&dev->ctxlist_mutex);
-}
-
-int drm_legacy_setup(struct drm_device * dev)
-{
-	int ret;
-
-	if (dev->driver->firstopen &&
-	    drm_core_check_feature(dev, DRIVER_LEGACY)) {
-		ret = dev->driver->firstopen(dev);
-		if (ret != 0)
-			return ret;
-	}
-
-	ret = drm_legacy_dma_setup(dev);
-	if (ret < 0)
-		return ret;
-
-
-	DRM_DEBUG("\n");
-	return 0;
-}
-
-void drm_legacy_dev_reinit(struct drm_device *dev)
-{
-	if (dev->irq_enabled)
-		drm_legacy_irq_uninstall(dev);
-
-	mutex_lock(&dev->struct_mutex);
-
-	drm_legacy_agp_clear(dev);
-
-	drm_legacy_sg_cleanup(dev);
-	drm_legacy_vma_flush(dev);
-	drm_legacy_dma_takedown(dev);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	dev->sigdata.lock = NULL;
-
-	dev->context_flag = 0;
-	dev->last_context = 0;
-	dev->if_version = 0;
-
-	DRM_DEBUG("lastclose completed\n");
-}
-
-void drm_master_legacy_init(struct drm_master *master)
-{
-	spin_lock_init(&master->lock.spinlock);
-	init_waitqueue_head(&master->lock.lock_queue);
-}
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
deleted file mode 100644
index 1efbd5389d893..0000000000000
--- a/drivers/gpu/drm/drm_lock.c
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * \file drm_lock.c
- * IOCTLs for locking
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Created: Tue Feb  2 08:37:54 1999 by faith@valinux.com
- *
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/export.h>
-#include <linux/sched/signal.h>
-
-#include <drm/drm.h>
-#include <drm/drm_drv.h>
-#include <drm/drm_file.h>
-#include <drm/drm_print.h>
-
-#include "drm_internal.h"
-#include "drm_legacy.h"
-
-static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context);
-
-/*
- * Take the heavyweight lock.
- *
- * \param lock lock pointer.
- * \param context locking context.
- * \return one if the lock is held, or zero otherwise.
- *
- * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction.
- */
-static
-int drm_lock_take(struct drm_lock_data *lock_data,
-		  unsigned int context)
-{
-	unsigned int old, new, prev;
-	volatile unsigned int *lock = &lock_data->hw_lock->lock;
-
-	spin_lock_bh(&lock_data->spinlock);
-	do {
-		old = *lock;
-		if (old & _DRM_LOCK_HELD)
-			new = old | _DRM_LOCK_CONT;
-		else {
-			new = context | _DRM_LOCK_HELD |
-				((lock_data->user_waiters + lock_data->kernel_waiters > 1) ?
-				 _DRM_LOCK_CONT : 0);
-		}
-		prev = cmpxchg(lock, old, new);
-	} while (prev != old);
-	spin_unlock_bh(&lock_data->spinlock);
-
-	if (_DRM_LOCKING_CONTEXT(old) == context) {
-		if (old & _DRM_LOCK_HELD) {
-			if (context != DRM_KERNEL_CONTEXT) {
-				DRM_ERROR("%d holds heavyweight lock\n",
-					  context);
-			}
-			return 0;
-		}
-	}
-
-	if ((_DRM_LOCKING_CONTEXT(new)) == context && (new & _DRM_LOCK_HELD)) {
-		/* Have lock */
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * This takes a lock forcibly and hands it to context.	Should ONLY be used
- * inside *_unlock to give lock to kernel before calling *_dma_schedule.
- *
- * \param dev DRM device.
- * \param lock lock pointer.
- * \param context locking context.
- * \return always one.
- *
- * Resets the lock file pointer.
- * Marks the lock as held by the given context, via the \p cmpxchg instruction.
- */
-static int drm_lock_transfer(struct drm_lock_data *lock_data,
-			     unsigned int context)
-{
-	unsigned int old, new, prev;
-	volatile unsigned int *lock = &lock_data->hw_lock->lock;
-
-	lock_data->file_priv = NULL;
-	do {
-		old = *lock;
-		new = context | _DRM_LOCK_HELD;
-		prev = cmpxchg(lock, old, new);
-	} while (prev != old);
-	return 1;
-}
-
-static int drm_legacy_lock_free(struct drm_lock_data *lock_data,
-				unsigned int context)
-{
-	unsigned int old, new, prev;
-	volatile unsigned int *lock = &lock_data->hw_lock->lock;
-
-	spin_lock_bh(&lock_data->spinlock);
-	if (lock_data->kernel_waiters != 0) {
-		drm_lock_transfer(lock_data, 0);
-		lock_data->idle_has_lock = 1;
-		spin_unlock_bh(&lock_data->spinlock);
-		return 1;
-	}
-	spin_unlock_bh(&lock_data->spinlock);
-
-	do {
-		old = *lock;
-		new = _DRM_LOCKING_CONTEXT(old);
-		prev = cmpxchg(lock, old, new);
-	} while (prev != old);
-
-	if (_DRM_LOCK_IS_HELD(old) && _DRM_LOCKING_CONTEXT(old) != context) {
-		DRM_ERROR("%d freed heavyweight lock held by %d\n",
-			  context, _DRM_LOCKING_CONTEXT(old));
-		return 1;
-	}
-	wake_up_interruptible(&lock_data->lock_queue);
-	return 0;
-}
-
-/*
- * Lock ioctl.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument, pointing to a drm_lock structure.
- * \return zero on success or negative number on failure.
- *
- * Add the current task to the lock wait queue, and attempt to take to lock.
- */
-int drm_legacy_lock(struct drm_device *dev, void *data,
-		    struct drm_file *file_priv)
-{
-	DECLARE_WAITQUEUE(entry, current);
-	struct drm_lock *lock = data;
-	struct drm_master *master = file_priv->master;
-	int ret = 0;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	++file_priv->lock_count;
-
-	if (lock->context == DRM_KERNEL_CONTEXT) {
-		DRM_ERROR("Process %d using kernel context %d\n",
-			  task_pid_nr(current), lock->context);
-		return -EINVAL;
-	}
-
-	DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n",
-		  lock->context, task_pid_nr(current),
-		  master->lock.hw_lock ? master->lock.hw_lock->lock : -1,
-		  lock->flags);
-
-	add_wait_queue(&master->lock.lock_queue, &entry);
-	spin_lock_bh(&master->lock.spinlock);
-	master->lock.user_waiters++;
-	spin_unlock_bh(&master->lock.spinlock);
-
-	for (;;) {
-		__set_current_state(TASK_INTERRUPTIBLE);
-		if (!master->lock.hw_lock) {
-			/* Device has been unregistered */
-			send_sig(SIGTERM, current, 0);
-			ret = -EINTR;
-			break;
-		}
-		if (drm_lock_take(&master->lock, lock->context)) {
-			master->lock.file_priv = file_priv;
-			master->lock.lock_time = jiffies;
-			break;	/* Got lock */
-		}
-
-		/* Contention */
-		mutex_unlock(&drm_global_mutex);
-		schedule();
-		mutex_lock(&drm_global_mutex);
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-	}
-	spin_lock_bh(&master->lock.spinlock);
-	master->lock.user_waiters--;
-	spin_unlock_bh(&master->lock.spinlock);
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&master->lock.lock_queue, &entry);
-
-	DRM_DEBUG("%d %s\n", lock->context,
-		  ret ? "interrupted" : "has lock");
-	if (ret) return ret;
-
-	/* don't set the block all signals on the master process for now 
-	 * really probably not the correct answer but lets us debug xkb
- 	 * xserver for now */
-	if (!drm_is_current_master(file_priv)) {
-		dev->sigdata.context = lock->context;
-		dev->sigdata.lock = master->lock.hw_lock;
-	}
-
-	if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT))
-	{
-		if (dev->driver->dma_quiescent(dev)) {
-			DRM_DEBUG("%d waiting for DMA quiescent\n",
-				  lock->context);
-			return -EBUSY;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Unlock ioctl.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument, pointing to a drm_lock structure.
- * \return zero on success or negative number on failure.
- *
- * Transfer and free the lock.
- */
-int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
-{
-	struct drm_lock *lock = data;
-	struct drm_master *master = file_priv->master;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (lock->context == DRM_KERNEL_CONTEXT) {
-		DRM_ERROR("Process %d using kernel context %d\n",
-			  task_pid_nr(current), lock->context);
-		return -EINVAL;
-	}
-
-	if (drm_legacy_lock_free(&master->lock, lock->context)) {
-		/* FIXME: Should really bail out here. */
-	}
-
-	return 0;
-}
-
-/*
- * This function returns immediately and takes the hw lock
- * with the kernel context if it is free, otherwise it gets the highest priority when and if
- * it is eventually released.
- *
- * This guarantees that the kernel will _eventually_ have the lock _unless_ it is held
- * by a blocked process. (In the latter case an explicit wait for the hardware lock would cause
- * a deadlock, which is why the "idlelock" was invented).
- *
- * This should be sufficient to wait for GPU idle without
- * having to worry about starvation.
- */
-void drm_legacy_idlelock_take(struct drm_lock_data *lock_data)
-{
-	int ret;
-
-	spin_lock_bh(&lock_data->spinlock);
-	lock_data->kernel_waiters++;
-	if (!lock_data->idle_has_lock) {
-
-		spin_unlock_bh(&lock_data->spinlock);
-		ret = drm_lock_take(lock_data, DRM_KERNEL_CONTEXT);
-		spin_lock_bh(&lock_data->spinlock);
-
-		if (ret == 1)
-			lock_data->idle_has_lock = 1;
-	}
-	spin_unlock_bh(&lock_data->spinlock);
-}
-EXPORT_SYMBOL(drm_legacy_idlelock_take);
-
-void drm_legacy_idlelock_release(struct drm_lock_data *lock_data)
-{
-	unsigned int old, prev;
-	volatile unsigned int *lock = &lock_data->hw_lock->lock;
-
-	spin_lock_bh(&lock_data->spinlock);
-	if (--lock_data->kernel_waiters == 0) {
-		if (lock_data->idle_has_lock) {
-			do {
-				old = *lock;
-				prev = cmpxchg(lock, old, DRM_KERNEL_CONTEXT);
-			} while (prev != old);
-			wake_up_interruptible(&lock_data->lock_queue);
-			lock_data->idle_has_lock = 0;
-		}
-	}
-	spin_unlock_bh(&lock_data->spinlock);
-}
-EXPORT_SYMBOL(drm_legacy_idlelock_release);
-
-static int drm_legacy_i_have_hw_lock(struct drm_device *dev,
-				     struct drm_file *file_priv)
-{
-	struct drm_master *master = file_priv->master;
-
-	return (file_priv->lock_count && master->lock.hw_lock &&
-		_DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) &&
-		master->lock.file_priv == file_priv);
-}
-
-void drm_legacy_lock_release(struct drm_device *dev, struct file *filp)
-{
-	struct drm_file *file_priv = filp->private_data;
-
-	/* if the master has gone away we can't do anything with the lock */
-	if (!dev->master)
-		return;
-
-	if (drm_legacy_i_have_hw_lock(dev, file_priv)) {
-		DRM_DEBUG("File %p released, freeing lock for context %d\n",
-			  filp, _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock));
-		drm_legacy_lock_free(&file_priv->master->lock,
-				     _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock));
-	}
-}
-
-void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master)
-{
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return;
-
-	/*
-	 * Since the master is disappearing, so is the
-	 * possibility to lock.
-	 */
-	mutex_lock(&dev->struct_mutex);
-	if (master->lock.hw_lock) {
-		if (dev->sigdata.lock == master->lock.hw_lock)
-			dev->sigdata.lock = NULL;
-		master->lock.hw_lock = NULL;
-		master->lock.file_priv = NULL;
-		wake_up_interruptible_all(&master->lock.lock_queue);
-	}
-	mutex_unlock(&dev->struct_mutex);
-}
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
deleted file mode 100644
index d2e1dccd8113e..0000000000000
--- a/drivers/gpu/drm/drm_memory.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * \file drm_memory.c
- * Memory management wrappers for DRM
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Created: Thu Feb  4 14:00:34 1999 by faith@valinux.com
- *
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/export.h>
-#include <linux/highmem.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include <drm/drm_cache.h>
-#include <drm/drm_device.h>
-
-#include "drm_legacy.h"
-
-#if IS_ENABLED(CONFIG_AGP)
-
-#ifdef HAVE_PAGE_AGP
-# include <asm/agp.h>
-#else
-# ifdef __powerpc__
-#  define PAGE_AGP	pgprot_noncached_wc(PAGE_KERNEL)
-# else
-#  define PAGE_AGP	PAGE_KERNEL
-# endif
-#endif
-
-static void *agp_remap(unsigned long offset, unsigned long size,
-		       struct drm_device *dev)
-{
-	unsigned long i, num_pages =
-	    PAGE_ALIGN(size) / PAGE_SIZE;
-	struct drm_agp_mem *agpmem;
-	struct page **page_map;
-	struct page **phys_page_map;
-	void *addr;
-
-	size = PAGE_ALIGN(size);
-
-#ifdef __alpha__
-	offset -= dev->hose->mem_space->start;
-#endif
-
-	list_for_each_entry(agpmem, &dev->agp->memory, head)
-		if (agpmem->bound <= offset
-		    && (agpmem->bound + (agpmem->pages << PAGE_SHIFT)) >=
-		    (offset + size))
-			break;
-	if (&agpmem->head == &dev->agp->memory)
-		return NULL;
-
-	/*
-	 * OK, we're mapping AGP space on a chipset/platform on which memory accesses by
-	 * the CPU do not get remapped by the GART.  We fix this by using the kernel's
-	 * page-table instead (that's probably faster anyhow...).
-	 */
-	/* note: use vmalloc() because num_pages could be large... */
-	page_map = vmalloc(array_size(num_pages, sizeof(struct page *)));
-	if (!page_map)
-		return NULL;
-
-	phys_page_map = (agpmem->memory->pages + (offset - agpmem->bound) / PAGE_SIZE);
-	for (i = 0; i < num_pages; ++i)
-		page_map[i] = phys_page_map[i];
-	addr = vmap(page_map, num_pages, VM_IOREMAP, PAGE_AGP);
-	vfree(page_map);
-
-	return addr;
-}
-
-#else /*  CONFIG_AGP  */
-static inline void *agp_remap(unsigned long offset, unsigned long size,
-			      struct drm_device *dev)
-{
-	return NULL;
-}
-
-#endif /* CONFIG_AGP */
-
-void drm_legacy_ioremap(struct drm_local_map *map, struct drm_device *dev)
-{
-	if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP)
-		map->handle = agp_remap(map->offset, map->size, dev);
-	else
-		map->handle = ioremap(map->offset, map->size);
-}
-EXPORT_SYMBOL(drm_legacy_ioremap);
-
-void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev)
-{
-	if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP)
-		map->handle = agp_remap(map->offset, map->size, dev);
-	else
-		map->handle = ioremap_wc(map->offset, map->size);
-}
-EXPORT_SYMBOL(drm_legacy_ioremap_wc);
-
-void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev)
-{
-	if (!map->handle || !map->size)
-		return;
-
-	if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP)
-		vunmap(map->handle);
-	else
-		iounmap(map->handle);
-}
-EXPORT_SYMBOL(drm_legacy_ioremapfree);
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 67907dd427214..c585f1e8803ec 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -35,13 +35,6 @@
 #include <drm/drm_print.h>
 
 #include "drm_internal.h"
-#include "drm_legacy.h"
-
-#ifdef CONFIG_DRM_LEGACY
-/* List of devices hanging off drivers with stealth attach. */
-static LIST_HEAD(legacy_dev_list);
-static DEFINE_MUTEX(legacy_dev_list_lock);
-#endif
 
 static int drm_get_pci_domain(struct drm_device *dev)
 {
@@ -72,199 +65,3 @@ int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master)
 	master->unique_len = strlen(master->unique);
 	return 0;
 }
-
-#ifdef CONFIG_DRM_LEGACY
-
-static int drm_legacy_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p)
-{
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
-
-	if ((p->busnum >> 8) != drm_get_pci_domain(dev) ||
-	    (p->busnum & 0xff) != pdev->bus->number ||
-	    p->devnum != PCI_SLOT(pdev->devfn) || p->funcnum != PCI_FUNC(pdev->devfn))
-		return -EINVAL;
-
-	p->irq = pdev->irq;
-
-	DRM_DEBUG("%d:%d:%d => IRQ %d\n", p->busnum, p->devnum, p->funcnum,
-		  p->irq);
-	return 0;
-}
-
-/**
- * drm_legacy_irq_by_busid - Get interrupt from bus ID
- * @dev: DRM device
- * @data: IOCTL parameter pointing to a drm_irq_busid structure
- * @file_priv: DRM file private.
- *
- * Finds the PCI device with the specified bus id and gets its IRQ number.
- * This IOCTL is deprecated, and will now return EINVAL for any busid not equal
- * to that of the device that this DRM instance attached to.
- *
- * Return: 0 on success or a negative error code on failure.
- */
-int drm_legacy_irq_by_busid(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv)
-{
-	struct drm_irq_busid *p = data;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	/* UMS was only ever support on PCI devices. */
-	if (WARN_ON(!dev_is_pci(dev->dev)))
-		return -EINVAL;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-		return -EOPNOTSUPP;
-
-	return drm_legacy_pci_irq_by_busid(dev, p);
-}
-
-void drm_legacy_pci_agp_destroy(struct drm_device *dev)
-{
-	if (dev->agp) {
-		arch_phys_wc_del(dev->agp->agp_mtrr);
-		drm_legacy_agp_clear(dev);
-		kfree(dev->agp);
-		dev->agp = NULL;
-	}
-}
-
-static void drm_legacy_pci_agp_init(struct drm_device *dev)
-{
-	if (drm_core_check_feature(dev, DRIVER_USE_AGP)) {
-		if (pci_find_capability(to_pci_dev(dev->dev), PCI_CAP_ID_AGP))
-			dev->agp = drm_legacy_agp_init(dev);
-		if (dev->agp) {
-			dev->agp->agp_mtrr = arch_phys_wc_add(
-				dev->agp->agp_info.aper_base,
-				dev->agp->agp_info.aper_size *
-				1024 * 1024);
-		}
-	}
-}
-
-static int drm_legacy_get_pci_dev(struct pci_dev *pdev,
-				  const struct pci_device_id *ent,
-				  const struct drm_driver *driver)
-{
-	struct drm_device *dev;
-	int ret;
-
-	DRM_DEBUG("\n");
-
-	dev = drm_dev_alloc(driver, &pdev->dev);
-	if (IS_ERR(dev))
-		return PTR_ERR(dev);
-
-	ret = pci_enable_device(pdev);
-	if (ret)
-		goto err_free;
-
-#ifdef __alpha__
-	dev->hose = pdev->sysdata;
-#endif
-
-	drm_legacy_pci_agp_init(dev);
-
-	ret = drm_dev_register(dev, ent->driver_data);
-	if (ret)
-		goto err_agp;
-
-	if (drm_core_check_feature(dev, DRIVER_LEGACY)) {
-		mutex_lock(&legacy_dev_list_lock);
-		list_add_tail(&dev->legacy_dev_list, &legacy_dev_list);
-		mutex_unlock(&legacy_dev_list_lock);
-	}
-
-	return 0;
-
-err_agp:
-	drm_legacy_pci_agp_destroy(dev);
-	pci_disable_device(pdev);
-err_free:
-	drm_dev_put(dev);
-	return ret;
-}
-
-/**
- * drm_legacy_pci_init - shadow-attach a legacy DRM PCI driver
- * @driver: DRM device driver
- * @pdriver: PCI device driver
- *
- * This is only used by legacy dri1 drivers and deprecated.
- *
- * Return: 0 on success or a negative error code on failure.
- */
-int drm_legacy_pci_init(const struct drm_driver *driver,
-			struct pci_driver *pdriver)
-{
-	struct pci_dev *pdev = NULL;
-	const struct pci_device_id *pid;
-	int i;
-
-	DRM_DEBUG("\n");
-
-	if (WARN_ON(!(driver->driver_features & DRIVER_LEGACY)))
-		return -EINVAL;
-
-	/* If not using KMS, fall back to stealth mode manual scanning. */
-	for (i = 0; pdriver->id_table[i].vendor != 0; i++) {
-		pid = &pdriver->id_table[i];
-
-		/* Loop around setting up a DRM device for each PCI device
-		 * matching our ID and device class.  If we had the internal
-		 * function that pci_get_subsys and pci_get_class used, we'd
-		 * be able to just pass pid in instead of doing a two-stage
-		 * thing.
-		 */
-		pdev = NULL;
-		while ((pdev =
-			pci_get_subsys(pid->vendor, pid->device, pid->subvendor,
-				       pid->subdevice, pdev)) != NULL) {
-			if ((pdev->class & pid->class_mask) != pid->class)
-				continue;
-
-			/* stealth mode requires a manual probe */
-			pci_dev_get(pdev);
-			drm_legacy_get_pci_dev(pdev, pid, driver);
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL(drm_legacy_pci_init);
-
-/**
- * drm_legacy_pci_exit - unregister shadow-attach legacy DRM driver
- * @driver: DRM device driver
- * @pdriver: PCI device driver
- *
- * Unregister a DRM driver shadow-attached through drm_legacy_pci_init(). This
- * is deprecated and only used by dri1 drivers.
- */
-void drm_legacy_pci_exit(const struct drm_driver *driver,
-			 struct pci_driver *pdriver)
-{
-	struct drm_device *dev, *tmp;
-
-	DRM_DEBUG("\n");
-
-	if (!(driver->driver_features & DRIVER_LEGACY)) {
-		WARN_ON(1);
-	} else {
-		mutex_lock(&legacy_dev_list_lock);
-		list_for_each_entry_safe(dev, tmp, &legacy_dev_list,
-					 legacy_dev_list) {
-			if (dev->driver == driver) {
-				list_del(&dev->legacy_dev_list);
-				drm_put_dev(dev);
-			}
-		}
-		mutex_unlock(&legacy_dev_list_lock);
-	}
-	DRM_INFO("Module unloaded\n");
-}
-EXPORT_SYMBOL(drm_legacy_pci_exit);
-
-#endif
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
deleted file mode 100644
index f4e6184d18772..0000000000000
--- a/drivers/gpu/drm/drm_scatter.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * \file drm_scatter.c
- * IOCTLs to manage scatter/gather memory
- *
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Created: Mon Dec 18 23:20:54 2000 by gareth@valinux.com
- *
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include <drm/drm.h>
-#include <drm/drm_drv.h>
-#include <drm/drm_print.h>
-
-#include "drm_legacy.h"
-
-#define DEBUG_SCATTER 0
-
-static void drm_sg_cleanup(struct drm_sg_mem * entry)
-{
-	struct page *page;
-	int i;
-
-	for (i = 0; i < entry->pages; i++) {
-		page = entry->pagelist[i];
-		if (page)
-			ClearPageReserved(page);
-	}
-
-	vfree(entry->virtual);
-
-	kfree(entry->busaddr);
-	kfree(entry->pagelist);
-	kfree(entry);
-}
-
-void drm_legacy_sg_cleanup(struct drm_device *dev)
-{
-	if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg &&
-	    drm_core_check_feature(dev, DRIVER_LEGACY)) {
-		drm_sg_cleanup(dev->sg);
-		dev->sg = NULL;
-	}
-}
-#ifdef _LP64
-# define ScatterHandle(x) (unsigned int)((x >> 32) + (x & ((1L << 32) - 1)))
-#else
-# define ScatterHandle(x) (unsigned int)(x)
-#endif
-
-int drm_legacy_sg_alloc(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_scatter_gather *request = data;
-	struct drm_sg_mem *entry;
-	unsigned long pages, i, j;
-
-	DRM_DEBUG("\n");
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (!drm_core_check_feature(dev, DRIVER_SG))
-		return -EOPNOTSUPP;
-
-	if (request->size > SIZE_MAX - PAGE_SIZE)
-		return -EINVAL;
-
-	if (dev->sg)
-		return -EINVAL;
-
-	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-
-	pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE;
-	DRM_DEBUG("size=%ld pages=%ld\n", request->size, pages);
-
-	entry->pages = pages;
-	entry->pagelist = kcalloc(pages, sizeof(*entry->pagelist), GFP_KERNEL);
-	if (!entry->pagelist) {
-		kfree(entry);
-		return -ENOMEM;
-	}
-
-	entry->busaddr = kcalloc(pages, sizeof(*entry->busaddr), GFP_KERNEL);
-	if (!entry->busaddr) {
-		kfree(entry->pagelist);
-		kfree(entry);
-		return -ENOMEM;
-	}
-
-	entry->virtual = vmalloc_32(pages << PAGE_SHIFT);
-	if (!entry->virtual) {
-		kfree(entry->busaddr);
-		kfree(entry->pagelist);
-		kfree(entry);
-		return -ENOMEM;
-	}
-
-	/* This also forces the mapping of COW pages, so our page list
-	 * will be valid.  Please don't remove it...
-	 */
-	memset(entry->virtual, 0, pages << PAGE_SHIFT);
-
-	entry->handle = ScatterHandle((unsigned long)entry->virtual);
-
-	DRM_DEBUG("handle  = %08lx\n", entry->handle);
-	DRM_DEBUG("virtual = %p\n", entry->virtual);
-
-	for (i = (unsigned long)entry->virtual, j = 0; j < pages;
-	     i += PAGE_SIZE, j++) {
-		entry->pagelist[j] = vmalloc_to_page((void *)i);
-		if (!entry->pagelist[j])
-			goto failed;
-		SetPageReserved(entry->pagelist[j]);
-	}
-
-	request->handle = entry->handle;
-
-	dev->sg = entry;
-
-#if DEBUG_SCATTER
-	/* Verify that each page points to its virtual address, and vice
-	 * versa.
-	 */
-	{
-		int error = 0;
-
-		for (i = 0; i < pages; i++) {
-			unsigned long *tmp;
-
-			tmp = page_address(entry->pagelist[i]);
-			for (j = 0;
-			     j < PAGE_SIZE / sizeof(unsigned long);
-			     j++, tmp++) {
-				*tmp = 0xcafebabe;
-			}
-			tmp = (unsigned long *)((u8 *) entry->virtual +
-						(PAGE_SIZE * i));
-			for (j = 0;
-			     j < PAGE_SIZE / sizeof(unsigned long);
-			     j++, tmp++) {
-				if (*tmp != 0xcafebabe && error == 0) {
-					error = 1;
-					DRM_ERROR("Scatter allocation error, "
-						  "pagelist does not match "
-						  "virtual mapping\n");
-				}
-			}
-			tmp = page_address(entry->pagelist[i]);
-			for (j = 0;
-			     j < PAGE_SIZE / sizeof(unsigned long);
-			     j++, tmp++) {
-				*tmp = 0;
-			}
-		}
-		if (error == 0)
-			DRM_ERROR("Scatter allocation matches pagelist\n");
-	}
-#endif
-
-	return 0;
-
-      failed:
-	drm_sg_cleanup(entry);
-	return -ENOMEM;
-}
-
-int drm_legacy_sg_free(struct drm_device *dev, void *data,
-		       struct drm_file *file_priv)
-{
-	struct drm_scatter_gather *request = data;
-	struct drm_sg_mem *entry;
-
-	if (!drm_core_check_feature(dev, DRIVER_LEGACY))
-		return -EOPNOTSUPP;
-
-	if (!drm_core_check_feature(dev, DRIVER_SG))
-		return -EOPNOTSUPP;
-
-	entry = dev->sg;
-	dev->sg = NULL;
-
-	if (!entry || entry->handle != request->handle)
-		return -EINVAL;
-
-	DRM_DEBUG("virtual  = %p\n", entry->virtual);
-
-	drm_sg_cleanup(entry);
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
deleted file mode 100644
index 87c9fe55dec76..0000000000000
--- a/drivers/gpu/drm/drm_vm.c
+++ /dev/null
@@ -1,665 +0,0 @@
-/*
- * \file drm_vm.c
- * Memory mapping for DRM
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Created: Mon Jan  4 08:58:31 1999 by faith@valinux.com
- *
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <linux/seq_file.h>
-#include <linux/vmalloc.h>
-#include <linux/pgtable.h>
-
-#if defined(__ia64__)
-#include <linux/efi.h>
-#include <linux/slab.h>
-#endif
-#include <linux/mem_encrypt.h>
-
-#include <drm/drm_device.h>
-#include <drm/drm_drv.h>
-#include <drm/drm_file.h>
-#include <drm/drm_framebuffer.h>
-#include <drm/drm_print.h>
-
-#include "drm_internal.h"
-#include "drm_legacy.h"
-
-struct drm_vma_entry {
-	struct list_head head;
-	struct vm_area_struct *vma;
-	pid_t pid;
-};
-
-static void drm_vm_open(struct vm_area_struct *vma);
-static void drm_vm_close(struct vm_area_struct *vma);
-
-static pgprot_t drm_io_prot(struct drm_local_map *map,
-			    struct vm_area_struct *vma)
-{
-	pgprot_t tmp = vm_get_page_prot(vma->vm_flags);
-
-#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
-    defined(__mips__) || defined(__loongarch__)
-	if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING))
-		tmp = pgprot_noncached(tmp);
-	else
-		tmp = pgprot_writecombine(tmp);
-#elif defined(__ia64__)
-	if (efi_range_is_wc(vma->vm_start, vma->vm_end -
-				    vma->vm_start))
-		tmp = pgprot_writecombine(tmp);
-	else
-		tmp = pgprot_noncached(tmp);
-#elif defined(__sparc__) || defined(__arm__)
-	tmp = pgprot_noncached(tmp);
-#endif
-	return tmp;
-}
-
-static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma)
-{
-	pgprot_t tmp = vm_get_page_prot(vma->vm_flags);
-
-#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
-	tmp = pgprot_noncached_wc(tmp);
-#endif
-	return tmp;
-}
-
-/*
- * \c fault method for AGP virtual memory.
- *
- * \param vma virtual memory area.
- * \param address access address.
- * \return pointer to the page structure.
- *
- * Find the right map and if it's AGP memory find the real physical page to
- * map, get the page, increment the use count and return it.
- */
-#if IS_ENABLED(CONFIG_AGP)
-static vm_fault_t drm_vm_fault(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct drm_file *priv = vma->vm_file->private_data;
-	struct drm_device *dev = priv->minor->dev;
-	struct drm_local_map *map = NULL;
-	struct drm_map_list *r_list;
-	struct drm_hash_item *hash;
-
-	/*
-	 * Find the right map
-	 */
-	if (!dev->agp)
-		goto vm_fault_error;
-
-	if (!dev->agp || !dev->agp->cant_use_aperture)
-		goto vm_fault_error;
-
-	if (drm_ht_find_item(&dev->map_hash, vma->vm_pgoff, &hash))
-		goto vm_fault_error;
-
-	r_list = drm_hash_entry(hash, struct drm_map_list, hash);
-	map = r_list->map;
-
-	if (map && map->type == _DRM_AGP) {
-		/*
-		 * Using vm_pgoff as a selector forces us to use this unusual
-		 * addressing scheme.
-		 */
-		resource_size_t offset = vmf->address - vma->vm_start;
-		resource_size_t baddr = map->offset + offset;
-		struct drm_agp_mem *agpmem;
-		struct page *page;
-
-#ifdef __alpha__
-		/*
-		 * Adjust to a bus-relative address
-		 */
-		baddr -= dev->hose->mem_space->start;
-#endif
-
-		/*
-		 * It's AGP memory - find the real physical page to map
-		 */
-		list_for_each_entry(agpmem, &dev->agp->memory, head) {
-			if (agpmem->bound <= baddr &&
-			    agpmem->bound + agpmem->pages * PAGE_SIZE > baddr)
-				break;
-		}
-
-		if (&agpmem->head == &dev->agp->memory)
-			goto vm_fault_error;
-
-		/*
-		 * Get the page, inc the use count, and return it
-		 */
-		offset = (baddr - agpmem->bound) >> PAGE_SHIFT;
-		page = agpmem->memory->pages[offset];
-		get_page(page);
-		vmf->page = page;
-
-		DRM_DEBUG
-		    ("baddr = 0x%llx page = 0x%p, offset = 0x%llx, count=%d\n",
-		     (unsigned long long)baddr,
-		     agpmem->memory->pages[offset],
-		     (unsigned long long)offset,
-		     page_count(page));
-		return 0;
-	}
-vm_fault_error:
-	return VM_FAULT_SIGBUS;	/* Disallow mremap */
-}
-#else
-static vm_fault_t drm_vm_fault(struct vm_fault *vmf)
-{
-	return VM_FAULT_SIGBUS;
-}
-#endif
-
-/*
- * \c nopage method for shared virtual memory.
- *
- * \param vma virtual memory area.
- * \param address access address.
- * \return pointer to the page structure.
- *
- * Get the mapping, find the real physical page to map, get the page, and
- * return it.
- */
-static vm_fault_t drm_vm_shm_fault(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct drm_local_map *map = vma->vm_private_data;
-	unsigned long offset;
-	unsigned long i;
-	struct page *page;
-
-	if (!map)
-		return VM_FAULT_SIGBUS;	/* Nothing allocated */
-
-	offset = vmf->address - vma->vm_start;
-	i = (unsigned long)map->handle + offset;
-	page = vmalloc_to_page((void *)i);
-	if (!page)
-		return VM_FAULT_SIGBUS;
-	get_page(page);
-	vmf->page = page;
-
-	DRM_DEBUG("shm_fault 0x%lx\n", offset);
-	return 0;
-}
-
-/*
- * \c close method for shared virtual memory.
- *
- * \param vma virtual memory area.
- *
- * Deletes map information if we are the last
- * person to close a mapping and it's not in the global maplist.
- */
-static void drm_vm_shm_close(struct vm_area_struct *vma)
-{
-	struct drm_file *priv = vma->vm_file->private_data;
-	struct drm_device *dev = priv->minor->dev;
-	struct drm_vma_entry *pt, *temp;
-	struct drm_local_map *map;
-	struct drm_map_list *r_list;
-	int found_maps = 0;
-
-	DRM_DEBUG("0x%08lx,0x%08lx\n",
-		  vma->vm_start, vma->vm_end - vma->vm_start);
-
-	map = vma->vm_private_data;
-
-	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
-		if (pt->vma->vm_private_data == map)
-			found_maps++;
-		if (pt->vma == vma) {
-			list_del(&pt->head);
-			kfree(pt);
-		}
-	}
-
-	/* We were the only map that was found */
-	if (found_maps == 1 && map->flags & _DRM_REMOVABLE) {
-		/* Check to see if we are in the maplist, if we are not, then
-		 * we delete this mappings information.
-		 */
-		found_maps = 0;
-		list_for_each_entry(r_list, &dev->maplist, head) {
-			if (r_list->map == map)
-				found_maps++;
-		}
-
-		if (!found_maps) {
-			switch (map->type) {
-			case _DRM_REGISTERS:
-			case _DRM_FRAME_BUFFER:
-				arch_phys_wc_del(map->mtrr);
-				iounmap(map->handle);
-				break;
-			case _DRM_SHM:
-				vfree(map->handle);
-				break;
-			case _DRM_AGP:
-			case _DRM_SCATTER_GATHER:
-				break;
-			case _DRM_CONSISTENT:
-				dma_free_coherent(dev->dev,
-						  map->size,
-						  map->handle,
-						  map->offset);
-				break;
-			}
-			kfree(map);
-		}
-	}
-	mutex_unlock(&dev->struct_mutex);
-}
-
-/*
- * \c fault method for DMA virtual memory.
- *
- * \param address access address.
- * \return pointer to the page structure.
- *
- * Determine the page number from the page offset and get it from drm_device_dma::pagelist.
- */
-static vm_fault_t drm_vm_dma_fault(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct drm_file *priv = vma->vm_file->private_data;
-	struct drm_device *dev = priv->minor->dev;
-	struct drm_device_dma *dma = dev->dma;
-	unsigned long offset;
-	unsigned long page_nr;
-	struct page *page;
-
-	if (!dma)
-		return VM_FAULT_SIGBUS;	/* Error */
-	if (!dma->pagelist)
-		return VM_FAULT_SIGBUS;	/* Nothing allocated */
-
-	offset = vmf->address - vma->vm_start;
-					/* vm_[pg]off[set] should be 0 */
-	page_nr = offset >> PAGE_SHIFT; /* page_nr could just be vmf->pgoff */
-	page = virt_to_page((void *)dma->pagelist[page_nr]);
-
-	get_page(page);
-	vmf->page = page;
-
-	DRM_DEBUG("dma_fault 0x%lx (page %lu)\n", offset, page_nr);
-	return 0;
-}
-
-/*
- * \c fault method for scatter-gather virtual memory.
- *
- * \param address access address.
- * \return pointer to the page structure.
- *
- * Determine the map offset from the page offset and get it from drm_sg_mem::pagelist.
- */
-static vm_fault_t drm_vm_sg_fault(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct drm_local_map *map = vma->vm_private_data;
-	struct drm_file *priv = vma->vm_file->private_data;
-	struct drm_device *dev = priv->minor->dev;
-	struct drm_sg_mem *entry = dev->sg;
-	unsigned long offset;
-	unsigned long map_offset;
-	unsigned long page_offset;
-	struct page *page;
-
-	if (!entry)
-		return VM_FAULT_SIGBUS;	/* Error */
-	if (!entry->pagelist)
-		return VM_FAULT_SIGBUS;	/* Nothing allocated */
-
-	offset = vmf->address - vma->vm_start;
-	map_offset = map->offset - (unsigned long)dev->sg->virtual;
-	page_offset = (offset >> PAGE_SHIFT) + (map_offset >> PAGE_SHIFT);
-	page = entry->pagelist[page_offset];
-	get_page(page);
-	vmf->page = page;
-
-	return 0;
-}
-
-/** AGP virtual memory operations */
-static const struct vm_operations_struct drm_vm_ops = {
-	.fault = drm_vm_fault,
-	.open = drm_vm_open,
-	.close = drm_vm_close,
-};
-
-/** Shared virtual memory operations */
-static const struct vm_operations_struct drm_vm_shm_ops = {
-	.fault = drm_vm_shm_fault,
-	.open = drm_vm_open,
-	.close = drm_vm_shm_close,
-};
-
-/** DMA virtual memory operations */
-static const struct vm_operations_struct drm_vm_dma_ops = {
-	.fault = drm_vm_dma_fault,
-	.open = drm_vm_open,
-	.close = drm_vm_close,
-};
-
-/** Scatter-gather virtual memory operations */
-static const struct vm_operations_struct drm_vm_sg_ops = {
-	.fault = drm_vm_sg_fault,
-	.open = drm_vm_open,
-	.close = drm_vm_close,
-};
-
-static void drm_vm_open_locked(struct drm_device *dev,
-			       struct vm_area_struct *vma)
-{
-	struct drm_vma_entry *vma_entry;
-
-	DRM_DEBUG("0x%08lx,0x%08lx\n",
-		  vma->vm_start, vma->vm_end - vma->vm_start);
-
-	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
-	if (vma_entry) {
-		vma_entry->vma = vma;
-		vma_entry->pid = current->pid;
-		list_add(&vma_entry->head, &dev->vmalist);
-	}
-}
-
-static void drm_vm_open(struct vm_area_struct *vma)
-{
-	struct drm_file *priv = vma->vm_file->private_data;
-	struct drm_device *dev = priv->minor->dev;
-
-	mutex_lock(&dev->struct_mutex);
-	drm_vm_open_locked(dev, vma);
-	mutex_unlock(&dev->struct_mutex);
-}
-
-static void drm_vm_close_locked(struct drm_device *dev,
-				struct vm_area_struct *vma)
-{
-	struct drm_vma_entry *pt, *temp;
-
-	DRM_DEBUG("0x%08lx,0x%08lx\n",
-		  vma->vm_start, vma->vm_end - vma->vm_start);
-
-	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
-		if (pt->vma == vma) {
-			list_del(&pt->head);
-			kfree(pt);
-			break;
-		}
-	}
-}
-
-/*
- * \c close method for all virtual memory types.
- *
- * \param vma virtual memory area.
- *
- * Search the \p vma private data entry in drm_device::vmalist, unlink it, and
- * free it.
- */
-static void drm_vm_close(struct vm_area_struct *vma)
-{
-	struct drm_file *priv = vma->vm_file->private_data;
-	struct drm_device *dev = priv->minor->dev;
-
-	mutex_lock(&dev->struct_mutex);
-	drm_vm_close_locked(dev, vma);
-	mutex_unlock(&dev->struct_mutex);
-}
-
-/*
- * mmap DMA memory.
- *
- * \param file_priv DRM file private.
- * \param vma virtual memory area.
- * \return zero on success or a negative number on failure.
- *
- * Sets the virtual memory area operations structure to vm_dma_ops, the file
- * pointer, and calls vm_open().
- */
-static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *priv = filp->private_data;
-	struct drm_device *dev;
-	struct drm_device_dma *dma;
-	unsigned long length = vma->vm_end - vma->vm_start;
-
-	dev = priv->minor->dev;
-	dma = dev->dma;
-	DRM_DEBUG("start = 0x%lx, end = 0x%lx, page offset = 0x%lx\n",
-		  vma->vm_start, vma->vm_end, vma->vm_pgoff);
-
-	/* Length must match exact page count */
-	if (!dma || (length >> PAGE_SHIFT) != dma->page_count) {
-		return -EINVAL;
-	}
-
-	if (!capable(CAP_SYS_ADMIN) &&
-	    (dma->flags & _DRM_DMA_USE_PCI_RO)) {
-		vm_flags_clear(vma, VM_WRITE | VM_MAYWRITE);
-#if defined(__i386__) || defined(__x86_64__)
-		pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW;
-#else
-		/* Ye gads this is ugly.  With more thought
-		   we could move this up higher and use
-		   `protection_map' instead.  */
-		vma->vm_page_prot =
-		    __pgprot(pte_val
-			     (pte_wrprotect
-			      (__pte(pgprot_val(vma->vm_page_prot)))));
-#endif
-	}
-
-	vma->vm_ops = &drm_vm_dma_ops;
-
-	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
-
-	drm_vm_open_locked(dev, vma);
-	return 0;
-}
-
-static resource_size_t drm_core_get_reg_ofs(struct drm_device *dev)
-{
-#ifdef __alpha__
-	return dev->hose->dense_mem_base;
-#else
-	return 0;
-#endif
-}
-
-/*
- * mmap DMA memory.
- *
- * \param file_priv DRM file private.
- * \param vma virtual memory area.
- * \return zero on success or a negative number on failure.
- *
- * If the virtual memory area has no offset associated with it then it's a DMA
- * area, so calls mmap_dma(). Otherwise searches the map in drm_device::maplist,
- * checks that the restricted flag is not set, sets the virtual memory operations
- * according to the mapping type and remaps the pages. Finally sets the file
- * pointer and calls vm_open().
- */
-static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *priv = filp->private_data;
-	struct drm_device *dev = priv->minor->dev;
-	struct drm_local_map *map = NULL;
-	resource_size_t offset = 0;
-	struct drm_hash_item *hash;
-
-	DRM_DEBUG("start = 0x%lx, end = 0x%lx, page offset = 0x%lx\n",
-		  vma->vm_start, vma->vm_end, vma->vm_pgoff);
-
-	if (!priv->authenticated)
-		return -EACCES;
-
-	/* We check for "dma". On Apple's UniNorth, it's valid to have
-	 * the AGP mapped at physical address 0
-	 * --BenH.
-	 */
-	if (!vma->vm_pgoff
-#if IS_ENABLED(CONFIG_AGP)
-	    && (!dev->agp
-		|| dev->agp->agp_info.device->vendor != PCI_VENDOR_ID_APPLE)
-#endif
-	    )
-		return drm_mmap_dma(filp, vma);
-
-	if (drm_ht_find_item(&dev->map_hash, vma->vm_pgoff, &hash)) {
-		DRM_ERROR("Could not find map\n");
-		return -EINVAL;
-	}
-
-	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
-	if (!map || ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN)))
-		return -EPERM;
-
-	/* Check for valid size. */
-	if (map->size < vma->vm_end - vma->vm_start)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN) && (map->flags & _DRM_READ_ONLY)) {
-		vm_flags_clear(vma, VM_WRITE | VM_MAYWRITE);
-#if defined(__i386__) || defined(__x86_64__)
-		pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW;
-#else
-		/* Ye gads this is ugly.  With more thought
-		   we could move this up higher and use
-		   `protection_map' instead.  */
-		vma->vm_page_prot =
-		    __pgprot(pte_val
-			     (pte_wrprotect
-			      (__pte(pgprot_val(vma->vm_page_prot)))));
-#endif
-	}
-
-	switch (map->type) {
-#if !defined(__arm__)
-	case _DRM_AGP:
-		if (dev->agp && dev->agp->cant_use_aperture) {
-			/*
-			 * On some platforms we can't talk to bus dma address from the CPU, so for
-			 * memory of type DRM_AGP, we'll deal with sorting out the real physical
-			 * pages and mappings in fault()
-			 */
-#if defined(__powerpc__)
-			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-#endif
-			vma->vm_ops = &drm_vm_ops;
-			break;
-		}
-		fallthrough;	/* to _DRM_FRAME_BUFFER... */
-#endif
-	case _DRM_FRAME_BUFFER:
-	case _DRM_REGISTERS:
-		offset = drm_core_get_reg_ofs(dev);
-		vma->vm_page_prot = drm_io_prot(map, vma);
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       (map->offset + offset) >> PAGE_SHIFT,
-				       vma->vm_end - vma->vm_start,
-				       vma->vm_page_prot))
-			return -EAGAIN;
-		DRM_DEBUG("   Type = %d; start = 0x%lx, end = 0x%lx,"
-			  " offset = 0x%llx\n",
-			  map->type,
-			  vma->vm_start, vma->vm_end, (unsigned long long)(map->offset + offset));
-
-		vma->vm_ops = &drm_vm_ops;
-		break;
-	case _DRM_CONSISTENT:
-		/* Consistent memory is really like shared memory. But
-		 * it's allocated in a different way, so avoid fault */
-		if (remap_pfn_range(vma, vma->vm_start,
-		    page_to_pfn(virt_to_page(map->handle)),
-		    vma->vm_end - vma->vm_start, vma->vm_page_prot))
-			return -EAGAIN;
-		vma->vm_page_prot = drm_dma_prot(map->type, vma);
-		fallthrough;	/* to _DRM_SHM */
-	case _DRM_SHM:
-		vma->vm_ops = &drm_vm_shm_ops;
-		vma->vm_private_data = (void *)map;
-		break;
-	case _DRM_SCATTER_GATHER:
-		vma->vm_ops = &drm_vm_sg_ops;
-		vma->vm_private_data = (void *)map;
-		vma->vm_page_prot = drm_dma_prot(map->type, vma);
-		break;
-	default:
-		return -EINVAL;	/* This should never happen. */
-	}
-	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
-
-	drm_vm_open_locked(dev, vma);
-	return 0;
-}
-
-int drm_legacy_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *priv = filp->private_data;
-	struct drm_device *dev = priv->minor->dev;
-	int ret;
-
-	if (drm_dev_is_unplugged(dev))
-		return -ENODEV;
-
-	mutex_lock(&dev->struct_mutex);
-	ret = drm_mmap_locked(filp, vma);
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL(drm_legacy_mmap);
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-void drm_legacy_vma_flush(struct drm_device *dev)
-{
-	struct drm_vma_entry *vma, *vma_temp;
-
-	/* Clear vma list (only needed for legacy drivers) */
-	list_for_each_entry_safe(vma, vma_temp, &dev->vmalist, head) {
-		list_del(&vma->head);
-		kfree(vma);
-	}
-}
-#endif
diff --git a/include/drm/drm_auth.h b/include/drm/drm_auth.h
index ba248ca8866f5..50131383ed814 100644
--- a/include/drm/drm_auth.h
+++ b/include/drm/drm_auth.h
@@ -33,24 +33,6 @@
 #include <linux/wait.h>
 
 struct drm_file;
-struct drm_hw_lock;
-
-/*
- * Legacy DRI1 locking data structure. Only here instead of in drm_legacy.h for
- * include ordering reasons.
- *
- * DO NOT USE.
- */
-struct drm_lock_data {
-	struct drm_hw_lock *hw_lock;
-	struct drm_file *file_priv;
-	wait_queue_head_t lock_queue;
-	unsigned long lock_time;
-	spinlock_t spinlock;
-	uint32_t kernel_waiters;
-	uint32_t user_waiters;
-	int idle_has_lock;
-};
 
 /**
  * struct drm_master - drm master structure
@@ -145,10 +127,6 @@ struct drm_master {
 	 * Protected by &drm_device.mode_config's &drm_mode_config.idr_mutex.
 	 */
 	struct idr lessee_idr;
-	/* private: */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-	struct drm_lock_data lock;
-#endif
 };
 
 struct drm_master *drm_master_get(struct drm_master *master);
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index c490977ee250c..63767cf24371b 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -6,7 +6,6 @@
 #include <linux/mutex.h>
 #include <linux/idr.h>
 
-#include <drm/drm_legacy.h>
 #include <drm/drm_mode_config.h>
 
 struct drm_driver;
@@ -153,8 +152,8 @@ struct drm_device {
 	 *
 	 * Lock for others (not &drm_minor.master and &drm_file.is_master)
 	 *
-	 * WARNING:
-	 * Only drivers annotated with DRIVER_LEGACY should be using this.
+	 * TODO: This lock used to be the BKL of the DRM subsystem. Move the
+	 *       lock into i915, which is the only remaining user.
 	 */
 	struct mutex struct_mutex;
 
@@ -317,72 +316,6 @@ struct drm_device {
 	 * Root directory for debugfs files.
 	 */
 	struct dentry *debugfs_root;
-
-	/* Everything below here is for legacy driver, never use! */
-	/* private: */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-	/* List of devices per driver for stealth attach cleanup */
-	struct list_head legacy_dev_list;
-
-#ifdef __alpha__
-	/** @hose: PCI hose, only used on ALPHA platforms. */
-	struct pci_controller *hose;
-#endif
-
-	/* AGP data */
-	struct drm_agp_head *agp;
-
-	/* Context handle management - linked list of context handles */
-	struct list_head ctxlist;
-
-	/* Context handle management - mutex for &ctxlist */
-	struct mutex ctxlist_mutex;
-
-	/* Context handle management */
-	struct idr ctx_idr;
-
-	/* Memory management - linked list of regions */
-	struct list_head maplist;
-
-	/* Memory management - user token hash table for maps */
-	struct drm_open_hash map_hash;
-
-	/* Context handle management - list of vmas (for debugging) */
-	struct list_head vmalist;
-
-	/* Optional pointer for DMA support */
-	struct drm_device_dma *dma;
-
-	/* Context swapping flag */
-	__volatile__ long context_flag;
-
-	/* Last current context */
-	int last_context;
-
-	/* Lock for &buf_use and a few other things. */
-	spinlock_t buf_lock;
-
-	/* Usage counter for buffers in use -- cannot alloc */
-	int buf_use;
-
-	/* Buffer allocation in progress */
-	atomic_t buf_alloc;
-
-	struct {
-		int context;
-		struct drm_hw_lock *lock;
-	} sigdata;
-
-	struct drm_local_map *agp_buffer_map;
-	unsigned int agp_buffer_token;
-
-	/* Scatter gather memory */
-	struct drm_sg_mem *sg;
-
-	/* IRQs */
-	bool irq_enabled;
-	int irq;
-#endif
 };
 
 #endif
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index ea36aa79dca25..8878260d7529d 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -442,25 +442,6 @@ struct drm_driver {
 	 * some examples.
 	 */
 	const struct file_operations *fops;
-
-#ifdef CONFIG_DRM_LEGACY
-	/* Everything below here is for legacy driver, never use! */
-	/* private: */
-
-	int (*firstopen) (struct drm_device *);
-	void (*preclose) (struct drm_device *, struct drm_file *file_priv);
-	int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv);
-	int (*dma_quiescent) (struct drm_device *);
-	int (*context_dtor) (struct drm_device *dev, int context);
-	irqreturn_t (*irq_handler)(int irq, void *arg);
-	void (*irq_preinstall)(struct drm_device *dev);
-	int (*irq_postinstall)(struct drm_device *dev);
-	void (*irq_uninstall)(struct drm_device *dev);
-	u32 (*get_vblank_counter)(struct drm_device *dev, unsigned int pipe);
-	int (*enable_vblank)(struct drm_device *dev, unsigned int pipe);
-	void (*disable_vblank)(struct drm_device *dev, unsigned int pipe);
-	int dev_priv_size;
-#endif
 };
 
 void *__devm_drm_dev_alloc(struct device *parent,
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 8f35dcea82d3b..ab230d3af138d 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -386,11 +386,6 @@ struct drm_file {
 	 * Per-file buffer caches used by the PRIME buffer sharing code.
 	 */
 	struct drm_prime_file_private prime;
-
-	/* private: */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-	unsigned long lock_count; /* DRI1 legacy lock count */
-#endif
 };
 
 /**
diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h
deleted file mode 100644
index 0fc85418aad8c..0000000000000
--- a/include/drm/drm_legacy.h
+++ /dev/null
@@ -1,331 +0,0 @@
-#ifndef __DRM_DRM_LEGACY_H__
-#define __DRM_DRM_LEGACY_H__
-/*
- * Legacy driver interfaces for the Direct Rendering Manager
- *
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * Copyright (c) 2009-2010, Code Aurora Forum.
- * All rights reserved.
- * Copyright © 2014 Intel Corporation
- *   Daniel Vetter <daniel.vetter@ffwll.ch>
- *
- * Author: Rickard E. (Rik) Faith <faith@valinux.com>
- * Author: Gareth Hughes <gareth@valinux.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/agp_backend.h>
-
-#include <drm/drm.h>
-#include <drm/drm_auth.h>
-
-struct drm_device;
-struct drm_driver;
-struct file;
-struct pci_driver;
-
-/*
- * Legacy Support for palateontologic DRM drivers
- *
- * If you add a new driver and it uses any of these functions or structures,
- * you're doing it terribly wrong.
- */
-
-/*
- * Hash-table Support
- */
-
-struct drm_hash_item {
-	struct hlist_node head;
-	unsigned long key;
-};
-
-struct drm_open_hash {
-	struct hlist_head *table;
-	u8 order;
-};
-
-/**
- * DMA buffer.
- */
-struct drm_buf {
-	int idx;		       /**< Index into master buflist */
-	int total;		       /**< Buffer size */
-	int order;		       /**< log-base-2(total) */
-	int used;		       /**< Amount of buffer in use (for DMA) */
-	unsigned long offset;	       /**< Byte offset (used internally) */
-	void *address;		       /**< Address of buffer */
-	unsigned long bus_address;     /**< Bus address of buffer */
-	struct drm_buf *next;	       /**< Kernel-only: used for free list */
-	__volatile__ int waiting;      /**< On kernel DMA queue */
-	__volatile__ int pending;      /**< On hardware DMA queue */
-	struct drm_file *file_priv;    /**< Private of holding file descr */
-	int context;		       /**< Kernel queue for this buffer */
-	int while_locked;	       /**< Dispatch this buffer while locked */
-	enum {
-		DRM_LIST_NONE = 0,
-		DRM_LIST_FREE = 1,
-		DRM_LIST_WAIT = 2,
-		DRM_LIST_PEND = 3,
-		DRM_LIST_PRIO = 4,
-		DRM_LIST_RECLAIM = 5
-	} list;			       /**< Which list we're on */
-
-	int dev_priv_size;		 /**< Size of buffer private storage */
-	void *dev_private;		 /**< Per-buffer private storage */
-};
-
-typedef struct drm_dma_handle {
-	dma_addr_t busaddr;
-	void *vaddr;
-	size_t size;
-} drm_dma_handle_t;
-
-/**
- * Buffer entry.  There is one of this for each buffer size order.
- */
-struct drm_buf_entry {
-	int buf_size;			/**< size */
-	int buf_count;			/**< number of buffers */
-	struct drm_buf *buflist;		/**< buffer list */
-	int seg_count;
-	int page_order;
-	struct drm_dma_handle **seglist;
-
-	int low_mark;			/**< Low water mark */
-	int high_mark;			/**< High water mark */
-};
-
-/**
- * DMA data.
- */
-struct drm_device_dma {
-
-	struct drm_buf_entry bufs[DRM_MAX_ORDER + 1];	/**< buffers, grouped by their size order */
-	int buf_count;			/**< total number of buffers */
-	struct drm_buf **buflist;		/**< Vector of pointers into drm_device_dma::bufs */
-	int seg_count;
-	int page_count;			/**< number of pages */
-	unsigned long *pagelist;	/**< page list */
-	unsigned long byte_count;
-	enum {
-		_DRM_DMA_USE_AGP = 0x01,
-		_DRM_DMA_USE_SG = 0x02,
-		_DRM_DMA_USE_FB = 0x04,
-		_DRM_DMA_USE_PCI_RO = 0x08
-	} flags;
-
-};
-
-/**
- * Scatter-gather memory.
- */
-struct drm_sg_mem {
-	unsigned long handle;
-	void *virtual;
-	int pages;
-	struct page **pagelist;
-	dma_addr_t *busaddr;
-};
-
-/**
- * Kernel side of a mapping
- */
-struct drm_local_map {
-	dma_addr_t offset;	 /**< Requested physical address (0 for SAREA)*/
-	unsigned long size;	 /**< Requested physical size (bytes) */
-	enum drm_map_type type;	 /**< Type of memory to map */
-	enum drm_map_flags flags;	 /**< Flags */
-	void *handle;		 /**< User-space: "Handle" to pass to mmap() */
-				 /**< Kernel-space: kernel-virtual address */
-	int mtrr;		 /**< MTRR slot used */
-};
-
-typedef struct drm_local_map drm_local_map_t;
-
-/**
- * Mappings list
- */
-struct drm_map_list {
-	struct list_head head;		/**< list head */
-	struct drm_hash_item hash;
-	struct drm_local_map *map;	/**< mapping */
-	uint64_t user_token;
-	struct drm_master *master;
-};
-
-int drm_legacy_addmap(struct drm_device *d, resource_size_t offset,
-		      unsigned int size, enum drm_map_type type,
-		      enum drm_map_flags flags, struct drm_local_map **map_p);
-struct drm_local_map *drm_legacy_findmap(struct drm_device *dev, unsigned int token);
-void drm_legacy_rmmap(struct drm_device *d, struct drm_local_map *map);
-int drm_legacy_rmmap_locked(struct drm_device *d, struct drm_local_map *map);
-struct drm_local_map *drm_legacy_getsarea(struct drm_device *dev);
-int drm_legacy_mmap(struct file *filp, struct vm_area_struct *vma);
-
-int drm_legacy_addbufs_agp(struct drm_device *d, struct drm_buf_desc *req);
-int drm_legacy_addbufs_pci(struct drm_device *d, struct drm_buf_desc *req);
-
-/**
- * Test that the hardware lock is held by the caller, returning otherwise.
- *
- * \param dev DRM device.
- * \param filp file pointer of the caller.
- */
-#define LOCK_TEST_WITH_RETURN( dev, _file_priv )				\
-do {										\
-	if (!_DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock) ||	\
-	    _file_priv->master->lock.file_priv != _file_priv)	{		\
-		DRM_ERROR( "%s called without lock held, held  %d owner %p %p\n",\
-			   __func__, _DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock),\
-			   _file_priv->master->lock.file_priv, _file_priv);	\
-		return -EINVAL;							\
-	}									\
-} while (0)
-
-void drm_legacy_idlelock_take(struct drm_lock_data *lock);
-void drm_legacy_idlelock_release(struct drm_lock_data *lock);
-
-/* drm_irq.c */
-int drm_legacy_irq_uninstall(struct drm_device *dev);
-
-/* drm_pci.c */
-
-#ifdef CONFIG_PCI
-
-int drm_legacy_pci_init(const struct drm_driver *driver,
-			struct pci_driver *pdriver);
-void drm_legacy_pci_exit(const struct drm_driver *driver,
-			 struct pci_driver *pdriver);
-
-#else
-
-static inline struct drm_dma_handle *drm_pci_alloc(struct drm_device *dev,
-						   size_t size, size_t align)
-{
-	return NULL;
-}
-
-static inline void drm_pci_free(struct drm_device *dev,
-				struct drm_dma_handle *dmah)
-{
-}
-
-static inline int drm_legacy_pci_init(const struct drm_driver *driver,
-				      struct pci_driver *pdriver)
-{
-	return -EINVAL;
-}
-
-static inline void drm_legacy_pci_exit(const struct drm_driver *driver,
-				       struct pci_driver *pdriver)
-{
-}
-
-#endif
-
-/*
- * AGP Support
- */
-
-struct drm_agp_head {
-	struct agp_kern_info agp_info;
-	struct list_head memory;
-	unsigned long mode;
-	struct agp_bridge_data *bridge;
-	int enabled;
-	int acquired;
-	unsigned long base;
-	int agp_mtrr;
-	int cant_use_aperture;
-	unsigned long page_mask;
-};
-
-#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_AGP)
-struct drm_agp_head *drm_legacy_agp_init(struct drm_device *dev);
-int drm_legacy_agp_acquire(struct drm_device *dev);
-int drm_legacy_agp_release(struct drm_device *dev);
-int drm_legacy_agp_enable(struct drm_device *dev, struct drm_agp_mode mode);
-int drm_legacy_agp_info(struct drm_device *dev, struct drm_agp_info *info);
-int drm_legacy_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request);
-int drm_legacy_agp_free(struct drm_device *dev, struct drm_agp_buffer *request);
-int drm_legacy_agp_unbind(struct drm_device *dev, struct drm_agp_binding *request);
-int drm_legacy_agp_bind(struct drm_device *dev, struct drm_agp_binding *request);
-#else
-static inline struct drm_agp_head *drm_legacy_agp_init(struct drm_device *dev)
-{
-	return NULL;
-}
-
-static inline int drm_legacy_agp_acquire(struct drm_device *dev)
-{
-	return -ENODEV;
-}
-
-static inline int drm_legacy_agp_release(struct drm_device *dev)
-{
-	return -ENODEV;
-}
-
-static inline int drm_legacy_agp_enable(struct drm_device *dev,
-					struct drm_agp_mode mode)
-{
-	return -ENODEV;
-}
-
-static inline int drm_legacy_agp_info(struct drm_device *dev,
-				      struct drm_agp_info *info)
-{
-	return -ENODEV;
-}
-
-static inline int drm_legacy_agp_alloc(struct drm_device *dev,
-				       struct drm_agp_buffer *request)
-{
-	return -ENODEV;
-}
-
-static inline int drm_legacy_agp_free(struct drm_device *dev,
-				      struct drm_agp_buffer *request)
-{
-	return -ENODEV;
-}
-
-static inline int drm_legacy_agp_unbind(struct drm_device *dev,
-					struct drm_agp_binding *request)
-{
-	return -ENODEV;
-}
-
-static inline int drm_legacy_agp_bind(struct drm_device *dev,
-				      struct drm_agp_binding *request)
-{
-	return -ENODEV;
-}
-#endif
-
-/* drm_memory.c */
-void drm_legacy_ioremap(struct drm_local_map *map, struct drm_device *dev);
-void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev);
-void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev);
-
-#endif /* __DRM_DRM_LEGACY_H__ */
-- 
GitLab


From 87be41f09ac92cee6d0124636f49fac21dfd445e Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:42 +0100
Subject: [PATCH 0826/2340] char/agp: Remove frontend code

The AGP subsystem supports a user-space interface via /dev/agpgart. It
is only enabled with DRM support for mode setting in user space. (i.e.,
CONFIG_DRM_LEGACY). All of that DRM code has been removed and the option
will go away. Hence remove the AGP frontend.

Modern DRM drivers with kernel mode setting handle AGP support internally.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-14-tzimmermann@suse.de
---
 drivers/char/agp/Makefile       |    6 -
 drivers/char/agp/agp.h          |    9 -
 drivers/char/agp/backend.c      |   11 -
 drivers/char/agp/compat_ioctl.c |  291 ---------
 drivers/char/agp/compat_ioctl.h |  106 ---
 drivers/char/agp/frontend.c     | 1068 -------------------------------
 6 files changed, 1491 deletions(-)
 delete mode 100644 drivers/char/agp/compat_ioctl.c
 delete mode 100644 drivers/char/agp/compat_ioctl.h
 delete mode 100644 drivers/char/agp/frontend.c

diff --git a/drivers/char/agp/Makefile b/drivers/char/agp/Makefile
index 25834557e4865..43b09cf193bb7 100644
--- a/drivers/char/agp/Makefile
+++ b/drivers/char/agp/Makefile
@@ -1,12 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 agpgart-y := backend.o generic.o isoch.o
 
-ifeq ($(CONFIG_DRM_LEGACY),y)
-agpgart-$(CONFIG_COMPAT)	+= compat_ioctl.o
-agpgart-y			+= frontend.o
-endif
-
-
 obj-$(CONFIG_AGP)		+= agpgart.o
 obj-$(CONFIG_AGP_ALI)		+= ali-agp.o
 obj-$(CONFIG_AGP_ATI)		+= ati-agp.o
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 8771dcc9b8e2f..5c36ab85f80b7 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -185,15 +185,6 @@ void agp_put_bridge(struct agp_bridge_data *bridge);
 int agp_add_bridge(struct agp_bridge_data *bridge);
 void agp_remove_bridge(struct agp_bridge_data *bridge);
 
-/* Frontend routines. */
-#if IS_ENABLED(CONFIG_DRM_LEGACY)
-int agp_frontend_initialize(void);
-void agp_frontend_cleanup(void);
-#else
-static inline int agp_frontend_initialize(void) { return 0; }
-static inline void agp_frontend_cleanup(void) {}
-#endif
-
 /* Generic routines. */
 void agp_generic_enable(struct agp_bridge_data *bridge, u32 mode);
 int agp_generic_create_gatt_table(struct agp_bridge_data *bridge);
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 0e19c600db531..1776afd3ee078 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -293,13 +293,6 @@ int agp_add_bridge(struct agp_bridge_data *bridge)
 	}
 
 	if (list_empty(&agp_bridges)) {
-		error = agp_frontend_initialize();
-		if (error) {
-			dev_info(&bridge->dev->dev,
-				 "agp_frontend_initialize() failed\n");
-			goto frontend_err;
-		}
-
 		dev_info(&bridge->dev->dev, "AGP aperture is %dM @ 0x%lx\n",
 			 bridge->driver->fetch_size(), bridge->gart_bus_addr);
 
@@ -308,8 +301,6 @@ int agp_add_bridge(struct agp_bridge_data *bridge)
 	list_add(&bridge->list, &agp_bridges);
 	return 0;
 
-frontend_err:
-	agp_backend_cleanup(bridge);
 err_out:
 	module_put(bridge->driver->owner);
 err_put_bridge:
@@ -323,8 +314,6 @@ void agp_remove_bridge(struct agp_bridge_data *bridge)
 {
 	agp_backend_cleanup(bridge);
 	list_del(&bridge->list);
-	if (list_empty(&agp_bridges))
-		agp_frontend_cleanup();
 	module_put(bridge->driver->owner);
 }
 EXPORT_SYMBOL_GPL(agp_remove_bridge);
diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c
deleted file mode 100644
index 52ffe1706ce05..0000000000000
--- a/drivers/char/agp/compat_ioctl.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * AGPGART driver frontend compatibility ioctls
- * Copyright (C) 2004 Silicon Graphics, Inc.
- * Copyright (C) 2002-2003 Dave Jones
- * Copyright (C) 1999 Jeff Hartmann
- * Copyright (C) 1999 Precision Insight, Inc.
- * Copyright (C) 1999 Xi Graphics, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
- * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/fs.h>
-#include <linux/agpgart.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include "agp.h"
-#include "compat_ioctl.h"
-
-static int compat_agpioc_info_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_info32 userinfo;
-	struct agp_kern_info kerninfo;
-
-	agp_copy_info(agp_bridge, &kerninfo);
-
-	userinfo.version.major = kerninfo.version.major;
-	userinfo.version.minor = kerninfo.version.minor;
-	userinfo.bridge_id = kerninfo.device->vendor |
-	    (kerninfo.device->device << 16);
-	userinfo.agp_mode = kerninfo.mode;
-	userinfo.aper_base = (compat_long_t)kerninfo.aper_base;
-	userinfo.aper_size = kerninfo.aper_size;
-	userinfo.pg_total = userinfo.pg_system = kerninfo.max_memory;
-	userinfo.pg_used = kerninfo.current_memory;
-
-	if (copy_to_user(arg, &userinfo, sizeof(userinfo)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int compat_agpioc_reserve_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_region32 ureserve;
-	struct agp_region kreserve;
-	struct agp_client *client;
-	struct agp_file_private *client_priv;
-
-	DBG("");
-	if (copy_from_user(&ureserve, arg, sizeof(ureserve)))
-		return -EFAULT;
-
-	if ((unsigned) ureserve.seg_count >= ~0U/sizeof(struct agp_segment32))
-		return -EFAULT;
-
-	kreserve.pid = ureserve.pid;
-	kreserve.seg_count = ureserve.seg_count;
-
-	client = agp_find_client_by_pid(kreserve.pid);
-
-	if (kreserve.seg_count == 0) {
-		/* remove a client */
-		client_priv = agp_find_private(kreserve.pid);
-
-		if (client_priv != NULL) {
-			set_bit(AGP_FF_IS_CLIENT, &client_priv->access_flags);
-			set_bit(AGP_FF_IS_VALID, &client_priv->access_flags);
-		}
-		if (client == NULL) {
-			/* client is already removed */
-			return 0;
-		}
-		return agp_remove_client(kreserve.pid);
-	} else {
-		struct agp_segment32 *usegment;
-		struct agp_segment *ksegment;
-		int seg;
-
-		if (ureserve.seg_count >= 16384)
-			return -EINVAL;
-
-		usegment = kmalloc_array(ureserve.seg_count,
-					 sizeof(*usegment),
-					 GFP_KERNEL);
-		if (!usegment)
-			return -ENOMEM;
-
-		ksegment = kmalloc_array(kreserve.seg_count,
-					 sizeof(*ksegment),
-					 GFP_KERNEL);
-		if (!ksegment) {
-			kfree(usegment);
-			return -ENOMEM;
-		}
-
-		if (copy_from_user(usegment, (void __user *) ureserve.seg_list,
-				   sizeof(*usegment) * ureserve.seg_count)) {
-			kfree(usegment);
-			kfree(ksegment);
-			return -EFAULT;
-		}
-
-		for (seg = 0; seg < ureserve.seg_count; seg++) {
-			ksegment[seg].pg_start = usegment[seg].pg_start;
-			ksegment[seg].pg_count = usegment[seg].pg_count;
-			ksegment[seg].prot = usegment[seg].prot;
-		}
-
-		kfree(usegment);
-		kreserve.seg_list = ksegment;
-
-		if (client == NULL) {
-			/* Create the client and add the segment */
-			client = agp_create_client(kreserve.pid);
-
-			if (client == NULL) {
-				kfree(ksegment);
-				return -ENOMEM;
-			}
-			client_priv = agp_find_private(kreserve.pid);
-
-			if (client_priv != NULL) {
-				set_bit(AGP_FF_IS_CLIENT, &client_priv->access_flags);
-				set_bit(AGP_FF_IS_VALID, &client_priv->access_flags);
-			}
-		}
-		return agp_create_segment(client, &kreserve);
-	}
-	/* Will never really happen */
-	return -EINVAL;
-}
-
-static int compat_agpioc_allocate_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_memory *memory;
-	struct agp_allocate32 alloc;
-
-	DBG("");
-	if (copy_from_user(&alloc, arg, sizeof(alloc)))
-		return -EFAULT;
-
-	memory = agp_allocate_memory_wrap(alloc.pg_count, alloc.type);
-
-	if (memory == NULL)
-		return -ENOMEM;
-
-	alloc.key = memory->key;
-	alloc.physical = memory->physical;
-
-	if (copy_to_user(arg, &alloc, sizeof(alloc))) {
-		agp_free_memory_wrap(memory);
-		return -EFAULT;
-	}
-	return 0;
-}
-
-static int compat_agpioc_bind_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_bind32 bind_info;
-	struct agp_memory *memory;
-
-	DBG("");
-	if (copy_from_user(&bind_info, arg, sizeof(bind_info)))
-		return -EFAULT;
-
-	memory = agp_find_mem_by_key(bind_info.key);
-
-	if (memory == NULL)
-		return -EINVAL;
-
-	return agp_bind_memory(memory, bind_info.pg_start);
-}
-
-static int compat_agpioc_unbind_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_memory *memory;
-	struct agp_unbind32 unbind;
-
-	DBG("");
-	if (copy_from_user(&unbind, arg, sizeof(unbind)))
-		return -EFAULT;
-
-	memory = agp_find_mem_by_key(unbind.key);
-
-	if (memory == NULL)
-		return -EINVAL;
-
-	return agp_unbind_memory(memory);
-}
-
-long compat_agp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct agp_file_private *curr_priv = file->private_data;
-	int ret_val = -ENOTTY;
-
-	mutex_lock(&(agp_fe.agp_mutex));
-
-	if ((agp_fe.current_controller == NULL) &&
-	    (cmd != AGPIOC_ACQUIRE32)) {
-		ret_val = -EINVAL;
-		goto ioctl_out;
-	}
-	if ((agp_fe.backend_acquired != true) &&
-	    (cmd != AGPIOC_ACQUIRE32)) {
-		ret_val = -EBUSY;
-		goto ioctl_out;
-	}
-	if (cmd != AGPIOC_ACQUIRE32) {
-		if (!(test_bit(AGP_FF_IS_CONTROLLER, &curr_priv->access_flags))) {
-			ret_val = -EPERM;
-			goto ioctl_out;
-		}
-		/* Use the original pid of the controller,
-		 * in case it's threaded */
-
-		if (agp_fe.current_controller->pid != curr_priv->my_pid) {
-			ret_val = -EBUSY;
-			goto ioctl_out;
-		}
-	}
-
-	switch (cmd) {
-	case AGPIOC_INFO32:
-		ret_val = compat_agpioc_info_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_ACQUIRE32:
-		ret_val = agpioc_acquire_wrap(curr_priv);
-		break;
-
-	case AGPIOC_RELEASE32:
-		ret_val = agpioc_release_wrap(curr_priv);
-		break;
-
-	case AGPIOC_SETUP32:
-		ret_val = agpioc_setup_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_RESERVE32:
-		ret_val = compat_agpioc_reserve_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_PROTECT32:
-		ret_val = agpioc_protect_wrap(curr_priv);
-		break;
-
-	case AGPIOC_ALLOCATE32:
-		ret_val = compat_agpioc_allocate_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_DEALLOCATE32:
-		ret_val = agpioc_deallocate_wrap(curr_priv, (int) arg);
-		break;
-
-	case AGPIOC_BIND32:
-		ret_val = compat_agpioc_bind_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_UNBIND32:
-		ret_val = compat_agpioc_unbind_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_CHIPSET_FLUSH32:
-		break;
-	}
-
-ioctl_out:
-	DBG("ioctl returns %d\n", ret_val);
-	mutex_unlock(&(agp_fe.agp_mutex));
-	return ret_val;
-}
-
diff --git a/drivers/char/agp/compat_ioctl.h b/drivers/char/agp/compat_ioctl.h
deleted file mode 100644
index f30e0fd979635..0000000000000
--- a/drivers/char/agp/compat_ioctl.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 1999 Jeff Hartmann
- * Copyright (C) 1999 Precision Insight, Inc.
- * Copyright (C) 1999 Xi Graphics, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
- * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef _AGP_COMPAT_IOCTL_H
-#define _AGP_COMPAT_IOCTL_H
-
-#include <linux/compat.h>
-#include <linux/agpgart.h>
-
-#define AGPIOC_INFO32       _IOR (AGPIOC_BASE, 0, compat_uptr_t)
-#define AGPIOC_ACQUIRE32    _IO  (AGPIOC_BASE, 1)
-#define AGPIOC_RELEASE32    _IO  (AGPIOC_BASE, 2)
-#define AGPIOC_SETUP32      _IOW (AGPIOC_BASE, 3, compat_uptr_t)
-#define AGPIOC_RESERVE32    _IOW (AGPIOC_BASE, 4, compat_uptr_t)
-#define AGPIOC_PROTECT32    _IOW (AGPIOC_BASE, 5, compat_uptr_t)
-#define AGPIOC_ALLOCATE32   _IOWR(AGPIOC_BASE, 6, compat_uptr_t)
-#define AGPIOC_DEALLOCATE32 _IOW (AGPIOC_BASE, 7, compat_int_t)
-#define AGPIOC_BIND32       _IOW (AGPIOC_BASE, 8, compat_uptr_t)
-#define AGPIOC_UNBIND32     _IOW (AGPIOC_BASE, 9, compat_uptr_t)
-#define AGPIOC_CHIPSET_FLUSH32 _IO (AGPIOC_BASE, 10)
-
-struct agp_info32 {
-	struct agp_version version;	/* version of the driver        */
-	u32 bridge_id;		/* bridge vendor/device         */
-	u32 agp_mode;		/* mode info of bridge          */
-	compat_long_t aper_base;	/* base of aperture             */
-	compat_size_t aper_size;	/* size of aperture             */
-	compat_size_t pg_total;	/* max pages (swap + system)    */
-	compat_size_t pg_system;	/* max pages (system)           */
-	compat_size_t pg_used;		/* current pages used           */
-};
-
-/*
- * The "prot" down below needs still a "sleep" flag somehow ...
- */
-struct agp_segment32 {
-	compat_off_t pg_start;		/* starting page to populate    */
-	compat_size_t pg_count;	/* number of pages              */
-	compat_int_t prot;		/* prot flags for mmap          */
-};
-
-struct agp_region32 {
-	compat_pid_t pid;		/* pid of process               */
-	compat_size_t seg_count;	/* number of segments           */
-	struct agp_segment32 *seg_list;
-};
-
-struct agp_allocate32 {
-	compat_int_t key;		/* tag of allocation            */
-	compat_size_t pg_count;	/* number of pages              */
-	u32 type;		/* 0 == normal, other devspec   */
-	u32 physical;           /* device specific (some devices
-				 * need a phys address of the
-				 * actual page behind the gatt
-				 * table)                        */
-};
-
-struct agp_bind32 {
-	compat_int_t key;		/* tag of allocation            */
-	compat_off_t pg_start;		/* starting page to populate    */
-};
-
-struct agp_unbind32 {
-	compat_int_t key;		/* tag of allocation            */
-	u32 priority;		/* priority for paging out      */
-};
-
-extern struct agp_front_data agp_fe;
-
-int agpioc_acquire_wrap(struct agp_file_private *priv);
-int agpioc_release_wrap(struct agp_file_private *priv);
-int agpioc_protect_wrap(struct agp_file_private *priv);
-int agpioc_setup_wrap(struct agp_file_private *priv, void __user *arg);
-int agpioc_deallocate_wrap(struct agp_file_private *priv, int arg);
-struct agp_file_private *agp_find_private(pid_t pid);
-struct agp_client *agp_create_client(pid_t id);
-int agp_remove_client(pid_t id);
-int agp_create_segment(struct agp_client *client, struct agp_region *region);
-void agp_free_memory_wrap(struct agp_memory *memory);
-struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type);
-struct agp_memory *agp_find_mem_by_key(int key);
-struct agp_client *agp_find_client_by_pid(pid_t id);
-
-#endif /* _AGP_COMPAT_H */
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
deleted file mode 100644
index 321118a9cfa52..0000000000000
--- a/drivers/char/agp/frontend.c
+++ /dev/null
@@ -1,1068 +0,0 @@
-/*
- * AGPGART driver frontend
- * Copyright (C) 2004 Silicon Graphics, Inc.
- * Copyright (C) 2002-2003 Dave Jones
- * Copyright (C) 1999 Jeff Hartmann
- * Copyright (C) 1999 Precision Insight, Inc.
- * Copyright (C) 1999 Xi Graphics, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
- * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mman.h>
-#include <linux/pci.h>
-#include <linux/miscdevice.h>
-#include <linux/agp_backend.h>
-#include <linux/agpgart.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/uaccess.h>
-
-#include "agp.h"
-#include "compat_ioctl.h"
-
-struct agp_front_data agp_fe;
-
-struct agp_memory *agp_find_mem_by_key(int key)
-{
-	struct agp_memory *curr;
-
-	if (agp_fe.current_controller == NULL)
-		return NULL;
-
-	curr = agp_fe.current_controller->pool;
-
-	while (curr != NULL) {
-		if (curr->key == key)
-			break;
-		curr = curr->next;
-	}
-
-	DBG("key=%d -> mem=%p", key, curr);
-	return curr;
-}
-
-static void agp_remove_from_pool(struct agp_memory *temp)
-{
-	struct agp_memory *prev;
-	struct agp_memory *next;
-
-	/* Check to see if this is even in the memory pool */
-
-	DBG("mem=%p", temp);
-	if (agp_find_mem_by_key(temp->key) != NULL) {
-		next = temp->next;
-		prev = temp->prev;
-
-		if (prev != NULL) {
-			prev->next = next;
-			if (next != NULL)
-				next->prev = prev;
-
-		} else {
-			/* This is the first item on the list */
-			if (next != NULL)
-				next->prev = NULL;
-
-			agp_fe.current_controller->pool = next;
-		}
-	}
-}
-
-/*
- * Routines for managing each client's segment list -
- * These routines handle adding and removing segments
- * to each auth'ed client.
- */
-
-static struct
-agp_segment_priv *agp_find_seg_in_client(const struct agp_client *client,
-						unsigned long offset,
-					    int size, pgprot_t page_prot)
-{
-	struct agp_segment_priv *seg;
-	int i;
-	off_t pg_start;
-	size_t pg_count;
-
-	pg_start = offset / 4096;
-	pg_count = size / 4096;
-	seg = *(client->segments);
-
-	for (i = 0; i < client->num_segments; i++) {
-		if ((seg[i].pg_start == pg_start) &&
-		    (seg[i].pg_count == pg_count) &&
-		    (pgprot_val(seg[i].prot) == pgprot_val(page_prot))) {
-			return seg + i;
-		}
-	}
-
-	return NULL;
-}
-
-static void agp_remove_seg_from_client(struct agp_client *client)
-{
-	DBG("client=%p", client);
-
-	if (client->segments != NULL) {
-		if (*(client->segments) != NULL) {
-			DBG("Freeing %p from client %p", *(client->segments), client);
-			kfree(*(client->segments));
-		}
-		DBG("Freeing %p from client %p", client->segments, client);
-		kfree(client->segments);
-		client->segments = NULL;
-	}
-}
-
-static void agp_add_seg_to_client(struct agp_client *client,
-			       struct agp_segment_priv ** seg, int num_segments)
-{
-	struct agp_segment_priv **prev_seg;
-
-	prev_seg = client->segments;
-
-	if (prev_seg != NULL)
-		agp_remove_seg_from_client(client);
-
-	DBG("Adding seg %p (%d segments) to client %p", seg, num_segments, client);
-	client->num_segments = num_segments;
-	client->segments = seg;
-}
-
-static pgprot_t agp_convert_mmap_flags(int prot)
-{
-	unsigned long prot_bits;
-
-	prot_bits = calc_vm_prot_bits(prot, 0) | VM_SHARED;
-	return vm_get_page_prot(prot_bits);
-}
-
-int agp_create_segment(struct agp_client *client, struct agp_region *region)
-{
-	struct agp_segment_priv **ret_seg;
-	struct agp_segment_priv *seg;
-	struct agp_segment *user_seg;
-	size_t i;
-
-	seg = kzalloc((sizeof(struct agp_segment_priv) * region->seg_count), GFP_KERNEL);
-	if (seg == NULL) {
-		kfree(region->seg_list);
-		region->seg_list = NULL;
-		return -ENOMEM;
-	}
-	user_seg = region->seg_list;
-
-	for (i = 0; i < region->seg_count; i++) {
-		seg[i].pg_start = user_seg[i].pg_start;
-		seg[i].pg_count = user_seg[i].pg_count;
-		seg[i].prot = agp_convert_mmap_flags(user_seg[i].prot);
-	}
-	kfree(region->seg_list);
-	region->seg_list = NULL;
-
-	ret_seg = kmalloc(sizeof(void *), GFP_KERNEL);
-	if (ret_seg == NULL) {
-		kfree(seg);
-		return -ENOMEM;
-	}
-	*ret_seg = seg;
-	agp_add_seg_to_client(client, ret_seg, region->seg_count);
-	return 0;
-}
-
-/* End - Routines for managing each client's segment list */
-
-/* This function must only be called when current_controller != NULL */
-static void agp_insert_into_pool(struct agp_memory * temp)
-{
-	struct agp_memory *prev;
-
-	prev = agp_fe.current_controller->pool;
-
-	if (prev != NULL) {
-		prev->prev = temp;
-		temp->next = prev;
-	}
-	agp_fe.current_controller->pool = temp;
-}
-
-
-/* File private list routines */
-
-struct agp_file_private *agp_find_private(pid_t pid)
-{
-	struct agp_file_private *curr;
-
-	curr = agp_fe.file_priv_list;
-
-	while (curr != NULL) {
-		if (curr->my_pid == pid)
-			return curr;
-		curr = curr->next;
-	}
-
-	return NULL;
-}
-
-static void agp_insert_file_private(struct agp_file_private * priv)
-{
-	struct agp_file_private *prev;
-
-	prev = agp_fe.file_priv_list;
-
-	if (prev != NULL)
-		prev->prev = priv;
-	priv->next = prev;
-	agp_fe.file_priv_list = priv;
-}
-
-static void agp_remove_file_private(struct agp_file_private * priv)
-{
-	struct agp_file_private *next;
-	struct agp_file_private *prev;
-
-	next = priv->next;
-	prev = priv->prev;
-
-	if (prev != NULL) {
-		prev->next = next;
-
-		if (next != NULL)
-			next->prev = prev;
-
-	} else {
-		if (next != NULL)
-			next->prev = NULL;
-
-		agp_fe.file_priv_list = next;
-	}
-}
-
-/* End - File flag list routines */
-
-/*
- * Wrappers for agp_free_memory & agp_allocate_memory
- * These make sure that internal lists are kept updated.
- */
-void agp_free_memory_wrap(struct agp_memory *memory)
-{
-	agp_remove_from_pool(memory);
-	agp_free_memory(memory);
-}
-
-struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type)
-{
-	struct agp_memory *memory;
-
-	memory = agp_allocate_memory(agp_bridge, pg_count, type);
-	if (memory == NULL)
-		return NULL;
-
-	agp_insert_into_pool(memory);
-	return memory;
-}
-
-/* Routines for managing the list of controllers -
- * These routines manage the current controller, and the list of
- * controllers
- */
-
-static struct agp_controller *agp_find_controller_by_pid(pid_t id)
-{
-	struct agp_controller *controller;
-
-	controller = agp_fe.controllers;
-
-	while (controller != NULL) {
-		if (controller->pid == id)
-			return controller;
-		controller = controller->next;
-	}
-
-	return NULL;
-}
-
-static struct agp_controller *agp_create_controller(pid_t id)
-{
-	struct agp_controller *controller;
-
-	controller = kzalloc(sizeof(struct agp_controller), GFP_KERNEL);
-	if (controller == NULL)
-		return NULL;
-
-	controller->pid = id;
-	return controller;
-}
-
-static int agp_insert_controller(struct agp_controller *controller)
-{
-	struct agp_controller *prev_controller;
-
-	prev_controller = agp_fe.controllers;
-	controller->next = prev_controller;
-
-	if (prev_controller != NULL)
-		prev_controller->prev = controller;
-
-	agp_fe.controllers = controller;
-
-	return 0;
-}
-
-static void agp_remove_all_clients(struct agp_controller *controller)
-{
-	struct agp_client *client;
-	struct agp_client *temp;
-
-	client = controller->clients;
-
-	while (client) {
-		struct agp_file_private *priv;
-
-		temp = client;
-		agp_remove_seg_from_client(temp);
-		priv = agp_find_private(temp->pid);
-
-		if (priv != NULL) {
-			clear_bit(AGP_FF_IS_VALID, &priv->access_flags);
-			clear_bit(AGP_FF_IS_CLIENT, &priv->access_flags);
-		}
-		client = client->next;
-		kfree(temp);
-	}
-}
-
-static void agp_remove_all_memory(struct agp_controller *controller)
-{
-	struct agp_memory *memory;
-	struct agp_memory *temp;
-
-	memory = controller->pool;
-
-	while (memory) {
-		temp = memory;
-		memory = memory->next;
-		agp_free_memory_wrap(temp);
-	}
-}
-
-static int agp_remove_controller(struct agp_controller *controller)
-{
-	struct agp_controller *prev_controller;
-	struct agp_controller *next_controller;
-
-	prev_controller = controller->prev;
-	next_controller = controller->next;
-
-	if (prev_controller != NULL) {
-		prev_controller->next = next_controller;
-		if (next_controller != NULL)
-			next_controller->prev = prev_controller;
-
-	} else {
-		if (next_controller != NULL)
-			next_controller->prev = NULL;
-
-		agp_fe.controllers = next_controller;
-	}
-
-	agp_remove_all_memory(controller);
-	agp_remove_all_clients(controller);
-
-	if (agp_fe.current_controller == controller) {
-		agp_fe.current_controller = NULL;
-		agp_fe.backend_acquired = false;
-		agp_backend_release(agp_bridge);
-	}
-	kfree(controller);
-	return 0;
-}
-
-static void agp_controller_make_current(struct agp_controller *controller)
-{
-	struct agp_client *clients;
-
-	clients = controller->clients;
-
-	while (clients != NULL) {
-		struct agp_file_private *priv;
-
-		priv = agp_find_private(clients->pid);
-
-		if (priv != NULL) {
-			set_bit(AGP_FF_IS_VALID, &priv->access_flags);
-			set_bit(AGP_FF_IS_CLIENT, &priv->access_flags);
-		}
-		clients = clients->next;
-	}
-
-	agp_fe.current_controller = controller;
-}
-
-static void agp_controller_release_current(struct agp_controller *controller,
-				      struct agp_file_private *controller_priv)
-{
-	struct agp_client *clients;
-
-	clear_bit(AGP_FF_IS_VALID, &controller_priv->access_flags);
-	clients = controller->clients;
-
-	while (clients != NULL) {
-		struct agp_file_private *priv;
-
-		priv = agp_find_private(clients->pid);
-
-		if (priv != NULL)
-			clear_bit(AGP_FF_IS_VALID, &priv->access_flags);
-
-		clients = clients->next;
-	}
-
-	agp_fe.current_controller = NULL;
-	agp_fe.used_by_controller = false;
-	agp_backend_release(agp_bridge);
-}
-
-/*
- * Routines for managing client lists -
- * These routines are for managing the list of auth'ed clients.
- */
-
-static struct agp_client
-*agp_find_client_in_controller(struct agp_controller *controller, pid_t id)
-{
-	struct agp_client *client;
-
-	if (controller == NULL)
-		return NULL;
-
-	client = controller->clients;
-
-	while (client != NULL) {
-		if (client->pid == id)
-			return client;
-		client = client->next;
-	}
-
-	return NULL;
-}
-
-static struct agp_controller *agp_find_controller_for_client(pid_t id)
-{
-	struct agp_controller *controller;
-
-	controller = agp_fe.controllers;
-
-	while (controller != NULL) {
-		if ((agp_find_client_in_controller(controller, id)) != NULL)
-			return controller;
-		controller = controller->next;
-	}
-
-	return NULL;
-}
-
-struct agp_client *agp_find_client_by_pid(pid_t id)
-{
-	struct agp_client *temp;
-
-	if (agp_fe.current_controller == NULL)
-		return NULL;
-
-	temp = agp_find_client_in_controller(agp_fe.current_controller, id);
-	return temp;
-}
-
-static void agp_insert_client(struct agp_client *client)
-{
-	struct agp_client *prev_client;
-
-	prev_client = agp_fe.current_controller->clients;
-	client->next = prev_client;
-
-	if (prev_client != NULL)
-		prev_client->prev = client;
-
-	agp_fe.current_controller->clients = client;
-	agp_fe.current_controller->num_clients++;
-}
-
-struct agp_client *agp_create_client(pid_t id)
-{
-	struct agp_client *new_client;
-
-	new_client = kzalloc(sizeof(struct agp_client), GFP_KERNEL);
-	if (new_client == NULL)
-		return NULL;
-
-	new_client->pid = id;
-	agp_insert_client(new_client);
-	return new_client;
-}
-
-int agp_remove_client(pid_t id)
-{
-	struct agp_client *client;
-	struct agp_client *prev_client;
-	struct agp_client *next_client;
-	struct agp_controller *controller;
-
-	controller = agp_find_controller_for_client(id);
-	if (controller == NULL)
-		return -EINVAL;
-
-	client = agp_find_client_in_controller(controller, id);
-	if (client == NULL)
-		return -EINVAL;
-
-	prev_client = client->prev;
-	next_client = client->next;
-
-	if (prev_client != NULL) {
-		prev_client->next = next_client;
-		if (next_client != NULL)
-			next_client->prev = prev_client;
-
-	} else {
-		if (next_client != NULL)
-			next_client->prev = NULL;
-		controller->clients = next_client;
-	}
-
-	controller->num_clients--;
-	agp_remove_seg_from_client(client);
-	kfree(client);
-	return 0;
-}
-
-/* End - Routines for managing client lists */
-
-/* File Operations */
-
-static int agp_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	unsigned int size, current_size;
-	unsigned long offset;
-	struct agp_client *client;
-	struct agp_file_private *priv = file->private_data;
-	struct agp_kern_info kerninfo;
-
-	mutex_lock(&(agp_fe.agp_mutex));
-
-	if (agp_fe.backend_acquired != true)
-		goto out_eperm;
-
-	if (!(test_bit(AGP_FF_IS_VALID, &priv->access_flags)))
-		goto out_eperm;
-
-	agp_copy_info(agp_bridge, &kerninfo);
-	size = vma->vm_end - vma->vm_start;
-	current_size = kerninfo.aper_size;
-	current_size = current_size * 0x100000;
-	offset = vma->vm_pgoff << PAGE_SHIFT;
-	DBG("%lx:%lx", offset, offset+size);
-
-	if (test_bit(AGP_FF_IS_CLIENT, &priv->access_flags)) {
-		if ((size + offset) > current_size)
-			goto out_inval;
-
-		client = agp_find_client_by_pid(current->pid);
-
-		if (client == NULL)
-			goto out_eperm;
-
-		if (!agp_find_seg_in_client(client, offset, size, vma->vm_page_prot))
-			goto out_inval;
-
-		DBG("client vm_ops=%p", kerninfo.vm_ops);
-		if (kerninfo.vm_ops) {
-			vma->vm_ops = kerninfo.vm_ops;
-		} else if (io_remap_pfn_range(vma, vma->vm_start,
-				(kerninfo.aper_base + offset) >> PAGE_SHIFT,
-				size,
-				pgprot_writecombine(vma->vm_page_prot))) {
-			goto out_again;
-		}
-		mutex_unlock(&(agp_fe.agp_mutex));
-		return 0;
-	}
-
-	if (test_bit(AGP_FF_IS_CONTROLLER, &priv->access_flags)) {
-		if (size != current_size)
-			goto out_inval;
-
-		DBG("controller vm_ops=%p", kerninfo.vm_ops);
-		if (kerninfo.vm_ops) {
-			vma->vm_ops = kerninfo.vm_ops;
-		} else if (io_remap_pfn_range(vma, vma->vm_start,
-				kerninfo.aper_base >> PAGE_SHIFT,
-				size,
-				pgprot_writecombine(vma->vm_page_prot))) {
-			goto out_again;
-		}
-		mutex_unlock(&(agp_fe.agp_mutex));
-		return 0;
-	}
-
-out_eperm:
-	mutex_unlock(&(agp_fe.agp_mutex));
-	return -EPERM;
-
-out_inval:
-	mutex_unlock(&(agp_fe.agp_mutex));
-	return -EINVAL;
-
-out_again:
-	mutex_unlock(&(agp_fe.agp_mutex));
-	return -EAGAIN;
-}
-
-static int agp_release(struct inode *inode, struct file *file)
-{
-	struct agp_file_private *priv = file->private_data;
-
-	mutex_lock(&(agp_fe.agp_mutex));
-
-	DBG("priv=%p", priv);
-
-	if (test_bit(AGP_FF_IS_CONTROLLER, &priv->access_flags)) {
-		struct agp_controller *controller;
-
-		controller = agp_find_controller_by_pid(priv->my_pid);
-
-		if (controller != NULL) {
-			if (controller == agp_fe.current_controller)
-				agp_controller_release_current(controller, priv);
-			agp_remove_controller(controller);
-			controller = NULL;
-		}
-	}
-
-	if (test_bit(AGP_FF_IS_CLIENT, &priv->access_flags))
-		agp_remove_client(priv->my_pid);
-
-	agp_remove_file_private(priv);
-	kfree(priv);
-	file->private_data = NULL;
-	mutex_unlock(&(agp_fe.agp_mutex));
-	return 0;
-}
-
-static int agp_open(struct inode *inode, struct file *file)
-{
-	int minor = iminor(inode);
-	struct agp_file_private *priv;
-	struct agp_client *client;
-
-	if (minor != AGPGART_MINOR)
-		return -ENXIO;
-
-	mutex_lock(&(agp_fe.agp_mutex));
-
-	priv = kzalloc(sizeof(struct agp_file_private), GFP_KERNEL);
-	if (priv == NULL) {
-		mutex_unlock(&(agp_fe.agp_mutex));
-		return -ENOMEM;
-	}
-
-	set_bit(AGP_FF_ALLOW_CLIENT, &priv->access_flags);
-	priv->my_pid = current->pid;
-
-	if (capable(CAP_SYS_RAWIO))
-		/* Root priv, can be controller */
-		set_bit(AGP_FF_ALLOW_CONTROLLER, &priv->access_flags);
-
-	client = agp_find_client_by_pid(current->pid);
-
-	if (client != NULL) {
-		set_bit(AGP_FF_IS_CLIENT, &priv->access_flags);
-		set_bit(AGP_FF_IS_VALID, &priv->access_flags);
-	}
-	file->private_data = (void *) priv;
-	agp_insert_file_private(priv);
-	DBG("private=%p, client=%p", priv, client);
-
-	mutex_unlock(&(agp_fe.agp_mutex));
-
-	return 0;
-}
-
-static int agpioc_info_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_info userinfo;
-	struct agp_kern_info kerninfo;
-
-	agp_copy_info(agp_bridge, &kerninfo);
-
-	memset(&userinfo, 0, sizeof(userinfo));
-	userinfo.version.major = kerninfo.version.major;
-	userinfo.version.minor = kerninfo.version.minor;
-	userinfo.bridge_id = kerninfo.device->vendor |
-	    (kerninfo.device->device << 16);
-	userinfo.agp_mode = kerninfo.mode;
-	userinfo.aper_base = kerninfo.aper_base;
-	userinfo.aper_size = kerninfo.aper_size;
-	userinfo.pg_total = userinfo.pg_system = kerninfo.max_memory;
-	userinfo.pg_used = kerninfo.current_memory;
-
-	if (copy_to_user(arg, &userinfo, sizeof(struct agp_info)))
-		return -EFAULT;
-
-	return 0;
-}
-
-int agpioc_acquire_wrap(struct agp_file_private *priv)
-{
-	struct agp_controller *controller;
-
-	DBG("");
-
-	if (!(test_bit(AGP_FF_ALLOW_CONTROLLER, &priv->access_flags)))
-		return -EPERM;
-
-	if (agp_fe.current_controller != NULL)
-		return -EBUSY;
-
-	if (!agp_bridge)
-		return -ENODEV;
-
-        if (atomic_read(&agp_bridge->agp_in_use))
-                return -EBUSY;
-
-	atomic_inc(&agp_bridge->agp_in_use);
-
-	agp_fe.backend_acquired = true;
-
-	controller = agp_find_controller_by_pid(priv->my_pid);
-
-	if (controller != NULL) {
-		agp_controller_make_current(controller);
-	} else {
-		controller = agp_create_controller(priv->my_pid);
-
-		if (controller == NULL) {
-			agp_fe.backend_acquired = false;
-			agp_backend_release(agp_bridge);
-			return -ENOMEM;
-		}
-		agp_insert_controller(controller);
-		agp_controller_make_current(controller);
-	}
-
-	set_bit(AGP_FF_IS_CONTROLLER, &priv->access_flags);
-	set_bit(AGP_FF_IS_VALID, &priv->access_flags);
-	return 0;
-}
-
-int agpioc_release_wrap(struct agp_file_private *priv)
-{
-	DBG("");
-	agp_controller_release_current(agp_fe.current_controller, priv);
-	return 0;
-}
-
-int agpioc_setup_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_setup mode;
-
-	DBG("");
-	if (copy_from_user(&mode, arg, sizeof(struct agp_setup)))
-		return -EFAULT;
-
-	agp_enable(agp_bridge, mode.agp_mode);
-	return 0;
-}
-
-static int agpioc_reserve_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_region reserve;
-	struct agp_client *client;
-	struct agp_file_private *client_priv;
-
-	DBG("");
-	if (copy_from_user(&reserve, arg, sizeof(struct agp_region)))
-		return -EFAULT;
-
-	if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment))
-		return -EFAULT;
-
-	client = agp_find_client_by_pid(reserve.pid);
-
-	if (reserve.seg_count == 0) {
-		/* remove a client */
-		client_priv = agp_find_private(reserve.pid);
-
-		if (client_priv != NULL) {
-			set_bit(AGP_FF_IS_CLIENT, &client_priv->access_flags);
-			set_bit(AGP_FF_IS_VALID, &client_priv->access_flags);
-		}
-		if (client == NULL) {
-			/* client is already removed */
-			return 0;
-		}
-		return agp_remove_client(reserve.pid);
-	} else {
-		struct agp_segment *segment;
-
-		if (reserve.seg_count >= 16384)
-			return -EINVAL;
-
-		segment = kmalloc((sizeof(struct agp_segment) * reserve.seg_count),
-				  GFP_KERNEL);
-
-		if (segment == NULL)
-			return -ENOMEM;
-
-		if (copy_from_user(segment, (void __user *) reserve.seg_list,
-				   sizeof(struct agp_segment) * reserve.seg_count)) {
-			kfree(segment);
-			return -EFAULT;
-		}
-		reserve.seg_list = segment;
-
-		if (client == NULL) {
-			/* Create the client and add the segment */
-			client = agp_create_client(reserve.pid);
-
-			if (client == NULL) {
-				kfree(segment);
-				return -ENOMEM;
-			}
-			client_priv = agp_find_private(reserve.pid);
-
-			if (client_priv != NULL) {
-				set_bit(AGP_FF_IS_CLIENT, &client_priv->access_flags);
-				set_bit(AGP_FF_IS_VALID, &client_priv->access_flags);
-			}
-		}
-		return agp_create_segment(client, &reserve);
-	}
-	/* Will never really happen */
-	return -EINVAL;
-}
-
-int agpioc_protect_wrap(struct agp_file_private *priv)
-{
-	DBG("");
-	/* This function is not currently implemented */
-	return -EINVAL;
-}
-
-static int agpioc_allocate_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_memory *memory;
-	struct agp_allocate alloc;
-
-	DBG("");
-	if (copy_from_user(&alloc, arg, sizeof(struct agp_allocate)))
-		return -EFAULT;
-
-	if (alloc.type >= AGP_USER_TYPES)
-		return -EINVAL;
-
-	memory = agp_allocate_memory_wrap(alloc.pg_count, alloc.type);
-
-	if (memory == NULL)
-		return -ENOMEM;
-
-	alloc.key = memory->key;
-	alloc.physical = memory->physical;
-
-	if (copy_to_user(arg, &alloc, sizeof(struct agp_allocate))) {
-		agp_free_memory_wrap(memory);
-		return -EFAULT;
-	}
-	return 0;
-}
-
-int agpioc_deallocate_wrap(struct agp_file_private *priv, int arg)
-{
-	struct agp_memory *memory;
-
-	DBG("");
-	memory = agp_find_mem_by_key(arg);
-
-	if (memory == NULL)
-		return -EINVAL;
-
-	agp_free_memory_wrap(memory);
-	return 0;
-}
-
-static int agpioc_bind_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_bind bind_info;
-	struct agp_memory *memory;
-
-	DBG("");
-	if (copy_from_user(&bind_info, arg, sizeof(struct agp_bind)))
-		return -EFAULT;
-
-	memory = agp_find_mem_by_key(bind_info.key);
-
-	if (memory == NULL)
-		return -EINVAL;
-
-	return agp_bind_memory(memory, bind_info.pg_start);
-}
-
-static int agpioc_unbind_wrap(struct agp_file_private *priv, void __user *arg)
-{
-	struct agp_memory *memory;
-	struct agp_unbind unbind;
-
-	DBG("");
-	if (copy_from_user(&unbind, arg, sizeof(struct agp_unbind)))
-		return -EFAULT;
-
-	memory = agp_find_mem_by_key(unbind.key);
-
-	if (memory == NULL)
-		return -EINVAL;
-
-	return agp_unbind_memory(memory);
-}
-
-static long agp_ioctl(struct file *file,
-		     unsigned int cmd, unsigned long arg)
-{
-	struct agp_file_private *curr_priv = file->private_data;
-	int ret_val = -ENOTTY;
-
-	DBG("priv=%p, cmd=%x", curr_priv, cmd);
-	mutex_lock(&(agp_fe.agp_mutex));
-
-	if ((agp_fe.current_controller == NULL) &&
-	    (cmd != AGPIOC_ACQUIRE)) {
-		ret_val = -EINVAL;
-		goto ioctl_out;
-	}
-	if ((agp_fe.backend_acquired != true) &&
-	    (cmd != AGPIOC_ACQUIRE)) {
-		ret_val = -EBUSY;
-		goto ioctl_out;
-	}
-	if (cmd != AGPIOC_ACQUIRE) {
-		if (!(test_bit(AGP_FF_IS_CONTROLLER, &curr_priv->access_flags))) {
-			ret_val = -EPERM;
-			goto ioctl_out;
-		}
-		/* Use the original pid of the controller,
-		 * in case it's threaded */
-
-		if (agp_fe.current_controller->pid != curr_priv->my_pid) {
-			ret_val = -EBUSY;
-			goto ioctl_out;
-		}
-	}
-
-	switch (cmd) {
-	case AGPIOC_INFO:
-		ret_val = agpioc_info_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_ACQUIRE:
-		ret_val = agpioc_acquire_wrap(curr_priv);
-		break;
-
-	case AGPIOC_RELEASE:
-		ret_val = agpioc_release_wrap(curr_priv);
-		break;
-
-	case AGPIOC_SETUP:
-		ret_val = agpioc_setup_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_RESERVE:
-		ret_val = agpioc_reserve_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_PROTECT:
-		ret_val = agpioc_protect_wrap(curr_priv);
-		break;
-
-	case AGPIOC_ALLOCATE:
-		ret_val = agpioc_allocate_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_DEALLOCATE:
-		ret_val = agpioc_deallocate_wrap(curr_priv, (int) arg);
-		break;
-
-	case AGPIOC_BIND:
-		ret_val = agpioc_bind_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_UNBIND:
-		ret_val = agpioc_unbind_wrap(curr_priv, (void __user *) arg);
-		break;
-
-	case AGPIOC_CHIPSET_FLUSH:
-		break;
-	}
-
-ioctl_out:
-	DBG("ioctl returns %d\n", ret_val);
-	mutex_unlock(&(agp_fe.agp_mutex));
-	return ret_val;
-}
-
-static const struct file_operations agp_fops =
-{
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.unlocked_ioctl	= agp_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= compat_agp_ioctl,
-#endif
-	.mmap		= agp_mmap,
-	.open		= agp_open,
-	.release	= agp_release,
-};
-
-static struct miscdevice agp_miscdev =
-{
-	.minor	= AGPGART_MINOR,
-	.name	= "agpgart",
-	.fops	= &agp_fops
-};
-
-int agp_frontend_initialize(void)
-{
-	memset(&agp_fe, 0, sizeof(struct agp_front_data));
-	mutex_init(&(agp_fe.agp_mutex));
-
-	if (misc_register(&agp_miscdev)) {
-		printk(KERN_ERR PFX "unable to get minor: %d\n", AGPGART_MINOR);
-		return -EIO;
-	}
-	return 0;
-}
-
-void agp_frontend_cleanup(void)
-{
-	misc_deregister(&agp_miscdev);
-}
-- 
GitLab


From 94f8f319cbcbddce8f82bfaf8ed39eb57efdd457 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 22 Nov 2023 13:09:43 +0100
Subject: [PATCH 0827/2340] drm: Remove Kconfig option for legacy support
 (CONFIG_DRM_LEGACY)

Remove CONFIG_DRM_LEGACY from Kconfig. Nothing depends on the option.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: David Airlie <airlied@gmail.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-15-tzimmermann@suse.de
---
 drivers/gpu/drm/Kconfig | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b7abd436455f0..31cfe2c2a2afa 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -410,27 +410,6 @@ config DRM_HYPERV
 
 	 If M is selected the module will be called hyperv_drm.
 
-# Keep legacy drivers last
-
-menuconfig DRM_LEGACY
-	bool "Enable legacy drivers (DANGEROUS)"
-	depends on DRM && MMU
-	help
-	  Enable legacy DRI1 drivers. Those drivers expose unsafe and dangerous
-	  APIs to user-space, which can be used to circumvent access
-	  restrictions and other security measures. For backwards compatibility
-	  those drivers are still available, but their use is highly
-	  inadvisable and might harm your system.
-
-	  You are recommended to use the safe modeset-only drivers instead, and
-	  perform 3D emulation in user-space.
-
-	  Unless you have strong reasons to go rogue, say "N".
-
-if DRM_LEGACY
-# leave here to list legacy drivers
-endif # DRM_LEGACY
-
 config DRM_EXPORT_FOR_TESTS
 	bool
 
-- 
GitLab


From e843ca2f30e630675e2d2a75c96f4844f2854430 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 03:24:37 +0300
Subject: [PATCH 0828/2340] drm/msm/dpu: correct clk bit for WB2 block

On sc7280 there are two clk bits for WB2: vbif_cli and clk_ctrl. While
programming the VBIF params of WB, the driver should be toggling the
former bit, while the sc7180_mdp, sc7280_mdp and sm8250_mdp structs
list the latter one.

Correct that to ensure proper programming sequence for WB2 on these
platforms.

Fixes: 255f056181ac ("drm/msm/dpu: sc7180: add missing WB2 clock control")
Fixes: 3ce166380567 ("drm/msm/dpu: add writeback support for sc7280")
Fixes: 53324b99bd7b ("drm/msm/dpu: add writeback blocks to the sm8250 DPU catalog")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Tested-by: Paloma Arellano <quic_parellan@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570185/
Link: https://lore.kernel.org/r/20231203002437.1291595-1-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 2 +-
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h | 2 +-
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index 9acf07108899b..2359c16e9206b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -31,7 +31,7 @@ static const struct dpu_mdp_cfg sm8250_mdp = {
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 },
 		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
-		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 	},
 };
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
index 783269c6ead1e..bcfedfc8251af 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
@@ -24,7 +24,7 @@ static const struct dpu_mdp_cfg sc7180_mdp = {
 		[DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 },
-		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 	},
 };
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 93564e9e5fa55..209675de6742f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -24,7 +24,7 @@ static const struct dpu_mdp_cfg sc7280_mdp = {
 		[DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 },
-		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 	},
 };
 
-- 
GitLab


From a0fce84cb1b3b88d3d5853f7ac5f1a3ef7e38620 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:46 +0100
Subject: [PATCH 0829/2340] drm/plane-helper: Move
 drm_plane_helper_atomic_check() into udl

The udl driver is the only caller of drm_plane_helper_atomic_check().
Move the function into the driver. No functional changes.

v2:
	* fix documenation (Sui)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-2-tzimmermann@suse.de
---
 drivers/gpu/drm/drm_crtc_helper.c  |  7 ++-----
 drivers/gpu/drm/drm_plane_helper.c | 32 ------------------------------
 drivers/gpu/drm/udl/udl_modeset.c  | 19 ++++++++++++++++--
 include/drm/drm_plane_helper.h     |  2 --
 4 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index a209659a996c7..2dafc39a27cb9 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -439,11 +439,8 @@ EXPORT_SYMBOL(drm_crtc_helper_set_mode);
  * @state: atomic state object
  *
  * Provides a default CRTC-state check handler for CRTCs that only have
- * one primary plane attached to it.
- *
- * This is often the case for the CRTC of simple framebuffers. See also
- * drm_plane_helper_atomic_check() for the respective plane-state check
- * helper function.
+ * one primary plane attached to it. This is often the case for the CRTC
+ * of simple framebuffers.
  *
  * RETURNS:
  * Zero on success, or an errno code otherwise.
diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index 5e95089676ff8..7982be4b0306d 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -279,35 +279,3 @@ void drm_plane_helper_destroy(struct drm_plane *plane)
 	kfree(plane);
 }
 EXPORT_SYMBOL(drm_plane_helper_destroy);
-
-/**
- * drm_plane_helper_atomic_check() - Helper to check plane atomic-state
- * @plane: plane to check
- * @state: atomic state object
- *
- * Provides a default plane-state check handler for planes whose atomic-state
- * scale and positioning are not expected to change since the plane is always
- * a fullscreen scanout buffer.
- *
- * This is often the case for the primary plane of simple framebuffers. See
- * also drm_crtc_helper_atomic_check() for the respective CRTC-state check
- * helper function.
- *
- * RETURNS:
- * Zero on success, or an errno code otherwise.
- */
-int drm_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state)
-{
-	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane);
-	struct drm_crtc *new_crtc = new_plane_state->crtc;
-	struct drm_crtc_state *new_crtc_state = NULL;
-
-	if (new_crtc)
-		new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc);
-
-	return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state,
-						   DRM_PLANE_NO_SCALING,
-						   DRM_PLANE_NO_SCALING,
-						   false, false);
-}
-EXPORT_SYMBOL(drm_plane_helper_atomic_check);
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 40876bcdd79a4..7702359c90c22 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -21,7 +21,6 @@
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
-#include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
 
@@ -261,6 +260,22 @@ static const uint64_t udl_primary_plane_fmtmods[] = {
 	DRM_FORMAT_MOD_INVALID
 };
 
+static int udl_primary_plane_helper_atomic_check(struct drm_plane *plane,
+						 struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane);
+	struct drm_crtc *new_crtc = new_plane_state->crtc;
+	struct drm_crtc_state *new_crtc_state = NULL;
+
+	if (new_crtc)
+		new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc);
+
+	return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state,
+						   DRM_PLANE_NO_SCALING,
+						   DRM_PLANE_NO_SCALING,
+						   false, false);
+}
+
 static void udl_primary_plane_helper_atomic_update(struct drm_plane *plane,
 						   struct drm_atomic_state *state)
 {
@@ -296,7 +311,7 @@ out_drm_gem_fb_end_cpu_access:
 
 static const struct drm_plane_helper_funcs udl_primary_plane_helper_funcs = {
 	DRM_GEM_SHADOW_PLANE_HELPER_FUNCS,
-	.atomic_check = drm_plane_helper_atomic_check,
+	.atomic_check = udl_primary_plane_helper_atomic_check,
 	.atomic_update = udl_primary_plane_helper_atomic_update,
 };
 
diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h
index 3a574e8cd22f4..75f9c4830564a 100644
--- a/include/drm/drm_plane_helper.h
+++ b/include/drm/drm_plane_helper.h
@@ -26,7 +26,6 @@
 
 #include <linux/types.h>
 
-struct drm_atomic_state;
 struct drm_crtc;
 struct drm_framebuffer;
 struct drm_modeset_acquire_ctx;
@@ -42,7 +41,6 @@ int drm_plane_helper_update_primary(struct drm_plane *plane, struct drm_crtc *cr
 int drm_plane_helper_disable_primary(struct drm_plane *plane,
 				     struct drm_modeset_acquire_ctx *ctx);
 void drm_plane_helper_destroy(struct drm_plane *plane);
-int drm_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state);
 
 /**
  * DRM_PLANE_NON_ATOMIC_FUNCS - Default plane functions for non-atomic drivers
-- 
GitLab


From bb8532601260209d1ee40c52d15e98578b703e47 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:47 +0100
Subject: [PATCH 0830/2340] drm/amdgpu: Do not include <drm/drm_plane_helper.h>

Remove unnecessary include statements for <drm/drm_plane_helper.h>.
The file contains helpers for non-atomic code and should not be
required by most drivers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-3-tzimmermann@suse.de
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index aa43f1761acd3..b8c3a9b104a41 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -92,7 +92,6 @@
 #include <drm/drm_vblank.h>
 #include <drm/drm_audio_component.h>
 #include <drm/drm_gem_atomic_helper.h>
-#include <drm/drm_plane_helper.h>
 
 #include <acpi/video.h>
 
-- 
GitLab


From 85ddae2392b5673aa4bda3c7d14d205d1ed069fe Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:48 +0100
Subject: [PATCH 0831/2340] drm/loongson: Do not include
 <drm/drm_plane_helper.h>

Remove unnecessary include statements for <drm/drm_plane_helper.h>.
The file contains helpers for non-atomic code and should not be
required by most drivers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Tested-by: Sui Jingfeng <suijingfeng@loongson.cn>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-4-tzimmermann@suse.de
---
 drivers/gpu/drm/loongson/lsdc_plane.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/loongson/lsdc_plane.c b/drivers/gpu/drm/loongson/lsdc_plane.c
index 0d50946332229..d227a2c1dcf16 100644
--- a/drivers/gpu/drm/loongson/lsdc_plane.c
+++ b/drivers/gpu/drm/loongson/lsdc_plane.c
@@ -9,7 +9,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
-#include <drm/drm_plane_helper.h>
 
 #include "lsdc_drv.h"
 #include "lsdc_regs.h"
-- 
GitLab


From 2887875256d486c0cbb544e67932526bd681e209 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:49 +0100
Subject: [PATCH 0832/2340] drm/shmobile: Do not include
 <drm/drm_plane_helper.h>

Remove unnecessary include statements for <drm/drm_plane_helper.h>.
The file contains helpers for non-atomic code and should not be
required by most drivers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-5-tzimmermann@suse.de
---
 drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c
index 8f9a728affde8..07ad17d24294d 100644
--- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c
+++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c
@@ -14,7 +14,6 @@
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_dma_helper.h>
-#include <drm/drm_plane_helper.h>
 
 #include "shmob_drm_drv.h"
 #include "shmob_drm_kms.h"
-- 
GitLab


From 9e8f373e8a77c5192532bab6ea267b329fe66b77 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:50 +0100
Subject: [PATCH 0833/2340] drm/solomon: Do not include
 <drm/drm_plane_helper.h>

Remove unnecessary include statements for <drm/drm_plane_helper.h>.
The file contains helpers for non-atomic code and should not be
required by most drivers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-6-tzimmermann@suse.de
---
 drivers/gpu/drm/solomon/ssd130x.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/solomon/ssd130x.h b/drivers/gpu/drm/solomon/ssd130x.h
index acf7cedf0c1ab..075c5c3ee75ac 100644
--- a/drivers/gpu/drm/solomon/ssd130x.h
+++ b/drivers/gpu/drm/solomon/ssd130x.h
@@ -17,7 +17,6 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_plane_helper.h>
 
 #include <linux/regmap.h>
 
-- 
GitLab


From 7e661a06998e06455563b2ff6198c7f3efe31cf2 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:51 +0100
Subject: [PATCH 0834/2340] drm/ofdrm: Do not include <drm/drm_plane_helper.h>

Remove unnecessary include statements for <drm/drm_plane_helper.h>.
The file contains helpers for non-atomic code and should not be
required by most drivers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-7-tzimmermann@suse.de
---
 drivers/gpu/drm/tiny/ofdrm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 05a72473cfc65..ab89b7fc7bf61 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -19,7 +19,6 @@
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_modeset_helper_vtables.h>
-#include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_simple_kms_helper.h>
 
-- 
GitLab


From 81b32f4393cde612e022ff35b556b28001350d3b Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:52 +0100
Subject: [PATCH 0835/2340] drm/simpledrm: Do not include
 <drm/drm_plane_helper.h>

Remove unnecessary include statements for <drm/drm_plane_helper.h>.
The file contains helpers for non-atomic code and should not be
required by most drivers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Sui Jingfeng <suijingfeng@loongson.cn>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-8-tzimmermann@suse.de
---
 drivers/gpu/drm/tiny/simpledrm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index 34bbbd7b53dd9..7ce1c46176750 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -25,7 +25,6 @@
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_modeset_helper_vtables.h>
-#include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
 
 #define DRIVER_NAME	"simpledrm"
-- 
GitLab


From aa5d7cf88bdebe5376970d79878f16d28536f7e2 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 4 Dec 2023 10:07:53 +0100
Subject: [PATCH 0836/2340] drm/xlnx: Do not include <drm/drm_plane_helper.h>

Remove unnecessary include statements for <drm/drm_plane_helper.h>.
The file contains helpers for non-atomic code and should not be
required by most drivers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-9-tzimmermann@suse.de
---
 drivers/gpu/drm/xlnx/zynqmp_kms.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/xlnx/zynqmp_kms.c b/drivers/gpu/drm/xlnx/zynqmp_kms.c
index a7f8611be6f42..db3bb4afbfc46 100644
--- a/drivers/gpu/drm/xlnx/zynqmp_kms.c
+++ b/drivers/gpu/drm/xlnx/zynqmp_kms.c
@@ -27,7 +27,6 @@
 #include <drm/drm_managed.h>
 #include <drm/drm_mode_config.h>
 #include <drm/drm_plane.h>
-#include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_simple_kms_helper.h>
 #include <drm/drm_vblank.h>
-- 
GitLab


From e9d5ae8a9e7e32d0b1bc582996de4f7180cb2ff2 Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Mon, 4 Dec 2023 15:13:37 +0000
Subject: [PATCH 0837/2340] drm/imagination: Removed unused functions in
 pvr_fw_trace

Fixing the warning below due to an unused file level vtable. Removing
only this causes additional warnings for the now unused functions, so
I've removed those too.

>> drivers/gpu/drm/imagination/pvr_fw_trace.c:205:37: warning: 'pvr_fw_trace_group_mask_fops' defined but not used [-Wunused-const-variable=]
     205 | static const struct file_operations pvr_fw_trace_group_mask_fops = {
         |                                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~

Changes since v1:
- Corrected hash in Fixes tag.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311302054.MVYPxFCE-lkp@intel.com/
Fixes: cb56cd610866 ("drm/imagination: Add firmware trace to debugfs")
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204151337.60930-1-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_fw_trace.c | 44 ----------------------
 1 file changed, 44 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c
index 87a42fb6ace68..30f41a10a0cf8 100644
--- a/drivers/gpu/drm/imagination/pvr_fw_trace.c
+++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c
@@ -167,50 +167,6 @@ err_up_read:
 
 #if defined(CONFIG_DEBUG_FS)
 
-static int fw_trace_group_mask_show(struct seq_file *m, void *data)
-{
-	struct pvr_device *pvr_dev = m->private;
-
-	seq_printf(m, "%08x\n", pvr_dev->fw_dev.fw_trace.group_mask);
-
-	return 0;
-}
-
-static int fw_trace_group_mask_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, fw_trace_group_mask_show, inode->i_private);
-}
-
-static ssize_t fw_trace_group_mask_write(struct file *file, const char __user *ubuf, size_t len,
-					 loff_t *offp)
-{
-	struct seq_file *m = file->private_data;
-	struct pvr_device *pvr_dev = m->private;
-	u32 new_group_mask;
-	int err;
-
-	err = kstrtouint_from_user(ubuf, len, 0, &new_group_mask);
-	if (err)
-		return err;
-
-	err = update_logtype(pvr_dev, new_group_mask);
-	if (err)
-		return err;
-
-	pvr_dev->fw_dev.fw_trace.group_mask = new_group_mask;
-
-	return (ssize_t)len;
-}
-
-static const struct file_operations pvr_fw_trace_group_mask_fops = {
-	.owner = THIS_MODULE,
-	.open = fw_trace_group_mask_open,
-	.read = seq_read,
-	.write = fw_trace_group_mask_write,
-	.llseek = default_llseek,
-	.release = single_release,
-};
-
 struct pvr_fw_trace_seq_data {
 	/** @buffer: Pointer to copy of trace data. */
 	u32 *buffer;
-- 
GitLab


From 4b83b783ad778f7e69312fa61d1bee8e76e2156f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 4 Dec 2023 08:32:10 +0100
Subject: [PATCH 0838/2340] drm/imagination: move update_logtype() into ifdef
 section

This function is only used when debugfs is enabled, and otherwise
causes a build warning:

drivers/gpu/drm/imagination/pvr_fw_trace.c:135:1: error: 'update_logtype' defined but not used [-Werror=unused-function]

Move the #ifdef check to include this function as well.

Fixes: cb56cd610866 ("drm/imagination: Add firmware trace to debugfs")
Acked-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20231204073231.1164163-1-arnd@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/imagination/pvr_fw_trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c
index 30f41a10a0cf8..7159fc479001c 100644
--- a/drivers/gpu/drm/imagination/pvr_fw_trace.c
+++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c
@@ -121,6 +121,8 @@ void pvr_fw_trace_fini(struct pvr_device *pvr_dev)
 	pvr_fw_object_unmap_and_destroy(fw_trace->tracebuf_ctrl_obj);
 }
 
+#if defined(CONFIG_DEBUG_FS)
+
 /**
  * update_logtype() - Send KCCB command to trigger FW to update logtype
  * @pvr_dev: Target PowerVR device
@@ -165,8 +167,6 @@ err_up_read:
 	return err;
 }
 
-#if defined(CONFIG_DEBUG_FS)
-
 struct pvr_fw_trace_seq_data {
 	/** @buffer: Pointer to copy of trace data. */
 	u32 *buffer;
-- 
GitLab


From 28d3d0696688154cc04983f343011d07bf0508e4 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Wed, 6 Dec 2023 18:05:15 +0300
Subject: [PATCH 0839/2340] drm/bridge: nxp-ptn3460: simplify some error
 checking

The i2c_master_send/recv() functions return negative error codes or
they return "len" on success.  So the error handling here can be written
as just normal checks for "if (ret < 0) return ret;".  No need to
complicate things.

Btw, in this code the "len" parameter can never be zero, but even if
it were, then I feel like this would still be the best way to write it.

Fixes: 914437992876 ("drm/bridge: nxp-ptn3460: fix i2c_master_send() error checking")
Suggested-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/04242630-42d8-4920-8c67-24ac9db6b3c9@moroto.mountain
---
 drivers/gpu/drm/bridge/nxp-ptn3460.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c
index 9b7eb8c669c15..7c0076e499533 100644
--- a/drivers/gpu/drm/bridge/nxp-ptn3460.c
+++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c
@@ -54,15 +54,15 @@ static int ptn3460_read_bytes(struct ptn3460_bridge *ptn_bridge, char addr,
 	int ret;
 
 	ret = i2c_master_send(ptn_bridge->client, &addr, 1);
-	if (ret <= 0) {
+	if (ret < 0) {
 		DRM_ERROR("Failed to send i2c command, ret=%d\n", ret);
-		return ret ?: -EIO;
+		return ret;
 	}
 
 	ret = i2c_master_recv(ptn_bridge->client, buf, len);
-	if (ret != len) {
+	if (ret < 0) {
 		DRM_ERROR("Failed to recv i2c data, ret=%d\n", ret);
-		return ret < 0 ? ret : -EIO;
+		return ret;
 	}
 
 	return 0;
@@ -78,9 +78,9 @@ static int ptn3460_write_byte(struct ptn3460_bridge *ptn_bridge, char addr,
 	buf[1] = val;
 
 	ret = i2c_master_send(ptn_bridge->client, buf, ARRAY_SIZE(buf));
-	if (ret != ARRAY_SIZE(buf)) {
+	if (ret < 0) {
 		DRM_ERROR("Failed to send i2c command, ret=%d\n", ret);
-		return ret < 0 ? ret : -EIO;
+		return ret;
 	}
 
 	return 0;
-- 
GitLab


From fd2ef5fa3556549c565f5b7a07776d899a8ed8b7 Mon Sep 17 00:00:00 2001
From: Jiadong Zhu <Jiadong.Zhu@amd.com>
Date: Fri, 1 Dec 2023 08:38:15 +0800
Subject: [PATCH 0840/2340] drm/amdgpu: disable MCBP by default

Disable MCBP(mid command buffer preemption) by default as old Mesa
hangs with it. We shall not enable the feature that breaks old usermode
driver.

Fixes: 50a7c8765ca6 ("drm/amdgpu: enable mcbp by default on gfx9")
Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6c0cf64d465a7..d5b950fd1d852 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3861,10 +3861,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
 		adev->gfx.mcbp = true;
 	else if (amdgpu_mcbp == 0)
 		adev->gfx.mcbp = false;
-	else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
-		 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
-		 adev->gfx.num_gfx_rings)
-		adev->gfx.mcbp = true;
 
 	if (amdgpu_sriov_vf(adev))
 		adev->gfx.mcbp = true;
-- 
GitLab


From d5e78f1c2611e22204490b679d962d8f51762969 Mon Sep 17 00:00:00 2001
From: Bokun Zhang <bokun.zhang@amd.com>
Date: Wed, 29 Nov 2023 19:02:57 -0500
Subject: [PATCH 0841/2340] drm/amd/amdgpu: Move vcn4 fw_shared init to a
 single function

- Move VCN4's fw_shared initialization to a separated function.
  This way, the function can be reused at different locations.

Signed-off-by: Bokun Zhang <bokun.zhang@amd.com>
Reviewed-by: Emily Deng <Emily.Deng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 45 ++++++++++++++++-----------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 48bfcd0d558b7..28b7f96b42e59 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -100,6 +100,31 @@ static int vcn_v4_0_early_init(void *handle)
 	return amdgpu_vcn_early_init(adev);
 }
 
+static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+	fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+	fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	fw_shared->sq.is_enabled = 1;
+
+	fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+	fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+		AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+	if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
+	    IP_VERSION(4, 0, 2)) {
+		fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
+		fw_shared->drm_key_wa.method =
+			AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+	}
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+	return 0;
+}
+
 /**
  * vcn_v4_0_sw_init - sw init for VCN block
  *
@@ -124,8 +149,6 @@ static int vcn_v4_0_sw_init(void *handle)
 		return r;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
-
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
@@ -161,23 +184,7 @@ static int vcn_v4_0_sw_init(void *handle)
 		if (r)
 			return r;
 
-		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
-		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
-		fw_shared->sq.is_enabled = 1;
-
-		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
-		fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
-			AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
-
-		if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
-		    IP_VERSION(4, 0, 2)) {
-			fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
-			fw_shared->drm_key_wa.method =
-				AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
-		}
-
-		if (amdgpu_vcnfw_log)
-			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+		vcn_v4_0_fw_shared_init(adev, i);
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
-- 
GitLab


From e57cd73f971194e94bc42d57b9fcb184c93a8754 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Fri, 1 Dec 2023 06:24:57 -0700
Subject: [PATCH 0842/2340] drm/amd/display: Optimize fast validation cases

Optimize fast validation cases to only validate the highest voltage
level. This works because during fast validation we only care if the
mode can be supported or not (at any vlevel).

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../dc/dml/dcn30/display_mode_vba_30.c        | 29 +++++++++++--------
 .../dc/resource/dcn30/dcn30_resource.c        |  2 ++
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index 3686f1e7de3ab..63c48c29ba491 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -3542,7 +3542,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 {
 	struct vba_vars_st *v = &mode_lib->vba;
 	int MinPrefetchMode, MaxPrefetchMode;
-	int i;
+	int i, start_state;
 	unsigned int j, k, m;
 	bool   EnoughWritebackUnits = true;
 	bool   WritebackModeSupport = true;
@@ -3553,6 +3553,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
 
+	if (mode_lib->validate_max_state)
+		start_state = v->soc.num_states - 1;
+	else
+		start_state = 0;
+
 	CalculateMinAndMaxPrefetchMode(
 		mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
 		&MinPrefetchMode, &MaxPrefetchMode);
@@ -3851,7 +3856,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			v->SingleDPPViewportSizeSupportPerPlane,
 			&v->ViewportSizeSupport[0][0]);
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
 			v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
@@ -4007,7 +4012,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*Total Available Pipes Support Check*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
 				v->TotalAvailablePipesSupport[i][j] = true;
@@ -4046,7 +4051,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			v->RequiresDSC[i][k] = false;
 			v->RequiresFEC[i][k] = false;
@@ -4174,7 +4179,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			}
 		}
 	}
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		v->DIOSupport[i] = true;
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
@@ -4185,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 
-	for (i = 0; i < v->soc.num_states; ++i) {
+	for (i = start_state; i < v->soc.num_states; ++i) {
 		v->ODMCombine4To1SupportCheckOK[i] = true;
 		for (k = 0; k < v->NumberOfActivePlanes; ++k) {
 			if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
@@ -4197,7 +4202,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		v->NotEnoughDSCUnits[i] = false;
 		v->TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -4217,7 +4222,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	/*DSC Delay per state*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			if (v->OutputBppPerState[i][k] == BPP_INVALID) {
 				v->BPP = 0.0;
@@ -4333,7 +4338,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
 	}
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 				v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
@@ -5075,7 +5080,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*PTE Buffer Size Check*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			v->PTEBufferSizeNotExceeded[i][j] = true;
 			for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -5136,7 +5141,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	/*Mode Support, Voltage State and SOC Configuration*/
 
-	for (i = v->soc.num_states - 1; i >= 0; i--) {
+	for (i = v->soc.num_states - 1; i >= start_state; i--) {
 		for (j = 0; j < 2; j++) {
 			if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
 					&& v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
@@ -5158,7 +5163,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	{
 		unsigned int MaximumMPCCombine = 0;
-		for (i = v->soc.num_states; i >= 0; i--) {
+		for (i = v->soc.num_states; i >= start_state; i--) {
 			if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
 				v->VoltageLevel = i;
 				v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index 2b6dcb489b901..37a64186f3241 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -1682,6 +1682,7 @@ noinline bool dcn30_internal_validate_bw(
 		 * We don't actually support prefetch mode 2, so require that we
 		 * at least support prefetch mode 1.
 		 */
+		context->bw_ctx.dml.validate_max_state = fast_validate;
 		context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
 			dm_allow_self_refresh;
 
@@ -1691,6 +1692,7 @@ noinline bool dcn30_internal_validate_bw(
 			memset(merge, 0, sizeof(merge));
 			vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
 		}
+		context->bw_ctx.dml.validate_max_state = false;
 	}
 
 	dml_log_mode_support_params(&context->bw_ctx.dml);
-- 
GitLab


From 885c71ad791c1709f668a37f701d33e6872a902f Mon Sep 17 00:00:00 2001
From: Charlene Liu <charlene.liu@amd.com>
Date: Fri, 1 Dec 2023 06:24:58 -0700
Subject: [PATCH 0843/2340] drm/amd/display: initialize all the dpm level's
 stutter latency

Fix issue when override level bigger than default. Levels 5, 6, and 7
had zero stutter latency, this is because override level being
initialized after stutter latency inits.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Syed Hassan <syed.hassan@amd.com>
Reviewed-by: Allen Pan <allen.pan@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Charlene Liu <charlene.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index fa8fe5bf7e575..255af7875c087 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -341,6 +341,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 		break;
 	}
 
+	if (dml2->config.bbox_overrides.clks_table.num_states)
+			p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
+
 	/* Override from passed values, if available */
 	for (i = 0; i < p->in_states->num_states; i++) {
 		if (dml2->config.bbox_overrides.sr_exit_latency_us) {
@@ -397,7 +400,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 	}
 	/* Copy clocks tables entries, if available */
 	if (dml2->config.bbox_overrides.clks_table.num_states) {
-		p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) {
 			p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz;
-- 
GitLab


From dd2c5fac91d46df9dc1bf025ef23eff4704bd85f Mon Sep 17 00:00:00 2001
From: Relja Vojvodic <relja.vojvodic@amd.com>
Date: Fri, 1 Dec 2023 06:24:59 -0700
Subject: [PATCH 0844/2340] drm/amd/display: Add ODM check during pipe
 split/merge validation

[why]
When querying DML for a vlevel after pipes have been split or merged the
ODM policy would revert to a default policy, which could cause the query
to use the incorrect ODM status. In this case ODM 2to1 was validated,
but the last DML query would assume no ODM and return the incorrect
vlevel.

[how]
Added ODM check to apply the correct ODM policy before querying DML.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Relja Vojvodic <relja.vojvodic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/dcn32/dcn32_resource_helpers.c | 26 +++++++++++++++++++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  3 +++
 .../dc/resource/dcn32/dcn32_resource.h        |  2 ++
 3 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index 389ac7ae1154f..e8159a459bcef 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -806,3 +806,29 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 
 	return result;
 }
+
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context,
+		display_e2e_pipe_params_st *pipes)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *pipe = NULL;
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		int odm_slice_count = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+		pipe = &res_ctx->pipe_ctx[i];
+		odm_slice_count = resource_get_odm_slice_count(pipe);
+
+		if (odm_slice_count == 1)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+		else if (odm_slice_count == 2)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+		else if (odm_slice_count == 4)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+
+		pipe_cnt++;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 26411d4e97300..de209ca0cf8cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2192,6 +2192,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		int i;
 
 		pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+		dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
 
 		/* repopulate_pipes = 1 means the pipes were either split or merged. In this case
 		 * we have to re-calculate the DET allocation and run through DML once more to
@@ -2200,7 +2201,9 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		 * */
 		context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
 					dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
 		vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
 		if (vlevel == context->bw_ctx.dml.soc.num_states) {
 			/* failed after DET size changes */
 			goto validate_fail;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index b2f20e6cfb38f..9ca799da1a56e 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -193,6 +193,8 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context);
 
 bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel);
 
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes);
+
 /* definitions for run time init of reg offsets */
 
 /* CLK SRC */
-- 
GitLab


From fa745b554733ff0ed9ff918a0a53267300444c88 Mon Sep 17 00:00:00 2001
From: Michael Strauss <michael.strauss@amd.com>
Date: Fri, 1 Dec 2023 06:25:00 -0700
Subject: [PATCH 0845/2340] drm/amd/display: Only enumerate top local sink as
 DP2 output

[WHY]
Many DCN generations only have two HPO link encoders and therefore only
support driving a max of two DP2 PHYs. DP2 MST hubs currently can not
pass 3x display validation as each downstream sink is enumerated as
separate DP2 output.

[HOW]
Count MST hubs once by treating only 1st remote sink in topology as an
encoder.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Michael Strauss <michael.strauss@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 33eab80e89a84..6ba393e5b8ee7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -157,6 +157,15 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 {
 	/* If this assert is hit then we have a link encoder dynamic management issue */
 	ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true);
+
+	/* Count MST hubs once by treating only 1st remote sink in topology as an encoder */
+	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) {
+			return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
+				pipe_ctx->link_res.hpo_dp_link_enc &&
+				dc_is_dp_signal(pipe_ctx->stream->signal) &&
+				(pipe_ctx->stream->link->remote_sinks[0] == pipe_ctx->stream->sink));
+	}
+
 	return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
 		pipe_ctx->link_res.hpo_dp_link_enc &&
 		dc_is_dp_signal(pipe_ctx->stream->signal));
-- 
GitLab


From a546a27684407942604bccdf3b62f0765c0f6399 Mon Sep 17 00:00:00 2001
From: Xiaogang Chen <xiaogang.chen@amd.com>
Date: Fri, 1 Dec 2023 23:38:12 -0600
Subject: [PATCH 0846/2340] drm/amdkfd: Use partial migrations/mapping for
 GPU/CPU page faults in SVM

This patch implements partial migration/mapping for gpu/cpu page faults in SVM
according to migration granularity(default 2MB). A svm range may include pages
from both system ram and vram of one gpu now. These chagnes are expected to
improve migration performance and reduce mmu callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 146 ++++++++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   4 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 176 +++++++++++------------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |   9 +-
 4 files changed, 176 insertions(+), 159 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5c..b854cbf06dcee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
 	migrate_vma_pages(&migrate);
@@ -479,6 +479,8 @@ out:
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ out:
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			unsigned long start_mgr, unsigned long last_mgr,
 			struct mm_struct *mm, uint32_t trigger)
 {
 	unsigned long addr, start, end;
@@ -498,10 +501,10 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	unsigned long cpages = 0;
 	long r = 0;
 
-	if (prange->actual_loc == best_loc) {
-		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-			 prange->svms, prange->start, prange->last, best_loc);
-		return 0;
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
 	}
 
 	node = svm_range_get_node_by_id(prange, best_loc);
@@ -510,18 +513,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 		return -ENODEV;
 	}
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-		 prange->start, prange->last, best_loc);
+	pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+		prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+		best_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	r = svm_range_vram_node_new(node, prange, true);
 	if (r) {
 		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
 		return r;
 	}
-	ttm_res_offset = prange->offset << PAGE_SHIFT;
+	ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -544,8 +548,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 
 	if (cpages) {
 		prange->actual_loc = best_loc;
-		svm_range_dma_unmap(prange);
-	} else {
+		prange->vram_pages = prange->vram_pages + cpages;
+	} else if (!prange->actual_loc) {
+		/* if no page migrated and all pages from prange are at
+		 * sys ram drop svm_bo got from svm_range_vram_node_new
+		 */
 		svm_range_vram_node_free(prange);
 	}
 
@@ -663,9 +670,8 @@ out_oom:
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages got migrated
  */
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
@@ -676,6 +682,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 	uint64_t npages = (end - start) >> PAGE_SHIFT;
 	unsigned long upages = npages;
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	struct amdgpu_device *adev = node->adev;
 	struct kfd_process_device *pdd;
 	struct dma_fence *mfence = NULL;
@@ -725,10 +732,10 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
 				    scratch, npages);
@@ -751,17 +758,21 @@ out_free:
 	kvfree(buf);
 out:
 	if (!r && cpages) {
+		mpages = cpages - upages;
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+			WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
 	}
-	return r ? r : upages;
+
+	return r ? r : mpages;
 }
 
 /**
  * svm_migrate_vram_to_ram - migrate svm range from device to system
  * @prange: range structure
  * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page need be migrated to sys ram
+ * @last_mgr: last page need be migrated to sys ram
  * @trigger: reason of migration
  * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
  *
@@ -771,6 +782,7 @@ out:
  * 0 - OK, otherwise error code
  */
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    unsigned long start_mgr, unsigned long last_mgr,
 			    uint32_t trigger, struct page *fault_page)
 {
 	struct kfd_node *node;
@@ -778,26 +790,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 	unsigned long addr;
 	unsigned long start;
 	unsigned long end;
-	unsigned long upages = 0;
+	unsigned long mpages = 0;
 	long r = 0;
 
+	/* this pragne has no any vram page to migrate to sys ram */
 	if (!prange->actual_loc) {
 		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
 			 prange->start, prange->last);
 		return 0;
 	}
 
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
+	}
+
 	node = svm_range_get_node_by_id(prange, prange->actual_loc);
 	if (!node) {
 		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
 		return -ENODEV;
 	}
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
-		 prange->svms, prange, prange->start, prange->last,
+		 prange->svms, prange, start_mgr, last_mgr,
 		 prange->actual_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -816,14 +835,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed %ld to migrate prange %p\n", r, prange);
 			break;
 		} else {
-			upages += r;
+			mpages += r;
 		}
 		addr = next;
 	}
 
-	if (r >= 0 && !upages) {
-		svm_range_vram_node_free(prange);
-		prange->actual_loc = 0;
+	if (r >= 0) {
+		prange->vram_pages -= mpages;
+
+		/* prange does not have vram page set its actual_loc to system
+		 * and drop its svm_bo ref
+		 */
+		if (prange->vram_pages == 0 && prange->ttm_res) {
+			prange->actual_loc = 0;
+			svm_range_vram_node_free(prange);
+		}
 	}
 
 	return r < 0 ? r : 0;
@@ -833,17 +859,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
  * svm_migrate_vram_to_vram - migrate svm range from device to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start: start page need be migrated to sys ram
+ * @last: last page need be migrated to sys ram
  * @mm: process mm, use current->mm if NULL
  * @trigger: reason of migration
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *
+ * migrate all vram pages in prange to sys ram, then migrate
+ * [start, last] pages from sys ram to gpu node best_loc.
+ *
  * Return:
  * 0 - OK, otherwise error code
  */
 static int
 svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
-			 struct mm_struct *mm, uint32_t trigger)
+			unsigned long start, unsigned long last,
+			struct mm_struct *mm, uint32_t trigger)
 {
 	int r, retries = 3;
 
@@ -855,7 +887,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
 
 	do {
-		r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
+					    trigger, NULL);
 		if (r)
 			return r;
 	} while (prange->actual_loc && --retries);
@@ -863,17 +896,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	if (prange->actual_loc)
 		return -EDEADLK;
 
-	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);
 }
 
 int
 svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+		    unsigned long start, unsigned long last,
 		    struct mm_struct *mm, uint32_t trigger)
 {
-	if  (!prange->actual_loc)
-		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	if  (!prange->actual_loc || prange->actual_loc == best_loc)
+		return svm_migrate_ram_to_vram(prange, best_loc, start, last,
+					       mm, trigger);
+
 	else
-		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
+		return svm_migrate_vram_to_vram(prange, best_loc, start, last,
+						mm, trigger);
 
 }
 
@@ -889,10 +926,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
  */
 static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 {
+	unsigned long start, last, size;
 	unsigned long addr = vmf->address;
 	struct svm_range_bo *svm_bo;
-	enum svm_work_list_ops op;
-	struct svm_range *parent;
 	struct svm_range *prange;
 	struct kfd_process *p;
 	struct mm_struct *mm;
@@ -929,51 +965,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 
 	mutex_lock(&p->svms.lock);
 
-	prange = svm_range_from_addr(&p->svms, addr, &parent);
+	prange = svm_range_from_addr(&p->svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
 		r = -EFAULT;
 		goto out_unlock_svms;
 	}
 
-	mutex_lock(&parent->migrate_mutex);
-	if (prange != parent)
-		mutex_lock_nested(&prange->migrate_mutex, 1);
+	mutex_lock(&prange->migrate_mutex);
 
 	if (!prange->actual_loc)
 		goto out_unlock_prange;
 
-	svm_range_lock(parent);
-	if (prange != parent)
-		mutex_lock_nested(&prange->lock, 1);
-	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
-	if (prange != parent)
-		mutex_unlock(&prange->lock);
-	svm_range_unlock(parent);
-	if (r) {
-		pr_debug("failed %d to split range by granularity\n", r);
-		goto out_unlock_prange;
-	}
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max(ALIGN_DOWN(addr, size), prange->start);
+	last = min(ALIGN(addr + 1, size) - 1, prange->last);
 
-	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
-				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
-				    vmf->page);
+	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last,
+				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);
 	if (r)
 		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
-			 r, prange->svms, prange, prange->start, prange->last);
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && parent == prange)
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
-	else
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
-	svm_range_add_list_work(&p->svms, parent, mm, op);
-	schedule_deferred_list_work(&p->svms);
+			r, prange->svms, prange, start, last);
 
 out_unlock_prange:
-	if (prange != parent)
-		mutex_unlock(&prange->migrate_mutex);
-	mutex_unlock(&parent->migrate_mutex);
+	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:
 	mutex_unlock(&p->svms.lock);
 out_unref_process:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 487f263681648..2eebf67f9c2ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {
 };
 
 int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+			unsigned long start, unsigned long last,
 			struct mm_struct *mm, uint32_t trigger);
+
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    unsigned long start, unsigned long last,
 			    uint32_t trigger, struct page *fault_page);
+
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 613f195102873..617d20f0380f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,12 +158,13 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		      unsigned long offset, unsigned long npages,
-		      unsigned long *hmm_pfns, uint32_t gpuidx)
+		      unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
 	dma_addr_t *addr = prange->dma_addr[gpuidx];
 	struct device *dev = adev->dev;
 	struct page *page;
+	uint64_t vram_pages_dev;
 	int i, r;
 
 	if (!addr) {
@@ -173,6 +174,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}
 
+	vram_pages_dev = 0;
 	addr += offset;
 	for (i = 0; i < npages; i++) {
 		if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
@@ -182,6 +184,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		if (is_zone_device_page(page)) {
 			struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
 
+			vram_pages_dev++;
 			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
 				   bo_adev->vm_manager.vram_base_offset -
 				   bo_adev->kfd.pgmap.range.start;
@@ -198,13 +201,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 	}
+	*vram_pages = vram_pages_dev;
 	return 0;
 }
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		  unsigned long offset, unsigned long npages,
-		  unsigned long *hmm_pfns)
+		  unsigned long *hmm_pfns, uint64_t *vram_pages)
 {
 	struct kfd_process *p;
 	uint32_t gpuidx;
@@ -223,7 +227,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}
 
 		r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
-					  hmm_pfns, gpuidx);
+					  hmm_pfns, gpuidx, vram_pages);
 		if (r)
 			break;
 	}
@@ -349,6 +353,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
 	prange->validate_timestamp = 0;
+	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
 
@@ -395,6 +400,8 @@ static void svm_range_bo_release(struct kref *kref)
 			 prange->start, prange->last);
 		mutex_lock(&prange->lock);
 		prange->svm_bo = NULL;
+		/* prange should not hold vram page now */
+		WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
 		mutex_unlock(&prange->lock);
 
 		spin_lock(&svm_bo->list_lock);
@@ -975,6 +982,11 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 	new->svm_bo = svm_range_bo_ref(old->svm_bo);
 	new->ttm_res = old->ttm_res;
 
+	/* set new's vram_pages as old range's now, the acurate vram_pages
+	 * will be updated during mapping
+	 */
+	new->vram_pages = min(old->vram_pages, new->npages);
+
 	spin_lock(&new->svm_bo->list_lock);
 	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
 	spin_unlock(&new->svm_bo->list_lock);
@@ -1135,66 +1147,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
 	list_add_tail(&pchild->child_list, &prange->child_list);
 }
 
-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			       unsigned long addr, struct svm_range *parent,
-			       struct svm_range *prange)
-{
-	struct svm_range *head, *tail;
-	unsigned long start, last, size;
-	int r;
-
-	/* Align splited range start and size to granularity size, then a single
-	 * PTE will be used for whole range, this reduces the number of PTE
-	 * updated and the L1 TLB space used for translation.
-	 */
-	size = 1UL << prange->granularity;
-	start = ALIGN_DOWN(addr, size);
-	last = ALIGN(addr + 1, size) - 1;
-
-	pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
-		 prange->svms, prange->start, prange->last, start, last, size);
-
-	if (start > prange->start) {
-		r = svm_range_split(prange, start, prange->last, &head);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
-	}
-
-	if (last < prange->last) {
-		r = svm_range_split(prange, prange->start, last, &tail);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
-	}
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
-		prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
-		pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
-			 prange, prange->start, prange->last,
-			 SVM_OP_ADD_RANGE_AND_MAP);
-	}
-	return 0;
-}
 static bool
 svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
 {
@@ -1614,12 +1566,14 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+				      unsigned long map_start, unsigned long map_last,
 				      struct svm_range *prange, int32_t gpuidx,
 				      bool intr, bool wait, bool flush_tlb)
 {
 	struct svm_validate_context *ctx;
 	unsigned long start, end, addr;
 	struct kfd_process *p;
+	uint64_t vram_pages;
 	void *owner;
 	int32_t idx;
 	int r = 0;
@@ -1688,11 +1642,15 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 	}
 
+	vram_pages = 0;
 	start = prange->start << PAGE_SHIFT;
 	end = (prange->last + 1) << PAGE_SHIFT;
 	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
+		unsigned long map_start_vma;
+		unsigned long map_last_vma;
 		struct vm_area_struct *vma;
+		uint64_t vram_pages_vma;
 		unsigned long next = 0;
 		unsigned long offset;
 		unsigned long npages;
@@ -1721,9 +1679,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		if (!r) {
 			offset = (addr - start) >> PAGE_SHIFT;
 			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-					      hmm_range->hmm_pfns);
+					      hmm_range->hmm_pfns, &vram_pages_vma);
 			if (r)
 				pr_debug("failed %d to dma map range\n", r);
+			else
+				vram_pages += vram_pages_vma;
 		}
 
 		svm_range_lock(prange);
@@ -1737,9 +1697,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			r = -EAGAIN;
 		}
 
-		if (!r)
-			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-						  ctx->bitmap, wait, flush_tlb);
+		if (!r) {
+			map_start_vma = max(map_start, prange->start + offset);
+			map_last_vma = min(map_last, prange->start + offset + npages - 1);
+			if (map_start_vma <= map_last_vma) {
+				offset = map_start_vma - prange->start;
+				npages = map_last_vma - map_start_vma + 1;
+				r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+							  ctx->bitmap, wait, flush_tlb);
+			}
+		}
 
 		if (!r && next == end)
 			prange->mapped_to_gpu = true;
@@ -1749,6 +1716,19 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		addr = next;
 	}
 
+	if (addr == end) {
+		prange->vram_pages = vram_pages;
+
+		/* if prange does not include any vram page and it
+		 * has not released svm_bo drop its svm_bo reference
+		 * and set its actaul_loc to sys ram
+		 */
+		if (!vram_pages && prange->ttm_res) {
+			prange->actual_loc = 0;
+			svm_range_vram_node_free(prange);
+		}
+	}
+
 	svm_range_unreserve_bos(ctx);
 	if (!r)
 		prange->validate_timestamp = ktime_get_boottime();
@@ -1832,8 +1812,8 @@ static void svm_range_restore_work(struct work_struct *work)
 		 */
 		mutex_lock(&prange->migrate_mutex);
 
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       false, true, false);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, false, true, false);
 		if (r)
 			pr_debug("failed %d to map 0x%lx to gpus\n", r,
 				 prange->start);
@@ -2001,6 +1981,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
 	new->mapped_to_gpu = old->mapped_to_gpu;
+	new->vram_pages = old->vram_pages;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
@@ -2908,6 +2889,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
 			uint64_t addr, bool write_fault)
 {
+	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
 	struct svm_range_list *svms;
 	struct svm_range *prange;
@@ -3043,40 +3025,44 @@ retry_write_locked:
 	kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
 				       write_fault, timestamp);
 
-	if (prange->actual_loc != best_loc) {
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+	last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+	if (prange->actual_loc != 0 || best_loc != 0) {
 		migration = true;
+
 		if (best_loc) {
-			r = svm_migrate_to_vram(prange, best_loc, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+			r = svm_migrate_to_vram(prange, best_loc, start, last,
+					mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			if (r) {
 				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 					 r, addr);
 				/* Fallback to system memory if migration to
 				 * VRAM failed
 				 */
-				if (prange->actual_loc)
-					r = svm_migrate_vram_to_ram(prange, mm,
-					   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					   NULL);
+				if (prange->actual_loc && prange->actual_loc != best_loc)
+					r = svm_migrate_vram_to_ram(prange, mm, start, last,
+						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 				else
 					r = 0;
 			}
 		} else {
-			r = svm_migrate_vram_to_ram(prange, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					NULL);
+			r = svm_migrate_vram_to_ram(prange, mm, start, last,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 		}
 		if (r) {
 			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
-				 r, svms, prange->start, prange->last);
+				 r, svms, start, last);
 			goto out_unlock_range;
 		}
 	}
 
-	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+	r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+				       false, false);
 	if (r)
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-			 r, svms, prange->start, prange->last);
+			 r, svms, start, last);
 
 	kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 				     migration);
@@ -3422,18 +3408,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 	*migrated = false;
 	best_loc = svm_range_best_prefetch_location(prange);
 
-	if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
-	    best_loc == prange->actual_loc)
+	/* when best_loc is a gpu node and same as prange->actual_loc
+	 * we still need do migration as prange->actual_loc !=0 does
+	 * not mean all pages in prange are vram. hmm migrate will pick
+	 * up right pages during migration.
+	 */
+	if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
+	    (best_loc == 0 && prange->actual_loc == 0))
 		return 0;
 
 	if (!best_loc) {
-		r = svm_migrate_vram_to_ram(prange, mm,
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
 					KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
 		*migrated = !r;
 		return r;
 	}
 
-	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
+	r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
+				mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 	*migrated = !r;
 
 	return r;
@@ -3488,7 +3480,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 
 		mutex_lock(&prange->migrate_mutex);
 		do {
+			/* migrate all vram pages in this prange to sys ram
+			 * after that prange->actual_loc should be zero
+			 */
 			r = svm_migrate_vram_to_ram(prange, mm,
+					prange->start, prange->last,
 					KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
 		} while (!r && prange->actual_loc && --retries);
 
@@ -3612,8 +3608,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 
 		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
 
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true, flush_tlb);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, true, true, flush_tlb);
 		if (r)
 			pr_debug("failed %d to map svm range\n", r);
 
@@ -3627,8 +3623,8 @@ out_unlock_range:
 		pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
 			 prange, prange->start, prange->last);
 		mutex_lock(&prange->migrate_mutex);
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true, prange->mapped_to_gpu);
+		r = svm_range_validate_and_map(mm,  prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
 		if (r)
 			pr_debug("failed %d on remap svm range\n", r);
 		mutex_unlock(&prange->migrate_mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index c528df1d0ba2c..026863a0abcd3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -78,6 +78,7 @@ struct svm_work_list_item {
  * @update_list:link list node used to add to update_list
  * @mapping:    bo_va mapping structure to create and update GPU page table
  * @npages:     number of pages
+ * @vram_pages: vram pages number in this svm_range
  * @dma_addr:   dma mapping address on each GPU for system memory physical page
  * @ttm_res:    vram ttm resource map
  * @offset:     range start offset within mm_nodes
@@ -88,7 +89,9 @@ struct svm_work_list_item {
  * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
  * @perferred_loc: perferred location, 0 for CPU, or GPU id
  * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
- * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @actual_loc: this svm_range location. 0: all pages are from sys ram;
+ *              GPU id: this svm_range may include vram pages from GPU with
+ *              id actual_loc.
  * @granularity:migration granularity, log2 num pages
  * @invalid:    not 0 means cpu page table is invalidated
  * @validate_timestamp: system timestamp when range is validated
@@ -112,6 +115,7 @@ struct svm_range {
 	struct list_head		list;
 	struct list_head		update_list;
 	uint64_t			npages;
+	uint64_t			vram_pages;
 	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];
 	struct ttm_resource		*ttm_res;
 	uint64_t			offset;
@@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
 int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			    bool clear);
 void svm_range_vram_node_free(struct svm_range *prange);
-int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			       unsigned long addr, struct svm_range *parent,
-			       struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			    uint32_t vmid, uint32_t node_id, uint64_t addr,
 			    bool write_fault);
-- 
GitLab


From 62fbfdbbe3a1f188a6310d9418956b918840cd33 Mon Sep 17 00:00:00 2001
From: Daniel Miess <daniel.miess@amd.com>
Date: Fri, 1 Dec 2023 06:25:01 -0700
Subject: [PATCH 0847/2340] drm/amd/display: Add missing dcn35 RCO registers

[Why]
Some registers needed for root clock gating in dcn35 are not defined in
the dccg header.

[How]
Add the needed registers and temporarily disable some register writes
that are now taking place successfully until the registers can be
properly enabled.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Daniel Miess <daniel.miess@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h | 32 ++++++++++
 .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 62 ++++++++++++++++++-
 .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h | 51 +++++++++++++++
 3 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
index 76da59d8caaf5..ef5c22f41563d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
@@ -296,6 +296,38 @@
 	type DTBCLK_P1_GATE_DISABLE;\
 	type DTBCLK_P2_GATE_DISABLE;\
 	type DTBCLK_P3_GATE_DISABLE;\
+	type DSCCLK0_ROOT_GATE_DISABLE;\
+	type DSCCLK1_ROOT_GATE_DISABLE;\
+	type DSCCLK2_ROOT_GATE_DISABLE;\
+	type DSCCLK3_ROOT_GATE_DISABLE;\
+	type SYMCLKA_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKB_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKC_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKD_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKE_FE_ROOT_GATE_DISABLE;\
+	type DPPCLK0_ROOT_GATE_DISABLE;\
+	type DPPCLK1_ROOT_GATE_DISABLE;\
+	type DPPCLK2_ROOT_GATE_DISABLE;\
+	type DPPCLK3_ROOT_GATE_DISABLE;\
+	type HDMISTREAMCLK0_ROOT_GATE_DISABLE;\
+	type SYMCLKA_ROOT_GATE_DISABLE;\
+	type SYMCLKB_ROOT_GATE_DISABLE;\
+	type SYMCLKC_ROOT_GATE_DISABLE;\
+	type SYMCLKD_ROOT_GATE_DISABLE;\
+	type SYMCLKE_ROOT_GATE_DISABLE;\
+	type PHYA_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYB_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYC_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYD_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYE_REFCLK_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK0_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK1_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK2_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK3_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK0_GATE_DISABLE;\
+	type DPSTREAMCLK1_GATE_DISABLE;\
+	type DPSTREAMCLK2_GATE_DISABLE;\
+	type DPSTREAMCLK3_GATE_DISABLE;\
 
 struct dccg_shift {
 	DCCG_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
index 142efd390d862..f1ba7bb792ea2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
@@ -506,6 +506,64 @@ static void dccg35_dpp_root_clock_control(
 	dccg->dpp_clock_gated[dpp_inst] = !clock_on;
 }
 
+static void dccg35_disable_symclk32_se(
+		struct dccg *dccg,
+		int hpo_se_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	/* set refclk as the source for symclk32_se */
+	switch (hpo_se_inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE0_SRC_SEL, 0,
+				SYMCLK32_SE0_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE0_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE0_GATE_DISABLE, 0);
+		}
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE1_SRC_SEL, 0,
+				SYMCLK32_SE1_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE1_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE1_GATE_DISABLE, 0);
+		}
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE2_SRC_SEL, 0,
+				SYMCLK32_SE2_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE2_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE2_GATE_DISABLE, 0);
+		}
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE3_SRC_SEL, 0,
+				SYMCLK32_SE3_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE3_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE3_GATE_DISABLE, 0);
+		}
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
 void dccg35_init(struct dccg *dccg)
 {
 	int otg_inst;
@@ -514,7 +572,7 @@ void dccg35_init(struct dccg *dccg)
 	 * will cause DCN to hang.
 	 */
 	for (otg_inst = 0; otg_inst < 4; otg_inst++)
-		dccg31_disable_symclk32_se(dccg, otg_inst);
+		dccg35_disable_symclk32_se(dccg, otg_inst);
 
 	if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
 		for (otg_inst = 0; otg_inst < 2; otg_inst++)
@@ -788,7 +846,7 @@ static const struct dccg_funcs dccg35_funcs = {
 	.dccg_init = dccg35_init,
 	.set_dpstreamclk = dccg35_set_dpstreamclk,
 	.enable_symclk32_se = dccg31_enable_symclk32_se,
-	.disable_symclk32_se = dccg31_disable_symclk32_se,
+	.disable_symclk32_se = dccg35_disable_symclk32_se,
 	.enable_symclk32_le = dccg31_enable_symclk32_le,
 	.disable_symclk32_le = dccg31_disable_symclk32_le,
 	.set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
index bde48bee0119b..1586a45ca3bd4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
@@ -34,6 +34,7 @@
 #define DCCG_REG_LIST_DCN35() \
 	DCCG_REG_LIST_DCN314(),\
 	SR(DPPCLK_CTRL),\
+	SR(DCCG_GATE_DISABLE_CNTL4),\
 	SR(DCCG_GATE_DISABLE_CNTL5),\
 	SR(DCCG_GATE_DISABLE_CNTL6),\
 	SR(DCCG_GLOBAL_FGCG_REP_CNTL),\
@@ -180,6 +181,56 @@
 	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
 	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
 	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL, DISPCLK_DCCG_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
 
 struct dccg *dccg35_create(
 		struct dc_context *ctx,
-- 
GitLab


From b17ef04bf3a4346d66404454d6a646343ddc9749 Mon Sep 17 00:00:00 2001
From: Lewis Huang <lewis.huang@amd.com>
Date: Fri, 1 Dec 2023 06:25:02 -0700
Subject: [PATCH 0848/2340] drm/amd/display: Pass pwrseq inst for backlight and
 ABM

[Why]
OTG inst and pwrseq inst mapping is not align therefore we cannot use
otg_inst as pwrseq inst to get DCIO register.

[How]
1. Pass the correct pwrseq instance to dmub when set abm pipe.
2. LVTMA control index change from panel_inst to pwrseq_inst.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Phil Hsieh <phil.hsieh@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Lewis Huang <lewis.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/bios/bios_parser2.c    |  4 +-
 .../drm/amd/display/dc/bios/command_table2.c  | 12 ++--
 .../drm/amd/display/dc/bios/command_table2.h  |  2 +-
 .../gpu/drm/amd/display/dc/dc_bios_types.h    |  2 +-
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c |  8 ++-
 .../gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c |  7 ++-
 .../gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h |  2 +-
 .../amd/display/dc/dcn31/dcn31_panel_cntl.c   |  5 +-
 .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 16 ++---
 .../amd/display/dc/hwss/dcn21/dcn21_hwseq.c   | 36 ++++++++---
 drivers/gpu/drm/amd/display/dc/inc/hw/abm.h   |  3 +-
 .../drm/amd/display/dc/inc/hw/panel_cntl.h    |  2 +
 .../drm/amd/display/dc/link/link_factory.c    | 59 +++++++++++++------
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 14 ++++-
 14 files changed, 119 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 7cdb1a8a0ba06..2fb804eb5702e 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1691,7 +1691,7 @@ static enum bp_result bios_parser_enable_disp_power_gating(
 static enum bp_result bios_parser_enable_lvtma_control(
 	struct dc_bios *dcb,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 	struct bios_parser *bp = BP_FROM_DCB(dcb);
@@ -1699,7 +1699,7 @@ static enum bp_result bios_parser_enable_lvtma_control(
 	if (!bp->cmd_tbl.enable_lvtma_control)
 		return BP_RESULT_FAILURE;
 
-	return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, bypass_panel_control_wait);
+	return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, pwrseq_instance, bypass_panel_control_wait);
 }
 
 static bool bios_parser_is_accelerated_mode(
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 90a02d7bd3da3..ab0adabf9dd4c 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -976,7 +976,7 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id)
 static enum bp_result enable_lvtma_control(
 	struct bios_parser *bp,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait);
 
 static void init_enable_lvtma_control(struct bios_parser *bp)
@@ -989,7 +989,7 @@ static void init_enable_lvtma_control(struct bios_parser *bp)
 static void enable_lvtma_control_dmcub(
 	struct dc_dmub_srv *dmcub,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 
@@ -1002,8 +1002,8 @@ static void enable_lvtma_control_dmcub(
 			DMUB_CMD__VBIOS_LVTMA_CONTROL;
 	cmd.lvtma_control.data.uc_pwr_action =
 			uc_pwr_on;
-	cmd.lvtma_control.data.panel_inst =
-			panel_instance;
+	cmd.lvtma_control.data.pwrseq_inst =
+			pwrseq_instance;
 	cmd.lvtma_control.data.bypass_panel_control_wait =
 			bypass_panel_control_wait;
 	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
@@ -1012,7 +1012,7 @@ static void enable_lvtma_control_dmcub(
 static enum bp_result enable_lvtma_control(
 	struct bios_parser *bp,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 	enum bp_result result = BP_RESULT_FAILURE;
@@ -1021,7 +1021,7 @@ static enum bp_result enable_lvtma_control(
 	    bp->base.ctx->dc->debug.dmub_command_table) {
 		enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv,
 				uc_pwr_on,
-				panel_instance,
+				pwrseq_instance,
 				bypass_panel_control_wait);
 		return BP_RESULT_OK;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
index b6d09bf6cf72b..41c8c014397f2 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
@@ -96,7 +96,7 @@ struct cmd_tbl {
 			struct bios_parser *bp, uint8_t id);
 	enum bp_result (*enable_lvtma_control)(struct bios_parser *bp,
 			uint8_t uc_pwr_on,
-			uint8_t panel_instance,
+			uint8_t pwrseq_instance,
 			uint8_t bypass_panel_control_wait);
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index be9aa1a71847d..26940d94d8fb4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -140,7 +140,7 @@ struct dc_vbios_funcs {
 	enum bp_result (*enable_lvtma_control)(
 		struct dc_bios *bios,
 		uint8_t uc_pwr_on,
-		uint8_t panel_instance,
+		uint8_t pwrseq_instance,
 		uint8_t bypass_panel_control_wait);
 
 	enum bp_result (*get_soc_bb_info)(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index d3e6544022b78..930fd929e93a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -145,7 +145,11 @@ static bool dmub_abm_save_restore_ex(
 	return ret;
 }
 
-static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+static bool dmub_abm_set_pipe_ex(struct abm *abm,
+		uint32_t otg_inst,
+		uint32_t option,
+		uint32_t panel_inst,
+		uint32_t pwrseq_inst)
 {
 	bool ret = false;
 	unsigned int feature_support;
@@ -153,7 +157,7 @@ static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t op
 	feature_support = abm_feature_support(abm, panel_inst);
 
 	if (feature_support == ABM_LCD_SUPPORT)
-		ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst);
+		ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst, pwrseq_inst);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
index 592a8f7a1c6d0..42c802afc4681 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
@@ -254,7 +254,11 @@ bool dmub_abm_save_restore(
 	return true;
 }
 
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+bool dmub_abm_set_pipe(struct abm *abm,
+		uint32_t otg_inst,
+		uint32_t option,
+		uint32_t panel_inst,
+		uint32_t pwrseq_inst)
 {
 	union dmub_rb_cmd cmd;
 	struct dc_context *dc = abm->ctx;
@@ -264,6 +268,7 @@ bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint
 	cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
 	cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
 	cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+	cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
 	cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
index 853564d7f4714..07ea6c8d414f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
@@ -44,7 +44,7 @@ bool dmub_abm_save_restore(
 		struct dc_context *dc,
 		unsigned int panel_inst,
 		struct abm_save_restore *pData);
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst);
+bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst);
 bool dmub_abm_set_backlight_level(struct abm *abm,
 		unsigned int backlight_pwm_u16_16,
 		unsigned int frame_ramp,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 217acd4e292a3..d849b1eaa4a5c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -50,7 +50,7 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub
 	cmd->panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
 	cmd->panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO;
 	cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data);
-	cmd->panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+	cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
 
 	return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 }
@@ -78,7 +78,7 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
 	cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
 	cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_HW_INIT;
 	cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data);
-	cmd.panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+	cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
 	cmd.panel_cntl.data.bl_pwm_cntl = panel_cntl->stored_backlight_registers.BL_PWM_CNTL;
 	cmd.panel_cntl.data.bl_pwm_period_cntl = panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL;
 	cmd.panel_cntl.data.bl_pwm_ref_div1 =
@@ -157,4 +157,5 @@ void dcn31_panel_cntl_construct(
 	dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
 	dcn31_panel_cntl->base.ctx = init_data->ctx;
 	dcn31_panel_cntl->base.inst = init_data->inst;
+	dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 0a331d17ee926..c73fe5e9b3610 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -790,7 +790,7 @@ void dce110_edp_power_control(
 	struct dc_context *ctx = link->ctx;
 	struct bp_transmitter_control cntl = { 0 };
 	enum bp_result bp_result;
-	uint8_t panel_instance;
+	uint8_t pwrseq_instance;
 
 
 	if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
@@ -873,7 +873,7 @@ void dce110_edp_power_control(
 		cntl.coherent = false;
 		cntl.lanes_number = LANE_COUNT_FOUR;
 		cntl.hpd_sel = link->link_enc->hpd_source;
-		panel_instance = link->panel_cntl->inst;
+		pwrseq_instance = link->panel_cntl->pwrseq_inst;
 
 		if (ctx->dc->ctx->dmub_srv &&
 				ctx->dc->debug.dmub_command_table) {
@@ -881,11 +881,11 @@ void dce110_edp_power_control(
 			if (cntl.action == TRANSMITTER_CONTROL_POWER_ON) {
 				bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 						LVTMA_CONTROL_POWER_ON,
-						panel_instance, link->link_powered_externally);
+						pwrseq_instance, link->link_powered_externally);
 			} else {
 				bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 						LVTMA_CONTROL_POWER_OFF,
-						panel_instance, link->link_powered_externally);
+						pwrseq_instance, link->link_powered_externally);
 			}
 		}
 
@@ -956,7 +956,7 @@ void dce110_edp_backlight_control(
 {
 	struct dc_context *ctx = link->ctx;
 	struct bp_transmitter_control cntl = { 0 };
-	uint8_t panel_instance;
+	uint8_t pwrseq_instance;
 	unsigned int pre_T11_delay = OLED_PRE_T11_DELAY;
 	unsigned int post_T7_delay = OLED_POST_T7_DELAY;
 
@@ -1009,7 +1009,7 @@ void dce110_edp_backlight_control(
 	 */
 	/* dc_service_sleep_in_milliseconds(50); */
 		/*edp 1.2*/
-	panel_instance = link->panel_cntl->inst;
+	pwrseq_instance = link->panel_cntl->pwrseq_inst;
 
 	if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) {
 		if (!link->dc->config.edp_no_power_sequencing)
@@ -1034,11 +1034,11 @@ void dce110_edp_backlight_control(
 		if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON)
 			ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 					LVTMA_CONTROL_LCD_BLON,
-					panel_instance, link->link_powered_externally);
+					pwrseq_instance, link->link_powered_externally);
 		else
 			ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 					LVTMA_CONTROL_LCD_BLOFF,
-					panel_instance, link->link_powered_externally);
+					pwrseq_instance, link->link_powered_externally);
 	}
 
 	link_transmitter_control(ctx->dc_bios, &cntl);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 467812cf33686..08783ad097d21 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -137,7 +137,8 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->stream->dpms_off = true;
 }
 
-static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst,
+		uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst)
 {
 	union dmub_rb_cmd cmd;
 	struct dc_context *dc = abm->ctx;
@@ -147,6 +148,7 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t optio
 	cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
 	cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
 	cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+	cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
 	cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
@@ -179,7 +181,6 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
 	struct abm *abm = pipe_ctx->stream_res.abm;
 	uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
 	struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
-
 	struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
 
 	if (dmcu) {
@@ -190,9 +191,13 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
 	if (abm && panel_cntl) {
 		if (abm->funcs && abm->funcs->set_pipe_ex) {
 			abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE,
-			panel_cntl->inst);
+					panel_cntl->inst, panel_cntl->pwrseq_inst);
 		} else {
-			dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst);
+				dmub_abm_set_pipe(abm,
+						otg_inst,
+						SET_ABM_PIPE_IMMEDIATELY_DISABLE,
+						panel_cntl->inst,
+						panel_cntl->pwrseq_inst);
 		}
 		panel_cntl->funcs->store_backlight_level(panel_cntl);
 	}
@@ -212,9 +217,16 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx)
 
 	if (abm && panel_cntl) {
 		if (abm->funcs && abm->funcs->set_pipe_ex) {
-			abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+			abm->funcs->set_pipe_ex(abm,
+					otg_inst,
+					SET_ABM_PIPE_NORMAL,
+					panel_cntl->inst,
+					panel_cntl->pwrseq_inst);
 		} else {
-			dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+				dmub_abm_set_pipe(abm, otg_inst,
+						SET_ABM_PIPE_NORMAL,
+						panel_cntl->inst,
+						panel_cntl->pwrseq_inst);
 		}
 	}
 }
@@ -237,9 +249,17 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
 
 		if (abm && panel_cntl) {
 			if (abm->funcs && abm->funcs->set_pipe_ex) {
-				abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+				abm->funcs->set_pipe_ex(abm,
+						otg_inst,
+						SET_ABM_PIPE_NORMAL,
+						panel_cntl->inst,
+						panel_cntl->pwrseq_inst);
 			} else {
-				dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+					dmub_abm_set_pipe(abm,
+							otg_inst,
+							SET_ABM_PIPE_NORMAL,
+							panel_cntl->inst,
+							panel_cntl->pwrseq_inst);
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index 33db15d69f233..9f521cf0fc5a2 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -64,7 +64,8 @@ struct abm_funcs {
 	bool (*set_pipe_ex)(struct abm *abm,
 			unsigned int otg_inst,
 			unsigned int option,
-			unsigned int panel_inst);
+			unsigned int panel_inst,
+			unsigned int pwrseq_inst);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
index 24af9d80b9373..248adc1705e35 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -56,12 +56,14 @@ struct panel_cntl_funcs {
 struct panel_cntl_init_data {
 	struct dc_context *ctx;
 	uint32_t inst;
+	uint32_t pwrseq_inst;
 };
 
 struct panel_cntl {
 	const struct panel_cntl_funcs *funcs;
 	struct dc_context *ctx;
 	uint32_t inst;
+	uint32_t pwrseq_inst;
 	/* registers setting needs to be saved and restored at InitBacklight */
 	struct panel_cntl_backlight_registers stored_backlight_registers;
 };
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 6b306ea58b9bc..5464d8d26bd37 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -369,6 +369,30 @@ static enum transmitter translate_encoder_to_transmitter(
 	}
 }
 
+static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link)
+{
+	uint8_t pwrseq_inst = 0xF;
+	struct dc_context *dc_ctx = link->dc->ctx;
+
+	DC_LOGGER_INIT(dc_ctx->logger);
+
+	switch (link->eng_id) {
+	case ENGINE_ID_DIGA:
+		pwrseq_inst = 0;
+		break;
+	case ENGINE_ID_DIGB:
+		pwrseq_inst = 1;
+		break;
+	default:
+		DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id);
+		ASSERT(false);
+		break;
+	}
+
+	return pwrseq_inst;
+}
+
+
 static void link_destruct(struct dc_link *link)
 {
 	int i;
@@ -596,24 +620,6 @@ static bool construct_phy(struct dc_link *link,
 	link->ddc_hw_inst =
 		dal_ddc_get_line(get_ddc_pin(link->ddc));
 
-
-	if (link->dc->res_pool->funcs->panel_cntl_create &&
-		(link->link_id.id == CONNECTOR_ID_EDP ||
-			link->link_id.id == CONNECTOR_ID_LVDS)) {
-		panel_cntl_init_data.ctx = dc_ctx;
-		panel_cntl_init_data.inst =
-			panel_cntl_init_data.ctx->dc_edp_id_count;
-		link->panel_cntl =
-			link->dc->res_pool->funcs->panel_cntl_create(
-								&panel_cntl_init_data);
-		panel_cntl_init_data.ctx->dc_edp_id_count++;
-
-		if (link->panel_cntl == NULL) {
-			DC_ERROR("Failed to create link panel_cntl!\n");
-			goto panel_cntl_create_fail;
-		}
-	}
-
 	enc_init_data.ctx = dc_ctx;
 	bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0,
 			      &enc_init_data.encoder);
@@ -644,6 +650,23 @@ static bool construct_phy(struct dc_link *link,
 	link->dc->res_pool->dig_link_enc_count++;
 
 	link->link_enc_hw_inst = link->link_enc->transmitter;
+
+	if (link->dc->res_pool->funcs->panel_cntl_create &&
+		(link->link_id.id == CONNECTOR_ID_EDP ||
+			link->link_id.id == CONNECTOR_ID_LVDS)) {
+		panel_cntl_init_data.ctx = dc_ctx;
+		panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count;
+		panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link);
+		link->panel_cntl =
+			link->dc->res_pool->funcs->panel_cntl_create(
+								&panel_cntl_init_data);
+		panel_cntl_init_data.ctx->dc_edp_id_count++;
+
+		if (link->panel_cntl == NULL) {
+			DC_ERROR("Failed to create link panel_cntl!\n");
+			goto panel_cntl_create_fail;
+		}
+	}
 	for (i = 0; i < 4; i++) {
 		if (bp_funcs->get_device_tag(dc_ctx->dc_bios,
 					     link->link_id, i,
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index a365f6c096de8..3c092064c72ec 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -3498,6 +3498,16 @@ struct dmub_cmd_abm_set_pipe_data {
 	 * TODO: Remove.
 	 */
 	uint8_t ramping_boundary;
+
+	/**
+	 * PwrSeq HW Instance.
+	 */
+	uint8_t pwrseq_inst;
+
+	/**
+	 * Explicit padding to 4 byte boundary.
+	 */
+	uint8_t pad[3];
 };
 
 /**
@@ -3878,7 +3888,7 @@ enum dmub_cmd_panel_cntl_type {
  * struct dmub_cmd_panel_cntl_data - Panel control data.
  */
 struct dmub_cmd_panel_cntl_data {
-	uint32_t inst; /**< panel instance */
+	uint32_t pwrseq_inst; /**< pwrseq instance */
 	uint32_t current_backlight; /* in/out */
 	uint32_t bl_pwm_cntl; /* in/out */
 	uint32_t bl_pwm_period_cntl; /* in/out */
@@ -3937,7 +3947,7 @@ struct dmub_cmd_lvtma_control_data {
 	uint8_t uc_pwr_action; /**< LVTMA_ACTION */
 	uint8_t bypass_panel_control_wait;
 	uint8_t reserved_0[2]; /**< For future use */
-	uint8_t panel_inst; /**< LVTMA control instance */
+	uint8_t pwrseq_inst; /**< LVTMA control instance */
 	uint8_t reserved_1[3]; /**< For future use */
 };
 
-- 
GitLab


From d5df648ec830cfd775bdacb3a3640c1e16de90f2 Mon Sep 17 00:00:00 2001
From: Krunoslav Kovac <krunoslav.kovac@amd.com>
Date: Fri, 1 Dec 2023 06:25:03 -0700
Subject: [PATCH 0849/2340] drm/amd/display: Change dither policy for 10bpc to
 round

We use spatial dither by default for all output bpc (6/8/10). While it
makes some sense for FP16, for ARGB2101010 surfaces it makes little
sense as even if we skip color pipeline to preserve bit accuracy,
spatial dither adds random noise so a few percent pixels are 1 bit off.
This commit chages the 10bpc out dither policy to rounding.

Also, in Polaris/Vega times, policy used to be round for 10bpc out; it
looks like it got inadvertently changed for Navi. Difference is only
detectable with capture cards.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Reviewed-by: Anthony Koo <anthony.koo@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Krunoslav Kovac <krunoslav.kovac@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index e1c02527d04ae..4382d9ae4292d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -4773,7 +4773,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
 			option = DITHER_OPTION_SPATIAL8;
 			break;
 		case COLOR_DEPTH_101010:
-			option = DITHER_OPTION_SPATIAL10;
+			option = DITHER_OPTION_TRUN10;
 			break;
 		default:
 			option = DITHER_OPTION_DISABLE;
@@ -4799,6 +4799,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
 			option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
 		fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
 		fmt_bit_depth->flags.TRUNCATE_DEPTH = 2;
+		if (option == DITHER_OPTION_TRUN10)
+			fmt_bit_depth->flags.TRUNCATE_MODE = 1;
 	}
 
 	/* special case - Formatter can only reduce by 4 bits at most.
-- 
GitLab


From ab779466166348eecf17d20f620aa9a47965c934 Mon Sep 17 00:00:00 2001
From: Josip Pavic <josip.pavic@amd.com>
Date: Fri, 1 Dec 2023 06:25:04 -0700
Subject: [PATCH 0850/2340] drm/amd/display: Increase scratch buffer size

[Why]
Larger data blocks are expected to be transferred between driver and FW
in the future.

[How]
Embiggen the scratch buffer to a cromulent size.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Josip Pavic <josip.pavic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 38360adc53d97..a329a6ecf4dd6 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -64,7 +64,7 @@
 
 
 /* Default scratch mem size. */
-#define DMUB_SCRATCH_MEM_SIZE (256)
+#define DMUB_SCRATCH_MEM_SIZE (1024)
 
 /* Number of windows in use. */
 #define DMUB_NUM_WINDOWS (DMUB_WINDOW_TOTAL)
-- 
GitLab


From 23cf5a5cd33a518b6bdbe9966dc49f1cf6bfe532 Mon Sep 17 00:00:00 2001
From: Charlene Liu <charlene.liu@amd.com>
Date: Fri, 1 Dec 2023 06:25:05 -0700
Subject: [PATCH 0851/2340] drm/amd/display: insert drv-pmfw log + rollback to
 new context

Rollback to new context for active display: this was previous tested
sequence. Avoid to do OTG master toggle is no active display at all,
this w/a was for fifo err.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Chris Park <chris.park@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Charlene Liu <charlene.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  | 27 ++++++------
 .../amd/display/dc/clk_mgr/dcn35/dcn35_smu.c  | 43 ++++++++++++++-----
 2 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index d5fde7d23fbf8..9e0fa01ecb795 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -80,12 +80,12 @@
 
 static int dcn35_get_active_display_cnt_wa(
 		struct dc *dc,
-		struct dc_state *context)
+		struct dc_state *context,
+		int *all_active_disps)
 {
-	int i, display_count;
+	int i, display_count = 0;
 	bool tmds_present = false;
 
-	display_count = 0;
 	for (i = 0; i < context->stream_count; i++) {
 		const struct dc_stream_state *stream = context->streams[i];
 
@@ -103,7 +103,8 @@ static int dcn35_get_active_display_cnt_wa(
 				link->link_enc->funcs->is_dig_enabled(link->link_enc))
 			display_count++;
 	}
-
+	if (all_active_disps != NULL)
+		*all_active_disps = display_count;
 	/* WA for hang on HDMI after display off back on*/
 	if (display_count == 0 && tmds_present)
 		display_count = 1;
@@ -224,15 +225,16 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
 	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
 	struct dc *dc = clk_mgr_base->ctx->dc;
-	int display_count;
+	int display_count = 0;
 	bool update_dppclk = false;
 	bool update_dispclk = false;
 	bool dpp_clock_lowered = false;
+	int all_active_disps = 0;
 
 	if (dc->work_arounds.skip_clock_update)
 		return;
 
-	/* DTBCLK is fixed, so set a default if unspecified. */
+	display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps);
 	if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
 		new_clocks->ref_dtbclk_khz = 600000;
 
@@ -254,7 +256,6 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 		}
 		/* check that we're not already in lower */
 		if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
-			display_count = dcn35_get_active_display_cnt_wa(dc, context);
 			/* if we can go lower, go lower */
 			if (display_count == 0)
 				clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
@@ -311,11 +312,13 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	}
 
 	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
-		dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
-
 		clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
-		dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-		dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
+		if (all_active_disps != 0) {
+			dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+			dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
+			dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
+		} else
+			dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
 
 		update_dispclk = true;
 	}
@@ -826,7 +829,7 @@ static void dcn35_set_low_power_state(struct clk_mgr *clk_mgr_base)
 	struct dc_state *context = dc->current_state;
 
 	if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
-		display_count = dcn35_get_active_display_cnt_wa(dc, context);
+		display_count = dcn35_get_active_display_cnt_wa(dc, context, NULL);
 		/* if we can go lower, go lower */
 		if (display_count == 0)
 			clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
index af0a0f2925950..786b62ab8d0fc 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -279,7 +279,7 @@ void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, u
 		clk_mgr,
 		VBIOSSMC_MSG_SetDisplayIdleOptimizations,
 		idle_info);
-	smu_print("VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info  = %d\n", idle_info);
+	smu_print("%s: VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info  = %x\n", __func__, idle_info);
 }
 
 void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
@@ -298,7 +298,7 @@ void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool e
 			clk_mgr,
 			VBIOSSMC_MSG_SetDisplayIdleOptimizations,
 			idle_info.data);
-	smu_print("dcn35_smu_enable_phy_refclk_pwrdwn  = %d\n", enable ? 1 : 0);
+	smu_print("%s smu_enable_phy_refclk_pwrdwn  = %d\n", __func__, enable ? 1 : 0);
 }
 
 void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
@@ -310,6 +310,7 @@ void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
 			clk_mgr,
 			VBIOSSMC_MSG_UpdatePmeRestore,
 			0);
+	smu_print("%s: SMC_MSG_UpdatePmeRestore\n", __func__);
 }
 
 void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
@@ -350,7 +351,7 @@ void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
 
 void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
 {
-	unsigned int msg_id, param;
+	unsigned int msg_id, param, retv;
 
 	if (!clk_mgr->smu_present)
 		return;
@@ -360,27 +361,32 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 	case DCN_ZSTATE_SUPPORT_ALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 9) | (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_DISALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = 0;
+		smu_print("%s: SMC_MSG_AllowZstatesEntr msg_id = DISALLOW, param = %d\n",  __func__, param);
 		break;
 
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10);
+		smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_ONLY, param = %d\n", __func__, param);
 		break;
 
 	default: //DCN_ZSTATE_SUPPORT_UNKNOWN
@@ -390,11 +396,11 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 	}
 
 
-	dcn35_smu_send_msg_with_param(
+	retv = dcn35_smu_send_msg_with_param(
 		clk_mgr,
 		msg_id,
 		param);
-	smu_print("dcn35_smu_set_zstate_support msg_id = %d, param = %d\n", msg_id, param);
+	smu_print("%s:  msg_id = %d, param = 0x%x, return = %d\n", __func__, msg_id, param, retv);
 }
 
 int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
@@ -408,7 +414,7 @@ int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
 						 VBIOSSMC_MSG_GetDprefclkFreq,
 						 0);
 
-	smu_print("dcn35_smu_get_DPREF clk  = %d mhz\n", dprefclk);
+	smu_print("%s:  SMU DPREF clk  = %d mhz\n",  __func__, dprefclk);
 	return dprefclk * 1000;
 }
 
@@ -423,7 +429,7 @@ int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
 					       VBIOSSMC_MSG_GetDtbclkFreq,
 					       0);
 
-	smu_print("dcn35_smu_get_dtbclk  = %d mhz\n", dtbclk);
+	smu_print("%s: get_dtbclk  = %dmhz\n", __func__, dtbclk);
 	return dtbclk * 1000;
 }
 /* Arg = 1: Turn DTB on; 0: Turn DTB CLK OFF. when it is on, it is 600MHZ */
@@ -436,7 +442,7 @@ void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
 			clk_mgr,
 			VBIOSSMC_MSG_SetDtbClk,
 			enable);
-	smu_print("dcn35_smu_set_dtbclk  = %d \n", enable ? 1 : 0);
+	smu_print("%s: smu_set_dtbclk = %d\n", __func__, enable ? 1 : 0);
 }
 
 void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
@@ -445,30 +451,45 @@ void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *cl
 			clk_mgr,
 			VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown,
 			enable);
+	smu_print("%s: smu_enable_48mhz_tmdp_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0);
 }
 
 int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr)
 {
-	return dcn35_smu_send_msg_with_param(
+	int retv;
+
+	retv = dcn35_smu_send_msg_with_param(
 		clk_mgr,
 		VBIOSSMC_MSG_DispPsrExit,
 		0);
+	smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv);
+	return retv;
 }
 
 int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr)
 {
-	return dcn35_smu_send_msg_with_param(
+	int retv;
+
+	retv = dcn35_smu_send_msg_with_param(
 			clk_mgr,
 			VBIOSSMC_MSG_QueryIPS2Support,
 			0);
+
+	smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv);
+	return retv;
 }
 
 void dcn35_smu_write_ips_scratch(struct clk_mgr_internal *clk_mgr, uint32_t param)
 {
 	REG_WRITE(MP1_SMN_C2PMSG_71, param);
+	smu_print("%s: write_ips_scratch = %x\n", __func__, param);
 }
 
 uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr)
 {
-	return REG_READ(MP1_SMN_C2PMSG_71);
+	uint32_t retv;
+
+	retv = REG_READ(MP1_SMN_C2PMSG_71);
+	smu_print("%s: dcn35_smu_read_ips_scratch = %x\n",  __func__, retv);
+	return retv;
 }
-- 
GitLab


From 94bbf802efd0a8f13147d6664af6e653637340a8 Mon Sep 17 00:00:00 2001
From: Ilya Bakoulin <ilya.bakoulin@amd.com>
Date: Fri, 1 Dec 2023 06:25:06 -0700
Subject: [PATCH 0852/2340] drm/amd/display: Fix MST PBN/X.Y value calculations

Changing PBN calculation to be more in line with spec. We don't need to
inflate PBN_NATIVE value by the 1.006 margin, since that is already
taken care of in the get_pbn_per_slot function.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Wenjing Liu <wenjing.liu@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Ilya Bakoulin <ilya.bakoulin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index aa0a086aa7fc3..b16b2e14312e9 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -1057,18 +1057,21 @@ static struct fixed31_32 get_pbn_from_bw_in_kbps(uint64_t kbps)
 	uint32_t denominator = 1;
 
 	/*
-	 * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+	 * The 1.006 factor (margin 5300ppm + 300ppm ~ 0.6% as per spec) is not
+	 * required when determining PBN/time slot utilization on the link between
+	 * us and the branch, since that overhead is already accounted for in
+	 * the get_pbn_per_slot function.
+	 *
 	 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
 	 * common multiplier to render an integer PBN for all link rate/lane
 	 * counts combinations
 	 * calculate
-	 * peak_kbps *= (1006/1000)
 	 * peak_kbps *= (64/54)
-	 * peak_kbps *= 8    convert to bytes
+	 * peak_kbps /= (8 * 1000) convert to bytes
 	 */
 
-	numerator = 64 * PEAK_FACTOR_X1000;
-	denominator = 54 * 8 * 1000 * 1000;
+	numerator = 64;
+	denominator = 54 * 8 * 1000;
 	kbps *= numerator;
 	peak_kbps = dc_fixpt_from_fraction(kbps, denominator);
 
-- 
GitLab


From 823423b8ec7b56e22dad83e171c9ca6418679169 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Fri, 1 Dec 2023 06:25:07 -0700
Subject: [PATCH 0853/2340] drm/amd/display: Use channel_width = 2 for vram
 table 3.0

VBIOS has suggested to use channel_width=2 for any ASIC that uses vram
info 3.0. This is because channel_width in the vram table no longer
represents the memory width

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 2fb804eb5702e..01abc2f3081a8 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -2386,7 +2386,13 @@ static enum bp_result get_vram_info_v30(
 		return BP_RESULT_BADBIOSTABLE;
 
 	info->num_chans = info_v30->channel_num;
-	info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8;
+	/* As suggested by VBIOS we should always use
+	 * dram_channel_width_bytes = 2 when using VRAM
+	 * table version 3.0. This is because the channel_width
+	 * param in the VRAM info table is changed in 7000 series and
+	 * no longer represents the memory channel width.
+	 */
+	info->dram_channel_width_bytes = 2;
 
 	return result;
 }
-- 
GitLab


From 2cbed167d2698f10a67f47f14aaac7d498f6dfb7 Mon Sep 17 00:00:00 2001
From: Johnson Chen <johnson.chen@amd.com>
Date: Fri, 1 Dec 2023 06:25:08 -0700
Subject: [PATCH 0854/2340] drm/amd/display: Fix null pointer

Add guard for NULL pointer access

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Johnson Chen <johnson.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 6ba393e5b8ee7..38fa7441df51f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -158,6 +158,8 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 	/* If this assert is hit then we have a link encoder dynamic management issue */
 	ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true);
 
+	if (pipe_ctx->stream == NULL)
+		return false;
 	/* Count MST hubs once by treating only 1st remote sink in topology as an encoder */
 	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) {
 			return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
-- 
GitLab


From d24e50e1005fd584e0fea138aa153349e13b4d94 Mon Sep 17 00:00:00 2001
From: George Shen <george.shen@amd.com>
Date: Fri, 1 Dec 2023 06:25:09 -0700
Subject: [PATCH 0855/2340] drm/amd/display: Skip DPIA-specific DP LL
 automation flag for non-DPIA links

[Why]
The is_automated flag logic only applies to USB4 DPIA links during DP LL
compliance test automation. The flag should not be set for non-DPIA
cases.

[How]
Add check for DPIA link endpoint type before setting the flag. Also,
rename is_automated to skip_fallback_on_link_loss for clarity.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
Reviewed-by: Wenjing Liu <wenjing.liu@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: George Shen <george.shen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h                       | 8 +++++++-
 .../amd/display/dc/link/protocols/link_dp_irq_handler.c   | 6 ++++--
 .../amd/display/dc/link/protocols/link_dp_training_dpia.c | 4 ++--
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index e8c1599ab18a7..6ee4db8c05484 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1541,7 +1541,13 @@ struct dc_link {
 	bool is_dig_mapping_flexible;
 	bool hpd_status; /* HPD status of link without physical HPD pin. */
 	bool is_hpd_pending; /* Indicates a new received hpd */
-	bool is_automated; /* Indicates automated testing */
+
+	/* USB4 DPIA links skip verifying link cap, instead performing the fallback method
+	 * for every link training. This is incompatible with DP LL compliance automation,
+	 * which expects the same link settings to be used every retry on a link loss.
+	 * This flag is used to skip the fallback when link loss occurs during automation.
+	 */
+	bool skip_fallback_on_link_loss;
 
 	bool edp_sink_present;
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
index 0c00e94e90b1d..ae271c830c9aa 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
@@ -265,7 +265,7 @@ void dp_handle_link_loss(struct dc_link *link)
 
 	for (i = count - 1; i >= 0; i--) {
 		// Always use max settings here for DP 1.4a LL Compliance CTS
-		if (link->is_automated) {
+		if (link->skip_fallback_on_link_loss) {
 			pipes[i]->link_config.dp_link_settings.lane_count =
 					link->verified_link_cap.lane_count;
 			pipes[i]->link_config.dp_link_settings.link_rate =
@@ -404,7 +404,9 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
 
 	if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
 		// Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC
-		link->is_automated = true;
+		if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+			link->skip_fallback_on_link_loss = true;
+
 		device_service_clear.bits.AUTOMATED_TEST = 1;
 		core_link_write_dpcd(
 			link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
index 4f4e899e5c46d..e8dda44b23cb2 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
@@ -811,7 +811,7 @@ static enum link_training_result dpia_training_eq_transparent(
 			/* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
 			 * has to share encoders unlike DP and USBC
 			 */
-			if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) {
+			if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->skip_fallback_on_link_loss && retries_eq)) {
 				result =  LINK_TRAINING_SUCCESS;
 				break;
 			}
@@ -1037,7 +1037,7 @@ enum link_training_result dpia_perform_link_training(
 	 */
 	if (result == LINK_TRAINING_SUCCESS) {
 		fsleep(5000);
-		if (!link->is_automated)
+		if (!link->skip_fallback_on_link_loss)
 			result = dp_check_link_loss_status(link, &lt_settings);
 	} else if (result == LINK_TRAINING_ABORT)
 		dpia_training_abort(link, &lt_settings, repeater_id);
-- 
GitLab


From d218291579de53fad8242ad1ae732604de25b635 Mon Sep 17 00:00:00 2001
From: Chris Park <chris.park@amd.com>
Date: Fri, 1 Dec 2023 06:25:10 -0700
Subject: [PATCH 0856/2340] drm/amd/display: Update BIOS FW info table revision

[Why]
BIOS FW info version 3.5 is introduced to support new ASICs, but it's
content is currently same as 3.4.

[How]
Include minor version 5 in parsing to enable support.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Dillon Varone <dillon.varone@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Chris Park <chris.park@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 01abc2f3081a8..971473c69b328 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1747,6 +1747,7 @@ static enum bp_result bios_parser_get_firmware_info(
 				result = get_firmware_info_v3_2(bp, info);
 				break;
 			case 4:
+			case 5:
 				result = get_firmware_info_v3_4(bp, info);
 				break;
 			default:
-- 
GitLab


From c59397eff9439bbc8b9a9835142e99ea0abf9cde Mon Sep 17 00:00:00 2001
From: Charlene Liu <charlene.liu@amd.com>
Date: Fri, 1 Dec 2023 06:25:11 -0700
Subject: [PATCH 0857/2340] drm/amd/display: revert removing otg toggle w/a
 back when no active display

w/a use case:
- dual display, compliance, toggling between the displays
- switching between 120Hz 420 -> 144Hz 444 and vice versa
- switching between 144Hz -> 60Hz TMDS or vice versa

It'd typically involve TMDS in some capacity since that's the only link
signal we leave the OTG running but DIO/PHY off you can hit this in
cases where you have multiple displays as well it syncs with the first
active OTG, so if you had OTG[0] mapped and FIFO off you'd hit it even
if OTG[1] was mapped and had FIFO

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Charlene Liu <charlene.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c   | 10 ++++------
 .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c   |  6 +++---
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 9e0fa01ecb795..439414ef464a0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -312,13 +312,11 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	}
 
 	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
+		dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+
 		clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
-		if (all_active_disps != 0) {
-			dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
-			dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-			dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
-		} else
-			dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
+		dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
+		dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
 
 		update_dispclk = true;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
index 786b62ab8d0fc..d6db9d7fced2e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -475,14 +475,14 @@ int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr)
 			VBIOSSMC_MSG_QueryIPS2Support,
 			0);
 
-	smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv);
+	//smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv);
 	return retv;
 }
 
 void dcn35_smu_write_ips_scratch(struct clk_mgr_internal *clk_mgr, uint32_t param)
 {
 	REG_WRITE(MP1_SMN_C2PMSG_71, param);
-	smu_print("%s: write_ips_scratch = %x\n", __func__, param);
+	//smu_print("%s: write_ips_scratch = %x\n", __func__, param);
 }
 
 uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr)
@@ -490,6 +490,6 @@ uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr)
 	uint32_t retv;
 
 	retv = REG_READ(MP1_SMN_C2PMSG_71);
-	smu_print("%s: dcn35_smu_read_ips_scratch = %x\n",  __func__, retv);
+	//smu_print("%s: dcn35_smu_read_ips_scratch = %x\n",  __func__, retv);
 	return retv;
 }
-- 
GitLab


From e6ae4c47e8f8941fde115434fd8884e4e972cf6b Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Fri, 1 Dec 2023 06:25:12 -0700
Subject: [PATCH 0858/2340] drm/amd/display: Pass debug watermarks through to
 DCN35 DML2

[Why]
The DC debug options currently do not function for dynamically adjusting
our watermarks.

[How]
Hook them up before passing them to DML2.
Also make sure we're using dc->bb_overrides since dc->debug isn't
populated during dc_construct.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Michael Strauss <michael.strauss@amd.com>
Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c  | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 39cf1ae3a3e16..e9d88f52717b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -326,6 +326,25 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
 		dcn3_5_soc.dram_clock_change_latency_us =
 			dc->debug.dram_clock_change_latency_ns / 1000.0;
 	}
+
+	if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+		dcn3_5_soc.dram_clock_change_latency_us =
+			dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_time_ns > 0)
+		dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+		dcn3_5_soc.sr_enter_plus_exit_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+		dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+		dcn3_5_soc.sr_enter_plus_exit_z8_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
 	/*temp till dml2 fully work without dml1*/
 	dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip,
 				DML_PROJECT_DCN31);
-- 
GitLab


From 43484c4bdb6eb2f74cec61e4e7cfcb6ce8e69e2f Mon Sep 17 00:00:00 2001
From: Relja Vojvodic <relja.vojvodic@amd.com>
Date: Fri, 1 Dec 2023 06:25:13 -0700
Subject: [PATCH 0859/2340] drm/amd/display: Added delay to DPM log

HW registers were being read to quickly, causing incorrect values to be
logged after a clock frequency was changed

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Martin Leung <martin.leung@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Relja Vojvodic <relja.vojvodic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index a496930b1f9c0..95c0b49b531a0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -460,18 +460,24 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
 
 static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr_internal *clk_mgr)
 {
-    unsigned int dispclk_khz_reg    = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
-    unsigned int dppclk_khz_reg     = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
-    unsigned int dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
-    unsigned int dcfclk_khz_reg     = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
-    unsigned int dtbclk_khz_reg     = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
-    unsigned int fclk_khz_reg       = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
+	unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg,
+				 fclk_khz_reg;
+	int dramclk_khz_override, fclk_khz_override, num_fclk_levels;
+
+	msleep(5);
+
+    dispclk_khz_reg    = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
+    dppclk_khz_reg     = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
+    dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
+    dcfclk_khz_reg     = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
+    dtbclk_khz_reg     = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
+    fclk_khz_reg       = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
 
     // Overrides for these clocks in case there is no p_state change support
-    int dramclk_khz_override = new_clocks->dramclk_khz;
-    int fclk_khz_override = new_clocks->fclk_khz;
+    dramclk_khz_override = new_clocks->dramclk_khz;
+    fclk_khz_override = new_clocks->fclk_khz;
 
-    int num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
+    num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
 
     if (!new_clocks->p_state_change_support) {
 	    dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000;
@@ -707,7 +713,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 		dmcu->funcs->set_psr_wait_loop(dmcu,
 				clk_mgr_base->clks.dispclk_khz / 1000 / 7);
 
-	if (dc->config.enable_auto_dpm_test_logs) {
+	if (dc->config.enable_auto_dpm_test_logs && safe_to_lower) {
 	    dcn32_auto_dpm_test_log(new_clocks, clk_mgr);
 	}
 }
-- 
GitLab


From d3586c707b8f64cbe5b778cfe59ac4b8a4be0d3b Mon Sep 17 00:00:00 2001
From: Charlene Liu <charlene.liu@amd.com>
Date: Fri, 1 Dec 2023 06:25:14 -0700
Subject: [PATCH 0860/2340] drm/amd/display: keep domain24 power on if eDP not
 exist

This is w/a: we need to keep domain 24 power up in driver side, and let
dmubfw handle it for S0i3. For last display unplugged, if OTG in PG, no
interrupt call back coming.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Chris Park <chris.park@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Charlene Liu <charlene.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 39260371beb9d..09498aa92096d 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -979,6 +979,8 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 	bool hpo_frl_stream_enc_acquired = false;
 	bool hpo_dp_stream_enc_acquired = false;
 	int i = 0, j = 0;
+	int edp_num = 0;
+	struct dc_link *edp_links[MAX_NUM_EDP] = { NULL };
 
 	memset(update_state, 0, sizeof(struct pg_block_update));
 
@@ -1019,10 +1021,24 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 
 		if (pipe_ctx->stream_res.opp)
 			update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false;
+	}
+	/*domain24 controls all the otg, mpc, opp, as long as one otg is still up, avoid enabling OTG PG*/
+	for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
+		struct timing_generator *tg = dc->res_pool->timing_generators[i];
+		if (tg && tg->funcs->is_tg_enabled(tg)) {
+			update_state->pg_pipe_res_update[PG_OPTC][i] = false;
+			break;
+		}
+	}
 
-		if (pipe_ctx->stream_res.tg)
-			update_state->pg_pipe_res_update[PG_OPTC][pipe_ctx->stream_res.tg->inst] = false;
+	dc_get_edp_links(dc, edp_links, &edp_num);
+	if (edp_num == 0 ||
+		((!edp_links[0] || !edp_links[0]->edp_sink_present) &&
+			(!edp_links[1] || !edp_links[1]->edp_sink_present))) {
+		/*eDP not exist on this config, keep Domain24 power on, for S0i3, this will be handled in dmubfw*/
+		update_state->pg_pipe_res_update[PG_OPTC][0] = false;
 	}
+
 }
 
 void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
@@ -1156,8 +1172,10 @@ void dcn35_block_power_control(struct dc *dc,
 			pg_cntl->funcs->dwb_pg_control(pg_cntl, power_on);
 	}
 
+	/*this will need all the clients to unregister optc interruts let dmubfw handle this*/
 	if (pg_cntl->funcs->plane_otg_pg_control)
 		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, power_on);
+
 }
 
 void dcn35_root_clock_control(struct dc *dc,
-- 
GitLab


From 2ce156482a6fef349d2eba98e5070c412d3af662 Mon Sep 17 00:00:00 2001
From: Nicholas Susanto <nicholas.susanto@amd.com>
Date: Fri, 1 Dec 2023 06:25:15 -0700
Subject: [PATCH 0861/2340] drm/amd/display: Fix disable_otg_wa logic

[Why]
When switching to another HDMI mode, we are unnecesarilly
disabling/enabling FIFO causing both HPO and DIG registers to be set at
the same time when only HPO is supposed to be set.

This can lead to a system hang the next time we change refresh rates as
there are cases when we don't disable OTG/FIFO but FIFO is enabled when
it isn't supposed to be.

[How]
Removing the enable/disable FIFO entirely.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Nicholas Susanto <nicholas.susanto@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 439414ef464a0..8d4c0b209872b 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -127,21 +127,13 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *
 			continue;
 		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
 				     !pipe->stream->link_enc)) {
-			struct stream_encoder *stream_enc = pipe->stream_res.stream_enc;
-
 			if (disable) {
-				if (stream_enc && stream_enc->funcs->disable_fifo)
-					pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc);
-
 				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
 					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 
 				reset_sync_context_for_pipe(dc, context, i);
 			} else {
 				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
-
-				if (stream_enc && stream_enc->funcs->enable_fifo)
-					pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc);
 			}
 		}
 	}
-- 
GitLab


From 21afc872fbc29cd68cfde816d1df4d55848c3f61 Mon Sep 17 00:00:00 2001
From: Ivan Lipski <ivlipski@amd.com>
Date: Fri, 1 Dec 2023 06:25:16 -0700
Subject: [PATCH 0862/2340] drm/amd/display: Add monitor patch for specific eDP

[WHY]
Some eDP panels's ext caps don't write initial value cause the value of
dpcd_addr(0x317) is random.  It means that sometimes the eDP will
clarify it is OLED, miniLED...etc cause the backlight control interface
is incorrect.

[HOW]
Add a new panel patch to remove sink ext caps(HDR,OLED...etc)

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Sun peng Li <sunpeng.li@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Ivan Lipski <ivlipski@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index b4696ec621c45..eaf8d9f482446 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -64,6 +64,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
 		DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id);
 		edid_caps->panel_patch.disable_fams = true;
 		break;
+	/* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
+	case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
+	case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+		DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
+		edid_caps->panel_patch.remove_sink_ext_caps = true;
+		break;
 	default:
 		return;
 	}
-- 
GitLab


From cfa96a14e89d8341a7308acc4c6168991d4fdac0 Mon Sep 17 00:00:00 2001
From: Yihan Zhu <yihan.zhu@amd.com>
Date: Fri, 1 Dec 2023 06:25:17 -0700
Subject: [PATCH 0863/2340] drm/amd/display: add MPC MCM 1D LUT clock gating
 programming

Missing clock gating programming blocks memory power on from boot up.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Chris Park <chris.park@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Yihan Zhu <yihan.zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c               | 3 ++-
 drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index 994b21ed272f1..e789e654c3870 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -71,12 +71,13 @@ void mpc32_power_on_blnd_lut(
 {
 	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
 
+	REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_DIS, power_on);
+
 	if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) {
 		if (power_on) {
 			REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0);
 			REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5);
 		} else if (!mpc->ctx->dc->debug.disable_mem_low_power) {
-			ASSERT(false);
 			/* TODO: change to mpc
 			 *  dpp_base->ctx->dc->optimized_required = true;
 			 *  dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 1a0ed1c7e2d45..13324422ff50e 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 			.i2c = true,
 			.dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
 			.dscl = true,
-			.cm = false,
+			.cm = true,
 			.mpc = true,
 			.optc = true,
 			.vpg = true,
-- 
GitLab


From abd26a3252cbd1a3ae4e46d37596d176fe50b41a Mon Sep 17 00:00:00 2001
From: Dillon Varone <dillon.varone@amd.com>
Date: Fri, 1 Dec 2023 06:25:18 -0700
Subject: [PATCH 0864/2340] drm/amd/display: Add dml2 copy functions

Add function to handle deep copying dml2 context.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Dillon Varone <dillon.varone@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c      | 15 +++-------
 .../drm/amd/display/dc/dml2/dml2_wrapper.c    | 29 ++++++++++++++++++-
 .../drm/amd/display/dc/dml2/dml2_wrapper.h    |  4 +++
 3 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index eab713c0da0d7..102d00a9a24f1 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2258,23 +2258,16 @@ struct dc_state *dc_copy_state(struct dc_state *src_ctx)
 {
 	int i, j;
 	struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
-#ifdef CONFIG_DRM_AMD_DC_FP
-	struct dml2_context *dml2 =  NULL;
-#endif
 
 	if (!new_ctx)
 		return NULL;
 	memcpy(new_ctx, src_ctx, sizeof(struct dc_state));
 
 #ifdef CONFIG_DRM_AMD_DC_FP
-	if (new_ctx->bw_ctx.dml2) {
-		dml2 = kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
-		if (!dml2)
-			return NULL;
-
-		memcpy(dml2, src_ctx->bw_ctx.dml2, sizeof(struct dml2_context));
-		new_ctx->bw_ctx.dml2 = dml2;
-	}
+	if (new_ctx->bw_ctx.dml2 && !dml2_create_copy(&new_ctx->bw_ctx.dml2, src_ctx->bw_ctx.dml2)) {
+		dc_release_state(new_ctx);
+		return NULL;
+ 	}
 #endif
 
 	for (i = 0; i < MAX_PIPES; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 8f231418870f2..9d354fde6908a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -691,10 +691,15 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, bool fast_v
 	return out;
 }
 
+static inline struct dml2_context *dml2_allocate_memory(void)
+{
+	return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
+}
+
 bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
 {
 	// Allocate Mode Lib Ctx
-	*dml2 = (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
+	*dml2 = dml2_allocate_memory();
 
 	if (!(*dml2))
 		return false;
@@ -745,3 +750,25 @@ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
 	*fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0];
 	*dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0];
 }
+
+void dml2_copy(struct dml2_context *dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	/* copy Mode Lib Ctx */
+	memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context));
+}
+
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	/* Allocate Mode Lib Ctx */
+	*dst_dml2 = dml2_allocate_memory();
+
+	if (!(*dst_dml2))
+		return false;
+
+	/* copy Mode Lib Ctx */
+	dml2_copy(*dst_dml2, src_dml2);
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index fe15baa4bf094..0de6886969c69 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -191,6 +191,10 @@ bool dml2_create(const struct dc *in_dc,
 				 struct dml2_context **dml2);
 
 void dml2_destroy(struct dml2_context *dml2);
+void dml2_copy(struct dml2_context *dst_dml2,
+	struct dml2_context *src_dml2);
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+	struct dml2_context *src_dml2);
 
 /*
  * dml2_validate - Determines if a display configuration is supported or not.
-- 
GitLab


From dd5c6362ddcd8bdb07704faff8648593885ecfa1 Mon Sep 17 00:00:00 2001
From: Dennis Chan <dennis.chan@amd.com>
Date: Fri, 1 Dec 2023 06:25:19 -0700
Subject: [PATCH 0865/2340] drm/amd/display: Fix Replay Desync Error IRQ
 handler

In previous case, Replay didn't identify the IRQ type, This commit fixes
the issues for the interrupt.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Robin Chen <robin.chen@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Dennis Chan <dennis.chan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/link/protocols/link_dp_irq_handler.c  | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
index ae271c830c9aa..ba69874be5a49 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
@@ -190,9 +190,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 	/*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/
 	union psr_error_status replay_error_status;
 
-	if (link->replay_settings.config.force_disable_desync_error_check)
-		return;
-
 	if (!link->replay_settings.replay_feature_enabled)
 		return;
 
@@ -210,9 +207,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 		&replay_error_status.raw,
 		sizeof(replay_error_status.raw));
 
-	if (replay_configuration.bits.DESYNC_ERROR_STATUS)
-		link->replay_settings.config.received_desync_error_hpd = 1;
-
 	link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR =
 		replay_error_status.bits.LINK_CRC_ERROR;
 	link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR =
@@ -225,6 +219,12 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 		link->replay_settings.config.replay_error_status.bits.STATE_TRANSITION_ERROR) {
 		bool allow_active;
 
+		if (link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR)
+			link->replay_settings.config.received_desync_error_hpd = 1;
+
+		if (link->replay_settings.config.force_disable_desync_error_check)
+			return;
+
 		/* Acknowledge and clear configuration bits */
 		dm_helpers_dp_write_dpcd(
 			link->ctx,
-- 
GitLab


From 08a32addf17317b9fac55be9b31275cbf6e41fb7 Mon Sep 17 00:00:00 2001
From: Wenjing Liu <wenjing.liu@amd.com>
Date: Fri, 1 Dec 2023 06:25:20 -0700
Subject: [PATCH 0866/2340] drm/amd/display: add support for DTO genarated
 dscclk

Current implementation will choose to use refclk as dscclk. This is not
recommended by hardware team as refclk is a fixed value which could
cause unnecessary power consumption or it could be not enough for large
DSC timings. So we are adding new interfaces so we could switch to use
dynamically generated DSCCLK by DTO. So DSCCLK is programmable based on
current pixel clock and dispclk.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Wenjing Liu <wenjing.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c   | 25 +++++++++++++++++
 drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h  |  4 +++
 .../gpu/drm/amd/display/dc/link/link_dpms.c   | 27 ++++++++++++++++++-
 3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 5f7f474ef51c9..5c323718ec906 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -989,9 +989,22 @@ static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
 static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
+	struct dccg *dccg = dc->res_pool->dccg;
+	/* It has been found that when DSCCLK is lower than 16Mhz, we will get DCN
+	 * register access hung. When DSCCLk is based on refclk, DSCCLk is always a
+	 * fixed value higher than 16Mhz so the issue doesn't occur. When DSCCLK is
+	 * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings
+	 * with DSC such as 480p60Hz, the dispclk could be low enough to trigger
+	 * this problem. We are implementing a workaround here to keep using dscclk
+	 * based on fixed value refclk when timing is smaller than 3x16Mhz (i.e
+	 * 48Mhz) pixel clock to avoid hitting this problem.
+	 */
+	bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+			stream->timing.pix_clk_100hz > 480000;
 
 	ASSERT(dsc);
 	for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -1014,12 +1027,16 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 
 		dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
 		dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+		if (should_use_dto_dscclk)
+			dccg->funcs->set_dto_dscclk(dccg, dsc->inst);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
 
 			ASSERT(odm_dsc);
 			odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
 			odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
+			if (should_use_dto_dscclk)
+				dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst);
 		}
 		dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
 		dsc_cfg.pic_width *= opp_cnt;
@@ -1039,9 +1056,13 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 				OPTC_DSC_DISABLED, 0, 0);
 
 		/* disable DSC block */
+		if (dccg->funcs->set_ref_dscclk)
+			dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst);
 		dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			ASSERT(odm_pipe->stream_res.dsc);
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst);
 			odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
 		}
 	}
@@ -1124,6 +1145,10 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
 		if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe &&
 				current_pipe_ctx->next_odm_pipe->stream_res.dsc) {
 			struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc;
+			struct dccg *dccg = dc->res_pool->dccg;
+
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
 			/* disconnect DSC block from stream */
 			dsc->funcs->dsc_disconnect(dsc);
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index ce2f0c0e82bd6..6b44557fcb1a8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -201,6 +201,10 @@ struct dccg_funcs {
 			struct dccg *dccg,
 			enum streamclk_source src,
 			uint32_t otg_inst);
+	void (*set_dto_dscclk)(
+			struct dccg *dccg,
+			uint32_t dsc_inst);
+	void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
 };
 
 #endif //__DAL_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index b16b2e14312e9..5fe8b4871c776 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -776,10 +776,26 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
  */
 void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
+	/* TODO: Move this to HWSS as this is hardware programming sequence not a
+	 * link layer sequence
+	 */
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
+	struct dccg *dccg = dc->res_pool->dccg;
+	/* It has been found that when DSCCLK is lower than 16Mhz, we will get DCN
+	 * register access hung. When DSCCLk is based on refclk, DSCCLk is always a
+	 * fixed value higher than 16Mhz so the issue doesn't occur. When DSCCLK is
+	 * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings
+	 * with DSC such as 480p60Hz, the dispclk could be low enough to trigger
+	 * this problem. We are implementing a workaround here to keep using dscclk
+	 * based on fixed value refclk when timing is smaller than 3x16Mhz (i.e
+	 * 48Mhz) pixel clock to avoid hitting this problem.
+	 */
+	bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+			stream->timing.pix_clk_100hz > 480000;
 	DC_LOGGER_INIT(dsc->ctx->logger);
 
 	for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -802,11 +818,15 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 
 		dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
 		dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+		if (should_use_dto_dscclk)
+			dccg->funcs->set_dto_dscclk(dccg, dsc->inst);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
 
 			odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
 			odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
+			if (should_use_dto_dscclk)
+				dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst);
 		}
 		dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
 		dsc_cfg.pic_width *= opp_cnt;
@@ -856,9 +876,14 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 		}
 
 		/* disable DSC block */
+		if (dccg->funcs->set_ref_dscclk)
+			dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst);
 		pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
-		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
+		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst);
 			odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
+		}
 	}
 }
 
-- 
GitLab


From b6411638c026fde33046f5515a5a7d37af1da146 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:21 -0700
Subject: [PATCH 0867/2340] drm/amd/display: Avoid virtual stream encoder if
 not explicitly requested

Virtual stream encoder should not be a free match for thunderbolt or
usbc, and thus should be avoided.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c   | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
index af1b31f4e69a2..d08d109692514 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
@@ -1250,7 +1250,10 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
 			/* Store first available for MST second display
 			 * in daisy chain use case
 			 */
-			j = i;
+
+			if (pool->stream_enc[i]->id != ENGINE_ID_VIRTUAL)
+				j = i;
+
 			if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id ==
 					link->link_enc->preferred_engine)
 				return pool->stream_enc[i];
-- 
GitLab


From 80af8859b46d1fa386871f71bad95db9ff50ad62 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 1 Dec 2023 06:25:22 -0700
Subject: [PATCH 0868/2340] drm/amd/display: Skip entire amdgpu_dm build if
 !CONFIG_DRM_AMD_DC

[WHY]
Previously this only excluded build for a few amdgpu_dm
binaries which makes no sense.

[HOW]
Wrap the entire Makefile in "ifneq ($(CONFIG_DRM_AMD_DC),)"

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/Makefile | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 8bf94920d23ea..063205ecb1374 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,22 +25,24 @@
 
 
+ifneq ($(CONFIG_DRM_AMD_DC),)
 AMDGPUDM = \
 	amdgpu_dm.o \
 	amdgpu_dm_plane.o \
 	amdgpu_dm_crtc.o \
 	amdgpu_dm_irq.o \
 	amdgpu_dm_mst_types.o \
-	amdgpu_dm_color.o
+	amdgpu_dm_color.o \
+	amdgpu_dm_services.o \
+	amdgpu_dm_helpers.o \
+	amdgpu_dm_pp_smu.o \
+	amdgpu_dm_psr.o \
+	amdgpu_dm_replay.o
 
 ifdef CONFIG_DRM_AMD_DC_FP
 AMDGPUDM += dc_fpu.o
 endif
 
-ifneq ($(CONFIG_DRM_AMD_DC),)
-AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o amdgpu_dm_replay.o
-endif
-
 AMDGPUDM += amdgpu_dm_hdcp.o
 
 ifneq ($(CONFIG_DEBUG_FS),)
@@ -52,3 +54,4 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc
 AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM))
 
 AMD_DISPLAY_FILES += $(AMDGPU_DM)
+endif
-- 
GitLab


From 198891fd2902fba155fe23f8ad27c9cf8cd8286d Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 1 Dec 2023 06:25:23 -0700
Subject: [PATCH 0869/2340] drm/amd/display: Create one virtual connector in DC

[WHAT]
Prepare a virtual connector for writeback.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c     | 11 +++++++++--
 .../gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c   |  3 ++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index bc7b8a3253538..9b791e3f7e331 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1675,6 +1675,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0];
 	init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
 
+	/* Enable DWB for tested platforms only */
+	if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0))
+		init_data.num_virtual_links = 1;
+
 	INIT_LIST_HEAD(&adev->dm.da_list);
 
 	retrieve_dmi_info(&adev->dm);
@@ -4465,6 +4469,11 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			continue;
 		}
 
+		link = dc_get_link_at_index(dm->dc, i);
+
+		if (link->connector_signal == SIGNAL_TYPE_VIRTUAL)
+			continue;
+
 		aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
 		if (!aconnector)
 			goto fail;
@@ -4483,8 +4492,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			goto fail;
 		}
 
-		link = dc_get_link_at_index(dm->dc, i);
-
 		if (!dc_link_detect_connection_type(link, &new_connection_type))
 			DRM_ERROR("KMS: Failed to detect connector\n");
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 97798cee876e2..5d62805f3bdf6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -96,7 +96,8 @@ static void enable_memory_low_power(struct dc *dc)
 	if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
 		// Power down VPGs
 		for (i = 0; i < dc->res_pool->stream_enc_count; i++)
-			dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
+			if (dc->res_pool->stream_enc[i]->vpg)
+				dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
 #if defined(CONFIG_DRM_AMD_DC_FP)
 		for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
 			dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
-- 
GitLab


From dfc03588cf8ce2af8ef810cd226dc98ee4fbac38 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:24 -0700
Subject: [PATCH 0870/2340] drm/amd/display: Initialize writeback connector

[WHAT]
Create a drm_writeback_connector when connector signal equals
SIGNAL_TYPE_VIRTUAL.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/Makefile    |   3 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  20 +-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c  | 209 ++++++++++++++++++
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h  |  35 +++
 4 files changed, 265 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 063205ecb1374..ab2a97e354da1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -37,7 +37,8 @@ AMDGPUDM = \
 	amdgpu_dm_helpers.o \
 	amdgpu_dm_pp_smu.o \
 	amdgpu_dm_psr.o \
-	amdgpu_dm_replay.o
+	amdgpu_dm_replay.o \
+	amdgpu_dm_wb.o
 
 ifdef CONFIG_DRM_AMD_DC_FP
 AMDGPUDM += dc_fpu.o
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9b791e3f7e331..51a8736585358 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -54,6 +54,7 @@
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_hdcp.h"
 #include <drm/display/drm_hdcp_helper.h>
+#include "amdgpu_dm_wb.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_atombios.h"
 
@@ -4471,8 +4472,25 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 
 		link = dc_get_link_at_index(dm->dc, i);
 
-		if (link->connector_signal == SIGNAL_TYPE_VIRTUAL)
+		if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) {
+			struct drm_writeback_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL);
+
+			if (!wbcon) {
+				DRM_ERROR("KMS: Failed to allocate writeback connector\n");
+				continue;
+			}
+
+			if (amdgpu_dm_wb_connector_init(dm, wbcon)) {
+				DRM_ERROR("KMS: Failed to initialize writeback connector\n");
+				kfree(wbcon);
+				continue;
+			}
+
+			link->psr_settings.psr_feature_enabled = false;
+			link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+
 			continue;
+		}
 
 		aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
 		if (!aconnector)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
new file mode 100644
index 0000000000000..74e656696d8ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services_types.h"
+
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+#include "amdgpu_dm_wb.h"
+#include "amdgpu_display.h"
+
+#include <drm/drm_atomic_state_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+
+static const u32 amdgpu_dm_wb_formats[] = {
+	DRM_FORMAT_XRGB2101010,
+};
+
+static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
+					struct drm_crtc_state *crtc_state,
+					struct drm_connector_state *conn_state)
+{
+	struct drm_framebuffer *fb;
+	const struct drm_display_mode *mode = &crtc_state->mode;
+	bool found = false;
+	uint8_t i;
+
+	if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+		return 0;
+
+	fb = conn_state->writeback_job->fb;
+	if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) {
+		DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n",
+			      fb->width, fb->height);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < sizeof(amdgpu_dm_wb_formats) / sizeof(u32); i++) {
+		if (fb->format->format == amdgpu_dm_wb_formats[i])
+			found = true;
+	}
+
+	if (!found) {
+		DRM_DEBUG_KMS("Invalid pixel format %p4cc\n",
+			      &fb->format->format);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+
+	return drm_add_modes_noedid(connector, dev->mode_config.max_width,
+				    dev->mode_config.max_height);
+}
+
+static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
+			       struct drm_writeback_job *job)
+{
+	struct amdgpu_framebuffer *afb;
+	struct drm_gem_object *obj;
+	struct amdgpu_device *adev;
+	struct amdgpu_bo *rbo;
+	uint32_t domain;
+	int r;
+
+	if (!job->fb) {
+		DRM_DEBUG_KMS("No FB bound\n");
+		return 0;
+	}
+
+	afb = to_amdgpu_framebuffer(job->fb);
+	obj = job->fb->obj[0];
+	rbo = gem_to_amdgpu_bo(obj);
+	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+	r = amdgpu_bo_reserve(rbo, true);
+	if (r) {
+		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
+		return r;
+	}
+
+	r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+	if (r) {
+		dev_err(adev->dev, "reserving fence slot failed (%d)\n", r);
+		goto error_unlock;
+	}
+
+	domain = amdgpu_display_supported_domains(adev, rbo->flags);
+
+	r = amdgpu_bo_pin(rbo, domain);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+		goto error_unlock;
+	}
+
+	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+	if (unlikely(r != 0)) {
+		DRM_ERROR("%p bind failed\n", rbo);
+		goto error_unpin;
+	}
+
+	amdgpu_bo_unreserve(rbo);
+
+	afb->address = amdgpu_bo_gpu_offset(rbo);
+
+	amdgpu_bo_ref(rbo);
+
+	return 0;
+
+error_unpin:
+	amdgpu_bo_unpin(rbo);
+
+error_unlock:
+	amdgpu_bo_unreserve(rbo);
+	return r;
+}
+
+static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector,
+				struct drm_writeback_job *job)
+{
+	struct amdgpu_bo *rbo;
+	int r;
+
+	if (!job->fb)
+		return;
+
+	rbo = gem_to_amdgpu_bo(job->fb->obj[0]);
+	r = amdgpu_bo_reserve(rbo, false);
+	if (unlikely(r)) {
+		DRM_ERROR("failed to reserve rbo before unpin\n");
+		return;
+	}
+
+	amdgpu_bo_unpin(rbo);
+	amdgpu_bo_unreserve(rbo);
+	amdgpu_bo_unref(&rbo);
+}
+
+static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = {
+	.atomic_check = amdgpu_dm_wb_encoder_atomic_check,
+};
+
+static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = drm_connector_cleanup,
+	.reset = amdgpu_dm_connector_funcs_reset,
+	.atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = {
+	.get_modes = amdgpu_dm_wb_connector_get_modes,
+	.prepare_writeback_job = amdgpu_dm_wb_prepare_job,
+	.cleanup_writeback_job = amdgpu_dm_wb_cleanup_job,
+};
+
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct drm_writeback_connector *wbcon)
+{
+	int res = 0;
+
+	drm_connector_helper_add(&wbcon->base, &amdgpu_dm_wb_conn_helper_funcs);
+
+	res = drm_writeback_connector_init(&dm->adev->ddev, wbcon,
+					    &amdgpu_dm_wb_connector_funcs,
+					    &amdgpu_dm_wb_encoder_helper_funcs,
+					    amdgpu_dm_wb_formats,
+					    ARRAY_SIZE(amdgpu_dm_wb_formats),
+					    amdgpu_dm_get_encoder_crtc_mask(dm->adev));
+
+	if (res)
+		return res;
+	/*
+	 * Some of the properties below require access to state, like bpc.
+	 * Allocate some default initial connector state with our reset helper.
+	 */
+	if (wbcon->base.funcs->reset)
+		wbcon->base.funcs->reset(&wbcon->base);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
new file mode 100644
index 0000000000000..0bc9df7e7ee19
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_WB_H__
+#define __AMDGPU_DM_WB_H__
+
+#include <drm/drm_writeback.h>
+
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct drm_writeback_connector *wbcon);
+
+#endif
-- 
GitLab


From 7db7ade270ae8e177cc8bd09753745e7c2dc92e7 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 1 Dec 2023 06:25:25 -0700
Subject: [PATCH 0871/2340] drm/amd/display: Skip writeback connector when we
 get amdgpu_dm_connector

[WHY]
Writeback connectors are based on a different object:
drm_writeback_connector, and are therefore different from
amdgpu_dm_connector. We need to be careful to ensure code
designed for amdgpu_dm_connector doesn't inadvertently try
to operate on a drm_writeback_connector.

[HOW]
Skip them when connector type is DRM_MODE_CONNECTOR_WRITEBACK.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 62 +++++++++++++++++--
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c |  3 +
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 22 +++++--
 3 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 51a8736585358..1d9a10384f932 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -728,6 +728,10 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (link && aconnector->dc_link == link) {
 			if (notify->type == DMUB_NOTIFICATION_HPD)
@@ -951,6 +955,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port,
 
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->audio_inst != port)
 			continue;
@@ -2257,6 +2265,10 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type == dc_connection_mst_branch &&
 		    aconnector->mst_mgr.aux) {
@@ -2385,6 +2397,10 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type != dc_connection_mst_branch ||
 		    aconnector->mst_root)
@@ -2906,6 +2922,10 @@ static int dm_resume(void *handle)
 	/* Do detection*/
 	drm_connector_list_iter_begin(ddev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!aconnector->dc_link)
@@ -3479,6 +3499,9 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
 	list_for_each_entry(connector,
 			&dev->mode_config.connector_list, head)	{
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		dc_link = aconnector->dc_link;
 
@@ -5516,10 +5539,13 @@ static void fill_stream_properties_from_drm_display_mode(
 {
 	struct dc_crtc_timing *timing_out = &stream->timing;
 	const struct drm_display_info *info = &connector->display_info;
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_dm_connector *aconnector = NULL;
 	struct hdmi_vendor_infoframe hv_frame;
 	struct hdmi_avi_infoframe avi_frame;
 
+	if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+		aconnector = to_amdgpu_dm_connector(connector);
+
 	memset(&hv_frame, 0, sizeof(hv_frame));
 	memset(&avi_frame, 0, sizeof(avi_frame));
 
@@ -6964,6 +6990,9 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
 
 	for_each_new_connector_in_state(state, connector, new_con_state, i) {
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!aconnector->mst_output_port)
@@ -8523,6 +8552,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 notify:
 		aconnector = to_amdgpu_dm_connector(connector);
 
@@ -8556,6 +8588,9 @@ notify:
 		if (!status)
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		mutex_lock(&adev->dm.audio_lock);
@@ -8779,7 +8814,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
 		struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
-		struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+		struct amdgpu_dm_connector *aconnector;
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!adev->dm.hdcp_workqueue)
 			continue;
@@ -9173,10 +9213,15 @@ out:
 void dm_restore_drm_connector_state(struct drm_device *dev,
 				    struct drm_connector *connector)
 {
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_dm_connector *aconnector;
 	struct amdgpu_crtc *disconnected_acrtc;
 	struct dm_crtc_state *acrtc_state;
 
+	if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+		return;
+
+	aconnector = to_amdgpu_dm_connector(connector);
+
 	if (!aconnector->dc_sink || !connector->state || !connector->encoder)
 		return;
 
@@ -9253,12 +9298,16 @@ static void get_freesync_config_for_crtc(
 	struct dm_connector_state *new_con_state)
 {
 	struct mod_freesync_config config = {0};
-	struct amdgpu_dm_connector *aconnector =
-			to_amdgpu_dm_connector(new_con_state->base.connector);
+	struct amdgpu_dm_connector *aconnector;
 	struct drm_display_mode *mode = &new_crtc_state->base.mode;
 	int vrefresh = drm_mode_vrefresh(mode);
 	bool fs_vid_mode = false;
 
+	if (new_con_state->base.connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+		return;
+
+	aconnector = to_amdgpu_dm_connector(new_con_state->base.connector);
+
 	new_crtc_state->vrr_supported = new_con_state->freesync_capable &&
 					vrefresh >= aconnector->min_vfreq &&
 					vrefresh <= aconnector->max_vfreq;
@@ -10088,6 +10137,9 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
 		if (conn_state->crtc != crtc)
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (!aconnector->mst_output_port || !aconnector->mst_root)
 			aconnector = NULL;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index 52ecfa746b54d..f936a35fa9ebb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -326,6 +326,9 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
 			if (!connector->state || connector->state->crtc != crtc)
 				continue;
 
+			if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+				continue;
+
 			aconn = to_amdgpu_dm_connector(connector);
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 51467f132c260..58b880acb087a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -894,10 +894,15 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
-		struct amdgpu_dm_connector *amdgpu_dm_connector =
-				to_amdgpu_dm_connector(connector);
+		struct amdgpu_dm_connector *amdgpu_dm_connector;
+		const struct dc_link *dc_link;
 
-		const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+
+		dc_link = amdgpu_dm_connector->dc_link;
 
 		if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
 			dc_interrupt_set(adev->dm.dc,
@@ -930,9 +935,14 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
-		struct amdgpu_dm_connector *amdgpu_dm_connector =
-				to_amdgpu_dm_connector(connector);
-		const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+		struct amdgpu_dm_connector *amdgpu_dm_connector;
+		const struct dc_link *dc_link;
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+		dc_link = amdgpu_dm_connector->dc_link;
 
 		if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
 			dc_interrupt_set(adev->dm.dc,
-- 
GitLab


From 748b091d641638e68330b1b24195eaba9aadf997 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 1 Dec 2023 06:25:26 -0700
Subject: [PATCH 0872/2340] drm/amd/display: Return drm_connector from
 find_first_crtc_matching_connector

[WHY]
We will be dealing with two types of connector: amdgpu_dm_connector
and drm_writeback_connector.

[HOW]
We want to find both and then cast to the appriopriate type afterwards.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c         | 8 +++++---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h         | 2 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c   | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 1d9a10384f932..5c89a043ac106 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2664,7 +2664,7 @@ static int dm_suspend(void *handle)
 	return 0;
 }
 
-struct amdgpu_dm_connector *
+struct drm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 					     struct drm_crtc *crtc)
 {
@@ -2677,7 +2677,7 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 		crtc_from_state = new_con_state->crtc;
 
 		if (crtc_from_state == crtc)
-			return to_amdgpu_dm_connector(connector);
+			return connector;
 	}
 
 	return NULL;
@@ -9407,6 +9407,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	 * update changed items
 	 */
 	struct amdgpu_crtc *acrtc = NULL;
+	struct drm_connector *connector = NULL;
 	struct amdgpu_dm_connector *aconnector = NULL;
 	struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL;
 	struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL;
@@ -9416,7 +9417,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 	dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 	acrtc = to_amdgpu_crtc(crtc);
-	aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+	connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+	aconnector = to_amdgpu_dm_connector(connector);
 
 	/* TODO This hack should go away */
 	if (aconnector && enable) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 3d480be802cb5..3710f4d0f2cb2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -834,7 +834,7 @@ struct dc_stream_state *
 int dm_atomic_get_state(struct drm_atomic_state *state,
 			struct dm_atomic_state **dm_state);
 
-struct amdgpu_dm_connector *
+struct drm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 					     struct drm_crtc *crtc);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index b599efda3b19f..19ae5587dd5ae 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1500,14 +1500,16 @@ int pre_validate_dsc(struct drm_atomic_state *state,
 		int ind = find_crtc_index_in_state_by_stream(state, stream);
 
 		if (ind >= 0) {
+			struct drm_connector *connector;
 			struct amdgpu_dm_connector *aconnector;
 			struct drm_connector_state *drm_new_conn_state;
 			struct dm_connector_state *dm_new_conn_state;
 			struct dm_crtc_state *dm_old_crtc_state;
 
-			aconnector =
+			connector =
 				amdgpu_dm_find_first_crtc_matching_connector(state,
 									     state->crtcs[ind].ptr);
+			aconnector = to_amdgpu_dm_connector(connector);
 			drm_new_conn_state =
 				drm_atomic_get_new_connector_state(state,
 								   &aconnector->base);
-- 
GitLab


From 3e094a2875260543ca74838decc0c995d3765096 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 1 Dec 2023 06:25:27 -0700
Subject: [PATCH 0873/2340] drm/amd/display: Use drm_connector in
 create_stream_for_sink

[WHAT]
We need to use this function for both amdgpu_dm_connectors
and drm_writeback_connectors. Modify it to operate on
a drm_connector as a common base.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 65 +++++++++++--------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5c89a043ac106..46477192265ee 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5558,6 +5558,7 @@ static void fill_stream_properties_from_drm_display_mode(
 			&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
 	else if (drm_mode_is_420_also(info, mode_in)
+			&& aconnector
 			&& aconnector->force_yuv420_output)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
 	else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444)
@@ -5593,7 +5594,7 @@ static void fill_stream_properties_from_drm_display_mode(
 		timing_out->hdmi_vic = hv_frame.vic;
 	}
 
-	if (is_freesync_video_mode(mode_in, aconnector)) {
+	if (aconnector && is_freesync_video_mode(mode_in, aconnector)) {
 		timing_out->h_addressable = mode_in->hdisplay;
 		timing_out->h_total = mode_in->htotal;
 		timing_out->h_sync_width = mode_in->hsync_end - mode_in->hsync_start;
@@ -6070,14 +6071,14 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
 }
 
 static struct dc_stream_state *
-create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+create_stream_for_sink(struct drm_connector *connector,
 		       const struct drm_display_mode *drm_mode,
 		       const struct dm_connector_state *dm_state,
 		       const struct dc_stream_state *old_stream,
 		       int requested_bpc)
 {
+	struct amdgpu_dm_connector *aconnector = NULL;
 	struct drm_display_mode *preferred_mode = NULL;
-	struct drm_connector *drm_connector;
 	const struct drm_connector_state *con_state = &dm_state->base;
 	struct dc_stream_state *stream = NULL;
 	struct drm_display_mode mode;
@@ -6096,20 +6097,22 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	drm_mode_init(&mode, drm_mode);
 	memset(&saved_mode, 0, sizeof(saved_mode));
 
-	if (aconnector == NULL) {
-		DRM_ERROR("aconnector is NULL!\n");
+	if (connector == NULL) {
+		DRM_ERROR("connector is NULL!\n");
 		return stream;
 	}
 
-	drm_connector = &aconnector->base;
-
-	if (!aconnector->dc_sink) {
-		sink = create_fake_sink(aconnector);
-		if (!sink)
-			return stream;
-	} else {
-		sink = aconnector->dc_sink;
-		dc_sink_retain(sink);
+	if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) {
+		aconnector = NULL;
+		aconnector = to_amdgpu_dm_connector(connector);
+		if (!aconnector->dc_sink) {
+			sink = create_fake_sink(aconnector);
+			if (!sink)
+				return stream;
+		} else {
+			sink = aconnector->dc_sink;
+			dc_sink_retain(sink);
+		}
 	}
 
 	stream = dc_create_stream_for_sink(sink);
@@ -6119,12 +6122,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		goto finish;
 	}
 
+	/* We leave this NULL for writeback connectors */
 	stream->dm_stream_context = aconnector;
 
 	stream->timing.flags.LTE_340MCSC_SCRAMBLE =
-		drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
+		connector->display_info.hdmi.scdc.scrambling.low_rates;
 
-	list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
+	list_for_each_entry(preferred_mode, &connector->modes, head) {
 		/* Search for preferred mode */
 		if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) {
 			native_mode_found = true;
@@ -6133,7 +6137,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	}
 	if (!native_mode_found)
 		preferred_mode = list_first_entry_or_null(
-				&aconnector->base.modes,
+				&connector->modes,
 				struct drm_display_mode,
 				head);
 
@@ -6147,7 +6151,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		 * and the modelist may not be filled in time.
 		 */
 		DRM_DEBUG_DRIVER("No preferred mode found\n");
-	} else {
+	} else if (aconnector) {
 		recalculate_timing = is_freesync_video_mode(&mode, aconnector);
 		if (recalculate_timing) {
 			freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
@@ -6170,13 +6174,17 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	 */
 	if (!scale || mode_refresh != preferred_refresh)
 		fill_stream_properties_from_drm_display_mode(
-			stream, &mode, &aconnector->base, con_state, NULL,
+			stream, &mode, connector, con_state, NULL,
 			requested_bpc);
 	else
 		fill_stream_properties_from_drm_display_mode(
-			stream, &mode, &aconnector->base, con_state, old_stream,
+			stream, &mode, connector, con_state, old_stream,
 			requested_bpc);
 
+	/* The rest isn't needed for writeback connectors */
+	if (!aconnector)
+		goto finish;
+
 	if (aconnector->timing_changed) {
 		drm_dbg(aconnector->base.dev,
 			"overriding timing for automated test, bpc %d, changing to %d\n",
@@ -6194,7 +6202,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
 	fill_audio_info(
 		&stream->audio_info,
-		drm_connector,
+		connector,
 		sink);
 
 	update_stream_signal(stream, sink);
@@ -6662,7 +6670,7 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	enum dc_status dc_result = DC_OK;
 
 	do {
-		stream = create_stream_for_sink(aconnector, drm_mode,
+		stream = create_stream_for_sink(connector, drm_mode,
 						dm_state, old_stream,
 						requested_bpc);
 		if (stream == NULL) {
@@ -9418,15 +9426,16 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 	acrtc = to_amdgpu_crtc(crtc);
 	connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
-	aconnector = to_amdgpu_dm_connector(connector);
+	if (connector && connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+		aconnector = to_amdgpu_dm_connector(connector);
 
 	/* TODO This hack should go away */
-	if (aconnector && enable) {
+	if (connector && enable) {
 		/* Make sure fake sink is created in plug-in scenario */
 		drm_new_conn_state = drm_atomic_get_new_connector_state(state,
-							    &aconnector->base);
+									connector);
 		drm_old_conn_state = drm_atomic_get_old_connector_state(state,
-							    &aconnector->base);
+									connector);
 
 		if (IS_ERR(drm_new_conn_state)) {
 			ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
@@ -9573,7 +9582,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 		 * added MST connectors not found in existing crtc_state in the chained mode
 		 * TODO: need to dig out the root cause of that
 		 */
-		if (!aconnector)
+		if (!connector)
 			goto skip_modeset;
 
 		if (modereset_required(new_crtc_state))
@@ -9616,7 +9625,7 @@ skip_modeset:
 	 * We want to do dc stream updates that do not require a
 	 * full modeset below.
 	 */
-	if (!(enable && aconnector && new_crtc_state->active))
+	if (!(enable && connector && new_crtc_state->active))
 		return 0;
 	/*
 	 * Given above conditions, the dc state cannot be NULL because:
-- 
GitLab


From dbf5d3d02987faa0eec3710dd687cd912362d7b5 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:28 -0700
Subject: [PATCH 0874/2340] drm/amd/display: Check writeback connectors in
 create_validate_stream_for_sink

[WHY & HOW]
This is to check connector type to avoid
unhandled null pointer for writeback connectors.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Fixes: 60e034f28600 ("drm/amd/display: Revert "drm/amd/display: Use drm_connector in create_validate_stream_for_sink"")
Signed-off-by: Alex Hung <alex.hung@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 46477192265ee..315a747f97502 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6678,6 +6678,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 			break;
 		}
 
+		if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			return stream;
+
 		dc_result = dc_validate_stream(adev->dm.dc, stream);
 		if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 			dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
@@ -9426,7 +9429,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 	acrtc = to_amdgpu_crtc(crtc);
 	connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
-	if (connector && connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+	if (connector)
 		aconnector = to_amdgpu_dm_connector(connector);
 
 	/* TODO This hack should go away */
-- 
GitLab


From ff73d4cdde18bc4607ff10c53351715ee1164be0 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 1 Dec 2023 06:25:29 -0700
Subject: [PATCH 0875/2340] drm/amd/display: Create amdgpu_dm_wb_connector

[WHY]
We need to track the dc_link and it would get confusing if
re-using the amdgpu_dm_connector.

[HOW]
Creating new amdgpu_dm_wb_connector.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c    |  5 +++--
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h    |  8 ++++++++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c | 16 +++++++++++-----
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h |  3 ++-
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 315a747f97502..0f41328510c21 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4496,14 +4496,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		link = dc_get_link_at_index(dm->dc, i);
 
 		if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) {
-			struct drm_writeback_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL);
+			struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL);
 
 			if (!wbcon) {
 				DRM_ERROR("KMS: Failed to allocate writeback connector\n");
 				continue;
 			}
 
-			if (amdgpu_dm_wb_connector_init(dm, wbcon)) {
+			if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) {
 				DRM_ERROR("KMS: Failed to initialize writeback connector\n");
 				kfree(wbcon);
 				continue;
@@ -7609,6 +7609,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
 	struct dc_link *link = dc_get_link_at_index(dc, link_index);
 	struct amdgpu_i2c_adapter *i2c;
 
+	/* Not needed for writeback connector */
 	link->priv = aconnector;
 
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 3710f4d0f2cb2..0a9974017ebaa 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -32,6 +32,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_plane.h>
 #include "link_service_types.h"
+#include <drm/drm_writeback.h>
 
 /*
  * This file contains the definition for amdgpu_display_manager
@@ -714,6 +715,13 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status,
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
 
+struct amdgpu_dm_wb_connector {
+	struct drm_writeback_connector base;
+	struct dc_link *link;
+};
+
+#define to_amdgpu_dm_wb_connector(x) container_of(x, struct amdgpu_dm_wb_connector, base)
+
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
 struct dm_plane_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
index 74e656696d8ec..b3e634b0f712e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -30,6 +30,7 @@
 #include "amdgpu_dm.h"
 #include "amdgpu_dm_wb.h"
 #include "amdgpu_display.h"
+#include "dc.h"
 
 #include <drm/drm_atomic_state_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
@@ -183,13 +184,18 @@ static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs =
 };
 
 int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
-				struct drm_writeback_connector *wbcon)
+				struct amdgpu_dm_wb_connector *wbcon,
+				uint32_t link_index)
 {
+	struct dc *dc = dm->dc;
+	struct dc_link *link = dc_get_link_at_index(dc, link_index);
 	int res = 0;
 
-	drm_connector_helper_add(&wbcon->base, &amdgpu_dm_wb_conn_helper_funcs);
+	wbcon->link = link;
 
-	res = drm_writeback_connector_init(&dm->adev->ddev, wbcon,
+	drm_connector_helper_add(&wbcon->base.base, &amdgpu_dm_wb_conn_helper_funcs);
+
+	res = drm_writeback_connector_init(&dm->adev->ddev, &wbcon->base,
 					    &amdgpu_dm_wb_connector_funcs,
 					    &amdgpu_dm_wb_encoder_helper_funcs,
 					    amdgpu_dm_wb_formats,
@@ -202,8 +208,8 @@ int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
 	 * Some of the properties below require access to state, like bpc.
 	 * Allocate some default initial connector state with our reset helper.
 	 */
-	if (wbcon->base.funcs->reset)
-		wbcon->base.funcs->reset(&wbcon->base);
+	if (wbcon->base.base.funcs->reset)
+		wbcon->base.base.funcs->reset(&wbcon->base.base);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
index 0bc9df7e7ee19..13d31c857dee4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
@@ -30,6 +30,7 @@
 #include <drm/drm_writeback.h>
 
 int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
-				struct drm_writeback_connector *wbcon);
+				struct amdgpu_dm_wb_connector *dm_wbcon,
+				uint32_t link_index);
 
 #endif
-- 
GitLab


From 1fb9d7b975baba081724be8ff6370b1a71a8aea4 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 1 Dec 2023 06:25:30 -0700
Subject: [PATCH 0876/2340] drm/amd/display: Create fake sink and stream for
 writeback connector

[WHAT]
Writeback connectors don't have a physical sink but DC still
needs a sink to function. Create a fake sink and stream for
writeback connectors

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ++++++++++++-------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0f41328510c21..06ccfc9669c7d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5715,13 +5715,13 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode,
 }
 
 static struct dc_sink *
-create_fake_sink(struct amdgpu_dm_connector *aconnector)
+create_fake_sink(struct dc_link *link)
 {
 	struct dc_sink_init_data sink_init_data = { 0 };
 	struct dc_sink *sink = NULL;
 
-	sink_init_data.link = aconnector->dc_link;
-	sink_init_data.sink_signal = aconnector->dc_link->connector_signal;
+	sink_init_data.link = link;
+	sink_init_data.sink_signal = link->connector_signal;
 
 	sink = dc_sink_create(&sink_init_data);
 	if (!sink) {
@@ -6092,6 +6092,7 @@ create_stream_for_sink(struct drm_connector *connector,
 	enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
 	struct dsc_dec_dpcd_caps dsc_caps;
 
+	struct dc_link *link = NULL;
 	struct dc_sink *sink = NULL;
 
 	drm_mode_init(&mode, drm_mode);
@@ -6105,14 +6106,24 @@ create_stream_for_sink(struct drm_connector *connector,
 	if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) {
 		aconnector = NULL;
 		aconnector = to_amdgpu_dm_connector(connector);
-		if (!aconnector->dc_sink) {
-			sink = create_fake_sink(aconnector);
-			if (!sink)
-				return stream;
-		} else {
-			sink = aconnector->dc_sink;
-			dc_sink_retain(sink);
-		}
+		link = aconnector->dc_link;
+	} else {
+		struct drm_writeback_connector *wbcon = NULL;
+		struct amdgpu_dm_wb_connector *dm_wbcon = NULL;
+
+		wbcon = drm_connector_to_writeback(connector);
+		dm_wbcon = to_amdgpu_dm_wb_connector(wbcon);
+		link = dm_wbcon->link;
+	}
+
+	if (!aconnector || !aconnector->dc_sink) {
+		sink = create_fake_sink(link);
+		if (!sink)
+			return stream;
+
+	} else {
+		sink = aconnector->dc_sink;
+		dc_sink_retain(sink);
 	}
 
 	stream = dc_create_stream_for_sink(sink);
-- 
GitLab


From c81e13b929df2fd16dce87ac36672978f10eae1c Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:31 -0700
Subject: [PATCH 0877/2340] drm/amd/display: Hande writeback request from
 userspace

[WHAT]
Handle writeback requests and fill in the required information for DWB
programming and setup.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      |   3 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 159 ++++++++++++++++++
 2 files changed, 162 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 32fe05c810c6f..a166d76847196 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -416,6 +416,9 @@ struct amdgpu_crtc {
 
 	int otg_inst;
 	struct drm_pending_vblank_event *event;
+
+	bool wb_pending;
+	struct drm_writeback_connector *wb_conn;
 };
 
 struct amdgpu_encoder_atom_dig {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 06ccfc9669c7d..311f815452e07 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -578,6 +578,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
 {
 	struct common_irq_params *irq_params = interrupt_params;
 	struct amdgpu_device *adev = irq_params->adev;
+	struct drm_writeback_job *job;
 	struct amdgpu_crtc *acrtc;
 	unsigned long flags;
 	int vrr_active;
@@ -586,6 +587,21 @@ static void dm_crtc_high_irq(void *interrupt_params)
 	if (!acrtc)
 		return;
 
+	if (acrtc->wb_pending) {
+		if (acrtc->wb_conn) {
+			spin_lock_irqsave(&acrtc->wb_conn->job_lock, flags);
+			job = list_first_entry_or_null(&acrtc->wb_conn->job_queue,
+						       struct drm_writeback_job,
+						       list_entry);
+			spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
+
+			if (job)
+				drm_writeback_signal_completion(acrtc->wb_conn, 0);
+		} else
+			DRM_ERROR("%s: no amdgpu_crtc wb_conn\n", __func__);
+		acrtc->wb_pending = false;
+	}
+
 	vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);
 
 	drm_dbg_vbl(adev_to_drm(adev),
@@ -8639,6 +8655,12 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat
 	stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state);
 }
 
+static void dm_clear_writeback(struct amdgpu_display_manager *dm,
+			      struct dm_crtc_state *crtc_state)
+{
+	dc_stream_remove_writeback(dm->dc, crtc_state->stream, 0);
+}
+
 static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 					struct dc_state *dc_state)
 {
@@ -8648,9 +8670,34 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
 	struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+	struct drm_connector_state *old_con_state;
+	struct drm_connector *connector;
 	bool mode_set_reset_required = false;
 	u32 i;
 
+	/* Disable writeback */
+	for_each_old_connector_in_state(state, connector, old_con_state, i) {
+		struct dm_connector_state *dm_old_con_state;
+		struct amdgpu_crtc *acrtc;
+
+		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		old_crtc_state = NULL;
+
+		dm_old_con_state = to_dm_connector_state(old_con_state);
+		if (!dm_old_con_state->base.crtc)
+			continue;
+
+		acrtc = to_amdgpu_crtc(dm_old_con_state->base.crtc);
+		if (acrtc)
+			old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+
+		dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+
+		dm_clear_writeback(dm, dm_old_crtc_state);
+	}
+
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
 				      new_crtc_state, i) {
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
@@ -8787,6 +8834,97 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 	}
 }
 
+static void dm_set_writeback(struct amdgpu_display_manager *dm,
+			      struct dm_crtc_state *crtc_state,
+			      struct drm_connector *connector,
+			      struct drm_connector_state *new_con_state)
+{
+	struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector);
+	struct amdgpu_crtc *acrtc;
+	struct dc_writeback_info *wb_info;
+	struct pipe_ctx *pipe = NULL;
+	struct amdgpu_framebuffer *afb;
+	int i = 0;
+
+	wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL);
+	if (!wb_info) {
+		DRM_ERROR("Failed to allocate wb_info\n");
+		return;
+	}
+
+	acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc);
+	if (!acrtc) {
+		DRM_ERROR("no amdgpu_crtc found\n");
+		return;
+	}
+
+	afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb);
+	if (!afb) {
+		DRM_ERROR("No amdgpu_framebuffer found\n");
+		return;
+	}
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		if (dm->dc->current_state->res_ctx.pipe_ctx[i].stream == crtc_state->stream) {
+			pipe = &dm->dc->current_state->res_ctx.pipe_ctx[i];
+			break;
+		}
+	}
+
+	/* fill in wb_info */
+	wb_info->wb_enabled = true;
+
+	wb_info->dwb_pipe_inst = 0;
+	wb_info->dwb_params.dwbscl_black_color = 0;
+	wb_info->dwb_params.hdr_mult = 0x1F000;
+	wb_info->dwb_params.csc_params.gamut_adjust_type = CM_GAMUT_ADJUST_TYPE_BYPASS;
+	wb_info->dwb_params.csc_params.gamut_coef_format = CM_GAMUT_REMAP_COEF_FORMAT_S2_13;
+	wb_info->dwb_params.output_depth = DWB_OUTPUT_PIXEL_DEPTH_10BPC;
+	wb_info->dwb_params.cnv_params.cnv_out_bpc = DWB_CNV_OUT_BPC_10BPC;
+
+	/* width & height from crtc */
+	wb_info->dwb_params.cnv_params.src_width = acrtc->base.mode.crtc_hdisplay;
+	wb_info->dwb_params.cnv_params.src_height = acrtc->base.mode.crtc_vdisplay;
+	wb_info->dwb_params.dest_width = acrtc->base.mode.crtc_hdisplay;
+	wb_info->dwb_params.dest_height = acrtc->base.mode.crtc_vdisplay;
+
+	wb_info->dwb_params.cnv_params.crop_en = false;
+	wb_info->dwb_params.stereo_params.stereo_enabled = false;
+
+	wb_info->dwb_params.cnv_params.out_max_pix_val = 0x3ff;	// 10 bits
+	wb_info->dwb_params.cnv_params.out_min_pix_val = 0;
+	wb_info->dwb_params.cnv_params.fc_out_format = DWB_OUT_FORMAT_32BPP_ARGB;
+	wb_info->dwb_params.cnv_params.out_denorm_mode = DWB_OUT_DENORM_BYPASS;
+
+	wb_info->dwb_params.out_format = dwb_scaler_mode_bypass444;
+
+	wb_info->dwb_params.capture_rate = dwb_capture_rate_0;
+
+	wb_info->dwb_params.scaler_taps.h_taps = 4;
+	wb_info->dwb_params.scaler_taps.v_taps = 4;
+	wb_info->dwb_params.scaler_taps.h_taps_c = 2;
+	wb_info->dwb_params.scaler_taps.v_taps_c = 2;
+	wb_info->dwb_params.subsample_position = DWB_INTERSTITIAL_SUBSAMPLING;
+
+	wb_info->mcif_buf_params.luma_pitch = afb->base.pitches[0];
+	wb_info->mcif_buf_params.chroma_pitch = afb->base.pitches[1];
+
+	for (i = 0; i < DWB_MCIF_BUF_COUNT; i++) {
+		wb_info->mcif_buf_params.luma_address[i] = afb->address;
+		wb_info->mcif_buf_params.chroma_address[i] = 0;
+	}
+
+	wb_info->mcif_buf_params.p_vmid = 1;
+	wb_info->mcif_warmup_params.p_vmid = 1;
+	wb_info->writeback_source_plane = pipe->plane_state;
+
+	dc_stream_add_writeback(dm->dc, crtc_state->stream, wb_info);
+
+	acrtc->wb_pending = true;
+	acrtc->wb_conn = wb_conn;
+	drm_writeback_queue_job(wb_conn, new_con_state);
+}
+
 /**
  * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
  * @state: The atomic state to commit
@@ -9119,6 +9257,27 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			amdgpu_dm_commit_planes(state, dev, dm, crtc, wait_for_vblank);
 	}
 
+	/* Enable writeback */
+	for_each_new_connector_in_state(state, connector, new_con_state, i) {
+		struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
+		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
+
+		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		if (!new_con_state->writeback_job)
+			continue;
+
+		new_crtc_state = NULL;
+
+		if (acrtc)
+			new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+
+		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+		dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state);
+	}
+
 	/* Update audio instances for each connector. */
 	amdgpu_dm_commit_audio(dev, state);
 
-- 
GitLab


From f772f902b28662188636faba88e2a10bdb08e128 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:32 -0700
Subject: [PATCH 0878/2340] drm/amd/display: Add writeback enable/disable in dc

[WHAT]
The enable and disable writeback calls need to be included in the
coressponding functions in dc_stream.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/core/dc_stream.c   | 33 +++++++++++++++++++
 .../amd/display/dc/hwss/dcn30/dcn30_hwseq.c   |  4 +++
 2 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 4bdf105d1d715..e71d622098d7f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -467,6 +467,25 @@ bool dc_stream_add_writeback(struct dc *dc,
 		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
 		dwb->otg_inst = stream_status->primary_otg_inst;
 	}
+
+	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+		dm_error("DC: update_bandwidth failed!\n");
+		return false;
+	}
+
+	/* enable writeback */
+	if (dc->hwss.enable_writeback) {
+		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+
+		if (dwb->funcs->is_enabled(dwb)) {
+			/* writeback pipe already enabled, only need to update */
+			dc->hwss.update_writeback(dc, wb_info, dc->current_state);
+		} else {
+			/* Enable writeback pipe from scratch*/
+			dc->hwss.enable_writeback(dc, wb_info, dc->current_state);
+		}
+	}
+
 	return true;
 }
 
@@ -511,6 +530,20 @@ bool dc_stream_remove_writeback(struct dc *dc,
 	}
 	stream->num_wb_info = j;
 
+	/* recalculate and apply DML parameters */
+	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+		dm_error("DC: update_bandwidth failed!\n");
+		return false;
+	}
+
+	/* disable writeback */
+	if (dc->hwss.disable_writeback) {
+		struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
+		if (dwb->funcs->is_enabled(dwb))
+			dc->hwss.disable_writeback(dc, dwb_pipe_inst);
+	}
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index d71faf2ecd413..fd8a8c10a2019 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -367,6 +367,10 @@ void dcn30_enable_writeback(
 	DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\
 		__func__, wb_info->dwb_pipe_inst,\
 		wb_info->mpcc_inst);
+
+	/* Warmup interface */
+	dcn30_mmhubbub_warmup(dc, 1, wb_info);
+
 	/* Update writeback pipe */
 	dcn30_set_writeback(dc, wb_info, context);
 
-- 
GitLab


From c09919e6ea5fefd49d8b7b54aa5b222937163108 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:33 -0700
Subject: [PATCH 0879/2340] drm/amd/display: Fix writeback_info never got
 updated

[WHY]
wb_enabled field is set to false before it is used, and the following
code will never be executed.

[HOW]
Setting wb_enable to false after all removal work is completed.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index e71d622098d7f..8e7bc4a30f271 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -509,18 +509,13 @@ bool dc_stream_remove_writeback(struct dc *dc,
 		return false;
 	}
 
-//	stream->writeback_info[dwb_pipe_inst].wb_enabled = false;
-	for (i = 0; i < stream->num_wb_info; i++) {
-		/*dynamic update*/
-		if (stream->writeback_info[i].wb_enabled &&
-			stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) {
-			stream->writeback_info[i].wb_enabled = false;
-		}
-	}
-
 	/* remove writeback info for disabled writeback pipes from stream */
 	for (i = 0, j = 0; i < stream->num_wb_info; i++) {
 		if (stream->writeback_info[i].wb_enabled) {
+
+			if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst)
+				stream->writeback_info[i].wb_enabled = false;
+
 			if (j < i)
 				/* trim the array */
 				memcpy(&stream->writeback_info[j], &stream->writeback_info[i],
-- 
GitLab


From 86ecd796a88e26e025f184ff6a2e8872a6dc9ac7 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:34 -0700
Subject: [PATCH 0880/2340] drm/amd/display: Validate hw_points_num before
 using it

[WHAT]
hw_points_num is 0 before ogam LUT is programmed; however, function
"dwb3_program_ogam_pwl" assumes hw_points_num is always greater than 0,
i.e. substracting it by 1 as an array index.

[HOW]
Check hw_points_num is not equal to 0 before using it.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
index 701c7d8bc038a..03a50c32fcfe1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
@@ -243,6 +243,9 @@ static bool dwb3_program_ogam_lut(
 		return false;
 	}
 
+	if (params->hw_points_num == 0)
+		return false;
+
 	REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_MODE, 2);
 
 	current_mode = dwb3_get_ogam_current(dwbc30);
-- 
GitLab


From ab37b88ed9de9de8d582683f7ea17059f1251a7f Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:35 -0700
Subject: [PATCH 0881/2340] drm/amd/display: Fix writeback_info is not removed

[WHY]
Counter j was not updated to present the num of writeback_info when
writeback pipes are removed.

[HOW]
update j (num of writeback info) under the correct condition.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 8e7bc4a30f271..3dd9f4ab2261f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -516,11 +516,12 @@ bool dc_stream_remove_writeback(struct dc *dc,
 			if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst)
 				stream->writeback_info[i].wb_enabled = false;
 
-			if (j < i)
-				/* trim the array */
+			/* trim the array */
+			if (j < i) {
 				memcpy(&stream->writeback_info[j], &stream->writeback_info[i],
 						sizeof(struct dc_writeback_info));
-			j++;
+				j++;
+			}
 		}
 	}
 	stream->num_wb_info = j;
-- 
GitLab


From f872e2f5f0beabd34c03799a5c597f6ba47b51cc Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:36 -0700
Subject: [PATCH 0882/2340] drm/amd/display: Add writeback enable field
 (wb_enabled)

[WHAT]
Add a new field to keep track whether a crtc is previously
writeback-enabled.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h          | 1 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index a166d76847196..d8083972e393e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -418,6 +418,7 @@ struct amdgpu_crtc {
 	struct drm_pending_vblank_event *event;
 
 	bool wb_pending;
+	bool wb_enabled;
 	struct drm_writeback_connector *wb_conn;
 };
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 311f815452e07..b45b248808f7b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8693,9 +8693,13 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 		if (acrtc)
 			old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
 
+		if (!acrtc->wb_enabled)
+			continue;
+
 		dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 
 		dm_clear_writeback(dm, dm_old_crtc_state);
+		acrtc->wb_enabled = false;
 	}
 
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
@@ -9273,9 +9277,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		if (acrtc)
 			new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
 
+		if (acrtc->wb_enabled)
+			continue;
+
 		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 
 		dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state);
+		acrtc->wb_enabled = true;
 	}
 
 	/* Update audio instances for each connector. */
-- 
GitLab


From fdf43d25e38f9e6e6a3cdb15335c198fb6d5dcb9 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:37 -0700
Subject: [PATCH 0883/2340] drm/amd/display: Setup for mmhubbub3_warmup_mcif
 with big buffer

[WHY]
Hardware may require different warmup approaches - big buffer or
individual buffers.

[HOW]
Setup warmup for big buffer when it is required by specific hardware.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b45b248808f7b..7e5e4be99b600 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8844,6 +8844,7 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm,
 			      struct drm_connector_state *new_con_state)
 {
 	struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector);
+	struct amdgpu_device *adev = dm->adev;
 	struct amdgpu_crtc *acrtc;
 	struct dc_writeback_info *wb_info;
 	struct pipe_ctx *pipe = NULL;
@@ -8919,6 +8920,11 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm,
 	}
 
 	wb_info->mcif_buf_params.p_vmid = 1;
+	if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0)) {
+		wb_info->mcif_warmup_params.start_address.quad_part = afb->address;
+		wb_info->mcif_warmup_params.region_size =
+			wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height;
+	}
 	wb_info->mcif_warmup_params.p_vmid = 1;
 	wb_info->writeback_source_plane = pipe->plane_state;
 
-- 
GitLab


From 50ad10cba6cd1c7f0ac9049f2c2c6b7589b510d0 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:38 -0700
Subject: [PATCH 0884/2340] drm/amd/display: Add new set_fc_enable to struct
 dwbc_funcs

[WHAT]
Add a function to enable and disable DWB's frame captures.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c  | 23 +++++++++++++++++++
 .../gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h  |  2 ++
 drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h   |  4 ++++
 3 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
index 0d98918bf0fc4..1b9d9495f76de 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
@@ -130,6 +130,28 @@ bool dwb3_disable(struct dwbc *dwbc)
 	return true;
 }
 
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable)
+{
+	struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
+	unsigned int pre_locked;
+
+	REG_GET(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, &pre_locked);
+
+	/* Lock DWB registers */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 1);
+
+	/* Disable FC */
+	REG_UPDATE(FC_MODE_CTRL, FC_FRAME_CAPTURE_EN, enable);
+
+	/* Unlock DWB registers */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 0);
+
+	DC_LOG_DWB("%s dwb3_fc_disabled at inst = %d", __func__, dwbc->inst);
+}
+
+
 bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params)
 {
 	struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
@@ -226,6 +248,7 @@ static const struct dwbc_funcs dcn30_dwbc_funcs = {
 	.disable		= dwb3_disable,
 	.update			= dwb3_update,
 	.is_enabled		= dwb3_is_enabled,
+	.set_fc_enable		= dwb3_set_fc_enable,
 	.set_stereo		= dwb3_set_stereo,
 	.set_new_content	= dwb3_set_new_content,
 	.dwb_program_output_csc	= NULL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
index a5d1b81e768dd..332634b76aacd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
@@ -877,6 +877,8 @@ bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params);
 
 bool dwb3_is_enabled(struct dwbc *dwbc);
 
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable);
+
 void dwb3_set_stereo(struct dwbc *dwbc,
 	struct dwb_stereo_params *stereo_params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
index 86b711dcc7858..729ca0064e94d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
@@ -188,6 +188,10 @@ struct dwbc_funcs {
 	bool (*is_enabled)(
 		struct dwbc *dwbc);
 
+	void (*set_fc_enable)(
+		struct dwbc *dwbc,
+		enum dwb_frame_capture_enable enable);
+
 	void (*set_stereo)(
 		struct dwbc *dwbc,
 		struct dwb_stereo_params *stereo_params);
-- 
GitLab


From 87ce0e62694115cfe4210a17c269d6855d2a139b Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 1 Dec 2023 06:25:39 -0700
Subject: [PATCH 0885/2340] drm/amd/display: Disable DWB frame capture to
 emulate oneshot

[WHY]
drm_writeback requires to capture exact one frame in each writeback
call.

[HOW]
frame_capture is disabled after each writeback is completed.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++++++++-
 .../gpu/drm/amd/display/dc/core/dc_stream.c   | 27 +++++++++++++++++++
 drivers/gpu/drm/amd/display/dc/dc_stream.h    |  4 +++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7e5e4be99b600..6aabe8a3ffef1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -595,8 +595,20 @@ static void dm_crtc_high_irq(void *interrupt_params)
 						       list_entry);
 			spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
 
-			if (job)
+			if (job) {
+				unsigned int v_total, refresh_hz;
+				struct dc_stream_state *stream = acrtc->dm_irq_params.stream;
+
+				v_total = stream->adjust.v_total_max ?
+					  stream->adjust.v_total_max : stream->timing.v_total;
+				refresh_hz = div_u64((uint64_t) stream->timing.pix_clk_100hz *
+					     100LL, (v_total * stream->timing.h_total));
+				mdelay(1000 / refresh_hz);
+
 				drm_writeback_signal_completion(acrtc->wb_conn, 0);
+				dc_stream_fc_disable_writeback(adev->dm.dc,
+							       acrtc->dm_irq_params.stream, 0);
+			}
 		} else
 			DRM_ERROR("%s: no amdgpu_crtc wb_conn\n", __func__);
 		acrtc->wb_pending = false;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 3dd9f4ab2261f..38cd29b210c05 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -489,6 +489,33 @@ bool dc_stream_add_writeback(struct dc *dc,
 	return true;
 }
 
+bool dc_stream_fc_disable_writeback(struct dc *dc,
+		struct dc_stream_state *stream,
+		uint32_t dwb_pipe_inst)
+{
+	struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
+	if (stream == NULL) {
+		dm_error("DC: dc_stream is NULL!\n");
+		return false;
+	}
+
+	if (dwb_pipe_inst >= MAX_DWB_PIPES) {
+		dm_error("DC: writeback pipe is invalid!\n");
+		return false;
+	}
+
+	if (stream->num_wb_info > MAX_DWB_PIPES) {
+		dm_error("DC: num_wb_info is invalid!\n");
+		return false;
+	}
+
+	if (dwb->funcs->set_fc_enable)
+		dwb->funcs->set_fc_enable(dwb, DWB_FRAME_CAPTURE_DISABLE);
+
+	return true;
+}
+
 bool dc_stream_remove_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		uint32_t dwb_pipe_inst)
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index e61eea6db29cd..4ac48c346a339 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -454,6 +454,10 @@ bool dc_stream_add_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		struct dc_writeback_info *wb_info);
 
+bool dc_stream_fc_disable_writeback(struct dc *dc,
+		struct dc_stream_state *stream,
+		uint32_t dwb_pipe_inst);
+
 bool dc_stream_remove_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		uint32_t dwb_pipe_inst);
-- 
GitLab


From 514a1cc940c264007805c02173dd5490c0a59f48 Mon Sep 17 00:00:00 2001
From: Roman Li <Roman.Li@amd.com>
Date: Fri, 1 Dec 2023 06:25:40 -0700
Subject: [PATCH 0886/2340] drm/amd/display: Fix array-index-out-of-bounds in
 dml2

[Why]
UBSAN errors observed in dmesg.
array-index-out-of-bounds in dml2/display_mode_core.c

[How]
Fix the index.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Roman Li <Roman.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 59718ee33e513..4d1336e5afc2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -9447,12 +9447,12 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 
 		// Output
 		CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
-		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
+		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0];
 		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
 		CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
-		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
+		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0];
 		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
-		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
+		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0];
 
 		CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 			&mode_lib->scratch,
-- 
GitLab


From d3e78612e949e16088b6ee83647b28499c24954d Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Date: Fri, 1 Dec 2023 06:25:41 -0700
Subject: [PATCH 0887/2340] drm/amd/display: Adjust code style

This simple commit adjusts part of the code style in some of the dc bios
files.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/bios/bios_parser2.c    | 63 +++++++++----------
 1 file changed, 30 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 971473c69b328..aef964f1bcbe5 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -2215,22 +2215,22 @@ static enum bp_result bios_parser_get_disp_connector_caps_info(
 
 	switch (bp->object_info_tbl.revision.minor) {
 	case 4:
-	    default:
-		    object = get_bios_object(bp, object_id);
-
-		    if (!object)
-			    return BP_RESULT_BADINPUT;
-
-		    record = get_disp_connector_caps_record(bp, object);
-		    if (!record)
-			    return BP_RESULT_NORECORD;
-
-		    info->INTERNAL_DISPLAY =
-			    (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
-		    info->INTERNAL_DISPLAY_BL =
-			    (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
-		    break;
-	    case 5:
+		default:
+			object = get_bios_object(bp, object_id);
+
+			if (!object)
+				return BP_RESULT_BADINPUT;
+
+			record = get_disp_connector_caps_record(bp, object);
+			if (!record)
+				return BP_RESULT_NORECORD;
+
+			info->INTERNAL_DISPLAY =
+				(record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
+			info->INTERNAL_DISPLAY_BL =
+				(record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
+			break;
+	case 5:
 		object_path_v3 = get_bios_object_from_path_v3(bp, object_id);
 
 		if (!object_path_v3)
@@ -3330,27 +3330,28 @@ static enum bp_result get_bracket_layout_record(
 		DC_LOG_DETECTION_EDID_PARSER("Invalid slot_layout_info\n");
 		return BP_RESULT_BADINPUT;
 	}
+
 	tbl = &bp->object_info_tbl;
 	v1_4 = tbl->v1_4;
 	v1_5 = tbl->v1_5;
 
 	result = BP_RESULT_NORECORD;
 	switch (bp->object_info_tbl.revision.minor) {
-		case 4:
-		default:
-			for (i = 0; i < v1_4->number_of_path; ++i)	{
-				if (bracket_layout_id ==
-					v1_4->display_path[i].display_objid) {
-					result = update_slot_layout_info(dcb, i, slot_layout_info);
-					break;
-				}
+	case 4:
+	default:
+		for (i = 0; i < v1_4->number_of_path; ++i) {
+			if (bracket_layout_id == v1_4->display_path[i].display_objid) {
+				result = update_slot_layout_info(dcb, i, slot_layout_info);
+				break;
 			}
-		    break;
-		case 5:
-			for (i = 0; i < v1_5->number_of_path; ++i)
-				result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
-			break;
+		}
+		break;
+	case 5:
+		for (i = 0; i < v1_5->number_of_path; ++i)
+			result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
+		break;
 	}
+
 	return result;
 }
 
@@ -3359,9 +3360,7 @@ static enum bp_result bios_get_board_layout_info(
 	struct board_layout_info *board_layout_info)
 {
 	unsigned int i;
-
 	struct bios_parser *bp;
-
 	static enum bp_result record_result;
 	unsigned int max_slots;
 
@@ -3371,7 +3370,6 @@ static enum bp_result bios_get_board_layout_info(
 		0, 0
 	};
 
-
 	bp = BP_FROM_DCB(dcb);
 
 	if (board_layout_info == NULL) {
@@ -3552,7 +3550,6 @@ static const struct dc_vbios_funcs vbios_funcs = {
 	.bios_parser_destroy = firmware_parser_destroy,
 
 	.get_board_layout_info = bios_get_board_layout_info,
-	/* TODO: use this fn in hw init?*/
 	.pack_data_tables = bios_parser_pack_data_tables,
 
 	.get_atom_dc_golden_table = bios_get_atom_dc_golden_table,
-- 
GitLab


From 107d678f6aecb4421975a25127b6bf521504b39e Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Date: Fri, 1 Dec 2023 06:25:42 -0700
Subject: [PATCH 0888/2340] drm/amd/display: Update code comment to be more
 accurate

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 102d00a9a24f1..9d3925603979e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1521,7 +1521,7 @@ static void program_timing_sync(
 
 		}
 
-		/* remove any other pipes that are already been synced */
+		/* remove any other unblanked pipes as they have already been synced */
 		if (dc->config.use_pipe_ctx_sync_logic) {
 			/* check pipe's syncd to decide which pipe to be removed */
 			for (j = 1; j < group_size; j++) {
@@ -1534,6 +1534,7 @@ static void program_timing_sync(
 					pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd;
 			}
 		} else {
+			/* remove any other pipes by checking valid plane */
 			for (j = j + 1; j < group_size; j++) {
 				bool is_blanked;
 
-- 
GitLab


From c0c22ed7c9fd6e6d50f61ed7347e60342e958e6f Mon Sep 17 00:00:00 2001
From: Aric Cyr <aric.cyr@amd.com>
Date: Fri, 1 Dec 2023 06:25:43 -0700
Subject: [PATCH 0889/2340] drm/amd/display: 3.2.263

This version brings along following fixes:

* Enable writeback.
* Add multiple fixes for DML2 and DCN35.
* Introduce small code style adjustments.

Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Aric Cyr <aric.cyr@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 6ee4db8c05484..61d08349a0d71 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -49,7 +49,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.262"
+#define DC_VER "3.2.263"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
GitLab


From 24d29d5b189590497947510046eb685e5e2452b6 Mon Sep 17 00:00:00 2001
From: Li Ma <li.ma@amd.com>
Date: Tue, 14 Nov 2023 16:17:51 +0800
Subject: [PATCH 0890/2340] drm/amd/swsmu: update smu v14_0_0 driver if version
 and metrics table

Increment the driver if version and add new mems to the mertics table.

Signed-off-by: Li Ma <li.ma@amd.com>
Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/include/kgd_pp_interface.h    | 17 ++++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 +++
 .../inc/pmfw_if/smu14_driver_if_v14_0_0.h     | 77 +++++++++++--------
 .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c  | 46 ++++++++++-
 4 files changed, 115 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 8ebba87f42891..36a5ad8c00c59 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -1086,6 +1086,10 @@ struct gpu_metrics_v3_0 {
 	uint16_t			average_dram_reads;
 	/* time filtered DRAM write bandwidth [MB/sec] */
 	uint16_t			average_dram_writes;
+	/* time filtered IPU read bandwidth [MB/sec] */
+	uint16_t			average_ipu_reads;
+	/* time filtered IPU write bandwidth [MB/sec] */
+	uint16_t			average_ipu_writes;
 
 	/* Driver attached timestamp (in ns) */
 	uint64_t			system_clock_counter;
@@ -1105,6 +1109,8 @@ struct gpu_metrics_v3_0 {
 	uint32_t			average_all_core_power;
 	/* calculated core power [mW] */
 	uint16_t			average_core_power[16];
+	/* time filtered total system power [mW] */
+	uint16_t			average_sys_power;
 	/* maximum IRM defined STAPM power limit [mW] */
 	uint16_t			stapm_power_limit;
 	/* time filtered STAPM power limit [mW] */
@@ -1117,6 +1123,8 @@ struct gpu_metrics_v3_0 {
 	uint16_t			average_ipuclk_frequency;
 	uint16_t			average_fclk_frequency;
 	uint16_t			average_vclk_frequency;
+	uint16_t			average_uclk_frequency;
+	uint16_t			average_mpipu_frequency;
 
 	/* Current clocks */
 	/* target core frequency [MHz] */
@@ -1126,6 +1134,15 @@ struct gpu_metrics_v3_0 {
 	/* GFXCLK frequency limit enforced on GFX [MHz] */
 	uint16_t			current_gfx_maxfreq;
 
+	/* Throttle Residency (ASIC dependent) */
+	uint32_t			throttle_residency_prochot;
+	uint32_t			throttle_residency_spl;
+	uint32_t			throttle_residency_fppt;
+	uint32_t			throttle_residency_sppt;
+	uint32_t			throttle_residency_thm_core;
+	uint32_t			throttle_residency_thm_gfx;
+	uint32_t			throttle_residency_thm_soc;
+
 	/* Metrics table alpha filter time constant [us] */
 	uint32_t			time_filter_alphavalue;
 };
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index c125253df20b8..c2265e027ca8b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1418,6 +1418,16 @@ typedef enum {
 	METRICS_PCIE_WIDTH,
 	METRICS_CURR_FANPWM,
 	METRICS_CURR_SOCKETPOWER,
+	METRICS_AVERAGE_VPECLK,
+	METRICS_AVERAGE_IPUCLK,
+	METRICS_AVERAGE_MPIPUCLK,
+	METRICS_THROTTLER_RESIDENCY_PROCHOT,
+	METRICS_THROTTLER_RESIDENCY_SPL,
+	METRICS_THROTTLER_RESIDENCY_FPPT,
+	METRICS_THROTTLER_RESIDENCY_SPPT,
+	METRICS_THROTTLER_RESIDENCY_THM_CORE,
+	METRICS_THROTTLER_RESIDENCY_THM_GFX,
+	METRICS_THROTTLER_RESIDENCY_THM_SOC,
 } MetricsMember_t;
 
 enum smu_cmn2asic_mapping_type {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
index 22f88842a7fd2..8f42771e1f0a2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
@@ -27,7 +27,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if
 // any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 6
+#define PMFW_DRIVER_IF_VERSION 7
 
 typedef struct {
   int32_t value;
@@ -150,37 +150,50 @@ typedef struct {
 } DpmClocks_t;
 
 typedef struct {
-  uint16_t CoreFrequency[16];        //Target core frequency [MHz]
-  uint16_t CorePower[16];            //CAC calculated core power [mW]
-  uint16_t CoreTemperature[16];      //TSEN measured core temperature [centi-C]
-  uint16_t GfxTemperature;           //TSEN measured GFX temperature [centi-C]
-  uint16_t SocTemperature;           //TSEN measured SOC temperature [centi-C]
-  uint16_t StapmOpnLimit;            //Maximum IRM defined STAPM power limit [mW]
-  uint16_t StapmCurrentLimit;        //Time filtered STAPM power limit [mW]
-  uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz]
-  uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz]
-  uint16_t SkinTemp;                 //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C]
-  uint16_t GfxclkFrequency;          //Time filtered target GFXCLK frequency [MHz]
-  uint16_t FclkFrequency;            //Time filtered target FCLK frequency [MHz]
-  uint16_t GfxActivity;              //Time filtered GFX busy % [0-100]
-  uint16_t SocclkFrequency;          //Time filtered target SOCCLK frequency [MHz]
-  uint16_t VclkFrequency;            //Time filtered target VCLK frequency [MHz]
-  uint16_t VcnActivity;              //Time filtered VCN busy % [0-100]
-  uint16_t VpeclkFrequency;          //Time filtered target VPECLK frequency [MHz]
-  uint16_t IpuclkFrequency;          //Time filtered target IPUCLK frequency [MHz]
-  uint16_t IpuBusy[8];               //Time filtered IPU per-column busy % [0-100]
-  uint16_t DRAMReads;                //Time filtered DRAM read bandwidth [MB/sec]
-  uint16_t DRAMWrites;               //Time filtered DRAM write bandwidth [MB/sec]
-  uint16_t CoreC0Residency[16];      //Time filtered per-core C0 residency % [0-100]
-  uint16_t IpuPower;                 //Time filtered IPU power [mW]
-  uint32_t ApuPower;                 //Time filtered APU power [mW]
-  uint32_t GfxPower;                 //Time filtered GFX power [mW]
-  uint32_t dGpuPower;                //Time filtered dGPU power [mW]
-  uint32_t SocketPower;              //Time filtered power used for PPT/STAPM [APU+dGPU] [mW]
-  uint32_t AllCorePower;             //Time filtered sum of core power across all cores in the socket [mW]
-  uint32_t FilterAlphaValue;         //Metrics table alpha filter time constant [us]
-  uint32_t MetricsCounter;           //Counter that is incremented on every metrics table update [PM_TIMER cycles]
-  uint32_t spare[16];
+  uint16_t CoreFrequency[16];          //Target core frequency [MHz]
+  uint16_t CorePower[16];              //CAC calculated core power [mW]
+  uint16_t CoreTemperature[16];        //TSEN measured core temperature [centi-C]
+  uint16_t GfxTemperature;             //TSEN measured GFX temperature [centi-C]
+  uint16_t SocTemperature;             //TSEN measured SOC temperature [centi-C]
+  uint16_t StapmOpnLimit;              //Maximum IRM defined STAPM power limit [mW]
+  uint16_t StapmCurrentLimit;          //Time filtered STAPM power limit [mW]
+  uint16_t InfrastructureCpuMaxFreq;   //CCLK frequency limit enforced on classic cores [MHz]
+  uint16_t InfrastructureGfxMaxFreq;   //GFXCLK frequency limit enforced on GFX [MHz]
+  uint16_t SkinTemp;                   //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C]
+  uint16_t GfxclkFrequency;            //Time filtered target GFXCLK frequency [MHz]
+  uint16_t FclkFrequency;              //Time filtered target FCLK frequency [MHz]
+  uint16_t GfxActivity;                //Time filtered GFX busy % [0-100]
+  uint16_t SocclkFrequency;            //Time filtered target SOCCLK frequency [MHz]
+  uint16_t VclkFrequency;              //Time filtered target VCLK frequency [MHz]
+  uint16_t VcnActivity;                //Time filtered VCN busy % [0-100]
+  uint16_t VpeclkFrequency;            //Time filtered target VPECLK frequency [MHz]
+  uint16_t IpuclkFrequency;            //Time filtered target IPUCLK frequency [MHz]
+  uint16_t IpuBusy[8];                 //Time filtered IPU per-column busy % [0-100]
+  uint16_t DRAMReads;                  //Time filtered DRAM read bandwidth [MB/sec]
+  uint16_t DRAMWrites;                 //Time filtered DRAM write bandwidth [MB/sec]
+  uint16_t CoreC0Residency[16];        //Time filtered per-core C0 residency % [0-100]
+  uint16_t IpuPower;                   //Time filtered IPU power [mW]
+  uint32_t ApuPower;                   //Time filtered APU power [mW]
+  uint32_t GfxPower;                   //Time filtered GFX power [mW]
+  uint32_t dGpuPower;                  //Time filtered dGPU power [mW]
+  uint32_t SocketPower;                //Time filtered power used for PPT/STAPM [APU+dGPU] [mW]
+  uint32_t AllCorePower;               //Time filtered sum of core power across all cores in the socket [mW]
+  uint32_t FilterAlphaValue;           //Metrics table alpha filter time constant [us]
+  uint32_t MetricsCounter;             //Counter that is incremented on every metrics table update [PM_TIMER cycles]
+  uint16_t MemclkFrequency;            //Time filtered target MEMCLK frequency [MHz]
+  uint16_t MpipuclkFrequency;          //Time filtered target MPIPUCLK frequency [MHz]
+  uint16_t IpuReads;                   //Time filtered IPU read bandwidth [MB/sec]
+  uint16_t IpuWrites;                  //Time filtered IPU write bandwidth [MB/sec]
+  uint32_t ThrottleResidency_PROCHOT;  //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_SPL;      //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_FPPT;     //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_SPPT;     //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_GFX;  //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_SOC;  //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles]
+  uint16_t Psys;                       //Time filtered Psys power [mW]
+  uint16_t spare1;
+  uint32_t spare[6];
 } SmuMetrics_t;
 
 //ISP tile definitions
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index 03b38c3a99684..94ccdbfd70909 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -246,11 +246,20 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
 		*value = 0;
 		break;
 	case METRICS_AVERAGE_UCLK:
-		*value = 0;
+		*value = metrics->MemclkFrequency;
 		break;
 	case METRICS_AVERAGE_FCLK:
 		*value = metrics->FclkFrequency;
 		break;
+	case METRICS_AVERAGE_VPECLK:
+		*value = metrics->VpeclkFrequency;
+		break;
+	case METRICS_AVERAGE_IPUCLK:
+		*value = metrics->IpuclkFrequency;
+		break;
+	case METRICS_AVERAGE_MPIPUCLK:
+		*value = metrics->MpipuclkFrequency;
+		break;
 	case METRICS_AVERAGE_GFXACTIVITY:
 		*value = metrics->GfxActivity / 100;
 		break;
@@ -270,8 +279,26 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
 		*value = metrics->SocTemperature / 100 *
 		SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
-	case METRICS_THROTTLER_STATUS:
-		*value = 0;
+	case METRICS_THROTTLER_RESIDENCY_PROCHOT:
+		*value = metrics->ThrottleResidency_PROCHOT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_SPL:
+		*value = metrics->ThrottleResidency_SPL;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_FPPT:
+		*value = metrics->ThrottleResidency_FPPT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_SPPT:
+		*value = metrics->ThrottleResidency_SPPT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_CORE:
+		*value = metrics->ThrottleResidency_THM_CORE;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_GFX:
+		*value = metrics->ThrottleResidency_THM_GFX;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_SOC:
+		*value = metrics->ThrottleResidency_THM_SOC;
 		break;
 	case METRICS_VOLTAGE_VDDGFX:
 		*value = 0;
@@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 		sizeof(uint16_t) * 16);
 	gpu_metrics->average_dram_reads = metrics.DRAMReads;
 	gpu_metrics->average_dram_writes = metrics.DRAMWrites;
+	gpu_metrics->average_ipu_reads = metrics.IpuReads;
+	gpu_metrics->average_ipu_writes = metrics.IpuWrites;
 
 	gpu_metrics->average_socket_power = metrics.SocketPower;
 	gpu_metrics->average_ipu_power = metrics.IpuPower;
@@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->average_gfx_power = metrics.GfxPower;
 	gpu_metrics->average_dgpu_power = metrics.dGpuPower;
 	gpu_metrics->average_all_core_power = metrics.AllCorePower;
+	gpu_metrics->average_sys_power = metrics.Psys;
 	memcpy(&gpu_metrics->average_core_power[0],
 		&metrics.CorePower[0],
 		sizeof(uint16_t) * 16);
@@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->average_fclk_frequency = metrics.FclkFrequency;
 	gpu_metrics->average_vclk_frequency = metrics.VclkFrequency;
 	gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency;
+	gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency;
+	gpu_metrics->average_mpipu_frequency = metrics.MpipuclkFrequency;
 
 	memcpy(&gpu_metrics->current_coreclk[0],
 		&metrics.CoreFrequency[0],
@@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->current_core_maxfreq = metrics.InfrastructureCpuMaxFreq;
 	gpu_metrics->current_gfx_maxfreq = metrics.InfrastructureGfxMaxFreq;
 
+	gpu_metrics->throttle_residency_prochot = metrics.ThrottleResidency_PROCHOT;
+	gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL;
+	gpu_metrics->throttle_residency_fppt = metrics.ThrottleResidency_FPPT;
+	gpu_metrics->throttle_residency_sppt = metrics.ThrottleResidency_SPPT;
+	gpu_metrics->throttle_residency_thm_core = metrics.ThrottleResidency_THM_CORE;
+	gpu_metrics->throttle_residency_thm_gfx = metrics.ThrottleResidency_THM_GFX;
+	gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC;
+
 	gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue;
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
-- 
GitLab


From 0d65efcbe350f1e9e96f24905df4929188e80d56 Mon Sep 17 00:00:00 2001
From: Yang Wang <kevinyang.wang@amd.com>
Date: Mon, 4 Dec 2023 10:17:57 +0800
Subject: [PATCH 0891/2340] drm/amd/pm: support new mca smu error code decoding

support new mca smu error code decoding from smu 85.86.0 for smu v13.0.6

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h              | 2 ++
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 2b488fcf2f95b..e51e8918e6671 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -46,6 +46,8 @@
 #define MCA_REG__STATUS__ERRORCODEEXT(x)	MCA_REG_FIELD(x, 21, 16)
 #define MCA_REG__STATUS__ERRORCODE(x)		MCA_REG_FIELD(x, 15, 0)
 
+#define MCA_REG__SYND__ERRORINFORMATION(x)	MCA_REG_FIELD(x, 17, 0)
+
 enum amdgpu_mca_ip {
 	AMDGPU_MCA_IP_UNKNOW = -1,
 	AMDGPU_MCA_IP_PSP = 0,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index dda2249c4994f..ddd782fbee7a1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2635,6 +2635,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct
 static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
 				  enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
 {
+	struct smu_context *smu = adev->powerplay.pp_handle;
 	uint32_t errcode, instlo;
 
 	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
@@ -2642,7 +2643,13 @@ static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amd
 	if (instlo != 0x03b30400)
 		return false;
 
-	errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
+	if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) {
+		errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]);
+		errcode &= 0xff;
+	} else {
+		errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
+	}
+
 	return mca_smu_check_error_code(adev, mca_ras, errcode);
 }
 
-- 
GitLab


From 71a9d7a2a1034e72074335ac2aff4f9b8013ae6c Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Mon, 20 Nov 2023 10:43:02 +0800
Subject: [PATCH 0892/2340] drm/amdgpu: Update fw version for boot time error
 query

Boot time error query is not available until fw a10109

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 3cf4684d0d3f3..5f46877f78cf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -821,7 +821,7 @@ static int psp_v13_0_query_boot_status(struct psp_context *psp)
 	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
 		return 0;
 
-	if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007)
+	if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109)
 		return 0;
 
 	for_each_inst(i, inst_mask) {
-- 
GitLab


From 04a71f110446eb6ffdaaa13407b4c1bf286db760 Mon Sep 17 00:00:00 2001
From: Yang Wang <kevinyang.wang@amd.com>
Date: Mon, 4 Dec 2023 10:44:32 +0800
Subject: [PATCH 0893/2340] drm/amdgpu: optimize the printing order of error
 data

sort error data list to optimize the printing order.

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 72634d675e277..bacb59d8b701a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -28,6 +28,7 @@
 #include <linux/reboot.h>
 #include <linux/syscalls.h>
 #include <linux/pm_runtime.h>
+#include <linux/list_sort.h>
 
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
@@ -3674,6 +3675,21 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void)
 	return err_node;
 }
 
+static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
+{
+	struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
+	struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
+	struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
+	struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
+
+	if (unlikely(infoa->socket_id != infob->socket_id))
+		return infoa->socket_id - infob->socket_id;
+	else
+		return infoa->die_id - infob->die_id;
+
+	return 0;
+}
+
 static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
 						      struct amdgpu_smuio_mcm_config_info *mcm_info)
 {
@@ -3691,6 +3707,7 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
 
 	err_data->err_list_count++;
 	list_add_tail(&err_node->node, &err_data->err_node_list);
+	list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
 
 	return &err_node->err_info;
 }
-- 
GitLab


From 7a2464fac80d42f6f8819fed97a553e9c2f43310 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 30 Nov 2023 15:50:16 +0800
Subject: [PATCH 0894/2340] drm/radeon: check the alloc_workqueue return value
 in radeon_crtc_init()

check the alloc_workqueue return value in radeon_crtc_init()
to avoid null-ptr-deref.

Fixes: fa7f517cb26e ("drm/radeon: rework page flip handling v4")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_display.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 901e75ec70ff4..efd18c8d84c83 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -687,11 +687,16 @@ static void radeon_crtc_init(struct drm_device *dev, int index)
 	if (radeon_crtc == NULL)
 		return;
 
+	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
+	if (!radeon_crtc->flip_queue) {
+		kfree(radeon_crtc);
+		return;
+	}
+
 	drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs);
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
-	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
 	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	if (rdev->family >= CHIP_BONAIRE) {
-- 
GitLab


From 72ada8603e36291ad91e4f40f10ef742ef79bc4e Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 30 Nov 2023 17:34:07 -0500
Subject: [PATCH 0895/2340] drm/amd/display: Increase frame warning limit with
 KASAN or KCSAN in dml

Does the same thing as:
commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2")

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202311302107.hUDXVyWT-lkp@intel.com/
Fixes: 67e38874b85b ("drm/amd/display: Increase num voltage states to 40")
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Alvin Lee <alvin.lee2@amd.com>
Cc: Hamza Mahfooz <hamza.mahfooz@amd.com>
Cc: Samson Tam <samson.tam@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index ea7d60f9a9b45..6042a5a6a44f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -61,8 +61,12 @@ endif
 endif
 
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+frame_warn_flag := -Wframe-larger-than=3072
+else
 frame_warn_flag := -Wframe-larger-than=2048
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
-- 
GitLab


From 13ac7c0e30e87e006cfad67ce4337268f65d4333 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Sat, 7 Oct 2023 16:53:24 +0530
Subject: [PATCH 0896/2340] drm/amdgpu: Read aquavanjaram WAFL register state

Add support to read state of WAFL links in aquavanjaram SOC.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 76 ++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index a00b8c6f0a942..eec15f1c143fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -848,6 +848,79 @@ static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
 	return xgmi_reg_state->common_header.structure_size;
 }
 
+#define smnreg_0x11C00070	0x11C00070
+#define smnreg_0x11C00210	0x11C00210
+
+static struct aqua_reg_list wafl_reg_addrs[] = {
+	{ smnreg_0x11C00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11C00210, 1, 0 },
+};
+
+#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_WAFL_SMN_REGS 5
+
+static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_wafl_v1_0 *wafl_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_wafl_instances = 8;
+	int inst = 0, i, j, r, n;
+	const int wafl_inst = 2;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf;
+
+	szbuf = sizeof(*wafl_reg_state) +
+		amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs),
+				    NUM_WAFL_SMN_REGS);
+
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	p = &wafl_reg_state->wafl_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		for (j = 0; j < wafl_inst; ++j) {
+			wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p;
+			wafl_regs->inst_header.instance = inst++;
+
+			wafl_regs->inst_header.state = AMDGPU_INST_S_OK;
+			wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
+
+			reg_data = wafl_regs->smn_reg_values;
+
+			for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) {
+				start_addr = wafl_reg_addrs[r].start_addr;
+				incrx = wafl_reg_addrs[r].incrx;
+				num_regs = wafl_reg_addrs[r].num_regs;
+				for (n = 0; n < num_regs; n++) {
+					aqua_read_smn_ext(
+						adev, reg_data,
+						WAFL_LINK_REG(start_addr, j) +
+							n * incrx,
+						i);
+					++reg_data;
+				}
+			}
+			p = reg_data;
+		}
+	}
+
+	wafl_reg_state->common_header.structure_size = szbuf;
+	wafl_reg_state->common_header.format_revision = 1;
+	wafl_reg_state->common_header.content_revision = 0;
+	wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL;
+	wafl_reg_state->common_header.num_instances = max_wafl_instances;
+
+	return wafl_reg_state->common_header.structure_size;
+}
+
 ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
 				    enum amdgpu_reg_state reg_state, void *buf,
 				    size_t max_size)
@@ -861,6 +934,9 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
 	case AMDGPU_REG_STATE_TYPE_XGMI:
 		size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
 		break;
+	case AMDGPU_REG_STATE_TYPE_WAFL:
+		size = aqua_vanjaram_read_wafl_state(adev, buf, max_size);
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
GitLab


From 650f0487d6cd95c4e07a41d3a464d0f60a983a15 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Sat, 7 Oct 2023 19:52:11 +0530
Subject: [PATCH 0897/2340] drm/amdgpu: Read aquavanjaram USR register state

Add support to read state of USR links in aquavanjaram SOC.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 127 +++++++++++++++++++++
 1 file changed, 127 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index eec15f1c143fa..d6f808acfb17b 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -921,6 +921,125 @@ static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
 	return wafl_reg_state->common_header.structure_size;
 }
 
+#define smnreg_0x1B311060 0x1B311060
+#define smnreg_0x1B411060 0x1B411060
+#define smnreg_0x1B511060 0x1B511060
+#define smnreg_0x1B611060 0x1B611060
+
+#define smnreg_0x1C307120 0x1C307120
+#define smnreg_0x1C317120 0x1C317120
+
+#define smnreg_0x1C320830 0x1C320830
+#define smnreg_0x1C380830 0x1C380830
+#define smnreg_0x1C3D0830 0x1C3D0830
+#define smnreg_0x1C420830 0x1C420830
+
+#define smnreg_0x1C320100 0x1C320100
+#define smnreg_0x1C380100 0x1C380100
+#define smnreg_0x1C3D0100 0x1C3D0100
+#define smnreg_0x1C420100 0x1C420100
+
+#define smnreg_0x1B310500 0x1B310500
+#define smnreg_0x1C300400 0x1C300400
+
+#define USR_CAKE_INCR 0x11000
+#define USR_LINK_INCR 0x100000
+#define USR_CP_INCR 0x10000
+
+#define NUM_USR_SMN_REGS	20
+
+struct aqua_reg_list usr_reg_addrs[] = {
+	{ smnreg_0x1B311060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B411060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B511060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B611060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1C307120, 2, DW_ADDR_INCR },
+	{ smnreg_0x1C317120, 2, DW_ADDR_INCR },
+};
+
+#define NUM_USR1_SMN_REGS	46
+struct aqua_reg_list usr1_reg_addrs[] = {
+	{ smnreg_0x1C320830, 6, USR_CAKE_INCR },
+	{ smnreg_0x1C380830, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C3D0830, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C420830, 4, USR_CAKE_INCR },
+	{ smnreg_0x1C320100, 6, USR_CAKE_INCR },
+	{ smnreg_0x1C380100, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C3D0100, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C420100, 4, USR_CAKE_INCR },
+	{ smnreg_0x1B310500, 4, USR_LINK_INCR },
+	{ smnreg_0x1C300400, 2, USR_CP_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev,
+					    void *buf, size_t max_size,
+					    int reg_state)
+{
+	uint32_t start_addr, incrx, num_regs, szbuf, num_smn;
+	struct amdgpu_reg_state_usr_v1_0 *usr_reg_state;
+	struct amdgpu_regs_usr_v1_0 *usr_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_usr_instances = 4;
+	struct aqua_reg_list *reg_addrs;
+	int inst = 0, i, n, r, arr_size;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_USR:
+		arr_size = ARRAY_SIZE(usr_reg_addrs);
+		reg_addrs = usr_reg_addrs;
+		num_smn = NUM_USR_SMN_REGS;
+		break;
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		arr_size = ARRAY_SIZE(usr1_reg_addrs);
+		reg_addrs = usr1_reg_addrs;
+		num_smn = NUM_USR1_SMN_REGS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf;
+
+	szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances,
+							     sizeof(*usr_regs),
+							     num_smn);
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	p = &usr_reg_state->usr_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		usr_regs = (struct amdgpu_regs_usr_v1_0 *)p;
+		usr_regs->inst_header.instance = inst++;
+		usr_regs->inst_header.state = AMDGPU_INST_S_OK;
+		usr_regs->inst_header.num_smn_regs = num_smn;
+		reg_data = usr_regs->smn_reg_values;
+
+		for (r = 0; r < arr_size; r++) {
+			start_addr = reg_addrs[r].start_addr;
+			incrx = reg_addrs[r].incrx;
+			num_regs = reg_addrs[r].num_regs;
+			for (n = 0; n < num_regs; n++) {
+				aqua_read_smn_ext(adev, reg_data,
+						  start_addr + n * incrx, i);
+				reg_data++;
+			}
+		}
+		p = reg_data;
+	}
+
+	usr_reg_state->common_header.structure_size = szbuf;
+	usr_reg_state->common_header.format_revision = 1;
+	usr_reg_state->common_header.content_revision = 0;
+	usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR;
+	usr_reg_state->common_header.num_instances = max_usr_instances;
+
+	return usr_reg_state->common_header.structure_size;
+}
+
 ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
 				    enum amdgpu_reg_state reg_state, void *buf,
 				    size_t max_size)
@@ -937,6 +1056,14 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
 	case AMDGPU_REG_STATE_TYPE_WAFL:
 		size = aqua_vanjaram_read_wafl_state(adev, buf, max_size);
 		break;
+	case AMDGPU_REG_STATE_TYPE_USR:
+		size = aqua_vanjaram_read_usr_state(adev, buf, max_size,
+						    AMDGPU_REG_STATE_TYPE_USR);
+		break;
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		size = aqua_vanjaram_read_usr_state(
+			adev, buf, max_size, AMDGPU_REG_STATE_TYPE_USR_1);
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
GitLab


From 3b35dd87c5969637ab5aa6666bbab6e6929c9e16 Mon Sep 17 00:00:00 2001
From: Aurabindo Pillai <aurabindo.pillai@amd.com>
Date: Thu, 30 Nov 2023 16:53:21 -0500
Subject: [PATCH 0898/2340] drm/amd: Add a DC debug mask for DML2

[Why&How]
To enable testing/development of DML2, expose a new debug mask for future use.

Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/amd_shared.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 7f98394338c26..bf7f258c324a1 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -257,6 +257,7 @@ enum DC_DEBUG_MASK {
 	DC_DISABLE_MPO = 0x40,
 	DC_DISABLE_REPLAY = 0x50,
 	DC_ENABLE_DPIA_TRACE = 0x80,
+	DC_ENABLE_DML2 = 0x100,
 };
 
 enum amd_dpm_forced_level;
-- 
GitLab


From 4657b3e45683223b5d982ec13a6e2cd367004bb6 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Wed, 29 Nov 2023 18:06:55 +0530
Subject: [PATCH 0899/2340] drm/amdgpu: Restrict extended wait to PSP v13.0.6

Only PSPv13.0.6 SOCs take a longer time to reach steady state. Other
PSPv13 based SOCs don't need extended wait. Also, reduce PSPv13.0.6 wait
time.

Cc: stable@vger.kernel.org
Fixes: fc5988907156 ("drm/amdgpu: update retry times for psp vmbx wait")
Fixes: d8c1925ba8cd ("drm/amdgpu: update retry times for psp BL wait")
Link: https://lore.kernel.org/amd-gfx/34dd4c66-f7bf-44aa-af8f-c82889dd652c@amd.com/
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 5f46877f78cf7..df1844d0800f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
 #define GFX_CMD_USB_PD_USE_LFB 0x480
 
 /* Retry times for vmbx ready wait */
-#define PSP_VMBX_POLLING_LIMIT 20000
+#define PSP_VMBX_POLLING_LIMIT 3000
 
 /* VBIOS gfl defines */
 #define MBOX_READY_MASK 0x80000000
@@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
 static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
-	int retry_loop, ret;
+	int retry_loop, retry_cnt, ret;
 
+	retry_cnt =
+		(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ?
+			PSP_VMBX_POLLING_LIMIT :
+			10;
 	/* Wait for bootloader to signify that it is ready having bit 31 of
 	 * C2PMSG_35 set to 1. All other bits are expected to be cleared.
 	 * If there is an error in processing command, bits[7:0] will be set.
 	 * This is applicable for PSP v13.0.6 and newer.
 	 */
-	for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) {
+	for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) {
 		ret = psp_wait_for(
 			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
 			0x80000000, 0xffffffff, false);
-- 
GitLab


From 0737df9ed0997f5b8addd6e2b9699a8c6edba2e4 Mon Sep 17 00:00:00 2001
From: Zhipeng Lu <alexious@zju.edu.cn>
Date: Mon, 4 Dec 2023 16:57:56 +0800
Subject: [PATCH 0900/2340] drm/radeon/dpm: fix a memleak in
 sumo_parse_power_table

The rdev->pm.dpm.ps allocated by kcalloc should be freed in every
following error-handling path. However, in the error-handling of
rdev->pm.power_state[i].clock_info the rdev->pm.dpm.ps is not freed,
resulting in a memleak in this function.

Fixes: 80ea2c129c76 ("drm/radeon/kms: add dpm support for sumo asics (v2)")
Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/sumo_dpm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index f74f381af05fd..d49c145db4370 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -1493,8 +1493,10 @@ static int sumo_parse_power_table(struct radeon_device *rdev)
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
-		if (!rdev->pm.power_state[i].clock_info)
+		if (!rdev->pm.power_state[i].clock_info) {
+			kfree(rdev->pm.dpm.ps);
 			return -EINVAL;
+		}
 		ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL);
 		if (ps == NULL) {
 			kfree(rdev->pm.dpm.ps);
-- 
GitLab


From 28c28d7f77c06ac2c0b8f9c82bc04eba22912b3b Mon Sep 17 00:00:00 2001
From: Zhipeng Lu <alexious@zju.edu.cn>
Date: Mon, 4 Dec 2023 18:21:54 +0800
Subject: [PATCH 0901/2340] drm/radeon/trinity_dpm: fix a memleak in
 trinity_parse_power_table

The rdev->pm.dpm.ps allocated by kcalloc should be freed in every
following error-handling path. However, in the error-handling of
rdev->pm.power_state[i].clock_info the rdev->pm.dpm.ps is not freed,
resulting in a memleak in this function.

Fixes: d70229f70447 ("drm/radeon/kms: add dpm support for trinity asics")
Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/trinity_dpm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 08ea1c864cb23..ef1cc7bad20a7 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -1726,8 +1726,10 @@ static int trinity_parse_power_table(struct radeon_device *rdev)
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
-		if (!rdev->pm.power_state[i].clock_info)
+		if (!rdev->pm.power_state[i].clock_info) {
+			kfree(rdev->pm.dpm.ps);
 			return -EINVAL;
+		}
 		ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL);
 		if (ps == NULL) {
 			kfree(rdev->pm.dpm.ps);
-- 
GitLab


From 44f3356e36c2082f0f91c4f6b8859c577cee14a4 Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Fri, 24 Nov 2023 08:32:06 -0500
Subject: [PATCH 0902/2340] drm/amd/amdgpu: Add SMUIO headers for 10.0.2

These were requested by a UMR user for debugging purposes.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../asic_reg/smuio/smuio_10_0_2_offset.h      | 102 ++++++++++
 .../asic_reg/smuio/smuio_10_0_2_sh_mask.h     | 184 ++++++++++++++++++
 2 files changed, 286 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h
 create mode 100644 drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h

diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h
new file mode 100644
index 0000000000000..a4dd372c05419
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2023  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_10_0_2_OFFSET_HEADER
+
+// addressBlock: smuio_smuio_misc_SmuSmuioDec
+// base address: 0x5a000
+#define mmSMUIO_MCM_CONFIG                                                                             0x0023
+#define mmSMUIO_MCM_CONFIG_BASE_IDX                                                                    0
+#define mmIP_DISCOVERY_VERSION                                                                         0x0000
+#define mmIP_DISCOVERY_VERSION_BASE_IDX                                                                1
+#define mmIO_SMUIO_PINSTRAP                                                                            0x01b1
+#define mmIO_SMUIO_PINSTRAP_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER0                                                                            0x01b2
+#define mmSCRATCH_REGISTER0_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER1                                                                            0x01b3
+#define mmSCRATCH_REGISTER1_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER2                                                                            0x01b4
+#define mmSCRATCH_REGISTER2_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER3                                                                            0x01b5
+#define mmSCRATCH_REGISTER3_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER4                                                                            0x01b6
+#define mmSCRATCH_REGISTER4_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER5                                                                            0x01b7
+#define mmSCRATCH_REGISTER5_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER6                                                                            0x01b8
+#define mmSCRATCH_REGISTER6_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER7                                                                            0x01b9
+#define mmSCRATCH_REGISTER7_BASE_IDX                                                                   1
+
+
+// addressBlock: smuio_smuio_reset_SmuSmuioDec
+// base address: 0x5a300
+#define mmSMUIO_MP_RESET_INTR                                                                          0x00c1
+#define mmSMUIO_MP_RESET_INTR_BASE_IDX                                                                 0
+#define mmSMUIO_SOC_HALT                                                                               0x00c2
+#define mmSMUIO_SOC_HALT_BASE_IDX                                                                      0
+#define mmSMUIO_GFX_MISC_CNTL                                                                          0x00c8
+#define mmSMUIO_GFX_MISC_CNTL_BASE_IDX                                                                 0
+
+
+// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec
+// base address: 0x5a000
+#define mmPWROK_REFCLK_GAP_CYCLES                                                                      0x0001
+#define mmPWROK_REFCLK_GAP_CYCLES_BASE_IDX                                                             1
+#define mmGOLDEN_TSC_INCREMENT_UPPER                                                                   0x0004
+#define mmGOLDEN_TSC_INCREMENT_UPPER_BASE_IDX                                                          1
+#define mmGOLDEN_TSC_INCREMENT_LOWER                                                                   0x0005
+#define mmGOLDEN_TSC_INCREMENT_LOWER_BASE_IDX                                                          1
+#define mmGOLDEN_TSC_COUNT_UPPER                                                                       0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_BASE_IDX                                                              1
+#define mmGOLDEN_TSC_COUNT_LOWER                                                                       0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_BASE_IDX                                                              1
+#define mmGFX_GOLDEN_TSC_SHADOW_UPPER                                                                  0x0029
+#define mmGFX_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX                                                         1
+#define mmGFX_GOLDEN_TSC_SHADOW_LOWER                                                                  0x002a
+#define mmGFX_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX                                                         1
+#define mmSOC_GOLDEN_TSC_SHADOW_UPPER                                                                  0x002b
+#define mmSOC_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX                                                         1
+#define mmSOC_GOLDEN_TSC_SHADOW_LOWER                                                                  0x002c
+#define mmSOC_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX                                                         1
+#define mmSOC_GAP_PWROK                                                                                0x002d
+#define mmSOC_GAP_PWROK_BASE_IDX                                                                       1
+
+// addressBlock: smuio_smuio_swtimer_SmuSmuioDec
+// base address: 0x5ac40
+#define mmPWR_VIRT_RESET_REQ                                                                           0x0110
+#define mmPWR_VIRT_RESET_REQ_BASE_IDX                                                                  1
+#define mmPWR_DISP_TIMER_CONTROL                                                                       0x0111
+#define mmPWR_DISP_TIMER_CONTROL_BASE_IDX                                                              1
+#define mmPWR_DISP_TIMER2_CONTROL                                                                      0x0113
+#define mmPWR_DISP_TIMER2_CONTROL_BASE_IDX                                                             1
+#define mmPWR_DISP_TIMER_GLOBAL_CONTROL                                                                0x0115
+#define mmPWR_DISP_TIMER_GLOBAL_CONTROL_BASE_IDX                                                       1
+#define mmPWR_IH_CONTROL                                                                               0x0116
+#define mmPWR_IH_CONTROL_BASE_IDX                                                                      1
+
+// addressBlock: smuio_smuio_svi0_SmuSmuioDec
+// base address: 0x6f000
+#define mmSMUSVI0_TEL_PLANE0                                                                           0x520e
+#define mmSMUSVI0_TEL_PLANE0_BASE_IDX                                                                  1
+#define mmSMUSVI0_PLANE0_CURRENTVID                                                                    0x5217
+#define mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX                                                           1
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h
new file mode 100644
index 0000000000000..d10ae61c346b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2023  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_10_0_2_SH_MASK_HEADER
+
+// addressBlock: smuio_smuio_misc_SmuSmuioDec
+//SMUIO_MCM_CONFIG
+#define SMUIO_MCM_CONFIG__DIE_ID__SHIFT                                                                       0x0
+#define SMUIO_MCM_CONFIG__PKG_TYPE__SHIFT                                                                     0x2
+#define SMUIO_MCM_CONFIG__SOCKET_ID__SHIFT                                                                    0x5
+#define SMUIO_MCM_CONFIG__PKG_SUBTYPE__SHIFT                                                                  0x6
+#define SMUIO_MCM_CONFIG__CONSOLE_K__SHIFT                                                                    0x10
+#define SMUIO_MCM_CONFIG__CONSOLE_A__SHIFT                                                                    0x11
+#define SMUIO_MCM_CONFIG__DIE_ID_MASK                                                                         0x00000003L
+#define SMUIO_MCM_CONFIG__PKG_TYPE_MASK                                                                       0x0000001CL
+#define SMUIO_MCM_CONFIG__SOCKET_ID_MASK                                                                      0x00000020L
+#define SMUIO_MCM_CONFIG__PKG_SUBTYPE_MASK                                                                    0x000000C0L
+#define SMUIO_MCM_CONFIG__CONSOLE_K_MASK                                                                      0x00010000L
+#define SMUIO_MCM_CONFIG__CONSOLE_A_MASK                                                                      0x00020000L
+//IP_DISCOVERY_VERSION
+#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION__SHIFT                                                     0x0
+#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION_MASK                                                       0xFFFFFFFFL
+//IO_SMUIO_PINSTRAP
+#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN__SHIFT                                                               0x0
+#define IO_SMUIO_PINSTRAP__AUD__SHIFT                                                                         0x3
+#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN_MASK                                                                 0x00000007L
+#define IO_SMUIO_PINSTRAP__AUD_MASK                                                                           0x00000018L
+//SCRATCH_REGISTER0
+#define SCRATCH_REGISTER0__ScratchPad0__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER0__ScratchPad0_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER1
+#define SCRATCH_REGISTER1__ScratchPad1__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER1__ScratchPad1_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER2
+#define SCRATCH_REGISTER2__ScratchPad2__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER2__ScratchPad2_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER3
+#define SCRATCH_REGISTER3__ScratchPad3__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER3__ScratchPad3_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER4
+#define SCRATCH_REGISTER4__ScratchPad4__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER4__ScratchPad4_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER5
+#define SCRATCH_REGISTER5__ScratchPad5__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER5__ScratchPad5_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER6
+#define SCRATCH_REGISTER6__ScratchPad6__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER6__ScratchPad6_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER7
+#define SCRATCH_REGISTER7__ScratchPad7__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER7__ScratchPad7_MASK                                                                   0xFFFFFFFFL
+
+// addressBlock: smuio_smuio_reset_SmuSmuioDec
+//SMUIO_MP_RESET_INTR
+#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR__SHIFT                                                       0x0
+#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR_MASK                                                         0x00000001L
+//SMUIO_SOC_HALT
+#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN__SHIFT                                                             0x2
+#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN__SHIFT                                                            0x3
+#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN_MASK                                                               0x00000004L
+#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN_MASK                                                              0x00000008L
+//SMUIO_GFX_MISC_CNTL
+#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff__SHIFT                                                    0x0
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT                                                         0x1
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH__SHIFT                                                   0x3
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN__SHIFT                                                       0x4
+#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff_MASK                                                      0x00000001L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK                                                           0x00000006L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH_MASK                                                     0x00000008L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN_MASK                                                         0x00000010L
+
+// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec
+//PWROK_REFCLK_GAP_CYCLES
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles__SHIFT                                      0x0
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles__SHIFT                                     0x8
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles_MASK                                        0x000000FFL
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles_MASK                                       0x0000FF00L
+//GOLDEN_TSC_INCREMENT_UPPER
+#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper__SHIFT                                            0x0
+#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper_MASK                                              0x00FFFFFFL
+//GOLDEN_TSC_INCREMENT_LOWER
+#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower__SHIFT                                            0x0
+#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower_MASK                                              0xFFFFFFFFL
+//GOLDEN_TSC_COUNT_UPPER
+#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper__SHIFT                                                    0x0
+#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper_MASK                                                      0x00FFFFFFL
+//GOLDEN_TSC_COUNT_LOWER
+#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower__SHIFT                                                    0x0
+#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower_MASK                                                      0xFFFFFFFFL
+//GFX_GOLDEN_TSC_SHADOW_UPPER
+#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper__SHIFT                                           0x0
+#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper_MASK                                             0x00FFFFFFL
+//GFX_GOLDEN_TSC_SHADOW_LOWER
+#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower__SHIFT                                           0x0
+#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower_MASK                                             0xFFFFFFFFL
+//SOC_GOLDEN_TSC_SHADOW_UPPER
+#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper__SHIFT                                           0x0
+#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper_MASK                                             0x00FFFFFFL
+//SOC_GOLDEN_TSC_SHADOW_LOWER
+#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower__SHIFT                                           0x0
+#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower_MASK                                             0xFFFFFFFFL
+//SOC_GAP_PWROK
+#define SOC_GAP_PWROK__soc_gap_pwrok__SHIFT                                                                   0x0
+#define SOC_GAP_PWROK__soc_gap_pwrok_MASK                                                                     0x00000001L
+
+// addressBlock: smuio_smuio_swtimer_SmuSmuioDec
+//PWR_VIRT_RESET_REQ
+#define PWR_VIRT_RESET_REQ__VF_FLR__SHIFT                                                                     0x0
+#define PWR_VIRT_RESET_REQ__PF_FLR__SHIFT                                                                     0x1f
+#define PWR_VIRT_RESET_REQ__VF_FLR_MASK                                                                       0x7FFFFFFFL
+#define PWR_VIRT_RESET_REQ__PF_FLR_MASK                                                                       0x80000000L
+//PWR_DISP_TIMER_CONTROL
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT__SHIFT                                                   0x0
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT                                                  0x19
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT                                                 0x1a
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK__SHIFT                                                    0x1b
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT                                                 0x1c
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE__SHIFT                                                    0x1d
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE__SHIFT                                                    0x1e
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT_MASK                                                     0x01FFFFFFL
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE_MASK                                                    0x02000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE_MASK                                                   0x04000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK_MASK                                                      0x08000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK_MASK                                                   0x10000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE_MASK                                                      0x20000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE_MASK                                                      0x40000000L
+//PWR_DISP_TIMER2_CONTROL
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT__SHIFT                                                  0x0
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT                                                 0x19
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT                                                0x1a
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK__SHIFT                                                   0x1b
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT                                                0x1c
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE__SHIFT                                                   0x1d
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE__SHIFT                                                   0x1e
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT_MASK                                                    0x01FFFFFFL
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE_MASK                                                   0x02000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE_MASK                                                  0x04000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK_MASK                                                     0x08000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK_MASK                                                  0x10000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE_MASK                                                     0x20000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE_MASK                                                     0x40000000L
+//PWR_DISP_TIMER_GLOBAL_CONTROL
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH__SHIFT                                          0x0
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN__SHIFT                                             0xa
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH_MASK                                            0x000003FFL
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN_MASK                                               0x00000400L
+//PWR_IH_CONTROL
+#define PWR_IH_CONTROL__MAX_CREDIT__SHIFT                                                                     0x0
+#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK__SHIFT                                                        0x5
+#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK__SHIFT                                                       0x6
+#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN__SHIFT                                                             0x1f
+#define PWR_IH_CONTROL__MAX_CREDIT_MASK                                                                       0x0000001FL
+#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK_MASK                                                          0x00000020L
+#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK_MASK                                                         0x00000040L
+#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN_MASK                                                               0x80000000L
+
+// addressBlock: smuio_smuio_svi0_SmuSmuioDec
+//SMUSVI0_TEL_PLANE0
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR__SHIFT                                                         0x0
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR__SHIFT                                                         0x10
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR_MASK                                                           0x000000FFL
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR_MASK                                                           0x01FF0000L
+//SMUSVI0_PLANE0_CURRENTVID
+#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT                                             0x18
+#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK                                               0xFF000000L
+
+#endif
-- 
GitLab


From 6146081d58e3dd0c50ceb5a70a6906640727ff96 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Wed, 29 Nov 2023 12:37:34 +0530
Subject: [PATCH 0903/2340] drm/amdgpu: Add NULL checks for function pointers

Check if function is implemented before making the call.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 9ad4d6d3122b3..9043ebf1e161f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1424,9 +1424,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
-	adev->nbio.funcs->get_clockgating_state(adev, flags);
+	if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state)
+		adev->nbio.funcs->get_clockgating_state(adev, flags);
 
-	adev->hdp.funcs->get_clock_gating_state(adev, flags);
+	if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
+		adev->hdp.funcs->get_clock_gating_state(adev, flags);
 
 	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) {
 		/* AMD_CG_SUPPORT_DRM_MGCG */
@@ -1441,9 +1443,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
 	}
 
 	/* AMD_CG_SUPPORT_ROM_MGCG */
-	adev->smuio.funcs->get_clock_gating_state(adev, flags);
+	if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state)
+		adev->smuio.funcs->get_clock_gating_state(adev, flags);
 
-	adev->df.funcs->get_clockgating_state(adev, flags);
+	if (adev->df.funcs && adev->df.funcs->get_clockgating_state)
+		adev->df.funcs->get_clockgating_state(adev, flags);
 }
 
 static int soc15_common_set_powergating_state(void *handle,
-- 
GitLab


From 828afefd4b0636d92386d4b2c1f53fff87505aba Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Thu, 30 Nov 2023 15:58:21 +0530
Subject: [PATCH 0904/2340] drm/amdgpu: Update HDP 4.4.2 clock gating flags

HDP 4.4.2 clockgating is enabled by default, update the flags
accordingly.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 49e9349757197..4db6bb73ead42 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
 {
 	int data;
 
+	if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+		/* Default enabled */
+		*flags |= AMD_CG_SUPPORT_HDP_MGCG;
+		return;
+	}
 	/* AMD_CG_SUPPORT_HDP_LS */
 	data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
 	if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
-- 
GitLab


From b12fb2953915b092aaef956f6e80783fa70b9f40 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Thu, 30 Nov 2023 16:35:58 +0530
Subject: [PATCH 0905/2340] drm/amdgpu: Avoid querying DRM MGCG status

MP0 v13.0.6 SOCs don't support DRM MGCG.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 9043ebf1e161f..15033efec2bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1430,7 +1430,8 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
 	if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
 		adev->hdp.funcs->get_clock_gating_state(adev, flags);
 
-	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) {
+	if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
+	    (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) {
 		/* AMD_CG_SUPPORT_DRM_MGCG */
 		data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
 		if (!(data & 0x01000000))
-- 
GitLab


From c03581986234044f2eeae308b7840e0083981034 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 29 Nov 2023 15:44:25 -0500
Subject: [PATCH 0906/2340] drm/amdgpu: fix buffer funcs setting order on
 suspend

We need to disable this after the last eviction
call, but before we disable the SDMA IP.

Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level")
Link: https://lore.kernel.org/r/87edgv4x3i.fsf@vps.thesusis.net
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Tested-by: Phillip Susi <phill@thesusis.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Phillip Susi <phill@thesusis.net>
Cc: Luben Tuikov <ltuikov89@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d5b950fd1d852..138c7b37af698 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4600,6 +4600,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (r)
 		return r;
 
+	amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
 	amdgpu_fence_driver_hw_fini(adev);
 
 	amdgpu_device_ip_suspend_phase2(adev);
-- 
GitLab


From e17768691dd8d0664413de3123621daa0504054c Mon Sep 17 00:00:00 2001
From: Bokun Zhang <bokun.zhang@amd.com>
Date: Wed, 29 Nov 2023 19:11:22 -0500
Subject: [PATCH 0907/2340] drm/amd/amdgpu: SRIOV full reset issue with VCN

- After a full reset, VF's FB will be cleaned. This
  includes the VCN's fw_shared memory.

  However, there is no suspend-resume routine for
  SRIOV VF. Therefore, the data in the fw_shared
  memory will be lost forever and it causes engine
  hang later on.

  We must repopulate the data in fw_shared during
  SRIOV hw_init

Signed-off-by: Bokun Zhang <bokun.zhang@amd.com>
Reviewed-by: Emily Deng <Emily.Deng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 28b7f96b42e59..169ed400ee7b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1280,6 +1280,9 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
+		// Must re/init fw_shared at beginning
+		vcn_v4_0_fw_shared_init(adev, i);
+
 		table_size = 0;
 
 		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
-- 
GitLab


From 1d3062fad9c7313fff9970a88e0538a24480ffb8 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Fri, 3 Nov 2023 15:14:03 +0200
Subject: [PATCH 0908/2340] drm/drm_file: fix use of uninitialized variable

smatch reports:

drivers/gpu/drm/drm_file.c:967 drm_show_memory_stats() error: uninitialized symbol 'supported_status'.

'supported_status' is only set in one code path. I'm not familiar with
the code to say if that path will always be ran in real life, but
whether that is the case or not, I think it is good to initialize
'supported_status' to 0 to silence the warning (and possibly fix a bug).

Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103-uninit-fixes-v2-1-c22b2444f5f5@ideasonboard.com
---
 drivers/gpu/drm/drm_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 987c1a5d1773b..8c87287c3e168 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -898,7 +898,7 @@ void drm_show_memory_stats(struct drm_printer *p, struct drm_file *file)
 {
 	struct drm_gem_object *obj;
 	struct drm_memory_stats status = {};
-	enum drm_gem_object_status supported_status;
+	enum drm_gem_object_status supported_status = 0;
 	int id;
 
 	spin_lock(&file->table_lock);
-- 
GitLab


From f9af8f0c1dc567a5a6a6318ff324c45d80d4a60f Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Fri, 3 Nov 2023 15:14:04 +0200
Subject: [PATCH 0909/2340] drm/framebuffer: Fix use of uninitialized variable

smatch reports:

drivers/gpu/drm/drm_framebuffer.c:654 drm_mode_getfb2_ioctl() error: uninitialized symbol 'ret'.

'ret' is possibly not set when there are no errors, causing the error
above. I can't say if that ever happens in real-life, but in any case I
think it is good to initialize 'ret' to 0.

Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103-uninit-fixes-v2-2-c22b2444f5f5@ideasonboard.com
---
 drivers/gpu/drm/drm_framebuffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 09e289fca5c31..3cc0ffc28e862 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -583,7 +583,7 @@ int drm_mode_getfb2_ioctl(struct drm_device *dev,
 	struct drm_mode_fb_cmd2 *r = data;
 	struct drm_framebuffer *fb;
 	unsigned int i;
-	int ret;
+	int ret = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
-- 
GitLab


From 155d6fb61270dd297f128731cd155080deee8f3a Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Fri, 3 Nov 2023 15:14:05 +0200
Subject: [PATCH 0910/2340] drm/bridge: cdns-mhdp8546: Fix use of uninitialized
 variable

'ret' could be uninitialized at the end of the function, although it's
not clear if that can happen in practice.

Fixes: 6a3608eae6d3 ("drm: bridge: cdns-mhdp8546: Enable HDCP")
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103-uninit-fixes-v2-3-c22b2444f5f5@ideasonboard.com
---
 drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c
index 946212a955981..5e3b8edcf7948 100644
--- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c
+++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c
@@ -403,7 +403,8 @@ static int _cdns_mhdp_hdcp_disable(struct cdns_mhdp_device *mhdp)
 
 static int _cdns_mhdp_hdcp_enable(struct cdns_mhdp_device *mhdp, u8 content_type)
 {
-	int ret, tries = 3;
+	int ret = -EINVAL;
+	int tries = 3;
 	u32 i;
 
 	for (i = 0; i < tries; i++) {
-- 
GitLab


From 32bd29b619638256c5b75fb021d6d9f12fc4a984 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Fri, 3 Nov 2023 15:14:06 +0200
Subject: [PATCH 0911/2340] drm/bridge: tc358767: Fix return value on error
 case

If the hpd_pin is invalid, the driver returns 'ret'. But 'ret' contains
0, instead of an error value.

Return -EINVAL instead.

Fixes: f25ee5017e4f ("drm/bridge: tc358767: add IRQ and HPD support")
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103-uninit-fixes-v2-4-c22b2444f5f5@ideasonboard.com
---
 drivers/gpu/drm/bridge/tc358767.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index ef2e373606ba3..615cc8f950d7b 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -2273,7 +2273,7 @@ static int tc_probe(struct i2c_client *client)
 	} else {
 		if (tc->hpd_pin < 0 || tc->hpd_pin > 1) {
 			dev_err(dev, "failed to parse HPD number\n");
-			return ret;
+			return -EINVAL;
 		}
 	}
 
-- 
GitLab


From 90d50b8d85834e73536fdccd5aa913b30494fef0 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 21 Sep 2023 13:50:32 +0300
Subject: [PATCH 0912/2340] drm/mipi-dsi: Fix detach call without attach

It's been reported that DSI host driver's detach can be called without
the attach ever happening:

https://lore.kernel.org/all/20230412073954.20601-1-tony@atomide.com/

After reading the code, I think this is what happens:

We have a DSI host defined in the device tree and a DSI peripheral under
that host (i.e. an i2c device using the DSI as data bus doesn't exhibit
this behavior).

The host driver calls mipi_dsi_host_register(), which causes (via a few
functions) mipi_dsi_device_add() to be called for the DSI peripheral. So
now we have a DSI device under the host, but attach hasn't been called.

Normally the probing of the devices continues, and eventually the DSI
peripheral's driver will call mipi_dsi_attach(), attaching the
peripheral.

However, if the host driver's probe encounters an error after calling
mipi_dsi_host_register(), and before the peripheral has called
mipi_dsi_attach(), the host driver will do cleanups and return an error
from its probe function. The cleanups include calling
mipi_dsi_host_unregister().

mipi_dsi_host_unregister() will call two functions for all its DSI
peripheral devices: mipi_dsi_detach() and mipi_dsi_device_unregister().
The latter makes sense, as the device exists, but the former may be
wrong as attach has not necessarily been done.

To fix this, track the attached state of the peripheral, and only detach
from mipi_dsi_host_unregister() if the peripheral was attached.

Note that I have only tested this with a board with an i2c DSI
peripheral, not with a "pure" DSI peripheral.

However, slightly related, the unregister machinery still seems broken.
E.g. if the DSI host driver is unbound, it'll detach and unregister the
DSI peripherals. After that, when the DSI peripheral driver unbound
it'll call detach either directly or using the devm variant, leading to
a crash. And probably the driver will crash if it happens, for some
reason, to try to send a message via the DSI bus.

But that's another topic.

Tested-by: H. Nikolaus Schaller <hns@goldelico.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230921-dsi-detach-fix-v1-1-d0de2d1621d9@ideasonboard.com
---
 drivers/gpu/drm/drm_mipi_dsi.c | 17 +++++++++++++++--
 include/drm/drm_mipi_dsi.h     |  2 ++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 14201f73aab13..843a6dbda93a0 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -347,7 +347,8 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv)
 {
 	struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
 
-	mipi_dsi_detach(dsi);
+	if (dsi->attached)
+		mipi_dsi_detach(dsi);
 	mipi_dsi_device_unregister(dsi);
 
 	return 0;
@@ -370,11 +371,18 @@ EXPORT_SYMBOL(mipi_dsi_host_unregister);
 int mipi_dsi_attach(struct mipi_dsi_device *dsi)
 {
 	const struct mipi_dsi_host_ops *ops = dsi->host->ops;
+	int ret;
 
 	if (!ops || !ops->attach)
 		return -ENOSYS;
 
-	return ops->attach(dsi->host, dsi);
+	ret = ops->attach(dsi->host, dsi);
+	if (ret)
+		return ret;
+
+	dsi->attached = true;
+
+	return 0;
 }
 EXPORT_SYMBOL(mipi_dsi_attach);
 
@@ -386,9 +394,14 @@ int mipi_dsi_detach(struct mipi_dsi_device *dsi)
 {
 	const struct mipi_dsi_host_ops *ops = dsi->host->ops;
 
+	if (WARN_ON(!dsi->attached))
+		return -EINVAL;
+
 	if (!ops || !ops->detach)
 		return -ENOSYS;
 
+	dsi->attached = false;
+
 	return ops->detach(dsi->host, dsi);
 }
 EXPORT_SYMBOL(mipi_dsi_detach);
diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index c9df0407980c9..c0aec0d4d664e 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -168,6 +168,7 @@ struct mipi_dsi_device_info {
  * struct mipi_dsi_device - DSI peripheral device
  * @host: DSI host for this peripheral
  * @dev: driver model device node for this peripheral
+ * @attached: the DSI device has been successfully attached
  * @name: DSI peripheral chip type
  * @channel: virtual channel assigned to the peripheral
  * @format: pixel format for video mode
@@ -184,6 +185,7 @@ struct mipi_dsi_device_info {
 struct mipi_dsi_device {
 	struct mipi_dsi_host *host;
 	struct device dev;
+	bool attached;
 
 	char name[DSI_DEV_NAME_SIZE];
 	unsigned int channel;
-- 
GitLab


From dcdf1bbe82f4b2a301a3692a0b1942c3fda70644 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 1 Dec 2023 12:00:32 +0200
Subject: [PATCH 0913/2340] drm/i915: handle uncore spinlock when not available
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The uncore code may not always be available (e.g. when we build the
display code with Xe), so we can't always rely on having the uncore's
spinlock.

To handle this, split the spin_lock/unlock_irqsave/restore() into
spin_lock/unlock() followed by a call to local_irq_save/restore() and
create wrapper functions for locking and unlocking the uncore's
spinlock.  In these functions, we have a condition check and only
actually try to lock/unlock the spinlock when I915 is defined, and
thus uncore is available.

This keeps the ifdefs contained in these new functions and all such
logic inside the display code.

Cc: Tvrtko Ursulin <tvrto.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201100032.1367589-1-luciano.coelho@intel.com
---
 drivers/gpu/drm/i915/display/intel_vblank.c | 51 +++++++++++++++++----
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c
index 2cec2abf97466..fe256bf7b485b 100644
--- a/drivers/gpu/drm/i915/display/intel_vblank.c
+++ b/drivers/gpu/drm/i915/display/intel_vblank.c
@@ -265,6 +265,32 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline)
 	return (scanline + vtotal - crtc->scanline_offset) % vtotal;
 }
 
+/*
+ * The uncore version of the spin lock functions is used to decide
+ * whether we need to lock the uncore lock or not.  This is only
+ * needed in i915, not in Xe.
+ *
+ * This lock in i915 is needed because some old platforms (at least
+ * IVB and possibly HSW as well), which are not supported in Xe, need
+ * all register accesses to the same cacheline to be serialized,
+ * otherwise they may hang.
+ */
+static void intel_vblank_section_enter(struct drm_i915_private *i915)
+	__acquires(i915->uncore.lock)
+{
+#ifdef I915
+	spin_lock(&i915->uncore.lock);
+#endif
+}
+
+static void intel_vblank_section_exit(struct drm_i915_private *i915)
+	__releases(i915->uncore.lock)
+{
+#ifdef I915
+	spin_unlock(&i915->uncore.lock);
+#endif
+}
+
 static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 				     bool in_vblank_irq,
 				     int *vpos, int *hpos,
@@ -302,11 +328,12 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 	}
 
 	/*
-	 * Lock uncore.lock, as we will do multiple timing critical raw
-	 * register reads, potentially with preemption disabled, so the
-	 * following code must not block on uncore.lock.
+	 * Enter vblank critical section, as we will do multiple
+	 * timing critical raw register reads, potentially with
+	 * preemption disabled, so the following code must not block.
 	 */
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	local_irq_save(irqflags);
+	intel_vblank_section_enter(dev_priv);
 
 	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
 
@@ -374,7 +401,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 
 	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
 
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+	intel_vblank_section_exit(dev_priv);
+	local_irq_restore(irqflags);
 
 	/*
 	 * While in vblank, position will be negative
@@ -412,9 +440,13 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 	unsigned long irqflags;
 	int position;
 
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	local_irq_save(irqflags);
+	intel_vblank_section_enter(dev_priv);
+
 	position = __intel_get_crtc_scanline(crtc);
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+
+	intel_vblank_section_exit(dev_priv);
+	local_irq_restore(irqflags);
 
 	return position;
 }
@@ -537,7 +569,7 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 	 * Need to audit everything to make sure it's safe.
 	 */
 	spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags);
-	spin_lock(&i915->uncore.lock);
+	intel_vblank_section_enter(i915);
 
 	drm_calc_timestamping_constants(&crtc->base, &adjusted_mode);
 
@@ -546,7 +578,6 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 	crtc->mode_flags = mode_flags;
 
 	crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state);
-
-	spin_unlock(&i915->uncore.lock);
+	intel_vblank_section_exit(i915);
 	spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags);
 }
-- 
GitLab


From 5908cbe82ef77f6019349c450d7f1c8b3c29bb0e Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 5 Dec 2023 13:13:34 -0700
Subject: [PATCH 0914/2340] usb: typec: nb7vpq904m: Only select DRM_AUX_BRIDGE
 with OF

CONFIG_DRM_AUX_BRIDGE depends on CONFIG_OF but that dependency is not
included when CONFIG_TYPEC_MUX_NB7VPQ904M selects it, resulting in a
Kconfig warning when CONFIG_OF is disabled:

  WARNING: unmet direct dependencies detected for DRM_AUX_BRIDGE
    Depends on [n]: HAS_IOMEM [=y] && DRM_BRIDGE [=y] && OF [=n]
    Selected by [y]:
    - TYPEC_MUX_NB7VPQ904M [=y] && USB_SUPPORT [=y] && TYPEC [=y] && I2C [=y] && (DRM [=y] || DRM [=y]=n) && DRM_BRIDGE [=y]

Only select CONFIG_DRM_AUX_BRIDGE with both CONFIG_DRM_BRIDGE and
CONFIG_OF to clear up the warning.

Fixes: c5d296bad640 ("usb: typec: nb7vpq904m: switch to DRM_AUX_BRIDGE")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205-drm_aux_bridge-fixes-v1-1-d242a0ae9df4@kernel.org
---
 drivers/usb/typec/mux/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/mux/Kconfig b/drivers/usb/typec/mux/Kconfig
index 5120942f309d4..38416fb0cc3ca 100644
--- a/drivers/usb/typec/mux/Kconfig
+++ b/drivers/usb/typec/mux/Kconfig
@@ -40,7 +40,7 @@ config TYPEC_MUX_NB7VPQ904M
 	tristate "On Semiconductor NB7VPQ904M Type-C redriver driver"
 	depends on I2C
 	depends on DRM || DRM=n
-	select DRM_AUX_BRIDGE if DRM_BRIDGE
+	select DRM_AUX_BRIDGE if DRM_BRIDGE && OF
 	select REGMAP_I2C
 	help
 	  Say Y or M if your system has a On Semiconductor NB7VPQ904M Type-C
-- 
GitLab


From 03c0343bdf8d43fee6dfe92a7b66308b60e9e77c Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 5 Dec 2023 13:13:35 -0700
Subject: [PATCH 0915/2340] usb: typec: qcom-pmic-typec: Only select
 DRM_AUX_HPD_BRIDGE with OF

CONFIG_DRM_AUX_HPD_BRIDGE depends on CONFIG_OF but that dependency is
not included when CONFIG_TYPEC_QCOM_PMIC selects it, resulting in a
Kconfig warning when CONFIG_OF is disabled:

  WARNING: unmet direct dependencies detected for DRM_AUX_HPD_BRIDGE
    Depends on [n]: HAS_IOMEM [=y] && DRM_BRIDGE [=y] && OF [=n]
    Selected by [m]:
    - TYPEC_QCOM_PMIC [=m] && USB_SUPPORT [=y] && TYPEC [=m] && TYPEC_TCPM [=m] && (ARCH_QCOM || COMPILE_TEST [=y]) && (DRM [=m] || DRM [=m]=n) && DRM_BRIDGE [=y]

Only select CONFIG_DRM_AUX_HPD_BRIDGE with both CONFIG_DRM_BRIDGE and
CONFIG_OF to clear up the warning.

Fixes: 7d9f1b72b296 ("usb: typec: qcom-pmic-typec: switch to DRM_AUX_HPD_BRIDGE")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205-drm_aux_bridge-fixes-v1-2-d242a0ae9df4@kernel.org
---
 drivers/usb/typec/tcpm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig
index 64d5421c69e69..8cdd84ca5d6f7 100644
--- a/drivers/usb/typec/tcpm/Kconfig
+++ b/drivers/usb/typec/tcpm/Kconfig
@@ -80,7 +80,7 @@ config TYPEC_QCOM_PMIC
 	tristate "Qualcomm PMIC USB Type-C Port Controller Manager driver"
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on DRM || DRM=n
-	select DRM_AUX_HPD_BRIDGE if DRM_BRIDGE
+	select DRM_AUX_HPD_BRIDGE if DRM_BRIDGE && OF
 	help
 	  A Type-C port and Power Delivery driver which aggregates two
 	  discrete pieces of silicon in the PM8150b PMIC block: the
-- 
GitLab


From 812cc1da7ffd9e178ef66b8a22113be10fba466c Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 5 Dec 2023 13:13:36 -0700
Subject: [PATCH 0916/2340] drm/bridge: Return NULL instead of plain 0 in
 drm_dp_hpd_bridge_register() stub

sparse complains:

  drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c: note: in included file:
  include/drm/bridge/aux-bridge.h:29:16: sparse: sparse: Using plain integer as NULL pointer

Return NULL to clear up the warning.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202312060025.BdeqZrWx-lkp@intel.com/
Fixes: e560518a6c2e ("drm/bridge: implement generic DP HPD bridge")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205-drm_aux_bridge-fixes-v1-3-d242a0ae9df4@kernel.org
---
 include/drm/bridge/aux-bridge.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h
index 66249ff0858e7..c4c423e97f069 100644
--- a/include/drm/bridge/aux-bridge.h
+++ b/include/drm/bridge/aux-bridge.h
@@ -26,7 +26,7 @@ void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status sta
 static inline struct device *drm_dp_hpd_bridge_register(struct device *parent,
 							struct device_node *np)
 {
-	return 0;
+	return NULL;
 }
 
 static inline void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status)
-- 
GitLab


From 76385d493c2137460ee7735a5d3a494099c35188 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Tue, 5 Dec 2023 14:06:31 +0100
Subject: [PATCH 0917/2340] drm/debugfs: fix potential NULL pointer dereference

encoder->funcs entry might be NULL, so check it first before calling its
methods. This fixes NULL pointer dereference observed on Rasberry Pi
3b/4b boards.

Fixes: caf525ed45b4 ("drm/encoder: register per-encoder debugfs dir")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205130631.3456986-1-m.szyprowski@samsung.com
---
 drivers/gpu/drm/drm_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 02e7481758c0a..f4715a67e340d 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -638,7 +638,7 @@ void drm_debugfs_encoder_add(struct drm_encoder *encoder)
 	debugfs_create_file("bridges", 0444, root, encoder,
 			    &bridges_fops);
 
-	if (encoder->funcs->debugfs_init)
+	if (encoder->funcs && encoder->funcs->debugfs_init)
 		encoder->funcs->debugfs_init(encoder, root);
 }
 
-- 
GitLab


From 261200eb7030dc796f08c1ad778bd0b18b19451b Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 5 Dec 2023 14:15:45 +0200
Subject: [PATCH 0918/2340] drm/i915/rpm: add rpm_to_i915() helper around
 container_of()

Reduce the duplication.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205121545.2338665-1-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/intel_runtime_pm.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 8743153fad878..156cb1536b473 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -50,6 +50,11 @@
  * present for a given platform.
  */
 
+static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm)
+{
+	return container_of(rpm, struct drm_i915_private, runtime_pm);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 
 #include <linux/sort.h>
@@ -349,9 +354,7 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock)
 static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
 					      bool wakelock)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	int ret;
 
 	ret = pm_runtime_get_sync(rpm->kdev);
@@ -556,9 +559,7 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
  */
 void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct device *kdev = rpm->kdev;
 
 	/*
@@ -611,9 +612,7 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
 
 void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct device *kdev = rpm->kdev;
 
 	/* Transfer rpm ownership back to core */
@@ -628,9 +627,7 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
 
 void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	int count = atomic_read(&rpm->wakeref_count);
 
 	intel_wakeref_auto_fini(&rpm->userfault_wakeref);
@@ -645,8 +642,7 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
 
 void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 =
-			container_of(rpm, struct drm_i915_private, runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 	struct device *kdev = &pdev->dev;
 
-- 
GitLab


From 922181a52de923a2220998a26d84d94889dd6e97 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 5 Dec 2023 15:41:41 +0200
Subject: [PATCH 0919/2340] drm/i915: use intel_connector in
 intel_connector_debugfs_add()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prefer struct intel_connector over struct drm_connector.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205134143.2427661-1-jani.nikula@intel.com
---
 .../drm/i915/display/intel_display_debugfs.c  | 53 +++++++++----------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index a7675a7339c34..32539dfd506e4 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -1566,59 +1566,58 @@ DEFINE_SHOW_ATTRIBUTE(intel_crtc_pipe);
 
 /**
  * intel_connector_debugfs_add - add i915 specific connector debugfs files
- * @intel_connector: pointer to a registered drm_connector
+ * @connector: pointer to a registered intel_connector
  *
  * Cleanup will be done by drm_connector_unregister() through a call to
  * drm_debugfs_connector_remove().
  */
-void intel_connector_debugfs_add(struct intel_connector *intel_connector)
+void intel_connector_debugfs_add(struct intel_connector *connector)
 {
-	struct drm_connector *connector = &intel_connector->base;
-	struct dentry *root = connector->debugfs_entry;
-	struct drm_i915_private *dev_priv = to_i915(connector->dev);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct dentry *root = connector->base.debugfs_entry;
+	int connector_type = connector->base.connector_type;
 
 	/* The connector must have been registered beforehands. */
 	if (!root)
 		return;
 
-	intel_drrs_connector_debugfs_add(intel_connector);
-	intel_psr_connector_debugfs_add(intel_connector);
+	intel_drrs_connector_debugfs_add(connector);
+	intel_psr_connector_debugfs_add(connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+	if (connector_type == DRM_MODE_CONNECTOR_eDP)
 		debugfs_create_file("i915_panel_timings", S_IRUGO, root,
-				    connector, &i915_panel_fops);
+				    &connector->base, &i915_panel_fops);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) {
+	if (connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB) {
 		debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root,
-				    connector, &i915_hdcp_sink_capability_fops);
+				    &connector->base, &i915_hdcp_sink_capability_fops);
 	}
 
-	if (DISPLAY_VER(dev_priv) >= 11 &&
-	    ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort &&
-	    !to_intel_connector(connector)->mst_port) ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+	if (DISPLAY_VER(i915) >= 11 &&
+	    ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !connector->mst_port) ||
+	     connector_type == DRM_MODE_CONNECTOR_eDP)) {
 		debugfs_create_file("i915_dsc_fec_support", 0644, root,
-				    connector, &i915_dsc_fec_support_fops);
+				    &connector->base, &i915_dsc_fec_support_fops);
 
 		debugfs_create_file("i915_dsc_bpc", 0644, root,
-				    connector, &i915_dsc_bpc_fops);
+				    &connector->base, &i915_dsc_bpc_fops);
 
 		debugfs_create_file("i915_dsc_output_format", 0644, root,
-				    connector, &i915_dsc_output_format_fops);
+				    &connector->base, &i915_dsc_output_format_fops);
 
 		debugfs_create_file("i915_dsc_fractional_bpp", 0644, root,
-				    connector, &i915_dsc_fractional_bpp_fops);
+				    &connector->base, &i915_dsc_fractional_bpp_fops);
 	}
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)
+	if (connector_type == DRM_MODE_CONNECTOR_DSI ||
+	    connector_type == DRM_MODE_CONNECTOR_eDP ||
+	    connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB)
 		debugfs_create_file("i915_lpsp_capability", 0444, root,
-				    connector, &i915_lpsp_capability_fops);
+				    &connector->base, &i915_lpsp_capability_fops);
 }
 
 /**
-- 
GitLab


From 77bdb83f0dbc8dd64c07bba08ecd2ac83030a508 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 5 Dec 2023 15:41:42 +0200
Subject: [PATCH 0920/2340] drm/i915: pass struct intel_connector to connector
 debugfs fops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prefer struct intel_connector over struct drm_connector, and unify the
declarations in the fops.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205134143.2427661-2-jani.nikula@intel.com
---
 .../drm/i915/display/intel_display_debugfs.c  | 134 +++++++++---------
 1 file changed, 66 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 32539dfd506e4..85e5e21a859c1 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -1117,11 +1117,10 @@ void intel_display_debugfs_register(struct drm_i915_private *i915)
 
 static int i915_panel_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct intel_dp *intel_dp =
-		intel_attached_dp(to_intel_connector(connector));
+	struct intel_connector *connector = m->private;
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
 
-	if (connector->status != connector_status_connected)
+	if (connector->base.status != connector_status_connected)
 		return -ENODEV;
 
 	seq_printf(m, "Panel power up delay: %d\n",
@@ -1139,23 +1138,23 @@ DEFINE_SHOW_ATTRIBUTE(i915_panel);
 
 static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_i915_private *i915 = to_i915(connector->dev);
-	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	int ret;
 
 	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	if (!connector->encoder || connector->status != connector_status_connected) {
+	if (!connector->base.encoder ||
+	    connector->base.status != connector_status_connected) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	seq_printf(m, "%s:%d HDCP version: ", connector->name,
-		   connector->base.id);
-	intel_hdcp_info(m, intel_connector);
+	seq_printf(m, "%s:%d HDCP version: ", connector->base.name,
+		   connector->base.base.id);
+	intel_hdcp_info(m, connector);
 
 out:
 	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
@@ -1166,16 +1165,16 @@ DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability);
 
 static int i915_lpsp_capability_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_i915_private *i915 = to_i915(connector->dev);
-	struct intel_encoder *encoder;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	int connector_type = connector->base.connector_type;
 	bool lpsp_capable = false;
 
-	encoder = intel_attached_encoder(to_intel_connector(connector));
 	if (!encoder)
 		return -ENODEV;
 
-	if (connector->status != connector_status_connected)
+	if (connector->base.status != connector_status_connected)
 		return -ENODEV;
 
 	if (DISPLAY_VER(i915) >= 13)
@@ -1188,15 +1187,15 @@ static int i915_lpsp_capability_show(struct seq_file *m, void *data)
 		 */
 		lpsp_capable = encoder->port <= PORT_B;
 	else if (DISPLAY_VER(i915) == 11)
-		lpsp_capable = (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-				connector->connector_type == DRM_MODE_CONNECTOR_eDP);
+		lpsp_capable = (connector_type == DRM_MODE_CONNECTOR_DSI ||
+				connector_type == DRM_MODE_CONNECTOR_eDP);
 	else if (IS_DISPLAY_VER(i915, 9, 10))
 		lpsp_capable = (encoder->port == PORT_A &&
-				(connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-				 connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
-				 connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort));
+				(connector_type == DRM_MODE_CONNECTOR_DSI ||
+				 connector_type == DRM_MODE_CONNECTOR_eDP ||
+				 connector_type == DRM_MODE_CONNECTOR_DisplayPort));
 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-		lpsp_capable = connector->connector_type == DRM_MODE_CONNECTOR_eDP;
+		lpsp_capable = connector_type == DRM_MODE_CONNECTOR_eDP;
 
 	seq_printf(m, "LPSP: %s\n", lpsp_capable ? "capable" : "incapable");
 
@@ -1206,7 +1205,7 @@ DEFINE_SHOW_ATTRIBUTE(i915_lpsp_capability);
 
 static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
 {
-	struct intel_connector *connector = to_intel_connector(m->private);
+	struct intel_connector *connector = m->private;
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct drm_crtc *crtc;
 	struct intel_dp *intel_dp;
@@ -1276,13 +1275,13 @@ static ssize_t i915_dsc_fec_support_write(struct file *file,
 					  const char __user *ubuf,
 					  size_t len, loff_t *offp)
 {
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	bool dsc_enable = false;
 	int ret;
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	if (len == 0)
 		return 0;
@@ -1320,22 +1319,22 @@ static const struct file_operations i915_dsc_fec_support_fops = {
 
 static int i915_dsc_bpc_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -1343,7 +1342,7 @@ static int i915_dsc_bpc_show(struct seq_file *m, void *data)
 	crtc_state = to_intel_crtc_state(crtc->state);
 	seq_printf(m, "Input_BPC: %d\n", crtc_state->dsc.config.bits_per_component);
 
-out:	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+out:	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1352,9 +1351,9 @@ static ssize_t i915_dsc_bpc_write(struct file *file,
 				  const char __user *ubuf,
 				  size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	int dsc_bpc = 0;
 	int ret;
@@ -1386,22 +1385,22 @@ static const struct file_operations i915_dsc_bpc_fops = {
 
 static int i915_dsc_output_format_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -1410,7 +1409,7 @@ static int i915_dsc_output_format_show(struct seq_file *m, void *data)
 	seq_printf(m, "DSC_Output_Format: %s\n",
 		   intel_output_format_name(crtc_state->output_format));
 
-out:	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+out:	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1419,9 +1418,9 @@ static ssize_t i915_dsc_output_format_write(struct file *file,
 					    const char __user *ubuf,
 					    size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	int dsc_output_format = 0;
 	int ret;
@@ -1453,33 +1452,32 @@ static const struct file_operations i915_dsc_output_format_fops = {
 
 static int i915_dsc_fractional_bpp_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_dp *intel_dp;
-	struct intel_connector *intel_connector = to_intel_connector(connector);
-	struct intel_encoder *encoder = intel_attached_encoder(intel_connector);
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	intel_dp = intel_attached_dp(intel_connector);
+	intel_dp = intel_attached_dp(connector);
 	seq_printf(m, "Force_DSC_Fractional_BPP_Enable: %s\n",
 		   str_yes_no(intel_dp->force_dsc_fractional_bpp_en));
 
 out:
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1488,10 +1486,10 @@ static ssize_t i915_dsc_fractional_bpp_write(struct file *file,
 					     const char __user *ubuf,
 					     size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	bool dsc_fractional_bpp_enable = false;
 	int ret;
@@ -1586,29 +1584,29 @@ void intel_connector_debugfs_add(struct intel_connector *connector)
 
 	if (connector_type == DRM_MODE_CONNECTOR_eDP)
 		debugfs_create_file("i915_panel_timings", S_IRUGO, root,
-				    &connector->base, &i915_panel_fops);
+				    connector, &i915_panel_fops);
 
 	if (connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
 	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
 	    connector_type == DRM_MODE_CONNECTOR_HDMIB) {
 		debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root,
-				    &connector->base, &i915_hdcp_sink_capability_fops);
+				    connector, &i915_hdcp_sink_capability_fops);
 	}
 
 	if (DISPLAY_VER(i915) >= 11 &&
 	    ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !connector->mst_port) ||
 	     connector_type == DRM_MODE_CONNECTOR_eDP)) {
 		debugfs_create_file("i915_dsc_fec_support", 0644, root,
-				    &connector->base, &i915_dsc_fec_support_fops);
+				    connector, &i915_dsc_fec_support_fops);
 
 		debugfs_create_file("i915_dsc_bpc", 0644, root,
-				    &connector->base, &i915_dsc_bpc_fops);
+				    connector, &i915_dsc_bpc_fops);
 
 		debugfs_create_file("i915_dsc_output_format", 0644, root,
-				    &connector->base, &i915_dsc_output_format_fops);
+				    connector, &i915_dsc_output_format_fops);
 
 		debugfs_create_file("i915_dsc_fractional_bpp", 0644, root,
-				    &connector->base, &i915_dsc_fractional_bpp_fops);
+				    connector, &i915_dsc_fractional_bpp_fops);
 	}
 
 	if (connector_type == DRM_MODE_CONNECTOR_DSI ||
@@ -1617,7 +1615,7 @@ void intel_connector_debugfs_add(struct intel_connector *connector)
 	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
 	    connector_type == DRM_MODE_CONNECTOR_HDMIB)
 		debugfs_create_file("i915_lpsp_capability", 0444, root,
-				    &connector->base, &i915_lpsp_capability_fops);
+				    connector, &i915_lpsp_capability_fops);
 }
 
 /**
-- 
GitLab


From f270b7087dc8369d21018541157a270a023e7f21 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 5 Dec 2023 15:41:43 +0200
Subject: [PATCH 0921/2340] drm/i915: use octal permissions in display debugfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Octal permissions are preferred over the symbolics ones.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205134143.2427661-3-jani.nikula@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_debugfs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 85e5e21a859c1..0cb819fc2a0cf 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -1096,7 +1096,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915)
 
 	for (i = 0; i < ARRAY_SIZE(intel_display_debugfs_files); i++) {
 		debugfs_create_file(intel_display_debugfs_files[i].name,
-				    S_IRUGO | S_IWUSR,
+				    0644,
 				    minor->debugfs_root,
 				    to_i915(minor->dev),
 				    intel_display_debugfs_files[i].fops);
@@ -1583,13 +1583,13 @@ void intel_connector_debugfs_add(struct intel_connector *connector)
 	intel_psr_connector_debugfs_add(connector);
 
 	if (connector_type == DRM_MODE_CONNECTOR_eDP)
-		debugfs_create_file("i915_panel_timings", S_IRUGO, root,
+		debugfs_create_file("i915_panel_timings", 0444, root,
 				    connector, &i915_panel_fops);
 
 	if (connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
 	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
 	    connector_type == DRM_MODE_CONNECTOR_HDMIB) {
-		debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root,
+		debugfs_create_file("i915_hdcp_sink_capability", 0444, root,
 				    connector, &i915_hdcp_sink_capability_fops);
 	}
 
-- 
GitLab


From 7054b551de18e9875fbdf8d4f3baade428353545 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Wed, 25 Oct 2023 12:11:31 +0200
Subject: [PATCH 0922/2340] drm/i915/display: Use
 i915_gem_object_get_dma_address to get dma address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Works better for xe like that. obj is no longer const.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204134946.16219-1-maarten.lankhorst@linux.intel.com
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
---
 drivers/gpu/drm/i915/display/intel_cursor.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index a515ae2831f80..926e2de00eb58 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -24,6 +24,8 @@
 #include "intel_psr_regs.h"
 #include "skl_watermark.h"
 
+#include "gem/i915_gem_object.h"
+
 /* Cursor formats */
 static const u32 intel_cursor_formats[] = {
 	DRM_FORMAT_ARGB8888,
@@ -34,11 +36,11 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
 	struct drm_i915_private *dev_priv =
 		to_i915(plane_state->uapi.plane->dev);
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
-	const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 base;
 
 	if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
-		base = sg_dma_address(obj->mm.pages->sgl);
+		base = i915_gem_object_get_dma_address(obj, 0);
 	else
 		base = intel_plane_ggtt_offset(plane_state);
 
-- 
GitLab


From 0647ece3819b018cb62a71c3bcb7c2c3243e78ac Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Fri, 1 Dec 2023 12:21:08 +0000
Subject: [PATCH 0923/2340] drm/i915/selftests: Fix engine reset count storage
 for multi-tile

Engine->id namespace is per-tile so struct igt_live_test->reset_engine[]
needs to be two-dimensional so engine reset counts from all tiles can be
stored with no aliasing. With aliasing, if we had a real multi-tile
platform, the reset counts would be incorrect for same engine instance on
different tiles.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 0c29efa23f5c ("drm/i915/selftests: Consider multi-gt instead of to_gt()")
Reported-by: Alan Previn Teres Alexis <alan.previn.teres.alexis@intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/selftests/igt_live_test.c | 9 +++++----
 drivers/gpu/drm/i915/selftests/igt_live_test.h | 3 ++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 4ddc6d902752a..7d41874a49c58 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t,
 		}
 
 		for_each_engine(engine, gt, id)
-			t->reset_engine[id] =
-			i915_reset_engine_count(&i915->gpu_error, engine);
+			t->reset_engine[i][id] =
+				i915_reset_engine_count(&i915->gpu_error,
+							engine);
 	}
 
 	t->reset_global = i915_reset_count(&i915->gpu_error);
@@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t)
 
 	for_each_gt(gt, i915, i) {
 		for_each_engine(engine, gt, id) {
-			if (t->reset_engine[id] ==
+			if (t->reset_engine[i][id] ==
 			    i915_reset_engine_count(&i915->gpu_error, engine))
 				continue;
 
 			gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n",
 			       t->func, t->name, engine->name,
 			       i915_reset_engine_count(&i915->gpu_error, engine) -
-			       t->reset_engine[id]);
+			       t->reset_engine[i][id]);
 			return -EIO;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h
index 36ed42736c521..83e3ad430922f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -7,6 +7,7 @@
 #ifndef IGT_LIVE_TEST_H
 #define IGT_LIVE_TEST_H
 
+#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */
 #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */
 
 struct drm_i915_private;
@@ -17,7 +18,7 @@ struct igt_live_test {
 	const char *name;
 
 	unsigned int reset_global;
-	unsigned int reset_engine[I915_NUM_ENGINES];
+	unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES];
 };
 
 /*
-- 
GitLab


From cf9cb028ac56696ff879af1154c4b2f0b12701fd Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Fri, 1 Dec 2023 12:21:09 +0000
Subject: [PATCH 0924/2340] drm/i915: Use internal class when counting engine
 resets

Commit 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class")
made the GSC0 engine not have a valid uabi class and so broke the engine
reset counting, which in turn was made class based in cb823ed9915b ("drm/i915/gt: Use intel_gt as the primary object for handling resets").

Despite the title and commit text of the latter is not mentioning it (and
has left the storage array incorrectly sized), tracking by class, despite
it adding aliasing in hypthotetical multi-tile systems, is handy for
virtual engines which for instance do not have a valid engine->id.

Therefore we keep that but just change it to use the internal class which
is always valid. We also add a helper to increment the count, which
aligns with the existing getter.

What was broken without this fix were out of bounds reads every time a
reset would happen on the GSC0 engine, or during selftests when storing
and cross-checking the counts in igt_live_test_begin and
igt_live_test_end.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: dfed6b58d54f ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class")
[tursulin: fixed Fixes tag]
Reported-by: Alan Previn Teres Alexis <alan.previn.teres.alexis@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-2-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/gt/intel_reset.c             |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  5 +++--
 drivers/gpu/drm/i915/i915_gpu_error.h             | 12 ++++++++++--
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index d5ed904f355d5..6801f8b95c53d 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 	if (msg)
 		drm_notice(&engine->i915->drm,
 			   "Resetting %s for %s\n", engine->name, msg);
-	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
+	i915_increase_reset_engine_count(&engine->i915->gpu_error, engine);
 
 	ret = intel_gt_reset_engine(engine);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 04f8377fd7a39..58ea285c51d4c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -5003,7 +5003,8 @@ static void capture_error_state(struct intel_guc *guc,
 			if (match) {
 				intel_engine_set_hung_context(e, ce);
 				engine_mask |= e->mask;
-				atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]);
+				i915_increase_reset_engine_count(&i915->gpu_error,
+								 e);
 			}
 		}
 
@@ -5015,7 +5016,7 @@ static void capture_error_state(struct intel_guc *guc,
 	} else {
 		intel_engine_set_hung_context(ce->engine, ce);
 		engine_mask = ce->engine->mask;
-		atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]);
+		i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
 	}
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 68c964d6720a8..b75052e69a2b7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -16,6 +16,7 @@
 
 #include "display/intel_display_device.h"
 #include "gt/intel_engine.h"
+#include "gt/intel_engine_types.h"
 #include "gt/intel_gt_types.h"
 #include "gt/uc/intel_uc_fw.h"
 
@@ -232,7 +233,7 @@ struct i915_gpu_error {
 	atomic_t reset_count;
 
 	/** Number of times an engine has been reset */
-	atomic_t reset_engine_count[I915_NUM_ENGINES];
+	atomic_t reset_engine_count[MAX_ENGINE_CLASS];
 };
 
 struct drm_i915_error_state_buf {
@@ -255,7 +256,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 					  const struct intel_engine_cs *engine)
 {
-	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
+	return atomic_read(&error->reset_engine_count[engine->class]);
+}
+
+static inline void
+i915_increase_reset_engine_count(struct i915_gpu_error *error,
+				 const struct intel_engine_cs *engine)
+{
+	atomic_inc(&error->reset_engine_count[engine->class]);
 }
 
 #define CORE_DUMP_FLAG_NONE           0x0
-- 
GitLab


From 10690b8a49bceafb1badf0ad91842a359e796d8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Thu, 7 Dec 2023 10:34:51 +0200
Subject: [PATCH 0925/2340] drm/i915/display: Add intel_fb_bo_framebuffer_fini
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Xe needs intel_fb_bo_framebuffer_fini for taking care of unpinning the fb
and taking reference.  In i915 this can be empty.

Also move intel_frontbuffer_get to be done after
intel_fb_bo_framebuffer_init to have reasonable sequences:

intel_fb_bo_framebuffer_init
intel_frontbuffer_get
...
intel_frontbuffer_put
intel_fb_bo_framebuffer_fini

v2: Empty function instead of define

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207083451.2184562-1-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_fb.c    | 32 +++++++++++++---------
 drivers/gpu/drm/i915/display/intel_fb_bo.c |  5 ++++
 drivers/gpu/drm/i915/display/intel_fb_bo.h |  2 ++
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 32a7ee1c5b343..4f9d2e57a770a 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -1889,6 +1889,8 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 
 	intel_frontbuffer_put(intel_fb->frontbuffer);
 
+	intel_fb_bo_framebuffer_fini(intel_fb_obj(fb));
+
 	kfree(intel_fb);
 }
 
@@ -1989,13 +1991,15 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	int ret = -EINVAL;
 	int i;
 
-	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
-	if (!intel_fb->frontbuffer)
-		return -ENOMEM;
-
 	ret = intel_fb_bo_framebuffer_init(intel_fb, obj, mode_cmd);
 	if (ret)
+		return ret;
+
+	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
+	if (!intel_fb->frontbuffer) {
+		ret = -ENOMEM;
 		goto err;
+	}
 
 	ret = -EINVAL;
 	if (!drm_any_plane_has_format(&dev_priv->drm,
@@ -2004,7 +2008,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		drm_dbg_kms(&dev_priv->drm,
 			    "unsupported pixel format %p4cc / modifier 0x%llx\n",
 			    &mode_cmd->pixel_format, mode_cmd->modifier[0]);
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format,
@@ -2015,7 +2019,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			    mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ?
 			    "tiled" : "linear",
 			    mode_cmd->pitches[0], max_stride);
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
@@ -2023,7 +2027,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		drm_dbg_kms(&dev_priv->drm,
 			    "plane 0 offset (0x%08x) must be 0\n",
 			    mode_cmd->offsets[0]);
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd);
@@ -2034,7 +2038,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
 			drm_dbg_kms(&dev_priv->drm, "bad plane %d handle\n",
 				    i);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		stride_alignment = intel_fb_stride_alignment(fb, i);
@@ -2042,7 +2046,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			drm_dbg_kms(&dev_priv->drm,
 				    "plane %d pitch (%d) must be at least %u byte aligned\n",
 				    i, fb->pitches[i], stride_alignment);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) {
@@ -2053,7 +2057,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 					    "ccs aux plane %d pitch (%d) must be %d\n",
 					    i,
 					    fb->pitches[i], ccs_aux_stride);
-				goto err;
+				goto err_frontbuffer_put;
 			}
 		}
 
@@ -2062,7 +2066,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 
 	ret = intel_fill_fb_info(dev_priv, intel_fb);
 	if (ret)
-		goto err;
+		goto err_frontbuffer_put;
 
 	if (intel_fb_uses_dpt(fb)) {
 		struct i915_address_space *vm;
@@ -2071,7 +2075,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		if (IS_ERR(vm)) {
 			drm_dbg_kms(&dev_priv->drm, "failed to create DPT\n");
 			ret = PTR_ERR(vm);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		intel_fb->dpt_vm = vm;
@@ -2088,8 +2092,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 err_free_dpt:
 	if (intel_fb_uses_dpt(fb))
 		intel_dpt_destroy(intel_fb->dpt_vm);
-err:
+err_frontbuffer_put:
 	intel_frontbuffer_put(intel_fb->frontbuffer);
+err:
+	intel_fb_bo_framebuffer_fini(obj);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c
index ce86eff7b226a..4be09541e5095 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c
@@ -11,6 +11,11 @@
 #include "intel_fb.h"
 #include "intel_fb_bo.h"
 
+void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj)
+{
+	/* Nothing to do for i915 */
+}
+
 int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
 				 struct drm_i915_gem_object *obj,
 				 struct drm_mode_fb_cmd2 *mode_cmd)
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h
index dd06ceec8601b..232bf898b0138 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_bo.h
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h
@@ -12,6 +12,8 @@ struct drm_i915_gem_object;
 struct drm_i915_private;
 struct intel_framebuffer;
 
+void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj);
+
 int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
 				 struct drm_i915_gem_object *obj,
 				 struct drm_mode_fb_cmd2 *mode_cmd);
-- 
GitLab


From 3e743b0fcb90551106c13837548079b91d661384 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 12 Apr 2023 14:46:33 +0800
Subject: [PATCH 0926/2340] drm/mediatek: Use devm_platform_ioremap_resource()

Remove variable 'res' and convert platform_get_resource(),
devm_ioremap_resource() to a single call to
devm_platform_ioremap_resource(), as this is exactly what this function
does.

Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Reviewed-by: Alexandre Mergnat <amergnat@baylibre.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20230412064635.41315-1-yang.lee@linux.alibaba.com/
Link: https://patchwork.kernel.org/project/dri-devel/patch/20230412064635.41315-2-yang.lee@linux.alibaba.com/
Link: https://patchwork.kernel.org/project/dri-devel/patch/20230412064635.41315-3-yang.lee@linux.alibaba.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_cec.c        | 4 +---
 drivers/gpu/drm/mediatek/mtk_disp_aal.c   | 4 +---
 drivers/gpu/drm/mediatek/mtk_disp_ccorr.c | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c
index f47f417d8ba67..8519e9bade361 100644
--- a/drivers/gpu/drm/mediatek/mtk_cec.c
+++ b/drivers/gpu/drm/mediatek/mtk_cec.c
@@ -185,7 +185,6 @@ static int mtk_cec_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_cec *cec;
-	struct resource *res;
 	int ret;
 
 	cec = devm_kzalloc(dev, sizeof(*cec), GFP_KERNEL);
@@ -195,8 +194,7 @@ static int mtk_cec_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, cec);
 	spin_lock_init(&cec->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	cec->regs = devm_ioremap_resource(dev, res);
+	cec->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(cec->regs)) {
 		ret = PTR_ERR(cec->regs);
 		dev_err(dev, "Failed to ioremap cec: %d\n", ret);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_aal.c b/drivers/gpu/drm/mediatek/mtk_disp_aal.c
index 2209159d88557..40fe403086c3d 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_aal.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_aal.c
@@ -168,7 +168,6 @@ static int mtk_disp_aal_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_disp_aal *priv;
-	struct resource *res;
 	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -181,8 +180,7 @@ static int mtk_disp_aal_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs = devm_ioremap_resource(dev, res);
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->regs)) {
 		dev_err(dev, "failed to ioremap aal\n");
 		return PTR_ERR(priv->regs);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
index 4234ff7485e88..465cddce0d324 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
@@ -153,7 +153,6 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_disp_ccorr *priv;
-	struct resource *res;
 	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -166,8 +165,7 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs = devm_ioremap_resource(dev, res);
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->regs)) {
 		dev_err(dev, "failed to ioremap ccorr\n");
 		return PTR_ERR(priv->regs);
-- 
GitLab


From a85607e3cfc5032daf64efc5f6b5387f2271b073 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 30 Nov 2023 23:21:51 -0500
Subject: [PATCH 0927/2340] drm/doc/rfc: Mark long running workload as
 complete.

No DRM scheduler changes required, drivers just return NULL in run_job
vfunc.

The rough consensus is that no helper or extra scaffolding is needed
around long-running jobs and no further changes to drm-scheduler.

At least for now. Other drivers that currently do long-running workloads
have no plat to use drm-scheduler. Besides, the current consensus is
that this solution of simply returning NULL to the run_job function should
work without extra code duplication or complication.

On top of that, this item was already a non-blocking one for upstreaming Xe,
so let's move that to the 'Completed' section and revisit the long-running
solution as a community after Xe is integrated in DRM.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201042158.80009-2-rodrigo.vivi@intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/gpu/rfc/xe.rst | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst
index ceb21219d52ec..2a51323276178 100644
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@@ -132,21 +132,6 @@ the time comes.
 The DRM GPUVM helpers do not yet include the userptr parts, but discussions
 about implementing them are ongoing.
 
-Long running compute: minimal data structure/scaffolding
---------------------------------------------------------
-The generic scheduler code needs to include the handling of endless compute
-contexts, with the minimal scaffolding for preempt-ctx fences (probably on the
-drm_sched_entity) and making sure drm_scheduler can cope with the lack of job
-completion fence.
-
-The goal is to achieve a consensus ahead of Xe initial pull-request, ideally with
-this minimal drm/scheduler work, if needed, merged to drm-misc in a way that any
-drm driver, including Xe, could re-use and add their own individual needs on top
-in a next stage. However, this should not block the initial merge.
-
-This is a non-blocker item since the driver without the support for the long
-running compute enabled is not a showstopper.
-
 Display integration with i915
 -----------------------------
 In order to share the display code with the i915 driver so that there is maximum
@@ -184,6 +169,18 @@ Xe – uAPI high level overview
 Xe – Pre-Merge Goals - Completed
 ================================
 
+Long running compute: minimal data structure/scaffolding
+--------------------------------------------------------
+The generic scheduler code needs to include the handling of endless compute
+contexts, with the minimal scaffolding for preempt-ctx fences (probably on the
+drm_sched_entity) and making sure drm_scheduler can cope with the lack of job
+completion fence.
+
+The goal is to achieve a consensus ahead of Xe initial pull-request, ideally with
+this minimal drm/scheduler work, if needed, merged to drm-misc in a way that any
+drm driver, including Xe, could re-use and add their own individual needs on top
+in a next stage. However, this should not block the initial merge.
+
 Dev_coredump
 ------------
 
-- 
GitLab


From 0e2e6c49c1c072dff841d9dfab3b4966abb2edf1 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 30 Nov 2023 23:21:52 -0500
Subject: [PATCH 0928/2340] drm/doc/rfc: Mark drm_scheduler as completed

Current drm-xe-next doesn't have any drm/scheduler patch that is not
already accepted in drm-misc-next. This completed this goal with
the consensus of how the drm/scheduler fits to the fw scheduling and
the relationship between drm_gpu_scheduler and drm_sched_entity.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201042158.80009-3-rodrigo.vivi@intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/gpu/rfc/xe.rst | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst
index 2a51323276178..9f3afab6c06ac 100644
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@@ -70,24 +70,6 @@ When the time comes for Xe, the protection will be lifted on Xe and kept in i915
 Xe – Pre-Merge Goals - Work-in-Progress
 =======================================
 
-Drm_scheduler
--------------
-Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
-drm_scheduler as the scheduler ‘frontend’ for userspace submission in order to
-resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is
-not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and
-drm_sched_entity.
-
-Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but
-some consensus needs to be reached between Xe and other community drivers that
-could also benefit from this work, for coupling FW based/assisted submission such
-as the ARM’s new Mali GPU driver, and others.
-
-As a key measurable result, the patch series introducing Xe itself shall not
-depend on any other patch touching drm_scheduler itself that was not yet merged
-through drm-misc. This, by itself, already includes the reach of an agreement for
-uniform 1 to 1 relationship implementation / usage across drivers.
-
 ASYNC VM_BIND
 -------------
 Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
@@ -169,6 +151,24 @@ Xe – uAPI high level overview
 Xe – Pre-Merge Goals - Completed
 ================================
 
+Drm_scheduler
+-------------
+Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
+drm_scheduler as the scheduler ‘frontend’ for userspace submission in order to
+resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is
+not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and
+drm_sched_entity.
+
+Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but
+some consensus needs to be reached between Xe and other community drivers that
+could also benefit from this work, for coupling FW based/assisted submission such
+as the ARM’s new Mali GPU driver, and others.
+
+As a key measurable result, the patch series introducing Xe itself shall not
+depend on any other patch touching drm_scheduler itself that was not yet merged
+through drm-misc. This, by itself, already includes the reach of an agreement for
+uniform 1 to 1 relationship implementation / usage across drivers.
+
 Long running compute: minimal data structure/scaffolding
 --------------------------------------------------------
 The generic scheduler code needs to include the handling of endless compute
-- 
GitLab


From 16805e994bff2db40362b9007f3fcd476571a15e Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 30 Nov 2023 23:21:53 -0500
Subject: [PATCH 0929/2340] drm/doc/rfc: Move Xe 'ASYNC VM_BIND' to the
 'completed' section

As already indicated in this block, the consensus was already
reached out and documented as:
The ASYNC VM_BIND document </gpu/drm-vm-bind-async>

However this was item was not moved to the completed section.
Let's move and clean up the WIP block.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201042158.80009-4-rodrigo.vivi@intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/gpu/rfc/xe.rst | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst
index 9f3afab6c06ac..87dd620aea594 100644
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@@ -70,18 +70,6 @@ When the time comes for Xe, the protection will be lifted on Xe and kept in i915
 Xe – Pre-Merge Goals - Work-in-Progress
 =======================================
 
-ASYNC VM_BIND
--------------
-Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
-Xe merged, it is mandatory to have a consensus with other drivers and Mesa.
-It needs to be clear how to handle async VM_BIND and interactions with userspace
-memory fences. Ideally with helper support so people don't get it wrong in all
-possible ways.
-
-As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
-various flavors, error handling and sample API suggestions are documented in
-:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
-
 Userptr integration and vm_bind
 -------------------------------
 Different drivers implement different ways of dealing with execution of userptr.
@@ -151,6 +139,18 @@ Xe – uAPI high level overview
 Xe – Pre-Merge Goals - Completed
 ================================
 
+ASYNC VM_BIND
+-------------
+Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
+Xe merged, it is mandatory to have a consensus with other drivers and Mesa.
+It needs to be clear how to handle async VM_BIND and interactions with userspace
+memory fences. Ideally with helper support so people don't get it wrong in all
+possible ways.
+
+As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
+various flavors, error handling and sample API suggestions are documented in
+:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
+
 Drm_scheduler
 -------------
 Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
-- 
GitLab


From 34e64dd192071c3159c118331f87c66dffa0f9c9 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 30 Nov 2023 23:21:54 -0500
Subject: [PATCH 0930/2340] drm/doc/rfc: Move userptr integration and vm_bind
 to the 'completed' section

The must-have part of the documentation was already added to the existing
/gpu/drm-vm-bind-async. The other extra discussion around GPUVM helpers
are currently active in the community. None of those discussion should
block Xe since documentation, specially around locking was completed in
a community consensus.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201042158.80009-5-rodrigo.vivi@intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/gpu/rfc/xe.rst | 64 ++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst
index 87dd620aea594..cfff8a59a8761 100644
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@@ -70,38 +70,6 @@ When the time comes for Xe, the protection will be lifted on Xe and kept in i915
 Xe – Pre-Merge Goals - Work-in-Progress
 =======================================
 
-Userptr integration and vm_bind
--------------------------------
-Different drivers implement different ways of dealing with execution of userptr.
-With multiple drivers currently introducing support to VM_BIND, the goal is to
-aim for a DRM consensus on what’s the best way to have that support. To some
-extent this is already getting addressed itself with the GPUVA where likely the
-userptr will be a GPUVA with a NULL GEM call VM bind directly on the userptr.
-However, there are more aspects around the rules for that and the usage of
-mmu_notifiers, locking and other aspects.
-
-This task here has the goal of introducing a documentation of the basic rules.
-
-The documentation *needs* to first live in this document (API session below) and
-then moved to another more specific document or at Xe level or at DRM level.
-
-Documentation should include:
-
- * The userptr part of the VM_BIND api.
-
- * Locking, including the page-faulting case.
-
- * O(1) complexity under VM_BIND.
-
-The document is now included in the drm documentation :doc:`here </gpu/drm-vm-bind-async>`.
-
-Some parts of userptr like mmu_notifiers should become GPUVA or DRM helpers when
-the second driver supporting VM_BIND+userptr appears. Details to be defined when
-the time comes.
-
-The DRM GPUVM helpers do not yet include the userptr parts, but discussions
-about implementing them are ongoing.
-
 Display integration with i915
 -----------------------------
 In order to share the display code with the i915 driver so that there is maximum
@@ -139,6 +107,38 @@ Xe – uAPI high level overview
 Xe – Pre-Merge Goals - Completed
 ================================
 
+Userptr integration and vm_bind
+-------------------------------
+Different drivers implement different ways of dealing with execution of userptr.
+With multiple drivers currently introducing support to VM_BIND, the goal is to
+aim for a DRM consensus on what’s the best way to have that support. To some
+extent this is already getting addressed itself with the GPUVA where likely the
+userptr will be a GPUVA with a NULL GEM call VM bind directly on the userptr.
+However, there are more aspects around the rules for that and the usage of
+mmu_notifiers, locking and other aspects.
+
+This task here has the goal of introducing a documentation of the basic rules.
+
+The documentation *needs* to first live in this document (API session below) and
+then moved to another more specific document or at Xe level or at DRM level.
+
+Documentation should include:
+
+ * The userptr part of the VM_BIND api.
+
+ * Locking, including the page-faulting case.
+
+ * O(1) complexity under VM_BIND.
+
+The document is now included in the drm documentation :doc:`here </gpu/drm-vm-bind-async>`.
+
+Some parts of userptr like mmu_notifiers should become GPUVA or DRM helpers when
+the second driver supporting VM_BIND+userptr appears. Details to be defined when
+the time comes.
+
+The DRM GPUVM helpers do not yet include the userptr parts, but discussions
+about implementing them are ongoing.
+
 ASYNC VM_BIND
 -------------
 Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
-- 
GitLab


From aa15b031854fc86fd0b9c7e6b794d57563cba60f Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 30 Nov 2023 23:21:55 -0500
Subject: [PATCH 0931/2340] drm/doc/rfc: Xe is using drm_exec, so mark as
 completed

Nothing else to be done on this front from Xe perspective.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231201042158.80009-6-rodrigo.vivi@intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/gpu/rfc/xe.rst | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst
index cfff8a59a8761..97cf87578f973 100644
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@@ -85,6 +85,14 @@ from the first pull request of Xe towards drm-next. The expectation is that when
 both drivers are part of the drm-tip, the introduction of cleaner patches will be
 easier and speed up.
 
+Xe – uAPI high level overview
+=============================
+
+...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
+
+Xe – Pre-Merge Goals - Completed
+================================
+
 Drm_exec
 --------
 Helper to make dma_resv locking for a big number of buffers is getting removed in
@@ -99,14 +107,6 @@ This item ties into the GPUVA, VM_BIND, and even long-running compute support.
 As a key measurable result, we need to have a community consensus documented in
 this document and the Xe driver prepared for the changes, if necessary.
 
-Xe – uAPI high level overview
-=============================
-
-...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
-
-Xe – Pre-Merge Goals - Completed
-================================
-
 Userptr integration and vm_bind
 -------------------------------
 Different drivers implement different ways of dealing with execution of userptr.
-- 
GitLab


From 26f4bac3d884e340fd8b061dcfc64688a8c416e1 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 7 Dec 2023 10:25:00 -0600
Subject: [PATCH 0932/2340] drm/bridge: aux-hpd: Replace of_device.h with
 explicit include

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it was merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. Soon the implicit includes are going to be removed.

of_device.h isn't needed, but of.h is for of_node_put().

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20231207162501.2629952-1-robh@kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/gpu/drm/bridge/aux-hpd-bridge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
index 5d2ab3a715f9c..1999a053d59bd 100644
--- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -6,7 +6,7 @@
  */
 #include <linux/auxiliary_bus.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #include <drm/drm_bridge.h>
 #include <drm/bridge/aux-bridge.h>
-- 
GitLab


From 67a5f0ff342907ca399b77f0445b2673137cdfa5 Mon Sep 17 00:00:00 2001
From: Pin-yen Lin <treapking@chromium.org>
Date: Thu, 7 Dec 2023 16:17:35 +0800
Subject: [PATCH 0933/2340] drm/edp-panel: Move the KDC panel to a separate
 group

Move the KDC panel entry to make the list sorted by the vendor string.

Signed-off-by: Pin-yen Lin <treapking@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207081801.4049075-2-treapking@chromium.org
---
 drivers/gpu/drm/panel/panel-edp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index c76f186c4baaa..a0b6f69b916f9 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1980,9 +1980,10 @@ static const struct edp_panel_entry edp_panels[] = {
 	EDP_PANEL_ENTRY('I', 'V', 'O', 0x8c4d, &delay_200_150_e200, "R140NWFM R1"),
 
 	EDP_PANEL_ENTRY('K', 'D', 'B', 0x0624, &kingdisplay_kd116n21_30nv_a010.delay, "116N21-30NV-A010"),
-	EDP_PANEL_ENTRY('K', 'D', 'C', 0x0809, &delay_200_500_e50, "KD116N2930A15"),
 	EDP_PANEL_ENTRY('K', 'D', 'B', 0x1120, &delay_200_500_e80_d50, "116N29-30NK-C007"),
 
+	EDP_PANEL_ENTRY('K', 'D', 'C', 0x0809, &delay_200_500_e50, "KD116N2930A15"),
+
 	EDP_PANEL_ENTRY('S', 'D', 'C', 0x416d, &delay_100_500_e200, "ATNA45AF01"),
 
 	EDP_PANEL_ENTRY('S', 'H', 'P', 0x1511, &delay_200_500_e50, "LQ140M1JW48"),
-- 
GitLab


From df2a5f74e6eda50e1376a32bd60402a28ed51c8e Mon Sep 17 00:00:00 2001
From: Dmitrii Galantsev <dmitrii.galantsev@amd.com>
Date: Wed, 6 Dec 2023 02:04:52 -0600
Subject: [PATCH 0934/2340] drm/amd/pm: fix pp_*clk_od typo

Fix pp_dpm_sclk_od and pp_dpm_mclk_od typos.
Those were defined as pp_*clk_od but used as pp_dpm_*clk_od instead.
This change removes the _dpm part.

Fixes: 8cfd6a05750c ("drm/amd/pm: Hide irrelevant pm device attributes")
Signed-off-by: Dmitrii Galantsev <dmitrii.galantsev@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index e1497296afee2..2cd995b0cebac 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2238,10 +2238,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 	} else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) {
 		if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE)
 			*states = ATTR_STATE_UNSUPPORTED;
-	} else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) {
+	} else if (DEVICE_ATTR_IS(pp_mclk_od)) {
 		if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP)
 			*states = ATTR_STATE_UNSUPPORTED;
-	} else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) {
+	} else if (DEVICE_ATTR_IS(pp_sclk_od)) {
 		if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP)
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(apu_thermal_cap)) {
-- 
GitLab


From 0497ae6f8830816d9277a8d5c8d9bf5966f292e1 Mon Sep 17 00:00:00 2001
From: Hamza Mahfooz <hamza.mahfooz@amd.com>
Date: Tue, 5 Dec 2023 14:55:04 -0500
Subject: [PATCH 0935/2340] drm/amd/display: fix hw rotated modes when PSR-SU
 is enabled

We currently don't support dirty rectangles on hardware rotated modes.
So, if a user is using hardware rotated modes with PSR-SU enabled,
use PSR-SU FFU for all rotated planes (including cursor planes).

Cc: stable@vger.kernel.org
Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support")
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2952
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Bin Li <binli@gnome.org>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c    |  3 +++
 drivers/gpu/drm/amd/display/dc/dc_hw_types.h         |  1 +
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c    | 12 ++++++++++--
 .../gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c  |  3 ++-
 4 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6aabe8a3ffef1..90868a54d4ddf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5241,6 +5241,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 	if (plane->type == DRM_PLANE_TYPE_CURSOR)
 		return;
 
+	if (new_plane_state->rotation != DRM_MODE_ROTATE_0)
+		goto ffu;
+
 	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
 	clips = drm_plane_get_damage_clips(new_plane_state);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 9649934ea186d..e2a3aa8812df4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -465,6 +465,7 @@ struct dc_cursor_mi_param {
 	struct fixed31_32 v_scale_ratio;
 	enum dc_rotation_angle rotation;
 	bool mirror;
+	struct dc_stream_state *stream;
 };
 
 /* IPP related types */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 139cf31d2e456..89c3bf0fe0c99 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -1077,8 +1077,16 @@ void hubp2_cursor_set_position(
 	if (src_y_offset < 0)
 		src_y_offset = 0;
 	/* Save necessary cursor info x, y position. w, h is saved in attribute func. */
-	hubp->cur_rect.x = src_x_offset + param->viewport.x;
-	hubp->cur_rect.y = src_y_offset + param->viewport.y;
+	if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
+	    param->rotation != ROTATION_ANGLE_0) {
+		hubp->cur_rect.x = 0;
+		hubp->cur_rect.y = 0;
+		hubp->cur_rect.w = param->stream->timing.h_addressable;
+		hubp->cur_rect.h = param->stream->timing.v_addressable;
+	} else {
+		hubp->cur_rect.x = src_x_offset + param->viewport.x;
+		hubp->cur_rect.y = src_y_offset + param->viewport.y;
+	}
 }
 
 void hubp2_clk_cntl(struct hubp *hubp, bool enable)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 2b8b8366538e9..cdb903116eb7c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -3417,7 +3417,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 		.h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz,
 		.v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert,
 		.rotation = pipe_ctx->plane_state->rotation,
-		.mirror = pipe_ctx->plane_state->horizontal_mirror
+		.mirror = pipe_ctx->plane_state->horizontal_mirror,
+		.stream = pipe_ctx->stream,
 	};
 	bool pipe_split_on = false;
 	bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) ||
-- 
GitLab


From a2020be69490ee8778c59a02e7b270dfeecffbd4 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 6 Dec 2023 12:08:26 -0600
Subject: [PATCH 0936/2340] drm/amd/display: Restore guard against default
 backlight value < 1 nit

Mark reports that brightness is not restored after Xorg dpms screen blank.

This behavior was introduced by commit d9e865826c20 ("drm/amd/display:
Simplify brightness initialization") which dropped the cached backlight
value in display code, but also removed code for when the default value
read back was less than 1 nit.

Restore this code so that the backlight brightness is restored to the
correct default value in this circumstance.

Reported-by: Mark Herbert <mark.herbert42@gmail.com>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3031
Cc: stable@vger.kernel.org
Cc: Camille Cho <camille.cho@amd.com>
Cc: Krunoslav Kovac <krunoslav.kovac@amd.com>
Cc: Hamza Mahfooz <hamza.mahfooz@amd.com>
Fixes: d9e865826c20 ("drm/amd/display: Simplify brightness initialization")
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/dc/link/protocols/link_edp_panel_control.c    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index ac0fa88b52a07..bf53a86ea8171 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -287,8 +287,8 @@ bool set_default_brightness_aux(struct dc_link *link)
 	if (link && link->dpcd_sink_ext_caps.bits.oled == 1) {
 		if (!read_default_bl_aux(link, &default_backlight))
 			default_backlight = 150000;
-		// if > 5000, it might be wrong readback
-		if (default_backlight > 5000000)
+		// if < 1 nits or > 5000, it might be wrong readback
+		if (default_backlight < 1000 || default_backlight > 5000000)
 			default_backlight = 150000;
 
 		return edp_set_backlight_level_nits(link, true,
-- 
GitLab


From b2662d4cc4ce2db4bd55e00a528b1d35be82c6c3 Mon Sep 17 00:00:00 2001
From: shaoyunl <shaoyun.liu@amd.com>
Date: Thu, 23 Nov 2023 13:59:44 -0500
Subject: [PATCH 0937/2340] drm/amdgpu: SW part of MES event log enablement

This is the generic SW part, prepare the event log buffer and dump it through debugfs

Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c     | 61 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h     |  5 ++
 4 files changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c480192e33f56..424bed7382965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2147,6 +2147,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 	amdgpu_debugfs_firmware_init(adev);
 	amdgpu_ta_if_debugfs_init(adev);
 
+	amdgpu_debugfs_mes_event_log_init(adev);
+
 #if defined(CONFIG_DRM_AMD_DC)
 	if (adev->dc_enabled)
 		dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb299..0425432d8659b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 9ddbf1494326a..e544b823abf6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -98,6 +98,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->mes.event_log_gpu_obj,
+				    &adev->mes.event_log_gpu_addr,
+				    &adev->mes.event_log_cpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+		return r;
+	}
+
+	memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+	return  0;
+
+}
+
 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
 {
 	bitmap_free(adev->mes.doorbell_bitmap);
@@ -182,8 +202,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 	if (r)
 		goto error;
 
+	r = amdgpu_mes_event_log_init(adev);
+	if (r)
+		goto error_doorbell;
+
 	return 0;
 
+error_doorbell:
+	amdgpu_mes_doorbell_free(adev);
 error:
 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
@@ -199,6 +225,10 @@ error_ids:
 
 void amdgpu_mes_fini(struct amdgpu_device *adev)
 {
+	amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+			      &adev->mes.event_log_gpu_addr,
+			      &adev->mes.event_log_cpu_addr);
+
 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
 	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -1479,3 +1509,34 @@ out:
 	amdgpu_ucode_release(&adev->mes.fw[pipe]);
 	return r;
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+	struct amdgpu_device *adev = m->private;
+	uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+		     mem, PAGE_SIZE, false);
+
+	return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+	struct drm_minor *minor = adev_to_drm(adev)->primary;
+	struct dentry *root = minor->debugfs_root;
+
+	debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+			    adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe005..894b9b1330009 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -133,6 +133,11 @@ struct amdgpu_mes {
 	uint32_t			num_mes_dbs;
 	unsigned long			*doorbell_bitmap;
 
+	/* MES event log buffer */
+	struct amdgpu_bo		*event_log_gpu_obj;
+	uint64_t                        event_log_gpu_addr;
+	void				*event_log_cpu_addr;
+
 	/* ip specific functions */
 	const struct amdgpu_mes_funcs   *funcs;
 };
-- 
GitLab


From 47c4533543af4759b7668a06c1a2ce06cdc71173 Mon Sep 17 00:00:00 2001
From: shaoyunl <shaoyun.liu@amd.com>
Date: Thu, 23 Nov 2023 14:52:55 -0500
Subject: [PATCH 0938/2340] drm/amdgpu: Enable event log on MES 11

Enable event log through the HW specific FW API

Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c        | 2 ++
 drivers/gpu/drm/amd/include/mes_v11_api_def.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 4dfec56e1b7fc..26d71a22395d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -408,6 +408,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
 	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
 	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
 	mes_set_hw_res_pkt.oversubscription_timer = 50;
+	mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+	mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
 
 	return mes_v11_0_submit_pkt_and_poll_completion(mes,
 			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index b1db2b1901874..1fbfd1aa987e3 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -232,6 +232,7 @@ union MESAPI_SET_HW_RESOURCES {
 		};
 		uint32_t	oversubscription_timer;
 		uint64_t        doorbell_info;
+		uint64_t        event_intr_history_gpu_mc_ptr;
 	};
 
 	uint32_t	max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-- 
GitLab


From c5a761e2fe58631e1cd0249a729ce1934a11bcbe Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 23 Nov 2023 13:41:12 +0000
Subject: [PATCH 0939/2340] drm/mediatek: Stop using iommu_present()

Remove the pointless check. If an IOMMU is providing transparent DMA API
ops for any device(s) we care about, the DT code will have enforced the
appropriate probe ordering already. And if the IOMMU *is* entirely
absent, then attempting to go ahead with CMA and either suceeding or
failing decisively seems more useful than deferring forever.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/fd1b62aa006556f29f37535814abfe41be63f7ae.1700746094.git.robin.murphy@arm.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_drm_drv.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 2dfaa613276a6..48581da51857a 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -5,7 +5,6 @@
  */
 
 #include <linux/component.h>
-#include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
@@ -608,9 +607,6 @@ static int mtk_drm_bind(struct device *dev)
 	struct drm_device *drm;
 	int ret, i;
 
-	if (!iommu_present(&platform_bus_type))
-		return -EPROBE_DEFER;
-
 	pdev = of_find_device_by_node(private->mutex_node);
 	if (!pdev) {
 		dev_err(dev, "Waiting for disp-mutex device %pOF\n",
-- 
GitLab


From 157fd368561e03057e6e038fc115d1e81fa2ebf2 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Thu, 7 Dec 2023 17:37:17 +0100
Subject: [PATCH 0940/2340] dt-bindings: display: msm: dp-controller: document
 SM8650 compatible

Document the DisplayPort controller found in the Qualcomm SM8650 SoC,
the Controller base addresses and layout differ and thus cannot use
the SM8350 compatible as fallback.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571131/
Link: https://lore.kernel.org/r/20231207-topic-sm8650-upstream-dp-v1-1-b762c06965bb@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../devicetree/bindings/display/msm/dp-controller.yaml      | 1 +
 .../devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml   | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
index dbe398f84ffb3..93ded71c52d72 100644
--- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
@@ -26,6 +26,7 @@ properties:
           - qcom,sc8280xp-edp
           - qcom,sdm845-dp
           - qcom,sm8350-dp
+          - qcom,sm8650-dp
       - items:
           - enum:
               - qcom,sm8250-dp
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml
index 5638c1ea692ed..bd11119dc93da 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml
@@ -41,6 +41,12 @@ patternProperties:
       compatible:
         const: qcom,sm8650-dpu
 
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        const: qcom,sm8650-dp
+
   "^dsi@[0-9a-f]+$":
     type: object
     properties:
-- 
GitLab


From 1b2d98bdd7b7c64265732f5f0dace4c52c9ba8a8 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Thu, 7 Dec 2023 17:37:18 +0100
Subject: [PATCH 0941/2340] drm/msm/dp: Add DisplayPort controller for SM8650

The Qualcomm SM8650 platform comes with a DisplayPort controller
with a different base offset than the previous SM8550 SoC,
add support for this in the DisplayPort driver.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571132/
Link: https://lore.kernel.org/r/20231207-topic-sm8650-upstream-dp-v1-2-b762c06965bb@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 20c5cf7986b22..fd4e62fe163fd 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -168,6 +168,11 @@ static const struct msm_dp_desc sm8350_dp_descs[] = {
 	{}
 };
 
+static const struct msm_dp_desc sm8650_dp_descs[] = {
+	{ .io_start = 0x0af54000, .id = MSM_DP_CONTROLLER_0, .connector_type = DRM_MODE_CONNECTOR_DisplayPort },
+	{}
+};
+
 static const struct of_device_id dp_dt_match[] = {
 	{ .compatible = "qcom,sc7180-dp", .data = &sc7180_dp_descs },
 	{ .compatible = "qcom,sc7280-dp", .data = &sc7280_dp_descs },
@@ -178,6 +183,7 @@ static const struct of_device_id dp_dt_match[] = {
 	{ .compatible = "qcom,sc8280xp-edp", .data = &sc8280xp_edp_descs },
 	{ .compatible = "qcom,sdm845-dp", .data = &sc7180_dp_descs },
 	{ .compatible = "qcom,sm8350-dp", .data = &sm8350_dp_descs },
+	{ .compatible = "qcom,sm8650-dp", .data = &sm8650_dp_descs },
 	{}
 };
 
-- 
GitLab


From c4ac0c6c96f0985357c5866cf22fd642390409dd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Wed, 6 Dec 2023 15:02:05 +0300
Subject: [PATCH 0942/2340] drm/msm/dp: Fix platform_get_irq() check

The platform_get_irq() function returns negative error codes.  It never
returns zero.  Fix the check accordingly.

Fixes: 82c2a5751227 ("drm/msm/dp: tie dp_display_irq_handler() with dp driver")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/570755/
Link: https://lore.kernel.org/r/c12bb69b-d676-4345-9712-48aab48f2b48@moroto.mountain
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index fd4e62fe163fd..f8a9e8403c264 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -1168,9 +1168,9 @@ static int dp_display_request_irq(struct dp_display_private *dp)
 	struct platform_device *pdev = dp->dp_display.pdev;
 
 	dp->irq = platform_get_irq(pdev, 0);
-	if (!dp->irq) {
+	if (dp->irq < 0) {
 		DRM_ERROR("failed to get irq\n");
-		return -EINVAL;
+		return dp->irq;
 	}
 
 	rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler,
-- 
GitLab


From a08935fc859b22884dcb6b5126d3a986467101ce Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Wed, 4 Oct 2023 06:19:03 +0300
Subject: [PATCH 0943/2340] drm/msm/dpu: drop MSM_ENC_VBLANK support

There are no in-kernel users of MSM_ENC_VBLANK wait type. Drop it
together with the corresponding wait_for_vblank callback.

Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/560701/
Link: https://lore.kernel.org/r/20231004031903.518223-1-dmitry.baryshkov@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c   |  3 --
 .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  1 -
 .../drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c  | 28 -------------------
 .../drm/msm/disp/dpu1/dpu_encoder_phys_vid.c  |  9 +++---
 drivers/gpu/drm/msm/msm_drv.h                 |  2 --
 5 files changed, 4 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index aa1a1646b322a..889e9bb42715f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2400,9 +2400,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
 		case MSM_ENC_TX_COMPLETE:
 			fn_wait = phys->ops.wait_for_tx_complete;
 			break;
-		case MSM_ENC_VBLANK:
-			fn_wait = phys->ops.wait_for_vblank;
-			break;
 		default:
 			DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n",
 					event);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
index b6b48e2c63efa..e2934a6702d1c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
@@ -104,7 +104,6 @@ struct dpu_encoder_phys_ops {
 	int (*control_vblank_irq)(struct dpu_encoder_phys *enc, bool enable);
 	int (*wait_for_commit_done)(struct dpu_encoder_phys *phys_enc);
 	int (*wait_for_tx_complete)(struct dpu_encoder_phys *phys_enc);
-	int (*wait_for_vblank)(struct dpu_encoder_phys *phys_enc);
 	void (*prepare_for_kickoff)(struct dpu_encoder_phys *phys_enc);
 	void (*handle_post_kickoff)(struct dpu_encoder_phys *phys_enc);
 	void (*trigger_start)(struct dpu_encoder_phys *phys_enc);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index d24f45d1f654f..76f1f66e41cd0 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -675,33 +675,6 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done(
 	return _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc);
 }
 
-static int dpu_encoder_phys_cmd_wait_for_vblank(
-		struct dpu_encoder_phys *phys_enc)
-{
-	int rc = 0;
-	struct dpu_encoder_phys_cmd *cmd_enc;
-	struct dpu_encoder_wait_info wait_info;
-
-	cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
-
-	/* only required for master controller */
-	if (!dpu_encoder_phys_cmd_is_master(phys_enc))
-		return rc;
-
-	wait_info.wq = &cmd_enc->pending_vblank_wq;
-	wait_info.atomic_cnt = &cmd_enc->pending_vblank_cnt;
-	wait_info.timeout_ms = KICKOFF_TIMEOUT_MS;
-
-	atomic_inc(&cmd_enc->pending_vblank_cnt);
-
-	rc = dpu_encoder_helper_wait_for_irq(phys_enc,
-			phys_enc->irq[INTR_IDX_RDPTR],
-			dpu_encoder_phys_cmd_te_rd_ptr_irq,
-			&wait_info);
-
-	return rc;
-}
-
 static void dpu_encoder_phys_cmd_handle_post_kickoff(
 		struct dpu_encoder_phys *phys_enc)
 {
@@ -729,7 +702,6 @@ static void dpu_encoder_phys_cmd_init_ops(
 	ops->wait_for_commit_done = dpu_encoder_phys_cmd_wait_for_commit_done;
 	ops->prepare_for_kickoff = dpu_encoder_phys_cmd_prepare_for_kickoff;
 	ops->wait_for_tx_complete = dpu_encoder_phys_cmd_wait_for_tx_complete;
-	ops->wait_for_vblank = dpu_encoder_phys_cmd_wait_for_vblank;
 	ops->trigger_start = dpu_encoder_phys_cmd_trigger_start;
 	ops->needs_single_flush = dpu_encoder_phys_cmd_needs_single_flush;
 	ops->irq_control = dpu_encoder_phys_cmd_irq_control;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index 69bc1b2e514cc..2b4e5b5eff447 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -440,7 +440,7 @@ skip_flush:
 		phys_enc->enable_state = DPU_ENC_ENABLING;
 }
 
-static int dpu_encoder_phys_vid_wait_for_vblank(
+static int dpu_encoder_phys_vid_wait_for_tx_complete(
 		struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_encoder_wait_info wait_info;
@@ -554,7 +554,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc)
 	 * scanout buffer) don't latch properly..
 	 */
 	if (dpu_encoder_phys_vid_is_master(phys_enc)) {
-		ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc);
+		ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc);
 		if (ret) {
 			atomic_set(&phys_enc->pending_kickoff_cnt, 0);
 			DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n",
@@ -574,7 +574,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc)
 		spin_lock_irqsave(phys_enc->enc_spinlock, lock_flags);
 		dpu_encoder_phys_inc_pending(phys_enc);
 		spin_unlock_irqrestore(phys_enc->enc_spinlock, lock_flags);
-		ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc);
+		ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc);
 		if (ret) {
 			atomic_set(&phys_enc->pending_kickoff_cnt, 0);
 			DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n",
@@ -679,8 +679,7 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->disable = dpu_encoder_phys_vid_disable;
 	ops->control_vblank_irq = dpu_encoder_phys_vid_control_vblank_irq;
 	ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_commit_done;
-	ops->wait_for_vblank = dpu_encoder_phys_vid_wait_for_vblank;
-	ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_vblank;
+	ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_tx_complete;
 	ops->irq_control = dpu_encoder_phys_vid_irq_control;
 	ops->prepare_for_kickoff = dpu_encoder_phys_vid_prepare_for_kickoff;
 	ops->handle_post_kickoff = dpu_encoder_phys_vid_handle_post_kickoff;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index a205127ccc932..c0446fa66b98d 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -78,12 +78,10 @@ enum msm_dsi_controller {
  * enum msm_event_wait - type of HW events to wait for
  * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW
  * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel
- * @MSM_ENC_VBLANK - wait for the HW VBLANK event (for driver-internal waiters)
  */
 enum msm_event_wait {
 	MSM_ENC_COMMIT_DONE = 0,
 	MSM_ENC_TX_COMPLETE,
-	MSM_ENC_VBLANK,
 };
 
 /**
-- 
GitLab


From 66f011145b835f9a68af9d7156c8d84a6f29c331 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 8 Dec 2023 04:03:13 +0300
Subject: [PATCH 0944/2340] drm/atomic-helper: rename
 drm_atomic_helper_check_wb_encoder_state

The drm_atomic_helper_check_wb_encoder_state() function doesn't use
encoder for anything other than getting the drm_device instance. The
function's description talks about checking the writeback connector
state, not the encoder state. Moreover, there is no such thing as an
encoder state, encoders generally do not have a state on their own.

Rename the function to drm_atomic_helper_check_wb_connector_state()
and change arguments to drm_writeback_connector and drm_atomic_state.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208010314.3395904-2-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/drm_atomic_helper.c   | 16 +++++++++-------
 drivers/gpu/drm/vkms/vkms_writeback.c |  3 ++-
 include/drm/drm_atomic_helper.h       |  5 ++---
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index c3f677130def0..c98a766ca3bd7 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -795,9 +795,9 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 EXPORT_SYMBOL(drm_atomic_helper_check_modeset);
 
 /**
- * drm_atomic_helper_check_wb_encoder_state() - Check writeback encoder state
- * @encoder: encoder state to check
- * @conn_state: connector state to check
+ * drm_atomic_helper_check_wb_connector_state() - Check writeback connector state
+ * @connector: corresponding connector
+ * @state: the driver state object
  *
  * Checks if the writeback connector state is valid, and returns an error if it
  * isn't.
@@ -806,9 +806,11 @@ EXPORT_SYMBOL(drm_atomic_helper_check_modeset);
  * Zero for success or -errno
  */
 int
-drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder,
-					 struct drm_connector_state *conn_state)
+drm_atomic_helper_check_wb_connector_state(struct drm_connector *connector,
+					   struct drm_atomic_state *state)
 {
+	struct drm_connector_state *conn_state =
+		drm_atomic_get_new_connector_state(state, connector);
 	struct drm_writeback_job *wb_job = conn_state->writeback_job;
 	struct drm_property_blob *pixel_format_blob;
 	struct drm_framebuffer *fb;
@@ -827,11 +829,11 @@ drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder,
 		if (fb->format->format == formats[i])
 			return 0;
 
-	drm_dbg_kms(encoder->dev, "Invalid pixel format %p4cc\n", &fb->format->format);
+	drm_dbg_kms(connector->dev, "Invalid pixel format %p4cc\n", &fb->format->format);
 
 	return -EINVAL;
 }
-EXPORT_SYMBOL(drm_atomic_helper_check_wb_encoder_state);
+EXPORT_SYMBOL(drm_atomic_helper_check_wb_connector_state);
 
 /**
  * drm_atomic_helper_check_plane_state() - Check plane state for validity
diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c
index d7e63aa14663f..fef7f3daf2c9a 100644
--- a/drivers/gpu/drm/vkms/vkms_writeback.c
+++ b/drivers/gpu/drm/vkms/vkms_writeback.c
@@ -34,6 +34,7 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder,
 					struct drm_crtc_state *crtc_state,
 					struct drm_connector_state *conn_state)
 {
+	struct drm_connector *connector = conn_state->connector;
 	struct drm_framebuffer *fb;
 	const struct drm_display_mode *mode = &crtc_state->mode;
 	int ret;
@@ -48,7 +49,7 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder,
 		return -EINVAL;
 	}
 
-	ret = drm_atomic_helper_check_wb_encoder_state(encoder, conn_state);
+	ret = drm_atomic_helper_check_wb_connector_state(connector, conn_state->state);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 536a0b0091c3a..fea528aacfe27 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -49,9 +49,8 @@ struct drm_private_state;
 
 int drm_atomic_helper_check_modeset(struct drm_device *dev,
 				struct drm_atomic_state *state);
-int
-drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder,
-					 struct drm_connector_state *conn_state);
+int drm_atomic_helper_check_wb_connector_state(struct drm_connector *connector,
+					       struct drm_atomic_state *state);
 int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
 					const struct drm_crtc_state *crtc_state,
 					int min_scale,
-- 
GitLab


From aa8ec5d7b26d820dfad2f7668e9dd9edff7ebd7d Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 8 Dec 2023 04:03:14 +0300
Subject: [PATCH 0945/2340] drm/vkms: move wb's atomic_check from encoder to
 connector

As the renamed drm_atomic_helper_check_wb_connector_state() now accepts
drm_writeback_connector as the first argument (instead of drm_encoder),
move the VKMS writeback atomic_check from drm_encoder_helper_funcs to
drm_connector_helper_funcs. Also drop the vkms_wb_encoder_helper_funcs,
which have become empty now.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208010314.3395904-3-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/vkms/vkms_writeback.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c
index fef7f3daf2c9a..bc724cbd5e3a7 100644
--- a/drivers/gpu/drm/vkms/vkms_writeback.c
+++ b/drivers/gpu/drm/vkms/vkms_writeback.c
@@ -30,18 +30,25 @@ static const struct drm_connector_funcs vkms_wb_connector_funcs = {
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
-static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder,
-					struct drm_crtc_state *crtc_state,
-					struct drm_connector_state *conn_state)
+static int vkms_wb_atomic_check(struct drm_connector *connector,
+				struct drm_atomic_state *state)
 {
-	struct drm_connector *connector = conn_state->connector;
+	struct drm_connector_state *conn_state =
+		drm_atomic_get_new_connector_state(state, connector);
+	struct drm_crtc_state *crtc_state;
 	struct drm_framebuffer *fb;
-	const struct drm_display_mode *mode = &crtc_state->mode;
+	const struct drm_display_mode *mode;
 	int ret;
 
 	if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
 		return 0;
 
+	if (!conn_state->crtc)
+		return 0;
+
+	crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
+	mode = &crtc_state->mode;
+
 	fb = conn_state->writeback_job->fb;
 	if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) {
 		DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n",
@@ -49,17 +56,13 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder,
 		return -EINVAL;
 	}
 
-	ret = drm_atomic_helper_check_wb_connector_state(connector, conn_state->state);
+	ret = drm_atomic_helper_check_wb_connector_state(connector, state);
 	if (ret < 0)
 		return ret;
 
 	return 0;
 }
 
-static const struct drm_encoder_helper_funcs vkms_wb_encoder_helper_funcs = {
-	.atomic_check = vkms_wb_encoder_atomic_check,
-};
-
 static int vkms_wb_connector_get_modes(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
@@ -162,6 +165,7 @@ static const struct drm_connector_helper_funcs vkms_wb_conn_helper_funcs = {
 	.prepare_writeback_job = vkms_wb_prepare_job,
 	.cleanup_writeback_job = vkms_wb_cleanup_job,
 	.atomic_commit = vkms_wb_atomic_commit,
+	.atomic_check = vkms_wb_atomic_check,
 };
 
 int vkms_enable_writeback_connector(struct vkms_device *vkmsdev)
@@ -172,7 +176,7 @@ int vkms_enable_writeback_connector(struct vkms_device *vkmsdev)
 
 	return drm_writeback_connector_init(&vkmsdev->drm, wb,
 					    &vkms_wb_connector_funcs,
-					    &vkms_wb_encoder_helper_funcs,
+					    NULL,
 					    vkms_wb_formats,
 					    ARRAY_SIZE(vkms_wb_formats),
 					    1);
-- 
GitLab


From b84135e7a5921a79e3dc0cc1bbfbe9c3c661c8d0 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 8 Dec 2023 08:30:34 +0800
Subject: [PATCH 0946/2340] drm/imagination: Remove unneeded semicolon

./drivers/gpu/drm/imagination/pvr_fw_trace.c:251:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7694
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208003034.68339-1-yang.lee@linux.alibaba.com
---
 drivers/gpu/drm/imagination/pvr_fw_trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c
index 7159fc479001c..31199e45b72ea 100644
--- a/drivers/gpu/drm/imagination/pvr_fw_trace.c
+++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c
@@ -248,7 +248,7 @@ static bool fw_trace_get_next(struct pvr_fw_trace_seq_data *trace_seq_data)
 			continue;
 
 		return true;
-	};
+	}
 
 	/* Hit end of trace data. */
 	return false;
-- 
GitLab


From fca9448ae2f5ddebd841c727ee86136e1b5cbd86 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Wed, 6 Dec 2023 17:37:24 +0300
Subject: [PATCH 0947/2340] drm/imagination: Move dereference after NULL check
 in pvr_mmu_backing_page_init()

This code dereferences "page->pvr_dev" and then checked for NULL on the
next line.  Re-order it to avoid a potential NULL pointer dereference.

Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/13f4278e-af9c-4092-9196-bc0e6b76f1eb@moroto.mountain
---
 drivers/gpu/drm/imagination/pvr_mmu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c
index c8562bfc0dcda..4fe70610ed94c 100644
--- a/drivers/gpu/drm/imagination/pvr_mmu.c
+++ b/drivers/gpu/drm/imagination/pvr_mmu.c
@@ -316,12 +316,14 @@ err_free_page:
 static void
 pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
 {
-	struct device *dev = from_pvr_device(page->pvr_dev)->dev;
+	struct device *dev;
 
 	/* Do nothing if no allocation is present. */
 	if (!page->pvr_dev)
 		return;
 
+	dev = from_pvr_device(page->pvr_dev)->dev;
+
 	dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
 		       DMA_TO_DEVICE);
 
-- 
GitLab


From 34880b18733efff60b68d074ff74d018ffc309a7 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Tue, 5 Dec 2023 14:09:37 +0100
Subject: [PATCH 0948/2340] drm/i915/selftests: wait for active idle event in
 i915_active_unlock_wait

After i915_active_unlock_wait i915_active can be still non-idle due
to barrier async handling in signal_irq_work. As a result one can observe
following errors:
bcs0: heartbeat pulse did not flush idle tasks
*ERROR* pulse active pulse_active [i915]:pulse_retire [i915]
*ERROR* pulse    count: 0
*ERROR* pulse    preallocated barriers? no

To prevent it let's wait explicitly for idleness.

v2: wait only in live_idle tests

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205-selftest_wait_for_active_idle_event-v2-1-1437d0bf9829@intel.com
---
 drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 273d440a53e3f..bc441ce7b380d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -84,7 +84,7 @@ static struct pulse *pulse_create(void)
 
 static void pulse_unlock_wait(struct pulse *p)
 {
-	i915_active_unlock_wait(&p->active);
+	wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
 }
 
 static int __live_idle_pulse(struct intel_engine_cs *engine,
-- 
GitLab


From be5bcc4be9d9d3ae294072441a66fe39b74e5bba Mon Sep 17 00:00:00 2001
From: Andi Shyti <andi.shyti@linux.intel.com>
Date: Wed, 6 Dec 2023 19:43:22 +0100
Subject: [PATCH 0949/2340] drm/i915/guc: Create the guc_to_i915() wrapper

Given a reference to "guc", the guc_to_i915() returns the
pointer to "i915" private data.

Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231206184322.57111-1-andi.shyti@linux.intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt.h                |  5 +++++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c            |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c    |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c         |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c        | 10 +++++-----
 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c         |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c       |  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c |  4 ++--
 8 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 9ffdb05e231e2..2f81c1c792382 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -118,6 +118,11 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc)
 	return container_of(gsc, struct intel_gt, gsc);
 }
 
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+	return guc_to_gt(guc)->i915;
+}
+
 void intel_gt_common_init_early(struct intel_gt *gt);
 int intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 3f3df1166b860..2b450c43bbd7f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -330,7 +330,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
 
 static u32 guc_ctl_devid(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index a4da0208c8838..a1cd40d805178 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -355,7 +355,7 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc,
 static const struct __guc_mmio_reg_descr_group *
 guc_capture_get_device_reglist(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	const struct __guc_mmio_reg_descr_group *lists;
 
 	if (GRAPHICS_VER(i915) >= 12)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index ed6ce73ef3b07..0d5197c0824a9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -265,7 +265,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
 	u32 *cmds;
 	int err;
 
-	err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO);
+	err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 55bc8b55fbc05..bf16351c93499 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -520,7 +520,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
 static int guc_log_relay_create(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	struct rchan *guc_log_relay_chan;
 	size_t n_subbufs, subbuf_size;
 	int ret;
@@ -573,7 +573,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log)
 static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
 
 	_guc_log_copy_debuglogs_for_relay(log);
@@ -589,7 +589,7 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 static u32 __get_default_log_level(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	/* A negative value means "use platform/config default" */
 	if (i915->params.guc_log_level < 0) {
@@ -664,7 +664,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log)
 int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
 	int ret = 0;
 
@@ -796,7 +796,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 static void guc_log_relay_stop(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	if (!log->relay.started)
 		return;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
index 1adec6de223c7..9df7927304aec 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
@@ -14,7 +14,7 @@ static bool __guc_rc_supported(struct intel_guc *guc)
 {
 	/* GuC RC is unavailable for pre-Gen12 */
 	return guc->submission_supported &&
-		GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+		GRAPHICS_VER(guc_to_i915(guc)) >= 12;
 }
 
 static bool __guc_rc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 2dfb07cc4b33b..3e681ab6fbf9f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -34,7 +34,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc)
 {
 	/* GuC SLPC is unavailable for pre-Gen12 */
 	return guc->submission_supported &&
-		GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+		GRAPHICS_VER(guc_to_i915(guc)) >= 12;
 }
 
 static bool __guc_slpc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 58ea285c51d4c..a259f1118c5ab 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4626,12 +4626,12 @@ static bool __guc_submission_supported(struct intel_guc *guc)
 {
 	/* GuC submission is unavailable for pre-Gen11 */
 	return intel_guc_is_supported(guc) &&
-	       GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
+	       GRAPHICS_VER(guc_to_i915(guc)) >= 11;
 }
 
 static bool __guc_submission_selected(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	if (!intel_guc_submission_is_supported(guc))
 		return false;
-- 
GitLab


From 9030a77742f309d6b6a30c60093f5d617f33889f Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Tue, 24 Oct 2023 21:00:26 +0800
Subject: [PATCH 0950/2340] dt-bindings: display: mediatek: ethdr: Add
 compatible for MT8188

Add compatible name for MediaTek MT8188 ETHDR.

Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231024130048.14749-2-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../bindings/display/mediatek/mediatek,ethdr.yaml           | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
index 801fa66ae6150..677882348eded 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
@@ -23,7 +23,11 @@ description:
 
 properties:
   compatible:
-    const: mediatek,mt8195-disp-ethdr
+    oneOf:
+      - const: mediatek,mt8195-disp-ethdr
+      - items:
+          - const: mediatek,mt8188-disp-ethdr
+          - const: mediatek,mt8195-disp-ethdr
 
   reg:
     maxItems: 7
-- 
GitLab


From e7013a3d77e28f32def43f2e20e32ffb822d86e5 Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Tue, 24 Oct 2023 21:00:27 +0800
Subject: [PATCH 0951/2340] dt-bindings: display: mediatek: mdp-rdma: Add
 compatible for MT8188

Add compatible name for MediaTek MT8188 MDP-RDMA.

Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231024130048.14749-3-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../bindings/display/mediatek/mediatek,mdp-rdma.yaml        | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml
index dd12e2ff685c2..7570a0684967a 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml
@@ -21,7 +21,11 @@ description:
 
 properties:
   compatible:
-    const: mediatek,mt8195-vdo1-rdma
+    oneOf:
+      - const: mediatek,mt8195-vdo1-rdma
+      - items:
+          - const: mediatek,mt8188-vdo1-rdma
+          - const: mediatek,mt8195-vdo1-rdma
 
   reg:
     maxItems: 1
-- 
GitLab


From ffd00b7994dd8e050ba43a49c7a8a4029a0174fd Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Tue, 24 Oct 2023 21:00:28 +0800
Subject: [PATCH 0952/2340] dt-bindings: display: mediatek: merge: Add
 compatible for MT8188

Add compatible name for MediaTek MT8188 MERGE.

Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231024130048.14749-4-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../devicetree/bindings/display/mediatek/mediatek,merge.yaml   | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
index eead5cb8636ed..5c678695162e9 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
@@ -27,6 +27,9 @@ properties:
       - items:
           - const: mediatek,mt6795-disp-merge
           - const: mediatek,mt8173-disp-merge
+      - items:
+          - const: mediatek,mt8188-disp-merge
+          - const: mediatek,mt8195-disp-merge
 
   reg:
     maxItems: 1
-- 
GitLab


From d2423d8a38e724b28007abd76db08ccb59f56015 Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Tue, 24 Oct 2023 21:00:29 +0800
Subject: [PATCH 0953/2340] dt-bindings: display: mediatek: padding: Add MT8188

Padding is a new hardware module on MediaTek MT8188,
add dt-bindings for it.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231024130048.14749-5-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../display/mediatek/mediatek,padding.yaml    | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml

diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml
new file mode 100644
index 0000000000000..6bad7dc2d69fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,padding.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Display Padding
+
+maintainers:
+  - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+  - Philipp Zabel <p.zabel@pengutronix.de>
+
+description:
+  Padding provides ability to add pixels to width and height of a layer with
+  specified colors. Due to hardware design, Mixer in VDOSYS1 requires
+  width of a layer to be 2-pixel-align, or 4-pixel-align when ETHDR is enabled,
+  we need Padding to deal with odd width.
+  Please notice that even if the Padding is in bypass mode, settings in
+  register must be cleared to 0, or undefined behaviors could happen.
+
+properties:
+  compatible:
+    const: mediatek,mt8188-disp-padding
+
+  reg:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Padding's clocks
+
+  mediatek,gce-client-reg:
+    description:
+      GCE (Global Command Engine) is a multi-core micro processor that helps
+      its clients to execute commands without interrupting CPU. This property
+      describes GCE client's information that is composed by 4 fields.
+      1. Phandle of the GCE (there may be several GCE processors)
+      2. Sub-system ID defined in the dt-binding like a user ID
+         (Please refer to include/dt-bindings/gce/<chip>-gce.h)
+      3. Offset from base address of the subsys you are at
+      4. Size of the register the client needs
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    items:
+      items:
+        - description: Phandle of the GCE
+        - description: Subsys ID defined in the dt-binding
+        - description: Offset from base address of the subsys
+        - description: Size of register
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - power-domains
+  - clocks
+  - mediatek,gce-client-reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/mediatek,mt8188-clk.h>
+    #include <dt-bindings/power/mediatek,mt8188-power.h>
+    #include <dt-bindings/gce/mt8195-gce.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        padding0: padding@1c11d000 {
+            compatible = "mediatek,mt8188-disp-padding";
+            reg = <0 0x1c11d000 0 0x1000>;
+            clocks = <&vdosys1 CLK_VDO1_PADDING0>;
+            power-domains = <&spm MT8188_POWER_DOMAIN_VDOSYS1>;
+            mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0xd000 0x1000>;
+        };
+    };
-- 
GitLab


From 2c12eb36f849256f5eb00ffaee9bf99396fd3814 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 5 Dec 2023 20:03:08 +0200
Subject: [PATCH 0954/2340] drm/i915: Fix remapped stride with CCS on ADL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On ADL+ the hardware automagically calculates the CCS AUX surface
stride from the main surface stride, so when remapping we can't
really play a lot of tricks with the main surface stride, or else
the AUX surface stride would get miscalculated and no longer
match the actual data layout in memory.

Supposedly we could remap in 256 main surface tile units
(AUX page(4096)/cachline(64)*4(4x1 main surface tiles per
AUX cacheline)=256 main surface tiles), but the extra complexity
is probably not worth the hassle.

So let's just make sure our mapping stride is calculated from
the full framebuffer stride (instead of the framebuffer width).
This way the stride we program into PLANE_STRIDE will be the
original framebuffer stride, and thus there will be no change
to the AUX stride/layout.

Cc: stable@vger.kernel.org
Cc: Imre Deak <imre.deak@intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205180308.7505-1-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/display/intel_fb.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 4f9d2e57a770a..81316b21314dc 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -1509,8 +1509,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p
 
 			size += remap_info->size;
 		} else {
-			unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane,
-									      remap_info->width);
+			unsigned int dst_stride;
+
+			/*
+			 * The hardware automagically calculates the CCS AUX surface
+			 * stride from the main surface stride so can't really remap a
+			 * smaller subset (unless we'd remap in whole AUX page units).
+			 */
+			if (intel_fb_needs_pot_stride_remap(fb) &&
+			    intel_fb_is_ccs_modifier(fb->base.modifier))
+				dst_stride = remap_info->src_stride;
+			else
+				dst_stride = remap_info->width;
+
+			dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride);
 
 			assign_chk_ovf(i915, remap_info->dst_stride, dst_stride);
 			color_plane_info->mapping_stride = dst_stride *
-- 
GitLab


From e81144106e21271c619f0c722a09e27ccb8c043d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:34 +0200
Subject: [PATCH 0955/2340] drm/i915: Fix intel_atomic_setup_scalers()
 plane_state handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the plane_state variable is declared outside the scaler_users
loop in intel_atomic_setup_scalers(), and it's never reset back to
NULL inside the loop we may end up calling intel_atomic_setup_scaler()
with a non-NULL plane state for the pipe scaling case. That is bad
because intel_atomic_setup_scaler() determines whether we are doing
plane scaling or pipe scaling based on plane_state!=NULL. The end
result is that we may miscalculate the scaler mode for pipe scaling.

The hardware becomes somewhat upset if we end up in this situation
when scanning out a planar format on a SDR plane. We end up
programming the pipe scaler into planar mode as well, and the
result is a screenfull of garbage.

Fix the situation by making sure we pass the correct plane_state==NULL
when calculating the scaler mode for pipe scaling.

Cc: stable@vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-2-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/skl_scaler.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c
index 1e7c97243fcf5..8a934bada6245 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.c
+++ b/drivers/gpu/drm/i915/display/skl_scaler.c
@@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 {
 	struct drm_plane *plane = NULL;
 	struct intel_plane *intel_plane;
-	struct intel_plane_state *plane_state = NULL;
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
 	struct drm_atomic_state *drm_state = crtc_state->uapi.state;
@@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 
 	/* walkthrough scaler_users bits and start assigning scalers */
 	for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) {
+		struct intel_plane_state *plane_state = NULL;
 		int *scaler_id;
 		const char *name;
 		int idx, ret;
-- 
GitLab


From e05a67fdd3c9293827d44a0dfa3618429b832d59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:35 +0200
Subject: [PATCH 0956/2340] drm/i915: Streamline intel_dsc_pps_read()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

intel_dsc_pps_read() is rather convoluted. Make it legible.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-3-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_vdsc.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 5f2fb702e367e..17d6572f9d0a0 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -812,13 +812,13 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state)
 }
 
 static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps,
-			      bool *check_equal)
+			      bool *all_equal)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	i915_reg_t dsc_reg[2];
 	int i, vdsc_per_pipe, dsc_reg_num;
-	u32 val = 0;
+	u32 val;
 
 	vdsc_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state);
 	dsc_reg_num = min_t(int, ARRAY_SIZE(dsc_reg), vdsc_per_pipe);
@@ -827,20 +827,13 @@ static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps,
 
 	intel_dsc_get_pps_reg(crtc_state, pps, dsc_reg, dsc_reg_num);
 
-	if (check_equal)
-		*check_equal = true;
-
-	for (i = 0; i < dsc_reg_num; i++) {
-		u32 tmp;
+	*all_equal = true;
 
-		tmp = intel_de_read(i915, dsc_reg[i]);
+	val = intel_de_read(i915, dsc_reg[0]);
 
-		if (i == 0) {
-			val = tmp;
-		} else if (check_equal && tmp != val) {
-			*check_equal = false;
-			break;
-		} else if (!check_equal) {
+	for (i = 1; i < dsc_reg_num; i++) {
+		if (intel_de_read(i915, dsc_reg[i]) != val) {
+			*all_equal = false;
 			break;
 		}
 	}
-- 
GitLab


From f175de44d0cf5aa688747b96bad0e596a50eaad7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:36 +0200
Subject: [PATCH 0957/2340] drm/i915: Drop redundant NULL check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

intel_bios_get_dsc_params() is only called from
gen11_dsi_dsc_compute_config() and it always passes a non-NULL
crtc_state in. Drop the redundant check.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-4-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_bios.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 3e7e96acb24ab..aa169b0055e97 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -3475,8 +3475,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder,
 			if (!devdata->dsc)
 				return false;
 
-			if (crtc_state)
-				fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc);
+			fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc);
 
 			return true;
 		}
-- 
GitLab


From ed1566a982213c1a8a39cac26aa4c53d289ed4bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:37 +0200
Subject: [PATCH 0958/2340] drm/i915: Drop crtc NULL check from
 intel_crtc_active()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

intel_crtc_active() is never called with a NULL crtc. Drop
the redundant NULL check.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-5-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/i9xx_wm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c
index 03e8fb6caa83f..11ca9572e8b3c 100644
--- a/drivers/gpu/drm/i915/display/i9xx_wm.c
+++ b/drivers/gpu/drm/i915/display/i9xx_wm.c
@@ -608,7 +608,7 @@ static bool intel_crtc_active(struct intel_crtc *crtc)
 	 * crtc->state->active once we have proper CRTC states wired up
 	 * for atomic.
 	 */
-	return crtc && crtc->active && crtc->base.primary->state->fb &&
+	return crtc->active && crtc->base.primary->state->fb &&
 		crtc->config->hw.adjusted_mode.crtc_clock;
 }
 
-- 
GitLab


From a599d302ae00917038777fad09107576375e2c95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:38 +0200
Subject: [PATCH 0959/2340] drm/i915: Drop NULL fb check from
 intel_fb_uses_dpt()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

intel_fb_uses_dpt() should not be called with a NULL fb, so
drop the check.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-6-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_fb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 81316b21314dc..44ae6c1e1d46d 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -771,7 +771,7 @@ bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier)
 
 bool intel_fb_uses_dpt(const struct drm_framebuffer *fb)
 {
-	return fb && to_i915(fb->dev)->display.params.enable_dpt &&
+	return to_i915(fb->dev)->display.params.enable_dpt &&
 		intel_fb_modifier_uses_dpt(to_i915(fb->dev), fb->modifier);
 }
 
-- 
GitLab


From 7f4f756df7a0716b15176f6fa0552e3480a2b981 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:39 +0200
Subject: [PATCH 0960/2340] drm/i915: Drop redunant null check from
 intel_get_frame_time_us()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

intel_get_frame_time_us() is never called with a NULL crtc_state so
drop the redundant check.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-7-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_psr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 6029bb71276cf..0058b07a7cdad 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -893,7 +893,7 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder cpu_trans
 
 static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
 {
-	if (!cstate || !cstate->hw.active)
+	if (!cstate->hw.active)
 		return 0;
 
 	return DIV_ROUND_UP(1000 * 1000,
-- 
GitLab


From e81f48512aa42d633015f182d2ecf91299803077 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:40 +0200
Subject: [PATCH 0961/2340] drm/i915: s/cstate/crtc_state/ in
 intel_get_frame_time_us()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use standard variable name 'crtc_state' instead of 'cstate'.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-8-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_psr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 0058b07a7cdad..b6e2e70e12904 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -891,13 +891,13 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder cpu_trans
 		return false;
 }
 
-static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
+static u32 intel_get_frame_time_us(const struct intel_crtc_state *crtc_state)
 {
-	if (!cstate->hw.active)
+	if (!crtc_state->hw.active)
 		return 0;
 
 	return DIV_ROUND_UP(1000 * 1000,
-			    drm_mode_vrefresh(&cstate->hw.adjusted_mode));
+			    drm_mode_vrefresh(&crtc_state->hw.adjusted_mode));
 }
 
 static void psr2_program_idle_frames(struct intel_dp *intel_dp,
-- 
GitLab


From c6117b33a173717714a8dbbf9d14ca85db79725e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 7 Dec 2023 21:34:41 +0200
Subject: [PATCH 0962/2340] drm/i915/tv: Drop redundant null checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Neither 'tv_mode' or 'color_conversion' can be NULL,
so drop the pointless checks.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-9-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_tv.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index 2ee4f0d958513..d4386cb3569e0 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -1417,9 +1417,6 @@ set_tv_mode_timings(struct drm_i915_private *dev_priv,
 static void set_color_conversion(struct drm_i915_private *dev_priv,
 				 const struct color_conversion *color_conversion)
 {
-	if (!color_conversion)
-		return;
-
 	intel_de_write(dev_priv, TV_CSC_Y,
 		       (color_conversion->ry << 16) | color_conversion->gy);
 	intel_de_write(dev_priv, TV_CSC_Y2,
@@ -1454,9 +1451,6 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state,
 	int xpos, ypos;
 	unsigned int xsize, ysize;
 
-	if (!tv_mode)
-		return;	/* can't happen (mode_prepare prevents this) */
-
 	tv_ctl = intel_de_read(dev_priv, TV_CTL);
 	tv_ctl &= TV_CTL_SAVE;
 
-- 
GitLab


From 877fd09a120d0acee073fbada79fad2ab35396c2 Mon Sep 17 00:00:00 2001
From: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Date: Thu, 7 Dec 2023 14:10:23 -0800
Subject: [PATCH 0963/2340] drm/i915/mtl: Use port clock compatible numbers for
 C20 phy

In C20 pll_state link_bit_rate and clock fields are bit redundant. Since
many of the helpers assume the clock values, which are different from
link_bit_rate for dp2.0, convert the helpers to use the numbers that
are compatible with link_bit_rate.

Currently link_bit_rate is compatible with crtc_state->port_clock. The
function intel_c20pll_calc_port_clock returns the number which is
compatible with crtc_state->port_clock. In order to avoid extra
conversions b/ween clock and link_bit_rate, remove "clock" field from the
C20 pll_state and then rename "link_bit_rate" as "clock".

While at it rely on crtc_state->port_clock during C20 Pll programming.

Cc: Clint Taylor <clinton.a.taylor@intel.com>
Cc: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207221025.2032207-2-radhakrishna.sripada@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c | 43 ++++++++++----------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index 5fbec5784b835..7d412be996eac 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -2117,7 +2117,7 @@ int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state,
 static bool intel_c20_use_mplla(u32 clock)
 {
 	/* 10G and 20G rates use MPLLA */
-	if (clock == 312500 || clock == 625000)
+	if (clock == 1000000 || clock == 2000000)
 		return true;
 
 	return false;
@@ -2192,7 +2192,7 @@ void intel_c20pll_dump_hw_state(struct drm_i915_private *i915,
 	drm_dbg_kms(&i915->drm, "cmn[0] = 0x%.4x, cmn[1] = 0x%.4x, cmn[2] = 0x%.4x, cmn[3] = 0x%.4x\n",
 		    hw_state->cmn[0], hw_state->cmn[1], hw_state->cmn[2], hw_state->cmn[3]);
 
-	if (intel_c20_use_mplla(hw_state->clock)) {
+	if (intel_c20_use_mplla(hw_state->link_bit_rate)) {
 		for (i = 0; i < ARRAY_SIZE(hw_state->mplla); i++)
 			drm_dbg_kms(&i915->drm, "mplla[%d] = 0x%.4x\n", i, hw_state->mplla[i]);
 	} else {
@@ -2220,11 +2220,11 @@ static u8 intel_c20_get_dp_rate(u32 clock)
 		return 6;
 	case 432000: /* 4.32 Gbps eDP */
 		return 7;
-	case 312500: /* 10 Gbps DP2.0 */
+	case 1000000: /* 10 Gbps DP2.0 */
 		return 8;
-	case 421875: /* 13.5 Gbps DP2.0 */
+	case 1350000: /* 13.5 Gbps DP2.0 */
 		return 9;
-	case 625000: /* 20 Gbps DP2.0*/
+	case 2000000: /* 20 Gbps DP2.0 */
 		return 10;
 	case 648000: /* 6.48 Gbps eDP*/
 		return 11;
@@ -2242,13 +2242,13 @@ static u8 intel_c20_get_hdmi_rate(u32 clock)
 		return 0;
 
 	switch (clock) {
-	case 166670: /* 3 Gbps */
-	case 333330: /* 6 Gbps */
-	case 666670: /* 12 Gbps */
+	case 300000: /* 3 Gbps */
+	case 600000: /* 6 Gbps */
+	case 1200000: /* 12 Gbps */
 		return 1;
-	case 444440: /* 8 Gbps */
+	case 800000: /* 8 Gbps */
 		return 2;
-	case 555560: /* 10 Gbps */
+	case 1000000: /* 10 Gbps */
 		return 3;
 	default:
 		MISSING_CASE(clock);
@@ -2259,7 +2259,7 @@ static u8 intel_c20_get_hdmi_rate(u32 clock)
 static bool is_dp2(u32 clock)
 {
 	/* DP2.0 clock rates */
-	if (clock == 312500 || clock == 421875 || clock  == 625000)
+	if (clock == 1000000 || clock == 1350000 || clock  == 2000000)
 		return true;
 
 	return false;
@@ -2268,11 +2268,11 @@ static bool is_dp2(u32 clock)
 static bool is_hdmi_frl(u32 clock)
 {
 	switch (clock) {
-	case 166670: /* 3 Gbps */
-	case 333330: /* 6 Gbps */
-	case 444440: /* 8 Gbps */
-	case 555560: /* 10 Gbps */
-	case 666670: /* 12 Gbps */
+	case 300000: /* 3 Gbps */
+	case 600000: /* 6 Gbps */
+	case 800000: /* 8 Gbps */
+	case 1000000: /* 10 Gbps */
+	case 1200000: /* 12 Gbps */
 		return true;
 	default:
 		return false;
@@ -2305,6 +2305,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	const struct intel_c20pll_state *pll_state = &crtc_state->cx0pll_state.c20;
 	bool dp = false;
 	int lane = crtc_state->lane_count > 2 ? INTEL_CX0_BOTH_LANES : INTEL_CX0_LANE0;
+	u32 clock = crtc_state->port_clock;
 	bool cntx;
 	int i;
 
@@ -2343,7 +2344,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	}
 
 	/* 3.3 mpllb or mplla configuration */
-	if (intel_c20_use_mplla(pll_state->clock)) {
+	if (intel_c20_use_mplla(clock)) {
 		for (i = 0; i < ARRAY_SIZE(pll_state->mplla); i++) {
 			if (cntx)
 				intel_c20_sram_write(i915, encoder->port, INTEL_CX0_LANE0,
@@ -2370,23 +2371,23 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	/* 4. Program custom width to match the link protocol */
 	intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_WIDTH,
 		      PHY_C20_CUSTOM_WIDTH_MASK,
-		      PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(pll_state->clock, dp)),
+		      PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(clock, dp)),
 		      MB_WRITE_COMMITTED);
 
 	/* 5. For DP or 6. For HDMI */
 	if (dp) {
 		intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE,
 			      BIT(6) | PHY_C20_CUSTOM_SERDES_MASK,
-			      BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(pll_state->clock)),
+			      BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(clock)),
 			      MB_WRITE_COMMITTED);
 	} else {
 		intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE,
 			      BIT(7) | PHY_C20_CUSTOM_SERDES_MASK,
-			      is_hdmi_frl(pll_state->clock) ? BIT(7) : 0,
+			      is_hdmi_frl(clock) ? BIT(7) : 0,
 			      MB_WRITE_COMMITTED);
 
 		intel_cx0_write(i915, encoder->port, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE,
-				intel_c20_get_hdmi_rate(pll_state->clock),
+				intel_c20_get_hdmi_rate(clock),
 				MB_WRITE_COMMITTED);
 	}
 
-- 
GitLab


From 1103672fd6b8486c4cc1ab69623e9a080a00e022 Mon Sep 17 00:00:00 2001
From: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Date: Thu, 7 Dec 2023 14:10:24 -0800
Subject: [PATCH 0964/2340] drm/i915/mtl: Remove misleading "clock" field from
 C20 pll_state

The field link_bit_rate serves as the actual clock value for the C20
pll_state structure. Remove the misleading clock field. The subsequent
patch would rename the link_bit_rate as the clock field.

Cc: Clint Taylor <clinton.a.taylor@intel.com>
Cc: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207221025.2032207-3-radhakrishna.sripada@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c   | 18 ------------------
 .../gpu/drm/i915/display/intel_display_types.h |  3 +--
 2 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index 7d412be996eac..d518b55d5150e 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -746,7 +746,6 @@ static const struct intel_c10pll_state * const mtl_c10_edp_tables[] = {
 /* C20 basic DP 1.4 tables */
 static const struct intel_c20pll_state mtl_c20_dp_rbr = {
 	.link_bit_rate = 162000,
-	.clock = 162000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x5800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -772,7 +771,6 @@ static const struct intel_c20pll_state mtl_c20_dp_rbr = {
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
 	.link_bit_rate = 270000,
-	.clock = 270000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -798,7 +796,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
 	.link_bit_rate = 540000,
-	.clock = 540000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -824,7 +821,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
 	.link_bit_rate = 810000,
-	.clock = 810000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -851,7 +847,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
 /* C20 basic DP 2.0 tables */
 static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
 	.link_bit_rate = 1000000, /* 10 Gbps */
-	.clock = 312500,
 	.tx = {	0xbe21, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -876,7 +871,6 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
 	.link_bit_rate = 1350000, /* 13.5 Gbps */
-	.clock = 421875,
 	.tx = {	0xbea0, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -902,7 +896,6 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr20 = {
 	.link_bit_rate = 2000000, /* 20 Gbps */
-	.clock = 625000,
 	.tx = {	0xbe20, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -1522,7 +1515,6 @@ static const struct intel_c10pll_state * const mtl_c10_hdmi_tables[] = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
 	.link_bit_rate = 25175,
-	.clock = 25175,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1548,7 +1540,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
 	.link_bit_rate = 27000,
-	.clock = 27000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1574,7 +1565,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
 	.link_bit_rate = 74250,
-	.clock = 74250,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1600,7 +1590,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
 	.link_bit_rate = 148500,
-	.clock = 148500,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1626,7 +1615,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
 	.link_bit_rate = 594000,
-	.clock = 594000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1652,7 +1640,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
 	.link_bit_rate = 3000000,
-	.clock = 166670,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1678,7 +1665,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
 	.link_bit_rate = 6000000,
-	.clock = 333330,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1704,7 +1690,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
 	.link_bit_rate = 8000000,
-	.clock = 444440,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1730,7 +1715,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
 	.link_bit_rate = 10000000,
-	.clock = 555560,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1756,7 +1740,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1200 = {
 	.link_bit_rate = 12000000,
-	.clock = 666670,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -2006,7 +1989,6 @@ static int intel_c20_compute_hdmi_tmds_pll(u64 pixel_clock, struct intel_c20pll_
 		mpllb_ana_freq_vco = MPLLB_ANA_FREQ_VCO_0;
 
 	pll_state->link_bit_rate	= pixel_clock;
-	pll_state->clock	= pixel_clock;
 	pll_state->tx[0]	= 0xbe88;
 	pll_state->tx[1]	= 0x9800;
 	pll_state->tx[2]	= 0x0000;
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index b3e942f2eeb0f..6b348d3299570 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1022,8 +1022,7 @@ struct intel_c10pll_state {
 };
 
 struct intel_c20pll_state {
-	u32 link_bit_rate;
-	u32 clock; /* in kHz */
+	u32 link_bit_rate; /* in kHz */
 	u16 tx[3];
 	u16 cmn[4];
 	union {
-- 
GitLab


From 872ee9cc0219334486e19da20e56665e612fdcb7 Mon Sep 17 00:00:00 2001
From: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Date: Thu, 7 Dec 2023 14:10:25 -0800
Subject: [PATCH 0965/2340] drm/i915/mtl: Rename the link_bit_rate to clock in
 C20 pll_state

With the cleanup of the misleading clock value to avoid extra
calculations to convert between link_bit_rate and clock, use
one standard "clock" field for the c20 pll which works with
crtc_state->port_clock field.

Cc: Clint Taylor <clinton.a.taylor@intel.com>
Cc: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207221025.2032207-4-radhakrishna.sripada@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c  | 42 +++++++++----------
 .../drm/i915/display/intel_display_types.h    |  2 +-
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index d518b55d5150e..4e6ea71ff6294 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -745,7 +745,7 @@ static const struct intel_c10pll_state * const mtl_c10_edp_tables[] = {
 
 /* C20 basic DP 1.4 tables */
 static const struct intel_c20pll_state mtl_c20_dp_rbr = {
-	.link_bit_rate = 162000,
+	.clock = 162000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x5800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -770,7 +770,7 @@ static const struct intel_c20pll_state mtl_c20_dp_rbr = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
-	.link_bit_rate = 270000,
+	.clock = 270000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -795,7 +795,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
-	.link_bit_rate = 540000,
+	.clock = 540000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -820,7 +820,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
-	.link_bit_rate = 810000,
+	.clock = 810000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -846,7 +846,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
 
 /* C20 basic DP 2.0 tables */
 static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
-	.link_bit_rate = 1000000, /* 10 Gbps */
+	.clock = 1000000, /* 10 Gbps */
 	.tx = {	0xbe21, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -870,7 +870,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
-	.link_bit_rate = 1350000, /* 13.5 Gbps */
+	.clock = 1350000, /* 13.5 Gbps */
 	.tx = {	0xbea0, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -895,7 +895,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr20 = {
-	.link_bit_rate = 2000000, /* 20 Gbps */
+	.clock = 2000000, /* 20 Gbps */
 	.tx = {	0xbe20, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -1514,7 +1514,7 @@ static const struct intel_c10pll_state * const mtl_c10_hdmi_tables[] = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
-	.link_bit_rate = 25175,
+	.clock = 25175,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1539,7 +1539,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
-	.link_bit_rate = 27000,
+	.clock = 27000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1564,7 +1564,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
-	.link_bit_rate = 74250,
+	.clock = 74250,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1589,7 +1589,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
-	.link_bit_rate = 148500,
+	.clock = 148500,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1614,7 +1614,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
-	.link_bit_rate = 594000,
+	.clock = 594000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1639,7 +1639,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
-	.link_bit_rate = 3000000,
+	.clock = 3000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1664,7 +1664,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
-	.link_bit_rate = 6000000,
+	.clock = 6000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1689,7 +1689,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
-	.link_bit_rate = 8000000,
+	.clock = 8000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1714,7 +1714,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
-	.link_bit_rate = 10000000,
+	.clock = 10000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1739,7 +1739,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1200 = {
-	.link_bit_rate = 12000000,
+	.clock = 12000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1988,7 +1988,7 @@ static int intel_c20_compute_hdmi_tmds_pll(u64 pixel_clock, struct intel_c20pll_
 	else
 		mpllb_ana_freq_vco = MPLLB_ANA_FREQ_VCO_0;
 
-	pll_state->link_bit_rate	= pixel_clock;
+	pll_state->clock	= pixel_clock;
 	pll_state->tx[0]	= 0xbe88;
 	pll_state->tx[1]	= 0x9800;
 	pll_state->tx[2]	= 0x0000;
@@ -2024,7 +2024,7 @@ static int intel_c20_phy_check_hdmi_link_rate(int clock)
 	int i;
 
 	for (i = 0; tables[i]; i++) {
-		if (clock == tables[i]->link_bit_rate)
+		if (clock == tables[i]->clock)
 			return MODE_OK;
 	}
 
@@ -2076,7 +2076,7 @@ static int intel_c20pll_calc_state(struct intel_crtc_state *crtc_state,
 		return -EINVAL;
 
 	for (i = 0; tables[i]; i++) {
-		if (crtc_state->port_clock == tables[i]->link_bit_rate) {
+		if (crtc_state->port_clock == tables[i]->clock) {
 			crtc_state->cx0pll_state.c20 = *tables[i];
 			return 0;
 		}
@@ -2174,7 +2174,7 @@ void intel_c20pll_dump_hw_state(struct drm_i915_private *i915,
 	drm_dbg_kms(&i915->drm, "cmn[0] = 0x%.4x, cmn[1] = 0x%.4x, cmn[2] = 0x%.4x, cmn[3] = 0x%.4x\n",
 		    hw_state->cmn[0], hw_state->cmn[1], hw_state->cmn[2], hw_state->cmn[3]);
 
-	if (intel_c20_use_mplla(hw_state->link_bit_rate)) {
+	if (intel_c20_use_mplla(hw_state->clock)) {
 		for (i = 0; i < ARRAY_SIZE(hw_state->mplla); i++)
 			drm_dbg_kms(&i915->drm, "mplla[%d] = 0x%.4x\n", i, hw_state->mplla[i]);
 	} else {
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 6b348d3299570..2616bb6267a10 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1022,7 +1022,7 @@ struct intel_c10pll_state {
 };
 
 struct intel_c20pll_state {
-	u32 link_bit_rate; /* in kHz */
+	u32 clock; /* in kHz */
 	u16 tx[3];
 	u16 cmn[4];
 	union {
-- 
GitLab


From 6128becaeafa876048bd1b6a83d836329e4940c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 28 Sep 2023 18:24:49 +0300
Subject: [PATCH 0966/2340] drm/i915: Stop accessing crtc->state from the flip
 done irq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Assuming crtc->state is pointing at the correct thing for the
async flip commit is nonsense. If we had already queued up multiple
commits this would point at the very lates crtc state even if the
older commits hadn't even happened yet.

Instead properly stage/arm the event like we do for async flips.
Since we don't need to arm multiple of these at the same time we
don't need a list like the normal vblank even processing uses.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230928152450.30109-1-ville.syrjala@linux.intel.com
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_crtc.c          | 9 ++++++++-
 drivers/gpu/drm/i915/display/intel_display_irq.c   | 9 ++++-----
 drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
index 1fd068e6e26ca..8a84a31c7b48a 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -553,8 +553,15 @@ void intel_pipe_update_start(struct intel_atomic_state *state,
 
 	intel_psr_lock(new_crtc_state);
 
-	if (new_crtc_state->do_async_flip)
+	if (new_crtc_state->do_async_flip) {
+		spin_lock_irq(&crtc->base.dev->event_lock);
+		/* arm the event for the flip done irq handler */
+		crtc->flip_done_event = new_crtc_state->uapi.event;
+		spin_unlock_irq(&crtc->base.dev->event_lock);
+
+		new_crtc_state->uapi.event = NULL;
 		return;
+	}
 
 	if (intel_crtc_needs_vblank_work(new_crtc_state))
 		intel_crtc_vblank_work_init(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c
index f8ed53f30b2e4..6f2b45cfcf721 100644
--- a/drivers/gpu/drm/i915/display/intel_display_irq.c
+++ b/drivers/gpu/drm/i915/display/intel_display_irq.c
@@ -340,16 +340,15 @@ static void flip_done_handler(struct drm_i915_private *i915,
 			      enum pipe pipe)
 {
 	struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe);
-	struct drm_crtc_state *crtc_state = crtc->base.state;
-	struct drm_pending_vblank_event *e = crtc_state->event;
 	struct drm_device *dev = &i915->drm;
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev->event_lock, irqflags);
 
-	crtc_state->event = NULL;
-
-	drm_crtc_send_vblank_event(&crtc->base, e);
+	if (crtc->flip_done_event) {
+		drm_crtc_send_vblank_event(&crtc->base, crtc->flip_done_event);
+		crtc->flip_done_event = NULL;
+	}
 
 	spin_unlock_irqrestore(&dev->event_lock, irqflags);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 2616bb6267a10..341d80c2b9de9 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1475,6 +1475,9 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
+	/* armed event for async flip */
+	struct drm_pending_vblank_event *flip_done_event;
+
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
 	bool pch_fifo_underrun_disabled;
-- 
GitLab


From 362a8dba85ebedbf6939dad78bc6de398a2ef4e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 28 Sep 2023 18:24:50 +0300
Subject: [PATCH 0967/2340] drm/i915: Drop irqsave/restore for
 flip_done_handler()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since flip_done_handler() is always called from the irq handler
we can skip the irqsave/restore dance.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230928152450.30109-2-ville.syrjala@linux.intel.com
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display_irq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c
index 6f2b45cfcf721..a7d8f3fc98de9 100644
--- a/drivers/gpu/drm/i915/display/intel_display_irq.c
+++ b/drivers/gpu/drm/i915/display/intel_display_irq.c
@@ -340,17 +340,15 @@ static void flip_done_handler(struct drm_i915_private *i915,
 			      enum pipe pipe)
 {
 	struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe);
-	struct drm_device *dev = &i915->drm;
-	unsigned long irqflags;
 
-	spin_lock_irqsave(&dev->event_lock, irqflags);
+	spin_lock(&i915->drm.event_lock);
 
 	if (crtc->flip_done_event) {
 		drm_crtc_send_vblank_event(&crtc->base, crtc->flip_done_event);
 		crtc->flip_done_event = NULL;
 	}
 
-	spin_unlock_irqrestore(&dev->event_lock, irqflags);
+	spin_unlock(&i915->drm.event_lock);
 }
 
 static void hsw_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
-- 
GitLab


From 8814455a0e54ca353b4b0ad5105569d3fdb945cc Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Thu, 7 Dec 2023 21:30:47 +0000
Subject: [PATCH 0968/2340] drm/msm: Refactor UBWC config setting

Split up calculating configuration parameters and programming them, so
that we can expose them to userspace.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/571180/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  21 ++---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 101 +++++++++++++-----------
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |   9 +++
 3 files changed, 78 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index e5916c1067967..c003f970189b0 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -684,7 +684,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
-	u32 regbit;
+	u32 hbb;
 	int ret;
 
 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
@@ -820,18 +820,15 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 
-	/* Set the highest bank bit */
-	if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
-		regbit = 2;
-	else
-		regbit = 1;
+	BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
+	hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
 
-	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
-	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
+	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, hbb << 7);
+	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, hbb << 1);
 
 	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
 	    adreno_is_a540(adreno_gpu))
-		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
+		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, hbb);
 
 	/* Disable All flat shading optimization (ALLFLATOPTDIS) */
 	gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
@@ -1785,5 +1782,11 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
 	/* Set up the preemption specific bits and pieces for each ringbuffer */
 	a5xx_preempt_init(gpu);
 
+	/* Set the highest bank bit */
+	if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
+		adreno_gpu->ubwc_config.highest_bank_bit = 15;
+	else
+		adreno_gpu->ubwc_config.highest_bank_bit = 14;
+
 	return gpu;
 }
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 723f9b97734da..c0bc924cd3025 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1270,81 +1270,92 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
 }
 
-static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
+static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
 {
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	/* Unknown, introduced with A650 family, related to UBWC mode/ver 4 */
-	u32 rgb565_predicator = 0;
+	gpu->ubwc_config.rgb565_predicator = 0;
 	/* Unknown, introduced with A650 family */
-	u32 uavflagprd_inv = 0;
+	gpu->ubwc_config.uavflagprd_inv = 0;
 	/* Whether the minimum access length is 64 bits */
-	u32 min_acc_len = 0;
+	gpu->ubwc_config.min_acc_len = 0;
 	/* Entirely magic, per-GPU-gen value */
-	u32 ubwc_mode = 0;
+	gpu->ubwc_config.ubwc_mode = 0;
 	/*
 	 * The Highest Bank Bit value represents the bit of the highest DDR bank.
-	 * We then subtract 13 from it (13 is the minimum value allowed by hw) and
-	 * write the lowest two bits of the remaining value as hbb_lo and the
-	 * one above it as hbb_hi to the hardware. This should ideally use DRAM
-	 * type detection.
+	 * This should ideally use DRAM type detection.
 	 */
-	u32 hbb_hi = 0;
-	u32 hbb_lo = 2;
-	/* Unknown, introduced with A640/680 */
-	u32 amsbc = 0;
+	gpu->ubwc_config.highest_bank_bit = 15;
 
-	if (adreno_is_a610(adreno_gpu)) {
-		/* HBB = 14 */
-		hbb_lo = 1;
-		min_acc_len = 1;
-		ubwc_mode = 1;
+	if (adreno_is_a610(gpu)) {
+		gpu->ubwc_config.highest_bank_bit = 14;
+		gpu->ubwc_config.min_acc_len = 1;
+		gpu->ubwc_config.ubwc_mode = 1;
 	}
 
 	/* a618 is using the hw default values */
-	if (adreno_is_a618(adreno_gpu))
+	if (adreno_is_a618(gpu))
 		return;
 
-	if (adreno_is_a619_holi(adreno_gpu))
-		hbb_lo = 0;
+	if (adreno_is_a619_holi(gpu))
+		gpu->ubwc_config.highest_bank_bit = 13;
 
-	if (adreno_is_a640_family(adreno_gpu))
-		amsbc = 1;
+	if (adreno_is_a640_family(gpu))
+		gpu->ubwc_config.amsbc = 1;
 
-	if (adreno_is_a650(adreno_gpu) ||
-	    adreno_is_a660(adreno_gpu) ||
-	    adreno_is_a690(adreno_gpu) ||
-	    adreno_is_a730(adreno_gpu) ||
-	    adreno_is_a740_family(adreno_gpu)) {
+	if (adreno_is_a650(gpu) ||
+	    adreno_is_a660(gpu) ||
+	    adreno_is_a690(gpu) ||
+	    adreno_is_a730(gpu) ||
+	    adreno_is_a740_family(gpu)) {
 		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
-		hbb_lo = 3;
-		amsbc = 1;
-		rgb565_predicator = 1;
-		uavflagprd_inv = 2;
+		gpu->ubwc_config.highest_bank_bit = 16;
+		gpu->ubwc_config.amsbc = 1;
+		gpu->ubwc_config.rgb565_predicator = 1;
+		gpu->ubwc_config.uavflagprd_inv = 2;
 	}
 
-	if (adreno_is_7c3(adreno_gpu)) {
-		hbb_lo = 1;
-		amsbc = 1;
-		rgb565_predicator = 1;
-		uavflagprd_inv = 2;
+	if (adreno_is_7c3(gpu)) {
+		gpu->ubwc_config.highest_bank_bit = 14;
+		gpu->ubwc_config.amsbc = 1;
+		gpu->ubwc_config.rgb565_predicator = 1;
+		gpu->ubwc_config.uavflagprd_inv = 2;
 	}
+}
+
+static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	/*
+	 * We subtract 13 from the highest bank bit (13 is the minimum value
+	 * allowed by hw) and write the lowest two bits of the remaining value
+	 * as hbb_lo and the one above it as hbb_hi to the hardware.
+	 */
+	BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
+	u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
+	u32 hbb_hi = hbb >> 2;
+	u32 hbb_lo = hbb & 3;
 
 	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
-		  rgb565_predicator << 11 | hbb_hi << 10 | amsbc << 4 |
-		  min_acc_len << 3 | hbb_lo << 1 | ubwc_mode);
+		  adreno_gpu->ubwc_config.rgb565_predicator << 11 |
+		  hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 |
+		  adreno_gpu->ubwc_config.min_acc_len << 3 |
+		  hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode);
 
 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, hbb_hi << 4 |
-		  min_acc_len << 3 | hbb_lo << 1 | ubwc_mode);
+		  adreno_gpu->ubwc_config.min_acc_len << 3 |
+		  hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode);
 
 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, hbb_hi << 10 |
-		  uavflagprd_inv << 4 | min_acc_len << 3 |
-		  hbb_lo << 1 | ubwc_mode);
+		  adreno_gpu->ubwc_config.uavflagprd_inv << 4 |
+		  adreno_gpu->ubwc_config.min_acc_len << 3 |
+		  hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode);
 
 	if (adreno_is_a7xx(adreno_gpu))
 		gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
 			  FIELD_PREP(GENMASK(8, 5), hbb_lo));
 
-	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | hbb_lo << 21);
+	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
+		  adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21);
 }
 
 static int a6xx_cp_init(struct msm_gpu *gpu)
@@ -2911,5 +2922,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
 				a6xx_fault_handler);
 
+	a6xx_calc_ubwc_config(adreno_gpu);
+
 	return gpu;
 }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 80b3f6312116e..bc14df96feb01 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -165,6 +165,15 @@ struct adreno_gpu {
 	/* firmware: */
 	const struct firmware *fw[ADRENO_FW_MAX];
 
+	struct {
+		u32 rgb565_predicator;
+		u32 uavflagprd_inv;
+		u32 min_acc_len;
+		u32 ubwc_mode;
+		u32 highest_bank_bit;
+		u32 amsbc;
+	} ubwc_config;
+
 	/*
 	 * Register offsets are different between some GPUs.
 	 * GPU specific offsets will be exported by GPU specific
-- 
GitLab


From 44a88fa45665318473bfdbb832eba1da2d0a3740 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Thu, 7 Dec 2023 21:30:48 +0000
Subject: [PATCH 0969/2340] drm/msm: Add param for the highest bank bit

This parameter is programmed by the kernel and influences the tiling
layout of images. Exposing it to userspace will allow it to tile/untile
images correctly without guessing what value the kernel programmed, and
allow us to change it in the future without breaking userspace.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/571181/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++
 include/uapi/drm/msm_drm.h              | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 3fe9fd240cc73..074fb498706f2 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -373,6 +373,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 			return -EINVAL;
 		*value = ctx->aspace->va_size;
 		return 0;
+	case MSM_PARAM_HIGHEST_BANK_BIT:
+		*value = adreno_gpu->ubwc_config.highest_bank_bit;
+		return 0;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
 		return -EINVAL;
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 6f2a7ad04aa40..d8a6b34727601 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -86,6 +86,7 @@ struct drm_msm_timespec {
 #define MSM_PARAM_CMDLINE    0x0d  /* WO: override for task cmdline */
 #define MSM_PARAM_VA_START   0x0e  /* RO: start of valid GPU iova range */
 #define MSM_PARAM_VA_SIZE    0x0f  /* RO: size of valid GPU iova range (bytes) */
+#define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */
 
 /* For backwards compat.  The original support for preemption was based on
  * a single ring per priority level so # of priority levels equals the #
-- 
GitLab


From 3e6688fd96966b6c275e95c39aa367bc0a490ccd Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 30 Nov 2023 16:26:41 -0800
Subject: [PATCH 0970/2340] drm/msm/adreno: Fix A680 chip id

The only A680 referenced from DeviceTree is patch level 1, which since
commit '90b593ce1c9e ("drm/msm/adreno: Switch to chip-id for identifying
GPU")' isn't a known chip id.

Correct the chip id to allow the A680 to be recognized again.

Fixes: 90b593ce1c9e ("drm/msm/adreno: Switch to chip-id for identifying GPU")
Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/569839/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index f62ab5257e668..2ce7d7b1690dc 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -464,7 +464,7 @@ static const struct adreno_info gpulist[] = {
 			{ 190, 1 },
 		),
 	}, {
-		.chip_ids = ADRENO_CHIP_IDS(0x06080000),
+		.chip_ids = ADRENO_CHIP_IDS(0x06080001),
 		.family = ADRENO_6XX_GEN2,
 		.revn = 680,
 		.fw = {
-- 
GitLab


From a3dec9cdf42bceccdf7a7185099a515ba0a8b29b Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 20 Nov 2023 16:38:45 -0800
Subject: [PATCH 0971/2340] drm/msm/gem: Remove "valid" tracking

This was a small optimization for pre-soft-pin userspace.  But mesa
switched to soft-pin nearly 5yrs ago.  So lets drop the optimization
and simplify the code.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/568328/
---
 drivers/gpu/drm/msm/msm_gem.h        |  2 --
 drivers/gpu/drm/msm/msm_gem_submit.c | 44 +++++-----------------------
 2 files changed, 8 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 6352bd3cc5ed6..79195f2fd1710 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -275,7 +275,6 @@ struct msm_gem_submit {
 	struct msm_gpu_submitqueue *queue;
 	struct pid *pid;    /* submitting process */
 	bool fault_dumped;  /* Limit devcoredump dumping to one per submit */
-	bool valid;         /* true if no cmdstream patching needed */
 	bool in_rb;         /* "sudo" mode, copy cmds into RB */
 	struct msm_ringbuffer *ring;
 	unsigned int nr_cmds;
@@ -292,7 +291,6 @@ struct msm_gem_submit {
 	} *cmd;  /* array of size nr_cmds */
 	struct {
 /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
-#define BO_VALID	0x8000	/* is current addr in cmdstream correct/valid? */
 #define BO_LOCKED	0x4000	/* obj lock is held */
 #define BO_PINNED	0x2000	/* obj (pages) is pinned and on active list */
 		uint32_t flags;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 7aa5e14ab9aa1..2fdc373c25891 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -150,8 +150,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 
 		submit->bos[i].handle = submit_bo.handle;
 		submit->bos[i].flags = submit_bo.flags;
-		/* in validate_objects() we figure out if this is true: */
-		submit->bos[i].iova  = submit_bo.presumed;
 	}
 
 	spin_lock(&file->table_lock);
@@ -278,9 +276,6 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
 {
 	unsigned cleanup_flags = BO_PINNED | BO_LOCKED;
 	submit_cleanup_bo(submit, i, cleanup_flags);
-
-	if (!(submit->bos[i].flags & BO_VALID))
-		submit->bos[i].iova = 0;
 }
 
 /* This is where we make sure all the bo's are reserved and pin'd: */
@@ -390,8 +385,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 	struct msm_drm_private *priv = submit->dev->dev_private;
 	int i, ret = 0;
 
-	submit->valid = true;
-
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = submit->bos[i].obj;
 		struct msm_gem_vma *vma;
@@ -407,14 +400,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 		if (ret)
 			break;
 
-		if (vma->iova == submit->bos[i].iova) {
-			submit->bos[i].flags |= BO_VALID;
-		} else {
-			submit->bos[i].iova = vma->iova;
-			/* iova changed, so address in cmdstream is not valid: */
-			submit->bos[i].flags &= ~BO_VALID;
-			submit->valid = false;
-		}
+		submit->bos[i].iova = vma->iova;
 	}
 
 	/*
@@ -451,7 +437,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
 }
 
 static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
-		struct drm_gem_object **obj, uint64_t *iova, bool *valid)
+		struct drm_gem_object **obj, uint64_t *iova)
 {
 	if (idx >= submit->nr_bos) {
 		SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n",
@@ -463,8 +449,6 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
 		*obj = submit->bos[idx].obj;
 	if (iova)
 		*iova = submit->bos[idx].iova;
-	if (valid)
-		*valid = !!(submit->bos[idx].flags & BO_VALID);
 
 	return 0;
 }
@@ -477,9 +461,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 	uint32_t *ptr;
 	int ret = 0;
 
-	if (!nr_relocs)
-		return 0;
-
 	if (offset % 4) {
 		SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset);
 		return -EINVAL;
@@ -500,7 +481,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 		struct drm_msm_gem_submit_reloc submit_reloc = relocs[i];
 		uint32_t off;
 		uint64_t iova;
-		bool valid;
 
 		if (submit_reloc.submit_offset % 4) {
 			SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n",
@@ -519,13 +499,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 			goto out;
 		}
 
-		ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid);
+		ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova);
 		if (ret)
 			goto out;
 
-		if (valid)
-			continue;
-
 		iova += submit_reloc.reloc_offset;
 
 		if (submit_reloc.shift < 0)
@@ -879,8 +856,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_gem_object *obj;
 		uint64_t iova;
 
-		ret = submit_bo(submit, submit->cmd[i].idx,
-				&obj, &iova, NULL);
+		ret = submit_bo(submit, submit->cmd[i].idx, &obj, &iova);
 		if (ret)
 			goto out;
 
@@ -894,17 +870,13 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 		submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4);
 
-		if (submit->valid)
+		if (likely(!submit->cmd[i].nr_relocs))
 			continue;
 
 		if (!gpu->allow_relocs) {
-			if (submit->cmd[i].nr_relocs) {
-				SUBMIT_ERROR(submit, "relocs not allowed\n");
-				ret = -EINVAL;
-				goto out;
-			}
-
-			continue;
+			SUBMIT_ERROR(submit, "relocs not allowed\n");
+			ret = -EINVAL;
+			goto out;
 		}
 
 		ret = submit_reloc(submit, obj, submit->cmd[i].offset * 4,
-- 
GitLab


From 202f98c19a11e335867a1906c2fb6c53680e4603 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 20 Nov 2023 16:38:46 -0800
Subject: [PATCH 0972/2340] drm/msm/gem: Remove submit_unlock_unpin_bo()

The only point it is called is before pinning objects, so the "unpin"
part of the name is fiction.  Just remove it and call submit_cleanup_bo()
directly.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/568330/
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 2fdc373c25891..51726c9ddffdf 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -272,12 +272,6 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
 		dma_resv_unlock(obj->resv);
 }
 
-static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
-{
-	unsigned cleanup_flags = BO_PINNED | BO_LOCKED;
-	submit_cleanup_bo(submit, i, cleanup_flags);
-}
-
 /* This is where we make sure all the bo's are reserved and pin'd: */
 static int submit_lock_objects(struct msm_gem_submit *submit)
 {
@@ -313,10 +307,10 @@ fail:
 	}
 
 	for (; i >= 0; i--)
-		submit_unlock_unpin_bo(submit, i);
+		submit_cleanup_bo(submit, i, BO_LOCKED);
 
 	if (slow_locked > 0)
-		submit_unlock_unpin_bo(submit, slow_locked);
+		submit_cleanup_bo(submit, slow_locked, BO_LOCKED);
 
 	if (ret == -EDEADLK) {
 		struct drm_gem_object *obj = submit->bos[contended].obj;
-- 
GitLab


From ceab575cafed594fb3cee1bec01a0e4ed5e2d752 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 20 Nov 2023 16:38:47 -0800
Subject: [PATCH 0973/2340] drm/msm/gem: Don't queue job to sched in error
 cases

We shouldn't be running the job in error cases.  This also avoids having
to think too hard about where the objs get unpinned (and if necessary,
the resv takes over tracking that the obj is busy).. ie. error cases it
always happens synchronously, and normal cases it happens from scheduler
job_run() callback.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/568331/
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 51726c9ddffdf..0bcdc589ceea0 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -946,6 +946,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		}
 	}
 
+	if (ret)
+		goto out;
+
 	submit_attach_object_fences(submit);
 
 	/* The scheduler owns a ref now: */
-- 
GitLab


From 2d7d2c4e84802485a1e765bd0732d41526dcf25c Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 20 Nov 2023 16:38:48 -0800
Subject: [PATCH 0974/2340] drm/msm/gem: Split out submit_unpin_objects()
 helper

Untangle unpinning from unlock/unref loop.  The unpin only happens in
error paths so it is easier to decouple from the normal unlock path.

Since we never have an intermediate state where a subset of buffers
are pinned (ie. we never bail out of the pin or unpin loops) we can
replace the bo state flag bit with a global flag in the submit.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/568335/
---
 drivers/gpu/drm/msm/msm_gem.h        |  6 +++---
 drivers/gpu/drm/msm/msm_gem_submit.c | 22 +++++++++++++++++-----
 drivers/gpu/drm/msm/msm_ringbuffer.c |  3 ++-
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 79195f2fd1710..80f4207120eab 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -274,8 +274,9 @@ struct msm_gem_submit {
 	int fence_id;       /* key into queue->fence_idr */
 	struct msm_gpu_submitqueue *queue;
 	struct pid *pid;    /* submitting process */
-	bool fault_dumped;  /* Limit devcoredump dumping to one per submit */
-	bool in_rb;         /* "sudo" mode, copy cmds into RB */
+	bool bos_pinned : 1;
+	bool fault_dumped:1;/* Limit devcoredump dumping to one per submit */
+	bool in_rb : 1;     /* "sudo" mode, copy cmds into RB */
 	struct msm_ringbuffer *ring;
 	unsigned int nr_cmds;
 	unsigned int nr_bos;
@@ -292,7 +293,6 @@ struct msm_gem_submit {
 	struct {
 /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
 #define BO_LOCKED	0x4000	/* obj lock is held */
-#define BO_PINNED	0x2000	/* obj (pages) is pinned and on active list */
 		uint32_t flags;
 		union {
 			struct drm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 0bcdc589ceea0..208fad42f7844 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -265,9 +265,6 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
 	 */
 	submit->bos[i].flags &= ~cleanup_flags;
 
-	if (flags & BO_PINNED)
-		msm_gem_unpin_locked(obj);
-
 	if (flags & BO_LOCKED)
 		dma_resv_unlock(obj->resv);
 }
@@ -407,13 +404,28 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 	mutex_lock(&priv->lru.lock);
 	for (i = 0; i < submit->nr_bos; i++) {
 		msm_gem_pin_obj_locked(submit->bos[i].obj);
-		submit->bos[i].flags |= BO_PINNED;
 	}
 	mutex_unlock(&priv->lru.lock);
 
+	submit->bos_pinned = true;
+
 	return ret;
 }
 
+static void submit_unpin_objects(struct msm_gem_submit *submit)
+{
+	if (!submit->bos_pinned)
+		return;
+
+	for (int i = 0; i < submit->nr_bos; i++) {
+		struct drm_gem_object *obj = submit->bos[i].obj;
+
+		msm_gem_unpin_locked(obj);
+	}
+
+	submit->bos_pinned = false;
+}
+
 static void submit_attach_object_fences(struct msm_gem_submit *submit)
 {
 	int i;
@@ -525,7 +537,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 	unsigned i;
 
 	if (error)
-		cleanup_flags |= BO_PINNED;
+		submit_unpin_objects(submit);
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = submit->bos[i].obj;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 4968568e3b546..4bc13f7d005ab 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -29,9 +29,10 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 		struct drm_gem_object *obj = submit->bos[i].obj;
 
 		msm_gem_unpin_active(obj);
-		submit->bos[i].flags &= ~BO_PINNED;
 	}
 
+	submit->bos_pinned = false;
+
 	mutex_unlock(&priv->lru.lock);
 
 	msm_gpu_submit(gpu, submit);
-- 
GitLab


From 3a48a40387e73e1a37428b11cb3ba56d4c108571 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 20 Nov 2023 16:38:49 -0800
Subject: [PATCH 0975/2340] drm/msm/gem: Cleanup submit_cleanup_bo()

Now that it only handles unlock duty, drop the superfluous arg and
rename it.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/568333/
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 208fad42f7844..2bb5d13726d58 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -248,14 +248,10 @@ out:
 	return ret;
 }
 
-/* Unwind bo state, according to cleanup_flags.  In the success case, only
- * the lock is dropped at the end of the submit (and active/pin ref is dropped
- * later when the submit is retired).
- */
-static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
-		unsigned cleanup_flags)
+static void submit_unlock_bo(struct msm_gem_submit *submit, int i)
 {
 	struct drm_gem_object *obj = submit->bos[i].obj;
+	unsigned cleanup_flags = BO_LOCKED;
 	unsigned flags = submit->bos[i].flags & cleanup_flags;
 
 	/*
@@ -304,10 +300,10 @@ fail:
 	}
 
 	for (; i >= 0; i--)
-		submit_cleanup_bo(submit, i, BO_LOCKED);
+		submit_unlock_bo(submit, i);
 
 	if (slow_locked > 0)
-		submit_cleanup_bo(submit, slow_locked, BO_LOCKED);
+		submit_unlock_bo(submit, slow_locked);
 
 	if (ret == -EDEADLK) {
 		struct drm_gem_object *obj = submit->bos[contended].obj;
@@ -533,7 +529,6 @@ out:
  */
 static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 {
-	unsigned cleanup_flags = BO_LOCKED;
 	unsigned i;
 
 	if (error)
@@ -541,7 +536,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = submit->bos[i].obj;
-		submit_cleanup_bo(submit, i, cleanup_flags);
+		submit_unlock_bo(submit, i);
 		if (error)
 			drm_gem_object_put(obj);
 	}
-- 
GitLab


From 05d249352f1ae909230c230767ca8f4e9fdf8e7b Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 20 Nov 2023 16:38:50 -0800
Subject: [PATCH 0976/2340] drm/exec: Pass in initial # of objects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In cases where the # is known ahead of time, it is silly to do the table
resize dance.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Patchwork: https://patchwork.freedesktop.org/patch/568338/
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c          |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c          |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c          |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c     |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c             |  2 +-
 drivers/gpu/drm/drm_exec.c                       | 13 ++++++++++---
 drivers/gpu/drm/drm_gpuvm.c                      |  4 ++--
 drivers/gpu/drm/imagination/pvr_job.c            |  2 +-
 drivers/gpu/drm/nouveau/nouveau_uvmm.c           |  2 +-
 drivers/gpu/drm/tests/drm_exec_test.c            | 16 ++++++++--------
 include/drm/drm_exec.h                           |  2 +-
 13 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 41fbc4fd0fac3..0bd3c4a6267ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1137,7 +1137,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
 	ctx->n_vms = 1;
 	ctx->sync = &mem->sync;
-	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&ctx->exec) {
 		ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
 		drm_exec_retry_on_contention(&ctx->exec);
@@ -1176,7 +1176,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	int ret;
 
 	ctx->sync = &mem->sync;
-	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&ctx->exec) {
 		ctx->n_vms = 0;
 		list_for_each_entry(entry, &mem->attachments, list) {
@@ -2552,7 +2552,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 
 	amdgpu_sync_create(&sync);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	/* Reserve all BOs and page tables for validation */
 	drm_exec_until_all_locked(&exec) {
 		/* Reserve all the page directories */
@@ -2793,7 +2793,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 
 	mutex_lock(&process_info->lock);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		list_for_each_entry(peer_vm, &process_info->vm_list_head,
 				    vm_list_node) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e50be65000303..440e9a6786fcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -66,7 +66,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
 
 	amdgpu_sync_create(&p->sync);
 	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
-		      DRM_EXEC_IGNORE_DUPLICATES);
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 7200110197415..796fa6f1420b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct drm_exec exec;
 	int r;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
 		if (likely(!r))
@@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct drm_exec exec;
 	int r;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
 		if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 84beeaa4d21ca..49a5f1c73b3ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct drm_exec exec;
 	long r;
 
-	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
 		drm_exec_retry_on_contention(&exec);
@@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	}
 
 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
-		      DRM_EXEC_IGNORE_DUPLICATES);
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
 		if (gobj) {
 			r = drm_exec_lock_obj(&exec, gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 9ddbf1494326a..abd0b9763904f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1122,7 +1122,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
 
 	amdgpu_sync_create(&sync);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec,
 				      &ctx_data->meta_data_obj->tbo.base);
@@ -1193,7 +1193,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
 	struct drm_exec exec;
 	long r;
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec,
 				      &ctx_data->meta_data_obj->tbo.base);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index ca45ba8ac1715..bfbf59326ee12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -86,7 +86,7 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	amdgpu_sync_create(&sync);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec, &bo->tbo.base);
 		drm_exec_retry_on_contention(&exec);
@@ -149,7 +149,7 @@ static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct drm_exec exec;
 	long r;
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec, &bo->tbo.base);
 		drm_exec_retry_on_contention(&exec);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2f3c338fd944..76d9f14ccc7c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1529,7 +1529,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
 	uint32_t gpuidx;
 	int r;
 
-	drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
+	drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0);
 	drm_exec_until_all_locked(&ctx->exec) {
 		for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
 			pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
index 5d2809de4517c..48ee851b61d90 100644
--- a/drivers/gpu/drm/drm_exec.c
+++ b/drivers/gpu/drm/drm_exec.c
@@ -69,16 +69,23 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
  * drm_exec_init - initialize a drm_exec object
  * @exec: the drm_exec object to initialize
  * @flags: controls locking behavior, see DRM_EXEC_* defines
+ * @nr: the initial # of objects
  *
  * Initialize the object and make sure that we can track locked objects.
+ *
+ * If nr is non-zero then it is used as the initial objects table size.
+ * In either case, the table will grow (be re-allocated) on demand.
  */
-void drm_exec_init(struct drm_exec *exec, uint32_t flags)
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
 {
+	if (!nr)
+		nr = PAGE_SIZE / sizeof(void *);
+
 	exec->flags = flags;
-	exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL);
 
 	/* If allocation here fails, just delay that till the first use */
-	exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
+	exec->max_objects = exec->objects ? nr : 0;
 	exec->num_objects = 0;
 	exec->contended = DRM_EXEC_DUMMY;
 	exec->prelocked = NULL;
diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 9c0922c1a5a27..dc8edca617644 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1250,7 +1250,7 @@ drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec)
 	unsigned int num_fences = vm_exec->num_fences;
 	int ret;
 
-	drm_exec_init(exec, vm_exec->flags);
+	drm_exec_init(exec, vm_exec->flags, 0);
 
 	drm_exec_until_all_locked(exec) {
 		ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences);
@@ -1341,7 +1341,7 @@ drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec,
 	struct drm_exec *exec = &vm_exec->exec;
 	int ret;
 
-	drm_exec_init(exec, vm_exec->flags);
+	drm_exec_init(exec, vm_exec->flags, 0);
 
 	drm_exec_until_all_locked(exec) {
 		ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range,
diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c
index 04139da6c04dc..78c2f3c6dce01 100644
--- a/drivers/gpu/drm/imagination/pvr_job.c
+++ b/drivers/gpu/drm/imagination/pvr_job.c
@@ -746,7 +746,7 @@ pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
 	if (err)
 		goto out_job_data_cleanup;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0);
 
 	xa_init_flags(&signal_array, XA_FLAGS_ALLOC);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index dae3baf707a0b..4f223c972c6a8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1347,7 +1347,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
 		}
 	}
 
-	drm_exec_init(exec, vme->flags);
+	drm_exec_init(exec, vme->flags, 0);
 	drm_exec_until_all_locked(exec) {
 		ret = bind_lock_validate(job, exec, vme->num_fences);
 		drm_exec_retry_on_contention(exec);
diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c
index 563949d777ddc..81f928a429ba0 100644
--- a/drivers/gpu/drm/tests/drm_exec_test.c
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -46,7 +46,7 @@ static void sanitycheck(struct kunit *test)
 {
 	struct drm_exec exec;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_fini(&exec);
 	KUNIT_SUCCEED(test);
 }
@@ -60,7 +60,7 @@ static void test_lock(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_lock_obj(&exec, &gobj);
 		drm_exec_retry_on_contention(&exec);
@@ -80,7 +80,7 @@ static void test_lock_unlock(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_lock_obj(&exec, &gobj);
 		drm_exec_retry_on_contention(&exec);
@@ -107,7 +107,7 @@ static void test_duplicates(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_lock_obj(&exec, &gobj);
 		drm_exec_retry_on_contention(&exec);
@@ -134,7 +134,7 @@ static void test_prepare(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_prepare_obj(&exec, &gobj, 1);
 		drm_exec_retry_on_contention(&exec);
@@ -159,7 +159,7 @@ static void test_prepare_array(struct kunit *test)
 	drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE);
 	drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec)
 		ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array),
 					     1);
@@ -174,14 +174,14 @@ static void test_multiple_loops(struct kunit *test)
 {
 	struct drm_exec exec;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec)
 	{
 		break;
 	}
 	drm_exec_fini(&exec);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec)
 	{
 		break;
diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
index b5bf0b6da791c..f1a66c0487217 100644
--- a/include/drm/drm_exec.h
+++ b/include/drm/drm_exec.h
@@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
 	return !!exec->contended;
 }
 
-void drm_exec_init(struct drm_exec *exec, uint32_t flags);
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
 void drm_exec_fini(struct drm_exec *exec);
 bool drm_exec_cleanup(struct drm_exec *exec);
 int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
-- 
GitLab


From a6397e63877e82528c67058e6e6e8d700682df50 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 20 Nov 2023 16:38:51 -0800
Subject: [PATCH 0977/2340] drm/msm/gem: Convert to drm_exec

Replace the ww_mutex locking dance with the drm_exec helper.

v2: Error path fixes, move drm_exec_fini so we only call it once (and
    only if we have drm_exec_init()

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/568342/
---
 drivers/gpu/drm/msm/Kconfig          |   1 +
 drivers/gpu/drm/msm/msm_gem.h        |   5 +-
 drivers/gpu/drm/msm/msm_gem_submit.c | 119 +++++----------------------
 3 files changed, 24 insertions(+), 101 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index ad70b611b44f0..f202f26adab2a 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -17,6 +17,7 @@ config DRM_MSM
 	select DRM_DP_AUX_BUS
 	select DRM_DISPLAY_DP_HELPER
 	select DRM_DISPLAY_HELPER
+	select DRM_EXEC
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_BRIDGE
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 80f4207120eab..8d414b072c29d 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -9,6 +9,7 @@
 
 #include <linux/kref.h>
 #include <linux/dma-resv.h>
+#include "drm/drm_exec.h"
 #include "drm/gpu_scheduler.h"
 #include "msm_drv.h"
 
@@ -258,7 +259,7 @@ struct msm_gem_submit {
 	struct msm_gpu *gpu;
 	struct msm_gem_address_space *aspace;
 	struct list_head node;   /* node in ring submit list */
-	struct ww_acquire_ctx ticket;
+	struct drm_exec exec;
 	uint32_t seqno;		/* Sequence number of the submit on the ring */
 
 	/* Hw fence, which is created when the scheduler executes the job, and
@@ -291,8 +292,6 @@ struct msm_gem_submit {
 		struct drm_msm_gem_submit_reloc *relocs;
 	} *cmd;  /* array of size nr_cmds */
 	struct {
-/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
-#define BO_LOCKED	0x4000	/* obj lock is held */
 		uint32_t flags;
 		union {
 			struct drm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 2bb5d13726d58..fba78193127de 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -248,85 +248,30 @@ out:
 	return ret;
 }
 
-static void submit_unlock_bo(struct msm_gem_submit *submit, int i)
-{
-	struct drm_gem_object *obj = submit->bos[i].obj;
-	unsigned cleanup_flags = BO_LOCKED;
-	unsigned flags = submit->bos[i].flags & cleanup_flags;
-
-	/*
-	 * Clear flags bit before dropping lock, so that the msm_job_run()
-	 * path isn't racing with submit_cleanup() (ie. the read/modify/
-	 * write is protected by the obj lock in all paths)
-	 */
-	submit->bos[i].flags &= ~cleanup_flags;
-
-	if (flags & BO_LOCKED)
-		dma_resv_unlock(obj->resv);
-}
-
 /* This is where we make sure all the bo's are reserved and pin'd: */
 static int submit_lock_objects(struct msm_gem_submit *submit)
 {
-	int contended, slow_locked = -1, i, ret = 0;
-
-retry:
-	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = submit->bos[i].obj;
-
-		if (slow_locked == i)
-			slow_locked = -1;
+	int ret;
 
-		contended = i;
+	drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, submit->nr_bos);
 
-		if (!(submit->bos[i].flags & BO_LOCKED)) {
-			ret = dma_resv_lock_interruptible(obj->resv,
-							  &submit->ticket);
+	drm_exec_until_all_locked (&submit->exec) {
+		for (unsigned i = 0; i < submit->nr_bos; i++) {
+			struct drm_gem_object *obj = submit->bos[i].obj;
+			ret = drm_exec_prepare_obj(&submit->exec, obj, 1);
+			drm_exec_retry_on_contention(&submit->exec);
 			if (ret)
-				goto fail;
-			submit->bos[i].flags |= BO_LOCKED;
+				goto error;
 		}
 	}
 
-	ww_acquire_done(&submit->ticket);
-
 	return 0;
 
-fail:
-	if (ret == -EALREADY) {
-		SUBMIT_ERROR(submit, "handle %u at index %u already on submit list\n",
-			     submit->bos[i].handle, i);
-		ret = -EINVAL;
-	}
-
-	for (; i >= 0; i--)
-		submit_unlock_bo(submit, i);
-
-	if (slow_locked > 0)
-		submit_unlock_bo(submit, slow_locked);
-
-	if (ret == -EDEADLK) {
-		struct drm_gem_object *obj = submit->bos[contended].obj;
-		/* we lost out in a seqno race, lock and retry.. */
-		ret = dma_resv_lock_slow_interruptible(obj->resv,
-						       &submit->ticket);
-		if (!ret) {
-			submit->bos[contended].flags |= BO_LOCKED;
-			slow_locked = contended;
-			goto retry;
-		}
-
-		/* Not expecting -EALREADY here, if the bo was already
-		 * locked, we should have gotten -EALREADY already from
-		 * the dma_resv_lock_interruptable() call.
-		 */
-		WARN_ON_ONCE(ret == -EALREADY);
-	}
-
+error:
 	return ret;
 }
 
-static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
+static int submit_fence_sync(struct msm_gem_submit *submit)
 {
 	int i, ret = 0;
 
@@ -334,22 +279,6 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
 		struct drm_gem_object *obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;
 
-		/* NOTE: _reserve_shared() must happen before
-		 * _add_shared_fence(), which makes this a slightly
-		 * strange place to call it.  OTOH this is a
-		 * convenient can-fail point to hook it in.
-		 */
-		ret = dma_resv_reserve_fences(obj->resv, 1);
-		if (ret)
-			return ret;
-
-		/* If userspace has determined that explicit fencing is
-		 * used, it can disable implicit sync on the entire
-		 * submit:
-		 */
-		if (no_implicit)
-			continue;
-
 		/* Otherwise userspace can ask for implicit sync to be
 		 * disabled on specific buffers.  This is useful for internal
 		 * usermode driver managed buffers, suballocation, etc.
@@ -529,17 +458,14 @@ out:
  */
 static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 {
-	unsigned i;
-
-	if (error)
+	if (error) {
 		submit_unpin_objects(submit);
-
-	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = submit->bos[i].obj;
-		submit_unlock_bo(submit, i);
-		if (error)
-			drm_gem_object_put(obj);
+		/* job wasn't enqueued to scheduler, so early retirement: */
+		msm_submit_retire(submit);
 	}
+
+	if (submit->exec.objects)
+		drm_exec_fini(&submit->exec);
 }
 
 void msm_submit_retire(struct msm_gem_submit *submit)
@@ -733,7 +659,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_submit_post_dep *post_deps = NULL;
 	struct drm_syncobj **syncobjs_to_reset = NULL;
 	int out_fence_fd = -1;
-	bool has_ww_ticket = false;
 	unsigned i;
 	int ret;
 
@@ -839,15 +764,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto out;
 
 	/* copy_*_user while holding a ww ticket upsets lockdep */
-	ww_acquire_init(&submit->ticket, &reservation_ww_class);
-	has_ww_ticket = true;
 	ret = submit_lock_objects(submit);
 	if (ret)
 		goto out;
 
-	ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT));
-	if (ret)
-		goto out;
+	if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
+		ret = submit_fence_sync(submit);
+		if (ret)
+			goto out;
+	}
 
 	ret = submit_pin_objects(submit);
 	if (ret)
@@ -975,8 +900,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 out:
 	submit_cleanup(submit, !!ret);
-	if (has_ww_ticket)
-		ww_acquire_fini(&submit->ticket);
 out_unlock:
 	mutex_unlock(&queue->lock);
 out_post_unlock:
-- 
GitLab


From 236fa3873de8f1d4b0c7eaeb4c53b4d1220d55a0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 01:13:53 +0300
Subject: [PATCH 0978/2340] drm/ci: remove rebase-merge directory

Gitlab runner can cache git repository, including the unfinished rebase
merge status. New CI job will come as a fresh checkout, however this
will not destroy the unfinished rebase, failing our build script.
Destroy the unfinished rebase state.

Suggested-by: David Heidelberg <david.heidelberg@collabora.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Helen Koike <helen.koike@collabora.com>
Patchwork: https://patchwork.freedesktop.org/patch/570159/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/ci/build.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh
index e5c5dcedd1084..ca2923ed22903 100644
--- a/drivers/gpu/drm/ci/build.sh
+++ b/drivers/gpu/drm/ci/build.sh
@@ -58,6 +58,9 @@ git config --global user.email "fdo@example.com"
 git config --global user.name "freedesktop.org CI"
 git config --global pull.rebase true
 
+# cleanup git state on the worker
+rm -rf .git/rebase-merge
+
 # Try to merge fixes from target repo
 if [ "$(git ls-remote --exit-code --heads ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes)" ]; then
     git pull ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes
-- 
GitLab


From 5169477081a1ed08924949e4893732de92ad7d25 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Sat, 9 Dec 2023 23:05:41 +0000
Subject: [PATCH 0979/2340] drm/i915/selftests: Fix spelling mistake
 "initialiased" -> "initialised"

There is a spelling mistake in a pr_err error message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231209230541.4055786-1-colin.i.king@gmail.com
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index e57f9390076c5..d684a70f2c042 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -504,7 +504,7 @@ static int igt_dmabuf_export_vmap(void *arg)
 	}
 
 	if (memchr_inv(ptr, 0, dmabuf->size)) {
-		pr_err("Exported object not initialiased to zero!\n");
+		pr_err("Exported object not initialised to zero!\n");
 		err = -EINVAL;
 		goto out;
 	}
-- 
GitLab


From f48dee9ed7c992eaf6a3635db304a61ed82827b3 Mon Sep 17 00:00:00 2001
From: John Watts <contact@jookia.org>
Date: Sun, 10 Dec 2023 17:55:49 +1100
Subject: [PATCH 0980/2340] drm/panel: nv3052c: Document known register names

Many of these registers have a known name in the public datasheet.
Document them as comments for reference.

Signed-off-by: John Watts <contact@jookia.org>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20231210-fs035vg158-v5-1-d75adc75571f@jookia.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231210-fs035vg158-v5-1-d75adc75571f@jookia.org
---
 .../gpu/drm/panel/panel-newvision-nv3052c.c   | 261 +++++++++---------
 1 file changed, 132 insertions(+), 129 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
index 71e57de6d8b2c..589431523ce7f 100644
--- a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
@@ -42,9 +42,9 @@ struct nv3052c_reg {
 };
 
 static const struct nv3052c_reg nv3052c_panel_regs[] = {
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x01 },
+	// EXTC Command set enable, select page 1
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x01 },
+	// Mostly unknown registers
 	{ 0xe3, 0x00 },
 	{ 0x40, 0x00 },
 	{ 0x03, 0x40 },
@@ -62,15 +62,15 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = {
 	{ 0x25, 0x06 },
 	{ 0x26, 0x14 },
 	{ 0x27, 0x14 },
-	{ 0x38, 0xcc },
-	{ 0x39, 0xd7 },
-	{ 0x3a, 0x4a },
+	{ 0x38, 0xcc }, // VCOM_ADJ1
+	{ 0x39, 0xd7 }, // VCOM_ADJ2
+	{ 0x3a, 0x4a }, // VCOM_ADJ3
 	{ 0x28, 0x40 },
 	{ 0x29, 0x01 },
 	{ 0x2a, 0xdf },
 	{ 0x49, 0x3c },
-	{ 0x91, 0x77 },
-	{ 0x92, 0x77 },
+	{ 0x91, 0x77 }, // EXTPW_CTRL2
+	{ 0x92, 0x77 }, // EXTPW_CTRL3
 	{ 0xa0, 0x55 },
 	{ 0xa1, 0x50 },
 	{ 0xa4, 0x9c },
@@ -94,123 +94,126 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = {
 	{ 0xb8, 0x26 },
 	{ 0xf0, 0x00 },
 	{ 0xf6, 0xc0 },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x02 },
-	{ 0xb0, 0x0b },
-	{ 0xb1, 0x16 },
-	{ 0xb2, 0x17 },
-	{ 0xb3, 0x2c },
-	{ 0xb4, 0x32 },
-	{ 0xb5, 0x3b },
-	{ 0xb6, 0x29 },
-	{ 0xb7, 0x40 },
-	{ 0xb8, 0x0d },
-	{ 0xb9, 0x05 },
-	{ 0xba, 0x12 },
-	{ 0xbb, 0x10 },
-	{ 0xbc, 0x12 },
-	{ 0xbd, 0x15 },
-	{ 0xbe, 0x19 },
-	{ 0xbf, 0x0e },
-	{ 0xc0, 0x16 },
-	{ 0xc1, 0x0a },
-	{ 0xd0, 0x0c },
-	{ 0xd1, 0x17 },
-	{ 0xd2, 0x14 },
-	{ 0xd3, 0x2e },
-	{ 0xd4, 0x32 },
-	{ 0xd5, 0x3c },
-	{ 0xd6, 0x22 },
-	{ 0xd7, 0x3d },
-	{ 0xd8, 0x0d },
-	{ 0xd9, 0x07 },
-	{ 0xda, 0x13 },
-	{ 0xdb, 0x13 },
-	{ 0xdc, 0x11 },
-	{ 0xdd, 0x15 },
-	{ 0xde, 0x19 },
-	{ 0xdf, 0x10 },
-	{ 0xe0, 0x17 },
-	{ 0xe1, 0x0a },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x03 },
-	{ 0x00, 0x2a },
-	{ 0x01, 0x2a },
-	{ 0x02, 0x2a },
-	{ 0x03, 0x2a },
-	{ 0x04, 0x61 },
-	{ 0x05, 0x80 },
-	{ 0x06, 0xc7 },
-	{ 0x07, 0x01 },
-	{ 0x08, 0x03 },
-	{ 0x09, 0x04 },
-	{ 0x70, 0x22 },
-	{ 0x71, 0x80 },
-	{ 0x30, 0x2a },
-	{ 0x31, 0x2a },
-	{ 0x32, 0x2a },
-	{ 0x33, 0x2a },
-	{ 0x34, 0x61 },
-	{ 0x35, 0xc5 },
-	{ 0x36, 0x80 },
-	{ 0x37, 0x23 },
-	{ 0x40, 0x03 },
-	{ 0x41, 0x04 },
-	{ 0x42, 0x05 },
-	{ 0x43, 0x06 },
-	{ 0x44, 0x11 },
-	{ 0x45, 0xe8 },
-	{ 0x46, 0xe9 },
-	{ 0x47, 0x11 },
-	{ 0x48, 0xea },
-	{ 0x49, 0xeb },
-	{ 0x50, 0x07 },
-	{ 0x51, 0x08 },
-	{ 0x52, 0x09 },
-	{ 0x53, 0x0a },
-	{ 0x54, 0x11 },
-	{ 0x55, 0xec },
-	{ 0x56, 0xed },
-	{ 0x57, 0x11 },
-	{ 0x58, 0xef },
-	{ 0x59, 0xf0 },
-	{ 0xb1, 0x01 },
-	{ 0xb4, 0x15 },
-	{ 0xb5, 0x16 },
-	{ 0xb6, 0x09 },
-	{ 0xb7, 0x0f },
-	{ 0xb8, 0x0d },
-	{ 0xb9, 0x0b },
-	{ 0xba, 0x00 },
-	{ 0xc7, 0x02 },
-	{ 0xca, 0x17 },
-	{ 0xcb, 0x18 },
-	{ 0xcc, 0x0a },
-	{ 0xcd, 0x10 },
-	{ 0xce, 0x0e },
-	{ 0xcf, 0x0c },
-	{ 0xd0, 0x00 },
-	{ 0x81, 0x00 },
-	{ 0x84, 0x15 },
-	{ 0x85, 0x16 },
-	{ 0x86, 0x10 },
-	{ 0x87, 0x0a },
-	{ 0x88, 0x0c },
-	{ 0x89, 0x0e },
-	{ 0x8a, 0x02 },
-	{ 0x97, 0x00 },
-	{ 0x9a, 0x17 },
-	{ 0x9b, 0x18 },
-	{ 0x9c, 0x0f },
-	{ 0x9d, 0x09 },
-	{ 0x9e, 0x0b },
-	{ 0x9f, 0x0d },
-	{ 0xa0, 0x01 },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x02 },
+	// EXTC Command set enable, select page 2
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Set gray scale voltage to adjust gamma
+	{ 0xb0, 0x0b }, // PGAMVR0
+	{ 0xb1, 0x16 }, // PGAMVR1
+	{ 0xb2, 0x17 }, // PGAMVR2
+	{ 0xb3, 0x2c }, // PGAMVR3
+	{ 0xb4, 0x32 }, // PGAMVR4
+	{ 0xb5, 0x3b }, // PGAMVR5
+	{ 0xb6, 0x29 }, // PGAMPR0
+	{ 0xb7, 0x40 }, // PGAMPR1
+	{ 0xb8, 0x0d }, // PGAMPK0
+	{ 0xb9, 0x05 }, // PGAMPK1
+	{ 0xba, 0x12 }, // PGAMPK2
+	{ 0xbb, 0x10 }, // PGAMPK3
+	{ 0xbc, 0x12 }, // PGAMPK4
+	{ 0xbd, 0x15 }, // PGAMPK5
+	{ 0xbe, 0x19 }, // PGAMPK6
+	{ 0xbf, 0x0e }, // PGAMPK7
+	{ 0xc0, 0x16 }, // PGAMPK8
+	{ 0xc1, 0x0a }, // PGAMPK9
+	// Set gray scale voltage to adjust gamma
+	{ 0xd0, 0x0c }, // NGAMVR0
+	{ 0xd1, 0x17 }, // NGAMVR0
+	{ 0xd2, 0x14 }, // NGAMVR1
+	{ 0xd3, 0x2e }, // NGAMVR2
+	{ 0xd4, 0x32 }, // NGAMVR3
+	{ 0xd5, 0x3c }, // NGAMVR4
+	{ 0xd6, 0x22 }, // NGAMPR0
+	{ 0xd7, 0x3d }, // NGAMPR1
+	{ 0xd8, 0x0d }, // NGAMPK0
+	{ 0xd9, 0x07 }, // NGAMPK1
+	{ 0xda, 0x13 }, // NGAMPK2
+	{ 0xdb, 0x13 }, // NGAMPK3
+	{ 0xdc, 0x11 }, // NGAMPK4
+	{ 0xdd, 0x15 }, // NGAMPK5
+	{ 0xde, 0x19 }, // NGAMPK6
+	{ 0xdf, 0x10 }, // NGAMPK7
+	{ 0xe0, 0x17 }, // NGAMPK8
+	{ 0xe1, 0x0a }, // NGAMPK9
+	// EXTC Command set enable, select page 3
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x03 },
+	// Set various timing settings
+	{ 0x00, 0x2a }, // GIP_VST_1
+	{ 0x01, 0x2a }, // GIP_VST_2
+	{ 0x02, 0x2a }, // GIP_VST_3
+	{ 0x03, 0x2a }, // GIP_VST_4
+	{ 0x04, 0x61 }, // GIP_VST_5
+	{ 0x05, 0x80 }, // GIP_VST_6
+	{ 0x06, 0xc7 }, // GIP_VST_7
+	{ 0x07, 0x01 }, // GIP_VST_8
+	{ 0x08, 0x03 }, // GIP_VST_9
+	{ 0x09, 0x04 }, // GIP_VST_10
+	{ 0x70, 0x22 }, // GIP_ECLK1
+	{ 0x71, 0x80 }, // GIP_ECLK2
+	{ 0x30, 0x2a }, // GIP_CLK_1
+	{ 0x31, 0x2a }, // GIP_CLK_2
+	{ 0x32, 0x2a }, // GIP_CLK_3
+	{ 0x33, 0x2a }, // GIP_CLK_4
+	{ 0x34, 0x61 }, // GIP_CLK_5
+	{ 0x35, 0xc5 }, // GIP_CLK_6
+	{ 0x36, 0x80 }, // GIP_CLK_7
+	{ 0x37, 0x23 }, // GIP_CLK_8
+	{ 0x40, 0x03 }, // GIP_CLKA_1
+	{ 0x41, 0x04 }, // GIP_CLKA_2
+	{ 0x42, 0x05 }, // GIP_CLKA_3
+	{ 0x43, 0x06 }, // GIP_CLKA_4
+	{ 0x44, 0x11 }, // GIP_CLKA_5
+	{ 0x45, 0xe8 }, // GIP_CLKA_6
+	{ 0x46, 0xe9 }, // GIP_CLKA_7
+	{ 0x47, 0x11 }, // GIP_CLKA_8
+	{ 0x48, 0xea }, // GIP_CLKA_9
+	{ 0x49, 0xeb }, // GIP_CLKA_10
+	{ 0x50, 0x07 }, // GIP_CLKB_1
+	{ 0x51, 0x08 }, // GIP_CLKB_2
+	{ 0x52, 0x09 }, // GIP_CLKB_3
+	{ 0x53, 0x0a }, // GIP_CLKB_4
+	{ 0x54, 0x11 }, // GIP_CLKB_5
+	{ 0x55, 0xec }, // GIP_CLKB_6
+	{ 0x56, 0xed }, // GIP_CLKB_7
+	{ 0x57, 0x11 }, // GIP_CLKB_8
+	{ 0x58, 0xef }, // GIP_CLKB_9
+	{ 0x59, 0xf0 }, // GIP_CLKB_10
+	// Map internal GOA signals to GOA output pad
+	{ 0xb1, 0x01 }, // PANELD2U2
+	{ 0xb4, 0x15 }, // PANELD2U5
+	{ 0xb5, 0x16 }, // PANELD2U6
+	{ 0xb6, 0x09 }, // PANELD2U7
+	{ 0xb7, 0x0f }, // PANELD2U8
+	{ 0xb8, 0x0d }, // PANELD2U9
+	{ 0xb9, 0x0b }, // PANELD2U10
+	{ 0xba, 0x00 }, // PANELD2U11
+	{ 0xc7, 0x02 }, // PANELD2U24
+	{ 0xca, 0x17 }, // PANELD2U27
+	{ 0xcb, 0x18 }, // PANELD2U28
+	{ 0xcc, 0x0a }, // PANELD2U29
+	{ 0xcd, 0x10 }, // PANELD2U30
+	{ 0xce, 0x0e }, // PANELD2U31
+	{ 0xcf, 0x0c }, // PANELD2U32
+	{ 0xd0, 0x00 }, // PANELD2U33
+	// Map internal GOA signals to GOA output pad
+	{ 0x81, 0x00 }, // PANELU2D2
+	{ 0x84, 0x15 }, // PANELU2D5
+	{ 0x85, 0x16 }, // PANELU2D6
+	{ 0x86, 0x10 }, // PANELU2D7
+	{ 0x87, 0x0a }, // PANELU2D8
+	{ 0x88, 0x0c }, // PANELU2D9
+	{ 0x89, 0x0e }, // PANELU2D10
+	{ 0x8a, 0x02 }, // PANELU2D11
+	{ 0x97, 0x00 }, // PANELU2D24
+	{ 0x9a, 0x17 }, // PANELU2D27
+	{ 0x9b, 0x18 }, // PANELU2D28
+	{ 0x9c, 0x0f }, // PANELU2D29
+	{ 0x9d, 0x09 }, // PANELU2D30
+	{ 0x9e, 0x0b }, // PANELU2D31
+	{ 0x9f, 0x0d }, // PANELU2D32
+	{ 0xa0, 0x01 }, // PANELU2D33
+	// EXTC Command set enable, select page 2
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Unknown registers
 	{ 0x01, 0x01 },
 	{ 0x02, 0xda },
 	{ 0x03, 0xba },
@@ -227,10 +230,10 @@ static const struct nv3052c_reg nv3052c_panel_regs[] = {
 	{ 0x0e, 0x48 },
 	{ 0x0f, 0x38 },
 	{ 0x10, 0x2b },
-	{ 0xff, 0x30 },
-	{ 0xff, 0x52 },
-	{ 0xff, 0x00 },
-	{ 0x36, 0x0a },
+	// EXTC Command set enable, select page 0
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x00 },
+	// Display Access Control
+	{ 0x36, 0x0a }, // bgr = 1, ss = 1, gs = 0
 };
 
 static inline struct nv3052c *to_nv3052c(struct drm_panel *panel)
-- 
GitLab


From 095e3a99e793767ca6c0483d31fb5d4087966d51 Mon Sep 17 00:00:00 2001
From: John Watts <contact@jookia.org>
Date: Sun, 10 Dec 2023 17:55:50 +1100
Subject: [PATCH 0981/2340] drm/panel: nv3052c: Add SPI device IDs

SPI drivers needs their own list of compatible device IDs in order
for automatic module loading to work. Add those for this driver.

Signed-off-by: John Watts <contact@jookia.org>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20231210-fs035vg158-v5-2-d75adc75571f@jookia.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231210-fs035vg158-v5-2-d75adc75571f@jookia.org
---
 drivers/gpu/drm/panel/panel-newvision-nv3052c.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
index 589431523ce7f..90dea21f9856a 100644
--- a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
@@ -465,6 +465,12 @@ static const struct nv3052c_panel_info ltk035c5444t_panel_info = {
 	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
 };
 
+static const struct spi_device_id nv3052c_ids[] = {
+	{ "ltk035c5444t", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(spi, nv3052c_ids);
+
 static const struct of_device_id nv3052c_of_match[] = {
 	{ .compatible = "leadtek,ltk035c5444t", .data = &ltk035c5444t_panel_info },
 	{ /* sentinel */ }
@@ -476,6 +482,7 @@ static struct spi_driver nv3052c_driver = {
 		.name = "nv3052c",
 		.of_match_table = nv3052c_of_match,
 	},
+	.id_table = nv3052c_ids,
 	.probe = nv3052c_probe,
 	.remove = nv3052c_remove,
 };
-- 
GitLab


From 2e6b7be84d88c0af927967418a56e22d372ce98c Mon Sep 17 00:00:00 2001
From: John Watts <contact@jookia.org>
Date: Sun, 10 Dec 2023 17:55:51 +1100
Subject: [PATCH 0982/2340] drm/panel: nv3052c: Allow specifying registers per
 panel

Panel initialization registers are per-display and not tied to the
controller itself. Different panels will specify their own registers.
Attach the sequences to the panel info struct so future panels
can specify their own sequences.

Signed-off-by: John Watts <contact@jookia.org>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Link: https://lore.kernel.org/r/20231210-fs035vg158-v5-3-d75adc75571f@jookia.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231210-fs035vg158-v5-3-d75adc75571f@jookia.org
---
 .../gpu/drm/panel/panel-newvision-nv3052c.c   | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
index 90dea21f9856a..b0114b5e85540 100644
--- a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
@@ -20,11 +20,18 @@
 #include <drm/drm_modes.h>
 #include <drm/drm_panel.h>
 
+struct nv3052c_reg {
+	u8 cmd;
+	u8 val;
+};
+
 struct nv3052c_panel_info {
 	const struct drm_display_mode *display_modes;
 	unsigned int num_modes;
 	u16 width_mm, height_mm;
 	u32 bus_format, bus_flags;
+	const struct nv3052c_reg *panel_regs;
+	unsigned int panel_regs_len;
 };
 
 struct nv3052c {
@@ -36,12 +43,7 @@ struct nv3052c {
 	struct gpio_desc *reset_gpio;
 };
 
-struct nv3052c_reg {
-	u8 cmd;
-	u8 val;
-};
-
-static const struct nv3052c_reg nv3052c_panel_regs[] = {
+static const struct nv3052c_reg ltk035c5444t_panel_regs[] = {
 	// EXTC Command set enable, select page 1
 	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x01 },
 	// Mostly unknown registers
@@ -244,6 +246,8 @@ static inline struct nv3052c *to_nv3052c(struct drm_panel *panel)
 static int nv3052c_prepare(struct drm_panel *panel)
 {
 	struct nv3052c *priv = to_nv3052c(panel);
+	const struct nv3052c_reg *panel_regs = priv->panel_info->panel_regs;
+	unsigned int panel_regs_len = priv->panel_info->panel_regs_len;
 	struct mipi_dbi *dbi = &priv->dbi;
 	unsigned int i;
 	int err;
@@ -260,9 +264,9 @@ static int nv3052c_prepare(struct drm_panel *panel)
 	gpiod_set_value_cansleep(priv->reset_gpio, 0);
 	usleep_range(5000, 20000);
 
-	for (i = 0; i < ARRAY_SIZE(nv3052c_panel_regs); i++) {
-		err = mipi_dbi_command(dbi, nv3052c_panel_regs[i].cmd,
-				       nv3052c_panel_regs[i].val);
+	for (i = 0; i < panel_regs_len; i++) {
+		err = mipi_dbi_command(dbi, panel_regs[i].cmd,
+				       panel_regs[i].val);
 
 		if (err) {
 			dev_err(priv->dev, "Unable to set register: %d\n", err);
@@ -463,6 +467,8 @@ static const struct nv3052c_panel_info ltk035c5444t_panel_info = {
 	.height_mm = 64,
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
 	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+	.panel_regs = ltk035c5444t_panel_regs,
+	.panel_regs_len = ARRAY_SIZE(ltk035c5444t_panel_regs),
 };
 
 static const struct spi_device_id nv3052c_ids[] = {
-- 
GitLab


From bf92f9163097dc717518d598116c1e385004b5ce Mon Sep 17 00:00:00 2001
From: John Watts <contact@jookia.org>
Date: Sun, 10 Dec 2023 17:55:52 +1100
Subject: [PATCH 0983/2340] drm/panel: nv3052c: Add Fascontek FS035VG158 LCD
 display

This display is extremely similar to the LTK035C5444T, but still has
some minor variations in panel initialization.

Signed-off-by: John Watts <contact@jookia.org>
Reviewed-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20231210-fs035vg158-v5-4-d75adc75571f@jookia.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231210-fs035vg158-v5-4-d75adc75571f@jookia.org
---
 .../gpu/drm/panel/panel-newvision-nv3052c.c   | 223 ++++++++++++++++++
 1 file changed, 223 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
index b0114b5e85540..1aab0c9ae52fa 100644
--- a/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3052c.c
@@ -238,6 +238,201 @@ static const struct nv3052c_reg ltk035c5444t_panel_regs[] = {
 	{ 0x36, 0x0a }, // bgr = 1, ss = 1, gs = 0
 };
 
+static const struct nv3052c_reg fs035vg158_panel_regs[] = {
+	// EXTC Command set enable, select page 1
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x01 },
+	// Mostly unknown registers
+	{ 0xe3, 0x00 },
+	{ 0x40, 0x00 },
+	{ 0x03, 0x40 },
+	{ 0x04, 0x00 },
+	{ 0x05, 0x03 },
+	{ 0x08, 0x00 },
+	{ 0x09, 0x07 },
+	{ 0x0a, 0x01 },
+	{ 0x0b, 0x32 },
+	{ 0x0c, 0x32 },
+	{ 0x0d, 0x0b },
+	{ 0x0e, 0x00 },
+	{ 0x23, 0x20 }, // RGB interface control: DE MODE PCLK-N
+	{ 0x24, 0x0c },
+	{ 0x25, 0x06 },
+	{ 0x26, 0x14 },
+	{ 0x27, 0x14 },
+	{ 0x38, 0x9c }, //VCOM_ADJ1, different to ltk035c5444t
+	{ 0x39, 0xa7 }, //VCOM_ADJ2, different to ltk035c5444t
+	{ 0x3a, 0x50 }, //VCOM_ADJ3, different to ltk035c5444t
+	{ 0x28, 0x40 },
+	{ 0x29, 0x01 },
+	{ 0x2a, 0xdf },
+	{ 0x49, 0x3c },
+	{ 0x91, 0x57 }, //EXTPW_CTRL2, different to ltk035c5444t
+	{ 0x92, 0x57 }, //EXTPW_CTRL3, different to ltk035c5444t
+	{ 0xa0, 0x55 },
+	{ 0xa1, 0x50 },
+	{ 0xa4, 0x9c },
+	{ 0xa7, 0x02 },
+	{ 0xa8, 0x01 },
+	{ 0xa9, 0x01 },
+	{ 0xaa, 0xfc },
+	{ 0xab, 0x28 },
+	{ 0xac, 0x06 },
+	{ 0xad, 0x06 },
+	{ 0xae, 0x06 },
+	{ 0xaf, 0x03 },
+	{ 0xb0, 0x08 },
+	{ 0xb1, 0x26 },
+	{ 0xb2, 0x28 },
+	{ 0xb3, 0x28 },
+	{ 0xb4, 0x03 }, // Unknown, different to ltk035c5444
+	{ 0xb5, 0x08 },
+	{ 0xb6, 0x26 },
+	{ 0xb7, 0x08 },
+	{ 0xb8, 0x26 },
+	{ 0xf0, 0x00 },
+	{ 0xf6, 0xc0 },
+	// EXTC Command set enable, select page 0
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Set gray scale voltage to adjust gamma
+	{ 0xb0, 0x0b }, // PGAMVR0
+	{ 0xb1, 0x16 }, // PGAMVR1
+	{ 0xb2, 0x17 }, // PGAMVR2
+	{ 0xb3, 0x2c }, // PGAMVR3
+	{ 0xb4, 0x32 }, // PGAMVR4
+	{ 0xb5, 0x3b }, // PGAMVR5
+	{ 0xb6, 0x29 }, // PGAMPR0
+	{ 0xb7, 0x40 }, // PGAMPR1
+	{ 0xb8, 0x0d }, // PGAMPK0
+	{ 0xb9, 0x05 }, // PGAMPK1
+	{ 0xba, 0x12 }, // PGAMPK2
+	{ 0xbb, 0x10 }, // PGAMPK3
+	{ 0xbc, 0x12 }, // PGAMPK4
+	{ 0xbd, 0x15 }, // PGAMPK5
+	{ 0xbe, 0x19 }, // PGAMPK6
+	{ 0xbf, 0x0e }, // PGAMPK7
+	{ 0xc0, 0x16 }, // PGAMPK8
+	{ 0xc1, 0x0a }, // PGAMPK9
+	// Set gray scale voltage to adjust gamma
+	{ 0xd0, 0x0c }, // NGAMVR0
+	{ 0xd1, 0x17 }, // NGAMVR0
+	{ 0xd2, 0x14 }, // NGAMVR1
+	{ 0xd3, 0x2e }, // NGAMVR2
+	{ 0xd4, 0x32 }, // NGAMVR3
+	{ 0xd5, 0x3c }, // NGAMVR4
+	{ 0xd6, 0x22 }, // NGAMPR0
+	{ 0xd7, 0x3d }, // NGAMPR1
+	{ 0xd8, 0x0d }, // NGAMPK0
+	{ 0xd9, 0x07 }, // NGAMPK1
+	{ 0xda, 0x13 }, // NGAMPK2
+	{ 0xdb, 0x13 }, // NGAMPK3
+	{ 0xdc, 0x11 }, // NGAMPK4
+	{ 0xdd, 0x15 }, // NGAMPK5
+	{ 0xde, 0x19 }, // NGAMPK6
+	{ 0xdf, 0x10 }, // NGAMPK7
+	{ 0xe0, 0x17 }, // NGAMPK8
+	{ 0xe1, 0x0a }, // NGAMPK9
+	// EXTC Command set enable, select page 3
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x03 },
+	// Set various timing settings
+	{ 0x00, 0x2a }, // GIP_VST_1
+	{ 0x01, 0x2a }, // GIP_VST_2
+	{ 0x02, 0x2a }, // GIP_VST_3
+	{ 0x03, 0x2a }, // GIP_VST_4
+	{ 0x04, 0x61 }, // GIP_VST_5
+	{ 0x05, 0x80 }, // GIP_VST_6
+	{ 0x06, 0xc7 }, // GIP_VST_7
+	{ 0x07, 0x01 }, // GIP_VST_8
+	{ 0x08, 0x03 }, // GIP_VST_9
+	{ 0x09, 0x04 }, // GIP_VST_10
+	{ 0x70, 0x22 }, // GIP_ECLK1
+	{ 0x71, 0x80 }, // GIP_ECLK2
+	{ 0x30, 0x2a }, // GIP_CLK_1
+	{ 0x31, 0x2a }, // GIP_CLK_2
+	{ 0x32, 0x2a }, // GIP_CLK_3
+	{ 0x33, 0x2a }, // GIP_CLK_4
+	{ 0x34, 0x61 }, // GIP_CLK_5
+	{ 0x35, 0xc5 }, // GIP_CLK_6
+	{ 0x36, 0x80 }, // GIP_CLK_7
+	{ 0x37, 0x23 }, // GIP_CLK_8
+	{ 0x40, 0x03 }, // GIP_CLKA_1
+	{ 0x41, 0x04 }, // GIP_CLKA_2
+	{ 0x42, 0x05 }, // GIP_CLKA_3
+	{ 0x43, 0x06 }, // GIP_CLKA_4
+	{ 0x44, 0x11 }, // GIP_CLKA_5
+	{ 0x45, 0xe8 }, // GIP_CLKA_6
+	{ 0x46, 0xe9 }, // GIP_CLKA_7
+	{ 0x47, 0x11 }, // GIP_CLKA_8
+	{ 0x48, 0xea }, // GIP_CLKA_9
+	{ 0x49, 0xeb }, // GIP_CLKA_10
+	{ 0x50, 0x07 }, // GIP_CLKB_1
+	{ 0x51, 0x08 }, // GIP_CLKB_2
+	{ 0x52, 0x09 }, // GIP_CLKB_3
+	{ 0x53, 0x0a }, // GIP_CLKB_4
+	{ 0x54, 0x11 }, // GIP_CLKB_5
+	{ 0x55, 0xec }, // GIP_CLKB_6
+	{ 0x56, 0xed }, // GIP_CLKB_7
+	{ 0x57, 0x11 }, // GIP_CLKB_8
+	{ 0x58, 0xef }, // GIP_CLKB_9
+	{ 0x59, 0xf0 }, // GIP_CLKB_10
+	// Map internal GOA signals to GOA output pad
+	{ 0xb1, 0x01 }, // PANELD2U2
+	{ 0xb4, 0x15 }, // PANELD2U5
+	{ 0xb5, 0x16 }, // PANELD2U6
+	{ 0xb6, 0x09 }, // PANELD2U7
+	{ 0xb7, 0x0f }, // PANELD2U8
+	{ 0xb8, 0x0d }, // PANELD2U9
+	{ 0xb9, 0x0b }, // PANELD2U10
+	{ 0xba, 0x00 }, // PANELD2U11
+	{ 0xc7, 0x02 }, // PANELD2U24
+	{ 0xca, 0x17 }, // PANELD2U27
+	{ 0xcb, 0x18 }, // PANELD2U28
+	{ 0xcc, 0x0a }, // PANELD2U29
+	{ 0xcd, 0x10 }, // PANELD2U30
+	{ 0xce, 0x0e }, // PANELD2U31
+	{ 0xcf, 0x0c }, // PANELD2U32
+	{ 0xd0, 0x00 }, // PANELD2U33
+	// Map internal GOA signals to GOA output pad
+	{ 0x81, 0x00 }, // PANELU2D2
+	{ 0x84, 0x15 }, // PANELU2D5
+	{ 0x85, 0x16 }, // PANELU2D6
+	{ 0x86, 0x10 }, // PANELU2D7
+	{ 0x87, 0x0a }, // PANELU2D8
+	{ 0x88, 0x0c }, // PANELU2D9
+	{ 0x89, 0x0e }, // PANELU2D10
+	{ 0x8a, 0x02 }, // PANELU2D11
+	{ 0x97, 0x00 }, // PANELU2D24
+	{ 0x9a, 0x17 }, // PANELU2D27
+	{ 0x9b, 0x18 }, // PANELU2D28
+	{ 0x9c, 0x0f }, // PANELU2D29
+	{ 0x9d, 0x09 }, // PANELU2D30
+	{ 0x9e, 0x0b }, // PANELU2D31
+	{ 0x9f, 0x0d }, // PANELU2D32
+	{ 0xa0, 0x01 }, // PANELU2D33
+	// EXTC Command set enable, select page 2
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x02 },
+	// Unknown registers
+	{ 0x01, 0x01 },
+	{ 0x02, 0xda },
+	{ 0x03, 0xba },
+	{ 0x04, 0xa8 },
+	{ 0x05, 0x9a },
+	{ 0x06, 0x70 },
+	{ 0x07, 0xff },
+	{ 0x08, 0x91 },
+	{ 0x09, 0x90 },
+	{ 0x0a, 0xff },
+	{ 0x0b, 0x8f },
+	{ 0x0c, 0x60 },
+	{ 0x0d, 0x58 },
+	{ 0x0e, 0x48 },
+	{ 0x0f, 0x38 },
+	{ 0x10, 0x2b },
+	// EXTC Command set enable, select page 0
+	{ 0xff, 0x30 }, { 0xff, 0x52 }, { 0xff, 0x00 },
+	// Display Access Control
+	{ 0x36, 0x0a }, // bgr = 1, ss = 1, gs = 0
+};
+
 static inline struct nv3052c *to_nv3052c(struct drm_panel *panel)
 {
 	return container_of(panel, struct nv3052c, panel);
@@ -460,6 +655,21 @@ static const struct drm_display_mode ltk035c5444t_modes[] = {
 	},
 };
 
+static const struct drm_display_mode fs035vg158_modes[] = {
+	{ /* 60 Hz */
+		.clock = 21000,
+		.hdisplay = 640,
+		.hsync_start = 640 + 34,
+		.hsync_end = 640 + 34 + 4,
+		.htotal = 640 + 34 + 4 + 20,
+		.vdisplay = 480,
+		.vsync_start = 480 + 12,
+		.vsync_end = 480 + 12 + 4,
+		.vtotal = 480 + 12 + 4 + 6,
+		.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	},
+};
+
 static const struct nv3052c_panel_info ltk035c5444t_panel_info = {
 	.display_modes = ltk035c5444t_modes,
 	.num_modes = ARRAY_SIZE(ltk035c5444t_modes),
@@ -471,14 +681,27 @@ static const struct nv3052c_panel_info ltk035c5444t_panel_info = {
 	.panel_regs_len = ARRAY_SIZE(ltk035c5444t_panel_regs),
 };
 
+static const struct nv3052c_panel_info fs035vg158_panel_info = {
+	.display_modes = fs035vg158_modes,
+	.num_modes = ARRAY_SIZE(fs035vg158_modes),
+	.width_mm = 70,
+	.height_mm = 53,
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+	.panel_regs = fs035vg158_panel_regs,
+	.panel_regs_len = ARRAY_SIZE(fs035vg158_panel_regs),
+};
+
 static const struct spi_device_id nv3052c_ids[] = {
 	{ "ltk035c5444t", },
+	{ "fs035vg158", },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(spi, nv3052c_ids);
 
 static const struct of_device_id nv3052c_of_match[] = {
 	{ .compatible = "leadtek,ltk035c5444t", .data = &ltk035c5444t_panel_info },
+	{ .compatible = "fascontek,fs035vg158", .data = &fs035vg158_panel_info },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, nv3052c_of_match);
-- 
GitLab


From 43cc1ce456b57ad48220393bbb7fac6e32369233 Mon Sep 17 00:00:00 2001
From: John Watts <contact@jookia.org>
Date: Sun, 10 Dec 2023 17:55:53 +1100
Subject: [PATCH 0984/2340] dt-bindings: display: panel: Clean up
 leadtek,ltk035c5444t properties

Remove common properties listed in common yaml files.
Add required properties needed to describe the panel.

Signed-off-by: John Watts <contact@jookia.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20231210-fs035vg158-v5-5-d75adc75571f@jookia.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231210-fs035vg158-v5-5-d75adc75571f@jookia.org
---
 .../bindings/display/panel/leadtek,ltk035c5444t.yaml      | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml
index ebdca5f5a0011..7a55961e1a3d3 100644
--- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml
+++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml
@@ -18,16 +18,12 @@ properties:
   compatible:
     const: leadtek,ltk035c5444t
 
-  backlight: true
-  port: true
-  power-supply: true
-  reg: true
-  reset-gpios: true
-
   spi-3wire: true
 
 required:
   - compatible
+  - reg
+  - port
   - power-supply
   - reset-gpios
 
-- 
GitLab


From 8fcb387a210cfc30a3b61abae21d5c8c4a55e470 Mon Sep 17 00:00:00 2001
From: John Watts <contact@jookia.org>
Date: Sun, 10 Dec 2023 17:55:54 +1100
Subject: [PATCH 0985/2340] dt-bindings: vendor-prefixes: Add fascontek

Fascontek manufactures LCD panels such as the FS035VG158.

Signed-off-by: John Watts <contact@jookia.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20231210-fs035vg158-v5-6-d75adc75571f@jookia.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231210-fs035vg158-v5-6-d75adc75571f@jookia.org
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 309b94c328c84..08ba7aad0d76e 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -474,6 +474,8 @@ patternProperties:
     description: Fairphone B.V.
   "^faraday,.*":
     description: Faraday Technology Corporation
+  "^fascontek,.*":
+    description: Fascontek
   "^fastrax,.*":
     description: Fastrax Oy
   "^fcs,.*":
-- 
GitLab


From 267624378ed6bebd733b4917452d78780db032dc Mon Sep 17 00:00:00 2001
From: John Watts <contact@jookia.org>
Date: Sun, 10 Dec 2023 17:55:55 +1100
Subject: [PATCH 0986/2340] dt-bindings: display: panel: add Fascontek
 FS035VG158 panel

This is a small 3.5" 640x480 IPS LCD panel.

Signed-off-by: John Watts <contact@jookia.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20231210-fs035vg158-v5-7-d75adc75571f@jookia.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231210-fs035vg158-v5-7-d75adc75571f@jookia.org
---
 .../display/panel/fascontek,fs035vg158.yaml   | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/display/panel/fascontek,fs035vg158.yaml

diff --git a/Documentation/devicetree/bindings/display/panel/fascontek,fs035vg158.yaml b/Documentation/devicetree/bindings/display/panel/fascontek,fs035vg158.yaml
new file mode 100644
index 0000000000000..d13c4bd26de46
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/fascontek,fs035vg158.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/fascontek,fs035vg158.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Fascontek FS035VG158 3.5" (640x480 pixels) 24-bit IPS LCD panel
+
+maintainers:
+  - John Watts <contact@jookia.org>
+
+allOf:
+  - $ref: panel-common.yaml#
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+properties:
+  compatible:
+    const: fascontek,fs035vg158
+
+  spi-3wire: true
+
+required:
+  - compatible
+  - reg
+  - port
+  - power-supply
+  - reset-gpios
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        panel@0 {
+            compatible = "fascontek,fs035vg158";
+            reg = <0>;
+
+            spi-3wire;
+            spi-max-frequency = <3125000>;
+
+            reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>;
+
+            backlight = <&backlight>;
+            power-supply = <&vcc>;
+
+            port {
+                panel_input: endpoint {
+                    remote-endpoint = <&panel_output>;
+                };
+            };
+        };
+    };
-- 
GitLab


From 799825aa87200ade1ba21db853d1c2ff720dcfe0 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 8 Dec 2023 09:48:45 -0600
Subject: [PATCH 0987/2340] drm/panel: st7701: Fix AVCL calculation

The AVCL register, according to the datasheet, comes in increments
of -0.2v between -4.4v (represented by 0x0) to -5.0v (represented
by 0x3). The current calculation is done by adding the defined
AVCL value in mV to -4400 and then dividing by 200 to get the register
value. Unfortunately if I subtract -4400 from -4400 I get -8800, which
divided by 200 gives me -44. If I instead subtract -4400 from -4400
I get 0, which divided by 200 gives me 0. Based on the datasheet this
is the expected register value.

Fixes: 83b7a8e7e88e ("drm/panel/panel-sitronix-st7701: Parametrize voltage and timing")

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231208154847.130615-2-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208154847.130615-2-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-sitronix-st7701.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
index 0459965e1b4f7..036ac403ed213 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
@@ -288,7 +288,7 @@ static void st7701_init_sequence(struct st7701 *st7701)
 		   FIELD_PREP(DSI_CMD2_BK1_PWRCTRL2_AVDD_MASK,
 			      DIV_ROUND_CLOSEST(desc->avdd_mv - 6200, 200)) |
 		   FIELD_PREP(DSI_CMD2_BK1_PWRCTRL2_AVCL_MASK,
-			      DIV_ROUND_CLOSEST(-4400 + desc->avcl_mv, 200)));
+			      DIV_ROUND_CLOSEST(-4400 - desc->avcl_mv, 200)));
 
 	/* T2D = 0.2us * T2D[3:0] */
 	ST7701_DSI(st7701, DSI_CMD2_BK1_SPD1,
-- 
GitLab


From acbf9184a87d5d6868809baa3a6c7d0537d1f321 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 8 Dec 2023 09:48:46 -0600
Subject: [PATCH 0988/2340] dt-bindings: display: st7701: Add Anbernic RG-ARC
 panel

The RG-ARC panel is a panel specific to the Anbernic RG-ARC. It is 4
inches in size (diagonally) with a resolution of 480x640.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20231208154847.130615-3-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208154847.130615-3-macroalpha82@gmail.com
---
 .../devicetree/bindings/display/panel/sitronix,st7701.yaml       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
index 4dc0cd4a6a772..b348f5bf0a980 100644
--- a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
@@ -27,6 +27,7 @@ properties:
   compatible:
     items:
       - enum:
+          - anbernic,rg-arc-panel
           - densitron,dmt028vghmcmi-1a
           - elida,kd50t048a
           - techstar,ts8550b
-- 
GitLab


From a7890252c1a314654862944cf4733e4333b76e25 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Fri, 8 Dec 2023 09:48:47 -0600
Subject: [PATCH 0989/2340] drm/panel: st7701: Add Anbernic RG-ARC Panel
 Support

The Powkiddy RG-ARC is a series of 2 handheld devices, each with a 4
inch 480x640 display. Add support for the display.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231208154847.130615-4-macroalpha82@gmail.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208154847.130615-4-macroalpha82@gmail.com
---
 drivers/gpu/drm/panel/panel-sitronix-st7701.c | 136 ++++++++++++++++++
 1 file changed, 136 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
index 036ac403ed213..421eb4592b61a 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
@@ -423,6 +423,42 @@ static void kd50t048a_gip_sequence(struct st7701 *st7701)
 		   0xFF, 0xFF, 0xFF, 0xFF, 0x10, 0x45, 0x67, 0x98, 0xBA);
 }
 
+static void rg_arc_gip_sequence(struct st7701 *st7701)
+{
+	st7701_switch_cmd_bkx(st7701, true, 3);
+	ST7701_DSI(st7701, 0xEF, 0x08);
+	st7701_switch_cmd_bkx(st7701, true, 0);
+	ST7701_DSI(st7701, 0xC7, 0x04);
+	ST7701_DSI(st7701, 0xCC, 0x38);
+	st7701_switch_cmd_bkx(st7701, true, 1);
+	ST7701_DSI(st7701, 0xB9, 0x10);
+	ST7701_DSI(st7701, 0xBC, 0x03);
+	ST7701_DSI(st7701, 0xC0, 0x89);
+	ST7701_DSI(st7701, 0xE0, 0x00, 0x00, 0x02);
+	ST7701_DSI(st7701, 0xE1, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00,
+		   0x00, 0x00, 0x20, 0x20);
+	ST7701_DSI(st7701, 0xE2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		   0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xE3, 0x00, 0x00, 0x33, 0x00);
+	ST7701_DSI(st7701, 0xE4, 0x22, 0x00);
+	ST7701_DSI(st7701, 0xE5, 0x04, 0x5C, 0xA0, 0xA0, 0x06, 0x5C, 0xA0,
+		   0xA0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xE6, 0x00, 0x00, 0x33, 0x00);
+	ST7701_DSI(st7701, 0xE7, 0x22, 0x00);
+	ST7701_DSI(st7701, 0xE8, 0x05, 0x5C, 0xA0, 0xA0, 0x07, 0x5C, 0xA0,
+		   0xA0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xEB, 0x02, 0x00, 0x40, 0x40, 0x00, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xEC, 0x00, 0x00);
+	ST7701_DSI(st7701, 0xED, 0xFA, 0x45, 0x0B, 0xFF, 0xFF, 0xFF, 0xFF,
+		   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xB0, 0x54, 0xAF);
+	ST7701_DSI(st7701, 0xEF, 0x08, 0x08, 0x08, 0x45, 0x3F, 0x54);
+	st7701_switch_cmd_bkx(st7701, false, 0);
+	ST7701_DSI(st7701, MIPI_DCS_SET_ADDRESS_MODE, 0x17);
+	ST7701_DSI(st7701, MIPI_DCS_SET_PIXEL_FORMAT, 0x77);
+	ST7701_DSI(st7701, MIPI_DCS_EXIT_SLEEP_MODE, 0x00);
+	msleep(120);
+}
+
 static int st7701_prepare(struct drm_panel *panel)
 {
 	struct st7701 *st7701 = panel_to_st7701(panel);
@@ -839,6 +875,105 @@ static const struct st7701_panel_desc kd50t048a_desc = {
 	.gip_sequence = kd50t048a_gip_sequence,
 };
 
+static const struct drm_display_mode rg_arc_mode = {
+	.clock          = 25600,
+
+	.hdisplay	= 480,
+	.hsync_start	= 480 + 60,
+	.hsync_end	= 480 + 60 + 42,
+	.htotal         = 480 + 60 + 42 + 60,
+
+	.vdisplay	= 640,
+	.vsync_start	= 640 + 10,
+	.vsync_end	= 640 + 10 + 4,
+	.vtotal         = 640 + 10 + 4 + 16,
+
+	.width_mm	= 63,
+	.height_mm	= 84,
+
+	.type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+};
+
+static const struct st7701_panel_desc rg_arc_desc = {
+	.mode = &rg_arc_mode,
+	.lanes = 2,
+	.format = MIPI_DSI_FMT_RGB888,
+	.panel_sleep_delay = 80,
+
+	.pv_gamma = {
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0x01) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC0_MASK, 0),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC4_MASK, 0x16),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC8_MASK, 0x1d),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC16_MASK, 0x0e),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC24_MASK, 0x12),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC52_MASK, 0x06),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC80_MASK, 0x0c),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC108_MASK, 0x0a),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC147_MASK, 0x09),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC175_MASK, 0x25),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC203_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC231_MASK, 0x03),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC239_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC247_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC251_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC255_MASK, 0x1c)
+	},
+	.nv_gamma = {
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0x01) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC0_MASK, 0),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC4_MASK, 0x16),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC8_MASK, 0x1e),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC16_MASK, 0x0e),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC24_MASK, 0x11),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC52_MASK, 0x06),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC80_MASK, 0x0c),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC108_MASK, 0x08),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC147_MASK, 0x09),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC175_MASK, 0x26),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC203_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC231_MASK, 0x15),
+
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC239_MASK, 0x00),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC247_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC251_MASK, 0x3f),
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_AJ_MASK, 0) |
+		CFIELD_PREP(DSI_CMD2_BK0_GAMCTRL_VC255_MASK, 0x1c)
+	},
+	.nlinv = 0,
+	.vop_uv = 4500000,
+	.vcom_uv = 762500,
+	.vgh_mv = 15000,
+	.vgl_mv = -9510,
+	.avdd_mv = 6600,
+	.avcl_mv = -4400,
+	.gamma_op_bias = OP_BIAS_MIDDLE,
+	.input_op_bias = OP_BIAS_MIN,
+	.output_op_bias = OP_BIAS_MIN,
+	.t2d_ns = 1600,
+	.t3d_ns = 10400,
+	.eot_en = true,
+	.gip_sequence = rg_arc_gip_sequence,
+};
+
 static int st7701_dsi_probe(struct mipi_dsi_device *dsi)
 {
 	const struct st7701_panel_desc *desc;
@@ -917,6 +1052,7 @@ static void st7701_dsi_remove(struct mipi_dsi_device *dsi)
 }
 
 static const struct of_device_id st7701_of_match[] = {
+	{ .compatible = "anbernic,rg-arc-panel", .data = &rg_arc_desc },
 	{ .compatible = "densitron,dmt028vghmcmi-1a", .data = &dmt028vghmcmi_1a_desc },
 	{ .compatible = "elida,kd50t048a", .data = &kd50t048a_desc },
 	{ .compatible = "techstar,ts8550b", .data = &ts8550b_desc },
-- 
GitLab


From 2e87bad7cd339882cf26b7101a1c87dab71962c9 Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Thu, 7 Dec 2023 15:16:35 +0100
Subject: [PATCH 0990/2340] drm/panel: Add Synaptics R63353 panel driver

The LS068B3SX02 panel is based on the Synaptics R63353 Controller.
Add a driver for it.

Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231207141723.108004-7-dario.binacchi@amarulasolutions.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207141723.108004-7-dario.binacchi@amarulasolutions.com
---
 MAINTAINERS                                   |   6 +
 drivers/gpu/drm/panel/Kconfig                 |   9 +
 drivers/gpu/drm/panel/Makefile                |   1 +
 .../gpu/drm/panel/panel-synaptics-r63353.c    | 363 ++++++++++++++++++
 4 files changed, 379 insertions(+)
 create mode 100644 drivers/gpu/drm/panel/panel-synaptics-r63353.c

diff --git a/MAINTAINERS b/MAINTAINERS
index d4b46b3db022f..3a5ac0c7cc0ec 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6873,6 +6873,12 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/display/ste,mcde.yaml
 F:	drivers/gpu/drm/mcde/
 
+DRM DRIVER FOR SYNAPTICS R63353 PANELS
+M:	Michael Trimarchi <michael@amarulasolutions.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/display/panel/synaptics,r63353.yaml
+F:	drivers/gpu/drm/panel/panel-synaptics-r63353.c
+
 DRM DRIVER FOR TI DLPC3433 MIPI DSI TO DMD BRIDGE
 M:	Jagan Teki <jagan@amarulasolutions.com>
 S:	Maintained
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 99e14dc212ecb..d018702be3dc5 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -735,6 +735,15 @@ config DRM_PANEL_SITRONIX_ST7789V
 	  Say Y here if you want to enable support for the Sitronix
 	  ST7789V controller for 240x320 LCD panels
 
+config DRM_PANEL_SYNAPTICS_R63353
+	tristate "Synaptics R63353-based panels"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y if you want to enable support for panels based on the
+	  Synaptics R63353 controller.
+
 config DRM_PANEL_SONY_ACX565AKM
 	tristate "Sony ACX565AKM panel"
 	depends on GPIOLIB && OF && SPI
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index d10c3de51c6db..f267d932c2b5a 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_DRM_PANEL_SHARP_LS060T1SX01) += panel-sharp-ls060t1sx01.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7701) += panel-sitronix-st7701.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7703) += panel-sitronix-st7703.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7789V) += panel-sitronix-st7789v.o
+obj-$(CONFIG_DRM_PANEL_SYNAPTICS_R63353) += panel-synaptics-r63353.o
 obj-$(CONFIG_DRM_PANEL_SONY_ACX565AKM) += panel-sony-acx565akm.o
 obj-$(CONFIG_DRM_PANEL_SONY_TD4353_JDI) += panel-sony-td4353-jdi.o
 obj-$(CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521) += panel-sony-tulip-truly-nt35521.o
diff --git a/drivers/gpu/drm/panel/panel-synaptics-r63353.c b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
new file mode 100644
index 0000000000000..3f61fcdc550bb
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synaptics R63353 Controller driver
+ *
+ * Copyright (C) 2020 BSH Hausgerate GmbH
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/fb.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/media-bus-format.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
+#include <video/mipi_display.h>
+
+#define R63353_INSTR(...) { \
+		.len = sizeof((u8[]) {__VA_ARGS__}), \
+		.data = (u8[]){__VA_ARGS__} \
+	}
+
+struct r63353_instr {
+	size_t len;
+	const u8 *data;
+};
+
+static const struct r63353_instr sharp_ls068b3sx02_init[] = {
+	R63353_INSTR(0x51, 0xff),
+	R63353_INSTR(0x53, 0x0c),
+	R63353_INSTR(0x55, 0x00),
+	R63353_INSTR(0x84, 0x00),
+	R63353_INSTR(0x29),
+};
+
+struct r63353_desc {
+	const char *name;
+	const struct r63353_instr *init;
+	const size_t init_length;
+	const struct drm_display_mode *mode;
+	u32 width_mm;
+	u32 height_mm;
+};
+
+struct r63353_panel {
+	struct drm_panel base;
+	struct mipi_dsi_device *dsi;
+
+	struct gpio_desc *reset_gpio;
+	struct regulator *dvdd;
+	struct regulator *avdd;
+
+	struct r63353_desc *pdata;
+};
+
+static inline struct r63353_panel *to_r63353_panel(struct drm_panel *panel)
+{
+	return container_of(panel, struct r63353_panel, base);
+}
+
+static int r63353_panel_power_on(struct r63353_panel *rpanel)
+{
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	ret = regulator_enable(rpanel->avdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable avdd regulator (%d)\n", ret);
+		return ret;
+	}
+
+	usleep_range(15000, 25000);
+
+	ret = regulator_enable(rpanel->dvdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable dvdd regulator (%d)\n", ret);
+		regulator_disable(rpanel->avdd);
+		return ret;
+	}
+
+	usleep_range(300000, 350000);
+	gpiod_set_value(rpanel->reset_gpio, 1);
+	usleep_range(15000, 25000);
+
+	return 0;
+}
+
+static int r63353_panel_power_off(struct r63353_panel *rpanel)
+{
+	gpiod_set_value(rpanel->reset_gpio, 0);
+	regulator_disable(rpanel->dvdd);
+	regulator_disable(rpanel->avdd);
+
+	return 0;
+}
+
+static int r63353_panel_activate(struct r63353_panel *rpanel)
+{
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int i, ret;
+
+	ret = mipi_dsi_dcs_soft_reset(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to do Software Reset (%d)\n", ret);
+		goto fail;
+	}
+
+	usleep_range(15000, 17000);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enter sleep mode (%d)\n", ret);
+		goto fail;
+	}
+
+	for (i = 0; i < rpanel->pdata->init_length; i++) {
+		const struct r63353_instr *instr = &rpanel->pdata->init[i];
+
+		ret = mipi_dsi_dcs_write_buffer(dsi, instr->data, instr->len);
+		if (ret < 0)
+			goto fail;
+	}
+
+	msleep(120);
+
+	ret = mipi_dsi_dcs_exit_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to exit sleep mode (%d)\n", ret);
+		goto fail;
+	}
+
+	usleep_range(5000, 10000);
+
+	ret = mipi_dsi_dcs_set_display_on(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set display ON (%d)\n", ret);
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	gpiod_set_value(rpanel->reset_gpio, 0);
+
+	return ret;
+}
+
+static int r63353_panel_prepare(struct drm_panel *panel)
+{
+	struct r63353_panel *rpanel = to_r63353_panel(panel);
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	dev_dbg(dev, "Preparing\n");
+
+	ret = r63353_panel_power_on(rpanel);
+	if (ret)
+		return ret;
+
+	ret = r63353_panel_activate(rpanel);
+	if (ret) {
+		r63353_panel_power_off(rpanel);
+		return ret;
+	}
+
+	dev_dbg(dev, "Prepared\n");
+	return 0;
+}
+
+static int r63353_panel_deactivate(struct r63353_panel *rpanel)
+{
+	struct mipi_dsi_device *dsi = rpanel->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	ret = mipi_dsi_dcs_set_display_off(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set display OFF (%d)\n", ret);
+		return ret;
+	}
+
+	usleep_range(5000, 10000);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enter sleep mode (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int r63353_panel_unprepare(struct drm_panel *panel)
+{
+	struct r63353_panel *rpanel = to_r63353_panel(panel);
+
+	r63353_panel_deactivate(rpanel);
+	r63353_panel_power_off(rpanel);
+
+	return 0;
+}
+
+static const struct drm_display_mode sharp_ls068b3sx02_timing = {
+	.clock = 70000,
+	.hdisplay = 640,
+	.hsync_start = 640 + 35,
+	.hsync_end = 640 + 35 + 2,
+	.htotal = 640 + 35 + 2 + 150,
+	.vdisplay = 1280,
+	.vsync_start = 1280 + 2,
+	.vsync_end = 1280 + 2 + 4,
+	.vtotal = 1280 + 2 + 4 + 0,
+};
+
+static int r63353_panel_get_modes(struct drm_panel *panel,
+				  struct drm_connector *connector)
+{
+	struct r63353_panel *rpanel = to_r63353_panel(panel);
+	struct drm_display_mode *mode;
+	static const u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+
+	mode = drm_mode_duplicate(connector->dev, rpanel->pdata->mode);
+	if (!mode)
+		return -ENOMEM;
+
+	drm_mode_set_name(mode);
+	drm_mode_probed_add(connector, mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	connector->display_info.width_mm = rpanel->pdata->width_mm;
+	connector->display_info.height_mm = rpanel->pdata->height_mm;
+
+	drm_display_info_set_bus_formats(&connector->display_info,
+					 &bus_format, 1);
+
+	return 1;
+}
+
+static const struct drm_panel_funcs r63353_panel_funcs = {
+	.prepare = r63353_panel_prepare,
+	.unprepare = r63353_panel_unprepare,
+	.get_modes = r63353_panel_get_modes,
+};
+
+static int r63353_panel_probe(struct mipi_dsi_device *dsi)
+{
+	int ret = 0;
+	struct device *dev = &dsi->dev;
+	struct r63353_panel *panel;
+
+	panel = devm_kzalloc(&dsi->dev, sizeof(*panel), GFP_KERNEL);
+	if (!panel)
+		return -ENOMEM;
+
+	mipi_dsi_set_drvdata(dsi, panel);
+	panel->dsi = dsi;
+	panel->pdata = (struct r63353_desc *)of_device_get_match_data(dev);
+
+	dev_info(dev, "Panel %s\n", panel->pdata->name);
+
+	dsi->lanes = 2;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO |
+			  MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM |
+			  MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET;
+
+	panel->dvdd = devm_regulator_get(dev, "dvdd");
+	if (IS_ERR(panel->dvdd))
+		return PTR_ERR(panel->dvdd);
+	panel->avdd = devm_regulator_get(dev, "avdd");
+	if (IS_ERR(panel->avdd))
+		return PTR_ERR(panel->avdd);
+
+	panel->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(panel->reset_gpio)) {
+		dev_err(dev, "failed to get RESET GPIO\n");
+		return PTR_ERR(panel->reset_gpio);
+	}
+
+	drm_panel_init(&panel->base, dev, &r63353_panel_funcs,
+		       DRM_MODE_CONNECTOR_DSI);
+
+	panel->base.prepare_prev_first = true;
+	ret = drm_panel_of_backlight(&panel->base);
+	if (ret)
+		return ret;
+
+	drm_panel_add(&panel->base);
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(dev, "mipi_dsi_attach failed: %d\n", ret);
+		drm_panel_remove(&panel->base);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void r63353_panel_remove(struct mipi_dsi_device *dsi)
+{
+	struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi);
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	ret = mipi_dsi_detach(dsi);
+	if (ret < 0)
+		dev_err(dev, "Failed to detach from host (%d)\n", ret);
+
+	drm_panel_remove(&rpanel->base);
+}
+
+static void r63353_panel_shutdown(struct mipi_dsi_device *dsi)
+{
+	struct r63353_panel *rpanel = mipi_dsi_get_drvdata(dsi);
+
+	r63353_panel_unprepare(&rpanel->base);
+}
+
+static const struct r63353_desc sharp_ls068b3sx02_data = {
+	.name = "Sharp LS068B3SX02",
+	.mode = &sharp_ls068b3sx02_timing,
+	.init = sharp_ls068b3sx02_init,
+	.init_length = ARRAY_SIZE(sharp_ls068b3sx02_init),
+	.width_mm = 68,
+	.height_mm = 159,
+};
+
+static const struct of_device_id r63353_of_match[] = {
+	{ .compatible = "sharp,ls068b3sx02", .data = &sharp_ls068b3sx02_data },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(of, r63353_of_match);
+
+static struct mipi_dsi_driver r63353_panel_driver = {
+	.driver = {
+		   .name = "r63353-dsi",
+		   .of_match_table = r63353_of_match,
+	},
+	.probe = r63353_panel_probe,
+	.remove = r63353_panel_remove,
+	.shutdown = r63353_panel_shutdown,
+};
+
+module_mipi_dsi_driver(r63353_panel_driver);
+
+MODULE_AUTHOR("Matthias Proske <Matthias.Proske@bshg.com>");
+MODULE_AUTHOR("Michael Trimarchi <michael@amarulasolutions.com>");
+MODULE_DESCRIPTION("Synaptics R63353 Controller Driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 549240c98e50207244bc1ac182622b8daba89a89 Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Thu, 7 Dec 2023 15:16:36 +0100
Subject: [PATCH 0991/2340] dt-bindings: display: panel: Add Ilitek ili9805
 panel controller

Add documentation for "ilitek,ili9805" panel.

Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20231207141723.108004-8-dario.binacchi@amarulasolutions.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207141723.108004-8-dario.binacchi@amarulasolutions.com
---
 .../display/panel/ilitek,ili9805.yaml         | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml

diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml
new file mode 100644
index 0000000000000..f4f91f93f4903
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/ilitek,ili9805.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ilitek ILI9805 based MIPI-DSI panels
+
+maintainers:
+  - Michael Trimarchi <michael@amarulasolutions.com>
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - giantplus,gpm1790a0
+          - tianma,tm041xdhg01
+      - const: ilitek,ili9805
+
+  avdd-supply: true
+  dvdd-supply: true
+  reg: true
+
+required:
+  - compatible
+  - avdd-supply
+  - dvdd-supply
+  - reg
+  - reset-gpios
+  - port
+  - backlight
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    dsi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        panel@0 {
+            compatible = "giantplus,gpm1790a0", "ilitek,ili9805";
+            reg = <0>;
+            avdd-supply = <&avdd_display>;
+            dvdd-supply = <&dvdd_display>;
+            reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
+            backlight = <&backlight>;
+
+            port {
+                panel_in: endpoint {
+                    remote-endpoint = <&mipi_dsi_out>;
+                };
+            };
+        };
+    };
+
+...
-- 
GitLab


From edbf1d506ebe8c0857c406bd5d5b81d46ffd8437 Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Thu, 7 Dec 2023 15:16:37 +0100
Subject: [PATCH 0992/2340] drm/panel: Add Ilitek ILI9805 panel driver

The GPM1790A0 panel is based on the Ilitek ILI9805 Controller.
Add a driver for it.

Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20231207141723.108004-9-dario.binacchi@amarulasolutions.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207141723.108004-9-dario.binacchi@amarulasolutions.com
---
 MAINTAINERS                                  |   6 +
 drivers/gpu/drm/panel/Kconfig                |   9 +
 drivers/gpu/drm/panel/Makefile               |   1 +
 drivers/gpu/drm/panel/panel-ilitek-ili9805.c | 353 +++++++++++++++++++
 4 files changed, 369 insertions(+)
 create mode 100644 drivers/gpu/drm/panel/panel-ilitek-ili9805.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 3a5ac0c7cc0ec..98ffa3ed90a0b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6645,6 +6645,12 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/display/ilitek,ili9486.yaml
 F:	drivers/gpu/drm/tiny/ili9486.c
 
+DRM DRIVER FOR ILITEK ILI9805 PANELS
+M:	Michael Trimarchi <michael@amarulasolutions.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml
+F:	drivers/gpu/drm/panel/panel-ilitek-ili9805.c
+
 DRM DRIVER FOR JADARD JD9365DA-H3 MIPI-DSI LCD PANELS
 M:	Jagan Teki <jagan@edgeble.ai>
 S:	Maintained
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index d018702be3dc5..dad938cf6decf 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -194,6 +194,15 @@ config DRM_PANEL_ILITEK_ILI9341
 	  QVGA (240x320) RGB panels. support serial & parallel rgb
 	  interface.
 
+config DRM_PANEL_ILITEK_ILI9805
+	tristate "Ilitek ILI9805-based panels"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y if you want to enable support for panels based on the
+	  Ilitek ILI9805 controller.
+
 config DRM_PANEL_ILITEK_ILI9881C
 	tristate "Ilitek ILI9881C-based panels"
 	depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index f267d932c2b5a..d94a644d0a6ce 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D) += panel-feiyang-fy07024di26a30d
 obj-$(CONFIG_DRM_PANEL_HIMAX_HX8394) += panel-himax-hx8394.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_IL9322) += panel-ilitek-ili9322.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9341) += panel-ilitek-ili9341.o
+obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9805) += panel-ilitek-ili9805.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9881C) += panel-ilitek-ili9881c.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9882T) += panel-ilitek-ili9882t.o
 obj-$(CONFIG_DRM_PANEL_INNOLUX_EJ030NA) += panel-innolux-ej030na.o
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9805.c b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
new file mode 100644
index 0000000000000..e36984b46e14e
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 BSH Hausgerate GmbH
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/fb.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
+#include <video/mipi_display.h>
+
+#define ILI9805_EXTCMD_CMD_SET_ENABLE_REG	(0xff)
+#define ILI9805_SETEXTC_PARAMETER1		(0xff)
+#define ILI9805_SETEXTC_PARAMETER2		(0x98)
+#define ILI9805_SETEXTC_PARAMETER3		(0x05)
+
+#define ILI9805_INSTR(_delay, ...) { \
+		.delay = (_delay), \
+		.len = sizeof((u8[]) {__VA_ARGS__}), \
+		.data = (u8[]){__VA_ARGS__} \
+	}
+
+struct ili9805_instr {
+	size_t len;
+	const u8 *data;
+	u32 delay;
+};
+
+struct ili9805_desc {
+	const char *name;
+	const struct ili9805_instr *init;
+	const size_t init_length;
+	const struct drm_display_mode *mode;
+	u32 width_mm;
+	u32 height_mm;
+};
+
+struct ili9805 {
+	struct drm_panel	panel;
+	struct mipi_dsi_device	*dsi;
+	const struct ili9805_desc	*desc;
+
+	struct regulator	*dvdd;
+	struct regulator	*avdd;
+	struct gpio_desc	*reset_gpio;
+};
+
+static const struct ili9805_instr gpm1780a0_init[] = {
+	ILI9805_INSTR(100, ILI9805_EXTCMD_CMD_SET_ENABLE_REG, ILI9805_SETEXTC_PARAMETER1,
+		      ILI9805_SETEXTC_PARAMETER2, ILI9805_SETEXTC_PARAMETER3),
+	ILI9805_INSTR(100, 0xFD, 0x0F, 0x10, 0x44, 0x00),
+	ILI9805_INSTR(0, 0xf8, 0x18, 0x02, 0x02, 0x18, 0x02, 0x02, 0x30, 0x00,
+		      0x00, 0x30, 0x00, 0x00, 0x30, 0x00, 0x00),
+	ILI9805_INSTR(0, 0xB8, 0x62),
+	ILI9805_INSTR(0, 0xF1, 0x00),
+	ILI9805_INSTR(0, 0xF2, 0x00, 0x58, 0x40),
+	ILI9805_INSTR(0, 0xF3, 0x60, 0x83, 0x04),
+	ILI9805_INSTR(0, 0xFC, 0x04, 0x0F, 0x01),
+	ILI9805_INSTR(0, 0xEB, 0x08, 0x0F),
+	ILI9805_INSTR(0, 0xe0, 0x00, 0x08, 0x0d, 0x0e, 0x0e, 0x0d, 0x0a, 0x08, 0x04,
+		      0x08, 0x0d, 0x0f, 0x0b, 0x1c, 0x14, 0x0a),
+	ILI9805_INSTR(0, 0xe1, 0x00, 0x08, 0x0d, 0x0e, 0x0e, 0x0d, 0x0a, 0x08, 0x04,
+		      0x08, 0x0d, 0x0f, 0x0b, 0x1c, 0x14, 0x0a),
+	ILI9805_INSTR(10, 0xc1, 0x13, 0x39, 0x19, 0x06),
+	ILI9805_INSTR(10, 0xc7, 0xe5),
+	ILI9805_INSTR(10, 0xB1, 0x00, 0x12, 0x14),
+	ILI9805_INSTR(10, 0xB4, 0x02),
+	ILI9805_INSTR(0, 0xBB, 0x14, 0x55),
+	ILI9805_INSTR(0, MIPI_DCS_SET_ADDRESS_MODE, 0x08),
+	ILI9805_INSTR(0, MIPI_DCS_SET_PIXEL_FORMAT, 0x77),
+	ILI9805_INSTR(0, 0x20),
+	ILI9805_INSTR(0, 0xB0, 0x01),
+	ILI9805_INSTR(0, 0xB6, 0x31, 0x00, 0xef),
+	ILI9805_INSTR(0, 0xDF, 0x23),
+	ILI9805_INSTR(0, 0xB9, 0x02, 0x00),
+};
+
+static inline struct ili9805 *panel_to_ili9805(struct drm_panel *panel)
+{
+	return container_of(panel, struct ili9805, panel);
+}
+
+static int ili9805_power_on(struct ili9805 *ctx)
+{
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	ret = regulator_enable(ctx->avdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable avdd regulator (%d)\n", ret);
+		return ret;
+	}
+
+	ret = regulator_enable(ctx->dvdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable dvdd regulator (%d)\n", ret);
+		regulator_disable(ctx->avdd);
+		return ret;
+	}
+
+	gpiod_set_value(ctx->reset_gpio, 0);
+	usleep_range(5000, 10000);
+	gpiod_set_value(ctx->reset_gpio, 1);
+	msleep(120);
+
+	return 0;
+}
+
+static int ili9805_power_off(struct ili9805 *ctx)
+{
+	gpiod_set_value(ctx->reset_gpio, 0);
+	regulator_disable(ctx->dvdd);
+	regulator_disable(ctx->avdd);
+
+	return 0;
+}
+
+static int ili9805_activate(struct ili9805 *ctx)
+{
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	struct device *dev = &dsi->dev;
+	int i, ret;
+
+	for (i = 0; i < ctx->desc->init_length; i++) {
+		const struct ili9805_instr *instr = &ctx->desc->init[i];
+
+		ret = mipi_dsi_dcs_write_buffer(ctx->dsi, instr->data, instr->len);
+		if (ret < 0)
+			return ret;
+
+		if (instr->delay > 0)
+			msleep(instr->delay);
+	}
+
+	ret = mipi_dsi_dcs_exit_sleep_mode(ctx->dsi);
+	if (ret) {
+		dev_err(dev, "Failed to exit sleep mode (%d)\n", ret);
+		return ret;
+	}
+
+	usleep_range(5000, 6000);
+
+	ret = mipi_dsi_dcs_set_display_on(ctx->dsi);
+	if (ret) {
+		dev_err(dev, "Failed to set display ON (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ili9805_prepare(struct drm_panel *panel)
+{
+	struct ili9805 *ctx = panel_to_ili9805(panel);
+	int ret;
+
+	ret = ili9805_power_on(ctx);
+	if (ret)
+		return ret;
+
+	ret = ili9805_activate(ctx);
+	if (ret) {
+		ili9805_power_off(ctx);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ili9805_deactivate(struct ili9805 *ctx)
+{
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	ret = mipi_dsi_dcs_set_display_off(ctx->dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set display OFF (%d)\n", ret);
+		return ret;
+	}
+
+	usleep_range(5000, 10000);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(ctx->dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enter sleep mode (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ili9805_unprepare(struct drm_panel *panel)
+{
+	struct ili9805 *ctx = panel_to_ili9805(panel);
+
+	ili9805_deactivate(ctx);
+	ili9805_power_off(ctx);
+
+	return 0;
+}
+
+static const struct drm_display_mode gpm1780a0_timing = {
+	.clock = 26227,
+
+	.hdisplay = 480,
+	.hsync_start = 480 + 10,
+	.hsync_end = 480 + 10 + 2,
+	.htotal = 480 + 10 + 2 + 36,
+
+	.vdisplay = 480,
+	.vsync_start = 480 + 2,
+	.vsync_end = 480 + 10 + 4,
+	.vtotal = 480 + 2 + 4 + 10,
+};
+
+static int ili9805_get_modes(struct drm_panel *panel,
+			      struct drm_connector *connector)
+{
+	struct ili9805 *ctx = panel_to_ili9805(panel);
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(connector->dev, ctx->desc->mode);
+	if (!mode) {
+		dev_err(&ctx->dsi->dev, "failed to add mode %ux%ux@%u\n",
+			ctx->desc->mode->hdisplay,
+			ctx->desc->mode->vdisplay,
+			drm_mode_vrefresh(ctx->desc->mode));
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, mode);
+
+	connector->display_info.width_mm = mode->width_mm;
+	connector->display_info.height_mm = mode->height_mm;
+
+	return 1;
+}
+
+static const struct drm_panel_funcs ili9805_funcs = {
+	.prepare	= ili9805_prepare,
+	.unprepare	= ili9805_unprepare,
+	.get_modes	= ili9805_get_modes,
+};
+
+static int ili9805_dsi_probe(struct mipi_dsi_device *dsi)
+{
+	struct ili9805 *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	mipi_dsi_set_drvdata(dsi, ctx);
+	ctx->dsi = dsi;
+	ctx->desc = of_device_get_match_data(&dsi->dev);
+
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO |
+		MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM |
+		MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET;
+	dsi->lanes = 2;
+
+	drm_panel_init(&ctx->panel, &dsi->dev, &ili9805_funcs,
+		       DRM_MODE_CONNECTOR_DSI);
+
+	ctx->dvdd = devm_regulator_get(&dsi->dev, "dvdd");
+	if (IS_ERR(ctx->dvdd))
+		return PTR_ERR(ctx->dvdd);
+	ctx->avdd = devm_regulator_get(&dsi->dev, "avdd");
+	if (IS_ERR(ctx->avdd))
+		return PTR_ERR(ctx->avdd);
+
+	ctx->reset_gpio = devm_gpiod_get(&dsi->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(ctx->reset_gpio)) {
+		dev_err(&dsi->dev, "Couldn't get our reset GPIO\n");
+		return PTR_ERR(ctx->reset_gpio);
+	}
+
+	ctx->panel.prepare_prev_first = true;
+	ret = drm_panel_of_backlight(&ctx->panel);
+	if (ret)
+		return ret;
+
+	drm_panel_add(&ctx->panel);
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(&dsi->dev, "mipi_dsi_attach failed: %d\n", ret);
+		drm_panel_remove(&ctx->panel);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void ili9805_dsi_remove(struct mipi_dsi_device *dsi)
+{
+	struct ili9805 *ctx = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	ret = mipi_dsi_detach(dsi);
+	if (ret < 0)
+		dev_err(&dsi->dev, "failed to detach from DSI host: %d\n",
+			ret);
+
+	drm_panel_remove(&ctx->panel);
+}
+
+static const struct ili9805_desc gpm1780a0_desc = {
+	.init = gpm1780a0_init,
+	.init_length = ARRAY_SIZE(gpm1780a0_init),
+	.mode = &gpm1780a0_timing,
+	.width_mm = 65,
+	.height_mm = 65,
+};
+
+static const struct of_device_id ili9805_of_match[] = {
+	{ .compatible = "giantplus,gpm1790a0", .data = &gpm1780a0_desc },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ili9805_of_match);
+
+static struct mipi_dsi_driver ili9805_dsi_driver = {
+	.probe		= ili9805_dsi_probe,
+	.remove		= ili9805_dsi_remove,
+	.driver = {
+		.name		= "ili9805-dsi",
+		.of_match_table	= ili9805_of_match,
+	},
+};
+module_mipi_dsi_driver(ili9805_dsi_driver);
+
+MODULE_AUTHOR("Matthias Proske <Matthias.Proske@bshg.com>");
+MODULE_AUTHOR("Michael Trimarchi <michael@amarulasolutions.com>");
+MODULE_DESCRIPTION("Ilitek ILI9805 Controller Driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From b1fcb7ee3707290466b2cc4956325fb91f09f13b Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Thu, 7 Dec 2023 15:16:38 +0100
Subject: [PATCH 0993/2340] drm/panel: ilitek-ili9805: add support for Tianma
 TM041XDHG01 panel

Tianma TM041XDHG01 utilizes the Ilitek ILI9805 controller.

Add this panel's initialzation sequence and timing to ILI9805 driver.

Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>

Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Link: https://lore.kernel.org/r/20231207141723.108004-10-dario.binacchi@amarulasolutions.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207141723.108004-10-dario.binacchi@amarulasolutions.com
---
 drivers/gpu/drm/panel/panel-ilitek-ili9805.c | 53 ++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9805.c b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
index e36984b46e14e..5054d1a2b2f54 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
@@ -87,6 +87,36 @@ static const struct ili9805_instr gpm1780a0_init[] = {
 	ILI9805_INSTR(0, 0xB9, 0x02, 0x00),
 };
 
+static const struct ili9805_instr tm041xdhg01_init[] = {
+	ILI9805_INSTR(100, ILI9805_EXTCMD_CMD_SET_ENABLE_REG, ILI9805_SETEXTC_PARAMETER1,
+		      ILI9805_SETEXTC_PARAMETER2, ILI9805_SETEXTC_PARAMETER3),
+	ILI9805_INSTR(100, 0xFD, 0x0F, 0x13, 0x44, 0x00),
+	ILI9805_INSTR(0, 0xf8, 0x18, 0x02, 0x02, 0x18, 0x02, 0x02, 0x30, 0x01,
+		      0x01, 0x30, 0x01, 0x01, 0x30, 0x01, 0x01),
+	ILI9805_INSTR(0, 0xB8, 0x74),
+	ILI9805_INSTR(0, 0xF1, 0x00),
+	ILI9805_INSTR(0, 0xF2, 0x00, 0x58, 0x40),
+	ILI9805_INSTR(0, 0xFC, 0x04, 0x0F, 0x01),
+	ILI9805_INSTR(0, 0xEB, 0x08, 0x0F),
+	ILI9805_INSTR(0, 0xe0, 0x01, 0x0d, 0x15, 0x0e, 0x0f, 0x0f, 0x0b, 0x08, 0x04,
+		      0x07, 0x0a, 0x0d, 0x0c, 0x15, 0x0f, 0x08),
+	ILI9805_INSTR(0, 0xe1, 0x01, 0x0d, 0x15, 0x0e, 0x0f, 0x0f, 0x0b, 0x08, 0x04,
+		      0x07, 0x0a, 0x0d, 0x0c, 0x15, 0x0f, 0x08),
+	ILI9805_INSTR(10, 0xc1, 0x15, 0x03, 0x03, 0x31),
+	ILI9805_INSTR(10, 0xB1, 0x00, 0x12, 0x14),
+	ILI9805_INSTR(10, 0xB4, 0x02),
+	ILI9805_INSTR(0, 0xBB, 0x14, 0x55),
+	ILI9805_INSTR(0, MIPI_DCS_SET_ADDRESS_MODE, 0x0a),
+	ILI9805_INSTR(0, MIPI_DCS_SET_PIXEL_FORMAT, 0x77),
+	ILI9805_INSTR(0, 0x20),
+	ILI9805_INSTR(0, 0xB0, 0x00),
+	ILI9805_INSTR(0, 0xB6, 0x01),
+	ILI9805_INSTR(0, 0xc2, 0x11),
+	ILI9805_INSTR(0, 0x51, 0xFF),
+	ILI9805_INSTR(0, 0x53, 0x24),
+	ILI9805_INSTR(0, 0x55, 0x00),
+};
+
 static inline struct ili9805 *panel_to_ili9805(struct drm_panel *panel)
 {
 	return container_of(panel, struct ili9805, panel);
@@ -227,6 +257,20 @@ static const struct drm_display_mode gpm1780a0_timing = {
 	.vtotal = 480 + 2 + 4 + 10,
 };
 
+static const struct drm_display_mode tm041xdhg01_timing = {
+	.clock = 26227,
+
+	.hdisplay = 480,
+	.hsync_start = 480 + 10,
+	.hsync_end = 480 + 10 + 2,
+	.htotal = 480 + 10 + 2 + 36,
+
+	.vdisplay = 768,
+	.vsync_start = 768 + 2,
+	.vsync_end = 768 + 10 + 4,
+	.vtotal = 768 + 2 + 4 + 10,
+};
+
 static int ili9805_get_modes(struct drm_panel *panel,
 			      struct drm_connector *connector)
 {
@@ -331,8 +375,17 @@ static const struct ili9805_desc gpm1780a0_desc = {
 	.height_mm = 65,
 };
 
+static const struct ili9805_desc tm041xdhg01_desc = {
+	.init = tm041xdhg01_init,
+	.init_length = ARRAY_SIZE(tm041xdhg01_init),
+	.mode = &tm041xdhg01_timing,
+	.width_mm = 42,
+	.height_mm = 96,
+};
+
 static const struct of_device_id ili9805_of_match[] = {
 	{ .compatible = "giantplus,gpm1790a0", .data = &gpm1780a0_desc },
+	{ .compatible = "tianma,tm041xdhg01", .data = &tm041xdhg01_desc },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, ili9805_of_match);
-- 
GitLab


From bf7f730dea3125b2272ad7268f59e8992c5b7822 Mon Sep 17 00:00:00 2001
From: Elmar Albert <ealbert@data-modul.com>
Date: Sat, 9 Dec 2023 07:36:59 +0100
Subject: [PATCH 0994/2340] dt-bindings: display: simple: Add AUO G156HAN04.0
 LVDS display

Document support for the AUO G156HAN04.0 LVDS display.

G156HAN04.0 is a Color Active Matrix Liquid Crystal Display composed of
a TFT LCD panel, a driver circuit, and LED backlight system. The screen
format is intended to support the 16:9 FHD, 1920(H) x 1080(V) screen
and 16.7M colors (RGB 8-bits) with LED backlight driving circuit.
All input signals are LVDS interface compatible.

G156HAN04.0 is designed for a display unit of notebook style
personal computer and industrial machine.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Elmar Albert <ealbert@data-modul.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/20231209063714.1381913-1-marex@denx.de
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231209063714.1381913-1-marex@denx.de
---
 .../bindings/display/panel/panel-simple-lvds-dual-ports.yaml    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml
index a5a596ff8e752..716ece5f39784 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml
@@ -33,6 +33,8 @@ properties:
 
         # AU Optronics Corporation 13.3" FHD (1920x1080) TFT LCD panel
       - auo,g133han01
+        # AU Optronics Corporation 15.6" FHD (1920x1080) TFT LCD panel
+      - auo,g156han04
         # AU Optronics Corporation 18.5" FHD (1920x1080) TFT LCD panel
       - auo,g185han01
         # AU Optronics Corporation 19.0" (1280x1024) TFT LCD panel
-- 
GitLab


From 9e52d5c808215b0033cdbeca72700b1e401ea987 Mon Sep 17 00:00:00 2001
From: Elmar Albert <ealbert@data-modul.com>
Date: Sat, 9 Dec 2023 07:37:00 +0100
Subject: [PATCH 0995/2340] drm/panel: simple: Add AUO G156HAN04.0 LVDS display
 support

G156HAN04.0 is a Color Active Matrix Liquid Crystal Display composed of
a TFT LCD panel, a driver circuit, and LED backlight system. The screen
format is intended to support the 16:9 FHD, 1920(H) x 1080(V) screen
and 16.7M colors (RGB 8-bits) with LED backlight driving circuit.
All input signals are LVDS interface compatible.

G156HAN04.0 is designed for a display unit of notebook style
personal computer and industrial machine.

Signed-off-by: Elmar Albert <ealbert@data-modul.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/20231209063714.1381913-2-marex@denx.de
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231209063714.1381913-2-marex@denx.de
---
 drivers/gpu/drm/panel/panel-simple.c | 34 ++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 8017ad33cf18d..2214cb09678cd 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1134,6 +1134,37 @@ static const struct panel_desc auo_g133han01 = {
 	.connector_type = DRM_MODE_CONNECTOR_LVDS,
 };
 
+static const struct display_timing auo_g156han04_timings = {
+	.pixelclock = { 137000000, 141000000, 146000000 },
+	.hactive = { 1920, 1920, 1920 },
+	.hfront_porch = { 60, 60, 60 },
+	.hback_porch = { 90, 92, 111 },
+	.hsync_len =  { 32, 32, 32 },
+	.vactive = { 1080, 1080, 1080 },
+	.vfront_porch = { 12, 12, 12 },
+	.vback_porch = { 24, 36, 56 },
+	.vsync_len = { 8, 8, 8 },
+};
+
+static const struct panel_desc auo_g156han04 = {
+	.timings = &auo_g156han04_timings,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 344,
+		.height = 194,
+	},
+	.delay = {
+		.prepare = 50,		/* T2 */
+		.enable = 200,		/* T3 */
+		.disable = 110,		/* T10 */
+		.unprepare = 1000,	/* T13 */
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH,
+	.connector_type = DRM_MODE_CONNECTOR_LVDS,
+};
+
 static const struct drm_display_mode auo_g156xtn01_mode = {
 	.clock = 76000,
 	.hdisplay = 1366,
@@ -4288,6 +4319,9 @@ static const struct of_device_id platform_of_match[] = {
 	}, {
 		.compatible = "auo,g133han01",
 		.data = &auo_g133han01,
+	}, {
+		.compatible = "auo,g156han04",
+		.data = &auo_g156han04,
 	}, {
 		.compatible = "auo,g156xtn01",
 		.data = &auo_g156xtn01,
-- 
GitLab


From 23b392b94acb0499f69706c5808c099f590ebcf4 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 5 Dec 2023 20:05:51 +0200
Subject: [PATCH 0996/2340] drm/i915/edp: don't write to DP_LINK_BW_SET when
 using rate select
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The eDP 1.5 spec adds a clarification for eDP 1.4x:

> For eDP v1.4x, if the Source device chooses the Main-Link rate by way
> of DPCD 00100h, the Sink device shall ignore DPCD 00115h[2:0].

We write 0 to DP_LINK_BW_SET (DPCD 100h) even when using
DP_LINK_RATE_SET (DPCD 114h). Stop doing that, as it can cause the panel
to ignore the rate set method.

Moreover, 0 is a reserved value for DP_LINK_BW_SET, and should not be
used.

v2: Improve the comments (Ville)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9081
Tested-by: Animesh Manna <animesh.manna@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231205180551.2476228-1-jani.nikula@intel.com
---
 .../drm/i915/display/intel_dp_link_training.c | 31 +++++++++++++------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index dbc1b66c8ee48..1abfafbbfa757 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp,
 			    const struct intel_crtc_state *crtc_state,
 			    u8 link_bw, u8 rate_select)
 {
-	u8 link_config[2];
+	u8 lane_count = crtc_state->lane_count;
 
-	/* Write the link configuration data */
-	link_config[0] = link_bw;
-	link_config[1] = crtc_state->lane_count;
 	if (crtc_state->enhanced_framing)
-		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
+		lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	if (link_bw) {
+		/* DP and eDP v1.3 and earlier link bw set method. */
+		u8 link_config[] = { link_bw, lane_count };
 
-	/* eDP 1.4 rate select method. */
-	if (!link_bw)
-		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
-				  &rate_select, 1);
+		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config,
+				  ARRAY_SIZE(link_config));
+	} else {
+		/*
+		 * eDP v1.4 and later link rate set method.
+		 *
+		 * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if
+		 * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET.
+		 *
+		 * eDP v1.5 sinks allow choosing either, and the last choice
+		 * shall be active.
+		 */
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count);
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select);
+	}
 }
 
 /*
-- 
GitLab


From eb3f7cbee2942b2c98c1af1652199c46d507153e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sat, 9 Dec 2023 16:31:09 +0100
Subject: [PATCH 0997/2340] drm/bridge: ti-sn65dsi86: Associate PWM device to
 auxiliary device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's the ti_sn65dsi86.pwm auxiliary driver that creates the pwmchip, so
let the auxiliary device be the parent of the pwm device.

Note that getting a reference to the ti-sn65dsi86's pwm using pwm_get()
isn't affected by this change as ti_sn65dsi86_add_aux_device() sets the
auxiliary device's of_node to that of the main device.

Also change PM runtime tracking and diagnostic messages to use that one.
After enabling runtime PM operation for the auxiliary device, all works
as expected as parent devices are handled just fine.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Tested-by: Nikita Travkin <nikita@trvn.ru> # Acer Aspire 1
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231209153108.1988551-2-u.kleine-koenig@pengutronix.de
---
 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 5b8e1dfc458da..9095d14537103 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1413,7 +1413,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	int ret;
 
 	if (!pdata->pwm_enabled) {
-		ret = pm_runtime_resume_and_get(pdata->dev);
+		ret = pm_runtime_resume_and_get(chip->dev);
 		if (ret < 0)
 			return ret;
 	}
@@ -1429,7 +1429,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 						 SN_GPIO_MUX_MASK << (2 * SN_PWM_GPIO_IDX),
 						 SN_GPIO_MUX_SPECIAL << (2 * SN_PWM_GPIO_IDX));
 			if (ret) {
-				dev_err(pdata->dev, "failed to mux in PWM function\n");
+				dev_err(chip->dev, "failed to mux in PWM function\n");
 				goto out;
 			}
 		}
@@ -1505,7 +1505,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
 		ret = regmap_write(pdata->regmap, SN_PWM_PRE_DIV_REG, pre_div);
 		if (ret) {
-			dev_err(pdata->dev, "failed to update PWM_PRE_DIV\n");
+			dev_err(chip->dev, "failed to update PWM_PRE_DIV\n");
 			goto out;
 		}
 
@@ -1517,7 +1517,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 		     FIELD_PREP(SN_PWM_INV_MASK, state->polarity == PWM_POLARITY_INVERSED);
 	ret = regmap_write(pdata->regmap, SN_PWM_EN_INV_REG, pwm_en_inv);
 	if (ret) {
-		dev_err(pdata->dev, "failed to update PWM_EN/PWM_INV\n");
+		dev_err(chip->dev, "failed to update PWM_EN/PWM_INV\n");
 		goto out;
 	}
 
@@ -1525,7 +1525,7 @@ static int ti_sn_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 out:
 
 	if (!pdata->pwm_enabled)
-		pm_runtime_put_sync(pdata->dev);
+		pm_runtime_put_sync(chip->dev);
 
 	return ret;
 }
@@ -1585,12 +1585,14 @@ static int ti_sn_pwm_probe(struct auxiliary_device *adev,
 {
 	struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent);
 
-	pdata->pchip.dev = pdata->dev;
+	pdata->pchip.dev = &adev->dev;
 	pdata->pchip.ops = &ti_sn_pwm_ops;
 	pdata->pchip.npwm = 1;
 	pdata->pchip.of_xlate = of_pwm_single_xlate;
 	pdata->pchip.of_pwm_n_cells = 1;
 
+	devm_pm_runtime_enable(&adev->dev);
+
 	return pwmchip_add(&pdata->pchip);
 }
 
@@ -1601,7 +1603,7 @@ static void ti_sn_pwm_remove(struct auxiliary_device *adev)
 	pwmchip_remove(&pdata->pchip);
 
 	if (pdata->pwm_enabled)
-		pm_runtime_put_sync(pdata->dev);
+		pm_runtime_put_sync(&adev->dev);
 }
 
 static const struct auxiliary_device_id ti_sn_pwm_id_table[] = {
-- 
GitLab


From 62d35629da807d3a0729980e8012263ff84a0966 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 14:53:15 +0300
Subject: [PATCH 0998/2340] drm/msm/dpu: move encoder status to standard
 encoder debugfs dir

Now as we have standard per-encoder debugfs directory, move DPU encoder
status file to that directory.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570219/
Link: https://lore.kernel.org/r/20231203115315.1306124-4-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 45 +++------------------
 1 file changed, 6 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 889e9bb42715f..250c21a4cfc7e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -186,7 +186,6 @@ struct dpu_encoder_virt {
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
 
-	struct dentry *debugfs_root;
 	struct mutex enc_lock;
 	DECLARE_BITMAP(frame_busy_mask, MAX_PHYS_ENCODERS_PER_VIRTUAL);
 	void (*crtc_frame_event_cb)(void *, u32 event);
@@ -2078,7 +2077,8 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
 #ifdef CONFIG_DEBUG_FS
 static int _dpu_encoder_status_show(struct seq_file *s, void *data)
 {
-	struct dpu_encoder_virt *dpu_enc = s->private;
+	struct drm_encoder *drm_enc = s->private;
+	struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc);
 	int i;
 
 	mutex_lock(&dpu_enc->enc_lock);
@@ -2101,48 +2101,16 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data)
 
 DEFINE_SHOW_ATTRIBUTE(_dpu_encoder_status);
 
-static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc)
+static void dpu_encoder_debugfs_init(struct drm_encoder *drm_enc, struct dentry *root)
 {
-	struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc);
-
-	char name[12];
-
-	if (!drm_enc->dev) {
-		DPU_ERROR("invalid encoder or kms\n");
-		return -EINVAL;
-	}
-
-	snprintf(name, sizeof(name), "encoder%u", drm_enc->base.id);
-
-	/* create overall sub-directory for the encoder */
-	dpu_enc->debugfs_root = debugfs_create_dir(name,
-			drm_enc->dev->primary->debugfs_root);
-
 	/* don't error check these */
 	debugfs_create_file("status", 0600,
-		dpu_enc->debugfs_root, dpu_enc, &_dpu_encoder_status_fops);
-
-	return 0;
+			    root, drm_enc, &_dpu_encoder_status_fops);
 }
 #else
-static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc)
-{
-	return 0;
-}
+#define dpu_encoder_debugfs_init NULL
 #endif
 
-static int dpu_encoder_late_register(struct drm_encoder *encoder)
-{
-	return _dpu_encoder_init_debugfs(encoder);
-}
-
-static void dpu_encoder_early_unregister(struct drm_encoder *encoder)
-{
-	struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(encoder);
-
-	debugfs_remove_recursive(dpu_enc->debugfs_root);
-}
-
 static int dpu_encoder_virt_add_phys_encs(
 		struct drm_device *dev,
 		struct msm_display_info *disp_info,
@@ -2329,8 +2297,7 @@ static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = {
 };
 
 static const struct drm_encoder_funcs dpu_encoder_funcs = {
-		.late_register = dpu_encoder_late_register,
-		.early_unregister = dpu_encoder_early_unregister,
+	.debugfs_init = dpu_encoder_debugfs_init,
 };
 
 struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
-- 
GitLab


From 15d03119ed215177c52fb5c9edbe184b78263b65 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Fri, 24 Nov 2023 08:53:04 +0100
Subject: [PATCH 0999/2340] drm/i915/display: do not use cursor size reduction
 on MTL

Cursor size reduction is not supported since MTL.

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124-cur_size_reduction-v1-1-30495dba475f@intel.com
---
 drivers/gpu/drm/i915/display/intel_display_device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index 79e9f1c3e241f..fe42688137863 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -36,7 +36,7 @@ struct drm_printer;
 #define HAS_ASYNC_FLIPS(i915)		(DISPLAY_VER(i915) >= 5)
 #define HAS_CDCLK_CRAWL(i915)		(DISPLAY_INFO(i915)->has_cdclk_crawl)
 #define HAS_CDCLK_SQUASH(i915)		(DISPLAY_INFO(i915)->has_cdclk_squash)
-#define HAS_CUR_FBC(i915)		(!HAS_GMCH(i915) && DISPLAY_VER(i915) >= 7)
+#define HAS_CUR_FBC(i915)		(!HAS_GMCH(i915) && IS_DISPLAY_VER(i915, 7, 13))
 #define HAS_D12_PLANE_MINIMIZATION(i915) (IS_ROCKETLAKE(i915) || IS_ALDERLAKE_S(i915))
 #define HAS_DDI(i915)			(DISPLAY_INFO(i915)->has_ddi)
 #define HAS_DISPLAY(i915)		(DISPLAY_RUNTIME_INFO(i915)->pipe_mask != 0)
-- 
GitLab


From 16ac5b21b31b439f03cdf44c153c5f5af94fb3eb Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 21 Sep 2023 12:26:52 -0700
Subject: [PATCH 1000/2340] drm/exynos: Call drm_atomic_helper_shutdown() at
 shutdown/unbind time

Based on grepping through the source code this driver appears to be
missing a call to drm_atomic_helper_shutdown() at system shutdown time
and at driver unbind time. Among other things, this means that if a
panel is in use that it won't be cleanly powered off at system
shutdown time.

The fact that we should call drm_atomic_helper_shutdown() in the case
of OS shutdown/restart and at driver remove (or unbind) time comes
straight out of the kernel doc "driver instance overview" in
drm_drv.c.

A few notes about this fix:
- When adding drm_atomic_helper_shutdown() to the unbind path, I added
  it after drm_kms_helper_poll_fini() since that's when other drivers
  seemed to have it.
- Technically with a previous patch, ("drm/atomic-helper:
  drm_atomic_helper_shutdown(NULL) should be a noop"), we don't
  actually need to check to see if our "drm" pointer is NULL before
  calling drm_atomic_helper_shutdown(). We'll leave the "if" test in,
  though, so that this patch can land without any dependencies. It
  could potentially be removed later.
- This patch also makes sure to set the drvdata to NULL in the case of
  bind errors to make sure that shutdown can't access freed data.

Suggested-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 8399256cb5c9d..5380fb6c55ae1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -300,6 +300,7 @@ err_mode_config_cleanup:
 	drm_mode_config_cleanup(drm);
 	exynos_drm_cleanup_dma(drm);
 	kfree(private);
+	dev_set_drvdata(dev, NULL);
 err_free_drm:
 	drm_dev_put(drm);
 
@@ -313,6 +314,7 @@ static void exynos_drm_unbind(struct device *dev)
 	drm_dev_unregister(drm);
 
 	drm_kms_helper_poll_fini(drm);
+	drm_atomic_helper_shutdown(drm);
 
 	component_unbind_all(drm->dev, drm);
 	drm_mode_config_cleanup(drm);
@@ -350,9 +352,18 @@ static int exynos_drm_platform_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void exynos_drm_platform_shutdown(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+
+	if (drm)
+		drm_atomic_helper_shutdown(drm);
+}
+
 static struct platform_driver exynos_drm_platform_driver = {
 	.probe	= exynos_drm_platform_probe,
 	.remove	= exynos_drm_platform_remove,
+	.shutdown = exynos_drm_platform_shutdown,
 	.driver	= {
 		.name	= "exynos-drm",
 		.pm	= &exynos_drm_pm_ops,
-- 
GitLab


From 4fe7a1ecaa4104c58c58a12ec29f24894381bdcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 8 Nov 2023 13:09:12 +0900
Subject: [PATCH 1001/2340] drm/exynos: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.

To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new(), which already returns void. Eventually after all drivers
are converted, .remove_new() will be renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
fix merge conflict and drop duplicated patch description.
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 6 ++----
 drivers/gpu/drm/exynos/exynos7_drm_decon.c    | 6 ++----
 drivers/gpu/drm/exynos/exynos_dp.c            | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_drv.c       | 5 ++---
 drivers/gpu/drm/exynos/exynos_drm_fimc.c      | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_fimd.c      | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_g2d.c       | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_gsc.c       | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_mic.c       | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_rotator.c   | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_scaler.c    | 6 ++----
 drivers/gpu/drm/exynos/exynos_drm_vidi.c      | 6 ++----
 drivers/gpu/drm/exynos/exynos_hdmi.c          | 6 ++----
 drivers/gpu/drm/exynos/exynos_mixer.c         | 6 ++----
 14 files changed, 28 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 4d986077738b9..776f2f0b602de 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -862,18 +862,16 @@ err_disable_pm_runtime:
 	return ret;
 }
 
-static int exynos5433_decon_remove(struct platform_device *pdev)
+static void exynos5433_decon_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &decon_component_ops);
-
-	return 0;
 }
 
 struct platform_driver exynos5433_decon_driver = {
 	.probe		= exynos5433_decon_probe,
-	.remove		= exynos5433_decon_remove,
+	.remove_new	= exynos5433_decon_remove,
 	.driver		= {
 		.name	= "exynos5433-decon",
 		.pm	= pm_ptr(&exynos5433_decon_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 0156a5e944359..0d185c0564b91 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -765,7 +765,7 @@ err_iounmap:
 	return ret;
 }
 
-static int decon_remove(struct platform_device *pdev)
+static void decon_remove(struct platform_device *pdev)
 {
 	struct decon_context *ctx = dev_get_drvdata(&pdev->dev);
 
@@ -774,8 +774,6 @@ static int decon_remove(struct platform_device *pdev)
 	iounmap(ctx->regs);
 
 	component_del(&pdev->dev, &decon_component_ops);
-
-	return 0;
 }
 
 static int exynos7_decon_suspend(struct device *dev)
@@ -840,7 +838,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos7_decon_pm_ops, exynos7_decon_suspend,
 
 struct platform_driver decon_driver = {
 	.probe		= decon_probe,
-	.remove		= decon_remove,
+	.remove_new	= decon_remove,
 	.driver		= {
 		.name	= "exynos-decon",
 		.pm	= pm_ptr(&exynos7_decon_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 3404ec1367fb9..ca31bad6c5760 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -250,14 +250,12 @@ out:
 	return component_add(&pdev->dev, &exynos_dp_ops);
 }
 
-static int exynos_dp_remove(struct platform_device *pdev)
+static void exynos_dp_remove(struct platform_device *pdev)
 {
 	struct exynos_dp_device *dp = platform_get_drvdata(pdev);
 
 	component_del(&pdev->dev, &exynos_dp_ops);
 	analogix_dp_remove(dp->adp);
-
-	return 0;
 }
 
 static int exynos_dp_suspend(struct device *dev)
@@ -285,7 +283,7 @@ MODULE_DEVICE_TABLE(of, exynos_dp_match);
 
 struct platform_driver dp_driver = {
 	.probe		= exynos_dp_probe,
-	.remove		= exynos_dp_remove,
+	.remove_new	= exynos_dp_remove,
 	.driver		= {
 		.name	= "exynos-dp",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 5380fb6c55ae1..7c59e1164a483 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -346,10 +346,9 @@ static int exynos_drm_platform_probe(struct platform_device *pdev)
 					       match);
 }
 
-static int exynos_drm_platform_remove(struct platform_device *pdev)
+static void exynos_drm_platform_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &exynos_drm_ops);
-	return 0;
 }
 
 static void exynos_drm_platform_shutdown(struct platform_device *pdev)
@@ -362,7 +361,7 @@ static void exynos_drm_platform_shutdown(struct platform_device *pdev)
 
 static struct platform_driver exynos_drm_platform_driver = {
 	.probe	= exynos_drm_platform_probe,
-	.remove	= exynos_drm_platform_remove,
+	.remove_new	= exynos_drm_platform_remove,
 	.shutdown = exynos_drm_platform_shutdown,
 	.driver	= {
 		.name	= "exynos-drm",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 8de2714599fc5..e81a576de3983 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1367,7 +1367,7 @@ err_pm_dis:
 	return ret;
 }
 
-static int fimc_remove(struct platform_device *pdev)
+static void fimc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct fimc_context *ctx = get_fimc_context(dev);
@@ -1377,8 +1377,6 @@ static int fimc_remove(struct platform_device *pdev)
 	pm_runtime_disable(dev);
 
 	fimc_put_clocks(ctx);
-
-	return 0;
 }
 
 static int fimc_runtime_suspend(struct device *dev)
@@ -1410,7 +1408,7 @@ MODULE_DEVICE_TABLE(of, fimc_of_match);
 
 struct platform_driver fimc_driver = {
 	.probe		= fimc_probe,
-	.remove		= fimc_remove,
+	.remove_new	= fimc_remove,
 	.driver		= {
 		.of_match_table = fimc_of_match,
 		.name	= "exynos-drm-fimc",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 8dde7b1e9b35d..a9f1c5c058940 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -1277,13 +1277,11 @@ err_disable_pm_runtime:
 	return ret;
 }
 
-static int fimd_remove(struct platform_device *pdev)
+static void fimd_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &fimd_component_ops);
-
-	return 0;
 }
 
 static int exynos_fimd_suspend(struct device *dev)
@@ -1325,7 +1323,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos_fimd_pm_ops, exynos_fimd_suspend,
 
 struct platform_driver fimd_driver = {
 	.probe		= fimd_probe,
-	.remove		= fimd_remove,
+	.remove_new	= fimd_remove,
 	.driver		= {
 		.name	= "exynos4-fb",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 414e585ec7dd0..f3138423612e6 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1530,7 +1530,7 @@ err_destroy_slab:
 	return ret;
 }
 
-static int g2d_remove(struct platform_device *pdev)
+static void g2d_remove(struct platform_device *pdev)
 {
 	struct g2d_data *g2d = platform_get_drvdata(pdev);
 
@@ -1545,8 +1545,6 @@ static int g2d_remove(struct platform_device *pdev)
 	g2d_fini_cmdlist(g2d);
 	destroy_workqueue(g2d->g2d_workq);
 	kmem_cache_destroy(g2d->runqueue_slab);
-
-	return 0;
 }
 
 static int g2d_suspend(struct device *dev)
@@ -1609,7 +1607,7 @@ MODULE_DEVICE_TABLE(of, exynos_g2d_match);
 
 struct platform_driver g2d_driver = {
 	.probe		= g2d_probe,
-	.remove		= g2d_remove,
+	.remove_new	= g2d_remove,
 	.driver		= {
 		.name	= "exynos-drm-g2d",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 35771fb4e85d0..e9a769590415d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1309,15 +1309,13 @@ err_pm_dis:
 	return ret;
 }
 
-static int gsc_remove(struct platform_device *pdev)
+static void gsc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &gsc_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int __maybe_unused gsc_runtime_suspend(struct device *dev)
@@ -1422,7 +1420,7 @@ MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
 
 struct platform_driver gsc_driver = {
 	.probe		= gsc_probe,
-	.remove		= gsc_remove,
+	.remove_new	= gsc_remove,
 	.driver		= {
 		.name	= "exynos-drm-gsc",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 17bab5b1663ff..e2920960180fd 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -442,7 +442,7 @@ err:
 	return ret;
 }
 
-static int exynos_mic_remove(struct platform_device *pdev)
+static void exynos_mic_remove(struct platform_device *pdev)
 {
 	struct exynos_mic *mic = platform_get_drvdata(pdev);
 
@@ -450,8 +450,6 @@ static int exynos_mic_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 
 	drm_bridge_remove(&mic->bridge);
-
-	return 0;
 }
 
 static const struct of_device_id exynos_mic_of_match[] = {
@@ -462,7 +460,7 @@ MODULE_DEVICE_TABLE(of, exynos_mic_of_match);
 
 struct platform_driver mic_driver = {
 	.probe		= exynos_mic_probe,
-	.remove		= exynos_mic_remove,
+	.remove_new	= exynos_mic_remove,
 	.driver		= {
 		.name	= "exynos-mic",
 		.pm	= pm_ptr(&exynos_mic_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index ffb327c5139ec..5f7516655b08f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -329,15 +329,13 @@ err_component:
 	return ret;
 }
 
-static int rotator_remove(struct platform_device *pdev)
+static void rotator_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &rotator_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int rotator_runtime_suspend(struct device *dev)
@@ -453,7 +451,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(rotator_pm_ops, rotator_runtime_suspend,
 
 struct platform_driver rotator_driver = {
 	.probe		= rotator_probe,
-	.remove		= rotator_remove,
+	.remove_new	= rotator_remove,
 	.driver		= {
 		.name	= "exynos-rotator",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index f2b8b09a6b4ec..392f721f13ab7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -539,15 +539,13 @@ err_ippdrv_register:
 	return ret;
 }
 
-static int scaler_remove(struct platform_device *pdev)
+static void scaler_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &scaler_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int clk_disable_unprepare_wrapper(struct clk *clk)
@@ -721,7 +719,7 @@ MODULE_DEVICE_TABLE(of, exynos_scaler_match);
 
 struct platform_driver scaler_driver = {
 	.probe		= scaler_probe,
-	.remove		= scaler_remove,
+	.remove_new	= scaler_remove,
 	.driver		= {
 		.name	= "exynos-scaler",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index f5e1adfcaa514..00382f28748ac 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -462,7 +462,7 @@ static int vidi_probe(struct platform_device *pdev)
 	return component_add(dev, &vidi_component_ops);
 }
 
-static int vidi_remove(struct platform_device *pdev)
+static void vidi_remove(struct platform_device *pdev)
 {
 	struct vidi_context *ctx = platform_get_drvdata(pdev);
 
@@ -472,13 +472,11 @@ static int vidi_remove(struct platform_device *pdev)
 	}
 
 	component_del(&pdev->dev, &vidi_component_ops);
-
-	return 0;
 }
 
 struct platform_driver vidi_driver = {
 	.probe		= vidi_probe,
-	.remove		= vidi_remove,
+	.remove_new	= vidi_remove,
 	.driver		= {
 		.name	= "exynos-drm-vidi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index dd9903eab563e..43bed6cbaaea0 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -2069,7 +2069,7 @@ err_ddc:
 	return ret;
 }
 
-static int hdmi_remove(struct platform_device *pdev)
+static void hdmi_remove(struct platform_device *pdev)
 {
 	struct hdmi_context *hdata = platform_get_drvdata(pdev);
 
@@ -2092,8 +2092,6 @@ static int hdmi_remove(struct platform_device *pdev)
 	put_device(&hdata->ddc_adpt->dev);
 
 	mutex_destroy(&hdata->mutex);
-
-	return 0;
 }
 
 static int __maybe_unused exynos_hdmi_suspend(struct device *dev)
@@ -2125,7 +2123,7 @@ static const struct dev_pm_ops exynos_hdmi_pm_ops = {
 
 struct platform_driver hdmi_driver = {
 	.probe		= hdmi_probe,
-	.remove		= hdmi_remove,
+	.remove_new	= hdmi_remove,
 	.driver		= {
 		.name	= "exynos-hdmi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index b302392ff0d7f..6822333fd0e65 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1258,13 +1258,11 @@ static int mixer_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mixer_remove(struct platform_device *pdev)
+static void mixer_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &mixer_component_ops);
-
-	return 0;
 }
 
 static int __maybe_unused exynos_mixer_suspend(struct device *dev)
@@ -1338,5 +1336,5 @@ struct platform_driver mixer_driver = {
 		.of_match_table = mixer_match_types,
 	},
 	.probe = mixer_probe,
-	.remove = mixer_remove,
+	.remove_new = mixer_remove,
 };
-- 
GitLab


From ead5a41c8f8a13ad7b1c9fd2d7edb1ea909b777f Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 1 Nov 2023 21:17:31 +0100
Subject: [PATCH 1002/2340] drm/exynos: dpi: Change connector type to DPI

When exynos_drm_dpi.c was written, DRM_MODE_CONNECTOR_DPI did not exist
yet and I guess that's the reason why DRM_MODE_CONNECTOR_VGA was used as
the connector type.

However, now it makes more sense to use DRM_MODE_CONNECTOR_DPI as the
connector type.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_dpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 378e5381978fa..0dc36df6ada34 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -101,7 +101,7 @@ static int exynos_dpi_create_connector(struct drm_encoder *encoder)
 
 	ret = drm_connector_init(encoder->dev, connector,
 				 &exynos_dpi_connector_funcs,
-				 DRM_MODE_CONNECTOR_VGA);
+				 DRM_MODE_CONNECTOR_DPI);
 	if (ret) {
 		DRM_DEV_ERROR(ctx->dev,
 			      "failed to initialize connector with drm\n");
-- 
GitLab


From dd08ebf6c3525a7ea2186e636df064ea47281987 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 30 Mar 2023 17:31:57 -0400
Subject: [PATCH 1003/2340] drm/xe: Introduce a new DRM driver for Intel GPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Xe, is a new driver for Intel GPUs that supports both integrated and
discrete platforms starting with Tiger Lake (first Intel Xe Architecture).

The code is at a stage where it is already functional and has experimental
support for multiple platforms starting from Tiger Lake, with initial
support implemented in Mesa (for Iris and Anv, our OpenGL and Vulkan
drivers), as well as in NEO (for OpenCL and Level0).

The new Xe driver leverages a lot from i915.

As for display, the intent is to share the display code with the i915
driver so that there is maximum reuse there. But it is not added
in this patch.

This initial work is a collaboration of many people and unfortunately
the big squashed patch won't fully honor the proper credits. But let's
get some git quick stats so we can at least try to preserve some of the
credits:

Co-developed-by: Matthew Brost <matthew.brost@intel.com>
Co-developed-by: Matthew Auld <matthew.auld@intel.com>
Co-developed-by: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Co-developed-by: Francois Dugast <francois.dugast@intel.com>
Co-developed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Co-developed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Co-developed-by: Philippe Lecluse <philippe.lecluse@intel.com>
Co-developed-by: Nirmoy Das <nirmoy.das@intel.com>
Co-developed-by: Jani Nikula <jani.nikula@intel.com>
Co-developed-by: José Roberto de Souza <jose.souza@intel.com>
Co-developed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Co-developed-by: Dave Airlie <airlied@redhat.com>
Co-developed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Co-developed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Co-developed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 Documentation/gpu/drivers.rst                 |    1 +
 Documentation/gpu/xe/index.rst                |   23 +
 Documentation/gpu/xe/xe_cs.rst                |    8 +
 Documentation/gpu/xe/xe_firmware.rst          |   34 +
 Documentation/gpu/xe/xe_gt_mcr.rst            |   13 +
 Documentation/gpu/xe/xe_map.rst               |    8 +
 Documentation/gpu/xe/xe_migrate.rst           |    8 +
 Documentation/gpu/xe/xe_mm.rst                |   14 +
 Documentation/gpu/xe/xe_pcode.rst             |   14 +
 Documentation/gpu/xe/xe_pm.rst                |   14 +
 Documentation/gpu/xe/xe_rtp.rst               |   20 +
 Documentation/gpu/xe/xe_wa.rst                |   14 +
 drivers/gpu/drm/Kconfig                       |    2 +
 drivers/gpu/drm/Makefile                      |    1 +
 drivers/gpu/drm/xe/.gitignore                 |    2 +
 drivers/gpu/drm/xe/Kconfig                    |   63 +
 drivers/gpu/drm/xe/Kconfig.debug              |   96 +
 drivers/gpu/drm/xe/Makefile                   |  121 +
 drivers/gpu/drm/xe/abi/guc_actions_abi.h      |  219 ++
 drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h |  249 ++
 .../drm/xe/abi/guc_communication_ctb_abi.h    |  189 +
 .../drm/xe/abi/guc_communication_mmio_abi.h   |   49 +
 drivers/gpu/drm/xe/abi/guc_errors_abi.h       |   37 +
 drivers/gpu/drm/xe/abi/guc_klvs_abi.h         |  322 ++
 drivers/gpu/drm/xe/abi/guc_messages_abi.h     |  234 ++
 drivers/gpu/drm/xe/tests/Makefile             |    4 +
 drivers/gpu/drm/xe/tests/xe_bo.c              |  303 ++
 drivers/gpu/drm/xe/tests/xe_bo_test.c         |   25 +
 drivers/gpu/drm/xe/tests/xe_dma_buf.c         |  259 ++
 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c    |   23 +
 drivers/gpu/drm/xe/tests/xe_migrate.c         |  378 ++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c    |   23 +
 drivers/gpu/drm/xe/tests/xe_test.h            |   66 +
 drivers/gpu/drm/xe/xe_bb.c                    |   97 +
 drivers/gpu/drm/xe/xe_bb.h                    |   27 +
 drivers/gpu/drm/xe/xe_bb_types.h              |   20 +
 drivers/gpu/drm/xe/xe_bo.c                    | 1698 ++++++++
 drivers/gpu/drm/xe/xe_bo.h                    |  290 ++
 drivers/gpu/drm/xe/xe_bo_doc.h                |  179 +
 drivers/gpu/drm/xe/xe_bo_evict.c              |  225 ++
 drivers/gpu/drm/xe/xe_bo_evict.h              |   15 +
 drivers/gpu/drm/xe/xe_bo_types.h              |   73 +
 drivers/gpu/drm/xe/xe_debugfs.c               |  129 +
 drivers/gpu/drm/xe/xe_debugfs.h               |   13 +
 drivers/gpu/drm/xe/xe_device.c                |  359 ++
 drivers/gpu/drm/xe/xe_device.h                |  126 +
 drivers/gpu/drm/xe/xe_device_types.h          |  214 ++
 drivers/gpu/drm/xe/xe_dma_buf.c               |  307 ++
 drivers/gpu/drm/xe/xe_dma_buf.h               |   15 +
 drivers/gpu/drm/xe/xe_drv.h                   |   24 +
 drivers/gpu/drm/xe/xe_engine.c                |  734 ++++
 drivers/gpu/drm/xe/xe_engine.h                |   54 +
 drivers/gpu/drm/xe/xe_engine_types.h          |  208 +
 drivers/gpu/drm/xe/xe_exec.c                  |  390 ++
 drivers/gpu/drm/xe/xe_exec.h                  |   14 +
 drivers/gpu/drm/xe/xe_execlist.c              |  489 +++
 drivers/gpu/drm/xe/xe_execlist.h              |   21 +
 drivers/gpu/drm/xe/xe_execlist_types.h        |   49 +
 drivers/gpu/drm/xe/xe_force_wake.c            |  203 +
 drivers/gpu/drm/xe/xe_force_wake.h            |   40 +
 drivers/gpu/drm/xe/xe_force_wake_types.h      |   84 +
 drivers/gpu/drm/xe/xe_ggtt.c                  |  304 ++
 drivers/gpu/drm/xe/xe_ggtt.h                  |   28 +
 drivers/gpu/drm/xe/xe_ggtt_types.h            |   28 +
 drivers/gpu/drm/xe/xe_gpu_scheduler.c         |  101 +
 drivers/gpu/drm/xe/xe_gpu_scheduler.h         |   73 +
 drivers/gpu/drm/xe/xe_gpu_scheduler_types.h   |   57 +
 drivers/gpu/drm/xe/xe_gt.c                    |  830 ++++
 drivers/gpu/drm/xe/xe_gt.h                    |   64 +
 drivers/gpu/drm/xe/xe_gt_clock.c              |   83 +
 drivers/gpu/drm/xe/xe_gt_clock.h              |   13 +
 drivers/gpu/drm/xe/xe_gt_debugfs.c            |  160 +
 drivers/gpu/drm/xe/xe_gt_debugfs.h            |   13 +
 drivers/gpu/drm/xe/xe_gt_mcr.c                |  552 +++
 drivers/gpu/drm/xe/xe_gt_mcr.h                |   26 +
 drivers/gpu/drm/xe/xe_gt_pagefault.c          |  750 ++++
 drivers/gpu/drm/xe/xe_gt_pagefault.h          |   22 +
 drivers/gpu/drm/xe/xe_gt_sysfs.c              |   55 +
 drivers/gpu/drm/xe/xe_gt_sysfs.h              |   19 +
 drivers/gpu/drm/xe/xe_gt_sysfs_types.h        |   26 +
 drivers/gpu/drm/xe/xe_gt_topology.c           |  144 +
 drivers/gpu/drm/xe/xe_gt_topology.h           |   20 +
 drivers/gpu/drm/xe/xe_gt_types.h              |  320 ++
 drivers/gpu/drm/xe/xe_guc.c                   |  875 +++++
 drivers/gpu/drm/xe/xe_guc.h                   |   57 +
 drivers/gpu/drm/xe/xe_guc_ads.c               |  676 ++++
 drivers/gpu/drm/xe/xe_guc_ads.h               |   17 +
 drivers/gpu/drm/xe/xe_guc_ads_types.h         |   25 +
 drivers/gpu/drm/xe/xe_guc_ct.c                | 1196 ++++++
 drivers/gpu/drm/xe/xe_guc_ct.h                |   62 +
 drivers/gpu/drm/xe/xe_guc_ct_types.h          |   87 +
 drivers/gpu/drm/xe/xe_guc_debugfs.c           |  105 +
 drivers/gpu/drm/xe/xe_guc_debugfs.h           |   14 +
 drivers/gpu/drm/xe/xe_guc_engine_types.h      |   52 +
 drivers/gpu/drm/xe/xe_guc_fwif.h              |  392 ++
 drivers/gpu/drm/xe/xe_guc_hwconfig.c          |  125 +
 drivers/gpu/drm/xe/xe_guc_hwconfig.h          |   17 +
 drivers/gpu/drm/xe/xe_guc_log.c               |  109 +
 drivers/gpu/drm/xe/xe_guc_log.h               |   48 +
 drivers/gpu/drm/xe/xe_guc_log_types.h         |   23 +
 drivers/gpu/drm/xe/xe_guc_pc.c                |  843 ++++
 drivers/gpu/drm/xe/xe_guc_pc.h                |   15 +
 drivers/gpu/drm/xe/xe_guc_pc_types.h          |   34 +
 drivers/gpu/drm/xe/xe_guc_reg.h               |  147 +
 drivers/gpu/drm/xe/xe_guc_submit.c            | 1695 ++++++++
 drivers/gpu/drm/xe/xe_guc_submit.h            |   30 +
 drivers/gpu/drm/xe/xe_guc_types.h             |   71 +
 drivers/gpu/drm/xe/xe_huc.c                   |  131 +
 drivers/gpu/drm/xe/xe_huc.h                   |   19 +
 drivers/gpu/drm/xe/xe_huc_debugfs.c           |   71 +
 drivers/gpu/drm/xe/xe_huc_debugfs.h           |   14 +
 drivers/gpu/drm/xe/xe_huc_types.h             |   19 +
 drivers/gpu/drm/xe/xe_hw_engine.c             |  658 ++++
 drivers/gpu/drm/xe/xe_hw_engine.h             |   27 +
 drivers/gpu/drm/xe/xe_hw_engine_types.h       |  107 +
 drivers/gpu/drm/xe/xe_hw_fence.c              |  230 ++
 drivers/gpu/drm/xe/xe_hw_fence.h              |   27 +
 drivers/gpu/drm/xe/xe_hw_fence_types.h        |   72 +
 drivers/gpu/drm/xe/xe_irq.c                   |  565 +++
 drivers/gpu/drm/xe/xe_irq.h                   |   18 +
 drivers/gpu/drm/xe/xe_lrc.c                   |  841 ++++
 drivers/gpu/drm/xe/xe_lrc.h                   |   50 +
 drivers/gpu/drm/xe/xe_lrc_types.h             |   47 +
 drivers/gpu/drm/xe/xe_macros.h                |   20 +
 drivers/gpu/drm/xe/xe_map.h                   |   93 +
 drivers/gpu/drm/xe/xe_migrate.c               | 1168 ++++++
 drivers/gpu/drm/xe/xe_migrate.h               |   88 +
 drivers/gpu/drm/xe/xe_migrate_doc.h           |   88 +
 drivers/gpu/drm/xe/xe_mmio.c                  |  466 +++
 drivers/gpu/drm/xe/xe_mmio.h                  |  110 +
 drivers/gpu/drm/xe/xe_mocs.c                  |  557 +++
 drivers/gpu/drm/xe/xe_mocs.h                  |   29 +
 drivers/gpu/drm/xe/xe_module.c                |   76 +
 drivers/gpu/drm/xe/xe_module.h                |   13 +
 drivers/gpu/drm/xe/xe_pci.c                   |  651 ++++
 drivers/gpu/drm/xe/xe_pci.h                   |   21 +
 drivers/gpu/drm/xe/xe_pcode.c                 |  296 ++
 drivers/gpu/drm/xe/xe_pcode.h                 |   25 +
 drivers/gpu/drm/xe/xe_pcode_api.h             |   40 +
 drivers/gpu/drm/xe/xe_platform_types.h        |   32 +
 drivers/gpu/drm/xe/xe_pm.c                    |  207 +
 drivers/gpu/drm/xe/xe_pm.h                    |   24 +
 drivers/gpu/drm/xe/xe_preempt_fence.c         |  157 +
 drivers/gpu/drm/xe/xe_preempt_fence.h         |   61 +
 drivers/gpu/drm/xe/xe_preempt_fence_types.h   |   33 +
 drivers/gpu/drm/xe/xe_pt.c                    | 1542 ++++++++
 drivers/gpu/drm/xe/xe_pt.h                    |   54 +
 drivers/gpu/drm/xe/xe_pt_types.h              |   57 +
 drivers/gpu/drm/xe/xe_pt_walk.c               |  160 +
 drivers/gpu/drm/xe/xe_pt_walk.h               |  161 +
 drivers/gpu/drm/xe/xe_query.c                 |  387 ++
 drivers/gpu/drm/xe/xe_query.h                 |   14 +
 drivers/gpu/drm/xe/xe_reg_sr.c                |  248 ++
 drivers/gpu/drm/xe/xe_reg_sr.h                |   28 +
 drivers/gpu/drm/xe/xe_reg_sr_types.h          |   44 +
 drivers/gpu/drm/xe/xe_reg_whitelist.c         |   73 +
 drivers/gpu/drm/xe/xe_reg_whitelist.h         |   13 +
 drivers/gpu/drm/xe/xe_res_cursor.h            |  226 ++
 drivers/gpu/drm/xe/xe_ring_ops.c              |  373 ++
 drivers/gpu/drm/xe/xe_ring_ops.h              |   17 +
 drivers/gpu/drm/xe/xe_ring_ops_types.h        |   22 +
 drivers/gpu/drm/xe/xe_rtp.c                   |  144 +
 drivers/gpu/drm/xe/xe_rtp.h                   |  340 ++
 drivers/gpu/drm/xe/xe_rtp_types.h             |  105 +
 drivers/gpu/drm/xe/xe_sa.c                    |   96 +
 drivers/gpu/drm/xe/xe_sa.h                    |   42 +
 drivers/gpu/drm/xe/xe_sa_types.h              |   19 +
 drivers/gpu/drm/xe/xe_sched_job.c             |  246 ++
 drivers/gpu/drm/xe/xe_sched_job.h             |   76 +
 drivers/gpu/drm/xe/xe_sched_job_types.h       |   46 +
 drivers/gpu/drm/xe/xe_step.c                  |  189 +
 drivers/gpu/drm/xe/xe_step.h                  |   18 +
 drivers/gpu/drm/xe/xe_step_types.h            |   51 +
 drivers/gpu/drm/xe/xe_sync.c                  |  276 ++
 drivers/gpu/drm/xe/xe_sync.h                  |   27 +
 drivers/gpu/drm/xe/xe_sync_types.h            |   27 +
 drivers/gpu/drm/xe/xe_trace.c                 |    9 +
 drivers/gpu/drm/xe/xe_trace.h                 |  513 +++
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c           |  130 +
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h           |   16 +
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h     |   18 +
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |  403 ++
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h          |   41 +
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h    |   44 +
 drivers/gpu/drm/xe/xe_tuning.c                |   39 +
 drivers/gpu/drm/xe/xe_tuning.h                |   13 +
 drivers/gpu/drm/xe/xe_uc.c                    |  226 ++
 drivers/gpu/drm/xe/xe_uc.h                    |   21 +
 drivers/gpu/drm/xe/xe_uc_debugfs.c            |   26 +
 drivers/gpu/drm/xe/xe_uc_debugfs.h            |   14 +
 drivers/gpu/drm/xe/xe_uc_fw.c                 |  406 ++
 drivers/gpu/drm/xe/xe_uc_fw.h                 |  180 +
 drivers/gpu/drm/xe/xe_uc_fw_abi.h             |   81 +
 drivers/gpu/drm/xe/xe_uc_fw_types.h           |  112 +
 drivers/gpu/drm/xe/xe_uc_types.h              |   25 +
 drivers/gpu/drm/xe/xe_vm.c                    | 3407 +++++++++++++++++
 drivers/gpu/drm/xe/xe_vm.h                    |  141 +
 drivers/gpu/drm/xe/xe_vm_doc.h                |  555 +++
 drivers/gpu/drm/xe/xe_vm_madvise.c            |  347 ++
 drivers/gpu/drm/xe/xe_vm_madvise.h            |   15 +
 drivers/gpu/drm/xe/xe_vm_types.h              |  337 ++
 drivers/gpu/drm/xe/xe_wa.c                    |  326 ++
 drivers/gpu/drm/xe/xe_wa.h                    |   18 +
 drivers/gpu/drm/xe/xe_wait_user_fence.c       |  202 +
 drivers/gpu/drm/xe/xe_wait_user_fence.h       |   15 +
 drivers/gpu/drm/xe/xe_wopcm.c                 |  263 ++
 drivers/gpu/drm/xe/xe_wopcm.h                 |   16 +
 drivers/gpu/drm/xe/xe_wopcm_types.h           |   26 +
 include/drm/xe_pciids.h                       |  195 +
 include/uapi/drm/xe_drm.h                     |  787 ++++
 210 files changed, 40575 insertions(+)
 create mode 100644 Documentation/gpu/xe/index.rst
 create mode 100644 Documentation/gpu/xe/xe_cs.rst
 create mode 100644 Documentation/gpu/xe/xe_firmware.rst
 create mode 100644 Documentation/gpu/xe/xe_gt_mcr.rst
 create mode 100644 Documentation/gpu/xe/xe_map.rst
 create mode 100644 Documentation/gpu/xe/xe_migrate.rst
 create mode 100644 Documentation/gpu/xe/xe_mm.rst
 create mode 100644 Documentation/gpu/xe/xe_pcode.rst
 create mode 100644 Documentation/gpu/xe/xe_pm.rst
 create mode 100644 Documentation/gpu/xe/xe_rtp.rst
 create mode 100644 Documentation/gpu/xe/xe_wa.rst
 create mode 100644 drivers/gpu/drm/xe/.gitignore
 create mode 100644 drivers/gpu/drm/xe/Kconfig
 create mode 100644 drivers/gpu/drm/xe/Kconfig.debug
 create mode 100644 drivers/gpu/drm/xe/Makefile
 create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_errors_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_klvs_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/guc_messages_abi.h
 create mode 100644 drivers/gpu/drm/xe/tests/Makefile
 create mode 100644 drivers/gpu/drm/xe/tests/xe_bo.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_bo_test.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate_test.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_test.h
 create mode 100644 drivers/gpu/drm/xe/xe_bb.c
 create mode 100644 drivers/gpu/drm/xe/xe_bb.h
 create mode 100644 drivers/gpu/drm/xe/xe_bb_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo.c
 create mode 100644 drivers/gpu/drm/xe/xe_bo.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.c
 create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.h
 create mode 100644 drivers/gpu/drm/xe/xe_bo_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_device.c
 create mode 100644 drivers/gpu/drm/xe/xe_device.h
 create mode 100644 drivers/gpu/drm/xe/xe_device_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.c
 create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.h
 create mode 100644 drivers/gpu/drm/xe/xe_drv.h
 create mode 100644 drivers/gpu/drm/xe/xe_engine.c
 create mode 100644 drivers/gpu/drm/xe/xe_engine.h
 create mode 100644 drivers/gpu/drm/xe/xe_engine_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_exec.c
 create mode 100644 drivers/gpu/drm/xe/xe_exec.h
 create mode 100644 drivers/gpu/drm/xe/xe_execlist.c
 create mode 100644 drivers/gpu/drm/xe/xe_execlist.h
 create mode 100644 drivers/gpu/drm/xe/xe_execlist_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_force_wake.c
 create mode 100644 drivers/gpu/drm/xe/xe_force_wake.h
 create mode 100644 drivers/gpu/drm/xe/xe_force_wake_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_ggtt.c
 create mode 100644 drivers/gpu/drm/xe/xe_ggtt.h
 create mode 100644 drivers/gpu/drm/xe/xe_ggtt_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.c
 create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.h
 create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ads_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_ct_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_engine_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_fwif.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_log.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_log.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_log_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_pc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_reg.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.c
 create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.h
 create mode 100644 drivers/gpu/drm/xe/xe_guc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_huc.c
 create mode 100644 drivers/gpu/drm/xe/xe_huc.h
 create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_huc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.c
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.c
 create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.h
 create mode 100644 drivers/gpu/drm/xe/xe_hw_fence_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_irq.c
 create mode 100644 drivers/gpu/drm/xe/xe_irq.h
 create mode 100644 drivers/gpu/drm/xe/xe_lrc.c
 create mode 100644 drivers/gpu/drm/xe/xe_lrc.h
 create mode 100644 drivers/gpu/drm/xe/xe_lrc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_macros.h
 create mode 100644 drivers/gpu/drm/xe/xe_map.h
 create mode 100644 drivers/gpu/drm/xe/xe_migrate.c
 create mode 100644 drivers/gpu/drm/xe/xe_migrate.h
 create mode 100644 drivers/gpu/drm/xe/xe_migrate_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_mmio.c
 create mode 100644 drivers/gpu/drm/xe/xe_mmio.h
 create mode 100644 drivers/gpu/drm/xe/xe_mocs.c
 create mode 100644 drivers/gpu/drm/xe/xe_mocs.h
 create mode 100644 drivers/gpu/drm/xe/xe_module.c
 create mode 100644 drivers/gpu/drm/xe/xe_module.h
 create mode 100644 drivers/gpu/drm/xe/xe_pci.c
 create mode 100644 drivers/gpu/drm/xe/xe_pci.h
 create mode 100644 drivers/gpu/drm/xe/xe_pcode.c
 create mode 100644 drivers/gpu/drm/xe/xe_pcode.h
 create mode 100644 drivers/gpu/drm/xe/xe_pcode_api.h
 create mode 100644 drivers/gpu/drm/xe/xe_platform_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_pm.c
 create mode 100644 drivers/gpu/drm/xe/xe_pm.h
 create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.c
 create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.h
 create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_pt.c
 create mode 100644 drivers/gpu/drm/xe/xe_pt.h
 create mode 100644 drivers/gpu/drm/xe/xe_pt_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.c
 create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.h
 create mode 100644 drivers/gpu/drm/xe/xe_query.c
 create mode 100644 drivers/gpu/drm/xe/xe_query.h
 create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.c
 create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.h
 create mode 100644 drivers/gpu/drm/xe/xe_reg_sr_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.c
 create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.h
 create mode 100644 drivers/gpu/drm/xe/xe_res_cursor.h
 create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.c
 create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.h
 create mode 100644 drivers/gpu/drm/xe/xe_ring_ops_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_rtp.c
 create mode 100644 drivers/gpu/drm/xe/xe_rtp.h
 create mode 100644 drivers/gpu/drm/xe/xe_rtp_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_sa.c
 create mode 100644 drivers/gpu/drm/xe/xe_sa.h
 create mode 100644 drivers/gpu/drm/xe/xe_sa_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_sched_job.c
 create mode 100644 drivers/gpu/drm/xe/xe_sched_job.h
 create mode 100644 drivers/gpu/drm/xe/xe_sched_job_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_step.c
 create mode 100644 drivers/gpu/drm/xe/xe_step.h
 create mode 100644 drivers/gpu/drm/xe/xe_step_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_sync.c
 create mode 100644 drivers/gpu/drm/xe/xe_sync.h
 create mode 100644 drivers/gpu/drm/xe/xe_sync_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_trace.c
 create mode 100644 drivers/gpu/drm/xe/xe_trace.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_tuning.c
 create mode 100644 drivers/gpu/drm/xe/xe_tuning.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc.c
 create mode 100644 drivers/gpu/drm/xe/xe_uc.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.c
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_abi.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_uc_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm.c
 create mode 100644 drivers/gpu/drm/xe/xe_vm.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm_doc.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.c
 create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.h
 create mode 100644 drivers/gpu/drm/xe/xe_vm_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_wa.c
 create mode 100644 drivers/gpu/drm/xe/xe_wa.h
 create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.c
 create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.h
 create mode 100644 drivers/gpu/drm/xe/xe_wopcm.c
 create mode 100644 drivers/gpu/drm/xe/xe_wopcm.h
 create mode 100644 drivers/gpu/drm/xe/xe_wopcm_types.h
 create mode 100644 include/drm/xe_pciids.h
 create mode 100644 include/uapi/drm/xe_drm.h

diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index cc6535f5f28c3..b899cbc5c2b48 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -18,6 +18,7 @@ GPU Driver Documentation
    vkms
    bridge/dw-hdmi
    xen-front
+   xe/index
    afbc
    komeda-kms
    panfrost
diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
new file mode 100644
index 0000000000000..2fddf9ed251e7
--- /dev/null
+++ b/Documentation/gpu/xe/index.rst
@@ -0,0 +1,23 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=======================
+drm/xe Intel GFX Driver
+=======================
+
+The drm/xe driver supports some future GFX cards with rendering, display,
+compute and media. Support for currently available platforms like TGL, ADL,
+DG2, etc is provided to prototype the driver.
+
+.. toctree::
+   :titlesonly:
+
+   xe_mm
+   xe_map
+   xe_migrate
+   xe_cs
+   xe_pm
+   xe_pcode
+   xe_gt_mcr
+   xe_wa
+   xe_rtp
+   xe_firmware
diff --git a/Documentation/gpu/xe/xe_cs.rst b/Documentation/gpu/xe/xe_cs.rst
new file mode 100644
index 0000000000000..e379aed4f5a86
--- /dev/null
+++ b/Documentation/gpu/xe/xe_cs.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Command submission
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c
+   :doc: Execbuf (User GPU command submission)
diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst
new file mode 100644
index 0000000000000..c01246ae99f53
--- /dev/null
+++ b/Documentation/gpu/xe/xe_firmware.rst
@@ -0,0 +1,34 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========
+Firmware
+========
+
+Firmware Layout
+===============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+   :doc: Firmware Layout
+
+Write Once Protected Content Memory (WOPCM) Layout
+==================================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c
+   :doc: Write Once Protected Content Memory (WOPCM) Layout
+
+GuC CTB Blob
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c
+   :doc: GuC CTB Blob
+
+GuC Power Conservation (PC)
+===========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c
+   :doc: GuC Power Conservation (PC)
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_gt_mcr.rst b/Documentation/gpu/xe/xe_gt_mcr.rst
new file mode 100644
index 0000000000000..848c07bc36d0f
--- /dev/null
+++ b/Documentation/gpu/xe/xe_gt_mcr.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==============================================
+GT Multicast/Replicated (MCR) Register Support
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c
+   :doc: GT Multicast/Replicated (MCR) Register Support
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_map.rst b/Documentation/gpu/xe/xe_map.rst
new file mode 100644
index 0000000000000..a098cfd2df04d
--- /dev/null
+++ b/Documentation/gpu/xe/xe_map.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Map Layer
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h
+   :doc: Map layer
diff --git a/Documentation/gpu/xe/xe_migrate.rst b/Documentation/gpu/xe/xe_migrate.rst
new file mode 100644
index 0000000000000..f92faec0ac943
--- /dev/null
+++ b/Documentation/gpu/xe/xe_migrate.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=============
+Migrate Layer
+=============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h
+   :doc: Migrate Layer
diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst
new file mode 100644
index 0000000000000..6c8fd8b4a4667
--- /dev/null
+++ b/Documentation/gpu/xe/xe_mm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=================
+Memory Management
+=================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h
+   :doc: Buffer Objects (BO)
+
+Pagetable building
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c
+   :doc: Pagetable building
diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst
new file mode 100644
index 0000000000000..d2e22cc45061f
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pcode.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=====
+Pcode
+=====
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :doc: PCODE
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_pm.rst b/Documentation/gpu/xe/xe_pm.rst
new file mode 100644
index 0000000000000..6781cdfb24f6e
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========================
+Runtime Power Management
+========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :doc: Xe Power Management
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_rtp.rst b/Documentation/gpu/xe/xe_rtp.rst
new file mode 100644
index 0000000000000..7fdf4b6c1a04f
--- /dev/null
+++ b/Documentation/gpu/xe/xe_rtp.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========================
+Register Table Processing
+=========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :doc: Register Table Processing
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_wa.rst b/Documentation/gpu/xe/xe_wa.rst
new file mode 100644
index 0000000000000..f8811cc6adccb
--- /dev/null
+++ b/Documentation/gpu/xe/xe_wa.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+====================
+Hardware workarounds
+====================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :doc: Hardware workarounds
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :internal:
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 31cfe2c2a2afa..2520db0b776e1 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -276,6 +276,8 @@ source "drivers/gpu/drm/nouveau/Kconfig"
 
 source "drivers/gpu/drm/i915/Kconfig"
 
+source "drivers/gpu/drm/xe/Kconfig"
+
 source "drivers/gpu/drm/kmb/Kconfig"
 
 config DRM_VGEM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 8ac6f4b9546e6..104b42df2e956 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
 obj-$(CONFIG_DRM_I915)	+= i915/
+obj-$(CONFIG_DRM_XE)	+= xe/
 obj-$(CONFIG_DRM_KMB_DISPLAY)  += kmb/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_V3D)  += v3d/
diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore
new file mode 100644
index 0000000000000..81972dce1affa
--- /dev/null
+++ b/drivers/gpu/drm/xe/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*.hdrtest
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
new file mode 100644
index 0000000000000..62f54e6d62d93
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE
+	tristate "Intel Xe Graphics"
+	depends on DRM && PCI && MMU
+	select INTERVAL_TREE
+	# we need shmfs for the swappable backing store, and in particular
+	# the shmem_readpage() which depends upon tmpfs
+	select SHMEM
+	select TMPFS
+	select DRM_BUDDY
+	select DRM_KMS_HELPER
+	select DRM_PANEL
+	select DRM_SUBALLOC_HELPER
+	select RELAY
+	select IRQ_WORK
+	select SYNC_FILE
+	select IOSF_MBI
+	select CRC32
+	select SND_HDA_I915 if SND_HDA_CORE
+	select CEC_CORE if CEC_NOTIFIER
+	select VMAP_PFN
+	select DRM_TTM
+	select DRM_TTM_HELPER
+	select DRM_SCHED
+	select MMU_NOTIFIER
+	help
+	  Experimental driver for Intel Xe series GPUs
+
+	  If "M" is selected, the module will be called xe.
+
+config DRM_XE_FORCE_PROBE
+	string "Force probe xe for selected Intel hardware IDs"
+	depends on DRM_XE
+	help
+	  This is the default value for the xe.force_probe module
+	  parameter. Using the module parameter overrides this option.
+
+	  Force probe the xe for Intel graphics devices that are
+	  recognized but not properly supported by this kernel version. It is
+	  recommended to upgrade to a kernel version with proper support as soon
+	  as it is available.
+
+	  It can also be used to block the probe of recognized and fully
+	  supported devices.
+
+	  Use "" to disable force probe. If in doubt, use this.
+
+	  Use "<pci-id>[,<pci-id>,...]" to force probe the xe for listed
+	  devices. For example, "4500" or "4500,4571".
+
+	  Use "*" to force probe the driver for all known devices.
+
+	  Use "!" right before the ID to block the probe of the device. For
+	  example, "4500,!4571" forces the probe of 4500 and blocks the probe of
+	  4571.
+
+	  Use "!*" to block the probe of the driver for all known devices.
+
+menu "drm/Xe Debugging"
+depends on DRM_XE
+depends on EXPERT
+source "drivers/gpu/drm/xe/Kconfig.debug"
+endmenu
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
new file mode 100644
index 0000000000000..b61fd43a76fed
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE_WERROR
+	bool "Force GCC to throw an error instead of a warning when compiling"
+	# As this may inadvertently break the build, only allow the user
+	# to shoot oneself in the foot iff they aim really hard
+	depends on EXPERT
+	# We use the dependency on !COMPILE_TEST to not be enabled in
+	# allmodconfig or allyesconfig configurations
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Add -Werror to the build flags for (and only for) xe.ko.
+	  Do not enable this unless you are writing code for the xe.ko module.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG
+	bool "Enable additional driver debugging"
+	depends on DRM_XE
+	depends on EXPERT
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_VM
+	bool "Enable extra VM debugging info"
+	default n
+	help
+	  Enable extra VM debugging info
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_MEM
+	bool "Enable passing SYS/LMEM addresses to user space"
+	default n
+	help
+	  Pass object location trough uapi. Intended for extended
+	  testing and development only.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_SIMPLE_ERROR_CAPTURE
+	bool "Enable simple error capture to dmesg on job timeout"
+	default n
+	help
+	  Choose this option when debugging an unexpected job timeout
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_KUNIT_TEST
+        tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
+	depends on DRM_XE && KUNIT
+	default KUNIT_ALL_TESTS
+	select DRM_EXPORT_FOR_TESTS if m
+	help
+	  Choose this option to allow the driver to perform selftests under
+	  the kunit framework
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_LARGE_GUC_BUFFER
+        bool "Enable larger guc log buffer"
+        default n
+        help
+          Choose this option when debugging guc issues.
+          Buffer should be large enough for complex issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
+config DRM_XE_USERPTR_INVAL_INJECT
+       bool "Inject userptr invalidation -EINVAL errors"
+       default n
+       help
+         Choose this option when debugging error paths that
+	 are hit during checks for userptr invalidations.
+
+	 Recomended for driver developers only.
+	 If in doubt, say "N".
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
new file mode 100644
index 0000000000000..228a87f2fe7b3
--- /dev/null
+++ b/drivers/gpu/drm/xe/Makefile
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+# Add a set of useful warning flags and enable -Werror for CI to prevent
+# trivial mistakes from creeping in. We have to do this piecemeal as we reject
+# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we
+# need to filter out dubious warnings.  Still it is our interest
+# to keep running locally with W=1 C=1 until we are completely clean.
+#
+# Note the danger in using -Wall -Wextra is that when CI updates gcc we
+# will most likely get a sudden build breakage... Hopefully we will fix
+# new warnings before CI updates!
+subdir-ccflags-y := -Wall -Wextra
+# making these call cc-disable-warning breaks when trying to build xe.mod.o
+# by calling make M=drivers/gpu/drm/xe. This doesn't happen in upstream tree,
+# so it was somehow fixed by the changes in the build system. Move it back to
+# $(call cc-disable-warning, ...) after rebase.
+subdir-ccflags-y += -Wno-unused-parameter
+subdir-ccflags-y += -Wno-type-limits
+#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
+#subdir-ccflags-y += $(call cc-disable-warning, type-limits)
+subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
+subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
+# clang warnings
+subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
+subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
+subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
+subdir-ccflags-y += $(call cc-disable-warning, frame-address)
+subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
+
+# Fine grained warnings disable
+CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init)
+
+subdir-ccflags-y += -I$(srctree)/$(src)
+
+# Please keep these build lists sorted!
+
+# core driver code
+
+xe-y += xe_bb.o \
+	xe_bo.o \
+	xe_bo_evict.o \
+	xe_debugfs.o \
+	xe_device.o \
+	xe_dma_buf.o \
+	xe_engine.o \
+	xe_exec.o \
+	xe_execlist.o \
+	xe_force_wake.o \
+	xe_ggtt.o \
+	xe_gpu_scheduler.o \
+	xe_gt.o \
+	xe_gt_clock.o \
+	xe_gt_debugfs.o \
+	xe_gt_mcr.o \
+	xe_gt_pagefault.o \
+	xe_gt_sysfs.o \
+	xe_gt_topology.o \
+	xe_guc.o \
+	xe_guc_ads.o \
+	xe_guc_ct.o \
+	xe_guc_debugfs.o \
+	xe_guc_hwconfig.o \
+	xe_guc_log.o \
+	xe_guc_pc.o \
+	xe_guc_submit.o \
+	xe_hw_engine.o \
+	xe_hw_fence.o \
+	xe_huc.o \
+	xe_huc_debugfs.o \
+	xe_irq.o \
+	xe_lrc.o \
+	xe_migrate.o \
+	xe_mmio.o \
+	xe_mocs.o \
+	xe_module.o \
+	xe_pci.o \
+	xe_pcode.o \
+	xe_pm.o \
+	xe_preempt_fence.o \
+	xe_pt.o \
+	xe_pt_walk.o \
+	xe_query.o \
+	xe_reg_sr.o \
+	xe_reg_whitelist.o \
+	xe_rtp.o \
+	xe_ring_ops.o \
+	xe_sa.o \
+	xe_sched_job.o \
+	xe_step.o \
+	xe_sync.o \
+	xe_trace.o \
+	xe_ttm_gtt_mgr.o \
+	xe_ttm_vram_mgr.o \
+	xe_tuning.o \
+	xe_uc.o \
+	xe_uc_debugfs.o \
+	xe_uc_fw.o \
+	xe_vm.o \
+	xe_vm_madvise.o \
+	xe_wait_user_fence.o \
+	xe_wa.o \
+	xe_wopcm.o
+
+# XXX: Needed for i915 register definitions. Will be removed after xe-regs.
+subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/
+
+obj-$(CONFIG_DRM_XE) += xe.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+\
+# header test
+always-$(CONFIG_DRM_XE_WERROR) += \
+	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h'))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+      cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+	$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
new file mode 100644
index 0000000000000..3062e0e0d467e
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ACTIONS_ABI_H
+#define _ABI_GUC_ACTIONS_ABI_H
+
+/**
+ * DOC: HOST2GUC_SELF_CFG
+ *
+ * This message is used by Host KMD to setup of the `GuC Self Config KLVs`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508            |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_           |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **KLV_LEN** - KLV length                                     |
+ *  |   |       |                                                              |
+ *  |   |       |   - 32 bit KLV = 1                                           |
+ *  |   |       |   - 64 bit KLV = 2                                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **VALUE32** - Bits 31-0 of the KLV value                     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 3 |  31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2)  |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_SELF_CFG			0x0508
+
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY		(0xffff << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN		(0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32		GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64		GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: HOST2GUC_CONTROL_CTB
+ *
+ * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509         |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **CONTROL** - control `CTB based communication`_             |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_CONTROL_DISABLE` = 0                           |
+ *  |   |       |   - _`GUC_CTB_CONTROL_ENABLE` = 1                            |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB			0x4509
+
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL	GUC_HXG_REQUEST_MSG_n_DATAn
+#define   GUC_CTB_CONTROL_DISABLE			0u
+#define   GUC_CTB_CONTROL_ENABLE			1u
+
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/* legacy definitions */
+
+enum xe_guc_action {
+	XE_GUC_ACTION_DEFAULT = 0x0,
+	XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
+	XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
+	XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+	XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+	XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+	XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
+	XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
+	XE_GUC_ACTION_ENTER_S_STATE = 0x501,
+	XE_GUC_ACTION_EXIT_S_STATE = 0x502,
+	XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+	XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+	XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
+	XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+	XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
+	XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
+	XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
+	XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
+	XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
+	XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	XE_GUC_ACTION_GET_HWCONFIG = 0x4100,
+	XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
+	XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+	XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+	XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
+	XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
+	XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+	XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
+	XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
+	XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
+	XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004,
+	XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+	XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
+	XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+	XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+	XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+	XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+	XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+	XE_GUC_ACTION_LIMIT
+};
+
+enum xe_guc_rc_options {
+	XE_GUCRC_HOST_CONTROL,
+	XE_GUCRC_FIRMWARE_CONTROL,
+};
+
+enum xe_guc_preempt_options {
+	XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
+	XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
+};
+
+enum xe_guc_report_status {
+	XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
+	XE_GUC_REPORT_STATUS_ACKED = 0x1,
+	XE_GUC_REPORT_STATUS_ERROR = 0x2,
+	XE_GUC_REPORT_STATUS_COMPLETE = 0x4,
+};
+
+enum xe_guc_sleep_state_status {
+	XE_GUC_SLEEP_STATE_SUCCESS = 0x1,
+	XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+	XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
+#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000
+};
+
+#define GUC_LOG_CONTROL_LOGGING_ENABLED	(1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT	4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK	(0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING	(1 << 8)
+
+#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
+#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
+/* Flush PPC or SMRO caches along with TLB invalidation request */
+#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1 << 31)
+
+enum xe_guc_tlb_invalidation_type {
+	XE_GUC_TLB_INVAL_FULL = 0x0,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2,
+	XE_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of Invalidation:
+ * The pipeline of the engine(s) for which the invalidation is targeted to is
+ * blocked, and all the in-flight transactions are guaranteed to be Globally
+ * Observed before completing the TLB invalidation
+ * 1: Lite mode of Invalidation:
+ * TLBs of the targeted engine(s) are immediately invalidated.
+ * In-flight transactions are NOT guaranteed to be Globally Observed before
+ * completing TLB invalidation.
+ * Light Invalidation Mode is to be used only when
+ * it can be guaranteed (by SW) that the address translations remain invariant
+ * for the in-flight transactions across the TLB invalidation. In other words,
+ * this mode can be used when the TLB invalidation is intended to clear out the
+ * stale cached translations that are no longer in use. Light Invalidation Mode
+ * is much faster than the Heavy Invalidation Mode, as it does not wait for the
+ * in-flight transactions to be GOd.
+ */
+enum xe_guc_tlb_inval_mode {
+	XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+	XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
new file mode 100644
index 0000000000000..811add10c30dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: SLPC SHARED DATA STRUCTURE
+ *
+ *  +----+------+--------------------------------------------------------------+
+ *  | CL | Bytes| Description                                                  |
+ *  +====+======+==============================================================+
+ *  | 1  | 0-3  | SHARED DATA SIZE                                             |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 4-7  | GLOBAL STATE                                                 |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 8-11 | DISPLAY DATA ADDRESS                                         |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 12:63| PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    | 0:63 | PADDING(PLATFORM INFO)                                       |
+ *  +----+------+--------------------------------------------------------------+
+ *  | 3  | 0-3  | TASK STATE DATA                                              |
+ *  +    +------+--------------------------------------------------------------+
+ *  |    | 4:63 | PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS                               |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    |      | PADDING + EXTRA RESERVED PAGE                                |
+ *  +----+------+--------------------------------------------------------------+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS		256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+		(SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES			4096
+#define SLPC_CACHELINE_SIZE_BYTES		64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE	SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX		(2 * SLPC_PAGE_SIZE_BYTES)
+
+/*
+ * Cacheline size aligned (Total size needed for
+ * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES	(((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+						+ ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+		+ (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \
+					SLPC_CACHELINE_SIZE_BYTES)
+
+#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER	(SLPC_SHARED_DATA_SIZE_BYTE_MAX - \
+					(SLPC_SHARED_DATA_SIZE_BYTE_HEADER \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \
+					+ SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \
+					+ SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE))
+
+enum slpc_task_enable {
+	SLPC_PARAM_TASK_DEFAULT = 0,
+	SLPC_PARAM_TASK_ENABLED,
+	SLPC_PARAM_TASK_DISABLED,
+	SLPC_PARAM_TASK_UNKNOWN
+};
+
+enum slpc_global_state {
+	SLPC_GLOBAL_STATE_NOT_RUNNING = 0,
+	SLPC_GLOBAL_STATE_INITIALIZING = 1,
+	SLPC_GLOBAL_STATE_RESETTING = 2,
+	SLPC_GLOBAL_STATE_RUNNING = 3,
+	SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4,
+	SLPC_GLOBAL_STATE_ERROR = 5
+};
+
+enum slpc_param_id {
+	SLPC_PARAM_TASK_ENABLE_GTPERF = 0,
+	SLPC_PARAM_TASK_DISABLE_GTPERF = 1,
+	SLPC_PARAM_TASK_ENABLE_BALANCER = 2,
+	SLPC_PARAM_TASK_DISABLE_BALANCER = 3,
+	SLPC_PARAM_TASK_ENABLE_DCC = 4,
+	SLPC_PARAM_TASK_DISABLE_DCC = 5,
+	SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6,
+	SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7,
+	SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8,
+	SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9,
+	SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10,
+	SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11,
+	SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12,
+	SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13,
+	SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14,
+	SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15,
+	SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16,
+	SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17,
+	SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18,
+	SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19,
+	SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20,
+	SLPC_PARAM_PWRGATE_RC_MODE = 21,
+	SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22,
+	SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23,
+	SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24,
+	SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25,
+	SLPC_PARAM_STRATEGIES = 26,
+	SLPC_PARAM_POWER_PROFILE = 27,
+	SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28,
+	SLPC_MAX_PARAM = 32,
+};
+
+enum slpc_media_ratio_mode {
+	SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
+};
+
+enum slpc_gucrc_mode {
+	SLPC_GUCRC_MODE_HW = 0,
+	SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+	SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+	SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+	SLPC_GUCRC_MODE_MAX,
+};
+
+enum slpc_event_id {
+	SLPC_EVENT_RESET = 0,
+	SLPC_EVENT_SHUTDOWN = 1,
+	SLPC_EVENT_PLATFORM_INFO_CHANGE = 2,
+	SLPC_EVENT_DISPLAY_MODE_CHANGE = 3,
+	SLPC_EVENT_FLIP_COMPLETE = 4,
+	SLPC_EVENT_QUERY_TASK_STATE = 5,
+	SLPC_EVENT_PARAMETER_SET = 6,
+	SLPC_EVENT_PARAMETER_UNSET = 7,
+};
+
+struct slpc_task_state_data {
+	union {
+		u32 task_status_padding;
+		struct {
+			u32 status;
+#define SLPC_GTPERF_TASK_ENABLED	REG_BIT(0)
+#define SLPC_DCC_TASK_ENABLED		REG_BIT(11)
+#define SLPC_IN_DCC			REG_BIT(12)
+#define SLPC_BALANCER_ENABLED		REG_BIT(15)
+#define SLPC_IBC_TASK_ENABLED		REG_BIT(16)
+#define SLPC_BALANCER_IA_LMT_ENABLED	REG_BIT(17)
+#define SLPC_BALANCER_IA_LMT_ACTIVE	REG_BIT(18)
+		};
+	};
+	union {
+		u32 freq_padding;
+		struct {
+#define SLPC_MAX_UNSLICE_FREQ_MASK	REG_GENMASK(7, 0)
+#define SLPC_MIN_UNSLICE_FREQ_MASK	REG_GENMASK(15, 8)
+#define SLPC_MAX_SLICE_FREQ_MASK	REG_GENMASK(23, 16)
+#define SLPC_MIN_SLICE_FREQ_MASK	REG_GENMASK(31, 24)
+			u32 freq;
+		};
+	};
+} __packed;
+
+struct slpc_shared_data_header {
+	/* Total size in bytes of this shared buffer. */
+	u32 size;
+	u32 global_state;
+	u32 display_data_addr;
+} __packed;
+
+struct slpc_override_params {
+	u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE];
+	u32 values[SLPC_MAX_OVERRIDE_PARAMETERS];
+} __packed;
+
+struct slpc_shared_data {
+	struct slpc_shared_data_header header;
+	u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER -
+				sizeof(struct slpc_shared_data_header)];
+
+	u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO];
+
+	struct slpc_task_state_data task_state_data;
+	u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE -
+				sizeof(struct slpc_task_state_data)];
+
+	struct slpc_override_params override_params;
+	u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES -
+				sizeof(struct slpc_override_params)];
+
+	u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER];
+
+	/* PAGE 2 (4096 bytes), mode based parameter will be removed soon */
+	u8 reserved_mode_definition[4096];
+} __packed;
+
+/**
+ * DOC: SLPC H2G MESSAGE FORMAT
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:8 | **EVENT_ID**                                                 |
+ *  +   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **EVENT_ARGC** - number of data arguments                    |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **EVENT_DATA1**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | ...                                                          |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |2+n|  31:0 | **EVENT_DATAn**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST		0x3003
+
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \
+				(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS		9
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \
+		(HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \
+			HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID		(0xff << 8)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC	(0xff << 0)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N	GUC_HXG_REQUEST_MSG_n_DATAn
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
new file mode 100644
index 0000000000000..41244055cc0c7
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H
+#define _ABI_GUC_COMMUNICATION_CTB_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: CT Buffer
+ *
+ * Circular buffer used to send `CTB Message`_
+ */
+
+/**
+ * DOC: CTB Descriptor
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 | **HEAD** - offset (in dwords) to the last dword that was     |
+ *  |   |       | read from the `CT Buffer`_.                                  |
+ *  |   |       | It can only be updated by the receiver.                      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **TAIL** - offset (in dwords) to the last dword that was     |
+ *  |   |       | written to the `CT Buffer`_.                                 |
+ *  |   |       | It can only be updated by the sender.                        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **STATUS** - status of the CTB                               |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation)        |
+ *  |   |       |   - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large)     |
+ *  |   |       |   - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message)      |
+ *  |   |       |   - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|       | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 15|  31:0 | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+struct guc_ct_buffer_desc {
+	u32 head;
+	u32 tail;
+	u32 status;
+#define GUC_CTB_STATUS_NO_ERROR				0
+#define GUC_CTB_STATUS_OVERFLOW				(1 << 0)
+#define GUC_CTB_STATUS_UNDERFLOW			(1 << 1)
+#define GUC_CTB_STATUS_MISMATCH				(1 << 2)
+	u32 reserved[13];
+} __packed;
+static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
+
+/**
+ * DOC: CTB Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **FENCE** - message identifier                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | **FORMAT** - format of the CTB message                       |
+ *  |   |       |  - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | **RESERVED**                                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **NUM_DWORDS** - length of the CTB message (w/o header)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | optional (depends on FORMAT)                                 |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HDR_LEN				1u
+#define GUC_CTB_MSG_MIN_LEN			GUC_CTB_HDR_LEN
+#define GUC_CTB_MSG_MAX_LEN			256u
+#define GUC_CTB_MSG_0_FENCE			(0xffff << 16)
+#define GUC_CTB_MSG_0_FORMAT			(0xf << 12)
+#define   GUC_CTB_FORMAT_HXG			0u
+#define GUC_CTB_MSG_0_RESERVED			(0xf << 8)
+#define GUC_CTB_MSG_0_NUM_DWORDS		(0xff << 0)
+
+/**
+ * DOC: CTB HXG Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | FENCE                                                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | RESERVED = MBZ                                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HXG_MSG_MIN_LEN		(GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN)
+#define GUC_CTB_HXG_MSG_MAX_LEN		GUC_CTB_MSG_MAX_LEN
+
+/**
+ * DOC: CTB based communication
+ *
+ * The CTB (command transport buffer) communication between Host and GuC
+ * is based on u32 data stream written to the shared buffer. One buffer can
+ * be used to transmit data only in one direction (one-directional channel).
+ *
+ * Current status of the each buffer is stored in the buffer descriptor.
+ * Buffer descriptor holds tail and head fields that represents active data
+ * stream. The tail field is updated by the data producer (sender), and head
+ * field is updated by the data consumer (receiver)::
+ *
+ *      +------------+
+ *      | DESCRIPTOR |          +=================+============+========+
+ *      +============+          |                 | MESSAGE(s) |        |
+ *      | address    |--------->+=================+============+========+
+ *      +------------+
+ *      | head       |          ^-----head--------^
+ *      +------------+
+ *      | tail       |          ^---------tail-----------------^
+ *      +------------+
+ *      | size       |          ^---------------size--------------------^
+ *      +------------+
+ *
+ * Each message in data stream starts with the single u32 treated as a header,
+ * followed by optional set of u32 data that makes message specific payload::
+ *
+ *      +------------+---------+---------+---------+
+ *      |         MESSAGE                          |
+ *      +------------+---------+---------+---------+
+ *      |   msg[0]   |   [1]   |   ...   |  [n-1]  |
+ *      +------------+---------+---------+---------+
+ *      |   MESSAGE  |       MESSAGE PAYLOAD       |
+ *      +   HEADER   +---------+---------+---------+
+ *      |            |    0    |   ...   |    n    |
+ *      +======+=====+=========+=========+=========+
+ *      | 31:16| code|         |         |         |
+ *      +------+-----+         |         |         |
+ *      |  15:5|flags|         |         |         |
+ *      +------+-----+         |         |         |
+ *      |   4:0|  len|         |         |         |
+ *      +------+-----+---------+---------+---------+
+ *
+ *                   ^-------------len-------------^
+ *
+ * The message header consists of:
+ *
+ * - **len**, indicates length of the message payload (in u32)
+ * - **code**, indicates message code
+ * - **flags**, holds various bits to control message handling
+ */
+
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0]	message len (in dwords)
+ * bit[7..5]	reserved
+ * bit[8]	response (G2H only)
+ * bit[8]	write fence to desc (H2G only)
+ * bit[9]	write status to H2G buff (H2G only)
+ * bit[10]	send status back via G2H (H2G only)
+ * bit[15..11]	reserved
+ * bit[31..16]	action code
+ */
+#define GUC_CT_MSG_LEN_SHIFT			0
+#define GUC_CT_MSG_LEN_MASK			0x1F
+#define GUC_CT_MSG_IS_RESPONSE			(1 << 8)
+#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
+#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
+#define GUC_CT_MSG_SEND_STATUS			(1 << 10)
+#define GUC_CT_MSG_ACTION_SHIFT			16
+#define GUC_CT_MSG_ACTION_MASK			0xFFFF
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
new file mode 100644
index 0000000000000..ef538e34f8947
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+
+/**
+ * DOC: GuC MMIO based communication
+ *
+ * The MMIO based communication between Host and GuC relies on special
+ * hardware registers which format could be defined by the software
+ * (so called scratch registers).
+ *
+ * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H)
+ * messages, which maximum length depends on number of available scratch
+ * registers, is directly written into those scratch registers.
+ *
+ * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8,
+ * but no H2G command takes more than 4 parameters and the GuC firmware
+ * itself uses an 4-element array to store the H2G message.
+ *
+ * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which
+ * are, regardless on lower count, preferred over legacy ones.
+ *
+ * The MMIO based communication is mainly used during driver initialization
+ * phase to setup the `CTB based communication`_ that will be used afterwards.
+ */
+
+#define GUC_MAX_MMIO_MSG_LEN		4
+
+/**
+ * DOC: MMIO HXG Message
+ *
+ * Format of the MMIO messages follows definitions of `HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
new file mode 100644
index 0000000000000..ec83551bf9c0b
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ERRORS_ABI_H
+#define _ABI_GUC_ERRORS_ABI_H
+
+enum xe_guc_response_status {
+	XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+	XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
+};
+
+enum xe_guc_load_status {
+	XE_GUC_LOAD_STATUS_DEFAULT                          = 0x00,
+	XE_GUC_LOAD_STATUS_START                            = 0x01,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH       = 0x02,
+	XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH       = 0x03,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE      = 0x04,
+	XE_GUC_LOAD_STATUS_GDT_DONE                         = 0x10,
+	XE_GUC_LOAD_STATUS_IDT_DONE                         = 0x20,
+	XE_GUC_LOAD_STATUS_LAPIC_DONE                       = 0x30,
+	XE_GUC_LOAD_STATUS_GUCINT_DONE                      = 0x40,
+	XE_GUC_LOAD_STATUS_DPC_READY                        = 0x50,
+	XE_GUC_LOAD_STATUS_DPC_ERROR                        = 0x60,
+	XE_GUC_LOAD_STATUS_EXCEPTION                        = 0x70,
+	XE_GUC_LOAD_STATUS_INIT_DATA_INVALID                = 0x71,
+	XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED        = 0x72,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
+	XE_GUC_LOAD_STATUS_MPU_DATA_INVALID                 = 0x73,
+	XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID   = 0x74,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
+
+	XE_GUC_LOAD_STATUS_READY                            = 0xF0,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
new file mode 100644
index 0000000000000..47094b9b044cb
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_KLVS_ABI_H
+#define _ABI_GUC_KLVS_ABI_H
+
+#include <linux/types.h>
+
+/**
+ * DOC: GuC KLV
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **KEY** - KLV key identifier                                 |
+ *  |   |       |   - `GuC Self Config KLVs`_                                  |
+ *  |   |       |   - `GuC VGT Policy KLVs`_                                   |
+ *  |   |       |   - `GuC VF Configuration KLVs`_                             |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **LEN** - length of VALUE (in 32bit dwords)                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VALUE** - actual value of the KLV (format depends on KEY)  |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN				1u
+#define GUC_KLV_0_KEY				(0xffff << 16)
+#define GUC_KLV_0_LEN				(0xffff << 0)
+#define GUC_KLV_n_VALUE				(0xffffffff << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      status vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      source vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ *      Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR : 0x0903
+ *      Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE : 0x0904
+ *      Refers to size of H2G `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR : 0x0905
+ *      Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR : 0x0906
+ *      Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _GUC_KLV_SELF_CFG_G2H_CTB_SIZE : 0x0907
+ *      Refers to size of G2H `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY		0x0900
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY		0x0901
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u
+
+/*
+ * Per context scheduling policy update keys.
+ */
+enum  {
+	GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM			= 0x2001,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT			= 0x2002,
+	GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY			= 0x2003,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY	= 0x2004,
+	GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY			= 0x2005,
+
+	GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
+};
+
+/**
+ * DOC: GuC VGT Policy KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
+ *
+ * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
+ *      This config sets whether strict scheduling is enabled whereby any VF
+ *      that doesn’t have work to submit is still allocated a fixed execution
+ *      time-slice to ensure active VFs execution is always consitent even
+ *      during other VF reprovisiong / rebooting events. Changing this KLV
+ *      impacts all VFs and takes effect on the next VF-Switch event.
+ *
+ *      :0: don't schedule idle (default)
+ *      :1: schedule if idle
+ *
+ * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
+ *      This config sets the sample period for tracking adverse event counters.
+ *       A sample period is the period in millisecs during which events are counted.
+ *       This is applicable for all the VFs.
+ *
+ *      :0: adverse events are not counted (default)
+ *      :n: sample period in milliseconds
+ *
+ * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
+ *      This enum is to reset utilized HW engine after VF Switch (i.e to clean
+ *      up Stale HW register left behind by previous VF)
+ *
+ *      :0: don't reset (default)
+ *      :1: reset
+ */
+
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY		0x8001
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN		1u
+
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY	0x8002
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN	1u
+
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY	0x8D00
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN	1u
+
+/**
+ * DOC: GuC VF Configuration KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
+ *      A 4K aligned start GTT address/offset assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
+ *      A 4K aligned size of GGTT assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
+ *      A 2M aligned size of local memory assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
+ *      Refers to the number of contexts allocated to this VF.
+ *
+ *      :0: no contexts (default)
+ *      :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
+ *      For multi-tiled products, this field contains the bitwise-OR of tiles
+ *      assigned to the VF. Bit-0-set means VF has access to Tile-0,
+ *      Bit-31-set means VF has access to Tile-31, and etc.
+ *      At least one tile will always be allocated.
+ *      If all bits are zero, VF KMD should treat this as a fatal error.
+ *      For, single-tile products this KLV config is ignored.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
+ *      Refers to the number of doorbells allocated to this VF.
+ *
+ *      :0: no doorbells (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
+ *      This config sets the VFs-execution-quantum in milliseconds.
+ *      GUC will attempt to obey the maximum values as much as HW is capable
+ *      of and this will never be perfectly-exact (accumulated nano-second
+ *      granularity) since the GPUs clock time runs off a different crystal
+ *      from the CPUs clock. Changing this KLV on a VF that is currently
+ *      running a context wont take effect until a new context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      inifinitely long compute or shader kernel. In such a scenario, the
+ *      PF would need to trigger a VM PAUSE and then change the KLV to force
+ *      it to take effect. Such cases might typically happen on a 1PF+1VF
+ *      Virtualization config enabled for heavier workloads like AI/ML.
+ *
+ *      :0: infinite exec quantum (default)
+ *
+ * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
+ *      This config sets the VF-preemption-timeout in microseconds.
+ *      GUC will attempt to obey the minimum and maximum values as much as
+ *      HW is capable and this will never be perfectly-exact (accumulated
+ *      nano-second granularity) since the GPUs clock time runs off a
+ *      different crystal from the CPUs clock. Changing this KLV on a VF
+ *      that is currently running a context wont take effect until a new
+ *      context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      inifinitely long compute or shader kernel.
+ *      In this case, the PF would need to trigger a VM PAUSE and then change
+ *      the KLV to force it to take effect. Such cases might typically happen
+ *      on a 1PF+1VF Virtualization config enabled for heavier workloads like
+ *      AI/ML.
+ *
+ *      :0: no preemption timeout (default)
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
+ *      This config sets threshold for CAT errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
+ *      This config sets threshold for engine reset caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
+ *      This config sets threshold for page fault errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
+ *      This config sets threshold for H2G interrupts triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
+ *      This config sets threshold for GT interrupts triggered by the VF's
+ *      workloads.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
+ *      This config sets threshold for doorbell's ring triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
+ *      Refers to the start index of doorbell assigned to this VF.
+ *
+ *      :0: (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
+ *      Refers to the start index in context array allocated to this VF’s use.
+ *
+ *      :0: (default)
+ *      :1-65535: number of contexts (Gen12)
+ */
+
+#define GUC_KLV_VF_CFG_GGTT_START_KEY		0x0001
+#define GUC_KLV_VF_CFG_GGTT_START_LEN		2u
+
+#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY		0x0002
+#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN		2u
+
+#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY		0x0003
+#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN		2u
+
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY		0x0004
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN		1u
+
+#define GUC_KLV_VF_CFG_TILE_MASK_KEY		0x0005
+#define GUC_KLV_VF_CFG_TILE_MASK_LEN		1u
+
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY	0x0006
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN	1u
+
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY		0x8a01
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN		1u
+
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY	0x8a02
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY		0x8a03
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY	0x8a04
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY		0x8a05
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY		0x8a06
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY		0x8a07
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY	0x8a08
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY	0x8a0a
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY	0x8a0b
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN	1u
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
new file mode 100644
index 0000000000000..3d199016cf881
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_MESSAGES_ABI_H
+#define _ABI_GUC_MESSAGES_ABI_H
+
+/**
+ * DOC: HXG Message
+ *
+ * All messages exchanged with GuC are defined using 32 bit dwords.
+ * First dword is treated as a message header. Remaining dwords are optional.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  |   |       |                                                              |
+ *  | 0 |    31 | **ORIGIN** - originator of the message                       |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_HOST` = 0                               |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_GUC` = 1                                |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | **TYPE** - message type                                      |
+ *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
+ *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7                     |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **AUX** - auxiliary data (depends on TYPE)                   |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **PAYLOAD** - optional payload (depends on TYPE)             |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_MSG_MIN_LEN			1u
+#define GUC_HXG_MSG_0_ORIGIN			(0x1 << 31)
+#define   GUC_HXG_ORIGIN_HOST			0u
+#define   GUC_HXG_ORIGIN_GUC			1u
+#define GUC_HXG_MSG_0_TYPE			(0x7 << 28)
+#define   GUC_HXG_TYPE_REQUEST			0u
+#define   GUC_HXG_TYPE_EVENT			1u
+#define   GUC_HXG_TYPE_NO_RESPONSE_BUSY		3u
+#define   GUC_HXG_TYPE_NO_RESPONSE_RETRY	5u
+#define   GUC_HXG_TYPE_RESPONSE_FAILURE		6u
+#define   GUC_HXG_TYPE_RESPONSE_SUCCESS		7u
+#define GUC_HXG_MSG_0_AUX			(0xfffffff << 0)
+#define GUC_HXG_MSG_n_PAYLOAD			(0xffffffff << 0)
+
+/**
+ * DOC: HXG Request
+ *
+ * The `HXG Request`_ message should be used to initiate synchronous activity
+ * for which confirmation or return data is expected.
+ *
+ * The recipient of this message shall use `HXG Response`_, `HXG Failure`_
+ * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_
+ * message as a intermediate reply.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - request data (depends on ACTION)                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - requested action code                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional data (depends on ACTION)                |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_REQUEST_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_REQUEST_MSG_0_DATA0		(0xfff << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION		(0xffff << 0)
+#define GUC_HXG_REQUEST_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Event
+ *
+ * The `HXG Event`_ message should be used to initiate asynchronous activity
+ * that does not involves immediate confirmation nor data.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - event data (depends on ACTION)                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - event action code                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional event  data (depends on ACTION)         |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_EVENT_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_EVENT_MSG_0_DATA0		(0xfff << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION		(0xffff << 0)
+#define GUC_HXG_EVENT_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Busy
+ *
+ * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_
+ * message if the recipient expects that it processing will be longer than default
+ * timeout.
+ *
+ * The @COUNTER field may be used as a progress indicator.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **COUNTER** - progress indicator                             |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_BUSY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_BUSY_MSG_0_COUNTER		GUC_HXG_MSG_0_AUX
+
+/**
+ * DOC: HXG Retry
+ *
+ * The `HXG Retry`_ message should be used by recipient to indicate that the
+ * `HXG Request`_ message was dropped and it should be resent again.
+ *
+ * The @REASON field may be used to provide additional information.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **REASON** - reason for retry                                |
+ *  |   |       |  - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0                   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RETRY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RETRY_MSG_0_REASON		GUC_HXG_MSG_0_AUX
+#define   GUC_HXG_RETRY_REASON_UNSPECIFIED	0u
+
+/**
+ * DOC: HXG Failure
+ *
+ * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_
+ * message that could not be processed due to an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **HINT** - additional error hint                             |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ERROR** - error/result code                                |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_FAILURE_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_FAILURE_MSG_0_HINT		(0xfff << 16)
+#define GUC_HXG_FAILURE_MSG_0_ERROR		(0xffff << 0)
+
+/**
+ * DOC: HXG Response
+ *
+ * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_
+ * message that was successfully processed without an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RESPONSE_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RESPONSE_MSG_0_DATA0		GUC_HXG_MSG_0_AUX
+#define GUC_HXG_RESPONSE_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/* deprecated */
+#define INTEL_GUC_MSG_TYPE_SHIFT	28
+#define INTEL_GUC_MSG_TYPE_MASK		(0xF << INTEL_GUC_MSG_TYPE_SHIFT)
+#define INTEL_GUC_MSG_DATA_SHIFT	16
+#define INTEL_GUC_MSG_DATA_MASK		(0xFFF << INTEL_GUC_MSG_DATA_SHIFT)
+#define INTEL_GUC_MSG_CODE_SHIFT	0
+#define INTEL_GUC_MSG_CODE_MASK		(0xFFFF << INTEL_GUC_MSG_CODE_SHIFT)
+
+enum intel_guc_msg_type {
+	INTEL_GUC_MSG_TYPE_REQUEST = 0x0,
+	INTEL_GUC_MSG_TYPE_RESPONSE = 0xF,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
new file mode 100644
index 0000000000000..47056b6459e3c
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \
+	xe_migrate_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
new file mode 100644
index 0000000000000..87ac21cc8ca93
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_bo_evict.h"
+#include "xe_pci.h"
+
+static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
+			    bool clear, u64 get_val, u64 assign_val,
+			    struct kunit *test)
+{
+	struct dma_fence *fence;
+	struct ttm_tt *ttm;
+	struct page *page;
+	pgoff_t ccs_page;
+	long timeout;
+	u64 *cpu_map;
+	int ret;
+	u32 offset;
+
+	/* Move bo to VRAM if not already there. */
+	ret = xe_bo_validate(bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate bo.\n");
+		return ret;
+	}
+
+	/* Optionally clear bo *and* CCS data in VRAM. */
+	if (clear) {
+		fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0);
+		if (IS_ERR(fence)) {
+			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
+			return PTR_ERR(fence);
+		}
+		dma_fence_put(fence);
+	}
+
+	/* Evict to system. CCS data should be copied. */
+	ret = xe_bo_evict(bo, true);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to evict bo.\n");
+		return ret;
+	}
+
+	/* Sync all migration blits */
+	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+					DMA_RESV_USAGE_KERNEL,
+					true,
+					5 * HZ);
+	if (timeout <= 0) {
+		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
+		return -ETIME;
+	}
+
+	/*
+	 * Bo with CCS data is now in system memory. Verify backing store
+	 * and data integrity. Then assign for the next testing round while
+	 * we still have a CPU map.
+	 */
+	ttm = bo->ttm.ttm;
+	if (!ttm || !ttm_tt_is_populated(ttm)) {
+		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
+		return -EINVAL;
+	}
+
+	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
+	if (ccs_page >= ttm->num_pages) {
+		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
+		return -EINVAL;
+	}
+
+	page = ttm->pages[ccs_page];
+	cpu_map = kmap_local_page(page);
+
+	/* Check first CCS value */
+	if (cpu_map[0] != get_val) {
+		KUNIT_FAIL(test,
+			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val,
+			   (unsigned long long)cpu_map[0]);
+		ret = -EINVAL;
+	}
+
+	/* Check last CCS value, or at least last value in page. */
+	offset = xe_device_ccs_bytes(gt->xe, bo->size);
+	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
+	if (cpu_map[offset] != get_val) {
+		KUNIT_FAIL(test,
+			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val,
+			   (unsigned long long)cpu_map[offset]);
+		ret = -EINVAL;
+	}
+
+	cpu_map[0] = assign_val;
+	cpu_map[offset] = assign_val;
+	kunmap_local(cpu_map);
+
+	return ret;
+}
+
+static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt,
+			    struct kunit *test)
+{
+	struct xe_bo *bo;
+	u32 vram_bit;
+	int ret;
+
+	/* TODO: Sanity check */
+	vram_bit = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id;
+	kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id,
+		   gt->info.vram_id);
+
+	bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device,
+				 vram_bit);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "Failed to create bo.\n");
+		return;
+	}
+
+	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
+	ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
+			       test);
+	if (ret)
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data survives migration.\n");
+	ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL,
+			       0xdeadbeefdeadbeefULL, test);
+	if (ret)
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
+	ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test);
+
+out_unlock:
+	xe_bo_unlock_no_vm(bo);
+	xe_bo_put(bo);
+}
+
+static int ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_gt *gt;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	for_each_gt(gt, xe, id)
+		ccs_test_run_gt(xe, gt, test);
+
+	return 0;
+}
+
+void xe_ccs_migrate_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(ccs_test_run_device);
+}
+EXPORT_SYMBOL(xe_ccs_migrate_kunit);
+
+static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test)
+{
+	struct xe_bo *bo, *external;
+	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
+		XE_BO_CREATE_VRAM_IF_DGFX(gt);
+	struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate);
+	struct ww_acquire_ctx ww;
+	int err, i;
+
+	kunit_info(test, "Testing device %s gt id %u vram id %u\n",
+		   dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id);
+
+	for (i = 0; i < 2; ++i) {
+		xe_vm_lock(vm, &ww, 0, false);
+		bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device,
+				  bo_flags);
+		xe_vm_unlock(vm, &ww);
+		if (IS_ERR(bo)) {
+			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
+			break;
+		}
+
+		external = xe_bo_create(xe, NULL, NULL, 0x10000,
+					ttm_bo_type_device, bo_flags);
+		if (IS_ERR(external)) {
+			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
+			goto cleanup_bo;
+		}
+
+		xe_bo_lock(external, &ww, 0, false);
+		err = xe_bo_pin_external(external);
+		xe_bo_unlock(external, &ww);
+		if (err) {
+			KUNIT_FAIL(test, "external bo pin err=%pe\n",
+				   ERR_PTR(err));
+			goto cleanup_external;
+		}
+
+		err = xe_bo_evict_all(xe);
+		if (err) {
+			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		err = xe_bo_restore_kernel(xe);
+		if (err) {
+			KUNIT_FAIL(test, "restore kernel err=%pe\n",
+				   ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		err = xe_bo_restore_user(xe);
+		if (err) {
+			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		if (!xe_bo_is_vram(external)) {
+			KUNIT_FAIL(test, "external bo is not vram\n");
+			err = -EPROTO;
+			goto cleanup_all;
+		}
+
+		if (xe_bo_is_vram(bo)) {
+			KUNIT_FAIL(test, "bo is vram\n");
+			err = -EPROTO;
+			goto cleanup_all;
+		}
+
+		if (i) {
+			down_read(&vm->lock);
+			xe_vm_lock(vm, &ww, 0, false);
+			err = xe_bo_validate(bo, bo->vm, false);
+			xe_vm_unlock(vm, &ww);
+			up_read(&vm->lock);
+			if (err) {
+				KUNIT_FAIL(test, "bo valid err=%pe\n",
+					   ERR_PTR(err));
+				goto cleanup_all;
+			}
+			xe_bo_lock(external, &ww, 0, false);
+			err = xe_bo_validate(external, NULL, false);
+			xe_bo_unlock(external, &ww);
+			if (err) {
+				KUNIT_FAIL(test, "external bo valid err=%pe\n",
+					   ERR_PTR(err));
+				goto cleanup_all;
+			}
+		}
+
+		xe_bo_lock(external, &ww, 0, false);
+		xe_bo_unpin_external(external);
+		xe_bo_unlock(external, &ww);
+
+		xe_bo_put(external);
+		xe_bo_put(bo);
+		continue;
+
+cleanup_all:
+		xe_bo_lock(external, &ww, 0, false);
+		xe_bo_unpin_external(external);
+		xe_bo_unlock(external, &ww);
+cleanup_external:
+		xe_bo_put(external);
+cleanup_bo:
+		xe_bo_put(bo);
+		break;
+	}
+
+	xe_vm_put(vm);
+
+	return 0;
+}
+
+static int evict_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_gt *gt;
+	int id;
+
+	if (!IS_DGFX(xe)) {
+		kunit_info(test, "Skipping non-discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	for_each_gt(gt, xe, id)
+		evict_test_run_gt(xe, gt, test);
+
+	return 0;
+}
+
+void xe_bo_evict_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(evict_test_run_device);
+}
+EXPORT_SYMBOL(xe_bo_evict_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
new file mode 100644
index 0000000000000..c8fa29b0b3b23
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_ccs_migrate_kunit(struct kunit *test);
+void xe_bo_evict_kunit(struct kunit *test);
+
+static struct kunit_case xe_bo_tests[] = {
+	KUNIT_CASE(xe_ccs_migrate_kunit),
+	KUNIT_CASE(xe_bo_evict_kunit),
+	{}
+};
+
+static struct kunit_suite xe_bo_test_suite = {
+	.name = "xe_bo",
+	.test_cases = xe_bo_tests,
+};
+
+kunit_test_suite(xe_bo_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
new file mode 100644
index 0000000000000..615d22e3f7318
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_pci.h"
+
+static bool p2p_enabled(struct dma_buf_test_params *params)
+{
+	return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops &&
+		params->attach_ops->allow_peer2peer;
+}
+
+static bool is_dynamic(struct dma_buf_test_params *params)
+{
+	return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops &&
+		params->attach_ops->move_notify;
+}
+
+static void check_residency(struct kunit *test, struct xe_bo *exported,
+			    struct xe_bo *imported, struct dma_buf *dmabuf)
+{
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	u32 mem_type;
+	int ret;
+
+	xe_bo_assert_held(exported);
+	xe_bo_assert_held(imported);
+
+	mem_type = XE_PL_VRAM0;
+	if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		/* No VRAM allowed */
+		mem_type = XE_PL_TT;
+	else if (params->force_different_devices && !p2p_enabled(params))
+		/* No P2P */
+		mem_type = XE_PL_TT;
+	else if (params->force_different_devices && !is_dynamic(params) &&
+		 (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+		/* Pin migrated to TT */
+		mem_type = XE_PL_TT;
+
+	if (!xe_bo_is_mem_type(exported, mem_type)) {
+		KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n");
+		return;
+	}
+
+	if (xe_bo_is_pinned(exported))
+		return;
+
+	/*
+	 * Evict exporter. Note that the gem object dma_buf member isn't
+	 * set from xe_gem_prime_export(), and it's needed for the move_notify()
+	 * functionality, so hack that up here. Evicting the exported bo will
+	 * evict also the imported bo through the move_notify() functionality if
+	 * importer is on a different device. If they're on the same device,
+	 * the exporter and the importer should be the same bo.
+	 */
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	ret = xe_bo_evict(exported, true);
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n",
+				   ret);
+		return;
+	}
+
+	/* Verify that also importer has been evicted to SYSTEM */
+	if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) {
+		KUNIT_FAIL(test, "Importer wasn't properly evicted.\n");
+		return;
+	}
+
+	/* Re-validate the importer. This should move also exporter in. */
+	ret = xe_bo_validate(imported, NULL, false);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Validating importer failed with err=%d.\n",
+				   ret);
+		return;
+	}
+
+	/*
+	 * If on different devices, the exporter is kept in system  if
+	 * possible, saving a migration step as the transfer is just
+	 * likely as fast from system memory.
+	 */
+	if (params->force_different_devices &&
+	    params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+
+	if (params->force_different_devices)
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, exported == imported);
+}
+
+static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	struct drm_gem_object *import;
+	struct dma_buf *dmabuf;
+	struct xe_bo *bo;
+
+	/* No VRAM on this device? */
+	if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
+	    (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		return;
+
+	kunit_info(test, "running %s\n", __func__);
+	bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device,
+			  XE_BO_CREATE_USER_BIT | params->mem_mask);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(bo));
+		return;
+	}
+
+	dmabuf = xe_gem_prime_export(&bo->ttm.base, 0);
+	if (IS_ERR(dmabuf)) {
+		KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n",
+			   PTR_ERR(dmabuf));
+		goto out;
+	}
+
+	import = xe_gem_prime_import(&xe->drm, dmabuf);
+	if (!IS_ERR(import)) {
+		struct xe_bo *import_bo = gem_to_xe_bo(import);
+
+		/*
+		 * Did import succeed when it shouldn't due to lack of p2p support?
+		 */
+		if (params->force_different_devices &&
+		    !p2p_enabled(params) &&
+		    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+			KUNIT_FAIL(test,
+				   "xe_gem_prime_import() succeeded when it shouldn't have\n");
+		} else {
+			int err;
+
+			/* Is everything where we expect it to be? */
+			xe_bo_lock_no_vm(import_bo, NULL);
+			err = xe_bo_validate(import_bo, NULL, false);
+			if (err && err != -EINTR && err != -ERESTARTSYS)
+				KUNIT_FAIL(test,
+					   "xe_bo_validate() failed with err=%d\n", err);
+
+			check_residency(test, bo, import_bo, dmabuf);
+			xe_bo_unlock_no_vm(import_bo);
+		}
+		drm_gem_object_put(import);
+	} else if (PTR_ERR(import) != -EOPNOTSUPP) {
+		/* Unexpected error code. */
+		KUNIT_FAIL(test,
+			   "xe_gem_prime_import failed with the wrong err=%ld\n",
+			   PTR_ERR(import));
+	} else if (!params->force_different_devices ||
+		   p2p_enabled(params) ||
+		   (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+		/* Shouldn't fail if we can reuse same bo, use p2p or use system */
+		KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
+			   PTR_ERR(import));
+	}
+	dma_buf_put(dmabuf);
+out:
+	drm_gem_object_put(&bo->ttm.base);
+}
+
+static const struct dma_buf_attach_ops nop2p_attach_ops = {
+	.allow_peer2peer = false,
+	.move_notify = xe_dma_buf_move_notify
+};
+
+/*
+ * We test the implementation with bos of different residency and with
+ * importers with different capabilities; some lacking p2p support and some
+ * lacking dynamic capabilities (attach_ops == NULL). We also fake
+ * different devices avoiding the import shortcut that just reuses the same
+ * gem object.
+ */
+static const struct dma_buf_test_params test_params[] = {
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{}
+};
+
+static int dma_buf_run_device(struct xe_device *xe)
+{
+	const struct dma_buf_test_params *params;
+	struct kunit *test = xe_cur_kunit();
+
+	for (params = test_params; params->mem_mask; ++params) {
+		struct dma_buf_test_params p = *params;
+
+		p.base.id = XE_TEST_LIVE_DMA_BUF;
+		test->priv = &p;
+		xe_test_dmabuf_import_same_driver(xe);
+	}
+
+	/* A non-zero return would halt iteration over driver devices */
+	return 0;
+}
+
+void xe_dma_buf_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(dma_buf_run_device);
+}
+EXPORT_SYMBOL(xe_dma_buf_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
new file mode 100644
index 0000000000000..7bb292da11936
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_dma_buf_kunit(struct kunit *test);
+
+static struct kunit_case xe_dma_buf_tests[] = {
+	KUNIT_CASE(xe_dma_buf_kunit),
+	{}
+};
+
+static struct kunit_suite xe_dma_buf_test_suite = {
+	.name = "xe_dma_buf",
+	.test_cases = xe_dma_buf_tests,
+};
+
+kunit_test_suite(xe_dma_buf_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
new file mode 100644
index 0000000000000..0f3b819f0a349
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020-2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_pci.h"
+
+static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
+				const char *str, struct kunit *test)
+{
+	long ret;
+
+	if (IS_ERR(fence)) {
+		KUNIT_FAIL(test, "Failed to create fence for %s: %li\n", str,
+			   PTR_ERR(fence));
+		return true;
+	}
+	if (!fence)
+		return true;
+
+	ret = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	if (ret <= 0) {
+		KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, ret);
+		return true;
+	}
+
+	return false;
+}
+
+static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
+			  struct xe_bb *bb, u32 second_idx, const char *str,
+			  struct kunit *test)
+{
+	struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb,
+							      m->batch_base_ofs,
+							      second_idx);
+	struct dma_fence *fence;
+
+	if (IS_ERR(job)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
+			   PTR_ERR(job));
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	if (sanity_fence_failed(xe, fence, str, test))
+		return -ETIMEDOUT;
+
+	dma_fence_put(fence);
+	kunit_info(test, "%s: Job completed\n", str);
+	return 0;
+}
+
+static void
+sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
+		   struct xe_gt *gt, struct iosys_map *map, void *dst,
+		   u32 qword_ofs, u32 num_qwords,
+		   const struct xe_vm_pgtable_update *update)
+{
+	int i;
+	u64 *ptr = dst;
+
+	for (i = 0; i < num_qwords; i++)
+		ptr[i] = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
+}
+
+static const struct xe_migrate_pt_update_ops sanity_ops = {
+	.populate = sanity_populate_cb,
+};
+
+#define check(_retval, _expected, str, _test)				\
+	do { if ((_retval) != (_expected)) {				\
+			KUNIT_FAIL(_test, "Sanity check failed: " str	\
+				   " expected %llx, got %llx\n",	\
+				   (u64)(_expected), (u64)(_retval));	\
+		} } while (0)
+
+static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
+		      struct kunit *test)
+{
+	struct xe_device *xe = gt_to_xe(m->gt);
+	u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL;
+	bool big = bo->size >= SZ_2M;
+	struct dma_fence *fence;
+	const char *str = big ? "Copying big bo" : "Copying small bo";
+	int err;
+
+	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
+						   bo->size,
+						   ttm_bo_type_kernel,
+						   XE_BO_CREATE_SYSTEM_BIT);
+	if (IS_ERR(sysmem)) {
+		KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
+			   str, PTR_ERR(sysmem));
+		return;
+	}
+
+	err = xe_bo_validate(sysmem, NULL, false);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n",
+			   str, err);
+		goto out_unlock;
+	}
+
+	err = xe_bo_vmap(sysmem);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n",
+			   str, err);
+		goto out_unlock;
+	}
+
+	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
+	fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0);
+	if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" :
+				 "Clearing sysmem small bo", test)) {
+		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
+		check(retval, expected, "sysmem first offset should be cleared",
+		      test);
+		retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64);
+		check(retval, expected, "sysmem last offset should be cleared",
+		      test);
+	}
+	dma_fence_put(fence);
+
+	/* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */
+	xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
+
+	fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource,
+				bo->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" :
+				 "Copying small bo sysmem -> vram", test)) {
+		retval = xe_map_rd(xe, &bo->vmap, 0, u64);
+		check(retval, expected,
+		      "sysmem -> vram bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "sysmem -> vram bo offset should be copied", test);
+	}
+	dma_fence_put(fence);
+
+	/* And other way around.. slightly hacky.. */
+	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
+
+	fence = xe_migrate_copy(m, sysmem, bo->ttm.resource,
+				sysmem->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" :
+				 "Copying small bo vram -> sysmem", test)) {
+		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
+		check(retval, expected,
+		      "vram -> sysmem bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "vram -> sysmem bo last offset should be copied", test);
+	}
+	dma_fence_put(fence);
+
+	xe_bo_vunmap(sysmem);
+out_unlock:
+	xe_bo_unlock_no_vm(sysmem);
+	xe_bo_put(sysmem);
+}
+
+static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
+			   struct kunit *test)
+{
+	struct xe_device *xe = gt_to_xe(m->gt);
+	struct dma_fence *fence;
+	u64 retval, expected;
+	int i;
+
+	struct xe_vm_pgtable_update update = {
+		.ofs = 1,
+		.qwords = 0x10,
+		.pt_bo = pt,
+	};
+	struct xe_migrate_pt_update pt_update = {
+		.ops = &sanity_ops,
+	};
+
+	/* Test xe_migrate_update_pgtables() updates the pagetable as expected */
+	expected = 0xf0f0f0f0f0f0f0f0ULL;
+	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
+
+	fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1,
+					   NULL, 0, &pt_update);
+	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
+		return;
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &pt->vmap, 0, u64);
+	check(retval, expected, "PTE[0] must stay untouched", test);
+
+	for (i = 0; i < update.qwords; i++) {
+		retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
+		check(retval, i * 0x1111111111111111ULL, "PTE update", test);
+	}
+
+	retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords),
+			   u64);
+	check(retval, expected, "PTE[0x11] must stay untouched", test);
+}
+
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+{
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
+	struct xe_res_cursor src_it;
+	struct dma_fence *fence;
+	u64 retval, expected;
+	struct xe_bb *bb;
+	int err;
+	u8 id = gt->info.id;
+
+	err = xe_bo_vmap(bo);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap our pagetables: %li\n",
+			   PTR_ERR(bo));
+		return;
+	}
+
+	big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M,
+				   ttm_bo_type_kernel,
+				   XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				   XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(big)) {
+		KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
+		goto vunmap;
+	}
+
+	pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(pt)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
+			   PTR_ERR(pt));
+		goto free_big;
+	}
+
+	tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm,
+				    2 * SZ_4K,
+				    ttm_bo_type_kernel,
+				    XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				    XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(tiny)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
+			   PTR_ERR(pt));
+		goto free_pt;
+	}
+
+	bb = xe_bb_new(m->gt, 32, xe->info.supports_usm);
+	if (IS_ERR(bb)) {
+		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
+			   PTR_ERR(bb));
+		goto free_tiny;
+	}
+
+	kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n",
+		   xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE),
+		   xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE));
+
+	/* First part of the test, are we updating our pagetable bo with a new entry? */
+	xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef);
+	expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
+	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
+		expected |= GEN12_PTE_PS64;
+	xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
+		 &src_it, GEN8_PAGE_SIZE, pt);
+	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
+
+	retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
+			   u64);
+	check(retval, expected, "PTE entry write", test);
+
+	/* Now try to write data to our newly mapped 'pagetable', see if it succeeds */
+	bb->len = 0;
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
+	expected = 0x12345678U;
+
+	emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
+		   expected, IS_DGFX(xe));
+	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
+		       test);
+
+	retval = xe_map_rd(xe, &pt->vmap, 0, u32);
+	check(retval, expected, "Write to PT after adding PTE", test);
+
+	/* Sanity checks passed, try the full ones! */
+
+	/* Clear a small bo */
+	kunit_info(test, "Clearing small buffer object\n");
+	xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
+	expected = 0x224488ff;
+	fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected);
+	if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
+		goto out;
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
+	check(retval, expected, "Command clear small first value", test);
+	retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
+	check(retval, expected, "Command clear small last value", test);
+
+	if (IS_DGFX(xe)) {
+		kunit_info(test, "Copying small buffer object to system\n");
+		test_copy(m, tiny, test);
+	}
+
+	/* Clear a big bo with a fixed value */
+	kunit_info(test, "Clearing big buffer object\n");
+	xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
+	expected = 0x11223344U;
+	fence = xe_migrate_clear(m, big, big->ttm.resource, expected);
+	if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
+		goto out;
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &big->vmap, 0, u32);
+	check(retval, expected, "Command clear big first value", test);
+	retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
+	check(retval, expected, "Command clear big last value", test);
+
+	if (IS_DGFX(xe)) {
+		kunit_info(test, "Copying big buffer object to system\n");
+		test_copy(m, big, test);
+	}
+
+	test_pt_update(m, pt, test);
+
+out:
+	xe_bb_free(bb, NULL);
+free_tiny:
+	xe_bo_unpin(tiny);
+	xe_bo_put(tiny);
+free_pt:
+	xe_bo_unpin(pt);
+	xe_bo_put(pt);
+free_big:
+	xe_bo_unpin(big);
+	xe_bo_put(big);
+vunmap:
+	xe_bo_vunmap(m->pt_bo);
+}
+
+static int migrate_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_gt *gt;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		struct xe_migrate *m = gt->migrate;
+		struct ww_acquire_ctx ww;
+
+		kunit_info(test, "Testing gt id %d.\n", id);
+		xe_vm_lock(m->eng->vm, &ww, 0, true);
+		xe_migrate_sanity_test(m, test);
+		xe_vm_unlock(m->eng->vm, &ww);
+	}
+
+	return 0;
+}
+
+void xe_migrate_sanity_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(migrate_test_run_device);
+}
+EXPORT_SYMBOL(xe_migrate_sanity_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
new file mode 100644
index 0000000000000..ad779e2bd0717
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_migrate_sanity_kunit(struct kunit *test);
+
+static struct kunit_case xe_migrate_tests[] = {
+	KUNIT_CASE(xe_migrate_sanity_kunit),
+	{}
+};
+
+static struct kunit_suite xe_migrate_test_suite = {
+	.name = "xe_migrate",
+	.test_cases = xe_migrate_tests,
+};
+
+kunit_test_suite(xe_migrate_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
new file mode 100644
index 0000000000000..1ec502b5acf38
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_TEST_H__
+#define __XE_TEST_H__
+
+#include <linux/types.h>
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include <linux/sched.h>
+#include <kunit/test.h>
+
+/*
+ * Each test that provides a kunit private test structure, place a test id
+ * here and point the kunit->priv to an embedded struct xe_test_priv.
+ */
+enum xe_test_priv_id {
+	XE_TEST_LIVE_DMA_BUF,
+};
+
+/**
+ * struct xe_test_priv - Base class for test private info
+ * @id: enum xe_test_priv_id to identify the subclass.
+ */
+struct xe_test_priv {
+	enum xe_test_priv_id id;
+};
+
+#define XE_TEST_DECLARE(x) x
+#define XE_TEST_ONLY(x) unlikely(x)
+#define XE_TEST_EXPORT
+#define xe_cur_kunit() current->kunit_test
+
+/**
+ * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by
+ * current->kunit->priv if it exists and is embedded in the expected subclass.
+ * @id: Id of the expected subclass.
+ *
+ * Return: NULL if the process is not a kunit test, and NULL if the
+ * current kunit->priv pointer is not pointing to an object of the expected
+ * subclass. A pointer to the embedded struct xe_test_priv otherwise.
+ */
+static inline struct xe_test_priv *
+xe_cur_kunit_priv(enum xe_test_priv_id id)
+{
+	struct xe_test_priv *priv;
+
+	if (!xe_cur_kunit())
+		return NULL;
+
+	priv = xe_cur_kunit()->priv;
+	return priv->id == id ? priv : NULL;
+}
+
+#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */
+
+#define XE_TEST_DECLARE(x)
+#define XE_TEST_ONLY(x) 0
+#define XE_TEST_EXPORT static
+#define xe_cur_kunit() NULL
+#define xe_cur_kunit_priv(_id) NULL
+
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
new file mode 100644
index 0000000000000..8b9209571fd01
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bb.h"
+#include "xe_sa.h"
+#include "xe_device.h"
+#include "xe_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+#include "gt/intel_gpu_commands.h"
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
+{
+	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
+	int err;
+
+	if (!bb)
+		return ERR_PTR(-ENOMEM);
+
+	bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool :
+			      &gt->usm.bb_pool, 4 * dwords + 4);
+	if (IS_ERR(bb->bo)) {
+		err = PTR_ERR(bb->bo);
+		goto err;
+	}
+
+	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
+	bb->len = 0;
+
+	return bb;
+err:
+	kfree(bb);
+	return ERR_PTR(err);
+}
+
+static struct xe_sched_job *
+__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
+{
+	u32 size = drm_suballoc_size(bb->bo);
+
+	XE_BUG_ON((bb->len * 4 + 1) > size);
+
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+	xe_sa_bo_flush_write(bb->bo);
+
+	return xe_sched_job_create(kernel_eng, addr);
+}
+
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+					 struct xe_bb *bb, u64 batch_base_ofs)
+{
+	u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);
+
+	XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+
+	return __xe_bb_create_job(wa_eng, bb, &addr);
+}
+
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+						struct xe_bb *bb,
+						u64 batch_base_ofs,
+						u32 second_idx)
+{
+	u64 addr[2] = {
+		batch_base_ofs + drm_suballoc_soffset(bb->bo),
+		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
+		4 * second_idx,
+	};
+
+	BUG_ON(second_idx > bb->len);
+	BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+
+	return __xe_bb_create_job(kernel_eng, bb, addr);
+}
+
+struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+				      struct xe_bb *bb)
+{
+	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
+
+	BUG_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION);
+	return __xe_bb_create_job(kernel_eng, bb, &addr);
+}
+
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
+{
+	if (!bb)
+		return;
+
+	xe_sa_bo_free(bb->bo, fence);
+	kfree(bb);
+}
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
new file mode 100644
index 0000000000000..0cc9260c96347
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_H_
+#define _XE_BB_H_
+
+#include "xe_bb_types.h"
+
+struct dma_fence;
+
+struct xe_gt;
+struct xe_engine;
+struct xe_sched_job;
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+				      struct xe_bb *bb);
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+						struct xe_bb *bb, u64 batch_ofs,
+						u32 second_idx);
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+					 struct xe_bb *bb, u64 batch_ofs);
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h
new file mode 100644
index 0000000000000..b7d30308cf90a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_TYPES_H_
+#define _XE_BB_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_suballoc;
+
+struct xe_bb {
+	struct drm_suballoc *bo;
+
+	u32 *cs;
+	u32 len; /* in dwords */
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
new file mode 100644
index 0000000000000..ef2c9196c113c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -0,0 +1,1698 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+
+#include "xe_bo.h"
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_preempt_fence.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static const struct ttm_place sys_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = XE_PL_SYSTEM,
+	.flags = 0,
+};
+
+static struct ttm_placement sys_placement = {
+	.num_placement = 1,
+	.placement = &sys_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags,
+};
+
+bool mem_type_is_vram(u32 mem_type)
+{
+	return mem_type >= XE_PL_VRAM0;
+}
+
+static bool resource_is_vram(struct ttm_resource *res)
+{
+	return mem_type_is_vram(res->mem_type);
+}
+
+bool xe_bo_is_vram(struct xe_bo *bo)
+{
+	return resource_is_vram(bo->ttm.resource);
+}
+
+static bool xe_bo_is_user(struct xe_bo *bo)
+{
+	return bo->flags & XE_BO_CREATE_USER_BIT;
+}
+
+static struct xe_gt *
+mem_type_to_gt(struct xe_device *xe, u32 mem_type)
+{
+	XE_BUG_ON(!mem_type_is_vram(mem_type));
+
+	return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0);
+}
+
+static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
+			   u32 bo_flags, u32 *c)
+{
+	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+		places[*c] = (struct ttm_place) {
+			.mem_type = XE_PL_TT,
+		};
+		*c += 1;
+
+		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+			bo->props.preferred_mem_type = XE_PL_TT;
+	}
+}
+
+static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
+			  struct ttm_place *places, u32 bo_flags, u32 *c)
+{
+	struct xe_gt *gt;
+
+	if (bo_flags & XE_BO_CREATE_VRAM0_BIT) {
+		gt = mem_type_to_gt(xe, XE_PL_VRAM0);
+		XE_BUG_ON(!gt->mem.vram.size);
+
+		places[*c] = (struct ttm_place) {
+			.mem_type = XE_PL_VRAM0,
+			/*
+			 * For eviction / restore on suspend / resume objects
+			 * pinned in VRAM must be contiguous
+			 */
+			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+					     XE_BO_CREATE_GGTT_BIT) ?
+				TTM_PL_FLAG_CONTIGUOUS : 0,
+		};
+		*c += 1;
+
+		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+			bo->props.preferred_mem_type = XE_PL_VRAM0;
+	}
+}
+
+static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo,
+			  struct ttm_place *places, u32 bo_flags, u32 *c)
+{
+	struct xe_gt *gt;
+
+	if (bo_flags & XE_BO_CREATE_VRAM1_BIT) {
+		gt = mem_type_to_gt(xe, XE_PL_VRAM1);
+		XE_BUG_ON(!gt->mem.vram.size);
+
+		places[*c] = (struct ttm_place) {
+			.mem_type = XE_PL_VRAM1,
+			/*
+			 * For eviction / restore on suspend / resume objects
+			 * pinned in VRAM must be contiguous
+			 */
+			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+					     XE_BO_CREATE_GGTT_BIT) ?
+				TTM_PL_FLAG_CONTIGUOUS : 0,
+		};
+		*c += 1;
+
+		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+			bo->props.preferred_mem_type = XE_PL_VRAM1;
+	}
+}
+
+static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+				       u32 bo_flags)
+{
+	struct ttm_place *places = bo->placements;
+	u32 c = 0;
+
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+
+	/* The order of placements should indicate preferred location */
+
+	if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) {
+		try_add_system(bo, places, bo_flags, &c);
+		if (bo->props.preferred_gt == XE_GT1) {
+			try_add_vram1(xe, bo, places, bo_flags, &c);
+			try_add_vram0(xe, bo, places, bo_flags, &c);
+		} else {
+			try_add_vram0(xe, bo, places, bo_flags, &c);
+			try_add_vram1(xe, bo, places, bo_flags, &c);
+		}
+	} else if (bo->props.preferred_gt == XE_GT1) {
+		try_add_vram1(xe, bo, places, bo_flags, &c);
+		try_add_vram0(xe, bo, places, bo_flags, &c);
+		try_add_system(bo, places, bo_flags, &c);
+	} else {
+		try_add_vram0(xe, bo, places, bo_flags, &c);
+		try_add_vram1(xe, bo, places, bo_flags, &c);
+		try_add_system(bo, places, bo_flags, &c);
+	}
+
+	if (!c)
+		return -EINVAL;
+
+	bo->placement = (struct ttm_placement) {
+		.num_placement = c,
+		.placement = places,
+		.num_busy_placement = c,
+		.busy_placement = places,
+	};
+
+	return 0;
+}
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags)
+{
+	xe_bo_assert_held(bo);
+	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
+}
+
+static void xe_evict_flags(struct ttm_buffer_object *tbo,
+			   struct ttm_placement *placement)
+{
+	struct xe_bo *bo;
+
+	if (!xe_bo_is_xe_bo(tbo)) {
+		/* Don't handle scatter gather BOs */
+		if (tbo->type == ttm_bo_type_sg) {
+			placement->num_placement = 0;
+			placement->num_busy_placement = 0;
+			return;
+		}
+
+		*placement = sys_placement;
+		return;
+	}
+
+	/*
+	 * For xe, sg bos that are evicted to system just triggers a
+	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
+	 */
+
+	bo = ttm_to_xe_bo(tbo);
+	switch (tbo->resource->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+	case XE_PL_TT:
+	default:
+		/* for now kick out to system */
+		*placement = sys_placement;
+		break;
+	}
+}
+
+struct xe_ttm_tt {
+	struct ttm_tt ttm;
+	struct device *dev;
+	struct sg_table sgt;
+	struct sg_table *sg;
+};
+
+static int xe_tt_map_sg(struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+	unsigned long num_pages = tt->num_pages;
+	int ret;
+
+	XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+
+	if (xe_tt->sg)
+		return 0;
+
+	ret = sg_alloc_table_from_pages(&xe_tt->sgt, tt->pages, num_pages,
+					0, (u64)num_pages << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	xe_tt->sg = &xe_tt->sgt;
+	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC);
+	if (ret) {
+		sg_free_table(xe_tt->sg);
+		xe_tt->sg = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+struct sg_table *xe_bo_get_sg(struct xe_bo *bo)
+{
+	struct ttm_tt *tt = bo->ttm.ttm;
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	return xe_tt->sg;
+}
+
+static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
+				       u32 page_flags)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ttm_tt *tt;
+	int err;
+
+	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+	if (!tt)
+		return NULL;
+
+	tt->dev = xe->drm.dev;
+
+	/* TODO: Select caching mode */
+	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags,
+			  bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached,
+			  DIV_ROUND_UP(xe_device_ccs_bytes(xe_bo_device(bo),
+							   bo->ttm.base.size),
+				       PAGE_SIZE));
+	if (err) {
+		kfree(tt);
+		return NULL;
+	}
+
+	return &tt->ttm;
+}
+
+static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
+			      struct ttm_operation_ctx *ctx)
+{
+	int err;
+
+	/*
+	 * dma-bufs are not populated with pages, and the dma-
+	 * addresses are set up when moved to XE_PL_TT.
+	 */
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return 0;
+
+	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
+	if (err)
+		return err;
+
+	/* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */
+	err = xe_tt_map_sg(tt);
+	if (err)
+		ttm_pool_free(&ttm_dev->pool, tt);
+
+	return err;
+}
+
+static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return;
+
+	if (xe_tt->sg) {
+		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+				  DMA_BIDIRECTIONAL, 0);
+		sg_free_table(xe_tt->sg);
+		xe_tt->sg = NULL;
+	}
+
+	return ttm_pool_free(&ttm_dev->pool, tt);
+}
+
+static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	ttm_tt_fini(tt);
+	kfree(tt);
+}
+
+static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
+				 struct ttm_resource *mem)
+{
+	struct xe_device *xe = ttm_to_xe_device(bdev);
+	struct xe_gt *gt;
+
+	switch (mem->mem_type) {
+	case XE_PL_SYSTEM:
+	case XE_PL_TT:
+		return 0;
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		gt = mem_type_to_gt(xe, mem->mem_type);
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+
+		if (gt->mem.vram.mapping &&
+		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+			mem->bus.addr = (u8 *)gt->mem.vram.mapping +
+				mem->bus.offset;
+
+		mem->bus.offset += gt->mem.vram.io_start;
+		mem->bus.is_iomem = true;
+
+#if  !defined(CONFIG_X86)
+		mem->bus.caching = ttm_write_combined;
+#endif
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
+				const struct ttm_operation_ctx *ctx)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct xe_vma *vma;
+	int ret = 0;
+
+	dma_resv_assert_held(bo->ttm.base.resv);
+
+	if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) {
+		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
+				    DMA_RESV_USAGE_BOOKKEEP);
+		dma_resv_for_each_fence_unlocked(&cursor, fence)
+			dma_fence_enable_sw_signaling(fence);
+		dma_resv_iter_end(&cursor);
+	}
+
+	list_for_each_entry(vma, &bo->vmas, bo_link) {
+		struct xe_vm *vm = vma->vm;
+
+		trace_xe_vma_evict(vma);
+
+		if (xe_vm_in_fault_mode(vm)) {
+			/* Wait for pending binds / unbinds. */
+			long timeout;
+
+			if (ctx->no_wait_gpu &&
+			    !dma_resv_test_signaled(bo->ttm.base.resv,
+						    DMA_RESV_USAGE_BOOKKEEP))
+				return -EBUSY;
+
+			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+							DMA_RESV_USAGE_BOOKKEEP,
+							ctx->interruptible,
+							MAX_SCHEDULE_TIMEOUT);
+			if (timeout > 0) {
+				ret = xe_vm_invalidate_vma(vma);
+				XE_WARN_ON(ret);
+			} else if (!timeout) {
+				ret = -ETIME;
+			} else {
+				ret = timeout;
+			}
+
+		} else {
+			bool vm_resv_locked = false;
+			struct xe_vm *vm = vma->vm;
+
+			/*
+			 * We need to put the vma on the vm's rebind_list,
+			 * but need the vm resv to do so. If we can't verify
+			 * that we indeed have it locked, put the vma an the
+			 * vm's notifier.rebind_list instead and scoop later.
+			 */
+			if (dma_resv_trylock(&vm->resv))
+				vm_resv_locked = true;
+			else if (ctx->resv != &vm->resv) {
+				spin_lock(&vm->notifier.list_lock);
+				list_move_tail(&vma->notifier.rebind_link,
+					       &vm->notifier.rebind_list);
+				spin_unlock(&vm->notifier.list_lock);
+				continue;
+			}
+
+			xe_vm_assert_held(vm);
+			if (list_empty(&vma->rebind_link) && vma->gt_present)
+				list_add_tail(&vma->rebind_link, &vm->rebind_list);
+
+			if (vm_resv_locked)
+				dma_resv_unlock(&vm->resv);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
+ * Note that unmapping the attachment is deferred to the next
+ * map_attachment time, or to bo destroy (after idling) whichever comes first.
+ * This is to avoid syncing before unmap_attachment(), assuming that the
+ * caller relies on idling the reservation object before moving the
+ * backing store out. Should that assumption not hold, then we will be able
+ * to unconditionally call unmap_attachment() when moving out to system.
+ */
+static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
+			     struct ttm_resource *old_res,
+			     struct ttm_resource *new_res)
+{
+	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
+	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
+					       ttm);
+	struct sg_table *sg;
+
+	XE_BUG_ON(!attach);
+	XE_BUG_ON(!ttm_bo->ttm);
+
+	if (new_res->mem_type == XE_PL_SYSTEM)
+		goto out;
+
+	if (ttm_bo->sg) {
+		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
+		ttm_bo->sg = NULL;
+	}
+
+	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sg))
+		return PTR_ERR(sg);
+
+	ttm_bo->sg = sg;
+	xe_tt->sg = sg;
+
+out:
+	ttm_bo_move_null(ttm_bo, new_res);
+
+	return 0;
+}
+
+/**
+ * xe_bo_move_notify - Notify subsystems of a pending move
+ * @bo: The buffer object
+ * @ctx: The struct ttm_operation_ctx controlling locking and waits.
+ *
+ * This function notifies subsystems of an upcoming buffer move.
+ * Upon receiving such a notification, subsystems should schedule
+ * halting access to the underlying pages and optionally add a fence
+ * to the buffer object's dma_resv object, that signals when access is
+ * stopped. The caller will wait on all dma_resv fences before
+ * starting the move.
+ *
+ * A subsystem may commence access to the object after obtaining
+ * bindings to the new backing memory under the object lock.
+ *
+ * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
+ * negative error code on error.
+ */
+static int xe_bo_move_notify(struct xe_bo *bo,
+			     const struct ttm_operation_ctx *ctx)
+{
+	struct ttm_buffer_object *ttm_bo = &bo->ttm;
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	int ret;
+
+	/*
+	 * If this starts to call into many components, consider
+	 * using a notification chain here.
+	 */
+
+	if (xe_bo_is_pinned(bo))
+		return -EINVAL;
+
+	xe_bo_vunmap(bo);
+	ret = xe_bo_trigger_rebind(xe, bo, ctx);
+	if (ret)
+		return ret;
+
+	/* Don't call move_notify() for imported dma-bufs. */
+	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
+		dma_buf_move_notify(ttm_bo->base.dma_buf);
+
+	return 0;
+}
+
+static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
+		      struct ttm_operation_ctx *ctx,
+		      struct ttm_resource *new_mem,
+		      struct ttm_place *hop)
+{
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct ttm_resource *old_mem = ttm_bo->resource;
+	struct ttm_tt *ttm = ttm_bo->ttm;
+	struct xe_gt *gt = NULL;
+	struct dma_fence *fence;
+	bool move_lacks_source;
+	bool needs_clear;
+	int ret = 0;
+
+	if (!old_mem) {
+		if (new_mem->mem_type != TTM_PL_SYSTEM) {
+			hop->mem_type = TTM_PL_SYSTEM;
+			hop->flags = TTM_PL_FLAG_TEMPORARY;
+			ret = -EMULTIHOP;
+			goto out;
+		}
+
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (ttm_bo->type == ttm_bo_type_sg) {
+		ret = xe_bo_move_notify(bo, ctx);
+		if (!ret)
+			ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem);
+		goto out;
+	}
+
+	move_lacks_source = !resource_is_vram(old_mem) &&
+		(!ttm || !ttm_tt_is_populated(ttm));
+
+	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
+		(!ttm && ttm_bo->type == ttm_bo_type_device);
+
+	if ((move_lacks_source && !needs_clear) ||
+	    (old_mem->mem_type == XE_PL_SYSTEM &&
+	     new_mem->mem_type == XE_PL_TT)) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
+		ret = xe_bo_move_notify(bo, ctx);
+		if (ret)
+			goto out;
+	}
+
+	if (old_mem->mem_type == XE_PL_TT &&
+	    new_mem->mem_type == XE_PL_SYSTEM) {
+		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+						     DMA_RESV_USAGE_BOOKKEEP,
+						     true,
+						     MAX_SCHEDULE_TIMEOUT);
+		if (timeout < 0) {
+			ret = timeout;
+			goto out;
+		}
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (!move_lacks_source &&
+	    ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
+	     (resource_is_vram(old_mem) &&
+	      new_mem->mem_type == XE_PL_SYSTEM))) {
+		hop->fpfn = 0;
+		hop->lpfn = 0;
+		hop->mem_type = XE_PL_TT;
+		hop->flags = TTM_PL_FLAG_TEMPORARY;
+		ret = -EMULTIHOP;
+		goto out;
+	}
+
+	if (bo->gt)
+		gt = bo->gt;
+	else if (resource_is_vram(new_mem))
+		gt = mem_type_to_gt(xe, new_mem->mem_type);
+	else if (resource_is_vram(old_mem))
+		gt = mem_type_to_gt(xe, old_mem->mem_type);
+
+	XE_BUG_ON(!gt);
+	XE_BUG_ON(!gt->migrate);
+
+	trace_xe_bo_move(bo);
+	xe_device_mem_access_get(xe);
+
+	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
+		/*
+		 * Kernel memory that is pinned should only be moved on suspend
+		 * / resume, some of the pinned memory is required for the
+		 * device to resume / use the GPU to move other evicted memory
+		 * (user memory) around. This likely could be optimized a bit
+		 * futher where we find the minimum set of pinned memory
+		 * required for resume but for simplity doing a memcpy for all
+		 * pinned memory.
+		 */
+		ret = xe_bo_vmap(bo);
+		if (!ret) {
+			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
+
+			/* Create a new VMAP once kernel BO back in VRAM */
+			if (!ret && resource_is_vram(new_mem)) {
+				void *new_addr = gt->mem.vram.mapping +
+					(new_mem->start << PAGE_SHIFT);
+
+				XE_BUG_ON(new_mem->start !=
+					  bo->placements->fpfn);
+
+				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
+			}
+		}
+	} else {
+		if (move_lacks_source)
+			fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0);
+		else
+			fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
+		if (IS_ERR(fence)) {
+			ret = PTR_ERR(fence);
+			xe_device_mem_access_put(xe);
+			goto out;
+		}
+		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
+						new_mem);
+		dma_fence_put(fence);
+	}
+
+	xe_device_mem_access_put(xe);
+	trace_printk("new_mem->mem_type=%d\n", new_mem->mem_type);
+
+out:
+	return ret;
+
+}
+
+static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+				       unsigned long page_offset)
+{
+	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
+	struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type);
+	struct xe_res_cursor cursor;
+
+	xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
+	return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo);
+
+/*
+ * TODO: Move this function to TTM so we don't rely on how TTM does its
+ * locking, thereby abusing TTM internals.
+ */
+static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
+{
+	bool locked;
+
+	XE_WARN_ON(kref_read(&ttm_bo->kref));
+
+	/*
+	 * We can typically only race with TTM trylocking under the
+	 * lru_lock, which will immediately be unlocked again since
+	 * the ttm_bo refcount is zero at this point. So trylocking *should*
+	 * always succeed here, as long as we hold the lru lock.
+	 */
+	spin_lock(&ttm_bo->bdev->lru_lock);
+	locked = dma_resv_trylock(ttm_bo->base.resv);
+	spin_unlock(&ttm_bo->bdev->lru_lock);
+	XE_WARN_ON(!locked);
+
+	return locked;
+}
+
+static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct dma_fence *replacement = NULL;
+	struct xe_bo *bo;
+
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	bo = ttm_to_xe_bo(ttm_bo);
+	XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount));
+
+	/*
+	 * Corner case where TTM fails to allocate memory and this BOs resv
+	 * still points the VMs resv
+	 */
+	if (ttm_bo->base.resv != &ttm_bo->base._resv)
+		return;
+
+	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
+		return;
+
+	/*
+	 * Scrub the preempt fences if any. The unbind fence is already
+	 * attached to the resv.
+	 * TODO: Don't do this for external bos once we scrub them after
+	 * unbind.
+	 */
+	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+				DMA_RESV_USAGE_BOOKKEEP, fence) {
+		if (xe_fence_is_xe_preempt(fence) &&
+		    !dma_fence_is_signaled(fence)) {
+			if (!replacement)
+				replacement = dma_fence_get_stub();
+
+			dma_resv_replace_fences(ttm_bo->base.resv,
+						fence->context,
+						replacement,
+						DMA_RESV_USAGE_BOOKKEEP);
+		}
+	}
+	dma_fence_put(replacement);
+
+	dma_resv_unlock(ttm_bo->base.resv);
+}
+
+static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
+{
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	/*
+	 * Object is idle and about to be destroyed. Release the
+	 * dma-buf attachment.
+	 */
+	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
+		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
+						       struct xe_ttm_tt, ttm);
+
+		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
+					 DMA_BIDIRECTIONAL);
+		ttm_bo->sg = NULL;
+		xe_tt->sg = NULL;
+	}
+}
+
+struct ttm_device_funcs xe_ttm_funcs = {
+	.ttm_tt_create = xe_ttm_tt_create,
+	.ttm_tt_populate = xe_ttm_tt_populate,
+	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
+	.ttm_tt_destroy = xe_ttm_tt_destroy,
+	.evict_flags = xe_evict_flags,
+	.move = xe_bo_move,
+	.io_mem_reserve = xe_ttm_io_mem_reserve,
+	.io_mem_pfn = xe_ttm_io_mem_pfn,
+	.release_notify = xe_ttm_bo_release_notify,
+	.eviction_valuable = ttm_bo_eviction_valuable,
+	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+};
+
+static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+
+	if (bo->ttm.base.import_attach)
+		drm_prime_gem_destroy(&bo->ttm.base, NULL);
+	drm_gem_object_release(&bo->ttm.base);
+
+	WARN_ON(!list_empty(&bo->vmas));
+
+	if (bo->ggtt_node.size)
+		xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo);
+
+	if (bo->vm && xe_bo_is_user(bo))
+		xe_vm_put(bo->vm);
+
+	kfree(bo);
+}
+
+static void xe_gem_object_free(struct drm_gem_object *obj)
+{
+	/* Our BO reference counting scheme works as follows:
+	 *
+	 * The gem object kref is typically used throughout the driver,
+	 * and the gem object holds a ttm_buffer_object refcount, so
+	 * that when the last gem object reference is put, which is when
+	 * we end up in this function, we put also that ttm_buffer_object
+	 * refcount. Anything using gem interfaces is then no longer
+	 * allowed to access the object in a way that requires a gem
+	 * refcount, including locking the object.
+	 *
+	 * driver ttm callbacks is allowed to use the ttm_buffer_object
+	 * refcount directly if needed.
+	 */
+	__xe_bo_vunmap(gem_to_xe_bo(obj));
+	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+}
+
+static bool should_migrate_to_system(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
+}
+
+static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+{
+	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
+	struct drm_device *ddev = tbo->base.dev;
+	vm_fault_t ret;
+	int idx, r = 0;
+
+	ret = ttm_bo_vm_reserve(tbo, vmf);
+	if (ret)
+		return ret;
+
+	if (drm_dev_enter(ddev, &idx)) {
+		struct xe_bo *bo = ttm_to_xe_bo(tbo);
+
+		trace_xe_bo_cpu_fault(bo);
+
+		if (should_migrate_to_system(bo)) {
+			r = xe_bo_migrate(bo, XE_PL_TT);
+			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
+				ret = VM_FAULT_NOPAGE;
+			else if (r)
+				ret = VM_FAULT_SIGBUS;
+		}
+		if (!ret)
+			ret = ttm_bo_vm_fault_reserved(vmf,
+						       vmf->vma->vm_page_prot,
+						       TTM_BO_VM_NUM_PREFAULT);
+
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+	}
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret;
+
+	dma_resv_unlock(tbo->base.resv);
+	return ret;
+}
+
+static const struct vm_operations_struct xe_gem_vm_ops = {
+	.fault = xe_gem_fault,
+	.open = ttm_bo_vm_open,
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access
+};
+
+static const struct drm_gem_object_funcs xe_gem_object_funcs = {
+	.free = xe_gem_object_free,
+	.mmap = drm_gem_ttm_mmap,
+	.export = xe_gem_prime_export,
+	.vm_ops = &xe_gem_vm_ops,
+};
+
+/**
+ * xe_bo_alloc - Allocate storage for a struct xe_bo
+ *
+ * This funcition is intended to allocate storage to be used for input
+ * to __xe_bo_create_locked(), in the case a pointer to the bo to be
+ * created is needed before the call to __xe_bo_create_locked().
+ * If __xe_bo_create_locked ends up never to be called, then the
+ * storage allocated with this function needs to be freed using
+ * xe_bo_free().
+ *
+ * Return: A pointer to an uninitialized struct xe_bo on success,
+ * ERR_PTR(-ENOMEM) on error.
+ */
+struct xe_bo *xe_bo_alloc(void)
+{
+	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	return bo;
+}
+
+/**
+ * xe_bo_free - Free storage allocated using xe_bo_alloc()
+ * @bo: The buffer object storage.
+ *
+ * Refer to xe_bo_alloc() documentation for valid use-cases.
+ */
+void xe_bo_free(struct xe_bo *bo)
+{
+	kfree(bo);
+}
+
+struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				    struct xe_gt *gt, struct dma_resv *resv,
+				    size_t size, enum ttm_bo_type type,
+				    u32 flags)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement *placement;
+	uint32_t alignment;
+	int err;
+
+	/* Only kernel objects should set GT */
+	XE_BUG_ON(gt && type != ttm_bo_type_kernel);
+
+	if (!bo) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+	}
+
+	if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) &&
+	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
+	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
+		size = ALIGN(size, SZ_64K);
+		flags |= XE_BO_INTERNAL_64K;
+		alignment = SZ_64K >> PAGE_SHIFT;
+	} else {
+		alignment = SZ_4K >> PAGE_SHIFT;
+	}
+
+	bo->gt = gt;
+	bo->size = size;
+	bo->flags = flags;
+	bo->ttm.base.funcs = &xe_gem_object_funcs;
+	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
+	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+	bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL;
+	INIT_LIST_HEAD(&bo->vmas);
+	INIT_LIST_HEAD(&bo->pinned_link);
+
+	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
+
+	if (resv) {
+		ctx.allow_res_evict = true;
+		ctx.resv = resv;
+	}
+
+	err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
+	if (WARN_ON(err))
+		return ERR_PTR(err);
+
+	/* Defer populating type_sg bos */
+	placement = (type == ttm_bo_type_sg ||
+		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+		&bo->placement;
+	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
+				   placement, alignment,
+				   &ctx, NULL, resv, xe_ttm_bo_destroy);
+	if (err)
+		return ERR_PTR(err);
+
+	bo->created = true;
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return bo;
+}
+
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo;
+	int err;
+
+	if (vm)
+		xe_vm_assert_held(vm);
+	bo = __xe_bo_create_locked(xe, NULL, gt, vm ? &vm->resv : NULL, size,
+				   type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	if (vm && xe_bo_is_user(bo))
+		xe_vm_get(vm);
+	bo->vm = vm;
+
+	if (flags & XE_BO_CREATE_GGTT_BIT) {
+		XE_BUG_ON(!gt);
+
+		err = xe_ggtt_insert_bo(gt->mem.ggtt, bo);
+		if (err)
+			goto err_unlock_put_bo;
+	}
+
+	return bo;
+
+err_unlock_put_bo:
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+
+	if (!IS_ERR(bo))
+		xe_bo_unlock_vm_held(bo);
+
+	return bo;
+}
+
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+	int err;
+
+	if (IS_ERR(bo))
+		return bo;
+
+	err = xe_bo_pin(bo);
+	if (err)
+		goto err_put;
+
+	err = xe_bo_vmap(bo);
+	if (err)
+		goto err_unpin;
+
+	xe_bo_unlock_vm_held(bo);
+
+	return bo;
+
+err_unpin:
+	xe_bo_unpin(bo);
+err_put:
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL,
+						ALIGN(size, PAGE_SIZE),
+						type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+	return bo;
+}
+
+/*
+ * XXX: This is in the VM bind data path, likely should calculate this once and
+ * store, with a recalculation if the BO is moved.
+ */
+static uint64_t vram_region_io_offset(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type);
+
+	return gt->mem.vram.io_start - xe->mem.vram.io_start;
+}
+
+/**
+ * xe_bo_pin_external - pin an external BO
+ * @bo: buffer object to be pinned
+ *
+ * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_pin as this function has it own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_bo_pin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	XE_BUG_ON(bo->vm);
+	XE_BUG_ON(!xe_bo_is_user(bo));
+
+	if (!xe_bo_is_pinned(bo)) {
+		err = xe_bo_validate(bo, NULL, false);
+		if (err)
+			return err;
+
+		if (xe_bo_is_vram(bo)) {
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link,
+				      &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+int xe_bo_pin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	/* We currently don't expect user BO to be pinned */
+	XE_BUG_ON(xe_bo_is_user(bo));
+
+	/* Pinned object must be in GGTT or have pinned flag */
+	XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT |
+				 XE_BO_CREATE_GGTT_BIT)));
+
+	/*
+	 * No reason we can't support pinning imported dma-bufs we just don't
+	 * expect to pin an imported dma-buf.
+	 */
+	XE_BUG_ON(bo->ttm.base.import_attach);
+
+	/* We only expect at most 1 pin */
+	XE_BUG_ON(xe_bo_is_pinned(bo));
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	/*
+	 * For pinned objects in on DGFX, we expect these objects to be in
+	 * contiguous VRAM memory. Required eviction / restore during suspend /
+	 * resume (force restore to same physical address).
+	 */
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		struct ttm_place *place = &(bo->placements[0]);
+		bool lmem;
+
+		XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
+		XE_BUG_ON(!mem_type_is_vram(place->mem_type));
+
+		place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
+			       vram_region_io_offset(bo)) >> PAGE_SHIFT;
+		place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+
+		spin_lock(&xe->pinned.lock);
+		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+/**
+ * xe_bo_unpin_external - unpin an external BO
+ * @bo: buffer object to be unpinned
+ *
+ * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_unpin as this function has it own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+void xe_bo_unpin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	XE_BUG_ON(bo->vm);
+	XE_BUG_ON(!xe_bo_is_pinned(bo));
+	XE_BUG_ON(!xe_bo_is_user(bo));
+
+	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
+		spin_lock(&xe->pinned.lock);
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+}
+
+void xe_bo_unpin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	XE_BUG_ON(bo->ttm.base.import_attach);
+	XE_BUG_ON(!xe_bo_is_pinned(bo));
+
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		XE_BUG_ON(list_empty(&bo->pinned_link));
+
+		spin_lock(&xe->pinned.lock);
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+}
+
+/**
+ * xe_bo_validate() - Make sure the bo is in an allowed placement
+ * @bo: The bo,
+ * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or
+ *      NULL. Used together with @allow_res_evict.
+ * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
+ *                   reservation object.
+ *
+ * Make sure the bo is in allowed placement, migrating it if necessary. If
+ * needed, other bos will be evicted. If bos selected for eviction shares
+ * the @vm's reservation object, they can be evicted iff @allow_res_evict is
+ * set to true, otherwise they will be bypassed.
+ *
+ * Return: 0 on success, negative error code on failure. May return
+ * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
+ */
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+
+	if (vm) {
+		lockdep_assert_held(&vm->lock);
+		xe_vm_assert_held(vm);
+
+		ctx.allow_res_evict = allow_res_evict;
+		ctx.resv = &vm->resv;
+	}
+
+	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
+{
+	if (bo->destroy == &xe_ttm_bo_destroy)
+		return true;
+
+	return false;
+}
+
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+		      size_t page_size, bool *is_lmem)
+{
+	struct xe_res_cursor cur;
+	u64 page;
+
+	if (!READ_ONCE(bo->ttm.pin_count))
+		xe_bo_assert_held(bo);
+
+	XE_BUG_ON(page_size > PAGE_SIZE);
+	page = offset >> PAGE_SHIFT;
+	offset &= (PAGE_SIZE - 1);
+
+	*is_lmem = xe_bo_is_vram(bo);
+
+	if (!*is_lmem) {
+		XE_BUG_ON(!bo->ttm.ttm);
+
+		xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
+				page_size, &cur);
+		return xe_res_dma(&cur) + offset;
+	} else {
+		struct xe_res_cursor cur;
+
+		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
+			     page_size, &cur);
+		return cur.start + offset + vram_region_io_offset(bo);
+	}
+}
+
+int xe_bo_vmap(struct xe_bo *bo)
+{
+	void *virtual;
+	bool is_iomem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (!iosys_map_is_null(&bo->vmap))
+		return 0;
+
+	/*
+	 * We use this more or less deprecated interface for now since
+	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
+	 * single page bos, which is done here.
+	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
+	 * to use struct iosys_map.
+	 */
+	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
+	if (ret)
+		return ret;
+
+	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+	if (is_iomem)
+		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
+	else
+		iosys_map_set_vaddr(&bo->vmap, virtual);
+
+	return 0;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo)
+{
+	if (!iosys_map_is_null(&bo->vmap)) {
+		iosys_map_clear(&bo->vmap);
+		ttm_bo_kunmap(&bo->kmap);
+	}
+}
+
+void xe_bo_vunmap(struct xe_bo *bo)
+{
+	xe_bo_assert_held(bo);
+	__xe_bo_vunmap(bo);
+}
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_gem_create *args = data;
+	struct ww_acquire_ctx ww;
+	struct xe_vm *vm = NULL;
+	struct xe_bo *bo;
+	unsigned bo_flags = XE_BO_CREATE_USER_BIT;
+	u32 handle;
+	int err;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags &
+			 ~(XE_GEM_CREATE_FLAG_DEFER_BACKING |
+			   XE_GEM_CREATE_FLAG_SCANOUT |
+			   xe->info.mem_region_mask)))
+		return -EINVAL;
+
+	/* at least one memory type must be specified */
+	if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask)))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->handle))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK))
+		return -EINVAL;
+
+	if (args->vm_id) {
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_ERR(xe, !vm))
+			return -ENOENT;
+		err = xe_vm_lock(vm, &ww, 0, true);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+	}
+
+	if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING)
+		bo_flags |= XE_BO_DEFER_BACKING;
+
+	if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT)
+		bo_flags |= XE_BO_SCANOUT_BIT;
+
+	bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
+			  bo_flags);
+	if (vm) {
+		xe_vm_unlock(vm, &ww);
+		xe_vm_put(vm);
+	}
+
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
+	xe_bo_put(bo);
+	if (err)
+		return err;
+
+	args->handle = handle;
+
+	return 0;
+}
+
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_gem_mmap_offset *args = data;
+	struct drm_gem_object *gem_obj;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags))
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file, args->handle);
+	if (XE_IOCTL_ERR(xe, !gem_obj))
+		return -ENOENT;
+
+	/* The mmap offset was set up at BO allocation time. */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	xe_bo_put(gem_to_xe_bo(gem_obj));
+	return 0;
+}
+
+int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr)
+{
+	struct ttm_validate_buffer tv_bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+
+	XE_BUG_ON(!ww);
+
+	tv_bo.num_shared = num_resv;
+	tv_bo.bo = &bo->ttm;;
+	list_add_tail(&tv_bo.head, &objs);
+
+	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+}
+
+void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww)
+{
+	dma_resv_unlock(bo->ttm.base.resv);
+	ww_acquire_fini(ww);
+}
+
+/**
+ * xe_bo_can_migrate - Whether a buffer object likely can be migrated
+ * @bo: The buffer object to migrate
+ * @mem_type: The TTM memory type intended to migrate to
+ *
+ * Check whether the buffer object supports migration to the
+ * given memory type. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate buffer objects and can be called without
+ * the object lock held.
+ *
+ * Return: true if migration is possible, false otherwise.
+ */
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	unsigned int cur_place;
+
+	if (bo->ttm.type == ttm_bo_type_kernel)
+		return true;
+
+	if (bo->ttm.type == ttm_bo_type_sg)
+		return false;
+
+	for (cur_place = 0; cur_place < bo->placement.num_placement;
+	     cur_place++) {
+		if (bo->placements[cur_place].mem_type == mem_type)
+			return true;
+	}
+
+	return false;
+}
+
+static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
+{
+	memset(place, 0, sizeof(*place));
+	place->mem_type = mem_type;
+}
+
+/**
+ * xe_bo_migrate - Migrate an object to the desired region id
+ * @bo: The buffer object to migrate.
+ * @mem_type: The TTM region type to migrate to.
+ *
+ * Attempt to migrate the buffer object to the desired memory region. The
+ * buffer object may not be pinned, and must be locked.
+ * On successful completion, the object memory type will be updated,
+ * but an async migration task may not have completed yet, and to
+ * accomplish that, the object's kernel fences must be signaled with
+ * the object lock held.
+ *
+ * Return: 0 on success. Negative error code on failure. In particular may
+ * return -EINTR or -ERESTARTSYS if signal pending.
+ */
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement placement;
+	struct ttm_place requested;
+
+	xe_bo_assert_held(bo);
+
+	if (bo->ttm.resource->mem_type == mem_type)
+		return 0;
+
+	if (xe_bo_is_pinned(bo))
+		return -EBUSY;
+
+	if (!xe_bo_can_migrate(bo, mem_type))
+		return -EINVAL;
+
+	xe_place_from_ttm_type(mem_type, &requested);
+	placement.num_placement = 1;
+	placement.num_busy_placement = 1;
+	placement.placement = &requested;
+	placement.busy_placement = &requested;
+
+	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+}
+
+/**
+ * xe_bo_evict - Evict an object to evict placement
+ * @bo: The buffer object to migrate.
+ * @force_alloc: Set force_alloc in ttm_operation_ctx
+ *
+ * On successful completion, the object memory will be moved to evict
+ * placement. Ths function blocks until the object has been fully moved.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false,
+		.force_alloc = force_alloc,
+	};
+	struct ttm_placement placement;
+	int ret;
+
+	xe_evict_flags(&bo->ttm, &placement);
+	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
+	if (ret)
+		return ret;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+}
+
+/**
+ * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
+ * placed in system memory.
+ * @bo: The xe_bo
+ *
+ * If a bo has an allowable placement in XE_PL_TT memory, it can't use
+ * flat CCS compression, because the GPU then has no way to access the
+ * CCS metadata using relevant commands. For the opposite case, we need to
+ * allocate storage for the CCS metadata when the BO is not resident in
+ * VRAM memory.
+ *
+ * Return: true if extra pages need to be allocated, false otherwise.
+ */
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
+{
+	return bo->ttm.type == ttm_bo_type_device &&
+		!(bo->flags & XE_BO_CREATE_SYSTEM_BIT) &&
+		(bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT));
+}
+
+/**
+ * __xe_bo_release_dummy() - Dummy kref release function
+ * @kref: The embedded struct kref.
+ *
+ * Dummy release function for xe_bo_put_deferred(). Keep off.
+ */
+void __xe_bo_release_dummy(struct kref *kref)
+{
+}
+
+/**
+ * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
+ * @deferred: The lockless list used for the call to xe_bo_put_deferred().
+ *
+ * Puts all bos whose put was deferred by xe_bo_put_deferred().
+ * The @deferred list can be either an onstack local list or a global
+ * shared list used by a workqueue.
+ */
+void xe_bo_put_commit(struct llist_head *deferred)
+{
+	struct llist_node *freed;
+	struct xe_bo *bo, *next;
+
+	if (!deferred)
+		return;
+
+	freed = llist_del_all(deferred);
+	if (!freed)
+		return;
+
+	llist_for_each_entry_safe(bo, next, freed, freed)
+		drm_gem_object_free(&bo->ttm.base.refcount);
+}
+
+/**
+ * xe_bo_dumb_create - Create a dumb bo as backing for a fb
+ * @file_priv: ...
+ * @dev: ...
+ * @args: ...
+ *
+ * See dumb_create() hook in include/drm/drm_drv.h
+ *
+ * Return: ...
+ */
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_bo *bo;
+	uint32_t handle;
+	int cpp = DIV_ROUND_UP(args->bpp, 8);
+	int err;
+	u32 page_size = max_t(u32, PAGE_SIZE,
+		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
+
+	args->pitch = ALIGN(args->width * cpp, 64);
+	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
+			   page_size);
+
+	bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
+			  XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) |
+			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_put(&bo->ttm.base);
+	if (!err)
+		args->handle = handle;
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_bo.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
new file mode 100644
index 0000000000000..1a49c0a3c4c6a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_BO_H_
+#define _XE_BO_H_
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_vm_types.h"
+
+#define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
+
+#define XE_BO_CREATE_USER_BIT		BIT(1)
+#define XE_BO_CREATE_SYSTEM_BIT		BIT(2)
+#define XE_BO_CREATE_VRAM0_BIT		BIT(3)
+#define XE_BO_CREATE_VRAM1_BIT		BIT(4)
+#define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
+	(IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \
+	 XE_BO_CREATE_SYSTEM_BIT)
+#define XE_BO_CREATE_GGTT_BIT		BIT(5)
+#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
+#define XE_BO_CREATE_PINNED_BIT		BIT(7)
+#define XE_BO_DEFER_BACKING		BIT(8)
+#define XE_BO_SCANOUT_BIT		BIT(9)
+/* this one is trigger internally only */
+#define XE_BO_INTERNAL_TEST		BIT(30)
+#define XE_BO_INTERNAL_64K		BIT(31)
+
+#define PPAT_UNCACHED                   GENMASK_ULL(4, 3)
+#define PPAT_CACHED_PDE                 0
+#define PPAT_CACHED                     BIT_ULL(7)
+#define PPAT_DISPLAY_ELLC               BIT_ULL(4)
+
+#define GEN8_PTE_SHIFT			12
+#define GEN8_PAGE_SIZE			(1 << GEN8_PTE_SHIFT)
+#define GEN8_PTE_MASK			(GEN8_PAGE_SIZE - 1)
+#define GEN8_PDE_SHIFT			(GEN8_PTE_SHIFT - 3)
+#define GEN8_PDES			(1 << GEN8_PDE_SHIFT)
+#define GEN8_PDE_MASK			(GEN8_PDES - 1)
+
+#define GEN8_64K_PTE_SHIFT		16
+#define GEN8_64K_PAGE_SIZE		(1 << GEN8_64K_PTE_SHIFT)
+#define GEN8_64K_PTE_MASK		(GEN8_64K_PAGE_SIZE - 1)
+#define GEN8_64K_PDE_MASK		(GEN8_PDE_MASK >> 4)
+
+#define GEN8_PDE_PS_2M			BIT_ULL(7)
+#define GEN8_PDPE_PS_1G			BIT_ULL(7)
+#define GEN8_PDE_IPS_64K		BIT_ULL(11)
+
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define GEN12_USM_PPGTT_PTE_AE		BIT_ULL(10)
+#define GEN12_PPGTT_PTE_LM		BIT_ULL(11)
+#define GEN12_PDE_64K			BIT_ULL(6)
+#define GEN12_PTE_PS64                  BIT_ULL(8)
+
+#define GEN8_PAGE_PRESENT		BIT_ULL(0)
+#define GEN8_PAGE_RW			BIT_ULL(1)
+
+#define PTE_READ_ONLY			BIT(0)
+
+#define XE_PL_SYSTEM		TTM_PL_SYSTEM
+#define XE_PL_TT		TTM_PL_TT
+#define XE_PL_VRAM0		TTM_PL_VRAM
+#define XE_PL_VRAM1		(XE_PL_VRAM0 + 1)
+
+#define XE_BO_PROPS_INVALID	(-1)
+
+struct sg_table;
+
+struct xe_bo *xe_bo_alloc(void);
+void xe_bo_free(struct xe_bo *bo);
+
+struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				    struct xe_gt *gt, struct dma_resv *resv,
+				    size_t size, enum ttm_bo_type type,
+				    u32 flags);
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags);
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags);
+
+static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
+{
+	return container_of(bo, struct xe_bo, ttm);
+}
+
+static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj)
+{
+	return container_of(obj, struct xe_bo, ttm.base);
+}
+
+#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev)
+
+static inline struct xe_bo *xe_bo_get(struct xe_bo *bo)
+{
+	if (bo)
+		drm_gem_object_get(&bo->ttm.base);
+
+	return bo;
+}
+
+static inline void xe_bo_put(struct xe_bo *bo)
+{
+	if (bo)
+		drm_gem_object_put(&bo->ttm.base);
+}
+
+static inline void xe_bo_assert_held(struct xe_bo *bo)
+{
+	if (bo)
+		dma_resv_assert_held((bo)->ttm.base.resv);
+}
+
+int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr);
+
+void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww);
+
+static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
+{
+	if (bo) {
+		XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv);
+		if (bo->vm)
+			xe_vm_assert_held(bo->vm);
+		else
+			dma_resv_unlock(bo->ttm.base.resv);
+	}
+}
+
+static inline void xe_bo_lock_no_vm(struct xe_bo *bo,
+				    struct ww_acquire_ctx *ctx)
+{
+	if (bo) {
+		XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+				     bo->ttm.base.resv != &bo->ttm.base._resv));
+		dma_resv_lock(bo->ttm.base.resv, ctx);
+	}
+}
+
+static inline void xe_bo_unlock_no_vm(struct xe_bo *bo)
+{
+	if (bo) {
+		XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+				     bo->ttm.base.resv != &bo->ttm.base._resv));
+		dma_resv_unlock(bo->ttm.base.resv);
+	}
+}
+
+int xe_bo_pin_external(struct xe_bo *bo);
+int xe_bo_pin(struct xe_bo *bo);
+void xe_bo_unpin_external(struct xe_bo *bo);
+void xe_bo_unpin(struct xe_bo *bo);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+
+static inline bool xe_bo_is_pinned(struct xe_bo *bo)
+{
+	return bo->ttm.pin_count;
+}
+
+static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+	if (likely(bo)) {
+		xe_bo_lock_no_vm(bo, NULL);
+		xe_bo_unpin(bo);
+		xe_bo_unlock_no_vm(bo);
+
+		xe_bo_put(bo);
+	}
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+		      size_t page_size, bool *is_lmem);
+
+static inline dma_addr_t
+xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
+{
+	bool is_lmem;
+
+	return xe_bo_addr(bo, 0, page_size, &is_lmem);
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+	XE_BUG_ON(bo->ggtt_node.size > bo->size);
+	XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
+	return bo->ggtt_node.start;
+}
+
+int xe_bo_vmap(struct xe_bo *bo);
+void xe_bo_vunmap(struct xe_bo *bo);
+
+bool mem_type_is_vram(u32 mem_type);
+bool xe_bo_is_vram(struct xe_bo *bo);
+
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
+
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args);
+
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo);
+
+static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
+{
+	return PAGE_ALIGN(bo->ttm.base.size);
+}
+
+void __xe_bo_release_dummy(struct kref *kref);
+
+/**
+ * xe_bo_put_deferred() - Put a buffer object with delayed final freeing
+ * @bo: The bo to put.
+ * @deferred: List to which to add the buffer object if we cannot put, or
+ * NULL if the function is to put unconditionally.
+ *
+ * Since the final freeing of an object includes both sleeping and (!)
+ * memory allocation in the dma_resv individualization, it's not ok
+ * to put an object from atomic context nor from within a held lock
+ * tainted by reclaim. In such situations we want to defer the final
+ * freeing until we've exited the restricting context, or in the worst
+ * case to a workqueue.
+ * This function either puts the object if possible without the refcount
+ * reaching zero, or adds it to the @deferred list if that was not possible.
+ * The caller needs to follow up with a call to xe_bo_put_commit() to actually
+ * put the bo iff this function returns true. It's safe to always
+ * follow up with a call to xe_bo_put_commit().
+ * TODO: It's TTM that is the villain here. Perhaps TTM should add an
+ * interface like this.
+ *
+ * Return: true if @bo was the first object put on the @freed list,
+ * false otherwise.
+ */
+static inline bool
+xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
+{
+	if (!deferred) {
+		xe_bo_put(bo);
+		return false;
+	}
+
+	if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy))
+		return false;
+
+	return llist_add(&bo->freed, deferred);
+}
+
+void xe_bo_put_commit(struct llist_head *deferred);
+
+struct sg_table *xe_bo_get_sg(struct xe_bo *bo);
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/**
+ * xe_bo_is_mem_type - Whether the bo currently resides in the given
+ * TTM memory type
+ * @bo: The bo to check.
+ * @mem_type: The TTM memory type.
+ *
+ * Return: true iff the bo resides in @mem_type, false otherwise.
+ */
+static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type)
+{
+	xe_bo_assert_held(bo);
+	return bo->ttm.resource->mem_type == mem_type;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
new file mode 100644
index 0000000000000..f57d440cc95a5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_DOC_H_
+#define _XE_BO_DOC_H_
+
+/**
+ * DOC: Buffer Objects (BO)
+ *
+ * BO management
+ * =============
+ *
+ * TTM manages (placement, eviction, etc...) all BOs in XE.
+ *
+ * BO creation
+ * ===========
+ *
+ * Create a chunk of memory which can be used by the GPU. Placement rules
+ * (sysmem or vram region) passed in upon creation. TTM handles placement of BO
+ * and can trigger eviction of other BOs to make space for the new BO.
+ *
+ * Kernel BOs
+ * ----------
+ *
+ * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC
+ * ADS, etc...) or a BO created as part of a user operation which requires
+ * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs
+ * are typically mapped in the GGTT (any kernel BOs aside memory for page tables
+ * are in the GGTT), are pinned (can't move or be evicted at runtime), have a
+ * vmap (XE can access the memory via xe_map layer) and have contiguous physical
+ * memory.
+ *
+ * More details of why kernel BOs are pinned and contiguous below.
+ *
+ * User BOs
+ * --------
+ *
+ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
+ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
+ * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
+ * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * the backing store can be defered from creation time until first use which is
+ * either mmap, bind, or pagefault.
+ *
+ * Private BOs
+ * ~~~~~~~~~~~
+ *
+ * A private BO is a user BO created with a valid VM argument passed into the
+ * create IOCTL. If a BO is private it cannot be exported via prime FD and
+ * mappings can only be created for the BO within the VM it is tied to. Lastly,
+ * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all
+ * private BOs to a VM share common dma-resv slots / lock).
+ *
+ * External BOs
+ * ~~~~~~~~~~~~
+ *
+ * An external BO is a user BO created with a NULL VM argument passed into the
+ * create IOCTL. An external BO can be shared with different UMDs / devices via
+ * prime FD and the BO can be mapped into multiple VMs. An external BO has its
+ * own unique dma-resv slots / lock. An external BO will be in an array of all
+ * VMs which has a mapping of the BO. This allows VMs to lookup and lock all
+ * external BOs mapped in the VM as needed.
+ *
+ * BO placement
+ * ~~~~~~~~~~~~
+ *
+ * When a user BO is created, a mask of valid placements is passed indicating
+ * which memory regions are considered valid.
+ *
+ * The memory region information is available via query uAPI (TODO: add link).
+ *
+ * BO validation
+ * =============
+ *
+ * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid
+ * placement. If a BO was swapped to temporary storage, a validation call will
+ * trigger a move back to a valid (location where GPU can access BO) placement.
+ * Validation of a BO may evict other BOs to make room for the BO being
+ * validated.
+ *
+ * BO eviction / moving
+ * ====================
+ *
+ * All eviction (or in other words, moving a BO from one memory location to
+ * another) is routed through TTM with a callback into XE.
+ *
+ * Runtime eviction
+ * ----------------
+ *
+ * Runtime evictions refers to during normal operations where TTM decides it
+ * needs to move a BO. Typically this is because TTM needs to make room for
+ * another BO and the evicted BO is first BO on LRU list that is not locked.
+ *
+ * An example of this is a new BO which can only be placed in VRAM but there is
+ * not space in VRAM. There could be multiple BOs which have sysmem and VRAM
+ * placement rules which currently reside in VRAM, TTM trigger a will move of
+ * one (or multiple) of these BO(s) until there is room in VRAM to place the new
+ * BO. The evicted BO(s) are valid but still need new bindings before the BO
+ * used again (exec or compute mode rebind worker).
+ *
+ * Another example would be, TTM can't find a BO to evict which has another
+ * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s)
+ * to a temporary unreachable (invalid) placement. The evicted BO(s) are invalid
+ * and before next use need to be moved to a valid placement and rebound.
+ *
+ * In both cases, moves of these BOs are scheduled behind the fences in the BO's
+ * dma-resv slots.
+ *
+ * WW locking tries to ensures if 2 VMs use 51% of the memory forward progress
+ * is made on both VMs.
+ *
+ * Runtime eviction uses per a GT migration engine (TODO: link to migration
+ * engine doc) to do a GPU memcpy from one location to another.
+ *
+ * Rebinds after runtime eviction
+ * ------------------------------
+ *
+ * When BOs are moved, every mapping (VMA) of the BO needs to rebound before
+ * the BO is used again. Every VMA is added to an evicted list of its VM when
+ * the BO is moved. This is safe because of the VM locking structure (TODO: link
+ * to VM locking doc). On the next use of a VM (exec or compute mode rebind
+ * worker) the evicted VMA list is checked and rebinds are triggered. In the
+ * case of faulting VM, the rebind is done in the page fault handler.
+ *
+ * Suspend / resume eviction of VRAM
+ * ---------------------------------
+ *
+ * During device suspend / resume VRAM may lose power which means the contents
+ * of VRAM's memory is blown away. Thus BOs present in VRAM at the time of
+ * suspend must be moved to sysmem in order for their contents to be saved.
+ *
+ * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned
+ * (user) BOs to sysmem. External BOs that are pinned need to be manually
+ * evicted with a simple loop + xe_bo_evict call. It gets a little trickier
+ * with kernel BOs.
+ *
+ * Some kernel BOs are used by the GT migration engine to do moves, thus we
+ * can't move all of the BOs via the GT migration engine. For simplity, use a
+ * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume.
+ *
+ * Some kernel BOs need to be restored to the exact same physical location. TTM
+ * makes this rather easy but the caveat is the memory must be contiguous. Again
+ * for simplity, we enforce that all kernel (pinned) BOs are contiguous and
+ * restored to the same physical location.
+ *
+ * Pinned external BOs in VRAM are restored on resume via the GPU.
+ *
+ * Rebinds after suspend / resume
+ * ------------------------------
+ *
+ * Most kernel BOs have GGTT mappings which must be restored during the resume
+ * process. All user BOs are rebound after validation on their next use.
+ *
+ * Future work
+ * ===========
+ *
+ * Trim the list of BOs which is saved / restored via TTM memcpy on suspend /
+ * resume. All we really need to save / restore via TTM memcpy is the memory
+ * required for the GuC to load and the memory for the GT migrate engine to
+ * operate.
+ *
+ * Do not require kernel BOs to be contiguous in physical memory / restored to
+ * the same physical address on resume. In all likelihood the only memory that
+ * needs to be restored to the same physical address is memory used for page
+ * tables. All of that memory is allocated 1 page at time so the contiguous
+ * requirement isn't needed. Some work on the vmap code would need to be done if
+ * kernel BOs are not contiguous too.
+ *
+ * Make some kernel BO evictable rather than pinned. An example of this would be
+ * engine state, in all likelihood if the dma-slots of these BOs where properly
+ * used rather than pinning we could safely evict + rebind these BOs as needed.
+ *
+ * Some kernel BOs do not need to be restored on resume (e.g. GuC ADS as that is
+ * repopulated on resume), add flag to mark such objects as no save / restore.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
new file mode 100644
index 0000000000000..7046dc203138f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+
+/**
+ * xe_bo_evict_all - evict all BOs from VRAM
+ *
+ * @xe: xe device
+ *
+ * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next
+ * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU.
+ * All eviction magic done via TTM calls.
+ *
+ * Evict == move VRAM BOs to temporary (typically system) memory.
+ *
+ * This function should be called before the device goes into a suspend state
+ * where the VRAM loses power.
+ */
+int xe_bo_evict_all(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo;
+	struct xe_gt *gt;
+	struct list_head still_in_list;
+	u32 mem_type;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* User memory */
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		struct ttm_resource_manager *man =
+			ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			ret = ttm_resource_manager_evict_all(bdev, man);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Pinned user memory in VRAM */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_evict(bo, true);
+		xe_bo_unlock(bo, &ww);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/*
+	 * Wait for all user BO to be evicted as those evictions depend on the
+	 * memory moved below.
+	 */
+	for_each_gt(gt, xe, id)
+		xe_gt_migrate_wait(gt);
+
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_evict(bo, true);
+		xe_bo_unlock(bo, &ww);
+		xe_bo_put(bo);
+		if (ret)
+			return ret;
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_kernel - restore kernel BOs to VRAM
+ *
+ * @xe: xe device
+ *
+ * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All
+ * moves done via TTM calls.
+ *
+ * This function should be called early, before trying to init the GT, on device
+ * resume.
+ */
+int xe_bo_restore_kernel(struct xe_device *xe)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.evicted,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_validate(bo, NULL, false);
+		xe_bo_unlock(bo, &ww);
+		if (ret) {
+			xe_bo_put(bo);
+			return ret;
+		}
+
+		if (bo->flags & XE_BO_CREATE_GGTT_BIT)
+			xe_ggtt_map_bo(bo->gt->mem.ggtt, bo);
+
+		/*
+		 * We expect validate to trigger a move VRAM and our move code
+		 * should setup the iosys map.
+		 */
+		XE_BUG_ON(iosys_map_is_null(&bo->vmap));
+		XE_BUG_ON(!xe_bo_is_vram(bo));
+
+		xe_bo_put(bo);
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_user - restore pinned user BOs to VRAM
+ *
+ * @xe: xe device
+ *
+ * Move pinned user BOs from temporary (typically system) memory to VRAM via
+ * CPU. All moves done via TTM calls.
+ *
+ * This function should be called late, after GT init, on device resume.
+ */
+int xe_bo_restore_user(struct xe_device *xe)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo;
+	struct xe_gt *gt;
+	struct list_head still_in_list;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* Pinned user memory in VRAM should be validated on resume */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		xe_bo_get(bo);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ret = xe_bo_validate(bo, NULL, false);
+		xe_bo_unlock(bo, &ww);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/* Wait for validate to complete */
+	for_each_gt(gt, xe, id)
+		xe_gt_migrate_wait(gt);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
new file mode 100644
index 0000000000000..746894798852d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_EVICT_H_
+#define _XE_BO_EVICT_H_
+
+struct xe_device;
+
+int xe_bo_evict_all(struct xe_device *xe);
+int xe_bo_restore_kernel(struct xe_device *xe);
+int xe_bo_restore_user(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
new file mode 100644
index 0000000000000..06de3330211d2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_TYPES_H_
+#define _XE_BO_TYPES_H_
+
+#include <linux/iosys-map.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_placement.h>
+
+struct xe_device;
+struct xe_vm;
+
+#define XE_BO_MAX_PLACEMENTS	3
+
+/** @xe_bo: XE buffer object */
+struct xe_bo {
+	/** @ttm: TTM base buffer object */
+	struct ttm_buffer_object ttm;
+	/** @size: Size of this buffer object */
+	size_t size;
+	/** @flags: flags for this buffer object */
+	u32 flags;
+	/** @vm: VM this BO is attached to, for extobj this will be NULL */
+	struct xe_vm *vm;
+	/** @gt: GT this BO is attached to (kernel BO only) */
+	struct xe_gt *gt;
+	/** @vmas: List of VMAs for this BO */
+	struct list_head vmas;
+	/** @placements: valid placements for this BO */
+	struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
+	/** @placement: current placement for this BO */
+	struct ttm_placement placement;
+	/** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
+	struct drm_mm_node ggtt_node;
+	/** @vmap: iosys map of this buffer */
+	struct iosys_map vmap;
+	/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
+	struct ttm_bo_kmap_obj kmap;
+	/** @pinned_link: link to present / evicted list of pinned BO */
+	struct list_head pinned_link;
+	/** @props: BO user controlled properties */
+	struct {
+		/** @preferred_mem: preferred memory class for this BO */
+		s16 preferred_mem_class;
+		/** @prefered_gt: preferred GT for this BO */
+		s16 preferred_gt;
+		/** @preferred_mem_type: preferred memory type */
+		s32 preferred_mem_type;
+		/**
+		 * @cpu_atomic: the CPU expects to do atomics operations to
+		 * this BO
+		 */
+		bool cpu_atomic;
+		/**
+		 * @device_atomic: the device expects to do atomics operations
+		 * to this BO
+		 */
+		bool device_atomic;
+	} props;
+	/** @freed: List node for delayed put. */
+	struct llist_node freed;
+	/** @created: Whether the bo has passed initial creation */
+	bool created;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
new file mode 100644
index 0000000000000..84db7b3f501ea
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_debugfs.h"
+#include "xe_gt_debugfs.h"
+#include "xe_step.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+#include "xe_bo_evict.h"
+#include "xe_migrate.h"
+#include "xe_vm.h"
+#endif
+
+static struct xe_device *node_to_xe(struct drm_info_node *node)
+{
+	return to_xe_device(node->minor->dev);
+}
+
+static int info(struct seq_file *m, void *data)
+{
+	struct xe_device *xe = node_to_xe(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_gt *gt;
+	u8 id;
+
+	drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
+	drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
+	drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n",
+		   xe_step_name(xe->info.step.graphics),
+		   xe_step_name(xe->info.step.media),
+		   xe_step_name(xe->info.step.display),
+		   xe_step_name(xe->info.step.basedie));
+	drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx));
+	drm_printf(&p, "platform %d\n", xe->info.platform);
+	drm_printf(&p, "subplatform %d\n",
+		   xe->info.subplatform > XE_SUBPLATFORM_NONE ? xe->info.subplatform : 0);
+	drm_printf(&p, "devid 0x%x\n", xe->info.devid);
+	drm_printf(&p, "revid %d\n", xe->info.revid);
+	drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
+	drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
+	drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc));
+	drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm));
+	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
+	for_each_gt(gt, xe, id) {
+		drm_printf(&p, "gt%d force wake %d\n", id,
+			   xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
+		drm_printf(&p, "gt%d engine_mask 0x%llx\n", id,
+			   gt->info.engine_mask);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"info", info, 0},
+};
+
+static int forcewake_open(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	return 0;
+}
+
+static int forcewake_release(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	return 0;
+}
+
+static const struct file_operations forcewake_all_fops = {
+	.owner = THIS_MODULE,
+	.open = forcewake_open,
+	.release = forcewake_release,
+};
+
+void xe_debugfs_register(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct drm_minor *minor = xe->drm.primary;
+	struct dentry *root = minor->debugfs_root;
+	struct ttm_resource_manager *man;
+	struct xe_gt *gt;
+	u32 mem_type;
+	u8 id;
+
+	drm_debugfs_create_files(debugfs_list,
+				 ARRAY_SIZE(debugfs_list),
+				 root, minor);
+
+	debugfs_create_file("forcewake_all", 0400, root, xe,
+			    &forcewake_all_fops);
+
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		man = ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			char name[16];
+
+			sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0);
+			ttm_resource_manager_create_debugfs(man, root, name);
+		}
+	}
+
+	man = ttm_manager_type(bdev, XE_PL_TT);
+	ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
+
+	for_each_gt(gt, xe, id)
+		xe_gt_debugfs_register(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h
new file mode 100644
index 0000000000000..715b8e2e0bd9b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEBUGFS_H_
+#define _XE_DEBUGFS_H_
+
+struct xe_device;
+
+void xe_debugfs_register(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
new file mode 100644
index 0000000000000..93dea2b9c4648
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_aperture.h>
+#include <drm/drm_ioctl.h>
+#include <drm/xe_drm.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_atomic_helper.h>
+
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_dma_buf.h"
+#include "xe_drv.h"
+#include "xe_engine.h"
+#include "xe_exec.h"
+#include "xe_gt.h"
+#include "xe_irq.h"
+#include "xe_module.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_vm.h"
+#include "xe_vm_madvise.h"
+#include "xe_wait_user_fence.h"
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_file *xef;
+
+	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+	if (!xef)
+		return -ENOMEM;
+
+	xef->drm = file;
+
+	mutex_init(&xef->vm.lock);
+	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+	mutex_init(&xef->engine.lock);
+	xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1);
+
+	file->driver_priv = xef;
+	return 0;
+}
+
+static void device_kill_persitent_engines(struct xe_device *xe,
+					  struct xe_file *xef);
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = file->driver_priv;
+	struct xe_vm *vm;
+	struct xe_engine *e;
+	unsigned long idx;
+
+	mutex_lock(&xef->engine.lock);
+	xa_for_each(&xef->engine.xa, idx, e) {
+		xe_engine_kill(e);
+		xe_engine_put(e);
+	}
+	mutex_unlock(&xef->engine.lock);
+	mutex_destroy(&xef->engine.lock);
+	device_kill_persitent_engines(xe, xef);
+
+	mutex_lock(&xef->vm.lock);
+	xa_for_each(&xef->vm.xa, idx, vm)
+		xe_vm_close_and_put(vm);
+	mutex_unlock(&xef->vm.lock);
+	mutex_destroy(&xef->vm.lock);
+
+	kfree(xef);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
+};
+
+static const struct file_operations xe_driver_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release_noglobal,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = drm_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+//	.compat_ioctl = i915_ioc32_compat_ioctl,
+	.llseek = noop_llseek,
+};
+
+static void xe_driver_release(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
+}
+
+static struct drm_driver driver = {
+	/* Don't use MTRRs here; the Xserver or userspace app should
+	 * deal with them for Intel hardware.
+	 */
+	.driver_features =
+	    DRIVER_GEM |
+	    DRIVER_RENDER | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
+	.open = xe_file_open,
+	.postclose = xe_file_close,
+
+	.gem_prime_import = xe_gem_prime_import,
+
+	.dumb_create = xe_bo_dumb_create,
+	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+	.release = &xe_driver_release,
+
+	.ioctls = xe_ioctls,
+	.num_ioctls = ARRAY_SIZE(xe_ioctls),
+	.fops = &xe_driver_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	destroy_workqueue(xe->ordered_wq);
+	mutex_destroy(&xe->persitent_engines.lock);
+	ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent)
+{
+	struct xe_device *xe;
+	int err;
+
+	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+	if (err)
+		return ERR_PTR(err);
+
+	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+	if (IS_ERR(xe))
+		return xe;
+
+	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+			      xe->drm.anon_inode->i_mapping,
+			      xe->drm.vma_offset_manager, false, false);
+	if (WARN_ON(err))
+		goto err_put;
+
+	xe->info.devid = pdev->device;
+	xe->info.revid = pdev->revision;
+	xe->info.enable_guc = enable_guc;
+
+	spin_lock_init(&xe->irq.lock);
+
+	init_waitqueue_head(&xe->ufence_wq);
+
+	mutex_init(&xe->usm.lock);
+	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);
+
+	mutex_init(&xe->persitent_engines.lock);
+	INIT_LIST_HEAD(&xe->persitent_engines.list);
+
+	spin_lock_init(&xe->pinned.lock);
+	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+	INIT_LIST_HEAD(&xe->pinned.external_vram);
+	INIT_LIST_HEAD(&xe->pinned.evicted);
+
+	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+
+	mutex_init(&xe->sb_lock);
+	xe->enabled_irq_mask = ~0;
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+	if (err)
+		goto err_put;
+
+	mutex_init(&xe->mem_access.lock);
+	return xe;
+
+err_put:
+	drm_dev_put(&xe->drm);
+
+	return ERR_PTR(err);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	int err;
+	u8 id;
+
+	xe->info.mem_region_mask = 1;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_alloc(xe, gt);
+		if (err)
+			return err;
+	}
+
+	err = xe_mmio_init(xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_probe(gt);
+		if (err)
+			return err;
+	}
+
+	err = xe_irq_install(xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init_early(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	err = xe_mmio_probe_vram(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init_noalloc(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	err = drm_dev_register(&xe->drm, 0);
+	if (err)
+		goto err_irq_shutdown;
+
+	xe_debugfs_register(xe);
+
+	return 0;
+
+err_irq_shutdown:
+	xe_irq_shutdown(xe);
+	return err;
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+	xe_irq_shutdown(xe);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+}
+
+void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e)
+{
+	mutex_lock(&xe->persitent_engines.lock);
+	list_add_tail(&e->persitent.link, &xe->persitent_engines.list);
+	mutex_unlock(&xe->persitent_engines.lock);
+}
+
+void xe_device_remove_persitent_engines(struct xe_device *xe,
+					struct xe_engine *e)
+{
+	mutex_lock(&xe->persitent_engines.lock);
+	if (!list_empty(&e->persitent.link))
+		list_del(&e->persitent.link);
+	mutex_unlock(&xe->persitent_engines.lock);
+}
+
+static void device_kill_persitent_engines(struct xe_device *xe,
+					  struct xe_file *xef)
+{
+	struct xe_engine *e, *next;
+
+	mutex_lock(&xe->persitent_engines.lock);
+	list_for_each_entry_safe(e, next, &xe->persitent_engines.list,
+				 persitent.link)
+		if (e->persitent.xef == xef) {
+			xe_engine_kill(e);
+			list_del_init(&e->persitent.link);
+		}
+	mutex_unlock(&xe->persitent_engines.lock);
+}
+
+#define SOFTWARE_FLAGS_SPR33         _MMIO(0x4F084)
+
+void xe_device_wmb(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+
+	wmb();
+	if (IS_DGFX(xe))
+		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33.reg, 0);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+	return xe_device_has_flat_ccs(xe) ?
+		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe)
+{
+	bool resumed = xe_pm_runtime_resume_if_suspended(xe);
+
+	mutex_lock(&xe->mem_access.lock);
+	if (xe->mem_access.ref++ == 0)
+		xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
+	mutex_unlock(&xe->mem_access.lock);
+
+	/* The usage counter increased if device was immediately resumed */
+	if (resumed)
+		xe_pm_runtime_put(xe);
+
+	XE_WARN_ON(xe->mem_access.ref == U32_MAX);
+}
+
+void xe_device_mem_access_put(struct xe_device *xe)
+{
+	mutex_lock(&xe->mem_access.lock);
+	if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm)
+		xe_pm_runtime_put(xe);
+	mutex_unlock(&xe->mem_access.lock);
+
+	XE_WARN_ON(xe->mem_access.ref < 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
new file mode 100644
index 0000000000000..88d55671b0685
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_H_
+#define _XE_DEVICE_H_
+
+struct xe_engine;
+struct xe_file;
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_macros.h"
+#include "xe_force_wake.h"
+
+#include "gt/intel_gpu_commands.h"
+
+static inline struct xe_device *to_xe_device(const struct drm_device *dev)
+{
+	return container_of(dev, struct xe_device, drm);
+}
+
+static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
+{
+	return pci_get_drvdata(pdev);
+}
+
+static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
+{
+	return container_of(ttm, struct xe_device, ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent);
+int xe_device_probe(struct xe_device *xe);
+void xe_device_remove(struct xe_device *xe);
+void xe_device_shutdown(struct xe_device *xe);
+
+void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e);
+void xe_device_remove_persitent_engines(struct xe_device *xe,
+					struct xe_engine *e);
+
+void xe_device_wmb(struct xe_device *xe);
+
+static inline struct xe_file *to_xe_file(const struct drm_file *file)
+{
+	return file->driver_priv;
+}
+
+static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
+{
+	struct xe_gt *gt;
+
+	XE_BUG_ON(gt_id > XE_MAX_GT);
+	gt = xe->gt + gt_id;
+	XE_BUG_ON(gt->info.id != gt_id);
+	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+
+	return gt;
+}
+
+/*
+ * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function.
+ */
+static inline struct xe_gt *to_gt(struct xe_device *xe)
+{
+	return xe->gt;
+}
+
+static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
+{
+	return xe->info.enable_guc;
+}
+
+static inline void xe_device_guc_submission_disable(struct xe_device *xe)
+{
+	xe->info.enable_guc = false;
+}
+
+#define for_each_gt(gt__, xe__, id__) \
+	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \
+		for_each_if ((gt__) = xe_device_get_gt((xe__), (id__)))
+
+static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt)
+{
+	return &gt->mmio.fw;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe);
+void xe_device_mem_access_put(struct xe_device *xe);
+
+static inline void xe_device_assert_mem_access(struct xe_device *xe)
+{
+	XE_WARN_ON(!xe->mem_access.ref);
+}
+
+static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
+{
+	bool ret;
+
+	mutex_lock(&xe->mem_access.lock);
+	ret = xe->mem_access.ref;
+	mutex_unlock(&xe->mem_access.lock);
+
+	return ret;
+}
+
+static inline bool xe_device_in_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_fault_mode != 0;
+}
+
+static inline bool xe_device_in_non_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_non_fault_mode != 0;
+}
+
+static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
+{
+	return xe->info.has_flat_ccs;
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
new file mode 100644
index 0000000000000..d62ee85bfcbe0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_TYPES_H_
+#define _XE_DEVICE_TYPES_H_
+
+#include <linux/pci.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
+
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+#include "xe_step_types.h"
+
+#define XE_BO_INVALID_OFFSET	LONG_MAX
+
+#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100)
+#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100)
+#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
+#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
+#define IS_DGFX(xe) ((xe)->info.is_dgfx)
+
+#define XE_VRAM_FLAGS_NEED64K		BIT(0)
+
+#define XE_GT0		0
+#define XE_GT1		1
+#define XE_MAX_GT	(XE_GT1 + 1)
+
+#define XE_MAX_ASID	(BIT(20))
+
+#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&			\
+	 (_xe)->info.step.graphics >= (min_step) &&		\
+	 (_xe)->info.step.graphics < (max_step))
+#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&				\
+	 (_xe)->info.subplatform == (sub) &&				\
+	 (_xe)->info.step.graphics >= (min_step) &&			\
+	 (_xe)->info.step.graphics < (max_step))
+
+/**
+ * struct xe_device - Top level struct of XE device
+ */
+struct xe_device {
+	/** @drm: drm device */
+	struct drm_device drm;
+
+	/** @info: device info */
+	struct intel_device_info {
+		/** @graphics_verx100: graphics IP version */
+		u32 graphics_verx100;
+		/** @media_verx100: media IP version */
+		u32 media_verx100;
+		/** @mem_region_mask: mask of valid memory regions */
+		u32 mem_region_mask;
+		/** @is_dgfx: is discrete device */
+		bool is_dgfx;
+		/** @platform: XE platform enum */
+		enum xe_platform platform;
+		/** @subplatform: XE subplatform enum */
+		enum xe_subplatform subplatform;
+		/** @devid: device ID */
+		u16 devid;
+		/** @revid: device revision */
+		u8 revid;
+		/** @step: stepping information for each IP */
+		struct xe_step_info step;
+		/** @dma_mask_size: DMA address bits */
+		u8 dma_mask_size;
+		/** @vram_flags: Vram flags */
+		u8 vram_flags;
+		/** @tile_count: Number of tiles */
+		u8 tile_count;
+		/** @vm_max_level: Max VM level */
+		u8 vm_max_level;
+		/** @media_ver: Media version */
+		u8 media_ver;
+		/** @supports_usm: Supports unified shared memory */
+		bool supports_usm;
+		/** @enable_guc: GuC submission enabled */
+		bool enable_guc;
+		/** @has_flat_ccs: Whether flat CCS metadata is used */
+		bool has_flat_ccs;
+		/** @has_4tile: Whether tile-4 tiling is supported */
+		bool has_4tile;
+	} info;
+
+	/** @irq: device interrupt state */
+	struct {
+		/** @lock: lock for processing irq's on this device */
+		spinlock_t lock;
+
+		/** @enabled: interrupts enabled on this device */
+		bool enabled;
+	} irq;
+
+	/** @ttm: ttm device */
+	struct ttm_device ttm;
+
+	/** @mmio: mmio info for device */
+	struct {
+		/** @size: size of MMIO space for device */
+		size_t size;
+		/** @regs: pointer to MMIO space for device */
+		void *regs;
+	} mmio;
+
+	/** @mem: memory info for device */
+	struct {
+		/** @vram: VRAM info for device */
+		struct {
+			/** @io_start: start address of VRAM */
+			resource_size_t io_start;
+			/** @size: size of VRAM */
+			resource_size_t size;
+			/** @mapping: pointer to VRAM mappable space */
+			void *__iomem mapping;
+		} vram;
+	} mem;
+
+	/** @usm: unified memory state */
+	struct {
+		/** @asid: convert a ASID to VM */
+		struct xarray asid_to_vm;
+		/** @next_asid: next ASID, used to cyclical alloc asids */
+		u32 next_asid;
+		/** @num_vm_in_fault_mode: number of VM in fault mode */
+		u32 num_vm_in_fault_mode;
+		/** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
+		u32 num_vm_in_non_fault_mode;
+		/** @lock: protects UM state */
+		struct mutex lock;
+	} usm;
+
+	/** @persitent_engines: engines that are closed but still running */
+	struct {
+		/** @lock: protects persitent engines */
+		struct mutex lock;
+		/** @list: list of persitent engines */
+		struct list_head list;
+	} persitent_engines;
+
+	/** @pinned: pinned BO state */
+	struct {
+		/** @lock: protected pinned BO list state */
+		spinlock_t lock;
+		/** @evicted: pinned kernel BO that are present */
+		struct list_head kernel_bo_present;
+		/** @evicted: pinned BO that have been evicted */
+		struct list_head evicted;
+		/** @external_vram: pinned external BO in vram*/
+		struct list_head external_vram;
+	} pinned;
+
+	/** @ufence_wq: user fence wait queue */
+	wait_queue_head_t ufence_wq;
+
+	/** @ordered_wq: used to serialize compute mode resume */
+	struct workqueue_struct *ordered_wq;
+
+	/** @gt: graphics tile */
+	struct xe_gt gt[XE_MAX_GT];
+
+	/**
+	 * @mem_access: keep track of memory access in the device, possibly
+	 * triggering additional actions when they occur.
+	 */
+	struct {
+		/** @lock: protect the ref count */
+		struct mutex lock;
+		/** @ref: ref count of memory accesses */
+		u32 ref;
+		/** @hold_rpm: need to put rpm ref back at the end */
+		bool hold_rpm;
+	} mem_access;
+
+	/** @d3cold_allowed: Indicates if d3cold is a valid device state */
+	bool d3cold_allowed;
+
+	/* For pcode */
+	struct mutex sb_lock;
+
+	u32 enabled_irq_mask;
+};
+
+/**
+ * struct xe_file - file handle for XE driver
+ */
+struct xe_file {
+	/** @drm: base DRM file */
+	struct drm_file *drm;
+
+	/** @vm: VM state for file */
+	struct {
+		/** @xe: xarray to store VMs */
+		struct xarray xa;
+		/** @lock: protects file VM state */
+		struct mutex lock;
+	} vm;
+
+	/** @engine: Submission engine state for file */
+	struct {
+		/** @xe: xarray to store engines */
+		struct xarray xa;
+		/** @lock: protects file engine state */
+		struct mutex lock;
+	} engine;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
new file mode 100644
index 0000000000000..d09ff25bd9409
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_prime.h>
+
+#include <drm/ttm/ttm_tt.h>
+
+#include <kunit/test.h>
+#include <linux/pci-p2pdma.h>
+
+#include "tests/xe_test.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_vm.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+static int xe_dma_buf_attach(struct dma_buf *dmabuf,
+			     struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+
+	if (attach->peer2peer &&
+	    pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0)
+		attach->peer2peer = false;
+
+	if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT))
+		return -EOPNOTSUPP;
+
+	xe_device_mem_access_get(to_xe_device(obj->dev));
+	return 0;
+}
+
+static void xe_dma_buf_detach(struct dma_buf *dmabuf,
+			      struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+
+	xe_device_mem_access_put(to_xe_device(obj->dev));
+}
+
+static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	/*
+	 * Migrate to TT first to increase the chance of non-p2p clients
+	 * can attach.
+	 */
+	(void)xe_bo_migrate(bo, XE_PL_TT);
+	xe_bo_pin_external(bo);
+
+	return 0;
+}
+
+static void xe_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	xe_bo_unpin_external(bo);
+}
+
+static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
+				       enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attach->dmabuf;
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct sg_table *sgt;
+	int r = 0;
+
+	if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (!xe_bo_is_pinned(bo)) {
+		if (!attach->peer2peer ||
+		    bo->ttm.resource->mem_type == XE_PL_SYSTEM) {
+			if (xe_bo_can_migrate(bo, XE_PL_TT))
+				r = xe_bo_migrate(bo, XE_PL_TT);
+			else
+				r = xe_bo_validate(bo, NULL, false);
+		}
+		if (r)
+			return ERR_PTR(r);
+	}
+
+	switch (bo->ttm.resource->mem_type) {
+	case XE_PL_TT:
+		sgt = drm_prime_pages_to_sg(obj->dev,
+					    bo->ttm.ttm->pages,
+					    bo->ttm.ttm->num_pages);
+		if (IS_ERR(sgt))
+			return sgt;
+
+		if (dma_map_sgtable(attach->dev, sgt, dir,
+				    DMA_ATTR_SKIP_CPU_SYNC))
+			goto error_free;
+		break;
+
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo),
+					      bo->ttm.resource, 0,
+					      bo->ttm.base.size, attach->dev,
+					      dir, &sgt);
+		if (r)
+			return ERR_PTR(r);
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	return sgt;
+
+error_free:
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ERR_PTR(-EBUSY);
+}
+
+static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
+			     struct sg_table *sgt,
+			     enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attach->dmabuf;
+	struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
+
+	if (!xe_bo_is_vram(bo)) {
+		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+		sg_free_table(sgt);
+		kfree(sgt);
+	} else {
+		xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt);
+	}
+}
+
+static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+				       enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	bool reads =  (direction == DMA_BIDIRECTIONAL ||
+		       direction == DMA_FROM_DEVICE);
+
+	if (!reads)
+		return 0;
+
+	xe_bo_lock_no_vm(bo, NULL);
+	(void)xe_bo_migrate(bo, XE_PL_TT);
+	xe_bo_unlock_no_vm(bo);
+
+	return 0;
+}
+
+const struct dma_buf_ops xe_dmabuf_ops = {
+	.attach = xe_dma_buf_attach,
+	.detach = xe_dma_buf_detach,
+	.pin = xe_dma_buf_pin,
+	.unpin = xe_dma_buf_unpin,
+	.map_dma_buf = xe_dma_buf_map,
+	.unmap_dma_buf = xe_dma_buf_unmap,
+	.release = drm_gem_dmabuf_release,
+	.begin_cpu_access = xe_dma_buf_begin_cpu_access,
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
+{
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct dma_buf *buf;
+
+	if (bo->vm)
+		return ERR_PTR(-EPERM);
+
+	buf = drm_gem_prime_export(obj, flags);
+	if (!IS_ERR(buf))
+		buf->ops = &xe_dmabuf_ops;
+
+	return buf;
+}
+
+static struct drm_gem_object *
+xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
+		    struct dma_buf *dma_buf)
+{
+	struct dma_resv *resv = dma_buf->resv;
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_bo *bo;
+	int ret;
+
+	dma_resv_lock(resv, NULL);
+	bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size,
+				   ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+	if (IS_ERR(bo)) {
+		ret = PTR_ERR(bo);
+		goto error;
+	}
+	dma_resv_unlock(resv);
+
+	return &bo->ttm.base;
+
+error:
+	dma_resv_unlock(resv);
+	return ERR_PTR(ret);
+}
+
+static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->importer_priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	XE_WARN_ON(xe_bo_evict(bo, false));
+}
+
+static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
+	.allow_peer2peer = true,
+	.move_notify = xe_dma_buf_move_notify
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+
+struct dma_buf_test_params {
+	struct xe_test_priv base;
+	const struct dma_buf_attach_ops *attach_ops;
+	bool force_different_devices;
+	u32 mem_mask;
+};
+
+#define to_dma_buf_test_params(_priv) \
+	container_of(_priv, struct dma_buf_test_params, base)
+#endif
+
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf)
+{
+	XE_TEST_DECLARE(struct dma_buf_test_params *test =
+			to_dma_buf_test_params
+			(xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));)
+	const struct dma_buf_attach_ops *attach_ops;
+	struct dma_buf_attachment *attach;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+
+	if (dma_buf->ops == &xe_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev &&
+		    !XE_TEST_ONLY(test && test->force_different_devices)) {
+			/*
+			 * Importing dmabuf exported from out own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	/*
+	 * Don't publish the bo until we have a valid attachment, and a
+	 * valid attachment needs the bo address. So pre-create a bo before
+	 * creating the attachment and publish.
+	 */
+	bo = xe_bo_alloc();
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	attach_ops = &xe_dma_buf_attach_ops;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	if (test)
+		attach_ops = test->attach_ops;
+#endif
+
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+	if (IS_ERR(attach)) {
+		obj = ERR_CAST(attach);
+		goto out_err;
+	}
+
+	/* Errors here will take care of freeing the bo. */
+	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
+
+out_err:
+	xe_bo_free(bo);
+
+	return obj;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_dma_buf.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h
new file mode 100644
index 0000000000000..861dd28a862c7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_H_
+#define _XE_DMA_BUF_H_
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags);
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
new file mode 100644
index 0000000000000..0377e5e4e35f4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DRV_H_
+#define _XE_DRV_H_
+
+#include <drm/drm_drv.h>
+
+#define DRIVER_NAME		"xe"
+#define DRIVER_DESC		"Intel Xe Graphics"
+#define DRIVER_DATE		"20201103"
+#define DRIVER_TIMESTAMP	1604406085
+
+/* Interface history:
+ *
+ * 1.1: Original.
+ */
+#define DRIVER_MAJOR		1
+#define DRIVER_MINOR		1
+#define DRIVER_PATCHLEVEL	0
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
new file mode 100644
index 0000000000000..63219bd98be7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_engine.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+#include <linux/nospec.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct xe_engine *__xe_engine_create(struct xe_device *xe,
+					    struct xe_vm *vm,
+					    u32 logical_mask,
+					    u16 width, struct xe_hw_engine *hwe,
+					    u32 flags)
+{
+	struct xe_engine *e;
+	struct xe_gt *gt = hwe->gt;
+	int err;
+	int i;
+
+	e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&e->refcount);
+	e->flags = flags;
+	e->hwe = hwe;
+	e->gt = gt;
+	if (vm)
+		e->vm = xe_vm_get(vm);
+	e->class = hwe->class;
+	e->width = width;
+	e->logical_mask = logical_mask;
+	e->fence_irq = &gt->fence_irq[hwe->class];
+	e->ring_ops = gt->ring_ops[hwe->class];
+	e->ops = gt->engine_ops;
+	INIT_LIST_HEAD(&e->persitent.link);
+	INIT_LIST_HEAD(&e->compute.link);
+	INIT_LIST_HEAD(&e->multi_gt_link);
+
+	/* FIXME: Wire up to configurable default value */
+	e->sched_props.timeslice_us = 1 * 1000;
+	e->sched_props.preempt_timeout_us = 640 * 1000;
+
+	if (xe_engine_is_parallel(e)) {
+		e->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
+		e->parallel.composite_fence_seqno = 1;
+	}
+	if (e->flags & ENGINE_FLAG_VM) {
+		e->bind.fence_ctx = dma_fence_context_alloc(1);
+		e->bind.fence_seqno = 1;
+	}
+
+	for (i = 0; i < width; ++i) {
+		err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K);
+		if (err)
+			goto err_lrc;
+	}
+
+	err = e->ops->init(e);
+	if (err)
+		goto err_lrc;
+
+	return e;
+
+err_lrc:
+	for (i = i - 1; i >= 0; --i)
+		xe_lrc_finish(e->lrc + i);
+	kfree(e);
+	return ERR_PTR(err);
+}
+
+struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
+				   u32 logical_mask, u16 width,
+				   struct xe_hw_engine *hwe, u32 flags)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_engine *e;
+	int err;
+
+	if (vm) {
+		err = xe_vm_lock(vm, &ww, 0, true);
+		if (err)
+			return ERR_PTR(err);
+	}
+	e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags);
+	if (vm)
+		xe_vm_unlock(vm, &ww);
+
+	return e;
+}
+
+struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
+					 struct xe_vm *vm,
+					 enum xe_engine_class class, u32 flags)
+{
+	struct xe_hw_engine *hwe, *hwe0 = NULL;
+	enum xe_hw_engine_id id;
+	u32 logical_mask = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe))
+			continue;
+
+		if (hwe->class == class) {
+			logical_mask |= BIT(hwe->logical_instance);
+			if (!hwe0)
+				hwe0 = hwe;
+		}
+	}
+
+	if (!logical_mask)
+		return ERR_PTR(-ENODEV);
+
+	return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags);
+}
+
+void xe_engine_destroy(struct kref *ref)
+{
+	struct xe_engine *e = container_of(ref, struct xe_engine, refcount);
+	struct xe_engine *engine, *next;
+
+	if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) {
+		list_for_each_entry_safe(engine, next, &e->multi_gt_list,
+					 multi_gt_link)
+			xe_engine_put(engine);
+	}
+
+	e->ops->fini(e);
+}
+
+void xe_engine_fini(struct xe_engine *e)
+{
+	int i;
+
+	for (i = 0; i < e->width; ++i)
+		xe_lrc_finish(e->lrc + i);
+	if (e->vm)
+		xe_vm_put(e->vm);
+
+	kfree(e);
+}
+
+struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_engine *e;
+
+	mutex_lock(&xef->engine.lock);
+	e = xa_load(&xef->engine.xa, id);
+	mutex_unlock(&xef->engine.lock);
+
+	if (e)
+		xe_engine_get(e);
+
+	return e;
+}
+
+static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
+			       u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH &&
+			 !capable(CAP_SYS_NICE)))
+		return -EPERM;
+
+	return e->ops->set_priority(e, value);
+}
+
+static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e,
+				u64 value, bool create)
+{
+	if (!capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	return e->ops->set_timeslice(e, value);
+}
+
+static int engine_set_preemption_timeout(struct xe_device *xe,
+					 struct xe_engine *e, u64 value,
+					 bool create)
+{
+	if (!capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	return e->ops->set_preempt_timeout(e, value);
+}
+
+static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
+				   u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM))
+		return -EINVAL;
+
+	if (value) {
+		struct xe_vm *vm = e->vm;
+		int err;
+
+		if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm)))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm)))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_ERR(xe, e->width != 1))
+			return -EINVAL;
+
+		e->compute.context = dma_fence_context_alloc(1);
+		spin_lock_init(&e->compute.lock);
+
+		err = xe_vm_add_compute_engine(vm, e);
+		if (XE_IOCTL_ERR(xe, err))
+			return err;
+
+		e->flags |= ENGINE_FLAG_COMPUTE_MODE;
+		e->flags &= ~ENGINE_FLAG_PERSISTENT;
+	}
+
+	return 0;
+}
+
+static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
+				  u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+		return -EINVAL;
+
+	if (value)
+		e->flags |= ENGINE_FLAG_PERSISTENT;
+	else
+		e->flags &= ~ENGINE_FLAG_PERSISTENT;
+
+	return 0;
+}
+
+static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
+				  u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	return e->ops->set_job_timeout(e, value);
+}
+
+static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
+				  u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+		return -EINVAL;
+
+	e->usm.acc_trigger = value;
+
+	return 0;
+}
+
+static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
+				 u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+		return -EINVAL;
+
+	e->usm.acc_notify = value;
+
+	return 0;
+}
+
+static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
+				      u64 value, bool create)
+{
+	if (XE_IOCTL_ERR(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+		return -EINVAL;
+
+	e->usm.acc_granularity = value;
+
+	return 0;
+}
+
+typedef int (*xe_engine_set_property_fn)(struct xe_device *xe,
+					 struct xe_engine *e,
+					 u64 value, bool create);
+
+static const xe_engine_set_property_fn engine_set_property_funcs[] = {
+	[XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority,
+	[XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice,
+	[XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
+	[XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
+	[XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence,
+	[XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
+	[XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
+	[XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
+	[XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
+};
+
+static int engine_user_ext_set_property(struct xe_device *xe,
+					struct xe_engine *e,
+					u64 extension,
+					bool create)
+{
+	u64 __user *address = u64_to_user_ptr(extension);
+	struct drm_xe_ext_engine_set_property ext;
+	int err;
+	u32 idx;
+
+	err = __copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, ext.property >=
+			 ARRAY_SIZE(engine_set_property_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
+	return engine_set_property_funcs[idx](xe, e, ext.value,  create);
+}
+
+typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe,
+					   struct xe_engine *e,
+					   u64 extension,
+					   bool create);
+
+static const xe_engine_set_property_fn engine_user_extension_funcs[] = {
+	[XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS	16
+static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
+				  u64 extensions, int ext_number, bool create)
+{
+	u64 __user *address = u64_to_user_ptr(extensions);
+	struct xe_user_extension ext;
+	int err;
+	u32 idx;
+
+	if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+		return -E2BIG;
+
+	err = __copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, ext.name >=
+			 ARRAY_SIZE(engine_user_extension_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(ext.name,
+				 ARRAY_SIZE(engine_user_extension_funcs));
+	err = engine_user_extension_funcs[idx](xe, e, extensions, create);
+	if (XE_IOCTL_ERR(xe, err))
+		return err;
+
+	if (ext.next_extension)
+		return engine_user_extensions(xe, e, ext.next_extension,
+					      ++ext_number, create);
+
+	return 0;
+}
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+static struct xe_hw_engine *
+find_hw_engine(struct xe_device *xe,
+	       struct drm_xe_engine_class_instance eci)
+{
+	u32 idx;
+
+	if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
+		return NULL;
+
+	if (eci.gt_id >= xe->info.tile_count)
+		return NULL;
+
+	idx = array_index_nospec(eci.engine_class,
+				 ARRAY_SIZE(user_to_xe_engine_class));
+
+	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
+			       user_to_xe_engine_class[idx],
+			       eci.engine_instance, true);
+}
+
+static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+				    struct drm_xe_engine_class_instance *eci,
+				    u16 width, u16 num_placements)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 logical_mask = 0;
+
+	if (XE_IOCTL_ERR(xe, width != 1))
+		return 0;
+	if (XE_IOCTL_ERR(xe, num_placements != 1))
+		return 0;
+	if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0))
+		return 0;
+
+	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe))
+			continue;
+
+		if (hwe->class ==
+		    user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
+			logical_mask |= BIT(hwe->logical_instance);
+	}
+
+	return logical_mask;
+}
+
+static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+				      struct drm_xe_engine_class_instance *eci,
+				      u16 width, u16 num_placements)
+{
+	int len = width * num_placements;
+	int i, j, n;
+	u16 class;
+	u16 gt_id;
+	u32 return_mask = 0, prev_mask;
+
+	if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) &&
+			 len > 1))
+		return 0;
+
+	for (i = 0; i < width; ++i) {
+		u32 current_mask = 0;
+
+		for (j = 0; j < num_placements; ++j) {
+			struct xe_hw_engine *hwe;
+
+			n = j * width + i;
+
+			hwe = find_hw_engine(xe, eci[n]);
+			if (XE_IOCTL_ERR(xe, !hwe))
+				return 0;
+
+			if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe)))
+				return 0;
+
+			if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) ||
+			    XE_IOCTL_ERR(xe, n && eci[n].engine_class != class))
+				return 0;
+
+			class = eci[n].engine_class;
+			gt_id = eci[n].gt_id;
+
+			if (width == 1 || !i)
+				return_mask |= BIT(eci[n].engine_instance);
+			current_mask |= BIT(eci[n].engine_instance);
+		}
+
+		/* Parallel submissions must be logically contiguous */
+		if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1))
+			return 0;
+
+		prev_mask = current_mask;
+	}
+
+	return return_mask;
+}
+
+int xe_engine_create_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_engine_create *args = data;
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_xe_engine_class_instance __user *user_eci =
+		u64_to_user_ptr(args->instances);
+	struct xe_hw_engine *hwe;
+	struct xe_vm *vm, *migrate_vm;
+	struct xe_gt *gt;
+	struct xe_engine *e = NULL;
+	u32 logical_mask;
+	u32 id;
+	int len;
+	int err;
+
+	if (XE_IOCTL_ERR(xe, args->flags))
+		return -EINVAL;
+
+	len = args->width * args->num_placements;
+	if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
+		return -EINVAL;
+
+	err = __copy_from_user(eci, user_eci,
+			       sizeof(struct drm_xe_engine_class_instance) *
+			       len);
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
+	       return -EINVAL;
+
+	xe_pm_runtime_get(xe);
+
+	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+		for_each_gt(gt, xe, id) {
+			struct xe_engine *new;
+
+			if (xe_gt_is_media_type(gt))
+				continue;
+
+			eci[0].gt_id = gt->info.id;
+			logical_mask = bind_engine_logical_mask(xe, gt, eci,
+								args->width,
+								args->num_placements);
+			if (XE_IOCTL_ERR(xe, !logical_mask)) {
+				err = -EINVAL;
+				goto put_rpm;
+			}
+
+			hwe = find_hw_engine(xe, eci[0]);
+			if (XE_IOCTL_ERR(xe, !hwe)) {
+				err = -EINVAL;
+				goto put_rpm;
+			}
+
+			migrate_vm = xe_migrate_get_vm(gt->migrate);
+			new = xe_engine_create(xe, migrate_vm, logical_mask,
+					       args->width, hwe,
+					       ENGINE_FLAG_PERSISTENT |
+					       ENGINE_FLAG_VM |
+					       (id ?
+					       ENGINE_FLAG_BIND_ENGINE_CHILD :
+					       0));
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(new)) {
+				err = PTR_ERR(new);
+				if (e)
+					goto put_engine;
+				goto put_rpm;
+			}
+			if (id == 0)
+				e = new;
+			else
+				list_add_tail(&new->multi_gt_list,
+					      &e->multi_gt_link);
+		}
+	} else {
+		gt = xe_device_get_gt(xe, eci[0].gt_id);
+		logical_mask = calc_validate_logical_mask(xe, gt, eci,
+							  args->width,
+							  args->num_placements);
+		if (XE_IOCTL_ERR(xe, !logical_mask)) {
+			err = -EINVAL;
+			goto put_rpm;
+		}
+
+		hwe = find_hw_engine(xe, eci[0]);
+		if (XE_IOCTL_ERR(xe, !hwe)) {
+			err = -EINVAL;
+			goto put_rpm;
+		}
+
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_ERR(xe, !vm)) {
+			err = -ENOENT;
+			goto put_rpm;
+		}
+
+		e = xe_engine_create(xe, vm, logical_mask,
+				     args->width, hwe, ENGINE_FLAG_PERSISTENT);
+		xe_vm_put(vm);
+		if (IS_ERR(e)) {
+			err = PTR_ERR(e);
+			goto put_rpm;
+		}
+	}
+
+	if (args->extensions) {
+		err = engine_user_extensions(xe, e, args->extensions, 0, true);
+		if (XE_IOCTL_ERR(xe, err))
+			goto put_engine;
+	}
+
+	if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
+			 !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
+		err = -ENOTSUPP;
+		goto put_engine;
+	}
+
+	e->persitent.xef = xef;
+
+	mutex_lock(&xef->engine.lock);
+	err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->engine.lock);
+	if (err)
+		goto put_engine;
+
+	args->engine_id = id;
+
+	return 0;
+
+put_engine:
+	xe_engine_kill(e);
+	xe_engine_put(e);
+put_rpm:
+	xe_pm_runtime_put(xe);
+	return err;
+}
+
+static void engine_kill_compute(struct xe_engine *e)
+{
+	if (!xe_vm_in_compute_mode(e->vm))
+		return;
+
+	down_write(&e->vm->lock);
+	list_del(&e->compute.link);
+	--e->vm->preempt.num_engines;
+	if (e->compute.pfence) {
+		dma_fence_enable_sw_signaling(e->compute.pfence);
+		dma_fence_put(e->compute.pfence);
+		e->compute.pfence = NULL;
+	}
+	up_write(&e->vm->lock);
+}
+
+void xe_engine_kill(struct xe_engine *e)
+{
+	struct xe_engine *engine = e, *next;
+
+	list_for_each_entry_safe(engine, next, &engine->multi_gt_list,
+				 multi_gt_link) {
+		e->ops->kill(engine);
+		engine_kill_compute(engine);
+	}
+
+	e->ops->kill(e);
+	engine_kill_compute(e);
+}
+
+int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_engine_destroy *args = data;
+	struct xe_engine *e;
+
+	if (XE_IOCTL_ERR(xe, args->pad))
+		return -EINVAL;
+
+	mutex_lock(&xef->engine.lock);
+	e = xa_erase(&xef->engine.xa, args->engine_id);
+	mutex_unlock(&xef->engine.lock);
+	if (XE_IOCTL_ERR(xe, !e))
+		return -ENOENT;
+
+	if (!(e->flags & ENGINE_FLAG_PERSISTENT))
+		xe_engine_kill(e);
+	else
+		xe_device_add_persitent_engines(xe, e);
+
+	trace_xe_engine_close(e);
+	xe_engine_put(e);
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_engine_set_property *args = data;
+	struct xe_engine *e;
+	int ret;
+	u32 idx;
+
+	e = xe_engine_lookup(xef, args->engine_id);
+	if (XE_IOCTL_ERR(xe, !e))
+		return -ENOENT;
+
+	if (XE_IOCTL_ERR(xe, args->property >=
+			 ARRAY_SIZE(engine_set_property_funcs))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	idx = array_index_nospec(args->property,
+				 ARRAY_SIZE(engine_set_property_funcs));
+	ret = engine_set_property_funcs[idx](xe, e, args->value, false);
+	if (XE_IOCTL_ERR(xe, ret))
+		goto out;
+
+	if (args->extensions)
+		ret = engine_user_extensions(xe, e, args->extensions, 0,
+					     false);
+out:
+	xe_engine_put(e);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
new file mode 100644
index 0000000000000..4d1b609fea7e0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_H_
+#define _XE_ENGINE_H_
+
+#include "xe_engine_types.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+struct xe_file;
+
+struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
+				   u32 logical_mask, u16 width,
+				   struct xe_hw_engine *hw_engine, u32 flags);
+struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
+					 struct xe_vm *vm,
+					 enum xe_engine_class class, u32 flags);
+
+void xe_engine_fini(struct xe_engine *e);
+void xe_engine_destroy(struct kref *ref);
+
+struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id);
+
+static inline struct xe_engine *xe_engine_get(struct xe_engine *engine)
+{
+	kref_get(&engine->refcount);
+	return engine;
+}
+
+static inline void xe_engine_put(struct xe_engine *engine)
+{
+	kref_put(&engine->refcount, xe_engine_destroy);
+}
+
+static inline bool xe_engine_is_parallel(struct xe_engine *engine)
+{
+	return engine->width > 1;
+}
+
+void xe_engine_kill(struct xe_engine *e);
+
+int xe_engine_create_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
new file mode 100644
index 0000000000000..3dfa1c14e181a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine_types.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_TYPES_H_
+#define _XE_ENGINE_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_lrc_types.h"
+
+struct xe_execlist_engine;
+struct xe_gt;
+struct xe_guc_engine;
+struct xe_hw_engine;
+struct xe_vm;
+
+enum xe_engine_priority {
+	XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */
+	XE_ENGINE_PRIORITY_LOW = 0,
+	XE_ENGINE_PRIORITY_NORMAL,
+	XE_ENGINE_PRIORITY_HIGH,
+	XE_ENGINE_PRIORITY_KERNEL,
+
+	XE_ENGINE_PRIORITY_COUNT
+};
+
+/**
+ * struct xe_engine - Submission engine
+ *
+ * Contains all state necessary for submissions. Can either be a user object or
+ * a kernel object.
+ */
+struct xe_engine {
+	/** @gt: graphics tile this engine can submit to */
+	struct xe_gt *gt;
+	/**
+	 * @hwe: A hardware of the same class. May (physical engine) or may not
+	 * (virtual engine) be where jobs actual engine up running. Should never
+	 * really be used for submissions.
+	 */
+	struct xe_hw_engine *hwe;
+	/** @refcount: ref count of this engine */
+	struct kref refcount;
+	/** @vm: VM (address space) for this engine */
+	struct xe_vm *vm;
+	/** @class: class of this engine */
+	enum xe_engine_class class;
+	/** @priority: priority of this exec queue */
+	enum xe_engine_priority priority;
+	/**
+	 * @logical_mask: logical mask of where job submitted to engine can run
+	 */
+	u32 logical_mask;
+	/** @name: name of this engine */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @width: width (number BB submitted per exec) of this engine */
+	u16 width;
+	/** @fence_irq: fence IRQ used to signal job completion */
+	struct xe_hw_fence_irq *fence_irq;
+
+#define ENGINE_FLAG_BANNED		BIT(0)
+#define ENGINE_FLAG_KERNEL		BIT(1)
+#define ENGINE_FLAG_PERSISTENT		BIT(2)
+#define ENGINE_FLAG_COMPUTE_MODE	BIT(3)
+#define ENGINE_FLAG_VM			BIT(4)
+#define ENGINE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+#define ENGINE_FLAG_WA			BIT(6)
+
+	/**
+	 * @flags: flags for this engine, should statically setup aside from ban
+	 * bit
+	 */
+	unsigned long flags;
+
+	union {
+		/** @multi_gt_list: list head for VM bind engines if multi-GT */
+		struct list_head multi_gt_list;
+		/** @multi_gt_link: link for VM bind engines if multi-GT */
+		struct list_head multi_gt_link;
+	};
+
+	union {
+		/** @execlist: execlist backend specific state for engine */
+		struct xe_execlist_engine *execlist;
+		/** @guc: GuC backend specific state for engine */
+		struct xe_guc_engine *guc;
+	};
+
+	/**
+	 * @persitent: persitent engine state
+	 */
+	struct {
+		/** @xef: file which this engine belongs to */
+		struct xe_file *xef;
+		/** @link: link in list of persitent engines */
+		struct list_head link;
+	} persitent;
+
+	union {
+		/**
+		 * @parallel: parallel submission state
+		 */
+		struct {
+			/** @composite_fence_ctx: context composite fence */
+			u64 composite_fence_ctx;
+			/** @composite_fence_seqno: seqno for composite fence */
+			u32 composite_fence_seqno;
+		} parallel;
+		/**
+		 * @bind: bind submission state
+		 */
+		struct {
+			/** @fence_ctx: context bind fence */
+			u64 fence_ctx;
+			/** @fence_seqno: seqno for bind fence */
+			u32 fence_seqno;
+		} bind;
+	};
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+	} sched_props;
+
+	/** @compute: compute engine state */
+	struct {
+		/** @pfence: preemption fence */
+		struct dma_fence *pfence;
+		/** @context: preemption fence context */
+		u64 context;
+		/** @seqno: preemption fence seqno */
+		u32 seqno;
+		/** @link: link into VM's list of engines */
+		struct list_head link;
+		/** @lock: preemption fences lock */
+		spinlock_t lock;
+	} compute;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @acc_trigger: access counter trigger */
+		u32 acc_trigger;
+		/** @acc_notify: access counter notify */
+		u32 acc_notify;
+		/** @acc_granularity: access counter granularity */
+		u32 acc_granularity;
+	} usm;
+
+	/** @ops: submission backend engine operations */
+	const struct xe_engine_ops *ops;
+
+	/** @ring_ops: ring operations for this engine */
+	const struct xe_ring_ops *ring_ops;
+	/** @entity: DRM sched entity for this engine (1 to 1 relationship) */
+	struct drm_sched_entity *entity;
+	/** @lrc: logical ring context for this engine */
+	struct xe_lrc lrc[0];
+};
+
+/**
+ * struct xe_engine_ops - Submission backend engine operations
+ */
+struct xe_engine_ops {
+	/** @init: Initialize engine for submission backend */
+	int (*init)(struct xe_engine *e);
+	/** @kill: Kill inflight submissions for backend */
+	void (*kill)(struct xe_engine *e);
+	/** @fini: Fini engine for submission backend */
+	void (*fini)(struct xe_engine *e);
+	/** @set_priority: Set priority for engine */
+	int (*set_priority)(struct xe_engine *e,
+			    enum xe_engine_priority priority);
+	/** @set_timeslice: Set timeslice for engine */
+	int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us);
+	/** @set_preempt_timeout: Set preemption timeout for engine */
+	int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us);
+	/** @set_job_timeout: Set job timeout for engine */
+	int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms);
+	/**
+	 * @suspend: Suspend engine from executing, allowed to be called
+	 * multiple times in a row before resume with the caveat that
+	 * suspend_wait returns before calling suspend again.
+	 */
+	int (*suspend)(struct xe_engine *e);
+	/**
+	 * @suspend_wait: Wait for an engine to suspend executing, should be
+	 * call after suspend.
+	 */
+	void (*suspend_wait)(struct xe_engine *e);
+	/**
+	 * @resume: Resume engine execution, engine must be in a suspended
+	 * state and dma fence returned from most recent suspend call must be
+	 * signalled when this function is called.
+	 */
+	void (*resume)(struct xe_engine *e);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
new file mode 100644
index 0000000000000..00f298acc4366
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_exec.h"
+#include "xe_macros.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Execbuf (User GPU command submission)
+ *
+ * Execs have historically been rather complicated in DRM drivers (at least in
+ * the i915) because a few things:
+ *
+ * - Passing in a list BO which are read / written to creating implicit syncs
+ * - Binding at exec time
+ * - Flow controlling the ring at exec time
+ *
+ * In XE we avoid all of this complication by not allowing a BO list to be
+ * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
+ * seperate operations, and using the DRM scheduler to flow control the ring.
+ * Let's deep dive on each of these.
+ *
+ * We can get away from a BO list by forcing the user to use in / out fences on
+ * every exec rather than the kernel tracking dependencies of BO (e.g. if the
+ * user knows an exec writes to a BO and reads from the BO in the next exec, it
+ * is the user's responsibility to pass in / out fence between the two execs).
+ *
+ * Implicit dependencies for external BOs are handled by using the dma-buf
+ * implicit dependency uAPI (TODO: add link). To make this works each exec must
+ * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
+ * BO mapped in the VM.
+ *
+ * We do not allow a user to trigger a bind at exec time rather we have a VM
+ * bind IOCTL which uses the same in / out fence interface as exec. In that
+ * sense, a VM bind is basically the same operation as an exec from the user
+ * perspective. e.g. If an exec depends on a VM bind use the in / out fence
+ * interface (struct drm_xe_sync) to synchronize like syncing between two
+ * dependent execs.
+ *
+ * Although a user cannot trigger a bind, we still have to rebind userptrs in
+ * the VM that have been invalidated since the last exec, likewise we also have
+ * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
+ * behind any pending kernel operations on any external BOs in VM or any BOs
+ * private to the VM. This is accomplished by the rebinds waiting on BOs
+ * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs
+ * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and
+ * in DMA_RESV_USAGE_WRITE for external BOs).
+ *
+ * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
+ * mode VMs we use preempt fences and a rebind worker (TODO: add link).
+ *
+ * There is no need to flow control the ring in the exec as we write the ring at
+ * submission time and set the DRM scheduler max job limit SIZE_OF_RING /
+ * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
+ * ring is available.
+ *
+ * All of this results in a rather simple exec implementation.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	Parse input arguments
+ *	Wait for any async VM bind passed as in-fences to start
+ *	<----------------------------------------------------------------------|
+ *	Lock global VM lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last exec)          |
+ *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
+ *	Validate BOs that have been evicted                                    |
+ *	Create job                                                             |
+ *	Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
+ *	Add rebind fence dependency to job                                     |
+ *	Add job VM dma-resv bookkeeping slot (non-compute mode)                |
+ *	Add job to external BOs dma-resv write slots (non-compute mode)        |
+ *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
+ *	Install in / out fences for job
+ *	Submit job
+ *	Unlock all
+ */
+
+static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
+			 struct ttm_validate_buffer tv_onstack[],
+			 struct ttm_validate_buffer **tv,
+			 struct list_head *objs)
+{
+	struct xe_vm *vm = e->vm;
+	struct xe_vma *vma;
+	LIST_HEAD(dups);
+	int err;
+
+	*tv = NULL;
+	if (xe_vm_no_dma_fences(e->vm))
+		return 0;
+
+	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
+	if (err)
+		return err;
+
+	/*
+	 * Validate BOs that have been evicted (i.e. make sure the
+	 * BOs have valid placements possibly moving an evicted BO back
+	 * to a location where the GPU can access it).
+	 */
+	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+		if (xe_vma_is_userptr(vma))
+			continue;
+
+		err = xe_bo_validate(vma->bo, vm, false);
+		if (err) {
+			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
+			*tv = NULL;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void xe_exec_end(struct xe_engine *e,
+			struct ttm_validate_buffer *tv_onstack,
+			struct ttm_validate_buffer *tv,
+			struct ww_acquire_ctx *ww,
+			struct list_head *objs)
+{
+	if (!xe_vm_no_dma_fences(e->vm))
+		xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
+}
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec *args = data;
+	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
+	u64 __user *addresses_user = u64_to_user_ptr(args->address);
+	struct xe_engine *engine;
+	struct xe_sync_entry *syncs = NULL;
+	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
+	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+	struct ttm_validate_buffer *tv = NULL;
+	u32 i, num_syncs = 0;
+	struct xe_sched_job *job;
+	struct dma_fence *rebind_fence;
+	struct xe_vm *vm;
+	struct ww_acquire_ctx ww;
+	struct list_head objs;
+	bool write_locked;
+	int err = 0;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	engine = xe_engine_lookup(xef, args->engine_id);
+	if (XE_IOCTL_ERR(xe, !engine))
+		return -ENOENT;
+
+	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) {
+		err = -ECANCELED;
+		goto err_engine;
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto err_engine;
+		}
+	}
+
+	vm = engine->vm;
+
+	for (i = 0; i < args->num_syncs; i++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
+					  &syncs_user[i], true,
+					  xe_vm_no_dma_fences(vm));
+		if (err)
+			goto err_syncs;
+	}
+
+	if (xe_engine_is_parallel(engine)) {
+		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
+				       engine->width);
+		if (err) {
+			err = -EFAULT;
+			goto err_syncs;
+		}
+	}
+
+	/*
+	 * We can't install a job into the VM dma-resv shared slot before an
+	 * async VM bind passed in as a fence without the risk of deadlocking as
+	 * the bind can trigger an eviction which in turn depends on anything in
+	 * the VM dma-resv shared slots. Not an ideal solution, but we wait for
+	 * all dependent async VM binds to start (install correct fences into
+	 * dma-resv slots) before moving forward.
+	 */
+	if (!xe_vm_no_dma_fences(vm) &&
+	    vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
+		for (i = 0; i < args->num_syncs; i++) {
+			struct dma_fence *fence = syncs[i].fence;
+			if (fence) {
+				err = xe_vm_async_fence_wait_start(fence);
+				if (err)
+					goto err_syncs;
+			}
+		}
+	}
+
+retry:
+	if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
+		err = down_write_killable(&vm->lock);
+		write_locked = true;
+	} else {
+		/* We don't allow execs while the VM is in error state */
+		err = down_read_interruptible(&vm->lock);
+		write_locked = false;
+	}
+	if (err)
+		goto err_syncs;
+
+	/* We don't allow execs while the VM is in error state */
+	if (vm->async_ops.error) {
+		err = vm->async_ops.error;
+		goto err_unlock_list;
+	}
+
+	/*
+	 * Extreme corner where we exit a VM error state with a munmap style VM
+	 * unbind inflight which requires a rebind. In this case the rebind
+	 * needs to install some fences into the dma-resv slots. The worker to
+	 * do this queued, let that worker make progress by dropping vm->lock,
+	 * flushing the worker and retrying the exec.
+	 */
+	if (vm->async_ops.munmap_rebind_inflight) {
+		if (write_locked)
+			up_write(&vm->lock);
+		else
+			up_read(&vm->lock);
+		flush_work(&vm->async_ops.work);
+		goto retry;
+	}
+
+	if (write_locked) {
+		err = xe_vm_userptr_pin(vm);
+		downgrade_write(&vm->lock);
+		write_locked = false;
+		if (err)
+			goto err_unlock_list;
+	}
+
+	err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
+	if (err)
+		goto err_unlock_list;
+
+	if (xe_vm_is_closed(engine->vm)) {
+		drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
+		err = -EIO;
+		goto err_engine_end;
+	}
+
+	job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
+				  addresses : &args->address);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_engine_end;
+	}
+
+	/*
+	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
+	 * VM mode only.
+	 */
+	rebind_fence = xe_vm_rebind(vm, false);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto err_put_job;
+	}
+
+	/*
+	 * We store the rebind_fence in the VM so subsequent execs don't get
+	 * scheduled before the rebinds of userptrs / evicted BOs is complete.
+	 */
+	if (rebind_fence) {
+		dma_fence_put(vm->rebind_fence);
+		vm->rebind_fence = rebind_fence;
+	}
+	if (vm->rebind_fence) {
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			     &vm->rebind_fence->flags)) {
+			dma_fence_put(vm->rebind_fence);
+			vm->rebind_fence = NULL;
+		} else {
+			dma_fence_get(vm->rebind_fence);
+			err = drm_sched_job_add_dependency(&job->drm,
+							   vm->rebind_fence);
+			if (err)
+				goto err_put_job;
+		}
+	}
+
+	/* Wait behind munmap style rebinds */
+	if (!xe_vm_no_dma_fences(vm)) {
+		err = drm_sched_job_add_resv_dependencies(&job->drm,
+							  &vm->resv,
+							  DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_put_job;
+	}
+
+	for (i = 0; i < num_syncs && !err; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+	if (err)
+		goto err_put_job;
+
+	if (!xe_vm_no_dma_fences(vm)) {
+		err = down_read_interruptible(&vm->userptr.notifier_lock);
+		if (err)
+			goto err_put_job;
+
+		err = __xe_vm_userptr_needs_repin(vm);
+		if (err)
+			goto err_repin;
+	}
+
+	/*
+	 * Point of no return, if we error after this point just set an error on
+	 * the job and let the DRM scheduler / backend clean up the job.
+	 */
+	xe_sched_job_arm(job);
+	if (!xe_vm_no_dma_fences(vm)) {
+		/* Block userptr invalidations / BO eviction */
+		dma_resv_add_fence(&vm->resv,
+				   &job->drm.s_fence->finished,
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		/*
+		 * Make implicit sync work across drivers, assuming all external
+		 * BOs are written as we don't pass in a read / write list.
+		 */
+		xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished,
+					DMA_RESV_USAGE_WRITE);
+	}
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], job,
+				     &job->drm.s_fence->finished);
+
+	xe_sched_job_push(job);
+
+err_repin:
+	if (!xe_vm_no_dma_fences(vm))
+		up_read(&vm->userptr.notifier_lock);
+err_put_job:
+	if (err)
+		xe_sched_job_put(job);
+err_engine_end:
+	xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
+err_unlock_list:
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	if (err == -EAGAIN)
+		goto retry;
+err_syncs:
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_cleanup(&syncs[i]);
+	kfree(syncs);
+err_engine:
+	xe_engine_put(engine);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h
new file mode 100644
index 0000000000000..e4932494cea3e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_H_
+#define _XE_EXEC_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
new file mode 100644
index 0000000000000..47587571123a3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_execlist.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_regs.h"
+
+#define XE_EXECLIST_HANG_LIMIT 1
+
+#define GEN11_SW_CTX_ID_SHIFT 37
+#define GEN11_SW_CTX_ID_WIDTH 11
+#define XEHP_SW_CTX_ID_SHIFT  39
+#define XEHP_SW_CTX_ID_WIDTH  16
+
+#define GEN11_SW_CTX_ID \
+	GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \
+		    GEN11_SW_CTX_ID_SHIFT)
+
+#define XEHP_SW_CTX_ID \
+	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
+		    XEHP_SW_CTX_ID_SHIFT)
+
+
+static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
+			u32 ctx_id)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	u64 lrc_desc;
+
+	printk(KERN_INFO "__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id);
+
+	lrc_desc = xe_lrc_descriptor(lrc);
+
+	if (GRAPHICS_VERx100(xe) >= 1250) {
+		XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
+	} else {
+		XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id);
+	}
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
+				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	lrc->ring.old_tail = lrc->ring.tail;
+
+	/*
+	 * Make sure the context image is complete before we submit it to HW.
+	 *
+	 * Ostensibly, writes (including the WCB) should be flushed prior to
+	 * an uncached write such as our mmio register access, the empirical
+	 * evidence (esp. on Braswell) suggests that the WC write into memory
+	 * may not be visible to the HW prior to the completion of the UC
+	 * register write and that we may begin execution from the context
+	 * before its image is complete leading to invalid PD chasing.
+	 */
+	wmb();
+
+	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg,
+			xe_bo_ggtt_addr(hwe->hwsp));
+	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg);
+	xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg,
+			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0,
+			lower_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4,
+			upper_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg,
+			EL_CTRL_LOAD);
+}
+
+static void __xe_execlist_port_start(struct xe_execlist_port *port,
+				     struct xe_execlist_engine *exl)
+{
+	struct xe_device *xe = gt_to_xe(port->hwe->gt);
+	int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID);
+
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
+
+	xe_execlist_port_assert_held(port);
+
+	if (port->running_exl != exl || !exl->has_run) {
+		port->last_ctx_id++;
+
+		/* 0 is reserved for the kernel context */
+		if (port->last_ctx_id > max_ctx)
+			port->last_ctx_id = 1;
+	}
+
+	__start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id);
+	port->running_exl = exl;
+	exl->has_run = true;
+}
+
+static void __xe_execlist_port_idle(struct xe_execlist_port *port)
+{
+	u32 noop[2] = { MI_NOOP, MI_NOOP };
+
+	xe_execlist_port_assert_held(port);
+
+	if (!port->running_exl)
+		return;
+
+	printk(KERN_INFO "__xe_execlist_port_idle(%d:%d)\n", port->hwe->class,
+	       port->hwe->instance);
+
+	xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
+	__start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
+	port->running_exl = NULL;
+}
+
+static bool xe_execlist_is_idle(struct xe_execlist_engine *exl)
+{
+	struct xe_lrc *lrc = exl->engine->lrc;
+
+	return lrc->ring.tail == lrc->ring.old_tail;
+}
+
+static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
+{
+	struct xe_execlist_engine *exl = NULL;
+	int i;
+
+	xe_execlist_port_assert_held(port);
+
+	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
+		while (!list_empty(&port->active[i])) {
+			exl = list_first_entry(&port->active[i],
+					       struct xe_execlist_engine,
+					       active_link);
+			list_del(&exl->active_link);
+
+			if (xe_execlist_is_idle(exl)) {
+				exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+				continue;
+			}
+
+			list_add_tail(&exl->active_link, &port->active[i]);
+			__xe_execlist_port_start(port, exl);
+			return;
+		}
+	}
+
+	__xe_execlist_port_idle(port);
+}
+
+static u64 read_execlist_status(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	u32 hi, lo;
+
+	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg);
+	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg);
+
+	printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class,
+	       hwe->instance, hi, lo);
+
+	return lo | (u64)hi << 32;
+}
+
+static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
+{
+	u64 status;
+
+	xe_execlist_port_assert_held(port);
+
+	status = read_execlist_status(port->hwe);
+	if (status & BIT(7))
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
+					 u16 intr_vec)
+{
+	struct xe_execlist_port *port = hwe->exl_port;
+
+	spin_lock(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock(&port->lock);
+}
+
+static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
+					 enum xe_engine_priority priority)
+{
+	xe_execlist_port_assert_held(port);
+
+	if (port->running_exl && port->running_exl->active_priority >= priority)
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_make_active(struct xe_execlist_engine *exl)
+{
+	struct xe_execlist_port *port = exl->port;
+	enum xe_engine_priority priority = exl->active_priority;
+
+	XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET);
+	XE_BUG_ON(priority < 0);
+	XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active));
+
+	spin_lock_irq(&port->lock);
+
+	if (exl->active_priority != priority &&
+	    exl->active_priority != XE_ENGINE_PRIORITY_UNSET) {
+		/* Priority changed, move it to the right list */
+		list_del(&exl->active_link);
+		exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+	}
+
+	if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) {
+		exl->active_priority = priority;
+		list_add_tail(&exl->active_link, &port->active[priority]);
+	}
+
+	xe_execlist_port_wake_locked(exl->port, priority);
+
+	spin_unlock_irq(&port->lock);
+}
+
+static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
+{
+	struct xe_execlist_port *port =
+		container_of(timer, struct xe_execlist_port, irq_fail);
+
+	spin_lock_irq(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock_irq(&port->lock);
+
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+}
+
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe)
+{
+	struct drm_device *drm = &xe->drm;
+	struct xe_execlist_port *port;
+	int i;
+
+	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	port->hwe = hwe;
+
+	spin_lock_init(&port->lock);
+	for (i = 0; i < ARRAY_SIZE(port->active); i++)
+		INIT_LIST_HEAD(&port->active[i]);
+
+	port->last_ctx_id = 1;
+	port->running_exl = NULL;
+
+	hwe->irq_handler = xe_execlist_port_irq_handler;
+
+	/* TODO: Fix the interrupt code so it doesn't race like mad */
+	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+
+	return port;
+}
+
+void xe_execlist_port_destroy(struct xe_execlist_port *port)
+{
+	del_timer(&port->irq_fail);
+
+	/* Prevent an interrupt while we're destroying */
+	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+	port->hwe->irq_handler = NULL;
+	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+}
+
+static struct dma_fence *
+execlist_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_engine *e = job->engine;
+	struct xe_execlist_engine *exl = job->engine->execlist;
+
+	e->ring_ops->emit_job(job);
+	xe_execlist_make_active(exl);
+
+	return dma_fence_get(job->fence);
+}
+
+static void execlist_job_free(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+	xe_sched_job_put(job);
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = execlist_run_job,
+	.free_job = execlist_job_free,
+};
+
+static int execlist_engine_init(struct xe_engine *e)
+{
+	struct drm_gpu_scheduler *sched;
+	struct xe_execlist_engine *exl;
+	int err;
+
+	XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+
+	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
+	if (!exl)
+		return -ENOMEM;
+
+	exl->engine = e;
+
+	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
+			     e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
+			     NULL, NULL, e->hwe->name,
+			     gt_to_xe(e->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &exl->sched;
+	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
+	if (err)
+		goto err_sched;
+
+	exl->port = e->hwe->exl_port;
+	exl->has_run = false;
+	exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+	e->execlist = exl;
+	e->entity = &exl->entity;
+
+	switch (e->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1);
+		break;
+	default:
+		XE_WARN_ON(e->class);
+	}
+
+	return 0;
+
+err_sched:
+	drm_sched_fini(&exl->sched);
+err_free:
+	kfree(exl);
+	return err;
+}
+
+static void execlist_engine_fini_async(struct work_struct *w)
+{
+	struct xe_execlist_engine *ee =
+		container_of(w, struct xe_execlist_engine, fini_async);
+	struct xe_engine *e = ee->engine;
+	struct xe_execlist_engine *exl = e->execlist;
+	unsigned long flags;
+
+	XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+
+	spin_lock_irqsave(&exl->port->lock, flags);
+	if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET))
+		list_del(&exl->active_link);
+	spin_unlock_irqrestore(&exl->port->lock, flags);
+
+	if (e->flags & ENGINE_FLAG_PERSISTENT)
+		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+	drm_sched_entity_fini(&exl->entity);
+	drm_sched_fini(&exl->sched);
+	kfree(exl);
+
+	xe_engine_fini(e);
+}
+
+static void execlist_engine_kill(struct xe_engine *e)
+{
+	/* NIY */
+}
+
+static void execlist_engine_fini(struct xe_engine *e)
+{
+	INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async);
+	queue_work(system_unbound_wq, &e->execlist->fini_async);
+}
+
+static int execlist_engine_set_priority(struct xe_engine *e,
+					enum xe_engine_priority priority)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_set_preempt_timeout(struct xe_engine *e,
+					       u32 preempt_timeout_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_set_job_timeout(struct xe_engine *e,
+					   u32 job_timeout_ms)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_engine_suspend(struct xe_engine *e)
+{
+	/* NIY */
+	return 0;
+}
+
+static void execlist_engine_suspend_wait(struct xe_engine *e)
+
+{
+	/* NIY */
+}
+
+static void execlist_engine_resume(struct xe_engine *e)
+{
+	xe_mocs_init_engine(e);
+}
+
+static const struct xe_engine_ops execlist_engine_ops = {
+	.init = execlist_engine_init,
+	.kill = execlist_engine_kill,
+	.fini = execlist_engine_fini,
+	.set_priority = execlist_engine_set_priority,
+	.set_timeslice = execlist_engine_set_timeslice,
+	.set_preempt_timeout = execlist_engine_set_preempt_timeout,
+	.set_job_timeout = execlist_engine_set_job_timeout,
+	.suspend = execlist_engine_suspend,
+	.suspend_wait = execlist_engine_suspend_wait,
+	.resume = execlist_engine_resume,
+};
+
+int xe_execlist_init(struct xe_gt *gt)
+{
+	/* GuC submission enabled, nothing to do */
+	if (xe_device_guc_submission_enabled(gt_to_xe(gt)))
+		return 0;
+
+	gt->engine_ops = &execlist_engine_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h
new file mode 100644
index 0000000000000..6a0442a6eff62
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_H_
+#define _XE_EXECLIST_H_
+
+#include "xe_execlist_types.h"
+
+struct xe_device;
+struct xe_gt;
+
+#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock);
+
+int xe_execlist_init(struct xe_gt *gt);
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe);
+void xe_execlist_port_destroy(struct xe_execlist_port *port);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
new file mode 100644
index 0000000000000..9b1239b47292b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_TYPES_H_
+#define _XE_EXECLIST_TYPES_H_
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_engine_types.h"
+
+struct xe_hw_engine;
+struct xe_execlist_engine;
+
+struct xe_execlist_port {
+	struct xe_hw_engine *hwe;
+
+	spinlock_t lock;
+
+	struct list_head active[XE_ENGINE_PRIORITY_COUNT];
+
+	u32 last_ctx_id;
+
+	struct xe_execlist_engine *running_exl;
+
+	struct timer_list irq_fail;
+};
+
+struct xe_execlist_engine {
+	struct xe_engine *engine;
+
+	struct drm_gpu_scheduler sched;
+
+	struct drm_sched_entity entity;
+
+	struct xe_execlist_port *port;
+
+	bool has_run;
+
+	struct work_struct fini_async;
+
+	enum xe_engine_priority active_priority;
+	struct list_head active_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
new file mode 100644
index 0000000000000..0320ce7ba3d19
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_util.h>
+
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "gt/intel_gt_regs.h"
+
+#define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
+
+static struct xe_gt *
+fw_to_gt(struct xe_force_wake *fw)
+{
+	return fw->gt;
+}
+
+static struct xe_device *
+fw_to_xe(struct xe_force_wake *fw)
+{
+	return gt_to_xe(fw_to_gt(fw));
+}
+
+static void domain_init(struct xe_force_wake_domain *domain,
+			enum xe_force_wake_domain_id id,
+			u32 reg, u32 ack, u32 val, u32 mask)
+{
+	domain->id = id;
+	domain->reg_ctl = reg;
+	domain->reg_ack = ack;
+	domain->val = val;
+	domain->mask = mask;
+}
+
+#define FORCEWAKE_ACK_GT_MTL                 _MMIO(0xdfc)
+
+void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	fw->gt = gt;
+	mutex_init(&fw->lock);
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+	if (xe->info.platform == XE_METEORLAKE) {
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+			    XE_FW_DOMAIN_ID_GT,
+			    FORCEWAKE_GT_GEN9.reg,
+			    FORCEWAKE_ACK_GT_MTL.reg,
+			    BIT(0), BIT(16));
+	} else {
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+			    XE_FW_DOMAIN_ID_GT,
+			    FORCEWAKE_GT_GEN9.reg,
+			    FORCEWAKE_ACK_GT_GEN9.reg,
+			    BIT(0), BIT(16));
+	}
+}
+
+void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	int i, j;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+	if (!xe_gt_is_media_type(gt))
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
+			    XE_FW_DOMAIN_ID_RENDER,
+			    FORCEWAKE_RENDER_GEN9.reg,
+			    FORCEWAKE_ACK_RENDER_GEN9.reg,
+			    BIT(0), BIT(16));
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+			    FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg,
+			    FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg,
+			    BIT(0), BIT(16));
+	}
+
+	for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+			    FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg,
+			    FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg,
+			    BIT(0), BIT(16));
+	}
+}
+
+void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	int i, j;
+
+	/* Call after fuses have been read, prune domains that are fused off */
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j)
+		if (!(gt->info.engine_mask & BIT(i)))
+			fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0;
+
+	for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j)
+		if (!(gt->info.engine_mask & BIT(i)))
+			fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0;
+}
+
+static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val);
+}
+
+static int domain_wake_wait(struct xe_gt *gt,
+			    struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+}
+
+static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask);
+}
+
+static int domain_sleep_wait(struct xe_gt *gt,
+			     struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+}
+
+#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
+	for (tmp__ = (mask__); tmp__ ;) \
+		for_each_if((domain__ = ((fw__)->domains + \
+					 __mask_next_bit(tmp__))) && \
+					 domain__->reg_ctl)
+
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, woken = 0;
+	int ret, ret2 = 0;
+
+	mutex_lock(&fw->lock);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!domain->ref++) {
+			woken |= BIT(domain->id);
+			domain_wake(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, woken, fw, tmp) {
+		ret = domain_wake_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains |= woken;
+	mutex_unlock(&fw->lock);
+
+	return ret2;
+}
+
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, sleep = 0;
+	int ret, ret2 = 0;
+
+	mutex_lock(&fw->lock);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!--domain->ref) {
+			sleep |= BIT(domain->id);
+			domain_sleep(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, sleep, fw, tmp) {
+		ret = domain_sleep_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains &= ~sleep;
+	mutex_unlock(&fw->lock);
+
+	return ret2;
+}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
new file mode 100644
index 0000000000000..5adb8daa3b718
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_H_
+#define _XE_FORCE_WAKE_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_macros.h"
+
+struct xe_gt;
+
+void xe_force_wake_init_gt(struct xe_gt *gt,
+			   struct xe_force_wake *fw);
+void xe_force_wake_init_engines(struct xe_gt *gt,
+				struct xe_force_wake *fw);
+void xe_force_wake_prune(struct  xe_gt *gt,
+			 struct xe_force_wake *fw);
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+
+static inline int
+xe_force_wake_ref(struct xe_force_wake *fw,
+		  enum xe_force_wake_domains domain)
+{
+	XE_BUG_ON(!domain);
+	return fw->domains[ffs(domain) - 1].ref;
+}
+
+static inline void
+xe_force_wake_assert_held(struct xe_force_wake *fw,
+			  enum xe_force_wake_domains domain)
+{
+	XE_BUG_ON(!(fw->awake_domains & domain));
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
new file mode 100644
index 0000000000000..208dd629d7b12
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_TYPES_H_
+#define _XE_FORCE_WAKE_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+enum xe_force_wake_domain_id {
+	XE_FW_DOMAIN_ID_GT = 0,
+	XE_FW_DOMAIN_ID_RENDER,
+	XE_FW_DOMAIN_ID_MEDIA,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX3,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX4,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX5,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX6,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX7,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX3,
+	XE_FW_DOMAIN_ID_GSC,
+	XE_FW_DOMAIN_ID_COUNT
+};
+
+enum xe_force_wake_domains {
+	XE_FW_GT		= BIT(XE_FW_DOMAIN_ID_GT),
+	XE_FW_RENDER		= BIT(XE_FW_DOMAIN_ID_RENDER),
+	XE_FW_MEDIA		= BIT(XE_FW_DOMAIN_ID_MEDIA),
+	XE_FW_MEDIA_VDBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0),
+	XE_FW_MEDIA_VDBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1),
+	XE_FW_MEDIA_VDBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2),
+	XE_FW_MEDIA_VDBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3),
+	XE_FW_MEDIA_VDBOX4	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4),
+	XE_FW_MEDIA_VDBOX5	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5),
+	XE_FW_MEDIA_VDBOX6	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6),
+	XE_FW_MEDIA_VDBOX7	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7),
+	XE_FW_MEDIA_VEBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0),
+	XE_FW_MEDIA_VEBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1),
+	XE_FW_MEDIA_VEBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
+	XE_FW_MEDIA_VEBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
+	XE_FW_GSC		= BIT(XE_FW_DOMAIN_ID_GSC),
+	XE_FORCEWAKE_ALL	= BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+};
+
+/**
+ * struct xe_force_wake_domain - XE force wake domains
+ */
+struct xe_force_wake_domain {
+	/** @id: domain force wake id */
+	enum xe_force_wake_domain_id id;
+	/** @reg_ctl: domain wake control register address */
+	u32 reg_ctl;
+	/** @reg_ack: domain ack register address */
+	u32 reg_ack;
+	/** @val: domain wake write value */
+	u32 val;
+	/** @mask: domain mask */
+	u32 mask;
+	/** @ref: domain reference */
+	u32 ref;
+};
+
+/**
+ * struct xe_force_wake - XE force wake
+ */
+struct xe_force_wake {
+	/** @gt: back pointers to GT */
+	struct xe_gt *gt;
+	/** @lock: protects everything force wake struct */
+	struct mutex lock;
+	/** @awake_domains: mask of all domains awake */
+	enum xe_force_wake_domains awake_domains;
+	/** @domains: force wake domains */
+	struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
new file mode 100644
index 0000000000000..eab74a509f681
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_ggtt.h"
+
+#include <linux/sizes.h>
+#include <drm/i915_drm.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_wopcm.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+/* FIXME: Common file, preferably auto-gen */
+#define MTL_GGTT_PTE_PAT0	BIT(52)
+#define MTL_GGTT_PTE_PAT1	BIT(53)
+
+u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 pte;
+	bool is_lmem;
+
+	pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem);
+	pte |= GEN8_PAGE_PRESENT;
+
+	if (is_lmem)
+		pte |= GEN12_GGTT_PTE_LM;
+
+	/* FIXME: vfunc + pass in caching rules */
+	if (xe->info.platform == XE_METEORLAKE) {
+		pte |= MTL_GGTT_PTE_PAT0;
+		pte |= MTL_GGTT_PTE_PAT1;
+	}
+
+	return pte;
+}
+
+static unsigned int probe_gsm_size(struct pci_dev *pdev)
+{
+	u16 gmch_ctl, ggms;
+
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
+	ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
+	return ggms ? SZ_1M << ggms : 0;
+}
+
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
+{
+	XE_BUG_ON(addr & GEN8_PTE_MASK);
+	XE_BUG_ON(addr >= ggtt->size);
+
+	writeq(pte, &ggtt->gsm[addr >> GEN8_PTE_SHIFT]);
+}
+
+static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
+{
+	u64 end = start + size - 1;
+	u64 scratch_pte;
+
+	XE_BUG_ON(start >= end);
+
+	if (ggtt->scratch)
+		scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+	else
+		scratch_pte = 0;
+
+	while (start < end) {
+		xe_ggtt_set_pte(ggtt, start, scratch_pte);
+		start += GEN8_PAGE_SIZE;
+	}
+}
+
+static void ggtt_fini_noalloc(struct drm_device *drm, void *arg)
+{
+	struct xe_ggtt *ggtt = arg;
+
+	mutex_destroy(&ggtt->lock);
+	drm_mm_takedown(&ggtt->mm);
+
+	xe_bo_unpin_map_no_vm(ggtt->scratch);
+}
+
+int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	unsigned int gsm_size;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	ggtt->gt = gt;
+
+	gsm_size = probe_gsm_size(pdev);
+	if (gsm_size == 0) {
+		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+		return -ENOMEM;
+	}
+
+	ggtt->gsm = gt->mmio.regs + SZ_8M;
+	ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE;
+
+	/*
+	 * 8B per entry, each points to a 4KB page.
+	 *
+	 * The GuC owns the WOPCM space, thus we can't allocate GGTT address in
+	 * this area. Even though we likely configure the WOPCM to less than the
+	 * maximum value, to simplify the driver load (no need to fetch HuC +
+	 * GuC firmwares and determine there sizes before initializing the GGTT)
+	 * just start the GGTT allocation above the max WOPCM size. This might
+	 * waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can
+	 * live with this.
+	 *
+	 * Another benifit of this is the GuC bootrom can't access anything
+	 * below the WOPCM max size so anything the bootom needs to access (e.g.
+	 * a RSA key) needs to be placed in the GGTT above the WOPCM max size.
+	 * Starting the GGTT allocations above the WOPCM max give us the correct
+	 * placement for free.
+	 */
+	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
+		    ggtt->size - xe_wopcm_size(xe));
+	mutex_init(&ggtt->lock);
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt);
+}
+
+static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
+{
+	struct drm_mm_node *hole;
+	u64 start, end;
+
+	/* Display may have allocated inside ggtt, so be careful with clearing here */
+	mutex_lock(&ggtt->lock);
+	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
+		xe_ggtt_clear(ggtt, start, end - start);
+
+	xe_ggtt_invalidate(ggtt->gt);
+	mutex_unlock(&ggtt->lock);
+}
+
+int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	ggtt->scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE,
+					    ttm_bo_type_kernel,
+					    XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					    XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(ggtt->scratch)) {
+		err = PTR_ERR(ggtt->scratch);
+		goto err;
+	}
+
+	err = xe_bo_pin(ggtt->scratch);
+	xe_bo_unlock_no_vm(ggtt->scratch);
+	if (err) {
+		xe_bo_put(ggtt->scratch);
+		goto err;
+	}
+
+	xe_ggtt_initial_clear(ggtt);
+	return 0;
+err:
+	ggtt->scratch = NULL;
+	return err;
+}
+
+#define GEN12_GUC_TLB_INV_CR                     _MMIO(0xcee8)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE        (1 << 0)
+#define PVC_GUC_TLB_INV_DESC0			_MMIO(0xcf7c)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		 (1 << 0)
+#define PVC_GUC_TLB_INV_DESC1			_MMIO(0xcf80)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	 (1 << 6)
+
+void xe_ggtt_invalidate(struct xe_gt *gt)
+{
+	/* TODO: vfunc for GuC vs. non-GuC */
+
+	/* TODO: i915 makes comments about this being uncached and
+	 * therefore flushing WC buffers.  Is that really true here?
+	 */
+	xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN);
+	if (xe_device_guc_submission_enabled(gt_to_xe(gt))) {
+		struct xe_device *xe = gt_to_xe(gt);
+
+		/* TODO: also use vfunc here */
+		if (xe->info.platform == XE_PVC) {
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg,
+					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg,
+					PVC_GUC_TLB_INV_DESC0_VALID);
+		} else
+			xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg,
+					GEN12_GUC_TLB_INV_CR_INVALIDATE);
+	}
+}
+
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
+{
+	u64 addr, scratch_pte;
+
+	scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+
+	printk("%sGlobal GTT:", prefix);
+	for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) {
+		unsigned int i = addr / GEN8_PAGE_SIZE;
+
+		XE_BUG_ON(addr > U32_MAX);
+		if (ggtt->gsm[i] == scratch_pte)
+			continue;
+
+		printk("%s    ggtt[0x%08x] = 0x%016llx",
+		       prefix, (u32)addr, ggtt->gsm[i]);
+	}
+}
+
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags)
+{
+	return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
+					  mm_flags);
+}
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align)
+{
+	int ret;
+
+	mutex_lock(&ggtt->lock);
+	ret = xe_ggtt_insert_special_node_locked(ggtt, node, size,
+						 align, DRM_MM_INSERT_HIGH);
+	mutex_unlock(&ggtt->lock);
+
+	return ret;
+}
+
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	u64 start = bo->ggtt_node.start;
+	u64 offset, pte;
+
+	for (offset = 0; offset < bo->size; offset += GEN8_PAGE_SIZE) {
+		pte = xe_ggtt_pte_encode(bo, offset);
+		xe_ggtt_set_pte(ggtt, start + offset, pte);
+	}
+
+	xe_ggtt_invalidate(ggtt->gt);
+}
+
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	int err;
+
+	if (XE_WARN_ON(bo->ggtt_node.size)) {
+		/* Someone's already inserted this BO in the GGTT */
+		XE_BUG_ON(bo->ggtt_node.size != bo->size);
+		return 0;
+	}
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	mutex_lock(&ggtt->lock);
+	err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size);
+	if (!err)
+		xe_ggtt_map_bo(ggtt, bo);
+	mutex_unlock(&ggtt->lock);
+
+	return 0;
+}
+
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+{
+	mutex_lock(&ggtt->lock);
+
+	xe_ggtt_clear(ggtt, node->start, node->size);
+	drm_mm_remove_node(node);
+	node->size = 0;
+
+	xe_ggtt_invalidate(ggtt->gt);
+
+	mutex_unlock(&ggtt->lock);
+}
+
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	if (XE_WARN_ON(!bo->ggtt_node.size))
+		return;
+
+	/* This BO is not currently in the GGTT */
+	XE_BUG_ON(bo->ggtt_node.size != bo->size);
+
+	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
+}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
new file mode 100644
index 0000000000000..289c6852ad1a2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_H_
+#define _XE_GGTT_H_
+
+#include "xe_ggtt_types.h"
+
+u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset);
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
+void xe_ggtt_invalidate(struct xe_gt *gt);
+int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt);
+int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt);
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align);
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
+				       struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags);
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
new file mode 100644
index 0000000000000..e041930017631
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_TYPES_H_
+#define _XE_GGTT_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+struct xe_bo;
+struct xe_gt;
+
+struct xe_ggtt {
+	struct xe_gt *gt;
+
+	u64 size;
+
+	struct xe_bo *scratch;
+
+	struct mutex lock;
+
+	u64 __iomem *gsm;
+
+	struct drm_mm mm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
new file mode 100644
index 0000000000000..e4ad1d6ce1d5f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gpu_scheduler.h"
+
+static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->base.pause_submit))
+		queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched)
+{
+	struct xe_sched_msg *msg;
+
+	spin_lock(&sched->base.job_list_lock);
+	msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link);
+	if (msg)
+		xe_sched_process_msg_queue(sched);
+	spin_unlock(&sched->base.job_list_lock);
+}
+
+static struct xe_sched_msg *
+xe_sched_get_msg(struct xe_gpu_scheduler *sched)
+{
+	struct xe_sched_msg *msg;
+
+	spin_lock(&sched->base.job_list_lock);
+	msg = list_first_entry_or_null(&sched->msgs,
+				       struct xe_sched_msg, link);
+	if (msg)
+		list_del(&msg->link);
+	spin_unlock(&sched->base.job_list_lock);
+
+	return msg;
+}
+
+static void xe_sched_process_msg_work(struct work_struct *w)
+{
+	struct xe_gpu_scheduler *sched =
+		container_of(w, struct xe_gpu_scheduler, work_process_msg);
+	struct xe_sched_msg *msg;
+
+	if (READ_ONCE(sched->base.pause_submit))
+		return;
+
+	msg = xe_sched_get_msg(sched);
+	if (msg) {
+		sched->ops->process_msg(msg);
+
+		xe_sched_process_msg_queue_if_ready(sched);
+	}
+}
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev)
+{
+	sched->ops = xe_ops;
+	INIT_LIST_HEAD(&sched->msgs);
+	INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work);
+
+	return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission,
+			      hang_limit, timeout, timeout_wq, score, name,
+			      dev);
+}
+
+void xe_sched_fini(struct xe_gpu_scheduler *sched)
+{
+	xe_sched_submission_stop(sched);
+	drm_sched_fini(&sched->base);
+}
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_start(&sched->base);
+	queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_stop(&sched->base);
+	cancel_work_sync(&sched->work_process_msg);
+}
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg)
+{
+	spin_lock(&sched->base.job_list_lock);
+	list_add_tail(&msg->link, &sched->msgs);
+	spin_unlock(&sched->base.job_list_lock);
+
+	xe_sched_process_msg_queue(sched);
+}
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
new file mode 100644
index 0000000000000..10c6bb9c93868
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_H_
+#define _XE_GPU_SCHEDULER_H_
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_sched_job_types.h"
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev);
+void xe_sched_fini(struct xe_gpu_scheduler *sched);
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg);
+
+static inline void xe_sched_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_stop(&sched->base, NULL);
+}
+
+static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_tdr_queue_imm(&sched->base);
+}
+
+static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_resubmit_jobs(&sched->base);
+}
+
+static inline bool
+xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
+{
+	return drm_sched_invalidate_job(&job->drm, threshold);
+}
+
+static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
+					    struct xe_sched_job *job)
+{
+	list_add(&job->drm.list, &sched->base.pending_list);
+}
+
+static inline
+struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
+{
+	return list_first_entry_or_null(&sched->base.pending_list,
+					struct xe_sched_job, drm.list);
+}
+
+static inline int
+xe_sched_entity_init(struct xe_sched_entity *entity,
+		     struct xe_gpu_scheduler *sched)
+{
+	return drm_sched_entity_init(entity, 0,
+				     (struct drm_gpu_scheduler **)&sched,
+				     1, NULL);
+}
+
+#define xe_sched_entity_fini drm_sched_entity_fini
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
new file mode 100644
index 0000000000000..6731b13da8bbb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_TYPES_H_
+#define _XE_GPU_SCHEDULER_TYPES_H_
+
+#include <drm/gpu_scheduler.h>
+
+/**
+ * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue)
+ * message
+ *
+ * Generic enough for backend defined messages, backend can expand if needed.
+ */
+struct xe_sched_msg {
+	/** @link: list link into the gpu scheduler list of messages */
+	struct list_head		link;
+	/**
+	 * @private_data: opaque pointer to message private data (backend defined)
+	 */
+	void				*private_data;
+	/** @opcode: opcode of message (backend defined) */
+	unsigned int			opcode;
+};
+
+/**
+ * struct xe_sched_backend_ops - Define the backend operations called by the
+ * scheduler
+ */
+struct xe_sched_backend_ops {
+	/**
+	 * @process_msg: Process a message. Allowed to block, it is this
+	 * function's responsibility to free message if dynamically allocated.
+	 */
+	void (*process_msg)(struct xe_sched_msg *msg);
+};
+
+/**
+ * struct xe_gpu_scheduler - Xe GPU scheduler
+ */
+struct xe_gpu_scheduler {
+	/** @base: DRM GPU scheduler */
+	struct drm_gpu_scheduler		base;
+	/** @ops: Xe scheduler ops */
+	const struct xe_sched_backend_ops	*ops;
+	/** @msgs: list of messages to be processed in @work_process_msg */
+	struct list_head			msgs;
+	/** @work_process_msg: processes messages */
+	struct work_struct		work_process_msg;
+};
+
+#define xe_sched_entity		drm_sched_entity
+#define xe_sched_policy		drm_sched_policy
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
new file mode 100644
index 0000000000000..5f8fa9d98d5af
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/minmax.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_reg_sr.h"
+#include "xe_ring_ops.h"
+#include "xe_sa.h"
+#include "xe_sched_job.h"
+#include "xe_ttm_gtt_mgr.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_tuning.h"
+#include "xe_uc.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+#include "xe_wopcm.h"
+
+#include "gt/intel_gt_regs.h"
+
+struct xe_gt *xe_find_full_gt(struct xe_gt *gt)
+{
+	struct xe_gt *search;
+	u8 id;
+
+	XE_BUG_ON(!xe_gt_is_media_type(gt));
+
+	for_each_gt(search, gt_to_xe(gt), id) {
+		if (search->info.vram_id == gt->info.vram_id)
+			return search;
+	}
+
+	XE_BUG_ON("NOT POSSIBLE");
+	return NULL;
+}
+
+int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
+{
+	struct drm_device *drm = &xe->drm;
+
+	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+
+	if (!xe_gt_is_media_type(gt)) {
+		gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt),
+					    GFP_KERNEL);
+		if (!gt->mem.ggtt)
+			return -ENOMEM;
+
+		gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr),
+						GFP_KERNEL);
+		if (!gt->mem.vram_mgr)
+			return -ENOMEM;
+
+		gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr),
+					       GFP_KERNEL);
+		if (!gt->mem.gtt_mgr)
+			return -ENOMEM;
+	} else {
+		struct xe_gt *full_gt = xe_find_full_gt(gt);
+
+		gt->mem.ggtt = full_gt->mem.ggtt;
+		gt->mem.vram_mgr = full_gt->mem.vram_mgr;
+		gt->mem.gtt_mgr = full_gt->mem.gtt_mgr;
+	}
+
+	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+
+	return 0;
+}
+
+/* FIXME: These should be in a common file */
+#define CHV_PPAT_SNOOP			REG_BIT(6)
+#define GEN8_PPAT_AGE(x)		((x)<<4)
+#define GEN8_PPAT_LLCeLLC		(3<<2)
+#define GEN8_PPAT_LLCELLC		(2<<2)
+#define GEN8_PPAT_LLC			(1<<2)
+#define GEN8_PPAT_WB			(3<<0)
+#define GEN8_PPAT_WT			(2<<0)
+#define GEN8_PPAT_WC			(1<<0)
+#define GEN8_PPAT_UC			(0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
+#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
+#define GEN12_PPAT_CLOS(x)              ((x)<<2)
+
+static void tgl_setup_private_ppat(struct xe_gt *gt)
+{
+	/* TGL doesn't support LLC or AGE settings */
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB);
+}
+
+static void pvc_setup_private_ppat(struct xe_gt *gt)
+{
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
+			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg,
+			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg,
+			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg,
+			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB);
+}
+
+#define MTL_PPAT_L4_CACHE_POLICY_MASK   REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK     REG_GENMASK(1, 0)
+#define MTL_PPAT_3_UC   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_1_WT   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_0_WB   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
+static void mtl_setup_private_ppat(struct xe_gt *gt)
+{
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg,
+			MTL_PPAT_1_WT | MTL_2_COH_1W);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg,
+			MTL_PPAT_3_UC | MTL_2_COH_1W);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg,
+			MTL_PPAT_0_WB | MTL_2_COH_1W);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
+			MTL_PPAT_0_WB | MTL_3_COH_2W);
+}
+
+static void setup_private_ppat(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.platform == XE_METEORLAKE)
+		mtl_setup_private_ppat(gt);
+	else if (xe->info.platform == XE_PVC)
+		pvc_setup_private_ppat(gt);
+	else
+		tgl_setup_private_ppat(gt);
+}
+
+static int gt_ttm_mgr_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+	struct sysinfo si;
+	u64 gtt_size;
+
+	si_meminfo(&si);
+	gtt_size = (u64)si.totalram * si.mem_unit * 3/4;
+
+	if (gt->mem.vram.size) {
+		err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr);
+		if (err)
+			return err;
+		gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20),
+				   gt->mem.vram.size),
+			       gtt_size);
+		xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1;
+	}
+
+	err = xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void gt_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+	int i;
+
+	destroy_workqueue(gt->ordered_wq);
+
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+}
+
+static void gt_reset_worker(struct work_struct *w);
+
+int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
+{
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	u64 batch_ofs;
+	long timeout;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+	job = xe_bb_create_wa_job(e, bb, batch_ofs);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(bb);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
+{
+	struct xe_reg_sr *sr = &e->hwe->reg_lrc;
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	u64 batch_ofs;
+	long timeout;
+	int count = 0;
+
+	bb = xe_bb_new(gt, SZ_4K, false);	/* Just pick a large BB size */
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	xa_for_each(&sr->xa, reg, entry)
+		++count;
+
+	if (count) {
+		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count);
+		xa_for_each(&sr->xa, reg, entry) {
+			bb->cs[bb->len++] = reg;
+			bb->cs[bb->len++] = entry->set_bits;
+		}
+	}
+	bb->cs[bb->len++] = MI_NOOP;
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+	job = xe_bb_create_wa_job(e, bb, batch_ofs);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(bb);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+int xe_gt_record_default_lrcs(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		struct xe_engine *e, *nop_e;
+		struct xe_vm *vm;
+		void *default_lrc;
+
+		if (gt->default_lrc[hwe->class])
+			continue;
+
+		xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe);
+		xe_wa_process_lrc(hwe);
+
+		default_lrc = drmm_kzalloc(&xe->drm,
+					   xe_lrc_size(xe, hwe->class),
+					   GFP_KERNEL);
+		if (!default_lrc)
+			return -ENOMEM;
+
+		vm = xe_migrate_get_vm(gt->migrate);
+		e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1,
+				     hwe, ENGINE_FLAG_WA);
+		if (IS_ERR(e)) {
+			err = PTR_ERR(e);
+			goto put_vm;
+		}
+
+		/* Prime golden LRC with known good state */
+		err = emit_wa_job(gt, e);
+		if (err)
+			goto put_engine;
+
+		nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance),
+					 1, hwe, ENGINE_FLAG_WA);
+		if (IS_ERR(nop_e)) {
+			err = PTR_ERR(nop_e);
+			goto put_engine;
+		}
+
+		/* Switch to different LRC */
+		err = emit_nop_job(gt, nop_e);
+		if (err)
+			goto put_nop_e;
+
+		/* Reload golden LRC to record the effect of any indirect W/A */
+		err = emit_nop_job(gt, e);
+		if (err)
+			goto put_nop_e;
+
+		xe_map_memcpy_from(xe, default_lrc,
+				   &e->lrc[0].bo->vmap,
+				   xe_lrc_pphwsp_offset(&e->lrc[0]),
+				   xe_lrc_size(xe, hwe->class));
+
+		gt->default_lrc[hwe->class] = default_lrc;
+put_nop_e:
+		xe_engine_put(nop_e);
+put_engine:
+		xe_engine_put(e);
+put_vm:
+		xe_vm_put(vm);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+int xe_gt_init_early(struct xe_gt *gt)
+{
+	int err;
+
+	xe_force_wake_init_gt(gt, gt_to_fw(gt));
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_gt_topology_init(gt);
+	xe_gt_mcr_init(gt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
+	xe_wa_process_gt(gt);
+	xe_tuning_process_gt(gt);
+
+	return 0;
+}
+
+/**
+ * xe_gt_init_noalloc - Init GT up to the point where allocations can happen.
+ * @gt: The GT to initialize.
+ *
+ * This function prepares the GT to allow memory allocations to VRAM, but is not
+ * allowed to allocate memory itself. This state is useful for display readout,
+ * because the inherited display framebuffer will otherwise be overwritten as it
+ * is usually put at the start of VRAM.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_gt_init_noalloc(struct xe_gt *gt)
+{
+	int err, err2;
+
+	if (xe_gt_is_media_type(gt))
+		return 0;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err;
+
+	err = gt_ttm_mgr_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt);
+
+err_force_wake:
+	err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(err2);
+	xe_device_mem_access_put(gt_to_xe(gt));
+err:
+	return err;
+}
+
+static int gt_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_hw_fence_irq;
+
+	if (!xe_gt_is_media_type(gt)) {
+		err = xe_ggtt_init(gt, gt->mem.ggtt);
+		if (err)
+			goto err_force_wake;
+	}
+
+	/* Allow driver to load if uC init fails (likely missing firmware) */
+	err = xe_uc_init(&gt->uc);
+	XE_WARN_ON(err);
+
+	err = xe_uc_init_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/* Enables per hw engine IRQs */
+	xe_gt_irq_postinstall(gt);
+
+	/* Rerun MCR init as we now have hw engine list */
+	xe_gt_mcr_init(gt);
+
+	err = xe_hw_engines_init_early(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+static int all_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_hw_fence_irq;
+
+	setup_private_ppat(gt);
+
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_gt_clock_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	xe_mocs_init(gt);
+	err = xe_execlist_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_hw_engines_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_uc_init_post_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/*
+	 * FIXME: This should be ok as SA should only be used by gt->migrate and
+	 * vm->gt->migrate and both should be pointing to a non-media GT. But to
+	 * realy safe, convert gt->kernel_bb_pool to a pointer and point a media
+	 * GT to the kernel_bb_pool on a real tile.
+	 */
+	if (!xe_gt_is_media_type(gt)) {
+		err = xe_sa_bo_manager_init(gt, &gt->kernel_bb_pool, SZ_1M, 16);
+		if (err)
+			goto err_force_wake;
+
+		/*
+		 * USM has its only SA pool to non-block behind user operations
+		 */
+		if (gt_to_xe(gt)->info.supports_usm) {
+			err = xe_sa_bo_manager_init(gt, &gt->usm.bb_pool,
+						    SZ_1M, 16);
+			if (err)
+				goto err_force_wake;
+		}
+	}
+
+	if (!xe_gt_is_media_type(gt)) {
+		gt->migrate = xe_migrate_init(gt);
+		if (IS_ERR(gt->migrate))
+			goto err_force_wake;
+	} else {
+		gt->migrate = xe_find_full_gt(gt)->migrate;
+	}
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+int xe_gt_init(struct xe_gt *gt)
+{
+	int err;
+	int i;
+
+	INIT_WORK(&gt->reset.worker, gt_reset_worker);
+
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
+		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
+		xe_hw_fence_irq_init(&gt->fence_irq[i]);
+	}
+
+	err = xe_gt_pagefault_init(gt);
+	if (err)
+		return err;
+
+	xe_gt_sysfs_init(gt);
+
+	err = gt_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	xe_force_wake_init_engines(gt, gt_to_fw(gt));
+
+	err = all_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	xe_force_wake_prune(gt, gt_to_fw(gt));
+
+	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int do_gt_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL);
+	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5);
+	if (err)
+		drm_err(&xe->drm,
+			"GT reset failed to clear GEN11_GRDOM_FULL\n");
+
+	return err;
+}
+
+static int do_gt_restart(struct xe_gt *gt)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err;
+
+	setup_private_ppat(gt);
+
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_wopcm_init(&gt->uc.wopcm);
+	if (err)
+		return err;
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_enable_ring(hwe);
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		return err;
+
+	xe_mocs_init(gt);
+	err = xe_uc_start(&gt->uc);
+	if (err)
+		return err;
+
+	for_each_hw_engine(hwe, gt, id) {
+		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+		xe_reg_sr_apply_whitelist(&hwe->reg_whitelist,
+					  hwe->mmio_base, gt);
+	}
+
+	return 0;
+}
+
+static int gt_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* We only support GT resets with GuC submission */
+	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+		return -ENODEV;
+
+	drm_info(&xe->drm, "GT reset started\n");
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	xe_uc_stop_prepare(&gt->uc);
+	xe_gt_pagefault_reset(gt);
+
+	err = xe_uc_stop(&gt->uc);
+	if (err)
+		goto err_out;
+
+	err = do_gt_reset(gt);
+	if (err)
+		goto err_out;
+
+	err = do_gt_restart(gt);
+	if (err)
+		goto err_out;
+
+	xe_device_mem_access_put(gt_to_xe(gt));
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	drm_info(&xe->drm, "GT reset done\n");
+
+	return 0;
+
+err_out:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	XE_WARN_ON(xe_uc_start(&gt->uc));
+	xe_device_mem_access_put(gt_to_xe(gt));
+	drm_err(&xe->drm, "GT reset failed, err=%d\n", err);
+
+	return err;
+}
+
+static void gt_reset_worker(struct work_struct *w)
+{
+	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
+
+	gt_reset(gt);
+}
+
+void xe_gt_reset_async(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	drm_info(&xe->drm, "Try GT reset\n");
+
+	/* Don't do a reset while one is already in flight */
+	if (xe_uc_reset_prepare(&gt->uc))
+		return;
+
+	drm_info(&xe->drm, "Doing GT reset\n");
+	queue_work(gt->ordered_wq, &gt->reset.worker);
+}
+
+void xe_gt_suspend_prepare(struct xe_gt *gt)
+{
+	xe_device_mem_access_get(gt_to_xe(gt));
+	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	xe_uc_stop_prepare(&gt->uc);
+
+	xe_device_mem_access_put(gt_to_xe(gt));
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+}
+
+int xe_gt_suspend(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* For now suspend/resume is only allowed with GuC */
+	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+		return -ENODEV;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = xe_uc_suspend(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	xe_device_mem_access_put(gt_to_xe(gt));
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	drm_info(&xe->drm, "GT suspended\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	drm_err(&xe->drm, "GT suspend failed: %d\n", err);
+
+	return err;
+}
+
+int xe_gt_resume(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = do_gt_restart(gt);
+	if (err)
+		goto err_force_wake;
+
+	xe_device_mem_access_put(gt_to_xe(gt));
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	drm_info(&xe->drm, "GT resumed\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	drm_err(&xe->drm, "GT resume failed: %d\n", err);
+
+	return err;
+}
+
+void xe_gt_migrate_wait(struct xe_gt *gt)
+{
+	xe_migrate_wait(gt->migrate);
+}
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance, bool logical)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id)
+		if (hwe->class == class &&
+		    ((!logical && hwe->instance == instance) ||
+		    (logical && hwe->logical_instance == instance)))
+			return hwe;
+
+	return NULL;
+}
+
+struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
+							 enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		switch (class) {
+		case XE_ENGINE_CLASS_RENDER:
+		case XE_ENGINE_CLASS_COMPUTE:
+			if (hwe->class == XE_ENGINE_CLASS_RENDER ||
+			    hwe->class == XE_ENGINE_CLASS_COMPUTE)
+				return hwe;
+			break;
+		default:
+			if (hwe->class == class)
+				return hwe;
+		}
+	}
+
+	return NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
new file mode 100644
index 0000000000000..5dc08a993cfeb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_H_
+#define _XE_GT_H_
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_hw_engine.h"
+
+#define for_each_hw_engine(hwe__, gt__, id__) \
+	for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
+	     for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \
+			  xe_hw_engine_is_valid((hwe__)))
+
+int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt);
+int xe_gt_init_early(struct xe_gt *gt);
+int xe_gt_init_noalloc(struct xe_gt *gt);
+int xe_gt_init(struct xe_gt *gt);
+int xe_gt_record_default_lrcs(struct xe_gt *gt);
+void xe_gt_suspend_prepare(struct xe_gt *gt);
+int xe_gt_suspend(struct xe_gt *gt);
+int xe_gt_resume(struct xe_gt *gt);
+void xe_gt_reset_async(struct xe_gt *gt);
+void xe_gt_migrate_wait(struct xe_gt *gt);
+
+struct xe_gt *xe_find_full_gt(struct xe_gt *gt);
+
+/**
+ * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
+ * first that matches the same reset domain as @class
+ * @gt: GT structure
+ * @class: hw engine class to lookup
+ */
+struct xe_hw_engine *
+xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class);
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance,
+				     bool logical);
+
+static inline bool xe_gt_is_media_type(struct xe_gt *gt)
+{
+	return gt->info.type == XE_GT_TYPE_MEDIA;
+}
+
+static inline struct xe_device * gt_to_xe(struct xe_gt *gt)
+{
+	return gt->xe;
+}
+
+static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+		hwe->instance == gt->usm.reserved_bcs_instance;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
new file mode 100644
index 0000000000000..575433e9718ac
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+static u32 read_reference_ts_freq(struct xe_gt *gt)
+{
+	u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg);
+	u32 base_freq, frac_freq;
+
+	base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
+		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
+	base_freq *= 1000000;
+
+	frac_freq = ((ts_override &
+		      GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
+		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
+	frac_freq = 1000000 / (frac_freq + 1);
+
+	return base_freq + frac_freq;
+}
+
+static u32 get_crystal_clock_freq(u32 rpm_config_reg)
+{
+	const u32 f19_2_mhz = 19200000;
+	const u32 f24_mhz = 24000000;
+	const u32 f25_mhz = 25000000;
+	const u32 f38_4_mhz = 38400000;
+	u32 crystal_clock =
+		(rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+		GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+	switch (crystal_clock) {
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+		return f24_mhz;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+		return f19_2_mhz;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
+		return f38_4_mhz;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
+		return f25_mhz;
+	default:
+		XE_BUG_ON("NOT_POSSIBLE");
+		return 0;
+	}
+}
+
+int xe_gt_clock_init(struct xe_gt *gt)
+{
+	u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE.reg);
+	u32 freq = 0;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+	if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
+		freq = read_reference_ts_freq(gt);
+	} else {
+		u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg);
+
+		freq = get_crystal_clock_freq(c0);
+
+		/*
+		 * Now figure out how the command stream's timestamp
+		 * register increments from this frequency (it might
+		 * increment only every few clock cycle).
+		 */
+		freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+	}
+
+	gt->info.clock_freq = freq;
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
new file mode 100644
index 0000000000000..511923afd224a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_CLOCK_H_
+#define _XE_GT_CLOCK_H_
+
+struct xe_gt;
+
+int xe_gt_clock_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
new file mode 100644
index 0000000000000..cd18887841410
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_debugfs.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+	return node->info_ent->data;
+}
+
+static int hw_engines(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err) {
+		xe_device_mem_access_put(xe);
+		return err;
+	}
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_print_state(hwe, &p);
+
+	xe_device_mem_access_put(xe);
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int force_reset(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+
+	xe_gt_reset_async(gt);
+
+	return 0;
+}
+
+static int sa_info(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	drm_suballoc_dump_debug_info(&gt->kernel_bb_pool.base, &p,
+				     gt->kernel_bb_pool.gpu_addr);
+
+	return 0;
+}
+
+static int topology(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_gt_topology_dump(gt, &p);
+
+	return 0;
+}
+
+static int steering(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_gt_mcr_steering_dump(gt, &p);
+
+	return 0;
+}
+
+#ifdef CONFIG_DRM_XE_DEBUG
+static int invalidate_tlb(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	int seqno;
+	int ret = 0;
+
+	seqno = xe_gt_tlb_invalidation(gt);
+	XE_WARN_ON(seqno < 0);
+	if (seqno > 0)
+		ret = xe_gt_tlb_invalidation_wait(gt, seqno);
+	XE_WARN_ON(ret < 0);
+
+	return 0;
+}
+#endif
+
+static const struct drm_info_list debugfs_list[] = {
+	{"hw_engines", hw_engines, 0},
+	{"force_reset", force_reset, 0},
+	{"sa_info", sa_info, 0},
+	{"topology", topology, 0},
+	{"steering", steering, 0},
+#ifdef CONFIG_DRM_XE_DEBUG
+	{"invalidate_tlb", invalidate_tlb, 0},
+#endif
+};
+
+void xe_gt_debugfs_register(struct xe_gt *gt)
+{
+	struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
+	struct dentry *root;
+	struct drm_info_list *local;
+	char name[8];
+	int i;
+
+	XE_BUG_ON(!minor->debugfs_root);
+
+	sprintf(name, "gt%d", gt->info.id);
+	root = debugfs_create_dir(name, minor->debugfs_root);
+	if (IS_ERR(root)) {
+		XE_WARN_ON("Create GT directory failed");
+		return;
+	}
+
+	/*
+	 * Allocate local copy as we need to pass in the GT to the debugfs
+	 * entry and drm_debugfs_create_files just references the drm_info_list
+	 * passed in (e.g. can't define this on the stack).
+	 */
+#define DEBUGFS_SIZE	ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)
+	local = drmm_kmalloc(&gt_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local) {
+		XE_WARN_ON("Couldn't allocate memory");
+		return;
+	}
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = gt;
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 root, minor);
+
+	xe_uc_debugfs_register(&gt->uc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
new file mode 100644
index 0000000000000..5a329f118a578
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_DEBUGFS_H_
+#define _XE_GT_DEBUGFS_H_
+
+struct xe_gt;
+
+void xe_gt_debugfs_register(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
new file mode 100644
index 0000000000000..b69c0d6c6b2f9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_topology.h"
+#include "xe_gt_types.h"
+#include "xe_mmio.h"
+
+#include "gt/intel_gt_regs.h"
+
+/**
+ * DOC: GT Multicast/Replicated (MCR) Register Support
+ *
+ * Some GT registers are designed as "multicast" or "replicated" registers:
+ * multiple instances of the same register share a single MMIO offset.  MCR
+ * registers are generally used when the hardware needs to potentially track
+ * independent values of a register per hardware unit (e.g., per-subslice,
+ * per-L3bank, etc.).  The specific types of replication that exist vary
+ * per-platform.
+ *
+ * MMIO accesses to MCR registers are controlled according to the settings
+ * programmed in the platform's MCR_SELECTOR register(s).  MMIO writes to MCR
+ * registers can be done in either a (i.e., a single write updates all
+ * instances of the register to the same value) or unicast (a write updates only
+ * one specific instance).  Reads of MCR registers always operate in a unicast
+ * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR.
+ * Selection of a specific MCR instance for unicast operations is referred to
+ * as "steering."
+ *
+ * If MCR register operations are steered toward a hardware unit that is
+ * fused off or currently powered down due to power gating, the MMIO operation
+ * is "terminated" by the hardware.  Terminated read operations will return a
+ * value of zero and terminated unicast write operations will be silently
+ * ignored.
+ */
+
+enum {
+	MCR_OP_READ,
+	MCR_OP_WRITE
+};
+
+static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules and doesn't need to be
+ * included here.
+ */
+static const struct xe_mmio_range xehp_mslice_steering_table[] = {
+	{ 0x00DD00, 0x00DDFF },
+	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */
+	{},
+};
+
+static const struct xe_mmio_range xehp_lncf_steering_table[] = {
+	{ 0x00B000, 0x00B0FF },
+	{ 0x00D880, 0x00D8FF },
+	{},
+};
+
+/*
+ * We have several types of MCR registers where steering to (0,0) will always
+ * provide us with a non-terminated value.  We'll stick them all in the same
+ * table for simplicity.
+ */
+static const struct xe_mmio_range xehpc_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },		/* HALF-BSLICE */
+	{ 0x008800, 0x00887F },		/* CC */
+	{ 0x008A80, 0x008AFF },		/* TILEPSMI */
+	{ 0x00B000, 0x00B0FF },		/* HALF-BSLICE */
+	{ 0x00B100, 0x00B3FF },		/* L3BANK */
+	{ 0x00C800, 0x00CFFF },		/* HALF-BSLICE */
+	{ 0x00D800, 0x00D8FF },		/* HALF-BSLICE */
+	{ 0x00DD00, 0x00DDFF },		/* BSLICE */
+	{ 0x00E900, 0x00E9FF },		/* HALF-BSLICE */
+	{ 0x00EC00, 0x00EEFF },		/* HALF-BSLICE */
+	{ 0x00F000, 0x00FFFF },		/* HALF-BSLICE */
+	{ 0x024180, 0x0241FF },		/* HALF-BSLICE */
+	{},
+};
+
+static const struct xe_mmio_range xelpg_instance0_steering_table[] = {
+	{ 0x000B00, 0x000BFF },         /* SQIDI */
+	{ 0x001000, 0x001FFF },         /* SQIDI */
+	{ 0x004000, 0x0048FF },         /* GAM */
+	{ 0x008700, 0x0087FF },         /* SQIDI */
+	{ 0x00B000, 0x00B0FF },         /* NODE */
+	{ 0x00C800, 0x00CFFF },         /* GAM */
+	{ 0x00D880, 0x00D8FF },         /* NODE */
+	{ 0x00DD00, 0x00DDFF },         /* OAAL2 */
+	{},
+};
+
+static const struct xe_mmio_range xelpg_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+static const struct xe_mmio_range xelp_dss_steering_table[] = {
+	{ 0x008150, 0x00815F },
+	{ 0x009520, 0x00955F },
+	{ 0x00DE80, 0x00E8FF },
+	{ 0x024A00, 0x024A7F },
+	{},
+};
+
+/* DSS steering is used for GSLICE ranges as well */
+static const struct xe_mmio_range xehp_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* GSLICE */
+	{ 0x005400, 0x007FFF },		/* GSLICE */
+	{ 0x008140, 0x00815F },		/* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x008D00, 0x008DFF },		/* DSS */
+	{ 0x0094D0, 0x00955F },		/* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* GSLICE */
+	{ 0x00DC00, 0x00DCFF },		/* GSLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved ) */
+	{ 0x017000, 0x017FFF },		/* GSLICE */
+	{ 0x024A00, 0x024A7F },		/* DSS */
+	{},
+};
+
+/* DSS steering is used for COMPUTE ranges as well */
+static const struct xe_mmio_range xehpc_dss_steering_table[] = {
+	{ 0x008140, 0x00817F },		/* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00DC00, 0x00DCFF },		/* COMPUTE */
+	{ 0x00DE80, 0x00E7FF },		/* DSS (0xDF00-0xE1FF reserved ) */
+	{},
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct xe_mmio_range xelpg_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* SLICE */
+	{ 0x005500, 0x007FFF },		/* SLICE */
+	{ 0x008140, 0x00815F },		/* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* SLICE */
+	{ 0x00DC00, 0x00DCFF },		/* SLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved) */
+	{},
+};
+
+static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
+	{ 0x393200, 0x39323F },
+	{ 0x393400, 0x3934FF },
+	{},
+};
+
+/*
+ * DG2 GAM registers are a special case; this table is checked directly in
+ * xe_gt_mcr_get_nonterminated_steering and is not hooked up via
+ * gt->steering[].
+ */
+static const struct xe_mmio_range dg2_gam_ranges[] = {
+	{ 0x004000, 0x004AFF },
+	{ 0x00C800, 0x00CFFF },
+	{ 0x00F000, 0x00FFFF },
+	{},
+};
+
+static void init_steering_l3bank(struct xe_gt *gt)
+{
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+						xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+		u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
+					      xe_mmio_read32(gt, XEHP_FUSE4.reg));
+
+		/*
+		 * Group selects mslice, instance selects bank within mslice.
+		 * Bank 0 is always valid _except_ when the bank mask is 010b.
+		 */
+		gt->steering[L3BANK].group_target = __ffs(mslice_mask);
+		gt->steering[L3BANK].instance_target =
+			bank_mask & BIT(0) ? 0 : 2;
+	} else {
+		u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK,
+					 ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+
+		gt->steering[L3BANK].group_target = 0;	/* unused */
+		gt->steering[L3BANK].instance_target = __ffs(fuse);
+	}
+}
+
+static void init_steering_mslice(struct xe_gt *gt)
+{
+	u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+				 xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+
+	/*
+	 * mslice registers are valid (not terminated) if either the meml3
+	 * associated with the mslice is present, or at least one DSS associated
+	 * with the mslice is present.  There will always be at least one meml3
+	 * so we can just use that to find a non-terminated mslice and ignore
+	 * the DSS fusing.
+	 */
+	gt->steering[MSLICE].group_target = __ffs(mask);
+	gt->steering[MSLICE].instance_target = 0;	/* unused */
+
+	/*
+	 * LNCF termination is also based on mslice presence, so we'll set
+	 * it up here.  Either LNCF within a non-terminated mslice will work,
+	 * so we just always pick LNCF 0 here.
+	 */
+	gt->steering[LNCF].group_target = __ffs(mask) << 1;
+	gt->steering[LNCF].instance_target = 0;		/* unused */
+}
+
+static void init_steering_dss(struct xe_gt *gt)
+{
+	unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
+			       xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
+	unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;
+
+	gt->steering[DSS].group_target = dss / dss_per_grp;
+	gt->steering[DSS].instance_target = dss % dss_per_grp;
+}
+
+static void init_steering_oaddrm(struct xe_gt *gt)
+{
+	/*
+	 * First instance is only terminated if the entire first media slice
+	 * is absent (i.e., no VCS0 or VECS0).
+	 */
+	if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0))
+		gt->steering[OADDRM].group_target = 0;
+	else
+		gt->steering[OADDRM].group_target = 1;
+
+	gt->steering[DSS].instance_target = 0;		/* unused */
+}
+
+static void init_steering_inst0(struct xe_gt *gt)
+{
+	gt->steering[DSS].group_target = 0;		/* unused */
+	gt->steering[DSS].instance_target = 0;		/* unused */
+}
+
+static const struct {
+	const char *name;
+	void (*init)(struct xe_gt *);
+} xe_steering_types[] = {
+	{ "L3BANK",	init_steering_l3bank },
+	{ "MSLICE",	init_steering_mslice },
+	{ "LNCF",	NULL },		/* initialized by mslice init */
+	{ "DSS",	init_steering_dss },
+	{ "OADDRM",	init_steering_oaddrm },
+	{ "INSTANCE 0",	init_steering_inst0 },
+};
+
+void xe_gt_mcr_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
+
+	spin_lock_init(&gt->mcr_lock);
+
+	if (gt->info.type == XE_GT_TYPE_MEDIA) {
+		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
+
+		gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+	} else if (GRAPHICS_VERx100(xe) >= 1270) {
+		gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
+		gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+		gt->steering[DSS].ranges = xelpg_dss_steering_table;
+	} else if (xe->info.platform == XE_PVC) {
+		gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
+		gt->steering[DSS].ranges = xehpc_dss_steering_table;
+	} else if (xe->info.platform == XE_DG2) {
+		gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
+		gt->steering[LNCF].ranges = xehp_lncf_steering_table;
+		gt->steering[DSS].ranges = xehp_dss_steering_table;
+	} else {
+		gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
+		gt->steering[DSS].ranges = xelp_dss_steering_table;
+	}
+
+	/* Select non-terminated steering target for each type */
+	for (int i = 0; i < NUM_STEERING_TYPES; i++)
+		if (gt->steering[i].ranges && xe_steering_types[i].init)
+			xe_steering_types[i].init(gt);
+}
+
+/*
+ * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
+ *    will steer a register to a non-terminated instance
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @group: return variable for group steering
+ * @instance: return variable for instance steering
+ *
+ * This function returns a group/instance pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ *
+ * Returns true if the caller should steer to the @group/@instance values
+ * returned.  Returns false if the caller need not perform any steering (i.e.,
+ * the DG2 GAM range special case).
+ */
+static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+						 i915_mcr_reg_t reg,
+						 u8 *group, u8 *instance)
+{
+	for (int type = 0; type < NUM_STEERING_TYPES; type++) {
+		if (!gt->steering[type].ranges)
+			continue;
+
+		for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
+			if (xe_mmio_in_range(&gt->steering[type].ranges[i], reg.reg)) {
+				*group = gt->steering[type].group_target;
+				*instance = gt->steering[type].instance_target;
+				return true;
+			}
+		}
+	}
+
+	/*
+	 * All MCR registers should usually be part of one of the steering
+	 * ranges we're tracking.  However there's one special case:  DG2
+	 * GAM registers are technically multicast registers, but are special
+	 * in a number of ways:
+	 *  - they have their own dedicated steering control register (they
+	 *    don't share 0xFDC with other MCR classes)
+	 *  - all reads should be directed to instance 1 (unicast reads against
+	 *    other instances are not allowed), and instance 1 is already the
+	 *    the hardware's default steering target, which we never change
+	 *
+	 * Ultimately this means that we can just treat them as if they were
+	 * unicast registers and all operations will work properly.
+	 */
+	for (int i = 0; dg2_gam_ranges[i].end > 0; i++)
+		if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg))
+			return false;
+
+	/*
+	 * Not found in a steering table and not a DG2 GAM register?  We'll
+	 * just steer to 0/0 as a guess and raise a warning.
+	 */
+	drm_WARN(&gt_to_xe(gt)->drm, true,
+		 "Did not find MCR register %#x in any MCR steering table\n",
+		 reg.reg);
+	*group = 0;
+	*instance = 0;
+
+	return true;
+}
+
+#define STEER_SEMAPHORE		0xFD0
+
+/*
+ * Obtain exclusive access to MCR steering.  On MTL and beyond we also need
+ * to synchronize with external clients (e.g., firmware), so a semaphore
+ * register will also need to be taken.
+ */
+static void mcr_lock(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret;
+
+	spin_lock(&gt->mcr_lock);
+
+	/*
+	 * Starting with MTL we also need to grab a semaphore register
+	 * to synchronize with external agents (e.g., firmware) that now
+	 * shares the same steering control register.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10);
+
+	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+}
+
+static void mcr_unlock(struct xe_gt *gt) {
+	/* Release hardware semaphore */
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+
+	spin_unlock(&gt->mcr_lock);
+}
+
+/*
+ * Access a register with specific MCR steering
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag,
+				int group, int instance, u32 value)
+{
+	u32 steer_reg, steer_val, val = 0;
+
+	lockdep_assert_held(&gt->mcr_lock);
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		steer_reg = MTL_MCR_SELECTOR.reg;
+		steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+			REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
+	} else {
+		steer_reg = GEN8_MCR_SELECTOR.reg;
+		steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) |
+			REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance);
+	}
+
+	/*
+	 * Always leave the hardware in multicast mode when doing reads
+	 * (see comment about Wa_22013088509 below) and only change it
+	 * to unicast mode when doing writes of a specific instance.
+	 *
+	 * No need to save old steering reg value.
+	 */
+	if (rw_flag == MCR_OP_READ)
+		steer_val |= GEN11_MCR_MULTICAST;
+
+	xe_mmio_write32(gt, steer_reg, steer_val);
+
+	if (rw_flag == MCR_OP_READ)
+		val = xe_mmio_read32(gt, reg.reg);
+	else
+		xe_mmio_write32(gt, reg.reg, value);
+
+	/*
+	 * If we turned off the multicast bit (during a write) we're required
+	 * to turn it back on before finishing.  The group and instance values
+	 * don't matter since they'll be re-programmed on the next MCR
+	 * operation.
+	 */
+	if (rw_flag == MCR_OP_WRITE)
+		xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
+ * @gt: GT structure
+ * @reg: register to read
+ *
+ * Reads a GT MCR register.  The read will be steered to a non-terminated
+ * instance (i.e., one that isn't fused off or powered down by power gating).
+ * This function assumes the caller is already holding any necessary forcewake
+ * domains.
+ *
+ * Returns the value from a non-terminated instance of @reg.
+ */
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
+{
+	u8 group, instance;
+	u32 val;
+	bool steer;
+
+	steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
+
+	if (steer) {
+		mcr_lock(gt);
+		val = rw_with_mcr_steering(gt, reg, MCR_OP_READ,
+					   group, instance, 0);
+		mcr_unlock(gt);
+	} else {
+		/* DG2 GAM special case rules; treat as if unicast */
+		val = xe_mmio_read32(gt, reg.reg);
+	}
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read - read a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to read
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Returns the value read from an MCR register after steering toward a specific
+ * group/instance.
+ */
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
+			   i915_mcr_reg_t reg,
+			   int group, int instance)
+{
+	u32 val;
+
+	mcr_lock(gt);
+	val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0);
+	mcr_unlock(gt);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_write - write a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Write an MCR register in unicast mode after steering toward a specific
+ * group/instance.
+ */
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
+			     int group, int instance)
+{
+	mcr_lock(gt);
+	rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value);
+	mcr_unlock(gt);
+}
+
+/**
+ * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ *
+ * Write an MCR register in multicast mode to update all instances.
+ */
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value)
+{
+	/*
+	 * Synchronize with any unicast operations.  Once we have exclusive
+	 * access, the MULTICAST bit should already be set, so there's no need
+	 * to touch the steering register.
+	 */
+	mcr_lock(gt);
+	xe_mmio_write32(gt, reg.reg, value);
+	mcr_unlock(gt);
+}
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	for (int i = 0; i < NUM_STEERING_TYPES; i++) {
+		if (gt->steering[i].ranges) {
+			drm_printf(p, "%s steering: group=%#x, instance=%#x\n",
+				   xe_steering_types[i].name,
+				   gt->steering[i].group_target,
+				   gt->steering[i].instance_target);
+			for (int j = 0; gt->steering[i].ranges[j].end; j++)
+				drm_printf(p, "\t0x%06x - 0x%06x\n",
+					   gt->steering[i].ranges[j].start,
+					   gt->steering[i].ranges[j].end);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
new file mode 100644
index 0000000000000..62ec6eb654a0b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_MCR_H_
+#define _XE_GT_MCR_H_
+
+#include "i915_reg_defs.h"
+
+struct drm_printer;
+struct xe_gt;
+
+void xe_gt_mcr_init(struct xe_gt *gt);
+
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg,
+			   int group, int instance);
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg);
+
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
+			     int group, int instance);
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value);
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
+
+#endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
new file mode 100644
index 0000000000000..7125113b7390f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/circ_buf.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_gt_pagefault.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+struct pagefault {
+	u64 page_addr;
+	u32 asid;
+	u16 pdata;
+	u8 vfid;
+	u8 access_type;
+	u8 fault_type;
+	u8 fault_level;
+	u8 engine_class;
+	u8 engine_instance;
+	u8 fault_unsuccessful;
+};
+
+enum access_type {
+	ACCESS_TYPE_READ = 0,
+	ACCESS_TYPE_WRITE = 1,
+	ACCESS_TYPE_ATOMIC = 2,
+	ACCESS_TYPE_RESERVED = 3,
+};
+
+enum fault_type {
+	NOT_PRESENT = 0,
+	WRITE_ACCESS_VIOLATION = 1,
+	ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+struct acc {
+	u64 va_range_base;
+	u32 asid;
+	u32 sub_granularity;
+	u8 granularity;
+	u8 vfid;
+	u8 access_type;
+	u8 engine_class;
+	u8 engine_instance;
+};
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION,
+		0,
+		XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
+		XE_GUC_TLB_INVAL_FLUSH_CACHE,
+	};
+	int seqno;
+	int ret;
+
+	/*
+	 * XXX: The seqno algorithm relies on TLB invalidation being processed
+	 * in order which they currently are, if that changes the algorithm will
+	 * need to be updated.
+	 */
+	mutex_lock(&guc->ct.lock);
+	seqno = gt->usm.tlb_invalidation_seqno;
+	action[1] = seqno;
+	gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) %
+		TLB_INVALIDATION_SEQNO_MAX;
+	if (!gt->usm.tlb_invalidation_seqno)
+		gt->usm.tlb_invalidation_seqno = 1;
+	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
+				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret)
+		ret = seqno;
+	mutex_unlock(&guc->ct.lock);
+
+	return ret;
+}
+
+static bool access_is_atomic(enum access_type access_type)
+{
+	return access_type == ACCESS_TYPE_ATOMIC;
+}
+
+static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma)
+{
+	return BIT(gt->info.id) & vma->gt_present &&
+		!(BIT(gt->info.id) & vma->usm.gt_invalidated);
+}
+
+static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
+{
+	if (lookup->start > vma->end || lookup->end < vma->start)
+		return false;
+
+	return true;
+}
+
+static bool only_needs_bo_lock(struct xe_bo *bo)
+{
+	return bo && bo->vm;
+}
+
+static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
+{
+	struct xe_vma *vma = NULL, lookup;
+
+	lookup.start = page_addr;
+	lookup.end = lookup.start + SZ_4K - 1;
+	if (vm->usm.last_fault_vma) {   /* Fast lookup */
+		if (vma_matches(vm->usm.last_fault_vma, &lookup))
+			vma = vm->usm.last_fault_vma;
+	}
+	if (!vma)
+		vma = xe_vm_find_overlapping_vma(vm, &lookup);
+
+	return vma;
+}
+
+static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_vm *vm;
+	struct xe_vma *vma = NULL;
+	struct xe_bo *bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	struct ttm_validate_buffer tv_bo, tv_vm;
+	struct ww_acquire_ctx ww;
+	struct dma_fence *fence;
+	bool write_locked;
+	int ret = 0;
+	bool atomic;
+
+	/* ASID to VM */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
+	if (vm)
+		xe_vm_get(vm);
+	mutex_unlock(&xe->usm.lock);
+	if (!vm || !xe_vm_in_fault_mode(vm))
+		return -EINVAL;
+
+retry_userptr:
+	/*
+	 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
+	 * start out read-locked?
+	 */
+	down_write(&vm->lock);
+	write_locked = true;
+	vma = lookup_vma(vm, pf->page_addr);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	trace_xe_vma_pagefault(vma);
+
+	atomic = access_is_atomic(pf->access_type);
+
+	/* Check if VMA is valid */
+	if (vma_is_valid(gt, vma) && !atomic)
+		goto unlock_vm;
+
+	/* TODO: Validate fault */
+
+	if (xe_vma_is_userptr(vma) && write_locked) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del_init(&vma->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+
+		ret = xe_vma_userptr_pin_pages(vma);
+		if (ret)
+			goto unlock_vm;
+
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	/* Lock VM and BOs dma-resv */
+	bo = vma->bo;
+	if (only_needs_bo_lock(bo)) {
+		/* This path ensures the BO's LRU is updated */
+		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
+	} else {
+		tv_vm.num_shared = xe->info.tile_count;
+		tv_vm.bo = xe_vm_ttm_bo(vm);
+		list_add(&tv_vm.head, &objs);
+		if (bo) {
+			tv_bo.bo = &bo->ttm;
+			tv_bo.num_shared = xe->info.tile_count;
+			list_add(&tv_bo.head, &objs);
+		}
+		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+	}
+	if (ret)
+		goto unlock_vm;
+
+	if (atomic) {
+		if (xe_vma_is_userptr(vma)) {
+			ret = -EACCES;
+			goto unlock_dma_resv;
+		}
+
+		/* Migrate to VRAM, move should invalidate the VMA first */
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+		if (ret)
+			goto unlock_dma_resv;
+	} else if (bo) {
+		/* Create backing store if needed */
+		ret = xe_bo_validate(bo, vm, true);
+		if (ret)
+			goto unlock_dma_resv;
+	}
+
+	/* Bind VMA only to the GT that has faulted */
+	trace_xe_vma_pf_bind(vma);
+	fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0,
+				 vma->gt_present & BIT(gt->info.id));
+	if (IS_ERR(fence)) {
+		ret = PTR_ERR(fence);
+		goto unlock_dma_resv;
+	}
+
+	/*
+	 * XXX: Should we drop the lock before waiting? This only helps if doing
+	 * GPU binds which is currently only done if we have to wait for more
+	 * than 10ms on a move.
+	 */
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	if (xe_vma_is_userptr(vma))
+		ret = xe_vma_userptr_check_repin(vma);
+	vma->usm.gt_invalidated &= ~BIT(gt->info.id);
+
+unlock_dma_resv:
+	if (only_needs_bo_lock(bo))
+		xe_bo_unlock(bo, &ww);
+	else
+		ttm_eu_backoff_reservation(&ww, &objs);
+unlock_vm:
+	if (!ret)
+		vm->usm.last_fault_vma = vma;
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	if (ret == -EAGAIN)
+		goto retry_userptr;
+
+	if (!ret) {
+		/*
+		 * FIXME: Doing a full TLB invalidation for now, likely could
+		 * defer TLB invalidate + fault response to a callback of fence
+		 * too
+		 */
+		ret = send_tlb_invalidation(&gt->uc.guc);
+		if (ret >= 0)
+			ret = 0;
+	}
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+static int send_pagefault_reply(struct xe_guc *guc,
+				struct xe_guc_pagefault_reply *reply)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+		reply->dw0,
+		reply->dw1,
+	};
+
+	return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
+{
+	drm_warn(&xe->drm, "\n\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tPDATA: 0x%04x\n"
+		 "\tFaulted Address: 0x%08x%08x\n"
+		 "\tFaultType: %d\n"
+		 "\tAccessType: %d\n"
+		 "\tFaultLevel: %d\n"
+		 "\tEngineClass: %d\n"
+		 "\tEngineInstance: %d\n",
+		 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
+		 lower_32_bits(pf->page_addr),
+		 pf->fault_type, pf->access_type, pf->fault_level,
+		 pf->engine_class, pf->engine_instance);
+}
+
+#define PF_MSG_LEN_DW	4
+
+static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+{
+	const struct xe_guc_pagefault_desc *desc;
+	int ret = 0;
+
+	spin_lock_irq(&pf_queue->lock);
+	if (pf_queue->head != pf_queue->tail) {
+		desc = (const struct xe_guc_pagefault_desc *)
+			(pf_queue->data + pf_queue->head);
+
+		pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+		pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
+		pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
+		pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
+			PFD_PDATA_HI_SHIFT;
+		pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
+		pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
+		pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
+		pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
+		pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
+		pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
+			PFD_VIRTUAL_ADDR_HI_SHIFT;
+		pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
+			PFD_VIRTUAL_ADDR_LO_SHIFT;
+
+		pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+			PF_QUEUE_NUM_DW;
+	} else {
+		ret = -1;
+	}
+	spin_unlock_irq(&pf_queue->lock);
+
+	return ret;
+}
+
+static bool pf_queue_full(struct pf_queue *pf_queue)
+{
+	lockdep_assert_held(&pf_queue->lock);
+
+	return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+		PF_MSG_LEN_DW;
+}
+
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct pf_queue *pf_queue;
+	unsigned long flags;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != PF_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(PFD_ASID, msg[1]);
+	pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+
+	spin_lock_irqsave(&pf_queue->lock, flags);
+	full = pf_queue_full(pf_queue);
+	if (!full) {
+		memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
+		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+		queue_work(gt->usm.pf_wq, &pf_queue->worker);
+	} else {
+		XE_WARN_ON("PF Queue full, shouldn't be possible");
+	}
+	spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+	return full ? -ENOSPC : 0;
+}
+
+static void pf_queue_work_func(struct work_struct *w)
+{
+	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
+	struct xe_gt *gt = pf_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc_pagefault_reply reply = {};
+	struct pagefault pf = {};
+	int ret;
+
+	ret = get_pagefault(pf_queue, &pf);
+	if (ret)
+		return;
+
+	ret = handle_pagefault(gt, &pf);
+	if (unlikely(ret)) {
+		print_pagefault(xe, &pf);
+		pf.fault_unsuccessful = 1;
+		drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+	}
+
+	reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+		FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+		FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+		FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+		FIELD_PREP(PFR_ASID, pf.asid);
+
+	reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+		FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+		FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+		FIELD_PREP(PFR_PDATA, pf.pdata);
+
+	send_pagefault_reply(&gt->uc.guc, &reply);
+}
+
+static void acc_queue_work_func(struct work_struct *w);
+
+int xe_gt_pagefault_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i;
+
+	if (!xe->info.supports_usm)
+		return 0;
+
+	gt->usm.tlb_invalidation_seqno = 1;
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		gt->usm.pf_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.pf_queue[i].lock);
+		INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
+	}
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		gt->usm.acc_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.acc_queue[i].lock);
+		INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
+	}
+
+	gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
+					WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
+	if (!gt->usm.pf_wq)
+		return -ENOMEM;
+
+	gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
+					 WQ_UNBOUND | WQ_HIGHPRI,
+					 NUM_ACC_QUEUE);
+	if (!gt->usm.acc_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void xe_gt_pagefault_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i;
+
+	if (!xe->info.supports_usm)
+		return;
+
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		spin_lock_irq(&gt->usm.pf_queue[i].lock);
+		gt->usm.pf_queue[i].head = 0;
+		gt->usm.pf_queue[i].tail = 0;
+		spin_unlock_irq(&gt->usm.pf_queue[i].lock);
+	}
+
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		spin_lock(&gt->usm.acc_queue[i].lock);
+		gt->usm.acc_queue[i].head = 0;
+		gt->usm.acc_queue[i].tail = 0;
+		spin_unlock(&gt->usm.acc_queue[i].lock);
+	}
+}
+
+int xe_gt_tlb_invalidation(struct xe_gt *gt)
+{
+	return send_tlb_invalidation(&gt->uc.guc);
+}
+
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+	if (gt->usm.tlb_invalidation_seqno_recv >= seqno)
+		return true;
+
+	if (seqno - gt->usm.tlb_invalidation_seqno_recv >
+	    (TLB_INVALIDATION_SEQNO_MAX / 2))
+		return true;
+
+	return false;
+}
+
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc *guc = &gt->uc.guc;
+	int ret;
+
+	/*
+	 * XXX: See above, this algorithm only works if seqno are always in
+	 * order
+	 */
+	ret = wait_event_timeout(guc->ct.wq,
+				 tlb_invalidation_seqno_past(gt, seqno),
+				 HZ / 5);
+	if (!ret) {
+		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
+			seqno, gt->usm.tlb_invalidation_seqno_recv);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int expected_seqno;
+
+	if (unlikely(len != 1))
+		return -EPROTO;
+
+	/* Sanity check on seqno */
+	expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) %
+		TLB_INVALIDATION_SEQNO_MAX;
+	XE_WARN_ON(expected_seqno != msg[0]);
+
+	gt->usm.tlb_invalidation_seqno_recv = msg[0];
+	smp_wmb();
+	wake_up_all(&guc->ct.wq);
+
+	return 0;
+}
+
+static int granularity_in_byte(int val)
+{
+	switch (val) {
+	case 0:
+		return SZ_128K;
+	case 1:
+		return SZ_2M;
+	case 2:
+		return SZ_16M;
+	case 3:
+		return SZ_64M;
+	default:
+		return 0;
+	}
+}
+
+static int sub_granularity_in_byte(int val)
+{
+	return (granularity_in_byte(val) / 32);
+}
+
+static void print_acc(struct xe_device *xe, struct acc *acc)
+{
+	drm_warn(&xe->drm, "Access counter request:\n"
+		 "\tType: %s\n"
+		 "\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tEngine: %d:%d\n"
+		 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
+		 "\tSub_Granularity Vector: 0x%08x\n"
+		 "\tVA Range base: 0x%016llx\n",
+		 acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
+		 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
+		 granularity_in_byte(acc->granularity) / SZ_1K,
+		 sub_granularity_in_byte(acc->granularity) / SZ_1K,
+		 acc->sub_granularity, acc->va_range_base);
+}
+
+static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
+{
+	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
+		sub_granularity_in_byte(acc->granularity);
+	struct xe_vma lookup;
+
+	lookup.start = page_va;
+	lookup.end = lookup.start + SZ_4K - 1;
+
+	return xe_vm_find_overlapping_vma(vm, &lookup);
+}
+
+static int handle_acc(struct xe_gt *gt, struct acc *acc)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_vm *vm;
+	struct xe_vma *vma;
+	struct xe_bo *bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	struct ttm_validate_buffer tv_bo, tv_vm;
+	struct ww_acquire_ctx ww;
+	int ret = 0;
+
+	/* We only support ACC_TRIGGER at the moment */
+	if (acc->access_type != ACC_TRIGGER)
+		return -EINVAL;
+
+	/* ASID to VM */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
+	if (vm)
+		xe_vm_get(vm);
+	mutex_unlock(&xe->usm.lock);
+	if (!vm || !xe_vm_in_fault_mode(vm))
+		return -EINVAL;
+
+	down_read(&vm->lock);
+
+	/* Lookup VMA */
+	vma = get_acc_vma(vm, acc);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	trace_xe_vma_acc(vma);
+
+	/* Userptr can't be migrated, nothing to do */
+	if (xe_vma_is_userptr(vma))
+		goto unlock_vm;
+
+	/* Lock VM and BOs dma-resv */
+	bo = vma->bo;
+	if (only_needs_bo_lock(bo)) {
+		/* This path ensures the BO's LRU is updated */
+		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
+	} else {
+		tv_vm.num_shared = xe->info.tile_count;
+		tv_vm.bo = xe_vm_ttm_bo(vm);
+		list_add(&tv_vm.head, &objs);
+		tv_bo.bo = &bo->ttm;
+		tv_bo.num_shared = xe->info.tile_count;
+		list_add(&tv_bo.head, &objs);
+		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+	}
+	if (ret)
+		goto unlock_vm;
+
+	/* Migrate to VRAM, move should invalidate the VMA first */
+	ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+
+	if (only_needs_bo_lock(bo))
+		xe_bo_unlock(bo, &ww);
+	else
+		ttm_eu_backoff_reservation(&ww, &objs);
+unlock_vm:
+	up_read(&vm->lock);
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+#define make_u64(hi__, low__)  ((u64)(hi__) << 32 | (u64)(low__))
+
+static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
+{
+	const struct xe_guc_acc_desc *desc;
+	int ret = 0;
+
+	spin_lock(&acc_queue->lock);
+	if (acc_queue->head != acc_queue->tail) {
+		desc = (const struct xe_guc_acc_desc *)
+			(acc_queue->data + acc_queue->head);
+
+		acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
+		acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
+			FIELD_GET(ACC_SUBG_LO, desc->dw0);
+		acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
+		acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
+		acc->asid =  FIELD_GET(ACC_ASID, desc->dw1);
+		acc->vfid =  FIELD_GET(ACC_VFID, desc->dw2);
+		acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
+		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
+					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
+	} else {
+		ret = -1;
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return ret;
+}
+
+static void acc_queue_work_func(struct work_struct *w)
+{
+	struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
+	struct xe_gt *gt = acc_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct acc acc = {};
+	int ret;
+
+	ret = get_acc(acc_queue, &acc);
+	if (ret)
+		return;
+
+	ret = handle_acc(gt, &acc);
+	if (unlikely(ret)) {
+		print_acc(xe, &acc);
+		drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
+	}
+}
+
+#define ACC_MSG_LEN_DW	4
+
+static bool acc_queue_full(struct acc_queue *acc_queue)
+{
+	lockdep_assert_held(&acc_queue->lock);
+
+	return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+		ACC_MSG_LEN_DW;
+}
+
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct acc_queue *acc_queue;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != ACC_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(ACC_ASID, msg[1]);
+	acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
+
+	spin_lock(&acc_queue->lock);
+	full = acc_queue_full(acc_queue);
+	if (!full) {
+		memcpy(acc_queue->data + acc_queue->tail, msg,
+		       len * sizeof(u32));
+		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+		queue_work(gt->usm.acc_wq, &acc_queue->worker);
+	} else {
+		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return full ? -ENOSPC : 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
new file mode 100644
index 0000000000000..35f68027cc9ce
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_PAGEFAULT_H_
+#define _XE_GT_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_pagefault_init(struct xe_gt *gt);
+void xe_gt_pagefault_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_PAGEFAULT_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
new file mode 100644
index 0000000000000..2d966d935b8e0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+#include "xe_gt.h"
+#include "xe_gt_sysfs.h"
+
+static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static struct kobj_type xe_gt_sysfs_kobj_type = {
+	.release = xe_gt_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	kobject_put(gt->sysfs);
+}
+
+int xe_gt_sysfs_init(struct xe_gt *gt)
+{
+	struct device *dev = gt_to_xe(gt)->drm.dev;
+	struct kobj_gt *kg;
+	int err;
+
+	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+	if (!kg)
+		return -ENOMEM;
+
+	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
+	kg->gt = gt;
+
+	err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id);
+	if (err) {
+		kobject_put(&kg->base);
+		return err;
+	}
+
+	gt->sysfs = &kg->base;
+
+	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_sysfs_fini, gt);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
new file mode 100644
index 0000000000000..ecbfcc5c7d42c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_H_
+#define _XE_GT_SYSFS_H_
+
+#include "xe_gt_sysfs_types.h"
+
+int xe_gt_sysfs_init(struct xe_gt *gt);
+
+static inline struct xe_gt *
+kobj_to_gt(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+#endif /* _XE_GT_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
new file mode 100644
index 0000000000000..d3bc6b83360f4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_TYPES_H_
+#define _XE_GT_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+
+/**
+ * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT
+ *
+ * When dealing with multiple GTs, this struct helps to understand which GT
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_gt {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @gt: A pointer to the GT itself */
+	struct xe_gt *gt;
+};
+
+#endif	/* _XE_GT_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
new file mode 100644
index 0000000000000..8e02e362ba27d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_mmio.h"
+
+#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
+
+#define XELP_EU_ENABLE				0x9134	/* "_DISABLE" on Xe_LP */
+#define   XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		0x913c
+#define XEHP_GT_COMPUTE_DSS_ENABLE		0x9144
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		0x9148
+
+static void
+load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+{
+	va_list argp;
+	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
+	int i;
+
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
+		numregs = XE_MAX_DSS_FUSE_REGS;
+
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32));
+	va_end(argp);
+
+	bitmap_from_arr32(mask, fuse_val, numregs * 32);
+}
+
+static void
+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE);
+	u32 val = 0;
+	int i;
+
+	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
+
+	/*
+	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
+	 * of enable).
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		reg = ~reg & XELP_EU_MASK;
+
+	/* On PVC, one bit = one EU */
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		val = reg;
+	} else {
+		/* All other platforms, one bit = 2 EU */
+		for (i = 0; i < fls(reg); i++)
+			if (reg & BIT(i))
+				val |= 0x3 << 2 * i;
+	}
+
+	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
+}
+
+void
+xe_gt_topology_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_debug_printer("GT topology");
+	int num_geometry_regs, num_compute_regs;
+
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		num_geometry_regs = 0;
+		num_compute_regs = 2;
+	} else if (GRAPHICS_VERx100(xe) >= 1250) {
+		num_geometry_regs = 1;
+		num_compute_regs = 1;
+	} else {
+		num_geometry_regs = 1;
+		num_compute_regs = 0;
+	}
+
+	load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs,
+		      XELP_GT_GEOMETRY_DSS_ENABLE);
+	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
+		      XEHP_GT_COMPUTE_DSS_ENABLE,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+
+	xe_gt_topology_dump(gt, &p);
+}
+
+unsigned int
+xe_gt_topology_count_dss(xe_dss_mask_t mask)
+{
+	return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
+u64
+xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize)
+{
+	xe_dss_mask_t per_dss_mask = {};
+	u64 grpmask = 0;
+
+	WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask));
+
+	bitmap_fill(per_dss_mask, grpsize);
+	for (int i = 0; !bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); i++) {
+		if (bitmap_intersects(mask, per_dss_mask, grpsize))
+			grpmask |= BIT(i);
+
+		bitmap_shift_right(mask, mask, grpsize, XE_MAX_DSS_FUSE_BITS);
+	}
+
+	return grpmask;
+}
+
+void
+xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.g_dss_mask);
+	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.c_dss_mask);
+
+	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
+		   gt->fuse_topo.eu_mask_per_dss);
+
+}
+
+/*
+ * Used to obtain the index of the first DSS.  Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+unsigned int
+xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum)
+{
+	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
new file mode 100644
index 0000000000000..7a0abc64084f2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_GT_TOPOLOGY_H__
+#define __XE_GT_TOPOLOGY_H__
+
+#include "xe_gt_types.h"
+
+struct drm_printer;
+
+void xe_gt_topology_init(struct xe_gt *gt);
+
+void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+
+unsigned int
+xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
+
+#endif /* __XE_GT_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
new file mode 100644
index 0000000000000..c80a9215098d9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_TYPES_H_
+#define _XE_GT_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_reg_sr_types.h"
+#include "xe_sa_types.h"
+#include "xe_uc_types.h"
+
+struct xe_engine_ops;
+struct xe_ggtt;
+struct xe_migrate;
+struct xe_ring_ops;
+struct xe_ttm_gtt_mgr;
+struct xe_ttm_vram_mgr;
+
+enum xe_gt_type {
+	XE_GT_TYPE_UNINITIALIZED,
+	XE_GT_TYPE_MAIN,
+	XE_GT_TYPE_REMOTE,
+	XE_GT_TYPE_MEDIA,
+};
+
+#define XE_MAX_DSS_FUSE_REGS	2
+#define XE_MAX_EU_FUSE_REGS	1
+
+typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
+
+struct xe_mmio_range {
+	u32 start;
+	u32 end;
+};
+
+/*
+ * The hardware has multiple kinds of multicast register ranges that need
+ * special register steering (and future platforms are expected to add
+ * additional types).
+ *
+ * During driver startup, we initialize the steering control register to
+ * direct reads to a slice/subslice that are valid for the 'subslice' class
+ * of multicast registers.  If another type of steering does not have any
+ * overlap in valid steering targets with 'subslice' style registers, we will
+ * need to explicitly re-steer reads of registers of the other type.
+ *
+ * Only the replication types that may need additional non-default steering
+ * are listed here.
+ */
+enum xe_steering_type {
+	L3BANK,
+	MSLICE,
+	LNCF,
+	DSS,
+	OADDRM,
+
+	/*
+	 * On some platforms there are multiple types of MCR registers that
+	 * will always return a non-terminated value at instance (0, 0).  We'll
+	 * lump those all into a single category to keep things simple.
+	 */
+	INSTANCE0,
+
+	NUM_STEERING_TYPES
+};
+
+/**
+ * struct xe_gt - Top level struct of a graphics tile
+ *
+ * A graphics tile may be a physical split (duplicate pieces of silicon,
+ * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). Either way
+ * this structure encapsulates of everything a GT is (MMIO, VRAM, memory
+ * management, microcontrols, and a hardware set of engines).
+ */
+struct xe_gt {
+	/** @xe: backpointer to XE device */
+	struct xe_device *xe;
+
+	/** @info: GT info */
+	struct {
+		/** @type: type of GT */
+		enum xe_gt_type type;
+		/** @id: id of GT */
+		u8 id;
+		/** @vram: id of the VRAM for this GT */
+		u8 vram_id;
+		/** @clock_freq: clock frequency */
+		u32 clock_freq;
+		/** @engine_mask: mask of engines present on GT */
+		u64 engine_mask;
+	} info;
+
+	/**
+	 * @mmio: mmio info for GT, can be subset of the global device mmio
+	 * space
+	 */
+	struct {
+		/** @size: size of MMIO space on GT */
+		size_t size;
+		/** @regs: pointer to MMIO space on GT */
+		void *regs;
+		/** @fw: force wake for GT */
+		struct xe_force_wake fw;
+		/**
+		 * @adj_limit: adjust MMIO address if address is below this
+		 * value
+		 */
+		u32 adj_limit;
+		/** @adj_offset: offect to add to MMIO address when adjusting */
+		u32 adj_offset;
+	} mmio;
+
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+
+	/**
+	 * @mem: memory management info for GT, multiple GTs can point to same
+	 * objects (virtual split)
+	 */
+	struct {
+		/**
+		 * @vram: VRAM info for GT, multiple GTs can point to same info
+		 * (virtual split), can be subset of global device VRAM
+		 */
+		struct {
+			/** @io_start: start address of VRAM */
+			resource_size_t io_start;
+			/** @size: size of VRAM */
+			resource_size_t size;
+			/** @mapping: pointer to VRAM mappable space */
+			void *__iomem mapping;
+		} vram;
+		/** @vram_mgr: VRAM TTM manager */
+		struct xe_ttm_vram_mgr *vram_mgr;
+		/** @gtt_mr: GTT TTM manager */
+		struct xe_ttm_gtt_mgr *gtt_mgr;
+		/** @ggtt: Global graphics translation table */
+		struct xe_ggtt *ggtt;
+	} mem;
+
+	/** @reset: state for GT resets */
+	struct {
+		/**
+		 * @worker: work so GT resets can done async allowing to reset
+		 * code to safely flush all code paths
+		 */
+		struct work_struct worker;
+	} reset;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/**
+		 * @bb_pool: Pool from which batchbuffers, for USM operations
+		 * (e.g. migrations, fixing page tables), are allocated.
+		 * Dedicated pool needed so USM operations to not get blocked
+		 * behind any user operations which may have resulted in a
+		 * fault.
+		 */
+		struct xe_sa_manager bb_pool;
+		/**
+		 * @reserved_bcs_instance: reserved BCS instance used for USM
+		 * operations (e.g. mmigrations, fixing page tables)
+		 */
+		u16 reserved_bcs_instance;
+		/**
+		 * @tlb_invalidation_seqno: TLB invalidation seqno, protected by
+		 * CT lock
+		 */
+#define TLB_INVALIDATION_SEQNO_MAX	0x100000
+		int tlb_invalidation_seqno;
+		/**
+		 * @tlb_invalidation_seqno_recv: last received TLB invalidation
+		 * seqno, protected by CT lock
+		 */
+		int tlb_invalidation_seqno_recv;
+		/** @pf_wq: page fault work queue, unbound, high priority */
+		struct workqueue_struct *pf_wq;
+		/** @acc_wq: access counter work queue, unbound, high priority */
+		struct workqueue_struct *acc_wq;
+		/**
+		 * @pf_queue: Page fault queue used to sync faults so faults can
+		 * be processed not under the GuC CT lock. The queue is sized so
+		 * it can sync all possible faults (1 per physical engine).
+		 * Multiple queues exists for page faults from different VMs are
+		 * be processed in parallel.
+		 */
+		struct pf_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define PF_QUEUE_NUM_DW	128
+			/** @data: data in the page fault queue */
+			u32 data[PF_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for page fault queue,
+			 * moved by worker which processes faults.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for page fault queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects page fault queue */
+			spinlock_t lock;
+			/** @worker: to process page faults */
+			struct work_struct worker;
+#define NUM_PF_QUEUE	4
+		} pf_queue[NUM_PF_QUEUE];
+		/**
+		 * @acc_queue: Same as page fault queue, cannot process access
+		 * counters under CT lock.
+		 */
+		struct acc_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define ACC_QUEUE_NUM_DW	128
+			/** @data: data in the page fault queue */
+			u32 data[ACC_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for page fault queue,
+			 * moved by worker which processes faults.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for page fault queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects page fault queue */
+			spinlock_t lock;
+			/** @worker: to process access counters */
+			struct work_struct worker;
+#define NUM_ACC_QUEUE	4
+		} acc_queue[NUM_ACC_QUEUE];
+	} usm;
+
+	/** @ordered_wq: used to serialize GT resets and TDRs */
+	struct workqueue_struct *ordered_wq;
+
+	/** @uc: micro controllers on the GT */
+	struct xe_uc uc;
+
+	/** @engine_ops: submission backend engine operations */
+	const struct xe_engine_ops *engine_ops;
+
+	/**
+	 * @ring_ops: ring operations for this hw engine (1 per engine class)
+	 */
+	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];
+
+	/** @fence_irq: fence IRQs (1 per engine class) */
+	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];
+
+	/** @default_lrc: default LRC state */
+	void *default_lrc[XE_ENGINE_CLASS_MAX];
+
+	/** @hw_engines: hardware engines on the GT */
+	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
+
+	/** @kernel_bb_pool: Pool from which batchbuffers are allocated */
+	struct xe_sa_manager kernel_bb_pool;
+
+	/** @migrate: Migration helper for vram blits and clearing */
+	struct xe_migrate *migrate;
+
+	/** @pcode: GT's PCODE */
+	struct {
+		/** @lock: protecting GT's PCODE mailbox data */
+		struct mutex lock;
+	} pcode;
+
+	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
+	struct kobject *sysfs;
+
+	/** @mocs: info */
+	struct {
+		/** @uc_index: UC index */
+		u8 uc_index;
+		/** @wb_index: WB index, only used on L3_CCS platforms */
+		u8 wb_index;
+	} mocs;
+
+	/** @fuse_topo: GT topology reported by fuse registers */
+	struct {
+		/** @g_dss_mask: dual-subslices usable by geometry */
+		xe_dss_mask_t g_dss_mask;
+
+		/** @c_dss_mask: dual-subslices usable by compute */
+		xe_dss_mask_t c_dss_mask;
+
+		/** @eu_mask_per_dss: EU mask per DSS*/
+		xe_eu_mask_t eu_mask_per_dss;
+	} fuse_topo;
+
+	/** @steering: register steering for individual HW units */
+	struct {
+		/* @ranges: register ranges used for this steering type */
+		const struct xe_mmio_range *ranges;
+
+		/** @group_target: target to steer accesses to */
+		u16 group_target;
+		/** @instance_target: instance to steer accesses to */
+		u16 instance_target;
+	} steering[NUM_STEERING_TYPES];
+
+	/**
+	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
+	 *    of a steered operation
+	 */
+	spinlock_t mcr_lock;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
new file mode 100644
index 0000000000000..3c285d849ef65
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_guc_log.h"
+#include "xe_guc_reg.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_gt.h"
+#include "xe_platform_types.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+#include "xe_mmio.h"
+#include "xe_force_wake.h"
+#include "i915_reg_defs.h"
+#include "gt/intel_gt_regs.h"
+
+/* TODO: move to common file */
+#define GUC_PVC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
+#define PVC_MOCS_UC_INDEX		1
+#define PVC_GUC_MOCS_INDEX(index)	REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\
+						       index)
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP    0xFEE00000
+static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
+			    struct xe_bo *bo)
+{
+	u32 addr = xe_bo_ggtt_addr(bo);
+
+	XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc)));
+	XE_BUG_ON(range_overflows_t(u32, addr, bo->size, GUC_GGTT_TOP));
+
+	return addr;
+}
+
+static u32 guc_ctl_debug_flags(struct xe_guc *guc)
+{
+	u32 level = xe_guc_log_get_level(&guc->log);
+	u32 flags = 0;
+
+	if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
+		flags |= GUC_LOG_DISABLED;
+	else
+		flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
+			 GUC_LOG_VERBOSITY_SHIFT;
+
+	return flags;
+}
+
+static u32 guc_ctl_feature_flags(struct xe_guc *guc)
+{
+	return GUC_CTL_ENABLE_SLPC;
+}
+
+static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
+{
+	u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
+	u32 flags;
+
+	#if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
+	#define LOG_UNIT SZ_1M
+	#define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
+	#else
+	#define LOG_UNIT SZ_4K
+	#define LOG_FLAG 0
+	#endif
+
+	#if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+	#define CAPTURE_UNIT SZ_1M
+	#define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
+	#else
+	#define CAPTURE_UNIT SZ_4K
+	#define CAPTURE_FLAG 0
+	#endif
+
+	BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
+
+	BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
+	BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
+	BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
+			(GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
+
+	flags = GUC_LOG_VALID |
+		GUC_LOG_NOTIFY_ON_HALF_FULL |
+		CAPTURE_FLAG |
+		LOG_FLAG |
+		((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+		((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+		((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
+		 GUC_LOG_CAPTURE_SHIFT) |
+		(offset << GUC_LOG_BUF_ADDR_SHIFT);
+
+	#undef LOG_UNIT
+	#undef LOG_FLAG
+	#undef CAPTURE_UNIT
+	#undef CAPTURE_FLAG
+
+	return flags;
+}
+
+static u32 guc_ctl_ads_flags(struct xe_guc *guc)
+{
+	u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
+	u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+
+	return flags;
+}
+
+static u32 guc_ctl_wa_flags(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 flags = 0;
+
+	/* Wa_22012773006:gen11,gen12 < XeHP */
+	if (GRAPHICS_VER(xe) >= 11 &&
+	    GRAPHICS_VERx100(xe) < 1250)
+		flags |= GUC_WA_POLLCS;
+
+	/* Wa_16011759253 */
+	/* Wa_22011383443 */
+	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) ||
+	    IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+		flags |= GUC_WA_GAM_CREDITS;
+
+	/* Wa_14014475959 */
+	if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) ||
+	    xe->info.platform == XE_DG2)
+		flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
+	/*
+	 * Wa_14012197797
+	 * Wa_22011391025
+	 *
+	 * The same WA bit is used for both and 22011391025 is applicable to
+	 * all DG2.
+	 */
+	if (xe->info.platform == XE_DG2)
+		flags |= GUC_WA_DUAL_QUEUE;
+
+	/*
+	 * Wa_2201180203
+	 * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe
+	 * errors. Disable this for PVC A0 steppings.
+	 */
+	if (GRAPHICS_VER(xe) <= 12 &&
+	    !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+		flags |= GUC_WA_PRE_PARSER;
+
+	/* Wa_16011777198 */
+	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
+	    IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
+				STEP_B0))
+		flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
+
+	/*
+	 * Wa_22012727170
+	 * Wa_22012727685
+	 *
+	 * This WA is applicable to PVC CT A0, but causes media regressions. 
+	 * Drop the WA for PVC.
+	 */
+	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
+	    IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
+				STEP_FOREVER))
+		flags |= GUC_WA_CONTEXT_ISOLATION;
+
+	/* Wa_16015675438, Wa_18020744125 */
+	if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
+		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
+	/* Wa_1509372804 */
+	if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0))
+		flags |= GUC_WA_RENDER_RST_RC6_EXIT;
+
+
+	return flags;
+}
+
+static u32 guc_ctl_devid(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+
+	return (((u32)xe->info.devid) << 16) | xe->info.revid;
+}
+
+static void guc_init_params(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 *params = guc->params;
+	int i;
+
+	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+	BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2);
+
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
+	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+/*
+ * Initialise the GuC parameter block before starting the firmware
+ * transfer. These parameters are read by the firmware on startup
+ * and cannot be changed thereafter.
+ */
+void guc_write_params(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int i;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]);
+}
+
+#define MEDIA_GUC_HOST_INTERRUPT        _MMIO(0x190304)
+
+int xe_guc_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	int ret;
+
+	guc->fw.type = XE_UC_FW_TYPE_GUC;
+	ret = xe_uc_fw_init(&guc->fw);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_log_init(&guc->log);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ads_init(&guc->ads);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ct_init(&guc->ct);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_pc_init(&guc->pc);
+	if (ret)
+		goto out;
+
+	guc_init_params(guc);
+
+	if (xe_gt_is_media_type(gt))
+		guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg;
+	else
+		guc->notify_reg = GEN11_GUC_HOST_INTERRUPT.reg;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out:
+	drm_err(&xe->drm, "GuC init failed with %d", ret);
+	return ret;
+}
+
+/**
+ * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
+ * @guc: The GuC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_init_post_hwconfig(struct xe_guc *guc)
+{
+	return xe_guc_ads_init_post_hwconfig(&guc->ads);
+}
+
+int xe_guc_post_load_init(struct xe_guc *guc)
+{
+	xe_guc_ads_populate_post_load(&guc->ads);
+
+	return 0;
+}
+
+int xe_guc_reset(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 guc_status;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC);
+
+	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5);
+	if (ret) {
+		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
+			xe_mmio_read32(gt, GEN6_GDRST.reg));
+		goto err_out;
+	}
+
+	guc_status = xe_mmio_read32(gt, GUC_STATUS.reg);
+	if (!(guc_status & GS_MIA_IN_RESET)) {
+		drm_err(&xe->drm,
+			"GuC status: 0x%x, MIA core expected to be in reset\n",
+			guc_status);
+		ret = -EIO;
+		goto err_out;
+	}
+
+	return 0;
+
+err_out:
+
+	return ret;
+}
+
+static void guc_prepare_xfer(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe =  guc_to_xe(guc);
+	u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
+		GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
+		GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
+		GUC_ENABLE_MIA_CLOCK_GATING;
+
+	if (GRAPHICS_VERx100(xe) < 1250)
+		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+				GUC_ENABLE_MIA_CACHING;
+
+	if (xe->info.platform == XE_PVC)
+		shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX);
+
+	/* Must program this register before loading the ucode with DMA */
+	xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags);
+
+	xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE);
+}
+
+/*
+ * Supporting MMIO & in memory RSA
+ */
+static int guc_xfer_rsa(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 rsa[UOS_RSA_SCRATCH_COUNT];
+	size_t copied;
+	int i;
+
+	if (guc->fw.rsa_size > 256) {
+		u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
+				    xe_uc_fw_rsa_offset(&guc->fw);
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr);
+		return 0;
+	}
+
+	copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa));
+	if (copied < sizeof(rsa))
+		return -ENOMEM;
+
+	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, rsa[i]);
+
+	return 0;
+}
+
+/*
+ * Read the GuC status register (GUC_STATUS) and store it in the
+ * specified location; then return a boolean indicating whether
+ * the value matches either of two values representing completion
+ * of the GuC boot process.
+ *
+ * This is used for polling the GuC status in a wait_for()
+ * loop below.
+ */
+static bool guc_ready(struct xe_guc *guc, u32 *status)
+{
+	u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg);
+	u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
+
+	*status = val;
+	return uk_val == XE_GUC_LOAD_STATUS_READY;
+}
+
+static int guc_wait_ucode(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 status;
+	int ret;
+
+	/*
+	 * Wait for the GuC to start up.
+	 * NB: Docs recommend not using the interrupt for completion.
+	 * Measurements indicate this should take no more than 20ms
+	 * (assuming the GT clock is at maximum frequency). So, a
+	 * timeout here indicates that the GuC has failed and is unusable.
+	 * (Higher levels of the driver may decide to reset the GuC and
+	 * attempt the ucode load again if this happens.)
+	 *
+	 * FIXME: There is a known (but exceedingly unlikely) race condition
+	 * where the asynchronous frequency management code could reduce
+	 * the GT clock while a GuC reload is in progress (during a full
+	 * GT reset). A fix is in progress but there are complex locking
+	 * issues to be resolved. In the meantime bump the timeout to
+	 * 200ms. Even at slowest clock, this should be sufficient. And
+	 * in the working case, a larger timeout makes no difference.
+	 */
+	ret = wait_for(guc_ready(guc, &status), 200);
+	if (ret) {
+		struct drm_device *drm = &xe->drm;
+		struct drm_printer p = drm_info_printer(drm->dev);
+
+		drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
+		drm_info(drm, "GuC load failed: status: Reset = %d, "
+			"BootROM = 0x%02X, UKernel = 0x%02X, "
+			"MIA = 0x%02X, Auth = 0x%02X\n",
+			REG_FIELD_GET(GS_MIA_IN_RESET, status),
+			REG_FIELD_GET(GS_BOOTROM_MASK, status),
+			REG_FIELD_GET(GS_UKERNEL_MASK, status),
+			REG_FIELD_GET(GS_MIA_MASK, status),
+			REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+		if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+			drm_info(drm, "GuC firmware signature verification failed\n");
+			ret = -ENOEXEC;
+		}
+
+		if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
+		    XE_GUC_LOAD_STATUS_EXCEPTION) {
+			drm_info(drm, "GuC firmware exception. EIP: %#x\n",
+				 xe_mmio_read32(guc_to_gt(guc),
+						SOFT_SCRATCH(13).reg));
+			ret = -ENXIO;
+		}
+
+		xe_guc_log_print(&guc->log, &p);
+	} else {
+		drm_dbg(&xe->drm, "GuC successfully loaded");
+	}
+
+	return ret;
+}
+
+static int __xe_guc_upload(struct xe_guc *guc)
+{
+	int ret;
+
+	guc_write_params(guc);
+	guc_prepare_xfer(guc);
+
+	/*
+	 * Note that GuC needs the CSS header plus uKernel code to be copied
+	 * by the DMA engine in one operation, whereas the RSA signature is
+	 * loaded separately, either by copying it to the UOS_RSA_SCRATCH
+	 * register (if key size <= 256) or through a ggtt-pinned vma (if key
+	 * size > 256). The RSA size and therefore the way we provide it to the
+	 * HW is fixed for each platform and hard-coded in the bootrom.
+	 */
+	ret = guc_xfer_rsa(guc);
+	if (ret)
+		goto out;
+	/*
+	 * Current uCode expects the code to be loaded at 8k; locations below
+	 * this are used for the stack.
+	 */
+	ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
+	if (ret)
+		goto out;
+
+	/* Wait for authentication */
+	ret = guc_wait_ucode(guc);
+	if (ret)
+		goto out;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
+	return 0;
+
+out:
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return 0	/* FIXME: ret, don't want to stop load currently */;
+}
+
+/**
+ * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table
+ * @guc: The GuC object
+ *
+ * This function uploads a minimal GuC that does not support submissions but
+ * in a state where the hwconfig table can be read. Next, it reads and parses
+ * the hwconfig table so it can be used for subsequent steps in the driver load.
+ * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only).
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
+{
+	int ret;
+
+	xe_guc_ads_populate_minimal(&guc->ads);
+
+	ret = __xe_guc_upload(guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_hwconfig_init(guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_enable_communication(guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int xe_guc_upload(struct xe_guc *guc)
+{
+	xe_guc_ads_populate(&guc->ads);
+
+	return __xe_guc_upload(guc);
+}
+
+static void guc_handle_mmio_msg(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 msg;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg);
+	msg &= XE_GUC_RECV_MSG_EXCEPTION |
+		XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
+	xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0);
+
+	if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC crash dump notification!\n");
+
+	if (msg & XE_GUC_RECV_MSG_EXCEPTION)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC exception notification!\n");
+}
+
+void guc_enable_irq(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 events = xe_gt_is_media_type(gt) ?
+		REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST)  :
+		REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg,
+			REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
+	if (xe_gt_is_media_type(gt))
+		xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0);
+	else
+		xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events);
+}
+
+int xe_guc_enable_communication(struct xe_guc *guc)
+{
+	int err;
+
+	guc_enable_irq(guc);
+
+	xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg,
+		      ARAT_EXPIRED_INTRMSK, 0);
+
+	err = xe_guc_ct_enable(&guc->ct);
+	if (err)
+		return err;
+
+	guc_handle_mmio_msg(guc);
+
+	return 0;
+}
+
+int xe_guc_suspend(struct xe_guc *guc)
+{
+	int ret;
+	u32 action[] = {
+		XE_GUC_ACTION_CLIENT_SOFT_RESET,
+	};
+
+	ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	if (ret) {
+		drm_err(&guc_to_xe(guc)->drm,
+			"GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
+		return ret;
+	}
+
+	xe_guc_sanitize(guc);
+	return 0;
+}
+
+void xe_guc_notify(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+
+	xe_mmio_write32(gt, guc->notify_reg, GUC_SEND_TRIGGER);
+}
+
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_AUTHENTICATE_HUC,
+		rsa_addr
+	};
+
+	return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+#define MEDIA_SOFT_SCRATCH(n)           _MMIO(0x190310 + (n) * 4)
+#define MEDIA_SOFT_SCRATCH_COUNT        4
+
+int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 header;
+	u32 reply_reg = xe_gt_is_media_type(gt) ?
+		MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg;
+	int ret;
+	int i;
+
+	XE_BUG_ON(guc->ct.enabled);
+	XE_BUG_ON(!len);
+	XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT);
+	XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT);
+	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) !=
+		  GUC_HXG_ORIGIN_HOST);
+	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) !=
+		  GUC_HXG_TYPE_REQUEST);
+
+retry:
+	/* Not in critical data-path, just do if else for GT type */
+	if (xe_gt_is_media_type(gt)) {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg,
+					request[i]);
+#define LAST_INDEX	MEDIA_SOFT_SCRATCH_COUNT - 1
+		xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(LAST_INDEX).reg);
+	} else {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg,
+					request[i]);
+#undef LAST_INDEX
+#define LAST_INDEX	GEN11_SOFT_SCRATCH_COUNT - 1
+		xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(LAST_INDEX).reg);
+	}
+
+	xe_guc_notify(guc);
+
+	ret = xe_mmio_wait32(gt, reply_reg,
+			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
+					GUC_HXG_ORIGIN_GUC),
+			     GUC_HXG_MSG_0_ORIGIN,
+			     50);
+	if (ret) {
+timeout:
+		drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n",
+			request[0], xe_mmio_read32(gt, reply_reg));
+		return ret;
+	}
+
+	header = xe_mmio_read32(gt, reply_reg);
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
+#define done ({ header = xe_mmio_read32(gt, reply_reg); \
+		FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \
+		GUC_HXG_ORIGIN_GUC || \
+		FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \
+		GUC_HXG_TYPE_NO_RESPONSE_BUSY; })
+
+		ret = wait_for(done, 1000);
+		if (unlikely(ret))
+			goto timeout;
+		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
+				       GUC_HXG_ORIGIN_GUC))
+			goto proto;
+#undef done
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
+
+		drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n",
+			request[0], reason);
+		goto retry;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
+		u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
+
+		drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n",
+			request[0], error, hint);
+		return -ENXIO;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
+	    GUC_HXG_TYPE_RESPONSE_SUCCESS) {
+proto:
+		drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n",
+			request[0], header);
+		return -EPROTO;
+	}
+
+	/* Use data from the GuC response as our return value */
+	return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header);
+}
+
+static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
+{
+	u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+			   GUC_ACTION_HOST2GUC_SELF_CFG),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32,
+			   lower_32_bits(val)),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64,
+			   upper_32_bits(val)),
+	};
+	int ret;
+
+	XE_BUG_ON(len > 2);
+	XE_BUG_ON(len == 1 && upper_32_bits(val));
+
+	/* Self config must go over MMIO */
+	ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request));
+
+	if (unlikely(ret < 0))
+		return ret;
+	if (unlikely(ret > 1))
+		return -EPROTO;
+	if (unlikely(!ret))
+		return -ENOKEY;
+
+	return 0;
+}
+
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val)
+{
+	return guc_self_cfg(guc, key, 1, val);
+}
+
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
+{
+	return guc_self_cfg(guc, key, 2, val);
+}
+
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
+{
+	if (iir & GUC_INTR_GUC2HOST)
+		xe_guc_ct_irq_handler(&guc->ct);
+}
+
+void xe_guc_sanitize(struct xe_guc *guc)
+{
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+	xe_guc_ct_disable(&guc->ct);
+}
+
+int xe_guc_reset_prepare(struct xe_guc *guc)
+{
+	return xe_guc_submit_reset_prepare(guc);
+}
+
+void xe_guc_reset_wait(struct xe_guc *guc)
+{
+	xe_guc_submit_reset_wait(guc);
+}
+
+void xe_guc_stop_prepare(struct xe_guc *guc)
+{
+	XE_WARN_ON(xe_guc_pc_stop(&guc->pc));
+}
+
+int xe_guc_stop(struct xe_guc *guc)
+{
+	int ret;
+
+	xe_guc_ct_disable(&guc->ct);
+
+	ret = xe_guc_submit_stop(guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int xe_guc_start(struct xe_guc *guc)
+{
+	int ret;
+
+	ret = xe_guc_submit_start(guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_start(&guc->pc);
+	XE_WARN_ON(ret);
+
+	return 0;
+}
+
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 status;
+	int err;
+	int i;
+
+	xe_uc_fw_print(&guc->fw, p);
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return;
+
+	status = xe_mmio_read32(gt, GUC_STATUS.reg);
+
+	drm_printf(p, "\nGuC status 0x%08x:\n", status);
+	drm_printf(p, "\tBootrom status = 0x%x\n",
+		   (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+	drm_printf(p, "\tuKernel status = 0x%x\n",
+		   (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+	drm_printf(p, "\tMIA Core status = 0x%x\n",
+		   (status & GS_MIA_MASK) >> GS_MIA_SHIFT);
+	drm_printf(p, "\tLog level = %d\n",
+		   xe_guc_log_get_level(&guc->log));
+
+	drm_puts(p, "\nScratch registers:\n");
+	for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
+		drm_printf(p, "\t%2d: \t0x%x\n",
+			   i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg));
+	}
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+	xe_guc_ct_print(&guc->ct, p);
+	xe_guc_submit_print(guc, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
new file mode 100644
index 0000000000000..72b71d75566cb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_H_
+#define _XE_GUC_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_guc_types.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_guc_init(struct xe_guc *guc);
+int xe_guc_init_post_hwconfig(struct xe_guc *guc);
+int xe_guc_post_load_init(struct xe_guc *guc);
+int xe_guc_reset(struct xe_guc *guc);
+int xe_guc_upload(struct xe_guc *guc);
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc);
+int xe_guc_enable_communication(struct xe_guc *guc);
+int xe_guc_suspend(struct xe_guc *guc);
+void xe_guc_notify(struct xe_guc *guc);
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
+int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len);
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val);
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val);
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir);
+void xe_guc_sanitize(struct xe_guc *guc);
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p);
+int xe_guc_reset_prepare(struct xe_guc *guc);
+void xe_guc_reset_wait(struct xe_guc *guc);
+void xe_guc_stop_prepare(struct xe_guc *guc);
+int xe_guc_stop(struct xe_guc *guc);
+int xe_guc_start(struct xe_guc *guc);
+
+static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+		return GUC_RENDER_CLASS;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		return GUC_VIDEO_CLASS;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return GUC_VIDEOENHANCE_CLASS;
+	case XE_ENGINE_CLASS_COPY:
+		return GUC_BLITTER_CLASS;
+	case XE_ENGINE_CLASS_COMPUTE:
+		return GUC_COMPUTE_CLASS;
+	case XE_ENGINE_CLASS_OTHER:
+	default:
+		XE_WARN_ON(class);
+		return -1;
+	}
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
new file mode 100644
index 0000000000000..0c08cecaca401
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_reg.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_engine_regs.h"
+
+/* Slack of a few additional entries per engine */
+#define ADS_REGSET_EXTRA_MAX	8
+
+static struct xe_guc *
+ads_to_guc(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_guc, ads);
+}
+
+static struct xe_gt *
+ads_to_gt(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_gt, uc.guc.ads);
+}
+
+static struct xe_device *
+ads_to_xe(struct xe_guc_ads *ads)
+{
+	return gt_to_xe(ads_to_gt(ads));
+}
+
+static struct iosys_map *
+ads_to_map(struct xe_guc_ads *ads)
+{
+	return &ads->bo->vmap;
+}
+
+/* UM Queue parameters: */
+#define GUC_UM_QUEUE_SIZE       (SZ_64K)
+#define GUC_PAGE_RES_TIMEOUT_US (-1)
+
+/*
+ * The Additional Data Struct (ADS) has pointers for different buffers used by
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ *      +---------------------------------------+ <== base
+ *      | guc_ads                               |
+ *      +---------------------------------------+
+ *      | guc_policies                          |
+ *      +---------------------------------------+
+ *      | guc_gt_system_info                    |
+ *      +---------------------------------------+
+ *      | guc_engine_usage                      |
+ *      +---------------------------------------+
+ *      | guc_um_init_params                    |
+ *      +---------------------------------------+ <== static
+ *      | guc_mmio_reg[countA] (engine 0.0)     |
+ *      | guc_mmio_reg[countB] (engine 0.1)     |
+ *      | guc_mmio_reg[countC] (engine 1.0)     |
+ *      |   ...                                 |
+ *      +---------------------------------------+ <== dynamic
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | golden contexts                       |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | capture lists                         |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | UM queues                             |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | private data                          |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ */
+struct __guc_ads_blob {
+	struct guc_ads ads;
+	struct guc_policies policies;
+	struct guc_gt_system_info system_info;
+	struct guc_engine_usage engine_usage;
+	struct guc_um_init_params um_init_params;
+	/* From here on, location is dynamic! Refer to above diagram. */
+	struct guc_mmio_reg regset[0];
+} __packed;
+
+#define ads_blob_read(ads_, field_) \
+	xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
+			struct __guc_ads_blob, field_)
+
+#define ads_blob_write(ads_, field_, val_)			\
+	xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0,	\
+			struct __guc_ads_blob, field_, val_)
+
+#define info_map_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)
+
+#define info_map_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
+
+static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
+{
+	XE_BUG_ON(!ads->regset_size);
+
+	return ads->regset_size;
+}
+
+static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads->golden_lrc_size);
+}
+
+static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
+{
+	/* FIXME: Allocate a proper capture list */
+	return PAGE_ALIGN(PAGE_SIZE);
+}
+
+static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+
+	if (!xe->info.supports_usm)
+		return 0;
+
+	return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
+}
+
+static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
+}
+
+static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
+{
+	return offsetof(struct __guc_ads_blob, regset);
+}
+
+static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_regset_offset(ads) +
+		guc_ads_regset_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_golden_lrc_offset(ads) +
+		guc_ads_golden_lrc_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
+{
+	u32 offset;
+
+	offset = guc_ads_capture_offset(ads) +
+		 guc_ads_capture_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_um_queues_offset(ads) +
+		guc_ads_um_queues_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_size(struct xe_guc_ads *ads)
+{
+	return guc_ads_private_data_offset(ads) +
+		guc_ads_private_data_size(ads);
+}
+
+static void guc_ads_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_ads *ads = arg;
+
+	xe_bo_unpin_map_no_vm(ads->bo);
+}
+
+static size_t calculate_regset_size(struct xe_gt *gt)
+{
+	struct xe_reg_sr_entry *sr_entry;
+	unsigned long sr_idx;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	unsigned int count = 0;
+
+	for_each_hw_engine(hwe, gt, id)
+		xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
+			count++;
+
+	count += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES;
+
+	return count * sizeof(struct guc_mmio_reg);
+}
+
+static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for_each_hw_engine(hwe, gt, id)
+		if (hwe->class == class)
+			mask |= BIT(hwe->instance);
+
+	return mask;
+}
+
+static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	size_t total_size = 0, alloc_size, real_size;
+	int class;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		if (class == XE_ENGINE_CLASS_OTHER)
+			continue;
+
+		if (!engine_enable_mask(gt, class))
+			continue;
+
+		real_size = xe_lrc_size(xe, class);
+		alloc_size = PAGE_ALIGN(real_size);
+		total_size += alloc_size;
+	}
+
+	return total_size;
+}
+
+#define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)
+
+int xe_guc_ads_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_bo *bo;
+	int err;
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ads_size(ads) +
+				  MAX_GOLDEN_LRC_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ads->bo = bo;
+
+	err = drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
+ * @ads: Additional data structures object
+ *
+ * Recalcuate golden_lrc_size & regset_size as the number hardware engines may
+ * have changed after the hwconfig was loaded. Also verify the new sizes fit in
+ * the already allocated ADS buffer object.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	u32 prev_regset_size = ads->regset_size;
+
+	XE_BUG_ON(!ads->bo);
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	XE_WARN_ON(ads->golden_lrc_size +
+		   (ads->regset_size - prev_regset_size) >
+		   MAX_GOLDEN_LRC_SIZE);
+
+	return 0;
+}
+
+static void guc_policies_init(struct xe_guc_ads *ads)
+{
+	ads_blob_write(ads, policies.dpc_promote_time,
+		       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
+	ads_blob_write(ads, policies.max_num_work_items,
+		       GLOBAL_POLICY_MAX_NUM_WI);
+	ads_blob_write(ads, policies.global_flags, 0);
+	ads_blob_write(ads, policies.is_valid, 1);
+}
+
+static void fill_engine_enable_masks(struct xe_gt *gt,
+				     struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
+	info_map_write(xe, info_map,
+		       engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
+}
+
+static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u8 guc_class;
+
+	for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
+		if (!info_map_read(xe, &info_map,
+				   engine_enabled_masks[guc_class]))
+			continue;
+
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       guc_ads_golden_lrc_size(ads) -
+			       xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       xe_bo_ggtt_addr(ads->bo) +
+			       guc_ads_golden_lrc_offset(ads));
+	}
+}
+
+static void guc_mapping_table_init_invalid(struct xe_gt *gt,
+					   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int i, j;
+
+	/* Table must be set to invalid values for entries not used */
+	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
+		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
+			info_map_write(xe, info_map, mapping_table[i][j],
+				       GUC_MAX_INSTANCES_PER_CLASS);
+}
+
+static void guc_mapping_table_init(struct xe_gt *gt,
+				   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	guc_mapping_table_init_invalid(gt, info_map);
+
+	for_each_hw_engine(hwe, gt, id) {
+		u8 guc_class;
+
+		guc_class = xe_engine_class_to_guc_class(hwe->class);
+		info_map_write(xe, info_map,
+			       mapping_table[guc_class][hwe->logical_instance],
+			       hwe->instance);
+	}
+}
+
+static void guc_capture_list_init(struct xe_guc_ads *ads)
+{
+	int i, j;
+	u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads);
+
+	/* FIXME: Populate a proper capture list */
+	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+		for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
+			ads_blob_write(ads, ads.capture_instance[i][j], addr);
+			ads_blob_write(ads, ads.capture_class[i][j], addr);
+		}
+
+		ads_blob_write(ads, ads.capture_global[i], addr);
+	}
+}
+
+static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
+				      struct iosys_map *regset_map,
+				      u32 reg, u32 flags,
+				      unsigned int n_entry)
+{
+	struct guc_mmio_reg entry = {
+		.offset = reg,
+		.flags = flags,
+		/* TODO: steering */
+	};
+
+	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
+			 &entry, sizeof(entry));
+}
+
+static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
+					  struct iosys_map *regset_map,
+					  struct xe_hw_engine *hwe)
+{
+	struct xe_hw_engine *hwe_rcs_reset_domain =
+		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+	unsigned count = 0;
+	const struct {
+		u32 reg;
+		u32 flags;
+		bool skip;
+	} *e, extra_regs[] = {
+		{ .reg = RING_MODE_GEN7(hwe->mmio_base).reg,		},
+		{ .reg = RING_HWS_PGA(hwe->mmio_base).reg,		},
+		{ .reg = RING_IMR(hwe->mmio_base).reg,			},
+		{ .reg = GEN12_RCU_MODE.reg, .flags = 0x3,
+		  .skip = hwe != hwe_rcs_reset_domain			},
+	};
+	u32 i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
+
+	xa_for_each(&hwe->reg_sr.xa, idx, entry) {
+		u32 flags = entry->masked_reg ? GUC_REGSET_MASKED : 0;
+
+		guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++);
+	}
+
+	for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
+		if (e->skip)
+			continue;
+
+		guc_mmio_regset_write_one(ads, regset_map,
+					  e->reg, e->flags, count++);
+	}
+
+	for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
+		guc_mmio_regset_write_one(ads, regset_map,
+					  GEN9_LNCFCMOCS(i).reg, 0, count++);
+	}
+
+	XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg)));
+
+	return count;
+}
+
+static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
+{
+	size_t regset_offset = guc_ads_regset_offset(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
+	struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+							    regset_offset);
+
+	for_each_hw_engine(hwe, gt, id) {
+		unsigned int count;
+		u8 gc;
+
+		/*
+		 * 1. Write all MMIO entries for this engine to the table. No
+		 * need to worry about fused-off engines and when there are
+		 * entries in the regset: the reg_state_list has been zero'ed
+		 * by xe_guc_ads_populate()
+		 */
+		count = guc_mmio_regset_write(ads, &regset_map, hwe);
+		if (!count)
+			continue;
+
+		/*
+		 * 2. Record in the header (ads.reg_state_list) the address
+		 * location and number of entries
+		 */
+		gc = xe_engine_class_to_guc_class(hwe->class);
+		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
+		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);
+
+		addr += count * sizeof(struct guc_mmio_reg);
+		iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));
+	}
+}
+
+static void guc_um_init_params(struct xe_guc_ads *ads)
+{
+	u32 um_queue_offset = guc_ads_um_queues_offset(ads);
+	u64 base_dpa;
+	u32 base_ggtt;
+	int i;
+
+	base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
+	base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
+
+	for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
+		ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
+			       base_dpa + (i * GUC_UM_QUEUE_SIZE));
+		ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
+			       base_ggtt + (i * GUC_UM_QUEUE_SIZE));
+		ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
+			       GUC_UM_QUEUE_SIZE);
+	}
+
+	ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
+		       GUC_PAGE_RES_TIMEOUT_US);
+}
+
+static void guc_doorbell_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+
+	if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
+		u32 distdbreg =
+			xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg);
+
+		ads_blob_write(ads,
+			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
+			       ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
+				& GEN12_DOORBELLS_PER_SQIDI) + 1);
+	}
+}
+
+/**
+ * xe_guc_ads_populate_minimal - populate minimal ADS
+ * @ads: Additional data structures object
+ *
+ * This function populates a minimal ADS that does not support submissions but
+ * enough so the GuC can load and the hwconfig table can be read.
+ */
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u32 base = xe_bo_ggtt_addr(ads->bo);
+
+	XE_BUG_ON(!ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init_invalid(gt, &info_map);
+	guc_doorbell_init(ads);
+
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+void xe_guc_ads_populate(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u32 base = xe_bo_ggtt_addr(ads->bo);
+
+	XE_BUG_ON(!ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	fill_engine_enable_masks(gt, &info_map);
+	guc_mmio_reg_state_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init(gt, &info_map);
+	guc_capture_list_init(ads);
+	guc_doorbell_init(ads);
+
+	if (xe->info.supports_usm) {
+		guc_um_init_params(ads);
+		ads_blob_write(ads, ads.um_init_data, base +
+			       offsetof(struct __guc_ads_blob, um_init_params));
+	}
+
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	size_t total_size = 0, alloc_size, real_size;
+	u32 addr_ggtt, offset;
+	int class;
+
+	offset = guc_ads_golden_lrc_offset(ads);
+	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		u8 guc_class;
+
+		if (class == XE_ENGINE_CLASS_OTHER)
+			continue;
+
+		guc_class = xe_engine_class_to_guc_class(class);
+
+		if (!info_map_read(xe, &info_map,
+				   engine_enabled_masks[guc_class]))
+			continue;
+
+		XE_BUG_ON(!gt->default_lrc[class]);
+
+		real_size = xe_lrc_size(xe, class);
+		alloc_size = PAGE_ALIGN(real_size);
+		total_size += alloc_size;
+
+		/*
+		 * This interface is slightly confusing. We need to pass the
+		 * base address of the full golden context and the size of just
+		 * the engine state, which is the section of the context image
+		 * that starts after the execlists LRC registers. This is
+		 * required to allow the GuC to restore just the engine state
+		 * when a watchdog reset occurs.
+		 * We calculate the engine state size by removing the size of
+		 * what comes before it in the context image (which is identical
+		 * on all engines).
+		 */
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       real_size - xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       addr_ggtt);
+
+		xe_map_memcpy_to(xe, ads_to_map(ads), offset,
+				 gt->default_lrc[class], real_size);
+
+		addr_ggtt += alloc_size;
+		offset += alloc_size;
+	}
+
+	XE_BUG_ON(total_size != ads->golden_lrc_size);
+}
+
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
+{
+	guc_populate_golden_lrc(ads);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h
new file mode 100644
index 0000000000000..138ef62676711
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_H_
+#define _XE_GUC_ADS_H_
+
+#include "xe_guc_ads_types.h"
+
+int xe_guc_ads_init(struct xe_guc_ads *ads);
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads);
+void xe_guc_ads_populate(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
new file mode 100644
index 0000000000000..4afe44bece4bc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_TYPES_H_
+#define _XE_GUC_ADS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_ads - GuC additional data structures (ADS)
+ */
+struct xe_guc_ads {
+	/** @bo: XE BO for GuC ads blob */
+	struct xe_bo *bo;
+	/** @golden_lrc_size: golden LRC size */
+	size_t golden_lrc_size;
+	/** @regset_size: size of register set passed to GuC for save/restore */
+	u32 regset_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
new file mode 100644
index 0000000000000..61a424c41779d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -0,0 +1,1196 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_gt_pagefault.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+	wait_queue_head_t wq;
+	u32 *response_buffer;
+	u32 seqno;
+	u16 response_len;
+	u16 error;
+	u16 hint;
+	u16 reason;
+	bool retry;
+	bool fail;
+	bool done;
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+	g2h_fence->response_buffer = response_buffer;
+	g2h_fence->response_len = 0;
+	g2h_fence->fail = false;
+	g2h_fence->retry = false;
+	g2h_fence->done = false;
+	g2h_fence->seqno = ~0x0;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+	return g2h_fence->seqno == ~0x0;
+}
+
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+	return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate single blob to hold both CTB descriptors and buffers:
+ *
+ *      +--------+-----------------------------------------------+------+
+ *      | offset | contents                                      | size |
+ *      +========+===============================================+======+
+ *      | 0x0000 | H2G CTB Descriptor (send)                     |      |
+ *      +--------+-----------------------------------------------+  4K  |
+ *      | 0x0800 | G2H CTB Descriptor (g2h)                      |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | H2G CT Buffer (send)                          | n*4K |
+ *      |        |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | G2H CT Buffer (g2h)                           | m*4K |
+ *      | + n*4K |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *
+ * Size of each ``CT Buffer`` must be multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using the GuC submission. In that case each request requires a minimum
+ * 2 dwords which gives us a maximum 256 queue'd requests. Hopefully this
+ * enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ */
+
+#define CTB_DESC_SIZE		ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE	(SZ_4K)
+#define CTB_G2H_BUFFER_SIZE	(4 * CTB_H2G_BUFFER_SIZE)
+#define G2H_ROOM_BUFFER_SIZE	(CTB_G2H_BUFFER_SIZE / 4)
+
+static size_t guc_ct_size(void)
+{
+	return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+		CTB_G2H_BUFFER_SIZE;
+}
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_ct *ct = arg;
+
+	xa_destroy(&ct->fence_lookup);
+	xe_bo_unpin_map_no_vm(ct->bo);
+}
+
+static void g2h_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&ct->lock);
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
+	struct xe_bo *bo;
+	int err;
+
+	XE_BUG_ON(guc_ct_size() % PAGE_SIZE);
+
+	mutex_init(&ct->lock);
+	spin_lock_init(&ct->fast_lock);
+	xa_init(&ct->fence_lookup);
+	ct->fence_context = dma_fence_context_alloc(1);
+	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+	init_waitqueue_head(&ct->wq);
+
+	primelockdep(ct);
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(),
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ct->bo = bo;
+
+	err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+#define desc_read(xe_, guc_ctb__, field_)			\
+	xe_map_rd_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_)		\
+	xe_map_wr_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_, val_)
+
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+				struct iosys_map *map)
+{
+	h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+	h2g->resv_space = 0;
+	h2g->tail = 0;
+	h2g->head = 0;
+	h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) -
+		h2g->resv_space;
+	h2g->broken = false;
+
+	h2g->desc = *map;
+	xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+	h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+				struct iosys_map *map)
+{
+	g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+	g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+	g2h->head = 0;
+	g2h->tail = 0;
+	g2h->space = CIRC_SPACE(g2h->tail, g2h->head, g2h->size) -
+		g2h->resv_space;
+	g2h->broken = false;
+
+	g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+	xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+	g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+					    CTB_H2G_BUFFER_SIZE);
+}
+
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 desc_addr, ctb_addr, size;
+	int err;
+
+	desc_addr = xe_bo_ggtt_addr(ct->bo);
+	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+	size = ct->ctbs.h2g.size * sizeof(u32);
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_addr);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+				ctb_addr);
+	if (err)
+		return err;
+
+	return xe_guc_self_cfg32(guc,
+				 GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+				 size);
+}
+
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 desc_addr, ctb_addr, size;
+	int err;
+
+	desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+		CTB_H2G_BUFFER_SIZE;
+	size = ct->ctbs.g2h.size * sizeof(u32);
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_addr);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+				ctb_addr);
+	if (err)
+		return err;
+
+	return xe_guc_self_cfg32(guc,
+				 GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+				 size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+	u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+			   GUC_ACTION_HOST2GUC_CONTROL_CTB),
+		FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+			   enable ? GUC_CTB_CONTROL_ENABLE :
+			   GUC_CTB_CONTROL_DISABLE),
+	};
+	int ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request));
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int err;
+
+	XE_BUG_ON(ct->enabled);
+
+	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+	err = guc_ct_ctb_h2g_register(ct);
+	if (err)
+		goto err_out;
+
+	err = guc_ct_ctb_g2h_register(ct);
+	if (err)
+		goto err_out;
+
+	err = guc_ct_control_toggle(ct, true);
+	if (err)
+		goto err_out;
+
+	mutex_lock(&ct->lock);
+	ct->g2h_outstanding = 0;
+	ct->enabled = true;
+	mutex_unlock(&ct->lock);
+
+	smp_mb();
+	wake_up_all(&ct->wq);
+	drm_dbg(&xe->drm, "GuC CT communication channel enabled\n");
+
+	return 0;
+
+err_out:
+	drm_err(&xe->drm, "Failed to enabled CT (%d)\n", err);
+
+	return err;
+}
+
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+	mutex_lock(&ct->lock);
+	ct->enabled = false;
+	mutex_unlock(&ct->lock);
+
+	xa_destroy(&ct->fence_lookup);
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+	lockdep_assert_held(&ct->lock);
+
+	if (cmd_len > h2g->space) {
+		h2g->head = desc_read(ct_to_xe(ct), h2g, head);
+		h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) -
+			h2g->resv_space;
+		if (cmd_len > h2g->space)
+			return false;
+	}
+
+	return true;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->lock);
+
+	return ct->ctbs.g2h.space > g2h_len;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->lock);
+
+	if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
+		return -EBUSY;
+
+	return 0;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	lockdep_assert_held(&ct->lock);
+	ct->ctbs.h2g.space -= cmd_len;
+}
+
+static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+	XE_BUG_ON(g2h_len > ct->ctbs.g2h.space);
+
+	if (g2h_len) {
+		spin_lock_irq(&ct->fast_lock);
+		ct->ctbs.g2h.space -= g2h_len;
+		ct->g2h_outstanding += num_g2h;
+		spin_unlock_irq(&ct->fast_lock);
+	}
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->fast_lock);
+	XE_WARN_ON(ct->ctbs.g2h.space + g2h_len >
+		   ct->ctbs.g2h.size - ct->ctbs.g2h.resv_space);
+
+	ct->ctbs.g2h.space += g2h_len;
+	--ct->g2h_outstanding;
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	spin_lock_irq(&ct->fast_lock);
+	__g2h_release_space(ct, g2h_len);
+	spin_unlock_irq(&ct->fast_lock);
+}
+
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		     u32 ct_fence_value, bool want_response)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+	u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
+	u32 cmd_len = len + GUC_CTB_HDR_LEN;
+	u32 cmd_idx = 0, i;
+	u32 tail = h2g->tail;
+	struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+							 tail * sizeof(u32));
+
+	lockdep_assert_held(&ct->lock);
+	XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN);
+	XE_BUG_ON(tail > h2g->size);
+
+	/* Command will wrap, zero fill (NOPs), return and check credits again */
+	if (tail + cmd_len > h2g->size) {
+		xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32));
+		h2g_reserve_space(ct, (h2g->size - tail));
+		h2g->tail = 0;
+		desc_write(xe, h2g, tail, h2g->tail);
+
+		return -EAGAIN;
+	}
+
+	/*
+	 * dw0: CT header (including fence)
+	 * dw1: HXG header (including action code)
+	 * dw2+: action data
+	 */
+	cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+		FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+		FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+	if (want_response) {
+		cmd[cmd_idx++] =
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	} else {
+		cmd[cmd_idx++] =
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	}
+	for (i = 1; i < len; ++i)
+		cmd[cmd_idx++] = action[i];
+
+	/* Write H2G ensuring visable before descriptor update */
+	xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
+	xe_device_wmb(ct_to_xe(ct));
+
+	/* Update local copies */
+	h2g->tail = (tail + cmd_len) % h2g->size;
+	h2g_reserve_space(ct, cmd_len);
+
+	/* Update descriptor */
+	desc_write(xe, h2g, tail, h2g->tail);
+
+	return 0;
+}
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 g2h_len, u32 num_g2h,
+				struct g2h_fence *g2h_fence)
+{
+	int ret;
+
+	XE_BUG_ON(g2h_len && g2h_fence);
+	XE_BUG_ON(num_g2h && g2h_fence);
+	XE_BUG_ON(g2h_len && !num_g2h);
+	XE_BUG_ON(!g2h_len && num_g2h);
+	lockdep_assert_held(&ct->lock);
+
+	if (unlikely(ct->ctbs.h2g.broken)) {
+		ret = -EPIPE;
+		goto out;
+	}
+
+	if (unlikely(!ct->enabled)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (g2h_fence) {
+		g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+		num_g2h = 1;
+
+		if (g2h_fence_needs_alloc(g2h_fence)) {
+			void *ptr;
+
+			g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
+			init_waitqueue_head(&g2h_fence->wq);
+			ptr = xa_store(&ct->fence_lookup,
+				       g2h_fence->seqno,
+				       g2h_fence, GFP_ATOMIC);
+			if (IS_ERR(ptr)) {
+				ret = PTR_ERR(ptr);
+				goto out;
+			}
+		}
+	}
+
+	xe_device_mem_access_get(ct_to_xe(ct));
+retry:
+	ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+	if (unlikely(ret))
+		goto put_wa;
+
+	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
+			!!g2h_fence);
+	if (unlikely(ret)) {
+		if (ret == -EAGAIN)
+			goto retry;
+		goto put_wa;
+	}
+
+	g2h_reserve_space(ct, g2h_len, num_g2h);
+	xe_guc_notify(ct_to_guc(ct));
+put_wa:
+	xe_device_mem_access_put(ct_to_xe(ct));
+out:
+
+	return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+	xe_gt_reset_async(ct_to_gt(ct));
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			      u32 g2h_len, u32 num_g2h,
+			      struct g2h_fence *g2h_fence)
+{
+	struct drm_device *drm = &ct_to_xe(ct)->drm;
+	struct drm_printer p = drm_info_printer(drm->dev);
+	unsigned int sleep_period_ms = 1;
+	int ret;
+
+	XE_BUG_ON(g2h_len && g2h_fence);
+	lockdep_assert_held(&ct->lock);
+
+try_again:
+	ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+				   g2h_fence);
+
+	/*
+	 * We wait to try to restore credits for about 1 second before bailing.
+	 * In the case of H2G credits we have no choice but just to wait for the
+	 * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
+	 * the case of G2H we process any G2H in the channel, hopefully freeing
+	 * credits as we consume the G2H messages.
+	 */
+	if (unlikely(ret == -EBUSY &&
+		     !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+		struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+		if (sleep_period_ms == 1024)
+			goto broken;
+
+		trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail,
+						 h2g->size, h2g->space,
+						 len + GUC_CTB_HDR_LEN);
+		msleep(sleep_period_ms);
+		sleep_period_ms <<= 1;
+
+		goto try_again;
+	} else if (unlikely(ret == -EBUSY)) {
+		struct xe_device *xe = ct_to_xe(ct);
+		struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+		trace_xe_guc_ct_g2h_flow_control(g2h->head,
+						 desc_read(xe, g2h, tail),
+						 g2h->size, g2h->space,
+						 g2h_fence ?
+						 GUC_CTB_HXG_MSG_MAX_LEN :
+						 g2h_len);
+
+#define g2h_avail(ct)	\
+	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head)
+		if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+					g2h_avail(ct), HZ))
+			goto broken;
+#undef g2h_avail
+
+		if (dequeue_one_g2h(ct) < 0)
+			goto broken;
+
+		goto try_again;
+	}
+
+	return ret;
+
+broken:
+	drm_err(drm, "No forward process on H2G, reset required");
+	xe_guc_ct_print(ct, &p);
+	ct->ctbs.h2g.broken = true;
+
+	return -EDEADLK;
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		       u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+	int ret;
+
+	XE_BUG_ON(g2h_len && g2h_fence);
+
+	mutex_lock(&ct->lock);
+	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+	mutex_unlock(&ct->lock);
+
+	return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h)
+{
+	int ret;
+
+	ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+	if (ret == -EDEADLK)
+		kick_reset(ct);
+
+	return ret;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h)
+{
+	int ret;
+
+	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+	if (ret == -EDEADLK)
+		kick_reset(ct);
+
+	return ret;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	int ret;
+
+	lockdep_assert_held(&ct->lock);
+
+	ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+	if (ret == -EDEADLK)
+		kick_reset(ct);
+
+	return ret;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur and if GT reset brought the
+ * CT back up. Randomly picking 5 seconds for an upper limit to do a GT a reset.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+	if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+		return false;
+
+#define ct_alive(ct)	\
+	(ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken)
+	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct),  HZ * 5))
+		return false;
+#undef ct_alive
+
+	return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			    u32 *response_buffer, bool no_fail)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct g2h_fence g2h_fence;
+	int ret = 0;
+
+	/*
+	 * We use a fence to implement blocking sends / receiving response data.
+	 * The seqno of the fence is sent in the H2G, returned in the G2H, and
+	 * an xarray is used as storage media with the seqno being to key.
+	 * Fields in the fence hold success, failure, retry status and the
+	 * response data. Safe to allocate on the stack as the xarray is the
+	 * only reference and it cannot be present after this function exits.
+	 */
+retry:
+	g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+	ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+	if (unlikely(ret == -ENOMEM)) {
+		void *ptr;
+
+		/* Retry allocation /w GFP_KERNEL */
+		ptr = xa_store(&ct->fence_lookup,
+			       g2h_fence.seqno,
+			       &g2h_fence, GFP_KERNEL);
+		if (IS_ERR(ptr)) {
+			return PTR_ERR(ptr);
+		}
+
+		goto retry_same_fence;
+	} else if (unlikely(ret)) {
+		if (ret == -EDEADLK)
+			kick_reset(ct);
+
+		if (no_fail && retry_failure(ct, ret))
+			goto retry_same_fence;
+
+		if (!g2h_fence_needs_alloc(&g2h_fence))
+			xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+
+		return ret;
+	}
+
+	ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ);
+	if (!ret) {
+		drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x",
+			g2h_fence.seqno, action[0]);
+		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+		return -ETIME;
+	}
+
+	if (g2h_fence.retry) {
+		drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d",
+			 action[0], g2h_fence.reason);
+		goto retry;
+	}
+	if (g2h_fence.fail) {
+		drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d",
+			action[0], g2h_fence.error, g2h_fence.hint);
+		ret = -EIO;
+	}
+
+	return ret > 0 ? 0 : ret;
+}
+
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, false);
+}
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, true);
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+
+	lockdep_assert_held(&ct->lock);
+
+	switch (action) {
+	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		g2h_release_space(ct, len);
+	}
+
+	return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 response_len = len - GUC_CTB_MSG_MIN_LEN;
+	u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]);
+	struct g2h_fence *g2h_fence;
+
+	lockdep_assert_held(&ct->lock);
+
+	g2h_fence = xa_erase(&ct->fence_lookup, fence);
+	if (unlikely(!g2h_fence)) {
+		/* Don't tear down channel, as send could've timed out */
+		drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence);
+		g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+		return 0;
+	}
+
+	XE_WARN_ON(fence != g2h_fence->seqno);
+
+	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		g2h_fence->fail = true;
+		g2h_fence->error =
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
+		g2h_fence->hint =
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
+	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		g2h_fence->retry = true;
+		g2h_fence->reason =
+			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]);
+	} else if (g2h_fence->response_buffer) {
+		g2h_fence->response_len = response_len;
+		memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
+		       response_len * sizeof(u32));
+	}
+
+	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+	g2h_fence->done = true;
+	smp_mb();
+
+	wake_up(&g2h_fence->wq);
+
+	return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 header, hxg, origin, type;
+	int ret;
+
+	lockdep_assert_held(&ct->lock);
+
+	header = msg[0];
+	hxg = msg[1];
+
+	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
+	if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, origin=%d, reset required\n",
+			origin);
+		ct->ctbs.g2h.broken = true;
+
+		return -EPROTO;
+	}
+
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+	switch (type) {
+	case GUC_HXG_TYPE_EVENT:
+		ret = parse_g2h_event(ct, msg, len);
+		break;
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+		ret = parse_g2h_response(ct, msg, len);
+		break;
+	default:
+		drm_err(&xe->drm,
+			"G2H channel broken on read, type=%d, reset required\n",
+			type);
+		ct->ctbs.g2h.broken = true;
+
+		ret = -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int ret = 0;
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+		return 0;
+
+	switch (action) {
+	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+		ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+		ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+		ret = xe_guc_engine_reset_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+		ret = xe_guc_engine_reset_failure_handler(guc, payload,
+							  adj_len);
+		break;
+	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+		/* Selftest only at the moment */
+		break;
+	case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+	case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+		/* FIXME: Handle this */
+		break;
+	case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+		ret = xe_guc_engine_memory_cat_error_handler(guc, payload,
+							     adj_len);
+		break;
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+							   adj_len);
+		break;
+	case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+		ret = xe_guc_access_counter_notify_handler(guc, payload,
+							   adj_len);
+		break;
+	default:
+		drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
+	}
+
+	if (ret)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, ret);
+
+	return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+	u32 tail, head, len;
+	s32 avail;
+
+	lockdep_assert_held(&ct->fast_lock);
+
+	if (!ct->enabled)
+		return -ENODEV;
+
+	if (g2h->broken)
+		return -EPIPE;
+
+	/* Calculate DW available to read */
+	tail = desc_read(xe, g2h, tail);
+	avail = tail - g2h->head;
+	if (unlikely(avail == 0))
+		return 0;
+
+	if (avail < 0)
+		avail += g2h->size;
+
+	/* Read header */
+	xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32));
+	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+	if (len > avail) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, avail=%d, len=%d, reset required\n",
+			avail, len);
+		g2h->broken = true;
+
+		return -EPROTO;
+	}
+
+	head = (g2h->head + 1) % g2h->size;
+	avail = len - 1;
+
+	/* Read G2H message */
+	if (avail + head > g2h->size) {
+		u32 avail_til_wrap = g2h->size - head;
+
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail_til_wrap * sizeof(u32));
+		xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+				   &g2h->cmds, 0,
+				   (avail - avail_til_wrap) * sizeof(u32));
+	} else {
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail * sizeof(u32));
+	}
+
+	if (fast_path) {
+		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+			return 0;
+
+		switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) {
+		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+			break;	/* Process these in fast-path */
+		default:
+			return 0;
+		}
+	}
+
+	/* Update local / descriptor header */
+	g2h->head = (head + avail) % g2h->size;
+	desc_write(xe, g2h, head, g2h->head);
+
+	return len;
+}
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int ret = 0;
+
+	switch (action) {
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		__g2h_release_space(ct, len);
+		ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+							   adj_len);
+		break;
+	default:
+		XE_WARN_ON("NOT_POSSIBLE");
+	}
+
+	if (ret)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, ret);
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance, process these
+ * critical G2H in the IRQ. This is safe as these handlers either just wake up
+ * waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int len;
+
+	if (!xe_device_in_fault_mode(xe) || !xe_device_mem_access_ongoing(xe))
+		return;
+
+	spin_lock(&ct->fast_lock);
+	do {
+		len = g2h_read(ct, ct->fast_msg, true);
+		if (len > 0)
+			g2h_fast_path(ct, ct->fast_msg, len);
+	} while (len > 0);
+	spin_unlock(&ct->fast_lock);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+	int len;
+	int ret;
+
+	lockdep_assert_held(&ct->lock);
+
+	spin_lock_irq(&ct->fast_lock);
+	len = g2h_read(ct, ct->msg, false);
+	spin_unlock_irq(&ct->fast_lock);
+	if (len <= 0)
+		return len;
+
+	ret = parse_g2h_msg(ct, ct->msg, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = process_g2h_msg(ct, ct->msg, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	return 1;
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+	int ret;
+
+	xe_device_mem_access_get(ct_to_xe(ct));
+	do {
+		mutex_lock(&ct->lock);
+		ret = dequeue_one_g2h(ct);
+		mutex_unlock(&ct->lock);
+
+		if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+			struct drm_device *drm = &ct_to_xe(ct)->drm;
+			struct drm_printer p = drm_info_printer(drm->dev);
+
+			xe_guc_ct_print(ct, &p);
+			kick_reset(ct);
+		}
+	} while (ret == 1);
+	xe_device_mem_access_put(ct_to_xe(ct));
+}
+
+static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
+			     struct drm_printer *p)
+{
+	u32 head, tail;
+
+	drm_printf(p, "\tsize: %d\n", ctb->size);
+	drm_printf(p, "\tresv_space: %d\n", ctb->resv_space);
+	drm_printf(p, "\thead: %d\n", ctb->head);
+	drm_printf(p, "\ttail: %d\n", ctb->tail);
+	drm_printf(p, "\tspace: %d\n", ctb->space);
+	drm_printf(p, "\tbroken: %d\n", ctb->broken);
+
+	head = desc_read(xe, ctb, head);
+	tail = desc_read(xe, ctb, tail);
+	drm_printf(p, "\thead (memory): %d\n", head);
+	drm_printf(p, "\ttail (memory): %d\n", tail);
+	drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
+
+	if (head != tail) {
+		struct iosys_map map =
+			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
+
+		while (head != tail) {
+			drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
+				   xe_map_rd(xe, &map, 0, u32));
+			++head;
+			if (head == ctb->size) {
+				head = 0;
+				map = ctb->cmds;
+			} else {
+				iosys_map_incr(&map, sizeof(u32));
+			}
+		}
+	}
+}
+
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+	if (ct->enabled) {
+		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
+		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
+
+		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
+		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
+		drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
+	} else {
+		drm_puts(p, "\nCT disabled\n");
+	}
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+/*
+ * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
+ * control if enough sent, 8k sends is enough. Verify forward process, verify
+ * credits expected values on exit.
+ */
+void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+	u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, };
+	u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, };
+	int ret;
+	int i;
+
+	ct->suppress_irq_handler = true;
+	drm_puts(p, "Starting GuC CT selftest\n");
+
+	for (i = 0; i < 8192; ++i) {
+		ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);
+		if (ret) {
+			drm_printf(p, "Aborted pass %d, ret %d\n", i, ret);
+			xe_guc_ct_print(ct, p);
+			break;
+		}
+	}
+
+	ct->suppress_irq_handler = false;
+	if (!ret) {
+		xe_guc_ct_irq_handler(ct);
+		msleep(200);
+		if (g2h->space !=
+		    CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+			drm_printf(p, "Mismatch on space %d, %d\n",
+				   g2h->space,
+				   CIRC_SPACE(0, 0, g2h->size) -
+				   g2h->resv_space);
+			ret = -EIO;
+		}
+		if (ct->g2h_outstanding) {
+			drm_printf(p, "Outstanding G2H, %d\n",
+				   ct->g2h_outstanding);
+			ret = -EIO;
+		}
+	}
+
+	/* Check failure path for blocking CTs too */
+	xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action));
+	if (g2h->space !=
+	    CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+		drm_printf(p, "Mismatch on space %d, %d\n",
+			   g2h->space,
+			   CIRC_SPACE(0, 0, g2h->size) -
+			   g2h->resv_space);
+		ret = -EIO;
+	}
+	if (ct->g2h_outstanding) {
+		drm_printf(p, "Outstanding G2H, %d\n",
+			   ct->g2h_outstanding);
+		ret = -EIO;
+	}
+
+	drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS");
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
new file mode 100644
index 0000000000000..49fb74f91e4df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_H_
+#define _XE_GUC_CT_H_
+
+#include "xe_guc_ct_types.h"
+
+struct drm_printer;
+
+int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_enable(struct xe_guc_ct *ct);
+void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
+
+static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
+{
+	wake_up_all(&ct->wq);
+#ifdef XE_GUC_CT_SELFTEST
+	if (!ct->suppress_irq_handler && ct->enabled)
+		queue_work(system_unbound_wq, &ct->g2h_worker);
+#else
+	if (ct->enabled)
+		queue_work(system_unbound_wq, &ct->g2h_worker);
+#endif
+	xe_guc_ct_fast_path(ct);
+}
+
+/* Basic CT send / receives */
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv(ct, action, len, NULL);
+}
+
+/* This is only version of the send CT you can call from a G2H handler */
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
+			       u32 len);
+
+/* Can't fail because a GT reset is in progress */
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p);
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
new file mode 100644
index 0000000000000..17b148bf3735d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_TYPES_H_
+#define _XE_GUC_CT_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock_types.h>
+#include <linux/wait.h>
+#include <linux/xarray.h>
+
+#include "abi/guc_communication_ctb_abi.h"
+
+#define XE_GUC_CT_SELFTEST
+
+struct xe_bo;
+
+/**
+ * struct guc_ctb - GuC command transport buffer (CTB)
+ */
+struct guc_ctb {
+	/** @desc: dma buffer map for CTB descriptor */
+	struct iosys_map desc;
+	/** @cmds: dma buffer map for CTB commands */
+	struct iosys_map cmds;
+	/** @size: size of CTB commands (DW) */
+	u32 size;
+	/** @resv_space: reserved space of CTB commands (DW) */
+	u32 resv_space;
+	/** @head: head of CTB commands (DW) */
+	u32 head;
+	/** @tail: tail of CTB commands (DW) */
+	u32 tail;
+	/** @space: space in CTB commands (DW) */
+	u32 space;
+	/** @broken: channel broken */
+	bool broken;
+};
+
+/**
+ * struct xe_guc_ct - GuC command transport (CT) layer
+ *
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
+ */
+struct xe_guc_ct {
+	/** @bo: XE BO for CT */
+	struct xe_bo *bo;
+	/** @lock: protects everything in CT layer */
+	struct mutex lock;
+	/** @fast_lock: protects G2H channel and credits */
+	spinlock_t fast_lock;
+	/** @ctbs: buffers for sending and receiving commands */
+	struct {
+		/** @send: Host to GuC (H2G, send) channel */
+		struct guc_ctb h2g;
+		/** @recv: GuC to Host (G2H, receive) channel */
+		struct guc_ctb g2h;
+	} ctbs;
+	/** @g2h_outstanding: number of outstanding G2H */
+	u32 g2h_outstanding;
+	/** @g2h_worker: worker to process G2H messages */
+	struct work_struct g2h_worker;
+	/** @enabled: CT enabled */
+	bool enabled;
+	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
+	u32 fence_seqno;
+	/** @fence_context: context for G2H fence */
+	u64 fence_context;
+	/** @fence_lookup: G2H fence lookup */
+	struct xarray fence_lookup;
+	/** @wq: wait queue used for reliable CT sends and freeing G2H credits */
+	wait_queue_head_t wq;
+#ifdef XE_GUC_CT_SELFTEST
+	/** @suppress_irq_handler: force flow control to sender */
+	bool suppress_irq_handler;
+#endif
+	/** @msg: Message buffer */
+	u32 msg[GUC_CTB_MSG_MAX_LEN];
+	/** @fast_msg: Message buffer */
+	u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
new file mode 100644
index 0000000000000..916e9633b3227
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_debugfs.h"
+#include "xe_guc_log.h"
+#include "xe_macros.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+static struct xe_guc *node_to_guc(struct drm_info_node *node)
+{
+	return node->info_ent->data;
+}
+
+static int guc_info(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_guc_print_info(guc, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static int guc_log(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_guc_log_print(&guc->log, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+static int guc_ct_selftest(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_guc_ct_selftest(&guc->ct, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+#endif
+
+static const struct drm_info_list debugfs_list[] = {
+	{"guc_info", guc_info, 0},
+	{"guc_log", guc_log, 0},
+#ifdef XE_GUC_CT_SELFTEST
+	{"guc_ct_selftest", guc_ct_selftest, 0},
+#endif
+};
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
+{
+	struct drm_minor *minor = guc_to_xe(guc)->drm.primary;
+	struct drm_info_list *local;
+	int i;
+
+#define DEBUGFS_SIZE	ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)
+	local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local) {
+		XE_WARN_ON("Couldn't allocate memory");
+		return;
+	}
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = guc;
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h
new file mode 100644
index 0000000000000..4756dff26fcab
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_DEBUGFS_H_
+#define _XE_GUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_guc;
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h
new file mode 100644
index 0000000000000..512615d1ce8c1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_TYPES_H_
+#define _XE_GUC_ENGINE_TYPES_H_
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_gpu_scheduler_types.h"
+
+struct dma_fence;
+struct xe_engine;
+
+/**
+ * struct xe_guc_engine - GuC specific state for an xe_engine
+ */
+struct xe_guc_engine {
+	/** @engine: Backpointer to parent xe_engine */
+	struct xe_engine *engine;
+	/** @sched: GPU scheduler for this xe_engine */
+	struct xe_gpu_scheduler sched;
+	/** @entity: Scheduler entity for this xe_engine */
+	struct xe_sched_entity entity;
+	/**
+	 * @static_msgs: Static messages for this xe_engine, used when a message
+	 * needs to sent through the GPU scheduler but memory allocations are
+	 * not allowed.
+	 */
+#define MAX_STATIC_MSG_TYPE	3
+	struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
+	/** @fini_async: do final fini async from this worker */
+	struct work_struct fini_async;
+	/** @resume_time: time of last resume */
+	u64 resume_time;
+	/** @state: GuC specific state for this xe_engine */
+	atomic_t state;
+	/** @wqi_head: work queue item tail */
+	u32 wqi_head;
+	/** @wqi_tail: work queue item tail */
+	u32 wqi_tail;
+	/** @id: GuC id for this xe_engine */
+	u16 id;
+	/** @suspend_wait: wait queue used to wait on pending suspends */
+	wait_queue_head_t suspend_wait;
+	/** @suspend_pending: a suspend of the engine is pending */
+	bool suspend_pending;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
new file mode 100644
index 0000000000000..f562404a6cf71
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_FWIF_H
+#define _XE_GUC_FWIF_H
+
+#include <linux/bits.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_slpc_abi.h"
+#include "abi/guc_errors_abi.h"
+#include "abi/guc_communication_mmio_abi.h"
+#include "abi/guc_communication_ctb_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "abi/guc_messages_abi.h"
+
+#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET	4
+#define G2H_LEN_DW_DEREGISTER_CONTEXT		3
+#define G2H_LEN_DW_TLB_INVALIDATE		3
+
+#define GUC_CONTEXT_DISABLE		0
+#define GUC_CONTEXT_ENABLE		1
+
+#define GUC_CLIENT_PRIORITY_KMD_HIGH	0
+#define GUC_CLIENT_PRIORITY_HIGH	1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
+#define GUC_CLIENT_PRIORITY_NORMAL	3
+#define GUC_CLIENT_PRIORITY_NUM		4
+
+#define GUC_RENDER_ENGINE		0
+#define GUC_VIDEO_ENGINE		1
+#define GUC_BLITTER_ENGINE		2
+#define GUC_VIDEOENHANCE_ENGINE		3
+#define GUC_VIDEO_ENGINE2		4
+#define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)
+
+#define GUC_RENDER_CLASS		0
+#define GUC_VIDEO_CLASS			1
+#define GUC_VIDEOENHANCE_CLASS		2
+#define GUC_BLITTER_CLASS		3
+#define GUC_COMPUTE_CLASS		4
+#define GUC_GSC_OTHER_CLASS		5
+#define GUC_LAST_ENGINE_CLASS		GUC_GSC_OTHER_CLASS
+#define GUC_MAX_ENGINE_CLASSES		16
+#define GUC_MAX_INSTANCES_PER_CLASS	32
+
+/* Work item for submitting workloads into work queue of GuC. */
+#define WQ_STATUS_ACTIVE		1
+#define WQ_STATUS_SUSPENDED		2
+#define WQ_STATUS_CMD_ERROR		3
+#define WQ_STATUS_ENGINE_ID_NOT_USED	4
+#define WQ_STATUS_SUSPENDED_FROM_RESET	5
+#define WQ_TYPE_NOOP			0x4
+#define WQ_TYPE_MULTI_LRC		0x5
+#define WQ_TYPE_MASK			GENMASK(7, 0)
+#define WQ_LEN_MASK			GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK			GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
+
+struct guc_wq_item {
+	u32 header;
+	u32 context_desc;
+	u32 submit_element_info;
+	u32 fence_id;
+} __packed;
+
+struct guc_sched_wq_desc {
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u32 wq_status;
+	u32 reserved[28];
+} __packed;
+
+/* Helper for context registration H2G */
+struct guc_ctxt_registration_info {
+	u32 flags;
+	u32 context_idx;
+	u32 engine_class;
+	u32 engine_submit_mask;
+	u32 wq_desc_lo;
+	u32 wq_desc_hi;
+	u32 wq_base_lo;
+	u32 wq_base_hi;
+	u32 wq_size;
+	u32 hwlrca_lo;
+	u32 hwlrca_hi;
+};
+#define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)
+
+/* 32-bit KLV structure as used by policy updates and others */
+struct guc_klv_generic_dw_t {
+        u32 kl;
+        u32 value;
+} __packed;
+
+/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
+struct guc_update_engine_policy_header {
+        u32 action;
+        u32 guc_id;
+} __packed;
+
+struct guc_update_engine_policy {
+        struct guc_update_engine_policy_header header;
+        struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
+} __packed;
+
+/* GUC_CTL_* - Parameters for loading the GuC */
+#define GUC_CTL_LOG_PARAMS		0
+#define   GUC_LOG_VALID			BIT(0)
+#define   GUC_LOG_NOTIFY_ON_HALF_FULL	BIT(1)
+#define   GUC_LOG_CAPTURE_ALLOC_UNITS	BIT(2)
+#define   GUC_LOG_LOG_ALLOC_UNITS	BIT(3)
+#define   GUC_LOG_CRASH_SHIFT		4
+#define   GUC_LOG_CRASH_MASK		(0x3 << GUC_LOG_CRASH_SHIFT)
+#define   GUC_LOG_DEBUG_SHIFT		6
+#define   GUC_LOG_DEBUG_MASK	        (0xF << GUC_LOG_DEBUG_SHIFT)
+#define   GUC_LOG_CAPTURE_SHIFT		10
+#define   GUC_LOG_CAPTURE_MASK	        (0x3 << GUC_LOG_CAPTURE_SHIFT)
+#define   GUC_LOG_BUF_ADDR_SHIFT	12
+
+#define GUC_CTL_WA			1
+#define   GUC_WA_GAM_CREDITS		BIT(10)
+#define   GUC_WA_DUAL_QUEUE		BIT(11)
+#define   GUC_WA_RCS_RESET_BEFORE_RC6	BIT(13)
+#define   GUC_WA_CONTEXT_ISOLATION	BIT(15)
+#define   GUC_WA_PRE_PARSER		BIT(14)
+#define   GUC_WA_HOLD_CCS_SWITCHOUT	BIT(17)
+#define   GUC_WA_POLLCS			BIT(18)
+#define   GUC_WA_RENDER_RST_RC6_EXIT	BIT(19)
+#define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST	BIT(21)
+
+#define GUC_CTL_FEATURE			2
+#define   GUC_CTL_ENABLE_SLPC		BIT(2)
+#define   GUC_CTL_DISABLE_SCHEDULER	BIT(14)
+
+#define GUC_CTL_DEBUG			3
+#define   GUC_LOG_VERBOSITY_SHIFT	0
+#define   GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_HIGH	(2 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_ULTRA	(3 << GUC_LOG_VERBOSITY_SHIFT)
+#define	  GUC_LOG_VERBOSITY_MIN		0
+#define	  GUC_LOG_VERBOSITY_MAX		3
+#define	  GUC_LOG_VERBOSITY_MASK	0x0000000f
+#define	  GUC_LOG_DESTINATION_MASK	(3 << 4)
+#define   GUC_LOG_DISABLED		(1 << 6)
+#define   GUC_PROFILE_ENABLED		(1 << 7)
+
+#define GUC_CTL_ADS			4
+#define   GUC_ADS_ADDR_SHIFT		1
+#define   GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)
+
+#define GUC_CTL_DEVID			5
+
+#define GUC_CTL_MAX_DWORDS		14
+
+/* Scheduling policy settings */
+
+#define GLOBAL_POLICY_MAX_NUM_WI 15
+
+/* Don't reset an engine upon preemption failure */
+#define GLOBAL_POLICY_DISABLE_ENGINE_RESET				BIT(0)
+
+#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
+
+struct guc_policies {
+	u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
+	/* In micro seconds. How much time to allow before DPC processing is
+	 * called back via interrupt (to prevent DPC queue drain starving).
+	 * Typically 1000s of micro seconds (example only, not granularity). */
+	u32 dpc_promote_time;
+
+	/* Must be set to take these new values. */
+	u32 is_valid;
+
+	/* Max number of WIs to process per call. A large value may keep CS
+	 * idle. */
+	u32 max_num_work_items;
+
+	u32 global_flags;
+	u32 reserved[4];
+} __packed;
+
+/* GuC MMIO reg state struct */
+struct guc_mmio_reg {
+	u32 offset;
+	u32 value;
+	u32 flags;
+	u32 mask;
+#define GUC_REGSET_MASKED		BIT(0)
+#define GUC_REGSET_MASKED_WITH_VALUE	BIT(2)
+#define GUC_REGSET_RESTORE_ONLY		BIT(3)
+} __packed;
+
+/* GuC register sets */
+struct guc_mmio_reg_set {
+	u32 address;
+	u16 count;
+	u16 reserved;
+} __packed;
+
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED		0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK	1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI	2
+#define GUC_GENERIC_GT_SYSINFO_MAX			16
+
+/* HW info */
+struct guc_gt_system_info {
+	u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+	u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
+} __packed;
+
+enum {
+	GUC_CAPTURE_LIST_INDEX_PF = 0,
+	GUC_CAPTURE_LIST_INDEX_VF = 1,
+	GUC_CAPTURE_LIST_INDEX_MAX = 2,
+};
+
+/* GuC Additional Data Struct */
+struct guc_ads {
+	struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 reserved0;
+	u32 scheduler_policies;
+	u32 gt_system_info;
+	u32 reserved1;
+	u32 control_data;
+	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
+	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
+	u32 private_data;
+	u32 um_init_data;
+	u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
+	u32 reserved[14];
+} __packed;
+
+/* Engine usage stats */
+struct guc_engine_usage_record {
+	u32 current_context_index;
+	u32 last_switch_in_stamp;
+	u32 reserved0;
+	u32 total_runtime;
+	u32 reserved1[4];
+} __packed;
+
+struct guc_engine_usage {
+	struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
+/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
+enum xe_guc_recv_message {
+	XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
+	XE_GUC_RECV_MSG_EXCEPTION = BIT(30),
+};
+
+/* Page fault structures */
+struct access_counter_desc {
+	u32 dw0;
+#define ACCESS_COUNTER_TYPE	BIT(0)
+#define ACCESS_COUNTER_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACCESS_COUNTER_SUBG_HI	BIT(0)
+#define ACCESS_COUNTER_RSVD0	GENMASK(2, 1)
+#define ACCESS_COUNTER_ENG_INSTANCE	GENMASK(8, 3)
+#define ACCESS_COUNTER_ENG_CLASS	GENMASK(11, 9)
+#define ACCESS_COUNTER_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACCESS_COUNTER_VFID	GENMASK(5, 0)
+#define ACCESS_COUNTER_RSVD1	GENMASK(7, 6)
+#define ACCESS_COUNTER_GRANULARITY	GENMASK(10, 8)
+#define ACCESS_COUNTER_RSVD2	GENMASK(16, 11)
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+enum guc_um_queue_type {
+	GUC_UM_HW_QUEUE_PAGE_FAULT = 0,
+	GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE,
+	GUC_UM_HW_QUEUE_ACCESS_COUNTER,
+	GUC_UM_HW_QUEUE_MAX
+};
+
+struct guc_um_queue_params {
+	u64 base_dpa;
+	u32 base_ggtt_address;
+	u32 size_in_bytes;
+	u32 rsvd[4];
+} __packed;
+
+struct guc_um_init_params {
+	u64 page_response_timeout_in_us;
+	u32 rsvd[6];
+	struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX];
+} __packed;
+
+enum xe_guc_fault_reply_type {
+	PFR_ACCESS = 0,
+	PFR_ENGINE,
+	PFR_VFID,
+	PFR_ALL,
+	PFR_INVALID
+};
+
+enum xe_guc_response_desc_type {
+	TLB_INVALIDATION_DESC = 0,
+	FAULT_RESPONSE_DESC
+};
+
+struct xe_guc_pagefault_desc {
+	u32 dw0;
+#define PFD_FAULT_LEVEL		GENMASK(2, 0)
+#define PFD_SRC_ID		GENMASK(10, 3)
+#define PFD_RSVD_0		GENMASK(17, 11)
+#define XE2_PFD_TRVA_FAULT	BIT(18)
+#define PFD_ENG_INSTANCE	GENMASK(24, 19)
+#define PFD_ENG_CLASS		GENMASK(27, 25)
+#define PFD_PDATA_LO		GENMASK(31, 28)
+
+	u32 dw1;
+#define PFD_PDATA_HI		GENMASK(11, 0)
+#define PFD_PDATA_HI_SHIFT	4
+#define PFD_ASID		GENMASK(31, 12)
+
+	u32 dw2;
+#define PFD_ACCESS_TYPE		GENMASK(1, 0)
+#define PFD_FAULT_TYPE		GENMASK(3, 2)
+#define PFD_VFID		GENMASK(9, 4)
+#define PFD_RSVD_1		GENMASK(11, 10)
+#define PFD_VIRTUAL_ADDR_LO	GENMASK(31, 12)
+#define PFD_VIRTUAL_ADDR_LO_SHIFT 12
+
+	u32 dw3;
+#define PFD_VIRTUAL_ADDR_HI	GENMASK(31, 0)
+#define PFD_VIRTUAL_ADDR_HI_SHIFT 32
+} __packed;
+
+struct xe_guc_pagefault_reply {
+	u32 dw0;
+#define PFR_VALID		BIT(0)
+#define PFR_SUCCESS		BIT(1)
+#define PFR_REPLY		GENMASK(4, 2)
+#define PFR_RSVD_0		GENMASK(9, 5)
+#define PFR_DESC_TYPE		GENMASK(11, 10)
+#define PFR_ASID		GENMASK(31, 12)
+
+	u32 dw1;
+#define PFR_VFID		GENMASK(5, 0)
+#define PFR_RSVD_1		BIT(6)
+#define PFR_ENG_INSTANCE	GENMASK(12, 7)
+#define PFR_ENG_CLASS		GENMASK(15, 13)
+#define PFR_PDATA		GENMASK(31, 16)
+
+	u32 dw2;
+#define PFR_RSVD_2		GENMASK(31, 0)
+} __packed;
+
+struct xe_guc_acc_desc {
+	u32 dw0;
+#define ACC_TYPE	BIT(0)
+#define ACC_TRIGGER	0
+#define ACC_NOTIFY	1
+#define ACC_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACC_SUBG_HI	BIT(0)
+#define ACC_RSVD0	GENMASK(2, 1)
+#define ACC_ENG_INSTANCE	GENMASK(8, 3)
+#define ACC_ENG_CLASS	GENMASK(11, 9)
+#define ACC_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACC_VFID	GENMASK(5, 0)
+#define ACC_RSVD1	GENMASK(7, 6)
+#define ACC_GRANULARITY	GENMASK(10, 8)
+#define ACC_RSVD2	GENMASK(16, 11)
+#define ACC_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACC_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
new file mode 100644
index 0000000000000..8dfd48f71a7c1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_map.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_GET_HWCONFIG,
+		lower_32_bits(ggtt_addr),
+		upper_32_bits(ggtt_addr),
+		size,
+	};
+
+	return xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+}
+
+static int guc_hwconfig_size(struct xe_guc *guc, u32 *size)
+{
+	int ret = send_get_hwconfig(guc, 0, 0);
+
+	if (ret < 0)
+		return ret;
+
+	*size = ret;
+	return 0;
+}
+
+static int guc_hwconfig_copy(struct xe_guc *guc)
+{
+	int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo),
+				    guc->hwconfig.size);
+
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void guc_hwconfig_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xe_bo_unpin_map_no_vm(guc->hwconfig.bo);
+}
+
+int xe_guc_hwconfig_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_bo *bo;
+	u32 size;
+	int err;
+
+	/* Initialization already done */
+	if (guc->hwconfig.bo)
+		return 0;
+
+	/*
+	 * All hwconfig the same across GTs so only GT0 needs to be configured
+	 */
+	if (gt->info.id != XE_GT0)
+		return 0;
+
+	/* ADL_P, DG2+ supports hwconfig table */
+	if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P)
+		return 0;
+
+	err = guc_hwconfig_size(guc, &size);
+	if (err)
+		return err;
+	if (!size)
+		return -EINVAL;
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size),
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+	guc->hwconfig.bo = bo;
+	guc->hwconfig.size = size;
+
+	err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc);
+	if (err)
+		return err;
+
+	return guc_hwconfig_copy(guc);
+}
+
+u32 xe_guc_hwconfig_size(struct xe_guc *guc)
+{
+	return !guc->hwconfig.bo ? 0 : guc->hwconfig.size;
+}
+
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+
+	XE_BUG_ON(!guc->hwconfig.bo);
+
+	xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0,
+			   guc->hwconfig.size);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
new file mode 100644
index 0000000000000..b5794d6419006
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_HWCONFIG_H_
+#define _XE_GUC_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+
+int xe_guc_hwconfig_init(struct xe_guc *guc);
+u32 xe_guc_hwconfig_size(struct xe_guc *guc);
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
new file mode 100644
index 0000000000000..7ec1b2bb1f8e5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc_log.h"
+#include "xe_map.h"
+#include "xe_module.h"
+
+static struct xe_gt *
+log_to_gt(struct xe_guc_log *log)
+{
+	return container_of(log, struct xe_gt, uc.guc.log);
+}
+
+static struct xe_device *
+log_to_xe(struct xe_guc_log *log)
+{
+	return gt_to_xe(log_to_gt(log));
+}
+
+static size_t guc_log_size(void)
+{
+	/*
+	 *  GuC Log buffer Layout
+	 *
+	 *  +===============================+ 00B
+	 *  |    Crash dump state header    |
+	 *  +-------------------------------+ 32B
+	 *  |      Debug state header       |
+	 *  +-------------------------------+ 64B
+	 *  |     Capture state header      |
+	 *  +-------------------------------+ 96B
+	 *  |                               |
+	 *  +===============================+ PAGE_SIZE (4KB)
+	 *  |        Crash Dump logs        |
+	 *  +===============================+ + CRASH_SIZE
+	 *  |          Debug logs           |
+	 *  +===============================+ + DEBUG_SIZE
+	 *  |         Capture logs          |
+	 *  +===============================+ + CAPTURE_SIZE
+	 */
+	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
+		CAPTURE_BUFFER_SIZE;
+}
+
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+	struct xe_device *xe = log_to_xe(log);
+	size_t size;
+	int i, j;
+
+	XE_BUG_ON(!log->bo);
+
+	size = log->bo->size;
+
+#define DW_PER_READ		128
+	XE_BUG_ON(size % (DW_PER_READ * sizeof(u32)));
+	for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
+		u32 read[DW_PER_READ];
+
+		xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32),
+				   DW_PER_READ * sizeof(u32));
+#define DW_PER_PRINT		4
+		for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) {
+			u32 *print = read + j * DW_PER_PRINT;
+
+			drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+				   *(print + 0), *(print + 1),
+				   *(print + 2), *(print + 3));
+		}
+	}
+}
+
+static void guc_log_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_log *log = arg;
+
+	xe_bo_unpin_map_no_vm(log->bo);
+}
+
+int xe_guc_log_init(struct xe_guc_log *log)
+{
+	struct xe_device *xe = log_to_xe(log);
+	struct xe_gt *gt = log_to_gt(log);
+	struct xe_bo *bo;
+	int err;
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(),
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
+	log->bo = bo;
+	log->level = xe_guc_log_level;
+
+	err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
new file mode 100644
index 0000000000000..2d25ab28b4b3a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_H_
+#define _XE_GUC_LOG_H_
+
+#include "xe_guc_log_types.h"
+
+struct drm_printer;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
+#define CRASH_BUFFER_SIZE       SZ_1M
+#define DEBUG_BUFFER_SIZE       SZ_8M
+#define CAPTURE_BUFFER_SIZE     SZ_2M
+#else
+#define CRASH_BUFFER_SIZE	SZ_8K
+#define DEBUG_BUFFER_SIZE	SZ_64K
+#define CAPTURE_BUFFER_SIZE	SZ_16K
+#endif
+/*
+ * While we're using plain log level in i915, GuC controls are much more...
+ * "elaborate"? We have a couple of bits for verbosity, separate bit for actual
+ * log enabling, and separate bit for default logging - which "conveniently"
+ * ignores the enable bit.
+ */
+#define GUC_LOG_LEVEL_DISABLED		0
+#define GUC_LOG_LEVEL_NON_VERBOSE	1
+#define GUC_LOG_LEVEL_IS_ENABLED(x)	((x) > GUC_LOG_LEVEL_DISABLED)
+#define GUC_LOG_LEVEL_IS_VERBOSE(x)	((x) > GUC_LOG_LEVEL_NON_VERBOSE)
+#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({		\
+	typeof(x) _x = (x);				\
+	GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0;	\
+})
+#define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
+#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
+
+int xe_guc_log_init(struct xe_guc_log *log);
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+
+static inline u32
+xe_guc_log_get_level(struct xe_guc_log *log)
+{
+	return log->level;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
new file mode 100644
index 0000000000000..125080d138a7f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_TYPES_H_
+#define _XE_GUC_LOG_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_log - GuC log
+ */
+struct xe_guc_log {
+	/** @level: GuC log level */
+	u32 level;
+	/** @bo: XE BO for GuC log */
+	struct xe_bo *bo;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
new file mode 100644
index 0000000000000..227e30a482e3b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_types.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_ct.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "i915_reg_defs.h"
+#include "i915_reg.h"
+
+#include "intel_mchbar_regs.h"
+
+/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
+#define   RP0_MASK	REG_GENMASK(7, 0)
+#define   RP1_MASK	REG_GENMASK(15, 8)
+#define   RPN_MASK	REG_GENMASK(23, 16)
+
+#define GEN10_FREQ_INFO_REC	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
+#define   RPE_MASK		REG_GENMASK(15, 8)
+
+#include "gt/intel_gt_regs.h"
+/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */
+#define   REQ_RATIO_MASK	REG_GENMASK(31, 23)
+
+/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */
+#define   RCN_MASK	REG_GENMASK(2, 0)
+
+#define GEN12_RPSTAT1		_MMIO(0x1381b4)
+#define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
+
+#define GT_FREQUENCY_MULTIPLIER	50
+#define GEN9_FREQ_SCALER	3
+
+/**
+ * DOC: GuC Power Conservation (PC)
+ *
+ * GuC Power Conservation (PC) supports multiple features for the most
+ * efficient and performing use of the GT when GuC submission is enabled,
+ * including frequency management, Render-C states management, and various
+ * algorithms for power balancing.
+ *
+ * Single Loop Power Conservation (SLPC) is the name given to the suite of
+ * connected power conservation features in the GuC firmware. The firmware
+ * exposes a programming interface to the host for the control of SLPC.
+ *
+ * Frequency management:
+ * =====================
+ *
+ * Xe driver enables SLPC with all of its defaults features and frequency
+ * selection, which varies per platform.
+ * Xe's GuC PC provides a sysfs API for frequency management:
+ *
+ * device/gt#/freq_* *read-only* files:
+ * - freq_act: The actual resolved frequency decided by PCODE.
+ * - freq_cur: The current one requested by GuC PC to the Hardware.
+ * - freq_rpn: The Render Performance (RP) N level, which is the minimal one.
+ * - freq_rpe: The Render Performance (RP) E level, which is the efficient one.
+ * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * device/gt#/freq_* *read-write* files:
+ * - freq_min: GuC PC min request.
+ * - freq_max: GuC PC max request.
+ *             If max <= min, then freq_min becomes a fixed frequency request.
+ *
+ * Render-C States:
+ * ================
+ *
+ * Render-C states is also a GuC PC feature that is now enabled in Xe for
+ * all platforms.
+ * Xe's GuC PC provides a sysfs API for Render-C States:
+ *
+ * device/gt#/rc* *read-only* files:
+ * - rc_status: Provide the actual immediate status of Render-C: (rc0 or rc6)
+ * - rc6_residency: Provide the rc6_residency counter in units of 1.28 uSec.
+ *                  Prone to overflows.
+ */
+
+static struct xe_guc *
+pc_to_guc(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_guc, pc);
+}
+
+static struct xe_device *
+pc_to_xe(struct xe_guc_pc *pc)
+{
+	struct xe_guc *guc = pc_to_guc(pc);
+	struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc);
+
+	return gt_to_xe(gt);
+}
+
+static struct xe_gt *
+pc_to_gt(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_gt, uc.guc.pc);
+}
+
+static struct xe_guc_pc *
+dev_to_pc(struct device *dev)
+{
+	return &kobj_to_gt(&dev->kobj)->uc.guc.pc;
+}
+
+static struct iosys_map *
+pc_to_maps(struct xe_guc_pc *pc)
+{
+	return &pc->bo->vmap;
+}
+
+#define slpc_shared_data_read(pc_, field_) \
+	xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_)
+
+#define slpc_shared_data_write(pc_, field_, val_) \
+	xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_, val_)
+
+#define SLPC_EVENT(id, count) \
+	(FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
+	 FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
+
+static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state)
+{
+	xe_device_assert_mem_access(pc_to_xe(pc));
+	return slpc_shared_data_read(pc, header.global_state) == state;
+}
+
+static int pc_action_reset(struct xe_guc_pc *pc)
+{
+	struct  xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_RESET, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_shutdown(struct xe_guc_pc *pc)
+{
+	struct  xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe",
+			ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_query_task_state(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	/* Blocking here to ensure the results are ready before reading them */
+	ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm,
+			"GuC PC query task state failed: %pe", ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+
+	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe",
+			ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	u32 action[] = {
+		XE_GUC_ACTION_SETUP_PC_GUCRC,
+		mode,
+	};
+	int ret;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe",
+			ERR_PTR(ret));
+	return ret;
+}
+
+static u32 decode_freq(u32 raw)
+{
+	return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER,
+				 GEN9_FREQ_SCALER);
+}
+
+static u32 pc_get_min_freq(struct xe_guc_pc *pc)
+{
+	u32 freq;
+
+	freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK,
+			 slpc_shared_data_read(pc, task_state_data.freq));
+
+	return decode_freq(freq);
+}
+
+static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	/*
+	 * Let's only check for the rpn-rp0 range. If max < min,
+	 * min becomes a fixed request.
+	 */
+	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+		return -EINVAL;
+
+	/*
+	 * GuC policy is to elevate minimum frequency to the efficient levels
+	 * Our goal is to have the admin choices respected.
+	 */
+	pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+			    freq < pc->rpe_freq);
+
+	return pc_action_set_param(pc,
+				   SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+static int pc_get_max_freq(struct xe_guc_pc *pc)
+{
+	u32 freq;
+
+	freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK,
+			 slpc_shared_data_read(pc, task_state_data.freq));
+
+	return decode_freq(freq);
+}
+
+static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	/*
+	 * Let's only check for the rpn-rp0 range. If max < min,
+	 * min becomes a fixed request.
+	 * Also, overclocking is not supported.
+	 */
+	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+		return -EINVAL;
+
+	return pc_action_set_param(pc,
+				   SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+static void pc_update_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg;
+
+	/*
+	 * For PVC we still need to use fused RP1 as the approximation for RPe
+	 * For other platforms than PVC we get the resolved RPe directly from
+	 * PCODE at a different register
+	 */
+	if (xe->info.platform == XE_PVC)
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg);
+	else
+		reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg);
+
+	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+
+	/*
+	 * RPe is decided at runtime by PCODE. In the rare case where that's
+	 * smaller than the fused min, we will trust the PCODE and use that
+	 * as our minimum one.
+	 */
+	pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
+}
+
+static ssize_t freq_act_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct kobject *kobj = &dev->kobj;
+	struct xe_gt *gt = kobj_to_gt(kobj);
+	u32 freq;
+	ssize_t ret;
+
+	/*
+	 * When in RC6, actual frequency is 0. Let's block RC6 so we are able
+	 * to verify that our freq requests are really happening.
+	 */
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		return ret;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
+	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return ret;
+}
+static DEVICE_ATTR_RO(freq_act);
+
+static ssize_t freq_cur_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct kobject *kobj = &dev->kobj;
+	struct xe_gt *gt = kobj_to_gt(kobj);
+	u32 freq;
+	ssize_t ret;
+
+	/*
+	 * GuC SLPC plays with cur freq request when GuCRC is enabled
+	 * Block RC6 for a more reliable read.
+	 */
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		return ret;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
+	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return ret;
+}
+static DEVICE_ATTR_RO(freq_cur);
+
+static ssize_t freq_rp0_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", pc->rp0_freq);
+}
+static DEVICE_ATTR_RO(freq_rp0);
+
+static ssize_t freq_rpe_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	pc_update_rp_values(pc);
+	return sysfs_emit(buf, "%d\n", pc->rpe_freq);
+}
+static DEVICE_ATTR_RO(freq_rpe);
+
+static ssize_t freq_rpn_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", pc->rpn_freq);
+}
+static DEVICE_ATTR_RO(freq_rpn);
+
+static ssize_t freq_min_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_gt *gt = pc_to_gt(pc);
+	ssize_t ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	/*
+	 * GuC SLPC plays with min freq request when GuCRC is enabled
+	 * Block RC6 for a more reliable read.
+	 */
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		goto fw;
+
+	ret = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc));
+
+fw:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = kstrtou32(buff, 0, &freq);
+	if (ret)
+		return ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_set_min_freq(pc, freq);
+	if (ret)
+		goto out;
+
+	pc->user_requested_min = freq;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret ?: count;
+}
+static DEVICE_ATTR_RW(freq_min);
+
+static ssize_t freq_max_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	ssize_t ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		goto out;
+
+	ret = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc));
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = kstrtou32(buff, 0, &freq);
+	if (ret)
+		return ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_set_max_freq(pc, freq);
+	if (ret)
+		goto out;
+
+	pc->user_requested_max = freq;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret ?: count;
+}
+static DEVICE_ATTR_RW(freq_max);
+
+static ssize_t rc_status_show(struct device *dev,
+			      struct device_attribute *attr, char *buff)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	switch (REG_FIELD_GET(RCN_MASK, reg)) {
+	case GEN6_RC6:
+		return sysfs_emit(buff, "rc6\n");
+	case GEN6_RC0:
+		return sysfs_emit(buff, "rc0\n");
+	default:
+		return -ENOENT;
+	}
+}
+static DEVICE_ATTR_RO(rc_status);
+
+static ssize_t rc6_residency_show(struct device *dev,
+				  struct device_attribute *attr, char *buff)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+	ssize_t ret;
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		return ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg);
+	xe_device_mem_access_put(pc_to_xe(pc));
+
+	ret = sysfs_emit(buff, "%u\n", reg);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return ret;
+}
+static DEVICE_ATTR_RO(rc6_residency);
+
+static const struct attribute *pc_attrs[] = {
+	&dev_attr_freq_act.attr,
+	&dev_attr_freq_cur.attr,
+	&dev_attr_freq_rp0.attr,
+	&dev_attr_freq_rpe.attr,
+	&dev_attr_freq_rpn.attr,
+	&dev_attr_freq_min.attr,
+	&dev_attr_freq_max.attr,
+	&dev_attr_rc_status.attr,
+	&dev_attr_rc6_residency.attr,
+	NULL
+};
+
+static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	if (xe->info.platform == XE_PVC)
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg);
+	else
+		reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg);
+	pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+	pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		return ret;
+
+	/*
+	 * GuC defaults to some RPmax that is not actually achievable without
+	 * overclocking. Let's adjust it to the Hardware RP0, which is the
+	 * regular maximum
+	 */
+	if (pc_get_max_freq(pc) > pc->rp0_freq)
+		pc_set_max_freq(pc, pc->rp0_freq);
+
+	/*
+	 * Same thing happens for Server platforms where min is listed as
+	 * RPMax
+	 */
+	if (pc_get_min_freq(pc) > pc->rp0_freq)
+		pc_set_min_freq(pc, pc->rp0_freq);
+
+	return 0;
+}
+
+static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
+{
+	int ret = 0;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	if (pc->user_requested_min != 0) {
+		ret = pc_set_min_freq(pc, pc->user_requested_min);
+		if (ret)
+			return ret;
+	}
+
+	if (pc->user_requested_max != 0) {
+		ret = pc_set_max_freq(pc, pc->user_requested_max);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int pc_gucrc_disable(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
+	if (ret)
+		return ret;
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		return ret;
+
+	xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0);
+	xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return 0;
+}
+
+static void pc_init_pcode_freq(struct xe_guc_pc *pc)
+{
+	u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER);
+	u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER);
+
+	XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max));
+}
+
+static int pc_init_freqs(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	mutex_lock(&pc->freq_lock);
+
+	ret = pc_adjust_freq_bounds(pc);
+	if (ret)
+		goto out;
+
+	ret = pc_adjust_requested_freq(pc);
+	if (ret)
+		goto out;
+
+	pc_update_rp_values(pc);
+
+	pc_init_pcode_freq(pc);
+
+	/*
+	 * The frequencies are really ready for use only after the user
+	 * requested ones got restored.
+	 */
+	pc->freq_ready = true;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	return ret;
+}
+
+/**
+ * xe_guc_pc_start - Start GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_start(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+	int ret;
+
+	XE_WARN_ON(!xe_device_guc_submission_enabled(xe));
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	memset(pc->bo->vmap.vaddr, 0, size);
+	slpc_shared_data_write(pc, header.size, size);
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		return ret;
+
+	ret = pc_action_reset(pc);
+	if (ret)
+		goto out;
+
+	if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) {
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = pc_init_freqs(pc);
+	if (ret)
+		goto out;
+
+	if (xe->info.platform == XE_PVC) {
+		pc_gucrc_disable(pc);
+		ret = 0;
+		goto out;
+	}
+
+	ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL);
+
+out:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_stop - Stop GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_stop(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	ret = pc_gucrc_disable(pc);
+	if (ret)
+		goto out;
+
+	mutex_lock(&pc->freq_lock);
+	pc->freq_ready = false;
+	mutex_unlock(&pc->freq_lock);
+
+	ret = pc_action_shutdown(pc);
+	if (ret)
+		goto out;
+
+	if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) {
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
+		ret = -EIO;
+	}
+
+out:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+static void pc_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_pc *pc = arg;
+
+	XE_WARN_ON(xe_guc_pc_stop(pc));
+	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
+	xe_bo_unpin_map_no_vm(pc->bo);
+}
+
+/**
+ * xe_guc_pc_init - Initialize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_init(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+	int err;
+
+	mutex_init(&pc->freq_lock);
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, size,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	pc->bo = bo;
+
+	pc_init_fused_rp_values(pc);
+
+	err = sysfs_create_files(gt->sysfs, pc_attrs);
+	if (err)
+		return err;
+
+	err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
new file mode 100644
index 0000000000000..da29e49348688
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_H_
+#define _XE_GUC_PC_H_
+
+#include "xe_guc_pc_types.h"
+
+int xe_guc_pc_init(struct xe_guc_pc *pc);
+int xe_guc_pc_start(struct xe_guc_pc *pc);
+int xe_guc_pc_stop(struct xe_guc_pc *pc);
+
+#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
new file mode 100644
index 0000000000000..39548e03acf4f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_TYPES_H_
+#define _XE_GUC_PC_TYPES_H_
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+/**
+ * struct xe_guc_pc - GuC Power Conservation (PC)
+ */
+struct xe_guc_pc {
+	/** @bo: GGTT buffer object that is shared with GuC PC */
+	struct xe_bo *bo;
+	/** @rp0_freq: HW RP0 frequency - The Maximum one */
+	u32 rp0_freq;
+	/** @rpe_freq: HW RPe frequency - The Efficient one */
+	u32 rpe_freq;
+	/** @rpn_freq: HW RPN frequency - The Minimum one */
+	u32 rpn_freq;
+	/** @user_requested_min: Stash the minimum requested freq by user */
+	u32 user_requested_min;
+	/** @user_requested_max: Stash the maximum requested freq by user */
+	u32 user_requested_max;
+	/** @freq_lock: Let's protect the frequencies */
+	struct mutex freq_lock;
+	/** @freq_ready: Only handle freq changes, if they are really ready */
+	bool freq_ready;
+};
+
+#endif	/* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h
new file mode 100644
index 0000000000000..1e16a9b76ddc1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_reg.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_REG_H_
+#define _XE_GUC_REG_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "i915_reg.h"
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define GUC_STATUS			_MMIO(0xc000)
+#define   GS_RESET_SHIFT		0
+#define   GS_MIA_IN_RESET		  (0x01 << GS_RESET_SHIFT)
+#define   GS_BOOTROM_SHIFT		1
+#define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
+#define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
+#define   GS_BOOTROM_JUMP_PASSED	  (0x76 << GS_BOOTROM_SHIFT)
+#define   GS_UKERNEL_SHIFT		8
+#define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
+#define   GS_MIA_SHIFT			16
+#define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
+#define   GS_MIA_CORE_STATE		  (0x01 << GS_MIA_SHIFT)
+#define   GS_MIA_HALT_REQUESTED		  (0x02 << GS_MIA_SHIFT)
+#define   GS_MIA_ISR_ENTRY		  (0x04 << GS_MIA_SHIFT)
+#define   GS_AUTH_STATUS_SHIFT		30
+#define   GS_AUTH_STATUS_MASK		  (0x03 << GS_AUTH_STATUS_SHIFT)
+#define   GS_AUTH_STATUS_BAD		  (0x01 << GS_AUTH_STATUS_SHIFT)
+#define   GS_AUTH_STATUS_GOOD		  (0x02 << GS_AUTH_STATUS_SHIFT)
+
+#define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT		16
+
+#define GEN11_SOFT_SCRATCH(n)		_MMIO(0x190240 + (n) * 4)
+#define GEN11_SOFT_SCRATCH_COUNT	4
+
+#define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH_COUNT		64
+
+#define DMA_ADDR_0_LOW			_MMIO(0xc300)
+#define DMA_ADDR_0_HIGH			_MMIO(0xc304)
+#define DMA_ADDR_1_LOW			_MMIO(0xc308)
+#define DMA_ADDR_1_HIGH			_MMIO(0xc30c)
+#define   DMA_ADDRESS_SPACE_WOPCM	  (7 << 16)
+#define   DMA_ADDRESS_SPACE_GTT		  (8 << 16)
+#define DMA_COPY_SIZE			_MMIO(0xc310)
+#define DMA_CTRL			_MMIO(0xc314)
+#define   HUC_UKERNEL			  (1<<9)
+#define   UOS_MOVE			  (1<<4)
+#define   START_DMA			  (1<<0)
+#define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
+#define   GUC_WOPCM_OFFSET_VALID	  (1<<0)
+#define   HUC_LOADING_AGENT_VCR		  (0<<1)
+#define   HUC_LOADING_AGENT_GUC		  (1<<1)
+#define   GUC_WOPCM_OFFSET_SHIFT	14
+#define   GUC_WOPCM_OFFSET_MASK		  (0x3ffff << GUC_WOPCM_OFFSET_SHIFT)
+#define GUC_MAX_IDLE_COUNT		_MMIO(0xC3E4)
+
+#define HUC_STATUS2             _MMIO(0xD3B0)
+#define   HUC_FW_VERIFIED       (1<<7)
+
+#define GEN11_HUC_KERNEL_LOAD_INFO	_MMIO(0xC1DC)
+#define   HUC_LOAD_SUCCESSFUL		  (1 << 0)
+
+#define GUC_WOPCM_SIZE			_MMIO(0xc050)
+#define   GUC_WOPCM_SIZE_LOCKED		  (1<<0)
+#define   GUC_WOPCM_SIZE_SHIFT		12
+#define   GUC_WOPCM_SIZE_MASK		  (0xfffff << GUC_WOPCM_SIZE_SHIFT)
+
+#define GEN8_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9LP_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9_GT_PM_CONFIG		_MMIO(0x13816c)
+#define   GT_DOORBELL_ENABLE		  (1<<0)
+
+#define GEN8_GTCR			_MMIO(0x4274)
+#define   GEN8_GTCR_INVALIDATE		  (1<<0)
+
+#define GEN12_GUC_TLB_INV_CR		_MMIO(0xcee8)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE	(1 << 0)
+
+#define GUC_ARAT_C6DIS			_MMIO(0xA178)
+
+#define GUC_SHIM_CONTROL		_MMIO(0xc064)
+#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	(1<<0)
+#define   GUC_ENABLE_READ_CACHE_LOGIC		(1<<1)
+#define   GUC_ENABLE_MIA_CACHING		(1<<2)
+#define   GUC_GEN10_MSGCH_ENABLE		(1<<4)
+#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	(1<<9)
+#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	(1<<10)
+#define   GUC_ENABLE_MIA_CLOCK_GATING		(1<<15)
+#define   GUC_GEN10_SHIM_WC_ENABLE		(1<<21)
+
+#define GUC_SEND_INTERRUPT		_MMIO(0xc4c8)
+#define   GUC_SEND_TRIGGER		  (1<<0)
+#define GEN11_GUC_HOST_INTERRUPT	_MMIO(0x1901f0)
+
+#define GUC_NUM_DOORBELLS		256
+
+/* format of the HW-monitored doorbell cacheline */
+struct guc_doorbell_info {
+	u32 db_status;
+#define GUC_DOORBELL_DISABLED		0
+#define GUC_DOORBELL_ENABLED		1
+
+	u32 cookie;
+	u32 reserved[14];
+} __packed;
+
+#define GEN8_DRBREGL(x)			_MMIO(0x1000 + (x) * 8)
+#define   GEN8_DRB_VALID		  (1<<0)
+#define GEN8_DRBREGU(x)			_MMIO(0x1000 + (x) * 8 + 4)
+
+#define GEN12_DIST_DBS_POPULATED		_MMIO(0xd08)
+#define   GEN12_DOORBELLS_PER_SQIDI_SHIFT	16
+#define   GEN12_DOORBELLS_PER_SQIDI		(0xff)
+#define   GEN12_SQIDIS_DOORBELL_EXIST		(0xffff)
+
+#define DE_GUCRMR			_MMIO(0x44054)
+
+#define GUC_BCS_RCS_IER			_MMIO(0xC550)
+#define GUC_VCS2_VCS1_IER		_MMIO(0xC554)
+#define GUC_WD_VECS_IER			_MMIO(0xC558)
+#define GUC_PM_P24C_IER			_MMIO(0xC55C)
+
+/* GuC Interrupt Vector */
+#define GUC_INTR_GUC2HOST		BIT(15)
+#define GUC_INTR_EXEC_ERROR		BIT(14)
+#define GUC_INTR_DISPLAY_EVENT		BIT(13)
+#define GUC_INTR_SEM_SIG		BIT(12)
+#define GUC_INTR_IOMMU2GUC		BIT(11)
+#define GUC_INTR_DOORBELL_RANG		BIT(10)
+#define GUC_INTR_DMA_DONE		BIT(9)
+#define GUC_INTR_FATAL_ERROR		BIT(8)
+#define GUC_INTR_NOTIF_ERROR		BIT(7)
+#define GUC_INTR_SW_INT_6		BIT(6)
+#define GUC_INTR_SW_INT_5		BIT(5)
+#define GUC_INTR_SW_INT_4		BIT(4)
+#define GUC_INTR_SW_INT_3		BIT(3)
+#define GUC_INTR_SW_INT_2		BIT(2)
+#define GUC_INTR_SW_INT_1		BIT(1)
+#define GUC_INTR_SW_INT_0		BIT(0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
new file mode 100644
index 0000000000000..e0d424c2b78c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -0,0 +1,1695 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/dma-fence-array.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_engine_types.h"
+#include "xe_guc_submit.h"
+#include "xe_gt.h"
+#include "xe_force_wake.h"
+#include "xe_gpu_scheduler.h"
+#include "xe_hw_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+#include "gt/intel_lrc_reg.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+static struct xe_guc *
+engine_to_guc(struct xe_engine *e)
+{
+	return &e->gt->uc.guc;
+}
+
+/*
+ * Helpers for engine state, using an atomic as some of the bits can transition
+ * as the same time (e.g. a suspend can be happning at the same time as schedule
+ * engine done being processed).
+ */
+#define ENGINE_STATE_REGISTERED		(1 << 0)
+#define ENGINE_STATE_ENABLED		(1 << 1)
+#define ENGINE_STATE_PENDING_ENABLE	(1 << 2)
+#define ENGINE_STATE_PENDING_DISABLE	(1 << 3)
+#define ENGINE_STATE_DESTROYED		(1 << 4)
+#define ENGINE_STATE_SUSPENDED		(1 << 5)
+#define ENGINE_STATE_RESET		(1 << 6)
+#define ENGINE_STATE_KILLED		(1 << 7)
+
+static bool engine_registered(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
+}
+
+static void set_engine_registered(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
+}
+
+static void clear_engine_registered(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
+}
+
+static bool engine_enabled(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
+}
+
+static void set_engine_enabled(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
+}
+
+static void clear_engine_enabled(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
+}
+
+static bool engine_pending_enable(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
+}
+
+static void set_engine_pending_enable(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+}
+
+static void clear_engine_pending_enable(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+}
+
+static bool engine_pending_disable(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
+}
+
+static void set_engine_pending_disable(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+}
+
+static void clear_engine_pending_disable(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+}
+
+static bool engine_destroyed(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
+}
+
+static void set_engine_destroyed(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
+}
+
+static bool engine_banned(struct xe_engine *e)
+{
+	return (e->flags & ENGINE_FLAG_BANNED);
+}
+
+static void set_engine_banned(struct xe_engine *e)
+{
+	e->flags |= ENGINE_FLAG_BANNED;
+}
+
+static bool engine_suspended(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
+}
+
+static void set_engine_suspended(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
+}
+
+static void clear_engine_suspended(struct xe_engine *e)
+{
+	atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
+}
+
+static bool engine_reset(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
+}
+
+static void set_engine_reset(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_RESET, &e->guc->state);
+}
+
+static bool engine_killed(struct xe_engine *e)
+{
+	return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
+}
+
+static void set_engine_killed(struct xe_engine *e)
+{
+	atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
+}
+
+static bool engine_killed_or_banned(struct xe_engine *e)
+{
+	return engine_killed(e) || engine_banned(e);
+}
+
+static void guc_submit_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xa_destroy(&guc->submission_state.engine_lookup);
+	ida_destroy(&guc->submission_state.guc_ids);
+	bitmap_free(guc->submission_state.guc_ids_bitmap);
+}
+
+#define GUC_ID_MAX		65535
+#define GUC_ID_NUMBER_MLRC	4096
+#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
+#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC
+
+static const struct xe_engine_ops guc_engine_ops;
+
+static void primelockdep(struct xe_guc *guc)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+
+	mutex_lock(&guc->submission_state.lock);
+	might_lock(&guc->submission_state.suspend.lock);
+	mutex_unlock(&guc->submission_state.lock);
+
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_submit_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	int err;
+
+	guc->submission_state.guc_ids_bitmap =
+		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
+	if (!guc->submission_state.guc_ids_bitmap)
+		return -ENOMEM;
+
+	gt->engine_ops = &guc_engine_ops;
+
+	mutex_init(&guc->submission_state.lock);
+	xa_init(&guc->submission_state.engine_lookup);
+	ida_init(&guc->submission_state.guc_ids);
+
+	spin_lock_init(&guc->submission_state.suspend.lock);
+	guc->submission_state.suspend.context = dma_fence_context_alloc(1);
+
+	primelockdep(guc);
+
+	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
+{
+	int ret;
+	void *ptr;
+
+	/*
+	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
+	 * worse case user gets -ENOMEM on engine create and has to try again.
+	 *
+	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
+	 * failure.
+	 */
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	if (xe_engine_is_parallel(e)) {
+		void *bitmap = guc->submission_state.guc_ids_bitmap;
+
+		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
+					      order_base_2(e->width));
+	} else {
+		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
+				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
+	}
+	if (ret < 0)
+		return ret;
+
+	e->guc->id = ret;
+	if (xe_engine_is_parallel(e))
+		e->guc->id += GUC_ID_START_MLRC;
+
+	ptr = xa_store(&guc->submission_state.engine_lookup,
+		       e->guc->id, e, GFP_NOWAIT);
+	if (IS_ERR(ptr)) {
+		ret = PTR_ERR(ptr);
+		goto err_release;
+	}
+
+	return 0;
+
+err_release:
+	ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+	return ret;
+}
+
+static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
+{
+	mutex_lock(&guc->submission_state.lock);
+	xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
+	if (xe_engine_is_parallel(e))
+		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+				      e->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(e->width));
+	else
+		ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+	mutex_unlock(&guc->submission_state.lock);
+}
+
+struct engine_policy {
+	u32 count;
+	struct guc_update_engine_policy h2g;
+};
+
+static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
+{
+	size_t bytes = sizeof(policy->h2g.header) +
+		       (sizeof(policy->h2g.klv[0]) * policy->count);
+
+	return bytes / sizeof(u32);
+}
+
+static void __guc_engine_policy_start_klv(struct engine_policy *policy,
+					  u16 guc_id)
+{
+	policy->h2g.header.action =
+		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
+	policy->h2g.header.guc_id = guc_id;
+	policy->count = 0;
+}
+
+#define MAKE_ENGINE_POLICY_ADD(func, id) \
+static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
+					   u32 data) \
+{ \
+	XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+ \
+	policy->h2g.klv[policy->count].kl = \
+		FIELD_PREP(GUC_KLV_0_KEY, \
+			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+		FIELD_PREP(GUC_KLV_0_LEN, 1); \
+	policy->h2g.klv[policy->count].value = data; \
+	policy->count++; \
+}
+
+MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+#undef MAKE_ENGINE_POLICY_ADD
+
+static const int xe_engine_prio_to_guc[] = {
+	[XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
+	[XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
+	[XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
+	[XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
+};
+
+static void init_policies(struct xe_guc *guc, struct xe_engine *e)
+{
+        struct engine_policy policy;
+	enum xe_engine_priority prio = e->priority;
+	u32 timeslice_us = e->sched_props.timeslice_us;
+	u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;
+
+	XE_BUG_ON(!engine_registered(e));
+
+        __guc_engine_policy_start_klv(&policy, e->guc->id);
+        __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
+        __guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
+        __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);
+
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_engine_policy_action_size(&policy), 0, 0);
+}
+
+static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
+{
+	struct engine_policy policy;
+
+        __guc_engine_policy_start_klv(&policy, e->guc->id);
+        __guc_engine_policy_add_preemption_timeout(&policy, 1);
+
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_engine_policy_action_size(&policy), 0, 0);
+}
+
+#define PARALLEL_SCRATCH_SIZE	2048
+#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
+#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
+#define CACHELINE_BYTES		64
+
+struct sync_semaphore {
+	u32 semaphore;
+	u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+struct parallel_scratch {
+	struct guc_sched_wq_desc wq_desc;
+
+	struct sync_semaphore go;
+	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
+
+	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+		sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];
+
+	u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+#define parallel_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
+#define parallel_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)
+
+static void __register_mlrc_engine(struct xe_guc *guc,
+				   struct xe_engine *e,
+				   struct guc_ctxt_registration_info *info)
+{
+#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
+	u32 action[MAX_MLRC_REG_SIZE];
+	int len = 0;
+	int i;
+
+	XE_BUG_ON(!xe_engine_is_parallel(e));
+
+	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+	action[len++] = info->flags;
+	action[len++] = info->context_idx;
+	action[len++] = info->engine_class;
+	action[len++] = info->engine_submit_mask;
+	action[len++] = info->wq_desc_lo;
+	action[len++] = info->wq_desc_hi;
+	action[len++] = info->wq_base_lo;
+	action[len++] = info->wq_base_hi;
+	action[len++] = info->wq_size;
+	action[len++] = e->width;
+	action[len++] = info->hwlrca_lo;
+	action[len++] = info->hwlrca_hi;
+
+	for (i = 1; i < e->width; ++i) {
+		struct xe_lrc *lrc = e->lrc + i;
+
+		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
+		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
+	}
+
+	XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
+#undef MAX_MLRC_REG_SIZE
+
+	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+}
+
+static void __register_engine(struct xe_guc *guc,
+			      struct guc_ctxt_registration_info *info)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_REGISTER_CONTEXT,
+		info->flags,
+		info->context_idx,
+		info->engine_class,
+		info->engine_submit_mask,
+		info->wq_desc_lo,
+		info->wq_desc_hi,
+		info->wq_base_lo,
+		info->wq_base_hi,
+		info->wq_size,
+		info->hwlrca_lo,
+		info->hwlrca_hi,
+	};
+
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void register_engine(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_lrc *lrc = e->lrc;
+	struct guc_ctxt_registration_info info;
+
+	XE_BUG_ON(engine_registered(e));
+
+	memset(&info, 0, sizeof(info));
+	info.context_idx = e->guc->id;
+	info.engine_class = xe_engine_class_to_guc_class(e->class);
+	info.engine_submit_mask = e->logical_mask;
+	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
+	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
+	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
+
+	if (xe_engine_is_parallel(e)) {
+		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
+		struct iosys_map map = xe_lrc_parallel_map(lrc);
+
+		info.wq_desc_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq_desc));
+		info.wq_desc_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq_desc));
+		info.wq_base_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq[0]));
+		info.wq_base_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct parallel_scratch, wq[0]));
+		info.wq_size = WQ_SIZE;
+
+		e->guc->wqi_head = 0;
+		e->guc->wqi_tail = 0;
+		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
+		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
+	}
+
+	set_engine_registered(e);
+	trace_xe_engine_register(e);
+	if (xe_engine_is_parallel(e))
+		__register_mlrc_engine(guc, e, &info);
+	else
+		__register_engine(guc, &info);
+	init_policies(guc, e);
+}
+
+static u32 wq_space_until_wrap(struct xe_engine *e)
+{
+	return (WQ_SIZE - e->guc->wqi_tail);
+}
+
+static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	unsigned int sleep_period_ms = 1;
+
+#define AVAILABLE_SPACE \
+	CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
+	if (wqi_size > AVAILABLE_SPACE) {
+try_again:
+		e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
+		if (wqi_size > AVAILABLE_SPACE) {
+			if (sleep_period_ms == 1024) {
+				xe_gt_reset_async(e->gt);
+				return -ENODEV;
+			}
+
+			msleep(sleep_period_ms);
+			sleep_period_ms <<= 1;
+			goto try_again;
+		}
+	}
+#undef AVAILABLE_SPACE
+
+	return 0;
+}
+
+static int wq_noop_append(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;
+
+	if (wq_wait_for_space(e, wq_space_until_wrap(e)))
+		return -ENODEV;
+
+	XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+	parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
+		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+		       FIELD_PREP(WQ_LEN_MASK, len_dw));
+	e->guc->wqi_tail = 0;
+
+	return 0;
+}
+
+static void wq_item_append(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
+	u32 wqi_size = (e->width + 3) * sizeof(u32);
+	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
+	int i = 0, j;
+
+	if (wqi_size > wq_space_until_wrap(e)) {
+		if (wq_noop_append(e))
+			return;
+	}
+	if (wq_wait_for_space(e, wqi_size))
+		return;
+
+	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
+		FIELD_PREP(WQ_LEN_MASK, len_dw);
+	wqi[i++] = xe_lrc_descriptor(e->lrc);
+	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
+		FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
+	wqi[i++] = 0;
+	for (j = 1; j < e->width; ++j) {
+		struct xe_lrc *lrc = e->lrc + j;
+
+		wqi[i++] = lrc->ring.tail / sizeof(u64);
+	}
+
+	XE_BUG_ON(i != wqi_size / sizeof(u32));
+
+	iosys_map_incr(&map, offsetof(struct parallel_scratch,
+					wq[e->guc->wqi_tail / sizeof(u32)]));
+	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
+	e->guc->wqi_tail += wqi_size;
+	XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);
+
+	xe_device_wmb(xe);
+
+	map = xe_lrc_parallel_map(e->lrc);
+	parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
+}
+
+#define RESUME_PENDING	~0x0ull
+static void submit_engine(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_lrc *lrc = e->lrc;
+	u32 action[3];
+	u32 g2h_len = 0;
+	u32 num_g2h = 0;
+	int len = 0;
+	bool extra_submit = false;
+
+	XE_BUG_ON(!engine_registered(e));
+
+	if (xe_engine_is_parallel(e))
+		wq_item_append(e);
+	else
+		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+
+	if (engine_suspended(e) && !xe_engine_is_parallel(e))
+		return;
+
+	if (!engine_enabled(e) && !engine_suspended(e)) {
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+		action[len++] = e->guc->id;
+		action[len++] = GUC_CONTEXT_ENABLE;
+		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
+		num_g2h = 1;
+		if (xe_engine_is_parallel(e))
+			extra_submit = true;
+
+		e->guc->resume_time = RESUME_PENDING;
+		set_engine_pending_enable(e);
+		set_engine_enabled(e);
+		trace_xe_engine_scheduling_enable(e);
+	} else {
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = e->guc->id;
+		trace_xe_engine_submit(e);
+	}
+
+	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
+
+	if (extra_submit) {
+		len = 0;
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = e->guc->id;
+		trace_xe_engine_submit(e);
+
+		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+	}
+}
+
+static struct dma_fence *
+guc_engine_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_engine *e = job->engine;
+
+	XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
+		  !engine_banned(e) && !engine_suspended(e));
+
+	trace_xe_sched_job_run(job);
+
+	if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
+		if (!engine_registered(e))
+			register_engine(e);
+		e->ring_ops->emit_job(job);
+		submit_engine(e);
+	}
+
+	if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
+		return job->fence;
+	else
+		return dma_fence_get(job->fence);
+}
+
+static void guc_engine_free_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+	trace_xe_sched_job_free(job);
+	xe_sched_job_put(job);
+}
+
+static int guc_read_stopped(struct xe_guc *guc)
+{
+	return atomic_read(&guc->submission_state.stopped);
+}
+
+#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable)			\
+	u32 action[] = {						\
+		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
+		e->guc->id,						\
+		GUC_CONTEXT_##enable_disable,				\
+	}
+
+static void disable_scheduling_deregister(struct xe_guc *guc,
+					  struct xe_engine *e)
+{
+	MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+	int ret;
+
+	set_min_preemption_timeout(guc, e);
+	smp_rmb();
+	ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
+				 guc_read_stopped(guc), HZ * 5);
+	if (!ret) {
+		struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+		XE_WARN_ON("Pending enable failed to respond");
+		xe_sched_submission_start(sched);
+		xe_gt_reset_async(e->gt);
+		xe_sched_tdr_queue_imm(sched);
+		return;
+	}
+
+	clear_engine_enabled(e);
+	set_engine_pending_disable(e);
+	set_engine_destroyed(e);
+	trace_xe_engine_scheduling_disable(e);
+
+	/*
+	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
+	 * handler and we are not allowed to reserved G2H space in handlers.
+	 */
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
+		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
+}
+
+static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+static void simple_error_capture(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct drm_printer p = drm_err_printer("");
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = e->logical_mask;
+	u32 width_mask = (0x1 << e->width) - 1;
+	int i;
+	bool cookie;
+
+	if (e->vm && !e->vm->error_capture.capture_once) {
+		e->vm->error_capture.capture_once = true;
+		cookie = dma_fence_begin_signalling();
+		for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+			if (adj_logical_mask & BIT(i)) {
+				adj_logical_mask |= width_mask << i;
+				i += e->width;
+			} else {
+				++i;
+			}
+		}
+
+		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		xe_guc_ct_print(&guc->ct, &p);
+		guc_engine_print(e, &p);
+		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
+			if (hwe->class != e->hwe->class ||
+			    !(BIT(hwe->logical_instance) & adj_logical_mask))
+				continue;
+			xe_hw_engine_print_state(hwe, &p);
+		}
+		xe_analyze_vm(&p, e->vm, e->gt->info.id);
+		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		dma_fence_end_signalling(cookie);
+	}
+}
+#else
+static void simple_error_capture(struct xe_engine *e)
+{
+}
+#endif
+
+static enum drm_gpu_sched_stat
+guc_engine_timedout_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_sched_job *tmp_job;
+	struct xe_engine *e = job->engine;
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_device *xe = guc_to_xe(engine_to_guc(e));
+	int err = -ETIME;
+	int i = 0;
+
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+		XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
+		XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));
+
+		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+			   xe_sched_job_seqno(job), e->guc->id, e->flags);
+		simple_error_capture(e);
+	} else {
+		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
+			 xe_sched_job_seqno(job), e->guc->id, e->flags);
+	}
+	trace_xe_sched_job_timedout(job);
+
+	/* Kill the run_job entry point */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Kernel jobs should never fail, nor should VM jobs if they do
+	 * somethings has gone wrong and the GT needs a reset
+	 */
+	if (e->flags & ENGINE_FLAG_KERNEL ||
+	    (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
+		if (!xe_sched_invalidate_job(job, 2)) {
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(e->gt);
+			goto out;
+		}
+	}
+
+	/* Engine state now stable, disable scheduling if needed */
+	if (engine_enabled(e)) {
+		struct xe_guc *guc = engine_to_guc(e);
+		int ret;
+
+		if (engine_reset(e))
+			err = -EIO;
+		set_engine_banned(e);
+		xe_engine_get(e);
+		disable_scheduling_deregister(engine_to_guc(e), e);
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences, if GT broken the GT reset code should signal us.
+		 *
+		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
+		 * error) messages which can cause the schedule disable to get
+		 * lost. If this occurs, trigger a GT reset to recover.
+		 */
+		smp_rmb();
+		ret = wait_event_timeout(guc->ct.wq,
+					 !engine_pending_disable(e) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret) {
+			XE_WARN_ON("Schedule disable failed to respond");
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(e->gt);
+			xe_sched_tdr_queue_imm(sched);
+			goto out;
+		}
+	}
+
+	/* Stop fence signaling */
+	xe_hw_fence_irq_stop(e->fence_irq);
+
+	/*
+	 * Fence state now stable, stop / start scheduler which cleans up any
+	 * fences that are complete
+	 */
+	xe_sched_add_pending_job(sched, job);
+	xe_sched_submission_start(sched);
+	xe_sched_tdr_queue_imm(&e->guc->sched);
+
+	/* Mark all outstanding jobs as bad, thus completing them */
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
+		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
+	spin_unlock(&sched->base.job_list_lock);
+
+	/* Start fence signaling */
+	xe_hw_fence_irq_start(e->fence_irq);
+
+out:
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static void __guc_engine_fini_async(struct work_struct *w)
+{
+	struct xe_guc_engine *ge =
+		container_of(w, struct xe_guc_engine, fini_async);
+	struct xe_engine *e = ge->engine;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	trace_xe_engine_destroy(e);
+
+	if (e->flags & ENGINE_FLAG_PERSISTENT)
+		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+	release_guc_id(guc, e);
+	xe_sched_entity_fini(&ge->entity);
+	xe_sched_fini(&ge->sched);
+
+	if (!(e->flags & ENGINE_FLAG_KERNEL)) {
+		kfree(ge);
+		xe_engine_fini(e);
+	}
+}
+
+static void guc_engine_fini_async(struct xe_engine *e)
+{
+	bool kernel = e->flags & ENGINE_FLAG_KERNEL;
+
+	INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
+	queue_work(system_unbound_wq, &e->guc->fini_async);
+
+	/* We must block on kernel engines so slabs are empty on driver unload */
+	if (kernel) {
+		struct xe_guc_engine *ge = e->guc;
+
+		flush_work(&ge->fini_async);
+		kfree(ge);
+		xe_engine_fini(e);
+	}
+}
+
+static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
+{
+	/*
+	 * Might be done from within the GPU scheduler, need to do async as we
+	 * fini the scheduler when the engine is fini'd, the scheduler can't
+	 * complete fini within itself (circular dependency). Async resolves
+	 * this we and don't really care when everything is fini'd, just that it
+	 * is.
+	 */
+	guc_engine_fini_async(e);
+}
+
+static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
+{
+	struct xe_engine *e = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
+	trace_xe_engine_cleanup_entity(e);
+
+	if (engine_registered(e))
+		disable_scheduling_deregister(guc, e);
+	else
+		__guc_engine_fini(guc, e);
+}
+
+static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
+{
+	return !engine_killed_or_banned(e) && engine_registered(e);
+}
+
+static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
+{
+	struct xe_engine *e = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	if (guc_engine_allowed_to_change_state(e))
+		init_policies(guc, e);
+	kfree(msg);
+}
+
+static void suspend_fence_signal(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+
+	XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
+		  !guc_read_stopped(guc));
+	XE_BUG_ON(!e->guc->suspend_pending);
+
+	e->guc->suspend_pending = false;
+	smp_wmb();
+	wake_up(&e->guc->suspend_wait);
+}
+
+static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
+{
+	struct xe_engine *e = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
+	    engine_enabled(e)) {
+		wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
+			   guc_read_stopped(guc));
+
+		if (!guc_read_stopped(guc)) {
+			MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+			s64 since_resume_ms =
+				ktime_ms_delta(ktime_get(),
+					       e->guc->resume_time);
+			s64 wait_ms = e->vm->preempt.min_run_period_ms -
+				since_resume_ms;
+
+			if (wait_ms > 0 && e->guc->resume_time)
+				msleep(wait_ms);
+
+			set_engine_suspended(e);
+			clear_engine_enabled(e);
+			set_engine_pending_disable(e);
+			trace_xe_engine_scheduling_disable(e);
+
+			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+		}
+	} else if (e->guc->suspend_pending) {
+		set_engine_suspended(e);
+		suspend_fence_signal(e);
+	}
+}
+
+static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
+{
+	struct xe_engine *e = msg->private_data;
+	struct xe_guc *guc = engine_to_guc(e);
+
+	if (guc_engine_allowed_to_change_state(e)) {
+		MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);
+
+		e->guc->resume_time = RESUME_PENDING;
+		clear_engine_suspended(e);
+		set_engine_pending_enable(e);
+		set_engine_enabled(e);
+		trace_xe_engine_scheduling_enable(e);
+
+		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+	} else {
+		clear_engine_suspended(e);
+	}
+}
+
+#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
+#define SET_SCHED_PROPS	2
+#define SUSPEND		3
+#define RESUME		4
+
+static void guc_engine_process_msg(struct xe_sched_msg *msg)
+{
+	trace_xe_sched_msg_recv(msg);
+
+	switch (msg->opcode) {
+	case CLEANUP:
+		__guc_engine_process_msg_cleanup(msg);
+		break;
+	case SET_SCHED_PROPS:
+		__guc_engine_process_msg_set_sched_props(msg);
+		break;
+	case SUSPEND:
+		__guc_engine_process_msg_suspend(msg);
+		break;
+	case RESUME:
+		__guc_engine_process_msg_resume(msg);
+		break;
+	default:
+		XE_BUG_ON("Unknown message type");
+	}
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = guc_engine_run_job,
+	.free_job = guc_engine_free_job,
+	.timedout_job = guc_engine_timedout_job,
+};
+
+static const struct xe_sched_backend_ops xe_sched_ops = {
+	.process_msg = guc_engine_process_msg,
+};
+
+static int guc_engine_init(struct xe_engine *e)
+{
+	struct xe_gpu_scheduler *sched;
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc_engine *ge;
+	long timeout;
+	int err;
+
+	XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
+
+	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
+	if (!ge)
+		return -ENOMEM;
+
+	e->guc = ge;
+	ge->engine = e;
+	init_waitqueue_head(&ge->suspend_wait);
+
+	timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
+	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
+			     e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
+			     e->name, gt_to_xe(e->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &ge->sched;
+	err = xe_sched_entity_init(&ge->entity, sched);
+	if (err)
+		goto err_sched;
+	e->priority = XE_ENGINE_PRIORITY_NORMAL;
+
+	mutex_lock(&guc->submission_state.lock);
+
+	err = alloc_guc_id(guc, e);
+	if (err)
+		goto err_entity;
+
+	e->entity = &ge->entity;
+
+	if (guc_read_stopped(guc))
+		xe_sched_stop(sched);
+
+	mutex_unlock(&guc->submission_state.lock);
+
+	switch (e->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		sprintf(e->name, "rcs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		sprintf(e->name, "vcs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		sprintf(e->name, "vecs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		sprintf(e->name, "bcs%d", e->guc->id);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		sprintf(e->name, "ccs%d", e->guc->id);
+		break;
+	default:
+		XE_WARN_ON(e->class);
+	}
+
+	trace_xe_engine_create(e);
+
+	return 0;
+
+err_entity:
+	xe_sched_entity_fini(&ge->entity);
+err_sched:
+	xe_sched_fini(&ge->sched);
+err_free:
+	kfree(ge);
+
+	return err;
+}
+
+static void guc_engine_kill(struct xe_engine *e)
+{
+	trace_xe_engine_kill(e);
+	set_engine_killed(e);
+	xe_sched_tdr_queue_imm(&e->guc->sched);
+}
+
+static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
+			       u32 opcode)
+{
+	INIT_LIST_HEAD(&msg->link);
+	msg->opcode = opcode;
+	msg->private_data = e;
+
+	trace_xe_sched_msg_add(msg);
+	xe_sched_add_msg(&e->guc->sched, msg);
+}
+
+#define STATIC_MSG_CLEANUP	0
+#define STATIC_MSG_SUSPEND	1
+#define STATIC_MSG_RESUME	2
+static void guc_engine_fini(struct xe_engine *e)
+{
+	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP;
+
+	if (!(e->flags & ENGINE_FLAG_KERNEL))
+		guc_engine_add_msg(e, msg, CLEANUP);
+	else
+		__guc_engine_fini(engine_to_guc(e), e);
+}
+
+static int guc_engine_set_priority(struct xe_engine *e,
+				   enum xe_engine_priority priority)
+{
+	struct xe_sched_msg *msg;
+
+	if (e->priority == priority || engine_killed_or_banned(e))
+		return 0;
+
+	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+	e->priority = priority;
+
+	return 0;
+}
+
+static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+{
+	struct xe_sched_msg *msg;
+
+	if (e->sched_props.timeslice_us == timeslice_us ||
+	    engine_killed_or_banned(e))
+		return 0;
+
+	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	e->sched_props.timeslice_us = timeslice_us;
+	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+static int guc_engine_set_preempt_timeout(struct xe_engine *e,
+					  u32 preempt_timeout_us)
+{
+	struct xe_sched_msg *msg;
+
+	if (e->sched_props.preempt_timeout_us == preempt_timeout_us ||
+	    engine_killed_or_banned(e))
+		return 0;
+
+	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	e->sched_props.preempt_timeout_us = preempt_timeout_us;
+	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+	XE_BUG_ON(engine_registered(e));
+	XE_BUG_ON(engine_banned(e));
+	XE_BUG_ON(engine_killed(e));
+
+	sched->base.timeout = job_timeout_ms;
+
+	return 0;
+}
+
+static int guc_engine_suspend(struct xe_engine *e)
+{
+	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND;
+
+	if (engine_killed_or_banned(e) || e->guc->suspend_pending)
+		return -EINVAL;
+
+	e->guc->suspend_pending = true;
+	guc_engine_add_msg(e, msg, SUSPEND);
+
+	return 0;
+}
+
+static void guc_engine_suspend_wait(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+
+	wait_event(e->guc->suspend_wait, !e->guc->suspend_pending ||
+		   guc_read_stopped(guc));
+}
+
+static void guc_engine_resume(struct xe_engine *e)
+{
+	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;
+
+	XE_BUG_ON(e->guc->suspend_pending);
+
+	xe_mocs_init_engine(e);
+	guc_engine_add_msg(e, msg, RESUME);
+}
+
+/*
+ * All of these functions are an abstraction layer which other parts of XE can
+ * use to trap into the GuC backend. All of these functions, aside from init,
+ * really shouldn't do much other than trap into the DRM scheduler which
+ * synchronizes these operations.
+ */
+static const struct xe_engine_ops guc_engine_ops = {
+	.init = guc_engine_init,
+	.kill = guc_engine_kill,
+	.fini = guc_engine_fini,
+	.set_priority = guc_engine_set_priority,
+	.set_timeslice = guc_engine_set_timeslice,
+	.set_preempt_timeout = guc_engine_set_preempt_timeout,
+	.set_job_timeout = guc_engine_set_job_timeout,
+	.suspend = guc_engine_suspend,
+	.suspend_wait = guc_engine_suspend_wait,
+	.resume = guc_engine_resume,
+};
+
+static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+	/* Stop scheduling + flush any DRM scheduler operations */
+	xe_sched_submission_stop(sched);
+
+	/* Clean up lost G2H + reset engine state */
+	if (engine_destroyed(e) && engine_registered(e)) {
+		if (engine_banned(e))
+			xe_engine_put(e);
+		else
+			__guc_engine_fini(guc, e);
+	}
+	if (e->guc->suspend_pending) {
+		set_engine_suspended(e);
+		suspend_fence_signal(e);
+	}
+	atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+		   &e->guc->state);
+	e->guc->resume_time = 0;
+	trace_xe_engine_stop(e);
+
+	/*
+	 * Ban any engine (aside from kernel and engines used for VM ops) with a
+	 * started but not complete job or if a job has gone through a GT reset
+	 * more than twice.
+	 */
+	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
+		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
+
+		if (job) {
+			if ((xe_sched_job_started(job) &&
+			    !xe_sched_job_completed(job)) ||
+			    xe_sched_invalidate_job(job, 2)) {
+				trace_xe_sched_job_ban(job);
+				xe_sched_tdr_queue_imm(&e->guc->sched);
+				set_engine_banned(e);
+			}
+		}
+	}
+}
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc)
+{
+	int ret;
+
+	/*
+	 * Using an atomic here rather than submission_state.lock as this
+	 * function can be called while holding the CT lock (engine reset
+	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
+	 * Atomic is not ideal, but it works to prevent against concurrent reset
+	 * and releasing any TDRs waiting on guc->submission_state.stopped.
+	 */
+	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
+	smp_wmb();
+	wake_up_all(&guc->ct.wq);
+
+	return ret;
+}
+
+void xe_guc_submit_reset_wait(struct xe_guc *guc)
+{
+	wait_event(guc->ct.wq, !guc_read_stopped(guc));
+}
+
+int xe_guc_submit_stop(struct xe_guc *guc)
+{
+	struct xe_engine *e;
+	unsigned long index;
+
+	XE_BUG_ON(guc_read_stopped(guc) != 1);
+
+	mutex_lock(&guc->submission_state.lock);
+
+	xa_for_each(&guc->submission_state.engine_lookup, index, e)
+		guc_engine_stop(guc, e);
+
+	mutex_unlock(&guc->submission_state.lock);
+
+	/*
+	 * No one can enter the backend at this point, aside from new engine
+	 * creation which is protected by guc->submission_state.lock.
+	 */
+
+	return 0;
+}
+
+static void guc_engine_start(struct xe_engine *e)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+	if (!engine_killed_or_banned(e)) {
+		int i;
+
+		trace_xe_engine_resubmit(e);
+		for (i = 0; i < e->width; ++i)
+			xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail);
+		xe_sched_resubmit_jobs(sched);
+	}
+
+	xe_sched_submission_start(sched);
+}
+
+int xe_guc_submit_start(struct xe_guc *guc)
+{
+	struct xe_engine *e;
+	unsigned long index;
+
+	XE_BUG_ON(guc_read_stopped(guc) != 1);
+
+	mutex_lock(&guc->submission_state.lock);
+	atomic_dec(&guc->submission_state.stopped);
+	xa_for_each(&guc->submission_state.engine_lookup, index, e)
+		guc_engine_start(e);
+	mutex_unlock(&guc->submission_state.lock);
+
+	wake_up_all(&guc->ct.wq);
+
+	return 0;
+}
+
+static struct xe_engine *
+g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+
+	if (unlikely(guc_id >= GUC_ID_MAX)) {
+		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+		return NULL;
+	}
+
+	e = xa_load(&guc->submission_state.engine_lookup, guc_id);
+	if (unlikely(!e)) {
+		drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+		return NULL;
+	}
+
+	XE_BUG_ON(e->guc->id != guc_id);
+
+	return e;
+}
+
+static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_DEREGISTER_CONTEXT,
+		e->guc->id,
+	};
+
+	trace_xe_engine_deregister(e);
+
+	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 2)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	if (unlikely(!engine_pending_enable(e) &&
+		     !engine_pending_disable(e))) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&e->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_engine_scheduling_done(e);
+
+	if (engine_pending_enable(e)) {
+		e->guc->resume_time = ktime_get();
+		clear_engine_pending_enable(e);
+		smp_wmb();
+		wake_up_all(&guc->ct.wq);
+	} else {
+		clear_engine_pending_disable(e);
+		if (e->guc->suspend_pending) {
+			suspend_fence_signal(e);
+		} else {
+			if (engine_banned(e)) {
+				smp_wmb();
+				wake_up_all(&guc->ct.wq);
+			}
+			deregister_engine(guc, e);
+		}
+	}
+
+	return 0;
+}
+
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	if (!engine_destroyed(e) || engine_pending_disable(e) ||
+	    engine_pending_enable(e) || engine_enabled(e)) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&e->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_engine_deregister_done(e);
+
+	clear_engine_registered(e);
+	if (engine_banned(e))
+		xe_engine_put(e);
+	else
+		__guc_engine_fini(guc, e);
+
+	return 0;
+}
+
+int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
+
+	/* FIXME: Do error capture, most likely async */
+
+	trace_xe_engine_reset(e);
+
+	/*
+	 * A banned engine is a NOP at this point (came from
+	 * guc_engine_timedout_job). Otherwise, kick drm scheduler to cancel
+	 * jobs by setting timeout of the job to the minimum value kicking
+	 * guc_engine_timedout_job.
+	 */
+	set_engine_reset(e);
+	if (!engine_banned(e))
+		xe_sched_tdr_queue_imm(&e->guc->sched);
+
+	return 0;
+}
+
+int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					   u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_engine *e;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	e = g2h_engine_lookup(guc, guc_id);
+	if (unlikely(!e))
+		return -EPROTO;
+
+	drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
+	trace_xe_engine_memory_cat_error(e);
+
+	/* Treat the same as engine reset */
+	set_engine_reset(e);
+	if (!engine_banned(e))
+		xe_sched_tdr_queue_imm(&e->guc->sched);
+
+	return 0;
+}
+
+int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u8 guc_class, instance;
+	u32 reason;
+
+	if (unlikely(len != 3)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	guc_class = msg[0];
+	instance = msg[1];
+	reason = msg[2];
+
+	/* Unexpected failure of a hardware feature, log an actual error */
+	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
+		guc_class, instance, reason);
+
+	xe_gt_reset_async(guc_to_gt(guc));
+
+	return 0;
+}
+
+static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	int i;
+
+	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
+		   e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
+	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
+		   e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
+	drm_printf(p, "\tWQ status: %u\n",
+		   parallel_read(xe, map, wq_desc.wq_status));
+	if (parallel_read(xe, map, wq_desc.head) !=
+	    parallel_read(xe, map, wq_desc.tail)) {
+		for (i = parallel_read(xe, map, wq_desc.head);
+		     i != parallel_read(xe, map, wq_desc.tail);
+		     i = (i + sizeof(u32)) % WQ_SIZE)
+			drm_printf(p, "\tWQ[%ld]: 0x%08x\n", i / sizeof(u32),
+				   parallel_read(xe, map, wq[i / sizeof(u32)]));
+	}
+}
+
+static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
+{
+	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_sched_job *job;
+	int i;
+
+	drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
+	drm_printf(p, "\tName: %s\n", e->name);
+	drm_printf(p, "\tClass: %d\n", e->class);
+	drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
+	drm_printf(p, "\tWidth: %d\n", e->width);
+	drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
+	drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout);
+	drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
+	drm_printf(p, "\tPreempt timeout: %u (us)\n",
+		   e->sched_props.preempt_timeout_us);
+	for (i = 0; i < e->width; ++i ) {
+		struct xe_lrc *lrc = e->lrc + i;
+
+		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
+			   lower_32_bits(xe_lrc_ggtt_addr(lrc)));
+		drm_printf(p, "\tLRC Head: (memory) %u\n",
+			   xe_lrc_ring_head(lrc));
+		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+			   lrc->ring.tail,
+			   xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
+		drm_printf(p, "\tStart seqno: (memory) %d\n",
+			   xe_lrc_start_seqno(lrc));
+		drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
+	}
+	drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
+	drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
+	if (xe_engine_is_parallel(e))
+		guc_engine_wq_print(e, p);
+
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(job, &sched->base.pending_list, drm.list)
+		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
+			   xe_sched_job_seqno(job),
+			   dma_fence_is_signaled(job->fence) ? 1 : 0,
+			   dma_fence_is_signaled(&job->drm.s_fence->finished) ?
+			   1 : 0);
+	spin_unlock(&sched->base.job_list_lock);
+}
+
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_engine *e;
+	unsigned long index;
+
+	if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
+		return;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.engine_lookup, index, e)
+		guc_engine_print(e, p);
+	mutex_unlock(&guc->submission_state.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
new file mode 100644
index 0000000000000..8002734d6f24d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_H_
+#define _XE_GUC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_engine;
+struct xe_guc;
+
+int xe_guc_submit_init(struct xe_guc *guc);
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc);
+void xe_guc_submit_reset_wait(struct xe_guc *guc);
+int xe_guc_submit_stop(struct xe_guc *guc);
+int xe_guc_submit_start(struct xe_guc *guc);
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					   u32 len);
+int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
new file mode 100644
index 0000000000000..ca177853cc12c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_TYPES_H_
+#define _XE_GUC_TYPES_H_
+
+#include <linux/idr.h>
+#include <linux/xarray.h>
+
+#include "xe_guc_ads_types.h"
+#include "xe_guc_ct_types.h"
+#include "xe_guc_fwif.h"
+#include "xe_guc_log_types.h"
+#include "xe_guc_pc_types.h"
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_guc - Graphic micro controller
+ */
+struct xe_guc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+	/** @log: GuC log */
+	struct xe_guc_log log;
+	/** @ads: GuC ads */
+	struct xe_guc_ads ads;
+	/** @ct: GuC ct */
+	struct xe_guc_ct ct;
+	/** @pc: GuC Power Conservation */
+	struct xe_guc_pc pc;
+	/** @submission_state: GuC submission state */
+	struct {
+		/** @engine_lookup: Lookup an xe_engine from guc_id */
+		struct xarray engine_lookup;
+		/** @guc_ids: used to allocate new guc_ids, single-lrc */
+		struct ida guc_ids;
+		/** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
+		unsigned long *guc_ids_bitmap;
+		/** @stopped: submissions are stopped */
+		atomic_t stopped;
+		/** @lock: protects submission state */
+		struct mutex lock;
+		/** @suspend: suspend fence state */
+		struct {
+			/** @lock: suspend fences lock */
+			spinlock_t lock;
+			/** @context: suspend fences context */
+			u64 context;
+			/** @seqno: suspend fences seqno */
+			u32 seqno;
+		} suspend;
+	} submission_state;
+	/** @hwconfig: Hardware config state */
+	struct {
+		/** @bo: buffer object of the hardware config */
+		struct xe_bo *bo;
+		/** @size: size of the hardware config */
+		u32 size;
+	} hwconfig;
+
+	/**
+	 * @notify_reg: Register which is written to notify GuC of H2G messages
+	 */
+	u32 notify_reg;
+	/** @params: Control params for fw initialization */
+	u32 params[GUC_CTL_MAX_DWORDS];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
new file mode 100644
index 0000000000000..93b22fac6e149
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_reg.h"
+#include "xe_huc.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+static struct xe_gt *
+huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+static struct xe_device *
+huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(huc_to_gt(huc));
+}
+
+static struct xe_guc *
+huc_to_guc(struct xe_huc *huc)
+{
+	return &container_of(huc, struct xe_uc, huc)->guc;
+}
+
+int xe_huc_init(struct xe_huc *huc)
+{
+	struct xe_device *xe = huc_to_xe(huc);
+	int ret;
+
+	huc->fw.type = XE_UC_FW_TYPE_HUC;
+	ret = xe_uc_fw_init(&huc->fw);
+	if (ret)
+		goto out;
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out:
+	if (xe_uc_fw_is_disabled(&huc->fw)) {
+		drm_info(&xe->drm, "HuC disabled\n");
+		return 0;
+	}
+	drm_err(&xe->drm, "HuC init failed with %d", ret);
+	return ret;
+}
+
+int xe_huc_upload(struct xe_huc *huc)
+{
+	if (xe_uc_fw_is_disabled(&huc->fw))
+		return 0;
+	return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
+}
+
+int xe_huc_auth(struct xe_huc *huc)
+{
+	struct xe_device *xe = huc_to_xe(huc);
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_guc *guc = huc_to_guc(huc);
+	int ret;
+	if (xe_uc_fw_is_disabled(&huc->fw))
+		return 0;
+
+	XE_BUG_ON(xe_uc_fw_is_running(&huc->fw));
+
+	if (!xe_uc_fw_is_loaded(&huc->fw))
+		return -ENOEXEC;
+
+	ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
+			      xe_uc_fw_rsa_offset(&huc->fw));
+	if (ret) {
+		drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n",
+			ret);
+		goto fail;
+	}
+
+	ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
+			     HUC_LOAD_SUCCESSFUL,
+			     HUC_LOAD_SUCCESSFUL, 100);
+	if (ret) {
+		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
+		goto fail;
+	}
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+	drm_dbg(&xe->drm, "HuC authenticated\n");
+
+	return 0;
+
+fail:
+	drm_err(&xe->drm, "HuC authentication failed %d\n", ret);
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+
+	return ret;
+}
+
+void xe_huc_sanitize(struct xe_huc *huc)
+{
+	if (xe_uc_fw_is_disabled(&huc->fw))
+		return;
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	int err;
+
+	xe_uc_fw_print(&huc->fw, p);
+
+	if (xe_uc_fw_is_disabled(&huc->fw))
+		return;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return;
+
+	drm_printf(p, "\nHuC status: 0x%08x\n",
+		   xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg));
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
new file mode 100644
index 0000000000000..5802c43b6ce21
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_H_
+#define _XE_HUC_H_
+
+#include "xe_huc_types.h"
+
+struct drm_printer;
+
+int xe_huc_init(struct xe_huc *huc);
+int xe_huc_upload(struct xe_huc *huc);
+int xe_huc_auth(struct xe_huc *huc);
+void xe_huc_sanitize(struct xe_huc *huc);
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
new file mode 100644
index 0000000000000..268bac36336a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_huc.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+
+static struct xe_gt *
+huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+static struct xe_device *
+huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(huc_to_gt(huc));
+}
+
+static struct xe_huc *node_to_huc(struct drm_info_node *node)
+{
+	return node->info_ent->data;
+}
+
+static int huc_info(struct seq_file *m, void *data)
+{
+	struct xe_huc *huc = node_to_huc(m->private);
+	struct xe_device *xe = huc_to_xe(huc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_huc_print_info(huc, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"huc_info", huc_info, 0},
+};
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
+{
+	struct drm_minor *minor = huc_to_xe(huc)->drm.primary;
+	struct drm_info_list *local;
+	int i;
+
+#define DEBUGFS_SIZE	ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)
+	local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local) {
+		XE_WARN_ON("Couldn't allocate memory");
+		return;
+	}
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = huc;
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h
new file mode 100644
index 0000000000000..ec58f18188048
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_DEBUGFS_H_
+#define _XE_HUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_huc;
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h
new file mode 100644
index 0000000000000..cae6d19097df2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_TYPES_H_
+#define _XE_HUC_TYPES_H_
+
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_huc - HuC
+ */
+struct xe_huc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
new file mode 100644
index 0000000000000..fd89dd90131c9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_engine.h"
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_reg_sr.h"
+#include "xe_sched_job.h"
+#include "xe_wa.h"
+
+#include "gt/intel_engine_regs.h"
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+#define MAX_MMIO_BASES 3
+struct engine_info {
+	const char *name;
+	unsigned int class : 8;
+	unsigned int instance : 8;
+	enum xe_force_wake_domains domain;
+	/* mmio bases table *must* be sorted in reverse graphics_ver order */
+	struct engine_mmio_base {
+		unsigned int graphics_ver : 8;
+		unsigned int base : 24;
+	} mmio_bases[MAX_MMIO_BASES];
+};
+
+static const struct engine_info engine_infos[] = {
+	[XE_HW_ENGINE_RCS0] = {
+		.name = "rcs0",
+		.class = XE_ENGINE_CLASS_RENDER,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 1, .base = RENDER_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS0] = {
+		.name = "bcs0",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 6, .base = BLT_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS1] = {
+		.name = "bcs1",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS2] = {
+		.name = "bcs2",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS3] = {
+		.name = "bcs3",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS4] = {
+		.name = "bcs4",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 4,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS5] = {
+		.name = "bcs5",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 5,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS6] = {
+		.name = "bcs6",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 6,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS7] = {
+		.name = "bcs7",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 7,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_BCS8] = {
+		.name = "bcs8",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 8,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
+		},
+	},
+
+	[XE_HW_ENGINE_VCS0] = {
+		.name = "vcs0",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VDBOX0,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
+			{ .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
+			{ .graphics_ver = 4, .base = BSD_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS1] = {
+		.name = "vcs1",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VDBOX1,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
+			{ .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS2] = {
+		.name = "vcs2",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VDBOX2,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS3] = {
+		.name = "vcs3",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VDBOX3,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS4] = {
+		.name = "vcs4",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 4,
+		.domain = XE_FW_MEDIA_VDBOX4,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS5] = {
+		.name = "vcs5",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 5,
+		.domain = XE_FW_MEDIA_VDBOX5,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS6] = {
+		.name = "vcs6",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 6,
+		.domain = XE_FW_MEDIA_VDBOX6,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VCS7] = {
+		.name = "vcs7",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 7,
+		.domain = XE_FW_MEDIA_VDBOX7,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS0] = {
+		.name = "vecs0",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VEBOX0,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
+			{ .graphics_ver = 7, .base = VEBOX_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS1] = {
+		.name = "vecs1",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VEBOX1,
+		.mmio_bases = {
+			{ .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS2] = {
+		.name = "vecs2",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VEBOX2,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_VECS3] = {
+		.name = "vecs3",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VEBOX3,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
+		},
+	},
+	[XE_HW_ENGINE_CCS0] = {
+		.name = "ccs0",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE },
+		},
+	},
+	[XE_HW_ENGINE_CCS1] = {
+		.name = "ccs1",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE },
+		},
+	},
+	[XE_HW_ENGINE_CCS2] = {
+		.name = "ccs2",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE },
+		},
+	},
+	[XE_HW_ENGINE_CCS3] = {
+		.name = "ccs3",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE },
+		},
+	},
+};
+
+static u32 engine_info_mmio_base(const struct engine_info *info,
+				 unsigned int graphics_ver)
+{
+	int i;
+
+	for (i = 0; i < MAX_MMIO_BASES; i++)
+		if (graphics_ver >= info->mmio_bases[i].graphics_ver)
+			break;
+
+	XE_BUG_ON(i == MAX_MMIO_BASES);
+	XE_BUG_ON(!info->mmio_bases[i].base);
+
+	return info->mmio_bases[i].base;
+}
+
+static void hw_engine_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_hw_engine *hwe = arg;
+
+	if (hwe->exl_port)
+		xe_execlist_port_destroy(hwe->exl_port);
+	xe_lrc_finish(&hwe->kernel_lrc);
+
+	xe_bo_unpin_map_no_vm(hwe->hwsp);
+
+	hwe->gt = NULL;
+}
+
+static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val)
+{
+	XE_BUG_ON(reg & hwe->mmio_base);
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+	xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val);
+}
+
+static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg)
+{
+	XE_BUG_ON(reg & hwe->mmio_base);
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+	return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base);
+}
+
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
+{
+	u32 ccs_mask =
+		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0))
+		xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
+				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+
+	hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0);
+	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg,
+			       xe_bo_ggtt_addr(hwe->hwsp));
+	hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg,
+			       _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+	hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg,
+			       _MASKED_BIT_DISABLE(STOP_RING));
+	hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);
+}
+
+static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
+				 enum xe_hw_engine_id id)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct engine_info *info;
+
+	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
+		return;
+
+	if (!(gt->info.engine_mask & BIT(id)))
+		return;
+
+	info = &engine_infos[id];
+
+	XE_BUG_ON(hwe->gt);
+
+	hwe->gt = gt;
+	hwe->class = info->class;
+	hwe->instance = info->instance;
+	hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe));
+	hwe->domain = info->domain;
+	hwe->name = info->name;
+	hwe->fence_irq = &gt->fence_irq[info->class];
+	hwe->engine_id = id;
+
+	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
+	xe_wa_process_engine(hwe);
+
+	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
+	xe_reg_whitelist_process_engine(hwe);
+}
+
+static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
+			  enum xe_hw_engine_id id)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
+	XE_BUG_ON(!(gt->info.engine_mask & BIT(id)));
+
+	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+	xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt);
+
+	hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel,
+					XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(hwe->hwsp)) {
+		err = PTR_ERR(hwe->hwsp);
+		goto err_name;
+	}
+
+	err = xe_bo_pin(hwe->hwsp);
+	if (err)
+		goto err_unlock_put_hwsp;
+
+	err = xe_bo_vmap(hwe->hwsp);
+	if (err)
+		goto err_unpin_hwsp;
+
+	xe_bo_unlock_no_vm(hwe->hwsp);
+
+	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
+	if (err)
+		goto err_hwsp;
+
+	if (!xe_device_guc_submission_enabled(xe)) {
+		hwe->exl_port = xe_execlist_port_create(xe, hwe);
+		if (IS_ERR(hwe->exl_port)) {
+			err = PTR_ERR(hwe->exl_port);
+			goto err_kernel_lrc;
+		}
+	}
+
+	if (xe_device_guc_submission_enabled(xe))
+		xe_hw_engine_enable_ring(hwe);
+
+	/* We reserve the highest BCS instance for USM */
+	if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY)
+		gt->usm.reserved_bcs_instance = hwe->instance;
+
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
+	if (err)
+		return err;
+
+	return 0;
+
+err_unpin_hwsp:
+	xe_bo_unpin(hwe->hwsp);
+err_unlock_put_hwsp:
+	xe_bo_unlock_no_vm(hwe->hwsp);
+	xe_bo_put(hwe->hwsp);
+err_kernel_lrc:
+	xe_lrc_finish(&hwe->kernel_lrc);
+err_hwsp:
+	xe_bo_put(hwe->hwsp);
+err_name:
+	hwe->name = NULL;
+
+	return err;
+}
+
+static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
+{
+	int class;
+
+	/* FIXME: Doing a simple logical mapping that works for most hardware */
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+		int logical_instance = 0;
+
+		for_each_hw_engine(hwe, gt, id)
+			if (hwe->class == class)
+				hwe->logical_instance = logical_instance++;
+	}
+}
+
+static void read_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 media_fuse;
+	u16 vdbox_mask;
+	u16 vebox_mask;
+	u32 bcs_mask;
+	int i, j;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/*
+	 * FIXME: Hack job, thinking we should have table of vfuncs for each
+	 * class which picks the correct vfunc based on IP version.
+	 */
+
+	media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg);
+	if (GRAPHICS_VERx100(xe) < 1250)
+		media_fuse = ~media_fuse;
+
+	vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
+	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
+		      GEN11_GT_VEBOX_DISABLE_SHIFT;
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j) & vdbox_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "vcs%u fused off\n", j);
+		}
+	}
+
+	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j) & vebox_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "vecs%u fused off\n", j);
+		}
+	}
+
+	bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg);
+	bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask);
+
+	for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j/2) & bcs_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "bcs%u fused off\n", j);
+		}
+	}
+
+	/* TODO: compute engines */
+}
+
+int xe_hw_engines_init_early(struct xe_gt *gt)
+{
+	int i;
+
+	read_fuses(gt);
+
+	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
+		hw_engine_init_early(gt, &gt->hw_engines[i], i);
+
+	return 0;
+}
+
+int xe_hw_engines_init(struct xe_gt *gt)
+{
+	int err;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		err = hw_engine_init(gt, hwe, id);
+		if (err)
+			return err;
+	}
+
+	hw_engine_setup_logical_mapping(gt);
+
+	return 0;
+}
+
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
+{
+	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
+
+	if (hwe->irq_handler)
+		hwe->irq_handler(hwe, intr_vec);
+
+	if (intr_vec & GT_RENDER_USER_INTERRUPT)
+		xe_hw_fence_irq_run(hwe->fence_irq);
+}
+
+void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
+{
+	if (!xe_hw_engine_is_valid(hwe))
+		return;
+
+	drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
+		hwe->logical_instance);
+	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+		hwe->domain,
+		xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain));
+	drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base);
+
+	drm_printf(p, "\tHWSTAM: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg));
+	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg));
+
+	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg));
+	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg));
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
+		hw_engine_mmio_read32(hwe,
+					 RING_EXECLIST_SQ_CONTENTS(0).reg));
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
+		hw_engine_mmio_read32(hwe,
+					 RING_EXECLIST_SQ_CONTENTS(0).reg) + 4);
+	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg));
+
+	drm_printf(p, "\tRING_START: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_START(0).reg));
+	drm_printf(p, "\tRING_HEAD:  0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR);
+	drm_printf(p, "\tRING_TAIL:  0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR);
+	drm_printf(p, "\tRING_CTL: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_CTL(0).reg));
+	drm_printf(p, "\tRING_MODE: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg));
+	drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg));
+
+	drm_printf(p, "\tRING_IMR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_IMR(0).reg));
+	drm_printf(p, "\tRING_ESR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_ESR(0).reg));
+	drm_printf(p, "\tRING_EMR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EMR(0).reg));
+	drm_printf(p, "\tRING_EIR:   0x%08x\n",
+		hw_engine_mmio_read32(hwe, RING_EIR(0).reg));
+
+        drm_printf(p, "\tACTHD:  0x%08x_%08x\n",
+		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg),
+		hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg));
+        drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
+		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg),
+		hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg));
+        drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg),
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg));
+
+	drm_printf(p, "\tIPEIR: 0x%08x\n",
+		hw_engine_mmio_read32(hwe, IPEIR(0).reg));
+	drm_printf(p, "\tIPEHR: 0x%08x\n\n",
+		hw_engine_mmio_read32(hwe, IPEHR(0).reg));
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n",
+			xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg));
+
+}
+
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class)
+{
+	u32 mask = 0;
+	enum xe_hw_engine_id id;
+
+	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+		if (engine_infos[id].class == engine_class &&
+		    gt->info.engine_mask & BIT(id))
+			mask |= BIT(engine_infos[id].instance);
+	}
+	return mask;
+}
+
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+
+	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+		hwe->instance == gt->usm.reserved_bcs_instance;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
new file mode 100644
index 0000000000000..ceab653972568
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_H_
+#define _XE_HW_ENGINE_H_
+
+#include "xe_hw_engine_types.h"
+
+struct drm_printer;
+
+int xe_hw_engines_init_early(struct xe_gt *gt);
+int xe_hw_engines_init(struct xe_gt *gt);
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
+void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p);
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class);
+
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
+static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
+{
+	return hwe->name;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
new file mode 100644
index 0000000000000..05a2fdc381d75
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_TYPES_H_
+#define _XE_HW_ENGINE_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_lrc_types.h"
+#include "xe_reg_sr_types.h"
+
+/* See "Engine ID Definition" struct in the Icelake PRM */
+enum xe_engine_class {
+	XE_ENGINE_CLASS_RENDER = 0,
+	XE_ENGINE_CLASS_VIDEO_DECODE = 1,
+	XE_ENGINE_CLASS_VIDEO_ENHANCE = 2,
+	XE_ENGINE_CLASS_COPY = 3,
+	XE_ENGINE_CLASS_OTHER = 4,
+	XE_ENGINE_CLASS_COMPUTE = 5,
+	XE_ENGINE_CLASS_MAX = 6,
+};
+
+enum xe_hw_engine_id {
+	XE_HW_ENGINE_RCS0,
+	XE_HW_ENGINE_BCS0,
+	XE_HW_ENGINE_BCS1,
+	XE_HW_ENGINE_BCS2,
+	XE_HW_ENGINE_BCS3,
+	XE_HW_ENGINE_BCS4,
+	XE_HW_ENGINE_BCS5,
+	XE_HW_ENGINE_BCS6,
+	XE_HW_ENGINE_BCS7,
+	XE_HW_ENGINE_BCS8,
+	XE_HW_ENGINE_VCS0,
+	XE_HW_ENGINE_VCS1,
+	XE_HW_ENGINE_VCS2,
+	XE_HW_ENGINE_VCS3,
+	XE_HW_ENGINE_VCS4,
+	XE_HW_ENGINE_VCS5,
+	XE_HW_ENGINE_VCS6,
+	XE_HW_ENGINE_VCS7,
+	XE_HW_ENGINE_VECS0,
+	XE_HW_ENGINE_VECS1,
+	XE_HW_ENGINE_VECS2,
+	XE_HW_ENGINE_VECS3,
+	XE_HW_ENGINE_CCS0,
+	XE_HW_ENGINE_CCS1,
+	XE_HW_ENGINE_CCS2,
+	XE_HW_ENGINE_CCS3,
+	XE_NUM_HW_ENGINES,
+};
+
+/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */
+#define XE_HW_ENGINE_MAX_INSTANCE	9
+
+struct xe_bo;
+struct xe_execlist_port;
+struct xe_gt;
+
+/**
+ * struct xe_hw_engine - Hardware engine
+ *
+ * Contains all the hardware engine state for physical instances.
+ */
+struct xe_hw_engine {
+	/** @gt: graphics tile this hw engine belongs to */
+	struct xe_gt *gt;
+	/** @name: name of this hw engine */
+	const char *name;
+	/** @class: class of this hw engine */
+	enum xe_engine_class class;
+	/** @instance: physical instance of this hw engine */
+	u16 instance;
+	/** @logical_instance: logical instance of this hw engine */
+	u16 logical_instance;
+	/** @mmio_base: MMIO base address of this hw engine*/
+	u32 mmio_base;
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+	/**
+	 * @reg_whitelist: table with registers to be whitelisted
+	 */
+	struct xe_reg_sr reg_whitelist;
+	/**
+	 * @reg_lrc: LRC workaround registers
+	 */
+	struct xe_reg_sr reg_lrc;
+	/** @domain: force wake domain of this hw engine */
+	enum xe_force_wake_domains domain;
+	/** @hwsp: hardware status page buffer object */
+	struct xe_bo *hwsp;
+	/** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */
+	struct xe_lrc kernel_lrc;
+	/** @exl_port: execlists port */
+	struct xe_execlist_port *exl_port;
+	/** @fence_irq: fence IRQ to run when a hw engine IRQ is received */
+	struct xe_hw_fence_irq *fence_irq;
+	/** @irq_handler: IRQ handler to run when hw engine IRQ is received */
+	void (*irq_handler)(struct xe_hw_engine *, u16);
+	/** @engine_id: id  for this hw engine */
+	enum xe_hw_engine_id engine_id;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
new file mode 100644
index 0000000000000..e56ca28675451
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_fence.h"
+
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+static struct kmem_cache *xe_hw_fence_slab;
+
+int __init xe_hw_fence_module_init(void)
+{
+	xe_hw_fence_slab = kmem_cache_create("xe_hw_fence",
+					     sizeof(struct xe_hw_fence), 0,
+					     SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_hw_fence_slab)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void xe_hw_fence_module_exit(void)
+{
+	rcu_barrier();
+	kmem_cache_destroy(xe_hw_fence_slab);
+}
+
+static struct xe_hw_fence *fence_alloc(void)
+{
+	return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL);
+}
+
+static void fence_free(struct rcu_head *rcu)
+{
+	struct xe_hw_fence *fence =
+		container_of(rcu, struct xe_hw_fence, dma.rcu);
+
+	if (!WARN_ON_ONCE(!fence))
+		kmem_cache_free(xe_hw_fence_slab, fence);
+}
+
+static void hw_fence_irq_run_cb(struct irq_work *work)
+{
+	struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work);
+	struct xe_hw_fence *fence, *next;
+	bool tmp;
+
+	tmp = dma_fence_begin_signalling();
+	spin_lock(&irq->lock);
+	if (irq->enabled) {
+		list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+			struct dma_fence *dma_fence = &fence->dma;
+
+			trace_xe_hw_fence_try_signal(fence);
+			if (dma_fence_is_signaled_locked(dma_fence)) {
+				trace_xe_hw_fence_signal(fence);
+				list_del_init(&fence->irq_link);
+				dma_fence_put(dma_fence);
+			}
+		}
+	}
+	spin_unlock(&irq->lock);
+	dma_fence_end_signalling(tmp);
+}
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_init(&irq->lock);
+	init_irq_work(&irq->work, hw_fence_irq_run_cb);
+	INIT_LIST_HEAD(&irq->pending);
+	irq->enabled = true;
+}
+
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
+{
+	struct xe_hw_fence *fence, *next;
+	unsigned long flags;
+	int err;
+	bool tmp;
+
+	if (XE_WARN_ON(!list_empty(&irq->pending))) {
+		tmp = dma_fence_begin_signalling();
+		spin_lock_irqsave(&irq->lock, flags);
+		list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+			list_del_init(&fence->irq_link);
+			err = dma_fence_signal_locked(&fence->dma);
+			dma_fence_put(&fence->dma);
+			XE_WARN_ON(err);
+		}
+		spin_unlock_irqrestore(&irq->lock, flags);
+		dma_fence_end_signalling(tmp);
+	}
+}
+
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
+{
+	irq_work_queue(&irq->work);
+}
+
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = false;
+	spin_unlock_irq(&irq->lock);
+}
+
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = true;
+	spin_unlock_irq(&irq->lock);
+
+	irq_work_queue(&irq->work);
+}
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name)
+{
+	ctx->gt = gt;
+	ctx->irq = irq;
+	ctx->dma_fence_ctx = dma_fence_context_alloc(1);
+	ctx->next_seqno = 1;
+	sprintf(ctx->name, "%s", name);
+}
+
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
+{
+}
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);
+
+static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
+{
+	return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
+}
+
+static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+	return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev);
+}
+
+static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+	return fence->ctx->name;
+}
+
+static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+	struct xe_device *xe = gt_to_xe(fence->ctx->gt);
+	u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
+
+	return dma_fence->error ||
+		(s32)fence->dma.seqno <= (s32)seqno;
+}
+
+static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+	struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence);
+
+	dma_fence_get(dma_fence);
+	list_add_tail(&fence->irq_link, &irq->pending);
+
+	/* SW completed (no HW IRQ) so kick handler to signal fence */
+	if (xe_hw_fence_signaled(dma_fence))
+		xe_hw_fence_irq_run(irq);
+
+	return true;
+}
+
+static void xe_hw_fence_release(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+	trace_xe_hw_fence_free(fence);
+	XE_BUG_ON(!list_empty(&fence->irq_link));
+	call_rcu(&dma_fence->rcu, fence_free);
+}
+
+static const struct dma_fence_ops xe_hw_fence_ops = {
+	.get_driver_name = xe_hw_fence_get_driver_name,
+	.get_timeline_name = xe_hw_fence_get_timeline_name,
+	.enable_signaling = xe_hw_fence_enable_signaling,
+	.signaled = xe_hw_fence_signaled,
+	.release = xe_hw_fence_release,
+};
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence)
+{
+	if (XE_WARN_ON(fence->ops != &xe_hw_fence_ops))
+		return NULL;
+
+	return container_of(fence, struct xe_hw_fence, dma);
+}
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map)
+{
+	struct xe_hw_fence *fence;
+
+	fence = fence_alloc();
+	if (!fence)
+		return ERR_PTR(-ENOMEM);
+
+	dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
+		       ctx->dma_fence_ctx, ctx->next_seqno++);
+
+	fence->ctx = ctx;
+	fence->seqno_map = seqno_map;
+	INIT_LIST_HEAD(&fence->irq_link);
+
+	trace_xe_hw_fence_create(fence);
+
+	return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h
new file mode 100644
index 0000000000000..07f202db65269
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_H_
+#define _XE_HW_FENCE_H_
+
+#include "xe_hw_fence_types.h"
+
+int xe_hw_fence_module_init(void);
+void xe_hw_fence_module_exit(void);
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq);
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name);
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx);
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
new file mode 100644
index 0000000000000..a78e50eb3cb87
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_TYPES_H_
+#define _XE_HW_FENCE_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_hw_fence_irq - hardware fence IRQ handler
+ *
+ * One per engine class, signals completed xe_hw_fences, triggered via hw engine
+ * interrupt. On each trigger, search list of pending fences and signal.
+ */
+struct xe_hw_fence_irq {
+	/** @lock: protects all xe_hw_fences + pending list */
+	spinlock_t lock;
+	/** @work: IRQ worker run to signal the fences */
+	struct irq_work work;
+	/** @pending: list of pending xe_hw_fences */
+	struct list_head pending;
+	/** @enabled: fence signaling enabled */
+	bool enabled;
+};
+
+#define MAX_FENCE_NAME_LEN	16
+
+/**
+ * struct xe_hw_fence_ctx - hardware fence context
+ *
+ * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points
+ * to a xe_hw_fence_irq, maintains serial seqno.
+ */
+struct xe_hw_fence_ctx {
+	/** @gt: graphics tile of hardware fence context */
+	struct xe_gt *gt;
+	/** @irq: fence irq handler */
+	struct xe_hw_fence_irq *irq;
+	/** @dma_fence_ctx: dma fence context for hardware fence */
+	u64 dma_fence_ctx;
+	/** @next_seqno: next seqno for hardware fence */
+	u32 next_seqno;
+	/** @name: name of hardware fence context */
+	char name[MAX_FENCE_NAME_LEN];
+};
+
+/**
+ * struct xe_hw_fence - hardware fence
+ *
+ * Used to indicate a xe_sched_job is complete via a seqno written to memory.
+ * Signals on error or seqno past.
+ */
+struct xe_hw_fence {
+	/** @dma: base dma fence for hardware fence context */
+	struct dma_fence dma;
+	/** @ctx: hardware fence context */
+	struct xe_hw_fence_ctx *ctx;
+	/** @seqno_map: I/O map for seqno */
+	struct iosys_map seqno_map;
+	/** @irq_link: Link in struct xe_hw_fence_irq.pending */
+	struct list_head irq_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
new file mode 100644
index 0000000000000..df2e3573201df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/sched/clock.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_drv.h"
+#include "xe_guc.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_mmio.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
+{
+	u32 val = xe_mmio_read32(gt, reg.reg);
+
+	if (val == 0)
+		return;
+
+	drm_WARN(&gt_to_xe(gt)->drm, 1,
+		 "Interrupt register 0x%x is not zero: 0x%08x\n",
+		 reg.reg, val);
+	xe_mmio_write32(gt, reg.reg, 0xffffffff);
+	xe_mmio_read32(gt, reg.reg);
+	xe_mmio_write32(gt, reg.reg, 0xffffffff);
+	xe_mmio_read32(gt, reg.reg);
+}
+
+static void gen3_irq_init(struct xe_gt *gt,
+			  i915_reg_t imr, u32 imr_val,
+			  i915_reg_t ier, u32 ier_val,
+			  i915_reg_t iir)
+{
+	gen3_assert_iir_is_zero(gt, iir);
+
+	xe_mmio_write32(gt, ier.reg, ier_val);
+	xe_mmio_write32(gt, imr.reg, imr_val);
+	xe_mmio_read32(gt, imr.reg);
+}
+#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \
+	gen3_irq_init((gt), \
+		      type##IMR, imr_val, \
+		      type##IER, ier_val, \
+		      type##IIR)
+
+static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
+			   i915_reg_t ier)
+{
+	xe_mmio_write32(gt, imr.reg, 0xffffffff);
+	xe_mmio_read32(gt, imr.reg);
+
+	xe_mmio_write32(gt, ier.reg, 0);
+
+	/* IIR can theoretically queue up two events. Be paranoid. */
+	xe_mmio_write32(gt, iir.reg, 0xffffffff);
+	xe_mmio_read32(gt, iir.reg);
+	xe_mmio_write32(gt, iir.reg, 0xffffffff);
+	xe_mmio_read32(gt, iir.reg);
+}
+#define GEN3_IRQ_RESET(gt, type) \
+	gen3_irq_reset((gt), type##IMR, type##IIR, type##IER)
+
+static u32 gen11_intr_disable(struct xe_gt *gt)
+{
+	xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0);
+
+	/*
+	 * Now with master disabled, get a sample of level indications
+	 * for this interrupt. Indications will be cleared on related acks.
+	 * New indications can and will light up during processing,
+	 * and will generate new interrupt after enabling master.
+	 */
+	return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+}
+
+static u32
+gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
+{
+	u32 iir;
+
+	if (!(master_ctl & GEN11_GU_MISC_IRQ))
+		return 0;
+
+	iir = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg);
+	if (likely(iir))
+		xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, iir);
+
+	return iir;
+}
+
+static inline void gen11_intr_enable(struct xe_gt *gt, bool stall)
+{
+	xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ);
+	if (stall)
+		xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+}
+
+static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	u32 irqs, dmask, smask;
+	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+	if (xe_device_guc_submission_enabled(xe)) {
+		irqs = GT_RENDER_USER_INTERRUPT |
+			GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
+	} else {
+		irqs = GT_RENDER_USER_INTERRUPT |
+		       GT_CS_MASTER_ERROR_INTERRUPT |
+		       GT_CONTEXT_SWITCH_INTERRUPT |
+		       GT_WAIT_SEMAPHORE_INTERRUPT;
+	}
+
+	dmask = irqs << 16 | irqs;
+	smask = irqs << 16;
+
+	/* Enable RCS, BCS, VCS and VECS class interrupts. */
+	xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask);
+	xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask);
+	if (ccs_mask)
+		xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask);
+
+	/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
+	xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask);
+	xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask);
+	if (bcs_mask & (BIT(1)|BIT(2)))
+		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask);
+	if (bcs_mask & (BIT(3)|BIT(4)))
+		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask);
+	if (bcs_mask & (BIT(5)|BIT(6)))
+		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask);
+	if (bcs_mask & (BIT(7)|BIT(8)))
+		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask);
+	xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask);
+	xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask);
+	//if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
+	//	intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask);
+	//if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
+	//	intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask);
+	xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask);
+	//if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
+	//	intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask);
+	if (ccs_mask & (BIT(0)|BIT(1)))
+		xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask);
+	if (ccs_mask & (BIT(2)|BIT(3)))
+		xe_mmio_write32(gt,  GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask);
+
+	/*
+	 * RPS interrupts will get enabled/disabled on demand when RPS itself
+	 * is enabled/disabled.
+	 */
+	/* TODO: gt->pm_ier, gt->pm_imr */
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
+
+	/* Same thing for GuC interrupts */
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg,  ~0);
+}
+
+static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	/* TODO: PCH */
+
+	gen11_gt_irq_postinstall(xe, gt);
+
+	GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
+		      GEN11_GU_MISC_GSE);
+
+	gen11_intr_enable(gt, true);
+}
+
+static u32
+gen11_gt_engine_identity(struct xe_device *xe,
+			 struct xe_gt *gt,
+			 const unsigned int bank,
+			 const unsigned int bit)
+{
+	u32 timeout_ts;
+	u32 ident;
+
+	lockdep_assert_held(&xe->irq.lock);
+
+	xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit));
+
+	/*
+	 * NB: Specs do not specify how long to spin wait,
+	 * so we do ~100us as an educated guess.
+	 */
+	timeout_ts = (local_clock() >> 10) + 100;
+	do {
+		ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg);
+	} while (!(ident & GEN11_INTR_DATA_VALID) &&
+		 !time_after32(local_clock() >> 10, timeout_ts));
+
+	if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) {
+		drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
+			bank, bit, ident);
+		return 0;
+	}
+
+	xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg,
+			GEN11_INTR_DATA_VALID);
+
+	return ident;
+}
+
+#define   OTHER_MEDIA_GUC_INSTANCE           16
+
+static void
+gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
+{
+	if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt))
+		return xe_guc_irq_handler(&gt->uc.guc, iir);
+	if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
+		return xe_guc_irq_handler(&gt->uc.guc, iir);
+
+	if (instance != OTHER_GUC_INSTANCE &&
+	    instance != OTHER_MEDIA_GUC_INSTANCE) {
+		WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
+			  instance, iir);
+	}
+}
+
+static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
+				 u32 master_ctl, long unsigned int *intr_dw,
+				 u32 *identity)
+{
+	unsigned int bank, bit;
+	u16 instance, intr_vec;
+	enum xe_engine_class class;
+	struct xe_hw_engine *hwe;
+
+	spin_lock(&xe->irq.lock);
+
+	for (bank = 0; bank < 2; bank++) {
+		if (!(master_ctl & GEN11_GT_DW_IRQ(bank)))
+			continue;
+
+		if (!xe_gt_is_media_type(gt)) {
+			intr_dw[bank] =
+				xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg);
+			for_each_set_bit(bit, intr_dw + bank, 32)
+				identity[bit] = gen11_gt_engine_identity(xe, gt,
+									 bank,
+									 bit);
+			xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg,
+					intr_dw[bank]);
+		}
+
+		for_each_set_bit(bit, intr_dw + bank, 32) {
+			class = GEN11_INTR_ENGINE_CLASS(identity[bit]);
+			instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]);
+			intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]);
+
+			if (class == XE_ENGINE_CLASS_OTHER) {
+				gen11_gt_other_irq_handler(gt, instance,
+							   intr_vec);
+				continue;
+			}
+
+			hwe = xe_gt_hw_engine(gt, class, instance, false);
+			if (!hwe)
+				continue;
+
+			xe_hw_engine_handle_irq(hwe, intr_vec);
+		}
+	}
+
+	spin_unlock(&xe->irq.lock);
+}
+
+static irqreturn_t gen11_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);	/* Only 1 GT here */
+	u32 master_ctl, gu_misc_iir;
+	long unsigned int intr_dw[2];
+	u32 identity[32];
+
+	master_ctl = gen11_intr_disable(gt);
+	if (!master_ctl) {
+		gen11_intr_enable(gt, false);
+		return IRQ_NONE;
+	}
+
+	gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+
+	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
+
+	gen11_intr_enable(gt, false);
+
+	return IRQ_HANDLED;
+}
+
+static u32 dg1_intr_disable(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	u32 val;
+
+	/* First disable interrupts */
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0);
+
+	/* Get the indication levels and ack the master unit */
+	val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+	if (unlikely(!val))
+		return 0;
+
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, val);
+
+	return val;
+}
+
+static void dg1_intr_enable(struct xe_device *xe, bool stall)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ);
+	if (stall)
+		xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+}
+
+static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	gen11_gt_irq_postinstall(xe, gt);
+
+	GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
+		      GEN11_GU_MISC_GSE);
+
+	if (gt->info.id + 1 == xe->info.tile_count)
+		dg1_intr_enable(xe, true);
+}
+
+static irqreturn_t dg1_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_gt *gt;
+	u32 master_tile_ctl, master_ctl = 0, gu_misc_iir;
+	long unsigned int intr_dw[2];
+	u32 identity[32];
+	u8 id;
+
+	/* TODO: This really shouldn't be copied+pasted */
+
+	master_tile_ctl = dg1_intr_disable(xe);
+	if (!master_tile_ctl) {
+		dg1_intr_enable(xe, false);
+		return IRQ_NONE;
+	}
+
+	for_each_gt(gt, xe, id) {
+		if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0)
+			continue;
+
+		if (!xe_gt_is_media_type(gt))
+			master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+
+		/*
+		 * We might be in irq handler just when PCIe DPC is initiated
+		 * and all MMIO reads will be returned with all 1's. Ignore this
+		 * irq as device is inaccessible.
+		 */
+		if (master_ctl == REG_GENMASK(31, 0)) {
+			dev_dbg(gt_to_xe(gt)->drm.dev,
+				"Ignore this IRQ as device might be in DPC containment.\n");
+			return IRQ_HANDLED;
+		}
+
+		if (!xe_gt_is_media_type(gt))
+			xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl);
+		gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+	}
+
+	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
+
+	dg1_intr_enable(xe, false);
+
+	return IRQ_HANDLED;
+}
+
+static void gen11_gt_irq_reset(struct xe_gt *gt)
+{
+	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+	/* Disable RCS, BCS, VCS and VECS class engines. */
+	xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg,	 0);
+	if (ccs_mask)
+		xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0);
+
+	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
+	xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg,	~0);
+	if (bcs_mask & (BIT(1)|BIT(2)))
+		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0);
+	if (bcs_mask & (BIT(3)|BIT(4)))
+		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0);
+	if (bcs_mask & (BIT(5)|BIT(6)))
+		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0);
+	if (bcs_mask & (BIT(7)|BIT(8)))
+		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0);
+	xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg,	~0);
+//	if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
+//		xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg,   ~0);
+//	if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
+//		xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg,   ~0);
+	xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg,	~0);
+//	if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
+//		xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0);
+	if (ccs_mask & (BIT(0)|BIT(1)))
+		xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0);
+	if (ccs_mask & (BIT(2)|BIT(3)))
+		xe_mmio_write32(gt,  GEN12_CCS2_CCS3_INTR_MASK.reg, ~0);
+
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg,		~0);
+}
+
+static void gen11_irq_reset(struct xe_gt *gt)
+{
+	gen11_intr_disable(gt);
+
+	gen11_gt_irq_reset(gt);
+
+	GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
+	GEN3_IRQ_RESET(gt, GEN8_PCU_);
+}
+
+static void dg1_irq_reset(struct xe_gt *gt)
+{
+	if (gt->info.id == 0)
+		dg1_intr_disable(gt_to_xe(gt));
+
+	gen11_gt_irq_reset(gt);
+
+	GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
+	GEN3_IRQ_RESET(gt, GEN8_PCU_);
+}
+
+void xe_irq_reset(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id) {
+		if (GRAPHICS_VERx100(xe) >= 1210) {
+			dg1_irq_reset(gt);
+		} else if (GRAPHICS_VER(xe) >= 11) {
+			gen11_irq_reset(gt);
+		} else {
+			drm_err(&xe->drm, "No interrupt reset hook");
+		}
+	}
+}
+
+void xe_gt_irq_postinstall(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1210)
+		dg1_irq_postinstall(xe, gt);
+	else if (GRAPHICS_VER(xe) >= 11)
+		gen11_irq_postinstall(xe, gt);
+	else
+		drm_err(&xe->drm, "No interrupt postinstall hook");
+}
+
+static void xe_irq_postinstall(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_irq_postinstall(gt);
+}
+
+static irq_handler_t xe_irq_handler(struct xe_device *xe)
+{
+	if (GRAPHICS_VERx100(xe) >= 1210) {
+		return dg1_irq_handler;
+	} else if (GRAPHICS_VER(xe) >= 11) {
+		return gen11_irq_handler;
+	} else {
+		return NULL;
+	}
+}
+
+static void irq_uninstall(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int irq = pdev->irq;
+
+	if (!xe->irq.enabled)
+		return;
+
+	xe->irq.enabled = false;
+	xe_irq_reset(xe);
+	free_irq(irq, xe);
+	if (pdev->msi_enabled)
+		pci_disable_msi(pdev);
+}
+
+int xe_irq_install(struct xe_device *xe)
+{
+	int irq = to_pci_dev(xe->drm.dev)->irq;
+	static irq_handler_t irq_handler;
+	int err;
+
+	irq_handler = xe_irq_handler(xe);
+	if (!irq_handler) {
+		drm_err(&xe->drm, "No supported interrupt handler");
+		return -EINVAL;
+	}
+
+	xe->irq.enabled = true;
+
+	xe_irq_reset(xe);
+
+	err = request_irq(irq, irq_handler,
+			  IRQF_SHARED, DRIVER_NAME, xe);
+	if (err < 0) {
+		xe->irq.enabled = false;
+		return err;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
+	if (err)
+		return err;
+
+	return err;
+}
+
+void xe_irq_shutdown(struct xe_device *xe)
+{
+	irq_uninstall(&xe->drm, xe);
+}
+
+void xe_irq_suspend(struct xe_device *xe)
+{
+	spin_lock_irq(&xe->irq.lock);
+	xe->irq.enabled = false;
+	xe_irq_reset(xe);
+	spin_unlock_irq(&xe->irq.lock);
+}
+
+void xe_irq_resume(struct xe_device *xe)
+{
+	spin_lock_irq(&xe->irq.lock);
+	xe->irq.enabled = true;
+	xe_irq_reset(xe);
+	xe_irq_postinstall(xe);
+	spin_unlock_irq(&xe->irq.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
new file mode 100644
index 0000000000000..34ecf22b32d36
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_IRQ_H_
+#define _XE_IRQ_H_
+
+struct xe_device;
+struct xe_gt;
+
+int xe_irq_install(struct xe_device *xe);
+void xe_gt_irq_postinstall(struct xe_gt *gt);
+void xe_irq_shutdown(struct xe_device *xe);
+void xe_irq_suspend(struct xe_device *xe);
+void xe_irq_resume(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
new file mode 100644
index 0000000000000..056c2c5a0b818
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_lrc.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine_types.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_hw_fence.h"
+#include "xe_vm.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_regs.h"
+
+#define GEN8_CTX_VALID				(1 << 0)
+#define GEN8_CTX_L3LLC_COHERENT			(1 << 5)
+#define GEN8_CTX_PRIVILEGE			(1 << 8)
+#define GEN8_CTX_ADDRESSING_MODE_SHIFT		3
+#define INTEL_LEGACY_64B_CONTEXT		3
+
+#define GEN11_ENGINE_CLASS_SHIFT		61
+#define GEN11_ENGINE_INSTANCE_SHIFT		48
+
+static struct xe_device *
+lrc_to_xe(struct xe_lrc *lrc)
+{
+	return gt_to_xe(lrc->fence_ctx.gt);
+}
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+	case XE_ENGINE_CLASS_COMPUTE:
+		/* 14 pages since graphics_ver == 11 */
+		return 14 * SZ_4K;
+	default:
+		WARN(1, "Unknown engine class: %d", class);
+		fallthrough;
+	case XE_ENGINE_CLASS_COPY:
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return 2 * SZ_4K;
+	}
+}
+
+/*
+ * The per-platform tables are u8-encoded in @data. Decode @data and set the
+ * addresses' offset and commands in @regs. The following encoding is used
+ * for each byte. There are 2 steps: decoding commands and decoding addresses.
+ *
+ * Commands:
+ * [7]: create NOPs - number of NOPs are set in lower bits
+ * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
+ *      MI_LRI_FORCE_POSTED
+ * [5:0]: Number of NOPs or registers to set values to in case of
+ *        MI_LOAD_REGISTER_IMM
+ *
+ * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
+ * number of registers. They are set by using the REG/REG16 macros: the former
+ * is used for offsets smaller than 0x200 while the latter is for values bigger
+ * than that. Those macros already set all the bits documented below correctly:
+ *
+ * [7]: When a register offset needs more than 6 bits, use additional bytes, to
+ *      follow, for the lower bits
+ * [6:0]: Register offset, without considering the engine base.
+ *
+ * This function only tweaks the commands and register offsets. Values are not
+ * filled out.
+ */
+static void set_offsets(u32 *regs,
+			const u8 *data,
+			const struct xe_hw_engine *hwe)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | \
+			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+	(((x) >> 2) & 0x7f)
+#define END 0
+{
+	const u32 base = hwe->mmio_base;
+
+	while (*data) {
+		u8 count, flags;
+
+		if (*data & BIT(7)) { /* skip */
+			count = *data++ & ~BIT(7);
+			regs += count;
+			continue;
+		}
+
+		count = *data & 0x3f;
+		flags = *data >> 6;
+		data++;
+
+		*regs = MI_LOAD_REGISTER_IMM(count);
+		if (flags & POSTED)
+			*regs |= MI_LRI_FORCE_POSTED;
+		*regs |= MI_LRI_LRM_CS_MMIO;
+		regs++;
+
+		XE_BUG_ON(!count);
+		do {
+			u32 offset = 0;
+			u8 v;
+
+			do {
+				v = *data++;
+				offset <<= 7;
+				offset |= v & ~BIT(7);
+			} while (v & BIT(7));
+
+			regs[0] = base + (offset << 2);
+			regs += 2;
+		} while (--count);
+	}
+
+	*regs = MI_BATCH_BUFFER_END | BIT(0);
+}
+
+static const u8 gen12_xcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END
+};
+
+static const u8 dg2_xcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END
+};
+
+static const u8 gen12_rcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+	NOP(3 + 9 + 1),
+
+	LRI(51, POSTED),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG(0x028),
+	REG(0x09c),
+	REG(0x0c0),
+	REG(0x178),
+	REG(0x17c),
+	REG16(0x358),
+	REG(0x170),
+	REG(0x150),
+	REG(0x154),
+	REG(0x158),
+	REG16(0x41c),
+	REG16(0x600),
+	REG16(0x604),
+	REG16(0x608),
+	REG16(0x60c),
+	REG16(0x610),
+	REG16(0x614),
+	REG16(0x618),
+	REG16(0x61c),
+	REG16(0x620),
+	REG16(0x624),
+	REG16(0x628),
+	REG16(0x62c),
+	REG16(0x630),
+	REG16(0x634),
+	REG16(0x638),
+	REG16(0x63c),
+	REG16(0x640),
+	REG16(0x644),
+	REG16(0x648),
+	REG16(0x64c),
+	REG16(0x650),
+	REG16(0x654),
+	REG16(0x658),
+	REG16(0x65c),
+	REG16(0x660),
+	REG16(0x664),
+	REG16(0x668),
+	REG16(0x66c),
+	REG16(0x670),
+	REG16(0x674),
+	REG16(0x678),
+	REG16(0x67c),
+	REG(0x068),
+	REG(0x084),
+	NOP(1),
+
+	END
+};
+
+static const u8 xehp_rcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+static const u8 dg2_rcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+static const u8 mtl_rcs_offsets[] = {
+       NOP(1),
+       LRI(15, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+       REG16(0x2b4),
+       REG(0x120),
+       REG(0x124),
+
+       NOP(1),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(2),
+       LRI(2, POSTED),
+       REG16(0x5a8),
+       REG16(0x5ac),
+
+       NOP(6),
+       LRI(1, 0),
+       REG(0x0c8),
+
+       END
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
+{
+	if (class == XE_ENGINE_CLASS_RENDER) {
+		if (GRAPHICS_VERx100(xe) >= 1270)
+			return mtl_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1255)
+			return dg2_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1250)
+			return xehp_rcs_offsets;
+		else
+			return gen12_rcs_offsets;
+	} else {
+		if (GRAPHICS_VERx100(xe) >= 1255)
+			return dg2_xcs_offsets;
+		else
+			return gen12_xcs_offsets;
+	}
+}
+
+static void set_context_control(u32 * regs, struct xe_hw_engine *hwe)
+{
+	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
+				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
+				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+
+	/* TODO: Timestamp */
+}
+
+static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		return 0x70;
+	else
+		return 0x60;
+}
+
+static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
+{
+	int x;
+
+	x = lrc_ring_mi_mode(hwe);
+	regs[x + 1] &= ~STOP_RING;
+	regs[x + 1] |= STOP_RING << 16;
+}
+
+static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
+{
+	return 0;
+}
+
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
+{
+	return lrc->ring.size;
+}
+
+/* Make the magic macros work */
+#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
+
+#define LRC_SEQNO_PPHWSP_OFFSET 512
+#define LRC_START_SEQNO_PPHWSP_OFFSET LRC_SEQNO_PPHWSP_OFFSET + 8
+#define LRC_PARALLEL_PPHWSP_OFFSET 2048
+#define LRC_PPHWSP_SIZE SZ_4K
+
+static size_t lrc_reg_size(struct xe_device *xe)
+{
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		return 96 * sizeof(u32);
+	else
+		return 80 * sizeof(u32);
+}
+
+size_t xe_lrc_skip_size(struct xe_device *xe)
+{
+	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
+}
+
+static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The seqno is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The start seqno is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
+{
+	/* The parallel is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
+{
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
+}
+
+#define DECL_MAP_ADDR_HELPERS(elem) \
+static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
+{ \
+	struct iosys_map map = lrc->bo->vmap; \
+\
+	XE_BUG_ON(iosys_map_is_null(&map)); \
+	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
+	return map; \
+} \
+static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
+{ \
+	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
+} \
+
+DECL_MAP_ADDR_HELPERS(ring)
+DECL_MAP_ADDR_HELPERS(pphwsp)
+DECL_MAP_ADDR_HELPERS(seqno)
+DECL_MAP_ADDR_HELPERS(regs)
+DECL_MAP_ADDR_HELPERS(start_seqno)
+DECL_MAP_ADDR_HELPERS(parallel)
+
+#undef DECL_MAP_ADDR_HELPERS
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_pphwsp_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+	struct iosys_map map;
+
+	map = __xe_lrc_regs_map(lrc);
+	iosys_map_incr(&map, reg_nr * sizeof(u32));
+	return xe_map_read32(xe, &map);
+}
+
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+	struct iosys_map map;
+
+	map = __xe_lrc_regs_map(lrc);
+	iosys_map_incr(&map, reg_nr * sizeof(u32));
+	xe_map_write32(xe, &map, val);
+}
+
+static void *empty_lrc_data(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+	void *data;
+	u32 *regs;
+
+	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
+	if (!data)
+		return NULL;
+
+	/* 1st page: Per-Process of HW status Page */
+	regs = data + LRC_PPHWSP_SIZE;
+	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
+	set_context_control(regs, hwe);
+	reset_stop_ring(regs, hwe);
+
+	return data;
+}
+
+static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
+{
+	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt);
+
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
+}
+
+#define PVC_CTX_ASID		(0x2e + 1)
+#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
+#define ACC_GRANULARITY_S       20
+#define ACC_NOTIFY_S            16
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_engine *e, struct xe_vm *vm, u32 ring_size)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct iosys_map map;
+	void *init_data = NULL;
+	u32 arb_enable;
+	int err;
+
+	lrc->flags = 0;
+
+	lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm,
+				      ring_size + xe_lrc_size(xe, hwe->class),
+				      ttm_bo_type_kernel,
+				      XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) |
+				      XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(lrc->bo))
+		return PTR_ERR(lrc->bo);
+
+	if (xe_gt_is_media_type(hwe->gt))
+		lrc->full_gt = xe_find_full_gt(hwe->gt);
+	else
+		lrc->full_gt = hwe->gt;
+
+	/*
+	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
+	 * via VM bind calls.
+	 */
+	err = xe_bo_pin(lrc->bo);
+	if (err)
+		goto err_unlock_put_bo;
+	lrc->flags |= XE_LRC_PINNED;
+
+	err = xe_bo_vmap(lrc->bo);
+	if (err)
+		goto err_unpin_bo;
+
+	xe_bo_unlock_vm_held(lrc->bo);
+
+	lrc->ring.size = ring_size;
+	lrc->ring.tail = 0;
+
+	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
+			     hwe->fence_irq, hwe->name);
+
+	if (!gt->default_lrc[hwe->class]) {
+		init_data = empty_lrc_data(hwe);
+		if (!init_data) {
+			xe_lrc_finish(lrc);
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * Init Per-Process of HW status Page, LRC / context state to known
+	 * values
+	 */
+	map = __xe_lrc_pphwsp_map(lrc);
+	if (!init_data) {
+		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
+		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
+				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
+				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
+	} else {
+		xe_map_memcpy_to(xe, &map, 0, init_data,
+				 xe_lrc_size(xe, hwe->class));
+		kfree(init_data);
+	}
+
+	if (vm)
+		xe_lrc_set_ppgtt(lrc, vm);
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
+			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+	if (xe->info.supports_usm && vm) {
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
+				     (e->usm.acc_granularity <<
+				      ACC_GRANULARITY_S) | vm->usm.asid);
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
+				     (e->usm.acc_notify << ACC_NOTIFY_S) |
+				     e->usm.acc_trigger);
+	}
+
+	lrc->desc = GEN8_CTX_VALID;
+	lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	/* TODO: Priority */
+
+	/* While this appears to have something about privileged batches or
+	 * some such, it really just means PPGTT mode.
+	 */
+	if (vm)
+		lrc->desc |= GEN8_CTX_PRIVILEGE;
+
+	if (GRAPHICS_VERx100(xe) < 1250) {
+		lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT;
+		lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT;
+	}
+
+	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+
+	return 0;
+
+err_unpin_bo:
+	if (lrc->flags & XE_LRC_PINNED)
+		xe_bo_unpin(lrc->bo);
+err_unlock_put_bo:
+	xe_bo_unlock_vm_held(lrc->bo);
+	xe_bo_put(lrc->bo);
+	return err;
+}
+
+void xe_lrc_finish(struct xe_lrc *lrc)
+{
+	struct ww_acquire_ctx ww;
+
+	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
+	if (lrc->flags & XE_LRC_PINNED) {
+		if (lrc->bo->vm)
+			xe_vm_lock(lrc->bo->vm, &ww, 0, false);
+		else
+			xe_bo_lock_no_vm(lrc->bo, NULL);
+		xe_bo_unpin(lrc->bo);
+		if (lrc->bo->vm)
+			xe_vm_unlock(lrc->bo->vm, &ww);
+		else
+			xe_bo_unlock_no_vm(lrc->bo);
+	}
+	xe_bo_put(lrc->bo);
+}
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
+{
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
+}
+
+u32 xe_lrc_ring_head(struct xe_lrc *lrc)
+{
+	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
+}
+
+u32 xe_lrc_ring_space(struct xe_lrc *lrc)
+{
+	const u32 head = xe_lrc_ring_head(lrc);
+	const u32 tail = lrc->ring.tail;
+	const u32 size = lrc->ring.size;
+
+	return ((head - tail - 1) & (size - 1)) + 1;
+}
+
+static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
+				const void *data, size_t size)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+
+	iosys_map_incr(&ring, lrc->ring.tail);
+	xe_map_memcpy_to(xe, &ring, 0, data, size);
+	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
+}
+
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
+{
+	struct iosys_map ring;
+	u32 rhs;
+	size_t aligned_size;
+
+	XE_BUG_ON(!IS_ALIGNED(size, 4));
+	aligned_size = ALIGN(size, 8);
+
+	ring = __xe_lrc_ring_map(lrc);
+
+	XE_BUG_ON(lrc->ring.tail >= lrc->ring.size);
+	rhs = lrc->ring.size - lrc->ring.tail;
+	if (size > rhs) {
+		__xe_lrc_write_ring(lrc, ring, data, rhs);
+		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
+	} else {
+		__xe_lrc_write_ring(lrc, ring, data, size);
+	}
+
+	if (aligned_size > size) {
+		u32 noop = MI_NOOP;
+
+		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
+	}
+}
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc)
+{
+	return lrc->desc | xe_lrc_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_seqno_ggtt_addr(lrc);
+}
+
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
+{
+	return &xe_hw_fence_create(&lrc->fence_ctx,
+				   __xe_lrc_seqno_map(lrc))->dma;
+}
+
+s32 xe_lrc_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_seqno_map(lrc);
+
+	return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
+
+	return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_start_seqno_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_ggtt_addr(lrc);
+}
+
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_map(lrc);
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
new file mode 100644
index 0000000000000..e37f89e75ef80
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _XE_LRC_H_
+#define _XE_LRC_H_
+
+#include "xe_lrc_types.h"
+
+struct xe_device;
+struct xe_engine;
+enum xe_engine_class;
+struct xe_hw_engine;
+struct xe_vm;
+
+#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_engine *e, struct xe_vm *vm, u32 ring_size);
+void xe_lrc_finish(struct xe_lrc *lrc);
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
+u32 xe_lrc_ring_head(struct xe_lrc *lrc);
+u32 xe_lrc_ring_space(struct xe_lrc *lrc);
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
+u32 *xe_lrc_regs(struct xe_lrc *lrc);
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc);
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc);
+s32 xe_lrc_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc);
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
+
+size_t xe_lrc_skip_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
new file mode 100644
index 0000000000000..2827efa2091d2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_LRC_TYPES_H_
+#define _XE_LRC_TYPES_H_
+
+#include "xe_hw_fence_types.h"
+
+struct xe_bo;
+
+/**
+ * struct xe_lrc - Logical ring context (LRC) and submission ring object
+ */
+struct xe_lrc {
+	/**
+	 * @bo: buffer object (memory) for logical ring context, per process HW
+	 * status page, and submission ring.
+	 */
+	struct xe_bo *bo;
+
+	/** @full_gt: full GT which this LRC belongs to */
+	struct xe_gt *full_gt;
+
+	/** @flags: LRC flags */
+	u32 flags;
+#define XE_LRC_PINNED BIT(1)
+
+	/** @ring: submission ring state */
+	struct {
+		/** @size: size of submission ring */
+		u32 size;
+		/** @tail: tail of submission ring */
+		u32 tail;
+		/** @old_tail: shadow of tail */
+		u32 old_tail;
+	} ring;
+
+	/** @desc: LRC descriptor */
+	u64 desc;
+
+	/** @fence_ctx: context for hw fence */
+	struct xe_hw_fence_ctx fence_ctx;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
new file mode 100644
index 0000000000000..0d24c124d2025
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MACROS_H_
+#define _XE_MACROS_H_
+
+#include <linux/bug.h>
+
+#define XE_EXTRA_DEBUG 1
+#define XE_WARN_ON WARN_ON
+#define XE_BUG_ON BUG_ON
+
+#define XE_IOCTL_ERR(xe, cond) \
+	((cond) && (drm_info(&(xe)->drm, \
+			    "Ioctl argument check failed at %s:%d: %s", \
+			    __FILE__, __LINE__, #cond), 1))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
new file mode 100644
index 0000000000000..0bac1f73a80d6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_MAP_H__
+#define __XE_MAP_H__
+
+#include <linux/iosys-map.h>
+
+#include <xe_device.h>
+
+/**
+ * DOC: Map layer
+ *
+ * All access to any memory shared with a device (both sysmem and vram) in the
+ * XE driver should go through this layer (xe_map). This layer is built on top
+ * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
+ * and with extra hooks into the XE driver that allows adding asserts to memory
+ * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
+ */
+
+static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst,
+				    size_t dst_offset, const void *src,
+				    size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memcpy_to(dst, dst_offset, src, len);
+}
+
+static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst,
+				      const struct iosys_map *src,
+				      size_t src_offset, size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memcpy_from(dst, src, src_offset, len);
+}
+
+static inline void xe_map_memset(struct xe_device *xe,
+				 struct iosys_map *dst, size_t offset,
+				 int value, size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memset(dst, offset, value, len);
+}
+
+/* FIXME: We likely should kill these two functions sooner or later */
+static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map)
+{
+	xe_device_assert_mem_access(xe);
+
+	if (map->is_iomem)
+		return readl(map->vaddr_iomem);
+	else
+		return READ_ONCE(*(u32 *)map->vaddr);
+}
+
+static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map,
+				  u32 val)
+{
+	xe_device_assert_mem_access(xe);
+
+	if (map->is_iomem)
+		writel(val, map->vaddr_iomem);
+	else
+		*(u32 *)map->vaddr = val;
+}
+
+#define xe_map_rd(xe__, map__, offset__, type__) ({			\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd(map__, offset__, type__);				\
+})
+
+#define xe_map_wr(xe__, map__, offset__, type__, val__) ({		\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr(map__, offset__, type__, val__);			\
+})
+
+#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd_field(map__, struct_offset__, struct_type__, field__);		\
+})
+
+#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__);	\
+})
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
new file mode 100644
index 0000000000000..7fc40e8009c30
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -0,0 +1,1168 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+#include "xe_migrate.h"
+
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_engine.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+#include <linux/sizes.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "gt/intel_gpu_commands.h"
+
+struct xe_migrate {
+	struct xe_engine *eng;
+	struct xe_gt *gt;
+	struct mutex job_mutex;
+	struct xe_bo *pt_bo;
+	struct xe_bo *cleared_bo;
+	u64 batch_base_ofs;
+	u64 usm_batch_base_ofs;
+	u64 cleared_vram_ofs;
+	struct dma_fence *fence;
+	struct drm_suballoc_manager vm_update_sa;
+};
+
+#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
+#define NUM_KERNEL_PDE 17
+#define NUM_PT_SLOTS 32
+#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M)
+
+struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt)
+{
+	return gt->migrate->eng;
+}
+
+static void xe_migrate_fini(struct drm_device *dev, void *arg)
+{
+	struct xe_migrate *m = arg;
+	struct ww_acquire_ctx ww;
+
+	xe_vm_lock(m->eng->vm, &ww, 0, false);
+	xe_bo_unpin(m->pt_bo);
+	if (m->cleared_bo)
+		xe_bo_unpin(m->cleared_bo);
+	xe_vm_unlock(m->eng->vm, &ww);
+
+	dma_fence_put(m->fence);
+	if (m->cleared_bo)
+		xe_bo_put(m->cleared_bo);
+	xe_bo_put(m->pt_bo);
+	drm_suballoc_manager_fini(&m->vm_update_sa);
+	mutex_destroy(&m->job_mutex);
+	xe_vm_close_and_put(m->eng->vm);
+	xe_engine_put(m->eng);
+}
+
+static u64 xe_migrate_vm_addr(u64 slot, u32 level)
+{
+	XE_BUG_ON(slot >= NUM_PT_SLOTS);
+
+	/* First slot is reserved for mapping of PT bo and bb, start from 1 */
+	return (slot + 1ULL) << xe_pt_shift(level + 1);
+}
+
+static u64 xe_migrate_vram_ofs(u64 addr)
+{
+	return addr + (256ULL << xe_pt_shift(2));
+}
+
+/*
+ * For flat CCS clearing we need a cleared chunk of memory to copy from,
+ * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy
+ * (it clears on only 14 bytes in each chunk of 16).
+ * If clearing the main surface one can use the part of the main surface
+ * already cleared, but for clearing as part of copying non-compressed
+ * data out of system memory, we don't readily have a cleared part of
+ * VRAM to copy from, so create one to use for that case.
+ */
+static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
+{
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = vm->xe;
+	size_t cleared_size;
+	u64 vram_addr;
+	bool is_vram;
+
+	if (!xe_device_has_flat_ccs(xe))
+		return 0;
+
+	cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER);
+	cleared_size = PAGE_ALIGN(cleared_size);
+	m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size,
+					     ttm_bo_type_kernel,
+					     XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					     XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(m->cleared_bo))
+		return PTR_ERR(m->cleared_bo);
+
+	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
+	vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram);
+	XE_BUG_ON(!is_vram);
+	m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr);
+
+	return 0;
+}
+
+static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
+				 struct xe_vm *vm)
+{
+	u8 id = gt->info.id;
+	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+	u32 map_ofs, level, i;
+	struct xe_device *xe = gt_to_xe(m->gt);
+	struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo;
+	u64 entry;
+	int ret;
+
+	/* Can't bump NUM_PT_SLOTS too high */
+	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE);
+	/* Must be a multiple of 64K to support all platforms */
+	BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K);
+	/* And one slot reserved for the 4KiB page table updates */
+	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
+
+	/* Need to be sure everything fits in the first PT, or create more */
+	XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M);
+
+	bo = xe_bo_create_pin_map(vm->xe, m->gt, vm,
+				  num_entries * GEN8_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ret = xe_migrate_create_cleared_bo(m, vm);
+	if (ret) {
+		xe_bo_put(bo);
+		return ret;
+	}
+
+	entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB);
+	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
+
+	map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE;
+
+	/* Map the entire BO in our level 0 pt */
+	for (i = 0, level = 0; i < num_entries; level++) {
+		entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE,
+					XE_CACHE_WB, 0, 0);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
+
+		if (vm->flags & XE_VM_FLAGS_64K)
+			i += 16;
+		else
+			i += 1;
+	}
+
+	if (!IS_DGFX(xe)) {
+		XE_BUG_ON(xe->info.supports_usm);
+
+		/* Write out batch too */
+		m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE;
+		for (i = 0; i < batch->size;
+		     i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE :
+			     GEN8_PAGE_SIZE) {
+			entry = gen8_pte_encode(NULL, batch, i,
+						XE_CACHE_WB, 0, 0);
+
+			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+				  entry);
+			level++;
+		}
+	} else {
+		bool is_lmem;
+		u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem);
+
+		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
+
+		if (xe->info.supports_usm) {
+			batch = gt->usm.bb_pool.bo;
+			batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE,
+						&is_lmem);
+			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
+		}
+	}
+
+	for (level = 1; level < num_level; level++) {
+		u32 flags = 0;
+
+		if (vm->flags & XE_VM_FLAGS_64K && level == 1)
+			flags = GEN12_PDE_64K;
+
+		entry = gen8_pde_encode(bo, map_ofs + (level - 1) *
+					GEN8_PAGE_SIZE, XE_CACHE_WB);
+		xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64,
+			  entry | flags);
+	}
+
+	/* Write PDE's that point to our BO. */
+	for (i = 0; i < num_entries - num_level; i++) {
+		entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE,
+					XE_CACHE_WB);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE +
+			  (i + 1) * 8, u64, entry);
+	}
+
+	/* Identity map the entire vram at 256GiB offset */
+	if (IS_DGFX(xe)) {
+		u64 pos, ofs, flags;
+
+		level = 2;
+		ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8;
+		flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED |
+			GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G;
+
+		/*
+		 * Use 1GB pages, it shouldn't matter the physical amount of
+		 * vram is less, when we don't access it.
+		 */
+		for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8)
+			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+	}
+
+	/*
+	 * Example layout created above, with root level = 3:
+	 * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
+	 * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
+	 * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's
+	 * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
+	 *
+	 * This makes the lowest part of the VM point to the pagetables.
+	 * Hence the lowest 2M in the vm should point to itself, with a few writes
+	 * and flushes, other parts of the VM can be used either for copying and
+	 * clearing.
+	 *
+	 * For performance, the kernel reserves PDE's, so about 20 are left
+	 * for async VM updates.
+	 *
+	 * To make it easier to work, each scratch PT is put in slot (1 + PT #)
+	 * everywhere, this allows lockless updates to scratch pages by using
+	 * the different addresses in VM.
+	 */
+#define NUM_VMUSA_UNIT_PER_PAGE	32
+#define VM_SA_UPDATE_UNIT_SIZE	(GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
+#define NUM_VMUSA_WRITES_PER_UNIT	(VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
+	drm_suballoc_manager_init(&m->vm_update_sa,
+				  (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) *
+				  NUM_VMUSA_UNIT_PER_PAGE, 0);
+
+	m->pt_bo = bo;
+	return 0;
+}
+
+struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_migrate *m;
+	struct xe_vm *vm;
+	struct ww_acquire_ctx ww;
+	int err;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	m->gt = gt;
+
+	/* Special layout, prepared below.. */
+	vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
+			  XE_VM_FLAG_SET_GT_ID(gt));
+	if (IS_ERR(vm))
+		return ERR_CAST(vm);
+
+	xe_vm_lock(vm, &ww, 0, false);
+	err = xe_migrate_prepare_vm(gt, m, vm);
+	xe_vm_unlock(vm, &ww);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return ERR_PTR(err);
+	}
+
+	if (xe->info.supports_usm) {
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
+							   XE_ENGINE_CLASS_COPY,
+							   gt->usm.reserved_bcs_instance,
+							   false);
+		if (!hwe)
+			return ERR_PTR(-EINVAL);
+
+		m->eng = xe_engine_create(xe, vm,
+					  BIT(hwe->logical_instance), 1,
+					  hwe, ENGINE_FLAG_KERNEL);
+	} else {
+		m->eng = xe_engine_create_class(xe, gt, vm,
+						XE_ENGINE_CLASS_COPY,
+						ENGINE_FLAG_KERNEL);
+	}
+	if (IS_ERR(m->eng)) {
+		xe_vm_close_and_put(vm);
+		return ERR_CAST(m->eng);
+	}
+
+	mutex_init(&m->job_mutex);
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
+	if (err)
+		return ERR_PTR(err);
+
+	return m;
+}
+
+static void emit_arb_clear(struct xe_bb *bb)
+{
+	/* 1 dword */
+	bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+}
+
+static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur)
+{
+	/*
+	 * For VRAM we use identity mapped pages so we are limited to current
+	 * cursor size. For system we program the pages ourselves so we have no
+	 * such limitation.
+	 */
+	return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER,
+		     mem_type_is_vram(cur->mem_type) ? cur->size :
+		     cur->remaining);
+}
+
+static u32 pte_update_size(struct xe_migrate *m,
+			   bool is_vram,
+			   struct xe_res_cursor *cur,
+			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
+			   u32 cmd_size, u32 pt_ofs, u32 avail_pts)
+{
+	u32 cmds = 0;
+
+	*L0_pt = pt_ofs;
+	if (!is_vram) {
+		/* Clip L0 to available size */
+		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
+		u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE);
+
+		*L0 = size;
+		*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
+
+		/* MI_STORE_DATA_IMM */
+		cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+
+		/* PDE qwords */
+		cmds += num_4k_pages * 2;
+
+		/* Each chunk has a single blit command */
+		cmds += cmd_size;
+	} else {
+		/* Offset into identity map. */
+		*L0_ofs = xe_migrate_vram_ofs(cur->start);
+		cmds += cmd_size;
+	}
+
+	return cmds;
+}
+
+static void emit_pte(struct xe_migrate *m,
+		     struct xe_bb *bb, u32 at_pt,
+		     bool is_vram,
+		     struct xe_res_cursor *cur,
+		     u32 size, struct xe_bo *bo)
+{
+	u32 ptes;
+	u64 ofs = at_pt * GEN8_PAGE_SIZE;
+	u64 cur_ofs;
+
+	/*
+	 * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
+	 * we're only emitting VRAM PTEs during sanity tests, so when
+	 * that's moved to a Kunit test, we should condition VRAM PTEs
+	 * on running tests.
+	 */
+
+	ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE);
+
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes);
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+			(chunk * 2 + 1);
+		bb->cs[bb->len++] = ofs;
+		bb->cs[bb->len++] = 0;
+
+		cur_ofs = ofs;
+		ofs += chunk * 8;
+		ptes -= chunk;
+
+		while (chunk--) {
+			u64 addr;
+
+			XE_BUG_ON(cur->start & (PAGE_SIZE - 1));
+
+			if (is_vram) {
+				addr = cur->start;
+
+				/* Is this a 64K PTE entry? */
+				if ((m->eng->vm->flags & XE_VM_FLAGS_64K) &&
+				    !(cur_ofs & (16 * 8 - 1))) {
+					XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K));
+					addr |= GEN12_PTE_PS64;
+				}
+
+				addr |= GEN12_PPGTT_PTE_LM;
+			} else {
+				addr = xe_res_dma(cur);
+			}
+			addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+
+			xe_res_next(cur, PAGE_SIZE);
+			cur_ofs += 8;
+		}
+	}
+}
+
+#define EMIT_COPY_CCS_DW 5
+static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
+			  u64 dst_ofs, bool dst_is_indirect,
+			  u64 src_ofs, bool src_is_indirect,
+			  u32 size)
+{
+	u32 *cs = bb->cs + bb->len;
+	u32 num_ccs_blks;
+	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+
+	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
+				    NUM_CCS_BYTES_PER_BLOCK);
+	XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
+	*cs++ = XY_CTRL_SURF_COPY_BLT |
+		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
+		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
+		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs) |
+		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+	*cs++ = lower_32_bits(dst_ofs);
+	*cs++ = upper_32_bits(dst_ofs) |
+		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+
+	bb->len = cs - bb->cs;
+}
+
+#define EMIT_COPY_DW 10
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+		      u64 src_ofs, u64 dst_ofs, unsigned int size,
+		      unsigned pitch)
+{
+	XE_BUG_ON(size / pitch > S16_MAX);
+	XE_BUG_ON(pitch / 4 > S16_MAX);
+	XE_BUG_ON(pitch > U16_MAX);
+
+	bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
+	bb->cs[bb->len++] = BLT_DEPTH_32 | pitch;
+	bb->cs[bb->len++] = 0;
+	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
+	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
+	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
+	bb->cs[bb->len++] = 0;
+	bb->cs[bb->len++] = pitch;
+	bb->cs[bb->len++] = lower_32_bits(src_ofs);
+	bb->cs[bb->len++] = upper_32_bits(src_ofs);
+}
+
+static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
+			enum dma_resv_usage usage)
+{
+	return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
+}
+
+static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
+{
+	return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
+}
+
+static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
+			       struct xe_bb *bb,
+			       u64 src_ofs, bool src_is_vram,
+			       u64 dst_ofs, bool dst_is_vram, u32 dst_size,
+			       u64 ccs_ofs, bool copy_ccs)
+{
+	struct xe_gt *gt = m->gt;
+	u32 flush_flags = 0;
+
+	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
+		/*
+		 * If the bo doesn't have any CCS metadata attached, we still
+		 * need to clear it for security reasons.
+		 */
+		emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false,
+			      dst_size);
+		flush_flags = MI_FLUSH_DW_CCS;
+	} else if (copy_ccs) {
+		if (!src_is_vram)
+			src_ofs = ccs_ofs;
+		else if (!dst_is_vram)
+			dst_ofs = ccs_ofs;
+
+		/*
+		 * At the moment, we don't support copying CCS metadata from
+		 * system to system.
+		 */
+		XE_BUG_ON(!src_is_vram && !dst_is_vram);
+
+		emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
+			      src_is_vram, dst_size);
+		if (dst_is_vram)
+			flush_flags = MI_FLUSH_DW_CCS;
+	}
+
+	return flush_flags;
+}
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst)
+{
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = bo->size;
+	struct xe_res_cursor src_it, dst_it, ccs_it;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int pass = 0;
+	int err;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+	bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo);
+	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+	else
+		xe_res_first(src, 0, bo->size, &src_it);
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it);
+	else
+		xe_res_first(dst, 0, bo->size, &dst_it);
+
+	if (copy_system_ccs)
+		xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo),
+				PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
+				&ccs_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags;
+		u32 update_idx;
+		u64 ccs_ofs, ccs_size;
+		u32 ccs_pt;
+		bool usm = xe->info.supports_usm;
+
+		src_L0 = xe_migrate_res_sizes(&src_it);
+		dst_L0 = xe_migrate_res_sizes(&dst_it);
+
+		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
+			pass++, src_L0, dst_L0);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      NUM_PT_PER_BLIT);
+
+		batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      NUM_PT_PER_BLIT, NUM_PT_PER_BLIT);
+
+		if (copy_system_ccs) {
+			ccs_size = xe_device_ccs_bytes(xe, src_L0);
+			batch_size += pte_update_size(m, false, &ccs_it, &ccs_size,
+						      &ccs_ofs, &ccs_pt, 0,
+						      2 * NUM_PT_PER_BLIT,
+						      NUM_PT_PER_BLIT);
+		}
+
+		/* Add copy commands size here */
+		batch_size += EMIT_COPY_DW +
+			(xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		/* Preemption is enabled again by the ring ops. */
+		if (!src_is_vram || !dst_is_vram)
+			emit_arb_clear(bb);
+
+		if (!src_is_vram)
+			emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0,
+				 bo);
+		else
+			xe_res_next(&src_it, src_L0);
+
+		if (!dst_is_vram)
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0,
+				 bo);
+		else
+			xe_res_next(&dst_it, src_L0);
+
+		if (copy_system_ccs)
+			emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE);
+		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
+						  dst_L0_ofs, dst_is_vram,
+						  src_L0, ccs_ofs, copy_ccs);
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->eng, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+		if (!fence) {
+			err = job_add_deps(job, bo->ttm.base.resv,
+					   DMA_RESV_USAGE_BOOKKEEP);
+			if (err)
+				goto err_job;
+		}
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err_job:
+		xe_sched_job_put(job);
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		/* Sync partial copy if any. */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+		      u32 size, u32 pitch, u32 value, bool is_vram)
+{
+	u32 *cs = bb->cs + bb->len;
+	u32 len = XY_FAST_COLOR_BLT_DW;
+	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+
+	if (GRAPHICS_VERx100(gt->xe) < 1250)
+		len = 11;
+
+	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+		(len - 2);
+	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
+		(pitch - 1);
+	*cs++ = 0;
+	*cs++ = (size / pitch) << 16 | pitch / 4;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	*cs++ = (is_vram ? 0x0 : 0x1) <<  XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+	*cs++ = value;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	if (len > 11) {
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+	}
+
+	XE_BUG_ON(cs - bb->cs != len + bb->len);
+	bb->len += len;
+
+	return 0;
+}
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst,
+				   u32 value)
+{
+	bool clear_vram = mem_type_is_vram(dst->mem_type);
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = bo->size;
+	struct xe_res_cursor src_it;
+	struct ttm_resource *src = dst;
+	int err;
+	int pass = 0;
+
+	if (!clear_vram)
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+	else
+		xe_res_first(src, 0, bo->size, &src_it);
+
+	while (size) {
+		u64 clear_L0_ofs;
+		u32 clear_L0_pt;
+		u32 flush_flags = 0;
+		u64 clear_L0;
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 batch_size, update_idx;
+		bool usm = xe->info.supports_usm;
+
+		clear_L0 = xe_migrate_res_sizes(&src_it);
+		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
+
+		/* Calculate final sizes and batch size.. */
+		batch_size = 2 +
+			pte_update_size(m, clear_vram, &src_it,
+					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
+					XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
+		if (xe_device_has_flat_ccs(xe) && clear_vram)
+			batch_size += EMIT_COPY_CCS_DW;
+
+		/* Clear commands */
+
+		if (WARN_ON_ONCE(!clear_L0))
+			break;
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		size -= clear_L0;
+
+		/* TODO: Add dependencies here */
+
+		/* Preemption is enabled again by the ring ops. */
+		if (!clear_vram) {
+			emit_arb_clear(bb);
+			emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
+				 bo);
+		} else {
+			xe_res_next(&src_it, clear_L0);
+		}
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE,
+			   value, clear_vram);
+		if (xe_device_has_flat_ccs(xe) && clear_vram) {
+			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
+				      m->cleared_vram_ofs, false, clear_L0);
+			flush_flags = MI_FLUSH_DW_CCS;
+		}
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->eng, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		continue;
+
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+err_sync:
+		/* Sync partial copies if any. */
+		if (fence) {
+			dma_fence_wait(m->fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
+			  const struct xe_vm_pgtable_update *update,
+			  struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	u32 chunk;
+	u32 ofs = update->ofs, size = update->qwords;
+
+	/*
+	 * If we have 512 entries (max), we would populate it ourselves,
+	 * and update the PDE above it to the new pointer.
+	 * The only time this can only happen if we have to update the top
+	 * PDE. This requires a BO that is almost vm->size big.
+	 *
+	 * This shouldn't be possible in practice.. might change when 16K
+	 * pages are used. Hence the BUG_ON.
+	 */
+	XE_BUG_ON(update->qwords > 0x1ff);
+	if (!ppgtt_ofs) {
+		bool is_lmem;
+
+		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
+							   GEN8_PAGE_SIZE,
+							   &is_lmem));
+		XE_BUG_ON(!is_lmem);
+	}
+
+	do {
+		u64 addr = ppgtt_ofs + ofs * 8;
+		chunk = min(update->qwords, 0x1ffU);
+
+		/* Ensure populatefn can do memset64 by aligning bb->cs */
+		if (!(bb->len & 1))
+			bb->cs[bb->len++] = MI_NOOP;
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+			(chunk * 2 + 1);
+		bb->cs[bb->len++] = lower_32_bits(addr);
+		bb->cs[bb->len++] = upper_32_bits(addr);
+		ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk,
+			      update);
+
+		bb->len += chunk * 2;
+		ofs += chunk;
+		size -= chunk;
+	} while (size);
+}
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
+{
+	return xe_vm_get(m->eng->vm);
+}
+
+static struct dma_fence *
+xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
+			       struct xe_vm *vm, struct xe_bo *bo,
+			       const struct  xe_vm_pgtable_update *updates,
+			       u32 num_updates, bool wait_vm,
+			       struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct dma_fence *fence;
+	int err;
+	u32 i;
+
+	/* Wait on BO moves for 10 ms, then fall back to GPU job */
+	if (bo) {
+		long wait;
+
+		wait = dma_resv_wait_timeout(bo->ttm.base.resv,
+					     DMA_RESV_USAGE_KERNEL,
+					     true, HZ / 100);
+		if (wait <= 0)
+			return ERR_PTR(-ETIME);
+	}
+	if (wait_vm) {
+		long wait;
+
+		wait = dma_resv_wait_timeout(&vm->resv,
+					     DMA_RESV_USAGE_BOOKKEEP,
+					     true, HZ / 100);
+		if (wait <= 0)
+			return ERR_PTR(-ETIME);
+	}
+
+	if (ops->pre_commit) {
+		err = ops->pre_commit(pt_update);
+		if (err)
+			return ERR_PTR(err);
+	}
+	for (i = 0; i < num_updates; i++) {
+		const struct xe_vm_pgtable_update *update = &updates[i];
+
+		ops->populate(pt_update, m->gt, &update->pt_bo->vmap, NULL,
+			      update->ofs, update->qwords, update);
+	}
+
+	trace_xe_vm_cpu_bind(vm);
+	xe_device_wmb(vm->xe);
+
+	fence = dma_fence_get_stub();
+
+	return fence;
+}
+
+static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	int i;
+
+	for (i = 0; i < num_syncs; i++) {
+		struct dma_fence *fence = syncs[i].fence;
+
+		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				       &fence->flags))
+			return false;
+	}
+
+	return true;
+}
+
+static bool engine_is_idle(struct xe_engine *e)
+{
+	return !e || e->lrc[0].fence_ctx.next_seqno == 1 ||
+		xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno;
+}
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_engine *eng,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct xe_gt *gt = m->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct drm_suballoc *sa_bo = NULL;
+	struct xe_vma *vma = pt_update->vma;
+	struct xe_bb *bb;
+	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
+	u64 addr;
+	int err = 0;
+	bool usm = !eng && xe->info.supports_usm;
+	bool first_munmap_rebind = vma && vma->first_munmap_rebind;
+
+	/* Use the CPU if no in syncs and engine is idle */
+	if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) {
+		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
+							num_updates,
+							first_munmap_rebind,
+							pt_update);
+		if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
+			return fence;
+	}
+
+	/* fixed + PTE entries */
+	if (IS_DGFX(xe))
+		batch_size = 2;
+	else
+		batch_size = 6 + num_updates * 2;
+
+	for (i = 0; i < num_updates; i++) {
+		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+
+		/* align noop + MI_STORE_DATA_IMM cmd prefix */
+		batch_size += 4 * num_cmds + updates[i].qwords * 2;
+	}
+
+	/*
+	 * XXX: Create temp bo to copy from, if batch_size becomes too big?
+	 *
+	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
+	 * Should be reasonably bound..
+	 */
+	XE_BUG_ON(batch_size >= SZ_128K);
+
+	bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm);
+	if (IS_ERR(bb))
+		return ERR_CAST(bb);
+
+	/* For sysmem PTE's, need to map them in our hole.. */
+	if (!IS_DGFX(xe)) {
+		ppgtt_ofs = NUM_KERNEL_PDE - 1;
+		if (eng) {
+			XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT);
+
+			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
+						 GFP_KERNEL, true, 0);
+			if (IS_ERR(sa_bo)) {
+				err = PTR_ERR(sa_bo);
+				goto err;
+			}
+
+			ppgtt_ofs = NUM_KERNEL_PDE +
+				(drm_suballoc_soffset(sa_bo) /
+				 NUM_VMUSA_UNIT_PER_PAGE);
+			page_ofs = (drm_suballoc_soffset(sa_bo) %
+				    NUM_VMUSA_UNIT_PER_PAGE) *
+				VM_SA_UPDATE_UNIT_SIZE;
+		}
+
+		/* Preemption is enabled again by the ring ops. */
+		emit_arb_clear(bb);
+
+		/* Map our PT's to gtt */
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+			(num_updates * 2 + 1);
+		bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs;
+		bb->cs[bb->len++] = 0; /* upper_32_bits */
+
+		for (i = 0; i < num_updates; i++) {
+			struct xe_bo *pt_bo = updates[i].pt_bo;
+
+			BUG_ON(pt_bo->size != SZ_4K);
+
+			addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB,
+					       0, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+		}
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
+			(page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE;
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE,
+				      &updates[i], pt_update);
+	} else {
+		/* phys pages, no preamble required */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		/* Preemption is enabled again by the ring ops. */
+		emit_arb_clear(bb);
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(m->gt, bb, 0, &updates[i], pt_update);
+	}
+
+	if (!eng)
+		mutex_lock(&m->job_mutex);
+
+	job = xe_bb_create_migration_job(eng ?: m->eng, bb,
+					 xe_migrate_batch_base(m, usm),
+					 update_idx);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_bb;
+	}
+
+	/* Wait on BO move */
+	if (bo) {
+		err = job_add_deps(job, bo->ttm.base.resv,
+				   DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_job;
+	}
+
+	/*
+	 * Munmap style VM unbind, need to wait for all jobs to be complete /
+	 * trigger preempts before moving forward
+	 */
+	if (first_munmap_rebind) {
+		err = job_add_deps(job, &vm->resv,
+				   DMA_RESV_USAGE_BOOKKEEP);
+		if (err)
+			goto err_job;
+	}
+
+	for (i = 0; !err && i < num_syncs; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+
+	if (err)
+		goto err_job;
+
+	if (ops->pre_commit) {
+		err = ops->pre_commit(pt_update);
+		if (err)
+			goto err_job;
+	}
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	if (!eng)
+		mutex_unlock(&m->job_mutex);
+
+	xe_bb_free(bb, fence);
+	drm_suballoc_free(sa_bo, fence);
+
+	return fence;
+
+err_job:
+	xe_sched_job_put(job);
+err_bb:
+	if (!eng)
+		mutex_unlock(&m->job_mutex);
+	xe_bb_free(bb, NULL);
+err:
+	drm_suballoc_free(sa_bo, NULL);
+	return ERR_PTR(err);
+}
+
+void xe_migrate_wait(struct xe_migrate *m)
+{
+	if (m->fence)
+		dma_fence_wait(m->fence, false);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_migrate.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
new file mode 100644
index 0000000000000..267057a3847fc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __XE_MIGRATE__
+#define __XE_MIGRATE__
+
+#include <drm/drm_mm.h>
+
+struct dma_fence;
+struct iosys_map;
+struct ttm_resource;
+
+struct xe_bo;
+struct xe_gt;
+struct xe_engine;
+struct xe_migrate;
+struct xe_migrate_pt_update;
+struct xe_sync_entry;
+struct xe_pt;
+struct xe_vm;
+struct xe_vm_pgtable_update;
+struct xe_vma;
+
+struct xe_migrate_pt_update_ops {
+	/**
+	 * populate() - Populate a command buffer or page-table with ptes.
+	 * @pt_update: Embeddable callback argument.
+	 * @gt: The gt for the current operation.
+	 * @map: struct iosys_map into the memory to be populated.
+	 * @pos: If @map is NULL, map into the memory to be populated.
+	 * @ofs: qword offset into @map, unused if @map is NULL.
+	 * @num_qwords: Number of qwords to write.
+	 * @update: Information about the PTEs to be inserted.
+	 *
+	 * This interface is intended to be used as a callback into the
+	 * page-table system to populate command buffers or shared
+	 * page-tables with PTEs.
+	 */
+	void (*populate)(struct xe_migrate_pt_update *pt_update,
+			 struct xe_gt *gt, struct iosys_map *map,
+			 void *pos, u32 ofs, u32 num_qwords,
+			 const struct xe_vm_pgtable_update *update);
+
+	/**
+	 * pre_commit(): Callback to be called just before arming the
+	 * sched_job.
+	 * @pt_update: Pointer to embeddable callback argument.
+	 *
+	 * Return: 0 on success, negative error code on error.
+	 */
+	int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
+};
+
+struct xe_migrate_pt_update {
+	const struct xe_migrate_pt_update_ops *ops;
+	struct xe_vma *vma;
+};
+
+struct xe_migrate *xe_migrate_init(struct xe_gt *gt);
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst);
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst,
+				   u32 value);
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_engine *eng,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update);
+
+void xe_migrate_wait(struct xe_migrate *m);
+
+struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
new file mode 100644
index 0000000000000..6a68fdff08dce
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_DOC_H_
+#define _XE_MIGRATE_DOC_H_
+
+/**
+ * DOC: Migrate Layer
+ *
+ * The XE migrate layer is used generate jobs which can copy memory (eviction),
+ * clear memory, or program tables (binds). This layer exists in every GT, has
+ * a migrate engine, and uses a special VM for all generated jobs.
+ *
+ * Special VM details
+ * ==================
+ *
+ * The special VM is configured with a page structure where we can dynamically
+ * map BOs which need to be copied and cleared, dynamically map other VM's page
+ * table BOs for updates, and identity map the entire device's VRAM with 1 GB
+ * pages.
+ *
+ * Currently the page structure consists of 48 phyiscal pages with 16 being
+ * reserved for BO mapping during copies and clear, 1 reserved for kernel binds,
+ * several pages are needed to setup the identity mappings (exact number based
+ * on how many bits of address space the device has), and the rest are reserved
+ * user bind operations.
+ *
+ * TODO: Diagram of layout
+ *
+ * Bind jobs
+ * =========
+ *
+ * A bind job consist of two batches and runs either on the migrate engine
+ * (kernel binds) or the bind engine passed in (user binds). In both cases the
+ * VM of the engine is the migrate VM.
+ *
+ * The first batch is used to update the migration VM page structure to point to
+ * the bind VM page table BOs which need to be updated. A physical page is
+ * required for this. If it is a user bind, the page is allocated from pool of
+ * pages reserved user bind operations with drm_suballoc managing this pool. If
+ * it is a kernel bind, the page reserved for kernel binds is used.
+ *
+ * The first batch is only required for devices without VRAM as when the device
+ * has VRAM the bind VM page table BOs are in VRAM and the identity mapping can
+ * be used.
+ *
+ * The second batch is used to program page table updated in the bind VM. Why
+ * not just one batch? Well the TLBs need to be invalidated between these two
+ * batches and that only can be done from the ring.
+ *
+ * When the bind job complete, the page allocated is returned the pool of pages
+ * reserved for user bind operations if a user bind. No need do this for kernel
+ * binds as the reserved kernel page is serially used by each job.
+ *
+ * Copy / clear jobs
+ * =================
+ *
+ * A copy or clear job consist of two batches and runs on the migrate engine.
+ *
+ * Like binds, the first batch is used update the migration VM page structure.
+ * In copy jobs, we need to map the source and destination of the BO into page
+ * the structure. In clear jobs, we just need to add 1 mapping of BO into the
+ * page structure. We use the 16 reserved pages in migration VM for mappings,
+ * this gives us a maximum copy size of 16 MB and maximum clear size of 32 MB.
+ *
+ * The second batch is used do either do the copy or clear. Again similar to
+ * binds, two batches are required as the TLBs need to be invalidated from the
+ * ring between the batches.
+ *
+ * More than one job will be generated if the BO is larger than maximum copy /
+ * clear size.
+ *
+ * Future work
+ * ===========
+ *
+ * Update copy and clear code to use identity mapped VRAM.
+ *
+ * Can we rework the use of the pages async binds to use all the entries in each
+ * page?
+ *
+ * Using large pages for sysmem mappings.
+ *
+ * Is it possible to identity map the sysmem? We should explore this.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
new file mode 100644
index 0000000000000..42e2405f2f488
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_mmio.h"
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+
+#include "i915_reg.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+
+#define XEHP_MTCFG_ADDR		_MMIO(0x101800)
+#define TILE_COUNT		REG_GENMASK(15, 8)
+#define GEN12_LMEM_BAR		2
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+	unsigned int mask_size = xe->info.dma_mask_size;
+	int err;
+
+	/*
+	 * We don't have a max segment size, so set it to the max so sg's
+	 * debugging layer doesn't complain
+	 */
+	dma_set_max_seg_size(xe->drm.dev, UINT_MAX);
+
+	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	return 0;
+
+mask_err:
+	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+	return err;
+}
+
+#ifdef CONFIG_64BIT
+static int
+_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int bar_size = pci_rebar_bytes_to_size(size);
+	int ret;
+
+	if (pci_resource_len(pdev, resno))
+		pci_release_resource(pdev, resno);
+
+	ret = pci_resize_resource(pdev, resno, bar_size);
+	if (ret) {
+		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n",
+			 resno, 1 << bar_size, ERR_PTR(ret));
+		return -1;
+	}
+
+	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
+	return 1;
+}
+
+static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_bus *root = pdev->bus;
+	struct resource *root_res;
+	resource_size_t rebar_size;
+	resource_size_t current_size;
+	u32 pci_cmd;
+	int i;
+	int ret;
+	u64 force_lmem_bar_size = xe_force_lmem_bar_size;
+
+	current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR));
+
+	if (force_lmem_bar_size) {
+		u32 bar_sizes;
+
+		rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M;
+		bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
+
+		if (rebar_size == current_size)
+			return 0;
+
+		if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) ||
+		    rebar_size >= roundup_pow_of_two(lmem_size)) {
+			rebar_size = lmem_size;
+			drm_info(&xe->drm,
+				 "Given bar size is not within supported size, setting it to default: %llu\n",
+				 (u64)lmem_size >> 20);
+		}
+	} else {
+		rebar_size = current_size;
+
+		if (rebar_size != roundup_pow_of_two(lmem_size))
+			rebar_size = lmem_size;
+		else
+			return 0;
+	}
+
+	while (root->parent)
+		root = root->parent;
+
+	pci_bus_for_each_resource(root, root_res, i) {
+		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+		    root_res->start > 0x100000000ull)
+			break;
+	}
+
+	if (!root_res) {
+		drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n");
+		return -1;
+	}
+
+	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
+
+	ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size);
+
+	pci_assign_unassigned_bus_resources(pdev->bus);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+	return ret;
+}
+#else
+static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) { return 0; }
+#endif
+
+static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
+{
+	if (!pci_resource_flags(pdev, bar))
+		return false;
+
+	if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET)
+		return false;
+
+	if (!pci_resource_len(pdev, bar))
+		return false;
+
+	return true;
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_gt *gt;
+	u8 id;
+	u64 lmem_size;
+	u64 original_size;
+	u64 current_size;
+	u64 flat_ccs_base;
+	int resize_result;
+
+	if (!IS_DGFX(xe)) {
+		xe->mem.vram.mapping = 0;
+		xe->mem.vram.size = 0;
+		xe->mem.vram.io_start = 0;
+
+		for_each_gt(gt, xe, id) {
+			gt->mem.vram.mapping = 0;
+			gt->mem.vram.size = 0;
+			gt->mem.vram.io_start = 0;
+		}
+		return 0;
+	}
+
+	if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) {
+		drm_err(&xe->drm, "pci resource is not valid\n");
+		return -ENXIO;
+	}
+
+	gt = xe_device_get_gt(xe, 0);
+	lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg);
+
+	original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+
+	if (xe->info.has_flat_ccs)  {
+		int err;
+		u32 reg;
+
+		err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+		if (err)
+			return err;
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
+		lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+
+		drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n",
+			 lmem_size, flat_ccs_base);
+
+		err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+		if (err)
+			return err;
+	} else {
+		flat_ccs_base = lmem_size;
+	}
+
+	resize_result = xe_resize_lmem_bar(xe, lmem_size);
+	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
+
+	xe->mem.vram.size = min(current_size, lmem_size);
+
+	if (!xe->mem.vram.size)
+		return -EIO;
+
+	if (resize_result > 0)
+		drm_info(&xe->drm, "Successfully resize LMEM from %lluMiB to %lluMiB\n",
+			 (u64)original_size >> 20,
+			 (u64)current_size >> 20);
+	else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size)
+		drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
+			 (u64)xe->mem.vram.size >> 20);
+	if (xe->mem.vram.size < lmem_size)
+		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
+			 lmem_size, xe->mem.vram.size);
+
+#ifdef CONFIG_64BIT
+	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
+#endif
+
+	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base);
+
+	drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
+
+	/* FIXME: Assuming equally partitioned VRAM, incorrect */
+	if (xe->info.tile_count > 1) {
+		u8 adj_tile_count = xe->info.tile_count;
+		resource_size_t size, io_start;
+
+		for_each_gt(gt, xe, id)
+			if (xe_gt_is_media_type(gt))
+				--adj_tile_count;
+
+		XE_BUG_ON(!adj_tile_count);
+
+		size = xe->mem.vram.size / adj_tile_count;
+		io_start = xe->mem.vram.io_start;
+
+		for_each_gt(gt, xe, id) {
+			if (id && !xe_gt_is_media_type(gt))
+				io_start += size;
+
+			gt->mem.vram.size = size;
+			gt->mem.vram.io_start = io_start;
+			gt->mem.vram.mapping = xe->mem.vram.mapping +
+				(io_start - xe->mem.vram.io_start);
+
+			drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n",
+				 id, gt->info.vram_id, &gt->mem.vram.io_start,
+				 &gt->mem.vram.size);
+		}
+	} else {
+		gt->mem.vram.size = xe->mem.vram.size;
+		gt->mem.vram.io_start = xe->mem.vram.io_start;
+		gt->mem.vram.mapping = xe->mem.vram.mapping;
+
+		drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size);
+	}
+	return 0;
+}
+
+static void xe_mmio_probe_tiles(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	u32 mtcfg;
+	u8 adj_tile_count;
+	u8 id;
+
+	if (xe->info.tile_count == 1)
+		return;
+
+	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg);
+	adj_tile_count = xe->info.tile_count =
+		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+	if (xe->info.media_ver >= 13)
+		xe->info.tile_count *= 2;
+
+	drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
+		 xe->info.tile_count, adj_tile_count);
+
+	if (xe->info.tile_count > 1) {
+		const int mmio_bar = 0;
+		size_t size;
+		void *regs;
+
+		if (adj_tile_count > 1) {
+			pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
+			xe->mmio.size = SZ_16M * adj_tile_count;
+			xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev),
+						  mmio_bar, xe->mmio.size);
+		}
+
+		size = xe->mmio.size / adj_tile_count;
+		regs = xe->mmio.regs;
+
+		for_each_gt(gt, xe, id) {
+			if (id && !xe_gt_is_media_type(gt))
+				regs += size;
+			gt->mmio.size = size;
+			gt->mmio.regs = regs;
+		}
+	}
+}
+
+static void mmio_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
+	if (xe->mem.vram.mapping)
+		iounmap(xe->mem.vram.mapping);
+}
+
+int xe_mmio_init(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	const int mmio_bar = 0;
+	int err;
+
+	/*
+	 * Map the entire BAR, which includes registers (0-4MB), reserved space
+	 * (4MB-8MB), and GGTT (8MB-16MB). Other parts of the driver (GTs,
+	 * GGTTs) will derive the pointers they need from the mapping in the
+	 * device structure.
+	 */
+	xe->mmio.size = SZ_16M;
+	xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar,
+				  xe->mmio.size);
+	if (xe->mmio.regs == NULL) {
+		drm_err(&xe->drm, "failed to map registers\n");
+		return -EIO;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
+	if (err)
+		return err;
+
+	/* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */
+	gt->mmio.size = xe->mmio.size;
+	gt->mmio.regs = xe->mmio.regs;
+
+	/*
+	 * The boot firmware initializes local memory and assesses its health.
+	 * If memory training fails, the punit will have been instructed to
+	 * keep the GT powered down; we won't be able to communicate with it
+	 * and we should not continue with driver initialization.
+	 */
+	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) {
+		drm_err(&xe->drm, "LMEM not initialized by firmware\n");
+		return -ENODEV;
+	}
+
+	err = xe_set_dma_info(xe);
+	if (err)
+		return err;
+
+	xe_mmio_probe_tiles(xe);
+
+	return 0;
+}
+
+#define VALID_MMIO_FLAGS (\
+	DRM_XE_MMIO_BITS_MASK |\
+	DRM_XE_MMIO_READ |\
+	DRM_XE_MMIO_WRITE)
+
+static const i915_reg_t mmio_read_whitelist[] = {
+	RING_TIMESTAMP(RENDER_RING_BASE),
+};
+
+int xe_mmio_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_mmio *args = data;
+	unsigned int bits_flag, bytes;
+	bool allowed;
+	int ret = 0;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value))
+		return -EINVAL;
+
+	allowed = capable(CAP_SYS_ADMIN);
+	if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) {
+		unsigned int i;
+
+		for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
+			if (mmio_read_whitelist[i].reg == args->addr) {
+				allowed = true;
+				break;
+			}
+		}
+	}
+
+	if (XE_IOCTL_ERR(xe, !allowed))
+		return -EPERM;
+
+	bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
+	bytes = 1 << bits_flag;
+	if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size))
+		return -EINVAL;
+
+	xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+
+	if (args->flags & DRM_XE_MMIO_WRITE) {
+		switch (bits_flag) {
+		case DRM_XE_MMIO_8BIT:
+			return -EINVAL; /* TODO */
+		case DRM_XE_MMIO_16BIT:
+			return -EINVAL; /* TODO */
+		case DRM_XE_MMIO_32BIT:
+			if (XE_IOCTL_ERR(xe, args->value > U32_MAX))
+				return -EINVAL;
+			xe_mmio_write32(to_gt(xe), args->addr, args->value);
+			break;
+		case DRM_XE_MMIO_64BIT:
+			xe_mmio_write64(to_gt(xe), args->addr, args->value);
+			break;
+		default:
+			drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
+			ret = -EINVAL;
+			goto exit;
+		}
+	}
+
+	if (args->flags & DRM_XE_MMIO_READ) {
+		switch (bits_flag) {
+		case DRM_XE_MMIO_8BIT:
+			return -EINVAL; /* TODO */
+		case DRM_XE_MMIO_16BIT:
+			return -EINVAL; /* TODO */
+		case DRM_XE_MMIO_32BIT:
+			args->value = xe_mmio_read32(to_gt(xe), args->addr);
+			break;
+		case DRM_XE_MMIO_64BIT:
+			args->value = xe_mmio_read64(to_gt(xe), args->addr);
+			break;
+		default:
+			drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
+			ret = -EINVAL;
+		}
+	}
+
+exit:
+	xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
new file mode 100644
index 0000000000000..09d24467096fb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MMIO_H_
+#define _XE_MMIO_H_
+
+#include <linux/delay.h>
+
+#include "xe_gt_types.h"
+
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporarily
+ * including so we can use 'wait_for' and unblock initial development. A follow
+ * should replace 'wait_for' with a sane version and drop including this header.
+ */
+#include "i915_utils.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+
+int xe_mmio_init(struct xe_device *xe);
+
+static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg)
+{
+	if (reg < gt->mmio.adj_limit)
+		reg += gt->mmio.adj_offset;
+
+	return readb(gt->mmio.regs + reg);
+}
+
+static inline void xe_mmio_write32(struct xe_gt *gt,
+				   u32 reg, u32 val)
+{
+	if (reg < gt->mmio.adj_limit)
+		reg += gt->mmio.adj_offset;
+
+	writel(val, gt->mmio.regs + reg);
+}
+
+static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg)
+{
+	if (reg < gt->mmio.adj_limit)
+		reg += gt->mmio.adj_offset;
+
+	return readl(gt->mmio.regs + reg);
+}
+
+static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask,
+				 u32 val)
+{
+	u32 old, reg_val;
+
+	old = xe_mmio_read32(gt, reg);
+	reg_val = (old & mask) | val;
+	xe_mmio_write32(gt, reg, reg_val);
+
+	return old;
+}
+
+static inline void xe_mmio_write64(struct xe_gt *gt,
+				   u32 reg, u64 val)
+{
+	if (reg < gt->mmio.adj_limit)
+		reg += gt->mmio.adj_offset;
+
+	writeq(val, gt->mmio.regs + reg);
+}
+
+static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg)
+{
+	if (reg < gt->mmio.adj_limit)
+		reg += gt->mmio.adj_offset;
+
+	return readq(gt->mmio.regs + reg);
+}
+
+static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
+					     u32 reg, u32 val,
+					     u32 mask, u32 eval)
+{
+	u32 reg_val;
+
+	xe_mmio_write32(gt, reg, val);
+	reg_val = xe_mmio_read32(gt, reg);
+
+	return (reg_val & mask) != eval ? -EINVAL : 0;
+}
+
+static inline int xe_mmio_wait32(struct xe_gt *gt,
+				 u32 reg, u32 val,
+				 u32 mask, u32 timeout_ms)
+{
+	return wait_for((xe_mmio_read32(gt, reg) & mask) == val,
+			timeout_ms);
+}
+
+int xe_mmio_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file);
+
+static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg)
+{
+	return range && reg >= range->start && reg <= range->end;
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
new file mode 100644
index 0000000000000..86b966fffbe5f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_platform_types.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_step_types.h"
+
+#include "gt/intel_gt_regs.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define mocs_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void mocs_dbg(const struct drm_device *dev,
+			    const char *format, ...)
+{ /* noop */ }
+#endif
+
+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum xe_mocs_info_index {
+	/*
+	 * Not cached anywhere, coherency between CPU and GPU accesses is
+	 * guaranteed.
+	 */
+	XE_MOCS_UNCACHED,
+	/*
+	 * Cacheability and coherency controlled by the kernel automatically
+	 * based on the xxxx  IOCTL setting and the current
+	 * usage of the surface (used for display scanout or not).
+	 */
+	XE_MOCS_PTE,
+	/*
+	 * Cached in all GPU caches available on the platform.
+	 * Coherency between CPU and GPU accesses to the surface is not
+	 * guaranteed without extra synchronization.
+	 */
+	XE_MOCS_CACHED,
+};
+
+enum {
+	HAS_GLOBAL_MOCS = BIT(0),
+	HAS_RENDER_L3CC = BIT(1),
+};
+
+struct xe_mocs_entry {
+	u32 control_value;
+	u16 l3cc_value;
+	u16 used;
+};
+
+struct xe_mocs_info {
+	unsigned int size;
+	unsigned int n_entries;
+	const struct xe_mocs_entry *table;
+	u8 uc_index;
+	u8 wb_index;
+	u8 unused_entries_index;
+};
+
+/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
+#define _LE_CACHEABILITY(value)	((value) << 0)
+#define _LE_TGT_CACHE(value)	((value) << 2)
+#define LE_LRUM(value)		((value) << 4)
+#define LE_AOM(value)		((value) << 6)
+#define LE_RSC(value)		((value) << 7)
+#define LE_SCC(value)		((value) << 8)
+#define LE_PFM(value)		((value) << 11)
+#define LE_SCF(value)		((value) << 14)
+#define LE_COS(value)		((value) << 15)
+#define LE_SSE(value)		((value) << 17)
+
+/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
+#define L3_ESC(value)		((value) << 0)
+#define L3_SCC(value)		((value) << 1)
+#define _L3_CACHEABILITY(value)	((value) << 4)
+#define L3_GLBGO(value)		((value) << 6)
+#define L3_LKUP(value)		((value) << 7)
+
+/* Helper defines */
+#define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+#define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES    16
+
+/* (e)LLC caching options */
+/*
+ * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
+ * the same as LE_UC
+ */
+#define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
+#define LE_1_UC			_LE_CACHEABILITY(1)
+#define LE_2_WT			_LE_CACHEABILITY(2)
+#define LE_3_WB			_LE_CACHEABILITY(3)
+
+/* Target cache */
+#define LE_TC_0_PAGETABLE	_LE_TGT_CACHE(0)
+#define LE_TC_1_LLC		_LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT	_LE_TGT_CACHE(3)
+
+/* L3 caching options */
+#define L3_0_DIRECT		_L3_CACHEABILITY(0)
+#define L3_1_UC			_L3_CACHEABILITY(1)
+#define L3_2_RESERVED		_L3_CACHEABILITY(2)
+#define L3_3_WB			_L3_CACHEABILITY(3)
+
+#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
+	[__idx] = { \
+		.control_value = __control_value, \
+		.l3cc_value = __l3cc_value, \
+		.used = 1, \
+	}
+
+/*
+ * MOCS tables
+ *
+ * These are the MOCS tables that are programmed across all the rings.
+ * The control value is programmed to all the rings that support the
+ * MOCS registers. While the l3cc_values are only programmed to the
+ * LNCFCMOCS0 - LNCFCMOCS32 registers.
+ *
+ * These tables are intended to be kept reasonably consistent across
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
+ *
+ * Entries not part of the following tables are undefined as far as
+ * userspace is concerned and shouldn't be relied upon.  For Gen < 12
+ * they will be initialized to PTE. Gen >= 12 don't have a setting for
+ * PTE and those platforms except TGL/RKL will be initialized L3 WB to
+ * catch accidental use of reserved and unused mocs indexes.
+ *
+ * The last few entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE1: These tables are part of bspec and defined as part of hardware
+ *       interface for ICL+. For older platforms, they are part of kernel
+ *       ABI. It is expected that, for specific hardware platform, existing
+ *       entries will remain constant and the table will only be updated by
+ *       adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
+ *       indices have been set to L3 WB. These reserved entries should never
+ *       be used, they may be changed to low performant variants with better
+ *       coherency in the future if more entries are needed.
+ *       For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
+ */
+
+#define GEN11_MOCS_ENTRIES \
+	/* Entries 0 and 1 are defined per-platform */ \
+	/* Base - L3 + LLC */ \
+	MOCS_ENTRY(2, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_3_WB), \
+	/* Base - Uncached */ \
+	MOCS_ENTRY(3, \
+		LE_1_UC | LE_TC_1_LLC, \
+		L3_1_UC), \
+	/* Base - L3 */ \
+	MOCS_ENTRY(4, \
+		LE_1_UC | LE_TC_1_LLC, \
+		L3_3_WB), \
+	/* Base - LLC */ \
+	MOCS_ENTRY(5, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_1_UC), \
+	/* Age 0 - LLC */ \
+	MOCS_ENTRY(6, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		L3_1_UC), \
+	/* Age 0 - L3 + LLC */ \
+	MOCS_ENTRY(7, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		L3_3_WB), \
+	/* Age: Don't Chg. - LLC */ \
+	MOCS_ENTRY(8, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		L3_1_UC), \
+	/* Age: Don't Chg. - L3 + LLC */ \
+	MOCS_ENTRY(9, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		L3_3_WB), \
+	/* No AOM - LLC */ \
+	MOCS_ENTRY(10, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		L3_1_UC), \
+	/* No AOM - L3 + LLC */ \
+	MOCS_ENTRY(11, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		L3_3_WB), \
+	/* No AOM; Age 0 - LLC */ \
+	MOCS_ENTRY(12, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		L3_1_UC), \
+	/* No AOM; Age 0 - L3 + LLC */ \
+	MOCS_ENTRY(13, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		L3_3_WB), \
+	/* No AOM; Age:DC - LLC */ \
+	MOCS_ENTRY(14, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		L3_1_UC), \
+	/* No AOM; Age:DC - L3 + LLC */ \
+	MOCS_ENTRY(15, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		L3_3_WB), \
+	/* Self-Snoop - L3 + LLC */ \
+	MOCS_ENTRY(18, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(12.5%) */ \
+	MOCS_ENTRY(19, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(25%) */ \
+	MOCS_ENTRY(20, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(50%) */ \
+	MOCS_ENTRY(21, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(75%) */ \
+	MOCS_ENTRY(22, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
+		L3_3_WB), \
+	/* Skip Caching - L3 + LLC(87.5%) */ \
+	MOCS_ENTRY(23, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
+		L3_3_WB), \
+	/* HW Reserved - SW program but never use */ \
+	MOCS_ENTRY(62, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_1_UC), \
+	/* HW Reserved - SW program but never use */ \
+	MOCS_ENTRY(63, \
+		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		L3_1_UC)
+
+static const struct xe_mocs_entry tgl_mocs_desc[] = {
+	/*
+	 * NOTE:
+	 * Reserved and unspecified MOCS indices have been set to (L3 + LCC).
+	 * These reserved entries should never be used, they may be changed
+	 * to low performant variants with better coherency in the future if
+	 * more entries are needed. We are programming index XE_MOCS_PTE(1)
+	 * only, __init_mocs_table() take care to program unused index with
+	 * this entry.
+	 */
+	MOCS_ENTRY(XE_MOCS_PTE,
+		   LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
+		   L3_1_UC),
+	GEN11_MOCS_ENTRIES,
+
+	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+	MOCS_ENTRY(48,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 */
+	MOCS_ENTRY(49,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + LLC */
+	MOCS_ENTRY(50,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Implicitly enable L1 - HDC:L1 */
+	MOCS_ENTRY(51,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* HW Special Case (CCS) */
+	MOCS_ENTRY(60,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Special Case (Displayable) */
+	MOCS_ENTRY(61,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+};
+
+static const struct xe_mocs_entry dg1_mocs_desc[] = {
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+	/* WB - L3 */
+	MOCS_ENTRY(5, 0, L3_3_WB),
+	/* WB - L3 50% */
+	MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
+	/* WB - L3 25% */
+	MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
+	/* WB - L3 12.5% */
+	MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
+
+	/* HDC:L1 + L3 */
+	MOCS_ENTRY(48, 0, L3_3_WB),
+	/* HDC:L1 */
+	MOCS_ENTRY(49, 0, L3_1_UC),
+
+	/* HW Reserved */
+	MOCS_ENTRY(60, 0, L3_1_UC),
+	MOCS_ENTRY(61, 0, L3_1_UC),
+	MOCS_ENTRY(62, 0, L3_1_UC),
+	MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
+static const struct xe_mocs_entry gen12_mocs_desc[] = {
+	GEN11_MOCS_ENTRIES,
+	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+	MOCS_ENTRY(48,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 */
+	MOCS_ENTRY(49,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + LLC */
+	MOCS_ENTRY(50,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Implicitly enable L1 - HDC:L1 */
+	MOCS_ENTRY(51,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* HW Special Case (CCS) */
+	MOCS_ENTRY(60,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Special Case (Displayable) */
+	MOCS_ENTRY(61,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc[] = {
+	/* UC - Coherent; GO:L3 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {
+	/* Wa_14011441408: Set Go to Memory for MOCS#0 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry pvc_mocs_desc[] = {
+	/* Error */
+	MOCS_ENTRY(0, 0, L3_3_WB),
+
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+
+	/* WB */
+	MOCS_ENTRY(2, 0, L3_3_WB),
+};
+
+static unsigned int get_mocs_settings(struct xe_device *xe,
+				      struct xe_mocs_info *info)
+{
+	unsigned int flags;
+
+	memset(info, 0, sizeof(struct xe_mocs_info));
+
+	info->unused_entries_index = XE_MOCS_PTE;
+	switch (xe->info.platform) {
+	case XE_PVC:
+		info->size = ARRAY_SIZE(pvc_mocs_desc);
+		info->table = pvc_mocs_desc;
+		info->n_entries = PVC_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->wb_index = 2;
+		info->unused_entries_index = 2;
+		break;
+	case XE_METEORLAKE:
+		info->size = ARRAY_SIZE(dg2_mocs_desc);
+		info->table = dg2_mocs_desc;
+		info->n_entries = MTL_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->unused_entries_index = 3;
+		break;
+	case XE_DG2:
+		if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
+		    xe->info.step.graphics >= STEP_A0 &&
+		    xe->info.step.graphics <= STEP_B0) {
+			info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
+			info->table = dg2_mocs_desc_g10_ax;
+		} else {
+			info->size = ARRAY_SIZE(dg2_mocs_desc);
+			info->table = dg2_mocs_desc;
+		}
+		info->uc_index = 1;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->unused_entries_index = 3;
+		break;
+	case XE_DG1:
+		info->size = ARRAY_SIZE(dg1_mocs_desc);
+		info->table = dg1_mocs_desc;
+		info->uc_index = 1;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->unused_entries_index = 5;
+		break;
+	case XE_TIGERLAKE:
+		info->size  = ARRAY_SIZE(tgl_mocs_desc);
+		info->table = tgl_mocs_desc;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		break;
+	case XE_ALDERLAKE_S:
+	case XE_ALDERLAKE_P:
+		info->size  = ARRAY_SIZE(gen12_mocs_desc);
+		info->table = gen12_mocs_desc;
+		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		info->unused_entries_index = 2;
+		break;
+	default:
+		drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n");
+		return 0;
+	}
+
+	if (XE_WARN_ON(info->size > info->n_entries))
+		return 0;
+
+	flags = HAS_RENDER_L3CC;
+	if (!IS_DGFX(xe))
+		flags |= HAS_GLOBAL_MOCS;
+
+	return flags;
+}
+
+/*
+ * Get control_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is non-zero then its value will be returned
+ * otherwise XE_MOCS_PTE's value is returned in this case.
+ */
+static u32 get_entry_control(const struct xe_mocs_info *info,
+			     unsigned int index)
+{
+	if (index < info->size && info->table[index].used)
+		return info->table[index].control_value;
+	return info->table[info->unused_entries_index].control_value;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+			      const struct xe_mocs_info *info,
+			      u32 addr)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	unsigned int i;
+	u32 mocs;
+
+	mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+	for (i = 0;
+	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
+	     i++) {
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, _MMIO(addr + i * 4).reg, mocs);
+		xe_mmio_write32(gt, _MMIO(addr + i * 4).reg, mocs);
+	}
+}
+
+/*
+ * Get l3cc_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is not zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
+ */
+static u16 get_entry_l3cc(const struct xe_mocs_info *info,
+			  unsigned int index)
+{
+	if (index < info->size && info->table[index].used)
+		return info->table[index].l3cc_value;
+	return info->table[info->unused_entries_index].l3cc_value;
+}
+
+static u32 l3cc_combine(u16 low, u16 high)
+{
+	return low | (u32)high << 16;
+}
+
+static void init_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	unsigned int i;
+	u32 l3cc;
+
+	mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+	for (i = 0;
+	     i < (info->n_entries + 1) / 2 ?
+	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
+	     i++) {
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, GEN9_LNCFCMOCS(i).reg, l3cc);
+		xe_mmio_write32(gt, GEN9_LNCFCMOCS(i).reg, l3cc);
+	}
+}
+
+void xe_mocs_init_engine(const struct xe_engine *engine)
+{
+	struct xe_mocs_info table;
+	unsigned int flags;
+
+	flags = get_mocs_settings(engine->gt->xe, &table);
+	if (!flags)
+		return;
+
+	if (flags & HAS_RENDER_L3CC && engine->class == XE_ENGINE_CLASS_RENDER)
+		init_l3cc_table(engine->gt, &table);
+}
+
+void xe_mocs_init(struct xe_gt *gt)
+{
+	struct xe_mocs_info table;
+	unsigned int flags;
+
+	/*
+	 * LLC and eDRAM control values are not applicable to dgfx
+	 */
+	flags = get_mocs_settings(gt->xe, &table);
+	mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags);
+	gt->mocs.uc_index = table.uc_index;
+	gt->mocs.wb_index = table.wb_index;
+
+	if (flags & HAS_GLOBAL_MOCS)
+		__init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg);
+
+	/*
+	 * Initialize the L3CC table as part of mocs initalization to make
+	 * sure the LNCFCMOCSx registers are programmed for the subsequent
+	 * memory transactions including guc transactions
+	 */
+	if (flags & HAS_RENDER_L3CC)
+		init_l3cc_table(gt, &table);
+}
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
new file mode 100644
index 0000000000000..aba1abe216ab2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_H_
+#define _XE_MOCS_H_
+
+#include <linux/types.h>
+
+struct xe_engine;
+struct xe_gt;
+
+void xe_mocs_init_engine(const struct xe_engine *engine);
+void xe_mocs_init(struct xe_gt *gt);
+
+/**
+ * xe_mocs_index_to_value - Translate mocs index to the mocs value exected by
+ * most blitter commands.
+ * @mocs_index: index into the mocs tables
+ *
+ * Return: The corresponding mocs value to be programmed.
+ */
+static inline u32 xe_mocs_index_to_value(u32 mocs_index)
+{
+	return mocs_index << 1;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
new file mode 100644
index 0000000000000..cc862553a252f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "xe_drv.h"
+#include "xe_hw_fence.h"
+#include "xe_module.h"
+#include "xe_pci.h"
+#include "xe_sched_job.h"
+
+bool enable_guc = true;
+module_param_named_unsafe(enable_guc, enable_guc, bool, 0444);
+MODULE_PARM_DESC(enable_guc, "Enable GuC submission");
+
+u32 xe_force_lmem_bar_size;
+module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600);
+MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size(in MiB)");
+
+int xe_guc_log_level = 5;
+module_param_named(guc_log_level, xe_guc_log_level, int, 0600);
+MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
+
+char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE;
+module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400);
+MODULE_PARM_DESC(force_probe,
+		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+
+struct init_funcs {
+	int (*init)(void);
+	void (*exit)(void);
+};
+#define MAKE_INIT_EXIT_FUNCS(name)		\
+	{ .init = xe_##name##_module_init,	\
+	  .exit = xe_##name##_module_exit, }
+static const struct init_funcs init_funcs[] = {
+	MAKE_INIT_EXIT_FUNCS(hw_fence),
+	MAKE_INIT_EXIT_FUNCS(sched_job),
+};
+
+static int __init xe_init(void)
+{
+	int err, i;
+
+	for (i = 0; i < ARRAY_SIZE(init_funcs); i++) {
+		err = init_funcs[i].init();
+		if (err) {
+			while (i--)
+				init_funcs[i].exit();
+			return err;
+		}
+	}
+
+	return xe_register_pci_driver();
+}
+
+static void __exit xe_exit(void)
+{
+	int i;
+
+	xe_unregister_pci_driver();
+
+	for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
+		init_funcs[i].exit();
+}
+
+module_init(xe_init);
+module_exit(xe_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
new file mode 100644
index 0000000000000..2c6ee46f55953
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/init.h>
+
+/* Module modprobe variables */
+extern bool enable_guc;
+extern bool enable_display;
+extern u32 xe_force_lmem_bar_size;
+extern int xe_guc_log_level;
+extern char *xe_param_force_probe;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
new file mode 100644
index 0000000000000..55d8a597a0689
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_pci.h"
+
+#include <linux/device/driver.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_color_mgmt.h>
+#include <drm/xe_pciids.h>
+
+#include "xe_drv.h"
+#include "xe_device.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+#include "xe_pm.h"
+#include "xe_step.h"
+
+#include "i915_reg.h"
+
+#define DEV_INFO_FOR_EACH_FLAG(func) \
+	func(require_force_probe); \
+	func(is_dgfx); \
+	/* Keep has_* in alphabetical order */ \
+
+struct xe_subplatform_desc {
+	enum xe_subplatform subplatform;
+	const char *name;
+	const u16 *pciidlist;
+};
+
+struct xe_gt_desc {
+	enum xe_gt_type type;
+	u8 vram_id;
+	u64 engine_mask;
+	u32 mmio_adj_limit;
+	u32 mmio_adj_offset;
+};
+
+struct xe_device_desc {
+	u8 graphics_ver;
+	u8 graphics_rel;
+	u8 media_ver;
+	u8 media_rel;
+
+	u64 platform_engine_mask; /* Engines supported by the HW */
+
+	enum xe_platform platform;
+	const char *platform_name;
+	const struct xe_subplatform_desc *subplatforms;
+	const struct xe_gt_desc *extra_gts;
+
+	u8 dma_mask_size; /* available DMA address bits */
+
+	u8 gt; /* GT number, 0 if undefined */
+
+#define DEFINE_FLAG(name) u8 name:1
+	DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
+#undef DEFINE_FLAG
+
+	u8 vram_flags;
+	u8 max_tiles;
+	u8 vm_max_level;
+
+	bool supports_usm;
+	bool has_flat_ccs;
+	bool has_4tile;
+};
+
+#define PLATFORM(x)		\
+	.platform = (x),	\
+	.platform_name = #x
+
+#define NOP(x)	x
+
+/* Keep in gen based order, and chronological order within a gen */
+#define GEN12_FEATURES \
+	.require_force_probe = true, \
+	.graphics_ver = 12, \
+	.media_ver = 12, \
+	.dma_mask_size = 39, \
+	.max_tiles = 1, \
+	.vm_max_level = 3, \
+	.vram_flags = 0
+
+static const struct xe_device_desc tgl_desc = {
+	GEN12_FEATURES,
+	PLATFORM(XE_TIGERLAKE),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+};
+
+static const struct xe_device_desc adl_s_desc = {
+	GEN12_FEATURES,
+	PLATFORM(XE_ALDERLAKE_S),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+};
+
+static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
+
+static const struct xe_device_desc adl_p_desc = {
+	GEN12_FEATURES,
+	PLATFORM(XE_ALDERLAKE_P),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+	.subplatforms = (const struct xe_subplatform_desc[]) {
+		{ XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids },
+		{},
+	},
+};
+
+#define DGFX_FEATURES \
+	.is_dgfx = 1
+
+static const struct xe_device_desc dg1_desc = {
+	GEN12_FEATURES,
+	DGFX_FEATURES,
+	.graphics_rel = 10,
+	PLATFORM(XE_DG1),
+	.platform_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+		BIT(XE_HW_ENGINE_VCS2),
+};
+
+#define XE_HP_FEATURES \
+	.require_force_probe = true, \
+	.graphics_ver = 12, \
+	.graphics_rel = 50, \
+	.has_flat_ccs = true, \
+	.dma_mask_size = 46, \
+	.max_tiles = 1, \
+	.vm_max_level = 3
+
+#define XE_HPM_FEATURES \
+	.media_ver = 12, \
+	.media_rel = 50
+
+static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
+static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
+static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
+
+#define DG2_FEATURES \
+	DGFX_FEATURES, \
+	.graphics_rel = 55, \
+	.media_rel = 55, \
+	PLATFORM(XE_DG2), \
+	.subplatforms = (const struct xe_subplatform_desc[]) { \
+		{ XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
+		{ XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
+		{ XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
+		{ } \
+	}, \
+	.platform_engine_mask = \
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \
+	.require_force_probe = true, \
+	.vram_flags = XE_VRAM_FLAGS_NEED64K, \
+	.has_4tile = 1
+
+static const struct xe_device_desc ats_m_desc = {
+	XE_HP_FEATURES,
+	XE_HPM_FEATURES,
+
+	DG2_FEATURES,
+};
+
+static const struct xe_device_desc dg2_desc = {
+	XE_HP_FEATURES,
+	XE_HPM_FEATURES,
+
+	DG2_FEATURES,
+};
+
+#define PVC_ENGINES \
+	BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \
+	BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \
+	BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \
+	BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \
+	BIT(XE_HW_ENGINE_BCS8) | \
+	BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \
+	BIT(XE_HW_ENGINE_VCS2) | \
+	BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
+	BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3)
+
+static const struct xe_gt_desc pvc_gts[] = {
+	{
+		.type = XE_GT_TYPE_REMOTE,
+		.vram_id = 1,
+		.engine_mask = PVC_ENGINES,
+		.mmio_adj_limit = 0,
+		.mmio_adj_offset = 0,
+	},
+};
+
+static const __maybe_unused struct xe_device_desc pvc_desc = {
+	XE_HP_FEATURES,
+	XE_HPM_FEATURES,
+	DGFX_FEATURES,
+	PLATFORM(XE_PVC),
+	.extra_gts = pvc_gts,
+	.graphics_rel = 60,
+	.has_flat_ccs = 0,
+	.media_rel = 60,
+	.platform_engine_mask = PVC_ENGINES,
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
+	.dma_mask_size = 52,
+	.max_tiles = 2,
+	.vm_max_level = 4,
+	.supports_usm = true,
+};
+
+#define MTL_MEDIA_ENGINES \
+	BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
+	BIT(XE_HW_ENGINE_VECS0)	/* TODO: GSC0 */
+
+static const struct xe_gt_desc xelpmp_gts[] = {
+	{
+		.type = XE_GT_TYPE_MEDIA,
+		.vram_id = 0,
+		.engine_mask = MTL_MEDIA_ENGINES,
+		.mmio_adj_limit = 0x40000,
+		.mmio_adj_offset = 0x380000,
+	},
+};
+
+#define MTL_MAIN_ENGINES \
+	BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
+	BIT(XE_HW_ENGINE_CCS0)
+
+static const struct xe_device_desc mtl_desc = {
+	/*
+	 * Real graphics IP version will be obtained from hardware GMD_ID
+	 * register.  Value provided here is just for sanity checking.
+	 */
+	.require_force_probe = true,
+	.graphics_ver = 12,
+	.graphics_rel = 70,
+	.dma_mask_size = 46,
+	.max_tiles = 2,
+	.vm_max_level = 3,
+	.media_ver = 13,
+	PLATFORM(XE_METEORLAKE),
+	.extra_gts = xelpmp_gts,
+	.platform_engine_mask = MTL_MAIN_ENGINES,
+};
+
+#undef PLATFORM
+
+#define INTEL_VGA_DEVICE(id, info) {			\
+	PCI_DEVICE(PCI_VENDOR_ID_INTEL, id),		\
+	PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16,	\
+	(unsigned long) info }
+
+/*
+ * Make sure any device matches here are from most specific to most
+ * general.  For example, since the Quanta match is based on the subsystem
+ * and subvendor IDs, we need it to come before the more general IVB
+ * PCI ID matches, otherwise we'll use the wrong info struct above.
+ */
+static const struct pci_device_id pciidlist[] = {
+	XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
+	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
+	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+#undef INTEL_VGA_DEVICE
+
+/* is device_id present in comma separated list of ids */
+static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
+{
+	char *s, *p, *tok;
+	bool ret;
+
+	if (!devices || !*devices)
+		return false;
+
+	/* match everything */
+	if (negative && strcmp(devices, "!*") == 0)
+		return true;
+	if (!negative && strcmp(devices, "*") == 0)
+		return true;
+
+	s = kstrdup(devices, GFP_KERNEL);
+	if (!s)
+		return false;
+
+	for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) {
+		u16 val;
+
+		if (negative && tok[0] == '!')
+			tok++;
+		else if ((negative && tok[0] != '!') ||
+			 (!negative && tok[0] == '!'))
+			continue;
+
+		if (kstrtou16(tok, 16, &val) == 0 && val == device_id) {
+			ret = true;
+			break;
+		}
+	}
+
+	kfree(s);
+
+	return ret;
+}
+
+static bool id_forced(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_param_force_probe, false);
+}
+
+static bool id_blocked(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_param_force_probe, true);
+}
+
+static const struct xe_subplatform_desc *
+subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc)
+{
+	const struct xe_subplatform_desc *sp;
+	const u16 *id;
+
+	for (sp = desc->subplatforms; sp && sp->subplatform; sp++)
+		for (id = sp->pciidlist; *id; id++)
+			if (*id == xe->info.devid)
+				return sp;
+
+	return NULL;
+}
+
+static void xe_pci_remove(struct pci_dev *pdev)
+{
+	struct xe_device *xe;
+
+	xe = pci_get_drvdata(pdev);
+	if (!xe) /* driver load aborted, nothing to cleanup */
+		return;
+
+	xe_device_remove(xe);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	const struct xe_device_desc *desc = (void *)ent->driver_data;
+	const struct xe_subplatform_desc *spd;
+	struct xe_device *xe;
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	if (desc->require_force_probe && !id_forced(pdev->device)) {
+		dev_info(&pdev->dev,
+			 "Your graphics device %04x is not officially supported\n"
+			 "by xe driver in this kernel version. To force Xe probe,\n"
+			 "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
+			 "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
+			 "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
+			 pdev->device, pdev->device, pdev->device,
+			 pdev->device, pdev->device);
+		return -ENODEV;
+	}
+
+	if (id_blocked(pdev->device)) {
+		dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
+			 pdev->vendor, pdev->device);
+		return -ENODEV;
+	}
+
+	xe = xe_device_create(pdev, ent);
+	if (IS_ERR(xe))
+		return PTR_ERR(xe);
+
+	xe->info.graphics_verx100 = desc->graphics_ver * 100 +
+				    desc->graphics_rel;
+	xe->info.media_verx100 = desc->media_ver * 100 +
+				 desc->media_rel;
+	xe->info.is_dgfx = desc->is_dgfx;
+	xe->info.platform = desc->platform;
+	xe->info.dma_mask_size = desc->dma_mask_size;
+	xe->info.vram_flags = desc->vram_flags;
+	xe->info.tile_count = desc->max_tiles;
+	xe->info.vm_max_level = desc->vm_max_level;
+	xe->info.media_ver = desc->media_ver;
+	xe->info.supports_usm = desc->supports_usm;
+	xe->info.has_flat_ccs = desc->has_flat_ccs;
+	xe->info.has_4tile = desc->has_4tile;
+
+	spd = subplatform_get(xe, desc);
+	xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
+	xe->info.step = xe_step_get(xe);
+
+	for (id = 0; id < xe->info.tile_count; ++id) {
+		gt = xe->gt + id;
+		gt->info.id = id;
+		gt->xe = xe;
+
+		if (id == 0) {
+			gt->info.type = XE_GT_TYPE_MAIN;
+			gt->info.vram_id = id;
+			gt->info.engine_mask = desc->platform_engine_mask;
+			gt->mmio.adj_limit = 0;
+			gt->mmio.adj_offset = 0;
+		} else {
+			gt->info.type = desc->extra_gts[id - 1].type;
+			gt->info.vram_id = desc->extra_gts[id - 1].vram_id;
+			gt->info.engine_mask =
+				desc->extra_gts[id - 1].engine_mask;
+			gt->mmio.adj_limit =
+				desc->extra_gts[id - 1].mmio_adj_limit;
+			gt->mmio.adj_offset =
+				desc->extra_gts[id - 1].mmio_adj_offset;
+		}
+	}
+
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d",
+		desc->platform_name, spd ? spd->name : "",
+		xe->info.devid, xe->info.revid,
+		xe->info.is_dgfx, xe->info.graphics_verx100,
+		xe->info.media_verx100,
+		xe->info.dma_mask_size, xe->info.tile_count);
+
+	drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
+		xe_step_name(xe->info.step.graphics),
+		xe_step_name(xe->info.step.media),
+		xe_step_name(xe->info.step.display),
+		xe_step_name(xe->info.step.basedie));
+
+	pci_set_drvdata(pdev, xe);
+	err = pci_enable_device(pdev);
+	if (err) {
+		drm_dev_put(&xe->drm);
+		return err;
+	}
+
+	pci_set_master(pdev);
+
+	if (pci_enable_msi(pdev) < 0)
+		drm_dbg(&xe->drm, "can't enable MSI");
+
+	err = xe_device_probe(xe);
+	if (err) {
+		pci_disable_device(pdev);
+		return err;
+	}
+
+	xe_pm_runtime_init(xe);
+
+	return 0;
+}
+
+static void xe_pci_shutdown(struct pci_dev *pdev)
+{
+	xe_device_shutdown(pdev_to_xe_device(pdev));
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int xe_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = xe_pm_suspend(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+
+	err = pci_set_power_state(pdev, PCI_D3hot);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int xe_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	err = xe_pm_resume(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	return 0;
+}
+#endif
+
+static int xe_pci_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = xe_pm_runtime_suspend(xe);
+	if (err)
+		return err;
+
+	pci_save_state(pdev);
+
+	if (xe->d3cold_allowed) {
+		pci_disable_device(pdev);
+		pci_ignore_hotplug(pdev);
+		pci_set_power_state(pdev, PCI_D3cold);
+	} else {
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+
+	return 0;
+}
+
+static int xe_pci_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	if (xe->d3cold_allowed) {
+		err = pci_enable_device(pdev);
+		if (err)
+			return err;
+
+		pci_set_master(pdev);
+	}
+
+	return xe_pm_runtime_resume(xe);
+}
+
+static int xe_pci_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	/*
+	 * FIXME: d3cold should be allowed (true) if
+	 * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
+	 * however the change to the buddy allocator broke the
+	 * xe_bo_restore_kernel when the pci device is disabled
+	 */
+	 xe->d3cold_allowed = false;
+
+	return 0;
+}
+
+static const struct dev_pm_ops xe_pm_ops = {
+	.suspend = xe_pci_suspend,
+	.resume = xe_pci_resume,
+	.freeze = xe_pci_suspend,
+	.thaw = xe_pci_resume,
+	.poweroff = xe_pci_suspend,
+	.restore = xe_pci_resume,
+	.runtime_suspend = xe_pci_runtime_suspend,
+	.runtime_resume = xe_pci_runtime_resume,
+	.runtime_idle = xe_pci_runtime_idle,
+};
+
+static struct pci_driver xe_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = pciidlist,
+	.probe = xe_pci_probe,
+	.remove = xe_pci_remove,
+	.shutdown = xe_pci_shutdown,
+	.driver.pm = &xe_pm_ops,
+};
+
+int xe_register_pci_driver(void)
+{
+	return pci_register_driver(&xe_pci_driver);
+}
+
+void xe_unregister_pci_driver(void)
+{
+	pci_unregister_driver(&xe_pci_driver);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+static int dev_to_xe_device_fn(struct device *dev, void *data)
+
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	int (*xe_fn)(struct xe_device *xe) = data;
+	int ret = 0;
+	int idx;
+
+	if (drm_dev_enter(drm, &idx))
+		ret = xe_fn(to_xe_device(dev_get_drvdata(dev)));
+	drm_dev_exit(idx);
+
+	return ret;
+}
+
+/**
+ * xe_call_for_each_device - Iterate over all devices this driver binds to
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterated over all devices this driver binds to, and calls
+ * @xe_fn: for each one of them. If the called function returns anything else
+ * than 0, iteration is stopped and the return value is returned by this
+ * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
+ * called for the corresponding drm device.
+ *
+ * Return: Zero or the error code of a call to @xe_fn returning an error
+ * code.
+ */
+int xe_call_for_each_device(xe_device_fn xe_fn)
+{
+	return driver_for_each_device(&xe_pci_driver.driver, NULL,
+				      xe_fn, dev_to_xe_device_fn);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
new file mode 100644
index 0000000000000..9e3089549d5fb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_PCI_H_
+#define _XE_PCI_H_
+
+#include "tests/xe_test.h"
+
+int xe_register_pci_driver(void);
+void xe_unregister_pci_driver(void);
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+struct xe_device;
+
+typedef int (*xe_device_fn)(struct xe_device *);
+
+int xe_call_for_each_device(xe_device_fn xe_fn);
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
new file mode 100644
index 0000000000000..236159c8a6c0e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pcode_api.h"
+#include "xe_pcode.h"
+
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#include <linux/errno.h>
+
+/**
+ * DOC: PCODE
+ *
+ * Xe PCODE is the component responsible for interfacing with the PCODE
+ * firmware.
+ * It shall provide a very simple ABI to other Xe components, but be the
+ * single and consolidated place that will communicate with PCODE. All read
+ * and write operations to PCODE will be internal and private to this component.
+ *
+ * What's next:
+ * - PCODE hw metrics
+ * - PCODE for display operations
+ */
+
+static int pcode_mailbox_status(struct xe_gt *gt)
+{
+	u32 err;
+	static const struct pcode_err_decode err_decode[] = {
+		[PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"},
+		[PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"},
+		[PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"},
+		[PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"},
+		[PCODE_LOCKED] = {-EBUSY, "PCODE Locked"},
+		[PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW,
+			"GT ratio out of range"},
+		[PCODE_REJECTED] = {-EACCES, "PCODE Rejected"},
+		[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
+	};
+
+	lockdep_assert_held(&gt->pcode.lock);
+
+	err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK;
+	if (err) {
+		drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
+			err_decode[err].str ?: "Unknown");
+		return err_decode[err].errno ?: -EPROTO;
+	}
+
+	return 0;
+}
+
+static bool pcode_mailbox_done(struct xe_gt *gt)
+{
+	lockdep_assert_held(&gt->pcode.lock);
+	return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0;
+}
+
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+			    unsigned int timeout, bool return_data, bool atomic)
+{
+	lockdep_assert_held(&gt->pcode.lock);
+
+	if (!pcode_mailbox_done(gt))
+		return -EAGAIN;
+
+	xe_mmio_write32(gt, PCODE_DATA0.reg, *data0);
+	xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0);
+	xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox);
+
+	if (atomic)
+		_wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1);
+	else
+		wait_for(pcode_mailbox_done(gt), timeout);
+
+	if (return_data) {
+		*data0 = xe_mmio_read32(gt, PCODE_DATA0.reg);
+		if (data1)
+			*data1 = xe_mmio_read32(gt, PCODE_DATA1.reg);
+	}
+
+	return pcode_mailbox_status(gt);
+}
+
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+				  u32 request, u32 reply_mask, u32 reply,
+				  u32 *status, bool atomic)
+{
+	*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic);
+
+	return (*status == 0) && ((request & reply_mask) == reply);
+}
+
+/**
+ * xe_pcode_request - send PCODE request until acknowledgment
+ * @gt: gt
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and if this times out for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+	u32 status;
+	int ret;
+	bool atomic = false;
+
+	mutex_lock(&gt->pcode.lock);
+
+#define COND \
+	xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic)
+
+	/*
+	 * Prime the PCODE by doing a request first. Normally it guarantees
+	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
+	 * _wait_for() doesn't guarantee when its passed condition is evaluated
+	 * first, so send the first request explicitly.
+	 */
+	if (COND) {
+		ret = 0;
+		goto out;
+	}
+	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	drm_err(&gt_to_xe(gt)->drm,
+		"PCODE timeout, retrying with preemption disabled\n");
+	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+	preempt_disable();
+	atomic = true;
+	ret = wait_for_atomic(COND, 50);
+	atomic = false;
+	preempt_enable();
+
+out:
+	mutex_unlock(&gt->pcode.lock);
+	return status ? status : ret;
+#undef COND
+}
+/**
+ * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table
+ * @gt: gt instance
+ * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz.
+ * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz.
+ *
+ * This function initialize PCODE's QOS frequency table for a proper minimal
+ * frequency/power steering decision, depending on the current requested GT
+ * frequency. For older platforms this was a more complete table including
+ * the IA freq. However for the latest platforms this table become a simple
+ * 1-1 Ring vs GT frequency. Even though, without setting it, PCODE might
+ * not take the right decisions for some memory frequencies and affect latency.
+ *
+ * It returns 0 on success, and -ERROR number on failure, -EINVAL if max
+ * frequency is higher then the minimal, and other errors directly translated
+ * from the PCODE Error returs:
+ * - -ENXIO: "Illegal Command"
+ * - -ETIMEDOUT: "Timed out"
+ * - -EINVAL: "Illegal Data"
+ * - -ENXIO, "Illegal Subcommand"
+ * - -EBUSY: "PCODE Locked"
+ * - -EOVERFLOW, "GT ratio out of range"
+ * - -EACCES, "PCODE Rejected"
+ * - -EPROTO, "Unknown"
+ */
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq)
+{
+	int ret;
+	u32 freq;
+
+	if (IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	if (max_gt_freq <= min_gt_freq)
+		return -EINVAL;
+
+	mutex_lock(&gt->pcode.lock);
+	for (freq = min_gt_freq; freq <= max_gt_freq; freq++) {
+		u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq;
+
+		ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE,
+				       &data, NULL, 1, false, false);
+		if (ret)
+			goto unlock;
+	}
+
+unlock:
+	mutex_unlock(&gt->pcode.lock);
+	return ret;
+}
+
+static bool pcode_dgfx_status_complete(struct xe_gt *gt)
+{
+	u32 data = DGFX_GET_INIT_STATUS;
+	int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS,
+				      &data, NULL, 1, true, false);
+
+	return status == 0 &&
+		(data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE;
+}
+
+/**
+ * xe_pcode_init - Ensure PCODE is initialized
+ * @gt: gt instance
+ *
+ * This function ensures that PCODE is properly initialized. To be called during
+ * probe and resume paths.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_init(struct xe_gt *gt)
+{
+	int timeout = 180000; /* 3 min */
+	int ret;
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	mutex_lock(&gt->pcode.lock);
+	ret = wait_for(pcode_dgfx_status_complete(gt), timeout);
+	mutex_unlock(&gt->pcode.lock);
+
+	if (ret)
+		drm_err(&gt_to_xe(gt)->drm,
+			"PCODE initialization timedout after: %d min\n",
+			timeout / 60000);
+
+	return ret;
+}
+
+/**
+ * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * @gt: gt instance
+ *
+ * This function initializes the xe_pcode component, and when needed, it ensures
+ * that PCODE has properly performed its initialization and it is really ready
+ * to go. To be called once only during probe.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_probe(struct xe_gt *gt)
+{
+	mutex_init(&gt->pcode.lock);
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	return xe_pcode_init(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
new file mode 100644
index 0000000000000..3b4aa8c1a3bad
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_H_
+#define _XE_PCODE_H_
+
+#include <linux/types.h>
+struct xe_gt;
+
+int xe_pcode_probe(struct xe_gt *gt);
+int xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq);
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val,
+			   int timeout_ms);
+#define xe_pcode_write(gt, mbox, val) \
+	xe_pcode_write_timeout(gt, mbox, val, 1)
+
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		     u32 reply_mask, u32 reply, int timeout_ms);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
new file mode 100644
index 0000000000000..0762c8a912c79
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/* Internal to xe_pcode */
+
+#define PCODE_MAILBOX			_MMIO(0x138124)
+#define   PCODE_READY			REG_BIT(31)
+#define   PCODE_MB_PARAM2		REG_GENMASK(23, 16)
+#define   PCODE_MB_PARAM1		REG_GENMASK(15, 8)
+#define   PCODE_MB_COMMAND		REG_GENMASK(7, 0)
+#define   PCODE_ERROR_MASK		0xFF
+#define     PCODE_SUCCESS		0x0
+#define     PCODE_ILLEGAL_CMD		0x1
+#define     PCODE_TIMEOUT		0x2
+#define     PCODE_ILLEGAL_DATA		0x3
+#define     PCODE_ILLEGAL_SUBCOMMAND	0x4
+#define     PCODE_LOCKED		0x6
+#define     PCODE_GT_RATIO_OUT_OF_RANGE	0x10
+#define     PCODE_REJECTED		0x11
+
+#define PCODE_DATA0			_MMIO(0x138128)
+#define PCODE_DATA1			_MMIO(0x13812C)
+
+/* Min Freq QOS Table */
+#define   PCODE_WRITE_MIN_FREQ_TABLE	0x8
+#define   PCODE_READ_MIN_FREQ_TABLE	0x9
+#define   PCODE_FREQ_RING_RATIO_SHIFT	16
+
+/* PCODE Init */
+#define   DGFX_PCODE_STATUS		0x7E
+#define     DGFX_GET_INIT_STATUS	0x0
+#define     DGFX_INIT_STATUS_COMPLETE	0x1
+
+struct pcode_err_decode {
+	int errno;
+	const char *str;
+};
+
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
new file mode 100644
index 0000000000000..72612c832e88b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PLATFORM_INFO_TYPES_H_
+#define _XE_PLATFORM_INFO_TYPES_H_
+
+/* Keep in gen based order, and chronological order within a gen */
+enum xe_platform {
+	XE_PLATFORM_UNINITIALIZED = 0,
+	/* gen12 */
+	XE_TIGERLAKE,
+	XE_ROCKETLAKE,
+	XE_DG1,
+	XE_DG2,
+	XE_PVC,
+	XE_ALDERLAKE_S,
+	XE_ALDERLAKE_P,
+	XE_METEORLAKE,
+};
+
+enum xe_subplatform {
+	XE_SUBPLATFORM_UNINITIALIZED = 0,
+	XE_SUBPLATFORM_NONE,
+	XE_SUBPLATFORM_DG2_G10,
+	XE_SUBPLATFORM_DG2_G11,
+	XE_SUBPLATFORM_DG2_G12,
+	XE_SUBPLATFORM_ADLP_RPLU,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
new file mode 100644
index 0000000000000..fb0355530e7bd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/pm_runtime.h>
+
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_pm.h"
+#include "xe_gt.h"
+#include "xe_ggtt.h"
+#include "xe_irq.h"
+#include "xe_pcode.h"
+
+/**
+ * DOC: Xe Power Management
+ *
+ * Xe PM shall be guided by the simplicity.
+ * Use the simplest hook options whenever possible.
+ * Let's not reinvent the runtime_pm references and hooks.
+ * Shall have a clear separation of display and gt underneath this component.
+ *
+ * What's next:
+ *
+ * For now s2idle and s3 are only working in integrated devices. The next step
+ * is to iterate through all VRAM's BO backing them up into the system memory
+ * before allowing the system suspend.
+ *
+ * Also runtime_pm needs to be here from the beginning.
+ *
+ * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
+ * and no wait boost. Frequency optimizations should come on a next stage.
+ */
+
+/**
+ * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_suspend_prepare(gt);
+
+	/* FIXME: Super racey... */
+	err = xe_bo_evict_all(xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err)
+			return err;
+	}
+
+	xe_irq_suspend(xe);
+
+	return 0;
+}
+
+/**
+ * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_init(gt);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * This only restores pinned memory which is the memory required for the
+	 * GT(s) to resume.
+	 */
+	err = xe_bo_restore_kernel(xe);
+	if (err)
+		return err;
+
+	xe_irq_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	err = xe_bo_restore_user(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void xe_pm_runtime_init(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 1000);
+	pm_runtime_set_active(dev);
+	pm_runtime_allow(dev);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+}
+
+int xe_pm_runtime_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	if (xe->d3cold_allowed) {
+		if (xe_device_mem_access_ongoing(xe))
+			return -EBUSY;
+
+		err = xe_bo_evict_all(xe);
+		if (err)
+			return err;
+	}
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err)
+			return err;
+	}
+
+	xe_irq_suspend(xe);
+
+	return 0;
+}
+
+int xe_pm_runtime_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	if (xe->d3cold_allowed) {
+		for_each_gt(gt, xe, id) {
+			err = xe_pcode_init(gt);
+			if (err)
+				return err;
+		}
+
+		/*
+		 * This only restores pinned memory which is the memory
+		 * required for the GT(s) to resume.
+		 */
+		err = xe_bo_restore_kernel(xe);
+		if (err)
+			return err;
+	}
+
+	xe_irq_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	if (xe->d3cold_allowed) {
+		err = xe_bo_restore_user(xe);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int xe_pm_runtime_get(struct xe_device *xe)
+{
+	return pm_runtime_get_sync(xe->drm.dev);
+}
+
+int xe_pm_runtime_put(struct xe_device *xe)
+{
+	pm_runtime_mark_last_busy(xe->drm.dev);
+	return pm_runtime_put_autosuspend(xe->drm.dev);
+}
+
+/* Return true if resume operation happened and usage count was increased */
+bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe)
+{
+	/* In case we are suspended we need to immediately wake up */
+	if (pm_runtime_suspended(xe->drm.dev))
+		return !pm_runtime_resume_and_get(xe->drm.dev);
+
+	return false;
+}
+
+int xe_pm_runtime_get_if_active(struct xe_device *xe)
+{
+	WARN_ON(pm_runtime_suspended(xe->drm.dev));
+	return pm_runtime_get_if_active(xe->drm.dev, true);
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
new file mode 100644
index 0000000000000..b8c5f9558e263
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PM_H_
+#define _XE_PM_H_
+
+#include <linux/pm_runtime.h>
+
+struct xe_device;
+
+int xe_pm_suspend(struct xe_device *xe);
+int xe_pm_resume(struct xe_device *xe);
+
+void xe_pm_runtime_init(struct xe_device *xe);
+int xe_pm_runtime_suspend(struct xe_device *xe);
+int xe_pm_runtime_resume(struct xe_device *xe);
+int xe_pm_runtime_get(struct xe_device *xe);
+int xe_pm_runtime_put(struct xe_device *xe);
+bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
+int xe_pm_runtime_get_if_active(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
new file mode 100644
index 0000000000000..6ab9ff4427664
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include "xe_engine.h"
+#include "xe_preempt_fence.h"
+#include "xe_vm.h"
+
+static void preempt_fence_work_func(struct work_struct *w)
+{
+	bool cookie = dma_fence_begin_signalling();
+	struct xe_preempt_fence *pfence =
+		container_of(w, typeof(*pfence), preempt_work);
+	struct xe_engine *e = pfence->engine;
+
+	if (pfence->error)
+		dma_fence_set_error(&pfence->base, pfence->error);
+	else
+		e->ops->suspend_wait(e);
+
+	dma_fence_signal(&pfence->base);
+	dma_fence_end_signalling(cookie);
+
+	queue_work(system_unbound_wq, &e->vm->preempt.rebind_work);
+
+	xe_engine_put(e);
+}
+
+static const char *
+preempt_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "xe";
+}
+
+static const char *
+preempt_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return "preempt";
+}
+
+static bool preempt_fence_enable_signaling(struct dma_fence *fence)
+{
+	struct xe_preempt_fence *pfence =
+		container_of(fence, typeof(*pfence), base);
+	struct xe_engine *e = pfence->engine;
+
+	pfence->error = e->ops->suspend(e);
+	queue_work(system_unbound_wq, &pfence->preempt_work);
+	return true;
+}
+
+static const struct dma_fence_ops preempt_fence_ops = {
+	.get_driver_name = preempt_fence_get_driver_name,
+	.get_timeline_name = preempt_fence_get_timeline_name,
+	.enable_signaling = preempt_fence_enable_signaling,
+};
+
+/**
+ * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal
+ * initialization
+ *
+ * Allocate a preempt fence, and initialize its list head.
+ * If the preempt_fence allocated has been armed with
+ * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not,
+ * it must be freed using xe_preempt_fence_free().
+ *
+ * Return: A struct xe_preempt_fence pointer used for calling into
+ * xe_preempt_fence_arm() or xe_preempt_fence_free().
+ * An error pointer on error.
+ */
+struct xe_preempt_fence *xe_preempt_fence_alloc(void)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = kmalloc(sizeof(*pfence), GFP_KERNEL);
+	if (!pfence)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&pfence->link);
+	INIT_WORK(&pfence->preempt_work, preempt_fence_work_func);
+
+	return pfence;
+}
+
+/**
+ * xe_preempt_fence_free() - Free a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: pointer obtained from xe_preempt_fence_alloc();
+ *
+ * Free a preempt fence that has not yet been armed.
+ */
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence)
+{
+	list_del(&pfence->link);
+	kfree(pfence);
+}
+
+/**
+ * xe_preempt_fence_arm() - Arm a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: The struct xe_preempt_fence pointer returned from
+ *          xe_preempt_fence_alloc().
+ * @e: The struct xe_engine used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Inserts the preempt fence into @context's timeline, takes @link off any
+ * list, and registers the struct xe_engine as the xe_engine to be preempted.
+ *
+ * Return: A pointer to a struct dma_fence embedded into the preempt fence.
+ * This function doesn't error.
+ */
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+		     u64 context, u32 seqno)
+{
+	list_del_init(&pfence->link);
+	pfence->engine = xe_engine_get(e);
+	dma_fence_init(&pfence->base, &preempt_fence_ops,
+		      &e->compute.lock, context, seqno);
+
+	return &pfence->base;
+}
+
+/**
+ * xe_preempt_fence_create() - Helper to create and arm a preempt fence.
+ * @e: The struct xe_engine used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Allocates and inserts the preempt fence into @context's timeline,
+ * and registers @e as the struct xe_engine to be preempted.
+ *
+ * Return: A pointer to the resulting struct dma_fence on success. An error
+ * pointer on error. In particular if allocation fails it returns
+ * ERR_PTR(-ENOMEM);
+ */
+struct dma_fence *
+xe_preempt_fence_create(struct xe_engine *e,
+			u64 context, u32 seqno)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = xe_preempt_fence_alloc();
+	if (IS_ERR(pfence))
+		return ERR_CAST(pfence);
+
+	return xe_preempt_fence_arm(pfence, e, context, seqno);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence)
+{
+	return fence->ops == &preempt_fence_ops;
+}
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h
new file mode 100644
index 0000000000000..4f3966103203c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_H_
+#define _XE_PREEMPT_FENCE_H_
+
+#include "xe_preempt_fence_types.h"
+
+struct list_head;
+
+struct dma_fence *
+xe_preempt_fence_create(struct xe_engine *e,
+			u64 context, u32 seqno);
+
+struct xe_preempt_fence *xe_preempt_fence_alloc(void);
+
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence);
+
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+		     u64 context, u32 seqno);
+
+static inline struct xe_preempt_fence *
+to_preempt_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct xe_preempt_fence, base);
+}
+
+/**
+ * xe_preempt_fence_link() - Return a link used to keep unarmed preempt
+ * fences on a list.
+ * @pfence: Pointer to the preempt fence.
+ *
+ * The link is embedded in the struct xe_preempt_fence. Use
+ * link_to_preempt_fence() to convert back to the preempt fence.
+ *
+ * Return: A pointer to an embedded struct list_head.
+ */
+static inline struct list_head *
+xe_preempt_fence_link(struct xe_preempt_fence *pfence)
+{
+	return &pfence->link;
+}
+
+/**
+ * to_preempt_fence_from_link() - Convert back to a preempt fence pointer
+ * from a link obtained with xe_preempt_fence_link().
+ * @link: The struct list_head obtained from xe_preempt_fence_link().
+ *
+ * Return: A pointer to the embedding struct xe_preempt_fence.
+ */
+static inline struct xe_preempt_fence *
+to_preempt_fence_from_link(struct list_head *link)
+{
+	return container_of(link, struct xe_preempt_fence, link);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
new file mode 100644
index 0000000000000..9d9efd8ff0ed2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_TYPES_H_
+#define _XE_PREEMPT_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+struct xe_engine;
+
+/**
+ * struct xe_preempt_fence - XE preempt fence
+ *
+ * A preemption fence which suspends the execution of an xe_engine on the
+ * hardware and triggers a callback once the xe_engine is complete.
+ */
+struct xe_preempt_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending preempt fences */
+	struct list_head link;
+	/** @engine: xe engine for this preempt fence */
+	struct xe_engine *engine;
+	/** @preempt_work: work struct which issues preemption */
+	struct work_struct preempt_work;
+	/** @error: preempt fence is in error state */
+	int error;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
new file mode 100644
index 0000000000000..81193ddd0af73
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -0,0 +1,1542 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_pt_types.h"
+#include "xe_pt_walk.h"
+#include "xe_vm.h"
+#include "xe_res_cursor.h"
+
+struct xe_pt_dir {
+	struct xe_pt pt;
+	/** @dir: Directory structure for the xe_pt_walk functionality */
+	struct xe_ptw_dir dir;
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
+#else
+#define xe_pt_set_addr(__xe_pt, __addr)
+#define xe_pt_addr(__xe_pt) 0ull
+#endif
+
+static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
+static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};
+
+#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
+
+static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
+{
+	return container_of(pt, struct xe_pt_dir, pt);
+}
+
+static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+{
+	return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
+}
+
+/**
+ * gen8_pde_encode() - Encode a page-table directory entry pointing to
+ * another page-table.
+ * @bo: The page-table bo of the page-table to point to.
+ * @bo_offset: Offset in the page-table bo to point to.
+ * @level: The cache level indicating the caching of @bo.
+ *
+ * TODO: Rename.
+ *
+ * Return: An encoded page directory entry. No errors.
+ */
+u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
+		    const enum xe_cache_level level)
+{
+	u64 pde;
+	bool is_lmem;
+
+	pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem);
+	pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem);
+
+	/* FIXME: I don't think the PPAT handling is correct for MTL */
+
+	if (level != XE_CACHE_NONE)
+		pde |= PPAT_CACHED_PDE;
+	else
+		pde |= PPAT_UNCACHED;
+
+	return pde;
+}
+
+static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
+			   size_t page_size, bool *is_lmem)
+{
+	if (xe_vma_is_userptr(vma)) {
+		struct xe_res_cursor cur;
+		u64 page;
+
+		*is_lmem = false;
+		page = offset >> PAGE_SHIFT;
+		offset &= (PAGE_SIZE - 1);
+
+		xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size,
+				&cur);
+		return xe_res_dma(&cur) + offset;
+	} else {
+		return xe_bo_addr(vma->bo, offset, page_size, is_lmem);
+	}
+}
+
+static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
+			     u32 pt_level)
+{
+	pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~GEN8_PAGE_RW;
+
+	/* FIXME: I don't think the PPAT handling is correct for MTL */
+
+	switch (cache) {
+	case XE_CACHE_NONE:
+		pte |= PPAT_UNCACHED;
+		break;
+	case XE_CACHE_WT:
+		pte |= PPAT_DISPLAY_ELLC;
+		break;
+	default:
+		pte |= PPAT_CACHED;
+		break;
+	}
+
+	if (pt_level == 1)
+		pte |= GEN8_PDE_PS_2M;
+	else if (pt_level == 2)
+		pte |= GEN8_PDPE_PS_1G;
+
+	/* XXX: Does hw support 1 GiB pages? */
+	XE_BUG_ON(pt_level > 2);
+
+	return pte;
+}
+
+/**
+ * gen8_pte_encode() - Encode a page-table entry pointing to memory.
+ * @vma: The vma representing the memory to point to.
+ * @bo: If @vma is NULL, representing the memory to point to.
+ * @offset: The offset into @vma or @bo.
+ * @cache: The cache level indicating
+ * @flags: Currently only supports PTE_READ_ONLY for read-only access.
+ * @pt_level: The page-table level of the page-table into which the entry
+ * is to be inserted.
+ *
+ * TODO: Rename.
+ *
+ * Return: An encoded page-table entry. No errors.
+ */
+u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+		    u64 offset, enum xe_cache_level cache,
+		    u32 flags, u32 pt_level)
+{
+	u64 pte;
+	bool is_vram;
+
+	if (vma)
+		pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram);
+	else
+		pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram);
+
+	if (is_vram) {
+		pte |= GEN12_PPGTT_PTE_LM;
+		if (vma && vma->use_atomic_access_pte_bit)
+			pte |= GEN12_USM_PPGTT_PTE_AE;
+	}
+
+	return __gen8_pte_encode(pte, cache, flags, pt_level);
+}
+
+static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm,
+			     unsigned int level)
+{
+	u8 id = gt->info.id;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	if (!vm->scratch_bo[id])
+		return 0;
+
+	if (level == 0) {
+		u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0,
+					    XE_CACHE_WB, 0, 0);
+		if (vm->flags & XE_VM_FLAGS_64K)
+			empty |= GEN12_PTE_PS64;
+
+		return empty;
+	} else {
+		return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
+				       XE_CACHE_WB);
+	}
+}
+
+/**
+ * xe_pt_create() - Create a page-table.
+ * @vm: The vm to create for.
+ * @gt: The gt to create for.
+ * @level: The page-table level.
+ *
+ * Allocate and initialize a single struct xe_pt metadata structure. Also
+ * create the corresponding page-table bo, but don't initialize it. If the
+ * level is grater than zero, then it's assumed to be a directory page-
+ * table and the directory structure is also allocated and initialized to
+ * NULL pointers.
+ *
+ * Return: A valid struct xe_pt pointer on success, Pointer error code on
+ * error.
+ */
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+			   unsigned int level)
+{
+	struct xe_pt *pt;
+	struct xe_bo *bo;
+	size_t size;
+	int err;
+
+	size = !level ?  sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
+		GEN8_PDES * sizeof(struct xe_ptw *);
+	pt = kzalloc(size, GFP_KERNEL);
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto err_kfree;
+	}
+	pt->bo = bo;
+	pt->level = level;
+	pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
+
+	XE_BUG_ON(level > XE_VM_MAX_LEVEL);
+
+	return pt;
+
+err_kfree:
+	kfree(pt);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
+ * entries.
+ * @gt: The gt the scratch pagetable of which to use.
+ * @vm: The vm we populate for.
+ * @pt: The pagetable the bo of which to initialize.
+ *
+ * Populate the page-table bo of @pt with entries pointing into the gt's
+ * scratch page-table tree if any. Otherwise populate with zeros.
+ */
+void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+			  struct xe_pt *pt)
+{
+	struct iosys_map *map = &pt->bo->vmap;
+	u64 empty;
+	int i;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	if (!vm->scratch_bo[gt->info.id]) {
+		/*
+		 * FIXME: Some memory is allocated already allocated to zero?
+		 * Find out which memory that is and avoid this memset...
+		 */
+		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
+	} else {
+		empty = __xe_pt_empty_pte(gt, vm, pt->level);
+		for (i = 0; i < GEN8_PDES; i++)
+			xe_pt_write(vm->xe, map, i, empty);
+	}
+}
+
+/**
+ * xe_pt_shift() - Return the ilog2 value of the size of the address range of
+ * a page-table at a certain level.
+ * @level: The level.
+ *
+ * Return: The ilog2 value of the size of the address range of a page-table
+ * at level @level.
+ */
+unsigned int xe_pt_shift(unsigned int level)
+{
+	return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level;
+}
+
+/**
+ * xe_pt_destroy() - Destroy a page-table tree.
+ * @pt: The root of the page-table tree to destroy.
+ * @flags: vm flags. Currently unused.
+ * @deferred: List head of lockless list for deferred putting. NULL for
+ *            immediate putting.
+ *
+ * Puts the page-table bo, recursively calls xe_pt_destroy on all children
+ * and finally frees @pt. TODO: Can we remove the @flags argument?
+ */
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
+{
+	int i;
+
+	if (!pt)
+		return;
+
+	XE_BUG_ON(!list_empty(&pt->bo->vmas));
+	xe_bo_unpin(pt->bo);
+	xe_bo_put_deferred(pt->bo, deferred);
+
+	if (pt->level > 0 && pt->num_live) {
+		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+		for (i = 0; i < GEN8_PDES; i++) {
+			if (xe_pt_entry(pt_dir, i))
+				xe_pt_destroy(xe_pt_entry(pt_dir, i), flags,
+					      deferred);
+		}
+	}
+	kfree(pt);
+}
+
+/**
+ * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the
+ * given gt and vm.
+ * @xe: xe device.
+ * @gt: gt to set up for.
+ * @vm: vm to set up for.
+ *
+ * Sets up a pagetable tree with one page-table per level and a single
+ * leaf bo. All pagetable entries point to the single page-table or,
+ * for L0, the single bo one level below.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+			 struct xe_vm *vm)
+{
+	u8 id = gt->info.id;
+	int i;
+
+	vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K,
+					  ttm_bo_type_kernel,
+					  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+					  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(vm->scratch_bo[id]))
+		return PTR_ERR(vm->scratch_bo[id]);
+	xe_bo_pin(vm->scratch_bo[id]);
+
+	for (i = 0; i < vm->pt_root[id]->level; i++) {
+		vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i);
+		if (IS_ERR(vm->scratch_pt[id][i]))
+			return PTR_ERR(vm->scratch_pt[id][i]);
+
+		xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]);
+	}
+
+	return 0;
+}
+
+/**
+ * DOC: Pagetable building
+ *
+ * Below we use the term "page-table" for both page-directories, containing
+ * pointers to lower level page-directories or page-tables, and level 0
+ * page-tables that contain only page-table-entries pointing to memory pages.
+ *
+ * When inserting an address range in an already existing page-table tree
+ * there will typically be a set of page-tables that are shared with other
+ * address ranges, and a set that are private to this address range.
+ * The set of shared page-tables can be at most two per level,
+ * and those can't be updated immediately because the entries of those
+ * page-tables may still be in use by the gpu for other mappings. Therefore
+ * when inserting entries into those, we instead stage those insertions by
+ * adding insertion data into struct xe_vm_pgtable_update structures. This
+ * data, (subtrees for the cpu and page-table-entries for the gpu) is then
+ * added in a separate commit step. CPU-data is committed while still under the
+ * vm lock, the object lock and for userptr, the notifier lock in read mode.
+ * The GPU async data is committed either by the GPU or CPU after fulfilling
+ * relevant dependencies.
+ * For non-shared page-tables (and, in fact, for shared ones that aren't
+ * existing at the time of staging), we add the data in-place without the
+ * special update structures. This private part of the page-table tree will
+ * remain disconnected from the vm page-table tree until data is committed to
+ * the shared page tables of the vm tree in the commit phase.
+ */
+
+struct xe_pt_update {
+	/** @update: The update structure we're building for this parent. */
+	struct xe_vm_pgtable_update *update;
+	/** @parent: The parent. Used to detect a parent change. */
+	struct xe_pt *parent;
+	/** @preexisting: Whether the parent was pre-existing or allocated */
+	bool preexisting;
+};
+
+struct xe_pt_stage_bind_walk {
+	/** base: The base class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @vm: The vm we're building for. */
+	struct xe_vm *vm;
+	/** @gt: The gt we're building for. */
+	struct xe_gt *gt;
+	/** @cache: Desired cache level for the ptes */
+	enum xe_cache_level cache;
+	/** @default_pte: PTE flag only template. No address is associated */
+	u64 default_pte;
+	/** @dma_offset: DMA offset to add to the PTE. */
+	u64 dma_offset;
+	/**
+	 * @needs_64k: This address range enforces 64K alignment and
+	 * granularity.
+	 */
+	bool needs_64K;
+	/**
+	 * @pte_flags: Flags determining PTE setup. These are not flags
+	 * encoded directly in the PTE. See @default_pte for those.
+	 */
+	u32 pte_flags;
+
+	/* Also input, but is updated during the walk*/
+	/** @curs: The DMA address cursor. */
+	struct xe_res_cursor *curs;
+	/** @va_curs_start: The Virtual address coresponding to @curs->start */
+	u64 va_curs_start;
+
+	/* Output */
+	struct xe_walk_update {
+		/** @wupd.entries: Caller provided storage. */
+		struct xe_vm_pgtable_update *entries;
+		/** @wupd.num_used_entries: Number of update @entries used. */
+		unsigned int num_used_entries;
+		/** @wupd.updates: Tracks the update entry at a given level */
+		struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
+	} wupd;
+
+	/* Walk state */
+	/**
+	 * @l0_end_addr: The end address of the current l0 leaf. Used for
+	 * 64K granularity detection.
+	 */
+	u64 l0_end_addr;
+	/** @addr_64K: The start address of the current 64K chunk. */
+	u64 addr_64K;
+	/** @found_64: Whether @add_64K actually points to a 64K chunk. */
+	bool found_64K;
+};
+
+static int
+xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
+		 pgoff_t offset, bool alloc_entries)
+{
+	struct xe_pt_update *upd = &wupd->updates[parent->level];
+	struct xe_vm_pgtable_update *entry;
+
+	/*
+	 * For *each level*, we could only have one active
+	 * struct xt_pt_update at any one time. Once we move on to a
+	 * new parent and page-directory, the old one is complete, and
+	 * updates are either already stored in the build tree or in
+	 * @wupd->entries
+	 */
+	if (likely(upd->parent == parent))
+		return 0;
+
+	upd->parent = parent;
+	upd->preexisting = true;
+
+	if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
+		return -EINVAL;
+
+	entry = wupd->entries + wupd->num_used_entries++;
+	upd->update = entry;
+	entry->ofs = offset;
+	entry->pt_bo = parent->bo;
+	entry->pt = parent;
+	entry->flags = 0;
+	entry->qwords = 0;
+
+	if (alloc_entries) {
+		entry->pt_entries = kmalloc_array(GEN8_PDES,
+						  sizeof(*entry->pt_entries),
+						  GFP_KERNEL);
+		if (!entry->pt_entries)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * NOTE: This is a very frequently called function so we allow ourselves
+ * to annotate (using branch prediction hints) the fastpath of updating a
+ * non-pre-existing pagetable with leaf ptes.
+ */
+static int
+xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
+		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
+{
+	struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
+	struct xe_pt_update *child_upd = xe_child ?
+		&xe_walk->wupd.updates[xe_child->level] : NULL;
+	int ret;
+
+	ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
+	if (unlikely(ret))
+		return ret;
+
+	/*
+	 * Register this new pagetable so that it won't be recognized as
+	 * a shared pagetable by a subsequent insertion.
+	 */
+	if (unlikely(child_upd)) {
+		child_upd->update = NULL;
+		child_upd->parent = xe_child;
+		child_upd->preexisting = false;
+	}
+
+	if (likely(!upd->preexisting)) {
+		/* Continue building a non-connected subtree. */
+		struct iosys_map *map = &parent->bo->vmap;
+
+		if (unlikely(xe_child))
+			parent->base.dir->entries[offset] = &xe_child->base;
+
+		xe_pt_write(xe_walk->vm->xe, map, offset, pte);
+		parent->num_live++;
+	} else {
+		/* Shared pt. Stage update. */
+		unsigned int idx;
+		struct xe_vm_pgtable_update *entry = upd->update;
+
+		idx = offset - entry->ofs;
+		entry->pt_entries[idx].pt = xe_child;
+		entry->pt_entries[idx].pte = pte;
+		entry->qwords++;
+	}
+
+	return 0;
+}
+
+static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
+				   struct xe_pt_stage_bind_walk *xe_walk)
+{
+	u64 size, dma;
+
+	/* Does the virtual range requested cover a huge pte? */
+	if (!xe_pt_covers(addr, next, level, &xe_walk->base))
+		return false;
+
+	/* Does the DMA segment cover the whole pte? */
+	if (next - xe_walk->va_curs_start > xe_walk->curs->size)
+		return false;
+
+	/* Is the DMA address huge PTE size aligned? */
+	size = next - addr;
+	dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
+
+	return IS_ALIGNED(dma, size);
+}
+
+/*
+ * Scan the requested mapping to check whether it can be done entirely
+ * with 64K PTEs.
+ */
+static bool
+xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	struct xe_res_cursor curs = *xe_walk->curs;
+
+	if (!IS_ALIGNED(addr, SZ_64K))
+		return false;
+
+	if (next > xe_walk->l0_end_addr)
+		return false;
+
+	xe_res_next(&curs, addr - xe_walk->va_curs_start);
+	for (; addr < next; addr += SZ_64K) {
+		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
+			return false;
+
+		xe_res_next(&curs, SZ_64K);
+	}
+
+	return addr == next;
+}
+
+/*
+ * For non-compact "normal" 4K level-0 pagetables, we want to try to group
+ * addresses together in 64K-contigous regions to add a 64K TLB hint for the
+ * device to the PTE.
+ * This function determines whether the address is part of such a
+ * segment. For VRAM in normal pagetables, this is strictly necessary on
+ * some devices.
+ */
+static bool
+xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	/* Address is within an already found 64k region */
+	if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
+		return true;
+
+	xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
+	xe_walk->addr_64K = addr;
+
+	return xe_walk->found_64K;
+}
+
+static int
+xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
+		       unsigned int level, u64 addr, u64 next,
+		       struct xe_ptw **child,
+		       enum page_walk_action *action,
+		       struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_bind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+	struct xe_pt *xe_child;
+	bool covers;
+	int ret = 0;
+	u64 pte;
+
+	/* Is this a leaf entry ?*/
+	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
+		struct xe_res_cursor *curs = xe_walk->curs;
+
+		XE_WARN_ON(xe_walk->va_curs_start != addr);
+
+		pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset,
+					xe_walk->cache, xe_walk->pte_flags,
+					level);
+		pte |= xe_walk->default_pte;
+
+		/*
+		 * Set the GEN12_PTE_PS64 hint if possible, otherwise if
+		 * this device *requires* 64K PTE size for VRAM, fail.
+		 */
+		if (level == 0 && !xe_parent->is_compact) {
+			if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
+				pte |= GEN12_PTE_PS64;
+			else if (XE_WARN_ON(xe_walk->needs_64K))
+				return -EINVAL;
+		}
+
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
+		if (unlikely(ret))
+			return ret;
+
+		xe_res_next(curs, next - addr);
+		xe_walk->va_curs_start = next;
+		*action = ACTION_CONTINUE;
+
+		return ret;
+	}
+
+	/*
+	 * Descending to lower level. Determine if we need to allocate a
+	 * new page table or -directory, which we do if there is no
+	 * previous one or there is one we can completely replace.
+	 */
+	if (level == 1) {
+		walk->shifts = xe_normal_pt_shifts;
+		xe_walk->l0_end_addr = next;
+	}
+
+	covers = xe_pt_covers(addr, next, level, &xe_walk->base);
+	if (covers || !*child) {
+		u64 flags = 0;
+
+		xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1);
+		if (IS_ERR(xe_child))
+			return PTR_ERR(xe_child);
+
+		xe_pt_set_addr(xe_child,
+			       round_down(addr, 1ull << walk->shifts[level]));
+
+		if (!covers)
+			xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child);
+
+		*child = &xe_child->base;
+
+		/*
+		 * Prefer the compact pagetable layout for L0 if possible.
+		 * TODO: Suballocate the pt bo to avoid wasting a lot of
+		 * memory.
+		 */
+		if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 &&
+		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
+			walk->shifts = xe_compact_pt_shifts;
+			flags |= GEN12_PDE_64K;
+			xe_child->is_compact = true;
+		}
+
+		pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
+					 pte);
+	}
+
+	*action = ACTION_SUBTREE;
+	return ret;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
+	.pt_entry = xe_pt_stage_bind_entry,
+};
+
+/**
+ * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
+ * range.
+ * @gt: The gt we're building for.
+ * @vma: The vma indicating the address range.
+ * @entries: Storage for the update entries used for connecting the tree to
+ * the main tree at commit time.
+ * @num_entries: On output contains the number of @entries used.
+ *
+ * This function builds a disconnected page-table tree for a given address
+ * range. The tree is connected to the main vm tree for the gpu using
+ * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
+ * The function builds xe_vm_pgtable_update structures for already existing
+ * shared page-tables, and non-existing shared and non-shared page-tables
+ * are built and populated directly.
+ *
+ * Return 0 on success, negative error code on error.
+ */
+static int
+xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
+		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
+{
+	struct xe_bo *bo = vma->bo;
+	bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo);
+	struct xe_res_cursor curs;
+	struct xe_pt_stage_bind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_bind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.vm = vma->vm,
+		.gt = gt,
+		.curs = &curs,
+		.va_curs_start = vma->start,
+		.pte_flags = vma->pte_flags,
+		.wupd.entries = entries,
+		.needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram,
+	};
+	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+	int ret;
+
+	if (is_vram) {
+		xe_walk.default_pte = GEN12_PPGTT_PTE_LM;
+		if (vma && vma->use_atomic_access_pte_bit)
+			xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE;
+		xe_walk.dma_offset = gt->mem.vram.io_start -
+			gt_to_xe(gt)->mem.vram.io_start;
+		xe_walk.cache = XE_CACHE_WB;
+	} else {
+		if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT)
+			xe_walk.cache = XE_CACHE_WT;
+		else
+			xe_walk.cache = XE_CACHE_WB;
+	}
+
+	xe_bo_assert_held(bo);
+	if (xe_vma_is_userptr(vma))
+		xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1,
+				&curs);
+	else if (xe_bo_is_vram(bo))
+		xe_res_first(bo->ttm.resource, vma->bo_offset,
+			     vma->end - vma->start + 1, &curs);
+	else
+		xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset,
+				vma->end - vma->start + 1, &curs);
+
+	ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1,
+				&xe_walk.base);
+
+	*num_entries = xe_walk.wupd.num_used_entries;
+	return ret;
+}
+
+/**
+ * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
+ * shared pagetable.
+ * @addr: The start address within the non-shared pagetable.
+ * @end: The end address within the non-shared pagetable.
+ * @level: The level of the non-shared pagetable.
+ * @walk: Walk info. The function adjusts the walk action.
+ * @action: next action to perform (see enum page_walk_action)
+ * @offset: Ignored on input, First non-shared entry on output.
+ * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
+ *
+ * A non-shared page-table has some entries that belong to the address range
+ * and others that don't. This function determines the entries that belong
+ * fully to the address range. Depending on level, some entries may
+ * partially belong to the address range (that can't happen at level 0).
+ * The function detects that and adjust those offsets to not include those
+ * partial entries. Iff it does detect partial entries, we know that there must
+ * be shared page tables also at lower levels, so it adjusts the walk action
+ * accordingly.
+ *
+ * Return: true if there were non-shared entries, false otherwise.
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+				    struct xe_pt_walk *walk,
+				    enum page_walk_action *action,
+				    pgoff_t *offset, pgoff_t *end_offset)
+{
+	u64 size = 1ull << walk->shifts[level];
+
+	*offset = xe_pt_offset(addr, level, walk);
+	*end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
+
+	if (!level)
+		return true;
+
+	/*
+	 * If addr or next are not size aligned, there are shared pts at lower
+	 * level, so in that case traverse down the subtree
+	 */
+	*action = ACTION_CONTINUE;
+	if (!IS_ALIGNED(addr, size)) {
+		*action = ACTION_SUBTREE;
+		(*offset)++;
+	}
+
+	if (!IS_ALIGNED(end, size)) {
+		*action = ACTION_SUBTREE;
+		(*end_offset)--;
+	}
+
+	return *end_offset > *offset;
+}
+
+struct xe_pt_zap_ptes_walk {
+	/** @base: The walk base-class */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @gt: The gt we're building for */
+	struct xe_gt *gt;
+
+	/* Output */
+	/** @needs_invalidate: Whether we need to invalidate TLB*/
+	bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_zap_ptes_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+
+	XE_BUG_ON(!*child);
+	XE_BUG_ON(!level && xe_child->is_compact);
+
+	/*
+	 * Note that we're called from an entry callback, and we're dealing
+	 * with the child of that entry rather than the parent, so need to
+	 * adjust level down.
+	 */
+	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
+				    &end_offset)) {
+		xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap,
+			      offset * sizeof(u64), 0,
+			      (end_offset - offset) * sizeof(u64));
+		xe_walk->needs_invalidate = true;
+	}
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
+	.pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @gt: The gt we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and Userptr invalidation needs to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, that function needs access to the shared
+ * page-table entrieaso it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure neither the page-table connectivity
+ * nor the page-table entries of the range is updated from under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma)
+{
+	struct xe_pt_zap_ptes_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_zap_ptes_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.gt = gt,
+	};
+	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+
+	if (!(vma->gt_present & BIT(gt->info.id)))
+		return false;
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
+				 &xe_walk.base);
+
+	return xe_walk.needs_invalidate;
+}
+
+static void
+xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt,
+		       struct iosys_map *map, void *data,
+		       u32 qword_ofs, u32 num_qwords,
+		       const struct xe_vm_pgtable_update *update)
+{
+	struct xe_pt_entry *ptes = update->pt_entries;
+	u64 *ptr = data;
+	u32 i;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	for (i = 0; i < num_qwords; i++) {
+		if (map)
+			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+				  sizeof(u64), u64, ptes[i].pte);
+		else
+			ptr[i] = ptes[i].pte;
+	}
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+			     struct xe_vm_pgtable_update *entries,
+			     u32 num_entries)
+{
+	u32 i, j;
+
+	for (i = 0; i < num_entries; i++) {
+		if (!entries[i].pt_entries)
+			continue;
+
+		for (j = 0; j < entries[i].qwords; j++)
+			xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL);
+		kfree(entries[i].pt_entries);
+	}
+}
+
+static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+{
+	struct xe_vm *vm = vma->vm;
+
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vma_is_userptr(vma))
+		lockdep_assert_held_read(&vm->userptr.notifier_lock);
+	else
+		dma_resv_assert_held(vma->bo->ttm.base.resv);
+
+	dma_resv_assert_held(&vm->resv);
+}
+
+static void xe_pt_commit_bind(struct xe_vma *vma,
+			      struct xe_vm_pgtable_update *entries,
+			      u32 num_entries, bool rebind,
+			      struct llist_head *deferred)
+{
+	u32 i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (i = 0; i < num_entries; i++) {
+		struct xe_pt *pt = entries[i].pt;
+		struct xe_pt_dir *pt_dir;
+
+		if (!rebind)
+			pt->num_live += entries[i].qwords;
+
+		if (!pt->level) {
+			kfree(entries[i].pt_entries);
+			continue;
+		}
+
+		pt_dir = as_xe_pt_dir(pt);
+		for (j = 0; j < entries[i].qwords; j++) {
+			u32 j_ = j + entries[i].ofs;
+			struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+
+			if (xe_pt_entry(pt_dir, j_))
+				xe_pt_destroy(xe_pt_entry(pt_dir, j_),
+					      vma->vm->flags, deferred);
+
+			pt_dir->dir.entries[j_] = &newpte->base;
+		}
+		kfree(entries[i].pt_entries);
+	}
+}
+
+static int
+xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma,
+		   struct xe_vm_pgtable_update *entries, u32 *num_entries,
+		   bool rebind)
+{
+	int err;
+
+	*num_entries = 0;
+	err = xe_pt_stage_bind(gt, vma, entries, num_entries);
+	if (!err)
+		BUG_ON(!*num_entries);
+	else /* abort! */
+		xe_pt_abort_bind(vma, entries, *num_entries);
+
+	return err;
+}
+
+static void xe_vm_dbg_print_entries(struct xe_device *xe,
+				    const struct xe_vm_pgtable_update *entries,
+				    unsigned int num_entries)
+#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+{
+	unsigned int i;
+
+	vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
+	for (i = 0; i < num_entries; i++) {
+		const struct xe_vm_pgtable_update *entry = &entries[i];
+		struct xe_pt *xe_pt = entry->pt;
+		u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
+		u64 end;
+		u64 start;
+
+		XE_BUG_ON(entry->pt->is_compact);
+		start = entry->ofs * page_size;
+		end = start + page_size * entry->qwords;
+		vm_dbg(&xe->drm,
+		       "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
+		       i, xe_pt->level, entry->ofs, entry->qwords,
+		       xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
+	}
+}
+#else
+{}
+#endif
+
+#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
+
+static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+	u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
+	static u32 count;
+
+	if (count++ % divisor == divisor - 1) {
+		struct xe_vm *vm = vma->vm;
+
+		vma->userptr.divisor = divisor << 1;
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&vma->userptr.invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+		return true;
+	}
+
+	return false;
+}
+
+#else
+
+static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+	return false;
+}
+
+#endif
+
+/**
+ * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
+ * @base: Base we derive from.
+ * @bind: Whether this is a bind or an unbind operation. A bind operation
+ *        makes the pre-commit callback error with -EAGAIN if it detects a
+ *        pending invalidation.
+ * @locked: Whether the pre-commit callback locked the userptr notifier lock
+ *          and it needs unlocking.
+ */
+struct xe_pt_migrate_pt_update {
+	struct xe_migrate_pt_update base;
+	bool bind;
+	bool locked;
+};
+
+static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_pt_migrate_pt_update *userptr_update =
+		container_of(pt_update, typeof(*userptr_update), base);
+	struct xe_vma *vma = pt_update->vma;
+	unsigned long notifier_seq = vma->userptr.notifier_seq;
+	struct xe_vm *vm = vma->vm;
+
+	userptr_update->locked = false;
+
+	/*
+	 * Wait until nobody is running the invalidation notifier, and
+	 * since we're exiting the loop holding the notifier lock,
+	 * nobody can proceed invalidating either.
+	 *
+	 * Note that we don't update the vma->userptr.notifier_seq since
+	 * we don't update the userptr pages.
+	 */
+	do {
+		down_read(&vm->userptr.notifier_lock);
+		if (!mmu_interval_read_retry(&vma->userptr.notifier,
+					     notifier_seq))
+			break;
+
+		up_read(&vm->userptr.notifier_lock);
+
+		if (userptr_update->bind)
+			return -EAGAIN;
+
+		notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+	} while (true);
+
+	/* Inject errors to test_whether they are handled correctly */
+	if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) {
+		up_read(&vm->userptr.notifier_lock);
+		return -EAGAIN;
+	}
+
+	userptr_update->locked = true;
+
+	return 0;
+}
+
+static const struct xe_migrate_pt_update_ops bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
+ * address range.
+ * @gt: The gt to bind for.
+ * @vma: The vma to bind.
+ * @e: The engine with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before binding the built tree to the live vm tree.
+ * @num_syncs: Number of @sync entries.
+ * @rebind: Whether we're rebinding this vma to the same address range without
+ * an unbind in-between.
+ *
+ * This function builds a page-table tree (see xe_pt_stage_bind() for more
+ * information on page-table building), and the xe_vm_pgtable_update entries
+ * abstracting the operations needed to attach it to the main vm tree. It
+ * then takes the relevant locks and updates the metadata side of the main
+ * vm tree and submits the operations for pipelined attachment of the
+ * gpu page-table to the vm main tree, (which can be done either by the
+ * cpu and the GPU).
+ *
+ * Return: A valid dma-fence representing the pipelined attachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update bind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
+			.vma = vma,
+		},
+		.bind = true,
+	};
+	struct xe_vm *vm = vma->vm;
+	u32 num_entries;
+	struct dma_fence *fence;
+	int err;
+
+	bind_pt_update.locked = false;
+	xe_bo_assert_held(vma->bo);
+	xe_vm_assert_held(vm);
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	vm_dbg(&vma->vm->xe->drm,
+	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
+	       vma->start, vma->end, e);
+
+	err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind);
+	if (err)
+		goto err;
+	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+
+	fence = xe_migrate_update_pgtables(gt->migrate,
+					   vm, vma->bo,
+					   e ? e : vm->eng[gt->info.id],
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &bind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		LLIST_HEAD(deferred);
+
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(&vm->resv, fence, !rebind &&
+				   vma->last_munmap_rebind ?
+				   DMA_RESV_USAGE_KERNEL :
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_bind(vma, entries, num_entries, rebind,
+				  bind_pt_update.locked ? &deferred : NULL);
+
+		/* This vma is live (again?) now */
+		vma->gt_present |= BIT(gt->info.id);
+
+		if (bind_pt_update.locked) {
+			vma->userptr.initial_bind = true;
+			up_read(&vm->userptr.notifier_lock);
+			xe_bo_put_commit(&deferred);
+		}
+		if (!rebind && vma->last_munmap_rebind &&
+		    xe_vm_in_compute_mode(vm))
+			queue_work(vm->xe->ordered_wq,
+				   &vm->preempt.rebind_work);
+	} else {
+		if (bind_pt_update.locked)
+			up_read(&vm->userptr.notifier_lock);
+		xe_pt_abort_bind(vma, entries, num_entries);
+	}
+
+	return fence;
+
+err:
+	return ERR_PTR(err);
+}
+
+struct xe_pt_stage_unbind_walk {
+	/** @base: The pagewalk base-class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @gt: The gt we're unbinding from. */
+	struct xe_gt *gt;
+
+	/**
+	 * @modified_start: Walk range start, modified to include any
+	 * shared pagetables that we're the only user of and can thus
+	 * treat as private.
+	 */
+	u64 modified_start;
+	/** @modified_end: Walk range start, modified like @modified_start. */
+	u64 modified_end;
+
+	/* Output */
+	/* @wupd: Structure to track the page-table updates we're building */
+	struct xe_walk_update wupd;
+};
+
+/*
+ * Check whether this range is the only one populating this pagetable,
+ * and in that case, update the walk range checks so that higher levels don't
+ * view us as a shared pagetable.
+ */
+static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
+			     const struct xe_pt *child,
+			     enum page_walk_action *action,
+			     struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	unsigned int shift = walk->shifts[level];
+	u64 size = 1ull << shift;
+
+	if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) &&
+	    ((next - addr) >> shift) == child->num_live) {
+		u64 size = 1ull << walk->shifts[level + 1];
+
+		*action = ACTION_CONTINUE;
+
+		if (xe_walk->modified_start >= addr)
+			xe_walk->modified_start = round_down(addr, size);
+		if (xe_walk->modified_end <= next)
+			xe_walk->modified_end = round_up(next, size);
+
+		return true;
+	}
+
+	return false;
+}
+
+static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
+				    unsigned int level, u64 addr, u64 next,
+				    struct xe_ptw **child,
+				    enum page_walk_action *action,
+				    struct xe_pt_walk *walk)
+{
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+
+	XE_BUG_ON(!*child);
+	XE_BUG_ON(!level && xe_child->is_compact);
+
+	xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+
+	return 0;
+}
+
+static int
+xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+	u64 size = 1ull << walk->shifts[--level];
+
+	if (!IS_ALIGNED(addr, size))
+		addr = xe_walk->modified_start;
+	if (!IS_ALIGNED(next, size))
+		next = xe_walk->modified_end;
+
+	/* Parent == *child is the root pt. Don't kill it. */
+	if (parent != *child &&
+	    xe_pt_check_kill(addr, next, level, xe_child, action, walk))
+		return 0;
+
+	if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
+				     &end_offset))
+		return 0;
+
+	(void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
+	xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
+	.pt_entry = xe_pt_stage_unbind_entry,
+	.pt_post_descend = xe_pt_stage_unbind_post_descend,
+};
+
+/**
+ * xe_pt_stage_unbind() - Build page-table update structures for an unbind
+ * operation
+ * @gt: The gt we're unbinding for.
+ * @vma: The vma we're unbinding.
+ * @entries: Caller-provided storage for the update structures.
+ *
+ * Builds page-table update structures for an unbind operation. The function
+ * will attempt to remove all page-tables that we're the only user
+ * of, and for that to work, the unbind operation must be committed in the
+ * same critical section that blocks racing binds to the same page-table tree.
+ *
+ * Return: The number of entries used.
+ */
+static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
+				       struct xe_vm_pgtable_update *entries)
+{
+	struct xe_pt_stage_unbind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_unbind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.gt = gt,
+		.modified_start = vma->start,
+		.modified_end = vma->end + 1,
+		.wupd.entries = entries,
+	};
+	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
+				 &xe_walk.base);
+
+	return xe_walk.wupd.num_used_entries;
+}
+
+static void
+xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
+				  struct xe_gt *gt, struct iosys_map *map,
+				  void *ptr, u32 qword_ofs, u32 num_qwords,
+				  const struct xe_vm_pgtable_update *update)
+{
+	struct xe_vma *vma = pt_update->vma;
+	u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level);
+	int i;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	if (map && map->is_iomem)
+		for (i = 0; i < num_qwords; ++i)
+			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+				  sizeof(u64), u64, empty);
+	else if (map)
+		memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
+			 num_qwords);
+	else
+		memset64(ptr, empty, num_qwords);
+}
+
+static void
+xe_pt_commit_unbind(struct xe_vma *vma,
+		    struct xe_vm_pgtable_update *entries, u32 num_entries,
+		    struct llist_head *deferred)
+{
+	u32 j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (j = 0; j < num_entries; ++j) {
+		struct xe_vm_pgtable_update *entry = &entries[j];
+		struct xe_pt *pt = entry->pt;
+
+		pt->num_live -= entry->qwords;
+		if (pt->level) {
+			struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+			u32 i;
+
+			for (i = entry->ofs; i < entry->ofs + entry->qwords;
+			     i++) {
+				if (xe_pt_entry(pt_dir, i))
+					xe_pt_destroy(xe_pt_entry(pt_dir, i),
+						      vma->vm->flags, deferred);
+
+				pt_dir->dir.entries[i] = NULL;
+			}
+		}
+	}
+}
+
+static const struct xe_migrate_pt_update_ops unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
+ * address range.
+ * @gt: The gt to unbind for.
+ * @vma: The vma to unbind.
+ * @e: The engine with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
+ * @num_syncs: Number of @sync entries.
+ *
+ * This function builds a the xe_vm_pgtable_update entries abstracting the
+ * operations needed to detach the page-table tree to be destroyed from the
+ * man vm tree.
+ * It then takes the relevant locks and submits the operations for
+ * pipelined detachment of the gpu page-table from  the vm main tree,
+ * (which can be done either by the cpu and the GPU), Finally it frees the
+ * detached page-table tree.
+ *
+ * Return: A valid dma-fence representing the pipelined detachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		   struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update unbind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
+			&unbind_ops,
+			.vma = vma,
+		},
+	};
+	struct xe_vm *vm = vma->vm;
+	u32 num_entries;
+	struct dma_fence *fence = NULL;
+	LLIST_HEAD(deferred);
+
+	xe_bo_assert_held(vma->bo);
+	xe_vm_assert_held(vm);
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	vm_dbg(&vma->vm->xe->drm,
+	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
+	       vma->start, vma->end, e);
+
+	num_entries = xe_pt_stage_unbind(gt, vma, entries);
+	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+
+	/*
+	 * Even if we were already evicted and unbind to destroy, we need to
+	 * clear again here. The eviction may have updated pagetables at a
+	 * lower level, because it needs to be more conservative.
+	 */
+	fence = xe_migrate_update_pgtables(gt->migrate,
+					   vm, NULL, e ? e :
+					   vm->eng[gt->info.id],
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &unbind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(&vm->resv, fence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		/* This fence will be installed by caller when doing eviction */
+		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_unbind(vma, entries, num_entries,
+				    unbind_pt_update.locked ? &deferred : NULL);
+		vma->gt_present &= ~BIT(gt->info.id);
+	}
+
+	if (!vma->gt_present)
+		list_del_init(&vma->rebind_link);
+
+	if (unbind_pt_update.locked) {
+		XE_WARN_ON(!xe_vma_is_userptr(vma));
+
+		if (!vma->gt_present) {
+			spin_lock(&vm->userptr.invalidated_lock);
+			list_del_init(&vma->userptr.invalidate_link);
+			spin_unlock(&vm->userptr.invalidated_lock);
+		}
+		up_read(&vm->userptr.notifier_lock);
+		xe_bo_put_commit(&deferred);
+	}
+
+	return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
new file mode 100644
index 0000000000000..1152043e5c63c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_PT_H_
+#define _XE_PT_H_
+
+#include <linux/types.h>
+
+#include "xe_pt_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_device;
+struct xe_engine;
+struct xe_gt;
+struct xe_sync_entry;
+struct xe_vm;
+struct xe_vma;
+
+#define xe_pt_write(xe, map, idx, data) \
+	xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
+
+unsigned int xe_pt_shift(unsigned int level);
+
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+			   unsigned int level);
+
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+			 struct xe_vm *vm);
+
+void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+			  struct xe_pt *pt);
+
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
+
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind);
+
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+		   struct xe_sync_entry *syncs, u32 num_syncs);
+
+bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma);
+
+u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
+		    const enum xe_cache_level level);
+
+u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+		    u64 offset, enum xe_cache_level cache,
+		    u32 flags, u32 pt_level);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
new file mode 100644
index 0000000000000..2ed64c0a44857
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PT_TYPES_H_
+#define _XE_PT_TYPES_H_
+
+#include "xe_pt_walk.h"
+
+enum xe_cache_level {
+	XE_CACHE_NONE,
+	XE_CACHE_WT,
+	XE_CACHE_WB,
+};
+
+#define XE_VM_MAX_LEVEL 4
+
+struct xe_pt {
+	struct xe_ptw base;
+	struct xe_bo *bo;
+	unsigned int level;
+	unsigned int num_live;
+	bool rebind;
+	bool is_compact;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+	/** addr: Virtual address start address of the PT. */
+	u64 addr;
+#endif
+};
+
+struct xe_pt_entry {
+	struct xe_pt *pt;
+	u64 pte;
+};
+
+struct xe_vm_pgtable_update {
+	/** @bo: page table bo to write to */
+	struct xe_bo *pt_bo;
+
+	/** @ofs: offset inside this PTE to begin writing to (in qwords) */
+	u32 ofs;
+
+	/** @qwords: number of PTE's to write */
+	u32 qwords;
+
+	/** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */
+	struct xe_pt *pt;
+
+	/** @pt_entries: Newly added pagetable entries */
+	struct xe_pt_entry *pt_entries;
+
+	/** @flags: Target flags */
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
new file mode 100644
index 0000000000000..0def89af43729
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#include "xe_pt_walk.h"
+
+/**
+ * DOC: GPU page-table tree walking.
+ * The utilities in this file are similar to the CPU page-table walk
+ * utilities in mm/pagewalk.c. The main difference is that we distinguish
+ * the various levels of a page-table tree with an unsigned integer rather
+ * than by name. 0 is the lowest level, and page-tables with level 0 can
+ * not be directories pointing to lower levels, whereas all other levels
+ * can. The user of the utilities determines the highest level.
+ *
+ * Nomenclature:
+ * Each struct xe_ptw, regardless of level is referred to as a page table, and
+ * multiple page tables typically form a page table tree with page tables at
+ * intermediate levels being page directories pointing at page tables at lower
+ * levels. A shared page table for a given address range is a page-table which
+ * is neither fully within nor fully outside the address range and that can
+ * thus be shared by two or more address ranges.
+ *
+ * Please keep this code generic so that it can used as a drm-wide page-
+ * table walker should other drivers find use for it.
+ */
+static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level,
+			  const struct xe_pt_walk *walk)
+{
+	u64 size = 1ull << walk->shifts[level];
+	u64 tmp = round_up(addr + 1, size);
+
+	return min_t(u64, tmp, end);
+}
+
+static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end,
+		       unsigned int level, const struct xe_pt_walk *walk)
+{
+	pgoff_t step = 1;
+
+	/* Shared pt walk skips to the last pagetable */
+	if (unlikely(walk->shared_pt_mode)) {
+		unsigned int shift = walk->shifts[level];
+		u64 skip_to = round_down(end, 1ull << shift);
+
+		if (skip_to > next) {
+			step += (skip_to - next) >> shift;
+			next = skip_to;
+		}
+	}
+
+	*addr = next;
+	*offset += step;
+
+	return next != end;
+}
+
+/**
+ * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks
+ * for each page-table entry in all levels.
+ * @parent: The root page table for walk start.
+ * @level: The root page table level.
+ * @addr: Virtual address start.
+ * @end: Virtual address end + 1.
+ * @walk: Walk info.
+ *
+ * Similar to the CPU page-table walker, this is a helper to walk
+ * a gpu page table and call a provided callback function for each entry.
+ *
+ * Return: 0 on success, negative error code on error. The error is
+ * propagated from the callback and on error the walk is terminated.
+ */
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	pgoff_t offset = xe_pt_offset(addr, level, walk);
+	struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL;
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action;
+	struct xe_ptw *child;
+	int err = 0;
+	u64 next;
+
+	do {
+		next = xe_pt_addr_end(addr, end, level, walk);
+		if (walk->shared_pt_mode && xe_pt_covers(addr, next, level,
+							 walk))
+			continue;
+again:
+		action = ACTION_SUBTREE;
+		child = entries ? entries[offset] : NULL;
+		err = ops->pt_entry(parent, offset, level, addr, next,
+				    &child, &action, walk);
+		if (err)
+			break;
+
+		/* Probably not needed yet for gpu pagetable walk. */
+		if (unlikely(action == ACTION_AGAIN))
+			goto again;
+
+		if (likely(!level || !child || action == ACTION_CONTINUE))
+			continue;
+
+		err = xe_pt_walk_range(child, level - 1, addr, next, walk);
+
+		if (!err && ops->pt_post_descend)
+			err = ops->pt_post_descend(parent, offset, level, addr,
+						   next, &child, &action, walk);
+		if (err)
+			break;
+
+	} while (xe_pt_next(&offset, &addr, next, end, level, walk));
+
+	return err;
+}
+
+/**
+ * xe_pt_walk_shared() - Walk shared page tables of a page-table tree.
+ * @parent: Root page table directory.
+ * @level: Level of the root.
+ * @addr: Start address.
+ * @end: Last address + 1.
+ * @walk: Walk info.
+ *
+ * This function is similar to xe_pt_walk_range() but it skips page tables
+ * that are private to the range. Since the root (or @parent) page table is
+ * typically also a shared page table this function is different in that it
+ * calls the pt_entry callback and the post_descend callback also for the
+ * root. The root can be detected in the callbacks by checking whether
+ * parent == *child.
+ * Walking only the shared page tables is common for unbind-type operations
+ * where the page-table entries for an address range are cleared or detached
+ * from the main page-table tree.
+ *
+ * Return: 0 on success, negative error code on error: If a callback
+ * returns an error, the walk will be terminated and the error returned by
+ * this function.
+ */
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action = ACTION_SUBTREE;
+	struct xe_ptw *child = parent;
+	int err;
+
+	walk->shared_pt_mode = true;
+	err = walk->ops->pt_entry(parent, 0, level + 1, addr, end,
+				  &child, &action, walk);
+
+	if (err || action != ACTION_SUBTREE)
+		return err;
+
+	err = xe_pt_walk_range(parent, level, addr, end, walk);
+	if (!err && ops->pt_post_descend) {
+		err = ops->pt_post_descend(parent, 0, level + 1, addr, end,
+					   &child, &action, walk);
+	}
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h
new file mode 100644
index 0000000000000..42c51fa601ecb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef __XE_PT_WALK__
+#define __XE_PT_WALK__
+
+#include <linux/pagewalk.h>
+#include <linux/types.h>
+
+struct xe_ptw_dir;
+
+/**
+ * struct xe_ptw - base class for driver pagetable subclassing.
+ * @dir: Pointer to an array of children if any.
+ *
+ * Drivers could subclass this, and if it's a page-directory, typically
+ * embed the xe_ptw_dir::entries array in the same allocation.
+ */
+struct xe_ptw {
+	struct xe_ptw_dir *dir;
+};
+
+/**
+ * struct xe_ptw_dir - page directory structure
+ * @entries: Array holding page directory children.
+ *
+ * It is the responsibility of the user to ensure @entries is
+ * correctly sized.
+ */
+struct xe_ptw_dir {
+	struct xe_ptw *entries[0];
+};
+
+/**
+ * struct xe_pt_walk - Embeddable struct for walk parameters
+ */
+struct xe_pt_walk {
+	/** @ops: The walk ops used for the pagewalk */
+	const struct xe_pt_walk_ops *ops;
+	/**
+	 * @shifts: Array of page-table entry shifts used for the
+	 * different levels, starting out with the leaf level 0
+	 * page-shift as the first entry. It's legal for this pointer to be
+	 * changed during the walk.
+	 */
+	const u64 *shifts;
+	/** @max_level: Highest populated level in @sizes */
+	unsigned int max_level;
+	/**
+	 * @shared_pt_mode: Whether to skip all entries that are private
+	 * to the address range and called only for entries that are
+	 * shared with other address ranges. Such entries are referred to
+	 * as shared pagetables.
+	 */
+	bool shared_pt_mode;
+};
+
+/**
+ * typedef xe_pt_entry_fn - gpu page-table-walk callback-function
+ * @parent: The parent page.table.
+ * @offset: The offset (number of entries) into the page table.
+ * @level: The level of @parent.
+ * @addr: The virtual address.
+ * @next: The virtual address for the next call, or end address.
+ * @child: Pointer to pointer to child page-table at this @offset. The
+ * function may modify the value pointed to if, for example, allocating a
+ * child page table.
+ * @action: The walk action to take upon return. See <linux/pagewalk.h>.
+ * @walk: The walk parameters.
+ */
+typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset,
+			      unsigned int level, u64 addr, u64 next,
+			      struct xe_ptw **child,
+			      enum page_walk_action *action,
+			      struct xe_pt_walk *walk);
+
+/**
+ * struct xe_pt_walk_ops - Walk callbacks.
+ */
+struct xe_pt_walk_ops {
+	/**
+	 * @pt_entry: Callback to be called for each page table entry prior
+	 * to descending to the next level. The returned value of the action
+	 * function parameter is honored.
+	 */
+	xe_pt_entry_fn pt_entry;
+	/**
+	 * @pt_post_descend: Callback to be called for each page table entry
+	 * after return from descending to the next level. The returned value
+	 * of the action function parameter is ignored.
+	 */
+	xe_pt_entry_fn pt_post_descend;
+};
+
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk);
+
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk);
+
+/**
+ * xe_pt_covers - Whether the address range covers an entire entry in @level
+ * @addr: Start of the range.
+ * @end: End of range + 1.
+ * @level: Page table level.
+ * @walk: Page table walk info.
+ *
+ * This function is a helper to aid in determining whether a leaf page table
+ * entry can be inserted at this @level.
+ *
+ * Return: Whether the range provided covers exactly an entry at this level.
+ */
+static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level,
+				const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return end - addr == pt_size && IS_ALIGNED(addr, pt_size);
+}
+
+/**
+ * xe_pt_num_entries: Number of page-table entries of a given range at this
+ * level
+ * @addr: Start address.
+ * @end: End address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The number of page table entries at this level between @start and
+ * @end.
+ */
+static inline pgoff_t
+xe_pt_num_entries(u64 addr, u64 end, unsigned int level,
+		  const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return (round_up(end, pt_size) - round_down(addr, pt_size)) >>
+		walk->shifts[level];
+}
+
+/**
+ * xe_pt_offset: Offset of the page-table entry for a given address.
+ * @addr: The address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The page table entry offset for the given address in a
+ * page table with size indicated by @level.
+ */
+static inline pgoff_t
+xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk)
+{
+	if (level < walk->max_level)
+		addr &= ((1ull << walk->shifts[level + 1]) - 1);
+
+	return addr >> walk->shifts[level];
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
new file mode 100644
index 0000000000000..6e904e97f4568
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <linux/nospec.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_query.h"
+#include "xe_ggtt.h"
+#include "xe_guc_hwconfig.h"
+
+static const enum xe_engine_class xe_to_user_engine_class[] = {
+	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
+	[XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
+	[XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
+	[XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
+};
+
+static size_t calc_hw_engine_info_size(struct xe_device *xe)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+			i++;
+		}
+
+	return i * sizeof(struct drm_xe_engine_class_instance);
+}
+
+static int query_engines(struct xe_device *xe,
+			 struct drm_xe_device_query *query)
+{
+	size_t size = calc_hw_engine_info_size(xe);
+	struct drm_xe_engine_class_instance __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_engine_class_instance *hw_engine_info;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	hw_engine_info = kmalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !hw_engine_info))
+		return -ENOMEM;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+
+			hw_engine_info[i].engine_class =
+				xe_to_user_engine_class[hwe->class];
+			hw_engine_info[i].engine_instance =
+				hwe->logical_instance;
+			hw_engine_info[i++].gt_id = gt->info.id;
+		}
+
+	if (copy_to_user(query_ptr, hw_engine_info, size)) {
+		kfree(hw_engine_info);
+		return -EFAULT;
+	}
+	kfree(hw_engine_info);
+
+	return 0;
+}
+
+static size_t calc_memory_usage_size(struct xe_device *xe)
+{
+	u32 num_managers = 1;
+	int i;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i)
+		if (ttm_manager_type(&xe->ttm, i))
+			num_managers++;
+
+	return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]);
+}
+
+static int query_memory_usage(struct xe_device *xe,
+			      struct drm_xe_device_query *query)
+{
+	size_t size = calc_memory_usage_size(xe);
+	struct drm_xe_query_mem_usage *usage;
+	struct drm_xe_query_mem_usage __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct ttm_resource_manager *man;
+	int ret, i;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	usage = kmalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !usage))
+		return -ENOMEM;
+
+	usage->pad = 0;
+
+	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
+	usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM;
+	usage->regions[0].instance = 0;
+	usage->regions[0].pad = 0;
+	usage->regions[0].min_page_size = PAGE_SIZE;
+	usage->regions[0].max_page_size = PAGE_SIZE;
+	usage->regions[0].total_size = man->size << PAGE_SHIFT;
+	usage->regions[0].used = ttm_resource_manager_usage(man);
+	usage->num_regions = 1;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man) {
+			usage->regions[usage->num_regions].mem_class =
+				XE_MEM_REGION_CLASS_VRAM;
+			usage->regions[usage->num_regions].instance =
+				usage->num_regions;
+			usage->regions[usage->num_regions].pad = 0;
+			usage->regions[usage->num_regions].min_page_size =
+				xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
+				SZ_64K : PAGE_SIZE;
+			usage->regions[usage->num_regions].max_page_size =
+				SZ_1G;
+			usage->regions[usage->num_regions].total_size =
+				man->size;
+			usage->regions[usage->num_regions++].used =
+				ttm_resource_manager_usage(man);
+		}
+	}
+
+	if (!copy_to_user(query_ptr, usage, size))
+		ret = 0;
+	else
+		ret = -ENOSPC;
+
+	kfree(usage);
+	return ret;
+}
+
+static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	u32 num_params = XE_QUERY_CONFIG_NUM_PARAM;
+	size_t size =
+		sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
+	struct drm_xe_query_config __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_config *config;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	config = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !config))
+		return -ENOMEM;
+
+	config->num_params = num_params;
+	config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
+		xe->info.devid | (xe->info.revid << 16);
+	if (to_gt(xe)->mem.vram.size)
+		config->info[XE_QUERY_CONFIG_FLAGS] =
+			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
+	if (xe->info.enable_guc)
+		config->info[XE_QUERY_CONFIG_FLAGS] |=
+			XE_QUERY_CONFIG_FLAGS_USE_GUC;
+	config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] =
+		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+	config->info[XE_QUERY_CONFIG_VA_BITS] = 12 +
+		(9 * (xe->info.vm_max_level + 1));
+	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count;
+	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
+		hweight_long(xe->info.mem_region_mask);
+
+	if (copy_to_user(query_ptr, config, size)) {
+		kfree(config);
+		return -EFAULT;
+	}
+	kfree(config);
+
+	return 0;
+}
+
+static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt;
+	size_t size = sizeof(struct drm_xe_query_gts) +
+		xe->info.tile_count * sizeof(struct drm_xe_query_gt);
+	struct drm_xe_query_gts __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_gts *gts;
+	u8 id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	gts = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !gts))
+		return -ENOMEM;
+
+	gts->num_gt = xe->info.tile_count;
+	for_each_gt(gt, xe, id) {
+		if (id == 0)
+			gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
+		else if (xe_gt_is_media_type(gt))
+			gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA;
+		else
+			gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE;
+		gts->gts[id].instance = id;
+		gts->gts[id].clock_freq = gt->info.clock_freq;
+		if (!IS_DGFX(xe))
+			gts->gts[id].native_mem_regions = 0x1;
+		else
+			gts->gts[id].native_mem_regions =
+				BIT(gt->info.vram_id) << 1;
+		gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^
+			gts->gts[id].native_mem_regions;
+	}
+
+	if (copy_to_user(query_ptr, gts, size)) {
+		kfree(gts);
+		return -EFAULT;
+	}
+	kfree(gts);
+
+	return 0;
+}
+
+static int query_hwconfig(struct xe_device *xe,
+			  struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	void *hwconfig;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	hwconfig = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_ERR(xe, !hwconfig))
+		return -ENOMEM;
+
+	xe_device_mem_access_get(xe);
+	xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
+	xe_device_mem_access_put(xe);
+
+	if (copy_to_user(query_ptr, hwconfig, size)) {
+		kfree(hwconfig);
+		return -EFAULT;
+	}
+	kfree(hwconfig);
+
+	return 0;
+}
+
+static size_t calc_topo_query_size(struct xe_device *xe)
+{
+	return xe->info.tile_count *
+		(3 * sizeof(struct drm_xe_query_topology_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+}
+
+static void __user *copy_mask(void __user *ptr,
+			      struct drm_xe_query_topology_mask *topo,
+			      void *mask, size_t mask_size)
+{
+	topo->num_bytes = mask_size;
+
+	if (copy_to_user(ptr, topo, sizeof(*topo)))
+		return ERR_PTR(-EFAULT);
+	ptr += sizeof(topo);
+
+	if (copy_to_user(ptr, mask, mask_size))
+		return ERR_PTR(-EFAULT);
+	ptr += mask_size;
+
+	return ptr;
+}
+
+static int query_gt_topology(struct xe_device *xe,
+			     struct drm_xe_device_query *query)
+{
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	size_t size = calc_topo_query_size(xe);
+	struct drm_xe_query_topology_mask topo;
+	struct xe_gt *gt;
+	int id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	for_each_gt(gt, xe, id) {
+		topo.gt_id = id;
+
+		topo.type = XE_TOPO_DSS_GEOMETRY;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.g_dss_mask,
+				      sizeof(gt->fuse_topo.g_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = XE_TOPO_DSS_COMPUTE;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.c_dss_mask,
+				      sizeof(gt->fuse_topo.c_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = XE_TOPO_EU_PER_DSS;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.eu_mask_per_dss,
+				      sizeof(gt->fuse_topo.eu_mask_per_dss));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+	}
+
+	return 0;
+}
+
+static int (* const xe_query_funcs[])(struct xe_device *xe,
+				      struct drm_xe_device_query *query) = {
+	query_engines,
+	query_memory_usage,
+	query_config,
+	query_gts,
+	query_hwconfig,
+	query_gt_topology,
+};
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_device_query *query = data;
+	u32 idx;
+
+	if (XE_IOCTL_ERR(xe, query->extensions != 0))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, query->query > ARRAY_SIZE(xe_query_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
+	if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx]))
+		return -EINVAL;
+
+	return xe_query_funcs[idx](xe, query);
+}
diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h
new file mode 100644
index 0000000000000..beeb7a8192b48
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_QUERY_H_
+#define _XE_QUERY_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
new file mode 100644
index 0000000000000..16e025dcf2ccc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_reg_sr.h"
+
+#include <linux/align.h>
+#include <linux/string_helpers.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_print.h>
+#include <drm/drm_managed.h>
+
+#include "xe_rtp_types.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+
+#define XE_REG_SR_GROW_STEP_DEFAULT	16
+
+static void reg_sr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_reg_sr *sr = arg;
+
+	xa_destroy(&sr->xa);
+	kfree(sr->pool.arr);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+}
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
+{
+	xa_init(&sr->xa);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+	sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
+	sr->name = name;
+
+	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
+}
+
+int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
+		      struct xe_reg_sr_kv **dst)
+{
+	struct xe_reg_sr_kv *iter;
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+
+	if (xa_empty(&sr->xa)) {
+		*dst = NULL;
+		return 0;
+	}
+
+	*dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL);
+	if (!*dst)
+		return -ENOMEM;
+
+	iter = *dst;
+	xa_for_each(&sr->xa, idx, entry) {
+		iter->k = idx;
+		iter->v = *entry;
+		iter++;
+	}
+
+	return 0;
+}
+
+static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
+{
+	if (sr->pool.used == sr->pool.allocated) {
+		struct xe_reg_sr_entry *arr;
+
+		arr = krealloc_array(sr->pool.arr,
+				     ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
+				     sizeof(*arr), GFP_KERNEL);
+		if (!arr)
+			return NULL;
+
+		sr->pool.arr = arr;
+		sr->pool.allocated += sr->pool.grow_step;
+	}
+
+	return &sr->pool.arr[sr->pool.used++];
+}
+
+static bool compatible_entries(const struct xe_reg_sr_entry *e1,
+			       const struct xe_reg_sr_entry *e2)
+{
+	/*
+	 * Don't allow overwriting values: clr_bits/set_bits should be disjoint
+	 * when operating in the same register
+	 */
+	if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits ||
+	    e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits)
+		return false;
+
+	if (e1->masked_reg != e2->masked_reg)
+		return false;
+
+	if (e1->reg_type != e2->reg_type)
+		return false;
+
+	return true;
+}
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
+		  const struct xe_reg_sr_entry *e)
+{
+	unsigned long idx = reg;
+	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
+	int ret;
+
+	if (pentry) {
+		if (!compatible_entries(pentry, e)) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		pentry->clr_bits |= e->clr_bits;
+		pentry->set_bits |= e->set_bits;
+		pentry->read_mask |= e->read_mask;
+
+		return 0;
+	}
+
+	pentry = alloc_entry(sr);
+	if (!pentry) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	*pentry = *e;
+	ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
+	if (ret)
+		goto fail;
+
+	return 0;
+
+fail:
+	DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n",
+		  idx, e->clr_bits, e->set_bits,
+		  str_yes_no(e->masked_reg), ret);
+
+	return ret;
+}
+
+static void apply_one_mmio(struct xe_gt *gt, u32 reg,
+			   struct xe_reg_sr_entry *entry)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 val;
+
+	/*
+	 * If this is a masked register, need to figure what goes on the upper
+	 * 16 bits: it's either the clr_bits (when using FIELD_SET and WR) or
+	 * the set_bits, when using SET.
+	 *
+	 * When it's not masked, we have to read it from hardware, unless we are
+	 * supposed to set all bits.
+	 */
+	if (entry->masked_reg)
+		val = (entry->clr_bits ?: entry->set_bits << 16);
+	else if (entry->clr_bits + 1)
+		val = (entry->reg_type == XE_RTP_REG_MCR ?
+		       xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) :
+		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+	else
+		val = 0;
+
+	/*
+	 * TODO: add selftest to validate all tables, regardless of platform:
+	 *   - Masked registers can't have set_bits with upper bits set
+	 *   - set_bits must be contained in clr_bits
+	 */
+	val |= entry->set_bits;
+
+	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val);
+
+	if (entry->reg_type == XE_RTP_REG_MCR)
+		xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val);
+	else
+		xe_mmio_write32(gt, reg, val);
+}
+
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	int err;
+
+	drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	xa_for_each(&sr->xa, reg, entry)
+		apply_one_mmio(gt, reg, entry);
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
+
+void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
+			       struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	unsigned int slot = 0;
+	int err;
+
+	drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	xa_for_each(&sr->xa, reg, entry) {
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
+				reg | entry->set_bits);
+		slot++;
+	}
+
+	/* And clear the rest just in case of garbage */
+	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++)
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
+				RING_NOPID(mmio_base).reg);
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
new file mode 100644
index 0000000000000..c3a9db251e925
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_
+#define _XE_REG_SR_
+
+#include "xe_reg_sr_types.h"
+
+/*
+ * Reg save/restore bookkeeping
+ */
+
+struct xe_device;
+struct xe_gt;
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
+int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
+		      struct xe_reg_sr_kv **dst);
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
+		  const struct xe_reg_sr_entry *e);
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
+void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
+			       struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
new file mode 100644
index 0000000000000..2fa7ff3966ba7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_TYPES_
+#define _XE_REG_SR_TYPES_
+
+#include <linux/xarray.h>
+#include <linux/types.h>
+
+#include "i915_reg_defs.h"
+
+struct xe_reg_sr_entry {
+	u32		clr_bits;
+	u32		set_bits;
+	/* Mask for bits to consider when reading value back */
+	u32		read_mask;
+	/*
+	 * "Masked registers" are marked in spec as register with the upper 16
+	 * bits as a mask for the bits that is being updated on the lower 16
+	 * bits when writing to it.
+	 */
+	u8		masked_reg;
+	u8		reg_type;
+};
+
+struct xe_reg_sr_kv {
+	u32			k;
+	struct xe_reg_sr_entry	v;
+};
+
+struct xe_reg_sr {
+	struct {
+		struct xe_reg_sr_entry *arr;
+		unsigned int used;
+		unsigned int allocated;
+		unsigned int grow_step;
+	} pool;
+	struct xarray xa;
+	const char *name;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
new file mode 100644
index 0000000000000..2e0c87b723952
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_reg_whitelist.h"
+
+#include "xe_platform_types.h"
+#include "xe_gt_types.h"
+#include "xe_rtp.h"
+
+#include "../i915/gt/intel_engine_regs.h"
+#include "../i915/gt/intel_gt_regs.h"
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x)	_XE_RTP_REG(x)
+#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+
+static bool match_not_render(const struct xe_gt *gt,
+			     const struct xe_hw_engine *hwe)
+{
+	return hwe->class != XE_ENGINE_CLASS_RENDER;
+}
+
+static const struct xe_rtp_entry register_whitelist[] = {
+	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT,
+				RING_FORCE_TO_NONPRIV_ACCESS_RD |
+				RING_FORCE_TO_NONPRIV_RANGE_4)
+	},
+	{ XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0)
+	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0)
+	},
+	{ XE_RTP_NAME("allow_read_ctx_timestamp"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
+	  XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0),
+				RING_FORCE_TO_NONPRIV_ACCESS_RD,
+				XE_RTP_FLAG(ENGINE_BASE))
+	},
+	{ XE_RTP_NAME("16014440446_part_1"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_WHITELIST_REGISTER(_MMIO(0x4400),
+				RING_FORCE_TO_NONPRIV_DENY |
+				RING_FORCE_TO_NONPRIV_RANGE_64)
+	},
+	{ XE_RTP_NAME("16014440446_part_2"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_WHITELIST_REGISTER(_MMIO(0x4500),
+				RING_FORCE_TO_NONPRIV_DENY |
+				RING_FORCE_TO_NONPRIV_RANGE_64)
+	},
+	{}
+};
+
+/**
+ * xe_reg_whitelist_process_engine - process table of registers to whitelist
+ * @hwe: engine instance to process whitelist for
+ *
+ * Process wwhitelist table for this platform, saving in @hwe all the
+ * registers that need to be whitelisted by the hardware so they can be accessed
+ * by userspace.
+ */
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
+{
+	xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
new file mode 100644
index 0000000000000..6e861b1bdb01e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_WHITELIST_
+#define _XE_REG_WHITELIST_
+
+struct xe_hw_engine;
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
new file mode 100644
index 0000000000000..f54409850d749
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XE_RES_CURSOR_H__
+#define __XE_RES_CURSOR_H__
+
+#include <linux/scatterlist.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
+
+/* state back for walking over vram_mgr and gtt_mgr allocations */
+struct xe_res_cursor {
+	u64 start;
+	u64 size;
+	u64 remaining;
+	void *node;
+	u32 mem_type;
+	struct scatterlist *sgl;
+};
+
+/**
+ * xe_res_first - initialize a xe_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations between @start and @size.
+ */
+static inline void xe_res_first(struct ttm_resource *res,
+				u64 start, u64 size,
+				struct xe_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+	struct list_head *head, *next;
+
+	cur->sgl = NULL;
+	if (!res)
+		goto fallback;
+
+	XE_BUG_ON(start + size > res->size);
+
+	cur->mem_type = res->mem_type;
+
+	switch (cur->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
+
+		block = list_first_entry_or_null(head,
+						 struct drm_buddy_block,
+						 link);
+		if (!block)
+			goto fallback;
+
+		while (start >= xe_ttm_vram_mgr_block_size(block)) {
+			start -= xe_ttm_vram_mgr_block_size(block);
+
+			next = block->link.next;
+			if (next != head)
+				block = list_entry(next, struct drm_buddy_block,
+						   link);
+		}
+
+		cur->start = xe_ttm_vram_mgr_block_start(block) + start;
+		cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+				size);
+		cur->remaining = size;
+		cur->node = block;
+		break;
+	default:
+		goto fallback;
+	}
+
+	return;
+
+fallback:
+	cur->start = start;
+	cur->size = size;
+	cur->remaining = size;
+	cur->node = NULL;
+	cur->mem_type = XE_PL_TT;
+	XE_WARN_ON(res && start + size > res->size);
+	return;
+}
+
+static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
+{
+	struct scatterlist *sgl = cur->sgl;
+	u64 start = cur->start;
+
+	while (start >= sg_dma_len(sgl)) {
+		start -= sg_dma_len(sgl);
+		sgl = sg_next(sgl);
+		XE_BUG_ON(!sgl);
+	}
+
+	cur->start = start;
+	cur->size = sg_dma_len(sgl) - start;
+	cur->sgl = sgl;
+}
+
+/**
+ * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table
+ *
+ * @sg: scatter gather table to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations between @start and @size.
+ */
+static inline void xe_res_first_sg(const struct sg_table *sg,
+				   u64 start, u64 size,
+				   struct xe_res_cursor *cur)
+{
+	XE_BUG_ON(!sg);
+	XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
+		  !IS_ALIGNED(size, PAGE_SIZE));
+	cur->node = NULL;
+	cur->start = start;
+	cur->remaining = size;
+	cur->size = 0;
+	cur->sgl = sg->sgl;
+	cur->mem_type = XE_PL_TT;
+	__xe_res_sg_next(cur);
+}
+
+/**
+ * xe_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forwrad, walking to the next node if necessary.
+ */
+static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
+{
+	struct drm_buddy_block *block;
+	struct list_head *next;
+	u64 start;
+
+	XE_BUG_ON(size > cur->remaining);
+
+	cur->remaining -= size;
+	if (!cur->remaining)
+		return;
+
+	if (cur->size > size) {
+		cur->size -= size;
+		cur->start += size;
+		return;
+	}
+
+	if (cur->sgl) {
+		cur->start += size;
+		__xe_res_sg_next(cur);
+		return;
+	}
+
+	switch (cur->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		start = size - cur->size;
+		block = cur->node;
+
+		next = block->link.next;
+		block = list_entry(next, struct drm_buddy_block, link);
+
+
+		while (start >= xe_ttm_vram_mgr_block_size(block)) {
+			start -= xe_ttm_vram_mgr_block_size(block);
+
+			next = block->link.next;
+			block = list_entry(next, struct drm_buddy_block, link);
+		}
+
+		cur->start = xe_ttm_vram_mgr_block_start(block) + start;
+		cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+				cur->remaining);
+		cur->node = block;
+		break;
+	default:
+		return;
+	}
+}
+
+/**
+ * xe_res_dma - return dma address of cursor at current position
+ *
+ * @cur: the cursor to return the dma address from
+ */
+static inline u64 xe_res_dma(const struct xe_res_cursor *cur)
+{
+	return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
new file mode 100644
index 0000000000000..fda7978a63e04
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_engine_types.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_ring_ops.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+
+static u32 preparser_disable(bool state)
+{
+	return MI_ARB_CHECK | BIT(8) | state;
+}
+
+static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i)
+{
+	dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
+	dw[i++] = addr + gt->mmio.adj_offset;
+	dw[i++] = AUX_INV;
+	dw[i++] = MI_NOOP;
+
+	return i;
+}
+
+static int emit_user_interrupt(u32 *dw, int i)
+{
+	dw[i++] = MI_USER_INTERRUPT;
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	dw[i++] = MI_ARB_CHECK;
+
+	return i;
+}
+
+static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
+{
+	dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag;
+	dw[i++] = lower_32_bits(batch_addr);
+	dw[i++] = upper_32_bits(batch_addr);
+
+	return i;
+}
+
+static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
+{
+	dw[i] = MI_FLUSH_DW + 1;
+	dw[i] |= flag;
+	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
+		MI_FLUSH_DW_STORE_INDEX;
+
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = ~0U;
+
+	return i;
+}
+
+static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
+{
+	u32 flags = PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
+		PIPE_CONTROL_TLB_INVALIDATE |
+		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+		PIPE_CONTROL_VF_CACHE_INVALIDATE |
+		PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+		PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_STORE_DATA_INDEX;
+
+	flags &= ~mask_flags;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = flags;
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
+#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))
+
+static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
+				       u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED;
+	dw[i++] = lower_32_bits(addr);
+	dw[i++] = upper_32_bits(addr);
+	dw[i++] = lower_32_bits(value);
+	dw[i++] = upper_32_bits(value);
+
+	return i;
+}
+
+static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
+			      int i)
+{
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
+		   PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
+		PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+	dw[i++] = 0; /* We're thrashing one extra dword. */
+
+	return i;
+}
+
+static u32 get_ppgtt_flag(struct xe_sched_job *job)
+{
+	return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0;
+}
+
+static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
+				  u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+
+	/* XXX: Conditional flushing possible */
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(0, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
+				   u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->engine->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+
+	/* XXX: Conditional flushing possible */
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(decode ? MI_INVALIDATE_BSD : 0, dw, i);
+	/* Wa_1809175790 */
+	if (!xe->info.has_flat_ccs) {
+		if (decode)
+			i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i);
+		else
+			i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i);
+	}
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+/*
+ * 3D-related flags that can't be set on _engines_ that lack access to the 3D
+ * pipeline (i.e., CCS engines).
+ */
+#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
+		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
+		PIPE_CONTROL_TILE_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_STALL | \
+		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
+		PIPE_CONTROL_PSD_SYNC | \
+		PIPE_CONTROL_AMFS_FLUSH | \
+		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
+		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
+
+/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
+#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
+		PIPE_CONTROL_3D_ENGINE_FLAGS | \
+		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
+		PIPE_CONTROL_FLUSH_ENABLE | \
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+		PIPE_CONTROL_DC_FLUSH_ENABLE)
+
+static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
+					    struct xe_lrc *lrc,
+					    u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->engine->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool pvc = xe->info.platform == XE_PVC;
+	u32 mask_flags = 0;
+
+	/* XXX: Conditional flushing possible */
+	dw[i++] = preparser_disable(true);
+	if (pvc)
+		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
+	i = emit_pipe_invalidate(mask_flags, dw, i);
+	/* Wa_1809175790 */
+	if (!xe->info.has_flat_ccs)
+		i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_migration_job_gen12(struct xe_sched_job *job,
+				     struct xe_lrc *lrc, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
+
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(0, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
+
+	dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
+		   MI_FLUSH_DW_OP_STOREDW) + 1;
+	dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = seqno; /* value */
+
+	i = emit_user_interrupt(dw, i);
+
+	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_job_gen12_copy(struct xe_sched_job *job)
+{
+	int i;
+
+	if (xe_sched_job_is_migration(job->engine)) {
+		emit_migration_job_gen12(job, job->engine->lrc,
+					 xe_sched_job_seqno(job));
+		return;
+	}
+
+	for (i = 0; i < job->engine->width; ++i)
+		__emit_job_gen12_copy(job, job->engine->lrc + i,
+				      job->batch_addr[i],
+				      xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_video(struct xe_sched_job *job)
+{
+	int i;
+
+	/* FIXME: Not doing parallel handshake for now */
+	for (i = 0; i < job->engine->width; ++i)
+		__emit_job_gen12_video(job, job->engine->lrc + i,
+				       job->batch_addr[i],
+				       xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_render_compute(struct xe_sched_job *job)
+{
+	int i;
+
+	for (i = 0; i < job->engine->width; ++i)
+		__emit_job_gen12_render_compute(job, job->engine->lrc + i,
+						job->batch_addr[i],
+						xe_sched_job_seqno(job));
+}
+
+static const struct xe_ring_ops ring_ops_gen12_copy = {
+	.emit_job = emit_job_gen12_copy,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_video = {
+	.emit_job = emit_job_gen12_video,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_render_compute = {
+	.emit_job = emit_job_gen12_render_compute,
+};
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_COPY:
+		return &ring_ops_gen12_copy;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return &ring_ops_gen12_video;
+	case XE_ENGINE_CLASS_RENDER:
+	case XE_ENGINE_CLASS_COMPUTE:
+		return &ring_ops_gen12_render_compute;
+	default:
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h
new file mode 100644
index 0000000000000..e942735d76a61
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_H_
+#define _XE_RING_OPS_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_ring_ops_types.h"
+
+struct xe_gt;
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h
new file mode 100644
index 0000000000000..1ae56e2ee7b46
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_TYPES_H_
+#define _XE_RING_OPS_TYPES_H_
+
+struct xe_sched_job;
+
+#define MAX_JOB_SIZE_DW 48
+#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
+
+/**
+ * struct xe_ring_ops - Ring operations
+ */
+struct xe_ring_ops {
+	/** @emit_job: Write job to ring */
+	void (*emit_job)(struct xe_sched_job *job);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
new file mode 100644
index 0000000000000..9e8d0e43c6438
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_rtp.h"
+
+#include <drm/xe_drm.h>
+
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_reg_sr.h"
+
+/**
+ * DOC: Register Table Processing
+ *
+ * Internal infrastructure to define how registers should be updated based on
+ * rules and actions. This can be used to define tables with multiple entries
+ * (one per register) that will be walked over at some point in time to apply
+ * the values to the registers that have matching rules.
+ */
+
+static bool rule_matches(struct xe_gt *gt,
+			 struct xe_hw_engine *hwe,
+			 const struct xe_rtp_entry *entry)
+{
+	const struct xe_device *xe = gt_to_xe(gt);
+	const struct xe_rtp_rule *r;
+	unsigned int i;
+	bool match;
+
+	for (r = entry->rules, i = 0; i < entry->n_rules;
+	     r = &entry->rules[++i]) {
+		switch (r->match_type) {
+		case XE_RTP_MATCH_PLATFORM:
+			match = xe->info.platform == r->platform;
+			break;
+		case XE_RTP_MATCH_SUBPLATFORM:
+			match = xe->info.platform == r->platform &&
+				xe->info.subplatform == r->subplatform;
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION:
+			/* TODO: match display */
+			match = xe->info.graphics_verx100 == r->ver_start;
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
+			match = xe->info.graphics_verx100 >= r->ver_start &&
+				xe->info.graphics_verx100 <= r->ver_end;
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION:
+			match = xe->info.media_verx100 == r->ver_start;
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION_RANGE:
+			match = xe->info.media_verx100 >= r->ver_start &&
+				xe->info.media_verx100 <= r->ver_end;
+			break;
+		case XE_RTP_MATCH_STEP:
+			/* TODO: match media/display */
+			match = xe->info.step.graphics >= r->step_start &&
+				xe->info.step.graphics < r->step_end;
+			break;
+		case XE_RTP_MATCH_ENGINE_CLASS:
+			match = hwe->class == r->engine_class;
+			break;
+		case XE_RTP_MATCH_NOT_ENGINE_CLASS:
+			match = hwe->class != r->engine_class;
+			break;
+		case XE_RTP_MATCH_FUNC:
+			match = r->match_func(gt, hwe);
+			break;
+		case XE_RTP_MATCH_INTEGRATED:
+			match = !xe->info.is_dgfx;
+			break;
+		case XE_RTP_MATCH_DISCRETE:
+			match = xe->info.is_dgfx;
+			break;
+
+		default:
+			XE_WARN_ON(r->match_type);
+		}
+
+		if (!match)
+			return false;
+	}
+
+	return true;
+}
+
+static void rtp_add_sr_entry(const struct xe_rtp_entry *entry,
+			     struct xe_gt *gt,
+			     u32 mmio_base,
+			     struct xe_reg_sr *sr)
+{
+	u32 reg = entry->regval.reg + mmio_base;
+	struct xe_reg_sr_entry sr_entry = {
+		.clr_bits = entry->regval.clr_bits,
+		.set_bits = entry->regval.set_bits,
+		.read_mask = entry->regval.read_mask,
+		.masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG,
+		.reg_type = entry->regval.reg_type,
+	};
+
+	xe_reg_sr_add(sr, reg, &sr_entry);
+}
+
+/**
+ * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr
+ * @entries: Table with RTP definitions
+ * @sr: Where to add an entry to with the values for matching. This can be
+ *      viewed as the "coalesced view" of multiple the tables. The bits for each
+ *      register set are expected not to collide with previously added entries
+ * @gt: The GT to be used for matching rules
+ * @hwe: Engine instance to use for matching rules and as mmio base
+ *
+ * Walk the table pointed by @entries (with an empty sentinel) and add all
+ * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
+ * used to calculate the right register offset
+ */
+void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
+		    struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	const struct xe_rtp_entry *entry;
+
+	for (entry = entries; entry && entry->name; entry++) {
+		u32 mmio_base = 0;
+
+		if (entry->regval.flags & XE_RTP_FLAG_FOREACH_ENGINE) {
+			struct xe_hw_engine *each_hwe;
+			enum xe_hw_engine_id id;
+
+			for_each_hw_engine(each_hwe, gt, id) {
+				mmio_base = each_hwe->mmio_base;
+
+				if (rule_matches(gt, each_hwe, entry))
+					rtp_add_sr_entry(entry, gt, mmio_base, sr);
+			}
+		} else if (rule_matches(gt, hwe, entry)) {
+			if (entry->regval.flags & XE_RTP_FLAG_ENGINE_BASE)
+				mmio_base = hwe->mmio_base;
+
+			rtp_add_sr_entry(entry, gt, mmio_base, sr);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
new file mode 100644
index 0000000000000..d4e11fdde77fd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_
+#define _XE_RTP_
+
+#include <linux/xarray.h>
+#include <linux/types.h>
+
+#include "xe_rtp_types.h"
+
+#include "i915_reg_defs.h"
+
+/*
+ * Register table poke infrastructure
+ */
+
+struct xe_hw_engine;
+struct xe_gt;
+struct xe_reg_sr;
+
+/*
+ * Helper macros - not to be used outside this header.
+ */
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x)
+#define __CALL_FOR_EACH_2(MACRO_, x, ...)					\
+	MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__)
+#define __CALL_FOR_EACH_3(MACRO_, x, ...)					\
+	MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__)
+#define __CALL_FOR_EACH_4(MACRO_, x, ...)					\
+	MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__)
+
+#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...)					\
+	CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__)
+#define CALL_FOR_EACH(MACRO_, x, ...)						\
+	_CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__)
+
+#define _XE_RTP_REG(x_)	(x_),						\
+			.reg_type = XE_RTP_REG_REGULAR
+#define _XE_RTP_MCR_REG(x_) (x_),					\
+			    .reg_type = XE_RTP_REG_MCR
+
+/*
+ * Helper macros for concatenating prefix - do not use them directly outside
+ * this header
+ */
+#define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) |
+#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) ,
+
+/*
+ * Macros to encode rules to match against platform, IP version, stepping, etc.
+ * Shouldn't be used directly - see XE_RTP_RULES()
+ */
+
+#define _XE_RTP_RULE_PLATFORM(plat__)						\
+	{ .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ }
+
+#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__)					\
+	{ .match_type = XE_RTP_MATCH_SUBPLATFORM,				\
+	  .platform = plat__, .subplatform = sub__ }
+
+#define _XE_RTP_RULE_STEP(start__, end__)					\
+	{ .match_type = XE_RTP_MATCH_STEP,					\
+	  .step_start = start__, .step_end = end__ }
+
+#define _XE_RTP_RULE_ENGINE_CLASS(cls__)					\
+	{ .match_type = XE_RTP_MATCH_ENGINE_CLASS,				\
+	  .engine_class = (cls__) }
+
+/**
+ * XE_RTP_RULE_PLATFORM - Create rule matching platform
+ * @plat_: platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_PLATFORM(plat_)						\
+	_XE_RTP_RULE_PLATFORM(XE_##plat_)
+
+/**
+ * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform
+ * @plat_: platform to match
+ * @sub_: sub-platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_)					\
+	_XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_)
+
+/**
+ * XE_RTP_RULE_STEP - Create rule matching platform stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule [ @start_, @end_ ), i.e. inclusive on
+ * the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_STEP(start_, end_)						\
+	_XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_)
+
+/**
+ * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class
+ * @cls_: Engine class to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_ENGINE_CLASS(cls_)						\
+	_XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_)
+
+/**
+ * XE_RTP_RULE_FUNC - Create rule using callback function for match
+ * @func__: Function to call to decide if rule matches
+ *
+ * This allows more complex checks to be performed. The ``XE_RTP``
+ * infrastructure will simply call the function @func_ passed to decide if this
+ * rule matches the device.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_FUNC(func__)						\
+	{ .match_type = XE_RTP_MATCH_FUNC,					\
+	  .match_func = (func__) }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version
+ * @ver__: Graphics IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version
+ * @ver_start__: First graphics IP version to match
+ * @ver_end__: Last graphics IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on boths sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__)		\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version
+ * @ver__: Graphics IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version
+ * @ver_start__: First media IP version to match
+ * @ver_end__: Last media IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on boths sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__)			\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_INTEGRATED						\
+	{ .match_type = XE_RTP_MATCH_INTEGRATED }
+
+/**
+ * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_DISCRETE							\
+	{ .match_type = XE_RTP_MATCH_DISCRETE }
+
+/**
+ * XE_RTP_WR - Helper to write a value to the register, overriding all the bits
+ * @reg_: Register
+ * @val_: Value to set
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * The correspondent notation in bspec is:
+ *
+ *	REGNAME = VALUE
+ */
+#define XE_RTP_WR(reg_, val_, ...)						\
+	.regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_),		\
+		    .read_mask = (~0u), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_SET - Set bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to set in the register
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 1
+ *	REGNAME[5] = 1
+ */
+#define XE_RTP_SET(reg_, val_, ...)						\
+	.regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_),	\
+		    .read_mask = (val_), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_CLR: Clear bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to clear in the register
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 0
+ *	REGNAME[5] = 0
+ */
+#define XE_RTP_CLR(reg_, val_, ...)						\
+	.regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0,		\
+		    .read_mask = (val_), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to the value in
+ * @reg_: Register
+ * @mask_bits_: Mask of bits to be changed in the register, forming a field
+ * @val_: Value to set in the field denoted by @mask_bits_
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is:
+ *
+ *	REGNAME[<end>:<start>] = VALUE
+ */
+#define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...)				\
+	.regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+		    .read_mask = (mask_bits_), ##__VA_ARGS__ }
+
+#define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...)		\
+	.regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+		    .read_mask = 0, ##__VA_ARGS__ }
+
+/**
+ * XE_WHITELIST_REGISTER - Add register to userspace whitelist
+ * @reg_: Register
+ * @flags_: Whitelist-specific flags to set
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * Add a register to the whitelist, allowing userspace to modify the ster with
+ * regular user privileges.
+ */
+#define XE_WHITELIST_REGISTER(reg_, flags_, ...)				\
+	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
+	.regval = { .reg = reg_, .set_bits = (flags_),			\
+		    .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,		\
+		    ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_NAME - Helper to set the name in xe_rtp_entry
+ * @s_: Name describing this rule, often a HW-specific number
+ *
+ * TODO: maybe move this behind a debug config?
+ */
+#define XE_RTP_NAME(s_)	.name = (s_)
+
+/**
+ * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry
+ * @f1_: Last part of a ``XE_RTP_FLAG_*``
+ * @...: Additional flags, defined like @f1_
+ *
+ * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be
+ * easily used to define struct xe_rtp_regval entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_FLAG(f1_, ...)							\
+	.flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
+
+/**
+ * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry
+ * @r1: Last part of XE_RTP_MATCH_*
+ * @...: Additional rules, defined like @r1
+ *
+ * At least one rule is needed and up to 4 are supported. Multiple rules are
+ * AND'ed together, i.e. all the rules must evaluate to true for the entry to
+ * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_RULES(r1, ...)							\
+	.n_rules = COUNT_ARGS(r1, ##__VA_ARGS__),				\
+	.rules = (struct xe_rtp_rule[]) {					\
+		CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__)	\
+	}
+
+void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
+		    struct xe_gt *gt, struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
new file mode 100644
index 0000000000000..b55b556a24958
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_TYPES_
+#define _XE_RTP_TYPES_
+
+#include <linux/types.h>
+
+#include "i915_reg_defs.h"
+
+struct xe_hw_engine;
+struct xe_gt;
+
+enum {
+	XE_RTP_REG_REGULAR,
+	XE_RTP_REG_MCR,
+};
+
+/**
+ * struct xe_rtp_regval - register and value for rtp table
+ */
+struct xe_rtp_regval {
+	/** @reg: Register */
+	u32		reg;
+	/*
+	 * TODO: maybe we need a union here with a func pointer for cases
+	 * that are too specific to be generalized
+	 */
+	/** @clr_bits: bits to clear when updating register */
+	u32		clr_bits;
+	/** @set_bits: bits to set when updating register */
+	u32		set_bits;
+#define XE_RTP_NOCHECK		.read_mask = 0
+	/** @read_mask: mask for bits to consider when reading value back */
+	u32		read_mask;
+#define XE_RTP_FLAG_FOREACH_ENGINE	BIT(0)
+#define XE_RTP_FLAG_MASKED_REG		BIT(1)
+#define XE_RTP_FLAG_ENGINE_BASE		BIT(2)
+	/** @flags: flags to apply on rule evaluation or action */
+	u8		flags;
+	/** @reg_type: register type, see ``XE_RTP_REG_*`` */
+	u8		reg_type;
+};
+
+enum {
+	XE_RTP_MATCH_PLATFORM,
+	XE_RTP_MATCH_SUBPLATFORM,
+	XE_RTP_MATCH_GRAPHICS_VERSION,
+	XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,
+	XE_RTP_MATCH_MEDIA_VERSION,
+	XE_RTP_MATCH_MEDIA_VERSION_RANGE,
+	XE_RTP_MATCH_INTEGRATED,
+	XE_RTP_MATCH_DISCRETE,
+	XE_RTP_MATCH_STEP,
+	XE_RTP_MATCH_ENGINE_CLASS,
+	XE_RTP_MATCH_NOT_ENGINE_CLASS,
+	XE_RTP_MATCH_FUNC,
+};
+
+/** struct xe_rtp_rule - match rule for processing entry */
+struct xe_rtp_rule {
+	u8 match_type;
+
+	/* match filters */
+	union {
+		/* MATCH_PLATFORM / MATCH_SUBPLATFORM */
+		struct {
+			u8 platform;
+			u8 subplatform;
+		};
+		/*
+		 * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE /
+		 * MATCH_MEDIA_VERSION  / XE_RTP_MATCH_MEDIA_VERSION_RANGE
+		 */
+		struct {
+			u32 ver_start;
+#define XE_RTP_END_VERSION_UNDEFINED	U32_MAX
+			u32 ver_end;
+		};
+		/* MATCH_STEP */
+		struct {
+			u8 step_start;
+			u8 step_end;
+		};
+		/* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */
+		struct {
+			u8 engine_class;
+		};
+		/* MATCH_FUNC */
+		bool (*match_func)(const struct xe_gt *gt,
+				   const struct xe_hw_engine *hwe);
+	};
+};
+
+/** struct xe_rtp_entry - Entry in an rtp table */
+struct xe_rtp_entry {
+	const char *name;
+	const struct xe_rtp_regval regval;
+	const struct xe_rtp_rule *rules;
+	unsigned int n_rules;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
new file mode 100644
index 0000000000000..7403410cd8064
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_sa.h"
+
+static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_sa_manager *sa_manager = arg;
+	struct xe_bo *bo = sa_manager->bo;
+
+	if (!bo) {
+		drm_err(drm, "no bo for sa manager\n");
+		return;
+	}
+
+	drm_suballoc_manager_fini(&sa_manager->base);
+
+	if (bo->vmap.is_iomem)
+		kvfree(sa_manager->cpu_ptr);
+
+	xe_bo_unpin_map_no_vm(bo);
+	sa_manager->bo = NULL;
+}
+
+int xe_sa_bo_manager_init(struct xe_gt *gt,
+			  struct xe_sa_manager *sa_manager,
+			  u32 size, u32 align)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 managed_size = size - SZ_4K;
+	struct xe_bo *bo;
+
+	sa_manager->bo = NULL;
+
+	bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
+			PTR_ERR(bo));
+		return PTR_ERR(bo);
+	}
+	sa_manager->bo = bo;
+
+	drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
+	sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
+
+	if (bo->vmap.is_iomem) {
+		sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
+		if (!sa_manager->cpu_ptr) {
+			xe_bo_unpin_map_no_vm(sa_manager->bo);
+			sa_manager->bo = NULL;
+			return -ENOMEM;
+		}
+	} else {
+		sa_manager->cpu_ptr = bo->vmap.vaddr;
+		memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
+	}
+
+	return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
+					sa_manager);
+}
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+				  unsigned size)
+{
+	return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
+}
+
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
+{
+	struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
+	struct xe_device *xe = gt_to_xe(sa_manager->bo->gt);
+
+	if (!sa_manager->bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo),
+			 xe_sa_bo_cpu_addr(sa_bo),
+			 drm_suballoc_size(sa_bo));
+}
+
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence)
+{
+	drm_suballoc_free(sa_bo, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
new file mode 100644
index 0000000000000..742282ef71792
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_H_
+#define _XE_SA_H_
+
+#include "xe_sa_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_gt;
+
+int xe_sa_bo_manager_init(struct xe_gt *gt,
+			  struct xe_sa_manager *sa_manager,
+			  u32 size, u32 align);
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+				  u32 size);
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo);
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence);
+
+static inline struct xe_sa_manager *
+to_xe_sa_manager(struct drm_suballoc_manager *mng)
+{
+	return container_of(mng, struct xe_sa_manager, base);
+}
+
+static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa)
+{
+	return to_xe_sa_manager(sa->manager)->gpu_addr +
+		drm_suballoc_soffset(sa);
+}
+
+static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
+{
+	return to_xe_sa_manager(sa->manager)->cpu_ptr +
+		drm_suballoc_soffset(sa);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
new file mode 100644
index 0000000000000..2ef896aeca1d4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_TYPES_H_
+#define _XE_SA_TYPES_H_
+
+#include <drm/drm_suballoc.h>
+
+struct xe_bo;
+
+struct xe_sa_manager {
+	struct drm_suballoc_manager base;
+	struct xe_bo *bo;
+	u64 gpu_addr;
+	void *cpu_ptr;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
new file mode 100644
index 0000000000000..ab81bfe17e8a4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sched_job.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/slab.h>
+
+#include "xe_device_types.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct kmem_cache *xe_sched_job_slab;
+static struct kmem_cache *xe_sched_job_parallel_slab;
+
+int __init xe_sched_job_module_init(void)
+{
+	xe_sched_job_slab =
+		kmem_cache_create("xe_sched_job",
+				  sizeof(struct xe_sched_job) +
+				  sizeof(u64), 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_sched_job_slab)
+		return -ENOMEM;
+
+	xe_sched_job_parallel_slab =
+		kmem_cache_create("xe_sched_job_parallel",
+				  sizeof(struct xe_sched_job) +
+				  sizeof(u64) *
+				  XE_HW_ENGINE_MAX_INSTANCE , 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_sched_job_parallel_slab) {
+		kmem_cache_destroy(xe_sched_job_slab);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void xe_sched_job_module_exit(void)
+{
+	kmem_cache_destroy(xe_sched_job_slab);
+	kmem_cache_destroy(xe_sched_job_parallel_slab);
+}
+
+static struct xe_sched_job *job_alloc(bool parallel)
+{
+	return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
+				 xe_sched_job_slab, GFP_KERNEL);
+}
+
+bool xe_sched_job_is_migration(struct xe_engine *e)
+{
+	return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) &&
+		!(e->flags & ENGINE_FLAG_WA);
+}
+
+static void job_free(struct xe_sched_job *job)
+{
+	struct xe_engine *e = job->engine;
+	bool is_migration = xe_sched_job_is_migration(e);
+
+	kmem_cache_free(xe_engine_is_parallel(job->engine) || is_migration ?
+			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
+}
+
+struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+					 u64 *batch_addr)
+{
+	struct xe_sched_job *job;
+	struct dma_fence **fences;
+	bool is_migration = xe_sched_job_is_migration(e);
+	int err;
+	int i, j;
+	u32 width;
+
+	/* Migration and kernel engines have their own locking */
+	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM |
+			  ENGINE_FLAG_WA))) {
+		lockdep_assert_held(&e->vm->lock);
+		if (!xe_vm_no_dma_fences(e->vm))
+			xe_vm_assert_held(e->vm);
+	}
+
+	job = job_alloc(xe_engine_is_parallel(e) || is_migration);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	job->engine = e;
+	kref_init(&job->refcount);
+	xe_engine_get(job->engine);
+
+	err = drm_sched_job_init(&job->drm, e->entity, 1, NULL);
+	if (err)
+		goto err_free;
+
+	if (!xe_engine_is_parallel(e)) {
+		job->fence = xe_lrc_create_seqno_fence(e->lrc);
+		if (IS_ERR(job->fence)) {
+			err = PTR_ERR(job->fence);
+			goto err_sched_job;
+		}
+	} else {
+		struct dma_fence_array *cf;
+
+		fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL);
+		if (!fences) {
+			err = -ENOMEM;
+			goto err_sched_job;
+		}
+
+		for (j = 0; j < e->width; ++j) {
+			fences[j] = xe_lrc_create_seqno_fence(e->lrc + j);
+			if (IS_ERR(fences[j])) {
+				err = PTR_ERR(fences[j]);
+				goto err_fences;
+			}
+		}
+
+		cf = dma_fence_array_create(e->width, fences,
+					    e->parallel.composite_fence_ctx,
+					    e->parallel.composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--e->parallel.composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+
+		/* Sanity check */
+		for (j = 0; j < e->width; ++j)
+			XE_BUG_ON(cf->base.seqno != fences[j]->seqno);
+
+		job->fence = &cf->base;
+	}
+
+	width = e->width;
+	if (is_migration)
+		width = 2;
+
+	for (i = 0; i < width; ++i)
+		job->batch_addr[i] = batch_addr[i];
+
+	trace_xe_sched_job_create(job);
+	return job;
+
+err_fences:
+	for (j = j - 1; j >= 0; --j) {
+		--e->lrc[j].fence_ctx.next_seqno;
+		dma_fence_put(fences[j]);
+	}
+	kfree(fences);
+err_sched_job:
+	drm_sched_job_cleanup(&job->drm);
+err_free:
+	xe_engine_put(e);
+	job_free(job);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_sched_job_destroy - Destroy XE schedule job
+ * @ref: reference to XE schedule job
+ *
+ * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
+ * base DRM schedule job, and free memory for XE schedule job.
+ */
+void xe_sched_job_destroy(struct kref *ref)
+{
+	struct xe_sched_job *job =
+		container_of(ref, struct xe_sched_job, refcount);
+
+	xe_engine_put(job->engine);
+	dma_fence_put(job->fence);
+	drm_sched_job_cleanup(&job->drm);
+	job_free(job);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
+		return;
+
+	dma_fence_set_error(job->fence, error);
+
+	if (dma_fence_is_array(job->fence)) {
+		struct dma_fence_array *array =
+			to_dma_fence_array(job->fence);
+		struct dma_fence **child = array->fences;
+		unsigned int nchild = array->num_fences;
+
+		do {
+			struct dma_fence *current_fence = *child++;
+
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &current_fence->flags))
+				continue;
+			dma_fence_set_error(current_fence, error);
+		} while (--nchild);
+	}
+
+	trace_xe_sched_job_set_error(job);
+
+	dma_fence_enable_sw_signaling(job->fence);
+	xe_hw_fence_irq_run(job->engine->fence_irq);
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->engine->lrc;
+
+	return xe_lrc_start_seqno(lrc) >= xe_sched_job_seqno(job);
+}
+
+bool xe_sched_job_completed(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->engine->lrc;
+
+	/*
+	 * Can safely check just LRC[0] seqno as that is last seqno written when
+	 * parallel handshake is done.
+	 */
+
+	return xe_lrc_seqno(lrc) >= xe_sched_job_seqno(job);
+}
+
+void xe_sched_job_arm(struct xe_sched_job *job)
+{
+	drm_sched_job_arm(&job->drm);
+}
+
+void xe_sched_job_push(struct xe_sched_job *job)
+{
+	xe_sched_job_get(job);
+	trace_xe_sched_job_exec(job);
+	drm_sched_entity_push_job(&job->drm);
+	xe_sched_job_put(job);
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
new file mode 100644
index 0000000000000..5315ad8656c24
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_H_
+#define _XE_SCHED_JOB_H_
+
+#include "xe_sched_job_types.h"
+
+#define XE_SCHED_HANG_LIMIT 1
+#define XE_SCHED_JOB_TIMEOUT LONG_MAX
+
+int xe_sched_job_module_init(void);
+void xe_sched_job_module_exit(void);
+
+struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+					 u64 *batch_addr);
+void xe_sched_job_destroy(struct kref *ref);
+
+/**
+ * xe_sched_job_get - get reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Increment XE schedule job's reference count
+ */
+static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
+{
+	kref_get(&job->refcount);
+	return job;
+}
+
+/**
+ * xe_sched_job_put - put reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Decrement XE schedule job's reference count, call xe_sched_job_destroy when
+ * reference count == 0.
+ */
+static inline void xe_sched_job_put(struct xe_sched_job *job)
+{
+	kref_put(&job->refcount, xe_sched_job_destroy);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error);
+static inline bool xe_sched_job_is_error(struct xe_sched_job *job)
+{
+	return job->fence->error < 0;
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job);
+bool xe_sched_job_completed(struct xe_sched_job *job);
+
+void xe_sched_job_arm(struct xe_sched_job *job);
+void xe_sched_job_push(struct xe_sched_job *job);
+
+static inline struct xe_sched_job *
+to_xe_sched_job(struct drm_sched_job *drm)
+{
+	return container_of(drm, struct xe_sched_job, drm);
+}
+
+static inline u32 xe_sched_job_seqno(struct xe_sched_job *job)
+{
+	return job->fence->seqno;
+}
+
+static inline void
+xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
+{
+	job->migrate_flush_flags = flags;
+}
+
+bool xe_sched_job_is_migration(struct xe_engine *e);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
new file mode 100644
index 0000000000000..fd1d75996127b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_TYPES_H_
+#define _XE_SCHED_JOB_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+struct xe_engine;
+
+/**
+ * struct xe_sched_job - XE schedule job (batch buffer tracking)
+ */
+struct xe_sched_job {
+	/** @drm: base DRM scheduler job */
+	struct drm_sched_job drm;
+	/** @engine: XE submission engine */
+	struct xe_engine *engine;
+	/** @refcount: ref count of this job */
+	struct kref refcount;
+	/**
+	 * @fence: dma fence to indicate completion. 1 way relationship - job
+	 * can safely reference fence, fence cannot safely reference job.
+	 */
+#define JOB_FLAG_SUBMIT		DMA_FENCE_FLAG_USER_BITS
+	struct dma_fence *fence;
+	/** @user_fence: write back value when BB is complete */
+	struct {
+		/** @used: user fence is used */
+		bool used;
+		/** @addr: address to write to */
+		u64 addr;
+		/** @value: write back value */
+		u64 value;
+	} user_fence;
+	/** @migrate_flush_flags: Additional flush flags for migration jobs */
+	u32 migrate_flush_flags;
+	/** @batch_addr: batch buffer address of job */
+	u64 batch_addr[0];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
new file mode 100644
index 0000000000000..ca77d09715297
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_step.h"
+
+#include "xe_device.h"
+#include "xe_platform_types.h"
+
+/*
+ * Provide mapping between PCI's revision ID to the individual GMD
+ * (Graphics/Media/Display) stepping values that can be compared numerically.
+ *
+ * Some platforms may have unusual ways of mapping PCI revision ID to GMD
+ * steppings.  E.g., in some cases a higher PCI revision may translate to a
+ * lower stepping of the GT and/or display IP.
+ *
+ * Also note that some revisions/steppings may have been set aside as
+ * placeholders but never materialized in real hardware; in those cases there
+ * may be jumps in the revision IDs or stepping values in the tables below.
+ */
+
+/*
+ * Some platforms always have the same stepping value for GT and display;
+ * use a macro to define these to make it easier to identify the platforms
+ * where the two steppings can deviate.
+ */
+#define COMMON_GT_MEDIA_STEP(x_)	\
+	.graphics = STEP_##x_,		\
+	.media = STEP_##x_
+
+#define COMMON_STEP(x_)			\
+	COMMON_GT_MEDIA_STEP(x_),	\
+	.graphics = STEP_##x_,		\
+	.media = STEP_##x_,		\
+	.display = STEP_##x_
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */
+static const struct xe_step_info tgl_revids[] = {
+	[0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info dg1_revids[] = {
+	[0] = { COMMON_STEP(A0) },
+	[1] = { COMMON_STEP(B0) },
+};
+
+static const struct xe_step_info adls_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g10_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g11_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 },
+	[0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g12_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info pvc_revid_step_tbl[] = {
+	[0x3] = { .graphics = STEP_A0 },
+	[0x5] = { .graphics = STEP_B0 },
+	[0x6] = { .graphics = STEP_B1 },
+	[0x7] = { .graphics = STEP_C0 },
+};
+
+static const int pvc_basedie_subids[] = {
+	[0x0] = STEP_A0,
+	[0x3] = STEP_B0,
+	[0x4] = STEP_B1,
+	[0x5] = STEP_B3,
+};
+
+__diag_pop();
+
+struct xe_step_info xe_step_get(struct xe_device *xe)
+{
+	const struct xe_step_info *revids = NULL;
+	struct xe_step_info step = {};
+	u16 revid = xe->info.revid;
+	int size = 0;
+	const int *basedie_info = NULL;
+	int basedie_size = 0;
+	int baseid = 0;
+
+	if (xe->info.platform == XE_PVC) {
+		baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid);
+		revid = FIELD_GET(GENMASK(2, 0), xe->info.revid);
+		revids = pvc_revid_step_tbl;
+		size = ARRAY_SIZE(pvc_revid_step_tbl);
+		basedie_info = pvc_basedie_subids;
+		basedie_size = ARRAY_SIZE(pvc_basedie_subids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) {
+		revids = dg2_g10_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) {
+		revids = dg2_g11_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g11_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) {
+		revids = dg2_g12_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
+	} else if (xe->info.platform == XE_ALDERLAKE_S) {
+		revids = adls_revids;
+		size = ARRAY_SIZE(adls_revids);
+	} else if (xe->info.platform == XE_DG1) {
+		revids = dg1_revids;
+		size = ARRAY_SIZE(dg1_revids);
+	} else if (xe->info.platform == XE_TIGERLAKE) {
+		revids = tgl_revids;
+		size = ARRAY_SIZE(tgl_revids);
+	}
+
+	/* Not using the stepping scheme for the platform yet. */
+	if (!revids)
+		return step;
+
+	if (revid < size && revids[revid].graphics != STEP_NONE) {
+		step = revids[revid];
+	} else {
+		drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid);
+
+		/*
+		 * If we hit a gap in the revid array, use the information for
+		 * the next revid.
+		 *
+		 * This may be wrong in all sorts of ways, especially if the
+		 * steppings in the array are not monotonically increasing, but
+		 * it's better than defaulting to 0.
+		 */
+		while (revid < size && revids[revid].graphics == STEP_NONE)
+			revid++;
+
+		if (revid < size) {
+			drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n",
+				revid);
+			step = revids[revid];
+		} else {
+			drm_dbg(&xe->drm, "Using future steppings\n");
+			step.graphics = STEP_FUTURE;
+			step.display = STEP_FUTURE;
+		}
+	}
+
+	drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE);
+
+	if (basedie_info && basedie_size) {
+		if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) {
+			step.basedie = basedie_info[baseid];
+		} else {
+			drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid);
+			step.basedie = STEP_FUTURE;
+		}
+	}
+
+	return step;
+}
+
+#define STEP_NAME_CASE(name)	\
+	case STEP_##name:	\
+		return #name;
+
+const char *xe_step_name(enum xe_step step)
+{
+	switch (step) {
+	STEP_NAME_LIST(STEP_NAME_CASE);
+
+	default:
+		return "**";
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
new file mode 100644
index 0000000000000..0c596c8579fbe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_H_
+#define _XE_STEP_H_
+
+#include <linux/types.h>
+
+#include "xe_step_types.h"
+
+struct xe_device;
+
+struct xe_step_info xe_step_get(struct xe_device *xe);
+const char *xe_step_name(enum xe_step step);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h
new file mode 100644
index 0000000000000..b7859f9647caa
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_TYPES_H_
+#define _XE_STEP_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_step_info {
+	u8 graphics;
+	u8 media;
+	u8 display;
+	u8 basedie;
+};
+
+#define STEP_ENUM_VAL(name)  STEP_##name,
+
+#define STEP_NAME_LIST(func)		\
+	func(A0)			\
+	func(A1)			\
+	func(A2)			\
+	func(B0)			\
+	func(B1)			\
+	func(B2)			\
+	func(B3)			\
+	func(C0)			\
+	func(C1)			\
+	func(D0)			\
+	func(D1)			\
+	func(E0)			\
+	func(F0)			\
+	func(G0)			\
+	func(H0)			\
+	func(I0)			\
+	func(I1)			\
+	func(J0)
+
+/*
+ * Symbolic steppings that do not match the hardware. These are valid both as gt
+ * and display steppings as symbolic names.
+ */
+enum xe_step {
+	STEP_NONE = 0,
+	STEP_NAME_LIST(STEP_ENUM_VAL)
+	STEP_FUTURE,
+	STEP_FOREVER,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
new file mode 100644
index 0000000000000..0fbd8d0978cfd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sync.h"
+
+#include <linux/kthread.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+#include <drm/xe_drm.h>
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+
+#include "xe_device_types.h"
+#include "xe_sched_job_types.h"
+#include "xe_macros.h"
+
+#define SYNC_FLAGS_TYPE_MASK 0x3
+#define SYNC_FLAGS_FENCE_INSTALLED	0x10000
+
+struct user_fence {
+	struct xe_device *xe;
+	struct kref refcount;
+	struct dma_fence_cb cb;
+	struct work_struct worker;
+	struct mm_struct *mm;
+	u64 __user *addr;
+	u64 value;
+};
+
+static void user_fence_destroy(struct kref *kref)
+{
+	struct user_fence *ufence = container_of(kref, struct user_fence,
+						 refcount);
+
+	mmdrop(ufence->mm);
+	kfree(ufence);
+}
+
+static void user_fence_get(struct user_fence *ufence)
+{
+	kref_get(&ufence->refcount);
+}
+
+static void user_fence_put(struct user_fence *ufence)
+{
+	kref_put(&ufence->refcount, user_fence_destroy);
+}
+
+static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
+					    u64 value)
+{
+	struct user_fence *ufence;
+
+	ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
+	if (!ufence)
+		return NULL;
+
+	ufence->xe = xe;
+	kref_init(&ufence->refcount);
+	ufence->addr = u64_to_user_ptr(addr);
+	ufence->value = value;
+	ufence->mm = current->mm;
+	mmgrab(ufence->mm);
+
+	return ufence;
+}
+
+static void user_fence_worker(struct work_struct *w)
+{
+	struct user_fence *ufence = container_of(w, struct user_fence, worker);
+
+	if (mmget_not_zero(ufence->mm)) {
+		kthread_use_mm(ufence->mm);
+		if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
+			XE_WARN_ON("Copy to user failed");
+		kthread_unuse_mm(ufence->mm);
+		mmput(ufence->mm);
+	}
+
+	wake_up_all(&ufence->xe->ufence_wq);
+	user_fence_put(ufence);
+}
+
+static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
+{
+	INIT_WORK(&ufence->worker, user_fence_worker);
+	queue_work(ufence->xe->ordered_wq, &ufence->worker);
+	dma_fence_put(fence);
+}
+
+static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct user_fence *ufence = container_of(cb, struct user_fence, cb);
+
+	kick_ufence(ufence, fence);
+}
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			bool exec, bool no_dma_fences)
+{
+	struct drm_xe_sync sync_in;
+	int err;
+
+	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, sync_in.flags &
+			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)))
+		return -EINVAL;
+
+	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
+	case DRM_XE_SYNC_SYNCOBJ:
+		if (XE_IOCTL_ERR(xe, no_dma_fences))
+			return -ENOTSUPP;
+
+		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_ERR(xe, !sync->syncobj))
+			return -ENOENT;
+
+		if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_ERR(xe, !sync->fence))
+				return -EINVAL;
+		}
+		break;
+
+	case DRM_XE_SYNC_TIMELINE_SYNCOBJ:
+		if (XE_IOCTL_ERR(xe, no_dma_fences))
+			return -ENOTSUPP;
+
+		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		if (XE_IOCTL_ERR(xe, sync_in.timeline_value == 0))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_ERR(xe, !sync->syncobj))
+			return -ENOENT;
+
+		if (sync_in.flags & DRM_XE_SYNC_SIGNAL) {
+			sync->chain_fence = dma_fence_chain_alloc();
+			if (!sync->chain_fence)
+				return -ENOMEM;
+		} else {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_ERR(xe, !sync->fence))
+				return -EINVAL;
+
+			err = dma_fence_chain_find_seqno(&sync->fence,
+							 sync_in.timeline_value);
+			if (err)
+				return err;
+		}
+		break;
+
+	case DRM_XE_SYNC_DMA_BUF:
+		if (XE_IOCTL_ERR(xe, "TODO"))
+			return -EINVAL;
+		break;
+
+	case DRM_XE_SYNC_USER_FENCE:
+		if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL)))
+			return -ENOTSUPP;
+
+		if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7))
+			return -EINVAL;
+
+		if (exec) {
+			sync->addr = sync_in.addr;
+		} else {
+			sync->ufence = user_fence_create(xe, sync_in.addr,
+							 sync_in.timeline_value);
+			if (XE_IOCTL_ERR(xe, !sync->ufence))
+				return -ENOMEM;
+		}
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	sync->flags = sync_in.flags;
+	sync->timeline_value = sync_in.timeline_value;
+
+	return 0;
+}
+
+int xe_sync_entry_wait(struct xe_sync_entry *sync)
+{
+	if (sync->fence)
+		dma_fence_wait(sync->fence, true);
+
+	return 0;
+}
+
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
+{
+	int err;
+
+	if (sync->fence) {
+		err = drm_sched_job_add_dependency(&job->drm,
+						   dma_fence_get(sync->fence));
+		if (err) {
+			dma_fence_put(sync->fence);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
+			  struct dma_fence *fence)
+{
+	if (!(sync->flags & DRM_XE_SYNC_SIGNAL) ||
+	    sync->flags & SYNC_FLAGS_FENCE_INSTALLED)
+		return false;
+
+	if (sync->chain_fence) {
+		drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
+				      fence, sync->timeline_value);
+		/*
+		 * The chain's ownership is transferred to the
+		 * timeline.
+		 */
+		sync->chain_fence = NULL;
+	} else if (sync->syncobj) {
+		drm_syncobj_replace_fence(sync->syncobj, fence);
+	} else if (sync->ufence) {
+		int err;
+
+		dma_fence_get(fence);
+		user_fence_get(sync->ufence);
+		err = dma_fence_add_callback(fence, &sync->ufence->cb,
+					     user_fence_cb);
+		if (err == -ENOENT) {
+			kick_ufence(sync->ufence, fence);
+		} else if (err) {
+			XE_WARN_ON("failed to add user fence");
+			user_fence_put(sync->ufence);
+			dma_fence_put(fence);
+		}
+	} else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) ==
+		   DRM_XE_SYNC_USER_FENCE) {
+		job->user_fence.used = true;
+		job->user_fence.addr = sync->addr;
+		job->user_fence.value = sync->timeline_value;
+	}
+
+	/* TODO: external BO? */
+
+	sync->flags |= SYNC_FLAGS_FENCE_INSTALLED;
+
+	return true;
+}
+
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
+{
+	if (sync->syncobj)
+		drm_syncobj_put(sync->syncobj);
+	if (sync->fence)
+		dma_fence_put(sync->fence);
+	if (sync->chain_fence)
+		dma_fence_put(&sync->chain_fence->base);
+	if (sync->ufence)
+		user_fence_put(sync->ufence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
new file mode 100644
index 0000000000000..4cbcf7a199110
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_H_
+#define _XE_SYNC_H_
+
+#include "xe_sync_types.h"
+
+struct xe_device;
+struct xe_file;
+struct xe_sched_job;
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			bool exec, bool compute_mode);
+int xe_sync_entry_wait(struct xe_sync_entry *sync);
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
+			   struct xe_sched_job *job);
+bool xe_sync_entry_signal(struct xe_sync_entry *sync,
+			  struct xe_sched_job *job,
+			  struct dma_fence *fence);
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
new file mode 100644
index 0000000000000..24fccc26cb530
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_TYPES_H_
+#define _XE_SYNC_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_syncobj;
+struct dma_fence;
+struct dma_fence_chain;
+struct drm_xe_sync;
+struct user_fence;
+
+struct xe_sync_entry {
+	struct drm_syncobj *syncobj;
+	struct dma_fence *fence;
+	struct dma_fence_chain *chain_fence;
+	struct user_fence *ufence;
+	u64 addr;
+	u64 timeline_value;
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c
new file mode 100644
index 0000000000000..2570c0b859c45
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "xe_trace.h"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
new file mode 100644
index 0000000000000..a5f963f1f6ebc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xe
+
+#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _XE_TRACE_H_
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include "xe_bo_types.h"
+#include "xe_engine_types.h"
+#include "xe_gpu_scheduler_types.h"
+#include "xe_gt_types.h"
+#include "xe_guc_engine_types.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+DECLARE_EVENT_CLASS(xe_bo,
+		    TP_PROTO(struct xe_bo *bo),
+		    TP_ARGS(bo),
+
+		    TP_STRUCT__entry(
+			     __field(size_t, size)
+			     __field(u32, flags)
+			     __field(u64, vm)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->size = bo->size;
+			   __entry->flags = bo->flags;
+			   __entry->vm = (u64)bo->vm;
+			   ),
+
+		    TP_printk("size=%ld, flags=0x%02x, vm=0x%016llx",
+			      __entry->size, __entry->flags, __entry->vm)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
+	     TP_PROTO(struct xe_bo *bo),
+	     TP_ARGS(bo)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_move,
+	     TP_PROTO(struct xe_bo *bo),
+	     TP_ARGS(bo)
+);
+
+DECLARE_EVENT_CLASS(xe_engine,
+		    TP_PROTO(struct xe_engine *e),
+		    TP_ARGS(e),
+
+		    TP_STRUCT__entry(
+			     __field(enum xe_engine_class, class)
+			     __field(u32, logical_mask)
+			     __field(u8, gt_id)
+			     __field(u16, width)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->class = e->class;
+			   __entry->logical_mask = e->logical_mask;
+			   __entry->gt_id = e->gt->info.id;
+			   __entry->width = e->width;
+			   __entry->guc_id = e->guc->id;
+			   __entry->guc_state = atomic_read(&e->guc->state);
+			   __entry->flags = e->flags;
+			   ),
+
+		    TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
+			      __entry->class, __entry->logical_mask,
+			      __entry->gt_id, __entry->width, __entry->guc_id,
+			      __entry->guc_state, __entry->flags)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_create,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_supress_resume,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_submit,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_done,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_register,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_deregister,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_deregister_done,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_close,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_kill,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_destroy,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_reset,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_stop,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_resubmit,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_job,
+		    TP_PROTO(struct xe_sched_job *job),
+		    TP_ARGS(job),
+
+		    TP_STRUCT__entry(
+			     __field(u32, seqno)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     __field(int, error)
+			     __field(u64, fence)
+			     __field(u64, batch_addr)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->seqno = xe_sched_job_seqno(job);
+			   __entry->guc_id = job->engine->guc->id;
+			   __entry->guc_state =
+			   atomic_read(&job->engine->guc->state);
+			   __entry->flags = job->engine->flags;
+			   __entry->error = job->fence->error;
+			   __entry->fence = (u64)job->fence;
+			   __entry->batch_addr = (u64)job->batch_addr[0];
+			   ),
+
+		    TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+			      __entry->fence, __entry->seqno, __entry->guc_id,
+			      __entry->batch_addr, __entry->guc_state,
+			      __entry->flags, __entry->error)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_create,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_exec,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_run,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_free,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_ban,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_msg,
+		    TP_PROTO(struct xe_sched_msg *msg),
+		    TP_ARGS(msg),
+
+		    TP_STRUCT__entry(
+			     __field(u32, opcode)
+			     __field(u16, guc_id)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->opcode = msg->opcode;
+			   __entry->guc_id =
+			   ((struct xe_engine *)msg->private_data)->guc->id;
+			   ),
+
+		    TP_printk("guc_id=%d, opcode=%u", __entry->guc_id,
+			      __entry->opcode)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DECLARE_EVENT_CLASS(xe_hw_fence,
+		    TP_PROTO(struct xe_hw_fence *fence),
+		    TP_ARGS(fence),
+
+		    TP_STRUCT__entry(
+			     __field(u64, ctx)
+			     __field(u32, seqno)
+			     __field(u64, fence)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->ctx = fence->dma.context;
+			   __entry->seqno = fence->dma.seqno;
+			   __entry->fence = (u64)fence;
+			   ),
+
+		    TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
+			      __entry->ctx, __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DECLARE_EVENT_CLASS(xe_vma,
+		    TP_PROTO(struct xe_vma *vma),
+		    TP_ARGS(vma),
+
+		    TP_STRUCT__entry(
+			     __field(u64, vma)
+			     __field(u32, asid)
+			     __field(u64, start)
+			     __field(u64, end)
+			     __field(u64, ptr)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vma = (u64)vma;
+			   __entry->asid = vma->vm->usm.asid;
+			   __entry->start = vma->start;
+			   __entry->end = vma->end;
+			   __entry->ptr = (u64)vma->userptr.ptr;
+			   ),
+
+		    TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",
+			      __entry->vma, __entry->asid, __entry->start,
+			      __entry->end, __entry->ptr)
+)
+
+DEFINE_EVENT(xe_vma, xe_vma_flush,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pagefault,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_acc,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_fail,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pf_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_unbind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_evict,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DECLARE_EVENT_CLASS(xe_vm,
+		    TP_PROTO(struct xe_vm *vm),
+		    TP_ARGS(vm),
+
+		    TP_STRUCT__entry(
+			     __field(u64, vm)
+			     __field(u32, asid)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vm = (u64)vm;
+			   __entry->asid = vm->usm.asid;
+			   ),
+
+		    TP_printk("vm=0x%016llx, asid=0x%05x",  __entry->vm,
+			      __entry->asid)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_create,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_free,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_cpu_bind,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_restart,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+TRACE_EVENT(xe_guc_ct_h2g_flow_control,
+	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+	    TP_ARGS(_head, _tail, size, space, len),
+
+	    TP_STRUCT__entry(
+		     __field(u32, _head)
+		     __field(u32, _tail)
+		     __field(u32, size)
+		     __field(u32, space)
+		     __field(u32, len)
+		     ),
+
+	    TP_fast_assign(
+		   __entry->_head = _head;
+		   __entry->_tail = _tail;
+		   __entry->size = size;
+		   __entry->space = space;
+		   __entry->len = len;
+		   ),
+
+	    TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
+		      __entry->_head, __entry->_tail, __entry->size,
+		      __entry->space, __entry->len)
+);
+
+TRACE_EVENT(xe_guc_ct_g2h_flow_control,
+	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+	    TP_ARGS(_head, _tail, size, space, len),
+
+	    TP_STRUCT__entry(
+		     __field(u32, _head)
+		     __field(u32, _tail)
+		     __field(u32, size)
+		     __field(u32, space)
+		     __field(u32, len)
+		     ),
+
+	    TP_fast_assign(
+		   __entry->_head = _head;
+		   __entry->_tail = _tail;
+		   __entry->size = size;
+		   __entry->space = space;
+		   __entry->len = len;
+		   ),
+
+	    TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
+		      __entry->_head, __entry->_tail, __entry->size,
+		      __entry->space, __entry->len)
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
+#define TRACE_INCLUDE_FILE xe_trace
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
new file mode 100644
index 0000000000000..a0ba8bba84d14
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_ttm_gtt_mgr.h"
+
+struct xe_ttm_gtt_node {
+	struct ttm_buffer_object *tbo;
+	struct ttm_range_mgr_node base;
+};
+
+static inline struct xe_ttm_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_gtt_mgr, manager);
+}
+
+static inline struct xe_ttm_gtt_node *
+to_xe_ttm_gtt_node(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_gtt_node, base.base);
+}
+
+static int xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man,
+			      struct ttm_buffer_object *tbo,
+			      const struct ttm_place *place,
+			      struct ttm_resource **res)
+{
+	struct xe_ttm_gtt_node *node;
+	int r;
+
+	node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	node->tbo = tbo;
+	ttm_resource_init(tbo, place, &node->base.base);
+
+	if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+	    ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) {
+		r = -ENOSPC;
+		goto err_fini;
+	}
+
+	node->base.mm_nodes[0].start = 0;
+	node->base.mm_nodes[0].size = PFN_UP(node->base.base.size);
+	node->base.base.start = XE_BO_INVALID_OFFSET;
+
+	*res = &node->base.base;
+
+	return 0;
+
+err_fini:
+	ttm_resource_fini(man, &node->base.base);
+	kfree(node);
+	return r;
+}
+
+static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man,
+			       struct ttm_resource *res)
+{
+	struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res);
+
+	ttm_resource_fini(man, res);
+	kfree(node);
+}
+
+static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man,
+				 struct drm_printer *printer)
+{
+
+}
+
+static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = {
+	.alloc = xe_ttm_gtt_mgr_new,
+	.free = xe_ttm_gtt_mgr_del,
+	.debug = xe_ttm_gtt_mgr_debug
+};
+
+static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_ttm_gtt_mgr *mgr = arg;
+	struct xe_device *xe = gt_to_xe(mgr->gt);
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	ttm_resource_manager_set_used(man, false);
+
+	err = ttm_resource_manager_evict_all(&xe->ttm, man);
+	if (err)
+		return;
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL);
+}
+
+int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
+			u64 gtt_size)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	mgr->gt = gt;
+	man->use_tt = true;
+	man->func = &xe_ttm_gtt_mgr_func;
+
+	ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT);
+
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, &mgr->manager);
+	ttm_resource_manager_set_used(man, true);
+
+	err = drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
new file mode 100644
index 0000000000000..d1d57cb9c2b89
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTGM_GTT_MGR_H_
+#define _XE_TTGM_GTT_MGR_H_
+
+#include "xe_ttm_gtt_mgr_types.h"
+
+struct xe_gt;
+
+int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
+			u64 gtt_size);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
new file mode 100644
index 0000000000000..c66737488326f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_GTT_MGR_TYPES_H_
+#define _XE_TTM_GTT_MGR_TYPES_H_
+
+#include <drm/ttm/ttm_device.h>
+
+struct xe_gt;
+
+struct xe_ttm_gtt_mgr {
+	struct xe_gt *gt;
+	struct ttm_resource_manager manager;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
new file mode 100644
index 0000000000000..e391e81d3640f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_vram_mgr.h"
+
+static inline struct xe_ttm_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_vram_mgr, manager);
+}
+
+static inline struct xe_gt *
+mgr_to_gt(struct xe_ttm_vram_mgr *mgr)
+{
+	return mgr->gt;
+}
+
+static inline struct drm_buddy_block *
+xe_ttm_vram_mgr_first_block(struct list_head *list)
+{
+	return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+	struct drm_buddy_block *block;
+	u64 start, size;
+
+	block = xe_ttm_vram_mgr_first_block(head);
+	if (!block)
+		return false;
+
+	while (head != block->link.next) {
+		start = xe_ttm_vram_mgr_block_start(block);
+		size = xe_ttm_vram_mgr_block_size(block);
+
+		block = list_entry(block->link.next, struct drm_buddy_block,
+				   link);
+		if (start + size != xe_ttm_vram_mgr_block_start(block))
+			return false;
+	}
+
+	return true;
+}
+
+static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
+			       struct ttm_buffer_object *tbo,
+			       const struct ttm_place *place,
+			       struct ttm_resource **res)
+{
+	u64 max_bytes, cur_size, min_block_size;
+	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres;
+	u64 size, remaining_size, lpfn, fpfn;
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+	unsigned long pages_per_block;
+	int r;
+
+	lpfn = (u64)place->lpfn << PAGE_SHIFT;
+	if (!lpfn)
+		lpfn = man->size;
+
+	fpfn = (u64)place->fpfn << PAGE_SHIFT;
+
+	max_bytes = mgr->gt->mem.vram.size;
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		pages_per_block = ~0ul;
+	} else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		pages_per_block = HPAGE_PMD_NR;
+#else
+		/* default to 2MB */
+		pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+
+		pages_per_block = max_t(uint32_t, pages_per_block,
+					tbo->page_alignment);
+	}
+
+	vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+	if (!vres)
+		return -ENOMEM;
+
+	ttm_resource_init(tbo, place, &vres->base);
+	remaining_size = vres->base.size;
+
+	/* bail out quickly if there's likely not enough VRAM for this BO */
+	if (ttm_resource_manager_usage(man) > max_bytes) {
+		r = -ENOSPC;
+		goto error_fini;
+	}
+
+	INIT_LIST_HEAD(&vres->blocks);
+
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+	if (fpfn || lpfn != man->size)
+		/* Allocate blocks in desired range */
+		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+	mutex_lock(&mgr->lock);
+	while (remaining_size) {
+		if (tbo->page_alignment)
+			min_block_size = tbo->page_alignment << PAGE_SHIFT;
+		else
+			min_block_size = mgr->default_page_size;
+
+		XE_BUG_ON(min_block_size < mm->chunk_size);
+
+		/* Limit maximum size to 2GiB due to SG table limitations */
+		size = min(remaining_size, 2ULL << 30);
+
+		if (size >= pages_per_block << PAGE_SHIFT)
+			min_block_size = pages_per_block << PAGE_SHIFT;
+
+		cur_size = size;
+
+		if (fpfn + size != place->lpfn << PAGE_SHIFT) {
+			/*
+			 * Except for actual range allocation, modify the size and
+			 * min_block_size conforming to continuous flag enablement
+			 */
+			if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+				size = roundup_pow_of_two(size);
+				min_block_size = size;
+			/*
+			 * Modify the size value if size is not
+			 * aligned with min_block_size
+			 */
+			} else if (!IS_ALIGNED(size, min_block_size)) {
+				size = round_up(size, min_block_size);
+			}
+		}
+
+		r = drm_buddy_alloc_blocks(mm, fpfn,
+					   lpfn,
+					   size,
+					   min_block_size,
+					   &vres->blocks,
+					   vres->flags);
+		if (unlikely(r))
+			goto error_free_blocks;
+
+		if (size > remaining_size)
+			remaining_size = 0;
+		else
+			remaining_size -= size;
+	}
+	mutex_unlock(&mgr->lock);
+
+	if (cur_size != size) {
+		struct drm_buddy_block *block;
+		struct list_head *trim_list;
+		u64 original_size;
+		LIST_HEAD(temp);
+
+		trim_list = &vres->blocks;
+		original_size = vres->base.size;
+
+		/*
+		 * If size value is rounded up to min_block_size, trim the last
+		 * block to the required size
+		 */
+		if (!list_is_singular(&vres->blocks)) {
+			block = list_last_entry(&vres->blocks, typeof(*block), link);
+			list_move_tail(&block->link, &temp);
+			trim_list = &temp;
+			/*
+			 * Compute the original_size value by subtracting the
+			 * last block size with (aligned size - original size)
+			 */
+			original_size = xe_ttm_vram_mgr_block_size(block) -
+				(size - cur_size);
+		}
+
+		mutex_lock(&mgr->lock);
+		drm_buddy_block_trim(mm,
+				     original_size,
+				     trim_list);
+		mutex_unlock(&mgr->lock);
+
+		if (!list_empty(&temp))
+			list_splice_tail(trim_list, &vres->blocks);
+	}
+
+	vres->base.start = 0;
+	list_for_each_entry(block, &vres->blocks, link) {
+		unsigned long start;
+
+		start = xe_ttm_vram_mgr_block_start(block) +
+			xe_ttm_vram_mgr_block_size(block);
+		start >>= PAGE_SHIFT;
+
+		if (start > PFN_UP(vres->base.size))
+			start -= PFN_UP(vres->base.size);
+		else
+			start = 0;
+		vres->base.start = max(vres->base.start, start);
+	}
+
+	if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks))
+		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+
+	*res = &vres->base;
+	return 0;
+
+error_free_blocks:
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+error_fini:
+	ttm_resource_fini(man, &vres->base);
+	kfree(vres);
+
+	return r;
+}
+
+static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
+				struct ttm_resource *res)
+{
+	struct xe_ttm_vram_mgr_resource *vres =
+		to_xe_ttm_vram_mgr_resource(res);
+	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+
+	mutex_lock(&mgr->lock);
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+
+	ttm_resource_fini(man, res);
+
+	kfree(vres);
+}
+
+static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
+				  struct drm_printer *printer)
+{
+	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+
+	mutex_lock(&mgr->lock);
+	drm_buddy_print(mm, printer);
+	mutex_unlock(&mgr->lock);
+	drm_printf(printer, "man size:%llu\n", man->size);
+}
+
+static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
+	.alloc	= xe_ttm_vram_mgr_new,
+	.free	= xe_ttm_vram_mgr_del,
+	.debug	= xe_ttm_vram_mgr_debug
+};
+
+static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_ttm_vram_mgr *mgr = arg;
+	struct xe_device *xe = gt_to_xe(mgr->gt);
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	ttm_resource_manager_set_used(man, false);
+
+	err = ttm_resource_manager_evict_all(&xe->ttm, man);
+	if (err)
+		return;
+
+	drm_buddy_fini(&mgr->mm);
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id,
+			       NULL);
+}
+
+int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	XE_BUG_ON(xe_gt_is_media_type(gt));
+
+	mgr->gt = gt;
+	man->func = &xe_ttm_vram_mgr_func;
+
+	ttm_resource_manager_init(man, &xe->ttm, gt->mem.vram.size);
+	err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+	if (err)
+		return err;
+
+	mutex_init(&mgr->lock);
+	mgr->default_page_size = PAGE_SIZE;
+
+	ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id,
+			       &mgr->manager);
+	ttm_resource_manager_set_used(man, true);
+
+	err = drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
+	struct xe_res_cursor cursor;
+	struct scatterlist *sg;
+	int num_entries = 0;
+	int i, r;
+
+	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+	if (!*sgt)
+		return -ENOMEM;
+
+	/* Determine the number of DRM_BUDDY blocks to export */
+	xe_res_first(res, offset, length, &cursor);
+	while (cursor.remaining) {
+		num_entries++;
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+	if (r)
+		goto error_free;
+
+	/* Initialize scatterlist nodes of sg_table */
+	for_each_sgtable_sg((*sgt), sg, i)
+		sg->length = 0;
+
+	/*
+	 * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+	 * @note: Use iterator api to get first the DRM_BUDDY block
+	 * and the number of bytes from it. Access the following
+	 * DRM_BUDDY block(s) if more buffer needs to exported
+	 */
+	xe_res_first(res, offset, length, &cursor);
+	for_each_sgtable_sg((*sgt), sg, i) {
+		phys_addr_t phys = cursor.start + gt->mem.vram.io_start;
+		size_t size = cursor.size;
+		dma_addr_t addr;
+
+		addr = dma_map_resource(dev, phys, size, dir,
+					DMA_ATTR_SKIP_CPU_SYNC);
+		r = dma_mapping_error(dev, addr);
+		if (r)
+			goto error_unmap;
+
+		sg_set_page(sg, NULL, size, 0);
+		sg_dma_address(sg) = addr;
+		sg_dma_len(sg) = size;
+
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	return 0;
+
+error_unmap:
+	for_each_sgtable_sg((*sgt), sg, i) {
+		if (!sg->length)
+			continue;
+
+		dma_unmap_resource(dev, sg->dma_address,
+				   sg->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	}
+	sg_free_table(*sgt);
+
+error_free:
+	kfree(*sgt);
+	return r;
+}
+
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sgtable_sg(sgt, sg, i)
+		dma_unmap_resource(dev, sg->dma_address,
+				   sg->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
new file mode 100644
index 0000000000000..537fccec43189
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_H_
+#define _XE_TTM_VRAM_MGR_H_
+
+#include "xe_ttm_vram_mgr_types.h"
+
+enum dma_data_direction;
+struct xe_device;
+struct xe_gt;
+
+int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr);
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt);
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt);
+
+static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_offset(block);
+}
+
+static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+	return PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline struct xe_ttm_vram_mgr_resource *
+to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_vram_mgr_resource, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
new file mode 100644
index 0000000000000..39b93c71c21b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_TYPES_H_
+#define _XE_TTM_VRAM_MGR_TYPES_H_
+
+#include <drm/drm_buddy.h>
+#include <drm/ttm/ttm_device.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_ttm_vram_mgr - XE TTM VRAM manager
+ *
+ * Manages placement of TTM resource in VRAM.
+ */
+struct xe_ttm_vram_mgr {
+	/** @gt: Graphics tile which the VRAM belongs to */
+	struct xe_gt *gt;
+	/** @manager: Base TTM resource manager */
+	struct ttm_resource_manager manager;
+	/** @mm: DRM buddy allocator which manages the VRAM */
+	struct drm_buddy mm;
+	/** @default_page_size: default page size */
+	u64 default_page_size;
+	/** @lock: protects allocations of VRAM */
+	struct mutex lock;
+};
+
+/**
+ * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
+ */
+struct xe_ttm_vram_mgr_resource {
+	/** @base: Base TTM resource */
+	struct ttm_resource base;
+	/** @blocks: list of DRM buddy blocks */
+	struct list_head blocks;
+	/** @flags: flags associated with the resource */
+	unsigned long flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
new file mode 100644
index 0000000000000..e043db0373687
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include "xe_platform_types.h"
+#include "xe_gt_types.h"
+#include "xe_rtp.h"
+
+#include "gt/intel_gt_regs.h"
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x)	_XE_RTP_REG(x)
+#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+
+static const struct xe_rtp_entry gt_tunings[] = {
+	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS)
+	},
+	{}
+};
+
+static const struct xe_rtp_entry context_tunings[] = {
+	{ XE_RTP_NAME("1604555607"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+					FF_MODE2_TDS_TIMER_128)
+	},
+	{}
+};
+
+void xe_tuning_process_gt(struct xe_gt *gt)
+{
+	xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
+}
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
new file mode 100644
index 0000000000000..66dbc93192bde
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TUNING_
+#define _XE_TUNING_
+
+struct xe_gt;
+
+void xe_tuning_process_gt(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
new file mode 100644
index 0000000000000..938d146980030
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_huc.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_uc.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+static struct xe_gt *
+uc_to_gt(struct xe_uc *uc)
+{
+	return container_of(uc, struct xe_gt, uc);
+}
+
+static struct xe_device *
+uc_to_xe(struct xe_uc *uc)
+{
+	return gt_to_xe(uc_to_gt(uc));
+}
+
+/* Should be called once at driver load only */
+int xe_uc_init(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = xe_guc_init(&uc->guc);
+	if (ret)
+		goto err;
+
+	ret = xe_huc_init(&uc->huc);
+	if (ret)
+		goto err;
+
+	ret = xe_wopcm_init(&uc->wopcm);
+	if (ret)
+		goto err;
+
+	ret = xe_guc_submit_init(&uc->guc);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	/* If any uC firmwares not found, fall back to execlists */
+	xe_device_guc_submission_disable(uc_to_xe(uc));
+
+	return ret;
+}
+
+/**
+ * xe_uc_init_post_hwconfig - init Uc post hwconfig load
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_post_hwconfig(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_init_post_hwconfig(&uc->guc);
+}
+
+static int uc_reset(struct xe_uc *uc)
+{
+	struct xe_device *xe = uc_to_xe(uc);
+	int ret;
+
+	ret = xe_guc_reset(&uc->guc);
+	if (ret) {
+		drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int uc_sanitize(struct xe_uc *uc)
+{
+	xe_huc_sanitize(&uc->huc);
+	xe_guc_sanitize(&uc->guc);
+
+	return uc_reset(uc);
+}
+
+/**
+ * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_hwconfig(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = xe_guc_min_load_for_hwconfig(&uc->guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Should be called during driver load, after every GT reset, and after every
+ * suspend to reload / auth the firmwares.
+ */
+int xe_uc_init_hw(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = uc_sanitize(uc);
+	if (ret)
+		return ret;
+
+	ret = xe_huc_upload(&uc->huc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_upload(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_enable_communication(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
+	if (ret)
+		return ret;
+
+	ret = xe_guc_post_load_init(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_start(&uc->guc.pc);
+	if (ret)
+		return ret;
+
+	/* We don't fail the driver load if HuC fails to auth, but let's warn */
+	ret = xe_huc_auth(&uc->huc);
+	XE_WARN_ON(ret);
+
+	return 0;
+}
+
+int xe_uc_reset_prepare(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_reset_prepare(&uc->guc);
+}
+
+void xe_uc_stop_prepare(struct xe_uc *uc)
+{
+	xe_guc_stop_prepare(&uc->guc);
+}
+
+int xe_uc_stop(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_stop(&uc->guc);
+}
+
+int xe_uc_start(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_start(&uc->guc);
+}
+
+static void uc_reset_wait(struct xe_uc *uc)
+{
+       int ret;
+
+again:
+       xe_guc_reset_wait(&uc->guc);
+
+       ret = xe_uc_reset_prepare(uc);
+       if (ret)
+               goto again;
+}
+
+int xe_uc_suspend(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+		return 0;
+
+	uc_reset_wait(uc);
+
+	ret = xe_uc_stop(uc);
+	if (ret)
+		return ret;
+
+	return xe_guc_suspend(&uc->guc);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
new file mode 100644
index 0000000000000..380e722f95fce
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_H_
+#define _XE_UC_H_
+
+#include "xe_uc_types.h"
+
+int xe_uc_init(struct xe_uc *uc);
+int xe_uc_init_hwconfig(struct xe_uc *uc);
+int xe_uc_init_post_hwconfig(struct xe_uc *uc);
+int xe_uc_init_hw(struct xe_uc *uc);
+int xe_uc_reset_prepare(struct xe_uc *uc);
+void xe_uc_stop_prepare(struct xe_uc *uc);
+int xe_uc_stop(struct xe_uc *uc);
+int xe_uc_start(struct xe_uc *uc);
+int xe_uc_suspend(struct xe_uc *uc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c
new file mode 100644
index 0000000000000..0a39ec5a6e999
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_gt.h"
+#include "xe_guc_debugfs.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent)
+{
+	struct dentry *root;
+
+	root = debugfs_create_dir("uc", parent);
+	if (IS_ERR(root)) {
+		XE_WARN_ON("Create UC directory failed");
+		return;
+	}
+
+	xe_guc_debugfs_register(&uc->guc, root);
+	xe_huc_debugfs_register(&uc->huc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h
new file mode 100644
index 0000000000000..a13382df2bd79
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_DEBUGFS_H_
+#define _XE_UC_DEBUGFS_H_
+
+struct dentry;
+struct xe_uc;
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
new file mode 100644
index 0000000000000..86c47b7f09017
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_reg.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+static struct xe_gt *
+__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
+{
+	if (type == XE_UC_FW_TYPE_GUC)
+		return container_of(uc_fw, struct xe_gt, uc.guc.fw);
+
+	XE_BUG_ON(type != XE_UC_FW_TYPE_HUC);
+	return container_of(uc_fw, struct xe_gt, uc.huc.fw);
+}
+
+static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw)
+{
+	return __uc_fw_to_gt(uc_fw, uc_fw->type);
+}
+
+static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
+{
+	return gt_to_xe(uc_fw_to_gt(uc_fw));
+}
+
+/*
+ * List of required GuC and HuC binaries per-platform.
+ * Must be ordered based on platform + revid, from newer to older.
+ */
+#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+	fw_def(METEORLAKE,   0, guc_def(mtl,  70, 5, 2)) \
+	fw_def(ALDERLAKE_P,  0, guc_def(adlp,  70, 5, 2)) \
+	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 5, 2)) \
+	fw_def(PVC,          0, guc_def(pvc,  70, 5, 2)) \
+	fw_def(DG2,          0, guc_def(dg2,  70, 5, 2)) \
+	fw_def(DG1,          0, guc_def(dg1,  70, 5, 2)) \
+	fw_def(TIGERLAKE,    0, guc_def(tgl,  70, 5, 2))
+
+#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
+	fw_def(DG1,          0, huc_def(dg1,  7, 9, 3)) \
+	fw_def(TIGERLAKE,    0, huc_def(tgl,  7, 9, 3))
+
+#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
+	"xe/" \
+	__stringify(prefix_) "_" name_ "_" \
+	__stringify(major_) ".bin"
+
+#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
+        "xe/" \
+       __stringify(prefix_) name_ \
+       __stringify(major_) "." \
+       __stringify(minor_) "." \
+       __stringify(patch_) ".bin"
+
+#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
+	__MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_)
+
+#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
+	__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
+
+/* All blobs need to be declared via MODULE_FIRMWARE() */
+#define XE_UC_MODULE_FW(platform_, revid_, uc_) \
+	MODULE_FIRMWARE(uc_);
+
+XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH)
+
+/* The below structs and macros are used to iterate across the list of blobs */
+struct __packed uc_fw_blob {
+	u8 major;
+	u8 minor;
+	const char *path;
+};
+
+#define UC_FW_BLOB(major_, minor_, path_) \
+	{ .major = major_, .minor = minor_, .path = path_ }
+
+#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
+	UC_FW_BLOB(major_, minor_, \
+		   MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))
+
+#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \
+	UC_FW_BLOB(major_, minor_, \
+		   MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_))
+
+struct __packed uc_fw_platform_requirement {
+	enum xe_platform p;
+	u8 rev; /* first platform rev using this FW */
+	const struct uc_fw_blob blob;
+};
+
+#define MAKE_FW_LIST(platform_, revid_, uc_) \
+{ \
+	.p = XE_##platform_, \
+	.rev = revid_, \
+	.blob = uc_, \
+},
+
+struct fw_blobs_by_type {
+	const struct uc_fw_platform_requirement *blobs;
+	u32 count;
+};
+
+static void
+uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
+{
+	static const struct uc_fw_platform_requirement blobs_guc[] = {
+		XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
+	};
+	static const struct uc_fw_platform_requirement blobs_huc[] = {
+		XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
+	};
+	static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
+		[XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
+		[XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+	};
+	static const struct uc_fw_platform_requirement *fw_blobs;
+	enum xe_platform p = xe->info.platform;
+	u32 fw_count;
+	u8 rev = xe->info.revid;
+	int i;
+
+	XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
+	fw_blobs = blobs_all[uc_fw->type].blobs;
+	fw_count = blobs_all[uc_fw->type].count;
+
+	for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
+		if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
+			const struct uc_fw_blob *blob = &fw_blobs[i].blob;
+
+			uc_fw->path = blob->path;
+			uc_fw->major_ver_wanted = blob->major;
+			uc_fw->minor_ver_wanted = blob->minor;
+			break;
+		}
+	}
+}
+
+/**
+ * xe_uc_fw_copy_rsa - copy fw RSA to buffer
+ *
+ * @uc_fw: uC firmware
+ * @dst: dst buffer
+ * @max_len: max number of bytes to copy
+ *
+ * Return: number of copied bytes.
+ */
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	u32 size = min_t(u32, uc_fw->rsa_size, max_len);
+
+	XE_BUG_ON(size % 4);
+	XE_BUG_ON(!xe_uc_fw_is_available(uc_fw));
+
+	xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
+			   xe_uc_fw_rsa_offset(uc_fw), size);
+
+	return size;
+}
+
+static void uc_fw_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_uc_fw *uc_fw = arg;
+
+	if (!xe_uc_fw_is_available(uc_fw))
+		return;
+
+	xe_bo_unpin_map_no_vm(uc_fw->bo);
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
+}
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct device *dev = xe->drm.dev;
+	const struct firmware *fw = NULL;
+	struct uc_css_header *css;
+	struct xe_bo *obj;
+	size_t size;
+	int err;
+
+	/*
+	 * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
+	 * before we're looked at the HW caps to see if we have uc support
+	 */
+	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
+	XE_BUG_ON(uc_fw->status);
+	XE_BUG_ON(uc_fw->path);
+
+	uc_fw_auto_select(xe, uc_fw);
+	xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
+			       XE_UC_FIRMWARE_SELECTED :
+			       XE_UC_FIRMWARE_DISABLED :
+			       XE_UC_FIRMWARE_NOT_SUPPORTED);
+
+	/* Transform no huc in the list into firmware disabled */
+	if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) {
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
+		err = -ENOPKG;
+		return err;
+	}
+	err = request_firmware(&fw, uc_fw->path, dev);
+	if (err)
+		goto fail;
+
+	/* Check the size of the blob before examining buffer contents */
+	if (unlikely(fw->size < sizeof(struct uc_css_header))) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw->size, sizeof(struct uc_css_header));
+		err = -ENODATA;
+		goto fail;
+	}
+
+	css = (struct uc_css_header *)fw->data;
+
+	/* Check integrity of size values inside CSS header */
+	size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
+		css->exponent_size_dw) * sizeof(u32);
+	if (unlikely(size != sizeof(struct uc_css_header))) {
+		drm_warn(&xe->drm,
+			 "%s firmware %s: unexpected header size: %zu != %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw->size, sizeof(struct uc_css_header));
+		err = -EPROTO;
+		goto fail;
+	}
+
+	/* uCode size must calculated from other sizes */
+	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+	/* now RSA */
+	uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+	/* At least, it should have header, uCode and RSA. Size of all three. */
+	size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
+		uc_fw->rsa_size;
+	if (unlikely(fw->size < size)) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw->size, size);
+		err = -ENOEXEC;
+		goto fail;
+	}
+
+	/* Get version numbers from the CSS header */
+	uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+					   css->sw_version);
+	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+					   css->sw_version);
+
+	if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
+	    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
+		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+		if (!xe_uc_fw_is_overridden(uc_fw)) {
+			err = -ENOEXEC;
+			goto fail;
+		}
+	}
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
+		uc_fw->private_data_size = css->private_data_size;
+
+	obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size,
+				     ttm_bo_type_kernel,
+				     XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				     XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(obj)) {
+		drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+		err = PTR_ERR(obj);
+		goto fail;
+	}
+
+	uc_fw->bo = obj;
+	uc_fw->size = fw->size;
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);
+
+	release_firmware(fw);
+
+	err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw);
+	if (err)
+		return err;
+
+	return 0;
+
+fail:
+	xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
+			       XE_UC_FIRMWARE_MISSING :
+			       XE_UC_FIRMWARE_ERROR);
+
+	drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+	drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
+		 xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
+
+	release_firmware(fw);		/* OK even if fw is NULL */
+	return err;
+}
+
+static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
+{
+	return xe_bo_ggtt_addr(uc_fw->bo);
+}
+
+static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	u32 src_offset;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/* Set the source address for the uCode */
+	src_offset = uc_fw_ggtt_offset(uc_fw);
+	xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset));
+	xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset));
+
+	/* Set the DMA destination */
+	xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset);
+	xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM);
+
+	/*
+	 * Set the transfer size. The header plus uCode will be copied to WOPCM
+	 * via DMA, excluding any other components
+	 */
+	xe_mmio_write32(gt, DMA_COPY_SIZE.reg,
+			sizeof(struct uc_css_header) + uc_fw->ucode_size);
+
+	/* Start the DMA */
+	xe_mmio_write32(gt, DMA_CTRL.reg,
+			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
+
+	/* Wait for DMA to finish */
+	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100);
+	if (ret)
+		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
+			xe_uc_fw_type_repr(uc_fw->type),
+			xe_mmio_read32(gt, DMA_CTRL.reg));
+
+	/* Disable the bits once DMA is over */
+	xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags));
+
+	return ret;
+}
+
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	int err;
+
+	/* make sure the status was cleared the last time we reset the uc */
+	XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw));
+
+	if (!xe_uc_fw_is_loadable(uc_fw))
+		return -ENOEXEC;
+
+	/* Call custom loader */
+	err = uc_fw_xfer(uc_fw, offset, dma_flags);
+	if (err)
+		goto fail;
+
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED);
+	return 0;
+
+fail:
+	drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n",
+		xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+		err);
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return err;
+}
+
+
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
+{
+	drm_printf(p, "%s firmware: %s\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+	drm_printf(p, "\tstatus: %s\n",
+		   xe_uc_fw_status_repr(uc_fw->status));
+	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
+		   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
+		   uc_fw->major_ver_found, uc_fw->minor_ver_found);
+	drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
+	drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
new file mode 100644
index 0000000000000..b0df5064b27da
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_H_
+#define _XE_UC_FW_H_
+
+#include <linux/errno.h>
+
+#include "xe_uc_fw_types.h"
+#include "xe_uc_fw_abi.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw);
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len);
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags);
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p);
+
+static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+}
+
+static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw,
+					  enum xe_uc_fw_status status)
+{
+	uc_fw->__status = status;
+}
+
+static inline
+const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status)
+{
+	switch (status) {
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return "N/A";
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return "UNINITIALIZED";
+	case XE_UC_FIRMWARE_DISABLED:
+		return "DISABLED";
+	case XE_UC_FIRMWARE_SELECTED:
+		return "SELECTED";
+	case XE_UC_FIRMWARE_MISSING:
+		return "MISSING";
+	case XE_UC_FIRMWARE_ERROR:
+		return "ERROR";
+	case XE_UC_FIRMWARE_AVAILABLE:
+		return "AVAILABLE";
+	case XE_UC_FIRMWARE_INIT_FAIL:
+		return "INIT FAIL";
+	case XE_UC_FIRMWARE_LOADABLE:
+		return "LOADABLE";
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return "LOAD FAIL";
+	case XE_UC_FIRMWARE_TRANSFERRED:
+		return "TRANSFERRED";
+	case XE_UC_FIRMWARE_RUNNING:
+		return "RUNNING";
+	}
+	return "<invalid>";
+}
+
+static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
+{
+	switch (status) {
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return -ENODEV;
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return -EACCES;
+	case XE_UC_FIRMWARE_DISABLED:
+		return -EPERM;
+	case XE_UC_FIRMWARE_MISSING:
+		return -ENOENT;
+	case XE_UC_FIRMWARE_ERROR:
+		return -ENOEXEC;
+	case XE_UC_FIRMWARE_INIT_FAIL:
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return -EIO;
+	case XE_UC_FIRMWARE_SELECTED:
+		return -ESTALE;
+	case XE_UC_FIRMWARE_AVAILABLE:
+	case XE_UC_FIRMWARE_LOADABLE:
+	case XE_UC_FIRMWARE_TRANSFERRED:
+	case XE_UC_FIRMWARE_RUNNING:
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
+{
+	switch (type) {
+	case XE_UC_FW_TYPE_GUC:
+		return "GuC";
+	case XE_UC_FW_TYPE_HUC:
+		return "HuC";
+	}
+	return "uC";
+}
+
+static inline enum xe_uc_fw_status
+__xe_uc_fw_status(struct xe_uc_fw *uc_fw)
+{
+	/* shouldn't call this before checking hw/blob availability */
+	XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
+	return uc_fw->status;
+}
+
+static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED;
+}
+
+static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE;
+}
+
+static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE;
+}
+
+static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED;
+}
+
+static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING;
+}
+
+static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw)
+{
+	return uc_fw->user_overridden;
+}
+
+static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw)
+{
+	if (xe_uc_fw_is_loaded(uc_fw))
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+}
+
+/**
+ * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded.
+ * @uc_fw: uC firmware.
+ *
+ * Get the size of the firmware and header that will be uploaded to WOPCM.
+ *
+ * Return: Upload firmware size, or zero on firmware fetch failure.
+ */
+static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	if (!xe_uc_fw_is_available(uc_fw))
+		return 0;
+
+	return __xe_uc_fw_get_upload_size(uc_fw);
+}
+
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe"
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
new file mode 100644
index 0000000000000..dafd26cb0c413
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_ABI_H
+#define _XE_UC_FW_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/**
+ * DOC: Firmware Layout
+ *
+ * The GuC/HuC firmware layout looks like this::
+ *
+ *      +======================================================================+
+ *      |  Firmware blob                                                       |
+ *      +===============+===============+============+============+============+
+ *      |  CSS header   |     uCode     |  RSA key   |  modulus   |  exponent  |
+ *      +===============+===============+============+============+============+
+ *       <-header size->                 <---header size continued ----------->
+ *       <--- size ----------------------------------------------------------->
+ *                                       <-key size->
+ *                                                    <-mod size->
+ *                                                                 <-exp size->
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by driver.
+ * Length of each components, which is all in dwords, can be found in header.
+ * In the case that modulus and exponent are not present in fw, a.k.a truncated
+ * image, the length value still appears in header.
+ *
+ * Driver will do some basic fw size validation based on the following rules:
+ *
+ * 1. Header, uCode and RSA are must-have components.
+ * 2. All firmware components, if they present, are in the sequence illustrated
+ *    in the layout table above.
+ * 3. Length info of each component can be found in header, in dwords.
+ * 4. Modulus and exponent key are not required by driver. They may not appear
+ *    in fw. So driver will load a truncated firmware in this case.
+ */
+
+struct uc_css_header {
+	u32 module_type;
+	/*
+	 * header_size includes all non-uCode bits, including css_header, rsa
+	 * key, modulus key and exponent data.
+	 */
+	u32 header_size_dw;
+	u32 header_version;
+	u32 module_id;
+	u32 module_vendor;
+	u32 date;
+#define CSS_DATE_DAY			(0xFF << 0)
+#define CSS_DATE_MONTH			(0xFF << 8)
+#define CSS_DATE_YEAR			(0xFFFF << 16)
+	u32 size_dw; /* uCode plus header_size_dw */
+	u32 key_size_dw;
+	u32 modulus_size_dw;
+	u32 exponent_size_dw;
+	u32 time;
+#define CSS_TIME_HOUR			(0xFF << 0)
+#define CSS_DATE_MIN			(0xFF << 8)
+#define CSS_DATE_SEC			(0xFFFF << 16)
+	char username[8];
+	char buildnumber[12];
+	u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR		(0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR		(0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH		(0xFF << 0)
+	u32 reserved0[13];
+	union {
+		u32 private_data_size; /* only applies to GuC */
+		u32 reserved1;
+	};
+	u32 header_info;
+} __packed;
+static_assert(sizeof(struct uc_css_header) == 128);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
new file mode 100644
index 0000000000000..1cfd30a655df1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_TYPES_H_
+#define _XE_UC_FW_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/*
+ * +------------+---------------------------------------------------+
+ * |   PHASE    |           FIRMWARE STATUS TRANSITIONS             |
+ * +============+===================================================+
+ * |            |               UNINITIALIZED                       |
+ * +------------+-               /   |   \                         -+
+ * |            |   DISABLED <--/    |    \--> NOT_SUPPORTED        |
+ * | init_early |                    V                              |
+ * |            |                 SELECTED                          |
+ * +------------+-               /   |   \                         -+
+ * |            |    MISSING <--/    |    \--> ERROR                |
+ * |   fetch    |                    V                              |
+ * |            |                 AVAILABLE                         |
+ * +------------+-                   |   \                         -+
+ * |            |                    |    \--> INIT FAIL            |
+ * |   init     |                    V                              |
+ * |            |        /------> LOADABLE <----<-----------\       |
+ * +------------+-       \         /    \        \           \     -+
+ * |            |    LOAD FAIL <--<      \--> TRANSFERRED     \     |
+ * |   upload   |                  \           /   \          /     |
+ * |            |                   \---------/     \--> RUNNING    |
+ * +------------+---------------------------------------------------+
+ */
+
+/*
+ * FIXME: Ported from the i915 and this is state machine is way too complicated.
+ * Circle back and simplify this.
+ */
+enum xe_uc_fw_status {
+	XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+	XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
+	XE_UC_FIRMWARE_DISABLED, /* disabled */
+	XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+	XE_UC_FIRMWARE_MISSING, /* blob not found on the system */
+	XE_UC_FIRMWARE_ERROR, /* invalid format or version */
+	XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+	XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
+	XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
+	XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
+	XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+	XE_UC_FIRMWARE_RUNNING /* init/auth done */
+};
+
+enum xe_uc_fw_type {
+	XE_UC_FW_TYPE_GUC = 0,
+	XE_UC_FW_TYPE_HUC
+};
+#define XE_UC_FW_NUM_TYPES 2
+
+/**
+ * struct xe_uc_fw - XE micro controller firmware
+ */
+struct xe_uc_fw {
+	/** @type: type uC firmware */
+	enum xe_uc_fw_type type;
+	union {
+		/** @status: firmware load status */
+		const enum xe_uc_fw_status status;
+		/**
+		 * @__status: private firmware load status - only to be used
+		 * by firmware laoding code
+		 */
+		enum xe_uc_fw_status __status;
+	};
+	/** @path: path to uC firmware */
+	const char *path;
+	/** @user_overridden: user provided path to uC firmware via modparam */
+	bool user_overridden;
+	/** @size: size of uC firmware including css header */
+	size_t size;
+
+	/** @bo: XE BO for uC firmware */
+	struct xe_bo *bo;
+
+	/*
+	 * The firmware build process will generate a version header file with
+	 * major and minor version defined. The versions are built into CSS
+	 * header of firmware. The xe kernel driver set the minimal firmware
+	 * version required per platform.
+	 */
+
+	/** @major_ver_wanted: major firmware version wanted by platform */
+	u16 major_ver_wanted;
+	/** @minor_ver_wanted: minor firmware version wanted by platform */
+	u16 minor_ver_wanted;
+	/** @major_ver_found: major version found in firmware blob */
+	u16 major_ver_found;
+	/** @minor_ver_found: major version found in firmware blob */
+	u16 minor_ver_found;
+
+	/** @rsa_size: RSA size */
+	u32 rsa_size;
+	/** @ucode_size: micro kernel size */
+	u32 ucode_size;
+
+	/** @private_data_size: size of private data found in uC css header */
+	u32 private_data_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
new file mode 100644
index 0000000000000..49bef6498b85e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_TYPES_H_
+#define _XE_UC_TYPES_H_
+
+#include "xe_guc_types.h"
+#include "xe_huc_types.h"
+#include "xe_wopcm_types.h"
+
+/**
+ * struct xe_uc - XE micro controllers
+ */
+struct xe_uc {
+	/** @guc: Graphics micro controller */
+	struct xe_guc guc;
+	/** @huc: HuC */
+	struct xe_huc huc;
+	/** @wopcm: WOPCM */
+	struct xe_wopcm wopcm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
new file mode 100644
index 0000000000000..d47a8617c5b67
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -0,0 +1,3407 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include <linux/dma-fence-array.h>
+
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_preempt_fence.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_sync.h"
+
+#define TEST_VM_ASYNC_OPS_ERROR
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @vma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since last successful
+ * repin. The check is advisory only and can the function can be called
+ * without the vm->userptr.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_vma *vma)
+{
+	return mmu_interval_check_retry(&vma->userptr.notifier,
+					vma->userptr.notifier_seq) ?
+		-EAGAIN : 0;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma)
+{
+	struct xe_vm *vm = vma->vm;
+	struct xe_device *xe = vm->xe;
+	const unsigned long num_pages =
+		(vma->end - vma->start + 1) >> PAGE_SHIFT;
+	struct page **pages;
+	bool in_kthread = !current->mm;
+	unsigned long notifier_seq;
+	int pinned, ret, i;
+	bool read_only = vma->pte_flags & PTE_READ_ONLY;
+
+	lockdep_assert_held(&vm->lock);
+	XE_BUG_ON(!xe_vma_is_userptr(vma));
+retry:
+	if (vma->destroyed)
+		return 0;
+
+	notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+	if (notifier_seq == vma->userptr.notifier_seq)
+		return 0;
+
+	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	if (vma->userptr.sg) {
+		dma_unmap_sgtable(xe->drm.dev,
+				  vma->userptr.sg,
+				  read_only ? DMA_TO_DEVICE :
+				  DMA_BIDIRECTIONAL, 0);
+		sg_free_table(vma->userptr.sg);
+		vma->userptr.sg = NULL;
+	}
+
+	pinned = ret = 0;
+	if (in_kthread) {
+		if (!mmget_not_zero(vma->userptr.notifier.mm)) {
+			ret = -EFAULT;
+			goto mm_closed;
+		}
+		kthread_use_mm(vma->userptr.notifier.mm);
+	}
+
+	while (pinned < num_pages) {
+		ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE,
+					  num_pages - pinned,
+					  read_only ? 0 : FOLL_WRITE,
+					  &pages[pinned]);
+		if (ret < 0) {
+			if (in_kthread)
+				ret = 0;
+			break;
+		}
+
+		pinned += ret;
+		ret = 0;
+	}
+
+	if (in_kthread) {
+		kthread_unuse_mm(vma->userptr.notifier.mm);
+		mmput(vma->userptr.notifier.mm);
+	}
+mm_closed:
+	if (ret)
+		goto out;
+
+	ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned,
+					0, (u64)pinned << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret) {
+		vma->userptr.sg = NULL;
+		goto out;
+	}
+	vma->userptr.sg = &vma->userptr.sgt;
+
+	ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
+			      read_only ? DMA_TO_DEVICE :
+			      DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC |
+			      DMA_ATTR_NO_KERNEL_MAPPING);
+	if (ret) {
+		sg_free_table(vma->userptr.sg);
+		vma->userptr.sg = NULL;
+		goto out;
+	}
+
+	for (i = 0; i < pinned; ++i) {
+		if (!read_only) {
+			lock_page(pages[i]);
+			set_page_dirty(pages[i]);
+			unlock_page(pages[i]);
+		}
+
+		mark_page_accessed(pages[i]);
+	}
+
+out:
+	release_pages(pages, pinned);
+	kvfree(pages);
+
+	if (!(ret < 0)) {
+		vma->userptr.notifier_seq = notifier_seq;
+		if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
+			goto retry;
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+static bool preempt_fences_waiting(struct xe_vm *vm)
+{
+	struct xe_engine *e;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+		if (!e->compute.pfence || (e->compute.pfence &&
+		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			     &e->compute.pfence->flags))) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void free_preempt_fences(struct list_head *list)
+{
+	struct list_head *link, *next;
+
+	list_for_each_safe(link, next, list)
+		xe_preempt_fence_free(to_preempt_fence_from_link(link));
+}
+
+static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
+				unsigned int *count)
+{
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	if (*count >= vm->preempt.num_engines)
+		return 0;
+
+	for (; *count < vm->preempt.num_engines; ++(*count)) {
+		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
+
+		if (IS_ERR(pfence))
+			return PTR_ERR(pfence);
+
+		list_move_tail(xe_preempt_fence_link(pfence), list);
+	}
+
+	return 0;
+}
+
+static int wait_for_existing_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_engine *e;
+
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+		if (e->compute.pfence) {
+			long timeout = dma_fence_wait(e->compute.pfence, false);
+
+			if (timeout < 0)
+				return -ETIME;
+			dma_fence_put(e->compute.pfence);
+			e->compute.pfence = NULL;
+		}
+	}
+
+	return 0;
+}
+
+static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
+{
+	struct list_head *link;
+	struct xe_engine *e;
+
+	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+		struct dma_fence *fence;
+
+		link = list->next;
+		XE_BUG_ON(link == list);
+
+		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
+					     e, e->compute.context,
+					     ++e->compute.seqno);
+		dma_fence_put(e->compute.pfence);
+		e->compute.pfence = fence;
+	}
+}
+
+static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
+{
+	struct xe_engine *e;
+	struct ww_acquire_ctx ww;
+	int err;
+
+	err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true);
+	if (err)
+		return err;
+
+	list_for_each_entry(e, &vm->preempt.engines, compute.link)
+		if (e->compute.pfence) {
+			dma_resv_add_fence(bo->ttm.base.resv,
+					   e->compute.pfence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		}
+
+	xe_bo_unlock(bo, &ww);
+	return 0;
+}
+
+/**
+ * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
+ * @vm: The vm.
+ * @fence: The fence to add.
+ * @usage: The resv usage for the fence.
+ *
+ * Loops over all of the vm's external object bindings and adds a @fence
+ * with the given @usage to all of the external object's reservation
+ * objects.
+ */
+void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
+			     enum dma_resv_usage usage)
+{
+	struct xe_vma *vma;
+
+	list_for_each_entry(vma, &vm->extobj.list, extobj.link)
+		dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage);
+}
+
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_engine *e;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+		e->ops->resume(e);
+
+		dma_resv_add_fence(&vm->resv, e->compute.pfence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+		xe_vm_fence_all_extobjs(vm, e->compute.pfence,
+					DMA_RESV_USAGE_BOOKKEEP);
+	}
+}
+
+int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
+{
+	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+	struct ttm_validate_buffer *tv;
+	struct ww_acquire_ctx ww;
+	struct list_head objs;
+	struct dma_fence *pfence;
+	int err;
+	bool wait;
+
+	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+
+	down_write(&vm->lock);
+
+	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
+	if (err)
+		goto out_unlock_outer;
+
+	pfence = xe_preempt_fence_create(e, e->compute.context,
+					 ++e->compute.seqno);
+	if (!pfence) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	list_add(&e->compute.link, &vm->preempt.engines);
+	++vm->preempt.num_engines;
+	e->compute.pfence = pfence;
+
+	down_read(&vm->userptr.notifier_lock);
+
+	dma_resv_add_fence(&vm->resv, pfence,
+			   DMA_RESV_USAGE_BOOKKEEP);
+
+	xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
+
+	/*
+	 * Check to see if a preemption on VM is in flight or userptr
+	 * invalidation, if so trigger this preempt fence to sync state with
+	 * other preempt fences on the VM.
+	 */
+	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
+	if (wait)
+		dma_fence_enable_sw_signaling(pfence);
+
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+out_unlock_outer:
+	up_write(&vm->lock);
+
+	return err;
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function checks for whether the VM has userptrs that need repinning,
+ * and provides a release-type barrier on the userptr.notifier_lock after
+ * checking.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	return (list_empty(&vm->userptr.repin_list) &&
+		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+/**
+ * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
+ * objects of the vm's external buffer objects.
+ * @vm: The vm.
+ * @ww: Pointer to a struct ww_acquire_ctx locking context.
+ * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
+ * ttm_validate_buffers used for locking.
+ * @tv: Pointer to a pointer that on output contains the actual storage used.
+ * @objs: List head for the buffer objects locked.
+ * @intr: Whether to lock interruptible.
+ * @num_shared: Number of dma-fence slots to reserve in the locked objects.
+ *
+ * Locks the vm dma-resv objects and all the dma-resv objects of the
+ * buffer objects on the vm external object list. The TTM utilities require
+ * a list of struct ttm_validate_buffers pointing to the actual buffer
+ * objects to lock. Storage for those struct ttm_validate_buffers should
+ * be provided in @tv_onstack, and is typically reserved on the stack
+ * of the caller. If the size of @tv_onstack isn't sufficient, then
+ * storage will be allocated internally using kvmalloc().
+ *
+ * The function performs deadlock handling internally, and after a
+ * successful return the ww locking transaction should be considered
+ * sealed.
+ *
+ * Return: 0 on success, Negative error code on error. In particular if
+ * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
+ * of error, any locking performed has been reverted.
+ */
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+			struct ttm_validate_buffer *tv_onstack,
+			struct ttm_validate_buffer **tv,
+			struct list_head *objs,
+			bool intr,
+			unsigned int num_shared)
+{
+	struct ttm_validate_buffer *tv_vm, *tv_bo;
+	struct xe_vma *vma, *next;
+	LIST_HEAD(dups);
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+
+	if (vm->extobj.entries < XE_ONSTACK_TV) {
+		tv_vm = tv_onstack;
+	} else {
+		tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
+				       GFP_KERNEL);
+		if (!tv_vm)
+			return -ENOMEM;
+	}
+	tv_bo = tv_vm + 1;
+
+	INIT_LIST_HEAD(objs);
+	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
+		tv_bo->num_shared = num_shared;
+		tv_bo->bo = &vma->bo->ttm;
+
+		list_add_tail(&tv_bo->head, objs);
+		tv_bo++;
+	}
+	tv_vm->num_shared = num_shared;
+	tv_vm->bo = xe_vm_ttm_bo(vm);
+	list_add_tail(&tv_vm->head, objs);
+	err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
+	if (err)
+		goto out_err;
+
+	spin_lock(&vm->notifier.list_lock);
+	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
+				 notifier.rebind_link) {
+		xe_bo_assert_held(vma->bo);
+
+		list_del_init(&vma->notifier.rebind_link);
+		if (vma->gt_present && !vma->destroyed)
+			list_move_tail(&vma->rebind_link, &vm->rebind_list);
+	}
+	spin_unlock(&vm->notifier.list_lock);
+
+	*tv = tv_vm;
+	return 0;
+
+out_err:
+	if (tv_vm != tv_onstack)
+		kvfree(tv_vm);
+
+	return err;
+}
+
+/**
+ * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
+ * xe_vm_lock_dma_resv()
+ * @vm: The vm.
+ * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
+ * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
+ * @ww: The ww_acquire_context used for locking.
+ * @objs: The list returned from xe_vm_lock_dma_resv().
+ *
+ * Unlocks the reservation objects and frees any memory allocated by
+ * xe_vm_lock_dma_resv().
+ */
+void xe_vm_unlock_dma_resv(struct xe_vm *vm,
+			   struct ttm_validate_buffer *tv_onstack,
+			   struct ttm_validate_buffer *tv,
+			   struct ww_acquire_ctx *ww,
+			   struct list_head *objs)
+{
+	/*
+	 * Nothing should've been able to enter the list while we were locked,
+	 * since we've held the dma-resvs of all the vm's external objects,
+	 * and holding the dma_resv of an object is required for list
+	 * addition, and we shouldn't add ourselves.
+	 */
+	XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));
+
+	ttm_eu_backoff_reservation(ww, objs);
+	if (tv && tv != tv_onstack)
+		kvfree(tv);
+}
+
+static void preempt_rebind_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+	struct xe_vma *vma;
+	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+	struct ttm_validate_buffer *tv;
+	struct ww_acquire_ctx ww;
+	struct list_head objs;
+	struct dma_fence *rebind_fence;
+	unsigned int fence_count = 0;
+	LIST_HEAD(preempt_fences);
+	int err;
+	long wait;
+	int __maybe_unused tries = 0;
+
+	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+	trace_xe_vm_rebind_worker_enter(vm);
+
+	if (xe_vm_is_closed(vm)) {
+		trace_xe_vm_rebind_worker_exit(vm);
+		return;
+	}
+
+	down_write(&vm->lock);
+
+retry:
+	if (vm->async_ops.error)
+		goto out_unlock_outer;
+
+	/*
+	 * Extreme corner where we exit a VM error state with a munmap style VM
+	 * unbind inflight which requires a rebind. In this case the rebind
+	 * needs to install some fences into the dma-resv slots. The worker to
+	 * do this queued, let that worker make progress by dropping vm->lock
+	 * and trying this again.
+	 */
+	if (vm->async_ops.munmap_rebind_inflight) {
+		up_write(&vm->lock);
+		flush_work(&vm->async_ops.work);
+		goto retry;
+	}
+
+	if (xe_vm_userptr_check_repin(vm)) {
+		err = xe_vm_userptr_pin(vm);
+		if (err)
+			goto out_unlock_outer;
+	}
+
+	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
+				  false, vm->preempt.num_engines);
+	if (err)
+		goto out_unlock_outer;
+
+	/* Fresh preempt fences already installed. Everyting is running. */
+	if (!preempt_fences_waiting(vm))
+		goto out_unlock;
+
+	/*
+	 * This makes sure vm is completely suspended and also balances
+	 * xe_engine suspend- and resume; we resume *all* vm engines below.
+	 */
+	err = wait_for_existing_preempt_fences(vm);
+	if (err)
+		goto out_unlock;
+
+	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+	if (err)
+		goto out_unlock;
+
+	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+		if (xe_vma_is_userptr(vma) || vma->destroyed)
+			continue;
+
+		err = xe_bo_validate(vma->bo, vm, false);
+		if (err)
+			goto out_unlock;
+	}
+
+	rebind_fence = xe_vm_rebind(vm, true);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto out_unlock;
+	}
+
+	if (rebind_fence) {
+		dma_fence_wait(rebind_fence, false);
+		dma_fence_put(rebind_fence);
+	}
+
+	/* Wait on munmap style VM unbinds */
+	wait = dma_resv_wait_timeout(&vm->resv,
+				     DMA_RESV_USAGE_KERNEL,
+				     false, MAX_SCHEDULE_TIMEOUT);
+	if (wait <= 0) {
+		err = -ETIME;
+		goto out_unlock;
+	}
+
+#define retry_required(__tries, __vm) \
+	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+	__xe_vm_userptr_needs_repin(__vm))
+
+	down_read(&vm->userptr.notifier_lock);
+	if (retry_required(tries, vm)) {
+		up_read(&vm->userptr.notifier_lock);
+		err = -EAGAIN;
+		goto out_unlock;
+	}
+
+#undef retry_required
+
+	/* Point of no return. */
+	arm_preempt_fences(vm, &preempt_fences);
+	resume_and_reinstall_preempt_fences(vm);
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+out_unlock_outer:
+	if (err == -EAGAIN) {
+		trace_xe_vm_rebind_worker_retry(vm);
+		goto retry;
+	}
+	up_write(&vm->lock);
+
+	free_preempt_fences(&preempt_fences);
+
+	XE_WARN_ON(err < 0);	/* TODO: Kill VM or put in error state */
+	trace_xe_vm_rebind_worker_exit(vm);
+}
+
+struct async_op_fence;
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_engine *e, struct xe_sync_entry *syncs,
+			u32 num_syncs, struct async_op_fence *afence);
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
+	struct xe_vm *vm = vma->vm;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	long err;
+
+	XE_BUG_ON(!xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+
+	if (!mmu_notifier_range_blockable(range))
+		return false;
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	/* No need to stop gpu access if the userptr is not yet bound. */
+	if (!vma->userptr.initial_bind) {
+		up_write(&vm->userptr.notifier_lock);
+		return true;
+	}
+
+	/*
+	 * Tell exec and rebind worker they need to repin and rebind this
+	 * userptr.
+	 */
+	if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&vma->userptr.invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	}
+
+	up_write(&vm->userptr.notifier_lock);
+
+	/*
+	 * Preempt fences turn into schedule disables, pipeline these.
+	 * Note that even in fault mode, we need to wait for binds and
+	 * unbinds to complete, and those are attached as BOOKMARK fences
+	 * to the vm.
+	 */
+	dma_resv_iter_begin(&cursor, &vm->resv,
+			    DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&cursor, fence)
+		dma_fence_enable_sw_signaling(fence);
+	dma_resv_iter_end(&cursor);
+
+	err = dma_resv_wait_timeout(&vm->resv,
+				    DMA_RESV_USAGE_BOOKKEEP,
+				    false, MAX_SCHEDULE_TIMEOUT);
+	XE_WARN_ON(err <= 0);
+
+	if (xe_vm_in_fault_mode(vm)) {
+		err = xe_vm_invalidate_vma(vma);
+		XE_WARN_ON(err);
+	}
+
+	trace_xe_vma_userptr_invalidate_complete(vma);
+
+	return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+	.invalidate = vma_userptr_invalidate,
+};
+
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+	struct xe_vma *vma, *next;
+	int err = 0;
+	LIST_HEAD(tmp_evict);
+
+	lockdep_assert_held_write(&vm->lock);
+
+	/* Collect invalidated userptrs */
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
+				 userptr.invalidate_link) {
+		list_del_init(&vma->userptr.invalidate_link);
+		list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
+	}
+	spin_unlock(&vm->userptr.invalidated_lock);
+
+	/* Pin and move to temporary list */
+	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
+		err = xe_vma_userptr_pin_pages(vma);
+		if (err < 0)
+			goto out_err;
+
+		list_move_tail(&vma->userptr_link, &tmp_evict);
+	}
+
+	/* Take lock and move to rebind_list for rebinding. */
+	err = dma_resv_lock_interruptible(&vm->resv, NULL);
+	if (err)
+		goto out_err;
+
+	list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
+		list_del_init(&vma->userptr_link);
+		list_move_tail(&vma->rebind_link, &vm->rebind_list);
+	}
+
+	dma_resv_unlock(&vm->resv);
+
+	return 0;
+
+out_err:
+	list_splice_tail(&tmp_evict, &vm->userptr.repin_list);
+
+	return err;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function does an advisory check for whether the VM has userptrs that
+ * need repinning.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+	return (list_empty_careful(&vm->userptr.repin_list) &&
+		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+	       struct xe_sync_entry *syncs, u32 num_syncs);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+{
+	struct dma_fence *fence = NULL;
+	struct xe_vma *vma, *next;
+
+	lockdep_assert_held(&vm->lock);
+	if (xe_vm_no_dma_fences(vm) && !rebind_worker)
+		return NULL;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
+		XE_WARN_ON(!vma->gt_present);
+
+		list_del_init(&vma->rebind_link);
+		dma_fence_put(fence);
+		if (rebind_worker)
+			trace_xe_vma_rebind_worker(vma);
+		else
+			trace_xe_vma_rebind_exec(vma);
+		fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
+		if (IS_ERR(fence))
+			return fence;
+	}
+
+	return fence;
+}
+
+static struct xe_vma *xe_vma_create(struct xe_vm *vm,
+				    struct xe_bo *bo,
+				    u64 bo_offset_or_userptr,
+				    u64 start, u64 end,
+				    bool read_only,
+				    u64 gt_mask)
+{
+	struct xe_vma *vma;
+	struct xe_gt *gt;
+	u8 id;
+
+	XE_BUG_ON(start >= end);
+	XE_BUG_ON(end >= vm->size);
+
+	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (!vma) {
+		vma = ERR_PTR(-ENOMEM);
+		return vma;
+	}
+
+	INIT_LIST_HEAD(&vma->rebind_link);
+	INIT_LIST_HEAD(&vma->unbind_link);
+	INIT_LIST_HEAD(&vma->userptr_link);
+	INIT_LIST_HEAD(&vma->userptr.invalidate_link);
+	INIT_LIST_HEAD(&vma->notifier.rebind_link);
+	INIT_LIST_HEAD(&vma->extobj.link);
+
+	vma->vm = vm;
+	vma->start = start;
+	vma->end = end;
+	if (read_only)
+		vma->pte_flags = PTE_READ_ONLY;
+
+	if (gt_mask) {
+		vma->gt_mask = gt_mask;
+	} else {
+		for_each_gt(gt, vm->xe, id)
+			if (!xe_gt_is_media_type(gt))
+				vma->gt_mask |= 0x1 << id;
+	}
+
+	if (vm->xe->info.platform == XE_PVC)
+		vma->use_atomic_access_pte_bit = true;
+
+	if (bo) {
+		xe_bo_assert_held(bo);
+		vma->bo_offset = bo_offset_or_userptr;
+		vma->bo = xe_bo_get(bo);
+		list_add_tail(&vma->bo_link, &bo->vmas);
+	} else /* userptr */ {
+		u64 size = end - start + 1;
+		int err;
+
+		vma->userptr.ptr = bo_offset_or_userptr;
+
+		err = mmu_interval_notifier_insert(&vma->userptr.notifier,
+						   current->mm,
+						   vma->userptr.ptr, size,
+						   &vma_userptr_notifier_ops);
+		if (err) {
+			kfree(vma);
+			vma = ERR_PTR(err);
+			return vma;
+		}
+
+		vma->userptr.notifier_seq = LONG_MAX;
+		xe_vm_get(vm);
+	}
+
+	return vma;
+}
+
+static bool vm_remove_extobj(struct xe_vma *vma)
+{
+	if (!list_empty(&vma->extobj.link)) {
+		vma->vm->extobj.entries--;
+		list_del_init(&vma->extobj.link);
+		return true;
+	}
+	return false;
+}
+
+static void xe_vma_destroy_late(struct xe_vma *vma)
+{
+	struct xe_vm *vm = vma->vm;
+	struct xe_device *xe = vm->xe;
+	bool read_only = vma->pte_flags & PTE_READ_ONLY;
+
+	if (xe_vma_is_userptr(vma)) {
+		if (vma->userptr.sg) {
+			dma_unmap_sgtable(xe->drm.dev,
+					  vma->userptr.sg,
+					  read_only ? DMA_TO_DEVICE :
+					  DMA_BIDIRECTIONAL, 0);
+			sg_free_table(vma->userptr.sg);
+			vma->userptr.sg = NULL;
+		}
+
+		/*
+		 * Since userptr pages are not pinned, we can't remove
+		 * the notifer until we're sure the GPU is not accessing
+		 * them anymore
+		 */
+		mmu_interval_notifier_remove(&vma->userptr.notifier);
+		xe_vm_put(vm);
+	} else {
+		xe_bo_put(vma->bo);
+	}
+
+	kfree(vma);
+}
+
+static void vma_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vma *vma =
+		container_of(w, struct xe_vma, destroy_work);
+
+	xe_vma_destroy_late(vma);
+}
+
+static struct xe_vma *
+bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
+			    struct xe_vma *ignore)
+{
+	struct xe_vma *vma;
+
+	list_for_each_entry(vma, &bo->vmas, bo_link) {
+		if (vma != ignore && vma->vm == vm && !vma->destroyed)
+			return vma;
+	}
+
+	return NULL;
+}
+
+static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
+				 struct xe_vma *ignore)
+{
+	struct ww_acquire_ctx ww;
+	bool ret;
+
+	xe_bo_lock(bo, &ww, 0, false);
+	ret = !!bo_has_vm_references_locked(bo, vm, ignore);
+	xe_bo_unlock(bo, &ww);
+
+	return ret;
+}
+
+static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
+{
+	list_add(&vma->extobj.link, &vm->extobj.list);
+	vm->extobj.entries++;
+}
+
+static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
+{
+	struct xe_bo *bo = vma->bo;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	if (bo_has_vm_references(bo, vm, vma))
+		return;
+
+	__vm_insert_extobj(vm, vma);
+}
+
+static void vma_destroy_cb(struct dma_fence *fence,
+			   struct dma_fence_cb *cb)
+{
+	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
+
+	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
+	queue_work(system_unbound_wq, &vma->destroy_work);
+}
+
+static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
+{
+	struct xe_vm *vm = vma->vm;
+
+	lockdep_assert_held_write(&vm->lock);
+	XE_BUG_ON(!list_empty(&vma->unbind_link));
+
+	if (xe_vma_is_userptr(vma)) {
+		XE_WARN_ON(!vma->destroyed);
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del_init(&vma->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+		list_del(&vma->userptr_link);
+	} else {
+		xe_bo_assert_held(vma->bo);
+		list_del(&vma->bo_link);
+
+		spin_lock(&vm->notifier.list_lock);
+		list_del(&vma->notifier.rebind_link);
+		spin_unlock(&vm->notifier.list_lock);
+
+		if (!vma->bo->vm && vm_remove_extobj(vma)) {
+			struct xe_vma *other;
+
+			other = bo_has_vm_references_locked(vma->bo, vm, NULL);
+
+			if (other)
+				__vm_insert_extobj(vm, other);
+		}
+	}
+
+	xe_vm_assert_held(vm);
+	if (!list_empty(&vma->rebind_link))
+		list_del(&vma->rebind_link);
+
+	if (fence) {
+		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
+						 vma_destroy_cb);
+
+		if (ret) {
+			XE_WARN_ON(ret != -ENOENT);
+			xe_vma_destroy_late(vma);
+		}
+	} else {
+		xe_vma_destroy_late(vma);
+	}
+}
+
+static void xe_vma_destroy_unlocked(struct xe_vma *vma)
+{
+	struct ttm_validate_buffer tv[2];
+	struct ww_acquire_ctx ww;
+	struct xe_bo *bo = vma->bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	int err;
+
+	memset(tv, 0, sizeof(tv));
+	tv[0].bo = xe_vm_ttm_bo(vma->vm);
+	list_add(&tv[0].head, &objs);
+
+	if (bo) {
+		tv[1].bo = &xe_bo_get(bo)->ttm;
+		list_add(&tv[1].head, &objs);
+	}
+	err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+	XE_WARN_ON(err);
+
+	xe_vma_destroy(vma, NULL);
+
+	ttm_eu_backoff_reservation(&ww, &objs);
+	if (bo)
+		xe_bo_put(bo);
+}
+
+static struct xe_vma *to_xe_vma(const struct rb_node *node)
+{
+	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
+	return (struct xe_vma *)node;
+}
+
+static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
+{
+	if (a->end < b->start) {
+		return -1;
+	} else if (b->end < a->start) {
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
+{
+	return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
+}
+
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
+{
+	struct xe_vma *cmp = to_xe_vma(node);
+	const struct xe_vma *own = key;
+
+	if (own->start > cmp->end)
+		return 1;
+
+	if (own->end < cmp->start)
+		return -1;
+
+	return 0;
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
+{
+	struct rb_node *node;
+
+	if (xe_vm_is_closed(vm))
+		return NULL;
+
+	XE_BUG_ON(vma->end >= vm->size);
+	lockdep_assert_held(&vm->lock);
+
+	node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
+
+	return node ? to_xe_vma(node) : NULL;
+}
+
+static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	XE_BUG_ON(vma->vm != vm);
+	lockdep_assert_held(&vm->lock);
+
+	rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
+}
+
+static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	XE_BUG_ON(vma->vm != vm);
+	lockdep_assert_held(&vm->lock);
+
+	rb_erase(&vma->vm_node, &vm->vmas);
+	if (vm->usm.last_fault_vma == vma)
+		vm->usm.last_fault_vma = NULL;
+}
+
+static void async_op_work_func(struct work_struct *w);
+static void vm_destroy_work_func(struct work_struct *w);
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+{
+	struct xe_vm *vm;
+	int err, i = 0, number_gts = 0;
+	struct xe_gt *gt;
+	u8 id;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return ERR_PTR(-ENOMEM);
+
+	vm->xe = xe;
+	kref_init(&vm->refcount);
+	dma_resv_init(&vm->resv);
+
+	vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
+
+	vm->vmas = RB_ROOT;
+	vm->flags = flags;
+
+	init_rwsem(&vm->lock);
+
+	INIT_LIST_HEAD(&vm->rebind_list);
+
+	INIT_LIST_HEAD(&vm->userptr.repin_list);
+	INIT_LIST_HEAD(&vm->userptr.invalidated);
+	init_rwsem(&vm->userptr.notifier_lock);
+	spin_lock_init(&vm->userptr.invalidated_lock);
+
+	INIT_LIST_HEAD(&vm->notifier.rebind_list);
+	spin_lock_init(&vm->notifier.list_lock);
+
+	INIT_LIST_HEAD(&vm->async_ops.pending);
+	INIT_WORK(&vm->async_ops.work, async_op_work_func);
+	spin_lock_init(&vm->async_ops.lock);
+
+	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
+	INIT_LIST_HEAD(&vm->preempt.engines);
+	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
+
+	INIT_LIST_HEAD(&vm->extobj.list);
+
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		/* We need to immeditatelly exit from any D3 state */
+		xe_pm_runtime_get(xe);
+		xe_device_mem_access_get(xe);
+	}
+
+	err = dma_resv_lock_interruptible(&vm->resv, NULL);
+	if (err)
+		goto err_put;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		vm->flags |= XE_VM_FLAGS_64K;
+
+	for_each_gt(gt, xe, id) {
+		if (xe_gt_is_media_type(gt))
+			continue;
+
+		if (flags & XE_VM_FLAG_MIGRATION &&
+		    gt->info.id != XE_VM_FLAG_GT_ID(flags))
+			continue;
+
+		vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level);
+		if (IS_ERR(vm->pt_root[id])) {
+			err = PTR_ERR(vm->pt_root[id]);
+			vm->pt_root[id] = NULL;
+			goto err_destroy_root;
+		}
+	}
+
+	if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
+		for_each_gt(gt, xe, id) {
+			if (!vm->pt_root[id])
+				continue;
+
+			err = xe_pt_create_scratch(xe, gt, vm);
+			if (err)
+				goto err_scratch_pt;
+		}
+	}
+
+	if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+		vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
+	}
+
+	if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
+		vm->async_ops.fence.context = dma_fence_context_alloc(1);
+		vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
+	}
+
+	/* Fill pt_root after allocating scratch tables */
+	for_each_gt(gt, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+
+		xe_pt_populate_empty(gt, vm, vm->pt_root[id]);
+	}
+	dma_resv_unlock(&vm->resv);
+
+	/* Kernel migration VM shouldn't have a circular loop.. */
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		for_each_gt(gt, xe, id) {
+			struct xe_vm *migrate_vm;
+			struct xe_engine *eng;
+
+			if (!vm->pt_root[id])
+				continue;
+
+			migrate_vm = xe_migrate_get_vm(gt->migrate);
+			eng = xe_engine_create_class(xe, gt, migrate_vm,
+						     XE_ENGINE_CLASS_COPY,
+						     ENGINE_FLAG_VM);
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(eng)) {
+				xe_vm_close_and_put(vm);
+				return ERR_CAST(eng);
+			}
+			vm->eng[id] = eng;
+			number_gts++;
+		}
+	}
+
+	if (number_gts > 1)
+		vm->composite_fence_ctx = dma_fence_context_alloc(1);
+
+	mutex_lock(&xe->usm.lock);
+	if (flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode++;
+	else if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode++;
+	mutex_unlock(&xe->usm.lock);
+
+	trace_xe_vm_create(vm);
+
+	return vm;
+
+err_scratch_pt:
+	for_each_gt(gt, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+
+		i = vm->pt_root[id]->level;
+		while (i)
+			if (vm->scratch_pt[id][--i])
+				xe_pt_destroy(vm->scratch_pt[id][i],
+					      vm->flags, NULL);
+		xe_bo_unpin(vm->scratch_bo[id]);
+		xe_bo_put(vm->scratch_bo[id]);
+	}
+err_destroy_root:
+	for_each_gt(gt, xe, id) {
+		if (vm->pt_root[id])
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+	}
+	dma_resv_unlock(&vm->resv);
+err_put:
+	dma_resv_fini(&vm->resv);
+	kfree(vm);
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
+	}
+	return ERR_PTR(err);
+}
+
+static void flush_async_ops(struct xe_vm *vm)
+{
+	queue_work(system_unbound_wq, &vm->async_ops.work);
+	flush_work(&vm->async_ops.work);
+}
+
+static void vm_error_capture(struct xe_vm *vm, int err,
+			     u32 op, u64 addr, u64 size)
+{
+	struct drm_xe_vm_bind_op_error_capture capture;
+	u64 __user *address =
+		u64_to_user_ptr(vm->async_ops.error_capture.addr);
+	bool in_kthread = !current->mm;
+
+	capture.error = err;
+	capture.op = op;
+	capture.addr = addr;
+	capture.size = size;
+
+	if (in_kthread) {
+		if (!mmget_not_zero(vm->async_ops.error_capture.mm))
+			goto mm_closed;
+		kthread_use_mm(vm->async_ops.error_capture.mm);
+	}
+
+	if (copy_to_user(address, &capture, sizeof(capture)))
+		XE_WARN_ON("Copy to user failed");
+
+	if (in_kthread) {
+		kthread_unuse_mm(vm->async_ops.error_capture.mm);
+		mmput(vm->async_ops.error_capture.mm);
+	}
+
+mm_closed:
+	wake_up_all(&vm->async_ops.error_capture.wq);
+}
+
+void xe_vm_close_and_put(struct xe_vm *vm)
+{
+	struct rb_root contested = RB_ROOT;
+	struct ww_acquire_ctx ww;
+	struct xe_device *xe = vm->xe;
+	struct xe_gt *gt;
+	u8 id;
+
+	XE_BUG_ON(vm->preempt.num_engines);
+
+	vm->size = 0;
+	smp_mb();
+	flush_async_ops(vm);
+	if (xe_vm_in_compute_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
+
+	for_each_gt(gt, xe, id) {
+		if (vm->eng[id]) {
+			xe_engine_kill(vm->eng[id]);
+			xe_engine_put(vm->eng[id]);
+			vm->eng[id] = NULL;
+		}
+	}
+
+	down_write(&vm->lock);
+	xe_vm_lock(vm, &ww, 0, false);
+	while (vm->vmas.rb_node) {
+		struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
+
+		if (xe_vma_is_userptr(vma)) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->destroyed = true;
+			up_read(&vm->userptr.notifier_lock);
+		}
+
+		rb_erase(&vma->vm_node, &vm->vmas);
+
+		/* easy case, remove from VMA? */
+		if (xe_vma_is_userptr(vma) || vma->bo->vm) {
+			xe_vma_destroy(vma, NULL);
+			continue;
+		}
+
+		rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
+	}
+
+	/*
+	 * All vm operations will add shared fences to resv.
+	 * The only exception is eviction for a shared object,
+	 * but even so, the unbind when evicted would still
+	 * install a fence to resv. Hence it's safe to
+	 * destroy the pagetables immediately.
+	 */
+	for_each_gt(gt, xe, id) {
+		if (vm->scratch_bo[id]) {
+			u32 i;
+
+			xe_bo_unpin(vm->scratch_bo[id]);
+			xe_bo_put(vm->scratch_bo[id]);
+			for (i = 0; i < vm->pt_root[id]->level; i++)
+				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
+					      NULL);
+		}
+	}
+	xe_vm_unlock(vm, &ww);
+
+	if (contested.rb_node) {
+
+		/*
+		 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
+		 * Since we hold a refcount to the bo, we can remove and free
+		 * the members safely without locking.
+		 */
+		while (contested.rb_node) {
+			struct xe_vma *vma = to_xe_vma(contested.rb_node);
+
+			rb_erase(&vma->vm_node, &contested);
+			xe_vma_destroy_unlocked(vma);
+		}
+	}
+
+	if (vm->async_ops.error_capture.addr)
+		wake_up_all(&vm->async_ops.error_capture.wq);
+
+	XE_WARN_ON(!list_empty(&vm->extobj.list));
+	up_write(&vm->lock);
+
+	xe_vm_put(vm);
+}
+
+static void vm_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm =
+		container_of(w, struct xe_vm, destroy_work);
+	struct ww_acquire_ctx ww;
+	struct xe_device *xe = vm->xe;
+	struct xe_gt *gt;
+	u8 id;
+	void *lookup;
+
+	/* xe_vm_close_and_put was not called? */
+	XE_WARN_ON(vm->size);
+
+	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
+
+		mutex_lock(&xe->usm.lock);
+		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+		XE_WARN_ON(lookup != vm);
+		mutex_unlock(&xe->usm.lock);
+	}
+
+	/*
+	 * XXX: We delay destroying the PT root until the VM if freed as PT root
+	 * is needed for xe_vm_lock to work. If we remove that dependency this
+	 * can be moved to xe_vm_close_and_put.
+	 */
+	xe_vm_lock(vm, &ww, 0, false);
+	for_each_gt(gt, xe, id) {
+		if (vm->pt_root[id]) {
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+			vm->pt_root[id] = NULL;
+		}
+	}
+	xe_vm_unlock(vm, &ww);
+
+	mutex_lock(&xe->usm.lock);
+	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode--;
+	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode--;
+	mutex_unlock(&xe->usm.lock);
+
+	trace_xe_vm_free(vm);
+	dma_fence_put(vm->rebind_fence);
+	dma_resv_fini(&vm->resv);
+	kfree(vm);
+
+}
+
+void xe_vm_free(struct kref *ref)
+{
+	struct xe_vm *vm = container_of(ref, struct xe_vm, refcount);
+
+	/* To destroy the VM we need to be able to sleep */
+	queue_work(system_unbound_wq, &vm->destroy_work);
+}
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_vm *vm;
+
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, id);
+	mutex_unlock(&xef->vm.lock);
+
+	if (vm)
+		xe_vm_get(vm);
+
+	return vm;
+}
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt)
+{
+	XE_BUG_ON(xe_gt_is_media_type(full_gt));
+
+	return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0,
+			       XE_CACHE_WB);
+}
+
+static struct dma_fence *
+xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
+		 struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_gt *gt;
+	struct dma_fence *fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct xe_vm *vm = vma->vm;
+	int cur_fence = 0, i;
+	int number_gts = hweight_long(vma->gt_present);
+	int err;
+	u8 id;
+
+	trace_xe_vma_unbind(vma);
+
+	if (number_gts > 1) {
+		fences = kmalloc_array(number_gts, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_gt(gt, vm->xe, id) {
+		if (!(vma->gt_present & BIT(id)))
+			goto next;
+
+		XE_BUG_ON(xe_gt_is_media_type(gt));
+
+		fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
+			e = list_next_entry(e, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_gts, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
+
+	return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence) {
+			/* FIXME: Rewind the previous binds? */
+			dma_fence_put(fences[--cur_fence]);
+		}
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+	       struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_gt *gt;
+	struct dma_fence *fence;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct xe_vm *vm = vma->vm;
+	int cur_fence = 0, i;
+	int number_gts = hweight_long(vma->gt_mask);
+	int err;
+	u8 id;
+
+	trace_xe_vma_bind(vma);
+
+	if (number_gts > 1) {
+		fences = kmalloc_array(number_gts, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_gt(gt, vm->xe, id) {
+		if (!(vma->gt_mask & BIT(id)))
+			goto next;
+
+		XE_BUG_ON(xe_gt_is_media_type(gt));
+		fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs,
+					 vma->gt_present & BIT(id));
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
+			e = list_next_entry(e, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_gts, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
+
+	return cf ? &cf->base : fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence) {
+			/* FIXME: Rewind the previous binds? */
+			dma_fence_put(fences[--cur_fence]);
+		}
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+struct async_op_fence {
+	struct dma_fence fence;
+	struct dma_fence_cb cb;
+	struct xe_vm *vm;
+	wait_queue_head_t wq;
+	bool started;
+};
+
+static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	return "xe";
+}
+
+static const char *
+async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	return "async_op_fence";
+}
+
+static const struct dma_fence_ops async_op_fence_ops = {
+	.get_driver_name = async_op_fence_get_driver_name,
+	.get_timeline_name = async_op_fence_get_timeline_name,
+};
+
+static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct async_op_fence *afence =
+		container_of(cb, struct async_op_fence, cb);
+
+	dma_fence_signal(&afence->fence);
+	xe_vm_put(afence->vm);
+	dma_fence_put(&afence->fence);
+}
+
+static void add_async_op_fence_cb(struct xe_vm *vm,
+				  struct dma_fence *fence,
+				  struct async_op_fence *afence)
+{
+	int ret;
+
+	if (!xe_vm_no_dma_fences(vm)) {
+		afence->started = true;
+		smp_wmb();
+		wake_up_all(&afence->wq);
+	}
+
+	afence->vm = xe_vm_get(vm);
+	dma_fence_get(&afence->fence);
+	ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
+	if (ret == -ENOENT)
+		dma_fence_signal(&afence->fence);
+	if (ret) {
+		xe_vm_put(vm);
+		dma_fence_put(&afence->fence);
+	}
+	XE_WARN_ON(ret && ret != -ENOENT);
+}
+
+int xe_vm_async_fence_wait_start(struct dma_fence *fence)
+{
+	if (fence->ops == &async_op_fence_ops) {
+		struct async_op_fence *afence =
+			container_of(fence, struct async_op_fence, fence);
+
+		XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
+
+		smp_rmb();
+		return wait_event_interruptible(afence->wq, afence->started);
+	}
+
+	return 0;
+}
+
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_engine *e, struct xe_sync_entry *syncs,
+			u32 num_syncs, struct async_op_fence *afence)
+{
+	struct dma_fence *fence;
+
+	xe_vm_assert_held(vm);
+
+	fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+	if (afence)
+		add_async_op_fence_cb(vm, fence, afence);
+
+	dma_fence_put(fence);
+	return 0;
+}
+
+static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
+		      struct xe_bo *bo, struct xe_sync_entry *syncs,
+		      u32 num_syncs, struct async_op_fence *afence)
+{
+	int err;
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(bo);
+
+	if (bo) {
+		err = xe_bo_validate(bo, vm, true);
+		if (err)
+			return err;
+	}
+
+	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
+}
+
+static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_engine *e, struct xe_sync_entry *syncs,
+			u32 num_syncs, struct async_op_fence *afence)
+{
+	struct dma_fence *fence;
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(vma->bo);
+
+	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+	if (afence)
+		add_async_op_fence_cb(vm, fence, afence);
+
+	xe_vma_destroy(vma, fence);
+	dma_fence_put(fence);
+
+	return 0;
+}
+
+static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
+					u64 value)
+{
+	if (XE_IOCTL_ERR(xe, !value))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+		return -ENOTSUPP;
+
+	if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr))
+		return -ENOTSUPP;
+
+	vm->async_ops.error_capture.mm = current->mm;
+	vm->async_ops.error_capture.addr = value;
+	init_waitqueue_head(&vm->async_ops.error_capture.wq);
+
+	return 0;
+}
+
+typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
+				     u64 value);
+
+static const xe_vm_set_property_fn vm_set_property_funcs[] = {
+	[XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
+		vm_set_error_capture_address,
+};
+
+static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
+				    u64 extension)
+{
+	u64 __user *address = u64_to_user_ptr(extension);
+	struct drm_xe_ext_vm_set_property ext;
+	int err;
+
+	err = __copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, ext.property >=
+			 ARRAY_SIZE(vm_set_property_funcs)))
+		return -EINVAL;
+
+	return vm_set_property_funcs[ext.property](xe, vm, ext.value);
+}
+
+typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
+				       u64 extension);
+
+static const xe_vm_set_property_fn vm_user_extension_funcs[] = {
+	[XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS	16
+static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
+			      u64 extensions, int ext_number)
+{
+	u64 __user *address = u64_to_user_ptr(extensions);
+	struct xe_user_extension ext;
+	int err;
+
+	if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+		return -E2BIG;
+
+	err = __copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_ERR(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_ERR(xe, ext.name >=
+			 ARRAY_SIZE(vm_user_extension_funcs)))
+		return -EINVAL;
+
+	err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
+	if (XE_IOCTL_ERR(xe, err))
+		return err;
+
+	if (ext.next_extension)
+		return vm_user_extensions(xe, vm, ext.next_extension,
+					  ++ext_number);
+
+	return 0;
+}
+
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
+				    DRM_XE_VM_CREATE_COMPUTE_MODE | \
+				    DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
+				    DRM_XE_VM_CREATE_FAULT_MODE)
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_create *args = data;
+	struct xe_vm *vm;
+	u32 id, asid;
+	int err;
+	u32 flags = 0;
+
+	if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
+			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
+			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+			 xe_device_in_non_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
+			 xe_device_in_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+			 !xe->info.supports_usm))
+		return -EINVAL;
+
+	if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
+		flags |= XE_VM_FLAG_SCRATCH_PAGE;
+	if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
+		flags |= XE_VM_FLAG_COMPUTE_MODE;
+	if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
+		flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
+	if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
+		flags |= XE_VM_FLAG_FAULT_MODE;
+
+	vm = xe_vm_create(xe, flags);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	if (args->extensions) {
+		err = vm_user_extensions(xe, vm, args->extensions, 0);
+		if (XE_IOCTL_ERR(xe, err)) {
+			xe_vm_close_and_put(vm);
+			return err;
+		}
+	}
+
+	mutex_lock(&xef->vm.lock);
+	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->vm.lock);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return err;
+	}
+
+	mutex_lock(&xe->usm.lock);
+	err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+			      XA_LIMIT(0, XE_MAX_ASID - 1),
+			      &xe->usm.next_asid, GFP_KERNEL);
+	mutex_unlock(&xe->usm.lock);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return err;
+	}
+	vm->usm.asid = asid;
+
+	args->vm_id = id;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
+	/* Warning: Security issue - never enable by default */
+	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE);
+#endif
+
+	return 0;
+}
+
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_destroy *args = data;
+	struct xe_vm *vm;
+
+	if (XE_IOCTL_ERR(xe, args->pad))
+		return -EINVAL;
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm))
+		return -ENOENT;
+	xe_vm_put(vm);
+
+	/* FIXME: Extend this check to non-compute mode VMs */
+	if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
+		return -EBUSY;
+
+	mutex_lock(&xef->vm.lock);
+	xa_erase(&xef->vm.xa, args->vm_id);
+	mutex_unlock(&xef->vm.lock);
+
+	xe_vm_close_and_put(vm);
+
+	return 0;
+}
+
+static const u32 region_to_mem_type[] = {
+	XE_PL_TT,
+	XE_PL_VRAM0,
+	XE_PL_VRAM1,
+};
+
+static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+			  struct xe_engine *e, u32 region,
+			  struct xe_sync_entry *syncs, u32 num_syncs,
+			  struct async_op_fence *afence)
+{
+	int err;
+
+	XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type));
+
+	if (!xe_vma_is_userptr(vma)) {
+		err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
+		if (err)
+			return err;
+	}
+
+	if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) {
+		return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
+				  afence);
+	} else {
+		int i;
+
+		/* Nothing to do, signal fences now */
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     dma_fence_get_stub());
+		if (afence)
+			dma_fence_signal(&afence->fence);
+		return 0;
+	}
+}
+
+#define VM_BIND_OP(op)	(op & 0xffff)
+
+static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
+			   struct xe_engine *e, struct xe_bo *bo, u32 op,
+			   u32 region, struct xe_sync_entry *syncs,
+			   u32 num_syncs, struct async_op_fence *afence)
+{
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+		return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence);
+	case XE_VM_BIND_OP_UNMAP:
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence);
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence);
+	case XE_VM_BIND_OP_PREFETCH:
+		return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs,
+				      afence);
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		return -EINVAL;
+	}
+}
+
+struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
+{
+	int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
+		XE_VM_FLAG_GT_ID(vm->flags) : 0;
+
+	/* Safe to use index 0 as all BO in the VM share a single dma-resv lock */
+	return &vm->pt_root[idx]->bo->ttm;
+}
+
+static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
+{
+	tv->num_shared = 1;
+	tv->bo = xe_vm_ttm_bo(vm);
+}
+
+static bool is_map_op(u32 op)
+{
+	return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP ||
+		VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR;
+}
+
+static bool is_unmap_op(u32 op)
+{
+	return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP ||
+		VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL;
+}
+
+static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
+			 struct xe_engine *e, struct xe_bo *bo,
+			 struct drm_xe_vm_bind_op *bind_op,
+			 struct xe_sync_entry *syncs, u32 num_syncs,
+			 struct async_op_fence *afence)
+{
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	struct ttm_validate_buffer tv_bo, tv_vm;
+	struct ww_acquire_ctx ww;
+	struct xe_bo *vbo;
+	int err, i;
+
+	lockdep_assert_held(&vm->lock);
+	XE_BUG_ON(!list_empty(&vma->unbind_link));
+
+	/* Binds deferred to faults, signal fences now */
+	if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) &&
+	    !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) {
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     dma_fence_get_stub());
+		if (afence)
+			dma_fence_signal(&afence->fence);
+		return 0;
+	}
+
+	xe_vm_tv_populate(vm, &tv_vm);
+	list_add_tail(&tv_vm.head, &objs);
+	vbo = vma->bo;
+	if (vbo) {
+		/*
+		 * An unbind can drop the last reference to the BO and
+		 * the BO is needed for ttm_eu_backoff_reservation so
+		 * take a reference here.
+		 */
+		xe_bo_get(vbo);
+
+		tv_bo.bo = &vbo->ttm;
+		tv_bo.num_shared = 1;
+		list_add(&tv_bo.head, &objs);
+	}
+
+again:
+	err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
+	if (!err) {
+		err = __vm_bind_ioctl(vm, vma, e, bo,
+				      bind_op->op, bind_op->region, syncs,
+				      num_syncs, afence);
+		ttm_eu_backoff_reservation(&ww, &objs);
+		if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+			lockdep_assert_held_write(&vm->lock);
+			err = xe_vma_userptr_pin_pages(vma);
+			if (!err)
+				goto again;
+		}
+	}
+	xe_bo_put(vbo);
+
+	return err;
+}
+
+struct async_op {
+	struct xe_vma *vma;
+	struct xe_engine *engine;
+	struct xe_bo *bo;
+	struct drm_xe_vm_bind_op bind_op;
+	struct xe_sync_entry *syncs;
+	u32 num_syncs;
+	struct list_head link;
+	struct async_op_fence *fence;
+};
+
+static void async_op_cleanup(struct xe_vm *vm, struct async_op *op)
+{
+	while (op->num_syncs--)
+		xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
+	kfree(op->syncs);
+	xe_bo_put(op->bo);
+	if (op->engine)
+		xe_engine_put(op->engine);
+	xe_vm_put(vm);
+	if (op->fence)
+		dma_fence_put(&op->fence->fence);
+	kfree(op);
+}
+
+static struct async_op *next_async_op(struct xe_vm *vm)
+{
+	return list_first_entry_or_null(&vm->async_ops.pending,
+					struct async_op, link);
+}
+
+static void vm_set_async_error(struct xe_vm *vm, int err)
+{
+	lockdep_assert_held(&vm->lock);
+	vm->async_ops.error = err;
+}
+
+static void async_op_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
+
+	for (;;) {
+		struct async_op *op;
+		int err;
+
+		if (vm->async_ops.error && !xe_vm_is_closed(vm))
+			break;
+
+		spin_lock_irq(&vm->async_ops.lock);
+		op = next_async_op(vm);
+		if (op)
+			list_del_init(&op->link);
+		spin_unlock_irq(&vm->async_ops.lock);
+
+		if (!op)
+			break;
+
+		if (!xe_vm_is_closed(vm)) {
+			bool first, last;
+
+			down_write(&vm->lock);
+again:
+			first = op->vma->first_munmap_rebind;
+			last = op->vma->last_munmap_rebind;
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+#define FORCE_ASYNC_OP_ERROR	BIT(31)
+			if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) {
+				err = vm_bind_ioctl(vm, op->vma, op->engine,
+						    op->bo, &op->bind_op,
+						    op->syncs, op->num_syncs,
+						    op->fence);
+			} else {
+				err = -ENOMEM;
+				op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR;
+			}
+#else
+			err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo,
+					    &op->bind_op, op->syncs,
+					    op->num_syncs, op->fence);
+#endif
+			/*
+			 * In order for the fencing to work (stall behind
+			 * existing jobs / prevent new jobs from running) all
+			 * the dma-resv slots need to be programmed in a batch
+			 * relative to execs / the rebind worker. The vm->lock
+			 * ensure this.
+			 */
+			if (!err && ((first && VM_BIND_OP(op->bind_op.op) ==
+				      XE_VM_BIND_OP_UNMAP) ||
+				     vm->async_ops.munmap_rebind_inflight)) {
+				if (last) {
+					op->vma->last_munmap_rebind = false;
+					vm->async_ops.munmap_rebind_inflight =
+						false;
+				} else {
+					vm->async_ops.munmap_rebind_inflight =
+						true;
+
+					async_op_cleanup(vm, op);
+
+					spin_lock_irq(&vm->async_ops.lock);
+					op = next_async_op(vm);
+					XE_BUG_ON(!op);
+					list_del_init(&op->link);
+					spin_unlock_irq(&vm->async_ops.lock);
+
+					goto again;
+				}
+			}
+			if (err) {
+				trace_xe_vma_fail(op->vma);
+				drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d",
+					 VM_BIND_OP(op->bind_op.op),
+					 err);
+
+				spin_lock_irq(&vm->async_ops.lock);
+				list_add(&op->link, &vm->async_ops.pending);
+				spin_unlock_irq(&vm->async_ops.lock);
+
+				vm_set_async_error(vm, err);
+				up_write(&vm->lock);
+
+				if (vm->async_ops.error_capture.addr)
+					vm_error_capture(vm, err,
+							 op->bind_op.op,
+							 op->bind_op.addr,
+							 op->bind_op.range);
+				break;
+			}
+			up_write(&vm->lock);
+		} else {
+			trace_xe_vma_flush(op->vma);
+
+			if (is_unmap_op(op->bind_op.op)) {
+				down_write(&vm->lock);
+				xe_vma_destroy_unlocked(op->vma);
+				up_write(&vm->lock);
+			}
+
+			if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+						   &op->fence->fence.flags)) {
+				if (!xe_vm_no_dma_fences(vm)) {
+					op->fence->started = true;
+					smp_wmb();
+					wake_up_all(&op->fence->wq);
+				}
+				dma_fence_signal(&op->fence->fence);
+			}
+		}
+
+		async_op_cleanup(vm, op);
+	}
+}
+
+static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
+				 struct xe_engine *e, struct xe_bo *bo,
+				 struct drm_xe_vm_bind_op *bind_op,
+				 struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct async_op *op;
+	bool installed = false;
+	u64 seqno;
+	int i;
+
+	lockdep_assert_held(&vm->lock);
+
+	op = kmalloc(sizeof(*op), GFP_KERNEL);
+	if (!op) {
+		return -ENOMEM;
+	}
+
+	if (num_syncs) {
+		op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL);
+		if (!op->fence) {
+			kfree(op);
+			return -ENOMEM;
+		}
+
+		seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
+		dma_fence_init(&op->fence->fence, &async_op_fence_ops,
+			       &vm->async_ops.lock, e ? e->bind.fence_ctx :
+			       vm->async_ops.fence.context, seqno);
+
+		if (!xe_vm_no_dma_fences(vm)) {
+			op->fence->vm = vm;
+			op->fence->started = false;
+			init_waitqueue_head(&op->fence->wq);
+		}
+	} else {
+		op->fence = NULL;
+	}
+	op->vma = vma;
+	op->engine = e;
+	op->bo = bo;
+	op->bind_op = *bind_op;
+	op->syncs = syncs;
+	op->num_syncs = num_syncs;
+	INIT_LIST_HEAD(&op->link);
+
+	for (i = 0; i < num_syncs; i++)
+		installed |= xe_sync_entry_signal(&syncs[i], NULL,
+						  &op->fence->fence);
+
+	if (!installed && op->fence)
+		dma_fence_signal(&op->fence->fence);
+
+	spin_lock_irq(&vm->async_ops.lock);
+	list_add_tail(&op->link, &vm->async_ops.pending);
+	spin_unlock_irq(&vm->async_ops.lock);
+
+	if (!vm->async_ops.error)
+		queue_work(system_unbound_wq, &vm->async_ops.work);
+
+	return 0;
+}
+
+static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
+			       struct xe_engine *e, struct xe_bo *bo,
+			       struct drm_xe_vm_bind_op *bind_op,
+			       struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_vma *__vma, *next;
+	struct list_head rebind_list;
+	struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL;
+	u32 num_in_syncs = 0, num_out_syncs = 0;
+	bool first = true, last;
+	int err;
+	int i;
+
+	lockdep_assert_held(&vm->lock);
+
+	/* Not a linked list of unbinds + rebinds, easy */
+	if (list_empty(&vma->unbind_link))
+		return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op,
+					     syncs, num_syncs);
+
+	/*
+	 * Linked list of unbinds + rebinds, decompose syncs into 'in / out'
+	 * passing the 'in' to the first operation and 'out' to the last. Also
+	 * the reference counting is a little tricky, increment the VM / bind
+	 * engine ref count on all but the last operation and increment the BOs
+	 * ref count on each rebind.
+	 */
+
+	XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP &&
+		  VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL &&
+		  VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH);
+
+	/* Decompose syncs */
+	if (num_syncs) {
+		in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL);
+		out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL);
+		if (!in_syncs || !out_syncs) {
+			err = -ENOMEM;
+			goto out_error;
+		}
+
+		for (i = 0; i < num_syncs; ++i) {
+			bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL;
+
+			if (signal)
+				out_syncs[num_out_syncs++] = syncs[i];
+			else
+				in_syncs[num_in_syncs++] = syncs[i];
+		}
+	}
+
+	/* Do unbinds + move rebinds to new list */
+	INIT_LIST_HEAD(&rebind_list);
+	list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) {
+		if (__vma->destroyed ||
+		    VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) {
+			list_del_init(&__vma->unbind_link);
+			xe_bo_get(bo);
+			err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma,
+						    e ? xe_engine_get(e) : NULL,
+						    bo, bind_op, first ?
+						    in_syncs : NULL,
+						    first ? num_in_syncs : 0);
+			if (err) {
+				xe_bo_put(bo);
+				xe_vm_put(vm);
+				if (e)
+					xe_engine_put(e);
+				goto out_error;
+			}
+			in_syncs = NULL;
+			first = false;
+		} else {
+			list_move_tail(&__vma->unbind_link, &rebind_list);
+		}
+	}
+	last = list_empty(&rebind_list);
+	if (!last) {
+		xe_vm_get(vm);
+		if (e)
+			xe_engine_get(e);
+	}
+	err = __vm_bind_ioctl_async(vm, vma, e,
+				    bo, bind_op,
+				    first ? in_syncs :
+				    last ? out_syncs : NULL,
+				    first ? num_in_syncs :
+				    last ? num_out_syncs : 0);
+	if (err) {
+		if (!last) {
+			xe_vm_put(vm);
+			if (e)
+				xe_engine_put(e);
+		}
+		goto out_error;
+	}
+	in_syncs = NULL;
+
+	/* Do rebinds */
+	list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) {
+		list_del_init(&__vma->unbind_link);
+		last = list_empty(&rebind_list);
+
+		if (xe_vma_is_userptr(__vma)) {
+			bind_op->op = XE_VM_BIND_FLAG_ASYNC |
+				XE_VM_BIND_OP_MAP_USERPTR;
+		} else {
+			bind_op->op = XE_VM_BIND_FLAG_ASYNC |
+				XE_VM_BIND_OP_MAP;
+			xe_bo_get(__vma->bo);
+		}
+
+		if (!last) {
+			xe_vm_get(vm);
+			if (e)
+				xe_engine_get(e);
+		}
+
+		err = __vm_bind_ioctl_async(vm, __vma, e,
+					    __vma->bo, bind_op, last ?
+					    out_syncs : NULL,
+					    last ? num_out_syncs : 0);
+		if (err) {
+			if (!last) {
+				xe_vm_put(vm);
+				if (e)
+					xe_engine_put(e);
+			}
+			goto out_error;
+		}
+	}
+
+	kfree(syncs);
+	return 0;
+
+out_error:
+	kfree(in_syncs);
+	kfree(out_syncs);
+	kfree(syncs);
+
+	return err;
+}
+
+static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
+				      u64 addr, u64 range, u32 op)
+{
+	struct xe_device *xe = vm->xe;
+	struct xe_vma *vma, lookup;
+	bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+
+	lockdep_assert_held(&vm->lock);
+
+	lookup.start = addr;
+	lookup.end = addr + range - 1;
+
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		vma = xe_vm_find_overlapping_vma(vm, &lookup);
+		if (XE_IOCTL_ERR(xe, vma))
+			return -EBUSY;
+		break;
+	case XE_VM_BIND_OP_UNMAP:
+	case XE_VM_BIND_OP_PREFETCH:
+		vma = xe_vm_find_overlapping_vma(vm, &lookup);
+		if (XE_IOCTL_ERR(xe, !vma) ||
+		    XE_IOCTL_ERR(xe, (vma->start != addr ||
+				 vma->end != addr + range - 1) && !async))
+			return -EINVAL;
+		break;
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma)
+{
+	down_read(&vm->userptr.notifier_lock);
+	vma->destroyed = true;
+	up_read(&vm->userptr.notifier_lock);
+	xe_vm_remove_vma(vm, vma);
+}
+
+static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	int err;
+
+	if (vma->bo && !vma->bo->vm) {
+		vm_insert_extobj(vm, vma);
+		err = add_preempt_fences(vm, vma->bo);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Find all overlapping VMAs in lookup range and add to a list in the returned
+ * VMA, all of VMAs found will be unbound. Also possibly add 2 new VMAs that
+ * need to be bound if first / last VMAs are not fully unbound. This is akin to
+ * how munmap works.
+ */
+static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
+					    struct xe_vma *lookup)
+{
+	struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
+	struct rb_node *node;
+	struct xe_vma *first = vma, *last = vma, *new_first = NULL,
+		      *new_last = NULL, *__vma, *next;
+	int err = 0;
+	bool first_munmap_rebind = false;
+
+	lockdep_assert_held(&vm->lock);
+	XE_BUG_ON(!vma);
+
+	node = &vma->vm_node;
+	while ((node = rb_next(node))) {
+		if (!xe_vma_cmp_vma_cb(lookup, node)) {
+			__vma = to_xe_vma(node);
+			list_add_tail(&__vma->unbind_link, &vma->unbind_link);
+			last = __vma;
+		} else {
+			break;
+		}
+	}
+
+	node = &vma->vm_node;
+	while ((node = rb_prev(node))) {
+		if (!xe_vma_cmp_vma_cb(lookup, node)) {
+			__vma = to_xe_vma(node);
+			list_add(&__vma->unbind_link, &vma->unbind_link);
+			first = __vma;
+		} else {
+			break;
+		}
+	}
+
+	if (first->start != lookup->start) {
+		struct ww_acquire_ctx ww;
+
+		if (first->bo)
+			err = xe_bo_lock(first->bo, &ww, 0, true);
+		if (err)
+			goto unwind;
+		new_first = xe_vma_create(first->vm, first->bo,
+					  first->bo ? first->bo_offset :
+					  first->userptr.ptr,
+					  first->start,
+					  lookup->start - 1,
+					  (first->pte_flags & PTE_READ_ONLY),
+					  first->gt_mask);
+		if (first->bo)
+			xe_bo_unlock(first->bo, &ww);
+		if (!new_first) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+		if (!first->bo) {
+			err = xe_vma_userptr_pin_pages(new_first);
+			if (err)
+				goto unwind;
+		}
+		err = prep_replacement_vma(vm, new_first);
+		if (err)
+			goto unwind;
+	}
+
+	if (last->end != lookup->end) {
+		struct ww_acquire_ctx ww;
+		u64 chunk = lookup->end + 1 - last->start;
+
+		if (last->bo)
+			err = xe_bo_lock(last->bo, &ww, 0, true);
+		if (err)
+			goto unwind;
+		new_last = xe_vma_create(last->vm, last->bo,
+					 last->bo ? last->bo_offset + chunk :
+					 last->userptr.ptr + chunk,
+					 last->start + chunk,
+					 last->end,
+					 (last->pte_flags & PTE_READ_ONLY),
+					 last->gt_mask);
+		if (last->bo)
+			xe_bo_unlock(last->bo, &ww);
+		if (!new_last) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+		if (!last->bo) {
+			err = xe_vma_userptr_pin_pages(new_last);
+			if (err)
+				goto unwind;
+		}
+		err = prep_replacement_vma(vm, new_last);
+		if (err)
+			goto unwind;
+	}
+
+	prep_vma_destroy(vm, vma);
+	if (list_empty(&vma->unbind_link) && (new_first || new_last))
+		vma->first_munmap_rebind = true;
+	list_for_each_entry(__vma, &vma->unbind_link, unbind_link) {
+		if ((new_first || new_last) && !first_munmap_rebind) {
+			__vma->first_munmap_rebind = true;
+			first_munmap_rebind = true;
+		}
+		prep_vma_destroy(vm, __vma);
+	}
+	if (new_first) {
+		xe_vm_insert_vma(vm, new_first);
+		list_add_tail(&new_first->unbind_link, &vma->unbind_link);
+		if (!new_last)
+			new_first->last_munmap_rebind = true;
+	}
+	if (new_last) {
+		xe_vm_insert_vma(vm, new_last);
+		list_add_tail(&new_last->unbind_link, &vma->unbind_link);
+		new_last->last_munmap_rebind = true;
+	}
+
+	return vma;
+
+unwind:
+	list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link)
+		list_del_init(&__vma->unbind_link);
+	if (new_last) {
+		prep_vma_destroy(vm, new_last);
+		xe_vma_destroy_unlocked(new_last);
+	}
+	if (new_first) {
+		prep_vma_destroy(vm, new_first);
+		xe_vma_destroy_unlocked(new_first);
+	}
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch
+ */
+static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
+					      struct xe_vma *lookup,
+					      u32 region)
+{
+	struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma,
+		      *next;
+	struct rb_node *node;
+
+	if (!xe_vma_is_userptr(vma)) {
+		if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region]))
+			return ERR_PTR(-EINVAL);
+	}
+
+	node = &vma->vm_node;
+	while ((node = rb_next(node))) {
+		if (!xe_vma_cmp_vma_cb(lookup, node)) {
+			__vma = to_xe_vma(node);
+			if (!xe_vma_is_userptr(__vma)) {
+				if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
+					goto flush_list;
+			}
+			list_add_tail(&__vma->unbind_link, &vma->unbind_link);
+		} else {
+			break;
+		}
+	}
+
+	node = &vma->vm_node;
+	while ((node = rb_prev(node))) {
+		if (!xe_vma_cmp_vma_cb(lookup, node)) {
+			__vma = to_xe_vma(node);
+			if (!xe_vma_is_userptr(__vma)) {
+				if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
+					goto flush_list;
+			}
+			list_add(&__vma->unbind_link, &vma->unbind_link);
+		} else {
+			break;
+		}
+	}
+
+	return vma;
+
+flush_list:
+	list_for_each_entry_safe(__vma, next, &vma->unbind_link,
+				 unbind_link)
+		list_del_init(&__vma->unbind_link);
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
+						struct xe_bo *bo)
+{
+	struct xe_vma *first = NULL, *vma;
+
+	lockdep_assert_held(&vm->lock);
+	xe_bo_assert_held(bo);
+
+	list_for_each_entry(vma, &bo->vmas, bo_link) {
+		if (vma->vm != vm)
+			continue;
+
+		prep_vma_destroy(vm, vma);
+		if (!first)
+			first = vma;
+		else
+			list_add_tail(&vma->unbind_link, &first->unbind_link);
+	}
+
+	return first;
+}
+
+static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
+					       struct xe_bo *bo,
+					       u64 bo_offset_or_userptr,
+					       u64 addr, u64 range, u32 op,
+					       u64 gt_mask, u32 region)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_vma *vma, lookup;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+
+	lookup.start = addr;
+	lookup.end = addr + range - 1;
+
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+		XE_BUG_ON(!bo);
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return ERR_PTR(err);
+		vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
+				    addr + range - 1,
+				    op & XE_VM_BIND_FLAG_READONLY,
+				    gt_mask);
+		xe_bo_unlock(bo, &ww);
+		if (!vma)
+			return ERR_PTR(-ENOMEM);
+
+		xe_vm_insert_vma(vm, vma);
+		if (!bo->vm) {
+			vm_insert_extobj(vm, vma);
+			err = add_preempt_fences(vm, bo);
+			if (err) {
+				prep_vma_destroy(vm, vma);
+				xe_vma_destroy_unlocked(vma);
+
+				return ERR_PTR(err);
+			}
+		}
+		break;
+	case XE_VM_BIND_OP_UNMAP:
+		vma = vm_unbind_lookup_vmas(vm, &lookup);
+		break;
+	case XE_VM_BIND_OP_PREFETCH:
+		vma = vm_prefetch_lookup_vmas(vm, &lookup, region);
+		break;
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		XE_BUG_ON(!bo);
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return ERR_PTR(err);
+		vma = vm_unbind_all_lookup_vmas(vm, bo);
+		if (!vma)
+			vma = ERR_PTR(-EINVAL);
+		xe_bo_unlock(bo, &ww);
+		break;
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		XE_BUG_ON(bo);
+
+		vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
+				    addr + range - 1,
+				    op & XE_VM_BIND_FLAG_READONLY,
+				    gt_mask);
+		if (!vma)
+			return ERR_PTR(-ENOMEM);
+
+		err = xe_vma_userptr_pin_pages(vma);
+		if (err) {
+			xe_vma_destroy(vma, NULL);
+
+			return ERR_PTR(err);
+		} else {
+			xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		vma = ERR_PTR(-EINVAL);
+	}
+
+	return vma;
+}
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+#define SUPPORTED_FLAGS	\
+	(FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
+	 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+#else
+#define SUPPORTED_FLAGS	\
+	(XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
+	 XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+#endif
+#define XE_64K_PAGE_MASK 0xffffull
+
+#define MAX_BINDS	512	/* FIXME: Picking random upper limit */
+
+static int vm_bind_ioctl_check_args(struct xe_device *xe,
+				    struct drm_xe_vm_bind *args,
+				    struct drm_xe_vm_bind_op **bind_ops,
+				    bool *async)
+{
+	int err;
+	int i;
+
+	if (XE_IOCTL_ERR(xe, args->extensions) ||
+	    XE_IOCTL_ERR(xe, !args->num_binds) ||
+	    XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
+		return -EINVAL;
+
+	if (args->num_binds > 1) {
+		u64 __user *bind_user =
+			u64_to_user_ptr(args->vector_of_binds);
+
+		*bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
+				    args->num_binds, GFP_KERNEL);
+		if (!*bind_ops)
+			return -ENOMEM;
+
+		err = __copy_from_user(*bind_ops, bind_user,
+				       sizeof(struct drm_xe_vm_bind_op) *
+				       args->num_binds);
+		if (XE_IOCTL_ERR(xe, err)) {
+			err = -EFAULT;
+			goto free_bind_ops;
+		}
+	} else {
+		*bind_ops = &args->bind;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = (*bind_ops)[i].range;
+		u64 addr = (*bind_ops)[i].addr;
+		u32 op = (*bind_ops)[i].op;
+		u32 obj = (*bind_ops)[i].obj;
+		u64 obj_offset = (*bind_ops)[i].obj_offset;
+		u32 region = (*bind_ops)[i].region;
+
+		if (i == 0) {
+			*async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+		} else if (XE_IOCTL_ERR(xe, !*async) ||
+			   XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
+			   XE_IOCTL_ERR(xe, VM_BIND_OP(op) ==
+					XE_VM_BIND_OP_RESTART)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_ERR(xe, !*async &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_ERR(xe, !*async &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
+				 XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_ERR(xe, !obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) ||
+		    XE_IOCTL_ERR(xe, !obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_ERR(xe, addr &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_ERR(xe, range &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_ERR(xe, obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_ERR(xe, obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_ERR(xe, region &&
+				 VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_ERR(xe, !(BIT(region) &
+				       xe->info.mem_region_mask)) ||
+		    XE_IOCTL_ERR(xe, obj &&
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) ||
+		    XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) ||
+		    XE_IOCTL_ERR(xe, range & ~PAGE_MASK) ||
+		    XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) !=
+				 XE_VM_BIND_OP_RESTART &&
+				 VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+	}
+
+	return 0;
+
+free_bind_ops:
+	if (args->num_binds > 1)
+		kfree(*bind_ops);
+	return err;
+}
+
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_bind *args = data;
+	struct drm_xe_sync __user *syncs_user;
+	struct xe_bo **bos = NULL;
+	struct xe_vma **vmas = NULL;
+	struct xe_vm *vm;
+	struct xe_engine *e = NULL;
+	u32 num_syncs;
+	struct xe_sync_entry *syncs = NULL;
+	struct drm_xe_vm_bind_op *bind_ops;
+	bool async;
+	int err;
+	int i, j = 0;
+
+	err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
+	if (err)
+		return err;
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm)) {
+		err = -EINVAL;
+		goto free_objs;
+	}
+
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+		DRM_ERROR("VM closed while we began looking up?\n");
+		err = -ENOENT;
+		goto put_vm;
+	}
+
+	if (args->engine_id) {
+		e = xe_engine_lookup(xef, args->engine_id);
+		if (XE_IOCTL_ERR(xe, !e)) {
+			err = -ENOENT;
+			goto put_vm;
+		}
+		if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
+			err = -EINVAL;
+			goto put_engine;
+		}
+	}
+
+	if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
+		if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+			err = -ENOTSUPP;
+		if (XE_IOCTL_ERR(xe, !err && args->num_syncs))
+			err = EINVAL;
+		if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error))
+			err = -EPROTO;
+
+		if (!err) {
+			down_write(&vm->lock);
+			trace_xe_vm_restart(vm);
+			vm_set_async_error(vm, 0);
+			up_write(&vm->lock);
+
+			queue_work(system_unbound_wq, &vm->async_ops.work);
+
+			/* Rebinds may have been blocked, give worker a kick */
+			if (xe_vm_in_compute_mode(vm))
+				queue_work(vm->xe->ordered_wq,
+					   &vm->preempt.rebind_work);
+		}
+
+		goto put_engine;
+	}
+
+	if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
+			 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
+		err = -ENOTSUPP;
+		goto put_engine;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+
+		if (XE_IOCTL_ERR(xe, range > vm->size) ||
+		    XE_IOCTL_ERR(xe, addr > vm->size - range)) {
+			err = -EINVAL;
+			goto put_engine;
+		}
+
+		if (bind_ops[i].gt_mask) {
+			u64 valid_gts = BIT(xe->info.tile_count) - 1;
+
+			if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask &
+					 ~valid_gts)) {
+				err = -EINVAL;
+				goto put_engine;
+			}
+		}
+	}
+
+	bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
+	if (!bos) {
+		err = -ENOMEM;
+		goto put_engine;
+	}
+
+	vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL);
+	if (!vmas) {
+		err = -ENOMEM;
+		goto put_engine;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		struct drm_gem_object *gem_obj;
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 obj = bind_ops[i].obj;
+		u64 obj_offset = bind_ops[i].obj_offset;
+
+		if (!obj)
+			continue;
+
+		gem_obj = drm_gem_object_lookup(file, obj);
+		if (XE_IOCTL_ERR(xe, !gem_obj)) {
+			err = -ENOENT;
+			goto put_obj;
+		}
+		bos[i] = gem_to_xe_bo(gem_obj);
+
+		if (XE_IOCTL_ERR(xe, range > bos[i]->size) ||
+		    XE_IOCTL_ERR(xe, obj_offset >
+				 bos[i]->size - range)) {
+			err = -EINVAL;
+			goto put_obj;
+		}
+
+		if (bos[i]->flags & XE_BO_INTERNAL_64K) {
+			if (XE_IOCTL_ERR(xe, obj_offset &
+					 XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) {
+				err = -EINVAL;
+				goto put_obj;
+			}
+		}
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto put_obj;
+		}
+	}
+
+	syncs_user = u64_to_user_ptr(args->syncs);
+	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+					  &syncs_user[num_syncs], false,
+					  xe_vm_no_dma_fences(vm));
+		if (err)
+			goto free_syncs;
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto free_syncs;
+
+	/* Do some error checking first to make the unwind easier */
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+
+		err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
+		if (err)
+			goto release_vm_lock;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u64 gt_mask = bind_ops[i].gt_mask;
+		u32 region = bind_ops[i].region;
+
+		vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset,
+						   addr, range, op, gt_mask,
+						   region);
+		if (IS_ERR(vmas[i])) {
+			err = PTR_ERR(vmas[i]);
+			vmas[i] = NULL;
+			goto destroy_vmas;
+		}
+	}
+
+	for (j = 0; j < args->num_binds; ++j) {
+		struct xe_sync_entry *__syncs;
+		u32 __num_syncs = 0;
+		bool first_or_last = j == 0 || j == args->num_binds - 1;
+
+		if (args->num_binds == 1) {
+			__num_syncs = num_syncs;
+			__syncs = syncs;
+		} else if (first_or_last && num_syncs) {
+			bool first = j == 0;
+
+			__syncs = kmalloc(sizeof(*__syncs) * num_syncs,
+					  GFP_KERNEL);
+			if (!__syncs) {
+				err = ENOMEM;
+				break;
+			}
+
+			/* in-syncs on first bind, out-syncs on last bind */
+			for (i = 0; i < num_syncs; ++i) {
+				bool signal = syncs[i].flags &
+					DRM_XE_SYNC_SIGNAL;
+
+				if ((first && !signal) || (!first && signal))
+					__syncs[__num_syncs++] = syncs[i];
+			}
+		} else {
+			__num_syncs = 0;
+			__syncs = NULL;
+		}
+
+		if (async) {
+			bool last = j == args->num_binds - 1;
+
+			/*
+			 * Each pass of async worker drops the ref, take a ref
+			 * here, 1 set of refs taken above
+			 */
+			if (!last) {
+				if (e)
+					xe_engine_get(e);
+				xe_vm_get(vm);
+			}
+
+			err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j],
+						  bind_ops + j, __syncs,
+						  __num_syncs);
+			if (err && !last) {
+				if (e)
+					xe_engine_put(e);
+				xe_vm_put(vm);
+			}
+			if (err)
+				break;
+		} else {
+			XE_BUG_ON(j != 0);	/* Not supported */
+			err = vm_bind_ioctl(vm, vmas[j], e, bos[j],
+					    bind_ops + j, __syncs,
+					    __num_syncs, NULL);
+			break;	/* Needed so cleanup loops work */
+		}
+	}
+
+	/* Most of cleanup owned by the async bind worker */
+	if (async && !err) {
+		up_write(&vm->lock);
+		if (args->num_binds > 1)
+			kfree(syncs);
+		goto free_objs;
+	}
+
+destroy_vmas:
+	for (i = j; err && i < args->num_binds; ++i) {
+		u32 op = bind_ops[i].op;
+		struct xe_vma *vma, *next;
+
+		if (!vmas[i])
+			break;
+
+		list_for_each_entry_safe(vma, next, &vma->unbind_link,
+					 unbind_link) {
+			list_del_init(&vma->unbind_link);
+			if (!vma->destroyed) {
+				prep_vma_destroy(vm, vma);
+				xe_vma_destroy_unlocked(vma);
+			}
+		}
+
+		switch (VM_BIND_OP(op)) {
+		case XE_VM_BIND_OP_MAP:
+			prep_vma_destroy(vm, vmas[i]);
+			xe_vma_destroy_unlocked(vmas[i]);
+			break;
+		case XE_VM_BIND_OP_MAP_USERPTR:
+			prep_vma_destroy(vm, vmas[i]);
+			xe_vma_destroy_unlocked(vmas[i]);
+			break;
+		}
+	}
+release_vm_lock:
+	up_write(&vm->lock);
+free_syncs:
+	while (num_syncs--) {
+		if (async && j &&
+		    !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL))
+			continue;	/* Still in async worker */
+		xe_sync_entry_cleanup(&syncs[num_syncs]);
+	}
+
+	kfree(syncs);
+put_obj:
+	for (i = j; i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
+put_engine:
+	if (e)
+		xe_engine_put(e);
+put_vm:
+	xe_vm_put(vm);
+free_objs:
+	kfree(bos);
+	kfree(vmas);
+	if (args->num_binds > 1)
+		kfree(bind_ops);
+	return err;
+}
+
+/*
+ * XXX: Using the TTM wrappers for now, likely can call into dma-resv code
+ * directly to optimize. Also this likely should be an inline function.
+ */
+int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr)
+{
+	struct ttm_validate_buffer tv_vm;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+
+	XE_BUG_ON(!ww);
+
+	tv_vm.num_shared = num_resv;
+	tv_vm.bo = xe_vm_ttm_bo(vm);;
+	list_add_tail(&tv_vm.head, &objs);
+
+	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+}
+
+void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
+{
+	dma_resv_unlock(&vm->resv);
+	ww_acquire_fini(ww);
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * Walks a list of page tables leaves which it memset the entries owned by this
+ * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
+ * complete.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+	struct xe_device *xe = vma->vm->xe;
+	struct xe_gt *gt;
+	u32 gt_needs_invalidate = 0;
+	int seqno[XE_MAX_GT];
+	u8 id;
+	int ret;
+
+	XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm));
+	trace_xe_vma_usm_invalidate(vma);
+
+	/* Check that we don't race with page-table updates */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (xe_vma_is_userptr(vma)) {
+			WARN_ON_ONCE(!mmu_interval_check_retry
+				     (&vma->userptr.notifier,
+				      vma->userptr.notifier_seq));
+			WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv,
+							     DMA_RESV_USAGE_BOOKKEEP));
+
+		} else {
+			xe_bo_assert_held(vma->bo);
+		}
+	}
+
+	for_each_gt(gt, xe, id) {
+		if (xe_pt_zap_ptes(gt, vma)) {
+			gt_needs_invalidate |= BIT(id);
+			xe_device_wmb(xe);
+			seqno[id] = xe_gt_tlb_invalidation(gt);
+			if (seqno[id] < 0)
+				return seqno[id];
+		}
+	}
+
+	for_each_gt(gt, xe, id) {
+		if (gt_needs_invalidate & BIT(id)) {
+			ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	vma->usm.gt_invalidated = vma->gt_mask;
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+	struct rb_node *node;
+	bool is_lmem;
+	uint64_t addr;
+
+	if (!down_read_trylock(&vm->lock)) {
+		drm_printf(p, " Failed to acquire VM lock to dump capture");
+		return 0;
+	}
+	if (vm->pt_root[gt_id]) {
+		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
+		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS");
+	}
+
+	for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
+		struct xe_vma *vma = to_xe_vma(node);
+		bool is_userptr = xe_vma_is_userptr(vma);
+
+		if (is_userptr) {
+			struct xe_res_cursor cur;
+
+			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
+			addr = xe_res_dma(&cur);
+		} else {
+			addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
+		}
+		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
+			   vma->start, vma->end, vma->end - vma->start + 1ull,
+			   addr, is_userptr ? "USR" : is_lmem ? "VRAM" : "SYS");
+	}
+	up_read(&vm->lock);
+
+	return 0;
+}
+#else
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+	return 0;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
new file mode 100644
index 0000000000000..3468ed9d05288
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_H_
+#define _XE_VM_H_
+
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_printer;
+struct drm_file;
+
+struct ttm_buffer_object;
+struct ttm_validate_buffer;
+
+struct xe_engine;
+struct xe_file;
+struct xe_sync_entry;
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
+void xe_vm_free(struct kref *ref);
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);
+
+static inline struct xe_vm *xe_vm_get(struct xe_vm *vm)
+{
+	kref_get(&vm->refcount);
+	return vm;
+}
+
+static inline void xe_vm_put(struct xe_vm *vm)
+{
+	kref_put(&vm->refcount, xe_vm_free);
+}
+
+int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr);
+
+void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww);
+
+static inline bool xe_vm_is_closed(struct xe_vm *vm)
+{
+	/* Only guaranteed not to change when vm->resv is held */
+	return !vm->size;
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma);
+
+#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt);
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file);
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file);
+
+void xe_vm_close_and_put(struct xe_vm *vm);
+
+static inline bool xe_vm_in_compute_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_COMPUTE_MODE;
+}
+
+static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_FAULT_MODE;
+}
+
+static inline bool xe_vm_no_dma_fences(struct xe_vm *vm)
+{
+	return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm);
+}
+
+int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+
+int xe_vm_invalidate_vma(struct xe_vma *vma);
+
+int xe_vm_async_fence_wait_start(struct dma_fence *fence);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
+
+static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+{
+	return !vma->bo;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma);
+
+int xe_vma_userptr_check_repin(struct xe_vma *vma);
+
+/*
+ * XE_ONSTACK_TV is used to size the tv_onstack array that is input
+ * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
+ */
+#define XE_ONSTACK_TV 20
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+			struct ttm_validate_buffer *tv_onstack,
+			struct ttm_validate_buffer **tv,
+			struct list_head *objs,
+			bool intr,
+			unsigned int num_shared);
+
+void xe_vm_unlock_dma_resv(struct xe_vm *vm,
+			   struct ttm_validate_buffer *tv_onstack,
+			   struct ttm_validate_buffer *tv,
+			   struct ww_acquire_ctx *ww,
+			   struct list_head *objs);
+
+void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
+			     enum dma_resv_usage usage);
+
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define vm_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void vm_dbg(const struct drm_device *dev,
+			  const char *format, ...)
+{ /* noop */ }
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
new file mode 100644
index 0000000000000..5b6216964c45d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_DOC_H_
+#define _XE_VM_DOC_H_
+
+/**
+ * DOC: XE VM (user address space)
+ *
+ * VM creation
+ * ===========
+ *
+ * Allocate a physical page for root of the page table structure, create default
+ * bind engine, and return a handle to the user.
+ *
+ * Scratch page
+ * ------------
+ *
+ * If the VM is created with the flag, DRM_XE_VM_CREATE_SCRATCH_PAGE, set the
+ * entire page table structure defaults pointing to blank page allocated by the
+ * VM. Invalid memory access rather than fault just read / write to this page.
+ *
+ * VM bind (create GPU mapping for a BO or userptr)
+ * ================================================
+ *
+ * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same
+ * in / out fence interface (struct drm_xe_sync) as execs which allows users to
+ * think of binds and execs as more or less the same operation.
+ *
+ * Operations
+ * ----------
+ *
+ * XE_VM_BIND_OP_MAP		- Create mapping for a BO
+ * XE_VM_BIND_OP_UNMAP		- Destroy mapping for a BO / userptr
+ * XE_VM_BIND_OP_MAP_USERPTR	- Create mapping for userptr
+ *
+ * Implementation details
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * All bind operations are implemented via a hybrid approach of using the CPU
+ * and GPU to modify page tables. If a new physical page is allocated in the
+ * page table structure we populate that page via the CPU and insert that new
+ * page into the existing page table structure via a GPU job. Also any existing
+ * pages in the page table structure that need to be modified also are updated
+ * via the GPU job. As the root physical page is prealloced on VM creation our
+ * GPU job will always have at least 1 update. The in / out fences are passed to
+ * this job so again this is conceptually the same as an exec.
+ *
+ * Very simple example of few binds on an empty VM with 48 bits of address space
+ * and the resulting operations:
+ *
+ * .. code-block::
+ *
+ *	bind BO0 0x0-0x1000
+ *	alloc page level 3a, program PTE[0] to BO0 phys address (CPU)
+ *	alloc page level 2, program PDE[0] page level 3a phys address (CPU)
+ *	alloc page level 1, program PDE[0] page level 2 phys address (CPU)
+ *	update root PDE[0] to page level 1 phys address (GPU)
+ *
+ *	bind BO1 0x201000-0x202000
+ *	alloc page level 3b, program PTE[1] to BO1 phys address (CPU)
+ *	update page level 2 PDE[1] to page level 3b phys address (GPU)
+ *
+ *	bind BO2 0x1ff000-0x201000
+ *	update page level 3a PTE[511] to BO2 phys addres (GPU)
+ *	update page level 3b PTE[0] to BO2 phys addres + 0x1000 (GPU)
+ *
+ * GPU bypass
+ * ~~~~~~~~~~
+ *
+ * In the above example the steps using the GPU can be converted to CPU if the
+ * bind can be done immediately (all in-fences satisfied, VM dma-resv kernel
+ * slot is idle).
+ *
+ * Address space
+ * -------------
+ *
+ * Depending on platform either 48 or 57 bits of address space is supported.
+ *
+ * Page sizes
+ * ----------
+ *
+ * The minimum page size is either 4k or 64k depending on platform and memory
+ * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the
+ * minimum page size.
+ *
+ * Larger pages (2M or 1GB) can be used for BOs in VRAM, the BO physical address
+ * is aligned to the larger pages size, and VA is aligned to the larger page
+ * size. Larger pages for userptrs / BOs in sysmem should be possible but is not
+ * yet implemented.
+ *
+ * Sync error handling mode
+ * ------------------------
+ *
+ * In both modes during the bind IOCTL the user input is validated. In sync
+ * error handling mode the newly bound BO is validated (potentially moved back
+ * to a region of memory where is can be used), page tables are updated by the
+ * CPU and the job to do the GPU binds is created in the IOCTL itself. This step
+ * can fail due to memory pressure. The user can recover by freeing memory and
+ * trying this operation again.
+ *
+ * Async error handling mode
+ * -------------------------
+ *
+ * In async error handling the step of validating the BO, updating page tables,
+ * and generating a job are deferred to an async worker. As this step can now
+ * fail after the IOCTL has reported success we need an error handling flow for
+ * which the user can recover from.
+ *
+ * The solution is for a user to register a user address with the VM which the
+ * VM uses to report errors to. The ufence wait interface can be used to wait on
+ * a VM going into an error state. Once an error is reported the VM's async
+ * worker is paused. While the VM's async worker is paused sync,
+ * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
+ * uses believe the error state is fixed, the async worker can be resumed via
+ * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the
+ * first operation processed is the operation that caused the original error.
+ *
+ * Bind queues / engines
+ * ---------------------
+ *
+ * Think of the case where we have two bind operations A + B and are submitted
+ * in that order. A has in fences while B has none. If using a single bind
+ * queue, B is now blocked on A's in fences even though it is ready to run. This
+ * example is a real use case for VK sparse binding. We work around this
+ * limitation by implementing bind engines.
+ *
+ * In the bind IOCTL the user can optionally pass in an engine ID which must map
+ * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND.
+ * Underneath this is a really virtual engine that can run on any of the copy
+ * hardware engines. The job(s) created each IOCTL are inserted into this
+ * engine's ring. In the example above if A and B have different bind engines B
+ * is free to pass A. If the engine ID field is omitted, the default bind queue
+ * for the VM is used.
+ *
+ * TODO: Explain race in issue 41 and how we solve it
+ *
+ * Array of bind operations
+ * ------------------------
+ *
+ * The uAPI allows multiple binds operations to be passed in via a user array,
+ * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface
+ * matches the VK sparse binding API. The implementation is rather simple, parse
+ * the array into a list of operations, pass the in fences to the first operation,
+ * and pass the out fences to the last operation. The ordered nature of a bind
+ * engine makes this possible.
+ *
+ * Munmap semantics for unbinds
+ * ----------------------------
+ *
+ * Munmap allows things like:
+ *
+ * .. code-block::
+ *
+ *	0x0000-0x2000 and 0x3000-0x5000 have mappings
+ *	Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000
+ *
+ * To support this semantic in the above example we decompose the above example
+ * into 4 operations:
+ *
+ * .. code-block::
+ *
+ *	unbind 0x0000-0x2000
+ *	unbind 0x3000-0x5000
+ *	rebind 0x0000-0x1000
+ *	rebind 0x4000-0x5000
+ *
+ * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This
+ * falls apart when using large pages at the edges and the unbind forces us to
+ * use a smaller page size. For simplity we always issue a set of unbinds
+ * unmapping anything in the range and at most 2 rebinds on the edges.
+ *
+ * Similar to an array of binds, in fences are passed to the first operation and
+ * out fences are signaled on the last operation.
+ *
+ * In this example there is a window of time where 0x0000-0x1000 and
+ * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be
+ * removed from the mapping. To work around this we treat any munmap style
+ * unbinds which require a rebind as a kernel operations (BO eviction or userptr
+ * invalidation). The first operation waits on the VM's
+ * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on VM to
+ * complete / triggers preempt fences) and the last operation is installed in
+ * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resume compute mode
+ * VM). The caveat is all dma-resv slots must be updated atomically with respect
+ * to execs and compute mode rebind worker. To accomplish this, hold the
+ * vm->lock in write mode from the first operation until the last.
+ *
+ * Deferred binds in fault mode
+ * ----------------------------
+ *
+ * In a VM is in fault mode (TODO: link to fault mode), new bind operations that
+ * create mappings are by default are deferred to the page fault handler (first
+ * use). This behavior can be overriden by setting the flag
+ * XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
+ * immediately.
+ *
+ * User pointer
+ * ============
+ *
+ * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the
+ * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO
+ * was created and then a binding was created. We bypass creating a dummy BO in
+ * XE and simply create a binding directly from the userptr.
+ *
+ * Invalidation
+ * ------------
+ *
+ * Since this a core kernel managed memory the kernel can move this memory
+ * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * a user poiter is about to move. The invalidation notifier needs to block
+ * until all pending users (jobs or compute mode engines) of the userptr are
+ * idle to ensure no faults. This done by waiting on all of VM's dma-resv slots.
+ *
+ * Rebinds
+ * -------
+ *
+ * Either the next exec (non-compute) or rebind worker (compute mode) will
+ * rebind the userptr. The invalidation MMU notifier kicks the rebind worker
+ * after the VM dma-resv wait if the VM is in compute mode.
+ *
+ * Compute mode
+ * ============
+ *
+ * A VM in compute mode enables long running workloads and ultra low latency
+ * submission (ULLS). ULLS is implemented via a continuously running batch +
+ * semaphores. This enables to the user to insert jump to new batch commands
+ * into the continuously running batch. In both cases these batches exceed the
+ * time a dma fence is allowed to exist for before signaling, as such dma fences
+ * are not used when a VM is in compute mode. User fences (TODO: link user fence
+ * doc) are used instead to signal operation's completion.
+ *
+ * Preempt fences
+ * --------------
+ *
+ * If the kernel decides to move memory around (either userptr invalidate, BO
+ * eviction, or mumap style unbind which results in a rebind) and a batch is
+ * running on an engine, that batch can fault or cause a memory corruption as
+ * page tables for the moved memory are no longer valid. To work around this we
+ * introduce the concept of preempt fences. When sw signaling is enabled on a
+ * preempt fence it tells the submission backend to kick that engine off the
+ * hardware and the preempt fence signals when the engine is off the hardware.
+ * Once all preempt fences are signaled for a VM the kernel can safely move the
+ * memory and kick the rebind worker which resumes all the engines execution.
+ *
+ * A preempt fence, for every engine using the VM, is installed the VM's
+ * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every
+ * engine using the VM, is also installed into the same dma-resv slot of every
+ * external BO mapped in the VM.
+ *
+ * Rebind worker
+ * -------------
+ *
+ * The rebind worker is very similar to an exec. It is resposible for rebinding
+ * evicted BOs or userptrs, waiting on those operations, installing new preempt
+ * fences, and finally resuming executing of engines in the VM.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	<----------------------------------------------------------------------|
+ *	Check if VM is closed, if so bail out                                  |
+ *	Lock VM global lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last rebind worker) |
+ *	Lock VM dma-resv and external BOs dma-resv                             |
+ *	Validate BOs that have been evicted                                    |
+ *	Wait on and allocate new preempt fences for every engine using the VM  |
+ *	Rebind invalidated userptrs + evicted BOs                              |
+ *	Wait on last rebind fence                                              |
+ *	Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot                          |
+ *	Install preeempt fences and issue resume for every engine using the VM |
+ *	Check if any userptrs invalidated since pin                            |
+ *		Squash resume for all engines                                  |
+ *		Unlock all                                                     |
+ *		Wait all VM's dma-resv slots                                   |
+ *		Retry ----------------------------------------------------------
+ *	Release all engines waiting to resume
+ *	Unlock all
+ *
+ * Timeslicing
+ * -----------
+ *
+ * In order to prevent an engine from continuously being kicked off the hardware
+ * and making no forward progress an engine has a period of time it allowed to
+ * run after resume before it can be kicked off again. This effectively gives
+ * each engine a timeslice.
+ *
+ * Handling multiple GTs
+ * =====================
+ *
+ * If a GT has slower access to some regions and the page table structure are in
+ * the slow region, the performance on that GT could adversely be affected. To
+ * work around this we allow a VM page tables to be shadowed in multiple GTs.
+ * When VM is created, a default bind engine and PT table structure are created
+ * on each GT.
+ *
+ * Binds can optionally pass in a mask of GTs where a mapping should be created,
+ * if this mask is zero then default to all the GTs where the VM has page
+ * tables.
+ *
+ * The implementation for this breaks down into a bunch for_each_gt loops in
+ * various places plus exporting a composite fence for multi-GT binds to the
+ * user.
+ *
+ * Fault mode (unified shared memory)
+ * ==================================
+ *
+ * A VM in fault mode can be enabled on devices that support page faults. If
+ * page faults are enabled, using dma fences can potentially induce a deadlock:
+ * A pending page fault can hold up the GPU work which holds up the dma fence
+ * signaling, and memory allocation is usually required to resolve a page
+ * fault, but memory allocation is not allowed to gate dma fence signaling. As
+ * such, dma fences are not allowed when VM is in fault mode. Because dma-fences
+ * are not allowed, long running workloads and ULLS are enabled on a faulting
+ * VM.
+ *
+ * Defered VM binds
+ * ----------------
+ *
+ * By default, on a faulting VM binds just allocate the VMA and the actual
+ * updating of the page tables is defered to the page fault handler. This
+ * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in
+ * the VM bind which will then do the bind immediately.
+ *
+ * Page fault handler
+ * ------------------
+ *
+ * Page faults are received in the G2H worker under the CT lock which is in the
+ * path of dma fences (no memory allocations are allowed, faults require memory
+ * allocations) thus we cannot process faults under the CT lock. Another issue
+ * is faults issue TLB invalidations which require G2H credits and we cannot
+ * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do
+ * not want the CT lock to be an outer lock of the VM global lock (VM global
+ * lock required to fault processing).
+ *
+ * To work around the above issue with processing faults in the G2H worker, we
+ * sink faults to a buffer which is large enough to sink all possible faults on
+ * the GT (1 per hardware engine) and kick a worker to process the faults. Since
+ * the page faults G2H are already received in a worker, kicking another worker
+ * adds more latency to a critical performance path. We add a fast path in the
+ * G2H irq handler which looks at first G2H and if it is a page fault we sink
+ * the fault to the buffer and kick the worker to process the fault. TLB
+ * invalidation responses are also in the critical path so these can also be
+ * processed in this fast path.
+ *
+ * Multiple buffers and workers are used and hashed over based on the ASID so
+ * faults from different VMs can be processed in parallel.
+ *
+ * The page fault handler itself is rather simple, flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in page fault G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in page fault G2H
+ *	Check if VMA is valid, if not bail
+ *	Check if VMA's BO has backing store, if not allocate
+ *	<----------------------------------------------------------------------|
+ *	If userptr, pin pages                                                  |
+ *	Lock VM & BO dma-resv locks                                            |
+ *	If atomic fault, migrate to VRAM, else validate BO location            |
+ *	Issue rebind                                                           |
+ *	Wait on rebind to complete                                             |
+ *	Check if userptr invalidated since pin                                 |
+ *		Drop VM & BO dma-resv locks                                    |
+ *		Retry ----------------------------------------------------------
+ *	Unlock all
+ *	Issue blocking TLB invalidation                                        |
+ *	Send page fault response to GuC
+ *
+ * Access counters
+ * ---------------
+ *
+ * Access counters can be configured to trigger a G2H indicating the device is
+ * accessing VMAs in system memory frequently as hint to migrate those VMAs to
+ * VRAM.
+ *
+ * Same as the page fault handler, access counters G2H cannot be processed the
+ * G2H worker under the CT lock. Again we use a buffer to sink access counter
+ * G2H. Unlike page faults there is no upper bound so if the buffer is full we
+ * simply drop the G2H. Access counters are a best case optimization and it is
+ * safe to drop these unlike page faults.
+ *
+ * The access counter handler itself is rather simple flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in access counter G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in access counter G2H
+ *	If userptr, bail nothing to do
+ *	Lock VM & BO dma-resv locks
+ *	Issue migration to VRAM
+ *	Unlock all
+ *
+ * Notice no rebind is issued in the access counter handler as the rebind will
+ * be issued on next page fault.
+ *
+ * Cavets with eviction / user pointer invalidation
+ * ------------------------------------------------
+ *
+ * In the case of eviction and user pointer invalidation on a faulting VM, there
+ * is no need to issue a rebind rather we just need to blow away the page tables
+ * for the VMAs and the page fault handler will rebind the VMAs when they fault.
+ * The cavet is to update / read the page table structure the VM global lock is
+ * neeeed. In both the case of eviction and user pointer invalidation locks are
+ * held which make acquiring the VM global lock impossible. To work around this
+ * every VMA maintains a list of leaf page table entries which should be written
+ * to zero to blow away the VMA's page tables. After writing zero to these
+ * entries a blocking TLB invalidate is issued. At this point it is safe for the
+ * kernel to move the VMA's memory around. This is a necessary lockless
+ * algorithm and is safe as leafs cannot be changed while either an eviction or
+ * userptr invalidation is occurring.
+ *
+ * Locking
+ * =======
+ *
+ * VM locking protects all of the core data paths (bind operations, execs,
+ * evictions, and compute mode rebind worker) in XE.
+ *
+ * Locks
+ * -----
+ *
+ * VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects
+ * the list of userptrs mapped in the VM, the list of engines using this VM, and
+ * the array of external BOs mapped in the VM. When adding or removing any of the
+ * aforemented state from the VM should acquire this lock in write mode. The VM
+ * bind path also acquires this lock in write while while the exec / compute
+ * mode rebind worker acquire this lock in read mode.
+ *
+ * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
+ * slots which is shared with any private BO in the VM. Expected to be acquired
+ * during VM binds, execs, and compute mode rebind worker. This lock is also
+ * held when private BOs are being evicted.
+ *
+ * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects
+ * external BO dma-resv slots. Expected to be acquired during VM binds (in
+ * addition to the VM dma-resv lock). All external BO dma-locks within a VM are
+ * expected to be acquired (in addition to the VM dma-resv lock) during execs
+ * and the compute mode rebind worker. This lock is also held when an external
+ * BO is being evicted.
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. An exec and bind operation with the same VM can't be executing at the same
+ * time (vm->lock).
+ *
+ * 2. A compute mode rebind worker and bind operation with the same VM can't be
+ * executing at the same time (vm->lock).
+ *
+ * 3. We can't add / remove userptrs or external BOs to a VM while an exec with
+ * the same VM is executing (vm->lock).
+ *
+ * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a
+ * compute mode rebind worker with the same VM is executing (vm->lock).
+ *
+ * 5. Evictions within a VM can't be happen while an exec with the same VM is
+ * executing (dma-resv locks).
+ *
+ * 6. Evictions within a VM can't be happen while a compute mode rebind worker
+ * with the same VM is executing (dma-resv locks).
+ *
+ * dma-resv usage
+ * ==============
+ *
+ * As previously stated to enforce the ordering of kernel ops (eviction, userptr
+ * invalidation, munmap style unbinds which result in a rebind), rebinds during
+ * execs, execs, and resumes in the rebind worker we use both the VMs and
+ * external BOs dma-resv slots. Let try to make this as clear as possible.
+ *
+ * Slot installation
+ * -----------------
+ *
+ * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL
+ * slot of either an external BO or VM (depends on if kernel op is operating on
+ * an external or private BO)
+ *
+ * 2. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_BOOKKEEP slot of the VM
+ *
+ * 3. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM
+ *
+ * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the VM
+ *
+ * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the external BO (if the bind is to an external BO, this is addition to #4)
+ *
+ * 6. Every engine using a compute mode VM has a preempt fence in installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence in installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The exection of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
+ * kernel op is operating on external or private BO)
+ *
+ * 2. In non-compute mode, the exection of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
+ * (depends on if the rebind is operatiing on an external or private BO)
+ *
+ * 3. In non-compute mode, the exection of all jobs from execs shall wait on the
+ * last rebind job
+ *
+ * 4. In compute mode, the exection of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or VM (depends on if rebind is operating on external or private BO)
+ *
+ * 5. In compute mode, resumes in rebind worker shall wait on last rebind fence
+ *
+ * 6. In compute mode, resumes in rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in
+ * compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs are page level grainularity (e.g. part of BO
+ * could be in system memory while another part could be in VRAM).
+ *
+ * Page fault handler likely we be optimized a bit more (e.g. Rebinds always
+ * wait on the dma-resv kernel slots of VM or BO, technically we only have to
+ * wait the BO moving. If using a job to do the rebind, we could not block in
+ * the page fault handler rather attach a callback to fence of the rebind job to
+ * signal page fault complete. Our handling of short circuting for atomic faults
+ * for bound VMAs could be better. etc...). We can tune all of this once we have
+ * benchmarks / performance number from workloads up and running.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
new file mode 100644
index 0000000000000..4498aa2fbd471
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+#include <drm/ttm/ttm_tt.h>
+#include <linux/nospec.h>
+
+#include "xe_bo.h"
+#include "xe_vm.h"
+#include "xe_vm_madvise.h"
+
+static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
+				       struct xe_vma **vmas, int num_vmas,
+				       u64 value)
+{
+	int i, err;
+
+	if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM &&
+			 !xe->info.is_dgfx))
+		return -EINVAL;
+
+	for (i = 0; i < num_vmas; ++i) {
+		struct xe_bo *bo;
+		struct ww_acquire_ctx ww;
+
+		bo = vmas[i]->bo;
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return err;
+		bo->props.preferred_mem_class = value;
+		xe_bo_placement_for_flags(xe, bo, bo->flags);
+		xe_bo_unlock(bo, &ww);
+	}
+
+	return 0;
+}
+
+static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm,
+				struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int i, err;
+
+	if (XE_IOCTL_ERR(xe, value > xe->info.tile_count))
+		return -EINVAL;
+
+	for (i = 0; i < num_vmas; ++i) {
+		struct xe_bo *bo;
+		struct ww_acquire_ctx ww;
+
+		bo = vmas[i]->bo;
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return err;
+		bo->props.preferred_gt = value;
+		xe_bo_placement_for_flags(xe, bo, bo->flags);
+		xe_bo_unlock(bo, &ww);
+	}
+
+	return 0;
+}
+
+static int madvise_preferred_mem_class_gt(struct xe_device *xe,
+					  struct xe_vm *vm,
+					  struct xe_vma **vmas, int num_vmas,
+					  u64 value)
+{
+	int i, err;
+	u32 gt_id = upper_32_bits(value);
+	u32 mem_class = lower_32_bits(value);
+
+	if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, mem_class == XE_MEM_REGION_CLASS_VRAM &&
+			 !xe->info.is_dgfx))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count))
+		return -EINVAL;
+
+	for (i = 0; i < num_vmas; ++i) {
+		struct xe_bo *bo;
+		struct ww_acquire_ctx ww;
+
+		bo = vmas[i]->bo;
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return err;
+		bo->props.preferred_mem_class = mem_class;
+		bo->props.preferred_gt = gt_id;
+		xe_bo_placement_for_flags(xe, bo, bo->flags);
+		xe_bo_unlock(bo, &ww);
+	}
+
+	return 0;
+}
+
+static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
+			      struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int i, err;
+
+	for (i = 0; i < num_vmas; ++i) {
+		struct xe_bo *bo;
+		struct ww_acquire_ctx ww;
+
+		bo = vmas[i]->bo;
+		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
+			return -EINVAL;
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return err;
+		bo->props.cpu_atomic = !!value;
+
+		/*
+		 * All future CPU accesses must be from system memory only, we
+		 * just invalidate the CPU page tables which will trigger a
+		 * migration on next access.
+		 */
+		if (bo->props.cpu_atomic)
+			ttm_bo_unmap_virtual(&bo->ttm);
+		xe_bo_unlock(bo, &ww);
+	}
+
+	return 0;
+}
+
+static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm,
+				 struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int i, err;
+
+	for (i = 0; i < num_vmas; ++i) {
+		struct xe_bo *bo;
+		struct ww_acquire_ctx ww;
+
+		bo = vmas[i]->bo;
+		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
+				 !(bo->flags & XE_BO_CREATE_VRAM1_BIT)))
+			return -EINVAL;
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return err;
+		bo->props.device_atomic = !!value;
+		xe_bo_unlock(bo, &ww);
+	}
+
+	return 0;
+}
+
+static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
+			    struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	int i, err;
+
+	if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH &&
+			 !capable(CAP_SYS_NICE)))
+		return -EPERM;
+
+	for (i = 0; i < num_vmas; ++i) {
+		struct xe_bo *bo;
+		struct ww_acquire_ctx ww;
+
+		bo = vmas[i]->bo;
+
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return err;
+		bo->ttm.priority = value;
+		ttm_bo_move_to_lru_tail(&bo->ttm);
+		xe_bo_unlock(bo, &ww);
+	}
+
+	return 0;
+}
+
+static int madvise_pin(struct xe_device *xe, struct xe_vm *vm,
+		       struct xe_vma **vmas, int num_vmas, u64 value)
+{
+	XE_WARN_ON("NIY");
+	return 0;
+}
+
+typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
+			    struct xe_vma **vmas, int num_vmas, u64 value);
+
+static const madvise_func madvise_funcs[] = {
+	[DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class,
+	[DRM_XE_VM_MADVISE_PREFERRED_GT] = madvise_preferred_gt,
+	[DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] =
+		madvise_preferred_mem_class_gt,
+	[DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic,
+	[DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic,
+	[DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority,
+	[DRM_XE_VM_MADVISE_PIN] = madvise_pin,
+};
+
+static struct xe_vma *node_to_vma(const struct rb_node *node)
+{
+	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
+	return (struct xe_vma *)node;
+}
+
+static struct xe_vma **
+get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
+{
+	struct xe_vma **vmas;
+	struct xe_vma *vma, *__vma, lookup;
+	int max_vmas = 8;
+	struct rb_node *node;
+
+	lockdep_assert_held(&vm->lock);
+
+	vmas = kmalloc(max_vmas * sizeof(*vmas), GFP_KERNEL);
+	if (!vmas)
+		return NULL;
+
+	lookup.start = addr;
+	lookup.end = addr + range - 1;
+
+	vma = xe_vm_find_overlapping_vma(vm, &lookup);
+	if (!vma)
+		return vmas;
+
+	if (!xe_vma_is_userptr(vma)) {
+		vmas[*num_vmas] = vma;
+		*num_vmas += 1;
+	}
+
+	node = &vma->vm_node;
+	while ((node = rb_next(node))) {
+		if (!xe_vma_cmp_vma_cb(&lookup, node)) {
+			__vma = node_to_vma(node);
+			if (xe_vma_is_userptr(__vma))
+				continue;
+
+			if (*num_vmas == max_vmas) {
+				struct xe_vma **__vmas =
+					krealloc(vmas, max_vmas * sizeof(*vmas),
+						 GFP_KERNEL);
+
+				if (!__vmas)
+					return NULL;
+				vmas = __vmas;
+			}
+			vmas[*num_vmas] = __vma;
+			*num_vmas += 1;
+		} else {
+			break;
+		}
+	}
+
+	node = &vma->vm_node;
+	while ((node = rb_prev(node))) {
+		if (!xe_vma_cmp_vma_cb(&lookup, node)) {
+			__vma = node_to_vma(node);
+			if (xe_vma_is_userptr(__vma))
+				continue;
+
+			if (*num_vmas == max_vmas) {
+				struct xe_vma **__vmas =
+					krealloc(vmas, max_vmas * sizeof(*vmas),
+						 GFP_KERNEL);
+
+				if (!__vmas)
+					return NULL;
+				vmas = __vmas;
+			}
+			vmas[*num_vmas] = __vma;
+			*num_vmas += 1;
+		} else {
+			break;
+		}
+	}
+
+	return vmas;
+}
+
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_madvise *args = data;
+	struct xe_vm *vm;
+	struct xe_vma **vmas = NULL;
+	int num_vmas = 0, err = 0, idx;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->property > ARRAY_SIZE(madvise_funcs)))
+		return -EINVAL;
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+		err = -ENOENT;
+		goto put_vm;
+	}
+
+	if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) {
+		err = -EINVAL;
+		goto put_vm;
+	}
+
+	down_read(&vm->lock);
+
+	vmas = get_vmas(vm, &num_vmas, args->addr, args->range);
+	if (XE_IOCTL_ERR(xe, err))
+		goto unlock_vm;
+
+	if (XE_IOCTL_ERR(xe, !vmas)) {
+		err = -ENOMEM;
+		goto unlock_vm;
+	}
+
+	if (XE_IOCTL_ERR(xe, !num_vmas)) {
+		err = -EINVAL;
+		goto unlock_vm;
+	}
+
+	idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs));
+	err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value);
+
+unlock_vm:
+	up_read(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
+	kfree(vmas);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
new file mode 100644
index 0000000000000..eecd33acd2481
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_MADVISE_H_
+#define _XE_VM_MADVISE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
new file mode 100644
index 0000000000000..2a3b911ab3585
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_TYPES_H_
+#define _XE_VM_TYPES_H_
+
+#include <linux/dma-resv.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/scatterlist.h>
+
+#include "xe_device_types.h"
+#include "xe_pt_types.h"
+
+struct xe_bo;
+struct xe_vm;
+
+struct xe_vma {
+	struct rb_node vm_node;
+	/** @vm: VM which this VMA belongs to */
+	struct xe_vm *vm;
+
+	/**
+	 * @start: start address of this VMA within its address domain, end -
+	 * start + 1 == VMA size
+	 */
+	u64 start;
+	/** @end: end address of this VMA within its address domain */
+	u64 end;
+	/** @pte_flags: pte flags for this VMA */
+	u32 pte_flags;
+
+	/** @bo: BO if not a userptr, must be NULL is userptr */
+	struct xe_bo *bo;
+	/** @bo_offset: offset into BO if not a userptr, unused for userptr */
+	u64 bo_offset;
+
+	/** @gt_mask: GT mask of where to create binding for this VMA */
+	u64 gt_mask;
+
+	/**
+	 * @gt_present: GT mask of binding are present for this VMA.
+	 * protected by vm->lock, vm->resv and for userptrs,
+	 * vm->userptr.notifier_lock for writing. Needs either for reading,
+	 * but if reading is done under the vm->lock only, it needs to be held
+	 * in write mode.
+	 */
+	u64 gt_present;
+
+	/**
+	 * @destroyed: VMA is destroyed, in the sense that it shouldn't be
+	 * subject to rebind anymore. This field must be written under
+	 * the vm lock in write mode and the userptr.notifier_lock in
+	 * either mode. Read under the vm lock or the userptr.notifier_lock in
+	 * write mode.
+	 */
+	bool destroyed;
+
+	/**
+	 * @first_munmap_rebind: VMA is first in a sequence of ops that triggers
+	 * a rebind (munmap style VM unbinds). This indicates the operation
+	 * using this VMA must wait on all dma-resv slots (wait for pending jobs
+	 * / trigger preempt fences).
+	 */
+	bool first_munmap_rebind;
+
+	/**
+	 * @last_munmap_rebind: VMA is first in a sequence of ops that triggers
+	 * a rebind (munmap style VM unbinds). This indicates the operation
+	 * using this VMA must install itself into kernel dma-resv slot (blocks
+	 * future jobs) and kick the rebind work in compute mode.
+	 */
+	bool last_munmap_rebind;
+
+	/** @use_atomic_access_pte_bit: Set atomic access bit in PTE */
+	bool use_atomic_access_pte_bit;
+
+	union {
+		/** @bo_link: link into BO if not a userptr */
+		struct list_head bo_link;
+		/** @userptr_link: link into VM repin list if userptr */
+		struct list_head userptr_link;
+	};
+
+	/**
+	 * @rebind_link: link into VM if this VMA needs rebinding, and
+	 * if it's a bo (not userptr) needs validation after a possible
+	 * eviction. Protected by the vm's resv lock.
+	 */
+	struct list_head rebind_link;
+
+	/**
+	 * @unbind_link: link or list head if an unbind of multiple VMAs, in
+	 * single unbind op, is being done.
+	 */
+	struct list_head unbind_link;
+
+	/** @destroy_cb: callback to destroy VMA when unbind job is done */
+	struct dma_fence_cb destroy_cb;
+
+	/** @destroy_work: worker to destroy this BO */
+	struct work_struct destroy_work;
+
+	/** @userptr: user pointer state */
+	struct {
+		/** @ptr: user pointer */
+		uintptr_t ptr;
+		/** @invalidate_link: Link for the vm::userptr.invalidated list */
+		struct list_head invalidate_link;
+		/**
+		 * @notifier: MMU notifier for user pointer (invalidation call back)
+		 */
+		struct mmu_interval_notifier notifier;
+		/** @sgt: storage for a scatter gather table */
+		struct sg_table sgt;
+		/** @sg: allocated scatter gather table */
+		struct sg_table *sg;
+		/** @notifier_seq: notifier sequence number */
+		unsigned long notifier_seq;
+		/**
+		 * @initial_bind: user pointer has been bound at least once.
+		 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
+		 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+		 */
+		bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+		u32 divisor;
+#endif
+	} userptr;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @gt_invalidated: VMA has been invalidated */
+		u64 gt_invalidated;
+	} usm;
+
+	struct {
+		struct list_head rebind_link;
+	} notifier;
+
+	struct {
+		/**
+		 * @extobj.link: Link into vm's external object list.
+		 * protected by the vm lock.
+		 */
+		struct list_head link;
+	} extobj;
+};
+
+struct xe_device;
+
+#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
+
+struct xe_vm {
+	struct xe_device *xe;
+
+	struct kref refcount;
+
+	/* engine used for (un)binding vma's */
+	struct xe_engine *eng[XE_MAX_GT];
+
+	/** Protects @rebind_list and the page-table structures */
+	struct dma_resv resv;
+
+	u64 size;
+	struct rb_root vmas;
+
+	struct xe_pt *pt_root[XE_MAX_GT];
+	struct xe_bo *scratch_bo[XE_MAX_GT];
+	struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL];
+
+	/** @flags: flags for this VM, statically setup a creation time */
+#define XE_VM_FLAGS_64K			BIT(0)
+#define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
+#define XE_VM_FLAG_ASYNC_BIND_OPS	BIT(2)
+#define XE_VM_FLAG_MIGRATION		BIT(3)
+#define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
+#define XE_VM_FLAG_FAULT_MODE		BIT(5)
+#define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 6) & 0x3)
+#define XE_VM_FLAG_SET_GT_ID(gt)	((gt)->info.id << 6)
+	unsigned long flags;
+
+	/** @composite_fence_ctx: context composite fence */
+	u64 composite_fence_ctx;
+	/** @composite_fence_seqno: seqno for composite fence */
+	u32 composite_fence_seqno;
+
+	/**
+	 * @lock: outer most lock, protects objects of anything attached to this
+	 * VM
+	 */
+	struct rw_semaphore lock;
+
+	/**
+	 * @rebind_list: list of VMAs that need rebinding, and if they are
+	 * bos (not userptr), need validation after a possible eviction. The
+	 * list is protected by @resv.
+	 */
+	struct list_head rebind_list;
+
+	/** @rebind_fence: rebind fence from execbuf */
+	struct dma_fence *rebind_fence;
+
+	/**
+	 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
+	 * from an irq context can be last put and the destroy needs to be able
+	 * to sleep.
+	 */
+	struct work_struct destroy_work;
+
+	/** @extobj: bookkeeping for external objects. Protected by the vm lock */
+	struct {
+		/** @enties: number of external BOs attached this VM */
+		u32 entries;
+		/** @list: list of vmas with external bos attached */
+		struct list_head list;
+	} extobj;
+
+	/** @async_ops: async VM operations (bind / unbinds) */
+	struct {
+		/** @list: list of pending async VM ops */
+		struct list_head pending;
+		/** @work: worker to execute async VM ops */
+		struct work_struct work;
+		/** @lock: protects list of pending async VM ops and fences */
+		spinlock_t lock;
+		/** @error_capture: error capture state */
+		struct {
+			/** @mm: user MM */
+			struct mm_struct *mm;
+			/**
+			 * @addr: user pointer to copy error capture state too
+			 */
+			u64 addr;
+			/** @wq: user fence wait queue for VM errors */
+			wait_queue_head_t wq;
+		} error_capture;
+		/** @fence: fence state */
+		struct {
+			/** @context: context of async fence */
+			u64 context;
+			/** @seqno: seqno of async fence */
+			u32 seqno;
+		} fence;
+		/** @error: error state for async VM ops */
+		int error;
+		/**
+		 * @munmap_rebind_inflight: an munmap style VM bind is in the
+		 * middle of a set of ops which requires a rebind at the end.
+		 */
+		bool munmap_rebind_inflight;
+	} async_ops;
+
+	/** @userptr: user pointer state */
+	struct {
+		/**
+		 * @userptr.repin_list: list of VMAs which are user pointers,
+		 * and needs repinning. Protected by @lock.
+		 */
+		struct list_head repin_list;
+		/**
+		 * @notifier_lock: protects notifier in write mode and
+		 * submission in read mode.
+		 */
+		struct rw_semaphore notifier_lock;
+		/**
+		 * @userptr.invalidated_lock: Protects the
+		 * @userptr.invalidated list.
+		 */
+		spinlock_t invalidated_lock;
+		/**
+		 * @userptr.invalidated: List of invalidated userptrs, not yet
+		 * picked
+		 * up for revalidation. Protected from access with the
+		 * @invalidated_lock. Removing items from the list
+		 * additionally requires @lock in write mode, and adding
+		 * items to the list requires the @userptr.notifer_lock in
+		 * write mode.
+		 */
+		struct list_head invalidated;
+	} userptr;
+
+	/** @preempt: preempt state */
+	struct {
+		/**
+		 * @min_run_period_ms: The minimum run period before preempting
+		 * an engine again
+		 */
+		s64 min_run_period_ms;
+		/** @engines: list of engines attached to this VM */
+		struct list_head engines;
+		/** @num_engines: number user engines attached to this VM */
+		int num_engines;
+		/**
+		 * @rebind_work: worker to rebind invalidated userptrs / evicted
+		 * BOs
+		 */
+		struct work_struct rebind_work;
+	} preempt;
+
+	/** @um: unified memory state */
+	struct {
+		/** @asid: address space ID, unique to each VM */
+		u32 asid;
+		/**
+		 * @last_fault_vma: Last fault VMA, used for fast lookup when we
+		 * get a flood of faults to the same VMA
+		 */
+		struct xe_vma *last_fault_vma;
+	} usm;
+
+	/**
+	 * @notifier: Lists and locks for temporary usage within notifiers where
+	 * we either can't grab the vm lock or the vm resv.
+	 */
+	struct {
+		/** @notifier.list_lock: lock protecting @rebind_list */
+		spinlock_t list_lock;
+		/**
+		 * @notifier.rebind_list: list of vmas that we want to put on the
+		 * main @rebind_list. This list is protected for writing by both
+		 * notifier.list_lock, and the resv of the bo the vma points to,
+		 * and for reading by the notifier.list_lock only.
+		 */
+		struct list_head rebind_list;
+	} notifier;
+
+	/** @error_capture: allow to track errors */
+	struct {
+		/** @capture_once: capture only one error per VM */
+		bool capture_once;
+	} error_capture;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
new file mode 100644
index 0000000000000..b56141ba7145d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include <linux/compiler_types.h>
+
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+#include "xe_step.h"
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * Hardware workarounds are register programming documented to be executed in
+ * the driver that fall outside of the normal programming sequences for a
+ * platform. There are some basic categories of workarounds, depending on
+ * how/when they are applied:
+ *
+ * - LRC workarounds: workarounds that touch registers that are
+ *   saved/restored to/from the HW context image. The list is emitted (via Load
+ *   Register Immediate commands) once when initializing the device and saved in
+ *   the default context. That default context is then used on every context
+ *   creation to have a "primed golden context", i.e. a context image that
+ *   already contains the changes needed to all the registers.
+ *
+ *   TODO: Although these workarounds are maintained here, they are not
+ *   currently being applied.
+ *
+ * - Engine workarounds: the list of these WAs is applied whenever the specific
+ *   engine is reset. It's also possible that a set of engine classes share a
+ *   common power domain and they are reset together. This happens on some
+ *   platforms with render and compute engines. In this case (at least) one of
+ *   them need to keeep the workaround programming: the approach taken in the
+ *   driver is to tie those workarounds to the first compute/render engine that
+ *   is registered.  When executing with GuC submission, engine resets are
+ *   outside of kernel driver control, hence the list of registers involved in
+ *   written once, on engine initialization, and then passed to GuC, that
+ *   saves/restores their values before/after the reset takes place. See
+ *   ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+ *
+ * - Register whitelist: some workarounds need to be implemented in userspace,
+ *   but need to touch privileged registers. The whitelist in the kernel
+ *   instructs the hardware to allow the access to happen. From the kernel side,
+ *   this is just a special case of a MMIO workaround (as we write the list of
+ *   these to/be-whitelisted registers to some special HW registers).
+ *
+ * - Workaround batchbuffers: buffers that get executed automatically by the
+ *   hardware on every HW context restore. These buffers are created and
+ *   programmed in the default context so the hardware always go through those
+ *   programming sequences when switching contexts. The support for workaround
+ *   batchbuffers is enabled these hardware mechanisms:
+ *
+ *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
+ *      context, pointing the hardware to jump to that location when that offset
+ *      is reached in the context restore. Workaround batchbuffer in the driver
+ *      currently uses this mechanism for all platforms.
+ *
+ *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
+ *      pointing the hardware to a buffer to continue executing after the
+ *      engine registers are restored in a context restore sequence. This is
+ *      currently not used in the driver.
+ *
+ * - Other:  There are WAs that, due to their nature, cannot be applied from a
+ *   central place. Those are peppered around the rest of the code, as needed.
+ *   Workarounds related to the display IP are the main example.
+ *
+ * .. [1] Technically, some registers are powercontext saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things, so it's the approach taken in the driver.
+ *
+ * .. note::
+ *    Hardware workarounds in xe work the same way as in i915, with the
+ *    difference of how they are maintained in the code. In xe it uses the
+ *    xe_rtp infrastructure so the workarounds can be kept in tables, following
+ *    a more declarative approach rather than procedural.
+ */
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x)	_XE_RTP_REG(x)
+#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+
+static bool match_14011060649(const struct xe_gt *gt,
+			      const struct xe_hw_engine *hwe)
+{
+	return hwe->instance % 2 == 0;
+}
+
+static const struct xe_rtp_entry gt_was[] = {
+	{ XE_RTP_NAME("14011060649"),
+	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
+		       ENGINE_CLASS(VIDEO_DECODE),
+		       FUNC(match_14011060649)),
+	  XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS,
+		     XE_RTP_FLAG(FOREACH_ENGINE))
+	},
+	{ XE_RTP_NAME("16010515920"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+		       STEP(A0, B0),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS,
+		     XE_RTP_FLAG(FOREACH_ENGINE))
+	},
+	{ XE_RTP_NAME("22010523718"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011006942"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14010948348"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011037102"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011371254"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011431319/0"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9440,
+		     GAMTLBOACS_CLKGATE_DIS |
+		     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
+		     GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
+		     GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
+		     GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
+		     GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
+		     GAMTLBBLT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011431319/1"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLCGCTL9444,
+		     GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
+		     GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
+		     GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
+		     GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
+		     GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
+		     GAMTLBMERT_CLKGATE_DIS |
+		     GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
+		     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14010569222"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14011028019"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("14014830051"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN)
+	},
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)
+	},
+	{ XE_RTP_NAME("14011059788"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)
+	},
+	{ XE_RTP_NAME("1409420604"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)
+	},
+	{ XE_RTP_NAME("1408615072"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)
+	},
+	{}
+};
+
+static const struct xe_rtp_entry engine_was[] = {
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1606931601"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE)
+	},
+	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("18019627453"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1409804808"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("14010229206, 1409085225"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
+		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
+		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("1406941453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
+	  XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{}
+};
+
+static const struct xe_rtp_entry lrc_was[] = {
+	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3,
+		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
+			   XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("16011163337"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  /* read verification is ignored due to 1608008084. */
+	  XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK,
+					FF_MODE2_GS_TIMER_224)
+	},
+	{ XE_RTP_NAME("1409044764"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3,
+		     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{ XE_RTP_NAME("22010493298"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_SET(HIZ_CHICKEN,
+		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
+		     XE_RTP_FLAG(MASKED_REG))
+	},
+	{}
+};
+
+/**
+ * xe_wa_process_gt - process GT workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process GT workaround table for this platform, saving in @gt all the
+ * workarounds that need to be applied at the GT level.
+ */
+void xe_wa_process_gt(struct xe_gt *gt)
+{
+	xe_rtp_process(gt_was, &gt->reg_sr, gt, NULL);
+}
+
+/**
+ * xe_wa_process_engine - process engine workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process engine workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied at the engine level that match this
+ * engine.
+ */
+void xe_wa_process_engine(struct xe_hw_engine *hwe)
+{
+	xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
+}
+
+/**
+ * xe_wa_process_lrc - process context workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process context workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied on context restore. These are workarounds
+ * touching registers that are part of the HW context image.
+ */
+void xe_wa_process_lrc(struct xe_hw_engine *hwe)
+{
+	xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
new file mode 100644
index 0000000000000..cd2307d587950
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WA_
+#define _XE_WA_
+
+struct xe_gt;
+struct xe_hw_engine;
+
+void xe_wa_process_gt(struct xe_gt *gt);
+void xe_wa_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_process_lrc(struct xe_hw_engine *hwe);
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
new file mode 100644
index 0000000000000..8a8d814a0e7aa
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_vm.h"
+
+static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
+{
+	u64 rvalue;
+	int err;
+	bool passed;
+
+	err = copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue));
+	if (err)
+		return -EFAULT;
+
+	switch (op) {
+	case DRM_XE_UFENCE_WAIT_EQ:
+		passed = (rvalue & mask) == (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_NEQ:
+		passed = (rvalue & mask) != (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_GT:
+		passed = (rvalue & mask) > (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_GTE:
+		passed = (rvalue & mask) >= (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_LT:
+		passed = (rvalue & mask) < (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_LTE:
+		passed = (rvalue & mask) <= (value & mask);
+		break;
+	default:
+		XE_BUG_ON("Not possible");
+	}
+
+	return passed ? 0 : 1;
+}
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+int check_hw_engines(struct xe_device *xe,
+		     struct drm_xe_engine_class_instance *eci,
+		     int num_engines)
+{
+	int i;
+
+	for (i = 0; i < num_engines; ++i) {
+		enum xe_engine_class user_class =
+			user_to_xe_engine_class[eci[i].engine_class];
+
+		if (eci[i].gt_id >= xe->info.tile_count)
+			return -EINVAL;
+
+		if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id),
+				     user_class, eci[i].engine_instance, true))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define VALID_FLAGS	(DRM_XE_UFENCE_WAIT_SOFT_OP | \
+			 DRM_XE_UFENCE_WAIT_ABSTIME | \
+			 DRM_XE_UFENCE_WAIT_VM_ERROR)
+#define MAX_OP		DRM_XE_UFENCE_WAIT_LTE
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
+	struct drm_xe_wait_user_fence *args = data;
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_xe_engine_class_instance __user *user_eci =
+		u64_to_user_ptr(args->instances);
+	struct xe_vm *vm = NULL;
+	u64 addr = args->addr;
+	int err;
+	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP ||
+		args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
+	unsigned long timeout = args->timeout;
+
+	if (XE_IOCTL_ERR(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, args->op > MAX_OP))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, no_engines &&
+			 (args->num_engines || args->instances)))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !no_engines && !args->num_engines))
+		return -EINVAL;
+
+	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) &&
+			 addr & 0x7))
+		return -EINVAL;
+
+	if (!no_engines) {
+		err = copy_from_user(eci, user_eci,
+				     sizeof(struct drm_xe_engine_class_instance) *
+			     args->num_engines);
+		if (XE_IOCTL_ERR(xe, err))
+			return -EFAULT;
+
+		if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci,
+						      args->num_engines)))
+			return -EINVAL;
+	}
+
+	if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) {
+		if (XE_IOCTL_ERR(xe, args->vm_id >> 32))
+			return -EINVAL;
+
+		vm = xe_vm_lookup(to_xe_file(file), args->vm_id);
+		if (XE_IOCTL_ERR(xe, !vm))
+			return -ENOENT;
+
+		if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) {
+			xe_vm_put(vm);
+			return -ENOTSUPP;
+		}
+
+		addr = vm->async_ops.error_capture.addr;
+	}
+
+	if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	/*
+	 * FIXME: Very simple implementation at the moment, single wait queue
+	 * for everything. Could be optimized to have a wait queue for every
+	 * hardware engine. Open coding as 'do_compare' can sleep which doesn't
+	 * work with the wait_event_* macros.
+	 */
+	if (vm)
+		add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
+	else
+		add_wait_queue(&xe->ufence_wq, &w_wait);
+	for (;;) {
+		if (vm && xe_vm_is_closed(vm)) {
+			err = -ENODEV;
+			break;
+		}
+		err = do_compare(addr, args->value, args->mask, args->op);
+		if (err <= 0)
+			break;
+
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+
+		if (!timeout) {
+			err = -ETIME;
+			break;
+		}
+
+		timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
+	}
+	if (vm) {
+		remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
+		xe_vm_put(vm);
+	} else {
+		remove_wait_queue(&xe->ufence_wq, &w_wait);
+	}
+	if (XE_IOCTL_ERR(xe, err < 0))
+		return err;
+	else if (XE_IOCTL_ERR(xe, !timeout))
+		return -ETIME;
+
+	/*
+	 * Again very simple, return the time in jiffies that has past, may need
+	 * a more precision
+	 */
+	if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
+		args->timeout = args->timeout - timeout;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
new file mode 100644
index 0000000000000..0e268978f9e6c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WAIT_USER_FENCE_H_
+#define _XE_WAIT_USER_FENCE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
new file mode 100644
index 0000000000000..e4a8d4a1899e8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_reg.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+#include "i915_utils.h"
+
+/**
+ * DOC: Write Once Protected Content Memory (WOPCM) Layout
+ *
+ * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and
+ * offset registers whose values are calculated and determined by HuC/GuC
+ * firmware size and set of hardware requirements/restrictions as shown below:
+ *
+ * ::
+ *
+ *    +=========> +====================+ <== WOPCM Top
+ *    ^           |  HW contexts RSVD  |
+ *    |     +===> +====================+ <== GuC WOPCM Top
+ *    |     ^     |                    |
+ *    |     |     |                    |
+ *    |     |     |                    |
+ *    |    GuC    |                    |
+ *    |   WOPCM   |                    |
+ *    |    Size   +--------------------+
+ *  WOPCM   |     |    GuC FW RSVD     |
+ *    |     |     +--------------------+
+ *    |     |     |   GuC Stack RSVD   |
+ *    |     |     +------------------- +
+ *    |     v     |   GuC WOPCM RSVD   |
+ *    |     +===> +====================+ <== GuC WOPCM base
+ *    |           |     WOPCM RSVD     |
+ *    |           +------------------- + <== HuC Firmware Top
+ *    v           |      HuC FW        |
+ *    +=========> +====================+ <== WOPCM Base
+ *
+ * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top.
+ * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6
+ * context).
+ */
+
+/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
+#define DGFX_WOPCM_SIZE			SZ_4M	/* FIXME: Larger size require
+						   for 2 tile PVC, do a proper
+						   probe sooner or later */
+#define MTL_WOPCM_SIZE			SZ_4M	/* FIXME: Larger size require
+						   for MTL, do a proper probe
+						   sooner or later */
+#define GEN11_WOPCM_SIZE		SZ_2M
+/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
+#define WOPCM_RESERVED_SIZE		SZ_16K
+
+/* 16KB reserved at the beginning of GuC WOPCM. */
+#define GUC_WOPCM_RESERVED		SZ_16K
+/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
+#define GUC_WOPCM_STACK_RESERVED	SZ_8K
+
+/* GuC WOPCM Offset value needs to be aligned to 16KB. */
+#define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
+
+/* 36KB WOPCM reserved at the end of WOPCM on GEN11. */
+#define GEN11_WOPCM_HW_CTX_RESERVED	(SZ_32K + SZ_4K)
+
+static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm)
+{
+	return container_of(wopcm, struct xe_gt, uc.wopcm);
+}
+
+static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm)
+{
+	return gt_to_xe(wopcm_to_gt(wopcm));
+}
+
+static u32 context_reserved_size(void)
+{
+	return GEN11_WOPCM_HW_CTX_RESERVED;
+}
+
+static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
+			   u32 guc_wopcm_base, u32 guc_wopcm_size,
+			   u32 guc_fw_size, u32 huc_fw_size)
+{
+	const u32 ctx_rsvd = context_reserved_size();
+	u32 size;
+
+	size = wopcm_size - ctx_rsvd;
+	if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) {
+		drm_err(&xe->drm,
+			"WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
+			size / SZ_1K);
+		return false;
+	}
+
+	size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
+	if (unlikely(guc_wopcm_size < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC),
+			guc_wopcm_size / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	size = huc_fw_size + WOPCM_RESERVED_SIZE;
+	if (unlikely(guc_wopcm_base < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC),
+			guc_wopcm_base / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	return true;
+}
+
+static bool __wopcm_regs_locked(struct xe_gt *gt,
+				u32 *guc_wopcm_base, u32 *guc_wopcm_size)
+{
+	u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg);
+	u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg);
+
+	if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
+	    !(reg_base & GUC_WOPCM_OFFSET_VALID))
+		return false;
+
+	*guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK;
+	*guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK;
+	return true;
+}
+
+static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
+			     struct xe_wopcm *wopcm)
+{
+	u32 base = wopcm->guc.base;
+	u32 size = wopcm->guc.size;
+	u32 huc_agent = xe_uc_fw_is_disabled(&gt->uc.huc.fw) ? 0 :
+		HUC_LOADING_AGENT_GUC;
+	u32 mask;
+	int err;
+
+	XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+	XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+	XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
+	XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
+
+	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
+	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask,
+					 size | GUC_WOPCM_SIZE_LOCKED);
+	if (err)
+		goto err_out;
+
+	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+	err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg,
+					 base | huc_agent, mask,
+					 base | huc_agent |
+					 GUC_WOPCM_OFFSET_VALID);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
+		   DMA_GUC_WOPCM_OFFSET.reg,
+		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg));
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
+		   GUC_WOPCM_SIZE.reg,
+		   xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg));
+
+	return err;
+}
+
+u32 xe_wopcm_size(struct xe_device *xe)
+{
+	return IS_DGFX(xe) ? DGFX_WOPCM_SIZE :
+		xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE :
+		GEN11_WOPCM_SIZE;
+}
+
+/**
+ * xe_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to xe_wopcm.
+ *
+ * This function will partition WOPCM space based on GuC and HuC firmware sizes
+ * and will allocate max remaining for use by GuC. This function will also
+ * enforce platform dependent hardware restrictions on GuC WOPCM offset and
+ * size. It will fail the WOPCM init if any of these checks fail, so that the
+ * following WOPCM registers setup and GuC firmware uploading would be aborted.
+ */
+int xe_wopcm_init(struct xe_wopcm *wopcm)
+{
+	struct xe_device *xe = wopcm_to_xe(wopcm);
+	struct xe_gt *gt = wopcm_to_gt(wopcm);
+	u32 guc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.guc.fw);
+	u32 huc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.huc.fw);
+	u32 ctx_rsvd = context_reserved_size();
+	u32 guc_wopcm_base;
+	u32 guc_wopcm_size;
+	bool locked;
+	int ret = 0;
+
+	if (!guc_fw_size)
+		return -EINVAL;
+
+	wopcm->size = xe_wopcm_size(xe);
+	drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	XE_BUG_ON(guc_fw_size >= wopcm->size);
+	XE_BUG_ON(huc_fw_size >= wopcm->size);
+	XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+
+	locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size);
+	if (locked) {
+		drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+		goto check;
+	}
+
+	/*
+	 * Aligned value of guc_wopcm_base will determine available WOPCM space
+	 * for HuC firmware and mandatory reserved area.
+	 */
+	guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE;
+	guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT);
+
+	/*
+	 * Need to clamp guc_wopcm_base now to make sure the following math is
+	 * correct. Formal check of whole WOPCM layout will be done below.
+	 */
+	guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+
+	/* Aligned remainings of usable WOPCM space can be assigned to GuC. */
+	guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+	guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
+
+	drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
+		guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+
+check:
+	if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+			   guc_fw_size, huc_fw_size)) {
+		wopcm->guc.base = guc_wopcm_base;
+		wopcm->guc.size = guc_wopcm_size;
+		XE_BUG_ON(!wopcm->guc.base);
+		XE_BUG_ON(!wopcm->guc.size);
+	} else {
+		drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n");
+		return -E2BIG;
+	}
+
+	if (!locked)
+		ret = __wopcm_init_regs(xe, gt, wopcm);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h
new file mode 100644
index 0000000000000..0197a282460be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_H_
+#define _XE_WOPCM_H_
+
+#include "xe_wopcm_types.h"
+
+struct xe_device;
+
+int xe_wopcm_init(struct xe_wopcm *wopcm);
+u32 xe_wopcm_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
new file mode 100644
index 0000000000000..486d850c40847
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_TYPES_H_
+#define _XE_WOPCM_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_wopcm - Overall WOPCM info and WOPCM regions.
+ */
+struct xe_wopcm {
+	/** @size: Size of overall WOPCM */
+	u32 size;
+	/** @guc: GuC WOPCM Region info */
+	struct {
+		/** @base: GuC WOPCM base which is offset from WOPCM base */
+		u32 base;
+		/** @size: Size of the GuC WOPCM region */
+		u32 size;
+	} guc;
+};
+
+#endif
diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
new file mode 100644
index 0000000000000..e539594ed9391
--- /dev/null
+++ b/include/drm/xe_pciids.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCIIDS_H_
+#define _XE_PCIIDS_H_
+
+/*
+ * Lists below can be turned into initializers for a struct pci_device_id
+ * by defining INTEL_VGA_DEVICE:
+ *
+ * #define INTEL_VGA_DEVICE(id, info) { \
+ *	0x8086, id,			\
+ *	~0, ~0,				\
+ *	0x030000, 0xff0000,		\
+ *	(unsigned long) info }
+ *
+ * And then calling like:
+ *
+ * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__)
+ *
+ * To turn them into something else, just provide a different macro passed as
+ * first argument.
+ */
+
+/* TGL */
+#define XE_TGL_GT1_IDS(MACRO__, ...)		\
+	MACRO__(0x9A60, ## __VA_ARGS__),	\
+	MACRO__(0x9A68, ## __VA_ARGS__),	\
+	MACRO__(0x9A70, ## __VA_ARGS__)
+
+#define XE_TGL_GT2_IDS(MACRO__, ...)		\
+	MACRO__(0x9A40, ## __VA_ARGS__),	\
+	MACRO__(0x9A49, ## __VA_ARGS__),	\
+	MACRO__(0x9A59, ## __VA_ARGS__),	\
+	MACRO__(0x9A78, ## __VA_ARGS__),	\
+	MACRO__(0x9AC0, ## __VA_ARGS__),	\
+	MACRO__(0x9AC9, ## __VA_ARGS__),	\
+	MACRO__(0x9AD9, ## __VA_ARGS__),	\
+	MACRO__(0x9AF8, ## __VA_ARGS__)
+
+#define XE_TGL_IDS(MACRO__, ...)		\
+	XE_TGL_GT1_IDS(MACRO__, ...),		\
+	XE_TGL_GT2_IDS(MACRO__, ...)
+
+/* RKL */
+#define XE_RKL_IDS(MACRO__, ...)		\
+	MACRO__(0x4C80, ## __VA_ARGS__),	\
+	MACRO__(0x4C8A, ## __VA_ARGS__),	\
+	MACRO__(0x4C8B, ## __VA_ARGS__),	\
+	MACRO__(0x4C8C, ## __VA_ARGS__),	\
+	MACRO__(0x4C90, ## __VA_ARGS__),	\
+	MACRO__(0x4C9A, ## __VA_ARGS__)
+
+/* DG1 */
+#define XE_DG1_IDS(MACRO__, ...)		\
+	MACRO__(0x4905, ## __VA_ARGS__),	\
+	MACRO__(0x4906, ## __VA_ARGS__),	\
+	MACRO__(0x4907, ## __VA_ARGS__),	\
+	MACRO__(0x4908, ## __VA_ARGS__),	\
+	MACRO__(0x4909, ## __VA_ARGS__)
+
+/* ADL-S */
+#define XE_ADLS_IDS(MACRO__, ...)		\
+	MACRO__(0x4680, ## __VA_ARGS__),	\
+	MACRO__(0x4682, ## __VA_ARGS__),	\
+	MACRO__(0x4688, ## __VA_ARGS__),	\
+	MACRO__(0x468A, ## __VA_ARGS__),	\
+	MACRO__(0x4690, ## __VA_ARGS__),	\
+	MACRO__(0x4692, ## __VA_ARGS__),	\
+	MACRO__(0x4693, ## __VA_ARGS__)
+
+/* ADL-P */
+#define XE_ADLP_IDS(MACRO__, ...)		\
+	MACRO__(0x46A0, ## __VA_ARGS__),	\
+	MACRO__(0x46A1, ## __VA_ARGS__),	\
+	MACRO__(0x46A2, ## __VA_ARGS__),	\
+	MACRO__(0x46A3, ## __VA_ARGS__),	\
+	MACRO__(0x46A6, ## __VA_ARGS__),	\
+	MACRO__(0x46A8, ## __VA_ARGS__),	\
+	MACRO__(0x46AA, ## __VA_ARGS__),	\
+	MACRO__(0x462A, ## __VA_ARGS__),	\
+	MACRO__(0x4626, ## __VA_ARGS__),	\
+	MACRO__(0x4628, ## __VA_ARGS__),	\
+	MACRO__(0x46B0, ## __VA_ARGS__),	\
+	MACRO__(0x46B1, ## __VA_ARGS__),	\
+	MACRO__(0x46B2, ## __VA_ARGS__),	\
+	MACRO__(0x46B3, ## __VA_ARGS__),	\
+	MACRO__(0x46C0, ## __VA_ARGS__),	\
+	MACRO__(0x46C1, ## __VA_ARGS__),	\
+	MACRO__(0x46C2, ## __VA_ARGS__),	\
+	MACRO__(0x46C3, ## __VA_ARGS__)
+
+/* ADL-N */
+#define XE_ADLN_IDS(MACRO__, ...)		\
+	MACRO__(0x46D0, ## __VA_ARGS__),	\
+	MACRO__(0x46D1, ## __VA_ARGS__),	\
+	MACRO__(0x46D2, ## __VA_ARGS__)
+
+/* RPL-S */
+#define XE_RPLS_IDS(MACRO__, ...)		\
+	MACRO__(0xA780, ## __VA_ARGS__),	\
+	MACRO__(0xA781, ## __VA_ARGS__),	\
+	MACRO__(0xA782, ## __VA_ARGS__),	\
+	MACRO__(0xA783, ## __VA_ARGS__),	\
+	MACRO__(0xA788, ## __VA_ARGS__),	\
+	MACRO__(0xA789, ## __VA_ARGS__),	\
+	MACRO__(0xA78A, ## __VA_ARGS__),	\
+	MACRO__(0xA78B, ## __VA_ARGS__)
+
+/* RPL-U */
+#define XE_RPLU_IDS(MACRO__, ...)		\
+	MACRO__(0xA721, ## __VA_ARGS__),	\
+	MACRO__(0xA7A1, ## __VA_ARGS__),	\
+	MACRO__(0xA7A9, ## __VA_ARGS__)
+
+/* RPL-P */
+#define XE_RPLP_IDS(MACRO__, ...)		\
+	MACRO__(0xA720, ## __VA_ARGS__),	\
+	MACRO__(0xA7A0, ## __VA_ARGS__),	\
+	MACRO__(0xA7A8, ## __VA_ARGS__)
+
+/* DG2 */
+#define XE_DG2_G10_IDS(MACRO__, ...)		\
+	MACRO__(0x5690, ## __VA_ARGS__),	\
+	MACRO__(0x5691, ## __VA_ARGS__),	\
+	MACRO__(0x5692, ## __VA_ARGS__),	\
+	MACRO__(0x56A0, ## __VA_ARGS__),	\
+	MACRO__(0x56A1, ## __VA_ARGS__),	\
+	MACRO__(0x56A2, ## __VA_ARGS__)
+
+#define XE_DG2_G11_IDS(MACRO__, ...)		\
+	MACRO__(0x5693, ## __VA_ARGS__),	\
+	MACRO__(0x5694, ## __VA_ARGS__),	\
+	MACRO__(0x5695, ## __VA_ARGS__),	\
+	MACRO__(0x5698, ## __VA_ARGS__),	\
+	MACRO__(0x56A5, ## __VA_ARGS__),	\
+	MACRO__(0x56A6, ## __VA_ARGS__),	\
+	MACRO__(0x56B0, ## __VA_ARGS__),	\
+	MACRO__(0x56B1, ## __VA_ARGS__)
+
+#define XE_DG2_G12_IDS(MACRO__, ...)		\
+	MACRO__(0x5696, ## __VA_ARGS__),	\
+	MACRO__(0x5697, ## __VA_ARGS__),	\
+	MACRO__(0x56A3, ## __VA_ARGS__),	\
+	MACRO__(0x56A4, ## __VA_ARGS__),	\
+	MACRO__(0x56B2, ## __VA_ARGS__),	\
+	MACRO__(0x56B3, ## __VA_ARGS__)
+
+#define XE_DG2_IDS(MACRO__, ...)		\
+	XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
+
+#define XE_ATS_M150_IDS(MACRO__, ...)		\
+	MACRO__(0x56C0, ## __VA_ARGS__)
+
+#define XE_ATS_M75_IDS(MACRO__, ...)		\
+	MACRO__(0x56C1, ## __VA_ARGS__)
+
+#define XE_ATS_M_IDS(MACRO__, ...)		\
+	XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
+
+/* MTL */
+#define XE_MTL_M_IDS(MACRO__, ...)		\
+	MACRO__(0x7D40, ## __VA_ARGS__),	\
+	MACRO__(0x7D43, ## __VA_ARGS__),	\
+	MACRO__(0x7DC0, ## __VA_ARGS__)
+
+#define XE_MTL_P_IDS(MACRO__, ...)		\
+	MACRO__(0x7D45, ## __VA_ARGS__),	\
+	MACRO__(0x7D47, ## __VA_ARGS__),	\
+	MACRO__(0x7D50, ## __VA_ARGS__),	\
+	MACRO__(0x7D55, ## __VA_ARGS__),	\
+	MACRO__(0x7DC5, ## __VA_ARGS__),	\
+	MACRO__(0x7DD0, ## __VA_ARGS__),	\
+	MACRO__(0x7DD5, ## __VA_ARGS__)
+
+#define XE_MTL_S_IDS(MACRO__, ...)		\
+	MACRO__(0x7D60, ## __VA_ARGS__),	\
+	MACRO__(0x7DE0, ## __VA_ARGS__)
+
+#define XE_ARL_IDS(MACRO__, ...)		\
+	MACRO__(0x7D66, ## __VA_ARGS__),	\
+	MACRO__(0x7D76, ## __VA_ARGS__)
+
+#define XE_MTL_IDS(MACRO__, ...)		\
+	XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__),	\
+	XE_MTL_P_IDS(MACRO__, ## __VA_ARGS__),	\
+	XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__),	\
+	XE_ARL_IDS(MACRO__, ## __VA_ARGS__)
+
+#endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
new file mode 100644
index 0000000000000..f64b1c785fad9
--- /dev/null
+++ b/include/uapi/drm/xe_drm.h
@@ -0,0 +1,787 @@
+/*
+ * Copyright 2021 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _UAPI_XE_DRM_H_
+#define _UAPI_XE_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Please note that modifications to all structs defined here are
+ * subject to backwards-compatibility constraints.
+ */
+
+/**
+ * struct i915_user_extension - Base class for defining a chain of extensions
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibility, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ *
+ * Example chaining:
+ *
+ * .. code-block:: C
+ *
+ *	struct i915_user_extension ext3 {
+ *		.next_extension = 0, // end
+ *		.name = ...,
+ *	};
+ *	struct i915_user_extension ext2 {
+ *		.next_extension = (uintptr_t)&ext3,
+ *		.name = ...,
+ *	};
+ *	struct i915_user_extension ext1 {
+ *		.next_extension = (uintptr_t)&ext2,
+ *		.name = ...,
+ *	};
+ *
+ * Typically the struct i915_user_extension would be embedded in some uAPI
+ * struct, and in this case we would feed it the head of the chain(i.e ext1),
+ * which would then apply all of the above extensions.
+ *
+ */
+struct xe_user_extension {
+	/**
+	 * @next_extension:
+	 *
+	 * Pointer to the next struct i915_user_extension, or zero if the end.
+	 */
+	__u64 next_extension;
+	/**
+	 * @name: Name of the extension.
+	 *
+	 * Note that the name here is just some integer.
+	 *
+	 * Also note that the name space for this is not global for the whole
+	 * driver, but rather its scope/meaning is limited to the specific piece
+	 * of uAPI which has embedded the struct i915_user_extension.
+	 */
+	__u32 name;
+	/**
+	 * @flags: MBZ
+	 *
+	 * All undefined bits must be zero.
+	 */
+	__u32 pad;
+};
+
+/*
+ * i915 specific ioctls.
+ *
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
+ * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
+ * against DRM_COMMAND_BASE and should be between [0x0, 0x60).
+ */
+#define DRM_XE_DEVICE_QUERY		0x00
+#define DRM_XE_GEM_CREATE		0x01
+#define DRM_XE_GEM_MMAP_OFFSET		0x02
+#define DRM_XE_VM_CREATE		0x03
+#define DRM_XE_VM_DESTROY		0x04
+#define DRM_XE_VM_BIND			0x05
+#define DRM_XE_ENGINE_CREATE		0x06
+#define DRM_XE_ENGINE_DESTROY		0x07
+#define DRM_XE_EXEC			0x08
+#define DRM_XE_MMIO			0x09
+#define DRM_XE_ENGINE_SET_PROPERTY	0x0a
+#define DRM_XE_WAIT_USER_FENCE		0x0b
+#define DRM_XE_VM_MADVISE		0x0c
+
+/* Must be kept compact -- no holes */
+#define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
+#define DRM_IOCTL_XE_GEM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
+#define DRM_IOCTL_XE_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
+#define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
+#define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
+#define DRM_IOCTL_XE_VM_BIND			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_ENGINE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create)
+#define DRM_IOCTL_XE_ENGINE_DESTROY		DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy)
+#define DRM_IOCTL_XE_EXEC			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
+#define DRM_IOCTL_XE_MMIO			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio)
+#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY	DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property)
+#define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+#define DRM_IOCTL_XE_VM_MADVISE			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
+
+struct drm_xe_engine_class_instance {
+	__u16 engine_class;
+
+#define DRM_XE_ENGINE_CLASS_RENDER		0
+#define DRM_XE_ENGINE_CLASS_COPY		1
+#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
+#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
+#define DRM_XE_ENGINE_CLASS_COMPUTE		4
+	/*
+	 * Kernel only class (not actual hardware engine class). Used for
+	 * creating ordered queues of VM bind operations.
+	 */
+#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+
+	__u16 engine_instance;
+	__u16 gt_id;
+};
+
+#define XE_MEM_REGION_CLASS_SYSMEM	0
+#define XE_MEM_REGION_CLASS_VRAM	1
+
+struct drm_xe_query_mem_usage {
+	__u32 num_regions;
+	__u32 pad;
+
+	struct drm_xe_query_mem_region {
+		__u16 mem_class;
+		__u16 instance;	/* unique ID even among different classes */
+		__u32 pad;
+		__u32 min_page_size;
+		__u32 max_page_size;
+		__u64 total_size;
+		__u64 used;
+		__u64 reserved[8];
+	} regions[];
+};
+
+struct drm_xe_query_config {
+	__u32 num_params;
+	__u32 pad;
+#define XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
+#define XE_QUERY_CONFIG_FLAGS			1
+	#define XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
+	#define XE_QUERY_CONFIG_FLAGS_USE_GUC		(0x1 << 1)
+#define XE_QUERY_CONFIG_MIN_ALIGNEMENT		2
+#define XE_QUERY_CONFIG_VA_BITS			3
+#define XE_QUERY_CONFIG_GT_COUNT		4
+#define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
+#define XE_QUERY_CONFIG_NUM_PARAM		XE_QUERY_CONFIG_MEM_REGION_COUNT + 1
+	__u64 info[];
+};
+
+struct drm_xe_query_gts {
+	__u32 num_gt;
+	__u32 pad;
+
+	/*
+	 * TODO: Perhaps info about every mem region relative to this GT? e.g.
+	 * bandwidth between this GT and remote region?
+	 */
+
+	struct drm_xe_query_gt {
+#define XE_QUERY_GT_TYPE_MAIN		0
+#define XE_QUERY_GT_TYPE_REMOTE		1
+#define XE_QUERY_GT_TYPE_MEDIA		2
+		__u16 type;
+		__u16 instance;
+		__u32 clock_freq;
+		__u64 features;
+		__u64 native_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
+		__u64 slow_mem_regions;		/* bit mask of instances from drm_xe_query_mem_usage */
+		__u64 inaccessible_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
+		__u64 reserved[8];
+	} gts[];
+};
+
+struct drm_xe_query_topology_mask {
+	/** @gt_id: GT ID the mask is associated with */
+	__u16 gt_id;
+
+	/** @type: type of mask */
+	__u16 type;
+#define XE_TOPO_DSS_GEOMETRY	(1 << 0)
+#define XE_TOPO_DSS_COMPUTE	(1 << 1)
+#define XE_TOPO_EU_PER_DSS	(1 << 2)
+
+	/** @num_bytes: number of bytes in requested mask */
+	__u32 num_bytes;
+
+	/** @mask: little-endian mask of @num_bytes */
+	__u8 mask[];
+};
+
+struct drm_xe_device_query {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @query: The type of data to query */
+	__u32 query;
+
+#define DRM_XE_DEVICE_QUERY_ENGINES	0
+#define DRM_XE_DEVICE_QUERY_MEM_USAGE	1
+#define DRM_XE_DEVICE_QUERY_CONFIG	2
+#define DRM_XE_DEVICE_QUERY_GTS		3
+#define DRM_XE_DEVICE_QUERY_HWCONFIG	4
+#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY	5
+
+	/** @size: Size of the queried data */
+	__u32 size;
+
+	/** @data: Queried data is placed here */
+	__u64 data;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_gem_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/**
+	 * @size: Requested size for the object
+	 *
+	 * The (page-aligned) allocated size for the object will be returned.
+	 */
+	__u64 size;
+
+	/**
+	 * @flags: Flags, currently a mask of memory instances of where BO can
+	 * be placed
+	 */
+#define XE_GEM_CREATE_FLAG_DEFER_BACKING	(0x1 << 24)
+#define XE_GEM_CREATE_FLAG_SCANOUT		(0x1 << 25)
+	__u32 flags;
+
+	/**
+	 * @vm_id: Attached VM, if any
+	 *
+	 * If a VM is specified, this BO must:
+	 *
+	 *  1. Only ever be bound to that VM.
+	 *
+	 *  2. Cannot be exported as a PRIME fd.
+	 */
+	__u32 vm_id;
+
+	/**
+	 * @handle: Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_gem_mmap_offset {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @handle: Handle for the object being mapped. */
+	__u32 handle;
+
+	/** @flags: Must be zero */
+	__u32 flags;
+
+	/** @offset: The fake offset to use for subsequent mmap call */
+	__u64 offset;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture
+ */
+struct drm_xe_vm_bind_op_error_capture {
+	/** @error: errno that occured */
+	__s32 error;
+	/** @op: operation that encounter an error */
+	__u32 op;
+	/** @addr: address of bind op */
+	__u64 addr;
+	/** @size: size of bind */
+	__u64 size;
+};
+
+/** struct drm_xe_ext_vm_set_property - VM set property extension */
+struct drm_xe_ext_vm_set_property {
+	/** @base: base user extension */
+	struct xe_user_extension base;
+
+	/** @property: property to set */
+#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS		0
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+#define XE_VM_EXTENSION_SET_PROPERTY	0
+	__u64 extensions;
+
+	/** @flags: Flags */
+	__u32 flags;
+
+#define DRM_XE_VM_CREATE_SCRATCH_PAGE	(0x1 << 0)
+#define DRM_XE_VM_CREATE_COMPUTE_MODE	(0x1 << 1)
+#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS	(0x1 << 2)
+#define DRM_XE_VM_CREATE_FAULT_MODE	(0x1 << 3)
+
+	/** @vm_id: Returned VM ID */
+	__u32 vm_id;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_destroy {
+	/** @vm_id: VM ID */
+	__u32 vm_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_bind_op {
+	/**
+	 * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
+	 */
+	__u32 obj;
+
+	union {
+		/**
+		 * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE,
+		 * ignored for unbind
+		 */
+		__u64 obj_offset;
+		/** @userptr: user pointer to bind on */
+		__u64 userptr;
+	};
+
+	/**
+	 * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
+	 */
+	__u64 range;
+
+	/** @addr: Address to operate on, MBZ for UNMAP_ALL */
+	__u64 addr;
+
+	/**
+	 * @gt_mask: Mask for which GTs to create binds for, 0 == All GTs,
+	 * only applies to creating new VMAs
+	 */
+	__u64 gt_mask;
+
+	/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
+	__u32 op;
+
+	/** @mem_region: Memory region to prefetch VMA to, instance not a mask */
+	__u32 region;
+
+#define XE_VM_BIND_OP_MAP		0x0
+#define XE_VM_BIND_OP_UNMAP		0x1
+#define XE_VM_BIND_OP_MAP_USERPTR	0x2
+#define XE_VM_BIND_OP_RESTART		0x3
+#define XE_VM_BIND_OP_UNMAP_ALL		0x4
+#define XE_VM_BIND_OP_PREFETCH		0x5
+
+#define XE_VM_BIND_FLAG_READONLY	(0x1 << 16)
+	/*
+	 * A bind ops completions are always async, hence the support for out
+	 * sync. This flag indicates the allocation of the memory for new page
+	 * tables and the job to program the pages tables is asynchronous
+	 * relative to the IOCTL. That part of a bind operation can fail under
+	 * memory pressure, the job in practice can't fail unless the system is
+	 * totally shot.
+	 *
+	 * If this flag is clear and the IOCTL doesn't return an error, in
+	 * practice the bind op is good and will complete.
+	 *
+	 * If this flag is set and doesn't return return an error, the bind op
+	 * can still fail and recovery is needed. If configured, the bind op that
+	 * caused the error will be captured in drm_xe_vm_bind_op_error_capture.
+	 * Once the user sees the error (via a ufence +
+	 * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory
+	 * via non-async unbinds, and then restart all queue'd async binds op via
+	 * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the
+	 * VM.
+	 *
+	 * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is
+	 * configured in the VM and must be set if the VM is configured with
+	 * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state.
+	 */
+#define XE_VM_BIND_FLAG_ASYNC		(0x1 << 17)
+	/*
+	 * Valid on a faulting VM only, do the MAP operation immediately rather
+	 * than differing the MAP to the page fault handler.
+	 */
+#define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 18)
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_bind {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @vm_id: The ID of the VM to bind to */
+	__u32 vm_id;
+
+	/**
+	 * @engine_id: engine_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
+	 * and engine must have same vm_id. If zero, the default VM bind engine
+	 * is used.
+	 */
+	__u32 engine_id;
+
+	/** @num_binds: number of binds in this IOCTL */
+	__u32 num_binds;
+
+	union {
+		/** @bind: used if num_binds == 1 */
+		struct drm_xe_vm_bind_op bind;
+		/**
+		 * @vector_of_binds: userptr to array of struct
+		 * drm_xe_vm_bind_op if num_binds > 1
+		 */
+		__u64 vector_of_binds;
+	};
+
+	/** @num_syncs: amount of syncs to wait on */
+	__u32 num_syncs;
+
+	/** @syncs: pointer to struct drm_xe_sync array */
+	__u64 syncs;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/** struct drm_xe_ext_engine_set_property - engine set property extension */
+struct drm_xe_ext_engine_set_property {
+	/** @base: base user extension */
+	struct xe_user_extension base;
+
+	/** @property: property to set */
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+};
+
+/**
+ * struct drm_xe_engine_set_property - engine set property
+ *
+ * Same namespace for extensions as drm_xe_engine_create
+ */
+struct drm_xe_engine_set_property {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @engine_id: Engine ID */
+	__u32 engine_id;
+
+	/** @property: property to set */
+#define XE_ENGINE_PROPERTY_PRIORITY			0
+#define XE_ENGINE_PROPERTY_TIMESLICE			1
+#define XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT		2
+	/*
+	 * Long running or ULLS engine mode. DMA fences not allowed in this
+	 * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves
+	 * as a sanity check the UMD knows what it is doing. Can only be set at
+	 * engine create time.
+	 */
+#define XE_ENGINE_PROPERTY_COMPUTE_MODE			3
+#define XE_ENGINE_PROPERTY_PERSISTENCE			4
+#define XE_ENGINE_PROPERTY_JOB_TIMEOUT			5
+#define XE_ENGINE_PROPERTY_ACC_TRIGGER			6
+#define XE_ENGINE_PROPERTY_ACC_NOTIFY			7
+#define XE_ENGINE_PROPERTY_ACC_GRANULARITY		8
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_engine_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+#define XE_ENGINE_EXTENSION_SET_PROPERTY               0
+	__u64 extensions;
+
+	/** @width: submission width (number BB per exec) for this engine */
+	__u16 width;
+
+	/** @num_placements: number of valid placements for this engine */
+	__u16 num_placements;
+
+	/** @vm_id: VM to use for this engine */
+	__u32 vm_id;
+
+	/** @flags: MBZ */
+	__u32 flags;
+
+	/** @engine_id: Returned engine ID */
+	__u32 engine_id;
+
+	/**
+	 * @instances: user pointer to a 2-d array of struct
+	 * drm_xe_engine_class_instance
+	 *
+	 * length = width (i) * num_placements (j)
+	 * index = j + i * width
+	 */
+	__u64 instances;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_engine_destroy {
+	/** @vm_id: VM ID */
+	__u32 engine_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_sync {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	__u32 flags;
+
+#define DRM_XE_SYNC_SYNCOBJ		0x0
+#define DRM_XE_SYNC_TIMELINE_SYNCOBJ	0x1
+#define DRM_XE_SYNC_DMA_BUF		0x2
+#define DRM_XE_SYNC_USER_FENCE		0x3
+#define DRM_XE_SYNC_SIGNAL		0x10
+
+	union {
+		__u32 handle;
+		/**
+		 * @addr: Address of user fence. When sync passed in via exec
+		 * IOCTL this a GPU address in the VM. When sync passed in via
+		 * VM bind IOCTL this is a user pointer. In either case, it is
+		 * the users responsibility that this address is present and
+		 * mapped when the user fence is signalled. Must be qword
+		 * aligned.
+		 */
+		__u64 addr;
+	};
+
+	__u64 timeline_value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_exec {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @engine_id: Engine ID for the batch buffer */
+	__u32 engine_id;
+
+	/** @num_syncs: Amount of struct drm_xe_sync in array. */
+	__u32 num_syncs;
+
+	/** @syncs: Pointer to struct drm_xe_sync array. */
+	__u64 syncs;
+
+	/**
+	  * @address: address of batch buffer if num_batch_buffer == 1 or an
+	  * array of batch buffer addresses
+	  */
+	__u64 address;
+
+	/**
+	 * @num_batch_buffer: number of batch buffer in this exec, must match
+	 * the width of the engine
+	 */
+	__u16 num_batch_buffer;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_mmio {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	__u32 addr;
+
+	__u32 flags;
+
+#define DRM_XE_MMIO_8BIT	0x0
+#define DRM_XE_MMIO_16BIT	0x1
+#define DRM_XE_MMIO_32BIT	0x2
+#define DRM_XE_MMIO_64BIT	0x3
+#define DRM_XE_MMIO_BITS_MASK	0x3
+#define DRM_XE_MMIO_READ	0x4
+#define DRM_XE_MMIO_WRITE	0x8
+
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_wait_user_fence - wait user fence
+ *
+ * Wait on user fence, XE will wakeup on every HW engine interrupt in the
+ * instances list and check if user fence is complete:
+ * (*addr & MASK) OP (VALUE & MASK)
+ *
+ * Returns to user on user fence completion or timeout.
+ */
+struct drm_xe_wait_user_fence {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+	union {
+		/**
+		 * @addr: user pointer address to wait on, must qword aligned
+		 */
+		__u64 addr;
+		/**
+		 * @vm_id: The ID of the VM which encounter an error used with
+		 * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear.
+		 */
+		__u64 vm_id;
+	};
+	/** @op: wait operation (type of comparison) */
+#define DRM_XE_UFENCE_WAIT_EQ	0
+#define DRM_XE_UFENCE_WAIT_NEQ	1
+#define DRM_XE_UFENCE_WAIT_GT	2
+#define DRM_XE_UFENCE_WAIT_GTE	3
+#define DRM_XE_UFENCE_WAIT_LT	4
+#define DRM_XE_UFENCE_WAIT_LTE	5
+	__u16 op;
+	/** @flags: wait flags */
+#define DRM_XE_UFENCE_WAIT_SOFT_OP	(1 << 0)	/* e.g. Wait on VM bind */
+#define DRM_XE_UFENCE_WAIT_ABSTIME	(1 << 1)
+#define DRM_XE_UFENCE_WAIT_VM_ERROR	(1 << 2)
+	__u16 flags;
+	/** @value: compare value */
+	__u64 value;
+	/** @mask: comparison mask */
+#define DRM_XE_UFENCE_WAIT_U8		0xffu
+#define DRM_XE_UFENCE_WAIT_U16		0xffffu
+#define DRM_XE_UFENCE_WAIT_U32		0xffffffffu
+#define DRM_XE_UFENCE_WAIT_U64		0xffffffffffffffffu
+	__u64 mask;
+	/** @timeout: how long to wait before bailing, value in jiffies */
+	__s64 timeout;
+	/**
+	 * @num_engines: number of engine instances to wait on, must be zero
+	 * when DRM_XE_UFENCE_WAIT_SOFT_OP set
+	 */
+	__u64 num_engines;
+	/**
+	 * @instances: user pointer to array of drm_xe_engine_class_instance to
+	 * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set
+	 */
+	__u64 instances;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+struct drm_xe_vm_madvise {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @vm_id: The ID VM in which the VMA exists */
+	__u32 vm_id;
+
+	/** @range: Number of bytes in the VMA */
+	__u64 range;
+
+	/** @addr: Address of the VMA to operation on */
+	__u64 addr;
+
+	/*
+	 * Setting the preferred location will trigger a migrate of the VMA
+	 * backing store to new location if the backing store is already
+	 * allocated.
+	 */
+#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS	0
+#define DRM_XE_VM_MADVISE_PREFERRED_GT		1
+	/*
+	 * In this case lower 32 bits are mem class, upper 32 are GT.
+	 * Combination provides a single IOCTL plus migrate VMA to preferred
+	 * location.
+	 */
+#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT	2
+	/*
+	 * The CPU will do atomic memory operations to this VMA. Must be set on
+	 * some devices for atomics to behave correctly.
+	 */
+#define DRM_XE_VM_MADVISE_CPU_ATOMIC		3
+	/*
+	 * The device will do atomic memory operations to this VMA. Must be set
+	 * on some devices for atomics to behave correctly.
+	 */
+#define DRM_XE_VM_MADVISE_DEVICE_ATOMIC		4
+	/*
+	 * Priority WRT to eviction (moving from preferred memory location due
+	 * to memory pressure). The lower the priority, the more likely to be
+	 * evicted.
+	 */
+#define DRM_XE_VM_MADVISE_PRIORITY		5
+#define		DRM_XE_VMA_PRIORITY_LOW		0
+#define		DRM_XE_VMA_PRIORITY_NORMAL	1	/* Default */
+#define		DRM_XE_VMA_PRIORITY_HIGH	2	/* Must be elevated user */
+	/* Pin the VMA in memory, must be elevated user */
+#define DRM_XE_VM_MADVISE_PIN			6
+
+	/** @property: property to set */
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _UAPI_XE_DRM_H_ */
-- 
GitLab


From 0f06dc101972d598d1c6bb356436c3dbf1e4b646 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:03 -0500
Subject: [PATCH 1004/2340] drm/xe: Implement a local xe_mmio_wait32

Then, move the i915_utils.h include to its user.

The overall goal is to kill all the usages of the i915_utils
stuff.

Yes, wait_for also depends on <linux/delay.h>, so they go
together to where it is needed. It will be likely needed
anyway directly for udelay or usleep_range.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c |  6 ++++++
 drivers/gpu/drm/xe/xe_gt_mcr.c     |  7 +++++++
 drivers/gpu/drm/xe/xe_guc.c        |  7 +++++++
 drivers/gpu/drm/xe/xe_guc_pc.c     |  7 +++++++
 drivers/gpu/drm/xe/xe_mmio.h       | 29 ++++++++++++++++++++---------
 drivers/gpu/drm/xe/xe_pcode.c      |  7 +++++++
 6 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 0320ce7ba3d19..31a33ee9ccb6c 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -10,6 +10,12 @@
 #include "xe_mmio.h"
 #include "gt/intel_gt_regs.h"
 
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporarily
+ * including so we can use '__mask_next_bit'.
+ */
+#include "i915_utils.h"
+
 #define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
 
 static struct xe_gt *
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index b69c0d6c6b2f9..8add5ec9a3077 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -11,6 +11,13 @@
 
 #include "gt/intel_gt_regs.h"
 
+#include <linux/delay.h>
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporar
+ * including so we can use 'wait_for'.
+ */
+#include "i915_utils.h"
+
 /**
  * DOC: GT Multicast/Replicated (MCR) Register Support
  *
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 3c285d849ef65..969a2427b1f26 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -22,6 +22,13 @@
 #include "i915_reg_defs.h"
 #include "gt/intel_gt_regs.h"
 
+#include <linux/delay.h>
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporarily
+ * including so we can use 'wait_for' and range_overflow_t.
+ */
+#include "i915_utils.h"
+
 /* TODO: move to common file */
 #define GUC_PVC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
 #define PVC_MOCS_UC_INDEX		1
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 227e30a482e3b..260ccf3fe215a 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -16,6 +16,13 @@
 #include "i915_reg_defs.h"
 #include "i915_reg.h"
 
+#include <linux/delay.h>
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporarily
+ * including so we can use 'wait_for'.
+ */
+#include "i915_utils.h"
+
 #include "intel_mchbar_regs.h"
 
 /* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 09d24467096fb..7352b622ca87a 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -10,13 +10,6 @@
 
 #include "xe_gt_types.h"
 
-/*
- * FIXME: This header has been deemed evil and we need to kill it. Temporarily
- * including so we can use 'wait_for' and unblock initial development. A follow
- * should replace 'wait_for' with a sane version and drop including this header.
- */
-#include "i915_utils.h"
-
 struct drm_device;
 struct drm_file;
 struct xe_device;
@@ -93,8 +86,26 @@ static inline int xe_mmio_wait32(struct xe_gt *gt,
 				 u32 reg, u32 val,
 				 u32 mask, u32 timeout_ms)
 {
-	return wait_for((xe_mmio_read32(gt, reg) & mask) == val,
-			timeout_ms);
+	ktime_t cur = ktime_get_raw();
+	const ktime_t end = ktime_add_ms(cur, timeout_ms);
+	s64 wait = 10;
+
+	for (;;) {
+		if ((xe_mmio_read32(gt, reg) & mask) == val)
+			return 0;
+
+		cur = ktime_get_raw();
+		if (!ktime_before(cur, end))
+			return -ETIMEDOUT;
+
+		if (ktime_after(ktime_add_us(cur, wait), end))
+			wait = ktime_us_delta(end, cur);
+
+		usleep_range(wait, wait << 1);
+		wait <<= 1;
+	}
+
+	return -ETIMEDOUT;
 }
 
 int xe_mmio_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 236159c8a6c0e..313ccd70d1a98 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -11,6 +11,13 @@
 
 #include <linux/errno.h>
 
+#include <linux/delay.h>
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporarily
+ * including so we can use 'wait_for'.
+ */
+#include "i915_utils.h"
+
 /**
  * DOC: PCODE
  *
-- 
GitLab


From 86011ae21c15a779dcf25b97d5670371dc14e4c3 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:04 -0500
Subject: [PATCH 1005/2340] drm/xe: Stop using i915's range_overflows_t macro.

Let's do it directly.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 969a2427b1f26..9234da06d205c 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -25,7 +25,7 @@
 #include <linux/delay.h>
 /*
  * FIXME: This header has been deemed evil and we need to kill it. Temporarily
- * including so we can use 'wait_for' and range_overflow_t.
+ * including so we can use 'wait_for'.
  */
 #include "i915_utils.h"
 
@@ -55,7 +55,8 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
 	u32 addr = xe_bo_ggtt_addr(bo);
 
 	XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc)));
-	XE_BUG_ON(range_overflows_t(u32, addr, bo->size, GUC_GGTT_TOP));
+	XE_BUG_ON(addr >= GUC_GGTT_TOP);
+	XE_BUG_ON(bo->size > GUC_GGTT_TOP - addr);
 
 	return addr;
 }
-- 
GitLab


From 7aaec3a623adda324f2435153a105088a8556b9a Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 31 Mar 2023 14:21:34 -0400
Subject: [PATCH 1006/2340] drm/xe: Let's return last value read on
 xe_mmio_wait32.

This is already useful because it avoids some extra reads
where registers might have changed after the timeout decision.

But also, it will be important to end the kill of i915's wait_for.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c |  4 ++--
 drivers/gpu/drm/xe/xe_gt.c         |  2 +-
 drivers/gpu/drm/xe/xe_guc.c        | 13 ++++++-------
 drivers/gpu/drm/xe/xe_huc.c        |  2 +-
 drivers/gpu/drm/xe/xe_mmio.h       | 20 +++++++++++++++-----
 drivers/gpu/drm/xe/xe_uc_fw.c      |  7 +++----
 6 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 31a33ee9ccb6c..a203eabba4e29 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -129,7 +129,7 @@ static int domain_wake_wait(struct xe_gt *gt,
 			    struct xe_force_wake_domain *domain)
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
-			      XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL);
 }
 
 static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
@@ -141,7 +141,7 @@ static int domain_sleep_wait(struct xe_gt *gt,
 			     struct xe_force_wake_domain *domain)
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val,
-			      XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL);
 }
 
 #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 5f8fa9d98d5af..6a84d2a1c7f34 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -599,7 +599,7 @@ int do_gt_reset(struct xe_gt *gt)
 	int err;
 
 	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL);
-	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5);
+	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5, NULL);
 	if (err)
 		drm_err(&xe->drm,
 			"GT reset failed to clear GEN11_GRDOM_FULL\n");
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 9234da06d205c..52f42bd5cad7a 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -324,17 +324,17 @@ int xe_guc_reset(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
-	u32 guc_status;
+	u32 guc_status, gdrst;
 	int ret;
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
 	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC);
 
-	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5);
+	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5, &gdrst);
 	if (ret) {
 		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
-			xe_mmio_read32(gt, GEN6_GDRST.reg));
+			gdrst);
 		goto err_out;
 	}
 
@@ -654,7 +654,7 @@ int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
-	u32 header;
+	u32 header, reply;
 	u32 reply_reg = xe_gt_is_media_type(gt) ?
 		MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg;
 	int ret;
@@ -691,12 +691,11 @@ retry:
 	ret = xe_mmio_wait32(gt, reply_reg,
 			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
 					GUC_HXG_ORIGIN_GUC),
-			     GUC_HXG_MSG_0_ORIGIN,
-			     50);
+			     GUC_HXG_MSG_0_ORIGIN, 50, &reply);
 	if (ret) {
 timeout:
 		drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n",
-			request[0], xe_mmio_read32(gt, reply_reg));
+			request[0], reply);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 93b22fac6e149..c8c93bdf4760f 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -85,7 +85,7 @@ int xe_huc_auth(struct xe_huc *huc)
 
 	ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
 			     HUC_LOAD_SUCCESSFUL,
-			     HUC_LOAD_SUCCESSFUL, 100);
+			     HUC_LOAD_SUCCESSFUL, 100, NULL);
 	if (ret) {
 		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
 		goto fail;
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 7352b622ca87a..ccd97a4a89c16 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -82,21 +82,28 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 	return (reg_val & mask) != eval ? -EINVAL : 0;
 }
 
-static inline int xe_mmio_wait32(struct xe_gt *gt,
-				 u32 reg, u32 val,
-				 u32 mask, u32 timeout_ms)
+static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val,
+				 u32 mask, u32 timeout_ms, u32 *out_val)
 {
 	ktime_t cur = ktime_get_raw();
 	const ktime_t end = ktime_add_ms(cur, timeout_ms);
+	int ret = -ETIMEDOUT;
 	s64 wait = 10;
+	u32 read;
 
 	for (;;) {
 		if ((xe_mmio_read32(gt, reg) & mask) == val)
 			return 0;
 
+		read = xe_mmio_read32(gt, reg);
+		if ((read & mask) == val) {
+			ret = 0;
+			break;
+		}
+
 		cur = ktime_get_raw();
 		if (!ktime_before(cur, end))
-			return -ETIMEDOUT;
+			break;
 
 		if (ktime_after(ktime_add_us(cur, wait), end))
 			wait = ktime_us_delta(end, cur);
@@ -105,7 +112,10 @@ static inline int xe_mmio_wait32(struct xe_gt *gt,
 		wait <<= 1;
 	}
 
-	return -ETIMEDOUT;
+	if (out_val)
+		*out_val = read;
+
+	return ret;
 }
 
 int xe_mmio_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 86c47b7f09017..edd6a5d2db34e 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -326,7 +326,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
-	u32 src_offset;
+	u32 src_offset, dma_ctrl;
 	int ret;
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
@@ -352,11 +352,10 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
 
 	/* Wait for DMA to finish */
-	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100);
+	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100, &dma_ctrl);
 	if (ret)
 		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
-			xe_uc_fw_type_repr(uc_fw->type),
-			xe_mmio_read32(gt, DMA_CTRL.reg));
+			xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
 
 	/* Disable the bits once DMA is over */
 	xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags));
-- 
GitLab


From 2e5be5d57dbe5e04a5abbd01417fc098f8925a35 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:06 -0500
Subject: [PATCH 1007/2340] drm/xe: Convert guc_ready to regular xe_mmio_wait32

Possible now that the wait function returns the last read value.

So we can remove the users of i915's wait_for one by one...

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 52f42bd5cad7a..7e8451e60d2d3 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -403,24 +403,6 @@ static int guc_xfer_rsa(struct xe_guc *guc)
 	return 0;
 }
 
-/*
- * Read the GuC status register (GUC_STATUS) and store it in the
- * specified location; then return a boolean indicating whether
- * the value matches either of two values representing completion
- * of the GuC boot process.
- *
- * This is used for polling the GuC status in a wait_for()
- * loop below.
- */
-static bool guc_ready(struct xe_guc *guc, u32 *status)
-{
-	u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg);
-	u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
-
-	*status = val;
-	return uk_val == XE_GUC_LOAD_STATUS_READY;
-}
-
 static int guc_wait_ucode(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
@@ -444,7 +426,11 @@ static int guc_wait_ucode(struct xe_guc *guc)
 	 * 200ms. Even at slowest clock, this should be sufficient. And
 	 * in the working case, a larger timeout makes no difference.
 	 */
-	ret = wait_for(guc_ready(guc, &status), 200);
+	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg,
+			     FIELD_PREP(GS_UKERNEL_MASK,
+					XE_GUC_LOAD_STATUS_READY),
+			     GS_UKERNEL_MASK, 200, &status);
+
 	if (ret) {
 		struct drm_device *drm = &xe->drm;
 		struct drm_printer p = drm_info_printer(drm->dev);
-- 
GitLab


From eb04985d7211a5fc651f8cca588b2d78d3a36cee Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:07 -0500
Subject: [PATCH 1008/2340] drm/xe: Wait for success on guc done.

Rather than a constant check on proto and wait not busy,
let's wait for the expected success and then check the
protocol afterwards.

With this, we can now use the regular xe_mmio_wait32
and kill this local need for the wait_for.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 7e8451e60d2d3..6ecf493c26b5e 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -22,13 +22,6 @@
 #include "i915_reg_defs.h"
 #include "gt/intel_gt_regs.h"
 
-#include <linux/delay.h>
-/*
- * FIXME: This header has been deemed evil and we need to kill it. Temporarily
- * including so we can use 'wait_for'.
- */
-#include "i915_utils.h"
-
 /* TODO: move to common file */
 #define GUC_PVC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
 #define PVC_MOCS_UC_INDEX		1
@@ -688,19 +681,17 @@ timeout:
 	header = xe_mmio_read32(gt, reply_reg);
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
 	    GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
-#define done ({ header = xe_mmio_read32(gt, reply_reg); \
-		FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \
-		GUC_HXG_ORIGIN_GUC || \
-		FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \
-		GUC_HXG_TYPE_NO_RESPONSE_BUSY; })
 
-		ret = wait_for(done, 1000);
-		if (unlikely(ret))
-			goto timeout;
+		ret = xe_mmio_wait32(gt, reply_reg,
+				     FIELD_PREP(GUC_HXG_MSG_0_TYPE,
+						GUC_HXG_TYPE_RESPONSE_SUCCESS),
+				     GUC_HXG_MSG_0_TYPE, 1000, &header);
+
 		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
-				       GUC_HXG_ORIGIN_GUC))
+			     GUC_HXG_ORIGIN_GUC))
 			goto proto;
-#undef done
+		if (unlikely(ret))
+			goto timeout;
 	}
 
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
-- 
GitLab


From b6f468b847d09ca1fe5cea2606a323be892f8893 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:08 -0500
Subject: [PATCH 1009/2340] drm/xe: Remove i915_utils dependency from
 xe_guc_pc.

To make it simpler, all of the status checks also waits and
times out.

Also, no ktime precision is needed in this case, and we
can use usleep_range because we are not in atomic paths here.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 260ccf3fe215a..d751ee98de115 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -17,11 +17,6 @@
 #include "i915_reg.h"
 
 #include <linux/delay.h>
-/*
- * FIXME: This header has been deemed evil and we need to kill it. Temporarily
- * including so we can use 'wait_for'.
- */
-#include "i915_utils.h"
 
 #include "intel_mchbar_regs.h"
 
@@ -135,10 +130,26 @@ pc_to_maps(struct xe_guc_pc *pc)
 	(FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
 	 FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
 
-static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state)
+static int wait_for_pc_state(struct xe_guc_pc *pc,
+			     enum slpc_global_state state)
 {
+	int timeout_us = 5000; /* rought 5ms, but no need for precision */
+	int slept, wait = 10;
+
 	xe_device_assert_mem_access(pc_to_xe(pc));
-	return slpc_shared_data_read(pc, header.global_state) == state;
+
+	for (slept = 0; slept < timeout_us;) {
+		if (slpc_shared_data_read(pc, header.global_state) == state)
+			return 0;
+
+		usleep_range(wait, wait << 1);
+		slept += wait;
+		wait <<= 1;
+		if (slept + wait > timeout_us)
+			wait = timeout_us - slept;
+	}
+
+	return -ETIMEDOUT;
 }
 
 static int pc_action_reset(struct xe_guc_pc *pc)
@@ -189,7 +200,7 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc)
 		0,
 	};
 
-	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
 		return -EAGAIN;
 
 	/* Blocking here to ensure the results are ready before reading them */
@@ -212,7 +223,7 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
 		value,
 	};
 
-	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
 		return -EAGAIN;
 
 	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
@@ -747,7 +758,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 	if (ret)
 		goto out;
 
-	if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) {
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) {
 		drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n");
 		ret = -EIO;
 		goto out;
@@ -793,7 +804,7 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc)
 	if (ret)
 		goto out;
 
-	if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) {
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) {
 		drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
 		ret = -EIO;
 	}
-- 
GitLab


From b56d208273bf5be6593d0dcd2d471f771c08a805 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:09 -0500
Subject: [PATCH 1010/2340] drm/xe: Stop using i915_utils in xe_wopcm.

We don't need any macro for a simple check we can do explicitly
and clear.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_wopcm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index e4a8d4a1899e8..8fe182afa06c5 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -11,8 +11,6 @@
 #include "xe_uc_fw.h"
 #include "xe_wopcm.h"
 
-#include "i915_utils.h"
-
 /**
  * DOC: Write Once Protected Content Memory (WOPCM) Layout
  *
@@ -92,7 +90,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
 	u32 size;
 
 	size = wopcm_size - ctx_rsvd;
-	if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) {
+	if (unlikely(guc_wopcm_base >= size ||
+		     guc_wopcm_size > size - guc_wopcm_base)) {
 		drm_err(&xe->drm,
 			"WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
 			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
-- 
GitLab


From eeb8019d8c6fba1eae6ef8a238b42ff9b39dbaa4 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:10 -0500
Subject: [PATCH 1011/2340] drm/xe: Let's avoid i915_utils in the
 xe_force_wake.

We can run the bit operation locally without yet another macro.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index a203eabba4e29..b87bf3b4cd52d 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -10,11 +10,6 @@
 #include "xe_mmio.h"
 #include "gt/intel_gt_regs.h"
 
-/*
- * FIXME: This header has been deemed evil and we need to kill it. Temporarily
- * including so we can use '__mask_next_bit'.
- */
-#include "i915_utils.h"
 
 #define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
 
@@ -145,9 +140,9 @@ static int domain_sleep_wait(struct xe_gt *gt,
 }
 
 #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
-	for (tmp__ = (mask__); tmp__ ;) \
+	for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \
 		for_each_if((domain__ = ((fw__)->domains + \
-					 __mask_next_bit(tmp__))) && \
+					 (ffs(tmp__) - 1))) && \
 					 domain__->reg_ctl)
 
 int xe_force_wake_get(struct xe_force_wake *fw,
-- 
GitLab


From 81593af6c88d3482997e43f0a85ccd93cc4928df Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:11 -0500
Subject: [PATCH 1012/2340] drm/xe: Convert xe_mmio_wait32 to us so we can stop
 using wait_for_us.

Another clean-up towards killing the usage of i915_utils.h

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c | 6 ++++--
 drivers/gpu/drm/xe/xe_gt.c         | 3 ++-
 drivers/gpu/drm/xe/xe_gt_mcr.c     | 9 +--------
 drivers/gpu/drm/xe/xe_guc.c        | 9 +++++----
 drivers/gpu/drm/xe/xe_huc.c        | 2 +-
 drivers/gpu/drm/xe/xe_mmio.h       | 4 ++--
 drivers/gpu/drm/xe/xe_uc_fw.c      | 2 +-
 7 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index b87bf3b4cd52d..1f7b68f61ec5e 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -124,7 +124,8 @@ static int domain_wake_wait(struct xe_gt *gt,
 			    struct xe_force_wake_domain *domain)
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
-			      XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL);
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
+			      NULL);
 }
 
 static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
@@ -136,7 +137,8 @@ static int domain_sleep_wait(struct xe_gt *gt,
 			     struct xe_force_wake_domain *domain)
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val,
-			      XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL);
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
+			      NULL);
 }
 
 #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 6a84d2a1c7f34..bdc64219ed4c8 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -599,7 +599,8 @@ int do_gt_reset(struct xe_gt *gt)
 	int err;
 
 	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL);
-	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5, NULL);
+	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5000,
+			     NULL);
 	if (err)
 		drm_err(&xe->drm,
 			"GT reset failed to clear GEN11_GRDOM_FULL\n");
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 8add5ec9a3077..f4bfff98d5f4c 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -11,13 +11,6 @@
 
 #include "gt/intel_gt_regs.h"
 
-#include <linux/delay.h>
-/*
- * FIXME: This header has been deemed evil and we need to kill it. Temporar
- * including so we can use 'wait_for'.
- */
-#include "i915_utils.h"
-
 /**
  * DOC: GT Multicast/Replicated (MCR) Register Support
  *
@@ -383,7 +376,7 @@ static void mcr_lock(struct xe_gt *gt)
 	 * shares the same steering control register.
 	 */
 	if (GRAPHICS_VERx100(xe) >= 1270)
-		ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10);
+		ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL);
 
 	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 6ecf493c26b5e..2deb1f6544ea6 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -324,7 +324,8 @@ int xe_guc_reset(struct xe_guc *guc)
 
 	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC);
 
-	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5, &gdrst);
+	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5000,
+			     &gdrst);
 	if (ret) {
 		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
 			gdrst);
@@ -422,7 +423,7 @@ static int guc_wait_ucode(struct xe_guc *guc)
 	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg,
 			     FIELD_PREP(GS_UKERNEL_MASK,
 					XE_GUC_LOAD_STATUS_READY),
-			     GS_UKERNEL_MASK, 200, &status);
+			     GS_UKERNEL_MASK, 200000, &status);
 
 	if (ret) {
 		struct drm_device *drm = &xe->drm;
@@ -670,7 +671,7 @@ retry:
 	ret = xe_mmio_wait32(gt, reply_reg,
 			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
 					GUC_HXG_ORIGIN_GUC),
-			     GUC_HXG_MSG_0_ORIGIN, 50, &reply);
+			     GUC_HXG_MSG_0_ORIGIN, 50000, &reply);
 	if (ret) {
 timeout:
 		drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n",
@@ -685,7 +686,7 @@ timeout:
 		ret = xe_mmio_wait32(gt, reply_reg,
 				     FIELD_PREP(GUC_HXG_MSG_0_TYPE,
 						GUC_HXG_TYPE_RESPONSE_SUCCESS),
-				     GUC_HXG_MSG_0_TYPE, 1000, &header);
+				     GUC_HXG_MSG_0_TYPE, 1000000, &header);
 
 		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
 			     GUC_HXG_ORIGIN_GUC))
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index c8c93bdf4760f..9cb15bb40a385 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -85,7 +85,7 @@ int xe_huc_auth(struct xe_huc *huc)
 
 	ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
 			     HUC_LOAD_SUCCESSFUL,
-			     HUC_LOAD_SUCCESSFUL, 100, NULL);
+			     HUC_LOAD_SUCCESSFUL, 100000, NULL);
 	if (ret) {
 		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
 		goto fail;
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index ccd97a4a89c16..f72edfb39cc01 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -83,10 +83,10 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 }
 
 static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val,
-				 u32 mask, u32 timeout_ms, u32 *out_val)
+				 u32 mask, u32 timeout_us, u32 *out_val)
 {
 	ktime_t cur = ktime_get_raw();
-	const ktime_t end = ktime_add_ms(cur, timeout_ms);
+	const ktime_t end = ktime_add_us(cur, timeout_us);
 	int ret = -ETIMEDOUT;
 	s64 wait = 10;
 	u32 read;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index edd6a5d2db34e..bbb931bc19ce2 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -352,7 +352,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
 
 	/* Wait for DMA to finish */
-	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100, &dma_ctrl);
+	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl);
 	if (ret)
 		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
 			xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
-- 
GitLab


From 7dc9b92dcfeff727776bca5ab11b3e0f3445ece2 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 12 Jan 2023 17:25:12 -0500
Subject: [PATCH 1013/2340] drm/xe: Remove i915_utils dependency from xe_pcode.

Expand xe_mmio_wait32 to accept atomic and then use
that directly when possible, and create own routine to
wait for the pcode status.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c |  4 +-
 drivers/gpu/drm/xe/xe_gt.c         |  2 +-
 drivers/gpu/drm/xe/xe_gt_mcr.c     |  3 +-
 drivers/gpu/drm/xe/xe_guc.c        |  9 +--
 drivers/gpu/drm/xe/xe_huc.c        |  2 +-
 drivers/gpu/drm/xe/xe_mmio.h       |  9 ++-
 drivers/gpu/drm/xe/xe_pcode.c      | 94 ++++++++++++------------------
 drivers/gpu/drm/xe/xe_uc_fw.c      |  3 +-
 8 files changed, 57 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 1f7b68f61ec5e..d2080e6fbe10c 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -125,7 +125,7 @@ static int domain_wake_wait(struct xe_gt *gt,
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
 			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
-			      NULL);
+			      NULL, false);
 }
 
 static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
@@ -138,7 +138,7 @@ static int domain_sleep_wait(struct xe_gt *gt,
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val,
 			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
-			      NULL);
+			      NULL, false);
 }
 
 #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index bdc64219ed4c8..fd8232a4556e5 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -600,7 +600,7 @@ int do_gt_reset(struct xe_gt *gt)
 
 	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL);
 	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5000,
-			     NULL);
+			     NULL, false);
 	if (err)
 		drm_err(&xe->drm,
 			"GT reset failed to clear GEN11_GRDOM_FULL\n");
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index f4bfff98d5f4c..ddce2c41c7f51 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -376,7 +376,8 @@ static void mcr_lock(struct xe_gt *gt)
 	 * shares the same steering control register.
 	 */
 	if (GRAPHICS_VERx100(xe) >= 1270)
-		ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL);
+		ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL,
+				     false);
 
 	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 2deb1f6544ea6..4a7e8f9a14d5b 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -325,7 +325,7 @@ int xe_guc_reset(struct xe_guc *guc)
 	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC);
 
 	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5000,
-			     &gdrst);
+			     &gdrst, false);
 	if (ret) {
 		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
 			gdrst);
@@ -423,7 +423,7 @@ static int guc_wait_ucode(struct xe_guc *guc)
 	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg,
 			     FIELD_PREP(GS_UKERNEL_MASK,
 					XE_GUC_LOAD_STATUS_READY),
-			     GS_UKERNEL_MASK, 200000, &status);
+			     GS_UKERNEL_MASK, 200000, &status, false);
 
 	if (ret) {
 		struct drm_device *drm = &xe->drm;
@@ -671,7 +671,7 @@ retry:
 	ret = xe_mmio_wait32(gt, reply_reg,
 			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
 					GUC_HXG_ORIGIN_GUC),
-			     GUC_HXG_MSG_0_ORIGIN, 50000, &reply);
+			     GUC_HXG_MSG_0_ORIGIN, 50000, &reply, false);
 	if (ret) {
 timeout:
 		drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n",
@@ -686,7 +686,8 @@ timeout:
 		ret = xe_mmio_wait32(gt, reply_reg,
 				     FIELD_PREP(GUC_HXG_MSG_0_TYPE,
 						GUC_HXG_TYPE_RESPONSE_SUCCESS),
-				     GUC_HXG_MSG_0_TYPE, 1000000, &header);
+				     GUC_HXG_MSG_0_TYPE, 1000000, &header,
+				     false);
 
 		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
 			     GUC_HXG_ORIGIN_GUC))
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 9cb15bb40a385..82e7fb3a62922 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -85,7 +85,7 @@ int xe_huc_auth(struct xe_huc *huc)
 
 	ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
 			     HUC_LOAD_SUCCESSFUL,
-			     HUC_LOAD_SUCCESSFUL, 100000, NULL);
+			     HUC_LOAD_SUCCESSFUL, 100000, NULL, false);
 	if (ret) {
 		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
 		goto fail;
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index f72edfb39cc01..adc7d7484afb9 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -82,8 +82,8 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 	return (reg_val & mask) != eval ? -EINVAL : 0;
 }
 
-static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val,
-				 u32 mask, u32 timeout_us, u32 *out_val)
+static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask,
+				 u32 timeout_us, u32 *out_val, bool atomic)
 {
 	ktime_t cur = ktime_get_raw();
 	const ktime_t end = ktime_add_us(cur, timeout_us);
@@ -108,7 +108,10 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val,
 		if (ktime_after(ktime_add_us(cur, wait), end))
 			wait = ktime_us_delta(end, cur);
 
-		usleep_range(wait, wait << 1);
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
 		wait <<= 1;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 313ccd70d1a98..39712e8437283 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -12,11 +12,6 @@
 #include <linux/errno.h>
 
 #include <linux/delay.h>
-/*
- * FIXME: This header has been deemed evil and we need to kill it. Temporarily
- * including so we can use 'wait_for'.
- */
-#include "i915_utils.h"
 
 /**
  * DOC: PCODE
@@ -59,28 +54,24 @@ static int pcode_mailbox_status(struct xe_gt *gt)
 	return 0;
 }
 
-static bool pcode_mailbox_done(struct xe_gt *gt)
-{
-	lockdep_assert_held(&gt->pcode.lock);
-	return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0;
-}
-
 static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
-			    unsigned int timeout, bool return_data, bool atomic)
+			    unsigned int timeout_ms, bool return_data,
+			    bool atomic)
 {
+	int err;
 	lockdep_assert_held(&gt->pcode.lock);
 
-	if (!pcode_mailbox_done(gt))
+	if ((xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) != 0)
 		return -EAGAIN;
 
 	xe_mmio_write32(gt, PCODE_DATA0.reg, *data0);
 	xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0);
 	xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox);
 
-	if (atomic)
-		_wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1);
-	else
-		wait_for(pcode_mailbox_done(gt), timeout);
+	err = xe_mmio_wait32(gt, PCODE_MAILBOX.reg, 0, PCODE_READY,
+			     timeout_ms * 1000, NULL, atomic);
+	if (err)
+		return err;
 
 	if (return_data) {
 		*data0 = xe_mmio_read32(gt, PCODE_DATA0.reg);
@@ -113,13 +104,26 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
 	return err;
 }
 
-static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
-				  u32 request, u32 reply_mask, u32 reply,
-				  u32 *status, bool atomic)
+static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+				u32 request, u32 reply_mask, u32 reply,
+				u32 *status, bool atomic, int timeout_us)
 {
-	*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic);
+	int slept, wait = 10;
+
+	for (slept = 0; slept < timeout_us; slept += wait) {
+		*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+					   atomic);
+		if ((*status == 0) && ((request & reply_mask) == reply))
+			return 0;
+
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
+		wait <<= 1;
+	}
 
-	return (*status == 0) && ((request & reply_mask) == reply);
+	return -ETIMEDOUT;
 }
 
 /**
@@ -146,25 +150,12 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
 {
 	u32 status;
 	int ret;
-	bool atomic = false;
 
 	mutex_lock(&gt->pcode.lock);
 
-#define COND \
-	xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic)
-
-	/*
-	 * Prime the PCODE by doing a request first. Normally it guarantees
-	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
-	 * _wait_for() doesn't guarantee when its passed condition is evaluated
-	 * first, so send the first request explicitly.
-	 */
-	if (COND) {
-		ret = 0;
-		goto out;
-	}
-	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
-	if (!ret)
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   false, timeout_base_ms * 1000);
+	if (ret)
 		goto out;
 
 	/*
@@ -181,15 +172,13 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
 		"PCODE timeout, retrying with preemption disabled\n");
 	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
 	preempt_disable();
-	atomic = true;
-	ret = wait_for_atomic(COND, 50);
-	atomic = false;
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   true, timeout_base_ms * 1000);
 	preempt_enable();
 
 out:
 	mutex_unlock(&gt->pcode.lock);
 	return status ? status : ret;
-#undef COND
 }
 /**
  * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table
@@ -243,16 +232,6 @@ unlock:
 	return ret;
 }
 
-static bool pcode_dgfx_status_complete(struct xe_gt *gt)
-{
-	u32 data = DGFX_GET_INIT_STATUS;
-	int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS,
-				      &data, NULL, 1, true, false);
-
-	return status == 0 &&
-		(data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE;
-}
-
 /**
  * xe_pcode_init - Ensure PCODE is initialized
  * @gt: gt instance
@@ -264,20 +243,23 @@ static bool pcode_dgfx_status_complete(struct xe_gt *gt)
  */
 int xe_pcode_init(struct xe_gt *gt)
 {
-	int timeout = 180000; /* 3 min */
+	u32 status, request = DGFX_GET_INIT_STATUS;
+	int timeout_us = 180000000; /* 3 min */
 	int ret;
 
 	if (!IS_DGFX(gt_to_xe(gt)))
 		return 0;
 
 	mutex_lock(&gt->pcode.lock);
-	ret = wait_for(pcode_dgfx_status_complete(gt), timeout);
+	ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   &status, false, timeout_us);
 	mutex_unlock(&gt->pcode.lock);
 
 	if (ret)
 		drm_err(&gt_to_xe(gt)->drm,
-			"PCODE initialization timedout after: %d min\n",
-			timeout / 60000);
+			"PCODE initialization timedout after: 3 min\n");
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index bbb931bc19ce2..cd264cf50d305 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -352,7 +352,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
 
 	/* Wait for DMA to finish */
-	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl);
+	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl,
+			     false);
 	if (ret)
 		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
 			xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
-- 
GitLab


From e9d285ff9d4998d20790395adc8a62f283bdb72b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 12 Jan 2023 17:25:13 -0500
Subject: [PATCH 1014/2340] drm/xe/migrate: Add kerneldoc for the migrate
 subsystem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add kerneldoc for structs and external functions.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 drivers/gpu/drm/xe/xe_migrate.c | 108 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_migrate.h |  16 ++++-
 2 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7fc40e8009c30..d3fa7bec78d38 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -27,16 +27,37 @@
 
 #include "gt/intel_gpu_commands.h"
 
+/**
+ * struct xe_migrate - migrate context.
+ */
 struct xe_migrate {
+	/** @eng: Default engine used for migration */
 	struct xe_engine *eng;
+	/** @gt: Backpointer to the gt this struct xe_migrate belongs to. */
 	struct xe_gt *gt;
+	/** @job_mutex: Timeline mutex for @eng. */
 	struct mutex job_mutex;
+	/** @pt_bo: Page-table buffer object. */
 	struct xe_bo *pt_bo;
+	/**
+	 * @cleared_bo: Zeroed out bo used as a source for CCS metadata clears
+	 */
 	struct xe_bo *cleared_bo;
+	/** @batch_base_ofs: VM offset of the migration batch buffer */
 	u64 batch_base_ofs;
+	/** @usm_batch_base_ofs: VM offset of the usm batch buffer */
 	u64 usm_batch_base_ofs;
+	/** @cleared_vram_ofs: VM offset of @cleared_bo. */
 	u64 cleared_vram_ofs;
+	/**
+	 * @fence: dma-fence representing the last migration job batch.
+	 * Protected by @job_mutex.
+	 */
 	struct dma_fence *fence;
+	/**
+	 * @vm_update_sa: For integrated, used to suballocate page-tables
+	 * out of the pt_bo.
+	 */
 	struct drm_suballoc_manager vm_update_sa;
 };
 
@@ -45,6 +66,15 @@ struct xe_migrate {
 #define NUM_PT_SLOTS 32
 #define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M)
 
+/**
+ * xe_gt_migrate_engine() - Get this gt's migrate engine.
+ * @gt: The gt.
+ *
+ * Returns the default migrate engine of this gt.
+ * TODO: Perhaps this function is slightly misplaced, and even unneeded?
+ *
+ * Return: The default migrate engine
+ */
 struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt)
 {
 	return gt->migrate->eng;
@@ -271,6 +301,12 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 	return 0;
 }
 
+/**
+ * xe_migrate_init() - Initialize a migrate context
+ * @gt: Back-pointer to the gt we're initializing for.
+ *
+ * Return: Pointer to a migrate context on success. Error pointer on error.
+ */
 struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
@@ -540,6 +576,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 	return flush_flags;
 }
 
+/**
+ * xe_migrate_copy() - Copy content of TTM resources.
+ * @m: The migration context.
+ * @bo: The buffer object @src is currently bound to.
+ * @src: The source TTM resource.
+ * @dst: The dst TTM resource.
+ *
+ * Copies the contents of @src to @dst: On flat CCS devices,
+ * the CCS metadata is copied as well if needed, or if not present,
+ * the CCS metadata of @dst is cleared for security reasons.
+ * It's currently not possible to copy between two system resources,
+ * since that would require two TTM page-vectors.
+ * TODO: Eliminate the @bo argument and supply two TTM page-vectors.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
 struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct xe_bo *bo,
 				  struct ttm_resource *src,
@@ -683,7 +737,7 @@ err:
 		xe_bb_free(bb, NULL);
 
 err_sync:
-		/* Sync partial copy if any. */
+		/* Sync partial copy if any. FIXME: under job_mutex? */
 		if (fence) {
 			dma_fence_wait(fence, false);
 			dma_fence_put(fence);
@@ -733,6 +787,21 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
 	return 0;
 }
 
+/**
+ * xe_migrate_clear() - Copy content of TTM resources.
+ * @m: The migration context.
+ * @bo: The buffer object @dst is currently bound to.
+ * @dst: The dst TTM resource to be cleared.
+ * @value: Clear value.
+ *
+ * Clear the contents of @dst. On flat CCS devices,
+ * the CCS metadata is cleared to zero as well on VRAM destionations.
+ * TODO: Eliminate the @bo argument.
+ *
+ * Return: Pointer to a dma_fence representing the last clear batch, or
+ * an error pointer on failure. If there is a failure, any clear operation
+ * started by the function call has been synced.
+ */
 struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 				   struct xe_bo *bo,
 				   struct ttm_resource *dst,
@@ -836,7 +905,7 @@ err:
 		mutex_unlock(&m->job_mutex);
 		xe_bb_free(bb, NULL);
 err_sync:
-		/* Sync partial copies if any. */
+		/* Sync partial copies if any. FIXME: job_mutex? */
 		if (fence) {
 			dma_fence_wait(m->fence, false);
 			dma_fence_put(fence);
@@ -974,6 +1043,33 @@ static bool engine_is_idle(struct xe_engine *e)
 		xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno;
 }
 
+/**
+ * xe_migrate_update_pgtables() - Pipelined page-table update
+ * @m: The migrate context.
+ * @vm: The vm we'll be updating.
+ * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr.
+ * @eng: The engine to be used for the update or NULL if the default
+ * migration engine is to be used.
+ * @updates: An array of update descriptors.
+ * @num_updates: Number of descriptors in @updates.
+ * @syncs: Array of xe_sync_entry to await before updating. Note that waits
+ * will block the engine timeline.
+ * @num_syncs: Number of entries in @syncs.
+ * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains
+ * pointers to callback functions and, if subclassed, private arguments to
+ * those.
+ *
+ * Perform a pipelined page-table update. The update descriptors are typically
+ * built under the same lock critical section as a call to this function. If
+ * using the default engine for the updates, they will be performed in the
+ * order they grab the job_mutex. If different engines are used, external
+ * synchronization is needed for overlapping updates to maintain page-table
+ * consistency. Note that the meaing of "overlapping" is that the updates
+ * touch the same page-table, which might be a higher-level page-directory.
+ * If no pipelining is needed, then updates may be performed by the cpu.
+ *
+ * Return: A dma_fence that, when signaled, indicates the update completion.
+ */
 struct dma_fence *
 xe_migrate_update_pgtables(struct xe_migrate *m,
 			   struct xe_vm *vm,
@@ -1157,6 +1253,14 @@ err:
 	return ERR_PTR(err);
 }
 
+/**
+ * xe_migrate_wait() - Complete all operations using the xe_migrate context
+ * @m: Migrate context to wait for.
+ *
+ * Waits until the GPU no longer uses the migrate context's default engine
+ * or its page-table objects. FIXME: What about separate page-table update
+ * engines?
+ */
 void xe_migrate_wait(struct xe_migrate *m)
 {
 	if (m->fence)
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 267057a3847fc..b2d55283252f0 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -23,9 +23,13 @@ struct xe_vm;
 struct xe_vm_pgtable_update;
 struct xe_vma;
 
+/**
+ * struct xe_migrate_pt_update_ops - Callbacks for the
+ * xe_migrate_update_pgtables() function.
+ */
 struct xe_migrate_pt_update_ops {
 	/**
-	 * populate() - Populate a command buffer or page-table with ptes.
+	 * @populate: Populate a command buffer or page-table with ptes.
 	 * @pt_update: Embeddable callback argument.
 	 * @gt: The gt for the current operation.
 	 * @map: struct iosys_map into the memory to be populated.
@@ -44,7 +48,7 @@ struct xe_migrate_pt_update_ops {
 			 const struct xe_vm_pgtable_update *update);
 
 	/**
-	 * pre_commit(): Callback to be called just before arming the
+	 * @pre_commit: Callback to be called just before arming the
 	 * sched_job.
 	 * @pt_update: Pointer to embeddable callback argument.
 	 *
@@ -53,8 +57,16 @@ struct xe_migrate_pt_update_ops {
 	int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
 };
 
+/**
+ * struct xe_migrate_pt_update - Argument to the
+ * struct xe_migrate_pt_update_ops callbacks.
+ *
+ * Intended to be subclassed to support additional arguments if necessary.
+ */
 struct xe_migrate_pt_update {
+	/** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */
 	const struct xe_migrate_pt_update_ops *ops;
+	/** @vma: The vma we're updating the pagetable for. */
 	struct xe_vma *vma;
 };
 
-- 
GitLab


From 765b65e5bde79a9e8332c58f54a98e20fdb25fc7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 12 Jan 2023 17:25:14 -0500
Subject: [PATCH 1015/2340] drm/xe: Take memory ref on kernel job creation

When a job is inflight we may access memory to read the hardware seqno.
All user jobs have VM open which has a ref but kernel jobs do not
require VM so it is possible to not have memory ref. To avoid this, take
a memory ref on kernel job creation.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_sched_job.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index ab81bfe17e8a4..d9add0370a984 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -8,7 +8,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/slab.h>
 
-#include "xe_device_types.h"
+#include "xe_device.h"
 #include "xe_engine.h"
 #include "xe_gt.h"
 #include "xe_hw_engine_types.h"
@@ -72,6 +72,11 @@ static void job_free(struct xe_sched_job *job)
 			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
 }
 
+static struct xe_device *job_to_xe(struct xe_sched_job *job)
+{
+	return gt_to_xe(job->engine->gt);
+}
+
 struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
 					 u64 *batch_addr)
 {
@@ -149,6 +154,11 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
 	for (i = 0; i < width; ++i)
 		job->batch_addr[i] = batch_addr[i];
 
+	/* All other jobs require a VM to be open which has a ref */
+	if (unlikely(e->flags & ENGINE_FLAG_KERNEL))
+		xe_device_mem_access_get(job_to_xe(job));
+	xe_device_assert_mem_access(job_to_xe(job));
+
 	trace_xe_sched_job_create(job);
 	return job;
 
@@ -178,6 +188,8 @@ void xe_sched_job_destroy(struct kref *ref)
 	struct xe_sched_job *job =
 		container_of(ref, struct xe_sched_job, refcount);
 
+	if (unlikely(job->engine->flags & ENGINE_FLAG_KERNEL))
+		xe_device_mem_access_put(job_to_xe(job));
 	xe_engine_put(job->engine);
 	dma_fence_put(job->fence);
 	drm_sched_job_cleanup(&job->drm);
-- 
GitLab


From d8b52a02cb40fe355374e8b0b89763fefc697b53 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 12 Jan 2023 17:25:17 -0500
Subject: [PATCH 1016/2340] drm/xe: Implement stolen memory.

This adds support for stolen memory, with the same allocator as
vram_mgr. This allows us to skip a whole lot of copy-paste,
by re-using parts of xe_ttm_vram_mgr.

The stolen memory may be bound using VM_BIND, so it performs like any
other memory region.

We should be able to map a stolen BO directly using the physical memory
location instead of through GGTT even on old platforms, but I don't know
what the effects are on coherency.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                |   1 +
 drivers/gpu/drm/xe/xe_bo.c                 | 173 ++++++++++++---
 drivers/gpu/drm/xe/xe_bo.h                 |  16 +-
 drivers/gpu/drm/xe/xe_debugfs.c            |   4 +
 drivers/gpu/drm/xe/xe_device.c             |   4 +
 drivers/gpu/drm/xe/xe_mmio.c               |  58 +++--
 drivers/gpu/drm/xe/xe_mmio.h               |   1 +
 drivers/gpu/drm/xe/xe_pt.c                 |   5 +-
 drivers/gpu/drm/xe/xe_res_cursor.h         |  47 ++--
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c     | 244 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h     |  21 ++
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c       |  91 ++++----
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h       |  18 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h |   2 +
 14 files changed, 560 insertions(+), 125 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 228a87f2fe7b3..f8da32b550bcd 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -93,6 +93,7 @@ xe-y += xe_bb.o \
 	xe_sync.o \
 	xe_trace.o \
 	xe_ttm_gtt_mgr.o \
+	xe_ttm_stolen_mgr.o \
 	xe_ttm_vram_mgr.o \
 	xe_tuning.o \
 	xe_uc.o \
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index ef2c9196c113c..f07d1cd63fddf 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -24,6 +24,7 @@
 #include "xe_preempt_fence.h"
 #include "xe_res_cursor.h"
 #include "xe_trace.h"
+#include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
 
 static const struct ttm_place sys_placement_flags = {
@@ -42,7 +43,12 @@ static struct ttm_placement sys_placement = {
 
 bool mem_type_is_vram(u32 mem_type)
 {
-	return mem_type >= XE_PL_VRAM0;
+	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
+}
+
+static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
+{
+	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
 }
 
 static bool resource_is_vram(struct ttm_resource *res)
@@ -52,7 +58,13 @@ static bool resource_is_vram(struct ttm_resource *res)
 
 bool xe_bo_is_vram(struct xe_bo *bo)
 {
-	return resource_is_vram(bo->ttm.resource);
+	return resource_is_vram(bo->ttm.resource) ||
+		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
+}
+
+bool xe_bo_is_stolen(struct xe_bo *bo)
+{
+	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
 }
 
 static bool xe_bo_is_user(struct xe_bo *bo)
@@ -63,9 +75,9 @@ static bool xe_bo_is_user(struct xe_bo *bo)
 static struct xe_gt *
 mem_type_to_gt(struct xe_device *xe, u32 mem_type)
 {
-	XE_BUG_ON(!mem_type_is_vram(mem_type));
+	XE_BUG_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type));
 
-	return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0);
+	return xe_device_get_gt(xe, mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0));
 }
 
 static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
@@ -134,6 +146,20 @@ static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo,
 	}
 }
 
+static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
+			   struct ttm_place *places, u32 bo_flags, u32 *c)
+{
+	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+		places[*c] = (struct ttm_place) {
+			.mem_type = XE_PL_STOLEN,
+			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+					     XE_BO_CREATE_GGTT_BIT) ?
+				TTM_PL_FLAG_CONTIGUOUS : 0,
+		};
+		*c += 1;
+	}
+}
+
 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 				       u32 bo_flags)
 {
@@ -162,6 +188,7 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 		try_add_vram1(xe, bo, places, bo_flags, &c);
 		try_add_system(bo, places, bo_flags, &c);
 	}
+	try_add_stolen(xe, bo, places, bo_flags, &c);
 
 	if (!c)
 		return -EINVAL;
@@ -209,6 +236,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
 	switch (tbo->resource->mem_type) {
 	case XE_PL_VRAM0:
 	case XE_PL_VRAM1:
+	case XE_PL_STOLEN:
 	case XE_PL_TT:
 	default:
 		/* for now kick out to system */
@@ -362,11 +390,12 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 #if  !defined(CONFIG_X86)
 		mem->bus.caching = ttm_write_combined;
 #endif
-		break;
+		return 0;
+	case XE_PL_STOLEN:
+		return xe_ttm_stolen_io_mem_reserve(xe, mem);
 	default:
 		return -EINVAL;
 	}
-	return 0;
 }
 
 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
@@ -673,14 +702,18 @@ out:
 
 }
 
-static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
 				       unsigned long page_offset)
 {
-	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
-	struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type);
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_gt *gt = mem_type_to_gt(xe, ttm_bo->resource->mem_type);
 	struct xe_res_cursor cursor;
 
-	xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
+	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
+		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
+
+	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
 	return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT;
 }
 
@@ -945,7 +978,8 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 			return bo;
 	}
 
-	if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) &&
+	if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT |
+		     XE_BO_CREATE_STOLEN_BIT) &&
 	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
 		size = ALIGN(size, SZ_64K);
@@ -973,9 +1007,11 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 		ctx.resv = resv;
 	}
 
-	err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
-	if (WARN_ON(err))
-		return ERR_PTR(err);
+	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
+		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
+		if (WARN_ON(err))
+			return ERR_PTR(err);
+	}
 
 	/* Defer populating type_sg bos */
 	placement = (type == ttm_bo_type_sg ||
@@ -993,16 +1029,73 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	return bo;
 }
 
-struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
-				  struct xe_vm *vm, size_t size,
-				  enum ttm_bo_type type, u32 flags)
+static int __xe_bo_fixed_placement(struct xe_device *xe,
+				   struct xe_bo *bo,
+				   u32 flags,
+				   u64 start, u64 end, u64 size)
 {
-	struct xe_bo *bo;
+	struct ttm_place *place = bo->placements;
+
+	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
+		return -EINVAL;
+
+	place->flags = TTM_PL_FLAG_CONTIGUOUS;
+	place->fpfn = start >> PAGE_SHIFT;
+	place->lpfn = end >> PAGE_SHIFT;
+
+	switch (flags & (XE_BO_CREATE_STOLEN_BIT |
+		XE_BO_CREATE_VRAM0_BIT |XE_BO_CREATE_VRAM1_BIT)) {
+	case XE_BO_CREATE_VRAM0_BIT:
+		place->mem_type = XE_PL_VRAM0;
+		break;
+	case XE_BO_CREATE_VRAM1_BIT:
+		place->mem_type = XE_PL_VRAM1;
+		break;
+	case XE_BO_CREATE_STOLEN_BIT:
+		place->mem_type = XE_PL_STOLEN;
+		break;
+
+	default:
+		/* 0 or multiple of the above set */
+		return -EINVAL;
+	}
+
+	bo->placement = (struct ttm_placement) {
+		.num_placement = 1,
+		.placement = place,
+		.num_busy_placement = 1,
+		.busy_placement = place,
+	};
+
+	return 0;
+}
+
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_gt *gt, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = NULL;
 	int err;
 
 	if (vm)
 		xe_vm_assert_held(vm);
-	bo = __xe_bo_create_locked(xe, NULL, gt, vm ? &vm->resv : NULL, size,
+
+	if (start || end != ~0ULL) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+
+		flags |= XE_BO_FIXED_PLACEMENT_BIT;
+		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
+		if (err) {
+			xe_bo_free(bo);
+			return ERR_PTR(err);
+		}
+	}
+
+	bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL, size,
 				   type, flags);
 	if (IS_ERR(bo))
 		return bo;
@@ -1011,7 +1104,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
 		xe_vm_get(vm);
 	bo->vm = vm;
 
-	if (flags & XE_BO_CREATE_GGTT_BIT) {
+	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+		if (!gt && flags & XE_BO_CREATE_STOLEN_BIT)
+			gt = xe_device_get_gt(xe, 0);
+
 		XE_BUG_ON(!gt);
 
 		err = xe_ggtt_insert_bo(gt->mem.ggtt, bo);
@@ -1027,6 +1123,13 @@ err_unlock_put_bo:
 	return ERR_PTR(err);
 }
 
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags)
+{
+	return xe_bo_create_locked_range(xe, gt, vm, size, 0, ~0ULL, type, flags);
+}
+
 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
 			   struct xe_vm *vm, size_t size,
 			   enum ttm_bo_type type, u32 flags)
@@ -1039,13 +1142,21 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
 	return bo;
 }
 
-struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
-				   struct xe_vm *vm, size_t size,
-				   enum ttm_bo_type type, u32 flags)
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
+				      struct xe_vm *vm,
+				      size_t size, u64 offset,
+				      enum ttm_bo_type type, u32 flags)
 {
-	struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+	struct xe_bo *bo;
 	int err;
+	u64 start = offset == ~0ull ? 0 : offset;
+	u64 end = offset == ~0ull ? offset : start + size;
+
+	if (flags & XE_BO_CREATE_STOLEN_BIT &&
+	    xe_ttm_stolen_inaccessible(xe))
+		flags |= XE_BO_CREATE_GGTT_BIT;
 
+	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
 	if (IS_ERR(bo))
 		return bo;
 
@@ -1069,6 +1180,13 @@ err_put:
 	return ERR_PTR(err);
 }
 
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags)
+{
+	return xe_bo_create_pin_map_at(xe, gt, vm, size, ~0ull, type, flags);
+}
+
 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
 				     const void *data, size_t size,
 				     enum ttm_bo_type type, u32 flags)
@@ -1093,6 +1211,9 @@ static uint64_t vram_region_io_offset(struct xe_bo *bo)
 	struct xe_device *xe = xe_bo_device(bo);
 	struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type);
 
+	if (bo->ttm.resource->mem_type == XE_PL_STOLEN)
+		return xe_ttm_stolen_gpu_offset(xe);
+
 	return gt->mem.vram.io_start - xe->mem.vram.io_start;
 }
 
@@ -1174,7 +1295,7 @@ int xe_bo_pin(struct xe_bo *bo)
 		bool lmem;
 
 		XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
-		XE_BUG_ON(!mem_type_is_vram(place->mem_type));
+		XE_BUG_ON(!mem_type_is_vram(place->mem_type) && place->mem_type != XE_PL_STOLEN);
 
 		place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
 			       vram_region_io_offset(bo)) >> PAGE_SHIFT;
@@ -1305,7 +1426,7 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
 
 	*is_lmem = xe_bo_is_vram(bo);
 
-	if (!*is_lmem) {
+	if (!*is_lmem && !xe_bo_is_stolen(bo)) {
 		XE_BUG_ON(!bo->ttm.ttm);
 
 		xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 1a49c0a3c4c6a..8d8a3332dbc86 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -12,8 +12,9 @@
 
 #define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
 
-#define XE_BO_CREATE_USER_BIT		BIT(1)
-#define XE_BO_CREATE_SYSTEM_BIT		BIT(2)
+#define XE_BO_CREATE_USER_BIT		BIT(0)
+#define XE_BO_CREATE_SYSTEM_BIT		BIT(1)
+#define XE_BO_CREATE_STOLEN_BIT		BIT(2)
 #define XE_BO_CREATE_VRAM0_BIT		BIT(3)
 #define XE_BO_CREATE_VRAM1_BIT		BIT(4)
 #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
@@ -24,6 +25,7 @@
 #define XE_BO_CREATE_PINNED_BIT		BIT(7)
 #define XE_BO_DEFER_BACKING		BIT(8)
 #define XE_BO_SCANOUT_BIT		BIT(9)
+#define XE_BO_FIXED_PLACEMENT_BIT	BIT(10)
 /* this one is trigger internally only */
 #define XE_BO_INTERNAL_TEST		BIT(30)
 #define XE_BO_INTERNAL_64K		BIT(31)
@@ -64,6 +66,7 @@
 #define XE_PL_TT		TTM_PL_TT
 #define XE_PL_VRAM0		TTM_PL_VRAM
 #define XE_PL_VRAM1		(XE_PL_VRAM0 + 1)
+#define XE_PL_STOLEN		(TTM_NUM_MEM_TYPES - 1)
 
 #define XE_BO_PROPS_INVALID	(-1)
 
@@ -76,6 +79,11 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 				    struct xe_gt *gt, struct dma_resv *resv,
 				    size_t size, enum ttm_bo_type type,
 				    u32 flags);
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_gt *gt, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags);
 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
 				  struct xe_vm *vm, size_t size,
 				  enum ttm_bo_type type, u32 flags);
@@ -85,6 +93,9 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
 				   struct xe_vm *vm, size_t size,
 				   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
+				      struct xe_vm *vm, size_t size, u64 offset,
+				      enum ttm_bo_type type, u32 flags);
 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
 				     const void *data, size_t size,
 				     enum ttm_bo_type type, u32 flags);
@@ -206,6 +217,7 @@ void xe_bo_vunmap(struct xe_bo *bo);
 
 bool mem_type_is_vram(u32 mem_type);
 bool xe_bo_is_vram(struct xe_bo *bo);
+bool xe_bo_is_stolen(struct xe_bo *bo);
 
 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
 
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 84db7b3f501ea..b0f8b157ffa3d 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -124,6 +124,10 @@ void xe_debugfs_register(struct xe_device *xe)
 	man = ttm_manager_type(bdev, XE_PL_TT);
 	ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
 
+	man = ttm_manager_type(bdev, XE_PL_STOLEN);
+	if (man)
+		ttm_resource_manager_create_debugfs(man, root, "stolen_mm");
+
 	for_each_gt(gt, xe, id)
 		xe_gt_debugfs_register(gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 93dea2b9c4648..104ab12cc2edb 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -25,6 +25,7 @@
 #include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_query.h"
+#include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
 #include "xe_vm_madvise.h"
 #include "xe_wait_user_fence.h"
@@ -256,6 +257,9 @@ int xe_device_probe(struct xe_device *xe)
 			goto err_irq_shutdown;
 	}
 
+	/* Allocate and map stolen after potential VRAM resize */
+	xe_ttm_stolen_mgr_init(xe);
+
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_init(gt);
 		if (err)
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 42e2405f2f488..54c9362a30509 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -150,6 +150,38 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
 	return true;
 }
 
+int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base)
+{
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int err;
+	u32 reg;
+
+	if (!xe->info.has_flat_ccs)  {
+		*vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+		if (flat_ccs_base)
+			*flat_ccs_base = *vram_size;
+		return 0;
+	}
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
+	*vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+	if (flat_ccs_base) {
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		*flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+	}
+
+	if (flat_ccs_base)
+		drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n",
+			 *vram_size, *flat_ccs_base);
+
+	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
 int xe_mmio_probe_vram(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -159,7 +191,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	u64 original_size;
 	u64 current_size;
 	u64 flat_ccs_base;
-	int resize_result;
+	int resize_result, err;
 
 	if (!IS_DGFX(xe)) {
 		xe->mem.vram.mapping = 0;
@@ -184,27 +216,9 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 
 	original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 
-	if (xe->info.has_flat_ccs)  {
-		int err;
-		u32 reg;
-
-		err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
-		if (err)
-			return err;
-		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
-		lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
-		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
-		flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
-
-		drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n",
-			 lmem_size, flat_ccs_base);
-
-		err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-		if (err)
-			return err;
-	} else {
-		flat_ccs_base = lmem_size;
-	}
+	err = xe_mmio_total_vram_size(xe, &lmem_size, &flat_ccs_base);
+	if (err)
+		return err;
 
 	resize_result = xe_resize_lmem_bar(xe, lmem_size);
 	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index adc7d7484afb9..a3b144553873b 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -130,5 +130,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg)
 }
 
 int xe_mmio_probe_vram(struct xe_device *xe);
+int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 81193ddd0af73..45850184650cb 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -12,6 +12,7 @@
 #include "xe_pt_walk.h"
 #include "xe_vm.h"
 #include "xe_res_cursor.h"
+#include "xe_ttm_stolen_mgr.h"
 
 struct xe_pt_dir {
 	struct xe_pt pt;
@@ -756,12 +757,14 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
 		else
 			xe_walk.cache = XE_CACHE_WB;
 	}
+	if (xe_bo_is_stolen(bo))
+		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
 
 	xe_bo_assert_held(bo);
 	if (xe_vma_is_userptr(vma))
 		xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1,
 				&curs);
-	else if (xe_bo_is_vram(bo))
+	else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
 		xe_res_first(bo->ttm.resource, vma->bo_offset,
 			     vma->end - vma->start + 1, &curs);
 	else
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index f54409850d749..365c8ad7aeb8e 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -33,10 +33,11 @@
 #include <drm/ttm/ttm_tt.h>
 
 #include "xe_bo.h"
+#include "xe_device.h"
 #include "xe_macros.h"
 #include "xe_ttm_vram_mgr.h"
 
-/* state back for walking over vram_mgr and gtt_mgr allocations */
+/* state back for walking over vram_mgr, stolen_mgr, and gtt_mgr allocations */
 struct xe_res_cursor {
 	u64 start;
 	u64 size;
@@ -44,8 +45,23 @@ struct xe_res_cursor {
 	void *node;
 	u32 mem_type;
 	struct scatterlist *sgl;
+	struct drm_buddy *mm;
 };
 
+static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
+{
+	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+
+	if (res->mem_type != XE_PL_STOLEN) {
+		return &xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0)->mem.vram_mgr->mm;
+	} else {
+		struct ttm_resource_manager *mgr =
+			ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+
+		return &to_xe_ttm_vram_mgr(mgr)->mm;
+	}
+}
+
 /**
  * xe_res_first - initialize a xe_res_cursor
  *
@@ -60,9 +76,6 @@ static inline void xe_res_first(struct ttm_resource *res,
 				u64 start, u64 size,
 				struct xe_res_cursor *cur)
 {
-	struct drm_buddy_block *block;
-	struct list_head *head, *next;
-
 	cur->sgl = NULL;
 	if (!res)
 		goto fallback;
@@ -72,8 +85,13 @@ static inline void xe_res_first(struct ttm_resource *res,
 	cur->mem_type = res->mem_type;
 
 	switch (cur->mem_type) {
+	case XE_PL_STOLEN:
 	case XE_PL_VRAM0:
-	case XE_PL_VRAM1:
+	case XE_PL_VRAM1: {
+		struct drm_buddy_block *block;
+		struct list_head *head, *next;
+		struct drm_buddy *mm = xe_res_get_buddy(res);
+
 		head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
 
 		block = list_first_entry_or_null(head,
@@ -82,8 +100,8 @@ static inline void xe_res_first(struct ttm_resource *res,
 		if (!block)
 			goto fallback;
 
-		while (start >= xe_ttm_vram_mgr_block_size(block)) {
-			start -= xe_ttm_vram_mgr_block_size(block);
+		while (start >= drm_buddy_block_size(mm, block)) {
+			start -= drm_buddy_block_size(mm, block);
 
 			next = block->link.next;
 			if (next != head)
@@ -91,12 +109,14 @@ static inline void xe_res_first(struct ttm_resource *res,
 						   link);
 		}
 
-		cur->start = xe_ttm_vram_mgr_block_start(block) + start;
-		cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+		cur->mm = mm;
+		cur->start = drm_buddy_block_offset(block) + start;
+		cur->size = min(drm_buddy_block_size(mm, block) - start,
 				size);
 		cur->remaining = size;
 		cur->node = block;
 		break;
+	}
 	default:
 		goto fallback;
 	}
@@ -188,6 +208,7 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
 	}
 
 	switch (cur->mem_type) {
+	case XE_PL_STOLEN:
 	case XE_PL_VRAM0:
 	case XE_PL_VRAM1:
 		start = size - cur->size;
@@ -197,15 +218,15 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
 		block = list_entry(next, struct drm_buddy_block, link);
 
 
-		while (start >= xe_ttm_vram_mgr_block_size(block)) {
-			start -= xe_ttm_vram_mgr_block_size(block);
+		while (start >= drm_buddy_block_size(cur->mm, block)) {
+			start -= drm_buddy_block_size(cur->mm, block);
 
 			next = block->link.next;
 			block = list_entry(next, struct drm_buddy_block, link);
 		}
 
-		cur->start = xe_ttm_vram_mgr_block_start(block) + start;
-		cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+		cur->start = drm_buddy_block_offset(block) + start;
+		cur->size = min(drm_buddy_block_size(cur->mm, block) - start,
 				cur->remaining);
 		cur->node = block;
 		break;
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
new file mode 100644
index 0000000000000..21ca7f79e63be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+#include <drm/drm_mm.h>
+
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include "../i915/i915_reg.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_vram_mgr.h"
+
+bool xe_ttm_stolen_inaccessible(struct xe_device *xe)
+{
+	return !IS_DGFX(xe) && GRAPHICS_VERx100(xe) < 1270;
+}
+
+struct xe_ttm_stolen_mgr {
+	struct xe_ttm_vram_mgr base;
+
+	/* PCI base offset */
+	resource_size_t io_base;
+	/* GPU base offset */
+	resource_size_t stolen_base;
+
+	void *__iomem mapping;
+};
+
+static inline struct xe_ttm_stolen_mgr *
+to_stolen_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
+}
+
+static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_gt *gt = to_gt(xe);
+	u64 vram_size, stolen_size;
+	int err;
+
+	err = xe_mmio_total_vram_size(xe, &vram_size, NULL);
+	if (err) {
+		drm_info(&xe->drm, "Querying total vram size failed\n");
+		return 0;
+	}
+
+	/* Use DSM base address instead for stolen memory */
+	mgr->stolen_base = xe_mmio_read64(gt, GEN12_DSMBASE.reg) & GEN12_BDSM_MASK;
+	if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base))
+		return 0;
+
+	stolen_size = vram_size - mgr->stolen_base;
+	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, 2))
+		mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
+
+	return stolen_size;
+}
+
+static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	u32 stolen_size;
+	u32 ggc, gms;
+
+	ggc = xe_mmio_read32(to_gt(xe), GGC.reg);
+
+	/* check GGMS, should be fixed 0x3 (8MB) */
+	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
+		return 0;
+
+	mgr->stolen_base = mgr->io_base = pci_resource_start(pdev, 2) + SZ_8M;
+
+	/* return valid GMS value, -EIO if invalid */
+	gms = REG_FIELD_GET(GMS_MASK, ggc);
+	switch (gms) {
+	case 0x0 ... 0x04:
+		stolen_size = gms * 32 * SZ_1M;
+		break;
+	case 0xf0 ... 0xfe:
+		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
+		break;
+	default:
+		return 0;
+	}
+
+	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
+		return 0;
+
+	return stolen_size;
+}
+
+extern struct resource intel_graphics_stolen_res;
+
+static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+#ifdef CONFIG_X86
+	/* Map into GGTT */
+	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);
+
+	/* Stolen memory is x86 only */
+	mgr->stolen_base = intel_graphics_stolen_res.start;
+	return resource_size(&intel_graphics_stolen_res);
+#else
+	return 0;
+#endif
+}
+
+void xe_ttm_stolen_mgr_init(struct xe_device *xe)
+{
+	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	u64 stolen_size, pgsize;
+	int err;
+
+	if (IS_DGFX(xe))
+		stolen_size = detect_bar2_dgfx(xe, mgr);
+	else if (!xe_ttm_stolen_inaccessible(xe))
+		stolen_size = detect_bar2_integrated(xe, mgr);
+	else
+		stolen_size = detect_stolen(xe, mgr);
+
+	if (!stolen_size) {
+		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
+		return;
+	}
+
+	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+	if (pgsize < PAGE_SIZE)
+		pgsize = PAGE_SIZE;
+
+
+	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, pgsize);
+	if (err) {
+		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
+		return;
+	}
+
+	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
+		    stolen_size);
+
+	if (!xe_ttm_stolen_inaccessible(xe))
+		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size);
+}
+
+u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
+	struct xe_res_cursor cur;
+
+	if (!mgr->io_base)
+		return 0;
+
+	if (!IS_DGFX(xe) && xe_ttm_stolen_inaccessible(xe))
+		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
+
+	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
+	return mgr->io_base + cur.start;
+}
+
+static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
+					       struct xe_ttm_stolen_mgr *mgr,
+					       struct ttm_resource *mem)
+{
+	struct xe_res_cursor cur;
+
+	if (!mgr->io_base)
+		return -EIO;
+
+	xe_res_first(mem, 0, 4096, &cur);
+	mem->bus.offset = cur.start;
+
+	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
+	WARN_ON_ONCE(1);
+
+	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
+		mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset;
+
+	mem->bus.offset += mgr->io_base;
+	mem->bus.is_iomem = true;
+	mem->bus.caching = ttm_write_combined;
+
+	return 0;
+}
+
+static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
+						 struct xe_ttm_stolen_mgr *mgr,
+						 struct ttm_resource *mem)
+{
+#ifdef CONFIG_X86
+	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);
+
+	/* XXX: Require BO to be mapped to GGTT? */
+	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
+		return -EIO;
+
+	/* GGTT is always contiguously mapped */
+	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;
+
+	mem->bus.is_iomem = true;
+	mem->bus.caching = ttm_write_combined;
+
+	return 0;
+#else
+	/* How is it even possible to get here without gen12 stolen? */
+	drm_WARN_ON(&xe->drm, 1);
+	return -EIO;
+#endif
+}
+
+int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
+{
+	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;
+
+	if (!mgr || !mgr->io_base)
+		return -EIO;
+
+	if (!xe_ttm_stolen_inaccessible(xe))
+		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
+	else
+		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
+}
+
+u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
+{
+	struct xe_ttm_stolen_mgr *mgr =
+		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));
+
+	return mgr->stolen_base;
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
new file mode 100644
index 0000000000000..ade37abb06233
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_STOLEN_MGR_H_
+#define _XE_TTM_STOLEN_MGR_H_
+
+#include <linux/types.h>
+
+struct ttm_resource;
+struct xe_bo;
+struct xe_device;
+
+void xe_ttm_stolen_mgr_init(struct xe_device *xe);
+int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem);
+bool xe_ttm_stolen_inaccessible(struct xe_device *xe);
+u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset);
+u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index e391e81d3640f..c7e21673b8fdd 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -15,25 +15,14 @@
 #include "xe_res_cursor.h"
 #include "xe_ttm_vram_mgr.h"
 
-static inline struct xe_ttm_vram_mgr *
-to_vram_mgr(struct ttm_resource_manager *man)
-{
-	return container_of(man, struct xe_ttm_vram_mgr, manager);
-}
-
-static inline struct xe_gt *
-mgr_to_gt(struct xe_ttm_vram_mgr *mgr)
-{
-	return mgr->gt;
-}
-
 static inline struct drm_buddy_block *
 xe_ttm_vram_mgr_first_block(struct list_head *list)
 {
 	return list_first_entry_or_null(list, struct drm_buddy_block, link);
 }
 
-static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head)
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm,
+						    struct list_head *head)
 {
 	struct drm_buddy_block *block;
 	u64 start, size;
@@ -43,12 +32,12 @@ static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head)
 		return false;
 
 	while (head != block->link.next) {
-		start = xe_ttm_vram_mgr_block_start(block);
-		size = xe_ttm_vram_mgr_block_size(block);
+		start = drm_buddy_block_offset(block);
+		size = drm_buddy_block_size(mm, block);
 
 		block = list_entry(block->link.next, struct drm_buddy_block,
 				   link);
-		if (start + size != xe_ttm_vram_mgr_block_start(block))
+		if (start + size != drm_buddy_block_offset(block))
 			return false;
 	}
 
@@ -61,7 +50,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 			       struct ttm_resource **res)
 {
 	u64 max_bytes, cur_size, min_block_size;
-	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct xe_ttm_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
 	struct drm_buddy *mm = &mgr->mm;
@@ -70,12 +59,12 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	int r;
 
 	lpfn = (u64)place->lpfn << PAGE_SHIFT;
-	if (!lpfn)
+	if (!lpfn || lpfn > man->size)
 		lpfn = man->size;
 
 	fpfn = (u64)place->fpfn << PAGE_SHIFT;
 
-	max_bytes = mgr->gt->mem.vram.size;
+	max_bytes = mgr->manager.size;
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
 		pages_per_block = ~0ul;
 	} else {
@@ -183,7 +172,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 			 * Compute the original_size value by subtracting the
 			 * last block size with (aligned size - original size)
 			 */
-			original_size = xe_ttm_vram_mgr_block_size(block) -
+			original_size = drm_buddy_block_size(mm, block) -
 				(size - cur_size);
 		}
 
@@ -201,8 +190,8 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	list_for_each_entry(block, &vres->blocks, link) {
 		unsigned long start;
 
-		start = xe_ttm_vram_mgr_block_start(block) +
-			xe_ttm_vram_mgr_block_size(block);
+		start = drm_buddy_block_offset(block) +
+			drm_buddy_block_size(mm, block);
 		start >>= PAGE_SHIFT;
 
 		if (start > PFN_UP(vres->base.size))
@@ -212,7 +201,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 		vres->base.start = max(vres->base.start, start);
 	}
 
-	if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks))
+	if (xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks))
 		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
 
 	*res = &vres->base;
@@ -233,7 +222,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
 {
 	struct xe_ttm_vram_mgr_resource *vres =
 		to_xe_ttm_vram_mgr_resource(res);
-	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct drm_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
@@ -248,7 +237,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
 static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
 				  struct drm_printer *printer)
 {
-	struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct drm_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
@@ -263,54 +252,54 @@ static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
 	.debug	= xe_ttm_vram_mgr_debug
 };
 
-static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg)
+static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
 {
+	struct xe_device *xe = to_xe_device(dev);
 	struct xe_ttm_vram_mgr *mgr = arg;
-	struct xe_device *xe = gt_to_xe(mgr->gt);
 	struct ttm_resource_manager *man = &mgr->manager;
-	int err;
 
 	ttm_resource_manager_set_used(man, false);
 
-	err = ttm_resource_manager_evict_all(&xe->ttm, man);
-	if (err)
+	if (ttm_resource_manager_evict_all(&xe->ttm, man))
 		return;
 
 	drm_buddy_fini(&mgr->mm);
 
-	ttm_resource_manager_cleanup(man);
-	ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id,
-			       NULL);
+	ttm_resource_manager_cleanup(&mgr->manager);
+
+	ttm_set_driver_manager(&xe->ttm, mgr->mem_type, NULL);
 }
 
-int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
+int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
+			   u32 mem_type, u64 size, u64 default_page_size)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	struct ttm_resource_manager *man = &mgr->manager;
 	int err;
 
-	XE_BUG_ON(xe_gt_is_media_type(gt));
-
-	mgr->gt = gt;
 	man->func = &xe_ttm_vram_mgr_func;
+	mgr->mem_type = mem_type;
+	mutex_init(&mgr->lock);
+	mgr->default_page_size = default_page_size;
 
-	ttm_resource_manager_init(man, &xe->ttm, gt->mem.vram.size);
-	err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
-	if (err)
-		return err;
+	ttm_resource_manager_init(man, &xe->ttm, size);
+	err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
 
-	mutex_init(&mgr->lock);
-	mgr->default_page_size = PAGE_SIZE;
+	ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager);
+	ttm_resource_manager_set_used(&mgr->manager, true);
+
+	return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
+}
+
+int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
+{
+	struct xe_device *xe = gt_to_xe(gt);
 
-	ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id,
-			       &mgr->manager);
-	ttm_resource_manager_set_used(man, true);
+	XE_BUG_ON(xe_gt_is_media_type(gt));
 
-	err = drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
-	if (err)
-		return err;
+	mgr->gt = gt;
 
-	return 0;
+	return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + gt->info.vram_id,
+				      gt->mem.vram.size, PAGE_SIZE);
 }
 
 int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index 537fccec43189..78f332d262242 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -12,6 +12,8 @@ enum dma_data_direction;
 struct xe_device;
 struct xe_gt;
 
+int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
+			   u32 mem_type, u64 size, u64 default_page_size);
 int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr);
 int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 			      struct ttm_resource *res,
@@ -22,20 +24,16 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
 			      struct sg_table *sgt);
 
-static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block)
-{
-	return drm_buddy_block_offset(block);
-}
-
-static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block)
-{
-	return PAGE_SIZE << drm_buddy_block_order(block);
-}
-
 static inline struct xe_ttm_vram_mgr_resource *
 to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
 {
 	return container_of(res, struct xe_ttm_vram_mgr_resource, base);
 }
 
+static inline struct xe_ttm_vram_mgr *
+to_xe_ttm_vram_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_vram_mgr, manager);
+}
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index 39b93c71c21b4..cf02c62ff427f 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -27,6 +27,8 @@ struct xe_ttm_vram_mgr {
 	u64 default_page_size;
 	/** @lock: protects allocations of VRAM */
 	struct mutex lock;
+
+	u32 mem_type;
 };
 
 /**
-- 
GitLab


From 60694edf668a5c837d7bf05bd2250388e2ada9a8 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 12 Jan 2023 17:25:28 -0500
Subject: [PATCH 1017/2340] drm/xe: Ensure VMA not userptr before calling
 xe_bo_is_stolen

Fix the below splat:

[  142.510525] [IGT] xe_exec_basic: starting subtest once-userptr
[  142.511339] BUG: kernel NULL pointer dereference, address: 0000000000000228
[  142.518311] #PF: supervisor read access in kernel mode
[  142.523458] #PF: error_code(0x0000) - not-present page
[  142.528604] PGD 0 P4D 0
[  142.531153] Oops: 0000 [#1] PREEMPT SMP NOPTI
[  142.535518] CPU: 4 PID: 1199 Comm: kworker/u16:8 Not tainted 6.1.0-rc1-xe+ #1
[  142.542656] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020
[  142.556033] Workqueue: events_unbound async_op_work_func [xe]
[  142.561810] RIP: 0010:xe_bo_is_stolen+0x0/0x20 [xe]
[  142.566709] Code: 20 c8 75 05 83 fa 07 74 05 c3 cc cc cc cc 48 8b 87 08 02 00 00 0f b6 80 2c ff ff ff c3 cc cc cc cc 66 0f 1f 84 00 00 00 00 00 <48> 8b 87 28 02 00 00 83 78 10 07 0f 94 c0 c3 cc cc cc cc 66 66 2e
[  142.585447] RSP: 0018:ffffc900019eb888 EFLAGS: 00010246
[  142.590678] RAX: 0000000000000002 RBX: 0000000000000000 RCX: ffff88813f6a2108
[  142.597821] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[  142.604962] RBP: ffffc900019ebbc0 R08: 0000000000000001 R09: 0000000000000000
[  142.612101] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88814107d600
[  142.619242] R13: ffffc900019eba20 R14: ffff888140442000 R15: 0000000000000000
[  142.626378] FS:  0000000000000000(0000) GS:ffff88849fa00000(0000) knlGS:0000000000000000
[  142.634468] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  142.640219] CR2: 0000000000000228 CR3: 000000010a4c0006 CR4: 0000000000770ee0
[  142.647361] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  142.654505] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  142.661639] PKRU: 55555554
[  142.664367] Call Trace:
[  142.666830]  <TASK>
[  142.668947]  __xe_pt_bind_vma+0x1a1/0xa50 [xe]
[  142.673417]  ? unwind_next_frame+0x187/0x770
[  142.677699]  ? __thaw_task+0xc0/0xc0
[  142.681293]  ? __lock_acquire+0x5e4/0x26e0
[  142.685409]  ? lockdep_hardirqs_on+0xbf/0x140
[  142.689779]  ? lock_acquire+0xd2/0x310
[  142.693548]  ? mark_held_locks+0x49/0x80
[  142.697485]  ? xe_vm_bind_vma+0xf1/0x3d0 [xe]
[  142.701866]  xe_vm_bind_vma+0xf1/0x3d0 [xe]
[  142.706082]  xe_vm_bind+0x76/0x140 [xe]
[  142.709944]  vm_bind_ioctl+0x26f/0xb40 [xe]
[  142.714161]  ? async_op_work_func+0x20c/0x450 [xe]
[  142.718974]  async_op_work_func+0x20c/0x450 [xe]
[  142.723620]  process_one_work+0x263/0x580
[  142.727645]  ? process_one_work+0x580/0x580
[  142.731839]  worker_thread+0x4d/0x3b0
[  142.735518]  ? process_one_work+0x580/0x580
[  142.739714]  kthread+0xeb/0x120
[  142.742872]  ? kthread_complete_and_exit+0x20/0x20
[  142.747671]  ret_from_fork+0x1f/0x30
[  142.751264]  </TASK>

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 45850184650cb..688bc4b56294c 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -757,7 +757,7 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
 		else
 			xe_walk.cache = XE_CACHE_WB;
 	}
-	if (xe_bo_is_stolen(bo))
+	if (!xe_vma_is_userptr(vma) && xe_bo_is_stolen(bo))
 		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
 
 	xe_bo_assert_held(bo);
-- 
GitLab


From da34c2cf85a4739d4e2b1b5515a0fbc8f8e60358 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 12 Jan 2023 17:25:30 -0500
Subject: [PATCH 1018/2340] drm/xe: Fake pulling gt->info.engine_mask from
 hwconfig blob

The blob doesn't fully support this yet, so fake for now to ensure our
driver load order is correct.

Once the blob supports pulling gt->info.engine_mask from the blob, this
patch can be removed.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c       | 3 +++
 drivers/gpu/drm/xe/xe_gt_types.h | 6 ++++++
 drivers/gpu/drm/xe/xe_pci.c      | 4 ++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index fd8232a4556e5..96d0f5845d87b 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -450,6 +450,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
+	/* XXX: Fake that we pull the engine mask from hwconfig blob */
+	gt->info.engine_mask = gt->info.__engine_mask;
+
 	/* Enables per hw engine IRQs */
 	xe_gt_irq_postinstall(gt);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index c80a9215098d9..2dbc8cedd6305 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -93,6 +93,12 @@ struct xe_gt {
 		u32 clock_freq;
 		/** @engine_mask: mask of engines present on GT */
 		u64 engine_mask;
+		/**
+		 * @__engine_mask: mask of engines present on GT read from
+		 * xe_pci.c, used to fake reading the engine_mask from the
+		 * hwconfig blob.
+		 */
+		u64 __engine_mask;
 	} info;
 
 	/**
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 55d8a597a0689..49f1f0489f1c8 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -420,13 +420,13 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		if (id == 0) {
 			gt->info.type = XE_GT_TYPE_MAIN;
 			gt->info.vram_id = id;
-			gt->info.engine_mask = desc->platform_engine_mask;
+			gt->info.__engine_mask = desc->platform_engine_mask;
 			gt->mmio.adj_limit = 0;
 			gt->mmio.adj_offset = 0;
 		} else {
 			gt->info.type = desc->extra_gts[id - 1].type;
 			gt->info.vram_id = desc->extra_gts[id - 1].vram_id;
-			gt->info.engine_mask =
+			gt->info.__engine_mask =
 				desc->extra_gts[id - 1].engine_mask;
 			gt->mmio.adj_limit =
 				desc->extra_gts[id - 1].mmio_adj_limit;
-- 
GitLab


From 99c821b00bf65e76415bf4c8d04d4d92987505cb Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 12 Jan 2023 17:25:31 -0500
Subject: [PATCH 1019/2340] drm/xe/guc: Report submission version of GuC
 firmware

Starting in 70.6.* GuC firmware the CSS header includes the submission
version, pull this from the CSS header. Prior 70.* versions accidentally
omitted this informatio so hard code to the correct values. This
information will be used by VFs when communicating with the PF.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Philippe Lecluse <philippe.lecluse1@gmail.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_types.h |  9 ++++++
 drivers/gpu/drm/xe/xe_uc_fw.c     | 46 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_uc_fw_abi.h |  6 +++-
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index ca177853cc12c..c2a484282ef2c 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -51,6 +51,15 @@ struct xe_guc {
 			/** @seqno: suspend fences seqno */
 			u32 seqno;
 		} suspend;
+		/** @version: submission version */
+		struct {
+			/** @major: major version of GuC submission */
+			u32 major;
+			/** @minor: minor version of GuC submission */
+			u32 minor;
+			/** @patch: patch version of GuC submission */
+			u32 patch;
+		} version;
 	} submission_state;
 	/** @hwconfig: Hardware config state */
 	struct {
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index cd264cf50d305..bd89ac27828e5 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -184,6 +184,40 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
 	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
 }
 
+static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_guc *guc = &gt->uc.guc;
+
+	XE_BUG_ON(uc_fw->type != XE_UC_FW_TYPE_GUC);
+	XE_WARN_ON(uc_fw->major_ver_found < 70);
+
+	if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) {
+		/* v70.6.0 adds CSS header support */
+		guc->submission_state.version.major =
+			FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+				  css->submission_version);
+		guc->submission_state.version.minor =
+			FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+				  css->submission_version);
+		guc->submission_state.version.patch =
+			FIELD_GET(CSS_SW_VERSION_UC_PATCH,
+				  css->submission_version);
+	} else if (uc_fw->minor_ver_found >= 3) {
+		/* v70.3.0 introduced v1.1.0 */
+		guc->submission_state.version.major = 1;
+		guc->submission_state.version.minor = 1;
+		guc->submission_state.version.patch = 0;
+	} else {
+		/* v70.0.0 introduced v1.0.0 */
+		guc->submission_state.version.major = 1;
+		guc->submission_state.version.minor = 0;
+		guc->submission_state.version.patch = 0;
+	}
+
+	uc_fw->private_data_size = css->private_data_size;
+}
+
 int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
@@ -278,7 +312,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	}
 
 	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
-		uc_fw->private_data_size = css->private_data_size;
+		guc_read_css_info(uc_fw, css);
 
 	obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size,
 				     ttm_bo_type_kernel,
@@ -403,4 +437,14 @@ void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
 		   uc_fw->major_ver_found, uc_fw->minor_ver_found);
 	drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
 	drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
+		struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+		struct xe_guc *guc = &gt->uc.guc;
+
+		drm_printf(p, "\tSubmit version: %u.%u.%u\n",
+			   guc->submission_state.version.major,
+			   guc->submission_state.version.minor,
+			   guc->submission_state.version.patch);
+	}
 }
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index dafd26cb0c413..fc7b1855ee90f 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -69,7 +69,11 @@ struct uc_css_header {
 #define CSS_SW_VERSION_UC_MAJOR		(0xFF << 16)
 #define CSS_SW_VERSION_UC_MINOR		(0xFF << 8)
 #define CSS_SW_VERSION_UC_PATCH		(0xFF << 0)
-	u32 reserved0[13];
+	union {
+		u32 submission_version; /* only applies to GuC */
+		u32 reserved2;
+	};
+	u32 reserved0[12];
 	union {
 		u32 private_data_size; /* only applies to GuC */
 		u32 reserved1;
-- 
GitLab


From f900725af8b66ec8484680c693fa4ae93cb7259d Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 12 Jan 2023 17:25:32 -0500
Subject: [PATCH 1020/2340] drm/xe/guc: s/xe_guc_send_mmio/xe_guc_mmio_send

Now aligns with the xe_guc_ct_send naming.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Philippe Lecluse <philippe.lecluse1@gmail.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c          | 6 +++---
 drivers/gpu/drm/xe/xe_guc.h          | 2 +-
 drivers/gpu/drm/xe/xe_guc_ct.c       | 2 +-
 drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 4a7e8f9a14d5b..2efa01dfff6d0 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -599,7 +599,7 @@ int xe_guc_suspend(struct xe_guc *guc)
 		XE_GUC_ACTION_CLIENT_SOFT_RESET,
 	};
 
-	ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
 	if (ret) {
 		drm_err(&guc_to_xe(guc)->drm,
 			"GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
@@ -630,7 +630,7 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
 #define MEDIA_SOFT_SCRATCH(n)           _MMIO(0x190310 + (n) * 4)
 #define MEDIA_SOFT_SCRATCH_COUNT        4
 
-int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len)
+int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
@@ -747,7 +747,7 @@ static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
 	XE_BUG_ON(len == 1 && upper_32_bits(val));
 
 	/* Self config must go over MMIO */
-	ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request));
+	ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
 
 	if (unlikely(ret < 0))
 		return ret;
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 72b71d75566cb..923efee909915 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -22,7 +22,7 @@ int xe_guc_enable_communication(struct xe_guc *guc);
 int xe_guc_suspend(struct xe_guc *guc);
 void xe_guc_notify(struct xe_guc *guc);
 int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
-int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len);
+int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len);
 int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val);
 int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val);
 void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 61a424c41779d..f48eb01847efa 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -268,7 +268,7 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
 			   enable ? GUC_CTB_CONTROL_ENABLE :
 			   GUC_CTB_CONTROL_DISABLE),
 	};
-	int ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request));
+	int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
 
 	return ret > 0 ? -EPROTO : ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index 8dfd48f71a7c1..57640d6087877 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -33,7 +33,7 @@ static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
 		size,
 	};
 
-	return xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	return xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
 }
 
 static int guc_hwconfig_size(struct xe_guc *guc, u32 *size)
-- 
GitLab


From 5e37266307df08f981d929c267bab6bfae8c4d53 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 12 Jan 2023 17:25:33 -0500
Subject: [PATCH 1021/2340] drm/xe/guc: Add support GuC MMIO send / recv

SRIOV has a use case of GuC MMIO send / recv, add a function for it.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Philippe Lecluse <philippe.lecluse1@gmail.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 18 +++++++++++++++++-
 drivers/gpu/drm/xe/xe_guc.h |  2 ++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 2efa01dfff6d0..88a3a96da0845 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -630,7 +630,8 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
 #define MEDIA_SOFT_SCRATCH(n)           _MMIO(0x190310 + (n) * 4)
 #define MEDIA_SOFT_SCRATCH_COUNT        4
 
-int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len)
+int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
+			  u32 len, u32 *response_buf)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
@@ -640,6 +641,7 @@ int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len)
 	int ret;
 	int i;
 
+	BUILD_BUG_ON(GEN11_SOFT_SCRATCH_COUNT != MEDIA_SOFT_SCRATCH_COUNT);
 	XE_BUG_ON(guc->ct.enabled);
 	XE_BUG_ON(!len);
 	XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT);
@@ -723,10 +725,24 @@ proto:
 		return -EPROTO;
 	}
 
+	/* Just copy entire possible message response */
+	if (response_buf) {
+		response_buf[0] = header;
+
+		for (i = 1; i < GEN11_SOFT_SCRATCH_COUNT; i++)
+			response_buf[i] =
+				xe_mmio_read32(gt, reply_reg + i * sizeof(u32));
+	}
+
 	/* Use data from the GuC response as our return value */
 	return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header);
 }
 
+int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len)
+{
+	return xe_guc_mmio_send_recv(guc, request, len, NULL);
+}
+
 static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
 {
 	u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 923efee909915..7be33458eef62 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -23,6 +23,8 @@ int xe_guc_suspend(struct xe_guc *guc);
 void xe_guc_notify(struct xe_guc *guc);
 int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
 int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len);
+int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len,
+			  u32 *response_buf);
 int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val);
 int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val);
 void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir);
-- 
GitLab


From c343bacfad5db03c4156ff3a44e3a5547afb246f Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 12 Jan 2023 17:25:36 -0500
Subject: [PATCH 1022/2340] drm/xe: Fix hidden gotcha regression with bo create

The bo_create ioctl relied on the internal ordering of memory regions to
be the same, make sure we don't allocate stolen instead of VRAM0.

Also remove a debug warning left in.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Philippe Lecluse <philippe.lecluse1@gmail.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h             | 8 +++++---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 1 -
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8d8a3332dbc86..8c2cdbe51ab53 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -13,10 +13,12 @@
 #define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
 
 #define XE_BO_CREATE_USER_BIT		BIT(0)
+/* The bits below need to be contiguous, or things break */
 #define XE_BO_CREATE_SYSTEM_BIT		BIT(1)
-#define XE_BO_CREATE_STOLEN_BIT		BIT(2)
-#define XE_BO_CREATE_VRAM0_BIT		BIT(3)
-#define XE_BO_CREATE_VRAM1_BIT		BIT(4)
+#define XE_BO_CREATE_VRAM0_BIT		BIT(2)
+#define XE_BO_CREATE_VRAM1_BIT		BIT(3)
+/* -- */
+#define XE_BO_CREATE_STOLEN_BIT		BIT(4)
 #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
 	(IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \
 	 XE_BO_CREATE_SYSTEM_BIT)
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 21ca7f79e63be..b4e9c88644e47 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -184,7 +184,6 @@ static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
 	mem->bus.offset = cur.start;
 
 	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
-	WARN_ON_ONCE(1);
 
 	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
 		mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset;
-- 
GitLab


From 2c33b49a6e6f8e176735eaca9ec6170478e0a426 Mon Sep 17 00:00:00 2001
From: Philippe Lecluse <philippe.lecluse@intel.com>
Date: Thu, 12 Jan 2023 17:25:38 -0500
Subject: [PATCH 1023/2340] drm/xe: enforce GSMBASE for DG1 instead of BAR2

On DG1, BAR2 is not reliable for reporting Vram size, need to use GSMBASE.
Simplify xe_mmio_total_vram_size to report vram size and usable size.

Signed-off-by: Philippe Lecluse <philippe.lecluse@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 54c9362a30509..88d475dca6db6 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -150,7 +150,7 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
 	return true;
 }
 
-int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base)
+int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_size)
 {
 	struct xe_gt *gt = xe_device_get_gt(xe, 0);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -159,8 +159,12 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_
 
 	if (!xe->info.has_flat_ccs)  {
 		*vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-		if (flat_ccs_base)
-			*flat_ccs_base = *vram_size;
+		if (usable_size) {
+			if (xe->info.platform == XE_DG1)
+				*usable_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg);
+			else
+				*usable_size = *vram_size;
+		}
 		return 0;
 	}
 
@@ -170,15 +174,13 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_
 
 	reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
 	*vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
-	if (flat_ccs_base) {
+	if (usable_size) {
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
-		*flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+		*usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+		drm_info(&xe->drm, "lmem_size: 0x%llx usable_size: 0x%llx\n",
+			 *vram_size, *usable_size);
 	}
 
-	if (flat_ccs_base)
-		drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n",
-			 *vram_size, *flat_ccs_base);
-
 	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 }
 
@@ -190,7 +192,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	u64 lmem_size;
 	u64 original_size;
 	u64 current_size;
-	u64 flat_ccs_base;
+	u64 usable_size;
 	int resize_result, err;
 
 	if (!IS_DGFX(xe)) {
@@ -212,11 +214,9 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	}
 
 	gt = xe_device_get_gt(xe, 0);
-	lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg);
-
 	original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 
-	err = xe_mmio_total_vram_size(xe, &lmem_size, &flat_ccs_base);
+	err = xe_mmio_total_vram_size(xe, &lmem_size, &usable_size);
 	if (err)
 		return err;
 
@@ -244,7 +244,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
 #endif
 
-	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base);
+	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
 
 	drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
 
-- 
GitLab


From d8731500721d5ae26819de36c63921f4baaafe00 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 12 Jan 2023 16:34:09 +0000
Subject: [PATCH 1024/2340] drm/xe/pcode: fix pcode error check

On DG2 we are now getting:

[  104.456607] xe 0000:03:00.0: [drm] *ERROR* PCODE timeout, retrying with preemption disabled

Looks like we just need to invert the error check for
xe_pcode_try_request(), which returns zero on success.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pcode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 39712e8437283..1a76fe4788536 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -155,7 +155,7 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
 
 	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
 				   false, timeout_base_ms * 1000);
-	if (ret)
+	if (!ret)
 		goto out;
 
 	/*
-- 
GitLab


From 760f168db30a5c06893e87c88f25cd3578a8453a Mon Sep 17 00:00:00 2001
From: Philippe Lecluse <philippe.lecluse@intel.com>
Date: Fri, 13 Jan 2023 14:07:17 +0000
Subject: [PATCH 1025/2340] drm/xe: fix xe_mmio_total_vram_size

As also cause issue on PVC, moving back
to what we did before stolen was
introduced

Signed-off-by: Philippe Lecluse <philippe.lecluse@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 88d475dca6db6..54d2a94a75190 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -159,12 +159,8 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si
 
 	if (!xe->info.has_flat_ccs)  {
 		*vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-		if (usable_size) {
-			if (xe->info.platform == XE_DG1)
-				*usable_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg);
-			else
-				*usable_size = *vram_size;
-		}
+		if (usable_size)
+			*usable_size = min(*vram_size, xe_mmio_read64(gt, GEN12_GSMBASE.reg));
 		return 0;
 	}
 
-- 
GitLab


From 4aa18ae44686144c5c5d29113d6e2c5c3ebb349d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 22 Dec 2022 12:11:45 -0800
Subject: [PATCH 1026/2340] drm/xe/ggtt: Use BIT_ULL() for 64bit

Make sure it's 64bit value in both 32b and 64b arch.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index eab74a509f681..0018c84417470 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -20,8 +20,8 @@
 #include "gt/intel_gt_regs.h"
 
 /* FIXME: Common file, preferably auto-gen */
-#define MTL_GGTT_PTE_PAT0	BIT(52)
-#define MTL_GGTT_PTE_PAT1	BIT(53)
+#define MTL_GGTT_PTE_PAT0	BIT_ULL(52)
+#define MTL_GGTT_PTE_PAT1	BIT_ULL(53)
 
 u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
 {
-- 
GitLab


From 857912c37ea786715e03b5bf25db07e28fc2ba73 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 22 Dec 2022 12:14:27 -0800
Subject: [PATCH 1027/2340] drm/xe: Fix some log messages on 32b

Either use the proper format or cast up to 64b depending on the case.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 6 +++---
 drivers/gpu/drm/xe/xe_guc_submit.c    | 2 +-
 drivers/gpu/drm/xe/xe_mmio.c          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 0f3b819f0a349..03a60d5b42f1d 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -263,9 +263,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_tiny;
 	}
 
-	kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n",
-		   xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE),
-		   xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE));
+	kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n",
+		   (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE),
+		   (unsigned long)xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE));
 
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index e0d424c2b78c0..2d4eb527d6e8b 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1630,7 +1630,7 @@ static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
 		for (i = parallel_read(xe, map, wq_desc.head);
 		     i != parallel_read(xe, map, wq_desc.tail);
 		     i = (i + sizeof(u32)) % WQ_SIZE)
-			drm_printf(p, "\tWQ[%ld]: 0x%08x\n", i / sizeof(u32),
+			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
 				   parallel_read(xe, map, wq[i / sizeof(u32)]));
 	}
 }
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 54d2a94a75190..7c87be130e026 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -234,7 +234,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 			 (u64)xe->mem.vram.size >> 20);
 	if (xe->mem.vram.size < lmem_size)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
-			 lmem_size, xe->mem.vram.size);
+			 lmem_size, (u64)xe->mem.vram.size);
 
 #ifdef CONFIG_64BIT
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
-- 
GitLab


From 9a6e6c14bfde967fca5a052cbee206d0b6169a1e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 22 Dec 2022 12:15:24 -0800
Subject: [PATCH 1028/2340] drm/xe/mmio: Use non-atomic writeq/readq variant
 for 32b

writeq() and readq() and other functions working on 64 bit variables
are not provided by 32b arch. For that it's needed to choose between
linux/io-64-nonatomic-hi-lo.h and linux/io-64-nonatomic-lo-hi.h,
spliting the read/write in 2 accesses. For xe driver, it doesn't matter
much, so just choose one and include in xe_mmio.h.

This also removes some ifdef CONFIG_64BIT we had around because of the
missing 64bit functions.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 7 -------
 drivers/gpu/drm/xe/xe_mmio.h | 1 +
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 7c87be130e026..8a953df2b4688 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -48,7 +48,6 @@ mask_err:
 	return err;
 }
 
-#ifdef CONFIG_64BIT
 static int
 _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 {
@@ -132,9 +131,6 @@ static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
 	return ret;
 }
-#else
-static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) { return 0; }
-#endif
 
 static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
 {
@@ -236,10 +232,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
 			 lmem_size, (u64)xe->mem.vram.size);
 
-#ifdef CONFIG_64BIT
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
-#endif
-
 	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
 
 	drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index a3b144553873b..354be6fae0d47 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -7,6 +7,7 @@
 #define _XE_MMIO_H_
 
 #include <linux/delay.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "xe_gt_types.h"
 
-- 
GitLab


From ebec269c522fc9bb48d11b65456b01adbdecb97d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 22 Dec 2022 12:18:26 -0800
Subject: [PATCH 1029/2340] drm/xe: Fix tracepoints on 32b

Leave the types as u64, but cast the pointers to unsigned long before
assigning so the compiler doesn't throw warning about casting a pointer
to integer of different size.

Also, size_t should use %zu, not %ld.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_trace.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index a5f963f1f6ebc..d9f921d46b53f 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -33,10 +33,10 @@ DECLARE_EVENT_CLASS(xe_bo,
 		    TP_fast_assign(
 			   __entry->size = bo->size;
 			   __entry->flags = bo->flags;
-			   __entry->vm = (u64)bo->vm;
+			   __entry->vm = (unsigned long)bo->vm;
 			   ),
 
-		    TP_printk("size=%ld, flags=0x%02x, vm=0x%016llx",
+		    TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx",
 			      __entry->size, __entry->flags, __entry->vm)
 );
 
@@ -186,7 +186,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 			   atomic_read(&job->engine->guc->state);
 			   __entry->flags = job->engine->flags;
 			   __entry->error = job->fence->error;
-			   __entry->fence = (u64)job->fence;
+			   __entry->fence = (unsigned long)job->fence;
 			   __entry->batch_addr = (u64)job->batch_addr[0];
 			   ),
 
@@ -273,7 +273,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence,
 		    TP_fast_assign(
 			   __entry->ctx = fence->dma.context;
 			   __entry->seqno = fence->dma.seqno;
-			   __entry->fence = (u64)fence;
+			   __entry->fence = (unsigned long)fence;
 			   ),
 
 		    TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
@@ -313,7 +313,7 @@ DECLARE_EVENT_CLASS(xe_vma,
 			     ),
 
 		    TP_fast_assign(
-			   __entry->vma = (u64)vma;
+			   __entry->vma = (unsigned long)vma;
 			   __entry->asid = vma->vm->usm.asid;
 			   __entry->start = vma->start;
 			   __entry->end = vma->end;
@@ -410,7 +410,7 @@ DECLARE_EVENT_CLASS(xe_vm,
 			     ),
 
 		    TP_fast_assign(
-			   __entry->vm = (u64)vm;
+			   __entry->vm = (unsigned long)vm;
 			   __entry->asid = vm->usm.asid;
 			   ),
 
-- 
GitLab


From 2c3878820bf0bbd659c2b897add8a011b5e9f2e1 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 22 Dec 2022 13:58:12 -0800
Subject: [PATCH 1030/2340] drm/xe/gt: Fix min() with u32 and u64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following error while building for 32b:

	In file included from ../drivers/gpu/drm/xe/xe_gt.c:6:
	../drivers/gpu/drm/xe/xe_gt.c: In function ‘gt_ttm_mgr_init’:
	../include/linux/minmax.h:20:35: error: comparison of distinct pointer types lacks a cast [-Werror]
	   20 |         (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
	      |                                   ^~

Cast it to u64 so size of the second operand matches the first one when
building it for 32 bits.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 96d0f5845d87b..39df6945e1d91 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -183,7 +183,7 @@ static int gt_ttm_mgr_init(struct xe_gt *gt)
 		if (err)
 			return err;
 		gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20),
-				   gt->mem.vram.size),
+				   (u64)gt->mem.vram.size),
 			       gtt_size);
 		xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1;
 	}
-- 
GitLab


From baf31a20fa7f3538d68ffa5262a715eb1d699cdd Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Wed, 22 Nov 2023 12:16:27 +0530
Subject: [PATCH 1031/2340] drm/i915/display: Get bigjoiner config before dsc
 config during readout

Currently we get bigjoiner config after the dsc get config, during HW
readout.
Since dsc_get_config now uses bigjoiner flags/pipes to compute DSC PPS
parameter pic_width, this results in a state mismatch when Bigjoiner
and DSC are used together.

So call get bigjoiner config before calling dsc get config function.

Fixes: 8b70b5691704 ("drm/i915/vdsc: Fill the intel_dsc_get_pps_config function")
Cc: Suraj Kandpal <suraj.kandpal@intel.com>
Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Cc: Animesh Manna <animesh.manna@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122064627.905828-1-ankit.k.nautiyal@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 7d48bcdd57974..4e957a8ddfdb3 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3746,8 +3746,8 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 	if (!active)
 		goto out;
 
-	intel_dsc_get_config(pipe_config);
 	intel_bigjoiner_get_config(pipe_config);
+	intel_dsc_get_config(pipe_config);
 
 	if (!transcoder_is_dsi(pipe_config->cpu_transcoder) ||
 	    DISPLAY_VER(dev_priv) >= 11)
-- 
GitLab


From 3203009fe58d407a150e1116d6900d6ddbbaa542 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Tue, 12 Dec 2023 13:51:30 +0200
Subject: [PATCH 1032/2340] drm/i915/display: Wait for PHY readiness not needed
 for disabling sequence

When going through the disconnection flow we don't need to wait for PHY
readiness and hence we can skip the wait part. For disabling the function
returns false as an indicator that the power is not enabled. After all,
we are not even using the return value when Type-C is disconnecting.

v2: Cleanup for increased readibility (Imre)

BSpec: 65380

For VLK-53734

Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231212115130.485911-1-mika.kahola@intel.com
---
 drivers/gpu/drm/i915/display/intel_tc.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index f64d348a969ef..dcf05e00e5052 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -1030,18 +1030,25 @@ static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enabl
 
 	__xelpdp_tc_phy_enable_tcss_power(tc, enable);
 
-	if ((!tc_phy_wait_for_ready(tc) ||
-	     !xelpdp_tc_phy_wait_for_tcss_power(tc, enable)) &&
-	    !drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY)) {
-		if (enable) {
-			__xelpdp_tc_phy_enable_tcss_power(tc, false);
-			xelpdp_tc_phy_wait_for_tcss_power(tc, false);
-		}
+	if (enable && !tc_phy_wait_for_ready(tc))
+		goto out_disable;
 
-		return false;
-	}
+	if (!xelpdp_tc_phy_wait_for_tcss_power(tc, enable))
+		goto out_disable;
 
 	return true;
+
+out_disable:
+	if (drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY))
+		return false;
+
+	if (!enable)
+		return false;
+
+	__xelpdp_tc_phy_enable_tcss_power(tc, false);
+	xelpdp_tc_phy_wait_for_tcss_power(tc, false);
+
+	return false;
 }
 
 static void xelpdp_tc_phy_take_ownership(struct intel_tc_port *tc, bool take)
-- 
GitLab


From ac3420d3d428443a08b923f9118121c170192b62 Mon Sep 17 00:00:00 2001
From: Karthik Poosa <karthik.poosa@intel.com>
Date: Mon, 4 Dec 2023 20:18:09 +0530
Subject: [PATCH 1033/2340] drm/i915/hwmon: Fix static analysis tool reported
 issues

Updated i915 hwmon with fixes for issues reported by static analysis tool.
Fixed integer overflow with upcasting.

v2:
- Added Fixes tag (Badal).
- Updated commit message as per review comments (Anshuman).

Fixes: 4c2572fe0ae7 ("drm/i915/hwmon: Expose power1_max_interval")
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231204144809.1518704-1-karthik.poosa@intel.com
---
 drivers/gpu/drm/i915/i915_hwmon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 975da8e7f2a9f..8c3f443c8347e 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -175,7 +175,7 @@ hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
 	 *     tau4 = (4 | x) << y
 	 * but add 2 when doing the final right shift to account for units
 	 */
-	tau4 = ((1 << x_w) | x) << y;
+	tau4 = (u64)((1 << x_w) | x) << y;
 	/* val in hwmon interface units (millisec) */
 	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
@@ -211,7 +211,7 @@ hwm_power1_max_interval_store(struct device *dev,
 	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
 	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
 	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
-	tau4 = ((1 << x_w) | x) << y;
+	tau4 = (u64)((1 << x_w) | x) << y;
 	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
 	if (val > max_win)
-- 
GitLab


From 8c8546546f256f834e9c7cab48e5946df340d1a8 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:56:27 +0800
Subject: [PATCH 1034/2340] drm/rockchip: move output interface related
 definition to rockchip_drm_drv.h

The output interface related definition can shared between
vop and vop2, move them to rockchip_drm_drv.h can avoid duplicated
definition.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115627.1784735-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/analogix_dp-rockchip.c |  1 -
 drivers/gpu/drm/rockchip/cdn-dp-core.c          |  1 -
 drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c |  1 -
 drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c     |  1 -
 drivers/gpu/drm/rockchip/inno_hdmi.c            |  1 -
 drivers/gpu/drm/rockchip/rk3066_hdmi.c          |  1 -
 drivers/gpu/drm/rockchip/rockchip_drm_drv.h     | 17 +++++++++++++++++
 drivers/gpu/drm/rockchip/rockchip_drm_vop.h     | 12 ------------
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.h    | 16 +---------------
 drivers/gpu/drm/rockchip/rockchip_lvds.c        |  1 -
 drivers/gpu/drm/rockchip/rockchip_rgb.c         |  1 -
 11 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index 84aa811ca1e9c..bd08d57486fef 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -30,7 +30,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define RK3288_GRF_SOC_CON6		0x25c
 #define RK3288_EDP_LCDC_SEL		BIT(5)
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index 21254e4e107a9..a855c45ae7f3d 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -24,7 +24,6 @@
 
 #include "cdn-dp-core.h"
 #include "cdn-dp-reg.h"
-#include "rockchip_drm_vop.h"
 
 static inline struct cdn_dp_device *connector_to_dp(struct drm_connector *connector)
 {
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index 6396f9324dab9..4cc8ed8f4fbd4 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -26,7 +26,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define DSI_PHY_RSTZ			0xa0
 #define PHY_DISFORCEPLL			0
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 341550199111f..fe33092abbe7d 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -18,7 +18,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define RK3228_GRF_SOC_CON2		0x0408
 #define RK3228_HDMI_SDAIN_MSK		BIT(14)
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
index 6e5b922a121e2..f6d819803c0e0 100644
--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -23,7 +23,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #include "inno_hdmi.h"
 
diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index 7d561c5a650fc..62e6d8187de76 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -18,7 +18,6 @@
 #include "rk3066_hdmi.h"
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 
 #define DEFAULT_PLLA_RATE 30000000
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index aeb03a57240fd..3d8ab2defa1be 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -20,6 +20,23 @@
 #define ROCKCHIP_MAX_CONNECTOR	2
 #define ROCKCHIP_MAX_CRTC	4
 
+/*
+ * display output interface supported by rockchip lcdc
+ */
+#define ROCKCHIP_OUT_MODE_P888		0
+#define ROCKCHIP_OUT_MODE_BT1120	0
+#define ROCKCHIP_OUT_MODE_P666		1
+#define ROCKCHIP_OUT_MODE_P565		2
+#define ROCKCHIP_OUT_MODE_BT656		5
+#define ROCKCHIP_OUT_MODE_S888		8
+#define ROCKCHIP_OUT_MODE_S888_DUMMY	12
+#define ROCKCHIP_OUT_MODE_YUV420	14
+/* for use special outface */
+#define ROCKCHIP_OUT_MODE_AAAA		15
+
+/* output flags */
+#define ROCKCHIP_OUTPUT_DSI_DUAL	BIT(0)
+
 struct drm_device;
 struct drm_connector;
 struct iommu_domain;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 4b2daefeb8c14..b33e5bdc26be1 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -277,18 +277,6 @@ struct vop_data {
 /* dst alpha ctrl define */
 #define DST_FACTOR_M0(x)		(((x) & 0x7) << 6)
 
-/*
- * display output interface supported by rockchip lcdc
- */
-#define ROCKCHIP_OUT_MODE_P888	0
-#define ROCKCHIP_OUT_MODE_P666	1
-#define ROCKCHIP_OUT_MODE_P565	2
-/* for use special outface */
-#define ROCKCHIP_OUT_MODE_AAAA	15
-
-/* output flags */
-#define ROCKCHIP_OUTPUT_DSI_DUAL	BIT(0)
-
 enum alpha_mode {
 	ALPHA_STRAIGHT,
 	ALPHA_INVERSE,
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
index 56fd31e052381..7175f46a2014b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -7,10 +7,9 @@
 #ifndef _ROCKCHIP_DRM_VOP2_H
 #define _ROCKCHIP_DRM_VOP2_H
 
-#include "rockchip_drm_vop.h"
-
 #include <linux/regmap.h>
 #include <drm/drm_modes.h>
+#include "rockchip_drm_vop.h"
 
 #define VOP_FEATURE_OUTPUT_10BIT        BIT(0)
 
@@ -166,19 +165,6 @@ struct vop2_data {
 #define WB_YRGB_FIFO_FULL_INTR		BIT(18)
 #define WB_COMPLETE_INTR		BIT(19)
 
-/*
- * display output interface supported by rockchip lcdc
- */
-#define ROCKCHIP_OUT_MODE_P888		0
-#define ROCKCHIP_OUT_MODE_BT1120	0
-#define ROCKCHIP_OUT_MODE_P666		1
-#define ROCKCHIP_OUT_MODE_P565		2
-#define ROCKCHIP_OUT_MODE_BT656		5
-#define ROCKCHIP_OUT_MODE_S888		8
-#define ROCKCHIP_OUT_MODE_S888_DUMMY	12
-#define ROCKCHIP_OUT_MODE_YUV420	14
-/* for use special outface */
-#define ROCKCHIP_OUT_MODE_AAAA		15
 
 enum vop_csc_format {
 	CSC_BT601L,
diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c
index f0f47e9abf5aa..59341654ec32b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c
+++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c
@@ -27,7 +27,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 #include "rockchip_lvds.h"
 
 #define DISPLAY_OUTPUT_RGB		0
diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c
index c677b71ae516b..dbfbde24698ef 100644
--- a/drivers/gpu/drm/rockchip/rockchip_rgb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c
@@ -19,7 +19,6 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include "rockchip_drm_drv.h"
-#include "rockchip_drm_vop.h"
 #include "rockchip_rgb.h"
 
 struct rockchip_rgb {
-- 
GitLab


From 81a06f1d02e588cfa14c5e5953d9dc50b1d404be Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:57:19 +0800
Subject: [PATCH 1035/2340] Revert "drm/rockchip: vop2: Use regcache_sync() to
 fix suspend/resume"

This reverts commit b63a553e8f5aa6574eeb535a551817a93c426d8c.

regcache_sync will try to reload the configuration in regcache to
hardware, but the registers of 4 Cluster windows and Esmart1/2/3 on
the upcoming rk3588 can not be set successfully before internal PD
power on.

Also it's better to keep the hardware register as it is before we really
enable it.

So let's revert this version, and keep the first version:
commit afa965a45e01 ("drm/rockchip: vop2: fix suspend/resume")

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115719.1784834-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 312da57833620..57784d0a22a69 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -217,6 +217,8 @@ struct vop2 {
 	struct vop2_win win[];
 };
 
+static const struct regmap_config vop2_regmap_config;
+
 static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
 {
 	return container_of(crtc, struct vop2_video_port, crtc);
@@ -883,7 +885,11 @@ static void vop2_enable(struct vop2 *vop2)
 		return;
 	}
 
-	regcache_sync(vop2->map);
+	ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config);
+	if (ret) {
+		drm_err(vop2->drm, "failed to reinit cache: %d\n", ret);
+		return;
+	}
 
 	if (vop2->data->soc_id == 3566)
 		vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
@@ -913,8 +919,6 @@ static void vop2_disable(struct vop2 *vop2)
 
 	pm_runtime_put_sync(vop2->dev);
 
-	regcache_mark_dirty(vop2->map);
-
 	clk_disable_unprepare(vop2->aclk);
 	clk_disable_unprepare(vop2->hclk);
 }
-- 
GitLab


From bebad6bd4fbdc448ad3b337ad281b813e68f6f53 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:57:30 +0800
Subject: [PATCH 1036/2340] drm/rockchip: vop2: set half_block_en bit in all
 mode

At first we thought the half_block_en bit in AFBCD_CTRL register
only work in afbc mode. But the fact is that it control the line
buffer in all mode(afbc/tile/linear), so we need configure it in
all case.

As the cluster windows of rk3568 only supports afbc format
so is therefore not affected.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115730.1784893-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 25 ++++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 57784d0a22a69..639dfebc6bd1d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -521,6 +521,18 @@ static bool rockchip_vop2_mod_supported(struct drm_plane *plane, u32 format,
 	return vop2_convert_afbc_format(format) >= 0;
 }
 
+/*
+ * 0: Full mode, 16 lines for one tail
+ * 1: half block mode, 8 lines one tail
+ */
+static bool vop2_half_block_enable(struct drm_plane_state *pstate)
+{
+	if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90))
+		return false;
+	else
+		return true;
+}
+
 static u32 vop2_afbc_transform_offset(struct drm_plane_state *pstate,
 				      bool afbc_half_block_en)
 {
@@ -1144,6 +1156,7 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 	bool rotate_90 = pstate->rotation & DRM_MODE_ROTATE_90;
 	struct rockchip_gem_object *rk_obj;
 	unsigned long offset;
+	bool half_block_en;
 	bool afbc_en;
 	dma_addr_t yrgb_mst;
 	dma_addr_t uv_mst;
@@ -1236,6 +1249,7 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 	dsp_info = (dsp_h - 1) << 16 | ((dsp_w - 1) & 0xffff);
 
 	format = vop2_convert_format(fb->format->format);
+	half_block_en = vop2_half_block_enable(pstate);
 
 	drm_dbg(vop2->drm, "vp%d update %s[%dx%d->%dx%d@%dx%d] fmt[%p4cc_%s] addr[%pad]\n",
 		vp->id, win->data->name, actual_w, actual_h, dsp_w, dsp_h,
@@ -1243,6 +1257,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 		&fb->format->format,
 		afbc_en ? "AFBC" : "", &yrgb_mst);
 
+	if (vop2_cluster_window(win))
+		vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en);
+
 	if (afbc_en) {
 		u32 stride;
 
@@ -1283,13 +1300,7 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 		vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap);
 		vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
 		vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0);
-		if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) {
-			vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 0);
-			transform_offset = vop2_afbc_transform_offset(pstate, false);
-		} else {
-			vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 1);
-			transform_offset = vop2_afbc_transform_offset(pstate, true);
-		}
+		transform_offset = vop2_afbc_transform_offset(pstate, half_block_en);
 		vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst);
 		vop2_win_write(win, VOP2_WIN_AFBC_PIC_SIZE, act_info);
 		vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, transform_offset);
-- 
GitLab


From 20529a68307feed00dd3d431d3fff0572616b0f2 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:57:41 +0800
Subject: [PATCH 1037/2340] drm/rockchip: vop2: clear afbc en and transform bit
 for cluster window at linear mode

The enable bit and transform offset of cluster windows should be
cleared when it work at linear mode, or we may have a iommu fault
issue on rk3588 which cluster windows switch between afbc and linear
mode.

As the cluster windows of rk3568 only supports afbc format
so is therefore not affected.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115741.1784954-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 639dfebc6bd1d..a019cc9bbd54d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1312,6 +1312,11 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 		vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_270, rotate_270);
 		vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_90, rotate_90);
 	} else {
+		if (vop2_cluster_window(win)) {
+			vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 0);
+			vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, 0);
+		}
+
 		vop2_win_write(win, VOP2_WIN_YRGB_VIR, DIV_ROUND_UP(fb->pitches[0], 4));
 	}
 
-- 
GitLab


From d1f8face0fc1298c88ef4a0479c3027b46ca2c77 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:57:52 +0800
Subject: [PATCH 1038/2340] drm/rockchip: vop2: Add write mask for VP config
 done

The write mask bit is used to make sure when writing
config done bit for one VP will not overwrite the other.

Unfortunately, the write mask bit is missing on
rk3566/8, that means when we write to these bits,
it will not take any effect.

We need this to make the vop work properly after
rk3566/8 variants.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115752.1785013-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index a019cc9bbd54d..c4a372286580d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -268,12 +268,23 @@ static bool vop2_cluster_window(const struct vop2_win *win)
 	return win->data->feature & WIN_FEATURE_CLUSTER;
 }
 
+/*
+ * Note:
+ * The write mask function is documented but missing on rk3566/8, writes
+ * to these bits have no effect. For newer soc(rk3588 and following) the
+ * write mask is needed for register writes.
+ *
+ * GLB_CFG_DONE_EN has no write mask bit.
+ *
+ */
 static void vop2_cfg_done(struct vop2_video_port *vp)
 {
 	struct vop2 *vop2 = vp->vop2;
+	u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN;
+
+	val |= BIT(vp->id) | (BIT(vp->id) << 16);
 
-	regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE,
-			BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
+	regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val);
 }
 
 static void vop2_win_disable(struct vop2_win *win)
-- 
GitLab


From dd49ee4614cfb0b1f627c4353b60cecfe998a374 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:58:05 +0800
Subject: [PATCH 1039/2340] drm/rockchip: vop2: Set YUV/RGB overlay mode

Set overlay mode register according to the
output mode is yuv or rgb.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115805.1785073-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_drv.h  |  1 +
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 17 ++++++++++++++---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.h |  1 +
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index 3d8ab2defa1be..bbb9e0bf68048 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -48,6 +48,7 @@ struct rockchip_crtc_state {
 	int output_bpc;
 	int output_flags;
 	bool enable_afbc;
+	bool yuv_overlay;
 	u32 bus_format;
 	u32 bus_flags;
 	int color_space;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index c4a372286580d..8cfb95f05f091 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1623,6 +1623,8 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	vop2->enable_count++;
 
+	vcstate->yuv_overlay = is_yuv_output(vcstate->bus_format);
+
 	vop2_crtc_enable_irq(vp, VP_INT_POST_BUF_EMPTY);
 
 	polflags = 0;
@@ -1650,7 +1652,7 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 	if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode))
 		dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP;
 
-	if (is_yuv_output(vcstate->bus_format))
+	if (vcstate->yuv_overlay)
 		dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y;
 
 	vop2_dither_setup(crtc, &dsp_ctrl);
@@ -1959,10 +1961,12 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 	u16 hdisplay;
 	u32 bg_dly;
 	u32 pre_scan_dly;
+	u32 ovl_ctrl;
 	int i;
 	struct vop2_video_port *vp0 = &vop2->vps[0];
 	struct vop2_video_port *vp1 = &vop2->vps[1];
 	struct vop2_video_port *vp2 = &vop2->vps[2];
+	struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state);
 
 	adjusted_mode = &vp->crtc.state->adjusted_mode;
 	hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start;
@@ -1975,7 +1979,15 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 	pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len;
 	vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly);
 
-	vop2_writel(vop2, RK3568_OVL_CTRL, 0);
+	ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL);
+	ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD;
+	if (vcstate->yuv_overlay)
+		ovl_ctrl |= RK3568_OVL_CTRL__YUV_MODE(vp->id);
+	else
+		ovl_ctrl &= ~RK3568_OVL_CTRL__YUV_MODE(vp->id);
+
+	vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl);
+
 	port_sel = vop2_readl(vop2, RK3568_OVL_PORT_SEL);
 	port_sel &= RK3568_OVL_PORT_SEL__SEL_PORT;
 
@@ -2049,7 +2061,6 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 
 	vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel);
 	vop2_writel(vop2, RK3568_OVL_PORT_SEL, port_sel);
-	vop2_writel(vop2, RK3568_OVL_CTRL, RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD);
 }
 
 static void vop2_setup_dly_for_windows(struct vop2 *vop2)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
index 7175f46a2014b..8d7ff52523fb4 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -401,6 +401,7 @@ enum dst_factor_mode {
 #define VOP2_COLOR_KEY_MASK				BIT(31)
 
 #define RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD		BIT(28)
+#define RK3568_OVL_CTRL__YUV_MODE(vp)			BIT(vp)
 
 #define RK3568_VP_BG_MIX_CTRL__BG_DLY			GENMASK(31, 24)
 
-- 
GitLab


From 075a5b3969becb1ebc2f1d4fa1a1fe9163679273 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:58:15 +0800
Subject: [PATCH 1040/2340] drm/rockchip: vop2: set bg dly and prescan dly at
 vop2_post_config

We need to setup background delay cycle and prescan
delay cycle when a mode is enable to avoid trigger
POST_BUF_EMPTY irq on rk3588.

Note: RK356x has no such requirement.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115815.1785131-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 26 ++++++++------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 8cfb95f05f091..ba25250968c3d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1460,8 +1460,18 @@ static void vop2_post_config(struct drm_crtc *crtc)
 	u32 top_margin = 100, bottom_margin = 100;
 	u16 hsize = hdisplay * (left_margin + right_margin) / 200;
 	u16 vsize = vdisplay * (top_margin + bottom_margin) / 200;
+	u16 hsync_len = mode->crtc_hsync_end - mode->crtc_hsync_start;
 	u16 hact_end, vact_end;
 	u32 val;
+	u32 bg_dly;
+	u32 pre_scan_dly;
+
+	bg_dly = vp->data->pre_scan_max_dly[3];
+	vop2_writel(vp->vop2, RK3568_VP_BG_MIX_CTRL(vp->id),
+		    FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly));
+
+	pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len;
+	vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly);
 
 	vsize = rounddown(vsize, 2);
 	hsize = rounddown(hsize, 2);
@@ -1956,11 +1966,6 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 	u32 layer_sel = 0;
 	u32 port_sel;
 	unsigned int nlayer, ofs;
-	struct drm_display_mode *adjusted_mode;
-	u16 hsync_len;
-	u16 hdisplay;
-	u32 bg_dly;
-	u32 pre_scan_dly;
 	u32 ovl_ctrl;
 	int i;
 	struct vop2_video_port *vp0 = &vop2->vps[0];
@@ -1968,17 +1973,6 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 	struct vop2_video_port *vp2 = &vop2->vps[2];
 	struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state);
 
-	adjusted_mode = &vp->crtc.state->adjusted_mode;
-	hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start;
-	hdisplay = adjusted_mode->crtc_hdisplay;
-
-	bg_dly = vp->data->pre_scan_max_dly[3];
-	vop2_writel(vop2, RK3568_VP_BG_MIX_CTRL(vp->id),
-		    FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly));
-
-	pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len;
-	vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly);
-
 	ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL);
 	ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD;
 	if (vcstate->yuv_overlay)
-- 
GitLab


From c408af1afc4b74ea6df69e0313be97f1f83e981a Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:58:26 +0800
Subject: [PATCH 1041/2340] drm/rockchip: vop2: rename grf to sys_grf

The vop2 need to reference more grf(system grf, vop grf, vo0/1 grf,etc)
in the upcoming rk3588.

So we rename the current system grf to sys_grf.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115826.1785190-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index ba25250968c3d..851bad5a21e92 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -190,7 +190,7 @@ struct vop2 {
 	void __iomem *regs;
 	struct regmap *map;
 
-	struct regmap *grf;
+	struct regmap *sys_grf;
 
 	/* physical map length of vop2 register */
 	u32 len;
@@ -1524,9 +1524,9 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
 		dip &= ~RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL;
 		dip |= FIELD_PREP(RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL, polflags);
 		if (polflags & POLFLAG_DCLK_INV)
-			regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3));
+			regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3));
 		else
-			regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16));
+			regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16));
 		break;
 	case ROCKCHIP_VOP2_EP_HDMI0:
 		die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX;
@@ -2767,7 +2767,7 @@ static int vop2_bind(struct device *dev, struct device *master, void *data)
 			return PTR_ERR(vop2->lut_regs);
 	}
 
-	vop2->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+	vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
 
 	vop2->hclk = devm_clk_get(vop2->dev, "hclk");
 	if (IS_ERR(vop2->hclk)) {
-- 
GitLab


From 4ccdc92c1fea732fac8f3438d6288719055fa141 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:58:50 +0800
Subject: [PATCH 1042/2340] dt-bindings: display: vop2: Add rk3588 support

The vop2 on rk3588 is similar to which on rk356x
but with 4 video ports and need to reference
more grf modules.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115850.1785311-1-andyshrk@163.com
---
 .../display/rockchip/rockchip-vop2.yaml       | 100 ++++++++++++++----
 1 file changed, 81 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop2.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop2.yaml
index b60b90472d42e..2531726af306b 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop2.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop2.yaml
@@ -8,8 +8,8 @@ title: Rockchip SoC display controller (VOP2)
 
 description:
   VOP2 (Video Output Processor v2) is the display controller for the Rockchip
-  series of SoCs which transfers the image data from a video memory
-  buffer to an external LCD interface.
+  series of SoCs which transfers the image data from a video memory buffer to
+  an external LCD interface.
 
 maintainers:
   - Sandy Huang <hjc@rock-chips.com>
@@ -20,6 +20,7 @@ properties:
     enum:
       - rockchip,rk3566-vop
       - rockchip,rk3568-vop
+      - rockchip,rk3588-vop
 
   reg:
     items:
@@ -27,8 +28,8 @@ properties:
           Must contain one entry corresponding to the base address and length
           of the register space.
       - description:
-          Can optionally contain a second entry corresponding to
-          the CRTC gamma LUT address.
+          Can optionally contain a second entry corresponding to the CRTC gamma
+          LUT address.
 
   reg-names:
     items:
@@ -41,45 +42,63 @@ properties:
       The VOP interrupt is shared by several interrupt sources, such as
       frame start (VSYNC), line flag and other status interrupts.
 
+  # See compatible-specific constraints below.
   clocks:
+    minItems: 5
     items:
-      - description: Clock for ddr buffer transfer.
-      - description: Clock for the ahb bus to R/W the phy regs.
+      - description: Clock for ddr buffer transfer via axi.
+      - description: Clock for the ahb bus to R/W the regs.
       - description: Pixel clock for video port 0.
       - description: Pixel clock for video port 1.
       - description: Pixel clock for video port 2.
+      - description: Pixel clock for video port 3.
+      - description: Peripheral(vop grf/dsi) clock.
 
   clock-names:
+    minItems: 5
     items:
       - const: aclk
       - const: hclk
       - const: dclk_vp0
       - const: dclk_vp1
       - const: dclk_vp2
+      - const: dclk_vp3
+      - const: pclk_vop
 
   rockchip,grf:
     $ref: /schemas/types.yaml#/definitions/phandle
     description:
-      Phandle to GRF regs used for misc control
+      Phandle to GRF regs used for control the polarity of dclk/hsync/vsync of DPI,
+      also used for query vop memory bisr enable status, etc.
+
+  rockchip,vo1-grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to VO GRF regs used for control the polarity of dclk/hsync/vsync of hdmi
+      on rk3588.
+
+  rockchip,vop-grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to VOP GRF regs used for control data path between vopr and hdmi/edp.
+
+  rockchip,pmu:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to PMU GRF used for query vop memory bisr status on rk3588.
 
   ports:
     $ref: /schemas/graph.yaml#/properties/ports
 
-    properties:
-      port@0:
+    patternProperties:
+      "^port@[0-3]$":
         $ref: /schemas/graph.yaml#/properties/port
-        description:
-          Output endpoint of VP0
+        description: Output endpoint of VP0/1/2/3.
 
-      port@1:
-        $ref: /schemas/graph.yaml#/properties/port
-        description:
-          Output endpoint of VP1
+    required:
+      - port@0
 
-      port@2:
-        $ref: /schemas/graph.yaml#/properties/port
-        description:
-          Output endpoint of VP2
+    unevaluatedProperties: false
 
   iommus:
     maxItems: 1
@@ -96,6 +115,49 @@ required:
   - clock-names
   - ports
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: rockchip,rk3588-vop
+    then:
+      properties:
+        clocks:
+          minItems: 7
+        clock-names:
+          minItems: 7
+
+        ports:
+          required:
+            - port@0
+            - port@1
+            - port@2
+            - port@3
+
+      required:
+        - rockchip,grf
+        - rockchip,vo1-grf
+        - rockchip,vop-grf
+        - rockchip,pmu
+
+    else:
+      properties:
+        rockchip,vo1-grf: false
+        rockchip,vop-grf: false
+        rockchip,pmu: false
+
+        clocks:
+          maxItems: 5
+        clock-names:
+          maxItems: 5
+
+        ports:
+          required:
+            - port@0
+            - port@1
+            - port@2
+
 additionalProperties: false
 
 examples:
-- 
GitLab


From dc7226acacc6502291446f9e33cf96246ec49a30 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:59:07 +0800
Subject: [PATCH 1043/2340] dt-bindings: rockchip,vop2: Add more endpoint
 definition

There are 2 HDMI, 2 DP, 2 eDP on rk3588, so add
corresponding endpoint definition for it.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115907.1785377-1-andyshrk@163.com
---
 include/dt-bindings/soc/rockchip,vop2.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/dt-bindings/soc/rockchip,vop2.h b/include/dt-bindings/soc/rockchip,vop2.h
index 6e66a802b96a5..668f199df9f04 100644
--- a/include/dt-bindings/soc/rockchip,vop2.h
+++ b/include/dt-bindings/soc/rockchip,vop2.h
@@ -10,5 +10,9 @@
 #define ROCKCHIP_VOP2_EP_LVDS0	5
 #define ROCKCHIP_VOP2_EP_MIPI1	6
 #define ROCKCHIP_VOP2_EP_LVDS1	7
+#define ROCKCHIP_VOP2_EP_HDMI1	8
+#define ROCKCHIP_VOP2_EP_EDP1	9
+#define ROCKCHIP_VOP2_EP_DP0	10
+#define ROCKCHIP_VOP2_EP_DP1	11
 
 #endif /* __DT_BINDINGS_ROCKCHIP_VOP2_H */
-- 
GitLab


From 5a028e8f062fc862f051f8e62a0d5a1abac91955 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:59:19 +0800
Subject: [PATCH 1044/2340] drm/rockchip: vop2: Add support for rk3588

VOP2 on rk3588:

Four video ports:
VP0 Max 4096x2160
VP1 Max 4096x2160
VP2 Max 4096x2160
VP3 Max 2048x1080

4 4K Cluster windows with AFBC/line RGB and AFBC-only YUV support
4 4K Esmart windows with line RGB/YUV support

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115919.1785435-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 400 ++++++++++++++++++-
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.h |  81 ++++
 drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 221 ++++++++++
 3 files changed, 696 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 851bad5a21e92..8c74af3fa6a85 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -191,6 +191,9 @@ struct vop2 {
 	struct regmap *map;
 
 	struct regmap *sys_grf;
+	struct regmap *vop_grf;
+	struct regmap *vo1_grf;
+	struct regmap *sys_pmu;
 
 	/* physical map length of vop2 register */
 	u32 len;
@@ -209,6 +212,7 @@ struct vop2 {
 	unsigned int enable_count;
 	struct clk *hclk;
 	struct clk *aclk;
+	struct clk *pclk;
 
 	/* optional internal rgb encoder */
 	struct rockchip_rgb *rgb;
@@ -217,6 +221,23 @@ struct vop2 {
 	struct vop2_win win[];
 };
 
+#define vop2_output_if_is_hdmi(x)	((x) == ROCKCHIP_VOP2_EP_HDMI0 || \
+					 (x) == ROCKCHIP_VOP2_EP_HDMI1)
+
+#define vop2_output_if_is_dp(x)		((x) == ROCKCHIP_VOP2_EP_DP0 || \
+					 (x) == ROCKCHIP_VOP2_EP_DP1)
+
+#define vop2_output_if_is_edp(x)	((x) == ROCKCHIP_VOP2_EP_EDP0 || \
+					 (x) == ROCKCHIP_VOP2_EP_EDP1)
+
+#define vop2_output_if_is_mipi(x)	((x) == ROCKCHIP_VOP2_EP_MIPI0 || \
+					 (x) == ROCKCHIP_VOP2_EP_MIPI1)
+
+#define vop2_output_if_is_lvds(x)	((x) == ROCKCHIP_VOP2_EP_LVDS0 || \
+					 (x) == ROCKCHIP_VOP2_EP_LVDS1)
+
+#define vop2_output_if_is_dpi(x)	((x) == ROCKCHIP_VOP2_EP_RGB0)
+
 static const struct regmap_config vop2_regmap_config;
 
 static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
@@ -475,6 +496,17 @@ static bool vop2_output_uv_swap(u32 bus_format, u32 output_mode)
 		return false;
 }
 
+static bool vop2_output_rg_swap(struct vop2 *vop2, u32 bus_format)
+{
+	if (vop2->data->soc_id == 3588) {
+		if (bus_format == MEDIA_BUS_FMT_YUV8_1X24 ||
+		    bus_format == MEDIA_BUS_FMT_YUV10_1X30)
+			return true;
+	}
+
+	return false;
+}
+
 static bool is_yuv_output(u32 bus_format)
 {
 	switch (bus_format) {
@@ -879,13 +911,32 @@ static int vop2_core_clks_prepare_enable(struct vop2 *vop2)
 		goto err;
 	}
 
+	ret = clk_prepare_enable(vop2->pclk);
+	if (ret < 0) {
+		drm_err(vop2->drm, "failed to enable pclk - %d\n", ret);
+		goto err1;
+	}
+
 	return 0;
+err1:
+	clk_disable_unprepare(vop2->aclk);
 err:
 	clk_disable_unprepare(vop2->hclk);
 
 	return ret;
 }
 
+static void rk3588_vop2_power_domain_enable_all(struct vop2 *vop2)
+{
+	u32 pd;
+
+	pd = vop2_readl(vop2, RK3588_SYS_PD_CTRL);
+	pd &= ~(VOP2_PD_CLUSTER0 | VOP2_PD_CLUSTER1 | VOP2_PD_CLUSTER2 |
+		VOP2_PD_CLUSTER3 | VOP2_PD_ESMART);
+
+	vop2_writel(vop2, RK3588_SYS_PD_CTRL, pd);
+}
+
 static void vop2_enable(struct vop2 *vop2)
 {
 	int ret;
@@ -917,6 +968,9 @@ static void vop2_enable(struct vop2 *vop2)
 	if (vop2->data->soc_id == 3566)
 		vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
 
+	if (vop2->data->soc_id == 3588)
+		rk3588_vop2_power_domain_enable_all(vop2);
+
 	vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
 
 	/*
@@ -942,6 +996,7 @@ static void vop2_disable(struct vop2 *vop2)
 
 	pm_runtime_put_sync(vop2->dev);
 
+	clk_disable_unprepare(vop2->pclk);
 	clk_disable_unprepare(vop2->aclk);
 	clk_disable_unprepare(vop2->hclk);
 }
@@ -1309,7 +1364,19 @@ static void vop2_plane_atomic_update(struct drm_plane *plane,
 			vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1);
 		vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format);
 		vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap);
-		vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
+		/*
+		 * On rk3566/8, this bit is auto gating enable,
+		 * but this function is not work well so we need
+		 * to disable it for these two platform.
+		 * On rk3588, and the following new soc(rk3528/rk3576),
+		 * this bit is gating disable, we should write 1 to
+		 * disable gating when enable afbc.
+		 */
+		if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568)
+			vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0);
+		else
+			vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 1);
+
 		vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0);
 		transform_offset = vop2_afbc_transform_offset(pstate, half_block_en);
 		vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst);
@@ -1507,10 +1574,10 @@ static void vop2_post_config(struct drm_crtc *crtc)
 	vop2_vp_write(vp, RK3568_VP_DSP_BG, 0);
 }
 
-static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
-				u32 polflags)
+static unsigned long rk3568_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags)
 {
 	struct vop2 *vop2 = vp->vop2;
+	struct drm_crtc *crtc = &vp->crtc;
 	u32 die, dip;
 
 	die = vop2_readl(vop2, RK3568_DSP_IF_EN);
@@ -1572,13 +1639,281 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
 		break;
 	default:
 		drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id);
-		return;
+		return 0;
 	}
 
 	dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD;
 
 	vop2_writel(vop2, RK3568_DSP_IF_EN, die);
 	vop2_writel(vop2, RK3568_DSP_IF_POL, dip);
+
+	return crtc->state->adjusted_mode.crtc_clock  * 1000LL;
+}
+
+/*
+ * calc the dclk on rk3588
+ * the available div of dclk is 1, 2, 4
+ */
+static unsigned long rk3588_calc_dclk(unsigned long child_clk, unsigned long max_dclk)
+{
+	if (child_clk * 4 <= max_dclk)
+		return child_clk * 4;
+	else if (child_clk * 2 <= max_dclk)
+		return child_clk * 2;
+	else if (child_clk <= max_dclk)
+		return child_clk;
+	else
+		return 0;
+}
+
+/*
+ * 4 pixclk/cycle on rk3588
+ * RGB/eDP/HDMI: if_pixclk >= dclk_core
+ * DP: dp_pixclk = dclk_out <= dclk_core
+ * DSI: mipi_pixclk <= dclk_out <= dclk_core
+ */
+static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id,
+					 int *dclk_core_div, int *dclk_out_div,
+					 int *if_pixclk_div, int *if_dclk_div)
+{
+	struct vop2 *vop2 = vp->vop2;
+	struct drm_crtc *crtc = &vp->crtc;
+	struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode;
+	struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state);
+	int output_mode = vcstate->output_mode;
+	unsigned long v_pixclk = adjusted_mode->crtc_clock * 1000LL; /* video timing pixclk */
+	unsigned long dclk_core_rate = v_pixclk >> 2;
+	unsigned long dclk_rate = v_pixclk;
+	unsigned long dclk_out_rate;
+	unsigned long if_dclk_rate;
+	unsigned long if_pixclk_rate;
+	int K = 1;
+
+	if (vop2_output_if_is_hdmi(id)) {
+		/*
+		 * K = 2: dclk_core = if_pixclk_rate > if_dclk_rate
+		 * K = 1: dclk_core = hdmie_edp_dclk > if_pixclk_rate
+		 */
+		if (output_mode == ROCKCHIP_OUT_MODE_YUV420) {
+			dclk_rate = dclk_rate >> 1;
+			K = 2;
+		}
+
+		if_pixclk_rate = (dclk_core_rate << 1) / K;
+		if_dclk_rate = dclk_core_rate / K;
+		/*
+		 * *if_pixclk_div = dclk_rate / if_pixclk_rate;
+		 * *if_dclk_div = dclk_rate / if_dclk_rate;
+		 */
+		 *if_pixclk_div = 2;
+		 *if_dclk_div = 4;
+	} else if (vop2_output_if_is_edp(id)) {
+		/*
+		 * edp_pixclk = edp_dclk > dclk_core
+		 */
+		if_pixclk_rate = v_pixclk / K;
+		dclk_rate = if_pixclk_rate * K;
+		/*
+		 * *if_pixclk_div = dclk_rate / if_pixclk_rate;
+		 * *if_dclk_div = *if_pixclk_div;
+		 */
+		*if_pixclk_div = K;
+		*if_dclk_div = K;
+	} else if (vop2_output_if_is_dp(id)) {
+		if (output_mode == ROCKCHIP_OUT_MODE_YUV420)
+			dclk_out_rate = v_pixclk >> 3;
+		else
+			dclk_out_rate = v_pixclk >> 2;
+
+		dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000);
+		if (!dclk_rate) {
+			drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %ld KHZ\n",
+				dclk_out_rate);
+			return 0;
+		}
+		*dclk_out_div = dclk_rate / dclk_out_rate;
+	} else if (vop2_output_if_is_mipi(id)) {
+		if_pixclk_rate = dclk_core_rate / K;
+		/*
+		 * dclk_core = dclk_out * K = if_pixclk * K = v_pixclk / 4
+		 */
+		dclk_out_rate = if_pixclk_rate;
+		/*
+		 * dclk_rate = N * dclk_core_rate N = (1,2,4 ),
+		 * we get a little factor here
+		 */
+		dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000);
+		if (!dclk_rate) {
+			drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld KHZ\n",
+				dclk_out_rate);
+			return 0;
+		}
+		*dclk_out_div = dclk_rate / dclk_out_rate;
+		/*
+		 * mipi pixclk == dclk_out
+		 */
+		*if_pixclk_div = 1;
+	} else if (vop2_output_if_is_dpi(id)) {
+		dclk_rate = v_pixclk;
+	}
+
+	*dclk_core_div = dclk_rate / dclk_core_rate;
+	*if_pixclk_div = ilog2(*if_pixclk_div);
+	*if_dclk_div = ilog2(*if_dclk_div);
+	*dclk_core_div = ilog2(*dclk_core_div);
+	*dclk_out_div = ilog2(*dclk_out_div);
+
+	drm_dbg(vop2->drm, "dclk: %ld, pixclk_div: %d, dclk_div: %d\n",
+		dclk_rate, *if_pixclk_div, *if_dclk_div);
+
+	return dclk_rate;
+}
+
+/*
+ * MIPI port mux on rk3588:
+ * 0: Video Port2
+ * 1: Video Port3
+ * 3: Video Port 1(MIPI1 only)
+ */
+static u32 rk3588_get_mipi_port_mux(int vp_id)
+{
+	if (vp_id == 1)
+		return 3;
+	else if (vp_id == 3)
+		return 1;
+	else
+		return 0;
+}
+
+static u32 rk3588_get_hdmi_pol(u32 flags)
+{
+	u32 val;
+
+	val = (flags & DRM_MODE_FLAG_NHSYNC) ? BIT(HSYNC_POSITIVE) : 0;
+	val |= (flags & DRM_MODE_FLAG_NVSYNC) ? BIT(VSYNC_POSITIVE) : 0;
+
+	return val;
+}
+
+static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags)
+{
+	struct vop2 *vop2 = vp->vop2;
+	int dclk_core_div, dclk_out_div, if_pixclk_div, if_dclk_div;
+	unsigned long clock;
+	u32 die, dip, div, vp_clk_div, val;
+
+	clock = rk3588_calc_cru_cfg(vp, id, &dclk_core_div, &dclk_out_div,
+				    &if_pixclk_div, &if_dclk_div);
+	if (!clock)
+		return 0;
+
+	vp_clk_div = FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_CORE_DIV, dclk_core_div);
+	vp_clk_div |= FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_OUT_DIV, dclk_out_div);
+
+	die = vop2_readl(vop2, RK3568_DSP_IF_EN);
+	dip = vop2_readl(vop2, RK3568_DSP_IF_POL);
+	div = vop2_readl(vop2, RK3568_DSP_IF_CTRL);
+
+	switch (id) {
+	case ROCKCHIP_VOP2_EP_HDMI0:
+		div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV;
+		div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 |
+			    FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
+		val = rk3588_get_hdmi_pol(polflags);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1));
+		regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5));
+		break;
+	case ROCKCHIP_VOP2_EP_HDMI1:
+		div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV;
+		div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 |
+			    FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
+		val = rk3588_get_hdmi_pol(polflags);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4));
+		regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7));
+		break;
+	case ROCKCHIP_VOP2_EP_EDP0:
+		div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV;
+		div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_EDP0 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0));
+		break;
+	case ROCKCHIP_VOP2_EP_EDP1:
+		div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV;
+		div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div);
+		div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_EDP1 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
+		regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3));
+		break;
+	case ROCKCHIP_VOP2_EP_MIPI0:
+		div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_MIPI0_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX;
+		val = rk3588_get_mipi_port_mux(vp->id);
+		die |= RK3588_SYS_DSP_INFACE_EN_MIPI0 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX, !!val);
+		break;
+	case ROCKCHIP_VOP2_EP_MIPI1:
+		div &= ~RK3588_DSP_IF_MIPI1_PCLK_DIV;
+		div |= FIELD_PREP(RK3588_DSP_IF_MIPI1_PCLK_DIV, if_pixclk_div);
+		die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX;
+		val = rk3588_get_mipi_port_mux(vp->id);
+		die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, val);
+		break;
+	case ROCKCHIP_VOP2_EP_DP0:
+		die &= ~RK3588_SYS_DSP_INFACE_EN_DP0_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_DP0 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP0_MUX, vp->id);
+		dip &= ~RK3588_DSP_IF_POL__DP0_PIN_POL;
+		dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags);
+		break;
+	case ROCKCHIP_VOP2_EP_DP1:
+		die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX;
+		die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 |
+			   FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, vp->id);
+		dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL;
+		dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags);
+		break;
+	default:
+		drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id);
+		return 0;
+	}
+
+	dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD;
+
+	vop2_vp_write(vp, RK3588_VP_CLK_CTRL, vp_clk_div);
+	vop2_writel(vop2, RK3568_DSP_IF_EN, die);
+	vop2_writel(vop2, RK3568_DSP_IF_CTRL, div);
+	vop2_writel(vop2, RK3568_DSP_IF_POL, dip);
+
+	return clock;
+}
+
+static unsigned long vop2_set_intf_mux(struct vop2_video_port *vp, int ep_id, u32 polflags)
+{
+	struct vop2 *vop2 = vp->vop2;
+
+	if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568)
+		return rk3568_set_intf_mux(vp, ep_id, polflags);
+	else if (vop2->data->soc_id == 3588)
+		return rk3588_set_intf_mux(vp, ep_id, polflags);
+	else
+		return 0;
 }
 
 static int us_to_vertical_line(struct drm_display_mode *mode, int us)
@@ -1648,9 +1983,17 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 	drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) {
 		struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder);
 
-		rk3568_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
+		/*
+		 * for drive a high resolution(4KP120, 8K), vop on rk3588/rk3576 need
+		 * process multi(1/2/4/8) pixels per cycle, so the dclk feed by the
+		 * system cru may be the 1/2 or 1/4 of mode->clock.
+		 */
+		clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
 	}
 
+	if (!clock)
+		return;
+
 	if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
 	    !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT))
 		out_mode = ROCKCHIP_OUT_MODE_P888;
@@ -1661,6 +2004,8 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode))
 		dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP;
+	if (vop2_output_rg_swap(vop2, vcstate->bus_format))
+		dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RG_SWAP;
 
 	if (vcstate->yuv_overlay)
 		dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y;
@@ -2022,6 +2367,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 			port_sel &= ~RK3568_OVL_PORT_SEL__CLUSTER1;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__CLUSTER1, vp->id);
 			break;
+		case ROCKCHIP_VOP2_CLUSTER2:
+			port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER2;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER2, vp->id);
+			break;
+		case ROCKCHIP_VOP2_CLUSTER3:
+			port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER3;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER3, vp->id);
+			break;
 		case ROCKCHIP_VOP2_ESMART0:
 			port_sel &= ~RK3568_OVL_PORT_SEL__ESMART0;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART0, vp->id);
@@ -2030,6 +2383,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp)
 			port_sel &= ~RK3568_OVL_PORT_SEL__ESMART1;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART1, vp->id);
 			break;
+		case ROCKCHIP_VOP2_ESMART2:
+			port_sel &= ~RK3588_OVL_PORT_SEL__ESMART2;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART2, vp->id);
+			break;
+		case ROCKCHIP_VOP2_ESMART3:
+			port_sel &= ~RK3588_OVL_PORT_SEL__ESMART3;
+			port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART3, vp->id);
+			break;
 		case ROCKCHIP_VOP2_SMART0:
 			port_sel &= ~RK3568_OVL_PORT_SEL__SMART0;
 			port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__SMART0, vp->id);
@@ -2766,8 +3127,29 @@ static int vop2_bind(struct device *dev, struct device *master, void *data)
 		if (IS_ERR(vop2->lut_regs))
 			return PTR_ERR(vop2->lut_regs);
 	}
+	if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF) {
+		vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+		if (IS_ERR(vop2->sys_grf))
+			return dev_err_probe(dev, PTR_ERR(vop2->sys_grf), "cannot get sys_grf");
+	}
+
+	if (vop2_data->feature & VOP2_FEATURE_HAS_VOP_GRF) {
+		vop2->vop_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vop-grf");
+		if (IS_ERR(vop2->vop_grf))
+			return dev_err_probe(dev, PTR_ERR(vop2->vop_grf), "cannot get vop_grf");
+	}
+
+	if (vop2_data->feature & VOP2_FEATURE_HAS_VO1_GRF) {
+		vop2->vo1_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vo1-grf");
+		if (IS_ERR(vop2->vo1_grf))
+			return dev_err_probe(dev, PTR_ERR(vop2->vo1_grf), "cannot get vo1_grf");
+	}
 
-	vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf");
+	if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_PMU) {
+		vop2->sys_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,pmu");
+		if (IS_ERR(vop2->sys_pmu))
+			return dev_err_probe(dev, PTR_ERR(vop2->sys_pmu), "cannot get sys_pmu");
+	}
 
 	vop2->hclk = devm_clk_get(vop2->dev, "hclk");
 	if (IS_ERR(vop2->hclk)) {
@@ -2781,6 +3163,12 @@ static int vop2_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(vop2->aclk);
 	}
 
+	vop2->pclk = devm_clk_get_optional(vop2->dev, "pclk_vop");
+	if (IS_ERR(vop2->pclk)) {
+		drm_err(vop2->drm, "failed to get pclk source\n");
+		return PTR_ERR(vop2->pclk);
+	}
+
 	vop2->irq = platform_get_irq(pdev, 0);
 	if (vop2->irq < 0) {
 		drm_err(vop2->drm, "cannot find irq for vop2\n");
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
index 8d7ff52523fb4..2763e54350a94 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -13,9 +13,16 @@
 
 #define VOP_FEATURE_OUTPUT_10BIT        BIT(0)
 
+#define VOP2_FEATURE_HAS_SYS_GRF	BIT(0)
+#define VOP2_FEATURE_HAS_VO0_GRF	BIT(1)
+#define VOP2_FEATURE_HAS_VO1_GRF	BIT(2)
+#define VOP2_FEATURE_HAS_VOP_GRF	BIT(3)
+#define VOP2_FEATURE_HAS_SYS_PMU	BIT(4)
+
 #define WIN_FEATURE_AFBDC		BIT(0)
 #define WIN_FEATURE_CLUSTER		BIT(1)
 
+#define HIWORD_UPDATE(v, h, l)  ((GENMASK(h, l) << 16) | ((v) << (l)))
 /*
  *  the delay number of a window in different mode.
  */
@@ -38,6 +45,18 @@ enum vop2_scale_down_mode {
 	VOP2_SCALE_DOWN_AVG,
 };
 
+/*
+ * vop2 internal power domain id,
+ * should be all none zero, 0 will be treat as invalid;
+ */
+#define VOP2_PD_CLUSTER0	BIT(0)
+#define VOP2_PD_CLUSTER1	BIT(1)
+#define VOP2_PD_CLUSTER2	BIT(2)
+#define VOP2_PD_CLUSTER3	BIT(3)
+#define VOP2_PD_DSC_8K		BIT(5)
+#define VOP2_PD_DSC_4K		BIT(6)
+#define VOP2_PD_ESMART		BIT(7)
+
 enum vop2_win_regs {
 	VOP2_WIN_ENABLE,
 	VOP2_WIN_FORMAT,
@@ -138,6 +157,7 @@ struct vop2_video_port_data {
 
 struct vop2_data {
 	u8 nr_vps;
+	u64 feature;
 	const struct vop2_win_data *win;
 	const struct vop2_video_port_data *vp;
 	struct vop_rect max_input;
@@ -192,6 +212,11 @@ enum dst_factor_mode {
 };
 
 #define RK3568_GRF_VO_CON1			0x0364
+
+#define RK3588_GRF_SOC_CON1			0x0304
+#define RK3588_GRF_VOP_CON2			0x08
+#define RK3588_GRF_VO1_CON0			0x00
+
 /* System registers definition */
 #define RK3568_REG_CFG_DONE			0x000
 #define RK3568_VERSION_INFO			0x004
@@ -200,6 +225,7 @@ enum dst_factor_mode {
 #define RK3568_DSP_IF_EN			0x028
 #define RK3568_DSP_IF_CTRL			0x02c
 #define RK3568_DSP_IF_POL			0x030
+#define RK3588_SYS_PD_CTRL			0x034
 #define RK3568_WB_CTRL				0x40
 #define RK3568_WB_XSCAL_FACTOR			0x44
 #define RK3568_WB_YRGB_MST			0x48
@@ -220,9 +246,14 @@ enum dst_factor_mode {
 #define RK3568_VP_INT_RAW_STATUS(vp)		(0xAC + (vp) * 0x10)
 
 /* Video Port registers definition */
+#define RK3568_VP0_CTRL_BASE			0x0C00
+#define RK3568_VP1_CTRL_BASE			0x0D00
+#define RK3568_VP2_CTRL_BASE			0x0E00
+#define RK3588_VP3_CTRL_BASE			0x0F00
 #define RK3568_VP_DSP_CTRL			0x00
 #define RK3568_VP_MIPI_CTRL			0x04
 #define RK3568_VP_COLOR_BAR_CTRL		0x08
+#define RK3588_VP_CLK_CTRL			0x0C
 #define RK3568_VP_3D_LUT_CTRL			0x10
 #define RK3568_VP_3D_LUT_MST			0x20
 #define RK3568_VP_DSP_BG			0x2C
@@ -264,6 +295,17 @@ enum dst_factor_mode {
 #define RK3568_SMART_DLY_NUM			0x6F8
 
 /* Cluster register definition, offset relative to window base */
+#define RK3568_CLUSTER0_CTRL_BASE		0x1000
+#define RK3568_CLUSTER1_CTRL_BASE		0x1200
+#define RK3588_CLUSTER2_CTRL_BASE		0x1400
+#define RK3588_CLUSTER3_CTRL_BASE		0x1600
+#define RK3568_ESMART0_CTRL_BASE		0x1800
+#define RK3568_ESMART1_CTRL_BASE		0x1A00
+#define RK3568_SMART0_CTRL_BASE			0x1C00
+#define RK3568_SMART1_CTRL_BASE			0x1E00
+#define RK3588_ESMART2_CTRL_BASE		0x1C00
+#define RK3588_ESMART3_CTRL_BASE		0x1E00
+
 #define RK3568_CLUSTER_WIN_CTRL0		0x00
 #define RK3568_CLUSTER_WIN_CTRL1		0x04
 #define RK3568_CLUSTER_WIN_YRGB_MST		0x10
@@ -357,13 +399,18 @@ enum dst_factor_mode {
 #define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN		BIT(17)
 #define RK3568_VP_DSP_CTRL__PRE_DITHER_DOWN_EN		BIT(16)
 #define RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y		BIT(15)
+#define RK3568_VP_DSP_CTRL__DSP_RG_SWAP			BIT(10)
 #define RK3568_VP_DSP_CTRL__DSP_RB_SWAP			BIT(9)
+#define RK3568_VP_DSP_CTRL__DSP_BG_SWAP			BIT(8)
 #define RK3568_VP_DSP_CTRL__DSP_INTERLACE		BIT(7)
 #define RK3568_VP_DSP_CTRL__DSP_FILED_POL		BIT(6)
 #define RK3568_VP_DSP_CTRL__P2I_EN			BIT(5)
 #define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV		BIT(4)
 #define RK3568_VP_DSP_CTRL__OUT_MODE			GENMASK(3, 0)
 
+#define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV		GENMASK(3, 2)
+#define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV		GENMASK(1, 0)
+
 #define RK3568_VP_POST_SCL_CTRL__VSCALEDOWN		BIT(1)
 #define RK3568_VP_POST_SCL_CTRL__HSCALEDOWN		BIT(0)
 
@@ -382,11 +429,37 @@ enum dst_factor_mode {
 #define RK3568_SYS_DSP_INFACE_EN_HDMI			BIT(1)
 #define RK3568_SYS_DSP_INFACE_EN_RGB			BIT(0)
 
+#define RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX		GENMASK(22, 21)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX		GENMASK(20, 20)
+#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX		GENMASK(19, 18)
+#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX		GENMASK(17, 16)
+#define RK3588_SYS_DSP_INFACE_EN_DP1_MUX		GENMASK(15, 14)
+#define RK3588_SYS_DSP_INFACE_EN_DP0_MUX		GENMASK(13, 12)
+#define RK3588_SYS_DSP_INFACE_EN_DPI			GENMASK(9, 8)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI1			BIT(7)
+#define RK3588_SYS_DSP_INFACE_EN_MIPI0			BIT(6)
+#define RK3588_SYS_DSP_INFACE_EN_HDMI1			BIT(5)
+#define RK3588_SYS_DSP_INFACE_EN_EDP1			BIT(4)
+#define RK3588_SYS_DSP_INFACE_EN_HDMI0			BIT(3)
+#define RK3588_SYS_DSP_INFACE_EN_EDP0			BIT(2)
+#define RK3588_SYS_DSP_INFACE_EN_DP1			BIT(1)
+#define RK3588_SYS_DSP_INFACE_EN_DP0			BIT(0)
+
+#define RK3588_DSP_IF_MIPI1_PCLK_DIV			GENMASK(27, 26)
+#define RK3588_DSP_IF_MIPI0_PCLK_DIV			GENMASK(25, 24)
+#define RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV		GENMASK(22, 22)
+#define RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV		GENMASK(21, 20)
+#define RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV		GENMASK(18, 18)
+#define RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV		GENMASK(17, 16)
+
 #define RK3568_DSP_IF_POL__MIPI_PIN_POL			GENMASK(19, 16)
 #define RK3568_DSP_IF_POL__EDP_PIN_POL			GENMASK(15, 12)
 #define RK3568_DSP_IF_POL__HDMI_PIN_POL			GENMASK(7, 4)
 #define RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL		GENMASK(3, 0)
 
+#define RK3588_DSP_IF_POL__DP1_PIN_POL			GENMASK(14, 12)
+#define RK3588_DSP_IF_POL__DP0_PIN_POL			GENMASK(10, 8)
+
 #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK	BIT(5)
 #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2			BIT(4)
 
@@ -408,8 +481,12 @@ enum dst_factor_mode {
 #define RK3568_OVL_PORT_SEL__SEL_PORT			GENMASK(31, 16)
 #define RK3568_OVL_PORT_SEL__SMART1			GENMASK(31, 30)
 #define RK3568_OVL_PORT_SEL__SMART0			GENMASK(29, 28)
+#define RK3588_OVL_PORT_SEL__ESMART3			GENMASK(31, 30)
+#define RK3588_OVL_PORT_SEL__ESMART2			GENMASK(29, 28)
 #define RK3568_OVL_PORT_SEL__ESMART1			GENMASK(27, 26)
 #define RK3568_OVL_PORT_SEL__ESMART0			GENMASK(25, 24)
+#define RK3588_OVL_PORT_SEL__CLUSTER3			GENMASK(23, 22)
+#define RK3588_OVL_PORT_SEL__CLUSTER2			GENMASK(21, 20)
 #define RK3568_OVL_PORT_SEL__CLUSTER1			GENMASK(19, 18)
 #define RK3568_OVL_PORT_SEL__CLUSTER0			GENMASK(17, 16)
 #define RK3568_OVL_PORT_SET__PORT2_MUX			GENMASK(11, 8)
@@ -422,6 +499,10 @@ enum dst_factor_mode {
 #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_1		GENMASK(15, 8)
 #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_0		GENMASK(7, 0)
 
+#define RK3568_CLUSTER_WIN_CTRL0__WIN0_EN		BIT(0)
+
+#define RK3568_SMART_REGION0_CTRL__WIN0_EN		BIT(0)
+
 #define RK3568_SMART_DLY_NUM__SMART1			GENMASK(31, 24)
 #define RK3568_SMART_DLY_NUM__SMART0			GENMASK(23, 16)
 #define RK3568_SMART_DLY_NUM__ESMART1			GENMASK(15, 8)
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
index 2c45d81983a5a..275d265891dbf 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -34,6 +34,30 @@ static const uint32_t formats_cluster[] = {
 	DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */
 };
 
+static const uint32_t formats_esmart[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_RGB888,
+	DRM_FORMAT_BGR888,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV21, /* yvu420_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV61, /* yvu422_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */
+	DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV42, /* yvu444_8bit linear mode, 2 plane */
+	DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */
+	DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */
+	DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */
+	DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */
+	DRM_FORMAT_YUYV, /* yuv422_8bit[YUYV] linear mode */
+	DRM_FORMAT_UYVY, /* yuv422_8bit[UYVY] linear mode */
+};
+
 static const uint32_t formats_rk356x_esmart[] = {
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_ARGB8888,
@@ -236,7 +260,188 @@ static const struct vop2_win_data rk3568_vop_win_data[] = {
 	},
 };
 
+static const struct vop2_video_port_data rk3588_vop_video_ports[] = {
+	{
+		.id = 0,
+		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.gamma_lut_len = 1024,
+		.cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */
+		.max_output = { 4096, 2304 },
+		/* hdr2sdr sdr2hdr hdr2hdr sdr2sdr */
+		.pre_scan_max_dly = { 76, 65, 65, 54 },
+		.offset = 0xc00,
+	}, {
+		.id = 1,
+		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.gamma_lut_len = 1024,
+		.cubic_lut_len = 729, /* 9x9x9 */
+		.max_output = { 4096, 2304 },
+		.pre_scan_max_dly = { 76, 65, 65, 54 },
+		.offset = 0xd00,
+	}, {
+		.id = 2,
+		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.gamma_lut_len = 1024,
+		.cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */
+		.max_output = { 4096, 2304 },
+		.pre_scan_max_dly = { 52, 52, 52, 52 },
+		.offset = 0xe00,
+	}, {
+		.id = 3,
+		.gamma_lut_len = 1024,
+		.max_output = { 2048, 1536 },
+		.pre_scan_max_dly = { 52, 52, 52, 52 },
+		.offset = 0xf00,
+	},
+};
+
+/*
+ * rk3588 vop with 4 cluster, 4 esmart win.
+ * Every cluster can work as 4K win or split into two win.
+ * All win in cluster support AFBCD.
+ *
+ * Every esmart win and smart win support 4 Multi-region.
+ *
+ * Scale filter mode:
+ *
+ * * Cluster:  bicubic for horizontal scale up, others use bilinear
+ * * ESmart:
+ *    * nearest-neighbor/bilinear/bicubic for scale up
+ *    * nearest-neighbor/bilinear/average for scale down
+ *
+ * AXI Read ID assignment:
+ * Two AXI bus:
+ * AXI0 is a read/write bus with a higher performance.
+ * AXI1 is a read only bus.
+ *
+ * Every window on a AXI bus must assigned two unique
+ * read id(yrgb_id/uv_id, valid id are 0x1~0xe).
+ *
+ * AXI0:
+ * Cluster0/1, Esmart0/1, WriteBack
+ *
+ * AXI 1:
+ * Cluster2/3, Esmart2/3
+ *
+ */
+static const struct vop2_win_data rk3588_vop_win_data[] = {
+	{
+		.name = "Cluster0-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER0,
+		.base = 0x1000,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 0,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Cluster1-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER1,
+		.base = 0x1200,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 1,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Cluster2-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER2,
+		.base = 0x1400,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 4,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Cluster3-win0",
+		.phys_id = ROCKCHIP_VOP2_CLUSTER3,
+		.base = 0x1600,
+		.formats = formats_cluster,
+		.nformats = ARRAY_SIZE(formats_cluster),
+		.format_modifiers = format_modifiers_afbc,
+		.layer_sel_id = 5,
+		.supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 |
+				       DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.max_upscale_factor = 4,
+		.max_downscale_factor = 4,
+		.dly = { 4, 26, 29 },
+		.feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER,
+	}, {
+		.name = "Esmart0-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART0,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.base = 0x1800,
+		.layer_sel_id = 2,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	}, {
+		.name = "Esmart1-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART1,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.base = 0x1a00,
+		.layer_sel_id = 3,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	}, {
+		.name = "Esmart2-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART2,
+		.base = 0x1c00,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.layer_sel_id = 6,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	}, {
+		.name = "Esmart3-win0",
+		.phys_id = ROCKCHIP_VOP2_ESMART3,
+		.formats = formats_esmart,
+		.nformats = ARRAY_SIZE(formats_esmart),
+		.format_modifiers = format_modifiers,
+		.base = 0x1e00,
+		.layer_sel_id = 7,
+		.supported_rotations = DRM_MODE_REFLECT_Y,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.max_upscale_factor = 8,
+		.max_downscale_factor = 8,
+		.dly = { 23, 45, 48 },
+	},
+};
+
 static const struct vop2_data rk3566_vop = {
+	.feature = VOP2_FEATURE_HAS_SYS_GRF,
 	.nr_vps = 3,
 	.max_input = { 4096, 2304 },
 	.max_output = { 4096, 2304 },
@@ -247,6 +452,7 @@ static const struct vop2_data rk3566_vop = {
 };
 
 static const struct vop2_data rk3568_vop = {
+	.feature = VOP2_FEATURE_HAS_SYS_GRF,
 	.nr_vps = 3,
 	.max_input = { 4096, 2304 },
 	.max_output = { 4096, 2304 },
@@ -256,6 +462,18 @@ static const struct vop2_data rk3568_vop = {
 	.soc_id = 3568,
 };
 
+static const struct vop2_data rk3588_vop = {
+	.feature = VOP2_FEATURE_HAS_SYS_GRF | VOP2_FEATURE_HAS_VO1_GRF |
+		   VOP2_FEATURE_HAS_VOP_GRF | VOP2_FEATURE_HAS_SYS_PMU,
+	.nr_vps = 4,
+	.max_input = { 4096, 4320 },
+	.max_output = { 4096, 4320 },
+	.vp = rk3588_vop_video_ports,
+	.win = rk3588_vop_win_data,
+	.win_size = ARRAY_SIZE(rk3588_vop_win_data),
+	.soc_id = 3588,
+};
+
 static const struct of_device_id vop2_dt_match[] = {
 	{
 		.compatible = "rockchip,rk3566-vop",
@@ -263,6 +481,9 @@ static const struct of_device_id vop2_dt_match[] = {
 	}, {
 		.compatible = "rockchip,rk3568-vop",
 		.data = &rk3568_vop,
+	}, {
+		.compatible = "rockchip,rk3588-vop",
+		.data = &rk3588_vop
 	}, {
 	},
 };
-- 
GitLab


From 9d7fe7704d534c2d043aff2987f10671a8b4373d Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 19:59:31 +0800
Subject: [PATCH 1045/2340] drm/rockchip: vop2: rename VOP_FEATURE_OUTPUT_10BIT
 to VOP2_VP_FEATURE_OUTPUT_10BIT

VOP2 has multiple independent video ports with different
feature, so rename VOP_FEATURE_OUTPUT_10BIT to
VOP2_VP_FEATURE_OUTPUT_10BIT for more clearly meaning.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211115931.1785495-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 +-
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 2 +-
 drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 8c74af3fa6a85..44508c2dd6145 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1995,7 +1995,7 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 		return;
 
 	if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
-	    !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT))
+	    !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
 		out_mode = ROCKCHIP_OUT_MODE_P888;
 	else
 		out_mode = vcstate->output_mode;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
index 2763e54350a94..615a16196aff6 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -11,7 +11,7 @@
 #include <drm/drm_modes.h>
 #include "rockchip_drm_vop.h"
 
-#define VOP_FEATURE_OUTPUT_10BIT        BIT(0)
+#define VOP2_VP_FEATURE_OUTPUT_10BIT        BIT(0)
 
 #define VOP2_FEATURE_HAS_SYS_GRF	BIT(0)
 #define VOP2_FEATURE_HAS_VO0_GRF	BIT(1)
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
index 275d265891dbf..48170694ac6b8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -136,7 +136,7 @@ static const uint64_t format_modifiers_afbc[] = {
 static const struct vop2_video_port_data rk3568_vop_video_ports[] = {
 	{
 		.id = 0,
-		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
 		.gamma_lut_len = 1024,
 		.cubic_lut_len = 9 * 9 * 9,
 		.max_output = { 4096, 2304 },
@@ -263,7 +263,7 @@ static const struct vop2_win_data rk3568_vop_win_data[] = {
 static const struct vop2_video_port_data rk3588_vop_video_ports[] = {
 	{
 		.id = 0,
-		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
 		.gamma_lut_len = 1024,
 		.cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */
 		.max_output = { 4096, 2304 },
@@ -272,7 +272,7 @@ static const struct vop2_video_port_data rk3588_vop_video_ports[] = {
 		.offset = 0xc00,
 	}, {
 		.id = 1,
-		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
 		.gamma_lut_len = 1024,
 		.cubic_lut_len = 729, /* 9x9x9 */
 		.max_output = { 4096, 2304 },
@@ -280,7 +280,7 @@ static const struct vop2_video_port_data rk3588_vop_video_ports[] = {
 		.offset = 0xd00,
 	}, {
 		.id = 2,
-		.feature = VOP_FEATURE_OUTPUT_10BIT,
+		.feature = VOP2_VP_FEATURE_OUTPUT_10BIT,
 		.gamma_lut_len = 1024,
 		.cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */
 		.max_output = { 4096, 2304 },
-- 
GitLab


From 6c3ab21f37a97a868193ccbeb8a492e51210ff31 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Mon, 11 Dec 2023 20:00:23 +0800
Subject: [PATCH 1046/2340] MAINTAINERS: Add myself as a reviewer for rockchip
 drm

As I am familiar with all the details of vop2 display
architecture, I can help review and test all related
changes in this subsystem, so add my email here to make
sure I get CC'd on rockchip drm changes.

Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211120023.1785687-1-andyshrk@163.com
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 98ffa3ed90a0b..f2cee321099fb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7169,6 +7169,7 @@ F:	include/linux/platform_data/shmob_drm.h
 DRM DRIVERS FOR ROCKCHIP
 M:	Sandy Huang <hjc@rock-chips.com>
 M:	Heiko Stübner <heiko@sntech.de>
+M:	Andy Yan <andy.yan@rock-chips.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
-- 
GitLab


From c4c5391adae2c5a328232bb4fecd9510310b2fdf Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 12 Dec 2023 20:39:25 -0800
Subject: [PATCH 1047/2340] drm/fourcc: fix spelling/typos

Correct spelling mistakes that were identified by codespell.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213043925.13852-1-rdunlap@infradead.org
---
 include/uapi/drm/drm_fourcc.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 3151f1fc7ebb4..84d502e429614 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -54,7 +54,7 @@ extern "C" {
  * Format modifiers may change any property of the buffer, including the number
  * of planes and/or the required allocation size. Format modifiers are
  * vendor-namespaced, and as such the relationship between a fourcc code and a
- * modifier is specific to the modifer being used. For example, some modifiers
+ * modifier is specific to the modifier being used. For example, some modifiers
  * may preserve meaning - such as number of planes - from the fourcc code,
  * whereas others may not.
  *
@@ -79,7 +79,7 @@ extern "C" {
  *   format.
  * - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users
  *   see modifiers as opaque tokens they can check for equality and intersect.
- *   These users musn't need to know to reason about the modifier value
+ *   These users mustn't need to know to reason about the modifier value
  *   (i.e. they are not expected to extract information out of the modifier).
  *
  * Vendors should document their modifier usage in as much detail as
@@ -540,7 +540,7 @@ extern "C" {
  * This is a tiled layout using 4Kb tiles in row-major layout.
  * Within the tile pixels are laid out in 16 256 byte units / sub-tiles which
  * are arranged in four groups (two wide, two high) with column-major layout.
- * Each group therefore consits out of four 256 byte units, which are also laid
+ * Each group therefore consists out of four 256 byte units, which are also laid
  * out as 2x2 column-major.
  * 256 byte units are made out of four 64 byte blocks of pixels, producing
  * either a square block or a 2:1 unit.
@@ -1103,7 +1103,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  */
 
 /*
- * The top 4 bits (out of the 56 bits alloted for specifying vendor specific
+ * The top 4 bits (out of the 56 bits allotted for specifying vendor specific
  * modifiers) denote the category for modifiers. Currently we have three
  * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of
  * sixteen different categories.
@@ -1419,7 +1419,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  * Amlogic FBC Memory Saving mode
  *
  * Indicates the storage is packed when pixel size is multiple of word
- * boudaries, i.e. 8bit should be stored in this mode to save allocation
+ * boundaries, i.e. 8bit should be stored in this mode to save allocation
  * memory.
  *
  * This mode reduces body layout to 3072 bytes per 64x32 superblock with
-- 
GitLab


From 37c476d68d29051f333944bd784d1054b495c5a8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 12 Dec 2023 20:32:26 -0800
Subject: [PATCH 1048/2340] drm/drm_modeset_helper_vtables.h: fix typos/spellos

Fix spelling problems as identified by codespell.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213043226.10046-1-rdunlap@infradead.org
---
 include/drm/drm_modeset_helper_vtables.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index a33cf74887375..881b03e4dc288 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -134,7 +134,7 @@ struct drm_crtc_helper_funcs {
 	 * Since this function is both called from the check phase of an atomic
 	 * commit, and the mode validation in the probe paths it is not allowed
 	 * to look at anything else but the passed-in mode, and validate it
-	 * against configuration-invariant hardward constraints. Any further
+	 * against configuration-invariant hardware constraints. Any further
 	 * limits which depend upon the configuration can only be checked in
 	 * @mode_fixup or @atomic_check.
 	 *
@@ -550,7 +550,7 @@ struct drm_encoder_helper_funcs {
 	 * Since this function is both called from the check phase of an atomic
 	 * commit, and the mode validation in the probe paths it is not allowed
 	 * to look at anything else but the passed-in mode, and validate it
-	 * against configuration-invariant hardward constraints. Any further
+	 * against configuration-invariant hardware constraints. Any further
 	 * limits which depend upon the configuration can only be checked in
 	 * @mode_fixup or @atomic_check.
 	 *
@@ -1474,7 +1474,7 @@ struct drm_mode_config_helper_funcs {
 	 * swapped into the various state pointers. The passed in state
 	 * therefore contains copies of the old/previous state. This hook should
 	 * commit the new state into hardware. Note that the helpers have
-	 * already waited for preceeding atomic commits and fences, but drivers
+	 * already waited for preceding atomic commits and fences, but drivers
 	 * can add more waiting calls at the start of their implementation, e.g.
 	 * to wait for driver-internal request for implicit syncing, before
 	 * starting to commit the update to the hardware.
-- 
GitLab


From 9567dab3a8cb4dfc4b0382c2678ad01bff13a3bf Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 12 Dec 2023 20:41:07 -0800
Subject: [PATCH 1049/2340] drm/uapi: drm_mode.h: fix spellos and grammar

Correct spellos reported by codespell.
Fix some grammar (as 's' to a few words).

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213044107.29214-1-rdunlap@infradead.org
---
 include/uapi/drm/drm_mode.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 95630f1701102..aee3d40c96f06 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -36,10 +36,10 @@ extern "C" {
 /**
  * DOC: overview
  *
- * DRM exposes many UAPI and structure definition to have a consistent
- * and standardized interface with user.
+ * DRM exposes many UAPI and structure definitions to have a consistent
+ * and standardized interface with users.
  * Userspace can refer to these structure definitions and UAPI formats
- * to communicate to driver
+ * to communicate to drivers.
  */
 
 #define DRM_CONNECTOR_NAME_LEN	32
@@ -540,7 +540,7 @@ struct drm_mode_get_connector {
 /* the PROP_ATOMIC flag is used to hide properties from userspace that
  * is not aware of atomic properties.  This is mostly to work around
  * older userspace (DDX drivers) that read/write each prop they find,
- * witout being aware that this could be triggering a lengthy modeset.
+ * without being aware that this could be triggering a lengthy modeset.
  */
 #define DRM_MODE_PROP_ATOMIC        0x80000000
 
@@ -664,7 +664,7 @@ struct drm_mode_fb_cmd {
 };
 
 #define DRM_MODE_FB_INTERLACED	(1<<0) /* for interlaced framebuffers */
-#define DRM_MODE_FB_MODIFIERS	(1<<1) /* enables ->modifer[] */
+#define DRM_MODE_FB_MODIFIERS	(1<<1) /* enables ->modifier[] */
 
 /**
  * struct drm_mode_fb_cmd2 - Frame-buffer metadata.
@@ -881,8 +881,8 @@ struct hdr_metadata_infoframe {
 	 * These are coded as unsigned 16-bit values in units of
 	 * 0.00002, where 0x0000 represents zero and 0xC350
 	 * represents 1.0000.
-	 * @display_primaries.x: X cordinate of color primary.
-	 * @display_primaries.y: Y cordinate of color primary.
+	 * @display_primaries.x: X coordinate of color primary.
+	 * @display_primaries.y: Y coordinate of color primary.
 	 */
 	struct {
 		__u16 x, y;
@@ -892,8 +892,8 @@ struct hdr_metadata_infoframe {
 	 * These are coded as unsigned 16-bit values in units of
 	 * 0.00002, where 0x0000 represents zero and 0xC350
 	 * represents 1.0000.
-	 * @white_point.x: X cordinate of whitepoint of color primary.
-	 * @white_point.y: Y cordinate of whitepoint of color primary.
+	 * @white_point.x: X coordinate of whitepoint of color primary.
+	 * @white_point.y: Y coordinate of whitepoint of color primary.
 	 */
 	struct {
 		__u16 x, y;
-- 
GitLab


From 7d66c8d7398d34b0aca788d21ad63f07ab1a3dbe Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Date: Wed, 13 Dec 2023 12:24:32 +0100
Subject: [PATCH 1050/2340] drm/panel: synaptics-r63353: adjust the includes

Adjust the includes to explicitly include the correct headers.

Suggested-by: Rob Herring <robh@kernel.org>
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213112432.2002832-1-dario.binacchi@amarulasolutions.com
---
 drivers/gpu/drm/panel/panel-synaptics-r63353.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-synaptics-r63353.c b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
index 3f61fcdc550bb..169c629746c71 100644
--- a/drivers/gpu/drm/panel/panel-synaptics-r63353.c
+++ b/drivers/gpu/drm/panel/panel-synaptics-r63353.c
@@ -9,10 +9,9 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/errno.h>
-#include <linux/fb.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/media-bus-format.h>
 
 #include <linux/gpio/consumer.h>
-- 
GitLab


From 1faed97a0a51a098ec5633d65455318f9e4ffc15 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Date: Wed, 13 Dec 2023 12:24:01 +0100
Subject: [PATCH 1051/2340] drm/panel: ilitek-ili9805: adjust the includes

Adjust the includes to explicitly include the correct headers.

Suggested-by: Rob Herring <robh@kernel.org>
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213112401.2000837-1-dario.binacchi@amarulasolutions.com
---
 drivers/gpu/drm/panel/panel-ilitek-ili9805.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9805.c b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
index 5054d1a2b2f54..1cbc25758bd2b 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9805.c
@@ -7,10 +7,9 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/errno.h>
-#include <linux/fb.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #include <linux/gpio/consumer.h>
 #include <linux/regulator/consumer.h>
-- 
GitLab


From 1e9974c7583456ca0bb9ccbf028c86154560f819 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Thu, 7 Dec 2023 16:49:41 +0100
Subject: [PATCH 1052/2340] drm/vc4: hdmi: Create destroy state implementation

Even though we were rolling our own custom state for the vc4 HDMI
controller driver, we were still using the generic helper to destroy
that state.

It was mostly working since the underlying state is the first member of
our state so the pointers are probably equal in all relevant cases, but
it's still fragile so let's fix this properly.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207-kms-hdmi-connector-state-v5-18-6538e19d634d@kernel.org
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 25c9c71256d35..f05e2c95a60d7 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -672,11 +672,21 @@ vc4_hdmi_connector_duplicate_state(struct drm_connector *connector)
 	return &new_state->base;
 }
 
+static void vc4_hdmi_connector_destroy_state(struct drm_connector *connector,
+					     struct drm_connector_state *state)
+{
+	struct vc4_hdmi_connector_state *vc4_state =
+		conn_state_to_vc4_hdmi_conn_state(state);
+
+	__drm_atomic_helper_connector_destroy_state(state);
+	kfree(vc4_state);
+}
+
 static const struct drm_connector_funcs vc4_hdmi_connector_funcs = {
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.reset = vc4_hdmi_connector_reset,
 	.atomic_duplicate_state = vc4_hdmi_connector_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.atomic_destroy_state = vc4_hdmi_connector_destroy_state,
 	.atomic_get_property = vc4_hdmi_connector_get_property,
 	.atomic_set_property = vc4_hdmi_connector_set_property,
 };
-- 
GitLab


From ec2cbaf604f4a5f4bc5484ae86016ebe91236fdc Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Fri, 8 Dec 2023 18:08:56 +0100
Subject: [PATCH 1053/2340] drm/imagination: vm: Fix heap lookup condition

When conditionally checking for heap existence of a certian address in
pvr_vm_bind_op_map_init the condition whether the map request comes from a
user is incorrect: The context must not be the kernel-context to be a
user(space) context and should be looked up in pvr_heaps.

That makes addresses coming from userspace not being verfied against the
defined ranges and prevents firmware loading for meta cores.

Fixes: ff5f643de0bf ("drm/imagination: Add GEM and VM related code")
Signed-off-by: Alex Bee <knaerzche@gmail.com>
Reviewed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208170856.1748413-1-knaerzche@gmail.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index f42345fbe4bf9..82690cee978c4 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -225,7 +225,7 @@ pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op,
 			u64 device_addr, u64 size)
 {
 	struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
-	const bool is_user = vm_ctx == vm_ctx->pvr_dev->kernel_vm_ctx;
+	const bool is_user = vm_ctx != vm_ctx->pvr_dev->kernel_vm_ctx;
 	const u64 pvr_obj_size = pvr_gem_object_size(pvr_obj);
 	struct sg_table *sgt;
 	u64 offset_plus_size;
-- 
GitLab


From 7b1f8da7e17418831839e2d11774e2090cdbe473 Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:22 +0530
Subject: [PATCH 1054/2340] drm: ci: igt_runner: Remove todo

/sys/kernel/debug/dri/*/state exist for every atomic KMS driver.
We do not test non-atomic drivers, so remove the todo.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-2-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/igt_runner.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/ci/igt_runner.sh b/drivers/gpu/drm/ci/igt_runner.sh
index 2f815ee3a8a36..c6cf963592c51 100755
--- a/drivers/gpu/drm/ci/igt_runner.sh
+++ b/drivers/gpu/drm/ci/igt_runner.sh
@@ -15,7 +15,6 @@ cat /sys/kernel/debug/device_component/*
 '
 
 # Dump drm state to confirm that kernel was able to find a connected display:
-# TODO this path might not exist for all drivers.. maybe run modetest instead?
 set +e
 cat /sys/kernel/debug/dri/*/state
 set -e
-- 
GitLab


From 910d2d85febf93a115cdec961c75e6b208532eac Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:23 +0530
Subject: [PATCH 1055/2340] drm: ci: Force db410c to host mode

Force db410c to host mode to fix network issue which results in failure
to mount root fs via NFS.
See https://gitlab.freedesktop.org/gfx-ci/linux/-/commit/cb72a629

Use apq8016-sbc-usb-host.dtb which allows the USB controllers
to work in host mode.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-3-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/build.sh | 2 +-
 drivers/gpu/drm/ci/test.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh
index e5c5dcedd1084..e2260b4a1c671 100644
--- a/drivers/gpu/drm/ci/build.sh
+++ b/drivers/gpu/drm/ci/build.sh
@@ -19,7 +19,7 @@ if [[ "$KERNEL_ARCH" = "arm64" ]]; then
     DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dtb"
-    DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8016-sbc.dtb"
+    DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8016-sbc-usb-host.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/qcom/apq8096-db820c.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3.dtb"
     DEVICE_TREES+=" arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtb"
diff --git a/drivers/gpu/drm/ci/test.yml b/drivers/gpu/drm/ci/test.yml
index f285ed67eb3d8..4af10f0ff82d4 100644
--- a/drivers/gpu/drm/ci/test.yml
+++ b/drivers/gpu/drm/ci/test.yml
@@ -102,7 +102,7 @@ msm:apq8016:
   stage: msm
   variables:
     DRIVER_NAME: msm
-    BM_DTB: https://${PIPELINE_ARTIFACTS_BASE}/arm64/apq8016-sbc.dtb
+    BM_DTB: https://${PIPELINE_ARTIFACTS_BASE}/arm64/apq8016-sbc-usb-host.dtb
     GPU_VERSION: apq8016
     BM_CMDLINE: "ip=dhcp console=ttyMSM0,115200n8 $BM_KERNEL_EXTRA_ARGS root=/dev/nfs rw nfsrootdebug nfsroot=,tcp,nfsvers=4.2 init=/init $BM_KERNELARGS"
     RUNNER_TAG: google-freedreno-db410c
-- 
GitLab


From 257893829a7f077153f437fe8b0d56f74251ad31 Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:24 +0530
Subject: [PATCH 1056/2340] drm: ci: arm64.config: Enable DA9211 regulator

Mediatek mt8173 board fails to boot with DA9211 regulator disabled.
Enable CONFIG_REGULATOR_DA9211=y in arm64.config to fix mt8173 boot
issue.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-4-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/arm64.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ci/arm64.config b/drivers/gpu/drm/ci/arm64.config
index b4f6534178838..8dbce9919a57e 100644
--- a/drivers/gpu/drm/ci/arm64.config
+++ b/drivers/gpu/drm/ci/arm64.config
@@ -186,6 +186,7 @@ CONFIG_HW_RANDOM_MTK=y
 CONFIG_MTK_DEVAPC=y
 CONFIG_PWM_MTK_DISP=y
 CONFIG_MTK_CMDQ=y
+CONFIG_REGULATOR_DA9211=y
 
 # For nouveau.  Note that DRM must be a module so that it's loaded after NFS is up to provide the firmware.
 CONFIG_ARCH_TEGRA=y
-- 
GitLab


From 7879c158a153f1cd113640c5644260cb1f619c35 Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:25 +0530
Subject: [PATCH 1057/2340] drm: ci: Enable new jobs

Enable the following jobs, as the issues noted in the
TODO comments have been resolved. This will ensure that these jobs
are now included and executed as part of the CI/CD pipeline.

msm:apq8016:
TODO: current issue: it is not fiding the NFS root.

mediatek:mt8173:
TODO: current issue: device is hanging.

virtio_gpu:none:
TODO: current issue: malloc(): corrupted top size.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-5-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/test.yml | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/ci/test.yml b/drivers/gpu/drm/ci/test.yml
index 4af10f0ff82d4..e0fdc55c9b69d 100644
--- a/drivers/gpu/drm/ci/test.yml
+++ b/drivers/gpu/drm/ci/test.yml
@@ -108,9 +108,6 @@ msm:apq8016:
     RUNNER_TAG: google-freedreno-db410c
   script:
     - ./install/bare-metal/fastboot.sh
-  rules:
-    # TODO: current issue: it is not fiding the NFS root. Fix and remove this rule.
-    - when: never
 
 msm:apq8096:
   extends:
@@ -280,9 +277,6 @@ mediatek:mt8173:
     DEVICE_TYPE: mt8173-elm-hana
     GPU_VERSION: mt8173
     RUNNER_TAG: mesa-ci-x86-64-lava-mt8173-elm-hana
-  rules:
-    # TODO: current issue: device is hanging. Fix and remove this rule.
-    - when: never
 
 mediatek:mt8183:
   extends:
@@ -340,6 +334,3 @@ virtio_gpu:none:
     - debian/x86_64_test-gl
     - testing:x86_64
     - igt:x86_64
-  rules:
-    # TODO: current issue: malloc(): corrupted top size. Fix and remove this rule.
-    - when: never
\ No newline at end of file
-- 
GitLab


From 34ec92879b379b52cc2581d5392b7dfb8c45857f Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:26 +0530
Subject: [PATCH 1058/2340] drm: ci: Use scripts/config to enable/disable
 configs

Instead of modifying files in git to enable/disable
configs, use scripts/config on the .config file which
will be used for building the kernel.

Acked-by: Helen Koike <helen.koike@collabora.com>
Suggested-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-6-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/build.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh
index e2260b4a1c671..97ff43759b91d 100644
--- a/drivers/gpu/drm/ci/build.sh
+++ b/drivers/gpu/drm/ci/build.sh
@@ -75,19 +75,19 @@ else
     fi
 fi
 
-for opt in $ENABLE_KCONFIGS; do
-  echo CONFIG_$opt=y >> drivers/gpu/drm/ci/${KERNEL_ARCH}.config
-done
-for opt in $DISABLE_KCONFIGS; do
-  echo CONFIG_$opt=n >> drivers/gpu/drm/ci/${KERNEL_ARCH}.config
-done
-
 if [[ -n "${MERGE_FRAGMENT}" ]]; then
     ./scripts/kconfig/merge_config.sh ${DEFCONFIG} drivers/gpu/drm/ci/${MERGE_FRAGMENT}
 else
     make `basename ${DEFCONFIG}`
 fi
 
+for opt in $ENABLE_KCONFIGS; do
+    ./scripts/config --enable CONFIG_$opt
+done
+for opt in $DISABLE_KCONFIGS; do
+    ./scripts/config --disable CONFIG_$opt
+done
+
 make ${KERNEL_IMAGE_NAME}
 
 mkdir -p /lava-files/
-- 
GitLab


From 3f1c87ddfa7915527da99eff4fe24edc96b52bd8 Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:27 +0530
Subject: [PATCH 1059/2340] drm: ci: mt8173: Do not set IGT_FORCE_DRIVER to
 panfrost

Mediatek 8173 and 8183 SOCs have a separate display controller
and GPU with different drivers for each. For mt8173, the GPU
driver is powervr and the display driver is mediatek. In the
case of mt8183, the GPU driver is panfrost and the display
driver is mediatek. Setting IGT_FORCE_DRIVER to panfrost for
mt8173 is not the expected driver. So set mediatek for mt8173.

Support to test both GPU and display drivers for these ARM SOCs
will be added in the next patch series to increase test coverage.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-7-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/igt_runner.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ci/igt_runner.sh b/drivers/gpu/drm/ci/igt_runner.sh
index c6cf963592c51..f1a08b9b146ff 100755
--- a/drivers/gpu/drm/ci/igt_runner.sh
+++ b/drivers/gpu/drm/ci/igt_runner.sh
@@ -20,9 +20,16 @@ cat /sys/kernel/debug/dri/*/state
 set -e
 
 case "$DRIVER_NAME" in
-    rockchip|mediatek|meson)
+    rockchip|meson)
         export IGT_FORCE_DRIVER="panfrost"
         ;;
+    mediatek)
+        if [ "$GPU_VERSION" = "mt8173" ]; then
+            export IGT_FORCE_DRIVER=${DRIVER_NAME}
+        elif [ "$GPU_VERSION" = "mt8183" ]; then
+            export IGT_FORCE_DRIVER="panfrost"
+        fi
+        ;;
     amdgpu)
         # Cannot use HWCI_KERNEL_MODULES as at that point we don't have the module in /lib
         mv /install/modules/lib/modules/* /lib/modules/.
-- 
GitLab


From dd1581a35e2fe3179e3c6f7622739b00ea9c2f3d Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:28 +0530
Subject: [PATCH 1060/2340] drm: ci: virtio: Make artifacts available

There were no artifacts available for virtio job.
So make the artifacts available in the pipeline job.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-8-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/test.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/ci/test.yml b/drivers/gpu/drm/ci/test.yml
index e0fdc55c9b69d..2c9a1838e7284 100644
--- a/drivers/gpu/drm/ci/test.yml
+++ b/drivers/gpu/drm/ci/test.yml
@@ -329,6 +329,8 @@ virtio_gpu:none:
   script:
     - ln -sf $CI_PROJECT_DIR/install /install
     - mv install/bzImage /lava-files/bzImage
+    - mkdir -p $CI_PROJECT_DIR/results
+    - ln -sf $CI_PROJECT_DIR/results /results
     - install/crosvm-runner.sh install/igt_runner.sh
   needs:
     - debian/x86_64_test-gl
-- 
GitLab


From 09ac9260916d3678bedc3fd2099a7e485d13e9b4 Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:29 +0530
Subject: [PATCH 1061/2340] drm: ci: uprev IGT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

virtio-gpu kernel driver reports 16 for count_crtcs
which exceeds IGT_MAX_PIPES set to 8 in igt-gpu-tools.
This results in below memory corruption,

 malloc(): corrupted top size
 Received signal SIGABRT.
 Stack trace:
  #0 [fatal_sig_handler+0x17b]
  #1 [__sigaction+0x40]
  #2 [pthread_key_delete+0x14c]
  #3 [gsignal+0x12]
  #4 [abort+0xd3]
  #5 [__fsetlocking+0x290]
  #6 [timer_settime+0x37a]
  #7 [__default_morecore+0x1f1b]
  #8 [__libc_calloc+0x161]
  #9 [drmModeGetPlaneResources+0x44]
  #10 [igt_display_require+0x194]
  #11 [__igt_unique____real_main1356+0x93c]
  #12 [main+0x3f]
  #13 [__libc_init_first+0x8a]
  #14 [__libc_start_main+0x85]
  #15 [_start+0x21]
 
This is fixed in igt-gpu-tools by increasing IGT_MAX_PIPES to 16.  
https://patchwork.freedesktop.org/series/126327/
 
Uprev IGT to include the patches which fixes this issue.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-9-vignesh.raman@collabora.com
---
 drivers/gpu/drm/ci/gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml
index aeb9bab1b0691..dac92cc2777cc 100644
--- a/drivers/gpu/drm/ci/gitlab-ci.yml
+++ b/drivers/gpu/drm/ci/gitlab-ci.yml
@@ -5,7 +5,7 @@ variables:
   UPSTREAM_REPO: git://anongit.freedesktop.org/drm/drm
   TARGET_BRANCH: drm-next
 
-  IGT_VERSION: d1db7333d9c5fbbb05e50b0804123950d9dc1c46
+  IGT_VERSION: d2af13d9f5be5ce23d996e4afd3e45990f5ab977
 
   DEQP_RUNNER_GIT_URL: https://gitlab.freedesktop.org/anholt/deqp-runner.git
   DEQP_RUNNER_GIT_TAG: v0.15.0
-- 
GitLab


From 5f15dc44a99d78e86913c2c6bc59dfd8543c7cd6 Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:30 +0530
Subject: [PATCH 1062/2340] drm/doc: ci: Add IGT version details for flaky
 tests

Document the IGT version in the flaky tests reporting template.

Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-10-vignesh.raman@collabora.com
---
 Documentation/gpu/automated_testing.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/gpu/automated_testing.rst b/Documentation/gpu/automated_testing.rst
index 240e29d5ba68b..2d5a28866afe7 100644
--- a/Documentation/gpu/automated_testing.rst
+++ b/Documentation/gpu/automated_testing.rst
@@ -69,14 +69,15 @@ the result. They will still be run.
 
 Each new flake entry must be associated with a link to the email reporting the
 bug to the author of the affected driver, the board name or Device Tree name of
-the board, the first kernel version affected, and an approximation of the
-failure rate.
+the board, the first kernel version affected, the IGT version used for tests,
+and an approximation of the failure rate.
 
 They should be provided under the following format::
 
   # Bug Report: $LORE_OR_PATCHWORK_URL
   # Board Name: broken-board.dtb
-  # Version: 6.6-rc1
+  # Linux Version: 6.6-rc1
+  # IGT Version: 1.28-gd2af13d9f
   # Failure Rate: 100
   flaky-test
 
-- 
GitLab


From b1a2aa9bcbb88a7dc1c4df98dbf4f4df9ca79c9f Mon Sep 17 00:00:00 2001
From: Vignesh Raman <vignesh.raman@collabora.com>
Date: Thu, 7 Dec 2023 14:48:31 +0530
Subject: [PATCH 1063/2340] drm: ci: Update xfails

Update msm-apq8016-fails, mediatek-mt8173-fails and
virtio_gpu-none-fails to include the tests which fail.

Signed-off-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231207091831.660054-11-vignesh.raman@collabora.com
---
 .../drm/ci/xfails/mediatek-mt8173-fails.txt   | 13 ++++--
 .../gpu/drm/ci/xfails/msm-apq8016-fails.txt   |  5 ++
 .../drm/ci/xfails/virtio_gpu-none-fails.txt   | 46 +++++++++++++++++++
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt
index 671916067dbaf..ef0cb7c3698cf 100644
--- a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt
+++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt
@@ -1,5 +1,4 @@
 kms_3d,Fail
-kms_addfb_basic@addfb25-bad-modifier,Fail
 kms_bw@linear-tiling-1-displays-1920x1080p,Fail
 kms_bw@linear-tiling-1-displays-2560x1440p,Fail
 kms_bw@linear-tiling-1-displays-3840x2160p,Fail
@@ -9,13 +8,19 @@ kms_bw@linear-tiling-2-displays-3840x2160p,Fail
 kms_bw@linear-tiling-3-displays-1920x1080p,Fail
 kms_bw@linear-tiling-3-displays-2560x1440p,Fail
 kms_bw@linear-tiling-3-displays-3840x2160p,Fail
+kms_color@invalid-gamma-lut-sizes,Fail
 kms_color@pipe-A-invalid-gamma-lut-sizes,Fail
 kms_color@pipe-B-invalid-gamma-lut-sizes,Fail
-kms_force_connector_basic@force-connector-state,Fail
+kms_cursor_legacy@cursor-vs-flip-atomic,Fail
+kms_cursor_legacy@cursor-vs-flip-legacy,Fail
+kms_flip@flip-vs-modeset-vs-hang,Fail
+kms_flip@flip-vs-panning-vs-hang,Fail
+kms_flip@flip-vs-suspend,Fail
+kms_flip@flip-vs-suspend-interruptible,Fail
 kms_force_connector_basic@force-edid,Fail
 kms_force_connector_basic@force-load-detect,Fail
 kms_force_connector_basic@prune-stale-modes,Fail
-kms_invalid_mode@int-max-clock,Fail
+kms_hdmi_inject@inject-4k,Fail
 kms_plane_scaling@planes-upscale-20x20,Fail
 kms_plane_scaling@planes-upscale-20x20-downscale-factor-0-25,Fail
 kms_plane_scaling@planes-upscale-20x20-downscale-factor-0-5,Fail
@@ -27,3 +32,5 @@ kms_properties@get_properties-sanity-atomic,Fail
 kms_properties@plane-properties-atomic,Fail
 kms_properties@plane-properties-legacy,Fail
 kms_rmfb@close-fd,Fail
+kms_selftest@drm_format,Timeout
+kms_selftest@drm_format_helper,Timeout
diff --git a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
index 9981682feab28..d39d254c935e9 100644
--- a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
+++ b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt
@@ -6,10 +6,15 @@ kms_cursor_legacy@all-pipes-single-bo,Fail
 kms_cursor_legacy@all-pipes-single-move,Fail
 kms_cursor_legacy@all-pipes-torture-bo,Fail
 kms_cursor_legacy@all-pipes-torture-move,Fail
+kms_cursor_legacy@forked-bo,Fail
+kms_cursor_legacy@forked-move,Fail
 kms_cursor_legacy@pipe-A-forked-bo,Fail
 kms_cursor_legacy@pipe-A-forked-move,Fail
 kms_cursor_legacy@pipe-A-single-bo,Fail
 kms_cursor_legacy@pipe-A-single-move,Fail
 kms_cursor_legacy@pipe-A-torture-bo,Fail
 kms_cursor_legacy@pipe-A-torture-move,Fail
+kms_force_connector_basic@force-edid,Fail
 kms_hdmi_inject@inject-4k,Fail
+kms_selftest@drm_format,Timeout
+kms_selftest@drm_format_helper,Timeout
diff --git a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt
index 9586b2339f6f6..007f21e56d89c 100644
--- a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt
+++ b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt
@@ -10,6 +10,49 @@ kms_bw@linear-tiling-1-displays-3840x2160p,Fail
 kms_bw@linear-tiling-2-displays-1920x1080p,Fail
 kms_bw@linear-tiling-2-displays-2560x1440p,Fail
 kms_bw@linear-tiling-2-displays-3840x2160p,Fail
+kms_bw@linear-tiling-3-displays-1920x1080p,Fail
+kms_bw@linear-tiling-3-displays-2560x1440p,Fail
+kms_bw@linear-tiling-3-displays-3840x2160p,Fail
+kms_bw@linear-tiling-4-displays-1920x1080p,Fail
+kms_bw@linear-tiling-4-displays-2560x1440p,Fail
+kms_bw@linear-tiling-4-displays-3840x2160p,Fail
+kms_bw@linear-tiling-5-displays-1920x1080p,Fail
+kms_bw@linear-tiling-5-displays-2560x1440p,Fail
+kms_bw@linear-tiling-5-displays-3840x2160p,Fail
+kms_bw@linear-tiling-6-displays-1920x1080p,Fail
+kms_bw@linear-tiling-6-displays-2560x1440p,Fail
+kms_bw@linear-tiling-6-displays-3840x2160p,Fail
+kms_bw@linear-tiling-7-displays-1920x1080p,Fail
+kms_bw@linear-tiling-7-displays-2560x1440p,Fail
+kms_bw@linear-tiling-7-displays-3840x2160p,Fail
+kms_bw@linear-tiling-8-displays-1920x1080p,Fail
+kms_bw@linear-tiling-8-displays-2560x1440p,Fail
+kms_bw@linear-tiling-8-displays-3840x2160p,Fail
+kms_flip@absolute-wf_vblank,Fail
+kms_flip@absolute-wf_vblank-interruptible,Fail
+kms_flip@basic-flip-vs-wf_vblank,Fail
+kms_flip@blocking-absolute-wf_vblank,Fail
+kms_flip@blocking-absolute-wf_vblank-interruptible,Fail
+kms_flip@blocking-wf_vblank,Fail
+kms_flip@busy-flip,Fail
+kms_flip@dpms-vs-vblank-race,Fail
+kms_flip@dpms-vs-vblank-race-interruptible,Fail
+kms_flip@flip-vs-absolute-wf_vblank,Fail
+kms_flip@flip-vs-absolute-wf_vblank-interruptible,Fail
+kms_flip@flip-vs-blocking-wf-vblank,Fail
+kms_flip@flip-vs-expired-vblank,Fail
+kms_flip@flip-vs-expired-vblank-interruptible,Fail
+kms_flip@flip-vs-modeset-vs-hang,Fail
+kms_flip@flip-vs-panning-vs-hang,Fail
+kms_flip@flip-vs-wf_vblank-interruptible,Fail
+kms_flip@modeset-vs-vblank-race,Fail
+kms_flip@modeset-vs-vblank-race-interruptible,Fail
+kms_flip@plain-flip-fb-recreate,Fail
+kms_flip@plain-flip-fb-recreate-interruptible,Fail
+kms_flip@plain-flip-ts-check,Fail
+kms_flip@plain-flip-ts-check-interruptible,Fail
+kms_flip@wf_vblank-ts-check,Fail
+kms_flip@wf_vblank-ts-check-interruptible,Fail
 kms_invalid_mode@int-max-clock,Fail
 kms_plane_scaling@downscale-with-modifier-factor-0-25,Fail
 kms_plane_scaling@downscale-with-rotation-factor-0-25,Fail
@@ -22,6 +65,9 @@ kms_plane_scaling@upscale-with-modifier-factor-0-25,Fail
 kms_plane_scaling@upscale-with-pixel-format-20x20,Fail
 kms_plane_scaling@upscale-with-pixel-format-factor-0-25,Fail
 kms_plane_scaling@upscale-with-rotation-20x20,Fail
+kms_selftest@drm_format,Timeout
+kms_selftest@drm_format_helper,Timeout
+kms_setmode@basic,Fail
 kms_vblank@crtc-id,Fail
 kms_vblank@invalid,Fail
 kms_vblank@pipe-A-accuracy-idle,Fail
-- 
GitLab


From e93bffc2ac0a833b42841f31fff955549d38ce98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Dec 2023 10:11:34 +0200
Subject: [PATCH 1064/2340] drm/i915: Reject async flips with bigjoiner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently async flips are busted when bigjoiner is in use.
As a short term fix simply reject async flips in that case.

Cc: stable@vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9769
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211081134.2698-1-ville.syrjala@linux.intel.com
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 4e957a8ddfdb3..e8b307ae93196 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5926,6 +5926,17 @@ static int intel_async_flip_check_uapi(struct intel_atomic_state *state,
 		return -EINVAL;
 	}
 
+	/*
+	 * FIXME: Bigjoiner+async flip is busted currently.
+	 * Remove this check once the issues are fixed.
+	 */
+	if (new_crtc_state->bigjoiner_pipes) {
+		drm_dbg_kms(&i915->drm,
+			    "[CRTC:%d:%s] async flip disallowed with bigjoiner\n",
+			    crtc->base.base.id, crtc->base.name);
+		return -EINVAL;
+	}
+
 	for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
 					     new_plane_state, i) {
 		if (plane->pipe != crtc->pipe)
-- 
GitLab


From e2e1916008aacf706ffa6bba65714c6d6200b196 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 28 Nov 2023 13:51:31 +0200
Subject: [PATCH 1065/2340] drm/i915/cdclk: s/-1/~0/ when dealing with unsigned
 values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cdclk_pll_is_unknown() used ~0 when checking for the "VCO is
unknown" value, but the assignment uses -1. They are the same
in the end, but let's use the same ~0 form on both sides for
consistency.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-2-ville.syrjala@linux.intel.com
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 8bb6bab7c8cd4..c774e020efd20 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1180,7 +1180,7 @@ sanitize:
 	/* force cdclk programming */
 	dev_priv->display.cdclk.hw.cdclk = 0;
 	/* force full PLL disable + enable */
-	dev_priv->display.cdclk.hw.vco = -1;
+	dev_priv->display.cdclk.hw.vco = ~0;
 }
 
 static void skl_cdclk_init_hw(struct drm_i915_private *dev_priv)
@@ -2075,7 +2075,7 @@ sanitize:
 	dev_priv->display.cdclk.hw.cdclk = 0;
 
 	/* force full PLL disable + enable */
-	dev_priv->display.cdclk.hw.vco = -1;
+	dev_priv->display.cdclk.hw.vco = ~0;
 }
 
 static void bxt_cdclk_init_hw(struct drm_i915_private *dev_priv)
-- 
GitLab


From 2581547335ff8acd877f1acd4ee57527eaaa0bde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 28 Nov 2023 13:51:32 +0200
Subject: [PATCH 1066/2340] drm/i915/cdclk: Give the squash waveform length a
 name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the slightly magic 'size = 16' with a bit more descriptive
name. We'll have another user for this value later on.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-3-ville.syrjala@linux.intel.com
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index c774e020efd20..a1660c6b86ecf 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1800,6 +1800,8 @@ static bool cdclk_pll_is_unknown(unsigned int vco)
 	return vco == ~0;
 }
 
+static const int cdclk_squash_len = 16;
+
 static int cdclk_squash_divider(u16 waveform)
 {
 	return hweight16(waveform ?: 0xffff);
@@ -1811,7 +1813,6 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91
 						    struct intel_cdclk_config *mid_cdclk_config)
 {
 	u16 old_waveform, new_waveform, mid_waveform;
-	int size = 16;
 	int div = 2;
 
 	/* Return if PLL is in an unknown state, force a complete disable and re-enable. */
@@ -1850,7 +1851,8 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91
 	}
 
 	mid_cdclk_config->cdclk = DIV_ROUND_CLOSEST(cdclk_squash_divider(mid_waveform) *
-						    mid_cdclk_config->vco, size * div);
+						    mid_cdclk_config->vco,
+						    cdclk_squash_len * div);
 
 	/* make sure the mid clock came out sane */
 
-- 
GitLab


From e1a914aef28f39aec5f107f31478d95aff3ae6db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 12 Dec 2023 00:16:36 +0200
Subject: [PATCH 1067/2340] drm/i915/cdclk: Remove the assumption that cdclk
 divider==2 when using squashing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we have a hardcoded assumption that the cdclk divider
(2*cd2x divider) is always 2 when squashing is used. While that
is true for all current platforms it might not hold in the future.
So eliminate the assumption and calculate the correct divider
from the other parameters.

v2: s/cd2x divider/cdclk divider/ (Gustavo)
    s/clock/unsquashed_cdclk/ (Gustavo)

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211221636.29658-1-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index a1660c6b86ecf..a12cf085dbdb4 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1880,9 +1880,9 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_config->cdclk;
 	int vco = cdclk_config->vco;
-	u32 val;
+	int unsquashed_cdclk;
 	u16 waveform;
-	int clock;
+	u32 val;
 
 	if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->display.cdclk.hw.vco > 0 && vco > 0 &&
 	    !cdclk_pll_is_unknown(dev_priv->display.cdclk.hw.vco)) {
@@ -1899,15 +1899,13 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv,
 
 	waveform = cdclk_squash_waveform(dev_priv, cdclk);
 
-	if (waveform)
-		clock = vco / 2;
-	else
-		clock = cdclk;
+	unsquashed_cdclk = DIV_ROUND_CLOSEST(cdclk * cdclk_squash_len,
+					     cdclk_squash_divider(waveform));
 
 	if (HAS_CDCLK_SQUASH(dev_priv))
 		dg2_cdclk_squash_program(dev_priv, waveform);
 
-	val = bxt_cdclk_cd2x_div_sel(dev_priv, clock, vco) |
+	val = bxt_cdclk_cd2x_div_sel(dev_priv, unsquashed_cdclk, vco) |
 		bxt_cdclk_cd2x_pipe(dev_priv, pipe);
 
 	/*
-- 
GitLab


From f23fe4d7d794c6d71dc6b8fdc510da2fc2174369 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 12 Dec 2023 00:17:59 +0200
Subject: [PATCH 1068/2340] drm/i915/cdclk: Rewrite cdclk->voltage_level
 selection to use tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cdclk->voltage_level if ladders are hard to read, especially as
they're written the other way around compared to how bspec lists
the limits. Let's rewrite them to use simple arrays that gives us
the max cdclk for each voltage level.

v2: Bump the jsl/ehl max cdclk in the table to 652.8 MHz to
    accommodate JSL machines in CI that boot with high cdclk

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211221759.29725-1-ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 87 ++++++++++++++--------
 1 file changed, 57 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index a12cf085dbdb4..a1d87db2665b7 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1446,50 +1446,77 @@ static u8 bxt_calc_voltage_level(int cdclk)
 	return DIV_ROUND_UP(cdclk, 25000);
 }
 
+static u8 calc_voltage_level(int cdclk, int num_voltage_levels,
+			     const int voltage_level_max_cdclk[])
+{
+	int voltage_level;
+
+	for (voltage_level = 0; voltage_level < num_voltage_levels; voltage_level++) {
+		if (cdclk <= voltage_level_max_cdclk[voltage_level])
+			return voltage_level;
+	}
+
+	MISSING_CASE(cdclk);
+	return num_voltage_levels - 1;
+}
+
 static u8 icl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int icl_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 556800,
+		[2] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(icl_voltage_level_max_cdclk),
+				  icl_voltage_level_max_cdclk);
 }
 
 static u8 ehl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 326400)
-		return 3;
-	else if (cdclk > 312000)
-		return 2;
-	else if (cdclk > 180000)
-		return 1;
-	else
-		return 0;
+	static const int ehl_voltage_level_max_cdclk[] = {
+		[0] = 180000,
+		[1] = 312000,
+		[2] = 326400,
+		/*
+		 * Bspec lists the limit as 556.8 MHz, but some JSL
+		 * development boards (at least) boot with 652.8 MHz
+		 */
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(ehl_voltage_level_max_cdclk),
+				  ehl_voltage_level_max_cdclk);
 }
 
 static u8 tgl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 3;
-	else if (cdclk > 326400)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int tgl_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 326400,
+		[2] = 556800,
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(tgl_voltage_level_max_cdclk),
+				  tgl_voltage_level_max_cdclk);
 }
 
 static u8 rplu_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 3;
-	else if (cdclk > 480000)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int rplu_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 480000,
+		[2] = 556800,
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(rplu_voltage_level_max_cdclk),
+				  rplu_voltage_level_max_cdclk);
 }
 
 static void icl_readout_refclk(struct drm_i915_private *dev_priv,
-- 
GitLab


From 273361f54e5bcaccdd725a9ffac14a9fac672451 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 28 Nov 2023 13:51:35 +0200
Subject: [PATCH 1069/2340] drm/i915/mtl: Fix voltage_level for cdclk==480MHz
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow MTL to use voltage level 1 for 480MHz cdclk,
instead of the voltage level 2 that it's currently using.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-6-ville.syrjala@linux.intel.com
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index a1d87db2665b7..c985ebb6831a3 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -3516,7 +3516,7 @@ static const struct intel_cdclk_funcs mtl_cdclk_funcs = {
 	.get_cdclk = bxt_get_cdclk,
 	.set_cdclk = bxt_set_cdclk,
 	.modeset_calc_cdclk = bxt_modeset_calc_cdclk,
-	.calc_voltage_level = tgl_calc_voltage_level,
+	.calc_voltage_level = rplu_calc_voltage_level,
 };
 
 static const struct intel_cdclk_funcs rplu_cdclk_funcs = {
-- 
GitLab


From 46bdb77d8b61e560ebb95c8d3a355be84b5492d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 28 Nov 2023 13:51:36 +0200
Subject: [PATCH 1070/2340] drm/i915: Split
 intel_ddi_compute_min_voltage_level() into platform variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mess inside intel_ddi_compute_min_voltage_level() is illegible.
Clean it up a bit by splitting the internals into per-platform
functions.

TODO: make it a vfunc?

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-7-ville.syrjala@linux.intel.com
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 37 +++++++++++++++++++-----
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 38f28c480b38b..bcfcd7e789f01 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3672,16 +3672,39 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
 		AUDIO_OUTPUT_ENABLE(cpu_transcoder);
 }
 
+static int tgl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->port_clock > 594000)
+		return 2;
+	else
+		return 0;
+}
+
+static int jsl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->port_clock > 594000)
+		return 3;
+	else
+		return 0;
+}
+
+static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->port_clock > 594000)
+		return 1;
+	else
+		return 0;
+}
+
 void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
 					 struct intel_crtc_state *crtc_state)
 {
-	if (DISPLAY_VER(dev_priv) >= 12 && crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 2;
-	else if ((IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) &&
-		 crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 3;
-	else if (DISPLAY_VER(dev_priv) >= 11 && crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 1;
+	if (DISPLAY_VER(dev_priv) >= 12)
+		crtc_state->min_voltage_level = tgl_ddi_min_voltage_level(crtc_state);
+	else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv))
+		crtc_state->min_voltage_level = jsl_ddi_min_voltage_level(crtc_state);
+	else if (DISPLAY_VER(dev_priv) >= 11)
+		crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state);
 }
 
 static enum transcoder bdw_transcoder_master_readout(struct drm_i915_private *dev_priv,
-- 
GitLab


From 0656afab88a6cf0efb3fbef394b68a4451b40365 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 28 Nov 2023 13:51:37 +0200
Subject: [PATCH 1071/2340] drm/i915/mtl: Calculate the correct voltage level
 from port_clock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On MTL we need to bump the voltage level to only 1 (not 2)
when port clock exceeds 594MHz. Make it so.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-8-ville.syrjala@linux.intel.com
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index bcfcd7e789f01..dd04bd7b348c5 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3699,7 +3699,9 @@ static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
 void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
 					 struct intel_crtc_state *crtc_state)
 {
-	if (DISPLAY_VER(dev_priv) >= 12)
+	if (DISPLAY_VER(dev_priv) >= 14)
+		crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state);
+	else if (DISPLAY_VER(dev_priv) >= 12)
 		crtc_state->min_voltage_level = tgl_ddi_min_voltage_level(crtc_state);
 	else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv))
 		crtc_state->min_voltage_level = jsl_ddi_min_voltage_level(crtc_state);
-- 
GitLab


From 8cd53c6b200e6a4522524e8cf45adc45a35814e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 28 Nov 2023 13:51:38 +0200
Subject: [PATCH 1072/2340] drm/i915: Simplify
 intel_ddi_compute_min_voltage_level()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the redundant dev_priv parameters from
intel_ddi_compute_min_voltage_level() to make life easier.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-9-ville.syrjala@linux.intel.com
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
---
 drivers/gpu/drm/i915/display/intel_ddi.c    | 9 +++++----
 drivers/gpu/drm/i915/display/intel_ddi.h    | 3 +--
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index dd04bd7b348c5..12a29363e5dfe 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3696,9 +3696,10 @@ static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
 		return 0;
 }
 
-void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
-					 struct intel_crtc_state *crtc_state)
+void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state)
 {
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+
 	if (DISPLAY_VER(dev_priv) >= 14)
 		crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state);
 	else if (DISPLAY_VER(dev_priv) >= 12)
@@ -3920,7 +3921,7 @@ static void intel_ddi_get_config(struct intel_encoder *encoder,
 		pipe_config->lane_lat_optim_mask =
 			bxt_ddi_phy_get_lane_lat_optim_mask(encoder);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	intel_hdmi_read_gcp_infoframe(encoder, pipe_config);
 
@@ -4200,7 +4201,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 		pipe_config->lane_lat_optim_mask =
 			bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index 63853a1f6582c..434de7196875a 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -70,8 +70,7 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state,
 				    bool state);
-void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
-					 struct intel_crtc_state *crtc_state);
+void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state);
 int intel_ddi_toggle_hdcp_bits(struct intel_encoder *intel_encoder,
 			       enum transcoder cpu_transcoder,
 			       bool enable, u32 hdcp_mask);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index e8940acea8ad1..8a94323350303 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -614,7 +614,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 	intel_dp_audio_compute_config(encoder, pipe_config, conn_state);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	intel_psr_compute_config(intel_dp, pipe_config, conn_state);
 
-- 
GitLab


From 51ea405c47f833e55d19401b35b71100197e6d5d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 11 Dec 2023 11:28:30 -0500
Subject: [PATCH 1073/2340] drm/amdgpu: fix buffer funcs setting order on
 suspend harder

Part of commit c03581986234 ("drm/amdgpu: fix buffer funcs setting order on suspend")
got dropped accidently.  Add it back.

Fixes: c03581986234 ("drm/amdgpu: fix buffer funcs setting order on suspend")
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 138c7b37af698..f58fb719292d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4589,8 +4589,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
 	amdgpu_ras_suspend(adev);
 
-	amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
 	amdgpu_device_ip_suspend_phase1(adev);
 
 	if (!adev->in_s0ix)
-- 
GitLab


From 5d1ff65f80fd8c11476bd10d10aa2b2b639de432 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 7 Dec 2023 10:54:54 +0000
Subject: [PATCH 1074/2340] drm/amd/display: Fix spelling mistake
 "SMC_MSG_AllowZstatesEntr" -> "SMC_MSG_AllowZstatesEntry"

There is a spelling mistake in a smu_print message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
index d6db9d7fced2e..6d4a1ffab5ed9 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -361,26 +361,26 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 	case DCN_ZSTATE_SUPPORT_ALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 9) | (1 << 8);
-		smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW, param = %d\n", __func__, param);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_DISALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = 0;
-		smu_print("%s: SMC_MSG_AllowZstatesEntr msg_id = DISALLOW, param = %d\n",  __func__, param);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg_id = DISALLOW, param = %d\n",  __func__, param);
 		break;
 
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10);
-		smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 8);
-		smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
-- 
GitLab


From 1e13c5644c443dee727ac1330bc118c909a1cf07 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:41 -0100
Subject: [PATCH 1075/2340] drm/drm_mode_object: increase max objects to
 accommodate new color props

DRM_OBJECT_MAX_PROPERTY limits the number of properties to be attached
and we are increasing that value all time we add a new property (generic
or driver-specific).

In this series, we are adding 13 new KMS driver-specific properties for
AMD color manage:
- CRTC Gamma enumerated Transfer Function
- Plane: Degamma LUT+size+TF, HDR multiplier, shaper LUT+size+TF, 3D
  LUT+size, blend LUT+size+TF (12)

Therefore, just increase DRM_OBJECT_MAX_PROPERTY to a number (64) that
accomodates these new properties and gives some room for others,
avoiding change this number everytime we add a new KMS property.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/drm_mode_object.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h
index 912f1e4156853..08d7a7f0188fe 100644
--- a/include/drm/drm_mode_object.h
+++ b/include/drm/drm_mode_object.h
@@ -60,7 +60,7 @@ struct drm_mode_object {
 	void (*free_cb)(struct kref *kref);
 };
 
-#define DRM_OBJECT_MAX_PROPERTY 24
+#define DRM_OBJECT_MAX_PROPERTY 64
 /**
  * struct drm_object_properties - property tracking for &drm_mode_object
  */
-- 
GitLab


From 601603105325ad4ec62db95c9bc428202ece2c8f Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:42 -0100
Subject: [PATCH 1076/2340] drm/drm_property: make
 replace_property_blob_from_id a DRM helper

Place it in drm_property where drm_property_replace_blob and
drm_property_lookup_blob live. Then we can use the DRM helper for
driver-specific KMS properties too.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/arm/malidp_crtc.c |  2 +-
 drivers/gpu/drm/drm_atomic_uapi.c | 52 +++------------------------
 drivers/gpu/drm/drm_property.c    | 59 +++++++++++++++++++++++++++++++
 include/drm/drm_property.h        |  6 ++++
 4 files changed, 71 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index dc01c43f61930..d72c22dcf6855 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc,
 
 	/*
 	 * The size of the ctm is checked in
-	 * drm_atomic_replace_property_blob_from_id.
+	 * drm_property_replace_blob_from_id.
 	 */
 	ctm = (struct drm_color_ctm *)state->ctm->data;
 	for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) {
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index aee4a65d49591..29d4940188d49 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -362,48 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state,
 	return fence_ptr;
 }
 
-static int
-drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
-					 struct drm_property_blob **blob,
-					 uint64_t blob_id,
-					 ssize_t expected_size,
-					 ssize_t expected_elem_size,
-					 bool *replaced)
-{
-	struct drm_property_blob *new_blob = NULL;
-
-	if (blob_id != 0) {
-		new_blob = drm_property_lookup_blob(dev, blob_id);
-		if (new_blob == NULL) {
-			drm_dbg_atomic(dev,
-				       "cannot find blob ID %llu\n", blob_id);
-			return -EINVAL;
-		}
-
-		if (expected_size > 0 &&
-		    new_blob->length != expected_size) {
-			drm_dbg_atomic(dev,
-				       "[BLOB:%d] length %zu different from expected %zu\n",
-				       new_blob->base.id, new_blob->length, expected_size);
-			drm_property_blob_put(new_blob);
-			return -EINVAL;
-		}
-		if (expected_elem_size > 0 &&
-		    new_blob->length % expected_elem_size != 0) {
-			drm_dbg_atomic(dev,
-				       "[BLOB:%d] length %zu not divisible by element size %zu\n",
-				       new_blob->base.id, new_blob->length, expected_elem_size);
-			drm_property_blob_put(new_blob);
-			return -EINVAL;
-		}
-	}
-
-	*replaced |= drm_property_replace_blob(blob, new_blob);
-	drm_property_blob_put(new_blob);
-
-	return 0;
-}
-
 static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		struct drm_crtc_state *state, struct drm_property *property,
 		uint64_t val)
@@ -424,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 	} else if (property == config->prop_vrr_enabled) {
 		state->vrr_enabled = val;
 	} else if (property == config->degamma_lut_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->degamma_lut,
 					val,
 					-1, sizeof(struct drm_color_lut),
@@ -432,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->ctm_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->ctm,
 					val,
 					sizeof(struct drm_color_ctm), -1,
@@ -440,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->gamma_lut_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->gamma_lut,
 					val,
 					-1, sizeof(struct drm_color_lut),
@@ -581,7 +539,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 	} else if (property == plane->color_range_property) {
 		state->color_range = val;
 	} else if (property == config->prop_fb_damage_clips) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->fb_damage_clips,
 					val,
 					-1,
@@ -778,7 +736,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector,
 		if (state->link_status != DRM_LINK_STATUS_GOOD)
 			state->link_status = val;
 	} else if (property == config->hdr_output_metadata_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 				&state->hdr_output_metadata,
 				val,
 				sizeof(struct hdr_output_metadata), -1,
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index dfec479830e49..596272149a359 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -27,6 +27,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 #include <drm/drm_framebuffer.h>
+#include <drm/drm_print.h>
 #include <drm/drm_property.h>
 
 #include "drm_crtc_internal.h"
@@ -751,6 +752,64 @@ bool drm_property_replace_blob(struct drm_property_blob **blob,
 }
 EXPORT_SYMBOL(drm_property_replace_blob);
 
+/**
+ * drm_property_replace_blob_from_id - replace a blob property taking a reference
+ * @dev: DRM device
+ * @blob: a pointer to the member blob to be replaced
+ * @blob_id: the id of the new blob to replace with
+ * @expected_size: expected size of the blob property
+ * @expected_elem_size: expected size of an element in the blob property
+ * @replaced: if the blob was in fact replaced
+ *
+ * Look up the new blob from id, take its reference, check expected sizes of
+ * the blob and its element and replace the old blob by the new one. Advertise
+ * if the replacement operation was successful.
+ *
+ * Return: true if the blob was in fact replaced. -EINVAL if the new blob was
+ * not found or sizes don't match.
+ */
+int drm_property_replace_blob_from_id(struct drm_device *dev,
+					 struct drm_property_blob **blob,
+					 uint64_t blob_id,
+					 ssize_t expected_size,
+					 ssize_t expected_elem_size,
+					 bool *replaced)
+{
+	struct drm_property_blob *new_blob = NULL;
+
+	if (blob_id != 0) {
+		new_blob = drm_property_lookup_blob(dev, blob_id);
+		if (new_blob == NULL) {
+			drm_dbg_atomic(dev,
+				       "cannot find blob ID %llu\n", blob_id);
+			return -EINVAL;
+		}
+
+		if (expected_size > 0 &&
+		    new_blob->length != expected_size) {
+			drm_dbg_atomic(dev,
+				       "[BLOB:%d] length %zu different from expected %zu\n",
+				       new_blob->base.id, new_blob->length, expected_size);
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+		if (expected_elem_size > 0 &&
+		    new_blob->length % expected_elem_size != 0) {
+			drm_dbg_atomic(dev,
+				       "[BLOB:%d] length %zu not divisible by element size %zu\n",
+				       new_blob->base.id, new_blob->length, expected_elem_size);
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+	}
+
+	*replaced |= drm_property_replace_blob(blob, new_blob);
+	drm_property_blob_put(new_blob);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_property_replace_blob_from_id);
+
 int drm_mode_getblob_ioctl(struct drm_device *dev,
 			   void *data, struct drm_file *file_priv)
 {
diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h
index 65bc9710a4702..082f29156b3e3 100644
--- a/include/drm/drm_property.h
+++ b/include/drm/drm_property.h
@@ -279,6 +279,12 @@ struct drm_property_blob *drm_property_create_blob(struct drm_device *dev,
 						   const void *data);
 struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev,
 						   uint32_t id);
+int drm_property_replace_blob_from_id(struct drm_device *dev,
+				      struct drm_property_blob **blob,
+				      uint64_t blob_id,
+				      ssize_t expected_size,
+				      ssize_t expected_elem_size,
+				      bool *replaced);
 int drm_property_replace_global_blob(struct drm_device *dev,
 				     struct drm_property_blob **replace,
 				     size_t length,
-- 
GitLab


From 24013b9301349881c9fcd27e7edacc672e0bf6d3 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:43 -0100
Subject: [PATCH 1077/2340] drm/drm_plane: track color mgmt changes per plane

We will add color mgmt properties to DRM planes in the next patches and
we want to track when one of this properties change to define atomic
commit behaviors. Using a similar approach from CRTC color props, we set
a color_mgmt_changed boolean whenever a plane color prop changes.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/drm_atomic.c              | 1 +
 drivers/gpu/drm/drm_atomic_state_helper.c | 1 +
 include/drm/drm_plane.h                   | 7 +++++++
 3 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c31fc0b48c316..a91737adf8e7b 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 		   drm_get_color_encoding_name(state->color_encoding));
 	drm_printf(p, "\tcolor-range=%s\n",
 		   drm_get_color_range_name(state->color_range));
+	drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed);
 
 	if (plane->funcs->atomic_print_state)
 		plane->funcs->atomic_print_state(p, state);
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 54975de44a0e3..519228eb10953 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -352,6 +352,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane,
 	state->fence = NULL;
 	state->commit = NULL;
 	state->fb_damage_clips = NULL;
+	state->color_mgmt_changed = false;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state);
 
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index c6565a6f9324c..641fe298052dc 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -251,6 +251,13 @@ struct drm_plane_state {
 
 	/** @state: backpointer to global drm_atomic_state */
 	struct drm_atomic_state *state;
+
+	/**
+	 * @color_mgmt_changed: Color management properties have changed. Used
+	 * by the atomic helpers and drivers to steer the atomic commit control
+	 * flow.
+	 */
+	bool color_mgmt_changed : 1;
 };
 
 static inline struct drm_rect
-- 
GitLab


From 9342a9ae54ef299ffe5e4ce3d0be6a4da5edba0e Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:44 -0100
Subject: [PATCH 1078/2340] drm/amd/display: add driver-specific property for
 plane degamma LUT

Hook up driver-specific atomic operations for managing AMD color
properties. Create AMD driver-specific color management properties
and attach them according to HW capabilities defined by `struct
dc_color_caps`.

First add plane degamma LUT properties that means user-blob and its
size. We will add more plane color properties in the next patches. In
addition, we define AMD_PRIVATE_COLOR to guard these driver-specific
plane properties.

Plane degamma can be used to linearize input space for arithmetical
operations that are more accurate when applied in linear color.

v2:
- update degamma LUT prop description
- move private color operations from amdgpu_display to amdgpu_dm_color

v5:
- get degamma blob correctly (Joshua)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Co-developed-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      | 12 +++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  5 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 25 ++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 85 +++++++++++++++++++
 5 files changed, 138 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index d8083972e393e..a63b01aa230d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -343,6 +343,18 @@ struct amdgpu_mode_info {
 	int			disp_priority;
 	const struct amdgpu_display_funcs *funcs;
 	const enum drm_plane_type *plane_type;
+
+	/* Driver-private color mgmt props */
+
+	/* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+	 * convert encoded values to light linear values before sampling or
+	 * blending.
+	 */
+	struct drm_property *plane_degamma_lut_property;
+	/* @plane_degamma_lut_size_property: Plane property to define the max
+	 * size of degamma LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_degamma_lut_size_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 8e6fe9860cd6f..80346bebb7fc0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4067,6 +4067,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 		return r;
 	}
 
+#ifdef AMD_PRIVATE_COLOR
+	if (amdgpu_dm_create_color_properties(adev))
+		return -ENOMEM;
+#endif
+
 	r = amdgpu_dm_audio_init(adev);
 	if (r) {
 		dc_release_state(state->context);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 0a9974017ebaa..446a3962d9c99 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -727,6 +727,16 @@ extern const struct amdgpu_ip_block_version dm_ip_block;
 struct dm_plane_state {
 	struct drm_plane_state base;
 	struct dc_plane_state *dc_state;
+
+	/* Plane color mgmt */
+	/**
+	 * @degamma_lut:
+	 *
+	 * 1D LUT for mapping framebuffer/plane pixel data before sampling or
+	 * blending operations. It's usually applied to linearize input space.
+	 * The blob (if not NULL) is an array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *degamma_lut;
 };
 
 struct dm_crtc_state {
@@ -817,6 +827,7 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
 
 void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a4cb23d059bd6..421af919387f0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -84,6 +84,31 @@ void amdgpu_dm_init_color_mod(void)
 	setup_x_points_distribution();
 }
 
+#ifdef AMD_PRIVATE_COLOR
+int
+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
+{
+	struct drm_property *prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_DEGAMMA_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_DEGAMMA_LUT_SIZE",
+					 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_lut_size_property = prop;
+
+	return 0;
+}
+#endif
+
 /**
  * __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
  * @blob: DRM color mgmt property blob
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 116121e647ca1..b1e50cfa9f83e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1357,6 +1357,10 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 		dc_plane_state_retain(dm_plane_state->dc_state);
 	}
 
+	if (old_dm_plane_state->degamma_lut)
+		dm_plane_state->degamma_lut =
+			drm_property_blob_get(old_dm_plane_state->degamma_lut);
+
 	return &dm_plane_state->base;
 }
 
@@ -1424,12 +1428,86 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
 
+	if (dm_plane_state->degamma_lut)
+		drm_property_blob_put(dm_plane_state->degamma_lut);
+
 	if (dm_plane_state->dc_state)
 		dc_plane_state_release(dm_plane_state->dc_state);
 
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
+#ifdef AMD_PRIVATE_COLOR
+static void
+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
+					     struct drm_plane *plane)
+{
+	struct amdgpu_mode_info mode_info = dm->adev->mode_info;
+	struct dpp_color_caps dpp_color_caps = dm->dc->caps.color.dpp;
+
+	/* Check HW color pipeline capabilities on DPP block (pre-blending)
+	 * before exposing related properties.
+	 */
+	if (dpp_color_caps.dgam_ram || dpp_color_caps.gamma_corr) {
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_degamma_lut_property,
+					   0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_degamma_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+	}
+}
+
+static int
+dm_atomic_plane_set_property(struct drm_plane *plane,
+			     struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t val)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	bool replaced = false;
+	int ret;
+
+	if (property == adev->mode_info.plane_degamma_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->degamma_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
+			       plane->base.id, plane->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+dm_atomic_plane_get_property(struct drm_plane *plane,
+			     const struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t *val)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+
+	if (property == adev->mode_info.plane_degamma_lut_property) {
+		*val = (dm_plane_state->degamma_lut) ?
+			dm_plane_state->degamma_lut->base.id : 0;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#endif
+
 static const struct drm_plane_funcs dm_plane_funcs = {
 	.update_plane	= drm_atomic_helper_update_plane,
 	.disable_plane	= drm_atomic_helper_disable_plane,
@@ -1438,6 +1516,10 @@ static const struct drm_plane_funcs dm_plane_funcs = {
 	.atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
 	.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
 	.format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+#ifdef AMD_PRIVATE_COLOR
+	.atomic_set_property = dm_atomic_plane_set_property,
+	.atomic_get_property = dm_atomic_plane_get_property,
+#endif
 };
 
 int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
@@ -1517,6 +1599,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
 
 	drm_plane_helper_add(plane, &dm_plane_helper_funcs);
 
+#ifdef AMD_PRIVATE_COLOR
+	dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+#endif
 	/* Create (reset) the plane state */
 	if (plane->funcs->reset)
 		plane->funcs->reset(plane);
-- 
GitLab


From ed342a2e78c4e4a8d82c2d19c95e8a3eb092c0d0 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar@amd.com>
Date: Fri, 1 Dec 2023 17:13:46 +0530
Subject: [PATCH 1079/2340] drm/amdgpu: Use the right method to get IP version

Replace direct usage of adev->ip_versions with amdgpu_ip_version.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c        | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c          | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c             | 2 +-
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++---
 drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c    | 6 +++---
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f58fb719292d1..85ed0d66a0298 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1599,7 +1599,7 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
 	if (adev->mman.keep_stolen_vga_memory)
 		return false;
 
-	return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
+	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3a632c3b1a2cd..0dcff2889e25d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1099,7 +1099,8 @@ bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
 {
 	bool xnack_mode = true;
 
-	if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+	if (amdgpu_sriov_vf(adev) &&
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
 		xnack_mode = false;
 
 	return xnack_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2ac5820e9c924..473a774294cee 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -883,7 +883,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * GRBM interface.
 	 */
 	if ((vmhub == AMDGPU_GFXHUB(0)) &&
-	    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
 		RREG32_NO_KIQ(req);
 
 	for (j = 0; j < adev->usec_timeout; j++) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 80346bebb7fc0..e819d05755b3f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1712,7 +1712,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
 
 	/* Enable DWB for tested platforms only */
-	if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0))
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
 		init_data.num_virtual_links = 1;
 
 	INIT_LIST_HEAD(&adev->dm.da_list);
@@ -8939,7 +8939,7 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm,
 	}
 
 	wb_info->mcif_buf_params.p_vmid = 1;
-	if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0)) {
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) {
 		wb_info->mcif_warmup_params.start_address.quad_part = afb->address;
 		wb_info->mcif_warmup_params.region_size =
 			wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height;
@@ -9891,7 +9891,8 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	 * TODO: Remove this hack for all asics once it proves that the
 	 * fast updates works fine on DCN3.2+.
 	 */
-	if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 2, 0) &&
+	    state->allow_modeset)
 		return true;
 
 	/* Exit early if we know that we're adding or removing the plane. */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index d8f8ad0e71375..4894f7ee737b4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -224,7 +224,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
 	if (smu->is_apu)
 		adev->pm.fw_version = smu_version;
 
-	switch (adev->ip_versions[MP1_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
 	case IP_VERSION(14, 0, 2):
 		smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2;
 		break;
@@ -235,7 +235,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
 		break;
 	default:
 		dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n",
-			adev->ip_versions[MP1_HWIP][0]);
+			amdgpu_ip_version(adev, MP1_HWIP, 0));
 		smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_INV;
 		break;
 	}
@@ -733,7 +733,7 @@ int smu_v14_0_gfx_off_control(struct smu_context *smu, bool enable)
 	int ret = 0;
 	struct amdgpu_device *adev = smu->adev;
 
-	switch (adev->ip_versions[MP1_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
 	case IP_VERSION(14, 0, 2):
 	case IP_VERSION(14, 0, 0):
 		if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
-- 
GitLab


From b70aed8f5d7686c4343f9ae618287404fa5a703e Mon Sep 17 00:00:00 2001
From: Saleemkhan Jamadar <saleemkhan.jamadar@amd.com>
Date: Tue, 28 Nov 2023 17:02:06 +0530
Subject: [PATCH 1080/2340] drm/amdgpu/jpeg: configure doorbell for each
 playback

Doorbell is configured during start of each playback.

v1 - add comment for the doorbell programming change

Signed-off-by: Saleemkhan Jamadar <saleemkhan.jamadar@amd.com>
Acked-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Veerabadhran Gopalakrishnan <Veerabadhran.Gopalakrishnan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 9df011323d4b9..6ede85b28cc8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -155,13 +155,6 @@ static int jpeg_v4_0_5_hw_init(void *handle)
 	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
 	int r;
 
-	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-				(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
-
-	WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
-		ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
-		VCN_JPEG_DB_CTRL__EN_MASK);
-
 	r = amdgpu_ring_test_helper(ring);
 	if (r)
 		return r;
@@ -336,6 +329,14 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev)
 	if (adev->pm.dpm_enabled)
 		amdgpu_dpm_enable_jpeg(adev, true);
 
+	/* doorbell programming is done for every playback */
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+	WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+		ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+		VCN_JPEG_DB_CTRL__EN_MASK);
+
 	/* disable power gating */
 	r = jpeg_v4_0_5_disable_static_power_gating(adev);
 	if (r)
-- 
GitLab


From 31e6af1ff77533df2e8e006974a9b57adece0488 Mon Sep 17 00:00:00 2001
From: Ma Jun <Jun.Ma2@amd.com>
Date: Fri, 3 Nov 2023 13:41:42 +0800
Subject: [PATCH 1081/2340] drm/amd/pm: Remove redundant function members of
 pptable_funcs

Remove redundant functions members of pptable_funcs and change
the function type as static because they are not called by other
files.

Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  4 --
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c |  2 -
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  2 -
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  2 -
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    | 41 +++++++++----------
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  2 -
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  |  2 -
 7 files changed, 20 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 48f5926d81535..5e7b8f29fecce 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -212,10 +212,6 @@ int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
 
 bool smu_v13_0_baco_is_support(struct smu_context *smu);
 
-enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu);
-
-int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state);
-
 int smu_v13_0_baco_enter(struct smu_context *smu);
 int smu_v13_0_baco_exit(struct smu_context *smu);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 2cb6b68222bab..4cd43bbec910e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -2407,8 +2407,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = smu_v11_0_baco_enter,
 	.baco_exit = smu_v11_0_baco_exit,
 	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index a38233cc5b7ff..8d1d29ffb0f1c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3537,8 +3537,6 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = navi10_baco_enter,
 	.baco_exit = navi10_baco_exit,
 	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 1de9f8b5cc5fa..21fc033528fa9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -4428,8 +4428,6 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = sienna_cichlid_baco_enter,
 	.baco_exit = sienna_cichlid_baco_exit,
 	.mode1_reset_is_support = sienna_cichlid_is_mode1_reset_supported,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 86fc7273d5888..2877e5da5b5ad 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2221,33 +2221,14 @@ static int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
 	return 0;
 }
 
-bool smu_v13_0_baco_is_support(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-
-	if (amdgpu_sriov_vf(smu->adev) ||
-	    !smu_baco->platform_support)
-		return false;
-
-	/* return true if ASIC is in BACO state already */
-	if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
-		return true;
-
-	if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
-	    !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
-		return false;
-
-	return true;
-}
-
-enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu)
+static enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
 
 	return smu_baco->state;
 }
 
-int smu_v13_0_baco_set_state(struct smu_context *smu,
+static int smu_v13_0_baco_set_state(struct smu_context *smu,
 			     enum smu_baco_state state)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
@@ -2281,6 +2262,24 @@ int smu_v13_0_baco_set_state(struct smu_context *smu,
 	return ret;
 }
 
+bool smu_v13_0_baco_is_support(struct smu_context *smu)
+{
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
+
+	if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
+		return false;
+
+	/* return true if ASIC is in BACO state already */
+	if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
+		return true;
+
+	if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
+	    !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
+		return false;
+
+	return true;
+}
+
 int smu_v13_0_baco_enter(struct smu_context *smu)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index a4debd7ab8bd0..2d25a9e47651f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -3003,8 +3003,6 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.deep_sleep_control = smu_v13_0_deep_sleep_control,
 	.gfx_ulv_control = smu_v13_0_gfx_ulv_control,
 	.baco_is_support = smu_v13_0_baco_is_support,
-	.baco_get_state = smu_v13_0_baco_get_state,
-	.baco_set_state = smu_v13_0_baco_set_state,
 	.baco_enter = smu_v13_0_baco_enter,
 	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index f5596f031d00c..2909ec06b1cb9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2594,8 +2594,6 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
 	.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
 	.baco_is_support = smu_v13_0_baco_is_support,
-	.baco_get_state = smu_v13_0_baco_get_state,
-	.baco_set_state = smu_v13_0_baco_set_state,
 	.baco_enter = smu_v13_0_baco_enter,
 	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
-- 
GitLab


From 9a10bd0df618f500ca526cf99f42504900020c2c Mon Sep 17 00:00:00 2001
From: Joshua Aberback <joshua.aberback@amd.com>
Date: Wed, 6 Dec 2023 14:52:22 -0500
Subject: [PATCH 1082/2340] drm/amd/display: Remove minor revision 5 until
 proper parser is ready

Reviewed-by: Dillon Varone <dillon.varone@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Joshua Aberback <joshua.aberback@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index aef964f1bcbe5..875a064bb9a5c 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1747,7 +1747,6 @@ static enum bp_result bios_parser_get_firmware_info(
 				result = get_firmware_info_v3_2(bp, info);
 				break;
 			case 4:
-			case 5:
 				result = get_firmware_info_v3_4(bp, info);
 				break;
 			default:
@@ -2398,7 +2397,6 @@ static enum bp_result get_vram_info_v30(
 	return result;
 }
 
-
 /*
  * get_integrated_info_v11
  *
-- 
GitLab


From 7f9b4fb450a65a46df3d454a53836cad7e1c79c6 Mon Sep 17 00:00:00 2001
From: Aurabindo Pillai <aurabindo.pillai@amd.com>
Date: Wed, 6 Dec 2023 14:52:23 -0500
Subject: [PATCH 1083/2340] drm/amd/display: Use explicit size for types in
 DCCG's struct dp_dto_params

Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index 6b44557fcb1a8..b9a06bf84cc97 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -59,8 +59,8 @@ enum dentist_dispclk_change_mode {
 struct dp_dto_params {
 	int otg_inst;
 	enum signal_type signal;
-	long long pixclk_hz;
-	long long refclk_hz;
+	uint64_t pixclk_hz;
+	uint64_t refclk_hz;
 };
 
 enum pixel_rate_div {
-- 
GitLab


From af68153ffe8c4f778ba9cbe1d1725a939ab94576 Mon Sep 17 00:00:00 2001
From: Ran Shi <ran.shi@amd.com>
Date: Wed, 6 Dec 2023 14:52:24 -0500
Subject: [PATCH 1084/2340] drm/amd/display: allow DP40 cables to do UHBR13.5

why:
With DP2.1a expansion we are allowing DP40 cables to do UHBR13.5

how:
Assume UHBR10 means UHBR13.5 also for unknown cable type and
passive cable type.

Reviewed-by: George Shen <george.shen@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Ran Shi <ran.shi@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/link/protocols/link_dp_capability.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index db87aa7b5c90f..3c5334cdb3fb9 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -412,12 +412,18 @@ static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link)
 {
 	enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN;
 
-	if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20)
+	if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) {
 		cable_max_link_rate = LINK_RATE_UHBR20;
-	else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY)
+	} else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) {
 		cable_max_link_rate = LINK_RATE_UHBR13_5;
-	else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10)
-		cable_max_link_rate = LINK_RATE_UHBR10;
+	} else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) {
+		// allow DP40 cables to do UHBR13.5 for passive or unknown cable type
+		if (link->dpcd_caps.cable_id.bits.CABLE_TYPE < 2) {
+			cable_max_link_rate = LINK_RATE_UHBR13_5;
+		} else {
+			cable_max_link_rate = LINK_RATE_UHBR10;
+		}
+	}
 
 	return cable_max_link_rate;
 }
-- 
GitLab


From d0f639c5869399bf6dde4d694d5f8c0ab8c0ec46 Mon Sep 17 00:00:00 2001
From: Taimur Hassan <syed.hassan@amd.com>
Date: Wed, 6 Dec 2023 14:52:25 -0500
Subject: [PATCH 1085/2340] drm/amd/display: Revert "Fix conversions between
 bytes and KB"

[Why & How]
HostVMMinPageSize is expected to be in KB according to spec,
the checks later down the line reflect this as well.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Taimur Hassan <syed.hassan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/dml2/display_mode_core.c  | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 4d1336e5afc2c..180f8a98a361a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 				mode_lib->ms.NoOfDPPThisState,
 				mode_lib->ms.dpte_group_bytes,
 				s->HostVMInefficiencyFactor,
-				mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+				mode_lib->ms.soc.hostvm_min_page_size_kbytes,
 				mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 		s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
@@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
 						mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
 						mode_lib->ms.MetaRowBytes[j][k],
 						mode_lib->ms.DPTEBytesPerRow[j][k],
@@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 		CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
@@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 		UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
 		UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 		UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
@@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 	CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
@@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
 			locals->dpte_group_bytes,
 			s->HostVMInefficiencyFactor,
-			mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+			mode_lib->ms.soc.hostvm_min_page_size_kbytes,
 			mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 	locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
@@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 			CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 			CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
 			CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 			CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 			CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
 						locals->PDEAndMetaPTEBytesFrame[k],
 						locals->MetaRowByte[k],
 						locals->PixelPTEBytesPerRow[k],
-- 
GitLab


From 11edbb4497504540f5e73a8aabf1254b31cf0a82 Mon Sep 17 00:00:00 2001
From: Aurabindo Pillai <aurabindo.pillai@amd.com>
Date: Wed, 6 Dec 2023 14:52:26 -0500
Subject: [PATCH 1086/2340] drm/amd/display: trivial comment change

FP guard is valid for all recent asics, not just RV, so fix the comment.

Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 3e73c4e59d408..7dacb0f82d29b 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -368,7 +368,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 	}
 	break;
 
-#endif /* CONFIG_DRM_AMD_DC_FP - Family RV */
+#endif	/* CONFIG_DRM_AMD_DC_FP */
 	default:
 		ASSERT(0); /* Unknown Asic */
 		break;
-- 
GitLab


From 2170fb03be28ad7807ea460101a60689c3f383e4 Mon Sep 17 00:00:00 2001
From: Michael Strauss <michael.strauss@amd.com>
Date: Wed, 6 Dec 2023 14:52:27 -0500
Subject: [PATCH 1087/2340] drm/amd/display: Revert DP2 MST hub triple display
 fix

[WHY]
Introduces regression with DP2 native displays

[HOW]
Revert the change

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Michael Strauss <michael.strauss@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 38fa7441df51f..814dbdcf9a787 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -160,13 +160,6 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 
 	if (pipe_ctx->stream == NULL)
 		return false;
-	/* Count MST hubs once by treating only 1st remote sink in topology as an encoder */
-	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) {
-			return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
-				pipe_ctx->link_res.hpo_dp_link_enc &&
-				dc_is_dp_signal(pipe_ctx->stream->signal) &&
-				(pipe_ctx->stream->link->remote_sinks[0] == pipe_ctx->stream->sink));
-	}
 
 	return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
 		pipe_ctx->link_res.hpo_dp_link_enc &&
-- 
GitLab


From c1afbb715e33a2b208c27a989c5f929029ffe7d3 Mon Sep 17 00:00:00 2001
From: Fangzhi Zuo <jerry.zuo@amd.com>
Date: Wed, 6 Dec 2023 14:52:28 -0500
Subject: [PATCH 1088/2340] drm/amd/display: Populate dtbclk from bounding box

dtbclk is unavaliable from pmfw. Try to grab the value from bounding box

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Fangzhi Zuo <jerry.zuo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c   | 14 +++++++++-----
 .../amd/display/dc/dml2/dml2_translation_helper.c  |  5 +++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index e9d88f52717b4..3d12dabd39e47 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 600.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 186.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 1,
@@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 209.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 2,
@@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 209.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 3,
@@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 371.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 4,
@@ -160,7 +160,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 417.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 	},
 	.num_states = 5,
@@ -367,6 +367,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
 				clock_limits[i].socclk_mhz;
 			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
 				clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+				clock_limits[i].dtbclk_mhz;
 			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
 				clk_table->num_entries;
 			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
@@ -379,6 +381,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
 				clk_table->num_entries;
 			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
 				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+				clk_table->num_entries;
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 255af7875c087..279e7605a0a2b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -425,8 +425,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 		}
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) {
-			p->in_states->state_array[i].dtbclk_mhz =
-				dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz;
+			if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0)
+				p->in_states->state_array[i].dtbclk_mhz =
+					dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz;
 		}
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) {
-- 
GitLab


From bbc42960f9b68e548403e57b2cfd6e93e684864f Mon Sep 17 00:00:00 2001
From: Allen <allen.pan@amd.com>
Date: Wed, 6 Dec 2023 14:52:29 -0500
Subject: [PATCH 1089/2340] drm/amd/display: Disable OPTC pg to match DC
 Hubp/dpp pg

[Why]
To match the hardware sequence

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Allen <allen.pan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 13324422ff50e..4eb744f1bc9fb 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -717,6 +717,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.disable_dcc = DCC_ENABLE,
 	.disable_dpp_power_gate = true,
 	.disable_hubp_power_gate = true,
+	.disable_optc_power_gate = true, /*should the same as above two*/
 	.disable_clock_gate = false,
 	.disable_dsc_power_gate = true,
 	.vsr_support = true,
-- 
GitLab


From fdb0ad2ff7c84bda30bfe3b8f90abd1f8d8788a0 Mon Sep 17 00:00:00 2001
From: Sung Joon Kim <sungkim@amd.com>
Date: Wed, 6 Dec 2023 14:52:30 -0500
Subject: [PATCH 1090/2340] drm/amd/display: Exit from idle state before
 accessing HW data

[why & how]
User interface cannot guarantee system is in
idle state, so need to ensure we exit idle state
before accessing any HW data.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Sung Joon Kim <sungkim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index 930fd929e93a4..8c5e7f858be38 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -64,11 +64,15 @@ static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight)
 
 static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm)
 {
+	dc_allow_idle_optimizations(abm->ctx->dc, false);
+
 	return dmub_abm_get_current_backlight(abm);
 }
 
 static unsigned int dmub_abm_get_target_backlight_ex(struct abm *abm)
 {
+	dc_allow_idle_optimizations(abm->ctx->dc, false);
+
 	return dmub_abm_get_target_backlight(abm);
 }
 
-- 
GitLab


From dd4e4bb28843393065eed279e869fac248d03f0f Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Wed, 6 Dec 2023 14:52:31 -0500
Subject: [PATCH 1091/2340] drm/amd/display: For prefetch mode > 0, extend
 prefetch if possible

[Description]
For mode programming we want to extend the prefetch as much as possible
(up to oto, or as long as we can for equ) if we're not already applying
the 60us prefetch requirement. This is to avoid intermittent underflow
issues during prefetch.

The prefetch extension is applied under the following scenarios:
1. We're in prefetch mode 1 (i.e. we don't support MCLK switch in blank)
2. We're using subvp or drr methods of p-state switch, in which case we
   we don't care if prefetch takes up more of the blanking time

Mode programming typically chooses the smallest prefetch time possible
(i.e. highest bandwidth during prefetch) presumably to create margin between
p-states / c-states that happen in vblank and prefetch. Therefore we only
apply this prefetch extension when p-state in vblank is not required (UCLK
p-states take up the most vblank time).

Reviewed-by: Jun Lei <jun.lei@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../dc/dml/dcn32/display_mode_vba_32.c        |  3 ++
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 33 +++++++++++++++----
 .../dc/dml/dcn32/display_mode_vba_util_32.h   |  1 +
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index cbdfb762c10c5..6c84b0fa40f44 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -813,6 +813,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 					(v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ||
 						v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
 							mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+					mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
+
 					/* Output */
 					&v->DSTXAfterScaler[k],
 					&v->DSTYAfterScaler[k],
@@ -3317,6 +3319,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							v->SwathHeightCThisState[k], v->TWait,
 							(v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
 									mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+							mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
 
 							/* Output */
 							&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index d940dfa5ae43e..80fccd4999a58 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -3423,6 +3423,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightC,
 		double TWait,
 		double TPreReq,
+		bool ExtendPrefetchIfPossible,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
@@ -3892,12 +3893,32 @@ bool dml32_CalculatePrefetchSchedule(
 			/* Clamp to oto for bandwidth calculation */
 			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
 		} else {
-			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
-			TimeForFetchingMetaPTE = Tvm_equ;
-			TimeForFetchingRowInVBlank = Tr0_equ;
-			*PrefetchBandwidth = prefetch_bw_equ;
-			/* Clamp to equ for bandwidth calculation */
-			LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+			/* For mode programming we want to extend the prefetch as much as possible
+			 * (up to oto, or as long as we can for equ) if we're not already applying
+			 * the 60us prefetch requirement. This is to avoid intermittent underflow
+			 * issues during prefetch.
+			 *
+			 * The prefetch extension is applied under the following scenarios:
+			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
+			 * 2. We're using subvp or drr methods of p-state switch, in which case we
+			 *    we don't care if prefetch takes up more of the blanking time
+			 *
+			 * Mode programming typically chooses the smallest prefetch time possible
+			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
+			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
+			 * apply this prefetch extension when p-state in vblank is not required (UCLK
+			 * p-states take up the most vblank time).
+			 */
+			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
+				MyError = true;
+			} else {
+				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
+				TimeForFetchingMetaPTE = Tvm_equ;
+				TimeForFetchingRowInVBlank = Tr0_equ;
+				*PrefetchBandwidth = prefetch_bw_equ;
+				/* Clamp to equ for bandwidth calculation */
+				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+			}
 		}
 
 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 592d174df6c62..5d34735df83db 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -747,6 +747,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightC,
 		double TWait,
 		double TPreReq,
+		bool ExtendPrefetchIfPossible,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
-- 
GitLab


From 9a902a9073c287353e25913c0761bfed49d75a88 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Wed, 6 Dec 2023 14:52:32 -0500
Subject: [PATCH 1092/2340] drm/amd/display: Force p-state disallow if leaving
 no plane config

[Description]
- When we're in a no plane config, DCN is always asserting
  P-State allow
- This creates a scenario where the P-State blackout can start
  just as VUPDATE takes place and transitions the DCN config to
  a one where one or more HUBP's are active which can result in
  underflow
- To fix this issue, force p-state disallow and unforce after
  the transition from no planes case -> one or more planes active

Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 608221b0dd5dc..c3c83178eb1e3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1877,6 +1877,8 @@ void dcn20_program_front_end_for_ctx(
 	int i;
 	struct dce_hwseq *hws = dc->hwseq;
 	DC_LOGGER_INIT(dc->ctx->logger);
+	unsigned int prev_hubp_count = 0;
+	unsigned int hubp_count = 0;
 
 	if (resource_is_pipe_topology_changed(dc->current_state, context))
 		resource_log_pipe_topology_update(dc, context);
@@ -1894,6 +1896,20 @@ void dcn20_program_front_end_for_ctx(
 		}
 	}
 
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
+			prev_hubp_count++;
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			hubp_count++;
+	}
+
+	if (prev_hubp_count == 0 && hubp_count > 0) {
+		if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+			dc->res_pool->hubbub->funcs->force_pstate_change_control(
+					dc->res_pool->hubbub, true, false);
+		udelay(500);
+	}
+
 	/* Set pipe update flags and lock pipes */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i],
@@ -2039,6 +2055,10 @@ void dcn20_post_unlock_program_front_end(
 		}
 	}
 
+	if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+		dc->res_pool->hubbub->funcs->force_pstate_change_control(
+				dc->res_pool->hubbub, false, false);
+
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-- 
GitLab


From 7253c36b1febe7e76be3da26fbf875978b37e92c Mon Sep 17 00:00:00 2001
From: Charlene Liu <charlene.liu@amd.com>
Date: Wed, 6 Dec 2023 14:52:33 -0500
Subject: [PATCH 1093/2340] drm/amd/display: fix HW block PG sequence

[why]
Power up and power down has reverted programming order.
also make sure disable root clock last.

Reviewed-by: Muhammad Ahmed <ahmed.ahmed@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Charlene Liu <charlene.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dcn35/dcn35_init.c |   3 +-
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 134 +++++++++++++-----
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.h   |   6 +-
 .../drm/amd/display/dc/hwss/hw_sequencer.h    |   6 +-
 4 files changed, 105 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
index 296bf3a38cb90..d594905eb246f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
@@ -118,7 +118,8 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
 	.update_dsc_pg = dcn32_update_dsc_pg,
 	.calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
 	.calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
-	.block_power_control = dcn35_block_power_control,
+	.hw_block_power_up = dcn35_hw_block_power_up,
+	.hw_block_power_down = dcn35_hw_block_power_down,
 	.root_clock_control = dcn35_root_clock_control,
 	.set_idle_state = dcn35_set_idle_state,
 	.get_idle_state = dcn35_get_idle_state
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 09498aa92096d..9262d33361821 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1123,9 +1123,23 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
 		update_state->pg_res_update[PG_HPO] = true;
 
 }
-
-void dcn35_block_power_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on)
+/**
+	 * power down sequence
+	 * ONO Region 3, DCPG 25: hpo - SKIPPED
+	 * ONO Region 4, DCPG 0: dchubp0, dpp0
+	 * ONO Region 6, DCPG 1: dchubp1, dpp1
+	 * ONO Region 8, DCPG 2: dchubp2, dpp2
+	 * ONO Region 10, DCPG 3: dchubp3, dpp3
+	 * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry
+	 * ONO Region 5, DCPG 16: dsc0
+	 * ONO Region 7, DCPG 17: dsc1
+	 * ONO Region 9, DCPG 18: dsc2
+	 * ONO Region 11, DCPG 19: dsc3
+	 * ONO Region 2, DCPG 24: mpc opp optc dwb
+	 * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after lono timer is armed
+*/
+void dcn35_hw_block_power_down(struct dc *dc,
+	struct pg_block_update *update_state)
 {
 	int i = 0;
 	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
@@ -1134,66 +1148,99 @@ void dcn35_block_power_control(struct dc *dc,
 		return;
 	if (dc->debug.ignore_pg)
 		return;
+
 	if (update_state->pg_res_update[PG_HPO]) {
 		if (pg_cntl->funcs->hpo_pg_control)
-			pg_cntl->funcs->hpo_pg_control(pg_cntl, power_on);
+			pg_cntl->funcs->hpo_pg_control(pg_cntl, false);
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
 			update_state->pg_pipe_res_update[PG_DPP][i]) {
 			if (pg_cntl->funcs->hubp_dpp_pg_control)
-				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, power_on);
+				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false);
 		}
-
+	}
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++)
 		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
 			if (pg_cntl->funcs->dsc_pg_control)
-				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, power_on);
-		}
-
-		if (update_state->pg_pipe_res_update[PG_MPCC][i]) {
-			if (pg_cntl->funcs->mpcc_pg_control)
-				pg_cntl->funcs->mpcc_pg_control(pg_cntl, i, power_on);
-		}
-
-		if (update_state->pg_pipe_res_update[PG_OPP][i]) {
-			if (pg_cntl->funcs->opp_pg_control)
-				pg_cntl->funcs->opp_pg_control(pg_cntl, i, power_on);
-		}
-
-		if (update_state->pg_pipe_res_update[PG_OPTC][i]) {
-			if (pg_cntl->funcs->optc_pg_control)
-				pg_cntl->funcs->optc_pg_control(pg_cntl, i, power_on);
+				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false);
 		}
-	}
 
-	if (update_state->pg_res_update[PG_DWB]) {
-		if (pg_cntl->funcs->dwb_pg_control)
-			pg_cntl->funcs->dwb_pg_control(pg_cntl, power_on);
-	}
 
 	/*this will need all the clients to unregister optc interruts let dmubfw handle this*/
 	if (pg_cntl->funcs->plane_otg_pg_control)
-		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, power_on);
+		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, false);
 
-}
+	//domain22, 23, 25 currently always on.
 
-void dcn35_root_clock_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on)
+}
+/**
+	 * power up sequence
+	 * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+	 * ONO Region 2, DCPG 24: mpc opp optc dwb
+	 * ONO Region 5, DCPG 16: dsc0
+	 * ONO Region 7, DCPG 17: dsc1
+	 * ONO Region 9, DCPG 18: dsc2
+	 * ONO Region 11, DCPG 19: dsc3
+	 * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit
+	 * ONO Region 4, DCPG 0: dchubp0, dpp0
+	 * ONO Region 6, DCPG 1: dchubp1, dpp1
+	 * ONO Region 8, DCPG 2: dchubp2, dpp2
+	 * ONO Region 10, DCPG 3: dchubp3, dpp3
+	 * ONO Region 3, DCPG 25: hpo - SKIPPED
+ */
+void dcn35_hw_block_power_up(struct dc *dc,
+	struct pg_block_update *update_state)
 {
 	int i = 0;
 	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
 
 	if (!pg_cntl)
 		return;
+	if (dc->debug.ignore_pg)
+		return;
+	//domain22, 23, 25 currently always on.
+	/*this will need all the clients to unregister optc interruts let dmubfw handle this*/
+	if (pg_cntl->funcs->plane_otg_pg_control)
+		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, true);
+
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++)
+		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
+			if (pg_cntl->funcs->dsc_pg_control)
+				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true);
+		}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
 			update_state->pg_pipe_res_update[PG_DPP][i]) {
-			if (dc->hwseq->funcs.dpp_root_clock_control)
-				dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			if (pg_cntl->funcs->hubp_dpp_pg_control)
+				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, true);
 		}
+	}
+	if (update_state->pg_res_update[PG_HPO]) {
+		if (pg_cntl->funcs->hpo_pg_control)
+			pg_cntl->funcs->hpo_pg_control(pg_cntl, true);
+	}
+}
+void dcn35_root_clock_control(struct dc *dc,
+	struct pg_block_update *update_state, bool power_on)
+{
+	int i = 0;
+	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
 
+	if (!pg_cntl)
+		return;
+	/*enable root clock first when power up*/
+	if (power_on)
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+				update_state->pg_pipe_res_update[PG_DPP][i]) {
+				if (dc->hwseq->funcs.dpp_root_clock_control)
+					dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			}
+		}
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
 		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
 			if (power_on) {
 				if (dc->res_pool->dccg->funcs->enable_dsc)
@@ -1204,6 +1251,15 @@ void dcn35_root_clock_control(struct dc *dc,
 			}
 		}
 	}
+	/*disable root clock first when power down*/
+	if (!power_on)
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+				update_state->pg_pipe_res_update[PG_DPP][i]) {
+				if (dc->hwseq->funcs.dpp_root_clock_control)
+					dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			}
+		}
 }
 
 void dcn35_prepare_bandwidth(
@@ -1217,9 +1273,9 @@ void dcn35_prepare_bandwidth(
 
 		if (dc->hwss.root_clock_control)
 			dc->hwss.root_clock_control(dc, &pg_update_state, true);
-
-		if (dc->hwss.block_power_control)
-			dc->hwss.block_power_control(dc, &pg_update_state, true);
+		/*power up required HW block*/
+		if (dc->hwss.hw_block_power_up)
+			dc->hwss.hw_block_power_up(dc, &pg_update_state);
 	}
 
 	dcn20_prepare_bandwidth(dc, context);
@@ -1235,9 +1291,9 @@ void dcn35_optimize_bandwidth(
 
 	if (dc->hwss.calc_blocks_to_gate) {
 		dc->hwss.calc_blocks_to_gate(dc, context, &pg_update_state);
-
-		if (dc->hwss.block_power_control)
-			dc->hwss.block_power_control(dc, &pg_update_state, false);
+		/*try to power down unused block*/
+		if (dc->hwss.hw_block_power_down)
+			dc->hwss.hw_block_power_down(dc, &pg_update_state);
 
 		if (dc->hwss.root_clock_control)
 			dc->hwss.root_clock_control(dc, &pg_update_state, false);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
index 0dff10d179b86..3837038dc4a8f 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -63,8 +63,10 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 	struct pg_block_update *update_state);
 void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
 	struct pg_block_update *update_state);
-void dcn35_block_power_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on);
+void dcn35_hw_block_power_up(struct dc *dc,
+	struct pg_block_update *update_state);
+void dcn35_hw_block_power_down(struct dc *dc,
+	struct pg_block_update *update_state);
 void dcn35_root_clock_control(struct dc *dc,
 	struct pg_block_update *update_state, bool power_on);
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index 452680fe9aabd..45dc6d4e95627 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -414,8 +414,10 @@ struct hw_sequencer_funcs {
 		struct pg_block_update *update_state);
 	void (*calc_blocks_to_ungate)(struct dc *dc, struct dc_state *context,
 		struct pg_block_update *update_state);
-	void (*block_power_control)(struct dc *dc,
-		struct pg_block_update *update_state, bool power_on);
+	void (*hw_block_power_up)(struct dc *dc,
+		struct pg_block_update *update_state);
+	void (*hw_block_power_down)(struct dc *dc,
+		struct pg_block_update *update_state);
 	void (*root_clock_control)(struct dc *dc,
 		struct pg_block_update *update_state, bool power_on);
 	void (*set_idle_state)(const struct dc *dc, bool allow_idle);
-- 
GitLab


From bcbd0787f8be31b17125d05cfaf71724774b9964 Mon Sep 17 00:00:00 2001
From: Aric Cyr <aric.cyr@amd.com>
Date: Wed, 6 Dec 2023 14:52:34 -0500
Subject: [PATCH 1094/2340] drm/amd/display: 3.2.264

Summary:

Bug fixes for:
  * DCN35 power gating
  * P-state change, & prefetch logic
  * ABM
  * DP 2.1

Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Aric Cyr <aric.cyr@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 61d08349a0d71..2c85f8ee682fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -49,7 +49,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.263"
+#define DC_VER "3.2.264"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
GitLab


From 571c2fa26aa654946447c282a09d40a56c7ff128 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Mon, 19 Jun 2023 15:04:24 -0500
Subject: [PATCH 1095/2340] drm/amd/display: Disable PSR-SU on Parade 0803 TCON
 again

When screen brightness is rapidly changed and PSR-SU is enabled the
display hangs on panels with this TCON even on the latest DCN 3.1.4
microcode (0x8002a81 at this time).

This was disabled previously as commit 072030b17830 ("drm/amd: Disable
PSR-SU on Parade 0803 TCON") but reverted as commit 1e66a17ce546 ("Revert
"drm/amd: Disable PSR-SU on Parade 0803 TCON"") in favor of testing for
a new enough microcode (commit cd2e31a9ab93 ("drm/amd/display: Set minimum
requirement for using PSR-SU on Phoenix")).

As hangs are still happening specifically with this TCON, disable PSR-SU
again for it until it can be root caused.

Cc: stable@vger.kernel.org
Cc: aaron.ma@canonical.com
Cc: binli@gnome.org
Cc: Marc Rossi <Marc.Rossi@amd.com>
Cc: Hamza Mahfooz <Hamza.Mahfooz@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2046131
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index a522a7c029110..1675314a3ff20 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link)
 				((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) ||
 				(dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07)))
 				isPSRSUSupported = false;
+			else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03)
+				isPSRSUSupported = false;
 			else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1)
 				isPSRSUSupported = true;
 		}
-- 
GitLab


From d5a348d96e4e2b924fa83e729f8791c03a4f8e24 Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:57:45 -0100
Subject: [PATCH 1096/2340] drm/amd/display: add plane degamma TF
 driver-specific property

Allow userspace to tell the kernel driver the input space and,
therefore, uses correct predefined transfer function (TF) to go from
encoded values to linear values.

v2:
- rename TF enum prefix from DRM_ to AMDGPU_ (Harry)
- remove HLG TF

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Co-developed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      |  5 +++++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 +++++++++++++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 21 +++++++++++++++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 19 +++++++++++++++--
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index a63b01aa230d9..a1b7ae5c21492 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -355,6 +355,11 @@ struct amdgpu_mode_info {
 	 * size of degamma LUT as supported by the driver (read-only).
 	 */
 	struct drm_property *plane_degamma_lut_size_property;
+	/**
+	 * @plane_degamma_tf_property: Plane pre-defined transfer function to
+	 * to go from scanout/encoded values to linear values.
+	 */
+	struct drm_property *plane_degamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 446a3962d9c99..41bf0cf564338 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -724,6 +724,18 @@ struct amdgpu_dm_wb_connector {
 
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
+enum amdgpu_transfer_function {
+	AMDGPU_TRANSFER_FUNCTION_DEFAULT,
+	AMDGPU_TRANSFER_FUNCTION_SRGB,
+	AMDGPU_TRANSFER_FUNCTION_BT709,
+	AMDGPU_TRANSFER_FUNCTION_PQ,
+	AMDGPU_TRANSFER_FUNCTION_LINEAR,
+	AMDGPU_TRANSFER_FUNCTION_UNITY,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA22,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA24,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA26,
+};
+
 struct dm_plane_state {
 	struct drm_plane_state base;
 	struct dc_plane_state *dc_state;
@@ -737,6 +749,13 @@ struct dm_plane_state {
 	 * The blob (if not NULL) is an array of &struct drm_color_lut.
 	 */
 	struct drm_property_blob *degamma_lut;
+	/**
+	 * @degamma_tf:
+	 *
+	 * Predefined transfer function to tell DC driver the input space to
+	 * linearize.
+	 */
+	enum amdgpu_transfer_function degamma_tf;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 421af919387f0..8e02d2b1249dc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
+static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = {
+	{ AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" },
+	{ AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" },
+	{ AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" },
+	{ AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
+	{ AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" },
+	{ AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" },
+	{ AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
+	{ AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
+	{ AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
+};
+
 int
 amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 {
@@ -105,6 +117,15 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_degamma_lut_size_property = prop;
 
+	prop = drm_property_create_enum(adev_to_drm(adev),
+					DRM_MODE_PROP_ENUM,
+					"AMD_PLANE_DEGAMMA_TF",
+					amdgpu_transfer_function_enum_list,
+					ARRAY_SIZE(amdgpu_transfer_function_enum_list));
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_tf_property = prop;
+
 	return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index b1e50cfa9f83e..d5e12e05d161b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1337,8 +1337,11 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
 	amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
 	WARN_ON(amdgpu_state == NULL);
 
-	if (amdgpu_state)
-		__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+	if (!amdgpu_state)
+		return;
+
+	__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 }
 
 static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
@@ -1361,6 +1364,8 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 		dm_plane_state->degamma_lut =
 			drm_property_blob_get(old_dm_plane_state->degamma_lut);
 
+	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
+
 	return &dm_plane_state->base;
 }
 
@@ -1455,6 +1460,9 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
 		drm_object_attach_property(&plane->base,
 					   mode_info.plane_degamma_lut_size_property,
 					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   dm->adev->mode_info.plane_degamma_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
 	}
 }
 
@@ -1477,6 +1485,11 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
 							&replaced);
 		dm_plane_state->base.color_mgmt_changed |= replaced;
 		return ret;
+	} else if (property == adev->mode_info.plane_degamma_tf_property) {
+		if (dm_plane_state->degamma_tf != val) {
+			dm_plane_state->degamma_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
 	} else {
 		drm_dbg_atomic(plane->dev,
 			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
@@ -1500,6 +1513,8 @@ dm_atomic_plane_get_property(struct drm_plane *plane,
 	if (property == adev->mode_info.plane_degamma_lut_property) {
 		*val = (dm_plane_state->degamma_lut) ?
 			dm_plane_state->degamma_lut->base.id : 0;
+	} else if (property == adev->mode_info.plane_degamma_tf_property) {
+		*val = dm_plane_state->degamma_tf;
 	} else {
 		return -EINVAL;
 	}
-- 
GitLab


From 5a3b965b5810bd602d2c7d8ea79ffe8c6e81268d Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:46 -0100
Subject: [PATCH 1097/2340] drm/amd/display: explicitly define EOTF and inverse
 EOTF

Instead of relying on color block names to get the transfer function
intention regarding encoding pixel's luminance, define supported
Electro-Optical Transfer Functions (EOTFs) and inverse EOTFs, that
includes pure gamma or standardized transfer functions.

v3:
- squash linear and unity TFs to identity (Pekka)
- define the right TFs for BT.709 (Pekka and Harry)
- add comment about AMD TF coefficients

Suggested-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 27 +++++---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 67 ++++++++++++++-----
 2 files changed, 71 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 41bf0cf564338..baa73a6f47a9a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -724,16 +724,27 @@ struct amdgpu_dm_wb_connector {
 
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
+/* enum amdgpu_transfer_function: pre-defined transfer function supported by AMD.
+ *
+ * It includes standardized transfer functions and pure power functions. The
+ * transfer function coefficients are available at modules/color/color_gamma.c
+ */
 enum amdgpu_transfer_function {
 	AMDGPU_TRANSFER_FUNCTION_DEFAULT,
-	AMDGPU_TRANSFER_FUNCTION_SRGB,
-	AMDGPU_TRANSFER_FUNCTION_BT709,
-	AMDGPU_TRANSFER_FUNCTION_PQ,
-	AMDGPU_TRANSFER_FUNCTION_LINEAR,
-	AMDGPU_TRANSFER_FUNCTION_UNITY,
-	AMDGPU_TRANSFER_FUNCTION_GAMMA22,
-	AMDGPU_TRANSFER_FUNCTION_GAMMA24,
-	AMDGPU_TRANSFER_FUNCTION_GAMMA26,
+	AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF,
+	AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_IDENTITY,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_BT709_OETF,
+	AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
+        AMDGPU_TRANSFER_FUNCTION_COUNT
 };
 
 struct dm_plane_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 8e02d2b1249dc..a32abbd93a61c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,18 +85,57 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
-static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = {
-	{ AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" },
-	{ AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" },
-	{ AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" },
-	{ AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" },
-	{ AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" },
-	{ AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" },
-	{ AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" },
-	{ AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" },
-	{ AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" },
+static const char * const
+amdgpu_transfer_function_names[] = {
+	[AMDGPU_TRANSFER_FUNCTION_DEFAULT]		= "Default",
+	[AMDGPU_TRANSFER_FUNCTION_IDENTITY]		= "Identity",
+	[AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]		= "sRGB EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF]	= "BT.709 inv_OETF",
+	[AMDGPU_TRANSFER_FUNCTION_PQ_EOTF]		= "PQ EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF]		= "Gamma 2.2 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF]		= "Gamma 2.4 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF]		= "Gamma 2.6 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]	= "sRGB inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_BT709_OETF]		= "BT.709 OETF",
+	[AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF]		= "PQ inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF]	= "Gamma 2.2 inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF]	= "Gamma 2.4 inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF]	= "Gamma 2.6 inv_EOTF",
 };
 
+static const u32 amdgpu_eotf =
+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
+
+static struct drm_property *
+amdgpu_create_tf_property(struct drm_device *dev,
+			  const char *name,
+			  u32 supported_tf)
+{
+	u32 transfer_functions = supported_tf |
+				 BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) |
+				 BIT(AMDGPU_TRANSFER_FUNCTION_IDENTITY);
+	struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT];
+	int i, len;
+
+	len = 0;
+	for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) {
+		if ((transfer_functions & BIT(i)) == 0)
+			continue;
+
+		enum_list[len].type = i;
+		enum_list[len].name = amdgpu_transfer_function_names[i];
+		len++;
+	}
+
+	return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM,
+					name, enum_list, len);
+}
+
 int
 amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 {
@@ -117,11 +156,9 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_degamma_lut_size_property = prop;
 
-	prop = drm_property_create_enum(adev_to_drm(adev),
-					DRM_MODE_PROP_ENUM,
-					"AMD_PLANE_DEGAMMA_TF",
-					amdgpu_transfer_function_enum_list,
-					ARRAY_SIZE(amdgpu_transfer_function_enum_list));
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_DEGAMMA_TF",
+					 amdgpu_eotf);
 	if (!prop)
 		return -ENOMEM;
 	adev->mode_info.plane_degamma_tf_property = prop;
-- 
GitLab


From e4cddd51bfab2a40529a4af35bd2c912b5a0c239 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:47 -0100
Subject: [PATCH 1098/2340] drm/amd/display: document AMDGPU pre-defined
 transfer functions

Brief documentation about pre-defined transfer function usage on AMD
display driver and standardized EOTFs and inverse EOTFs.

v3:
- Document BT709 OETF (Pekka)
- Fix description of sRGB and pure power funcs (Pekka)

v4:
- Add description of linear and non-linear forms (Harry)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Co-developed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a32abbd93a61c..49faaf606cfec 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,68 @@ void amdgpu_dm_init_color_mod(void)
 }
 
 #ifdef AMD_PRIVATE_COLOR
+/* Pre-defined Transfer Functions (TF)
+ *
+ * AMD driver supports pre-defined mathematical functions for transferring
+ * between encoded values and optical/linear space. Depending on HW color caps,
+ * ROMs and curves built by the AMD color module support these transforms.
+ *
+ * The driver-specific color implementation exposes properties for pre-blending
+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
+ * curves for the other blocks. What can be done by each color block is
+ * described by struct dpp_color_capsand struct mpc_color_caps.
+ *
+ * AMD driver-specific color API exposes the following pre-defined transfer
+ * functions:
+ *
+ * - Identity: linear/identity relationship between pixel value and
+ *   luminance value;
+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions;
+ * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999;
+ * - BT.709: has a linear segment in the bottom part and then a power function
+ *   with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by
+ *   ITU-R BT.709-6;
+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
+ *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
+ *
+ * The AMD color model is designed with an assumption that SDR (sRGB, BT.709,
+ * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ
+ * system. This has the implication that PQ EOTF (non-linear to linear) maps to
+ * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits.
+ *
+ * Non-linear and linear forms are described in the table below:
+ *
+ * ┌───────────┬─────────────────────┬──────────────────────┐
+ * │           │     Non-linear      │   Linear             │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │      sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │     BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │        PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0]         │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │  Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │
+ * └───────────┴─────────────────────┴──────────────────────┘
+ * * CCCS: Windows canonical composition color space
+ * ** Respectively
+ *
+ * In the driver-specific API, color block names attached to TF properties
+ * suggest the intention regarding non-linear encoding pixel's luminance
+ * values. As some newer encodings don't use gamma curve, we make encoding and
+ * decoding explicit by defining an enum list of transfer functions supported
+ * in terms of EOTF and inverse EOTF, where:
+ *
+ * - EOTF (electro-optical transfer function): is the transfer function to go
+ *   from the encoded value to an optical (linear) value. De-gamma functions
+ *   traditionally do this.
+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
+ *   from an optical/linear space (which might have been used for blending)
+ *   back to the encoded values. Gamma functions traditionally do this.
+ */
 static const char * const
 amdgpu_transfer_function_names[] = {
 	[AMDGPU_TRANSFER_FUNCTION_DEFAULT]		= "Default",
-- 
GitLab


From ec7b2a55463ea50401a8146793b61ee590255a45 Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:57:48 -0100
Subject: [PATCH 1099/2340] drm/amd/display: add plane HDR multiplier
 driver-specific property

Multiplier to 'gain' the plane. When PQ is decoded using the fixed func
transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at
least) When sRGB is decoded, 1.0 -> 1.0.  Therefore, 1.0 multiplier = 80
nits for SDR content. So if you want, 203 nits for SDR content, pass in
(203.0 / 80.0).

v4:
- comment about the PQ TF need for L-to-NL (from Harry's review)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Co-developed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h        |  4 ++++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h   | 17 +++++++++++++++++
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c |  6 ++++++
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 13 +++++++++++++
 4 files changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index a1b7ae5c21492..218623e1b9aac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -360,6 +360,10 @@ struct amdgpu_mode_info {
 	 * to go from scanout/encoded values to linear values.
 	 */
 	struct drm_property *plane_degamma_tf_property;
+	/**
+	 * @plane_hdr_mult_property:
+	 */
+	struct drm_property *plane_hdr_mult_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index baa73a6f47a9a..d36d68d517a81 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -55,6 +55,9 @@
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A
 #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
+
+#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)
+
 /*
 #include "include/amdgpu_dal_power_if.h"
 #include "amdgpu_dm_irq.h"
@@ -767,6 +770,20 @@ struct dm_plane_state {
 	 * linearize.
 	 */
 	enum amdgpu_transfer_function degamma_tf;
+	/**
+	 * @hdr_mult:
+	 *
+	 * Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
+	 * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
+	 * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
+	 * Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
+	 * want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
+	 * S31.32 sign-magnitude.
+	 *
+	 * HDR multiplier can wide range beyond [0.0, 1.0]. This means that PQ
+	 * TF is needed for any subsequent linear-to-non-linear transforms.
+	 */
+	__u64 hdr_mult;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 49faaf606cfec..b5b34a9209e4d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -225,6 +225,12 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_degamma_tf_property = prop;
 
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_hdr_mult_property = prop;
+
 	return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index d5e12e05d161b..45a2c9b36630d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1342,6 +1342,7 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
 
 	__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
 	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
 }
 
 static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
@@ -1365,6 +1366,7 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 			drm_property_blob_get(old_dm_plane_state->degamma_lut);
 
 	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
+	dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
 
 	return &dm_plane_state->base;
 }
@@ -1464,6 +1466,10 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
 					   dm->adev->mode_info.plane_degamma_tf_property,
 					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
 	}
+	/* HDR MULT is always available */
+	drm_object_attach_property(&plane->base,
+				   dm->adev->mode_info.plane_hdr_mult_property,
+				   AMDGPU_HDR_MULT_DEFAULT);
 }
 
 static int
@@ -1490,6 +1496,11 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
 			dm_plane_state->degamma_tf = val;
 			dm_plane_state->base.color_mgmt_changed = 1;
 		}
+	} else if (property == adev->mode_info.plane_hdr_mult_property) {
+		if (dm_plane_state->hdr_mult != val) {
+			dm_plane_state->hdr_mult = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
 	} else {
 		drm_dbg_atomic(plane->dev,
 			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
@@ -1515,6 +1526,8 @@ dm_atomic_plane_get_property(struct drm_plane *plane,
 			dm_plane_state->degamma_lut->base.id : 0;
 	} else if (property == adev->mode_info.plane_degamma_tf_property) {
 		*val = dm_plane_state->degamma_tf;
+	} else if (property == adev->mode_info.plane_hdr_mult_property) {
+		*val = dm_plane_state->hdr_mult;
 	} else {
 		return -EINVAL;
 	}
-- 
GitLab


From a9210714d23190b44eed32f8bcadbe3b18d51a1d Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Fri, 8 Dec 2023 01:58:24 -0800
Subject: [PATCH 1100/2340] drm/amd/display: Fix memory leak in
 dm_set_writeback()

'wb_info' needs to be freed on error paths or it would leak the memory.

Smatch pointed this out.

Fixes: c81e13b929df ("drm/amd/display: Hande writeback request from userspace")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e819d05755b3f..6efa0c1bba9d2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8879,12 +8879,14 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm,
 	acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc);
 	if (!acrtc) {
 		DRM_ERROR("no amdgpu_crtc found\n");
+		kfree(wb_info);
 		return;
 	}
 
 	afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb);
 	if (!afb) {
 		DRM_ERROR("No amdgpu_framebuffer found\n");
+		kfree(wb_info);
 		return;
 	}
 
-- 
GitLab


From 4e95669ecb03d797355bc23871c5c43b9475d3dc Mon Sep 17 00:00:00 2001
From: Vignesh Chander <Vignesh.Chander@amd.com>
Date: Fri, 8 Dec 2023 12:00:26 -0600
Subject: [PATCH 1101/2340] drm/amdgpu: xgmi_fill_topology_info

1. Use the mirrored topology info to fill links for VF.
The new solution is required to simplify and optimize host driver logic.
Only use the new solution for VFs that support full duplex and
extended_peer_link_info otherwise the info would be incomplete.

2. avoid calling extended_link_info on VF as its not supported

Signed-off-by: Vignesh Chander <Vignesh.Chander@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 58 ++++++++++++++++++++----
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a21045d018f2b..1bf975b8d083e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
 			 get_extended_data) ||
 			amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
 				IP_VERSION(13, 0, 6);
-		bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps &
-						EXTEND_PEER_LINK_INFO_CMD_FLAG;
+		bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 :
+				psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG;
 
 		/* popluate the shared output buffer rather than the cmd input buffer
 		 * with node_ids as the input for GET_PEER_LINKS command execution.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 44d8c1a11e1b3..9a95b9f226b85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -823,6 +823,28 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
 	return 0;
 }
 
+static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
+	struct amdgpu_device *peer_adev)
+{
+	struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info;
+	struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info;
+
+	for (int i = 0; i < peer_info->num_nodes; i++) {
+		if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) {
+			for (int j = 0; j < top_info->num_nodes; j++) {
+				if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) {
+					peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops;
+					peer_info->nodes[i].is_sharing_enabled =
+							top_info->nodes[j].is_sharing_enabled;
+					peer_info->nodes[i].num_links =
+							top_info->nodes[j].num_links;
+					return;
+				}
+			}
+		}
+	}
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
 	struct psp_xgmi_topology_info *top_info;
@@ -897,18 +919,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 				goto exit_unlock;
 		}
 
-		/* get latest topology info for each device from psp */
-		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-			ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
-					&tmp_adev->psp.xgmi_context.top_info, false);
+		if (amdgpu_sriov_vf(adev) &&
+			adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+			/* only get topology for VF being init if it can support full duplex */
+			ret = psp_xgmi_get_topology_info(&adev->psp, count,
+						&adev->psp.xgmi_context.top_info, false);
 			if (ret) {
-				dev_err(tmp_adev->dev,
+				dev_err(adev->dev,
 					"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
-					tmp_adev->gmc.xgmi.node_id,
-					tmp_adev->gmc.xgmi.hive_id, ret);
-				/* To do : continue with some node failed or disable the whole hive */
+					adev->gmc.xgmi.node_id,
+					adev->gmc.xgmi.hive_id, ret);
+				/* To do: continue with some node failed or disable the whole hive*/
 				goto exit_unlock;
 			}
+
+			/* fill the topology info for peers instead of getting from PSP */
+			list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+				amdgpu_xgmi_fill_topology_info(adev, tmp_adev);
+			}
+		} else {
+			/* get latest topology info for each device from psp */
+			list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+				ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+					&tmp_adev->psp.xgmi_context.top_info, false);
+				if (ret) {
+					dev_err(tmp_adev->dev,
+						"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+						tmp_adev->gmc.xgmi.node_id,
+						tmp_adev->gmc.xgmi.hive_id, ret);
+					/* To do : continue with some node failed or disable the whole hive */
+					goto exit_unlock;
+				}
+			}
 		}
 
 		/* get topology again for hives that support extended data */
-- 
GitLab


From 1819200166ce511ac298dc96b9b17eb655a9edc4 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 24 Aug 2023 14:41:30 -0400
Subject: [PATCH 1102/2340] drm/amdkfd: Export DMABufs from KFD using GEM
 handles

Create GEM handles for exporting DMABufs using GEM-Prime APIs. The GEM
handles are created in a drm_client_dev context to avoid exposing them
in user mode contexts through a DMABuf import.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 11 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  5 +++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 33 +++++++++++++++----
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 +--
 4 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 2d22f7d45512d..067690ba7bffd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -142,6 +142,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
 	int last_valid_bit;
+	int ret;
 
 	amdgpu_amdkfd_gpuvm_init_mem_limits();
 
@@ -160,6 +161,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.enable_mes = adev->enable_mes,
 		};
 
+		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL);
+		if (ret) {
+			dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
+			return;
+		}
+
 		/* this is going to have a few of the MSBs set that we need to
 		 * clear
 		 */
@@ -198,6 +205,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
 							&gpu_resources);
+		if (adev->kfd.init_complete)
+			drm_client_register(&adev->kfd.client);
+		else
+			drm_client_release(&adev->kfd.client);
 
 		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 16794c2eea35d..02973f5c8cafe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -33,6 +33,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memremap.h>
 #include <kgd_kfd_interface.h>
+#include <drm/drm_client.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_xcp.h"
@@ -83,6 +84,7 @@ struct kgd_mem {
 
 	struct amdgpu_sync sync;
 
+	uint32_t gem_handle;
 	bool aql_queue;
 	bool is_imported;
 };
@@ -105,6 +107,9 @@ struct amdgpu_kfd_dev {
 
 	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
 	struct dev_pagemap pgmap;
+
+	/* Client for KFD BO GEM handle allocations */
+	struct drm_client_dev client;
 };
 
 enum kgd_engine_type {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 73288f9ccaf85..ae7dfaf591596 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,7 @@
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
+#include <linux/fdtable.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include <drm/drm_exec.h>
@@ -806,13 +807,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
 {
 	if (!mem->dmabuf) {
-		struct dma_buf *ret = amdgpu_gem_prime_export(
-			&mem->bo->tbo.base,
+		struct amdgpu_device *bo_adev;
+		struct dma_buf *dmabuf;
+		int r, fd;
+
+		bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+		r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file,
+					       mem->gem_handle,
 			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
-				DRM_RDWR : 0);
-		if (IS_ERR(ret))
-			return PTR_ERR(ret);
-		mem->dmabuf = ret;
+					       DRM_RDWR : 0, &fd);
+		if (r)
+			return r;
+		dmabuf = dma_buf_get(fd);
+		close_fd(fd);
+		if (WARN_ON_ONCE(IS_ERR(dmabuf)))
+			return PTR_ERR(dmabuf);
+		mem->dmabuf = dmabuf;
 	}
 
 	return 0;
@@ -1778,6 +1788,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		pr_debug("Failed to allow vma node access. ret %d\n", ret);
 		goto err_node_allow;
 	}
+	ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+	if (ret)
+		goto err_gem_handle_create;
 	bo = gem_to_amdgpu_bo(gobj);
 	if (bo_type == ttm_bo_type_sg) {
 		bo->tbo.sg = sg;
@@ -1829,6 +1842,8 @@ allocate_init_user_pages_failed:
 err_pin_bo:
 err_validate_bo:
 	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+	drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
 	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
 	/* Don't unreserve system mem limit twice */
@@ -1941,8 +1956,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	/* Free the BO*/
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
-	if (mem->dmabuf)
+	if (!mem->is_imported)
+		drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+	if (mem->dmabuf) {
 		dma_buf_put(mem->dmabuf);
+		mem->dmabuf = NULL;
+	}
 	mutex_destroy(&mem->lock);
 
 	/* If this releases the last reference, it will end up calling
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f6d4748c1980a..b4cb907f80c60 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1855,8 +1855,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
 	return num_of_bos;
 }
 
-static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
-				      u32 *shared_fd)
+static int criu_get_prime_handle(struct kgd_mem *mem,
+				 int flags, u32 *shared_fd)
 {
 	struct dma_buf *dmabuf;
 	int ret;
-- 
GitLab


From 0188006d7c797a37c04471a2b4a34a7dfb21f363 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Tue, 19 Sep 2023 14:57:02 -0400
Subject: [PATCH 1103/2340] drm/amdkfd: Import DMABufs for interop through DRM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use drm_gem_prime_fd_to_handle to import DMABufs for interop. This
ensures that a GEM handle is created on import and that obj->dma_buf
will be set and remain set as long as the object is imported into KFD.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Reviewed-by: Xiaogang.Chen <Xiaogang.Chen@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  9 ++-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 64 +++++++++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 15 ++---
 3 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 02973f5c8cafe..cf6ed5fce291f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -314,11 +314,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 					    struct dma_fence **ef);
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 					      struct kfd_vm_fault_info *info);
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
-				      struct dma_buf *dmabuf,
-				      uint64_t va, void *drm_priv,
-				      struct kgd_mem **mem, uint64_t *size,
-				      uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+					 uint64_t va, void *drm_priv,
+					 struct kgd_mem **mem, uint64_t *size,
+					 uint64_t *mmap_offset);
 int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
 				      struct dma_buf **dmabuf);
 void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ae7dfaf591596..48697b789342e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1956,8 +1956,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	/* Free the BO*/
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
-	if (!mem->is_imported)
-		drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+	drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
 	if (mem->dmabuf) {
 		dma_buf_put(mem->dmabuf);
 		mem->dmabuf = NULL;
@@ -2313,34 +2312,26 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 	return 0;
 }
 
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
-				      struct dma_buf *dma_buf,
-				      uint64_t va, void *drm_priv,
-				      struct kgd_mem **mem, uint64_t *size,
-				      uint64_t *mmap_offset)
+static int import_obj_create(struct amdgpu_device *adev,
+			     struct dma_buf *dma_buf,
+			     struct drm_gem_object *obj,
+			     uint64_t va, void *drm_priv,
+			     struct kgd_mem **mem, uint64_t *size,
+			     uint64_t *mmap_offset)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-	struct drm_gem_object *obj;
 	struct amdgpu_bo *bo;
 	int ret;
 
-	obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
 	bo = gem_to_amdgpu_bo(obj);
 	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-				    AMDGPU_GEM_DOMAIN_GTT))) {
+				    AMDGPU_GEM_DOMAIN_GTT)))
 		/* Only VRAM and GTT BOs are supported */
-		ret = -EINVAL;
-		goto err_put_obj;
-	}
+		return -EINVAL;
 
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem) {
-		ret = -ENOMEM;
-		goto err_put_obj;
-	}
+	if (!*mem)
+		return -ENOMEM;
 
 	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
 	if (ret)
@@ -2390,8 +2381,41 @@ err_remove_mem:
 	drm_vma_node_revoke(&obj->vma_node, drm_priv);
 err_free_mem:
 	kfree(*mem);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+					 uint64_t va, void *drm_priv,
+					 struct kgd_mem **mem, uint64_t *size,
+					 uint64_t *mmap_offset)
+{
+	struct drm_gem_object *obj;
+	uint32_t handle;
+	int ret;
+
+	ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+					 &handle);
+	if (ret)
+		return ret;
+	obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+	if (!obj) {
+		ret = -EINVAL;
+		goto err_release_handle;
+	}
+
+	ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+				mmap_offset);
+	if (ret)
+		goto err_put_obj;
+
+	(*mem)->gem_handle = handle;
+
+	return 0;
+
 err_put_obj:
 	drm_gem_object_put(obj);
+err_release_handle:
+	drm_gem_handle_delete(adev->kfd.client.file, handle);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b4cb907f80c60..ce4c52ec34d80 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1564,16 +1564,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 {
 	struct kfd_ioctl_import_dmabuf_args *args = data;
 	struct kfd_process_device *pdd;
-	struct dma_buf *dmabuf;
 	int idr_handle;
 	uint64_t size;
 	void *mem;
 	int r;
 
-	dmabuf = dma_buf_get(args->dmabuf_fd);
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-
 	mutex_lock(&p->mutex);
 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
 	if (!pdd) {
@@ -1587,10 +1582,10 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 		goto err_unlock;
 	}
 
-	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
-					      args->va_addr, pdd->drm_priv,
-					      (struct kgd_mem **)&mem, &size,
-					      NULL);
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,
+						 args->va_addr, pdd->drm_priv,
+						 (struct kgd_mem **)&mem, &size,
+						 NULL);
 	if (r)
 		goto err_unlock;
 
@@ -1601,7 +1596,6 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 	}
 
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 
 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
 
@@ -1612,7 +1606,6 @@ err_free:
 					       pdd->drm_priv, NULL);
 err_unlock:
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 	return r;
 }
 
-- 
GitLab


From 296b29ce8acb5dbb3ca1937f1b537b3f6be0460a Mon Sep 17 00:00:00 2001
From: Evan Quan <quanliangl@hotmail.com>
Date: Mon, 11 Dec 2023 18:06:26 +0800
Subject: [PATCH 1104/2340] drm/amd/pm: update driver_if and ppsmc headers for
 coming wbrf feature

Add those data structures to support Wifi RFI mitigation feature.

Signed-off-by: Evan Quan <quanliangl@hotmail.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h         | 11 +++++++++++
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h    |  3 ++-
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h    |  3 ++-
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h  |  5 ++---
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h  |  3 ++-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index c2265e027ca8b..ab36402f85b1b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1501,6 +1501,17 @@ enum smu_baco_seq {
 			 __dst_size);					   \
 })
 
+typedef struct {
+	uint16_t     LowFreq;
+	uint16_t     HighFreq;
+} WifiOneBand_t;
+
+typedef struct {
+	uint32_t		WifiBandEntryNum;
+	WifiOneBand_t	WifiBandEntry[11];
+	uint32_t		MmHubPadding[8];
+} WifiBandEntryTable_t;
+
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4)
 int smu_get_power_limit(void *handle,
 			uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 9dd1ed5b89406..b114d14fc053e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -1615,7 +1615,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS            9
 #define TABLE_DRIVER_INFO             10
 #define TABLE_ECCINFO                 11
-#define TABLE_COUNT                   12
+#define TABLE_WIFIBAND                12
+#define TABLE_COUNT                   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER                   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 62b7c0daff687..8b1496f8ce584 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -1605,7 +1605,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS            9
 #define TABLE_DRIVER_INFO             10
 #define TABLE_ECCINFO                 11
-#define TABLE_COUNT                   12
+#define TABLE_WIFIBAND                12
+#define TABLE_COUNT                   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER                   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index e2ee855c77488..e862d323caab7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -138,10 +138,9 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-
 #define PPSMC_MSG_DALNotPresent                  0x4E
-
-#define PPSMC_Message_Count                      0x4F
+#define PPSMC_MSG_EnableUCLKShadow               0x51
+#define PPSMC_Message_Count                      0x52
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage                    0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
index 6aaefca9b595c..a6bf9cdd130ec 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
@@ -134,6 +134,7 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-#define PPSMC_Message_Count                      0x4D
+#define PPSMC_MSG_EnableUCLKShadow               0x51
+#define PPSMC_Message_Count                      0x52
 
 #endif
-- 
GitLab


From b8b39de646274366d17a3614fdaf65fa0716ab32 Mon Sep 17 00:00:00 2001
From: Evan Quan <quanliangl@hotmail.com>
Date: Mon, 11 Dec 2023 18:06:27 +0800
Subject: [PATCH 1105/2340] drm/amd/pm: setup the framework to support Wifi RFI
 mitigation feature

With WBRF feature supported, as a driver responding to the frequencies,
amdgpu driver is able to do shadow pstate switching to mitigate possible
interference(between its (G-)DDR memory clocks and local radio module
frequency bands used by Wifi 6/6e/7).

--
v1->v2:
  - update the prompt for feature support(Lijo)
v8->v9:
  - update parameter document for smu_wbrf_event_handler(Simon)
v9->v10:
v10->v11:
 - correct the logics for wbrf range sorting(Lijo)
v13:
 - Fix the format issue (IIpo Jarvinen)

Signed-off-by: Evan Quan <quanliangl@hotmail.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  17 ++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 191 ++++++++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  22 ++
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
 5 files changed, 235 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0af8ac81facd2..82d2940692766 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -252,6 +252,8 @@ extern int amdgpu_seamless;
 extern int amdgpu_user_partt_mode;
 extern int amdgpu_agp;
 
+extern int amdgpu_wbrf;
+
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8b33b130ea36e..a194db09cde6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -208,6 +208,7 @@ int amdgpu_umsch_mm;
 int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
+int amdgpu_wbrf = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -971,6 +972,22 @@ module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
 MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
 module_param_named(agp, amdgpu_agp, int, 0444);
 
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be likely interference of
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial setting or
+ * P-state transition. However, there may be potential performance impact with this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+	"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index f464610a959f6..a3c54a053bd5c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1322,6 +1322,170 @@ static int smu_get_thermal_temperature_range(struct smu_context *smu)
 	return ret;
 }
 
+/**
+ * smu_wbrf_handle_exclusion_ranges - consume the wbrf exclusion ranges
+ *
+ * @smu: smu_context pointer
+ *
+ * Retrieve the wbrf exclusion ranges and send them to PMFW for proper handling.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu)
+{
+	struct wbrf_ranges_in_out wbrf_exclusion = {0};
+	struct freq_band_range *wifi_bands = wbrf_exclusion.band_list;
+	struct amdgpu_device *adev = smu->adev;
+	uint32_t num_of_wbrf_ranges = MAX_NUM_OF_WBRF_RANGES;
+	uint64_t start, end;
+	int ret, i, j;
+
+	ret = amd_wbrf_retrieve_freq_band(adev->dev, &wbrf_exclusion);
+	if (ret) {
+		dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n");
+		return ret;
+	}
+
+	/*
+	 * The exclusion ranges array we got might be filled with holes and duplicate
+	 * entries. For example:
+	 * {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 6189), (0, 0)...}
+	 * We need to do some sortups to eliminate those holes and duplicate entries.
+	 * Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 0)...}
+	 */
+	for (i = 0; i < num_of_wbrf_ranges; i++) {
+		start = wifi_bands[i].start;
+		end = wifi_bands[i].end;
+
+		/* get the last valid entry to fill the intermediate hole */
+		if (!start && !end) {
+			for (j = num_of_wbrf_ranges - 1; j > i; j--)
+				if (wifi_bands[j].start && wifi_bands[j].end)
+					break;
+
+			/* no valid entry left */
+			if (j <= i)
+				break;
+
+			start = wifi_bands[i].start = wifi_bands[j].start;
+			end = wifi_bands[i].end = wifi_bands[j].end;
+			wifi_bands[j].start = 0;
+			wifi_bands[j].end = 0;
+			num_of_wbrf_ranges = j;
+		}
+
+		/* eliminate duplicate entries */
+		for (j = i + 1; j < num_of_wbrf_ranges; j++) {
+			if ((wifi_bands[j].start == start) && (wifi_bands[j].end == end)) {
+				wifi_bands[j].start = 0;
+				wifi_bands[j].end = 0;
+			}
+		}
+	}
+
+	/* Send the sorted wifi_bands to PMFW */
+	ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands);
+	/* Try to set the wifi_bands again */
+	if (unlikely(ret == -EBUSY)) {
+		mdelay(5);
+		ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands);
+	}
+
+	return ret;
+}
+
+/**
+ * smu_wbrf_event_handler - handle notify events
+ *
+ * @nb: notifier block
+ * @action: event type
+ * @_arg: event data
+ *
+ * Calls relevant amdgpu function in response to wbrf event
+ * notification from kernel.
+ */
+static int smu_wbrf_event_handler(struct notifier_block *nb,
+				  unsigned long action, void *_arg)
+{
+	struct smu_context *smu = container_of(nb, struct smu_context, wbrf_notifier);
+
+	switch (action) {
+	case WBRF_CHANGED:
+		smu_wbrf_handle_exclusion_ranges(smu);
+		break;
+	default:
+		return NOTIFY_DONE;
+	};
+
+	return NOTIFY_OK;
+}
+
+/**
+ * smu_wbrf_support_check - check wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Verifies the ACPI interface whether wbrf is supported.
+ */
+static void smu_wbrf_support_check(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	smu->wbrf_supported = smu_is_asic_wbrf_supported(smu) && amdgpu_wbrf &&
+							acpi_amd_wbrf_supported_consumer(adev->dev);
+
+	if (smu->wbrf_supported)
+		dev_info(adev->dev, "RF interference mitigation is supported\n");
+}
+
+/**
+ * smu_wbrf_init - init driver wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Verifies the AMD ACPI interfaces and registers with the wbrf
+ * notifier chain if wbrf feature is supported.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_init(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	int ret;
+
+	if (!smu->wbrf_supported)
+		return 0;
+
+	smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler;
+	ret = amd_wbrf_register_notifier(&smu->wbrf_notifier);
+	if (ret)
+		return ret;
+
+	/*
+	 * Some wifiband exclusion ranges may be already there
+	 * before our driver loaded. To make sure our driver
+	 * is awared of those exclusion ranges.
+	 */
+	ret = smu_wbrf_handle_exclusion_ranges(smu);
+	if (ret)
+		dev_err(adev->dev, "Failed to handle wbrf exclusion ranges\n");
+
+	return ret;
+}
+
+/**
+ * smu_wbrf_fini - tear down driver wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Unregisters with the wbrf notifier chain.
+ */
+static void smu_wbrf_fini(struct smu_context *smu)
+{
+	if (!smu->wbrf_supported)
+		return;
+
+	amd_wbrf_unregister_notifier(&smu->wbrf_notifier);
+}
+
 static int smu_smc_hw_setup(struct smu_context *smu)
 {
 	struct smu_feature *feature = &smu->smu_feature;
@@ -1414,6 +1578,15 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 	if (ret)
 		return ret;
 
+	/* Enable UclkShadow on wbrf supported */
+	if (smu->wbrf_supported) {
+		ret = smu_enable_uclk_shadow(smu, true);
+		if (ret) {
+			dev_err(adev->dev, "Failed to enable UclkShadow feature to support wbrf!\n");
+			return ret;
+		}
+	}
+
 	/*
 	 * With SCPM enabled, these actions(and relevant messages) are
 	 * not needed and permitted.
@@ -1512,6 +1685,15 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 	 */
 	ret = smu_set_min_dcef_deep_sleep(smu,
 					  smu->smu_table.boot_values.dcefclk / 100);
+	if (ret) {
+		dev_err(adev->dev, "Error setting min deepsleep dcefclk\n");
+		return ret;
+	}
+
+	/* Init wbrf support. Properly setup the notifier */
+	ret = smu_wbrf_init(smu);
+	if (ret)
+		dev_err(adev->dev, "Error during wbrf init call\n");
 
 	return ret;
 }
@@ -1567,6 +1749,13 @@ static int smu_hw_init(void *handle)
 		return ret;
 	}
 
+	/*
+	 * Check whether wbrf is supported. This needs to be done
+	 * before SMU setup starts since part of SMU configuration
+	 * relies on this.
+	 */
+	smu_wbrf_support_check(smu);
+
 	if (smu->is_apu) {
 		ret = smu_set_gfx_imu_enable(smu);
 		if (ret)
@@ -1733,6 +1922,8 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 
+	smu_wbrf_fini(smu);
+
 	cancel_work_sync(&smu->throttling_logging_work);
 	cancel_work_sync(&smu->interrupt_work);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index ab36402f85b1b..f2a89a783c589 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -22,6 +22,8 @@
 #ifndef __AMDGPU_SMU_H__
 #define __AMDGPU_SMU_H__
 
+#include <linux/acpi_amd_wbrf.h>
+
 #include "amdgpu.h"
 #include "kgd_pp_interface.h"
 #include "dm_pp_interface.h"
@@ -570,6 +572,10 @@ struct smu_context {
 	struct delayed_work		swctf_delayed_work;
 
 	enum pp_xgmi_plpd_mode plpd_mode;
+
+	/* data structures for wbrf feature support */
+	bool				wbrf_supported;
+	struct notifier_block		wbrf_notifier;
 };
 
 struct i2c_adapter;
@@ -1375,6 +1381,22 @@ struct pptable_funcs {
 	 * @notify_rlc_state: Notify RLC power state to SMU.
 	 */
 	int (*notify_rlc_state)(struct smu_context *smu, bool en);
+
+	/**
+	 * @is_asic_wbrf_supported: check whether PMFW supports the wbrf feature
+	 */
+	bool (*is_asic_wbrf_supported)(struct smu_context *smu);
+
+	/**
+	 * @enable_uclk_shadow: Enable the uclk shadow feature on wbrf supported
+	 */
+	int (*enable_uclk_shadow)(struct smu_context *smu, bool enable);
+
+	/**
+	 * @set_wbrf_exclusion_ranges: notify SMU the wifi bands occupied
+	 */
+	int (*set_wbrf_exclusion_ranges)(struct smu_context *smu,
+					struct freq_band_range *exclusion_ranges);
 };
 
 typedef enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
index 64766ac69c53b..6f4d212607d7f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
@@ -98,6 +98,9 @@
 #define smu_set_config_table(smu, config_table)				smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table)
 #define smu_init_pptable_microcode(smu)					smu_ppt_funcs(init_pptable_microcode, 0, smu)
 #define smu_notify_rlc_state(smu, en)					smu_ppt_funcs(notify_rlc_state, 0, smu, en)
+#define smu_is_asic_wbrf_supported(smu)			smu_ppt_funcs(is_asic_wbrf_supported, false, smu)
+#define smu_enable_uclk_shadow(smu, enable)		smu_ppt_funcs(enable_uclk_shadow, 0, smu, enable)
+#define smu_set_wbrf_exclusion_ranges(smu, freq_band_range)		smu_ppt_funcs(set_wbrf_exclusion_ranges, -EOPNOTSUPP, smu, freq_band_range)
 
 #endif
 #endif
-- 
GitLab


From 71f69557cb12a4674a05b4c5fb730880f13366b1 Mon Sep 17 00:00:00 2001
From: Evan Quan <quanliangl@hotmail.com>
Date: Mon, 11 Dec 2023 18:06:28 +0800
Subject: [PATCH 1106/2340] drm/amd/pm: add flood detection for wbrf events

To protect PMFW from being overloaded.

Signed-off-by: Evan Quan <quanliangl@hotmail.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 29 +++++++++++++++----
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +++++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index a3c54a053bd5c..d409857fd622e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1410,7 +1410,8 @@ static int smu_wbrf_event_handler(struct notifier_block *nb,
 
 	switch (action) {
 	case WBRF_CHANGED:
-		smu_wbrf_handle_exclusion_ranges(smu);
+		schedule_delayed_work(&smu->wbrf_delayed_work,
+				      msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
 		break;
 	default:
 		return NOTIFY_DONE;
@@ -1419,6 +1420,20 @@ static int smu_wbrf_event_handler(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+/**
+ * smu_wbrf_delayed_work_handler - callback on delayed work timer expired
+ *
+ * @work: struct work_struct pointer
+ *
+ * Flood is over and driver will consume the latest exclusion ranges.
+ */
+static void smu_wbrf_delayed_work_handler(struct work_struct *work)
+{
+	struct smu_context *smu = container_of(work, struct smu_context, wbrf_delayed_work.work);
+
+	smu_wbrf_handle_exclusion_ranges(smu);
+}
+
 /**
  * smu_wbrf_support_check - check wbrf support
  *
@@ -1448,12 +1463,13 @@ static void smu_wbrf_support_check(struct smu_context *smu)
  */
 static int smu_wbrf_init(struct smu_context *smu)
 {
-	struct amdgpu_device *adev = smu->adev;
 	int ret;
 
 	if (!smu->wbrf_supported)
 		return 0;
 
+	INIT_DELAYED_WORK(&smu->wbrf_delayed_work, smu_wbrf_delayed_work_handler);
+
 	smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler;
 	ret = amd_wbrf_register_notifier(&smu->wbrf_notifier);
 	if (ret)
@@ -1464,11 +1480,10 @@ static int smu_wbrf_init(struct smu_context *smu)
 	 * before our driver loaded. To make sure our driver
 	 * is awared of those exclusion ranges.
 	 */
-	ret = smu_wbrf_handle_exclusion_ranges(smu);
-	if (ret)
-		dev_err(adev->dev, "Failed to handle wbrf exclusion ranges\n");
+	schedule_delayed_work(&smu->wbrf_delayed_work,
+			      msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -1484,6 +1499,8 @@ static void smu_wbrf_fini(struct smu_context *smu)
 		return;
 
 	amd_wbrf_unregister_notifier(&smu->wbrf_notifier);
+
+	cancel_delayed_work_sync(&smu->wbrf_delayed_work);
 }
 
 static int smu_smc_hw_setup(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index f2a89a783c589..f97178a602bc3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -473,6 +473,12 @@ struct stb_context {
 
 #define WORKLOAD_POLICY_MAX 7
 
+/*
+ * Configure wbrf event handling pace as there can be only one
+ * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms.
+ */
+#define SMU_WBRF_EVENT_HANDLING_PACE	10
+
 struct smu_context {
 	struct amdgpu_device            *adev;
 	struct amdgpu_irq_src		irq_source;
@@ -576,6 +582,7 @@ struct smu_context {
 	/* data structures for wbrf feature support */
 	bool				wbrf_supported;
 	struct notifier_block		wbrf_notifier;
+	struct delayed_work		wbrf_delayed_work;
 };
 
 struct i2c_adapter;
-- 
GitLab


From 18df969b44a0bdc1f24f6ca6b10595dad6f57398 Mon Sep 17 00:00:00 2001
From: Ma Jun <Jun.Ma2@amd.com>
Date: Mon, 11 Dec 2023 18:06:29 +0800
Subject: [PATCH 1107/2340] drm/amd/pm: enable Wifi RFI mitigation feature
 support for SMU13.0.0

Fulfill the SMU13.0.0 support for Wifi RFI mitigation feature.

--
v10->v11:
 - downgrade the prompt level on message failure(Lijo)
v13:
 - Fix the format issue (IIpo Jarvinen)
 - Move function smu_v13_0_0_set_wbrf_exclusion_ranges to
smu_v13_0.c as a generic code for later use (IIpo Jarvinen)

Co-developed-by: Evan Quan <quanliangl@hotmail.com>
Signed-off-by: Evan Quan <quanliangl@hotmail.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  2 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  4 ++
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    | 48 +++++++++++++++++++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 22 +++++++++
 5 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index f97178a602bc3..2aa4fea873147 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -23,6 +23,7 @@
 #define __AMDGPU_SMU_H__
 
 #include <linux/acpi_amd_wbrf.h>
+#include <linux/units.h>
 
 #include "amdgpu.h"
 #include "kgd_pp_interface.h"
@@ -320,6 +321,7 @@ enum smu_table_id {
 	SMU_TABLE_PACE,
 	SMU_TABLE_ECCINFO,
 	SMU_TABLE_COMBO_PPTABLE,
+	SMU_TABLE_WIFIBAND,
 	SMU_TABLE_COUNT,
 };
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index ea8ea99b74368..953a767613b11 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -260,7 +260,8 @@
 	__SMU_DUMMY_MAP(PowerDownUmsch),	\
 	__SMU_DUMMY_MAP(SetSoftMaxVpe),	\
 	__SMU_DUMMY_MAP(SetSoftMinVpe), \
-	__SMU_DUMMY_MAP(GetMetricsVersion),
+	__SMU_DUMMY_MAP(GetMetricsVersion), \
+	__SMU_DUMMY_MAP(EnableUCLKShadow),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 5e7b8f29fecce..fbd57fa1a0049 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -294,5 +294,9 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
 
 int smu_v13_0_disable_pmfw_state(struct smu_context *smu);
 
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable);
+
+int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu,
+						 struct freq_band_range *exclusion_ranges);
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 2877e5da5b5ad..771a3d457c335 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2507,3 +2507,51 @@ int smu_v13_0_disable_pmfw_state(struct smu_context *smu)
 
 	return ret == 0 ? 0 : -EINVAL;
 }
+
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable)
+{
+	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableUCLKShadow, enable, NULL);
+}
+
+int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu,
+						 struct freq_band_range *exclusion_ranges)
+{
+	WifiBandEntryTable_t wifi_bands;
+	int valid_entries = 0;
+	int ret, i;
+
+	memset(&wifi_bands, 0, sizeof(wifi_bands));
+	for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) {
+		if (!exclusion_ranges[i].start && !exclusion_ranges[i].end)
+			break;
+
+		/* PMFW expects the inputs to be in Mhz unit */
+		wifi_bands.WifiBandEntry[valid_entries].LowFreq =
+			DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, HZ_PER_MHZ);
+		wifi_bands.WifiBandEntry[valid_entries++].HighFreq =
+			DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_PER_MHZ);
+	}
+	wifi_bands.WifiBandEntryNum = valid_entries;
+
+	/*
+	 * Per confirm with PMFW team, WifiBandEntryNum = 0
+	 * is a valid setting.
+	 *
+	 * Considering the scenarios below:
+	 * - At first the wifi device adds an exclusion range e.g. (2400,2500) to
+	 *   BIOS and our driver gets notified. We will set WifiBandEntryNum = 1
+	 *   and pass the WifiBandEntry (2400, 2500) to PMFW.
+	 *
+	 * - Later the wifi device removes the wifiband list added above and
+	 *   our driver gets notified again. At this time, driver will set
+	 *   WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW.
+	 *
+	 * - PMFW may still need to do some uclk shadow update(e.g. switching
+	 *   from shadow clock back to primary clock) on receiving this.
+	 */
+	ret = smu_cmn_update_table(smu, SMU_TABLE_WIFIBAND, 0, &wifi_bands, true);
+	if (ret)
+		dev_warn(smu->adev->dev, "Failed to set wifiband!");
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 2d25a9e47651f..a24aa886c6369 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -169,6 +169,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(AllowIHHostInterrupt,		PPSMC_MSG_AllowIHHostInterrupt,       0),
 	MSG_MAP(ReenableAcDcInterrupt,		PPSMC_MSG_ReenableAcDcInterrupt,       0),
 	MSG_MAP(DALNotPresent,		PPSMC_MSG_DALNotPresent,       0),
+	MSG_MAP(EnableUCLKShadow,		PPSMC_MSG_EnableUCLKShadow,            0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -253,6 +254,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(I2C_COMMANDS),
 	TAB_MAP(ECCINFO),
 	TAB_MAP(OVERDRIVE),
+	TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -498,6 +500,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 	SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+	SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+		       sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+		       AMDGPU_GEM_DOMAIN_VRAM);
 
 	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
 	if (!smu_table->metrics_table)
@@ -2938,6 +2943,20 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
 	return ret;
 }
 
+static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	switch (adev->ip_versions[MP1_HWIP][0]) {
+	case IP_VERSION(13, 0, 0):
+		return smu->smc_fw_version >= 0x004e6300;
+	case IP_VERSION(13, 0, 10):
+		return smu->smc_fw_version >= 0x00503300;
+	default:
+		return false;
+	}
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -3016,6 +3035,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.gpo_control = smu_v13_0_gpo_control,
 	.get_ecc_info = smu_v13_0_0_get_ecc_info,
 	.notify_display_change = smu_v13_0_notify_display_change,
+	.is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check,
+	.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+	.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
-- 
GitLab


From cca850267d33f1153e16e07dc7c32ce5bc3df1fe Mon Sep 17 00:00:00 2001
From: Evan Quan <quanliangl@hotmail.com>
Date: Mon, 11 Dec 2023 18:06:30 +0800
Subject: [PATCH 1108/2340] drm/amd/pm: enable Wifi RFI mitigation feature
 support for SMU13.0.7

Fulfill the SMU13.0.7 support for Wifi RFI mitigation feature.

--
v10->v11:
  - downgrade the prompt level on message failure(Lijo)
v13:
 - Fix the format issue (IIpo Jarvinen)
 - Remove duplicate code (IIpo Jarvinen)

Signed-off-by: Evan Quan <quanliangl@hotmail.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c    | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 2909ec06b1cb9..59606a19e3d2b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -140,6 +140,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(AllowGpo,			PPSMC_MSG_SetGpoAllow,           0),
 	MSG_MAP(GetPptLimit,			PPSMC_MSG_GetPptLimit,                 0),
 	MSG_MAP(NotifyPowerSource,		PPSMC_MSG_NotifyPowerSource,           0),
+	MSG_MAP(EnableUCLKShadow,		PPSMC_MSG_EnableUCLKShadow,            0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -222,6 +223,7 @@ static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(ACTIVITY_MONITOR_COEFF),
 	[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
 	TAB_MAP(OVERDRIVE),
+	TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -512,6 +514,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu)
 		       AMDGPU_GEM_DOMAIN_VRAM);
 	SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+	SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+		       sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+		       AMDGPU_GEM_DOMAIN_VRAM);
 
 	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
 	if (!smu_table->metrics_table)
@@ -2535,6 +2540,11 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context *smu,
 					       NULL);
 }
 
+static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu)
+{
+	return smu->smc_fw_version > 0x00524600;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2601,6 +2611,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.set_mp1_state = smu_v13_0_7_set_mp1_state,
 	.set_df_cstate = smu_v13_0_7_set_df_cstate,
 	.gpo_control = smu_v13_0_gpo_control,
+	.is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check,
+	.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+	.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
 };
 
 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
-- 
GitLab


From 94b1e028e15c94362420f9f3f711fafbf9d52996 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 7 Dec 2023 10:14:41 -0500
Subject: [PATCH 1109/2340] drm/amdgpu/sdma5.2: add begin/end_use ring
 callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add begin/end_use ring callbacks to disallow GFXOFF when
SDMA work is submitted and allow it again afterward.

This should avoid corner cases where GFXOFF is erroneously
entered when SDMA is still active.  For now just allow/disallow
GFXOFF in the begin and end helpers until we root cause the
issue.  This should not impact power as SDMA usage is pretty
minimal and GFXOSS should not be active when SDMA is active
anyway, this just makes it explicit.

v2: move everything into sdma5.2 code.  No reason for this
to be generic at this point.
v3: Add comments in new code

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com> (v1)
Tested-by: Mario Limonciello <mario.limonciello@amd.com> (v1)
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.15+
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 28 ++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 83c240f741b51..0058f3f7cf6e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
 }
 
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+	 * disallow GFXOFF in some cases leading to
+	 * hangs in SDMA.  Disallow GFXOFF while SDMA is active.
+	 * We can probably just limit this to 5.2.3,
+	 * but it shouldn't hurt for other parts since
+	 * this GFXOFF will be disallowed anyway when SDMA is
+	 * active, this just makes it explicit.
+	 */
+	amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+	 * disallow GFXOFF in some cases leading to
+	 * hangs in SDMA.  Allow GFXOFF when SDMA is complete.
+	 */
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
 const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
 	.name = "sdma_v5_2",
 	.early_init = sdma_v5_2_early_init,
@@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
 	.test_ib = sdma_v5_2_ring_test_ib,
 	.insert_nop = sdma_v5_2_ring_insert_nop,
 	.pad_ib = sdma_v5_2_ring_pad_ib,
+	.begin_use = sdma_v5_2_ring_begin_use,
+	.end_use = sdma_v5_2_ring_end_use,
 	.emit_wreg = sdma_v5_2_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
-- 
GitLab


From 91963397c49aa2907aeafa52d929555dcbc9cd07 Mon Sep 17 00:00:00 2001
From: Friedrich Vock <friedrich.vock@gmx.de>
Date: Sat, 2 Dec 2023 01:17:40 +0100
Subject: [PATCH 1110/2340] drm/amdgpu: Enable tunneling on high-priority
 compute queues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This improves latency if the GPU is already busy with other work.
This is useful for VR compositors that submit highly latency-sensitive
compositing work on high-priority compute queues while the GPU is busy
rendering the next frame.

Userspace merge request:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462

v2: bump driver version (Alex)

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Friedrich Vock <friedrich.vock@gmx.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c   |  3 ++-
 5 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 82d2940692766..616b6c9117679 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -791,6 +791,7 @@ struct amdgpu_mqd_prop {
 	uint64_t eop_gpu_addr;
 	uint32_t hqd_pipe_priority;
 	uint32_t hqd_queue_priority;
+	bool allow_tunneling;
 	bool hqd_active;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a194db09cde6a..880137774b4eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -115,9 +115,10 @@
  *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
  * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
  * - 3.56.0 - Update IB start address and size alignment for decode and encode
+ * - 3.57.0 - Compute tunneling on GFX10+
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	56
+#define KMS_DRIVER_MINOR	57
 #define KMS_DRIVER_PATCHLEVEL	0
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6a80d3ec887e9..45424ebf96814 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -642,6 +642,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
 				    struct amdgpu_mqd_prop *prop)
 {
 	struct amdgpu_device *adev = ring->adev;
+	bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+				    amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+	bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+				amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
 
 	memset(prop, 0, sizeof(*prop));
 
@@ -659,10 +663,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
 	 */
 	prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
 
-	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
-	     amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
-	    (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
-	     amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+	prop->allow_tunneling = is_high_prio_compute;
+	if (is_high_prio_compute || is_high_prio_gfx) {
 		prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
 		prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c8a3bf01743f6..73f6d7e72c737 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+			    prop->allow_tunneling);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
 	mqd->cp_hqd_pq_control = tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index c659ef0f47ce5..bdcf96df69e6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -3847,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+			    prop->allow_tunneling);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
 	mqd->cp_hqd_pq_control = tmp;
-- 
GitLab


From e747235ef3c253298157b6cd634b9b2695f33d20 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 12 Dec 2023 15:53:38 +0200
Subject: [PATCH 1111/2340] drm/radeon: include drm/drm_edid.h only where
 needed

Including drm_edid.h from radeon_mode.h causes the rebuild of more than
a hundred files when drm_edid.h is modified, while there are only a
handful of files that actually need to include drm_edid.h.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/atombios_encoders.c | 1 +
 drivers/gpu/drm/radeon/dce3_1_afmt.c       | 1 +
 drivers/gpu/drm/radeon/dce6_afmt.c         | 1 +
 drivers/gpu/drm/radeon/evergreen.c         | 1 +
 drivers/gpu/drm/radeon/evergreen_hdmi.c    | 1 +
 drivers/gpu/drm/radeon/radeon_atombios.c   | 1 +
 drivers/gpu/drm/radeon/radeon_audio.c      | 1 +
 drivers/gpu/drm/radeon/radeon_audio.h      | 4 +++-
 drivers/gpu/drm/radeon/radeon_combios.c    | 1 +
 drivers/gpu/drm/radeon/radeon_encoders.c   | 1 +
 drivers/gpu/drm/radeon/radeon_mode.h       | 2 +-
 11 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 4aca09cab4b8c..6e537c5bd2959 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -29,6 +29,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
 #include <drm/drm_file.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <drm/radeon_drm.h>
diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
index e8fe239b9d79c..324e9b7650986 100644
--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -21,6 +21,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 #include <linux/hdmi.h>
+#include <drm/drm_edid.h>
 
 #include "radeon.h"
 #include "radeon_asic.h"
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 4a1d5447eac17..4c06f47453fd2 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -21,6 +21,7 @@
  *
  */
 #include <linux/hdmi.h>
+#include <drm/drm_edid.h>
 
 #include "dce6_afmt.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f0ae087be914e..a424b86008b88 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
 #include <drm/drm_fourcc.h>
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 5f3078f8ab950..681119c91d947 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -26,6 +26,7 @@
  */
 #include <linux/hdmi.h>
 
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 #include "evergreen_hdmi.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 85c4bb186203c..3596ea4a8b60f 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -27,6 +27,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_device.h>
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 279bf130a18c4..91b58fbc2be77 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -27,6 +27,7 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_eld.h>
+#include <drm/drm_edid.h>
 #include "dce6_afmt.h"
 #include "evergreen_hdmi.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 05e67867469b0..dacaaa007051e 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -27,7 +27,9 @@
 
 #include <linux/types.h>
 
-#define RREG32_ENDPOINT(block, reg)		\
+struct cea_sad;
+
+#define RREG32_ENDPOINT(block, reg)				\
 	radeon_audio_endpoint_rreg(rdev, (block), (reg))
 #define WREG32_ENDPOINT(block, reg, v)	\
 	radeon_audio_endpoint_wreg(rdev, (block), (reg), (v))
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 2620efc7c675b..6952b1273b0f7 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_device.h>
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 9cb6401fe97ed..3de3dce9e89d6 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -26,6 +26,7 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_device.h>
 #include <drm/radeon_drm.h>
 
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 1decdcec02649..59c4db13d90ae 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -32,13 +32,13 @@
 
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 
+struct edid;
 struct radeon_bo;
 struct radeon_device;
 
-- 
GitLab


From 671994e3bf33a414dc6a8c147969dae3a15ba9de Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:49 -0100
Subject: [PATCH 1112/2340] drm/amd/display: add plane 3D LUT driver-specific
 properties

Add 3D LUT property for plane color transformations using a 3D lookup
table. 3D LUT allows for highly accurate and complex color
transformations and is suitable to adjust the balance between color
channels. It's also more complex to manage and require more
computational resources.

Since a 3D LUT has a limited number of entries in each dimension we want
to use them in an optimal fashion. This means using the 3D LUT in a
colorspace that is optimized for human vision, such as sRGB, PQ, or
another non-linear space. Therefore, userpace may need one 1D LUT
(shaper) before it to delinearize content and another 1D LUT after 3D
LUT (blend) to linearize content again for blending. The next patches
add these 1D LUTs to the plane color mgmt pipeline.

v3:
- improve commit message about 3D LUT
- describe the 3D LUT entries and size (Harry)

v4:
- advertise 3D LUT max size as the size of a single-dimension

v5:
- get lut3d blob correctly (Joshua)
- fix doc about 3d-lut dimension size (Sebastian)

Signed-off-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      | 18 ++++++++++++++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  9 +++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 14 +++++++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 24 +++++++++++++++++++
 4 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 218623e1b9aac..7350094f3db6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -364,6 +364,24 @@ struct amdgpu_mode_info {
 	 * @plane_hdr_mult_property:
 	 */
 	struct drm_property *plane_hdr_mult_property;
+	/**
+	 * @plane_lut3d_property: Plane property for color transformation using
+	 * a 3D LUT (pre-blending), a three-dimensional array where each
+	 * element is an RGB triplet. Each dimension has the size of
+	 * lut3d_size. The array contains samples from the approximated
+	 * function. On AMD, values between samples are estimated by
+	 * tetrahedral interpolation. The array is accessed with three indices,
+	 * one for each input dimension (color channel), blue being the
+	 * outermost dimension, red the innermost.
+	 */
+	struct drm_property *plane_lut3d_property;
+	/**
+	 * @plane_degamma_lut_size_property: Plane property to define the max
+	 * size of 3D LUT as supported by the driver (read-only). The max size
+	 * is the max size of one dimension and, therefore, the max number of
+	 * entries for 3D LUT array is the 3D LUT size cubed;
+	 */
+	struct drm_property *plane_lut3d_size_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index d36d68d517a81..0f78024ac0e53 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -784,6 +784,11 @@ struct dm_plane_state {
 	 * TF is needed for any subsequent linear-to-non-linear transforms.
 	 */
 	__u64 hdr_mult;
+	/**
+	 * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
+	 * &struct drm_color_lut.
+	 */
+	struct drm_property_blob *lut3d;
 };
 
 struct dm_crtc_state {
@@ -869,6 +874,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 
 void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 
+/* 3D LUT max size is 17x17x17 (4913 entries) */
+#define MAX_COLOR_3DLUT_SIZE 17
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+/* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index b5b34a9209e4d..a3b4f6a4c4a8f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -231,6 +231,20 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_hdr_mult_property = prop;
 
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_LUT3D", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_lut3d_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_lut3d_size_property = prop;
+
 	return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 45a2c9b36630d..e588cff7d6b0a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1364,6 +1364,9 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 	if (old_dm_plane_state->degamma_lut)
 		dm_plane_state->degamma_lut =
 			drm_property_blob_get(old_dm_plane_state->degamma_lut);
+	if (old_dm_plane_state->lut3d)
+		dm_plane_state->lut3d =
+			drm_property_blob_get(old_dm_plane_state->lut3d);
 
 	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
 	dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
@@ -1437,6 +1440,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
 
 	if (dm_plane_state->degamma_lut)
 		drm_property_blob_put(dm_plane_state->degamma_lut);
+	if (dm_plane_state->lut3d)
+		drm_property_blob_put(dm_plane_state->lut3d);
 
 	if (dm_plane_state->dc_state)
 		dc_plane_state_release(dm_plane_state->dc_state);
@@ -1470,6 +1475,14 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
 	drm_object_attach_property(&plane->base,
 				   dm->adev->mode_info.plane_hdr_mult_property,
 				   AMDGPU_HDR_MULT_DEFAULT);
+
+	if (dpp_color_caps.hw_3d_lut) {
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_lut3d_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_lut3d_size_property,
+					   MAX_COLOR_3DLUT_SIZE);
+	}
 }
 
 static int
@@ -1501,6 +1514,14 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
 			dm_plane_state->hdr_mult = val;
 			dm_plane_state->base.color_mgmt_changed = 1;
 		}
+	} else if (property == adev->mode_info.plane_lut3d_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->lut3d,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
 	} else {
 		drm_dbg_atomic(plane->dev,
 			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
@@ -1528,6 +1549,9 @@ dm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = dm_plane_state->degamma_tf;
 	} else if (property == adev->mode_info.plane_hdr_mult_property) {
 		*val = dm_plane_state->hdr_mult;
+	} else 	if (property == adev->mode_info.plane_lut3d_property) {
+		*val = (dm_plane_state->lut3d) ?
+			dm_plane_state->lut3d->base.id : 0;
 	} else {
 		return -EINVAL;
 	}
-- 
GitLab


From 058eb51912ca3a5fb121668b30e8e94d976afb27 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Tue, 12 Dec 2023 16:46:30 +0800
Subject: [PATCH 1113/2340] drm/amdgpu: Switch to aca bank for xgmi pcs err cnt

Instead of software managed counters.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Stanley.Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h              | 2 ++
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index e51e8918e6671..b399f1b62887a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -46,6 +46,8 @@
 #define MCA_REG__STATUS__ERRORCODEEXT(x)	MCA_REG_FIELD(x, 21, 16)
 #define MCA_REG__STATUS__ERRORCODE(x)		MCA_REG_FIELD(x, 15, 0)
 
+#define MCA_REG__MISC0__ERRCNT(x)		MCA_REG_FIELD(x, 43, 32)
+
 #define MCA_REG__SYND__ERRORINFORMATION(x)	MCA_REG_FIELD(x, 17, 0)
 
 enum amdgpu_mca_ip {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index ddd782fbee7a1..3998c9b31d076 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2537,13 +2537,15 @@ static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, st
 					  uint32_t *count)
 {
 	u32 ext_error_code;
+	u32 err_cnt;
 
 	ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]);
+	err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);
 
 	if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0)
-		*count = 1;
+		*count = err_cnt;
 	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6)
-		*count = 1;
+		*count = err_cnt;
 
 	return 0;
 }
-- 
GitLab


From c01b9be7b20999bfeaacc1e574be0db93c14c478 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 12 Dec 2023 01:09:16 -0600
Subject: [PATCH 1114/2340] drm/amd: Fix a probing order problem on SDMA 2.4

commit 751e293f2c99 ("drm/amd: Move microcode init from sw_init to
early_init for SDMA v2.4") made a fateful mistake in
`adev->sdma.num_instances` wasn't declared when sdma_v2_4_init_microcode()
was run. This caused probing to fail.

Move the declaration to right before sdma_v2_4_init_microcode().

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3043
Fixes: 751e293f2c99 ("drm/amd: Move microcode init from sw_init to early_init for SDMA v2.4")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 45377a1752503..8d5d86675a7fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -813,12 +813,12 @@ static int sdma_v2_4_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
+	adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+
 	r = sdma_v2_4_init_microcode(adev);
 	if (r)
 		return r;
 
-	adev->sdma.num_instances = SDMA_MAX_INSTANCE;
-
 	sdma_v2_4_set_ring_funcs(adev);
 	sdma_v2_4_set_buffer_funcs(adev);
 	sdma_v2_4_set_vm_pte_funcs(adev);
-- 
GitLab


From bd33bb1409b494558a2935f7bbc7842def957fcd Mon Sep 17 00:00:00 2001
From: Jonathan Kim <jonathan.kim@amd.com>
Date: Tue, 5 Dec 2023 12:22:07 -0500
Subject: [PATCH 1115/2340] drm/amdkfd: fix mes set shader debugger process
 management

MES provides the driver a call to explicitly flush stale process memory
within the MES to avoid a race condition that results in a fatal
memory violation.

When SET_SHADER_DEBUGGER is called, the driver passes a memory address
that represents a process context address MES uses to keep track of
future per-process calls.

Normally, MES will purge its process context list when the last queue
has been removed.  The driver, however, can call SET_SHADER_DEBUGGER
regardless of whether a queue has been added or not.

If SET_SHADER_DEBUGGER has been called with no queues as the last call
prior to process termination, the passed process context address will
still reside within MES.

On a new process call to SET_SHADER_DEBUGGER, the driver may end up
passing an identical process context address value (based on per-process
gpu memory address) to MES but is now pointing to a new allocated buffer
object during KFD process creation.  Since the MES is unaware of this,
access of the passed address points to the stale object within MES and
triggers a fatal memory violation.

The solution is for KFD to explicitly flush the process context address
from MES on process termination.

Note that the flush call and the MES debugger calls use the same MES
interface but are separated as KFD calls to avoid conflicting with each
other.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Tested-by: Alice Wong <shiwei.wong@amd.com>
Reviewed-by: Eric Huang <jinhuieric.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c       | 31 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h       | 10 +++---
 .../amd/amdkfd/kfd_process_queue_manager.c    |  1 +
 drivers/gpu/drm/amd/include/mes_v11_api_def.h |  3 +-
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index e544b823abf6c..e98de23250dc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -916,6 +916,11 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
 	op_input.set_shader_debugger.process_context_addr = process_context_addr;
 	op_input.set_shader_debugger.flags.u32all = flags;
+
+	/* use amdgpu mes_flush_shader_debugger instead */
+	if (op_input.set_shader_debugger.flags.process_ctx_flush)
+		return -EINVAL;
+
 	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
 	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
 			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
@@ -935,6 +940,32 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 	return r;
 }
 
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+				     uint64_t process_context_addr)
+{
+	struct mes_misc_op_input op_input = {0};
+	int r;
+
+	if (!adev->mes.funcs->misc_op) {
+		DRM_ERROR("mes flush shader debugger is not supported!\n");
+		return -EINVAL;
+	}
+
+	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+	op_input.set_shader_debugger.process_context_addr = process_context_addr;
+	op_input.set_shader_debugger.flags.process_ctx_flush = true;
+
+	amdgpu_mes_lock(&adev->mes);
+
+	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+	if (r)
+		DRM_ERROR("failed to set_shader_debugger\n");
+
+	amdgpu_mes_unlock(&adev->mes);
+
+	return r;
+}
+
 static void
 amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 894b9b1330009..7d4f93fea937a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -296,9 +296,10 @@ struct mes_misc_op_input {
 			uint64_t process_context_addr;
 			union {
 				struct {
-					uint64_t single_memop : 1;
-					uint64_t single_alu_op : 1;
-					uint64_t reserved: 30;
+					uint32_t single_memop : 1;
+					uint32_t single_alu_op : 1;
+					uint32_t reserved: 29;
+					uint32_t process_ctx_flush: 1;
 				};
 				uint32_t u32all;
 			} flags;
@@ -374,7 +375,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 				const uint32_t *tcp_watch_cntl,
 				uint32_t flags,
 				bool trap_en);
-
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+				uint64_t process_context_addr);
 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
 			int queue_type, int idx,
 			struct amdgpu_mes_ctx_data *ctx_data,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 77f493262e058..8e55e78fce4e8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -87,6 +87,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 		return;
 
 	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+	amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
 	pdd->already_dequeued = true;
 }
 
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index 1fbfd1aa987e3..ec5b9ab67c5e4 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -572,7 +572,8 @@ struct SET_SHADER_DEBUGGER {
 		struct {
 			uint32_t single_memop : 1;  /* SQ_DEBUG.single_memop */
 			uint32_t single_alu_op : 1; /* SQ_DEBUG.single_alu_op */
-			uint32_t reserved : 30;
+			uint32_t reserved : 29;
+			uint32_t process_ctx_flush : 1;
 		};
 		uint32_t u32all;
 	} flags;
-- 
GitLab


From f545d82479b46368bf00d0bfecf33fa914bd5f8f Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:50 -0100
Subject: [PATCH 1116/2340] drm/amd/display: add plane shaper LUT and TF
 driver-specific properties

On AMD HW, 3D LUT always assumes a preceding shaper 1D LUT used for
delinearizing and/or normalizing the color space before applying a 3D
LUT. Add pre-defined transfer function to enable delinearizing content
with or without shaper LUT, where AMD color module calculates the
resulted shaper curve. We apply an inverse EOTF to go from linear
values to encoded values. If we are already in a non-linear space and/or
don't need to normalize values, we can bypass shaper LUT with a linear
transfer function that is also the default TF value.

There is no shaper ROM. When setting shaper TF (!= Identity) and LUT at
the same time, the color module will combine the pre-defined TF and the
custom LUT values into the LUT that's actually programmed.

v2:
- squash commits for shaper LUT and shaper TF
- define inverse EOTF as supported shaper TFs

v3:
- spell out TF+LUT behavior in the commit and comments (Harry)
- replace BT709 EOTF by inv OETF

v5:
- get shaper blob correctly (Joshua)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      | 21 ++++++++++++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 29 ++++++++++++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 33 +++++++++++++++++++
 4 files changed, 94 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 7350094f3db6a..981a2eb252570 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -364,6 +364,27 @@ struct amdgpu_mode_info {
 	 * @plane_hdr_mult_property:
 	 */
 	struct drm_property *plane_hdr_mult_property;
+	/**
+	 * @shaper_lut_property: Plane property to set pre-blending shaper LUT
+	 * that converts color content before 3D LUT. If
+	 * plane_shaper_tf_property != Identity TF, AMD color module will
+	 * combine the user LUT values with pre-defined TF into the LUT
+	 * parameters to be programmed.
+	 */
+	struct drm_property *plane_shaper_lut_property;
+	/**
+	 * @shaper_lut_size_property: Plane property for the size of
+	 * pre-blending shaper LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_shaper_lut_size_property;
+	/**
+	 * @plane_shaper_tf_property: Plane property to set a predefined
+	 * transfer function for pre-blending shaper (before applying 3D LUT)
+	 * with or without LUT. There is no shaper ROM, but we can use AMD
+	 * color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *plane_shaper_tf_property;
 	/**
 	 * @plane_lut3d_property: Plane property for color transformation using
 	 * a 3D LUT (pre-blending), a three-dimensional array where each
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 0f78024ac0e53..9bee60cbc2b59 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -784,6 +784,17 @@ struct dm_plane_state {
 	 * TF is needed for any subsequent linear-to-non-linear transforms.
 	 */
 	__u64 hdr_mult;
+	/**
+	 * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
+	 * array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *shaper_lut;
+	/**
+	 * @shaper_tf:
+	 *
+	 * Predefined transfer function to delinearize color space.
+	 */
+	enum amdgpu_transfer_function shaper_tf;
 	/**
 	 * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
 	 * &struct drm_color_lut.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a3b4f6a4c4a8f..866c6f1d3d996 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -173,6 +173,14 @@ static const u32 amdgpu_eotf =
 	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
 	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
 
+static const u32 amdgpu_inv_eotf =
+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
+
 static struct drm_property *
 amdgpu_create_tf_property(struct drm_device *dev,
 			  const char *name,
@@ -231,6 +239,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_hdr_mult_property = prop;
 
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_SHAPER_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_SHAPER_TF",
+					 amdgpu_inv_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_tf_property = prop;
+
 	prop = drm_property_create(adev_to_drm(adev),
 				   DRM_MODE_PROP_BLOB,
 				   "AMD_PLANE_LUT3D", 0);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index e588cff7d6b0a..7f0de29889f05 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1343,6 +1343,7 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
 	__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
 	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 	amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
+	amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 }
 
 static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
@@ -1364,12 +1365,16 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 	if (old_dm_plane_state->degamma_lut)
 		dm_plane_state->degamma_lut =
 			drm_property_blob_get(old_dm_plane_state->degamma_lut);
+	if (old_dm_plane_state->shaper_lut)
+		dm_plane_state->shaper_lut =
+			drm_property_blob_get(old_dm_plane_state->shaper_lut);
 	if (old_dm_plane_state->lut3d)
 		dm_plane_state->lut3d =
 			drm_property_blob_get(old_dm_plane_state->lut3d);
 
 	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
 	dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
+	dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf;
 
 	return &dm_plane_state->base;
 }
@@ -1442,6 +1447,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
 		drm_property_blob_put(dm_plane_state->degamma_lut);
 	if (dm_plane_state->lut3d)
 		drm_property_blob_put(dm_plane_state->lut3d);
+	if (dm_plane_state->shaper_lut)
+		drm_property_blob_put(dm_plane_state->shaper_lut);
 
 	if (dm_plane_state->dc_state)
 		dc_plane_state_release(dm_plane_state->dc_state);
@@ -1477,6 +1484,14 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
 				   AMDGPU_HDR_MULT_DEFAULT);
 
 	if (dpp_color_caps.hw_3d_lut) {
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_shaper_lut_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_shaper_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_shaper_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
 		drm_object_attach_property(&plane->base,
 					   mode_info.plane_lut3d_property, 0);
 		drm_object_attach_property(&plane->base,
@@ -1514,6 +1529,19 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
 			dm_plane_state->hdr_mult = val;
 			dm_plane_state->base.color_mgmt_changed = 1;
 		}
+	} else if (property == adev->mode_info.plane_shaper_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->shaper_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_shaper_tf_property) {
+		if (dm_plane_state->shaper_tf != val) {
+			dm_plane_state->shaper_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
 	} else if (property == adev->mode_info.plane_lut3d_property) {
 		ret = drm_property_replace_blob_from_id(plane->dev,
 							&dm_plane_state->lut3d,
@@ -1549,6 +1577,11 @@ dm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = dm_plane_state->degamma_tf;
 	} else if (property == adev->mode_info.plane_hdr_mult_property) {
 		*val = dm_plane_state->hdr_mult;
+	} else 	if (property == adev->mode_info.plane_shaper_lut_property) {
+		*val = (dm_plane_state->shaper_lut) ?
+			dm_plane_state->shaper_lut->base.id : 0;
+	} else if (property == adev->mode_info.plane_shaper_tf_property) {
+		*val = dm_plane_state->shaper_tf;
 	} else 	if (property == adev->mode_info.plane_lut3d_property) {
 		*val = (dm_plane_state->lut3d) ?
 			dm_plane_state->lut3d->base.id : 0;
-- 
GitLab


From 0ef47454dc82358b62a424b37c7520a84f307edb Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:57:51 -0100
Subject: [PATCH 1117/2340] drm/amd/display: add plane blend LUT and TF
 driver-specific properties

Blend 1D LUT or a pre-defined transfer function (TF) can be set to
linearize content before blending, so that it's positioned just before
blending planes in the AMD color mgmt pipeline, and after 3D LUT
(non-linear space). Shaper and Blend LUTs are 1D LUTs that sandwich 3D
LUT. Drivers should advertize blend properties according to HW caps.

There is no blend ROM for pre-defined TF. When setting blend TF (!=
Identity) and LUT at the same time, the color module will combine the
pre-defined TF and the custom LUT values into the LUT that's actually
programmed.

v3:
- spell out TF+LUT behavior in the commit and comments (Harry)

v5:
- get blend blob correctly

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      | 22 +++++++++++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 ++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 21 +++++++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 37 +++++++++++++++++++
 4 files changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 981a2eb252570..6ff37272ccad4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -403,6 +403,28 @@ struct amdgpu_mode_info {
 	 * entries for 3D LUT array is the 3D LUT size cubed;
 	 */
 	struct drm_property *plane_lut3d_size_property;
+	/**
+	 * @plane_blend_lut_property: Plane property for output gamma before
+	 * blending. Userspace set a blend LUT to convert colors after 3D LUT
+	 * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+	 * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property
+	 * != Identity TF, AMD color module will combine the user LUT values
+	 * with pre-defined TF into the LUT parameters to be programmed.
+	 */
+	struct drm_property *plane_blend_lut_property;
+	/**
+	 * @plane_blend_lut_size_property: Plane property to define the max
+	 * size of blend LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_blend_lut_size_property;
+	/**
+	 * @plane_blend_tf_property: Plane property to set a predefined
+	 * transfer function for pre-blending blend/out_gamma (after applying
+	 * 3D LUT) with or without LUT. There is no blend ROM, but we can use
+	 * AMD color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *plane_blend_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 9bee60cbc2b59..a9a0bc875b783 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -800,6 +800,18 @@ struct dm_plane_state {
 	 * &struct drm_color_lut.
 	 */
 	struct drm_property_blob *lut3d;
+	/**
+	 * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
+	 * array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *blend_lut;
+	/**
+	 * @blend_tf:
+	 *
+	 * Pre-defined transfer function for converting plane pixel data before
+	 * applying blend LUT.
+	 */
+	enum amdgpu_transfer_function blend_tf;
 };
 
 struct dm_crtc_state {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 866c6f1d3d996..425676f5afb7c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -274,6 +274,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_lut3d_size_property = prop;
 
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_BLEND_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_BLEND_TF",
+					 amdgpu_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_tf_property = prop;
+
 	return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 7f0de29889f05..35a4732483ba7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1344,6 +1344,7 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
 	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 	amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
 	amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 }
 
 static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
@@ -1371,10 +1372,14 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 	if (old_dm_plane_state->lut3d)
 		dm_plane_state->lut3d =
 			drm_property_blob_get(old_dm_plane_state->lut3d);
+	if (old_dm_plane_state->blend_lut)
+		dm_plane_state->blend_lut =
+			drm_property_blob_get(old_dm_plane_state->blend_lut);
 
 	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
 	dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
 	dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf;
+	dm_plane_state->blend_tf = old_dm_plane_state->blend_tf;
 
 	return &dm_plane_state->base;
 }
@@ -1449,6 +1454,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
 		drm_property_blob_put(dm_plane_state->lut3d);
 	if (dm_plane_state->shaper_lut)
 		drm_property_blob_put(dm_plane_state->shaper_lut);
+	if (dm_plane_state->blend_lut)
+		drm_property_blob_put(dm_plane_state->blend_lut);
 
 	if (dm_plane_state->dc_state)
 		dc_plane_state_release(dm_plane_state->dc_state);
@@ -1498,6 +1505,17 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
 					   mode_info.plane_lut3d_size_property,
 					   MAX_COLOR_3DLUT_SIZE);
 	}
+
+	if (dpp_color_caps.ogam_ram) {
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_blend_lut_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_blend_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_blend_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+	}
 }
 
 static int
@@ -1550,6 +1568,19 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
 							&replaced);
 		dm_plane_state->base.color_mgmt_changed |= replaced;
 		return ret;
+	} else if (property == adev->mode_info.plane_blend_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->blend_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_blend_tf_property) {
+		if (dm_plane_state->blend_tf != val) {
+			dm_plane_state->blend_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
 	} else {
 		drm_dbg_atomic(plane->dev,
 			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
@@ -1585,6 +1616,12 @@ dm_atomic_plane_get_property(struct drm_plane *plane,
 	} else 	if (property == adev->mode_info.plane_lut3d_property) {
 		*val = (dm_plane_state->lut3d) ?
 			dm_plane_state->lut3d->base.id : 0;
+	} else 	if (property == adev->mode_info.plane_blend_lut_property) {
+		*val = (dm_plane_state->blend_lut) ?
+			dm_plane_state->blend_lut->base.id : 0;
+	} else if (property == adev->mode_info.plane_blend_tf_property) {
+		*val = dm_plane_state->blend_tf;
+
 	} else {
 		return -EINVAL;
 	}
-- 
GitLab


From 0f5afa190b890052cae187496f660699f00067ef Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:52 -0100
Subject: [PATCH 1118/2340] drm/amd/display: add CRTC gamma TF driver-specific
 property

Add AMD pre-defined transfer function property to default DRM CRTC gamma
to convert to wire encoding with or without a user gamma LUT. There is
no post-blending regamma ROM for pre-defined TF. When setting Gamma TF
(!= Identity) and LUT at the same time, the color module will combine
the pre-defined TF and the custom LUT values into the LUT that's
actually programmed.

v2:
- enable CRTC prop in the end of driver-specific prop sequence
- define inverse EOTFs as supported regamma TFs
- reword driver-specific function doc to remove shaper/3D LUT

v3:
- spell out TF+LUT behavior in the commit and comments (Harry)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Co-developed-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      |  7 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  7 ++
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c    | 72 +++++++++++++++++++
 4 files changed, 94 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 6ff37272ccad4..9fbe09743ddec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -425,6 +425,13 @@ struct amdgpu_mode_info {
 	 * from a combination of pre-defined TF and the custom 1D LUT).
 	 */
 	struct drm_property *plane_blend_tf_property;
+	/* @regamma_tf_property: Transfer function for CRTC regamma
+	 * (post-blending). Possible values are defined by `enum
+	 * amdgpu_transfer_function`. There is no regamma ROM, but we can use
+	 * AMD color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *regamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index a9a0bc875b783..d9923c9f39b06 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -836,6 +836,14 @@ struct dm_crtc_state {
 	struct dc_info_packet vrr_infopacket;
 
 	int abm_level;
+
+        /**
+	 * @regamma_tf:
+	 *
+	 * Pre-defined transfer function for converting internal FB -> wire
+	 * encoding.
+	 */
+	enum amdgpu_transfer_function regamma_tf;
 };
 
 #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 425676f5afb7c..4150e9370daf3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -295,6 +295,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_blend_tf_property = prop;
 
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_CRTC_REGAMMA_TF",
+					 amdgpu_inv_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.regamma_tf_property = prop;
+
 	return 0;
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index cb0b48bb2a7dc..9224e2d7f32b9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -260,6 +260,7 @@ static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *cr
 	state->freesync_config = cur->freesync_config;
 	state->cm_has_degamma = cur->cm_has_degamma;
 	state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
+	state->regamma_tf = cur->regamma_tf;
 	state->crc_skip_count = cur->crc_skip_count;
 	state->mpo_requested = cur->mpo_requested;
 	/* TODO Duplicate dc_stream after objects are stream object is flattened */
@@ -296,6 +297,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
 }
 #endif
 
+#ifdef AMD_PRIVATE_COLOR
+/**
+ * drm_crtc_additional_color_mgmt - enable additional color properties
+ * @crtc: DRM CRTC
+ *
+ * This function lets the driver enable post-blending CRTC regamma transfer
+ * function property in addition to DRM CRTC gamma LUT. Default value means
+ * linear transfer function, which is the default CRTC gamma LUT behaviour
+ * without this property.
+ */
+static void
+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+	if(adev->dm.dc->caps.color.mpc.ogam_ram)
+		drm_object_attach_property(&crtc->base,
+					   adev->mode_info.regamma_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+}
+
+static int
+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+	if (property == adev->mode_info.regamma_tf_property) {
+		if (acrtc_state->regamma_tf != val) {
+			acrtc_state->regamma_tf = val;
+			acrtc_state->base.color_mgmt_changed |= 1;
+		}
+	} else {
+		drm_dbg_atomic(crtc->dev,
+			       "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n",
+			       crtc->base.id, crtc->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc,
+				   const struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t *val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+	if (property == adev->mode_info.regamma_tf_property)
+		*val = acrtc_state->regamma_tf;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 /* Implemented only the options currently available for the driver */
 static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 	.reset = amdgpu_dm_crtc_reset_state,
@@ -314,6 +379,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 #if defined(CONFIG_DEBUG_FS)
 	.late_register = amdgpu_dm_crtc_late_register,
 #endif
+#ifdef AMD_PRIVATE_COLOR
+	.atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
+	.atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
+#endif
 };
 
 static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc)
@@ -489,6 +558,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 
 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
 
+#ifdef AMD_PRIVATE_COLOR
+	dm_crtc_additional_color_mgmt(&acrtc->base);
+#endif
 	return 0;
 
 fail:
-- 
GitLab


From 98fbb52772063ad2547d6d1b80ff99bc26761e79 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:53 -0100
Subject: [PATCH 1119/2340] drm/amd/display: add comments to describe DM crtc
 color mgmt behavior

Describe some expected behavior of the AMD DM color mgmt programming.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c  | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 4150e9370daf3..0b92513eab98f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -661,13 +661,25 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 		crtc->cm_is_degamma_srgb = true;
 		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
 		stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-
+		/*
+		 * Note: although we pass has_rom as parameter here, we never
+		 * actually use ROM because the color module only takes the ROM
+		 * path if transfer_func->type == PREDEFINED.
+		 *
+		 * See more in mod_color_calculate_regamma_params()
+		 */
 		r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
 				    regamma_size, has_rom);
 		if (r)
 			return r;
 	} else if (has_regamma) {
-		/* If atomic regamma, CRTC RGM goes into RGM LUT. */
+		/*
+		 * CRTC RGM goes into RGM LUT.
+		 *
+		 * Note: there is no implicit sRGB regamma here. We are using
+		 * degamma calculation from color module to calculate the curve
+		 * from a linear base.
+		 */
 		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
 		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 
-- 
GitLab


From 8b6b3f668f31a24b5406661388b9a69202e83e9d Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:54 -0100
Subject: [PATCH 1120/2340] drm/amd/display: encapsulate atomic regamma
 operation

We will wire up MPC 3D LUT to DM CRTC color pipeline in the next patch,
but so far, only for atomic interface. By checking
set_output_transfer_func in DC drivers with MPC 3D LUT support, we can
verify that regamma is only programmed when 3D LUT programming fails. As
a groundwork to introduce 3D LUT programming and better understand each
step, detach atomic regamma programming from the crtc colocr updating
code.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 55 ++++++++++++-------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 0b92513eab98f..dec734f038325 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -525,6 +525,37 @@ static int __set_output_tf(struct dc_transfer_func *func,
 	return res ? 0 : -ENOMEM;
 }
 
+static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
+					const struct drm_color_lut *regamma_lut,
+					uint32_t regamma_size, bool has_rom)
+{
+	struct dc_transfer_func *out_tf = stream->out_transfer_func;
+	int ret = 0;
+
+	if (regamma_size) {
+		/*
+		 * CRTC RGM goes into RGM LUT.
+		 *
+		 * Note: there is no implicit sRGB regamma here. We are using
+		 * degamma calculation from color module to calculate the curve
+		 * from a linear base.
+		 */
+		out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+		out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+
+		ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
+	} else {
+		/*
+		 * No CRTC RGM means we can just put the block into bypass
+		 * since we don't have any plane level adjustments using it.
+		 */
+		out_tf->type = TF_TYPE_BYPASS;
+		out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+
+	return ret;
+}
+
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
@@ -672,28 +703,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 				    regamma_size, has_rom);
 		if (r)
 			return r;
-	} else if (has_regamma) {
-		/*
-		 * CRTC RGM goes into RGM LUT.
-		 *
-		 * Note: there is no implicit sRGB regamma here. We are using
-		 * degamma calculation from color module to calculate the curve
-		 * from a linear base.
-		 */
-		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
-		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-
-		r = __set_output_tf(stream->out_transfer_func, regamma_lut,
-				    regamma_size, has_rom);
+	} else {
+		regamma_size = has_regamma ? regamma_size : 0;
+		r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
+						 regamma_size, has_rom);
 		if (r)
 			return r;
-	} else {
-		/*
-		 * No CRTC RGM means we can just put the block into bypass
-		 * since we don't have any plane level adjustments using it.
-		 */
-		stream->out_transfer_func->type = TF_TYPE_BYPASS;
-		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 	}
 
 	/*
-- 
GitLab


From 6bd20f0f165f444c1d8184ebd238dd92966c9dca Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:57:55 -0100
Subject: [PATCH 1121/2340] drm/amd/display: add CRTC gamma TF support

Add predefined transfer function programming. There is no post-blending
out gamma ROM for hardcoded curves, but we can use AMD color modules to
program LUT parameters from pre-defined coefficients and an empty
regamma LUT (or bump up LUT parameters with pre-defined TF values).

v2:
- update crtc color mgmt if regamma TF differs between states (Joshua)
- map inverse EOTF to DC transfer function (Melissa)

v3:
- update AMDGPU TF list

v4:
- update comment regarding regamma behavior

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Co-developed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 77 +++++++++++++++----
 2 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6efa0c1bba9d2..22c46be291d1e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9860,6 +9860,7 @@ skip_modeset:
 	 * when a modeset is needed, to ensure it gets reprogrammed.
 	 */
 	if (dm_new_crtc_state->base.color_mgmt_changed ||
+	    dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
 	    drm_atomic_crtc_needs_modeset(new_crtc_state)) {
 		ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
 		if (ret)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index dec734f038325..9b930e3eb79df 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -490,16 +490,18 @@ static int __set_output_tf(struct dc_transfer_func *func,
 	struct calculate_buffer cal_buffer = {0};
 	bool res;
 
-	ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
-
 	cal_buffer.buffer_index = -1;
 
-	gamma = dc_create_gamma();
-	if (!gamma)
-		return -ENOMEM;
+	if (lut_size) {
+		ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
 
-	gamma->num_entries = lut_size;
-	__drm_lut_to_dc_gamma(lut, gamma, false);
+		gamma = dc_create_gamma();
+		if (!gamma)
+			return -ENOMEM;
+
+		gamma->num_entries = lut_size;
+		__drm_lut_to_dc_gamma(lut, gamma, false);
+	}
 
 	if (func->tf == TRANSFER_FUNCTION_LINEAR) {
 		/*
@@ -507,41 +509,49 @@ static int __set_output_tf(struct dc_transfer_func *func,
 		 * on top of a linear input. But degamma params can be used
 		 * instead to simulate this.
 		 */
-		gamma->type = GAMMA_CUSTOM;
+		if (gamma)
+			gamma->type = GAMMA_CUSTOM;
 		res = mod_color_calculate_degamma_params(NULL, func,
-							gamma, true);
+							 gamma, gamma != NULL);
 	} else {
 		/*
 		 * Assume sRGB. The actual mapping will depend on whether the
 		 * input was legacy or not.
 		 */
-		gamma->type = GAMMA_CS_TFM_1D;
-		res = mod_color_calculate_regamma_params(func, gamma, false,
+		if (gamma)
+			gamma->type = GAMMA_CS_TFM_1D;
+		res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL,
 							 has_rom, NULL, &cal_buffer);
 	}
 
-	dc_gamma_release(&gamma);
+	if (gamma)
+		dc_gamma_release(&gamma);
 
 	return res ? 0 : -ENOMEM;
 }
 
 static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
 					const struct drm_color_lut *regamma_lut,
-					uint32_t regamma_size, bool has_rom)
+					uint32_t regamma_size, bool has_rom,
+					enum dc_transfer_func_predefined tf)
 {
 	struct dc_transfer_func *out_tf = stream->out_transfer_func;
 	int ret = 0;
 
-	if (regamma_size) {
+	if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
 		/*
 		 * CRTC RGM goes into RGM LUT.
 		 *
 		 * Note: there is no implicit sRGB regamma here. We are using
 		 * degamma calculation from color module to calculate the curve
-		 * from a linear base.
+		 * from a linear base if gamma TF is not set. However, if gamma
+		 * TF (!= Linear) and LUT are set at the same time, we will use
+		 * regamma calculation, and the color module will combine the
+		 * pre-defined TF and the custom LUT values into the LUT that's
+		 * actually programmed.
 		 */
 		out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
-		out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+		out_tf->tf = tf;
 
 		ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
 	} else {
@@ -587,6 +597,36 @@ static int __set_input_tf(struct dc_transfer_func *func,
 	return res ? 0 : -ENOMEM;
 }
 
+static enum dc_transfer_func_predefined
+amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
+{
+	switch (tf)
+	{
+	default:
+	case AMDGPU_TRANSFER_FUNCTION_DEFAULT:
+	case AMDGPU_TRANSFER_FUNCTION_IDENTITY:
+		return TRANSFER_FUNCTION_LINEAR;
+	case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF:
+		return TRANSFER_FUNCTION_SRGB;
+	case AMDGPU_TRANSFER_FUNCTION_BT709_OETF:
+	case AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF:
+		return TRANSFER_FUNCTION_BT709;
+	case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF:
+		return TRANSFER_FUNCTION_PQ;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA22;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA24;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA26;
+	}
+}
+
 /**
  * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes
  * @crtc_state: the DRM CRTC state
@@ -654,9 +694,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 	const struct drm_color_lut *degamma_lut, *regamma_lut;
 	uint32_t degamma_size, regamma_size;
 	bool has_regamma, has_degamma;
+	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR;
 	bool is_legacy;
 	int r;
 
+	tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf);
+
 	r = amdgpu_dm_verify_lut_sizes(&crtc->base);
 	if (r)
 		return r;
@@ -706,7 +749,7 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 	} else {
 		regamma_size = has_regamma ? regamma_size : 0;
 		r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
-						 regamma_size, has_rom);
+						 regamma_size, has_rom, tf);
 		if (r)
 			return r;
 	}
-- 
GitLab


From d9501844d53897ca7ac04697b8504940c6dfdbb3 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 12 Dec 2023 15:53:09 +0200
Subject: [PATCH 1122/2340] drm/amd: include drm/drm_edid.h only where needed

Including drm_edid.h from amdgpu_mode.h causes the rebuild of literally
hundreds of files when drm_edid.h is modified, while there are only a
handful of files that actually need to include drm_edid.h.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h                    | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c                    | 1 +
 drivers/gpu/drm/amd/amdgpu/atombios_encoders.c              | 1 +
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c                      | 1 +
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c                      | 1 +
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c                       | 1 +
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c                       | 1 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 1 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c        | 1 +
 9 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 9fbe09743ddec..790c08b8262d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -32,7 +32,6 @@
 
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_framebuffer.h>
@@ -51,6 +50,7 @@ struct amdgpu_device;
 struct amdgpu_encoder;
 struct amdgpu_router;
 struct amdgpu_hpd;
+struct edid;
 
 #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
 #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index db6fc0cb18eb8..453a4b786cfcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0+
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
 #include <drm/drm_simple_kms_helper.h>
 #include <drm/drm_vblank.h>
 
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 3ee219aa28918..7672abe6c140c 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -28,6 +28,7 @@
 
 #include <acpi/video.h>
 
+#include <drm/drm_edid.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_connectors.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index bb666cb7522e3..587ee632a3b8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 7af277f61ccad..f22ec27365bd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 143efc37a17fe..4dbe9b3259b50 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -23,6 +23,7 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index adeddfb7ff127..05bcce23385ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 19ae5587dd5ae..941e96f100f4e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -28,6 +28,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fixed.h>
+#include <drm/drm_edid.h>
 #include "dm_services.h"
 #include "amdgpu.h"
 #include "amdgpu_dm.h"
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
index b3e634b0f712e..16e72d623630c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -32,6 +32,7 @@
 #include "amdgpu_display.h"
 #include "dc.h"
 
+#include <drm/drm_edid.h>
 #include <drm/drm_atomic_state_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
 
-- 
GitLab


From 88d26ea639a8e9d314e6bffef5f382167e7203e2 Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:57:56 -0100
Subject: [PATCH 1123/2340] drm/amd/display: set sdr_ref_white_level to 80 for
 out_transfer_func

Otherwise this is just initialized to 0. This needs to actually have a
value so that compute_curve can work for PQ EOTF.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Co-developed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 9b930e3eb79df..83c14fb577212 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -72,6 +72,7 @@
  */
 
 #define MAX_DRM_LUT_VALUE 0xFFFF
+#define SDR_WHITE_LEVEL_INIT_VALUE 80
 
 /**
  * amdgpu_dm_init_color_mod - Initialize the color module.
@@ -552,6 +553,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
 		 */
 		out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
 		out_tf->tf = tf;
+		out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
 		ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
 	} else {
-- 
GitLab


From 6bed9d550e51534415a56f8de33f5b9d4e728e53 Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:57:57 -0100
Subject: [PATCH 1124/2340] drm/amd/display: mark plane as needing reset if
 color props change

We should reset a plane state if at least one of the color management
properties differs from old and new state.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Co-developed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 22c46be291d1e..0af5d908e1690 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9929,6 +9929,10 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	 */
 	for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
 		struct amdgpu_framebuffer *old_afb, *new_afb;
+		struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
+
+		dm_new_other_state = to_dm_plane_state(new_other_state);
+		dm_old_other_state = to_dm_plane_state(old_other_state);
 
 		if (other->type == DRM_PLANE_TYPE_CURSOR)
 			continue;
@@ -9965,6 +9969,17 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 		    old_other_state->color_encoding != new_other_state->color_encoding)
 			return true;
 
+		/* HDR/Transfer Function changes. */
+		if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf ||
+		    dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut ||
+		    dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult ||
+		    dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut ||
+		    dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf ||
+		    dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
+		    dm_old_other_state->blend_lut != dm_new_other_state->blend_lut ||
+		    dm_old_other_state->blend_tf != dm_new_other_state->blend_tf)
+			return true;
+
 		/* Framebuffer checks fall at the end. */
 		if (!old_other_state->fb || !new_other_state->fb)
 			continue;
-- 
GitLab


From 73e5ea616a9f8c261d07e63b421947949ad6cbce Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:57:58 -0100
Subject: [PATCH 1125/2340] drm/amd/display: decouple steps for mapping CRTC
 degamma to DC plane

The next patch adds pre-blending degamma to AMD color mgmt pipeline, but
pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC
atomic degamma or implict degamma on legacy gamma. Detach degamma usage
regarging CRTC color properties to manage plane and CRTC color
correction combinations.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 60 +++++++++++++------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 83c14fb577212..f13d22851fe45 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -789,20 +789,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 	return 0;
 }
 
-/**
- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
- * @crtc: amdgpu_dm crtc state
- * @dc_plane_state: target DC surface
- *
- * Update the underlying dc_stream_state's input transfer function (ITF) in
- * preparation for hardware commit. The transfer function used depends on
- * the preparation done on the stream for color management.
- *
- * Returns:
- * 0 on success. -ENOMEM if mem allocation fails.
- */
-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
-				      struct dc_plane_state *dc_plane_state)
+static int
+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
+			     struct dc_plane_state *dc_plane_state)
 {
 	const struct drm_color_lut *degamma_lut;
 	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -825,8 +814,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 						 &degamma_size);
 		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
 
-		dc_plane_state->in_transfer_func->type =
-			TF_TYPE_DISTRIBUTED_POINTS;
+		dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
 
 		/*
 		 * This case isn't fully correct, but also fairly
@@ -862,7 +850,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 				   degamma_lut, degamma_size);
 		if (r)
 			return r;
-	} else if (crtc->cm_is_degamma_srgb) {
+	} else {
 		/*
 		 * For legacy gamma support we need the regamma input
 		 * in linear space. Assume that the input is sRGB.
@@ -872,8 +860,44 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 
 		if (tf != TRANSFER_FUNCTION_SRGB &&
 		    !mod_color_calculate_degamma_params(NULL,
-			    dc_plane_state->in_transfer_func, NULL, false))
+							dc_plane_state->in_transfer_func,
+							NULL, false))
 			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
+ * @crtc: amdgpu_dm crtc state
+ * @dc_plane_state: target DC surface
+ *
+ * Update the underlying dc_stream_state's input transfer function (ITF) in
+ * preparation for hardware commit. The transfer function used depends on
+ * the preparation done on the stream for color management.
+ *
+ * Returns:
+ * 0 on success. -ENOMEM if mem allocation fails.
+ */
+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+				      struct dc_plane_state *dc_plane_state)
+{
+	bool has_crtc_cm_degamma;
+	int ret;
+
+	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
+	if (has_crtc_cm_degamma){
+		/*
+		 * AMD HW doesn't have post-blending degamma caps. When DRM
+		 * CRTC atomic degamma is set, we maps it to DPP degamma block
+		 * (pre-blending) or, on legacy gamma, we use DPP degamma to
+		 * linearize (implicit degamma) from sRGB/BT709 according to
+		 * the input space.
+		 */
+		ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state);
+		if (ret)
+			return ret;
 	} else {
 		/* ...Otherwise we can just bypass the DGM block. */
 		dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
-- 
GitLab


From 683b8c7e7a94fb7445b8d300c7404322ad040bab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 8 Dec 2023 13:43:09 +0100
Subject: [PATCH 1126/2340] drm/amdgpu: fix tear down order in
 amdgpu_vm_pt_free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When freeing PD/PT with shadows it can happen that the shadow
destruction races with detaching the PD/PT from the VM causing a NULL
pointer dereference in the invalidation code.

Fix this by detaching the the PD/PT from the VM first and then
freeing the shadow instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
Fixes: https://gitlab.freedesktop.org/drm/amd/-/issues/2867
Cc: <stable@vger.kernel.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a2287bb252235..a160265ddc07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 
 	if (!entry->bo)
 		return;
+
+	entry->bo->vm_bo = NULL;
 	shadow = amdgpu_bo_shadowed(entry->bo);
 	if (shadow) {
 		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
 		amdgpu_bo_unref(&shadow);
 	}
 	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
-	entry->bo->vm_bo = NULL;
 
 	spin_lock(&entry->vm->status_lock);
 	list_del(&entry->vm_status);
-- 
GitLab


From 980f8710075acaeb226a94cde6dda8ffad30123c Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:57:59 -0100
Subject: [PATCH 1127/2340] drm/amd/display: add plane degamma TF and LUT
 support

Set DC plane with user degamma LUT or predefined TF from driver-specific
plane color properties. If plane and CRTC degamma are set in the same
time, plane degamma has priority.  That means, we only set CRTC degamma
if we don't have plane degamma LUT or TF to configure. We return -EINVAL
if we don't have plane degamma settings, so we can continue and check
CRTC degamma.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  4 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 70 +++++++++++++++++--
 3 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0af5d908e1690..911db58d0e1c6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5169,7 +5169,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
 	 * Always set input transfer function, since plane state is refreshed
 	 * every time.
 	 */
-	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
+	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
+						plane_state,
+						dc_plane_state);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index d9923c9f39b06..16af549d0b425 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -918,6 +918,7 @@ int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+				      struct drm_plane_state *plane_state,
 				      struct dc_plane_state *dc_plane_state);
 
 void amdgpu_dm_update_connector_after_detect(
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index f13d22851fe45..13117516f2a47 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -868,9 +868,58 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
 	return 0;
 }
 
+static int
+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
+		       struct dc_plane_state *dc_plane_state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *degamma_lut;
+	enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	uint32_t degamma_size;
+	bool has_degamma_lut;
+	int ret;
+
+	degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut,
+					 &degamma_size);
+
+	has_degamma_lut = degamma_lut &&
+			  !__is_lut_linear(degamma_lut, degamma_size);
+
+	tf = dm_plane_state->degamma_tf;
+
+	/* If we don't have plane degamma LUT nor TF to set on DC, we have
+	 * nothing to do here, return.
+	 */
+	if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
+		return -EINVAL;
+
+	dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf);
+
+	if (has_degamma_lut) {
+		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
+
+		dc_plane_state->in_transfer_func->type =
+			TF_TYPE_DISTRIBUTED_POINTS;
+
+		ret = __set_input_tf(dc_plane_state->in_transfer_func,
+				     degamma_lut, degamma_size);
+		if (ret)
+			return ret;
+       } else {
+		dc_plane_state->in_transfer_func->type =
+			TF_TYPE_PREDEFINED;
+
+		if (!mod_color_calculate_degamma_params(NULL,
+		    dc_plane_state->in_transfer_func, NULL, false))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 /**
  * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
  * @crtc: amdgpu_dm crtc state
+ * @plane_state: DRM plane state
  * @dc_plane_state: target DC surface
  *
  * Update the underlying dc_stream_state's input transfer function (ITF) in
@@ -881,13 +930,28 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
  * 0 on success. -ENOMEM if mem allocation fails.
  */
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+				      struct drm_plane_state *plane_state,
 				      struct dc_plane_state *dc_plane_state)
 {
 	bool has_crtc_cm_degamma;
 	int ret;
 
+	/* Initially, we can just bypass the DGM block. */
+	dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
+	dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
+
+	/* After, we start to update values according to color props */
 	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
-	if (has_crtc_cm_degamma){
+
+	ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
+	if (ret != -EINVAL)
+		return ret;
+
+	/* If we are here, it means we don't have plane degamma settings, check
+	 * if we have CRTC degamma waiting for mapping to pre-blending degamma
+	 * block
+	 */
+	if (has_crtc_cm_degamma) {
 		/*
 		 * AMD HW doesn't have post-blending degamma caps. When DRM
 		 * CRTC atomic degamma is set, we maps it to DPP degamma block
@@ -898,10 +962,6 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 		ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state);
 		if (ret)
 			return ret;
-	} else {
-		/* ...Otherwise we can just bypass the DGM block. */
-		dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
-		dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 	}
 
 	return 0;
-- 
GitLab


From ef113a3b1964b40dd87287806865b947d70f7df5 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:58:00 -0100
Subject: [PATCH 1128/2340] drm/amd/display: reject atomic commit if setting
 both plane and CRTC degamma

DC only has pre-blending degamma caps (plane/DPP) that is currently in
use for CRTC/post-blending degamma, so that we don't have HW caps to
perform plane and CRTC degamma at the same time. Reject atomic updates
when serspace sets both plane and CRTC degamma properties.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 13117516f2a47..8fc715fa426cf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -944,9 +944,20 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
 
 	ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
-	if (ret != -EINVAL)
+	if (ret == -ENOMEM)
 		return ret;
 
+	/* We only have one degamma block available (pre-blending) for the
+	 * whole color correction pipeline, so that we can't actually perform
+	 * plane and CRTC degamma at the same time. Explicitly reject atomic
+	 * updates when userspace sets both plane and CRTC degamma properties.
+	 */
+	if (has_crtc_cm_degamma && ret != -EINVAL){
+		drm_dbg_kms(crtc->base.crtc->dev,
+			    "doesn't support plane and CRTC degamma at the same time\n");
+			return -EINVAL;
+	}
+
 	/* If we are here, it means we don't have plane degamma settings, check
 	 * if we have CRTC degamma waiting for mapping to pre-blending degamma
 	 * block
-- 
GitLab


From 889044f9e04f0829dd92640c551941bbe77bc0ea Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:58:01 -0100
Subject: [PATCH 1129/2340] drm/amd/display: add dc_fixpt_from_s3132 helper

Detach value translation from CTM to reuse it for programming HDR
multiplier property.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c  |  8 +-------
 drivers/gpu/drm/amd/display/include/fixed31_32.h     | 12 ++++++++++++
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 8fc715fa426cf..c160dbef52249 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -405,7 +405,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
 static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 				   struct fixed31_32 *matrix)
 {
-	int64_t val;
 	int i;
 
 	/*
@@ -424,12 +423,7 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 		}
 
 		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-		val = ctm->matrix[i - (i / 4)];
-		/* If negative, convert to 2's complement. */
-		if (val & (1ULL << 63))
-			val = -(val & ~(1ULL << 63));
-
-		matrix[i].value = val;
+		matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index d4cf7ead1d877..84da1dd34efd1 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
 static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL };
 static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL };
 
+static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x)
+{
+	struct fixed31_32 val;
+
+	/* If negative, convert to 2's complement. */
+	if (x & (1ULL << 63))
+		x = -(x & ~(1ULL << 63));
+
+	val.value = x;
+	return val;
+}
+
 /*
  * @brief
  * Initialization routines
-- 
GitLab


From 4bc59ddf57c1f68ea035c4f242108f29d91797fd Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:58:02 -0100
Subject: [PATCH 1130/2340] drm/amd/display: add HDR multiplier support

With `dc_fixpt_from_s3132()` translation, we can just use it to set
hdr_mult.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c       | 1 +
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 911db58d0e1c6..4a1139b3cbc5c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8268,6 +8268,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
 			bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
 			bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
+			bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
 		}
 
 		amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index c160dbef52249..35f1485248276 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -927,6 +927,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 				      struct drm_plane_state *plane_state,
 				      struct dc_plane_state *dc_plane_state)
 {
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
 	bool has_crtc_cm_degamma;
 	int ret;
 
@@ -937,6 +938,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 	/* After, we start to update values according to color props */
 	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
 
+	dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
+
 	ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
 	if (ret == -ENOMEM)
 		return ret;
-- 
GitLab


From aba8b76baabde681ab4ff686452005d80d949345 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:58:03 -0100
Subject: [PATCH 1131/2340] drm/amd/display: add plane shaper LUT support

Map DC shaper LUT to DM plane color management. Shaper LUT can be used
to delinearize and/or normalize the color space for computational
efficiency and achiving specific visual styles. If a plane degamma is
apply to linearize the color space, a custom shaper 1D LUT can be used
just before applying 3D LUT.

v2:
- use DPP color caps to verify plane 3D LUT support
- add debug message if shaper LUT programming fails

v4:
- remove helper to check 3D LUT color caps (Harry)
- update desc of lut3d-setup helper from MPC to DPP

v5:
- remove color_mgmt_changed check that prevents color updates (Joshua)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 93 ++++++++++++++++++-
 3 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4a1139b3cbc5c..73b12ca2fe607 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8269,6 +8269,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
 			bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
 			bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
+			bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func;
 		}
 
 		amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 16af549d0b425..7706d412d206f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -908,6 +908,8 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 /* 3D LUT max size is 17x17x17 (4913 entries) */
 #define MAX_COLOR_3DLUT_SIZE 17
 #define MAX_COLOR_3DLUT_BITDEPTH 12
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+				struct drm_plane_state *plane_state);
 /* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 35f1485248276..daa500b04cb54 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -623,6 +623,63 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
 	}
 }
 
+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+				       uint32_t shaper_size,
+				       struct dc_transfer_func *func_shaper)
+{
+	int ret = 0;
+
+	if (shaper_size) {
+		/*
+		 * If user shaper LUT is set, we assume a linear color space
+		 * (linearized by degamma 1D LUT or not).
+		 */
+		func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+		func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+
+		ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, false);
+	} else {
+		func_shaper->type = TF_TYPE_BYPASS;
+		func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+
+	return ret;
+}
+
+/**
+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
+ * shaper and 3D LUTs match the hw supported size
+ * @adev: amdgpu device
+ * @crtc_state: the DRM CRTC state
+ *
+ * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or
+ * newer) and if the user shaper and 3D LUTs match the supported size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if lut size are invalid.
+ */
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+				struct drm_plane_state *plane_state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *shaper = NULL;
+	uint32_t exp_size, size;
+	bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut;
+
+	/* shaper LUT is only available if 3D LUT color caps */
+	exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0;
+	shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size);
+
+	if (shaper && size != exp_size) {
+		drm_dbg(&adev->ddev,
+			"Invalid Shaper LUT size. Should be %u but got %u.\n",
+			exp_size, size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes
  * @crtc_state: the DRM CRTC state
@@ -910,6 +967,30 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state,
 	return 0;
 }
 
+static int
+amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
+				     struct dc_plane_state *dc_plane_state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *shaper_lut;
+	uint32_t shaper_size;
+	int ret;
+
+	dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
+
+	shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
+	shaper_size = shaper_lut != NULL ? shaper_size : 0;
+
+	ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, shaper_size,
+					  dc_plane_state->in_shaper_func);
+	if (ret)
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d shaper LUT failed.\n",
+			    plane_state->plane->index);
+
+	return ret;
+}
+
 /**
  * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
  * @crtc: amdgpu_dm crtc state
@@ -927,10 +1008,16 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 				      struct drm_plane_state *plane_state,
 				      struct dc_plane_state *dc_plane_state)
 {
-	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
 	bool has_crtc_cm_degamma;
 	int ret;
 
+	ret = amdgpu_dm_verify_lut3d_size(adev, plane_state);
+	if (ret) {
+		drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n");
+		return ret;
+	}
+
 	/* Initially, we can just bypass the DGM block. */
 	dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
 	dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
@@ -938,8 +1025,6 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 	/* After, we start to update values according to color props */
 	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
 
-	dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
-
 	ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
 	if (ret == -ENOMEM)
 		return ret;
@@ -972,5 +1057,5 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 			return ret;
 	}
 
-	return 0;
+	return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state);
 }
-- 
GitLab


From 99de686115b00e765a5e9345e10c9d7312e4c7ea Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:58:04 -0100
Subject: [PATCH 1132/2340] drm/amd/display: add plane shaper TF support

Enable usage of predefined transfer func in addition to shaper 1D LUT.
That means we can save some complexity by just setting a predefined
curve, instead of programming a custom curve when preparing color space
for applying 3D LUT.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c   | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index daa500b04cb54..8239904f9d902 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -624,20 +624,23 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
 }
 
 static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+				       bool has_rom,
+				       enum dc_transfer_func_predefined tf,
 				       uint32_t shaper_size,
 				       struct dc_transfer_func *func_shaper)
 {
 	int ret = 0;
 
-	if (shaper_size) {
+	if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) {
 		/*
 		 * If user shaper LUT is set, we assume a linear color space
 		 * (linearized by degamma 1D LUT or not).
 		 */
 		func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
-		func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+		func_shaper->tf = tf;
+		func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
-		ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, false);
+		ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom);
 	} else {
 		func_shaper->type = TF_TYPE_BYPASS;
 		func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
@@ -972,6 +975,7 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 				     struct dc_plane_state *dc_plane_state)
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 	const struct drm_color_lut *shaper_lut;
 	uint32_t shaper_size;
 	int ret;
@@ -980,8 +984,11 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 
 	shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
 	shaper_size = shaper_lut != NULL ? shaper_size : 0;
+	shaper_tf = dm_plane_state->shaper_tf;
 
-	ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, shaper_size,
+	ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
+					  amdgpu_tf_to_dc_tf(shaper_tf),
+					  shaper_size,
 					  dc_plane_state->in_shaper_func);
 	if (ret)
 		drm_dbg_kms(plane_state->plane->dev,
-- 
GitLab


From 69a83fd3f0a86374b2fcfab1c02363495704e652 Mon Sep 17 00:00:00 2001
From: Woody Suwalski <terraluna977@gmail.com>
Date: Tue, 12 Dec 2023 18:31:34 -0500
Subject: [PATCH 1133/2340] drm/radeon: Prevent multiple debug error lines on
 suspend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix to avoid multiple debug error lines printed on every suspend by Radeon driver's debugfs.

radeon_debugfs_init() calls debugfs_create_file() for every ring.

This results in printing multiple error lines to the screen and dmesg similar to this:

[   92.378726] debugfs: File 'radeon_ring_gfx' in directory '0000:00:01.0' already present!
[   92.378732] debugfs: File 'radeon_ring_cp1' in directory '0000:00:01.0' already present!
[   92.378734] debugfs: File 'radeon_ring_cp2' in directory '0000:00:01.0' already present!
[   92.378737] debugfs: File 'radeon_ring_dma1' in directory '0000:00:01.0' already present!
[   92.378739] debugfs: File 'radeon_ring_dma2' in directory '0000:00:01.0' already present!
[   92.380775] debugfs: File 'radeon_ring_uvd' in directory '0000:00:01.0' already present!
[   92.406620] debugfs: File 'radeon_ring_vce1' in directory '0000:00:01.0' already present!
[   92.406624] debugfs: File 'radeon_ring_vce2' in directory '0000:00:01.0' already present!

Patch v1: The fix was to run lookup() for the file before trying to (re)create that debug file.
Patch v2: Call the radeon_debugfs_init() only once when radeon ring is initialized (as suggested
by Christian K. - thanks)

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Woody Suwalski <terraluna977@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index e6534fa9f1fb5..38048593bb4a3 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -413,6 +413,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 			dev_err(rdev->dev, "(%d) ring map failed\n", r);
 			return r;
 		}
+		radeon_debugfs_ring_init(rdev, ring);
 	}
 	ring->ptr_mask = (ring->ring_size / 4) - 1;
 	ring->ring_free_dw = ring->ring_size / 4;
@@ -421,7 +422,6 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 		ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index;
 		ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4];
 	}
-	radeon_debugfs_ring_init(rdev, ring);
 	radeon_ring_lockup_update(rdev, ring);
 	return 0;
 }
-- 
GitLab


From 65d2765d6291a49d5cdfc0fd88ba5689ed27dbe2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 4 Dec 2023 15:51:50 +0100
Subject: [PATCH 1134/2340] drm/amdgpu: warn when there are still mappings when
 a BO is destroyed v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can only happen when there is a reference counting bug.

v2: fix typo

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d79b4ca1ecfc4..5ad03f2afdb45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1343,6 +1343,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 
 	abo = ttm_to_amdgpu_bo(bo);
 
+	WARN_ON(abo->vm_bo);
+
 	if (abo->kfd_bo)
 		amdgpu_amdkfd_release_notify(abo);
 
-- 
GitLab


From c2949a49dfe960e952400029e14751dceff79d38 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 03:27:42 +0300
Subject: [PATCH 1135/2340] drm/msm/dpu: enable writeback on SM8350

Enable WB2 hardware block, enabling writeback support on this platform.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570188/
Link: https://lore.kernel.org/r/20231203002743.1291956-3-dmitry.baryshkov@linaro.org
---
 .../drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index bed87250e68c1..194239de393e4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -30,6 +30,7 @@ static const struct dpu_mdp_cfg sm8350_mdp = {
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
 	},
 };
@@ -297,6 +298,21 @@ static const struct dpu_dsc_cfg sm8350_dsc[] = {
 	},
 };
 
+static const struct dpu_wb_cfg sm8350_wb[] = {
+	{
+		.name = "wb_2", .id = WB_2,
+		.base = 0x65000, .len = 0x2c8,
+		.features = WB_SM8250_MASK,
+		.format_list = wb2_formats,
+		.num_formats = ARRAY_SIZE(wb2_formats),
+		.clk_ctrl = DPU_CLK_CTRL_WB2,
+		.xin_id = 6,
+		.vbif_idx = VBIF_RT,
+		.maxlinewidth = 4096,
+		.intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4),
+	},
+};
+
 static const struct dpu_intf_cfg sm8350_intf[] = {
 	{
 		.name = "intf_0", .id = INTF_0,
@@ -392,6 +408,8 @@ const struct dpu_mdss_cfg dpu_sm8350_cfg = {
 	.dsc = sm8350_dsc,
 	.merge_3d_count = ARRAY_SIZE(sm8350_merge_3d),
 	.merge_3d = sm8350_merge_3d,
+	.wb_count = ARRAY_SIZE(sm8350_wb),
+	.wb = sm8350_wb,
 	.intf_count = ARRAY_SIZE(sm8350_intf),
 	.intf = sm8350_intf,
 	.vbif_count = ARRAY_SIZE(sdm845_vbif),
-- 
GitLab


From eaa647cdbf2e357b4a14903f2f1e47ed9c4f8df3 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 3 Dec 2023 03:27:43 +0300
Subject: [PATCH 1136/2340] drm/msm/dpu: enable writeback on SM8450

Enable WB2 hardware block, enabling writeback support on this platform.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/570187/
Link: https://lore.kernel.org/r/20231203002743.1291956-4-dmitry.baryshkov@linaro.org
---
 .../drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index 7a647e1f729d9..a8a1d65a93af3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -31,6 +31,7 @@ static const struct dpu_mdp_cfg sm8450_mdp = {
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
 	},
 };
@@ -315,6 +316,21 @@ static const struct dpu_dsc_cfg sm8450_dsc[] = {
 	},
 };
 
+static const struct dpu_wb_cfg sm8450_wb[] = {
+	{
+		.name = "wb_2", .id = WB_2,
+		.base = 0x65000, .len = 0x2c8,
+		.features = WB_SM8250_MASK,
+		.format_list = wb2_formats,
+		.num_formats = ARRAY_SIZE(wb2_formats),
+		.clk_ctrl = DPU_CLK_CTRL_WB2,
+		.xin_id = 6,
+		.vbif_idx = VBIF_RT,
+		.maxlinewidth = 4096,
+		.intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4),
+	},
+};
+
 static const struct dpu_intf_cfg sm8450_intf[] = {
 	{
 		.name = "intf_0", .id = INTF_0,
@@ -410,6 +426,8 @@ const struct dpu_mdss_cfg dpu_sm8450_cfg = {
 	.dsc = sm8450_dsc,
 	.merge_3d_count = ARRAY_SIZE(sm8450_merge_3d),
 	.merge_3d = sm8450_merge_3d,
+	.wb_count = ARRAY_SIZE(sm8450_wb),
+	.wb = sm8450_wb,
 	.intf_count = ARRAY_SIZE(sm8450_intf),
 	.intf = sm8450_intf,
 	.vbif_count = ARRAY_SIZE(sdm845_vbif),
-- 
GitLab


From 2b72e50c62de60ad2d6bcd86aa38d4ccbdd633f2 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 11 Dec 2023 10:19:55 -0800
Subject: [PATCH 1137/2340] drm/msm/dpu: Ratelimit framedone timeout msgs

When we start getting these, we get a *lot*.  So ratelimit it to not
flood dmesg.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Marijn Suijten <marijn.suijten@somainline.org>
Patchwork: https://patchwork.freedesktop.org/patch/571584/
Link: https://lore.kernel.org/r/20231211182000.218088-1-robdclark@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 5 ++++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h     | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 250c21a4cfc7e..4ceb3421003c4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -39,6 +39,9 @@
 #define DPU_ERROR_ENC(e, fmt, ...) DPU_ERROR("enc%d " fmt,\
 		(e) ? (e)->base.base.id : -1, ##__VA_ARGS__)
 
+#define DPU_ERROR_ENC_RATELIMITED(e, fmt, ...) DPU_ERROR_RATELIMITED("enc%d " fmt,\
+		(e) ? (e)->base.base.id : -1, ##__VA_ARGS__)
+
 /*
  * Two to anticipate panels that can do cmd/vid dynamic switching
  * plan is to create all possible physical encoder types, and switch between
@@ -2279,7 +2282,7 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t)
 		return;
 	}
 
-	DPU_ERROR_ENC(dpu_enc, "frame done timeout\n");
+	DPU_ERROR_ENC_RATELIMITED(dpu_enc, "frame done timeout\n");
 
 	if (atomic_inc_return(&dpu_enc->frame_done_timeout_cnt) == 1)
 		msm_disp_snapshot_state(drm_enc->dev);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index df6271017b800..9112a702a00f2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -51,6 +51,7 @@
 	} while (0)
 
 #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__)
+#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__)
 
 /**
  * ktime_compare_safe - compare two ktime structures
-- 
GitLab


From 88806318e2c2f212e91da2c4143509db4b16c9cb Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 10 Dec 2023 02:21:24 +0300
Subject: [PATCH 1138/2340] dt-bindings: display: msm: dp: declare compatible
 string for sm8150

Add compatible string for the DisplayPort controller found on the
Qualcomm SM8150 platform.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571438/
Link: https://lore.kernel.org/r/20231209232132.3580045-2-dmitry.baryshkov@linaro.org
---
 Documentation/devicetree/bindings/display/msm/dp-controller.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
index 93ded71c52d72..ae53cbfb21932 100644
--- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
@@ -29,6 +29,7 @@ properties:
           - qcom,sm8650-dp
       - items:
           - enum:
+              - qcom,sm8150-dp
               - qcom,sm8250-dp
               - qcom,sm8450-dp
               - qcom,sm8550-dp
-- 
GitLab


From c806d59695e1cdd288e11f8dcc8abdd187a3570e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 11 Dec 2023 17:54:40 +0300
Subject: [PATCH 1139/2340] drm/msm/dpu: remove extra drm_encoder_cleanup from
 the error path

The drmm handler will perform drm_encoder_cleanup() for us. Moreover if
we call drm_encoder_cleanup() manually, the drmm_encoder_alloc_release()
will spawn warnings at drivers/gpu/drm/drm_encoder.c:214. Drop these
extra drm_encoder_cleanup() calls.

Fixes: cd42c56d9c0b ("drm/msm/dpu: use drmm-managed allocation for dpu_encoder_virt")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reported-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Tested-by: Abhinav Kumar <quic_abhinavk@quicinc.com> #sm8250 CI
Patchwork: https://patchwork.freedesktop.org/patch/571562/
Link: https://lore.kernel.org/r/20231211145440.3647001-1-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index dc24fe4bb3b0c..d60edb93d4f7a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -586,7 +586,6 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev,
 		rc = msm_dp_modeset_init(priv->dp[i], dev, encoder);
 		if (rc) {
 			DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc);
-			drm_encoder_cleanup(encoder);
 			return rc;
 		}
 	}
@@ -619,7 +618,6 @@ static int _dpu_kms_initialize_hdmi(struct drm_device *dev,
 	rc = msm_hdmi_modeset_init(priv->hdmi, dev, encoder);
 	if (rc) {
 		DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc);
-		drm_encoder_cleanup(encoder);
 		return rc;
 	}
 
@@ -651,7 +649,6 @@ static int _dpu_kms_initialize_writeback(struct drm_device *dev,
 			n_formats);
 	if (rc) {
 		DPU_ERROR("dpu_writeback_init, rc = %d\n", rc);
-		drm_encoder_cleanup(encoder);
 		return rc;
 	}
 
-- 
GitLab


From 043e5b302625634f5590f3da09652cdfa1037851 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:39 -0800
Subject: [PATCH 1140/2340] drm/msm/dpu: add formats check for writeback
 encoder

In preparation for adding more formats to dpu writeback add
format validation to it to fail any unsupported formats.

changes in v4:
	- change the failure message of the API
	  drm_atomic_helper_check_wb_connector_state() to a generic
	  one in case it checks more errors later and moreoever it
	  already has debug message to indicate its failure
	- change the corresponding DPU_ERROR to DPU_DEBUG in-line with
	  other atomic_check failure messages

changes in v3:
	- rebase on top of msm-next
	- replace drm_atomic_helper_check_wb_encoder_state() with
	  drm_atomic_helper_check_wb_connector_state() due to the
	  rebase

changes in v2:
	- correct some grammar in the commit text

Fixes: d7d0e73f7de3 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for writeback")
Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571811/
Link: https://lore.kernel.org/r/20231212205254.12422-2-quic_abhinavk@quicinc.com
[DB: removed extra debug message]
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index bb94909caa254..798d33e3207f6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -272,6 +272,7 @@ static int dpu_encoder_phys_wb_atomic_check(
 {
 	struct drm_framebuffer *fb;
 	const struct drm_display_mode *mode = &crtc_state->mode;
+	int ret;
 
 	DPU_DEBUG("[atomic_check:%d, \"%s\",%d,%d]\n",
 			phys_enc->hw_wb->idx, mode->name, mode->hdisplay, mode->vdisplay);
@@ -308,7 +309,7 @@ static int dpu_encoder_phys_wb_atomic_check(
 		return -EINVAL;
 	}
 
-	return 0;
+	return drm_atomic_helper_check_wb_connector_state(conn_state->connector, conn_state->state);
 }
 
 
-- 
GitLab


From ecf594453a6fbc6c06278cf815e3ece4a1b8261b Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:40 -0800
Subject: [PATCH 1141/2340] drm/msm/dpu: rename dpu_encoder_phys_wb_setup_cdp
 to match its functionality

dpu_encoder_phys_wb_setup_cdp() is not programming the chroma down
prefetch block. Its setting up the display ctl path for writeback.

Hence rename it to dpu_encoder_phys_wb_setup_ctl() to match what its
actually doing.

Fixes: d7d0e73f7de3 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for writeback")
Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571812/
Link: https://lore.kernel.org/r/20231212205254.12422-3-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 798d33e3207f6..9ca41de5ff7b6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -207,10 +207,10 @@ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc,
 }
 
 /**
- * dpu_encoder_phys_wb_setup_cdp - setup chroma down prefetch block
+ * dpu_encoder_phys_wb_setup_ctl - setup wb pipeline for ctl path
  * @phys_enc:Pointer to physical encoder
  */
-static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc)
+static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_hw_wb *hw_wb;
 	struct dpu_hw_ctl *ctl;
@@ -376,7 +376,7 @@ static void dpu_encoder_phys_wb_setup(
 
 	dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
 
-	dpu_encoder_phys_wb_setup_cdp(phys_enc);
+	dpu_encoder_phys_wb_setup_ctl(phys_enc);
 
 }
 
-- 
GitLab


From 79caf2f2202b9eaad3a5a726e4b33807f67d0f1b Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:41 -0800
Subject: [PATCH 1142/2340] drm/msm/dpu: fix writeback programming for YUV
 cases

For YUV cases, setting the required format bits was missed
out in the register programming. Lets fix it now in preparation
of adding YUV formats support for writeback.

changes in v2:
    - dropped the fixes tag as its not a fix but adding
      new functionality

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571814/
Link: https://lore.kernel.org/r/20231212205254.12422-4-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 1 -
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c           | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 9ca41de5ff7b6..c6e9c11b6971d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -272,7 +272,6 @@ static int dpu_encoder_phys_wb_atomic_check(
 {
 	struct drm_framebuffer *fb;
 	const struct drm_display_mode *mode = &crtc_state->mode;
-	int ret;
 
 	DPU_DEBUG("[atomic_check:%d, \"%s\",%d,%d]\n",
 			phys_enc->hw_wb->idx, mode->name, mode->hdisplay, mode->vdisplay);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
index ed0e80616129a..e75995f7fcea9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
@@ -89,6 +89,9 @@ static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
 			dst_format |= BIT(14); /* DST_ALPHA_X */
 	}
 
+	if (DPU_FORMAT_IS_YUV(fmt))
+		dst_format |= BIT(15);
+
 	pattern = (fmt->element[3] << 24) |
 		(fmt->element[2] << 16) |
 		(fmt->element[1] << 8)  |
-- 
GitLab


From 9c4998efec47a6f92d53bdcfea2b48783e4fedb5 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:42 -0800
Subject: [PATCH 1143/2340] drm/msm/dpu: move csc matrices to dpu_hw_util

Since the type and usage of CSC matrices is spanning across DPU
lets introduce a helper to the dpu_hw_util to return the CSC
corresponding to the request type. This will help to add more
supported CSC types such as the RGB to YUV one which is used in
the case of CDM.

changes in v3:
	- drop the extra wrapper and export the matrices directly

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571816/
Link: https://lore.kernel.org/r/20231212205254.12422-5-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 30 ++++++++++++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c   | 31 +--------------------
 2 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index fe083b2e5696a..aa50005042d16 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -19,6 +19,36 @@
 #define MISR_CTRL_STATUS_CLEAR          BIT(10)
 #define MISR_CTRL_FREE_RUN_MASK         BIT(31)
 
+static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
+	{
+		/* S15.16 format */
+		0x00012A00, 0x00000000, 0x00019880,
+		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
+		0x00012A00, 0x00020480, 0x00000000,
+	},
+	/* signed bias */
+	{ 0xfff0, 0xff80, 0xff80,},
+	{ 0x0, 0x0, 0x0,},
+	/* unsigned clamp */
+	{ 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,},
+	{ 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,},
+};
+
+static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
+	{
+		/* S15.16 format */
+		0x00012A00, 0x00000000, 0x00019880,
+		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
+		0x00012A00, 0x00020480, 0x00000000,
+	},
+	/* signed bias */
+	{ 0xffc0, 0xfe00, 0xfe00,},
+	{ 0x0, 0x0, 0x0,},
+	/* unsigned clamp */
+	{ 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,},
+	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
+};
+
 /*
  * This is the common struct maintained by each sub block
  * for mapping the register offsets in this block to the
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 3235ab1325400..ff975ad511456 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -21,6 +21,7 @@
 #include "dpu_kms.h"
 #include "dpu_formats.h"
 #include "dpu_hw_sspp.h"
+#include "dpu_hw_util.h"
 #include "dpu_trace.h"
 #include "dpu_crtc.h"
 #include "dpu_vbif.h"
@@ -508,36 +509,6 @@ static void _dpu_plane_setup_pixel_ext(struct dpu_hw_scaler3_cfg *scale_cfg,
 	}
 }
 
-static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
-	{
-		/* S15.16 format */
-		0x00012A00, 0x00000000, 0x00019880,
-		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
-		0x00012A00, 0x00020480, 0x00000000,
-	},
-	/* signed bias */
-	{ 0xfff0, 0xff80, 0xff80,},
-	{ 0x0, 0x0, 0x0,},
-	/* unsigned clamp */
-	{ 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,},
-	{ 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,},
-};
-
-static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
-	{
-		/* S15.16 format */
-		0x00012A00, 0x00000000, 0x00019880,
-		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
-		0x00012A00, 0x00020480, 0x00000000,
-		},
-	/* signed bias */
-	{ 0xffc0, 0xfe00, 0xfe00,},
-	{ 0x0, 0x0, 0x0,},
-	/* unsigned clamp */
-	{ 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,},
-	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
-};
-
 static const struct dpu_csc_cfg *_dpu_plane_get_csc(struct dpu_sw_pipe *pipe,
 						    const struct dpu_format *fmt)
 {
-- 
GitLab


From a5ec9a44d8a3ae09fdf6dfc12e89c8663bb631ea Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:43 -0800
Subject: [PATCH 1144/2340] drm/msm/dpu: add cdm blocks to sc7280
 dpu_hw_catalog

Add CDM blocks to the sc7280 dpu_hw_catalog to support
YUV format output from writeback block.

changes in v3:
	- change the comment from sub-blk to clk for CDM

changes in v2:
	- remove explicit zero assignment for features
	- move sc7280_cdm to dpu_hw_catalog from the sc7280
	  catalog file as its definition can be re-used

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571818/
Link: https://lore.kernel.org/r/20231212205254.12422-6-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h  |  1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c      | 10 ++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h      | 13 +++++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h         |  5 +++++
 4 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 209675de6742f..19c2b74547966 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -248,6 +248,7 @@ const struct dpu_mdss_cfg dpu_sc7280_cfg = {
 	.mdss_ver = &sc7280_mdss_ver,
 	.caps = &sc7280_dpu_caps,
 	.mdp = &sc7280_mdp,
+	.cdm = &sc7280_cdm,
 	.ctl_count = ARRAY_SIZE(sc7280_ctl),
 	.ctl = sc7280_ctl,
 	.sspp_count = ARRAY_SIZE(sc7280_sspp),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index d52aae54bbd59..b304bebedb844 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -426,6 +426,16 @@ static const struct dpu_dsc_sub_blks dsc_sblk_1 = {
 	.ctl = {.name = "ctl", .base = 0xF80, .len = 0x10},
 };
 
+/*************************************************************
+ * CDM block config
+ *************************************************************/
+static const struct dpu_cdm_cfg sc7280_cdm = {
+	.name = "cdm_0",
+	.id = CDM_0,
+	.len = 0x228,
+	.base = 0x79200,
+};
+
 /*************************************************************
  * VBIF sub blocks config
  *************************************************************/
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index e3c0d007481bc..ba82ef4560a6d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -682,6 +682,17 @@ struct dpu_vbif_cfg {
 	u32 memtype[MAX_XIN_COUNT];
 };
 
+/**
+ * struct dpu_cdm_cfg - information of chroma down blocks
+ * @name               string name for debug purposes
+ * @id                 enum identifying this block
+ * @base               register offset of this block
+ * @features           bit mask identifying sub-blocks/features
+ */
+struct dpu_cdm_cfg {
+	DPU_HW_BLK_INFO;
+};
+
 /**
  * Define CDP use cases
  * @DPU_PERF_CDP_UDAGE_RT: real-time use cases
@@ -805,6 +816,8 @@ struct dpu_mdss_cfg {
 	u32 wb_count;
 	const struct dpu_wb_cfg *wb;
 
+	const struct dpu_cdm_cfg *cdm;
+
 	u32 ad_count;
 
 	u32 dspp_count;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
index a6702b2bfc686..f319c8232ea5a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
@@ -185,6 +185,11 @@ enum dpu_dsc {
 	DSC_MAX
 };
 
+enum dpu_cdm {
+	CDM_0 = 1,
+	CDM_MAX
+};
+
 enum dpu_pingpong {
 	PINGPONG_NONE,
 	PINGPONG_0,
-- 
GitLab


From e1239661c9e9a628e0871f461bafebba324658d9 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:44 -0800
Subject: [PATCH 1145/2340] drm/msm/dpu: add cdm blocks to sm8250
 dpu_hw_catalog

Add CDM blocks to the sm8250 dpu_hw_catalog to support
YUV format output from writeback block.

changes in v2:
	- re-use the cdm definition from sc7280

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571819/
Link: https://lore.kernel.org/r/20231212205254.12422-7-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index 2359c16e9206b..58b0f50518c8c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -384,6 +384,7 @@ const struct dpu_mdss_cfg dpu_sm8250_cfg = {
 	.mdss_ver = &sm8250_mdss_ver,
 	.caps = &sm8250_dpu_caps,
 	.mdp = &sm8250_mdp,
+	.cdm = &sc7280_cdm,
 	.ctl_count = ARRAY_SIZE(sm8250_ctl),
 	.ctl = sm8250_ctl,
 	.sspp_count = ARRAY_SIZE(sm8250_sspp),
-- 
GitLab


From 0afac0ba60242ff827f4ffe56375c421a4e69a0f Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:45 -0800
Subject: [PATCH 1146/2340] drm/msm/dpu: add dpu_hw_cdm abstraction for CDM
 block

CDM block comes with its own set of registers and operations
which can be done. In-line with other hardware blocks, this
change adds the dpu_hw_cdm abstraction for the CDM block.

changes in v4:
	- used FIELD_PREP() for dpu_hw_cdm_setup_cdwn() operations
	- change to lowercase hex in dpu_hw_cdm_bind_pingpong_blk()
	- move disable assignment inside else in dpu_hw_cdm_bind_pingpong_blk()

changes in v3:
	- fix commit text from sub-blk to blk for CDM
	- fix kbot issue for missing static for dpu_hw_cdm_enable()
	- fix kbot issue for incorrect documentation style
	- add more documentation for enums and struct in dpu_hw_cdm.h
	- drop "enable" parameter from bind_pingpong_blk() as we can
	  just use PINGPONG_NONE for disable cases
	- drop unnecessary bit operation for zero value of cdm_cfg

changes in v2:
	- replace bit magic with relevant defines
	- use drmm_kzalloc instead of kzalloc/free
	- some formatting fixes
	- inline _setup_cdm_ops()
	- protect bind_pingpong_blk with core_rev check
	- drop setup_csc_data() and setup_cdwn() ops as they
	  are merged into enable()

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202312101815.B3ZH7Pfy-lkp@intel.com/
Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571824/
Link: https://lore.kernel.org/r/20231212205254.12422-8-quic_abhinavk@quicinc.com
[DB: Added linux/bitfield.h inclusion]
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/Makefile                |   1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c  | 247 ++++++++++++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h  | 142 +++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h |   1 +
 4 files changed, 391 insertions(+)
 create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c
 create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 49671364fdcfb..b1173128b5b97 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -63,6 +63,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
 	disp/dpu1/dpu_encoder_phys_wb.o \
 	disp/dpu1/dpu_formats.o \
 	disp/dpu1/dpu_hw_catalog.o \
+	disp/dpu1/dpu_hw_cdm.o \
 	disp/dpu1/dpu_hw_ctl.o \
 	disp/dpu1/dpu_hw_dsc.o \
 	disp/dpu1/dpu_hw_dsc_1_2.o \
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c
new file mode 100644
index 0000000000000..e9cdc7934a499
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+
+#include <drm/drm_managed.h>
+
+#include "dpu_hw_mdss.h"
+#include "dpu_hw_util.h"
+#include "dpu_hw_catalog.h"
+#include "dpu_hw_cdm.h"
+#include "dpu_kms.h"
+
+#define CDM_CSC_10_OPMODE                  0x000
+#define CDM_CSC_10_BASE                    0x004
+
+#define CDM_CDWN2_OP_MODE                  0x100
+#define CDM_CDWN2_CLAMP_OUT                0x104
+#define CDM_CDWN2_PARAMS_3D_0              0x108
+#define CDM_CDWN2_PARAMS_3D_1              0x10C
+#define CDM_CDWN2_COEFF_COSITE_H_0         0x110
+#define CDM_CDWN2_COEFF_COSITE_H_1         0x114
+#define CDM_CDWN2_COEFF_COSITE_H_2         0x118
+#define CDM_CDWN2_COEFF_OFFSITE_H_0        0x11C
+#define CDM_CDWN2_COEFF_OFFSITE_H_1        0x120
+#define CDM_CDWN2_COEFF_OFFSITE_H_2        0x124
+#define CDM_CDWN2_COEFF_COSITE_V           0x128
+#define CDM_CDWN2_COEFF_OFFSITE_V          0x12C
+#define CDM_CDWN2_OUT_SIZE                 0x130
+
+#define CDM_HDMI_PACK_OP_MODE              0x200
+#define CDM_CSC_10_MATRIX_COEFF_0          0x004
+
+#define CDM_MUX                            0x224
+
+/* CDM CDWN2 sub-block bit definitions */
+#define CDM_CDWN2_OP_MODE_EN                  BIT(0)
+#define CDM_CDWN2_OP_MODE_ENABLE_H            BIT(1)
+#define CDM_CDWN2_OP_MODE_ENABLE_V            BIT(2)
+#define CDM_CDWN2_OP_MODE_BITS_OUT_8BIT       BIT(7)
+#define CDM_CDWN2_V_PIXEL_METHOD_MASK         GENMASK(6, 5)
+#define CDM_CDWN2_H_PIXEL_METHOD_MASK         GENMASK(4, 3)
+
+/* CDM CSC10 sub-block bit definitions */
+#define CDM_CSC10_OP_MODE_EN               BIT(0)
+#define CDM_CSC10_OP_MODE_SRC_FMT_YUV      BIT(1)
+#define CDM_CSC10_OP_MODE_DST_FMT_YUV      BIT(2)
+
+/* CDM HDMI pack sub-block bit definitions */
+#define CDM_HDMI_PACK_OP_MODE_EN           BIT(0)
+
+/*
+ * Horizontal coefficients for cosite chroma downscale
+ * s13 representation of coefficients
+ */
+static u32 cosite_h_coeff[] = {0x00000016, 0x000001cc, 0x0100009e};
+
+/*
+ * Horizontal coefficients for offsite chroma downscale
+ */
+static u32 offsite_h_coeff[] = {0x000b0005, 0x01db01eb, 0x00e40046};
+
+/*
+ * Vertical coefficients for cosite chroma downscale
+ */
+static u32 cosite_v_coeff[] = {0x00080004};
+/*
+ * Vertical coefficients for offsite chroma downscale
+ */
+static u32 offsite_v_coeff[] = {0x00060002};
+
+static int dpu_hw_cdm_setup_cdwn(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cfg)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+	u32 opmode;
+	u32 out_size;
+
+	switch (cfg->h_cdwn_type) {
+	case CDM_CDWN_DISABLE:
+		opmode = 0;
+		break;
+	case CDM_CDWN_PIXEL_DROP:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_PIXEL_DROP);
+		break;
+	case CDM_CDWN_AVG:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_AVG);
+		break;
+	case CDM_CDWN_COSITE:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_COSITE);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_0,
+			      cosite_h_coeff[0]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_1,
+			      cosite_h_coeff[1]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_2,
+			      cosite_h_coeff[2]);
+		break;
+	case CDM_CDWN_OFFSITE:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, CDM_CDWN2_METHOD_OFFSITE);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_0,
+			      offsite_h_coeff[0]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_1,
+			      offsite_h_coeff[1]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_2,
+			      offsite_h_coeff[2]);
+		break;
+	default:
+		DPU_ERROR("%s invalid horz down sampling type\n", __func__);
+		return -EINVAL;
+	}
+
+	switch (cfg->v_cdwn_type) {
+	case CDM_CDWN_DISABLE:
+		/* if its only Horizontal downsample, we dont need to do anything here */
+		break;
+	case CDM_CDWN_PIXEL_DROP:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_PIXEL_DROP);
+		break;
+	case CDM_CDWN_AVG:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_AVG);
+		break;
+	case CDM_CDWN_COSITE:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_COSITE);
+		DPU_REG_WRITE(c,
+			      CDM_CDWN2_COEFF_COSITE_V,
+			      cosite_v_coeff[0]);
+		break;
+	case CDM_CDWN_OFFSITE:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_OFFSITE);
+		DPU_REG_WRITE(c,
+			      CDM_CDWN2_COEFF_OFFSITE_V,
+			      offsite_v_coeff[0]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (cfg->output_bit_depth != CDM_CDWN_OUTPUT_10BIT)
+		opmode |= CDM_CDWN2_OP_MODE_BITS_OUT_8BIT;
+
+	if (cfg->v_cdwn_type || cfg->h_cdwn_type)
+		opmode |= CDM_CDWN2_OP_MODE_EN; /* EN CDWN module */
+	else
+		opmode &= ~CDM_CDWN2_OP_MODE_EN;
+
+	out_size = (cfg->output_width & 0xFFFF) | ((cfg->output_height & 0xFFFF) << 16);
+	DPU_REG_WRITE(c, CDM_CDWN2_OUT_SIZE, out_size);
+	DPU_REG_WRITE(c, CDM_CDWN2_OP_MODE, opmode);
+	DPU_REG_WRITE(c, CDM_CDWN2_CLAMP_OUT, ((0x3FF << 16) | 0x0));
+
+	return 0;
+}
+
+static int dpu_hw_cdm_enable(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cdm)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+	const struct dpu_format *fmt;
+	u32 opmode = 0;
+	u32 csc = 0;
+
+	if (!ctx || !cdm)
+		return -EINVAL;
+
+	fmt = cdm->output_fmt;
+
+	if (!DPU_FORMAT_IS_YUV(fmt))
+		return -EINVAL;
+
+	dpu_hw_csc_setup(&ctx->hw, CDM_CSC_10_MATRIX_COEFF_0, cdm->csc_cfg, true);
+	dpu_hw_cdm_setup_cdwn(ctx, cdm);
+
+	if (cdm->output_type == CDM_CDWN_OUTPUT_HDMI) {
+		if (fmt->chroma_sample != DPU_CHROMA_H1V2)
+			return -EINVAL; /*unsupported format */
+		opmode = CDM_HDMI_PACK_OP_MODE_EN;
+		opmode |= (fmt->chroma_sample << 1);
+	}
+
+	csc |= CDM_CSC10_OP_MODE_DST_FMT_YUV;
+	csc &= ~CDM_CSC10_OP_MODE_SRC_FMT_YUV;
+	csc |= CDM_CSC10_OP_MODE_EN;
+
+	if (ctx && ctx->ops.bind_pingpong_blk)
+		ctx->ops.bind_pingpong_blk(ctx, cdm->pp_id);
+
+	DPU_REG_WRITE(c, CDM_CSC_10_OPMODE, csc);
+	DPU_REG_WRITE(c, CDM_HDMI_PACK_OP_MODE, opmode);
+	return 0;
+}
+
+static void dpu_hw_cdm_bind_pingpong_blk(struct dpu_hw_cdm *ctx, const enum dpu_pingpong pp)
+{
+	struct dpu_hw_blk_reg_map *c;
+	int mux_cfg;
+
+	c = &ctx->hw;
+
+	mux_cfg = DPU_REG_READ(c, CDM_MUX);
+	mux_cfg &= ~0xf;
+
+	if (pp)
+		mux_cfg |= (pp - PINGPONG_0) & 0x7;
+	else
+		mux_cfg |= 0xf;
+
+	DPU_REG_WRITE(c, CDM_MUX, mux_cfg);
+}
+
+struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev,
+				   const struct dpu_cdm_cfg *cfg, void __iomem *addr,
+				   const struct dpu_mdss_version *mdss_rev)
+{
+	struct dpu_hw_cdm *c;
+
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return ERR_PTR(-ENOMEM);
+
+	c->hw.blk_addr = addr + cfg->base;
+	c->hw.log_mask = DPU_DBG_MASK_CDM;
+
+	/* Assign ops */
+	c->idx = cfg->id;
+	c->caps = cfg;
+
+	c->ops.enable = dpu_hw_cdm_enable;
+	if (mdss_rev->core_major_ver >= 5)
+		c->ops.bind_pingpong_blk = dpu_hw_cdm_bind_pingpong_blk;
+
+	return c;
+}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h
new file mode 100644
index 0000000000000..348424df87c66
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DPU_HW_CDM_H
+#define _DPU_HW_CDM_H
+
+#include "dpu_hw_mdss.h"
+#include "dpu_hw_top.h"
+
+struct dpu_hw_cdm;
+
+/**
+ * struct dpu_hw_cdm_cfg : current configuration of CDM block
+ *
+ *  @output_width:         output ROI width of CDM block
+ *  @output_height:        output ROI height of CDM block
+ *  @output_bit_depth:     output bit-depth of CDM block
+ *  @h_cdwn_type:          downsample type used for horizontal pixels
+ *  @v_cdwn_type:          downsample type used for vertical pixels
+ *  @output_fmt:           handle to dpu_format of CDM block
+ *  @csc_cfg:              handle to CSC matrix programmed for CDM block
+ *  @output_type:          interface to which CDM is paired (HDMI/WB)
+ *  @pp_id:                ping-pong block to which CDM is bound to
+ */
+struct dpu_hw_cdm_cfg {
+	u32 output_width;
+	u32 output_height;
+	u32 output_bit_depth;
+	u32 h_cdwn_type;
+	u32 v_cdwn_type;
+	const struct dpu_format *output_fmt;
+	const struct dpu_csc_cfg *csc_cfg;
+	u32 output_type;
+	int pp_id;
+};
+
+/*
+ * These values are used indicate which type of downsample is used
+ * in the horizontal/vertical direction for the CDM block.
+ */
+enum dpu_hw_cdwn_type {
+	CDM_CDWN_DISABLE,
+	CDM_CDWN_PIXEL_DROP,
+	CDM_CDWN_AVG,
+	CDM_CDWN_COSITE,
+	CDM_CDWN_OFFSITE,
+};
+
+/*
+ * CDM block can be paired with WB or HDMI block. These values match
+ * the input with which the CDM block is paired.
+ */
+enum dpu_hw_cdwn_output_type {
+	CDM_CDWN_OUTPUT_HDMI,
+	CDM_CDWN_OUTPUT_WB,
+};
+
+/*
+ * CDM block can give an 8-bit or 10-bit output. These values
+ * are used to indicate the output bit depth of CDM block
+ */
+enum dpu_hw_cdwn_output_bit_depth {
+	CDM_CDWN_OUTPUT_8BIT,
+	CDM_CDWN_OUTPUT_10BIT,
+};
+
+/*
+ * CDM block can downsample using different methods. These values
+ * are used to indicate the downsample method which can be used
+ * either in the horizontal or vertical direction.
+ */
+enum dpu_hw_cdwn_op_mode_method_h_v {
+	CDM_CDWN2_METHOD_PIXEL_DROP,
+	CDM_CDWN2_METHOD_AVG,
+	CDM_CDWN2_METHOD_COSITE,
+	CDM_CDWN2_METHOD_OFFSITE
+};
+
+/**
+ * struct dpu_hw_cdm_ops : Interface to the chroma down Hw driver functions
+ *                         Assumption is these functions will be called after
+ *                         clocks are enabled
+ *  @enable:               Enables the output to interface and programs the
+ *                         output packer
+ *  @bind_pingpong_blk:    enable/disable the connection with pingpong which
+ *                         will feed pixels to this cdm
+ */
+struct dpu_hw_cdm_ops {
+	/**
+	 * Enable the CDM module
+	 * @cdm         Pointer to chroma down context
+	 */
+	int (*enable)(struct dpu_hw_cdm *cdm, struct dpu_hw_cdm_cfg *cfg);
+
+	/**
+	 * Enable/disable the connection with pingpong
+	 * @cdm         Pointer to chroma down context
+	 * @pp          pingpong block id.
+	 */
+	void (*bind_pingpong_blk)(struct dpu_hw_cdm *cdm, const enum dpu_pingpong pp);
+};
+
+/**
+ * struct dpu_hw_cdm - cdm description
+ * @base: Hardware block base structure
+ * @hw: Block hardware details
+ * @idx: CDM index
+ * @caps: Pointer to cdm_cfg
+ * @ops: handle to operations possible for this CDM
+ */
+struct dpu_hw_cdm {
+	struct dpu_hw_blk base;
+	struct dpu_hw_blk_reg_map hw;
+
+	/* chroma down */
+	const struct dpu_cdm_cfg *caps;
+	enum  dpu_cdm  idx;
+
+	/* ops */
+	struct dpu_hw_cdm_ops ops;
+};
+
+/**
+ * dpu_hw_cdm_init - initializes the cdm hw driver object.
+ * should be called once before accessing every cdm.
+ * @dev: DRM device handle
+ * @cdm: CDM catalog entry for which driver object is required
+ * @addr :   mapped register io address of MDSS
+ * @mdss_rev: mdss hw core revision
+ */
+struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev,
+				   const struct dpu_cdm_cfg *cdm, void __iomem *addr,
+				   const struct dpu_mdss_version *mdss_rev);
+
+static inline struct dpu_hw_cdm *to_dpu_hw_cdm(struct dpu_hw_blk *hw)
+{
+	return container_of(hw, struct dpu_hw_cdm, base);
+}
+
+#endif /*_DPU_HW_CDM_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
index f319c8232ea5a..9db4cf61bd29d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
@@ -466,6 +466,7 @@ struct dpu_mdss_color {
 #define DPU_DBG_MASK_ROT      (1 << 9)
 #define DPU_DBG_MASK_DSPP     (1 << 10)
 #define DPU_DBG_MASK_DSC      (1 << 11)
+#define DPU_DBG_MASK_CDM      (1 << 12)
 
 /**
  * struct dpu_hw_tear_check - Struct contains parameters to configure
-- 
GitLab


From f58a6bf404b3cd6127d022350e67560ed323ee52 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:46 -0800
Subject: [PATCH 1147/2340] drm/msm/dpu: add cdm blocks to RM

Add the RM APIs necessary to initialize and allocate CDM
blocks to be used by the rest of the DPU pipeline.

changes in v2:
	- treat cdm_init() failure as fatal
	- fixed the commit text

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571822/
Link: https://lore.kernel.org/r/20231212205254.12422-9-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 13 +++++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h |  2 ++
 2 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index 0bb28cf4a6cbc..7ed476b963042 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -8,6 +8,7 @@
 #include "dpu_kms.h"
 #include "dpu_hw_lm.h"
 #include "dpu_hw_ctl.h"
+#include "dpu_hw_cdm.h"
 #include "dpu_hw_pingpong.h"
 #include "dpu_hw_sspp.h"
 #include "dpu_hw_intf.h"
@@ -176,6 +177,18 @@ int dpu_rm_init(struct drm_device *dev,
 		rm->hw_sspp[sspp->id - SSPP_NONE] = hw;
 	}
 
+	if (cat->cdm) {
+		struct dpu_hw_cdm *hw;
+
+		hw = dpu_hw_cdm_init(dev, cat->cdm, mmio, cat->mdss_ver);
+		if (IS_ERR(hw)) {
+			rc = PTR_ERR(hw);
+			DPU_ERROR("failed cdm object creation: err %d\n", rc);
+			goto fail;
+		}
+		rm->cdm_blk = &hw->base;
+	}
+
 	return 0;
 
 fail:
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
index 36752d837be4c..e3f83ebc656b3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
@@ -22,6 +22,7 @@ struct dpu_global_state;
  * @hw_wb: array of wb hardware resources
  * @dspp_blks: array of dspp hardware resources
  * @hw_sspp: array of sspp hardware resources
+ * @cdm_blk: cdm hardware resource
  */
 struct dpu_rm {
 	struct dpu_hw_blk *pingpong_blks[PINGPONG_MAX - PINGPONG_0];
@@ -33,6 +34,7 @@ struct dpu_rm {
 	struct dpu_hw_blk *merge_3d_blks[MERGE_3D_MAX - MERGE_3D_0];
 	struct dpu_hw_blk *dsc_blks[DSC_MAX - DSC_0];
 	struct dpu_hw_sspp *hw_sspp[SSPP_MAX - SSPP_NONE];
+	struct dpu_hw_blk *cdm_blk;
 };
 
 /**
-- 
GitLab


From 5ef42da742e1ddf35f4a417f8117d10936f45d92 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:47 -0800
Subject: [PATCH 1148/2340] drm/msm/dpu: add support to allocate CDM from RM

Even though there is usually only one CDM block, it can be
used by either HDMI, DisplayPort OR Writeback interfaces.

Hence its allocation needs to be tracked properly by the
resource manager to ensure appropriate availability of the
block.

changes in v2:
	- move needs_cdm to topology struct

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571827/
Link: https://lore.kernel.org/r/20231212205254.12422-10-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h |  1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h     |  1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c      | 38 +++++++++++++++++++--
 drivers/gpu/drm/msm/msm_drv.h               |  2 ++
 4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
index 9db4cf61bd29d..5df5459040572 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
@@ -98,6 +98,7 @@ enum dpu_hw_blk_type {
 	DPU_HW_BLK_DSPP,
 	DPU_HW_BLK_MERGE_3D,
 	DPU_HW_BLK_DSC,
+	DPU_HW_BLK_CDM,
 	DPU_HW_BLK_MAX,
 };
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index 9112a702a00f2..d1207f4ec3ae5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -136,6 +136,7 @@ struct dpu_global_state {
 	uint32_t ctl_to_enc_id[CTL_MAX - CTL_0];
 	uint32_t dspp_to_enc_id[DSPP_MAX - DSPP_0];
 	uint32_t dsc_to_enc_id[DSC_MAX - DSC_0];
+	uint32_t cdm_to_enc_id;
 };
 
 struct dpu_global_state
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index 7ed476b963042..b58a9c2ae326c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -435,6 +435,26 @@ static int _dpu_rm_reserve_dsc(struct dpu_rm *rm,
 	return 0;
 }
 
+static int _dpu_rm_reserve_cdm(struct dpu_rm *rm,
+			       struct dpu_global_state *global_state,
+			       struct drm_encoder *enc)
+{
+	/* try allocating only one CDM block */
+	if (!rm->cdm_blk) {
+		DPU_ERROR("CDM block does not exist\n");
+		return -EIO;
+	}
+
+	if (global_state->cdm_to_enc_id) {
+		DPU_ERROR("CDM_0 is already allocated\n");
+		return -EIO;
+	}
+
+	global_state->cdm_to_enc_id = enc->base.id;
+
+	return 0;
+}
+
 static int _dpu_rm_make_reservation(
 		struct dpu_rm *rm,
 		struct dpu_global_state *global_state,
@@ -460,6 +480,14 @@ static int _dpu_rm_make_reservation(
 	if (ret)
 		return ret;
 
+	if (reqs->topology.needs_cdm) {
+		ret = _dpu_rm_reserve_cdm(rm, global_state, enc);
+		if (ret) {
+			DPU_ERROR("unable to find CDM blk\n");
+			return ret;
+		}
+	}
+
 	return ret;
 }
 
@@ -470,9 +498,9 @@ static int _dpu_rm_populate_requirements(
 {
 	reqs->topology = req_topology;
 
-	DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d\n",
+	DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d cdm: %d\n",
 		      reqs->topology.num_lm, reqs->topology.num_dsc,
-		      reqs->topology.num_intf);
+		      reqs->topology.num_intf, reqs->topology.needs_cdm);
 
 	return 0;
 }
@@ -501,6 +529,7 @@ void dpu_rm_release(struct dpu_global_state *global_state,
 		ARRAY_SIZE(global_state->dsc_to_enc_id), enc->base.id);
 	_dpu_rm_clear_mapping(global_state->dspp_to_enc_id,
 		ARRAY_SIZE(global_state->dspp_to_enc_id), enc->base.id);
+	_dpu_rm_clear_mapping(&global_state->cdm_to_enc_id, 1, enc->base.id);
 }
 
 int dpu_rm_reserve(
@@ -574,6 +603,11 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm,
 		hw_to_enc_id = global_state->dsc_to_enc_id;
 		max_blks = ARRAY_SIZE(rm->dsc_blks);
 		break;
+	case DPU_HW_BLK_CDM:
+		hw_blks = &rm->cdm_blk;
+		hw_to_enc_id = &global_state->cdm_to_enc_id;
+		max_blks = 1;
+		break;
 	default:
 		DPU_ERROR("blk type %d not managed by rm\n", type);
 		return 0;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index c0446fa66b98d..16a7cbc0b7dd8 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -90,12 +90,14 @@ enum msm_event_wait {
  * @num_intf:     number of interfaces the panel is mounted on
  * @num_dspp:     number of dspp blocks used
  * @num_dsc:      number of Display Stream Compression (DSC) blocks used
+ * @needs_cdm:    indicates whether cdm block is needed for this display topology
  */
 struct msm_display_topology {
 	u32 num_lm;
 	u32 num_intf;
 	u32 num_dspp;
 	u32 num_dsc;
+	bool needs_cdm;
 };
 
 /* Commit/Event thread specific structure */
-- 
GitLab


From 53d5abe67e5870142f970064b1371a10d2fe7a15 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:48 -0800
Subject: [PATCH 1149/2340] drm/msm/dpu: add CDM related logic to dpu_hw_ctl
 layer

CDM block will need its own logic to program the flush and active
bits in the dpu_hw_ctl layer.

Make necessary changes in dpu_hw_ctl to support CDM programming.

changes in v3:
	- drop unused cdm_active as reported by kbot
	- retained the R-b as its a trivial change

changes in v2:
	- remove unused empty line
	- pass in cdm_num to update_pending_flush_cdm()

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202312102047.S0I69pCs-lkp@intel.com/
Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571829/
Link: https://lore.kernel.org/r/20231212205254.12422-11-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c | 33 ++++++++++++++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h | 12 ++++++++
 2 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
index e7b680a151d6d..e76565c3e6a43 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
@@ -32,11 +32,13 @@
 #define   CTL_DSC_ACTIVE                0x0E8
 #define   CTL_WB_ACTIVE                 0x0EC
 #define   CTL_INTF_ACTIVE               0x0F4
+#define   CTL_CDM_ACTIVE                0x0F8
 #define   CTL_FETCH_PIPE_ACTIVE         0x0FC
 #define   CTL_MERGE_3D_FLUSH            0x100
 #define   CTL_DSC_FLUSH                0x104
 #define   CTL_WB_FLUSH                  0x108
 #define   CTL_INTF_FLUSH                0x110
+#define   CTL_CDM_FLUSH                0x114
 #define   CTL_INTF_MASTER               0x134
 #define   CTL_DSPP_n_FLUSH(n)           ((0x13C) + ((n) * 4))
 
@@ -46,6 +48,7 @@
 #define DPU_REG_RESET_TIMEOUT_US        2000
 #define  MERGE_3D_IDX   23
 #define  DSC_IDX        22
+#define CDM_IDX         26
 #define  INTF_IDX       31
 #define WB_IDX          16
 #define  DSPP_IDX       29  /* From DPU hw rev 7.x.x */
@@ -107,6 +110,7 @@ static inline void dpu_hw_ctl_clear_pending_flush(struct dpu_hw_ctl *ctx)
 	ctx->pending_wb_flush_mask = 0;
 	ctx->pending_merge_3d_flush_mask = 0;
 	ctx->pending_dsc_flush_mask = 0;
+	ctx->pending_cdm_flush_mask = 0;
 
 	memset(ctx->pending_dspp_flush_mask, 0,
 		sizeof(ctx->pending_dspp_flush_mask));
@@ -151,6 +155,10 @@ static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx)
 		DPU_REG_WRITE(&ctx->hw, CTL_DSC_FLUSH,
 			      ctx->pending_dsc_flush_mask);
 
+	if (ctx->pending_flush_mask & BIT(CDM_IDX))
+		DPU_REG_WRITE(&ctx->hw, CTL_CDM_FLUSH,
+			      ctx->pending_cdm_flush_mask);
+
 	DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask);
 }
 
@@ -282,6 +290,13 @@ static void dpu_hw_ctl_update_pending_flush_wb(struct dpu_hw_ctl *ctx,
 	}
 }
 
+static void dpu_hw_ctl_update_pending_flush_cdm(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num)
+{
+	/* update pending flush only if CDM_0 is flushed */
+	if (cdm_num == CDM_0)
+		ctx->pending_flush_mask |= BIT(CDM_IDX);
+}
+
 static void dpu_hw_ctl_update_pending_flush_wb_v1(struct dpu_hw_ctl *ctx,
 		enum dpu_wb wb)
 {
@@ -310,6 +325,12 @@ static void dpu_hw_ctl_update_pending_flush_dsc_v1(struct dpu_hw_ctl *ctx,
 	ctx->pending_flush_mask |= BIT(DSC_IDX);
 }
 
+static void dpu_hw_ctl_update_pending_flush_cdm_v1(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num)
+{
+	ctx->pending_cdm_flush_mask |= BIT(cdm_num - CDM_0);
+	ctx->pending_flush_mask |= BIT(CDM_IDX);
+}
+
 static void dpu_hw_ctl_update_pending_flush_dspp(struct dpu_hw_ctl *ctx,
 	enum dpu_dspp dspp, u32 dspp_sub_blk)
 {
@@ -543,6 +564,9 @@ static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx,
 
 	if (cfg->dsc)
 		DPU_REG_WRITE(c, CTL_DSC_ACTIVE, cfg->dsc);
+
+	if (cfg->cdm)
+		DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cfg->cdm);
 }
 
 static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx,
@@ -586,6 +610,7 @@ static void dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx,
 	u32 wb_active = 0;
 	u32 merge3d_active = 0;
 	u32 dsc_active;
+	u32 cdm_active;
 
 	/*
 	 * This API resets each portion of the CTL path namely,
@@ -621,6 +646,12 @@ static void dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx,
 		dsc_active &= ~cfg->dsc;
 		DPU_REG_WRITE(c, CTL_DSC_ACTIVE, dsc_active);
 	}
+
+	if (cfg->cdm) {
+		cdm_active = DPU_REG_READ(c, CTL_CDM_ACTIVE);
+		cdm_active &= ~cfg->cdm;
+		DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cdm_active);
+	}
 }
 
 static void dpu_hw_ctl_set_fetch_pipe_active(struct dpu_hw_ctl *ctx,
@@ -654,12 +685,14 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops,
 		ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb_v1;
 		ops->update_pending_flush_dsc =
 			dpu_hw_ctl_update_pending_flush_dsc_v1;
+		ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm_v1;
 	} else {
 		ops->trigger_flush = dpu_hw_ctl_trigger_flush;
 		ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg;
 		ops->update_pending_flush_intf =
 			dpu_hw_ctl_update_pending_flush_intf;
 		ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb;
+		ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm;
 	}
 	ops->clear_pending_flush = dpu_hw_ctl_clear_pending_flush;
 	ops->update_pending_flush = dpu_hw_ctl_update_pending_flush;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
index 279ebd8dfbff7..ff85b5ee0acf8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
@@ -39,6 +39,7 @@ struct dpu_hw_stage_cfg {
  * @mode_3d:               3d mux configuration
  * @merge_3d:              3d merge block used
  * @intf_mode_sel:         Interface mode, cmd / vid
+ * @cdm:                   CDM block used
  * @stream_sel:            Stream selection for multi-stream interfaces
  * @dsc:                   DSC BIT masks used
  */
@@ -48,6 +49,7 @@ struct dpu_hw_intf_cfg {
 	enum dpu_3d_blend_mode mode_3d;
 	enum dpu_merge_3d merge_3d;
 	enum dpu_ctl_mode_sel intf_mode_sel;
+	enum dpu_cdm cdm;
 	int stream_sel;
 	unsigned int dsc;
 };
@@ -166,6 +168,14 @@ struct dpu_hw_ctl_ops {
 	void (*update_pending_flush_dsc)(struct dpu_hw_ctl *ctx,
 					 enum dpu_dsc blk);
 
+	/**
+	 * OR in the given flushbits to the cached pending_(cdm_)flush_mask
+	 * No effect on hardware
+	 * @ctx: ctl path ctx pointer
+	 * @cdm_num: idx of cdm to be flushed
+	 */
+	void (*update_pending_flush_cdm)(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num);
+
 	/**
 	 * Write the value of the pending_flush_mask to hardware
 	 * @ctx       : ctl path ctx pointer
@@ -239,6 +249,7 @@ struct dpu_hw_ctl_ops {
  * @pending_intf_flush_mask: pending INTF flush
  * @pending_wb_flush_mask: pending WB flush
  * @pending_dsc_flush_mask: pending DSC flush
+ * @pending_cdm_flush_mask: pending CDM flush
  * @ops: operation list
  */
 struct dpu_hw_ctl {
@@ -256,6 +267,7 @@ struct dpu_hw_ctl {
 	u32 pending_merge_3d_flush_mask;
 	u32 pending_dspp_flush_mask[DSPP_MAX - DSPP_0];
 	u32 pending_dsc_flush_mask;
+	u32 pending_cdm_flush_mask;
 
 	/* ops */
 	struct dpu_hw_ctl_ops ops;
-- 
GitLab


From a780a82a58eccb73fda4b0712c5cc189be7c92b3 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:49 -0800
Subject: [PATCH 1150/2340] drm/msm/dpu: add an API to setup the CDM block for
 writeback

Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
the writeback encoder to setup the CDM block.

Currently, this is defined and used within the writeback's physical
encoder layer however, the function can be modified to be used to setup
the CDM block even for non-writeback interfaces.

Until those modifications are planned and made, keep it local to
writeback.

changes in v3:
	- call bind_pingpong_blk() directly as disable() is dropped
	- add dpu_csc10_rgb2yuv_601l to dpu_hw_util.h and use it
	- fix kbot error on the function doc
	- document that dpu_encoder_helper_phys_setup_cdm() doesn't handle
	  DPU_CHROMA_H1V2

changes in v2:
	- add the RGB2YUV CSC matrix to dpu util as needed by CDM
	- use dpu_hw_get_csc_cfg() to get and program CSC
	- drop usage of setup_csc_data() and setup_cdwn() cdm ops
	  as they both have been merged into enable()
	- drop reduntant hw_cdm and hw_pp checks

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202312102149.qmbCdsg2-lkp@intel.com/
Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571833/
Link: https://lore.kernel.org/r/20231212205254.12422-12-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  6 ++
 .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 93 ++++++++++++++++++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h   | 14 +++
 3 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
index e2934a6702d1c..bdb89cbbcfb87 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
@@ -14,8 +14,10 @@
 #include "dpu_hw_intf.h"
 #include "dpu_hw_wb.h"
 #include "dpu_hw_pingpong.h"
+#include "dpu_hw_cdm.h"
 #include "dpu_hw_ctl.h"
 #include "dpu_hw_top.h"
+#include "dpu_hw_util.h"
 #include "dpu_encoder.h"
 #include "dpu_crtc.h"
 
@@ -150,6 +152,7 @@ enum dpu_intr_idx {
  * @hw_pp:		Hardware interface to the ping pong registers
  * @hw_intf:		Hardware interface to the intf registers
  * @hw_wb:		Hardware interface to the wb registers
+ * @hw_cdm:		Hardware interface to the CDM registers
  * @dpu_kms:		Pointer to the dpu_kms top level
  * @cached_mode:	DRM mode cached at mode_set time, acted on in enable
  * @enabled:		Whether the encoder has enabled and running a mode
@@ -178,6 +181,7 @@ struct dpu_encoder_phys {
 	struct dpu_hw_pingpong *hw_pp;
 	struct dpu_hw_intf *hw_intf;
 	struct dpu_hw_wb *hw_wb;
+	struct dpu_hw_cdm *hw_cdm;
 	struct dpu_kms *dpu_kms;
 	struct drm_display_mode cached_mode;
 	enum dpu_enc_split_role split_role;
@@ -207,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
  * @wbirq_refcount:     Reference count of writeback interrupt
  * @wb_done_timeout_cnt: number of wb done irq timeout errors
  * @wb_cfg:  writeback block config to store fb related details
+ * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
  * @wb_conn: backpointer to writeback connector
  * @wb_job: backpointer to current writeback job
  * @dest:   dpu buffer layout for current writeback output buffer
@@ -216,6 +221,7 @@ struct dpu_encoder_phys_wb {
 	atomic_t wbirq_refcount;
 	int wb_done_timeout_cnt;
 	struct dpu_hw_wb_cfg wb_cfg;
+	struct dpu_hw_cdm_cfg cdm_cfg;
 	struct drm_writeback_connector *wb_conn;
 	struct drm_writeback_job *wb_job;
 	struct dpu_hw_fmt_layout dest;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index c6e9c11b6971d..9c87020c71198 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -259,6 +259,96 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
 	}
 }
 
+/**
+ * dpu_encoder_helper_phys_setup_cdm - setup chroma down sampling block
+ *                                     This API does not handle DPU_CHROMA_H1V2.
+ * @phys_enc:Pointer to physical encoder
+ */
+static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
+{
+	struct dpu_hw_cdm *hw_cdm;
+	struct dpu_hw_cdm_cfg *cdm_cfg;
+	struct dpu_hw_pingpong *hw_pp;
+	struct dpu_encoder_phys_wb *wb_enc;
+	const struct msm_format *format;
+	const struct dpu_format *dpu_fmt;
+	struct drm_writeback_job *wb_job;
+	int ret;
+
+	if (!phys_enc)
+		return;
+
+	wb_enc = to_dpu_encoder_phys_wb(phys_enc);
+	cdm_cfg = &wb_enc->cdm_cfg;
+	hw_pp = phys_enc->hw_pp;
+	hw_cdm = phys_enc->hw_cdm;
+	wb_job = wb_enc->wb_job;
+
+	format = msm_framebuffer_format(wb_enc->wb_job->fb);
+	dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
+
+	if (!hw_cdm)
+		return;
+
+	if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
+		DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
+			  dpu_fmt->base.pixel_format);
+		if (hw_cdm->ops.bind_pingpong_blk)
+			hw_cdm->ops.bind_pingpong_blk(hw_cdm, PINGPONG_NONE);
+
+		return;
+	}
+
+	memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
+
+	cdm_cfg->output_width = wb_job->fb->width;
+	cdm_cfg->output_height = wb_job->fb->height;
+	cdm_cfg->output_fmt = dpu_fmt;
+	cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
+	cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
+			CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
+	cdm_cfg->csc_cfg = &dpu_csc10_rgb2yuv_601l;
+
+	/* enable 10 bit logic */
+	switch (cdm_cfg->output_fmt->chroma_sample) {
+	case DPU_CHROMA_RGB:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	case DPU_CHROMA_H2V1:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	case DPU_CHROMA_420:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
+		break;
+	case DPU_CHROMA_H1V2:
+	default:
+		DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
+			  DRMID(phys_enc->parent));
+		cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	}
+
+	DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
+		  DRMID(phys_enc->parent), cdm_cfg->output_width,
+		  cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
+		  cdm_cfg->output_type, cdm_cfg->output_bit_depth,
+		  cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
+
+	if (hw_cdm->ops.enable) {
+		cdm_cfg->pp_id = hw_pp->idx;
+		ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
+		if (ret < 0) {
+			DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
+				  DRMID(phys_enc->parent), ret);
+			return;
+		}
+	}
+}
+
 /**
  * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
  * @phys_enc:	Pointer to physical encoder
@@ -375,8 +465,9 @@ static void dpu_encoder_phys_wb_setup(
 
 	dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
 
-	dpu_encoder_phys_wb_setup_ctl(phys_enc);
+	dpu_encoder_helper_phys_setup_cdm(phys_enc);
 
+	dpu_encoder_phys_wb_setup_ctl(phys_enc);
 }
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index aa50005042d16..c0aaad2023da6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -19,6 +19,8 @@
 #define MISR_CTRL_STATUS_CLEAR          BIT(10)
 #define MISR_CTRL_FREE_RUN_MASK         BIT(31)
 
+#define TO_S15D16(_x_)((_x_) << 7)
+
 static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
 	{
 		/* S15.16 format */
@@ -49,6 +51,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
 	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
 };
 
+static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
+	{
+		TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
+		TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
+		TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
+	},
+	{ 0x00, 0x00, 0x00 },
+	{ 0x0040, 0x0200, 0x0200 },
+	{ 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
+	{ 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
+};
+
 /*
  * This is the common struct maintained by each sub block
  * for mapping the register offsets in this block to the
-- 
GitLab


From f88c0c8fdb6bb49fc969e5adfcf8ee69bdb753df Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:50 -0800
Subject: [PATCH 1151/2340] drm/msm/dpu: plug-in the cdm related bits to
 writeback setup

To setup and enable CDM block for the writeback pipeline, lets
add the pieces together to set the active bits and the flush
bits for the CDM block.

changes in v2:
	- passed the cdm idx to update_pending_flush_cdm()
	  (have retained the R-b as its a minor change)

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571831/
Link: https://lore.kernel.org/r/20231212205254.12422-13-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 9c87020c71198..4cd2d9e3131a4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -214,6 +214,7 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_hw_wb *hw_wb;
 	struct dpu_hw_ctl *ctl;
+	struct dpu_hw_cdm *hw_cdm;
 
 	if (!phys_enc) {
 		DPU_ERROR("invalid encoder\n");
@@ -222,6 +223,7 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
 
 	hw_wb = phys_enc->hw_wb;
 	ctl = phys_enc->hw_ctl;
+	hw_cdm = phys_enc->hw_cdm;
 
 	if (test_bit(DPU_CTL_ACTIVE_CFG, &ctl->caps->features) &&
 		(phys_enc->hw_ctl &&
@@ -238,6 +240,9 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
 		if (mode_3d && hw_pp && hw_pp->merge_3d)
 			intf_cfg.merge_3d = hw_pp->merge_3d->idx;
 
+		if (hw_cdm)
+			intf_cfg.cdm = hw_cdm->idx;
+
 		if (phys_enc->hw_pp->merge_3d && phys_enc->hw_pp->merge_3d->ops.setup_3d_mode)
 			phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d,
 					mode_3d);
@@ -411,6 +416,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc)
 	struct dpu_hw_wb *hw_wb;
 	struct dpu_hw_ctl *hw_ctl;
 	struct dpu_hw_pingpong *hw_pp;
+	struct dpu_hw_cdm *hw_cdm;
 	u32 pending_flush = 0;
 
 	if (!phys_enc)
@@ -419,6 +425,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc)
 	hw_wb = phys_enc->hw_wb;
 	hw_pp = phys_enc->hw_pp;
 	hw_ctl = phys_enc->hw_ctl;
+	hw_cdm = phys_enc->hw_cdm;
 
 	DPU_DEBUG("[wb:%d]\n", hw_wb->idx - WB_0);
 
@@ -434,6 +441,9 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc)
 		hw_ctl->ops.update_pending_flush_merge_3d(hw_ctl,
 				hw_pp->merge_3d->idx);
 
+	if (hw_cdm && hw_ctl->ops.update_pending_flush_cdm)
+		hw_ctl->ops.update_pending_flush_cdm(hw_ctl, hw_cdm->idx);
+
 	if (hw_ctl->ops.get_pending_flush)
 		pending_flush = hw_ctl->ops.get_pending_flush(hw_ctl);
 
-- 
GitLab


From 8b45a26f2ba9a052a8b46163fbc9d0d8739ca15d Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:51 -0800
Subject: [PATCH 1152/2340] drm/msm/dpu: reserve cdm blocks for writeback in
 case of YUV output

Reserve CDM blocks for writeback if the format of the output fb
is YUV. At the moment, the reservation is done only for writeback
but can easily be extended by relaxing the checks once other
interfaces are ready to output YUV.

changes in v3:
	- squash CDM disable during encoder cleanup into this change

changes in v2:
	- use needs_cdm from topology struct
	- drop fb related checks from atomic_mode_set()

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571838/
Link: https://lore.kernel.org/r/20231212205254.12422-14-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 37 +++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 4ceb3421003c4..54042754f0112 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -16,6 +16,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_file.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_framebuffer.h>
 
 #include "msm_drv.h"
 #include "dpu_kms.h"
@@ -26,6 +27,7 @@
 #include "dpu_hw_dspp.h"
 #include "dpu_hw_dsc.h"
 #include "dpu_hw_merge3d.h"
+#include "dpu_hw_cdm.h"
 #include "dpu_formats.h"
 #include "dpu_encoder_phys.h"
 #include "dpu_crtc.h"
@@ -584,6 +586,7 @@ static int dpu_encoder_virt_atomic_check(
 	struct drm_display_mode *adj_mode;
 	struct msm_display_topology topology;
 	struct dpu_global_state *global_state;
+	struct drm_framebuffer *fb;
 	struct drm_dsc_config *dsc;
 	int i = 0;
 	int ret = 0;
@@ -624,6 +627,22 @@ static int dpu_encoder_virt_atomic_check(
 
 	topology = dpu_encoder_get_topology(dpu_enc, dpu_kms, adj_mode, crtc_state, dsc);
 
+	/*
+	 * Use CDM only for writeback at the moment as other interfaces cannot handle it.
+	 * if writeback itself cannot handle cdm for some reason it will fail in its atomic_check()
+	 * earlier.
+	 */
+	if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) {
+		fb = conn_state->writeback_job->fb;
+
+		if (fb && DPU_FORMAT_IS_YUV(to_dpu_format(msm_framebuffer_format(fb))))
+			topology.needs_cdm = true;
+		if (topology.needs_cdm && !dpu_enc->cur_master->hw_cdm)
+			crtc_state->mode_changed = true;
+		else if (!topology.needs_cdm && dpu_enc->cur_master->hw_cdm)
+			crtc_state->mode_changed = true;
+	}
+
 	/*
 	 * Release and Allocate resources on every modeset
 	 * Dont allocate when active is false.
@@ -1064,6 +1083,15 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc,
 
 	dpu_enc->dsc_mask = dsc_mask;
 
+	if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) {
+		struct dpu_hw_blk *hw_cdm = NULL;
+
+		dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
+					      drm_enc->base.id, DPU_HW_BLK_CDM,
+					      &hw_cdm, 1);
+		dpu_enc->cur_master->hw_cdm = hw_cdm ? to_dpu_hw_cdm(hw_cdm) : NULL;
+	}
+
 	cstate = to_dpu_crtc_state(crtc_state);
 
 	for (i = 0; i < num_lm; i++) {
@@ -2052,6 +2080,15 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
 					phys_enc->hw_pp->merge_3d->idx);
 	}
 
+	if (phys_enc->hw_cdm) {
+		if (phys_enc->hw_cdm->ops.bind_pingpong_blk && phys_enc->hw_pp)
+			phys_enc->hw_cdm->ops.bind_pingpong_blk(phys_enc->hw_cdm,
+								PINGPONG_NONE);
+		if (phys_enc->hw_ctl->ops.update_pending_flush_cdm)
+			phys_enc->hw_ctl->ops.update_pending_flush_cdm(phys_enc->hw_ctl,
+								       phys_enc->hw_cdm->idx);
+	}
+
 	if (dpu_enc->dsc) {
 		dpu_encoder_unprep_dsc(dpu_enc);
 		dpu_enc->dsc = NULL;
-- 
GitLab


From 8c16b988ba2d3dfc38fc8def0a9ccaefa50debd4 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:52 -0800
Subject: [PATCH 1153/2340] drm/msm/dpu: introduce separate wb2_format arrays
 for rgb and yuv

Lets rename the existing wb2_formats array wb2_formats_rgb to indicate
that it has only RGB formats and can be used on any chipset having a WB
block.

Introduce a new wb2_formats_rgb_yuv array to the catalog to
indicate support for YUV formats to writeback in addition to RGB.

Chipsets which have support for CDM block will use the newly added
wb2_formats_rgb_yuv array.

changes in v3:
	- change type of wb2_formats_rgb/wb2_formats_rgb_yuv to u32
	  to fix checkpatch warnings

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571837/
Link: https://lore.kernel.org/r/20231212205254.12422-15-quic_abhinavk@quicinc.com
[DB: fixed newer catalog entries]
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 .../msm/disp/dpu1/catalog/dpu_10_0_sm8650.h   |  4 +-
 .../msm/disp/dpu1/catalog/dpu_6_0_sm8250.h    |  4 +-
 .../msm/disp/dpu1/catalog/dpu_6_2_sc7180.h    |  4 +-
 .../msm/disp/dpu1/catalog/dpu_7_0_sm8350.h    |  4 +-
 .../msm/disp/dpu1/catalog/dpu_7_2_sc7280.h    |  4 +-
 .../msm/disp/dpu1/catalog/dpu_8_1_sm8450.h    |  4 +-
 .../msm/disp/dpu1/catalog/dpu_9_0_sm8550.h    |  4 +-
 .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    | 37 ++++++++++++++++++-
 8 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h
index 04d2a73dd942d..eb5dfff2ec4f4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h
@@ -341,8 +341,8 @@ static const struct dpu_wb_cfg sm8650_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
 		.maxlinewidth = 4096,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index 58b0f50518c8c..a57d50b1f0280 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -336,8 +336,8 @@ static const struct dpu_wb_cfg sm8250_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb_yuv,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
index bcfedfc8251af..7382ebb6e5b2a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
@@ -157,8 +157,8 @@ static const struct dpu_wb_cfg sc7180_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index 194239de393e4..aced16e350daa 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -303,8 +303,8 @@ static const struct dpu_wb_cfg sm8350_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 19c2b74547966..2f153e0b5c6a9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -169,8 +169,8 @@ static const struct dpu_wb_cfg sc7280_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb_yuv,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index a8a1d65a93af3..a1779c5597ae7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -321,8 +321,8 @@ static const struct dpu_wb_cfg sm8450_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
index bf56265967c0f..ad48defa154f7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
@@ -315,8 +315,8 @@ static const struct dpu_wb_cfg sm8550_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
 		.maxlinewidth = 4096,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index b304bebedb844..54e8717403a0a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -202,7 +202,7 @@ static const u32 rotation_v2_formats[] = {
 	/* TODO add formats after validation */
 };
 
-static const uint32_t wb2_formats[] = {
+static const u32 wb2_formats_rgb[] = {
 	DRM_FORMAT_RGB565,
 	DRM_FORMAT_BGR565,
 	DRM_FORMAT_RGB888,
@@ -236,6 +236,41 @@ static const uint32_t wb2_formats[] = {
 	DRM_FORMAT_XBGR4444,
 };
 
+static const u32 wb2_formats_rgb_yuv[] = {
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_RGB888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_RGBA8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_RGBX8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_RGBA4444,
+	DRM_FORMAT_RGBX4444,
+	DRM_FORMAT_XRGB4444,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_BGR888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_BGRX8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_BGRA4444,
+	DRM_FORMAT_BGRX4444,
+	DRM_FORMAT_XBGR4444,
+	DRM_FORMAT_NV12,
+};
+
 /*************************************************************
  * SSPP sub blocks config
  *************************************************************/
-- 
GitLab


From 341fb24a67663ec0e6b8cd012a3892d92d133349 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <quic_abhinavk@quicinc.com>
Date: Tue, 12 Dec 2023 12:52:53 -0800
Subject: [PATCH 1154/2340] drm/msm/dpu: add cdm blocks to dpu snapshot

Now that CDM block support has been added to DPU lets also add its
entry to the DPU snapshot to help debugging.

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571835/
Link: https://lore.kernel.org/r/20231212205254.12422-16-quic_abhinavk@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index d60edb93d4f7a..723cc1d821431 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -944,6 +944,10 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k
 		}
 	}
 
+	if (cat->cdm)
+		msm_disp_snapshot_add_block(disp_state, cat->cdm->len,
+					    dpu_kms->mmio + cat->cdm->base, cat->cdm->name);
+
 	pm_runtime_put_sync(&dpu_kms->pdev->dev);
 }
 
-- 
GitLab


From 45284ff733e4caf6c118aae5131eb7e7cf3eea5a Mon Sep 17 00:00:00 2001
From: Paloma Arellano <quic_parellan@quicinc.com>
Date: Tue, 12 Dec 2023 15:10:58 -0800
Subject: [PATCH 1155/2340] drm/msm/dpu: Add mutex lock in control vblank irq

Add a mutex lock to control vblank irq to synchronize vblank
enable/disable operations happening from different threads to prevent
race conditions while registering/unregistering the vblank irq callback.

v4: -Removed vblank_ctl_lock from dpu_encoder_virt, so it is only a
    parameter of dpu_encoder_phys.
    -Switch from atomic refcnt to a simple int counter as mutex has
    now been added
v3: Mistakenly did not change wording in last version. It is done now.
v2: Slightly changed wording of commit message

Signed-off-by: Paloma Arellano <quic_parellan@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/571854/
Link: https://lore.kernel.org/r/20231212231101.9240-2-quic_parellan@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c   |  1 -
 .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  4 ++-
 .../drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c  | 32 ++++++++++++------
 .../drm/msm/disp/dpu1/dpu_encoder_phys_vid.c  | 33 ++++++++++++-------
 4 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 54042754f0112..000d13fad0f7a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2464,7 +2464,6 @@ void dpu_encoder_phys_init(struct dpu_encoder_phys *phys_enc,
 	phys_enc->enc_spinlock = p->enc_spinlock;
 	phys_enc->enable_state = DPU_ENC_DISABLED;
 
-	atomic_set(&phys_enc->vblank_refcount, 0);
 	atomic_set(&phys_enc->pending_kickoff_cnt, 0);
 	atomic_set(&phys_enc->pending_ctlstart_cnt, 0);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
index bdb89cbbcfb87..993f263433314 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
@@ -155,6 +155,7 @@ enum dpu_intr_idx {
  * @hw_cdm:		Hardware interface to the CDM registers
  * @dpu_kms:		Pointer to the dpu_kms top level
  * @cached_mode:	DRM mode cached at mode_set time, acted on in enable
+ * @vblank_ctl_lock:	Vblank ctl mutex lock to protect vblank_refcount
  * @enabled:		Whether the encoder has enabled and running a mode
  * @split_role:		Role to play in a split-panel configuration
  * @intf_mode:		Interface mode
@@ -184,11 +185,12 @@ struct dpu_encoder_phys {
 	struct dpu_hw_cdm *hw_cdm;
 	struct dpu_kms *dpu_kms;
 	struct drm_display_mode cached_mode;
+	struct mutex vblank_ctl_lock;
 	enum dpu_enc_split_role split_role;
 	enum dpu_intf_mode intf_mode;
 	spinlock_t *enc_spinlock;
 	enum dpu_enc_enable_state enable_state;
-	atomic_t vblank_refcount;
+	int vblank_refcount;
 	atomic_t vsync_cnt;
 	atomic_t underrun_cnt;
 	atomic_t pending_ctlstart_cnt;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index 76f1f66e41cd0..a301e2833177a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -246,7 +246,8 @@ static int dpu_encoder_phys_cmd_control_vblank_irq(
 		return -EINVAL;
 	}
 
-	refcount = atomic_read(&phys_enc->vblank_refcount);
+	mutex_lock(&phys_enc->vblank_ctl_lock);
+	refcount = phys_enc->vblank_refcount;
 
 	/* Slave encoders don't report vblank */
 	if (!dpu_encoder_phys_cmd_is_master(phys_enc))
@@ -262,16 +263,24 @@ static int dpu_encoder_phys_cmd_control_vblank_irq(
 		      phys_enc->hw_pp->idx - PINGPONG_0,
 		      enable ? "true" : "false", refcount);
 
-	if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1)
-		ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_RDPTR],
-				dpu_encoder_phys_cmd_te_rd_ptr_irq,
-				phys_enc);
-	else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0)
-		ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_RDPTR]);
+	if (enable) {
+		if (phys_enc->vblank_refcount == 0)
+			ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_RDPTR],
+					dpu_encoder_phys_cmd_te_rd_ptr_irq,
+					phys_enc);
+		if (!ret)
+			phys_enc->vblank_refcount++;
+	} else if (!enable) {
+		if (phys_enc->vblank_refcount == 1)
+			ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_RDPTR]);
+		if (!ret)
+			phys_enc->vblank_refcount--;
+	}
 
 end:
+	mutex_unlock(&phys_enc->vblank_ctl_lock);
 	if (ret) {
 		DRM_ERROR("vblank irq err id:%u pp:%d ret:%d, enable %s/%d\n",
 			  DRMID(phys_enc->parent),
@@ -287,7 +296,7 @@ static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc,
 {
 	trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent),
 			phys_enc->hw_pp->idx - PINGPONG_0,
-			enable, atomic_read(&phys_enc->vblank_refcount));
+			enable, phys_enc->vblank_refcount);
 
 	if (enable) {
 		dpu_core_irq_register_callback(phys_enc->dpu_kms,
@@ -728,6 +737,9 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev,
 
 	dpu_encoder_phys_init(phys_enc, p);
 
+	mutex_init(&phys_enc->vblank_ctl_lock);
+	phys_enc->vblank_refcount = 0;
+
 	dpu_encoder_phys_cmd_init_ops(&phys_enc->ops);
 	phys_enc->intf_mode = INTF_MODE_CMD;
 	cmd_enc->stream_sel = 0;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index 2b4e5b5eff447..d0f56c5c4cce9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -366,7 +366,8 @@ static int dpu_encoder_phys_vid_control_vblank_irq(
 	int ret = 0;
 	int refcount;
 
-	refcount = atomic_read(&phys_enc->vblank_refcount);
+	mutex_lock(&phys_enc->vblank_ctl_lock);
+	refcount = phys_enc->vblank_refcount;
 
 	/* Slave encoders don't report vblank */
 	if (!dpu_encoder_phys_vid_is_master(phys_enc))
@@ -379,18 +380,26 @@ static int dpu_encoder_phys_vid_control_vblank_irq(
 	}
 
 	DRM_DEBUG_VBL("id:%u enable=%d/%d\n", DRMID(phys_enc->parent), enable,
-		      atomic_read(&phys_enc->vblank_refcount));
+		      refcount);
 
-	if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1)
-		ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_VSYNC],
-				dpu_encoder_phys_vid_vblank_irq,
-				phys_enc);
-	else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0)
-		ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_VSYNC]);
+	if (enable) {
+		if (phys_enc->vblank_refcount == 0)
+			ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_VSYNC],
+					dpu_encoder_phys_vid_vblank_irq,
+					phys_enc);
+		if (!ret)
+			phys_enc->vblank_refcount++;
+	} else if (!enable) {
+		if (phys_enc->vblank_refcount == 1)
+			ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_VSYNC]);
+		if (!ret)
+			phys_enc->vblank_refcount--;
+	}
 
 end:
+	mutex_unlock(&phys_enc->vblank_ctl_lock);
 	if (ret) {
 		DRM_ERROR("failed: id:%u intf:%d ret:%d enable:%d refcnt:%d\n",
 			  DRMID(phys_enc->parent),
@@ -614,7 +623,7 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc,
 	trace_dpu_enc_phys_vid_irq_ctrl(DRMID(phys_enc->parent),
 			    phys_enc->hw_intf->idx - INTF_0,
 			    enable,
-			    atomic_read(&phys_enc->vblank_refcount));
+			   phys_enc->vblank_refcount);
 
 	if (enable) {
 		ret = dpu_encoder_phys_vid_control_vblank_irq(phys_enc, true);
@@ -707,6 +716,8 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev,
 	DPU_DEBUG_VIDENC(phys_enc, "\n");
 
 	dpu_encoder_phys_init(phys_enc, p);
+	mutex_init(&phys_enc->vblank_ctl_lock);
+	phys_enc->vblank_refcount = 0;
 
 	dpu_encoder_phys_vid_init_ops(&phys_enc->ops);
 	phys_enc->intf_mode = INTF_MODE_VIDEO;
-- 
GitLab


From aee797df03c69ae39822848ac36e8fd6ed41cf4e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Wed, 13 Dec 2023 02:57:28 +0200
Subject: [PATCH 1156/2340] drm/msm/dpu: move CSC tables to dpu_hw_util.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move CSC tables out of the header file to fix following kind of warnings:

In file included from drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h:8,
                 from drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c:5:
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h:54:33: warning: ‘dpu_csc10_rgb2yuv_601l’ defined but not used [-Wunused-const-variable=]
   54 | static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
      |                                 ^~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h:39:33: warning: ‘dpu_csc10_YUV2RGB_601L’ defined but not used [-Wunused-const-variable=]
   39 | static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
      |                                 ^~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h:24:33: warning: ‘dpu_csc_YUV2RGB_601L’ defined but not used [-Wunused-const-variable=]
   24 | static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
      |                                 ^~~~~~~~~~~~~~~~~~~~

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/571869/
Link: https://lore.kernel.org/r/20231213005728.53060-1-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 44 +++++++++++++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 44 ++-------------------
 2 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
index 0b05061e3e621..395fdcea28b9b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
@@ -561,3 +561,47 @@ bool dpu_hw_clk_force_ctrl(struct dpu_hw_blk_reg_map *c,
 
 	return clk_forced_on;
 }
+
+#define TO_S15D16(_x_)((_x_) << 7)
+
+const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
+	{
+		/* S15.16 format */
+		0x00012A00, 0x00000000, 0x00019880,
+		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
+		0x00012A00, 0x00020480, 0x00000000,
+	},
+	/* signed bias */
+	{ 0xfff0, 0xff80, 0xff80,},
+	{ 0x0, 0x0, 0x0,},
+	/* unsigned clamp */
+	{ 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,},
+	{ 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,},
+};
+
+const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
+	{
+		/* S15.16 format */
+		0x00012A00, 0x00000000, 0x00019880,
+		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
+		0x00012A00, 0x00020480, 0x00000000,
+	},
+	/* signed bias */
+	{ 0xffc0, 0xfe00, 0xfe00,},
+	{ 0x0, 0x0, 0x0,},
+	/* unsigned clamp */
+	{ 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,},
+	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
+};
+
+const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
+	{
+		TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
+		TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
+		TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
+	},
+	{ 0x00, 0x00, 0x00 },
+	{ 0x0040, 0x0200, 0x0200 },
+	{ 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
+	{ 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
+};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index c0aaad2023da6..25cc13b57756b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -21,47 +21,9 @@
 
 #define TO_S15D16(_x_)((_x_) << 7)
 
-static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
-	{
-		/* S15.16 format */
-		0x00012A00, 0x00000000, 0x00019880,
-		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
-		0x00012A00, 0x00020480, 0x00000000,
-	},
-	/* signed bias */
-	{ 0xfff0, 0xff80, 0xff80,},
-	{ 0x0, 0x0, 0x0,},
-	/* unsigned clamp */
-	{ 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,},
-	{ 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,},
-};
-
-static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
-	{
-		/* S15.16 format */
-		0x00012A00, 0x00000000, 0x00019880,
-		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
-		0x00012A00, 0x00020480, 0x00000000,
-	},
-	/* signed bias */
-	{ 0xffc0, 0xfe00, 0xfe00,},
-	{ 0x0, 0x0, 0x0,},
-	/* unsigned clamp */
-	{ 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,},
-	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
-};
-
-static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
-	{
-		TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
-		TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
-		TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
-	},
-	{ 0x00, 0x00, 0x00 },
-	{ 0x0040, 0x0200, 0x0200 },
-	{ 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
-	{ 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
-};
+extern const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L;
+extern const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L;
+extern const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l;
 
 /*
  * This is the common struct maintained by each sub block
-- 
GitLab


From 980fffd0c69e5df0f67ee089d405899d532aeeab Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Wed, 13 Dec 2023 13:30:17 -0800
Subject: [PATCH 1157/2340] drm/msm/dpu: Set input_sel bit for INTF

Set the input_sel bit for encoders as it was missed in the initial
implementation.

Reported-by: Rob Clark <robdclark@gmail.com>
Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/39
Fixes: 91143873a05d ("drm/msm/dpu: Add MISR register support for interface")
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/572007/
Link: https://lore.kernel.org/r/20231213-encoder-fixup-v4-1-6da6cd1bf118@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c | 2 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c   | 2 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 9 +++++++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 3 ++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
index 0b6a0a7dcc392..226133af78407 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
@@ -322,7 +322,7 @@ static u32 dpu_hw_intf_get_line_count(struct dpu_hw_intf *intf)
 
 static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf, bool enable, u32 frame_count)
 {
-	dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count);
+	dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count, 0x1);
 }
 
 static int dpu_hw_intf_collect_misr(struct dpu_hw_intf *intf, u32 *misr_value)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
index 25af52ab602f5..bbc9756ecde9f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
@@ -85,7 +85,7 @@ static void dpu_hw_lm_setup_border_color(struct dpu_hw_mixer *ctx,
 
 static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count)
 {
-	dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count);
+	dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count, 0x0);
 }
 
 static int dpu_hw_lm_collect_misr(struct dpu_hw_mixer *ctx, u32 *misr_value)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
index 395fdcea28b9b..6971ddc8679f2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
@@ -475,9 +475,13 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset,
 		      cfg->danger_safe_en ? QOS_QOS_CTRL_DANGER_SAFE_EN : 0);
 }
 
+/*
+ * note: Aside from encoders, input_sel should be set to 0x0 by default
+ */
 void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
 		u32 misr_ctrl_offset,
-		bool enable, u32 frame_count)
+		bool enable, u32 frame_count,
+		u8 input_sel)
 {
 	u32 config = 0;
 
@@ -488,7 +492,8 @@ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
 
 	if (enable) {
 		config = (frame_count & MISR_FRAME_COUNT_MASK) |
-			MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK;
+			MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK |
+			((input_sel & 0xF) << 24);
 
 		DPU_REG_WRITE(c, misr_ctrl_offset, config);
 	} else {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index 25cc13b57756b..ddb3da883e322 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -363,7 +363,8 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset,
 void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
 		u32 misr_ctrl_offset,
 		bool enable,
-		u32 frame_count);
+		u32 frame_count,
+		u8 input_sel);
 
 int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c,
 		u32 misr_ctrl_offset,
-- 
GitLab


From 3313c23f3eab698bc6b904520ee608fc0f7b03d0 Mon Sep 17 00:00:00 2001
From: Jessica Zhang <quic_jesszhan@quicinc.com>
Date: Wed, 13 Dec 2023 13:30:18 -0800
Subject: [PATCH 1158/2340] drm/msm/dpu: Drop enable and frame_count parameters
 from dpu_hw_setup_misr()

Drop the enable and frame_count parameters from dpu_hw_setup_misr() as they
are always set to the same values.

In addition, replace MISR_FRAME_COUNT_MASK with MISR_FRAME_COUNT as
frame_count is always set to the same value.

Fixes: 7b37523fb1d1 ("drm/msm/dpu: Move MISR methods to dpu_hw_util")
Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/572009/
Link: https://lore.kernel.org/r/20231213-encoder-fixup-v4-2-6da6cd1bf118@quicinc.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c    |  4 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c |  4 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c |  6 +++---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h |  4 ++--
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c   |  6 +++---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h   |  3 ++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 19 +++++--------------
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h |  9 +++------
 8 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index a798c10036e1e..88c2e51ab1662 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved.
  * Copyright (C) 2013 Red Hat
  * Author: Rob Clark <robdclark@gmail.com>
@@ -114,7 +114,7 @@ static void dpu_crtc_setup_lm_misr(struct dpu_crtc_state *crtc_state)
 			continue;
 
 		/* Calculate MISR over 1 frame */
-		m->hw_lm->ops.setup_misr(m->hw_lm, true, 1);
+		m->hw_lm->ops.setup_misr(m->hw_lm);
 	}
 }
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 000d13fad0f7a..83380bc92a00a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2,7 +2,7 @@
 /*
  * Copyright (C) 2013 Red Hat
  * Copyright (c) 2014-2018, 2020-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  *
  * Author: Rob Clark <robdclark@gmail.com>
  */
@@ -262,7 +262,7 @@ void dpu_encoder_setup_misr(const struct drm_encoder *drm_enc)
 		if (!phys->hw_intf || !phys->hw_intf->ops.setup_misr)
 			continue;
 
-		phys->hw_intf->ops.setup_misr(phys->hw_intf, true, 1);
+		phys->hw_intf->ops.setup_misr(phys->hw_intf);
 	}
 }
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
index 226133af78407..6bba531d6dc41 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
@@ -320,9 +320,9 @@ static u32 dpu_hw_intf_get_line_count(struct dpu_hw_intf *intf)
 	return DPU_REG_READ(c, INTF_LINE_COUNT);
 }
 
-static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf, bool enable, u32 frame_count)
+static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf)
 {
-	dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count, 0x1);
+	dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, 0x1);
 }
 
 static int dpu_hw_intf_collect_misr(struct dpu_hw_intf *intf, u32 *misr_value)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
index 215401bb042eb..0bd57a32144a6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
@@ -95,7 +95,7 @@ struct dpu_hw_intf_ops {
 
 	void (*bind_pingpong_blk)(struct dpu_hw_intf *intf,
 			const enum dpu_pingpong pp);
-	void (*setup_misr)(struct dpu_hw_intf *intf, bool enable, u32 frame_count);
+	void (*setup_misr)(struct dpu_hw_intf *intf);
 	int (*collect_misr)(struct dpu_hw_intf *intf, u32 *misr_value);
 
 	// Tearcheck on INTF since DPU 5.0.0
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
index bbc9756ecde9f..1d3ccf3228c62 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
  */
 
@@ -83,9 +83,9 @@ static void dpu_hw_lm_setup_border_color(struct dpu_hw_mixer *ctx,
 	}
 }
 
-static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count)
+static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx)
 {
-	dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count, 0x0);
+	dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, 0x0);
 }
 
 static int dpu_hw_lm_collect_misr(struct dpu_hw_mixer *ctx, u32 *misr_value)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
index 8835fd1064131..0a33817552499 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
  */
 
@@ -57,7 +58,7 @@ struct dpu_hw_lm_ops {
 	/**
 	 * setup_misr: Enable/disable MISR
 	 */
-	void (*setup_misr)(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count);
+	void (*setup_misr)(struct dpu_hw_mixer *ctx);
 
 	/**
 	 * collect_misr: Read MISR signature
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
index 6971ddc8679f2..dd475827314ec 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 #define pr_fmt(fmt)	"[drm:%s:%d] " fmt, __func__, __LINE__
@@ -479,9 +479,7 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset,
  * note: Aside from encoders, input_sel should be set to 0x0 by default
  */
 void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
-		u32 misr_ctrl_offset,
-		bool enable, u32 frame_count,
-		u8 input_sel)
+		u32 misr_ctrl_offset, u8 input_sel)
 {
 	u32 config = 0;
 
@@ -490,16 +488,9 @@ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
 	/* Clear old MISR value (in case it's read before a new value is calculated)*/
 	wmb();
 
-	if (enable) {
-		config = (frame_count & MISR_FRAME_COUNT_MASK) |
-			MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK |
-			((input_sel & 0xF) << 24);
-
-		DPU_REG_WRITE(c, misr_ctrl_offset, config);
-	} else {
-		DPU_REG_WRITE(c, misr_ctrl_offset, 0);
-	}
-
+	config = MISR_FRAME_COUNT | MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK |
+		((input_sel & 0xF) << 24);
+	DPU_REG_WRITE(c, misr_ctrl_offset, config);
 }
 
 int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index ddb3da883e322..64ded69fa903e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
  */
 
@@ -13,7 +13,7 @@
 #include "dpu_hw_catalog.h"
 
 #define REG_MASK(n)                     ((BIT(n)) - 1)
-#define MISR_FRAME_COUNT_MASK           0xFF
+#define MISR_FRAME_COUNT                0x1
 #define MISR_CTRL_ENABLE                BIT(8)
 #define MISR_CTRL_STATUS                BIT(9)
 #define MISR_CTRL_STATUS_CLEAR          BIT(10)
@@ -361,10 +361,7 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset,
 			   const struct dpu_hw_qos_cfg *cfg);
 
 void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
-		u32 misr_ctrl_offset,
-		bool enable,
-		u32 frame_count,
-		u8 input_sel);
+		u32 misr_ctrl_offset, u8 input_sel);
 
 int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c,
 		u32 misr_ctrl_offset,
-- 
GitLab


From d4ca26ac4be0d9aea7005c40df75e6775749671b Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 7 Nov 2023 02:43:33 +0200
Subject: [PATCH 1159/2340] drm/msm/dp: call dp_display_get_next_bridge()
 during probe

The funcion dp_display_get_next_bridge() can return -EPROBE_DEFER if the
next bridge is not (yet) available. However returning -EPROBE_DEFER from
msm_dp_modeset_init() is not ideal. This leads to -EPROBE return from
component_bind, which can easily result in -EPROBE_DEFR loops.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Tested-by: Konrad Dybcio <konrad.dybcio@linaro.org> # sc8180x-primus
Reviewed-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Reviewed-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/566208/
Link: https://lore.kernel.org/r/20231107004424.2112698-1-dmitry.baryshkov@linaro.org
---
 drivers/gpu/drm/msm/dp/dp_display.c | 36 +++++++++++++++++------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 0405ff08d7622..d37d599aec273 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -1210,15 +1210,27 @@ static const struct msm_dp_desc *dp_display_get_desc(struct platform_device *pde
 	return NULL;
 }
 
-static int dp_auxbus_done_probe(struct drm_dp_aux *aux)
+static int dp_display_get_next_bridge(struct msm_dp *dp);
+
+static int dp_display_probe_tail(struct device *dev)
 {
-	int rc;
+	struct msm_dp *dp = dev_get_drvdata(dev);
+	int ret;
 
-	rc = component_add(aux->dev, &dp_display_comp_ops);
-	if (rc)
-		DRM_ERROR("eDP component add failed, rc=%d\n", rc);
+	ret = dp_display_get_next_bridge(dp);
+	if (ret)
+		return ret;
 
-	return rc;
+	ret = component_add(dev, &dp_display_comp_ops);
+	if (ret)
+		DRM_ERROR("component add failed, rc=%d\n", ret);
+
+	return ret;
+}
+
+static int dp_auxbus_done_probe(struct drm_dp_aux *aux)
+{
+	return dp_display_probe_tail(aux->dev);
 }
 
 static int dp_display_probe(struct platform_device *pdev)
@@ -1293,11 +1305,9 @@ static int dp_display_probe(struct platform_device *pdev)
 			goto err;
 		}
 	} else {
-		rc = component_add(&pdev->dev, &dp_display_comp_ops);
-		if (rc) {
-			DRM_ERROR("component add failed, rc=%d\n", rc);
+		rc = dp_display_probe_tail(&pdev->dev);
+		if (rc)
 			goto err;
-		}
 	}
 
 	return rc;
@@ -1428,7 +1438,7 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 	 * For DisplayPort interfaces external bridges are optional, so
 	 * silently ignore an error if one is not present (-ENODEV).
 	 */
-	rc = devm_dp_parser_find_next_bridge(dp->drm_dev->dev, dp_priv->parser);
+	rc = devm_dp_parser_find_next_bridge(&dp->pdev->dev, dp_priv->parser);
 	if (!dp->is_edp && rc == -ENODEV)
 		return 0;
 
@@ -1448,10 +1458,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
 
 	dp_priv = container_of(dp_display, struct dp_display_private, dp_display);
 
-	ret = dp_display_get_next_bridge(dp_display);
-	if (ret)
-		return ret;
-
 	ret = dp_bridge_init(dp_display, dev, encoder);
 	if (ret) {
 		DRM_DEV_ERROR(dev->dev,
-- 
GitLab


From afa5cf3175a22b719a65fc0b13dbf78196a60869 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 12 Dec 2023 20:40:14 -0800
Subject: [PATCH 1160/2340] drm/i915/uapi: fix typos/spellos and punctuation

Use "its" for possessive form instead of "it's".
Hyphenate multi-word adjectives.
Correct some spelling.
End one line of code with ';' instead of ','. The before and after
  object files are identical.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213044014.21410-1-rdunlap@infradead.org
---
 include/uapi/drm/i915_drm.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 218edb0a96f8c..fd4f9574d177a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_FENCE	 44
 
 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
- * user specified bufffers for post-mortem debugging of GPU hangs. See
+ * user-specified buffers for post-mortem debugging of GPU hangs. See
  * EXEC_OBJECT_CAPTURE.
  */
 #define I915_PARAM_HAS_EXEC_CAPTURE	 45
@@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy {
 	 * is accurate.
 	 *
 	 * The returned dword is split into two fields to indicate both
-	 * the engine classess on which the object is being read, and the
+	 * the engine classes on which the object is being read, and the
 	 * engine class on which it is currently being written (if any).
 	 *
 	 * The low word (bits 0:15) indicate if the object is being written
@@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise {
 	__u32 handle;
 
 	/* Advice: either the buffer will be needed again in the near future,
-	 *         or wont be and could be discarded under memory pressure.
+	 *         or won't be and could be discarded under memory pressure.
 	 */
 	__u32 madv;
 
@@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info {
  * 	// enough to hold our array of engines. The kernel will fill out the
  * 	// item.length for us, which is the number of bytes we need.
  * 	//
- * 	// Alternatively a large buffer can be allocated straight away enabling
+ *	// Alternatively a large buffer can be allocated straightaway enabling
  * 	// querying in one pass, in which case item.length should contain the
  * 	// length of the provided buffer.
  * 	err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
@@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info {
  * 	// Now that we allocated the required number of bytes, we call the ioctl
  * 	// again, this time with the data_ptr pointing to our newly allocated
  * 	// blob, which the kernel can then populate with info on all engines.
- * 	item.data_ptr = (uintptr_t)&info,
+ *	item.data_ptr = (uintptr_t)&info;
  *
  * 	err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
  * 	if (err) ...
@@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info {
 /**
  * struct drm_i915_engine_info
  *
- * Describes one engine and it's capabilities as known to the driver.
+ * Describes one engine and its capabilities as known to the driver.
  */
 struct drm_i915_engine_info {
 	/** @engine: Engine class and instance. */
-- 
GitLab


From a7430e2bf950394cc44b523184338b092dc6aef6 Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:35 +0800
Subject: [PATCH 1161/2340] drm/mediatek: Rename OVL_ADAPTOR_TYPE_RDMA

Rename OVL_ADAPTOR_TYPE_RDMA to OVL_ADAPTOR_TYPE_MDP_RDMA
to align the naming rule of mtk_ovl_adaptor_comp_id.

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-12-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../gpu/drm/mediatek/mtk_disp_ovl_adaptor.c   | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 6bf6367853fba..114eded8177e9 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -27,7 +27,7 @@
 #define MTK_OVL_ADAPTOR_LAYER_NUM 4
 
 enum mtk_ovl_adaptor_comp_type {
-	OVL_ADAPTOR_TYPE_RDMA = 0,
+	OVL_ADAPTOR_TYPE_MDP_RDMA = 0,
 	OVL_ADAPTOR_TYPE_MERGE,
 	OVL_ADAPTOR_TYPE_ETHDR,
 	OVL_ADAPTOR_TYPE_NUM,
@@ -62,20 +62,20 @@ struct mtk_disp_ovl_adaptor {
 };
 
 static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = {
-	[OVL_ADAPTOR_TYPE_RDMA]		= "vdo1-rdma",
+	[OVL_ADAPTOR_TYPE_MDP_RDMA]	= "vdo1-rdma",
 	[OVL_ADAPTOR_TYPE_MERGE]	= "merge",
 	[OVL_ADAPTOR_TYPE_ETHDR]	= "ethdr",
 };
 
 static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = {
-	[OVL_ADAPTOR_MDP_RDMA0]	= { OVL_ADAPTOR_TYPE_RDMA, 0 },
-	[OVL_ADAPTOR_MDP_RDMA1]	= { OVL_ADAPTOR_TYPE_RDMA, 1 },
-	[OVL_ADAPTOR_MDP_RDMA2]	= { OVL_ADAPTOR_TYPE_RDMA, 2 },
-	[OVL_ADAPTOR_MDP_RDMA3]	= { OVL_ADAPTOR_TYPE_RDMA, 3 },
-	[OVL_ADAPTOR_MDP_RDMA4]	= { OVL_ADAPTOR_TYPE_RDMA, 4 },
-	[OVL_ADAPTOR_MDP_RDMA5]	= { OVL_ADAPTOR_TYPE_RDMA, 5 },
-	[OVL_ADAPTOR_MDP_RDMA6]	= { OVL_ADAPTOR_TYPE_RDMA, 6 },
-	[OVL_ADAPTOR_MDP_RDMA7]	= { OVL_ADAPTOR_TYPE_RDMA, 7 },
+	[OVL_ADAPTOR_MDP_RDMA0]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 0 },
+	[OVL_ADAPTOR_MDP_RDMA1]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 1 },
+	[OVL_ADAPTOR_MDP_RDMA2]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 2 },
+	[OVL_ADAPTOR_MDP_RDMA3]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 3 },
+	[OVL_ADAPTOR_MDP_RDMA4]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 4 },
+	[OVL_ADAPTOR_MDP_RDMA5]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 5 },
+	[OVL_ADAPTOR_MDP_RDMA6]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 6 },
+	[OVL_ADAPTOR_MDP_RDMA7]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 7 },
 	[OVL_ADAPTOR_MERGE0]	= { OVL_ADAPTOR_TYPE_MERGE, 1 },
 	[OVL_ADAPTOR_MERGE1]	= { OVL_ADAPTOR_TYPE_MERGE, 2 },
 	[OVL_ADAPTOR_MERGE2]	= { OVL_ADAPTOR_TYPE_MERGE, 3 },
@@ -388,7 +388,7 @@ static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node,
 static const struct of_device_id mtk_ovl_adaptor_comp_dt_ids[] = {
 	{
 		.compatible = "mediatek,mt8195-vdo1-rdma",
-		.data = (void *)OVL_ADAPTOR_TYPE_RDMA,
+		.data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA,
 	}, {
 		.compatible = "mediatek,mt8195-disp-merge",
 		.data = (void *)OVL_ADAPTOR_TYPE_MERGE,
-- 
GitLab


From 8daf02f03ca4cf70b1f985293b8515252f9767aa Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:36 +0800
Subject: [PATCH 1162/2340] drm/mediatek: Add component ID to component match
 structure

Add component ID to component match structure so we can
configure them with a for-loop.

The main reason we do such code refactoring is that
there is a new hardware component called "Padding" since
MT8188, while MT8195 doesn't have this module, we can't
use the original logic to manage the components.

While MT8195 does not define Padding in the device tree,
the corresponding components will be NULL and being skipped
by the functions.

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-13-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../gpu/drm/mediatek/mtk_disp_ovl_adaptor.c   | 69 ++++++++-----------
 1 file changed, 30 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 114eded8177e9..9f569d0f52b33 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -52,6 +52,7 @@ enum mtk_ovl_adaptor_comp_id {
 
 struct ovl_adaptor_comp_match {
 	enum mtk_ovl_adaptor_comp_type type;
+	enum mtk_ddp_comp_id comp_id;
 	int alias_id;
 };
 
@@ -68,19 +69,19 @@ static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = {
 };
 
 static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = {
-	[OVL_ADAPTOR_MDP_RDMA0]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 0 },
-	[OVL_ADAPTOR_MDP_RDMA1]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 1 },
-	[OVL_ADAPTOR_MDP_RDMA2]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 2 },
-	[OVL_ADAPTOR_MDP_RDMA3]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 3 },
-	[OVL_ADAPTOR_MDP_RDMA4]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 4 },
-	[OVL_ADAPTOR_MDP_RDMA5]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 5 },
-	[OVL_ADAPTOR_MDP_RDMA6]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 6 },
-	[OVL_ADAPTOR_MDP_RDMA7]	= { OVL_ADAPTOR_TYPE_MDP_RDMA, 7 },
-	[OVL_ADAPTOR_MERGE0]	= { OVL_ADAPTOR_TYPE_MERGE, 1 },
-	[OVL_ADAPTOR_MERGE1]	= { OVL_ADAPTOR_TYPE_MERGE, 2 },
-	[OVL_ADAPTOR_MERGE2]	= { OVL_ADAPTOR_TYPE_MERGE, 3 },
-	[OVL_ADAPTOR_MERGE3]	= { OVL_ADAPTOR_TYPE_MERGE, 4 },
-	[OVL_ADAPTOR_ETHDR0]	= { OVL_ADAPTOR_TYPE_ETHDR, 0 },
+	[OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0 },
+	[OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1 },
+	[OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2 },
+	[OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3 },
+	[OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4 },
+	[OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5 },
+	[OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6 },
+	[OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7 },
+	[OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1 },
+	[OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2 },
+	[OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3 },
+	[OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4 },
+	[OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0 },
 };
 
 void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx,
@@ -314,36 +315,26 @@ size_t mtk_ovl_adaptor_get_num_formats(struct device *dev)
 
 void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex)
 {
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA0);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA1);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA2);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA3);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA4);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA5);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA6);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA7);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE1);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE2);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE3);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE4);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_ETHDR_MIXER);
+	int i;
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i])
+			continue;
+		mtk_mutex_add_comp(mutex, comp_matches[i].comp_id);
+	}
 }
 
 void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex)
 {
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA0);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA1);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA2);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA3);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA4);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA5);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA6);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA7);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE1);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE2);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE3);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE4);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_ETHDR_MIXER);
+	int i;
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i])
+			continue;
+		mtk_mutex_remove_comp(mutex, comp_matches[i].comp_id);
+	}
 }
 
 void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next)
-- 
GitLab


From 7bacaee4f5d0fdf00b1734c1be67cc415b3822c0 Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:37 +0800
Subject: [PATCH 1163/2340] drm/mediatek: Manage component's clock with
 function pointers

By registering component related functions to the pointers,
we can easily manage them within a for-loop and simplify the
logic of clock control significantly.

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-14-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../gpu/drm/mediatek/mtk_disp_ovl_adaptor.c   | 89 +++++++++----------
 1 file changed, 43 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 9f569d0f52b33..a815dc8e2110c 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -54,6 +54,7 @@ struct ovl_adaptor_comp_match {
 	enum mtk_ovl_adaptor_comp_type type;
 	enum mtk_ddp_comp_id comp_id;
 	int alias_id;
+	const struct mtk_ddp_comp_funcs *funcs;
 };
 
 struct mtk_disp_ovl_adaptor {
@@ -68,20 +69,35 @@ static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = {
 	[OVL_ADAPTOR_TYPE_ETHDR]	= "ethdr",
 };
 
+static const struct mtk_ddp_comp_funcs ethdr = {
+	.clk_enable = mtk_ethdr_clk_enable,
+	.clk_disable = mtk_ethdr_clk_disable,
+};
+
+static const struct mtk_ddp_comp_funcs merge = {
+	.clk_enable = mtk_merge_clk_enable,
+	.clk_disable = mtk_merge_clk_disable,
+};
+
+static const struct mtk_ddp_comp_funcs rdma = {
+	.clk_enable = mtk_mdp_rdma_clk_enable,
+	.clk_disable = mtk_mdp_rdma_clk_disable,
+};
+
 static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = {
-	[OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0 },
-	[OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1 },
-	[OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2 },
-	[OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3 },
-	[OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4 },
-	[OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5 },
-	[OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6 },
-	[OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7 },
-	[OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1 },
-	[OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2 },
-	[OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3 },
-	[OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4 },
-	[OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0 },
+	[OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7, &rdma },
+	[OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1, &merge },
+	[OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2, &merge },
+	[OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3, &merge },
+	[OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4, &merge },
+	[OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, &ethdr },
 };
 
 void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx,
@@ -197,40 +213,25 @@ int mtk_ovl_adaptor_clk_enable(struct device *dev)
 		ret = pm_runtime_get_sync(comp);
 		if (ret < 0) {
 			dev_err(dev, "Failed to enable power domain %d, err %d\n", i, ret);
-			goto pwr_err;
+			goto error;
 		}
 	}
 
 	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
 		comp = ovl_adaptor->ovl_adaptor_comp[i];
-
-		if (i < OVL_ADAPTOR_MERGE0)
-			ret = mtk_mdp_rdma_clk_enable(comp);
-		else if (i < OVL_ADAPTOR_ETHDR0)
-			ret = mtk_merge_clk_enable(comp);
-		else
-			ret = mtk_ethdr_clk_enable(comp);
+		if (!comp || !comp_matches[i].funcs->clk_enable)
+			continue;
+		ret = comp_matches[i].funcs->clk_enable(comp);
 		if (ret) {
 			dev_err(dev, "Failed to enable clock %d, err %d\n", i, ret);
-			goto clk_err;
+			while (--i >= 0)
+				comp_matches[i].funcs->clk_disable(comp);
+			i = OVL_ADAPTOR_MERGE0;
+			goto error;
 		}
 	}
-
-	return ret;
-
-clk_err:
-	while (--i >= 0) {
-		comp = ovl_adaptor->ovl_adaptor_comp[i];
-		if (i < OVL_ADAPTOR_MERGE0)
-			mtk_mdp_rdma_clk_disable(comp);
-		else if (i < OVL_ADAPTOR_ETHDR0)
-			mtk_merge_clk_disable(comp);
-		else
-			mtk_ethdr_clk_disable(comp);
-	}
-	i = OVL_ADAPTOR_MERGE0;
-
-pwr_err:
+	return 0;
+error:
 	while (--i >= 0)
 		pm_runtime_put(ovl_adaptor->ovl_adaptor_comp[i]);
 
@@ -245,15 +246,11 @@ void mtk_ovl_adaptor_clk_disable(struct device *dev)
 
 	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
 		comp = ovl_adaptor->ovl_adaptor_comp[i];
-
-		if (i < OVL_ADAPTOR_MERGE0) {
-			mtk_mdp_rdma_clk_disable(comp);
+		if (!comp || !comp_matches[i].funcs->clk_disable)
+			continue;
+		comp_matches[i].funcs->clk_disable(comp);
+		if (i < OVL_ADAPTOR_MERGE0)
 			pm_runtime_put(comp);
-		} else if (i < OVL_ADAPTOR_ETHDR0) {
-			mtk_merge_clk_disable(comp);
-		} else {
-			mtk_ethdr_clk_disable(comp);
-		}
 	}
 }
 
-- 
GitLab


From b97fa2f3e19b9bbac934a2cfa9218aa67f12240e Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:38 +0800
Subject: [PATCH 1164/2340] drm/mediatek: Power on/off devices with function
 pointers

Different from OVL, OVL adaptor is a pseudo device so we didn't
define it in the device tree, consequently, pm_runtime_resume_and_get()
called by .atomic_enable() powers on no device. For this reason, we
implement a function to power on the RDMAs in OVL adaptor, and the
system will make sure the IOMMUs are powered on as well because of the
device link (iommus) in the RDMA nodes in DTS.

This patch separates power and clock management process, it would be
easier to maintain and add extensions.

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-15-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_disp_drv.h       |  4 +
 .../gpu/drm/mediatek/mtk_disp_ovl_adaptor.c   | 75 +++++++++++++++----
 drivers/gpu/drm/mediatek/mtk_drm_crtc.c       | 10 +--
 drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c   |  2 +
 drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h   | 20 +++++
 drivers/gpu/drm/mediatek/mtk_mdp_rdma.c       | 16 ++++
 6 files changed, 107 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
index 1311562d25cc2..2d426775b7ea4 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
@@ -110,6 +110,8 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev,
 			     unsigned int next);
 void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev,
 				unsigned int next);
+int mtk_ovl_adaptor_power_on(struct device *dev);
+void mtk_ovl_adaptor_power_off(struct device *dev);
 int mtk_ovl_adaptor_clk_enable(struct device *dev);
 void mtk_ovl_adaptor_clk_disable(struct device *dev);
 void mtk_ovl_adaptor_config(struct device *dev, unsigned int w,
@@ -151,6 +153,8 @@ void mtk_rdma_disable_vblank(struct device *dev);
 const u32 *mtk_rdma_get_formats(struct device *dev);
 size_t mtk_rdma_get_num_formats(struct device *dev);
 
+int mtk_mdp_rdma_power_on(struct device *dev);
+void mtk_mdp_rdma_power_off(struct device *dev);
 int mtk_mdp_rdma_clk_enable(struct device *dev);
 void mtk_mdp_rdma_clk_disable(struct device *dev);
 void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt *cmdq_pkt);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index a815dc8e2110c..e00d8a395ca0a 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -80,6 +80,8 @@ static const struct mtk_ddp_comp_funcs merge = {
 };
 
 static const struct mtk_ddp_comp_funcs rdma = {
+	.power_on = mtk_mdp_rdma_power_on,
+	.power_off = mtk_mdp_rdma_power_off,
 	.clk_enable = mtk_mdp_rdma_clk_enable,
 	.clk_disable = mtk_mdp_rdma_clk_disable,
 };
@@ -201,21 +203,72 @@ void mtk_ovl_adaptor_stop(struct device *dev)
 	mtk_ethdr_stop(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]);
 }
 
-int mtk_ovl_adaptor_clk_enable(struct device *dev)
+/**
+ * power_off - Power off the devices in OVL adaptor
+ * @dev: Device to be powered off
+ * @num: Number of the devices to be powered off
+ *
+ * Calls the .power_off() ovl_adaptor component callback if it is present.
+ */
+static inline void power_off(struct device *dev, int num)
 {
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
-	struct device *comp;
-	int ret;
 	int i;
 
-	for (i = 0; i < OVL_ADAPTOR_MERGE0; i++) {
-		comp = ovl_adaptor->ovl_adaptor_comp[i];
-		ret = pm_runtime_get_sync(comp);
+	if (num > OVL_ADAPTOR_ID_MAX)
+		num = OVL_ADAPTOR_ID_MAX;
+
+	for (i = num - 1; i >= 0; i--) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->power_off)
+			continue;
+
+		comp_matches[i].funcs->power_off(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
+}
+
+/**
+ * mtk_ovl_adaptor_power_on - Power on the devices in OVL adaptor
+ * @dev: Device to be powered on
+ *
+ * Different from OVL, OVL adaptor is a pseudo device so
+ * we didn't define it in the device tree, pm_runtime_resume_and_get()
+ * called by .atomic_enable() power on no device in OVL adaptor,
+ * we have to implement a function to do the job instead.
+ *
+ * Return: Zero for success or negative number for failure.
+ */
+int mtk_ovl_adaptor_power_on(struct device *dev)
+{
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	int i, ret;
+
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->power_on)
+			continue;
+
+		ret = comp_matches[i].funcs->power_on(ovl_adaptor->ovl_adaptor_comp[i]);
 		if (ret < 0) {
 			dev_err(dev, "Failed to enable power domain %d, err %d\n", i, ret);
-			goto error;
+			power_off(dev, i);
+			return ret;
 		}
 	}
+	return 0;
+}
+
+void mtk_ovl_adaptor_power_off(struct device *dev)
+{
+	power_off(dev, OVL_ADAPTOR_ID_MAX);
+}
+
+int mtk_ovl_adaptor_clk_enable(struct device *dev)
+{
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	struct device *comp;
+	int ret;
+	int i;
 
 	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
 		comp = ovl_adaptor->ovl_adaptor_comp[i];
@@ -226,16 +279,10 @@ int mtk_ovl_adaptor_clk_enable(struct device *dev)
 			dev_err(dev, "Failed to enable clock %d, err %d\n", i, ret);
 			while (--i >= 0)
 				comp_matches[i].funcs->clk_disable(comp);
-			i = OVL_ADAPTOR_MERGE0;
-			goto error;
+			return ret;
 		}
 	}
 	return 0;
-error:
-	while (--i >= 0)
-		pm_runtime_put(ovl_adaptor->ovl_adaptor_comp[i]);
-
-	return ret;
 }
 
 void mtk_ovl_adaptor_clk_disable(struct device *dev)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index c277b9fae9502..4d5ff39dc2ef3 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -721,7 +721,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
 
-	ret = pm_runtime_resume_and_get(comp->dev);
+	ret = mtk_ddp_comp_power_on(comp);
 	if (ret < 0) {
 		DRM_DEV_ERROR(comp->dev, "Failed to enable power domain: %d\n", ret);
 		return;
@@ -731,7 +731,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	ret = mtk_crtc_ddp_hw_init(mtk_crtc);
 	if (ret) {
-		pm_runtime_put(comp->dev);
+		mtk_ddp_comp_power_off(comp);
 		return;
 	}
 
@@ -744,7 +744,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
 	struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
-	int i, ret;
+	int i;
 
 	DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
 	if (!mtk_crtc->enabled)
@@ -774,9 +774,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	drm_crtc_vblank_off(crtc);
 	mtk_crtc_ddp_hw_fini(mtk_crtc);
-	ret = pm_runtime_put(comp->dev);
-	if (ret < 0)
-		DRM_DEV_ERROR(comp->dev, "Failed to disable power domain: %d\n", ret);
+	mtk_ddp_comp_power_off(comp);
 
 	mtk_crtc->enabled = false;
 }
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 3046c0409353b..a9b5a21cde2da 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -398,6 +398,8 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = {
 };
 
 static const struct mtk_ddp_comp_funcs ddp_ovl_adaptor = {
+	.power_on = mtk_ovl_adaptor_power_on,
+	.power_off = mtk_ovl_adaptor_power_off,
 	.clk_enable = mtk_ovl_adaptor_clk_enable,
 	.clk_disable = mtk_ovl_adaptor_clk_disable,
 	.config = mtk_ovl_adaptor_config,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
index 4bae55bdb034e..15b2eafff4384 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
@@ -7,6 +7,7 @@
 #define MTK_DRM_DDP_COMP_H
 
 #include <linux/io.h>
+#include <linux/pm_runtime.h>
 #include <linux/soc/mediatek/mtk-cmdq.h>
 #include <linux/soc/mediatek/mtk-mmsys.h>
 #include <linux/soc/mediatek/mtk-mutex.h>
@@ -46,6 +47,8 @@ enum mtk_ddp_comp_type {
 struct mtk_ddp_comp;
 struct cmdq_pkt;
 struct mtk_ddp_comp_funcs {
+	int (*power_on)(struct device *dev);
+	void (*power_off)(struct device *dev);
 	int (*clk_enable)(struct device *dev);
 	void (*clk_disable)(struct device *dev);
 	void (*config)(struct device *dev, unsigned int w,
@@ -92,6 +95,23 @@ struct mtk_ddp_comp {
 	const struct mtk_ddp_comp_funcs *funcs;
 };
 
+static inline int mtk_ddp_comp_power_on(struct mtk_ddp_comp *comp)
+{
+	if (comp->funcs && comp->funcs->power_on)
+		return comp->funcs->power_on(comp->dev);
+	else
+		return pm_runtime_resume_and_get(comp->dev);
+	return 0;
+}
+
+static inline void mtk_ddp_comp_power_off(struct mtk_ddp_comp *comp)
+{
+	if (comp->funcs && comp->funcs->power_off)
+		comp->funcs->power_off(comp->dev);
+	else
+		pm_runtime_put(comp->dev);
+}
+
 static inline int mtk_ddp_comp_clk_enable(struct mtk_ddp_comp *comp)
 {
 	if (comp->funcs && comp->funcs->clk_enable)
diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
index c3adaeefd551a..dc7c9e9919908 100644
--- a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
+++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
@@ -242,6 +242,22 @@ size_t mtk_mdp_rdma_get_num_formats(struct device *dev)
 	return ARRAY_SIZE(formats);
 }
 
+int mtk_mdp_rdma_power_on(struct device *dev)
+{
+	int ret = pm_runtime_resume_and_get(dev);
+
+	if (ret < 0) {
+		dev_err(dev, "Failed to power on: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+void mtk_mdp_rdma_power_off(struct device *dev)
+{
+	pm_runtime_put(dev);
+}
+
 int mtk_mdp_rdma_clk_enable(struct device *dev)
 {
 	struct mtk_mdp_rdma *rdma = dev_get_drvdata(dev);
-- 
GitLab


From c90ca391c1e4b7fca1648e5015a57e39080e7ab3 Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:39 +0800
Subject: [PATCH 1165/2340] drm/mediatek: Start/Stop components with function
 pointers

By registering component related functions to the pointers,
we can easily manage them within a for-loop and simplify the
logic of component start/stop process.

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-16-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 .../gpu/drm/mediatek/mtk_disp_ovl_adaptor.c   | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index e00d8a395ca0a..65c5153bdcd80 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -72,6 +72,8 @@ static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = {
 static const struct mtk_ddp_comp_funcs ethdr = {
 	.clk_enable = mtk_ethdr_clk_enable,
 	.clk_disable = mtk_ethdr_clk_disable,
+	.start = mtk_ethdr_start,
+	.stop = mtk_ethdr_stop,
 };
 
 static const struct mtk_ddp_comp_funcs merge = {
@@ -191,16 +193,30 @@ void mtk_ovl_adaptor_config(struct device *dev, unsigned int w,
 
 void mtk_ovl_adaptor_start(struct device *dev)
 {
+	int i;
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
 
-	mtk_ethdr_start(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]);
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->start)
+			continue;
+
+		comp_matches[i].funcs->start(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
 }
 
 void mtk_ovl_adaptor_stop(struct device *dev)
 {
+	int i;
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
 
-	mtk_ethdr_stop(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]);
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->stop)
+			continue;
+
+		comp_matches[i].funcs->stop(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
 }
 
 /**
-- 
GitLab


From 9c5a05fc8fca2d3be919f92816fc8d11ebdfd7be Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:40 +0800
Subject: [PATCH 1166/2340] drm/mediatek: Sort OVL adaptor components

Sort OVL adaptor components' names in alphabetical order.

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-17-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 65c5153bdcd80..30f2475d1c0b4 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -27,13 +27,14 @@
 #define MTK_OVL_ADAPTOR_LAYER_NUM 4
 
 enum mtk_ovl_adaptor_comp_type {
-	OVL_ADAPTOR_TYPE_MDP_RDMA = 0,
-	OVL_ADAPTOR_TYPE_MERGE,
 	OVL_ADAPTOR_TYPE_ETHDR,
+	OVL_ADAPTOR_TYPE_MDP_RDMA,
+	OVL_ADAPTOR_TYPE_MERGE,
 	OVL_ADAPTOR_TYPE_NUM,
 };
 
 enum mtk_ovl_adaptor_comp_id {
+	OVL_ADAPTOR_ETHDR0,
 	OVL_ADAPTOR_MDP_RDMA0,
 	OVL_ADAPTOR_MDP_RDMA1,
 	OVL_ADAPTOR_MDP_RDMA2,
@@ -46,7 +47,6 @@ enum mtk_ovl_adaptor_comp_id {
 	OVL_ADAPTOR_MERGE1,
 	OVL_ADAPTOR_MERGE2,
 	OVL_ADAPTOR_MERGE3,
-	OVL_ADAPTOR_ETHDR0,
 	OVL_ADAPTOR_ID_MAX
 };
 
@@ -64,9 +64,9 @@ struct mtk_disp_ovl_adaptor {
 };
 
 static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = {
+	[OVL_ADAPTOR_TYPE_ETHDR]	= "ethdr",
 	[OVL_ADAPTOR_TYPE_MDP_RDMA]	= "vdo1-rdma",
 	[OVL_ADAPTOR_TYPE_MERGE]	= "merge",
-	[OVL_ADAPTOR_TYPE_ETHDR]	= "ethdr",
 };
 
 static const struct mtk_ddp_comp_funcs ethdr = {
@@ -89,6 +89,7 @@ static const struct mtk_ddp_comp_funcs rdma = {
 };
 
 static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = {
+	[OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, &ethdr },
 	[OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0, &rdma },
 	[OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1, &rdma },
 	[OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2, &rdma },
@@ -101,7 +102,6 @@ static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = {
 	[OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2, &merge },
 	[OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3, &merge },
 	[OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4, &merge },
-	[OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, &ethdr },
 };
 
 void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx,
@@ -399,6 +399,7 @@ void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex)
 
 void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next)
 {
+	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2);
@@ -406,11 +407,11 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsig
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER);
-	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 }
 
 void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, unsigned int next)
 {
+	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2);
@@ -418,7 +419,6 @@ void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, un
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER);
-	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 }
 
 static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node,
-- 
GitLab


From 1168bb692bb9c45a31deda73cc3c87d368c7f490 Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:41 +0800
Subject: [PATCH 1167/2340] drm/mediatek: Refine device table of OVL adaptor

- Adjust indentation to align with other files
- Sort device table in alphabetical order
- Add sentinel to device table

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-18-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 30f2475d1c0b4..92eeb1005ec36 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -437,17 +437,10 @@ static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node,
 }
 
 static const struct of_device_id mtk_ovl_adaptor_comp_dt_ids[] = {
-	{
-		.compatible = "mediatek,mt8195-vdo1-rdma",
-		.data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA,
-	}, {
-		.compatible = "mediatek,mt8195-disp-merge",
-		.data = (void *)OVL_ADAPTOR_TYPE_MERGE,
-	}, {
-		.compatible = "mediatek,mt8195-disp-ethdr",
-		.data = (void *)OVL_ADAPTOR_TYPE_ETHDR,
-	},
-	{},
+	{ .compatible = "mediatek,mt8195-disp-ethdr", .data = (void *)OVL_ADAPTOR_TYPE_ETHDR },
+	{ .compatible = "mediatek,mt8195-disp-merge", .data = (void *)OVL_ADAPTOR_TYPE_MERGE },
+	{ .compatible = "mediatek,mt8195-vdo1-rdma", .data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA },
+	{ /* sentinel */ }
 };
 
 static int compare_of(struct device *dev, void *data)
-- 
GitLab


From ba527e9a11b33cc7315171807add5b2f594b23ac Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:42 +0800
Subject: [PATCH 1168/2340] drm/mediatek: Support MT8188 Padding in display
 driver

Padding is a new display module on MT8188, it provides ability
to add pixels to width and height of a layer with specified colors.

Due to hardware design, Mixer in VDOSYS1 requires width of a layer
to be 2-pixel-align, or 4-pixel-align when ETHDR is enabled,
we need Padding to deal with odd width.

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-19-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/Makefile       |   3 +-
 drivers/gpu/drm/mediatek/mtk_disp_drv.h |   4 +
 drivers/gpu/drm/mediatek/mtk_drm_drv.c  |   1 +
 drivers/gpu/drm/mediatek/mtk_drm_drv.h  |   2 +-
 drivers/gpu/drm/mediatek/mtk_padding.c  | 160 ++++++++++++++++++++++++
 5 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/mediatek/mtk_padding.c

diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile
index d4d193f60271c..5e4436403b8d2 100644
--- a/drivers/gpu/drm/mediatek/Makefile
+++ b/drivers/gpu/drm/mediatek/Makefile
@@ -16,7 +16,8 @@ mediatek-drm-y := mtk_disp_aal.o \
 		  mtk_dsi.o \
 		  mtk_dpi.o \
 		  mtk_ethdr.o \
-		  mtk_mdp_rdma.o
+		  mtk_mdp_rdma.o \
+		  mtk_padding.o
 
 obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o
 
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
index 2d426775b7ea4..74fa563393831 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
@@ -164,4 +164,8 @@ void mtk_mdp_rdma_config(struct device *dev, struct mtk_mdp_rdma_cfg *cfg,
 const u32 *mtk_mdp_rdma_get_formats(struct device *dev);
 size_t mtk_mdp_rdma_get_num_formats(struct device *dev);
 
+int mtk_padding_clk_enable(struct device *dev);
+void mtk_padding_clk_disable(struct device *dev);
+void mtk_padding_start(struct device *dev);
+void mtk_padding_stop(struct device *dev);
 #endif
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 48581da51857a..f7504d1edc627 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -996,6 +996,7 @@ static struct platform_driver * const mtk_drm_drivers[] = {
 	&mtk_dsi_driver,
 	&mtk_ethdr_driver,
 	&mtk_mdp_rdma_driver,
+	&mtk_padding_driver,
 };
 
 static int __init mtk_drm_init(void)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
index 6f98fff4f1a4c..33fadb08dc1c7 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
@@ -77,5 +77,5 @@ extern struct platform_driver mtk_dpi_driver;
 extern struct platform_driver mtk_dsi_driver;
 extern struct platform_driver mtk_ethdr_driver;
 extern struct platform_driver mtk_mdp_rdma_driver;
-
+extern struct platform_driver mtk_padding_driver;
 #endif /* MTK_DRM_DRV_H */
diff --git a/drivers/gpu/drm/mediatek/mtk_padding.c b/drivers/gpu/drm/mediatek/mtk_padding.c
new file mode 100644
index 0000000000000..0d6451c149b67
--- /dev/null
+++ b/drivers/gpu/drm/mediatek/mtk_padding.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/soc/mediatek/mtk-cmdq.h>
+
+#include "mtk_disp_drv.h"
+#include "mtk_drm_crtc.h"
+#include "mtk_drm_ddp_comp.h"
+
+#define PADDING_CONTROL_REG	0x00
+#define PADDING_BYPASS			BIT(0)
+#define PADDING_ENABLE			BIT(1)
+#define PADDING_PIC_SIZE_REG	0x04
+#define PADDING_H_REG		0x08 /* horizontal */
+#define PADDING_V_REG		0x0c /* vertical */
+#define PADDING_COLOR_REG	0x10
+
+/**
+ * struct mtk_padding - Basic information of the Padding
+ * @clk: Clock of the module
+ * @reg: Virtual address of the Padding for CPU to access
+ * @cmdq_reg: CMDQ setting of the Padding
+ *
+ * Every Padding should have different clock source, register base, and
+ * CMDQ settings, we stored these differences all together.
+ */
+struct mtk_padding {
+	struct clk		*clk;
+	void __iomem		*reg;
+	struct cmdq_client_reg	cmdq_reg;
+};
+
+int mtk_padding_clk_enable(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	return clk_prepare_enable(padding->clk);
+}
+
+void mtk_padding_clk_disable(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(padding->clk);
+}
+
+void mtk_padding_start(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	writel(PADDING_ENABLE | PADDING_BYPASS,
+	       padding->reg + PADDING_CONTROL_REG);
+
+	/*
+	 * Notice that even the padding is in bypass mode,
+	 * all the settings must be cleared to 0 or
+	 * undefined behaviors could happen
+	 */
+	writel(0, padding->reg + PADDING_PIC_SIZE_REG);
+	writel(0, padding->reg + PADDING_H_REG);
+	writel(0, padding->reg + PADDING_V_REG);
+	writel(0, padding->reg + PADDING_COLOR_REG);
+}
+
+void mtk_padding_stop(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	writel(0, padding->reg + PADDING_CONTROL_REG);
+}
+
+static int mtk_padding_bind(struct device *dev, struct device *master, void *data)
+{
+	return 0;
+}
+
+static void mtk_padding_unbind(struct device *dev, struct device *master, void *data)
+{
+}
+
+static const struct component_ops mtk_padding_component_ops = {
+	.bind	= mtk_padding_bind,
+	.unbind = mtk_padding_unbind,
+};
+
+static int mtk_padding_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_padding *priv;
+	struct resource *res;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(dev, "failed to get clk\n");
+		return PTR_ERR(priv->clk);
+	}
+
+	priv->reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(priv->reg)) {
+		dev_err(dev, "failed to do ioremap\n");
+		return PTR_ERR(priv->reg);
+	}
+
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	ret = cmdq_dev_get_client_reg(dev, &priv->cmdq_reg, 0);
+	if (ret) {
+		dev_err(dev, "failed to get gce client reg\n");
+		return ret;
+	}
+#endif
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = devm_pm_runtime_enable(dev);
+	if (ret)
+		return ret;
+
+	ret = component_add(dev, &mtk_padding_component_ops);
+	if (ret) {
+		pm_runtime_disable(dev);
+		return dev_err_probe(dev, ret, "failed to add component\n");
+	}
+
+	return 0;
+}
+
+static int mtk_padding_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &mtk_padding_component_ops);
+	return 0;
+}
+
+static const struct of_device_id mtk_padding_driver_dt_match[] = {
+	{ .compatible = "mediatek,mt8188-disp-padding" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_padding_driver_dt_match);
+
+struct platform_driver mtk_padding_driver = {
+	.probe		= mtk_padding_probe,
+	.remove		= mtk_padding_remove,
+	.driver		= {
+		.name	= "mediatek-disp-padding",
+		.owner	= THIS_MODULE,
+		.of_match_table = mtk_padding_driver_dt_match,
+	},
+};
-- 
GitLab


From 21b287146adf39304193e4c49198021e06a28ded Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:44 +0800
Subject: [PATCH 1169/2340] drm/mediatek: Return error if MDP RDMA failed to
 enable the clock

Return the result of clk_prepare_enable() instead of
always returns 0.

Fixes: f8946e2b6bb2 ("drm/mediatek: Add display MDP RDMA support for MT8195")

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-21-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_mdp_rdma.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
index dc7c9e9919908..ee9ce9b6d0786 100644
--- a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
+++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
@@ -262,8 +262,7 @@ int mtk_mdp_rdma_clk_enable(struct device *dev)
 {
 	struct mtk_mdp_rdma *rdma = dev_get_drvdata(dev);
 
-	clk_prepare_enable(rdma->clk);
-	return 0;
+	return clk_prepare_enable(rdma->clk);
 }
 
 void mtk_mdp_rdma_clk_disable(struct device *dev)
-- 
GitLab


From 8ac6935e5689a491f0bec78fec732722b3dad094 Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:45 +0800
Subject: [PATCH 1170/2340] drm/mediatek: Remove the redundant driver data for
 DPI

DPI input is in 1T2P mode on both MT8195 and MT8188.
Remove the redundant driver data to align the settings, or
the screen will glitch.

Fixes: 2847cd7e6403 ("drm/mediatek: Add mt8188 dpi compatibles and platform data")

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-22-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_dpi.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 4e3d9f7b4d8c9..beb7d9d08e971 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -966,20 +966,6 @@ static const struct mtk_dpi_conf mt8186_conf = {
 	.csc_enable_bit = CSC_ENABLE,
 };
 
-static const struct mtk_dpi_conf mt8188_dpintf_conf = {
-	.cal_factor = mt8195_dpintf_calculate_factor,
-	.max_clock_khz = 600000,
-	.output_fmts = mt8195_output_fmts,
-	.num_output_fmts = ARRAY_SIZE(mt8195_output_fmts),
-	.pixels_per_iter = 4,
-	.input_2pixel = false,
-	.dimension_mask = DPINTF_HPW_MASK,
-	.hvsize_mask = DPINTF_HSIZE_MASK,
-	.channel_swap_shift = DPINTF_CH_SWAP,
-	.yuv422_en_bit = DPINTF_YUV422_EN,
-	.csc_enable_bit = DPINTF_CSC_ENABLE,
-};
-
 static const struct mtk_dpi_conf mt8192_conf = {
 	.cal_factor = mt8183_calculate_factor,
 	.reg_h_fre_con = 0xe0,
@@ -1103,7 +1089,7 @@ static const struct of_device_id mtk_dpi_of_ids[] = {
 	{ .compatible = "mediatek,mt8173-dpi", .data = &mt8173_conf },
 	{ .compatible = "mediatek,mt8183-dpi", .data = &mt8183_conf },
 	{ .compatible = "mediatek,mt8186-dpi", .data = &mt8186_conf },
-	{ .compatible = "mediatek,mt8188-dp-intf", .data = &mt8188_dpintf_conf },
+	{ .compatible = "mediatek,mt8188-dp-intf", .data = &mt8195_dpintf_conf },
 	{ .compatible = "mediatek,mt8192-dpi", .data = &mt8192_conf },
 	{ .compatible = "mediatek,mt8195-dp-intf", .data = &mt8195_dpintf_conf },
 	{ /* sentinel */ },
-- 
GitLab


From 73b5ab27ab2ee616f2709dc212c2b0007894a12e Mon Sep 17 00:00:00 2001
From: Hsiao Chien Sung <shawn.sung@mediatek.com>
Date: Thu, 14 Dec 2023 13:58:46 +0800
Subject: [PATCH 1171/2340] drm/mediatek: Fix underrun in VDO1 when switches
 off the layer

Do not reset Merge while using CMDQ because reset API doesn't
wait for frame done event as CMDQ does and could lead to
underrun when the layer is switching off.

Fixes: aaf94f7c3ae6 ("drm/mediatek: Add display merge async reset control")

Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-23-shawn.sung@mediatek.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_disp_merge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_merge.c b/drivers/gpu/drm/mediatek/mtk_disp_merge.c
index e525a6b9e5b0b..22f768d923d5a 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_merge.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_merge.c
@@ -103,7 +103,7 @@ void mtk_merge_stop_cmdq(struct device *dev, struct cmdq_pkt *cmdq_pkt)
 	mtk_ddp_write(cmdq_pkt, 0, &priv->cmdq_reg, priv->regs,
 		      DISP_REG_MERGE_CTRL);
 
-	if (priv->async_clk)
+	if (!cmdq_pkt && priv->async_clk)
 		reset_control_reset(priv->reset_ctl);
 }
 
-- 
GitLab


From 486c95af5d76047d5cb50727270b1961dacb9380 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:58:05 -0100
Subject: [PATCH 1172/2340] drm/amd/display: add plane 3D LUT support

Wire up DC 3D LUT to DM plane color management (pre-blending). On AMD
display HW, 3D LUT comes after a shaper curve and we always have to
program a shaper curve to delinearize or normalize the color space
before applying a 3D LUT (since we have a reduced number of LUT
entries).

In this version, the default values of 3D LUT for size and bit_depth are
17x17x17 and 12-bit, but we already provide here a more generic
mechanisms to program other supported values (9x9x9 size and 10-bit).

v2:
- started with plane 3D LUT instead of CRTC 3D LUT support

v4:
- lut3d_size is the max dimension size instead of # of entries

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 102 +++++++++++++++++-
 2 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 73b12ca2fe607..131174c17cf04 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8270,6 +8270,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
 			bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
 			bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func;
+			bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func;
 		}
 
 		amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 8239904f9d902..f55e0a69dd320 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -623,6 +623,86 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
 	}
 }
 
+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
+				const struct drm_color_lut lut,
+				int bit_precision)
+{
+	rgb->red = drm_color_lut_extract(lut.red, bit_precision);
+	rgb->green = drm_color_lut_extract(lut.green, bit_precision);
+	rgb->blue  = drm_color_lut_extract(lut.blue, bit_precision);
+}
+
+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
+				    uint32_t lut3d_size,
+				    struct tetrahedral_params *params,
+				    bool use_tetrahedral_9,
+				    int bit_depth)
+{
+	struct dc_rgb *lut0;
+	struct dc_rgb *lut1;
+	struct dc_rgb *lut2;
+	struct dc_rgb *lut3;
+	int lut_i, i;
+
+
+	if (use_tetrahedral_9) {
+		lut0 = params->tetrahedral_9.lut0;
+		lut1 = params->tetrahedral_9.lut1;
+		lut2 = params->tetrahedral_9.lut2;
+		lut3 = params->tetrahedral_9.lut3;
+	} else {
+		lut0 = params->tetrahedral_17.lut0;
+		lut1 = params->tetrahedral_17.lut1;
+		lut2 = params->tetrahedral_17.lut2;
+		lut3 = params->tetrahedral_17.lut3;
+	}
+
+	for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
+		/*
+		 * We should consider the 3D LUT RGB values are distributed
+		 * along four arrays lut0-3 where the first sizes 1229 and the
+		 * other 1228. The bit depth supported for 3dlut channel is
+		 * 12-bit, but DC also supports 10-bit.
+		 *
+		 * TODO: improve color pipeline API to enable the userspace set
+		 * bit depth and 3D LUT size/stride, as specified by VA-API.
+		 */
+		__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+		__to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth);
+		__to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth);
+		__to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth);
+	}
+	/* lut0 has 1229 points (lut_size/4 + 1) */
+	__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+}
+
+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream
+ * @drm_lut3d: user 3D LUT
+ * @drm_lut3d_size: size of 3D LUT
+ * @lut3d: DC 3D LUT
+ *
+ * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it
+ * on DCN accordingly.
+ */
+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d,
+				   uint32_t drm_lut3d_size,
+				   struct dc_3dlut *lut)
+{
+	if (!drm_lut3d_size) {
+		lut->state.bits.initialized = 0;
+	} else {
+		/* Stride and bit depth are not programmable by API yet.
+		 * Therefore, only supports 17x17x17 3D LUT (12-bit).
+		 */
+		lut->lut_3d.use_tetrahedral_9 = false;
+		lut->lut_3d.use_12bits = true;
+		lut->state.bits.initialized = 1;
+		__drm_3dlut_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d,
+					lut->lut_3d.use_tetrahedral_9,
+					MAX_COLOR_3DLUT_BITDEPTH);
+	}
+}
+
 static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
 				       bool has_rom,
 				       enum dc_transfer_func_predefined tf,
@@ -665,8 +745,8 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
 				struct drm_plane_state *plane_state)
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-	const struct drm_color_lut *shaper = NULL;
-	uint32_t exp_size, size;
+	const struct drm_color_lut *shaper = NULL, *lut3d = NULL;
+	uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE;
 	bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut;
 
 	/* shaper LUT is only available if 3D LUT color caps */
@@ -680,6 +760,17 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
+	/* The number of 3D LUT entries is the dimension size cubed */
+	exp_size = has_3dlut ? dim_size * dim_size * dim_size : 0;
+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size);
+
+	if (lut3d && size != exp_size) {
+		drm_dbg(&adev->ddev,
+			"Invalid 3D LUT size. Should be %u but got %u.\n",
+			exp_size, size);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -976,8 +1067,8 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
 	enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-	const struct drm_color_lut *shaper_lut;
-	uint32_t shaper_size;
+	const struct drm_color_lut *shaper_lut, *lut3d;
+	uint32_t shaper_size, lut3d_size;
 	int ret;
 
 	dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
@@ -985,7 +1076,10 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 	shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
 	shaper_size = shaper_lut != NULL ? shaper_size : 0;
 	shaper_tf = dm_plane_state->shaper_tf;
+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size);
+	lut3d_size = lut3d != NULL ? lut3d_size : 0;
 
+	amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func);
 	ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
 					  amdgpu_tf_to_dc_tf(shaper_tf),
 					  shaper_size,
-- 
GitLab


From 8d26795ae61a5f64ba7db4f3240dc9ab2138d361 Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:58:06 -0100
Subject: [PATCH 1173/2340] drm/amd/display: handle empty LUTs in
 __set_input_tf

Unlike degamma, blend gamma doesn't support hardcoded curve
(predefined/ROM), but we can use AMD color module to fill blend gamma
parameters when we have non-linear plane gamma TF without plane gamma
LUT. The regular degamma path doesn't hit this.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index f55e0a69dd320..7383995384322 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -578,17 +578,21 @@ static int __set_input_tf(struct dc_transfer_func *func,
 	struct dc_gamma *gamma = NULL;
 	bool res;
 
-	gamma = dc_create_gamma();
-	if (!gamma)
-		return -ENOMEM;
+	if (lut_size) {
+		gamma = dc_create_gamma();
+		if (!gamma)
+			return -ENOMEM;
 
-	gamma->type = GAMMA_CUSTOM;
-	gamma->num_entries = lut_size;
+		gamma->type = GAMMA_CUSTOM;
+		gamma->num_entries = lut_size;
 
-	__drm_lut_to_dc_gamma(lut, gamma, false);
+		__drm_lut_to_dc_gamma(lut, gamma, false);
+	}
 
-	res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
-	dc_gamma_release(&gamma);
+	res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != NULL);
+
+	if (gamma)
+		dc_gamma_release(&gamma);
 
 	return res ? 0 : -ENOMEM;
 }
-- 
GitLab


From 783ed4460fe55b01ff32a7c6ad8239974874a16a Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:58:07 -0100
Subject: [PATCH 1174/2340] drm/amd/display: add plane blend LUT and TF support

Map plane blend properties to DPP blend gamma. Plane blend is a
post-3D LUT curve that linearizes color space for blending. It may be
defined by a user-blob LUT and/or predefined transfer function. As
hardcoded curve (ROM) is not supported on blend gamma, we use AMD color
module to fill parameters when setting non-linear TF with empty LUT.

v2:
- rename DRM TFs to AMDGPU TFs

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 56 +++++++++++++++++--
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 131174c17cf04..7fd85ec7f8114 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8271,6 +8271,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
 			bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func;
 			bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func;
+			bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf;
 		}
 
 		amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 7383995384322..27ddc65767197 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -733,6 +733,35 @@ static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
 	return ret;
 }
 
+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
+				       bool has_rom,
+				       enum dc_transfer_func_predefined tf,
+				       uint32_t blend_size,
+				       struct dc_transfer_func *func_blend)
+{
+	int ret = 0;
+
+	if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) {
+		/*
+		 * DRM plane gamma LUT or TF means we are linearizing color
+		 * space before blending (similar to degamma programming). As
+		 * we don't have hardcoded curve support, or we use AMD color
+		 * module to fill the parameters that will be translated to HW
+		 * points.
+		 */
+		func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
+		func_blend->tf = tf;
+		func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+		ret = __set_input_tf(func_blend, blend_lut, blend_size);
+	} else {
+		func_blend->type = TF_TYPE_BYPASS;
+		func_blend->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+
+	return ret;
+}
+
 /**
  * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
  * shaper and 3D LUTs match the hw supported size
@@ -1071,8 +1100,9 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
 	enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
-	const struct drm_color_lut *shaper_lut, *lut3d;
-	uint32_t shaper_size, lut3d_size;
+	enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut;
+	uint32_t shaper_size, lut3d_size, blend_size;
 	int ret;
 
 	dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
@@ -1088,12 +1118,30 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 					  amdgpu_tf_to_dc_tf(shaper_tf),
 					  shaper_size,
 					  dc_plane_state->in_shaper_func);
-	if (ret)
+	if (ret) {
 		drm_dbg_kms(plane_state->plane->dev,
 			    "setting plane %d shaper LUT failed.\n",
 			    plane_state->plane->index);
 
-	return ret;
+		return ret;
+	}
+
+	blend_tf = dm_plane_state->blend_tf;
+	blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size);
+	blend_size = blend_lut != NULL ? blend_size : 0;
+
+	ret = amdgpu_dm_atomic_blend_lut(blend_lut, false,
+					 amdgpu_tf_to_dc_tf(blend_tf),
+					 blend_size, dc_plane_state->blend_tf);
+	if (ret) {
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d gamma lut failed.\n",
+			    plane_state->plane->index);
+
+		return ret;
+	}
+
+	return 0;
 }
 
 /**
-- 
GitLab


From f81996637000a050477d597ef99e832079f99bd2 Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:58:08 -0100
Subject: [PATCH 1175/2340] drm/amd/display: allow newer DC hardware to use
 degamma ROM for PQ/HLG

Need to funnel the color caps through to these functions so it can check
that the hardware is capable.

v2:
- remove redundant color caps assignment on plane degamma map (Harry)
- pass color caps to degamma params

v3:
- remove unused color_caps parameter from set_color_properties (Harry)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 29 ++++++++++++-------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 27ddc65767197..3a1ca13eaee4d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -565,6 +565,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
+ * @caps: dc color capabilities
  * @func: transfer function
  * @lut: lookup table that defines the color space
  * @lut_size: size of respective lut.
@@ -572,7 +573,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
  * Returns:
  * 0 in case of success. -ENOMEM if fails.
  */
-static int __set_input_tf(struct dc_transfer_func *func,
+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func,
 			  const struct drm_color_lut *lut, uint32_t lut_size)
 {
 	struct dc_gamma *gamma = NULL;
@@ -589,7 +590,7 @@ static int __set_input_tf(struct dc_transfer_func *func,
 		__drm_lut_to_dc_gamma(lut, gamma, false);
 	}
 
-	res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != NULL);
+	res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL);
 
 	if (gamma)
 		dc_gamma_release(&gamma);
@@ -753,7 +754,7 @@ static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
 		func_blend->tf = tf;
 		func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
 
-		ret = __set_input_tf(func_blend, blend_lut, blend_size);
+		ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size);
 	} else {
 		func_blend->type = TF_TYPE_BYPASS;
 		func_blend->tf = TRANSFER_FUNCTION_LINEAR;
@@ -969,7 +970,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 
 static int
 map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
-			     struct dc_plane_state *dc_plane_state)
+			     struct dc_plane_state *dc_plane_state,
+			     struct dc_color_caps *caps)
 {
 	const struct drm_color_lut *degamma_lut;
 	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -1024,7 +1026,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
 			dc_plane_state->in_transfer_func->tf =
 				TRANSFER_FUNCTION_LINEAR;
 
-		r = __set_input_tf(dc_plane_state->in_transfer_func,
+		r = __set_input_tf(caps, dc_plane_state->in_transfer_func,
 				   degamma_lut, degamma_size);
 		if (r)
 			return r;
@@ -1037,7 +1039,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
 		dc_plane_state->in_transfer_func->tf = tf;
 
 		if (tf != TRANSFER_FUNCTION_SRGB &&
-		    !mod_color_calculate_degamma_params(NULL,
+		    !mod_color_calculate_degamma_params(caps,
 							dc_plane_state->in_transfer_func,
 							NULL, false))
 			return -ENOMEM;
@@ -1048,7 +1050,8 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
 
 static int
 __set_dm_plane_degamma(struct drm_plane_state *plane_state,
-		       struct dc_plane_state *dc_plane_state)
+		       struct dc_plane_state *dc_plane_state,
+		       struct dc_color_caps *color_caps)
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
 	const struct drm_color_lut *degamma_lut;
@@ -1079,7 +1082,7 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state,
 		dc_plane_state->in_transfer_func->type =
 			TF_TYPE_DISTRIBUTED_POINTS;
 
-		ret = __set_input_tf(dc_plane_state->in_transfer_func,
+		ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func,
 				     degamma_lut, degamma_size);
 		if (ret)
 			return ret;
@@ -1087,7 +1090,7 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state,
 		dc_plane_state->in_transfer_func->type =
 			TF_TYPE_PREDEFINED;
 
-		if (!mod_color_calculate_degamma_params(NULL,
+		if (!mod_color_calculate_degamma_params(color_caps,
 		    dc_plane_state->in_transfer_func, NULL, false))
 			return -ENOMEM;
 	}
@@ -1162,6 +1165,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 				      struct dc_plane_state *dc_plane_state)
 {
 	struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+	struct dc_color_caps *color_caps = NULL;
 	bool has_crtc_cm_degamma;
 	int ret;
 
@@ -1171,6 +1175,9 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 		return ret;
 	}
 
+	if (dc_plane_state->ctx && dc_plane_state->ctx->dc)
+		color_caps = &dc_plane_state->ctx->dc->caps.color;
+
 	/* Initially, we can just bypass the DGM block. */
 	dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
 	dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
@@ -1178,7 +1185,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 	/* After, we start to update values according to color props */
 	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
 
-	ret = __set_dm_plane_degamma(plane_state, dc_plane_state);
+	ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps);
 	if (ret == -ENOMEM)
 		return ret;
 
@@ -1205,7 +1212,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 		 * linearize (implicit degamma) from sRGB/BT709 according to
 		 * the input space.
 		 */
-		ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state);
+		ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps);
 		if (ret)
 			return ret;
 	}
-- 
GitLab


From 94aeb4117343d072e3a35b9595bcbfc0058ee724 Mon Sep 17 00:00:00 2001
From: "Wang, Beyond" <Wang.Beyond@amd.com>
Date: Tue, 12 Dec 2023 21:03:04 +0800
Subject: [PATCH 1176/2340] drm/amdgpu: fix ftrace event amdgpu_bo_move always
 move on same heap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue: during evict or validate happened on amdgpu_bo, the 'from' and
'to' is always same in ftrace event of amdgpu_bo_move

where calling the 'trace_amdgpu_bo_move', the comment says move_notify
is called before move happens, but actually it is called after move
happens, here the new_mem is same as bo->resource

Fix: move trace_amdgpu_bo_move from move_notify to amdgpu_bo_move

Signed-off-by: Wang, Beyond <Wang.Beyond@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |  5 +++--
 3 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5ad03f2afdb45..425cebcc5cbff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1245,19 +1245,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
  * amdgpu_bo_move_notify - notification about a memory move
  * @bo: pointer to a buffer object
  * @evict: if this move is evicting the buffer from the graphics address space
- * @new_mem: new information of the bufer object
  *
  * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
  * bookkeeping.
  * TTM driver callback which is called when ttm moves a buffer.
  */
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem)
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
-	struct ttm_resource *old_mem = bo->resource;
 
 	if (!amdgpu_bo_is_amdgpu_bo(bo))
 		return;
@@ -1274,13 +1270,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 	/* remember the eviction */
 	if (evict)
 		atomic64_inc(&adev->num_evictions);
-
-	/* update statistics */
-	if (!new_mem)
-		return;
-
-	/* move_notify is called before move happens */
-	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }
 
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index d28e21baef16e..a3ea8a82db23a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -344,9 +344,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
 int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 			   size_t buffer_size, uint32_t *metadata_size,
 			   uint64_t *flags);
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem);
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
 void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
 vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ab4a762aed5bd..75c9fd2c6c2a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -545,10 +545,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			return r;
 	}
 
+	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 out:
 	/* update statistics */
 	atomic64_add(bo->base.size, &adev->num_bytes_moved);
-	amdgpu_bo_move_notify(bo, evict, new_mem);
+	amdgpu_bo_move_notify(bo, evict);
 	return 0;
 }
 
@@ -1553,7 +1554,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 static void
 amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	amdgpu_bo_move_notify(bo, false, NULL);
+	amdgpu_bo_move_notify(bo, false);
 }
 
 static struct ttm_device_funcs amdgpu_bo_driver = {
-- 
GitLab


From a2f2f43f74cd050146cd2660bbc3c7e1e7c0da0b Mon Sep 17 00:00:00 2001
From: Peyton Lee <peytolee@amd.com>
Date: Tue, 12 Dec 2023 10:12:33 +0800
Subject: [PATCH 1177/2340] drm/amd/pm: support return vpe clock table

pm supports return vpe clock table and soc clock table

Signed-off-by: Peyton Lee <peytolee@amd.com>
Reviewed-by: Li Ma <li.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dm_pp_smu.h    |  2 ++
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 10 ++++++++++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |  1 +
 .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c  | 20 +++++++++++++++++++
 4 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4440d08743aa1..bd7ba0a25198d 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -247,6 +247,7 @@ struct pp_smu_funcs_nv {
 #define PP_SMU_NUM_MEMCLK_DPM_LEVELS  4
 #define PP_SMU_NUM_DCLK_DPM_LEVELS    8
 #define PP_SMU_NUM_VCLK_DPM_LEVELS    8
+#define PP_SMU_NUM_VPECLK_DPM_LEVELS  8
 
 struct dpm_clock {
   uint32_t  Freq;    // In MHz
@@ -262,6 +263,7 @@ struct dpm_clocks {
 	struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS];
 	struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS];
 	struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS];
+	struct dpm_clock VPEClocks[PP_SMU_NUM_VPECLK_DPM_LEVELS];
 };
 
 
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 97b40c5fa1ff6..6627ee07d52d9 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -616,6 +616,16 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable)
 			  enable ? "enable" : "disable", ret);
 }
 
+void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable)
+{
+	int ret = 0;
+
+	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable);
+	if (ret)
+		DRM_ERROR("Dpm %s vpe failed, ret = %d.\n",
+			  enable ? "enable" : "disable", ret);
+}
+
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index d76b0a60db443..3047ffe7f2444 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -445,6 +445,7 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev);
 void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable);
 void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable);
 void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
+void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable);
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index 94ccdbfd70909..47fdbae4adfc0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -1085,6 +1085,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
 					       0, NULL);
 }
 
+static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+	DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+	uint8_t idx;
+
+	/* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */
+	for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+		clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0;
+		clock_table->SocClocks[idx].Vol = 0;
+	}
+
+	for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+		clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0;
+		clock_table->VPEClocks[idx].Vol = 0;
+	}
+
+	return 0;
+}
+
 static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.check_fw_status = smu_v14_0_check_fw_status,
 	.check_fw_version = smu_v14_0_check_fw_version,
@@ -1115,6 +1134,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
 	.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
 	.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
+	.get_dpm_clock_table = smu_14_0_0_get_dpm_table,
 };
 
 static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
-- 
GitLab


From cb19dc4a64598ffbfd4354083f809fae082fa4c3 Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:58:09 -0100
Subject: [PATCH 1178/2340] drm/amd/display: copy 3D LUT settings from crtc
 state to stream_update

When commiting planes, we copy color mgmt resources to the stream state.
Do the same for shaper and 3D LUTs.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Co-developed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7fd85ec7f8114..25ec9129758bd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8483,6 +8483,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				&acrtc_state->stream->csc_color_matrix;
 			bundle->stream_update.out_transfer_func =
 				acrtc_state->stream->out_transfer_func;
+			bundle->stream_update.lut3d_func =
+				(struct dc_3dlut *) acrtc_state->stream->lut3d_func;
+			bundle->stream_update.func_shaper =
+				(struct dc_transfer_func *) acrtc_state->stream->func_shaper;
 		}
 
 		acrtc_state->stream->abm_level = acrtc_state->abm_level;
-- 
GitLab


From b8b92c1bd7788b1f13d547ee2ce8a93baf55b814 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:58:10 -0100
Subject: [PATCH 1179/2340] drm/amd/display: add plane CTM driver-specific
 property

Plane CTM for pre-blending color space conversion. Only enable
driver-specific plane CTM property on drivers that support both pre- and
post-blending gamut remap matrix, i.e., DCN3+ family. Otherwise it
conflits with DRM CRTC CTM property.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h      |  2 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  7 +++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  7 +++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 21 +++++++++++++++++++
 4 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 790c08b8262d6..2e4911050cc5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -364,6 +364,8 @@ struct amdgpu_mode_info {
 	 * @plane_hdr_mult_property:
 	 */
 	struct drm_property *plane_hdr_mult_property;
+
+	struct drm_property *plane_ctm_property;
 	/**
 	 * @shaper_lut_property: Plane property to set pre-blending shaper LUT
 	 * that converts color content before 3D LUT. If
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 7706d412d206f..2d5af83d40b53 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -784,6 +784,13 @@ struct dm_plane_state {
 	 * TF is needed for any subsequent linear-to-non-linear transforms.
 	 */
 	__u64 hdr_mult;
+	/**
+	 * @ctm:
+	 *
+	 * Color transformation matrix. The blob (if not NULL) is a &struct
+	 * drm_color_ctm_3x4.
+	 */
+	struct drm_property_blob *ctm;
 	/**
 	 * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
 	 * array of &struct drm_color_lut.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 3a1ca13eaee4d..3eed47736b26b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -240,6 +240,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
 		return -ENOMEM;
 	adev->mode_info.plane_hdr_mult_property = prop;
 
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_CTM", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_ctm_property = prop;
+
 	prop = drm_property_create(adev_to_drm(adev),
 				   DRM_MODE_PROP_BLOB,
 				   "AMD_PLANE_SHAPER_LUT", 0);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 35a4732483ba7..f10c5154d06ad 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1366,6 +1366,9 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 	if (old_dm_plane_state->degamma_lut)
 		dm_plane_state->degamma_lut =
 			drm_property_blob_get(old_dm_plane_state->degamma_lut);
+	if (old_dm_plane_state->ctm)
+		dm_plane_state->ctm =
+			drm_property_blob_get(old_dm_plane_state->ctm);
 	if (old_dm_plane_state->shaper_lut)
 		dm_plane_state->shaper_lut =
 			drm_property_blob_get(old_dm_plane_state->shaper_lut);
@@ -1450,6 +1453,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
 
 	if (dm_plane_state->degamma_lut)
 		drm_property_blob_put(dm_plane_state->degamma_lut);
+	if (dm_plane_state->ctm)
+		drm_property_blob_put(dm_plane_state->ctm);
 	if (dm_plane_state->lut3d)
 		drm_property_blob_put(dm_plane_state->lut3d);
 	if (dm_plane_state->shaper_lut)
@@ -1490,6 +1495,11 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
 				   dm->adev->mode_info.plane_hdr_mult_property,
 				   AMDGPU_HDR_MULT_DEFAULT);
 
+	/* Only enable plane CTM if both DPP and MPC gamut remap is available. */
+	if (dm->dc->caps.color.mpc.gamut_remap)
+		drm_object_attach_property(&plane->base,
+					   dm->adev->mode_info.plane_ctm_property, 0);
+
 	if (dpp_color_caps.hw_3d_lut) {
 		drm_object_attach_property(&plane->base,
 					   mode_info.plane_shaper_lut_property, 0);
@@ -1547,6 +1557,14 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
 			dm_plane_state->hdr_mult = val;
 			dm_plane_state->base.color_mgmt_changed = 1;
 		}
+	} else if (property == adev->mode_info.plane_ctm_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->ctm,
+							val,
+							sizeof(struct drm_color_ctm), -1,
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
 	} else if (property == adev->mode_info.plane_shaper_lut_property) {
 		ret = drm_property_replace_blob_from_id(plane->dev,
 							&dm_plane_state->shaper_lut,
@@ -1608,6 +1626,9 @@ dm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = dm_plane_state->degamma_tf;
 	} else if (property == adev->mode_info.plane_hdr_mult_property) {
 		*val = dm_plane_state->hdr_mult;
+	} else if (property == adev->mode_info.plane_ctm_property) {
+		*val = (dm_plane_state->ctm) ?
+			dm_plane_state->ctm->base.id : 0;
 	} else 	if (property == adev->mode_info.plane_shaper_lut_property) {
 		*val = (dm_plane_state->shaper_lut) ?
 			dm_plane_state->shaper_lut->base.id : 0;
-- 
GitLab


From 5f82a0c90ccaf0d1390b5c1b83a83d38bca526da Mon Sep 17 00:00:00 2001
From: Peyton Lee <peytolee@amd.com>
Date: Tue, 12 Dec 2023 11:11:24 +0800
Subject: [PATCH 1180/2340] drm/amdgpu/vpe: enable vpe dpm

enable vpe dpm

Signed-off-by: Peyton Lee <peytolee@amd.com>
Reviewed-by: Lang Yu <lang.yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 249 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h |  12 ++
 drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c   |  15 ++
 3 files changed, 276 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index e81579708e966..6f149b54d4d39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_vpe.h"
+#include "amdgpu_smu.h"
 #include "soc15_common.h"
 #include "vpe_v6_1.h"
 
@@ -33,8 +34,186 @@
 /* VPE CSA resides in the 4th page of CSA */
 #define AMDGPU_CSA_VPE_OFFSET 	(4096 * 3)
 
+/* 1 second timeout */
+#define VPE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+#define VPE_MAX_DPM_LEVEL			4
+#define FIXED1_8_BITS_PER_FRACTIONAL_PART	8
+#define GET_PRATIO_INTEGER_PART(x)		((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
+
 static void vpe_set_ring_funcs(struct amdgpu_device *adev);
 
+static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
+{
+	*remainder = dividend % divisor;
+	return dividend / divisor;
+}
+
+static inline uint16_t complete_integer_division_u16(
+	uint16_t dividend,
+	uint16_t divisor,
+	uint16_t *remainder)
+{
+	return div16_u16_rem(dividend, divisor, (uint16_t *)remainder);
+}
+
+static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
+{
+	bool arg1_negative = numerator < 0;
+	bool arg2_negative = denominator < 0;
+
+	uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator);
+	uint16_t arg2_value = (uint16_t)(arg2_negative ? -denominator : denominator);
+
+	uint16_t remainder;
+
+	/* determine integer part */
+	uint16_t res_value = complete_integer_division_u16(
+		arg1_value, arg2_value, &remainder);
+
+	if (res_value > 127 /* CHAR_MAX */)
+		return 0;
+
+	/* determine fractional part */
+	{
+		unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART;
+
+		do {
+			remainder <<= 1;
+
+			res_value <<= 1;
+
+			if (remainder >= arg2_value) {
+				res_value |= 1;
+				remainder -= arg2_value;
+			}
+		} while (--i != 0);
+	}
+
+	/* round up LSB */
+	{
+		uint16_t summand = (remainder << 1) >= arg2_value;
+
+		if ((res_value + summand) > 32767 /* SHRT_MAX */)
+			return 0;
+
+		res_value += summand;
+	}
+
+	if (arg1_negative ^ arg2_negative)
+		res_value = -res_value;
+
+	return res_value;
+}
+
+static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
+{
+	uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);
+
+	if (GET_PRATIO_INTEGER_PART(pratio) > 1)
+		pratio = 0;
+
+	return pratio;
+}
+
+/*
+ * VPE has 4 DPM levels from level 0 (lowerest) to 3 (highest),
+ * VPE FW will dynamically decide which level should be used according to current loading.
+ *
+ * Get VPE and SOC clocks from PM, and select the appropriate four clock values,
+ * calculate the ratios of adjusting from one clock to another.
+ * The VPE FW can then request the appropriate frequency from the PMFW.
+ */
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	uint32_t dpm_ctl;
+
+	if (adev->pm.dpm_enabled) {
+		struct dpm_clocks clock_table = { 0 };
+		struct dpm_clock *VPEClks;
+		struct dpm_clock *SOCClks;
+		uint32_t idx;
+		uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
+		uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
+
+		dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+		dpm_ctl |= 1; /* DPM enablement */
+		WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+
+		/* Get VPECLK and SOCCLK */
+		if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
+			dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
+			goto disable_dpm;
+		}
+
+		SOCClks = clock_table.SocClocks;
+		VPEClks = clock_table.VPEClocks;
+
+		/* vpe dpm only cares 4 levels. */
+		for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
+			uint32_t soc_dpm_level;
+			uint32_t min_freq;
+
+			if (idx == 0)
+				soc_dpm_level = 0;
+			else
+				soc_dpm_level = (idx * 2) + 1;
+
+			/* clamp the max level */
+			if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1)
+				soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1;
+
+			min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+				   SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
+
+			switch (idx) {
+			case 0:
+				pratio_vmin_freq = min_freq;
+				break;
+			case 1:
+				pratio_vmid_freq = min_freq;
+				break;
+			case 2:
+				pratio_vnorm_freq = min_freq;
+				break;
+			case 3:
+				pratio_vmax_freq = min_freq;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
+			uint32_t pratio_ctl;
+
+			pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
+			pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
+			pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);
+
+			pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl);		/* PRatio */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000);	/* 1ms, unit=1/24MHz */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000);	/* 50ms */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
+			dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
+		} else {
+			dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
+			goto disable_dpm;
+		}
+	}
+	return 0;
+
+disable_dpm:
+	dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+	dpm_ctl &= 0xfffffffe; /* Disable DPM */
+	WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+	dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
+	return 0;
+}
+
 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
 {
 	struct amdgpu_firmware_info ucode = {
@@ -134,6 +313,19 @@ static int vpe_early_init(void *handle)
 	return 0;
 }
 
+static void vpe_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vpe.idle_work.work);
+	unsigned int fences = 0;
+
+	fences += amdgpu_fence_count_emitted(&adev->vpe.ring);
+
+	if (fences == 0)
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+	else
+		schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
 
 static int vpe_common_init(struct amdgpu_vpe *vpe)
 {
@@ -150,6 +342,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe)
 		return r;
 	}
 
+	vpe->context_started = false;
+	INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler);
+
 	return 0;
 }
 
@@ -219,6 +414,9 @@ static int vpe_hw_fini(void *handle)
 
 	vpe_ring_stop(vpe);
 
+	/* Power off VPE */
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+
 	return 0;
 }
 
@@ -226,6 +424,8 @@ static int vpe_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vpe.idle_work);
+
 	return vpe_hw_fini(adev);
 }
 
@@ -430,6 +630,21 @@ static int vpe_set_clockgating_state(void *handle,
 static int vpe_set_powergating_state(void *handle,
 				     enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+
+	if (!adev->pm.dpm_enabled)
+		dev_err(adev->dev, "Without PM, cannot support powergating\n");
+
+	dev_dbg(adev->dev, "%s: %s!\n", __func__, (state == AMD_PG_STATE_GATE) ? "GATE":"UNGATE");
+
+	if (state == AMD_PG_STATE_GATE) {
+		amdgpu_dpm_enable_vpe(adev, false);
+		vpe->context_started = false;
+	} else {
+		amdgpu_dpm_enable_vpe(adev, true);
+	}
+
 	return 0;
 }
 
@@ -595,6 +810,38 @@ err0:
 	return ret;
 }
 
+static void vpe_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+
+	cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+	/* Power on VPE and notify VPE of new context  */
+	if (!vpe->context_started) {
+		uint32_t context_notify;
+
+		/* Power on VPE */
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE);
+
+		/* Indicates that a job from a new context has been submitted. */
+		context_notify = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator));
+		if ((context_notify & 0x1) == 0)
+			context_notify |= 0x1;
+		else
+			context_notify &= ~(0x1);
+		WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), context_notify);
+		vpe->context_started = true;
+	}
+}
+
+static void vpe_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
 static const struct amdgpu_ring_funcs vpe_ring_funcs = {
 	.type = AMDGPU_RING_TYPE_VPE,
 	.align_mask = 0xf,
@@ -625,6 +872,8 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
 	.init_cond_exec = vpe_ring_init_cond_exec,
 	.patch_cond_exec = vpe_ring_patch_cond_exec,
 	.preempt_ib = vpe_ring_preempt_ib,
+	.begin_use = vpe_ring_begin_use,
+	.end_use = vpe_ring_end_use,
 };
 
 static void vpe_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
index 29d56f7ae4a9e..1153ddaea64d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -47,6 +47,15 @@ struct vpe_regs {
 	uint32_t queue0_rb_wptr_lo;
 	uint32_t queue0_rb_wptr_hi;
 	uint32_t queue0_preempt;
+
+	uint32_t dpm_enable;
+	uint32_t dpm_pratio;
+	uint32_t dpm_request_interval;
+	uint32_t dpm_decision_threshold;
+	uint32_t dpm_busy_clamp_threshold;
+	uint32_t dpm_idle_clamp_threshold;
+	uint32_t dpm_request_lv;
+	uint32_t context_indicator;
 };
 
 struct amdgpu_vpe {
@@ -63,12 +72,15 @@ struct amdgpu_vpe {
 	struct amdgpu_bo		*cmdbuf_obj;
 	uint64_t			cmdbuf_gpu_addr;
 	uint32_t			*cmdbuf_cpu_addr;
+	struct delayed_work		idle_work;
+	bool				context_started;
 };
 
 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
 int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
 int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
 int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
 
 #define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
 #define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
index 174f13eff5754..d20060a51e052 100644
--- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
@@ -96,6 +96,10 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
 		adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
 
 		amdgpu_vpe_psp_update_sram(adev);
+
+		/* Config DPM */
+		amdgpu_vpe_configure_dpm(vpe);
+
 		return 0;
 	}
 
@@ -128,6 +132,8 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
 	}
 
 	vpe_v6_1_halt(vpe, false);
+	/* Config DPM */
+	amdgpu_vpe_configure_dpm(vpe);
 
 	return 0;
 }
@@ -264,6 +270,15 @@ static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe)
 	vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI;
 	vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT;
 
+	vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2;
+	vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4;
+	vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3;
+	vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4;
+	vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2;
+	vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3;
+	vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1;
+	vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3;
+
 	return 0;
 }
 
-- 
GitLab


From 34dc227bf2f34085313be39d76b12f08bfe8efc0 Mon Sep 17 00:00:00 2001
From: Kenneth Feng <kenneth.feng@amd.com>
Date: Wed, 13 Dec 2023 14:58:34 +0800
Subject: [PATCH 1181/2340] drm/amd/pm: add power save mode workload for smu
 13.0.10

add power save mode workload for smu 13.0.10, so that in compute mode,
pmfw will add margin since some applications requres higher margin.

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index a24aa886c6369..231122622a9c0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2545,16 +2545,19 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 
 	workload_mask = 1 << workload_type;
 
-	/* Add optimizations for SMU13.0.0.  Reuse the power saving profile */
-	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&
-	    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0)) &&
-	    ((smu->adev->pm.fw_version == 0x004e6601) ||
-	     (smu->adev->pm.fw_version >= 0x004e7400))) {
-		workload_type = smu_cmn_to_asic_specific_index(smu,
-							       CMN2ASIC_MAPPING_WORKLOAD,
-							       PP_SMC_POWER_PROFILE_POWERSAVING);
-		if (workload_type >= 0)
-			workload_mask |= 1 << workload_type;
+	/* Add optimizations for SMU13.0.0/10.  Reuse the power saving profile */
+	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) {
+		if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+			((smu->adev->pm.fw_version == 0x004e6601) ||
+			(smu->adev->pm.fw_version >= 0x004e7300))) ||
+			(amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+			 smu->adev->pm.fw_version >= 0x00504500)) {
+			workload_type = smu_cmn_to_asic_specific_index(smu,
+														   CMN2ASIC_MAPPING_WORKLOAD,
+														   PP_SMC_POWER_PROFILE_POWERSAVING);
+			if (workload_type >= 0)
+				workload_mask |= 1 << workload_type;
+		}
 	}
 
 	return smu_cmn_send_smc_msg_with_param(smu,
-- 
GitLab


From 3dad69090743c5f4642aeb628b8542a1e335dded Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 16 Nov 2023 18:58:11 -0100
Subject: [PATCH 1182/2340] drm/amd/display: add plane CTM support

Map the plane CTM driver-specific property to DC plane, instead of DC
stream. The remaining steps to program DPP block are already implemented
on DC shared-code.

v3:
- fix comment about plane and CRTC CTMs priorities (Harry)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 26 +++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 25ec9129758bd..d7ac020bd8af8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9983,6 +9983,7 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 		if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf ||
 		    dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut ||
 		    dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult ||
+		    dm_old_other_state->ctm != dm_new_other_state->ctm ||
 		    dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut ||
 		    dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf ||
 		    dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 3eed47736b26b..d52c3333ea139 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1172,6 +1172,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 				      struct dc_plane_state *dc_plane_state)
 {
 	struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	struct drm_color_ctm *ctm = NULL;
 	struct dc_color_caps *color_caps = NULL;
 	bool has_crtc_cm_degamma;
 	int ret;
@@ -1224,5 +1226,29 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 			return ret;
 	}
 
+	/* Setup CRTC CTM. */
+	if (dm_plane_state->ctm) {
+		ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data;
+		/*
+		 * DCN2 and older don't support both pre-blending and
+		 * post-blending gamut remap. For this HW family, if we have
+		 * the plane and CRTC CTMs simultaneously, CRTC CTM takes
+		 * priority, and we discard plane CTM, as implemented in
+		 * dcn10_program_gamut_remap(). However, DCN3+ has DPP
+		 * (pre-blending) and MPC (post-blending) `gamut remap` blocks;
+		 * therefore, we can program plane and CRTC CTMs together by
+		 * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
+		 * as it's done by dcn30_program_gamut_remap().
+		 */
+		__drm_ctm_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
+
+		dc_plane_state->gamut_remap_matrix.enable_remap = true;
+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+	} else {
+		/* Bypass CTM. */
+		dc_plane_state->gamut_remap_matrix.enable_remap = false;
+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+	}
+
 	return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state);
 }
-- 
GitLab


From 6872a189be508b9383bc081d462a5d99cbb8319d Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Thu, 16 Nov 2023 18:58:12 -0100
Subject: [PATCH 1183/2340] drm/amd/display: Add 3x4 CTM support for plane CTM

Create drm_color_ctm_3x4 to support 3x4-dimension plane CTM matrix and
convert DRM CTM to DC CSC float matrix.

v3:
- rename ctm2 to ctm_3x4 (Harry)

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 28 +++++++++++++++++--
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   |  2 +-
 include/uapi/drm/drm_mode.h                   |  8 ++++++
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index d52c3333ea139..96aecc1a71a33 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -434,6 +434,28 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 	}
 }
 
+/**
+ * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix with 3x4 dimensions
+ * @matrix: DC CSC float matrix
+ *
+ * The matrix needs to be a 3x4 (12 entry) matrix.
+ */
+static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm,
+				       struct fixed31_32 *matrix)
+{
+	int i;
+
+	/* The format provided is S31.32, using signed-magnitude representation.
+	 * Our fixed31_32 is also S31.32, but is using 2's complement. We have
+	 * to convert from signed-magnitude to 2's complement.
+	 */
+	for (i = 0; i < 12; i++) {
+		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
+		matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]);
+	}
+}
+
 /**
  * __set_legacy_tf - Calculates the legacy transfer function
  * @func: transfer function
@@ -1173,7 +1195,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 {
 	struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
-	struct drm_color_ctm *ctm = NULL;
+	struct drm_color_ctm_3x4 *ctm = NULL;
 	struct dc_color_caps *color_caps = NULL;
 	bool has_crtc_cm_degamma;
 	int ret;
@@ -1228,7 +1250,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 
 	/* Setup CRTC CTM. */
 	if (dm_plane_state->ctm) {
-		ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data;
+		ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data;
 		/*
 		 * DCN2 and older don't support both pre-blending and
 		 * post-blending gamut remap. For this HW family, if we have
@@ -1240,7 +1262,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 		 * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
 		 * as it's done by dcn30_program_gamut_remap().
 		 */
-		__drm_ctm_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
+		__drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
 
 		dc_plane_state->gamut_remap_matrix.enable_remap = true;
 		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index f10c5154d06ad..8a4c40b4c27e4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1561,7 +1561,7 @@ dm_atomic_plane_set_property(struct drm_plane *plane,
 		ret = drm_property_replace_blob_from_id(plane->dev,
 							&dm_plane_state->ctm,
 							val,
-							sizeof(struct drm_color_ctm), -1,
+							sizeof(struct drm_color_ctm_3x4), -1,
 							&replaced);
 		dm_plane_state->base.color_mgmt_changed |= replaced;
 		return ret;
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 95630f1701102..39d9ac0c0a809 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -846,6 +846,14 @@ struct drm_color_ctm {
 	__u64 matrix[9];
 };
 
+struct drm_color_ctm_3x4 {
+	/*
+	 * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[12];
+};
+
 struct drm_color_lut {
 	/*
 	 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
-- 
GitLab


From afe58346d5d3887b3e49ff623d2f2e471f232a8d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 27 Nov 2023 17:26:29 -0500
Subject: [PATCH 1184/2340] drm/amdgpu/debugfs: fix error code when smc
 register accessors are NULL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Should be -EOPNOTSUPP.

Fixes: 5104fdf50d32 ("drm/amdgpu: Fix a null pointer access when the smc_rreg pointer is NULL")
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 424bed7382965..2cebf2145d9a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -755,7 +755,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	int r;
 
 	if (!adev->smc_rreg)
-		return -EPERM;
+		return -EOPNOTSUPP;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
@@ -814,7 +814,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	int r;
 
 	if (!adev->smc_wreg)
-		return -EPERM;
+		return -EOPNOTSUPP;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
-- 
GitLab


From 804c49ef30735d70c1df0c58ebec313149a3933c Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Thu, 14 Dec 2023 09:01:54 +0800
Subject: [PATCH 1185/2340] drm/amd/pm: Remove unneeded semicolon

./drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c:1418:2-3: Unneeded semicolon

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7743
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index d409857fd622e..c16703868e5ca 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1415,7 +1415,7 @@ static int smu_wbrf_event_handler(struct notifier_block *nb,
 		break;
 	default:
 		return NOTIFY_DONE;
-	};
+	}
 
 	return NOTIFY_OK;
 }
-- 
GitLab


From ac16667237a82e2597e329eb9bc520d1cf9dff30 Mon Sep 17 00:00:00 2001
From: Zhipeng Lu <alexious@zju.edu.cn>
Date: Thu, 14 Dec 2023 23:24:11 +0800
Subject: [PATCH 1186/2340] drm/amd/pm: fix a double-free in si_dpm_init

When the allocation of
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries fails,
amdgpu_free_extended_power_table is called to free some fields of adev.
However, when the control flow returns to si_dpm_sw_init, it goes to
label dpm_failed and calls si_dpm_fini, which calls
amdgpu_free_extended_power_table again and free those fields again. Thus
a double-free is triggered.

Fixes: 841686df9f7d ("drm/amdgpu: add SI DPM support (v4)")
Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index fc8e4ac6c8e76..df4f20293c16a 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -7379,10 +7379,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
 		kcalloc(4,
 			sizeof(struct amdgpu_clock_voltage_dependency_entry),
 			GFP_KERNEL);
-	if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
-		amdgpu_free_extended_power_table(adev);
+	if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries)
 		return -ENOMEM;
-	}
+
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4;
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0;
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0;
-- 
GitLab


From 28dd788382c43b330480f57cd34cde0840896743 Mon Sep 17 00:00:00 2001
From: Zhipeng Lu <alexious@zju.edu.cn>
Date: Fri, 15 Dec 2023 00:24:58 +0800
Subject: [PATCH 1187/2340] drivers/amd/pm: fix a use-after-free in
 kv_parse_power_table

When ps allocated by kzalloc equals to NULL, kv_parse_power_table
frees adev->pm.dpm.ps that allocated before. However, after the control
flow goes through the following call chains:

kv_parse_power_table
  |-> kv_dpm_init
        |-> kv_dpm_sw_init
	      |-> kv_dpm_fini

The adev->pm.dpm.ps is used in the for loop of kv_dpm_fini after its
first free in kv_parse_power_table and causes a use-after-free bug.

Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index 5d28c951a3197..5cb4725c773f6 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -2735,10 +2735,8 @@ static int kv_parse_power_table(struct amdgpu_device *adev)
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
 		ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL);
-		if (ps == NULL) {
-			kfree(adev->pm.dpm.ps);
+		if (ps == NULL)
 			return -ENOMEM;
-		}
 		adev->pm.dpm.ps[i].ps_priv = ps;
 		k = 0;
 		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
-- 
GitLab


From c2709b2d6a537ca0fa0f1da36fdaf07e48ef447d Mon Sep 17 00:00:00 2001
From: Zhipeng Lu <alexious@zju.edu.cn>
Date: Fri, 15 Dec 2023 00:58:42 +0800
Subject: [PATCH 1188/2340] gpu/drm/radeon: fix two memleaks in radeon_vm_init

When radeon_bo_create and radeon_vm_clear_bo fail, the vm->page_tables
allocated before need to be freed. However, neither radeon_vm_init
itself nor its caller have done such deallocation.

Fixes: 6d2f2944e95e ("drm/radeon: use normal BOs for the page tables v4")
Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_vm.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 987cabbf1318e..c38b4d5d6a14f 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -1204,13 +1204,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	r = radeon_bo_create(rdev, pd_size, align, true,
 			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
 			     NULL, &vm->page_directory);
-	if (r)
+	if (r) {
+		kfree(vm->page_tables);
+		vm->page_tables = NULL;
 		return r;
-
+	}
 	r = radeon_vm_clear_bo(rdev, vm->page_directory);
 	if (r) {
 		radeon_bo_unref(&vm->page_directory);
 		vm->page_directory = NULL;
+		kfree(vm->page_tables);
+		vm->page_tables = NULL;
 		return r;
 	}
 
-- 
GitLab


From a6582701178a47c4d0cb2188c965c59c0c0647c8 Mon Sep 17 00:00:00 2001
From: Zhipeng Lu <alexious@zju.edu.cn>
Date: Fri, 15 Dec 2023 00:59:38 +0800
Subject: [PATCH 1189/2340] drm/amd/pm: fix a double-free in
 amdgpu_parse_extended_power_table

The amdgpu_free_extended_power_table is called in every error-handling
paths of amdgpu_parse_extended_power_table. However, after the following
call chain of returning:

amdgpu_parse_extended_power_table
  |-> kv_dpm_init / si_dpm_init
      (the only two caller of amdgpu_parse_extended_power_table)
        |-> kv_dpm_sw_init / si_dpm_sw_init
            (the only caller of kv_dpm_init / si_dpm_init, accordingly)
              |-> kv_dpm_fini / si_dpm_fini
                  (goto dpm_failed in xx_dpm_sw_init)
                    |-> amdgpu_free_extended_power_table

As above, the amdgpu_free_extended_power_table is called twice in this
returning chain and thus a double-free is triggered. Similarily, the
last kfree in amdgpu_parse_extended_power_table also cause a double free
with amdgpu_free_extended_power_table in kv_dpm_fini.

Fixes: 84176663e70d ("drm/amd/pm: create a new holder for those APIs used only by legacy ASICs(si/kv)")
Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 52 +++++--------------
 1 file changed, 13 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index 81fb4e5dd804b..60377747bab4f 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -272,10 +272,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddcDependencyOnSCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usVddciDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -283,10 +281,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddciDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usVddcDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -294,10 +290,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddcDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usMvddDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -305,10 +299,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usMvddDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usMaxClockVoltageOnDCOffset) {
 			ATOM_PPLIB_Clock_Voltage_Limit_Table *clk_v =
@@ -339,10 +331,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				kcalloc(psl->ucNumEntries,
 					sizeof(struct amdgpu_phase_shedding_limits_entry),
 					GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries)
 				return -ENOMEM;
-			}
 
 			entry = &psl->entries[0];
 			for (i = 0; i < psl->ucNumEntries; i++) {
@@ -383,10 +373,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ATOM_PPLIB_CAC_Leakage_Record *entry;
 			u32 size = cac_table->ucNumEntries * sizeof(struct amdgpu_cac_leakage_table);
 			adev->pm.dpm.dyn_state.cac_leakage_table.entries = kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries)
 				return -ENOMEM;
-			}
 			entry = &cac_table->entries[0];
 			for (i = 0; i < cac_table->ucNumEntries; i++) {
 				if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) {
@@ -438,10 +426,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_vce_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -493,10 +479,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_uvd_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -525,10 +509,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -548,10 +530,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(ext_hdr->usPPMTableOffset));
 			adev->pm.dpm.dyn_state.ppm_table =
 				kzalloc(sizeof(struct amdgpu_ppm_table), GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.ppm_table) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.ppm_table)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.ppm_table->ppm_design = ppm->ucPpmDesign;
 			adev->pm.dpm.dyn_state.ppm_table->cpu_core_number =
 				le16_to_cpu(ppm->usCpuCoreNumber);
@@ -583,10 +563,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -606,10 +584,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ATOM_PowerTune_Table *pt;
 			adev->pm.dpm.dyn_state.cac_tdp_table =
 				kzalloc(sizeof(struct amdgpu_cac_tdp_table), GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.cac_tdp_table) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.cac_tdp_table)
 				return -ENOMEM;
-			}
 			if (rev > 0) {
 				ATOM_PPLIB_POWERTUNE_Table_V1 *ppt = (ATOM_PPLIB_POWERTUNE_Table_V1 *)
 					(mode_info->atom_context->bios + data_offset +
@@ -645,10 +621,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ret = amdgpu_parse_clk_voltage_dep_table(
 					&adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk,
 					dep_table);
-			if (ret) {
-				kfree(adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk.entries);
+			if (ret)
 				return ret;
-			}
 		}
 	}
 
-- 
GitLab


From 8b881b5d6fe9ebb7736097f37103c9b07ea45642 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 14 Dec 2023 10:41:41 -0500
Subject: [PATCH 1190/2340] drm/amd/display: fix documentation for
 amdgpu_dm_verify_lut3d_size()

It takes the plane state rather than the crtc state.

Fixes: aba8b76baabd ("drm/amd/display: add plane shaper LUT support")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Melissa Wen <mwen@igalia.com>
Cc: Harry.Wentland@amd.com
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 96aecc1a71a33..c6ed0d854b01d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -796,7 +796,7 @@ static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
  * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
  * shaper and 3D LUTs match the hw supported size
  * @adev: amdgpu device
- * @crtc_state: the DRM CRTC state
+ * @plane_state: the DRM plane state
  *
  * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or
  * newer) and if the user shaper and 3D LUTs match the supported size.
-- 
GitLab


From c6ef0a2265c518aa6699b64d10a7e5a9049ac96a Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Thu, 14 Dec 2023 18:45:16 -0100
Subject: [PATCH 1191/2340] drm/amd/display: fix documentation for
 dm_crtc_additional_color_mgmt()

warning: expecting prototype for drm_crtc_additional_color_mgmt().
Prototype was for dm_crtc_additional_color_mgmt() instead

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202312141801.o9eBCxt9-lkp@intel.com/
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 9224e2d7f32b9..7545a184e43a6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -299,7 +299,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
 
 #ifdef AMD_PRIVATE_COLOR
 /**
- * drm_crtc_additional_color_mgmt - enable additional color properties
+ * dm_crtc_additional_color_mgmt - enable additional color properties
  * @crtc: DRM CRTC
  *
  * This function lets the driver enable post-blending CRTC regamma transfer
-- 
GitLab


From 97bb5e691189d342fc617dc0f1ab3e51a3676602 Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Wed, 13 Dec 2023 12:16:12 +0530
Subject: [PATCH 1192/2340] drm/i915: Add Wa_14019877138

Enable Force Dispatch Ends Collection for DG2.

BSpec: 46001

Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213064612.480032-1-haridhar.kalvala@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 3 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 9de41703fae58..50962cfd1353a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -469,6 +469,9 @@
 #define XEHP_PSS_MODE2				MCR_REG(0x703c)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
+#define XEHP_PSS_CHICKEN			MCR_REG(0x7044)
+#define   FD_END_COLLECT			REG_BIT(5)
+
 #define GEN7_SC_INSTDONE			_MMIO(0x7100)
 #define GEN12_SC_INSTDONE_EXTRA			_MMIO(0x7104)
 #define GEN12_SC_INSTDONE_EXTRA2		_MMIO(0x7108)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4cbf9e5126459..3eacbc50caf8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -777,6 +777,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 
 	/* Wa_18019271663:dg2 */
 	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+
+	/* Wa_14019877138:dg2 */
+	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
 }
 
 static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
-- 
GitLab


From 937d02cc79c6828fef28a4d80d8d0ad2f7bf2b62 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 14 Dec 2023 00:05:26 +0200
Subject: [PATCH 1193/2340] drm/i915/mtl: Fix HDMI/DP PLL clock selection

Select the HDMI specific PLL clock only for HDMI outputs.

Fixes: 62618c7f117e ("drm/i915/mtl: C20 PLL programming")
Cc: Mika Kahola <mika.kahola@intel.com>
Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Reviewed-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213220526.1828827-1-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_cx0_phy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index 4e6ea71ff6294..884a1da360893 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -2468,7 +2468,8 @@ static void intel_program_port_clock_ctl(struct intel_encoder *encoder,
 
 	val |= XELPDP_FORWARD_CLOCK_UNGATE;
 
-	if (is_hdmi_frl(crtc_state->port_clock))
+	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) &&
+	    is_hdmi_frl(crtc_state->port_clock))
 		val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_DIV18CLK);
 	else
 		val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_MAXPCLK);
-- 
GitLab


From e6174e8e19e8fd26016c941c7271868326cd861a Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:39 +0800
Subject: [PATCH 1194/2340] drm/i915: Use kmap_local_page() in
 gem/i915_gem_object.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1], and this patch converts the call from
kmap_atomic() to kmap_local_page().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration).

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults and preemption disables.

There're 2 reasons why i915_gem_object_read_from_page_kmap() doesn't
need to disable pagefaults and preemption for mapping:

1. The flush operation is safe. In drm/i915/gem/i915_gem_object.c,
i915_gem_object_read_from_page_kmap() calls drm_clflush_virt_range() to
use CLFLUSHOPT or WBINVD to flush. Since CLFLUSHOPT is global on x86
and WBINVD is called on each cpu in drm_clflush_virt_range(), the flush
operation is global.

2. Any context switch caused by preemption or page faults (page fault
may cause sleep) doesn't affect the validity of local mapping.

Therefore, i915_gem_object_read_from_page_kmap() is a function where
the use of kmap_local_page() in place of kmap_atomic() is correctly
suited.

Convert the calls of kmap_atomic() / kunmap_atomic() to
kmap_local_page() / kunmap_local().

And remove the redundant variable that stores the address of the mapped
page since kunmap_local() can accept any pointer within the page.

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-2-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 25eeeb863209e..58e6c680fe0df 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -500,17 +500,15 @@ static void
 i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
 {
 	pgoff_t idx = offset >> PAGE_SHIFT;
-	void *src_map;
 	void *src_ptr;
 
-	src_map = kmap_atomic(i915_gem_object_get_page(obj, idx));
-
-	src_ptr = src_map + offset_in_page(offset);
+	src_ptr = kmap_local_page(i915_gem_object_get_page(obj, idx))
+	          + offset_in_page(offset);
 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 		drm_clflush_virt_range(src_ptr, size);
 	memcpy(dst, src_ptr, size);
 
-	kunmap_atomic(src_map);
+	kunmap_local(src_ptr);
 }
 
 static void
-- 
GitLab


From f4d88908cd9a430a7473eea6ff2300a3b728e11c Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:40 +0800
Subject: [PATCH 1195/2340] drm/i915: Use memcpy_[from/to]_page() in
 gem/i915_gem_pyhs.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1],  and this patch converts the call from
kmap_atomic() + memcpy() to memcpy_[from/to]_page(), which use
kmap_local_page() to build local mapping and then do memcpy().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration).

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults and preemption disables.

In drm/i915/gem/i915_gem_phys.c, the functions
i915_gem_object_get_pages_phys() and i915_gem_object_put_pages_phys()
don't need to disable pagefaults and preemption for mapping because of
2 reasons:

1. The flush operation is safe. In drm/i915/gem/i915_gem_object.c,
i915_gem_object_get_pages_phys() and i915_gem_object_put_pages_phys()
calls drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush.
Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in
drm_clflush_virt_range(), the flush operation is global.

2. Any context switch caused by preemption or page faults (page fault
may cause sleep) doesn't affect the validity of local mapping.

Therefore, i915_gem_object_get_pages_phys() and
i915_gem_object_put_pages_phys() are two functions where the uses of
local mappings in place of atomic mappings are correctly suited.

Convert the calls of kmap_atomic() / kunmap_atomic() + memcpy() to
memcpy_from_page() and memcpy_to_page().

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-3-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_phys.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 5df128e2f4dc2..ef85c6dc9fd59 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -65,16 +65,13 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	dst = vaddr;
 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 		struct page *page;
-		void *src;
 
 		page = shmem_read_mapping_page(mapping, i);
 		if (IS_ERR(page))
 			goto err_st;
 
-		src = kmap_atomic(page);
-		memcpy(dst, src, PAGE_SIZE);
+		memcpy_from_page(dst, page, 0, PAGE_SIZE);
 		drm_clflush_virt_range(dst, PAGE_SIZE);
-		kunmap_atomic(src);
 
 		put_page(page);
 		dst += PAGE_SIZE;
@@ -113,16 +110,13 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 			struct page *page;
-			char *dst;
 
 			page = shmem_read_mapping_page(mapping, i);
 			if (IS_ERR(page))
 				continue;
 
-			dst = kmap_atomic(page);
 			drm_clflush_virt_range(src, PAGE_SIZE);
-			memcpy(dst, src, PAGE_SIZE);
-			kunmap_atomic(dst);
+			memcpy_to_page(page, 0, src, PAGE_SIZE);
 
 			set_page_dirty(page);
 			if (obj->mm.madv == I915_MADV_WILLNEED)
-- 
GitLab


From 756eed0f2602f73df8d6c5bc8418ecd11cce9803 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:41 +0800
Subject: [PATCH 1196/2340] drm/i915: Use kmap_local_page() in
 gem/i915_gem_shmem.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1].

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration).

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults or preemption disables.

In drm/i915/gem/i915_gem_shmem.c, the function shmem_pwrite() need to
disable pagefault to eliminate the potential recursion fault[2]. But
here __copy_from_user_inatomic() doesn't need to disable preemption and
local mapping is valid for sched in/out.

So it can use kmap_local_page() / kunmap_local() with
pagefault_disable() / pagefault_enable() to replace atomic mapping.

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com
[2]: https://patchwork.freedesktop.org/patch/295840/

Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-4-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 73a4a4eb29e08..38b72d86560f0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -485,11 +485,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
 		if (err < 0)
 			return err;
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_page(page);
+		pagefault_disable();
 		unwritten = __copy_from_user_inatomic(vaddr + pg,
 						      user_data,
 						      len);
-		kunmap_atomic(vaddr);
+		pagefault_enable();
+		kunmap_local(vaddr);
 
 		err = aops->write_end(obj->base.filp, mapping, offset, len,
 				      len - unwritten, page, data);
-- 
GitLab


From 1fcb967595a5156da2f081a5ade319c60fc5af72 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:42 +0800
Subject: [PATCH 1197/2340] drm/i915: Use kmap_local_page() in
 gem/selftests/huge_pages.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1], and this patch converts the call from
kmap_atomic() to kmap_local_page().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration).

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults or preemption disables.

In drm/i915/gem/selftests/huge_pages.c, function __cpu_check_shmem()
mainly uses mapping to flush cache and check the value. There're
2 reasons why __cpu_check_shmem() doesn't need to disable pagefaults
and preemption for mapping:

1. The flush operation is safe. Function __cpu_check_shmem() calls
drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since
CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in
drm_clflush_virt_range(), the flush operation is global.

2. Any context switch caused by preemption or page faults (page fault
may cause sleep) doesn't affect the validity of local mapping.

Therefore, __cpu_check_shmem() is a function where the use of
kmap_local_page() in place of kmap_atomic() is correctly suited.

Convert the calls of kmap_atomic() / kunmap_atomic() to
kmap_local_page() / kunmap_local().

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-5-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 6b9f6cf50bf6b..c9e6d77abab07 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1082,7 +1082,7 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		goto err_unlock;
 
 	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
-		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
+		u32 *ptr = kmap_local_page(i915_gem_object_get_page(obj, n));
 
 		if (needs_flush & CLFLUSH_BEFORE)
 			drm_clflush_virt_range(ptr, PAGE_SIZE);
@@ -1090,12 +1090,12 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		if (ptr[dword] != val) {
 			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
 			       n, dword, ptr[dword], val);
-			kunmap_atomic(ptr);
+			kunmap_local(ptr);
 			err = -EINVAL;
 			break;
 		}
 
-		kunmap_atomic(ptr);
+		kunmap_local(ptr);
 	}
 
 	i915_gem_object_finish_access(obj);
-- 
GitLab


From 40b399000665ee154927a8e0d7b0c7e7505bbaef Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:43 +0800
Subject: [PATCH 1198/2340] drm/i915: Use kmap_local_page() in
 gem/selftests/i915_gem_coherency.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1], and this patch converts the call from
kmap_atomic() to kmap_local_page().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration)..

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults or preemption disables.

In drm/i915/gem/selftests/i915_gem_coherency.c, functions cpu_set()
and cpu_get() mainly uses mapping to flush cache and assign the value.
There're 2 reasons why cpu_set() and cpu_get() don't need to disable
pagefaults and preemption for mapping:

1. The flush operation is safe. cpu_set() and cpu_get() call
drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since
CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in
drm_clflush_virt_range(), the flush operation is global.

2. Any context switch caused by preemption or page faults (page fault
may cause sleep) doesn't affect the validity of local mapping.

Therefore, cpu_set() and cpu_get() are functions where the use of
kmap_local_page() in place of kmap_atomic() is correctly suited.

Convert the calls of kmap_atomic() / kunmap_atomic() to
kmap_local_page() / kunmap_local().

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-6-zhao1.liu@linux.intel.com
---
 .../gpu/drm/i915/gem/selftests/i915_gem_coherency.c  | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3fd68a099a85e..2a0c0634d446e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -24,7 +24,6 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
-	void *map;
 	u32 *cpu;
 	int err;
 
@@ -34,8 +33,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
-	map = kmap_atomic(page);
-	cpu = map + offset_in_page(offset);
+	cpu = kmap_local_page(page) + offset_in_page(offset);
 
 	if (needs_clflush & CLFLUSH_BEFORE)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
@@ -45,7 +43,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 	if (needs_clflush & CLFLUSH_AFTER)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
-	kunmap_atomic(map);
+	kunmap_local(cpu);
 	i915_gem_object_finish_access(ctx->obj);
 
 out:
@@ -57,7 +55,6 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
-	void *map;
 	u32 *cpu;
 	int err;
 
@@ -67,15 +64,14 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
-	map = kmap_atomic(page);
-	cpu = map + offset_in_page(offset);
+	cpu = kmap_local_page(page) + offset_in_page(offset);
 
 	if (needs_clflush & CLFLUSH_BEFORE)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	*v = *cpu;
 
-	kunmap_atomic(map);
+	kunmap_local(cpu);
 	i915_gem_object_finish_access(ctx->obj);
 
 out:
-- 
GitLab


From b1c51b0e2e7cb98f643a801c50f8ad76ebc36450 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:44 +0800
Subject: [PATCH 1199/2340] drm/i915: Use kmap_local_page() in
 gem/selftests/i915_gem_context.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1], and this patch converts the call from
kmap_atomic() to kmap_local_page().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption.

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults or preemption disables.

In drm/i915/gem/selftests/i915_gem_context.c, functions cpu_fill() and
cpu_check() mainly uses mapping to flush cache and check/assign the
value.

There're 2 reasons why cpu_fill() and cpu_check() don't need to disable
pagefaults and preemption for mapping:

1. The flush operation is safe. cpu_fill() and cpu_check() call
drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since
CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in
drm_clflush_virt_range(), the flush operation is global.

2. Any context switch caused by preemption or page faults (page fault
may cause sleep) doesn't affect the validity of local mapping.

Therefore, cpu_fill() and cpu_check() are functions where the use of
kmap_local_page() in place of kmap_atomic() is correctly suited.

Convert the calls of kmap_atomic() / kunmap_atomic() to
kmap_local_page() / kunmap_local().

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-7-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 7021b6e9b219e..89d4dc8b60c6a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -489,12 +489,12 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map;
 
-		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		map = kmap_local_page(i915_gem_object_get_page(obj, n));
 		for (m = 0; m < DW_PER_PAGE; m++)
 			map[m] = value;
 		if (!has_llc)
 			drm_clflush_virt_range(map, PAGE_SIZE);
-		kunmap_atomic(map);
+		kunmap_local(map);
 	}
 
 	i915_gem_object_finish_access(obj);
@@ -520,7 +520,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map, m;
 
-		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		map = kmap_local_page(i915_gem_object_get_page(obj, n));
 		if (needs_flush & CLFLUSH_BEFORE)
 			drm_clflush_virt_range(map, PAGE_SIZE);
 
@@ -546,7 +546,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 		}
 
 out_unmap:
-		kunmap_atomic(map);
+		kunmap_local(map);
 		if (err)
 			break;
 	}
-- 
GitLab


From 55a6e46180cb8b36fb1076501b569bfd42df1644 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:45 +0800
Subject: [PATCH 1200/2340] drm/i915: Use memcpy_from_page() in
 gt/uc/intel_uc_fw.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1], and this patch converts the call from
kmap_atomic() to kmap_local_page().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption  (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration).

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults or preemption disables.

In drm/i915/gt/uc/intel_us_fw.c, the function intel_uc_fw_copy_rsa()
just use the mapping to do memory copy so it doesn't need to disable
pagefaults and preemption for mapping. Thus the local mapping without
atomic context (not disable pagefaults / preemption) is enough.

Therefore, intel_uc_fw_copy_rsa() is a function where the use of
memcpy_from_page() with kmap_local_page() in place of memcpy() with
kmap_atomic() is correctly suited.

Convert the calls of memcpy() with kmap_atomic() / kunmap_atomic() to
memcpy_from_page() which uses local mapping to copy.

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/T/#u

Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-8-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 362639162ed60..756093eaf2ad1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -1343,16 +1343,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
 
 		for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
 			u32 len = min_t(u32, size, PAGE_SIZE - offset);
-			void *vaddr;
 
 			if (idx > 0) {
 				idx--;
 				continue;
 			}
 
-			vaddr = kmap_atomic(page);
-			memcpy(dst, vaddr + offset, len);
-			kunmap_atomic(vaddr);
+			memcpy_from_page(dst, page, offset, len);
 
 			offset = 0;
 			dst += len;
-- 
GitLab


From e4865c60dd6e312e58c85247e48899af7e19041a Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:46 +0800
Subject: [PATCH 1201/2340] drm/i915: Use kmap_local_page() in
 i915_cmd_parser.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1], and this patch converts the call from
kmap_atomic() to kmap_local_page().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration).

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults and preemption disables.

There're 2 reasons why function copy_batch() doesn't need to disable
pagefaults and preemption for mapping:

1. The flush operation is safe. In i915_cmd_parser.c, copy_batch() calls
drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush.
Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu
in drm_clflush_virt_range(), the flush operation is global.

2. Any context switch caused by preemption or page faults (page fault
may cause sleep) doesn't affect the validity of local mapping.

Therefore, copy_batch() is a function where the use of
kmap_local_page() in place of kmap_atomic() is correctly suited.

Convert the calls of kmap_atomic() / kunmap_atomic() to
kmap_local_page() / kunmap_local().

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-9-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index ddf49c2dbb917..2905df83e180e 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1211,11 +1211,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		for (n = offset >> PAGE_SHIFT; remain; n++) {
 			int len = min(remain, PAGE_SIZE - x);
 
-			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+			src = kmap_local_page(i915_gem_object_get_page(src_obj, n));
 			if (src_needs_clflush)
 				drm_clflush_virt_range(src + x, len);
 			memcpy(ptr, src + x, len);
-			kunmap_atomic(src);
+			kunmap_local(src);
 
 			ptr += len;
 			remain -= len;
-- 
GitLab


From 31accc37eaee98a90b25809ed58c6ee4956ab642 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Sun, 3 Dec 2023 21:29:47 +0800
Subject: [PATCH 1202/2340] drm/i915: Use kmap_local_page() in
 gem/i915_gem_execbuffer.c

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1], and this patch converts the calls from
kmap_atomic() to kmap_local_page().

The main difference between atomic and local mappings is that local
mappings doesn't disable page faults or preemption (the preemption is
disabled for !PREEMPT_RT case, otherwise it only disables migration).

With kmap_local_page(), we can avoid the often unwanted side effect of
unnecessary page faults and preemption disables.

In i915_gem_execbuffer.c, eb->reloc_cache.vaddr is mapped by
kmap_atomic() in eb_relocate_entry(), and is unmapped by
kunmap_atomic() in reloc_cache_reset().

And this mapping/unmapping occurs in two places: one is in
eb_relocate_vma(), and another is in eb_relocate_vma_slow().

The function eb_relocate_vma() or eb_relocate_vma_slow() doesn't
need to disable pagefaults and preemption during the above mapping/
unmapping.

So it can simply use kmap_local_page() / kunmap_local() that can
instead do the mapping / unmapping regardless of the context.

Convert the calls of kmap_atomic() / kunmap_atomic() to
kmap_local_page() / kunmap_local().

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com

Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-10-zhao1.liu@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b1aa62dfb155d..12cb2ef8dbd62 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1158,7 +1158,7 @@ static void reloc_cache_unmap(struct reloc_cache *cache)
 
 	vaddr = unmask_page(cache->vaddr);
 	if (cache->vaddr & KMAP)
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 	else
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 }
@@ -1174,7 +1174,7 @@ static void reloc_cache_remap(struct reloc_cache *cache,
 	if (cache->vaddr & KMAP) {
 		struct page *page = i915_gem_object_get_page(obj, cache->page);
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_page(page);
 		cache->vaddr = unmask_flags(cache->vaddr) |
 			(unsigned long)vaddr;
 	} else {
@@ -1204,7 +1204,7 @@ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer
 		if (cache->vaddr & CLFLUSH_AFTER)
 			mb();
 
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 		i915_gem_object_finish_access(obj);
 	} else {
 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
@@ -1236,7 +1236,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	struct page *page;
 
 	if (cache->vaddr) {
-		kunmap_atomic(unmask_page(cache->vaddr));
+		kunmap_local(unmask_page(cache->vaddr));
 	} else {
 		unsigned int flushes;
 		int err;
@@ -1258,7 +1258,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	if (!obj->mm.dirty)
 		set_page_dirty(page);
 
-	vaddr = kmap_atomic(page);
+	vaddr = kmap_local_page(page);
 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 	cache->page = pageno;
 
-- 
GitLab


From b39610c773431ac7991cf6235e26d693ccabd9e9 Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Fri, 8 Dec 2023 16:08:25 +0000
Subject: [PATCH 1203/2340] drm/imagination: Fixed infinite loop in
 pvr_vm_mips_map()

Unwinding loop in error path for this function uses unsigned limit
variable, causing the promotion of the signed counter variable.

--> 204         for (; pfn >= start_pfn; pfn--)
                       ^^^^^^^^^^^^^^^^
If start_pfn can be zero then this is an endless loop.  I've seen this
code in other places as well.  This loop is slightly off as well.  It
should decrement pfn on the first iteration.

Fix by making the loop limit variables signed. Also fix missing
predecrement by modifying to while loop.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208160825.92933-1-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_vm_mips.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c
index 2bc7181a4c3eb..b7fef3c797e6c 100644
--- a/drivers/gpu/drm/imagination/pvr_vm_mips.c
+++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c
@@ -152,8 +152,8 @@ pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
 	u64 end;
 	u32 cache_policy;
 	u32 pte_flags;
-	u32 start_pfn;
-	u32 end_pfn;
+	s32 start_pfn;
+	s32 end_pfn;
 	s32 pfn;
 	int err;
 
@@ -201,7 +201,7 @@ pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj)
 	return 0;
 
 err_unmap_pages:
-	for (; pfn >= start_pfn; pfn--)
+	while (--pfn >= start_pfn)
 		WRITE_ONCE(mips_data->pt[pfn], 0);
 
 	pvr_mmu_flush_request_all(pvr_dev);
-- 
GitLab


From f1f55ed3ffe4212f5c96106bf6396c461a2bf223 Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Fri, 8 Dec 2023 16:30:19 +0000
Subject: [PATCH 1204/2340] drm/imagination: Fixed oops when misusing ioctl
 CREATE_HWRT_DATASET

While writing the matching IGT suite I discovered that it's possible to
cause a kernel oops when using DRM_IOCTL_PVR_CREATE_HWRT_DATASET when
the call to hwrt_init_common_fw_structure() fails.

Use an unwind-type error path to avoid cleaning up the object using the
the release function before it is fully resolved.

Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231208163019.95913-1-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_hwrt.c | 27 +++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.c b/drivers/gpu/drm/imagination/pvr_hwrt.c
index c4213c18489e6..54f88d6c01e56 100644
--- a/drivers/gpu/drm/imagination/pvr_hwrt.c
+++ b/drivers/gpu/drm/imagination/pvr_hwrt.c
@@ -458,7 +458,7 @@ pvr_hwrt_dataset_create(struct pvr_file *pvr_file,
 			struct drm_pvr_ioctl_create_hwrt_dataset_args *args)
 {
 	struct pvr_hwrt_dataset *hwrt;
-	int err;
+	int err, i = 0;
 
 	/* Create and fill out the kernel structure */
 	hwrt = kzalloc(sizeof(*hwrt), GFP_KERNEL);
@@ -466,35 +466,36 @@ pvr_hwrt_dataset_create(struct pvr_file *pvr_file,
 	if (!hwrt)
 		return ERR_PTR(-ENOMEM);
 
-	kref_init(&hwrt->ref_count);
-
 	err = hwrt_init_kernel_structure(pvr_file, args, hwrt);
 	if (err < 0)
 		goto err_free;
 
 	err = hwrt_init_common_fw_structure(pvr_file, args, hwrt);
 	if (err < 0)
-		goto err_free;
+		goto err_fini_kernel_structure;
 
-	for (int i = 0; i < ARRAY_SIZE(hwrt->data); i++) {
+	for (; i < ARRAY_SIZE(hwrt->data); i++) {
 		err = hwrt_data_init_fw_structure(pvr_file, hwrt, args,
 						  &args->rt_data_args[i],
 						  &hwrt->data[i]);
-		if (err < 0) {
-			i--;
-			/* Destroy already created structures. */
-			for (; i >= 0; i--)
-				hwrt_data_fini_fw_structure(hwrt, i);
-			goto err_free;
-		}
+		if (err < 0)
+			goto err_fini_data_structures;
 
 		hwrt->data[i].hwrt_dataset = hwrt;
 	}
 
+	kref_init(&hwrt->ref_count);
 	return hwrt;
 
+err_fini_data_structures:
+	while (--i >= 0)
+		hwrt_data_fini_fw_structure(hwrt, i);
+
+err_fini_kernel_structure:
+	hwrt_fini_kernel_structure(hwrt);
+
 err_free:
-	pvr_hwrt_dataset_put(hwrt);
+	kfree(hwrt);
 
 	return ERR_PTR(err);
 }
-- 
GitLab


From f175498378bdae2ebcf61170a2a866cb96e8a69a Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Wed, 13 Dec 2023 14:44:30 +0000
Subject: [PATCH 1205/2340] drm/imagination: Fix ERR_PTR test on pointer to
 pointer.

drivers/gpu/drm/imagination/pvr_vm.c:631 pvr_vm_create_context()
  error: 'vm_ctx->mmu_ctx' dereferencing possible ERR_PTR()

612         vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
613         err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx);
                                      ^
The address is never an error pointer so this will always return 0.
Remove the ampersand.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213144431.94956-1-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 82690cee978c4..598d79d7c7d08 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -568,7 +568,7 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
 		       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
 
 	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
-	err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx);
+	err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx);
 	if (err) {
 		vm_ctx->mmu_ctx = NULL;
 		goto err_put_ctx;
-- 
GitLab


From 8a53e29fe05c56f643eaab285f224c09b9c3dd4c Mon Sep 17 00:00:00 2001
From: Donald Robson <donald.robson@imgtec.com>
Date: Wed, 13 Dec 2023 14:44:31 +0000
Subject: [PATCH 1206/2340] drm/imagination: Fix error path in
 pvr_vm_create_context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is possible to double free the vm_ctx->mmu_ctx object in this
function.

    630 err_page_table_destroy:
--> 631         pvr_mmu_context_destroy(vm_ctx->mmu_ctx);

The pvr_vm_context_put() function does:

        kref_put(&vm_ctx->ref_count, pvr_vm_context_release);

Here the pvr_vm_context_release() will call:

        pvr_mmu_context_destroy(vm_ctx->mmu_ctx);

Refactor to an unwind style.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213144431.94956-2-donald.robson@imgtec.com
---
 drivers/gpu/drm/imagination/pvr_vm.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
index 598d79d7c7d08..e59517ba039ef 100644
--- a/drivers/gpu/drm/imagination/pvr_vm.c
+++ b/drivers/gpu/drm/imagination/pvr_vm.c
@@ -556,23 +556,12 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
 	if (!vm_ctx)
 		return ERR_PTR(-ENOMEM);
 
-	drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0);
-
 	vm_ctx->pvr_dev = pvr_dev;
-	kref_init(&vm_ctx->ref_count);
-	mutex_init(&vm_ctx->lock);
-
-	drm_gpuvm_init(&vm_ctx->gpuvm_mgr,
-		       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
-		       0, &pvr_dev->base, &vm_ctx->dummy_gem,
-		       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
 
 	vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
 	err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx);
-	if (err) {
-		vm_ctx->mmu_ctx = NULL;
-		goto err_put_ctx;
-	}
+	if (err)
+		goto err_free;
 
 	if (is_userspace_context) {
 		err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_fwmemcontext),
@@ -583,13 +572,22 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
 			goto err_page_table_destroy;
 	}
 
+	drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0);
+	drm_gpuvm_init(&vm_ctx->gpuvm_mgr,
+		       is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
+		       0, &pvr_dev->base, &vm_ctx->dummy_gem,
+		       0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
+
+	mutex_init(&vm_ctx->lock);
+	kref_init(&vm_ctx->ref_count);
+
 	return vm_ctx;
 
 err_page_table_destroy:
 	pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
 
-err_put_ctx:
-	pvr_vm_context_put(vm_ctx);
+err_free:
+	kfree(vm_ctx);
 
 	return ERR_PTR(err);
 }
-- 
GitLab


From 24149412dfc71f7f4a54868702e9145e396263d3 Mon Sep 17 00:00:00 2001
From: Jonathan Kim <jonathan.kim@amd.com>
Date: Wed, 13 Dec 2023 22:08:03 -0500
Subject: [PATCH 1207/2340] drm/amdkfd: only flush mes process context if mes
 support is there

Fix up on mes process context flush to prevent non-mes devices from
spamming error messages or running into undefined behaviour during
process termination.

Fixes: bd33bb1409b4 ("drm/amdkfd: fix mes set shader debugger process management")
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Eric Huang <jinhuieric.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 8e55e78fce4e8..43eff221eae58 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -87,7 +87,8 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 		return;
 
 	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
-	amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
+	if (dev->kfd->shared_resources.enable_mes)
+		amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
 	pdd->already_dequeued = true;
 }
 
-- 
GitLab


From 65a618dd73216e111baab144a837f842dbb6a738 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Thu, 14 Dec 2023 09:42:03 -0500
Subject: [PATCH 1208/2340] drm/amdkfd: svm range always mapped flag not
 working on APU

On gfx943 APU there is no VRAM and page migration, queue CWSR area, svm
range with always mapped flag, is not mapped to GPU correctly. This
works fine if retry fault on CWSR area can be recovered, but could cause
deadlock if there is another retry fault recover waiting for CWSR to
finish.

Fix this by mapping svm range with always mapped flag to GPU with ACCESS
attribute if XNACK ON.

There is side effect, because all GPUs have ACCESS attribute by default
on new svm range with XNACK on, the CWSR area will be mapped to all GPUs
after this change. This side effect will be fixed with Thunk change to
set CWSR svm range with ACCESS_IN_PLACE attribute on the GPU that user
queue is created.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 617d20f0380f3..fb0011f802771 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1607,18 +1607,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			if (test_bit(gpuidx, prange->bitmap_access))
 				bitmap_set(ctx->bitmap, gpuidx, 1);
 		}
+
+		/*
+		 * If prange is already mapped or with always mapped flag,
+		 * update mapping on GPUs with ACCESS attribute
+		 */
+		if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+			if (prange->mapped_to_gpu ||
+			    prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
+				bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+		}
 	} else {
 		bitmap_or(ctx->bitmap, prange->bitmap_access,
 			  prange->bitmap_aip, MAX_GPU_INSTANCE);
 	}
 
 	if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
-		bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
-		if (!prange->mapped_to_gpu ||
-		    bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
-			r = 0;
-			goto free_ctx;
-		}
+		r = 0;
+		goto free_ctx;
 	}
 
 	if (prange->actual_loc && !prange->ttm_res) {
-- 
GitLab


From 78b4dfd35999e22b4f589a3e070c4aa5f07ce3a2 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Fri, 8 Dec 2023 17:41:25 -0500
Subject: [PATCH 1209/2340] drm/amdgpu: increase hmm range get pages timeout

When application tries to allocate all system memory and cause memory
to swap out. Needs more time for hmm_range_fault to validate the
remaining page for allocation. To be safe, increase timeout value to
1 second for 64MB range.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 081267161d401..b24eb5821fd19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -190,8 +190,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 		pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
 			hmm_range->start, hmm_range->end);
 
-		/* Assuming 128MB takes maximum 1 second to fault page address */
-		timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL);
+		/* Assuming 64MB takes maximum 1 second to fault page address */
+		timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL);
 		timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
 		timeout = jiffies + msecs_to_jiffies(timeout);
 
-- 
GitLab


From 0c8c0e7a9eebc2de03d161de4376e0d9158b6817 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Fri, 8 Dec 2023 17:49:54 -0500
Subject: [PATCH 1210/2340] drm/amdgpu: make an improvement on
 amdgpu_hmm_range_get_pages

Only schedule when hmm_range_fault returns error.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index b24eb5821fd19..55b65fc04b651 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -199,6 +199,7 @@ retry:
 		hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
 		r = hmm_range_fault(hmm_range);
 		if (unlikely(r)) {
+			schedule();
 			/*
 			 * FIXME: This timeout should encompass the retry from
 			 * mmu_interval_read_retry() as well.
@@ -212,7 +213,6 @@ retry:
 			break;
 		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
 		hmm_range->start = hmm_range->end;
-		schedule();
 	} while (hmm_range->end < end);
 
 	hmm_range->start = start;
-- 
GitLab


From 7046ca9c1ba64938f1b498026419d47b0993c69f Mon Sep 17 00:00:00 2001
From: Li Ma <li.ma@amd.com>
Date: Fri, 15 Dec 2023 11:37:20 +0800
Subject: [PATCH 1211/2340] drm/amd/swsmu: remove duplicate definition of smu
 v14_0_0 driver if version

There is a repeated define of smu v14_0_0 driver if version, so delete
one in driver if header.

Signed-off-by: Li Ma <li.ma@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h   | 5 -----
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h                 | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
index 8f42771e1f0a2..5bb7a63c0602b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
@@ -24,11 +24,6 @@
 #ifndef SMU14_DRIVER_IF_V14_0_0_H
 #define SMU14_DRIVER_IF_V14_0_0_H
 
-// *** IMPORTANT ***
-// SMU TEAM: Always increment the interface version if
-// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 7
-
 typedef struct {
   int32_t value;
   uint32_t numFractionalBits;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
index a5b569976f192..3f7463c1c1a91 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
@@ -26,8 +26,8 @@
 #include "amdgpu_smu.h"
 
 #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
-#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x6
 
 #define FEATURE_MASK(feature) (1ULL << feature)
 
-- 
GitLab


From 0f657938e4345a77be871d906f3e0de3c58a7a49 Mon Sep 17 00:00:00 2001
From: Samson Tam <samson.tam@amd.com>
Date: Tue, 28 Nov 2023 16:53:12 -0500
Subject: [PATCH 1212/2340] drm/amd/display: do not send commands to DMUB if
 DMUB is inactive from S3

[Why]
On resume from S3, may get apply_idle_optimizations call while DMUB
is inactive which will just time out.

[How]
Set and track power state in dmub_srv and check power state before
sending commands to DMUB.  Add interface in both dmub_srv and
dc_dmub_srv

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Samson Tam <samson.tam@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  3 +++
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 14 +++++++++++++
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |  2 ++
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h   | 21 +++++++++++++++++++
 .../gpu/drm/amd/display/dmub/src/dmub_srv.c   | 15 +++++++++++++
 5 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d7ac020bd8af8..54861136dafd9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2687,6 +2687,7 @@ static int dm_suspend(void *handle)
 	hpd_rx_irq_work_suspend(dm);
 
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
+	dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
 
 	return 0;
 }
@@ -2882,6 +2883,7 @@ static int dm_resume(void *handle)
 		if (r)
 			DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
 
+		dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
 		dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
 		dc_resume(dm->dc);
@@ -2932,6 +2934,7 @@ static int dm_resume(void *handle)
 	}
 
 	/* power on hardware */
+	dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
 	/* program HPD filter */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 53400cc05b5b6..1a4d615ccdec1 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1268,3 +1268,17 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 		ASSERT(0);
 }
 
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState)
+{
+	struct dmub_srv *dmub;
+
+	if (!dc_dmub_srv)
+		return;
+
+	dmub = dc_dmub_srv->dmub;
+
+	if (powerState == DC_ACPI_CM_POWER_STATE_D0)
+		dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0);
+	else
+		dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index d4a60f53faab1..c25ce7546f718 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -102,4 +102,6 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con
 bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
 void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle);
 void dc_dmub_srv_exit_low_power_state(const struct dc *dc);
+
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState);
 #endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index df63aa8f01e98..d1a4ed6f5916e 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -150,6 +150,13 @@ enum dmub_memory_access_type {
 	DMUB_MEMORY_ACCESS_DMA
 };
 
+/* enum dmub_power_state type - to track DC power state in dmub_srv */
+enum dmub_srv_power_state_type {
+	DMUB_POWER_STATE_UNDEFINED = 0,
+	DMUB_POWER_STATE_D0 = 1,
+	DMUB_POWER_STATE_D3 = 8
+};
+
 /**
  * struct dmub_region - dmub hw memory region
  * @base: base address for region, must be 256 byte aligned
@@ -485,6 +492,8 @@ struct dmub_srv {
 	/* Feature capabilities reported by fw */
 	struct dmub_feature_caps feature_caps;
 	struct dmub_visual_confirm_color visual_confirm_color;
+
+	enum dmub_srv_power_state_type power_state;
 };
 
 /**
@@ -889,6 +898,18 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub);
  */
 void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index);
 
+/**
+ * dmub_srv_set_power_state() - Track DC power state in dmub_srv
+ * @dmub: The dmub service
+ * @power_state: DC power state setting
+ *
+ * Store DC power state in dmub_srv.  If dmub_srv is in D3, then don't send messages to DMUB
+ *
+ * Return:
+ *   void
+ */
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state);
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index a329a6ecf4dd6..53ac1c66dd86e 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -713,6 +713,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
 		dmub->hw_funcs.reset_release(dmub);
 
 	dmub->hw_init = true;
+	dmub->power_state = DMUB_POWER_STATE_D0;
 
 	return DMUB_STATUS_OK;
 }
@@ -766,6 +767,9 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_INVALID;
+
 	if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
 	    dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
 		return DMUB_STATUS_HW_FAILURE;
@@ -784,6 +788,9 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_INVALID;
+
 	/**
 	 * Read back all the queued commands to ensure that they've
 	 * been flushed to framebuffer memory. Otherwise DMCUB might
@@ -1100,3 +1107,11 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_
 				subvp_index);
 	}
 }
+
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state)
+{
+	if (!dmub || !dmub->hw_init)
+		return;
+
+	dmub->power_state = dmub_srv_power_state;
+}
-- 
GitLab


From c57a0f50c060b7c58f974306fe103eabb881ccbc Mon Sep 17 00:00:00 2001
From: Muhammad Ahmed <ahmed.ahmed@amd.com>
Date: Fri, 1 Dec 2023 19:11:50 -0500
Subject: [PATCH 1213/2340] drm/amd/display: remove HPO PG in driver side

[why & how]
Add config to make HPO PG handled in dmubfw ips entry/exit

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Muhammad Ahmed <ahmed.ahmed@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 4eb744f1bc9fb..4e1db842b98cc 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -718,6 +718,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.disable_dpp_power_gate = true,
 	.disable_hubp_power_gate = true,
 	.disable_optc_power_gate = true, /*should the same as above two*/
+	.disable_hpo_power_gate = true, /*dmubfw force domain25 on*/
 	.disable_clock_gate = false,
 	.disable_dsc_power_gate = true,
 	.vsr_support = true,
-- 
GitLab


From 669080888691c312cc926322a7b24600121c90fb Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Mon, 4 Dec 2023 15:19:34 -0500
Subject: [PATCH 1214/2340] drm/amd/display: Revert " drm/amd/display: Use
 channel_width = 2 for vram table 3.0"

[Description]
Revert commit fec05adc40c2 ("drm/amd/display: Use channel_width = 2 for vram table 3.0")
Because the issue is being fixed from VBIOS side.

Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 875a064bb9a5c..fcd65a2057ad4 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -2386,13 +2386,7 @@ static enum bp_result get_vram_info_v30(
 		return BP_RESULT_BADBIOSTABLE;
 
 	info->num_chans = info_v30->channel_num;
-	/* As suggested by VBIOS we should always use
-	 * dram_channel_width_bytes = 2 when using VRAM
-	 * table version 3.0. This is because the channel_width
-	 * param in the VRAM info table is changed in 7000 series and
-	 * no longer represents the memory channel width.
-	 */
-	info->dram_channel_width_bytes = 2;
+	info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8;
 
 	return result;
 }
-- 
GitLab


From 648d7be8ecf47b0556e32550145c70db153b16fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Dec 2023 23:37:47 +0200
Subject: [PATCH 1215/2340] drm/i915/dmc: Don't enable any pipe DMC events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pipe DMC seems to be making a mess of things in ADL. Various weird
symptoms have been observed such as missing vblank irqs, typicalle
happening when using multiple displays.

Keep all pipe DMC event handlers disabled until needed (which is never
atm). This is also what Windows does on ADL+.

We can also drop DG2 from disable_all_flip_queue_events() since
on DG2 the pipe DMC is the one that handles the flip queue events.

Cc: stable@vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8685
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-2-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dmc.c | 43 ++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 63e080e070236..073b85b57679a 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -389,7 +389,7 @@ disable_all_flip_queue_events(struct drm_i915_private *i915)
 	enum intel_dmc_id dmc_id;
 
 	/* TODO: check if the following applies to all D13+ platforms. */
-	if (!IS_DG2(i915) && !IS_TIGERLAKE(i915))
+	if (!IS_TIGERLAKE(i915))
 		return;
 
 	for_each_dmc_id(dmc_id) {
@@ -493,6 +493,45 @@ void intel_dmc_disable_pipe(struct drm_i915_private *i915, enum pipe pipe)
 		intel_de_rmw(i915, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0);
 }
 
+static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915,
+			       enum intel_dmc_id dmc_id, i915_reg_t reg)
+{
+	u32 offset = i915_mmio_reg_offset(reg);
+	u32 start = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, 0));
+	u32 end = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12));
+
+	return offset >= start && offset < end;
+}
+
+static bool disable_dmc_evt(struct drm_i915_private *i915,
+			    enum intel_dmc_id dmc_id,
+			    i915_reg_t reg, u32 data)
+{
+	if (!is_dmc_evt_ctl_reg(i915, dmc_id, reg))
+		return false;
+
+	/* keep all pipe DMC events disabled by default */
+	if (dmc_id != DMC_FW_MAIN)
+		return true;
+
+	return false;
+}
+
+static u32 dmc_mmiodata(struct drm_i915_private *i915,
+			struct intel_dmc *dmc,
+			enum intel_dmc_id dmc_id, int i)
+{
+	if (disable_dmc_evt(i915, dmc_id,
+			    dmc->dmc_info[dmc_id].mmioaddr[i],
+			    dmc->dmc_info[dmc_id].mmiodata[i]))
+		return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK,
+				      DMC_EVT_CTL_TYPE_EDGE_0_1) |
+			REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK,
+				       DMC_EVT_CTL_EVENT_ID_FALSE);
+	else
+		return dmc->dmc_info[dmc_id].mmiodata[i];
+}
+
 /**
  * intel_dmc_load_program() - write the firmware from memory to register.
  * @i915: i915 drm device.
@@ -532,7 +571,7 @@ void intel_dmc_load_program(struct drm_i915_private *i915)
 	for_each_dmc_id(dmc_id) {
 		for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) {
 			intel_de_write(i915, dmc->dmc_info[dmc_id].mmioaddr[i],
-				       dmc->dmc_info[dmc_id].mmiodata[i]);
+				       dmc_mmiodata(i915, dmc, dmc_id, i));
 		}
 	}
 
-- 
GitLab


From be2fce7891e20bdd1c785dd590c59d0ad6a1525a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Dec 2023 23:37:48 +0200
Subject: [PATCH 1216/2340] drm/i915/dmc: Also disable the flip queue event on
 TGL main DMC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unlike later platforms TGL has its flip queue event (CLK_MSEC) on
the main DMC (as opposed to the pipe DMC). Currently we're doing
a second pass to disable that, but let's just follow the same
approach as the later platforms and never even enable the event
in the first place.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-3-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dmc.c | 83 ++----------------------
 1 file changed, 5 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 073b85b57679a..9385898e3aa59 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -335,77 +335,6 @@ static void disable_event_handler(struct drm_i915_private *i915,
 	intel_de_write(i915, htp_reg, 0);
 }
 
-static void
-disable_flip_queue_event(struct drm_i915_private *i915,
-			 i915_reg_t ctl_reg, i915_reg_t htp_reg)
-{
-	u32 event_ctl;
-	u32 event_htp;
-
-	event_ctl = intel_de_read(i915, ctl_reg);
-	event_htp = intel_de_read(i915, htp_reg);
-	if (event_ctl != (DMC_EVT_CTL_ENABLE |
-			  DMC_EVT_CTL_RECURRING |
-			  REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK,
-					 DMC_EVT_CTL_TYPE_EDGE_0_1) |
-			  REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK,
-					 DMC_EVT_CTL_EVENT_ID_CLK_MSEC)) ||
-	    !event_htp) {
-		drm_dbg_kms(&i915->drm,
-			    "Unexpected DMC event configuration (control %08x htp %08x)\n",
-			    event_ctl, event_htp);
-		return;
-	}
-
-	disable_event_handler(i915, ctl_reg, htp_reg);
-}
-
-static bool
-get_flip_queue_event_regs(struct drm_i915_private *i915, enum intel_dmc_id dmc_id,
-			  i915_reg_t *ctl_reg, i915_reg_t *htp_reg)
-{
-	if (dmc_id == DMC_FW_MAIN) {
-		if (DISPLAY_VER(i915) == 12) {
-			*ctl_reg = DMC_EVT_CTL(i915, dmc_id, 3);
-			*htp_reg = DMC_EVT_HTP(i915, dmc_id, 3);
-
-			return true;
-		}
-	} else if (dmc_id >= DMC_FW_PIPEA && dmc_id <= DMC_FW_PIPED) {
-		if (IS_DG2(i915)) {
-			*ctl_reg = DMC_EVT_CTL(i915, dmc_id, 2);
-			*htp_reg = DMC_EVT_HTP(i915, dmc_id, 2);
-
-			return true;
-		}
-	}
-
-	return false;
-}
-
-static void
-disable_all_flip_queue_events(struct drm_i915_private *i915)
-{
-	enum intel_dmc_id dmc_id;
-
-	/* TODO: check if the following applies to all D13+ platforms. */
-	if (!IS_TIGERLAKE(i915))
-		return;
-
-	for_each_dmc_id(dmc_id) {
-		i915_reg_t ctl_reg;
-		i915_reg_t htp_reg;
-
-		if (!has_dmc_id_fw(i915, dmc_id))
-			continue;
-
-		if (!get_flip_queue_event_regs(i915, dmc_id, &ctl_reg, &htp_reg))
-			continue;
-
-		disable_flip_queue_event(i915, ctl_reg, htp_reg);
-	}
-}
-
 static void disable_all_event_handlers(struct drm_i915_private *i915)
 {
 	enum intel_dmc_id dmc_id;
@@ -514,6 +443,11 @@ static bool disable_dmc_evt(struct drm_i915_private *i915,
 	if (dmc_id != DMC_FW_MAIN)
 		return true;
 
+	/* also disable the flip queue event on the main DMC on TGL */
+	if (IS_TIGERLAKE(i915) &&
+	    REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC)
+		return true;
+
 	return false;
 }
 
@@ -579,13 +513,6 @@ void intel_dmc_load_program(struct drm_i915_private *i915)
 
 	gen9_set_dc_state_debugmask(i915);
 
-	/*
-	 * Flip queue events need to be disabled before enabling DC5/6.
-	 * i915 doesn't use the flip queue feature, so disable it already
-	 * here.
-	 */
-	disable_all_flip_queue_events(i915);
-
 	pipedmc_clock_gating_wa(i915, false);
 }
 
-- 
GitLab


From e1a4e3cb3ac67ced1fe9e83fea6d8d91f7c4e864 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 13 Dec 2023 17:08:07 +0200
Subject: [PATCH 1217/2340] drm/i915/dmc: Also disable HRR event on TGL/ADLS
 main DMC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unlike later platforms TGL/ADLS has the half refresh rate (HRR) event
on the main DMC (as opposed to the pipe DMC). Since we're disabling
that event on all later platforms already let's do the same on
TGL/ADLS as well.

There is supposedly a bit somewhere (DMC_CHICKEN on TGL) to make
the handler not do anything, but we don't currently have code
to frob it. Though that bit should be off by default, the ADL+
experience has shown us that trusting any of this isn't a good
idea. So seems safer to just disable all event handlers we know
that we don't need.

Also the TGL/ADLS DMC firmware is apparently using the wrong event
(undelayed vblank) here anyway. It should be using the delayed
vblank event instead (like ADL+ firmware does), but they didn't
release a firmware fix for this and instead just hacked around
this in the Windows driver code :/

v2: Also disable the event on ADLS (Imre)

Cc: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231213150807.21331-1-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dmc.c      | 5 +++++
 drivers/gpu/drm/i915/display/intel_dmc_regs.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 9385898e3aa59..fcc7283b7dcd4 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -448,6 +448,11 @@ static bool disable_dmc_evt(struct drm_i915_private *i915,
 	    REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC)
 		return true;
 
+	/* also disable the HRR event on the main DMC on TGL/ADLS */
+	if ((IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915)) &&
+	    REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_VBLANK_A)
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
index cf10094acae3d..90d0dbb41cfe6 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
@@ -60,6 +60,7 @@
 
 #define DMC_EVT_CTL_EVENT_ID_MASK	REG_GENMASK(15, 8)
 #define DMC_EVT_CTL_EVENT_ID_FALSE	0x01
+#define DMC_EVT_CTL_EVENT_ID_VBLANK_A	0x32 /* main DMC */
 /* An event handler scheduled to run at a 1 kHz frequency. */
 #define DMC_EVT_CTL_EVENT_ID_CLK_MSEC	0xbf
 
-- 
GitLab


From 5f23cea2d9ccc94c5de236312649fe85b89d6f26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Dec 2023 23:37:50 +0200
Subject: [PATCH 1218/2340] drm/i915/dmc: Print out the DMC mmio register list
 at fw load time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To help with debugging print out the mmio list contained in the DMC
firmware. Also highlight the event registers, and whether we're going
to disable them or not.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-5-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dmc.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index fcc7283b7dcd4..b70502586ab98 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -432,6 +432,16 @@ static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915,
 	return offset >= start && offset < end;
 }
 
+static bool is_dmc_evt_htp_reg(struct drm_i915_private *i915,
+			       enum intel_dmc_id dmc_id, i915_reg_t reg)
+{
+	u32 offset = i915_mmio_reg_offset(reg);
+	u32 start = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, 0));
+	u32 end = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12));
+
+	return offset >= start && offset < end;
+}
+
 static bool disable_dmc_evt(struct drm_i915_private *i915,
 			    enum intel_dmc_id dmc_id,
 			    i915_reg_t reg, u32 data)
@@ -713,9 +723,17 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
 		return 0;
 	}
 
+	drm_dbg_kms(&i915->drm, "DMC %d:\n", dmc_id);
 	for (i = 0; i < mmio_count; i++) {
 		dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
 		dmc_info->mmiodata[i] = mmiodata[i];
+
+		drm_dbg_kms(&i915->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n",
+			    i, mmioaddr[i], mmiodata[i],
+			    is_dmc_evt_ctl_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" :
+			    is_dmc_evt_htp_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "",
+			    disable_dmc_evt(i915, dmc_id, dmc_info->mmioaddr[i],
+					    dmc_info->mmiodata[i]) ? " (disabling)" : "");
 	}
 	dmc_info->mmio_count = mmio_count;
 	dmc_info->start_mmioaddr = start_mmioaddr;
-- 
GitLab


From 716c3cf21784479a1934b670ec67f320cbb5d308 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 14 Nov 2023 15:41:41 +0200
Subject: [PATCH 1219/2340] drm/i915/display: Remove dead code around
 intel_atomic_helper->free_list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After switching to directly using dma_fence instead of i915_sw_fence we
have left some dead code around intel_atomic_helper->free_list. Remove that
dead code.

v2: Remove intel_atomic_state->freed as well

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231114134141.2527694-1-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_display.c  | 20 -------------------
 .../gpu/drm/i915/display/intel_display_core.h |  6 ------
 .../drm/i915/display/intel_display_driver.c   |  7 -------
 .../drm/i915/display/intel_display_types.h    |  2 --
 4 files changed, 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index e8b307ae93196..66b4c3f3ceb16 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6974,24 +6974,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	drm_WARN_ON(&dev_priv->drm, update_pipes);
 }
 
-static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
-{
-	struct intel_atomic_state *state, *next;
-	struct llist_node *freed;
-
-	freed = llist_del_all(&dev_priv->display.atomic_helper.free_list);
-	llist_for_each_entry_safe(state, next, freed, freed)
-		drm_atomic_state_put(&state->base);
-}
-
-void intel_atomic_helper_free_state_worker(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), display.atomic_helper.free_work);
-
-	intel_atomic_helper_free_state(dev_priv);
-}
-
 static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state)
 {
 	struct drm_i915_private *i915 = to_i915(intel_state->base.dev);
@@ -7027,8 +7009,6 @@ static void intel_atomic_cleanup_work(struct work_struct *work)
 	drm_atomic_helper_cleanup_planes(&i915->drm, &state->base);
 	drm_atomic_helper_commit_cleanup_done(&state->base);
 	drm_atomic_state_put(&state->base);
-
-	intel_atomic_helper_free_state(i915);
 }
 
 static void intel_atomic_prepare_plane_clear_colors(struct intel_atomic_state *state)
diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
index 3308f07968ff8..7349a2b928c07 100644
--- a/drivers/gpu/drm/i915/display/intel_display_core.h
+++ b/drivers/gpu/drm/i915/display/intel_display_core.h
@@ -295,12 +295,6 @@ struct intel_display {
 		const struct intel_audio_funcs *audio;
 	} funcs;
 
-	/* Grouping using anonymous structs. Keep sorted. */
-	struct intel_atomic_helper {
-		struct llist_head free_list;
-		struct work_struct free_work;
-	} atomic_helper;
-
 	struct {
 		/* backlight registers and fields in struct intel_panel */
 		struct mutex lock;
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 62f7b10484bec..9df9097a0255a 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -259,10 +259,6 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915)
 	if (ret)
 		goto cleanup_vga_client_pw_domain_dmc;
 
-	init_llist_head(&i915->display.atomic_helper.free_list);
-	INIT_WORK(&i915->display.atomic_helper.free_work,
-		  intel_atomic_helper_free_state_worker);
-
 	intel_init_quirks(i915);
 
 	intel_fbc_init(i915);
@@ -430,9 +426,6 @@ void intel_display_driver_remove(struct drm_i915_private *i915)
 	flush_workqueue(i915->display.wq.flip);
 	flush_workqueue(i915->display.wq.modeset);
 
-	flush_work(&i915->display.atomic_helper.free_work);
-	drm_WARN_ON(&i915->drm, !llist_empty(&i915->display.atomic_helper.free_list));
-
 	/*
 	 * MST topology needs to be suspended so we don't have any calls to
 	 * fbdev after it's finalized. MST will be destroyed later as part of
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 341d80c2b9de9..3fdd8a5179831 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -684,8 +684,6 @@ struct intel_atomic_state {
 	bool skip_intermediate_wm;
 
 	bool rps_interactive;
-
-	struct llist_node freed;
 };
 
 struct intel_plane_state {
-- 
GitLab


From c8048dd0b07df68724805254b9e994d99e9a7af4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?=
 <nfraprado@collabora.com>
Date: Tue, 21 Nov 2023 09:29:27 -0500
Subject: [PATCH 1220/2340] drm/mediatek: dp: Add phy_mtk_dp module as
 pre-dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mtk_dp driver registers a phy device which is handled by the
phy_mtk_dp driver and assumes that the phy probe will complete
synchronously, proceeding to make use of functionality exposed by that
driver right away. This assumption however is false when the phy driver
is built as a module, causing the mtk_dp driver to fail probe in this
case.

Add the phy_mtk_dp module as a pre-dependency to the mtk_dp module to
ensure the phy module has been loaded before the dp, so that the phy
probe happens synchrounously and the mtk_dp driver can probe
successfully even with the phy driver built as a module.

Suggested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Fixes: f70ac097a2cf ("drm/mediatek: Add MT8195 Embedded DisplayPort driver")
Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Guillaume Ranquet <granquet@baylibre.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231121142938.460846-1-nfraprado@collabora.com/
Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
---
 drivers/gpu/drm/mediatek/mtk_dp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c
index e4c16ba9902dc..2136a596efa18 100644
--- a/drivers/gpu/drm/mediatek/mtk_dp.c
+++ b/drivers/gpu/drm/mediatek/mtk_dp.c
@@ -2818,3 +2818,4 @@ MODULE_AUTHOR("Markus Schneider-Pargmann <msp@baylibre.com>");
 MODULE_AUTHOR("Bo-Chen Chen <rex-bc.chen@mediatek.com>");
 MODULE_DESCRIPTION("MediaTek DisplayPort Driver");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: phy_mtk_dp");
-- 
GitLab


From 6914968a0b52507bf19d85e5fb9e35272e17cd35 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 17 Dec 2023 01:59:10 +0200
Subject: [PATCH 1221/2340] drm/bridge: properly refcount DT nodes in aux
 bridge drivers

The aux-bridge and aux-hpd-bridge drivers didn't call of_node_get() on
the device nodes further used for dev->of_node and platform data. When
bridge devices are released, the reference counts are decreased,
resulting in refcount underflow / use-after-free warnings. Get
corresponding refcounts during AUX bridge allocation.

Reported-by: Luca Weiss <luca.weiss@fairphone.com>
Fixes: 2a04739139b2 ("drm/bridge: add transparent bridge helper")
Fixes: 26f4bac3d884 ("drm/bridge: aux-hpd: Replace of_device.h with explicit include")
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231216235910.911958-1-dmitry.baryshkov@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/bridge/aux-bridge.c     | 3 ++-
 drivers/gpu/drm/bridge/aux-hpd-bridge.c | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c
index 49d7c2ab1ecc3..b29980f95379e 100644
--- a/drivers/gpu/drm/bridge/aux-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-bridge.c
@@ -6,6 +6,7 @@
  */
 #include <linux/auxiliary_bus.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 #include <drm/drm_bridge.h>
 #include <drm/bridge/aux-bridge.h>
@@ -57,7 +58,7 @@ int drm_aux_bridge_register(struct device *parent)
 	adev->id = ret;
 	adev->name = "aux_bridge";
 	adev->dev.parent = parent;
-	adev->dev.of_node = parent->of_node;
+	adev->dev.of_node = of_node_get(parent->of_node);
 	adev->dev.release = drm_aux_bridge_release;
 
 	ret = auxiliary_device_init(adev);
diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
index 1999a053d59bd..bb55f697a1819 100644
--- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -68,9 +68,9 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent,
 	adev->id = ret;
 	adev->name = "dp_hpd_bridge";
 	adev->dev.parent = parent;
-	adev->dev.of_node = parent->of_node;
+	adev->dev.of_node = of_node_get(parent->of_node);
 	adev->dev.release = drm_aux_hpd_bridge_release;
-	adev->dev.platform_data = np;
+	adev->dev.platform_data = of_node_get(np);
 
 	ret = auxiliary_device_init(adev);
 	if (ret) {
-- 
GitLab


From f64fa332602c311a76495fd0139bd4abb9aa7bbf Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Mon, 9 Oct 2023 15:07:38 +0300
Subject: [PATCH 1222/2340] accel/habanalabs: add pcie reset prepare/done hooks

When working on a bare-metal system, if FLR will happen the firmware
will handle it and driver will have no knowledge of it, and this will
cause two issues:

1.The driver will be in operational state while it should be in reset.
  This will cause the heartbeat mechanism to keep sending messages to FW
  while pci device is in reset. Eventually heartbeat will fail and
  the device will end up in non-operational state.

2. After FW handles the FLR, and due to the reset it'll go back to
   preboot stage, and driver need to perform hard reset in order to
   load the boot fit binary.

This patch will add reset_prepare hook that will set the device to
be in disabled state, so it'll be not operational, and also
reset_done hook which will be called after the actual FLR handling,
then it will perform hard reset.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../accel/habanalabs/common/habanalabs_drv.c  | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 306a5bc9bf894..35ae0ff347f54 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -670,6 +670,38 @@ static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
+static void hl_pci_reset_prepare(struct pci_dev *pdev)
+{
+	struct hl_device *hdev;
+
+	hdev = pci_get_drvdata(pdev);
+	if (!hdev)
+		return;
+
+	hdev->disabled = true;
+}
+
+static void hl_pci_reset_done(struct pci_dev *pdev)
+{
+	struct hl_device *hdev;
+	u32 flags;
+
+	hdev = pci_get_drvdata(pdev);
+	if (!hdev)
+		return;
+
+	/*
+	 * Schedule a thread to trigger hard reset.
+	 * The reason for this handler, is for rare cases where the driver is up
+	 * and FLR occurs. This is valid only when working with no VM, so FW handles FLR
+	 * and resets the device. FW will go back preboot stage, so driver needs to perform
+	 * hard reset in order to load FW fit again.
+	 */
+	flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
+
+	hl_device_reset(hdev, flags);
+}
+
 static const struct dev_pm_ops hl_pm_ops = {
 	.suspend = hl_pmops_suspend,
 	.resume = hl_pmops_resume,
@@ -679,6 +711,8 @@ static const struct pci_error_handlers hl_pci_err_handler = {
 	.error_detected = hl_pci_err_detected,
 	.slot_reset = hl_pci_err_slot_reset,
 	.resume = hl_pci_err_resume,
+	.reset_prepare = hl_pci_reset_prepare,
+	.reset_done = hl_pci_reset_done,
 };
 
 static struct pci_driver hl_pci_driver = {
-- 
GitLab


From fbc2a09e09201eb64b94c0c7e4d2b4ec5337f844 Mon Sep 17 00:00:00 2001
From: Farah Kassabri <fkassabri@habana.ai>
Date: Wed, 18 Oct 2023 16:22:13 +0300
Subject: [PATCH 1223/2340] accel/habanalabs: update device boot error check

Use a predefined mask which set the device critical boot errors.
Driver will fail and stop its loading, only upon detecting at least
one of those errors defined in this mask.

Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/firmware_if.c | 115 +++++-------------
 1 file changed, 32 insertions(+), 83 deletions(-)

diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 47e8384134aa9..9e9dfe013659f 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -646,39 +646,27 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
 	return rc;
 }
 
-static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
-								u32 sts_val)
+static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, u32 sts_val)
 {
 	bool err_exists = false;
 
 	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
 		return false;
 
-	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - DRAM initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+		dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n");
 
-	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
+	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
 		dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - Thermal Sensor initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+		dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n");
 
 	if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
 		if (hdev->bmc_enable) {
-			dev_err(hdev->dev,
-				"Device boot error - Skipped waiting for BMC\n");
-			err_exists = true;
+			dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n");
 		} else {
-			dev_info(hdev->dev,
-				"Device boot message - Skipped waiting for BMC\n");
+			dev_info(hdev->dev, "Device boot message - Skipped waiting for BMC\n");
 			/* This is an info so we don't want it to disable the
 			 * device
 			 */
@@ -686,48 +674,29 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		}
 	}
 
-	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
-		dev_err(hdev->dev,
-			"Device boot error - Serdes data from BMC not available\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+		dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n");
 
-	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - NIC F/W initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+		dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n");
 
-	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
-		dev_err(hdev->dev,
-			"Device boot warning - security not ready\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+		dev_err(hdev->dev, "Device boot warning - security not ready\n");
 
-	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
 		dev_err(hdev->dev, "Device boot error - security failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
 		dev_err(hdev->dev, "Device boot error - eFuse failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL)
 		dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
 		dev_err(hdev->dev, "Device boot error - PLL failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL)
 		dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
-		err_exists = true;
-	}
 
 	if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
 		/* Ignore this bit, don't prevent driver loading */
@@ -735,52 +704,32 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
 	}
 
-	if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_BINNING_FAIL)
 		dev_err(hdev->dev, "Device boot error - binning failure\n");
-		err_exists = true;
-	}
 
 	if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
 		dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
 
+	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+		dev_err(hdev->dev, "Device boot warning - Skipped DRAM initialization\n");
+
+	if (err_val & CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
+		dev_err(hdev->dev, "Device boot error - ARC memory scrub failed\n");
+
+	/* All warnings should go here in order not to reach the unknown error validation */
 	if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
 		dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n");
 		err_exists = true;
 	}
 
-	/* All warnings should go here in order not to reach the unknown error validation */
-	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
-		dev_warn(hdev->dev,
-			"Device boot warning - Skipped DRAM initialization\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
-	}
+	if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL)
+		dev_warn(hdev->dev, "Device boot warning - Failed to load preboot primary image\n");
 
-	if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
-		dev_warn(hdev->dev,
-			"Device boot warning - Failed to load preboot primary image\n");
-		/* This is a warning so we don't want it to disable the
-		 * device as we have a secondary preboot image
-		 */
-		err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
-	}
+	if (err_val & CPU_BOOT_ERR0_TPM_FAIL)
+		dev_warn(hdev->dev, "Device boot warning - TPM failure\n");
 
-	if (err_val & CPU_BOOT_ERR0_TPM_FAIL) {
-		dev_warn(hdev->dev,
-			"Device boot warning - TPM failure\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_TPM_FAIL;
-	}
-
-	if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
-		dev_err(hdev->dev,
-			"Device boot error - unknown ERR0 error 0x%08x\n", err_val);
+	if (err_val & CPU_BOOT_ERR_FATAL_MASK)
 		err_exists = true;
-	}
 
 	/* return error only if it's in the predefined mask */
 	if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
-- 
GitLab


From c6485482330d9f9a745d4bfec4dca722bd2bd75f Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Sun, 15 Oct 2023 14:04:53 +0300
Subject: [PATCH 1224/2340] accel/habanalabs/gaudi2: assume hard-reset by FW
 upon PCIe AXI drain

When a PCIe AXI drain event happens, it is possible that the driver
cannot access the device through PCIe, and therefore cannot send a
hard-reset request to FW.
Starting from FW version 1.13, FW will initiate a hard-reset in such
a case without waiting for a reset request from the driver.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs.h | 8 ++++++++
 drivers/accel/habanalabs/gaudi2/gaudi2.c     | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 1655c101c7052..5c69a482b8de7 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3594,6 +3594,14 @@ static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major
 	return false;
 }
 
+static inline bool hl_is_fw_sw_ver_equal_or_greater(struct hl_device *hdev, u32 fw_sw_major,
+							u32 fw_sw_minor)
+{
+	return (hdev->fw_sw_major_ver > fw_sw_major ||
+			(hdev->fw_sw_major_ver == fw_sw_major &&
+					hdev->fw_sw_minor_ver >= fw_sw_minor));
+}
+
 /*
  * Kernel module functions that can be accessed by entire module
  */
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 819660c684cfc..b739078c2d87b 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -10007,6 +10007,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+		if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
+			is_critical = true;
 		break;
 
 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
-- 
GitLab


From e8bc0c1b1b730eb8759f5305ebd2d6876952e539 Mon Sep 17 00:00:00 2001
From: Farah Kassabri <fkassabri@habana.ai>
Date: Sun, 29 Oct 2023 16:16:16 +0200
Subject: [PATCH 1225/2340] accel/habanalabs: add log when eq event is not
 received

Add error log when no eq event is received from FW,
to cover a scenario when FW is stuck for some reason.
In such case driver will not receive neither the eq error interrupt
or the eq heartbeat event, and will just initiate a reset without
indication in the dmesg about the reason.

Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 9711e8fc979d9..d95a981b29060 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1049,10 +1049,12 @@ static void hl_device_eq_heartbeat(struct hl_device *hdev)
 	if (!prop->cpucp_info.eq_health_check_supported)
 		return;
 
-	if (hdev->eq_heartbeat_received)
+	if (hdev->eq_heartbeat_received) {
 		hdev->eq_heartbeat_received = false;
-	else
+	} else {
+		dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
 		hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
+	}
 }
 
 static void hl_device_heartbeat(struct work_struct *work)
-- 
GitLab


From 42422993cf28d456778ee9168d73758ec037cd51 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 30 Oct 2023 12:23:57 +0200
Subject: [PATCH 1226/2340] accel/habanalabs: add support for Gaudi2C device

Gaudi2 with PCI revision ID with the value of '3' represents Gaudi2C
device and should be detected and initialized as Gaudi2.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c                 | 3 +++
 drivers/accel/habanalabs/common/habanalabs.h             | 2 ++
 drivers/accel/habanalabs/common/habanalabs_drv.c         | 3 +++
 drivers/accel/habanalabs/common/mmu/mmu.c                | 1 +
 drivers/accel/habanalabs/common/sysfs.c                  | 3 +++
 drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h | 1 +
 6 files changed, 13 insertions(+)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index d95a981b29060..d9447aeb3937e 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -853,6 +853,9 @@ static int device_early_init(struct hl_device *hdev)
 		gaudi2_set_asic_funcs(hdev);
 		strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
 		break;
+	case ASIC_GAUDI2C:
+		gaudi2_set_asic_funcs(hdev);
+		strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name));
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 5c69a482b8de7..7b0209e5bad61 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1262,6 +1262,7 @@ struct hl_dec {
  * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
  * @ASIC_GAUDI2: Gaudi2 device.
  * @ASIC_GAUDI2B: Gaudi2B device.
+ * @ASIC_GAUDI2C: Gaudi2C device.
  */
 enum hl_asic_type {
 	ASIC_INVALID,
@@ -1270,6 +1271,7 @@ enum hl_asic_type {
 	ASIC_GAUDI_SEC,
 	ASIC_GAUDI2,
 	ASIC_GAUDI2B,
+	ASIC_GAUDI2C,
 };
 
 struct hl_cs_parser;
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 35ae0ff347f54..e542fd40e16c6 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -141,6 +141,9 @@ static enum hl_asic_type get_asic_type(struct hl_device *hdev)
 		case REV_ID_B:
 			asic_type = ASIC_GAUDI2B;
 			break;
+		case REV_ID_C:
+			asic_type = ASIC_GAUDI2C;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c
index b2145716c6053..b654302a68fc0 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu.c
@@ -596,6 +596,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 		break;
 	case ASIC_GAUDI2:
 	case ASIC_GAUDI2B:
+	case ASIC_GAUDI2C:
 		/* MMUs in Gaudi2 are always host resident */
 		hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
 		break;
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 01f89f029355e..2786063730555 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -251,6 +251,9 @@ static ssize_t device_type_show(struct device *dev,
 	case ASIC_GAUDI2B:
 		str = "GAUDI2B";
 		break;
+	case ASIC_GAUDI2C:
+		str = "GAUDI2C";
+		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 				hdev->asic_type);
diff --git a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
index f5d497dc9bdc1..4f951cada0776 100644
--- a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
+++ b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
@@ -25,6 +25,7 @@ enum hl_revision_id {
 	REV_ID_INVALID				= 0x00,
 	REV_ID_A				= 0x01,
 	REV_ID_B				= 0x02,
+	REV_ID_C				= 0x03
 };
 
 #endif /* INCLUDE_PCI_GENERAL_H_ */
-- 
GitLab


From d1958dce5ab6a3e089c60cf474e8c9b7e96e70ad Mon Sep 17 00:00:00 2001
From: Farah Kassabri <fkassabri@habana.ai>
Date: Tue, 31 Oct 2023 12:20:36 +0200
Subject: [PATCH 1227/2340] accel/habanalabs: fix EQ heartbeat mechanism

Stop rescheduling another heartbeat check when EQ heartbeat check fails
as it generates confusing logs in dmesg that the heartbeat fails.

Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index d9447aeb3937e..6bf5f1d0d0057 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1044,20 +1044,21 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
 	return (vendor_id == PCI_VENDOR_ID_HABANALABS);
 }
 
-static void hl_device_eq_heartbeat(struct hl_device *hdev)
+static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
 {
-	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 
 	if (!prop->cpucp_info.eq_health_check_supported)
-		return;
+		return 0;
 
 	if (hdev->eq_heartbeat_received) {
 		hdev->eq_heartbeat_received = false;
 	} else {
 		dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
-		hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
+		return -EIO;
 	}
+
+	return 0;
 }
 
 static void hl_device_heartbeat(struct work_struct *work)
@@ -1074,10 +1075,9 @@ static void hl_device_heartbeat(struct work_struct *work)
 	/*
 	 * For EQ health check need to check if driver received the heartbeat eq event
 	 * in order to validate the eq is working.
+	 * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule.
 	 */
-	hl_device_eq_heartbeat(hdev);
-
-	if (!hdev->asic_funcs->send_heartbeat(hdev))
+	if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev)))
 		goto reschedule;
 
 	if (hl_device_operational(hdev, NULL))
-- 
GitLab


From 0ec346779644039c4c05cfa7f071b1a24e54d8d9 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Tue, 31 Oct 2023 13:51:10 +0200
Subject: [PATCH 1228/2340] accel/habanalabs/gaudi2: fix undef opcode reporting

currently the undefined opcode event bit in set only for lower cp and
only if 'write_enable' is true. It should be set anyway and for all
streams in order to report that event to userspace.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index b739078c2d87b..5075f92d15ccf 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7929,21 +7929,19 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
 				error_count++;
 			}
 
-		if (i == QMAN_STREAMS && error_count) {
-			/* check for undefined opcode */
-			if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
-					hdev->captured_err_info.undef_opcode.write_enable) {
+		/* check for undefined opcode */
+		if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK) {
+			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
+			if (hdev->captured_err_info.undef_opcode.write_enable) {
 				memset(&hdev->captured_err_info.undef_opcode, 0,
 						sizeof(hdev->captured_err_info.undef_opcode));
-
-				hdev->captured_err_info.undef_opcode.write_enable = false;
 				hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
 				hdev->captured_err_info.undef_opcode.engine_id =
 							gaudi2_queue_id_to_engine_id[qid_base];
-				*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
 			}
 
-			handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
+			if (i == QMAN_STREAMS)
+				handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
 		}
 	}
 
-- 
GitLab


From 571cdb6e3b9a9c077b39047091f0ccc721b92b83 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Thu, 9 Nov 2023 10:53:10 +0200
Subject: [PATCH 1229/2340] accel/habanalabs: remove 'get temperature' debug
 print

The print was added long back for a specific debug and can
now be removed.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/hwmon.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c
index 8598056216e75..1ee2ee07e9ed5 100644
--- a/drivers/accel/habanalabs/common/hwmon.c
+++ b/drivers/accel/habanalabs/common/hwmon.c
@@ -578,10 +578,6 @@ int hl_get_temperature(struct hl_device *hdev,
 				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
-
-	dev_dbg(hdev->dev, "get temp, ctl 0x%x, sensor %d, type %d\n",
-		pkt.ctl, pkt.sensor_index, pkt.type);
-
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 						0, &result);
 
-- 
GitLab


From 4b0b1fbc7757169b6d304545a321c7a88f13f8f0 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 20 Jul 2023 16:50:39 +0300
Subject: [PATCH 1230/2340] accel/habanalabs: set hard reset flag if graceful
 reset is skipped

hl_device_cond_reset() might be called with the hard reset flag unset,
because a compute reset upon device release as part of a graceful reset
is valid.
If the conditions for graceful reset are not met, hl_device_reset() will
be called for an immediate reset. In this case a compute reset is not
valid, so it will be replaced with a hard reset together with a debug
message about it.
This message might be confusing, as it implies that a compute reset was
requested when it shouldn't. To prevent this confusion, set the hard
reset flag in hl_device_cond_reset() if going to an immediate reset.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 6bf5f1d0d0057..a365791a9f5c6 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -2040,7 +2040,7 @@ device_reset:
 	if (ctx)
 		hl_ctx_put(ctx);
 
-	return hl_device_reset(hdev, flags);
+	return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD);
 }
 
 static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
-- 
GitLab


From ae303d885d4a0fcea65330de9327d28edfebd206 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 6 Nov 2023 18:41:35 +0200
Subject: [PATCH 1231/2340] accel/habanalabs/gaudi2: get the correct QM CQ info
 upon an error

Upon a QM error, the address/size from both the CQ and the ARC_CQ are
printed, although the instruction that led to the error was received
from only one of them.

Moreover, in case of a QM undefined opcode, only one of these
address/size sets will be captured based on the value of ARC_CQ_PTR.
However, this value can be non-zero even if currently the CQ is used, in
case the CQ/ARC_CQ are alternately used.

Under the assumption of having a stop-on-error configuration, modify to
use CP_STS.CUR_CQ field to get the relevant CQ for the QM error.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c      | 44 +++++++++----------
 .../include/gaudi2/asic_reg/gaudi2_regs.h     |  1 +
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 5075f92d15ccf..77c480725a84b 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7860,36 +7860,36 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 
 static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
 {
-	u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
-	u64 cq_ptr, arc_cq_ptr, cp_current_inst;
-
-	lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
-	hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
-	cq_ptr = ((u64) hi) << 32 | lo;
-	cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
-
-	lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
-	hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
-	arc_cq_ptr = ((u64) hi) << 32 | lo;
-	arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
+	u32 lo, hi, cq_ptr_size, cp_sts;
+	u64 cq_ptr, cp_current_inst;
+	bool is_arc_cq;
+
+	cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
+	is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
+
+	if (is_arc_cq) {
+		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
+		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
+		cq_ptr = ((u64) hi) << 32 | lo;
+		cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
+	} else {
+		lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
+		hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
+		cq_ptr = ((u64) hi) << 32 | lo;
+		cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
+	}
 
 	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
 	hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
 	cp_current_inst = ((u64) hi) << 32 | lo;
 
 	dev_info(hdev->dev,
-		"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
-		cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
+		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
+		is_arc_cq ? "ARC_" : "", cq_ptr, cq_ptr_size, cp_current_inst);
 
 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-		if (arc_cq_ptr) {
-			hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
-			hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
-		} else {
-			hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
-			hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
-		}
-
+		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
+		hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
 		hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
 	}
 }
diff --git a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
index a08378d0802b5..8018214a7b59b 100644
--- a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
+++ b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
@@ -250,6 +250,7 @@
 #define QM_ARC_CQ_PTR_HI_OFFSET		(mmPDMA0_QM_ARC_CQ_PTR_HI - mmPDMA0_QM_BASE)
 #define QM_ARC_CQ_TSIZE_OFFSET		(mmPDMA0_QM_ARC_CQ_TSIZE - mmPDMA0_QM_BASE)
 
+#define QM_CP_STS_4_OFFSET		(mmPDMA0_QM_CP_STS_4 - mmPDMA0_QM_BASE)
 #define QM_CP_CURRENT_INST_LO_4_OFFSET	(mmPDMA0_QM_CP_CURRENT_INST_LO_4 - mmPDMA0_QM_BASE)
 #define QM_CP_CURRENT_INST_HI_4_OFFSET	(mmPDMA0_QM_CP_CURRENT_INST_HI_4 - mmPDMA0_QM_BASE)
 
-- 
GitLab


From c9f9d0e3d0db300315d3bde7f122439633e6f007 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Sun, 12 Nov 2023 13:38:42 +0200
Subject: [PATCH 1232/2340] accel/habanalabs: print error code when mapping
 fails

Failure to map is considered a non-trivial error and we need to notify
the user about it.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/memory.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 0b8689fe0b64d..3348ad12c2375 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -955,8 +955,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
-				"map failed for handle %u, npages: %llu, mapped: %llu",
-				phys_pg_pack->handle, phys_pg_pack->npages,
+				"map failed (%d) for handle %u, npages: %llu, mapped: %llu\n",
+				rc, phys_pg_pack->handle, phys_pg_pack->npages,
 				mapped_pg_cnt);
 			goto err;
 		}
@@ -1186,7 +1186,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
 
 	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
 	if (rc) {
-		dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
+		dev_err(hdev->dev, "mapping page pack failed (%d) for handle %u\n",
+			rc, handle);
 		mutex_unlock(&hdev->mmu_lock);
 		goto map_err;
 	}
-- 
GitLab


From 47a552863d6c9ea26abe9ad35d2c35e4d6896551 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 13 Nov 2023 21:11:13 +0200
Subject: [PATCH 1233/2340] accel/habanalabs: expose module id through sysfs

Module ID exposes the physical location of the device in the server,
from the pov of the devices in regard to how they are connected by
internal fabric.

This information is already exposed in our INFO ioctl, but there are
utilities and scripts running in data-center which are already
accessing sysfs for topology information and it is easier for them
to continue getting that information from sysfs instead of opening
a file descriptor.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 Documentation/ABI/testing/sysfs-driver-habanalabs |  6 ++++++
 drivers/accel/habanalabs/common/sysfs.c           | 10 ++++++++++
 2 files changed, 16 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index c63ca1ad500d1..89fe3b09d4adb 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -149,6 +149,12 @@ Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the MME compute
                 engine. This property is valid only for the Goya ASIC family
 
+What:           /sys/class/accel/accel<n>/device/module_id
+Date:           Nov 2023
+KernelVersion:  not yet upstreamed
+Contact:        ogabbay@kernel.org
+Description:    Displays the device's module id
+
 What:           /sys/class/accel/accel<n>/device/pci_addr
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 2786063730555..8d2164691d817 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -386,6 +386,14 @@ static ssize_t security_enabled_show(struct device *dev,
 	return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
 }
 
+static ssize_t module_id_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", le32_to_cpu(hdev->asic_prop.cpucp_info.card_location));
+}
+
 static DEVICE_ATTR_RO(armcp_kernel_ver);
 static DEVICE_ATTR_RO(armcp_ver);
 static DEVICE_ATTR_RO(cpld_ver);
@@ -405,6 +413,7 @@ static DEVICE_ATTR_RO(thermal_ver);
 static DEVICE_ATTR_RO(uboot_ver);
 static DEVICE_ATTR_RO(fw_os_ver);
 static DEVICE_ATTR_RO(security_enabled);
+static DEVICE_ATTR_RO(module_id);
 
 static struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
@@ -430,6 +439,7 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_uboot_ver.attr,
 	&dev_attr_fw_os_ver.attr,
 	&dev_attr_security_enabled.attr,
+	&dev_attr_module_id.attr,
 	NULL,
 };
 
-- 
GitLab


From 5bc155cfea605cd64aa372b44a67473b49c4726c Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 17 Nov 2023 12:49:19 +0200
Subject: [PATCH 1234/2340] accel/habanalabs/gaudi2: use correct registers to
 dump QM CQ info

The QM CQ PTR_LO/PTR_HI/TSIZE registers are for pushing a CQ entry, and
although they are updated by HW even when descriptors are fetched by PQ
and CB addresses are fed into CQ, the correct registers to use when
dumping the CQ info are the ones with the _STS suffix.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c             | 12 ++++++------
 .../habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 77c480725a84b..bf537c2082cda 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7868,15 +7868,15 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base,
 	is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
 
 	if (is_arc_cq) {
-		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
-		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
+		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
+		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
 		cq_ptr = ((u64) hi) << 32 | lo;
-		cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
+		cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
 	} else {
-		lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
-		hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
+		lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
+		hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
 		cq_ptr = ((u64) hi) << 32 | lo;
-		cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
+		cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
 	}
 
 	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
diff --git a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
index 8018214a7b59b..d21fcd3880b44 100644
--- a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
+++ b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
@@ -242,13 +242,13 @@
 #define QM_FENCE2_OFFSET		(mmPDMA0_QM_CP_FENCE2_RDATA_0 - mmPDMA0_QM_BASE)
 #define QM_SEI_STATUS_OFFSET		(mmPDMA0_QM_SEI_STATUS - mmPDMA0_QM_BASE)
 
-#define QM_CQ_PTR_LO_4_OFFSET		(mmPDMA0_QM_CQ_PTR_LO_4 - mmPDMA0_QM_BASE)
-#define QM_CQ_PTR_HI_4_OFFSET		(mmPDMA0_QM_CQ_PTR_HI_4 - mmPDMA0_QM_BASE)
-#define QM_CQ_TSIZE_4_OFFSET		(mmPDMA0_QM_CQ_TSIZE_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_TSIZE_STS_4_OFFSET	(mmPDMA0_QM_CQ_TSIZE_STS_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_PTR_LO_STS_4_OFFSET	(mmPDMA0_QM_CQ_PTR_LO_STS_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_PTR_HI_STS_4_OFFSET	(mmPDMA0_QM_CQ_PTR_HI_STS_4 - mmPDMA0_QM_BASE)
 
-#define QM_ARC_CQ_PTR_LO_OFFSET		(mmPDMA0_QM_ARC_CQ_PTR_LO - mmPDMA0_QM_BASE)
-#define QM_ARC_CQ_PTR_HI_OFFSET		(mmPDMA0_QM_ARC_CQ_PTR_HI - mmPDMA0_QM_BASE)
-#define QM_ARC_CQ_TSIZE_OFFSET		(mmPDMA0_QM_ARC_CQ_TSIZE - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_TSIZE_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_TSIZE_STS - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_PTR_LO_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_PTR_LO_STS - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_PTR_HI_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_PTR_HI_STS - mmPDMA0_QM_BASE)
 
 #define QM_CP_STS_4_OFFSET		(mmPDMA0_QM_CP_STS_4 - mmPDMA0_QM_BASE)
 #define QM_CP_CURRENT_INST_LO_4_OFFSET	(mmPDMA0_QM_CP_CURRENT_INST_LO_4 - mmPDMA0_QM_BASE)
-- 
GitLab


From 7259eb7b534735b9c1153654c0bb4c5f059c0dd3 Mon Sep 17 00:00:00 2001
From: Moti Haimovski <mhaimovski@habana.ai>
Date: Sun, 12 Nov 2023 18:07:10 +0200
Subject: [PATCH 1235/2340] accel/habanalabs/gaudi2: add signed dev info uAPI

User will provide a nonce via the INFO ioctl, and will retrieve
the signed device info generated using given nonce.

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/firmware_if.c |  8 +++
 drivers/accel/habanalabs/common/habanalabs.h  |  2 +
 .../habanalabs/common/habanalabs_ioctl.c      | 53 +++++++++++++++++++
 include/linux/habanalabs/cpucp_if.h           |  8 ++-
 include/uapi/drm/habanalabs_accel.h           | 28 ++++++++++
 5 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 9e9dfe013659f..3558a6a8e1925 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -3244,6 +3244,14 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in
 					HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
 }
 
+int hl_fw_get_dev_info_signed(struct hl_device *hdev,
+			      struct cpucp_dev_info_signed *dev_info_signed, u32 nonce)
+{
+	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_INFO_SIGNED_GET, dev_info_signed,
+					 sizeof(struct cpucp_dev_info_signed), nonce,
+					 HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
+}
+
 int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
 						dma_addr_t buff, u32 *size)
 {
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 7b0209e5bad61..dd3fe3ddc00a6 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3964,6 +3964,8 @@ long hl_fw_get_max_power(struct hl_device *hdev);
 void hl_fw_set_max_power(struct hl_device *hdev);
 int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
 				u32 nonce);
+int hl_fw_get_dev_info_signed(struct hl_device *hdev,
+			      struct cpucp_dev_info_signed *dev_info_signed, u32 nonce);
 int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 8ef36effb95bc..a92713e0e5801 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -19,6 +19,9 @@
 
 #include <asm/msr.h>
 
+/* make sure there is space for all the signed info */
+static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
+
 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
 	[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
 	[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@@ -719,6 +722,53 @@ free_sec_attest_info:
 	return rc;
 }
 
+static int dev_info_signed(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	struct cpucp_dev_info_signed *dev_info_signed;
+	struct hl_info_signed *info;
+	u32 max_size = args->return_size;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	dev_info_signed = kzalloc(sizeof(*dev_info_signed), GFP_KERNEL);
+	if (!dev_info_signed)
+		return -ENOMEM;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		rc = -ENOMEM;
+		goto free_dev_info_signed;
+	}
+
+	rc = hl_fw_get_dev_info_signed(hpriv->hdev,
+					dev_info_signed, args->sec_attest_nonce);
+	if (rc)
+		goto free_info;
+
+	info->nonce = le32_to_cpu(dev_info_signed->nonce);
+	info->info_sig_len = dev_info_signed->info_sig_len;
+	info->pub_data_len = le16_to_cpu(dev_info_signed->pub_data_len);
+	info->certificate_len = le16_to_cpu(dev_info_signed->certificate_len);
+	info->dev_info_len = sizeof(struct cpucp_info);
+	memcpy(&info->info_sig, &dev_info_signed->info_sig, sizeof(info->info_sig));
+	memcpy(&info->public_data, &dev_info_signed->public_data, sizeof(info->public_data));
+	memcpy(&info->certificate, &dev_info_signed->certificate, sizeof(info->certificate));
+	memcpy(&info->dev_info, &dev_info_signed->info, info->dev_info_len);
+
+	rc = copy_to_user(out, info, min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+	kfree(info);
+free_dev_info_signed:
+	kfree(dev_info_signed);
+
+	return rc;
+}
+
+
 static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	int rc;
@@ -1089,6 +1139,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_FW_GENERIC_REQ:
 		return send_fw_generic_request(hdev, args);
 
+	case HL_INFO_DEV_SIGNED:
+		return dev_info_signed(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -EINVAL;
diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 86ea7c63a0d29..f316c8d0f3fc9 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -659,6 +659,12 @@ enum pq_init_status {
  *       number (nonce) provided by the host to prevent replay attacks.
  *       public key and certificate also provided as part of the FW response.
  *
+ * CPUCP_PACKET_INFO_SIGNED_GET -
+ *       Get the device information signed by the Trusted Platform device.
+ *       device info data is also hashed with some unique number (nonce) provided
+ *       by the host to prevent replay attacks. public key and certificate also
+ *       provided as part of the FW response.
+ *
  * CPUCP_PACKET_MONITOR_DUMP_GET -
  *       Get monitors registers dump from the CpuCP kernel.
  *       The CPU will put the registers dump in the a buffer allocated by the driver
@@ -733,7 +739,7 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_ENGINE_CORE_ASID_SET,	/* internal */
 	CPUCP_PACKET_RESERVED2,			/* not used */
 	CPUCP_PACKET_SEC_ATTEST_GET,		/* internal */
-	CPUCP_PACKET_RESERVED3,			/* not used */
+	CPUCP_PACKET_INFO_SIGNED_GET,		/* internal */
 	CPUCP_PACKET_RESERVED4,			/* not used */
 	CPUCP_PACKET_MONITOR_DUMP_GET,		/* debugfs */
 	CPUCP_PACKET_RESERVED5,			/* not used */
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index 347c7b62e60e5..a512dc4cffd05 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -846,6 +846,7 @@ enum hl_server_type {
 #define HL_INFO_HW_ERR_EVENT			36
 #define HL_INFO_FW_ERR_EVENT			37
 #define HL_INFO_USER_ENGINE_ERR_EVENT		38
+#define HL_INFO_DEV_SIGNED			40
 
 #define HL_INFO_VERSION_MAX_LEN			128
 #define HL_INFO_CARD_NAME_MAX_LEN		16
@@ -1256,6 +1257,7 @@ struct hl_info_dev_memalloc_page_sizes {
 #define SEC_SIGNATURE_BUF_SZ	255	/* (256 - 1) 1 byte used for size */
 #define SEC_PUB_DATA_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
 #define SEC_CERTIFICATE_BUF_SZ	2046	/* (2048 - 2) 2 bytes used for size */
+#define SEC_DEV_INFO_BUF_SZ	5120
 
 /*
  * struct hl_info_sec_attest - attestation report of the boot
@@ -1290,6 +1292,32 @@ struct hl_info_sec_attest {
 	__u8 pad0[2];
 };
 
+/*
+ * struct hl_info_signed - device information signed by a secured device.
+ * @nonce: number only used once. random number provided by host. this also passed to the quote
+ *         command as a qualifying data.
+ * @pub_data_len: length of the public data (bytes)
+ * @certificate_len: length of the certificate (bytes)
+ * @info_sig_len: length of the attestation signature (bytes)
+ * @public_data: public key info signed info data (outPublic + name + qualifiedName)
+ * @certificate: certificate for the signing key
+ * @info_sig: signature of the info + nonce data.
+ * @dev_info_len: length of device info (bytes)
+ * @dev_info: device info as byte array.
+ */
+struct hl_info_signed {
+	__u32 nonce;
+	__u16 pub_data_len;
+	__u16 certificate_len;
+	__u8 info_sig_len;
+	__u8 public_data[SEC_PUB_DATA_BUF_SZ];
+	__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+	__u8 info_sig[SEC_SIGNATURE_BUF_SZ];
+	__u16 dev_info_len;
+	__u8 dev_info[SEC_DEV_INFO_BUF_SZ];
+	__u8 pad[2];
+};
+
 /**
  * struct hl_page_fault_info - page fault information.
  * @timestamp: timestamp of page fault.
-- 
GitLab


From d980e1ced9899451d2d2a84030ce9f2a04eae7e8 Mon Sep 17 00:00:00 2001
From: Ariel Suller <asuller@habana.ai>
Date: Thu, 23 Nov 2023 16:27:07 +0200
Subject: [PATCH 1236/2340] accel/habanalabs: report 3 instances of Infineon
 second stage

Infineon controller second stage has 3 instances that their version
need to be reported by driver.

Signed-off-by: Ariel Suller <asuller@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/sysfs.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 8d2164691d817..c940c5f1d109e 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -8,6 +8,7 @@
 #include "habanalabs.h"
 
 #include <linux/pci.h>
+#include <linux/types.h>
 
 static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -80,12 +81,27 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	struct cpucp_info *cpucp_info;
+	u32 infineon_second_stage_version;
+	u32 infineon_second_stage_first_instance;
+	u32 infineon_second_stage_second_instance;
+	u32 infineon_second_stage_third_instance;
+	u32 mask = 0xff;
 
 	cpucp_info = &hdev->asic_prop.cpucp_info;
 
+	infineon_second_stage_version = le32_to_cpu(cpucp_info->infineon_second_stage_version);
+	infineon_second_stage_first_instance = infineon_second_stage_version & mask;
+	infineon_second_stage_second_instance =
+					(infineon_second_stage_version >> 8) & mask;
+	infineon_second_stage_third_instance =
+					(infineon_second_stage_version >> 16) & mask;
+
 	if (cpucp_info->infineon_second_stage_version)
-		return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
-				le32_to_cpu(cpucp_info->infineon_second_stage_version));
+		return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
+				le32_to_cpu(cpucp_info->infineon_version),
+				infineon_second_stage_first_instance,
+				infineon_second_stage_second_instance,
+				infineon_second_stage_third_instance);
 	else
 		return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
 }
-- 
GitLab


From 565ee78840906da33f124c143e0c788e42f824e8 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 23 Nov 2023 09:49:36 +0200
Subject: [PATCH 1237/2340] accel/habanalabs/gaudi2: add zero padding when
 printing QM CP instruction

QM instructions are in multiples of 64 bits and the command type is in
the upper bits of first QWORD.
To make it clearer that an undefined command is due to a type of 0x0,
always print all 64 bits and add a zero padding if needed.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index bf537c2082cda..f81b57649b00a 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7884,7 +7884,7 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base,
 	cp_current_inst = ((u64) hi) << 32 | lo;
 
 	dev_info(hdev->dev,
-		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
+		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
 		is_arc_cq ? "ARC_" : "", cq_ptr, cq_ptr_size, cp_current_inst);
 
 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-- 
GitLab


From cf0719a8a3e72cb82d83f79aa57ae11d86324915 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 27 Sep 2023 18:50:30 +0300
Subject: [PATCH 1238/2340] accel/habanalabs: update debugfs-driver-habanalabs
 with the device-name directory

The device debugfs directory was modified to be named as the
parent device name.
Update the paths accordingly.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/debugfs-driver-habanalabs     | 72 +++++++++----------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 042fd125fbc96..a7a432dc40156 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -1,4 +1,4 @@
-What:           /sys/kernel/debug/accel/<n>/addr
+What:           /sys/kernel/debug/accel/<parent_device>/addr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -8,34 +8,34 @@ Description:    Sets the device address to be used for read or write through
                 only when the IOMMU is disabled.
                 The acceptable value is a string that starts with "0x"
 
-What:           /sys/kernel/debug/accel/<n>/clk_gate
+What:           /sys/kernel/debug/accel/<parent_device>/clk_gate
 Date:           May 2020
 KernelVersion:  5.8
 Contact:        ogabbay@kernel.org
 Description:    This setting is now deprecated as clock gating is handled solely by the f/w
 
-What:           /sys/kernel/debug/accel/<n>/command_buffers
+What:           /sys/kernel/debug/accel/<parent_device>/command_buffers
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently allocated
                 command buffers
 
-What:           /sys/kernel/debug/accel/<n>/command_submission
+What:           /sys/kernel/debug/accel/<parent_device>/command_submission
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently active
                 command submissions
 
-What:           /sys/kernel/debug/accel/<n>/command_submission_jobs
+What:           /sys/kernel/debug/accel/<parent_device>/command_submission_jobs
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with detailed information about each JOB (CB) of
                 each active command submission
 
-What:           /sys/kernel/debug/accel/<n>/data32
+What:           /sys/kernel/debug/accel/<parent_device>/data32
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -50,7 +50,7 @@ Description:    Allows the root user to read or write directly through the
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory
 
-What:           /sys/kernel/debug/accel/<n>/data64
+What:           /sys/kernel/debug/accel/<parent_device>/data64
 Date:           Jan 2020
 KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
@@ -65,7 +65,7 @@ Description:    Allows the root user to read or write 64 bit data directly
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory
 
-What:           /sys/kernel/debug/accel/<n>/data_dma
+What:           /sys/kernel/debug/accel/<parent_device>/data_dma
 Date:           Apr 2021
 KernelVersion:  5.13
 Contact:        ogabbay@kernel.org
@@ -83,7 +83,7 @@ Description:    Allows the root user to read from the device's internal
                 workloads.
                 Only supported on GAUDI at this stage.
 
-What:           /sys/kernel/debug/accel/<n>/device
+What:           /sys/kernel/debug/accel/<parent_device>/device
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -91,14 +91,14 @@ Description:    Enables the root user to set the device to specific state.
                 Valid values are "disable", "enable", "suspend", "resume".
                 User can read this property to see the valid values
 
-What:           /sys/kernel/debug/accel/<n>/device_release_watchdog_timeout
+What:           /sys/kernel/debug/accel/<parent_device>/device_release_watchdog_timeout
 Date:           Oct 2022
 KernelVersion:  6.2
 Contact:        ttayar@habana.ai
 Description:    The watchdog timeout value in seconds for a device release upon
                 certain error cases, after which the device is reset.
 
-What:           /sys/kernel/debug/accel/<n>/dma_size
+What:           /sys/kernel/debug/accel/<parent_device>/dma_size
 Date:           Apr 2021
 KernelVersion:  5.13
 Contact:        ogabbay@kernel.org
@@ -108,7 +108,7 @@ Description:    Specify the size of the DMA transaction when using DMA to read
                 When the write is finished, the user can read the "data_dma"
                 blob
 
-What:           /sys/kernel/debug/accel/<n>/dump_razwi_events
+What:           /sys/kernel/debug/accel/<parent_device>/dump_razwi_events
 Date:           Aug 2022
 KernelVersion:  5.20
 Contact:        fkassabri@habana.ai
@@ -117,7 +117,7 @@ Description:    Dumps all razwi events to dmesg if exist.
                 the routine will clear the status register.
                 Usage: cat dump_razwi_events
 
-What:           /sys/kernel/debug/accel/<n>/dump_security_violations
+What:           /sys/kernel/debug/accel/<parent_device>/dump_security_violations
 Date:           Jan 2021
 KernelVersion:  5.12
 Contact:        ogabbay@kernel.org
@@ -125,14 +125,14 @@ Description:    Dumps all security violations to dmesg. This will also ack
                 all security violations meanings those violations will not be
                 dumped next time user calls this API
 
-What:           /sys/kernel/debug/accel/<n>/engines
+What:           /sys/kernel/debug/accel/<parent_device>/engines
 Date:           Jul 2019
 KernelVersion:  5.3
 Contact:        ogabbay@kernel.org
 Description:    Displays the status registers values of the device engines and
                 their derived idle status
 
-What:           /sys/kernel/debug/accel/<n>/i2c_addr
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_addr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -140,7 +140,7 @@ Description:    Sets I2C device address for I2C transaction that is generated
                 by the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_bus
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_bus
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -148,7 +148,7 @@ Description:    Sets I2C bus address for I2C transaction that is generated by
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_data
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_data
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -157,7 +157,7 @@ Description:    Triggers an I2C transaction that is generated by the device's
                 reading from the file generates a read transaction, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_len
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_len
 Date:           Dec 2021
 KernelVersion:  5.17
 Contact:        obitton@habana.ai
@@ -165,7 +165,7 @@ Description:    Sets I2C length in bytes for I2C transaction that is generated b
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_reg
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_reg
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -173,35 +173,35 @@ Description:    Sets I2C register id for I2C transaction that is generated by
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/led0
+What:           /sys/kernel/debug/accel/<parent_device>/led0
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the first S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/led1
+What:           /sys/kernel/debug/accel/<parent_device>/led1
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the second S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/led2
+What:           /sys/kernel/debug/accel/<parent_device>/led2
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the third S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/memory_scrub
+What:           /sys/kernel/debug/accel/<parent_device>/memory_scrub
 Date:           May 2022
 KernelVersion:  5.19
 Contact:        dhirschfeld@habana.ai
 Description:    Allows the root user to scrub the dram memory. The scrubbing
                 value can be set using the debugfs file memory_scrub_val.
 
-What:           /sys/kernel/debug/accel/<n>/memory_scrub_val
+What:           /sys/kernel/debug/accel/<parent_device>/memory_scrub_val
 Date:           May 2022
 KernelVersion:  5.19
 Contact:        dhirschfeld@habana.ai
@@ -209,7 +209,7 @@ Description:    The value to which the dram will be set to when the user
                 scrubs the dram using 'memory_scrub' debugfs file and
                 the scrubbing value when using module param 'memory_scrub'
 
-What:           /sys/kernel/debug/accel/<n>/mmu
+What:           /sys/kernel/debug/accel/<parent_device>/mmu
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -219,7 +219,7 @@ Description:    Displays the hop values and physical address for a given ASID
                 e.g. to display info about VA 0x1000 for ASID 1 you need to do:
                 echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
 
-What:           /sys/kernel/debug/accel/<n>/mmu_error
+What:           /sys/kernel/debug/accel/<parent_device>/mmu_error
 Date:           Mar 2021
 KernelVersion:  5.12
 Contact:        fkassabri@habana.ai
@@ -229,7 +229,7 @@ Description:    Check and display page fault or access violation mmu errors for
                 echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
                 cat /sys/kernel/debug/accel/0/mmu_error
 
-What:           /sys/kernel/debug/accel/<n>/monitor_dump
+What:           /sys/kernel/debug/accel/<parent_device>/monitor_dump
 Date:           Mar 2022
 KernelVersion:  5.19
 Contact:        osharabi@habana.ai
@@ -243,7 +243,7 @@ Description:    Allows the root user to dump monitors status from the device's
                 This interface doesn't support concurrency in the same device.
                 Only supported on GAUDI.
 
-What:           /sys/kernel/debug/accel/<n>/monitor_dump_trig
+What:           /sys/kernel/debug/accel/<parent_device>/monitor_dump_trig
 Date:           Mar 2022
 KernelVersion:  5.19
 Contact:        osharabi@habana.ai
@@ -253,14 +253,14 @@ Description:    Triggers dump of monitor data. The value to trigger the operatio
                 When the write is finished, the user can read the "monitor_dump"
                 blob
 
-What:           /sys/kernel/debug/accel/<n>/set_power_state
+What:           /sys/kernel/debug/accel/<parent_device>/set_power_state
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the PCI power state. Valid values are "1" for D0 and "2"
                 for D3Hot
 
-What:           /sys/kernel/debug/accel/<n>/skip_reset_on_timeout
+What:           /sys/kernel/debug/accel/<parent_device>/skip_reset_on_timeout
 Date:           Jun 2021
 KernelVersion:  5.13
 Contact:        ynudelman@habana.ai
@@ -268,7 +268,7 @@ Description:    Sets the skip reset on timeout option for the device. Value of
                 "0" means device will be reset in case some CS has timed out,
                 otherwise it will not be reset.
 
-What:           /sys/kernel/debug/accel/<n>/state_dump
+What:           /sys/kernel/debug/accel/<parent_device>/state_dump
 Date:           Oct 2021
 KernelVersion:  5.15
 Contact:        ynudelman@habana.ai
@@ -279,7 +279,7 @@ Description:    Gets the state dump occurring on a CS timeout or failure.
                 Writing an integer X discards X state dumps, so that the
                 next read would return X+1-st newest state dump.
 
-What:           /sys/kernel/debug/accel/<n>/stop_on_err
+What:           /sys/kernel/debug/accel/<parent_device>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
@@ -287,13 +287,13 @@ Description:    Sets the stop-on_error option for the device engines. Value of
                 "0" is for disable, otherwise enable.
                 Relevant only for GOYA and GAUDI.
 
-What:           /sys/kernel/debug/accel/<n>/timeout_locked
+What:           /sys/kernel/debug/accel/<parent_device>/timeout_locked
 Date:           Sep 2021
 KernelVersion:  5.16
 Contact:        obitton@habana.ai
 Description:    Sets the command submission timeout value in seconds.
 
-What:           /sys/kernel/debug/accel/<n>/userptr
+What:           /sys/kernel/debug/accel/<parent_device>/userptr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -301,7 +301,7 @@ Description:    Displays a list with information about the current user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
 
-What:           /sys/kernel/debug/accel/<n>/userptr_lookup
+What:           /sys/kernel/debug/accel/<parent_device>/userptr_lookup
 Date:           Oct 2021
 KernelVersion:  5.15
 Contact:        ogabbay@kernel.org
@@ -309,7 +309,7 @@ Description:    Allows to search for specific user pointers (user virtual
                 addresses) that are pinned and mapped to DMA addresses, and see
                 their resolution to the specific dma address.
 
-What:           /sys/kernel/debug/accel/<n>/vm
+What:           /sys/kernel/debug/accel/<parent_device>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
-- 
GitLab


From aa5cea38ce687021bf97f9f4cdb18b26db290964 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 3 May 2023 13:19:17 +0300
Subject: [PATCH 1239/2340] accel/habanalabs: add parent_device sysfs attribute

The device debugfs directory was modified to be named as the
device-name.
This name is the parent device name, i.e. either the PCI address in case
of an ASIC, or the simulator device name in case of a simulator.

This change makes it more difficult for a user to access the debugfs
directory for a specific accel device, because he can't just use the
accel minor id, but he needs to do more device-dependent operations to
get the device name.

To make it easier to get this name, add a 'parent_device' sysfs
attribute that the user can read using the minor id before accessing
debugfs.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 Documentation/ABI/testing/sysfs-driver-habanalabs | 6 ++++++
 drivers/accel/habanalabs/common/habanalabs.h      | 3 +++
 drivers/accel/habanalabs/common/sysfs.c           | 9 +++++++++
 3 files changed, 18 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index 89fe3b09d4adb..4244f5af4b54b 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -155,6 +155,12 @@ KernelVersion:  not yet upstreamed
 Contact:        ogabbay@kernel.org
 Description:    Displays the device's module id
 
+What:           /sys/class/accel/accel<n>/device/parent_device
+Date:           Nov 2023
+KernelVersion:  6.8
+Contact:        ttayar@habana.ai
+Description:    Displays the name of the parent device of the accel device
+
 What:           /sys/class/accel/accel<n>/device/pci_addr
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index dd3fe3ddc00a6..2a900c9941fee 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3521,6 +3521,9 @@ struct hl_device {
 	u8				heartbeat;
 };
 
+/* Retrieve PCI device name in case of a PCI device or dev name in simulator */
+#define HL_DEV_NAME(hdev)	\
+		((hdev)->pdev ? dev_name(&(hdev)->pdev->dev) : "NA-DEVICE")
 
 /**
  * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index c940c5f1d109e..8a9f988321576 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -410,6 +410,13 @@ static ssize_t module_id_show(struct device *dev,
 	return sprintf(buf, "%u\n", le32_to_cpu(hdev->asic_prop.cpucp_info.card_location));
 }
 
+static ssize_t parent_device_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", HL_DEV_NAME(hdev));
+}
+
 static DEVICE_ATTR_RO(armcp_kernel_ver);
 static DEVICE_ATTR_RO(armcp_ver);
 static DEVICE_ATTR_RO(cpld_ver);
@@ -430,6 +437,7 @@ static DEVICE_ATTR_RO(uboot_ver);
 static DEVICE_ATTR_RO(fw_os_ver);
 static DEVICE_ATTR_RO(security_enabled);
 static DEVICE_ATTR_RO(module_id);
+static DEVICE_ATTR_RO(parent_device);
 
 static struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
@@ -456,6 +464,7 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_fw_os_ver.attr,
 	&dev_attr_security_enabled.attr,
 	&dev_attr_module_id.attr,
+	&dev_attr_parent_device.attr,
 	NULL,
 };
 
-- 
GitLab


From bc5f15abcf95ce7e4c2e33daddcb5850ee5e671d Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 29 Nov 2023 16:20:31 +0200
Subject: [PATCH 1240/2340] accel/habanalabs/gaudi2: avoid overriding existing
 undefined opcode data

Part of the undefined opcode data is updated in
gaudi2_handle_qman_err_generic() and some in
handle_lower_qman_data_on_err().
However, the 'write_enable' flag is checked only in
gaudi2_handle_qman_err_generic(), and information of more than a single
error can be mixed there.

Moreover, handle_lower_qman_data_on_err() is called only for the lower
QMAN, so for an error in the upper QMAN there is only a partial info.

Move all the data update to be done in a single place, protected by the
'write_enable' flag.
As mainly the lower QMAN's info is interesting, avoid saving the partial
info for the upper QMAN.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 40 +++++++++++-------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index f81b57649b00a..e0e5615ef9b0f 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7858,10 +7858,11 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 	return !!ecc_data->is_critical;
 }
 
-static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
 {
-	u32 lo, hi, cq_ptr_size, cp_sts;
+	struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
 	u64 cq_ptr, cp_current_inst;
+	u32 lo, hi, cq_size, cp_sts;
 	bool is_arc_cq;
 
 	cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
@@ -7871,12 +7872,12 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base,
 		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
 		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
 		cq_ptr = ((u64) hi) << 32 | lo;
-		cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
+		cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
 	} else {
 		lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
 		hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
 		cq_ptr = ((u64) hi) << 32 | lo;
-		cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
+		cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
 	}
 
 	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
@@ -7885,12 +7886,16 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base,
 
 	dev_info(hdev->dev,
 		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
-		is_arc_cq ? "ARC_" : "", cq_ptr, cq_ptr_size, cp_current_inst);
+		is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
 
-	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
-		hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
-		hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
+	if (undef_opcode->write_enable) {
+		memset(undef_opcode, 0, sizeof(*undef_opcode));
+		undef_opcode->timestamp = ktime_get();
+		undef_opcode->cq_addr = cq_ptr;
+		undef_opcode->cq_size = cq_size;
+		undef_opcode->engine_id = engine_id;
+		undef_opcode->stream_id = QMAN_STREAMS;
+		undef_opcode->write_enable = 0;
 	}
 }
 
@@ -7929,19 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
 				error_count++;
 			}
 
-		/* check for undefined opcode */
-		if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK) {
+		/* Check for undefined opcode error in lower QM */
+		if ((i == QMAN_STREAMS) &&
+				(glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
+			handle_lower_qman_data_on_err(hdev, qman_base,
+							gaudi2_queue_id_to_engine_id[qid_base]);
 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
-			if (hdev->captured_err_info.undef_opcode.write_enable) {
-				memset(&hdev->captured_err_info.undef_opcode, 0,
-						sizeof(hdev->captured_err_info.undef_opcode));
-				hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
-				hdev->captured_err_info.undef_opcode.engine_id =
-							gaudi2_queue_id_to_engine_id[qid_base];
-			}
-
-			if (i == QMAN_STREAMS)
-				handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
 		}
 	}
 
-- 
GitLab


From a9f07790a4b2250f0140e9a61c7f842fd9b618c7 Mon Sep 17 00:00:00 2001
From: Xingyuan Mo <hdthky0@gmail.com>
Date: Fri, 8 Dec 2023 21:00:59 +0800
Subject: [PATCH 1241/2340] accel/habanalabs: fix information leak in
 sec_attest_info()

This function may copy the pad0 field of struct hl_info_sec_attest to user
mode which has not been initialized, resulting in leakage of kernel heap
data to user mode. To prevent this, use kzalloc() to allocate and zero out
the buffer, which can also eliminate other uninitialized holes, if any.

Fixes: 0c88760f8f5e ("habanalabs/gaudi2: add secured attestation info uapi")
Signed-off-by: Xingyuan Mo <hdthky0@gmail.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/common/habanalabs_ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index a92713e0e5801..1dd6e23172caa 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -688,7 +688,7 @@ static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if (!sec_attest_info)
 		return -ENOMEM;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		rc = -ENOMEM;
 		goto free_sec_attest_info;
-- 
GitLab


From 933a2a376fb3f22ba4774f74233571504ac56b02 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 19 Dec 2023 14:57:34 +1100
Subject: [PATCH 1242/2340] drm: using mul_u32_u32() requires linux/math64.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some pending include file cleanups produced this error:

In file included from include/linux/kernel.h:27,
                 from drivers/gpu/ipu-v3/ipu-dp.c:7:
include/drm/drm_color_mgmt.h: In function 'drm_color_lut_extract':
include/drm/drm_color_mgmt.h:45:46: error: implicit declaration of function 'mul_u32_u32' [-Werror=implicit-function-declaration]
   45 |                 return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(user_input, (1 << bit_precision) - 1),
      |                                              ^~~~~~~~~~~

Fixes: c6fbb6bca108 ("drm: Fix color LUT rounding")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231219145734.13e40e1e@canb.auug.org.au
---
 include/drm/drm_color_mgmt.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h
index 54b2b2467bfd7..ed81741036d76 100644
--- a/include/drm/drm_color_mgmt.h
+++ b/include/drm/drm_color_mgmt.h
@@ -24,6 +24,7 @@
 #define __DRM_COLOR_MGMT_H__
 
 #include <linux/ctype.h>
+#include <linux/math64.h>
 #include <drm/drm_property.h>
 
 struct drm_crtc;
-- 
GitLab


From 6e4337f695c25162f0296934152506ad596fcebf Mon Sep 17 00:00:00 2001
From: Aric Cyr <aric.cyr@amd.com>
Date: Thu, 30 Nov 2023 18:54:48 -0500
Subject: [PATCH 1243/2340] drm/amd/display: Unify optimize_required flags and
 VRR adjustments

[why]
There is only a single call to dc_post_update_surfaces_to_stream so
there is no need to have two flags to control it. Unifying this to a
single flag allows dc_stream_adjust_vmin_vmax to skip actual
programming when there is no change required.

[how]
Remove wm_optimze_required flag and set only optimize_required in its
place. Then in dc_stream_adjust_vmin_vmax, check that the stream timing
range matches the requested one and skip programming if they are equal.

Reviewed-by: Jun Lei <jun.lei@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Aric Cyr <aric.cyr@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c           | 14 +++++---------
 drivers/gpu/drm/amd/display/dc/dc.h                |  1 -
 drivers/gpu/drm/amd/display/dc/dc_stream.h         |  2 --
 .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c    |  2 +-
 .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c    |  8 ++++----
 5 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 9d3925603979e..0d42074f33a64 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -409,9 +409,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 	 * avoid conflicting with firmware updates.
 	 */
 	if (dc->ctx->dce_version > DCE_VERSION_MAX)
-		if (dc->optimized_required || dc->wm_optimized_required)
+		if (dc->optimized_required)
 			return false;
 
+	if (!memcmp(&stream->adjust, adjust, sizeof(*adjust)))
+		return true;
+
 	stream->adjust.v_total_max = adjust->v_total_max;
 	stream->adjust.v_total_mid = adjust->v_total_mid;
 	stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num;
@@ -2223,7 +2226,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 	}
 
 	dc->optimized_required = false;
-	dc->wm_optimized_required = false;
 }
 
 static void init_state(struct dc *dc, struct dc_state *context)
@@ -2743,8 +2745,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
 		} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
 			dc->optimized_required = true;
 		}
-
-		dc->optimized_required |= dc->wm_optimized_required;
 	}
 
 	return type;
@@ -2952,9 +2952,6 @@ static void copy_stream_update_to_stream(struct dc *dc,
 	if (update->vrr_active_fixed)
 		stream->vrr_active_fixed = *update->vrr_active_fixed;
 
-	if (update->crtc_timing_adjust)
-		stream->adjust = *update->crtc_timing_adjust;
-
 	if (update->dpms_off)
 		stream->dpms_off = *update->dpms_off;
 
@@ -4401,8 +4398,7 @@ static bool full_update_required(struct dc *dc,
 			stream_update->mst_bw_update ||
 			stream_update->func_shaper ||
 			stream_update->lut3d_func ||
-			stream_update->pending_test_pattern ||
-			stream_update->crtc_timing_adjust))
+			stream_update->pending_test_pattern))
 		return true;
 
 	if (stream) {
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 2c85f8ee682fe..c87ec3e0a8cfd 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1026,7 +1026,6 @@ struct dc {
 
 	/* Require to optimize clocks and bandwidth for added/removed planes */
 	bool optimized_required;
-	bool wm_optimized_required;
 	bool idle_optimizations_allowed;
 	bool enable_c20_dtm_b0;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 4ac48c346a339..27118e6727548 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -130,7 +130,6 @@ union stream_update_flags {
 		uint32_t wb_update:1;
 		uint32_t dsc_changed : 1;
 		uint32_t mst_bw : 1;
-		uint32_t crtc_timing_adjust : 1;
 		uint32_t fams_changed : 1;
 	} bits;
 
@@ -342,7 +341,6 @@ struct dc_stream_update {
 	struct dc_3dlut *lut3d_func;
 
 	struct test_pattern *pending_test_pattern;
-	struct dc_crtc_timing_adjust *crtc_timing_adjust;
 };
 
 bool dc_is_stream_unchanged(
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index cdb903116eb7c..0d9e41315f849 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -3068,7 +3068,7 @@ void dcn10_prepare_bandwidth(
 			context,
 			false);
 
-	dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
+	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
 			&context->bw_ctx.bw.dcn.watermarks,
 			dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 			true);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index c3c83178eb1e3..5bb5e29602769 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -2146,10 +2146,10 @@ void dcn20_prepare_bandwidth(
 	}
 
 	/* program dchubbub watermarks:
-	 * For assigning wm_optimized_required, use |= operator since we don't want
+	 * For assigning optimized_required, use |= operator since we don't want
 	 * to clear the value if the optimize has not happened yet
 	 */
-	dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
 					&context->bw_ctx.bw.dcn.watermarks,
 					dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 					false);
@@ -2162,10 +2162,10 @@ void dcn20_prepare_bandwidth(
 	if (hubbub->funcs->program_compbuf_size) {
 		if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
 			compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
-			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
 		} else {
 			compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
-			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
 		}
 
 		hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
-- 
GitLab


From a0d25fcd75d40441712ff210cba2e49fc771a8b3 Mon Sep 17 00:00:00 2001
From: Johnson Chen <johnson.chen@amd.com>
Date: Mon, 4 Dec 2023 18:48:15 -0500
Subject: [PATCH 1244/2340] drm/amd/display: Add function for dumping clk
 registers

[why]
Allow devs to check raw clk register values by dumping them on the log

[how]
Add clk register dump implementation

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Johnson Chen <johnson.chen@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  |  7 +++----
 .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h   | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 8d4c0b209872b..789091f3bdc85 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -50,6 +50,7 @@
 #include "dc_dmub_srv.h"
 #include "link.h"
 #include "logger_types.h"
+
 #undef DC_LOGGER
 #define DC_LOGGER \
 	clk_mgr->base.base.ctx->logger
@@ -417,9 +418,8 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a,
 }
 
 static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
-		struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+		struct clk_mgr_dcn35 *clk_mgr)
 {
-
 }
 
 static struct clk_bw_params dcn35_bw_params = {
@@ -986,7 +986,6 @@ void dcn35_clk_mgr_construct(
 		struct dccg *dccg)
 {
 	struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 };
-	struct clk_log_info log_info = {0};
 	clk_mgr->base.base.ctx = ctx;
 	clk_mgr->base.base.funcs = &dcn35_funcs;
 
@@ -1039,7 +1038,7 @@ void dcn35_clk_mgr_construct(
 		dcn35_bw_params.wm_table = ddr5_wm_table;
 	}
 	/* Saved clocks configured at boot for debug purposes */
-	dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
+	dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr);
 
 	clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base);
 	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index fa9614bcb1605..cbba39d251e53 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -62,6 +62,25 @@ struct dcn3_clk_internal {
 	uint32_t CLK4_CLK0_CURRENT_CNT; //fclk
 };
 
+struct dcn35_clk_internal {
+	int dummy;
+	uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
+	uint32_t CLK1_CLK1_CURRENT_CNT; //dppclk
+	uint32_t CLK1_CLK2_CURRENT_CNT; //dprefclk
+	uint32_t CLK1_CLK3_CURRENT_CNT; //dcfclk
+	uint32_t CLK1_CLK4_CURRENT_CNT; //dtbclk
+	//uint32_t CLK1_CLK5_CURRENT_CNT; //dpiaclk
+	//uint32_t CLK1_CLK6_CURRENT_CNT; //srdbgclk
+	uint32_t CLK1_CLK3_DS_CNTL;	    //dcf_deep_sleep_divider
+	uint32_t CLK1_CLK3_ALLOW_DS;	//dcf_deep_sleep_allow
+
+	uint32_t CLK1_CLK0_BYPASS_CNTL; //dispclk bypass
+	uint32_t CLK1_CLK1_BYPASS_CNTL; //dppclk bypass
+	uint32_t CLK1_CLK2_BYPASS_CNTL; //dprefclk bypass
+	uint32_t CLK1_CLK3_BYPASS_CNTL; //dcfclk bypass
+	uint32_t CLK1_CLK4_BYPASS_CNTL; //dtbclk bypass
+};
+
 struct dcn301_clk_internal {
 	int dummy;
 	uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
-- 
GitLab


From 4e08378b2dc1fbe64c9e1730f3260672b22fac03 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 14 Dec 2023 15:07:32 -0600
Subject: [PATCH 1245/2340] drm/amd/display: Add a new DC debug mask for PSR-SU

Some issues have been raised that appear to be tied to PSR-SU.
To allow users to confirm they're tied to PSR-SU without turning off
PSR entirely introduce a new debug mask:

amdgpu.dcdebugmask=0x200

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 3 +++
 drivers/gpu/drm/amd/include/amd_shared.h              | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index 08ce3bb8f640d..1f08c6564c3bf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -51,6 +51,9 @@ static bool link_supports_psrsu(struct dc_link *link)
 	    !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap)
 		return false;
 
+	if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU)
+		return false;
+
 	return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub);
 }
 
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index bf7f258c324a1..c739f12fb937d 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -258,6 +258,7 @@ enum DC_DEBUG_MASK {
 	DC_DISABLE_REPLAY = 0x50,
 	DC_ENABLE_DPIA_TRACE = 0x80,
 	DC_ENABLE_DML2 = 0x100,
+	DC_DISABLE_PSR_SU = 0x200,
 };
 
 enum amd_dpm_forced_level;
-- 
GitLab


From 65550a9cc5c371b4027c8e8199293899cb2f5af7 Mon Sep 17 00:00:00 2001
From: Hamza Mahfooz <hamza.mahfooz@amd.com>
Date: Fri, 15 Dec 2023 10:37:39 -0500
Subject: [PATCH 1246/2340] drm/amd/display: disable FPO and SubVP for older
 DMUB versions on DCN32x

There have recently been changes that break backwards compatibility,
that were introduced into DMUB firmware (for DCN32x) concerning FPO and
SubVP. So, since those are just power optimization features, we can just
disable them unless the user is using a new enough version of DMUB
firmware.

Cc: stable@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2870
Fixes: ed6e2782e974 ("drm/amd/display: For cursor P-State allow for SubVP")
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Closes: https://lore.kernel.org/r/CABXGCsNRb0QbF2pKLJMDhVOKxyGD6-E+8p-4QO6FOWa6zp22_A@mail.gmail.com/
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 5c323718ec906..0f0972ad441a9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -960,6 +960,12 @@ void dcn32_init_hw(struct dc *dc)
 		dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support;
 		dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable;
 		dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
+
+		if (dc->ctx->dmub_srv->dmub->fw_version <
+		    DMUB_FW_VERSION(7, 0, 35)) {
+			dc->debug.force_disable_subvp = true;
+			dc->debug.disable_fpo_optimizations = true;
+		}
 	}
 }
 
-- 
GitLab


From 5dd0bd06cb6c02b445d28144a83c561225c2fa5f Mon Sep 17 00:00:00 2001
From: Wayne Lin <wayne.lin@amd.com>
Date: Tue, 5 Dec 2023 14:55:31 +0800
Subject: [PATCH 1247/2340] drm/amd/display: Add case for dcn35 to support usb4
 dmub hpd event

[Why & how]
Refactor dc_is_dmub_outbox_supported() a bit and add case for dcn35 to
register dmub outbox notification irq to handle usb4 relevant hpd event.

Reviewed-by: Roman Li <roman.li@amd.com>
Reviewed-by: Jun Lei <jun.lei@amd.com>
Signed-off-by: Wayne Lin <wayne.lin@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 26 ++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 0d42074f33a64..445f87fa0eac8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -5089,18 +5089,28 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc)
  */
 bool dc_is_dmub_outbox_supported(struct dc *dc)
 {
-	/* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
-	if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
-	    dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
-	    !dc->debug.dpia_debug.bits.disable_dpia)
-		return true;
+	switch (dc->ctx->asic_id.chip_family) {
 
-	if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
-	    !dc->debug.dpia_debug.bits.disable_dpia)
-		return true;
+	case FAMILY_YELLOW_CARP:
+		/* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
+		if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
+		    !dc->debug.dpia_debug.bits.disable_dpia)
+			return true;
+	break;
+
+	case AMDGPU_FAMILY_GC_11_0_1:
+	case AMDGPU_FAMILY_GC_11_5_0:
+		if (!dc->debug.dpia_debug.bits.disable_dpia)
+			return true;
+	break;
+
+	default:
+		break;
+	}
 
 	/* dmub aux needs dmub notifications to be enabled */
 	return dc->debug.enable_dmub_aux_for_legacy_ddc;
+
 }
 
 /**
-- 
GitLab


From 6fb12518ca58412dc51054e2a7400afb41328d85 Mon Sep 17 00:00:00 2001
From: Josip Pavic <josip.pavic@amd.com>
Date: Tue, 5 Dec 2023 12:01:05 -0500
Subject: [PATCH 1248/2340] drm/amd/display: make flip_timestamp_in_us a 64-bit
 variable

[Why]
This variable currently overflows after about 71 minutes. This doesn't
cause any known functional issues but it does make debugging more
difficult.

[How]
Make it a 64-bit variable.

Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Josip Pavic <josip.pavic@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index e2a3aa8812df4..811474f4419bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -244,7 +244,7 @@ enum pixel_format {
 #define DC_MAX_DIRTY_RECTS 3
 struct dc_flip_addrs {
 	struct dc_plane_address address;
-	unsigned int flip_timestamp_in_us;
+	unsigned long long flip_timestamp_in_us;
 	bool flip_immediate;
 	/* TODO: add flip duration for FreeSync */
 	bool triplebuffer_flips;
-- 
GitLab


From b55349a03837213f2b32af9e7d3d7c9083f297d1 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 15 Dec 2023 15:42:38 -0600
Subject: [PATCH 1249/2340] Documentation/amdgpu: Add Hawk Point processors

These have been announced so add them to the table.

Link: https://www.amd.com/en/product/13971
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 Documentation/gpu/amdgpu/apu-asic-info-table.csv | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
index 2e76b427ba1ee..b8ada69919f39 100644
--- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv
+++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
@@ -9,4 +9,5 @@ Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembran
 Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
 Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
 Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
-Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
\ No newline at end of file
+Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
+Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
-- 
GitLab


From ab44f9daa89c0f081cfd2d79bfe40b27b556a946 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 15 Dec 2023 15:44:16 -0600
Subject: [PATCH 1250/2340] Documentation/amdgpu: Remove a spurious character

`/` wasn't meant to be in the Dragon Range line

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 Documentation/gpu/amdgpu/apu-asic-info-table.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
index b8ada69919f39..18868abe2a913 100644
--- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv
+++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
@@ -7,7 +7,7 @@ SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
 Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
 Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
 Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
-Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
+Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
 Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
 Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
 Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
-- 
GitLab


From e48c8cbeebbd7e2e4d3fe8508b4beb7c00800de4 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 15 Dec 2023 14:37:45 -0600
Subject: [PATCH 1251/2340] drm/amd: Add missing definitions for
 `SMU_MAX_LEVELS_VDDGFX`

It is reported that on a Topaz dGPU the kernel emits:
	amdgpu: can't get the mac of 5

This is because there is no definition for max levels of VDDGFX
declared for SMU71 or SMU7. The correct definition is VDDC so
use this.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3049
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c      | 1 +
 drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
index 9e4228232f024..ad1fd3150d03e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
@@ -2298,6 +2298,7 @@ static uint32_t ci_get_mac_definition(uint32_t value)
 	case SMU_MAX_ENTRIES_SMIO:
 		return SMU7_MAX_ENTRIES_SMIO;
 	case SMU_MAX_LEVELS_VDDC:
+	case SMU_MAX_LEVELS_VDDGFX:
 		return SMU7_MAX_LEVELS_VDDC;
 	case SMU_MAX_LEVELS_VDDCI:
 		return SMU7_MAX_LEVELS_VDDCI;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
index 97d9802fe6731..17d2f5bff4a7e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
@@ -2263,6 +2263,7 @@ static uint32_t iceland_get_mac_definition(uint32_t value)
 	case SMU_MAX_ENTRIES_SMIO:
 		return SMU71_MAX_ENTRIES_SMIO;
 	case SMU_MAX_LEVELS_VDDC:
+	case SMU_MAX_LEVELS_VDDGFX:
 		return SMU71_MAX_LEVELS_VDDC;
 	case SMU_MAX_LEVELS_VDDCI:
 		return SMU71_MAX_LEVELS_VDDCI;
-- 
GitLab


From 006ad514a50cc49d904fd004b69c842ddfaabf1f Mon Sep 17 00:00:00 2001
From: Xiaogang Chen <xiaogang.chen@amd.com>
Date: Fri, 15 Dec 2023 17:14:07 -0600
Subject: [PATCH 1252/2340] drm/amdkfd: Use partial hmm page walk during buffer
 validation in SVM

SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial
migration/mapping do validation on same vm range as migration/map do instead of
whole svm range that can be very large. This change is expected to improve svm
code performance.

Signed-off-by: Xiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: Philip Yang <philip.yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 ++++-------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 79 +++++++++++-------------
 2 files changed, 48 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b854cbf06dcee..d630100b9e91b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)
 	put_page(page);
 }
 
-static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
-{
-	unsigned long cpages = 0;
-	unsigned long i;
-
-	for (i = 0; i < migrate->npages; i++) {
-		if (migrate->src[i] & MIGRATE_PFN_VALID &&
-		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
-			cpages++;
-	}
-	return cpages;
-}
-
 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
 {
 	unsigned long upages = 0;
@@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 	struct dma_fence *mfence = NULL;
 	struct migrate_vma migrate = { 0 };
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	dma_addr_t *scratch;
 	void *buf;
 	int r = -ENOMEM;
@@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
 	migrate_vma_pages(&migrate);
 
-	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-		svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
-
 	svm_migrate_copy_done(adev, mfence);
 	migrate_vma_finalize(&migrate);
 
+	mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
+	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+			 mpages, cpages, migrate.npages);
+
 	kfd_smi_event_migration_end(node, p->lead_thread->pid,
 				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
 				    0, node->id, trigger);
@@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 out_free:
 	kvfree(buf);
 out:
-	if (!r && cpages) {
+	if (!r && mpages) {
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+			WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
 
-		return cpages;
+		return mpages;
 	}
 	return r;
 }
@@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	struct vm_area_struct *vma;
 	uint64_t ttm_res_offset;
 	struct kfd_node *node;
-	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	long r = 0;
 
 	if (start_mgr < prange->start || last_mgr > prange->last) {
@@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 			pr_debug("failed %ld to migrate\n", r);
 			break;
 		} else {
-			cpages += r;
+			mpages += r;
 		}
 		ttm_res_offset += next - addr;
 		addr = next;
 	}
 
-	if (cpages) {
+	if (mpages) {
 		prange->actual_loc = best_loc;
-		prange->vram_pages = prange->vram_pages + cpages;
+		prange->vram_pages += mpages;
 	} else if (!prange->actual_loc) {
 		/* if no page migrated and all pages from prange are at
 		 * sys ram drop svm_bo got from svm_range_vram_node_new
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index fb0011f802771..aacd85a4fce1e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		      unsigned long offset, unsigned long npages,
-		      unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
+		      unsigned long *hmm_pfns, uint32_t gpuidx)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
 	dma_addr_t *addr = prange->dma_addr[gpuidx];
 	struct device *dev = adev->dev;
 	struct page *page;
-	uint64_t vram_pages_dev;
 	int i, r;
 
 	if (!addr) {
@@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}
 
-	vram_pages_dev = 0;
 	addr += offset;
 	for (i = 0; i < npages; i++) {
 		if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
@@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		if (is_zone_device_page(page)) {
 			struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
 
-			vram_pages_dev++;
 			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
 				   bo_adev->vm_manager.vram_base_offset -
 				   bo_adev->kfd.pgmap.range.start;
@@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 	}
-	*vram_pages = vram_pages_dev;
+
 	return 0;
 }
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		  unsigned long offset, unsigned long npages,
-		  unsigned long *hmm_pfns, uint64_t *vram_pages)
+		  unsigned long *hmm_pfns)
 {
 	struct kfd_process *p;
 	uint32_t gpuidx;
@@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}
 
 		r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
-					  hmm_pfns, gpuidx, vram_pages);
+					  hmm_pfns, gpuidx);
 		if (r)
 			break;
 	}
@@ -885,14 +882,29 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
 
 static void *
 svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
-		     uint64_t offset)
+		     uint64_t offset, uint64_t *vram_pages)
 {
+	unsigned char *src = (unsigned char *)psrc + offset;
 	unsigned char *dst;
+	uint64_t i;
 
 	dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
 	if (!dst)
 		return NULL;
-	memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+	if (!vram_pages) {
+		memcpy(dst, src, num_elements * size);
+		return (void *)dst;
+	}
+
+	*vram_pages = 0;
+	for (i = 0; i < num_elements; i++) {
+		dma_addr_t *temp;
+		temp = (dma_addr_t *)dst + i;
+		*temp = *((dma_addr_t *)src + i);
+		if (*temp&SVM_RANGE_VRAM_DOMAIN)
+			(*vram_pages)++;
+	}
 
 	return (void *)dst;
 }
@@ -906,7 +918,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
 		if (!src->dma_addr[i])
 			continue;
 		dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
-					sizeof(*src->dma_addr[i]), src->npages, 0);
+					sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
 		if (!dst->dma_addr[i])
 			return -ENOMEM;
 	}
@@ -917,7 +929,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
 static int
 svm_range_split_array(void *ppnew, void *ppold, size_t size,
 		      uint64_t old_start, uint64_t old_n,
-		      uint64_t new_start, uint64_t new_n)
+		      uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)
 {
 	unsigned char *new, *old, *pold;
 	uint64_t d;
@@ -929,11 +941,12 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
 		return 0;
 
 	d = (new_start - old_start) * size;
-	new = svm_range_copy_array(pold, size, new_n, d);
+	/* get dma addr array for new range and calculte its vram page number */
+	new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);
 	if (!new)
 		return -ENOMEM;
 	d = (new_start == old_start) ? new_n * size : 0;
-	old = svm_range_copy_array(pold, size, old_n, d);
+	old = svm_range_copy_array(pold, size, old_n, d, NULL);
 	if (!old) {
 		kvfree(new);
 		return -ENOMEM;
@@ -955,10 +968,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
 	for (i = 0; i < MAX_GPU_INSTANCE; i++) {
 		r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
 					  sizeof(*old->dma_addr[i]), old->start,
-					  npages, new->start, new->npages);
+					  npages, new->start, new->npages,
+					  old->actual_loc ? &new->vram_pages : NULL);
 		if (r)
 			return r;
 	}
+	if (old->actual_loc)
+		old->vram_pages -= new->vram_pages;
 
 	return 0;
 }
@@ -982,11 +998,6 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 	new->svm_bo = svm_range_bo_ref(old->svm_bo);
 	new->ttm_res = old->ttm_res;
 
-	/* set new's vram_pages as old range's now, the acurate vram_pages
-	 * will be updated during mapping
-	 */
-	new->vram_pages = min(old->vram_pages, new->npages);
-
 	spin_lock(&new->svm_bo->list_lock);
 	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
 	spin_unlock(&new->svm_bo->list_lock);
@@ -1109,7 +1120,7 @@ static int
 svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
 		     struct list_head *insert_list, struct list_head *remap_list)
 {
-	struct svm_range *tail;
+	struct svm_range *tail = NULL;
 	int r = svm_range_split(prange, prange->start, new_last, &tail);
 
 	if (!r) {
@@ -1124,7 +1135,7 @@ static int
 svm_range_split_head(struct svm_range *prange, uint64_t new_start,
 		     struct list_head *insert_list, struct list_head *remap_list)
 {
-	struct svm_range *head;
+	struct svm_range *head = NULL;
 	int r = svm_range_split(prange, new_start, prange->last, &head);
 
 	if (!r) {
@@ -1573,7 +1584,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	struct svm_validate_context *ctx;
 	unsigned long start, end, addr;
 	struct kfd_process *p;
-	uint64_t vram_pages;
 	void *owner;
 	int32_t idx;
 	int r = 0;
@@ -1648,15 +1658,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 	}
 
-	vram_pages = 0;
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = map_start << PAGE_SHIFT;
+	end = (map_last + 1) << PAGE_SHIFT;
 	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
 		unsigned long map_start_vma;
 		unsigned long map_last_vma;
 		struct vm_area_struct *vma;
-		uint64_t vram_pages_vma;
 		unsigned long next = 0;
 		unsigned long offset;
 		unsigned long npages;
@@ -1683,13 +1691,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 
 		if (!r) {
-			offset = (addr - start) >> PAGE_SHIFT;
+			offset = (addr >> PAGE_SHIFT) - prange->start;
 			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-					      hmm_range->hmm_pfns, &vram_pages_vma);
+					      hmm_range->hmm_pfns);
 			if (r)
 				pr_debug("failed %d to dma map range\n", r);
-			else
-				vram_pages += vram_pages_vma;
 		}
 
 		svm_range_lock(prange);
@@ -1722,19 +1728,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		addr = next;
 	}
 
-	if (addr == end) {
-		prange->vram_pages = vram_pages;
-
-		/* if prange does not include any vram page and it
-		 * has not released svm_bo drop its svm_bo reference
-		 * and set its actaul_loc to sys ram
-		 */
-		if (!vram_pages && prange->ttm_res) {
-			prange->actual_loc = 0;
-			svm_range_vram_node_free(prange);
-		}
-	}
-
 	svm_range_unreserve_bos(ctx);
 	if (!r)
 		prange->validate_timestamp = ktime_get_boottime();
-- 
GitLab


From 8b09656b22c052d02e4761eb4cbe611289866245 Mon Sep 17 00:00:00 2001
From: Samson Tam <samson.tam@amd.com>
Date: Tue, 5 Dec 2023 21:25:36 -0500
Subject: [PATCH 1253/2340] drm/amd/display: skip error logging when DMUB is
 inactive from S3

[Why]
On resume from S3, while DMUB is inactive, DMUB queue and execute
calls will not work.  Skip reporting errors in these scenarios

[How]
Add new return code during DMUB queue and execute calls when DMUB
is in S3 state. Skip logging errors in these scenarios

Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Samson Tam <samson.tam@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 34 +++++++++++++------
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h   |  1 +
 .../gpu/drm/amd/display/dmub/src/dmub_srv.c   |  4 +--
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 1a4d615ccdec1..eb6f5640f19ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -140,7 +140,10 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 
 		if (status == DMUB_STATUS_QUEUE_FULL) {
 			/* Execute and wait for queue to become empty again. */
-			dmub_srv_cmd_execute(dmub);
+			status = dmub_srv_cmd_execute(dmub);
+			if (status == DMUB_STATUS_POWER_STATE_D3)
+				return false;
+
 			dmub_srv_wait_for_idle(dmub, 100000);
 
 			/* Requeue the command. */
@@ -148,16 +151,20 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 		}
 
 		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error queueing DMUB command: status=%d\n", status);
-			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			if (status != DMUB_STATUS_POWER_STATE_D3) {
+				DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+				dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			}
 			return false;
 		}
 	}
 
 	status = dmub_srv_cmd_execute(dmub);
 	if (status != DMUB_STATUS_OK) {
-		DC_ERROR("Error starting DMUB execution: status=%d\n", status);
-		dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		if (status != DMUB_STATUS_POWER_STATE_D3) {
+			DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		}
 		return false;
 	}
 
@@ -218,7 +225,10 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
 
 		if (status == DMUB_STATUS_QUEUE_FULL) {
 			/* Execute and wait for queue to become empty again. */
-			dmub_srv_cmd_execute(dmub);
+			status = dmub_srv_cmd_execute(dmub);
+			if (status == DMUB_STATUS_POWER_STATE_D3)
+				return false;
+
 			dmub_srv_wait_for_idle(dmub, 100000);
 
 			/* Requeue the command. */
@@ -226,16 +236,20 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
 		}
 
 		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error queueing DMUB command: status=%d\n", status);
-			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			if (status != DMUB_STATUS_POWER_STATE_D3) {
+				DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+				dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			}
 			return false;
 		}
 	}
 
 	status = dmub_srv_cmd_execute(dmub);
 	if (status != DMUB_STATUS_OK) {
-		DC_ERROR("Error starting DMUB execution: status=%d\n", status);
-		dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		if (status != DMUB_STATUS_POWER_STATE_D3) {
+			DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		}
 		return false;
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index d1a4ed6f5916e..c78c9224ab606 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -86,6 +86,7 @@ enum dmub_status {
 	DMUB_STATUS_TIMEOUT,
 	DMUB_STATUS_INVALID,
 	DMUB_STATUS_HW_FAILURE,
+	DMUB_STATUS_POWER_STATE_D3
 };
 
 /* enum dmub_asic - dmub asic identifier */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 53ac1c66dd86e..9ad738805320d 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -768,7 +768,7 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
 		return DMUB_STATUS_INVALID;
 
 	if (dmub->power_state != DMUB_POWER_STATE_D0)
-		return DMUB_STATUS_INVALID;
+		return DMUB_STATUS_POWER_STATE_D3;
 
 	if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
 	    dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
@@ -789,7 +789,7 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 		return DMUB_STATUS_INVALID;
 
 	if (dmub->power_state != DMUB_POWER_STATE_D0)
-		return DMUB_STATUS_INVALID;
+		return DMUB_STATUS_POWER_STATE_D3;
 
 	/**
 	 * Read back all the queued commands to ensure that they've
-- 
GitLab


From 72eaa723187b87f1793529eaadbcfaa836c17812 Mon Sep 17 00:00:00 2001
From: Charlene Liu <charlene.liu@amd.com>
Date: Wed, 6 Dec 2023 17:14:48 -0500
Subject: [PATCH 1254/2340] drm/amd/display: get dprefclk ss info from
 integration info table

[why & how]
we have two SSC_En:
we get ssc_info from dce_info for MPLL_SSC_EN.
we used to call VBIOS cmdtbl's smu_info's SS persentage for DPRECLK SS info,
is used for DP AUDIO and VBIOS' smu_info table was from systemIntegrationInfoTable.

since dcn35 VBIOS removed smu_info, driver need to use integrationInfotable directly.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Charlene Liu <charlene.liu@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/bios/bios_parser2.c    | 19 ++++++++++++++-----
 .../display/include/grph_object_ctrl_defs.h   |  2 ++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index fcd65a2057ad4..960c4b4f6ddf3 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1014,13 +1014,20 @@ static enum bp_result get_ss_info_v4_5(
 		DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
 		break;
 	case AS_SIGNAL_TYPE_DISPLAY_PORT:
-		ss_info->spread_spectrum_percentage =
+		if (bp->base.integrated_info) {
+			DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", bp->base.integrated_info->gpuclk_ss_percentage);
+			ss_info->spread_spectrum_percentage =
+					bp->base.integrated_info->gpuclk_ss_percentage;
+			ss_info->type.CENTER_MODE =
+					bp->base.integrated_info->gpuclk_ss_type;
+		} else {
+			ss_info->spread_spectrum_percentage =
 				disp_cntl_tbl->dp_ss_percentage;
-		ss_info->spread_spectrum_range =
+			ss_info->spread_spectrum_range =
 				disp_cntl_tbl->dp_ss_rate_10hz * 10;
-		if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
-			ss_info->type.CENTER_MODE = true;
-
+			if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+				ss_info->type.CENTER_MODE = true;
+		}
 		DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
 		break;
 	case AS_SIGNAL_TYPE_GPU_PLL:
@@ -2813,6 +2820,8 @@ static enum bp_result get_integrated_info_v2_2(
 	info->ma_channel_number = info_v2_2->umachannelnumber;
 	info->dp_ss_control =
 		le16_to_cpu(info_v2_2->reserved1);
+	info->gpuclk_ss_percentage = info_v2_2->gpuclk_ss_percentage;
+	info->gpuclk_ss_type = info_v2_2->gpuclk_ss_type;
 
 	for (i = 0; i < NUMBER_OF_UCHAR_FOR_GUID; ++i) {
 		info->ext_disp_conn_info.gu_id[i] =
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index bc96d02113608..813463ffe15c5 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -417,6 +417,8 @@ struct integrated_info {
 	/* V2.1 */
 	struct edp_info edp1_info;
 	struct edp_info edp2_info;
+	uint32_t gpuclk_ss_percentage;
+	uint32_t gpuclk_ss_type;
 };
 
 /*
-- 
GitLab


From 3582e0ba8a675d72c3cc6dd1b847e6aa757845da Mon Sep 17 00:00:00 2001
From: Josip Pavic <josip.pavic@amd.com>
Date: Tue, 5 Dec 2023 16:57:27 -0500
Subject: [PATCH 1255/2340] drm/amd/display: dereference variable before
 checking for zero

[Why]
Driver incorrectly checks if pointer variable OutBpp is null instead of
if the value being pointed to is zero.

[How]
Dereference OutBpp before checking for a value of zero.

Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Josip Pavic <josip.pavic@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 180f8a98a361a..b95bf27f2fe2f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -5420,7 +5420,7 @@ static void CalculateOutputLink(
 					*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
 												OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
 
-					if (OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
 						*RequiresDSC = true;
 						LinkDSCEnable = true;
 						*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
-- 
GitLab


From b5882675074086245589daa21c9d2b205810b83f Mon Sep 17 00:00:00 2001
From: George Shen <george.shen@amd.com>
Date: Tue, 5 Dec 2023 18:34:47 -0500
Subject: [PATCH 1256/2340] drm/amd/display: Set test_pattern_changed update
 flag on pipe enable

[Why]
In certain cases, ODM pipe split can occur while stream already has test
pattern enabled. The new pipe used in the ODM combine config must be
configured to output the test pattern in this case.

[How]
If the stream is configured to output test pattern, then set the
test_pattern_changed update flag for the new pipe when it gets enabled.

Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: George Shen <george.shen@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 5bb5e29602769..2c8eff815ec11 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1400,6 +1400,10 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 		new_pipe->update_flags.bits.scaler = 1;
 		new_pipe->update_flags.bits.viewport = 1;
 		new_pipe->update_flags.bits.det_size = 1;
+		if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE &&
+				new_pipe->stream_res.test_pattern_params.width != 0 &&
+				new_pipe->stream_res.test_pattern_params.height != 0)
+			new_pipe->update_flags.bits.test_pattern_changed = 1;
 		if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
 			new_pipe->update_flags.bits.odm = 1;
 			new_pipe->update_flags.bits.global_sync = 1;
-- 
GitLab


From ec39a6d00382dfd23bf74ec28c7cf4b87884ae1b Mon Sep 17 00:00:00 2001
From: Muhammad Ahmed <ahmed.ahmed@amd.com>
Date: Wed, 6 Dec 2023 18:07:57 -0500
Subject: [PATCH 1257/2340] drm/amd/display: add debug option for
 ExtendedVBlank DLG adjust

[why & how]
Add new option for debug usage

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Muhammad Ahmed <ahmed.ahmed@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h                     | 1 +
 drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index c87ec3e0a8cfd..ce00a6eeb1640 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -979,6 +979,7 @@ struct dc_debug_options {
 	unsigned int ips2_eval_delay_us;
 	unsigned int ips2_entry_delay_us;
 	bool disable_timeout;
+	bool disable_extblankadj;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 2c8eff815ec11..72c580ec650cf 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -2221,7 +2221,8 @@ void dcn20_optimize_bandwidth(
 			dc->clk_mgr,
 			context,
 			true);
-	if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) {
+	if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+		!dc->debug.disable_extblankadj) {
 		for (i = 0; i < dc->res_pool->pipe_count; ++i) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-- 
GitLab


From 0d26644bc57d8737c8e2fb3145366f7d0b941935 Mon Sep 17 00:00:00 2001
From: Allen Pan <allen.pan@amd.com>
Date: Thu, 7 Dec 2023 17:26:10 -0500
Subject: [PATCH 1258/2340] drm/amd/display: fix usb-c connector_type

[why]
BIOS switches to use USB-C connector type 0x18, but VBIOS's
objectInfo table not supported yet. driver needs to patch it
based on enc_cap from system integration info table.

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Allen Pan <allen.pan@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c    | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
index f91e088952755..da94e5309fbaf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
@@ -256,6 +256,10 @@ void dcn35_link_encoder_construct(
 		enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
 		enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
 		enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+		if (bp_cap_info.DP_IS_USB_C) {
+			/*BIOS not switch to use CONNECTOR_ID_USBC = 24 yet*/
+			enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+		}
 
 	} else {
 		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
@@ -264,4 +268,5 @@ void dcn35_link_encoder_construct(
 	}
 	if (enc10->base.ctx->dc->debug.hdmi20_disable)
 		enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+
 }
-- 
GitLab


From 8e57c06bf4b0f51a4d6958e15e1a99c9520d00fa Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Mon, 4 Dec 2023 14:10:05 -0500
Subject: [PATCH 1259/2340] drm/amd/display: Refactor DMCUB enter/exit idle
 interface

[Why]
We can hang in place trying to send commands when the DMCUB isn't
powered on.

[How]
We need to exit out of the idle state prior to sending a command,
but the process that performs the exit also invokes a command itself.

Fixing this issue involves the following:

1. Using a software state to track whether or not we need to start
   the process to exit idle or notify idle.

It's possible for the hardware to have exited an idle state without
driver knowledge, but entering one is always restricted to a driver
allow - which makes the SW state vs HW state mismatch issue purely one
of optimization, which should seldomly be hit, if at all.

2. Refactor any instances of exit/notify idle to use a single wrapper
   that maintains this SW state.

This works simialr to dc_allow_idle_optimizations, but works at the
DMCUB level and makes sure the state is marked prior to any notify/exit
idle so we don't enter an infinite loop.

3. Make sure we exit out of idle prior to sending any commands or
   waiting for DMCUB idle.

This patch takes care of 1/2. A future patch will take care of wrapping
DMCUB command submission with calls to this new interface.

Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Reviewed-by: Hansen Dsouza <hansen.dsouza@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  4 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 37 ++++++++++++++++++-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  |  6 ++-
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   |  8 +---
 4 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 54861136dafd9..97776ba1c70a9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2856,7 +2856,7 @@ static int dm_resume(void *handle)
 	bool need_hotplug = false;
 
 	if (dm->dc->caps.ips_support) {
-		dc_dmub_srv_exit_low_power_state(dm->dc);
+		dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
 	}
 
 	if (amdgpu_in_reset(adev)) {
@@ -9001,7 +9001,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			if (new_con_state->crtc &&
 				new_con_state->crtc->state->active &&
 				drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) {
-				dc_dmub_srv_exit_low_power_state(dm->dc);
+				dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
 				break;
 			}
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index eb6f5640f19ae..ccfe2b6046fd0 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1162,6 +1162,9 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
 	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
 	enum dmub_status status;
 
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return true;
+
 	if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
 		return true;
 
@@ -1183,7 +1186,7 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
 	return true;
 }
 
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
+static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 {
 	union dmub_rb_cmd cmd = {0};
 
@@ -1207,7 +1210,7 @@ void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
+static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 {
 	const uint32_t max_num_polls = 10000;
 	uint32_t allow_state = 0;
@@ -1220,6 +1223,9 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 	if (!dc->idle_optimizations_allowed)
 		return;
 
+	if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
+		return;
+
 	if (dc->hwss.get_idle_state &&
 		dc->hwss.set_idle_state &&
 		dc->clk_mgr->funcs->exit_low_power_state) {
@@ -1296,3 +1302,30 @@ void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_c
 	else
 		dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3);
 }
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle)
+{
+	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return;
+
+	if (dc_dmub_srv->idle_allowed == allow_idle)
+		return;
+
+	/*
+	 * Entering a low power state requires a driver notification.
+	 * Powering up the hardware requires notifying PMFW and DMCUB.
+	 * Clearing the driver idle allow requires a DMCUB command.
+	 * DMCUB commands requires the DMCUB to be powered up and restored.
+	 *
+	 * Exit out early to prevent an infinite loop of DMCUB commands
+	 * triggering exit low power - use software state to track this.
+	 */
+	dc_dmub_srv->idle_allowed = allow_idle;
+
+	if (!allow_idle)
+		dc_dmub_srv_exit_low_power_state(dc);
+	else
+		dc_dmub_srv_notify_idle(dc, allow_idle);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index c25ce7546f718..b63cba6235fc7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -50,6 +50,8 @@ struct dc_dmub_srv {
 
 	struct dc_context *ctx;
 	void *dm;
+
+	bool idle_allowed;
 };
 
 void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv);
@@ -100,8 +102,8 @@ void dc_dmub_srv_enable_dpia_trace(const struct dc *dc);
 void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index);
 
 bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle);
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc);
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle);
 
 void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState);
 #endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 9262d33361821..f48001317fabc 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -687,11 +687,7 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	}
 
 	// TODO: review other cases when idle optimization is allowed
-
-	if (!enable)
-		dc_dmub_srv_exit_low_power_state(dc);
-	else
-		dc_dmub_srv_notify_idle(dc, enable);
+	dc_dmub_srv_apply_idle_power_optimizations(dc, enable);
 
 	return true;
 }
@@ -701,7 +697,7 @@ void dcn35_z10_restore(const struct dc *dc)
 	if (dc->debug.disable_z10)
 		return;
 
-	dc_dmub_srv_exit_low_power_state(dc);
+	dc_dmub_srv_apply_idle_power_optimizations(dc, false);
 
 	dcn31_z10_restore(dc);
 }
-- 
GitLab


From 8892780834ae294bc3697c7d0e056d7743900b39 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Mon, 4 Dec 2023 16:35:04 -0500
Subject: [PATCH 1260/2340] drm/amd/display: Wake DMCUB before sending a
 command

[Why]
We can hang in place trying to send commands when the DMCUB isn't
powered on.

[How]
For functions that execute within a DC context or DC lock we can
wrap the direct calls to dm_execute_dmub_cmd/list with code that
exits idle power optimizations and reallows once we're done with
the command submission on success.

For DM direct submissions the DM will need to manage the enter/exit
sequencing manually.

We cannot invoke a DMCUB command directly within the DM execution
helper or we can deadlock.

Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Reviewed-by: Hansen Dsouza <hansen.dsouza@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  2 +-
 .../drm/amd/display/dc/bios/command_table2.c  | 12 ++---
 .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c  |  2 +-
 .../dc/clk_mgr/dcn314/dcn314_clk_mgr.c        |  2 +-
 .../dc/clk_mgr/dcn315/dcn315_clk_mgr.c        |  2 +-
 .../dc/clk_mgr/dcn316/dcn316_clk_mgr.c        |  2 +-
 .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c      | 12 ++---
 .../drm/amd/display/dc/core/dc_hw_sequencer.c |  2 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 53 ++++++++++++++++---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  | 40 ++++++++++++++
 drivers/gpu/drm/amd/display/dc/dc_helper.c    |  6 +--
 .../gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c | 14 ++---
 .../drm/amd/display/dc/dce/dmub_hw_lock_mgr.c |  2 +-
 .../gpu/drm/amd/display/dc/dce/dmub_outbox.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 14 ++---
 .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c |  2 +-
 .../display/dc/dcn31/dcn31_dio_link_encoder.c |  4 +-
 .../amd/display/dc/dcn31/dcn31_panel_cntl.c   |  4 +-
 .../amd/display/dc/hwss/dcn21/dcn21_hwseq.c   |  4 +-
 .../amd/display/dc/hwss/dcn30/dcn30_hwseq.c   |  8 +--
 .../amd/display/dc/hwss/dcn31/dcn31_hwseq.c   |  4 +-
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c   |  6 +--
 .../dc/link/protocols/link_dp_capability.c    |  2 +-
 .../display/dc/link/protocols/link_dp_dpia.c  |  3 +-
 25 files changed, 143 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 97776ba1c70a9..dfab1627890a9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10910,7 +10910,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
 	input->cea_total_length = total_length;
 	memcpy(input->payload, data, length);
 
-	res = dm_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+	res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 	if (!res) {
 		DRM_ERROR("EDID CEA parser failed\n");
 		return false;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index ab0adabf9dd4c..293a919d605d1 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -123,7 +123,7 @@ static void encoder_control_dmcub(
 		sizeof(cmd.digx_encoder_control.header);
 	cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result encoder_control_digx_v1_5(
@@ -259,7 +259,7 @@ static void transmitter_control_dmcub(
 		sizeof(cmd.dig1_transmitter_control.header);
 	cmd.dig1_transmitter_control.transmitter_control.dig = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result transmitter_control_v1_6(
@@ -321,7 +321,7 @@ static void transmitter_control_dmcub_v1_7(
 		sizeof(cmd.dig1_transmitter_control.header);
 	cmd.dig1_transmitter_control.transmitter_control.dig_v1_7 = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result transmitter_control_v1_7(
@@ -429,7 +429,7 @@ static void set_pixel_clock_dmcub(
 		sizeof(cmd.set_pixel_clock.header);
 	cmd.set_pixel_clock.pixel_clock.clk = *clk;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result set_pixel_clock_v7(
@@ -796,7 +796,7 @@ static void enable_disp_power_gating_dmcub(
 		sizeof(cmd.enable_disp_power_gating.header);
 	cmd.enable_disp_power_gating.power_gating.pwr = *pwr;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result enable_disp_power_gating_v2_1(
@@ -1006,7 +1006,7 @@ static void enable_lvtma_control_dmcub(
 			pwrseq_instance;
 	cmd.lvtma_control.data.bypass_panel_control_wait =
 			bypass_panel_control_wait;
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result enable_lvtma_control(
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 3db4ef564b997..ce1386e22576e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -253,7 +253,7 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 7326b75658461..7575282563267 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -284,7 +284,7 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index 8776055bbeaae..644da46373209 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -232,7 +232,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 09151cc56ce4f..12f3e8aa46d8d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -239,7 +239,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static void dcn316_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 789091f3bdc85..9c660d1facc76 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -343,7 +343,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 445f87fa0eac8..f2fc7df68b58d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -522,7 +522,7 @@ dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv,
 		cmd.secure_display.roi_info.y_end = rect->y + rect->height;
 	}
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 }
 
 static inline void
@@ -3381,7 +3381,7 @@ void dc_dmub_update_dirty_rect(struct dc *dc,
 
 			update_dirty_rect->panel_inst = panel_inst;
 			update_dirty_rect->pipe_idx = j;
-			dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+			dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 		}
 	}
 }
@@ -5207,7 +5207,7 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc,
 			);
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -5261,7 +5261,7 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
 	cmd.set_config_access.set_config_control.cmd_pkt.msg_type = payload->msg_type;
 	cmd.set_config_access.set_config_control.cmd_pkt.msg_data = payload->msg_data;
 
-	if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
+	if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
 		/* command is not processed by dmub */
 		notify->sc_status = SET_CONFIG_UNKNOWN_ERROR;
 		return is_cmd_complete;
@@ -5304,7 +5304,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
 	cmd.set_mst_alloc_slots.mst_slots_control.instance = dc->links[link_index]->ddc_hw_inst;
 	cmd.set_mst_alloc_slots.mst_slots_control.mst_alloc_slots = mst_alloc_slots;
 
-	if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		/* command is not processed by dmub */
 		return DC_ERROR_UNEXPECTED;
 
@@ -5342,7 +5342,7 @@ void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
 	cmd.dpia_hpd_int_enable.header.type = DMUB_CMD__DPIA_HPD_INT_ENABLE;
 	cmd.dpia_hpd_int_enable.enable = hpd_int_enable;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	DC_LOG_DEBUG("%s: hpd_int_enable(%d)\n", __func__, hpd_int_enable);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index fe07160932d69..fc18b9dc946f9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -724,7 +724,7 @@ void hwss_send_dmcub_cmd(union block_sequence_params *params)
 	union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd;
 	enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type;
 
-	dm_execute_dmub_cmd(ctx, cmd, wait_type);
+	dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type);
 }
 
 void hwss_program_manual_trigger(union block_sequence_params *params)
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index ccfe2b6046fd0..fea13bcd4dc75 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -360,7 +360,7 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal
 	cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
@@ -374,7 +374,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
 	cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream)
@@ -467,7 +467,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru
 		sizeof(cmd.fw_assisted_mclk_switch) - sizeof(cmd.fw_assisted_mclk_switch.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -488,7 +488,7 @@ void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv)
 	cmd.query_feature_caps.header.payload_bytes = sizeof(struct dmub_cmd_query_feature_caps_data);
 
 	/* If command was processed, copy feature caps to dmub srv */
-	if (dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 	    cmd.query_feature_caps.header.ret_status == 0) {
 		memcpy(&dc_dmub_srv->dmub->feature_caps,
 		       &cmd.query_feature_caps.query_feature_caps_data,
@@ -513,7 +513,7 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
 	cmd.visual_confirm_color.visual_confirm_color_data.visual_confirm_color.panel_inst = panel_inst;
 
 	// If command was processed, copy feature caps to dmub srv
-	if (dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 		cmd.visual_confirm_color.header.ret_status == 0) {
 		memcpy(&dc->ctx->dmub_srv->dmub->visual_confirm_color,
 			&cmd.visual_confirm_color.visual_confirm_color_data,
@@ -875,7 +875,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		cmd.fw_assisted_mclk_switch_v2.config_data.watermark_a_cache = wm_val_refclk < 0xFFFF ? wm_val_refclk : 0xFFFF;
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data)
@@ -1112,7 +1112,7 @@ void dc_send_update_cursor_info_to_dmu(
 				pipe_idx, pCtx->plane_res.hubp, pCtx->plane_res.dpp);
 
 		/* Combine 2nd cmds update_curosr_info to DMU */
-		dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
+		dc_wake_and_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
 	}
 }
 
@@ -1207,6 +1207,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 			dc->hwss.set_idle_state(dc, true);
 	}
 
+	/* NOTE: This does not use the "wake" interface since this is part of the wake path. */
 	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
@@ -1329,3 +1330,41 @@ void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_
 	else
 		dc_dmub_srv_notify_idle(dc, allow_idle);
 }
+
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+				  enum dm_dmub_wait_type wait_type)
+{
+	return dc_wake_and_execute_dmub_cmd_list(ctx, 1, cmd, wait_type);
+}
+
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+				       union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+	bool result = false, reallow_idle = false;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	if (count == 0)
+		return true;
+
+	if (dc_dmub_srv->idle_allowed) {
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+		reallow_idle = true;
+	}
+
+	/*
+	 * These may have different implementations in DM, so ensure
+	 * that we guide it to the expected helper.
+	 */
+	if (count > 1)
+		result = dm_execute_dmub_cmd_list(ctx, count, cmd, wait_type);
+	else
+		result = dm_execute_dmub_cmd(ctx, cmd, wait_type);
+
+	if (result && reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+	return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index b63cba6235fc7..784ca3e444143 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -106,4 +106,44 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
 void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle);
 
 void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState);
+
+/**
+ * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution.
+ *
+ * Refer to dc_wake_and_execute_dmub_cmd_list() for usage and limitations,
+ * This function is a convenience wrapper for a single command execution.
+ *
+ * @ctx: DC context
+ * @cmd: The command to send/receive
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+				  enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_dmub_cmd_list() - Wrapper for DMUB command list execution.
+ *
+ * If the DMCUB hardware was asleep then it wakes the DMUB before
+ * executing the command and attempts to re-enter if the command
+ * submission was successful.
+ *
+ * This should be the preferred command submission interface provided
+ * the DC lock is acquired.
+ *
+ * Entry/exit out of idle power optimizations would need to be
+ * manually performed otherwise through dc_allow_idle_optimizations().
+ *
+ * @ctx: DC context
+ * @count: Number of commands to send/receive
+ * @cmd: Array of commands to send
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+				       union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
+
+
 #endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index cb6eaddab7205..8f9a678256150 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -50,7 +50,7 @@ static inline void submit_dmub_read_modify_write(
 	cmd_buf->header.payload_bytes =
 			sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 
@@ -67,7 +67,7 @@ static inline void submit_dmub_burst_write(
 	cmd_buf->header.payload_bytes =
 			sizeof(uint32_t) * offload->reg_seq_count;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 
@@ -80,7 +80,7 @@ static inline void submit_dmub_reg_wait(
 {
 	struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 	offload->reg_seq_count = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
index 42c802afc4681..4cff36351f40a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
@@ -76,7 +76,7 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
 	cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask;
 	cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dmub_abm_init(struct abm *abm, uint32_t backlight)
@@ -155,7 +155,7 @@ bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask)
 	cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask;
 	cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -186,7 +186,7 @@ void dmub_abm_init_config(struct abm *abm,
 
 	cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 }
 
@@ -203,7 +203,7 @@ bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, un
 	cmd.abm_pause.abm_pause_data.panel_mask = panel_mask;
 	cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -246,7 +246,7 @@ bool dmub_abm_save_restore(
 
 	cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	// Copy iramtable data into local structure
 	memcpy((void *)pData, dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes);
@@ -274,7 +274,7 @@ bool dmub_abm_set_pipe(struct abm *abm,
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
 	cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -296,7 +296,7 @@ bool dmub_abm_set_backlight_level(struct abm *abm,
 	cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
 	cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
index 2aa0e01a6891b..ba1fec3016d5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
@@ -47,7 +47,7 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv,
 	if (!lock)
 		cmd.lock_hw.lock_hw_data.should_release = 1;
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
index d8009b2dc56a0..98a778996e1a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
@@ -48,5 +48,5 @@ void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv)
 		sizeof(cmd.outbox1_enable.header);
 	cmd.outbox1_enable.enable = true;
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index 9d4170a356a20..3d7cef17f8818 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -171,7 +171,7 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state *
 	cmd.psr_set_version.psr_set_version_data.panel_inst = panel_inst;
 	cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -199,7 +199,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8
 
 	cmd.psr_enable.header.payload_bytes = 0; // Send header only
 
-	dm_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	/* Below loops 1000 x 500us = 500 ms.
 	 *  Exit PSR may need to wait 1-2 frames to power up. Timeout after at
@@ -248,7 +248,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_
 	cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
 	cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
 	cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst;
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -267,7 +267,7 @@ static void dmub_psr_set_sink_vtotal_in_psr_active(struct dmub_psr *dmub,
 	cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_idle = psr_vtotal_idle;
 	cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_su = psr_vtotal_su;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -286,7 +286,7 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt
 	cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt;
 	cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -423,7 +423,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
 		copy_settings_data->relock_delay_frame_cnt = 2;
 	copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -444,7 +444,7 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
 	cmd.psr_force_static.header.sub_type = DMUB_CMD__PSR_FORCE_STATIC;
 	cmd.psr_enable.header.payload_bytes = 0;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
index 68cad55c72ab8..e13d69a22c1c7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
@@ -691,7 +691,7 @@ static void dmcub_PLAT_54186_wa(struct hubp *hubp,
 	cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
 
 	PERF_TRACE();  // TODO: remove after performance is stable.
-	dm_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 	PERF_TRACE();  // TODO: remove after performance is stable.
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
index 4596f3bac1b4c..26be5fee7411d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
@@ -125,7 +125,7 @@ static bool query_dp_alt_from_dmub(struct link_encoder *enc,
 	cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
 	cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
 
-	if (!dm_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		return false;
 
 	return true;
@@ -436,7 +436,7 @@ static bool link_dpia_control(struct dc_context *dc_ctx,
 
 	cmd.dig1_dpia_control.dpia_control = *dpia_control;
 
-	dm_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index d849b1eaa4a5c..03248422d6ffd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -52,7 +52,7 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub
 	cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data);
 	cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
 
-	return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+	return dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 }
 
 static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl)
@@ -85,7 +85,7 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
 		panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
 	cmd.panel_cntl.data.bl_pwm_ref_div2 =
 		panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2;
-	if (!dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		return 0;
 
 	panel_cntl->stored_backlight_registers.BL_PWM_CNTL = cmd.panel_cntl.data.bl_pwm_cntl;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 08783ad097d21..8e88dcaf88f5b 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -154,7 +154,7 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst,
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
 	cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -173,7 +173,7 @@ static void dmub_abm_set_backlight(struct dc_context *dc, uint32_t backlight_pwm
 	cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
 	cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index fd8a8c10a2019..d337578d7e734 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -754,7 +754,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ;
 				cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -876,7 +876,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 					cmd.mall.cursor_height = cursor_attr.height;
 					cmd.mall.cursor_pitch = cursor_attr.pitch;
 
-					dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+					dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 					/* Use copied cursor, and it's okay to not switch back */
 					cursor_attr.address.quad_part = cmd.mall.cursor_copy_dst.quad_part;
@@ -892,7 +892,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.mall.tmr_scale = tmr_scale;
 				cmd.mall.debug_bits = dc->debug.mall_error_as_fatal;
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -909,7 +909,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	cmd.mall.header.payload_bytes =
 		sizeof(cmd.mall) - sizeof(cmd.mall.header);
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 5d62805f3bdf6..994250b6f2ef8 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -416,7 +416,7 @@ void dcn31_z10_save_init(struct dc *dc)
 	cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
 	cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn31_z10_restore(const struct dc *dc)
@@ -434,7 +434,7 @@ void dcn31_z10_restore(const struct dc *dc)
 	cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
 	cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_RESTORE;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 0f0972ad441a9..cb9d8389329ff 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -277,7 +277,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ;
 				cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -311,7 +311,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
 				cmd.cab.cab_alloc_ways = (uint8_t)ways;
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -327,7 +327,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	cmd.cab.header.payload_bytes =
 			sizeof(cmd.cab) - sizeof(cmd.cab.header);
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index 3c5334cdb3fb9..289f5d1333424 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -1398,7 +1398,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id)
 	cmd.cable_id.header.payload_bytes = sizeof(cmd.cable_id.data);
 	cmd.cable_id.data.input.phy_inst = resource_transmitter_to_phy_idx(
 			link->dc, link->link_enc->transmitter);
-	if (dm_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 			cmd.cable_id.header.ret_status == 1) {
 		cable_id->raw = cmd.cable_id.data.output_raw;
 		DC_LOG_DC("usbc_cable_id = %d.\n", cable_id->raw);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
index 0bb7491339098..982eda3c46f56 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
@@ -90,7 +90,8 @@ bool dpia_query_hpd_status(struct dc_link *link)
 	cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
 
 	/* Return HPD status reported by DMUB if query successfully executed. */
-	if (dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_hpd.data.status == AUX_RET_SUCCESS)
+	if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	    cmd.query_hpd.data.status == AUX_RET_SUCCESS)
 		is_hpd_high = cmd.query_hpd.data.result;
 
 	DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n",
-- 
GitLab


From e5ffd1263dd5b44929c676171802e7b6af483f21 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Tue, 5 Dec 2023 11:22:56 -0500
Subject: [PATCH 1261/2340] drm/amd/display: Wake DMCUB before executing GPINT
 commands

[Why]
DMCUB can be in idle when we attempt to interface with the HW through
the GPINT mailbox resulting in a system hang.

[How]
Add dc_wake_and_execute_gpint() to wrap the wake, execute, sleep
sequence.

If the GPINT executes successfully then DMCUB will be put back into
sleep after the optional response is returned.

It functions similar to the inbox command interface.

Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Reviewed-by: Hansen Dsouza <hansen.dsouza@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 29 ++------
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  | 72 ++++++++++++++-----
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h  | 11 +++
 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 19 ++---
 4 files changed, 77 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 98b41ec7288e8..68a8463239127 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -2976,7 +2976,6 @@ static int dmub_trace_mask_set(void *data, u64 val)
 	struct amdgpu_device *adev = data;
 	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
 	enum dmub_gpint_command cmd;
-	enum dmub_status status;
 	u64 mask = 0xffff;
 	u8 shift = 0;
 	u32 res;
@@ -3003,13 +3002,7 @@ static int dmub_trace_mask_set(void *data, u64 val)
 			break;
 		}
 
-		status = dmub_srv_send_gpint_command(srv, cmd, res, 30);
-
-		if (status == DMUB_STATUS_TIMEOUT)
-			return -ETIMEDOUT;
-		else if (status == DMUB_STATUS_INVALID)
-			return -EINVAL;
-		else if (status != DMUB_STATUS_OK)
+		if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, res, NULL, DM_DMUB_WAIT_TYPE_WAIT))
 			return -EIO;
 
 		usleep_range(100, 1000);
@@ -3026,7 +3019,6 @@ static int dmub_trace_mask_show(void *data, u64 *val)
 	enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0;
 	struct amdgpu_device *adev = data;
 	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
-	enum dmub_status status;
 	u8 shift = 0;
 	u64 raw = 0;
 	u64 res = 0;
@@ -3036,23 +3028,12 @@ static int dmub_trace_mask_show(void *data, u64 *val)
 		return -EINVAL;
 
 	while (i < 4) {
-		status = dmub_srv_send_gpint_command(srv, cmd, 0, 30);
-
-		if (status == DMUB_STATUS_OK) {
-			status = dmub_srv_get_gpint_response(srv, (u32 *) &raw);
-
-			if (status == DMUB_STATUS_INVALID)
-				return -EINVAL;
-			else if (status != DMUB_STATUS_OK)
-				return -EIO;
-		} else if (status == DMUB_STATUS_TIMEOUT) {
-			return -ETIMEDOUT;
-		} else if (status == DMUB_STATUS_INVALID) {
-			return -EINVAL;
-		} else {
+		uint32_t response;
+
+		if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, 0, &response, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 			return -EIO;
-		}
 
+		raw = response;
 		usleep_range(100, 1000);
 
 		cmd++;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index fea13bcd4dc75..4b93e7a529d50 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -301,17 +301,11 @@ bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv)
 bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
 				    unsigned int stream_mask)
 {
-	struct dmub_srv *dmub;
-	const uint32_t timeout = 30;
-
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
 		return false;
 
-	dmub = dc_dmub_srv->dmub;
-
-	return dmub_srv_send_gpint_command(
-		       dmub, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
-		       stream_mask, timeout) == DMUB_STATUS_OK;
+	return dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
+					 stream_mask, NULL, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv)
@@ -1126,25 +1120,20 @@ bool dc_dmub_check_min_version(struct dmub_srv *srv)
 void dc_dmub_srv_enable_dpia_trace(const struct dc *dc)
 {
 	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
-	struct dmub_srv *dmub;
-	enum dmub_status status;
-	static const uint32_t timeout_us = 30;
 
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
 		DC_LOG_ERROR("%s: invalid parameters.", __func__);
 		return;
 	}
 
-	dmub = dc_dmub_srv->dmub;
-
-	status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, 0x0010, timeout_us);
-	if (status != DMUB_STATUS_OK) {
+	if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1,
+				       0x0010, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
 		return;
 	}
 
-	status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, 0x0000, timeout_us);
-	if (status != DMUB_STATUS_OK) {
+	if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK,
+				       0x0000, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
 		return;
 	}
@@ -1368,3 +1357,52 @@ bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned in
 
 	return result;
 }
+
+static bool dc_dmub_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+				  uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+	const uint32_t wait_us = wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT ? 0 : 30;
+	enum dmub_status status;
+
+	if (response)
+		*response = 0;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	status = dmub_srv_send_gpint_command(dc_dmub_srv->dmub, command_code, param, wait_us);
+	if (status != DMUB_STATUS_OK) {
+		if (status == DMUB_STATUS_TIMEOUT && wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT)
+			return true;
+
+		return false;
+	}
+
+	if (response && wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
+		dmub_srv_get_gpint_response(dc_dmub_srv->dmub, response);
+
+	return true;
+}
+
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+			       uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+	bool result = false, reallow_idle = false;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	if (dc_dmub_srv->idle_allowed) {
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+		reallow_idle = true;
+	}
+
+	result = dc_dmub_execute_gpint(ctx, command_code, param, response, wait_type);
+
+	if (result && reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+	return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index 784ca3e444143..952bfb368886e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -145,5 +145,16 @@ bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cm
 bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
 				       union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
 
+/**
+ * dc_wake_and_execute_gpint()
+ *
+ * @ctx: DC context
+ * @command_code: The command ID to send to DMCUB
+ * @param: The parameter to message DMCUB
+ * @response: Optional response out value - may be NULL.
+ * @wait_type: The wait behavior for the execution
+ */
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+			       uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type);
 
 #endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index 3d7cef17f8818..3e243e407bb87 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -105,23 +105,18 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state)
  */
 static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, uint8_t panel_inst)
 {
-	struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
 	uint32_t raw_state = 0;
 	uint32_t retry_count = 0;
-	enum dmub_status status;
 
 	do {
 		// Send gpint command and wait for ack
-		status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, panel_inst, 30);
-
-		if (status == DMUB_STATUS_OK) {
-			// GPINT was executed, get response
-			dmub_srv_get_gpint_response(srv, &raw_state);
+		if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_PSR_STATE, panel_inst, &raw_state,
+					      DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
 			*state = convert_psr_state(raw_state);
-		} else
+		} else {
 			// Return invalid state when GPINT times out
 			*state = PSR_STATE_INVALID;
-
+		}
 	} while (++retry_count <= 1000 && *state == PSR_STATE_INVALID);
 
 	// Assert if max retry hit
@@ -452,13 +447,11 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
  */
 static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst)
 {
-	struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
 	uint16_t param = (uint16_t)(panel_inst << 8);
 
 	/* Send gpint command and wait for ack */
-	dmub_srv_send_gpint_command(srv, DMUB_GPINT__PSR_RESIDENCY, param, 30);
-
-	dmub_srv_get_gpint_response(srv, residency);
+	dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__PSR_RESIDENCY, param, residency,
+				  DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 }
 
 static const struct dmub_psr_funcs psr_funcs = {
-- 
GitLab


From 09a4ec5da92c84952db117f0d576fdd8368c873a Mon Sep 17 00:00:00 2001
From: Dillon Varone <dillon.varone@amd.com>
Date: Fri, 17 Nov 2023 16:37:50 -0500
Subject: [PATCH 1262/2340] drm/amd/display: Refactor dc_state interface

[WHY?]
Part of the dc_state interface that deals with adding streams and planes should
remain public, while others that deal with internal status' and subvp should be
private to DC.

[HOW?]
Move and rename the public functions to dc_state.h and private functions to
dc_state_priv.h. Also add some additional functions for extracting subvp meta
data from the state.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Jun Lei <jun.lei@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Dillon Varone <dillon.varone@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  45 +-
 drivers/gpu/drm/amd/display/dc/Makefile       |   2 +-
 .../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c  |   3 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c      | 171 ++----
 .../drm/amd/display/dc/core/dc_hw_sequencer.c |   8 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c | 294 +---------
 .../gpu/drm/amd/display/dc/core/dc_state.c    | 527 ++++++++++++++++++
 .../gpu/drm/amd/display/dc/core/dc_stream.c   |  33 +-
 .../gpu/drm/amd/display/dc/core/dc_surface.c  |   6 +-
 drivers/gpu/drm/amd/display/dc/dc.h           |  16 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |  31 +-
 drivers/gpu/drm/amd/display/dc/dc_plane.h     |  38 ++
 .../gpu/drm/amd/display/dc/dc_plane_priv.h    |  34 ++
 drivers/gpu/drm/amd/display/dc/dc_state.h     |  74 +++
 .../gpu/drm/amd/display/dc/dc_state_priv.h    |  97 ++++
 drivers/gpu/drm/amd/display/dc/dc_stream.h    |  44 --
 .../gpu/drm/amd/display/dc/dc_stream_priv.h   |  35 ++
 drivers/gpu/drm/amd/display/dc/dc_types.h     |   5 +
 .../display/dc/dcn32/dcn32_resource_helpers.c |  28 +-
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  |   5 +-
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  45 +-
 .../drm/amd/display/dc/dml2/dml2_dc_types.h   |   1 +
 .../amd/display/dc/dml2/dml2_mall_phantom.c   |  73 +--
 .../display/dc/dml2/dml2_translation_helper.c |  10 +-
 .../gpu/drm/amd/display/dc/dml2/dml2_utils.c  |   6 +-
 .../drm/amd/display/dc/dml2/dml2_wrapper.c    |   2 +-
 .../drm/amd/display/dc/dml2/dml2_wrapper.h    |  32 +-
 .../amd/display/dc/hwss/dce110/dce110_hwseq.c |   3 +-
 .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c   |  17 +-
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   |  42 +-
 .../amd/display/dc/hwss/dcn30/dcn30_hwseq.c   |   4 +-
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c   |  36 +-
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   |   3 +-
 drivers/gpu/drm/amd/display/dc/inc/resource.h |   1 -
 .../dc/resource/dcn32/dcn32_resource.c        |  83 ++-
 .../dc/resource/dcn321/dcn321_resource.c      |  21 +-
 .../dc/resource/dcn35/dcn35_resource.c        |   2 +
 37 files changed, 1171 insertions(+), 706 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/core/dc_state.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dc_plane.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dc_state.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dc_state_priv.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dc_stream_priv.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index dfab1627890a9..b3ffb25dc6b11 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -37,6 +37,7 @@
 #include "dc/dc_dmub_srv.h"
 #include "dc/dc_edid_parser.h"
 #include "dc/dc_stat.h"
+#include "dc/dc_state.h"
 #include "amdgpu_dm_trace.h"
 #include "dpcd_defs.h"
 #include "link/protocols/link_dpcd.h"
@@ -2607,7 +2608,7 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 
 	memset(del_streams, 0, sizeof(del_streams));
 
-	context = dc_create_state(dc);
+	context = dc_state_create(dc);
 	if (context == NULL)
 		goto context_alloc_fail;
 
@@ -2622,12 +2623,12 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 
 	/* Remove all planes for removed streams and then remove the streams */
 	for (i = 0; i < del_streams_count; i++) {
-		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+		if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
 			res = DC_FAIL_DETACH_SURFACES;
 			goto fail;
 		}
 
-		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+		res = dc_state_remove_stream(dc, context, del_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 	}
@@ -2635,7 +2636,7 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 	res = dc_commit_streams(dc, context->streams, context->stream_count);
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 context_alloc_fail:
 	return res;
@@ -2662,7 +2663,7 @@ static int dm_suspend(void *handle)
 
 		dc_allow_idle_optimizations(adev->dm.dc, false);
 
-		dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
+		dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state);
 
 		dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
 
@@ -2909,7 +2910,7 @@ static int dm_resume(void *handle)
 
 		dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
 
-		dc_release_state(dm->cached_dc_state);
+		dc_state_release(dm->cached_dc_state);
 		dm->cached_dc_state = NULL;
 
 		amdgpu_dm_irq_resume_late(adev);
@@ -2919,8 +2920,8 @@ static int dm_resume(void *handle)
 		return 0;
 	}
 	/* Recreate dc_state - DC invalidates it when setting power state to S3. */
-	dc_release_state(dm_state->context);
-	dm_state->context = dc_create_state(dm->dc);
+	dc_state_release(dm_state->context);
+	dm_state->context = dc_state_create(dm->dc);
 	/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
 	dc_resource_state_construct(dm->dc, dm_state->context);
 
@@ -3998,7 +3999,7 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj)
 	old_state = to_dm_atomic_state(obj->state);
 
 	if (old_state && old_state->context)
-		new_state->context = dc_copy_state(old_state->context);
+		new_state->context = dc_state_create_copy(old_state->context);
 
 	if (!new_state->context) {
 		kfree(new_state);
@@ -4014,7 +4015,7 @@ static void dm_atomic_destroy_state(struct drm_private_obj *obj,
 	struct dm_atomic_state *dm_state = to_dm_atomic_state(state);
 
 	if (dm_state && dm_state->context)
-		dc_release_state(dm_state->context);
+		dc_state_release(dm_state->context);
 
 	kfree(dm_state);
 }
@@ -4050,7 +4051,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 	if (!state)
 		return -ENOMEM;
 
-	state->context = dc_create_state(adev->dm.dc);
+	state->context = dc_state_create(adev->dm.dc);
 	if (!state->context) {
 		kfree(state);
 		return -ENOMEM;
@@ -4065,7 +4066,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 
 	r = amdgpu_display_modeset_create_props(adev);
 	if (r) {
-		dc_release_state(state->context);
+		dc_state_release(state->context);
 		kfree(state);
 		return r;
 	}
@@ -4077,7 +4078,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 
 	r = amdgpu_dm_audio_init(adev);
 	if (r) {
-		dc_release_state(state->context);
+		dc_state_release(state->context);
 		kfree(state);
 		return r;
 	}
@@ -6658,7 +6659,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 	if (!dc_plane_state)
 		goto cleanup;
 
-	dc_state = dc_create_state(dc);
+	dc_state = dc_state_create(dc);
 	if (!dc_state)
 		goto cleanup;
 
@@ -6685,9 +6686,9 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 		dc_result = dc_validate_plane(dc, dc_plane_state);
 
 	if (dc_result == DC_OK)
-		dc_result = dc_add_stream_to_ctx(dc, dc_state, stream);
+		dc_result = dc_state_add_stream(dc, dc_state, stream);
 
-	if (dc_result == DC_OK && !dc_add_plane_to_context(
+	if (dc_result == DC_OK && !dc_state_add_plane(
 						dc,
 						stream,
 						dc_plane_state,
@@ -6699,7 +6700,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 
 cleanup:
 	if (dc_state)
-		dc_release_state(dc_state);
+		dc_state_release(dc_state);
 
 	if (dc_plane_state)
 		dc_plane_state_release(dc_plane_state);
@@ -8858,7 +8859,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 					dc_stream_get_status(dm_new_crtc_state->stream);
 
 			if (!status)
-				status = dc_stream_get_status_from_state(dc_state,
+				status = dc_state_get_stream_status(dc_state,
 									 dm_new_crtc_state->stream);
 			if (!status)
 				drm_err(dev,
@@ -9783,7 +9784,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 				crtc->base.id);
 
 		/* i.e. reset mode */
-		if (dc_remove_stream_from_ctx(
+		if (dc_state_remove_stream(
 				dm->dc,
 				dm_state->context,
 				dm_old_crtc_state->stream) != DC_OK) {
@@ -9826,7 +9827,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 			DRM_DEBUG_ATOMIC("Enabling DRM crtc: %d\n",
 					 crtc->base.id);
 
-			if (dc_add_stream_to_ctx(
+			if (dc_state_add_stream(
 					dm->dc,
 					dm_state->context,
 					dm_new_crtc_state->stream) != DC_OK) {
@@ -10148,7 +10149,7 @@ static int dm_update_plane_state(struct dc *dc,
 		if (ret)
 			return ret;
 
-		if (!dc_remove_plane_from_context(
+		if (!dc_state_remove_plane(
 				dc,
 				dm_old_crtc_state->stream,
 				dm_old_plane_state->dc_state,
@@ -10226,7 +10227,7 @@ static int dm_update_plane_state(struct dc *dc,
 		 * state. It'll be released when the atomic state is
 		 * cleaned.
 		 */
-		if (!dc_add_plane_to_context(
+		if (!dc_state_add_plane(
 				dc,
 				dm_new_crtc_state->stream,
 				dc_new_plane_state,
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 390e7a99be541..bae4fdddbf97c 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -62,7 +62,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
 include $(AMD_DC)
 
 DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
-dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o
+dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o
 
 DISPLAY_CORE += dc_vm_helper.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 7dacb0f82d29b..28a2a837d2f0a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -29,6 +29,7 @@
 #include "dc_types.h"
 #include "dccg.h"
 #include "clk_mgr_internal.h"
+#include "dc_state_priv.h"
 #include "link.h"
 
 #include "dce100/dce_clk_mgr.h"
@@ -63,7 +64,7 @@ int clk_mgr_helper_get_active_display_cnt(
 		/* Don't count SubVP phantom pipes as part of active
 		 * display count
 		 */
-		if (stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM)
 			continue;
 
 		/*
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index f2fc7df68b58d..f5250022b98e5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -34,6 +34,8 @@
 #include "dce/dce_hwseq.h"
 
 #include "resource.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
 
 #include "gpio_service_interface.h"
 #include "clk_mgr.h"
@@ -811,7 +813,7 @@ static void dc_destruct(struct dc *dc)
 		link_enc_cfg_init(dc, dc->current_state);
 
 	if (dc->current_state) {
-		dc_release_state(dc->current_state);
+		dc_state_release(dc->current_state);
 		dc->current_state = NULL;
 	}
 
@@ -1028,7 +1030,7 @@ static bool dc_construct(struct dc *dc,
 	 * on creation it copies the contents of dc->dml
 	 */
 
-	dc->current_state = dc_create_state(dc);
+	dc->current_state = dc_state_create(dc);
 
 	if (!dc->current_state) {
 		dm_error("%s: failed to create validate ctx\n", __func__);
@@ -1118,7 +1120,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
 static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 {
 	int i, j;
-	struct dc_state *dangling_context = dc_create_state(dc);
+	struct dc_state *dangling_context = dc_state_create(dc);
 	struct dc_state *current_ctx;
 	struct pipe_ctx *pipe;
 	struct timing_generator *tg;
@@ -1164,6 +1166,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 		}
 
 		if (should_disable && old_stream) {
+			bool is_phantom = dc_state_get_stream_subvp_type(dc->current_state, old_stream) == SUBVP_PHANTOM;
 			pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 			tg = pipe->stream_res.tg;
 			/* When disabling plane for a phantom pipe, we must turn on the
@@ -1172,18 +1175,19 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 			 * state that can result in underflow or hang when enabling it
 			 * again for different use.
 			 */
-			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			if (is_phantom) {
 				if (tg->funcs->enable_crtc) {
 					int main_pipe_width, main_pipe_height;
+					struct dc_stream_state *old_paired_stream = dc_state_get_paired_subvp_stream(dc->current_state, old_stream);
 
-					main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width;
-					main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height;
+					main_pipe_width = old_paired_stream->dst.width;
+					main_pipe_height = old_paired_stream->dst.height;
 					if (dc->hwss.blank_phantom)
 						dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height);
 					tg->funcs->enable_crtc(tg);
 				}
 			}
-			dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
+			dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
 
 			if (pipe->stream && pipe->plane_state)
@@ -1206,7 +1210,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 			 * The OTG is set to disable on falling edge of VUPDATE so the plane disable
 			 * will still get it's double buffer update.
 			 */
-			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			if (is_phantom) {
 				if (tg->funcs->disable_phantom_crtc)
 					tg->funcs->disable_phantom_crtc(tg);
 			}
@@ -1215,7 +1219,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 
 	current_ctx = dc->current_state;
 	dc->current_state = dangling_context;
-	dc_release_state(current_ctx);
+	dc_state_release(current_ctx);
 }
 
 static void disable_vbios_mode_if_required(
@@ -1287,7 +1291,7 @@ static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
 		int count = 0;
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (!pipe->plane_state || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)
 			continue;
 
 		/* Timeout 100 ms */
@@ -1513,7 +1517,7 @@ static void program_timing_sync(
 		}
 
 		for (k = 0; k < group_size; k++) {
-			struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream);
+			struct dc_stream_status *status = dc_state_get_stream_status(ctx, pipe_set[k]->stream);
 
 			status->timing_sync_info.group_id = num_group;
 			status->timing_sync_info.group_size = group_size;
@@ -1840,7 +1844,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		/* Check old context for SubVP */
-		subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+		subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
 		if (subvp_prev_use)
 			break;
 	}
@@ -1998,9 +2002,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 	old_state = dc->current_state;
 	dc->current_state = context;
 
-	dc_release_state(old_state);
+	dc_state_release(old_state);
 
-	dc_retain_state(dc->current_state);
+	dc_state_retain(dc->current_state);
 
 	return result;
 }
@@ -2071,7 +2075,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
 	if (handle_exit_odm2to1)
 		res = commit_minimal_transition_state(dc, dc->current_state);
 
-	context = dc_create_state(dc);
+	context = dc_state_create(dc);
 	if (!context)
 		goto context_alloc_fail;
 
@@ -2091,7 +2095,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
 				streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst;
 
 			if (dc_is_embedded_signal(streams[i]->signal)) {
-				struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]);
+				struct dc_stream_status *status = dc_state_get_stream_status(context, streams[i]);
 
 				if (dc->hwss.is_abm_supported)
 					status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]);
@@ -2102,7 +2106,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
 	}
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 context_alloc_fail:
 
@@ -2156,7 +2160,7 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context)
 		pipe = &context->res_ctx.pipe_ctx[i];
 
 		// Don't check flip pending on phantom pipes
-		if (!pipe->plane_state || (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM))
+		if (!pipe->plane_state || (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM))
 			continue;
 
 		/* Must set to false to start with, due to OR in update function */
@@ -2228,103 +2232,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 	dc->optimized_required = false;
 }
 
-static void init_state(struct dc *dc, struct dc_state *context)
-{
-	/* Each context must have their own instance of VBA and in order to
-	 * initialize and obtain IP and SOC the base DML instance from DC is
-	 * initially copied into every context
-	 */
-	memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
-}
-
-struct dc_state *dc_create_state(struct dc *dc)
-{
-	struct dc_state *context = kvzalloc(sizeof(struct dc_state),
-					    GFP_KERNEL);
-
-	if (!context)
-		return NULL;
-
-	init_state(dc, context);
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	if (dc->debug.using_dml2) {
-		dml2_create(dc, &dc->dml2_options, &context->bw_ctx.dml2);
-	}
-#endif
-	kref_init(&context->refcount);
-
-	return context;
-}
-
-struct dc_state *dc_copy_state(struct dc_state *src_ctx)
-{
-	int i, j;
-	struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
-
-	if (!new_ctx)
-		return NULL;
-	memcpy(new_ctx, src_ctx, sizeof(struct dc_state));
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	if (new_ctx->bw_ctx.dml2 && !dml2_create_copy(&new_ctx->bw_ctx.dml2, src_ctx->bw_ctx.dml2)) {
-		dc_release_state(new_ctx);
-		return NULL;
- 	}
-#endif
-
-	for (i = 0; i < MAX_PIPES; i++) {
-			struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i];
-
-			if (cur_pipe->top_pipe)
-				cur_pipe->top_pipe =  &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
-			if (cur_pipe->bottom_pipe)
-				cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
-			if (cur_pipe->prev_odm_pipe)
-				cur_pipe->prev_odm_pipe =  &new_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
-
-			if (cur_pipe->next_odm_pipe)
-				cur_pipe->next_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
-	}
-
-	for (i = 0; i < new_ctx->stream_count; i++) {
-			dc_stream_retain(new_ctx->streams[i]);
-			for (j = 0; j < new_ctx->stream_status[i].plane_count; j++)
-				dc_plane_state_retain(
-					new_ctx->stream_status[i].plane_states[j]);
-	}
-
-	kref_init(&new_ctx->refcount);
-
-	return new_ctx;
-}
-
-void dc_retain_state(struct dc_state *context)
-{
-	kref_get(&context->refcount);
-}
-
-static void dc_state_free(struct kref *kref)
-{
-	struct dc_state *context = container_of(kref, struct dc_state, refcount);
-	dc_resource_state_destruct(context);
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	dml2_destroy(context->bw_ctx.dml2);
-	context->bw_ctx.dml2 = 0;
-#endif
-
-	kvfree(context);
-}
-
-void dc_release_state(struct dc_state *context)
-{
-	kref_put(&context->refcount, dc_state_free);
-}
-
 bool dc_set_generic_gpio_for_stereo(bool enable,
 		struct gpio_service *gpio_service)
 {
@@ -2992,7 +2899,7 @@ static void copy_stream_update_to_stream(struct dc *dc,
 				       update->dsc_config->num_slices_v != 0);
 
 		/* Use temporarry context for validating new DSC config */
-		struct dc_state *dsc_validate_context = dc_create_state(dc);
+		struct dc_state *dsc_validate_context = dc_state_create(dc);
 
 		if (dsc_validate_context) {
 			dc_resource_state_copy_construct(dc->current_state, dsc_validate_context);
@@ -3005,7 +2912,7 @@ static void copy_stream_update_to_stream(struct dc *dc,
 				update->dsc_config = NULL;
 			}
 
-			dc_release_state(dsc_validate_context);
+			dc_state_release(dsc_validate_context);
 		} else {
 			DC_ERROR("Failed to allocate new validate context for DSC change\n");
 			update->dsc_config = NULL;
@@ -3104,7 +3011,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
 			new_planes[i] = srf_updates[i].surface;
 
 		/* initialize scratch memory for building context */
-		context = dc_create_state(dc);
+		context = dc_state_create(dc);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return false;
@@ -3120,14 +3027,14 @@ static bool update_planes_and_stream_state(struct dc *dc,
 			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
 
 		/*remove old surfaces from context */
-		if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
+		if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) {
 
 			BREAK_TO_DEBUGGER();
 			goto fail;
 		}
 
 		/* add surface to context */
-		if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
+		if (!dc_state_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
 
 			BREAK_TO_DEBUGGER();
 			goto fail;
@@ -3185,7 +3092,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
 	return true;
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 	return false;
 
@@ -3626,7 +3533,7 @@ static void commit_planes_for_stream(struct dc *dc,
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		// Check old context for SubVP
-		subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+		subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
 		if (subvp_prev_use)
 			break;
 	}
@@ -3634,7 +3541,7 @@ static void commit_planes_for_stream(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			subvp_curr_use = true;
 			break;
 		}
@@ -4019,7 +3926,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_NONE) {
 			subvp_active = true;
 			break;
 		}
@@ -4056,7 +3963,7 @@ struct pipe_split_policy_backup {
 static void release_minimal_transition_state(struct dc *dc,
 		struct dc_state *context, struct pipe_split_policy_backup *policy)
 {
-	dc_release_state(context);
+	dc_state_release(context);
 	/* restore previous pipe split and odm policy */
 	if (!dc->config.is_vmin_only_asic)
 		dc->debug.pipe_split_policy = policy->mpc_policy;
@@ -4067,7 +3974,7 @@ static void release_minimal_transition_state(struct dc *dc,
 static struct dc_state *create_minimal_transition_state(struct dc *dc,
 		struct dc_state *base_context, struct pipe_split_policy_backup *policy)
 {
-	struct dc_state *minimal_transition_context = dc_create_state(dc);
+	struct dc_state *minimal_transition_context = dc_state_create(dc);
 	unsigned int i, j;
 
 	if (!dc->config.is_vmin_only_asic) {
@@ -4211,7 +4118,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
 			subvp_in_use = true;
 			break;
 		}
@@ -4523,7 +4430,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
 		if (dc->res_pool->funcs->save_mall_state)
 			dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 		if (!commit_minimal_transition_state(dc, context)) {
-			dc_release_state(context);
+			dc_state_release(context);
 			return false;
 		}
 		if (dc->res_pool->funcs->restore_mall_state)
@@ -4593,7 +4500,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
 		struct dc_state *old = dc->current_state;
 
 		dc->current_state = context;
-		dc_release_state(old);
+		dc_state_release(old);
 
 		// clear any forced full updates
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -4652,7 +4559,7 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	if (update_type >= UPDATE_TYPE_FULL) {
 
 		/* initialize scratch memory for building context */
-		context = dc_create_state(dc);
+		context = dc_state_create(dc);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return;
@@ -4698,7 +4605,7 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	if (update_type >= UPDATE_TYPE_FULL) {
 		if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
 			DC_ERROR("Mode validation failed for stream update!\n");
-			dc_release_state(context);
+			dc_state_release(context);
 			return;
 		}
 	}
@@ -4731,7 +4638,7 @@ void dc_commit_updates_for_stream(struct dc *dc,
 		struct dc_state *old = dc->current_state;
 
 		dc->current_state = context;
-		dc_release_state(old);
+		dc_state_release(old);
 
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index fc18b9dc946f9..170bad6f33c7b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -31,6 +31,7 @@
 #include "basics/dc_common.h"
 #include "resource.h"
 #include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
 
 #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
 
@@ -440,8 +441,7 @@ void get_subvp_visual_confirm_color(
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.paired_stream &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
 			/* SubVP enable - red */
 			color->color_g_y = 0;
 			color->color_b_cb = 0;
@@ -454,7 +454,7 @@ void get_subvp_visual_confirm_color(
 		}
 	}
 
-	if (enable_subvp && pipe_ctx->stream->mall_stream_config.type == SUBVP_NONE) {
+	if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) {
 		color->color_r_cr = 0;
 		if (pipe_ctx->stream->allow_freesync == 1) {
 			/* SubVP enable and DRR on - green */
@@ -529,7 +529,7 @@ void hwss_build_fast_sequence(struct dc *dc,
 			}
 			if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) {
 				if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) &&
-						current_mpc_pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+						dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN) {
 					block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 4382d9ae4292d..4a6a7d7557e07 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2993,189 +2993,6 @@ bool resource_update_pipes_for_plane_with_slice_count(
 	return result;
 }
 
-bool dc_add_plane_to_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context)
-{
-	struct resource_pool *pool = dc->res_pool;
-	struct pipe_ctx *otg_master_pipe;
-	struct dc_stream_status *stream_status = NULL;
-	bool added = false;
-
-	stream_status = dc_stream_get_status_from_state(context, stream);
-	if (stream_status == NULL) {
-		dm_error("Existing stream not found; failed to attach surface!\n");
-		goto out;
-	} else if (stream_status->plane_count == MAX_SURFACE_NUM) {
-		dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
-				plane_state, MAX_SURFACE_NUM);
-		goto out;
-	}
-
-	otg_master_pipe = resource_get_otg_master_for_stream(
-			&context->res_ctx, stream);
-	added = resource_append_dpp_pipes_for_plane_composition(context,
-			dc->current_state, pool, otg_master_pipe, plane_state);
-
-	if (added) {
-		stream_status->plane_states[stream_status->plane_count] =
-				plane_state;
-		stream_status->plane_count++;
-		dc_plane_state_retain(plane_state);
-	}
-
-out:
-	return added;
-}
-
-bool dc_remove_plane_from_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context)
-{
-	int i;
-	struct dc_stream_status *stream_status = NULL;
-	struct resource_pool *pool = dc->res_pool;
-
-	if (!plane_state)
-		return true;
-
-	for (i = 0; i < context->stream_count; i++)
-		if (context->streams[i] == stream) {
-			stream_status = &context->stream_status[i];
-			break;
-		}
-
-	if (stream_status == NULL) {
-		dm_error("Existing stream not found; failed to remove plane.\n");
-		return false;
-	}
-
-	resource_remove_dpp_pipes_for_plane_composition(
-			context, pool, plane_state);
-
-	for (i = 0; i < stream_status->plane_count; i++) {
-		if (stream_status->plane_states[i] == plane_state) {
-			dc_plane_state_release(stream_status->plane_states[i]);
-			break;
-		}
-	}
-
-	if (i == stream_status->plane_count) {
-		dm_error("Existing plane_state not found; failed to detach it!\n");
-		return false;
-	}
-
-	stream_status->plane_count--;
-
-	/* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
-	for (; i < stream_status->plane_count; i++)
-		stream_status->plane_states[i] = stream_status->plane_states[i + 1];
-
-	stream_status->plane_states[stream_status->plane_count] = NULL;
-
-	if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
-		/* ODM combine could prevent us from supporting more planes
-		 * we will reset ODM slice count back to 1 when all planes have
-		 * been removed to maximize the amount of planes supported when
-		 * new planes are added.
-		 */
-		resource_update_pipes_for_stream_with_slice_count(
-				context, dc->current_state, dc->res_pool, stream, 1);
-
-	return true;
-}
-
-/**
- * dc_rem_all_planes_for_stream - Remove planes attached to the target stream.
- *
- * @dc: Current dc state.
- * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
- *
- * Return:
- * Return true if DC was able to remove all planes from the target
- * stream, otherwise, return false.
- */
-bool dc_rem_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_state *context)
-{
-	int i, old_plane_count;
-	struct dc_stream_status *stream_status = NULL;
-	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
-
-	for (i = 0; i < context->stream_count; i++)
-			if (context->streams[i] == stream) {
-				stream_status = &context->stream_status[i];
-				break;
-			}
-
-	if (stream_status == NULL) {
-		dm_error("Existing stream %p not found!\n", stream);
-		return false;
-	}
-
-	old_plane_count = stream_status->plane_count;
-
-	for (i = 0; i < old_plane_count; i++)
-		del_planes[i] = stream_status->plane_states[i];
-
-	for (i = 0; i < old_plane_count; i++)
-		if (!dc_remove_plane_from_context(dc, stream, del_planes[i], context))
-			return false;
-
-	return true;
-}
-
-static bool add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		const struct dc_validation_set set[],
-		int set_count,
-		struct dc_state *context)
-{
-	int i, j;
-
-	for (i = 0; i < set_count; i++)
-		if (set[i].stream == stream)
-			break;
-
-	if (i == set_count) {
-		dm_error("Stream %p not found in set!\n", stream);
-		return false;
-	}
-
-	for (j = 0; j < set[i].plane_count; j++)
-		if (!dc_add_plane_to_context(dc, stream, set[i].plane_states[j], context))
-			return false;
-
-	return true;
-}
-
-bool dc_add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state * const *plane_states,
-		int plane_count,
-		struct dc_state *context)
-{
-	struct dc_validation_set set;
-	int i;
-
-	set.stream = stream;
-	set.plane_count = plane_count;
-
-	for (i = 0; i < plane_count; i++)
-		set.plane_states[i] = plane_states[i];
-
-	return add_all_planes_for_stream(dc, stream, &set, 1, context);
-}
-
 bool dc_is_timing_changed(struct dc_stream_state *cur_stream,
 		       struct dc_stream_state *new_stream)
 {
@@ -3327,84 +3144,6 @@ static struct audio *find_first_free_audio(
 	return NULL;
 }
 
-/*
- * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state.
- */
-enum dc_status dc_add_stream_to_ctx(
-		struct dc *dc,
-		struct dc_state *new_ctx,
-		struct dc_stream_state *stream)
-{
-	enum dc_status res;
-	DC_LOGGER_INIT(dc->ctx->logger);
-
-	if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) {
-		DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	new_ctx->streams[new_ctx->stream_count] = stream;
-	dc_stream_retain(stream);
-	new_ctx->stream_count++;
-
-	res = resource_add_otg_master_for_stream_output(
-			new_ctx, dc->res_pool, stream);
-	if (res != DC_OK)
-		DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
-
-	return res;
-}
-
-/*
- * dc_remove_stream_from_ctx() - Remove a stream from a dc_state.
- */
-enum dc_status dc_remove_stream_from_ctx(
-			struct dc *dc,
-			struct dc_state *new_ctx,
-			struct dc_stream_state *stream)
-{
-	int i;
-	struct dc_context *dc_ctx = dc->ctx;
-	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
-			&new_ctx->res_ctx, stream);
-
-	if (!del_pipe) {
-		DC_ERROR("Pipe not found for stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	resource_update_pipes_for_stream_with_slice_count(new_ctx,
-			dc->current_state, dc->res_pool, stream, 1);
-	resource_remove_otg_master_for_stream_output(
-			new_ctx, dc->res_pool, stream);
-
-	for (i = 0; i < new_ctx->stream_count; i++)
-		if (new_ctx->streams[i] == stream)
-			break;
-
-	if (new_ctx->streams[i] != stream) {
-		DC_ERROR("Context doesn't have stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	dc_stream_release(new_ctx->streams[i]);
-	new_ctx->stream_count--;
-
-	/* Trim back arrays */
-	for (; i < new_ctx->stream_count; i++) {
-		new_ctx->streams[i] = new_ctx->streams[i + 1];
-		new_ctx->stream_status[i] = new_ctx->stream_status[i + 1];
-	}
-
-	new_ctx->streams[new_ctx->stream_count] = NULL;
-	memset(
-			&new_ctx->stream_status[new_ctx->stream_count],
-			0,
-			sizeof(new_ctx->stream_status[0]));
-
-	return DC_OK;
-}
-
 static struct dc_stream_state *find_pll_sharable_stream(
 		struct dc_stream_state *stream_needs_pll,
 		struct dc_state *context)
@@ -3855,6 +3594,31 @@ static bool planes_changed_for_existing_stream(struct dc_state *context,
 	return false;
 }
 
+static bool add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		const struct dc_validation_set set[],
+		int set_count,
+		struct dc_state *state)
+{
+	int i, j;
+
+	for (i = 0; i < set_count; i++)
+		if (set[i].stream == stream)
+			break;
+
+	if (i == set_count) {
+		dm_error("Stream %p not found in set!\n", stream);
+		return false;
+	}
+
+	for (j = 0; j < set[i].plane_count; j++)
+		if (!dc_state_add_plane(dc, stream, set[i].plane_states[j], state))
+			return false;
+
+	return true;
+}
+
 /**
  * dc_validate_with_context - Validate and update the potential new stream in the context object
  *
@@ -3960,7 +3724,7 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 						       unchanged_streams[i],
 						       set,
 						       set_count)) {
-			if (!dc_rem_all_planes_for_stream(dc,
+			if (!dc_state_rem_all_planes_for_stream(dc,
 							  unchanged_streams[i],
 							  context)) {
 				res = DC_FAIL_DETACH_SURFACES;
@@ -3982,12 +3746,12 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 			}
 		}
 
-		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+		if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
 			res = DC_FAIL_DETACH_SURFACES;
 			goto fail;
 		}
 
-		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+		res = dc_state_remove_stream(dc, context, del_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 	}
@@ -4010,7 +3774,7 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 	/* Add new streams and then add all planes for the new stream */
 	for (i = 0; i < add_streams_count; i++) {
 		calculate_phy_pix_clks(add_streams[i]);
-		res = dc_add_stream_to_ctx(dc, context, add_streams[i]);
+		res = dc_state_add_stream(dc, context, add_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
new file mode 100644
index 0000000000000..ec5b0d114c7c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "core_types.h"
+#include "core_status.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
+#include "dc_plane_priv.h"
+
+#include "dm_services.h"
+#include "resource.h"
+
+#include "dml2/dml2_wrapper.h"
+#include "dml2/dml2_internal_types.h"
+
+#define DC_LOGGER \
+	dc->ctx->logger
+#define DC_LOGGER_INIT(logger)
+
+/* Public dc_state functions */
+static void init_state(struct dc *dc, struct dc_state *state)
+{
+	/* Each context must have their own instance of VBA and in order to
+	 * initialize and obtain IP and SOC the base DML instance from DC is
+	 * initially copied into every context
+	 */
+	memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
+}
+
+struct dc_state *dc_state_create(struct dc *dc)
+{
+	struct dc_state *state = kvzalloc(sizeof(struct dc_state),
+			GFP_KERNEL);
+
+	if (!state)
+		return NULL;
+
+	init_state(dc, state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	if (dc->debug.using_dml2)
+		dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2);
+#endif
+
+	kref_init(&state->refcount);
+
+	return state;
+}
+
+struct dc_state *dc_state_create_copy(struct dc_state *src_state)
+{
+	int i, j;
+	struct dc_state *new_state = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
+
+	if (!new_state)
+		return NULL;
+
+	memcpy(new_state, src_state, sizeof(struct dc_state));
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	if (new_state->bw_ctx.dml2 && !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) {
+		dc_state_release(new_state);
+		return NULL;
+	}
+#endif
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *cur_pipe = &new_state->res_ctx.pipe_ctx[i];
+
+		if (cur_pipe->top_pipe)
+			cur_pipe->top_pipe =  &new_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+
+		if (cur_pipe->bottom_pipe)
+			cur_pipe->bottom_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+
+		if (cur_pipe->prev_odm_pipe)
+			cur_pipe->prev_odm_pipe =  &new_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+
+		if (cur_pipe->next_odm_pipe)
+			cur_pipe->next_odm_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+	}
+
+	for (i = 0; i < new_state->stream_count; i++) {
+		dc_stream_retain(new_state->streams[i]);
+		for (j = 0; j < new_state->stream_status[i].plane_count; j++)
+			dc_plane_state_retain(
+					new_state->stream_status[i].plane_states[j]);
+	}
+
+	kref_init(&new_state->refcount);
+
+	return new_state;
+}
+
+void dc_state_retain(struct dc_state *context)
+{
+	kref_get(&context->refcount);
+}
+
+static void dc_state_free(struct kref *kref)
+{
+	struct dc_state *state = container_of(kref, struct dc_state, refcount);
+
+	dc_resource_state_destruct(state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	dml2_destroy(state->bw_ctx.dml2);
+	state->bw_ctx.dml2 = 0;
+#endif
+
+	kvfree(state);
+}
+
+void dc_state_release(struct dc_state *state)
+{
+	kref_put(&state->refcount, dc_state_free);
+}
+/*
+ * dc_state_add_stream() - Add a new dc_stream_state to a dc_state.
+ */
+enum dc_status dc_state_add_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	enum dc_status res;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (state->stream_count >= dc->res_pool->timing_generator_count) {
+		DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	state->streams[state->stream_count] = stream;
+	dc_stream_retain(stream);
+	state->stream_count++;
+
+	res = resource_add_otg_master_for_stream_output(
+			state, dc->res_pool, stream);
+	if (res != DC_OK)
+		DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
+
+	return res;
+}
+
+/*
+ * dc_state_remove_stream() - Remove a stream from a dc_state.
+ */
+enum dc_status dc_state_remove_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	int i;
+	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
+			&state->res_ctx, stream);
+
+	if (!del_pipe) {
+		dm_error("Pipe not found for stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	resource_update_pipes_for_stream_with_slice_count(state,
+			dc->current_state, dc->res_pool, stream, 1);
+	resource_remove_otg_master_for_stream_output(
+			state, dc->res_pool, stream);
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream)
+			break;
+
+	if (state->streams[i] != stream) {
+		dm_error("Context doesn't have stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	dc_stream_release(state->streams[i]);
+	state->stream_count--;
+
+	/* Trim back arrays */
+	for (; i < state->stream_count; i++) {
+		state->streams[i] = state->streams[i + 1];
+		state->stream_status[i] = state->stream_status[i + 1];
+	}
+
+	state->streams[state->stream_count] = NULL;
+	memset(
+			&state->stream_status[state->stream_count],
+			0,
+			sizeof(state->stream_status[0]));
+
+	return DC_OK;
+}
+
+bool dc_state_add_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state)
+{
+	struct resource_pool *pool = dc->res_pool;
+	struct pipe_ctx *otg_master_pipe;
+	struct dc_stream_status *stream_status = NULL;
+	bool added = false;
+
+	stream_status = dc_state_get_stream_status(state, stream);
+	if (stream_status == NULL) {
+		dm_error("Existing stream not found; failed to attach surface!\n");
+		goto out;
+	} else if (stream_status->plane_count == MAX_SURFACE_NUM) {
+		dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
+				plane_state, MAX_SURFACE_NUM);
+		goto out;
+	}
+
+	otg_master_pipe = resource_get_otg_master_for_stream(
+			&state->res_ctx, stream);
+	added = resource_append_dpp_pipes_for_plane_composition(state,
+			dc->current_state, pool, otg_master_pipe, plane_state);
+
+	if (added) {
+		stream_status->plane_states[stream_status->plane_count] =
+				plane_state;
+		stream_status->plane_count++;
+		dc_plane_state_retain(plane_state);
+	}
+
+out:
+	return added;
+}
+
+bool dc_state_remove_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state)
+{
+	int i;
+	struct dc_stream_status *stream_status = NULL;
+	struct resource_pool *pool = dc->res_pool;
+
+	if (!plane_state)
+		return true;
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream not found; failed to remove plane.\n");
+		return false;
+	}
+
+	resource_remove_dpp_pipes_for_plane_composition(
+			state, pool, plane_state);
+
+	for (i = 0; i < stream_status->plane_count; i++) {
+		if (stream_status->plane_states[i] == plane_state) {
+			dc_plane_state_release(stream_status->plane_states[i]);
+			break;
+		}
+	}
+
+	if (i == stream_status->plane_count) {
+		dm_error("Existing plane_state not found; failed to detach it!\n");
+		return false;
+	}
+
+	stream_status->plane_count--;
+
+	/* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
+	for (; i < stream_status->plane_count; i++)
+		stream_status->plane_states[i] = stream_status->plane_states[i + 1];
+
+	stream_status->plane_states[stream_status->plane_count] = NULL;
+
+	if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+		/* ODM combine could prevent us from supporting more planes
+		 * we will reset ODM slice count back to 1 when all planes have
+		 * been removed to maximize the amount of planes supported when
+		 * new planes are added.
+		 */
+		resource_update_pipes_for_stream_with_slice_count(
+				state, dc->current_state, dc->res_pool, stream, 1);
+
+	return true;
+}
+
+/**
+ * dc_state_rem_all_planes_for_stream - Remove planes attached to the target stream.
+ *
+ * @dc: Current dc state.
+ * @stream: Target stream, which we want to remove the attached plans.
+ * @context: New context.
+ *
+ * Return:
+ * Return true if DC was able to remove all planes from the target
+ * stream, otherwise, return false.
+ */
+bool dc_state_rem_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_state *state)
+{
+	int i, old_plane_count;
+	struct dc_stream_status *stream_status = NULL;
+	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream %p not found!\n", stream);
+		return false;
+	}
+
+	old_plane_count = stream_status->plane_count;
+
+	for (i = 0; i < old_plane_count; i++)
+		del_planes[i] = stream_status->plane_states[i];
+
+	for (i = 0; i < old_plane_count; i++)
+		if (!dc_state_remove_plane(dc, stream, del_planes[i], state))
+			return false;
+
+	return true;
+}
+
+bool dc_state_add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state * const *plane_states,
+		int plane_count,
+		struct dc_state *state)
+{
+	int i;
+	bool result = true;
+
+	for (i = 0; i < plane_count; i++)
+		if (!dc_state_add_plane(dc, stream, plane_states[i], state)) {
+			result = false;
+			break;
+		}
+
+	return result;
+}
+
+/* Private dc_state functions */
+
+/**
+ * dc_state_get_stream_status - Get stream status from given dc state
+ * @state: DC state to find the stream status in
+ * @stream: The stream to get the stream status for
+ *
+ * The given stream is expected to exist in the given dc state. Otherwise, NULL
+ * will be returned.
+ */
+struct dc_stream_status *dc_state_get_stream_status(
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	uint8_t i;
+
+	if (state == NULL)
+		return NULL;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (stream == state->streams[i])
+			return &state->stream_status[i];
+	}
+
+	return NULL;
+}
+
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+		const struct pipe_ctx *pipe_ctx)
+{
+	if (pipe_ctx->stream == NULL)
+		return SUBVP_NONE;
+
+	return pipe_ctx->stream->mall_stream_config.type;
+}
+
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+		const struct dc_stream_state *stream)
+{
+	return stream->mall_stream_config.type;
+}
+
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+		const struct dc_stream_state *stream)
+{
+	return stream->mall_stream_config.paired_stream;
+}
+
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *main_stream)
+{
+	struct dc_stream_state *phantom_stream = dc_create_stream_for_sink(main_stream->sink);
+
+	if (phantom_stream != NULL) {
+		phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
+		phantom_stream->dpms_off = true;
+	}
+
+	return phantom_stream;
+}
+
+void dc_state_release_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	dc_stream_release(phantom_stream);
+}
+
+struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *main_plane)
+{
+	struct dc_plane_state *phantom_plane = dc_create_plane_state(dc);
+
+	if (phantom_plane != NULL)
+		phantom_plane->is_phantom = true;
+
+	return phantom_plane;
+}
+
+void dc_state_release_phantom_plane(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *phantom_plane)
+{
+	dc_plane_state_release(phantom_plane);
+}
+
+/* add phantom streams to context and generate correct meta inside dc_state */
+enum dc_status dc_state_add_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream,
+		struct dc_stream_state *main_stream)
+{
+	enum dc_status res = dc_state_add_stream(dc, state, phantom_stream);
+
+	/* setup subvp meta */
+	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
+	phantom_stream->mall_stream_config.paired_stream = main_stream;
+	main_stream->mall_stream_config.type = SUBVP_MAIN;
+	main_stream->mall_stream_config.paired_stream = phantom_stream;
+
+	return res;
+}
+
+enum dc_status dc_state_remove_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	/* reset subvp meta */
+	phantom_stream->mall_stream_config.paired_stream->mall_stream_config.type = SUBVP_NONE;
+	phantom_stream->mall_stream_config.paired_stream->mall_stream_config.paired_stream = NULL;
+
+	/* remove stream from state */
+	return dc_state_remove_stream(dc, state, phantom_stream);
+}
+
+bool dc_state_add_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	return dc_state_add_plane(dc, phantom_stream, phantom_plane, state);
+}
+
+bool dc_state_remove_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	return dc_state_remove_plane(dc, phantom_stream, phantom_plane, state);
+}
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_state *state)
+{
+	return dc_state_rem_all_planes_for_stream(dc, phantom_stream, state);
+}
+
+bool dc_state_add_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state * const *phantom_planes,
+		int plane_count,
+		struct dc_state *state)
+{
+	return dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 38cd29b210c05..86de4bf2ce138 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -31,6 +31,8 @@
 #include "ipp.h"
 #include "timing_generator.h"
 #include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
 
 #define DC_LOGGER dc->ctx->logger
 
@@ -54,7 +56,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
 	}
 }
 
-static bool dc_stream_construct(struct dc_stream_state *stream,
+bool dc_stream_construct(struct dc_stream_state *stream,
 	struct dc_sink *dc_sink_data)
 {
 	uint32_t i = 0;
@@ -127,7 +129,7 @@ static bool dc_stream_construct(struct dc_stream_state *stream,
 	return true;
 }
 
-static void dc_stream_destruct(struct dc_stream_state *stream)
+void dc_stream_destruct(struct dc_stream_state *stream)
 {
 	dc_sink_release(stream->sink);
 	if (stream->out_transfer_func != NULL) {
@@ -208,31 +210,6 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
 	return new_stream;
 }
 
-/**
- * dc_stream_get_status_from_state - Get stream status from given dc state
- * @state: DC state to find the stream status in
- * @stream: The stream to get the stream status for
- *
- * The given stream is expected to exist in the given dc state. Otherwise, NULL
- * will be returned.
- */
-struct dc_stream_status *dc_stream_get_status_from_state(
-	struct dc_state *state,
-	struct dc_stream_state *stream)
-{
-	uint8_t i;
-
-	if (state == NULL)
-		return NULL;
-
-	for (i = 0; i < state->stream_count; i++) {
-		if (stream == state->streams[i])
-			return &state->stream_status[i];
-	}
-
-	return NULL;
-}
-
 /**
  * dc_stream_get_status() - Get current stream status of the given stream state
  * @stream: The stream to get the stream status for.
@@ -244,7 +221,7 @@ struct dc_stream_status *dc_stream_get_status(
 	struct dc_stream_state *stream)
 {
 	struct dc *dc = stream->ctx->dc;
-	return dc_stream_get_status_from_state(dc->current_state, stream);
+	return dc_state_get_stream_status(dc->current_state, stream);
 }
 
 static void program_cursor_attributes(
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index a80e45300783c..19a2c7140ae84 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -32,10 +32,12 @@
 #include "transform.h"
 #include "dpp.h"
 
+#include "dc_plane_priv.h"
+
 /*******************************************************************************
  * Private functions
  ******************************************************************************/
-static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
 {
 	plane_state->ctx = ctx;
 
@@ -63,7 +65,7 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl
 
 }
 
-static void dc_plane_destruct(struct dc_plane_state *plane_state)
+void dc_plane_destruct(struct dc_plane_state *plane_state)
 {
 	if (plane_state->gamma_correction != NULL) {
 		dc_gamma_release(&plane_state->gamma_correction);
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index ce00a6eeb1640..d638679cf31ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -27,6 +27,8 @@
 #define DC_INTERFACE_H_
 
 #include "dc_types.h"
+#include "dc_state.h"
+#include "dc_plane.h"
 #include "grph_object_defs.h"
 #include "logger_types.h"
 #include "hdcp_msg_types.h"
@@ -1386,16 +1388,6 @@ struct dc_surface_update {
 	const struct colorspace_transform *gamut_remap_matrix;
 };
 
-/*
- * Create a new surface with default parameters;
- */
-struct dc_plane_state *dc_create_plane_state(struct dc *dc);
-const struct dc_plane_status *dc_plane_get_status(
-		const struct dc_plane_state *plane_state);
-
-void dc_plane_state_retain(struct dc_plane_state *plane_state);
-void dc_plane_state_release(struct dc_plane_state *plane_state);
-
 void dc_gamma_retain(struct dc_gamma *dc_gamma);
 void dc_gamma_release(struct dc_gamma **dc_gamma);
 struct dc_gamma *dc_create_gamma(void);
@@ -1486,10 +1478,6 @@ enum dc_status dc_commit_streams(struct dc *dc,
 				 struct dc_stream_state *streams[],
 				 uint8_t stream_count);
 
-struct dc_state *dc_create_state(struct dc *dc);
-struct dc_state *dc_copy_state(struct dc_state *src_ctx);
-void dc_retain_state(struct dc_state *context);
-void dc_release_state(struct dc_state *context);
 
 struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc,
 		struct dc_stream_state *stream,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 4b93e7a529d50..704d4ff722f02 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -33,6 +33,7 @@
 #include "cursor_reg_cache.h"
 #include "resource.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 
 #define CTX dc_dmub_srv->ctx
 #define DC_LOGGER CTX->logger
@@ -532,12 +533,14 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
  * 3. Populate the drr_info with the min and max supported vtotal values
  */
 static void populate_subvp_cmd_drr_info(struct dc *dc,
+		struct dc_state *context,
 		struct pipe_ctx *subvp_pipe,
 		struct pipe_ctx *vblank_pipe,
 		struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data)
 {
+	struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 	struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
-	struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+	struct dc_crtc_timing *phantom_timing = &phantom_stream->timing;
 	struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
 	uint16_t drr_frame_us = 0;
 	uint16_t min_drr_supported_us = 0;
@@ -625,7 +628,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			break;
 	}
 
@@ -642,7 +645,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 
 	if (vblank_pipe->stream->ignore_msa_timing_param &&
 		(vblank_pipe->stream->allow_freesync || vblank_pipe->stream->vrr_active_variable || vblank_pipe->stream->vrr_active_fixed))
-		populate_subvp_cmd_drr_info(dc, pipe, vblank_pipe, pipe_data);
+		populate_subvp_cmd_drr_info(dc, context, pipe, vblank_pipe, pipe_data);
 }
 
 /**
@@ -667,10 +670,15 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
 	uint32_t subvp0_prefetch_us = 0;
 	uint32_t subvp1_prefetch_us = 0;
 	uint32_t prefetch_delta_us = 0;
-	struct dc_crtc_timing *phantom_timing0 = &subvp_pipes[0]->stream->mall_stream_config.paired_stream->timing;
-	struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
+	struct dc_stream_state *phantom_stream0 = NULL;
+	struct dc_stream_state *phantom_stream1 = NULL;
+	struct dc_crtc_timing *phantom_timing0 = &phantom_stream0->timing;
+	struct dc_crtc_timing *phantom_timing1 = &phantom_stream1->timing;
 	struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
 
+	phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream);
+	phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream);
+
 	subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
 			(uint64_t)phantom_timing0->h_total * 1000000),
 			(((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
@@ -720,8 +728,9 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
 	uint32_t j;
 	struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data =
 			&cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
+	struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 	struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
-	struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+	struct dc_crtc_timing *phantom_timing = &phantom_stream->timing;
 	uint32_t out_num_stream, out_den_stream, out_num_plane, out_den_plane, out_num, out_den;
 
 	pipe_data->mode = SUBVP;
@@ -775,7 +784,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
 	for (j = 0; j < dc->res_pool->pipe_count; j++) {
 		struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];
 
-		if (phantom_pipe->stream == subvp_pipe->stream->mall_stream_config.paired_stream) {
+		if (phantom_pipe->stream == dc_state_get_paired_subvp_stream(context, subvp_pipe->stream)) {
 			pipe_data->pipe_config.subvp_data.phantom_pipe_index = phantom_pipe->stream_res.tg->inst;
 			if (phantom_pipe->bottom_pipe) {
 				pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->bottom_pipe->plane_res.hubp->inst;
@@ -809,6 +818,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 	union dmub_rb_cmd cmd;
 	struct pipe_ctx *subvp_pipes[2];
 	uint32_t wm_val_refclk = 0;
+	enum mall_stream_type pipe_mall_type;
 
 	memset(&cmd, 0, sizeof(cmd));
 	// FW command for SUBVP
@@ -824,7 +834,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		 */
 		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 				resource_is_pipe_type(pipe, DPP_PIPE) &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			subvp_pipes[subvp_count++] = pipe;
 	}
 
@@ -832,6 +842,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		// For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd
 		for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+			pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 			if (!pipe->stream)
 				continue;
@@ -843,11 +854,11 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 					resource_is_pipe_type(pipe, DPP_PIPE) &&
 					pipe->stream->mall_stream_config.paired_stream &&
-					pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+					pipe_mall_type == SUBVP_MAIN) {
 				populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
 			} else if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 					resource_is_pipe_type(pipe, DPP_PIPE) &&
-					pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+					pipe_mall_type == SUBVP_NONE) {
 				// Don't need to check for ActiveDRAMClockChangeMargin < 0, not valid in cases where
 				// we run through DML without calculating "natural" P-state support
 				populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h
new file mode 100644
index 0000000000000..ef380cae816a3
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_H_
+#define _DC_PLANE_H_
+
+#include "dc.h"
+#include "dc_hw_types.h"
+
+struct dc_plane_state *dc_create_plane_state(struct dc *dc);
+const struct dc_plane_status *dc_plane_get_status(
+		const struct dc_plane_state *plane_state);
+void dc_plane_state_retain(struct dc_plane_state *plane_state);
+void dc_plane_state_release(struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
new file mode 100644
index 0000000000000..9ee184c1df007
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_PRIV_H_
+#define _DC_PLANE_PRIV_H_
+
+#include "dc_plane.h"
+
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state);
+void dc_plane_destruct(struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h
new file mode 100644
index 0000000000000..df1a8b85a6523
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_H_
+#define _DC_STATE_H_
+
+#include "dc.h"
+#include "inc/core_status.h"
+
+
+struct dc_state *dc_state_create(struct dc *dc);
+struct dc_state *dc_state_create_copy(struct dc_state *src_state);
+void dc_state_retain(struct dc_state *state);
+void dc_state_release(struct dc_state *state);
+
+enum dc_status dc_state_add_stream(struct dc *dc,
+				    struct dc_state *state,
+				    struct dc_stream_state *stream);
+
+enum dc_status dc_state_remove_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream);
+
+bool dc_state_add_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state);
+
+bool dc_state_remove_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state);
+
+bool dc_state_rem_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_state *state);
+
+bool dc_state_add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state * const *plane_states,
+		int plane_count,
+		struct dc_state *state);
+
+struct dc_stream_status *dc_state_get_stream_status(
+	struct dc_state *state,
+	struct dc_stream_state *stream);
+#endif /* _DC_STATE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
new file mode 100644
index 0000000000000..16c09caa13176
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_PRIV_H_
+#define _DC_STATE_PRIV_H_
+
+#include "dc_state.h"
+#include "dc_stream.h"
+
+struct dc_stream_status *dc_state_get_stream_status(
+	struct dc_state *state,
+	struct dc_stream_state *stream);
+
+/* Get the type of the provided resource (none, phantom, main) based on the provided
+ * context. If the context is unavailable, determine only if phantom or not.
+ */
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+		const struct pipe_ctx *pipe_ctx);
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+		const struct dc_stream_state *stream);
+
+/* Gets the phantom stream if main is provided, gets the main if phantom is provided.*/
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+		const struct dc_stream_state *stream);
+
+/* allocate's phantom stream or plane and returns pointer to the object */
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *main_stream);
+struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *main_plane);
+
+/* deallocate's phantom stream or plane */
+void dc_state_release_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream);
+void dc_state_release_phantom_plane(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *phantom_plane);
+
+/* add/remove phantom stream to context and generate subvp meta data */
+enum dc_status dc_state_add_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream,
+		struct dc_stream_state *main_stream);
+enum dc_status dc_state_remove_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream);
+
+bool dc_state_add_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state);
+
+bool dc_state_remove_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state);
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_state *state);
+
+bool dc_state_add_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state * const *phantom_planes,
+		int plane_count,
+		struct dc_state *state);
+
+#endif /* _DC_STATE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 27118e6727548..f617428c6a576 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -146,12 +146,6 @@ struct test_pattern {
 
 #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR)
 
-enum mall_stream_type {
-	SUBVP_NONE, // subvp not in use
-	SUBVP_MAIN, // subvp in use, this stream is main stream
-	SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
-};
-
 struct mall_stream_config {
 	/* MALL stream config to indicate if the stream is phantom or not.
 	 * We will use a phantom stream to indicate that the pipe is phantom.
@@ -413,41 +407,6 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
 				  uint32_t *h_position,
 				  uint32_t *v_position);
 
-enum dc_status dc_add_stream_to_ctx(
-			struct dc *dc,
-		struct dc_state *new_ctx,
-		struct dc_stream_state *stream);
-
-enum dc_status dc_remove_stream_from_ctx(
-		struct dc *dc,
-			struct dc_state *new_ctx,
-			struct dc_stream_state *stream);
-
-
-bool dc_add_plane_to_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context);
-
-bool dc_remove_plane_from_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context);
-
-bool dc_rem_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_state *context);
-
-bool dc_add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state * const *plane_states,
-		int plane_count,
-		struct dc_state *context);
-
 bool dc_stream_add_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		struct dc_writeback_info *wb_info);
@@ -516,9 +475,6 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink);
 void dc_stream_retain(struct dc_stream_state *dc_stream);
 void dc_stream_release(struct dc_stream_state *dc_stream);
 
-struct dc_stream_status *dc_stream_get_status_from_state(
-	struct dc_state *state,
-	struct dc_stream_state *stream);
 struct dc_stream_status *dc_stream_get_status(
 	struct dc_stream_state *dc_stream);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
new file mode 100644
index 0000000000000..710d3b04c7e86
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STREAM_PRIV_H_
+#define _DC_STREAM_PRIV_H_
+
+#include "dc_stream.h"
+
+bool dc_stream_construct(struct dc_stream_state *stream,
+	struct dc_sink *dc_sink_data);
+void dc_stream_destruct(struct dc_stream_state *stream);
+
+#endif // _DC_STREAM_PRIV_H_
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 7313cfe69498c..870cd3932015c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1161,4 +1161,9 @@ enum dc_hpd_enable_select {
 	HPD_EN_FOR_SECONDARY_EDP_ONLY,
 };
 
+enum mall_stream_type {
+	SUBVP_NONE, // subvp not in use
+	SUBVP_MAIN, // subvp in use, this stream is main stream
+	SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
+};
 #endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index e8159a459bcef..b3d3607411756 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -28,6 +28,7 @@
 #include "dcn20/dcn20_resource.h"
 #include "dml/dcn32/display_mode_vba_util_32.h"
 #include "dml/dcn32/dcn32_fpu.h"
+#include "dc_state_priv.h"
 
 static bool is_dual_plane(enum surface_pixel_format format)
 {
@@ -190,7 +191,7 @@ bool dcn32_subvp_in_use(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE)
+		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
 			return true;
 	}
 	return false;
@@ -264,18 +265,17 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint
 
 	// Do not override if a stream has multiple planes
 	for (i = 0; i < context->stream_count; i++) {
-		if (context->stream_status[i].plane_count > 1) {
+		if (context->stream_status[i].plane_count > 1)
 			return;
-		}
-		if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) {
+
+		if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
 			stream_count++;
-		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+		if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 			if (dcn32_allow_subvp_high_refresh_rate(dc, context, pipe_ctx)) {
 
 				if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
@@ -290,7 +290,7 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-			if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx)) {
 				if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
 					if (pipe_segments[i] > 4)
 						pipe_segments[i] = 4;
@@ -337,14 +337,14 @@ void dcn32_determine_det_override(struct dc *dc,
 
 	for (i = 0; i < context->stream_count; i++) {
 		/* Don't count SubVP streams for DET allocation */
-		if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM)
+		if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
 			stream_count++;
 	}
 
 	if (stream_count > 0) {
 		stream_segments = 18 / stream_count;
 		for (i = 0; i < context->stream_count; i++) {
-			if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
+			if (dc_state_get_stream_subvp_type(context, context->streams[i]) == SUBVP_PHANTOM)
 				continue;
 
 			if (context->stream_status[i].plane_count > 0)
@@ -716,10 +716,11 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
 				resource_is_pipe_type(pipe, DPP_PIPE)) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (pipe_mall_type == SUBVP_MAIN) {
 				subvp_count++;
 
 				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
@@ -728,7 +729,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
 			}
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE) {
 				non_subvp_pipes++;
 				drr_psr_capable = (drr_psr_capable || dcn32_is_psr_capable(pipe));
 				if (pipe->stream->ignore_msa_timing_param &&
@@ -776,10 +777,11 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
 				resource_is_pipe_type(pipe, DPP_PIPE)) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (pipe_mall_type == SUBVP_MAIN) {
 				subvp_count++;
 
 				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
@@ -788,7 +790,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
 			}
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE) {
 				non_subvp_pipes++;
 				vblank_psr_capable = (vblank_psr_capable || dcn32_is_psr_capable(pipe));
 				if (pipe->stream->ignore_msa_timing_param &&
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index ec77b2b41ba36..1ec8ca1fdb4fd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -33,6 +33,7 @@
 
 #include "link.h"
 #include "dcn20_fpu.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc->ctx->logger
@@ -1074,7 +1075,7 @@ void dcn20_calculate_dlg_params(struct dc *dc,
 		pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 		pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1424,7 +1425,7 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc,
 		 */
 		if (res_ctx->pipe_ctx[i].plane_state &&
 				(res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
-				 res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM))
+				dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM))
 			pipes[pipe_cnt].pipe.src.num_cursors = 0;
 		else
 			pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index de209ca0cf8cc..50d223fa1e498 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -32,6 +32,7 @@
 #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -341,7 +342,7 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
 		if (!pipe->stream)
 			continue;
 
-		if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			pipes[pipe_idx].pipe.dest.vstartup_start =
 				get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 			pipes[pipe_idx].pipe.dest.vupdate_offset =
@@ -624,7 +625,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc,
 		if (pipe->plane_state && !pipe->top_pipe && !dcn32_is_center_timing(pipe) &&
 				!(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) &&
 				(!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) &&
-				pipe->stream->mall_stream_config.type == SUBVP_NONE &&
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE &&
 				(refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) &&
 				!pipe->plane_state->address.tmz_surface &&
 				(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 ||
@@ -682,7 +683,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context
 
 		// Find the minimum pipe split count for non SubVP pipes
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE) {
 			split_cnt = 0;
 			while (pipe) {
 				split_cnt++;
@@ -735,8 +736,8 @@ static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
 		 * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
 		 */
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			phantom = pipe->stream->mall_stream_config.paired_stream;
+			dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			phantom = dc_state_get_paired_subvp_stream(context, pipe->stream);
 			microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
 					phantom->timing.v_addressable;
 
@@ -804,6 +805,7 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 	int16_t stretched_drr_us = 0;
 	int16_t drr_stretched_vblank_us = 0;
 	int16_t max_vblank_mallregion = 0;
+	struct dc_stream_state *phantom_stream;
 
 	// Find SubVP pipe
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -816,7 +818,7 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			break;
 	}
 
@@ -829,13 +831,14 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 				!resource_is_pipe_type(pipe, DPP_PIPE))
 			continue;
 
-		if (drr_pipe->stream->mall_stream_config.type == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
+		if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
 				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed))
 			break;
 	}
 
+	phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream);
 	main_timing = &pipe->stream->timing;
-	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+	phantom_timing = &phantom_stream->timing;
 	drr_timing = &drr_pipe->stream->timing;
 	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
 			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
@@ -895,6 +898,8 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
 	struct dc_crtc_timing *vblank_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	enum mall_stream_type pipe_mall_type;
 
 	/* For SubVP + VBLANK/DRR cases, we assume there can only be
 	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
@@ -904,6 +909,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 	 */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		// We check for master pipe, but it shouldn't matter since we only need
 		// the pipe for timing info (stream should be same for any pipe splits)
@@ -911,18 +917,19 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 				!resource_is_pipe_type(pipe, DPP_PIPE))
 			continue;
 
-		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		if (!found && pipe_mall_type == SUBVP_NONE) {
 			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
 			vblank_index = i;
 			found = true;
 		}
 
-		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
 			subvp_pipe = pipe;
 	}
 	if (found) {
+		phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 		main_timing = &subvp_pipe->stream->timing;
-		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		phantom_timing = &phantom_stream->timing;
 		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
 		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
 		// Also include the prefetch end to mallstart delay time
@@ -977,7 +984,7 @@ static bool subvp_subvp_admissable(struct dc *dc,
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
 			refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 				pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 			refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
@@ -1026,23 +1033,23 @@ static bool subvp_validate_static_schedulability(struct dc *dc,
 
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (!pipe->stream)
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			if (pipe_mall_type == SUBVP_MAIN)
 				subvp_count++;
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE)
 				non_subvp_pipes++;
-			}
 		}
 
 		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
 		// switching (SubVP + VACTIVE unsupported). In situations where we force
 		// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
 		if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+				pipe_mall_type == SUBVP_NONE) {
 			vactive_count++;
 		}
 		pipe_idx++;
@@ -1078,7 +1085,7 @@ static void assign_subvp_index(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
 		if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+				dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
 			pipe_ctx->subvp_index = index++;
 		} else {
 			pipe_ctx->subvp_index = 0;
@@ -1684,7 +1691,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 		pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
 				pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1716,7 +1723,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 				context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) &&
 				context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
 			/* SS: all active surfaces stored in MALL */
-			if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) {
 				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
 
 				if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
index e85866db80ff7..7ca7f2a743c25 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
@@ -38,5 +38,6 @@
 #include "core_types.h"
 #include "dsc.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 
 #endif //__DML2_DC_TYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
index 32f8a43af3d68..0c146af34d82a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
@@ -51,7 +51,7 @@ unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx,
 
 		// Find the phantom pipes
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
 			mblk_width = ctx->config.mall_cfg.mblk_width_pixels;
 			mblk_height = bytes_per_pixel == 4 ? mblk_width = ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels;
@@ -253,7 +253,7 @@ static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context
 		 *   to combine this with SubVP can cause issues with the scheduling).
 		 */
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120 &&
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 &&
 				vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
 			while (pipe) {
 				num_pipes++;
@@ -317,7 +317,7 @@ static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *st
 
 		// Find the minimum pipe split count for non SubVP pipes
 		if (pipe->stream && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) {
 			split_cnt = 0;
 			while (pipe) {
 				split_cnt++;
@@ -372,8 +372,8 @@ static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *c
 		 * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
 		 */
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			phantom = pipe->stream->mall_stream_config.paired_stream;
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream);
 			microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
 					phantom->timing.v_addressable;
 
@@ -435,6 +435,7 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context
 	struct pipe_ctx *pipe = NULL;
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_stream_state *phantom_stream;
 	int16_t prefetch_us = 0;
 	int16_t mall_region_us = 0;
 	int16_t drr_frame_us = 0;	// nominal frame time
@@ -453,12 +454,13 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			break;
 	}
 
+	phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream);
 	main_timing = &pipe->stream->timing;
-	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+	phantom_timing = &phantom_stream->timing;
 	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
 			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
 			ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us;
@@ -519,6 +521,8 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
 	struct dc_crtc_timing *vblank_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	enum mall_stream_type pipe_mall_type;
 
 	/* For SubVP + VBLANK/DRR cases, we assume there can only be
 	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
@@ -528,19 +532,20 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 	 */
 	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe);
 
 		// We check for master pipe, but it shouldn't matter since we only need
 		// the pipe for timing info (stream should be same for any pipe splits)
 		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
 
-		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		if (!found && pipe_mall_type == SUBVP_NONE) {
 			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
 			vblank_index = i;
 			found = true;
 		}
 
-		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
 			subvp_pipe = pipe;
 	}
 	// Use ignore_msa_timing_param flag to identify as DRR
@@ -548,8 +553,9 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 		// SUBVP + DRR case
 		schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing);
 	} else if (found) {
+		phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream);
 		main_timing = &subvp_pipe->stream->timing;
-		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		phantom_timing = &phantom_stream->timing;
 		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
 		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
 		// Also include the prefetch end to mallstart delay time
@@ -602,19 +608,20 @@ bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc
 
 	for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe);
 
 		if (!pipe->stream)
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+				pipe_mall_type == SUBVP_MAIN)
 			subvp_count++;
 
 		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
 		// switching (SubVP + VACTIVE unsupported). In situations where we force
 		// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
 		if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		    pipe_mall_type == SUBVP_NONE) {
 			vactive_count++;
 		}
 		pipe_idx++;
@@ -708,14 +715,10 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state
 static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup)
 {
 	struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx];
-	struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_stream_for_sink(ref_pipe->stream->sink);
-
-	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
-	phantom_stream->dpms_off = true;
-	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
-	phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
-	ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
-	ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
+	struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream(
+			ctx->config.svp_pstate.callbacks.dc,
+			state,
+			ref_pipe->stream);
 
 	/* stream has limited viewport and small timing */
 	memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
@@ -723,7 +726,10 @@ static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, s
 	memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst));
 	set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup);
 
-	ctx->config.svp_pstate.callbacks.add_stream_to_ctx(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+	ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc,
+			state,
+			phantom_stream,
+			ref_pipe->stream);
 	return phantom_stream;
 }
 
@@ -740,7 +746,10 @@ static void enable_phantom_plane(struct dml2_context *ctx,
 		if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) {
 			phantom_plane = prev_phantom_plane;
 		} else {
-			phantom_plane = ctx->config.svp_pstate.callbacks.create_plane(ctx->config.svp_pstate.callbacks.dc);
+			phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane(
+					ctx->config.svp_pstate.callbacks.dc,
+					state,
+					curr_pipe->plane_state);
 		}
 
 		memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
@@ -763,9 +772,7 @@ static void enable_phantom_plane(struct dml2_context *ctx,
 		phantom_plane->clip_rect.y = 0;
 		phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable;
 
-		phantom_plane->is_phantom = true;
-
-		ctx->config.svp_pstate.callbacks.add_plane_to_context(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
+		ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
 
 		curr_pipe = curr_pipe->bottom_pipe;
 		prev_phantom_plane = phantom_plane;
@@ -790,7 +797,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_
 		// We determine which phantom pipes were added by comparing with
 		// the phantom stream.
 		if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
 			pipe->stream->use_dynamic_meta = false;
 			pipe->plane_state->flip_immediate = false;
 			if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) {
@@ -822,7 +829,7 @@ static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_str
 		del_planes[i] = stream_status->plane_states[i];
 
 	for (i = 0; i < old_plane_count; i++)
-		if (!ctx->config.svp_pstate.callbacks.remove_plane_from_context(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
+		if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
 			return false;
 
 	return true;
@@ -838,19 +845,23 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state
 	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
 		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
 		// build scaling params for phantom pipes
-		if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
 			phantom_plane = pipe->plane_state;
 			phantom_stream = pipe->stream;
 
 			remove_all_planes_for_stream(ctx, pipe->stream, state);
-			ctx->config.svp_pstate.callbacks.remove_stream_from_ctx(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream);
+			ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream);
 
 			/* Ref count is incremented on allocation and also when added to the context.
 			 * Therefore we must call release for the the phantom plane and stream once
 			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
 			 */
-			ctx->config.svp_pstate.callbacks.plane_state_release(phantom_plane);
-			ctx->config.svp_pstate.callbacks.stream_release(phantom_stream);
+			ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc,
+					state,
+					phantom_plane);
+			ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc,
+					state,
+					phantom_stream);
 
 			removed_pipe = true;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 279e7605a0a2b..bc2959fda5be3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -1051,6 +1051,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 {
 	int i = 0, j = 0;
 	int disp_cfg_stream_location, disp_cfg_plane_location;
+	enum mall_stream_type stream_mall_type;
 
 	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
 		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false;
@@ -1071,6 +1072,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 
 	for (i = 0; i < context->stream_count; i++) {
 		disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg);
+		stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]);
 
 		if (disp_cfg_stream_location < 0)
 			disp_cfg_stream_location = dml_dispcfg->num_timings++;
@@ -1115,10 +1117,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 				populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]);
 				populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context);
 
-				if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN) {
+				if (stream_mall_type == SUBVP_MAIN) {
 					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
 					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
-				} else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) {
+				} else if (stream_mall_type == SUBVP_PHANTOM) {
 					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
 					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable;
 					dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required;
@@ -1147,9 +1149,9 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 						break;
 					}
 
-					if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN)
+					if (stream_mall_type == SUBVP_MAIN)
 						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
-					else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
+					else if (stream_mall_type == SUBVP_PHANTOM)
 						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
 
 					dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 814dbdcf9a787..9156e2ea55f5a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -279,6 +279,7 @@ static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, str
 void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt)
 {
 	unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id;
+	enum mall_stream_type pipe_mall_type;
 	bool unbounded_req_enabled = false;
 	struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch;
 
@@ -326,7 +327,8 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
 		 */
 		populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]);
+		if (pipe_mall_type == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false;
@@ -353,7 +355,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) &&
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) {
 			/* SS: all active surfaces stored in MALL */
-			if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (pipe_mall_type != SUBVP_PHANTOM) {
 				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes;
 			} else {
 				/* SUBVP: phantom surfaces only stored in MALL */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 9d354fde6908a..638591ed057d8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -418,7 +418,7 @@ static int find_drr_eligible_stream(struct dc_state *display_state)
 	int i;
 
 	for (i = 0; i < display_state->stream_count; i++) {
-		if (display_state->streams[i]->mall_stream_config.type == SUBVP_NONE
+		if (dc_state_get_stream_subvp_type(display_state, display_state->streams[i]) == SUBVP_NONE
 			&& display_state->streams[i]->ignore_msa_timing_param) {
 			// Use ignore_msa_timing_param flag to identify as DRR
 			return i;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index 0de6886969c69..505a5d7ae20dd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -93,15 +93,31 @@ struct dml2_dc_callbacks {
 struct dml2_dc_svp_callbacks {
 	struct dc *dc;
 	bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx);
-	struct dc_stream_state* (*create_stream_for_sink)(struct dc_sink *dc_sink_data);
-	struct dc_plane_state* (*create_plane)(struct dc *dc);
-	enum dc_status (*add_stream_to_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
-	bool (*add_plane_to_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
-	bool (*remove_plane_from_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
-	enum dc_status (*remove_stream_from_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *stream);
-	void (*plane_state_release)(struct dc_plane_state *plane_state);
-	void (*stream_release)(struct dc_stream_state *stream);
+	struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *main_stream);
+	struct dc_plane_state* (*create_phantom_plane)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_plane_state *main_plane);
+	enum dc_status (*add_phantom_stream)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *phantom_stream,
+			struct dc_stream_state *main_stream);
+	bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
+	bool (*remove_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
+	enum dc_status (*remove_phantom_stream)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *stream);
+	void (*release_phantom_plane)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_plane_state *plane);
+	void (*release_phantom_stream)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *stream);
 	void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc);
+	enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx);
+	enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream);
+	struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream);
 };
 
 struct dml2_clks_table_entry {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index c73fe5e9b3610..968f6ba7ccf61 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -55,6 +55,7 @@
 #include "audio.h"
 #include "reg_helper.h"
 #include "panel_cntl.h"
+#include "dc_state_priv.h"
 #include "dpcd_defs.h"
 /* include DCE11 register header files */
 #include "dce/dce_11_0_d.h"
@@ -1596,7 +1597,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	 * is constructed with the same sink). Make sure not to override
 	 * and link programming on the main.
 	 */
-	if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+	if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 		pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
 		pipe_ctx->stream->link->replay_settings.replay_feature_enabled = false;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 0d9e41315f849..96fb563b10090 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -56,6 +56,7 @@
 #include "dc_trace.h"
 #include "dce/dmub_outbox.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc_logger
@@ -115,7 +116,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
 		    !pipe_ctx->stream ||
 		    (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) ||
 		    !tg->funcs->is_tg_enabled(tg) ||
-			pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)
+			dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM)
 			continue;
 
 		if (lock)
@@ -1200,7 +1201,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
 	// Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle,
 	// so don't wait for MPCC_IDLE in the programming sequence
-	if (opp != NULL && !pipe_ctx->plane_state->is_phantom)
+	if (opp != NULL && dc_state_get_pipe_subvp_type(NULL, pipe_ctx) != SUBVP_PHANTOM)
 		opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 
 	dc->optimized_required = true;
@@ -2276,7 +2277,7 @@ void dcn10_enable_timing_synchronization(
 	DC_SYNC_INFO("Setting up OTG reset trigger\n");
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
@@ -2296,14 +2297,14 @@ void dcn10_enable_timing_synchronization(
 		if (grouped_pipes[i]->stream == NULL)
 			continue;
 
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream->vblank_synchronized = false;
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger(
@@ -2317,11 +2318,11 @@ void dcn10_enable_timing_synchronization(
 	 * synchronized. Look at last pipe programmed to reset.
 	 */
 
-	if (grouped_pipes[1]->stream && grouped_pipes[1]->stream->mall_stream_config.type != SUBVP_PHANTOM)
+	if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[1]) != SUBVP_PHANTOM)
 		wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg);
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger(
@@ -2329,7 +2330,7 @@ void dcn10_enable_timing_synchronization(
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 72c580ec650cf..0c9f4ea109cb1 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -55,6 +55,7 @@
 #include "inc/link_enc_cfg.h"
 #include "link_hwss.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc_logger
@@ -625,7 +626,7 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 
 void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
-	bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom;
+	bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -847,7 +848,7 @@ enum dc_status dcn20_enable_stream_timing(
 	/* TODO enable stream if timing changed */
 	/* TODO unblank stream if DP */
 
-	if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+	if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
 		if (pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable)
 			pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg);
 	}
@@ -1370,6 +1371,9 @@ void dcn20_pipe_control_lock(
 
 static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe)
 {
+	bool old_is_phantom = dc_state_get_pipe_subvp_type(NULL, old_pipe) == SUBVP_PHANTOM;
+	bool new_is_phantom = dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM;
+
 	new_pipe->update_flags.raw = 0;
 
 	/* If non-phantom pipe is being transitioned to a phantom pipe,
@@ -1379,8 +1383,8 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 	 * be different). The post_unlock sequence will set the correct
 	 * update flags to enable the phantom pipe.
 	 */
-	if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom &&
-			new_pipe->plane_state && new_pipe->plane_state->is_phantom) {
+	if (old_pipe->plane_state && !old_is_phantom &&
+			new_pipe->plane_state && new_is_phantom) {
 		new_pipe->update_flags.bits.disable = 1;
 		return;
 	}
@@ -1416,14 +1420,14 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 	 * The remove-add sequence of the phantom pipe always results in the pipe
 	 * being blanked in enable_stream_timing (DPG).
 	 */
-	if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+	if (new_pipe->stream && dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM)
 		new_pipe->update_flags.bits.enable = 1;
 
 	/* Phantom pipes are effectively disabled, if the pipe was previously phantom
 	 * we have to enable
 	 */
-	if (old_pipe->plane_state && old_pipe->plane_state->is_phantom &&
-			new_pipe->plane_state && !new_pipe->plane_state->is_phantom)
+	if (old_pipe->plane_state && old_is_phantom &&
+			new_pipe->plane_state && !new_is_phantom)
 		new_pipe->update_flags.bits.enable = 1;
 
 	if (old_pipe->plane_state && !new_pipe->plane_state) {
@@ -1560,6 +1564,7 @@ static void dcn20_update_dchubp_dpp(
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct dccg *dccg = dc->res_pool->dccg;
 	bool viewport_changed = false;
+	enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx);
 
 	if (pipe_ctx->update_flags.bits.dppclk)
 		dpp->funcs->dpp_dppclk_control(dpp, false, true);
@@ -1705,7 +1710,7 @@ static void dcn20_update_dchubp_dpp(
 		pipe_ctx->update_flags.bits.plane_changed ||
 		plane_state->update_flags.bits.addr_update) {
 		if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+				pipe_mall_type == SUBVP_MAIN) {
 			union block_sequence_params params;
 
 			params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
@@ -1719,7 +1724,7 @@ static void dcn20_update_dchubp_dpp(
 	if (pipe_ctx->update_flags.bits.enable)
 		hubp->funcs->set_blank(hubp, false);
 	/* If the stream paired with this plane is phantom, the plane is also phantom */
-	if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM
+	if (pipe_ctx->stream && pipe_mall_type == SUBVP_PHANTOM
 			&& hubp->funcs->phantom_hubp_post_enable)
 		hubp->funcs->phantom_hubp_post_enable(hubp);
 }
@@ -1926,15 +1931,16 @@ void dcn20_program_front_end_for_ctx(
 		struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream;
 
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream &&
-			dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
 
 			if (tg->funcs->enable_crtc) {
 				if (dc->hwss.blank_phantom) {
 					int main_pipe_width, main_pipe_height;
+					struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(dc->current_state, dc->current_state->res_ctx.pipe_ctx[i].stream);
 
-					main_pipe_width = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.width;
-					main_pipe_height = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.height;
+					main_pipe_width = phantom_stream->dst.width;
+					main_pipe_height = phantom_stream->dst.height;
 					dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height);
 				}
 				tg->funcs->enable_crtc(tg);
@@ -1963,7 +1969,7 @@ void dcn20_program_front_end_for_ctx(
 			 * DET allocation.
 			 */
 			if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
-					(context->res_ctx.pipe_ctx[i].plane_state && context->res_ctx.pipe_ctx[i].plane_state->is_phantom)))
+					(context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM)))
 				hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
 			hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
 			DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
@@ -1988,7 +1994,7 @@ void dcn20_program_front_end_for_ctx(
 					 * but the MPO still exists until the double buffered update of the main pipe so we
 					 * will get a frame of underflow if the phantom pipe is programmed here.
 					 */
-					if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM)
+					if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
 						dcn20_program_pipe(dc, pipe, context);
 				}
 
@@ -2050,7 +2056,7 @@ void dcn20_post_unlock_program_front_end(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		// Don't check flip pending on phantom pipes
 		if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable &&
-				pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
 			struct hubp *hubp = pipe->plane_res.hubp;
 			int j = 0;
 			for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us
@@ -2073,7 +2079,7 @@ void dcn20_post_unlock_program_front_end(
 			 * programming sequence).
 			 */
 			while (pipe) {
-				if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 					/* When turning on the phantom pipe we want to run through the
 					 * entire enable sequence, so apply all the "enable" flags.
 					 */
@@ -2143,7 +2149,7 @@ void dcn20_prepare_bandwidth(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		// At optimize don't restore the original watermark value
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
 			context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
 			break;
 		}
@@ -2187,7 +2193,7 @@ void dcn20_optimize_bandwidth(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		// At optimize don't need  to restore the original watermark value
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
 			context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index d337578d7e734..327c227a10ead 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -51,7 +51,7 @@
 #include "dcn20/dcn20_hwseq.h"
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
-
+#include "dc_state_priv.h"
 
 
@@ -966,7 +966,7 @@ void dcn30_hardware_release(struct dc *dc)
 		if (!pipe->stream)
 			continue;
 
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_MAIN) {
 			subvp_in_use = true;
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index cb9d8389329ff..d3d16c7af38c3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -51,6 +51,7 @@
 #include "dcn32/dcn32_resource.h"
 #include "link.h"
 #include "../dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -349,7 +350,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
 		if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+				dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
 			// There is at least 1 SubVP pipe, so enable SubVP
 			enable_subvp = true;
 			break;
@@ -375,18 +376,20 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
 	bool subvp_immediate_flip = false;
 	bool subvp_in_use = false;
 	struct pipe_ctx *pipe;
+	enum mall_stream_type pipe_mall_type = SUBVP_NONE;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
-		if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN) {
 			subvp_in_use = true;
 			break;
 		}
 	}
 
 	if (top_pipe_to_program && top_pipe_to_program->stream && top_pipe_to_program->plane_state) {
-		if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN &&
+		if (dc_state_get_pipe_subvp_type(context, top_pipe_to_program) == SUBVP_MAIN &&
 				top_pipe_to_program->plane_state->flip_immediate)
 			subvp_immediate_flip = true;
 	}
@@ -398,7 +401,7 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
 		if (!lock) {
 			for (i = 0; i < dc->res_pool->pipe_count; i++) {
 				pipe = &context->res_ctx.pipe_ctx[i];
-				if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
+				if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN &&
 						should_lock_all_pipes)
 					pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK);
 			}
@@ -420,7 +423,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params)
 	bool subvp_immediate_flip = false;
 
 	if (pipe_ctx && pipe_ctx->stream && pipe_ctx->plane_state) {
-		if (pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN &&
+		if (dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN &&
 				pipe_ctx->plane_state->flip_immediate)
 			subvp_immediate_flip = true;
 	}
@@ -609,7 +612,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		struct hubp *hubp = pipe->plane_res.hubp;
 
-		if (!pipe->stream || !(pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
+		if (!pipe->stream || !(dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
 		    pipe->stream->fpo_in_use)) {
 			if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
 				hubp->funcs->hubp_update_force_pstate_disallow(hubp, false);
@@ -624,7 +627,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		struct hubp *hubp = pipe->plane_res.hubp;
 
-		if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
+		if (pipe->stream && (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
 				pipe->stream->fpo_in_use)) {
 			if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
 				hubp->funcs->hubp_update_force_pstate_disallow(hubp, true);
@@ -671,8 +674,8 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
 			if (cursor_size > 16384)
 				cache_cursor = true;
 
-			if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-					hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
+			if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+				hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
 			} else {
 				// MALL not supported with Stereo3D
 				hubp->funcs->hubp_update_mall_sel(hubp,
@@ -714,9 +717,8 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context)
 			 *        see if CURSOR_REQ_MODE will be back to 1 for SubVP
 			 *        when it should be 0 for MPO
 			 */
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 				hubp->funcs->hubp_prepare_subvp_buffering(hubp, true);
-			}
 		}
 	}
 }
@@ -1228,7 +1230,7 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_
 			continue;
 
 		if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))
-			&& pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			&& dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) {
 			pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg);
 			reset_sync_context_for_pipe(dc, context, i);
 			otg_disabled[i] = true;
@@ -1379,7 +1381,7 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN &&
 				pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) {
 			if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) {
 
@@ -1405,7 +1407,7 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe)
 {
 	phantom_pipe->update_flags.raw = 0;
-	if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+	if (dc_state_get_pipe_subvp_type(NULL, phantom_pipe) == SUBVP_PHANTOM) {
 		if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
 			phantom_pipe->update_flags.bits.enable = 1;
 			phantom_pipe->update_flags.bits.mpcc = 1;
@@ -1491,8 +1493,8 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
 		 * pipe, wait for the double buffer update to complete first before we do
 		 * ANY phantom pipe programming.
 		 */
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM &&
-				old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM &&
+				old_pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) != SUBVP_PHANTOM) {
 			old_pipe->stream_res.tg->funcs->wait_for_state(
 					old_pipe->stream_res.tg,
 					CRTC_STATE_VBLANK);
@@ -1504,7 +1506,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) {
 			// If old context or new context has phantom pipes, apply
 			// the phantom timings now. We can't change the phantom
 			// pipe configuration safely without driver acquiring
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index f48001317fabc..56a4bc4766845 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -56,6 +56,7 @@
 #include "dcn30/dcn30_cm_common.h"
 #include "dcn31/dcn31_hwseq.h"
 #include "dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger) \
 	struct dal_logger *dc_logger = logger
@@ -948,7 +949,7 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
-	bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom;
+	bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 0458d2d749f47..c7a00a28c3b00 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -622,5 +622,4 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 		struct pipe_ctx *pipe_ctx);
 
 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream);
-
 #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index f6cbcc9b40069..dbcb9c5ea9af2 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -89,6 +89,8 @@
 #include "dcn20/dcn20_vmid.h"
 #include "dml/dcn32/dcn32_fpu.h"
 
+#include "dc_state_priv.h"
+
 #include "dml2/dml2_wrapper.h"
 
 #define DC_LOGGER_INIT(logger)
@@ -1644,7 +1646,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
 		if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state)
 			phantom_plane = prev_phantom_plane;
 		else
-			phantom_plane = dc_create_plane_state(dc);
+			phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state);
 
 		memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
 		memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality,
@@ -1665,9 +1667,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
 		phantom_plane->clip_rect.y = 0;
 		phantom_plane->clip_rect.height = phantom_stream->src.height;
 
-		phantom_plane->is_phantom = true;
-
-		dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context);
+		dc_state_add_phantom_plane(dc, phantom_stream, phantom_plane, context);
 
 		curr_pipe = curr_pipe->bottom_pipe;
 		prev_phantom_plane = phantom_plane;
@@ -1683,13 +1683,7 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	struct dc_stream_state *phantom_stream = NULL;
 	struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
 
-	phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink);
-	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
-	phantom_stream->dpms_off = true;
-	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
-	phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
-	ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
-	ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
+	phantom_stream = dc_state_create_phantom_stream(dc, context, ref_pipe->stream);
 
 	/* stream has limited viewport and small timing */
 	memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
@@ -1699,7 +1693,7 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx);
 	DC_FP_END();
 
-	dc_add_stream_to_ctx(dc, context, phantom_stream);
+	dc_state_add_phantom_stream(dc, context, phantom_stream, ref_pipe->stream);
 	return phantom_stream;
 }
 
@@ -1714,7 +1708,7 @@ void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
 
 		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 				resource_is_pipe_type(pipe, DPP_PIPE) &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			phantom_plane = pipe->plane_state;
 			phantom_stream = pipe->stream;
 
@@ -1735,41 +1729,33 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fa
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		// build scaling params for phantom pipes
-		if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			phantom_plane = pipe->plane_state;
 			phantom_stream = pipe->stream;
 
-			dc_rem_all_planes_for_stream(dc, pipe->stream, context);
-			dc_remove_stream_from_ctx(dc, context, pipe->stream);
+			dc_state_rem_all_planes_for_stream(dc, pipe->stream, context);
+
+			/* For non-full updates, a shallow copy of the current state
+			 * is created. In this case we don't want to erase the current
+			 * state (there can be 2 HIRQL threads, one in flip, and one in
+			 * checkMPO) that can cause a race condition.
+			 *
+			 * This is just a workaround, needs a proper fix.
+			 */
+			if (!fast_update)
+				dc_state_remove_phantom_stream(dc, context, pipe->stream);
+			else
+				dc_state_remove_stream(dc, context, pipe->stream);
 
 			/* Ref count is incremented on allocation and also when added to the context.
 			 * Therefore we must call release for the the phantom plane and stream once
 			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
 			 */
-			dc_plane_state_release(phantom_plane);
-			dc_stream_release(phantom_stream);
+			dc_state_release_phantom_plane(dc, context, phantom_plane);
+			dc_state_release_phantom_stream(dc, context, phantom_stream);
 
 			removed_pipe = true;
 		}
-
-		/* For non-full updates, a shallow copy of the current state
-		 * is created. In this case we don't want to erase the current
-		 * state (there can be 2 HIRQL threads, one in flip, and one in
-		 * checkMPO) that can cause a race condition.
-		 *
-		 * This is just a workaround, needs a proper fix.
-		 */
-		if (!fast_update) {
-			// Clear all phantom stream info
-			if (pipe->stream) {
-				pipe->stream->mall_stream_config.type = SUBVP_NONE;
-				pipe->stream->mall_stream_config.paired_stream = NULL;
-			}
-
-			if (pipe->plane_state) {
-				pipe->plane_state->is_phantom = false;
-			}
-		}
 	}
 	return removed_pipe;
 }
@@ -1798,7 +1784,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context,
 		// We determine which phantom pipes were added by comparing with
 		// the phantom stream.
 		if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			pipe->stream->use_dynamic_meta = false;
 			pipe->plane_state->flip_immediate = false;
 			if (!resource_build_scaling_params(pipe)) {
@@ -1933,7 +1919,7 @@ int dcn32_populate_dml_pipes_from_context(
 		 * This is just a workaround -- needs a proper fix.
 		 */
 		if (!fast_validate) {
-			switch (pipe->stream->mall_stream_config.type) {
+			switch (dc_state_get_pipe_subvp_type(context, pipe)) {
 			case SUBVP_MAIN:
 				pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
 				subvp_in_use = true;
@@ -2454,16 +2440,19 @@ static bool dcn32_resource_construct(
 	dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head;
 
 	dc->dml2_options.svp_pstate.callbacks.dc = dc;
-	dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context;
-	dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
-	dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state;
-	dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context;
-	dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx;
-	dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink;
-	dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release;
-	dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+	dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
 
 	dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
 	dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index 12986fe0b2892..8c278c2fd2870 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -92,6 +92,8 @@
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
 
+#include "dc_state_priv.h"
+
 #define DC_LOGGER_INIT(logger)
 
 enum dcn321_clk_src_array_id {
@@ -2008,16 +2010,19 @@ static bool dcn321_resource_construct(
 	dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head;
 
 	dc->dml2_options.svp_pstate.callbacks.dc = dc;
-	dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context;
-	dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
-	dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state;
-	dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context;
-	dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx;
-	dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink;
-	dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release;
-	dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+	dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
 
 	dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
 	dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 4e1db842b98cc..d0c3c5b470949 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -102,6 +102,8 @@
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
 
+#include "dc_state_priv.h"
+
 #include "link_enc_cfg.h"
 #define DC_LOGGER_INIT(logger)
 
-- 
GitLab


From 012a04b1d6af629077bf98e172d946bf893a4726 Mon Sep 17 00:00:00 2001
From: Dillon Varone <dillon.varone@amd.com>
Date: Tue, 21 Nov 2023 15:07:01 -0500
Subject: [PATCH 1263/2340] drm/amd/display: Refactor phantom resource
 allocation

[WHY?]
Phantom streams and planes were previously not referenced explcitly on creation.

[HOW?]
To reduce memory management complexity, add an additional phantom streams and planes
reference into dc_state, and move mall_stream_config to stream_status inside
the state to make it safe to modify in shallow copies. Also consildates any logic
that is affected by this change to dc_state.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Jun Lei <jun.lei@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Dillon Varone <dillon.varone@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   9 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c      |  91 +---
 .../drm/amd/display/dc/core/dc_hw_sequencer.c |  11 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c | 128 +-----
 .../gpu/drm/amd/display/dc/core/dc_state.c    | 428 ++++++++++++++++--
 .../gpu/drm/amd/display/dc/core/dc_stream.c   |  13 +-
 drivers/gpu/drm/amd/display/dc/dc.h           |  18 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |   7 +-
 drivers/gpu/drm/amd/display/dc/dc_state.h     |   6 +-
 .../gpu/drm/amd/display/dc/dc_state_priv.h    |  15 +-
 drivers/gpu/drm/amd/display/dc/dc_stream.h    |  30 +-
 .../gpu/drm/amd/display/dc/dc_stream_priv.h   |   2 +
 .../display/dc/dcn32/dcn32_resource_helpers.c |  65 ---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |   6 +-
 .../amd/display/dc/dml2/dml2_mall_phantom.c   |  30 +-
 .../drm/amd/display/dc/dml2/dml2_wrapper.h    |   5 +-
 .../amd/display/dc/hwss/dce110/dce110_hwseq.c |   7 +-
 .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c   |  25 +-
 .../amd/display/dc/hwss/dcn10/dcn10_hwseq.h   |   7 +-
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   |  27 +-
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.h   |   2 +-
 .../amd/display/dc/hwss/dcn201/dcn201_hwseq.c |   8 +-
 .../amd/display/dc/hwss/dcn201/dcn201_hwseq.h |   2 +-
 .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c   |  42 +-
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   |   8 +-
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.h   |   2 +-
 .../drm/amd/display/dc/hwss/hw_sequencer.h    |   8 +-
 .../display/dc/hwss/hw_sequencer_private.h    |   1 +
 .../gpu/drm/amd/display/dc/inc/core_types.h   |  20 +-
 .../gpu/drm/amd/display/dc/inc/hw/hw_shared.h |   1 +
 .../dc/resource/dcn32/dcn32_resource.c        |  85 ----
 .../dc/resource/dcn32/dcn32_resource.h        |  14 -
 .../dc/resource/dcn321/dcn321_resource.c      |   4 -
 33 files changed, 578 insertions(+), 549 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b3ffb25dc6b11..fc230ecc71805 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2608,12 +2608,10 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 
 	memset(del_streams, 0, sizeof(del_streams));
 
-	context = dc_state_create(dc);
+	context = dc_state_create_current_copy(dc);
 	if (context == NULL)
 		goto context_alloc_fail;
 
-	dc_resource_state_copy_construct_current(dc, context);
-
 	/* First remove from context all streams */
 	for (i = 0; i < context->stream_count; i++) {
 		struct dc_stream_state *stream = context->streams[i];
@@ -2923,7 +2921,6 @@ static int dm_resume(void *handle)
 	dc_state_release(dm_state->context);
 	dm_state->context = dc_state_create(dm->dc);
 	/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
-	dc_resource_state_construct(dm->dc, dm_state->context);
 
 	/* Before powering on DC we need to re-initialize DMUB. */
 	dm_dmub_hw_resume(adev);
@@ -4051,14 +4048,12 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 	if (!state)
 		return -ENOMEM;
 
-	state->context = dc_state_create(adev->dm.dc);
+	state->context = dc_state_create_current_copy(adev->dm.dc);
 	if (!state->context) {
 		kfree(state);
 		return -ENOMEM;
 	}
 
-	dc_resource_state_copy_construct_current(adev->dm.dc, state->context);
-
 	drm_atomic_private_obj_init(adev_to_drm(adev),
 				    &adev->dm.atomic_obj,
 				    &state->base,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index f5250022b98e5..630a55b2c1d48 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1046,8 +1046,6 @@ static bool dc_construct(struct dc *dc,
 	if (!create_link_encoders(dc))
 		goto fail;
 
-	dc_resource_state_construct(dc, dc->current_state);
-
 	return true;
 
 fail:
@@ -1120,7 +1118,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
 static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 {
 	int i, j;
-	struct dc_state *dangling_context = dc_state_create(dc);
+	struct dc_state *dangling_context = dc_state_create_current_copy(dc);
 	struct dc_state *current_ctx;
 	struct pipe_ctx *pipe;
 	struct timing_generator *tg;
@@ -1128,8 +1126,6 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 	if (dangling_context == NULL)
 		return;
 
-	dc_resource_state_copy_construct(dc->current_state, dangling_context);
-
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct dc_stream_state *old_stream =
 				dc->current_state->res_ctx.pipe_ctx[i].stream;
@@ -1187,7 +1183,11 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 					tg->funcs->enable_crtc(tg);
 				}
 			}
-			dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
+
+			if (is_phantom)
+				dc_state_rem_all_phantom_planes_for_stream(dc, old_stream, dangling_context, true);
+			else
+				dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
 
 			if (pipe->stream && pipe->plane_state)
@@ -1562,7 +1562,7 @@ static void program_timing_sync(
 		if (group_size > 1) {
 			if (sync_type == TIMING_SYNCHRONIZABLE) {
 				dc->hwss.enable_timing_synchronization(
-					dc, group_index, group_size, pipe_set);
+					dc, ctx, group_index, group_size, pipe_set);
 			} else
 				if (sync_type == VBLANK_SYNCHRONIZABLE) {
 				dc->hwss.enable_vblanks_synchronization(
@@ -2075,12 +2075,10 @@ enum dc_status dc_commit_streams(struct dc *dc,
 	if (handle_exit_odm2to1)
 		res = commit_minimal_transition_state(dc, dc->current_state);
 
-	context = dc_state_create(dc);
+	context = dc_state_create_current_copy(dc);
 	if (!context)
 		goto context_alloc_fail;
 
-	dc_resource_state_copy_construct_current(dc, context);
-
 	res = dc_validate_with_context(dc, set, stream_count, context, false);
 	if (res != DC_OK) {
 		BREAK_TO_DEBUGGER();
@@ -2218,7 +2216,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 			if (context->res_ctx.pipe_ctx[i].stream == NULL ||
 					context->res_ctx.pipe_ctx[i].plane_state == NULL) {
 				context->res_ctx.pipe_ctx[i].pipe_idx = i;
-				dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]);
+				dc->hwss.disable_plane(dc, context, &context->res_ctx.pipe_ctx[i]);
 			}
 
 		process_deferred_updates(dc);
@@ -2899,11 +2897,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
 				       update->dsc_config->num_slices_v != 0);
 
 		/* Use temporarry context for validating new DSC config */
-		struct dc_state *dsc_validate_context = dc_state_create(dc);
+		struct dc_state *dsc_validate_context = dc_state_create_copy(dc->current_state);
 
 		if (dsc_validate_context) {
-			dc_resource_state_copy_construct(dc->current_state, dsc_validate_context);
-
 			stream->timing.dsc_cfg = *update->dsc_config;
 			stream->timing.flags.DSC = enable_dsc;
 			if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) {
@@ -3011,20 +3007,17 @@ static bool update_planes_and_stream_state(struct dc *dc,
 			new_planes[i] = srf_updates[i].surface;
 
 		/* initialize scratch memory for building context */
-		context = dc_state_create(dc);
+		context = dc_state_create_copy(dc->current_state);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return false;
 		}
 
-		dc_resource_state_copy_construct(
-				dc->current_state, context);
-
 		/* For each full update, remove all existing phantom pipes first.
 		 * Ensures that we have enough pipes for newly added MPO planes
 		 */
-		if (dc->res_pool->funcs->remove_phantom_pipes)
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+		dc_state_remove_phantom_streams_and_planes(dc, context);
+		dc_state_release_phantom_streams_and_planes(dc, context);
 
 		/*remove old surfaces from context */
 		if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) {
@@ -3059,19 +3052,6 @@ static bool update_planes_and_stream_state(struct dc *dc,
 
 	if (update_type == UPDATE_TYPE_FULL) {
 		if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
-			/* For phantom pipes we remove and create a new set of phantom pipes
-			 * for each full update (because we don't know if we'll need phantom
-			 * pipes until after the first round of validation). However, if validation
-			 * fails we need to keep the existing phantom pipes (because we don't update
-			 * the dc->current_state).
-			 *
-			 * The phantom stream/plane refcount is decremented for validation because
-			 * we assume it'll be removed (the free comes when the dc_state is freed),
-			 * but if validation fails we have to increment back the refcount so it's
-			 * consistent.
-			 */
-			if (dc->res_pool->funcs->retain_phantom_pipes)
-				dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state);
 			BREAK_TO_DEBUGGER();
 			goto fail;
 		}
@@ -3390,6 +3370,7 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 {
 	int i, j;
 	struct pipe_ctx *top_pipe_to_program = NULL;
+	struct dc_stream_status *stream_status = NULL;
 	dc_z10_restore(dc);
 
 	top_pipe_to_program = resource_get_otg_master_for_stream(
@@ -3425,6 +3406,8 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 		}
 	}
 
+	stream_status = dc_state_get_stream_status(context, stream);
+
 	build_dmub_cmd_list(dc,
 			srf_updates,
 			surface_count,
@@ -3437,7 +3420,8 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 			context->dmub_cmd_count,
 			context->block_sequence,
 			&(context->block_sequence_steps),
-			top_pipe_to_program);
+			top_pipe_to_program,
+			stream_status);
 	hwss_execute_sequence(dc,
 			context->block_sequence,
 			context->block_sequence_steps);
@@ -3974,7 +3958,7 @@ static void release_minimal_transition_state(struct dc *dc,
 static struct dc_state *create_minimal_transition_state(struct dc *dc,
 		struct dc_state *base_context, struct pipe_split_policy_backup *policy)
 {
-	struct dc_state *minimal_transition_context = dc_state_create(dc);
+	struct dc_state *minimal_transition_context = NULL;
 	unsigned int i, j;
 
 	if (!dc->config.is_vmin_only_asic) {
@@ -3986,7 +3970,7 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc,
 	policy->subvp_policy = dc->debug.force_disable_subvp;
 	dc->debug.force_disable_subvp = true;
 
-	dc_resource_state_copy_construct(base_context, minimal_transition_context);
+	minimal_transition_context = dc_state_create_copy(base_context);
 
 	/* commit minimal state */
 	if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) {
@@ -4018,7 +4002,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 	bool success = false;
 	struct dc_state *minimal_transition_context;
 	struct pipe_split_policy_backup policy;
-	struct mall_temp_config mall_temp_config;
 
 	/* commit based on new context */
 	/* Since all phantom pipes are removed in full validation,
@@ -4027,8 +4010,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 	 * pipe as subvp/phantom will be cleared (dc copy constructor
 	 * creates a shallow copy).
 	 */
-	if (dc->res_pool->funcs->save_mall_state)
-		dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 	minimal_transition_context = create_minimal_transition_state(dc,
 			context, &policy);
 	if (minimal_transition_context) {
@@ -4041,16 +4022,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 			success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK;
 		}
 		release_minimal_transition_state(dc, minimal_transition_context, &policy);
-		if (dc->res_pool->funcs->restore_mall_state)
-			dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
-		/* If we do a minimal transition with plane removal and the context
-		 * has subvp we also have to retain back the phantom stream / planes
-		 * since the refcount is decremented as part of the min transition
-		 * (we commit a state with no subvp, so the phantom streams / planes
-		 * had to be removed).
-		 */
-		if (dc->res_pool->funcs->retain_phantom_pipes)
-			dc->res_pool->funcs->retain_phantom_pipes(dc, context);
 	}
 
 	if (!success) {
@@ -4383,7 +4354,6 @@ bool dc_update_planes_and_stream(struct dc *dc,
 	struct dc_state *context;
 	enum surface_update_type update_type;
 	int i;
-	struct mall_temp_config mall_temp_config;
 	struct dc_fast_update fast_update[MAX_SURFACES] = {0};
 
 	/* In cases where MPO and split or ODM are used transitions can
@@ -4427,23 +4397,10 @@ bool dc_update_planes_and_stream(struct dc *dc,
 		 * pipe as subvp/phantom will be cleared (dc copy constructor
 		 * creates a shallow copy).
 		 */
-		if (dc->res_pool->funcs->save_mall_state)
-			dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 		if (!commit_minimal_transition_state(dc, context)) {
 			dc_state_release(context);
 			return false;
 		}
-		if (dc->res_pool->funcs->restore_mall_state)
-			dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
-
-		/* If we do a minimal transition with plane removal and the context
-		 * has subvp we also have to retain back the phantom stream / planes
-		 * since the refcount is decremented as part of the min transition
-		 * (we commit a state with no subvp, so the phantom streams / planes
-		 * had to be removed).
-		 */
-		if (dc->res_pool->funcs->retain_phantom_pipes)
-			dc->res_pool->funcs->retain_phantom_pipes(dc, context);
 		update_type = UPDATE_TYPE_FULL;
 	}
 
@@ -4559,14 +4516,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	if (update_type >= UPDATE_TYPE_FULL) {
 
 		/* initialize scratch memory for building context */
-		context = dc_state_create(dc);
+		context = dc_state_create_copy(state);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return;
 		}
 
-		dc_resource_state_copy_construct(state, context);
-
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
 			struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -4711,7 +4666,7 @@ void dc_set_power_state(
 
 	switch (power_state) {
 	case DC_ACPI_CM_POWER_STATE_D0:
-		dc_resource_state_construct(dc, dc->current_state);
+		dc_state_construct(dc, dc->current_state);
 
 		dc_z10_restore(dc);
 
@@ -4726,7 +4681,7 @@ void dc_set_power_state(
 	default:
 		ASSERT(dc->current_state->stream_count == 0);
 
-		dc_resource_state_destruct(dc->current_state);
+		dc_state_destruct(dc->current_state);
 
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 170bad6f33c7b..48fdfacc413aa 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -473,7 +473,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 		unsigned int dmub_cmd_count,
 		struct block_sequence block_sequence[],
 		int *num_steps,
-		struct pipe_ctx *pipe_ctx)
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_status *stream_status)
 {
 	struct dc_plane_state *plane = pipe_ctx->plane_state;
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -490,7 +491,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true;
-		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
 		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
 		(*num_steps)++;
 	}
@@ -529,7 +531,7 @@ void hwss_build_fast_sequence(struct dc *dc,
 			}
 			if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) {
 				if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) &&
-						dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN) {
+						stream_status->mall_stream_config.type == SUBVP_MAIN) {
 					block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index;
@@ -612,7 +614,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = false;
-		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
 		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
 		(*num_steps)++;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 4a6a7d7557e07..716b59bd03b65 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -42,6 +42,7 @@
 #include "link_enc_cfg.h"
 #include "link.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 #include "virtual/virtual_link_hwss.h"
 #include "link/hwss/link_hwss_dio.h"
 #include "link/hwss/link_hwss_dpia.h"
@@ -3523,34 +3524,6 @@ enum dc_status resource_map_pool_resources(
 	return DC_ERROR_UNEXPECTED;
 }
 
-/**
- * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state
- *
- * @dc: copy out of dc->current_state
- * @dst_ctx: copy into this
- *
- * This function makes a shallow copy of the current DC state and increments
- * refcounts on existing streams and planes.
- */
-void dc_resource_state_copy_construct_current(
-		const struct dc *dc,
-		struct dc_state *dst_ctx)
-{
-	dc_resource_state_copy_construct(dc->current_state, dst_ctx);
-}
-
-
-void dc_resource_state_construct(
-		const struct dc *dc,
-		struct dc_state *dst_ctx)
-{
-	dst_ctx->clk_mgr = dc->clk_mgr;
-
-	/* Initialise DIG link encoder resource tracking variables. */
-	link_enc_cfg_init(dc, dst_ctx);
-}
-
-
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
 {
 	if (dc->res_pool == NULL)
@@ -3724,6 +3697,7 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 						       unchanged_streams[i],
 						       set,
 						       set_count)) {
+
 			if (!dc_state_rem_all_planes_for_stream(dc,
 							  unchanged_streams[i],
 							  context)) {
@@ -3746,12 +3720,24 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 			}
 		}
 
-		if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
-			res = DC_FAIL_DETACH_SURFACES;
-			goto fail;
+		if (dc_state_get_stream_subvp_type(context, del_streams[i]) == SUBVP_PHANTOM) {
+			/* remove phantoms specifically */
+			if (!dc_state_rem_all_phantom_planes_for_stream(dc, del_streams[i], context, true)) {
+				res = DC_FAIL_DETACH_SURFACES;
+				goto fail;
+			}
+
+			res = dc_state_remove_phantom_stream(dc, context, del_streams[i]);
+			dc_state_release_phantom_stream(dc, context, del_streams[i]);
+		} else {
+			if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+				res = DC_FAIL_DETACH_SURFACES;
+				goto fail;
+			}
+
+			res = dc_state_remove_stream(dc, context, del_streams[i]);
 		}
 
-		res = dc_state_remove_stream(dc, context, del_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 	}
@@ -4280,84 +4266,6 @@ static void set_vtem_info_packet(
 	*info_packet = stream->vtem_infopacket;
 }
 
-void dc_resource_state_destruct(struct dc_state *context)
-{
-	int i, j;
-
-	for (i = 0; i < context->stream_count; i++) {
-		for (j = 0; j < context->stream_status[i].plane_count; j++)
-			dc_plane_state_release(
-				context->stream_status[i].plane_states[j]);
-
-		context->stream_status[i].plane_count = 0;
-		dc_stream_release(context->streams[i]);
-		context->streams[i] = NULL;
-	}
-	context->stream_count = 0;
-	context->stream_mask = 0;
-	memset(&context->res_ctx, 0, sizeof(context->res_ctx));
-	memset(&context->pp_display_cfg, 0, sizeof(context->pp_display_cfg));
-	memset(&context->dcn_bw_vars, 0, sizeof(context->dcn_bw_vars));
-	context->clk_mgr = NULL;
-	memset(&context->bw_ctx.bw, 0, sizeof(context->bw_ctx.bw));
-	memset(context->block_sequence, 0, sizeof(context->block_sequence));
-	context->block_sequence_steps = 0;
-	memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd));
-	context->dmub_cmd_count = 0;
-	memset(&context->perf_params, 0, sizeof(context->perf_params));
-	memset(&context->scratch, 0, sizeof(context->scratch));
-}
-
-void dc_resource_state_copy_construct(
-		const struct dc_state *src_ctx,
-		struct dc_state *dst_ctx)
-{
-	int i, j;
-	struct kref refcount = dst_ctx->refcount;
-#ifdef CONFIG_DRM_AMD_DC_FP
-	struct dml2_context *dml2 = NULL;
-
-	// Need to preserve allocated dml2 context
-	if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
-		dml2 = dst_ctx->bw_ctx.dml2;
-#endif
-
-	*dst_ctx = *src_ctx;
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	// Preserve allocated dml2 context
-	if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
-		dst_ctx->bw_ctx.dml2 = dml2;
-#endif
-
-	for (i = 0; i < MAX_PIPES; i++) {
-		struct pipe_ctx *cur_pipe = &dst_ctx->res_ctx.pipe_ctx[i];
-
-		if (cur_pipe->top_pipe)
-			cur_pipe->top_pipe =  &dst_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
-		if (cur_pipe->bottom_pipe)
-			cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
-		if (cur_pipe->next_odm_pipe)
-			cur_pipe->next_odm_pipe =  &dst_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
-		if (cur_pipe->prev_odm_pipe)
-			cur_pipe->prev_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
-	}
-
-	for (i = 0; i < dst_ctx->stream_count; i++) {
-		dc_stream_retain(dst_ctx->streams[i]);
-		for (j = 0; j < dst_ctx->stream_status[i].plane_count; j++)
-			dc_plane_state_retain(
-				dst_ctx->stream_status[i].plane_states[j]);
-	}
-
-	/* context refcount should not be overridden */
-	dst_ctx->refcount = refcount;
-
-}
-
 struct clock_source *dc_resource_find_first_free_pll(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index ec5b0d114c7c2..66a70a60c479a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -31,6 +31,7 @@
 
 #include "dm_services.h"
 #include "resource.h"
+#include "link_enc_cfg.h"
 
 #include "dml2/dml2_wrapper.h"
 #include "dml2/dml2_internal_types.h"
@@ -39,7 +40,144 @@
 	dc->ctx->logger
 #define DC_LOGGER_INIT(logger)
 
-/* Public dc_state functions */
+/* Private dc_state helper functions */
+static bool dc_state_track_phantom_stream(struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	if (state->phantom_stream_count >= MAX_PHANTOM_PIPES)
+		return false;
+
+	state->phantom_streams[state->phantom_stream_count++] = phantom_stream;
+
+	return true;
+}
+
+static bool dc_state_untrack_phantom_stream(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+	bool res = false;
+	int i;
+
+	/* first find phantom stream in the dc_state */
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		if (state->phantom_streams[i] == phantom_stream) {
+			state->phantom_streams[i] = NULL;
+			res = true;
+			break;
+		}
+	}
+
+	/* failed to find stream in state */
+	if (!res)
+		return res;
+
+	/* trim back phantom streams */
+	state->phantom_stream_count--;
+	for (; i < state->phantom_stream_count; i++)
+		state->phantom_streams[i] = state->phantom_streams[i + 1];
+
+	return res;
+}
+
+static bool dc_state_is_phantom_stream_tracked(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		if (state->phantom_streams[i] == phantom_stream)
+			return true;
+	}
+
+	return false;
+}
+
+static bool dc_state_track_phantom_plane(struct dc_state *state,
+		struct dc_plane_state *phantom_plane)
+{
+	if (state->phantom_plane_count >= MAX_PHANTOM_PIPES)
+		return false;
+
+	state->phantom_planes[state->phantom_plane_count++] = phantom_plane;
+
+	return true;
+}
+
+static bool dc_state_untrack_phantom_plane(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+	bool res = false;
+	int i;
+
+	/* first find phantom plane in the dc_state */
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		if (state->phantom_planes[i] == phantom_plane) {
+			state->phantom_planes[i] = NULL;
+			res = true;
+			break;
+		}
+	}
+
+	/* failed to find plane in state */
+	if (!res)
+		return res;
+
+	/* trim back phantom planes */
+	state->phantom_plane_count--;
+	for (; i < state->phantom_plane_count; i++)
+		state->phantom_planes[i] = state->phantom_planes[i + 1];
+
+	return res;
+}
+
+static bool dc_state_is_phantom_plane_tracked(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		if (state->phantom_planes[i] == phantom_plane)
+			return true;
+	}
+
+	return false;
+}
+
+static void dc_state_copy_internal(struct dc_state *dst_state, struct dc_state *src_state)
+{
+	int i, j;
+
+	memcpy(dst_state, src_state, sizeof(struct dc_state));
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *cur_pipe = &dst_state->res_ctx.pipe_ctx[i];
+
+		if (cur_pipe->top_pipe)
+			cur_pipe->top_pipe =  &dst_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+
+		if (cur_pipe->bottom_pipe)
+			cur_pipe->bottom_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+
+		if (cur_pipe->prev_odm_pipe)
+			cur_pipe->prev_odm_pipe =  &dst_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+
+		if (cur_pipe->next_odm_pipe)
+			cur_pipe->next_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+	}
+
+	/* retain phantoms */
+	for (i = 0; i < dst_state->phantom_stream_count; i++)
+		dc_stream_retain(dst_state->phantom_streams[i]);
+
+	for (i = 0; i < dst_state->phantom_plane_count; i++)
+		dc_plane_state_retain(dst_state->phantom_planes[i]);
+
+	/* retain streams and planes */
+	for (i = 0; i < dst_state->stream_count; i++) {
+		dc_stream_retain(dst_state->streams[i]);
+		for (j = 0; j < dst_state->stream_status[i].plane_count; j++)
+			dc_plane_state_retain(
+					dst_state->stream_status[i].plane_states[j]);
+	}
+
+}
+
 static void init_state(struct dc *dc, struct dc_state *state)
 {
 	/* Each context must have their own instance of VBA and in order to
@@ -49,6 +187,7 @@ static void init_state(struct dc *dc, struct dc_state *state)
 	memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
 }
 
+/* Public dc_state functions */
 struct dc_state *dc_state_create(struct dc *dc)
 {
 	struct dc_state *state = kvzalloc(sizeof(struct dc_state),
@@ -58,6 +197,7 @@ struct dc_state *dc_state_create(struct dc *dc)
 		return NULL;
 
 	init_state(dc, state);
+	dc_state_construct(dc, state);
 
 #ifdef CONFIG_DRM_AMD_DC_FP
 	if (dc->debug.using_dml2)
@@ -69,61 +209,112 @@ struct dc_state *dc_state_create(struct dc *dc)
 	return state;
 }
 
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state)
+{
+	struct kref refcount = dst_state->refcount;
+
+	dc_state_copy_internal(dst_state, src_state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	if (src_state->bw_ctx.dml2)
+		dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2);
+#endif
+
+	/* context refcount should not be overridden */
+	dst_state->refcount = refcount;
+}
+
 struct dc_state *dc_state_create_copy(struct dc_state *src_state)
 {
-	int i, j;
-	struct dc_state *new_state = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
+	struct dc_state *new_state;
 
+	new_state = kvmalloc(sizeof(struct dc_state),
+			GFP_KERNEL);
 	if (!new_state)
 		return NULL;
 
-	memcpy(new_state, src_state, sizeof(struct dc_state));
+	dc_state_copy_internal(new_state, src_state);
 
 #ifdef CONFIG_DRM_AMD_DC_FP
-	if (new_state->bw_ctx.dml2 && !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) {
+	if (src_state->bw_ctx.dml2 &&
+			!dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) {
 		dc_state_release(new_state);
 		return NULL;
 	}
 #endif
 
-	for (i = 0; i < MAX_PIPES; i++) {
-		struct pipe_ctx *cur_pipe = &new_state->res_ctx.pipe_ctx[i];
+	kref_init(&new_state->refcount);
 
-		if (cur_pipe->top_pipe)
-			cur_pipe->top_pipe =  &new_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+	return new_state;
+}
 
-		if (cur_pipe->bottom_pipe)
-			cur_pipe->bottom_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state)
+{
+	dc_state_copy(dst_state, dc->current_state);
+}
 
-		if (cur_pipe->prev_odm_pipe)
-			cur_pipe->prev_odm_pipe =  &new_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+struct dc_state *dc_state_create_current_copy(struct dc *dc)
+{
+	return dc_state_create_copy(dc->current_state);
+}
 
-		if (cur_pipe->next_odm_pipe)
-			cur_pipe->next_odm_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-	}
+void dc_state_construct(struct dc *dc, struct dc_state *state)
+{
+	state->clk_mgr = dc->clk_mgr;
 
-	for (i = 0; i < new_state->stream_count; i++) {
-		dc_stream_retain(new_state->streams[i]);
-		for (j = 0; j < new_state->stream_status[i].plane_count; j++)
-			dc_plane_state_retain(
-					new_state->stream_status[i].plane_states[j]);
+	/* Initialise DIG link encoder resource tracking variables. */
+	link_enc_cfg_init(dc, state);
+}
+
+void dc_state_destruct(struct dc_state *state)
+{
+	int i, j;
+
+	for (i = 0; i < state->stream_count; i++) {
+		for (j = 0; j < state->stream_status[i].plane_count; j++)
+			dc_plane_state_release(
+					state->stream_status[i].plane_states[j]);
+
+		state->stream_status[i].plane_count = 0;
+		dc_stream_release(state->streams[i]);
+		state->streams[i] = NULL;
 	}
+	state->stream_count = 0;
 
-	kref_init(&new_state->refcount);
+	/* release tracked phantoms */
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		dc_stream_release(state->phantom_streams[i]);
+		state->phantom_streams[i] = NULL;
+	}
 
-	return new_state;
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		dc_plane_state_release(state->phantom_planes[i]);
+		state->phantom_planes[i] = NULL;
+	}
+	state->stream_mask = 0;
+	memset(&state->res_ctx, 0, sizeof(state->res_ctx));
+	memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg));
+	memset(&state->dcn_bw_vars, 0, sizeof(state->dcn_bw_vars));
+	state->clk_mgr = NULL;
+	memset(&state->bw_ctx.bw, 0, sizeof(state->bw_ctx.bw));
+	memset(state->block_sequence, 0, sizeof(state->block_sequence));
+	state->block_sequence_steps = 0;
+	memset(state->dc_dmub_cmd, 0, sizeof(state->dc_dmub_cmd));
+	state->dmub_cmd_count = 0;
+	memset(&state->perf_params, 0, sizeof(state->perf_params));
+	memset(&state->scratch, 0, sizeof(state->scratch));
 }
 
-void dc_state_retain(struct dc_state *context)
+void dc_state_retain(struct dc_state *state)
 {
-	kref_get(&context->refcount);
+	kref_get(&state->refcount);
 }
 
 static void dc_state_free(struct kref *kref)
 {
 	struct dc_state *state = container_of(kref, struct dc_state, refcount);
 
-	dc_resource_state_destruct(state);
+	dc_state_destruct(state);
 
 #ifdef CONFIG_DRM_AMD_DC_FP
 	dml2_destroy(state->bw_ctx.dml2);
@@ -403,35 +594,65 @@ struct dc_stream_status *dc_state_get_stream_status(
 enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
 		const struct pipe_ctx *pipe_ctx)
 {
-	if (pipe_ctx->stream == NULL)
-		return SUBVP_NONE;
-
-	return pipe_ctx->stream->mall_stream_config.type;
+	return dc_state_get_stream_subvp_type(state, pipe_ctx->stream);
 }
 
 enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
 		const struct dc_stream_state *stream)
 {
-	return stream->mall_stream_config.type;
+	int i;
+
+	enum mall_stream_type type = SUBVP_NONE;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (state->streams[i] == stream) {
+			type = state->stream_status[i].mall_stream_config.type;
+			break;
+		}
+	}
+
+	return type;
 }
 
 struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
 		const struct dc_stream_state *stream)
 {
-	return stream->mall_stream_config.paired_stream;
+	int i;
+
+	struct dc_stream_state *paired_stream = NULL;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (state->streams[i] == stream) {
+			paired_stream = state->stream_status[i].mall_stream_config.paired_stream;
+			break;
+		}
+	}
+
+	return paired_stream;
 }
 
 struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
 		struct dc_state *state,
 		struct dc_stream_state *main_stream)
 {
-	struct dc_stream_state *phantom_stream = dc_create_stream_for_sink(main_stream->sink);
+	struct dc_stream_state *phantom_stream;
 
-	if (phantom_stream != NULL) {
-		phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
-		phantom_stream->dpms_off = true;
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	phantom_stream = dc_create_stream_for_sink(main_stream->sink);
+
+	if (!phantom_stream) {
+		DC_LOG_ERROR("Failed to allocate phantom stream.\n");
+		return NULL;
 	}
 
+	/* track phantom stream in dc_state */
+	dc_state_track_phantom_stream(state, phantom_stream);
+
+	phantom_stream->is_phantom = true;
+	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
+	phantom_stream->dpms_off = true;
+
 	return phantom_stream;
 }
 
@@ -439,6 +660,13 @@ void dc_state_release_phantom_stream(const struct dc *dc,
 		struct dc_state *state,
 		struct dc_stream_state *phantom_stream)
 {
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!dc_state_untrack_phantom_stream(state, phantom_stream)) {
+		DC_LOG_ERROR("Failed to free phantom stream %p in dc state %p.\n", phantom_stream, state);
+		return;
+	}
+
 	dc_stream_release(phantom_stream);
 }
 
@@ -448,8 +676,17 @@ struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc,
 {
 	struct dc_plane_state *phantom_plane = dc_create_plane_state(dc);
 
-	if (phantom_plane != NULL)
-		phantom_plane->is_phantom = true;
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!phantom_plane) {
+		DC_LOG_ERROR("Failed to allocate phantom plane.\n");
+		return NULL;
+	}
+
+	/* track phantom inside dc_state */
+	dc_state_track_phantom_plane(state, phantom_plane);
+
+	phantom_plane->is_phantom = true;
 
 	return phantom_plane;
 }
@@ -458,6 +695,13 @@ void dc_state_release_phantom_plane(const struct dc *dc,
 		struct dc_state *state,
 		struct dc_plane_state *phantom_plane)
 {
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!dc_state_untrack_phantom_plane(state, phantom_plane)) {
+		DC_LOG_ERROR("Failed to free phantom plane %p in dc state %p.\n", phantom_plane, state);
+		return;
+	}
+
 	dc_plane_state_release(phantom_plane);
 }
 
@@ -467,13 +711,23 @@ enum dc_status dc_state_add_phantom_stream(struct dc *dc,
 		struct dc_stream_state *phantom_stream,
 		struct dc_stream_state *main_stream)
 {
+	struct dc_stream_status *main_stream_status;
+	struct dc_stream_status *phantom_stream_status;
 	enum dc_status res = dc_state_add_stream(dc, state, phantom_stream);
 
+	/* check if stream is tracked */
+	if (res == DC_OK && !dc_state_is_phantom_stream_tracked(state, phantom_stream)) {
+		/* stream must be tracked if added to state */
+		dc_state_track_phantom_stream(state, phantom_stream);
+	}
+
 	/* setup subvp meta */
-	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
-	phantom_stream->mall_stream_config.paired_stream = main_stream;
-	main_stream->mall_stream_config.type = SUBVP_MAIN;
-	main_stream->mall_stream_config.paired_stream = phantom_stream;
+	main_stream_status = dc_state_get_stream_status(state, main_stream);
+	phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+	phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM;
+	phantom_stream_status->mall_stream_config.paired_stream = main_stream;
+	main_stream_status->mall_stream_config.type = SUBVP_MAIN;
+	main_stream_status->mall_stream_config.paired_stream = phantom_stream;
 
 	return res;
 }
@@ -482,9 +736,18 @@ enum dc_status dc_state_remove_phantom_stream(struct dc *dc,
 		struct dc_state *state,
 		struct dc_stream_state *phantom_stream)
 {
+	struct dc_stream_status *main_stream_status;
+	struct dc_stream_status *phantom_stream_status;
+
 	/* reset subvp meta */
-	phantom_stream->mall_stream_config.paired_stream->mall_stream_config.type = SUBVP_NONE;
-	phantom_stream->mall_stream_config.paired_stream->mall_stream_config.paired_stream = NULL;
+	phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+	main_stream_status = dc_state_get_stream_status(state, phantom_stream_status->mall_stream_config.paired_stream);
+	phantom_stream_status->mall_stream_config.type = SUBVP_NONE;
+	phantom_stream_status->mall_stream_config.paired_stream = NULL;
+	if (main_stream_status) {
+		main_stream_status->mall_stream_config.type = SUBVP_NONE;
+		main_stream_status->mall_stream_config.paired_stream = NULL;
+	}
 
 	/* remove stream from state */
 	return dc_state_remove_stream(dc, state, phantom_stream);
@@ -496,7 +759,15 @@ bool dc_state_add_phantom_plane(
 		struct dc_plane_state *phantom_plane,
 		struct dc_state *state)
 {
-	return dc_state_add_plane(dc, phantom_stream, phantom_plane, state);
+	bool res = dc_state_add_plane(dc, phantom_stream, phantom_plane, state);
+
+	/* check if stream is tracked */
+	if (res && !dc_state_is_phantom_plane_tracked(state, phantom_plane)) {
+		/* stream must be tracked if added to state */
+		dc_state_track_phantom_plane(state, phantom_plane);
+	}
+
+	return res;
 }
 
 bool dc_state_remove_phantom_plane(
@@ -511,9 +782,37 @@ bool dc_state_remove_phantom_plane(
 bool dc_state_rem_all_phantom_planes_for_stream(
 		const struct dc *dc,
 		struct dc_stream_state *phantom_stream,
-		struct dc_state *state)
+		struct dc_state *state,
+		bool should_release_planes)
 {
-	return dc_state_rem_all_planes_for_stream(dc, phantom_stream, state);
+	int i, old_plane_count;
+	struct dc_stream_status *stream_status = NULL;
+	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == phantom_stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream %p not found!\n", phantom_stream);
+		return false;
+	}
+
+	old_plane_count = stream_status->plane_count;
+
+	for (i = 0; i < old_plane_count; i++)
+		del_planes[i] = stream_status->plane_states[i];
+
+	for (i = 0; i < old_plane_count; i++) {
+		if (!dc_state_remove_plane(dc, phantom_stream, del_planes[i], state))
+			return false;
+		if (should_release_planes)
+			dc_state_release_phantom_plane(dc, state, del_planes[i]);
+	}
+
+	return true;
 }
 
 bool dc_state_add_all_phantom_planes_for_stream(
@@ -525,3 +824,38 @@ bool dc_state_add_all_phantom_planes_for_stream(
 {
 	return dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state);
 }
+
+bool dc_state_remove_phantom_streams_and_planes(
+	struct dc *dc,
+	struct dc_state *state)
+{
+	int i;
+	bool removed_phantom = false;
+	struct dc_stream_state *phantom_stream = NULL;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+			phantom_stream = pipe->stream;
+
+			dc_state_rem_all_phantom_planes_for_stream(dc, phantom_stream, state, false);
+			dc_state_remove_phantom_stream(dc, state, phantom_stream);
+			removed_phantom = true;
+		}
+	}
+	return removed_phantom;
+}
+
+void dc_state_release_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_stream_count; i++)
+		dc_state_release_phantom_stream(dc, state, state->phantom_streams[i]);
+
+	for (i = 0; i < state->phantom_plane_count; i++)
+		dc_state_release_phantom_plane(dc, state, state->phantom_planes[i]);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 86de4bf2ce138..f2b265ed7fc21 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -123,8 +123,7 @@ bool dc_stream_construct(struct dc_stream_state *stream,
 	}
 	stream->out_transfer_func->type = TF_TYPE_BYPASS;
 
-	stream->stream_id = stream->ctx->dc_stream_id_count;
-	stream->ctx->dc_stream_id_count++;
+	dc_stream_assign_stream_id(stream);
 
 	return true;
 }
@@ -138,6 +137,13 @@ void dc_stream_destruct(struct dc_stream_state *stream)
 	}
 }
 
+void dc_stream_assign_stream_id(struct dc_stream_state *stream)
+{
+	/* MSB is reserved to indicate phantoms */
+	stream->stream_id = stream->ctx->dc_stream_id_count;
+	stream->ctx->dc_stream_id_count++;
+}
+
 void dc_stream_retain(struct dc_stream_state *stream)
 {
 	kref_get(&stream->refcount);
@@ -198,8 +204,7 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
 	if (new_stream->out_transfer_func)
 		dc_transfer_func_retain(new_stream->out_transfer_func);
 
-	new_stream->stream_id = new_stream->ctx->dc_stream_id_count;
-	new_stream->ctx->dc_stream_id_count++;
+	dc_stream_assign_stream_id(new_stream);
 
 	/* If using dynamic encoder assignment, wait till stream committed to assign encoder. */
 	if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign)
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index d638679cf31ae..a270b4bf7b95c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1388,6 +1388,9 @@ struct dc_surface_update {
 	const struct colorspace_transform *gamut_remap_matrix;
 };
 
+/*
+ * Create a new surface with default parameters;
+ */
 void dc_gamma_retain(struct dc_gamma *dc_gamma);
 void dc_gamma_release(struct dc_gamma **dc_gamma);
 struct dc_gamma *dc_create_gamma(void);
@@ -1451,27 +1454,12 @@ enum dc_status dc_validate_global_state(
 		struct dc_state *new_ctx,
 		bool fast_validate);
 
-
-void dc_resource_state_construct(
-		const struct dc *dc,
-		struct dc_state *dst_ctx);
-
 bool dc_acquire_release_mpc_3dlut(
 		struct dc *dc, bool acquire,
 		struct dc_stream_state *stream,
 		struct dc_3dlut **lut,
 		struct dc_transfer_func **shaper);
 
-void dc_resource_state_copy_construct(
-		const struct dc_state *src_ctx,
-		struct dc_state *dst_ctx);
-
-void dc_resource_state_copy_construct_current(
-		const struct dc *dc,
-		struct dc_state *dst_ctx);
-
-void dc_resource_state_destruct(struct dc_state *context);
-
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
 
 enum dc_status dc_commit_streams(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 704d4ff722f02..1d315f7cdce38 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -672,12 +672,14 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
 	uint32_t prefetch_delta_us = 0;
 	struct dc_stream_state *phantom_stream0 = NULL;
 	struct dc_stream_state *phantom_stream1 = NULL;
-	struct dc_crtc_timing *phantom_timing0 = &phantom_stream0->timing;
-	struct dc_crtc_timing *phantom_timing1 = &phantom_stream1->timing;
+	struct dc_crtc_timing *phantom_timing0 = NULL;
+	struct dc_crtc_timing *phantom_timing1 = NULL;
 	struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
 
 	phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream);
 	phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream);
+	phantom_timing0 = &phantom_stream0->timing;
+	phantom_timing1 = &phantom_stream1->timing;
 
 	subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
 			(uint64_t)phantom_timing0->h_total * 1000000),
@@ -853,7 +855,6 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 			 */
 			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 					resource_is_pipe_type(pipe, DPP_PIPE) &&
-					pipe->stream->mall_stream_config.paired_stream &&
 					pipe_mall_type == SUBVP_MAIN) {
 				populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
 			} else if (resource_is_pipe_type(pipe, OTG_MASTER) &&
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h
index df1a8b85a6523..d167fdbfa8a97 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_state.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_state.h
@@ -29,9 +29,13 @@
 #include "dc.h"
 #include "inc/core_status.h"
 
-
 struct dc_state *dc_state_create(struct dc *dc);
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state);
 struct dc_state *dc_state_create_copy(struct dc_state *src_state);
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state);
+struct dc_state *dc_state_create_current_copy(struct dc *dc);
+void dc_state_construct(struct dc *dc, struct dc_state *state);
+void dc_state_destruct(struct dc_state *state);
 void dc_state_retain(struct dc_state *state);
 void dc_state_release(struct dc_state *state);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
index 16c09caa13176..c1f44e09a6c1b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
@@ -29,10 +29,6 @@
 #include "dc_state.h"
 #include "dc_stream.h"
 
-struct dc_stream_status *dc_state_get_stream_status(
-	struct dc_state *state,
-	struct dc_stream_state *stream);
-
 /* Get the type of the provided resource (none, phantom, main) based on the provided
  * context. If the context is unavailable, determine only if phantom or not.
  */
@@ -85,7 +81,8 @@ bool dc_state_remove_phantom_plane(
 bool dc_state_rem_all_phantom_planes_for_stream(
 		const struct dc *dc,
 		struct dc_stream_state *phantom_stream,
-		struct dc_state *state);
+		struct dc_state *state,
+		bool should_release_planes);
 
 bool dc_state_add_all_phantom_planes_for_stream(
 		const struct dc *dc,
@@ -94,4 +91,12 @@ bool dc_state_add_all_phantom_planes_for_stream(
 		int plane_count,
 		struct dc_state *state);
 
+bool dc_state_remove_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state);
+
+void dc_state_release_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state);
+
 #endif /* _DC_STATE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index f617428c6a576..a23eebd9933b7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -38,6 +38,14 @@ struct timing_sync_info {
 	bool master;
 };
 
+struct mall_stream_config {
+	/* MALL stream config to indicate if the stream is phantom or not.
+	 * We will use a phantom stream to indicate that the pipe is phantom.
+	 */
+	enum mall_stream_type type;
+	struct dc_stream_state *paired_stream;	// master / slave stream
+};
+
 struct dc_stream_status {
 	int primary_otg_inst;
 	int stream_enc_inst;
@@ -50,6 +58,7 @@ struct dc_stream_status {
 	struct timing_sync_info timing_sync_info;
 	struct dc_plane_state *plane_states[MAX_SURFACE_NUM];
 	bool is_abm_supported;
+	struct mall_stream_config mall_stream_config;
 };
 
 enum hubp_dmdata_mode {
@@ -146,25 +155,6 @@ struct test_pattern {
 
 #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR)
 
-struct mall_stream_config {
-	/* MALL stream config to indicate if the stream is phantom or not.
-	 * We will use a phantom stream to indicate that the pipe is phantom.
-	 */
-	enum mall_stream_type type;
-	struct dc_stream_state *paired_stream;	// master / slave stream
-};
-
-/* Temp struct used to save and restore MALL config
- * during validation.
- *
- * TODO: Move MALL config into dc_state instead of stream struct
- * to avoid needing to save/restore.
- */
-struct mall_temp_config {
-	struct mall_stream_config mall_stream_config[MAX_PIPES];
-	bool is_phantom_plane[MAX_PIPES];
-};
-
 struct dc_stream_debug_options {
 	char force_odm_combine_segments;
 };
@@ -294,7 +284,7 @@ struct dc_stream_state {
 	bool has_non_synchronizable_pclk;
 	bool vblank_synchronized;
 	bool fpo_in_use;
-	struct mall_stream_config mall_stream_config;
+	bool is_phantom;
 };
 
 #define ABM_LEVEL_IMMEDIATE_DISABLE 255
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
index 710d3b04c7e86..7476fd52ce2b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
@@ -32,4 +32,6 @@ bool dc_stream_construct(struct dc_stream_state *stream,
 	struct dc_sink *dc_sink_data);
 void dc_stream_destruct(struct dc_stream_state *stream);
 
+void dc_stream_assign_stream_id(struct dc_stream_state *stream);
+
 #endif // _DC_STREAM_PRIV_H_
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index b3d3607411756..1743ebcd6b2ee 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -430,71 +430,6 @@ void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
 		dcn32_determine_det_override(dc, context, pipes);
 }
 
-/**
- * dcn32_save_mall_state(): Save MALL (SubVP) state for fast validation cases
- *
- * This function saves the MALL (SubVP) case for fast validation cases. For fast validation,
- * there are situations where a shallow copy of the dc->current_state is created for the
- * validation. In this case we want to save and restore the mall config because we always
- * teardown subvp at the beginning of validation (and don't attempt to add it back if it's
- * fast validation). If we don't restore the subvp config in cases of fast validation +
- * shallow copy of the dc->current_state, the dc->current_state will have a partially
- * removed subvp state when we did not intend to remove it.
- *
- * NOTE: This function ONLY works if the streams are not moved to a different pipe in the
- *       validation. We don't expect this to happen in fast_validation=1 cases.
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed
- * @temp_config: struct used to cache the existing MALL state
- *
- * Return: void
- */
-void dcn32_save_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream)
-			temp_config->mall_stream_config[i] = pipe->stream->mall_stream_config;
-
-		if (pipe->plane_state)
-			temp_config->is_phantom_plane[i] = pipe->plane_state->is_phantom;
-	}
-}
-
-/**
- * dcn32_restore_mall_state(): Restore MALL (SubVP) state for fast validation cases
- *
- * Restore the MALL state based on the previously saved state from dcn32_save_mall_state
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed, restore MALL state into here
- * @temp_config: struct that has the cached MALL state
- *
- * Return: void
- */
-void dcn32_restore_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream)
-			pipe->stream->mall_stream_config = temp_config->mall_stream_config[i];
-
-		if (pipe->plane_state)
-			pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i];
-	}
-}
-
 #define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW)
 /*
  * Scaling factor for v_blank stretch calculations considering timing in
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 50d223fa1e498..10c890a9cb7c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1539,7 +1539,8 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 		// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
 		// remove phantom pipes and repopulate dml pipes
 		if (!found_supported_config) {
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+			dc_state_remove_phantom_streams_and_planes(dc, context);
+			dc_state_release_phantom_streams_and_planes(dc, context);
 			vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
 			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
 
@@ -1937,7 +1938,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		return false;
 
 	// For each full update, remove all existing phantom pipes first
-	dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);
+	dc_state_remove_phantom_streams_and_planes(dc, context);
+	dc_state_release_phantom_streams_and_planes(dc, context);
 
 	dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
index 0c146af34d82a..282d70e2b18ab 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
@@ -807,7 +807,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_
 	}
 }
 
-static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
+static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
 {
 	int i, old_plane_count;
 	struct dc_stream_status *stream_status = NULL;
@@ -828,9 +828,11 @@ static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_str
 	for (i = 0; i < old_plane_count; i++)
 		del_planes[i] = stream_status->plane_states[i];
 
-	for (i = 0; i < old_plane_count; i++)
+	for (i = 0; i < old_plane_count; i++) {
 		if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
 			return false;
+		ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]);
+	}
 
 	return true;
 }
@@ -839,39 +841,21 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state
 {
 	int i;
 	bool removed_pipe = false;
-	struct dc_plane_state *phantom_plane = NULL;
 	struct dc_stream_state *phantom_stream = NULL;
 
 	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
 		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
 		// build scaling params for phantom pipes
 		if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
 			phantom_stream = pipe->stream;
 
-			remove_all_planes_for_stream(ctx, pipe->stream, state);
-			ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream);
-
-			/* Ref count is incremented on allocation and also when added to the context.
-			 * Therefore we must call release for the the phantom plane and stream once
-			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
-			 */
-			ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc,
-					state,
-					phantom_plane);
-			ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc,
-					state,
-					phantom_stream);
+			remove_all_phantom_planes_for_stream(ctx, phantom_stream, state);
+			ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+			ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
 
 			removed_pipe = true;
 		}
 
-		// Clear all phantom stream info
-		if (pipe->stream) {
-			pipe->stream->mall_stream_config.type = SUBVP_NONE;
-			pipe->stream->mall_stream_config.paired_stream = NULL;
-		}
-
 		if (pipe->plane_state) {
 			pipe->plane_state->is_phantom = false;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index 505a5d7ae20dd..ee0eb184eb6d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -104,7 +104,10 @@ struct dml2_dc_svp_callbacks {
 			struct dc_stream_state *phantom_stream,
 			struct dc_stream_state *main_stream);
 	bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
-	bool (*remove_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
+	bool (*remove_phantom_plane)(const struct dc *dc,
+			struct dc_stream_state *stream,
+			struct dc_plane_state *plane_state,
+			struct dc_state *context);
 	enum dc_status (*remove_phantom_stream)(struct dc *dc,
 			struct dc_state *state,
 			struct dc_stream_state *stream);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 968f6ba7ccf61..762e674f9998c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1685,7 +1685,7 @@ static void disable_vga_and_power_gate_all_controllers(
 				true);
 
 		dc->current_state->res_ctx.pipe_ctx[i].pipe_idx = i;
-		dc->hwss.disable_plane(dc,
+		dc->hwss.disable_plane(dc, dc->current_state,
 			&dc->current_state->res_ctx.pipe_ctx[i]);
 	}
 }
@@ -2134,7 +2134,7 @@ static void dce110_reset_hw_ctx_wrap(
 										old_clk))
 				old_clk->funcs->cs_power_down(old_clk);
 
-			dc->hwss.disable_plane(dc, pipe_ctx_old);
+			dc->hwss.disable_plane(dc, dc->current_state, pipe_ctx_old);
 
 			pipe_ctx_old->stream = NULL;
 		}
@@ -2498,6 +2498,7 @@ static bool wait_for_reset_trigger_to_occur(
 /* Enable timing synchronization for a group of Timing Generators. */
 static void dce110_enable_timing_synchronization(
 		struct dc *dc,
+		struct dc_state *state,
 		int group_index,
 		int group_size,
 		struct pipe_ctx *grouped_pipes[])
@@ -2843,7 +2844,7 @@ static void dce110_post_unlock_program_front_end(
 {
 }
 
-static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
+static void dce110_power_down_fe(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	int fe_idx = pipe_ctx->plane_res.mi ?
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 96fb563b10090..d0247cd7833f0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -1181,7 +1181,9 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc)
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
@@ -1201,7 +1203,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
 	// Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle,
 	// so don't wait for MPCC_IDLE in the programming sequence
-	if (opp != NULL && dc_state_get_pipe_subvp_type(NULL, pipe_ctx) != SUBVP_PHANTOM)
+	if (opp != NULL && dc_state_get_pipe_subvp_type(state, pipe_ctx) != SUBVP_PHANTOM)
 		opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 
 	dc->optimized_required = true;
@@ -1291,7 +1293,7 @@ void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_state = NULL;
 }
 
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -1417,12 +1419,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 
 		if (tg->funcs->is_tg_enabled(tg))
 			tg->funcs->unlock(tg);
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -2263,6 +2265,7 @@ void dcn10_enable_vblanks_synchronization(
 
 void dcn10_enable_timing_synchronization(
 	struct dc *dc,
+	struct dc_state *state,
 	int group_index,
 	int group_size,
 	struct pipe_ctx *grouped_pipes[])
@@ -2277,7 +2280,7 @@ void dcn10_enable_timing_synchronization(
 	DC_SYNC_INFO("Setting up OTG reset trigger\n");
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
@@ -2297,14 +2300,14 @@ void dcn10_enable_timing_synchronization(
 		if (grouped_pipes[i]->stream == NULL)
 			continue;
 
-		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream->vblank_synchronized = false;
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger(
@@ -2318,11 +2321,11 @@ void dcn10_enable_timing_synchronization(
 	 * synchronized. Look at last pipe programmed to reset.
 	 */
 
-	if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[1]) != SUBVP_PHANTOM)
+	if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[1]) != SUBVP_PHANTOM)
 		wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg);
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger(
@@ -3022,7 +3025,7 @@ void dcn10_post_unlock_program_front_end(
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
-			dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
index ef6d56da417cd..bc5dd68a24088 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
@@ -75,7 +75,7 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn10_reset_hw_ctx_wrap(
 		struct dc *dc,
 		struct dc_state *context);
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn10_lock_all_pipes(
 		struct dc *dc,
 		struct dc_state *context,
@@ -108,13 +108,16 @@ void dcn10_power_down_on_boot(struct dc *dc);
 enum dc_status dce110_apply_ctx_to_hw(
 		struct dc *dc,
 		struct dc_state *context);
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx);
 void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data);
 void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx);
 void dce110_power_down(struct dc *dc);
 void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context);
 void dcn10_enable_timing_synchronization(
 		struct dc *dc,
+		struct dc_state *state,
 		int group_index,
 		int group_size,
 		struct pipe_ctx *grouped_pipes[]);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 0c9f4ea109cb1..88fe102f82882 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -624,9 +624,9 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 }
 
 
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
-	bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM;
+	bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -1369,10 +1369,13 @@ void dcn20_pipe_control_lock(
 	}
 }
 
-static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe)
+static void dcn20_detect_pipe_changes(struct dc_state *old_state,
+		struct dc_state *new_state,
+		struct pipe_ctx *old_pipe,
+		struct pipe_ctx *new_pipe)
 {
-	bool old_is_phantom = dc_state_get_pipe_subvp_type(NULL, old_pipe) == SUBVP_PHANTOM;
-	bool new_is_phantom = dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM;
+	bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM;
+	bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM;
 
 	new_pipe->update_flags.raw = 0;
 
@@ -1420,7 +1423,7 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 	 * The remove-add sequence of the phantom pipe always results in the pipe
 	 * being blanked in enable_stream_timing (DPG).
 	 */
-	if (new_pipe->stream && dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM)
+	if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM)
 		new_pipe->update_flags.bits.enable = 1;
 
 	/* Phantom pipes are effectively disabled, if the pipe was previously phantom
@@ -1782,7 +1785,7 @@ static void dcn20_program_pipe(
 				pipe_ctx->pipe_dlg_param.vupdate_offset,
 				pipe_ctx->pipe_dlg_param.vupdate_width);
 
-		if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM)
+		if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
 			pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
 
 		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
@@ -1921,7 +1924,7 @@ void dcn20_program_front_end_for_ctx(
 
 	/* Set pipe update flags and lock pipes */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
-		dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i],
+		dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i],
 				&context->res_ctx.pipe_ctx[i]);
 
 	/* When disabling phantom pipes, turn on phantom OTG first (so we can get double
@@ -1971,7 +1974,7 @@ void dcn20_program_front_end_for_ctx(
 			if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
 					(context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM)))
 				hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
-			hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 			DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
 		}
 
@@ -2044,7 +2047,7 @@ void dcn20_post_unlock_program_front_end(
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
-			dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 
 	/*
 	 * If we are enabling a pipe, we need to wait for pending clear as this is a critical
@@ -2954,7 +2957,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 		/*to do*/
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 	}
 
 	/* initialize DWB pointer to MCIF_WB */
@@ -2971,7 +2974,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
index ab02e4e9c8c29..b94c85340abff 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
@@ -52,7 +52,7 @@ void dcn20_program_output_csc(struct dc *dc,
 void dcn20_enable_stream(struct pipe_ctx *pipe_ctx);
 void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings);
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn20_disable_pixel_data(
 		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
index d3fe6092f50e8..d5769f38874fd 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
@@ -320,7 +320,7 @@ void dcn201_init_hw(struct dc *dc)
 		res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = res_pool->opps[i];
 		/*To do: number of MPCC != number of opp*/
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 	}
 
 	/* initialize DWB pointer to MCIF_WB */
@@ -337,7 +337,7 @@ void dcn201_init_hw(struct dc *dc)
 	for (i = 0; i < res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -369,7 +369,9 @@ void dcn201_init_hw(struct dc *dc)
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn201_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
index 26cd62be64181..6a50a9894be6a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
@@ -33,7 +33,7 @@ void dcn201_init_hw(struct dc *dc);
 void dcn201_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings);
 void dcn201_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn201_plane_atomic_disconnect(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn201_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn201_set_cursor_attribute(struct pipe_ctx *pipe_ctx);
 void dcn201_pipe_control_lock(
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index d3d16c7af38c3..6f360f008f67a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -349,8 +349,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream &&
-				dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
 			// There is at least 1 SubVP pipe, so enable SubVP
 			enable_subvp = true;
 			break;
@@ -419,14 +418,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params)
 {
 	struct dc *dc = params->subvp_pipe_control_lock_fast_params.dc;
 	bool lock = params->subvp_pipe_control_lock_fast_params.lock;
-	struct pipe_ctx *pipe_ctx = params->subvp_pipe_control_lock_fast_params.pipe_ctx;
-	bool subvp_immediate_flip = false;
-
-	if (pipe_ctx && pipe_ctx->stream && pipe_ctx->plane_state) {
-		if (dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN &&
-				pipe_ctx->plane_state->flip_immediate)
-			subvp_immediate_flip = true;
-	}
+	bool subvp_immediate_flip = params->subvp_pipe_control_lock_fast_params.subvp_immediate_flip;
 
 	// Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared.
 	if (subvp_immediate_flip) {
@@ -1382,7 +1374,7 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN &&
-				pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) {
+				dc_state_get_paired_subvp_stream(context, pipe->stream) == phantom_pipe->stream) {
 			if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) {
 
 				phantom_plane->src_rect.x = pipe->plane_state->src_rect.x;
@@ -1407,21 +1399,19 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe)
 {
 	phantom_pipe->update_flags.raw = 0;
-	if (dc_state_get_pipe_subvp_type(NULL, phantom_pipe) == SUBVP_PHANTOM) {
-		if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
-			phantom_pipe->update_flags.bits.enable = 1;
-			phantom_pipe->update_flags.bits.mpcc = 1;
-			phantom_pipe->update_flags.bits.dppclk = 1;
-			phantom_pipe->update_flags.bits.hubp_interdependent = 1;
-			phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
-			phantom_pipe->update_flags.bits.gamut_remap = 1;
-			phantom_pipe->update_flags.bits.scaler = 1;
-			phantom_pipe->update_flags.bits.viewport = 1;
-			phantom_pipe->update_flags.bits.det_size = 1;
-			if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
-				phantom_pipe->update_flags.bits.odm = 1;
-				phantom_pipe->update_flags.bits.global_sync = 1;
-			}
+	if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
+		phantom_pipe->update_flags.bits.enable = 1;
+		phantom_pipe->update_flags.bits.mpcc = 1;
+		phantom_pipe->update_flags.bits.dppclk = 1;
+		phantom_pipe->update_flags.bits.hubp_interdependent = 1;
+		phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+		phantom_pipe->update_flags.bits.gamut_remap = 1;
+		phantom_pipe->update_flags.bits.scaler = 1;
+		phantom_pipe->update_flags.bits.viewport = 1;
+		phantom_pipe->update_flags.bits.det_size = 1;
+		if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
+			phantom_pipe->update_flags.bits.odm = 1;
+			phantom_pipe->update_flags.bits.global_sync = 1;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 56a4bc4766845..582852ed21fbf 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -814,12 +814,12 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 
 		if (tg->funcs->is_tg_enabled(tg))
 			tg->funcs->unlock(tg);
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -946,10 +946,10 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_state = NULL;
 }
 
-void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
-	bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM;
+	bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
index 3837038dc4a8f..b7bafe7fe2fdb 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -57,7 +57,7 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context);
 void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
 			       struct dc_state *context);
-void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 
 void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 	struct pg_block_update *update_state);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index 45dc6d4e95627..6ca8c5488d507 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -50,7 +50,7 @@ struct pg_block_update;
 struct subvp_pipe_control_lock_fast_params {
 	struct dc *dc;
 	bool lock;
-	struct pipe_ctx *pipe_ctx;
+	bool subvp_immediate_flip;
 };
 
 struct pipe_control_lock_params {
@@ -200,7 +200,7 @@ struct hw_sequencer_funcs {
 			struct dc_state *context);
 	enum dc_status (*apply_ctx_to_hw)(struct dc *dc,
 			struct dc_state *context);
-	void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*disable_plane)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 	void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank);
 	void (*apply_ctx_for_surface)(struct dc *dc,
 			const struct dc_stream_state *stream,
@@ -248,6 +248,7 @@ struct hw_sequencer_funcs {
 	void (*enable_per_frame_crtc_position_reset)(struct dc *dc,
 			int group_size, struct pipe_ctx *grouped_pipes[]);
 	void (*enable_timing_synchronization)(struct dc *dc,
+			struct dc_state *state,
 			int group_index, int group_size,
 			struct pipe_ctx *grouped_pipes[]);
 	void (*enable_vblanks_synchronization)(struct dc *dc,
@@ -474,7 +475,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 		unsigned int dmub_cmd_count,
 		struct block_sequence block_sequence[],
 		int *num_steps,
-		struct pipe_ctx *pipe_ctx);
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_status *stream_status);
 
 void hwss_send_dmcub_cmd(union block_sequence_params *params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
index 82c5921668754..6137cf09aa54d 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
@@ -79,6 +79,7 @@ struct hwseq_private_funcs {
 	void (*update_plane_addr)(const struct dc *dc,
 			struct pipe_ctx *pipe_ctx);
 	void (*plane_atomic_disconnect)(struct dc *dc,
+			struct dc_state *state,
 			struct pipe_ctx *pipe_ctx);
 	void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx);
 	bool (*set_input_transfer_func)(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 10397d4dfb075..c3b973abb89a7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -200,11 +200,7 @@ struct resource_funcs {
 			unsigned int pipe_cnt,
             unsigned int index);
 
-	bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
-	void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
 	void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
-	void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
-	void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
 	void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx);
 };
 
@@ -526,6 +522,14 @@ struct dc_state {
 	 * @stream_status: Planes status on a given stream
 	 */
 	struct dc_stream_status stream_status[MAX_PIPES];
+	/**
+	 * @phantom_streams: Stream state properties for phantoms
+	 */
+	struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES];
+	/**
+	 * @phantom_planes: Planes state properties for phantoms
+	 */
+	struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES];
 
 	/**
 	 * @stream_count: Total of streams in use
@@ -533,6 +537,14 @@ struct dc_state {
 	uint8_t stream_count;
 	uint8_t stream_mask;
 
+	/**
+	 * @stream_count: Total phantom streams in use
+	 */
+	uint8_t phantom_stream_count;
+	/**
+	 * @stream_count: Total phantom planes in use
+	 */
+	uint8_t phantom_plane_count;
 	/**
 	 * @res_ctx: Persistent state of resources
 	 */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index b95ae9596c3b1..dcae23faeee3d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -43,6 +43,7 @@
  * to be used inside loops and for determining array sizes.
  */
 #define MAX_PIPES 6
+#define MAX_PHANTOM_PIPES (MAX_PIPES / 2)
 #define MAX_DIG_LINK_ENCODERS 7
 #define MAX_DWB_PIPES	1
 #define MAX_HPO_DP2_ENCODERS	4
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index dbcb9c5ea9af2..c4d71e7f18af4 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1697,69 +1697,6 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	return phantom_stream;
 }
 
-void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
-{
-	int i;
-	struct dc_plane_state *phantom_plane = NULL;
-	struct dc_stream_state *phantom_stream = NULL;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
-				resource_is_pipe_type(pipe, DPP_PIPE) &&
-				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
-			phantom_stream = pipe->stream;
-
-			dc_plane_state_retain(phantom_plane);
-			dc_stream_retain(phantom_stream);
-		}
-	}
-}
-
-// return true if removed piped from ctx, false otherwise
-bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
-{
-	int i;
-	bool removed_pipe = false;
-	struct dc_plane_state *phantom_plane = NULL;
-	struct dc_stream_state *phantom_stream = NULL;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-		// build scaling params for phantom pipes
-		if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
-			phantom_stream = pipe->stream;
-
-			dc_state_rem_all_planes_for_stream(dc, pipe->stream, context);
-
-			/* For non-full updates, a shallow copy of the current state
-			 * is created. In this case we don't want to erase the current
-			 * state (there can be 2 HIRQL threads, one in flip, and one in
-			 * checkMPO) that can cause a race condition.
-			 *
-			 * This is just a workaround, needs a proper fix.
-			 */
-			if (!fast_update)
-				dc_state_remove_phantom_stream(dc, context, pipe->stream);
-			else
-				dc_state_remove_stream(dc, context, pipe->stream);
-
-			/* Ref count is incremented on allocation and also when added to the context.
-			 * Therefore we must call release for the the phantom plane and stream once
-			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
-			 */
-			dc_state_release_phantom_plane(dc, context, phantom_plane);
-			dc_state_release_phantom_stream(dc, context, phantom_stream);
-
-			removed_pipe = true;
-		}
-	}
-	return removed_pipe;
-}
-
 /* TODO: Input to this function should indicate which pipe indexes (or streams)
  * require a phantom pipe / stream
  */
@@ -1803,7 +1740,6 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val
 	int vlevel = 0;
 	int pipe_cnt = 0;
 	display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
-	struct mall_temp_config mall_temp_config;
 
 	/* To handle Freesync properly, setting FreeSync DML parameters
 	 * to its default state for the first stage of validation
@@ -1813,29 +1749,12 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val
 
 	DC_LOGGER_INIT(dc->ctx->logger);
 
-	/* For fast validation, there are situations where a shallow copy of
-	 * of the dc->current_state is created for the validation. In this case
-	 * we want to save and restore the mall config because we always
-	 * teardown subvp at the beginning of validation (and don't attempt
-	 * to add it back if it's fast validation). If we don't restore the
-	 * subvp config in cases of fast validation + shallow copy of the
-	 * dc->current_state, the dc->current_state will have a partially
-	 * removed subvp state when we did not intend to remove it.
-	 */
-	if (fast_validate) {
-		memset(&mall_temp_config, 0, sizeof(mall_temp_config));
-		dcn32_save_mall_state(dc, context, &mall_temp_config);
-	}
-
 	BW_VAL_TRACE_COUNT();
 
 	DC_FP_START();
 	out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
 	DC_FP_END();
 
-	if (fast_validate)
-		dcn32_restore_mall_state(dc, context, &mall_temp_config);
-
 	if (pipe_cnt == 0)
 		goto validate_out;
 
@@ -2023,10 +1942,6 @@ static struct resource_funcs dcn32_res_pool_funcs = {
 	.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
-	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
-	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
-	.save_mall_state = dcn32_save_mall_state,
-	.restore_mall_state = dcn32_restore_mall_state,
 	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index 9ca799da1a56e..b27fadd452bb8 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -91,12 +91,6 @@ bool dcn32_release_post_bldn_3dlut(
 		struct dc_3dlut **lut,
 		struct dc_transfer_func **shaper);
 
-bool dcn32_remove_phantom_pipes(struct dc *dc,
-		struct dc_state *context, bool fast_update);
-
-void dcn32_retain_phantom_pipes(struct dc *dc,
-		struct dc_state *context);
-
 void dcn32_add_phantom_pipes(struct dc *dc,
 		struct dc_state *context,
 		display_e2e_pipe_params_st *pipes,
@@ -169,14 +163,6 @@ void dcn32_determine_det_override(struct dc *dc,
 void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
 	display_e2e_pipe_params_st *pipes);
 
-void dcn32_save_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config);
-
-void dcn32_restore_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config);
-
 struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context);
 
 bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index 8c278c2fd2870..74412e5f03fef 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1607,10 +1607,6 @@ static struct resource_funcs dcn321_res_pool_funcs = {
 	.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
-	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
-	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
-	.save_mall_state = dcn32_save_mall_state,
-	.restore_mall_state = dcn32_restore_mall_state,
 	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
-- 
GitLab


From b03281e925f996ffc850ad25de10f4586a8c7435 Mon Sep 17 00:00:00 2001
From: Dillon Varone <dillon.varone@amd.com>
Date: Mon, 11 Dec 2023 16:49:20 -0500
Subject: [PATCH 1264/2340] drm/amd/display: Fix null reference to state when
 getting subvp type

[WHY&HOW]
Need to provide valid pointer to dc_state when getting subvp pipe type.

Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Dillon Varone <dillon.varone@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index d0247cd7833f0..632aa091b6b6b 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -2333,7 +2333,7 @@ void dcn10_enable_timing_synchronization(
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM)
+		if (dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
-- 
GitLab


From 760ed918fb1f857490868e4bc91265a4d5d37f37 Mon Sep 17 00:00:00 2001
From: Dillon Varone <dillon.varone@amd.com>
Date: Wed, 13 Dec 2023 15:04:42 -0500
Subject: [PATCH 1265/2340] drm/amd/display: Create dc_state after resource
 initialization

[WHY&HOW]
After refactoring dc_state, it is always constructed at the time of its
creation. Construction can only happen after dc resources are initialized, so
move creation to be after this.

Reviewed-by: George Shen <george.shen@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Dillon Varone <dillon.varone@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 630a55b2c1d48..043913d65b162 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1025,6 +1025,15 @@ static bool dc_construct(struct dc *dc,
 	}
 #endif
 
+	if (!create_links(dc, init_params->num_virtual_links))
+		goto fail;
+
+	/* Create additional DIG link encoder objects if fewer than the platform
+	 * supports were created during link construction.
+	 */
+	if (!create_link_encoders(dc))
+		goto fail;
+
 	/* Creation of current_state must occur after dc->dml
 	 * is initialized in dc_create_resource_pool because
 	 * on creation it copies the contents of dc->dml
@@ -1037,15 +1046,6 @@ static bool dc_construct(struct dc *dc,
 		goto fail;
 	}
 
-	if (!create_links(dc, init_params->num_virtual_links))
-		goto fail;
-
-	/* Create additional DIG link encoder objects if fewer than the platform
-	 * supports were created during link construction.
-	 */
-	if (!create_link_encoders(dc))
-		goto fail;
-
 	return true;
 
 fail:
-- 
GitLab


From 08daec77fddf23cd246a0662c6dc0d60229caaee Mon Sep 17 00:00:00 2001
From: Dillon Varone <dillon.varone@amd.com>
Date: Wed, 13 Dec 2023 16:39:22 -0500
Subject: [PATCH 1266/2340] drm/amd/display: Deep copy dml2_context when
 copying dc_state

[WHY&HOW]
dml2_context should be deep copied from src to dst dc_state.

Reviewed-by: George Shen <george.shen@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Dillon Varone <dillon.varone@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_state.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 66a70a60c479a..dd52cab7ecdf8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -212,10 +212,14 @@ struct dc_state *dc_state_create(struct dc *dc)
 void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state)
 {
 	struct kref refcount = dst_state->refcount;
+#ifdef CONFIG_DRM_AMD_DC_FP
+	struct dml2_context *dst_dml2 = dst_state->bw_ctx.dml2;
+#endif
 
 	dc_state_copy_internal(dst_state, src_state);
 
 #ifdef CONFIG_DRM_AMD_DC_FP
+	dst_state->bw_ctx.dml2 = dst_dml2;
 	if (src_state->bw_ctx.dml2)
 		dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2);
 #endif
-- 
GitLab


From 5a82b8d6c05f9b30828ede1b103b9ee5cb5c912e Mon Sep 17 00:00:00 2001
From: Michael Strauss <michael.strauss@amd.com>
Date: Thu, 30 Nov 2023 10:44:36 -0500
Subject: [PATCH 1267/2340] drm/amd/display: Fix lightup regression with DP2
 single display configs

[WHY]
Previous fix for multiple displays downstream of DP2 MST hub caused regression

[HOW]
Match sink IDs instead of sink struct addresses

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Michael Strauss <michael.strauss@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 9156e2ea55f5a..8c0794af5adee 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -161,6 +161,14 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 	if (pipe_ctx->stream == NULL)
 		return false;
 
+	/* Count MST hubs once by treating only 1st remote sink in topology as an encoder */
+	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) {
+		return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
+			pipe_ctx->link_res.hpo_dp_link_enc &&
+			dc_is_dp_signal(pipe_ctx->stream->signal) &&
+			(pipe_ctx->stream->link->remote_sinks[0]->sink_id == pipe_ctx->stream->sink->sink_id));
+	}
+
 	return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
 		pipe_ctx->link_res.hpo_dp_link_enc &&
 		dc_is_dp_signal(pipe_ctx->stream->signal));
-- 
GitLab


From dff45f03f508c92cd8eb2050e27b726726b8ae0b Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Fri, 8 Dec 2023 11:56:56 -0500
Subject: [PATCH 1268/2340] drm/amd/display: Only clear symclk otg flag for
 HDMI

[Description]
There is a corner case where the symclk otg flag is cleared
when disabling the phantom pipe for subvp (because the phantom
and main pipe share the same link). This is undesired because
we need the maintain the correct symclk otg flag state for
the main pipe.

For now only clear the flag only for HDMI signal type, since
it's only set for HDMI signal type (phantom is virtual). The
ideal solution is to not clear it if the stream is phantom but
currently there's a bug that doesn't allow us to do this. Once
this issue is fixed the proper fix can be implemented.

Reviewed-by: Samson Tam <samson.tam@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++-
 drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c   | 3 ++-
 drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c   | 3 ++-
 drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c   | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 762e674f9998c..6457099ed5882 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -2125,7 +2125,8 @@ static void dce110_reset_hw_ctx_wrap(
 				BREAK_TO_DEBUGGER();
 			}
 			pipe_ctx_old->stream_res.tg->funcs->disable_crtc(pipe_ctx_old->stream_res.tg);
-			pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+			if (dc_is_hdmi_tmds_signal(pipe_ctx_old->stream->signal))
+				pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 			pipe_ctx_old->plane_res.mi->funcs->free_mem_input(
 					pipe_ctx_old->plane_res.mi, dc->current_state->stream_count);
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 632aa091b6b6b..1c5e3bb6f0ee4 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -1058,7 +1058,8 @@ static void dcn10_reset_back_end_for_pipe(
 		if (pipe_ctx->stream_res.tg->funcs->set_drr)
 			pipe_ctx->stream_res.tg->funcs->set_drr(
 					pipe_ctx->stream_res.tg, NULL);
-		pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+		if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+			pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 88fe102f82882..0dfcb3cdcd20c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -2624,7 +2624,8 @@ static void dcn20_reset_back_end_for_pipe(
 		 * the case where the same symclk is shared across multiple otg
 		 * instances
 		 */
-		link->phy_state.symclk_ref_cnts.otg = 0;
+		if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+			link->phy_state.symclk_ref_cnts.otg = 0;
 		if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) {
 			link_hwss->disable_link_output(link,
 					&pipe_ctx->link_res, pipe_ctx->stream->signal);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 994250b6f2ef8..260860c259f3a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -524,7 +524,8 @@ static void dcn31_reset_back_end_for_pipe(
 	if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass)
 		pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
 				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
-	pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+	if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+		pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 
 	if (pipe_ctx->stream_res.tg->funcs->set_drr)
 		pipe_ctx->stream_res.tg->funcs->set_drr(
-- 
GitLab


From e7b2b108cdeab76a7e7324459e50b0c1214c0386 Mon Sep 17 00:00:00 2001
From: Ilya Bakoulin <ilya.bakoulin@amd.com>
Date: Fri, 8 Dec 2023 12:19:33 -0500
Subject: [PATCH 1269/2340] drm/amd/display: Fix hang/underflow when
 transitioning to ODM4:1

[Why]
Under some circumstances, disabling an OPTC and attempting to reclaim
its OPP(s) for a different OPTC could cause a hang/underflow due to OPPs
not being properly disconnected from the disabled OPTC.

[How]
Ensure that all OPPs are unassigned from an OPTC when it gets disabled.

Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Ilya Bakoulin <ilya.bakoulin@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c | 7 +++++++
 drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index a2c4db2cebdd6..91ea0d4da06a9 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -172,6 +172,13 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 	REG_UPDATE(OTG_CONTROL,
 			OTG_MASTER_EN, 0);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
index a4a39f1638cf2..08a59cf449cae 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -144,6 +144,13 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 	REG_UPDATE(OTG_CONTROL,
 			OTG_MASTER_EN, 0);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
-- 
GitLab


From 85fce153995e177ca307786b4ecf190b4daa540c Mon Sep 17 00:00:00 2001
From: Allen Pan <allen.pan@amd.com>
Date: Fri, 8 Dec 2023 18:49:19 -0500
Subject: [PATCH 1270/2340] drm/amd/display: change static screen wait
 frame_count for ips

[Why]
the original wait for 2 static frames before enter static screen
was not good enough for IPS-enabled case since enter/exit takes more time.

[How]
Changed logic for hardcoded wait frame values.

Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Allen Pan <allen.pan@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h           |  1 +
 .../gpu/drm/amd/display/dc/dcn35/dcn35_init.c |  2 +-
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 41 +++++++++++++++++++
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.h   |  4 ++
 .../dc/resource/dcn35/dcn35_resource.c        |  3 +-
 5 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index a270b4bf7b95c..f622f4f0e1a0c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -982,6 +982,7 @@ struct dc_debug_options {
 	unsigned int ips2_entry_delay_us;
 	bool disable_timeout;
 	bool disable_extblankadj;
+	unsigned int static_screen_wait_frames;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
index d594905eb246f..a630aa77dcec0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
@@ -68,7 +68,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
 	.prepare_bandwidth = dcn35_prepare_bandwidth,
 	.optimize_bandwidth = dcn35_optimize_bandwidth,
 	.update_bandwidth = dcn20_update_bandwidth,
-	.set_drr = dcn10_set_drr,
+	.set_drr = dcn35_set_drr,
 	.get_position = dcn10_get_position,
 	.set_static_screen_control = dcn30_set_static_screen_control,
 	.setup_stereo = dcn10_setup_stereo,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 582852ed21fbf..ad710b4036de0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1312,3 +1312,44 @@ uint32_t dcn35_get_idle_state(const struct dc *dc)
 
 	return 0;
 }
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust)
+{
+	int i = 0;
+	struct drr_params params = {0};
+	// DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow
+	unsigned int event_triggers = 0x800;
+	// Note DRR trigger events are generated regardless of whether num frames met.
+	unsigned int num_frames = 2;
+
+	params.vertical_total_max = adjust.v_total_max;
+	params.vertical_total_min = adjust.v_total_min;
+	params.vertical_total_mid = adjust.v_total_mid;
+	params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
+
+	for (i = 0; i < num_pipes; i++) {
+		if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
+			struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing;
+			struct dc *dc = pipe_ctx[i]->stream->ctx->dc;
+
+			if (dc->debug.static_screen_wait_frames) {
+				unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total);
+
+				if (frame_rate >= 120 && dc->caps.ips_support &&
+					dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) {
+					/*ips enable case*/
+					num_frames = 2 * (frame_rate % 60);
+				}
+			}
+			if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
+				pipe_ctx[i]->stream_res.tg->funcs->set_drr(
+					pipe_ctx[i]->stream_res.tg, &params);
+			if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+				if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
+					pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
+						pipe_ctx[i]->stream_res.tg,
+						event_triggers, num_frames);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
index b7bafe7fe2fdb..fd66316e33de3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -86,4 +86,8 @@ void dcn35_dsc_pg_control(
 
 void dcn35_set_idle_state(const struct dc *dc, bool allow_idle);
 uint32_t dcn35_get_idle_state(const struct dc *dc);
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust);
+
 #endif /* __DC_HWSS_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index d0c3c5b470949..810cfe2136322 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -780,7 +780,8 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.ignore_pg = true,
 	.psp_disabled_wa = true,
 	.ips2_eval_delay_us = 200,
-	.ips2_entry_delay_us = 400
+	.ips2_entry_delay_us = 400,
+	.static_screen_wait_frames = 2,
 };
 
 static const struct dc_panel_config panel_config_defaults = {
-- 
GitLab


From 0061080e5d1982e4dd424c4ba1d6ae20f11eb03d Mon Sep 17 00:00:00 2001
From: Anthony Koo <anthony.koo@amd.com>
Date: Sat, 9 Dec 2023 12:35:25 -0500
Subject: [PATCH 1271/2340] drm/amd/display: [FW Promotion] Release 0.0.197.0

 - Remove unused dmub_fw_boot_options flag

Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Anthony Koo <anthony.koo@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 3c092064c72ec..c64b6c848ef72 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -653,7 +653,7 @@ union dmub_fw_boot_options {
 		uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
 		uint32_t usb4_cm_version: 1; /**< 1 CM support */
 		uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
-		uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */
+		uint32_t reserved0: 1;
 		uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/
 		uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */
 		uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
-- 
GitLab


From 731b2f6e6be4a4946724e47c15cba1e40568ad13 Mon Sep 17 00:00:00 2001
From: Aric Cyr <aric.cyr@amd.com>
Date: Sun, 10 Dec 2023 21:20:56 -0500
Subject: [PATCH 1272/2340] drm/amd/display: 3.2.265

This DC patchset brings improvements in multiple areas. In summary, we highlight:

- change static screen wait frame_count for ips
- Fix hang/underflow when transitioning to ODM4:1
- Only clear symclk otg flag for HDMI
- Fix lightup regression with DP2 single display configs
- Refactor phantom resource allocation
- Refactor dc_state interface
- Wake DMCUB before executing GPINT commands
- Wake DMCUB before sending a command
- Refactor DMCUB enter/exit idle interface
- enable dcn35 idle power optimization
- fix usb-c connector_type
- add debug option for ExtendedVBlank DLG adjust
- Set test_pattern_changed update flag on pipe enable
- dereference variable before checking for zero
- get dprefclk ss info from integration info table
- skip error logging when DMUB is inactive from S3
- make flip_timestamp_in_us a 64-bit variable
- Add case for dcn35 to support usb4 dmub hpd event
- Add function for dumping clk registers
- Unify optimize_required flags and VRR adjustments
- Revert using channel_width as 2 for vram table 3.0
- remove HPO PG in driver side
- do not send commands to DMUB if DMUB is inactive from S3

Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Aric Cyr <aric.cyr@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index f622f4f0e1a0c..3ca72c097aa15 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -51,7 +51,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.264"
+#define DC_VER "3.2.265"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
GitLab


From 9f91e983ee82d3b6f6d713e1c84ebb8d53180b3d Mon Sep 17 00:00:00 2001
From: YiPeng Chai <YiPeng.Chai@amd.com>
Date: Tue, 12 Dec 2023 17:26:58 +0800
Subject: [PATCH 1273/2340] drm/amdgpu: MCA supports recording umc address
 information

MCA supports recording umc address information.

V2:
  Move err_addr variable from struct ras_err_node to
struct ras_err_info.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c  | 13 +++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 22 +++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h  | 13 +++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c   |  4 ++--
 8 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 210aea590a52e..8911310f98df2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -218,6 +218,7 @@ static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, st
 int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data)
 {
 	struct amdgpu_smuio_mcm_config_info mcm_info;
+	struct ras_err_addr err_addr = {0};
 	struct mca_bank_set mca_set;
 	struct mca_bank_node *node;
 	struct mca_bank_entry *entry;
@@ -246,10 +247,18 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
 		mcm_info.socket_id = entry->info.socket_id;
 		mcm_info.die_id = entry->info.aid;
 
+		if (blk == AMDGPU_RAS_BLOCK__UMC) {
+			err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS];
+			err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID];
+			err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR];
+		}
+
 		if (type == AMDGPU_MCA_ERROR_TYPE_UE)
-			amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count);
+			amdgpu_ras_error_statistic_ue_count(err_data,
+				&mcm_info, &err_addr, (uint64_t)count);
 		else
-			amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count);
+			amdgpu_ras_error_statistic_ce_count(err_data,
+				&mcm_info, &err_addr, (uint64_t)count);
 	}
 
 out_mca_release:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index bacb59d8b701a..bad62141f7084 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1156,8 +1156,10 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
 		for_each_ras_error(err_node, err_data) {
 			err_info = &err_node->err_info;
 
-			amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count);
-			amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count);
+			amdgpu_ras_error_statistic_ce_count(&obj->err_data,
+					&err_info->mcm_info, NULL, err_info->ce_count);
+			amdgpu_ras_error_statistic_ue_count(&obj->err_data,
+					&err_info->mcm_info, NULL, err_info->ue_count);
 		}
 	} else {
 		/* for legacy asic path which doesn't has error source info */
@@ -3691,7 +3693,8 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct
 }
 
 static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
-						      struct amdgpu_smuio_mcm_config_info *mcm_info)
+				struct amdgpu_smuio_mcm_config_info *mcm_info,
+				struct ras_err_addr *err_addr)
 {
 	struct ras_err_node *err_node;
 
@@ -3705,6 +3708,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
 
 	memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
 
+	if (err_addr)
+		memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr));
+
 	err_data->err_list_count++;
 	list_add_tail(&err_node->node, &err_data->err_node_list);
 	list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
@@ -3713,7 +3719,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
 }
 
 int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count)
 {
 	struct ras_err_info *err_info;
 
@@ -3723,7 +3730,7 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
 	if (!count)
 		return 0;
 
-	err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
 	if (!err_info)
 		return -EINVAL;
 
@@ -3734,7 +3741,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
 }
 
 int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count)
 {
 	struct ras_err_info *err_info;
 
@@ -3744,7 +3752,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
 	if (!count)
 		return 0;
 
-	err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
 	if (!err_info)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 6a941eb8fb8fd..76fb85628716f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -452,10 +452,17 @@ struct ras_fs_data {
 	char debugfs_name[32];
 };
 
+struct ras_err_addr {
+	uint64_t err_status;
+	uint64_t err_ipid;
+	uint64_t err_addr;
+};
+
 struct ras_err_info {
 	struct amdgpu_smuio_mcm_config_info mcm_info;
 	u64 ce_count;
 	u64 ue_count;
+	struct ras_err_addr err_addr;
 };
 
 struct ras_err_node {
@@ -806,8 +813,10 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
 int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
 void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
 int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count);
 int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 9a95b9f226b85..a6c88f2fe6e57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1313,10 +1313,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a
 
 	switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
 	case AMDGPU_MCA_ERROR_TYPE_UE:
-		amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL);
+		amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL);
 		break;
 	case AMDGPU_MCA_ERROR_TYPE_CE:
-		amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL);
+		amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 00b21ece081f9..131cddbdda0dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3828,8 +3828,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
 	/* the caller should make sure initialize value of
 	 * err_data->ue_count and err_data->ce_count
 	 */
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
 }
 
 static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 9b0146732e13c..fb53aacdcba20 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -652,8 +652,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
 					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 					&ue_count);
 
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
 }
 
 static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 0f24af6f28102..2d688dca26bed 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2156,7 +2156,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
 					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 					&ue_count);
 
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
 }
 
 static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index e9c2ff74f0bc1..8d60c39ae1c55 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -166,8 +166,8 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
 	umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count);
 	umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count);
 
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
 
 	return 0;
 }
-- 
GitLab


From a8c77a121ce12d5ce5500f5777e00e5a841ad51a Mon Sep 17 00:00:00 2001
From: YiPeng Chai <YiPeng.Chai@amd.com>
Date: Wed, 13 Dec 2023 16:14:40 +0800
Subject: [PATCH 1274/2340] drm/amdgpu: Add poison mode check error condition
 for umc v12_0

Add poison mode check error condition for umc v12_0.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c        | 20 ++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.h        |  4 ++--
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  |  4 ++--
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 8d60c39ae1c55..8430888760bae 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -88,16 +88,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
 		umc_v12_0_reset_error_count_per_channel, NULL);
 }
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+	if (amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+		return true;
+
 	return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
 }
 
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+	if (amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+		return false;
+
 	return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
@@ -105,7 +115,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
 		/* Identify data parity error in replay mode */
 		((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
-		!(umc_v12_0_is_uncorrectable_error(mc_umc_status)))));
+		!(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
 }
 
 static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
@@ -124,7 +134,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
 	mc_umc_status =
 		RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-	if (umc_v12_0_is_correctable_error(mc_umc_status))
+	if (umc_v12_0_is_correctable_error(adev, mc_umc_status))
 		*error_count += 1;
 }
 
@@ -142,7 +152,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
 	mc_umc_status =
 		RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-	if (umc_v12_0_is_uncorrectable_error(mc_umc_status))
+	if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))
 		*error_count += 1;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index b34b1e358f8b8..17b4b52d6f136 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -117,8 +117,8 @@
 		(pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
 	} while (0)
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status);
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status);
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
 
 extern const uint32_t
 	umc_v12_0_channel_idx_tbl[]
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 3998c9b31d076..6cbd335a804b9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2524,9 +2524,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
 		return 0;
 	}
 
-	if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0))
+	if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0))
 		*count = 1;
-	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0))
+	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0))
 		*count = 1;
 
 	return 0;
-- 
GitLab


From 6fe08f56db798659beca41ab5b1727a31518f794 Mon Sep 17 00:00:00 2001
From: YiPeng Chai <YiPeng.Chai@amd.com>
Date: Wed, 13 Dec 2023 16:15:30 +0800
Subject: [PATCH 1275/2340] drm/amd/pm: smu v13_0_6 supports ecc info by
 default

smu v13_0_6 supports ecc info by default.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 6cbd335a804b9..81b217bbdebbb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2864,6 +2864,13 @@ static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu,
 	return ret;
 }
 
+static ssize_t smu_v13_0_6_get_ecc_info(struct smu_context *smu,
+			void *table)
+{
+	/* Support ecc info by default */
+	return 0;
+}
+
 static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	/* init dpm */
 	.get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask,
@@ -2918,6 +2925,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	.i2c_init = smu_v13_0_6_i2c_control_init,
 	.i2c_fini = smu_v13_0_6_i2c_control_fini,
 	.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
+	.get_ecc_info = smu_v13_0_6_get_ecc_info,
 };
 
 void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
-- 
GitLab


From 99cab331a4ee621e3604542ca88f9d76f2865aef Mon Sep 17 00:00:00 2001
From: YiPeng Chai <YiPeng.Chai@amd.com>
Date: Tue, 12 Dec 2023 17:23:23 +0800
Subject: [PATCH 1276/2340] drm/amdgpu: Add umc page retirement for umc v12_0

Add umc page retirement for umc v12_0.

V2:
  1. Changed umc page retirement check condition
     to call umc_v12_0_is_uncorrectable_error.
  2. Use memset to clear the contents of the umc
     error address structure.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 56 ++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.h |  4 ++
 2 files changed, 60 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 8430888760bae..7458a218e89db 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "umc/umc_12_0_0_offset.h"
 #include "umc/umc_12_0_0_sh_mask.h"
+#include "mp/mp_13_0_6_sh_mask.h"
 
 const uint32_t
 	umc_v12_0_channel_idx_tbl[]
@@ -370,6 +371,59 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+					void *ras_error_status)
+{
+	amdgpu_mca_smu_log_ras_error(adev,
+		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status);
+	amdgpu_mca_smu_log_ras_error(adev,
+		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status);
+}
+
+static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+					void *ras_error_status)
+{
+	struct ras_err_node *err_node;
+	uint64_t mc_umc_status;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+	for_each_ras_error(err_node, err_data) {
+		mc_umc_status = err_node->err_info.err_addr.err_status;
+		if (!mc_umc_status)
+			continue;
+
+		if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) {
+			uint64_t mca_addr, err_addr, mca_ipid;
+			uint32_t InstanceIdLo;
+			struct amdgpu_smuio_mcm_config_info *mcm_info;
+
+			mcm_info = &err_node->err_info.mcm_info;
+			mca_addr = err_node->err_info.err_addr.err_addr;
+			mca_ipid = err_node->err_info.err_addr.err_ipid;
+
+			err_addr =  REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+			InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo);
+
+			dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
+				mca_ipid,
+				mcm_info->die_id,
+				MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
+				MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
+				err_addr);
+
+			umc_v12_0_convert_error_address(adev,
+				err_data, err_addr,
+				MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
+				MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
+				mcm_info->die_id);
+
+			/* Clear umc error address content */
+			memset(&err_node->err_info.err_addr,
+				0, sizeof(err_node->err_info.err_addr));
+		}
+	}
+}
+
 static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
 {
 	amdgpu_umc_loop_channels(adev,
@@ -396,4 +450,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
 	},
 	.err_cnt_init = umc_v12_0_err_cnt_init,
 	.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
+	.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
+	.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index 17b4b52d6f136..e8de3a92251a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -117,6 +117,10 @@
 		(pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
 	} while (0)
 
+#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
+			(((_ipid_lo) >> 12) & 0xF))
+#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
+
 bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
 bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
 
-- 
GitLab


From 87825c860eb8e4b80391c51ea1bb99e5cbac0025 Mon Sep 17 00:00:00 2001
From: ZhenGuo Yin <zhenguo.yin@amd.com>
Date: Tue, 19 Dec 2023 14:39:42 +0800
Subject: [PATCH 1277/2340] drm/amdgpu: re-create idle bo's PTE during VM state
 machine reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Idle bo's PTE needs to be re-created when resetting VM state machine.
Set idle bo's vm_bo as moved to mark it as invalid.

Fixes: 55bf196f60df ("drm/amdgpu: reset VM when an error is detected")
Signed-off-by: ZhenGuo Yin <zhenguo.yin@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7da71b6a9dc6a..b8fcb6c556989 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -285,6 +285,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
 	list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = vm_bo->bo;
 
+		vm_bo->moved = true;
 		if (!bo || bo->tbo.type != ttm_bo_type_kernel)
 			list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
 		else if (bo->parent)
-- 
GitLab


From 1ef151d7aa0a36050fab8063ec35b2c7c0f9870c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 13 Jan 2023 15:09:07 -0800
Subject: [PATCH 1278/2340] drm/xe: Add documentation for mem_type

mem_type field was added in commit d8b52a02cb40 ("drm/xe: Implement
stolen memory.") to designate the TTM memory type for that mgr. Add
kernel-doc with its description.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index cf02c62ff427f..39aa2ec1b9680 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -27,7 +27,7 @@ struct xe_ttm_vram_mgr {
 	u64 default_page_size;
 	/** @lock: protects allocations of VRAM */
 	struct mutex lock;
-
+	/** @mem_type: The TTM memory type */
 	u32 mem_type;
 };
 
-- 
GitLab


From a02a0c6d53099579e3b7aa811e1e254a11681c8a Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 5 Dec 2022 21:07:56 -0800
Subject: [PATCH 1279/2340] drm/xe: Add min config for kunit integration
 ARCH=um

Some of the tests may benefit from running with ARCH=um, forgoing any
additional setup on the CI build side. Add min config for that.

Tested with:

	./tools/testing/kunit/kunit.py build \
		--kunitconfig drivers/gpu/drm/xe/.kunitconfig \
		--jobs $(nproc) \
		--build_dir build_kunit

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
 drivers/gpu/drm/xe/.kunitconfig | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/.kunitconfig

diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
new file mode 100644
index 0000000000000..ce8a4348669d2
--- /dev/null
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -0,0 +1,7 @@
+# xe dependencies
+CONFIG_KUNIT=y
+CONFIG_PCI=y
+CONFIG_DRM=y
+CONFIG_DRM_XE=y
+CONFIG_EXPERT=y
+CONFIG_DRM_XE_KUNIT_TEST=y
-- 
GitLab


From 1598955dfce242113c4ba2cbdb5d4c7c28695a70 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@kernel.org>
Date: Thu, 19 Jan 2023 21:18:44 +0100
Subject: [PATCH 1280/2340] drm/xe/Kconfig.debug: select DEBUG_FS for KUnit
 runs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KUnit reuquires debugfs, as otherwise, it won't build:

$ make ARCH=x86_64 O=.kunit --jobs=8
ERROR:root:../drivers/gpu/drm/xe/display/intel_display_debugfs.c:1612:6: error: redefinition of ‘intel_display_debugfs_register’
 1612 | void intel_display_debugfs_register(struct drm_i915_private *i915)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from ../drivers/gpu/drm/xe/display/intel_display_debugfs.c:18:
../drivers/gpu/drm/xe/display/intel_display_debugfs.h:18:20: note: previous definition of ‘intel_display_debugfs_register’ with type ‘void(struct xe_device *)’
   18 | static inline void intel_display_debugfs_register(struct drm_i915_private *i915) {}
      |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_display_debugfs.c:1935:6: error: redefinition of ‘intel_connector_debugfs_add’
 1935 | void intel_connector_debugfs_add(struct intel_connector *intel_connector)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_display_debugfs.h:19:20: note: previous definition of ‘intel_connector_debugfs_add’ with type ‘void(struct intel_connector *)’
   19 | static inline void intel_connector_debugfs_add(struct intel_connector *connector) {}
      |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_display_debugfs.c:1993:6: error: redefinition of ‘intel_crtc_debugfs_add’
 1993 | void intel_crtc_debugfs_add(struct drm_crtc *crtc)
      |      ^~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_display_debugfs.h:20:20: note: previous definition of ‘intel_crtc_debugfs_add’ with type ‘void(struct drm_crtc *)’
   20 | static inline void intel_crtc_debugfs_add(struct drm_crtc *crtc) {}
      |                    ^~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/.kunitconfig  | 1 +
 drivers/gpu/drm/xe/Kconfig.debug | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
index ce8a4348669d2..aaf30db71cea3 100644
--- a/drivers/gpu/drm/xe/.kunitconfig
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -1,6 +1,7 @@
 # xe dependencies
 CONFIG_KUNIT=y
 CONFIG_PCI=y
+CONFIG_DEBUG_FS=y
 CONFIG_DRM=y
 CONFIG_DRM_XE=y
 CONFIG_EXPERT=y
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index b61fd43a76fed..565be3f6b9b96 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -63,7 +63,7 @@ config DRM_XE_SIMPLE_ERROR_CAPTURE
 
 config DRM_XE_KUNIT_TEST
         tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
-	depends on DRM_XE && KUNIT
+	depends on DRM_XE && KUNIT && DEBUG_FS
 	default KUNIT_ALL_TESTS
 	select DRM_EXPORT_FOR_TESTS if m
 	help
-- 
GitLab


From a4c75c0fd613a1cfb7f5ba6b494b80b40adbc78f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@kernel.org>
Date: Thu, 19 Jan 2023 21:26:49 +0100
Subject: [PATCH 1281/2340] drm/xe: KUnit tests depend on
 CONFIG_DRM_FBDEV_EMULATION
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ERROR:root:../drivers/gpu/drm/xe/display/intel_fbdev.c:585:5: error: redefinition of ‘intel_fbdev_init’
  585 | int intel_fbdev_init(struct drm_device *dev)
      |     ^~~~~~~~~~~~~~~~
In file included from ../drivers/gpu/drm/xe/display/intel_fbdev.c:55:
../drivers/gpu/drm/xe/display/intel_fbdev.h:26:19: note: previous definition of ‘intel_fbdev_init’ with type ‘int(struct drm_device *)’
   26 | static inline int intel_fbdev_init(struct drm_device *dev)
      |                   ^~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.c:626:6: error: redefinition of ‘intel_fbdev_initial_config_async’
  626 | void intel_fbdev_initial_config_async(struct drm_device *dev)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.h:31:20: note: previous definition of ‘intel_fbdev_initial_config_async’ with type ‘void(struct drm_device *)’
   31 | static inline void intel_fbdev_initial_config_async(struct drm_device *dev)
      |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.c:646:6: error: redefinition of ‘intel_fbdev_unregister’
  646 | void intel_fbdev_unregister(struct drm_i915_private *dev_priv)
      |      ^~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.h:35:20: note: previous definition of ‘intel_fbdev_unregister’ with type ‘void(struct xe_device *)’
   35 | static inline void intel_fbdev_unregister(struct drm_i915_private *dev_priv)
      |                    ^~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.c:661:6: error: redefinition of ‘intel_fbdev_fini’
  661 | void intel_fbdev_fini(struct drm_i915_private *dev_priv)
      |      ^~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.h:39:20: note: previous definition of ‘intel_fbdev_fini’ with type ‘void(struct xe_device *)’
   39 | static inline void intel_fbdev_fini(struct drm_i915_private *dev_priv)
      |                    ^~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.c:692:6: error: redefinition of ‘intel_fbdev_set_suspend’
  692 | void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous)
      |      ^~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.h:43:20: note: previous definition of ‘intel_fbdev_set_suspend’ with type ‘void(struct drm_device *, int,  bool)’ {aka ‘void(struct drm_device *, int,  _Bool)’}
   43 | static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous)
      |                    ^~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.c:751:6: error: redefinition of ‘intel_fbdev_output_poll_changed’
  751 | void intel_fbdev_output_poll_changed(struct drm_device *dev)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.h:47:20: note: previous definition of ‘intel_fbdev_output_poll_changed’ with type ‘void(struct drm_device *)’
   47 | static inline void intel_fbdev_output_poll_changed(struct drm_device *dev)
      |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.c:770:6: error: redefinition of ‘intel_fbdev_restore_mode’
  770 | void intel_fbdev_restore_mode(struct drm_device *dev)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.h:51:20: note: previous definition of ‘intel_fbdev_restore_mode’ with type ‘void(struct drm_device *)’
   51 | static inline void intel_fbdev_restore_mode(struct drm_device *dev)
      |                    ^~~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.c:785:27: error: redefinition of ‘intel_fbdev_framebuffer’
  785 | struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev)
      |                           ^~~~~~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/xe/display/intel_fbdev.h:54:41: note: previous definition of ‘intel_fbdev_framebuffer’ with type ‘struct intel_framebuffer *(struct intel_fbdev *)’
   54 | static inline struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev)
      |                                         ^~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/.kunitconfig  | 3 +++
 drivers/gpu/drm/xe/Kconfig.debug | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
index aaf30db71cea3..06ed30420a8df 100644
--- a/drivers/gpu/drm/xe/.kunitconfig
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -3,6 +3,9 @@ CONFIG_KUNIT=y
 CONFIG_PCI=y
 CONFIG_DEBUG_FS=y
 CONFIG_DRM=y
+CONFIG_DRM_FBDEV_EMULATION=y
+CONFIG_DRM_KMS_HELPER=y
 CONFIG_DRM_XE=y
 CONFIG_EXPERT=y
+CONFIG_FB=y
 CONFIG_DRM_XE_KUNIT_TEST=y
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 565be3f6b9b96..9c773dd74cbdd 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -64,6 +64,7 @@ config DRM_XE_SIMPLE_ERROR_CAPTURE
 config DRM_XE_KUNIT_TEST
         tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
 	depends on DRM_XE && KUNIT && DEBUG_FS
+	depends on FB && FB = DRM_KMS_HELPER && DRM_FBDEV_EMULATION
 	default KUNIT_ALL_TESTS
 	select DRM_EXPORT_FOR_TESTS if m
 	help
-- 
GitLab


From 9484c7dce4e99a38970baebe9ffdd5d76d757f2c Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Fri, 20 Jan 2023 16:43:27 +0530
Subject: [PATCH 1282/2340] drm/xe/gt: Enable interrupt while initializing root
 gt

At present the interrupts are enabled while initializing the last GT.
But this is incorrect for a Multi-GT platform, as root GT initialization
will fail with interrupt disabled. Interrupts are required for
the GuC submission triggered during initialization.
Enable the interrupt during the root GT initialization.

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index df2e3573201df..a2caa20f2fb34 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -334,7 +334,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 	GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
 		      GEN11_GU_MISC_GSE);
 
-	if (gt->info.id + 1 == xe->info.tile_count)
+	if (gt->info.id == XE_GT0)
 		dg1_intr_enable(xe, true);
 }
 
-- 
GitLab


From 6c8c1e74faecb6ca3057f154e911a52cf6a53d32 Mon Sep 17 00:00:00 2001
From: Philippe Lecluse <philippe.lecluse@intel.com>
Date: Fri, 20 Jan 2023 16:30:25 +0100
Subject: [PATCH 1283/2340] drm/xe: Fix Meteor Lake rsa issue on guc loading

[  117.901473] xe 0000:00:02.0: [drm] GuC load failed: status = 0x400000A0
[  117.901506] xe 0000:00:02.0: [drm] GuC load failed: status: Reset = 0, BootROM = 0x50, UKernel = 0x00, MIA = 0x00, Auth = 0x01

Signed-off-by: Philippe Lecluse <philippe.lecluse@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 39df6945e1d91..61a6430cb435a 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -436,6 +436,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_hw_fence_irq;
 
+	setup_private_ppat(gt);
+
 	if (!xe_gt_is_media_type(gt)) {
 		err = xe_ggtt_init(gt, gt->mem.ggtt);
 		if (err)
@@ -488,8 +490,6 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_hw_fence_irq;
 
-	setup_private_ppat(gt);
-
 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
 
 	err = xe_gt_clock_init(gt);
-- 
GitLab


From b3ab1b918e59c84ddaf190f75ba93be6cdea1fcb Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 19 Jan 2023 12:41:17 -0500
Subject: [PATCH 1284/2340] drm/xe/guc_pc: Fix Meteor Lake registers.

When adding the frequency management, Meteor Lake platform
was left behind. Handling it properly now.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 70 ++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index d751ee98de115..3ba0c8a351099 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -38,6 +38,9 @@
 #define GEN12_RPSTAT1		_MMIO(0x1381b4)
 #define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
 
+#define MTL_MIRROR_TARGET_WP1	_MMIO(0xc60)
+#define   MTL_CAGF_MASK		REG_GENMASK(8, 0)
+
 #define GT_FREQUENCY_MULTIPLIER	50
 #define GEN9_FREQ_SCALER	3
 
@@ -312,7 +315,20 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
 				   freq);
 }
 
-static void pc_update_rp_values(struct xe_guc_pc *pc)
+static void mtl_update_rpe_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY.reg);
+	else
+		reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY.reg);
+
+	pc->rpe_freq = REG_FIELD_GET(MTL_RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static void tgl_update_rpe_value(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
 	struct xe_device *xe = gt_to_xe(gt);
@@ -329,6 +345,17 @@ static void pc_update_rp_values(struct xe_guc_pc *pc)
 		reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg);
 
 	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static void pc_update_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.platform == XE_METEORLAKE)
+		mtl_update_rpe_value(pc);
+	else
+		tgl_update_rpe_value(pc);
 
 	/*
 	 * RPe is decided at runtime by PCODE. In the rare case where that's
@@ -343,6 +370,7 @@ static ssize_t freq_act_show(struct device *dev,
 {
 	struct kobject *kobj = &dev->kobj;
 	struct xe_gt *gt = kobj_to_gt(kobj);
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 freq;
 	ssize_t ret;
 
@@ -355,10 +383,17 @@ static ssize_t freq_act_show(struct device *dev,
 		return ret;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
-	freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg);
+
+	if (xe->info.platform == XE_METEORLAKE) {
+		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1.reg);
+		freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
+	} else {
+		freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg);
+		freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
+	}
+
 	xe_device_mem_access_put(gt_to_xe(gt));
 
-	freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
 	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
@@ -607,7 +642,24 @@ static const struct attribute *pc_attrs[] = {
 	NULL
 };
 
-static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
+static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP.reg);
+	else
+		reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP.reg);
+	pc->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, reg) *
+		GT_FREQUENCY_MULTIPLIER;
+	pc->rpn_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, reg) *
+		GT_FREQUENCY_MULTIPLIER;
+}
+
+static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
 	struct xe_device *xe = gt_to_xe(gt);
@@ -623,6 +675,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
 	pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 }
 
+static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.platform == XE_METEORLAKE)
+		mtl_init_fused_rp_values(pc);
+	else
+		tgl_init_fused_rp_values(pc);
+}
 static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
 {
 	int ret;
-- 
GitLab


From a93bcc3acf1fdf55b1906e37744ebab9be884a5d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@kernel.org>
Date: Fri, 20 Jan 2023 15:43:58 +0100
Subject: [PATCH 1285/2340] drm/xe: skip Kunit tests requiring real hardware
 when running on UML

Some tests are meant to run only on real hardware. Skip those,
if no device was found.

Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 49f1f0489f1c8..b61bde17f123d 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -615,16 +615,23 @@ void xe_unregister_pci_driver(void)
 }
 
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
-static int dev_to_xe_device_fn(struct device *dev, void *data)
+struct kunit_test_data {
+	int ndevs;
+	xe_device_fn xe_fn;
+};
+
+static int dev_to_xe_device_fn(struct device *dev, void *__data)
 
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	int (*xe_fn)(struct xe_device *xe) = data;
+	struct kunit_test_data *data = __data;
 	int ret = 0;
 	int idx;
 
+	data->ndevs++;
+
 	if (drm_dev_enter(drm, &idx))
-		ret = xe_fn(to_xe_device(dev_get_drvdata(dev)));
+		ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev)));
 	drm_dev_exit(idx);
 
 	return ret;
@@ -645,7 +652,18 @@ static int dev_to_xe_device_fn(struct device *dev, void *data)
  */
 int xe_call_for_each_device(xe_device_fn xe_fn)
 {
-	return driver_for_each_device(&xe_pci_driver.driver, NULL,
-				      xe_fn, dev_to_xe_device_fn);
+	int ret;
+	struct kunit_test_data data = {
+	    .xe_fn = xe_fn,
+	    .ndevs = 0,
+	};
+
+	ret = driver_for_each_device(&xe_pci_driver.driver, NULL,
+				     &data, dev_to_xe_device_fn);
+
+	if (!data.ndevs)
+		kunit_skip(current->kunit_test, "test runs only on hardware\n");
+
+	return ret;
 }
 #endif
-- 
GitLab


From 8375e58c3ac96a43603530a6f02fc81a455982e7 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Mon, 23 Jan 2023 18:17:56 +0100
Subject: [PATCH 1286/2340] drm/xe: Use global macros to set PM functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This aligns with other drivers and fixes build failure when
CONFIG_PM_SLEEP is not set, such as on RISC-V.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index b61bde17f123d..67fd9c3818f90 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -584,15 +584,8 @@ static int xe_pci_runtime_idle(struct device *dev)
 }
 
 static const struct dev_pm_ops xe_pm_ops = {
-	.suspend = xe_pci_suspend,
-	.resume = xe_pci_resume,
-	.freeze = xe_pci_suspend,
-	.thaw = xe_pci_resume,
-	.poweroff = xe_pci_suspend,
-	.restore = xe_pci_resume,
-	.runtime_suspend = xe_pci_runtime_suspend,
-	.runtime_resume = xe_pci_runtime_resume,
-	.runtime_idle = xe_pci_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume)
+	SET_RUNTIME_PM_OPS(xe_pci_runtime_suspend, xe_pci_runtime_resume, xe_pci_runtime_idle)
 };
 
 static struct pci_driver xe_pci_driver = {
-- 
GitLab


From 3949d57f1ef62ea00344617fd638ed6c778db8d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Mon, 23 Jan 2023 08:43:10 -0800
Subject: [PATCH 1287/2340] drm/xe/uapi: Rename XE_ENGINE_PROPERTY_X to
 XE_ENGINE_SET_PROPERTY_X
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Engine property get uAPI will be added, so to avoid ambiguity here
renaming XE_ENGINE_PROPERTY_X to XE_ENGINE_SET_PROPERTY_X.

No changes in behavior.

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c | 18 +++++++++---------
 include/uapi/drm/xe_drm.h      | 18 +++++++++---------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 63219bd98be7b..1b85bf4abe3d4 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -314,15 +314,15 @@ typedef int (*xe_engine_set_property_fn)(struct xe_device *xe,
 					 u64 value, bool create);
 
 static const xe_engine_set_property_fn engine_set_property_funcs[] = {
-	[XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority,
-	[XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice,
-	[XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
-	[XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
-	[XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence,
-	[XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
-	[XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
-	[XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
-	[XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
+	[XE_ENGINE_SET_PROPERTY_PRIORITY] = engine_set_priority,
+	[XE_ENGINE_SET_PROPERTY_TIMESLICE] = engine_set_timeslice,
+	[XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
+	[XE_ENGINE_SET_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
+	[XE_ENGINE_SET_PROPERTY_PERSISTENCE] = engine_set_persistence,
+	[XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
+	[XE_ENGINE_SET_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
+	[XE_ENGINE_SET_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
+	[XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
 };
 
 static int engine_user_ext_set_property(struct xe_device *xe,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index f64b1c785fad9..8dc8ebbaf337a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -511,21 +511,21 @@ struct drm_xe_engine_set_property {
 	__u32 engine_id;
 
 	/** @property: property to set */
-#define XE_ENGINE_PROPERTY_PRIORITY			0
-#define XE_ENGINE_PROPERTY_TIMESLICE			1
-#define XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT		2
+#define XE_ENGINE_SET_PROPERTY_PRIORITY			0
+#define XE_ENGINE_SET_PROPERTY_TIMESLICE		1
+#define XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
 	/*
 	 * Long running or ULLS engine mode. DMA fences not allowed in this
 	 * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves
 	 * as a sanity check the UMD knows what it is doing. Can only be set at
 	 * engine create time.
 	 */
-#define XE_ENGINE_PROPERTY_COMPUTE_MODE			3
-#define XE_ENGINE_PROPERTY_PERSISTENCE			4
-#define XE_ENGINE_PROPERTY_JOB_TIMEOUT			5
-#define XE_ENGINE_PROPERTY_ACC_TRIGGER			6
-#define XE_ENGINE_PROPERTY_ACC_NOTIFY			7
-#define XE_ENGINE_PROPERTY_ACC_GRANULARITY		8
+#define XE_ENGINE_SET_PROPERTY_COMPUTE_MODE		3
+#define XE_ENGINE_SET_PROPERTY_PERSISTENCE		4
+#define XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT		5
+#define XE_ENGINE_SET_PROPERTY_ACC_TRIGGER		6
+#define XE_ENGINE_SET_PROPERTY_ACC_NOTIFY		7
+#define XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY		8
 	__u32 property;
 
 	/** @value: property value */
-- 
GitLab


From 19431b029b8b5d095e77767f269cb142c687084e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Mon, 23 Jan 2023 09:11:32 -0800
Subject: [PATCH 1288/2340] drm/xe/uapi: Add XE_ENGINE_GET_PROPERTY uAPI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is intended to get some properties that are of interest of UMDs
like the ban state.

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c |  2 ++
 drivers/gpu/drm/xe/xe_engine.c | 26 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_engine.h |  2 ++
 include/uapi/drm/xe_drm.h      | 22 +++++++++++++++++++++-
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 104ab12cc2edb..9881b591bfdd3 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -89,6 +89,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl,
 			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_ENGINE_GET_PROPERTY, xe_engine_get_property_ioctl,
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 1b85bf4abe3d4..b69dcbef08249 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -639,6 +639,32 @@ put_rpm:
 	return err;
 }
 
+int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_engine_get_property *args = data;
+	struct xe_engine *e;
+
+	mutex_lock(&xef->engine.lock);
+	e = xa_load(&xef->engine.xa, args->engine_id);
+	mutex_unlock(&xef->engine.lock);
+
+	if (XE_IOCTL_ERR(xe, !e))
+		return -ENOENT;
+
+	switch (args->property) {
+	case XE_ENGINE_GET_PROPERTY_BAN:
+		args->value = !!(e->flags & ENGINE_FLAG_BANNED);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void engine_kill_compute(struct xe_engine *e)
 {
 	if (!xe_vm_in_compute_mode(e->vm))
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
index 4d1b609fea7e0..a3a44534003f7 100644
--- a/drivers/gpu/drm/xe/xe_engine.h
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -50,5 +50,7 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file);
 int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file);
+int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file);
 
 #endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 8dc8ebbaf337a..756c5994ae63d 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -118,6 +118,7 @@ struct xe_user_extension {
 #define DRM_XE_ENGINE_SET_PROPERTY	0x0a
 #define DRM_XE_WAIT_USER_FENCE		0x0b
 #define DRM_XE_VM_MADVISE		0x0c
+#define DRM_XE_ENGINE_GET_PROPERTY	0x0d
 
 /* Must be kept compact -- no holes */
 #define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -127,6 +128,7 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
 #define DRM_IOCTL_XE_VM_BIND			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
 #define DRM_IOCTL_XE_ENGINE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create)
+#define DRM_IOCTL_XE_ENGINE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_GET_PROPERTY, struct drm_xe_engine_get_property)
 #define DRM_IOCTL_XE_ENGINE_DESTROY		DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy)
 #define DRM_IOCTL_XE_EXEC			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_MMIO			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio)
@@ -568,8 +570,26 @@ struct drm_xe_engine_create {
 	__u64 reserved[2];
 };
 
+struct drm_xe_engine_get_property {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @engine_id: Engine ID */
+	__u32 engine_id;
+
+	/** @property: property to get */
+#define XE_ENGINE_GET_PROPERTY_BAN			0
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
 struct drm_xe_engine_destroy {
-	/** @vm_id: VM ID */
+	/** @engine_id: Engine ID */
 	__u32 engine_id;
 
 	/** @pad: MBZ */
-- 
GitLab


From 09a68b4a76e3d870d2fad34099d27cc7e2c9939b Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Mon, 23 Jan 2023 15:41:58 +0100
Subject: [PATCH 1289/2340] drm/xe: Convert memory device refcount to s32

The comparison with < 0 suggests that the memory device access
should be signed to handle underflow. This makes it work more reliably.

As a result, the max refcount is now S32_MAX instead.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c       | 2 +-
 drivers/gpu/drm/xe/xe_device_types.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 9881b591bfdd3..98f08cd9d4b01 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -351,7 +351,7 @@ void xe_device_mem_access_get(struct xe_device *xe)
 	if (resumed)
 		xe_pm_runtime_put(xe);
 
-	XE_WARN_ON(xe->mem_access.ref == U32_MAX);
+	XE_WARN_ON(xe->mem_access.ref == S32_MAX);
 }
 
 void xe_device_mem_access_put(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index d62ee85bfcbe0..81bc293fb240c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -173,7 +173,7 @@ struct xe_device {
 		/** @lock: protect the ref count */
 		struct mutex lock;
 		/** @ref: ref count of memory accesses */
-		u32 ref;
+		s32 ref;
 		/** @hold_rpm: need to put rpm ref back at the end */
 		bool hold_rpm;
 	} mem_access;
-- 
GitLab


From 9b6483af3709386fe0e544bfa8cc01f8a92e0d57 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 24 Jan 2023 18:28:03 +0100
Subject: [PATCH 1290/2340] drm/xe: Map initial FB at the same place in GGTT
 too

I saw a flicker when booting xe, and it's very likely that the original
FB was not mapped at the same place when inheriting, fix it.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c   |  7 ++++++-
 drivers/gpu/drm/xe/xe_ggtt.c | 16 +++++++++++++---
 drivers/gpu/drm/xe/xe_ggtt.h |  1 +
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index f07d1cd63fddf..1fcde1e933012 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1110,7 +1110,12 @@ xe_bo_create_locked_range(struct xe_device *xe,
 
 		XE_BUG_ON(!gt);
 
-		err = xe_ggtt_insert_bo(gt->mem.ggtt, bo);
+		if (flags & XE_BO_CREATE_STOLEN_BIT &&
+		    flags & XE_BO_FIXED_PLACEMENT_BIT) {
+			err = xe_ggtt_insert_bo_at(gt->mem.ggtt, bo, start);
+		} else {
+			err = xe_ggtt_insert_bo(gt->mem.ggtt, bo);
+		}
 		if (err)
 			goto err_unlock_put_bo;
 	}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 0018c84417470..b1b9fc57a5db1 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -256,7 +256,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	xe_ggtt_invalidate(ggtt->gt);
 }
 
-int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end)
 {
 	int err;
 
@@ -271,12 +271,22 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 		return err;
 
 	mutex_lock(&ggtt->lock);
-	err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size);
+	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, 0, 0, start, end, 0);
 	if (!err)
 		xe_ggtt_map_bo(ggtt, bo);
 	mutex_unlock(&ggtt->lock);
 
-	return 0;
+	return err;
+}
+
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs)
+{
+	return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size);
+}
+
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
 }
 
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 289c6852ad1a2..ab9cfdab5cca1 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -23,6 +23,7 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs);
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 
 #endif
-- 
GitLab


From f3edf6917ca8e4e11a6af39e926558d4609dd9ea Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 20 Jan 2023 11:48:53 +0000
Subject: [PATCH 1291/2340] drm/xe/bo: reduce xe_bo_create_pin_map()
 restrictions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On DGFX this blows up if can call this with a system memory object:

XE_BUG_ON(!mem_type_is_vram(place->mem_type) && place->mem_type != XE_PL_STOLEN);

If we consider dpt it looks like we can already in theory hit this, if
we run out of vram and stolen vram. It at least seems reasonable to
allow calling this on any object which supports CPU access.

Note this also changes the behaviour with stolen VRAM and suspend, such
that we no longer attempt to migrate stolen objects into system memory.
However nothing in stolen should ever need to be restored (same on
integrated), so should be fine. Also on small-bar systems the stolen
portion is pretty much always non-CPU accessible, and currently pinned
objects use plain memcpy when being moved, which doesn't play nicely.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 1fcde1e933012..3c9d90dcf1251 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1290,25 +1290,26 @@ int xe_bo_pin(struct xe_bo *bo)
 		return err;
 
 	/*
-	 * For pinned objects in on DGFX, we expect these objects to be in
-	 * contiguous VRAM memory. Required eviction / restore during suspend /
-	 * resume (force restore to same physical address).
+	 * For pinned objects in on DGFX, which are also in vram, we expect
+	 * these to be in contiguous VRAM memory. Required eviction / restore
+	 * during suspend / resume (force restore to same physical address).
 	 */
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
 	    bo->flags & XE_BO_INTERNAL_TEST)) {
 		struct ttm_place *place = &(bo->placements[0]);
 		bool lmem;
 
-		XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
-		XE_BUG_ON(!mem_type_is_vram(place->mem_type) && place->mem_type != XE_PL_STOLEN);
+		if (mem_type_is_vram(place->mem_type)) {
+			XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
 
-		place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
-			       vram_region_io_offset(bo)) >> PAGE_SHIFT;
-		place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
+				       vram_region_io_offset(bo)) >> PAGE_SHIFT;
+			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
 
-		spin_lock(&xe->pinned.lock);
-		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
-		spin_unlock(&xe->pinned.lock);
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+			spin_unlock(&xe->pinned.lock);
+		}
 	}
 
 	ttm_bo_pin(&bo->ttm);
@@ -1364,11 +1365,15 @@ void xe_bo_unpin(struct xe_bo *bo)
 
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
 	    bo->flags & XE_BO_INTERNAL_TEST)) {
-		XE_BUG_ON(list_empty(&bo->pinned_link));
+		struct ttm_place *place = &(bo->placements[0]);
 
-		spin_lock(&xe->pinned.lock);
-		list_del_init(&bo->pinned_link);
-		spin_unlock(&xe->pinned.lock);
+		if (mem_type_is_vram(place->mem_type)) {
+			XE_BUG_ON(list_empty(&bo->pinned_link));
+
+			spin_lock(&xe->pinned.lock);
+			list_del_init(&bo->pinned_link);
+			spin_unlock(&xe->pinned.lock);
+		}
 	}
 
 	ttm_bo_unpin(&bo->ttm);
-- 
GitLab


From e63f81adcc4283aed7d4fe5da1219881cc6f67d4 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 22 Dec 2022 14:09:02 +0000
Subject: [PATCH 1292/2340] drm/xe/ppgtt: clear the scratch page
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to ensure we don't leak the contents to userspace.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 688bc4b56294c..d7fb1ddb8789c 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -333,14 +333,16 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
 	u8 id = gt->info.id;
 	int i;
 
-	vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K,
-					  ttm_bo_type_kernel,
-					  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
-					  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
-					  XE_BO_CREATE_PINNED_BIT);
+	vm->scratch_bo[id] = xe_bo_create_pin_map(xe, gt, vm, SZ_4K,
+						  ttm_bo_type_kernel,
+						  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+						  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+						  XE_BO_CREATE_PINNED_BIT);
 	if (IS_ERR(vm->scratch_bo[id]))
 		return PTR_ERR(vm->scratch_bo[id]);
-	xe_bo_pin(vm->scratch_bo[id]);
+
+	xe_map_memset(vm->xe, &vm->scratch_bo[id]->vmap, 0, 0,
+		      vm->scratch_bo[id]->size);
 
 	for (i = 0; i < vm->pt_root[id]->level; i++) {
 		vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i);
-- 
GitLab


From b1e52b65712969a74f0ba9ffbf67dde98ce33c2f Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 19 Jan 2023 12:16:51 +0000
Subject: [PATCH 1293/2340] drm/xe/ppgtt: fix scratch page usage on DG2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On DG2 when running the xe_vm IGT, the kernel generates loads of CAT
errors and GT resets (sometimes at least).  On small-bar systems seems
to trigger a lot more easily (maybe due to difference in allocation
strategy). Appears to be related to scratch, since we seem to use the
64K TLB hint on scratch entries, even though the scratch page is a 4K
vram page. Bumping the scratch page size and physical alignment seems
to fix it. Or at least we no longer hit:

[  148.872683] xe 0000:03:00.0: [drm] Engine memory cat error: guc_id=0
[  148.872701] xe 0000:03:00.0: [drm] Engine memory cat error: guc_id=0
[  148.875108] WARNING: CPU: 0 PID: 953 at drivers/gpu/drm/xe/xe_guc_submit.c:797

However to keep things simple, so we don't have to deal with 64K TLB
hints, just move the scratch page into system memory on platforms that
require 64K VRAM pages.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index d7fb1ddb8789c..01673fe969302 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -175,8 +175,6 @@ static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm,
 	if (level == 0) {
 		u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0,
 					    XE_CACHE_WB, 0, 0);
-		if (vm->flags & XE_VM_FLAGS_64K)
-			empty |= GEN12_PTE_PS64;
 
 		return empty;
 	} else {
@@ -331,13 +329,23 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
 			 struct xe_vm *vm)
 {
 	u8 id = gt->info.id;
+	unsigned int flags;
 	int i;
 
+	/*
+	 * So we don't need to worry about 64K TLB hints when dealing with
+	 * scratch entires, rather keep the scratch page in system memory on
+	 * platforms where 64K pages are needed for VRAM.
+	 */
+	flags = XE_BO_CREATE_PINNED_BIT;
+	if (vm->flags & XE_VM_FLAGS_64K)
+		flags |= XE_BO_CREATE_SYSTEM_BIT;
+	else
+		flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt);
+
 	vm->scratch_bo[id] = xe_bo_create_pin_map(xe, gt, vm, SZ_4K,
 						  ttm_bo_type_kernel,
-						  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
-						  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
-						  XE_BO_CREATE_PINNED_BIT);
+						  flags);
 	if (IS_ERR(vm->scratch_bo[id]))
 		return PTR_ERR(vm->scratch_bo[id]);
 
-- 
GitLab


From 5e53d1e806aeb2b05c85d24cd75f848631e8a121 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 26 Jan 2023 11:31:34 +0000
Subject: [PATCH 1294/2340] drm/xe/ggtt: fix alignment usage for DG2

Spec says we need to use 64K VRAM pages for GGTT on platforms like DG2.
In GGTT this just means aligning the GTT address to 64K and ensuring
that we have 16 consecutive entries each pointing to the respective 4K
entry. We already ensure we have 64K pages underneath, so it's just a
case of forcing the GTT alignment.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c       | 25 +++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_ggtt_types.h |  3 +++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index b1b9fc57a5db1..e9273b5d2a9f2 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -108,6 +108,9 @@ int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
 	ggtt->gsm = gt->mmio.regs + SZ_8M;
 	ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE;
 
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		ggtt->flags |= XE_GGTT_FLAGS_64K;
+
 	/*
 	 * 8B per entry, each points to a 4KB page.
 	 *
@@ -256,7 +259,8 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	xe_ggtt_invalidate(ggtt->gt);
 }
 
-static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end)
+static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+				  u64 start, u64 end, u64 alignment)
 {
 	int err;
 
@@ -271,7 +275,8 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 st
 		return err;
 
 	mutex_lock(&ggtt->lock);
-	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, 0, 0, start, end, 0);
+	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
+					  alignment, 0, start, end, 0);
 	if (!err)
 		xe_ggtt_map_bo(ggtt, bo);
 	mutex_unlock(&ggtt->lock);
@@ -281,12 +286,24 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 st
 
 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs)
 {
-	return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size);
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) {
+		if (XE_WARN_ON(!IS_ALIGNED(ofs, SZ_64K)) ||
+		    XE_WARN_ON(!IS_ALIGNED(bo->size, SZ_64K)))
+			return -EINVAL;
+	}
+
+	return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size, 0);
 }
 
 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
-	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
+	u64 alignment;
+
+	alignment = GEN8_PAGE_SIZE;
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		alignment = SZ_64K;
+
+	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, alignment);
 }
 
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index e041930017631..ea70aaef4b317 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -16,6 +16,9 @@ struct xe_ggtt {
 
 	u64 size;
 
+#define XE_GGTT_FLAGS_64K BIT(0)
+	unsigned int flags;
+
 	struct xe_bo *scratch;
 
 	struct mutex lock;
-- 
GitLab


From c5151fa80060a869c0308067e758a271c217ff61 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 16 Jan 2023 10:46:21 +0000
Subject: [PATCH 1295/2340] drm/xe/ggtt: fix GGTT scratch usage for DG2

Scratch page is in VRAM, and therefore requires 64K GTT layout. In GGTT
world this just means having 16 consecutive entries, with 64K GTT
alignment for the GTT address of the first entry (also matching physical
alignment). However to keep things simple just dump it into system
memory, like we already do for ppGTT.  While we are here, also give it
known default value.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index e9273b5d2a9f2..baa080cd11337 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -13,6 +13,7 @@
 #include "xe_device.h"
 #include "xe_bo.h"
 #include "xe_gt.h"
+#include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_wopcm.h"
 
@@ -152,23 +153,30 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
 int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int flags;
 	int err;
 
-	ggtt->scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE,
-					    ttm_bo_type_kernel,
-					    XE_BO_CREATE_VRAM_IF_DGFX(gt) |
-					    XE_BO_CREATE_PINNED_BIT);
+	/*
+	 * So we don't need to worry about 64K GGTT layout when dealing with
+	 * scratch entires, rather keep the scratch page in system memory on
+	 * platforms where 64K pages are needed for VRAM.
+	 */
+	flags = XE_BO_CREATE_PINNED_BIT;
+	if (ggtt->flags & XE_GGTT_FLAGS_64K)
+		flags |= XE_BO_CREATE_SYSTEM_BIT;
+	else
+		flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt);
+
+	ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, GEN8_PAGE_SIZE,
+					     ttm_bo_type_kernel,
+					     flags);
+
 	if (IS_ERR(ggtt->scratch)) {
 		err = PTR_ERR(ggtt->scratch);
 		goto err;
 	}
 
-	err = xe_bo_pin(ggtt->scratch);
-	xe_bo_unlock_no_vm(ggtt->scratch);
-	if (err) {
-		xe_bo_put(ggtt->scratch);
-		goto err;
-	}
+	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size);
 
 	xe_ggtt_initial_clear(ggtt);
 	return 0;
-- 
GitLab


From e89b384cde622f6f553a740c73870327ee86fcc5 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 6 Jan 2023 11:34:57 -0800
Subject: [PATCH 1296/2340] drm/xe/migrate: Update emit_pte to cope with a size
 level than 4k
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

emit_pte assumes the size argument is 4k aligned, this may not be true
for the PTEs emitted for CSS as seen by below call stack:

[   56.734228] xe_migrate_copy:585: size=327680, ccs_start=327680, css_size=1280,4096
[   56.734250] xe_migrate_copy:643: size=262144
[   56.734252] emit_pte:404: ptes=64
[   56.734255] emit_pte:418: chunk=64
[   56.734257] xe_migrate_copy:650: size=1024	@ CCS emit PTE
[   56.734259] emit_pte:404: ptes=1
[   56.734261] emit_pte:418: chunk=1
[   56.734339] xe_migrate_copy:643: size=65536
[   56.734342] emit_pte:404: ptes=16
[   56.734344] emit_pte:418: chunk=16
[   56.734346] xe_migrate_copy:650: size=256	# CCS emit PTE
[   56.734348] emit_pte:404: ptes=1
[   56.734350] emit_pte:418: chunk=1
[   56.734352] xe_res_next:174: size=4096, remaining=0

Update emit_pte to handle sizes less than 4k.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index d3fa7bec78d38..377ab019b4c84 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -453,11 +453,8 @@ static void emit_pte(struct xe_migrate *m,
 		while (chunk--) {
 			u64 addr;
 
-			XE_BUG_ON(cur->start & (PAGE_SIZE - 1));
-
+			addr = xe_res_dma(cur) & PAGE_MASK;
 			if (is_vram) {
-				addr = cur->start;
-
 				/* Is this a 64K PTE entry? */
 				if ((m->eng->vm->flags & XE_VM_FLAGS_64K) &&
 				    !(cur_ofs & (16 * 8 - 1))) {
@@ -466,14 +463,12 @@ static void emit_pte(struct xe_migrate *m,
 				}
 
 				addr |= GEN12_PPGTT_PTE_LM;
-			} else {
-				addr = xe_res_dma(cur);
 			}
 			addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 
-			xe_res_next(cur, PAGE_SIZE);
+			xe_res_next(cur, min(size, (u32)PAGE_SIZE));
 			cur_ofs += 8;
 		}
 	}
@@ -615,13 +610,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
 
 	if (!src_is_vram)
-		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &src_it);
 	else
-		xe_res_first(src, 0, bo->size, &src_it);
+		xe_res_first(src, 0, size, &src_it);
 	if (!dst_is_vram)
-		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it);
+		xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &dst_it);
 	else
-		xe_res_first(dst, 0, bo->size, &dst_it);
+		xe_res_first(dst, 0, size, &dst_it);
 
 	if (copy_system_ccs)
 		xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo),
-- 
GitLab


From 5b643660875d01c203782a86ac5e3353849bc513 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 20 Jan 2023 09:17:50 -0800
Subject: [PATCH 1297/2340] drm/xe: Don't process TLB invalidation done in CT
 fast-path

We can't currently do this due to TLB invalidation done handler
expecting the seqno being received in-order, with the fast-path a TLB
invalidation done could pass one being processed in the slow-path in an
extreme corner case. Remove TLB invalidation done from the fast-path for
now and in a follow up reenable this once the TLB invalidation done
handler can deal with out of order seqno.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index f48eb01847efa..6e25c1d5d43ea 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -966,7 +966,14 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 			return 0;
 
 		switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) {
-		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		/*
+		 * FIXME: We really should process
+		 * XE_GUC_ACTION_TLB_INVALIDATION_DONE here in the fast-path as
+		 * these critical for page fault performance. We currently can't
+		 * due to TLB invalidation done algorithm expecting the seqno
+		 * returned in-order. With some small changes to the algorithm
+		 * and locking we should be able to support out-of-order seqno.
+		 */
 		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
 			break;	/* Process these in fast-path */
 		default:
-- 
GitLab


From a9351846d94568d96e7400be343392c58e4f82e6 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 17 Jan 2023 20:31:24 -0800
Subject: [PATCH 1298/2340] drm/xe: Break of TLB invalidation into its own file

TLB invalidation is used by more than USM (page faults) so break this
code out into its own file.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                 |   1 +
 drivers/gpu/drm/xe/xe_gt.c                  |   5 +
 drivers/gpu/drm/xe/xe_gt_debugfs.c          |   1 +
 drivers/gpu/drm/xe/xe_gt_pagefault.c        |  99 +----------------
 drivers/gpu/drm/xe/xe_gt_pagefault.h        |   3 -
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 115 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h |  19 ++++
 drivers/gpu/drm/xe/xe_guc_ct.c              |   1 +
 drivers/gpu/drm/xe/xe_vm.c                  |   1 +
 9 files changed, 146 insertions(+), 99 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index f8da32b550bcd..998f7044b0470 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -57,6 +57,7 @@ xe-y += xe_bb.o \
 	xe_gt_mcr.o \
 	xe_gt_pagefault.o \
 	xe_gt_sysfs.o \
+	xe_gt_tlb_invalidation.o \
 	xe_gt_topology.o \
 	xe_guc.o \
 	xe_guc_ads.o \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 61a6430cb435a..96136f130eda7 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -19,6 +19,7 @@
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
 #include "xe_gt_sysfs.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_fence.h"
 #include "xe_irq.h"
@@ -571,6 +572,10 @@ int xe_gt_init(struct xe_gt *gt)
 		xe_hw_fence_irq_init(&gt->fence_irq[i]);
 	}
 
+	err = xe_gt_tlb_invalidation_init(gt);
+	if (err)
+		return err;
+
 	err = xe_gt_pagefault_init(gt);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index cd18887841410..01303bbe073c2 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -12,6 +12,7 @@
 #include "xe_gt_debugfs.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_engine.h"
 #include "xe_macros.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 7125113b7390f..93a8efe5d0a02 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -10,9 +10,10 @@
 
 #include "xe_bo.h"
 #include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
-#include "xe_gt_pagefault.h"
 #include "xe_migrate.h"
 #include "xe_pt.h"
 #include "xe_trace.h"
@@ -61,40 +62,6 @@ guc_to_gt(struct xe_guc *guc)
 	return container_of(guc, struct xe_gt, uc.guc);
 }
 
-static int send_tlb_invalidation(struct xe_guc *guc)
-{
-	struct xe_gt *gt = guc_to_gt(guc);
-	u32 action[] = {
-		XE_GUC_ACTION_TLB_INVALIDATION,
-		0,
-		XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
-		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
-		XE_GUC_TLB_INVAL_FLUSH_CACHE,
-	};
-	int seqno;
-	int ret;
-
-	/*
-	 * XXX: The seqno algorithm relies on TLB invalidation being processed
-	 * in order which they currently are, if that changes the algorithm will
-	 * need to be updated.
-	 */
-	mutex_lock(&guc->ct.lock);
-	seqno = gt->usm.tlb_invalidation_seqno;
-	action[1] = seqno;
-	gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) %
-		TLB_INVALIDATION_SEQNO_MAX;
-	if (!gt->usm.tlb_invalidation_seqno)
-		gt->usm.tlb_invalidation_seqno = 1;
-	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
-				    G2H_LEN_DW_TLB_INVALIDATE, 1);
-	if (!ret)
-		ret = seqno;
-	mutex_unlock(&guc->ct.lock);
-
-	return ret;
-}
-
 static bool access_is_atomic(enum access_type access_type)
 {
 	return access_type == ACCESS_TYPE_ATOMIC;
@@ -278,7 +245,7 @@ unlock_vm:
 		 * defer TLB invalidate + fault response to a callback of fence
 		 * too
 		 */
-		ret = send_tlb_invalidation(&gt->uc.guc);
+		ret = xe_gt_tlb_invalidation(gt);
 		if (ret >= 0)
 			ret = 0;
 	}
@@ -433,7 +400,6 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
 	if (!xe->info.supports_usm)
 		return 0;
 
-	gt->usm.tlb_invalidation_seqno = 1;
 	for (i = 0; i < NUM_PF_QUEUE; ++i) {
 		gt->usm.pf_queue[i].gt = gt;
 		spin_lock_init(&gt->usm.pf_queue[i].lock);
@@ -482,65 +448,6 @@ void xe_gt_pagefault_reset(struct xe_gt *gt)
 	}
 }
 
-int xe_gt_tlb_invalidation(struct xe_gt *gt)
-{
-	return send_tlb_invalidation(&gt->uc.guc);
-}
-
-static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
-{
-	if (gt->usm.tlb_invalidation_seqno_recv >= seqno)
-		return true;
-
-	if (seqno - gt->usm.tlb_invalidation_seqno_recv >
-	    (TLB_INVALIDATION_SEQNO_MAX / 2))
-		return true;
-
-	return false;
-}
-
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
-{
-	struct xe_device *xe = gt_to_xe(gt);
-	struct xe_guc *guc = &gt->uc.guc;
-	int ret;
-
-	/*
-	 * XXX: See above, this algorithm only works if seqno are always in
-	 * order
-	 */
-	ret = wait_event_timeout(guc->ct.wq,
-				 tlb_invalidation_seqno_past(gt, seqno),
-				 HZ / 5);
-	if (!ret) {
-		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
-			seqno, gt->usm.tlb_invalidation_seqno_recv);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
-{
-	struct xe_gt *gt = guc_to_gt(guc);
-	int expected_seqno;
-
-	if (unlikely(len != 1))
-		return -EPROTO;
-
-	/* Sanity check on seqno */
-	expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) %
-		TLB_INVALIDATION_SEQNO_MAX;
-	XE_WARN_ON(expected_seqno != msg[0]);
-
-	gt->usm.tlb_invalidation_seqno_recv = msg[0];
-	smp_wmb();
-	wake_up_all(&guc->ct.wq);
-
-	return 0;
-}
-
 static int granularity_in_byte(int val)
 {
 	switch (val) {
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
index 35f68027cc9ce..839c065a5e4c3 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.h
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h
@@ -13,10 +13,7 @@ struct xe_guc;
 
 int xe_gt_pagefault_init(struct xe_gt *gt);
 void xe_gt_pagefault_reset(struct xe_gt *gt);
-int xe_gt_tlb_invalidation(struct xe_gt *gt);
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
-int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
 #endif	/* _XE_GT_PAGEFAULT_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
new file mode 100644
index 0000000000000..fea7a557d2139
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
+{
+	gt->usm.tlb_invalidation_seqno = 1;
+
+	return 0;
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION,
+		0,
+		XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
+		XE_GUC_TLB_INVAL_FLUSH_CACHE,
+	};
+	int seqno;
+	int ret;
+
+	/*
+	 * XXX: The seqno algorithm relies on TLB invalidation being processed
+	 * in order which they currently are, if that changes the algorithm will
+	 * need to be updated.
+	 */
+	mutex_lock(&guc->ct.lock);
+	seqno = gt->usm.tlb_invalidation_seqno;
+	action[1] = seqno;
+	gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) %
+		TLB_INVALIDATION_SEQNO_MAX;
+	if (!gt->usm.tlb_invalidation_seqno)
+		gt->usm.tlb_invalidation_seqno = 1;
+	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
+				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret)
+		ret = seqno;
+	mutex_unlock(&guc->ct.lock);
+
+	return ret;
+}
+
+int xe_gt_tlb_invalidation(struct xe_gt *gt)
+{
+	return send_tlb_invalidation(&gt->uc.guc);
+}
+
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+	if (gt->usm.tlb_invalidation_seqno_recv >= seqno)
+		return true;
+
+	if (seqno - gt->usm.tlb_invalidation_seqno_recv >
+	    (TLB_INVALIDATION_SEQNO_MAX / 2))
+		return true;
+
+	return false;
+}
+
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc *guc = &gt->uc.guc;
+	int ret;
+
+	/*
+	 * XXX: See above, this algorithm only works if seqno are always in
+	 * order
+	 */
+	ret = wait_event_timeout(guc->ct.wq,
+				 tlb_invalidation_seqno_past(gt, seqno),
+				 HZ / 5);
+	if (!ret) {
+		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
+			seqno, gt->usm.tlb_invalidation_seqno_recv);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int expected_seqno;
+
+	if (unlikely(len != 1))
+		return -EPROTO;
+
+	/* Sanity check on seqno */
+	expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) %
+		TLB_INVALIDATION_SEQNO_MAX;
+	XE_WARN_ON(expected_seqno != msg[0]);
+
+	gt->usm.tlb_invalidation_seqno_recv = msg[0];
+	smp_wmb();
+	wake_up_all(&guc->ct.wq);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
new file mode 100644
index 0000000000000..f1c3b34b19932
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_H_
+#define _XE_GT_TLB_INVALIDATION_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
+int xe_gt_tlb_invalidation(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_TLB_INVALIDATION_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 6e25c1d5d43ea..84d4302d4e721 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -15,6 +15,7 @@
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_guc_submit.h"
 #include "xe_map.h"
 #include "xe_trace.h"
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d47a8617c5b67..c548cd04f9cf6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -19,6 +19,7 @@
 #include "xe_engine.h"
 #include "xe_gt.h"
 #include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
 #include "xe_pm.h"
 #include "xe_preempt_fence.h"
-- 
GitLab


From 62ad062150c2ab72b0881c2f24f710e4c0bc4cd7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 17 Jan 2023 20:49:38 -0800
Subject: [PATCH 1299/2340] drm/xe: Move TLB invalidation variable to own
 sub-structure in GT

TLB invalidations no longer just restricted to USM, move the variables
to own sub-structure.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c          |  6 ++++--
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 20 +++++++++----------
 drivers/gpu/drm/xe/xe_gt_types.h            | 22 ++++++++++-----------
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 01303bbe073c2..ea308b123474e 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -11,13 +11,15 @@
 #include "xe_gt.h"
 #include "xe_gt_debugfs.h"
 #include "xe_gt_mcr.h"
-#include "xe_gt_pagefault.h"
-#include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_engine.h"
 #include "xe_macros.h"
 #include "xe_uc_debugfs.h"
 
+#ifdef CONFIG_DRM_XE_DEBUG
+#include "xe_gt_tlb_invalidation.h"
+#endif
+
 static struct xe_gt *node_to_gt(struct drm_info_node *node)
 {
 	return node->info_ent->data;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index fea7a557d2139..a39a2fb163aef 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -16,7 +16,7 @@ guc_to_gt(struct xe_guc *guc)
 
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 {
-	gt->usm.tlb_invalidation_seqno = 1;
+	gt->tlb_invalidation.seqno = 1;
 
 	return 0;
 }
@@ -40,12 +40,12 @@ static int send_tlb_invalidation(struct xe_guc *guc)
 	 * need to be updated.
 	 */
 	mutex_lock(&guc->ct.lock);
-	seqno = gt->usm.tlb_invalidation_seqno;
+	seqno = gt->tlb_invalidation.seqno;
 	action[1] = seqno;
-	gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) %
+	gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
 		TLB_INVALIDATION_SEQNO_MAX;
-	if (!gt->usm.tlb_invalidation_seqno)
-		gt->usm.tlb_invalidation_seqno = 1;
+	if (!gt->tlb_invalidation.seqno)
+		gt->tlb_invalidation.seqno = 1;
 	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
 	if (!ret)
@@ -62,10 +62,10 @@ int xe_gt_tlb_invalidation(struct xe_gt *gt)
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
 {
-	if (gt->usm.tlb_invalidation_seqno_recv >= seqno)
+	if (gt->tlb_invalidation.seqno_recv >= seqno)
 		return true;
 
-	if (seqno - gt->usm.tlb_invalidation_seqno_recv >
+	if (seqno - gt->tlb_invalidation.seqno_recv >
 	    (TLB_INVALIDATION_SEQNO_MAX / 2))
 		return true;
 
@@ -87,7 +87,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 				 HZ / 5);
 	if (!ret) {
 		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
-			seqno, gt->usm.tlb_invalidation_seqno_recv);
+			seqno, gt->tlb_invalidation.seqno_recv);
 		return -ETIME;
 	}
 
@@ -103,11 +103,11 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		return -EPROTO;
 
 	/* Sanity check on seqno */
-	expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) %
+	expected_seqno = (gt->tlb_invalidation.seqno_recv + 1) %
 		TLB_INVALIDATION_SEQNO_MAX;
 	XE_WARN_ON(expected_seqno != msg[0]);
 
-	gt->usm.tlb_invalidation_seqno_recv = msg[0];
+	gt->tlb_invalidation.seqno_recv = msg[0];
 	smp_wmb();
 	wake_up_all(&guc->ct.wq);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 2dbc8cedd6305..3bfce7abe857a 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -160,6 +160,17 @@ struct xe_gt {
 		struct work_struct worker;
 	} reset;
 
+	/** @tlb_invalidation: TLB invalidation state */
+	struct {
+		/** @seqno: TLB invalidation seqno, protected by CT lock */
+#define TLB_INVALIDATION_SEQNO_MAX	0x100000
+		int seqno;
+		/**
+		 * @seqno_recv: last received TLB invalidation seqno, protected by CT lock
+		 */
+		int seqno_recv;
+	} tlb_invalidation;
+
 	/** @usm: unified shared memory state */
 	struct {
 		/**
@@ -175,17 +186,6 @@ struct xe_gt {
 		 * operations (e.g. mmigrations, fixing page tables)
 		 */
 		u16 reserved_bcs_instance;
-		/**
-		 * @tlb_invalidation_seqno: TLB invalidation seqno, protected by
-		 * CT lock
-		 */
-#define TLB_INVALIDATION_SEQNO_MAX	0x100000
-		int tlb_invalidation_seqno;
-		/**
-		 * @tlb_invalidation_seqno_recv: last received TLB invalidation
-		 * seqno, protected by CT lock
-		 */
-		int tlb_invalidation_seqno_recv;
 		/** @pf_wq: page fault work queue, unbound, high priority */
 		struct workqueue_struct *pf_wq;
 		/** @acc_wq: access counter work queue, unbound, high priority */
-- 
GitLab


From fc108a8b759f52b879e9a39642ee7988d251e453 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 17 Jan 2023 21:11:43 -0800
Subject: [PATCH 1300/2340] drm/xe: Add TLB invalidation fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fence will be signaled when TLB invalidation completion.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c                    |  1 +
 drivers/gpu/drm/xe/xe_gt_debugfs.c            |  2 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c          |  2 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c   | 43 +++++++++++++++++--
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h   |  6 ++-
 .../gpu/drm/xe/xe_gt_tlb_invalidation_types.h | 26 +++++++++++
 drivers/gpu/drm/xe/xe_gt_types.h              |  5 +++
 drivers/gpu/drm/xe/xe_vm.c                    |  2 +-
 8 files changed, 80 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 96136f130eda7..28bbb3159531f 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -669,6 +669,7 @@ static int gt_reset(struct xe_gt *gt)
 
 	xe_uc_stop_prepare(&gt->uc);
 	xe_gt_pagefault_reset(gt);
+	xe_gt_tlb_invalidation_reset(gt);
 
 	err = xe_uc_stop(&gt->uc);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index ea308b123474e..946398f08bb55 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -99,7 +99,7 @@ static int invalidate_tlb(struct seq_file *m, void *data)
 	int seqno;
 	int ret = 0;
 
-	seqno = xe_gt_tlb_invalidation(gt);
+	seqno = xe_gt_tlb_invalidation(gt, NULL);
 	XE_WARN_ON(seqno < 0);
 	if (seqno > 0)
 		ret = xe_gt_tlb_invalidation_wait(gt, seqno);
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 93a8efe5d0a02..705093cb63d76 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -245,7 +245,7 @@ unlock_vm:
 		 * defer TLB invalidate + fault response to a callback of fence
 		 * too
 		 */
-		ret = xe_gt_tlb_invalidation(gt);
+		ret = xe_gt_tlb_invalidation(gt, NULL);
 		if (ret >= 0)
 			ret = 0;
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index a39a2fb163aef..0058a155eeb9e 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -17,11 +17,27 @@ guc_to_gt(struct xe_guc *guc)
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 {
 	gt->tlb_invalidation.seqno = 1;
+	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
 
 	return 0;
 }
 
-static int send_tlb_invalidation(struct xe_guc *guc)
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
+{
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+
+	mutex_lock(&gt->uc.guc.ct.lock);
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		list_del(&fence->link);
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+	mutex_unlock(&gt->uc.guc.ct.lock);
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc,
+				 struct xe_gt_tlb_invalidation_fence *fence)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
 	u32 action[] = {
@@ -41,6 +57,15 @@ static int send_tlb_invalidation(struct xe_guc *guc)
 	 */
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
+	if (fence) {
+		/*
+		 * FIXME: How to deal TLB invalidation timeout, right now we
+		 * just have an endless fence which isn't ideal.
+		 */
+		fence->seqno = seqno;
+		list_add_tail(&fence->link,
+			      &gt->tlb_invalidation.pending_fences);
+	}
 	action[1] = seqno;
 	gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
 		TLB_INVALIDATION_SEQNO_MAX;
@@ -55,9 +80,10 @@ static int send_tlb_invalidation(struct xe_guc *guc)
 	return ret;
 }
 
-int xe_gt_tlb_invalidation(struct xe_gt *gt)
+int xe_gt_tlb_invalidation(struct xe_gt *gt,
+			   struct xe_gt_tlb_invalidation_fence *fence)
 {
-	return send_tlb_invalidation(&gt->uc.guc);
+	return send_tlb_invalidation(&gt->uc.guc, fence);
 }
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
@@ -97,8 +123,11 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_gt_tlb_invalidation_fence *fence;
 	int expected_seqno;
 
+	lockdep_assert_held(&guc->ct.lock);
+
 	if (unlikely(len != 1))
 		return -EPROTO;
 
@@ -111,5 +140,13 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	smp_wmb();
 	wake_up_all(&guc->ct.wq);
 
+	fence = list_first_entry_or_null(&gt->tlb_invalidation.pending_fences,
+					 typeof(*fence), link);
+	if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) {
+		list_del(&fence->link);
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index f1c3b34b19932..7e6fbf46f0e38 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -8,11 +8,15 @@
 
 #include <linux/types.h>
 
+#include "xe_gt_tlb_invalidation_types.h"
+
 struct xe_gt;
 struct xe_guc;
 
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
-int xe_gt_tlb_invalidation(struct xe_gt *gt);
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation(struct xe_gt *gt,
+			   struct xe_gt_tlb_invalidation_fence *fence);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
new file mode 100644
index 0000000000000..ab57c14c6d144
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_
+#define _XE_GT_TLB_INVALIDATION_TYPES_H_
+
+#include <linux/dma-fence.h>
+
+/**
+ * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
+ *
+ * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB
+ * invalidation completion.
+ */
+struct xe_gt_tlb_invalidation_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending tlb fences */
+	struct list_head link;
+	/** @seqno: seqno of TLB invalidation to signal fence one */
+	int seqno;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 3bfce7abe857a..a755e3a865522 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -169,6 +169,11 @@ struct xe_gt {
 		 * @seqno_recv: last received TLB invalidation seqno, protected by CT lock
 		 */
 		int seqno_recv;
+		/**
+		 * @pending_fences: list of pending fences waiting TLB
+		 * invaliations, protected by CT lock
+		 */
+		struct list_head pending_fences;
 	} tlb_invalidation;
 
 	/** @usm: unified shared memory state */
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c548cd04f9cf6..aae9acc7759a9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3345,7 +3345,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		if (xe_pt_zap_ptes(gt, vma)) {
 			gt_needs_invalidate |= BIT(id);
 			xe_device_wmb(xe);
-			seqno[id] = xe_gt_tlb_invalidation(gt);
+			seqno[id] = xe_gt_tlb_invalidation(gt, NULL);
 			if (seqno[id] < 0)
 				return seqno[id];
 		}
-- 
GitLab


From f4a8add94f2f28bd215b07b72abcbd2fd17d2012 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 18 Jan 2023 14:43:56 -0800
Subject: [PATCH 1301/2340] drm/xe: Invalidate TLB after unbind is complete
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This gets tricky as we can't do the TLB invalidation until the unbind
operation is done on the hardware and we can't signal the unbind as
complete until the TLB invalidation is done. To work around this we
create an unbind fence which does a TLB invalidation after unbind is
done on the hardware, signals on TLB invalidation completion, and this
fence is installed in the BO dma-resv slot and installed in out-syncs
for the unbind operation.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Suggested-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com
Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  2 +
 drivers/gpu/drm/xe/xe_gt_types.h            |  9 ++
 drivers/gpu/drm/xe/xe_pt.c                  | 96 +++++++++++++++++++++
 3 files changed, 107 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 0058a155eeb9e..23094d3645835 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -18,6 +18,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 {
 	gt->tlb_invalidation.seqno = 1;
 	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
+	spin_lock_init(&gt->tlb_invalidation.lock);
+	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index a755e3a865522..3b2d9842add7b 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -174,6 +174,15 @@ struct xe_gt {
 		 * invaliations, protected by CT lock
 		 */
 		struct list_head pending_fences;
+		/** @fence_context: context for TLB invalidation fences */
+		u64 fence_context;
+		/**
+		 * @fence_seqno: seqno to TLB invalidation fences, protected by
+		 * tlb_invalidation.lock
+		 */
+		u32 fence_seqno;
+		/** @lock: protects TLB invalidation fences */
+		spinlock_t lock;
 	} tlb_invalidation;
 
 	/** @usm: unified shared memory state */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 01673fe969302..65a6f54b22a9b 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -6,6 +6,7 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
 #include "xe_pt.h"
 #include "xe_pt_types.h"
@@ -1461,6 +1462,83 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
 	.pre_commit = xe_pt_userptr_pre_commit,
 };
 
+struct invalidation_fence {
+	struct xe_gt_tlb_invalidation_fence base;
+	struct xe_gt *gt;
+	struct dma_fence *fence;
+	struct dma_fence_cb cb;
+	struct work_struct work;
+};
+
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	return "xe";
+}
+
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	return "invalidation_fence";
+}
+
+static const struct dma_fence_ops invalidation_fence_ops = {
+	.get_driver_name = invalidation_fence_get_driver_name,
+	.get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+static void invalidation_fence_cb(struct dma_fence *fence,
+				  struct dma_fence_cb *cb)
+{
+	struct invalidation_fence *ifence =
+		container_of(cb, struct invalidation_fence, cb);
+
+	queue_work(system_wq, &ifence->work);
+	dma_fence_put(ifence->fence);
+}
+
+static void invalidation_fence_work_func(struct work_struct *w)
+{
+	struct invalidation_fence *ifence =
+		container_of(w, struct invalidation_fence, work);
+
+	xe_gt_tlb_invalidation(ifence->gt, &ifence->base);
+}
+
+static int invalidation_fence_init(struct xe_gt *gt,
+				   struct invalidation_fence *ifence,
+				   struct dma_fence *fence)
+{
+	int ret;
+
+	spin_lock_irq(&gt->tlb_invalidation.lock);
+	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
+		       &gt->tlb_invalidation.lock,
+		       gt->tlb_invalidation.fence_context,
+		       ++gt->tlb_invalidation.fence_seqno);
+	spin_unlock_irq(&gt->tlb_invalidation.lock);
+
+	INIT_LIST_HEAD(&ifence->base.link);
+
+	dma_fence_get(&ifence->base.base);	/* Ref for caller */
+	ifence->fence = fence;
+	ifence->gt = gt;
+
+	INIT_WORK(&ifence->work, invalidation_fence_work_func);
+	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
+	if (ret == -ENOENT) {
+		dma_fence_put(ifence->fence);	/* Usually dropped in CB */
+		invalidation_fence_work_func(&ifence->work);
+	} else if (ret) {
+		dma_fence_put(&ifence->base.base);	/* Caller ref */
+		dma_fence_put(&ifence->base.base);	/* Creation ref */
+	}
+
+	XE_WARN_ON(ret && ret != -ENOENT);
+
+	return ret && ret != -ENOENT ? ret : 0;
+}
+
 /**
  * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
  * address range.
@@ -1496,6 +1574,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 	struct xe_vm *vm = vma->vm;
 	u32 num_entries;
 	struct dma_fence *fence = NULL;
+	struct invalidation_fence *ifence;
 	LLIST_HEAD(deferred);
 
 	xe_bo_assert_held(vma->bo);
@@ -1511,6 +1590,10 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 
 	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
 
+	ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+	if (!ifence)
+		return ERR_PTR(-ENOMEM);
+
 	/*
 	 * Even if we were already evicted and unbind to destroy, we need to
 	 * clear again here. The eviction may have updated pagetables at a
@@ -1523,6 +1606,17 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 					   syncs, num_syncs,
 					   &unbind_pt_update.base);
 	if (!IS_ERR(fence)) {
+		int err;
+
+		/* TLB invalidation must be done before signaling unbind */
+		err = invalidation_fence_init(gt, ifence, fence);
+		if (err) {
+			dma_fence_put(fence);
+			kfree(ifence);
+			return ERR_PTR(err);
+		}
+		fence = &ifence->base.base;
+
 		/* add shared fence now for pagetable delayed destroy */
 		dma_resv_add_fence(&vm->resv, fence,
 				   DMA_RESV_USAGE_BOOKKEEP);
@@ -1534,6 +1628,8 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 		xe_pt_commit_unbind(vma, entries, num_entries,
 				    unbind_pt_update.locked ? &deferred : NULL);
 		vma->gt_present &= ~BIT(gt->info.id);
+	} else {
+		kfree(ifence);
 	}
 
 	if (!vma->gt_present)
-- 
GitLab


From c6b0948ff8d0842b55f05b794590ffc0a44c0656 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 20 Jan 2023 09:38:03 -0800
Subject: [PATCH 1302/2340] drm/xe: Kernel doc GT TLB invalidations

Document all exported functions.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 52 ++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 23094d3645835..1cb4d3a6bc57a 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -14,6 +14,15 @@ guc_to_gt(struct xe_guc *guc)
 	return container_of(guc, struct xe_gt, uc.guc);
 }
 
+/**
+ * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
+ * @gt: graphics tile
+ *
+ * Initialize GT TLB invalidation state, purely software initialization, should
+ * be called once during driver load.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 {
 	gt->tlb_invalidation.seqno = 1;
@@ -24,7 +33,13 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 	return 0;
 }
 
-void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
+/**
+ * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
+ * @gt: graphics tile
+ *
+ * Signal any pending invalidation fences, should be called during a GT reset
+ */
+ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 {
 	struct xe_gt_tlb_invalidation_fence *fence, *next;
 
@@ -82,6 +97,19 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	return ret;
 }
 
+/**
+ * xe_gt_tlb_invalidation - Issue a TLB invalidation on this GT
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signal on TLB invalidation
+ * completion, can be NULL
+ *
+ * Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous
+ * and caller can either use the invalidation fence or seqno +
+ * xe_gt_tlb_invalidation_wait to wait for completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
 int xe_gt_tlb_invalidation(struct xe_gt *gt,
 			   struct xe_gt_tlb_invalidation_fence *fence)
 {
@@ -100,6 +128,16 @@ static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
 	return false;
 }
 
+/**
+ * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
+ * @gt: graphics tile
+ * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
+ *
+ * Wait for 200ms for a TLB invalidation to complete, in practice we always
+ * should receive the TLB invalidation within 200ms.
+ *
+ * Return: 0 on success, -ETIME on TLB invalidation timeout
+ */
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 {
 	struct xe_device *xe = gt_to_xe(gt);
@@ -122,6 +160,18 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 	return 0;
 }
 
+/**
+ * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
+ * @guc: guc
+ * @msg: message indicating TLB invalidation done
+ * @len: length of message
+ *
+ * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
+ * invalidation fences for seqno. Algorithm for this depends on seqno being
+ * received in-order and asserts this assumption.
+ *
+ * Return: 0 on success, -EPROTO for malformed messages.
+ */
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
-- 
GitLab


From 24b52db6ae00d8e8c4a7af5622890b70d4de51b9 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 19 Jan 2023 19:21:35 -0800
Subject: [PATCH 1303/2340] drm/xe: Add TLB invalidation fence ftrace

This will help debug issues with TLB invalidation fences.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  5 +++
 drivers/gpu/drm/xe/xe_pt.c                  |  5 +++
 drivers/gpu/drm/xe/xe_trace.h               | 50 +++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 1cb4d3a6bc57a..4d179357ce65d 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -7,6 +7,7 @@
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
+#include "xe_trace.h"
 
 static struct xe_gt *
 guc_to_gt(struct xe_guc *guc)
@@ -82,6 +83,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 		fence->seqno = seqno;
 		list_add_tail(&fence->link,
 			      &gt->tlb_invalidation.pending_fences);
+		trace_xe_gt_tlb_invalidation_fence_send(fence);
 	}
 	action[1] = seqno;
 	gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
@@ -194,7 +196,10 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 
 	fence = list_first_entry_or_null(&gt->tlb_invalidation.pending_fences,
 					 typeof(*fence), link);
+	if (fence)
+		trace_xe_gt_tlb_invalidation_fence_recv(fence);
 	if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) {
+		trace_xe_gt_tlb_invalidation_fence_signal(fence);
 		list_del(&fence->link);
 		dma_fence_signal(&fence->base);
 		dma_fence_put(&fence->base);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 65a6f54b22a9b..b220d1d5cfe36 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -13,6 +13,7 @@
 #include "xe_pt_walk.h"
 #include "xe_vm.h"
 #include "xe_res_cursor.h"
+#include "xe_trace.h"
 #include "xe_ttm_stolen_mgr.h"
 
 struct xe_pt_dir {
@@ -1493,6 +1494,7 @@ static void invalidation_fence_cb(struct dma_fence *fence,
 	struct invalidation_fence *ifence =
 		container_of(cb, struct invalidation_fence, cb);
 
+	trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base);
 	queue_work(system_wq, &ifence->work);
 	dma_fence_put(ifence->fence);
 }
@@ -1502,6 +1504,7 @@ static void invalidation_fence_work_func(struct work_struct *w)
 	struct invalidation_fence *ifence =
 		container_of(w, struct invalidation_fence, work);
 
+	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
 	xe_gt_tlb_invalidation(ifence->gt, &ifence->base);
 }
 
@@ -1511,6 +1514,8 @@ static int invalidation_fence_init(struct xe_gt *gt,
 {
 	int ret;
 
+	trace_xe_gt_tlb_invalidation_fence_create(&ifence->base);
+
 	spin_lock_irq(&gt->tlb_invalidation.lock);
 	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
 		       &gt->tlb_invalidation.lock,
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index d9f921d46b53f..b5b0f1bff7ec8 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -16,10 +16,60 @@
 #include "xe_engine_types.h"
 #include "xe_gpu_scheduler_types.h"
 #include "xe_gt_types.h"
+#include "xe_gt_tlb_invalidation_types.h"
 #include "xe_guc_engine_types.h"
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
+DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
+		    TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+		    TP_ARGS(fence),
+
+		    TP_STRUCT__entry(
+			     __field(u64, fence)
+			     __field(int, seqno)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->fence = (u64)fence;
+			   __entry->seqno = fence->seqno;
+			   ),
+
+		    TP_printk("fence=0x%016llx, seqno=%d",
+			      __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence,
+	     xe_gt_tlb_invalidation_fence_work_func,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
 DECLARE_EVENT_CLASS(xe_bo,
 		    TP_PROTO(struct xe_bo *bo),
 		    TP_ARGS(bo),
-- 
GitLab


From 38224c00d9c284030d60be83571e5f1bd5fc79c6 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 24 Jan 2023 10:35:59 -0800
Subject: [PATCH 1304/2340] drm/xe: Add TDR for invalidation fence timeout
 cleanup

Endless fences are not good, add a TDR to cleanup any invalidation
fences which have not received an invalidation message within a timeout
period.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c   | 58 +++++++++++++++++--
 .../gpu/drm/xe/xe_gt_tlb_invalidation_types.h |  2 +
 drivers/gpu/drm/xe/xe_gt_types.h              |  5 ++
 drivers/gpu/drm/xe/xe_trace.h                 |  5 ++
 4 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 4d179357ce65d..9e026fd0a45d9 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -9,12 +9,45 @@
 #include "xe_guc_ct.h"
 #include "xe_trace.h"
 
+#define TLB_TIMEOUT	(HZ / 4)
+
 static struct xe_gt *
 guc_to_gt(struct xe_guc *guc)
 {
 	return container_of(guc, struct xe_gt, uc.guc);
 }
 
+static void xe_gt_tlb_fence_timeout(struct work_struct *work)
+{
+	struct xe_gt *gt = container_of(work, struct xe_gt,
+					tlb_invalidation.fence_tdr.work);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+
+	mutex_lock(&gt->uc.guc.ct.lock);
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
+						    fence->invalidation_time);
+
+		if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
+			break;
+
+		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
+		drm_err(&gt_to_xe(gt)->drm, "TLB invalidation fence timeout, seqno=%d",
+			fence->seqno);
+
+		list_del(&fence->link);
+		fence->base.error = -ETIME;
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		queue_delayed_work(system_wq,
+				   &gt->tlb_invalidation.fence_tdr,
+				   TLB_TIMEOUT);
+	mutex_unlock(&gt->uc.guc.ct.lock);
+}
+
 /**
  * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
  * @gt: graphics tile
@@ -30,6 +63,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
 	spin_lock_init(&gt->tlb_invalidation.lock);
 	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
+	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
+			  xe_gt_tlb_fence_timeout);
 
 	return 0;
 }
@@ -44,6 +79,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 {
 	struct xe_gt_tlb_invalidation_fence *fence, *next;
 
+	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+
 	mutex_lock(&gt->uc.guc.ct.lock);
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link) {
@@ -67,6 +104,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	};
 	int seqno;
 	int ret;
+	bool queue_work;
 
 	/*
 	 * XXX: The seqno algorithm relies on TLB invalidation being processed
@@ -76,10 +114,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
 	if (fence) {
-		/*
-		 * FIXME: How to deal TLB invalidation timeout, right now we
-		 * just have an endless fence which isn't ideal.
-		 */
+		queue_work = list_empty(&gt->tlb_invalidation.pending_fences);
 		fence->seqno = seqno;
 		list_add_tail(&fence->link,
 			      &gt->tlb_invalidation.pending_fences);
@@ -92,6 +127,13 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 		gt->tlb_invalidation.seqno = 1;
 	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret && fence) {
+		fence->invalidation_time = ktime_get();
+		if (queue_work)
+			queue_delayed_work(system_wq,
+					   &gt->tlb_invalidation.fence_tdr,
+					   TLB_TIMEOUT);
+	}
 	if (!ret)
 		ret = seqno;
 	mutex_unlock(&guc->ct.lock);
@@ -152,7 +194,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 	 */
 	ret = wait_event_timeout(guc->ct.wq,
 				 tlb_invalidation_seqno_past(gt, seqno),
-				 HZ / 5);
+				 TLB_TIMEOUT);
 	if (!ret) {
 		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
 			seqno, gt->tlb_invalidation.seqno_recv);
@@ -201,6 +243,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) {
 		trace_xe_gt_tlb_invalidation_fence_signal(fence);
 		list_del(&fence->link);
+		if (!list_empty(&gt->tlb_invalidation.pending_fences))
+			mod_delayed_work(system_wq,
+					 &gt->tlb_invalidation.fence_tdr,
+					 TLB_TIMEOUT);
+		else
+			cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
 		dma_fence_signal(&fence->base);
 		dma_fence_put(&fence->base);
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
index ab57c14c6d144..934c828efe31c 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -21,6 +21,8 @@ struct xe_gt_tlb_invalidation_fence {
 	struct list_head link;
 	/** @seqno: seqno of TLB invalidation to signal fence one */
 	int seqno;
+	/** @invalidation_time: time of TLB invalidation */
+	ktime_t invalidation_time;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 3b2d9842add7b..a40fab262ac9e 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -174,6 +174,11 @@ struct xe_gt {
 		 * invaliations, protected by CT lock
 		 */
 		struct list_head pending_fences;
+		/**
+		 * @fence_tdr: schedules a delayed call to
+		 * xe_gt_tlb_fence_timeout after the timeut interval is over.
+		 */
+		struct delayed_work fence_tdr;
 		/** @fence_context: context for TLB invalidation fences */
 		u64 fence_context;
 		/**
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index b5b0f1bff7ec8..d1cd4b57a9740 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -70,6 +70,11 @@ DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
 	     TP_ARGS(fence)
 );
 
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
 DECLARE_EVENT_CLASS(xe_bo,
 		    TP_PROTO(struct xe_bo *bo),
 		    TP_ARGS(bo),
-- 
GitLab


From a12d9216740c23dc7f526db108b4a82f1e0807e2 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 24 Jan 2023 16:14:55 -0800
Subject: [PATCH 1305/2340] drm/xe: Only set VM->asid for platforms that
 support a ASID

This will help with TLB invalidation as the ASID in TLB invalidate
should be zero for platforms that do not support a ASID.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index aae9acc7759a9..83f8c8a186d80 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1429,10 +1429,12 @@ static void vm_destroy_work_func(struct work_struct *w)
 		xe_device_mem_access_put(xe);
 		xe_pm_runtime_put(xe);
 
-		mutex_lock(&xe->usm.lock);
-		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
-		XE_WARN_ON(lookup != vm);
-		mutex_unlock(&xe->usm.lock);
+		if (xe->info.supports_usm) {
+			mutex_lock(&xe->usm.lock);
+			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+			XE_WARN_ON(lookup != vm);
+			mutex_unlock(&xe->usm.lock);
+		}
 	}
 
 	/*
@@ -1917,16 +1919,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		return err;
 	}
 
-	mutex_lock(&xe->usm.lock);
-	err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
-			      XA_LIMIT(0, XE_MAX_ASID - 1),
-			      &xe->usm.next_asid, GFP_KERNEL);
-	mutex_unlock(&xe->usm.lock);
-	if (err) {
-		xe_vm_close_and_put(vm);
-		return err;
+	if (xe->info.supports_usm) {
+		mutex_lock(&xe->usm.lock);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+				      XA_LIMIT(0, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		mutex_unlock(&xe->usm.lock);
+		if (err) {
+			xe_vm_close_and_put(vm);
+			return err;
+		}
+		vm->usm.asid = asid;
 	}
-	vm->usm.asid = asid;
 
 	args->vm_id = id;
 
-- 
GitLab


From 0335b53cc48cab91bb089ee5c7558cc84da3958d Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 24 Jan 2023 16:21:11 -0800
Subject: [PATCH 1306/2340] drm/xe: Delete debugfs entry to issue TLB
 invalidation

Not used, let's remove this.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 946398f08bb55..daae42d3ab3bb 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -16,10 +16,6 @@
 #include "xe_macros.h"
 #include "xe_uc_debugfs.h"
 
-#ifdef CONFIG_DRM_XE_DEBUG
-#include "xe_gt_tlb_invalidation.h"
-#endif
-
 static struct xe_gt *node_to_gt(struct drm_info_node *node)
 {
 	return node->info_ent->data;
@@ -92,32 +88,12 @@ static int steering(struct seq_file *m, void *data)
 	return 0;
 }
 
-#ifdef CONFIG_DRM_XE_DEBUG
-static int invalidate_tlb(struct seq_file *m, void *data)
-{
-	struct xe_gt *gt = node_to_gt(m->private);
-	int seqno;
-	int ret = 0;
-
-	seqno = xe_gt_tlb_invalidation(gt, NULL);
-	XE_WARN_ON(seqno < 0);
-	if (seqno > 0)
-		ret = xe_gt_tlb_invalidation_wait(gt, seqno);
-	XE_WARN_ON(ret < 0);
-
-	return 0;
-}
-#endif
-
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
 	{"sa_info", sa_info, 0},
 	{"topology", topology, 0},
 	{"steering", steering, 0},
-#ifdef CONFIG_DRM_XE_DEBUG
-	{"invalidate_tlb", invalidate_tlb, 0},
-#endif
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
-- 
GitLab


From 9d25e284ea468930b0310b432784eef45e83e378 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 24 Jan 2023 16:33:09 -0800
Subject: [PATCH 1307/2340] drm/xe: Add has_range_tlb_invalidation device
 attribute

This will help implementing range based TLB invalidations.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 drivers/gpu/drm/xe/xe_pci.c          | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 81bc293fb240c..ef723b08de89e 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -87,6 +87,8 @@ struct xe_device {
 		bool has_flat_ccs;
 		/** @has_4tile: Whether tile-4 tiling is supported */
 		bool has_4tile;
+		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
+		bool has_range_tlb_invalidation;
 	} info;
 
 	/** @irq: device interrupt state */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 67fd9c3818f90..2482ce8e3df43 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -70,6 +70,7 @@ struct xe_device_desc {
 	bool supports_usm;
 	bool has_flat_ccs;
 	bool has_4tile;
+	bool has_range_tlb_invalidation;
 };
 
 #define PLATFORM(x)		\
@@ -139,6 +140,7 @@ static const struct xe_device_desc dg1_desc = {
 	.require_force_probe = true, \
 	.graphics_ver = 12, \
 	.graphics_rel = 50, \
+	.has_range_tlb_invalidation = true, \
 	.has_flat_ccs = true, \
 	.dma_mask_size = 46, \
 	.max_tiles = 1, \
@@ -255,6 +257,7 @@ static const struct xe_device_desc mtl_desc = {
 	.max_tiles = 2,
 	.vm_max_level = 3,
 	.media_ver = 13,
+	.has_range_tlb_invalidation = true,
 	PLATFORM(XE_METEORLAKE),
 	.extra_gts = xelpmp_gts,
 	.platform_engine_mask = MTL_MAIN_ENGINES,
@@ -407,6 +410,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	xe->info.supports_usm = desc->supports_usm;
 	xe->info.has_flat_ccs = desc->has_flat_ccs;
 	xe->info.has_4tile = desc->has_4tile;
+	xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
 
 	spd = subplatform_get(xe, desc);
 	xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
-- 
GitLab


From 332dd0116c82a75df175a459fa69dda3f23491a7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 24 Jan 2023 16:21:58 -0800
Subject: [PATCH 1308/2340] drm/xe: Add range based TLB invalidations

If the platform supports range based TLB invalidations use them. Hide
these details in the xe_gt_tlb_invalidation layer.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c        |  7 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 87 +++++++++++++++++----
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h |  4 +-
 drivers/gpu/drm/xe/xe_pt.c                  |  9 ++-
 drivers/gpu/drm/xe/xe_vm.c                  |  2 +-
 5 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 705093cb63d76..e1a5a3a70c922 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -240,12 +240,7 @@ unlock_vm:
 		goto retry_userptr;
 
 	if (!ret) {
-		/*
-		 * FIXME: Doing a full TLB invalidation for now, likely could
-		 * defer TLB invalidate + fault response to a callback of fence
-		 * too
-		 */
-		ret = xe_gt_tlb_invalidation(gt, NULL);
+		ret = xe_gt_tlb_invalidation(gt, NULL, vma);
 		if (ret >= 0)
 			ret = 0;
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 9e026fd0a45d9..0b37cd09a59a2 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -92,16 +92,10 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 }
 
 static int send_tlb_invalidation(struct xe_guc *guc,
-				 struct xe_gt_tlb_invalidation_fence *fence)
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u32 *action, int len)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
-	u32 action[] = {
-		XE_GUC_ACTION_TLB_INVALIDATION,
-		0,
-		XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
-		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
-		XE_GUC_TLB_INVAL_FLUSH_CACHE,
-	};
 	int seqno;
 	int ret;
 	bool queue_work;
@@ -125,7 +119,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 		TLB_INVALIDATION_SEQNO_MAX;
 	if (!gt->tlb_invalidation.seqno)
 		gt->tlb_invalidation.seqno = 1;
-	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
+	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
 	if (!ret && fence) {
 		fence->invalidation_time = ktime_get();
@@ -146,18 +140,83 @@ static int send_tlb_invalidation(struct xe_guc *guc,
  * @gt: graphics tile
  * @fence: invalidation fence which will be signal on TLB invalidation
  * completion, can be NULL
+ * @vma: VMA to invalidate
  *
- * Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous
- * and caller can either use the invalidation fence or seqno +
- * xe_gt_tlb_invalidation_wait to wait for completion.
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can either use
+ * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
+ * completion.
  *
  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
  * negative error code on error.
  */
 int xe_gt_tlb_invalidation(struct xe_gt *gt,
-			   struct xe_gt_tlb_invalidation_fence *fence)
+			   struct xe_gt_tlb_invalidation_fence *fence,
+			   struct xe_vma *vma)
 {
-	return send_tlb_invalidation(&gt->uc.guc, fence);
+	struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN	7
+	u32 action[MAX_TLB_INVALIDATION_LEN];
+	int len = 0;
+
+	XE_BUG_ON(!vma);
+
+	if (!xe->info.has_range_tlb_invalidation) {
+		action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+		action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+		XE_GUC_TLB_INVAL_FLUSH_CACHE)
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+	} else {
+		u64 start = vma->start;
+		u64 length = vma->end - vma->start + 1;
+		u64 align, end;
+
+		if (length < SZ_4K)
+			length = SZ_4K;
+
+		/*
+		 * We need to invalidate a higher granularity if start address
+		 * is not aligned to length. When start is not aligned with
+		 * length we need to find the length large enough to create an
+		 * address mask covering the required range.
+		 */
+		align = roundup_pow_of_two(length);
+		start = ALIGN_DOWN(vma->start, align);
+		end = ALIGN(vma->start + length, align);
+		length = align;
+		while (start + length < end) {
+			length <<= 1;
+			start = ALIGN_DOWN(vma->start, length);
+		}
+
+		/*
+		 * Minimum invalidation size for a 2MB page that the hardware
+		 * expects is 16MB
+		 */
+		if (length >= SZ_2M) {
+			length = max_t(u64, SZ_16M, length);
+			start = ALIGN_DOWN(vma->start, length);
+		}
+
+		XE_BUG_ON(length < SZ_4K);
+		XE_BUG_ON(!is_power_of_2(length));
+		XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
+		XE_BUG_ON(!IS_ALIGNED(start, length));
+
+		action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+		action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+		action[len++] = vma->vm->usm.asid;
+		action[len++] = lower_32_bits(start);
+		action[len++] = upper_32_bits(start);
+		action[len++] = ilog2(length) - ilog2(SZ_4K);
+	}
+
+	XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
+
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index 7e6fbf46f0e38..b4c4f717bc8ab 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -12,11 +12,13 @@
 
 struct xe_gt;
 struct xe_guc;
+struct xe_vma;
 
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
 int xe_gt_tlb_invalidation(struct xe_gt *gt,
-			   struct xe_gt_tlb_invalidation_fence *fence);
+			   struct xe_gt_tlb_invalidation_fence *fence,
+			   struct xe_vma *vma);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index b220d1d5cfe36..cde75708d8439 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1466,6 +1466,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
 struct invalidation_fence {
 	struct xe_gt_tlb_invalidation_fence base;
 	struct xe_gt *gt;
+	struct xe_vma *vma;
 	struct dma_fence *fence;
 	struct dma_fence_cb cb;
 	struct work_struct work;
@@ -1505,12 +1506,13 @@ static void invalidation_fence_work_func(struct work_struct *w)
 		container_of(w, struct invalidation_fence, work);
 
 	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-	xe_gt_tlb_invalidation(ifence->gt, &ifence->base);
+	xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma);
 }
 
 static int invalidation_fence_init(struct xe_gt *gt,
 				   struct invalidation_fence *ifence,
-				   struct dma_fence *fence)
+				   struct dma_fence *fence,
+				   struct xe_vma *vma)
 {
 	int ret;
 
@@ -1528,6 +1530,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
 	dma_fence_get(&ifence->base.base);	/* Ref for caller */
 	ifence->fence = fence;
 	ifence->gt = gt;
+	ifence->vma = vma;
 
 	INIT_WORK(&ifence->work, invalidation_fence_work_func);
 	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1614,7 +1617,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 		int err;
 
 		/* TLB invalidation must be done before signaling unbind */
-		err = invalidation_fence_init(gt, ifence, fence);
+		err = invalidation_fence_init(gt, ifence, fence, vma);
 		if (err) {
 			dma_fence_put(fence);
 			kfree(ifence);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 83f8c8a186d80..4fc8e24f93ce2 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3349,7 +3349,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		if (xe_pt_zap_ptes(gt, vma)) {
 			gt_needs_invalidate |= BIT(id);
 			xe_device_wmb(xe);
-			seqno[id] = xe_gt_tlb_invalidation(gt, NULL);
+			seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma);
 			if (seqno[id] < 0)
 				return seqno[id];
 		}
-- 
GitLab


From 74a8b2c6e2d6f17fcd9977de298eff20a46b0af7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 25 Jan 2023 10:36:05 -0800
Subject: [PATCH 1309/2340] drm/xe: Propagate error from bind operations to
 async fence

If an bind operation fails we need to report it via the async fence.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4fc8e24f93ce2..8ba548e49add4 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1641,6 +1641,7 @@ err_fences:
 
 struct async_op_fence {
 	struct dma_fence fence;
+	struct dma_fence *wait_fence;
 	struct dma_fence_cb cb;
 	struct xe_vm *vm;
 	wait_queue_head_t wq;
@@ -1668,8 +1669,10 @@ static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 	struct async_op_fence *afence =
 		container_of(cb, struct async_op_fence, cb);
 
+	afence->fence.error = afence->wait_fence->error;
 	dma_fence_signal(&afence->fence);
 	xe_vm_put(afence->vm);
+	dma_fence_put(afence->wait_fence);
 	dma_fence_put(&afence->fence);
 }
 
@@ -1685,13 +1688,17 @@ static void add_async_op_fence_cb(struct xe_vm *vm,
 		wake_up_all(&afence->wq);
 	}
 
+	afence->wait_fence = dma_fence_get(fence);
 	afence->vm = xe_vm_get(vm);
 	dma_fence_get(&afence->fence);
 	ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
-	if (ret == -ENOENT)
+	if (ret == -ENOENT) {
+		afence->fence.error = afence->wait_fence->error;
 		dma_fence_signal(&afence->fence);
+	}
 	if (ret) {
 		xe_vm_put(vm);
+		dma_fence_put(afence->wait_fence);
 		dma_fence_put(&afence->fence);
 	}
 	XE_WARN_ON(ret && ret != -ENOENT);
-- 
GitLab


From da3799c975726572066f1c6bc6a6f65cb1f01c84 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 30 Jan 2023 10:55:35 -0800
Subject: [PATCH 1310/2340] drm/xe: Use GuC to do GGTT invalidations for the
 GuC firmware

Only the GuC should be issuing TLB invalidations if it is enabled. Part
of this patch is sanitize the device on driver unload to ensure we do
not send GuC based TLB invalidations during driver unload.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c              | 14 +++++++
 drivers/gpu/drm/xe/xe_ggtt.c                | 12 +++++-
 drivers/gpu/drm/xe/xe_gt.c                  | 13 +++++++
 drivers/gpu/drm/xe/xe_gt.h                  |  1 +
 drivers/gpu/drm/xe/xe_gt_pagefault.c        |  2 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 43 +++++++++++++++------
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h |  7 ++--
 drivers/gpu/drm/xe/xe_guc.c                 |  2 +
 drivers/gpu/drm/xe/xe_guc_types.h           |  2 +
 drivers/gpu/drm/xe/xe_pt.c                  |  2 +-
 drivers/gpu/drm/xe/xe_uc.c                  |  9 ++++-
 drivers/gpu/drm/xe/xe_uc.h                  |  1 +
 drivers/gpu/drm/xe/xe_vm.c                  |  2 +-
 13 files changed, 89 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 98f08cd9d4b01..8fe0324ccef35 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -215,6 +215,16 @@ err_put:
 	return ERR_PTR(err);
 }
 
+static void xe_device_sanitize(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_sanitize(gt);
+}
+
 int xe_device_probe(struct xe_device *xe)
 {
 	struct xe_gt *gt;
@@ -274,6 +284,10 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_debugfs_register(xe);
 
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
+	if (err)
+		return err;
+
 	return 0;
 
 err_irq_shutdown:
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index baa080cd11337..20450ed8400b1 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -13,6 +13,7 @@
 #include "xe_device.h"
 #include "xe_bo.h"
 #include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_wopcm.h"
@@ -200,10 +201,17 @@ void xe_ggtt_invalidate(struct xe_gt *gt)
 	 * therefore flushing WC buffers.  Is that really true here?
 	 */
 	xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN);
-	if (xe_device_guc_submission_enabled(gt_to_xe(gt))) {
+
+	if (gt->uc.guc.submission_state.enabled) {
+		int seqno;
+
+		seqno = xe_gt_tlb_invalidation_guc(gt);
+		XE_WARN_ON(seqno <= 0);
+		if (seqno > 0)
+			xe_gt_tlb_invalidation_wait(gt, seqno);
+	} else if (xe_device_guc_submission_enabled(gt_to_xe(gt))) {
 		struct xe_device *xe = gt_to_xe(gt);
 
-		/* TODO: also use vfunc here */
 		if (xe->info.platform == XE_PVC) {
 			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg,
 					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 28bbb3159531f..0e0d5cadb3e7d 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -196,6 +196,15 @@ static int gt_ttm_mgr_init(struct xe_gt *gt)
 	return 0;
 }
 
+void xe_gt_sanitize(struct xe_gt *gt)
+{
+	/*
+	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
+	 * reload
+	 */
+	gt->uc.guc.submission_state.enabled = false;
+}
+
 static void gt_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_gt *gt = arg;
@@ -662,6 +671,8 @@ static int gt_reset(struct xe_gt *gt)
 
 	drm_info(&xe->drm, "GT reset started\n");
 
+	xe_gt_sanitize(gt);
+
 	xe_device_mem_access_get(gt_to_xe(gt));
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
@@ -742,6 +753,8 @@ int xe_gt_suspend(struct xe_gt *gt)
 	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
 		return -ENODEV;
 
+	xe_gt_sanitize(gt);
+
 	xe_device_mem_access_get(gt_to_xe(gt));
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 5dc08a993cfeb..5635f28031704 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -26,6 +26,7 @@ int xe_gt_suspend(struct xe_gt *gt);
 int xe_gt_resume(struct xe_gt *gt);
 void xe_gt_reset_async(struct xe_gt *gt);
 void xe_gt_migrate_wait(struct xe_gt *gt);
+void xe_gt_sanitize(struct xe_gt *gt);
 
 struct xe_gt *xe_find_full_gt(struct xe_gt *gt);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index e1a5a3a70c922..ce79eb48feb84 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -240,7 +240,7 @@ unlock_vm:
 		goto retry_userptr;
 
 	if (!ret) {
-		ret = xe_gt_tlb_invalidation(gt, NULL, vma);
+		ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
 		if (ret >= 0)
 			ret = 0;
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 0b37cd09a59a2..f6a2dd26cad4f 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -135,8 +135,34 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	return ret;
 }
 
+#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+		XE_GUC_TLB_INVAL_FLUSH_CACHE)
+
 /**
- * xe_gt_tlb_invalidation - Issue a TLB invalidation on this GT
+ * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
+ * @gt: graphics tile
+ *
+ * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
+ * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION,
+		0,  /* seqno, replaced in send_tlb_invalidation */
+		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
+	};
+
+	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+				     ARRAY_SIZE(action));
+}
+
+/**
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
  * @gt: graphics tile
  * @fence: invalidation fence which will be signal on TLB invalidation
  * completion, can be NULL
@@ -150,9 +176,9 @@ static int send_tlb_invalidation(struct xe_guc *guc,
  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
  * negative error code on error.
  */
-int xe_gt_tlb_invalidation(struct xe_gt *gt,
-			   struct xe_gt_tlb_invalidation_fence *fence,
-			   struct xe_vma *vma)
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 #define MAX_TLB_INVALIDATION_LEN	7
@@ -161,12 +187,9 @@ int xe_gt_tlb_invalidation(struct xe_gt *gt,
 
 	XE_BUG_ON(!vma);
 
+	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
 	if (!xe->info.has_range_tlb_invalidation) {
-		action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
-		action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
-#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
-		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
-		XE_GUC_TLB_INVAL_FLUSH_CACHE)
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
 	} else {
 		u64 start = vma->start;
@@ -205,8 +228,6 @@ int xe_gt_tlb_invalidation(struct xe_gt *gt,
 		XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
 		XE_BUG_ON(!IS_ALIGNED(start, length));
 
-		action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
-		action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
 		action[len++] = vma->vm->usm.asid;
 		action[len++] = lower_32_bits(start);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index b4c4f717bc8ab..b333c1709397b 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -16,9 +16,10 @@ struct xe_vma;
 
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
-int xe_gt_tlb_invalidation(struct xe_gt *gt,
-			   struct xe_gt_tlb_invalidation_fence *fence,
-			   struct xe_vma *vma);
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 88a3a96da0845..5cdfdfd0de40f 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -309,6 +309,7 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
 int xe_guc_post_load_init(struct xe_guc *guc)
 {
 	xe_guc_ads_populate_post_load(&guc->ads);
+	guc->submission_state.enabled = true;
 
 	return 0;
 }
@@ -795,6 +796,7 @@ void xe_guc_sanitize(struct xe_guc *guc)
 {
 	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
 	xe_guc_ct_disable(&guc->ct);
+	guc->submission_state.enabled = false;
 }
 
 int xe_guc_reset_prepare(struct xe_guc *guc)
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index c2a484282ef2c..ac7eec28934d0 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -60,6 +60,8 @@ struct xe_guc {
 			/** @patch: patch version of GuC submission */
 			u32 patch;
 		} version;
+		/** @enabled: submission is enabled */
+		bool enabled;
 	} submission_state;
 	/** @hwconfig: Hardware config state */
 	struct {
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index cde75708d8439..3333b413686eb 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1506,7 +1506,7 @@ static void invalidation_fence_work_func(struct work_struct *w)
 		container_of(w, struct invalidation_fence, work);
 
 	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-	xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma);
+	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
 }
 
 static int invalidation_fence_init(struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 938d146980030..7886c8b853972 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -88,10 +88,15 @@ static int uc_reset(struct xe_uc *uc)
 	return 0;
 }
 
-static int uc_sanitize(struct xe_uc *uc)
+void xe_uc_sanitize(struct xe_uc *uc)
 {
 	xe_huc_sanitize(&uc->huc);
 	xe_guc_sanitize(&uc->guc);
+}
+
+static int xe_uc_sanitize_reset(struct xe_uc *uc)
+{
+	xe_uc_sanitize(uc);
 
 	return uc_reset(uc);
 }
@@ -129,7 +134,7 @@ int xe_uc_init_hw(struct xe_uc *uc)
 	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
 		return 0;
 
-	ret = uc_sanitize(uc);
+	ret = xe_uc_sanitize_reset(uc);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 380e722f95fce..d6efc9ef00d3d 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -17,5 +17,6 @@ void xe_uc_stop_prepare(struct xe_uc *uc);
 int xe_uc_stop(struct xe_uc *uc);
 int xe_uc_start(struct xe_uc *uc);
 int xe_uc_suspend(struct xe_uc *uc);
+void xe_uc_sanitize(struct xe_uc *uc);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 8ba548e49add4..4bbb0d0b09286 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3356,7 +3356,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		if (xe_pt_zap_ptes(gt, vma)) {
 			gt_needs_invalidate |= BIT(id);
 			xe_device_wmb(xe);
-			seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma);
+			seqno[id] = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
 			if (seqno[id] < 0)
 				return seqno[id];
 		}
-- 
GitLab


From c3ca5465564e7b6459e868b3433fff4e44a7fd64 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 25 Jan 2023 15:27:21 -0800
Subject: [PATCH 1311/2340] drm/xe: Lock GGTT on when restoring kernel BOs

Make lockdep happy as we required to hold the GGTT when calling
xe_ggtt_map_bo.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_bo_evict.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 7046dc203138f..3fb3c8c77efa9 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -147,8 +147,11 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 			return ret;
 		}
 
-		if (bo->flags & XE_BO_CREATE_GGTT_BIT)
+		if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+			mutex_lock(&bo->gt->mem.ggtt->lock);
 			xe_ggtt_map_bo(bo->gt->mem.ggtt, bo);
+			mutex_unlock(&bo->gt->mem.ggtt->lock);
+		}
 
 		/*
 		 * We expect validate to trigger a move VRAM and our move code
-- 
GitLab


From bae8ddae1881f645d679cd8189de995c26e9d694 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 26 Jan 2023 09:54:20 -0800
Subject: [PATCH 1312/2340] drm/xe: Propagate VM unbind error to invalidation
 fence

If a VM unbind hits an error, do not issue a TLB invalidation and
propagate the error the invalidation fence.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 3333b413686eb..4299689fe6a8f 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1496,7 +1496,13 @@ static void invalidation_fence_cb(struct dma_fence *fence,
 		container_of(cb, struct invalidation_fence, cb);
 
 	trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base);
-	queue_work(system_wq, &ifence->work);
+	if (!ifence->fence->error) {
+		queue_work(system_wq, &ifence->work);
+	} else {
+		ifence->base.base.error = ifence->fence->error;
+		dma_fence_signal(&ifence->base.base);
+		dma_fence_put(&ifence->base.base);
+	}
 	dma_fence_put(ifence->fence);
 }
 
-- 
GitLab


From 9f9f09d4071685855d43a77c8799578d26ba3f24 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 26 Jan 2023 10:05:53 -0800
Subject: [PATCH 1313/2340] drm/xe: Signal invalidation fence immediately if CT
 send fails

This means we are in the middle of a GT reset and no need to do TLB
invalidation so just signal invalidation fence immediately.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 23 +++++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index f6a2dd26cad4f..2521c8a65690b 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -69,6 +69,15 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 	return 0;
 }
 
+static void
+invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	trace_xe_gt_tlb_invalidation_fence_signal(fence);
+	list_del(&fence->link);
+	dma_fence_signal(&fence->base);
+	dma_fence_put(&fence->base);
+}
+
 /**
  * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
  * @gt: graphics tile
@@ -83,11 +92,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 
 	mutex_lock(&gt->uc.guc.ct.lock);
 	list_for_each_entry_safe(fence, next,
-				 &gt->tlb_invalidation.pending_fences, link) {
-		list_del(&fence->link);
-		dma_fence_signal(&fence->base);
-		dma_fence_put(&fence->base);
-	}
+				 &gt->tlb_invalidation.pending_fences, link)
+		invalidation_fence_signal(fence);
 	mutex_unlock(&gt->uc.guc.ct.lock);
 }
 
@@ -130,6 +136,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	}
 	if (!ret)
 		ret = seqno;
+	if (ret < 0 && fence)
+		invalidation_fence_signal(fence);
 	mutex_unlock(&guc->ct.lock);
 
 	return ret;
@@ -321,16 +329,13 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	if (fence)
 		trace_xe_gt_tlb_invalidation_fence_recv(fence);
 	if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) {
-		trace_xe_gt_tlb_invalidation_fence_signal(fence);
-		list_del(&fence->link);
+		invalidation_fence_signal(fence);
 		if (!list_empty(&gt->tlb_invalidation.pending_fences))
 			mod_delayed_work(system_wq,
 					 &gt->tlb_invalidation.fence_tdr,
 					 TLB_TIMEOUT);
 		else
 			cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
-		dma_fence_signal(&fence->base);
-		dma_fence_put(&fence->base);
 	}
 
 	return 0;
-- 
GitLab


From 5669899e9b3c3f38252902141483f5a09c8eedd3 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 27 Jan 2023 12:53:14 -0800
Subject: [PATCH 1314/2340] drm/xe: Add has_asid to device info

Rather than alias supports_usm to ASIS support, add an explicit
variable to indicate ASID support.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 drivers/gpu/drm/xe/xe_lrc.c          | 4 ++--
 drivers/gpu/drm/xe/xe_pci.c          | 3 +++
 drivers/gpu/drm/xe/xe_vm.c           | 4 ++--
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index ef723b08de89e..4c4a912141a97 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -81,6 +81,8 @@ struct xe_device {
 		u8 media_ver;
 		/** @supports_usm: Supports unified shared memory */
 		bool supports_usm;
+		/** @has_asid: Has address space ID */
+		bool has_asid;
 		/** @enable_guc: GuC submission enabled */
 		bool enable_guc;
 		/** @has_flat_ccs: Whether flat CCS metadata is used */
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 056c2c5a0b818..347ff9b344942 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -682,14 +682,14 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
 			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
-	if (xe->info.supports_usm && vm) {
+	if (xe->info.has_asid && vm)
 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
 				     (e->usm.acc_granularity <<
 				      ACC_GRANULARITY_S) | vm->usm.asid);
+	if (xe->info.supports_usm && vm)
 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
 				     (e->usm.acc_notify << ACC_NOTIFY_S) |
 				     e->usm.acc_trigger);
-	}
 
 	lrc->desc = GEN8_CTX_VALID;
 	lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 2482ce8e3df43..c159306e04cf4 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -71,6 +71,7 @@ struct xe_device_desc {
 	bool has_flat_ccs;
 	bool has_4tile;
 	bool has_range_tlb_invalidation;
+	bool has_asid;
 };
 
 #define PLATFORM(x)		\
@@ -225,6 +226,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	.max_tiles = 2,
 	.vm_max_level = 4,
 	.supports_usm = true,
+	.has_asid = true,
 };
 
 #define MTL_MEDIA_ENGINES \
@@ -408,6 +410,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	xe->info.vm_max_level = desc->vm_max_level;
 	xe->info.media_ver = desc->media_ver;
 	xe->info.supports_usm = desc->supports_usm;
+	xe->info.has_asid = desc->has_asid;
 	xe->info.has_flat_ccs = desc->has_flat_ccs;
 	xe->info.has_4tile = desc->has_4tile;
 	xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4bbb0d0b09286..04481851fa005 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1429,7 +1429,7 @@ static void vm_destroy_work_func(struct work_struct *w)
 		xe_device_mem_access_put(xe);
 		xe_pm_runtime_put(xe);
 
-		if (xe->info.supports_usm) {
+		if (xe->info.has_asid) {
 			mutex_lock(&xe->usm.lock);
 			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
 			XE_WARN_ON(lookup != vm);
@@ -1926,7 +1926,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		return err;
 	}
 
-	if (xe->info.supports_usm) {
+	if (xe->info.has_asid) {
 		mutex_lock(&xe->usm.lock);
 		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
 				      XA_LIMIT(0, XE_MAX_ASID - 1),
-- 
GitLab


From 5387e865d90e927ba0af9d37855c9bd47cc9d00a Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 27 Jan 2023 13:00:28 -0800
Subject: [PATCH 1315/2340] drm/xe: Add TLB invalidation fence after rebinds
 issued from execs

If we add an TLB invalidation fence for rebinds issued from execs we
should be able to drop the TLB invalidation from the ring operations.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 200 ++++++++++++++++++++-----------------
 1 file changed, 110 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 4299689fe6a8f..23f308184ba1d 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1156,6 +1156,96 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
 	.pre_commit = xe_pt_userptr_pre_commit,
 };
 
+struct invalidation_fence {
+	struct xe_gt_tlb_invalidation_fence base;
+	struct xe_gt *gt;
+	struct xe_vma *vma;
+	struct dma_fence *fence;
+	struct dma_fence_cb cb;
+	struct work_struct work;
+};
+
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	return "xe";
+}
+
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	return "invalidation_fence";
+}
+
+static const struct dma_fence_ops invalidation_fence_ops = {
+	.get_driver_name = invalidation_fence_get_driver_name,
+	.get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+static void invalidation_fence_cb(struct dma_fence *fence,
+				  struct dma_fence_cb *cb)
+{
+	struct invalidation_fence *ifence =
+		container_of(cb, struct invalidation_fence, cb);
+
+	trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base);
+	if (!ifence->fence->error) {
+		queue_work(system_wq, &ifence->work);
+	} else {
+		ifence->base.base.error = ifence->fence->error;
+		dma_fence_signal(&ifence->base.base);
+		dma_fence_put(&ifence->base.base);
+	}
+	dma_fence_put(ifence->fence);
+}
+
+static void invalidation_fence_work_func(struct work_struct *w)
+{
+	struct invalidation_fence *ifence =
+		container_of(w, struct invalidation_fence, work);
+
+	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
+	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
+}
+
+static int invalidation_fence_init(struct xe_gt *gt,
+				   struct invalidation_fence *ifence,
+				   struct dma_fence *fence,
+				   struct xe_vma *vma)
+{
+	int ret;
+
+	trace_xe_gt_tlb_invalidation_fence_create(&ifence->base);
+
+	spin_lock_irq(&gt->tlb_invalidation.lock);
+	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
+		       &gt->tlb_invalidation.lock,
+		       gt->tlb_invalidation.fence_context,
+		       ++gt->tlb_invalidation.fence_seqno);
+	spin_unlock_irq(&gt->tlb_invalidation.lock);
+
+	INIT_LIST_HEAD(&ifence->base.link);
+
+	dma_fence_get(&ifence->base.base);	/* Ref for caller */
+	ifence->fence = fence;
+	ifence->gt = gt;
+	ifence->vma = vma;
+
+	INIT_WORK(&ifence->work, invalidation_fence_work_func);
+	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
+	if (ret == -ENOENT) {
+		dma_fence_put(ifence->fence);	/* Usually dropped in CB */
+		invalidation_fence_work_func(&ifence->work);
+	} else if (ret) {
+		dma_fence_put(&ifence->base.base);	/* Caller ref */
+		dma_fence_put(&ifence->base.base);	/* Creation ref */
+	}
+
+	XE_WARN_ON(ret && ret != -ENOENT);
+
+	return ret && ret != -ENOENT ? ret : 0;
+}
+
 /**
  * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
  * address range.
@@ -1194,6 +1284,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 	struct xe_vm *vm = vma->vm;
 	u32 num_entries;
 	struct dma_fence *fence;
+	struct invalidation_fence *ifence = NULL;
 	int err;
 
 	bind_pt_update.locked = false;
@@ -1212,6 +1303,12 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 
 	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
 
+	if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
+		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+		if (!ifence)
+			return ERR_PTR(-ENOMEM);
+	}
+
 	fence = xe_migrate_update_pgtables(gt->migrate,
 					   vm, vma->bo,
 					   e ? e : vm->eng[gt->info.id],
@@ -1221,6 +1318,18 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 	if (!IS_ERR(fence)) {
 		LLIST_HEAD(deferred);
 
+		/* TLB invalidation must be done before signaling rebind */
+		if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
+			int err = invalidation_fence_init(gt, ifence, fence,
+							  vma);
+			if (err) {
+				dma_fence_put(fence);
+				kfree(ifence);
+				return ERR_PTR(err);
+			}
+			fence = &ifence->base.base;
+		}
+
 		/* add shared fence now for pagetable delayed destroy */
 		dma_resv_add_fence(&vm->resv, fence, !rebind &&
 				   vma->last_munmap_rebind ?
@@ -1246,6 +1355,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 			queue_work(vm->xe->ordered_wq,
 				   &vm->preempt.rebind_work);
 	} else {
+		kfree(ifence);
 		if (bind_pt_update.locked)
 			up_read(&vm->userptr.notifier_lock);
 		xe_pt_abort_bind(vma, entries, num_entries);
@@ -1463,96 +1573,6 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
 	.pre_commit = xe_pt_userptr_pre_commit,
 };
 
-struct invalidation_fence {
-	struct xe_gt_tlb_invalidation_fence base;
-	struct xe_gt *gt;
-	struct xe_vma *vma;
-	struct dma_fence *fence;
-	struct dma_fence_cb cb;
-	struct work_struct work;
-};
-
-static const char *
-invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
-{
-	return "xe";
-}
-
-static const char *
-invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
-{
-	return "invalidation_fence";
-}
-
-static const struct dma_fence_ops invalidation_fence_ops = {
-	.get_driver_name = invalidation_fence_get_driver_name,
-	.get_timeline_name = invalidation_fence_get_timeline_name,
-};
-
-static void invalidation_fence_cb(struct dma_fence *fence,
-				  struct dma_fence_cb *cb)
-{
-	struct invalidation_fence *ifence =
-		container_of(cb, struct invalidation_fence, cb);
-
-	trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base);
-	if (!ifence->fence->error) {
-		queue_work(system_wq, &ifence->work);
-	} else {
-		ifence->base.base.error = ifence->fence->error;
-		dma_fence_signal(&ifence->base.base);
-		dma_fence_put(&ifence->base.base);
-	}
-	dma_fence_put(ifence->fence);
-}
-
-static void invalidation_fence_work_func(struct work_struct *w)
-{
-	struct invalidation_fence *ifence =
-		container_of(w, struct invalidation_fence, work);
-
-	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
-}
-
-static int invalidation_fence_init(struct xe_gt *gt,
-				   struct invalidation_fence *ifence,
-				   struct dma_fence *fence,
-				   struct xe_vma *vma)
-{
-	int ret;
-
-	trace_xe_gt_tlb_invalidation_fence_create(&ifence->base);
-
-	spin_lock_irq(&gt->tlb_invalidation.lock);
-	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
-		       &gt->tlb_invalidation.lock,
-		       gt->tlb_invalidation.fence_context,
-		       ++gt->tlb_invalidation.fence_seqno);
-	spin_unlock_irq(&gt->tlb_invalidation.lock);
-
-	INIT_LIST_HEAD(&ifence->base.link);
-
-	dma_fence_get(&ifence->base.base);	/* Ref for caller */
-	ifence->fence = fence;
-	ifence->gt = gt;
-	ifence->vma = vma;
-
-	INIT_WORK(&ifence->work, invalidation_fence_work_func);
-	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
-	if (ret == -ENOENT) {
-		dma_fence_put(ifence->fence);	/* Usually dropped in CB */
-		invalidation_fence_work_func(&ifence->work);
-	} else if (ret) {
-		dma_fence_put(&ifence->base.base);	/* Caller ref */
-		dma_fence_put(&ifence->base.base);	/* Creation ref */
-	}
-
-	XE_WARN_ON(ret && ret != -ENOENT);
-
-	return ret && ret != -ENOENT ? ret : 0;
-}
-
 /**
  * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
  * address range.
-- 
GitLab


From 50a48cca608102a53a0961bd95aefb53a8ced3ab Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 26 Jan 2023 10:40:41 -0800
Subject: [PATCH 1316/2340] drm/xe: Drop TLB invalidation from ring operations

Now that we issue TLB invalidations on unbinds and rebind from execs we
no longer need to issue TLB invalidations from the ring operations.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 40 +-------------------------------
 1 file changed, 1 insertion(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index fda7978a63e04..54db4ca19a36e 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -83,31 +83,6 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
 	return i;
 }
 
-static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
-{
-	u32 flags = PIPE_CONTROL_CS_STALL |
-		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
-		PIPE_CONTROL_TLB_INVALIDATE |
-		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
-		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
-		PIPE_CONTROL_VF_CACHE_INVALIDATE |
-		PIPE_CONTROL_CONST_CACHE_INVALIDATE |
-		PIPE_CONTROL_STATE_CACHE_INVALIDATE |
-		PIPE_CONTROL_QW_WRITE |
-		PIPE_CONTROL_STORE_DATA_INDEX;
-
-	flags &= ~mask_flags;
-
-	dw[i++] = GFX_OP_PIPE_CONTROL(6);
-	dw[i++] = flags;
-	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
-	dw[i++] = 0;
-	dw[i++] = 0;
-	dw[i++] = 0;
-
-	return i;
-}
-
 #define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))
 
 static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
@@ -148,11 +123,6 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
 
-	/* XXX: Conditional flushing possible */
-	dw[i++] = preparser_disable(true);
-	i = emit_flush_invalidate(0, dw, i);
-	dw[i++] = preparser_disable(false);
-
 	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
 				seqno, dw, i);
 
@@ -181,9 +151,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	struct xe_device *xe = gt_to_xe(gt);
 	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
 
-	/* XXX: Conditional flushing possible */
 	dw[i++] = preparser_disable(true);
-	i = emit_flush_invalidate(decode ? MI_INVALIDATE_BSD : 0, dw, i);
 	/* Wa_1809175790 */
 	if (!xe->info.has_flat_ccs) {
 		if (decode)
@@ -244,15 +212,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	bool pvc = xe->info.platform == XE_PVC;
-	u32 mask_flags = 0;
 
-	/* XXX: Conditional flushing possible */
 	dw[i++] = preparser_disable(true);
-	if (pvc)
-		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
-	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
-		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
-	i = emit_pipe_invalidate(mask_flags, dw, i);
 	/* Wa_1809175790 */
 	if (!xe->info.has_flat_ccs)
 		i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i);
@@ -287,6 +248,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 
 	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
 
+	/* XXX: Do we need this? Leaving for now. */
 	dw[i++] = preparser_disable(true);
 	i = emit_flush_invalidate(0, dw, i);
 	dw[i++] = preparser_disable(false);
-- 
GitLab


From 77775e24e684c761d44ba2f804581c0c42e0ad38 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 31 Jan 2023 23:36:39 +0100
Subject: [PATCH 1317/2340] drm/xe: Add debugfs for dumping GGTT mappings

Adding a debugfs dump of GGTT was useful for some debugging I did,
and easy to add. Might be useful for others too.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c       | 13 +++++++++++++
 drivers/gpu/drm/xe/xe_ggtt.h       |  4 ++++
 drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
 3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 20450ed8400b1..907a603572b2b 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -345,3 +345,16 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 
 	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
 }
+
+int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&ggtt->lock);
+	if (err)
+		return err;
+
+	drm_mm_print(&ggtt->mm, p);
+	mutex_unlock(&ggtt->lock);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index ab9cfdab5cca1..3469aa2b1a02e 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -8,6 +8,8 @@
 
 #include "xe_ggtt_types.h"
 
+struct drm_printer;
+
 u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset);
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
 void xe_ggtt_invalidate(struct xe_gt *gt);
@@ -26,4 +28,6 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs);
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 
+int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index daae42d3ab3bb..c320e58810ce3 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -8,6 +8,7 @@
 
 #include "xe_device.h"
 #include "xe_force_wake.h"
+#include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_gt_debugfs.h"
 #include "xe_gt_mcr.h"
@@ -88,12 +89,21 @@ static int steering(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int ggtt(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	return xe_ggtt_dump(gt->mem.ggtt, &p);
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
 	{"sa_info", sa_info, 0},
 	{"topology", topology, 0},
 	{"steering", steering, 0},
+	{"ggtt", ggtt, 0},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
-- 
GitLab


From 62421b45d431dc6f023334800eae1bffb1e77eb2 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 20 Jan 2023 16:59:09 -0800
Subject: [PATCH 1318/2340] drm/xe: Fix typo in MCR documentation

Add missing "multicast" word and adapt/wrap the rest of the sentence.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index ddce2c41c7f51..7c97031cd716c 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -23,12 +23,12 @@
  *
  * MMIO accesses to MCR registers are controlled according to the settings
  * programmed in the platform's MCR_SELECTOR register(s).  MMIO writes to MCR
- * registers can be done in either a (i.e., a single write updates all
+ * registers can be done in either multicast (a single write updates all
  * instances of the register to the same value) or unicast (a write updates only
- * one specific instance).  Reads of MCR registers always operate in a unicast
- * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR.
- * Selection of a specific MCR instance for unicast operations is referred to
- * as "steering."
+ * one specific instance) form.  Reads of MCR registers always operate in a
+ * unicast manner regardless of how the multicast/unicast bit is set in
+ * MCR_SELECTOR.  Selection of a specific MCR instance for unicast operations is
+ * referred to as "steering."
  *
  * If MCR register operations are steered toward a hardware unit that is
  * fused off or currently powered down due to power gating, the MMIO operation
-- 
GitLab


From b799aa5a04d09c4b3abe79b1c6563d54823410e6 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 25 Jan 2023 14:14:38 -0800
Subject: [PATCH 1319/2340] drm/xe: Fix xe_tuning include

xe_tuning.c should include xe_tuning.h, not xe_wa.h

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_tuning.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index e043db0373687..96b16b8d03cf0 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -3,7 +3,7 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include "xe_wa.h"
+#include "xe_tuning.h"
 
 #include "xe_platform_types.h"
 #include "xe_gt_types.h"
-- 
GitLab


From 2679be71f1372e8fac07d1be5443a5ba26b27345 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 23 Jan 2023 09:38:27 -0800
Subject: [PATCH 1320/2340] drm/xe: Remove TODO from rtp infra

The function pointer is already present as match_func, inside
struct xe_rtp_rule and handled as so instead of inside rtp_regval as
originally thought out when this was written.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp_types.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index b55b556a24958..630a2ec53fc6f 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -24,10 +24,6 @@ enum {
 struct xe_rtp_regval {
 	/** @reg: Register */
 	u32		reg;
-	/*
-	 * TODO: maybe we need a union here with a func pointer for cases
-	 * that are too specific to be generalized
-	 */
 	/** @clr_bits: bits to clear when updating register */
 	u32		clr_bits;
 	/** @set_bits: bits to set when updating register */
-- 
GitLab


From 43f98df1f5f0ef94d79ba2ef4f841a3f547f7a04 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 25 Jan 2023 13:10:24 -0800
Subject: [PATCH 1321/2340] drm/xe: Remove TODO from workaround documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LRC workarounds are already implemented: remove leftover TODO.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index b56141ba7145d..699a39fb786a2 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -35,9 +35,6 @@
  *   creation to have a "primed golden context", i.e. a context image that
  *   already contains the changes needed to all the registers.
  *
- *   TODO: Although these workarounds are maintained here, they are not
- *   currently being applied.
- *
  * - Engine workarounds: the list of these WAs is applied whenever the specific
  *   engine is reset. It's also possible that a set of engine classes share a
  *   common power domain and they are reset together. This happens on some
-- 
GitLab


From 3319b213d7c8bdeaa001fec7b60aefa2390112d4 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 30 Jan 2023 14:14:37 -0800
Subject: [PATCH 1322/2340] drm/xe/mcr: Use designated init for
 xe_steering_types

There is already a BUILD_BUG_ON() check to make sure the size follow the
number of steering types. Also make sure the right index is being used
for each steering type.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 7c97031cd716c..d7c89f7b56e2a 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -249,12 +249,12 @@ static const struct {
 	const char *name;
 	void (*init)(struct xe_gt *);
 } xe_steering_types[] = {
-	{ "L3BANK",	init_steering_l3bank },
-	{ "MSLICE",	init_steering_mslice },
-	{ "LNCF",	NULL },		/* initialized by mslice init */
-	{ "DSS",	init_steering_dss },
-	{ "OADDRM",	init_steering_oaddrm },
-	{ "INSTANCE 0",	init_steering_inst0 },
+	[L3BANK] =	{ "L3BANK",	init_steering_l3bank },
+	[MSLICE] =	{ "MSLICE",	init_steering_mslice },
+	[LNCF] =	{ "LNCF",	NULL }, /* initialized by mslice init */
+	[DSS] =		{ "DSS",	init_steering_dss },
+	[OADDRM] =	{ "OADDRM",	init_steering_oaddrm },
+	[INSTANCE0] =	{ "INSTANCE 0",	init_steering_inst0 },
 };
 
 void xe_gt_mcr_init(struct xe_gt *gt)
-- 
GitLab


From 564d64f83de9759c1faa4a64ee4aed8465281ecb Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 30 Jan 2023 17:08:37 -0800
Subject: [PATCH 1323/2340] drm/xe/mcr: Add SQIDI steering for DG2

Like detailed in commit 927dfdd09d8c ("drm/i915/dg2: Add SQIDI
steering"), some registers are expected to have the selector
initialized just once and never set to anything else. For xe, the
registers with SQIDI replication type (SF and MCFG) were missing,
resulting in warnings like:

	[  410.685565] xe 0000:03:00.0: Did not find MCR register 0x8724 in any MCR steering table

While adding these registers, abstract the handling for
"dg2_gam_ranges", moving them together with SF/MCFG to a dedicated
table. This also avoids that range to be checked for platforms other
than DG2. For DG2, this is the new steering output:

	# cat /sys/kernel/debug/dri/0/gt0/steering
	...
	IMPLICIT steering: group=0x0, instance=0x0
		0x000b00 - 0x000bff
		0x001000 - 0x001fff
		0x004000 - 0x004aff
		0x008700 - 0x0087ff
		0x00c800 - 0x00cfff
		0x00f000 - 0x00ffff

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c       |  2 +
 drivers/gpu/drm/xe/xe_gt_mcr.c   | 80 +++++++++++++++++++-------------
 drivers/gpu/drm/xe/xe_gt_mcr.h   |  2 +
 drivers/gpu/drm/xe/xe_gt_types.h |  7 +++
 4 files changed, 59 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0e0d5cadb3e7d..20dbc08d36855 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -500,6 +500,7 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_hw_fence_irq;
 
+	xe_gt_mcr_set_implicit_defaults(gt);
 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
 
 	err = xe_gt_clock_init(gt);
@@ -633,6 +634,7 @@ static int do_gt_restart(struct xe_gt *gt)
 
 	setup_private_ppat(gt);
 
+	xe_gt_mcr_set_implicit_defaults(gt);
 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
 
 	err = xe_wopcm_init(&gt->uc.wopcm);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index d7c89f7b56e2a..bb71071c3435f 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -155,15 +155,13 @@ static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
 	{},
 };
 
-/*
- * DG2 GAM registers are a special case; this table is checked directly in
- * xe_gt_mcr_get_nonterminated_steering and is not hooked up via
- * gt->steering[].
- */
-static const struct xe_mmio_range dg2_gam_ranges[] = {
-	{ 0x004000, 0x004AFF },
-	{ 0x00C800, 0x00CFFF },
-	{ 0x00F000, 0x00FFFF },
+static const struct xe_mmio_range dg2_implicit_steering_table[] = {
+	{ 0x000B00, 0x000BFF },		/* SF (SQIDI replication) */
+	{ 0x001000, 0x001FFF },		/* SF (SQIDI replication) */
+	{ 0x004000, 0x004AFF },		/* GAM (MSLICE replication) */
+	{ 0x008700, 0x0087FF },		/* MCFG (SQIDI replication) */
+	{ 0x00C800, 0x00CFFF },		/* GAM (MSLICE replication) */
+	{ 0x00F000, 0x00FFFF },		/* GAM (MSLICE replication) */
 	{},
 };
 
@@ -255,12 +253,14 @@ static const struct {
 	[DSS] =		{ "DSS",	init_steering_dss },
 	[OADDRM] =	{ "OADDRM",	init_steering_oaddrm },
 	[INSTANCE0] =	{ "INSTANCE 0",	init_steering_inst0 },
+	[IMPLICIT_STEERING] = { "IMPLICIT", NULL },
 };
 
 void xe_gt_mcr_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
+	BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES);
 	BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
 
 	spin_lock_init(&gt->mcr_lock);
@@ -280,6 +280,7 @@ void xe_gt_mcr_init(struct xe_gt *gt)
 		gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
 		gt->steering[LNCF].ranges = xehp_lncf_steering_table;
 		gt->steering[DSS].ranges = xehp_dss_steering_table;
+		gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table;
 	} else {
 		gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
 		gt->steering[DSS].ranges = xelp_dss_steering_table;
@@ -291,6 +292,33 @@ void xe_gt_mcr_init(struct xe_gt *gt)
 			xe_steering_types[i].init(gt);
 }
 
+/**
+ * xe_gt_mcr_set_implicit_defaults - Initialize steer control registers
+ * @gt: GT structure
+ *
+ * Some register ranges don't need to have their steering control registers
+ * changed on each access - it's sufficient to set them once on initialization.
+ * This function sets those registers for each platform *
+ */
+void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.platform == XE_DG2) {
+		u32 steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, 0) |
+			REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, 2);
+
+		xe_mmio_write32(gt, MCFG_MCR_SELECTOR.reg, steer_val);
+		xe_mmio_write32(gt, SF_MCR_SELECTOR.reg, steer_val);
+		/*
+		 * For GAM registers, all reads should be directed to instance 1
+		 * (unicast reads against other instances are not allowed),
+		 * and instance 1 is already the hardware's default steering
+		 * target, which we never change
+		 */
+	}
+}
+
 /*
  * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
  *    will steer a register to a non-terminated instance
@@ -305,14 +333,15 @@ void xe_gt_mcr_init(struct xe_gt *gt)
  * steering.
  *
  * Returns true if the caller should steer to the @group/@instance values
- * returned.  Returns false if the caller need not perform any steering (i.e.,
- * the DG2 GAM range special case).
+ * returned.  Returns false if the caller need not perform any steering
  */
 static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 						 i915_mcr_reg_t reg,
 						 u8 *group, u8 *instance)
 {
-	for (int type = 0; type < NUM_STEERING_TYPES; type++) {
+	const struct xe_mmio_range *implicit_ranges;
+
+	for (int type = 0; type < IMPLICIT_STEERING; type++) {
 		if (!gt->steering[type].ranges)
 			continue;
 
@@ -325,27 +354,15 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 		}
 	}
 
-	/*
-	 * All MCR registers should usually be part of one of the steering
-	 * ranges we're tracking.  However there's one special case:  DG2
-	 * GAM registers are technically multicast registers, but are special
-	 * in a number of ways:
-	 *  - they have their own dedicated steering control register (they
-	 *    don't share 0xFDC with other MCR classes)
-	 *  - all reads should be directed to instance 1 (unicast reads against
-	 *    other instances are not allowed), and instance 1 is already the
-	 *    the hardware's default steering target, which we never change
-	 *
-	 * Ultimately this means that we can just treat them as if they were
-	 * unicast registers and all operations will work properly.
-	 */
-	for (int i = 0; dg2_gam_ranges[i].end > 0; i++)
-		if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg))
-			return false;
+	implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
+	if (implicit_ranges)
+		for (int i = 0; implicit_ranges[i].end > 0; i++)
+			if (xe_mmio_in_range(&implicit_ranges[i], reg.reg))
+				return false;
 
 	/*
-	 * Not found in a steering table and not a DG2 GAM register?  We'll
-	 * just steer to 0/0 as a guess and raise a warning.
+	 * Not found in a steering table and not a register with implicit
+	 * steering. Just steer to 0/0 as a guess and raise a warning.
 	 */
 	drm_WARN(&gt_to_xe(gt)->drm, true,
 		 "Did not find MCR register %#x in any MCR steering table\n",
@@ -467,7 +484,6 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
 					   group, instance, 0);
 		mcr_unlock(gt);
 	} else {
-		/* DG2 GAM special case rules; treat as if unicast */
 		val = xe_mmio_read32(gt, reg.reg);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index 62ec6eb654a0b..c31987d2177c9 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -13,6 +13,8 @@ struct xe_gt;
 
 void xe_gt_mcr_init(struct xe_gt *gt);
 
+void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt);
+
 u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg,
 			   int group, int instance);
 u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index a40fab262ac9e..b01edd3fdc4d5 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -66,6 +66,13 @@ enum xe_steering_type {
 	 */
 	INSTANCE0,
 
+	/*
+	 * Register ranges that don't need special steering for each register:
+	 * it's sufficient to keep the HW-default for the selector, or only
+	 * change it once, on GT initialization. This needs to be the last
+	 * steering type.
+	 */
+	IMPLICIT_STEERING,
 	NUM_STEERING_TYPES
 };
 
-- 
GitLab


From 3747c88428a199620ca626a196781516c6da12e6 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 25 Jan 2023 15:03:07 -0800
Subject: [PATCH 1324/2340] drm/xe: Rename xe_rtp_regval to xe_rtp_action

It's true that the struct records the register and the value (in form of
2 masks) to restore, but it also records more fields important to
the application of workarounds/tuning, etc. One important part is what
is the macro used to record these fields: SET/CLR/WR/FIELD_SET/etc.

Thinking of the table as a set of rules + actions is more intuitive than
rules + regval.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c       | 16 ++++++++--------
 drivers/gpu/drm/xe/xe_rtp.h       | 26 +++++++++++++-------------
 drivers/gpu/drm/xe/xe_rtp_types.h |  9 ++++++---
 3 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 9e8d0e43c6438..d3484b906d4ac 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -91,13 +91,13 @@ static void rtp_add_sr_entry(const struct xe_rtp_entry *entry,
 			     u32 mmio_base,
 			     struct xe_reg_sr *sr)
 {
-	u32 reg = entry->regval.reg + mmio_base;
+	u32 reg = entry->action.reg + mmio_base;
 	struct xe_reg_sr_entry sr_entry = {
-		.clr_bits = entry->regval.clr_bits,
-		.set_bits = entry->regval.set_bits,
-		.read_mask = entry->regval.read_mask,
-		.masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG,
-		.reg_type = entry->regval.reg_type,
+		.clr_bits = entry->action.clr_bits,
+		.set_bits = entry->action.set_bits,
+		.read_mask = entry->action.read_mask,
+		.masked_reg = entry->action.flags & XE_RTP_FLAG_MASKED_REG,
+		.reg_type = entry->action.reg_type,
 	};
 
 	xe_reg_sr_add(sr, reg, &sr_entry);
@@ -124,7 +124,7 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 	for (entry = entries; entry && entry->name; entry++) {
 		u32 mmio_base = 0;
 
-		if (entry->regval.flags & XE_RTP_FLAG_FOREACH_ENGINE) {
+		if (entry->action.flags & XE_RTP_FLAG_FOREACH_ENGINE) {
 			struct xe_hw_engine *each_hwe;
 			enum xe_hw_engine_id id;
 
@@ -135,7 +135,7 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 					rtp_add_sr_entry(entry, gt, mmio_base, sr);
 			}
 		} else if (rule_matches(gt, hwe, entry)) {
-			if (entry->regval.flags & XE_RTP_FLAG_ENGINE_BASE)
+			if (entry->action.flags & XE_RTP_FLAG_ENGINE_BASE)
 				mmio_base = hwe->mmio_base;
 
 			rtp_add_sr_entry(entry, gt, mmio_base, sr);
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index d4e11fdde77fd..d86c6ba92b03b 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -199,21 +199,21 @@ struct xe_reg_sr;
  * XE_RTP_WR - Helper to write a value to the register, overriding all the bits
  * @reg_: Register
  * @val_: Value to set
- * @...: Additional fields to override in the struct xe_rtp_regval entry
+ * @...: Additional fields to override in the struct xe_rtp_action entry
  *
  * The correspondent notation in bspec is:
  *
  *	REGNAME = VALUE
  */
 #define XE_RTP_WR(reg_, val_, ...)						\
-	.regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_),		\
+	.action = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_),		\
 		    .read_mask = (~0u), ##__VA_ARGS__ }
 
 /**
  * XE_RTP_SET - Set bits from @val_ in the register.
  * @reg_: Register
  * @val_: Bits to set in the register
- * @...: Additional fields to override in the struct xe_rtp_regval entry
+ * @...: Additional fields to override in the struct xe_rtp_action entry
  *
  * For masked registers this translates to a single write, while for other
  * registers it's a RMW. The correspondent bspec notation is (example for bits 2
@@ -223,14 +223,14 @@ struct xe_reg_sr;
  *	REGNAME[5] = 1
  */
 #define XE_RTP_SET(reg_, val_, ...)						\
-	.regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_),	\
+	.action = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_),	\
 		    .read_mask = (val_), ##__VA_ARGS__ }
 
 /**
  * XE_RTP_CLR: Clear bits from @val_ in the register.
  * @reg_: Register
  * @val_: Bits to clear in the register
- * @...: Additional fields to override in the struct xe_rtp_regval entry
+ * @...: Additional fields to override in the struct xe_rtp_action entry
  *
  * For masked registers this translates to a single write, while for other
  * registers it's a RMW. The correspondent bspec notation is (example for bits 2
@@ -240,7 +240,7 @@ struct xe_reg_sr;
  *	REGNAME[5] = 0
  */
 #define XE_RTP_CLR(reg_, val_, ...)						\
-	.regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0,		\
+	.action = { .reg = reg_, .clr_bits = (val_), .set_bits = 0,		\
 		    .read_mask = (val_), ##__VA_ARGS__ }
 
 /**
@@ -248,7 +248,7 @@ struct xe_reg_sr;
  * @reg_: Register
  * @mask_bits_: Mask of bits to be changed in the register, forming a field
  * @val_: Value to set in the field denoted by @mask_bits_
- * @...: Additional fields to override in the struct xe_rtp_regval entry
+ * @...: Additional fields to override in the struct xe_rtp_action entry
  *
  * For masked registers this translates to a single write, while for other
  * registers it's a RMW. The correspondent bspec notation is:
@@ -256,25 +256,25 @@ struct xe_reg_sr;
  *	REGNAME[<end>:<start>] = VALUE
  */
 #define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...)				\
-	.regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+	.action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
 		    .read_mask = (mask_bits_), ##__VA_ARGS__ }
 
 #define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...)		\
-	.regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+	.action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
 		    .read_mask = 0, ##__VA_ARGS__ }
 
 /**
  * XE_WHITELIST_REGISTER - Add register to userspace whitelist
  * @reg_: Register
  * @flags_: Whitelist-specific flags to set
- * @...: Additional fields to override in the struct xe_rtp_regval entry
+ * @...: Additional fields to override in the struct xe_rtp_action entry
  *
  * Add a register to the whitelist, allowing userspace to modify the ster with
  * regular user privileges.
  */
 #define XE_WHITELIST_REGISTER(reg_, flags_, ...)				\
 	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
-	.regval = { .reg = reg_, .set_bits = (flags_),			\
+	.action = { .reg = reg_, .set_bits = (flags_),			\
 		    .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,		\
 		    ##__VA_ARGS__ }
 
@@ -287,12 +287,12 @@ struct xe_reg_sr;
 #define XE_RTP_NAME(s_)	.name = (s_)
 
 /**
- * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry
+ * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_action entry
  * @f1_: Last part of a ``XE_RTP_FLAG_*``
  * @...: Additional flags, defined like @f1_
  *
  * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be
- * easily used to define struct xe_rtp_regval entries. Example:
+ * easily used to define struct xe_rtp_action entries. Example:
  *
  * .. code-block:: c
  *
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 630a2ec53fc6f..9cd722f310cbc 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -19,9 +19,12 @@ enum {
 };
 
 /**
- * struct xe_rtp_regval - register and value for rtp table
+ * struct xe_rtp_action - action to take for any matching rule
+ *
+ * This struct records what action should be taken in a register that has a
+ * matching rule. Example of actions: set/clear bits.
  */
-struct xe_rtp_regval {
+struct xe_rtp_action {
 	/** @reg: Register */
 	u32		reg;
 	/** @clr_bits: bits to clear when updating register */
@@ -93,7 +96,7 @@ struct xe_rtp_rule {
 /** struct xe_rtp_entry - Entry in an rtp table */
 struct xe_rtp_entry {
 	const char *name;
-	const struct xe_rtp_regval regval;
+	const struct xe_rtp_action action;
 	const struct xe_rtp_rule *rules;
 	unsigned int n_rules;
 };
-- 
GitLab


From 944a5e993a3e8a54ec56feec3253bb6b6f5c90d7 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 25 Jan 2023 16:40:02 -0800
Subject: [PATCH 1325/2340] drm/xe/rtp: Split action and entry flags

Entry flags is meant for the whole entry, including the rule
evaluation. Action flags are for flags applied to the register or
action being taken. Since there's only one action per entry, the
distinction was not important and a u8 was spared. However more and more
workarounds are needing multiple actions. This prepares for multiple
action support.

Right now there are these action flags:

 - XE_RTP_ACTION_FLAG_MASKED_REG: register in the action is a masked
   register
 - XE_RTP_ACTION_FLAG_ENGINE_BASE: the engine base should be added to
   the register in order to form the real address

And this entry flag:

 - XE_RTP_ENTRY_FLAG_FOREACH_ENGINE: the rules should be evaluated for
   each engine on the gt. It also automatically implies
   XE_RTP_ACTION_FLAG_ENGINE_BASE.

Since there are likely not that many rules, reduce n_rules to u8 so the
overall entry size doesn't increase more than needed.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_whitelist.c |  2 +-
 drivers/gpu/drm/xe/xe_rtp.c           | 38 +++++++++++++++----------
 drivers/gpu/drm/xe/xe_rtp.h           | 41 +++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_rtp_types.h     |  9 +++---
 drivers/gpu/drm/xe/xe_wa.c            | 36 +++++++++++------------
 5 files changed, 80 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 2e0c87b723952..469b274198b19 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -42,7 +42,7 @@ static const struct xe_rtp_entry register_whitelist[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
 	  XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0),
 				RING_FORCE_TO_NONPRIV_ACCESS_RD,
-				XE_RTP_FLAG(ENGINE_BASE))
+				XE_RTP_ACTION_FLAG(ENGINE_BASE))
 	},
 	{ XE_RTP_NAME("16014440446_part_1"),
 	  XE_RTP_RULES(PLATFORM(PVC)),
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index d3484b906d4ac..11135db1a19d8 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -96,13 +96,30 @@ static void rtp_add_sr_entry(const struct xe_rtp_entry *entry,
 		.clr_bits = entry->action.clr_bits,
 		.set_bits = entry->action.set_bits,
 		.read_mask = entry->action.read_mask,
-		.masked_reg = entry->action.flags & XE_RTP_FLAG_MASKED_REG,
+		.masked_reg = entry->action.flags & XE_RTP_ACTION_FLAG_MASKED_REG,
 		.reg_type = entry->action.reg_type,
 	};
 
 	xe_reg_sr_add(sr, reg, &sr_entry);
 }
 
+static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt,
+			    struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
+{
+	u32 mmio_base;
+
+	if (!rule_matches(gt, hwe, entry))
+		return;
+
+	if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
+	    (entry->action.flags & XE_RTP_ACTION_FLAG_ENGINE_BASE))
+		mmio_base = hwe->mmio_base;
+	else
+		mmio_base = 0;
+
+	rtp_add_sr_entry(entry, gt, mmio_base, sr);
+}
+
 /**
  * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr
  * @entries: Table with RTP definitions
@@ -122,23 +139,14 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 	const struct xe_rtp_entry *entry;
 
 	for (entry = entries; entry && entry->name; entry++) {
-		u32 mmio_base = 0;
-
-		if (entry->action.flags & XE_RTP_FLAG_FOREACH_ENGINE) {
+		if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
 			struct xe_hw_engine *each_hwe;
 			enum xe_hw_engine_id id;
 
-			for_each_hw_engine(each_hwe, gt, id) {
-				mmio_base = each_hwe->mmio_base;
-
-				if (rule_matches(gt, each_hwe, entry))
-					rtp_add_sr_entry(entry, gt, mmio_base, sr);
-			}
-		} else if (rule_matches(gt, hwe, entry)) {
-			if (entry->action.flags & XE_RTP_FLAG_ENGINE_BASE)
-				mmio_base = hwe->mmio_base;
-
-			rtp_add_sr_entry(entry, gt, mmio_base, sr);
+			for_each_hw_engine(each_hwe, gt, id)
+				rtp_process_one(entry, gt, each_hwe, sr);
+		} else {
+			rtp_process_one(entry, gt, hwe, sr);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index d86c6ba92b03b..5d9ad31b00488 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -53,7 +53,8 @@ struct xe_reg_sr;
  * Helper macros for concatenating prefix - do not use them directly outside
  * this header
  */
-#define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) |
+#define __ADD_XE_RTP_ENTRY_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ENTRY_FLAG_, x) |
+#define __ADD_XE_RTP_ACTION_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ACTION_FLAG_, x) |
 #define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) ,
 
 /*
@@ -287,26 +288,50 @@ struct xe_reg_sr;
 #define XE_RTP_NAME(s_)	.name = (s_)
 
 /**
- * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_action entry
- * @f1_: Last part of a ``XE_RTP_FLAG_*``
+ * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry
+ * @f1_: Last part of a ``XE_RTP_ENTRY_FLAG_*``
  * @...: Additional flags, defined like @f1_
  *
- * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be
- * easily used to define struct xe_rtp_action entries. Example:
+ * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to @f1_ so it can
+ * be easily used to define struct xe_rtp_action entries. Example:
  *
  * .. code-block:: c
  *
  *	const struct xe_rtp_entry wa_entries[] = {
  *		...
  *		{ XE_RTP_NAME("test-entry"),
- *		  XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG),
+ *		  ...
+ *		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ENTRY_FLAG(f1_, ...)						\
+	.flags = (CALL_FOR_EACH(__ADD_XE_RTP_ENTRY_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
+
+/**
+ * XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action
+ * @f1_: Last part of a ``XE_RTP_ENTRY_*``
+ * @...: Additional flags, defined like @f1_
+ *
+ * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to @f1_ so it
+ * can be easily used to define struct xe_rtp_action entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  ...
+ *		  XE_RTP_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)),
  *		  ...
  *		},
  *		...
  *	};
  */
-#define XE_RTP_FLAG(f1_, ...)							\
-	.flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
+#define XE_RTP_ACTION_FLAG(f1_, ...)						\
+	.flags = (CALL_FOR_EACH(__ADD_XE_RTP_ACTION_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
 
 /**
  * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 9cd722f310cbc..f7efb17d00e59 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -34,9 +34,8 @@ struct xe_rtp_action {
 #define XE_RTP_NOCHECK		.read_mask = 0
 	/** @read_mask: mask for bits to consider when reading value back */
 	u32		read_mask;
-#define XE_RTP_FLAG_FOREACH_ENGINE	BIT(0)
-#define XE_RTP_FLAG_MASKED_REG		BIT(1)
-#define XE_RTP_FLAG_ENGINE_BASE		BIT(2)
+#define XE_RTP_ACTION_FLAG_MASKED_REG		BIT(0)
+#define XE_RTP_ACTION_FLAG_ENGINE_BASE		BIT(1)
 	/** @flags: flags to apply on rule evaluation or action */
 	u8		flags;
 	/** @reg_type: register type, see ``XE_RTP_REG_*`` */
@@ -98,7 +97,9 @@ struct xe_rtp_entry {
 	const char *name;
 	const struct xe_rtp_action action;
 	const struct xe_rtp_rule *rules;
-	unsigned int n_rules;
+	u8 n_rules;
+#define XE_RTP_ENTRY_FLAG_FOREACH_ENGINE	BIT(0)
+	u8 flags;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 699a39fb786a2..c1c098994c843 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -103,15 +103,15 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
 		       ENGINE_CLASS(VIDEO_DECODE),
 		       FUNC(match_14011060649)),
-	  XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS,
-		     XE_RTP_FLAG(FOREACH_ENGINE))
+	  XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
 	{ XE_RTP_NAME("16010515920"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
 		       STEP(A0, B0),
 		       ENGINE_CLASS(VIDEO_DECODE)),
-	  XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS,
-		     XE_RTP_FLAG(FOREACH_ENGINE))
+	  XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
 	{ XE_RTP_NAME("22010523718"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
@@ -191,12 +191,12 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("14015227452"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("1606931601"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
 	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
@@ -205,47 +205,47 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
 	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("18019627453"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("1409804808"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
 	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("14010229206, 1409085225"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
 	  XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
 		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
 		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("1406941453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG))
+	  XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
 	  XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{}
 };
@@ -255,13 +255,13 @@ static const struct xe_rtp_entry lrc_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3,
 		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 			   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
-			   XE_RTP_FLAG(MASKED_REG))
+			   XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("16011163337"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
@@ -273,13 +273,13 @@ static const struct xe_rtp_entry lrc_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG1)),
 	  XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3,
 		     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{ XE_RTP_NAME("22010493298"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
 	  XE_RTP_SET(HIZ_CHICKEN,
 		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
-		     XE_RTP_FLAG(MASKED_REG))
+		     XE_RTP_ACTION_FLAG(MASKED_REG))
 	},
 	{}
 };
-- 
GitLab


From 844c0700a675a5e30644c867ae7b30cb680d176d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 25 Jan 2023 23:33:38 -0800
Subject: [PATCH 1326/2340] drm/xe/rtp: Support multiple actions per entry

Just like there is support for multiple rules per entry in an rtp table,
also support multiple actions. This makes it easier to add support for
workarounds that need to change multiple registers. It also makes it
slightly more readable as now the action part resembles the rule part.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_whitelist.c |  31 +++---
 drivers/gpu/drm/xe/xe_rtp.c           |  30 +++---
 drivers/gpu/drm/xe/xe_rtp.h           |  97 +++++++++++------
 drivers/gpu/drm/xe/xe_rtp_types.h     |   3 +-
 drivers/gpu/drm/xe/xe_tuning.c        |   7 +-
 drivers/gpu/drm/xe/xe_wa.c            | 146 +++++++++++++-------------
 6 files changed, 179 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 469b274198b19..a34617a642ec6 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -26,35 +26,32 @@ static bool match_not_render(const struct xe_gt *gt,
 static const struct xe_rtp_entry register_whitelist[] = {
 	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT,
-				RING_FORCE_TO_NONPRIV_ACCESS_RD |
-				RING_FORCE_TO_NONPRIV_RANGE_4)
+	  XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT,
+				   RING_FORCE_TO_NONPRIV_ACCESS_RD |
+				   RING_FORCE_TO_NONPRIV_RANGE_4))
 	},
 	{ XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0)
+	  XE_RTP_ACTIONS(WHITELIST(GEN7_COMMON_SLICE_CHICKEN1, 0))
 	},
 	{ XE_RTP_NAME("1806527549"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0)
+	  XE_RTP_ACTIONS(WHITELIST(HIZ_CHICKEN, 0))
 	},
 	{ XE_RTP_NAME("allow_read_ctx_timestamp"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
-	  XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0),
+	  XE_RTP_ACTIONS(WHITELIST(RING_CTX_TIMESTAMP(0),
 				RING_FORCE_TO_NONPRIV_ACCESS_RD,
-				XE_RTP_ACTION_FLAG(ENGINE_BASE))
+				XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 	},
-	{ XE_RTP_NAME("16014440446_part_1"),
+	{ XE_RTP_NAME("16014440446"),
 	  XE_RTP_RULES(PLATFORM(PVC)),
-	  XE_WHITELIST_REGISTER(_MMIO(0x4400),
-				RING_FORCE_TO_NONPRIV_DENY |
-				RING_FORCE_TO_NONPRIV_RANGE_64)
-	},
-	{ XE_RTP_NAME("16014440446_part_2"),
-	  XE_RTP_RULES(PLATFORM(PVC)),
-	  XE_WHITELIST_REGISTER(_MMIO(0x4500),
-				RING_FORCE_TO_NONPRIV_DENY |
-				RING_FORCE_TO_NONPRIV_RANGE_64)
+	  XE_RTP_ACTIONS(WHITELIST(_MMIO(0x4400),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64),
+			 WHITELIST(_MMIO(0x4500),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64))
 	},
 	{}
 };
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 11135db1a19d8..5b1316b588d81 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -86,18 +86,18 @@ static bool rule_matches(struct xe_gt *gt,
 	return true;
 }
 
-static void rtp_add_sr_entry(const struct xe_rtp_entry *entry,
+static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 			     struct xe_gt *gt,
 			     u32 mmio_base,
 			     struct xe_reg_sr *sr)
 {
-	u32 reg = entry->action.reg + mmio_base;
+	u32 reg = action->reg + mmio_base;
 	struct xe_reg_sr_entry sr_entry = {
-		.clr_bits = entry->action.clr_bits,
-		.set_bits = entry->action.set_bits,
-		.read_mask = entry->action.read_mask,
-		.masked_reg = entry->action.flags & XE_RTP_ACTION_FLAG_MASKED_REG,
-		.reg_type = entry->action.reg_type,
+		.clr_bits = action->clr_bits,
+		.set_bits = action->set_bits,
+		.read_mask = action->read_mask,
+		.masked_reg = action->flags & XE_RTP_ACTION_FLAG_MASKED_REG,
+		.reg_type = action->reg_type,
 	};
 
 	xe_reg_sr_add(sr, reg, &sr_entry);
@@ -106,18 +106,22 @@ static void rtp_add_sr_entry(const struct xe_rtp_entry *entry,
 static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt,
 			    struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
 {
+	const struct xe_rtp_action *action;
 	u32 mmio_base;
+	unsigned int i;
 
 	if (!rule_matches(gt, hwe, entry))
 		return;
 
-	if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
-	    (entry->action.flags & XE_RTP_ACTION_FLAG_ENGINE_BASE))
-		mmio_base = hwe->mmio_base;
-	else
-		mmio_base = 0;
+	for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
+		if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
+		    (action->flags & XE_RTP_ACTION_FLAG_ENGINE_BASE))
+			mmio_base = hwe->mmio_base;
+		else
+			mmio_base = 0;
 
-	rtp_add_sr_entry(entry, gt, mmio_base, sr);
+		rtp_add_sr_entry(action, gt, mmio_base, sr);
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 5d9ad31b00488..1ac3fd1c0734d 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -44,10 +44,8 @@ struct xe_reg_sr;
 #define CALL_FOR_EACH(MACRO_, x, ...)						\
 	_CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__)
 
-#define _XE_RTP_REG(x_)	(x_),						\
-			.reg_type = XE_RTP_REG_REGULAR
-#define _XE_RTP_MCR_REG(x_) (x_),					\
-			    .reg_type = XE_RTP_REG_MCR
+#define _XE_RTP_REG(x_)	(x_), XE_RTP_REG_REGULAR
+#define _XE_RTP_MCR_REG(x_) (x_), XE_RTP_REG_MCR
 
 /*
  * Helper macros for concatenating prefix - do not use them directly outside
@@ -56,6 +54,7 @@ struct xe_reg_sr;
 #define __ADD_XE_RTP_ENTRY_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ENTRY_FLAG_, x) |
 #define __ADD_XE_RTP_ACTION_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ACTION_FLAG_, x) |
 #define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) ,
+#define __ADD_XE_RTP_ACTION_PREFIX(x) CONCATENATE(XE_RTP_ACTION_, x) ,
 
 /*
  * Macros to encode rules to match against platform, IP version, stepping, etc.
@@ -197,8 +196,10 @@ struct xe_reg_sr;
 	{ .match_type = XE_RTP_MATCH_DISCRETE }
 
 /**
- * XE_RTP_WR - Helper to write a value to the register, overriding all the bits
+ * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all
+ *                    the bits
  * @reg_: Register
+ * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
  * @val_: Value to set
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -206,13 +207,15 @@ struct xe_reg_sr;
  *
  *	REGNAME = VALUE
  */
-#define XE_RTP_WR(reg_, val_, ...)						\
-	.action = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_),		\
-		    .read_mask = (~0u), ##__VA_ARGS__ }
+#define XE_RTP_ACTION_WR(reg_, reg_type_, val_, ...)				\
+	{ .reg = (reg_), .reg_type = (reg_type_),				\
+	  .clr_bits = ~0u, .set_bits = (val_),					\
+	  .read_mask = (~0u), ##__VA_ARGS__ }
 
 /**
- * XE_RTP_SET - Set bits from @val_ in the register.
+ * XE_RTP_ACTION_SET - Set bits from @val_ in the register.
  * @reg_: Register
+ * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
  * @val_: Bits to set in the register
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -223,13 +226,15 @@ struct xe_reg_sr;
  *	REGNAME[2] = 1
  *	REGNAME[5] = 1
  */
-#define XE_RTP_SET(reg_, val_, ...)						\
-	.action = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_),	\
-		    .read_mask = (val_), ##__VA_ARGS__ }
+#define XE_RTP_ACTION_SET(reg_, reg_type_, val_, ...)				\
+	{ .reg = (reg_), .reg_type = (reg_type_),				\
+	  .clr_bits = (val_), .set_bits = (val_),				\
+	  .read_mask = (val_), ##__VA_ARGS__ }
 
 /**
- * XE_RTP_CLR: Clear bits from @val_ in the register.
+ * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register.
  * @reg_: Register
+ * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
  * @val_: Bits to clear in the register
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -240,13 +245,15 @@ struct xe_reg_sr;
  *	REGNAME[2] = 0
  *	REGNAME[5] = 0
  */
-#define XE_RTP_CLR(reg_, val_, ...)						\
-	.action = { .reg = reg_, .clr_bits = (val_), .set_bits = 0,		\
-		    .read_mask = (val_), ##__VA_ARGS__ }
+#define XE_RTP_ACTION_CLR(reg_, reg_type_, val_, ...)				\
+	{ .reg = (reg_), .reg_type = (reg_type_),				\
+	  .clr_bits = (val_), .set_bits = 0,					\
+	  .read_mask = (val_), ##__VA_ARGS__ }
 
 /**
- * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to the value in
+ * XE_RTP_ACTION_FIELD_SET: Set a bit range
  * @reg_: Register
+ * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
  * @mask_bits_: Mask of bits to be changed in the register, forming a field
  * @val_: Value to set in the field denoted by @mask_bits_
  * @...: Additional fields to override in the struct xe_rtp_action entry
@@ -256,28 +263,31 @@ struct xe_reg_sr;
  *
  *	REGNAME[<end>:<start>] = VALUE
  */
-#define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...)				\
-	.action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
-		    .read_mask = (mask_bits_), ##__VA_ARGS__ }
+#define XE_RTP_ACTION_FIELD_SET(reg_, reg_type_, mask_bits_, val_, ...)		\
+	{ .reg = (reg_), .reg_type = (reg_type_),				\
+	  .clr_bits = (mask_bits_), .set_bits = (val_),				\
+	  .read_mask = (mask_bits_), ##__VA_ARGS__ }
 
-#define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...)		\
-	.action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
-		    .read_mask = 0, ##__VA_ARGS__ }
+#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, reg_type_, mask_bits_, val_, ...)	\
+	{ .reg = (reg_), .reg_type = (reg_type_),				\
+	  .clr_bits = (mask_bits_), .set_bits = (val_),				\
+	  .read_mask = 0, ##__VA_ARGS__ }
 
 /**
- * XE_WHITELIST_REGISTER - Add register to userspace whitelist
+ * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist
  * @reg_: Register
- * @flags_: Whitelist-specific flags to set
+ * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
+ * @val_: Whitelist-specific flags to set
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
  * Add a register to the whitelist, allowing userspace to modify the ster with
  * regular user privileges.
  */
-#define XE_WHITELIST_REGISTER(reg_, flags_, ...)				\
+#define XE_RTP_ACTION_WHITELIST(reg_, reg_type_, val_, ...)			\
 	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
-	.action = { .reg = reg_, .set_bits = (flags_),			\
-		    .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,		\
-		    ##__VA_ARGS__ }
+	{ .reg = (reg_), .reg_type = (reg_type_), .set_bits = (val_),		\
+	  .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,				\
+	  ##__VA_ARGS__ }
 
 /**
  * XE_RTP_NAME - Helper to set the name in xe_rtp_entry
@@ -324,7 +334,7 @@ struct xe_reg_sr;
  *		...
  *		{ XE_RTP_NAME("test-entry"),
  *		  ...
- *		  XE_RTP_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)),
+ *		  XE_RTP_ACTION_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)),
  *		  ...
  *		},
  *		...
@@ -359,6 +369,33 @@ struct xe_reg_sr;
 		CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__)	\
 	}
 
+/**
+ * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry
+ * @a1: Action to take. Last part of XE_RTP_ACTION_*
+ * @...: Additional rules, defined like @r1
+ *
+ * At least one rule is needed and up to 4 are supported. Multiple rules are
+ * AND'ed together, i.e. all the rules must evaluate to true for the entry to
+ * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(...),
+ *		  XE_RTP_ACTIONS(SET(..), SET(...), CLR(...)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ACTIONS(a1, ...)							\
+	.n_actions = COUNT_ARGS(a1, ##__VA_ARGS__),				\
+	.actions = (struct xe_rtp_action[]) {					\
+		CALL_FOR_EACH(__ADD_XE_RTP_ACTION_PREFIX, a1, ##__VA_ARGS__)	\
+	}
+
 void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 		    struct xe_gt *gt, struct xe_hw_engine *hwe);
 
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index f7efb17d00e59..fac0bd6d5b1e0 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -95,9 +95,10 @@ struct xe_rtp_rule {
 /** struct xe_rtp_entry - Entry in an rtp table */
 struct xe_rtp_entry {
 	const char *name;
-	const struct xe_rtp_action action;
+	const struct xe_rtp_action *actions;
 	const struct xe_rtp_rule *rules;
 	u8 n_rules;
+	u8 n_actions;
 #define XE_RTP_ENTRY_FLAG_FOREACH_ENGINE	BIT(0)
 	u8 flags;
 };
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 96b16b8d03cf0..3cc32e3e7a90f 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -19,7 +19,7 @@
 static const struct xe_rtp_entry gt_tunings[] = {
 	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS)
+	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
 	},
 	{}
 };
@@ -27,8 +27,9 @@ static const struct xe_rtp_entry gt_tunings[] = {
 static const struct xe_rtp_entry context_tunings[] = {
 	{ XE_RTP_NAME("1604555607"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
-	  XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
-					FF_MODE2_TDS_TIMER_128)
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
+						FF_MODE2_TDS_TIMER_MASK,
+						FF_MODE2_TDS_TIMER_128))
 	},
 	{}
 };
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index c1c098994c843..9d2e4555091c7 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -103,86 +103,83 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
 		       ENGINE_CLASS(VIDEO_DECODE),
 		       FUNC(match_14011060649)),
-	  XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
 	{ XE_RTP_NAME("16010515920"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
 		       STEP(A0, B0),
 		       ENGINE_CLASS(VIDEO_DECODE)),
-	  XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
 	{ XE_RTP_NAME("22010523718"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
-	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011006942"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
-	  XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14010948348"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011037102"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011371254"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
 	},
-	{ XE_RTP_NAME("14011431319/0"),
+	{ XE_RTP_NAME("14011431319"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_SET(UNSLCGCTL9440,
-		     GAMTLBOACS_CLKGATE_DIS |
-		     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
-		     GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
-		     GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
-		     GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
-		     GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
-		     GAMTLBBLT_CLKGATE_DIS)
-	},
-	{ XE_RTP_NAME("14011431319/1"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_SET(UNSLCGCTL9444,
-		     GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
-		     GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
-		     GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
-		     GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
-		     GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
-		     GAMTLBMERT_CLKGATE_DIS |
-		     GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
-		     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9440,
+			     GAMTLBOACS_CLKGATE_DIS |
+			     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
+			     GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
+			     GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
+			     GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
+			     GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
+			     GAMTLBBLT_CLKGATE_DIS),
+			 SET(UNSLCGCTL9444,
+			     GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
+			     GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
+			     GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
+			     GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
+			     GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
+			     GAMTLBMERT_CLKGATE_DIS |
+			     GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
+			     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14010569222"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011028019"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14014830051"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN)
+	  XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN))
 	},
 	{ XE_RTP_NAME("14015795083"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)
+	  XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
 	{ XE_RTP_NAME("14011059788"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)
+	  XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
 	},
 	{ XE_RTP_NAME("1409420604"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)
+	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("1408615072"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL))
 	},
 	{}
 };
@@ -190,62 +187,67 @@ static const struct xe_rtp_entry gt_was[] = {
 static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("14015227452"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1606931601"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE)
+	  XE_RTP_ACTIONS(SET(GEN7_FF_THREAD_MODE,
+			     GEN12_FF_TESSELATION_DOP_GATE_DISABLE))
 	},
 	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("18019627453"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1409804808"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
-	  XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14010229206, 1409085225"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
-	  XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
-		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
-		     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-		     GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1406941453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
-	  XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1,
+			     GEN9_FFSC_PERCTX_PREEMPT_CTRL,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{}
 };
@@ -253,33 +255,35 @@ static const struct xe_rtp_entry engine_was[] = {
 static const struct xe_rtp_entry lrc_was[] = {
 	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3,
-		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(GEN11_COMMON_SLICE_CHICKEN3,
+			     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-			   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
-			   XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(FIELD_SET(GEN8_CS_CHICKEN1,
+				   GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+				   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
+				   XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("16011163337"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  /* read verification is ignored due to 1608008084. */
-	  XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK,
-					FF_MODE2_GS_TIMER_224)
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2,
+						FF_MODE2_GS_TIMER_MASK,
+						FF_MODE2_GS_TIMER_224))
 	},
 	{ XE_RTP_NAME("1409044764"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3,
-		     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(CLR(GEN11_COMMON_SLICE_CHICKEN3,
+			     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("22010493298"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_SET(HIZ_CHICKEN,
-		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
-		     XE_RTP_ACTION_FLAG(MASKED_REG))
+	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN,
+			     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{}
 };
-- 
GitLab


From dc97898e8121878829ee3cf48fa8ce154807f90b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Thu, 16 Feb 2023 06:16:44 -0800
Subject: [PATCH 1327/2340] drm/xe: Initialize ret in mcr_lock()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ret is not initialized in mcr_lock() when running in platforms with
graphics IP version < 1270, this could cause drm_WARN_ON_ONCE()
to hit eventually(what just happened to me).

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index bb71071c3435f..7617f03408794 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -383,7 +383,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 static void mcr_lock(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
-	int ret;
+	int ret = 0;
 
 	spin_lock(&gt->mcr_lock);
 
-- 
GitLab


From 780637e28783af505864151da78e713f62ed64ae Mon Sep 17 00:00:00 2001
From: Carlos Santa <carlos.santa@intel.com>
Date: Wed, 15 Feb 2023 12:34:25 -0800
Subject: [PATCH 1328/2340] drm/xe: Update the list of devices to add even more
 TGL devices

The list of GTs got splitted a while back between GT1
and GT2 on TGL.

References: https://patchwork.freedesktop.org/patch/388414/
CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Carlos Santa <carlos.santa@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 2 +-
 include/drm/xe_pciids.h     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c159306e04cf4..2b1c80bb6e467 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -279,7 +279,7 @@ static const struct xe_device_desc mtl_desc = {
  * PCI ID matches, otherwise we'll use the wrong info struct above.
  */
 static const struct pci_device_id pciidlist[] = {
-	XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+	XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
 	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
 	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
 	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index e539594ed9391..b9e9f5b2b0ac2 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -41,8 +41,8 @@
 	MACRO__(0x9AF8, ## __VA_ARGS__)
 
 #define XE_TGL_IDS(MACRO__, ...)		\
-	XE_TGL_GT1_IDS(MACRO__, ...),		\
-	XE_TGL_GT2_IDS(MACRO__, ...)
+	XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
 
 /* RKL */
 #define XE_RKL_IDS(MACRO__, ...)		\
-- 
GitLab


From ba00da78ce4d2a7fe7ef245e1168b7946827995d Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 17 Feb 2023 12:12:17 -0500
Subject: [PATCH 1329/2340] drm/xe: Remove unseless xe_force_wake_prune.

(!(gt->info.engine_mask & BIT(i))) cases are already
handled in the init function. And these masks are not
modified between the init and the prune.

Suggested-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c | 15 ---------------
 drivers/gpu/drm/xe/xe_force_wake.h |  2 --
 drivers/gpu/drm/xe/xe_gt.c         |  2 --
 3 files changed, 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index d2080e6fbe10c..5bd87118ac73a 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -100,21 +100,6 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 	}
 }
 
-void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw)
-{
-	int i, j;
-
-	/* Call after fuses have been read, prune domains that are fused off */
-
-	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j)
-		if (!(gt->info.engine_mask & BIT(i)))
-			fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0;
-
-	for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j)
-		if (!(gt->info.engine_mask & BIT(i)))
-			fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0;
-}
-
 static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain)
 {
 	xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val);
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index 5adb8daa3b718..7c534cdd5fe93 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -15,8 +15,6 @@ void xe_force_wake_init_gt(struct xe_gt *gt,
 			   struct xe_force_wake *fw);
 void xe_force_wake_init_engines(struct xe_gt *gt,
 				struct xe_force_wake *fw);
-void xe_force_wake_prune(struct  xe_gt *gt,
-			 struct xe_force_wake *fw);
 int xe_force_wake_get(struct xe_force_wake *fw,
 		      enum xe_force_wake_domains domains);
 int xe_force_wake_put(struct xe_force_wake *fw,
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 20dbc08d36855..c17279653561b 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -602,8 +602,6 @@ int xe_gt_init(struct xe_gt *gt)
 	if (err)
 		return err;
 
-	xe_force_wake_prune(gt, gt_to_fw(gt));
-
 	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
 	if (err)
 		return err;
-- 
GitLab


From 0d83be772c1f8e0d3db4a26a5f1308e058a98354 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 15 Feb 2023 10:28:43 +0000
Subject: [PATCH 1330/2340] drm/xe/mmio: fix forcewake ref leak in
 xe_mmio_ioctl

Make sure we properly release the forcewake ref on all error paths.

v2(Lucas):
  - Make it less verbose and just fold the unimplemented options into
    the default. The exact return value doesn't seem to matter for the
    corresponding IGT.
  - Replace the user triggerable WARN() with drm_dbg().

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 8a953df2b4688..ba327b1e8dead 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -425,31 +425,28 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 
 	if (args->flags & DRM_XE_MMIO_WRITE) {
 		switch (bits_flag) {
-		case DRM_XE_MMIO_8BIT:
-			return -EINVAL; /* TODO */
-		case DRM_XE_MMIO_16BIT:
-			return -EINVAL; /* TODO */
 		case DRM_XE_MMIO_32BIT:
-			if (XE_IOCTL_ERR(xe, args->value > U32_MAX))
-				return -EINVAL;
+			if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) {
+				ret = -EINVAL;
+				goto exit;
+			}
 			xe_mmio_write32(to_gt(xe), args->addr, args->value);
 			break;
 		case DRM_XE_MMIO_64BIT:
 			xe_mmio_write64(to_gt(xe), args->addr, args->value);
 			break;
 		default:
-			drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
-			ret = -EINVAL;
+			drm_dbg(&xe->drm, "Invalid MMIO bit size");
+			fallthrough;
+		case DRM_XE_MMIO_8BIT: /* TODO */
+		case DRM_XE_MMIO_16BIT: /* TODO */
+			ret = -ENOTSUPP;
 			goto exit;
 		}
 	}
 
 	if (args->flags & DRM_XE_MMIO_READ) {
 		switch (bits_flag) {
-		case DRM_XE_MMIO_8BIT:
-			return -EINVAL; /* TODO */
-		case DRM_XE_MMIO_16BIT:
-			return -EINVAL; /* TODO */
 		case DRM_XE_MMIO_32BIT:
 			args->value = xe_mmio_read32(to_gt(xe), args->addr);
 			break;
@@ -457,8 +454,11 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 			args->value = xe_mmio_read64(to_gt(xe), args->addr);
 			break;
 		default:
-			drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
-			ret = -EINVAL;
+			drm_dbg(&xe->drm, "Invalid MMIO bit size");
+			fallthrough;
+		case DRM_XE_MMIO_8BIT: /* TODO */
+		case DRM_XE_MMIO_16BIT: /* TODO */
+			ret = -ENOTSUPP;
 		}
 	}
 
-- 
GitLab


From 6062acc1b8664ade91b4609ea056badd6f1e6802 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 15 Feb 2023 10:28:44 +0000
Subject: [PATCH 1331/2340] drm/xe/stolen: don't map stolen on small-bar

The driver should still be functional with small-bar, just that the vram
size is clamped to the BAR size (until we add proper support for tiered
vram). For stolen vram we shouldn't iomap anything if the BAR size
doesn't also contain the stolen portion, since on discrete the stolen
portion is always at the end of normal vram. Stolen should still be
functional, just that allocating CPU visible io memory will always
return an error.

v2 (Lucas)
  - Mention in the commit message that stolen vram is always as the end
    of normal vram, which is why stolen in not mappable on small-bar
    systems.
  - Just make xe_ttm_stolen_inaccessible() return true for such cases.
    Also rename to xe_ttm_stolen_cpu_inaccessible to better describe
    that we are talking about direct CPU access. Plus add some
    kernel-doc.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/209
Reported-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c             |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 47 +++++++++++++++++++-------
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h |  2 +-
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3c9d90dcf1251..b4fa856b79d14 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1158,7 +1158,7 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
 	u64 end = offset == ~0ull ? offset : start + size;
 
 	if (flags & XE_BO_CREATE_STOLEN_BIT &&
-	    xe_ttm_stolen_inaccessible(xe))
+	    xe_ttm_stolen_cpu_inaccessible(xe))
 		flags |= XE_BO_CREATE_GGTT_BIT;
 
 	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index b4e9c88644e47..e20c567f276fe 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -21,11 +21,6 @@
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_vram_mgr.h"
 
-bool xe_ttm_stolen_inaccessible(struct xe_device *xe)
-{
-	return !IS_DGFX(xe) && GRAPHICS_VERx100(xe) < 1270;
-}
-
 struct xe_ttm_stolen_mgr {
 	struct xe_ttm_vram_mgr base;
 
@@ -43,6 +38,34 @@ to_stolen_mgr(struct ttm_resource_manager *man)
 	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
 }
 
+/**
+ * xe_ttm_stolen_cpu_inaccessible - Can we directly CPU access stolen memory for
+ * this device.
+ *
+ * On some integrated platforms we can't directly access stolen via the CPU
+ * (like some normal system memory).  Also on small-bar systems for discrete,
+ * since stolen is always as the end of normal VRAM, and the BAR likely doesn't
+ * stretch that far. However CPU access of stolen is generally rare, and at
+ * least on discrete should not be needed.
+ *
+ * If this is indeed inaccessible then we fallback to using the GGTT mappable
+ * aperture for CPU access. On discrete platforms we have no such thing, so when
+ * later attempting to CPU map the memory an error is instead thrown.
+ */
+bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe)
+{
+	struct ttm_resource_manager *ttm_mgr =
+		ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+	struct xe_ttm_stolen_mgr *mgr;
+
+	if (!ttm_mgr)
+		return true;
+
+	mgr = to_stolen_mgr(ttm_mgr);
+
+	return !mgr->io_base || GRAPHICS_VERx100(xe) < 1270;
+}
+
 static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -126,7 +149,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 
 	if (IS_DGFX(xe))
 		stolen_size = detect_bar2_dgfx(xe, mgr);
-	else if (!xe_ttm_stolen_inaccessible(xe))
+	else if (GRAPHICS_VERx100(xe) >= 1270)
 		stolen_size = detect_bar2_integrated(xe, mgr);
 	else
 		stolen_size = detect_stolen(xe, mgr);
@@ -140,7 +163,6 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 	if (pgsize < PAGE_SIZE)
 		pgsize = PAGE_SIZE;
 
-
 	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, pgsize);
 	if (err) {
 		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
@@ -150,7 +172,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
 		    stolen_size);
 
-	if (!xe_ttm_stolen_inaccessible(xe))
+	if (!xe_ttm_stolen_cpu_inaccessible(xe))
 		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size);
 }
 
@@ -161,10 +183,9 @@ u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
 	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
 	struct xe_res_cursor cur;
 
-	if (!mgr->io_base)
-		return 0;
+	XE_BUG_ON(!mgr->io_base);
 
-	if (!IS_DGFX(xe) && xe_ttm_stolen_inaccessible(xe))
+	if (!IS_DGFX(xe) && xe_ttm_stolen_cpu_inaccessible(xe))
 		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
 
 	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
@@ -202,6 +223,8 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
 #ifdef CONFIG_X86
 	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);
 
+	XE_BUG_ON(IS_DGFX(xe));
+
 	/* XXX: Require BO to be mapped to GGTT? */
 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
 		return -EIO;
@@ -228,7 +251,7 @@ int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
 	if (!mgr || !mgr->io_base)
 		return -EIO;
 
-	if (!xe_ttm_stolen_inaccessible(xe))
+	if (!xe_ttm_stolen_cpu_inaccessible(xe))
 		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
 	else
 		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
index ade37abb06233..2fda97b97a053 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
@@ -14,7 +14,7 @@ struct xe_device;
 
 void xe_ttm_stolen_mgr_init(struct xe_device *xe);
 int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem);
-bool xe_ttm_stolen_inaccessible(struct xe_device *xe);
+bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe);
 u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset);
 u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe);
 
-- 
GitLab


From b47b0ef1ba34e351228b57ce7ba74efc6d7b2c24 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 15 Feb 2023 10:28:45 +0000
Subject: [PATCH 1332/2340] drm/xe/query: zero the region info

There are also some reserved fields in here which are not currently
cleared when handing back to userspace. Otherwise we might run into
issues if we later wish to use them.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Lucas De Marchi lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 6e904e97f4568..f9f21bd1bfd72 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -115,16 +115,13 @@ static int query_memory_usage(struct xe_device *xe,
 		return -EINVAL;
 	}
 
-	usage = kmalloc(size, GFP_KERNEL);
+	usage = kzalloc(size, GFP_KERNEL);
 	if (XE_IOCTL_ERR(xe, !usage))
 		return -ENOMEM;
 
-	usage->pad = 0;
-
 	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
 	usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM;
 	usage->regions[0].instance = 0;
-	usage->regions[0].pad = 0;
 	usage->regions[0].min_page_size = PAGE_SIZE;
 	usage->regions[0].max_page_size = PAGE_SIZE;
 	usage->regions[0].total_size = man->size << PAGE_SHIFT;
@@ -138,7 +135,6 @@ static int query_memory_usage(struct xe_device *xe,
 				XE_MEM_REGION_CLASS_VRAM;
 			usage->regions[usage->num_regions].instance =
 				usage->num_regions;
-			usage->regions[usage->num_regions].pad = 0;
 			usage->regions[usage->num_regions].min_page_size =
 				xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
 				SZ_64K : PAGE_SIZE;
-- 
GitLab


From 671ca05d7c9766407d7d7e4785d52e4a15d56027 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 15:33:43 -0800
Subject: [PATCH 1333/2340] drm/xe: Make local functions static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A few static functions not being declared like that break the build with
W=1, like e.g.

	cc1: all warnings being treated as errors
	make[2]: *** [../scripts/Makefile.build:250: drivers/gpu/drm/xe/xe_gt.o] Error 1
	../drivers/gpu/drm/xe/xe_guc.c:240:6: error: no previous prototype for ‘guc_write_params’ [-Werror=missing-prototypes]
	  240 | void guc_write_params(struct xe_guc *guc)
	      |      ^~~~~~~~~~~~~~~~

Make them static.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c              | 6 +++---
 drivers/gpu/drm/xe/xe_guc.c             | 4 ++--
 drivers/gpu/drm/xe/xe_irq.c             | 2 +-
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index c17279653561b..809e9b14c314d 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -218,7 +218,7 @@ static void gt_fini(struct drm_device *drm, void *arg)
 
 static void gt_reset_worker(struct work_struct *w);
 
-int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
+static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 {
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
@@ -252,7 +252,7 @@ int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 	return 0;
 }
 
-int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
+static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 {
 	struct xe_reg_sr *sr = &e->hwe->reg_lrc;
 	struct xe_reg_sr_entry *entry;
@@ -609,7 +609,7 @@ int xe_gt_init(struct xe_gt *gt)
 	return 0;
 }
 
-int do_gt_reset(struct xe_gt *gt)
+static int do_gt_reset(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 5cdfdfd0de40f..32bcc40463e11 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -237,7 +237,7 @@ static void guc_init_params(struct xe_guc *guc)
  * transfer. These parameters are read by the firmware on startup
  * and cannot be changed thereafter.
  */
-void guc_write_params(struct xe_guc *guc)
+static void guc_write_params(struct xe_guc *guc)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
 	int i;
@@ -560,7 +560,7 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
 			"Received early GuC exception notification!\n");
 }
 
-void guc_enable_irq(struct xe_guc *guc)
+static void guc_enable_irq(struct xe_guc *guc)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
 	u32 events = xe_gt_is_media_type(gt) ?
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index a2caa20f2fb34..ab703f1c8b583 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -448,7 +448,7 @@ static void dg1_irq_reset(struct xe_gt *gt)
 	GEN3_IRQ_RESET(gt, GEN8_PCU_);
 }
 
-void xe_irq_reset(struct xe_device *xe)
+static void xe_irq_reset(struct xe_device *xe)
 {
 	struct xe_gt *gt;
 	u8 id;
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 8a8d814a0e7aa..977c963a8cd0a 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -56,9 +56,9 @@ static const enum xe_engine_class user_to_xe_engine_class[] = {
 	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
 };
 
-int check_hw_engines(struct xe_device *xe,
-		     struct drm_xe_engine_class_instance *eci,
-		     int num_engines)
+static int check_hw_engines(struct xe_device *xe,
+			    struct drm_xe_engine_class_instance *eci,
+			    int num_engines)
 {
 	int i;
 
-- 
GitLab


From 3dbec4703ee7b67a8dba47e5f1e668b7b17aeb1b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 15:33:44 -0800
Subject: [PATCH 1334/2340] drm/xe: Fix application of LRC tunings

LRC tunings were added after the gt ones and didn't add the call
in xe_gt_record_default_lrcs() to process them like is done for
workarounds. Add such a function and call it from
xe_gt_record_default_lrcs().

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c     |  1 +
 drivers/gpu/drm/xe/xe_tuning.c | 15 ++++++++++++++-
 drivers/gpu/drm/xe/xe_tuning.h |  2 ++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 809e9b14c314d..5a3c8fd5936a7 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -320,6 +320,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 
 		xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe);
 		xe_wa_process_lrc(hwe);
+		xe_tuning_process_lrc(hwe);
 
 		default_lrc = drmm_kzalloc(&xe->drm,
 					   xe_lrc_size(xe, hwe->class),
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 3cc32e3e7a90f..595eb2de90adf 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -24,7 +24,7 @@ static const struct xe_rtp_entry gt_tunings[] = {
 	{}
 };
 
-static const struct xe_rtp_entry context_tunings[] = {
+static const struct xe_rtp_entry lrc_tunings[] = {
 	{ XE_RTP_NAME("1604555607"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
 	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
@@ -38,3 +38,16 @@ void xe_tuning_process_gt(struct xe_gt *gt)
 {
 	xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
 }
+
+/**
+ * xe_tuning_process_lrc - process lrc tunings
+ * @hwe: engine instance to process tunings for
+ *
+ * Process LRC table for this platform, saving in @hwe all the tunings that need
+ * to be applied on context restore. These are tunings touching registers that
+ * are part of the HW context image.
+ */
+void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
+{
+	xe_rtp_process(lrc_tunings, &hwe->reg_lrc, hwe->gt, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
index 66dbc93192bde..2b95b0c8effc4 100644
--- a/drivers/gpu/drm/xe/xe_tuning.h
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -7,7 +7,9 @@
 #define _XE_TUNING_
 
 struct xe_gt;
+struct xe_hw_engine;
 
 void xe_tuning_process_gt(struct xe_gt *gt);
+void xe_tuning_process_lrc(struct xe_hw_engine *hwe);
 
 #endif
-- 
GitLab


From 220d957b5954ee4631fe781adfbfae8592b34811 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 15:33:46 -0800
Subject: [PATCH 1335/2340] drm/xe: Remove unused functions

xe_gt_topology_dss_group_mask and xe_gt_topology_count_dss are probably
leftover from initial implementation - they are not called from
anywhere. Remove those functions.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_topology.c | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 8e02e362ba27d..3dd7cbbff0714 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -94,31 +94,6 @@ xe_gt_topology_init(struct xe_gt *gt)
 	xe_gt_topology_dump(gt, &p);
 }
 
-unsigned int
-xe_gt_topology_count_dss(xe_dss_mask_t mask)
-{
-	return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS);
-}
-
-u64
-xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize)
-{
-	xe_dss_mask_t per_dss_mask = {};
-	u64 grpmask = 0;
-
-	WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask));
-
-	bitmap_fill(per_dss_mask, grpsize);
-	for (int i = 0; !bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); i++) {
-		if (bitmap_intersects(mask, per_dss_mask, grpsize))
-			grpmask |= BIT(i);
-
-		bitmap_shift_right(mask, mask, grpsize, XE_MAX_DSS_FUSE_BITS);
-	}
-
-	return grpmask;
-}
-
 void
 xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
 {
-- 
GitLab


From 1d1b9262c5cb3c7c3d2a9f63e207dbb3d17bb3cc Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 15:33:47 -0800
Subject: [PATCH 1336/2340] drm/xe: Add missing doc for xe parameter

Fix the following warning:

	../drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c:55: warning: Function
	parameter or member 'xe' not described in
	'xe_ttm_stolen_cpu_inaccessible'

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index e20c567f276fe..097454f782865 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -41,6 +41,7 @@ to_stolen_mgr(struct ttm_resource_manager *man)
 /**
  * xe_ttm_stolen_cpu_inaccessible - Can we directly CPU access stolen memory for
  * this device.
+ * @xe: xe device
  *
  * On some integrated platforms we can't directly access stolen via the CPU
  * (like some normal system memory).  Also on small-bar systems for discrete,
-- 
GitLab


From cb30cfdce50011ea53f5425b8be264f26cef60d8 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 15:33:48 -0800
Subject: [PATCH 1337/2340] drm/xe: Add missing include xe_wait_user_fence.h

Make xe_wait_user_fence.c include xe_wait_user_fence.h so it doesn't
rely on indirect includes and also doesn't fail the build due to missing
prototype for xe_wait_user_fence_ioctl().

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 977c963a8cd0a..15c2e5aa08d2f 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_wait_user_fence.h"
+
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
 #include <drm/xe_drm.h>
-- 
GitLab


From e50bbbb9baf64dfe77f236636961b1ceb1b4c19d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 16:27:05 -0800
Subject: [PATCH 1338/2340] drm/xe: Remove duplicate media_ver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

media_verx100 supersedes the info from media_ver. Leave media_ver in the
struct xe_device_desc, used in xe_pci.c since it's easier to define
common parts of the platforms like that. However all the rest of the
driver should be using media_verx100 that is more future proof.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/216
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 --
 drivers/gpu/drm/xe/xe_mmio.c         | 2 +-
 drivers/gpu/drm/xe/xe_pci.c          | 1 -
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 4c4a912141a97..c9f74dc4c9fda 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -77,8 +77,6 @@ struct xe_device {
 		u8 tile_count;
 		/** @vm_max_level: Max VM level */
 		u8 vm_max_level;
-		/** @media_ver: Media version */
-		u8 media_ver;
 		/** @supports_usm: Supports unified shared memory */
 		bool supports_usm;
 		/** @has_asid: Has address space ID */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index ba327b1e8dead..c414ece6dfe3b 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -287,7 +287,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg);
 	adj_tile_count = xe->info.tile_count =
 		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
-	if (xe->info.media_ver >= 13)
+	if (xe->info.media_verx100 >= 1300)
 		xe->info.tile_count *= 2;
 
 	drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 2b1c80bb6e467..88e28649c4002 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -408,7 +408,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	xe->info.vram_flags = desc->vram_flags;
 	xe->info.tile_count = desc->max_tiles;
 	xe->info.vm_max_level = desc->vm_max_level;
-	xe->info.media_ver = desc->media_ver;
 	xe->info.supports_usm = desc->supports_usm;
 	xe->info.has_asid = desc->has_asid;
 	xe->info.has_flat_ccs = desc->has_flat_ccs;
-- 
GitLab


From 5a4a8e8b3b0be40c7cdf928ad8b6cfe6e5c465fd Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 11:39:50 -0800
Subject: [PATCH 1339/2340] drm/xe: Remove outdated build workaround

Use the more common "call cc-disable-warning" way to disable warnings.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 998f7044b0470..58ee9e82156d3 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -13,14 +13,8 @@
 # will most likely get a sudden build breakage... Hopefully we will fix
 # new warnings before CI updates!
 subdir-ccflags-y := -Wall -Wextra
-# making these call cc-disable-warning breaks when trying to build xe.mod.o
-# by calling make M=drivers/gpu/drm/xe. This doesn't happen in upstream tree,
-# so it was somehow fixed by the changes in the build system. Move it back to
-# $(call cc-disable-warning, ...) after rebase.
-subdir-ccflags-y += -Wno-unused-parameter
-subdir-ccflags-y += -Wno-type-limits
-#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
-#subdir-ccflags-y += $(call cc-disable-warning, type-limits)
+subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
+subdir-ccflags-y += $(call cc-disable-warning, type-limits)
 subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
 subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
 # clang warnings
-- 
GitLab


From 84ff55006578d169b9331014bd34f0da2ca0616b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Feb 2023 11:39:52 -0800
Subject: [PATCH 1340/2340] drm/xe/guc: Remove i915_regs.h include

i915_regs.h is not needed, particularly in a header file. What is needed
is i915_reg_defs.h for use of _MMIO() and similar macros.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h
index 1e16a9b76ddc1..513a7e0c8a5aa 100644
--- a/drivers/gpu/drm/xe/xe_guc_reg.h
+++ b/drivers/gpu/drm/xe/xe_guc_reg.h
@@ -9,7 +9,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 
-#include "i915_reg.h"
+#include "i915_reg_defs.h"
 
 /* Definitions of GuC H/W registers, bits, etc */
 
-- 
GitLab


From 5b7e50e2ea1745bd09c3d99a4f7c49d630124825 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 22 Feb 2023 12:18:45 +0000
Subject: [PATCH 1341/2340] drm/xe/pm: fix unbalanced ref handling

In local_pci_probe() the core kernel increments the rpm for the device,
just before calling into the probe hook. If the driver/device supports
runtime pm it is then meant to put this ref during probe (like we do in
xe_pm_runtime_init()). However when removing the device we then also
need to take the reference back, otherwise the ref that is put in
pci_device_remove() will be unbalanced when for example unloading the
driver, leading to warnings like:

    [ 3808.596345] xe 0000:03:00.0: Runtime PM usage count underflow!

Fix this by incrementing the rpm ref when removing the device.

v2:
  - Improve the terminology in the commit message; s/drop/put/ etc (Lucas & Rodrigo)
  - Also call pm_runtime_forbid(dev) (Rodrigo)

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/193
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 1 +
 drivers/gpu/drm/xe/xe_pm.c  | 8 ++++++++
 drivers/gpu/drm/xe/xe_pm.h  | 1 +
 3 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 88e28649c4002..3474b5c9f1740 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -364,6 +364,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
 		return;
 
 	xe_device_remove(xe);
+	xe_pm_runtime_fini(xe);
 	pci_set_drvdata(pdev, NULL);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index fb0355530e7bd..0ef92b746595d 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -117,6 +117,14 @@ void xe_pm_runtime_init(struct xe_device *xe)
 	pm_runtime_put_autosuspend(dev);
 }
 
+void xe_pm_runtime_fini(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	pm_runtime_get_sync(dev);
+	pm_runtime_forbid(dev);
+}
+
 int xe_pm_runtime_suspend(struct xe_device *xe)
 {
 	struct xe_gt *gt;
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index b8c5f9558e263..6a885585f6534 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -14,6 +14,7 @@ int xe_pm_suspend(struct xe_device *xe);
 int xe_pm_resume(struct xe_device *xe);
 
 void xe_pm_runtime_init(struct xe_device *xe);
+void xe_pm_runtime_fini(struct xe_device *xe);
 int xe_pm_runtime_suspend(struct xe_device *xe);
 int xe_pm_runtime_resume(struct xe_device *xe);
 int xe_pm_runtime_get(struct xe_device *xe);
-- 
GitLab


From 353dfaaa31648c4e6f7f3fee5001f047ebf3ed67 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 22 Feb 2023 21:00:35 -0800
Subject: [PATCH 1342/2340] drm/xe: Fix kunit integration due to missing
 prototypes

In order to avoid  -Werror=missing-prototypes, add the prototypes
in a separate tests/<test-name>_test.h file that is included by both
the implementation (tests/xe_<testname>.c, injected in xe.ko) and the
kunit module (tests/xe_<testname>_test.c -> xe-<testname>-test.ko).

v2: Add header and don't add ifdef to files that are already not built
when not using kunit (Matt Auld)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c           |  2 ++
 drivers/gpu/drm/xe/tests/xe_bo_test.c      |  5 ++---
 drivers/gpu/drm/xe/tests/xe_bo_test.h      | 14 ++++++++++++++
 drivers/gpu/drm/xe/tests/xe_dma_buf.c      |  2 ++
 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c |  4 ++--
 drivers/gpu/drm/xe/tests/xe_dma_buf_test.h | 13 +++++++++++++
 drivers/gpu/drm/xe/tests/xe_migrate.c      |  2 ++
 drivers/gpu/drm/xe/tests/xe_migrate_test.c |  4 ++--
 drivers/gpu/drm/xe/tests/xe_migrate_test.h | 13 +++++++++++++
 9 files changed, 52 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/tests/xe_bo_test.h
 create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
 create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate_test.h

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 87ac21cc8ca93..f03fb907b59a6 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -5,6 +5,8 @@
 
 #include <kunit/test.h>
 
+#include "tests/xe_bo_test.h"
+
 #include "xe_bo_evict.h"
 #include "xe_pci.h"
 
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
index c8fa29b0b3b23..92dda4fca21b9 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -3,10 +3,9 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include <kunit/test.h>
+#include "xe_bo_test.h"
 
-void xe_ccs_migrate_kunit(struct kunit *test);
-void xe_bo_evict_kunit(struct kunit *test);
+#include <kunit/test.h>
 
 static struct kunit_case xe_bo_tests[] = {
 	KUNIT_CASE(xe_ccs_migrate_kunit),
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h
new file mode 100644
index 0000000000000..d751a618c0c8f
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_BO_TEST_H__
+#define __XE_BO_TEST_H__
+
+struct kunit;
+
+void xe_ccs_migrate_kunit(struct kunit *test);
+void xe_bo_evict_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 615d22e3f7318..e66a8361ae1f0 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -5,6 +5,8 @@
 
 #include <kunit/test.h>
 
+#include "tests/xe_dma_buf_test.h"
+
 #include "xe_pci.h"
 
 static bool p2p_enabled(struct dma_buf_test_params *params)
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
index 7bb292da11936..a1adfd1e1605e 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -3,9 +3,9 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include <kunit/test.h>
+#include "xe_dma_buf_test.h"
 
-void xe_dma_buf_kunit(struct kunit *test);
+#include <kunit/test.h>
 
 static struct kunit_case xe_dma_buf_tests[] = {
 	KUNIT_CASE(xe_dma_buf_kunit),
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
new file mode 100644
index 0000000000000..4e9a8bef57518
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_DMA_BUF_TEST_H__
+#define __XE_DMA_BUF_TEST_H__
+
+struct kunit;
+
+void xe_dma_buf_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 03a60d5b42f1d..0de17e90aba9c 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -5,6 +5,8 @@
 
 #include <kunit/test.h>
 
+#include "tests/xe_migrate_test.h"
+
 #include "xe_pci.h"
 
 static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index ad779e2bd0717..d6be360c3b6d1 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -3,9 +3,9 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include <kunit/test.h>
+#include "xe_migrate_test.h"
 
-void xe_migrate_sanity_kunit(struct kunit *test);
+#include <kunit/test.h>
 
 static struct kunit_case xe_migrate_tests[] = {
 	KUNIT_CASE(xe_migrate_sanity_kunit),
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
new file mode 100644
index 0000000000000..db1f8ef035bba
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __XE_MIGRATE_TEST_H__
+#define __XE_MIGRATE_TEST_H__
+
+struct kunit;
+
+void xe_migrate_sanity_kunit(struct kunit *test);
+
+#endif
-- 
GitLab


From 74f800c7a982db1d10e2c0c0a0164ee1db878652 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Feb 2023 11:08:14 -0800
Subject: [PATCH 1343/2340] drm/xe: Remove gen-based mmio offsets from hw
 engine init

During early generations of Intel GPUs, hardware engines would sometimes
move to new MMIO offsets from one platform/generation to the next.
These days engines the hardware teams put more effort into ensuring that
engines stay at consistent locations; even major design changes (like
the introduction of standalone media) keep the MMIO locations of the
engines constant.

Since all platforms supported by the Xe driver are new enough to have a
single MMIO offset for each engine (and since our crystal ball says that
these offsets are very unlikely to change again in the foreseeable
future), we can simplify the driver's engine definitions and remove the
gen-based MMIO bases.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 132 +++++++-----------------------
 1 file changed, 28 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index fd89dd90131c9..986f675aaf88c 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -31,11 +31,7 @@ struct engine_info {
 	unsigned int class : 8;
 	unsigned int instance : 8;
 	enum xe_force_wake_domains domain;
-	/* mmio bases table *must* be sorted in reverse graphics_ver order */
-	struct engine_mmio_base {
-		unsigned int graphics_ver : 8;
-		unsigned int base : 24;
-	} mmio_bases[MAX_MMIO_BASES];
+	u32 mmio_base;
 };
 
 static const struct engine_info engine_infos[] = {
@@ -44,90 +40,70 @@ static const struct engine_info engine_infos[] = {
 		.class = XE_ENGINE_CLASS_RENDER,
 		.instance = 0,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 1, .base = RENDER_RING_BASE }
-		},
+		.mmio_base = RENDER_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS0] = {
 		.name = "bcs0",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 0,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 6, .base = BLT_RING_BASE }
-		},
+		.mmio_base = BLT_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS1] = {
 		.name = "bcs1",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 1,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS1_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS2] = {
 		.name = "bcs2",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 2,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS2_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS3] = {
 		.name = "bcs3",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 3,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS3_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS4] = {
 		.name = "bcs4",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 4,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS4_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS5] = {
 		.name = "bcs5",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 5,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS5_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS6] = {
 		.name = "bcs6",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 6,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS6_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS7] = {
 		.name = "bcs7",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 7,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS7_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS8] = {
 		.name = "bcs8",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 8,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
-		},
+		.mmio_base = XEHPC_BCS8_RING_BASE,
 	},
 
 	[XE_HW_ENGINE_VCS0] = {
@@ -135,166 +111,115 @@ static const struct engine_info engine_infos[] = {
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 0,
 		.domain = XE_FW_MEDIA_VDBOX0,
-		.mmio_bases = {
-			{ .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
-			{ .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
-			{ .graphics_ver = 4, .base = BSD_RING_BASE }
-		},
+		.mmio_base = GEN11_BSD_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS1] = {
 		.name = "vcs1",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 1,
 		.domain = XE_FW_MEDIA_VDBOX1,
-		.mmio_bases = {
-			{ .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
-			{ .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
-		},
+		.mmio_base = GEN11_BSD2_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS2] = {
 		.name = "vcs2",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 2,
 		.domain = XE_FW_MEDIA_VDBOX2,
-		.mmio_bases = {
-			{ .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
-		},
+		.mmio_base = GEN11_BSD3_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS3] = {
 		.name = "vcs3",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 3,
 		.domain = XE_FW_MEDIA_VDBOX3,
-		.mmio_bases = {
-			{ .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
-		},
+		.mmio_base = GEN11_BSD4_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS4] = {
 		.name = "vcs4",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 4,
 		.domain = XE_FW_MEDIA_VDBOX4,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
-		},
+		.mmio_base = XEHP_BSD5_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS5] = {
 		.name = "vcs5",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 5,
 		.domain = XE_FW_MEDIA_VDBOX5,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
-		},
+		.mmio_base = XEHP_BSD6_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS6] = {
 		.name = "vcs6",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 6,
 		.domain = XE_FW_MEDIA_VDBOX6,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
-		},
+		.mmio_base = XEHP_BSD7_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS7] = {
 		.name = "vcs7",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 7,
 		.domain = XE_FW_MEDIA_VDBOX7,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
-		},
+		.mmio_base = XEHP_BSD8_RING_BASE,
 	},
 	[XE_HW_ENGINE_VECS0] = {
 		.name = "vecs0",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 0,
 		.domain = XE_FW_MEDIA_VEBOX0,
-		.mmio_bases = {
-			{ .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
-			{ .graphics_ver = 7, .base = VEBOX_RING_BASE }
-		},
+		.mmio_base = GEN11_VEBOX_RING_BASE,
 	},
 	[XE_HW_ENGINE_VECS1] = {
 		.name = "vecs1",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 1,
 		.domain = XE_FW_MEDIA_VEBOX1,
-		.mmio_bases = {
-			{ .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
-		},
+		.mmio_base = GEN11_VEBOX2_RING_BASE,
 	},
 	[XE_HW_ENGINE_VECS2] = {
 		.name = "vecs2",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 2,
 		.domain = XE_FW_MEDIA_VEBOX2,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
-		},
+		.mmio_base = XEHP_VEBOX3_RING_BASE,
 	},
 	[XE_HW_ENGINE_VECS3] = {
 		.name = "vecs3",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 3,
 		.domain = XE_FW_MEDIA_VEBOX3,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
-		},
+		.mmio_base = XEHP_VEBOX4_RING_BASE,
 	},
 	[XE_HW_ENGINE_CCS0] = {
 		.name = "ccs0",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 0,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE },
-		},
+		.mmio_base = GEN12_COMPUTE0_RING_BASE,
 	},
 	[XE_HW_ENGINE_CCS1] = {
 		.name = "ccs1",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 1,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE },
-		},
+		.mmio_base = GEN12_COMPUTE1_RING_BASE,
 	},
 	[XE_HW_ENGINE_CCS2] = {
 		.name = "ccs2",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 2,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE },
-		},
+		.mmio_base = GEN12_COMPUTE2_RING_BASE,
 	},
 	[XE_HW_ENGINE_CCS3] = {
 		.name = "ccs3",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 3,
 		.domain = XE_FW_RENDER,
-		.mmio_bases = {
-			{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE },
-		},
+		.mmio_base = GEN12_COMPUTE3_RING_BASE,
 	},
 };
 
-static u32 engine_info_mmio_base(const struct engine_info *info,
-				 unsigned int graphics_ver)
-{
-	int i;
-
-	for (i = 0; i < MAX_MMIO_BASES; i++)
-		if (graphics_ver >= info->mmio_bases[i].graphics_ver)
-			break;
-
-	XE_BUG_ON(i == MAX_MMIO_BASES);
-	XE_BUG_ON(!info->mmio_bases[i].base);
-
-	return info->mmio_bases[i].base;
-}
-
 static void hw_engine_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_hw_engine *hwe = arg;
@@ -346,7 +271,6 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 				 enum xe_hw_engine_id id)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	const struct engine_info *info;
 
 	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
@@ -362,7 +286,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	hwe->gt = gt;
 	hwe->class = info->class;
 	hwe->instance = info->instance;
-	hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe));
+	hwe->mmio_base = info->mmio_base;
 	hwe->domain = info->domain;
 	hwe->name = info->name;
 	hwe->fence_irq = &gt->fence_irq[info->class];
-- 
GitLab


From ba11f1b7ea5b59fdf58e5dec7b73fa914de65f8d Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Feb 2023 14:16:01 -0800
Subject: [PATCH 1344/2340] drm/xe: Assume MTL's forcewake register continues
 to future platforms

Starting with MTL, the GT forcewake ack register moved from 0x130044 to
0xDFC.  We expect this change to carry forward to future platforms as
well, so forcewake initialization should use an IP version check instead
of matching the MTL platform specifically.

The (re)definition of FORCEWAKE_ACK_GT_MTL in the forcewake file is also
unnecessary; we can take the definition that already exists in the
dedicated register header.

Bspec: 65031, 64629
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 5bd87118ac73a..21d04a02847bb 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -36,8 +36,6 @@ static void domain_init(struct xe_force_wake_domain *domain,
 	domain->mask = mask;
 }
 
-#define FORCEWAKE_ACK_GT_MTL                 _MMIO(0xdfc)
-
 void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
 {
 	struct xe_device *xe = gt_to_xe(gt);
@@ -48,7 +46,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
 	/* Assuming gen11+ so assert this assumption is correct */
 	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
 
-	if (xe->info.platform == XE_METEORLAKE) {
+	if (xe->info.graphics_verx100 >= 1270) {
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
 			    XE_FW_DOMAIN_ID_GT,
 			    FORCEWAKE_GT_GEN9.reg,
-- 
GitLab


From ea9f879d037ff4d7851f35ba91dc774dd9033308 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:38 -0800
Subject: [PATCH 1345/2340] drm/xe: Sort includes

Sort includes and split them in blocks:

1) .h corresponding to the .c. Example: xe_bb.c should have a "#include
   "xe_bb.h" first.
2) #include <linux/...>
3) #include <drm/...>
4) local includes
5) i915 includes

This is accomplished by running
`clang-format --style=file -i --sort-includes drivers/gpu/drm/xe/*.[ch]`
and ignoring all the changes after the includes. There are also some
manual tweaks to split the blocks.

v2: Also sort includes in headers

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bb.c                  |  3 ++-
 drivers/gpu/drm/xe/xe_bo.c                  |  1 -
 drivers/gpu/drm/xe/xe_bo_evict.c            |  3 ++-
 drivers/gpu/drm/xe/xe_debugfs.c             |  3 ++-
 drivers/gpu/drm/xe/xe_device.c              |  8 ++++----
 drivers/gpu/drm/xe/xe_device.h              |  2 +-
 drivers/gpu/drm/xe/xe_dma_buf.c             |  9 ++++-----
 drivers/gpu/drm/xe/xe_engine.c              |  3 ++-
 drivers/gpu/drm/xe/xe_exec.c                |  3 ++-
 drivers/gpu/drm/xe/xe_execlist.c            | 10 +++++-----
 drivers/gpu/drm/xe/xe_force_wake.c          |  5 +++--
 drivers/gpu/drm/xe/xe_ggtt.c                |  6 +++---
 drivers/gpu/drm/xe/xe_gt.c                  |  3 ++-
 drivers/gpu/drm/xe/xe_gt_clock.c            |  7 ++++---
 drivers/gpu/drm/xe/xe_gt_debugfs.c          |  3 ++-
 drivers/gpu/drm/xe/xe_gt_mcr.c              |  3 ++-
 drivers/gpu/drm/xe/xe_gt_pagefault.c        |  3 ++-
 drivers/gpu/drm/xe/xe_gt_sysfs.c            |  5 ++++-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  3 ++-
 drivers/gpu/drm/xe/xe_gt_topology.c         |  3 ++-
 drivers/gpu/drm/xe/xe_guc.c                 | 14 ++++++++------
 drivers/gpu/drm/xe/xe_guc.h                 |  2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c             |  6 ++++--
 drivers/gpu/drm/xe/xe_guc_ct.c              |  5 +++--
 drivers/gpu/drm/xe/xe_guc_ct_types.h        |  2 +-
 drivers/gpu/drm/xe/xe_guc_debugfs.c         |  3 ++-
 drivers/gpu/drm/xe/xe_guc_fwif.h            |  4 ++--
 drivers/gpu/drm/xe/xe_guc_hwconfig.c        |  3 ++-
 drivers/gpu/drm/xe/xe_guc_log.c             |  3 ++-
 drivers/gpu/drm/xe/xe_guc_pc.c              | 13 ++++++++-----
 drivers/gpu/drm/xe/xe_guc_pc_types.h        |  2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c          |  9 +++++----
 drivers/gpu/drm/xe/xe_huc.c                 |  3 ++-
 drivers/gpu/drm/xe/xe_huc_debugfs.c         |  3 ++-
 drivers/gpu/drm/xe/xe_hw_engine.c           |  2 +-
 drivers/gpu/drm/xe/xe_hw_fence_types.h      |  2 +-
 drivers/gpu/drm/xe/xe_irq.c                 |  6 ++++--
 drivers/gpu/drm/xe/xe_lrc.c                 |  6 +++---
 drivers/gpu/drm/xe/xe_migrate.c             | 12 +++++++-----
 drivers/gpu/drm/xe/xe_mmio.c                |  2 +-
 drivers/gpu/drm/xe/xe_mocs.c                |  5 +++--
 drivers/gpu/drm/xe/xe_module.c              |  2 ++
 drivers/gpu/drm/xe/xe_pci.c                 |  4 ++--
 drivers/gpu/drm/xe/xe_pcode.c               |  9 ++++-----
 drivers/gpu/drm/xe/xe_pm.c                  |  5 +++--
 drivers/gpu/drm/xe/xe_preempt_fence.c       |  3 ++-
 drivers/gpu/drm/xe/xe_pt.c                  |  5 +++--
 drivers/gpu/drm/xe/xe_query.c               | 12 +++++++-----
 drivers/gpu/drm/xe/xe_reg_sr.c              |  4 ++--
 drivers/gpu/drm/xe/xe_reg_sr_types.h        |  2 +-
 drivers/gpu/drm/xe/xe_reg_whitelist.c       |  6 +++---
 drivers/gpu/drm/xe/xe_ring_ops.c            |  5 +++--
 drivers/gpu/drm/xe/xe_rtp.h                 |  2 +-
 drivers/gpu/drm/xe/xe_sa.c                  |  4 +++-
 drivers/gpu/drm/xe/xe_sync.c                |  5 +++--
 drivers/gpu/drm/xe/xe_trace.h               |  4 ++--
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c         |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c      |  2 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c        |  2 +-
 drivers/gpu/drm/xe/xe_tuning.c              |  2 +-
 drivers/gpu/drm/xe/xe_uc.c                  |  5 +++--
 drivers/gpu/drm/xe/xe_uc_fw.h               |  4 ++--
 drivers/gpu/drm/xe/xe_uc_fw_abi.h           |  2 +-
 drivers/gpu/drm/xe/xe_vm.c                  |  2 +-
 drivers/gpu/drm/xe/xe_vm_madvise.c          |  8 +++++---
 drivers/gpu/drm/xe/xe_wopcm.c               |  3 ++-
 66 files changed, 172 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 8b9209571fd01..d10448d1b4d7e 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -4,10 +4,11 @@
  */
 
 #include "xe_bb.h"
-#include "xe_sa.h"
+
 #include "xe_device.h"
 #include "xe_engine_types.h"
 #include "xe_hw_fence.h"
+#include "xe_sa.h"
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index b4fa856b79d14..3e5393e00b43a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -3,7 +3,6 @@
  * Copyright © 2021 Intel Corporation
  */
 
-
 #include "xe_bo.h"
 
 #include <linux/dma-buf.h>
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 3fb3c8c77efa9..bbf89a58cdf5b 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -3,8 +3,9 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include "xe_bo.h"
 #include "xe_bo_evict.h"
+
+#include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index b0f8b157ffa3d..7827a785b0204 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_debugfs.h"
+
 #include <linux/string_helpers.h>
 
 #include <drm/drm_debugfs.h>
 
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_debugfs.h"
 #include "xe_gt_debugfs.h"
 #include "xe_step.h"
 
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 8fe0324ccef35..6d7d57d08a997 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -5,12 +5,12 @@
 
 #include "xe_device.h"
 
-#include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_aperture.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_ioctl.h>
-#include <drm/xe_drm.h>
 #include <drm/drm_managed.h>
-#include <drm/drm_atomic_helper.h>
+#include <drm/xe_drm.h>
 
 #include "xe_bo.h"
 #include "xe_debugfs.h"
@@ -20,8 +20,8 @@
 #include "xe_exec.h"
 #include "xe_gt.h"
 #include "xe_irq.h"
-#include "xe_module.h"
 #include "xe_mmio.h"
+#include "xe_module.h"
 #include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_query.h"
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 88d55671b0685..ed55ef567d186 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -12,8 +12,8 @@ struct xe_file;
 #include <drm/drm_util.h>
 
 #include "xe_device_types.h"
-#include "xe_macros.h"
 #include "xe_force_wake.h"
+#include "xe_macros.h"
 
 #include "gt/intel_gpu_commands.h"
 
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index d09ff25bd9409..9b252cc782b78 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -3,20 +3,19 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_dma_buf.h"
+
+#include <kunit/test.h>
 #include <linux/dma-buf.h>
+#include <linux/pci-p2pdma.h>
 
 #include <drm/drm_device.h>
 #include <drm/drm_prime.h>
-
 #include <drm/ttm/ttm_tt.h>
 
-#include <kunit/test.h>
-#include <linux/pci-p2pdma.h>
-
 #include "tests/xe_test.h"
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_dma_buf.h"
 #include "xe_ttm_vram_mgr.h"
 #include "xe_vm.h"
 
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index b69dcbef08249..519fbbcabdb9a 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -5,10 +5,11 @@
 
 #include "xe_engine.h"
 
+#include <linux/nospec.h>
+
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
 #include <drm/xe_drm.h>
-#include <linux/nospec.h>
 
 #include "xe_device.h"
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 00f298acc4366..97fd1a311f2df 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_exec.h"
+
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
 #include <drm/xe_drm.h>
@@ -10,7 +12,6 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
-#include "xe_exec.h"
 #include "xe_macros.h"
 #include "xe_sched_job.h"
 #include "xe_sync.h"
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 47587571123a3..8ff1f36f89f79 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -3,15 +3,15 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#include <drm/drm_managed.h>
-
 #include "xe_execlist.h"
 
+#include <drm/drm_managed.h>
+
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
-#include "xe_hw_fence.h"
 #include "xe_gt.h"
+#include "xe_hw_fence.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
@@ -19,11 +19,11 @@
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 
-#include "i915_reg.h"
+#include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
-#include "gt/intel_engine_regs.h"
+#include "i915_reg.h"
 
 #define XE_EXECLIST_HANG_LIMIT 1
 
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 21d04a02847bb..1ead587cd5c99 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_force_wake.h"
+
 #include <drm/drm_util.h>
 
-#include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_mmio.h"
-#include "gt/intel_gt_regs.h"
 
+#include "gt/intel_gt_regs.h"
 
 #define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
 
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 907a603572b2b..3730bbeb26b21 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -6,20 +6,20 @@
 #include "xe_ggtt.h"
 
 #include <linux/sizes.h>
-#include <drm/i915_drm.h>
 
 #include <drm/drm_managed.h>
+#include <drm/i915_drm.h>
 
-#include "xe_device.h"
 #include "xe_bo.h"
+#include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_wopcm.h"
 
-#include "i915_reg.h"
 #include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
 
 /* FIXME: Common file, preferably auto-gen */
 #define MTL_GGTT_PTE_PAT0	BIT_ULL(52)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 5a3c8fd5936a7..74e9445befe4e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_gt.h"
+
 #include <linux/minmax.h>
 
 #include <drm/drm_managed.h>
@@ -14,7 +16,6 @@
 #include "xe_execlist.h"
 #include "xe_force_wake.h"
 #include "xe_ggtt.h"
-#include "xe_gt.h"
 #include "xe_gt_clock.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 575433e9718ac..2d9f2aa42bade 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -3,15 +3,16 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include "i915_reg.h"
-#include "gt/intel_gt_regs.h"
+#include "xe_gt_clock.h"
 
 #include "xe_device.h"
 #include "xe_gt.h"
-#include "xe_gt_clock.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
 
+#include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
+
 static u32 read_reference_ts_freq(struct xe_gt *gt)
 {
 	u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index c320e58810ce3..78942e12e76cb 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_gt_debugfs.h"
+
 #include <drm/drm_debugfs.h>
 #include <drm/drm_managed.h>
 
@@ -10,7 +12,6 @@
 #include "xe_force_wake.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
-#include "xe_gt_debugfs.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_engine.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 7617f03408794..8fa59988d08e4 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -3,8 +3,9 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include "xe_gt.h"
 #include "xe_gt_mcr.h"
+
+#include "xe_gt.h"
 #include "xe_gt_topology.h"
 #include "xe_gt_types.h"
 #include "xe_mmio.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index ce79eb48feb84..0e7047b89a83c 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_gt_pagefault.h"
+
 #include <linux/circ_buf.h>
 
 #include <drm/drm_managed.h>
@@ -10,7 +12,6 @@
 
 #include "xe_bo.h"
 #include "xe_gt.h"
-#include "xe_gt_pagefault.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index 2d966d935b8e0..c01cc689058c3 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -3,11 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_gt_sysfs.h"
+
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
+
 #include <drm/drm_managed.h>
+
 #include "xe_gt.h"
-#include "xe_gt_sysfs.h"
 
 static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
 {
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 2521c8a65690b..f279e21300aa9 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -3,8 +3,9 @@
  * Copyright © 2023 Intel Corporation
  */
 
-#include "xe_gt.h"
 #include "xe_gt_tlb_invalidation.h"
+
+#include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_trace.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 3dd7cbbff0714..c76aaea1887c3 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -3,10 +3,11 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_gt_topology.h"
+
 #include <linux/bitmap.h>
 
 #include "xe_gt.h"
-#include "xe_gt_topology.h"
 #include "xe_mmio.h"
 
 #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 32bcc40463e11..db3d8c947603a 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -3,24 +3,26 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc.h"
+
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_guc.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
 #include "xe_guc_ads.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_hwconfig.h"
 #include "xe_guc_log.h"
-#include "xe_guc_reg.h"
 #include "xe_guc_pc.h"
+#include "xe_guc_reg.h"
 #include "xe_guc_submit.h"
-#include "xe_gt.h"
+#include "xe_mmio.h"
 #include "xe_platform_types.h"
 #include "xe_uc_fw.h"
 #include "xe_wopcm.h"
-#include "xe_mmio.h"
-#include "xe_force_wake.h"
-#include "i915_reg_defs.h"
+
 #include "gt/intel_gt_regs.h"
+#include "i915_reg_defs.h"
 
 /* TODO: move to common file */
 #define GUC_PVC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 7be33458eef62..74a74051f354e 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -6,8 +6,8 @@
 #ifndef _XE_GUC_H_
 #define _XE_GUC_H_
 
-#include "xe_hw_engine_types.h"
 #include "xe_guc_types.h"
+#include "xe_hw_engine_types.h"
 #include "xe_macros.h"
 
 struct drm_printer;
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 0c08cecaca401..a4e947f0c557d 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -3,20 +3,22 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc_ads.h"
+
 #include <drm/drm_managed.h>
 
 #include "xe_bo.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
-#include "xe_guc_ads.h"
 #include "xe_guc_reg.h"
 #include "xe_hw_engine.h"
 #include "xe_lrc.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
-#include "gt/intel_gt_regs.h"
+
 #include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
 
 /* Slack of a few additional entries per engine */
 #define ADS_REGSET_EXTRA_MAX	8
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 84d4302d4e721..5e00b75d3ca21 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc_ct.h"
+
 #include <linux/bitfield.h>
 #include <linux/circ_buf.h>
 #include <linux/delay.h>
@@ -12,10 +14,9 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
-#include "xe_guc.h"
-#include "xe_guc_ct.h"
 #include "xe_gt_pagefault.h"
 #include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
 #include "xe_guc_submit.h"
 #include "xe_map.h"
 #include "xe_trace.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 17b148bf3735d..e0f9063e9b65f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -6,8 +6,8 @@
 #ifndef _XE_GUC_CT_TYPES_H_
 #define _XE_GUC_CT_TYPES_H_
 
-#include <linux/iosys-map.h>
 #include <linux/interrupt.h>
+#include <linux/iosys-map.h>
 #include <linux/spinlock_types.h>
 #include <linux/wait.h>
 #include <linux/xarray.h>
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index 916e9633b3227..6b72db4d5bb23 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc_debugfs.h"
+
 #include <drm/drm_debugfs.h>
 #include <drm/drm_managed.h>
 
@@ -10,7 +12,6 @@
 #include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
-#include "xe_guc_debugfs.h"
 #include "xe_guc_log.h"
 #include "xe_macros.h"
 
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index f562404a6cf71..20155ba4ef073 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -10,9 +10,9 @@
 
 #include "abi/guc_actions_abi.h"
 #include "abi/guc_actions_slpc_abi.h"
-#include "abi/guc_errors_abi.h"
-#include "abi/guc_communication_mmio_abi.h"
 #include "abi/guc_communication_ctb_abi.h"
+#include "abi/guc_communication_mmio_abi.h"
+#include "abi/guc_errors_abi.h"
 #include "abi/guc_klvs_abi.h"
 #include "abi/guc_messages_abi.h"
 
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index 57640d6087877..a6982f323ed1f 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc_hwconfig.h"
+
 #include <drm/drm_managed.h>
 
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
-#include "xe_guc_hwconfig.h"
 #include "xe_map.h"
 
 static struct xe_gt *
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 7ec1b2bb1f8e5..9a7b5d5906c1c 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -3,11 +3,12 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc_log.h"
+
 #include <drm/drm_managed.h>
 
 #include "xe_bo.h"
 #include "xe_gt.h"
-#include "xe_guc_log.h"
 #include "xe_map.h"
 #include "xe_module.h"
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 3ba0c8a351099..28b86e8f3f6ea 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -3,21 +3,24 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc_pc.h"
+
+#include <linux/delay.h>
+
 #include <drm/drm_managed.h>
+
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
-#include "xe_gt_types.h"
 #include "xe_gt_sysfs.h"
+#include "xe_gt_types.h"
 #include "xe_guc_ct.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_pcode.h"
-#include "i915_reg_defs.h"
-#include "i915_reg.h"
-
-#include <linux/delay.h>
 
+#include "i915_reg.h"
+#include "i915_reg_defs.h"
 #include "intel_mchbar_regs.h"
 
 /* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 39548e03acf4f..2afd0dbc35426 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -6,8 +6,8 @@
 #ifndef _XE_GUC_PC_TYPES_H_
 #define _XE_GUC_PC_TYPES_H_
 
-#include <linux/types.h>
 #include <linux/mutex.h>
+#include <linux/types.h>
 
 /**
  * struct xe_guc_pc - GuC Power Conservation (PC)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 2d4eb527d6e8b..6469d3cd3beb9 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_guc_submit.h"
+
 #include <linux/bitfield.h>
 #include <linux/bitmap.h>
 #include <linux/circ_buf.h>
@@ -13,13 +15,12 @@
 
 #include "xe_device.h"
 #include "xe_engine.h"
+#include "xe_force_wake.h"
+#include "xe_gpu_scheduler.h"
+#include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_engine_types.h"
-#include "xe_guc_submit.h"
-#include "xe_gt.h"
-#include "xe_force_wake.h"
-#include "xe_gpu_scheduler.h"
 #include "xe_hw_engine.h"
 #include "xe_hw_fence.h"
 #include "xe_lrc.h"
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 82e7fb3a62922..a9448c6f6418f 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_huc.h"
+
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_reg.h"
-#include "xe_huc.h"
 #include "xe_mmio.h"
 #include "xe_uc_fw.h"
 
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
index 268bac36336a0..ee3d8315036a0 100644
--- a/drivers/gpu/drm/xe/xe_huc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_huc_debugfs.h"
+
 #include <drm/drm_debugfs.h>
 #include <drm/drm_managed.h>
 
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_huc.h"
-#include "xe_huc_debugfs.h"
 #include "xe_macros.h"
 
 static struct xe_gt *
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 986f675aaf88c..074133d440090 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -22,8 +22,8 @@
 #include "xe_wa.h"
 
 #include "gt/intel_engine_regs.h"
-#include "i915_reg.h"
 #include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
 
 #define MAX_MMIO_BASES 3
 struct engine_info {
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
index a78e50eb3cb87..b33c4956e8ea0 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -6,8 +6,8 @@
 #ifndef _XE_HW_FENCE_TYPES_H_
 #define _XE_HW_FENCE_TYPES_H_
 
-#include <linux/iosys-map.h>
 #include <linux/dma-fence.h>
+#include <linux/iosys-map.h>
 #include <linux/irq_work.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index ab703f1c8b583..46431f0e4af82 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -3,19 +3,21 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "xe_irq.h"
+
 #include <linux/sched/clock.h>
 
 #include <drm/drm_managed.h>
 
 #include "xe_device.h"
 #include "xe_drv.h"
-#include "xe_guc.h"
 #include "xe_gt.h"
+#include "xe_guc.h"
 #include "xe_hw_engine.h"
 #include "xe_mmio.h"
 
-#include "i915_reg.h"
 #include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
 
 static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
 {
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 347ff9b344942..ed6fcf7620e13 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -9,15 +9,15 @@
 #include "xe_device.h"
 #include "xe_engine_types.h"
 #include "xe_gt.h"
-#include "xe_map.h"
 #include "xe_hw_fence.h"
+#include "xe_map.h"
 #include "xe_vm.h"
 
-#include "i915_reg.h"
+#include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
-#include "gt/intel_engine_regs.h"
+#include "i915_reg.h"
 
 #define GEN8_CTX_VALID				(1 << 0)
 #define GEN8_CTX_L3LLC_COHERENT			(1 << 5)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 377ab019b4c84..bbab524dcee62 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -2,8 +2,15 @@
 /*
  * Copyright © 2020 Intel Corporation
  */
+
 #include "xe_migrate.h"
 
+#include <linux/sizes.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_engine.h"
@@ -20,11 +27,6 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
-#include <linux/sizes.h>
-#include <drm/drm_managed.h>
-#include <drm/ttm/ttm_tt.h>
-#include <drm/xe_drm.h>
-
 #include "gt/intel_gpu_commands.h"
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index c414ece6dfe3b..a117437f84826 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -14,9 +14,9 @@
 #include "xe_macros.h"
 #include "xe_module.h"
 
-#include "i915_reg.h"
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
 
 #define XEHP_MTCFG_ADDR		_MMIO(0x101800)
 #define TILE_COUNT		REG_GENMASK(15, 8)
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 86b966fffbe5f..d91054c787024 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_mocs.h"
+
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
 #include "xe_gt.h"
-#include "xe_platform_types.h"
 #include "xe_mmio.h"
-#include "xe_mocs.h"
+#include "xe_platform_types.h"
 #include "xe_step_types.h"
 
 #include "gt/intel_gt_regs.h"
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index cc862553a252f..3f5d03a586968 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -3,6 +3,8 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "xe_module.h"
+
 #include <linux/init.h>
 #include <linux/module.h>
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 3474b5c9f1740..884f9b16c9de0 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -10,12 +10,12 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 
-#include <drm/drm_drv.h>
 #include <drm/drm_color_mgmt.h>
+#include <drm/drm_drv.h>
 #include <drm/xe_pciids.h>
 
-#include "xe_drv.h"
 #include "xe_device.h"
+#include "xe_drv.h"
 #include "xe_macros.h"
 #include "xe_module.h"
 #include "xe_pm.h"
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 1a76fe4788536..fb1ce2d49bec5 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -3,15 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include "xe_pcode_api.h"
 #include "xe_pcode.h"
 
-#include "xe_gt.h"
-#include "xe_mmio.h"
-
+#include <linux/delay.h>
 #include <linux/errno.h>
 
-#include <linux/delay.h>
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_pcode_api.h"
 
 /**
  * DOC: PCODE
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 0ef92b746595d..9a74d15052c49 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_pm.h"
+
 #include <linux/pm_runtime.h>
 
 #include <drm/ttm/ttm_placement.h>
@@ -10,9 +12,8 @@
 #include "xe_bo.h"
 #include "xe_bo_evict.h"
 #include "xe_device.h"
-#include "xe_pm.h"
-#include "xe_gt.h"
 #include "xe_ggtt.h"
+#include "xe_gt.h"
 #include "xe_irq.h"
 #include "xe_pcode.h"
 
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 6ab9ff4427664..78ad8c2098736 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -3,10 +3,11 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_preempt_fence.h"
+
 #include <linux/slab.h>
 
 #include "xe_engine.h"
-#include "xe_preempt_fence.h"
 #include "xe_vm.h"
 
 static void preempt_fence_work_func(struct work_struct *w)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 23f308184ba1d..00d9fff538284 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -3,18 +3,19 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_pt.h"
+
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
-#include "xe_pt.h"
 #include "xe_pt_types.h"
 #include "xe_pt_walk.h"
-#include "xe_vm.h"
 #include "xe_res_cursor.h"
 #include "xe_trace.h"
 #include "xe_ttm_stolen_mgr.h"
+#include "xe_vm.h"
 
 struct xe_pt_dir {
 	struct xe_pt pt;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index f9f21bd1bfd72..0f70945176f60 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -3,17 +3,19 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include <drm/xe_drm.h>
-#include <drm/ttm/ttm_placement.h>
+#include "xe_query.h"
+
 #include <linux/nospec.h>
 
+#include <drm/ttm/ttm_placement.h>
+#include <drm/xe_drm.h>
+
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_gt.h"
-#include "xe_macros.h"
-#include "xe_query.h"
 #include "xe_ggtt.h"
+#include "xe_gt.h"
 #include "xe_guc_hwconfig.h"
+#include "xe_macros.h"
 
 static const enum xe_engine_class xe_to_user_engine_class[] = {
 	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 16e025dcf2ccc..4d12f8a3043ff 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -9,16 +9,16 @@
 #include <linux/string_helpers.h>
 #include <linux/xarray.h>
 
-#include <drm/drm_print.h>
 #include <drm/drm_managed.h>
+#include <drm/drm_print.h>
 
-#include "xe_rtp_types.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
+#include "xe_rtp_types.h"
 
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
index 2fa7ff3966ba7..b234a8673e54b 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr_types.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -6,8 +6,8 @@
 #ifndef _XE_REG_SR_TYPES_
 #define _XE_REG_SR_TYPES_
 
-#include <linux/xarray.h>
 #include <linux/types.h>
+#include <linux/xarray.h>
 
 #include "i915_reg_defs.h"
 
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index a34617a642ec6..f6ce801215e7a 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -5,12 +5,12 @@
 
 #include "xe_reg_whitelist.h"
 
-#include "xe_platform_types.h"
 #include "xe_gt_types.h"
+#include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#include "../i915/gt/intel_engine_regs.h"
-#include "../i915/gt/intel_gt_regs.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
 
 #undef _MMIO
 #undef MCR_REG
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 54db4ca19a36e..2e118d37b88ce 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -3,18 +3,19 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_ring_ops.h"
+
 #include "xe_engine_types.h"
 #include "xe_gt.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
-#include "xe_ring_ops.h"
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
-#include "i915_reg.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
+#include "i915_reg.h"
 
 static u32 preparser_disable(bool state)
 {
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 1ac3fd1c0734d..d6ba0b7e5042e 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -6,8 +6,8 @@
 #ifndef _XE_RTP_
 #define _XE_RTP_
 
-#include <linux/xarray.h>
 #include <linux/types.h>
+#include <linux/xarray.h>
 
 #include "xe_rtp_types.h"
 
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 7403410cd8064..96c4b0ef24fe8 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -3,14 +3,16 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_sa.h"
+
 #include <linux/kernel.h>
+
 #include <drm/drm_managed.h>
 
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_map.h"
-#include "xe_sa.h"
 
 static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
 {
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 0fbd8d0978cfd..99f1ed87196d6 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -8,13 +8,14 @@
 #include <linux/kthread.h>
 #include <linux/sched/mm.h>
 #include <linux/uaccess.h>
-#include <drm/xe_drm.h>
+
 #include <drm/drm_print.h>
 #include <drm/drm_syncobj.h>
+#include <drm/xe_drm.h>
 
 #include "xe_device_types.h"
-#include "xe_sched_job_types.h"
 #include "xe_macros.h"
+#include "xe_sched_job_types.h"
 
 #define SYNC_FLAGS_TYPE_MASK 0x3
 #define SYNC_FLAGS_FENCE_INSTALLED	0x10000
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index d1cd4b57a9740..878ab4115d912 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -9,14 +9,14 @@
 #if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
 #define _XE_TRACE_H_
 
-#include <linux/types.h>
 #include <linux/tracepoint.h>
+#include <linux/types.h>
 
 #include "xe_bo_types.h"
 #include "xe_engine_types.h"
 #include "xe_gpu_scheduler_types.h"
-#include "xe_gt_types.h"
 #include "xe_gt_tlb_invalidation_types.h"
+#include "xe_gt_types.h"
 #include "xe_guc_engine_types.h"
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
index a0ba8bba84d14..8075781070f27 100644
--- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
@@ -6,8 +6,8 @@
 
 #include <drm/drm_managed.h>
 
-#include <drm/ttm/ttm_range_manager.h>
 #include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include "xe_bo.h"
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 097454f782865..fe0f707ad0546 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -8,8 +8,8 @@
 #include <drm/drm_mm.h>
 
 #include <drm/ttm/ttm_device.h>
-#include <drm/ttm/ttm_range_manager.h>
 #include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
 
 #include "../i915/i915_reg.h"
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index c7e21673b8fdd..643365b18bc7f 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -6,8 +6,8 @@
 
 #include <drm/drm_managed.h>
 
-#include <drm/ttm/ttm_range_manager.h>
 #include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
 
 #include "xe_bo.h"
 #include "xe_device.h"
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 595eb2de90adf..a3872f0330cb0 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -5,8 +5,8 @@
 
 #include "xe_tuning.h"
 
-#include "xe_platform_types.h"
 #include "xe_gt_types.h"
+#include "xe_platform_types.h"
 #include "xe_rtp.h"
 
 #include "gt/intel_gt_regs.h"
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 7886c8b853972..4ccf2b3435e16 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_uc.h"
+
 #include "xe_device.h"
-#include "xe_huc.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_pc.h"
 #include "xe_guc_submit.h"
-#include "xe_uc.h"
+#include "xe_huc.h"
 #include "xe_uc_fw.h"
 #include "xe_wopcm.h"
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index b0df5064b27da..ca64d379bb5ef 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -8,9 +8,9 @@
 
 #include <linux/errno.h>
 
-#include "xe_uc_fw_types.h"
-#include "xe_uc_fw_abi.h"
 #include "xe_macros.h"
+#include "xe_uc_fw_abi.h"
+#include "xe_uc_fw_types.h"
 
 struct drm_printer;
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index fc7b1855ee90f..89e994ed4e009 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -6,8 +6,8 @@
 #ifndef _XE_UC_FW_ABI_H
 #define _XE_UC_FW_ABI_H
 
-#include <linux/types.h>
 #include <linux/build_bug.h>
+#include <linux/types.h>
 
 /**
  * DOC: Firmware Layout
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 04481851fa005..fcac31f11706f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -25,8 +25,8 @@
 #include "xe_preempt_fence.h"
 #include "xe_pt.h"
 #include "xe_res_cursor.h"
-#include "xe_trace.h"
 #include "xe_sync.h"
+#include "xe_trace.h"
 
 #define TEST_VM_ASYNC_OPS_ERROR
 
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 4498aa2fbd471..89a02c8e04247 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -3,13 +3,15 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#include <drm/xe_drm.h>
-#include <drm/ttm/ttm_tt.h>
+#include "xe_vm_madvise.h"
+
 #include <linux/nospec.h>
 
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
 #include "xe_bo.h"
 #include "xe_vm.h"
-#include "xe_vm_madvise.h"
 
 static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
 				       struct xe_vma **vmas, int num_vmas,
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index 8fe182afa06c5..7bb880355f6ba 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -3,13 +3,14 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include "xe_wopcm.h"
+
 #include "xe_device.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_guc_reg.h"
 #include "xe_mmio.h"
 #include "xe_uc_fw.h"
-#include "xe_wopcm.h"
 
 /**
  * DOC: Write Once Protected Content Memory (WOPCM) Layout
-- 
GitLab


From b79e8fd954c48fba74b2c3807f6093ce40e9ab7f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:39 -0800
Subject: [PATCH 1346/2340] drm/xe: Remove dependency on intel_engine_regs.h

Create regs/xe_engine_regs.h file with all the registers and bit
definitions used by the xe driver. Eventually the registers may be
defined in a different way and since xe doesn't supported below gen12,
the number of registers touched is much smaller, so create a new header.

The definitions themselves are direct copy from the
gt/intel_engine_regs.h file, just sorting the registers by address.
Cleaning those up and adhering to a common coding style is left for
later.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 98 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_execlist.c         |  2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c          |  3 +-
 drivers/gpu/drm/xe/xe_hw_engine.c        |  2 +-
 drivers/gpu/drm/xe/xe_lrc.c              |  2 +-
 drivers/gpu/drm/xe/xe_mmio.c             |  2 +-
 drivers/gpu/drm/xe/xe_reg_sr.c           |  2 +-
 drivers/gpu/drm/xe/xe_reg_whitelist.c    |  2 +-
 drivers/gpu/drm/xe/xe_wa.c               |  2 +-
 9 files changed, 107 insertions(+), 8 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_engine_regs.h

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
new file mode 100644
index 0000000000000..6dfa3cf2fd43c
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_REGS_H_
+#define _XE_ENGINE_REGS_H_
+
+#include <asm/page.h>
+
+#include "i915_reg_defs.h"
+
+#define RING_TAIL(base)				_MMIO((base) + 0x30)
+
+#define RING_HEAD(base)				_MMIO((base) + 0x34)
+#define   HEAD_ADDR				0x001FFFFC
+
+#define RING_START(base)			_MMIO((base) + 0x38)
+
+#define RING_CTL(base)				_MMIO((base) + 0x3c)
+#define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
+#define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
+
+#define RING_PSMI_CTL(base)			_MMIO((base) + 0x50)
+#define   GEN8_RC_SEMA_IDLE_MSG_DISABLE			REG_BIT(12)
+#define   GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
+
+#define RING_ACTHD_UDW(base)			_MMIO((base) + 0x5c)
+#define RING_DMA_FADD_UDW(base)			_MMIO((base) + 0x60)
+#define RING_IPEIR(base)			_MMIO((base) + 0x64)
+#define RING_IPEHR(base)			_MMIO((base) + 0x68)
+#define RING_ACTHD(base)			_MMIO((base) + 0x74)
+#define RING_DMA_FADD(base)			_MMIO((base) + 0x78)
+#define RING_HWS_PGA(base)			_MMIO((base) + 0x80)
+#define IPEIR(base)				_MMIO((base) + 0x88)
+#define IPEHR(base)				_MMIO((base) + 0x8c)
+#define RING_HWSTAM(base)			_MMIO((base) + 0x98)
+#define RING_MI_MODE(base)			_MMIO((base) + 0x9c)
+#define RING_NOPID(base)			_MMIO((base) + 0x94)
+
+#define RING_IMR(base)				_MMIO((base) + 0xa8)
+#define   RING_MAX_NONPRIV_SLOTS  12
+
+#define RING_EIR(base)				_MMIO((base) + 0xb0)
+#define RING_EMR(base)				_MMIO((base) + 0xb4)
+#define RING_ESR(base)				_MMIO((base) + 0xb8)
+#define RING_BBADDR(base)			_MMIO((base) + 0x140)
+#define RING_BBADDR_UDW(base)			_MMIO((base) + 0x168)
+#define RING_EXECLIST_STATUS_LO(base)		_MMIO((base) + 0x234)
+#define RING_EXECLIST_STATUS_HI(base)		_MMIO((base) + 0x234 + 4)
+
+#define RING_CONTEXT_CONTROL(base)		_MMIO((base) + 0x244)
+#define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	REG_BIT(3)
+#define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
+
+#define RING_MODE_GEN7(base)			_MMIO((base) + 0x29c)
+#define   GEN11_GFX_DISABLE_LEGACY_MODE		(1 << 3)
+
+#define RING_TIMESTAMP(base)			_MMIO((base) + 0x358)
+
+#define RING_TIMESTAMP_UDW(base)		_MMIO((base) + 0x358 + 4)
+#define   RING_VALID_MASK			0x00000001
+#define   RING_VALID				0x00000001
+#define   STOP_RING				REG_BIT(8)
+#define   TAIL_ADDR				0x001FFFF8
+
+#define RING_CTX_TIMESTAMP(base)		_MMIO((base) + 0x3a8)
+
+#define RING_FORCE_TO_NONPRIV(base, i)		_MMIO(((base) + 0x4d0) + (i) * 4)
+#define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
+#define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RW	(0 << 28)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RD	(1 << 28)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_WR	(2 << 28)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_INVALID	(3 << 28)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_MASK	(3 << 28)
+#define   RING_FORCE_TO_NONPRIV_RANGE_1		(0 << 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_4		(1 << 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_16	(2 << 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_64	(3 << 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_MASK	(3 << 0)
+#define   RING_FORCE_TO_NONPRIV_MASK_VALID	(RING_FORCE_TO_NONPRIV_RANGE_MASK | \
+						 RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
+						 RING_FORCE_TO_NONPRIV_DENY)
+#define   RING_MAX_NONPRIV_SLOTS  12
+
+#define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
+
+#define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
+#define	  EL_CTRL_LOAD				REG_BIT(0)
+
+#define VDBOX_CGCTL3F10(base)			_MMIO((base) + 0x3f10)
+#define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
+
+#define VDBOX_CGCTL3F18(base)			_MMIO((base) + 0x3f18)
+#define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 8ff1f36f89f79..e700737a213db 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -7,6 +7,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_engine_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
@@ -19,7 +20,6 @@
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index a4e947f0c557d..6cd07f51b8282 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -7,6 +7,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_engine_regs.h"
 #include "xe_bo.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
@@ -17,9 +18,9 @@
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
 
+
 /* Slack of a few additional entries per engine */
 #define ADS_REGSET_EXTRA_MAX	8
 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 074133d440090..f7c5f709b088b 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -7,6 +7,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_engine_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_execlist.h"
@@ -21,7 +22,6 @@
 #include "xe_sched_job.h"
 #include "xe_wa.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index ed6fcf7620e13..887d9189fcec3 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -5,6 +5,7 @@
 
 #include "xe_lrc.h"
 
+#include "regs/xe_engine_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine_types.h"
@@ -13,7 +14,6 @@
 #include "xe_map.h"
 #include "xe_vm.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index a117437f84826..07db7912a9313 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -8,13 +8,13 @@
 #include <drm/drm_managed.h>
 #include <drm/xe_drm.h>
 
+#include "regs/xe_engine_regs.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_macros.h"
 #include "xe_module.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 4d12f8a3043ff..359de4724bb54 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -12,6 +12,7 @@
 #include <drm/drm_managed.h>
 #include <drm/drm_print.h>
 
+#include "regs/xe_engine_regs.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
@@ -20,7 +21,6 @@
 #include "xe_mmio.h"
 #include "xe_rtp_types.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
 
 #define XE_REG_SR_GROW_STEP_DEFAULT	16
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index f6ce801215e7a..5aa73c1c40973 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -5,11 +5,11 @@
 
 #include "xe_reg_whitelist.h"
 
+#include "regs/xe_engine_regs.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
 
 #undef _MMIO
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 9d2e4555091c7..92065341c0016 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -7,6 +7,7 @@
 
 #include <linux/compiler_types.h>
 
+#include "regs/xe_engine_regs.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
@@ -16,7 +17,6 @@
 #include "xe_rtp.h"
 #include "xe_step.h"
 
-#include "gt/intel_engine_regs.h"
 #include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
-- 
GitLab


From 226bfec858c93797dbd3d47d1418ed68684fa752 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:40 -0800
Subject: [PATCH 1347/2340] drm/xe: Remove dependency on intel_gt_regs.h

Create regs/xe_gt_regs.h file with all the registers and bit
definitions used by the xe driver. Eventually the registers may be
defined in a different way and since xe doesn't supported below gen12,
the number of registers touched is much smaller, so create a new header.

The definitions themselves are direct copy from the
gt/intel_gt_regs.h file, just sorting the registers by address.
Cleaning those up and adhering to a common coding style is left for
later.

v2: Make the change to MCR_REG location in a separate patch to go
    through the i915 branch  (Matt Roper / Rodrigo)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h  | 288 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_execlist.c      |   2 +-
 drivers/gpu/drm/xe/xe_force_wake.c    |   3 +-
 drivers/gpu/drm/xe/xe_ggtt.c          |   2 +-
 drivers/gpu/drm/xe/xe_gt.c            |   3 +-
 drivers/gpu/drm/xe/xe_gt_clock.c      |   2 +-
 drivers/gpu/drm/xe/xe_gt_mcr.c        |   3 +-
 drivers/gpu/drm/xe/xe_guc.c           |   2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c       |   4 +-
 drivers/gpu/drm/xe/xe_guc_pc.c        |   2 +-
 drivers/gpu/drm/xe/xe_hw_engine.c     |   2 +-
 drivers/gpu/drm/xe/xe_irq.c           |   2 +-
 drivers/gpu/drm/xe/xe_lrc.c           |   2 +-
 drivers/gpu/drm/xe/xe_mmio.c          |   2 +-
 drivers/gpu/drm/xe/xe_mocs.c          |   3 +-
 drivers/gpu/drm/xe/xe_reg_sr.c        |   3 +-
 drivers/gpu/drm/xe/xe_reg_whitelist.c |   3 +-
 drivers/gpu/drm/xe/xe_ring_ops.c      |   2 +-
 drivers/gpu/drm/xe/xe_tuning.c        |   3 +-
 drivers/gpu/drm/xe/xe_wa.c            |   2 +-
 20 files changed, 307 insertions(+), 28 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_gt_regs.h

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
new file mode 100644
index 0000000000000..3eb92c9750850
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_REGS_H_
+#define _XE_GT_REGS_H_
+
+#include "i915_reg_defs.h"
+
+/* RPM unit config (Gen8+) */
+#define RPM_CONFIG0				_MMIO(0xd00)
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	0
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ	3
+#define   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
+#define   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
+
+#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n)	_MMIO(0xd50 + (n) * 4)
+#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n)	_MMIO(0xd70 + (n) * 4)
+#define FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0xd84)
+#define FORCEWAKE_ACK_GT_MTL			_MMIO(0xdfc)
+
+#define GEN9_LNCFCMOCS(i)			_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
+#define LNCFCMOCS_REG_COUNT			32
+
+#define MCFG_MCR_SELECTOR			_MMIO(0xfd0)
+#define MTL_MCR_SELECTOR			_MMIO(0xfd4)
+#define SF_MCR_SELECTOR				_MMIO(0xfd8)
+#define GEN8_MCR_SELECTOR			_MMIO(0xfdc)
+#define GAM_MCR_SELECTOR			_MMIO(0xfe0)
+#define   GEN11_MCR_MULTICAST			REG_BIT(31)
+#define   GEN11_MCR_SLICE(slice)		(((slice) & 0xf) << 27)
+#define   GEN11_MCR_SLICE_MASK			GEN11_MCR_SLICE(0xf)
+#define   GEN11_MCR_SUBSLICE(subslice)		(((subslice) & 0x7) << 24)
+#define   GEN11_MCR_SUBSLICE_MASK		GEN11_MCR_SUBSLICE(0x7)
+#define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
+#define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
+
+#define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
+#define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
+
+#define GEN9_CS_DEBUG_MODE1			_MMIO(0x20ec)
+#define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
+
+#define PS_INVOCATION_COUNT			_MMIO(0x2348)
+
+#define GEN8_CS_CHICKEN1			_MMIO(0x2580)
+#define   GEN9_PREEMPT_3D_OBJECT_LEVEL		(1 << 0)
+#define   GEN9_PREEMPT_GPGPU_LEVEL(hi, lo)	(((hi) << 2) | ((lo) << 1))
+#define   GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 0)
+#define   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 1)
+#define   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
+#define   GEN9_PREEMPT_GPGPU_LEVEL_MASK		GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
+
+#define GEN12_GLOBAL_MOCS(i)			_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
+#define GEN12_CCS_AUX_INV			_MMIO(0x4208)
+
+#define GEN12_VD0_AUX_INV			_MMIO(0x4218)
+#define GEN12_VE0_AUX_INV			_MMIO(0x4238)
+
+#define GEN12_VE1_AUX_INV			_MMIO(0x42b8)
+#define   AUX_INV				REG_BIT(0)
+
+#define GEN12_PAT_INDEX(index)			_MMIO(0x4800 + (index) * 4)
+#define XEHP_TILE0_ADDR_RANGE			MCR_REG(0x4900)
+#define XEHP_FLAT_CCS_BASE_ADDR			MCR_REG(0x4910)
+
+#define GEN12_FF_MODE2				_MMIO(0x6604)
+#define XEHP_FF_MODE2				MCR_REG(0x6604)
+#define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
+#define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
+#define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
+#define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
+
+#define HIZ_CHICKEN				_MMIO(0x7018)
+#define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
+
+/* GEN7 chicken */
+#define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
+
+#define GEN11_COMMON_SLICE_CHICKEN3		_MMIO(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3		MCR_REG(0x7304)
+#define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
+#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE	REG_BIT(12)
+#define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC	REG_BIT(11)
+#define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE	REG_BIT(9)
+
+#define XEHP_SQCM				MCR_REG(0x8724)
+#define   EN_32B_ACCESS				REG_BIT(30)
+
+#define	GEN10_MIRROR_FUSE3			_MMIO(0x9118)
+#define   GEN10_L3BANK_PAIR_COUNT		4
+#define   GEN10_L3BANK_MASK			0x0F
+/* on Xe_HP the same fuses indicates mslices instead of L3 banks */
+#define   GEN12_MAX_MSLICES			4
+#define   GEN12_MEML3_EN_MASK			0x0F
+
+/* Fuse readout registers for GT */
+#define XEHP_FUSE4				_MMIO(0x9114)
+#define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
+
+#define GEN11_GT_VEBOX_VDBOX_DISABLE		_MMIO(0x9140)
+#define   GEN11_GT_VDBOX_DISABLE_MASK		0xff
+#define   GEN11_GT_VEBOX_DISABLE_SHIFT		16
+#define   GEN11_GT_VEBOX_DISABLE_MASK		(0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
+
+#define GEN6_GDRST				_MMIO(0x941c)
+#define   GEN11_GRDOM_GUC			REG_BIT(3)
+#define   GEN6_GRDOM_FULL			(1 << 0)
+#define   GEN11_GRDOM_FULL			GEN6_GRDOM_FULL
+
+#define GEN7_MISCCPCTL				_MMIO(0x9424)
+#define   GEN7_DOP_CLOCK_GATE_ENABLE		(1 << 0)
+#define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE	REG_BIT(1)
+
+#define UNSLCGCTL9430				_MMIO(0x9430)
+#define   MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE		_MMIO(0x9434)
+#define   VFUNIT_CLKGATE_DIS			REG_BIT(20)
+#define   TSGUNIT_CLKGATE_DIS			REG_BIT(17) /* XEHPSDV */
+#define   CG3DDISCFEG_CLKGATE_DIS		REG_BIT(17) /* DG2 */
+#define   GAMEDIA_CLKGATE_DIS			REG_BIT(11)
+#define   HSUNIT_CLKGATE_DIS			REG_BIT(8)
+#define   VSUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLCGCTL9440				_MMIO(0x9440)
+#define   GAMTLBOACS_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBVDBOX5_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBVDBOX6_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBVDBOX3_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBVDBOX4_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBVDBOX7_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBVDBOX2_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBVDBOX0_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBKCR_CLKGATE_DIS			REG_BIT(16)
+#define   GAMTLBGUC_CLKGATE_DIS			REG_BIT(15)
+#define   GAMTLBBLT_CLKGATE_DIS			REG_BIT(14)
+#define   GAMTLBVDBOX1_CLKGATE_DIS		REG_BIT(6)
+
+#define UNSLCGCTL9444				_MMIO(0x9444)
+#define   GAMTLBGFXA0_CLKGATE_DIS		REG_BIT(30)
+#define   GAMTLBGFXA1_CLKGATE_DIS		REG_BIT(29)
+#define   GAMTLBCOMPA0_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBCOMPA1_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBCOMPB0_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBCOMPB1_CLKGATE_DIS		REG_BIT(25)
+#define   GAMTLBCOMPC0_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBCOMPC1_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBCOMPD0_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBCOMPD1_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBMERT_CLKGATE_DIS		REG_BIT(20)
+#define   GAMTLBVEBOX3_CLKGATE_DIS		REG_BIT(19)
+#define   GAMTLBVEBOX2_CLKGATE_DIS		REG_BIT(18)
+#define   GAMTLBVEBOX1_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBVEBOX0_CLKGATE_DIS		REG_BIT(16)
+#define   LTCDD_CLKGATE_DIS			REG_BIT(10)
+
+#define GEN11_SLICE_UNIT_LEVEL_CLKGATE		_MMIO(0x94d4)
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE		MCR_REG(0x94d4)
+#define   SARBUNIT_CLKGATE_DIS			(1 << 5)
+#define   RCCUNIT_CLKGATE_DIS			(1 << 7)
+#define   MSCUNIT_CLKGATE_DIS			(1 << 10)
+#define   NODEDSS_CLKGATE_DIS			REG_BIT(12)
+#define   L3_CLKGATE_DIS			REG_BIT(16)
+#define   L3_CR2X_CLKGATE_DIS			REG_BIT(17)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE2		_MMIO(0x94e4)
+#define   VSUNIT_CLKGATE_DIS_TGL		REG_BIT(19)
+#define   PSDUNIT_CLKGATE_DIS			REG_BIT(5)
+
+#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE	MCR_REG(0x9524)
+#define   DSS_ROUTER_CLKGATE_DIS		REG_BIT(28)
+#define   GWUNIT_CLKGATE_DIS			REG_BIT(16)
+
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2		MCR_REG(0x9528)
+#define   CPSSUNIT_CLKGATE_DIS			REG_BIT(9)
+
+#define SSMCGCTL9530				MCR_REG(0x9530)
+#define   RTFUNIT_CLKGATE_DIS			REG_BIT(18)
+
+#define GEN10_DFR_RATIO_EN_AND_CHICKEN		MCR_REG(0x9550)
+#define   DFR_DISABLE				(1 << 9)
+
+#define GEN6_RPNSWREQ				_MMIO(0xa008)
+#define GEN6_RC_CONTROL				_MMIO(0xa090)
+#define GEN6_RC_STATE				_MMIO(0xa094)
+
+#define GEN6_PMINTRMSK				_MMIO(0xa168)
+#define   GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC	(1 << 31)
+#define   ARAT_EXPIRED_INTRMSK			(1 << 9)
+
+#define FORCEWAKE_GT_GEN9			_MMIO(0xa188)
+
+#define GEN9_PG_ENABLE				_MMIO(0xa210)
+
+/* GPM unit config (Gen9+) */
+#define CTC_MODE				_MMIO(0xa26c)
+#define   CTC_SOURCE_PARAMETER_MASK		1
+#define   CTC_SOURCE_CRYSTAL_CLOCK		0
+#define   CTC_SOURCE_DIVIDE_LOGIC		1
+#define   CTC_SHIFT_PARAMETER_SHIFT		1
+#define   CTC_SHIFT_PARAMETER_MASK		(0x3 << CTC_SHIFT_PARAMETER_SHIFT)
+
+#define FORCEWAKE_RENDER_GEN9			_MMIO(0xa278)
+#define FORCEWAKE_MEDIA_VDBOX_GEN11(n)		_MMIO(0xa540 + (n) * 4)
+#define FORCEWAKE_MEDIA_VEBOX_GEN11(n)		_MMIO(0xa560 + (n) * 4)
+
+#define GEN10_SAMPLER_MODE			MCR_REG(0xe18c)
+#define   ENABLE_SMALLPL			REG_BIT(15)
+#define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
+#define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
+
+#define GEN9_ROW_CHICKEN4			MCR_REG(0xe48c)
+#define   GEN12_DISABLE_GRF_CLEAR		REG_BIT(13)
+#define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
+#define   GEN12_DISABLE_TDL_PUSH		REG_BIT(9)
+#define   GEN11_DIS_PICK_2ND_EU			REG_BIT(7)
+#define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX	REG_BIT(4)
+#define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
+#define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
+
+#define GEN7_ROW_CHICKEN2			_MMIO(0xe4f4)
+#define   GEN12_DISABLE_READ_SUPPRESSION	REG_BIT(15)
+#define   GEN12_DISABLE_EARLY_READ		REG_BIT(14)
+#define   GEN12_ENABLE_LARGE_GRF_MODE		REG_BIT(12)
+#define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
+#define   GEN12_DISABLE_DOP_GATING              REG_BIT(0)
+
+#define SARB_CHICKEN1				MCR_REG(0xe90c)
+#define   COMP_CKN_IN				REG_GENMASK(30, 29)
+
+#define GEN12_RCU_MODE				_MMIO(0x14800)
+#define   GEN12_RCU_MODE_CCS_ENABLE		REG_BIT(0)
+
+#define FORCEWAKE_ACK_GT_GEN9			_MMIO(0x130044)
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
+
+#define GEN6_GT_CORE_STATUS			_MMIO(0x138060)
+#define   GEN6_RC0				0
+#define   GEN6_RC6				3
+
+#define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
+#define GEN6_GT_GFX_RC6				_MMIO(0x138108)
+
+#define GFX_FLSH_CNTL_GEN6			_MMIO(0x101008)
+#define   GFX_FLSH_CNTL_EN			(1 << 0)
+
+#define GEN11_GT_INTR_DW(x)			_MMIO(0x190018 + ((x) * 4))
+
+#define GEN11_GUC_SG_INTR_ENABLE		_MMIO(0x190038)
+#define   ENGINE1_MASK				REG_GENMASK(31, 16)
+#define   ENGINE0_MASK				REG_GENMASK(15, 0)
+
+#define GEN11_GPM_WGBOXPERF_INTR_ENABLE		_MMIO(0x19003c)
+
+#define GEN11_INTR_IDENTITY_REG(x)		_MMIO(0x190060 + ((x) * 4))
+#define   GEN11_INTR_DATA_VALID			(1 << 31)
+#define   GEN11_INTR_ENGINE_INSTANCE(x)		(((x) & GENMASK(25, 20)) >> 20)
+#define   GEN11_INTR_ENGINE_CLASS(x)		(((x) & GENMASK(18, 16)) >> 16)
+#define   GEN11_INTR_ENGINE_INTR(x)		((x) & 0xffff)
+#define   OTHER_GUC_INSTANCE			0
+
+#define GEN11_RENDER_COPY_INTR_ENABLE		_MMIO(0x190030)
+#define GEN11_VCS_VECS_INTR_ENABLE		_MMIO(0x190034)
+#define GEN12_CCS_RSVD_INTR_ENABLE		_MMIO(0x190048)
+#define GEN11_IIR_REG_SELECTOR(x)		_MMIO(0x190070 + ((x) * 4))
+#define GEN11_RCS0_RSVD_INTR_MASK		_MMIO(0x190090)
+#define GEN11_BCS_RSVD_INTR_MASK		_MMIO(0x1900a0)
+#define GEN11_VCS0_VCS1_INTR_MASK		_MMIO(0x1900a8)
+#define GEN11_VCS2_VCS3_INTR_MASK		_MMIO(0x1900ac)
+#define GEN11_VECS0_VECS1_INTR_MASK		_MMIO(0x1900d0)
+#define GEN11_GUC_SG_INTR_MASK			_MMIO(0x1900e8)
+#define GEN11_GPM_WGBOXPERF_INTR_MASK		_MMIO(0x1900ec)
+#define GEN12_CCS0_CCS1_INTR_MASK		_MMIO(0x190100)
+#define GEN12_CCS2_CCS3_INTR_MASK		_MMIO(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK		_MMIO(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK		_MMIO(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK		_MMIO(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK		_MMIO(0x19011c)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index e700737a213db..fe20c61281347 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -8,6 +8,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
@@ -21,7 +22,6 @@
 #include "xe_sched_job.h"
 
 #include "gt/intel_gpu_commands.h"
-#include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
 #include "i915_reg.h"
 
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 1ead587cd5c99..77a210acfac39 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -7,11 +7,10 @@
 
 #include <drm/drm_util.h>
 
+#include "regs/xe_gt_regs.h"
 #include "xe_gt.h"
 #include "xe_mmio.h"
 
-#include "gt/intel_gt_regs.h"
-
 #define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
 
 static struct xe_gt *
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 3730bbeb26b21..3bf4373211498 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -10,6 +10,7 @@
 #include <drm/drm_managed.h>
 #include <drm/i915_drm.h>
 
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
@@ -18,7 +19,6 @@
 #include "xe_mmio.h"
 #include "xe_wopcm.h"
 
-#include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
 /* FIXME: Common file, preferably auto-gen */
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 74e9445befe4e..343370b44506e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -9,6 +9,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_gt_regs.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
@@ -41,8 +42,6 @@
 #include "xe_wa.h"
 #include "xe_wopcm.h"
 
-#include "gt/intel_gt_regs.h"
-
 struct xe_gt *xe_find_full_gt(struct xe_gt *gt)
 {
 	struct xe_gt *search;
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 2d9f2aa42bade..fd0ca33925cd9 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -5,12 +5,12 @@
 
 #include "xe_gt_clock.h"
 
+#include "regs/xe_gt_regs.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
 
-#include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
 static u32 read_reference_ts_freq(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 8fa59988d08e4..10eff02cc7db1 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -5,13 +5,12 @@
 
 #include "xe_gt_mcr.h"
 
+#include "regs/xe_gt_regs.h"
 #include "xe_gt.h"
 #include "xe_gt_topology.h"
 #include "xe_gt_types.h"
 #include "xe_mmio.h"
 
-#include "gt/intel_gt_regs.h"
-
 /**
  * DOC: GT Multicast/Replicated (MCR) Register Support
  *
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index db3d8c947603a..661effa9830ff 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -5,6 +5,7 @@
 
 #include "xe_guc.h"
 
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
@@ -21,7 +22,6 @@
 #include "xe_uc_fw.h"
 #include "xe_wopcm.h"
 
-#include "gt/intel_gt_regs.h"
 #include "i915_reg_defs.h"
 
 /* TODO: move to common file */
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 6cd07f51b8282..49725093fb470 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -8,6 +8,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
@@ -18,9 +19,6 @@
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
 
-#include "gt/intel_gt_regs.h"
-
-
 /* Slack of a few additional entries per engine */
 #define ADS_REGSET_EXTRA_MAX	8
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 28b86e8f3f6ea..f983f47cefb6a 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -9,6 +9,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
@@ -31,7 +32,6 @@
 #define GEN10_FREQ_INFO_REC	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK		REG_GENMASK(15, 8)
 
-#include "gt/intel_gt_regs.h"
 /* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */
 #define   REQ_RATIO_MASK	REG_GENMASK(31, 23)
 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index f7c5f709b088b..5e7f21b319bb3 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -8,6 +8,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_execlist.h"
@@ -22,7 +23,6 @@
 #include "xe_sched_job.h"
 #include "xe_wa.h"
 
-#include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
 #define MAX_MMIO_BASES 3
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 46431f0e4af82..04b3801fc0a8b 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -9,6 +9,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_gt_regs.h"
 #include "xe_device.h"
 #include "xe_drv.h"
 #include "xe_gt.h"
@@ -16,7 +17,6 @@
 #include "xe_hw_engine.h"
 #include "xe_mmio.h"
 
-#include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
 static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 887d9189fcec3..bf12f71fbe729 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -6,6 +6,7 @@
 #include "xe_lrc.h"
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine_types.h"
@@ -15,7 +16,6 @@
 #include "xe_vm.h"
 
 #include "gt/intel_gpu_commands.h"
-#include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
 #include "i915_reg.h"
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 07db7912a9313..5e6ca0d2076ad 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -9,13 +9,13 @@
 #include <drm/xe_drm.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_macros.h"
 #include "xe_module.h"
 
-#include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
 #define XEHP_MTCFG_ADDR		_MMIO(0x101800)
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index d91054c787024..7ff81041d5ce6 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -5,6 +5,7 @@
 
 #include "xe_mocs.h"
 
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
@@ -13,8 +14,6 @@
 #include "xe_platform_types.h"
 #include "xe_step_types.h"
 
-#include "gt/intel_gt_regs.h"
-
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
 #define mocs_dbg drm_dbg
 #else
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 359de4724bb54..d675164697108 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -13,6 +13,7 @@
 #include <drm/drm_print.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
@@ -21,8 +22,6 @@
 #include "xe_mmio.h"
 #include "xe_rtp_types.h"
 
-#include "gt/intel_gt_regs.h"
-
 #define XE_REG_SR_GROW_STEP_DEFAULT	16
 
 static void reg_sr_fini(struct drm_device *drm, void *arg)
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 5aa73c1c40973..2dd10e62718f1 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -6,12 +6,11 @@
 #include "xe_reg_whitelist.h"
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#include "gt/intel_gt_regs.h"
-
 #undef _MMIO
 #undef MCR_REG
 #define _MMIO(x)	_XE_RTP_REG(x)
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 2e118d37b88ce..1b633222fda61 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -5,6 +5,7 @@
 
 #include "xe_ring_ops.h"
 
+#include "regs/xe_gt_regs.h"
 #include "xe_engine_types.h"
 #include "xe_gt.h"
 #include "xe_lrc.h"
@@ -13,7 +14,6 @@
 #include "xe_vm_types.h"
 
 #include "gt/intel_gpu_commands.h"
-#include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc_reg.h"
 #include "i915_reg.h"
 
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index a3872f0330cb0..624b257ecfbc5 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -5,12 +5,11 @@
 
 #include "xe_tuning.h"
 
+#include "regs/xe_gt_regs.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#include "gt/intel_gt_regs.h"
-
 #undef _MMIO
 #undef MCR_REG
 #define _MMIO(x)	_XE_RTP_REG(x)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 92065341c0016..155cfd1dcc500 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -8,6 +8,7 @@
 #include <linux/compiler_types.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
@@ -17,7 +18,6 @@
 #include "xe_rtp.h"
 #include "xe_step.h"
 
-#include "gt/intel_gt_regs.h"
 #include "i915_reg.h"
 
 /**
-- 
GitLab


From 0992884d09cc1c91e9c3310a9204eb080db37714 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:41 -0800
Subject: [PATCH 1348/2340] drm/xe: Remove dependency on intel_lrc_reg.h

Create regs/xe_lrc_layout.h file with all the offsets used by the xe
driver. Eventually the xe driver may use a different way to define them
since it doesn't supported below gen12.

v2: Rename file to intel_lrc_layout.h since it's not really about
    registers (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_execlist.c        |  2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c      |  3 +--
 drivers/gpu/drm/xe/xe_lrc.c             |  2 +-
 drivers/gpu/drm/xe/xe_ring_ops.c        |  2 +-
 5 files changed, 21 insertions(+), 5 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_lrc_layout.h

diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
new file mode 100644
index 0000000000000..4be81abc86ad9
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LRC_LAYOUT_H_
+#define _XE_LRC_LAYOUT_H_
+
+#define CTX_CONTEXT_CONTROL		(0x02 + 1)
+#define CTX_RING_HEAD			(0x04 + 1)
+#define CTX_RING_TAIL			(0x06 + 1)
+#define CTX_RING_START			(0x08 + 1)
+#define CTX_RING_CTL			(0x0a + 1)
+#define CTX_PDP0_UDW			(0x30 + 1)
+#define CTX_PDP0_LDW			(0x32 + 1)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index fe20c61281347..d788a6e894a6e 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -9,6 +9,7 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
@@ -22,7 +23,6 @@
 #include "xe_sched_job.h"
 
 #include "gt/intel_gpu_commands.h"
-#include "gt/intel_lrc_reg.h"
 #include "i915_reg.h"
 
 #define XE_EXECLIST_HANG_LIMIT 1
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 6469d3cd3beb9..aa21f2bb5cbaa 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -13,6 +13,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_lrc_layout.h"
 #include "xe_device.h"
 #include "xe_engine.h"
 #include "xe_force_wake.h"
@@ -32,8 +33,6 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
-#include "gt/intel_lrc_reg.h"
-
 static struct xe_gt *
 guc_to_gt(struct xe_guc *guc)
 {
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index bf12f71fbe729..5baa3cf538522 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -7,6 +7,7 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine_types.h"
@@ -16,7 +17,6 @@
 #include "xe_vm.h"
 
 #include "gt/intel_gpu_commands.h"
-#include "gt/intel_lrc_reg.h"
 #include "i915_reg.h"
 
 #define GEN8_CTX_VALID				(1 << 0)
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 1b633222fda61..6275f8c348782 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -6,6 +6,7 @@
 #include "xe_ring_ops.h"
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
 #include "xe_engine_types.h"
 #include "xe_gt.h"
 #include "xe_lrc.h"
@@ -14,7 +15,6 @@
 #include "xe_vm_types.h"
 
 #include "gt/intel_gpu_commands.h"
-#include "gt/intel_lrc_reg.h"
 #include "i915_reg.h"
 
 static u32 preparser_disable(bool state)
-- 
GitLab


From 63955b3bfa0b69fd86b9e827e0f14f3fa4508826 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:42 -0800
Subject: [PATCH 1349/2340] drm/xe: Remove dependency on intel_gpu_commands.h

Copy the macros used by xe in intel_gpu_commands.h to
regs/xe_gpu_commands.h. PIPE_CONTROL_3D_ENGINE_FLAGS and
PIPE_CONTROL_3D_ARCH_FLAGS were already defined in
drivers/gpu/drm/xe/xe_ring_ops.c and only used there. So let that define
to be used instead of also adding to the new header.

v2: Let PIPE_CONTROL_3D_ENGINE_FLAGS/PIPE_CONTROL_3D_ARCH_FLAGS in the
    only .c that uses it instead of redefining (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 79 +++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_bb.c                |  3 +-
 drivers/gpu/drm/xe/xe_device.h            |  3 +-
 drivers/gpu/drm/xe/xe_execlist.c          |  2 +-
 drivers/gpu/drm/xe/xe_lrc.c               |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c           |  3 +-
 drivers/gpu/drm/xe/xe_ring_ops.c          | 49 +++++++-------
 7 files changed, 109 insertions(+), 32 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_gpu_commands.h

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
new file mode 100644
index 0000000000000..288576035ce37
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_COMMANDS_H_
+#define _XE_GPU_COMMANDS_H_
+
+#define INSTR_CLIENT_SHIFT      29
+#define   INSTR_MI_CLIENT       0x0
+#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
+
+#define MI_INSTR(opcode, flags) \
+	(__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
+
+#define MI_NOOP			MI_INSTR(0, 0)
+#define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
+
+#define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
+#define   MI_ARB_ENABLE			(1<<0)
+#define   MI_ARB_DISABLE		(0<<0)
+
+#define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
+#define MI_STORE_DATA_IMM	MI_INSTR(0x20, 0)
+
+#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
+#define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
+#define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
+#define   MI_LRI_FORCE_POSTED		(1<<12)
+
+#define MI_FLUSH_DW		MI_INSTR(0x26, 1)
+#define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
+#define   MI_INVALIDATE_TLB		(1<<18)
+#define   MI_FLUSH_DW_CCS		(1<<16)
+#define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_FLUSH_DW_USE_GTT		(1<<2)
+
+#define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
+
+#define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
+#define   SRC_ACCESS_TYPE_SHIFT		21
+#define   DST_ACCESS_TYPE_SHIFT		20
+#define   CCS_SIZE_MASK			0x3FF
+#define   CCS_SIZE_SHIFT		8
+#define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 25)
+#define   NUM_CCS_BYTES_PER_BLOCK	256
+#define   NUM_BYTES_PER_CCS_BYTE	256
+#define   NUM_CCS_BLKS_PER_XFER		1024
+
+#define XY_FAST_COLOR_BLT_CMD		(2 << 29 | 0x44 << 22)
+#define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
+#define   XY_FAST_COLOR_BLT_DW		16
+#define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 21)
+#define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
+
+#define GEN9_XY_FAST_COPY_BLT_CMD	(2 << 29 | 0x42 << 22)
+#define   BLT_DEPTH_32			(3<<24)
+
+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
+#define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
+#define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24)
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
+#define   PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET		(1<<19)
+#define   PIPE_CONTROL_PSD_SYNC				(1<<17)
+#define   PIPE_CONTROL_QW_WRITE				(1<<14)
+#define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
+#define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10)
+#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
+
+#define MI_ARB_CHECK            MI_INSTR(0x05, 0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index d10448d1b4d7e..5b24018e2a80b 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -5,6 +5,7 @@
 
 #include "xe_bb.h"
 
+#include "regs/xe_gpu_commands.h"
 #include "xe_device.h"
 #include "xe_engine_types.h"
 #include "xe_hw_fence.h"
@@ -12,8 +13,6 @@
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
-#include "gt/intel_gpu_commands.h"
-
 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 {
 	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index ed55ef567d186..263620953c3b3 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -11,12 +11,11 @@ struct xe_file;
 
 #include <drm/drm_util.h>
 
+#include "regs/xe_gpu_commands.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_macros.h"
 
-#include "gt/intel_gpu_commands.h"
-
 static inline struct xe_device *to_xe_device(const struct drm_device *dev)
 {
 	return container_of(dev, struct xe_device, drm);
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index d788a6e894a6e..8441ce24cfcfd 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -8,6 +8,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
 #include "xe_bo.h"
@@ -22,7 +23,6 @@
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 
-#include "gt/intel_gpu_commands.h"
 #include "i915_reg.h"
 
 #define XE_EXECLIST_HANG_LIMIT 1
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 5baa3cf538522..4435ec7504898 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -6,6 +6,7 @@
 #include "xe_lrc.h"
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
 #include "xe_bo.h"
@@ -16,7 +17,6 @@
 #include "xe_map.h"
 #include "xe_vm.h"
 
-#include "gt/intel_gpu_commands.h"
 #include "i915_reg.h"
 
 #define GEN8_CTX_VALID				(1 << 0)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index bbab524dcee62..e2ee51381ac1b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -11,6 +11,7 @@
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
 
+#include "regs/xe_gpu_commands.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_engine.h"
@@ -27,8 +28,6 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
-#include "gt/intel_gpu_commands.h"
-
 /**
  * struct xe_migrate - migrate context.
  */
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 6275f8c348782..ef8cef20acd65 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -5,6 +5,7 @@
 
 #include "xe_ring_ops.h"
 
+#include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
 #include "xe_engine_types.h"
@@ -14,9 +15,32 @@
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
-#include "gt/intel_gpu_commands.h"
 #include "i915_reg.h"
 
+/*
+ * 3D-related flags that can't be set on _engines_ that lack access to the 3D
+ * pipeline (i.e., CCS engines).
+ */
+#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
+		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
+		PIPE_CONTROL_TILE_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_STALL | \
+		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
+		PIPE_CONTROL_PSD_SYNC | \
+		PIPE_CONTROL_AMFS_FLUSH | \
+		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
+		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
+
+/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
+#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
+		PIPE_CONTROL_3D_ENGINE_FLAGS | \
+		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
+		PIPE_CONTROL_FLUSH_ENABLE | \
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+		PIPE_CONTROL_DC_FLUSH_ENABLE)
+
+
 static u32 preparser_disable(bool state)
 {
 	return MI_ARB_CHECK | BIT(8) | state;
@@ -181,29 +205,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
 
-/*
- * 3D-related flags that can't be set on _engines_ that lack access to the 3D
- * pipeline (i.e., CCS engines).
- */
-#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
-		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
-		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
-		PIPE_CONTROL_TILE_CACHE_FLUSH | \
-		PIPE_CONTROL_DEPTH_STALL | \
-		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
-		PIPE_CONTROL_PSD_SYNC | \
-		PIPE_CONTROL_AMFS_FLUSH | \
-		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
-		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
-
-/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
-#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
-		PIPE_CONTROL_3D_ENGINE_FLAGS | \
-		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
-		PIPE_CONTROL_FLUSH_ENABLE | \
-		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
-		PIPE_CONTROL_DC_FLUSH_ENABLE)
-
 static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 					    struct xe_lrc *lrc,
 					    u64 batch_addr, u32 seqno)
-- 
GitLab


From c584148145f73819a5ed968dc64ae10060fcd2c5 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:43 -0800
Subject: [PATCH 1350/2340] drm/xe: Remove dependency on i915_reg.h

Copy the macros used by xe in i915_reg.h to regs/xe_regs.h. A minimal
cleanup is done while copying so they adhere minimally to the coding
style.  Further reordering and cleaning is left for later.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h      | 108 +++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_execlist.c       |   3 +-
 drivers/gpu/drm/xe/xe_ggtt.c           |   3 +-
 drivers/gpu/drm/xe/xe_gt_clock.c       |   3 +-
 drivers/gpu/drm/xe/xe_guc_pc.c         |   3 +-
 drivers/gpu/drm/xe/xe_hw_engine.c      |   3 +-
 drivers/gpu/drm/xe/xe_irq.c            |   3 +-
 drivers/gpu/drm/xe/xe_lrc.c            |   3 +-
 drivers/gpu/drm/xe/xe_mmio.c           |   3 +-
 drivers/gpu/drm/xe/xe_pci.c            |   3 +-
 drivers/gpu/drm/xe/xe_ring_ops.c       |   4 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c |   3 +-
 drivers/gpu/drm/xe/xe_wa.c             |   3 +-
 13 files changed, 120 insertions(+), 25 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_regs.h

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
new file mode 100644
index 0000000000000..a697162e1a774
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#ifndef _XE_REGS_H_
+#define _XE_REGS_H_
+
+#include "i915_reg_defs.h"
+
+#define GU_CNTL					_MMIO(0x101010)
+#define   LMEM_INIT				REG_BIT(7)
+
+#define RENDER_RING_BASE			0x02000
+#define GEN11_BSD_RING_BASE			0x1c0000
+#define GEN11_BSD2_RING_BASE			0x1c4000
+#define GEN11_BSD3_RING_BASE			0x1d0000
+#define GEN11_BSD4_RING_BASE			0x1d4000
+#define XEHP_BSD5_RING_BASE			0x1e0000
+#define XEHP_BSD6_RING_BASE			0x1e4000
+#define XEHP_BSD7_RING_BASE			0x1f0000
+#define XEHP_BSD8_RING_BASE			0x1f4000
+#define VEBOX_RING_BASE				0x1a000
+#define GEN11_VEBOX_RING_BASE			0x1c8000
+#define GEN11_VEBOX2_RING_BASE			0x1d8000
+#define XEHP_VEBOX3_RING_BASE			0x1e8000
+#define XEHP_VEBOX4_RING_BASE			0x1f8000
+#define GEN12_COMPUTE0_RING_BASE		0x1a000
+#define GEN12_COMPUTE1_RING_BASE		0x1c000
+#define GEN12_COMPUTE2_RING_BASE		0x1e000
+#define GEN12_COMPUTE3_RING_BASE		0x26000
+#define BLT_RING_BASE				0x22000
+#define XEHPC_BCS1_RING_BASE			0x3e0000
+#define XEHPC_BCS2_RING_BASE			0x3e2000
+#define XEHPC_BCS3_RING_BASE			0x3e4000
+#define XEHPC_BCS4_RING_BASE			0x3e6000
+#define XEHPC_BCS5_RING_BASE			0x3e8000
+#define XEHPC_BCS6_RING_BASE			0x3ea000
+#define XEHPC_BCS7_RING_BASE			0x3ec000
+#define XEHPC_BCS8_RING_BASE			0x3ee000
+#define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
+#define   GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
+#define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
+#define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
+#define   GT_RENDER_USER_INTERRUPT		(1 <<  0)
+
+#define GEN7_FF_THREAD_MODE			_MMIO(0x20a0)
+#define   GEN12_FF_TESSELATION_DOP_GATE_DISABLE BIT(19)
+
+#define PVC_RP_STATE_CAP			_MMIO(0x281014)
+#define MTL_RP_STATE_CAP			_MMIO(0x138000)
+
+#define MTL_MEDIAP_STATE_CAP			_MMIO(0x138020)
+#define   MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)
+#define   MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
+
+#define MTL_GT_RPE_FREQUENCY			_MMIO(0x13800c)
+#define MTL_MPE_FREQUENCY			_MMIO(0x13802c)
+#define   MTL_RPE_MASK				REG_GENMASK(8, 0)
+
+#define TRANSCODER_A_OFFSET			0x60000
+#define TRANSCODER_B_OFFSET			0x61000
+#define TRANSCODER_C_OFFSET			0x62000
+#define TRANSCODER_D_OFFSET			0x63000
+#define TRANSCODER_DSI0_OFFSET			0x6b000
+#define TRANSCODER_DSI1_OFFSET			0x6b800
+#define PIPE_A_OFFSET				0x70000
+#define PIPE_B_OFFSET				0x71000
+#define PIPE_C_OFFSET				0x72000
+#define PIPE_D_OFFSET				0x73000
+#define PIPE_DSI0_OFFSET			0x7b000
+#define PIPE_DSI1_OFFSET			0x7b800
+
+#define GEN8_PCU_ISR				_MMIO(0x444e0)
+#define GEN8_PCU_IMR				_MMIO(0x444e4)
+#define GEN8_PCU_IIR				_MMIO(0x444e8)
+#define GEN8_PCU_IER				_MMIO(0x444ec)
+
+#define GEN11_GU_MISC_ISR			_MMIO(0x444f0)
+#define GEN11_GU_MISC_IMR			_MMIO(0x444f4)
+#define GEN11_GU_MISC_IIR			_MMIO(0x444f8)
+#define GEN11_GU_MISC_IER			_MMIO(0x444fc)
+#define   GEN11_GU_MISC_GSE			(1 << 27)
+
+#define GEN11_GFX_MSTR_IRQ			_MMIO(0x190010)
+#define   GEN11_MASTER_IRQ			(1 << 31)
+#define   GEN11_GU_MISC_IRQ			(1 << 29)
+#define   GEN11_DISPLAY_IRQ			(1 << 16)
+#define   GEN11_GT_DW_IRQ(x)			(1 << (x))
+
+#define DG1_MSTR_TILE_INTR			_MMIO(0x190008)
+#define   DG1_MSTR_IRQ				REG_BIT(31)
+#define   DG1_MSTR_TILE(t)			REG_BIT(t)
+
+#define GEN9_TIMESTAMP_OVERRIDE					_MMIO(0x44074)
+#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT	0
+#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK	0x3ff
+#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
+#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
+
+#define GGC					_MMIO(0x108040)
+#define   GMS_MASK				REG_GENMASK(15, 8)
+#define   GGMS_MASK				REG_GENMASK(7, 6)
+
+#define GEN12_GSMBASE				_MMIO(0x108100)
+#define GEN12_DSMBASE				_MMIO(0x1080C0)
+#define   GEN12_BDSM_MASK			REG_GENMASK64(63, 20)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 8441ce24cfcfd..be47d28da4c74 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -11,6 +11,7 @@
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
+#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine.h"
@@ -23,8 +24,6 @@
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 
-#include "i915_reg.h"
-
 #define XE_EXECLIST_HANG_LIMIT 1
 
 #define GEN11_SW_CTX_ID_SHIFT 37
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 3bf4373211498..d6ebc1d77f4d5 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -11,6 +11,7 @@
 #include <drm/i915_drm.h>
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
@@ -19,8 +20,6 @@
 #include "xe_mmio.h"
 #include "xe_wopcm.h"
 
-#include "i915_reg.h"
-
 /* FIXME: Common file, preferably auto-gen */
 #define MTL_GGTT_PTE_PAT0	BIT_ULL(52)
 #define MTL_GGTT_PTE_PAT1	BIT_ULL(53)
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index fd0ca33925cd9..60a2966bc1fd2 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -6,13 +6,12 @@
 #include "xe_gt_clock.h"
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
 
-#include "i915_reg.h"
-
 static u32 read_reference_ts_freq(struct xe_gt *gt)
 {
 	u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index f983f47cefb6a..acaba5c375e5a 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -10,6 +10,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
@@ -20,8 +21,6 @@
 #include "xe_mmio.h"
 #include "xe_pcode.h"
 
-#include "i915_reg.h"
-#include "i915_reg_defs.h"
 #include "intel_mchbar_regs.h"
 
 /* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 5e7f21b319bb3..ae541b5e50f36 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -9,6 +9,7 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_execlist.h"
@@ -23,8 +24,6 @@
 #include "xe_sched_job.h"
 #include "xe_wa.h"
 
-#include "i915_reg.h"
-
 #define MAX_MMIO_BASES 3
 struct engine_info {
 	const char *name;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 04b3801fc0a8b..071ccc75b71bf 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -10,6 +10,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_device.h"
 #include "xe_drv.h"
 #include "xe_gt.h"
@@ -17,8 +18,6 @@
 #include "xe_hw_engine.h"
 #include "xe_mmio.h"
 
-#include "i915_reg.h"
-
 static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
 {
 	u32 val = xe_mmio_read32(gt, reg.reg);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 4435ec7504898..af4518a82db27 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -9,6 +9,7 @@
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
+#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_engine_types.h"
@@ -17,8 +18,6 @@
 #include "xe_map.h"
 #include "xe_vm.h"
 
-#include "i915_reg.h"
-
 #define GEN8_CTX_VALID				(1 << 0)
 #define GEN8_CTX_L3LLC_COHERENT			(1 << 5)
 #define GEN8_CTX_PRIVILEGE			(1 << 8)
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 5e6ca0d2076ad..65b0df9bb5796 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -10,14 +10,13 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_macros.h"
 #include "xe_module.h"
 
-#include "i915_reg.h"
-
 #define XEHP_MTCFG_ADDR		_MMIO(0x101800)
 #define TILE_COUNT		REG_GENMASK(15, 8)
 #define GEN12_LMEM_BAR		2
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 884f9b16c9de0..6dcefb8cc7c37 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -14,6 +14,7 @@
 #include <drm/drm_drv.h>
 #include <drm/xe_pciids.h>
 
+#include "regs/xe_regs.h"
 #include "xe_device.h"
 #include "xe_drv.h"
 #include "xe_macros.h"
@@ -21,8 +22,6 @@
 #include "xe_pm.h"
 #include "xe_step.h"
 
-#include "i915_reg.h"
-
 #define DEV_INFO_FOR_EACH_FLAG(func) \
 	func(require_force_probe); \
 	func(is_dgfx); \
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index ef8cef20acd65..7dd886536fbc9 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -8,6 +8,7 @@
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
+#include "regs/xe_regs.h"
 #include "xe_engine_types.h"
 #include "xe_gt.h"
 #include "xe_lrc.h"
@@ -15,8 +16,6 @@
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
-#include "i915_reg.h"
-
 /*
  * 3D-related flags that can't be set on _engines_ that lack access to the 3D
  * pipeline (i.e., CCS engines).
@@ -40,7 +39,6 @@
 		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
 		PIPE_CONTROL_DC_FLUSH_ENABLE)
 
-
 static u32 preparser_disable(bool state)
 {
 	return MI_ARB_CHECK | BIT(8) | state;
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index fe0f707ad0546..2e8d07ad42aee 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -11,8 +11,7 @@
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_range_manager.h>
 
-#include "../i915/i915_reg.h"
-
+#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 155cfd1dcc500..df72b15dfeb0d 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -9,6 +9,7 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
@@ -18,8 +19,6 @@
 #include "xe_rtp.h"
 #include "xe_step.h"
 
-#include "i915_reg.h"
-
 /**
  * DOC: Hardware workarounds
  *
-- 
GitLab


From e12ef39272a3690bc779e2d4d812e36c0e7d45f8 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:44 -0800
Subject: [PATCH 1351/2340] drm/xe/guc_pc: Move gt register to the proper place

Move a few defines from xe_guc_pc.c to the right register, now that
there is one: xe_gt_regs.h.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++
 drivers/gpu/drm/xe/xe_guc_pc.c       | 6 ------
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 3eb92c9750850..df2677c9c9f6d 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -187,6 +187,7 @@
 #define   DFR_DISABLE				(1 << 9)
 
 #define GEN6_RPNSWREQ				_MMIO(0xa008)
+#define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
 #define GEN6_RC_CONTROL				_MMIO(0xa090)
 #define GEN6_RC_STATE				_MMIO(0xa094)
 
@@ -243,6 +244,7 @@
 #define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 
 #define GEN6_GT_CORE_STATUS			_MMIO(0x138060)
+#define   RCN_MASK				REG_GENMASK(2, 0)
 #define   GEN6_RC0				0
 #define   GEN6_RC6				3
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index acaba5c375e5a..d91dad8638efb 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -31,12 +31,6 @@
 #define GEN10_FREQ_INFO_REC	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK		REG_GENMASK(15, 8)
 
-/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */
-#define   REQ_RATIO_MASK	REG_GENMASK(31, 23)
-
-/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */
-#define   RCN_MASK	REG_GENMASK(2, 0)
-
 #define GEN12_RPSTAT1		_MMIO(0x1381b4)
 #define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
 
-- 
GitLab


From 5ec15f83117f2f89af39109c264c1fb0bbf8b5f0 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:45 -0800
Subject: [PATCH 1352/2340] drm/xe: Remove dependency on intel_mchbar_regs.h

The only thing really needed is the base offset, MCHBAR_MIRROR_BASE_SNB.
Remove the include and just define it inplace.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index d91dad8638efb..5a8d827ba7704 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -21,12 +21,12 @@
 #include "xe_mmio.h"
 #include "xe_pcode.h"
 
-#include "intel_mchbar_regs.h"
+#define MCHBAR_MIRROR_BASE_SNB	0x140000
 
-/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
-#define   RP0_MASK	REG_GENMASK(7, 0)
-#define   RP1_MASK	REG_GENMASK(15, 8)
-#define   RPN_MASK	REG_GENMASK(23, 16)
+#define GEN6_RP_STATE_CAP			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define   RP0_MASK				REG_GENMASK(7, 0)
+#define   RP1_MASK				REG_GENMASK(15, 8)
+#define   RPN_MASK				REG_GENMASK(23, 16)
 
 #define GEN10_FREQ_INFO_REC	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK		REG_GENMASK(15, 8)
-- 
GitLab


From 3457388fcd145d64e6852ca60084e822bec81e9f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:15:48 -0800
Subject: [PATCH 1353/2340] drm/xe: Prefer single underscore for header guards

Keep header guards consistent with regard to ifdef used. Prefer the more
commonly used in the driver.

	$ git grep  "ifndef __XE_" -- drivers/gpu/drm/xe | wc -l
	8
	$ git grep  "ifndef _XE_" -- drivers/gpu/drm/xe | wc -l
	112

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo_test.h      | 4 ++--
 drivers/gpu/drm/xe/tests/xe_dma_buf_test.h | 4 ++--
 drivers/gpu/drm/xe/tests/xe_migrate_test.h | 4 ++--
 drivers/gpu/drm/xe/tests/xe_test.h         | 4 ++--
 drivers/gpu/drm/xe/xe_gt_topology.h        | 6 +++---
 drivers/gpu/drm/xe/xe_map.h                | 4 ++--
 drivers/gpu/drm/xe/xe_migrate.h            | 4 ++--
 drivers/gpu/drm/xe/xe_res_cursor.h         | 4 ++--
 8 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h
index d751a618c0c8f..0113ab45066a4 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h
@@ -3,8 +3,8 @@
  * Copyright © 2023 Intel Corporation
  */
 
-#ifndef __XE_BO_TEST_H__
-#define __XE_BO_TEST_H__
+#ifndef _XE_BO_TEST_H_
+#define _XE_BO_TEST_H_
 
 struct kunit;
 
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
index 4e9a8bef57518..e6b464ddd5260 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
@@ -3,8 +3,8 @@
  * Copyright © 2023 Intel Corporation
  */
 
-#ifndef __XE_DMA_BUF_TEST_H__
-#define __XE_DMA_BUF_TEST_H__
+#ifndef _XE_DMA_BUF_TEST_H_
+#define _XE_DMA_BUF_TEST_H_
 
 struct kunit;
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
index db1f8ef035bba..7c645c66824f8 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -3,8 +3,8 @@
  * Copyright © 2023 Intel Corporation
  */
 
-#ifndef __XE_MIGRATE_TEST_H__
-#define __XE_MIGRATE_TEST_H__
+#ifndef _XE_MIGRATE_TEST_H_
+#define _XE_MIGRATE_TEST_H_
 
 struct kunit;
 
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
index 1ec502b5acf38..00c8a3f9af814 100644
--- a/drivers/gpu/drm/xe/tests/xe_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -3,8 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#ifndef __XE_TEST_H__
-#define __XE_TEST_H__
+#ifndef _XE_TEST_H_
+#define _XE_TEST_H_
 
 #include <linux/types.h>
 
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index 7a0abc64084f2..b2540dc266f21 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -3,8 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#ifndef __XE_GT_TOPOLOGY_H__
-#define __XE_GT_TOPOLOGY_H__
+#ifndef _XE_GT_TOPOLOGY_H_
+#define _XE_GT_TOPOLOGY_H_
 
 #include "xe_gt_types.h"
 
@@ -17,4 +17,4 @@ void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
 unsigned int
 xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
 
-#endif /* __XE_GT_TOPOLOGY_H__ */
+#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
index 0bac1f73a80d6..032c2e8b54386 100644
--- a/drivers/gpu/drm/xe/xe_map.h
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -3,8 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#ifndef __XE_MAP_H__
-#define __XE_MAP_H__
+#ifndef _XE_MAP_H_
+#define _XE_MAP_H_
 
 #include <linux/iosys-map.h>
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index b2d55283252f0..a569851db6f7f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -3,8 +3,8 @@
  * Copyright © 2020 Intel Corporation
  */
 
-#ifndef __XE_MIGRATE__
-#define __XE_MIGRATE__
+#ifndef _XE_MIGRATE_
+#define _XE_MIGRATE_
 
 #include <drm/drm_mm.h>
 
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 365c8ad7aeb8e..4e99fae26b4c0 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -21,8 +21,8 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#ifndef __XE_RES_CURSOR_H__
-#define __XE_RES_CURSOR_H__
+#ifndef _XE_RES_CURSOR_H_
+#define _XE_RES_CURSOR_H_
 
 #include <linux/scatterlist.h>
 
-- 
GitLab


From 8cb49012ac171698b1253dea45e56c284e997d38 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 25 Feb 2023 12:10:39 -0800
Subject: [PATCH 1354/2340] drm/xe: Do not spread i915_reg_defs.h include

Reduce the use of i915_reg_defs.h so it can be encapsulated in a single
place.

1) If it was being included by mistake, remove
2) If it was included for FIELD_GET()/FIELD_PREP()/GENMASK() and the
   like, just include <linux/bitfield.h>
3) If it was included to be able to define additional registers, move
   the registers to the relavant headers (regs/xe_regs.h or
   regs/xe_gt_regs.h)

v2:
  - Squash commit fixing i915_reg_defs.h include and with the one
    introducing regs/xe_reg_defs.h
  - Remove more cases of i915_reg_defs.h being used when all it was
    needed was linux/bitfield.h  (Matt Roper)
  - Move some  registers to the corresponding regs/*.h file (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo squashed here the removal of the i915 include]
---
 drivers/gpu/drm/xe/Makefile              |  3 ---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  2 +-
 drivers/gpu/drm/xe/regs/xe_gt_regs.h     |  8 +++++++-
 drivers/gpu/drm/xe/regs/xe_reg_defs.h    | 11 +++++++++++
 drivers/gpu/drm/xe/regs/xe_regs.h        |  4 +++-
 drivers/gpu/drm/xe/xe_device.c           |  3 +--
 drivers/gpu/drm/xe/xe_gt_mcr.h           |  2 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c     |  1 +
 drivers/gpu/drm/xe/xe_gt_topology.c      | 15 +++++----------
 drivers/gpu/drm/xe/xe_guc.c              |  8 --------
 drivers/gpu/drm/xe/xe_guc_reg.h          |  6 +++++-
 drivers/gpu/drm/xe/xe_migrate.c          |  1 +
 drivers/gpu/drm/xe/xe_pcode_api.h        |  2 ++
 drivers/gpu/drm/xe/xe_reg_sr_types.h     |  2 --
 drivers/gpu/drm/xe/xe_rtp.h              |  2 --
 drivers/gpu/drm/xe/xe_rtp_types.h        |  2 --
 drivers/gpu/drm/xe/xe_step.c             |  2 ++
 17 files changed, 40 insertions(+), 34 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_reg_defs.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 58ee9e82156d3..b3426f328d211 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -100,9 +100,6 @@ xe-y += xe_bb.o \
 	xe_wa.o \
 	xe_wopcm.o
 
-# XXX: Needed for i915 register definitions. Will be removed after xe-regs.
-subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/
-
 obj-$(CONFIG_DRM_XE) += xe.o
 obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
 \
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 6dfa3cf2fd43c..2aa67d001c34b 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -8,7 +8,7 @@
 
 #include <asm/page.h>
 
-#include "i915_reg_defs.h"
+#include "regs/xe_reg_defs.h"
 
 #define RING_TAIL(base)				_MMIO((base) + 0x30)
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index df2677c9c9f6d..47377d2167e0a 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -6,7 +6,7 @@
 #ifndef _XE_GT_REGS_H_
 #define _XE_GT_REGS_H_
 
-#include "i915_reg_defs.h"
+#include "regs/xe_reg_defs.h"
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0				_MMIO(0xd00)
@@ -108,6 +108,12 @@
 #define   GEN11_GT_VEBOX_DISABLE_SHIFT		16
 #define   GEN11_GT_VEBOX_DISABLE_MASK		(0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
 
+#define XELP_EU_ENABLE				_MMIO(0x9134)	/* "_DISABLE" on Xe_LP */
+#define   XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		_MMIO(0x913c)
+#define XEHP_GT_COMPUTE_DSS_ENABLE		_MMIO(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		_MMIO(0x9148)
+
 #define GEN6_GDRST				_MMIO(0x941c)
 #define   GEN11_GRDOM_GUC			REG_BIT(3)
 #define   GEN6_GRDOM_FULL			(1 << 0)
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
new file mode 100644
index 0000000000000..5f6735697d9c2
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_DEFS_H_
+#define _XE_REG_DEFS_H_
+
+#include "../../i915/i915_reg_defs.h"
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index a697162e1a774..2e7fbdedb5eb0 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -5,7 +5,7 @@
 #ifndef _XE_REGS_H_
 #define _XE_REGS_H_
 
-#include "i915_reg_defs.h"
+#include "regs/xe_reg_defs.h"
 
 #define GU_CNTL					_MMIO(0x101010)
 #define   LMEM_INIT				REG_BIT(7)
@@ -70,6 +70,8 @@
 #define PIPE_DSI0_OFFSET			0x7b000
 #define PIPE_DSI1_OFFSET			0x7b800
 
+#define SOFTWARE_FLAGS_SPR33			_MMIO(0x4f084)
+
 #define GEN8_PCU_ISR				_MMIO(0x444e0)
 #define GEN8_PCU_IMR				_MMIO(0x444e4)
 #define GEN8_PCU_IIR				_MMIO(0x444e8)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 6d7d57d08a997..00e8ed2353534 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -12,6 +12,7 @@
 #include <drm/drm_managed.h>
 #include <drm/xe_drm.h>
 
+#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_debugfs.h"
 #include "xe_dma_buf.h"
@@ -335,8 +336,6 @@ static void device_kill_persitent_engines(struct xe_device *xe,
 	mutex_unlock(&xe->persitent_engines.lock);
 }
 
-#define SOFTWARE_FLAGS_SPR33         _MMIO(0x4F084)
-
 void xe_device_wmb(struct xe_device *xe)
 {
 	struct xe_gt *gt = xe_device_get_gt(xe, 0);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index c31987d2177c9..2a6cd38c8cb75 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -6,7 +6,7 @@
 #ifndef _XE_GT_MCR_H_
 #define _XE_GT_MCR_H_
 
-#include "i915_reg_defs.h"
+#include "regs/xe_reg_defs.h"
 
 struct drm_printer;
 struct xe_gt;
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 0e7047b89a83c..1677640e10753 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -5,6 +5,7 @@
 
 #include "xe_gt_pagefault.h"
 
+#include <linux/bitfield.h>
 #include <linux/circ_buf.h>
 
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index c76aaea1887c3..2123f84be336e 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -7,18 +7,13 @@
 
 #include <linux/bitmap.h>
 
+#include "regs/xe_gt_regs.h"
 #include "xe_gt.h"
 #include "xe_mmio.h"
 
 #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
 #define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
 
-#define XELP_EU_ENABLE				0x9134	/* "_DISABLE" on Xe_LP */
-#define   XELP_EU_MASK				REG_GENMASK(7, 0)
-#define XELP_GT_GEOMETRY_DSS_ENABLE		0x913c
-#define XEHP_GT_COMPUTE_DSS_ENABLE		0x9144
-#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		0x9148
-
 static void
 load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
 {
@@ -41,7 +36,7 @@ static void
 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
 {
 	struct xe_device *xe = gt_to_xe(gt);
-	u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE);
+	u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE.reg);
 	u32 val = 0;
 	int i;
 
@@ -86,10 +81,10 @@ xe_gt_topology_init(struct xe_gt *gt)
 	}
 
 	load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs,
-		      XELP_GT_GEOMETRY_DSS_ENABLE);
+		      XELP_GT_GEOMETRY_DSS_ENABLE.reg);
 	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
-		      XEHP_GT_COMPUTE_DSS_ENABLE,
-		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+		      XEHP_GT_COMPUTE_DSS_ENABLE.reg,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT.reg);
 	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
 
 	xe_gt_topology_dump(gt, &p);
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 661effa9830ff..58b9841616e45 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -22,14 +22,6 @@
 #include "xe_uc_fw.h"
 #include "xe_wopcm.h"
 
-#include "i915_reg_defs.h"
-
-/* TODO: move to common file */
-#define GUC_PVC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
-#define PVC_MOCS_UC_INDEX		1
-#define PVC_GUC_MOCS_INDEX(index)	REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\
-						       index)
-
 static struct xe_gt *
 guc_to_gt(struct xe_guc *guc)
 {
diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h
index 513a7e0c8a5aa..efd60c186bbc0 100644
--- a/drivers/gpu/drm/xe/xe_guc_reg.h
+++ b/drivers/gpu/drm/xe/xe_guc_reg.h
@@ -9,7 +9,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 
-#include "i915_reg_defs.h"
+#include "regs/xe_reg_defs.h"
 
 /* Definitions of GuC H/W registers, bits, etc */
 
@@ -93,6 +93,10 @@
 #define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	(1<<10)
 #define   GUC_ENABLE_MIA_CLOCK_GATING		(1<<15)
 #define   GUC_GEN10_SHIM_WC_ENABLE		(1<<21)
+#define   PVC_GUC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
+#define   PVC_MOCS_UC_INDEX			1
+#define   PVC_GUC_MOCS_INDEX(index)		REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK,\
+							       index)
 
 #define GUC_SEND_INTERRUPT		_MMIO(0xc4c8)
 #define   GUC_SEND_TRIGGER		  (1<<0)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index e2ee51381ac1b..79aa3508ae3e7 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -5,6 +5,7 @@
 
 #include "xe_migrate.h"
 
+#include <linux/bitfield.h>
 #include <linux/sizes.h>
 
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 0762c8a912c79..4e689cd4b23b8 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -5,6 +5,8 @@
 
 /* Internal to xe_pcode */
 
+#include "regs/xe_reg_defs.h"
+
 #define PCODE_MAILBOX			_MMIO(0x138124)
 #define   PCODE_READY			REG_BIT(31)
 #define   PCODE_MB_PARAM2		REG_GENMASK(23, 16)
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
index b234a8673e54b..0e6d542ff1b4e 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr_types.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -9,8 +9,6 @@
 #include <linux/types.h>
 #include <linux/xarray.h>
 
-#include "i915_reg_defs.h"
-
 struct xe_reg_sr_entry {
 	u32		clr_bits;
 	u32		set_bits;
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index d6ba0b7e5042e..bd44fd8bbe057 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -11,8 +11,6 @@
 
 #include "xe_rtp_types.h"
 
-#include "i915_reg_defs.h"
-
 /*
  * Register table poke infrastructure
  */
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index fac0bd6d5b1e0..e87f1b280d968 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -8,8 +8,6 @@
 
 #include <linux/types.h>
 
-#include "i915_reg_defs.h"
-
 struct xe_hw_engine;
 struct xe_gt;
 
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index ca77d09715297..14f482f29ae44 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -5,6 +5,8 @@
 
 #include "xe_step.h"
 
+#include <linux/bitfield.h>
+
 #include "xe_device.h"
 #include "xe_platform_types.h"
 
-- 
GitLab


From 7bc08d2f49b065cbabca8caad142df147b96dfff Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 23 Feb 2023 10:57:34 -0800
Subject: [PATCH 1355/2340] drm/xe/mocs: Drop unwanted TGL table

TGL/RKL/ADLS/ADLP are all supposed to use the same MOCS table, with
values defined in the bspec.  Any entries listed in the bspec as
reserved/error/undefined should always be initialized to the most cached
and least coherent setting possible so that any userspace accidentally
referencing those undefined entries will only experience an increase in
coherency if spec updates down the road start defining real values.

The TGL and gen12 table entries defined in the driver today are
identical except that the TGL includes one additional (incorrect)
setting for table index 1.  Furthermore, the TGL-specific initialization
does not define a dedicated value for info->unused_entries_index, so
this incorrect table entry 1 also gets used to populate the MOCS
registers for all reserved/unused table entries.  This incorrect
behavior is a holdover from i915 where the platform was enabled with an
incorrect setting and by the time we noticed, it was too late to fix the
table without breaking ABI compatibility (and on TGL we did indeed have
some buggy userspace that was referencing the 'reserved' entry 1).
Since the Xe driver starts fresh with a clean slate on ABI, there's no
need to repeat the mistakes of i915 here.

v2:
 - Reword/clarify commit message.  (Lucas)

Bspec: 45101
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 46 ------------------------------------
 1 file changed, 46 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 7ff81041d5ce6..618b0069bcbae 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -247,47 +247,6 @@ struct xe_mocs_info {
 		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
 		L3_1_UC)
 
-static const struct xe_mocs_entry tgl_mocs_desc[] = {
-	/*
-	 * NOTE:
-	 * Reserved and unspecified MOCS indices have been set to (L3 + LCC).
-	 * These reserved entries should never be used, they may be changed
-	 * to low performant variants with better coherency in the future if
-	 * more entries are needed. We are programming index XE_MOCS_PTE(1)
-	 * only, __init_mocs_table() take care to program unused index with
-	 * this entry.
-	 */
-	MOCS_ENTRY(XE_MOCS_PTE,
-		   LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
-		   L3_1_UC),
-	GEN11_MOCS_ENTRIES,
-
-	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
-	MOCS_ENTRY(48,
-		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
-		   L3_3_WB),
-	/* Implicitly enable L1 - HDC:L1 + L3 */
-	MOCS_ENTRY(49,
-		   LE_1_UC | LE_TC_1_LLC,
-		   L3_3_WB),
-	/* Implicitly enable L1 - HDC:L1 + LLC */
-	MOCS_ENTRY(50,
-		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
-		   L3_1_UC),
-	/* Implicitly enable L1 - HDC:L1 */
-	MOCS_ENTRY(51,
-		   LE_1_UC | LE_TC_1_LLC,
-		   L3_1_UC),
-	/* HW Special Case (CCS) */
-	MOCS_ENTRY(60,
-		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
-		   L3_1_UC),
-	/* HW Special Case (Displayable) */
-	MOCS_ENTRY(61,
-		   LE_1_UC | LE_TC_1_LLC,
-		   L3_3_WB),
-};
-
 static const struct xe_mocs_entry dg1_mocs_desc[] = {
 	/* UC */
 	MOCS_ENTRY(1, 0, L3_1_UC),
@@ -422,11 +381,6 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		info->unused_entries_index = 5;
 		break;
 	case XE_TIGERLAKE:
-		info->size  = ARRAY_SIZE(tgl_mocs_desc);
-		info->table = tgl_mocs_desc;
-		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
-		info->uc_index = 3;
-		break;
 	case XE_ALDERLAKE_S:
 	case XE_ALDERLAKE_P:
 		info->size  = ARRAY_SIZE(gen12_mocs_desc);
-- 
GitLab


From 579a6546d33c92d810d19e971fd85ee4d0b9a5ce Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 23 Feb 2023 10:57:35 -0800
Subject: [PATCH 1356/2340] drm/xe/mocs: Add missing RKL handling

RKL should use the same "gen12" MOCS handling as TGL/ADL-S/ADL-P.

Bspec: 45101
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 618b0069bcbae..7f0dd7e7364d5 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -381,6 +381,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		info->unused_entries_index = 5;
 		break;
 	case XE_TIGERLAKE:
+	case XE_ROCKETLAKE:
 	case XE_ALDERLAKE_S:
 	case XE_ALDERLAKE_P:
 		info->size  = ARRAY_SIZE(gen12_mocs_desc);
-- 
GitLab


From d1000e3fc9fa6bfb88d37a177542b9b24802081f Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 23 Feb 2023 10:57:36 -0800
Subject: [PATCH 1357/2340] drm/xe/mocs: Drop xe_mocs_info_index

The values in the xe_mocs_info_index enum only match old pre-gen12
hardware not supported by the Xe driver.

The only usage of this enum was to set a default value for
info->unused_entries_index, but this is unnecessary since every platform
in the subsequent switch statement sets a proper platform-specific value
(and the XE_MOCS_PTE default doesn't even make sense since the hardware
dropped the "use PAT settings" capability in gen12).

v2:
 - Add a check that unusued_entries_index is non-zero; even for
   platforms where this is a valid table entry, it's never the one we
   want this value assigned to.  (Lucas)

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 40 +++++++++++-------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 7f0dd7e7364d5..65295cd4f4ad3 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -23,30 +23,6 @@ static inline void mocs_dbg(const struct drm_device *dev,
 { /* noop */ }
 #endif
 
-/*
- * MOCS indexes used for GPU surfaces, defining the cacheability of the
- * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
- */
-enum xe_mocs_info_index {
-	/*
-	 * Not cached anywhere, coherency between CPU and GPU accesses is
-	 * guaranteed.
-	 */
-	XE_MOCS_UNCACHED,
-	/*
-	 * Cacheability and coherency controlled by the kernel automatically
-	 * based on the xxxx  IOCTL setting and the current
-	 * usage of the surface (used for display scanout or not).
-	 */
-	XE_MOCS_PTE,
-	/*
-	 * Cached in all GPU caches available on the platform.
-	 * Coherency between CPU and GPU accesses to the surface is not
-	 * guaranteed without extra synchronization.
-	 */
-	XE_MOCS_CACHED,
-};
-
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_RENDER_L3CC = BIT(1),
@@ -341,7 +317,6 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 
 	memset(info, 0, sizeof(struct xe_mocs_info));
 
-	info->unused_entries_index = XE_MOCS_PTE;
 	switch (xe->info.platform) {
 	case XE_PVC:
 		info->size = ARRAY_SIZE(pvc_mocs_desc);
@@ -395,6 +370,16 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		return 0;
 	}
 
+	/*
+	 * Index 0 is a reserved/unused table entry on most platforms, but
+	 * even on those where it does represent a legitimate MOCS entry, it
+	 * never represents the "most cached, least coherent" behavior we want
+	 * to populate undefined table rows with.  So if unused_entries_index
+	 * is still 0 at this point, we'll assume that it was omitted by
+	 * mistake in the switch statement above.
+	 */
+	XE_WARN_ON(info->unused_entries_index == 0);
+
 	if (XE_WARN_ON(info->size > info->n_entries))
 		return 0;
 
@@ -406,9 +391,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 }
 
 /*
- * Get control_value from MOCS entry taking into account when it's not used
- * then if unused_entries_index is non-zero then its value will be returned
- * otherwise XE_MOCS_PTE's value is returned in this case.
+ * Get control_value from MOCS entry.  If the table entry is not defined, the
+ * settings from unused_entries_index will be returned.
  */
 static u32 get_entry_control(const struct xe_mocs_info *info,
 			     unsigned int index)
-- 
GitLab


From 6c57023ec42713e6cb91fdfbbd77147979e597e2 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 23 Feb 2023 10:57:37 -0800
Subject: [PATCH 1358/2340] drm/xe/mocs: Drop duplicate assignment of uc_index

The DG1 branch needlessly assigns uc_index twice.  Drop the second
instance.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 65295cd4f4ad3..e24d0dbc178ef 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -352,7 +352,6 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		info->table = dg1_mocs_desc;
 		info->uc_index = 1;
 		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
-		info->uc_index = 1;
 		info->unused_entries_index = 5;
 		break;
 	case XE_TIGERLAKE:
-- 
GitLab


From ee17e7f34a5e8a996da0c54e31584c5b089d65ff Mon Sep 17 00:00:00 2001
From: Philippe Lecluse <philippe.lecluse@intel.com>
Date: Thu, 23 Feb 2023 10:57:38 -0800
Subject: [PATCH 1359/2340] drm/xe/mocs: add MTL mocs

It was incorrectly using dg2_mocs for now.

v2 (MattR):
 - Use REG_GENMASK/REG_FIELD_PREP for bitfields
 - Add bspec references

Bspec: 45101, 45410, 63882
Signed-off-by: Philippe Lecluse <philippe.lecluse@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 69 +++++++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index e24d0dbc178ef..e00fa2155dc40 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -62,6 +62,10 @@ struct xe_mocs_info {
 #define L3_GLBGO(value)		((value) << 6)
 #define L3_LKUP(value)		((value) << 7)
 
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY	REG_GENMASK(3, 2)
+#define IG_PAT			REG_BIT(8)
+
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
@@ -89,6 +93,12 @@ struct xe_mocs_info {
 #define L3_2_RESERVED		_L3_CACHEABILITY(2)
 #define L3_3_WB			_L3_CACHEABILITY(3)
 
+/* L4 caching options */
+#define L4_0_WB                 REG_FIELD_PREP(_L4_CACHEABILITY, 0)
+#define L4_1_WT                 REG_FIELD_PREP(_L4_CACHEABILITY, 1)
+#define L4_2_RESERVED           REG_FIELD_PREP(_L4_CACHEABILITY, 2)
+#define L4_3_UC                 REG_FIELD_PREP(_L4_CACHEABILITY, 3)
+
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
 	[__idx] = { \
 		.control_value = __control_value, \
@@ -310,6 +320,57 @@ static const struct xe_mocs_entry pvc_mocs_desc[] = {
 	MOCS_ENTRY(2, 0, L3_3_WB),
 };
 
+static const struct xe_mocs_entry mtl_mocs_desc[] = {
+	/* Error - Reserved for Non-Use */
+	MOCS_ENTRY(0,
+		   0,
+		   L3_LKUP(1) | L3_3_WB),
+	/* Cached - L3 + L4 */
+	MOCS_ENTRY(1,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* L4 - GO:L3 */
+	MOCS_ENTRY(2,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_1_UC),
+	/* Uncached - GO:L3 */
+	MOCS_ENTRY(3,
+		   IG_PAT | L4_3_UC,
+		   L3_LKUP(1) | L3_1_UC),
+	/* L4 - GO:Mem */
+	MOCS_ENTRY(4,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - GO:Mem */
+	MOCS_ENTRY(5,
+		   IG_PAT | L4_3_UC,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(6,
+		   IG_PAT,
+		   L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(7,
+		   IG_PAT | L4_3_UC,
+		   L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(8,
+		   IG_PAT,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(9,
+		   IG_PAT | L4_3_UC,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Display - L3; L4:WT */
+	MOCS_ENTRY(14,
+		   IG_PAT | L4_1_WT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* CCS - Non-Displayable */
+	MOCS_ENTRY(15,
+		   IG_PAT,
+		   L3_GLBGO(1) | L3_1_UC),
+};
+
 static unsigned int get_mocs_settings(struct xe_device *xe,
 				      struct xe_mocs_info *info)
 {
@@ -327,11 +388,11 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		info->unused_entries_index = 2;
 		break;
 	case XE_METEORLAKE:
-		info->size = ARRAY_SIZE(dg2_mocs_desc);
-		info->table = dg2_mocs_desc;
+		info->size = ARRAY_SIZE(mtl_mocs_desc);
+		info->table = mtl_mocs_desc;
 		info->n_entries = MTL_NUM_MOCS_ENTRIES;
-		info->uc_index = 1;
-		info->unused_entries_index = 3;
+		info->uc_index = 9;
+		info->unused_entries_index = 1;
 		break;
 	case XE_DG2:
 		if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
-- 
GitLab


From f659ac1564d96b1ba19694db9899d6fb18ffc3e7 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 23 Feb 2023 10:57:39 -0800
Subject: [PATCH 1360/2340] drm/xe/mocs: LNCF MOCS settings only need to be
 restored on pre-Xe_HP

Reprogramming the LNCF MOCS registers on render domain reset is not
intended to be regular driver programming, but rather the implementation
of a specific workaround (Wa_1607983814).  This workaround no longer
applies on Xe_HP any beyond, so we can expect that these registers, like
the rest of the LNCF/LBCF registers, will maintain their values through
all engine resets.  We should only add these registers to the GuC's
save/restore list on platforms that need the workaround.

Furthermore, xe_mocs_init_engine() appears to be another attempt to
satisfy this same workaround.  This is unnecessary on the Xe driver
since even on platforms where the workaround is necessary, all
single-engine resets are initiated by the GuC and thus the GuC will take
care of saving/restoring these registers.  The only host-initiated
resets we have in Xe are full GT resets which will already
(re)initialize these registers as part of the regular xe_mocs_init()
flow.

v2:
 - Add needs_wa_1607983814() so that calculate_regset_size() doesn't
   overallocate regset space when the workaround isn't needed.  (Lucas)
 - On platforms affected by Wa_1607983814, only add the LNCF MOCS
   registers to the render engine's GuC save/restore list; resets of
   other engines don't need to save/restore these.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_execlist.c   |  2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c    | 20 ++++++++++++++++----
 drivers/gpu/drm/xe/xe_guc_submit.c |  1 -
 drivers/gpu/drm/xe/xe_mocs.c       | 13 -------------
 drivers/gpu/drm/xe/xe_mocs.h       |  1 -
 5 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index be47d28da4c74..ae7ee56f1b1b7 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -460,7 +460,7 @@ static void execlist_engine_suspend_wait(struct xe_engine *e)
 
 static void execlist_engine_resume(struct xe_engine *e)
 {
-	xe_mocs_init_engine(e);
+	/* NIY */
 }
 
 static const struct xe_engine_ops execlist_engine_ops = {
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 49725093fb470..7a892ff7aba32 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -207,6 +207,11 @@ static void guc_ads_fini(struct drm_device *drm, void *arg)
 	xe_bo_unpin_map_no_vm(ads->bo);
 }
 
+static bool needs_wa_1607983814(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) < 1250;
+}
+
 static size_t calculate_regset_size(struct xe_gt *gt)
 {
 	struct xe_reg_sr_entry *sr_entry;
@@ -219,7 +224,10 @@ static size_t calculate_regset_size(struct xe_gt *gt)
 		xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
 			count++;
 
-	count += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES;
+	count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
+
+	if (needs_wa_1607983814(gt_to_xe(gt)))
+		count += LNCFCMOCS_REG_COUNT;
 
 	return count * sizeof(struct guc_mmio_reg);
 }
@@ -431,6 +439,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 					  struct iosys_map *regset_map,
 					  struct xe_hw_engine *hwe)
 {
+	struct xe_device *xe = ads_to_xe(ads);
 	struct xe_hw_engine *hwe_rcs_reset_domain =
 		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
 	struct xe_reg_sr_entry *entry;
@@ -465,9 +474,12 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 					  e->reg, e->flags, count++);
 	}
 
-	for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
-		guc_mmio_regset_write_one(ads, regset_map,
-					  GEN9_LNCFCMOCS(i).reg, 0, count++);
+	/* Wa_1607983814 */
+	if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
+		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
+			guc_mmio_regset_write_one(ads, regset_map,
+						  GEN9_LNCFCMOCS(i).reg, 0, count++);
+		}
 	}
 
 	XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg)));
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index aa21f2bb5cbaa..0ba6f5dcd029f 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1270,7 +1270,6 @@ static void guc_engine_resume(struct xe_engine *e)
 
 	XE_BUG_ON(e->guc->suspend_pending);
 
-	xe_mocs_init_engine(e);
 	guc_engine_add_msg(e, msg, RESUME);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index e00fa2155dc40..ef237853fdab3 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -517,19 +517,6 @@ static void init_l3cc_table(struct xe_gt *gt,
 	}
 }
 
-void xe_mocs_init_engine(const struct xe_engine *engine)
-{
-	struct xe_mocs_info table;
-	unsigned int flags;
-
-	flags = get_mocs_settings(engine->gt->xe, &table);
-	if (!flags)
-		return;
-
-	if (flags & HAS_RENDER_L3CC && engine->class == XE_ENGINE_CLASS_RENDER)
-		init_l3cc_table(engine->gt, &table);
-}
-
 void xe_mocs_init(struct xe_gt *gt)
 {
 	struct xe_mocs_info table;
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
index aba1abe216ab2..63500a1d6660a 100644
--- a/drivers/gpu/drm/xe/xe_mocs.h
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -11,7 +11,6 @@
 struct xe_engine;
 struct xe_gt;
 
-void xe_mocs_init_engine(const struct xe_engine *engine);
 void xe_mocs_init(struct xe_gt *gt);
 
 /**
-- 
GitLab


From 90385dcfc040648e928a883298a19e2afbba41e5 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 23 Feb 2023 10:57:40 -0800
Subject: [PATCH 1361/2340] drm/xe/mocs: Drop HAS_RENDER_L3CC flag

The HAS_RENDER_L3CC is set unconditionally so there's no need to keep it
as a dedicated flag.  For error checking purposes, we can just make sure
the 'table' field is initialized properly.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Suggested-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index ef237853fdab3..e09c6242aafc0 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -25,7 +25,6 @@ static inline void mocs_dbg(const struct drm_device *dev,
 
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
-	HAS_RENDER_L3CC = BIT(1),
 };
 
 struct xe_mocs_entry {
@@ -440,10 +439,11 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 	 */
 	XE_WARN_ON(info->unused_entries_index == 0);
 
-	if (XE_WARN_ON(info->size > info->n_entries))
+	if (XE_WARN_ON(info->size > info->n_entries)) {
+		info->table = NULL;
 		return 0;
+	}
 
-	flags = HAS_RENDER_L3CC;
 	if (!IS_DGFX(xe))
 		flags |= HAS_GLOBAL_MOCS;
 
@@ -538,6 +538,6 @@ void xe_mocs_init(struct xe_gt *gt)
 	 * sure the LNCFCMOCSx registers are programmed for the subsequent
 	 * memory transactions including guc transactions
 	 */
-	if (flags & HAS_RENDER_L3CC)
+	if (table.table)
 		init_l3cc_table(gt, &table);
 }
-- 
GitLab


From e103c45f501a32eaa9e0a12db1c1e167b06f78cf Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 22 Dec 2022 10:53:59 +0000
Subject: [PATCH 1362/2340] drm/xe: prefer xe_bo_create_pin_map()

With small-bar we likely want to annotate all the kernel users that
require CPU access with vram. If xe_bo_create_pin_map() is the central
place for that then we should have a central place to annotate.

This also simplifies the code and fixes what appears to be a double
xe_bo_put(hwe->hwsp) in the error handling.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 23 +++-----------
 drivers/gpu/drm/xe/xe_lrc.c       | 53 +++++++++++--------------------
 drivers/gpu/drm/xe/xe_lrc_types.h |  1 -
 3 files changed, 22 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index ae541b5e50f36..b035e2fa67449 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -310,24 +310,14 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
 	xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt);
 
-	hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel,
-					XE_BO_CREATE_VRAM_IF_DGFX(gt) |
-					XE_BO_CREATE_GGTT_BIT);
+	hwe->hwsp = xe_bo_create_pin_map(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel,
+					 XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					 XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(hwe->hwsp)) {
 		err = PTR_ERR(hwe->hwsp);
 		goto err_name;
 	}
 
-	err = xe_bo_pin(hwe->hwsp);
-	if (err)
-		goto err_unlock_put_hwsp;
-
-	err = xe_bo_vmap(hwe->hwsp);
-	if (err)
-		goto err_unpin_hwsp;
-
-	xe_bo_unlock_no_vm(hwe->hwsp);
-
 	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
 	if (err)
 		goto err_hwsp;
@@ -353,15 +343,10 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 
 	return 0;
 
-err_unpin_hwsp:
-	xe_bo_unpin(hwe->hwsp);
-err_unlock_put_hwsp:
-	xe_bo_unlock_no_vm(hwe->hwsp);
-	xe_bo_put(hwe->hwsp);
 err_kernel_lrc:
 	xe_lrc_finish(&hwe->kernel_lrc);
 err_hwsp:
-	xe_bo_put(hwe->hwsp);
+	xe_bo_unpin_map_no_vm(hwe->hwsp);
 err_name:
 	hwe->name = NULL;
 
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index af4518a82db27..9140b057a5ba3 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -615,7 +615,11 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 
 	lrc->flags = 0;
 
-	lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm,
+	/*
+	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
+	 * via VM bind calls.
+	 */
+	lrc->bo = xe_bo_create_pin_map(xe, hwe->gt, vm,
 				      ring_size + xe_lrc_size(xe, hwe->class),
 				      ttm_bo_type_kernel,
 				      XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) |
@@ -628,21 +632,6 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	else
 		lrc->full_gt = hwe->gt;
 
-	/*
-	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
-	 * via VM bind calls.
-	 */
-	err = xe_bo_pin(lrc->bo);
-	if (err)
-		goto err_unlock_put_bo;
-	lrc->flags |= XE_LRC_PINNED;
-
-	err = xe_bo_vmap(lrc->bo);
-	if (err)
-		goto err_unpin_bo;
-
-	xe_bo_unlock_vm_held(lrc->bo);
-
 	lrc->ring.size = ring_size;
 	lrc->ring.tail = 0;
 
@@ -652,8 +641,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	if (!gt->default_lrc[hwe->class]) {
 		init_data = empty_lrc_data(hwe);
 		if (!init_data) {
-			xe_lrc_finish(lrc);
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto err_lrc_finish;
 		}
 	}
 
@@ -710,12 +699,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 
 	return 0;
 
-err_unpin_bo:
-	if (lrc->flags & XE_LRC_PINNED)
-		xe_bo_unpin(lrc->bo);
-err_unlock_put_bo:
-	xe_bo_unlock_vm_held(lrc->bo);
-	xe_bo_put(lrc->bo);
+err_lrc_finish:
+	xe_lrc_finish(lrc);
 	return err;
 }
 
@@ -724,17 +709,15 @@ void xe_lrc_finish(struct xe_lrc *lrc)
 	struct ww_acquire_ctx ww;
 
 	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
-	if (lrc->flags & XE_LRC_PINNED) {
-		if (lrc->bo->vm)
-			xe_vm_lock(lrc->bo->vm, &ww, 0, false);
-		else
-			xe_bo_lock_no_vm(lrc->bo, NULL);
-		xe_bo_unpin(lrc->bo);
-		if (lrc->bo->vm)
-			xe_vm_unlock(lrc->bo->vm, &ww);
-		else
-			xe_bo_unlock_no_vm(lrc->bo);
-	}
+	if (lrc->bo->vm)
+		xe_vm_lock(lrc->bo->vm, &ww, 0, false);
+	else
+		xe_bo_lock_no_vm(lrc->bo, NULL);
+	xe_bo_unpin(lrc->bo);
+	if (lrc->bo->vm)
+		xe_vm_unlock(lrc->bo->vm, &ww);
+	else
+		xe_bo_unlock_no_vm(lrc->bo);
 	xe_bo_put(lrc->bo);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 2827efa2091d2..8fe08535873d6 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -25,7 +25,6 @@ struct xe_lrc {
 
 	/** @flags: LRC flags */
 	u32 flags;
-#define XE_LRC_PINNED BIT(1)
 
 	/** @ring: submission ring state */
 	struct {
-- 
GitLab


From d79bdcdf06a3b421ac386f3513365f0bf2a5649a Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 22 Dec 2022 10:36:47 +0000
Subject: [PATCH 1363/2340] drm/xe/bo: explicitly reject zero sized BO

In the depths of ttm, when allocating the vma node this should result in
-ENOSPC it seems. However we should probably rather reject as part of
our own ioctl sanity checking, and then treat as programmer error in the
lower levels.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3e5393e00b43a..09b8db6d7ba36 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -971,6 +971,9 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	/* Only kernel objects should set GT */
 	XE_BUG_ON(gt && type != ttm_bo_type_kernel);
 
+	if (XE_WARN_ON(!size))
+		return ERR_PTR(-EINVAL);
+
 	if (!bo) {
 		bo = xe_bo_alloc();
 		if (IS_ERR(bo))
@@ -1524,6 +1527,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_ERR(xe, args->handle))
 		return -EINVAL;
 
+	if (XE_IOCTL_ERR(xe, !args->size))
+		return -EINVAL;
+
 	if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX))
 		return -EINVAL;
 
-- 
GitLab


From 3ea9f1f1f699c44b3064006b51566ed6accc6a53 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 24 Feb 2023 16:21:37 -0800
Subject: [PATCH 1364/2340] drm/xe/device: Prefer the drm-managed mutex_init

There's inconsistent use of mutex_init(), in xe_device_create(), with
several of them never calling mutex_destroy() in xe_device_destroy().
Migrate all of them to drmm_mutex_init(), so the destroy part is
automatically called.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230225002138.1759016-2-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 00e8ed2353534..8a9f1e5ce34d8 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -155,7 +155,6 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
 	struct xe_device *xe = to_xe_device(dev);
 
 	destroy_workqueue(xe->ordered_wq);
-	mutex_destroy(&xe->persitent_engines.lock);
 	ttm_device_fini(&xe->ttm);
 }
 
@@ -187,10 +186,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 
 	init_waitqueue_head(&xe->ufence_wq);
 
-	mutex_init(&xe->usm.lock);
+	drmm_mutex_init(&xe->drm, &xe->usm.lock);
 	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);
 
-	mutex_init(&xe->persitent_engines.lock);
+	drmm_mutex_init(&xe->drm, &xe->persitent_engines.lock);
 	INIT_LIST_HEAD(&xe->persitent_engines.list);
 
 	spin_lock_init(&xe->pinned.lock);
@@ -200,14 +199,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 
 	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
 
-	mutex_init(&xe->sb_lock);
+	drmm_mutex_init(&xe->drm, &xe->sb_lock);
 	xe->enabled_irq_mask = ~0;
 
 	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
 	if (err)
 		goto err_put;
 
-	mutex_init(&xe->mem_access.lock);
+	drmm_mutex_init(&xe->drm, &xe->mem_access.lock);
+
 	return xe;
 
 err_put:
-- 
GitLab


From 541623a406fe1fd516ac9564b2388a3ec31610fe Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 1 Mar 2023 17:34:05 -0800
Subject: [PATCH 1365/2340] drm/xe: Fix typo persitent->persistent

Fix typo as noticed by Matt Roper:

	git grep -l persitent | xargs sed -i 's/persitent/persistent/g'

... and then fix coding style issues.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://lore.kernel.org/r/20230302013411.3262608-2-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c       | 46 ++++++++++++++--------------
 drivers/gpu/drm/xe/xe_device.h       |  6 ++--
 drivers/gpu/drm/xe/xe_device_types.h |  8 ++---
 drivers/gpu/drm/xe/xe_engine.c       |  6 ++--
 drivers/gpu/drm/xe/xe_engine_types.h |  6 ++--
 drivers/gpu/drm/xe/xe_execlist.c     |  2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c   |  2 +-
 7 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 8a9f1e5ce34d8..49ce11fc1174f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -51,8 +51,8 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 	return 0;
 }
 
-static void device_kill_persitent_engines(struct xe_device *xe,
-					  struct xe_file *xef);
+static void device_kill_persistent_engines(struct xe_device *xe,
+					   struct xe_file *xef);
 
 static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 {
@@ -69,7 +69,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 	}
 	mutex_unlock(&xef->engine.lock);
 	mutex_destroy(&xef->engine.lock);
-	device_kill_persitent_engines(xe, xef);
+	device_kill_persistent_engines(xe, xef);
 
 	mutex_lock(&xef->vm.lock);
 	xa_for_each(&xef->vm.xa, idx, vm)
@@ -189,8 +189,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	drmm_mutex_init(&xe->drm, &xe->usm.lock);
 	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);
 
-	drmm_mutex_init(&xe->drm, &xe->persitent_engines.lock);
-	INIT_LIST_HEAD(&xe->persitent_engines.list);
+	drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
+	INIT_LIST_HEAD(&xe->persistent_engines.list);
 
 	spin_lock_init(&xe->pinned.lock);
 	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
@@ -305,35 +305,35 @@ void xe_device_shutdown(struct xe_device *xe)
 {
 }
 
-void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e)
+void xe_device_add_persistent_engines(struct xe_device *xe, struct xe_engine *e)
 {
-	mutex_lock(&xe->persitent_engines.lock);
-	list_add_tail(&e->persitent.link, &xe->persitent_engines.list);
-	mutex_unlock(&xe->persitent_engines.lock);
+	mutex_lock(&xe->persistent_engines.lock);
+	list_add_tail(&e->persistent.link, &xe->persistent_engines.list);
+	mutex_unlock(&xe->persistent_engines.lock);
 }
 
-void xe_device_remove_persitent_engines(struct xe_device *xe,
-					struct xe_engine *e)
+void xe_device_remove_persistent_engines(struct xe_device *xe,
+					 struct xe_engine *e)
 {
-	mutex_lock(&xe->persitent_engines.lock);
-	if (!list_empty(&e->persitent.link))
-		list_del(&e->persitent.link);
-	mutex_unlock(&xe->persitent_engines.lock);
+	mutex_lock(&xe->persistent_engines.lock);
+	if (!list_empty(&e->persistent.link))
+		list_del(&e->persistent.link);
+	mutex_unlock(&xe->persistent_engines.lock);
 }
 
-static void device_kill_persitent_engines(struct xe_device *xe,
-					  struct xe_file *xef)
+static void device_kill_persistent_engines(struct xe_device *xe,
+					   struct xe_file *xef)
 {
 	struct xe_engine *e, *next;
 
-	mutex_lock(&xe->persitent_engines.lock);
-	list_for_each_entry_safe(e, next, &xe->persitent_engines.list,
-				 persitent.link)
-		if (e->persitent.xef == xef) {
+	mutex_lock(&xe->persistent_engines.lock);
+	list_for_each_entry_safe(e, next, &xe->persistent_engines.list,
+				 persistent.link)
+		if (e->persistent.xef == xef) {
 			xe_engine_kill(e);
-			list_del_init(&e->persitent.link);
+			list_del_init(&e->persistent.link);
 		}
-	mutex_unlock(&xe->persitent_engines.lock);
+	mutex_unlock(&xe->persistent_engines.lock);
 }
 
 void xe_device_wmb(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 263620953c3b3..25c5087f5aad1 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -37,9 +37,9 @@ int xe_device_probe(struct xe_device *xe);
 void xe_device_remove(struct xe_device *xe);
 void xe_device_shutdown(struct xe_device *xe);
 
-void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e);
-void xe_device_remove_persitent_engines(struct xe_device *xe,
-					struct xe_engine *e);
+void xe_device_add_persistent_engines(struct xe_device *xe, struct xe_engine *e);
+void xe_device_remove_persistent_engines(struct xe_device *xe,
+					 struct xe_engine *e);
 
 void xe_device_wmb(struct xe_device *xe);
 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c9f74dc4c9fda..00b1db28a4b47 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -138,13 +138,13 @@ struct xe_device {
 		struct mutex lock;
 	} usm;
 
-	/** @persitent_engines: engines that are closed but still running */
+	/** @persistent_engines: engines that are closed but still running */
 	struct {
-		/** @lock: protects persitent engines */
+		/** @lock: protects persistent engines */
 		struct mutex lock;
-		/** @list: list of persitent engines */
+		/** @list: list of persistent engines */
 		struct list_head list;
-	} persitent_engines;
+	} persistent_engines;
 
 	/** @pinned: pinned BO state */
 	struct {
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 519fbbcabdb9a..3e40fb6d3f98f 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -47,7 +47,7 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe,
 	e->fence_irq = &gt->fence_irq[hwe->class];
 	e->ring_ops = gt->ring_ops[hwe->class];
 	e->ops = gt->engine_ops;
-	INIT_LIST_HEAD(&e->persitent.link);
+	INIT_LIST_HEAD(&e->persistent.link);
 	INIT_LIST_HEAD(&e->compute.link);
 	INIT_LIST_HEAD(&e->multi_gt_link);
 
@@ -620,7 +620,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 		goto put_engine;
 	}
 
-	e->persitent.xef = xef;
+	e->persistent.xef = xef;
 
 	mutex_lock(&xef->engine.lock);
 	err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL);
@@ -716,7 +716,7 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!(e->flags & ENGINE_FLAG_PERSISTENT))
 		xe_engine_kill(e);
 	else
-		xe_device_add_persitent_engines(xe, e);
+		xe_device_add_persistent_engines(xe, e);
 
 	trace_xe_engine_close(e);
 	xe_engine_put(e);
diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
index 3dfa1c14e181a..2f6f0f2a0a8bf 100644
--- a/drivers/gpu/drm/xe/xe_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_engine_types.h
@@ -94,14 +94,14 @@ struct xe_engine {
 	};
 
 	/**
-	 * @persitent: persitent engine state
+	 * @persistent: persistent engine state
 	 */
 	struct {
 		/** @xef: file which this engine belongs to */
 		struct xe_file *xef;
-		/** @link: link in list of persitent engines */
+		/** @link: link in list of persistent engines */
 		struct list_head link;
-	} persitent;
+	} persistent;
 
 	union {
 		/**
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index ae7ee56f1b1b7..e540e5d287a07 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -400,7 +400,7 @@ static void execlist_engine_fini_async(struct work_struct *w)
 	spin_unlock_irqrestore(&exl->port->lock, flags);
 
 	if (e->flags & ENGINE_FLAG_PERSISTENT)
-		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+		xe_device_remove_persistent_engines(gt_to_xe(e->gt), e);
 	drm_sched_entity_fini(&exl->entity);
 	drm_sched_fini(&exl->sched);
 	kfree(exl);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 0ba6f5dcd029f..d0b48c885fda1 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -890,7 +890,7 @@ static void __guc_engine_fini_async(struct work_struct *w)
 	trace_xe_engine_destroy(e);
 
 	if (e->flags & ENGINE_FLAG_PERSISTENT)
-		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+		xe_device_remove_persistent_engines(gt_to_xe(e->gt), e);
 	release_guc_id(guc, e);
 	xe_sched_entity_fini(&ge->entity);
 	xe_sched_fini(&ge->sched);
-- 
GitLab


From 63239946bc0101c2b10c119c77cd4b132d2c6484 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Thu, 2 Mar 2023 08:00:38 -0800
Subject: [PATCH 1366/2340] drm/xe: Fix size of xe_eu_mask_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

XE_MAX_DSS_FUSE_REGS was being used to calculate the size of
xe_eu_mask_t while it should use XE_MAX_EU_FUSE_REGS.
There are no know issues about this but fixing it anyways.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index b01edd3fdc4d5..74b4e6776bf16 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -31,7 +31,7 @@ enum xe_gt_type {
 #define XE_MAX_EU_FUSE_REGS	1
 
 typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
-typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
 
 struct xe_mmio_range {
 	u32 start;
-- 
GitLab


From cedbc0b75790a1ee4f0bad0124c84b6813c2ef8c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 6 Mar 2023 13:24:50 -0800
Subject: [PATCH 1367/2340] drm/xe: Fix duplicated setting for register 0x6604
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The following warning shows up for TGL:

		[drm:xe_reg_sr_add [xe]] *ERROR* Discarding save-restore reg 6604 (clear: 00ff0000, set: 00040000, masked: no): ret=-22
		[drm:xe_reg_sr_add [xe]] *ERROR* Discarding save-restore reg 6604 (clear: 00ff0000, set: 00040000, masked: no): ret=-22

That is because the same register is being set both by the WAs and the
tunings. Like was done in i915, prefer the tuning over the workaround
since that is applicable for more platforms. Also fix the tuning: it
was incorrectly using the MCR version of the register, but that only
became true in XEHP.

References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/233
Reported-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230306212450.803557-1-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  1 -
 drivers/gpu/drm/xe/xe_tuning.c       | 11 ++++++-----
 drivers/gpu/drm/xe/xe_wa.c           | 10 +++-------
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 47377d2167e0a..6a728d2809c52 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -70,7 +70,6 @@
 #define XEHP_FLAT_CCS_BASE_ADDR			MCR_REG(0x4910)
 
 #define GEN12_FF_MODE2				_MMIO(0x6604)
-#define XEHP_FF_MODE2				MCR_REG(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
 #define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 624b257ecfbc5..2861a014c85c4 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -24,11 +24,12 @@ static const struct xe_rtp_entry gt_tunings[] = {
 };
 
 static const struct xe_rtp_entry lrc_tunings[] = {
-	{ XE_RTP_NAME("1604555607"),
-	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
-	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
-						FF_MODE2_TDS_TIMER_MASK,
-						FF_MODE2_TDS_TIMER_128))
+	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  /* read verification is ignored due to 1608008084. */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2,
+						FF_MODE2_GS_TIMER_MASK,
+						FF_MODE2_GS_TIMER_224))
 	},
 	{}
 };
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index df72b15dfeb0d..71e9e1a111f89 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -265,13 +265,9 @@ static const struct xe_rtp_entry lrc_was[] = {
 				   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
 				   XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
-	{ XE_RTP_NAME("16011163337"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  /* read verification is ignored due to 1608008084. */
-	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2,
-						FF_MODE2_GS_TIMER_MASK,
-						FF_MODE2_GS_TIMER_224))
-	},
+
+	/* DG1 */
+
 	{ XE_RTP_NAME("1409044764"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
 	  XE_RTP_ACTIONS(CLR(GEN11_COMMON_SLICE_CHICKEN3,
-- 
GitLab


From 7dae750dde42459483054384a5d234b54e643cdd Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 6 Mar 2023 08:57:57 -0800
Subject: [PATCH 1368/2340] drm/xe: Fix ROW_CHICKEN2 define

When this register was added in xe for some workarounds, it was copied
from i915 before the registers got changed to add the MCR annotation.
The register 0xe4f4 is MCR since gen8, long before any GPU supported by
the xe driver. Replace all occurrences with the right register.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230306165757.633796-1-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +-
 drivers/gpu/drm/xe/xe_wa.c           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 6a728d2809c52..d3b862e4cd0db 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -230,7 +230,7 @@
 #define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
 #define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
-#define GEN7_ROW_CHICKEN2			_MMIO(0xe4f4)
+#define GEN8_ROW_CHICKEN2			MCR_REG(0xe4f4)
 #define   GEN12_DISABLE_READ_SUPPRESSION	REG_BIT(15)
 #define   GEN12_DISABLE_EARLY_READ		REG_BIT(14)
 #define   GEN12_ENABLE_LARGE_GRF_MODE		REG_BIT(12)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 71e9e1a111f89..03c5b01a14e40 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -191,7 +191,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	},
 	{ XE_RTP_NAME("1606931601"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
+	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
@@ -213,7 +213,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
-	  XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
+	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14010229206, 1409085225"),
-- 
GitLab


From 282c683a56e9713a3b70c4cffd17cb48bdbacca2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 2 Mar 2023 09:54:59 +0100
Subject: [PATCH 1369/2340] drm/xe/tests: Remove CONFIG_FB dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We currently don't have any tests that explicitly depends on this
config option, so remove that build dependency.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig.debug | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 9c773dd74cbdd..565be3f6b9b96 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -64,7 +64,6 @@ config DRM_XE_SIMPLE_ERROR_CAPTURE
 config DRM_XE_KUNIT_TEST
         tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
 	depends on DRM_XE && KUNIT && DEBUG_FS
-	depends on FB && FB = DRM_KMS_HELPER && DRM_FBDEV_EMULATION
 	default KUNIT_ALL_TESTS
 	select DRM_EXPORT_FOR_TESTS if m
 	help
-- 
GitLab


From 907a319c8c8e125224b088f91f468f549f1e1da7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 2 Mar 2023 10:01:41 +0100
Subject: [PATCH 1370/2340] drm/xe/tests: Grab a memory access reference around
 the migrate sanity test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It appears we don't hold a memory access reference for the accesses in
this test, which may results in printed warnings and possibly the GT
not woken up for the memory accesses.

Add a memory access reference around the test.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 0de17e90aba9c..b7e4a126e8b73 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -366,7 +366,9 @@ static int migrate_test_run_device(struct xe_device *xe)
 
 		kunit_info(test, "Testing gt id %d.\n", id);
 		xe_vm_lock(m->eng->vm, &ww, 0, true);
+		xe_device_mem_access_get(xe);
 		xe_migrate_sanity_test(m, test);
+		xe_device_mem_access_put(xe);
 		xe_vm_unlock(m->eng->vm, &ww);
 	}
 
-- 
GitLab


From b99cb6216bdf350e2d94c547c27f063b4434ae5d Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Mon, 6 Mar 2023 05:34:59 -0800
Subject: [PATCH 1371/2340] drm/xe/migrate: Fix number of PT structs in docbook

Update xe_migrate_doc.h with 32 page table structs (not 48)

v2: minor typo fix

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230306133459.7803-1-niranjana.vishwanathapura@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate_doc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
index 6a68fdff08dce..63c7d67b5b624 100644
--- a/drivers/gpu/drm/xe/xe_migrate_doc.h
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -21,7 +21,7 @@
  * table BOs for updates, and identity map the entire device's VRAM with 1 GB
  * pages.
  *
- * Currently the page structure consists of 48 phyiscal pages with 16 being
+ * Currently the page structure consists of 32 physical pages with 16 being
  * reserved for BO mapping during copies and clear, 1 reserved for kernel binds,
  * several pages are needed to setup the identity mappings (exact number based
  * on how many bits of address space the device has), and the rest are reserved
-- 
GitLab


From 6db7761bbca649319096431c38670c596107596d Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Wed, 8 Mar 2023 17:23:22 +0100
Subject: [PATCH 1372/2340] drm/xe/stolen: Exclude reserved lmem portion

The address set by firmware in GEN12_DSMBASE in driver initialization
doesn't mean "anything above that and until end of lmem is part of DSM".
In fact, there may be a few KB that is not part of DSM on the end of
lmem. How large is that space is platform-dependent, but since it's
always less than the DSM granularity, it can be simplified by simply
aligning the size down.

Suggested-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 2e8d07ad42aee..1116e217ebc29 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -88,7 +88,13 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, 2))
 		mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
 
-	return stolen_size;
+	/*
+	 * There may be few KB of platform dependent reserved memory at the end
+	 * of lmem which is not part of the DSM. Such reserved memory portion is
+	 * always less then DSM granularity so align down the stolen_size to DSM
+	 * granularity to accommodate such reserve lmem portion.
+	 */
+	return ALIGN_DOWN(stolen_size, SZ_1M);
 }
 
 static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
-- 
GitLab


From 39fd0b4507c3ba86ef04827208dd3aa85d2d796e Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 7 Mar 2023 16:55:08 -0800
Subject: [PATCH 1373/2340] drm/xe/guc: Handle regset overflow check for entire
 GT

Checking whether a single engine's register save/restore entries
overflow the expected/pre-allocated GuC ADS regset area isn't terribly
useful; we actually want to check whether the combined entries from all
engines on the GT overflow the regset space.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230308005509.2975663-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ads.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 7a892ff7aba32..fd9911ffeae47 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -482,8 +482,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 		}
 	}
 
-	XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg)));
-
 	return count;
 }
 
@@ -496,6 +494,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
 	u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
 	struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
 							    regset_offset);
+	unsigned int regset_used = 0;
 
 	for_each_hw_engine(hwe, gt, id) {
 		unsigned int count;
@@ -521,7 +520,11 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
 
 		addr += count * sizeof(struct guc_mmio_reg);
 		iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));
+
+		regset_used += count * sizeof(struct guc_mmio_reg);
 	}
+
+	XE_BUG_ON(regset_used > ads->regset_size);
 }
 
 static void guc_um_init_params(struct xe_guc_ads *ads)
-- 
GitLab


From 2a8477f7614a62b41b034e3eaf017d41e8a58ce9 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 8 Mar 2023 12:30:08 +0000
Subject: [PATCH 1374/2340] drm/xe: s/lmem/vram/

This seems to be the preferred nomenclature in xe. Currently we are
intermixing vram and lmem, which is confusing.

v2 (Gwan-gyeong Mun & Lucas):
  - Rather apply to the entire driver

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig.debug      |  2 +-
 drivers/gpu/drm/xe/tests/xe_migrate.c |  2 +-
 drivers/gpu/drm/xe/xe_bo.c            | 10 +++----
 drivers/gpu/drm/xe/xe_bo.h            |  6 ++--
 drivers/gpu/drm/xe/xe_ggtt.c          |  6 ++--
 drivers/gpu/drm/xe/xe_migrate.c       | 12 ++++----
 drivers/gpu/drm/xe/xe_mmio.c          | 40 +++++++++++++--------------
 drivers/gpu/drm/xe/xe_module.c        |  6 ++--
 drivers/gpu/drm/xe/xe_module.h        |  2 +-
 drivers/gpu/drm/xe/xe_pt.c            | 12 ++++----
 drivers/gpu/drm/xe/xe_vm.c            | 10 +++----
 11 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 565be3f6b9b96..93b284cdd0a2e 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -41,7 +41,7 @@ config DRM_XE_DEBUG_VM
 	  If in doubt, say "N".
 
 config DRM_XE_DEBUG_MEM
-	bool "Enable passing SYS/LMEM addresses to user space"
+	bool "Enable passing SYS/VRAM addresses to user space"
 	default n
 	help
 	  Pass object location trough uapi. Intended for extended
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index b7e4a126e8b73..ac659b94e7f52 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -129,7 +129,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	}
 	dma_fence_put(fence);
 
-	/* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */
+	/* Try to copy 0xc0 from sysmem to vram with 2MB or 64KiB/4KiB pages */
 	xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size);
 	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 09b8db6d7ba36..cfb79519b6731 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1299,12 +1299,12 @@ int xe_bo_pin(struct xe_bo *bo)
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
 	    bo->flags & XE_BO_INTERNAL_TEST)) {
 		struct ttm_place *place = &(bo->placements[0]);
-		bool lmem;
+		bool vram;
 
 		if (mem_type_is_vram(place->mem_type)) {
 			XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
 
-			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
+			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) -
 				       vram_region_io_offset(bo)) >> PAGE_SHIFT;
 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
 
@@ -1424,7 +1424,7 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
 }
 
 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
-		      size_t page_size, bool *is_lmem)
+		      size_t page_size, bool *is_vram)
 {
 	struct xe_res_cursor cur;
 	u64 page;
@@ -1436,9 +1436,9 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
 	page = offset >> PAGE_SHIFT;
 	offset &= (PAGE_SIZE - 1);
 
-	*is_lmem = xe_bo_is_vram(bo);
+	*is_vram = xe_bo_is_vram(bo);
 
-	if (!*is_lmem && !xe_bo_is_stolen(bo)) {
+	if (!*is_vram && !xe_bo_is_stolen(bo)) {
 		XE_BUG_ON(!bo->ttm.ttm);
 
 		xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8c2cdbe51ab53..4350845542bf4 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -196,14 +196,14 @@ static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
 
 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
-		      size_t page_size, bool *is_lmem);
+		      size_t page_size, bool *is_vram);
 
 static inline dma_addr_t
 xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
 {
-	bool is_lmem;
+	bool is_vram;
 
-	return xe_bo_addr(bo, 0, page_size, &is_lmem);
+	return xe_bo_addr(bo, 0, page_size, &is_vram);
 }
 
 static inline u32
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index d6ebc1d77f4d5..99bc9036c7a0d 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -28,12 +28,12 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 	u64 pte;
-	bool is_lmem;
+	bool is_vram;
 
-	pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem);
+	pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram);
 	pte |= GEN8_PAGE_PRESENT;
 
-	if (is_lmem)
+	if (is_vram)
 		pte |= GEN12_GGTT_PTE_LM;
 
 	/* FIXME: vfunc + pass in caching rules */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 79aa3508ae3e7..4a9fe1f7128d1 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -222,15 +222,15 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 			level++;
 		}
 	} else {
-		bool is_lmem;
-		u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem);
+		bool is_vram;
+		u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_vram);
 
 		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 
 		if (xe->info.supports_usm) {
 			batch = gt->usm.bb_pool.bo;
 			batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE,
-						&is_lmem);
+						&is_vram);
 			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 		}
 	}
@@ -933,12 +933,12 @@ static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
 	 */
 	XE_BUG_ON(update->qwords > 0x1ff);
 	if (!ppgtt_ofs) {
-		bool is_lmem;
+		bool is_vram;
 
 		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
 							   GEN8_PAGE_SIZE,
-							   &is_lmem));
-		XE_BUG_ON(!is_lmem);
+							   &is_vram));
+		XE_BUG_ON(!is_vram);
 	}
 
 	do {
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 65b0df9bb5796..e5bd4609aaeed 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -68,7 +68,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 	return 1;
 }
 
-static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
+static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct pci_bus *root = pdev->bus;
@@ -78,31 +78,31 @@ static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
 	u32 pci_cmd;
 	int i;
 	int ret;
-	u64 force_lmem_bar_size = xe_force_lmem_bar_size;
+	u64 force_vram_bar_size = xe_force_vram_bar_size;
 
 	current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR));
 
-	if (force_lmem_bar_size) {
+	if (force_vram_bar_size) {
 		u32 bar_sizes;
 
-		rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M;
+		rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;
 		bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
 
 		if (rebar_size == current_size)
 			return 0;
 
 		if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) ||
-		    rebar_size >= roundup_pow_of_two(lmem_size)) {
-			rebar_size = lmem_size;
+		    rebar_size >= roundup_pow_of_two(vram_size)) {
+			rebar_size = vram_size;
 			drm_info(&xe->drm,
 				 "Given bar size is not within supported size, setting it to default: %llu\n",
-				 (u64)lmem_size >> 20);
+				 (u64)vram_size >> 20);
 		}
 	} else {
 		rebar_size = current_size;
 
-		if (rebar_size != roundup_pow_of_two(lmem_size))
-			rebar_size = lmem_size;
+		if (rebar_size != roundup_pow_of_two(vram_size))
+			rebar_size = vram_size;
 		else
 			return 0;
 	}
@@ -117,7 +117,7 @@ static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
 	}
 
 	if (!root_res) {
-		drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n");
+		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing\n");
 		return -1;
 	}
 
@@ -168,7 +168,7 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si
 	if (usable_size) {
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
 		*usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
-		drm_info(&xe->drm, "lmem_size: 0x%llx usable_size: 0x%llx\n",
+		drm_info(&xe->drm, "vram_size: 0x%llx usable_size: 0x%llx\n",
 			 *vram_size, *usable_size);
 	}
 
@@ -180,7 +180,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct xe_gt *gt;
 	u8 id;
-	u64 lmem_size;
+	u64 vram_size;
 	u64 original_size;
 	u64 current_size;
 	u64 usable_size;
@@ -207,29 +207,29 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	gt = xe_device_get_gt(xe, 0);
 	original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 
-	err = xe_mmio_total_vram_size(xe, &lmem_size, &usable_size);
+	err = xe_mmio_total_vram_size(xe, &vram_size, &usable_size);
 	if (err)
 		return err;
 
-	resize_result = xe_resize_lmem_bar(xe, lmem_size);
+	resize_result = xe_resize_vram_bar(xe, vram_size);
 	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
 
-	xe->mem.vram.size = min(current_size, lmem_size);
+	xe->mem.vram.size = min(current_size, vram_size);
 
 	if (!xe->mem.vram.size)
 		return -EIO;
 
 	if (resize_result > 0)
-		drm_info(&xe->drm, "Successfully resize LMEM from %lluMiB to %lluMiB\n",
+		drm_info(&xe->drm, "Successfully resize VRAM from %lluMiB to %lluMiB\n",
 			 (u64)original_size >> 20,
 			 (u64)current_size >> 20);
-	else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size)
+	else if (xe->mem.vram.size < vram_size && !xe_force_vram_bar_size)
 		drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
 			 (u64)xe->mem.vram.size >> 20);
-	if (xe->mem.vram.size < lmem_size)
+	if (xe->mem.vram.size < vram_size)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
-			 lmem_size, (u64)xe->mem.vram.size);
+			 vram_size, (u64)xe->mem.vram.size);
 
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
 	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
@@ -360,7 +360,7 @@ int xe_mmio_init(struct xe_device *xe)
 	 * and we should not continue with driver initialization.
 	 */
 	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) {
-		drm_err(&xe->drm, "LMEM not initialized by firmware\n");
+		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 3f5d03a586968..e8ee7a9b08781 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -18,9 +18,9 @@ bool enable_guc = true;
 module_param_named_unsafe(enable_guc, enable_guc, bool, 0444);
 MODULE_PARM_DESC(enable_guc, "Enable GuC submission");
 
-u32 xe_force_lmem_bar_size;
-module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600);
-MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size(in MiB)");
+u32 xe_force_vram_bar_size;
+module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600);
+MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
 
 int xe_guc_log_level = 5;
 module_param_named(guc_log_level, xe_guc_log_level, int, 0600);
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 2c6ee46f55953..86916c1763825 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -8,6 +8,6 @@
 /* Module modprobe variables */
 extern bool enable_guc;
 extern bool enable_display;
-extern u32 xe_force_lmem_bar_size;
+extern u32 xe_force_vram_bar_size;
 extern int xe_guc_log_level;
 extern char *xe_param_force_probe;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 00d9fff538284..64da981524559 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -61,12 +61,12 @@ u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
 		    const enum xe_cache_level level)
 {
 	u64 pde;
-	bool is_lmem;
+	bool is_vram;
 
-	pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem);
+	pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram);
 	pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
-	XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem);
+	XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_vram);
 
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
 
@@ -79,13 +79,13 @@ u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
 }
 
 static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
-			   size_t page_size, bool *is_lmem)
+			   size_t page_size, bool *is_vram)
 {
 	if (xe_vma_is_userptr(vma)) {
 		struct xe_res_cursor cur;
 		u64 page;
 
-		*is_lmem = false;
+		*is_vram = false;
 		page = offset >> PAGE_SHIFT;
 		offset &= (PAGE_SIZE - 1);
 
@@ -93,7 +93,7 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 				&cur);
 		return xe_res_dma(&cur) + offset;
 	} else {
-		return xe_bo_addr(vma->bo, offset, page_size, is_lmem);
+		return xe_bo_addr(vma->bo, offset, page_size, is_vram);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index fcac31f11706f..a8254a4148f70 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3379,7 +3379,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 {
 	struct rb_node *node;
-	bool is_lmem;
+	bool is_vram;
 	uint64_t addr;
 
 	if (!down_read_trylock(&vm->lock)) {
@@ -3387,8 +3387,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 		return 0;
 	}
 	if (vm->pt_root[gt_id]) {
-		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
-		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS");
+		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram);
+		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
 	}
 
 	for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
@@ -3401,11 +3401,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
 			addr = xe_res_dma(&cur);
 		} else {
-			addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
+			addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram);
 		}
 		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
 			   vma->start, vma->end, vma->end - vma->start + 1ull,
-			   addr, is_userptr ? "USR" : is_lmem ? "VRAM" : "SYS");
+			   addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
 	}
 	up_read(&vm->lock);
 
-- 
GitLab


From 7c7225ddaa343a3f380f8b92cd2b30e1b5701cb1 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 8 Mar 2023 16:55:29 -0800
Subject: [PATCH 1375/2340] drm/xe: Separate engine fuse handling into
 dedicated functions

The single function to handle fuse registers for all types of engines is
becoming a bit long and hard to follow (and we haven't even added the
compute engines yet).  Let's split it into dedicated functions for each
engine class.

v2:
 - Add note about BCS0 always being present.  (Bala)
 - Add forcewake assertion to read_copy_fuses.  (Bala)

Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230309005530.3140173-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 37 +++++++++++++++++++------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b035e2fa67449..abfd35491b478 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -369,29 +369,29 @@ static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
 	}
 }
 
-static void read_fuses(struct xe_gt *gt)
+static void read_media_fuses(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 media_fuse;
 	u16 vdbox_mask;
 	u16 vebox_mask;
-	u32 bcs_mask;
 	int i, j;
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
+	media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg);
+
 	/*
-	 * FIXME: Hack job, thinking we should have table of vfuncs for each
-	 * class which picks the correct vfunc based on IP version.
+	 * Pre-Xe_HP platforms had register bits representing absent engines,
+	 * whereas Xe_HP and beyond have bits representing present engines.
+	 * Invert the polarity on old platforms so that we can use common
+	 * handling below.
 	 */
-
-	media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg);
 	if (GRAPHICS_VERx100(xe) < 1250)
 		media_fuse = ~media_fuse;
 
-	vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
-	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
-		      GEN11_GT_VEBOX_DISABLE_SHIFT;
+	vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse);
+	vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse);
 
 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
 		if (!(gt->info.engine_mask & BIT(i)))
@@ -412,28 +412,37 @@ static void read_fuses(struct xe_gt *gt)
 			drm_info(&xe->drm, "vecs%u fused off\n", j);
 		}
 	}
+}
+
+static void read_copy_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 bcs_mask;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
 	bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg);
 	bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask);
 
-	for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
+	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
 		if (!(gt->info.engine_mask & BIT(i)))
 			continue;
 
-		if (!(BIT(j/2) & bcs_mask)) {
+		if (!(BIT(j / 2) & bcs_mask)) {
 			gt->info.engine_mask &= ~BIT(i);
 			drm_info(&xe->drm, "bcs%u fused off\n", j);
 		}
 	}
-
-	/* TODO: compute engines */
 }
 
 int xe_hw_engines_init_early(struct xe_gt *gt)
 {
 	int i;
 
-	read_fuses(gt);
+	read_media_fuses(gt);
+	read_copy_fuses(gt);
+	/* TODO: compute engines */
 
 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
-- 
GitLab


From 13fb0c98723f54a884090864983fff4953deb185 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 8 Mar 2023 16:55:30 -0800
Subject: [PATCH 1376/2340] drm/xe: Add support for CCS engine fusing

For Xe_HP platforms that can have multiple CCS engines, the
presence/absence of each CCS is inferred by the presence/absence of any
DSS in the corresponding quadrant of the GT's DSS mask.

This handling is only needed on platforms that can have more than one
CCS.  The CCS is never fused off on platforms like MTL that can only
have one.

v2:
 - Add extra warnings to try to catch mistakes where the register counts
   in get_num_dss_regs() are updated without corresponding updates to
   the register parameters passed to load_dss_mask().  (Lucas)
 - Add kerneldoc for xe_gt_topology_has_dss_in_quadrant() and clarify
   why we care about quadrants of the DSS space.  (Lucas)
 - Ensure CCS engine counting treats engine mask as 64-bit.  (Lucas)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230309005530.3140173-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_topology.c | 65 ++++++++++++++++++++++++-----
 drivers/gpu/drm/xe/xe_gt_topology.h |  3 ++
 drivers/gpu/drm/xe/xe_hw_engine.c   | 29 ++++++++++++-
 3 files changed, 85 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 2123f84be336e..f2cbee53462b1 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -62,6 +62,21 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
 	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
 }
 
+static void
+get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
+{
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		*geometry_regs = 0;
+		*compute_regs = 2;
+	} else if (GRAPHICS_VERx100(xe) >= 1250) {
+		*geometry_regs = 1;
+		*compute_regs = 1;
+	} else {
+		*geometry_regs = 1;
+		*compute_regs = 0;
+	}
+}
+
 void
 xe_gt_topology_init(struct xe_gt *gt)
 {
@@ -69,18 +84,17 @@ xe_gt_topology_init(struct xe_gt *gt)
 	struct drm_printer p = drm_debug_printer("GT topology");
 	int num_geometry_regs, num_compute_regs;
 
-	if (GRAPHICS_VERx100(xe) == 1260) {
-		num_geometry_regs = 0;
-		num_compute_regs = 2;
-	} else if (GRAPHICS_VERx100(xe) >= 1250) {
-		num_geometry_regs = 1;
-		num_compute_regs = 1;
-	} else {
-		num_geometry_regs = 1;
-		num_compute_regs = 0;
-	}
+	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
 
-	load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs,
+	/*
+	 * Register counts returned shouldn't exceed the number of registers
+	 * passed as parameters below.
+	 */
+	drm_WARN_ON(&xe->drm, num_geometry_regs > 1);
+	drm_WARN_ON(&xe->drm, num_compute_regs > 2);
+
+	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
+		      num_geometry_regs,
 		      XELP_GT_GEOMETRY_DSS_ENABLE.reg);
 	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
 		      XEHP_GT_COMPUTE_DSS_ENABLE.reg,
@@ -113,3 +127,32 @@ xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum)
 {
 	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
 }
+
+/**
+ * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
+ * @gt: GT to check
+ * @quad: Which quadrant of the DSS space to check
+ *
+ * Since Xe_HP platforms can have up to four CCS engines, those engines
+ * are each logically associated with a quarter of the possible DSS.  If there
+ * are no DSS present in one of the four quadrants of the DSS space, the
+ * corresponding CCS engine is also not available for use.
+ *
+ * Returns false if all DSS in a quadrant of the GT are fused off, else true.
+ */
+bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	xe_dss_mask_t all_dss;
+	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
+
+	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+		  XE_MAX_DSS_FUSE_BITS);
+
+	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
+	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
+
+	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
+
+	return quad_first < (quad + 1) * dss_per_quad;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index b2540dc266f21..f47ab1b1269c7 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -17,4 +17,7 @@ void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
 unsigned int
 xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
 
+bool
+xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
+
 #endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index abfd35491b478..63a4efd5edccd 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -436,13 +436,40 @@ static void read_copy_fuses(struct xe_gt *gt)
 	}
 }
 
+static void read_compute_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	/*
+	 * CCS fusing based on DSS masks only applies to platforms that can
+	 * have more than one CCS.
+	 */
+	if (hweight64(gt->info.engine_mask &
+		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
+		return;
+
+	/*
+	 * CCS availability on Xe_HP is inferred from the presence of DSS in
+	 * each quadrant.
+	 */
+	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "ccs%u fused off\n", j);
+		}
+	}
+}
+
 int xe_hw_engines_init_early(struct xe_gt *gt)
 {
 	int i;
 
 	read_media_fuses(gt);
 	read_copy_fuses(gt);
-	/* TODO: compute engines */
+	read_compute_fuses(gt);
 
 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
-- 
GitLab


From 4b1430f77553ca3e4f9033d4d614b193da233a30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 8 Mar 2023 19:49:22 +0100
Subject: [PATCH 1377/2340] drm/xe/vm: Use the correct vma destroy sequence on
 userptr failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the below warning by using the correct vma destroy sequence:

[   92.204921] ------------[ cut here ]------------
[   92.204954] WARNING: CPU: 3 PID: 2449 at drivers/gpu/drm/xe/xe_vm.c:933 xe_vma_destroy+0x280/0x290 [xe]
[   92.205002] Modules linked in: ccm nft_objref cmac nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat ip6table_nat ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_raw iptable_security ip_set nf_tables nfnetlink ip6table_filter iptable_filter bnep sunrpc vfat fat iwlmvm mac80211 intel_rapl_msr ee1004 ppdev intel_rapl_common snd_hda_codec_realtek libarc4 iTCO_wdt snd_hda_codec_generic intel_pmc_bxt x86_pkg_temp_thermal iTCO_vendor_support intel_powerclamp coretemp intel_cstate iwlwifi btusb btrtl btbcm snd_hda_intel btintel snd_intel_dspcfg eeepc_wmi snd_hda_codec asus_wmi bluetooth snd_hwdep snd_seq ledtrig_audio snd_hda_core snd_seq_device sparse_keymap cfg80211 snd_pcm intel_uncore joydev platform_profile mei_me wmi_bmof intel_wmi_thunderbolt snd_timer pcspkr ecdh_generic i2c_i801 snd
[   92.205060]  ecc mei rfkill soundcore idma64 i2c_smbus parport_pc parport acpi_pad acpi_tad xe drm_ttm_helper ttm i2c_algo_bit drm_suballoc_helper kunit drm_buddy gpu_sched drm_display_helper drm_kms_helper drm crct10dif_pclmul crc32_pclmul crc32c_intel nvme nvme_core e1000e ghash_clmulni_intel drm_panel_orientation_quirks video wmi pinctrl_tigerlake usb_storage ip6_tables ip_tables fuse
[   92.205242] CPU: 3 PID: 2449 Comm: xe_vm Tainted: G     U             6.1.0+ #120
[   92.205254] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 0403 01/26/2021
[   92.205266] RIP: 0010:xe_vma_destroy+0x280/0x290 [xe]
[   92.205299] Code: 74 15 48 8b 93 a0 01 00 00 48 8b 83 a8 01 00 00 48 89 42 08 48 89 10 4c 89 ab a0 01 00 00 4c 89 ab a8 01 00 00 e9 1b fe ff ff <0f> 0b e9 a3 fe ff ff 0f 0b e9 82 fe ff ff 66 90 0f 1f 44 00 00 48
[   92.205322] RSP: 0018:ffffaadd465c3a58 EFLAGS: 00010246
[   92.205331] RAX: 0000000000000000 RBX: ffff9706d53ed400 RCX: 0000000000000001
[   92.205341] RDX: ffff9706d53ed480 RSI: ffffffffa756dc2b RDI: ffffffffa760a05e
[   92.205351] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000002c5370a2
[   92.205361] R10: ffff9706ca520000 R11: 0000000022c5370a R12: ffff9706cad03800
[   92.205370] R13: 000000000004ffff R14: fffffffffffffff2 R15: 0000000000000000
[   92.205380] FS:  00007fe98203a940(0000) GS:ffff970dffac0000(0000) knlGS:0000000000000000
[   92.205392] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   92.205400] CR2: 00007fe982ccb000 CR3: 000000010d6e6003 CR4: 0000000000770ee0
[   92.205410] PKRU: 55555554
[   92.205415] Call Trace:
[   92.205419]  <TASK>
[   92.205426]  vm_bind_ioctl_lookup_vma+0x9bb/0xbf0 [xe]
[   92.205461]  ? lock_is_held_type+0xe3/0x140
[   92.205472]  ? xe_vm_find_overlapping_vma+0x77/0x90 [xe]
[   92.205503]  ? __vm_bind_ioctl_lookup_vma.constprop.0+0x9e/0xe0 [xe]
[   92.205533]  ? __lock_acquire+0x3a3/0x1fb0
[   92.205543]  ? register_lock_class+0x38/0x480
[   92.205550]  ? __lock_acquire+0x3a3/0x1fb0
[   92.205558]  ? __lock_acquire+0x3a3/0x1fb0
[   92.205567]  ? __lock_acquire+0x3a3/0x1fb0
[   92.205579]  ? lock_acquire+0xbf/0x2b0
[   92.205586]  ? lock_acquire+0xcf/0x2b0
[   92.205597]  xe_vm_bind_ioctl+0x977/0x1c30 [xe]
[   92.205630]  ? find_held_lock+0x2b/0x80
[   92.205640]  ? lock_release+0x131/0x2c0
[   92.205648]  ? xe_vm_ttm_bo+0x40/0x40 [xe]
[   92.205677]  drm_ioctl_kernel+0xa1/0x150 [drm]
[   92.205706]  drm_ioctl+0x221/0x420 [drm]
[   92.205727]  ? xe_vm_ttm_bo+0x40/0x40 [xe]
[   92.205764]  __x64_sys_ioctl+0x8d/0xd0
[   92.205774]  do_syscall_64+0x37/0x90
[   92.205781]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[   92.205790] RIP: 0033:0x7fe982be8d6f
[   92.205797] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00
[   92.205821] RSP: 002b:00007ffde9f9c560 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[   92.205832] RAX: ffffffffffffffda RBX: 00007fadeadbe000 RCX: 00007fe982be8d6f
[   92.205842] RDX: 00007ffde9f9c5f0 RSI: 0000000040786445 RDI: 0000000000000003
[   92.205851] RBP: 00007ffde9f9c5f0 R08: 00007fadeadbe000 R09: 0000000000040000
[   92.205861] R10: 0000000000000003 R11: 0000000000000246 R12: 0000000040786445
[   92.205871] R13: 0000000000000003 R14: 0000000000000003 R15: 00007fe982e02000
[   92.205888]  </TASK>
[   92.205892] irq event stamp: 82723
[   92.205897] hardirqs last  enabled at (82731): [<ffffffffa617660e>] __up_console_sem+0x5e/0x70
[   92.205910] hardirqs last disabled at (82738): [<ffffffffa61765f3>] __up_console_sem+0x43/0x70
[   92.205922] softirqs last  enabled at (82182): [<ffffffffa60f026d>] __irq_exit_rcu+0xed/0x160
[   92.205935] softirqs last disabled at (82163): [<ffffffffa60f026d>] __irq_exit_rcu+0xed/0x160
[   92.205947] ---[ end trace 0000000000000000 ]---

Reported-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a8254a4148f70..6cc3204adaa8a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2835,7 +2835,8 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
 
 		err = xe_vma_userptr_pin_pages(vma);
 		if (err) {
-			xe_vma_destroy(vma, NULL);
+			prep_vma_destroy(vm, vma);
+			xe_vma_destroy_unlocked(vma);
 
 			return ERR_PTR(err);
 		} else {
-- 
GitLab


From e84535d86043af8fc9edcbbeb00f2e47e8ccb130 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 6 Mar 2023 16:40:27 -0800
Subject: [PATCH 1378/2340] drm/xe/mcr: Add L3BANK steering for DG2

Some register ranges with replication type L3BANK were missing from the
driver table. The following warning was triggering when adding a
workaround touching the register 0xb188:

	xe 0000:03:00.0: Did not find MCR register 0xb188 in any MCR steering table

Add the L3BANK ranges according to the spec.

v2:
  - Fix typo in one of the ranges: s/0x00BCFF/0x008CFF/ (Matt Roper)
  - Add termination rule in the init function for L3BANK (Matt Roper)

Bspec: 66534
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 10eff02cc7db1..ab8fc649ba52e 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -47,6 +47,12 @@ static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
 	{},
 };
 
+static const struct xe_mmio_range xehp_l3bank_steering_table[] = {
+	{ 0x008C80, 0x008CFF },
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
 /*
  * Although the bspec lists more "MSLICE" ranges than shown here, some of those
  * are of a "GAM" subclass that has special rules and doesn't need to be
@@ -180,6 +186,18 @@ static void init_steering_l3bank(struct xe_gt *gt)
 		gt->steering[L3BANK].group_target = __ffs(mslice_mask);
 		gt->steering[L3BANK].instance_target =
 			bank_mask & BIT(0) ? 0 : 2;
+	} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
+		u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+						xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+		u32 bank = __ffs(mslice_mask) * 8;
+
+		/*
+		 * Like mslice registers, look for a valid mslice and steer to
+		 * the first L3BANK of that quad. Access to the Nth L3 bank is
+		 * split between the first bits of group and instance
+		 */
+		gt->steering[L3BANK].group_target = (bank >> 2) & 0x7;
+		gt->steering[L3BANK].instance_target = bank & 0x3;
 	} else {
 		u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK,
 					 ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
@@ -277,6 +295,7 @@ void xe_gt_mcr_init(struct xe_gt *gt)
 		gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
 		gt->steering[DSS].ranges = xehpc_dss_steering_table;
 	} else if (xe->info.platform == XE_DG2) {
+		gt->steering[L3BANK].ranges = xehp_l3bank_steering_table;
 		gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
 		gt->steering[LNCF].ranges = xehp_lncf_steering_table;
 		gt->steering[DSS].ranges = xehp_dss_steering_table;
-- 
GitLab


From 6b980aa88d403db3e4cf5b58965dfa9a5f27c740 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 8 Mar 2023 18:18:57 -0800
Subject: [PATCH 1379/2340] drm/xe/mcr: Document how to initialize
 group/instance

Add a sentence about the initialization so it's clear for newcomers how
to tweak the init functions for new platforms.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index ab8fc649ba52e..909059112179a 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -34,7 +34,10 @@
  * fused off or currently powered down due to power gating, the MMIO operation
  * is "terminated" by the hardware.  Terminated read operations will return a
  * value of zero and terminated unicast write operations will be silently
- * ignored.
+ * ignored. During device initialization, the goal of the various
+ * ``init_steering_*()`` functions is to apply the platform-specific rules for
+ * each MCR register type to identify a steering target that will select a
+ * non-terminated instance.
  */
 
 enum {
-- 
GitLab


From 8846ffb457587e5d393a83ce977c3db7c800fe58 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 3 Mar 2023 22:26:55 -0800
Subject: [PATCH 1380/2340] drm/xe: Allow const propagation in gt_to_xe()

Replace the inline function with a _Generic() so gt_to_xe() can work
with a const struct xe_gt*, which leads to a const struct xe *.
This allows a const gt being passed around and when the xe device is
needed, compiler won't issue a warning that calling gt_to_xe() would
discard the const. Rather, just propagate the const to the xe pointer
being returned.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 5635f28031704..086369f7ee6df 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -49,10 +49,10 @@ static inline bool xe_gt_is_media_type(struct xe_gt *gt)
 	return gt->info.type == XE_GT_TYPE_MEDIA;
 }
 
-static inline struct xe_device * gt_to_xe(struct xe_gt *gt)
-{
-	return gt->xe;
-}
+#define gt_to_xe(gt__)								\
+	_Generic(gt__,								\
+		 const struct xe_gt *: (const struct xe_device *)((gt__)->xe),	\
+		 struct xe_gt *: (gt__)->xe)
 
 static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
 {
-- 
GitLab


From 1415283befa0e47df1270d10356a074793664757 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 3 Mar 2023 22:30:04 -0800
Subject: [PATCH 1381/2340] drm/xe: Constify xe_dss_mask_group_ffs()

Due to how xe_dss_mask_t is implemented, the type is a pointer. Since
this is only used for looking up the bits, make it const so it can be
used together with a const gt passed around.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_topology.c | 2 +-
 drivers/gpu/drm/xe/xe_gt_topology.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index f2cbee53462b1..14cf135fd648a 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -123,7 +123,7 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
  * groupsize and groupnum are non-zero.
  */
 unsigned int
-xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum)
+xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
 {
 	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index f47ab1b1269c7..5f35deed9128e 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -15,7 +15,7 @@ void xe_gt_topology_init(struct xe_gt *gt);
 void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
 
 unsigned int
-xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
+xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
 
 bool
 xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
-- 
GitLab


From 4c128558fe16b77013a251bcc3af8caa77fb7732 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 1 Mar 2023 01:31:06 -0800
Subject: [PATCH 1382/2340] drm/xe/rtp: Move match function from wa to rtp

Match functions are generally useful for other parts of the code (e.g.
xe_tuning.c). Move and rename the single one available to create a place
where similar match functions can be added.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c |  6 ++++++
 drivers/gpu/drm/xe/xe_rtp.h | 12 ++++++++++++
 drivers/gpu/drm/xe/xe_wa.c  |  8 +-------
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 5b1316b588d81..c04eca290ef03 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -154,3 +154,9 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 		}
 	}
 }
+
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe)
+{
+	return hwe->instance % 2 == 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index bd44fd8bbe057..9bd2532442ed4 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -397,4 +397,16 @@ struct xe_reg_sr;
 void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 		    struct xe_gt *gt, struct xe_hw_engine *hwe);
 
+/* Match functions to be used with XE_RTP_MATCH_FUNC */
+
+/**
+ * xe_rtp_match_even_instance - Match if engine instance is even
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if engine instance is even, false otherwise
+ */
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 03c5b01a14e40..67539f9d70b4e 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -91,17 +91,11 @@
 #define _MMIO(x)	_XE_RTP_REG(x)
 #define MCR_REG(x)	_XE_RTP_MCR_REG(x)
 
-static bool match_14011060649(const struct xe_gt *gt,
-			      const struct xe_hw_engine *hwe)
-{
-	return hwe->instance % 2 == 0;
-}
-
 static const struct xe_rtp_entry gt_was[] = {
 	{ XE_RTP_NAME("14011060649"),
 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
 		       ENGINE_CLASS(VIDEO_DECODE),
-		       FUNC(match_14011060649)),
+		       FUNC(xe_rtp_match_even_instance)),
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
-- 
GitLab


From 043790f3edb554f8db3e841fd17a33b622bc2b31 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 1 Mar 2023 01:31:07 -0800
Subject: [PATCH 1383/2340] drm/xe/rtp: Add match for render reset domain

This allows to create WA/tuning rules that match the first engine that
is either of compute or render class. This matters for platforms that
don't have a render engine and that may have arbitrary compute engines
fused off: some register programming need to be added to one of those
engines.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine_types.h |  5 +++++
 drivers/gpu/drm/xe/xe_rtp.c             | 10 ++++++++++
 drivers/gpu/drm/xe/xe_rtp.h             | 18 ++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 05a2fdc381d75..2c40384957da3 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -23,6 +23,7 @@ enum xe_engine_class {
 
 enum xe_hw_engine_id {
 	XE_HW_ENGINE_RCS0,
+#define XE_HW_ENGINE_RCS_MASK	GENMASK_ULL(XE_HW_ENGINE_RCS0, XE_HW_ENGINE_RCS0)
 	XE_HW_ENGINE_BCS0,
 	XE_HW_ENGINE_BCS1,
 	XE_HW_ENGINE_BCS2,
@@ -32,6 +33,7 @@ enum xe_hw_engine_id {
 	XE_HW_ENGINE_BCS6,
 	XE_HW_ENGINE_BCS7,
 	XE_HW_ENGINE_BCS8,
+#define XE_HW_ENGINE_BCS_MASK	GENMASK_ULL(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS0)
 	XE_HW_ENGINE_VCS0,
 	XE_HW_ENGINE_VCS1,
 	XE_HW_ENGINE_VCS2,
@@ -40,14 +42,17 @@ enum xe_hw_engine_id {
 	XE_HW_ENGINE_VCS5,
 	XE_HW_ENGINE_VCS6,
 	XE_HW_ENGINE_VCS7,
+#define XE_HW_ENGINE_VCS_MASK	GENMASK_ULL(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0)
 	XE_HW_ENGINE_VECS0,
 	XE_HW_ENGINE_VECS1,
 	XE_HW_ENGINE_VECS2,
 	XE_HW_ENGINE_VECS3,
+#define XE_HW_ENGINE_VECS_MASK	GENMASK_ULL(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0)
 	XE_HW_ENGINE_CCS0,
 	XE_HW_ENGINE_CCS1,
 	XE_HW_ENGINE_CCS2,
 	XE_HW_ENGINE_CCS3,
+#define XE_HW_ENGINE_CCS_MASK	GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
 	XE_NUM_HW_ENGINES,
 };
 
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index c04eca290ef03..0d2f51bb06e83 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -160,3 +160,13 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt,
 {
 	return hwe->instance % 2 == 0;
 }
+
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe)
+{
+	u64 render_compute_mask = gt->info.engine_mask &
+		(XE_HW_ENGINE_CCS_MASK | XE_HW_ENGINE_RCS_MASK);
+
+	return render_compute_mask &&
+		hwe->engine_id == __ffs(render_compute_mask);
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 9bd2532442ed4..433f0cbff57f7 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -409,4 +409,22 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 bool xe_rtp_match_even_instance(const struct xe_gt *gt,
 				const struct xe_hw_engine *hwe);
 
+/*
+ * xe_rtp_match_first_render_or_compute - Match if it's first render or compute
+ * engine in the GT
+ *
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Registers on the render reset domain need to have their values re-applied
+ * when any of those engines are reset. Since the engines reset together, a
+ * programming can be set to just one of them. For simplicity the first engine
+ * of either render or compute class can be chosen.
+ *
+ * Returns: true if engine id is the first to match the render reset domain,
+ * false otherwise.
+ */
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe);
+
 #endif
-- 
GitLab


From f647eff1725430dd835ac05a9f8f1661e2765f8e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 1 Mar 2023 01:31:08 -0800
Subject: [PATCH 1384/2340] drm/xe: Remove dump function from reg_sr

The dump function was originally added with the idea that it could be
re-used both for printing the reg-sr data and saving it to pass to GuC
via ADS. This was not used by the GuC integration, so remove it now to
give place to a new debug.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c       | 26 --------------------------
 drivers/gpu/drm/xe/xe_reg_sr.h       |  2 --
 drivers/gpu/drm/xe/xe_reg_sr_types.h |  5 -----
 3 files changed, 33 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index d675164697108..b2370241ebf95 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -43,32 +43,6 @@ int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
 	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
 }
 
-int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
-		      struct xe_reg_sr_kv **dst)
-{
-	struct xe_reg_sr_kv *iter;
-	struct xe_reg_sr_entry *entry;
-	unsigned long idx;
-
-	if (xa_empty(&sr->xa)) {
-		*dst = NULL;
-		return 0;
-	}
-
-	*dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL);
-	if (!*dst)
-		return -ENOMEM;
-
-	iter = *dst;
-	xa_for_each(&sr->xa, idx, entry) {
-		iter->k = idx;
-		iter->v = *entry;
-		iter++;
-	}
-
-	return 0;
-}
-
 static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
 {
 	if (sr->pool.used == sr->pool.allocated) {
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
index c3a9db251e925..9f47230c8ddcd 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -16,8 +16,6 @@ struct xe_device;
 struct xe_gt;
 
 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
-int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
-		      struct xe_reg_sr_kv **dst);
 
 int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
 		  const struct xe_reg_sr_entry *e);
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
index 0e6d542ff1b4e..3d22578910051 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr_types.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -23,11 +23,6 @@ struct xe_reg_sr_entry {
 	u8		reg_type;
 };
 
-struct xe_reg_sr_kv {
-	u32			k;
-	struct xe_reg_sr_entry	v;
-};
-
 struct xe_reg_sr {
 	struct {
 		struct xe_reg_sr_entry *arr;
-- 
GitLab


From 766849c4accad67f8affa37c580d44f48be193b6 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 1 Mar 2023 01:31:09 -0800
Subject: [PATCH 1385/2340] drm/xe: Name LRC wa after the engine it belongs

This makes it easier when printing the register-save-restore values
to know what is the engine.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 343370b44506e..daa433d0f2f55 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -318,7 +318,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 		if (gt->default_lrc[hwe->class])
 			continue;
 
-		xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe);
+		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
 		xe_wa_process_lrc(hwe);
 		xe_tuning_process_lrc(hwe);
 
-- 
GitLab


From 91ed180b419a1b2ccf9cc41999cb87eb9805fa38 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 1 Mar 2023 01:31:12 -0800
Subject: [PATCH 1386/2340] drm/xe/pvc: Remove A* steppings

The PVC pre-production A* steppings are not going to be supported in xe
driver - the steppings are important for the WAs and since we are not
adding the pre-productions ones, there is no need to add the stepping.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_step.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index 14f482f29ae44..ee927dfd3eb37 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -79,14 +79,12 @@ static const struct xe_step_info dg2_g12_revid_step_tbl[] = {
 };
 
 static const struct xe_step_info pvc_revid_step_tbl[] = {
-	[0x3] = { .graphics = STEP_A0 },
 	[0x5] = { .graphics = STEP_B0 },
 	[0x6] = { .graphics = STEP_B1 },
 	[0x7] = { .graphics = STEP_C0 },
 };
 
 static const int pvc_basedie_subids[] = {
-	[0x0] = STEP_A0,
 	[0x3] = STEP_B0,
 	[0x4] = STEP_B1,
 	[0x5] = STEP_B3,
-- 
GitLab


From 6d4f49b7dec3126c6d5491bcea5ae815b025d042 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 9 Mar 2023 14:17:46 +0200
Subject: [PATCH 1387/2340] drm/xe: make compound literal initialization const

Be careful about having const in the compound literal initialization to
keep the initializers in rodata. Here, the impact is 1.8k of mutable
data moved to rodata.

add/remove: 0/1 grow/shrink: 0/0 up/down: 0/-1804 (-1804)
Data                                         old     new   delta
__compound_literal                          1804       -   -1804
Total: Before=42425, After=40621, chg -4.25%
add/remove: 0/0 grow/shrink: 1/0 up/down: 1804/0 (1804)
RO Data                                      old     new   delta
__compound_literal                          7696    9500   +1804
Total: Before=138535, After=140339, chg +1.30%

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230309121746.479146-1-jani.nikula@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 433f0cbff57f7..ac983ce93684d 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -363,7 +363,7 @@ struct xe_reg_sr;
  */
 #define XE_RTP_RULES(r1, ...)							\
 	.n_rules = COUNT_ARGS(r1, ##__VA_ARGS__),				\
-	.rules = (struct xe_rtp_rule[]) {					\
+	.rules = (const struct xe_rtp_rule[]) {					\
 		CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__)	\
 	}
 
@@ -390,7 +390,7 @@ struct xe_reg_sr;
  */
 #define XE_RTP_ACTIONS(a1, ...)							\
 	.n_actions = COUNT_ARGS(a1, ##__VA_ARGS__),				\
-	.actions = (struct xe_rtp_action[]) {					\
+	.actions = (const struct xe_rtp_action[]) {				\
 		CALL_FOR_EACH(__ADD_XE_RTP_ACTION_PREFIX, a1, ##__VA_ARGS__)	\
 	}
 
-- 
GitLab


From 11823d48abce17d45e7e8c9bd525203f0096c6e8 Mon Sep 17 00:00:00 2001
From: Riana Tauro <riana.tauro@intel.com>
Date: Thu, 9 Mar 2023 18:48:56 +0530
Subject: [PATCH 1388/2340] drm/xe: Fix overflow in vram manager

The overflow caused xe_bo_restore_kernel to return an error
Fix overflow in vram manager alloc function.

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 643365b18bc7f..159ca7105df10 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -118,7 +118,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 
 		cur_size = size;
 
-		if (fpfn + size != place->lpfn << PAGE_SHIFT) {
+		if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
 			/*
 			 * Except for actual range allocation, modify the size and
 			 * min_block_size conforming to continuous flag enablement
-- 
GitLab


From ddad061e8fbcba69bbdd9ee05b1749810c419920 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 14 Mar 2023 08:58:37 +0000
Subject: [PATCH 1389/2340] drm/xe: one more s/lmem/vram/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Looks to have been introduced in some very recent changes, in-between
merging the driver wide s/lmem/vram/.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 1116e217ebc29..27cc31f022a5a 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -90,9 +90,9 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 
 	/*
 	 * There may be few KB of platform dependent reserved memory at the end
-	 * of lmem which is not part of the DSM. Such reserved memory portion is
+	 * of vram which is not part of the DSM. Such reserved memory portion is
 	 * always less then DSM granularity so align down the stolen_size to DSM
-	 * granularity to accommodate such reserve lmem portion.
+	 * granularity to accommodate such reserve vram portion.
 	 */
 	return ALIGN_DOWN(stolen_size, SZ_1M);
 }
-- 
GitLab


From 69db25e447b8a3b9153db8a9004c50b080d0497e Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 14 Mar 2023 08:58:38 +0000
Subject: [PATCH 1390/2340] drm/xe: add xe_ttm_stolen_cpu_access_needs_ggtt()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xe_ttm_stolen_cpu_inaccessible() was originally meant to just cover the
case where stolen is not directly CPU accessible on some older
integrated platforms, and as such a GGTT mapping was also required for
CPU access (as per the check in xe_bo_create_pin_map_at()).

However with small-bar systems on dgfx we have one more case where
stolen is also inaccessible, however here we don't have any fallback
GGTT mode for CPU access. Fix the check in xe_bo_create_pin_map_at() to
make this distinction clear. In such a case the later vmap() will fail
anyway.

v2: fix kernel-doc warning
v3: Simplify further and remove cpu_inaccessible()

Suggested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c             |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 39 ++++++++------------------
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h |  2 +-
 3 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index cfb79519b6731..5e309b26f75c5 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1160,7 +1160,7 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
 	u64 end = offset == ~0ull ? offset : start + size;
 
 	if (flags & XE_BO_CREATE_STOLEN_BIT &&
-	    xe_ttm_stolen_cpu_inaccessible(xe))
+	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
 		flags |= XE_BO_CREATE_GGTT_BIT;
 
 	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 27cc31f022a5a..9629b1a677f21 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -38,32 +38,17 @@ to_stolen_mgr(struct ttm_resource_manager *man)
 }
 
 /**
- * xe_ttm_stolen_cpu_inaccessible - Can we directly CPU access stolen memory for
- * this device.
+ * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
+ * stolen, can we then fallback to mapping through the GGTT.
  * @xe: xe device
  *
- * On some integrated platforms we can't directly access stolen via the CPU
- * (like some normal system memory).  Also on small-bar systems for discrete,
- * since stolen is always as the end of normal VRAM, and the BAR likely doesn't
- * stretch that far. However CPU access of stolen is generally rare, and at
- * least on discrete should not be needed.
- *
- * If this is indeed inaccessible then we fallback to using the GGTT mappable
- * aperture for CPU access. On discrete platforms we have no such thing, so when
- * later attempting to CPU map the memory an error is instead thrown.
+ * Some older integrated platforms don't support reliable CPU access for stolen,
+ * however on such hardware we can always use the mappable part of the GGTT for
+ * CPU access. Check if that's the case for this device.
  */
-bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe)
+bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
 {
-	struct ttm_resource_manager *ttm_mgr =
-		ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
-	struct xe_ttm_stolen_mgr *mgr;
-
-	if (!ttm_mgr)
-		return true;
-
-	mgr = to_stolen_mgr(ttm_mgr);
-
-	return !mgr->io_base || GRAPHICS_VERx100(xe) < 1270;
+	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
 }
 
 static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
@@ -178,7 +163,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
 		    stolen_size);
 
-	if (!xe_ttm_stolen_cpu_inaccessible(xe))
+	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
 		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size);
 }
 
@@ -191,7 +176,7 @@ u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
 
 	XE_BUG_ON(!mgr->io_base);
 
-	if (!IS_DGFX(xe) && xe_ttm_stolen_cpu_inaccessible(xe))
+	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
 		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
 
 	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
@@ -257,10 +242,10 @@ int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
 	if (!mgr || !mgr->io_base)
 		return -EIO;
 
-	if (!xe_ttm_stolen_cpu_inaccessible(xe))
-		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
-	else
+	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
 		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
+	else
+		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
 }
 
 u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
index 2fda97b97a053..1777245ff8101 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
@@ -14,7 +14,7 @@ struct xe_device;
 
 void xe_ttm_stolen_mgr_init(struct xe_device *xe);
 int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem);
-bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe);
+bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe);
 u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset);
 u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe);
 
-- 
GitLab


From 143800547b96dfc56d1f50a135c367fbfd40fd5d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:29:59 -0700
Subject: [PATCH 1391/2340] drm/xe/rtp: Add match helper for gslice fused off

Add match helper to detect when the first gslice is fused off, as needed
by future workarounds.

v2:
  - Add warning if called on a platform without geometry pipeline
    (Matt Roper)
  - Hardcode 4 as the number of gslices, which matches all the currently
    supported platforms. PVC doesn't have geometry pipeline and
    shouldn't use this function (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-2-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c | 16 ++++++++++++++++
 drivers/gpu/drm/xe/xe_rtp.h | 11 +++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 0d2f51bb06e83..cb9dd894547d8 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -8,6 +8,7 @@
 #include <drm/xe_drm.h>
 
 #include "xe_gt.h"
+#include "xe_gt_topology.h"
 #include "xe_macros.h"
 #include "xe_reg_sr.h"
 
@@ -170,3 +171,18 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
 	return render_compute_mask &&
 		hwe->engine_id == __ffs(render_compute_mask);
 }
+
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe)
+{
+	unsigned int dss_per_gslice = 4;
+	unsigned int dss;
+
+	if (drm_WARN(&gt_to_xe(gt)->drm, !gt->fuse_topo.g_dss_mask,
+		     "Checking gslice for platform without geometry pipeline\n"))
+		return false;
+
+	dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0);
+
+	return dss >= dss_per_gslice;
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index ac983ce93684d..a3be7c77753a1 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -427,4 +427,15 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt,
 bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
 					  const struct xe_hw_engine *hwe);
 
+/*
+ * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off
+ *
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if first gslice is fused off, false otherwise.
+ */
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe);
+
 #endif
-- 
GitLab


From 5be84050ddce298503e7290d375b6dcf3ce920d2 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:00 -0700
Subject: [PATCH 1392/2340] drm/xe/reg_sr: Tweak verbosity for register
 printing

If there is no register to save-restore or whitelist, just return. This
drops some noise from the log, particurlarly for platforms with several
engines like PVC:

	[drm:xe_reg_sr_apply_mmio [xe]] Applying bcs0 save-restore MMIOs
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs0 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying bcs1 save-restore MMIOs
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs1 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying bcs2 save-restore MMIOs
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs2 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying bcs5 save-restore MMIOs
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs5 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying bcs6 save-restore MMIOs
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs6 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying bcs7 save-restore MMIOs
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs7 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying bcs8 save-restore MMIOs
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs8 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying ccs0 save-restore MMIOs
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0x20e4] = 0x00008000
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0xb01c] = 0x00000001
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0xe48c] = 0x00000800
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0xe7c8] = 0x40000000
	...

On a PVC system it should show something like below. Whitelist calls
are still there since they aren't actually empty - driver just doesn't
print each individual entry. This will be fixed in future.

	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs0 registers
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs1 registers
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs2 registers
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs5 registers
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs6 registers
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs7 registers
	[drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs8 registers
	[drm:xe_reg_sr_apply_mmio [xe]] Applying ccs0 save-restore MMIOs
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0x20e4] = 0x00008000
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0xb01c] = 0x00000001
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0xe48c] = 0x00000800
	[drm:xe_reg_sr_apply_mmio [xe]] REG[0xe7c8] = 0x40000000

v2: Only tweak log verbosity, leave the whitelist printout for later
    since decoding the whitelist is more complex.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-3-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index b2370241ebf95..3d041c9330df0 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -167,6 +167,9 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
 	unsigned long reg;
 	int err;
 
+	if (xa_empty(&sr->xa))
+		return;
+
 	drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name);
 
 	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
@@ -194,6 +197,9 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 	unsigned int slot = 0;
 	int err;
 
+	if (xa_empty(&sr->xa))
+		return;
+
 	drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
 
 	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
-- 
GitLab


From d855d2246ea6b04cbda372846b21c040fb068575 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:01 -0700
Subject: [PATCH 1393/2340] drm/xe: Print whitelist while applying

Besides printing the various register save-restore, it's also useful to
know the register being allowed/denied access from unprivileged batch
buffers. Print them during device probe.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-4-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c        |  4 +++
 drivers/gpu/drm/xe/xe_reg_whitelist.c | 41 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_reg_whitelist.h |  7 +++++
 3 files changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 3d041c9330df0..ef12de48ab73c 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -20,6 +20,7 @@
 #include "xe_gt_mcr.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
+#include "xe_reg_whitelist.h"
 #include "xe_rtp_types.h"
 
 #define XE_REG_SR_GROW_STEP_DEFAULT	16
@@ -193,6 +194,7 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_reg_sr_entry *entry;
+	struct drm_printer p;
 	unsigned long reg;
 	unsigned int slot = 0;
 	int err;
@@ -206,7 +208,9 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 	if (err)
 		goto err_force_wake;
 
+	p = drm_debug_printer(KBUILD_MODNAME);
 	xa_for_each(&sr->xa, reg, entry) {
+		xe_reg_whitelist_print_entry(&p, 0, reg, entry);
 		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
 				reg | entry->set_bits);
 		slot++;
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 2dd10e62718f1..6c8577e8dba60 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -67,3 +67,44 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
 {
 	xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe);
 }
+
+/**
+ * xe_reg_whitelist_print_entry - print one whitelist entry
+ * @p: DRM printer
+ * @indent: indent level
+ * @reg: register allowed/denied
+ * @entry: save-restore entry
+ *
+ * Print details about the entry added to allow/deny access
+ */
+void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
+				  u32 reg, struct xe_reg_sr_entry *entry)
+{
+	u32 val = entry->set_bits;
+	const char *access_str = "(invalid)";
+	unsigned range_bit = 2;
+	u32 range_start, range_end;
+	bool deny;
+
+	deny = val & RING_FORCE_TO_NONPRIV_DENY;
+
+	switch (val & RING_FORCE_TO_NONPRIV_RANGE_MASK) {
+	case RING_FORCE_TO_NONPRIV_RANGE_4: range_bit = 4; break;
+	case RING_FORCE_TO_NONPRIV_RANGE_16: range_bit = 6; break;
+	case RING_FORCE_TO_NONPRIV_RANGE_64: range_bit = 8; break;
+	}
+
+	range_start = reg & REG_GENMASK(25, range_bit);
+	range_end = range_start | REG_GENMASK(range_bit, 0);
+
+	switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
+	case RING_FORCE_TO_NONPRIV_ACCESS_RW: access_str = "rw"; break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_RD: access_str = "read"; break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_WR: access_str = "write"; break;
+	}
+
+	drm_printf_indent(p, indent, "REG[0x%x-0x%x]: %s %s access\n",
+			  range_start, range_end,
+			  deny ? "deny" : "allow",
+			  access_str);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
index 6e861b1bdb01e..c76d81c528da2 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.h
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -6,8 +6,15 @@
 #ifndef _XE_REG_WHITELIST_
 #define _XE_REG_WHITELIST_
 
+#include <linux/types.h>
+
+struct drm_printer;
 struct xe_hw_engine;
+struct xe_reg_sr_entry;
 
 void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
 
+void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
+				  u32 reg, struct xe_reg_sr_entry *entry);
+
 #endif
-- 
GitLab


From 6647e2fe23f595dc46780b7cc26be872ca168643 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:02 -0700
Subject: [PATCH 1394/2340] drm/xe/debugfs: Dump register save-restore tables

Add debugfs entry to dump the final tables with register save-restore
information.

For the workarounds, this has a format a little bit different than when the
values are applied because we don't want to read the values from the HW
when dumping via debugfs. For whitelist it just re-uses the print
function added for when the whitelist is being built.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-5-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c    | 30 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_reg_sr.c        | 21 +++++++++++++++++++
 drivers/gpu/drm/xe/xe_reg_sr.h        |  2 ++
 drivers/gpu/drm/xe/xe_reg_whitelist.c | 18 ++++++++++++++++
 drivers/gpu/drm/xe/xe_reg_whitelist.h |  3 +++
 5 files changed, 74 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 78942e12e76cb..9fab8017490f2 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -16,6 +16,8 @@
 #include "xe_gt_topology.h"
 #include "xe_hw_engine.h"
 #include "xe_macros.h"
+#include "xe_reg_sr.h"
+#include "xe_reg_whitelist.h"
 #include "xe_uc_debugfs.h"
 
 static struct xe_gt *node_to_gt(struct drm_info_node *node)
@@ -98,6 +100,33 @@ static int ggtt(struct seq_file *m, void *data)
 	return xe_ggtt_dump(gt->mem.ggtt, &p);
 }
 
+static int register_save_restore(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	xe_reg_sr_dump(&gt->reg_sr, &p);
+	drm_printf(&p, "\n");
+
+	drm_printf(&p, "Engine\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_sr_dump(&hwe->reg_sr, &p);
+	drm_printf(&p, "\n");
+
+	drm_printf(&p, "LRC\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_sr_dump(&hwe->reg_lrc, &p);
+	drm_printf(&p, "\n");
+
+	drm_printf(&p, "Whitelist\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_whitelist_dump(&hwe->reg_whitelist, &p);
+
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
@@ -105,6 +134,7 @@ static const struct drm_info_list debugfs_list[] = {
 	{"topology", topology, 0},
 	{"steering", steering, 0},
 	{"ggtt", ggtt, 0},
+	{"register-save-restore", register_save_restore, 0},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index ef12de48ab73c..37ae8412cb006 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -229,3 +229,24 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 err_force_wake:
 	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
 }
+
+/**
+ * xe_reg_sr_dump - print all save/restore entries
+ * @sr: Save/restore entries
+ * @p: DRM printer
+ */
+void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	if (!sr->name || xa_empty(&sr->xa))
+		return;
+
+	drm_printf(p, "%s\n", sr->name);
+	xa_for_each(&sr->xa, reg, entry)
+		drm_printf(p, "\tREG[0x%lx] clr=0x%08x set=0x%08x masked=%s mcr=%s\n",
+			   reg, entry->clr_bits, entry->set_bits,
+			   str_yes_no(entry->masked_reg),
+			   str_yes_no(entry->reg_type == XE_RTP_REG_MCR));
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
index 9f47230c8ddcd..3af369089faab 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -14,8 +14,10 @@
 
 struct xe_device;
 struct xe_gt;
+struct drm_printer;
 
 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
+void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p);
 
 int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
 		  const struct xe_reg_sr_entry *e);
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 6c8577e8dba60..c4b3a20452996 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -108,3 +108,21 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
 			  deny ? "deny" : "allow",
 			  access_str);
 }
+
+/**
+ * xe_reg_whitelist_dump - print all whitelist entries
+ * @sr: Save/restore entries
+ * @p: DRM printer
+ */
+void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	if (!sr->name || xa_empty(&sr->xa))
+		return;
+
+	drm_printf(p, "%s\n", sr->name);
+	xa_for_each(&sr->xa, reg, entry)
+		xe_reg_whitelist_print_entry(p, 1, reg, entry);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
index c76d81c528da2..69b121d377da0 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.h
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -10,6 +10,7 @@
 
 struct drm_printer;
 struct xe_hw_engine;
+struct xe_reg_sr;
 struct xe_reg_sr_entry;
 
 void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
@@ -17,4 +18,6 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
 void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
 				  u32 reg, struct xe_reg_sr_entry *entry);
 
+void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p);
+
 #endif
-- 
GitLab


From 6b5ccd6360e29e67a760f82d0b28cf7c058732f7 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:03 -0700
Subject: [PATCH 1395/2340] drm/xe: Reorder WAs to consider the platform

Now that number of platforms is growing, it's getting hard to know the
workarounds for each platform. Split the entries inside the same table
so the workarounds checking IP version are listed first, followed by
each platform. Next step when it grows too much is to split in smaller
tables.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-6-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 93 ++++++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 67539f9d70b4e..155cabe16e2e9 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -99,6 +99,24 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
+	{ XE_RTP_NAME("14011059788"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
+	},
+
+	/* DG1 */
+
+	{ XE_RTP_NAME("1409420604"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("1408615072"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL))
+	},
+
+	/* DG2 */
+
 	{ XE_RTP_NAME("16010515920"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
 		       STEP(A0, B0),
@@ -162,47 +180,15 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
-	{ XE_RTP_NAME("14011059788"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
-	},
-	{ XE_RTP_NAME("1409420604"),
-	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS))
-	},
-	{ XE_RTP_NAME("1408615072"),
-	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL))
-	},
 	{}
 };
 
 static const struct xe_rtp_entry engine_was[] = {
-	{ XE_RTP_NAME("14015227452"),
-	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
-	},
-	{ XE_RTP_NAME("1606931601"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
-	},
 	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(GEN7_FF_THREAD_MODE,
 			     GEN12_FF_TESSELATION_DOP_GATE_DISABLE))
 	},
-	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
-	},
-	{ XE_RTP_NAME("18019627453"),
-	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
-	},
 	{ XE_RTP_NAME("1409804808"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
@@ -217,6 +203,30 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
+	{ XE_RTP_NAME("1606931601"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("1406941453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1,
+			     GEN9_FFSC_PERCTX_PREEMPT_CTRL,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+
+	/* TGL */
+
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
@@ -224,6 +234,9 @@ static const struct xe_rtp_entry engine_was[] = {
 			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
+
+	/* RKL */
+
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
@@ -231,15 +244,17 @@ static const struct xe_rtp_entry engine_was[] = {
 			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
-	{ XE_RTP_NAME("1406941453"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL,
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
-	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1,
-			     GEN9_FFSC_PERCTX_PREEMPT_CTRL,
+	{ XE_RTP_NAME("18019627453"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{}
-- 
GitLab


From a19220fa5f1a740d98654ee1d6cf11a8e0158018 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:04 -0700
Subject: [PATCH 1396/2340] drm/xe: Add PVC gt workarounds

Synchronize with i915 the PVC gt workarounds as of committ
commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w
access order").

v2: Add masked flag to XEHPC_LNCFMISCCFGREG0 (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-7-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  9 +++++++++
 drivers/gpu/drm/xe/xe_wa.c           | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d3b862e4cd0db..411cdbae1894a 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -216,6 +216,15 @@
 #define FORCEWAKE_MEDIA_VDBOX_GEN11(n)		_MMIO(0xa540 + (n) * 4)
 #define FORCEWAKE_MEDIA_VEBOX_GEN11(n)		_MMIO(0xa560 + (n) * 4)
 
+#define XEHPC_LNCFMISCCFGREG0			MCR_REG(0xb01c)
+#define   XEHPC_OVRLSCCC			REG_BIT(0)
+
+#define RENDER_MOD_CTRL				MCR_REG(0xcf2c)
+#define COMP_MOD_CTRL				MCR_REG(0xcf30)
+#define XEHP_VDBX_MOD_CTRL			MCR_REG(0xcf34)
+#define XEHP_VEBX_MOD_CTRL			MCR_REG(0xcf38)
+#define   FORCE_MISS_FTLB			REG_BIT(3)
+
 #define GEN10_SAMPLER_MODE			MCR_REG(0xe18c)
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 155cabe16e2e9..e8d523033b874 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -180,6 +180,25 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE))
+	},
+	{ XE_RTP_NAME("18018781329"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("16016694945"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
 	{}
 };
 
-- 
GitLab


From 4688d9ce2e3d0ad59147970295018cec4c67afa5 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:05 -0700
Subject: [PATCH 1397/2340] drm/xe: Add PVC engine workarounds

Sync PVC engine workarounds with i915.

v2: Remove 16016694945. It was added by mistake. It's a GT workaround,
already present in the GT table (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-8-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  9 +++++++++
 drivers/gpu/drm/xe/xe_wa.c           | 22 ++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 411cdbae1894a..e55c2f83b3532 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -43,6 +43,9 @@
 #define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
 #define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
 
+#define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
+#define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
+
 #define GEN9_CS_DEBUG_MODE1			_MMIO(0x20ec)
 #define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
 
@@ -230,6 +233,9 @@
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
 #define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
 
+#define CACHE_MODE_SS				MCR_REG(0xe420)
+#define   DISABLE_ECC				REG_BIT(5)
+
 #define GEN9_ROW_CHICKEN4			MCR_REG(0xe48c)
 #define   GEN12_DISABLE_GRF_CLEAR		REG_BIT(13)
 #define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
@@ -246,6 +252,9 @@
 #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
 #define   GEN12_DISABLE_DOP_GATING              REG_BIT(0)
 
+#define LSC_CHICKEN_BIT_0			MCR_REG(0xe7c8)
+#define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
+
 #define SARB_CHICKEN1				MCR_REG(0xe90c)
 #define   COMP_CKN_IN				REG_GENMASK(30, 29)
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index e8d523033b874..4fe01168f45f1 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -276,6 +276,28 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("22014226127"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE))
+	},
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("16015675438"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14014999345"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), STEP(B0, C0)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
 	{}
 };
 
-- 
GitLab


From 911aeb0f61b8cb9b903105d2e585e80baadb513b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:06 -0700
Subject: [PATCH 1398/2340] drm/xe: Add missing DG2 gt workarounds and tunings

Synchronize with i915 the DG2 gt workarounds as of
commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access
order").

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-9-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 17 +++++++++++++-
 drivers/gpu/drm/xe/xe_tuning.c       |  4 ++++
 drivers/gpu/drm/xe/xe_wa.c           | 34 +++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index e55c2f83b3532..f1a9d065120e4 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -168,7 +168,6 @@
 #define   GAMTLBVEBOX0_CLKGATE_DIS		REG_BIT(16)
 #define   LTCDD_CLKGATE_DIS			REG_BIT(10)
 
-#define GEN11_SLICE_UNIT_LEVEL_CLKGATE		_MMIO(0x94d4)
 #define XEHP_SLICE_UNIT_LEVEL_CLKGATE		MCR_REG(0x94d4)
 #define   SARBUNIT_CLKGATE_DIS			(1 << 5)
 #define   RCCUNIT_CLKGATE_DIS			(1 << 7)
@@ -222,12 +221,28 @@
 #define XEHPC_LNCFMISCCFGREG0			MCR_REG(0xb01c)
 #define   XEHPC_OVRLSCCC			REG_BIT(0)
 
+#define XEHP_L3NODEARBCFG			MCR_REG(0xb0b4)
+#define   XEHP_LNESPARE				REG_BIT(19)
+
+#define XEHP_L3SCQREG7				MCR_REG(0xb188)
+#define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
+
+#define XEHP_MERT_MOD_CTRL			MCR_REG(0xcf28)
 #define RENDER_MOD_CTRL				MCR_REG(0xcf2c)
 #define COMP_MOD_CTRL				MCR_REG(0xcf30)
 #define XEHP_VDBX_MOD_CTRL			MCR_REG(0xcf34)
 #define XEHP_VEBX_MOD_CTRL			MCR_REG(0xcf38)
 #define   FORCE_MISS_FTLB			REG_BIT(3)
 
+#define XEHP_GAMSTLB_CTRL			MCR_REG(0xcf4c)
+#define   CONTROL_BLOCK_CLKGATE_DIS		REG_BIT(12)
+#define   EGRESS_BLOCK_CLKGATE_DIS		REG_BIT(11)
+#define   TAG_BLOCK_CLKGATE_DIS			REG_BIT(7)
+
+#define XEHP_GAMCNTRL_CTRL			MCR_REG(0xcf54)
+#define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
+#define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
+
 #define GEN10_SAMPLER_MODE			MCR_REG(0xe18c)
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 2861a014c85c4..47b27dccb3854 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -16,6 +16,10 @@
 #define MCR_REG(x)	_XE_RTP_MCR_REG(x)
 
 static const struct xe_rtp_entry gt_tunings[] = {
+	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
+	},
 	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 4fe01168f45f1..13c8dbf49cbac 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -132,6 +132,14 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
 	  XE_RTP_ACTIONS(SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
 	},
+	{ XE_RTP_NAME("14012362059"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("14012362059"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
+	},
 	{ XE_RTP_NAME("14010948348"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS))
@@ -142,7 +150,7 @@ static const struct xe_rtp_entry gt_was[] = {
 	},
 	{ XE_RTP_NAME("14011371254"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011431319"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
@@ -172,6 +180,13 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS))
 	},
+	{ XE_RTP_NAME("14010680813"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL,
+			     CONTROL_BLOCK_CLKGATE_DIS |
+			     EGRESS_BLOCK_CLKGATE_DIS |
+			     TAG_BLOCK_CLKGATE_DIS))
+	},
 	{ XE_RTP_NAME("14014830051"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN))
@@ -180,6 +195,23 @@ static const struct xe_rtp_entry gt_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
+	{ XE_RTP_NAME("18018781329"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("1509235366"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_GAMCNTRL_CTRL,
+			     INVALIDATION_BROADCAST_MODE_DIS |
+			     GLOBAL_INVALIDATION_MODE))
+	},
+	{ XE_RTP_NAME("14010648519"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE))
+	},
 
 	/* PVC */
 
-- 
GitLab


From 4d5ab1216385941fa9336b13cb27c259b149ab43 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:07 -0700
Subject: [PATCH 1399/2340] drm/xe: Add missing DG2 engine workarounds

Synchronize with i915 the DG2 gt workarounds as of
commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access
order").

A few simplifications were done when the WA should be applied to
some steps of a subplatform and all the steppings of the other
subplatforms. This happened with Wa_1509727124, Wa_22012856258 and a few
others. In figure the pre-production steppings will be removed, so this
can be already simplified a little bit.

v2:
  - Make 1308578152 conditional on first gslice fused off
  - Add the missing Wa_1608949956/Wa_14010198302 (Matt Roper)
v3:
  - Do not duplicate the implementation of 18019627453 since it's
    already covered by other WA numbers in graphics versions 1200 and
    1210

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-10-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  27 ++++
 drivers/gpu/drm/xe/xe_wa.c           | 191 ++++++++++++++++++++++++++-
 2 files changed, 213 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index f1a9d065120e4..9320cb01d4240 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -48,6 +48,7 @@
 
 #define GEN9_CS_DEBUG_MODE1			_MMIO(0x20ec)
 #define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
+#define   GEN12_REPLAY_MODE_GRANULARITY		REG_BIT(0)
 
 #define PS_INVOCATION_COUNT			_MMIO(0x2348)
 
@@ -91,6 +92,9 @@
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC	REG_BIT(11)
 #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE	REG_BIT(9)
 
+#define VFG_PREEMPTION_CHICKEN			_MMIO(0x83b4)
+#define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
+
 #define XEHP_SQCM				MCR_REG(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
@@ -248,8 +252,13 @@
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
 #define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
 
+#define GEN9_HALF_SLICE_CHICKEN7		MCR_REG(0xe194)
+#define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
+
 #define CACHE_MODE_SS				MCR_REG(0xe420)
+#define   ENABLE_EU_COUNT_FOR_TDL_FLUSH		REG_BIT(10)
 #define   DISABLE_ECC				REG_BIT(5)
+#define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
 
 #define GEN9_ROW_CHICKEN4			MCR_REG(0xe48c)
 #define   GEN12_DISABLE_GRF_CLEAR		REG_BIT(13)
@@ -260,6 +269,10 @@
 #define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
 #define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
+#define GEN8_ROW_CHICKEN			MCR_REG(0xe4f0)
+#define   UGM_BACKUP_MODE			REG_BIT(13)
+#define   MDQ_ARBITRATION_MODE			REG_BIT(12)
+
 #define GEN8_ROW_CHICKEN2			MCR_REG(0xe4f4)
 #define   GEN12_DISABLE_READ_SUPPRESSION	REG_BIT(15)
 #define   GEN12_DISABLE_EARLY_READ		REG_BIT(14)
@@ -267,8 +280,22 @@
 #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
 #define   GEN12_DISABLE_DOP_GATING              REG_BIT(0)
 
+#define XEHP_HDC_CHICKEN0			MCR_REG(0xe5f0)
+#define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
+
+#define RT_CTRL					MCR_REG(0xe530)
+#define   DIS_NULL_QUERY			REG_BIT(10)
+
 #define LSC_CHICKEN_BIT_0			MCR_REG(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
+#define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
+
+#define LSC_CHICKEN_BIT_0_UDW			MCR_REG(0xe7c8 + 4)
+#define   DIS_CHAIN_2XSIMD8			REG_BIT(55 - 32)
+#define   FORCE_SLM_FENCE_SCOPE_TO_TILE		REG_BIT(42 - 32)
+#define   FORCE_UGM_FENCE_SCOPE_TO_TILE		REG_BIT(41 - 32)
+#define   MAXREQS_PER_BANK			REG_GENMASK(39 - 32, 37 - 32)
+#define   DISABLE_128B_EVICTION_COMMAND_UDW	REG_BIT(36 - 32)
 
 #define SARB_CHICKEN1				MCR_REG(0xe90c)
 #define   COMP_CKN_IN				REG_GENMASK(30, 29)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 13c8dbf49cbac..306541b229bf1 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -91,6 +91,9 @@
 #define _MMIO(x)	_XE_RTP_REG(x)
 #define MCR_REG(x)	_XE_RTP_MCR_REG(x)
 
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
 static const struct xe_rtp_entry gt_was[] = {
 	{ XE_RTP_NAME("14011060649"),
 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
@@ -259,8 +262,8 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
-	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
@@ -298,16 +301,192 @@ static const struct xe_rtp_entry engine_was[] = {
 
 	/* DG2 */
 
+	{ XE_RTP_NAME("22013037850"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
+			     DISABLE_128B_EVICTION_COMMAND_UDW))
+	},
+	{ XE_RTP_NAME("22014226127"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE))
+	},
+	{ XE_RTP_NAME("18017747507"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(VFG_PREEMPTION_CHICKEN,
+			     POLYGON_TRIFAN_LINELOOP_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("22012826095, 22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
+				   MAXREQS_PER_BANK,
+				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
+	},
+	{ XE_RTP_NAME("22012826095, 22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
+				   MAXREQS_PER_BANK,
+				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
+	},
+	{ XE_RTP_NAME("22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
+	},
+	{ XE_RTP_NAME("22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
+	},
+	{ XE_RTP_NAME("14010918519"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0,
+			     FORCE_SLM_FENCE_SCOPE_TO_TILE |
+			     FORCE_UGM_FENCE_SCOPE_TO_TILE,
+			     /*
+			      * Ignore read back as it always returns 0 in these
+			      * steps
+			      */
+			     .read_mask = 0))
+	},
 	{ XE_RTP_NAME("14015227452"),
-	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
-	{ XE_RTP_NAME("18019627453"),
+	{ XE_RTP_NAME("16015675438"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2,
+			     PERF_FIX_BALANCING_CFE_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("16011620976, 22015475538"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8))
+	},
+	{ XE_RTP_NAME("22012654132"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
+			     XE_RTP_ACTION_FLAG(MASKED_REG),
+			     /*
+			      * Register can't be read back for verification on
+			      * DG2 due to Wa_14012342262
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("22012654132"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
+			     XE_RTP_ACTION_FLAG(MASKED_REG),
+			     /*
+			      * Register can't be read back for verification on
+			      * DG2 due to Wa_14012342262
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("1509727124"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+	  XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("22012856258"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14013392000"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14012419201"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4,
+			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14012419201"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4,
+			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("1308578152"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER),
+		       FUNC(xe_rtp_match_first_gslice_fused_off)),
+	  XE_RTP_ACTIONS(CLR(GEN9_CS_DEBUG_MODE1,
+			     GEN12_REPLAY_MODE_GRANULARITY,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("22010960976, 14013347512"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0,
+			     LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("1608949956, 14010198302"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN,
+			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
+	{ XE_RTP_NAME("22010430635"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4,
+			     GEN12_DISABLE_GRF_CLEAR,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14013202645"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
+	},
+	{ XE_RTP_NAME("14013202645"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
+	},
+	{ XE_RTP_NAME("22012532006"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("22012532006"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("22014600077"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(B0, FOREVER),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
+			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
+			     XE_RTP_ACTION_FLAG(MASKED_REG),
+			     /* 
+			      * Wa_14012342262 write-only reg, so skip
+			      * verification
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("22014600077"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
+			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
+			     XE_RTP_ACTION_FLAG(MASKED_REG),
+			     /* 
+			      * Wa_14012342262 write-only reg, so skip
+			      * verification
+			      */
+			     .read_mask = 0))
+	},
 
 	/* PVC */
 
@@ -365,6 +544,8 @@ static const struct xe_rtp_entry lrc_was[] = {
 	{}
 };
 
+__diag_pop();
+
 /**
  * xe_wa_process_gt - process GT workaround table
  * @gt: GT instance to process workarounds for
-- 
GitLab


From 11f78b130835695150ddeae98a90d433e5b02d1e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:08 -0700
Subject: [PATCH 1400/2340] drm/xe: Add missing DG2 lrc tunings

Synchronize with i915 the DG2 tunings as of
commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access
order").

Contrary to the tuning "gang timer" for TGL, there is no quick
justification for why the read back is disabled in i915. Keep it
with that flag for now. That can be tentatively removed later when the
read values are checked.

v2: Use XEHP_FF_MODE2 instead of GEN12_FF_MODE2 (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-11-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  7 +++++++
 drivers/gpu/drm/xe/xe_tuning.c       | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 9320cb01d4240..97a9d78e88314 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -73,7 +73,11 @@
 #define XEHP_TILE0_ADDR_RANGE			MCR_REG(0x4900)
 #define XEHP_FLAT_CCS_BASE_ADDR			MCR_REG(0x4910)
 
+#define CHICKEN_RASTER_2			MCR_REG(0x6208)
+#define   TBIMR_FAST_CLIP			REG_BIT(5)
+
 #define GEN12_FF_MODE2				_MMIO(0x6604)
+#define XEHP_FF_MODE2				MCR_REG(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
 #define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
@@ -228,6 +232,9 @@
 #define XEHP_L3NODEARBCFG			MCR_REG(0xb0b4)
 #define   XEHP_LNESPARE				REG_BIT(19)
 
+#define XEHP_L3SQCREG5				MCR_REG(0xb158)
+#define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
+
 #define XEHP_L3SCQREG7				MCR_REG(0xb188)
 #define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
 
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 47b27dccb3854..7ff5eb762da54 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -35,6 +35,26 @@ static const struct xe_rtp_entry lrc_tunings[] = {
 						FF_MODE2_GS_TIMER_MASK,
 						FF_MODE2_GS_TIMER_224))
 	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+	{ XE_RTP_NAME("Tuning: TDS gang timer"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  /* read verification is ignored as in i915 - need to check enabling */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
+						FF_MODE2_TDS_TIMER_MASK,
+						FF_MODE2_TDS_TIMER_128))
+	},
+	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
 	{}
 };
 
-- 
GitLab


From 8cd7e9759766d717cf4c7be53e17acf6dff19283 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:09 -0700
Subject: [PATCH 1401/2340] drm/xe: Add missing DG2 lrc workarounds

Synchronize with i915 the DG2 lrc workarounds as of
commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access
order").

A few simplifications were done when the WA should be applied to some
steps of a subplatform and all the steppings of the other subplatforms.
In this case, it was simply applied to all the steppings, which only
means applying it to a few more A* steppings.

The implementation of the workaround 16011186671 triggers a bug in the
RTP infra: it's not possible to set the flag the usual way when having
multiple actions in the entry. This may be fixed later, but for now it's
sufficient to just set the flag directly without the helper macro.

v2: Fix 14014947963 to use FIELD_SET (Matt Roper)

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-12-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 19 +++++++++++
 drivers/gpu/drm/xe/xe_wa.c           | 51 ++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 97a9d78e88314..a079e1aef5a48 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -73,9 +73,16 @@
 #define XEHP_TILE0_ADDR_RANGE			MCR_REG(0x4900)
 #define XEHP_FLAT_CCS_BASE_ADDR			MCR_REG(0x4910)
 
+#define CHICKEN_RASTER_1			MCR_REG(0x6204)
+#define   DIS_SF_ROUND_NEAREST_EVEN		REG_BIT(8)
+
 #define CHICKEN_RASTER_2			MCR_REG(0x6208)
 #define   TBIMR_FAST_CLIP			REG_BIT(5)
 
+#define VFLSKPD					MCR_REG(0x62a8)
+#define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
+#define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
+
 #define GEN12_FF_MODE2				_MMIO(0x6604)
 #define XEHP_FF_MODE2				MCR_REG(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
@@ -83,6 +90,12 @@
 #define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
 #define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
 
+#define CACHE_MODE_1				_MMIO(0x7004)
+#define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
+
+#define XEHP_PSS_MODE2				MCR_REG(0x703c)
+#define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
+
 #define HIZ_CHICKEN				_MMIO(0x7018)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
 
@@ -96,6 +109,12 @@
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC	REG_BIT(11)
 #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE	REG_BIT(9)
 
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1		MCR_REG(0x731c)
+#define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
+
+#define VF_PREEMPTION				_MMIO(0x83a4)
+#define   PREEMPTION_VERTEX_COUNT		REG_GENMASK(15, 0)
+
 #define VFG_PREEMPTION_CHICKEN			_MMIO(0x83b4)
 #define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 306541b229bf1..0621706f46e62 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -541,6 +541,57 @@ static const struct xe_rtp_entry lrc_was[] = {
 			     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("16011186671"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)),
+	  XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH,
+			     .flags = XE_RTP_ACTION_FLAG_MASKED_REG),
+			 SET(VFLSKPD, DIS_OVER_FETCH_CACHE,
+			     .flags = XE_RTP_ACTION_FLAG_MASKED_REG))
+	},
+	{ XE_RTP_NAME("14010469329"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
+			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
+			     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("16013271637"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1,
+			     MSC_MSAA_REODER_BUF_BYPASS_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("14014947963"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(FIELD_SET(VF_PREEMPTION,
+				   PREEMPTION_VERTEX_COUNT,
+				   0x4000,
+				   XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("18018764978"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2,
+			     SCOREBOARD_STALL_FLUSH_CONTROL,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("15010599737"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("18019271663"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
 	{}
 };
 
-- 
GitLab


From 95ff48c2e7a6f4968b1f795462e7e3af334c2749 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:10 -0700
Subject: [PATCH 1402/2340] drm/xe: Add missing ADL-P engine workaround

Add the one missing workaround for ADL-P when comparing to i915 up to
commit 7cdae9e9ee5e ("drm/i915: Move DG2 tuning to the right function").

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-13-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 0621706f46e62..e21c7ec53b2fd 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -299,6 +299,16 @@ static const struct xe_rtp_entry engine_was[] = {
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 
+	/* ADL-P */
+
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+
 	/* DG2 */
 
 	{ XE_RTP_NAME("22013037850"),
-- 
GitLab


From fd93946d594efc6df3f48c684ce87cbbde82dcb9 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 17:30:11 -0700
Subject: [PATCH 1403/2340] drm/xe: Add missing LRC workarounds for graphics
 1200

Synchronize LRC workarounds for graphics version 1200 with i915 up to
commit 7cdae9e9ee5e ("drm/i915: Move DG2 tuning to the right function").
These were probably missed for TGL/RKL before because in i915 it uses a
!IS_DG1() condition.  Avoid a similar issue by just checking the
graphics version 1200 since DG1 is 1210.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230314003012.2600353-14-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  4 ++++
 drivers/gpu/drm/xe/xe_wa.c           | 10 ++++++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index a079e1aef5a48..73b0c0bdde5d0 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -98,10 +98,14 @@
 
 #define HIZ_CHICKEN				_MMIO(0x7018)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
+#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE	REG_BIT(13)
 
 /* GEN7 chicken */
 #define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
 
+#define COMMON_SLICE_CHICKEN4			_MMIO(0x7300)
+#define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
+
 #define GEN11_COMMON_SLICE_CHICKEN3		_MMIO(0x7304)
 #define XEHP_COMMON_SLICE_CHICKEN3		MCR_REG(0x7304)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index e21c7ec53b2fd..59d2daab59291 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -536,6 +536,16 @@ static const struct xe_rtp_entry lrc_was[] = {
 				   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
 				   XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
+	{ XE_RTP_NAME("1606376872"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC,
+			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	},
 
 	/* DG1 */
 
-- 
GitLab


From ccbb6ad52ab1a0fa4d386dc9f591240f5eb81646 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 13 Mar 2023 14:16:28 -0700
Subject: [PATCH 1404/2340] drm/xe: Replace i915 with xe in uapi

All structs and defines had already been renamed to "xe", but some
comments with "i915" were left over. Rename them.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://lore.kernel.org/r/20230313211628.2492587-1-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 756c5994ae63d..32a4265de4022 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -37,7 +37,7 @@ extern "C" {
  */
 
 /**
- * struct i915_user_extension - Base class for defining a chain of extensions
+ * struct xe_user_extension - Base class for defining a chain of extensions
  *
  * Many interfaces need to grow over time. In most cases we can simply
  * extend the struct and have userspace pass in more data. Another option,
@@ -55,20 +55,20 @@ extern "C" {
  *
  * .. code-block:: C
  *
- *	struct i915_user_extension ext3 {
+ *	struct xe_user_extension ext3 {
  *		.next_extension = 0, // end
  *		.name = ...,
  *	};
- *	struct i915_user_extension ext2 {
+ *	struct xe_user_extension ext2 {
  *		.next_extension = (uintptr_t)&ext3,
  *		.name = ...,
  *	};
- *	struct i915_user_extension ext1 {
+ *	struct xe_user_extension ext1 {
  *		.next_extension = (uintptr_t)&ext2,
  *		.name = ...,
  *	};
  *
- * Typically the struct i915_user_extension would be embedded in some uAPI
+ * Typically the struct xe_user_extension would be embedded in some uAPI
  * struct, and in this case we would feed it the head of the chain(i.e ext1),
  * which would then apply all of the above extensions.
  *
@@ -77,7 +77,7 @@ struct xe_user_extension {
 	/**
 	 * @next_extension:
 	 *
-	 * Pointer to the next struct i915_user_extension, or zero if the end.
+	 * Pointer to the next struct xe_user_extension, or zero if the end.
 	 */
 	__u64 next_extension;
 	/**
@@ -87,7 +87,7 @@ struct xe_user_extension {
 	 *
 	 * Also note that the name space for this is not global for the whole
 	 * driver, but rather its scope/meaning is limited to the specific piece
-	 * of uAPI which has embedded the struct i915_user_extension.
+	 * of uAPI which has embedded the struct xe_user_extension.
 	 */
 	__u32 name;
 	/**
@@ -99,7 +99,7 @@ struct xe_user_extension {
 };
 
 /*
- * i915 specific ioctls.
+ * xe specific ioctls.
  *
  * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
  * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
-- 
GitLab


From 5fd92bdd54e2f0e0611e690f3e03d6d3fa9621d8 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 9 Mar 2023 14:21:33 +0200
Subject: [PATCH 1405/2340] drm/xe/irq: the irq handler local variable need not
 be static

It's just a local variable.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 071ccc75b71bf..9527e7fb9b6ef 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -516,7 +516,7 @@ static void irq_uninstall(struct drm_device *drm, void *arg)
 int xe_irq_install(struct xe_device *xe)
 {
 	int irq = to_pci_dev(xe->drm.dev)->irq;
-	static irq_handler_t irq_handler;
+	irq_handler_t irq_handler;
 	int err;
 
 	irq_handler = xe_irq_handler(xe);
-- 
GitLab


From 8eb7ad99ae66b4244a1239bfa8723d1a06beddb9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@kernel.org>
Date: Fri, 10 Mar 2023 09:13:39 +0100
Subject: [PATCH 1406/2340] drm/xe/xe_uc_fw: Use firmware files from standard
 locations

The GuC/HuC firmware files used by Xe drivers are the same as
used by i915. Use the already-known location to find those
firmware files, for a couple of reasons:

1. Avoid having the same firmware placed on two different
   places on MODULE_FIRMWARE(), if both 915 and xe drivers
   are compiled;

2. Having firmware files located on different locations may end
   creating bigger initramfs, as the same files will be copied
   twice my mkinitrd/dracut/...;

3. this is the place where those firmware files are located at
   https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
   Upstream doesn't expect them to have on other places;

4. When built with display support, DMC firmware will be
   loaded from i915/ directory. It is very confusing to have
   some firmware files on a different place for the same driver.

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Lucas de Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
[ Mostly agree with the direction of "use the firmware blobs from
  upstream at their current location for these platforms". Previous
  directory was not wrong as the plan was to have it handled in the
  upstream firmware repo. For future platforms the location can be
  changed if the support is only in xe ]
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230310081338.3275583-1-mauro.chehab@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 4 ++--
 drivers/gpu/drm/xe/xe_uc_fw.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index bd89ac27828e5..47b51ad5b015d 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -55,12 +55,12 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
 	fw_def(TIGERLAKE,    0, huc_def(tgl,  7, 9, 3))
 
 #define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
-	"xe/" \
+	"i915/" \
 	__stringify(prefix_) "_" name_ "_" \
 	__stringify(major_) ".bin"
 
 #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
-        "xe/" \
+        "i915/" \
        __stringify(prefix_) name_ \
        __stringify(major_) "." \
        __stringify(minor_) "." \
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index ca64d379bb5ef..bf31c3bb0e0f8 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -175,6 +175,6 @@ static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
 	return __xe_uc_fw_get_upload_size(uc_fw);
 }
 
-#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe"
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915"
 
 #endif
-- 
GitLab


From 7c51050b3b0799f5d74331a7eb81a7066d520731 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 9 Mar 2023 17:20:20 +0100
Subject: [PATCH 1407/2340] drm/xe: Use a define to set initial seqno for
 fences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also for HW fences, write the initial seqno - 1 to the HW completed
seqno to initialize.

v2:
- Use __dma_fence_is_later() to compare hw fence seqnos. (Matthew Auld)

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c   | 5 +++--
 drivers/gpu/drm/xe/xe_hw_fence.c | 4 ++--
 drivers/gpu/drm/xe/xe_hw_fence.h | 2 ++
 drivers/gpu/drm/xe/xe_lrc.c      | 3 +++
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 3e40fb6d3f98f..edd1192f6ec51 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -13,6 +13,7 @@
 
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_hw_fence.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_migrate.h"
@@ -57,11 +58,11 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe,
 
 	if (xe_engine_is_parallel(e)) {
 		e->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
-		e->parallel.composite_fence_seqno = 1;
+		e->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
 	}
 	if (e->flags & ENGINE_FLAG_VM) {
 		e->bind.fence_ctx = dma_fence_context_alloc(1);
-		e->bind.fence_seqno = 1;
+		e->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
 	}
 
 	for (i = 0; i < width; ++i) {
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index e56ca28675451..ffe1a3992ef54 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -129,7 +129,7 @@ void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
 	ctx->gt = gt;
 	ctx->irq = irq;
 	ctx->dma_fence_ctx = dma_fence_context_alloc(1);
-	ctx->next_seqno = 1;
+	ctx->next_seqno = XE_FENCE_INITIAL_SEQNO;
 	sprintf(ctx->name, "%s", name);
 }
 
@@ -165,7 +165,7 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
 	u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
 
 	return dma_fence->error ||
-		(s32)fence->dma.seqno <= (s32)seqno;
+		!__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops);
 }
 
 static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h
index 07f202db65269..523c2611ef5d6 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.h
+++ b/drivers/gpu/drm/xe/xe_hw_fence.h
@@ -8,6 +8,8 @@
 
 #include "xe_hw_fence_types.h"
 
+#define XE_FENCE_INITIAL_SEQNO 1
+
 int xe_hw_fence_module_init(void);
 void xe_hw_fence_module_exit(void);
 
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 9140b057a5ba3..fb8c6f7d6528e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -697,6 +697,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
 
+	map = __xe_lrc_seqno_map(lrc);
+	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+
 	return 0;
 
 err_lrc_finish:
-- 
GitLab


From fc1cc680304db1c452156968f4ab95f9c553f746 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 10 Mar 2023 17:56:55 +0100
Subject: [PATCH 1408/2340] drm/xe/migrate: Update cpu page-table updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't wait for GPU to be able to update page-tables using CPU. Putting
ourselves to sleep may be more of a problem than using GPU for
page-table updates. Also allow the vm to be NULL since the migrate
kunit test uses NULL for vm.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 4a9fe1f7128d1..366892198d11b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -979,25 +979,13 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 	int err;
 	u32 i;
 
-	/* Wait on BO moves for 10 ms, then fall back to GPU job */
-	if (bo) {
-		long wait;
+	if (bo && !dma_resv_test_signaled(bo->ttm.base.resv,
+					  DMA_RESV_USAGE_KERNEL))
+		return ERR_PTR(-ETIME);
 
-		wait = dma_resv_wait_timeout(bo->ttm.base.resv,
-					     DMA_RESV_USAGE_KERNEL,
-					     true, HZ / 100);
-		if (wait <= 0)
-			return ERR_PTR(-ETIME);
-	}
-	if (wait_vm) {
-		long wait;
-
-		wait = dma_resv_wait_timeout(&vm->resv,
-					     DMA_RESV_USAGE_BOOKKEEP,
-					     true, HZ / 100);
-		if (wait <= 0)
-			return ERR_PTR(-ETIME);
-	}
+	if (wait_vm && !dma_resv_test_signaled(&vm->resv,
+					       DMA_RESV_USAGE_BOOKKEEP))
+		return ERR_PTR(-ETIME);
 
 	if (ops->pre_commit) {
 		err = ops->pre_commit(pt_update);
@@ -1011,8 +999,10 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 			      update->ofs, update->qwords, update);
 	}
 
-	trace_xe_vm_cpu_bind(vm);
-	xe_device_wmb(vm->xe);
+	if (vm) {
+		trace_xe_vm_cpu_bind(vm);
+		xe_device_wmb(vm->xe);
+	}
 
 	fence = dma_fence_get_stub();
 
-- 
GitLab


From 17a28ea23c4087cf4580744a70105ccc83efc769 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 10 Mar 2023 16:41:08 +0100
Subject: [PATCH 1409/2340] drm/xe/tests: Support CPU page-table updates in the
 migrate test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The migrate test currently supports only GPU pagetable updates and
will thus break if we fix the CPU pagetable update selection.

Fix the migrate test first.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index ac659b94e7f52..a3bace16282ee 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -66,9 +66,16 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
 {
 	int i;
 	u64 *ptr = dst;
-
-	for (i = 0; i < num_qwords; i++)
-		ptr[i] = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
+	u64 value;
+
+	for (i = 0; i < num_qwords; i++) {
+		value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
+		if (map)
+			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+				  sizeof(u64), u64, value);
+		else
+			ptr[i] = value;
+	}
 }
 
 static const struct xe_migrate_pt_update_ops sanity_ops = {
-- 
GitLab


From 155c9165542863c97b5284afa37e3d8e385a8815 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 10 Mar 2023 12:03:47 +0100
Subject: [PATCH 1410/2340] drm/xe: Introduce xe_engine_is_idle()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce xe_engine_is_idle, and replace the static function in
xe_migrate.c.
The latter had two flaws. First the seqno == 1 test might return a false
true value each time the seqno counter wrapped, Second, the
cur_seqno == next_seqno test would never return true.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c  | 23 +++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_engine.h  |  2 ++
 drivers/gpu/drm/xe/xe_migrate.c |  8 +-------
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index edd1192f6ec51..8011f5827cbee 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -683,6 +683,29 @@ static void engine_kill_compute(struct xe_engine *e)
 	up_write(&e->vm->lock);
 }
 
+/**
+ * xe_engine_is_idle() - Whether an engine is idle.
+ * @engine: The engine
+ *
+ * FIXME: Need to determine what to use as the short-lived
+ * timeline lock for the engines, so that the return value
+ * of this function becomes more than just an advisory
+ * snapshot in time. The timeline lock must protect the
+ * seqno from racing submissions on the same engine.
+ * Typically vm->resv, but user-created timeline locks use the migrate vm
+ * and never grabs the migrate vm->resv so we have a race there.
+ *
+ * Return: True if the engine is idle, false otherwise.
+ */
+bool xe_engine_is_idle(struct xe_engine *engine)
+{
+	if (XE_WARN_ON(xe_engine_is_parallel(engine)))
+		return false;
+
+	return xe_lrc_seqno(&engine->lrc[0]) ==
+		engine->lrc[0].fence_ctx.next_seqno - 1;
+}
+
 void xe_engine_kill(struct xe_engine *e)
 {
 	struct xe_engine *engine = e, *next;
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
index a3a44534003f7..1cf7f23c4afd6 100644
--- a/drivers/gpu/drm/xe/xe_engine.h
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -42,6 +42,8 @@ static inline bool xe_engine_is_parallel(struct xe_engine *engine)
 	return engine->width > 1;
 }
 
+bool xe_engine_is_idle(struct xe_engine *engine);
+
 void xe_engine_kill(struct xe_engine *e);
 
 int xe_engine_create_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 366892198d11b..4a600c64b5d0e 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1024,12 +1024,6 @@ static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
 	return true;
 }
 
-static bool engine_is_idle(struct xe_engine *e)
-{
-	return !e || e->lrc[0].fence_ctx.next_seqno == 1 ||
-		xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno;
-}
-
 /**
  * xe_migrate_update_pgtables() - Pipelined page-table update
  * @m: The migrate context.
@@ -1082,7 +1076,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	bool first_munmap_rebind = vma && vma->first_munmap_rebind;
 
 	/* Use the CPU if no in syncs and engine is idle */
-	if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) {
+	if (no_in_syncs(syncs, num_syncs) && (!eng || xe_engine_is_idle(eng))) {
 		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
 							num_updates,
 							first_munmap_rebind,
-- 
GitLab


From a5dfb471bba18fc38dc623ff1fa4387f48dacba6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 10 Mar 2023 12:07:12 +0100
Subject: [PATCH 1411/2340] drm/xe: Use a small negative initial seqno
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Causes an early 32-bit wrap and may thus help CI catch wrapping errors
that may otherwise not show early enough.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_fence.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h
index 523c2611ef5d6..cfe5fd603787e 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.h
+++ b/drivers/gpu/drm/xe/xe_hw_fence.h
@@ -8,7 +8,8 @@
 
 #include "xe_hw_fence_types.h"
 
-#define XE_FENCE_INITIAL_SEQNO 1
+/* Cause an early wrap to catch wrapping errors */
+#define XE_FENCE_INITIAL_SEQNO (-127)
 
 int xe_hw_fence_module_init(void);
 void xe_hw_fence_module_exit(void);
-- 
GitLab


From 7cba3396fd7e87a976b8ad1e30d734b72dec7e31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 13 Mar 2023 20:37:24 +0100
Subject: [PATCH 1412/2340] drm/xe/tests: Test both CPU- and GPU page-table
 updates with the migrate test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a test parameter to force GPU page-table updates with the migrate
test and test both CPU- and GPU updates. Also provide some timing
results.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 24 ++++++++++++++++++++++--
 drivers/gpu/drm/xe/tests/xe_test.h    |  1 +
 drivers/gpu/drm/xe/xe_migrate.c       | 17 +++++++++++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index a3bace16282ee..e50b6ceb56e63 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -64,6 +64,8 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
 		   u32 qword_ofs, u32 num_qwords,
 		   const struct xe_vm_pgtable_update *update)
 {
+	struct migrate_test_params *p =
+		to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));
 	int i;
 	u64 *ptr = dst;
 	u64 value;
@@ -76,6 +78,10 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
 		else
 			ptr[i] = value;
 	}
+
+	kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU");
+	if (p->force_gpu && map)
+		KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n");
 }
 
 static const struct xe_migrate_pt_update_ops sanity_ops = {
@@ -177,11 +183,12 @@ out_unlock:
 }
 
 static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
-			   struct kunit *test)
+			   struct kunit *test, bool force_gpu)
 {
 	struct xe_device *xe = gt_to_xe(m->gt);
 	struct dma_fence *fence;
 	u64 retval, expected;
+	ktime_t then, now;
 	int i;
 
 	struct xe_vm_pgtable_update update = {
@@ -192,16 +199,26 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
 	struct xe_migrate_pt_update pt_update = {
 		.ops = &sanity_ops,
 	};
+	struct migrate_test_params p = {
+		.base.id = XE_TEST_LIVE_MIGRATE,
+		.force_gpu = force_gpu,
+	};
 
+	test->priv = &p;
 	/* Test xe_migrate_update_pgtables() updates the pagetable as expected */
 	expected = 0xf0f0f0f0f0f0f0f0ULL;
 	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
 
+	then = ktime_get();
 	fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1,
 					   NULL, 0, &pt_update);
+	now = ktime_get();
 	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
 		return;
 
+	kunit_info(test, "Updating without syncing took %llu us,\n",
+		   (unsigned long long)ktime_to_us(ktime_sub(now, then)));
+
 	dma_fence_put(fence);
 	retval = xe_map_rd(xe, &pt->vmap, 0, u64);
 	check(retval, expected, "PTE[0] must stay untouched", test);
@@ -344,7 +361,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		test_copy(m, big, test);
 	}
 
-	test_pt_update(m, pt, test);
+	kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
+	test_pt_update(m, pt, test, false);
+	kunit_info(test, "Testing page table update using GPU\n");
+	test_pt_update(m, pt, test, true);
 
 out:
 	xe_bb_free(bb, NULL);
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
index 00c8a3f9af814..7a1ae213e750a 100644
--- a/drivers/gpu/drm/xe/tests/xe_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -18,6 +18,7 @@
  */
 enum xe_test_priv_id {
 	XE_TEST_LIVE_DMA_BUF,
+	XE_TEST_LIVE_MIGRATE,
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 4a600c64b5d0e..3ee3d707a8ca7 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -13,6 +13,7 @@
 #include <drm/xe_drm.h>
 
 #include "regs/xe_gpu_commands.h"
+#include "tests/xe_test.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_engine.h"
@@ -967,6 +968,16 @@ struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
 	return xe_vm_get(m->eng->vm);
 }
 
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+struct migrate_test_params {
+	struct xe_test_priv base;
+	bool force_gpu;
+};
+
+#define to_migrate_test_params(_priv) \
+	container_of(_priv, struct migrate_test_params, base)
+#endif
+
 static struct dma_fence *
 xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 			       struct xe_vm *vm, struct xe_bo *bo,
@@ -974,11 +985,17 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 			       u32 num_updates, bool wait_vm,
 			       struct xe_migrate_pt_update *pt_update)
 {
+	XE_TEST_DECLARE(struct migrate_test_params *test =
+			to_migrate_test_params
+			(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));)
 	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
 	struct dma_fence *fence;
 	int err;
 	u32 i;
 
+	if (XE_TEST_ONLY(test && test->force_gpu))
+		return ERR_PTR(-ETIME);
+
 	if (bo && !dma_resv_test_signaled(bo->ttm.base.resv,
 					  DMA_RESV_USAGE_KERNEL))
 		return ERR_PTR(-ETIME);
-- 
GitLab


From 8e41443e1bb7a9aa03263ab9e317ef04927be5aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 14 Mar 2023 15:56:44 +0100
Subject: [PATCH 1413/2340] drm/xe/vm: Defer vm rebind until next exec if
 nothing to execute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If all compute engines of a vm in compute mode are idle,
defer a rebind to the next exec to avoid the VM unnecessarily trying
to make memory resident and compete with other VMs for available
memory space.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c     |  1 +
 drivers/gpu/drm/xe/xe_vm.c       | 18 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_vm.h       | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_vm_types.h |  5 +++++
 4 files changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 97fd1a311f2df..ea869f2452efe 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -364,6 +364,7 @@ retry:
 				     &job->drm.s_fence->finished);
 
 	xe_sched_job_push(job);
+	xe_vm_reactivate_rebind(vm);
 
 err_repin:
 	if (!xe_vm_no_dma_fences(vm))
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 6cc3204adaa8a..207d20da5c688 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -226,6 +226,19 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm)
 	return 0;
 }
 
+static bool xe_vm_is_idle(struct xe_vm *vm)
+{
+	struct xe_engine *e;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+		if (!xe_engine_is_idle(e))
+			return false;
+	}
+
+	return true;
+}
+
 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
 {
 	struct list_head *link;
@@ -548,6 +561,11 @@ retry:
 	if (err)
 		goto out_unlock_outer;
 
+	if (xe_vm_is_idle(vm)) {
+		vm->preempt.rebind_deactivated = true;
+		goto out_unlock;
+	}
+
 	/* Fresh preempt fences already installed. Everyting is running. */
 	if (!preempt_fences_waiting(vm))
 		goto out_unlock;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 3468ed9d05288..748dc16ebed9a 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -98,6 +98,23 @@ extern struct ttm_device_funcs xe_ttm_funcs;
 
 struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
 
+/**
+ * xe_vm_reactivate_rebind() - Reactivate the rebind functionality on compute
+ * vms.
+ * @vm: The vm.
+ *
+ * If the rebind functionality on a compute vm was disabled due
+ * to nothing to execute. Reactivate it and run the rebind worker.
+ * This function should be called after submitting a batch to a compute vm.
+ */
+static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
+{
+	if (xe_vm_in_compute_mode(vm) && vm->preempt.rebind_deactivated) {
+		vm->preempt.rebind_deactivated = false;
+		queue_work(system_unbound_wq, &vm->preempt.rebind_work);
+	}
+}
+
 static inline bool xe_vma_is_userptr(struct xe_vma *vma)
 {
 	return !vma->bo;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 2a3b911ab3585..fada7896867f3 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -293,6 +293,11 @@ struct xe_vm {
 		struct list_head engines;
 		/** @num_engines: number user engines attached to this VM */
 		int num_engines;
+		/**
+		 * @rebind_deactivated: Whether rebind has been temporarily deactivated
+		 * due to no work available. Protected by the vm resv.
+		 */
+		bool rebind_deactivated;
 		/**
 		 * @rebind_work: worker to rebind invalidated userptrs / evicted
 		 * BOs
-- 
GitLab


From 2492f4544e6f81c3bb37abdcbc027bf7934b0310 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 14 Mar 2023 08:58:39 +0000
Subject: [PATCH 1414/2340] drm/xe/vram: start tracking the io_size

First step towards supporting small-bar is to track the io_size for
vram. We can longer assume that the io_size == vram size. This way we
know how much is CPU accessible via the BAR, and how much is not.
Effectively giving us a two tiered vram, where in some later patches we
can support different allocation strategies depending on if the memory
needs to be CPU accessible or not.

Note as this stage we still clamp the vram size to the usable vram size.
Only in the final patch do we turn this on for real, and allow distinct
io_size and vram_size.

v2: (Lucas):
  - Improve the commit message, plus improve the kernel-doc for the
    io_size to give a better sense of what it actually is.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 14 +++++++--
 drivers/gpu/drm/xe/xe_gt_types.h     | 14 +++++++--
 drivers/gpu/drm/xe/xe_mmio.c         | 44 ++++++++++++++++++++--------
 3 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 00b1db28a4b47..8d99450f0bf45 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -115,9 +115,19 @@ struct xe_device {
 	struct {
 		/** @vram: VRAM info for device */
 		struct {
-			/** @io_start: start address of VRAM */
+			/** @io_start: IO start address of VRAM */
 			resource_size_t io_start;
-			/** @size: size of VRAM */
+			/**
+			 * @io_size: IO size of VRAM.
+			 *
+			 * This represents how much of VRAM we can access via
+			 * the CPU through the VRAM BAR. This can be smaller
+			 * than @size, in which case only part of VRAM is CPU
+			 * accessible (typically the first 256M). This
+			 * configuration is known as small-bar.
+			 */
+			resource_size_t io_size;
+			/** @size: Total size of VRAM */
 			resource_size_t size;
 			/** @mapping: pointer to VRAM mappable space */
 			void *__iomem mapping;
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 74b4e6776bf16..8f29aba455e05 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -143,9 +143,19 @@ struct xe_gt {
 		 * (virtual split), can be subset of global device VRAM
 		 */
 		struct {
-			/** @io_start: start address of VRAM */
+			/** @io_start: IO start address of this VRAM instance */
 			resource_size_t io_start;
-			/** @size: size of VRAM */
+			/**
+			 * @io_size: IO size of this VRAM instance
+			 *
+			 * This represents how much of this VRAM we can access
+			 * via the CPU through the VRAM BAR. This can be smaller
+			 * than @size, in which case only part of VRAM is CPU
+			 * accessible (typically the first 256M). This
+			 * configuration is known as small-bar.
+			 */
+			resource_size_t io_size;
+			/** @size: size of VRAM. */
 			resource_size_t size;
 			/** @mapping: pointer to VRAM mappable space */
 			void *__iomem mapping;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index e5bd4609aaeed..5cacaa05759ad 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -182,7 +182,6 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	u8 id;
 	u64 vram_size;
 	u64 original_size;
-	u64 current_size;
 	u64 usable_size;
 	int resize_result, err;
 
@@ -190,11 +189,13 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		xe->mem.vram.mapping = 0;
 		xe->mem.vram.size = 0;
 		xe->mem.vram.io_start = 0;
+		xe->mem.vram.io_size = 0;
 
 		for_each_gt(gt, xe, id) {
 			gt->mem.vram.mapping = 0;
 			gt->mem.vram.size = 0;
 			gt->mem.vram.io_start = 0;
+			gt->mem.vram.io_size = 0;
 		}
 		return 0;
 	}
@@ -212,10 +213,10 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		return err;
 
 	resize_result = xe_resize_vram_bar(xe, vram_size);
-	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
-
-	xe->mem.vram.size = min(current_size, vram_size);
+	xe->mem.vram.io_size = min(usable_size,
+				   pci_resource_len(pdev, GEN12_LMEM_BAR));
+	xe->mem.vram.size = xe->mem.vram.io_size;
 
 	if (!xe->mem.vram.size)
 		return -EIO;
@@ -223,15 +224,15 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	if (resize_result > 0)
 		drm_info(&xe->drm, "Successfully resize VRAM from %lluMiB to %lluMiB\n",
 			 (u64)original_size >> 20,
-			 (u64)current_size >> 20);
-	else if (xe->mem.vram.size < vram_size && !xe_force_vram_bar_size)
+			 (u64)xe->mem.vram.io_size >> 20);
+	else if (xe->mem.vram.io_size < usable_size && !xe_force_vram_bar_size)
 		drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
 			 (u64)xe->mem.vram.size >> 20);
 	if (xe->mem.vram.size < vram_size)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
 			 vram_size, (u64)xe->mem.vram.size);
 
-	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
+	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
 	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
 
 	drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
@@ -239,7 +240,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	/* FIXME: Assuming equally partitioned VRAM, incorrect */
 	if (xe->info.tile_count > 1) {
 		u8 adj_tile_count = xe->info.tile_count;
-		resource_size_t size, io_start;
+		resource_size_t size, io_start, io_size;
 
 		for_each_gt(gt, xe, id)
 			if (xe_gt_is_media_type(gt))
@@ -249,15 +250,31 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 
 		size = xe->mem.vram.size / adj_tile_count;
 		io_start = xe->mem.vram.io_start;
+		io_size = xe->mem.vram.io_size;
 
 		for_each_gt(gt, xe, id) {
-			if (id && !xe_gt_is_media_type(gt))
-				io_start += size;
+			if (id && !xe_gt_is_media_type(gt)) {
+				io_size -= min(io_size, size);
+				io_start += io_size;
+			}
 
 			gt->mem.vram.size = size;
-			gt->mem.vram.io_start = io_start;
-			gt->mem.vram.mapping = xe->mem.vram.mapping +
-				(io_start - xe->mem.vram.io_start);
+
+			/*
+			 * XXX: multi-tile small-bar might be wild. Hopefully
+			 * full tile without any mappable vram is not something
+			 * we care about.
+			 */
+
+			gt->mem.vram.io_size = min(size, io_size);
+			if (io_size) {
+				gt->mem.vram.io_start = io_start;
+				gt->mem.vram.mapping = xe->mem.vram.mapping +
+					(io_start - xe->mem.vram.io_start);
+			} else {
+				drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
+				return -ENODEV;
+			}
 
 			drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n",
 				 id, gt->info.vram_id, &gt->mem.vram.io_start,
@@ -266,6 +283,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	} else {
 		gt->mem.vram.size = xe->mem.vram.size;
 		gt->mem.vram.io_start = xe->mem.vram.io_start;
+		gt->mem.vram.io_size = xe->mem.vram.io_size;
 		gt->mem.vram.mapping = xe->mem.vram.mapping;
 
 		drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size);
-- 
GitLab


From 436dbd6bffbf895ea151cf21af410ec1978cc10d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 17 Mar 2023 15:34:41 -0700
Subject: [PATCH 1415/2340] drm/xe/mcr: Separate version from engine type
 selection

In order to improve readability and make it more future proof,
split the engine type from the graphics/platform checks.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230317223441.3891073-1-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 909059112179a..5412f77bc26fd 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -290,22 +290,24 @@ void xe_gt_mcr_init(struct xe_gt *gt)
 		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
 
 		gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
-	} else if (GRAPHICS_VERx100(xe) >= 1270) {
-		gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
-		gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
-		gt->steering[DSS].ranges = xelpg_dss_steering_table;
-	} else if (xe->info.platform == XE_PVC) {
-		gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
-		gt->steering[DSS].ranges = xehpc_dss_steering_table;
-	} else if (xe->info.platform == XE_DG2) {
-		gt->steering[L3BANK].ranges = xehp_l3bank_steering_table;
-		gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
-		gt->steering[LNCF].ranges = xehp_lncf_steering_table;
-		gt->steering[DSS].ranges = xehp_dss_steering_table;
-		gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table;
 	} else {
-		gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
-		gt->steering[DSS].ranges = xelp_dss_steering_table;
+		if (GRAPHICS_VERx100(xe) >= 1270) {
+			gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
+			gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+			gt->steering[DSS].ranges = xelpg_dss_steering_table;
+		} else if (xe->info.platform == XE_PVC) {
+			gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
+			gt->steering[DSS].ranges = xehpc_dss_steering_table;
+		} else if (xe->info.platform == XE_DG2) {
+			gt->steering[L3BANK].ranges = xehp_l3bank_steering_table;
+			gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
+			gt->steering[LNCF].ranges = xehp_lncf_steering_table;
+			gt->steering[DSS].ranges = xehp_dss_steering_table;
+			gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table;
+		} else {
+			gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
+			gt->steering[DSS].ranges = xelp_dss_steering_table;
+		}
 	}
 
 	/* Select non-terminated steering target for each type */
-- 
GitLab


From 1a653b879d6e408813096434ece5fa46c0752343 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 14 Mar 2023 08:58:40 +0000
Subject: [PATCH 1416/2340] drm/xe/buddy: remove the virtualized start

Hopefully not needed anymore. We can add a .compatible() hook once we
need to differentiate between mappable and non-mappable vram. If the
allocation is not contiguous then the start value is kind of
meaningless, so rather just mark as invalid.

In upstream, TTM wants to eventually remove the ttm_resource.start
usage.

References: 544432703b2f ("drm/ttm: Add new callbacks to ttm res mgr")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c           |  6 ++++++
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 29 ++++++++++++++--------------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 5e309b26f75c5..3ca28f84dff75 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -672,6 +672,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 				void *new_addr = gt->mem.vram.mapping +
 					(new_mem->start << PAGE_SHIFT);
 
+				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
+					ret = -EINVAL;
+					xe_device_mem_access_put(xe);
+					goto out;
+				}
+
 				XE_BUG_ON(new_mem->start !=
 					  bo->placements->fpfn);
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 159ca7105df10..bafcadaed6b07 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -54,7 +54,6 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	struct xe_ttm_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
 	struct drm_buddy *mm = &mgr->mm;
-	struct drm_buddy_block *block;
 	unsigned long pages_per_block;
 	int r;
 
@@ -186,24 +185,24 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 			list_splice_tail(trim_list, &vres->blocks);
 	}
 
-	vres->base.start = 0;
-	list_for_each_entry(block, &vres->blocks, link) {
-		unsigned long start;
+	if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) &&
+	    xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks))
+		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
 
-		start = drm_buddy_block_offset(block) +
-			drm_buddy_block_size(mm, block);
-		start >>= PAGE_SHIFT;
+	/*
+	 * For some kernel objects we still rely on the start when io mapping
+	 * the object.
+	 */
+	if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) {
+		struct drm_buddy_block *block = list_first_entry(&vres->blocks,
+								 typeof(*block),
+								 link);
 
-		if (start > PFN_UP(vres->base.size))
-			start -= PFN_UP(vres->base.size);
-		else
-			start = 0;
-		vres->base.start = max(vres->base.start, start);
+		vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT;
+	} else {
+		vres->base.start = XE_BO_INVALID_OFFSET;
 	}
 
-	if (xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks))
-		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
-
 	*res = &vres->base;
 	return 0;
 
-- 
GitLab


From eb230dc47dd6f543ae2ff9c85bbe86243502e171 Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Fri, 17 Mar 2023 22:23:35 +0530
Subject: [PATCH 1417/2340] drm/xe: Use max wopcm size when validating the
 preset GuC wopcm size

When the GuC wopcm base and size registers are populated by BIOS/IFWI,
validate the parameters against the maximum allowed wopcm size.

Bpsec: 44982

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wopcm.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index 7bb880355f6ba..c8cc3f5e6154a 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -54,6 +54,8 @@
 						   for MTL, do a proper probe
 						   sooner or later */
 #define GEN11_WOPCM_SIZE		SZ_2M
+
+#define GEN12_MAX_WOPCM_SIZE            SZ_8M
 /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
 #define WOPCM_RESERVED_SIZE		SZ_16K
 
@@ -221,6 +223,13 @@ int xe_wopcm_init(struct xe_wopcm *wopcm)
 	if (locked) {
 		drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
 			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+		/*
+		 * When the GuC wopcm base and size are preprogrammed by
+		 * BIOS/IFWI, check against the max allowed wopcm size to
+		 * validate if the programmed values align to the wopcm layout.
+		 */
+		wopcm->size = GEN12_MAX_WOPCM_SIZE;
+
 		goto check;
 	}
 
-- 
GitLab


From 11a2407ed5f017edcea436220ebba7c8619924f2 Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Fri, 17 Mar 2023 21:05:30 +0530
Subject: [PATCH 1418/2340] drm/xe: Stop accepting value in xe_migrate_clear

Although xe_migrate_clear() has a value argument, currently the driver
is only passing 0 at all the places this function is invoked with the
exception the kunit tests are using the parameter to validate this
function with different values.
xe_migrate_clear() is failing on platforms with link copy engines
because xe_migrate_clear() via emit_clear() is using the blitter
instruction XY_FAST_COLOR_BLT to clear the memory. But this instruction
is not supported by link copy engine.
So the solution is to use the alternate instruction MEM_SET when
platform contains link copy engine. But MEM_SET instruction accepts only
8-bit value for setting whereas the value agrument of xe_migrate_clear()
is 32-bit.
So instead of spreading this limitation around all invocations of
xe_migrate_clear() and causing more confusion, it was decided to not
accept any value itself as driver does not really need this currently.

All the kunit tests are adapted as per the new function prototype.

This will be followed by a patch to add support for link copy engines.

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  9 ++++
 drivers/gpu/drm/xe/tests/xe_bo.c          |  2 +-
 drivers/gpu/drm/xe/tests/xe_migrate.c     | 18 +++----
 drivers/gpu/drm/xe/xe_bo.c                |  2 +-
 drivers/gpu/drm/xe/xe_device_types.h      |  2 +
 drivers/gpu/drm/xe/xe_migrate.c           | 65 +++++++++++++++++++----
 drivers/gpu/drm/xe/xe_migrate.h           |  3 +-
 drivers/gpu/drm/xe/xe_pci.c               |  3 ++
 8 files changed, 80 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 288576035ce37..e60372a827237 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -6,6 +6,8 @@
 #ifndef _XE_GPU_COMMANDS_H_
 #define _XE_GPU_COMMANDS_H_
 
+#include "regs/xe_reg_defs.h"
+
 #define INSTR_CLIENT_SHIFT      29
 #define   INSTR_MI_CLIENT       0x0
 #define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
@@ -56,6 +58,13 @@
 #define GEN9_XY_FAST_COPY_BLT_CMD	(2 << 29 | 0x42 << 22)
 #define   BLT_DEPTH_32			(3<<24)
 
+#define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
+#define   PVC_MEM_SET_CMD_LEN_DW	7
+#define   PVC_MS_MATRIX			REG_BIT(17)
+#define   PVC_MS_DATA_FIELD		GENMASK(31, 24)
+/* Bspec lists field as [6:0], but index alone is from [6:1] */
+#define   PVC_MS_MOCS_INDEX_MASK	GENMASK(6, 1)
+
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
 #define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
 #define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index f03fb907b59a6..3c60cbdf516c1 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -32,7 +32,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
 
 	/* Optionally clear bo *and* CCS data in VRAM. */
 	if (clear) {
-		fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0);
+		fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource);
 		if (IS_ERR(fence)) {
 			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
 			return PTR_ERR(fence);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index e50b6ceb56e63..17829f878757e 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -99,7 +99,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 		      struct kunit *test)
 {
 	struct xe_device *xe = gt_to_xe(m->gt);
-	u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL;
+	u64 retval, expected = 0;
 	bool big = bo->size >= SZ_2M;
 	struct dma_fence *fence;
 	const char *str = big ? "Copying big bo" : "Copying small bo";
@@ -130,7 +130,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	}
 
 	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
-	fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0);
+	fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource);
 	if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" :
 				 "Clearing sysmem small bo", test)) {
 		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
@@ -311,10 +311,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	bb->len = 0;
 	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
-	expected = 0x12345678U;
+	expected = 0;
 
 	emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
-		   expected, IS_DGFX(xe));
+		   IS_DGFX(xe));
 	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
 		       test);
 
@@ -326,8 +326,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	/* Clear a small bo */
 	kunit_info(test, "Clearing small buffer object\n");
 	xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
-	expected = 0x224488ff;
-	fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected);
+	expected = 0;
+	fence = xe_migrate_clear(m, tiny, tiny->ttm.resource);
 	if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
 		goto out;
 
@@ -342,11 +342,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		test_copy(m, tiny, test);
 	}
 
-	/* Clear a big bo with a fixed value */
+	/* Clear a big bo */
 	kunit_info(test, "Clearing big buffer object\n");
 	xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
-	expected = 0x11223344U;
-	fence = xe_migrate_clear(m, big, big->ttm.resource, expected);
+	expected = 0;
+	fence = xe_migrate_clear(m, big, big->ttm.resource);
 	if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
 		goto out;
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3ca28f84dff75..ba156a85460c7 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -686,7 +686,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		}
 	} else {
 		if (move_lacks_source)
-			fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0);
+			fence = xe_migrate_clear(gt->migrate, bo, new_mem);
 		else
 			fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
 		if (IS_ERR(fence)) {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 8d99450f0bf45..377a8979bc06d 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -89,6 +89,8 @@ struct xe_device {
 		bool has_4tile;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
 		bool has_range_tlb_invalidation;
+		/** @has_link_copy_engines: Whether the platform has link copy engines */
+		bool has_link_copy_engine;
 	} info;
 
 	/** @irq: device interrupt state */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 3ee3d707a8ca7..9102fa1d87591 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -747,14 +747,35 @@ err_sync:
 	return fence;
 }
 
-static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
-		      u32 size, u32 pitch, u32 value, bool is_vram)
+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+				 u32 size, u32 pitch)
 {
+	u32 *cs = bb->cs + bb->len;
+	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+	u32 len = PVC_MEM_SET_CMD_LEN_DW;
+
+	*cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
+	*cs++ = pitch - 1;
+	*cs++ = (size / pitch) - 1;
+	*cs++ = pitch - 1;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	*cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
+
+	XE_BUG_ON(cs - bb->cs != len + bb->len);
+
+	bb->len += len;
+}
+
+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
+				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
+{
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 len = XY_FAST_COLOR_BLT_DW;
 	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
 
-	if (GRAPHICS_VERx100(gt->xe) < 1250)
+	if (GRAPHICS_VERx100(xe) < 1250)
 		len = 11;
 
 	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
@@ -766,7 +787,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
 	*cs++ = lower_32_bits(src_ofs);
 	*cs++ = upper_32_bits(src_ofs);
 	*cs++ = (is_vram ? 0x0 : 0x1) <<  XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
-	*cs++ = value;
+	*cs++ = 0;
 	*cs++ = 0;
 	*cs++ = 0;
 	*cs++ = 0;
@@ -780,7 +801,30 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
 	}
 
 	XE_BUG_ON(cs - bb->cs != len + bb->len);
+
 	bb->len += len;
+}
+
+static u32 emit_clear_cmd_len(struct xe_device *xe)
+{
+	if (xe->info.has_link_copy_engine)
+		return PVC_MEM_SET_CMD_LEN_DW;
+	else
+		return XY_FAST_COLOR_BLT_DW;
+}
+
+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+		      u32 size, u32 pitch, bool is_vram)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.has_link_copy_engine) {
+		emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
+
+	} else {
+		emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
+				     is_vram);
+	}
 
 	return 0;
 }
@@ -790,10 +834,9 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
  * @m: The migration context.
  * @bo: The buffer object @dst is currently bound to.
  * @dst: The dst TTM resource to be cleared.
- * @value: Clear value.
  *
- * Clear the contents of @dst. On flat CCS devices,
- * the CCS metadata is cleared to zero as well on VRAM destionations.
+ * Clear the contents of @dst to zero. On flat CCS devices,
+ * the CCS metadata is cleared to zero as well on VRAM destinations.
  * TODO: Eliminate the @bo argument.
  *
  * Return: Pointer to a dma_fence representing the last clear batch, or
@@ -802,8 +845,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
  */
 struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 				   struct xe_bo *bo,
-				   struct ttm_resource *dst,
-				   u32 value)
+				   struct ttm_resource *dst)
 {
 	bool clear_vram = mem_type_is_vram(dst->mem_type);
 	struct xe_gt *gt = m->gt;
@@ -837,7 +879,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		batch_size = 2 +
 			pte_update_size(m, clear_vram, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
-					XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
+					emit_clear_cmd_len(xe), 0,
+					NUM_PT_PER_BLIT);
 		if (xe_device_has_flat_ccs(xe) && clear_vram)
 			batch_size += EMIT_COPY_CCS_DW;
 
@@ -868,7 +911,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		update_idx = bb->len;
 
 		emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE,
-			   value, clear_vram);
+			   clear_vram);
 		if (xe_device_has_flat_ccs(xe) && clear_vram) {
 			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
 				      m->cleared_vram_ofs, false, clear_L0);
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index a569851db6f7f..1ff6e0a90de50 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -79,8 +79,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 				   struct xe_bo *bo,
-				   struct ttm_resource *dst,
-				   u32 value);
+				   struct ttm_resource *dst);
 
 struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6dcefb8cc7c37..0a3b61f08d370 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -71,6 +71,7 @@ struct xe_device_desc {
 	bool has_4tile;
 	bool has_range_tlb_invalidation;
 	bool has_asid;
+	bool has_link_copy_engine;
 };
 
 #define PLATFORM(x)		\
@@ -226,6 +227,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	.vm_max_level = 4,
 	.supports_usm = true,
 	.has_asid = true,
+	.has_link_copy_engine = true,
 };
 
 #define MTL_MEDIA_ENGINES \
@@ -413,6 +415,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	xe->info.has_flat_ccs = desc->has_flat_ccs;
 	xe->info.has_4tile = desc->has_4tile;
 	xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
+	xe->info.has_link_copy_engine = desc->has_link_copy_engine;
 
 	spd = subplatform_get(xe, desc);
 	xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
-- 
GitLab


From 793e6612deea5cf8117100b1d47754800b24dcfa Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 21 Mar 2023 11:44:06 +0000
Subject: [PATCH 1419/2340] drm/xe/buddy: add visible tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the allocation code with the i915 version. This simplifies the
code a little, and importantly we get the accounting at the mgr level,
which is useful for debug (and maybe userspace), plus per resource
tracking so we can easily check if a resource is using one or pages in
the mappable part of vram (useful for eviction), or if the resource is
completely within the mappable portion (useful for checking if the
resource can be safely CPU mapped).

v2: Fix missing PAGE_SHIFT
v3: (Gwan-gyeong Mun)
  - Fix incorrect usage of ilog2(mm.chunk_size).
  - Fix calculation when checking for impossible allocation sizes, also
    check much earlier.
v4: (Gwan-gyeong Mun)
  - Fix calculation when extending the [fpfn, lpfn] range due to the
    roundup_pow_of_two().
v5: (Gwan-gyeong Mun)
  - Move the check for running out of mappable VRAM to before doing any of
    the roundup_pow_of_two().
v6: (Jani)
  - Stop abusing BUG_ON(). We can easily just use WARN_ON() here and
    return a proper error to the caller, which is much nicer if we ever
    trigger these.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c     |  18 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c       | 211 +++++++++++----------
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h       |   3 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h |   6 +
 4 files changed, 129 insertions(+), 109 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 9629b1a677f21..31887fec1073b 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -135,7 +135,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 {
 	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-	u64 stolen_size, pgsize;
+	u64 stolen_size, io_size, pgsize;
 	int err;
 
 	if (IS_DGFX(xe))
@@ -154,7 +154,17 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 	if (pgsize < PAGE_SIZE)
 		pgsize = PAGE_SIZE;
 
-	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, pgsize);
+	/*
+	 * We don't try to attempt partial visible support for stolen vram,
+	 * since stolen is always at the end of vram, and the BAR size is pretty
+	 * much always 256M, with small-bar.
+	 */
+	io_size = 0;
+	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+		io_size = stolen_size;
+
+	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
+				     io_size, pgsize);
 	if (err) {
 		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
 		return;
@@ -163,8 +173,8 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
 		    stolen_size);
 
-	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
-		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size);
+	if (io_size)
+		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
 }
 
 u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index bafcadaed6b07..78979c0b6024f 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -49,45 +49,29 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_resource **res)
 {
-	u64 max_bytes, cur_size, min_block_size;
 	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct xe_ttm_vram_mgr_resource *vres;
-	u64 size, remaining_size, lpfn, fpfn;
 	struct drm_buddy *mm = &mgr->mm;
-	unsigned long pages_per_block;
-	int r;
-
-	lpfn = (u64)place->lpfn << PAGE_SHIFT;
-	if (!lpfn || lpfn > man->size)
-		lpfn = man->size;
+	u64 size, remaining_size, min_page_size;
+	unsigned long lpfn;
+	int err;
 
-	fpfn = (u64)place->fpfn << PAGE_SHIFT;
+	lpfn = place->lpfn;
+	if (!lpfn || lpfn > man->size >> PAGE_SHIFT)
+		lpfn = man->size >> PAGE_SHIFT;
 
-	max_bytes = mgr->manager.size;
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-		pages_per_block = ~0ul;
-	} else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		pages_per_block = HPAGE_PMD_NR;
-#else
-		/* default to 2MB */
-		pages_per_block = 2UL << (20UL - PAGE_SHIFT);
-#endif
-
-		pages_per_block = max_t(uint32_t, pages_per_block,
-					tbo->page_alignment);
-	}
+	if (tbo->base.size >> PAGE_SHIFT > (lpfn - place->fpfn))
+		return -E2BIG; /* don't trigger eviction for the impossible */
 
 	vres = kzalloc(sizeof(*vres), GFP_KERNEL);
 	if (!vres)
 		return -ENOMEM;
 
 	ttm_resource_init(tbo, place, &vres->base);
-	remaining_size = vres->base.size;
 
 	/* bail out quickly if there's likely not enough VRAM for this BO */
-	if (ttm_resource_manager_usage(man) > max_bytes) {
-		r = -ENOSPC;
+	if (ttm_resource_manager_usage(man) > man->size) {
+		err = -ENOSPC;
 		goto error_fini;
 	}
 
@@ -96,95 +80,100 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
 		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
 
-	if (fpfn || lpfn != man->size)
-		/* Allocate blocks in desired range */
+	if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
-	mutex_lock(&mgr->lock);
-	while (remaining_size) {
-		if (tbo->page_alignment)
-			min_block_size = tbo->page_alignment << PAGE_SHIFT;
-		else
-			min_block_size = mgr->default_page_size;
-
-		XE_BUG_ON(min_block_size < mm->chunk_size);
-
-		/* Limit maximum size to 2GiB due to SG table limitations */
-		size = min(remaining_size, 2ULL << 30);
-
-		if (size >= pages_per_block << PAGE_SHIFT)
-			min_block_size = pages_per_block << PAGE_SHIFT;
-
-		cur_size = size;
-
-		if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
-			/*
-			 * Except for actual range allocation, modify the size and
-			 * min_block_size conforming to continuous flag enablement
-			 */
-			if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-				size = roundup_pow_of_two(size);
-				min_block_size = size;
-			/*
-			 * Modify the size value if size is not
-			 * aligned with min_block_size
-			 */
-			} else if (!IS_ALIGNED(size, min_block_size)) {
-				size = round_up(size, min_block_size);
-			}
-		}
+	if (WARN_ON(!vres->base.size)) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+	size = vres->base.size;
 
-		r = drm_buddy_alloc_blocks(mm, fpfn,
-					   lpfn,
-					   size,
-					   min_block_size,
-					   &vres->blocks,
-					   vres->flags);
-		if (unlikely(r))
-			goto error_free_blocks;
+	min_page_size = mgr->default_page_size;
+	if (tbo->page_alignment)
+		min_page_size = tbo->page_alignment << PAGE_SHIFT;
+
+	if (WARN_ON(min_page_size < mm->chunk_size)) {
+		err = -EINVAL;
+		goto error_fini;
+	}
 
-		if (size > remaining_size)
-			remaining_size = 0;
-		else
-			remaining_size -= size;
+	if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */
+		err = -EINVAL;
+		goto error_fini;
 	}
-	mutex_unlock(&mgr->lock);
 
-	if (cur_size != size) {
-		struct drm_buddy_block *block;
-		struct list_head *trim_list;
-		u64 original_size;
-		LIST_HEAD(temp);
+	if (WARN_ON((size > SZ_2G &&
+		     (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) {
+		err = -EINVAL;
+		goto error_fini;
+	}
 
-		trim_list = &vres->blocks;
-		original_size = vres->base.size;
+	if (WARN_ON(!IS_ALIGNED(size, min_page_size))) {
+		err = -EINVAL;
+		goto error_fini;
+	}
 
+	mutex_lock(&mgr->lock);
+	if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) {
+		mutex_unlock(&mgr->lock);
+		err = -ENOSPC;
+		goto error_fini;
+	}
+
+	if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn &&
+	    place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		size = roundup_pow_of_two(size);
+		min_page_size = size;
+
+		lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
+	}
+
+	remaining_size = size;
+	do {
 		/*
-		 * If size value is rounded up to min_block_size, trim the last
-		 * block to the required size
+		 * Limit maximum size to 2GiB due to SG table limitations.
+		 * FIXME: Should maybe be handled as part of sg construction.
 		 */
-		if (!list_is_singular(&vres->blocks)) {
-			block = list_last_entry(&vres->blocks, typeof(*block), link);
-			list_move_tail(&block->link, &temp);
-			trim_list = &temp;
-			/*
-			 * Compute the original_size value by subtracting the
-			 * last block size with (aligned size - original size)
-			 */
-			original_size = drm_buddy_block_size(mm, block) -
-				(size - cur_size);
-		}
+		u64 alloc_size = min_t(u64, remaining_size, SZ_2G);
+
+		err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+					     (u64)lpfn << PAGE_SHIFT,
+					     alloc_size,
+					     min_page_size,
+					     &vres->blocks,
+					     vres->flags);
+		if (err)
+			goto error_free_blocks;
 
-		mutex_lock(&mgr->lock);
-		drm_buddy_block_trim(mm,
-				     original_size,
-				     trim_list);
-		mutex_unlock(&mgr->lock);
+		remaining_size -= alloc_size;
+	} while (remaining_size);
 
-		if (!list_empty(&temp))
-			list_splice_tail(trim_list, &vres->blocks);
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks))
+			size = vres->base.size;
 	}
 
+	if (lpfn <= mgr->visible_size >> PAGE_SHIFT) {
+		vres->used_visible_size = size;
+	} else {
+		struct drm_buddy_block *block;
+
+		list_for_each_entry(block, &vres->blocks, link) {
+			u64 start = drm_buddy_block_offset(block);
+
+			if (start < mgr->visible_size) {
+				u64 end = start + drm_buddy_block_size(mm, block);
+
+				vres->used_visible_size +=
+					min(end, mgr->visible_size) - start;
+			}
+		}
+	}
+
+	mgr->visible_avail -= vres->used_visible_size;
+	mutex_unlock(&mgr->lock);
+
 	if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) &&
 	    xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks))
 		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
@@ -213,7 +202,7 @@ error_fini:
 	ttm_resource_fini(man, &vres->base);
 	kfree(vres);
 
-	return r;
+	return err;
 }
 
 static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
@@ -226,6 +215,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
 
 	mutex_lock(&mgr->lock);
 	drm_buddy_free_list(mm, &vres->blocks);
+	mgr->visible_avail += vres->used_visible_size;
 	mutex_unlock(&mgr->lock);
 
 	ttm_resource_fini(man, res);
@@ -240,6 +230,13 @@ static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
 	struct drm_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
+	drm_printf(printer, "default_page_size: %lluKiB\n",
+		   mgr->default_page_size >> 10);
+	drm_printf(printer, "visible_avail: %lluMiB\n",
+		   (u64)mgr->visible_avail >> 20);
+	drm_printf(printer, "visible_size: %lluMiB\n",
+		   (u64)mgr->visible_size >> 20);
+
 	drm_buddy_print(mm, printer);
 	mutex_unlock(&mgr->lock);
 	drm_printf(printer, "man size:%llu\n", man->size);
@@ -262,6 +259,8 @@ static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
 	if (ttm_resource_manager_evict_all(&xe->ttm, man))
 		return;
 
+	WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size);
+
 	drm_buddy_fini(&mgr->mm);
 
 	ttm_resource_manager_cleanup(&mgr->manager);
@@ -270,7 +269,8 @@ static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
 }
 
 int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
-			   u32 mem_type, u64 size, u64 default_page_size)
+			   u32 mem_type, u64 size, u64 io_size,
+			   u64 default_page_size)
 {
 	struct ttm_resource_manager *man = &mgr->manager;
 	int err;
@@ -279,6 +279,8 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
 	mgr->mem_type = mem_type;
 	mutex_init(&mgr->lock);
 	mgr->default_page_size = default_page_size;
+	mgr->visible_size = io_size;
+	mgr->visible_avail = io_size;
 
 	ttm_resource_manager_init(man, &xe->ttm, size);
 	err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
@@ -298,7 +300,8 @@ int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
 	mgr->gt = gt;
 
 	return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + gt->info.vram_id,
-				      gt->mem.vram.size, PAGE_SIZE);
+				      gt->mem.vram.size, gt->mem.vram.io_size,
+				      PAGE_SIZE);
 }
 
 int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index 78f332d262242..35e5367a79fbd 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -13,7 +13,8 @@ struct xe_device;
 struct xe_gt;
 
 int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
-			   u32 mem_type, u64 size, u64 default_page_size);
+			   u32 mem_type, u64 size, u64 io_size,
+			   u64 default_page_size);
 int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr);
 int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 			      struct ttm_resource *res,
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index 39aa2ec1b9680..3d9417ff7434a 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -23,6 +23,10 @@ struct xe_ttm_vram_mgr {
 	struct ttm_resource_manager manager;
 	/** @mm: DRM buddy allocator which manages the VRAM */
 	struct drm_buddy mm;
+	/** @visible_size: Proped size of the CPU visible portion */
+	u64 visible_size;
+	/** @visible_avail: CPU visible portion still unallocated */
+	u64 visible_avail;
 	/** @default_page_size: default page size */
 	u64 default_page_size;
 	/** @lock: protects allocations of VRAM */
@@ -39,6 +43,8 @@ struct xe_ttm_vram_mgr_resource {
 	struct ttm_resource base;
 	/** @blocks: list of DRM buddy blocks */
 	struct list_head blocks;
+	/** @used_visible_size: How many CPU visible bytes this resource is using */
+	u64 used_visible_size;
 	/** @flags: flags associated with the resource */
 	unsigned long flags;
 };
-- 
GitLab


From ce79c6c43af7280c1f26d700959d04a7e62092af Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 21 Mar 2023 11:44:07 +0000
Subject: [PATCH 1420/2340] drm/xe/buddy: add compatible and intersects hooks

Copy this from i915. We need .compatible for vram -> vram transfers, so
they don't just get nooped by ttm, if need to move something from
mappable to non-mappble or vice versa. The .intersects is needed for
eviction, to determine if a victim resource is worth eviction. e.g if we
need mappable space there is no point in evicting a resource that has
zero mappable pages.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 62 ++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 78979c0b6024f..73836b9b7fed2 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -242,9 +242,71 @@ static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
 	drm_printf(printer, "man size:%llu\n", man->size);
 }
 
+static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man,
+				       struct ttm_resource *res,
+				       const struct ttm_place *place,
+				       size_t size)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres =
+		to_xe_ttm_vram_mgr_resource(res);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+
+	if (!place->fpfn && !place->lpfn)
+		return true;
+
+	if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT)
+		return vres->used_visible_size > 0;
+
+	list_for_each_entry(block, &vres->blocks, link) {
+		unsigned long fpfn =
+			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+		unsigned long lpfn = fpfn +
+			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+		if (place->fpfn < lpfn && place->lpfn > fpfn)
+			return true;
+	}
+
+	return false;
+}
+
+static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man,
+				       struct ttm_resource *res,
+				       const struct ttm_place *place,
+				       size_t size)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres =
+		to_xe_ttm_vram_mgr_resource(res);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+
+	if (!place->fpfn && !place->lpfn)
+		return true;
+
+	if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT)
+		return vres->used_visible_size == size;
+
+	list_for_each_entry(block, &vres->blocks, link) {
+		unsigned long fpfn =
+			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+		unsigned long lpfn = fpfn +
+			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+		if (fpfn < place->fpfn || lpfn > place->lpfn)
+			return false;
+	}
+
+	return true;
+}
+
 static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
 	.alloc	= xe_ttm_vram_mgr_new,
 	.free	= xe_ttm_vram_mgr_del,
+	.intersects = xe_ttm_vram_mgr_intersects,
+	.compatible = xe_ttm_vram_mgr_compatible,
 	.debug	= xe_ttm_vram_mgr_debug
 };
 
-- 
GitLab


From 044f0cfb19473cd1b60a69c802cac0651066fa21 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 20 Mar 2023 10:46:24 -0700
Subject: [PATCH 1421/2340] drm/xe: Drop zero length arrays

Zero-length arrays as fake flexible arrays are deprecated and we are
moving towards adopting C99 flexible-array members instead.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine_types.h    | 2 +-
 drivers/gpu/drm/xe/xe_sched_job_types.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
index 2f6f0f2a0a8bf..a0cd80cb9d7b1 100644
--- a/drivers/gpu/drm/xe/xe_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_engine_types.h
@@ -164,7 +164,7 @@ struct xe_engine {
 	/** @entity: DRM sched entity for this engine (1 to 1 relationship) */
 	struct drm_sched_entity *entity;
 	/** @lrc: logical ring context for this engine */
-	struct xe_lrc lrc[0];
+	struct xe_lrc lrc[];
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index fd1d75996127b..5534bfacaa168 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -40,7 +40,7 @@ struct xe_sched_job {
 	/** @migrate_flush_flags: Additional flush flags for migration jobs */
 	u32 migrate_flush_flags;
 	/** @batch_addr: batch buffer address of job */
-	u64 batch_addr[0];
+	u64 batch_addr[];
 };
 
 #endif
-- 
GitLab


From 38c04b47cec861cf4007b3e53cbf584e494e2762 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 28 Feb 2023 11:17:30 +0100
Subject: [PATCH 1422/2340] drm/xe: Use atomic instead of mutex for
 xe_device_mem_access_ongoing

xe_guc_ct_fast_path() is called from an irq context, and cannot lock
the mutex used by xe_device_mem_access_ongoing().

Fortunately it is easy to fix, and the atomic guarantees are good enough
to ensure xe->mem_access.hold_rpm is set before last ref is dropped.

As far as I can tell, the runtime ref in device access should be
killable, but don't dare to do it yet.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c       | 18 ++++++++----------
 drivers/gpu/drm/xe/xe_device.h       | 14 ++++----------
 drivers/gpu/drm/xe/xe_device_types.h |  4 +---
 3 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 49ce11fc1174f..ffacf80c89422 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -206,8 +206,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	if (err)
 		goto err_put;
 
-	drmm_mutex_init(&xe->drm, &xe->mem_access.lock);
-
 	return xe;
 
 err_put:
@@ -354,25 +352,25 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 void xe_device_mem_access_get(struct xe_device *xe)
 {
 	bool resumed = xe_pm_runtime_resume_if_suspended(xe);
+	int ref = atomic_inc_return(&xe->mem_access.ref);
 
-	mutex_lock(&xe->mem_access.lock);
-	if (xe->mem_access.ref++ == 0)
+	if (ref == 1)
 		xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
-	mutex_unlock(&xe->mem_access.lock);
 
 	/* The usage counter increased if device was immediately resumed */
 	if (resumed)
 		xe_pm_runtime_put(xe);
 
-	XE_WARN_ON(xe->mem_access.ref == S32_MAX);
+	XE_WARN_ON(ref == S32_MAX);
 }
 
 void xe_device_mem_access_put(struct xe_device *xe)
 {
-	mutex_lock(&xe->mem_access.lock);
-	if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm)
+	bool hold = xe->mem_access.hold_rpm;
+	int ref = atomic_dec_return(&xe->mem_access.ref);
+
+	if (!ref && hold)
 		xe_pm_runtime_put(xe);
-	mutex_unlock(&xe->mem_access.lock);
 
-	XE_WARN_ON(xe->mem_access.ref < 0);
+	XE_WARN_ON(ref < 0);
 }
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 25c5087f5aad1..d277f8985f7bf 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -90,20 +90,14 @@ static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt)
 void xe_device_mem_access_get(struct xe_device *xe);
 void xe_device_mem_access_put(struct xe_device *xe);
 
-static inline void xe_device_assert_mem_access(struct xe_device *xe)
+static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
 {
-	XE_WARN_ON(!xe->mem_access.ref);
+	return atomic_read(&xe->mem_access.ref);
 }
 
-static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
+static inline void xe_device_assert_mem_access(struct xe_device *xe)
 {
-	bool ret;
-
-	mutex_lock(&xe->mem_access.lock);
-	ret = xe->mem_access.ref;
-	mutex_unlock(&xe->mem_access.lock);
-
-	return ret;
+	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
 }
 
 static inline bool xe_device_in_fault_mode(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 377a8979bc06d..3917b9152eb94 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -184,10 +184,8 @@ struct xe_device {
 	 * triggering additional actions when they occur.
 	 */
 	struct {
-		/** @lock: protect the ref count */
-		struct mutex lock;
 		/** @ref: ref count of memory accesses */
-		s32 ref;
+		atomic_t ref;
 		/** @hold_rpm: need to put rpm ref back at the end */
 		bool hold_rpm;
 	} mem_access;
-- 
GitLab


From 4f1411e2dab7a398c31cebbeedebbe11b239c9d9 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 21 Mar 2023 18:16:47 -0700
Subject: [PATCH 1423/2340] drm/xe: Reinstate render / compute cache
 invalidation in ring ops

Render / compute engines have additional caches (not just TLBs) that
need to be invalidated each batch, reinstate these invalidations in ring
ops.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Suggested-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  5 ++++
 drivers/gpu/drm/xe/xe_ring_ops.c          | 30 +++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index e60372a827237..9d6508d74d620 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -66,20 +66,25 @@
 #define   PVC_MS_MOCS_INDEX_MASK	GENMASK(6, 1)
 
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define   PIPE_CONTROL_COMMAND_CACHE_INVALIDATE		(1<<29)
 #define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
 #define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24)
+#define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET		(1<<19)
 #define   PIPE_CONTROL_PSD_SYNC				(1<<17)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
 #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
+#define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11)
 #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10)
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
 #define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
 #define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
 #define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
 #define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
 #define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 7dd886536fbc9..5480746d40e81 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -106,6 +106,30 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
 	return i;
 }
 
+static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
+{
+	u32 flags = PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
+		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+		PIPE_CONTROL_VF_CACHE_INVALIDATE |
+		PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+		PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_STORE_DATA_INDEX;
+
+	flags &= ~mask_flags;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = flags;
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
 #define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))
 
 static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
@@ -212,8 +236,14 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	bool pvc = xe->info.platform == XE_PVC;
+	u32 mask_flags = 0;
 
 	dw[i++] = preparser_disable(true);
+	if (pvc)
+		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
+	i = emit_pipe_invalidate(mask_flags, dw, i);
 	/* Wa_1809175790 */
 	if (!xe->info.has_flat_ccs)
 		i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i);
-- 
GitLab


From 99c5952fe36107ee57fa0ad7115ffa76222a8810 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 22 Mar 2023 10:35:45 +0000
Subject: [PATCH 1424/2340] drm/xe/gt: some error handling fixes

Make sure we pass along the correct errors.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index daa433d0f2f55..07464ba427469 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -234,7 +234,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 	job = xe_bb_create_wa_job(e, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
-		return PTR_ERR(bb);
+		return PTR_ERR(job);
 	}
 
 	xe_sched_job_arm(job);
@@ -285,7 +285,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 	job = xe_bb_create_wa_job(e, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
-		return PTR_ERR(bb);
+		return PTR_ERR(job);
 	}
 
 	xe_sched_job_arm(job);
@@ -545,8 +545,10 @@ static int all_fw_domain_init(struct xe_gt *gt)
 
 	if (!xe_gt_is_media_type(gt)) {
 		gt->migrate = xe_migrate_init(gt);
-		if (IS_ERR(gt->migrate))
+		if (IS_ERR(gt->migrate)) {
+			err = PTR_ERR(gt->migrate);
 			goto err_force_wake;
+		}
 	} else {
 		gt->migrate = xe_find_full_gt(gt)->migrate;
 	}
-- 
GitLab


From 59ea53eecb7154a2ac8aa39f21f16a144be3eecc Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 23 Mar 2023 09:25:00 -0700
Subject: [PATCH 1425/2340] drm/xe: Use BO's GT to determine dma_offset when
 programming PTEs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than using the passed in GT, use the BO's GT determine dma_offset
when programming PTEs as these two GT's could differ (i.e. mapping a BO
from a remote GT). The BO's GT is correct GT to use as this where BO
resides, while the passed in GT is where the mapping is created.

v2:
  (Thomas)      - Kernel doc, extra new line
  (CI)          - Rebase to tip

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 13 +++++++++++++
 drivers/gpu/drm/xe/xe_bo.h |  2 ++
 drivers/gpu/drm/xe/xe_pt.c |  4 +++-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index ba156a85460c7..42a5978ecc742 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -79,6 +79,19 @@ mem_type_to_gt(struct xe_device *xe, u32 mem_type)
 	return xe_device_get_gt(xe, mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0));
 }
 
+/**
+ * xe_bo_to_gt() - Get a GT from a BO's memory location
+ * @bo: The buffer object
+ *
+ * Get a GT from a BO's memory location, should be called on BOs in VRAM only.
+ *
+ * Return: xe_gt object which is closest to the BO
+ */
+struct xe_gt *xe_bo_to_gt(struct xe_bo *bo)
+{
+	return mem_type_to_gt(xe_bo_device(bo), bo->ttm.resource->mem_type);
+}
+
 static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
 			   u32 bo_flags, u32 *c)
 {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 4350845542bf4..e38894c1255d7 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -105,6 +105,8 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 			      u32 bo_flags);
 
+struct xe_gt *xe_bo_to_gt(struct xe_bo *bo);
+
 static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
 {
 	return container_of(bo, struct xe_bo, ttm);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 64da981524559..7aa12f86e55bd 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -758,10 +758,12 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
 	int ret;
 
 	if (is_vram) {
+		struct xe_gt *bo_gt = xe_bo_to_gt(bo);
+
 		xe_walk.default_pte = GEN12_PPGTT_PTE_LM;
 		if (vma && vma->use_atomic_access_pte_bit)
 			xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE;
-		xe_walk.dma_offset = gt->mem.vram.io_start -
+		xe_walk.dma_offset = bo_gt->mem.vram.io_start -
 			gt_to_xe(gt)->mem.vram.io_start;
 		xe_walk.cache = XE_CACHE_WB;
 	} else {
-- 
GitLab


From b4eecedc75c1b75eee359c806fc964f70e0fc983 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 20 Mar 2023 13:58:36 -0700
Subject: [PATCH 1426/2340] drm/xe: Fix potential deadlock handling page faults

Within a class the GuC will hault scheduling if the head of the queue
can't be scheduled the queue will block. This can lead to deadlock if
BCS0-7 all have faults and another engine on BCS0-7 is at head of the
GuC scheduling queue as the migration engine used to fix tthe fault will
be blocked. To work around this set the migration engine to the highest
priority when servicing page faults.

v2 (Maarten): Set priority to kernel once at creation

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9102fa1d87591..b9b2a64447d82 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -360,6 +360,8 @@ struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
 		xe_vm_close_and_put(vm);
 		return ERR_CAST(m->eng);
 	}
+	if (xe->info.supports_usm)
+		m->eng->priority = XE_ENGINE_PRIORITY_KERNEL;
 
 	mutex_init(&m->job_mutex);
 
-- 
GitLab


From 85ea2bd2fd18ec43e2569da3e21c91fc6832b464 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 23 Mar 2023 15:46:50 -0700
Subject: [PATCH 1427/2340] drm/xe/huc: Support for loading unversiond HuC

Follow the new direction of firmware and add macro
support for loading unversioned HuC. Keep HuC
versioned loading support as well for platforms
that fall under force_probe support

Add check to ensure driver does not do any version check
for HuC if going through unversioned load.

v2: unversioned firmware to be the default for platforms
not under force_probe. Maintain versioned firmware macro support
for platforms under force-probe protection.
v3: Minor style and naming adjustments (Lucas)

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230323224651.1187366-2-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 53 ++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 47b51ad5b015d..f3e4e3774d682 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -50,18 +50,22 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
 	fw_def(DG1,          0, guc_def(dg1,  70, 5, 2)) \
 	fw_def(TIGERLAKE,    0, guc_def(tgl,  70, 5, 2))
 
-#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
-	fw_def(DG1,          0, huc_def(dg1,  7, 9, 3)) \
-	fw_def(TIGERLAKE,    0, huc_def(tgl,  7, 9, 3))
+#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \
+	fw_def(DG1,          0, huc_def(dg1)) \
+	fw_def(TIGERLAKE,    0, huc_def(tgl))
+
+#define __MAKE_HUC_FW_PATH(prefix_, name_) \
+        "i915/" \
+        __stringify(prefix_) "_" name_ ".bin"
 
 #define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
 	"i915/" \
 	__stringify(prefix_) "_" name_ "_" \
 	__stringify(major_) ".bin"
 
-#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
+#define __MAKE_UC_FW_PATH_FULL_VER(prefix_, name_, major_, minor_, patch_) \
         "i915/" \
-       __stringify(prefix_) name_ \
+       __stringify(prefix_) "_" name_ "_" \
        __stringify(major_) "." \
        __stringify(minor_) "." \
        __stringify(patch_) ".bin"
@@ -69,15 +73,19 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
 #define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
 	__MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_)
 
-#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
-	__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
+#define MAKE_HUC_FW_PATH(prefix_) \
+	__MAKE_HUC_FW_PATH(prefix_, "huc")
+
+#define MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, patch_) \
+	__MAKE_UC_FW_PATH_FULL_VER(prefix_, "huc", major_, minor_, patch_)
+
 
 /* All blobs need to be declared via MODULE_FIRMWARE() */
 #define XE_UC_MODULE_FW(platform_, revid_, uc_) \
 	MODULE_FIRMWARE(uc_);
 
 XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH)
-XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH)
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH, MAKE_HUC_FW_PATH_FULL_VER)
 
 /* The below structs and macros are used to iterate across the list of blobs */
 struct __packed uc_fw_blob {
@@ -93,9 +101,12 @@ struct __packed uc_fw_blob {
 	UC_FW_BLOB(major_, minor_, \
 		   MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))
 
-#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \
+#define HUC_FW_BLOB(prefix_) \
+	UC_FW_BLOB(0, 0, MAKE_HUC_FW_PATH(prefix_))
+
+#define HUC_FW_VERSION_BLOB(prefix_, major_, minor_, bld_num_) \
 	UC_FW_BLOB(major_, minor_, \
-		   MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_))
+		   MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, bld_num_))
 
 struct __packed uc_fw_platform_requirement {
 	enum xe_platform p;
@@ -122,7 +133,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 		XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
 	};
 	static const struct uc_fw_platform_requirement blobs_huc[] = {
-		XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
+		XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_VERSION_BLOB)
 	};
 	static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
 		[XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
@@ -299,15 +310,17 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
 					   css->sw_version);
 
-	if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
-	    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
-		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
-			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
-			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
-		if (!xe_uc_fw_is_overridden(uc_fw)) {
-			err = -ENOEXEC;
-			goto fail;
+	if (uc_fw->major_ver_wanted) {
+		if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
+		    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
+			drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+				   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+				   uc_fw->major_ver_found, uc_fw->minor_ver_found,
+				   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+			if (!xe_uc_fw_is_overridden(uc_fw)) {
+				err = -ENOEXEC;
+				goto fail;
+			}
 		}
 	}
 
-- 
GitLab


From 9bddebf1f0f6e7a8a6418dfc14fdaa6233ba0524 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 23 Mar 2023 15:46:51 -0700
Subject: [PATCH 1428/2340] drm/xe: Load HuC on Alderlake S

Alderlake S uses TGL HuC.

Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230323224651.1187366-3-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index f3e4e3774d682..5c3789f670490 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -51,6 +51,7 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
 	fw_def(TIGERLAKE,    0, guc_def(tgl,  70, 5, 2))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \
+	fw_def(ALDERLAKE_S,  0, huc_def(tgl)) \
 	fw_def(DG1,          0, huc_def(dg1)) \
 	fw_def(TIGERLAKE,    0, huc_def(tgl))
 
-- 
GitLab


From ef5e3c2f703d05c9d296d8f8ad0a0f48f6c1fcc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Thu, 23 Mar 2023 12:24:59 -0700
Subject: [PATCH 1429/2340] drm/xe: Add max engine priority to xe query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Intel Vulkan driver needs to know what is the maximum priority to fill
a device info struct for applications.

Right now we getting this information by creating a engine and setting
priorities from min to high to know what is the maximum priority for
running process but this leads to info messages to be printed to
dmesg:

xe 0000:03:00.0: [drm] Ioctl argument check failed at drivers/gpu/drm/xe/xe_engine.c:178: value == DRM_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_NICE)

It does not cause any harm but when executing a test suite like
crucible it causes thousands of those messages to be printed.

So here adding one more property to drm_xe_query_config to fetch the
max engine priority.

Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c | 10 ++++++++--
 drivers/gpu/drm/xe/xe_engine.h |  1 +
 drivers/gpu/drm/xe/xe_query.c  |  3 +++
 include/uapi/drm/xe_drm.h      |  3 ++-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 8011f5827cbee..141cb223ba022 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -169,14 +169,20 @@ struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
 	return e;
 }
 
+enum xe_engine_priority
+xe_engine_device_get_max_priority(struct xe_device *xe)
+{
+	return capable(CAP_SYS_NICE) ? XE_ENGINE_PRIORITY_HIGH :
+			               XE_ENGINE_PRIORITY_NORMAL;
+}
+
 static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
 			       u64 value, bool create)
 {
 	if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH &&
-			 !capable(CAP_SYS_NICE)))
+	if (XE_IOCTL_ERR(xe, value > xe_engine_device_get_max_priority(xe)))
 		return -EPERM;
 
 	return e->ops->set_priority(e, value);
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
index 1cf7f23c4afd6..b95d9b0408773 100644
--- a/drivers/gpu/drm/xe/xe_engine.h
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -54,5 +54,6 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file);
 int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file);
+enum xe_engine_priority xe_engine_device_get_max_priority(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 0f70945176f60..dd64ff0d2a575 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -12,6 +12,7 @@
 
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_engine.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_guc_hwconfig.h"
@@ -194,6 +195,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count;
 	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
 		hweight_long(xe->info.mem_region_mask);
+	config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] =
+		xe_engine_device_get_max_priority(xe);
 
 	if (copy_to_user(query_ptr, config, size)) {
 		kfree(config);
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 32a4265de4022..b3bcb71068506 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -184,7 +184,8 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_VA_BITS			3
 #define XE_QUERY_CONFIG_GT_COUNT		4
 #define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
-#define XE_QUERY_CONFIG_NUM_PARAM		XE_QUERY_CONFIG_MEM_REGION_COUNT + 1
+#define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY	6
+#define XE_QUERY_CONFIG_NUM_PARAM		XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1
 	__u64 info[];
 };
 
-- 
GitLab


From cf667aec0abeda839937cbd92884799b19df1ab7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 24 Mar 2023 09:33:36 -0700
Subject: [PATCH 1430/2340] drm/xe: Decrement fault mode counts in
 xe_vm_close_and_put

Rather waiting for the VM to be destroyed (all refs to VM go to zero),
drop the fault mode counts when the VM is closed in xe_vm_close_and_put.
This avoids a window where user space can create a faulting VM, close
it, and a subsequent creation of a non-faulting VM fails.

v2 (Lucas): Drop VLK reference in commit message

Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Suggested-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 207d20da5c688..49aa4ddedbf27 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1427,6 +1427,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	XE_WARN_ON(!list_empty(&vm->extobj.list));
 	up_write(&vm->lock);
 
+	mutex_lock(&xe->usm.lock);
+	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode--;
+	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode--;
+	mutex_unlock(&xe->usm.lock);
+
 	xe_vm_put(vm);
 }
 
@@ -1469,18 +1476,10 @@ static void vm_destroy_work_func(struct work_struct *w)
 	}
 	xe_vm_unlock(vm, &ww);
 
-	mutex_lock(&xe->usm.lock);
-	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
-		xe->usm.num_vm_in_fault_mode--;
-	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
-		xe->usm.num_vm_in_non_fault_mode--;
-	mutex_unlock(&xe->usm.lock);
-
 	trace_xe_vm_free(vm);
 	dma_fence_put(vm->rebind_fence);
 	dma_resv_fini(&vm->resv);
 	kfree(vm);
-
 }
 
 void xe_vm_free(struct kref *ref)
-- 
GitLab


From 576c6380da47592dc793669c6738742385f1bbf1 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Mar 2023 14:04:10 -0700
Subject: [PATCH 1431/2340] drm/xe/pat: Move PAT setup to a dedicated file

PAT handling is growing in complexity and will continue to do so in
upcoming platforms.  Separate it out to a dedicated file to keep things
tidy.

The code is moved as-is here (aside from a few unused #define's that are
just dropped); further changes will come in future patches.

Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230324210415.2434992-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |  1 +
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  1 -
 drivers/gpu/drm/xe/xe_gt.c           | 82 +---------------------------
 drivers/gpu/drm/xe/xe_pat.c          | 81 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pat.h          | 13 +++++
 5 files changed, 98 insertions(+), 80 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_pat.c
 create mode 100644 drivers/gpu/drm/xe/xe_pat.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b3426f328d211..6ef80889fddb3 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -71,6 +71,7 @@ xe-y += xe_bb.o \
 	xe_mmio.o \
 	xe_mocs.o \
 	xe_module.o \
+	xe_pat.o \
 	xe_pci.o \
 	xe_pcode.o \
 	xe_pm.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 73b0c0bdde5d0..e33885f429b5b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -69,7 +69,6 @@
 #define GEN12_VE1_AUX_INV			_MMIO(0x42b8)
 #define   AUX_INV				REG_BIT(0)
 
-#define GEN12_PAT_INDEX(index)			_MMIO(0x4800 + (index) * 4)
 #define XEHP_TILE0_ADDR_RANGE			MCR_REG(0x4900)
 #define XEHP_FLAT_CCS_BASE_ADDR			MCR_REG(0x4910)
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 07464ba427469..245117e67e9bc 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -29,6 +29,7 @@
 #include "xe_map.h"
 #include "xe_migrate.h"
 #include "xe_mmio.h"
+#include "xe_pat.h"
 #include "xe_mocs.h"
 #include "xe_reg_sr.h"
 #include "xe_ring_ops.h"
@@ -92,83 +93,6 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
 	return 0;
 }
 
-/* FIXME: These should be in a common file */
-#define CHV_PPAT_SNOOP			REG_BIT(6)
-#define GEN8_PPAT_AGE(x)		((x)<<4)
-#define GEN8_PPAT_LLCeLLC		(3<<2)
-#define GEN8_PPAT_LLCELLC		(2<<2)
-#define GEN8_PPAT_LLC			(1<<2)
-#define GEN8_PPAT_WB			(3<<0)
-#define GEN8_PPAT_WT			(2<<0)
-#define GEN8_PPAT_WC			(1<<0)
-#define GEN8_PPAT_UC			(0<<0)
-#define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
-#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
-#define GEN12_PPAT_CLOS(x)              ((x)<<2)
-
-static void tgl_setup_private_ppat(struct xe_gt *gt)
-{
-	/* TGL doesn't support LLC or AGE settings */
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB);
-}
-
-static void pvc_setup_private_ppat(struct xe_gt *gt)
-{
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
-			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg,
-			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg,
-			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg,
-			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB);
-}
-
-#define MTL_PPAT_L4_CACHE_POLICY_MASK   REG_GENMASK(3, 2)
-#define MTL_PAT_INDEX_COH_MODE_MASK     REG_GENMASK(1, 0)
-#define MTL_PPAT_3_UC   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
-#define MTL_PPAT_1_WT   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
-#define MTL_PPAT_0_WB   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
-#define MTL_3_COH_2W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
-#define MTL_2_COH_1W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
-#define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
-
-static void mtl_setup_private_ppat(struct xe_gt *gt)
-{
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg,
-			MTL_PPAT_1_WT | MTL_2_COH_1W);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg,
-			MTL_PPAT_3_UC | MTL_2_COH_1W);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg,
-			MTL_PPAT_0_WB | MTL_2_COH_1W);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
-			MTL_PPAT_0_WB | MTL_3_COH_2W);
-}
-
-static void setup_private_ppat(struct xe_gt *gt)
-{
-	struct xe_device *xe = gt_to_xe(gt);
-
-	if (xe->info.platform == XE_METEORLAKE)
-		mtl_setup_private_ppat(gt);
-	else if (xe->info.platform == XE_PVC)
-		pvc_setup_private_ppat(gt);
-	else
-		tgl_setup_private_ppat(gt);
-}
-
 static int gt_ttm_mgr_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
@@ -447,7 +371,7 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_hw_fence_irq;
 
-	setup_private_ppat(gt);
+	xe_pat_init(gt);
 
 	if (!xe_gt_is_media_type(gt)) {
 		err = xe_ggtt_init(gt, gt->mem.ggtt);
@@ -633,7 +557,7 @@ static int do_gt_restart(struct xe_gt *gt)
 	enum xe_hw_engine_id id;
 	int err;
 
-	setup_private_ppat(gt);
+	xe_pat_init(gt);
 
 	xe_gt_mcr_set_implicit_defaults(gt);
 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
new file mode 100644
index 0000000000000..1cbb3d6ea9493
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_pat.h"
+
+#include "regs/xe_reg_defs.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#define GEN12_PAT_INDEX(index)			_MMIO(0x4800 + (index) * 4)
+
+#define GEN8_PPAT_WB			(3<<0)
+#define GEN8_PPAT_WT			(2<<0)
+#define GEN8_PPAT_WC			(1<<0)
+#define GEN8_PPAT_UC			(0<<0)
+#define GEN12_PPAT_CLOS(x)              ((x)<<2)
+
+static void tgl_setup_private_ppat(struct xe_gt *gt)
+{
+	/* TGL doesn't support LLC or AGE settings */
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB);
+}
+
+static void pvc_setup_private_ppat(struct xe_gt *gt)
+{
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
+			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg,
+			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg,
+			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg,
+			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB);
+}
+
+#define MTL_PPAT_L4_CACHE_POLICY_MASK   REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK     REG_GENMASK(1, 0)
+#define MTL_PPAT_3_UC   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_1_WT   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_0_WB   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
+static void mtl_setup_private_ppat(struct xe_gt *gt)
+{
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg,
+			MTL_PPAT_1_WT | MTL_2_COH_1W);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg,
+			MTL_PPAT_3_UC | MTL_2_COH_1W);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg,
+			MTL_PPAT_0_WB | MTL_2_COH_1W);
+	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
+			MTL_PPAT_0_WB | MTL_3_COH_2W);
+}
+
+void xe_pat_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.platform == XE_METEORLAKE)
+		mtl_setup_private_ppat(gt);
+	else if (xe->info.platform == XE_PVC)
+		pvc_setup_private_ppat(gt);
+	else
+		tgl_setup_private_ppat(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
new file mode 100644
index 0000000000000..659de40081316
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PAT_H_
+#define _XE_PAT_H_
+
+struct xe_gt;
+
+void xe_pat_init(struct xe_gt *gt);
+
+#endif
-- 
GitLab


From 4f843703133970c852cf4661e584bdea55fd1a7a Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Mar 2023 14:04:11 -0700
Subject: [PATCH 1432/2340] drm/xe/pat: Use table-based programming of PAT
 settings

Provide per-platform tables of PAT values rather than per-platform
functions.  This will simplify the handling of unicast vs MCR registers
in the upcoming patches.

Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230324210415.2434992-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 77 +++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 1cbb3d6ea9493..d1a3e170fb33f 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -17,34 +17,27 @@
 #define GEN8_PPAT_UC			(0<<0)
 #define GEN12_PPAT_CLOS(x)              ((x)<<2)
 
-static void tgl_setup_private_ppat(struct xe_gt *gt)
-{
-	/* TGL doesn't support LLC or AGE settings */
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB);
-}
+const u32 tgl_pat_table[] = {
+	[0] = GEN8_PPAT_WB,
+	[1] = GEN8_PPAT_WC,
+	[2] = GEN8_PPAT_WT,
+	[3] = GEN8_PPAT_UC,
+	[4] = GEN8_PPAT_WB,
+	[5] = GEN8_PPAT_WB,
+	[6] = GEN8_PPAT_WB,
+	[7] = GEN8_PPAT_WB,
+};
 
-static void pvc_setup_private_ppat(struct xe_gt *gt)
-{
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
-			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg,
-			GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg,
-			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg,
-			GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB);
-}
+const u32 pvc_pat_table[] = {
+	[0] = GEN8_PPAT_UC,
+	[1] = GEN8_PPAT_WC,
+	[2] = GEN8_PPAT_WT,
+	[3] = GEN8_PPAT_WB,
+	[4] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT,
+	[5] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB,
+	[6] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT,
+	[7] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB,
+};
 
 #define MTL_PPAT_L4_CACHE_POLICY_MASK   REG_GENMASK(3, 2)
 #define MTL_PAT_INDEX_COH_MODE_MASK     REG_GENMASK(1, 0)
@@ -55,27 +48,27 @@ static void pvc_setup_private_ppat(struct xe_gt *gt)
 #define MTL_2_COH_1W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
 #define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
 
-static void mtl_setup_private_ppat(struct xe_gt *gt)
-{
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg,
-			MTL_PPAT_1_WT | MTL_2_COH_1W);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg,
-			MTL_PPAT_3_UC | MTL_2_COH_1W);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg,
-			MTL_PPAT_0_WB | MTL_2_COH_1W);
-	xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
-			MTL_PPAT_0_WB | MTL_3_COH_2W);
-}
+const u32 mtl_pat_table[] = {
+	[0] = MTL_PPAT_0_WB,
+	[1] = MTL_PPAT_1_WT | MTL_2_COH_1W,
+	[2] = MTL_PPAT_3_UC | MTL_2_COH_1W,
+	[3] = MTL_PPAT_0_WB | MTL_2_COH_1W,
+	[4] = MTL_PPAT_0_WB | MTL_3_COH_2W,
+};
+
+#define PROGRAM_PAT_UNICAST(gt, table) do { \
+	for (int i = 0; i < ARRAY_SIZE(table); i++) \
+		xe_mmio_write32(gt, GEN12_PAT_INDEX(i).reg, table[i]); \
+} while (0)
 
 void xe_pat_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
 	if (xe->info.platform == XE_METEORLAKE)
-		mtl_setup_private_ppat(gt);
+		PROGRAM_PAT_UNICAST(gt, mtl_pat_table);
 	else if (xe->info.platform == XE_PVC)
-		pvc_setup_private_ppat(gt);
+		PROGRAM_PAT_UNICAST(gt, pvc_pat_table);
 	else
-		tgl_setup_private_ppat(gt);
+		PROGRAM_PAT_UNICAST(gt, tgl_pat_table);
 }
-- 
GitLab


From 152d7f2db978780f6c7e95711c00dc1e0888535b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Mar 2023 14:04:12 -0700
Subject: [PATCH 1433/2340] drm/xe/pat: Handle unicast vs MCR PAT registers

The PAT_INDEX registers are MCR registers on some platforms and unicast
on others.  On MTL the handling even varies between GTs:  the primary GT
uses MCR registers while the media GT uses unicast registers.  Let's add
proper MCR programming on the relevant platforms/GTs.

Given that we PAT tables to change pretty regularly on future platforms,
we'll make PAT programming an exception to the usual model of assuming
new platforms should inherit the previous platform's behavior.  Instead
we'll raise a warning if the current platform isn't handled in the
if/else ladder.  This should help prevent subtle cache misbehavior if we
forget to add the table for a new platform.

Bspec: 66534, 67609, 67788
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230324210415.2434992-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index d1a3e170fb33f..6e3a74aa46e7a 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -7,9 +7,10 @@
 
 #include "regs/xe_reg_defs.h"
 #include "xe_gt.h"
+#include "xe_gt_mcr.h"
 #include "xe_mmio.h"
 
-#define GEN12_PAT_INDEX(index)			_MMIO(0x4800 + (index) * 4)
+#define _PAT_INDEX(index)			(0x4800 + (index) * 4)
 
 #define GEN8_PPAT_WB			(3<<0)
 #define GEN8_PPAT_WT			(2<<0)
@@ -58,17 +59,39 @@ const u32 mtl_pat_table[] = {
 
 #define PROGRAM_PAT_UNICAST(gt, table) do { \
 	for (int i = 0; i < ARRAY_SIZE(table); i++) \
-		xe_mmio_write32(gt, GEN12_PAT_INDEX(i).reg, table[i]); \
+		xe_mmio_write32(gt, _PAT_INDEX(i), table[i]); \
+} while (0)
+
+#define PROGRAM_PAT_MCR(gt, table) do { \
+	for (int i = 0; i < ARRAY_SIZE(table); i++) \
+		xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]); \
 } while (0)
 
 void xe_pat_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (xe->info.platform == XE_METEORLAKE)
-		PROGRAM_PAT_UNICAST(gt, mtl_pat_table);
-	else if (xe->info.platform == XE_PVC)
-		PROGRAM_PAT_UNICAST(gt, pvc_pat_table);
-	else
+	if (xe->info.platform == XE_METEORLAKE) {
+		if (xe_gt_is_media_type(gt))
+			PROGRAM_PAT_UNICAST(gt, mtl_pat_table);
+		else
+			PROGRAM_PAT_MCR(gt, mtl_pat_table);
+	} else if (xe->info.platform == XE_PVC) {
+		PROGRAM_PAT_MCR(gt, pvc_pat_table);
+	} else if (xe->info.platform == XE_DG2) {
+		PROGRAM_PAT_MCR(gt, pvc_pat_table);
+	} else if (GRAPHICS_VERx100(xe) <= 1210) {
 		PROGRAM_PAT_UNICAST(gt, tgl_pat_table);
+	} else {
+		/*
+		 * Going forward we expect to need new PAT settings for most
+		 * new platforms; failure to provide a new table can easily
+		 * lead to subtle, hard-to-debug problems.  If none of the
+		 * conditions above match the platform we're running on we'll
+		 * raise an error rather than trying to silently inherit the
+		 * most recent platform's behavior.
+		 */
+		drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%2d!\n",
+			GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
+	}
 }
-- 
GitLab


From 366974e4a69c09a441eca7802028e60b39903386 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Mar 2023 14:04:13 -0700
Subject: [PATCH 1434/2340] drm/xe/pat: Clean up PAT register definitions

Replace the deprecated "GEN" terminology in the PAT definitions.

Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230324210415.2434992-5-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 73 +++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 6e3a74aa46e7a..d2935ef0e274e 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -12,49 +12,52 @@
 
 #define _PAT_INDEX(index)			(0x4800 + (index) * 4)
 
-#define GEN8_PPAT_WB			(3<<0)
-#define GEN8_PPAT_WT			(2<<0)
-#define GEN8_PPAT_WC			(1<<0)
-#define GEN8_PPAT_UC			(0<<0)
-#define GEN12_PPAT_CLOS(x)              ((x)<<2)
+#define MTL_L4_POLICY_MASK			REG_GENMASK(3, 2)
+#define MTL_PAT_3_UC				REG_FIELD_PREP(MTL_L4_POLICY_MASK, 3)
+#define MTL_PAT_1_WT				REG_FIELD_PREP(MTL_L4_POLICY_MASK, 1)
+#define MTL_PAT_0_WB				REG_FIELD_PREP(MTL_L4_POLICY_MASK, 0)
+#define MTL_INDEX_COH_MODE_MASK			REG_GENMASK(1, 0)
+#define MTL_3_COH_2W				REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W				REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON				REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 0)
+
+#define PVC_CLOS_LEVEL_MASK			REG_GENMASK(3, 2)
+#define PVC_PAT_CLOS(x)				REG_FIELD_PREP(PVC_CLOS_LEVEL_MASK, x)
+
+#define TGL_MEM_TYPE_MASK			REG_GENMASK(1, 0)
+#define TGL_PAT_WB				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 3)
+#define TGL_PAT_WT				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 2)
+#define TGL_PAT_WC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1)
+#define TGL_PAT_UC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0)
 
 const u32 tgl_pat_table[] = {
-	[0] = GEN8_PPAT_WB,
-	[1] = GEN8_PPAT_WC,
-	[2] = GEN8_PPAT_WT,
-	[3] = GEN8_PPAT_UC,
-	[4] = GEN8_PPAT_WB,
-	[5] = GEN8_PPAT_WB,
-	[6] = GEN8_PPAT_WB,
-	[7] = GEN8_PPAT_WB,
+	[0] = TGL_PAT_WB,
+	[1] = TGL_PAT_WC,
+	[2] = TGL_PAT_WT,
+	[3] = TGL_PAT_UC,
+	[4] = TGL_PAT_WB,
+	[5] = TGL_PAT_WB,
+	[6] = TGL_PAT_WB,
+	[7] = TGL_PAT_WB,
 };
 
 const u32 pvc_pat_table[] = {
-	[0] = GEN8_PPAT_UC,
-	[1] = GEN8_PPAT_WC,
-	[2] = GEN8_PPAT_WT,
-	[3] = GEN8_PPAT_WB,
-	[4] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT,
-	[5] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB,
-	[6] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT,
-	[7] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB,
+	[0] = TGL_PAT_UC,
+	[1] = TGL_PAT_WC,
+	[2] = TGL_PAT_WT,
+	[3] = TGL_PAT_WB,
+	[4] = PVC_PAT_CLOS(1) | TGL_PAT_WT,
+	[5] = PVC_PAT_CLOS(1) | TGL_PAT_WB,
+	[6] = PVC_PAT_CLOS(2) | TGL_PAT_WT,
+	[7] = PVC_PAT_CLOS(2) | TGL_PAT_WB,
 };
 
-#define MTL_PPAT_L4_CACHE_POLICY_MASK   REG_GENMASK(3, 2)
-#define MTL_PAT_INDEX_COH_MODE_MASK     REG_GENMASK(1, 0)
-#define MTL_PPAT_3_UC   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
-#define MTL_PPAT_1_WT   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
-#define MTL_PPAT_0_WB   REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
-#define MTL_3_COH_2W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
-#define MTL_2_COH_1W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
-#define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
-
 const u32 mtl_pat_table[] = {
-	[0] = MTL_PPAT_0_WB,
-	[1] = MTL_PPAT_1_WT | MTL_2_COH_1W,
-	[2] = MTL_PPAT_3_UC | MTL_2_COH_1W,
-	[3] = MTL_PPAT_0_WB | MTL_2_COH_1W,
-	[4] = MTL_PPAT_0_WB | MTL_3_COH_2W,
+	[0] = MTL_PAT_0_WB,
+	[1] = MTL_PAT_1_WT | MTL_2_COH_1W,
+	[2] = MTL_PAT_3_UC | MTL_2_COH_1W,
+	[3] = MTL_PAT_0_WB | MTL_2_COH_1W,
+	[4] = MTL_PAT_0_WB | MTL_3_COH_2W,
 };
 
 #define PROGRAM_PAT_UNICAST(gt, table) do { \
-- 
GitLab


From f16a3f6335e84c07de4b5dd263f0c26e3a3fa5a4 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Mar 2023 14:04:14 -0700
Subject: [PATCH 1435/2340] drm/xe/mtl: Fix PAT table coherency settings

Re-sync our MTL PAT table with the bspec.  1-way coherency should only
be set on table entry 3.  We do not want an incorrect setting here to
accidentally paper over other bugs elsewhere in the driver.

Bspec: 45101
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230324210415.2434992-6-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index d2935ef0e274e..e83f7895b8537 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -54,8 +54,8 @@ const u32 pvc_pat_table[] = {
 
 const u32 mtl_pat_table[] = {
 	[0] = MTL_PAT_0_WB,
-	[1] = MTL_PAT_1_WT | MTL_2_COH_1W,
-	[2] = MTL_PAT_3_UC | MTL_2_COH_1W,
+	[1] = MTL_PAT_1_WT,
+	[2] = MTL_PAT_3_UC,
 	[3] = MTL_PAT_0_WB | MTL_2_COH_1W,
 	[4] = MTL_PAT_0_WB | MTL_3_COH_2W,
 };
-- 
GitLab


From 7321a713c6c952d66d5fae8e8478c904b61bb735 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 24 Mar 2023 14:04:15 -0700
Subject: [PATCH 1436/2340] drm/xe/mtl: Handle PAT_INDEX offset jump

Starting with MTL, the number of entries in the PAT table increased to
16.  The register offset jumped between index 7 and index 8, so a slight
adjustment is needed to ensure the PAT_INDEX macros select the proper
offset for the upper half of the table.

Note that although there are 16 registers in the hardware, the driver is
currently only asked to program the first 5, and we leave the rest at
their hardware default values.  That means we don't actually touch the
upper half of the PAT table in the driver today and this patch won't
have any functional effect [yet].

Bspec: 44235
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230324210415.2434992-7-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index e83f7895b8537..b59b6a2347bbd 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -10,7 +10,9 @@
 #include "xe_gt_mcr.h"
 #include "xe_mmio.h"
 
-#define _PAT_INDEX(index)			(0x4800 + (index) * 4)
+#define _PAT_INDEX(index)			_PICK_EVEN_2RANGES(index, 8, \
+								   0x4800, 0x4804, \
+								   0x4848, 0x484c)
 
 #define MTL_L4_POLICY_MASK			REG_GENMASK(3, 2)
 #define MTL_PAT_3_UC				REG_FIELD_PREP(MTL_L4_POLICY_MASK, 3)
-- 
GitLab


From 8deba79f5deb0a751894a0cf74eff3806e7adfb4 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 23 Mar 2023 11:59:21 +0000
Subject: [PATCH 1437/2340] drm/xe: add XE_BO_CREATE_VRAM_MASK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So we don't have to keep repeating VRAM0 | VRAM1. Also if there are ever
more instances, then we have less places to update.

Suggested-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 8 +++-----
 drivers/gpu/drm/xe/xe_bo.h | 2 ++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 42a5978ecc742..448c4305480c3 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -999,8 +999,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 			return bo;
 	}
 
-	if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT |
-		     XE_BO_CREATE_STOLEN_BIT) &&
+	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
 	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
 		size = ALIGN(size, SZ_64K);
@@ -1064,8 +1063,7 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 	place->fpfn = start >> PAGE_SHIFT;
 	place->lpfn = end >> PAGE_SHIFT;
 
-	switch (flags & (XE_BO_CREATE_STOLEN_BIT |
-		XE_BO_CREATE_VRAM0_BIT |XE_BO_CREATE_VRAM1_BIT)) {
+	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
 	case XE_BO_CREATE_VRAM0_BIT:
 		place->mem_type = XE_PL_VRAM0;
 		break;
@@ -1771,7 +1769,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
 {
 	return bo->ttm.type == ttm_bo_type_device &&
 		!(bo->flags & XE_BO_CREATE_SYSTEM_BIT) &&
-		(bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT));
+		(bo->flags & XE_BO_CREATE_VRAM_MASK);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index e38894c1255d7..7b2104d1bda77 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -17,6 +17,8 @@
 #define XE_BO_CREATE_SYSTEM_BIT		BIT(1)
 #define XE_BO_CREATE_VRAM0_BIT		BIT(2)
 #define XE_BO_CREATE_VRAM1_BIT		BIT(3)
+#define XE_BO_CREATE_VRAM_MASK		(XE_BO_CREATE_VRAM0_BIT | \
+					 XE_BO_CREATE_VRAM1_BIT)
 /* -- */
 #define XE_BO_CREATE_STOLEN_BIT		BIT(4)
 #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
-- 
GitLab


From e7dc1341f0dab3363baac28044b46237ed251802 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 23 Mar 2023 11:59:22 +0000
Subject: [PATCH 1438/2340] drm/xe/bo: refactor try_add_vram
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Get rid of some of the duplication here. In a future patch we need to
also consider [fpfn, lpfn], so better adjust in only one place.

Suggested-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 86 ++++++++++++++------------------------
 1 file changed, 31 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 448c4305480c3..fc8cb96b2cb98 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -106,55 +106,42 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
 	}
 }
 
-static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
-			  struct ttm_place *places, u32 bo_flags, u32 *c)
+static void add_vram(struct xe_device *xe, struct xe_bo *bo,
+		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
 {
-	struct xe_gt *gt;
+	struct xe_gt *gt = mem_type_to_gt(xe, mem_type);
 
-	if (bo_flags & XE_BO_CREATE_VRAM0_BIT) {
-		gt = mem_type_to_gt(xe, XE_PL_VRAM0);
-		XE_BUG_ON(!gt->mem.vram.size);
+	XE_BUG_ON(!gt->mem.vram.size);
 
-		places[*c] = (struct ttm_place) {
-			.mem_type = XE_PL_VRAM0,
-			/*
-			 * For eviction / restore on suspend / resume objects
-			 * pinned in VRAM must be contiguous
-			 */
-			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
-					     XE_BO_CREATE_GGTT_BIT) ?
-				TTM_PL_FLAG_CONTIGUOUS : 0,
-		};
-		*c += 1;
+	places[*c] = (struct ttm_place) {
+		.mem_type = mem_type,
+		/*
+		 * For eviction / restore on suspend / resume objects
+		 * pinned in VRAM must be contiguous
+		 */
+		.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+				     XE_BO_CREATE_GGTT_BIT) ?
+			TTM_PL_FLAG_CONTIGUOUS : 0,
+	};
+	*c += 1;
 
-		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
-			bo->props.preferred_mem_type = XE_PL_VRAM0;
-	}
+	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+		bo->props.preferred_mem_type = mem_type;
 }
 
-static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo,
-			  struct ttm_place *places, u32 bo_flags, u32 *c)
+static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
+			 struct ttm_place *places, u32 bo_flags, u32 *c)
 {
-	struct xe_gt *gt;
-
-	if (bo_flags & XE_BO_CREATE_VRAM1_BIT) {
-		gt = mem_type_to_gt(xe, XE_PL_VRAM1);
-		XE_BUG_ON(!gt->mem.vram.size);
-
-		places[*c] = (struct ttm_place) {
-			.mem_type = XE_PL_VRAM1,
-			/*
-			 * For eviction / restore on suspend / resume objects
-			 * pinned in VRAM must be contiguous
-			 */
-			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
-					     XE_BO_CREATE_GGTT_BIT) ?
-				TTM_PL_FLAG_CONTIGUOUS : 0,
-		};
-		*c += 1;
-
-		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
-			bo->props.preferred_mem_type = XE_PL_VRAM1;
+	if (bo->props.preferred_gt == XE_GT1) {
+		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c);
+		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c);
+	} else {
+		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c);
+		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c);
 	}
 }
 
@@ -184,20 +171,9 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 
 	if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) {
 		try_add_system(bo, places, bo_flags, &c);
-		if (bo->props.preferred_gt == XE_GT1) {
-			try_add_vram1(xe, bo, places, bo_flags, &c);
-			try_add_vram0(xe, bo, places, bo_flags, &c);
-		} else {
-			try_add_vram0(xe, bo, places, bo_flags, &c);
-			try_add_vram1(xe, bo, places, bo_flags, &c);
-		}
-	} else if (bo->props.preferred_gt == XE_GT1) {
-		try_add_vram1(xe, bo, places, bo_flags, &c);
-		try_add_vram0(xe, bo, places, bo_flags, &c);
-		try_add_system(bo, places, bo_flags, &c);
+		try_add_vram(xe, bo, places, bo_flags, &c);
 	} else {
-		try_add_vram0(xe, bo, places, bo_flags, &c);
-		try_add_vram1(xe, bo, places, bo_flags, &c);
+		try_add_vram(xe, bo, places, bo_flags, &c);
 		try_add_system(bo, places, bo_flags, &c);
 	}
 	try_add_stolen(xe, bo, places, bo_flags, &c);
-- 
GitLab


From 011d8fa362962424c3f444c1dac3653f86f350b3 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 27 Mar 2023 10:58:24 -0700
Subject: [PATCH 1439/2340] drm/xe/pat: Define PAT tables as static

The tables are only used within this file; there's no reason for them
not to be static.

Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230327175824.2967914-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index b59b6a2347bbd..c2faf0931649f 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -32,7 +32,7 @@
 #define TGL_PAT_WC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1)
 #define TGL_PAT_UC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0)
 
-const u32 tgl_pat_table[] = {
+static const u32 tgl_pat_table[] = {
 	[0] = TGL_PAT_WB,
 	[1] = TGL_PAT_WC,
 	[2] = TGL_PAT_WT,
@@ -43,7 +43,7 @@ const u32 tgl_pat_table[] = {
 	[7] = TGL_PAT_WB,
 };
 
-const u32 pvc_pat_table[] = {
+static const u32 pvc_pat_table[] = {
 	[0] = TGL_PAT_UC,
 	[1] = TGL_PAT_WC,
 	[2] = TGL_PAT_WT,
@@ -54,7 +54,7 @@ const u32 pvc_pat_table[] = {
 	[7] = PVC_PAT_CLOS(2) | TGL_PAT_WB,
 };
 
-const u32 mtl_pat_table[] = {
+static const u32 mtl_pat_table[] = {
 	[0] = MTL_PAT_0_WB,
 	[1] = MTL_PAT_1_WT,
 	[2] = MTL_PAT_3_UC,
-- 
GitLab


From 33de290bd1792b7e60b1379f1eb9185c481e06eb Mon Sep 17 00:00:00 2001
From: "Chang, Bruce" <yu.bruce.chang@intel.com>
Date: Thu, 23 Mar 2023 19:38:58 +0000
Subject: [PATCH 1440/2340] drm/xe: don't auto fall back to execlist mode if
 guc failed to init

In general, this is due to FW load failure, should just report
error and fail the probe so that user can easily retry again.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 4 ++--
 drivers/gpu/drm/xe/xe_uc.c | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 245117e67e9bc..6322e0689a9e8 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -379,9 +379,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 			goto err_force_wake;
 	}
 
-	/* Allow driver to load if uC init fails (likely missing firmware) */
 	err = xe_uc_init(&gt->uc);
-	XE_WARN_ON(err);
+	if (err)
+		goto err_force_wake;
 
 	err = xe_uc_init_hwconfig(&gt->uc);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 4ccf2b3435e16..70eabf5671568 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -54,9 +54,6 @@ int xe_uc_init(struct xe_uc *uc)
 	return 0;
 
 err:
-	/* If any uC firmwares not found, fall back to execlists */
-	xe_device_guc_submission_disable(uc_to_xe(uc));
-
 	return ret;
 }
 
-- 
GitLab


From 3d4451d30f36ffe21f8c5eea7db9678330ee83c4 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 28 Mar 2023 12:30:39 -0700
Subject: [PATCH 1441/2340] drm/xe: Better error messages for
 xe_gt_record_default_lrcs

Add some error messages describing the problem when
xe_gt_record_default_lrcs fails.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 6322e0689a9e8..fd7a5b43ba3e3 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -23,6 +23,7 @@
 #include "xe_gt_sysfs.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
+#include "xe_guc_engine_types.h"
 #include "xe_hw_fence.h"
 #include "xe_irq.h"
 #include "xe_lrc.h"
@@ -257,30 +258,43 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 				     hwe, ENGINE_FLAG_WA);
 		if (IS_ERR(e)) {
 			err = PTR_ERR(e);
+			drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,e failed=%d",
+				gt->info.id, hwe->name, err);
 			goto put_vm;
 		}
 
 		/* Prime golden LRC with known good state */
 		err = emit_wa_job(gt, e);
-		if (err)
+		if (err) {
+			drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_wa_job,e failed=%d",
+				gt->info.id, hwe->name, e->guc->id, err);
 			goto put_engine;
+		}
 
 		nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance),
 					 1, hwe, ENGINE_FLAG_WA);
 		if (IS_ERR(nop_e)) {
 			err = PTR_ERR(nop_e);
+			drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,nop_e failed=%d",
+				gt->info.id, hwe->name, err);
 			goto put_engine;
 		}
 
 		/* Switch to different LRC */
 		err = emit_nop_job(gt, nop_e);
-		if (err)
+		if (err) {
+			drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,nop_e failed=%d",
+				gt->info.id, hwe->name, nop_e->guc->id, err);
 			goto put_nop_e;
+		}
 
 		/* Reload golden LRC to record the effect of any indirect W/A */
 		err = emit_nop_job(gt, e);
-		if (err)
+		if (err) {
+			drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,e failed=%d",
+				gt->info.id, hwe->name, e->guc->id, err);
 			goto put_nop_e;
+		}
 
 		xe_map_memcpy_from(xe, default_lrc,
 				   &e->lrc[0].bo->vmap,
-- 
GitLab


From 681818fdb97de821cc1ee6b81c7a09f3ef8fc96d Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 29 Mar 2023 10:33:32 -0700
Subject: [PATCH 1442/2340] drm/xe: Include hardware prefetch buffer in
 batchbuffer allocations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hardware prefetches several cachelines of data from batchbuffers
before they are parsed.  This prefetching only stops when the parser
encounters an MI_BATCH_BUFFER_END instruction (or a nested
MI_BATCH_BUFFER_START), so we must ensure that there is enough padding
at the end of the batchbuffer to prevent the prefetcher from running
past the end of the allocation and potentially faulting.

Bspec: 45717
Link: https://lore.kernel.org/r/20230329173334.4015124-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bb.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 5b24018e2a80b..f326f117ba3bb 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -8,11 +8,26 @@
 #include "regs/xe_gpu_commands.h"
 #include "xe_device.h"
 #include "xe_engine_types.h"
+#include "xe_gt.h"
 #include "xe_hw_fence.h"
 #include "xe_sa.h"
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 
+static int bb_prefetch(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt->xe;
+
+	if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
+		/*
+		 * RCS and CCS require 1K, although other engines would be
+		 * okay with 512.
+		 */
+		return SZ_1K;
+	else
+		return SZ_512;
+}
+
 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 {
 	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
@@ -21,8 +36,14 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 	if (!bb)
 		return ERR_PTR(-ENOMEM);
 
-	bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool :
-			      &gt->usm.bb_pool, 4 * dwords + 4);
+	/*
+	 * We need to allocate space for the requested number of dwords,
+	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
+	 * space to accomodate the platform-specific hardware prefetch
+	 * requirements.
+	 */
+	bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool : &gt->usm.bb_pool,
+			      4 * (dwords + 1) + bb_prefetch(gt));
 	if (IS_ERR(bb->bo)) {
 		err = PTR_ERR(bb->bo);
 		goto err;
-- 
GitLab


From 9b36f7af2024ef30866f5fa0b1132ca924fd81fc Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 29 Mar 2023 10:33:33 -0700
Subject: [PATCH 1443/2340] drm/xe: Adjust batchbuffer space warning when
 creating a job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We should WARN (not BUG) when creating a job if the batchbuffer does not
have sufficient space and padding.  The hardware prefetch requirements
should also be considered.

Link: https://lore.kernel.org/r/20230329173334.4015124-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index f326f117ba3bb..7172801ee5705 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -63,10 +63,10 @@ __xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
 {
 	u32 size = drm_suballoc_size(bb->bo);
 
-	XE_BUG_ON((bb->len * 4 + 1) > size);
-
 	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 
+	WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt) > size);
+
 	xe_sa_bo_flush_write(bb->bo);
 
 	return xe_sched_job_create(kernel_eng, addr);
-- 
GitLab


From 1bf1d86f12d4d07108d480878193acd1e4d87668 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 29 Mar 2023 10:33:34 -0700
Subject: [PATCH 1444/2340] drm/xe: Don't emit extra MI_BATCH_BUFFER_END in WA
 batchbuffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MI_BATCH_BUFFER_END is already added automatically by
__xe_bb_create_job(); including it in the construction of the workaround
batchbuffer results in an unnecessary duplicate.

Link: https://lore.kernel.org/r/20230329173334.4015124-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index fd7a5b43ba3e3..bc821f431c45a 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -203,8 +203,6 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 			bb->cs[bb->len++] = entry->set_bits;
 		}
 	}
-	bb->cs[bb->len++] = MI_NOOP;
-	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 
 	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
 	job = xe_bb_create_wa_job(e, bb, batch_ofs);
-- 
GitLab


From 6b8ddaf3721e86bacc0be72bf12fa76233b9becf Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 23 Mar 2023 22:17:52 -0700
Subject: [PATCH 1445/2340] drm/xe: Remove unused revid from firmware name

The rev field is always 0 so it ends up never used. In i915 it was
introduced because of CML: up to rev 5 it reuses the guc and huc
firmware blobs from KBL. After that there is a specific firmware for
that platform.  This can be reintroduced later if ever needed.

With the removal of revid the packed attribute in
uc_fw_platform_requirement, which is there only for reducing the space
these tables take, can also be removed since it has even more limited
usefulness: currently there's only padding of 2 bytes. Remove the
attribute to avoid the unaligned access.

	$ pahole -C uc_fw_platform_requirement build64/drivers/gpu/drm/xe/xe_uc_fw.o
	struct uc_fw_platform_requirement {
		enum xe_platform           p;                    /*     0     4 */
		const struct uc_fw_blob    blob;                 /*     4    10 */

		/* size: 16, cachelines: 1, members: 2 */
		/* padding: 2 */
		/* last cacheline: 16 bytes */
	};

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230324051754.1346390-2-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 5c3789f670490..a9107e86b81f5 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -39,21 +39,21 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
 
 /*
  * List of required GuC and HuC binaries per-platform.
- * Must be ordered based on platform + revid, from newer to older.
+ * Must be ordered based on platform, from newer to older.
  */
 #define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
-	fw_def(METEORLAKE,   0, guc_def(mtl,  70, 5, 2)) \
-	fw_def(ALDERLAKE_P,  0, guc_def(adlp,  70, 5, 2)) \
-	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 5, 2)) \
-	fw_def(PVC,          0, guc_def(pvc,  70, 5, 2)) \
-	fw_def(DG2,          0, guc_def(dg2,  70, 5, 2)) \
-	fw_def(DG1,          0, guc_def(dg1,  70, 5, 2)) \
-	fw_def(TIGERLAKE,    0, guc_def(tgl,  70, 5, 2))
+	fw_def(METEORLAKE,   guc_def(mtl,  70, 5, 2)) \
+	fw_def(ALDERLAKE_P,  guc_def(adlp,  70, 5, 2)) \
+	fw_def(ALDERLAKE_S,  guc_def(tgl,  70, 5, 2)) \
+	fw_def(PVC,          guc_def(pvc,  70, 5, 2)) \
+	fw_def(DG2,          guc_def(dg2,  70, 5, 2)) \
+	fw_def(DG1,          guc_def(dg1,  70, 5, 2)) \
+	fw_def(TIGERLAKE,    guc_def(tgl,  70, 5, 2))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \
-	fw_def(ALDERLAKE_S,  0, huc_def(tgl)) \
-	fw_def(DG1,          0, huc_def(dg1)) \
-	fw_def(TIGERLAKE,    0, huc_def(tgl))
+	fw_def(ALDERLAKE_S,	huc_def(tgl)) \
+	fw_def(DG1,		huc_def(dg1)) \
+	fw_def(TIGERLAKE,	huc_def(tgl))
 
 #define __MAKE_HUC_FW_PATH(prefix_, name_) \
         "i915/" \
@@ -82,7 +82,7 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
 
 
 /* All blobs need to be declared via MODULE_FIRMWARE() */
-#define XE_UC_MODULE_FW(platform_, revid_, uc_) \
+#define XE_UC_MODULE_FW(platform_, uc_) \
 	MODULE_FIRMWARE(uc_);
 
 XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH)
@@ -109,16 +109,14 @@ struct __packed uc_fw_blob {
 	UC_FW_BLOB(major_, minor_, \
 		   MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, bld_num_))
 
-struct __packed uc_fw_platform_requirement {
+struct uc_fw_platform_requirement {
 	enum xe_platform p;
-	u8 rev; /* first platform rev using this FW */
 	const struct uc_fw_blob blob;
 };
 
-#define MAKE_FW_LIST(platform_, revid_, uc_) \
+#define MAKE_FW_LIST(platform_, uc_) \
 { \
 	.p = XE_##platform_, \
-	.rev = revid_, \
 	.blob = uc_, \
 },
 
@@ -143,7 +141,6 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 	static const struct uc_fw_platform_requirement *fw_blobs;
 	enum xe_platform p = xe->info.platform;
 	u32 fw_count;
-	u8 rev = xe->info.revid;
 	int i;
 
 	XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
@@ -151,7 +148,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 	fw_count = blobs_all[uc_fw->type].count;
 
 	for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
-		if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
+		if (p == fw_blobs[i].p) {
 			const struct uc_fw_blob *blob = &fw_blobs[i].blob;
 
 			uc_fw->path = blob->path;
-- 
GitLab


From f7339fe79654c2b63634d65eb72c089d45029065 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Thu, 30 Mar 2023 21:41:05 +0000
Subject: [PATCH 1446/2340] drm/xe/tests: Use proper batch base address

In xe_migrate_sanity_kunit test, use proper batch base address
by considering usm case.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 17829f878757e..90f4e1c4f029f 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -35,8 +35,9 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
 			  struct xe_bb *bb, u32 second_idx, const char *str,
 			  struct kunit *test)
 {
+	u64 batch_base = xe_migrate_batch_base(m, xe->info.supports_usm);
 	struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb,
-							      m->batch_base_ofs,
+							      batch_base,
 							      second_idx);
 	struct dma_fence *fence;
 
-- 
GitLab


From 370997d168d64e84c12164bffdd326fd240a9790 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Fri, 31 Mar 2023 16:40:12 +0000
Subject: [PATCH 1447/2340] drm/xe/tests: Set correct expectation

In xe_migrate_sanity_kunit test, use correct expected value as
the expected value was not only used for the xe_migrate_clear(),
but also for the xe_migrate_copy() operation.

v2: Add 'Fixes' tag and update commit text

Fixes: 11a2407ed5f0 ("drm/xe: Stop accepting value in xe_migrate_clear")
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 90f4e1c4f029f..862d11b2210f9 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -147,6 +147,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size);
 	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
 
+	expected = 0xc0c0c0c0c0c0c0c0;
 	fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource,
 				bo->ttm.resource);
 	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" :
-- 
GitLab


From c33a721943f46851f10eb34852a3fd1fedcd3639 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Fri, 31 Mar 2023 16:52:50 +0000
Subject: [PATCH 1448/2340] drm/xe: Use proper vram offset

In xe_migrate functions, use proper vram io offset of the
tiles while calculating addresses.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c      | 12 ++++++------
 drivers/gpu/drm/xe/xe_bo.h      |  1 +
 drivers/gpu/drm/xe/xe_migrate.c | 13 ++++++++-----
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index fc8cb96b2cb98..1835f049c21ea 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1206,12 +1206,12 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
  * XXX: This is in the VM bind data path, likely should calculate this once and
  * store, with a recalculation if the BO is moved.
  */
-static uint64_t vram_region_io_offset(struct xe_bo *bo)
+uint64_t vram_region_io_offset(struct ttm_resource *res)
 {
-	struct xe_device *xe = xe_bo_device(bo);
-	struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type);
+	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+	struct xe_gt *gt = mem_type_to_gt(xe, res->mem_type);
 
-	if (bo->ttm.resource->mem_type == XE_PL_STOLEN)
+	if (res->mem_type == XE_PL_STOLEN)
 		return xe_ttm_stolen_gpu_offset(xe);
 
 	return gt->mem.vram.io_start - xe->mem.vram.io_start;
@@ -1298,7 +1298,7 @@ int xe_bo_pin(struct xe_bo *bo)
 			XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
 
 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) -
-				       vram_region_io_offset(bo)) >> PAGE_SHIFT;
+				       vram_region_io_offset(bo->ttm.resource)) >> PAGE_SHIFT;
 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
 
 			spin_lock(&xe->pinned.lock);
@@ -1442,7 +1442,7 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
 
 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
 			     page_size, &cur);
-		return cur.start + offset + vram_region_io_offset(bo);
+		return cur.start + offset + vram_region_io_offset(bo->ttm.resource);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 7b2104d1bda77..dd58edcb93987 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -224,6 +224,7 @@ void xe_bo_vunmap(struct xe_bo *bo);
 bool mem_type_is_vram(u32 mem_type);
 bool xe_bo_is_vram(struct xe_bo *bo);
 bool xe_bo_is_stolen(struct xe_bo *bo);
+uint64_t vram_region_io_offset(struct ttm_resource *res);
 
 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index b9b2a64447d82..13cfb7ad28500 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -392,6 +392,7 @@ static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur)
 
 static u32 pte_update_size(struct xe_migrate *m,
 			   bool is_vram,
+			   struct ttm_resource *res,
 			   struct xe_res_cursor *cur,
 			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
 			   u32 cmd_size, u32 pt_ofs, u32 avail_pts)
@@ -417,7 +418,8 @@ static u32 pte_update_size(struct xe_migrate *m,
 		cmds += cmd_size;
 	} else {
 		/* Offset into identity map. */
-		*L0_ofs = xe_migrate_vram_ofs(cur->start);
+		*L0_ofs = xe_migrate_vram_ofs(cur->start +
+					      vram_region_io_offset(res));
 		cmds += cmd_size;
 	}
 
@@ -467,6 +469,7 @@ static void emit_pte(struct xe_migrate *m,
 					addr |= GEN12_PTE_PS64;
 				}
 
+				addr += vram_region_io_offset(bo->ttm.resource);
 				addr |= GEN12_PPGTT_PTE_LM;
 			}
 			addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
@@ -646,17 +649,17 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 		src_L0 = min(src_L0, dst_L0);
 
-		batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0,
+		batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
 					      &src_L0_ofs, &src_L0_pt, 0, 0,
 					      NUM_PT_PER_BLIT);
 
-		batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0,
+		batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0,
 					      &dst_L0_ofs, &dst_L0_pt, 0,
 					      NUM_PT_PER_BLIT, NUM_PT_PER_BLIT);
 
 		if (copy_system_ccs) {
 			ccs_size = xe_device_ccs_bytes(xe, src_L0);
-			batch_size += pte_update_size(m, false, &ccs_it, &ccs_size,
+			batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size,
 						      &ccs_ofs, &ccs_pt, 0,
 						      2 * NUM_PT_PER_BLIT,
 						      NUM_PT_PER_BLIT);
@@ -879,7 +882,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		/* Calculate final sizes and batch size.. */
 		batch_size = 2 +
-			pte_update_size(m, clear_vram, &src_it,
+			pte_update_size(m, clear_vram, src, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
 					emit_clear_cmd_len(xe), 0,
 					NUM_PT_PER_BLIT);
-- 
GitLab


From 96578d106b30dc3a6550624477a092d793052660 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 31 Mar 2023 16:09:02 -0700
Subject: [PATCH 1449/2340] drm/xe: Fix platform order

Platform order in enum xe_platform started to be used by some parts of
the code, like the GuC/HuC firmware loading logic. The order itself is
not very important, but it's better to follow a convention: as was
documented in the comment above the enum, reorder the platforms by
graphics version. While at it, remove the gen terminology.

v2:
  - Use "graphics version" instead of chronological order (Matt Roper)
  - Also change pciidlist to follow the same order
  - Remove "gen" from comments around enum xe_platform

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230331230902.1603294-1-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c            |  4 ++--
 drivers/gpu/drm/xe/xe_platform_types.h | 12 +++++++-----
 drivers/gpu/drm/xe/xe_uc_fw.c          |  4 ++--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 0a3b61f08d370..e7bfcc5f51c26 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -281,11 +281,11 @@ static const struct xe_device_desc mtl_desc = {
  */
 static const struct pci_device_id pciidlist[] = {
 	XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
 	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
 	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
 	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
-	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
-	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
 	XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
 	{ }
 };
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 72612c832e88b..80c19bffe79c7 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -6,27 +6,29 @@
 #ifndef _XE_PLATFORM_INFO_TYPES_H_
 #define _XE_PLATFORM_INFO_TYPES_H_
 
-/* Keep in gen based order, and chronological order within a gen */
+/*
+ * Keep this in graphics version based order and chronological order within a
+ * version
+ */
 enum xe_platform {
 	XE_PLATFORM_UNINITIALIZED = 0,
-	/* gen12 */
 	XE_TIGERLAKE,
 	XE_ROCKETLAKE,
+	XE_ALDERLAKE_S,
+	XE_ALDERLAKE_P,
 	XE_DG1,
 	XE_DG2,
 	XE_PVC,
-	XE_ALDERLAKE_S,
-	XE_ALDERLAKE_P,
 	XE_METEORLAKE,
 };
 
 enum xe_subplatform {
 	XE_SUBPLATFORM_UNINITIALIZED = 0,
 	XE_SUBPLATFORM_NONE,
+	XE_SUBPLATFORM_ADLP_RPLU,
 	XE_SUBPLATFORM_DG2_G10,
 	XE_SUBPLATFORM_DG2_G11,
 	XE_SUBPLATFORM_DG2_G12,
-	XE_SUBPLATFORM_ADLP_RPLU,
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index a9107e86b81f5..777fa6f523dca 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -43,11 +43,11 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
  */
 #define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
 	fw_def(METEORLAKE,   guc_def(mtl,  70, 5, 2)) \
-	fw_def(ALDERLAKE_P,  guc_def(adlp,  70, 5, 2)) \
-	fw_def(ALDERLAKE_S,  guc_def(tgl,  70, 5, 2)) \
 	fw_def(PVC,          guc_def(pvc,  70, 5, 2)) \
 	fw_def(DG2,          guc_def(dg2,  70, 5, 2)) \
 	fw_def(DG1,          guc_def(dg1,  70, 5, 2)) \
+	fw_def(ALDERLAKE_P,  guc_def(adlp,  70, 5, 2)) \
+	fw_def(ALDERLAKE_S,  guc_def(tgl,  70, 5, 2)) \
 	fw_def(TIGERLAKE,    guc_def(tgl,  70, 5, 2))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \
-- 
GitLab


From 1a545ed74b33eaf6dee6d4159be07819ad89a569 Mon Sep 17 00:00:00 2001
From: "Chang, Bruce" <yu.bruce.chang@intel.com>
Date: Mon, 3 Apr 2023 22:20:31 +0000
Subject: [PATCH 1450/2340] drm/xe: fix pvc unload issue

Currently, unload pvc driver will generate a null dereference
and the call stack is as below.

[ 4850.618000] Call Trace:
[ 4850.620740]  <TASK>
[ 4850.623134]  ttm_bo_cleanup_memtype_use+0x3f/0x50 [ttm]
[ 4850.628661]  ttm_bo_release+0x154/0x2c0 [ttm]
[ 4850.633317]  ? drm_buddy_fini+0x62/0x80 [drm_buddy]
[ 4850.638487]  ? __kmem_cache_free+0x27d/0x2c0
[ 4850.643054]  ttm_bo_put+0x38/0x60 [ttm]
[ 4850.647190]  xe_gem_object_free+0x1f/0x30 [xe]
[ 4850.651945]  drm_gem_object_free+0x1e/0x30 [drm]
[ 4850.656904]  ggtt_fini_noalloc+0x9d/0xe0 [xe]
[ 4850.661574]  drm_managed_release+0xb5/0x150 [drm]
[ 4850.666617]  drm_dev_release+0x30/0x50 [drm]
[ 4850.671209]  devm_drm_dev_init_release+0x3c/0x60 [drm]

There are a couple issues, but the main one is due to TTM has only
one TTM_PL_TT region, but since pvc has 2 tiles and tries to setup
1 TTM_PL_TT each tile. The second will overwrite the first one.

During unload time, the first tile will reset the TTM_PL_TT manger
and when the second tile is trying to free Bo and it will generate
the null reference since the TTM manage is already got reset to 0.

The fix is to use one global TTM_PL_TT manager.

v2: make gtt mgr global and change the name to sys_mgr

Cc: Stuart Summers <stuart.summers@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Vivi, Rodrigo <rodrigo.vivi@intel.com>
Signed-off-by: Bruce Chang <yu.bruce.chang@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                   |  2 +-
 drivers/gpu/drm/xe/xe_device.c                |  3 +
 drivers/gpu/drm/xe/xe_device.h                |  1 +
 drivers/gpu/drm/xe/xe_device_types.h          |  2 +
 drivers/gpu/drm/xe/xe_gt.c                    | 18 -----
 drivers/gpu/drm/xe/xe_gt_types.h              |  2 -
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h           | 16 -----
 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h     | 18 -----
 .../xe/{xe_ttm_gtt_mgr.c => xe_ttm_sys_mgr.c} | 69 ++++++++-----------
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.h           | 13 ++++
 10 files changed, 47 insertions(+), 97 deletions(-)
 delete mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
 delete mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
 rename drivers/gpu/drm/xe/{xe_ttm_gtt_mgr.c => xe_ttm_sys_mgr.c} (52%)
 create mode 100644 drivers/gpu/drm/xe/xe_ttm_sys_mgr.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 6ef80889fddb3..42459727e67ac 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -88,7 +88,7 @@ xe-y += xe_bb.o \
 	xe_step.o \
 	xe_sync.o \
 	xe_trace.o \
-	xe_ttm_gtt_mgr.o \
+	xe_ttm_sys_mgr.o \
 	xe_ttm_stolen_mgr.o \
 	xe_ttm_vram_mgr.o \
 	xe_tuning.o \
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index ffacf80c89422..b13bbdeeef51a 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -27,6 +27,7 @@
 #include "xe_pm.h"
 #include "xe_query.h"
 #include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
 #include "xe_vm.h"
 #include "xe_vm_madvise.h"
 #include "xe_wait_user_fence.h"
@@ -262,6 +263,8 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		goto err_irq_shutdown;
 
+	xe_ttm_sys_mgr_init(xe);
+
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_init_noalloc(gt);
 		if (err)
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index d277f8985f7bf..cbae480a20922 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -116,4 +116,5 @@ static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
 }
 
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 3917b9152eb94..74326091bf98f 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -134,6 +134,8 @@ struct xe_device {
 			/** @mapping: pointer to VRAM mappable space */
 			void *__iomem mapping;
 		} vram;
+		/** @sys_mgr: system TTM manager */
+		struct ttm_resource_manager sys_mgr;
 	} mem;
 
 	/** @usm: unified memory state */
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index bc821f431c45a..daaf93e23bbfc 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -36,7 +36,6 @@
 #include "xe_ring_ops.h"
 #include "xe_sa.h"
 #include "xe_sched_job.h"
-#include "xe_ttm_gtt_mgr.h"
 #include "xe_ttm_vram_mgr.h"
 #include "xe_tuning.h"
 #include "xe_uc.h"
@@ -77,16 +76,11 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
 		if (!gt->mem.vram_mgr)
 			return -ENOMEM;
 
-		gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr),
-					       GFP_KERNEL);
-		if (!gt->mem.gtt_mgr)
-			return -ENOMEM;
 	} else {
 		struct xe_gt *full_gt = xe_find_full_gt(gt);
 
 		gt->mem.ggtt = full_gt->mem.ggtt;
 		gt->mem.vram_mgr = full_gt->mem.vram_mgr;
-		gt->mem.gtt_mgr = full_gt->mem.gtt_mgr;
 	}
 
 	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
@@ -98,26 +92,14 @@ static int gt_ttm_mgr_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
-	struct sysinfo si;
-	u64 gtt_size;
-
-	si_meminfo(&si);
-	gtt_size = (u64)si.totalram * si.mem_unit * 3/4;
 
 	if (gt->mem.vram.size) {
 		err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr);
 		if (err)
 			return err;
-		gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20),
-				   (u64)gt->mem.vram.size),
-			       gtt_size);
 		xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1;
 	}
 
-	err = xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size);
-	if (err)
-		return err;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 8f29aba455e05..9d3117fad2e4b 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -162,8 +162,6 @@ struct xe_gt {
 		} vram;
 		/** @vram_mgr: VRAM TTM manager */
 		struct xe_ttm_vram_mgr *vram_mgr;
-		/** @gtt_mr: GTT TTM manager */
-		struct xe_ttm_gtt_mgr *gtt_mgr;
 		/** @ggtt: Global graphics translation table */
 		struct xe_ggtt *ggtt;
 	} mem;
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
deleted file mode 100644
index d1d57cb9c2b89..0000000000000
--- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _XE_TTGM_GTT_MGR_H_
-#define _XE_TTGM_GTT_MGR_H_
-
-#include "xe_ttm_gtt_mgr_types.h"
-
-struct xe_gt;
-
-int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
-			u64 gtt_size);
-
-#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
deleted file mode 100644
index c66737488326f..0000000000000
--- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _XE_TTM_GTT_MGR_TYPES_H_
-#define _XE_TTM_GTT_MGR_TYPES_H_
-
-#include <drm/ttm/ttm_device.h>
-
-struct xe_gt;
-
-struct xe_ttm_gtt_mgr {
-	struct xe_gt *gt;
-	struct ttm_resource_manager manager;
-};
-
-#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
similarity index 52%
rename from drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
rename to drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 8075781070f27..5b0674bbb8ed0 100644
--- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -4,6 +4,8 @@
  * Copyright (C) 2021-2002 Red Hat
  */
 
+#include "xe_ttm_sys_mgr.h"
+
 #include <drm/drm_managed.h>
 
 #include <drm/ttm/ttm_placement.h>
@@ -12,31 +14,24 @@
 
 #include "xe_bo.h"
 #include "xe_gt.h"
-#include "xe_ttm_gtt_mgr.h"
 
-struct xe_ttm_gtt_node {
+struct xe_ttm_sys_node {
 	struct ttm_buffer_object *tbo;
 	struct ttm_range_mgr_node base;
 };
 
-static inline struct xe_ttm_gtt_mgr *
-to_gtt_mgr(struct ttm_resource_manager *man)
-{
-	return container_of(man, struct xe_ttm_gtt_mgr, manager);
-}
-
-static inline struct xe_ttm_gtt_node *
-to_xe_ttm_gtt_node(struct ttm_resource *res)
+static inline struct xe_ttm_sys_node *
+to_xe_ttm_sys_node(struct ttm_resource *res)
 {
-	return container_of(res, struct xe_ttm_gtt_node, base.base);
+	return container_of(res, struct xe_ttm_sys_node, base.base);
 }
 
-static int xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man,
+static int xe_ttm_sys_mgr_new(struct ttm_resource_manager *man,
 			      struct ttm_buffer_object *tbo,
 			      const struct ttm_place *place,
 			      struct ttm_resource **res)
 {
-	struct xe_ttm_gtt_node *node;
+	struct xe_ttm_sys_node *node;
 	int r;
 
 	node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
@@ -66,32 +61,31 @@ err_fini:
 	return r;
 }
 
-static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man,
+static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man,
 			       struct ttm_resource *res)
 {
-	struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res);
+	struct xe_ttm_sys_node *node = to_xe_ttm_sys_node(res);
 
 	ttm_resource_fini(man, res);
 	kfree(node);
 }
 
-static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man,
+static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man,
 				 struct drm_printer *printer)
 {
 
 }
 
-static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = {
-	.alloc = xe_ttm_gtt_mgr_new,
-	.free = xe_ttm_gtt_mgr_del,
-	.debug = xe_ttm_gtt_mgr_debug
+static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = {
+	.alloc = xe_ttm_sys_mgr_new,
+	.free = xe_ttm_sys_mgr_del,
+	.debug = xe_ttm_sys_mgr_debug
 };
 
-static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg)
+static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg)
 {
-	struct xe_ttm_gtt_mgr *mgr = arg;
-	struct xe_device *xe = gt_to_xe(mgr->gt);
-	struct ttm_resource_manager *man = &mgr->manager;
+	struct xe_device *xe = (struct xe_device *)arg;
+	struct ttm_resource_manager *man = &xe->mem.sys_mgr;
 	int err;
 
 	ttm_resource_manager_set_used(man, false);
@@ -104,27 +98,18 @@ static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg)
 	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL);
 }
 
-int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
-			u64 gtt_size)
+int xe_ttm_sys_mgr_init(struct xe_device *xe)
 {
-	struct xe_device *xe = gt_to_xe(gt);
-	struct ttm_resource_manager *man = &mgr->manager;
-	int err;
-
-	XE_BUG_ON(xe_gt_is_media_type(gt));
+	struct ttm_resource_manager *man = &xe->mem.sys_mgr;
+	struct sysinfo si;
+	u64 gtt_size;
 
-	mgr->gt = gt;
+	si_meminfo(&si);
+	gtt_size = (u64)si.totalram * si.mem_unit * 3/4;
 	man->use_tt = true;
-	man->func = &xe_ttm_gtt_mgr_func;
-
+	man->func = &xe_ttm_sys_mgr_func;
 	ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT);
-
-	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, &mgr->manager);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man);
 	ttm_resource_manager_set_used(man, true);
-
-	err = drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr);
-	if (err)
-		return err;
-
-	return 0;
+	return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe);
 }
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h
new file mode 100644
index 0000000000000..e8f5cd395b289
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TTM_SYS_MGR_H_
+#define _XE_TTM_SYS_MGR_H_
+
+struct xe_device;
+
+int xe_ttm_sys_mgr_init(struct xe_device *xe);
+
+#endif
-- 
GitLab


From 06d06064f725c207a4d14b7410f5498d68c1fb86 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:20:59 -0700
Subject: [PATCH 1451/2340] drm/xe/irq: Drop gen3_ prefixes

"Gen" terminology should be avoided in the Xe driver and "gen3" refers
to platforms that are 9 (!!) graphics generations earlier than the
oldest supported by the Xe driver, so this prefix really doesn't make
sense.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 42 ++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 9527e7fb9b6ef..afaebc0c589e9 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -18,7 +18,7 @@
 #include "xe_hw_engine.h"
 #include "xe_mmio.h"
 
-static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
+static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
 {
 	u32 val = xe_mmio_read32(gt, reg.reg);
 
@@ -34,24 +34,24 @@ static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
 	xe_mmio_read32(gt, reg.reg);
 }
 
-static void gen3_irq_init(struct xe_gt *gt,
-			  i915_reg_t imr, u32 imr_val,
-			  i915_reg_t ier, u32 ier_val,
-			  i915_reg_t iir)
+static void irq_init(struct xe_gt *gt,
+		     i915_reg_t imr, u32 imr_val,
+		     i915_reg_t ier, u32 ier_val,
+		     i915_reg_t iir)
 {
-	gen3_assert_iir_is_zero(gt, iir);
+	assert_iir_is_zero(gt, iir);
 
 	xe_mmio_write32(gt, ier.reg, ier_val);
 	xe_mmio_write32(gt, imr.reg, imr_val);
 	xe_mmio_read32(gt, imr.reg);
 }
-#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \
-	gen3_irq_init((gt), \
-		      type##IMR, imr_val, \
-		      type##IER, ier_val, \
-		      type##IIR)
+#define IRQ_INIT(gt, type, imr_val, ier_val) \
+	irq_init((gt), \
+		 type##IMR, imr_val, \
+		 type##IER, ier_val, \
+		 type##IIR)
 
-static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
+static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
 			   i915_reg_t ier)
 {
 	xe_mmio_write32(gt, imr.reg, 0xffffffff);
@@ -65,8 +65,8 @@ static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
 	xe_mmio_write32(gt, iir.reg, 0xffffffff);
 	xe_mmio_read32(gt, iir.reg);
 }
-#define GEN3_IRQ_RESET(gt, type) \
-	gen3_irq_reset((gt), type##IMR, type##IIR, type##IER)
+#define IRQ_RESET(gt, type) \
+	irq_reset((gt), type##IMR, type##IIR, type##IER)
 
 static u32 gen11_intr_disable(struct xe_gt *gt)
 {
@@ -172,8 +172,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 
 	gen11_gt_irq_postinstall(xe, gt);
 
-	GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
-		      GEN11_GU_MISC_GSE);
+	IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
 
 	gen11_intr_enable(gt, true);
 }
@@ -332,8 +331,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
 	gen11_gt_irq_postinstall(xe, gt);
 
-	GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
-		      GEN11_GU_MISC_GSE);
+	IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
 
 	if (gt->info.id == XE_GT0)
 		dg1_intr_enable(xe, true);
@@ -434,8 +432,8 @@ static void gen11_irq_reset(struct xe_gt *gt)
 
 	gen11_gt_irq_reset(gt);
 
-	GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
-	GEN3_IRQ_RESET(gt, GEN8_PCU_);
+	IRQ_RESET(gt, GEN11_GU_MISC_);
+	IRQ_RESET(gt, GEN8_PCU_);
 }
 
 static void dg1_irq_reset(struct xe_gt *gt)
@@ -445,8 +443,8 @@ static void dg1_irq_reset(struct xe_gt *gt)
 
 	gen11_gt_irq_reset(gt);
 
-	GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
-	GEN3_IRQ_RESET(gt, GEN8_PCU_);
+	IRQ_RESET(gt, GEN11_GU_MISC_);
+	IRQ_RESET(gt, GEN8_PCU_);
 }
 
 static void xe_irq_reset(struct xe_device *xe)
-- 
GitLab


From 9293b67de6602bcf0415da0f3ae3dbf98396183c Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:21:00 -0700
Subject: [PATCH 1452/2340] drm/xe/irq: Add helpers to find ISR/IIR/IMR/IER
 registers

For cases where IRQ_INIT and IRQ_RESET are used, the relevant interrupt
registers are always consecutive and ordered ISR, IMR, IIR, IER.  Adding
helpers to look these up from a base offset will let us eliminate some
of the CPP pasting and simplify other upcoming patches.

v2:
 - s/_REGS/_OFFSET/ for consistency.  (Lucas)
 - Move IMR/IIR/IER helpers into xe_irq.c; they aren't needed anywhere
   else.  (Lucas)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h | 11 ++---------
 drivers/gpu/drm/xe/xe_irq.c       | 32 +++++++++++++++++++------------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 2e7fbdedb5eb0..61b6b356c90e1 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -72,15 +72,8 @@
 
 #define SOFTWARE_FLAGS_SPR33			_MMIO(0x4f084)
 
-#define GEN8_PCU_ISR				_MMIO(0x444e0)
-#define GEN8_PCU_IMR				_MMIO(0x444e4)
-#define GEN8_PCU_IIR				_MMIO(0x444e8)
-#define GEN8_PCU_IER				_MMIO(0x444ec)
-
-#define GEN11_GU_MISC_ISR			_MMIO(0x444f0)
-#define GEN11_GU_MISC_IMR			_MMIO(0x444f4)
-#define GEN11_GU_MISC_IIR			_MMIO(0x444f8)
-#define GEN11_GU_MISC_IER			_MMIO(0x444fc)
+#define PCU_IRQ_OFFSET				0x444e0
+#define GU_MISC_IRQ_OFFSET			0x444f0
 #define   GEN11_GU_MISC_GSE			(1 << 27)
 
 #define GEN11_GFX_MSTR_IRQ			_MMIO(0x190010)
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index afaebc0c589e9..64e0e74f66a22 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -18,6 +18,14 @@
 #include "xe_hw_engine.h"
 #include "xe_mmio.h"
 
+/*
+ * Interrupt registers for a unit are always consecutive and ordered
+ * ISR, IMR, IIR, IER.
+ */
+#define IMR(offset)				_MMIO(offset + 0x4)
+#define IIR(offset)				_MMIO(offset + 0x8)
+#define IER(offset)				_MMIO(offset + 0xc)
+
 static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
 {
 	u32 val = xe_mmio_read32(gt, reg.reg);
@@ -47,9 +55,9 @@ static void irq_init(struct xe_gt *gt,
 }
 #define IRQ_INIT(gt, type, imr_val, ier_val) \
 	irq_init((gt), \
-		 type##IMR, imr_val, \
-		 type##IER, ier_val, \
-		 type##IIR)
+		 IMR(type), imr_val, \
+		 IER(type), ier_val, \
+		 IIR(type))
 
 static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
 			   i915_reg_t ier)
@@ -66,7 +74,7 @@ static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
 	xe_mmio_read32(gt, iir.reg);
 }
 #define IRQ_RESET(gt, type) \
-	irq_reset((gt), type##IMR, type##IIR, type##IER)
+	irq_reset((gt), IMR(type), IIR(type), IER(type))
 
 static u32 gen11_intr_disable(struct xe_gt *gt)
 {
@@ -89,9 +97,9 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
 	if (!(master_ctl & GEN11_GU_MISC_IRQ))
 		return 0;
 
-	iir = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg);
+	iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET).reg);
 	if (likely(iir))
-		xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, iir);
+		xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET).reg, iir);
 
 	return iir;
 }
@@ -172,7 +180,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 
 	gen11_gt_irq_postinstall(xe, gt);
 
-	IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
+	IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
 
 	gen11_intr_enable(gt, true);
 }
@@ -331,7 +339,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
 	gen11_gt_irq_postinstall(xe, gt);
 
-	IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
+	IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
 
 	if (gt->info.id == XE_GT0)
 		dg1_intr_enable(xe, true);
@@ -432,8 +440,8 @@ static void gen11_irq_reset(struct xe_gt *gt)
 
 	gen11_gt_irq_reset(gt);
 
-	IRQ_RESET(gt, GEN11_GU_MISC_);
-	IRQ_RESET(gt, GEN8_PCU_);
+	IRQ_RESET(gt, GU_MISC_IRQ_OFFSET);
+	IRQ_RESET(gt, PCU_IRQ_OFFSET);
 }
 
 static void dg1_irq_reset(struct xe_gt *gt)
@@ -443,8 +451,8 @@ static void dg1_irq_reset(struct xe_gt *gt)
 
 	gen11_gt_irq_reset(gt);
 
-	IRQ_RESET(gt, GEN11_GU_MISC_);
-	IRQ_RESET(gt, GEN8_PCU_);
+	IRQ_RESET(gt, GU_MISC_IRQ_OFFSET);
+	IRQ_RESET(gt, PCU_IRQ_OFFSET);
 }
 
 static void xe_irq_reset(struct xe_device *xe)
-- 
GitLab


From ca14d553434ed1e1522afb8f37ed7b6fb2b9f043 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:21:01 -0700
Subject: [PATCH 1453/2340] drm/xe/irq: Drop IRQ_INIT and IRQ_RESET macros

It's no longer necessary to wrap these operations in macros; a simple
function will suffice.  Also switch to function names that more clearly
describe what operation is being performed:  unmask_and_enable() and
mask_and_disable().

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 63 +++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 64e0e74f66a22..202d961824113 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -42,39 +42,40 @@ static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
 	xe_mmio_read32(gt, reg.reg);
 }
 
-static void irq_init(struct xe_gt *gt,
-		     i915_reg_t imr, u32 imr_val,
-		     i915_reg_t ier, u32 ier_val,
-		     i915_reg_t iir)
+/*
+ * Unmask and enable the specified interrupts.  Does not check current state,
+ * so any bits not specified here will become masked and disabled.
+ */
+static void unmask_and_enable(struct xe_gt *gt, u32 irqregs, u32 bits)
 {
-	assert_iir_is_zero(gt, iir);
+	/*
+	 * If we're just enabling an interrupt now, it shouldn't already
+	 * be raised in the IIR.
+	 */
+	assert_iir_is_zero(gt, IIR(irqregs));
+
+	xe_mmio_write32(gt, IER(irqregs).reg, bits);
+	xe_mmio_write32(gt, IMR(irqregs).reg, ~bits);
 
-	xe_mmio_write32(gt, ier.reg, ier_val);
-	xe_mmio_write32(gt, imr.reg, imr_val);
-	xe_mmio_read32(gt, imr.reg);
+	/* Posting read */
+	xe_mmio_read32(gt, IMR(irqregs).reg);
 }
-#define IRQ_INIT(gt, type, imr_val, ier_val) \
-	irq_init((gt), \
-		 IMR(type), imr_val, \
-		 IER(type), ier_val, \
-		 IIR(type))
-
-static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
-			   i915_reg_t ier)
+
+/* Mask and disable all interrupts. */
+static void mask_and_disable(struct xe_gt *gt, u32 irqregs)
 {
-	xe_mmio_write32(gt, imr.reg, 0xffffffff);
-	xe_mmio_read32(gt, imr.reg);
+	xe_mmio_write32(gt, IMR(irqregs).reg, ~0);
+	/* Posting read */
+	xe_mmio_read32(gt, IMR(irqregs).reg);
 
-	xe_mmio_write32(gt, ier.reg, 0);
+	xe_mmio_write32(gt, IER(irqregs).reg, 0);
 
 	/* IIR can theoretically queue up two events. Be paranoid. */
-	xe_mmio_write32(gt, iir.reg, 0xffffffff);
-	xe_mmio_read32(gt, iir.reg);
-	xe_mmio_write32(gt, iir.reg, 0xffffffff);
-	xe_mmio_read32(gt, iir.reg);
+	xe_mmio_write32(gt, IIR(irqregs).reg, ~0);
+	xe_mmio_read32(gt, IIR(irqregs).reg);
+	xe_mmio_write32(gt, IIR(irqregs).reg, ~0);
+	xe_mmio_read32(gt, IIR(irqregs).reg);
 }
-#define IRQ_RESET(gt, type) \
-	irq_reset((gt), IMR(type), IIR(type), IER(type))
 
 static u32 gen11_intr_disable(struct xe_gt *gt)
 {
@@ -180,7 +181,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 
 	gen11_gt_irq_postinstall(xe, gt);
 
-	IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
+	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE);
 
 	gen11_intr_enable(gt, true);
 }
@@ -339,7 +340,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
 	gen11_gt_irq_postinstall(xe, gt);
 
-	IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE);
+	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE);
 
 	if (gt->info.id == XE_GT0)
 		dg1_intr_enable(xe, true);
@@ -440,8 +441,8 @@ static void gen11_irq_reset(struct xe_gt *gt)
 
 	gen11_gt_irq_reset(gt);
 
-	IRQ_RESET(gt, GU_MISC_IRQ_OFFSET);
-	IRQ_RESET(gt, PCU_IRQ_OFFSET);
+	mask_and_disable(gt, GU_MISC_IRQ_OFFSET);
+	mask_and_disable(gt, PCU_IRQ_OFFSET);
 }
 
 static void dg1_irq_reset(struct xe_gt *gt)
@@ -451,8 +452,8 @@ static void dg1_irq_reset(struct xe_gt *gt)
 
 	gen11_gt_irq_reset(gt);
 
-	IRQ_RESET(gt, GU_MISC_IRQ_OFFSET);
-	IRQ_RESET(gt, PCU_IRQ_OFFSET);
+	mask_and_disable(gt, GU_MISC_IRQ_OFFSET);
+	mask_and_disable(gt, PCU_IRQ_OFFSET);
 }
 
 static void xe_irq_reset(struct xe_device *xe)
-- 
GitLab


From 6b7ece97dd21d2b80a41f6192f89f8848c3b1d76 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:21:02 -0700
Subject: [PATCH 1454/2340] drm/xe/irq: Drop unnecessary GEN11_ and GEN12_
 register prefixes

Any interrupt registers that were introduced by platforms i915
considered to be "gen11" or "gen12" are present on all platforms that
the Xe driver supports; drop the unnecessary prefixes.

While working in the area, also convert a few open-coded bit
manipulations over to REG_BIT and REG_FIELD_GET notation.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-5-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo: removed display. That was later squashed to the xe Display patch]
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  42 +++++------
 drivers/gpu/drm/xe/regs/xe_regs.h    |  12 +--
 drivers/gpu/drm/xe/xe_guc.c          |   6 +-
 drivers/gpu/drm/xe/xe_irq.c          | 109 +++++++++++++--------------
 4 files changed, 84 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index e33885f429b5b..23d3b8f7e349d 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -348,34 +348,34 @@
 #define GFX_FLSH_CNTL_GEN6			_MMIO(0x101008)
 #define   GFX_FLSH_CNTL_EN			(1 << 0)
 
-#define GEN11_GT_INTR_DW(x)			_MMIO(0x190018 + ((x) * 4))
+#define GT_INTR_DW(x)				_MMIO(0x190018 + ((x) * 4))
 
-#define GEN11_GUC_SG_INTR_ENABLE		_MMIO(0x190038)
+#define GUC_SG_INTR_ENABLE			_MMIO(0x190038)
 #define   ENGINE1_MASK				REG_GENMASK(31, 16)
 #define   ENGINE0_MASK				REG_GENMASK(15, 0)
 
-#define GEN11_GPM_WGBOXPERF_INTR_ENABLE		_MMIO(0x19003c)
+#define GPM_WGBOXPERF_INTR_ENABLE		_MMIO(0x19003c)
 
-#define GEN11_INTR_IDENTITY_REG(x)		_MMIO(0x190060 + ((x) * 4))
-#define   GEN11_INTR_DATA_VALID			(1 << 31)
-#define   GEN11_INTR_ENGINE_INSTANCE(x)		(((x) & GENMASK(25, 20)) >> 20)
-#define   GEN11_INTR_ENGINE_CLASS(x)		(((x) & GENMASK(18, 16)) >> 16)
-#define   GEN11_INTR_ENGINE_INTR(x)		((x) & 0xffff)
+#define INTR_IDENTITY_REG(x)			_MMIO(0x190060 + ((x) * 4))
+#define   INTR_DATA_VALID			REG_BIT(31)
+#define   INTR_ENGINE_INSTANCE(x)		REG_FIELD_GET(GENMASK(25, 20), x)
+#define   INTR_ENGINE_CLASS(x)			REG_FIELD_GET(GENMASK(18, 16), x)
+#define   INTR_ENGINE_INTR(x)			REG_FIELD_GET(GENMASK(15, 0), x)
 #define   OTHER_GUC_INSTANCE			0
 
-#define GEN11_RENDER_COPY_INTR_ENABLE		_MMIO(0x190030)
-#define GEN11_VCS_VECS_INTR_ENABLE		_MMIO(0x190034)
-#define GEN12_CCS_RSVD_INTR_ENABLE		_MMIO(0x190048)
-#define GEN11_IIR_REG_SELECTOR(x)		_MMIO(0x190070 + ((x) * 4))
-#define GEN11_RCS0_RSVD_INTR_MASK		_MMIO(0x190090)
-#define GEN11_BCS_RSVD_INTR_MASK		_MMIO(0x1900a0)
-#define GEN11_VCS0_VCS1_INTR_MASK		_MMIO(0x1900a8)
-#define GEN11_VCS2_VCS3_INTR_MASK		_MMIO(0x1900ac)
-#define GEN11_VECS0_VECS1_INTR_MASK		_MMIO(0x1900d0)
-#define GEN11_GUC_SG_INTR_MASK			_MMIO(0x1900e8)
-#define GEN11_GPM_WGBOXPERF_INTR_MASK		_MMIO(0x1900ec)
-#define GEN12_CCS0_CCS1_INTR_MASK		_MMIO(0x190100)
-#define GEN12_CCS2_CCS3_INTR_MASK		_MMIO(0x190104)
+#define RENDER_COPY_INTR_ENABLE			_MMIO(0x190030)
+#define VCS_VECS_INTR_ENABLE			_MMIO(0x190034)
+#define CCS_RSVD_INTR_ENABLE			_MMIO(0x190048)
+#define IIR_REG_SELECTOR(x)			_MMIO(0x190070 + ((x) * 4))
+#define RCS0_RSVD_INTR_MASK			_MMIO(0x190090)
+#define BCS_RSVD_INTR_MASK			_MMIO(0x1900a0)
+#define VCS0_VCS1_INTR_MASK			_MMIO(0x1900a8)
+#define VCS2_VCS3_INTR_MASK			_MMIO(0x1900ac)
+#define VECS0_VECS1_INTR_MASK			_MMIO(0x1900d0)
+#define GUC_SG_INTR_MASK			_MMIO(0x1900e8)
+#define GPM_WGBOXPERF_INTR_MASK			_MMIO(0x1900ec)
+#define CCS0_CCS1_INTR_MASK			_MMIO(0x190100)
+#define CCS2_CCS3_INTR_MASK			_MMIO(0x190104)
 #define XEHPC_BCS1_BCS2_INTR_MASK		_MMIO(0x190110)
 #define XEHPC_BCS3_BCS4_INTR_MASK		_MMIO(0x190114)
 #define XEHPC_BCS5_BCS6_INTR_MASK		_MMIO(0x190118)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 61b6b356c90e1..c2a278b25fc9f 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -74,13 +74,13 @@
 
 #define PCU_IRQ_OFFSET				0x444e0
 #define GU_MISC_IRQ_OFFSET			0x444f0
-#define   GEN11_GU_MISC_GSE			(1 << 27)
+#define   GU_MISC_GSE				REG_BIT(27)
 
-#define GEN11_GFX_MSTR_IRQ			_MMIO(0x190010)
-#define   GEN11_MASTER_IRQ			(1 << 31)
-#define   GEN11_GU_MISC_IRQ			(1 << 29)
-#define   GEN11_DISPLAY_IRQ			(1 << 16)
-#define   GEN11_GT_DW_IRQ(x)			(1 << (x))
+#define GFX_MSTR_IRQ				_MMIO(0x190010)
+#define   MASTER_IRQ				REG_BIT(31)
+#define   GU_MISC_IRQ				REG_BIT(29)
+#define   DISPLAY_IRQ				REG_BIT(16)
+#define   GT_DW_IRQ(x)				REG_BIT(x)
 
 #define DG1_MSTR_TILE_INTR			_MMIO(0x190008)
 #define   DG1_MSTR_IRQ				REG_BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 58b9841616e45..ee71b969bcbfc 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -561,12 +561,12 @@ static void guc_enable_irq(struct xe_guc *guc)
 		REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST)  :
 		REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
 
-	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg,
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg,
 			REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
 	if (xe_gt_is_media_type(gt))
-		xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0);
+		xe_mmio_rmw32(gt, GUC_SG_INTR_MASK.reg, events, 0);
 	else
-		xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events);
+		xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~events);
 }
 
 int xe_guc_enable_communication(struct xe_guc *guc)
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 202d961824113..69963f36dbc77 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -79,7 +79,7 @@ static void mask_and_disable(struct xe_gt *gt, u32 irqregs)
 
 static u32 gen11_intr_disable(struct xe_gt *gt)
 {
-	xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0);
+	xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, 0);
 
 	/*
 	 * Now with master disabled, get a sample of level indications
@@ -87,7 +87,7 @@ static u32 gen11_intr_disable(struct xe_gt *gt)
 	 * New indications can and will light up during processing,
 	 * and will generate new interrupt after enabling master.
 	 */
-	return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+	return xe_mmio_read32(gt, GFX_MSTR_IRQ.reg);
 }
 
 static u32
@@ -95,7 +95,7 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
 {
 	u32 iir;
 
-	if (!(master_ctl & GEN11_GU_MISC_IRQ))
+	if (!(master_ctl & GU_MISC_IRQ))
 		return 0;
 
 	iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET).reg);
@@ -107,9 +107,9 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
 
 static inline void gen11_intr_enable(struct xe_gt *gt, bool stall)
 {
-	xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ);
+	xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, MASTER_IRQ);
 	if (stall)
-		xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+		xe_mmio_read32(gt, GFX_MSTR_IRQ.reg);
 }
 
 static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
@@ -132,14 +132,14 @@ static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 	smask = irqs << 16;
 
 	/* Enable RCS, BCS, VCS and VECS class interrupts. */
-	xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask);
-	xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask);
+	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg, dmask);
+	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg, dmask);
 	if (ccs_mask)
-		xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask);
+		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, smask);
 
 	/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
-	xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask);
-	xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask);
+	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg, ~smask);
+	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg, ~smask);
 	if (bcs_mask & (BIT(1)|BIT(2)))
 		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask);
 	if (bcs_mask & (BIT(3)|BIT(4)))
@@ -148,31 +148,31 @@ static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask);
 	if (bcs_mask & (BIT(7)|BIT(8)))
 		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask);
-	xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask);
-	xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask);
+	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~dmask);
+	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~dmask);
 	//if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
-	//	intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask);
+	//	intel_uncore_write(uncore, VCS4_VCS5_INTR_MASK, ~dmask);
 	//if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
-	//	intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask);
-	xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask);
+	//	intel_uncore_write(uncore, VCS6_VCS7_INTR_MASK, ~dmask);
+	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~dmask);
 	//if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
-	//	intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask);
+	//	intel_uncore_write(uncore, VECS2_VECS3_INTR_MASK, ~dmask);
 	if (ccs_mask & (BIT(0)|BIT(1)))
-		xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~dmask);
 	if (ccs_mask & (BIT(2)|BIT(3)))
-		xe_mmio_write32(gt,  GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK.reg, ~dmask);
 
 	/*
 	 * RPS interrupts will get enabled/disabled on demand when RPS itself
 	 * is enabled/disabled.
 	 */
 	/* TODO: gt->pm_ier, gt->pm_imr */
-	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
-	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
 
 	/* Same thing for GuC interrupts */
-	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0);
-	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg,  ~0);
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg,  ~0);
 }
 
 static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
@@ -181,7 +181,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 
 	gen11_gt_irq_postinstall(xe, gt);
 
-	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE);
+	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 
 	gen11_intr_enable(gt, true);
 }
@@ -197,7 +197,7 @@ gen11_gt_engine_identity(struct xe_device *xe,
 
 	lockdep_assert_held(&xe->irq.lock);
 
-	xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit));
+	xe_mmio_write32(gt, IIR_REG_SELECTOR(bank).reg, BIT(bit));
 
 	/*
 	 * NB: Specs do not specify how long to spin wait,
@@ -205,18 +205,17 @@ gen11_gt_engine_identity(struct xe_device *xe,
 	 */
 	timeout_ts = (local_clock() >> 10) + 100;
 	do {
-		ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg);
-	} while (!(ident & GEN11_INTR_DATA_VALID) &&
+		ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank).reg);
+	} while (!(ident & INTR_DATA_VALID) &&
 		 !time_after32(local_clock() >> 10, timeout_ts));
 
-	if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) {
+	if (unlikely(!(ident & INTR_DATA_VALID))) {
 		drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
 			bank, bit, ident);
 		return 0;
 	}
 
-	xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg,
-			GEN11_INTR_DATA_VALID);
+	xe_mmio_write32(gt, INTR_IDENTITY_REG(bank).reg, INTR_DATA_VALID);
 
 	return ident;
 }
@@ -250,24 +249,24 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 	spin_lock(&xe->irq.lock);
 
 	for (bank = 0; bank < 2; bank++) {
-		if (!(master_ctl & GEN11_GT_DW_IRQ(bank)))
+		if (!(master_ctl & GT_DW_IRQ(bank)))
 			continue;
 
 		if (!xe_gt_is_media_type(gt)) {
 			intr_dw[bank] =
-				xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg);
+				xe_mmio_read32(gt, GT_INTR_DW(bank).reg);
 			for_each_set_bit(bit, intr_dw + bank, 32)
 				identity[bit] = gen11_gt_engine_identity(xe, gt,
 									 bank,
 									 bit);
-			xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg,
+			xe_mmio_write32(gt, GT_INTR_DW(bank).reg,
 					intr_dw[bank]);
 		}
 
 		for_each_set_bit(bit, intr_dw + bank, 32) {
-			class = GEN11_INTR_ENGINE_CLASS(identity[bit]);
-			instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]);
-			intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]);
+			class = INTR_ENGINE_CLASS(identity[bit]);
+			instance = INTR_ENGINE_INSTANCE(identity[bit]);
+			intr_vec = INTR_ENGINE_INTR(identity[bit]);
 
 			if (class == XE_ENGINE_CLASS_OTHER) {
 				gen11_gt_other_irq_handler(gt, instance,
@@ -340,7 +339,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
 	gen11_gt_irq_postinstall(xe, gt);
 
-	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE);
+	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 
 	if (gt->info.id == XE_GT0)
 		dg1_intr_enable(xe, true);
@@ -368,7 +367,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 			continue;
 
 		if (!xe_gt_is_media_type(gt))
-			master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+			master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ.reg);
 
 		/*
 		 * We might be in irq handler just when PCIe DPC is initiated
@@ -382,7 +381,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 		}
 
 		if (!xe_gt_is_media_type(gt))
-			xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl);
+			xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl);
 		gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
 	}
 
@@ -399,14 +398,14 @@ static void gen11_gt_irq_reset(struct xe_gt *gt)
 	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
 
 	/* Disable RCS, BCS, VCS and VECS class engines. */
-	xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg,	 0);
-	xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg,	 0);
 	if (ccs_mask)
-		xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0);
+		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, 0);
 
 	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
-	xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg,	~0);
-	xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg,	~0);
 	if (bcs_mask & (BIT(1)|BIT(2)))
 		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0);
 	if (bcs_mask & (BIT(3)|BIT(4)))
@@ -415,24 +414,24 @@ static void gen11_gt_irq_reset(struct xe_gt *gt)
 		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0);
 	if (bcs_mask & (BIT(7)|BIT(8)))
 		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0);
-	xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg,	~0);
-	xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg,	~0);
 //	if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
-//		xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg,   ~0);
+//		xe_mmio_write32(xe, VCS4_VCS5_INTR_MASK.reg,   ~0);
 //	if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
-//		xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg,   ~0);
-	xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg,	~0);
+//		xe_mmio_write32(xe, VCS6_VCS7_INTR_MASK.reg,   ~0);
+	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg,	~0);
 //	if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
-//		xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0);
+//		xe_mmio_write32(xe, VECS2_VECS3_INTR_MASK.reg, ~0);
 	if (ccs_mask & (BIT(0)|BIT(1)))
-		xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0);
+		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~0);
 	if (ccs_mask & (BIT(2)|BIT(3)))
-		xe_mmio_write32(gt,  GEN12_CCS2_CCS3_INTR_MASK.reg, ~0);
+		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK.reg, ~0);
 
-	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
-	xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
-	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg,	 0);
-	xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg,		~0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg,		~0);
 }
 
 static void gen11_irq_reset(struct xe_gt *gt)
-- 
GitLab


From dd12b0ff2cf29904194bc8a5f0a8bc7a2b7041fa Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:21:03 -0700
Subject: [PATCH 1455/2340] drm/xe/irq: Rename and clarify top-level interrupt
 handling routines

Platforms supported by the Xe driver handle top-level interrupts in one
of two ways:
 - Xe_LP platforms only have a "graphics master" register and lack a
   "master tile" register, so top-level interrupt detection and
   enable/disable happens in the graphics master.
 - Xe_LP+ (aka DG1) and beyond have a "master tile" interrupt register
   that controls the enable/disable of top-level interrupts and must
   also be consulted to determine which tiles have received interrupts
   before the driver moves on the process the graphics master register.

For functions that are only relevant to the first set of platforms,
rename the function prefix to Xe_LP since "gen11" doesn't make sense in
the Xe driver.  Also add some comments briefly describing the two
top-level handlers.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-6-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 46 +++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 69963f36dbc77..4bdcccda71693 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -77,7 +77,7 @@ static void mask_and_disable(struct xe_gt *gt, u32 irqregs)
 	xe_mmio_read32(gt, IIR(irqregs).reg);
 }
 
-static u32 gen11_intr_disable(struct xe_gt *gt)
+static u32 xelp_intr_disable(struct xe_gt *gt)
 {
 	xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, 0);
 
@@ -105,7 +105,7 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
 	return iir;
 }
 
-static inline void gen11_intr_enable(struct xe_gt *gt, bool stall)
+static inline void xelp_intr_enable(struct xe_gt *gt, bool stall)
 {
 	xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, MASTER_IRQ);
 	if (stall)
@@ -175,7 +175,7 @@ static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 	xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg,  ~0);
 }
 
-static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
 	/* TODO: PCH */
 
@@ -183,7 +183,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 
 	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 
-	gen11_intr_enable(gt, true);
+	xelp_intr_enable(gt, true);
 }
 
 static u32
@@ -285,7 +285,11 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 	spin_unlock(&xe->irq.lock);
 }
 
-static irqreturn_t gen11_irq_handler(int irq, void *arg)
+/*
+ * Top-level interrupt handler for Xe_LP platforms (which did not have
+ * a "master tile" interrupt register.
+ */
+static irqreturn_t xelp_irq_handler(int irq, void *arg)
 {
 	struct xe_device *xe = arg;
 	struct xe_gt *gt = xe_device_get_gt(xe, 0);	/* Only 1 GT here */
@@ -293,9 +297,9 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	long unsigned int intr_dw[2];
 	u32 identity[32];
 
-	master_ctl = gen11_intr_disable(gt);
+	master_ctl = xelp_intr_disable(gt);
 	if (!master_ctl) {
-		gen11_intr_enable(gt, false);
+		xelp_intr_enable(gt, false);
 		return IRQ_NONE;
 	}
 
@@ -303,7 +307,7 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 
 	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
 
-	gen11_intr_enable(gt, false);
+	xelp_intr_enable(gt, false);
 
 	return IRQ_HANDLED;
 }
@@ -345,6 +349,11 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 		dg1_intr_enable(xe, true);
 }
 
+/*
+ * Top-level interrupt handler for Xe_LP+ and beyond.  These platforms have
+ * a "master tile" interrupt register which must be consulted before the
+ * "graphics master" interrupt register.
+ */
 static irqreturn_t dg1_irq_handler(int irq, void *arg)
 {
 	struct xe_device *xe = arg;
@@ -434,9 +443,9 @@ static void gen11_gt_irq_reset(struct xe_gt *gt)
 	xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg,		~0);
 }
 
-static void gen11_irq_reset(struct xe_gt *gt)
+static void xelp_irq_reset(struct xe_gt *gt)
 {
-	gen11_intr_disable(gt);
+	xelp_intr_disable(gt);
 
 	gen11_gt_irq_reset(gt);
 
@@ -461,13 +470,10 @@ static void xe_irq_reset(struct xe_device *xe)
 	u8 id;
 
 	for_each_gt(gt, xe, id) {
-		if (GRAPHICS_VERx100(xe) >= 1210) {
+		if (GRAPHICS_VERx100(xe) >= 1210)
 			dg1_irq_reset(gt);
-		} else if (GRAPHICS_VER(xe) >= 11) {
-			gen11_irq_reset(gt);
-		} else {
-			drm_err(&xe->drm, "No interrupt reset hook");
-		}
+		else
+			xelp_irq_reset(gt);
 	}
 }
 
@@ -477,10 +483,8 @@ void xe_gt_irq_postinstall(struct xe_gt *gt)
 
 	if (GRAPHICS_VERx100(xe) >= 1210)
 		dg1_irq_postinstall(xe, gt);
-	else if (GRAPHICS_VER(xe) >= 11)
-		gen11_irq_postinstall(xe, gt);
 	else
-		drm_err(&xe->drm, "No interrupt postinstall hook");
+		xelp_irq_postinstall(xe, gt);
 }
 
 static void xe_irq_postinstall(struct xe_device *xe)
@@ -496,10 +500,8 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe)
 {
 	if (GRAPHICS_VERx100(xe) >= 1210) {
 		return dg1_irq_handler;
-	} else if (GRAPHICS_VER(xe) >= 11) {
-		return gen11_irq_handler;
 	} else {
-		return NULL;
+		return xelp_irq_handler;
 	}
 }
 
-- 
GitLab


From c94cd8f2d2784dff57581389f59d3051bc312fc2 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:21:04 -0700
Subject: [PATCH 1456/2340] drm/xe/irq: Drop remaining "gen11_" prefix from IRQ
 functions

The remaining "gen11_*" IRQ functions are common to all platforms
supported by the Xe driver.  Drop the unnecessary prefix.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-7-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 46 ++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 4bdcccda71693..f5b038cb18605 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -91,7 +91,7 @@ static u32 xelp_intr_disable(struct xe_gt *gt)
 }
 
 static u32
-gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
+gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
 {
 	u32 iir;
 
@@ -112,7 +112,7 @@ static inline void xelp_intr_enable(struct xe_gt *gt, bool stall)
 		xe_mmio_read32(gt, GFX_MSTR_IRQ.reg);
 }
 
-static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
 	u32 irqs, dmask, smask;
 	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
@@ -179,7 +179,7 @@ static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
 	/* TODO: PCH */
 
-	gen11_gt_irq_postinstall(xe, gt);
+	gt_irq_postinstall(xe, gt);
 
 	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 
@@ -187,10 +187,10 @@ static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 }
 
 static u32
-gen11_gt_engine_identity(struct xe_device *xe,
-			 struct xe_gt *gt,
-			 const unsigned int bank,
-			 const unsigned int bit)
+gt_engine_identity(struct xe_device *xe,
+		   struct xe_gt *gt,
+		   const unsigned int bank,
+		   const unsigned int bit)
 {
 	u32 timeout_ts;
 	u32 ident;
@@ -223,7 +223,7 @@ gen11_gt_engine_identity(struct xe_device *xe,
 #define   OTHER_MEDIA_GUC_INSTANCE           16
 
 static void
-gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
+gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
 {
 	if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt))
 		return xe_guc_irq_handler(&gt->uc.guc, iir);
@@ -237,9 +237,9 @@ gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
 	}
 }
 
-static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
-				 u32 master_ctl, long unsigned int *intr_dw,
-				 u32 *identity)
+static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
+			   u32 master_ctl, long unsigned int *intr_dw,
+			   u32 *identity)
 {
 	unsigned int bank, bit;
 	u16 instance, intr_vec;
@@ -256,9 +256,8 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 			intr_dw[bank] =
 				xe_mmio_read32(gt, GT_INTR_DW(bank).reg);
 			for_each_set_bit(bit, intr_dw + bank, 32)
-				identity[bit] = gen11_gt_engine_identity(xe, gt,
-									 bank,
-									 bit);
+				identity[bit] = gt_engine_identity(xe, gt,
+								   bank, bit);
 			xe_mmio_write32(gt, GT_INTR_DW(bank).reg,
 					intr_dw[bank]);
 		}
@@ -269,8 +268,7 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 			intr_vec = INTR_ENGINE_INTR(identity[bit]);
 
 			if (class == XE_ENGINE_CLASS_OTHER) {
-				gen11_gt_other_irq_handler(gt, instance,
-							   intr_vec);
+				gt_other_irq_handler(gt, instance, intr_vec);
 				continue;
 			}
 
@@ -303,9 +301,9 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 	}
 
-	gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+	gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
 
-	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
+	gu_misc_iir = gu_misc_irq_ack(gt, master_ctl);
 
 	xelp_intr_enable(gt, false);
 
@@ -341,7 +339,7 @@ static void dg1_intr_enable(struct xe_device *xe, bool stall)
 
 static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 {
-	gen11_gt_irq_postinstall(xe, gt);
+	gt_irq_postinstall(xe, gt);
 
 	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 
@@ -391,17 +389,17 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 
 		if (!xe_gt_is_media_type(gt))
 			xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl);
-		gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+		gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
 	}
 
-	gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
+	gu_misc_iir = gu_misc_irq_ack(gt, master_ctl);
 
 	dg1_intr_enable(xe, false);
 
 	return IRQ_HANDLED;
 }
 
-static void gen11_gt_irq_reset(struct xe_gt *gt)
+static void gt_irq_reset(struct xe_gt *gt)
 {
 	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
 	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
@@ -447,7 +445,7 @@ static void xelp_irq_reset(struct xe_gt *gt)
 {
 	xelp_intr_disable(gt);
 
-	gen11_gt_irq_reset(gt);
+	gt_irq_reset(gt);
 
 	mask_and_disable(gt, GU_MISC_IRQ_OFFSET);
 	mask_and_disable(gt, PCU_IRQ_OFFSET);
@@ -458,7 +456,7 @@ static void dg1_irq_reset(struct xe_gt *gt)
 	if (gt->info.id == 0)
 		dg1_intr_disable(gt_to_xe(gt));
 
-	gen11_gt_irq_reset(gt);
+	gt_irq_reset(gt);
 
 	mask_and_disable(gt, GU_MISC_IRQ_OFFSET);
 	mask_and_disable(gt, PCU_IRQ_OFFSET);
-- 
GitLab


From bf26d6984c28f319eeca22bc8b76399e93613dea Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:21:05 -0700
Subject: [PATCH 1457/2340] drm/xe/irq: Drop commented-out code for
 non-existent media engines

Although the hardware team has set aside some register bits for extra
media engines, no platform supported by the Xe driver today has VCS4-7
or VECS2-3.  Drop the corresponding code (which was already commented
out); we can bring it back easily enough if such engines show up on a
future platform.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-8-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index f5b038cb18605..62ecd71be063b 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -150,13 +150,7 @@ static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask);
 	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~dmask);
 	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~dmask);
-	//if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
-	//	intel_uncore_write(uncore, VCS4_VCS5_INTR_MASK, ~dmask);
-	//if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
-	//	intel_uncore_write(uncore, VCS6_VCS7_INTR_MASK, ~dmask);
 	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~dmask);
-	//if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
-	//	intel_uncore_write(uncore, VECS2_VECS3_INTR_MASK, ~dmask);
 	if (ccs_mask & (BIT(0)|BIT(1)))
 		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~dmask);
 	if (ccs_mask & (BIT(2)|BIT(3)))
@@ -423,13 +417,7 @@ static void gt_irq_reset(struct xe_gt *gt)
 		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0);
 	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg,	~0);
 	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg,	~0);
-//	if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
-//		xe_mmio_write32(xe, VCS4_VCS5_INTR_MASK.reg,   ~0);
-//	if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
-//		xe_mmio_write32(xe, VCS6_VCS7_INTR_MASK.reg,   ~0);
 	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg,	~0);
-//	if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
-//		xe_mmio_write32(xe, VECS2_VECS3_INTR_MASK.reg, ~0);
 	if (ccs_mask & (BIT(0)|BIT(1)))
 		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~0);
 	if (ccs_mask & (BIT(2)|BIT(3)))
-- 
GitLab


From b73d520b3d0ff559da7e15a49ef12a591c61105a Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 31 Mar 2023 17:21:06 -0700
Subject: [PATCH 1458/2340] drm/xe/irq: Don't clobber display interrupts on
 multi-tile platforms

Although our only multi-tile platform today (PVC) doesn't support
display, it's possible that some future multi-tile platform will.
If/when this happens, display interrupts (both traditional display and
ASLE backlight interrupts raised as a Gunit interrupt) should be
delivered to the primary tile.  Save away tile0's master_ctl value so
that it can still be used for display interrupt handling after the GT
loop.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230401002106.588656-9-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 62ecd71be063b..e812a5b66a6b6 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -350,7 +350,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 {
 	struct xe_device *xe = arg;
 	struct xe_gt *gt;
-	u32 master_tile_ctl, master_ctl = 0, gu_misc_iir;
+	u32 master_tile_ctl, master_ctl = 0, tile0_master_ctl = 0, gu_misc_iir;
 	long unsigned int intr_dw[2];
 	u32 identity[32];
 	u8 id;
@@ -384,9 +384,17 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 		if (!xe_gt_is_media_type(gt))
 			xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl);
 		gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+
+		/*
+		 * Save primary tile's master interrupt register for display
+		 * processing below.
+		 */
+		if (id == 0)
+			tile0_master_ctl = master_ctl;
 	}
 
-	gu_misc_iir = gu_misc_irq_ack(gt, master_ctl);
+	/* Gunit GSE interrupts can trigger display backlight operations */
+	gu_misc_iir = gu_misc_irq_ack(gt, tile0_master_ctl);
 
 	dg1_intr_enable(xe, false);
 
-- 
GitLab


From d19ad0e80ebe3da48dc8122d6beca9d3d35df454 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 1 Apr 2023 01:51:45 -0700
Subject: [PATCH 1459/2340] drm/xe: Extract function to initialize xe->info

Extract the part setting up from xe->info from xe_pci_probe() into its
own function. This pairs nicely with the display counterpart, avoids
info initialization to be placed elsewhere and helps future
improvements to build fake devices for tests.

While at it, normalize the names a little bit: the _get() suffix may be
mistaken by lock-related operation, so rename the function to
"find_subplatform()". Also rename the variable to subplatform_desc to
make it easier to understand, even if longer.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230401085151.1786204-2-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 98 ++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index e7bfcc5f51c26..ebd27b917c594 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -343,7 +343,7 @@ static bool id_blocked(u16 device_id)
 }
 
 static const struct xe_subplatform_desc *
-subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc)
+find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc)
 {
 	const struct xe_subplatform_desc *sp;
 	const u16 *id;
@@ -356,49 +356,12 @@ subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc)
 	return NULL;
 }
 
-static void xe_pci_remove(struct pci_dev *pdev)
+static void xe_info_init(struct xe_device *xe,
+			 const struct xe_device_desc *desc,
+			 const struct xe_subplatform_desc *subplatform_desc)
 {
-	struct xe_device *xe;
-
-	xe = pci_get_drvdata(pdev);
-	if (!xe) /* driver load aborted, nothing to cleanup */
-		return;
-
-	xe_device_remove(xe);
-	xe_pm_runtime_fini(xe);
-	pci_set_drvdata(pdev, NULL);
-}
-
-static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	const struct xe_device_desc *desc = (void *)ent->driver_data;
-	const struct xe_subplatform_desc *spd;
-	struct xe_device *xe;
 	struct xe_gt *gt;
 	u8 id;
-	int err;
-
-	if (desc->require_force_probe && !id_forced(pdev->device)) {
-		dev_info(&pdev->dev,
-			 "Your graphics device %04x is not officially supported\n"
-			 "by xe driver in this kernel version. To force Xe probe,\n"
-			 "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
-			 "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
-			 "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
-			 pdev->device, pdev->device, pdev->device,
-			 pdev->device, pdev->device);
-		return -ENODEV;
-	}
-
-	if (id_blocked(pdev->device)) {
-		dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
-			 pdev->vendor, pdev->device);
-		return -ENODEV;
-	}
-
-	xe = xe_device_create(pdev, ent);
-	if (IS_ERR(xe))
-		return PTR_ERR(xe);
 
 	xe->info.graphics_verx100 = desc->graphics_ver * 100 +
 				    desc->graphics_rel;
@@ -417,8 +380,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
 	xe->info.has_link_copy_engine = desc->has_link_copy_engine;
 
-	spd = subplatform_get(xe, desc);
-	xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
+	xe->info.subplatform = subplatform_desc ?
+		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
 	xe->info.step = xe_step_get(xe);
 
 	for (id = 0; id < xe->info.tile_count; ++id) {
@@ -443,9 +406,56 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 				desc->extra_gts[id - 1].mmio_adj_offset;
 		}
 	}
+}
+
+static void xe_pci_remove(struct pci_dev *pdev)
+{
+	struct xe_device *xe;
+
+	xe = pci_get_drvdata(pdev);
+	if (!xe) /* driver load aborted, nothing to cleanup */
+		return;
+
+	xe_device_remove(xe);
+	xe_pm_runtime_fini(xe);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	const struct xe_device_desc *desc = (void *)ent->driver_data;
+	const struct xe_subplatform_desc *subplatform_desc;
+	struct xe_device *xe;
+	int err;
+
+	if (desc->require_force_probe && !id_forced(pdev->device)) {
+		dev_info(&pdev->dev,
+			 "Your graphics device %04x is not officially supported\n"
+			 "by xe driver in this kernel version. To force Xe probe,\n"
+			 "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
+			 "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
+			 "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
+			 pdev->device, pdev->device, pdev->device,
+			 pdev->device, pdev->device);
+		return -ENODEV;
+	}
+
+	if (id_blocked(pdev->device)) {
+		dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
+			 pdev->vendor, pdev->device);
+		return -ENODEV;
+	}
+
+	xe = xe_device_create(pdev, ent);
+	if (IS_ERR(xe))
+		return PTR_ERR(xe);
+
+	subplatform_desc = find_subplatform(xe, desc);
 
+	xe_info_init(xe, desc, subplatform_desc);
 	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d",
-		desc->platform_name, spd ? spd->name : "",
+		desc->platform_name,
+		subplatform_desc ? subplatform_desc->name : "",
 		xe->info.devid, xe->info.revid,
 		xe->info.is_dgfx, xe->info.graphics_verx100,
 		xe->info.media_verx100,
-- 
GitLab


From af049be5a33e12fb993028eb378fd61545e72f5e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 1 Apr 2023 01:51:46 -0700
Subject: [PATCH 1460/2340] drm/xe: Move test infra out of xe_pci.[ch]

Move code out of xe_pci.[ch] into tests/*.[ch], like is done in other
similar compilation units. Even if this is not part of "tests for
xe_pci.c", they are functions exported and required by other tests. It's
better not to clutter the module headers and sources with them.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230401085151.1786204-3-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c       |  2 +
 drivers/gpu/drm/xe/tests/xe_dma_buf.c  |  1 +
 drivers/gpu/drm/xe/tests/xe_migrate.c  |  1 +
 drivers/gpu/drm/xe/tests/xe_pci.c      | 62 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_pci_test.h | 15 +++++++
 drivers/gpu/drm/xe/xe_pci.c            | 52 +--------------------
 drivers/gpu/drm/xe/xe_pci.h            |  9 ----
 7 files changed, 82 insertions(+), 60 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/tests/xe_pci.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_pci_test.h

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 3c60cbdf516c1..aa433a7b59b7e 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,8 @@
 #include <kunit/test.h>
 
 #include "tests/xe_bo_test.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
 
 #include "xe_bo_evict.h"
 #include "xe_pci.h"
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index e66a8361ae1f0..cf9dddf1a8d72 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -6,6 +6,7 @@
 #include <kunit/test.h>
 
 #include "tests/xe_dma_buf_test.h"
+#include "tests/xe_pci_test.h"
 
 #include "xe_pci.h"
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 862d11b2210f9..eeb6c3be2e37e 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -6,6 +6,7 @@
 #include <kunit/test.h>
 
 #include "tests/xe_migrate_test.h"
+#include "tests/xe_pci_test.h"
 
 #include "xe_pci.h"
 
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
new file mode 100644
index 0000000000000..643bddb352149
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "tests/xe_pci_test.h"
+
+#include "tests/xe_test.h"
+
+#include <kunit/test.h>
+
+struct kunit_test_data {
+	int ndevs;
+	xe_device_fn xe_fn;
+};
+
+static int dev_to_xe_device_fn(struct device *dev, void *__data)
+
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct kunit_test_data *data = __data;
+	int ret = 0;
+	int idx;
+
+	data->ndevs++;
+
+	if (drm_dev_enter(drm, &idx))
+		ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev)));
+	drm_dev_exit(idx);
+
+	return ret;
+}
+
+/**
+ * xe_call_for_each_device - Iterate over all devices this driver binds to
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterated over all devices this driver binds to, and calls
+ * @xe_fn: for each one of them. If the called function returns anything else
+ * than 0, iteration is stopped and the return value is returned by this
+ * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
+ * called for the corresponding drm device.
+ *
+ * Return: Zero or the error code of a call to @xe_fn returning an error
+ * code.
+ */
+int xe_call_for_each_device(xe_device_fn xe_fn)
+{
+	int ret;
+	struct kunit_test_data data = {
+	    .xe_fn = xe_fn,
+	    .ndevs = 0,
+	};
+
+	ret = driver_for_each_device(&xe_pci_driver.driver, NULL,
+				     &data, dev_to_xe_device_fn);
+
+	if (!data.ndevs)
+		kunit_skip(current->kunit_test, "test runs only on hardware\n");
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
new file mode 100644
index 0000000000000..de65d8c9ccb53
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PCI_TEST_H_
+#define _XE_PCI_TEST_H_
+
+struct xe_device;
+
+typedef int (*xe_device_fn)(struct xe_device *);
+
+int xe_call_for_each_device(xe_device_fn xe_fn);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index ebd27b917c594..c567436afcdc4 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -627,55 +627,5 @@ void xe_unregister_pci_driver(void)
 }
 
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
-struct kunit_test_data {
-	int ndevs;
-	xe_device_fn xe_fn;
-};
-
-static int dev_to_xe_device_fn(struct device *dev, void *__data)
-
-{
-	struct drm_device *drm = dev_get_drvdata(dev);
-	struct kunit_test_data *data = __data;
-	int ret = 0;
-	int idx;
-
-	data->ndevs++;
-
-	if (drm_dev_enter(drm, &idx))
-		ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev)));
-	drm_dev_exit(idx);
-
-	return ret;
-}
-
-/**
- * xe_call_for_each_device - Iterate over all devices this driver binds to
- * @xe_fn: Function to call for each device.
- *
- * This function iterated over all devices this driver binds to, and calls
- * @xe_fn: for each one of them. If the called function returns anything else
- * than 0, iteration is stopped and the return value is returned by this
- * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
- * called for the corresponding drm device.
- *
- * Return: Zero or the error code of a call to @xe_fn returning an error
- * code.
- */
-int xe_call_for_each_device(xe_device_fn xe_fn)
-{
-	int ret;
-	struct kunit_test_data data = {
-	    .xe_fn = xe_fn,
-	    .ndevs = 0,
-	};
-
-	ret = driver_for_each_device(&xe_pci_driver.driver, NULL,
-				     &data, dev_to_xe_device_fn);
-
-	if (!data.ndevs)
-		kunit_skip(current->kunit_test, "test runs only on hardware\n");
-
-	return ret;
-}
+#include "tests/xe_pci.c"
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
index 9e3089549d5fb..611c1209b14cc 100644
--- a/drivers/gpu/drm/xe/xe_pci.h
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -6,16 +6,7 @@
 #ifndef _XE_PCI_H_
 #define _XE_PCI_H_
 
-#include "tests/xe_test.h"
-
 int xe_register_pci_driver(void);
 void xe_unregister_pci_driver(void);
 
-#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
-struct xe_device;
-
-typedef int (*xe_device_fn)(struct xe_device *);
-
-int xe_call_for_each_device(xe_device_fn xe_fn);
-#endif
 #endif
-- 
GitLab


From 60d5c6abc289cc5d561758e71fb2c392c1ec2161 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 1 Apr 2023 01:51:47 -0700
Subject: [PATCH 1461/2340] drm/xe: Use symbol namespace for kunit tests

Instead of simply using EXPORT_SYMBOL() to export the functions needed
in xe.ko to be be called across modules, use EXPORT_SYMBOL_IF_KUNIT()
which will export the symbol under the EXPORTED_FOR_KUNIT_TESTING
namespace.

This avoids accidentally "leaking" these functions and letting them be
called from outside the kunit tests. If these functiosn are accidentally
called from another module, they receive a modpost error like below:

	ERROR: modpost: module XXXXXXX uses symbol
	xe_ccs_migrate_kunit from namespace EXPORTED_FOR_KUNIT_TESTING,
	but does not import it.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Link: https://lore.kernel.org/r/20230401085151.1786204-4-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c           | 5 +++--
 drivers/gpu/drm/xe/tests/xe_bo_test.c      | 1 +
 drivers/gpu/drm/xe/tests/xe_dma_buf.c      | 3 ++-
 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 1 +
 drivers/gpu/drm/xe/tests/xe_migrate.c      | 3 ++-
 drivers/gpu/drm/xe/tests/xe_migrate_test.c | 1 +
 6 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index aa433a7b59b7e..9bd381e5b7a6a 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -4,6 +4,7 @@
  */
 
 #include <kunit/test.h>
+#include <kunit/visibility.h>
 
 #include "tests/xe_bo_test.h"
 #include "tests/xe_pci_test.h"
@@ -166,7 +167,7 @@ void xe_ccs_migrate_kunit(struct kunit *test)
 {
 	xe_call_for_each_device(ccs_test_run_device);
 }
-EXPORT_SYMBOL(xe_ccs_migrate_kunit);
+EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
 
 static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test)
 {
@@ -304,4 +305,4 @@ void xe_bo_evict_kunit(struct kunit *test)
 {
 	xe_call_for_each_device(evict_test_run_device);
 }
-EXPORT_SYMBOL(xe_bo_evict_kunit);
+EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
index 92dda4fca21b9..1c868e3635bc3 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -22,3 +22,4 @@ kunit_test_suite(xe_bo_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index cf9dddf1a8d72..513a3b3362e9f 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -4,6 +4,7 @@
  */
 
 #include <kunit/test.h>
+#include <kunit/visibility.h>
 
 #include "tests/xe_dma_buf_test.h"
 #include "tests/xe_pci_test.h"
@@ -259,4 +260,4 @@ void xe_dma_buf_kunit(struct kunit *test)
 {
 	xe_call_for_each_device(dma_buf_run_device);
 }
-EXPORT_SYMBOL(xe_dma_buf_kunit);
+EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
index a1adfd1e1605e..35312bfd5fb72 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -21,3 +21,4 @@ kunit_test_suite(xe_dma_buf_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index eeb6c3be2e37e..cdcecf8d5eef8 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -4,6 +4,7 @@
  */
 
 #include <kunit/test.h>
+#include <kunit/visibility.h>
 
 #include "tests/xe_migrate_test.h"
 #include "tests/xe_pci_test.h"
@@ -409,4 +410,4 @@ void xe_migrate_sanity_kunit(struct kunit *test)
 {
 	xe_call_for_each_device(migrate_test_run_device);
 }
-EXPORT_SYMBOL(xe_migrate_sanity_kunit);
+EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index d6be360c3b6d1..39179eae890b0 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -21,3 +21,4 @@ kunit_test_suite(xe_migrate_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
-- 
GitLab


From e460410023d95b0845aa99f2d9c0625b143ca593 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 1 Apr 2023 01:51:48 -0700
Subject: [PATCH 1462/2340] drm/xe: Generalize fake device creation

Instead of requiring tests to initialize a fake device an keep it in
sync with xe_pci.c when it's platform-dependent, export a function from
xe_pci.c to be used and piggy back on the device info creation. For
simpler tests that don't need any specific platform and just need a fake
xe device to pass around, xe_pci_fake_device_init_any() can be used.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230401085151.1786204-5-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci.c      | 47 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_pci_test.h | 16 +++++++++
 drivers/gpu/drm/xe/xe_pci.c            |  2 +-
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 643bddb352149..cc65ac5657b33 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -8,6 +8,7 @@
 #include "tests/xe_test.h"
 
 #include <kunit/test.h>
+#include <kunit/visibility.h>
 
 struct kunit_test_data {
 	int ndevs;
@@ -60,3 +61,49 @@ int xe_call_for_each_device(xe_device_fn xe_fn)
 
 	return ret;
 }
+
+int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
+			    enum xe_subplatform subplatform)
+{
+	const struct pci_device_id *ent = pciidlist;
+	const struct xe_device_desc *desc;
+	const struct xe_subplatform_desc *subplatform_desc;
+
+	if (platform == XE_TEST_PLATFORM_ANY) {
+		desc = (const void *)ent->driver_data;
+		subplatform_desc = NULL;
+		goto done;
+	}
+
+	for (ent = pciidlist; ent->device; ent++) {
+		desc = (const void *)ent->driver_data;
+		if (desc->platform == platform)
+			break;
+	}
+
+	if (!ent->device)
+		return -ENODEV;
+
+	if (subplatform == XE_TEST_SUBPLATFORM_ANY) {
+		subplatform_desc = desc->subplatforms;
+		goto done;
+	}
+
+	for (subplatform_desc = desc->subplatforms;
+	     subplatform_desc && subplatform_desc->subplatform;
+	     subplatform_desc++)
+		if (subplatform_desc->subplatform == subplatform)
+			break;
+
+	if (subplatform == XE_SUBPLATFORM_NONE && subplatform_desc)
+		return -ENODEV;
+
+	if (subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc)
+		return -ENODEV;
+
+done:
+	xe_info_init(xe, desc, subplatform_desc);
+
+	return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index de65d8c9ccb53..43294e8c62bb2 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -6,10 +6,26 @@
 #ifndef _XE_PCI_TEST_H_
 #define _XE_PCI_TEST_H_
 
+#include "xe_platform_types.h"
+
 struct xe_device;
 
+/*
+ * Some defines just for clarity: these mean the test doesn't care about what
+ * platform it will get since it doesn't depend on any platform-specific bits
+ */
+#define XE_TEST_PLATFORM_ANY	XE_PLATFORM_UNINITIALIZED
+#define XE_TEST_SUBPLATFORM_ANY	XE_SUBPLATFORM_UNINITIALIZED
+
 typedef int (*xe_device_fn)(struct xe_device *);
 
 int xe_call_for_each_device(xe_device_fn xe_fn);
 
+int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
+			    enum xe_subplatform subplatform);
+
+#define xe_pci_fake_device_init_any(xe__)					\
+	xe_pci_fake_device_init(xe__, XE_TEST_PLATFORM_ANY,			\
+				XE_TEST_SUBPLATFORM_ANY)
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c567436afcdc4..f6050a17c950f 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -423,7 +423,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
 
 static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	const struct xe_device_desc *desc = (void *)ent->driver_data;
+	const struct xe_device_desc *desc = (const void *)ent->driver_data;
 	const struct xe_subplatform_desc *subplatform_desc;
 	struct xe_device *xe;
 	int err;
-- 
GitLab


From 7bf350ecb240c9db63031e3a1b6c99acd73c90ed Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 1 Apr 2023 01:51:49 -0700
Subject: [PATCH 1463/2340] drm/xe/reg_sr: Save errors for kunit integration

When there's an entry that is dropped when xe_reg_sr_add(), there's
not much we can do other than reporting the error - it's for certain a
driver issue or conflicting workarounds/tunings. Save the number of
errors to be used later by kunit to report where it happens.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230401085151.1786204-6-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c       | 8 ++++++++
 drivers/gpu/drm/xe/xe_reg_sr_types.h | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 37ae8412cb006..f97673be2e62e 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -82,6 +82,13 @@ static bool compatible_entries(const struct xe_reg_sr_entry *e1,
 	return true;
 }
 
+static void reg_sr_inc_error(struct xe_reg_sr *sr)
+{
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	sr->errors++;
+#endif
+}
+
 int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
 		  const struct xe_reg_sr_entry *e)
 {
@@ -119,6 +126,7 @@ fail:
 	DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n",
 		  idx, e->clr_bits, e->set_bits,
 		  str_yes_no(e->masked_reg), ret);
+	reg_sr_inc_error(sr);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
index 3d22578910051..91469784fd907 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr_types.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -32,6 +32,10 @@ struct xe_reg_sr {
 	} pool;
 	struct xarray xa;
 	const char *name;
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	unsigned int errors;
+#endif
 };
 
 #endif
-- 
GitLab


From 4cc0440229c61dca680f5acaf2e529e67f9bde72 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 1 Apr 2023 01:51:50 -0700
Subject: [PATCH 1464/2340] drm/xe: Add basic unit tests for rtp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add some basic unit tests for rtp. This is intended to prove the
functionality of the rtp itself, like coalescing entries, rejecting
non-disjoint values, etc.

Contrary to the other tests in xe, this is a unit test to test the
sw-side only, so it can be executed on any machine - it doesn't interact
with the real hardware. Running it produces the following output:

	$ ./tools/testing/kunit/kunit.py run --raw_output-kunit  \
		--kunitconfig drivers/gpu/drm/xe/.kunitconfig xe_rtp
	...
	[01:26:27] Starting KUnit Kernel (1/1)...
	KTAP version 1
	1..1
	    KTAP version 1
	    # Subtest: xe_rtp
	    1..1
		KTAP version 1
		# Subtest: xe_rtp_process_tests
		ok 1 coalesce-same-reg
		ok 2 no-match-no-add
		ok 3 no-match-no-add-multiple-rules
		ok 4 two-regs-two-entries
		ok 5 clr-one-set-other
		ok 6 set-field
	[drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000001, set: 00000001, masked: no): ret=-22
		ok 7 conflict-duplicate
	[drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000003, set: 00000000, masked: no): ret=-22
		ok 8 conflict-not-disjoint
	[drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000002, set: 00000002, masked: no): ret=-22
	[drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000001, set: 00000001, masked: yes): ret=-22
		ok 9 conflict-reg-type
	    # xe_rtp_process_tests: pass:9 fail:0 skip:0 total:9
	    ok 1 xe_rtp_process_tests
	# Totals: pass:9 fail:0 skip:0 total:9
	ok 1 xe_rtp
	...

Note that the ERRORs in the kernel log are expected since it's testing
incompatible entries.

v2:
  - Use parameterized table for tests  (Michał Winiarski)
  - Move everything to the xe_rtp_test.ko and only add a few exports to the
    right namespace
  - Add more tests to cover FIELD_SET, CLR, partially true rules, etc

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst<maarten.lankhorst@linux.intel.com> # v1
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20230401085151.1786204-7-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig.debug       |   1 +
 drivers/gpu/drm/xe/tests/Makefile      |   7 +-
 drivers/gpu/drm/xe/tests/xe_rtp_test.c | 318 +++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_reg_sr.c         |   2 +
 drivers/gpu/drm/xe/xe_rtp.c            |   3 +
 5 files changed, 329 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/tests/xe_rtp_test.c

diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 93b284cdd0a2e..11bb13c73e7b6 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -66,6 +66,7 @@ config DRM_XE_KUNIT_TEST
 	depends on DRM_XE && KUNIT && DEBUG_FS
 	default KUNIT_ALL_TESTS
 	select DRM_EXPORT_FOR_TESTS if m
+	select DRM_KUNIT_TEST_HELPERS
 	help
 	  Choose this option to allow the driver to perform selftests under
 	  the kunit framework
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 47056b6459e3c..c5c2f108d017e 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \
-	xe_migrate_test.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
+	xe_bo_test.o \
+	xe_dma_buf_test.o \
+	xe_migrate_test.o \
+	xe_rtp_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
new file mode 100644
index 0000000000000..29e112c108c62
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_reg_defs.h"
+#include "xe_device_types.h"
+#include "xe_pci_test.h"
+#include "xe_reg_sr.h"
+#include "xe_rtp.h"
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x)	_XE_RTP_REG(x)
+#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+
+#define REGULAR_REG1	_MMIO(1)
+#define REGULAR_REG2	_MMIO(2)
+#define REGULAR_REG3	_MMIO(3)
+#define MCR_REG1	MCR_REG(1)
+#define MCR_REG2	MCR_REG(2)
+#define MCR_REG3	MCR_REG(3)
+
+struct rtp_test_case {
+	const char *name;
+	struct {
+		u32 offset;
+		u32 type;
+	} expected_reg;
+        u32 expected_set_bits;
+	u32 expected_clr_bits;
+	unsigned long expected_count;
+	unsigned int expected_sr_errors;
+	const struct xe_rtp_entry *entries;
+};
+
+static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+{
+	return true;
+}
+
+static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+{
+	return false;
+}
+
+static const struct rtp_test_case cases[] = {
+	{
+		.name = "coalesce-same-reg",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0) | REG_BIT(1),
+		.expected_clr_bits = REG_BIT(0) | REG_BIT(1),
+		.expected_count = 1,
+		/* Different bits on the same register: create a single entry */
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "no-match-no-add",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		/* Don't coalesce second entry since rules don't match */
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_no)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "no-match-no-add-multiple-rules",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		/* Don't coalesce second entry due to one of the rules */
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes), FUNC(match_no)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "two-regs-two-entries",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 2,
+		/* Same bits on different registers are not coalesced */
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG2, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "clr-one-set-other",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(1) | REG_BIT(0),
+		.expected_count = 1,
+		/* Check clr vs set actions on different bits */
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+#define TEMP_MASK	REG_GENMASK(10, 8)
+#define TEMP_FIELD	REG_FIELD_PREP(TEMP_MASK, 2)
+		.name = "set-field",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = TEMP_FIELD,
+		.expected_clr_bits = TEMP_MASK,
+		.expected_count = 1,
+		/* Check FIELD_SET works */
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1,
+						   TEMP_MASK, TEMP_FIELD))
+			},
+			{}
+		},
+#undef TEMP_MASK
+#undef TEMP_FIELD
+	},
+	{
+		.name = "conflict-duplicate",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 1,
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: setting same values twice */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "conflict-not-disjoint",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 1,
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: bits are not disjoint with previous entries */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_GENMASK(1, 0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "conflict-reg-type",
+		.expected_reg = { REGULAR_REG1 },
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 2,
+		.entries = (const struct xe_rtp_entry[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: regular vs MCR */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(MCR_REG1, REG_BIT(1)))
+			},
+			/* drop: regular vs masked */
+			{ XE_RTP_NAME("basic-3"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0),
+					     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			},
+			{}
+		},
+	},
+};
+
+static void xe_rtp_process_tests(struct kunit *test)
+{
+	const struct rtp_test_case *param = test->param_value;
+	struct xe_device *xe = test->priv;
+	struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr;
+	const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
+	unsigned long idx, count = 0;
+
+	xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
+	xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL);
+
+	xa_for_each(&reg_sr->xa, idx, sre) {
+		if (idx == param->expected_reg.offset)
+			sr_entry = sre;
+
+		count++;
+	}
+
+	KUNIT_EXPECT_EQ(test, count, param->expected_count);
+	KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits);
+	KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits);
+	KUNIT_EXPECT_EQ(test, sr_entry->reg_type, param->expected_reg.type);
+	KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors);
+}
+
+static void rtp_desc(const struct rtp_test_case *t, char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(rtp, cases, rtp_desc);
+
+static int xe_rtp_test_init(struct kunit *test)
+{
+	struct xe_device *xe;
+	struct device *dev;
+	int ret;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	ret = xe_pci_fake_device_init_any(xe);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	xe->drm.dev = dev;
+	test->priv = xe;
+
+	return 0;
+}
+
+static void xe_rtp_test_exit(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+
+	drm_kunit_helper_free_device(test, xe->drm.dev);
+}
+
+static struct kunit_case xe_rtp_tests[] = {
+	KUNIT_CASE_PARAM(xe_rtp_process_tests, rtp_gen_params),
+	{}
+};
+
+static struct kunit_suite xe_rtp_test_suite = {
+	.name = "xe_rtp",
+	.init = xe_rtp_test_init,
+	.exit = xe_rtp_test_exit,
+	.test_cases = xe_rtp_tests,
+};
+
+kunit_test_suite(xe_rtp_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index f97673be2e62e..ff83da4cf4a76 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -5,6 +5,7 @@
 
 #include "xe_reg_sr.h"
 
+#include <kunit/visibility.h>
 #include <linux/align.h>
 #include <linux/string_helpers.h>
 #include <linux/xarray.h>
@@ -43,6 +44,7 @@ int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
 
 	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
 }
+EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init);
 
 static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
 {
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index cb9dd894547d8..20acd43cb60be 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -5,6 +5,8 @@
 
 #include "xe_rtp.h"
 
+#include <kunit/visibility.h>
+
 #include <drm/xe_drm.h>
 
 #include "xe_gt.h"
@@ -155,6 +157,7 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 		}
 	}
 }
+EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process);
 
 bool xe_rtp_match_even_instance(const struct xe_gt *gt,
 				const struct xe_hw_engine *hwe)
-- 
GitLab


From b9d773fc515a2d57ca96a6a368ac6e8845b2b3c5 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 1 Apr 2023 01:51:51 -0700
Subject: [PATCH 1465/2340] drm/xe: Add test for GT workarounds and tunings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to avoid mistakes when populating the workarounds, it's good to
be able to test if the entries added are all compatible for a certain
platform. The platform itself is not needed as long as we create fake
devices with enough configuration for the RTP helpers to process the
tables.  Common mistakes that can be avoided:

	- Entries clashing the bitfields being updated
	- Register type being mixed (MCR vs regular / masked vs regular)
	- Unexpected errors while adding the reg_sr entry

To test, inject a duplicate entry in gt_was, but with platform == tigerlake
rather than the currenct graphics version check:

       { XE_RTP_NAME("14011059788"),
	 XE_RTP_RULES(PLATFORM(TIGERLAKE)),
	 XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
       },

This produces the following result:

	$  ./tools/testing/kunit/kunit.py run \
		--kunitconfig drivers/gpu/drm/xe/.kunitconfig  xe_wa

	[14:18:02] Starting KUnit Kernel (1/1)...
	[14:18:02] ============================================================
	[14:18:02] ==================== xe_wa (1 subtest) =====================
	[14:18:02] ======================== xe_wa_gt  =========================
	[14:18:02] [drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 9550 (clear: 00000200, set: 00000200, masked: no): ret=-22
	[14:18:02]     # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:116
	[14:18:02]     Expected gt->reg_sr.errors == 0, but
	[14:18:02]         gt->reg_sr.errors == 1 (0x1)
	[14:18:02] [FAILED] TIGERLAKE (B0)
	[14:18:02] [PASSED] DG1 (A0)
	[14:18:02] [PASSED] DG1 (B0)
	...

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20230401085151.1786204-8-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/Makefile     |   3 +-
 drivers/gpu/drm/xe/tests/xe_wa_test.c | 136 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_tuning.c        |   3 +
 drivers/gpu/drm/xe/xe_wa.c            |   2 +
 4 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/xe/tests/xe_wa_test.c

diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index c5c2f108d017e..56919abb3f2a5 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -4,4 +4,5 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
 	xe_bo_test.o \
 	xe_dma_buf_test.o \
 	xe_migrate_test.o \
-	xe_rtp_test.o
+	xe_rtp_test.o \
+	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
new file mode 100644
index 0000000000000..7a86be830b93e
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_pci_test.h"
+#include "xe_reg_sr.h"
+#include "xe_tuning.h"
+#include "xe_wa.h"
+
+struct platform_test_case {
+	const char *name;
+	enum xe_platform platform;
+	enum xe_subplatform subplatform;
+	struct xe_step_info step;
+};
+
+#define PLATFORM_CASE(platform__, graphics_step__)				\
+	{									\
+		.name = #platform__ " (" #graphics_step__ ")",			\
+		.platform = XE_ ## platform__,					\
+		.subplatform = XE_SUBPLATFORM_NONE,				\
+		.step = { .graphics = STEP_ ## graphics_step__ }		\
+	}
+
+
+#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__)			\
+	{										\
+		.name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")",	\
+		.platform = XE_ ## platform__,						\
+		.subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__,	\
+		.step = { .graphics = STEP_ ## graphics_step__ }			\
+	}
+
+static const struct platform_test_case cases[] = {
+	PLATFORM_CASE(TIGERLAKE, B0),
+	PLATFORM_CASE(DG1, A0),
+	PLATFORM_CASE(DG1, B0),
+	PLATFORM_CASE(ALDERLAKE_S, A0),
+	PLATFORM_CASE(ALDERLAKE_S, B0),
+	PLATFORM_CASE(ALDERLAKE_S, C0),
+	PLATFORM_CASE(ALDERLAKE_S, D0),
+	SUBPLATFORM_CASE(DG2, G10, A0),
+	SUBPLATFORM_CASE(DG2, G10, A1),
+	SUBPLATFORM_CASE(DG2, G10, B0),
+	SUBPLATFORM_CASE(DG2, G10, C0),
+	SUBPLATFORM_CASE(DG2, G11, A0),
+	SUBPLATFORM_CASE(DG2, G11, B0),
+	SUBPLATFORM_CASE(DG2, G11, B1),
+	SUBPLATFORM_CASE(DG2, G12, A0),
+	SUBPLATFORM_CASE(DG2, G12, A1),
+	PLATFORM_CASE(PVC, B0),
+	PLATFORM_CASE(PVC, B1),
+	PLATFORM_CASE(PVC, C0),
+};
+
+static void platform_desc(const struct platform_test_case *t, char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
+
+static int xe_wa_test_init(struct kunit *test)
+{
+	const struct platform_test_case *param = test->param_value;
+	struct xe_device *xe;
+	struct device *dev;
+	int ret;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	ret = xe_pci_fake_device_init(xe, param->platform, param->subplatform);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	xe->info.step = param->step;
+
+	/* TODO: init hw engines for engine/LRC WAs */
+	xe->drm.dev = dev;
+	test->priv = xe;
+
+	return 0;
+}
+
+static void xe_wa_test_exit(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+
+	drm_kunit_helper_free_device(test, xe->drm.dev);
+}
+
+static void xe_wa_gt(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	struct xe_gt *gt;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		xe_reg_sr_init(&gt->reg_sr, "GT", xe);
+
+		xe_wa_process_gt(gt);
+		xe_tuning_process_gt(gt);
+
+		KUNIT_ASSERT_EQ(test, gt->reg_sr.errors, 0);
+	}
+}
+
+static struct kunit_case xe_wa_tests[] = {
+	KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params),
+	{}
+};
+
+static struct kunit_suite xe_rtp_test_suite = {
+	.name = "xe_wa",
+	.init = xe_wa_test_init,
+	.exit = xe_wa_test_exit,
+	.test_cases = xe_wa_tests,
+};
+
+kunit_test_suite(xe_rtp_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 7ff5eb762da54..27cf1330facd6 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -5,6 +5,8 @@
 
 #include "xe_tuning.h"
 
+#include <kunit/visibility.h>
+
 #include "regs/xe_gt_regs.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
@@ -62,6 +64,7 @@ void xe_tuning_process_gt(struct xe_gt *gt)
 {
 	xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
 }
+EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
 
 /**
  * xe_tuning_process_lrc - process lrc tunings
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 59d2daab59291..a7d681b7538d4 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -5,6 +5,7 @@
 
 #include "xe_wa.h"
 
+#include <kunit/visibility.h>
 #include <linux/compiler_types.h>
 
 #include "regs/xe_engine_regs.h"
@@ -628,6 +629,7 @@ void xe_wa_process_gt(struct xe_gt *gt)
 {
 	xe_rtp_process(gt_was, &gt->reg_sr, gt, NULL);
 }
+EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
 
 /**
  * xe_wa_process_engine - process engine workaround table
-- 
GitLab


From ad55ead7f3c7b041dbf058a9c4b954be5929bb5e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 23 Mar 2023 22:17:54 -0700
Subject: [PATCH 1466/2340] drm/xe: Update GuC/HuC firmware autoselect logic

Update the logic to autoselect GuC/HuC for the platforms with the
following improvements:

- Document what is the firmware file that is expected to be
  loaded and what is checked from blob headers

- When the platform is under force-probe it's desired to enforce
  the full-version requirement so the correct firmware is used
  before widespread adoption and backward-compatibility
  commitments

- Directory from which we expect firmware blobs to be available in
  upstream linux-firmware repository depends on the platform: for
  the ones supported by i915 it uses the i915/ directory, but the ones
  expected to be supported by xe, it's on the xe/ directory. This
  means that for platforms in the intersection, the firmware is
  loaded from a different directory, but that is not much important
  in the firmware repo and it avoids firmware duplication.

- Make the table with the firmware definitions clearly state the
  versions being expected. Now with macros to select the version it's
  possible to choose between full-version/major-version for GuC and
  full-version/no-version for HuC. These are similar to the macros used
  in i915, but implemented in a slightly different way to avoid
  duplicating the macros for each firmware/type and functionality,
  besides adding the support for different directories.

- There is no check added regarding force-probe since xe should
  reuse the same firmware files published for i915 for past
  platforms. This can be improved later with additional
  kunit checking against a hardcoded list of platforms that
  falls in this category.

- As mentioned in the TODO, the major version fallback was not
  implemented before as currently each platform only supports one
  major. That can be easily added later.

- GuC version for MTL and PVC were updated to 70.6.4, using the exact
  full version, while the

After this the GuC firmware used by PVC changes to pvc_guc_70.5.2.bin
since it's using a file not published yet.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Link: https://lore.kernel.org/r/20230324051754.1346390-4-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c       | 315 +++++++++++++++++-----------
 drivers/gpu/drm/xe/xe_uc_fw.h       |   2 +-
 drivers/gpu/drm/xe/xe_uc_fw_types.h |   7 +
 3 files changed, 204 insertions(+), 120 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 777fa6f523dca..2beee7f8eff7e 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -17,6 +17,137 @@
 #include "xe_mmio.h"
 #include "xe_uc_fw.h"
 
+/*
+ * List of required GuC and HuC binaries per-platform. They must be ordered
+ * based on platform, from newer to older.
+ *
+ * Versioning follows the guidelines from
+ * Documentation/driver-api/firmware/firmware-usage-guidelines.rst. There is a
+ * distinction for platforms being officially supported by the driver or not.
+ * Platforms not available publicly or not yet officially supported by the
+ * driver (under force-probe), use the mmp_ver(): the firmware autoselect logic
+ * will select the firmware from disk with filename that matches the full
+ * "mpp version", i.e. major.minor.patch. mmp_ver() should only be used for
+ * this case.
+ *
+ * For platforms officially supported by the driver, the filename always only
+ * ever contains the major version (GuC) or no version at all (HuC).
+ *
+ * After loading the file, the driver parses the versions embedded in the blob.
+ * The major version needs to match a major version supported by the driver (if
+ * any). The minor version is also checked and a notice emitted to the log if
+ * the version found is smaller than the version wanted. This is done only for
+ * informational purposes so users may have a chance to upgrade, but the driver
+ * still loads and use the older firmware.
+ *
+ * Examples:
+ *
+ *	1) Platform officially supported by i915 - using Tigerlake as example.
+ *	   Driver loads the following firmware blobs from disk:
+ *
+ *		- i915/tgl_guc_<major>.bin
+ *		- i915/tgl_huc.bin
+ *
+ *	   <major> number for GuC is checked that it matches the version inside
+ *	   the blob. <minor> version is checked and if smaller than the expected
+ *	   an info message is emitted about that.
+ *
+ *	1) XE_<FUTUREINTELPLATFORM>, still under require_force_probe. Using
+ *	   "wipplat" as a short-name. Driver loads the following firmware blobs
+ *	   from disk:
+ *
+ *		- xe/wipplat_guc_<major>.<minor>.<patch>.bin
+ *		- xe/wipplat_huc_<major>.<minor>.<patch>.bin
+ *
+ *	   <major> and <minor> are checked that they match the version inside
+ *	   the blob. Both of them need to match exactly what the driver is
+ *	   expecting, otherwise it fails.
+ *
+ *	3) Platform officially supported by xe and out of force-probe. Using
+ *	   "plat" as a short-name. Except for the different directory, the
+ *	   behavior is the same as (1). Driver loads the following firmware
+ *	   blobs from disk:
+ *
+ *		- xe/plat_guc_<major>.bin
+ *		- xe/plat_huc.bin
+ *
+ *	   <major> number for GuC is checked that it matches the version inside
+ *	   the blob. <minor> version is checked and if smaller than the expected
+ *	   an info message is emitted about that.
+ *
+ * For the platforms already released with a major version, they should never be
+ * removed from the table. Instead new entries with newer versions may be added
+ * before them, so they take precedence.
+ *
+ * TODO: Currently there's no fallback on major version. That's because xe
+ * driver only supports the one major version of each firmware in the table.
+ * This needs to be fixed when the major version of GuC is updated.
+ */
+
+struct uc_fw_entry {
+	enum xe_platform platform;
+	struct {
+		const char *path;
+		u16 major;
+		u16 minor;
+		bool full_ver_required;
+	};
+};
+
+struct fw_blobs_by_type {
+	const struct uc_fw_entry *entries;
+	u32 count;
+};
+
+#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
+	fw_def(METEORLAKE,	mmp_ver(  i915,	guc,	mtl,	70, 6, 4))	\
+	fw_def(PVC,		mmp_ver(  xe,	guc,	pvc,	70, 6, 4))	\
+	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
+	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
+	fw_def(ALDERLAKE_P,	major_ver(i915,	guc,	adlp,	70, 5))		\
+	fw_def(ALDERLAKE_S,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
+
+#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)				\
+	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,	tgl))			\
+	fw_def(DG1,		no_ver(i915,	huc,	dg1))			\
+	fw_def(TIGERLAKE,	no_ver(i915,	huc,	tgl))
+
+#define MAKE_FW_PATH(dir__, uc__, shortname__, version__)			\
+	__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
+
+#define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## . ## b ## . ## c))
+#define fw_filename_major_ver(dir_, uc_, shortname_, a, b)			\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a))
+#define fw_filename_no_ver(dir_, uc_, shortname_)				\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "")
+
+#define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
+	{ fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c),			\
+	  a, b, true }
+#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b)			\
+	{ fw_filename_major_ver(dir_, uc_, shortname_, a, b),			\
+	  a, b }
+#define uc_fw_entry_no_ver(dir_, uc_, shortname_)				\
+	{ fw_filename_no_ver(dir_, uc_, shortname_),				\
+	  0, 0 }
+
+/* All blobs need to be declared via MODULE_FIRMWARE() */
+#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename)				\
+	MODULE_FIRMWARE(fw_filename);
+
+#define XE_UC_FW_ENTRY(platform__, entry__)					\
+	{									\
+		.platform = XE_ ## platform__,					\
+		entry__,							\
+	},
+
+XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,					\
+		     fw_filename_mmp_ver, fw_filename_major_ver)
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,					\
+		     fw_filename_mmp_ver, fw_filename_no_ver)
+
 static struct xe_gt *
 __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
 {
@@ -37,123 +168,38 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
 	return gt_to_xe(uc_fw_to_gt(uc_fw));
 }
 
-/*
- * List of required GuC and HuC binaries per-platform.
- * Must be ordered based on platform, from newer to older.
- */
-#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
-	fw_def(METEORLAKE,   guc_def(mtl,  70, 5, 2)) \
-	fw_def(PVC,          guc_def(pvc,  70, 5, 2)) \
-	fw_def(DG2,          guc_def(dg2,  70, 5, 2)) \
-	fw_def(DG1,          guc_def(dg1,  70, 5, 2)) \
-	fw_def(ALDERLAKE_P,  guc_def(adlp,  70, 5, 2)) \
-	fw_def(ALDERLAKE_S,  guc_def(tgl,  70, 5, 2)) \
-	fw_def(TIGERLAKE,    guc_def(tgl,  70, 5, 2))
-
-#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \
-	fw_def(ALDERLAKE_S,	huc_def(tgl)) \
-	fw_def(DG1,		huc_def(dg1)) \
-	fw_def(TIGERLAKE,	huc_def(tgl))
-
-#define __MAKE_HUC_FW_PATH(prefix_, name_) \
-        "i915/" \
-        __stringify(prefix_) "_" name_ ".bin"
-
-#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
-	"i915/" \
-	__stringify(prefix_) "_" name_ "_" \
-	__stringify(major_) ".bin"
-
-#define __MAKE_UC_FW_PATH_FULL_VER(prefix_, name_, major_, minor_, patch_) \
-        "i915/" \
-       __stringify(prefix_) "_" name_ "_" \
-       __stringify(major_) "." \
-       __stringify(minor_) "." \
-       __stringify(patch_) ".bin"
-
-#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
-	__MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_)
-
-#define MAKE_HUC_FW_PATH(prefix_) \
-	__MAKE_HUC_FW_PATH(prefix_, "huc")
-
-#define MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, patch_) \
-	__MAKE_UC_FW_PATH_FULL_VER(prefix_, "huc", major_, minor_, patch_)
-
-
-/* All blobs need to be declared via MODULE_FIRMWARE() */
-#define XE_UC_MODULE_FW(platform_, uc_) \
-	MODULE_FIRMWARE(uc_);
-
-XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH)
-XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH, MAKE_HUC_FW_PATH_FULL_VER)
-
-/* The below structs and macros are used to iterate across the list of blobs */
-struct __packed uc_fw_blob {
-	u8 major;
-	u8 minor;
-	const char *path;
-};
-
-#define UC_FW_BLOB(major_, minor_, path_) \
-	{ .major = major_, .minor = minor_, .path = path_ }
-
-#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
-	UC_FW_BLOB(major_, minor_, \
-		   MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))
-
-#define HUC_FW_BLOB(prefix_) \
-	UC_FW_BLOB(0, 0, MAKE_HUC_FW_PATH(prefix_))
-
-#define HUC_FW_VERSION_BLOB(prefix_, major_, minor_, bld_num_) \
-	UC_FW_BLOB(major_, minor_, \
-		   MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, bld_num_))
-
-struct uc_fw_platform_requirement {
-	enum xe_platform p;
-	const struct uc_fw_blob blob;
-};
-
-#define MAKE_FW_LIST(platform_, uc_) \
-{ \
-	.p = XE_##platform_, \
-	.blob = uc_, \
-},
-
-struct fw_blobs_by_type {
-	const struct uc_fw_platform_requirement *blobs;
-	u32 count;
-};
-
 static void
 uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 {
-	static const struct uc_fw_platform_requirement blobs_guc[] = {
-		XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
+	static const struct uc_fw_entry entries_guc[] = {
+		XE_GUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY,
+				     uc_fw_entry_mmp_ver,
+				     uc_fw_entry_major_ver)
 	};
-	static const struct uc_fw_platform_requirement blobs_huc[] = {
-		XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_VERSION_BLOB)
+	static const struct uc_fw_entry entries_huc[] = {
+		XE_HUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY,
+				     uc_fw_entry_mmp_ver,
+				     uc_fw_entry_no_ver)
 	};
 	static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
-		[XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
-		[XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+		[XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) },
+		[XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) },
 	};
-	static const struct uc_fw_platform_requirement *fw_blobs;
+	static const struct uc_fw_entry *entries;
 	enum xe_platform p = xe->info.platform;
-	u32 fw_count;
+	u32 count;
 	int i;
 
 	XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
-	fw_blobs = blobs_all[uc_fw->type].blobs;
-	fw_count = blobs_all[uc_fw->type].count;
-
-	for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
-		if (p == fw_blobs[i].p) {
-			const struct uc_fw_blob *blob = &fw_blobs[i].blob;
-
-			uc_fw->path = blob->path;
-			uc_fw->major_ver_wanted = blob->major;
-			uc_fw->minor_ver_wanted = blob->minor;
+	entries = blobs_all[uc_fw->type].entries;
+	count = blobs_all[uc_fw->type].count;
+
+	for (i = 0; i < count && p <= entries[i].platform; i++) {
+		if (p == entries[i].platform) {
+			uc_fw->path = entries[i].path;
+			uc_fw->major_ver_wanted = entries[i].major;
+			uc_fw->minor_ver_wanted = entries[i].minor;
+			uc_fw->full_ver_required = entries[i].full_ver_required;
 			break;
 		}
 	}
@@ -227,6 +273,47 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
 	uc_fw->private_data_size = css->private_data_size;
 }
 
+static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+
+	/* Driver has no requirement on any version, any is good. */
+	if (!uc_fw->major_ver_wanted)
+		return 0;
+
+	/*
+	 * If full version is required, both major and minor should match.
+	 * Otherwise, at least the major version.
+	 */
+	if (uc_fw->major_ver_wanted != uc_fw->major_ver_found ||
+	    (uc_fw->full_ver_required &&
+	     uc_fw->minor_ver_wanted != uc_fw->minor_ver_found)) {
+		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+		goto fail;
+	}
+
+	if (uc_fw->minor_ver_wanted > uc_fw->minor_ver_found) {
+		drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
+			   xe_uc_fw_type_repr(uc_fw->type),
+			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
+			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			   uc_fw->path);
+		drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
+			 XE_UC_FIRMWARE_URL);
+	}
+
+	return 0;
+
+fail:
+	if (xe_uc_fw_is_overridden(uc_fw))
+		return 0;
+
+	return -ENOEXEC;
+}
+
 int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
@@ -308,19 +395,9 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
 					   css->sw_version);
 
-	if (uc_fw->major_ver_wanted) {
-		if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
-		    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
-			drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
-				   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-				   uc_fw->major_ver_found, uc_fw->minor_ver_found,
-				   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
-			if (!xe_uc_fw_is_overridden(uc_fw)) {
-				err = -ENOEXEC;
-				goto fail;
-			}
-		}
-	}
+	err = uc_fw_check_version_requirements(uc_fw);
+	if (err)
+		goto fail;
 
 	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
 		guc_read_css_info(uc_fw, css);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index bf31c3bb0e0f8..e16267e712807 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -175,6 +175,6 @@ static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
 	return __xe_uc_fw_get_upload_size(uc_fw);
 }
 
-#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915"
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git"
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 1cfd30a655df1..837f49a2347ef 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -78,6 +78,13 @@ struct xe_uc_fw {
 	const char *path;
 	/** @user_overridden: user provided path to uC firmware via modparam */
 	bool user_overridden;
+	/**
+	 * @full_ver_required: driver still under development and not ready
+	 * for backward-compatible firmware. To be used only for **new**
+	 * platforms, i.e. still under require_force_probe protection and not
+	 * supported by i915.
+	 */
+	bool full_ver_required;
 	/** @size: size of uC firmware including css header */
 	size_t size;
 
-- 
GitLab


From 1c060057ec29e0305aa314c19a80090c21524faa Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 5 Apr 2023 16:20:03 -0700
Subject: [PATCH 1467/2340] drm/xe: Always write
 GEN12_RCU_MODE.GEN12_RCU_MODE_CCS_ENABLE for CCS engines

If CCS0 was fused we did not write this register thus CCS engine were
not enabled resulting in driver load failures.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 63a4efd5edccd..4b56c35b988d4 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -253,7 +253,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 	u32 ccs_mask =
 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
 
-	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0))
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
 		xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
 				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
 
-- 
GitLab


From 61e72e77b66259945fca89dcbfea32f7cbfc3b07 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 5 Apr 2023 15:47:25 -0700
Subject: [PATCH 1468/2340] drm/xe: Always log GuC/HuC firmware versions

When debugging issues related to GuC/HuC, it's important to know what is
the firmware version being used. The version from the filename can't be
relied upon, also because it normally only contains the major version
(except for the ones under experimental support).

Log the version from the blob after reading the CSS header. Example:

	xe 0000:03:00.0: [drm] Using GuC firmware (70.5) from i915/dg2_guc_70.bin

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230405224725.1993719-1-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2beee7f8eff7e..7a410c106df4d 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -395,6 +395,11 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
 					   css->sw_version);
 
+	drm_info(&xe->drm, "Using %s firmware (%u.%u) from %s\n",
+		 xe_uc_fw_type_repr(uc_fw->type),
+		 uc_fw->major_ver_found, uc_fw->minor_ver_found,
+		 uc_fw->path);
+
 	err = uc_fw_check_version_requirements(uc_fw);
 	if (err)
 		goto fail;
-- 
GitLab


From c8d72dfb288740a59afaf135da15db598fae0475 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:13 -0700
Subject: [PATCH 1469/2340] drm/xe: Start splitting xe_device_desc into
 graphics/media structures

Rather than storing all characteristics for an entire platform in the
xe_device_desc structure, create secondary graphics and media structures
to hold traits and feature flags specific to those IPs.  This will
eventually allow us to assign the graphics and media characteristics at
runtime based on the contents of the relevant GMD_ID registers.

For now, just move the IP versions into the new structures to keep
things simple.  Other IP-specific fields will migrate to these
structures in future patches.

Note that there's one functional change introduced by this:  previously
PVC was recognized as media version 12.60.  That's technically true, but
in practice the media engines are fused off on all production hardware.
By simply not assigning a media IP structure to PVC it will effectively
be treated as IP version 0.0 now (which the rest of the driver should
treat as non-existent media).

v2:
 - Split the new structures out to their own header.  This will ease the
   addition of KUnit tests later.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c       | 94 +++++++++++++++++++++----------
 drivers/gpu/drm/xe/xe_pci_types.h | 21 +++++++
 2 files changed, 86 insertions(+), 29 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_pci_types.h

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index f6050a17c950f..ef5a668be4499 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -19,6 +19,7 @@
 #include "xe_drv.h"
 #include "xe_macros.h"
 #include "xe_module.h"
+#include "xe_pci_types.h"
 #include "xe_pm.h"
 #include "xe_step.h"
 
@@ -42,10 +43,8 @@ struct xe_gt_desc {
 };
 
 struct xe_device_desc {
-	u8 graphics_ver;
-	u8 graphics_rel;
-	u8 media_ver;
-	u8 media_rel;
+	const struct xe_graphics_desc *graphics;
+	const struct xe_media_desc *media;
 
 	u64 platform_engine_mask; /* Engines supported by the HW */
 
@@ -80,17 +79,57 @@ struct xe_device_desc {
 
 #define NOP(x)	x
 
+static const struct xe_graphics_desc graphics_xelp = {
+	.ver = 12,
+	.rel = 0,
+};
+
+static const struct xe_graphics_desc graphics_xelpp = {
+	.ver = 12,
+	.rel = 10,
+};
+
+static const struct xe_graphics_desc graphics_xehpg = {
+	.ver = 12,
+	.rel = 55,
+};
+
+static const struct xe_graphics_desc graphics_xehpc = {
+	.ver = 12,
+	.rel = 60,
+};
+
+static const struct xe_graphics_desc graphics_xelpg = {
+	.ver = 12,
+	.rel = 70,
+};
+
+static const struct xe_media_desc media_xem = {
+	.ver = 12,
+	.rel = 0,
+};
+
+static const struct xe_media_desc media_xehpm = {
+	.ver = 12,
+	.rel = 55,
+};
+
+static const struct xe_media_desc media_xelpmp = {
+	.ver = 13,
+	.rel = 0,
+};
+
 /* Keep in gen based order, and chronological order within a gen */
 #define GEN12_FEATURES \
 	.require_force_probe = true, \
-	.graphics_ver = 12, \
-	.media_ver = 12, \
 	.dma_mask_size = 39, \
 	.max_tiles = 1, \
 	.vm_max_level = 3, \
 	.vram_flags = 0
 
 static const struct xe_device_desc tgl_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
 	GEN12_FEATURES,
 	PLATFORM(XE_TIGERLAKE),
 	.platform_engine_mask =
@@ -100,6 +139,8 @@ static const struct xe_device_desc tgl_desc = {
 };
 
 static const struct xe_device_desc adl_s_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
 	GEN12_FEATURES,
 	PLATFORM(XE_ALDERLAKE_S),
 	.platform_engine_mask =
@@ -111,6 +152,8 @@ static const struct xe_device_desc adl_s_desc = {
 static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
 
 static const struct xe_device_desc adl_p_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
 	GEN12_FEATURES,
 	PLATFORM(XE_ALDERLAKE_P),
 	.platform_engine_mask =
@@ -127,9 +170,10 @@ static const struct xe_device_desc adl_p_desc = {
 	.is_dgfx = 1
 
 static const struct xe_device_desc dg1_desc = {
+	.graphics = &graphics_xelpp,
+	.media = &media_xem,
 	GEN12_FEATURES,
 	DGFX_FEATURES,
-	.graphics_rel = 10,
 	PLATFORM(XE_DG1),
 	.platform_engine_mask =
 		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
@@ -139,26 +183,18 @@ static const struct xe_device_desc dg1_desc = {
 
 #define XE_HP_FEATURES \
 	.require_force_probe = true, \
-	.graphics_ver = 12, \
-	.graphics_rel = 50, \
 	.has_range_tlb_invalidation = true, \
 	.has_flat_ccs = true, \
 	.dma_mask_size = 46, \
 	.max_tiles = 1, \
 	.vm_max_level = 3
 
-#define XE_HPM_FEATURES \
-	.media_ver = 12, \
-	.media_rel = 50
-
 static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
 static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
 static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
 
 #define DG2_FEATURES \
 	DGFX_FEATURES, \
-	.graphics_rel = 55, \
-	.media_rel = 55, \
 	PLATFORM(XE_DG2), \
 	.subplatforms = (const struct xe_subplatform_desc[]) { \
 		{ XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
@@ -177,15 +213,17 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
 	.has_4tile = 1
 
 static const struct xe_device_desc ats_m_desc = {
+	.graphics = &graphics_xehpg,
+	.media = &media_xehpm,
 	XE_HP_FEATURES,
-	XE_HPM_FEATURES,
 
 	DG2_FEATURES,
 };
 
 static const struct xe_device_desc dg2_desc = {
+	.graphics = &graphics_xehpg,
+	.media = &media_xehpm,
 	XE_HP_FEATURES,
-	XE_HPM_FEATURES,
 
 	DG2_FEATURES,
 };
@@ -212,14 +250,12 @@ static const struct xe_gt_desc pvc_gts[] = {
 };
 
 static const __maybe_unused struct xe_device_desc pvc_desc = {
+	.graphics = &graphics_xehpc,
 	XE_HP_FEATURES,
-	XE_HPM_FEATURES,
 	DGFX_FEATURES,
 	PLATFORM(XE_PVC),
 	.extra_gts = pvc_gts,
-	.graphics_rel = 60,
 	.has_flat_ccs = 0,
-	.media_rel = 60,
 	.platform_engine_mask = PVC_ENGINES,
 	.vram_flags = XE_VRAM_FLAGS_NEED64K,
 	.dma_mask_size = 52,
@@ -250,16 +286,15 @@ static const struct xe_gt_desc xelpmp_gts[] = {
 
 static const struct xe_device_desc mtl_desc = {
 	/*
-	 * Real graphics IP version will be obtained from hardware GMD_ID
-	 * register.  Value provided here is just for sanity checking.
+	 * FIXME:  Real graphics/media IP will be mapped from hardware
+	 * GMD_ID register.  Hardcoded assignments here will go away soon.
 	 */
+	.graphics = &graphics_xelpg,
+	.media = &media_xelpmp,
 	.require_force_probe = true,
-	.graphics_ver = 12,
-	.graphics_rel = 70,
 	.dma_mask_size = 46,
 	.max_tiles = 2,
 	.vm_max_level = 3,
-	.media_ver = 13,
 	.has_range_tlb_invalidation = true,
 	PLATFORM(XE_METEORLAKE),
 	.extra_gts = xelpmp_gts,
@@ -363,10 +398,11 @@ static void xe_info_init(struct xe_device *xe,
 	struct xe_gt *gt;
 	u8 id;
 
-	xe->info.graphics_verx100 = desc->graphics_ver * 100 +
-				    desc->graphics_rel;
-	xe->info.media_verx100 = desc->media_ver * 100 +
-				 desc->media_rel;
+	xe->info.graphics_verx100 = desc->graphics->ver * 100 +
+				    desc->graphics->rel;
+	if (desc->media)
+		xe->info.media_verx100 = desc->media->ver * 100 +
+					 desc->media->rel;
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.platform = desc->platform;
 	xe->info.dma_mask_size = desc->dma_mask_size;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
new file mode 100644
index 0000000000000..cc372694eccaf
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PCI_TYPES_H_
+#define _XE_PCI_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_graphics_desc {
+	u8 ver;
+	u8 rel;
+};
+
+struct xe_media_desc {
+	u8 ver;
+	u8 rel;
+};
+
+#endif
-- 
GitLab


From c94f32e4f5453a55c1c83a81481784f617f96df8 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:14 -0700
Subject: [PATCH 1470/2340] drm/xe: Set require_force_probe in each platform's
 description

Set require_force_probe explicitly in each platform's description
structure rather than embedding it within the FOO_FEATURES macros.  Even
though we expect all platforms currently supported by the Xe driver to
be under force_probe protection, this will help prepare for some other
upcoming restructuring.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index ef5a668be4499..06c194ab56dd8 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -121,7 +121,6 @@ static const struct xe_media_desc media_xelpmp = {
 
 /* Keep in gen based order, and chronological order within a gen */
 #define GEN12_FEATURES \
-	.require_force_probe = true, \
 	.dma_mask_size = 39, \
 	.max_tiles = 1, \
 	.vm_max_level = 3, \
@@ -132,6 +131,7 @@ static const struct xe_device_desc tgl_desc = {
 	.media = &media_xem,
 	GEN12_FEATURES,
 	PLATFORM(XE_TIGERLAKE),
+	.require_force_probe = true,
 	.platform_engine_mask =
 		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
 		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
@@ -143,6 +143,7 @@ static const struct xe_device_desc adl_s_desc = {
 	.media = &media_xem,
 	GEN12_FEATURES,
 	PLATFORM(XE_ALDERLAKE_S),
+	.require_force_probe = true,
 	.platform_engine_mask =
 		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
 		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
@@ -156,6 +157,7 @@ static const struct xe_device_desc adl_p_desc = {
 	.media = &media_xem,
 	GEN12_FEATURES,
 	PLATFORM(XE_ALDERLAKE_P),
+	.require_force_probe = true,
 	.platform_engine_mask =
 		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
 		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
@@ -175,6 +177,7 @@ static const struct xe_device_desc dg1_desc = {
 	GEN12_FEATURES,
 	DGFX_FEATURES,
 	PLATFORM(XE_DG1),
+	.require_force_probe = true,
 	.platform_engine_mask =
 		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
 		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
@@ -182,7 +185,6 @@ static const struct xe_device_desc dg1_desc = {
 };
 
 #define XE_HP_FEATURES \
-	.require_force_probe = true, \
 	.has_range_tlb_invalidation = true, \
 	.has_flat_ccs = true, \
 	.dma_mask_size = 46, \
@@ -208,13 +210,13 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
 		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
 		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
 		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \
-	.require_force_probe = true, \
 	.vram_flags = XE_VRAM_FLAGS_NEED64K, \
 	.has_4tile = 1
 
 static const struct xe_device_desc ats_m_desc = {
 	.graphics = &graphics_xehpg,
 	.media = &media_xehpm,
+	.require_force_probe = true,
 	XE_HP_FEATURES,
 
 	DG2_FEATURES,
@@ -223,6 +225,7 @@ static const struct xe_device_desc ats_m_desc = {
 static const struct xe_device_desc dg2_desc = {
 	.graphics = &graphics_xehpg,
 	.media = &media_xehpm,
+	.require_force_probe = true,
 	XE_HP_FEATURES,
 
 	DG2_FEATURES,
@@ -254,6 +257,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	XE_HP_FEATURES,
 	DGFX_FEATURES,
 	PLATFORM(XE_PVC),
+	.require_force_probe = true,
 	.extra_gts = pvc_gts,
 	.has_flat_ccs = 0,
 	.platform_engine_mask = PVC_ENGINES,
-- 
GitLab


From ce22dece001d6dfedbff0b63596e9aaa5b5ae78b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:15 -0700
Subject: [PATCH 1471/2340] drm/xe: Move most platform traits to graphics IP

Most of the traits currently in the device descriptor structures are
either tied to the graphics IP or should be inferred from the graphics
IP.  This becomes important on MTL and beyond where IP versions are
supposed to be detected from the hardware's GMD_ID registers rather than
mapped from PCI devid.

Engine masks are left where they are for now; they'll be dealt with
separately in a future patch.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c       | 99 ++++++++++++++-----------------
 drivers/gpu/drm/xe/xe_pci_types.h | 11 ++++
 2 files changed, 56 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 06c194ab56dd8..b981b1e62bfab 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -53,24 +53,18 @@ struct xe_device_desc {
 	const struct xe_subplatform_desc *subplatforms;
 	const struct xe_gt_desc *extra_gts;
 
-	u8 dma_mask_size; /* available DMA address bits */
-
 	u8 gt; /* GT number, 0 if undefined */
 
 #define DEFINE_FLAG(name) u8 name:1
 	DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
 #undef DEFINE_FLAG
 
-	u8 vram_flags;
-	u8 max_tiles;
-	u8 vm_max_level;
-
-	bool supports_usm;
-	bool has_flat_ccs;
+	/*
+	 * FIXME: Xe doesn't care about presence/lack of 4tile since we can
+	 * already determine that from the graphics IP version.  This flag
+	 * should eventually move entirely into the display code's own logic.
+	 */
 	bool has_4tile;
-	bool has_range_tlb_invalidation;
-	bool has_asid;
-	bool has_link_copy_engine;
 };
 
 #define PLATFORM(x)		\
@@ -82,26 +76,57 @@ struct xe_device_desc {
 static const struct xe_graphics_desc graphics_xelp = {
 	.ver = 12,
 	.rel = 0,
+
+	.dma_mask_size = 39,
+	.vm_max_level = 3,
 };
 
 static const struct xe_graphics_desc graphics_xelpp = {
 	.ver = 12,
 	.rel = 10,
+
+	.dma_mask_size = 39,
+	.vm_max_level = 3,
 };
 
+#define XE_HP_FEATURES \
+	.has_range_tlb_invalidation = true, \
+	.has_flat_ccs = true, \
+	.dma_mask_size = 46, \
+	.vm_max_level = 3
+
 static const struct xe_graphics_desc graphics_xehpg = {
 	.ver = 12,
 	.rel = 55,
+
+	XE_HP_FEATURES,
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
 };
 
 static const struct xe_graphics_desc graphics_xehpc = {
 	.ver = 12,
 	.rel = 60,
+
+	XE_HP_FEATURES,
+	.dma_mask_size = 52,
+	.max_tiles = 2,
+	.vm_max_level = 4,
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
+
+	.has_asid = 1,
+	.has_flat_ccs = 0,
+	.has_link_copy_engine = 1,
+	.supports_usm = 1,
 };
 
 static const struct xe_graphics_desc graphics_xelpg = {
 	.ver = 12,
 	.rel = 70,
+
+	XE_HP_FEATURES,
+	.max_tiles = 2,
+
+	.has_flat_ccs = 0,
 };
 
 static const struct xe_media_desc media_xem = {
@@ -119,17 +144,9 @@ static const struct xe_media_desc media_xelpmp = {
 	.rel = 0,
 };
 
-/* Keep in gen based order, and chronological order within a gen */
-#define GEN12_FEATURES \
-	.dma_mask_size = 39, \
-	.max_tiles = 1, \
-	.vm_max_level = 3, \
-	.vram_flags = 0
-
 static const struct xe_device_desc tgl_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
-	GEN12_FEATURES,
 	PLATFORM(XE_TIGERLAKE),
 	.require_force_probe = true,
 	.platform_engine_mask =
@@ -141,7 +158,6 @@ static const struct xe_device_desc tgl_desc = {
 static const struct xe_device_desc adl_s_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
-	GEN12_FEATURES,
 	PLATFORM(XE_ALDERLAKE_S),
 	.require_force_probe = true,
 	.platform_engine_mask =
@@ -155,7 +171,6 @@ static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
 static const struct xe_device_desc adl_p_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
-	GEN12_FEATURES,
 	PLATFORM(XE_ALDERLAKE_P),
 	.require_force_probe = true,
 	.platform_engine_mask =
@@ -174,7 +189,6 @@ static const struct xe_device_desc adl_p_desc = {
 static const struct xe_device_desc dg1_desc = {
 	.graphics = &graphics_xelpp,
 	.media = &media_xem,
-	GEN12_FEATURES,
 	DGFX_FEATURES,
 	PLATFORM(XE_DG1),
 	.require_force_probe = true,
@@ -184,13 +198,6 @@ static const struct xe_device_desc dg1_desc = {
 		BIT(XE_HW_ENGINE_VCS2),
 };
 
-#define XE_HP_FEATURES \
-	.has_range_tlb_invalidation = true, \
-	.has_flat_ccs = true, \
-	.dma_mask_size = 46, \
-	.max_tiles = 1, \
-	.vm_max_level = 3
-
 static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
 static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
 static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
@@ -210,14 +217,12 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
 		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
 		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
 		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \
-	.vram_flags = XE_VRAM_FLAGS_NEED64K, \
 	.has_4tile = 1
 
 static const struct xe_device_desc ats_m_desc = {
 	.graphics = &graphics_xehpg,
 	.media = &media_xehpm,
 	.require_force_probe = true,
-	XE_HP_FEATURES,
 
 	DG2_FEATURES,
 };
@@ -226,7 +231,6 @@ static const struct xe_device_desc dg2_desc = {
 	.graphics = &graphics_xehpg,
 	.media = &media_xehpm,
 	.require_force_probe = true,
-	XE_HP_FEATURES,
 
 	DG2_FEATURES,
 };
@@ -254,20 +258,11 @@ static const struct xe_gt_desc pvc_gts[] = {
 
 static const __maybe_unused struct xe_device_desc pvc_desc = {
 	.graphics = &graphics_xehpc,
-	XE_HP_FEATURES,
 	DGFX_FEATURES,
 	PLATFORM(XE_PVC),
 	.require_force_probe = true,
 	.extra_gts = pvc_gts,
-	.has_flat_ccs = 0,
 	.platform_engine_mask = PVC_ENGINES,
-	.vram_flags = XE_VRAM_FLAGS_NEED64K,
-	.dma_mask_size = 52,
-	.max_tiles = 2,
-	.vm_max_level = 4,
-	.supports_usm = true,
-	.has_asid = true,
-	.has_link_copy_engine = true,
 };
 
 #define MTL_MEDIA_ENGINES \
@@ -296,10 +291,6 @@ static const struct xe_device_desc mtl_desc = {
 	.graphics = &graphics_xelpg,
 	.media = &media_xelpmp,
 	.require_force_probe = true,
-	.dma_mask_size = 46,
-	.max_tiles = 2,
-	.vm_max_level = 3,
-	.has_range_tlb_invalidation = true,
 	PLATFORM(XE_METEORLAKE),
 	.extra_gts = xelpmp_gts,
 	.platform_engine_mask = MTL_MAIN_ENGINES,
@@ -409,16 +400,16 @@ static void xe_info_init(struct xe_device *xe,
 					 desc->media->rel;
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.platform = desc->platform;
-	xe->info.dma_mask_size = desc->dma_mask_size;
-	xe->info.vram_flags = desc->vram_flags;
-	xe->info.tile_count = desc->max_tiles;
-	xe->info.vm_max_level = desc->vm_max_level;
-	xe->info.supports_usm = desc->supports_usm;
-	xe->info.has_asid = desc->has_asid;
-	xe->info.has_flat_ccs = desc->has_flat_ccs;
+	xe->info.dma_mask_size = desc->graphics->dma_mask_size;
+	xe->info.vram_flags = desc->graphics->vram_flags;
+	xe->info.tile_count = desc->graphics->max_tiles ?: 1;
+	xe->info.vm_max_level = desc->graphics->vm_max_level;
+	xe->info.supports_usm = desc->graphics->supports_usm;
+	xe->info.has_asid = desc->graphics->has_asid;
+	xe->info.has_flat_ccs = desc->graphics->has_flat_ccs;
 	xe->info.has_4tile = desc->has_4tile;
-	xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
-	xe->info.has_link_copy_engine = desc->has_link_copy_engine;
+	xe->info.has_range_tlb_invalidation = desc->graphics->has_range_tlb_invalidation;
+	xe->info.has_link_copy_engine = desc->graphics->has_link_copy_engine;
 
 	xe->info.subplatform = subplatform_desc ?
 		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index cc372694eccaf..e1749ceee9e0d 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -11,6 +11,17 @@
 struct xe_graphics_desc {
 	u8 ver;
 	u8 rel;
+
+	u8 dma_mask_size;	/* available DMA address bits */
+	u8 max_tiles;		/* defaults to 1 if unset */
+	u8 vm_max_level;
+	u8 vram_flags;
+
+	u8 has_asid:1;
+	u8 has_flat_ccs:1;
+	u8 has_link_copy_engine:1;
+	u8 has_range_tlb_invalidation:1;
+	u8 supports_usm:1;
 };
 
 struct xe_media_desc {
-- 
GitLab


From 33b270d9392825874c4e484e8652dad2cf901c97 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:16 -0700
Subject: [PATCH 1472/2340] drm/xe: Move engine masks into IP descriptor
 structures

Break the top-level platform_engine_mask field into separate
hw_engine_mask fields in the graphics and media structures.  Since
hardware has more flexibility to mix-and-match IP versions going
forward, this allows each IP to list exactly which engines it provides;
the final per-GT engine list can then be constructured from those:

 * On platforms without a standalone media GT (i.e., media IP versions
   prior to 13), the primary GT's engine list is the union of the
   graphics IP's engine list and the media IP's engine list.
 * Otherwise, GT0's engine list is the graphics IP's engine list.
 * For GT1 and beyond, the type of GT determines which IP's engine list
   is used.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-5-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c       | 93 ++++++++++++++-----------------
 drivers/gpu/drm/xe/xe_pci_types.h |  4 ++
 2 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index b981b1e62bfab..198cae9c5116a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -37,7 +37,6 @@ struct xe_subplatform_desc {
 struct xe_gt_desc {
 	enum xe_gt_type type;
 	u8 vram_id;
-	u64 engine_mask;
 	u32 mmio_adj_limit;
 	u32 mmio_adj_offset;
 };
@@ -46,8 +45,6 @@ struct xe_device_desc {
 	const struct xe_graphics_desc *graphics;
 	const struct xe_media_desc *media;
 
-	u64 platform_engine_mask; /* Engines supported by the HW */
-
 	enum xe_platform platform;
 	const char *platform_name;
 	const struct xe_subplatform_desc *subplatforms;
@@ -77,6 +74,8 @@ static const struct xe_graphics_desc graphics_xelp = {
 	.ver = 12,
 	.rel = 0,
 
+	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
+
 	.dma_mask_size = 39,
 	.vm_max_level = 3,
 };
@@ -85,6 +84,8 @@ static const struct xe_graphics_desc graphics_xelpp = {
 	.ver = 12,
 	.rel = 10,
 
+	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
+
 	.dma_mask_size = 39,
 	.vm_max_level = 3,
 };
@@ -99,6 +100,11 @@ static const struct xe_graphics_desc graphics_xehpg = {
 	.ver = 12,
 	.rel = 55,
 
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) |
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
+
 	XE_HP_FEATURES,
 	.vram_flags = XE_VRAM_FLAGS_NEED64K,
 };
@@ -107,6 +113,15 @@ static const struct xe_graphics_desc graphics_xehpc = {
 	.ver = 12,
 	.rel = 60,
 
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) |
+		BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) |
+		BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) |
+		BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) |
+		BIT(XE_HW_ENGINE_BCS8) |
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) |
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
+
 	XE_HP_FEATURES,
 	.dma_mask_size = 52,
 	.max_tiles = 2,
@@ -123,6 +138,10 @@ static const struct xe_graphics_desc graphics_xelpg = {
 	.ver = 12,
 	.rel = 70,
 
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_CCS0),
+
 	XE_HP_FEATURES,
 	.max_tiles = 2,
 
@@ -132,16 +151,28 @@ static const struct xe_graphics_desc graphics_xelpg = {
 static const struct xe_media_desc media_xem = {
 	.ver = 12,
 	.rel = 0,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0),
 };
 
 static const struct xe_media_desc media_xehpm = {
 	.ver = 12,
 	.rel = 55,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1),
 };
 
 static const struct xe_media_desc media_xelpmp = {
 	.ver = 13,
 	.rel = 0,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0),	/* TODO: add GSC0 */
 };
 
 static const struct xe_device_desc tgl_desc = {
@@ -149,10 +180,6 @@ static const struct xe_device_desc tgl_desc = {
 	.media = &media_xem,
 	PLATFORM(XE_TIGERLAKE),
 	.require_force_probe = true,
-	.platform_engine_mask =
-		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
-		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
-		BIT(XE_HW_ENGINE_VCS2),
 };
 
 static const struct xe_device_desc adl_s_desc = {
@@ -160,10 +187,6 @@ static const struct xe_device_desc adl_s_desc = {
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_S),
 	.require_force_probe = true,
-	.platform_engine_mask =
-		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
-		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
-		BIT(XE_HW_ENGINE_VCS2),
 };
 
 static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
@@ -173,10 +196,6 @@ static const struct xe_device_desc adl_p_desc = {
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_P),
 	.require_force_probe = true,
-	.platform_engine_mask =
-		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
-		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
-		BIT(XE_HW_ENGINE_VCS2),
 	.subplatforms = (const struct xe_subplatform_desc[]) {
 		{ XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids },
 		{},
@@ -192,10 +211,6 @@ static const struct xe_device_desc dg1_desc = {
 	DGFX_FEATURES,
 	PLATFORM(XE_DG1),
 	.require_force_probe = true,
-	.platform_engine_mask =
-		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
-		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
-		BIT(XE_HW_ENGINE_VCS2),
 };
 
 static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
@@ -211,12 +226,6 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
 		{ XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
 		{ } \
 	}, \
-	.platform_engine_mask = \
-		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
-		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \
-		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
-		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
-		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \
 	.has_4tile = 1
 
 static const struct xe_device_desc ats_m_desc = {
@@ -235,22 +244,10 @@ static const struct xe_device_desc dg2_desc = {
 	DG2_FEATURES,
 };
 
-#define PVC_ENGINES \
-	BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \
-	BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \
-	BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \
-	BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \
-	BIT(XE_HW_ENGINE_BCS8) | \
-	BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \
-	BIT(XE_HW_ENGINE_VCS2) | \
-	BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
-	BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3)
-
 static const struct xe_gt_desc pvc_gts[] = {
 	{
 		.type = XE_GT_TYPE_REMOTE,
 		.vram_id = 1,
-		.engine_mask = PVC_ENGINES,
 		.mmio_adj_limit = 0,
 		.mmio_adj_offset = 0,
 	},
@@ -262,27 +259,17 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	PLATFORM(XE_PVC),
 	.require_force_probe = true,
 	.extra_gts = pvc_gts,
-	.platform_engine_mask = PVC_ENGINES,
 };
 
-#define MTL_MEDIA_ENGINES \
-	BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
-	BIT(XE_HW_ENGINE_VECS0)	/* TODO: GSC0 */
-
 static const struct xe_gt_desc xelpmp_gts[] = {
 	{
 		.type = XE_GT_TYPE_MEDIA,
 		.vram_id = 0,
-		.engine_mask = MTL_MEDIA_ENGINES,
 		.mmio_adj_limit = 0x40000,
 		.mmio_adj_offset = 0x380000,
 	},
 };
 
-#define MTL_MAIN_ENGINES \
-	BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
-	BIT(XE_HW_ENGINE_CCS0)
-
 static const struct xe_device_desc mtl_desc = {
 	/*
 	 * FIXME:  Real graphics/media IP will be mapped from hardware
@@ -293,7 +280,6 @@ static const struct xe_device_desc mtl_desc = {
 	.require_force_probe = true,
 	PLATFORM(XE_METEORLAKE),
 	.extra_gts = xelpmp_gts,
-	.platform_engine_mask = MTL_MAIN_ENGINES,
 };
 
 #undef PLATFORM
@@ -423,14 +409,19 @@ static void xe_info_init(struct xe_device *xe,
 		if (id == 0) {
 			gt->info.type = XE_GT_TYPE_MAIN;
 			gt->info.vram_id = id;
-			gt->info.__engine_mask = desc->platform_engine_mask;
+
+			gt->info.__engine_mask = desc->graphics->hw_engine_mask;
+			if (MEDIA_VER(xe) < 13 && desc->media)
+				gt->info.__engine_mask |= desc->media->hw_engine_mask;
+
 			gt->mmio.adj_limit = 0;
 			gt->mmio.adj_offset = 0;
 		} else {
 			gt->info.type = desc->extra_gts[id - 1].type;
 			gt->info.vram_id = desc->extra_gts[id - 1].vram_id;
-			gt->info.__engine_mask =
-				desc->extra_gts[id - 1].engine_mask;
+			gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ?
+				desc->media->hw_engine_mask :
+				desc->graphics->hw_engine_mask;
 			gt->mmio.adj_limit =
 				desc->extra_gts[id - 1].mmio_adj_limit;
 			gt->mmio.adj_offset =
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index e1749ceee9e0d..1b3dff8886ad4 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -17,6 +17,8 @@ struct xe_graphics_desc {
 	u8 vm_max_level;
 	u8 vram_flags;
 
+	u64 hw_engine_mask;	/* hardware engines provided by graphics IP */
+
 	u8 has_asid:1;
 	u8 has_flat_ccs:1;
 	u8 has_link_copy_engine:1;
@@ -27,6 +29,8 @@ struct xe_graphics_desc {
 struct xe_media_desc {
 	u8 ver;
 	u8 rel;
+
+	u64 hw_engine_mask;	/* hardware engines provided by media IP */
 };
 
 #endif
-- 
GitLab


From bd75664b9c3ff1829bc5acfd6789c0094e7bd617 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:17 -0700
Subject: [PATCH 1473/2340] drm/xe: Clarify GT counting logic

The total number of GTs supported by a platform should be one primary
GT, one media GT (if media version >= 13), and a number of remote tile
GTs dependent on the graphics IP present.  Express this more clearly in
the device setup.

Note that xe->info.tile_count is inaccurately named; the rest of the
driver treats this as the GT count, not just the tile count.  This
will need to be cleaned up at some point down the road.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-6-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c       | 18 ++++++++++++++----
 drivers/gpu/drm/xe/xe_pci_types.h |  3 ++-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 198cae9c5116a..0697496c26d0e 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -124,7 +124,7 @@ static const struct xe_graphics_desc graphics_xehpc = {
 
 	XE_HP_FEATURES,
 	.dma_mask_size = 52,
-	.max_tiles = 2,
+	.max_remote_tiles = 1,
 	.vm_max_level = 4,
 	.vram_flags = XE_VRAM_FLAGS_NEED64K,
 
@@ -143,8 +143,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
 		BIT(XE_HW_ENGINE_CCS0),
 
 	XE_HP_FEATURES,
-	.max_tiles = 2,
-
 	.has_flat_ccs = 0,
 };
 
@@ -388,7 +386,6 @@ static void xe_info_init(struct xe_device *xe,
 	xe->info.platform = desc->platform;
 	xe->info.dma_mask_size = desc->graphics->dma_mask_size;
 	xe->info.vram_flags = desc->graphics->vram_flags;
-	xe->info.tile_count = desc->graphics->max_tiles ?: 1;
 	xe->info.vm_max_level = desc->graphics->vm_max_level;
 	xe->info.supports_usm = desc->graphics->supports_usm;
 	xe->info.has_asid = desc->graphics->has_asid;
@@ -397,6 +394,19 @@ static void xe_info_init(struct xe_device *xe,
 	xe->info.has_range_tlb_invalidation = desc->graphics->has_range_tlb_invalidation;
 	xe->info.has_link_copy_engine = desc->graphics->has_link_copy_engine;
 
+	/*
+	 * All platforms have at least one primary GT.  Any platform with media
+	 * version 13 or higher has an additional dedicated media GT.  And
+	 * depending on the graphics IP there may be additional "remote tiles."
+	 * All of these together determine the overall GT count.
+	 *
+	 * FIXME: 'tile_count' here is misnamed since the rest of the driver
+	 * treats it as the number of GTs rather than just the number of tiles.
+	 */
+	xe->info.tile_count = 1 + desc->graphics->max_remote_tiles;
+	if (MEDIA_VER(xe) >= 13)
+		xe->info.tile_count++;
+
 	xe->info.subplatform = subplatform_desc ?
 		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
 	xe->info.step = xe_step_get(xe);
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 1b3dff8886ad4..f4bc5ef1bce36 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -13,12 +13,13 @@ struct xe_graphics_desc {
 	u8 rel;
 
 	u8 dma_mask_size;	/* available DMA address bits */
-	u8 max_tiles;		/* defaults to 1 if unset */
 	u8 vm_max_level;
 	u8 vram_flags;
 
 	u64 hw_engine_mask;	/* hardware engines provided by graphics IP */
 
+	u8 max_remote_tiles:2;
+
 	u8 has_asid:1;
 	u8 has_flat_ccs:1;
 	u8 has_link_copy_engine:1;
-- 
GitLab


From 9a08b2b935cedec1c563b03999cb37bfbeeb8b22 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:18 -0700
Subject: [PATCH 1474/2340] drm/xe: Add printable name to IP descriptors

Printing the name, along with the IP version number, will help reduce
confusion about which IP is present on a platform.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-7-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  4 ++++
 drivers/gpu/drm/xe/xe_pci.c          | 21 ++++++++++++++++++---
 drivers/gpu/drm/xe/xe_pci_types.h    |  2 ++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 74326091bf98f..87b92f5f078dc 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -51,6 +51,10 @@ struct xe_device {
 
 	/** @info: device info */
 	struct intel_device_info {
+		/** @graphics_name: graphics IP name */
+		const char *graphics_name;
+		/** @media_name: media IP name */
+		const char *media_name;
 		/** @graphics_verx100: graphics IP version */
 		u32 graphics_verx100;
 		/** @media_verx100: media IP version */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 0697496c26d0e..a9233d3a8ff16 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -71,6 +71,7 @@ struct xe_device_desc {
 #define NOP(x)	x
 
 static const struct xe_graphics_desc graphics_xelp = {
+	.name = "Xe_LP",
 	.ver = 12,
 	.rel = 0,
 
@@ -81,6 +82,7 @@ static const struct xe_graphics_desc graphics_xelp = {
 };
 
 static const struct xe_graphics_desc graphics_xelpp = {
+	.name = "Xe_LP+",
 	.ver = 12,
 	.rel = 10,
 
@@ -97,6 +99,7 @@ static const struct xe_graphics_desc graphics_xelpp = {
 	.vm_max_level = 3
 
 static const struct xe_graphics_desc graphics_xehpg = {
+	.name = "Xe_HPG",
 	.ver = 12,
 	.rel = 55,
 
@@ -110,6 +113,7 @@ static const struct xe_graphics_desc graphics_xehpg = {
 };
 
 static const struct xe_graphics_desc graphics_xehpc = {
+	.name = "Xe_HPC",
 	.ver = 12,
 	.rel = 60,
 
@@ -135,6 +139,7 @@ static const struct xe_graphics_desc graphics_xehpc = {
 };
 
 static const struct xe_graphics_desc graphics_xelpg = {
+	.name = "Xe_LPG",
 	.ver = 12,
 	.rel = 70,
 
@@ -147,6 +152,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
 };
 
 static const struct xe_media_desc media_xem = {
+	.name = "Xe_M",
 	.ver = 12,
 	.rel = 0,
 
@@ -156,6 +162,7 @@ static const struct xe_media_desc media_xem = {
 };
 
 static const struct xe_media_desc media_xehpm = {
+	.name = "Xe_HPM",
 	.ver = 12,
 	.rel = 55,
 
@@ -165,6 +172,7 @@ static const struct xe_media_desc media_xehpm = {
 };
 
 static const struct xe_media_desc media_xelpmp = {
+	.name = "Xe_LPM+",
 	.ver = 13,
 	.rel = 0,
 
@@ -384,6 +392,8 @@ static void xe_info_init(struct xe_device *xe,
 					 desc->media->rel;
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.platform = desc->platform;
+	xe->info.graphics_name = desc->graphics->name;
+	xe->info.media_name = desc->media ? desc->media->name : "none";
 	xe->info.dma_mask_size = desc->graphics->dma_mask_size;
 	xe->info.vram_flags = desc->graphics->vram_flags;
 	xe->info.vm_max_level = desc->graphics->vm_max_level;
@@ -485,12 +495,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	subplatform_desc = find_subplatform(xe, desc);
 
 	xe_info_init(xe, desc, subplatform_desc);
-	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d",
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d",
 		desc->platform_name,
 		subplatform_desc ? subplatform_desc->name : "",
 		xe->info.devid, xe->info.revid,
-		xe->info.is_dgfx, xe->info.graphics_verx100,
-		xe->info.media_verx100,
+		xe->info.is_dgfx,
+		xe->info.graphics_name,
+		xe->info.graphics_verx100 / 100,
+		xe->info.graphics_verx100 % 100,
+		xe->info.media_name,
+		xe->info.media_verx100 / 100,
+		xe->info.media_verx100 % 100,
 		xe->info.dma_mask_size, xe->info.tile_count);
 
 	drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index f4bc5ef1bce36..e479c1c4ed307 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 
 struct xe_graphics_desc {
+	const char *name;
 	u8 ver;
 	u8 rel;
 
@@ -28,6 +29,7 @@ struct xe_graphics_desc {
 };
 
 struct xe_media_desc {
+	const char *name;
 	u8 ver;
 	u8 rel;
 
-- 
GitLab


From 5822bba943ad2ecb386e8a27614e753ad7e285fa Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:19 -0700
Subject: [PATCH 1475/2340] drm/xe: Select graphics/media descriptors from
 GMD_ID

If graphics_desc and media_desc are not specified in a platform's
xe_device_desc, treat this as an indication that the IP version should
be determined from the hardware's GMD_ID register.

Note that leaving media_desc unset for a platform that simply doesn't
have the IP (e.g., PVC) is also okay --- a read of the GMD_ID register
offset will be attempted, but since there's no register at that location
a value of '0' will be returned, effectively disabling media support.

Mapping of version -> IP description is done via a table lookup; this
table will be re-used in future patches for some KUnit testing.

v2:
 - Drop dummy structures.  NULL can be safely used for both the GMD_ID
   cases and the "media not present case."
 - Use a table-based lookup of GMD_ID versions rather than a simple
   switch statement; the table will allow us to easily perform kunit
   testing of all the IP descriptors.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-8-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |   6 +
 drivers/gpu/drm/xe/xe_pci.c          | 184 +++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_pci_types.h    |   5 +
 3 files changed, 159 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 23d3b8f7e349d..8fc7677e2d13e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -22,6 +22,12 @@
 #define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n)	_MMIO(0xd50 + (n) * 4)
 #define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n)	_MMIO(0xd70 + (n) * 4)
 #define FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0xd84)
+
+#define GMD_ID					_MMIO(0xd8c)
+#define   GMD_ID_ARCH_MASK			REG_GENMASK(31, 22)
+#define   GMD_ID_RELEASE_MASK			REG_GENMASK(21, 14)
+#define   GMD_ID_STEP				REG_GENMASK(5, 0)
+
 #define FORCEWAKE_ACK_GT_MTL			_MMIO(0xdfc)
 
 #define GEN9_LNCFCMOCS(i)			_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index a9233d3a8ff16..7dab489cb5e8d 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -15,6 +15,7 @@
 #include <drm/xe_pciids.h>
 
 #include "regs/xe_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_device.h"
 #include "xe_drv.h"
 #include "xe_macros.h"
@@ -140,9 +141,6 @@ static const struct xe_graphics_desc graphics_xehpc = {
 
 static const struct xe_graphics_desc graphics_xelpg = {
 	.name = "Xe_LPG",
-	.ver = 12,
-	.rel = 70,
-
 	.hw_engine_mask =
 		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
 		BIT(XE_HW_ENGINE_CCS0),
@@ -173,9 +171,6 @@ static const struct xe_media_desc media_xehpm = {
 
 static const struct xe_media_desc media_xelpmp = {
 	.name = "Xe_LPM+",
-	.ver = 13,
-	.rel = 0,
-
 	.hw_engine_mask =
 		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
 		BIT(XE_HW_ENGINE_VECS0),	/* TODO: add GSC0 */
@@ -277,12 +272,7 @@ static const struct xe_gt_desc xelpmp_gts[] = {
 };
 
 static const struct xe_device_desc mtl_desc = {
-	/*
-	 * FIXME:  Real graphics/media IP will be mapped from hardware
-	 * GMD_ID register.  Hardcoded assignments here will go away soon.
-	 */
-	.graphics = &graphics_xelpg,
-	.media = &media_xelpmp,
+	/* .graphics and .media determined via GMD_ID */
 	.require_force_probe = true,
 	PLATFORM(XE_METEORLAKE),
 	.extra_gts = xelpmp_gts,
@@ -290,6 +280,17 @@ static const struct xe_device_desc mtl_desc = {
 
 #undef PLATFORM
 
+/* Map of GMD_ID values to graphics IP */
+static struct gmdid_map graphics_ip_map[] = {
+	{ 1270, &graphics_xelpg },
+	{ 1271, &graphics_xelpg },
+};
+
+/* Map of GMD_ID values to media IP */
+static struct gmdid_map media_ip_map[] = {
+	{ 1300, &media_xelpmp },
+};
+
 #define INTEL_VGA_DEVICE(id, info) {			\
 	PCI_DEVICE(PCI_VENDOR_ID_INTEL, id),		\
 	PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16,	\
@@ -378,31 +379,135 @@ find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc)
 	return NULL;
 }
 
-static void xe_info_init(struct xe_device *xe,
+static u32 peek_gmdid(struct xe_device *xe, u32 gmdid_offset)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	void __iomem *map = pci_iomap_range(pdev, 0, gmdid_offset, sizeof(u32));
+	u32 ver;
+
+	if (!map) {
+		drm_err(&xe->drm, "Failed to read GMD_ID (%#x) from PCI BAR.\n",
+			gmdid_offset);
+		return 0;
+	}
+
+	ver = ioread32(map);
+	pci_iounmap(pdev, map);
+
+	return REG_FIELD_GET(GMD_ID_ARCH_MASK, ver) * 100 +
+		REG_FIELD_GET(GMD_ID_RELEASE_MASK, ver);
+}
+
+static void handle_gmdid(struct xe_device *xe,
 			 const struct xe_device_desc *desc,
-			 const struct xe_subplatform_desc *subplatform_desc)
+			 const struct xe_graphics_desc **graphics,
+			 const struct xe_media_desc **media)
+{
+	u32 ver;
+
+	if (desc->graphics) {
+		/*
+		 * Pre-GMD_ID platform; device descriptor already points to
+		 * the appropriate graphics descriptor.
+		 */
+		*graphics = desc->graphics;
+		xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel;
+	} else {
+		/*
+		 * GMD_ID platform; read IP version from hardware and select
+		 * graphics descriptor based on the result.
+		 */
+		ver = peek_gmdid(xe, GMD_ID.reg);
+		for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
+			if (ver == graphics_ip_map[i].ver) {
+				xe->info.graphics_verx100 = ver;
+				*graphics = graphics_ip_map[i].ip;
+
+				break;
+			}
+		}
+
+		if (!xe->info.graphics_verx100) {
+			drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
+				ver / 100, ver % 100);
+		}
+	}
+
+	if (desc->media) {
+		/*
+		 * Pre-GMD_ID platform; device descriptor already points to
+		 * the appropriate media descriptor.
+		 */
+		*media = desc->media;
+		xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel;
+	} else {
+		/*
+		 * GMD_ID platform; read IP version from hardware and select
+		 * media descriptor based on the result.
+		 *
+		 * desc->media can also be NULL for a pre-GMD_ID platform that
+		 * simply doesn't have media (e.g., PVC); in that case the
+		 * attempt to read GMD_ID will return 0 (since there's no
+		 * register at that location).
+		 */
+		ver = peek_gmdid(xe, GMD_ID.reg + 0x380000);
+		if (ver == 0)
+			return;
+
+		for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
+			if (ver == media_ip_map[i].ver) {
+				xe->info.media_verx100 = ver;
+				*media = media_ip_map[i].ip;
+
+				break;
+			}
+		}
+
+		if (!xe->info.media_verx100) {
+			drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
+				ver / 100, ver % 100);
+		}
+	}
+}
+
+
+static int xe_info_init(struct xe_device *xe,
+			const struct xe_device_desc *desc,
+			const struct xe_subplatform_desc *subplatform_desc)
 {
+	const struct xe_graphics_desc *graphics_desc = NULL;
+	const struct xe_media_desc *media_desc = NULL;
 	struct xe_gt *gt;
 	u8 id;
 
-	xe->info.graphics_verx100 = desc->graphics->ver * 100 +
-				    desc->graphics->rel;
-	if (desc->media)
-		xe->info.media_verx100 = desc->media->ver * 100 +
-					 desc->media->rel;
+	/*
+	 * If this platform supports GMD_ID, we'll detect the proper IP
+	 * descriptor to use from hardware registers.
+	 */
+	handle_gmdid(xe, desc, &graphics_desc, &media_desc);
+
+	/*
+	 * If we couldn't detect the graphics IP, that's considered a fatal
+	 * error and we should abort driver load.  Failing to detect media
+	 * IP is non-fatal; we'll just proceed without enabling media support.
+	 */
+	if (!graphics_desc)
+		return -ENODEV;
+
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.platform = desc->platform;
-	xe->info.graphics_name = desc->graphics->name;
-	xe->info.media_name = desc->media ? desc->media->name : "none";
-	xe->info.dma_mask_size = desc->graphics->dma_mask_size;
-	xe->info.vram_flags = desc->graphics->vram_flags;
-	xe->info.vm_max_level = desc->graphics->vm_max_level;
-	xe->info.supports_usm = desc->graphics->supports_usm;
-	xe->info.has_asid = desc->graphics->has_asid;
-	xe->info.has_flat_ccs = desc->graphics->has_flat_ccs;
+	xe->info.graphics_name = graphics_desc->name;
+	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_4tile = desc->has_4tile;
-	xe->info.has_range_tlb_invalidation = desc->graphics->has_range_tlb_invalidation;
-	xe->info.has_link_copy_engine = desc->graphics->has_link_copy_engine;
+
+	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
+	xe->info.vram_flags = graphics_desc->vram_flags;
+	xe->info.vm_max_level = graphics_desc->vm_max_level;
+	xe->info.supports_usm = graphics_desc->supports_usm;
+	xe->info.has_asid = graphics_desc->has_asid;
+	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
+	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
+	xe->info.has_link_copy_engine = graphics_desc->has_link_copy_engine;
 
 	/*
 	 * All platforms have at least one primary GT.  Any platform with media
@@ -413,7 +518,7 @@ static void xe_info_init(struct xe_device *xe,
 	 * FIXME: 'tile_count' here is misnamed since the rest of the driver
 	 * treats it as the number of GTs rather than just the number of tiles.
 	 */
-	xe->info.tile_count = 1 + desc->graphics->max_remote_tiles;
+	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
 	if (MEDIA_VER(xe) >= 13)
 		xe->info.tile_count++;
 
@@ -430,9 +535,9 @@ static void xe_info_init(struct xe_device *xe,
 			gt->info.type = XE_GT_TYPE_MAIN;
 			gt->info.vram_id = id;
 
-			gt->info.__engine_mask = desc->graphics->hw_engine_mask;
-			if (MEDIA_VER(xe) < 13 && desc->media)
-				gt->info.__engine_mask |= desc->media->hw_engine_mask;
+			gt->info.__engine_mask = graphics_desc->hw_engine_mask;
+			if (MEDIA_VER(xe) < 13 && media_desc)
+				gt->info.__engine_mask |= media_desc->hw_engine_mask;
 
 			gt->mmio.adj_limit = 0;
 			gt->mmio.adj_offset = 0;
@@ -440,14 +545,16 @@ static void xe_info_init(struct xe_device *xe,
 			gt->info.type = desc->extra_gts[id - 1].type;
 			gt->info.vram_id = desc->extra_gts[id - 1].vram_id;
 			gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ?
-				desc->media->hw_engine_mask :
-				desc->graphics->hw_engine_mask;
+				media_desc->hw_engine_mask :
+				graphics_desc->hw_engine_mask;
 			gt->mmio.adj_limit =
 				desc->extra_gts[id - 1].mmio_adj_limit;
 			gt->mmio.adj_offset =
 				desc->extra_gts[id - 1].mmio_adj_offset;
 		}
 	}
+
+	return 0;
 }
 
 static void xe_pci_remove(struct pci_dev *pdev)
@@ -494,7 +601,12 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	subplatform_desc = find_subplatform(xe, desc);
 
-	xe_info_init(xe, desc, subplatform_desc);
+	err = xe_info_init(xe, desc, subplatform_desc);
+	if (err) {
+		drm_dev_put(&xe->drm);
+		return err;
+	}
+
 	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d",
 		desc->platform_name,
 		subplatform_desc ? subplatform_desc->name : "",
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index e479c1c4ed307..ba31b933eb8e0 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -36,4 +36,9 @@ struct xe_media_desc {
 	u64 hw_engine_mask;	/* hardware engines provided by media IP */
 };
 
+struct gmdid_map {
+	unsigned int ver;
+	const void *ip;
+};
+
 #endif
-- 
GitLab


From 3713ed52ef2bc9272afdd195fe24b011a4dcd44d Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:20 -0700
Subject: [PATCH 1476/2340] drm/xe: Add KUnit test for xe_pci.c IP engine lists

Add a simple KUnit test to ensure that the hardware engine lists for
GMD_ID IP definitions are sensible (i.e., no graphics engines defined
for the media IP and vice versa).

Only the IP descriptors for GMD_ID platforms are checked for now.
Presumably the engine lists on older pre-GMD_ID platforms shouldn't be
changing.  We can extend the KUnit testing in the future if we decide we
want to check those as well.

v2:
 - Add missing 'const' in xe_call_for_each_media_ip to avoid compiler
   warning.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-9-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/Makefile      |  1 +
 drivers/gpu/drm/xe/tests/xe_pci.c      | 44 +++++++++++++++
 drivers/gpu/drm/xe/tests/xe_pci_test.c | 74 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_pci_test.h |  6 +++
 4 files changed, 125 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/tests/xe_pci_test.c

diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 56919abb3f2a5..51f1a7f017d4d 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -4,5 +4,6 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
 	xe_bo_test.o \
 	xe_dma_buf_test.o \
 	xe_migrate_test.o \
+	xe_pci_test.o \
 	xe_rtp_test.o \
 	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index cc65ac5657b33..2178ad71c0da2 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -62,6 +62,50 @@ int xe_call_for_each_device(xe_device_fn xe_fn)
 	return ret;
 }
 
+/**
+ * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterates over the descriptors for all graphics IPs recognized
+ * by the driver and calls @xe_fn: for each one of them.
+ */
+void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn)
+{
+	const struct xe_graphics_desc *ip, *last = NULL;
+
+	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
+		ip = graphics_ip_map[i].ip;
+		if (ip == last)
+			continue;
+
+		xe_fn(ip);
+		last = ip;
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip);
+
+/**
+ * xe_call_for_each_media_ip - Iterate over all recognized media IPs
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterates over the descriptors for all media IPs recognized
+ * by the driver and calls @xe_fn: for each one of them.
+ */
+void xe_call_for_each_media_ip(xe_media_fn xe_fn)
+{
+	const struct xe_media_desc *ip, *last = NULL;
+
+	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
+		ip = media_ip_map[i].ip;
+		if (ip == last)
+			continue;
+
+		xe_fn(ip);
+		last = ip;
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip);
+
 int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
 			    enum xe_subplatform subplatform)
 {
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
new file mode 100644
index 0000000000000..9c6f6c2c6c6eb
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "tests/xe_test.h"
+
+#include "xe_device.h"
+#include "xe_pci_test.h"
+#include "xe_pci_types.h"
+
+static void check_graphics_ip(const struct xe_graphics_desc *graphics)
+{
+	struct kunit *test = xe_cur_kunit();
+	u64 mask = graphics->hw_engine_mask;
+
+	/* RCS, CCS, and BCS engines are allowed on the graphics IP */
+	mask &= ~(XE_HW_ENGINE_RCS_MASK |
+		  XE_HW_ENGINE_CCS_MASK |
+		  XE_HW_ENGINE_BCS_MASK);
+
+	/* Any remaining engines are an error */
+	KUNIT_ASSERT_EQ(test, mask, 0);
+}
+
+static void check_media_ip(const struct xe_media_desc *media)
+{
+	struct kunit *test = xe_cur_kunit();
+	u64 mask = media->hw_engine_mask;
+
+	/*
+	 * VCS and VECS engines are allowed on the media IP
+	 *
+	 * TODO:  Add GSCCS once support is added to the driver.
+	 */
+	mask &= ~(XE_HW_ENGINE_VCS_MASK |
+		  XE_HW_ENGINE_VECS_MASK);
+
+	/* Any remaining engines are an error */
+	KUNIT_ASSERT_EQ(test, mask, 0);
+}
+
+static void xe_gmdid_graphics_ip(struct kunit *test)
+{
+	xe_call_for_each_graphics_ip(check_graphics_ip);
+}
+
+static void xe_gmdid_media_ip(struct kunit *test)
+{
+	xe_call_for_each_media_ip(check_media_ip);
+}
+
+static struct kunit_case xe_pci_tests[] = {
+	KUNIT_CASE(xe_gmdid_graphics_ip),
+	KUNIT_CASE(xe_gmdid_media_ip),
+	{}
+};
+
+static struct kunit_suite xe_pci_test_suite = {
+	.name = "xe_pci",
+	.test_cases = xe_pci_tests,
+};
+
+kunit_test_suite(xe_pci_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index 43294e8c62bb2..cc0f1d141a4d7 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -9,6 +9,8 @@
 #include "xe_platform_types.h"
 
 struct xe_device;
+struct xe_graphics_desc;
+struct xe_media_desc;
 
 /*
  * Some defines just for clarity: these mean the test doesn't care about what
@@ -18,8 +20,12 @@ struct xe_device;
 #define XE_TEST_SUBPLATFORM_ANY	XE_SUBPLATFORM_UNINITIALIZED
 
 typedef int (*xe_device_fn)(struct xe_device *);
+typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *);
+typedef void (*xe_media_fn)(const struct xe_media_desc *);
 
 int xe_call_for_each_device(xe_device_fn xe_fn);
+void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
+void xe_call_for_each_media_ip(xe_media_fn xe_fn);
 
 int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
 			    enum xe_subplatform subplatform);
-- 
GitLab


From 21cc8aadddf9feca921389beafaad40224f8d219 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 6 Apr 2023 16:56:21 -0700
Subject: [PATCH 1477/2340] drm/xe: Clean up xe_device_desc

Now that most of the characteristics of a device are associated with the
graphics and media IPs, the remaining contents of xe_device_desc can be
cleaned up a bit:

 * 'gt' is unused; drop it
 * DEV_INFO_FOR_EACH_FLAG only covers two flags and is only used in this
   one file; drop the unnecessary macro complexity
 * Convert .has_4tile to a single bitfield bit so that it can be packed
   with the other feature flags
 * Move 'platform' lower in the structure for better packing

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230406235621.1914492-10-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 7dab489cb5e8d..2524ee1c73e32 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -24,11 +24,6 @@
 #include "xe_pm.h"
 #include "xe_step.h"
 
-#define DEV_INFO_FOR_EACH_FLAG(func) \
-	func(require_force_probe); \
-	func(is_dgfx); \
-	/* Keep has_* in alphabetical order */ \
-
 struct xe_subplatform_desc {
 	enum xe_subplatform subplatform;
 	const char *name;
@@ -46,23 +41,20 @@ struct xe_device_desc {
 	const struct xe_graphics_desc *graphics;
 	const struct xe_media_desc *media;
 
-	enum xe_platform platform;
 	const char *platform_name;
 	const struct xe_subplatform_desc *subplatforms;
 	const struct xe_gt_desc *extra_gts;
 
-	u8 gt; /* GT number, 0 if undefined */
-
-#define DEFINE_FLAG(name) u8 name:1
-	DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
-#undef DEFINE_FLAG
+	enum xe_platform platform;
 
+	u8 require_force_probe:1;
+	u8 is_dgfx:1;
 	/*
 	 * FIXME: Xe doesn't care about presence/lack of 4tile since we can
 	 * already determine that from the graphics IP version.  This flag
 	 * should eventually move entirely into the display code's own logic.
 	 */
-	bool has_4tile;
+	u8 has_4tile:1;
 };
 
 #define PLATFORM(x)		\
-- 
GitLab


From 36919ebeaacab3409c8266248221f392ee7ea9d8 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 6 Apr 2023 16:18:45 +0100
Subject: [PATCH 1478/2340] drm/xe: fix suspend-resume for dgfx

This stopped working now that TTM treats moving a pinned object through
ttm_bo_validate() as an error, for the general case. Add some new
routines to handle the new special casing needed for suspend-resume.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/244
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Tested-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c       | 129 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_bo.h       |   3 +
 drivers/gpu/drm/xe/xe_bo_evict.c |   8 +-
 3 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 1835f049c21ea..9a565203deaca 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -696,6 +696,135 @@ out:
 
 }
 
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to sytem memory.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed to for special handling of pinned VRAM object during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+	struct ttm_place place = {
+		.mem_type = XE_PL_TT,
+	};
+	struct ttm_placement placement = {
+		.placement = &place,
+		.num_placement = 1,
+	};
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_vram(bo)))
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
+	if (ret)
+		return ret;
+
+	if (!bo->ttm.ttm) {
+		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
+		if (!bo->ttm.ttm) {
+			ret = -ENOMEM;
+			goto err_res_free;
+		}
+	}
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
+/**
+ * xe_bo_restore_pinned() - Restore a pinned VRAM object
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved back to VRAM.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed to for special handling of pinned VRAM object during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_restore_pinned(struct xe_bo *bo)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
+	if (ret)
+		return ret;
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
 				       unsigned long page_offset)
 {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index dd58edcb93987..effa9d0cf0f69 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -231,6 +231,9 @@ bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
 int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
 
+int xe_bo_evict_pinned(struct xe_bo *bo);
+int xe_bo_restore_pinned(struct xe_bo *bo);
+
 extern struct ttm_device_funcs xe_ttm_funcs;
 
 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index bbf89a58cdf5b..6642c5f520096 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -63,7 +63,7 @@ int xe_bo_evict_all(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_evict(bo, true);
+		ret = xe_bo_evict_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		if (ret) {
@@ -97,7 +97,7 @@ int xe_bo_evict_all(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_evict(bo, true);
+		ret = xe_bo_evict_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		if (ret)
@@ -141,7 +141,7 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_validate(bo, NULL, false);
+		ret = xe_bo_restore_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		if (ret) {
 			xe_bo_put(bo);
@@ -205,7 +205,7 @@ int xe_bo_restore_user(struct xe_device *xe)
 		spin_unlock(&xe->pinned.lock);
 
 		xe_bo_lock(bo, &ww, 0, false);
-		ret = xe_bo_validate(bo, NULL, false);
+		ret = xe_bo_restore_pinned(bo);
 		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		if (ret) {
-- 
GitLab


From 2988cf02ee303a96052a6c486b9bbb6e4fd5c030 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Fri, 7 Apr 2023 13:55:22 -0700
Subject: [PATCH 1479/2340] drm/xe: Fix memory use after free

The wait_event_timeout() on g2h_fence.wq which is declared on
stack can return before the wake_up() gets called, resulting in a
stack out of bound access when wake_up() accesses the g2h_fene.wq.

Do not declare g2h_fence related wait_queue_head_t on stack.

Fixes the below KASAN BUG and associated kernel crashes.

BUG: KASAN: stack-out-of-bounds in do_raw_spin_lock+0x6f/0x1e0
Read of size 4 at addr ffff88826252f4ac by task kworker/u128:5/467

CPU: 25 PID: 467 Comm: kworker/u128:5 Tainted: G  U 6.3.0-rc4-xe #1
Workqueue: events_unbound g2h_worker_func [xe]
Call Trace:
 <TASK>
 dump_stack_lvl+0x64/0xb0
 print_report+0xc2/0x600
 kasan_report+0x96/0xc0
 do_raw_spin_lock+0x6f/0x1e0
 _raw_spin_lock_irqsave+0x47/0x60
 __wake_up_common_lock+0xc0/0x150
 dequeue_one_g2h+0x20f/0x6a0 [xe]
 g2h_worker_func+0xa9/0x180 [xe]
 process_one_work+0x527/0x990
 worker_thread+0x2d1/0x640
 kthread+0x174/0x1b0
 ret_from_fork+0x29/0x50
 </TASK>

Tested-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c       | 7 +++----
 drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 ++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 5e00b75d3ca21..9055ff133a7c7 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -23,7 +23,6 @@
 
 /* Used when a CT send wants to block and / or receive data */
 struct g2h_fence {
-	wait_queue_head_t wq;
 	u32 *response_buffer;
 	u32 seqno;
 	u16 response_len;
@@ -142,6 +141,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 	ct->fence_context = dma_fence_context_alloc(1);
 	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
 	init_waitqueue_head(&ct->wq);
+	init_waitqueue_head(&ct->g2h_fence_wq);
 
 	primelockdep(ct);
 
@@ -484,7 +484,6 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 			void *ptr;
 
 			g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
-			init_waitqueue_head(&g2h_fence->wq);
 			ptr = xa_store(&ct->fence_lookup,
 				       g2h_fence->seqno,
 				       g2h_fence, GFP_ATOMIC);
@@ -709,7 +708,7 @@ retry_same_fence:
 		return ret;
 	}
 
-	ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ);
+	ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
 	if (!ret) {
 		drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x",
 			g2h_fence.seqno, action[0]);
@@ -801,7 +800,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
 	g2h_fence->done = true;
 	smp_mb();
 
-	wake_up(&g2h_fence->wq);
+	wake_up_all(&ct->g2h_fence_wq);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index e0f9063e9b65f..fd27dacf00c59 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -74,6 +74,8 @@ struct xe_guc_ct {
 	struct xarray fence_lookup;
 	/** @wq: wait queue used for reliable CT sends and freeing G2H credits */
 	wait_queue_head_t wq;
+	/** @g2h_fence_wq: wait queue used for G2H fencing */
+	wait_queue_head_t g2h_fence_wq;
 #ifdef XE_GUC_CT_SELFTEST
 	/** @suppress_irq_handler: force flow control to sender */
 	bool suppress_irq_handler;
-- 
GitLab


From 0a12a612c870231172d30196e6245ea471fabaed Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 10 Apr 2023 13:02:29 -0700
Subject: [PATCH 1480/2340] drm/xe: Let primary and media GT share a
 kernel_bb_pool

The media GT requires a valid gt->kernel_bb_pool during driver probe to
allocate the WA and NOOP batchbuffers used to record default context
images.  Dynamically allocate the bb_pools so that the primary and media
GT can use the same pool during driver init.

The media GT still shouldn't be need the USM pool, so only hook up the
kernel_bb_pool for now.

Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230410200229.2726648-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bb.c         |  2 +-
 drivers/gpu/drm/xe/xe_gt.c         | 32 ++++++++++++++++++------------
 drivers/gpu/drm/xe/xe_gt_debugfs.c |  4 ++--
 drivers/gpu/drm/xe/xe_gt_types.h   | 10 +++++++---
 drivers/gpu/drm/xe/xe_migrate.c    |  4 ++--
 drivers/gpu/drm/xe/xe_sa.c         | 23 ++++++++++++++-------
 drivers/gpu/drm/xe/xe_sa.h         |  4 +---
 7 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 7172801ee5705..3deb2d55f4218 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -42,7 +42,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 	 * space to accomodate the platform-specific hardware prefetch
 	 * requirements.
 	 */
-	bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool : &gt->usm.bb_pool,
+	bb->bo = xe_sa_bo_new(!usm ? gt->kernel_bb_pool : gt->usm.bb_pool,
 			      4 * (dwords + 1) + bb_prefetch(gt));
 	if (IS_ERR(bb->bo)) {
 		err = PTR_ERR(bb->bo);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index daaf93e23bbfc..4186f7f0d42f5 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -137,7 +137,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
-	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo);
 	job = xe_bb_create_wa_job(e, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
@@ -186,7 +186,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 		}
 	}
 
-	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo);
 	job = xe_bb_create_wa_job(e, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
@@ -439,26 +439,32 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
-	/*
-	 * FIXME: This should be ok as SA should only be used by gt->migrate and
-	 * vm->gt->migrate and both should be pointing to a non-media GT. But to
-	 * realy safe, convert gt->kernel_bb_pool to a pointer and point a media
-	 * GT to the kernel_bb_pool on a real tile.
-	 */
 	if (!xe_gt_is_media_type(gt)) {
-		err = xe_sa_bo_manager_init(gt, &gt->kernel_bb_pool, SZ_1M, 16);
-		if (err)
+		gt->kernel_bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16);
+		if (IS_ERR(gt->kernel_bb_pool)) {
+			err = PTR_ERR(gt->kernel_bb_pool);
 			goto err_force_wake;
+		}
 
 		/*
 		 * USM has its only SA pool to non-block behind user operations
 		 */
 		if (gt_to_xe(gt)->info.supports_usm) {
-			err = xe_sa_bo_manager_init(gt, &gt->usm.bb_pool,
-						    SZ_1M, 16);
-			if (err)
+			gt->usm.bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16);
+			if (IS_ERR(gt->usm.bb_pool)) {
+				err = PTR_ERR(gt->usm.bb_pool);
 				goto err_force_wake;
+			}
 		}
+	} else {
+		struct xe_gt *full_gt = xe_find_full_gt(gt);
+
+		/*
+		 * Media GT's kernel_bb_pool is only used while recording the
+		 * default context during GT init.  The USM pool should never
+		 * be needed on the media GT.
+		 */
+		gt->kernel_bb_pool = full_gt->kernel_bb_pool;
 	}
 
 	if (!xe_gt_is_media_type(gt)) {
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 9fab8017490f2..c45486c2015a9 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -66,8 +66,8 @@ static int sa_info(struct seq_file *m, void *data)
 	struct xe_gt *gt = node_to_gt(m->private);
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	drm_suballoc_dump_debug_info(&gt->kernel_bb_pool.base, &p,
-				     gt->kernel_bb_pool.gpu_addr);
+	drm_suballoc_dump_debug_info(&gt->kernel_bb_pool->base, &p,
+				     gt->kernel_bb_pool->gpu_addr);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 9d3117fad2e4b..7c47d67aa8be8 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -214,7 +214,7 @@ struct xe_gt {
 		 * behind any user operations which may have resulted in a
 		 * fault.
 		 */
-		struct xe_sa_manager bb_pool;
+		struct xe_sa_manager *bb_pool;
 		/**
 		 * @reserved_bcs_instance: reserved BCS instance used for USM
 		 * operations (e.g. mmigrations, fixing page tables)
@@ -304,8 +304,12 @@ struct xe_gt {
 	/** @hw_engines: hardware engines on the GT */
 	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
 
-	/** @kernel_bb_pool: Pool from which batchbuffers are allocated */
-	struct xe_sa_manager kernel_bb_pool;
+	/**
+	 * @kernel_bb_pool: Pool from which batchbuffers are allocated.
+	 *
+	 * Media GT shares a pool with its primary GT.
+	 */
+	struct xe_sa_manager *kernel_bb_pool;
 
 	/** @migrate: Migration helper for vram blits and clearing */
 	struct xe_migrate *migrate;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 13cfb7ad28500..2169d687ba3fd 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -161,7 +161,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
 	u32 map_ofs, level, i;
 	struct xe_device *xe = gt_to_xe(m->gt);
-	struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo;
+	struct xe_bo *bo, *batch = gt->kernel_bb_pool->bo;
 	u64 entry;
 	int ret;
 
@@ -229,7 +229,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 
 		if (xe->info.supports_usm) {
-			batch = gt->usm.bb_pool.bo;
+			batch = gt->usm.bb_pool->bo;
 			batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE,
 						&is_vram);
 			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 96c4b0ef24fe8..c16f7c14ff526 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -33,13 +33,18 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
 	sa_manager->bo = NULL;
 }
 
-int xe_sa_bo_manager_init(struct xe_gt *gt,
-			  struct xe_sa_manager *sa_manager,
-			  u32 size, u32 align)
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 managed_size = size - SZ_4K;
 	struct xe_bo *bo;
+	int ret;
+
+	struct xe_sa_manager *sa_manager = drmm_kzalloc(&gt_to_xe(gt)->drm,
+							sizeof(*sa_manager),
+							GFP_KERNEL);
+	if (!sa_manager)
+		return ERR_PTR(-ENOMEM);
 
 	sa_manager->bo = NULL;
 
@@ -49,7 +54,7 @@ int xe_sa_bo_manager_init(struct xe_gt *gt,
 	if (IS_ERR(bo)) {
 		drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
 			PTR_ERR(bo));
-		return PTR_ERR(bo);
+		return (struct xe_sa_manager *)bo;
 	}
 	sa_manager->bo = bo;
 
@@ -61,15 +66,19 @@ int xe_sa_bo_manager_init(struct xe_gt *gt,
 		if (!sa_manager->cpu_ptr) {
 			xe_bo_unpin_map_no_vm(sa_manager->bo);
 			sa_manager->bo = NULL;
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 		}
 	} else {
 		sa_manager->cpu_ptr = bo->vmap.vaddr;
 		memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
 	}
 
-	return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
-					sa_manager);
+	ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
+				       sa_manager);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return sa_manager;
 }
 
 struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
index 742282ef71792..3063fb34c720c 100644
--- a/drivers/gpu/drm/xe/xe_sa.h
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -11,9 +11,7 @@ struct dma_fence;
 struct xe_bo;
 struct xe_gt;
 
-int xe_sa_bo_manager_init(struct xe_gt *gt,
-			  struct xe_sa_manager *sa_manager,
-			  u32 size, u32 align);
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align);
 
 struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
 				  u32 size);
-- 
GitLab


From 67f2f0d7371709cb91d46d4c557aaa28b902674c Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 10 Apr 2023 14:26:58 -0700
Subject: [PATCH 1481/2340] drm/xe: Don't grab runtime PM ref in engine create
 IOCTL

A VM had a runtime PM ref, a engine can't be created without a VM, and
the engine holds a ref to the VM thus this is unnecessary. Beyond that
taking a ref in the engine create IOCTL and dropping it in the destroy
IOCTL is wrong as a user doesn't have to call the destroy IOCTL (e.g.
they can just kill the process or close the driver FD). If a user does
this PM refs are leaked.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c | 43 ++++++++++------------------------
 1 file changed, 13 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 141cb223ba022..5666c8e00c978 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -539,8 +539,6 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
 	       return -EINVAL;
 
-	xe_pm_runtime_get(xe);
-
 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
 		for_each_gt(gt, xe, id) {
 			struct xe_engine *new;
@@ -552,16 +550,12 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			logical_mask = bind_engine_logical_mask(xe, gt, eci,
 								args->width,
 								args->num_placements);
-			if (XE_IOCTL_ERR(xe, !logical_mask)) {
-				err = -EINVAL;
-				goto put_rpm;
-			}
+			if (XE_IOCTL_ERR(xe, !logical_mask))
+				return -EINVAL;
 
 			hwe = find_hw_engine(xe, eci[0]);
-			if (XE_IOCTL_ERR(xe, !hwe)) {
-				err = -EINVAL;
-				goto put_rpm;
-			}
+			if (XE_IOCTL_ERR(xe, !hwe))
+				return -EINVAL;
 
 			migrate_vm = xe_migrate_get_vm(gt->migrate);
 			new = xe_engine_create(xe, migrate_vm, logical_mask,
@@ -576,7 +570,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 				err = PTR_ERR(new);
 				if (e)
 					goto put_engine;
-				goto put_rpm;
+				return err;
 			}
 			if (id == 0)
 				e = new;
@@ -589,30 +583,22 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 		logical_mask = calc_validate_logical_mask(xe, gt, eci,
 							  args->width,
 							  args->num_placements);
-		if (XE_IOCTL_ERR(xe, !logical_mask)) {
-			err = -EINVAL;
-			goto put_rpm;
-		}
+		if (XE_IOCTL_ERR(xe, !logical_mask))
+			return -EINVAL;
 
 		hwe = find_hw_engine(xe, eci[0]);
-		if (XE_IOCTL_ERR(xe, !hwe)) {
-			err = -EINVAL;
-			goto put_rpm;
-		}
+		if (XE_IOCTL_ERR(xe, !hwe))
+			return -EINVAL;
 
 		vm = xe_vm_lookup(xef, args->vm_id);
-		if (XE_IOCTL_ERR(xe, !vm)) {
-			err = -ENOENT;
-			goto put_rpm;
-		}
+		if (XE_IOCTL_ERR(xe, !vm))
+			return -ENOENT;
 
 		e = xe_engine_create(xe, vm, logical_mask,
 				     args->width, hwe, ENGINE_FLAG_PERSISTENT);
 		xe_vm_put(vm);
-		if (IS_ERR(e)) {
-			err = PTR_ERR(e);
-			goto put_rpm;
-		}
+		if (IS_ERR(e))
+			return PTR_ERR(e);
 	}
 
 	if (args->extensions) {
@@ -642,8 +628,6 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 put_engine:
 	xe_engine_kill(e);
 	xe_engine_put(e);
-put_rpm:
-	xe_pm_runtime_put(xe);
 	return err;
 }
 
@@ -750,7 +734,6 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
 
 	trace_xe_engine_close(e);
 	xe_engine_put(e);
-	xe_pm_runtime_put(xe);
 
 	return 0;
 }
-- 
GitLab


From 689f40f520b6434db29f7b3d7c64b3305b310992 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 10 Apr 2023 11:39:08 -0700
Subject: [PATCH 1482/2340] drm/xe: Use packed bitfields for xe->info feature
 flags

Replace 'bool' fields with single bits to allow the various device
feature flags to pack more tightly.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230410183910.2696628-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 87b92f5f078dc..f1011ddb58501 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -61,8 +61,6 @@ struct xe_device {
 		u32 media_verx100;
 		/** @mem_region_mask: mask of valid memory regions */
 		u32 mem_region_mask;
-		/** @is_dgfx: is discrete device */
-		bool is_dgfx;
 		/** @platform: XE platform enum */
 		enum xe_platform platform;
 		/** @subplatform: XE subplatform enum */
@@ -81,20 +79,23 @@ struct xe_device {
 		u8 tile_count;
 		/** @vm_max_level: Max VM level */
 		u8 vm_max_level;
+
+		/** @is_dgfx: is discrete device */
+		u8 is_dgfx:1;
 		/** @supports_usm: Supports unified shared memory */
-		bool supports_usm;
+		u8 supports_usm:1;
 		/** @has_asid: Has address space ID */
-		bool has_asid;
+		u8 has_asid:1;
 		/** @enable_guc: GuC submission enabled */
-		bool enable_guc;
+		u8 enable_guc:1;
 		/** @has_flat_ccs: Whether flat CCS metadata is used */
-		bool has_flat_ccs;
+		u8 has_flat_ccs:1;
 		/** @has_4tile: Whether tile-4 tiling is supported */
-		bool has_4tile;
+		u8 has_4tile:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
-		bool has_range_tlb_invalidation;
+		u8 has_range_tlb_invalidation:1;
 		/** @has_link_copy_engines: Whether the platform has link copy engines */
-		bool has_link_copy_engine;
+		u8 has_link_copy_engine:1;
 	} info;
 
 	/** @irq: device interrupt state */
-- 
GitLab


From bf08dd47d1567cb922d60a669e5a8a0c40253840 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 10 Apr 2023 11:39:09 -0700
Subject: [PATCH 1483/2340] drm/xe: Track whether platform has LLC

Some driver initialization is conditional on the presence of an LLC.
Add an extra feature flag to support this.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230410183910.2696628-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 drivers/gpu/drm/xe/xe_pci.c          | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index f1011ddb58501..f3cf5a4e5ab23 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -92,6 +92,8 @@ struct xe_device {
 		u8 has_flat_ccs:1;
 		/** @has_4tile: Whether tile-4 tiling is supported */
 		u8 has_4tile:1;
+		/** @has_llc: Device has a shared CPU+GPU last level cache */
+		u8 has_llc:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
 		/** @has_link_copy_engines: Whether the platform has link copy engines */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 2524ee1c73e32..1ea175d6a7d32 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -55,6 +55,7 @@ struct xe_device_desc {
 	 * should eventually move entirely into the display code's own logic.
 	 */
 	u8 has_4tile:1;
+	u8 has_llc:1;
 };
 
 #define PLATFORM(x)		\
@@ -172,6 +173,7 @@ static const struct xe_device_desc tgl_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_TIGERLAKE),
+	.has_llc = 1,
 	.require_force_probe = true,
 };
 
@@ -179,6 +181,7 @@ static const struct xe_device_desc adl_s_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_S),
+	.has_llc = 1,
 	.require_force_probe = true,
 };
 
@@ -188,6 +191,7 @@ static const struct xe_device_desc adl_p_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_P),
+	.has_llc = 1,
 	.require_force_probe = true,
 	.subplatforms = (const struct xe_subplatform_desc[]) {
 		{ XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids },
@@ -491,6 +495,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_4tile = desc->has_4tile;
+	xe->info.has_llc = desc->has_llc;
 
 	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
 	xe->info.vram_flags = graphics_desc->vram_flags;
-- 
GitLab


From 3c6be2542e353268b27ca4d3cc433c9e6a49bd26 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 10 Apr 2023 11:39:10 -0700
Subject: [PATCH 1484/2340] drm/xe: Only request PCODE_WRITE_MIN_FREQ_TABLE on
 LLC platforms

PCODE_WRITE_MIN_FREQ_TABLE is only applicable to platforms with an LLC.
Change the discrete GPU check to an LLC check instead; this take care of
skipping not only the discrete platforms, but also integrated platforms
like MTL that do not have an LLC.

Fixes MTL dmesg error:

  xe 0000:00:02.0: [drm] *ERROR* PCODE Mailbox failed: 1 Illegal Command

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230410183910.2696628-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pcode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index fb1ce2d49bec5..99bb730684edc 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -210,7 +210,7 @@ int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
 	int ret;
 	u32 freq;
 
-	if (IS_DGFX(gt_to_xe(gt)))
+	if (!gt_to_xe(gt)->info.has_llc)
 		return 0;
 
 	if (max_gt_freq <= min_gt_freq)
-- 
GitLab


From 94324e6bed4b5d973c0df5d2d7d0f50503306a28 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 6 Apr 2023 10:58:11 -0700
Subject: [PATCH 1485/2340] drm/xe: GuC and HuC loading support for RKL

Rocketlake uses TGL GuC and HuC

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 7a410c106df4d..2c2080928a826 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -106,11 +106,13 @@ struct fw_blobs_by_type {
 	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
 	fw_def(ALDERLAKE_P,	major_ver(i915,	guc,	adlp,	70, 5))		\
 	fw_def(ALDERLAKE_S,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(ROCKETLAKE,	major_ver(i915,	guc,	tgl,	70, 5))		\
 	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)				\
 	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,	tgl))			\
 	fw_def(DG1,		no_ver(i915,	huc,	dg1))			\
+	fw_def(ROCKETLAKE,	no_ver(i915,	huc,	tgl))			\
 	fw_def(TIGERLAKE,	no_ver(i915,	huc,	tgl))
 
 #define MAKE_FW_PATH(dir__, uc__, shortname__, version__)			\
-- 
GitLab


From 221896e54a30282e7dce2f7f228d4f49b2b970c2 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 11 Apr 2023 11:04:58 +0100
Subject: [PATCH 1486/2340] drm/xe/mmio: stop incorrectly triggering drm_warn

CI keeps triggering:

xe 0000:03:00.0: [drm] Restricting VRAM size to PCI resource size
(0x400000000->0x3fa000000)

Due to usable_size vs vram_size differences. However, we only want to
trigger the drm_warn() to let developers know that the system they are
using is going clamp the VRAM size to match the IO size, where they can
likely only use 256M of VRAM. Once we properly support small-bar we can
revisit this.

v2 (Lucas): Drop the TODO for now

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 5cacaa05759ad..98357c1f109f9 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -228,9 +228,9 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	else if (xe->mem.vram.io_size < usable_size && !xe_force_vram_bar_size)
 		drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
 			 (u64)xe->mem.vram.size >> 20);
-	if (xe->mem.vram.size < vram_size)
+	if (usable_size > xe->mem.vram.io_size)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
-			 vram_size, (u64)xe->mem.vram.size);
+			 usable_size, xe->mem.vram.io_size);
 
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
 	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
-- 
GitLab


From a8a39c15b011b8ed986f55c6e52e015b0d81da8a Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Tue, 11 Apr 2023 15:53:51 -0700
Subject: [PATCH 1487/2340] drm/xe: Add Rocketlake device info

Add missing device info for Rocketlake.
While at it, also set the value for IS_ROCKETLAKE
macro which is right now set to 0.

v2: Also add abox_mask to the device info(Lucas)
v3: rebase
v4: Set IS_ROCKETLAKE (Anusha)

Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Tested-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>(v2)
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 1ea175d6a7d32..df7590b7dc2c4 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -177,6 +177,13 @@ static const struct xe_device_desc tgl_desc = {
 	.require_force_probe = true,
 };
 
+static const struct xe_device_desc rkl_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ROCKETLAKE),
+	.require_force_probe = true,
+};
+
 static const struct xe_device_desc adl_s_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
@@ -300,6 +307,7 @@ static struct gmdid_map media_ip_map[] = {
  */
 static const struct pci_device_id pciidlist[] = {
 	XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+	XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
 	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
 	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
 	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
-- 
GitLab


From fa4fe0db0885b089200cc336207e40f6902ebbb2 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 18 Apr 2023 13:41:47 +0100
Subject: [PATCH 1488/2340] drm/xe/tlb: fix expected_seqno calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It looks like when tlb_invalidation.seqno overflows
TLB_INVALIDATION_SEQNO_MAX, we start counting again from one, as per
send_tlb_invalidation(). This is also inline with initial value we give
it in xe_gt_tlb_invalidation_init().  When calculating the
expected_seqno we should also take this into account.

While we are here also print out the values if we ever trigger the
warning.

v2 (José):
  - drm_WARN_ON() is preferred over plain WARN_ON(), since it gives
    information on the originating device.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/248
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index f279e21300aa9..604f189dbd703 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -319,7 +319,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	/* Sanity check on seqno */
 	expected_seqno = (gt->tlb_invalidation.seqno_recv + 1) %
 		TLB_INVALIDATION_SEQNO_MAX;
-	XE_WARN_ON(expected_seqno != msg[0]);
+	if (!expected_seqno)
+		expected_seqno = 1;
+	if (drm_WARN_ON(&gt->xe->drm, expected_seqno != msg[0])) {
+		drm_err(&gt->xe->drm, "TLB expected_seqno(%d) != msg(%u)\n",
+			expected_seqno, msg[0]);
+	}
 
 	gt->tlb_invalidation.seqno_recv = msg[0];
 	smp_wmb();
-- 
GitLab


From 79f2432e3138a3240a99441fc077181e2e8c8fb9 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 19 Apr 2023 15:49:09 -0700
Subject: [PATCH 1489/2340] drm/xe/sr: Apply masked registers properly

The 'clear' field for register save/restore entries was being placed in
the value bits of the register rather than the mask bits; make sure it
gets shifted into the mask bits.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230419224909.4000920-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index ff83da4cf4a76..e38397fc771a1 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -148,7 +148,7 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg,
 	 * supposed to set all bits.
 	 */
 	if (entry->masked_reg)
-		val = (entry->clr_bits ?: entry->set_bits << 16);
+		val = (entry->clr_bits ?: entry->set_bits) << 16;
 	else if (entry->clr_bits + 1)
 		val = (entry->reg_type == XE_RTP_REG_MCR ?
 		       xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) :
-- 
GitLab


From 1a9d163c4243c679e7a8d4c4abd787e40249485f Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 6 Apr 2023 17:26:24 +0100
Subject: [PATCH 1490/2340] drm/xe/sched_job: prefer dma_fence_is_later
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Doesn't look like we are accounting for seqno wrap. Just use
__dma_fence_is_later() like we already do for xe_hw_fence_signaled().

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_sched_job.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index d9add0370a984..795146dfd663c 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -229,7 +229,9 @@ bool xe_sched_job_started(struct xe_sched_job *job)
 {
 	struct xe_lrc *lrc = job->engine->lrc;
 
-	return xe_lrc_start_seqno(lrc) >= xe_sched_job_seqno(job);
+	return !__dma_fence_is_later(xe_sched_job_seqno(job),
+				     xe_lrc_start_seqno(lrc),
+				     job->fence->ops);
 }
 
 bool xe_sched_job_completed(struct xe_sched_job *job)
@@ -241,7 +243,8 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
 	 * parallel handshake is done.
 	 */
 
-	return xe_lrc_seqno(lrc) >= xe_sched_job_seqno(job);
+	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
+				     job->fence->ops);
 }
 
 void xe_sched_job_arm(struct xe_sched_job *job)
-- 
GitLab


From 7500477ded53343921b24e7ec5770197af710d94 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 6 Apr 2023 17:26:25 +0100
Subject: [PATCH 1491/2340] drm/xe/lrc: give start_seqno a better default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If looking at the initial engine dump we should expect this to match
XE_FENCE_INITIAL_SEQNO - 1.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_lrc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index fb8c6f7d6528e..ae605e7805de4 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -700,6 +700,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	map = __xe_lrc_seqno_map(lrc);
 	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
 
+	map = __xe_lrc_start_seqno_map(lrc);
+	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+
 	return 0;
 
 err_lrc_finish:
-- 
GitLab


From d33dc1dc29cab7871f9b0adee7b94b4dc5de5cb1 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 21 Apr 2023 07:50:05 -0700
Subject: [PATCH 1492/2340] drm/xe: Fix xe_mmio_rmw32 operation

xe_mmio_rmw32 was failing to invert the passed in mask, resulting in a
register update that wasn't the expected RMW operation.  Fortunately the
impact of this mistake was limited, since this function isn't heavily
used in Xe right now; this will mostly fix some GuC PM interrupt
unmasking.

v2:
 - Rename parameters as 'clr' and 'set' to clarify semantics.  (Lucas)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230421145006.10940-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 354be6fae0d47..be7ba2813d58d 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -42,13 +42,13 @@ static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg)
 	return readl(gt->mmio.regs + reg);
 }
 
-static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask,
-				 u32 val)
+static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 clr,
+				 u32 set)
 {
 	u32 old, reg_val;
 
 	old = xe_mmio_read32(gt, reg);
-	reg_val = (old & mask) | val;
+	reg_val = (old & ~clr) | set;
 	xe_mmio_write32(gt, reg, reg_val);
 
 	return old;
-- 
GitLab


From e881b1292f1791826476f1a2eaf80cc85e2677c5 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 18 Apr 2023 16:02:47 -0700
Subject: [PATCH 1493/2340] drm/xe: Drop GFX_FLSH_CNTL_GEN6 write during GGTT
 invalidation

The write of GFX_FLSH_CNTL_GEN6 was inherited from the i915 codebase
where it was used to force a flush of the write-combine buffer in cases
where the GSM/GGTT were mapped as WC.  Since Xe never uses WC mappings
of the GGTT, this register write is unnecessary.  Furthermore, this
register was removed on Xe_HP-based platforms, so this write winds up
clobbering an unrelated register.

v2:
 - Also drop GFX_FLSH_CNTL_GEN6 from the register file now that it's no
   longer used.  (Lucas)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230418230247.3802438-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 ---
 drivers/gpu/drm/xe/xe_ggtt.c         | 5 -----
 2 files changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 8fc7677e2d13e..c1d73f3e7bc35 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -351,9 +351,6 @@
 #define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
 #define GEN6_GT_GFX_RC6				_MMIO(0x138108)
 
-#define GFX_FLSH_CNTL_GEN6			_MMIO(0x101008)
-#define   GFX_FLSH_CNTL_EN			(1 << 0)
-
 #define GT_INTR_DW(x)				_MMIO(0x190018 + ((x) * 4))
 
 #define GUC_SG_INTR_ENABLE			_MMIO(0x190038)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 99bc9036c7a0d..0fda9a18049b0 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -196,11 +196,6 @@ void xe_ggtt_invalidate(struct xe_gt *gt)
 {
 	/* TODO: vfunc for GuC vs. non-GuC */
 
-	/* TODO: i915 makes comments about this being uncached and
-	 * therefore flushing WC buffers.  Is that really true here?
-	 */
-	xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN);
-
 	if (gt->uc.guc.submission_state.enabled) {
 		int seqno;
 
-- 
GitLab


From 96cb46df567e04bcc569ffde9c426b078c5601b1 Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Tue, 25 Apr 2023 16:31:07 +0530
Subject: [PATCH 1494/2340] drm/xe: Keep all resize bar related prints inside
 xe_resize_vram_bar

xe_resize_vram_bar() function is already printing the status of bar
resizing. It has prints covering both success and failure.
There is no need of additional prints in the caller which were not so
easily to follow.

Modified all BAR size prints to consistently print the size in MiB.

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 98357c1f109f9..5536f84682c03 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -59,7 +59,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 
 	ret = pci_resize_resource(pdev, resno, bar_size);
 	if (ret) {
-		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n",
+		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
 			 resno, 1 << bar_size, ERR_PTR(ret));
 		return -1;
 	}
@@ -95,7 +95,7 @@ static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size)
 		    rebar_size >= roundup_pow_of_two(vram_size)) {
 			rebar_size = vram_size;
 			drm_info(&xe->drm,
-				 "Given bar size is not within supported size, setting it to default: %llu\n",
+				 "Given bar size is not within supported size, setting it to default: %lluMiB\n",
 				 (u64)vram_size >> 20);
 		}
 	} else {
@@ -107,6 +107,9 @@ static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size)
 			return 0;
 	}
 
+	drm_info(&xe->drm, "Resizing bar from %lluMiB -> %lluMiB\n",
+		 (u64)current_size >> 20, (u64)rebar_size >> 20);
+
 	while (root->parent)
 		root = root->parent;
 
@@ -117,7 +120,7 @@ static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size)
 	}
 
 	if (!root_res) {
-		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing\n");
+		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
 		return -1;
 	}
 
@@ -183,7 +186,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	u64 vram_size;
 	u64 original_size;
 	u64 usable_size;
-	int resize_result, err;
+	int err;
 
 	if (!IS_DGFX(xe)) {
 		xe->mem.vram.mapping = 0;
@@ -212,7 +215,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	if (err)
 		return err;
 
-	resize_result = xe_resize_vram_bar(xe, vram_size);
+	xe_resize_vram_bar(xe, vram_size);
 	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
 	xe->mem.vram.io_size = min(usable_size,
 				   pci_resource_len(pdev, GEN12_LMEM_BAR));
@@ -221,16 +224,9 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	if (!xe->mem.vram.size)
 		return -EIO;
 
-	if (resize_result > 0)
-		drm_info(&xe->drm, "Successfully resize VRAM from %lluMiB to %lluMiB\n",
-			 (u64)original_size >> 20,
-			 (u64)xe->mem.vram.io_size >> 20);
-	else if (xe->mem.vram.io_size < usable_size && !xe_force_vram_bar_size)
-		drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
-			 (u64)xe->mem.vram.size >> 20);
 	if (usable_size > xe->mem.vram.io_size)
-		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
-			 usable_size, xe->mem.vram.io_size);
+		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (%lluMiB->%lluMiB)\n",
+			 (u64)usable_size >> 20, (u64)xe->mem.vram.io_size >> 20);
 
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
 	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
-- 
GitLab


From a180f4e13c4473f4e66e5666dbb6157d56d83dcf Mon Sep 17 00:00:00 2001
From: Riana Tauro <riana.tauro@intel.com>
Date: Thu, 20 Apr 2023 11:26:48 +0530
Subject: [PATCH 1495/2340] drm/xe/guc_pc: Reorder forcewake and xe_pm_runtime
 calls

When the device is runtime suspended, reading some of the sysfs
entries under device/gt#/ causes a resume error
This is due to the ordering of pm_runtime and forcewake calls.
Reorder to wake up using xe_pm_runtime_get and then forcewake

v2: add goto statements (Rodrigo)

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 5a8d827ba7704..b853831b342b3 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -370,15 +370,14 @@ static ssize_t freq_act_show(struct device *dev,
 	u32 freq;
 	ssize_t ret;
 
+	xe_device_mem_access_get(gt_to_xe(gt));
 	/*
 	 * When in RC6, actual frequency is 0. Let's block RC6 so we are able
 	 * to verify that our freq requests are really happening.
 	 */
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		return ret;
-
-	xe_device_mem_access_get(gt_to_xe(gt));
+		goto out;
 
 	if (xe->info.platform == XE_METEORLAKE) {
 		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1.reg);
@@ -388,11 +387,11 @@ static ssize_t freq_act_show(struct device *dev,
 		freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
 	}
 
-	xe_device_mem_access_put(gt_to_xe(gt));
-
 	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	xe_device_mem_access_put(gt_to_xe(gt));
 	return ret;
 }
 static DEVICE_ATTR_RO(freq_act);
@@ -405,22 +404,23 @@ static ssize_t freq_cur_show(struct device *dev,
 	u32 freq;
 	ssize_t ret;
 
+	xe_device_mem_access_get(gt_to_xe(gt));
 	/*
 	 * GuC SLPC plays with cur freq request when GuCRC is enabled
 	 * Block RC6 for a more reliable read.
 	 */
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		return ret;
+		goto out;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg);
-	xe_device_mem_access_put(gt_to_xe(gt));
 
 	freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
 	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	xe_device_mem_access_put(gt_to_xe(gt));
 	return ret;
 }
 static DEVICE_ATTR_RO(freq_cur);
@@ -610,17 +610,17 @@ static ssize_t rc6_residency_show(struct device *dev,
 	u32 reg;
 	ssize_t ret;
 
+	xe_device_mem_access_get(pc_to_xe(pc));
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		return ret;
+		goto out;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
 	reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg);
-	xe_device_mem_access_put(pc_to_xe(pc));
-
 	ret = sysfs_emit(buff, "%u\n", reg);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret;
 }
 static DEVICE_ATTR_RO(rc6_residency);
-- 
GitLab


From fdb3abcebba5d4a647739bb79a3818bd81956f64 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 25 Apr 2023 10:51:16 +0200
Subject: [PATCH 1496/2340] drm/xe: Fix build without CONFIG_PM_SLEEP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Build without CONFIG_PM_SLEEP (such as for riscv) was failing due
to unused xe_pci_runtime_* functions.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index df7590b7dc2c4..5f750edce5427 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -702,7 +702,6 @@ static int xe_pci_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static int xe_pci_runtime_suspend(struct device *dev)
 {
@@ -765,6 +764,7 @@ static int xe_pci_runtime_idle(struct device *dev)
 
 	return 0;
 }
+#endif
 
 static const struct dev_pm_ops xe_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume)
-- 
GitLab


From a121594006813eff7864a63e14573f3f5523e29c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Wed, 26 Apr 2023 09:20:05 -0700
Subject: [PATCH 1497/2340] drm/xe: Limit the system memory size to half of the
 system memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ttm_global_init() imposes this limitation.

Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 5b0674bbb8ed0..3e1fa0c832cab 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -105,7 +105,10 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe)
 	u64 gtt_size;
 
 	si_meminfo(&si);
-	gtt_size = (u64)si.totalram * si.mem_unit * 3/4;
+	gtt_size = (u64)si.totalram * si.mem_unit;
+	/* TTM limits allocation of all TTM devices by 50% of system memory */
+	gtt_size /= 2;
+
 	man->use_tt = true;
 	man->func = &xe_ttm_sys_mgr_func;
 	ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT);
-- 
GitLab


From 052df73b9e90305487ad9349d0fc8b59ddb6007b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 26 Apr 2023 09:09:40 -0400
Subject: [PATCH 1498/2340] drm/xe: Update comment on why d3cold is still
 blocked.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The main issue with buddy allocator was fixed, but then
we ended up on other issues, so we need to step back and rethink
our strategy with D3cold. So, let's update the comment with a
todo list so we don't get tempted in removing it before we are
really ready.

Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 5f750edce5427..c1f2f63548d3d 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -755,10 +755,15 @@ static int xe_pci_runtime_idle(struct device *dev)
 	struct xe_device *xe = pdev_to_xe_device(pdev);
 
 	/*
-	 * FIXME: d3cold should be allowed (true) if
+	 * TODO: d3cold should be allowed (true) if
 	 * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
-	 * however the change to the buddy allocator broke the
-	 * xe_bo_restore_kernel when the pci device is disabled
+	 * but maybe include some other conditions. So, before
+	 * we can re-enable the D3cold, we need to:
+	 * 1. rewrite the VRAM save / restore to avoid buffer object locks
+	 * 2. block D3cold if we have a big amount of device memory in use
+	 *    in order to reduce the latency.
+	 * 3. at resume, detect if we really lost power and avoid memory
+	 *    restoration if we were only up to d3cold
 	 */
 	 xe->d3cold_allowed = false;
 
-- 
GitLab


From 9d3c8fb98ba31873c0ebbc42c5d8133fa59f7ac7 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 26 Apr 2023 12:07:20 -0400
Subject: [PATCH 1499/2340] drm/xe: Fix print of RING_EXECLIST_SQ_CONTENTS_HI

On xe_hw_engine_print_state we were printing:
value_of(0x510) + 4 instead of
value_of(0x514) as desired.

So, let's properly define a RING_EXECLIST_SQ_CONTENTS_HI
register to fix the issue and also to avoid other issues
like that.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3 ++-
 drivers/gpu/drm/xe/xe_execlist.c         | 4 ++--
 drivers/gpu/drm/xe/xe_hw_engine.c        | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 2aa67d001c34b..a1e1d1c206fa5 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -84,7 +84,8 @@
 						 RING_FORCE_TO_NONPRIV_DENY)
 #define   RING_MAX_NONPRIV_SLOTS  12
 
-#define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
+#define RING_EXECLIST_SQ_CONTENTS_LO(base)	_MMIO((base) + 0x510)
+#define RING_EXECLIST_SQ_CONTENTS_HI(base)	_MMIO((base) + 0x510 + 4)
 
 #define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
 #define	  EL_CTRL_LOAD				REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index e540e5d287a07..64b520ddca9cb 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -84,9 +84,9 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg,
 			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
 
-	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0,
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base).reg,
 			lower_32_bits(lrc_desc));
-	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4,
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base).reg,
 			upper_32_bits(lrc_desc));
 	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg,
 			EL_CTRL_LOAD);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 4b56c35b988d4..23b9f120c258b 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -528,10 +528,10 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
 		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg));
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
 		hw_engine_mmio_read32(hwe,
-					 RING_EXECLIST_SQ_CONTENTS(0).reg));
+					 RING_EXECLIST_SQ_CONTENTS_LO(0).reg));
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
 		hw_engine_mmio_read32(hwe,
-					 RING_EXECLIST_SQ_CONTENTS(0).reg) + 4);
+					 RING_EXECLIST_SQ_CONTENTS_HI(0).reg));
 	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
 		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg));
 
-- 
GitLab


From 58e19acf0cdf3f18c1c868165f45d3ea626b9c3f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 12 Apr 2023 16:28:41 -0700
Subject: [PATCH 1500/2340] drm/xe: Cleanup page-related defines

Rename the following defines to lose the GEN* prefixes since they don't
make sense for xe:

GEN8_PTE_SHIFT		-> XE_PTE_SHIFT
GEN8_PAGE_SIZE		-> XE_PAGE_SIZE
GEN8_PTE_MASK		-> XE_PTE_MASK
GEN8_PDE_SHIFT		-> XE_PDE_SHIFT
GEN8_PDES		-> XE_PDES
GEN8_PDE_MASK		-> XE_PDE_MASK
GEN8_64K_PTE_SHIFT	-> XE_64K_PTE_SHIFT
GEN8_64K_PAGE_SIZE	-> XE_64K_PAGE_SIZE
GEN8_64K_PTE_MASK	-> XE_64K_PTE_MASK
GEN8_64K_PDE_MASK	-> XE_64K_PDE_MASK
GEN8_PDE_PS_2M		-> XE_PDE_PS_2M
GEN8_PDPE_PS_1G		-> XE_PDPE_PS_1G
GEN8_PDE_IPS_64K	-> XE_PDE_IPS_64K
GEN12_GGTT_PTE_LM	-> XE_GGTT_PTE_LM
GEN12_USM_PPGTT_PTE_AE	-> XE_USM_PPGTT_PTE_AE
GEN12_PPGTT_PTE_LM	-> XE_PPGTT_PTE_LM
GEN12_PDE_64K		-> XE_PDE_64K
GEN12_PTE_PS64		-> XE_PTE_PS64
GEN8_PAGE_PRESENT	-> XE_PAGE_PRESENT
GEN8_PAGE_RW		-> XE_PAGE_RW
PTE_READ_ONLY		-> XE_PTE_READ_ONLY

Keep an XE_ prefix to make sure we don't mix the defines for the CPU
(e.g. PAGE_SIZE) with the ones fro the GPU).

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 15 +++---
 drivers/gpu/drm/xe/xe_bo.h            | 52 ++++++++++----------
 drivers/gpu/drm/xe/xe_ggtt.c          | 24 +++++-----
 drivers/gpu/drm/xe/xe_migrate.c       | 69 ++++++++++++++-------------
 drivers/gpu/drm/xe/xe_pt.c            | 40 ++++++++--------
 drivers/gpu/drm/xe/xe_vm.c            | 20 ++++----
 6 files changed, 112 insertions(+), 108 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index cdcecf8d5eef8..0f4371ad1fd95 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -265,7 +265,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto vunmap;
 	}
 
-	pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE,
+	pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
 				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
 				  XE_BO_CREATE_PINNED_BIT);
@@ -294,20 +294,21 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	}
 
 	kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n",
-		   (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE),
-		   (unsigned long)xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE));
+		   (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, XE_PAGE_SIZE),
+		   (unsigned long)xe_bo_main_addr(m->pt_bo, XE_PAGE_SIZE));
 
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
-	xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef);
+	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
+		  0xdeaddeadbeefbeef);
 	expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
 	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
-		expected |= GEN12_PTE_PS64;
+		expected |= XE_PTE_PS64;
 	xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
 	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
-		 &src_it, GEN8_PAGE_SIZE, pt);
+		 &src_it, XE_PAGE_SIZE, pt);
 	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
 
-	retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
+	retval = xe_map_rd(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
 			   u64);
 	check(retval, expected, "PTE entry write", test);
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index effa9d0cf0f69..8354d05ccdf37 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -39,32 +39,32 @@
 #define PPAT_CACHED                     BIT_ULL(7)
 #define PPAT_DISPLAY_ELLC               BIT_ULL(4)
 
-#define GEN8_PTE_SHIFT			12
-#define GEN8_PAGE_SIZE			(1 << GEN8_PTE_SHIFT)
-#define GEN8_PTE_MASK			(GEN8_PAGE_SIZE - 1)
-#define GEN8_PDE_SHIFT			(GEN8_PTE_SHIFT - 3)
-#define GEN8_PDES			(1 << GEN8_PDE_SHIFT)
-#define GEN8_PDE_MASK			(GEN8_PDES - 1)
-
-#define GEN8_64K_PTE_SHIFT		16
-#define GEN8_64K_PAGE_SIZE		(1 << GEN8_64K_PTE_SHIFT)
-#define GEN8_64K_PTE_MASK		(GEN8_64K_PAGE_SIZE - 1)
-#define GEN8_64K_PDE_MASK		(GEN8_PDE_MASK >> 4)
-
-#define GEN8_PDE_PS_2M			BIT_ULL(7)
-#define GEN8_PDPE_PS_1G			BIT_ULL(7)
-#define GEN8_PDE_IPS_64K		BIT_ULL(11)
-
-#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
-#define GEN12_USM_PPGTT_PTE_AE		BIT_ULL(10)
-#define GEN12_PPGTT_PTE_LM		BIT_ULL(11)
-#define GEN12_PDE_64K			BIT_ULL(6)
-#define GEN12_PTE_PS64                  BIT_ULL(8)
-
-#define GEN8_PAGE_PRESENT		BIT_ULL(0)
-#define GEN8_PAGE_RW			BIT_ULL(1)
-
-#define PTE_READ_ONLY			BIT(0)
+#define XE_PTE_SHIFT			12
+#define XE_PAGE_SIZE			(1 << XE_PTE_SHIFT)
+#define XE_PTE_MASK			(XE_PAGE_SIZE - 1)
+#define XE_PDE_SHIFT			(XE_PTE_SHIFT - 3)
+#define XE_PDES				(1 << XE_PDE_SHIFT)
+#define XE_PDE_MASK			(XE_PDES - 1)
+
+#define XE_64K_PTE_SHIFT		16
+#define XE_64K_PAGE_SIZE		(1 << XE_64K_PTE_SHIFT)
+#define XE_64K_PTE_MASK			(XE_64K_PAGE_SIZE - 1)
+#define XE_64K_PDE_MASK			(XE_PDE_MASK >> 4)
+
+#define XE_PDE_PS_2M			BIT_ULL(7)
+#define XE_PDPE_PS_1G			BIT_ULL(7)
+#define XE_PDE_IPS_64K			BIT_ULL(11)
+
+#define XE_GGTT_PTE_LM			BIT_ULL(1)
+#define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
+#define XE_PPGTT_PTE_LM			BIT_ULL(11)
+#define XE_PDE_64K			BIT_ULL(6)
+#define XE_PTE_PS64			BIT_ULL(8)
+
+#define XE_PAGE_PRESENT			BIT_ULL(0)
+#define XE_PAGE_RW			BIT_ULL(1)
+
+#define XE_PTE_READ_ONLY		BIT(0)
 
 #define XE_PL_SYSTEM		TTM_PL_SYSTEM
 #define XE_PL_TT		TTM_PL_TT
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 0fda9a18049b0..dbc45ef084b47 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -30,11 +30,11 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
 	u64 pte;
 	bool is_vram;
 
-	pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram);
-	pte |= GEN8_PAGE_PRESENT;
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram);
+	pte |= XE_PAGE_PRESENT;
 
 	if (is_vram)
-		pte |= GEN12_GGTT_PTE_LM;
+		pte |= XE_GGTT_PTE_LM;
 
 	/* FIXME: vfunc + pass in caching rules */
 	if (xe->info.platform == XE_METEORLAKE) {
@@ -56,10 +56,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
 
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
 {
-	XE_BUG_ON(addr & GEN8_PTE_MASK);
+	XE_BUG_ON(addr & XE_PTE_MASK);
 	XE_BUG_ON(addr >= ggtt->size);
 
-	writeq(pte, &ggtt->gsm[addr >> GEN8_PTE_SHIFT]);
+	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
 }
 
 static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
@@ -76,7 +76,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
 
 	while (start < end) {
 		xe_ggtt_set_pte(ggtt, start, scratch_pte);
-		start += GEN8_PAGE_SIZE;
+		start += XE_PAGE_SIZE;
 	}
 }
 
@@ -107,7 +107,7 @@ int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
 	}
 
 	ggtt->gsm = gt->mmio.regs + SZ_8M;
-	ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE;
+	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;
 
 	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
 		ggtt->flags |= XE_GGTT_FLAGS_64K;
@@ -167,7 +167,7 @@ int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
 	else
 		flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt);
 
-	ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, GEN8_PAGE_SIZE,
+	ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, XE_PAGE_SIZE,
 					     ttm_bo_type_kernel,
 					     flags);
 
@@ -224,8 +224,8 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
 	scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
 
 	printk("%sGlobal GTT:", prefix);
-	for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) {
-		unsigned int i = addr / GEN8_PAGE_SIZE;
+	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
+		unsigned int i = addr / XE_PAGE_SIZE;
 
 		XE_BUG_ON(addr > U32_MAX);
 		if (ggtt->gsm[i] == scratch_pte)
@@ -261,7 +261,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	u64 start = bo->ggtt_node.start;
 	u64 offset, pte;
 
-	for (offset = 0; offset < bo->size; offset += GEN8_PAGE_SIZE) {
+	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
 		pte = xe_ggtt_pte_encode(bo, offset);
 		xe_ggtt_set_pte(ggtt, start + offset, pte);
 	}
@@ -309,7 +309,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
 	u64 alignment;
 
-	alignment = GEN8_PAGE_SIZE;
+	alignment = XE_PAGE_SIZE;
 	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
 		alignment = SZ_64K;
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 2169d687ba3fd..a8e66b84dc63b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -147,7 +147,7 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 		return PTR_ERR(m->cleared_bo);
 
 	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
-	vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram);
+	vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE, &is_vram);
 	XE_BUG_ON(!is_vram);
 	m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr);
 
@@ -166,9 +166,9 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 	int ret;
 
 	/* Can't bump NUM_PT_SLOTS too high */
-	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE);
+	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
 	/* Must be a multiple of 64K to support all platforms */
-	BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K);
+	BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K);
 	/* And one slot reserved for the 4KiB page table updates */
 	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
 
@@ -176,7 +176,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 	XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M);
 
 	bo = xe_bo_create_pin_map(vm->xe, m->gt, vm,
-				  num_entries * GEN8_PAGE_SIZE,
+				  num_entries * XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
 				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
 				  XE_BO_CREATE_PINNED_BIT);
@@ -189,14 +189,14 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 		return ret;
 	}
 
-	entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB);
+	entry = gen8_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
 	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
 
-	map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE;
+	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
 
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
-		entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE,
+		entry = gen8_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
 					XE_CACHE_WB, 0, 0);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
@@ -211,10 +211,10 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 		XE_BUG_ON(xe->info.supports_usm);
 
 		/* Write out batch too */
-		m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE;
+		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
 		for (i = 0; i < batch->size;
-		     i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE :
-			     GEN8_PAGE_SIZE) {
+		     i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE :
+		     XE_PAGE_SIZE) {
 			entry = gen8_pte_encode(NULL, batch, i,
 						XE_CACHE_WB, 0, 0);
 
@@ -224,13 +224,13 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 		}
 	} else {
 		bool is_vram;
-		u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_vram);
+		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, &is_vram);
 
 		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 
 		if (xe->info.supports_usm) {
 			batch = gt->usm.bb_pool->bo;
-			batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE,
+			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE,
 						&is_vram);
 			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 		}
@@ -240,20 +240,20 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 		u32 flags = 0;
 
 		if (vm->flags & XE_VM_FLAGS_64K && level == 1)
-			flags = GEN12_PDE_64K;
+			flags = XE_PDE_64K;
 
 		entry = gen8_pde_encode(bo, map_ofs + (level - 1) *
-					GEN8_PAGE_SIZE, XE_CACHE_WB);
-		xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64,
+					XE_PAGE_SIZE, XE_CACHE_WB);
+		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
 			  entry | flags);
 	}
 
 	/* Write PDE's that point to our BO. */
 	for (i = 0; i < num_entries - num_level; i++) {
-		entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE,
+		entry = gen8_pde_encode(bo, i * XE_PAGE_SIZE,
 					XE_CACHE_WB);
 
-		xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE +
+		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
 			  (i + 1) * 8, u64, entry);
 	}
 
@@ -262,9 +262,9 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 		u64 pos, ofs, flags;
 
 		level = 2;
-		ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8;
-		flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED |
-			GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G;
+		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
+		flags = XE_PAGE_RW | XE_PAGE_PRESENT | PPAT_CACHED |
+			XE_PPGTT_PTE_LM | XE_PDPE_PS_1G;
 
 		/*
 		 * Use 1GB pages, it shouldn't matter the physical amount of
@@ -294,10 +294,10 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 	 * the different addresses in VM.
 	 */
 #define NUM_VMUSA_UNIT_PER_PAGE	32
-#define VM_SA_UPDATE_UNIT_SIZE	(GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
+#define VM_SA_UPDATE_UNIT_SIZE		(XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
 #define NUM_VMUSA_WRITES_PER_UNIT	(VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
 	drm_suballoc_manager_init(&m->vm_update_sa,
-				  (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) *
+				  (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
 				  NUM_VMUSA_UNIT_PER_PAGE, 0);
 
 	m->pt_bo = bo;
@@ -403,7 +403,7 @@ static u32 pte_update_size(struct xe_migrate *m,
 	if (!is_vram) {
 		/* Clip L0 to available size */
 		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
-		u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE);
+		u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
 
 		*L0 = size;
 		*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
@@ -433,7 +433,7 @@ static void emit_pte(struct xe_migrate *m,
 		     u32 size, struct xe_bo *bo)
 {
 	u32 ptes;
-	u64 ofs = at_pt * GEN8_PAGE_SIZE;
+	u64 ofs = at_pt * XE_PAGE_SIZE;
 	u64 cur_ofs;
 
 	/*
@@ -443,7 +443,7 @@ static void emit_pte(struct xe_migrate *m,
 	 * on running tests.
 	 */
 
-	ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE);
+	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
 
 	while (ptes) {
 		u32 chunk = min(0x1ffU, ptes);
@@ -466,13 +466,13 @@ static void emit_pte(struct xe_migrate *m,
 				if ((m->eng->vm->flags & XE_VM_FLAGS_64K) &&
 				    !(cur_ofs & (16 * 8 - 1))) {
 					XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K));
-					addr |= GEN12_PTE_PS64;
+					addr |= XE_PTE_PS64;
 				}
 
 				addr += vram_region_io_offset(bo->ttm.resource);
-				addr |= GEN12_PPGTT_PTE_LM;
+				addr |= XE_PPGTT_PTE_LM;
 			}
-			addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+			addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW;
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 
@@ -697,7 +697,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
-		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE);
+		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
+			  XE_PAGE_SIZE);
 		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
 						  dst_L0_ofs, dst_is_vram,
 						  src_L0, ccs_ofs, copy_ccs);
@@ -915,7 +916,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
-		emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE,
+		emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE,
 			   clear_vram);
 		if (xe_device_has_flat_ccs(xe) && clear_vram) {
 			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
@@ -985,7 +986,7 @@ static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
 		bool is_vram;
 
 		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
-							   GEN8_PAGE_SIZE,
+							   XE_PAGE_SIZE,
 							   &is_vram));
 		XE_BUG_ON(!is_vram);
 	}
@@ -1202,7 +1203,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		/* Map our PT's to gtt */
 		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
 			(num_updates * 2 + 1);
-		bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs;
+		bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
 		bb->cs[bb->len++] = 0; /* upper_32_bits */
 
 		for (i = 0; i < num_updates; i++) {
@@ -1220,9 +1221,9 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		update_idx = bb->len;
 
 		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
-			(page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE;
+			(page_ofs / sizeof(u64)) * XE_PAGE_SIZE;
 		for (i = 0; i < num_updates; i++)
-			write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE,
+			write_pgtable(m->gt, bb, addr + i * XE_PAGE_SIZE,
 				      &updates[i], pt_update);
 	} else {
 		/* phys pages, no preamble required */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 7aa12f86e55bd..f15282996c3b1 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -63,8 +63,8 @@ u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
 	u64 pde;
 	bool is_vram;
 
-	pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram);
-	pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram);
+	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
 
 	XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_vram);
 
@@ -100,10 +100,10 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
 			     u32 pt_level)
 {
-	pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
 
-	if (unlikely(flags & PTE_READ_ONLY))
-		pte &= ~GEN8_PAGE_RW;
+	if (unlikely(flags & XE_PTE_READ_ONLY))
+		pte &= ~XE_PAGE_RW;
 
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
 
@@ -120,9 +120,9 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
 	}
 
 	if (pt_level == 1)
-		pte |= GEN8_PDE_PS_2M;
+		pte |= XE_PDE_PS_2M;
 	else if (pt_level == 2)
-		pte |= GEN8_PDPE_PS_1G;
+		pte |= XE_PDPE_PS_1G;
 
 	/* XXX: Does hw support 1 GiB pages? */
 	XE_BUG_ON(pt_level > 2);
@@ -152,14 +152,14 @@ u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 	bool is_vram;
 
 	if (vma)
-		pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram);
+		pte = vma_addr(vma, offset, XE_PAGE_SIZE, &is_vram);
 	else
-		pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram);
+		pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram);
 
 	if (is_vram) {
-		pte |= GEN12_PPGTT_PTE_LM;
+		pte |= XE_PPGTT_PTE_LM;
 		if (vma && vma->use_atomic_access_pte_bit)
-			pte |= GEN12_USM_PPGTT_PTE_AE;
+			pte |= XE_USM_PPGTT_PTE_AE;
 	}
 
 	return __gen8_pte_encode(pte, cache, flags, pt_level);
@@ -210,7 +210,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
 	int err;
 
 	size = !level ?  sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
-		GEN8_PDES * sizeof(struct xe_ptw *);
+		XE_PDES * sizeof(struct xe_ptw *);
 	pt = kzalloc(size, GFP_KERNEL);
 	if (!pt)
 		return ERR_PTR(-ENOMEM);
@@ -264,7 +264,7 @@ void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
 		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
 	} else {
 		empty = __xe_pt_empty_pte(gt, vm, pt->level);
-		for (i = 0; i < GEN8_PDES; i++)
+		for (i = 0; i < XE_PDES; i++)
 			xe_pt_write(vm->xe, map, i, empty);
 	}
 }
@@ -279,7 +279,7 @@ void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
  */
 unsigned int xe_pt_shift(unsigned int level)
 {
-	return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level;
+	return XE_PTE_SHIFT + XE_PDE_SHIFT * level;
 }
 
 /**
@@ -306,7 +306,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
 	if (pt->level > 0 && pt->num_live) {
 		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
 
-		for (i = 0; i < GEN8_PDES; i++) {
+		for (i = 0; i < XE_PDES; i++) {
 			if (xe_pt_entry(pt_dir, i))
 				xe_pt_destroy(xe_pt_entry(pt_dir, i), flags,
 					      deferred);
@@ -488,7 +488,7 @@ xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
 	entry->qwords = 0;
 
 	if (alloc_entries) {
-		entry->pt_entries = kmalloc_array(GEN8_PDES,
+		entry->pt_entries = kmalloc_array(XE_PDES,
 						  sizeof(*entry->pt_entries),
 						  GFP_KERNEL);
 		if (!entry->pt_entries)
@@ -648,7 +648,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		 */
 		if (level == 0 && !xe_parent->is_compact) {
 			if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
-				pte |= GEN12_PTE_PS64;
+				pte |= XE_PTE_PS64;
 			else if (XE_WARN_ON(xe_walk->needs_64K))
 				return -EINVAL;
 		}
@@ -698,7 +698,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 &&
 		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
 			walk->shifts = xe_compact_pt_shifts;
-			flags |= GEN12_PDE_64K;
+			flags |= XE_PDE_64K;
 			xe_child->is_compact = true;
 		}
 
@@ -760,9 +760,9 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
 	if (is_vram) {
 		struct xe_gt *bo_gt = xe_bo_to_gt(bo);
 
-		xe_walk.default_pte = GEN12_PPGTT_PTE_LM;
+		xe_walk.default_pte = XE_PPGTT_PTE_LM;
 		if (vma && vma->use_atomic_access_pte_bit)
-			xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE;
+			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
 		xe_walk.dma_offset = bo_gt->mem.vram.io_start -
 			gt_to_xe(gt)->mem.vram.io_start;
 		xe_walk.cache = XE_CACHE_WB;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 49aa4ddedbf27..e634bb96f9ccd 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -59,7 +59,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
 	bool in_kthread = !current->mm;
 	unsigned long notifier_seq;
 	int pinned, ret, i;
-	bool read_only = vma->pte_flags & PTE_READ_ONLY;
+	bool read_only = vma->pte_flags & XE_PTE_READ_ONLY;
 
 	lockdep_assert_held(&vm->lock);
 	XE_BUG_ON(!xe_vma_is_userptr(vma));
@@ -844,7 +844,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	vma->start = start;
 	vma->end = end;
 	if (read_only)
-		vma->pte_flags = PTE_READ_ONLY;
+		vma->pte_flags = XE_PTE_READ_ONLY;
 
 	if (gt_mask) {
 		vma->gt_mask = gt_mask;
@@ -899,7 +899,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 {
 	struct xe_vm *vm = vma->vm;
 	struct xe_device *xe = vm->xe;
-	bool read_only = vma->pte_flags & PTE_READ_ONLY;
+	bool read_only = vma->pte_flags & XE_PTE_READ_ONLY;
 
 	if (xe_vma_is_userptr(vma)) {
 		if (vma->userptr.sg) {
@@ -1960,7 +1960,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
 	/* Warning: Security issue - never enable by default */
-	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE);
+	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
 #endif
 
 	return 0;
@@ -2617,7 +2617,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					  first->userptr.ptr,
 					  first->start,
 					  lookup->start - 1,
-					  (first->pte_flags & PTE_READ_ONLY),
+					  (first->pte_flags & XE_PTE_READ_ONLY),
 					  first->gt_mask);
 		if (first->bo)
 			xe_bo_unlock(first->bo, &ww);
@@ -2648,7 +2648,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					 last->userptr.ptr + chunk,
 					 last->start + chunk,
 					 last->end,
-					 (last->pte_flags & PTE_READ_ONLY),
+					 (last->pte_flags & XE_PTE_READ_ONLY),
 					 last->gt_mask);
 		if (last->bo)
 			xe_bo_unlock(last->bo, &ww);
@@ -3405,7 +3405,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 		return 0;
 	}
 	if (vm->pt_root[gt_id]) {
-		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram);
+		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE,
+				  &is_vram);
 		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
 	}
 
@@ -3416,10 +3417,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 		if (is_userptr) {
 			struct xe_res_cursor cur;
 
-			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
+			xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
+					&cur);
 			addr = xe_res_dma(&cur);
 		} else {
-			addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram);
+			addr = xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram);
 		}
 		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
 			   vma->start, vma->end, vma->end - vma->start + 1ull,
-- 
GitLab


From bb36f4b4ed279c7deed936957f733b2af0d3d78f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 12 Apr 2023 16:28:44 -0700
Subject: [PATCH 1501/2340] drm/xe: Rename RC0/RC6 macros

Follow up commits will mass-remove the gen prefix/suffix. For GEN6_RC0
and GEN6_RC6 that would make the variable too short and easy to
conflict. So, add "GT_" prefix that is also part of the register name.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ++--
 drivers/gpu/drm/xe/xe_guc_pc.c       | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index c1d73f3e7bc35..fb91f04c3c6cf 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -345,8 +345,8 @@
 
 #define GEN6_GT_CORE_STATUS			_MMIO(0x138060)
 #define   RCN_MASK				REG_GENMASK(2, 0)
-#define   GEN6_RC0				0
-#define   GEN6_RC6				3
+#define   GT_RC0				0
+#define   GT_RC6				3
 
 #define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
 #define GEN6_GT_GFX_RC6				_MMIO(0x138108)
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index b853831b342b3..0b6d0577a8a7b 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -592,9 +592,9 @@ static ssize_t rc_status_show(struct device *dev,
 	xe_device_mem_access_put(gt_to_xe(gt));
 
 	switch (REG_FIELD_GET(RCN_MASK, reg)) {
-	case GEN6_RC6:
+	case GT_RC6:
 		return sysfs_emit(buff, "rc6\n");
-	case GEN6_RC0:
+	case GT_RC0:
 		return sysfs_emit(buff, "rc0\n");
 	default:
 		return -ENOENT;
-- 
GitLab


From 56492dacee943dd8241e29fe6a2d698d0029035c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 12 Apr 2023 16:28:45 -0700
Subject: [PATCH 1502/2340] drm/xe: Rename instruction field to avoid confusion

There was both BLT_DEPTH_32 and XY_FAST_COLOR_BLT_DEPTH_32 - also add
the prefix to the first to make it clear this is about the FAST_**COPY**
operation. While at it, remove the GEN9_ prefix.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 4 ++--
 drivers/gpu/drm/xe/xe_migrate.c           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 9d6508d74d620..05531d43514f2 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -55,8 +55,8 @@
 #define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 21)
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 
-#define GEN9_XY_FAST_COPY_BLT_CMD	(2 << 29 | 0x42 << 22)
-#define   BLT_DEPTH_32			(3<<24)
+#define XY_FAST_COPY_BLT_CMD		(2 << 29 | 0x42 << 22)
+#define   XY_FAST_COPY_BLT_DEPTH_32	(3<<24)
 
 #define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
 #define   PVC_MEM_SET_CMD_LEN_DW	7
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index a8e66b84dc63b..d7da5bf2d9842 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -518,8 +518,8 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 	XE_BUG_ON(pitch / 4 > S16_MAX);
 	XE_BUG_ON(pitch > U16_MAX);
 
-	bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
-	bb->cs[bb->len++] = BLT_DEPTH_32 | pitch;
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
 	bb->cs[bb->len++] = 0;
 	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
 	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
-- 
GitLab


From e8178f8076dedf8526f8dc78f8fb9b3017991641 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sun, 16 Apr 2023 23:54:14 -0700
Subject: [PATCH 1503/2340] drm/xe/guc: Rename GEN11_SOFT_SCRATCH for clarity

That register is a completely different register, it's not the same as
SOFT_SCRATCH for GEN11 and beyond. Rename to to the same name as the
bspec uses, including the new variant for media. Also, move the
definitions to the guc header.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c     | 26 +++++++++++---------------
 drivers/gpu/drm/xe/xe_guc_reg.h |  9 ++++++---
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index ee71b969bcbfc..ff2df4f30e971 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -622,9 +622,6 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
 	return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
 }
 
-#define MEDIA_SOFT_SCRATCH(n)           _MMIO(0x190310 + (n) * 4)
-#define MEDIA_SOFT_SCRATCH_COUNT        4
-
 int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
 			  u32 len, u32 *response_buf)
 {
@@ -632,15 +629,17 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
 	struct xe_gt *gt = guc_to_gt(guc);
 	u32 header, reply;
 	u32 reply_reg = xe_gt_is_media_type(gt) ?
-		MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg;
+		MED_VF_SW_FLAG(0).reg : VF_SW_FLAG(0).reg;
+	const u32 LAST_INDEX = VF_SW_FLAG_COUNT;
 	int ret;
 	int i;
 
-	BUILD_BUG_ON(GEN11_SOFT_SCRATCH_COUNT != MEDIA_SOFT_SCRATCH_COUNT);
+	BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
+
 	XE_BUG_ON(guc->ct.enabled);
 	XE_BUG_ON(!len);
-	XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT);
-	XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT);
+	XE_BUG_ON(len > VF_SW_FLAG_COUNT);
+	XE_BUG_ON(len > MED_VF_SW_FLAG_COUNT);
 	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) !=
 		  GUC_HXG_ORIGIN_HOST);
 	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) !=
@@ -650,17 +649,14 @@ retry:
 	/* Not in critical data-path, just do if else for GT type */
 	if (xe_gt_is_media_type(gt)) {
 		for (i = 0; i < len; ++i)
-			xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg,
+			xe_mmio_write32(gt, MED_VF_SW_FLAG(i).reg,
 					request[i]);
-#define LAST_INDEX	MEDIA_SOFT_SCRATCH_COUNT - 1
-		xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(LAST_INDEX).reg);
+		xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX).reg);
 	} else {
 		for (i = 0; i < len; ++i)
-			xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg,
+			xe_mmio_write32(gt, VF_SW_FLAG(i).reg,
 					request[i]);
-#undef LAST_INDEX
-#define LAST_INDEX	GEN11_SOFT_SCRATCH_COUNT - 1
-		xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(LAST_INDEX).reg);
+		xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX).reg);
 	}
 
 	xe_guc_notify(guc);
@@ -724,7 +720,7 @@ proto:
 	if (response_buf) {
 		response_buf[0] = header;
 
-		for (i = 1; i < GEN11_SOFT_SCRATCH_COUNT; i++)
+		for (i = 1; i < VF_SW_FLAG_COUNT; i++)
 			response_buf[i] =
 				xe_mmio_read32(gt, reply_reg + i * sizeof(u32));
 	}
diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h
index efd60c186bbc0..0cd38d51cc60a 100644
--- a/drivers/gpu/drm/xe/xe_guc_reg.h
+++ b/drivers/gpu/drm/xe/xe_guc_reg.h
@@ -35,9 +35,6 @@
 #define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
 #define SOFT_SCRATCH_COUNT		16
 
-#define GEN11_SOFT_SCRATCH(n)		_MMIO(0x190240 + (n) * 4)
-#define GEN11_SOFT_SCRATCH_COUNT	4
-
 #define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
 #define UOS_RSA_SCRATCH_COUNT		64
 
@@ -130,6 +127,12 @@ struct guc_doorbell_info {
 #define GUC_WD_VECS_IER			_MMIO(0xC558)
 #define GUC_PM_P24C_IER			_MMIO(0xC55C)
 
+#define VF_SW_FLAG(n)			_MMIO(0x190240 + (n) * 4)
+#define VF_SW_FLAG_COUNT		4
+
+#define MED_VF_SW_FLAG(n)		_MMIO(0x190310 + (n) * 4)
+#define MED_VF_SW_FLAG_COUNT		4
+
 /* GuC Interrupt Vector */
 #define GUC_INTR_GUC2HOST		BIT(15)
 #define GUC_INTR_EXEC_ERROR		BIT(14)
-- 
GitLab


From a9b1a1361472f9094a6a3d6216d46d14b5bcc6f5 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sat, 15 Apr 2023 23:37:12 -0700
Subject: [PATCH 1504/2340] drm/xe/guc: Move GuC registers to regs/

There's no good reason to keep the GuC registers outside the regs/
directory: move the header with GuC registers under that.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/{xe_guc_reg.h => regs/xe_guc_regs.h} | 4 ++--
 drivers/gpu/drm/xe/xe_guc.c                             | 2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c                         | 2 +-
 drivers/gpu/drm/xe/xe_huc.c                             | 2 +-
 drivers/gpu/drm/xe/xe_uc_fw.c                           | 2 +-
 drivers/gpu/drm/xe/xe_wopcm.c                           | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)
 rename drivers/gpu/drm/xe/{xe_guc_reg.h => regs/xe_guc_regs.h} (99%)

diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
similarity index 99%
rename from drivers/gpu/drm/xe/xe_guc_reg.h
rename to drivers/gpu/drm/xe/regs/xe_guc_regs.h
index 0cd38d51cc60a..011868ff38aaf 100644
--- a/drivers/gpu/drm/xe/xe_guc_reg.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -3,8 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#ifndef _XE_GUC_REG_H_
-#define _XE_GUC_REG_H_
+#ifndef _XE_GUC_REGS_H_
+#define _XE_GUC_REGS_H_
 
 #include <linux/compiler.h>
 #include <linux/types.h>
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index ff2df4f30e971..e00177f4d294e 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -6,6 +6,7 @@
 #include "xe_guc.h"
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
@@ -15,7 +16,6 @@
 #include "xe_guc_hwconfig.h"
 #include "xe_guc_log.h"
 #include "xe_guc_pc.h"
-#include "xe_guc_reg.h"
 #include "xe_guc_submit.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index fd9911ffeae47..fe1d5be1241ec 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -9,10 +9,10 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
 #include "xe_bo.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
-#include "xe_guc_reg.h"
 #include "xe_hw_engine.h"
 #include "xe_lrc.h"
 #include "xe_map.h"
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index a9448c6f6418f..a1c3e54faa6e6 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -5,12 +5,12 @@
 
 #include "xe_huc.h"
 
+#include "regs/xe_guc_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
-#include "xe_guc_reg.h"
 #include "xe_mmio.h"
 #include "xe_uc_fw.h"
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2c2080928a826..bb8d986453328 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -8,11 +8,11 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_guc_regs.h"
 #include "xe_bo.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
-#include "xe_guc_reg.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_uc_fw.h"
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index c8cc3f5e6154a..7b5014aea9c8b 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -5,10 +5,10 @@
 
 #include "xe_wopcm.h"
 
+#include "regs/xe_guc_regs.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
-#include "xe_guc_reg.h"
 #include "xe_mmio.h"
 #include "xe_uc_fw.h"
 
-- 
GitLab


From 1bd4db39dee51161c48e8669e410fff0a0f69be1 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 27 Apr 2023 13:44:46 +0200
Subject: [PATCH 1505/2340] drm/xe: Remove extra xe_mmio_read32 from
 xe_mmio_wait32

Commit 7aaec3a623ad ("drm/xe: Let's return last value read on
xe_mmio_wait32.") mentions that we should return the last value read,
but we never actually return it. This breaks display which depends on
the value being actually returned where needed.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Fixes: 7aaec3a623ad ("drm/xe: Let's return last value read on xe_mmio_wait32.")
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/257
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index be7ba2813d58d..b72a0a75259fa 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -93,9 +93,6 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask,
 	u32 read;
 
 	for (;;) {
-		if ((xe_mmio_read32(gt, reg) & mask) == val)
-			return 0;
-
 		read = xe_mmio_read32(gt, reg);
 		if ((read & mask) == val) {
 			ret = 0;
-- 
GitLab


From 7b829f6dd638c2cb45c7710bc7cd1d0395ea9bc1 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:47 -0700
Subject: [PATCH 1506/2340] drm/xe/guc: Convert GuC registers to
 REG_FIELD/REG_BIT

Cleanup GuC register declarations by converting them to use REG_FIELD,
REG_BIT and REG_GENMASK. While converting, also reorder the bitfields
so they follow the convention of declaring the higher bits first.

v2:
  - Drop unused HUC_LOADING_AGENT_VCR and DMA_ADDRESS_SPACE_GTT (Matt Roper)
  - Simplify HUC_LOADING_AGENT_GUC define (Matt Roper)

Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_guc_regs.h | 165 ++++++++++++--------------
 drivers/gpu/drm/xe/xe_ggtt.c          |   6 +-
 drivers/gpu/drm/xe/xe_guc.c           |   8 +-
 drivers/gpu/drm/xe/xe_guc_ads.c       |   3 +-
 4 files changed, 86 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index 011868ff38aaf..1960f9e78ec47 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -14,23 +14,18 @@
 /* Definitions of GuC H/W registers, bits, etc */
 
 #define GUC_STATUS			_MMIO(0xc000)
-#define   GS_RESET_SHIFT		0
-#define   GS_MIA_IN_RESET		  (0x01 << GS_RESET_SHIFT)
-#define   GS_BOOTROM_SHIFT		1
-#define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
-#define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
-#define   GS_BOOTROM_JUMP_PASSED	  (0x76 << GS_BOOTROM_SHIFT)
-#define   GS_UKERNEL_SHIFT		8
-#define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
-#define   GS_MIA_SHIFT			16
-#define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
-#define   GS_MIA_CORE_STATE		  (0x01 << GS_MIA_SHIFT)
-#define   GS_MIA_HALT_REQUESTED		  (0x02 << GS_MIA_SHIFT)
-#define   GS_MIA_ISR_ENTRY		  (0x04 << GS_MIA_SHIFT)
-#define   GS_AUTH_STATUS_SHIFT		30
-#define   GS_AUTH_STATUS_MASK		  (0x03 << GS_AUTH_STATUS_SHIFT)
-#define   GS_AUTH_STATUS_BAD		  (0x01 << GS_AUTH_STATUS_SHIFT)
-#define   GS_AUTH_STATUS_GOOD		  (0x02 << GS_AUTH_STATUS_SHIFT)
+#define   GS_AUTH_STATUS_MASK		REG_GENMASK(31, 30)
+#define   GS_AUTH_STATUS_BAD		REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1)
+#define   GS_AUTH_STATUS_GOOD		REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2)
+#define   GS_MIA_MASK			REG_GENMASK(18, 16)
+#define   GS_MIA_CORE_STATE		REG_FIELD_PREP(GS_MIA_MASK, 0x1)
+#define   GS_MIA_HALT_REQUESTED		REG_FIELD_PREP(GS_MIA_MASK, 0x2)
+#define   GS_MIA_ISR_ENTRY		REG_FIELD_PREP(GS_MIA_MASK, 0x4)
+#define   GS_UKERNEL_MASK		REG_GENMASK(15, 8)
+#define   GS_BOOTROM_MASK		REG_GENMASK(7, 1)
+#define   GS_BOOTROM_RSA_FAILED		REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50)
+#define   GS_BOOTROM_JUMP_PASSED	REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76)
+#define   GS_MIA_IN_RESET		REG_BIT(0)
 
 #define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
 #define SOFT_SCRATCH_COUNT		16
@@ -42,90 +37,86 @@
 #define DMA_ADDR_0_HIGH			_MMIO(0xc304)
 #define DMA_ADDR_1_LOW			_MMIO(0xc308)
 #define DMA_ADDR_1_HIGH			_MMIO(0xc30c)
-#define   DMA_ADDRESS_SPACE_WOPCM	  (7 << 16)
-#define   DMA_ADDRESS_SPACE_GTT		  (8 << 16)
+#define   DMA_ADDR_SPACE_MASK		REG_GENMASK(20, 16)
+#define   DMA_ADDRESS_SPACE_WOPCM	REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7)
 #define DMA_COPY_SIZE			_MMIO(0xc310)
 #define DMA_CTRL			_MMIO(0xc314)
-#define   HUC_UKERNEL			  (1<<9)
-#define   UOS_MOVE			  (1<<4)
-#define   START_DMA			  (1<<0)
+#define   HUC_UKERNEL			REG_BIT(9)
+#define   UOS_MOVE			REG_BIT(4)
+#define   START_DMA			REG_BIT(0)
 #define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
-#define   GUC_WOPCM_OFFSET_VALID	  (1<<0)
-#define   HUC_LOADING_AGENT_VCR		  (0<<1)
-#define   HUC_LOADING_AGENT_GUC		  (1<<1)
 #define   GUC_WOPCM_OFFSET_SHIFT	14
-#define   GUC_WOPCM_OFFSET_MASK		  (0x3ffff << GUC_WOPCM_OFFSET_SHIFT)
-#define GUC_MAX_IDLE_COUNT		_MMIO(0xC3E4)
+#define   GUC_WOPCM_OFFSET_MASK		REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT)
+#define   HUC_LOADING_AGENT_GUC		REG_BIT(1)
+#define   GUC_WOPCM_OFFSET_VALID	REG_BIT(0)
+#define GUC_MAX_IDLE_COUNT		_MMIO(0xc3e4)
 
-#define HUC_STATUS2             _MMIO(0xD3B0)
-#define   HUC_FW_VERIFIED       (1<<7)
+#define HUC_STATUS2			_MMIO(0xd3b0)
+#define   HUC_FW_VERIFIED		REG_BIT(7)
 
-#define GEN11_HUC_KERNEL_LOAD_INFO	_MMIO(0xC1DC)
-#define   HUC_LOAD_SUCCESSFUL		  (1 << 0)
+#define GEN11_HUC_KERNEL_LOAD_INFO	_MMIO(0xc1dc)
+#define   HUC_LOAD_SUCCESSFUL		REG_BIT(0)
 
 #define GUC_WOPCM_SIZE			_MMIO(0xc050)
-#define   GUC_WOPCM_SIZE_LOCKED		  (1<<0)
-#define   GUC_WOPCM_SIZE_SHIFT		12
-#define   GUC_WOPCM_SIZE_MASK		  (0xfffff << GUC_WOPCM_SIZE_SHIFT)
+#define   GUC_WOPCM_SIZE_MASK		REG_GENMASK(31, 12)
+#define   GUC_WOPCM_SIZE_LOCKED		REG_BIT(0)
 
 #define GEN8_GT_PM_CONFIG		_MMIO(0x138140)
 #define GEN9LP_GT_PM_CONFIG		_MMIO(0x138140)
 #define GEN9_GT_PM_CONFIG		_MMIO(0x13816c)
-#define   GT_DOORBELL_ENABLE		  (1<<0)
+#define   GT_DOORBELL_ENABLE		REG_BIT(0)
 
 #define GEN8_GTCR			_MMIO(0x4274)
-#define   GEN8_GTCR_INVALIDATE		  (1<<0)
-
-#define GEN12_GUC_TLB_INV_CR		_MMIO(0xcee8)
-#define   GEN12_GUC_TLB_INV_CR_INVALIDATE	(1 << 0)
-
-#define GUC_ARAT_C6DIS			_MMIO(0xA178)
-
-#define GUC_SHIM_CONTROL		_MMIO(0xc064)
-#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	(1<<0)
-#define   GUC_ENABLE_READ_CACHE_LOGIC		(1<<1)
-#define   GUC_ENABLE_MIA_CACHING		(1<<2)
-#define   GUC_GEN10_MSGCH_ENABLE		(1<<4)
-#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	(1<<9)
-#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	(1<<10)
-#define   GUC_ENABLE_MIA_CLOCK_GATING		(1<<15)
-#define   GUC_GEN10_SHIM_WC_ENABLE		(1<<21)
+#define   GEN8_GTCR_INVALIDATE		REG_BIT(0)
+
+#define GEN12_GUC_TLB_INV_CR			_MMIO(0xcee8)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE	REG_BIT(0)
+
+#define GUC_ARAT_C6DIS				_MMIO(0xa178)
+
+#define GUC_SHIM_CONTROL			_MMIO(0xc064)
 #define   PVC_GUC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
-#define   PVC_MOCS_UC_INDEX			1
-#define   PVC_GUC_MOCS_INDEX(index)		REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK,\
+#define   PVC_GUC_MOCS_UC_INDEX			1
+#define   PVC_GUC_MOCS_INDEX(index)		REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \
 							       index)
+#define   GUC_GEN10_SHIM_WC_ENABLE		REG_BIT(21)
+#define   GUC_ENABLE_MIA_CLOCK_GATING		REG_BIT(15)
+#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	REG_BIT(10)
+#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	REG_BIT(9)
+#define   GUC_GEN10_MSGCH_ENABLE		REG_BIT(4)
+#define   GUC_ENABLE_MIA_CACHING		REG_BIT(2)
+#define   GUC_ENABLE_READ_CACHE_LOGIC		REG_BIT(1)
+#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	REG_BIT(0)
 
-#define GUC_SEND_INTERRUPT		_MMIO(0xc4c8)
-#define   GUC_SEND_TRIGGER		  (1<<0)
-#define GEN11_GUC_HOST_INTERRUPT	_MMIO(0x1901f0)
 
-#define GUC_NUM_DOORBELLS		256
+#define GUC_SEND_INTERRUPT			_MMIO(0xc4c8)
+#define   GUC_SEND_TRIGGER			REG_BIT(0)
+#define GEN11_GUC_HOST_INTERRUPT		_MMIO(0x1901f0)
+
+#define GUC_NUM_DOORBELLS			256
 
 /* format of the HW-monitored doorbell cacheline */
 struct guc_doorbell_info {
 	u32 db_status;
-#define GUC_DOORBELL_DISABLED		0
-#define GUC_DOORBELL_ENABLED		1
+#define GUC_DOORBELL_DISABLED			0
+#define GUC_DOORBELL_ENABLED			1
 
 	u32 cookie;
 	u32 reserved[14];
 } __packed;
 
-#define GEN8_DRBREGL(x)			_MMIO(0x1000 + (x) * 8)
-#define   GEN8_DRB_VALID		  (1<<0)
-#define GEN8_DRBREGU(x)			_MMIO(0x1000 + (x) * 8 + 4)
+#define GEN8_DRBREGL(x)				_MMIO(0x1000 + (x) * 8)
+#define   GEN8_DRB_VALID			REG_BIT(0)
+#define GEN8_DRBREGU(x)				_MMIO(0x1000 + (x) * 8 + 4)
 
 #define GEN12_DIST_DBS_POPULATED		_MMIO(0xd08)
-#define   GEN12_DOORBELLS_PER_SQIDI_SHIFT	16
-#define   GEN12_DOORBELLS_PER_SQIDI		(0xff)
-#define   GEN12_SQIDIS_DOORBELL_EXIST		(0xffff)
-
-#define DE_GUCRMR			_MMIO(0x44054)
+#define   GEN12_DOORBELLS_PER_SQIDI_MASK	REG_GENMASK(23, 16)
+#define   GEN12_SQIDIS_DOORBELL_EXIST_MASK	REG_GENMASK(15, 0)
 
-#define GUC_BCS_RCS_IER			_MMIO(0xC550)
-#define GUC_VCS2_VCS1_IER		_MMIO(0xC554)
-#define GUC_WD_VECS_IER			_MMIO(0xC558)
-#define GUC_PM_P24C_IER			_MMIO(0xC55C)
+#define GUC_BCS_RCS_IER				_MMIO(0xC550)
+#define GUC_VCS2_VCS1_IER			_MMIO(0xC554)
+#define GUC_WD_VECS_IER				_MMIO(0xC558)
+#define GUC_PM_P24C_IER				_MMIO(0xC55C)
 
 #define VF_SW_FLAG(n)			_MMIO(0x190240 + (n) * 4)
 #define VF_SW_FLAG_COUNT		4
@@ -134,21 +125,21 @@ struct guc_doorbell_info {
 #define MED_VF_SW_FLAG_COUNT		4
 
 /* GuC Interrupt Vector */
-#define GUC_INTR_GUC2HOST		BIT(15)
-#define GUC_INTR_EXEC_ERROR		BIT(14)
-#define GUC_INTR_DISPLAY_EVENT		BIT(13)
-#define GUC_INTR_SEM_SIG		BIT(12)
-#define GUC_INTR_IOMMU2GUC		BIT(11)
-#define GUC_INTR_DOORBELL_RANG		BIT(10)
-#define GUC_INTR_DMA_DONE		BIT(9)
-#define GUC_INTR_FATAL_ERROR		BIT(8)
-#define GUC_INTR_NOTIF_ERROR		BIT(7)
-#define GUC_INTR_SW_INT_6		BIT(6)
-#define GUC_INTR_SW_INT_5		BIT(5)
-#define GUC_INTR_SW_INT_4		BIT(4)
-#define GUC_INTR_SW_INT_3		BIT(3)
-#define GUC_INTR_SW_INT_2		BIT(2)
-#define GUC_INTR_SW_INT_1		BIT(1)
-#define GUC_INTR_SW_INT_0		BIT(0)
+#define GUC_INTR_GUC2HOST			BIT(15)
+#define GUC_INTR_EXEC_ERROR			BIT(14)
+#define GUC_INTR_DISPLAY_EVENT			BIT(13)
+#define GUC_INTR_SEM_SIG			BIT(12)
+#define GUC_INTR_IOMMU2GUC			BIT(11)
+#define GUC_INTR_DOORBELL_RANG			BIT(10)
+#define GUC_INTR_DMA_DONE			BIT(9)
+#define GUC_INTR_FATAL_ERROR			BIT(8)
+#define GUC_INTR_NOTIF_ERROR			BIT(7)
+#define GUC_INTR_SW_INT_6			BIT(6)
+#define GUC_INTR_SW_INT_5			BIT(5)
+#define GUC_INTR_SW_INT_4			BIT(4)
+#define GUC_INTR_SW_INT_3			BIT(3)
+#define GUC_INTR_SW_INT_2			BIT(2)
+#define GUC_INTR_SW_INT_1			BIT(1)
+#define GUC_INTR_SW_INT_0			BIT(0)
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index dbc45ef084b47..10a262a0c4cd0 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -186,11 +186,11 @@ err:
 }
 
 #define GEN12_GUC_TLB_INV_CR                     _MMIO(0xcee8)
-#define   GEN12_GUC_TLB_INV_CR_INVALIDATE        (1 << 0)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE        REG_BIT(0)
 #define PVC_GUC_TLB_INV_DESC0			_MMIO(0xcf7c)
-#define   PVC_GUC_TLB_INV_DESC0_VALID		 (1 << 0)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		 REG_BIT(0)
 #define PVC_GUC_TLB_INV_DESC1			_MMIO(0xcf80)
-#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	 (1 << 6)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	 REG_BIT(6)
 
 void xe_ggtt_invalidate(struct xe_gt *gt)
 {
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index e00177f4d294e..d18f2e25ce56c 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -357,7 +357,7 @@ static void guc_prepare_xfer(struct xe_guc *guc)
 				GUC_ENABLE_MIA_CACHING;
 
 	if (xe->info.platform == XE_PVC)
-		shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX);
+		shim_flags |= PVC_GUC_MOCS_INDEX(PVC_GUC_MOCS_UC_INDEX);
 
 	/* Must program this register before loading the ucode with DMA */
 	xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags);
@@ -848,11 +848,11 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
 
 	drm_printf(p, "\nGuC status 0x%08x:\n", status);
 	drm_printf(p, "\tBootrom status = 0x%x\n",
-		   (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+		   REG_FIELD_GET(GS_BOOTROM_MASK, status));
 	drm_printf(p, "\tuKernel status = 0x%x\n",
-		   (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+		   REG_FIELD_GET(GS_UKERNEL_MASK, status));
 	drm_printf(p, "\tMIA Core status = 0x%x\n",
-		   (status & GS_MIA_MASK) >> GS_MIA_SHIFT);
+		   REG_FIELD_GET(GS_MIA_MASK, status));
 	drm_printf(p, "\tLog level = %d\n",
 		   xe_guc_log_get_level(&guc->log));
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index fe1d5be1241ec..d4fc2d357a78a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -561,8 +561,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads)
 
 		ads_blob_write(ads,
 			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
-			       ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
-				& GEN12_DOORBELLS_PER_SQIDI) + 1);
+			       REG_FIELD_GET(GEN12_DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
 	}
 }
 
-- 
GitLab


From d9b79ad275e7a98c566b3ac4b32950142d6bf9ad Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:48 -0700
Subject: [PATCH 1507/2340] drm/xe: Drop gen afixes from registers

The defines for the registers were brought over from i915 while
bootstrapping the driver. As xe supports TGL and later only, it doesn't
make sense to keep the GEN* prefixes and suffixes in the registers: TGL
is graphics version 12, previously called "GEN12". So drop the prefix
everywhere.

v2:
  - Also drop _TGL suffix and reword commit message as suggested
    by Matt Roper. While at it, rename VSUNIT_CLKGATE_DIS_TGL to
    VSUNIT_CLKGATE2_DIS with the additional "2", so it doesn't clash
    with the define for the other register

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h  |   8 +-
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |   2 +-
 drivers/gpu/drm/xe/regs/xe_gt_regs.h      | 191 +++++++++++-----------
 drivers/gpu/drm/xe/regs/xe_guc_regs.h     |  32 ++--
 drivers/gpu/drm/xe/regs/xe_regs.h         |  41 +++--
 drivers/gpu/drm/xe/xe_execlist.c          |   8 +-
 drivers/gpu/drm/xe/xe_force_wake.c        |  18 +-
 drivers/gpu/drm/xe/xe_ggtt.c              |  12 +-
 drivers/gpu/drm/xe/xe_gt.c                |   4 +-
 drivers/gpu/drm/xe/xe_gt_clock.c          |  26 +--
 drivers/gpu/drm/xe/xe_gt_mcr.c            |  30 ++--
 drivers/gpu/drm/xe/xe_guc.c               |  10 +-
 drivers/gpu/drm/xe/xe_guc_ads.c           |  11 +-
 drivers/gpu/drm/xe/xe_guc_pc.c            |  12 +-
 drivers/gpu/drm/xe/xe_huc.c               |   4 +-
 drivers/gpu/drm/xe/xe_hw_engine.c         |  44 ++---
 drivers/gpu/drm/xe/xe_mmio.c              |   3 +-
 drivers/gpu/drm/xe/xe_mocs.c              |   7 +-
 drivers/gpu/drm/xe/xe_reg_whitelist.c     |   2 +-
 drivers/gpu/drm/xe/xe_ring_ops.c          |   8 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c    |   2 +-
 drivers/gpu/drm/xe/xe_tuning.c            |   2 +-
 drivers/gpu/drm/xe/xe_wa.c                |  86 +++++-----
 23 files changed, 280 insertions(+), 283 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index a1e1d1c206fa5..9d61f59412893 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -22,8 +22,8 @@
 #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
 
 #define RING_PSMI_CTL(base)			_MMIO((base) + 0x50)
-#define   GEN8_RC_SEMA_IDLE_MSG_DISABLE			REG_BIT(12)
-#define   GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
+#define   RC_SEMA_IDLE_MSG_DISABLE			REG_BIT(12)
+#define   WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
 
 #define RING_ACTHD_UDW(base)			_MMIO((base) + 0x5c)
 #define RING_DMA_FADD_UDW(base)			_MMIO((base) + 0x60)
@@ -53,8 +53,8 @@
 #define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	REG_BIT(3)
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
 
-#define RING_MODE_GEN7(base)			_MMIO((base) + 0x29c)
-#define   GEN11_GFX_DISABLE_LEGACY_MODE		(1 << 3)
+#define RING_MODE(base)				_MMIO((base) + 0x29c)
+#define   GFX_DISABLE_LEGACY_MODE		(1 << 3)
 
 #define RING_TIMESTAMP(base)			_MMIO((base) + 0x358)
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 05531d43514f2..0f9c5b0b8a3ba 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -37,7 +37,7 @@
 #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
 #define   MI_FLUSH_DW_USE_GTT		(1<<2)
 
-#define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
+#define MI_BATCH_BUFFER_START		MI_INSTR(0x31, 1)
 
 #define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
 #define   SRC_ACCESS_TYPE_SHIFT		21
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index fb91f04c3c6cf..5a0a08c84f3de 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -10,18 +10,18 @@
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0				_MMIO(0xd00)
-#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
-#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	0
-#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
-#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
-#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ	3
-#define   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
-#define   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
-
-#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n)	_MMIO(0xd50 + (n) * 4)
-#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n)	_MMIO(0xd70 + (n) * 4)
-#define FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0xd84)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(0x7 << RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	0
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ	3
+#define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
+#define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
+
+#define FORCEWAKE_ACK_MEDIA_VDBOX(n)		_MMIO(0xd50 + (n) * 4)
+#define FORCEWAKE_ACK_MEDIA_VEBOX(n)		_MMIO(0xd70 + (n) * 4)
+#define FORCEWAKE_ACK_RENDER			_MMIO(0xd84)
 
 #define GMD_ID					_MMIO(0xd8c)
 #define   GMD_ID_ARCH_MASK			REG_GENMASK(31, 22)
@@ -30,49 +30,49 @@
 
 #define FORCEWAKE_ACK_GT_MTL			_MMIO(0xdfc)
 
-#define GEN9_LNCFCMOCS(i)			_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
+#define LNCFCMOCS(i)				_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
 #define LNCFCMOCS_REG_COUNT			32
 
 #define MCFG_MCR_SELECTOR			_MMIO(0xfd0)
 #define MTL_MCR_SELECTOR			_MMIO(0xfd4)
 #define SF_MCR_SELECTOR				_MMIO(0xfd8)
-#define GEN8_MCR_SELECTOR			_MMIO(0xfdc)
+#define MCR_SELECTOR				_MMIO(0xfdc)
 #define GAM_MCR_SELECTOR			_MMIO(0xfe0)
-#define   GEN11_MCR_MULTICAST			REG_BIT(31)
-#define   GEN11_MCR_SLICE(slice)		(((slice) & 0xf) << 27)
-#define   GEN11_MCR_SLICE_MASK			GEN11_MCR_SLICE(0xf)
-#define   GEN11_MCR_SUBSLICE(subslice)		(((subslice) & 0x7) << 24)
-#define   GEN11_MCR_SUBSLICE_MASK		GEN11_MCR_SUBSLICE(0x7)
+#define   MCR_MULTICAST				REG_BIT(31)
+#define   MCR_SLICE(slice)			(((slice) & 0xf) << 27)
+#define   MCR_SLICE_MASK			MCR_SLICE(0xf)
+#define   MCR_SUBSLICE(subslice)		(((subslice) & 0x7) << 24)
+#define   MCR_SUBSLICE_MASK			MCR_SUBSLICE(0x7)
 #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
 #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
 
-#define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
-#define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
+#define FF_SLICE_CS_CHICKEN1			_MMIO(0x20e0)
+#define   FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
 
 #define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
 #define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
 
-#define GEN9_CS_DEBUG_MODE1			_MMIO(0x20ec)
+#define CS_DEBUG_MODE1				_MMIO(0x20ec)
 #define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
-#define   GEN12_REPLAY_MODE_GRANULARITY		REG_BIT(0)
+#define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
 
 #define PS_INVOCATION_COUNT			_MMIO(0x2348)
 
-#define GEN8_CS_CHICKEN1			_MMIO(0x2580)
-#define   GEN9_PREEMPT_3D_OBJECT_LEVEL		(1 << 0)
-#define   GEN9_PREEMPT_GPGPU_LEVEL(hi, lo)	(((hi) << 2) | ((lo) << 1))
-#define   GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 0)
-#define   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 1)
-#define   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
-#define   GEN9_PREEMPT_GPGPU_LEVEL_MASK		GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
+#define CS_CHICKEN1				_MMIO(0x2580)
+#define   PREEMPT_3D_OBJECT_LEVEL		(1 << 0)
+#define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
+#define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
+#define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
+#define   PREEMPT_GPGPU_COMMAND_LEVEL		PREEMPT_GPGPU_LEVEL(1, 0)
+#define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
 
-#define GEN12_GLOBAL_MOCS(i)			_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
-#define GEN12_CCS_AUX_INV			_MMIO(0x4208)
+#define GLOBAL_MOCS(i)				_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
+#define CCS_AUX_INV				_MMIO(0x4208)
 
-#define GEN12_VD0_AUX_INV			_MMIO(0x4218)
-#define GEN12_VE0_AUX_INV			_MMIO(0x4238)
+#define VD0_AUX_INV				_MMIO(0x4218)
+#define VE0_AUX_INV				_MMIO(0x4238)
 
-#define GEN12_VE1_AUX_INV			_MMIO(0x42b8)
+#define VE1_AUX_INV				_MMIO(0x42b8)
 #define   AUX_INV				REG_BIT(0)
 
 #define XEHP_TILE0_ADDR_RANGE			MCR_REG(0x4900)
@@ -88,7 +88,7 @@
 #define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
 
-#define GEN12_FF_MODE2				_MMIO(0x6604)
+#define FF_MODE2				_MMIO(0x6604)
 #define XEHP_FF_MODE2				MCR_REG(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
@@ -101,22 +101,21 @@
 #define XEHP_PSS_MODE2				MCR_REG(0x703c)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
-#define HIZ_CHICKEN				_MMIO(0x7018)
+#define HIZ_CHICKEN					_MMIO(0x7018)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
-#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE	REG_BIT(13)
+#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
 
-/* GEN7 chicken */
-#define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
+#define COMMON_SLICE_CHICKEN1			_MMIO(0x7010)
 
 #define COMMON_SLICE_CHICKEN4			_MMIO(0x7300)
 #define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
 
-#define GEN11_COMMON_SLICE_CHICKEN3		_MMIO(0x7304)
-#define XEHP_COMMON_SLICE_CHICKEN3		MCR_REG(0x7304)
+#define COMMON_SLICE_CHICKEN3				_MMIO(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3			MCR_REG(0x7304)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
-#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE	REG_BIT(12)
-#define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC	REG_BIT(11)
-#define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE	REG_BIT(9)
+#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE		REG_BIT(12)
+#define   BLEND_EMB_FIX_DISABLE_IN_RCC			REG_BIT(11)
+#define   DISABLE_CPS_AWARE_COLOR_PIPE			REG_BIT(9)
 
 #define XEHP_SLICE_COMMON_ECO_CHICKEN1		MCR_REG(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
@@ -130,21 +129,21 @@
 #define XEHP_SQCM				MCR_REG(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
-#define	GEN10_MIRROR_FUSE3			_MMIO(0x9118)
-#define   GEN10_L3BANK_PAIR_COUNT		4
-#define   GEN10_L3BANK_MASK			0x0F
+#define	MIRROR_FUSE3				_MMIO(0x9118)
+#define   L3BANK_PAIR_COUNT			4
+#define   L3BANK_MASK				0x0F
 /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
-#define   GEN12_MAX_MSLICES			4
-#define   GEN12_MEML3_EN_MASK			0x0F
+#define   MAX_MSLICES				4
+#define   MEML3_EN_MASK				0x0F
 
 /* Fuse readout registers for GT */
 #define XEHP_FUSE4				_MMIO(0x9114)
 #define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
 
-#define GEN11_GT_VEBOX_VDBOX_DISABLE		_MMIO(0x9140)
-#define   GEN11_GT_VDBOX_DISABLE_MASK		0xff
-#define   GEN11_GT_VEBOX_DISABLE_SHIFT		16
-#define   GEN11_GT_VEBOX_DISABLE_MASK		(0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
+#define GT_VEBOX_VDBOX_DISABLE			_MMIO(0x9140)
+#define   GT_VDBOX_DISABLE_MASK			0xff
+#define   GT_VEBOX_DISABLE_SHIFT		16
+#define   GT_VEBOX_DISABLE_MASK			(0x0f << GT_VEBOX_DISABLE_SHIFT)
 
 #define XELP_EU_ENABLE				_MMIO(0x9134)	/* "_DISABLE" on Xe_LP */
 #define   XELP_EU_MASK				REG_GENMASK(7, 0)
@@ -152,14 +151,13 @@
 #define XEHP_GT_COMPUTE_DSS_ENABLE		_MMIO(0x9144)
 #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		_MMIO(0x9148)
 
-#define GEN6_GDRST				_MMIO(0x941c)
-#define   GEN11_GRDOM_GUC			REG_BIT(3)
-#define   GEN6_GRDOM_FULL			(1 << 0)
-#define   GEN11_GRDOM_FULL			GEN6_GRDOM_FULL
+#define GDRST					_MMIO(0x941c)
+#define   GRDOM_GUC				REG_BIT(3)
+#define   GRDOM_FULL				REG_BIT(0)
 
-#define GEN7_MISCCPCTL				_MMIO(0x9424)
-#define   GEN7_DOP_CLOCK_GATE_ENABLE		(1 << 0)
-#define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE	REG_BIT(1)
+#define MISCCPCTL				_MMIO(0x9424)
+#define   DOP_CLOCK_GATE_RENDER_ENABLE		REG_BIT(1)
+#define   DOP_CLOCK_GATE_ENABLE			REG_BIT((0)
 
 #define UNSLCGCTL9430				_MMIO(0x9430)
 #define   MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
@@ -213,10 +211,9 @@
 #define   L3_CR2X_CLKGATE_DIS			REG_BIT(17)
 
 #define UNSLICE_UNIT_LEVEL_CLKGATE2		_MMIO(0x94e4)
-#define   VSUNIT_CLKGATE_DIS_TGL		REG_BIT(19)
-#define   PSDUNIT_CLKGATE_DIS			REG_BIT(5)
+#define   VSUNIT_CLKGATE2_DIS			REG_BIT(19)
 
-#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE	MCR_REG(0x9524)
+#define SUBSLICE_UNIT_LEVEL_CLKGATE		MCR_REG(0x9524)
 #define   DSS_ROUTER_CLKGATE_DIS		REG_BIT(28)
 #define   GWUNIT_CLKGATE_DIS			REG_BIT(16)
 
@@ -226,21 +223,21 @@
 #define SSMCGCTL9530				MCR_REG(0x9530)
 #define   RTFUNIT_CLKGATE_DIS			REG_BIT(18)
 
-#define GEN10_DFR_RATIO_EN_AND_CHICKEN		MCR_REG(0x9550)
+#define DFR_RATIO_EN_AND_CHICKEN		MCR_REG(0x9550)
 #define   DFR_DISABLE				(1 << 9)
 
-#define GEN6_RPNSWREQ				_MMIO(0xa008)
+#define RPNSWREQ				_MMIO(0xa008)
 #define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
-#define GEN6_RC_CONTROL				_MMIO(0xa090)
-#define GEN6_RC_STATE				_MMIO(0xa094)
+#define RC_CONTROL				_MMIO(0xa090)
+#define RC_STATE				_MMIO(0xa094)
 
-#define GEN6_PMINTRMSK				_MMIO(0xa168)
-#define   GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC	(1 << 31)
+#define PMINTRMSK				_MMIO(0xa168)
+#define   PMINTR_DISABLE_REDIRECT_TO_GUC	(1 << 31)
 #define   ARAT_EXPIRED_INTRMSK			(1 << 9)
 
-#define FORCEWAKE_GT_GEN9			_MMIO(0xa188)
+#define FORCEWAKE_GT				_MMIO(0xa188)
 
-#define GEN9_PG_ENABLE				_MMIO(0xa210)
+#define PG_ENABLE				_MMIO(0xa210)
 
 /* GPM unit config (Gen9+) */
 #define CTC_MODE				_MMIO(0xa26c)
@@ -250,9 +247,9 @@
 #define   CTC_SHIFT_PARAMETER_SHIFT		1
 #define   CTC_SHIFT_PARAMETER_MASK		(0x3 << CTC_SHIFT_PARAMETER_SHIFT)
 
-#define FORCEWAKE_RENDER_GEN9			_MMIO(0xa278)
-#define FORCEWAKE_MEDIA_VDBOX_GEN11(n)		_MMIO(0xa540 + (n) * 4)
-#define FORCEWAKE_MEDIA_VEBOX_GEN11(n)		_MMIO(0xa560 + (n) * 4)
+#define FORCEWAKE_RENDER			_MMIO(0xa278)
+#define FORCEWAKE_MEDIA_VDBOX(n)		_MMIO(0xa540 + (n) * 4)
+#define FORCEWAKE_MEDIA_VEBOX(n)		_MMIO(0xa560 + (n) * 4)
 
 #define XEHPC_LNCFMISCCFGREG0			MCR_REG(0xb01c)
 #define   XEHPC_OVRLSCCC			REG_BIT(0)
@@ -282,12 +279,12 @@
 #define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
 #define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
 
-#define GEN10_SAMPLER_MODE			MCR_REG(0xe18c)
+#define SAMPLER_MODE				MCR_REG(0xe18c)
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
-#define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
+#define   SAMPLER_ENABLE_HEADLESS_MSG		REG_BIT(5)
 
-#define GEN9_HALF_SLICE_CHICKEN7		MCR_REG(0xe194)
+#define HALF_SLICE_CHICKEN7				MCR_REG(0xe194)
 #define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
 
 #define CACHE_MODE_SS				MCR_REG(0xe420)
@@ -295,27 +292,27 @@
 #define   DISABLE_ECC				REG_BIT(5)
 #define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
 
-#define GEN9_ROW_CHICKEN4			MCR_REG(0xe48c)
-#define   GEN12_DISABLE_GRF_CLEAR		REG_BIT(13)
+#define ROW_CHICKEN4				MCR_REG(0xe48c)
+#define   DISABLE_GRF_CLEAR			REG_BIT(13)
 #define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
-#define   GEN12_DISABLE_TDL_PUSH		REG_BIT(9)
-#define   GEN11_DIS_PICK_2ND_EU			REG_BIT(7)
-#define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX	REG_BIT(4)
+#define   DISABLE_TDL_PUSH			REG_BIT(9)
+#define   DIS_PICK_2ND_EU			REG_BIT(7)
+#define   DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX	REG_BIT(4)
 #define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
 #define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
-#define GEN8_ROW_CHICKEN			MCR_REG(0xe4f0)
+#define ROW_CHICKEN				MCR_REG(0xe4f0)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
 
-#define GEN8_ROW_CHICKEN2			MCR_REG(0xe4f4)
-#define   GEN12_DISABLE_READ_SUPPRESSION	REG_BIT(15)
-#define   GEN12_DISABLE_EARLY_READ		REG_BIT(14)
-#define   GEN12_ENABLE_LARGE_GRF_MODE		REG_BIT(12)
-#define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
-#define   GEN12_DISABLE_DOP_GATING              REG_BIT(0)
+#define ROW_CHICKEN2				MCR_REG(0xe4f4)
+#define   DISABLE_READ_SUPPRESSION		REG_BIT(15)
+#define   DISABLE_EARLY_READ			REG_BIT(14)
+#define   ENABLE_LARGE_GRF_MODE			REG_BIT(12)
+#define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
+#define   DISABLE_DOP_GATING			REG_BIT(0)
 
-#define XEHP_HDC_CHICKEN0			MCR_REG(0xe5f0)
+#define XEHP_HDC_CHICKEN0					MCR_REG(0xe5f0)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
 
 #define RT_CTRL					MCR_REG(0xe530)
@@ -335,21 +332,21 @@
 #define SARB_CHICKEN1				MCR_REG(0xe90c)
 #define   COMP_CKN_IN				REG_GENMASK(30, 29)
 
-#define GEN12_RCU_MODE				_MMIO(0x14800)
-#define   GEN12_RCU_MODE_CCS_ENABLE		REG_BIT(0)
+#define RCU_MODE				_MMIO(0x14800)
+#define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
 
-#define FORCEWAKE_ACK_GT_GEN9			_MMIO(0x130044)
+#define FORCEWAKE_ACK_GT			_MMIO(0x130044)
 #define   FORCEWAKE_KERNEL			BIT(0)
 #define   FORCEWAKE_USER			BIT(1)
 #define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 
-#define GEN6_GT_CORE_STATUS			_MMIO(0x138060)
+#define GT_CORE_STATUS				_MMIO(0x138060)
 #define   RCN_MASK				REG_GENMASK(2, 0)
 #define   GT_RC0				0
 #define   GT_RC6				3
 
-#define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
-#define GEN6_GT_GFX_RC6				_MMIO(0x138108)
+#define GT_GFX_RC6_LOCKED			_MMIO(0x138104)
+#define GT_GFX_RC6				_MMIO(0x138108)
 
 #define GT_INTR_DW(x)				_MMIO(0x190018 + ((x) * 4))
 
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index 1960f9e78ec47..bc9b42b38795f 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -54,23 +54,21 @@
 #define HUC_STATUS2			_MMIO(0xd3b0)
 #define   HUC_FW_VERIFIED		REG_BIT(7)
 
-#define GEN11_HUC_KERNEL_LOAD_INFO	_MMIO(0xc1dc)
+#define HUC_KERNEL_LOAD_INFO		_MMIO(0xc1dc)
 #define   HUC_LOAD_SUCCESSFUL		REG_BIT(0)
 
 #define GUC_WOPCM_SIZE			_MMIO(0xc050)
 #define   GUC_WOPCM_SIZE_MASK		REG_GENMASK(31, 12)
 #define   GUC_WOPCM_SIZE_LOCKED		REG_BIT(0)
 
-#define GEN8_GT_PM_CONFIG		_MMIO(0x138140)
-#define GEN9LP_GT_PM_CONFIG		_MMIO(0x138140)
-#define GEN9_GT_PM_CONFIG		_MMIO(0x13816c)
+#define GT_PM_CONFIG			_MMIO(0x13816c)
 #define   GT_DOORBELL_ENABLE		REG_BIT(0)
 
-#define GEN8_GTCR			_MMIO(0x4274)
-#define   GEN8_GTCR_INVALIDATE		REG_BIT(0)
+#define GTCR				_MMIO(0x4274)
+#define   GTCR_INVALIDATE		REG_BIT(0)
 
-#define GEN12_GUC_TLB_INV_CR			_MMIO(0xcee8)
-#define   GEN12_GUC_TLB_INV_CR_INVALIDATE	REG_BIT(0)
+#define GUC_TLB_INV_CR			_MMIO(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE	REG_BIT(0)
 
 #define GUC_ARAT_C6DIS				_MMIO(0xa178)
 
@@ -79,11 +77,11 @@
 #define   PVC_GUC_MOCS_UC_INDEX			1
 #define   PVC_GUC_MOCS_INDEX(index)		REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \
 							       index)
-#define   GUC_GEN10_SHIM_WC_ENABLE		REG_BIT(21)
+#define   GUC_SHIM_WC_ENABLE			REG_BIT(21)
 #define   GUC_ENABLE_MIA_CLOCK_GATING		REG_BIT(15)
 #define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	REG_BIT(10)
 #define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	REG_BIT(9)
-#define   GUC_GEN10_MSGCH_ENABLE		REG_BIT(4)
+#define   GUC_MSGCH_ENABLE			REG_BIT(4)
 #define   GUC_ENABLE_MIA_CACHING		REG_BIT(2)
 #define   GUC_ENABLE_READ_CACHE_LOGIC		REG_BIT(1)
 #define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	REG_BIT(0)
@@ -91,7 +89,7 @@
 
 #define GUC_SEND_INTERRUPT			_MMIO(0xc4c8)
 #define   GUC_SEND_TRIGGER			REG_BIT(0)
-#define GEN11_GUC_HOST_INTERRUPT		_MMIO(0x1901f0)
+#define GUC_HOST_INTERRUPT			_MMIO(0x1901f0)
 
 #define GUC_NUM_DOORBELLS			256
 
@@ -105,13 +103,13 @@ struct guc_doorbell_info {
 	u32 reserved[14];
 } __packed;
 
-#define GEN8_DRBREGL(x)				_MMIO(0x1000 + (x) * 8)
-#define   GEN8_DRB_VALID			REG_BIT(0)
-#define GEN8_DRBREGU(x)				_MMIO(0x1000 + (x) * 8 + 4)
+#define DRBREGL(x)				_MMIO(0x1000 + (x) * 8)
+#define   DRB_VALID				REG_BIT(0)
+#define DRBREGU(x)				_MMIO(0x1000 + (x) * 8 + 4)
 
-#define GEN12_DIST_DBS_POPULATED		_MMIO(0xd08)
-#define   GEN12_DOORBELLS_PER_SQIDI_MASK	REG_GENMASK(23, 16)
-#define   GEN12_SQIDIS_DOORBELL_EXIST_MASK	REG_GENMASK(15, 0)
+#define DIST_DBS_POPULATED			_MMIO(0xd08)
+#define   DOORBELLS_PER_SQIDI_MASK		REG_GENMASK(23, 16)
+#define   SQIDIS_DOORBELL_EXIST_MASK		REG_GENMASK(15, 0)
 
 #define GUC_BCS_RCS_IER				_MMIO(0xC550)
 #define GUC_VCS2_VCS1_IER			_MMIO(0xC554)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index c2a278b25fc9f..50fc3c4690865 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -11,23 +11,22 @@
 #define   LMEM_INIT				REG_BIT(7)
 
 #define RENDER_RING_BASE			0x02000
-#define GEN11_BSD_RING_BASE			0x1c0000
-#define GEN11_BSD2_RING_BASE			0x1c4000
-#define GEN11_BSD3_RING_BASE			0x1d0000
-#define GEN11_BSD4_RING_BASE			0x1d4000
+#define BSD_RING_BASE				0x1c0000
+#define BSD2_RING_BASE				0x1c4000
+#define BSD3_RING_BASE				0x1d0000
+#define BSD4_RING_BASE				0x1d4000
 #define XEHP_BSD5_RING_BASE			0x1e0000
 #define XEHP_BSD6_RING_BASE			0x1e4000
 #define XEHP_BSD7_RING_BASE			0x1f0000
 #define XEHP_BSD8_RING_BASE			0x1f4000
-#define VEBOX_RING_BASE				0x1a000
-#define GEN11_VEBOX_RING_BASE			0x1c8000
-#define GEN11_VEBOX2_RING_BASE			0x1d8000
+#define VEBOX_RING_BASE				0x1c8000
+#define VEBOX2_RING_BASE			0x1d8000
 #define XEHP_VEBOX3_RING_BASE			0x1e8000
 #define XEHP_VEBOX4_RING_BASE			0x1f8000
-#define GEN12_COMPUTE0_RING_BASE		0x1a000
-#define GEN12_COMPUTE1_RING_BASE		0x1c000
-#define GEN12_COMPUTE2_RING_BASE		0x1e000
-#define GEN12_COMPUTE3_RING_BASE		0x26000
+#define COMPUTE0_RING_BASE			0x1a000
+#define COMPUTE1_RING_BASE			0x1c000
+#define COMPUTE2_RING_BASE			0x1e000
+#define COMPUTE3_RING_BASE			0x26000
 #define BLT_RING_BASE				0x22000
 #define XEHPC_BCS1_RING_BASE			0x3e0000
 #define XEHPC_BCS2_RING_BASE			0x3e2000
@@ -43,8 +42,8 @@
 #define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
 #define   GT_RENDER_USER_INTERRUPT		(1 <<  0)
 
-#define GEN7_FF_THREAD_MODE			_MMIO(0x20a0)
-#define   GEN12_FF_TESSELATION_DOP_GATE_DISABLE BIT(19)
+#define FF_THREAD_MODE				_MMIO(0x20a0)
+#define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
 
 #define PVC_RP_STATE_CAP			_MMIO(0x281014)
 #define MTL_RP_STATE_CAP			_MMIO(0x138000)
@@ -86,18 +85,18 @@
 #define   DG1_MSTR_IRQ				REG_BIT(31)
 #define   DG1_MSTR_TILE(t)			REG_BIT(t)
 
-#define GEN9_TIMESTAMP_OVERRIDE					_MMIO(0x44074)
-#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT	0
-#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK	0x3ff
-#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
-#define   GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
+#define TIMESTAMP_OVERRIDE					_MMIO(0x44074)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT		0
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		0x3ff
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
 
 #define GGC					_MMIO(0x108040)
 #define   GMS_MASK				REG_GENMASK(15, 8)
 #define   GGMS_MASK				REG_GENMASK(7, 6)
 
-#define GEN12_GSMBASE				_MMIO(0x108100)
-#define GEN12_DSMBASE				_MMIO(0x1080C0)
-#define   GEN12_BDSM_MASK			REG_GENMASK64(63, 20)
+#define GSMBASE					_MMIO(0x108100)
+#define DSMBASE					_MMIO(0x1080C0)
+#define   BDSM_MASK				REG_GENMASK64(63, 20)
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 64b520ddca9cb..d524ac5c7b579 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -60,8 +60,8 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	}
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
-		xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
-				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+		xe_mmio_write32(hwe->gt, RCU_MODE.reg,
+				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
 
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
 	lrc->ring.old_tail = lrc->ring.tail;
@@ -81,8 +81,8 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg,
 			xe_bo_ggtt_addr(hwe->hwsp));
 	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg);
-	xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg,
-			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+	xe_mmio_write32(gt, RING_MODE(hwe->mmio_base).reg,
+			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
 
 	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base).reg,
 			lower_32_bits(lrc_desc));
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 77a210acfac39..53d73f36a1216 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -49,14 +49,14 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
 	if (xe->info.graphics_verx100 >= 1270) {
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
 			    XE_FW_DOMAIN_ID_GT,
-			    FORCEWAKE_GT_GEN9.reg,
+			    FORCEWAKE_GT.reg,
 			    FORCEWAKE_ACK_GT_MTL.reg,
 			    BIT(0), BIT(16));
 	} else {
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
 			    XE_FW_DOMAIN_ID_GT,
-			    FORCEWAKE_GT_GEN9.reg,
-			    FORCEWAKE_ACK_GT_GEN9.reg,
+			    FORCEWAKE_GT.reg,
+			    FORCEWAKE_ACK_GT.reg,
 			    BIT(0), BIT(16));
 	}
 }
@@ -71,8 +71,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 	if (!xe_gt_is_media_type(gt))
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
 			    XE_FW_DOMAIN_ID_RENDER,
-			    FORCEWAKE_RENDER_GEN9.reg,
-			    FORCEWAKE_ACK_RENDER_GEN9.reg,
+			    FORCEWAKE_RENDER.reg,
+			    FORCEWAKE_ACK_RENDER.reg,
 			    BIT(0), BIT(16));
 
 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
@@ -81,8 +81,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
 			    XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
-			    FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg,
-			    FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg,
+			    FORCEWAKE_MEDIA_VDBOX(j).reg,
+			    FORCEWAKE_ACK_MEDIA_VDBOX(j).reg,
 			    BIT(0), BIT(16));
 	}
 
@@ -92,8 +92,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
 			    XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
-			    FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg,
-			    FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg,
+			    FORCEWAKE_MEDIA_VEBOX(j).reg,
+			    FORCEWAKE_ACK_MEDIA_VEBOX(j).reg,
 			    BIT(0), BIT(16));
 	}
 }
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 10a262a0c4cd0..fc580d961dbbe 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -185,12 +185,12 @@ err:
 	return err;
 }
 
-#define GEN12_GUC_TLB_INV_CR                     _MMIO(0xcee8)
-#define   GEN12_GUC_TLB_INV_CR_INVALIDATE        REG_BIT(0)
+#define GUC_TLB_INV_CR				_MMIO(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
 #define PVC_GUC_TLB_INV_DESC0			_MMIO(0xcf7c)
-#define   PVC_GUC_TLB_INV_DESC0_VALID		 REG_BIT(0)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
 #define PVC_GUC_TLB_INV_DESC1			_MMIO(0xcf80)
-#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	 REG_BIT(6)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
 
 void xe_ggtt_invalidate(struct xe_gt *gt)
 {
@@ -212,8 +212,8 @@ void xe_ggtt_invalidate(struct xe_gt *gt)
 			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg,
 					PVC_GUC_TLB_INV_DESC0_VALID);
 		} else
-			xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg,
-					GEN12_GUC_TLB_INV_CR_INVALIDATE);
+			xe_mmio_write32(gt, GUC_TLB_INV_CR.reg,
+					GUC_TLB_INV_CR_INVALIDATE);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 4186f7f0d42f5..0d4664e344dac 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -541,8 +541,8 @@ static int do_gt_reset(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
-	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL);
-	err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5000,
+	xe_mmio_write32(gt, GDRST.reg, GRDOM_FULL);
+	err = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_FULL, 5000,
 			     NULL, false);
 	if (err)
 		drm_err(&xe->drm,
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 60a2966bc1fd2..1b7d002845350 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -14,16 +14,16 @@
 
 static u32 read_reference_ts_freq(struct xe_gt *gt)
 {
-	u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg);
+	u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE.reg);
 	u32 base_freq, frac_freq;
 
-	base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
-		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
+	base_freq = ((ts_override & TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
+		     TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
 	base_freq *= 1000000;
 
 	frac_freq = ((ts_override &
-		      GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
-		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
+		      TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
+		     TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
 	frac_freq = 1000000 / (frac_freq + 1);
 
 	return base_freq + frac_freq;
@@ -36,17 +36,17 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
 	const u32 f25_mhz = 25000000;
 	const u32 f38_4_mhz = 38400000;
 	u32 crystal_clock =
-		(rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
-		GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+		(rpm_config_reg & RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+		RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
 
 	switch (crystal_clock) {
-	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ :
 		return f24_mhz;
-	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ :
 		return f19_2_mhz;
-	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ :
 		return f38_4_mhz;
-	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ :
 		return f25_mhz;
 	default:
 		XE_BUG_ON("NOT_POSSIBLE");
@@ -74,8 +74,8 @@ int xe_gt_clock_init(struct xe_gt *gt)
 		 * register increments from this frequency (it might
 		 * increment only every few clock cycle).
 		 */
-		freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
-			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+		freq >>= 3 - ((c0 & RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+			      RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
 	}
 
 	gt->info.clock_freq = freq;
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 5412f77bc26fd..aa04ba5a6dbee 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -177,8 +177,8 @@ static const struct xe_mmio_range dg2_implicit_steering_table[] = {
 static void init_steering_l3bank(struct xe_gt *gt)
 {
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
-		u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
-						xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
+						xe_mmio_read32(gt, MIRROR_FUSE3.reg));
 		u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
 					      xe_mmio_read32(gt, XEHP_FUSE4.reg));
 
@@ -190,8 +190,8 @@ static void init_steering_l3bank(struct xe_gt *gt)
 		gt->steering[L3BANK].instance_target =
 			bank_mask & BIT(0) ? 0 : 2;
 	} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
-		u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
-						xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
+						xe_mmio_read32(gt, MIRROR_FUSE3.reg));
 		u32 bank = __ffs(mslice_mask) * 8;
 
 		/*
@@ -202,8 +202,8 @@ static void init_steering_l3bank(struct xe_gt *gt)
 		gt->steering[L3BANK].group_target = (bank >> 2) & 0x7;
 		gt->steering[L3BANK].instance_target = bank & 0x3;
 	} else {
-		u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK,
-					 ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+		u32 fuse = REG_FIELD_GET(L3BANK_MASK,
+					 ~xe_mmio_read32(gt, MIRROR_FUSE3.reg));
 
 		gt->steering[L3BANK].group_target = 0;	/* unused */
 		gt->steering[L3BANK].instance_target = __ffs(fuse);
@@ -212,8 +212,8 @@ static void init_steering_l3bank(struct xe_gt *gt)
 
 static void init_steering_mslice(struct xe_gt *gt)
 {
-	u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
-				 xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+	u32 mask = REG_FIELD_GET(MEML3_EN_MASK,
+				 xe_mmio_read32(gt, MIRROR_FUSE3.reg));
 
 	/*
 	 * mslice registers are valid (not terminated) if either the meml3
@@ -329,8 +329,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 
 	if (xe->info.platform == XE_DG2) {
-		u32 steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, 0) |
-			REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, 2);
+		u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
+			REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
 
 		xe_mmio_write32(gt, MCFG_MCR_SELECTOR.reg, steer_val);
 		xe_mmio_write32(gt, SF_MCR_SELECTOR.reg, steer_val);
@@ -448,9 +448,9 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag
 		steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
 			REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
 	} else {
-		steer_reg = GEN8_MCR_SELECTOR.reg;
-		steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) |
-			REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance);
+		steer_reg = MCR_SELECTOR.reg;
+		steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, group) |
+			REG_FIELD_PREP(MCR_SUBSLICE_MASK, instance);
 	}
 
 	/*
@@ -461,7 +461,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag
 	 * No need to save old steering reg value.
 	 */
 	if (rw_flag == MCR_OP_READ)
-		steer_val |= GEN11_MCR_MULTICAST;
+		steer_val |= MCR_MULTICAST;
 
 	xe_mmio_write32(gt, steer_reg, steer_val);
 
@@ -477,7 +477,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag
 	 * operation.
 	 */
 	if (rw_flag == MCR_OP_WRITE)
-		xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST);
+		xe_mmio_write32(gt, steer_reg, MCR_MULTICAST);
 
 	return val;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index d18f2e25ce56c..4e9e9b1aad023 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -278,7 +278,7 @@ int xe_guc_init(struct xe_guc *guc)
 	if (xe_gt_is_media_type(gt))
 		guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg;
 	else
-		guc->notify_reg = GEN11_GUC_HOST_INTERRUPT.reg;
+		guc->notify_reg = GUC_HOST_INTERRUPT.reg;
 
 	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
 
@@ -317,9 +317,9 @@ int xe_guc_reset(struct xe_guc *guc)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC);
+	xe_mmio_write32(gt, GDRST.reg, GRDOM_GUC);
 
-	ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5000,
+	ret = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_GUC, 5000,
 			     &gdrst, false);
 	if (ret) {
 		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
@@ -362,7 +362,7 @@ static void guc_prepare_xfer(struct xe_guc *guc)
 	/* Must program this register before loading the ucode with DMA */
 	xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags);
 
-	xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE);
+	xe_mmio_write32(gt, GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE);
 }
 
 /*
@@ -575,7 +575,7 @@ int xe_guc_enable_communication(struct xe_guc *guc)
 
 	guc_enable_irq(guc);
 
-	xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg,
+	xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK.reg,
 		      ARAT_EXPIRED_INTRMSK, 0);
 
 	err = xe_guc_ct_enable(&guc->ct);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index d4fc2d357a78a..6a723bda2aa93 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -450,10 +450,10 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 		u32 flags;
 		bool skip;
 	} *e, extra_regs[] = {
-		{ .reg = RING_MODE_GEN7(hwe->mmio_base).reg,		},
+		{ .reg = RING_MODE(hwe->mmio_base).reg,		},
 		{ .reg = RING_HWS_PGA(hwe->mmio_base).reg,		},
 		{ .reg = RING_IMR(hwe->mmio_base).reg,			},
-		{ .reg = GEN12_RCU_MODE.reg, .flags = 0x3,
+		{ .reg = RCU_MODE.reg, .flags = 0x3,
 		  .skip = hwe != hwe_rcs_reset_domain			},
 	};
 	u32 i;
@@ -478,7 +478,8 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 	if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
 		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
 			guc_mmio_regset_write_one(ads, regset_map,
-						  GEN9_LNCFCMOCS(i).reg, 0, count++);
+						  LNCFCMOCS(i).reg, 0,
+						  count++);
 		}
 	}
 
@@ -557,11 +558,11 @@ static void guc_doorbell_init(struct xe_guc_ads *ads)
 
 	if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
 		u32 distdbreg =
-			xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg);
+			xe_mmio_read32(gt, DIST_DBS_POPULATED.reg);
 
 		ads_blob_write(ads,
 			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
-			       REG_FIELD_GET(GEN12_DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
+			       REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 0b6d0577a8a7b..6d59e36b6e5c7 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -413,7 +413,7 @@ static ssize_t freq_cur_show(struct device *dev,
 	if (ret)
 		goto out;
 
-	freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg);
+	freq = xe_mmio_read32(gt, RPNSWREQ.reg);
 
 	freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
 	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
@@ -588,7 +588,7 @@ static ssize_t rc_status_show(struct device *dev,
 	u32 reg;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
-	reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg);
+	reg = xe_mmio_read32(gt, GT_CORE_STATUS.reg);
 	xe_device_mem_access_put(gt_to_xe(gt));
 
 	switch (REG_FIELD_GET(RCN_MASK, reg)) {
@@ -615,7 +615,7 @@ static ssize_t rc6_residency_show(struct device *dev,
 	if (ret)
 		goto out;
 
-	reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg);
+	reg = xe_mmio_read32(gt, GT_GFX_RC6.reg);
 	ret = sysfs_emit(buff, "%u\n", reg);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
@@ -745,9 +745,9 @@ static int pc_gucrc_disable(struct xe_guc_pc *pc)
 	if (ret)
 		return ret;
 
-	xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0);
-	xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0);
-	xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0);
+	xe_mmio_write32(gt, PG_ENABLE.reg, 0);
+	xe_mmio_write32(gt, RC_CONTROL.reg, 0);
+	xe_mmio_write32(gt, RC_STATE.reg, 0);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index a1c3e54faa6e6..55dcaab34ea46 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -84,7 +84,7 @@ int xe_huc_auth(struct xe_huc *huc)
 		goto fail;
 	}
 
-	ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
+	ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO.reg,
 			     HUC_LOAD_SUCCESSFUL,
 			     HUC_LOAD_SUCCESSFUL, 100000, NULL, false);
 	if (ret) {
@@ -126,7 +126,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
 		return;
 
 	drm_printf(p, "\nHuC status: 0x%08x\n",
-		   xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg));
+		   xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO.reg));
 
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 23b9f120c258b..795302bcd3aef 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -110,28 +110,28 @@ static const struct engine_info engine_infos[] = {
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 0,
 		.domain = XE_FW_MEDIA_VDBOX0,
-		.mmio_base = GEN11_BSD_RING_BASE,
+		.mmio_base = BSD_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS1] = {
 		.name = "vcs1",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 1,
 		.domain = XE_FW_MEDIA_VDBOX1,
-		.mmio_base = GEN11_BSD2_RING_BASE,
+		.mmio_base = BSD2_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS2] = {
 		.name = "vcs2",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 2,
 		.domain = XE_FW_MEDIA_VDBOX2,
-		.mmio_base = GEN11_BSD3_RING_BASE,
+		.mmio_base = BSD3_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS3] = {
 		.name = "vcs3",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 3,
 		.domain = XE_FW_MEDIA_VDBOX3,
-		.mmio_base = GEN11_BSD4_RING_BASE,
+		.mmio_base = BSD4_RING_BASE,
 	},
 	[XE_HW_ENGINE_VCS4] = {
 		.name = "vcs4",
@@ -166,14 +166,14 @@ static const struct engine_info engine_infos[] = {
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 0,
 		.domain = XE_FW_MEDIA_VEBOX0,
-		.mmio_base = GEN11_VEBOX_RING_BASE,
+		.mmio_base = VEBOX_RING_BASE,
 	},
 	[XE_HW_ENGINE_VECS1] = {
 		.name = "vecs1",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 1,
 		.domain = XE_FW_MEDIA_VEBOX1,
-		.mmio_base = GEN11_VEBOX2_RING_BASE,
+		.mmio_base = VEBOX2_RING_BASE,
 	},
 	[XE_HW_ENGINE_VECS2] = {
 		.name = "vecs2",
@@ -194,28 +194,28 @@ static const struct engine_info engine_infos[] = {
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 0,
 		.domain = XE_FW_RENDER,
-		.mmio_base = GEN12_COMPUTE0_RING_BASE,
+		.mmio_base = COMPUTE0_RING_BASE,
 	},
 	[XE_HW_ENGINE_CCS1] = {
 		.name = "ccs1",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 1,
 		.domain = XE_FW_RENDER,
-		.mmio_base = GEN12_COMPUTE1_RING_BASE,
+		.mmio_base = COMPUTE1_RING_BASE,
 	},
 	[XE_HW_ENGINE_CCS2] = {
 		.name = "ccs2",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 2,
 		.domain = XE_FW_RENDER,
-		.mmio_base = GEN12_COMPUTE2_RING_BASE,
+		.mmio_base = COMPUTE2_RING_BASE,
 	},
 	[XE_HW_ENGINE_CCS3] = {
 		.name = "ccs3",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 3,
 		.domain = XE_FW_RENDER,
-		.mmio_base = GEN12_COMPUTE3_RING_BASE,
+		.mmio_base = COMPUTE3_RING_BASE,
 	},
 };
 
@@ -254,14 +254,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
-		xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
-				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+		xe_mmio_write32(hwe->gt, RCU_MODE.reg,
+				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
 
 	hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0);
 	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg,
 			       xe_bo_ggtt_addr(hwe->hwsp));
-	hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg,
-			       _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+	hw_engine_mmio_write32(hwe, RING_MODE(0).reg,
+			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
 	hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg,
 			       _MASKED_BIT_DISABLE(STOP_RING));
 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);
@@ -379,7 +379,7 @@ static void read_media_fuses(struct xe_gt *gt)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg);
+	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE.reg);
 
 	/*
 	 * Pre-Xe_HP platforms had register bits representing absent engines,
@@ -390,8 +390,8 @@ static void read_media_fuses(struct xe_gt *gt)
 	if (GRAPHICS_VERx100(xe) < 1250)
 		media_fuse = ~media_fuse;
 
-	vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse);
-	vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse);
+	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
+	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
 
 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
 		if (!(gt->info.engine_mask & BIT(i)))
@@ -421,8 +421,8 @@ static void read_copy_fuses(struct xe_gt *gt)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg);
-	bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask);
+	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3.reg);
+	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
 
 	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
 	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
@@ -546,7 +546,7 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
 		hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg));
 	drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg));
+		hw_engine_mmio_read32(hwe, RING_MODE(0).reg));
 
 	drm_printf(p, "\tRING_IMR:   0x%08x\n",
 		hw_engine_mmio_read32(hwe, RING_IMR(0).reg));
@@ -573,8 +573,8 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
 		hw_engine_mmio_read32(hwe, IPEHR(0).reg));
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
-		drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n",
-			xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg));
+		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
+			xe_mmio_read32(hwe->gt, RCU_MODE.reg));
 
 }
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 5536f84682c03..9b466803c68e4 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -158,7 +158,8 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si
 	if (!xe->info.has_flat_ccs)  {
 		*vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 		if (usable_size)
-			*usable_size = min(*vram_size, xe_mmio_read64(gt, GEN12_GSMBASE.reg));
+			*usable_size = min(*vram_size,
+					   xe_mmio_read64(gt, GSMBASE.reg));
 		return 0;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index e09c6242aafc0..67c63facdbf94 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -512,8 +512,9 @@ static void init_l3cc_table(struct xe_gt *gt,
 	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
 				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, GEN9_LNCFCMOCS(i).reg, l3cc);
-		xe_mmio_write32(gt, GEN9_LNCFCMOCS(i).reg, l3cc);
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).reg,
+			 l3cc);
+		xe_mmio_write32(gt, LNCFCMOCS(i).reg, l3cc);
 	}
 }
 
@@ -531,7 +532,7 @@ void xe_mocs_init(struct xe_gt *gt)
 	gt->mocs.wb_index = table.wb_index;
 
 	if (flags & HAS_GLOBAL_MOCS)
-		__init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg);
+		__init_mocs_table(gt, &table, GLOBAL_MOCS(0).reg);
 
 	/*
 	 * Initialize the L3CC table as part of mocs initalization to make
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index c4b3a20452996..5a26657069121 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -31,7 +31,7 @@ static const struct xe_rtp_entry register_whitelist[] = {
 	},
 	{ XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(WHITELIST(GEN7_COMMON_SLICE_CHICKEN1, 0))
+	  XE_RTP_ACTIONS(WHITELIST(COMMON_SLICE_CHICKEN1, 0))
 	},
 	{ XE_RTP_NAME("1806527549"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 5480746d40e81..4c5f46f892416 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -85,7 +85,7 @@ static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
 
 static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
 {
-	dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag;
+	dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag;
 	dw[i++] = lower_32_bits(batch_addr);
 	dw[i++] = upper_32_bits(batch_addr);
 
@@ -202,9 +202,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	/* Wa_1809175790 */
 	if (!xe->info.has_flat_ccs) {
 		if (decode)
-			i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i);
+			i = emit_aux_table_inv(gt, VD0_AUX_INV.reg, dw, i);
 		else
-			i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i);
+			i = emit_aux_table_inv(gt, VE0_AUX_INV.reg, dw, i);
 	}
 	dw[i++] = preparser_disable(false);
 
@@ -246,7 +246,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	i = emit_pipe_invalidate(mask_flags, dw, i);
 	/* Wa_1809175790 */
 	if (!xe->info.has_flat_ccs)
-		i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i);
+		i = emit_aux_table_inv(gt, CCS_AUX_INV.reg, dw, i);
 	dw[i++] = preparser_disable(false);
 
 	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 31887fec1073b..9ce0a05855396 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -65,7 +65,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	}
 
 	/* Use DSM base address instead for stolen memory */
-	mgr->stolen_base = xe_mmio_read64(gt, GEN12_DSMBASE.reg) & GEN12_BDSM_MASK;
+	mgr->stolen_base = xe_mmio_read64(gt, DSMBASE.reg) & BDSM_MASK;
 	if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base))
 		return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 27cf1330facd6..43912312cfba7 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -33,7 +33,7 @@ static const struct xe_rtp_entry lrc_tunings[] = {
 	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  /* read verification is ignored due to 1608008084. */
-	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2,
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2,
 						FF_MODE2_GS_TIMER_MASK,
 						FF_MODE2_GS_TIMER_224))
 	},
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index a7d681b7538d4..7a9bf588301e9 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -105,7 +105,7 @@ static const struct xe_rtp_entry gt_was[] = {
 	},
 	{ XE_RTP_NAME("14011059788"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
+	  XE_RTP_ACTIONS(SET(DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
 	},
 
 	/* DG1 */
@@ -116,7 +116,7 @@ static const struct xe_rtp_entry gt_was[] = {
 	},
 	{ XE_RTP_NAME("1408615072"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL))
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE2_DIS))
 	},
 
 	/* DG2 */
@@ -134,7 +134,7 @@ static const struct xe_rtp_entry gt_was[] = {
 	},
 	{ XE_RTP_NAME("14011006942"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
-	  XE_RTP_ACTIONS(SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
+	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14012362059"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
@@ -197,7 +197,7 @@ static const struct xe_rtp_entry gt_was[] = {
 	},
 	{ XE_RTP_NAME("14015795083"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE))
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
 	{ XE_RTP_NAME("18018781329"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
@@ -221,7 +221,7 @@ static const struct xe_rtp_entry gt_was[] = {
 
 	{ XE_RTP_NAME("14015795083"),
 	  XE_RTP_RULES(PLATFORM(PVC)),
-	  XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE))
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
 	{ XE_RTP_NAME("18018781329"),
 	  XE_RTP_RULES(PLATFORM(PVC)),
@@ -241,42 +241,42 @@ static const struct xe_rtp_entry gt_was[] = {
 static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN7_FF_THREAD_MODE,
-			     GEN12_FF_TESSELATION_DOP_GATE_DISABLE))
+	  XE_RTP_ACTIONS(SET(FF_THREAD_MODE,
+			     FF_TESSELATION_DOP_GATE_DISABLE))
 	},
 	{ XE_RTP_NAME("1409804808"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
-	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14010229206, 1409085225"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
-	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1606931601"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+	  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1406941453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL,
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1,
-			     GEN9_FFSC_PERCTX_PREEMPT_CTRL,
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1,
+			     FFSC_PERCTX_PREEMPT_CTRL,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 
@@ -285,8 +285,8 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
-			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 
@@ -295,8 +295,8 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
-			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 
@@ -305,8 +305,8 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
 	  XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
-			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-			     GEN8_RC_SEMA_IDLE_MSG_DISABLE,
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 
@@ -366,7 +366,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("14015227452"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("16015675438"),
@@ -405,36 +405,36 @@ static const struct xe_rtp_entry engine_was[] = {
 	},
 	{ XE_RTP_NAME("1509727124"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB,
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("22012856258"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14013392000"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14012419201"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4,
-			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14012419201"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4,
-			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1308578152"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER),
 		       FUNC(xe_rtp_match_first_gslice_fused_off)),
-	  XE_RTP_ACTIONS(CLR(GEN9_CS_DEBUG_MODE1,
-			     GEN12_REPLAY_MODE_GRANULARITY,
+	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1,
+			     REPLAY_MODE_GRANULARITY,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("22010960976, 14013347512"),
@@ -445,14 +445,14 @@ static const struct xe_rtp_entry engine_was[] = {
 	},
 	{ XE_RTP_NAME("1608949956, 14010198302"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN,
 			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("22010430635"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4,
-			     GEN12_DISABLE_GRF_CLEAR,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_GRF_CLEAR,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("14013202645"),
@@ -465,13 +465,13 @@ static const struct xe_rtp_entry engine_was[] = {
 	},
 	{ XE_RTP_NAME("22012532006"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7,
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
 			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("22012532006"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7,
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
 			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
@@ -507,7 +507,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	},
 	{ XE_RTP_NAME("14015227452"),
 	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("16015675438"),
@@ -526,15 +526,15 @@ static const struct xe_rtp_entry engine_was[] = {
 static const struct xe_rtp_entry lrc_was[] = {
 	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_ACTIONS(SET(GEN11_COMMON_SLICE_CHICKEN3,
-			     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3,
+			     DISABLE_CPS_AWARE_COLOR_PIPE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_ACTIONS(FIELD_SET(GEN8_CS_CHICKEN1,
-				   GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-				   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
+	  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1,
+				   PREEMPT_GPGPU_LEVEL_MASK,
+				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
 				   XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("1806527549"),
@@ -552,7 +552,7 @@ static const struct xe_rtp_entry lrc_was[] = {
 
 	{ XE_RTP_NAME("1409044764"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
-	  XE_RTP_ACTIONS(CLR(GEN11_COMMON_SLICE_CHICKEN3,
+	  XE_RTP_ACTIONS(CLR(COMMON_SLICE_CHICKEN3,
 			     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
@@ -581,7 +581,7 @@ static const struct xe_rtp_entry lrc_was[] = {
 	{ XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
-			     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
+			     DISABLE_CPS_AWARE_COLOR_PIPE,
 			     XE_RTP_ACTION_FLAG(MASKED_REG)))
 	},
 	{ XE_RTP_NAME("16013271637"),
-- 
GitLab


From 5f230a144a33d9a33448063a23d65c53b6d84cea Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:49 -0700
Subject: [PATCH 1508/2340] drm/xe: Use REG_FIELD/REG_BIT for all regs/*.h

Convert the macro declarations to the equivalent GENMASK and
and bitfield prep for all registers.

v2 (Matt Roper):
  - Fix wrong conversion of RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK
  - Reorder fields of XEHP_SLICE_UNIT_LEVEL_CLKGATE for consistency
  - Simplify CTC_SOURCE_* by only defining CTC_SOURCE_DIVIDE_LOGIC
    as REG_BIT(0)

v3: Also remove DOP_CLOCK_GATE_ENABLE that is unused and wrongly defined

Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 24 +++++-----
 drivers/gpu/drm/xe/regs/xe_gt_regs.h     | 58 ++++++++++--------------
 drivers/gpu/drm/xe/regs/xe_regs.h        | 12 ++---
 drivers/gpu/drm/xe/xe_gt_clock.c         | 19 ++++----
 4 files changed, 50 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 9d61f59412893..f1e75703e4bce 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -22,7 +22,7 @@
 #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
 
 #define RING_PSMI_CTL(base)			_MMIO((base) + 0x50)
-#define   RC_SEMA_IDLE_MSG_DISABLE			REG_BIT(12)
+#define   RC_SEMA_IDLE_MSG_DISABLE		REG_BIT(12)
 #define   WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
 
 #define RING_ACTHD_UDW(base)			_MMIO((base) + 0x5c)
@@ -54,7 +54,7 @@
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
 
 #define RING_MODE(base)				_MMIO((base) + 0x29c)
-#define   GFX_DISABLE_LEGACY_MODE		(1 << 3)
+#define   GFX_DISABLE_LEGACY_MODE		REG_BIT(3)
 
 #define RING_TIMESTAMP(base)			_MMIO((base) + 0x358)
 
@@ -68,17 +68,17 @@
 
 #define RING_FORCE_TO_NONPRIV(base, i)		_MMIO(((base) + 0x4d0) + (i) * 4)
 #define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_MASK	REG_GENMASK(29, 28)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RW	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 0)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RD	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 1)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_WR	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 2)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_INVALID	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 3)
 #define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
-#define   RING_FORCE_TO_NONPRIV_ACCESS_RW	(0 << 28)
-#define   RING_FORCE_TO_NONPRIV_ACCESS_RD	(1 << 28)
-#define   RING_FORCE_TO_NONPRIV_ACCESS_WR	(2 << 28)
-#define   RING_FORCE_TO_NONPRIV_ACCESS_INVALID	(3 << 28)
-#define   RING_FORCE_TO_NONPRIV_ACCESS_MASK	(3 << 28)
-#define   RING_FORCE_TO_NONPRIV_RANGE_1		(0 << 0)
-#define   RING_FORCE_TO_NONPRIV_RANGE_4		(1 << 0)
-#define   RING_FORCE_TO_NONPRIV_RANGE_16	(2 << 0)
-#define   RING_FORCE_TO_NONPRIV_RANGE_64	(3 << 0)
-#define   RING_FORCE_TO_NONPRIV_RANGE_MASK	(3 << 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_MASK	REG_GENMASK(1, 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_1		REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_4		REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 1)
+#define   RING_FORCE_TO_NONPRIV_RANGE_16	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 2)
+#define   RING_FORCE_TO_NONPRIV_RANGE_64	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 3)
 #define   RING_FORCE_TO_NONPRIV_MASK_VALID	(RING_FORCE_TO_NONPRIV_RANGE_MASK | \
 						 RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
 						 RING_FORCE_TO_NONPRIV_DENY)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5a0a08c84f3de..657b8cc961bb0 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -9,15 +9,13 @@
 #include "regs/xe_reg_defs.h"
 
 /* RPM unit config (Gen8+) */
-#define RPM_CONFIG0				_MMIO(0xd00)
-#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
-#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(0x7 << RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	0
+#define RPM_CONFIG0					_MMIO(0xd00)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK		REG_GENMASK(5, 3)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ		0
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
-#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ	3
-#define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
-#define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ		3
+#define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
 
 #define FORCEWAKE_ACK_MEDIA_VDBOX(n)		_MMIO(0xd50 + (n) * 4)
 #define FORCEWAKE_ACK_MEDIA_VEBOX(n)		_MMIO(0xd70 + (n) * 4)
@@ -39,15 +37,15 @@
 #define MCR_SELECTOR				_MMIO(0xfdc)
 #define GAM_MCR_SELECTOR			_MMIO(0xfe0)
 #define   MCR_MULTICAST				REG_BIT(31)
-#define   MCR_SLICE(slice)			(((slice) & 0xf) << 27)
-#define   MCR_SLICE_MASK			MCR_SLICE(0xf)
-#define   MCR_SUBSLICE(subslice)		(((subslice) & 0x7) << 24)
-#define   MCR_SUBSLICE_MASK			MCR_SUBSLICE(0x7)
+#define   MCR_SLICE_MASK			REG_GENMASK(30, 27)
+#define   MCR_SLICE(slice)			REG_FIELD_PREP(MCR_SLICE_MASK, slice)
+#define   MCR_SUBSLICE_MASK			REG_GENMASK(26, 24)
+#define   MCR_SUBSLICE(subslice)		REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice)
 #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
 #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
 
 #define FF_SLICE_CS_CHICKEN1			_MMIO(0x20e0)
-#define   FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
+#define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
 
 #define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
 #define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
@@ -59,12 +57,12 @@
 #define PS_INVOCATION_COUNT			_MMIO(0x2348)
 
 #define CS_CHICKEN1				_MMIO(0x2580)
-#define   PREEMPT_3D_OBJECT_LEVEL		(1 << 0)
 #define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
 #define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
 #define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
 #define   PREEMPT_GPGPU_COMMAND_LEVEL		PREEMPT_GPGPU_LEVEL(1, 0)
 #define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
+#define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
 
 #define GLOBAL_MOCS(i)				_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
 #define CCS_AUX_INV				_MMIO(0x4208)
@@ -131,19 +129,18 @@
 
 #define	MIRROR_FUSE3				_MMIO(0x9118)
 #define   L3BANK_PAIR_COUNT			4
-#define   L3BANK_MASK				0x0F
+#define   L3BANK_MASK				REG_GENMASK(3, 0)
 /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
 #define   MAX_MSLICES				4
-#define   MEML3_EN_MASK				0x0F
+#define   MEML3_EN_MASK				REG_GENMASK(3, 0)
 
 /* Fuse readout registers for GT */
 #define XEHP_FUSE4				_MMIO(0x9114)
 #define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
 
 #define GT_VEBOX_VDBOX_DISABLE			_MMIO(0x9140)
-#define   GT_VDBOX_DISABLE_MASK			0xff
-#define   GT_VEBOX_DISABLE_SHIFT		16
-#define   GT_VEBOX_DISABLE_MASK			(0x0f << GT_VEBOX_DISABLE_SHIFT)
+#define   GT_VEBOX_DISABLE_MASK			REG_GENMASK(19, 16)
+#define   GT_VDBOX_DISABLE_MASK			REG_GENMASK(7, 0)
 
 #define XELP_EU_ENABLE				_MMIO(0x9134)	/* "_DISABLE" on Xe_LP */
 #define   XELP_EU_MASK				REG_GENMASK(7, 0)
@@ -157,7 +154,6 @@
 
 #define MISCCPCTL				_MMIO(0x9424)
 #define   DOP_CLOCK_GATE_RENDER_ENABLE		REG_BIT(1)
-#define   DOP_CLOCK_GATE_ENABLE			REG_BIT((0)
 
 #define UNSLCGCTL9430				_MMIO(0x9430)
 #define   MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
@@ -203,12 +199,12 @@
 #define   LTCDD_CLKGATE_DIS			REG_BIT(10)
 
 #define XEHP_SLICE_UNIT_LEVEL_CLKGATE		MCR_REG(0x94d4)
-#define   SARBUNIT_CLKGATE_DIS			(1 << 5)
-#define   RCCUNIT_CLKGATE_DIS			(1 << 7)
-#define   MSCUNIT_CLKGATE_DIS			(1 << 10)
-#define   NODEDSS_CLKGATE_DIS			REG_BIT(12)
-#define   L3_CLKGATE_DIS			REG_BIT(16)
 #define   L3_CR2X_CLKGATE_DIS			REG_BIT(17)
+#define   L3_CLKGATE_DIS			REG_BIT(16)
+#define   NODEDSS_CLKGATE_DIS			REG_BIT(12)
+#define   MSCUNIT_CLKGATE_DIS			REG_BIT(10)
+#define   RCCUNIT_CLKGATE_DIS			REG_BIT(7)
+#define   SARBUNIT_CLKGATE_DIS			REG_BIT(5)
 
 #define UNSLICE_UNIT_LEVEL_CLKGATE2		_MMIO(0x94e4)
 #define   VSUNIT_CLKGATE2_DIS			REG_BIT(19)
@@ -224,7 +220,7 @@
 #define   RTFUNIT_CLKGATE_DIS			REG_BIT(18)
 
 #define DFR_RATIO_EN_AND_CHICKEN		MCR_REG(0x9550)
-#define   DFR_DISABLE				(1 << 9)
+#define   DFR_DISABLE				REG_BIT(9)
 
 #define RPNSWREQ				_MMIO(0xa008)
 #define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
@@ -232,20 +228,16 @@
 #define RC_STATE				_MMIO(0xa094)
 
 #define PMINTRMSK				_MMIO(0xa168)
-#define   PMINTR_DISABLE_REDIRECT_TO_GUC	(1 << 31)
-#define   ARAT_EXPIRED_INTRMSK			(1 << 9)
+#define   PMINTR_DISABLE_REDIRECT_TO_GUC	REG_BIT(31)
+#define   ARAT_EXPIRED_INTRMSK			REG_BIT(9)
 
 #define FORCEWAKE_GT				_MMIO(0xa188)
 
 #define PG_ENABLE				_MMIO(0xa210)
 
-/* GPM unit config (Gen9+) */
 #define CTC_MODE				_MMIO(0xa26c)
-#define   CTC_SOURCE_PARAMETER_MASK		1
-#define   CTC_SOURCE_CRYSTAL_CLOCK		0
-#define   CTC_SOURCE_DIVIDE_LOGIC		1
-#define   CTC_SHIFT_PARAMETER_SHIFT		1
-#define   CTC_SHIFT_PARAMETER_MASK		(0x3 << CTC_SHIFT_PARAMETER_SHIFT)
+#define   CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
+#define   CTC_SOURCE_DIVIDE_LOGIC		REG_BIT(0)
 
 #define FORCEWAKE_RENDER			_MMIO(0xa278)
 #define FORCEWAKE_MEDIA_VDBOX(n)		_MMIO(0xa540 + (n) * 4)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 50fc3c4690865..9d18430fd2252 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -37,10 +37,10 @@
 #define XEHPC_BCS7_RING_BASE			0x3ec000
 #define XEHPC_BCS8_RING_BASE			0x3ee000
 #define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
-#define   GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
-#define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
+#define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
+#define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
 #define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
-#define   GT_RENDER_USER_INTERRUPT		(1 <<  0)
+#define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
 
 #define FF_THREAD_MODE				_MMIO(0x20a0)
 #define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
@@ -86,10 +86,8 @@
 #define   DG1_MSTR_TILE(t)			REG_BIT(t)
 
 #define TIMESTAMP_OVERRIDE					_MMIO(0x44074)
-#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT		0
-#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		0x3ff
-#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
-#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
 
 #define GGC					_MMIO(0x108040)
 #define   GMS_MASK				REG_GENMASK(15, 8)
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 1b7d002845350..49625d49bdcc1 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -17,13 +17,12 @@ static u32 read_reference_ts_freq(struct xe_gt *gt)
 	u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE.reg);
 	u32 base_freq, frac_freq;
 
-	base_freq = ((ts_override & TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
-		     TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
+	base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK,
+				  ts_override) + 1;
 	base_freq *= 1000000;
 
-	frac_freq = ((ts_override &
-		      TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
-		     TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
+	frac_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK,
+				  ts_override);
 	frac_freq = 1000000 / (frac_freq + 1);
 
 	return base_freq + frac_freq;
@@ -35,9 +34,8 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
 	const u32 f24_mhz = 24000000;
 	const u32 f25_mhz = 25000000;
 	const u32 f38_4_mhz = 38400000;
-	u32 crystal_clock =
-		(rpm_config_reg & RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
-		RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+	u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK,
+					  rpm_config_reg);
 
 	switch (crystal_clock) {
 	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ :
@@ -62,7 +60,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	/* Assuming gen11+ so assert this assumption is correct */
 	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
 
-	if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
+	if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
 		freq = read_reference_ts_freq(gt);
 	} else {
 		u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg);
@@ -74,8 +72,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
 		 * register increments from this frequency (it might
 		 * increment only every few clock cycle).
 		 */
-		freq >>= 3 - ((c0 & RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
-			      RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+		freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
 	}
 
 	gt->info.clock_freq = freq;
-- 
GitLab


From 143e3bc7832f85676d0e4235d4238f0c9b0682da Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:50 -0700
Subject: [PATCH 1509/2340] drm/xe: Clarify register types on PAT programming

Clarify a few things related to the PAT programming, particularly on
MTL:

	- The register type doesn't change depending on the GT - what
	  happens is that media GT writes to other set of registers that
	  are not MCR
	- Remove "UNICAST": otherwise it's confusing why it's not using
	  MCR registers with the unicast function variant

Also, there isn't much reason to keep those parts as macros: promote
them to proper functions and let the compiler inline if it sees fit.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index c2faf0931649f..fcf6ae2c92ccb 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -62,31 +62,36 @@ static const u32 mtl_pat_table[] = {
 	[4] = MTL_PAT_0_WB | MTL_3_COH_2W,
 };
 
-#define PROGRAM_PAT_UNICAST(gt, table) do { \
-	for (int i = 0; i < ARRAY_SIZE(table); i++) \
-		xe_mmio_write32(gt, _PAT_INDEX(i), table[i]); \
-} while (0)
+static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
+{
+	for (int i = 0; i < n_entries; i++)
+		xe_mmio_write32(gt, _PAT_INDEX(i), table[i]);
+}
 
-#define PROGRAM_PAT_MCR(gt, table) do { \
-	for (int i = 0; i < ARRAY_SIZE(table); i++) \
-		xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]); \
-} while (0)
+static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries)
+{
+	for (int i = 0; i < n_entries; i++)
+		xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]);
+}
 
 void xe_pat_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
 	if (xe->info.platform == XE_METEORLAKE) {
+		/*
+		 * SAMedia register offsets are adjusted by the write methods
+		 * and they target registers that are not MCR, while for normal
+		 * GT they are MCR
+		 */
 		if (xe_gt_is_media_type(gt))
-			PROGRAM_PAT_UNICAST(gt, mtl_pat_table);
+			program_pat(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table));
 		else
-			PROGRAM_PAT_MCR(gt, mtl_pat_table);
-	} else if (xe->info.platform == XE_PVC) {
-		PROGRAM_PAT_MCR(gt, pvc_pat_table);
-	} else if (xe->info.platform == XE_DG2) {
-		PROGRAM_PAT_MCR(gt, pvc_pat_table);
+			program_pat_mcr(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table));
+	} else if (xe->info.platform == XE_PVC || xe->info.platform == XE_DG2) {
+		program_pat_mcr(gt, pvc_pat_table, ARRAY_SIZE(pvc_pat_table));
 	} else if (GRAPHICS_VERx100(xe) <= 1210) {
-		PROGRAM_PAT_UNICAST(gt, tgl_pat_table);
+		program_pat(gt, tgl_pat_table, ARRAY_SIZE(tgl_pat_table));
 	} else {
 		/*
 		 * Going forward we expect to need new PAT settings for most
-- 
GitLab


From 36e22be498fb8361ef411ac7d8cf9404338f6fc2 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:52 -0700
Subject: [PATCH 1510/2340] drm/xe: Introduce xe_reg/xe_reg_mcr

Stop using i915 types for registers. Use our own types. Differently from
i915, this will keep under the register definition the knowledge for the
different types of registers. For now, the "flags"/"options" are mcr and
masked, although only the former is being used.

Additionally MCR registers have their own type. The only place that
should really look inside a xe_mcr_reg_t is that code dealing with the
steering and using other APIs when the register is MCR has been a source
of problem in the past.

Most of the driver is agnostic to the register differences since they
either use the definition from the header or already call the correct
MCR_REG()/_MMIO() macros. By embeding the struct xe_reg inside the
struct it's also possible to guarantee the compiler will break if
using RANDOM_MCR_REG.reg is attempted, since now the u32 is inside the
inner struct.

v2:
  - Deep a dedicated type for MCR registers to avoid misuse
    (Matt Roper, Jani)
  - Drop the typedef and just use a struct since it's not an opaque type
    (Jani)
  - Add more kernel-doc
v3:
  - Use only 22 bits for the register address since all the platforms
    supported so far have only 4MB of MMIO per tile  (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-7-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_reg_defs.h | 95 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_mcr.c        | 44 ++++++++-----
 drivers/gpu/drm/xe/xe_gt_mcr.h        | 11 ++--
 drivers/gpu/drm/xe/xe_irq.c           |  2 +-
 drivers/gpu/drm/xe/xe_mmio.c          |  2 +-
 5 files changed, 131 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 5f6735697d9c2..e31137e2c42f2 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -8,4 +8,99 @@
 
 #include "../../i915/i915_reg_defs.h"
 
+/**
+ * struct xe_reg - Register definition
+ *
+ * Register defintion to be used by the individual register. Although the same
+ * definition is used for xe_reg and xe_reg_mcr, they use different internal
+ * APIs for accesses.
+ */
+struct xe_reg {
+	union {
+		struct {
+			/** @reg: address */
+			u32 reg:22;
+			/**
+			 * @masked: register is "masked", with upper 16bits used
+			 * to identify the bits that are updated on the lower
+			 * bits
+			 */
+			u32 masked:1;
+			/**
+			 * @mcr: register is multicast/replicated in the
+			 * hardware and needs special handling. Any register
+			 * with this set should also use a type of xe_reg_mcr_t.
+			 * It's only here so the few places that deal with MCR
+			 * registers specially (xe_sr.c) and tests using the raw
+			 * value can inspect it.
+			 */
+			u32 mcr:1;
+		};
+		/** @raw: Raw value with both address and options */
+		u32 raw;
+	};
+};
+
+/**
+ * struct xe_reg_mcr - MCR register definition
+ *
+ * MCR register is the same as a regular register, but uses another type since
+ * the internal API used for accessing them is different: it's never correct to
+ * use regular MMIO access.
+ */
+struct xe_reg_mcr {
+	/** @__reg: The register */
+	struct xe_reg __reg;
+};
+
+
+/**
+ * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits marking the
+ * read/written bits on the lower 16 bits.
+ *
+ * To be used with XE_REG(). XE_REG_MCR() and XE_REG_INITIALIZER()
+ */
+#define XE_REG_OPTION_MASKED		.masked = 1
+
+/**
+ * XE_REG_INITIALIZER - Initializer for xe_reg_t.
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ *
+ * Register field is mandatory, and additional options may be passed as
+ * arguments. Usually ``XE_REG()`` should be preferred since it creates an
+ * object of the right type. However when initializing static const storage,
+ * where a compound statement is not allowed, this can be used instead.
+ */
+#define XE_REG_INITIALIZER(r_, ...)    { .reg = r_, __VA_ARGS__ }
+
+
+/**
+ * XE_REG - Create a struct xe_reg from offset and additional flags
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG(r_, ...)		((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__))
+
+/**
+ * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG_MCR(r_, ...)	((const struct xe_reg_mcr){					\
+				 .__reg = XE_REG_INITIALIZER(r_,  ##__VA_ARGS__, .mcr = 1)	\
+				 })
+
+/*
+ * TODO: remove these once the register declarations are not using them anymore
+ */
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(r_)	((const struct xe_reg){ .reg = r_ })
+#define MCR_REG(r_)	((const struct xe_reg_mcr){ .__reg.reg = r_,		\
+						    .__reg.mcr = 1 })
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index aa04ba5a6dbee..55b240a5eaa79 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -40,6 +40,11 @@
  * non-terminated instance.
  */
 
+static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
+{
+	return reg_mcr.__reg;
+}
+
 enum {
 	MCR_OP_READ,
 	MCR_OP_WRITE
@@ -360,9 +365,10 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
  * returned.  Returns false if the caller need not perform any steering
  */
 static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
-						 i915_mcr_reg_t reg,
+						 struct xe_reg_mcr reg_mcr,
 						 u8 *group, u8 *instance)
 {
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
 	const struct xe_mmio_range *implicit_ranges;
 
 	for (int type = 0; type < IMPLICIT_STEERING; type++) {
@@ -436,9 +442,10 @@ static void mcr_unlock(struct xe_gt *gt) {
  *
  * Caller needs to make sure the relevant forcewake wells are up.
  */
-static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag,
-				int group, int instance, u32 value)
+static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+				u8 rw_flag, int group, int instance, u32 value)
 {
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
 	u32 steer_reg, steer_val, val = 0;
 
 	lockdep_assert_held(&gt->mcr_lock);
@@ -485,7 +492,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag
 /**
  * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
  * @gt: GT structure
- * @reg: register to read
+ * @reg_mcr: register to read
  *
  * Reads a GT MCR register.  The read will be steered to a non-terminated
  * instance (i.e., one that isn't fused off or powered down by power gating).
@@ -494,17 +501,19 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag
  *
  * Returns the value from a non-terminated instance of @reg.
  */
-u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
 {
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
 	u8 group, instance;
 	u32 val;
 	bool steer;
 
-	steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
+	steer = xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr,
+						     &group, &instance);
 
 	if (steer) {
 		mcr_lock(gt);
-		val = rw_with_mcr_steering(gt, reg, MCR_OP_READ,
+		val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ,
 					   group, instance, 0);
 		mcr_unlock(gt);
 	} else {
@@ -517,7 +526,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
 /**
  * xe_gt_mcr_unicast_read - read a specific instance of an MCR register
  * @gt: GT structure
- * @reg: the MCR register to read
+ * @reg_mcr: the MCR register to read
  * @group: the MCR group
  * @instance: the MCR instance
  *
@@ -525,13 +534,13 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
  * group/instance.
  */
 u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
-			   i915_mcr_reg_t reg,
+			   struct xe_reg_mcr reg_mcr,
 			   int group, int instance)
 {
 	u32 val;
 
 	mcr_lock(gt);
-	val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0);
+	val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0);
 	mcr_unlock(gt);
 
 	return val;
@@ -540,7 +549,7 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
 /**
  * xe_gt_mcr_unicast_write - write a specific instance of an MCR register
  * @gt: GT structure
- * @reg: the MCR register to write
+ * @reg_mcr: the MCR register to write
  * @value: value to write
  * @group: the MCR group
  * @instance: the MCR instance
@@ -548,24 +557,27 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
  * Write an MCR register in unicast mode after steering toward a specific
  * group/instance.
  */
-void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
-			     int group, int instance)
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+			     u32 value, int group, int instance)
 {
 	mcr_lock(gt);
-	rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value);
+	rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE, group, instance, value);
 	mcr_unlock(gt);
 }
 
 /**
  * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register
  * @gt: GT structure
- * @reg: the MCR register to write
+ * @reg_mcr: the MCR register to write
  * @value: value to write
  *
  * Write an MCR register in multicast mode to update all instances.
  */
-void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value)
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+			       u32 value)
 {
+	struct xe_reg reg = to_xe_reg(reg_mcr);
+
 	/*
 	 * Synchronize with any unicast operations.  Once we have exclusive
 	 * access, the MULTICAST bit should already be set, so there's no need
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index 2a6cd38c8cb75..27ca1bc880a00 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -15,13 +15,14 @@ void xe_gt_mcr_init(struct xe_gt *gt);
 
 void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt);
 
-u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg,
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
 			   int group, int instance);
-u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg);
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr mcr_reg);
 
-void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
-			     int group, int instance);
-void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value);
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			     u32 value, int group, int instance);
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			       u32 value);
 
 void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index e812a5b66a6b6..9dd730d707e5c 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -26,7 +26,7 @@
 #define IIR(offset)				_MMIO(offset + 0x8)
 #define IER(offset)				_MMIO(offset + 0xc)
 
-static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
+static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg)
 {
 	u32 val = xe_mmio_read32(gt, reg.reg);
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 9b466803c68e4..24a3c18421449 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -393,7 +393,7 @@ int xe_mmio_init(struct xe_device *xe)
 	DRM_XE_MMIO_READ |\
 	DRM_XE_MMIO_WRITE)
 
-static const i915_reg_t mmio_read_whitelist[] = {
+static const struct xe_reg mmio_read_whitelist[] = {
 	RING_TIMESTAMP(RENDER_RING_BASE),
 };
 
-- 
GitLab


From 3512a78a3cefcd9ec0177771f637de0fe4a64ea2 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:53 -0700
Subject: [PATCH 1511/2340] drm/xe: Use XE_REG/XE_REG_MCR

These should replace the _MMIO() and MCR_REG() from i915, with the goal
of being more extensible, allowing to pass the additional fields for
struct xe_reg and struct xe_reg_mcr. Replace all uses of _MMIO() and
MCR_REG() in xe.

Since the RTP, reg-save-restore and WA infra are not ready to use the
new type, just undef the macro like was done for the i915 types
previously. That conversion will come later.

v2: Remove MEDIA_SOFT_SCRATCH_COUNT/MEDIA_SOFT_SCRATCH re-added by
    mistake (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-8-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  74 +++----
 drivers/gpu/drm/xe/regs/xe_gt_regs.h     | 235 ++++++++++++-----------
 drivers/gpu/drm/xe/regs/xe_guc_regs.h    |  64 +++---
 drivers/gpu/drm/xe/regs/xe_reg_defs.h    |   9 -
 drivers/gpu/drm/xe/regs/xe_regs.h        |  28 +--
 drivers/gpu/drm/xe/tests/xe_rtp_test.c   |  20 +-
 drivers/gpu/drm/xe/xe_ggtt.c             |   6 +-
 drivers/gpu/drm/xe/xe_guc.c              |   4 +-
 drivers/gpu/drm/xe/xe_guc_pc.c           |  14 +-
 drivers/gpu/drm/xe/xe_irq.c              |   6 +-
 drivers/gpu/drm/xe/xe_mmio.c             |   2 +-
 drivers/gpu/drm/xe/xe_mocs.c             |   4 +-
 drivers/gpu/drm/xe/xe_pat.c              |   2 +-
 drivers/gpu/drm/xe/xe_pcode_api.h        |   6 +-
 drivers/gpu/drm/xe/xe_reg_sr.c           |   4 +-
 drivers/gpu/drm/xe/xe_reg_whitelist.c    |  12 +-
 drivers/gpu/drm/xe/xe_rtp.h              |  10 +-
 drivers/gpu/drm/xe/xe_tuning.c           |   8 +-
 drivers/gpu/drm/xe/xe_wa.c               |   8 +-
 19 files changed, 254 insertions(+), 262 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index f1e75703e4bce..80b66844a8ec4 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -10,63 +10,63 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define RING_TAIL(base)				_MMIO((base) + 0x30)
+#define RING_TAIL(base)				XE_REG((base) + 0x30)
 
-#define RING_HEAD(base)				_MMIO((base) + 0x34)
+#define RING_HEAD(base)				XE_REG((base) + 0x34)
 #define   HEAD_ADDR				0x001FFFFC
 
-#define RING_START(base)			_MMIO((base) + 0x38)
+#define RING_START(base)			XE_REG((base) + 0x38)
 
-#define RING_CTL(base)				_MMIO((base) + 0x3c)
+#define RING_CTL(base)				XE_REG((base) + 0x3c)
 #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
 #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
 
-#define RING_PSMI_CTL(base)			_MMIO((base) + 0x50)
+#define RING_PSMI_CTL(base)			XE_REG((base) + 0x50)
 #define   RC_SEMA_IDLE_MSG_DISABLE		REG_BIT(12)
 #define   WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
 
-#define RING_ACTHD_UDW(base)			_MMIO((base) + 0x5c)
-#define RING_DMA_FADD_UDW(base)			_MMIO((base) + 0x60)
-#define RING_IPEIR(base)			_MMIO((base) + 0x64)
-#define RING_IPEHR(base)			_MMIO((base) + 0x68)
-#define RING_ACTHD(base)			_MMIO((base) + 0x74)
-#define RING_DMA_FADD(base)			_MMIO((base) + 0x78)
-#define RING_HWS_PGA(base)			_MMIO((base) + 0x80)
-#define IPEIR(base)				_MMIO((base) + 0x88)
-#define IPEHR(base)				_MMIO((base) + 0x8c)
-#define RING_HWSTAM(base)			_MMIO((base) + 0x98)
-#define RING_MI_MODE(base)			_MMIO((base) + 0x9c)
-#define RING_NOPID(base)			_MMIO((base) + 0x94)
-
-#define RING_IMR(base)				_MMIO((base) + 0xa8)
+#define RING_ACTHD_UDW(base)			XE_REG((base) + 0x5c)
+#define RING_DMA_FADD_UDW(base)			XE_REG((base) + 0x60)
+#define RING_IPEIR(base)			XE_REG((base) + 0x64)
+#define RING_IPEHR(base)			XE_REG((base) + 0x68)
+#define RING_ACTHD(base)			XE_REG((base) + 0x74)
+#define RING_DMA_FADD(base)			XE_REG((base) + 0x78)
+#define RING_HWS_PGA(base)			XE_REG((base) + 0x80)
+#define IPEIR(base)				XE_REG((base) + 0x88)
+#define IPEHR(base)				XE_REG((base) + 0x8c)
+#define RING_HWSTAM(base)			XE_REG((base) + 0x98)
+#define RING_MI_MODE(base)			XE_REG((base) + 0x9c)
+#define RING_NOPID(base)			XE_REG((base) + 0x94)
+
+#define RING_IMR(base)				XE_REG((base) + 0xa8)
 #define   RING_MAX_NONPRIV_SLOTS  12
 
-#define RING_EIR(base)				_MMIO((base) + 0xb0)
-#define RING_EMR(base)				_MMIO((base) + 0xb4)
-#define RING_ESR(base)				_MMIO((base) + 0xb8)
-#define RING_BBADDR(base)			_MMIO((base) + 0x140)
-#define RING_BBADDR_UDW(base)			_MMIO((base) + 0x168)
-#define RING_EXECLIST_STATUS_LO(base)		_MMIO((base) + 0x234)
-#define RING_EXECLIST_STATUS_HI(base)		_MMIO((base) + 0x234 + 4)
+#define RING_EIR(base)				XE_REG((base) + 0xb0)
+#define RING_EMR(base)				XE_REG((base) + 0xb4)
+#define RING_ESR(base)				XE_REG((base) + 0xb8)
+#define RING_BBADDR(base)			XE_REG((base) + 0x140)
+#define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
+#define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
+#define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
 
-#define RING_CONTEXT_CONTROL(base)		_MMIO((base) + 0x244)
+#define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244)
 #define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	REG_BIT(3)
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
 
-#define RING_MODE(base)				_MMIO((base) + 0x29c)
+#define RING_MODE(base)				XE_REG((base) + 0x29c)
 #define   GFX_DISABLE_LEGACY_MODE		REG_BIT(3)
 
-#define RING_TIMESTAMP(base)			_MMIO((base) + 0x358)
+#define RING_TIMESTAMP(base)			XE_REG((base) + 0x358)
 
-#define RING_TIMESTAMP_UDW(base)		_MMIO((base) + 0x358 + 4)
+#define RING_TIMESTAMP_UDW(base)		XE_REG((base) + 0x358 + 4)
 #define   RING_VALID_MASK			0x00000001
 #define   RING_VALID				0x00000001
 #define   STOP_RING				REG_BIT(8)
 #define   TAIL_ADDR				0x001FFFF8
 
-#define RING_CTX_TIMESTAMP(base)		_MMIO((base) + 0x3a8)
+#define RING_CTX_TIMESTAMP(base)		XE_REG((base) + 0x3a8)
 
-#define RING_FORCE_TO_NONPRIV(base, i)		_MMIO(((base) + 0x4d0) + (i) * 4)
+#define RING_FORCE_TO_NONPRIV(base, i)		XE_REG(((base) + 0x4d0) + (i) * 4)
 #define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_MASK	REG_GENMASK(29, 28)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RW	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 0)
@@ -84,16 +84,16 @@
 						 RING_FORCE_TO_NONPRIV_DENY)
 #define   RING_MAX_NONPRIV_SLOTS  12
 
-#define RING_EXECLIST_SQ_CONTENTS_LO(base)	_MMIO((base) + 0x510)
-#define RING_EXECLIST_SQ_CONTENTS_HI(base)	_MMIO((base) + 0x510 + 4)
+#define RING_EXECLIST_SQ_CONTENTS_LO(base)	XE_REG((base) + 0x510)
+#define RING_EXECLIST_SQ_CONTENTS_HI(base)	XE_REG((base) + 0x510 + 4)
 
-#define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
+#define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
 #define	  EL_CTRL_LOAD				REG_BIT(0)
 
-#define VDBOX_CGCTL3F10(base)			_MMIO((base) + 0x3f10)
+#define VDBOX_CGCTL3F10(base)			XE_REG((base) + 0x3f10)
 #define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
 
-#define VDBOX_CGCTL3F18(base)			_MMIO((base) + 0x3f18)
+#define VDBOX_CGCTL3F18(base)			XE_REG((base) + 0x3f18)
 #define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
 
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 657b8cc961bb0..5648305a8f5a4 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -9,7 +9,7 @@
 #include "regs/xe_reg_defs.h"
 
 /* RPM unit config (Gen8+) */
-#define RPM_CONFIG0					_MMIO(0xd00)
+#define RPM_CONFIG0					XE_REG(0xd00)
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK		REG_GENMASK(5, 3)
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ		0
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
@@ -17,25 +17,26 @@
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ		3
 #define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
 
-#define FORCEWAKE_ACK_MEDIA_VDBOX(n)		_MMIO(0xd50 + (n) * 4)
-#define FORCEWAKE_ACK_MEDIA_VEBOX(n)		_MMIO(0xd70 + (n) * 4)
-#define FORCEWAKE_ACK_RENDER			_MMIO(0xd84)
+#define FORCEWAKE_ACK_MEDIA_VDBOX(n)		XE_REG(0xd50 + (n) * 4)
+#define FORCEWAKE_ACK_MEDIA_VEBOX(n)		XE_REG(0xd70 + (n) * 4)
+#define FORCEWAKE_ACK_RENDER			XE_REG(0xd84)
 
-#define GMD_ID					_MMIO(0xd8c)
+#define GMD_ID					XE_REG(0xd8c)
 #define   GMD_ID_ARCH_MASK			REG_GENMASK(31, 22)
 #define   GMD_ID_RELEASE_MASK			REG_GENMASK(21, 14)
 #define   GMD_ID_STEP				REG_GENMASK(5, 0)
 
-#define FORCEWAKE_ACK_GT_MTL			_MMIO(0xdfc)
+#define FORCEWAKE_ACK_GT_MTL			XE_REG(0xdfc)
 
-#define LNCFCMOCS(i)				_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
+/* L3 Cache Control */
+#define LNCFCMOCS(i)				XE_REG(0xb020 + (i) * 4)
 #define LNCFCMOCS_REG_COUNT			32
 
-#define MCFG_MCR_SELECTOR			_MMIO(0xfd0)
-#define MTL_MCR_SELECTOR			_MMIO(0xfd4)
-#define SF_MCR_SELECTOR				_MMIO(0xfd8)
-#define MCR_SELECTOR				_MMIO(0xfdc)
-#define GAM_MCR_SELECTOR			_MMIO(0xfe0)
+#define MCFG_MCR_SELECTOR			XE_REG(0xfd0)
+#define MTL_MCR_SELECTOR			XE_REG(0xfd4)
+#define SF_MCR_SELECTOR				XE_REG(0xfd8)
+#define MCR_SELECTOR				XE_REG(0xfdc)
+#define GAM_MCR_SELECTOR			XE_REG(0xfe0)
 #define   MCR_MULTICAST				REG_BIT(31)
 #define   MCR_SLICE_MASK			REG_GENMASK(30, 27)
 #define   MCR_SLICE(slice)			REG_FIELD_PREP(MCR_SLICE_MASK, slice)
@@ -44,19 +45,19 @@
 #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
 #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
 
-#define FF_SLICE_CS_CHICKEN1			_MMIO(0x20e0)
+#define FF_SLICE_CS_CHICKEN1			XE_REG(0x20e0)
 #define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
 
-#define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
+#define FF_SLICE_CS_CHICKEN2			XE_REG(0x20e4)
 #define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
 
-#define CS_DEBUG_MODE1				_MMIO(0x20ec)
+#define CS_DEBUG_MODE1				XE_REG(0x20ec)
 #define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
 #define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
 
-#define PS_INVOCATION_COUNT			_MMIO(0x2348)
+#define PS_INVOCATION_COUNT			XE_REG(0x2348)
 
-#define CS_CHICKEN1				_MMIO(0x2580)
+#define CS_CHICKEN1				XE_REG(0x2580)
 #define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
 #define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
 #define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
@@ -64,70 +65,70 @@
 #define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
 #define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
 
-#define GLOBAL_MOCS(i)				_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
-#define CCS_AUX_INV				_MMIO(0x4208)
+#define GLOBAL_MOCS(i)				XE_REG(0x4000 + (i) * 4) /* Global MOCS regs */
+#define CCS_AUX_INV				XE_REG(0x4208)
 
-#define VD0_AUX_INV				_MMIO(0x4218)
-#define VE0_AUX_INV				_MMIO(0x4238)
+#define VD0_AUX_INV				XE_REG(0x4218)
+#define VE0_AUX_INV				XE_REG(0x4238)
 
-#define VE1_AUX_INV				_MMIO(0x42b8)
+#define VE1_AUX_INV				XE_REG(0x42b8)
 #define   AUX_INV				REG_BIT(0)
 
-#define XEHP_TILE0_ADDR_RANGE			MCR_REG(0x4900)
-#define XEHP_FLAT_CCS_BASE_ADDR			MCR_REG(0x4910)
+#define XEHP_TILE0_ADDR_RANGE			XE_REG_MCR(0x4900)
+#define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
 
-#define CHICKEN_RASTER_1			MCR_REG(0x6204)
+#define CHICKEN_RASTER_1			XE_REG_MCR(0x6204)
 #define   DIS_SF_ROUND_NEAREST_EVEN		REG_BIT(8)
 
-#define CHICKEN_RASTER_2			MCR_REG(0x6208)
+#define CHICKEN_RASTER_2			XE_REG_MCR(0x6208)
 #define   TBIMR_FAST_CLIP			REG_BIT(5)
 
-#define VFLSKPD					MCR_REG(0x62a8)
+#define VFLSKPD					XE_REG_MCR(0x62a8)
 #define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
 
-#define FF_MODE2				_MMIO(0x6604)
-#define XEHP_FF_MODE2				MCR_REG(0x6604)
+#define FF_MODE2				XE_REG(0x6604)
+#define XEHP_FF_MODE2				XE_REG_MCR(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
 #define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
 #define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
 
-#define CACHE_MODE_1				_MMIO(0x7004)
+#define CACHE_MODE_1				XE_REG(0x7004)
 #define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
 
-#define XEHP_PSS_MODE2				MCR_REG(0x703c)
+#define XEHP_PSS_MODE2				XE_REG_MCR(0x703c)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
-#define HIZ_CHICKEN					_MMIO(0x7018)
+#define HIZ_CHICKEN					XE_REG(0x7018)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
 #define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
 
-#define COMMON_SLICE_CHICKEN1			_MMIO(0x7010)
+#define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
 
-#define COMMON_SLICE_CHICKEN4			_MMIO(0x7300)
+#define COMMON_SLICE_CHICKEN4			XE_REG(0x7300)
 #define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
 
-#define COMMON_SLICE_CHICKEN3				_MMIO(0x7304)
-#define XEHP_COMMON_SLICE_CHICKEN3			MCR_REG(0x7304)
+#define COMMON_SLICE_CHICKEN3				XE_REG(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3			XE_REG_MCR(0x7304)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
 #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE		REG_BIT(12)
 #define   BLEND_EMB_FIX_DISABLE_IN_RCC			REG_BIT(11)
 #define   DISABLE_CPS_AWARE_COLOR_PIPE			REG_BIT(9)
 
-#define XEHP_SLICE_COMMON_ECO_CHICKEN1		MCR_REG(0x731c)
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1		XE_REG_MCR(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
 
-#define VF_PREEMPTION				_MMIO(0x83a4)
+#define VF_PREEMPTION				XE_REG(0x83a4)
 #define   PREEMPTION_VERTEX_COUNT		REG_GENMASK(15, 0)
 
-#define VFG_PREEMPTION_CHICKEN			_MMIO(0x83b4)
+#define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4)
 #define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
 
-#define XEHP_SQCM				MCR_REG(0x8724)
+#define XEHP_SQCM				XE_REG_MCR(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
-#define	MIRROR_FUSE3				_MMIO(0x9118)
+#define	MIRROR_FUSE3				XE_REG(0x9118)
 #define   L3BANK_PAIR_COUNT			4
 #define   L3BANK_MASK				REG_GENMASK(3, 0)
 /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
@@ -135,30 +136,30 @@
 #define   MEML3_EN_MASK				REG_GENMASK(3, 0)
 
 /* Fuse readout registers for GT */
-#define XEHP_FUSE4				_MMIO(0x9114)
+#define XEHP_FUSE4				XE_REG(0x9114)
 #define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
 
-#define GT_VEBOX_VDBOX_DISABLE			_MMIO(0x9140)
+#define GT_VEBOX_VDBOX_DISABLE			XE_REG(0x9140)
 #define   GT_VEBOX_DISABLE_MASK			REG_GENMASK(19, 16)
 #define   GT_VDBOX_DISABLE_MASK			REG_GENMASK(7, 0)
 
-#define XELP_EU_ENABLE				_MMIO(0x9134)	/* "_DISABLE" on Xe_LP */
+#define XELP_EU_ENABLE				XE_REG(0x9134)	/* "_DISABLE" on Xe_LP */
 #define   XELP_EU_MASK				REG_GENMASK(7, 0)
-#define XELP_GT_GEOMETRY_DSS_ENABLE		_MMIO(0x913c)
-#define XEHP_GT_COMPUTE_DSS_ENABLE		_MMIO(0x9144)
-#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		_MMIO(0x9148)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		XE_REG(0x913c)
+#define XEHP_GT_COMPUTE_DSS_ENABLE		XE_REG(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		XE_REG(0x9148)
 
-#define GDRST					_MMIO(0x941c)
+#define GDRST					XE_REG(0x941c)
 #define   GRDOM_GUC				REG_BIT(3)
 #define   GRDOM_FULL				REG_BIT(0)
 
-#define MISCCPCTL				_MMIO(0x9424)
+#define MISCCPCTL				XE_REG(0x9424)
 #define   DOP_CLOCK_GATE_RENDER_ENABLE		REG_BIT(1)
 
-#define UNSLCGCTL9430				_MMIO(0x9430)
+#define UNSLCGCTL9430				XE_REG(0x9430)
 #define   MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
 
-#define UNSLICE_UNIT_LEVEL_CLKGATE		_MMIO(0x9434)
+#define UNSLICE_UNIT_LEVEL_CLKGATE		XE_REG(0x9434)
 #define   VFUNIT_CLKGATE_DIS			REG_BIT(20)
 #define   TSGUNIT_CLKGATE_DIS			REG_BIT(17) /* XEHPSDV */
 #define   CG3DDISCFEG_CLKGATE_DIS		REG_BIT(17) /* DG2 */
@@ -166,7 +167,7 @@
 #define   HSUNIT_CLKGATE_DIS			REG_BIT(8)
 #define   VSUNIT_CLKGATE_DIS			REG_BIT(3)
 
-#define UNSLCGCTL9440				_MMIO(0x9440)
+#define UNSLCGCTL9440				XE_REG(0x9440)
 #define   GAMTLBOACS_CLKGATE_DIS		REG_BIT(28)
 #define   GAMTLBVDBOX5_CLKGATE_DIS		REG_BIT(27)
 #define   GAMTLBVDBOX6_CLKGATE_DIS		REG_BIT(26)
@@ -180,7 +181,7 @@
 #define   GAMTLBBLT_CLKGATE_DIS			REG_BIT(14)
 #define   GAMTLBVDBOX1_CLKGATE_DIS		REG_BIT(6)
 
-#define UNSLCGCTL9444				_MMIO(0x9444)
+#define UNSLCGCTL9444				XE_REG(0x9444)
 #define   GAMTLBGFXA0_CLKGATE_DIS		REG_BIT(30)
 #define   GAMTLBGFXA1_CLKGATE_DIS		REG_BIT(29)
 #define   GAMTLBCOMPA0_CLKGATE_DIS		REG_BIT(28)
@@ -198,7 +199,7 @@
 #define   GAMTLBVEBOX0_CLKGATE_DIS		REG_BIT(16)
 #define   LTCDD_CLKGATE_DIS			REG_BIT(10)
 
-#define XEHP_SLICE_UNIT_LEVEL_CLKGATE		MCR_REG(0x94d4)
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE		XE_REG_MCR(0x94d4)
 #define   L3_CR2X_CLKGATE_DIS			REG_BIT(17)
 #define   L3_CLKGATE_DIS			REG_BIT(16)
 #define   NODEDSS_CLKGATE_DIS			REG_BIT(12)
@@ -206,85 +207,85 @@
 #define   RCCUNIT_CLKGATE_DIS			REG_BIT(7)
 #define   SARBUNIT_CLKGATE_DIS			REG_BIT(5)
 
-#define UNSLICE_UNIT_LEVEL_CLKGATE2		_MMIO(0x94e4)
+#define UNSLICE_UNIT_LEVEL_CLKGATE2		XE_REG(0x94e4)
 #define   VSUNIT_CLKGATE2_DIS			REG_BIT(19)
 
-#define SUBSLICE_UNIT_LEVEL_CLKGATE		MCR_REG(0x9524)
+#define SUBSLICE_UNIT_LEVEL_CLKGATE		XE_REG_MCR(0x9524)
 #define   DSS_ROUTER_CLKGATE_DIS		REG_BIT(28)
 #define   GWUNIT_CLKGATE_DIS			REG_BIT(16)
 
-#define SUBSLICE_UNIT_LEVEL_CLKGATE2		MCR_REG(0x9528)
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2		XE_REG_MCR(0x9528)
 #define   CPSSUNIT_CLKGATE_DIS			REG_BIT(9)
 
-#define SSMCGCTL9530				MCR_REG(0x9530)
+#define SSMCGCTL9530				XE_REG_MCR(0x9530)
 #define   RTFUNIT_CLKGATE_DIS			REG_BIT(18)
 
-#define DFR_RATIO_EN_AND_CHICKEN		MCR_REG(0x9550)
+#define DFR_RATIO_EN_AND_CHICKEN		XE_REG_MCR(0x9550)
 #define   DFR_DISABLE				REG_BIT(9)
 
-#define RPNSWREQ				_MMIO(0xa008)
+#define RPNSWREQ				XE_REG(0xa008)
 #define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
-#define RC_CONTROL				_MMIO(0xa090)
-#define RC_STATE				_MMIO(0xa094)
+#define RC_CONTROL				XE_REG(0xa090)
+#define RC_STATE				XE_REG(0xa094)
 
-#define PMINTRMSK				_MMIO(0xa168)
+#define PMINTRMSK				XE_REG(0xa168)
 #define   PMINTR_DISABLE_REDIRECT_TO_GUC	REG_BIT(31)
 #define   ARAT_EXPIRED_INTRMSK			REG_BIT(9)
 
-#define FORCEWAKE_GT				_MMIO(0xa188)
+#define FORCEWAKE_GT				XE_REG(0xa188)
 
-#define PG_ENABLE				_MMIO(0xa210)
+#define PG_ENABLE				XE_REG(0xa210)
 
-#define CTC_MODE				_MMIO(0xa26c)
+#define CTC_MODE				XE_REG(0xa26c)
 #define   CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
 #define   CTC_SOURCE_DIVIDE_LOGIC		REG_BIT(0)
 
-#define FORCEWAKE_RENDER			_MMIO(0xa278)
-#define FORCEWAKE_MEDIA_VDBOX(n)		_MMIO(0xa540 + (n) * 4)
-#define FORCEWAKE_MEDIA_VEBOX(n)		_MMIO(0xa560 + (n) * 4)
+#define FORCEWAKE_RENDER			XE_REG(0xa278)
+#define FORCEWAKE_MEDIA_VDBOX(n)		XE_REG(0xa540 + (n) * 4)
+#define FORCEWAKE_MEDIA_VEBOX(n)		XE_REG(0xa560 + (n) * 4)
 
-#define XEHPC_LNCFMISCCFGREG0			MCR_REG(0xb01c)
+#define XEHPC_LNCFMISCCFGREG0			XE_REG_MCR(0xb01c)
 #define   XEHPC_OVRLSCCC			REG_BIT(0)
 
-#define XEHP_L3NODEARBCFG			MCR_REG(0xb0b4)
+#define XEHP_L3NODEARBCFG			XE_REG_MCR(0xb0b4)
 #define   XEHP_LNESPARE				REG_BIT(19)
 
-#define XEHP_L3SQCREG5				MCR_REG(0xb158)
+#define XEHP_L3SQCREG5				XE_REG_MCR(0xb158)
 #define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
 
-#define XEHP_L3SCQREG7				MCR_REG(0xb188)
+#define XEHP_L3SCQREG7				XE_REG_MCR(0xb188)
 #define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
 
-#define XEHP_MERT_MOD_CTRL			MCR_REG(0xcf28)
-#define RENDER_MOD_CTRL				MCR_REG(0xcf2c)
-#define COMP_MOD_CTRL				MCR_REG(0xcf30)
-#define XEHP_VDBX_MOD_CTRL			MCR_REG(0xcf34)
-#define XEHP_VEBX_MOD_CTRL			MCR_REG(0xcf38)
+#define XEHP_MERT_MOD_CTRL			XE_REG_MCR(0xcf28)
+#define RENDER_MOD_CTRL				XE_REG_MCR(0xcf2c)
+#define COMP_MOD_CTRL				XE_REG_MCR(0xcf30)
+#define XEHP_VDBX_MOD_CTRL			XE_REG_MCR(0xcf34)
+#define XEHP_VEBX_MOD_CTRL			XE_REG_MCR(0xcf38)
 #define   FORCE_MISS_FTLB			REG_BIT(3)
 
-#define XEHP_GAMSTLB_CTRL			MCR_REG(0xcf4c)
+#define XEHP_GAMSTLB_CTRL			XE_REG_MCR(0xcf4c)
 #define   CONTROL_BLOCK_CLKGATE_DIS		REG_BIT(12)
 #define   EGRESS_BLOCK_CLKGATE_DIS		REG_BIT(11)
 #define   TAG_BLOCK_CLKGATE_DIS			REG_BIT(7)
 
-#define XEHP_GAMCNTRL_CTRL			MCR_REG(0xcf54)
+#define XEHP_GAMCNTRL_CTRL			XE_REG_MCR(0xcf54)
 #define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
 #define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
 
-#define SAMPLER_MODE				MCR_REG(0xe18c)
+#define SAMPLER_MODE				XE_REG_MCR(0xe18c)
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
 #define   SAMPLER_ENABLE_HEADLESS_MSG		REG_BIT(5)
 
-#define HALF_SLICE_CHICKEN7				MCR_REG(0xe194)
+#define HALF_SLICE_CHICKEN7				XE_REG_MCR(0xe194)
 #define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
 
-#define CACHE_MODE_SS				MCR_REG(0xe420)
+#define CACHE_MODE_SS				XE_REG_MCR(0xe420)
 #define   ENABLE_EU_COUNT_FOR_TDL_FLUSH		REG_BIT(10)
 #define   DISABLE_ECC				REG_BIT(5)
 #define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
 
-#define ROW_CHICKEN4				MCR_REG(0xe48c)
+#define ROW_CHICKEN4				XE_REG_MCR(0xe48c)
 #define   DISABLE_GRF_CLEAR			REG_BIT(13)
 #define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
 #define   DISABLE_TDL_PUSH			REG_BIT(9)
@@ -293,84 +294,84 @@
 #define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
 #define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
-#define ROW_CHICKEN				MCR_REG(0xe4f0)
+#define ROW_CHICKEN				XE_REG_MCR(0xe4f0)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
 
-#define ROW_CHICKEN2				MCR_REG(0xe4f4)
+#define ROW_CHICKEN2				XE_REG_MCR(0xe4f4)
 #define   DISABLE_READ_SUPPRESSION		REG_BIT(15)
 #define   DISABLE_EARLY_READ			REG_BIT(14)
 #define   ENABLE_LARGE_GRF_MODE			REG_BIT(12)
 #define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
 #define   DISABLE_DOP_GATING			REG_BIT(0)
 
-#define XEHP_HDC_CHICKEN0					MCR_REG(0xe5f0)
+#define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
 
-#define RT_CTRL					MCR_REG(0xe530)
+#define RT_CTRL					XE_REG_MCR(0xe530)
 #define   DIS_NULL_QUERY			REG_BIT(10)
 
-#define LSC_CHICKEN_BIT_0			MCR_REG(0xe7c8)
+#define LSC_CHICKEN_BIT_0			XE_REG_MCR(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
 #define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
 
-#define LSC_CHICKEN_BIT_0_UDW			MCR_REG(0xe7c8 + 4)
+#define LSC_CHICKEN_BIT_0_UDW			XE_REG_MCR(0xe7c8 + 4)
 #define   DIS_CHAIN_2XSIMD8			REG_BIT(55 - 32)
 #define   FORCE_SLM_FENCE_SCOPE_TO_TILE		REG_BIT(42 - 32)
 #define   FORCE_UGM_FENCE_SCOPE_TO_TILE		REG_BIT(41 - 32)
 #define   MAXREQS_PER_BANK			REG_GENMASK(39 - 32, 37 - 32)
 #define   DISABLE_128B_EVICTION_COMMAND_UDW	REG_BIT(36 - 32)
 
-#define SARB_CHICKEN1				MCR_REG(0xe90c)
+#define SARB_CHICKEN1				XE_REG_MCR(0xe90c)
 #define   COMP_CKN_IN				REG_GENMASK(30, 29)
 
-#define RCU_MODE				_MMIO(0x14800)
+#define RCU_MODE				XE_REG(0x14800)
 #define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
 
-#define FORCEWAKE_ACK_GT			_MMIO(0x130044)
+#define FORCEWAKE_ACK_GT			XE_REG(0x130044)
 #define   FORCEWAKE_KERNEL			BIT(0)
 #define   FORCEWAKE_USER			BIT(1)
 #define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 
-#define GT_CORE_STATUS				_MMIO(0x138060)
+#define GT_CORE_STATUS				XE_REG(0x138060)
 #define   RCN_MASK				REG_GENMASK(2, 0)
 #define   GT_RC0				0
 #define   GT_RC6				3
 
-#define GT_GFX_RC6_LOCKED			_MMIO(0x138104)
-#define GT_GFX_RC6				_MMIO(0x138108)
+#define GT_GFX_RC6_LOCKED			XE_REG(0x138104)
+#define GT_GFX_RC6				XE_REG(0x138108)
 
-#define GT_INTR_DW(x)				_MMIO(0x190018 + ((x) * 4))
+#define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4))
 
-#define GUC_SG_INTR_ENABLE			_MMIO(0x190038)
+#define GUC_SG_INTR_ENABLE			XE_REG(0x190038)
 #define   ENGINE1_MASK				REG_GENMASK(31, 16)
 #define   ENGINE0_MASK				REG_GENMASK(15, 0)
 
-#define GPM_WGBOXPERF_INTR_ENABLE		_MMIO(0x19003c)
+#define GPM_WGBOXPERF_INTR_ENABLE		XE_REG(0x19003c)
 
-#define INTR_IDENTITY_REG(x)			_MMIO(0x190060 + ((x) * 4))
+#define INTR_IDENTITY_REG(x)			XE_REG(0x190060 + ((x) * 4))
 #define   INTR_DATA_VALID			REG_BIT(31)
 #define   INTR_ENGINE_INSTANCE(x)		REG_FIELD_GET(GENMASK(25, 20), x)
 #define   INTR_ENGINE_CLASS(x)			REG_FIELD_GET(GENMASK(18, 16), x)
 #define   INTR_ENGINE_INTR(x)			REG_FIELD_GET(GENMASK(15, 0), x)
 #define   OTHER_GUC_INSTANCE			0
 
-#define RENDER_COPY_INTR_ENABLE			_MMIO(0x190030)
-#define VCS_VECS_INTR_ENABLE			_MMIO(0x190034)
-#define CCS_RSVD_INTR_ENABLE			_MMIO(0x190048)
-#define IIR_REG_SELECTOR(x)			_MMIO(0x190070 + ((x) * 4))
-#define RCS0_RSVD_INTR_MASK			_MMIO(0x190090)
-#define BCS_RSVD_INTR_MASK			_MMIO(0x1900a0)
-#define VCS0_VCS1_INTR_MASK			_MMIO(0x1900a8)
-#define VCS2_VCS3_INTR_MASK			_MMIO(0x1900ac)
-#define VECS0_VECS1_INTR_MASK			_MMIO(0x1900d0)
-#define GUC_SG_INTR_MASK			_MMIO(0x1900e8)
-#define GPM_WGBOXPERF_INTR_MASK			_MMIO(0x1900ec)
-#define CCS0_CCS1_INTR_MASK			_MMIO(0x190100)
-#define CCS2_CCS3_INTR_MASK			_MMIO(0x190104)
-#define XEHPC_BCS1_BCS2_INTR_MASK		_MMIO(0x190110)
-#define XEHPC_BCS3_BCS4_INTR_MASK		_MMIO(0x190114)
-#define XEHPC_BCS5_BCS6_INTR_MASK		_MMIO(0x190118)
-#define XEHPC_BCS7_BCS8_INTR_MASK		_MMIO(0x19011c)
+#define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
+#define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
+#define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048)
+#define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4))
+#define RCS0_RSVD_INTR_MASK			XE_REG(0x190090)
+#define BCS_RSVD_INTR_MASK			XE_REG(0x1900a0)
+#define VCS0_VCS1_INTR_MASK			XE_REG(0x1900a8)
+#define VCS2_VCS3_INTR_MASK			XE_REG(0x1900ac)
+#define VECS0_VECS1_INTR_MASK			XE_REG(0x1900d0)
+#define GUC_SG_INTR_MASK			XE_REG(0x1900e8)
+#define GPM_WGBOXPERF_INTR_MASK			XE_REG(0x1900ec)
+#define CCS0_CCS1_INTR_MASK			XE_REG(0x190100)
+#define CCS2_CCS3_INTR_MASK			XE_REG(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK		XE_REG(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK		XE_REG(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK		XE_REG(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK		XE_REG(0x19011c)
 
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index bc9b42b38795f..37e0ac5509317 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -13,7 +13,7 @@
 
 /* Definitions of GuC H/W registers, bits, etc */
 
-#define GUC_STATUS			_MMIO(0xc000)
+#define GUC_STATUS			XE_REG(0xc000)
 #define   GS_AUTH_STATUS_MASK		REG_GENMASK(31, 30)
 #define   GS_AUTH_STATUS_BAD		REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1)
 #define   GS_AUTH_STATUS_GOOD		REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2)
@@ -27,52 +27,52 @@
 #define   GS_BOOTROM_JUMP_PASSED	REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76)
 #define   GS_MIA_IN_RESET		REG_BIT(0)
 
-#define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
+#define SOFT_SCRATCH(n)			XE_REG(0xc180 + (n) * 4)
 #define SOFT_SCRATCH_COUNT		16
 
-#define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH(i)		XE_REG(0xc200 + (i) * 4)
 #define UOS_RSA_SCRATCH_COUNT		64
 
-#define DMA_ADDR_0_LOW			_MMIO(0xc300)
-#define DMA_ADDR_0_HIGH			_MMIO(0xc304)
-#define DMA_ADDR_1_LOW			_MMIO(0xc308)
-#define DMA_ADDR_1_HIGH			_MMIO(0xc30c)
+#define DMA_ADDR_0_LOW			XE_REG(0xc300)
+#define DMA_ADDR_0_HIGH			XE_REG(0xc304)
+#define DMA_ADDR_1_LOW			XE_REG(0xc308)
+#define DMA_ADDR_1_HIGH			XE_REG(0xc30c)
 #define   DMA_ADDR_SPACE_MASK		REG_GENMASK(20, 16)
 #define   DMA_ADDRESS_SPACE_WOPCM	REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7)
-#define DMA_COPY_SIZE			_MMIO(0xc310)
-#define DMA_CTRL			_MMIO(0xc314)
+#define DMA_COPY_SIZE			XE_REG(0xc310)
+#define DMA_CTRL			XE_REG(0xc314)
 #define   HUC_UKERNEL			REG_BIT(9)
 #define   UOS_MOVE			REG_BIT(4)
 #define   START_DMA			REG_BIT(0)
-#define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
+#define DMA_GUC_WOPCM_OFFSET		XE_REG(0xc340)
 #define   GUC_WOPCM_OFFSET_SHIFT	14
 #define   GUC_WOPCM_OFFSET_MASK		REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT)
 #define   HUC_LOADING_AGENT_GUC		REG_BIT(1)
 #define   GUC_WOPCM_OFFSET_VALID	REG_BIT(0)
-#define GUC_MAX_IDLE_COUNT		_MMIO(0xc3e4)
+#define GUC_MAX_IDLE_COUNT		XE_REG(0xc3e4)
 
-#define HUC_STATUS2			_MMIO(0xd3b0)
+#define HUC_STATUS2			XE_REG(0xd3b0)
 #define   HUC_FW_VERIFIED		REG_BIT(7)
 
-#define HUC_KERNEL_LOAD_INFO		_MMIO(0xc1dc)
+#define HUC_KERNEL_LOAD_INFO		XE_REG(0xc1dc)
 #define   HUC_LOAD_SUCCESSFUL		REG_BIT(0)
 
-#define GUC_WOPCM_SIZE			_MMIO(0xc050)
+#define GUC_WOPCM_SIZE			XE_REG(0xc050)
 #define   GUC_WOPCM_SIZE_MASK		REG_GENMASK(31, 12)
 #define   GUC_WOPCM_SIZE_LOCKED		REG_BIT(0)
 
-#define GT_PM_CONFIG			_MMIO(0x13816c)
+#define GT_PM_CONFIG			XE_REG(0x13816c)
 #define   GT_DOORBELL_ENABLE		REG_BIT(0)
 
-#define GTCR				_MMIO(0x4274)
+#define GTCR				XE_REG(0x4274)
 #define   GTCR_INVALIDATE		REG_BIT(0)
 
-#define GUC_TLB_INV_CR			_MMIO(0xcee8)
+#define GUC_TLB_INV_CR			XE_REG(0xcee8)
 #define   GUC_TLB_INV_CR_INVALIDATE	REG_BIT(0)
 
-#define GUC_ARAT_C6DIS				_MMIO(0xa178)
+#define GUC_ARAT_C6DIS				XE_REG(0xa178)
 
-#define GUC_SHIM_CONTROL			_MMIO(0xc064)
+#define GUC_SHIM_CONTROL			XE_REG(0xc064)
 #define   PVC_GUC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
 #define   PVC_GUC_MOCS_UC_INDEX			1
 #define   PVC_GUC_MOCS_INDEX(index)		REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \
@@ -87,9 +87,9 @@
 #define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	REG_BIT(0)
 
 
-#define GUC_SEND_INTERRUPT			_MMIO(0xc4c8)
+#define GUC_SEND_INTERRUPT			XE_REG(0xc4c8)
 #define   GUC_SEND_TRIGGER			REG_BIT(0)
-#define GUC_HOST_INTERRUPT			_MMIO(0x1901f0)
+#define GUC_HOST_INTERRUPT			XE_REG(0x1901f0)
 
 #define GUC_NUM_DOORBELLS			256
 
@@ -103,24 +103,24 @@ struct guc_doorbell_info {
 	u32 reserved[14];
 } __packed;
 
-#define DRBREGL(x)				_MMIO(0x1000 + (x) * 8)
+#define DRBREGL(x)				XE_REG(0x1000 + (x) * 8)
 #define   DRB_VALID				REG_BIT(0)
-#define DRBREGU(x)				_MMIO(0x1000 + (x) * 8 + 4)
+#define DRBREGU(x)				XE_REG(0x1000 + (x) * 8 + 4)
 
-#define DIST_DBS_POPULATED			_MMIO(0xd08)
+#define DIST_DBS_POPULATED			XE_REG(0xd08)
 #define   DOORBELLS_PER_SQIDI_MASK		REG_GENMASK(23, 16)
 #define   SQIDIS_DOORBELL_EXIST_MASK		REG_GENMASK(15, 0)
 
-#define GUC_BCS_RCS_IER				_MMIO(0xC550)
-#define GUC_VCS2_VCS1_IER			_MMIO(0xC554)
-#define GUC_WD_VECS_IER				_MMIO(0xC558)
-#define GUC_PM_P24C_IER				_MMIO(0xC55C)
+#define GUC_BCS_RCS_IER				XE_REG(0xC550)
+#define GUC_VCS2_VCS1_IER			XE_REG(0xC554)
+#define GUC_WD_VECS_IER				XE_REG(0xC558)
+#define GUC_PM_P24C_IER				XE_REG(0xC55C)
 
-#define VF_SW_FLAG(n)			_MMIO(0x190240 + (n) * 4)
-#define VF_SW_FLAG_COUNT		4
+#define VF_SW_FLAG(n)				XE_REG(0x190240 + (n) * 4)
+#define VF_SW_FLAG_COUNT			4
 
-#define MED_VF_SW_FLAG(n)		_MMIO(0x190310 + (n) * 4)
-#define MED_VF_SW_FLAG_COUNT		4
+#define MED_VF_SW_FLAG(n)			XE_REG(0x190310 + (n) * 4)
+#define MED_VF_SW_FLAG_COUNT			4
 
 /* GuC Interrupt Vector */
 #define GUC_INTR_GUC2HOST			BIT(15)
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index e31137e2c42f2..787f223bc7275 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -94,13 +94,4 @@ struct xe_reg_mcr {
 				 .__reg = XE_REG_INITIALIZER(r_,  ##__VA_ARGS__, .mcr = 1)	\
 				 })
 
-/*
- * TODO: remove these once the register declarations are not using them anymore
- */
-#undef _MMIO
-#undef MCR_REG
-#define _MMIO(r_)	((const struct xe_reg){ .reg = r_ })
-#define MCR_REG(r_)	((const struct xe_reg_mcr){ .__reg.reg = r_,		\
-						    .__reg.mcr = 1 })
-
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 9d18430fd2252..e0734c8f922c6 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -7,7 +7,7 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define GU_CNTL					_MMIO(0x101010)
+#define GU_CNTL					XE_REG(0x101010)
 #define   LMEM_INIT				REG_BIT(7)
 
 #define RENDER_RING_BASE			0x02000
@@ -42,18 +42,18 @@
 #define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
 #define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
 
-#define FF_THREAD_MODE				_MMIO(0x20a0)
+#define FF_THREAD_MODE				XE_REG(0x20a0)
 #define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
 
-#define PVC_RP_STATE_CAP			_MMIO(0x281014)
-#define MTL_RP_STATE_CAP			_MMIO(0x138000)
+#define PVC_RP_STATE_CAP			XE_REG(0x281014)
+#define MTL_RP_STATE_CAP			XE_REG(0x138000)
 
-#define MTL_MEDIAP_STATE_CAP			_MMIO(0x138020)
+#define MTL_MEDIAP_STATE_CAP			XE_REG(0x138020)
 #define   MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)
 #define   MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
 
-#define MTL_GT_RPE_FREQUENCY			_MMIO(0x13800c)
-#define MTL_MPE_FREQUENCY			_MMIO(0x13802c)
+#define MTL_GT_RPE_FREQUENCY			XE_REG(0x13800c)
+#define MTL_MPE_FREQUENCY			XE_REG(0x13802c)
 #define   MTL_RPE_MASK				REG_GENMASK(8, 0)
 
 #define TRANSCODER_A_OFFSET			0x60000
@@ -69,32 +69,32 @@
 #define PIPE_DSI0_OFFSET			0x7b000
 #define PIPE_DSI1_OFFSET			0x7b800
 
-#define SOFTWARE_FLAGS_SPR33			_MMIO(0x4f084)
+#define SOFTWARE_FLAGS_SPR33			XE_REG(0x4f084)
 
 #define PCU_IRQ_OFFSET				0x444e0
 #define GU_MISC_IRQ_OFFSET			0x444f0
 #define   GU_MISC_GSE				REG_BIT(27)
 
-#define GFX_MSTR_IRQ				_MMIO(0x190010)
+#define GFX_MSTR_IRQ				XE_REG(0x190010)
 #define   MASTER_IRQ				REG_BIT(31)
 #define   GU_MISC_IRQ				REG_BIT(29)
 #define   DISPLAY_IRQ				REG_BIT(16)
 #define   GT_DW_IRQ(x)				REG_BIT(x)
 
-#define DG1_MSTR_TILE_INTR			_MMIO(0x190008)
+#define DG1_MSTR_TILE_INTR			XE_REG(0x190008)
 #define   DG1_MSTR_IRQ				REG_BIT(31)
 #define   DG1_MSTR_TILE(t)			REG_BIT(t)
 
-#define TIMESTAMP_OVERRIDE					_MMIO(0x44074)
+#define TIMESTAMP_OVERRIDE					XE_REG(0x44074)
 #define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
 #define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
 
-#define GGC					_MMIO(0x108040)
+#define GGC					XE_REG(0x108040)
 #define   GMS_MASK				REG_GENMASK(15, 8)
 #define   GGMS_MASK				REG_GENMASK(7, 6)
 
-#define GSMBASE					_MMIO(0x108100)
-#define DSMBASE					_MMIO(0x1080C0)
+#define GSMBASE					XE_REG(0x108100)
+#define DSMBASE					XE_REG(0x1080C0)
 #define   BDSM_MASK				REG_GENMASK64(63, 20)
 
 #endif
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 29e112c108c62..51d215f08113d 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -18,17 +18,17 @@
 #include "xe_reg_sr.h"
 #include "xe_rtp.h"
 
-#undef _MMIO
-#undef MCR_REG
-#define _MMIO(x)	_XE_RTP_REG(x)
-#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+#undef XE_REG
+#undef XE_REG_MCR
+#define XE_REG(x, ...)		_XE_RTP_REG(x)
+#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
 
-#define REGULAR_REG1	_MMIO(1)
-#define REGULAR_REG2	_MMIO(2)
-#define REGULAR_REG3	_MMIO(3)
-#define MCR_REG1	MCR_REG(1)
-#define MCR_REG2	MCR_REG(2)
-#define MCR_REG3	MCR_REG(3)
+#define REGULAR_REG1	XE_REG(1)
+#define REGULAR_REG2	XE_REG(2)
+#define REGULAR_REG3	XE_REG(3)
+#define MCR_REG1	XE_REG_MCR(1)
+#define MCR_REG2	XE_REG_MCR(2)
+#define MCR_REG3	XE_REG_MCR(3)
 
 struct rtp_test_case {
 	const char *name;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index fc580d961dbbe..4e5ad616063d7 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -185,11 +185,11 @@ err:
 	return err;
 }
 
-#define GUC_TLB_INV_CR				_MMIO(0xcee8)
+#define GUC_TLB_INV_CR				XE_REG(0xcee8)
 #define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
-#define PVC_GUC_TLB_INV_DESC0			_MMIO(0xcf7c)
+#define PVC_GUC_TLB_INV_DESC0			XE_REG(0xcf7c)
 #define   PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
-#define PVC_GUC_TLB_INV_DESC1			_MMIO(0xcf80)
+#define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
 #define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
 
 void xe_ggtt_invalidate(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 4e9e9b1aad023..89d20faced19d 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -22,6 +22,8 @@
 #include "xe_uc_fw.h"
 #include "xe_wopcm.h"
 
+#define MEDIA_GUC_HOST_INTERRUPT        XE_REG(0x190304)
+
 static struct xe_gt *
 guc_to_gt(struct xe_guc *guc)
 {
@@ -244,8 +246,6 @@ static void guc_write_params(struct xe_guc *guc)
 		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]);
 }
 
-#define MEDIA_GUC_HOST_INTERRUPT        _MMIO(0x190304)
-
 int xe_guc_init(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 6d59e36b6e5c7..72d460d5323bd 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -23,18 +23,18 @@
 
 #define MCHBAR_MIRROR_BASE_SNB	0x140000
 
-#define GEN6_RP_STATE_CAP			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
-#define   RP0_MASK				REG_GENMASK(7, 0)
-#define   RP1_MASK				REG_GENMASK(15, 8)
-#define   RPN_MASK				REG_GENMASK(23, 16)
+#define GEN6_RP_STATE_CAP	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define   RP0_MASK		REG_GENMASK(7, 0)
+#define   RP1_MASK		REG_GENMASK(15, 8)
+#define   RPN_MASK		REG_GENMASK(23, 16)
 
-#define GEN10_FREQ_INFO_REC	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
+#define GEN10_FREQ_INFO_REC	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK		REG_GENMASK(15, 8)
 
-#define GEN12_RPSTAT1		_MMIO(0x1381b4)
+#define GEN12_RPSTAT1		XE_REG(0x1381b4)
 #define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
 
-#define MTL_MIRROR_TARGET_WP1	_MMIO(0xc60)
+#define MTL_MIRROR_TARGET_WP1	XE_REG(0xc60)
 #define   MTL_CAGF_MASK		REG_GENMASK(8, 0)
 
 #define GT_FREQUENCY_MULTIPLIER	50
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 9dd730d707e5c..2fffb2865cab6 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -22,9 +22,9 @@
  * Interrupt registers for a unit are always consecutive and ordered
  * ISR, IMR, IIR, IER.
  */
-#define IMR(offset)				_MMIO(offset + 0x4)
-#define IIR(offset)				_MMIO(offset + 0x8)
-#define IER(offset)				_MMIO(offset + 0xc)
+#define IMR(offset)				XE_REG(offset + 0x4)
+#define IIR(offset)				XE_REG(offset + 0x8)
+#define IER(offset)				XE_REG(offset + 0xc)
 
 static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg)
 {
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 24a3c18421449..3b719c774efa6 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -17,7 +17,7 @@
 #include "xe_macros.h"
 #include "xe_module.h"
 
-#define XEHP_MTCFG_ADDR		_MMIO(0x101800)
+#define XEHP_MTCFG_ADDR		XE_REG(0x101800)
 #define TILE_COUNT		REG_GENMASK(15, 8)
 #define GEN12_LMEM_BAR		2
 
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 67c63facdbf94..f2ceecd536ed0 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -477,8 +477,8 @@ static void __init_mocs_table(struct xe_gt *gt,
 	for (i = 0;
 	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, _MMIO(addr + i * 4).reg, mocs);
-		xe_mmio_write32(gt, _MMIO(addr + i * 4).reg, mocs);
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, XE_REG(addr + i * 4).reg, mocs);
+		xe_mmio_write32(gt, XE_REG(addr + i * 4).reg, mocs);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index fcf6ae2c92ccb..abee41fa3cb9d 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -71,7 +71,7 @@ static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
 static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries)
 {
 	for (int i = 0; i < n_entries; i++)
-		xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]);
+		xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_INDEX(i)), table[i]);
 }
 
 void xe_pat_init(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 4e689cd4b23b8..837ff7c71280e 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -7,7 +7,7 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define PCODE_MAILBOX			_MMIO(0x138124)
+#define PCODE_MAILBOX			XE_REG(0x138124)
 #define   PCODE_READY			REG_BIT(31)
 #define   PCODE_MB_PARAM2		REG_GENMASK(23, 16)
 #define   PCODE_MB_PARAM1		REG_GENMASK(15, 8)
@@ -22,8 +22,8 @@
 #define     PCODE_GT_RATIO_OUT_OF_RANGE	0x10
 #define     PCODE_REJECTED		0x11
 
-#define PCODE_DATA0			_MMIO(0x138128)
-#define PCODE_DATA1			_MMIO(0x13812C)
+#define PCODE_DATA0			XE_REG(0x138128)
+#define PCODE_DATA1			XE_REG(0x13812C)
 
 /* Min Freq QOS Table */
 #define   PCODE_WRITE_MIN_FREQ_TABLE	0x8
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index e38397fc771a1..b7bbba929170b 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -151,7 +151,7 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg,
 		val = (entry->clr_bits ?: entry->set_bits) << 16;
 	else if (entry->clr_bits + 1)
 		val = (entry->reg_type == XE_RTP_REG_MCR ?
-		       xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) :
+		       xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(reg)) :
 		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
 	else
 		val = 0;
@@ -166,7 +166,7 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg,
 	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val);
 
 	if (entry->reg_type == XE_RTP_REG_MCR)
-		xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val);
+		xe_gt_mcr_multicast_write(gt, XE_REG_MCR(reg), val);
 	else
 		xe_mmio_write32(gt, reg, val);
 }
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 5a26657069121..310d5dfe30d57 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -11,10 +11,10 @@
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#undef _MMIO
-#undef MCR_REG
-#define _MMIO(x)	_XE_RTP_REG(x)
-#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+#undef XE_REG
+#undef XE_REG_MCR
+#define XE_REG(x, ...)		_XE_RTP_REG(x)
+#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
 
 static bool match_not_render(const struct xe_gt *gt,
 			     const struct xe_hw_engine *hwe)
@@ -45,10 +45,10 @@ static const struct xe_rtp_entry register_whitelist[] = {
 	},
 	{ XE_RTP_NAME("16014440446"),
 	  XE_RTP_RULES(PLATFORM(PVC)),
-	  XE_RTP_ACTIONS(WHITELIST(_MMIO(0x4400),
+	  XE_RTP_ACTIONS(WHITELIST(XE_REG(0x4400),
 				   RING_FORCE_TO_NONPRIV_DENY |
 				   RING_FORCE_TO_NONPRIV_RANGE_64),
-			 WHITELIST(_MMIO(0x4500),
+			 WHITELIST(XE_REG(0x4500),
 				   RING_FORCE_TO_NONPRIV_DENY |
 				   RING_FORCE_TO_NONPRIV_RANGE_64))
 	},
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index a3be7c77753a1..c0c587a739808 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -197,7 +197,7 @@ struct xe_reg_sr;
  * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all
  *                    the bits
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
+ * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Value to set
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -213,7 +213,7 @@ struct xe_reg_sr;
 /**
  * XE_RTP_ACTION_SET - Set bits from @val_ in the register.
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
+ * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Bits to set in the register
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -232,7 +232,7 @@ struct xe_reg_sr;
 /**
  * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register.
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
+ * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Bits to clear in the register
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -251,7 +251,7 @@ struct xe_reg_sr;
 /**
  * XE_RTP_ACTION_FIELD_SET: Set a bit range
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
+ * @reg_type_: Register type - automatically expanded by XE_REG
  * @mask_bits_: Mask of bits to be changed in the register, forming a field
  * @val_: Value to set in the field denoted by @mask_bits_
  * @...: Additional fields to override in the struct xe_rtp_action entry
@@ -274,7 +274,7 @@ struct xe_reg_sr;
 /**
  * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO
+ * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Whitelist-specific flags to set
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 43912312cfba7..f6eefa951175f 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -12,10 +12,10 @@
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#undef _MMIO
-#undef MCR_REG
-#define _MMIO(x)	_XE_RTP_REG(x)
-#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+#undef XE_REG
+#undef XE_REG_MCR
+#define XE_REG(x, ...)		_XE_RTP_REG(x)
+#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
 
 static const struct xe_rtp_entry gt_tunings[] = {
 	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 7a9bf588301e9..ed3fa51ccd246 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -87,10 +87,10 @@
  *    a more declarative approach rather than procedural.
  */
 
-#undef _MMIO
-#undef MCR_REG
-#define _MMIO(x)	_XE_RTP_REG(x)
-#define MCR_REG(x)	_XE_RTP_MCR_REG(x)
+#undef XE_REG
+#undef XE_REG_MCR
+#define XE_REG(x, ...)		_XE_RTP_REG(x)
+#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
 
 __diag_push();
 __diag_ignore_all("-Woverride-init", "Allow field overrides in table");
-- 
GitLab


From ca2acce76d81fda9520b8b797119deddbe660968 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:54 -0700
Subject: [PATCH 1512/2340] drm/xe: Annotate masked registers used by RTP

Go over all registers used in xe_rtp tables and mark the registers as
masked if they were passed a XE_RTP_ACTION_FLAG(MASKED_REG) flag.
This will allow the flag to be removed in future when xe_rtp starts
using the real xe_reg_t type.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-9-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  2 +-
 drivers/gpu/drm/xe/regs/xe_gt_regs.h     | 48 ++++++++++++------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 80b66844a8ec4..f6b3b99a562a6 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -21,7 +21,7 @@
 #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
 #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
 
-#define RING_PSMI_CTL(base)			XE_REG((base) + 0x50)
+#define RING_PSMI_CTL(base)			XE_REG((base) + 0x50, XE_REG_OPTION_MASKED)
 #define   RC_SEMA_IDLE_MSG_DISABLE		REG_BIT(12)
 #define   WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5648305a8f5a4..8dd3bf2f63772 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -45,19 +45,19 @@
 #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
 #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
 
-#define FF_SLICE_CS_CHICKEN1			XE_REG(0x20e0)
+#define FF_SLICE_CS_CHICKEN1			XE_REG(0x20e0, XE_REG_OPTION_MASKED)
 #define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
 
-#define FF_SLICE_CS_CHICKEN2			XE_REG(0x20e4)
+#define FF_SLICE_CS_CHICKEN2			XE_REG(0x20e4, XE_REG_OPTION_MASKED)
 #define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
 
-#define CS_DEBUG_MODE1				XE_REG(0x20ec)
+#define CS_DEBUG_MODE1				XE_REG(0x20ec, XE_REG_OPTION_MASKED)
 #define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
 #define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
 
 #define PS_INVOCATION_COUNT			XE_REG(0x2348)
 
-#define CS_CHICKEN1				XE_REG(0x2580)
+#define CS_CHICKEN1				XE_REG(0x2580, XE_REG_OPTION_MASKED)
 #define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
 #define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
 #define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
@@ -77,13 +77,13 @@
 #define XEHP_TILE0_ADDR_RANGE			XE_REG_MCR(0x4900)
 #define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
 
-#define CHICKEN_RASTER_1			XE_REG_MCR(0x6204)
+#define CHICKEN_RASTER_1			XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED)
 #define   DIS_SF_ROUND_NEAREST_EVEN		REG_BIT(8)
 
-#define CHICKEN_RASTER_2			XE_REG_MCR(0x6208)
+#define CHICKEN_RASTER_2			XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
 #define   TBIMR_FAST_CLIP			REG_BIT(5)
 
-#define VFLSKPD					XE_REG_MCR(0x62a8)
+#define VFLSKPD					XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED)
 #define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
 
@@ -94,35 +94,35 @@
 #define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
 #define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
 
-#define CACHE_MODE_1				XE_REG(0x7004)
+#define CACHE_MODE_1				XE_REG(0x7004, XE_REG_OPTION_MASKED)
 #define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
 
-#define XEHP_PSS_MODE2				XE_REG_MCR(0x703c)
+#define XEHP_PSS_MODE2				XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
-#define HIZ_CHICKEN					XE_REG(0x7018)
+#define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
 #define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
 
 #define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
 
-#define COMMON_SLICE_CHICKEN4			XE_REG(0x7300)
+#define COMMON_SLICE_CHICKEN4			XE_REG(0x7300, XE_REG_OPTION_MASKED)
 #define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
 
-#define COMMON_SLICE_CHICKEN3				XE_REG(0x7304)
-#define XEHP_COMMON_SLICE_CHICKEN3			XE_REG_MCR(0x7304)
+#define COMMON_SLICE_CHICKEN3				XE_REG(0x7304, XE_REG_OPTION_MASKED)
+#define XEHP_COMMON_SLICE_CHICKEN3			XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
 #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE		REG_BIT(12)
 #define   BLEND_EMB_FIX_DISABLE_IN_RCC			REG_BIT(11)
 #define   DISABLE_CPS_AWARE_COLOR_PIPE			REG_BIT(9)
 
-#define XEHP_SLICE_COMMON_ECO_CHICKEN1		XE_REG_MCR(0x731c)
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1		XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
 
-#define VF_PREEMPTION				XE_REG(0x83a4)
+#define VF_PREEMPTION				XE_REG(0x83a4, XE_REG_OPTION_MASKED)
 #define   PREEMPTION_VERTEX_COUNT		REG_GENMASK(15, 0)
 
-#define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4)
+#define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4, XE_REG_OPTION_MASKED)
 #define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
 
 #define XEHP_SQCM				XE_REG_MCR(0x8724)
@@ -244,7 +244,7 @@
 #define FORCEWAKE_MEDIA_VDBOX(n)		XE_REG(0xa540 + (n) * 4)
 #define FORCEWAKE_MEDIA_VEBOX(n)		XE_REG(0xa560 + (n) * 4)
 
-#define XEHPC_LNCFMISCCFGREG0			XE_REG_MCR(0xb01c)
+#define XEHPC_LNCFMISCCFGREG0			XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED)
 #define   XEHPC_OVRLSCCC			REG_BIT(0)
 
 #define XEHP_L3NODEARBCFG			XE_REG_MCR(0xb0b4)
@@ -272,20 +272,20 @@
 #define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
 #define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
 
-#define SAMPLER_MODE				XE_REG_MCR(0xe18c)
+#define SAMPLER_MODE				XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
 #define   SAMPLER_ENABLE_HEADLESS_MSG		REG_BIT(5)
 
-#define HALF_SLICE_CHICKEN7				XE_REG_MCR(0xe194)
+#define HALF_SLICE_CHICKEN7				XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED)
 #define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
 
-#define CACHE_MODE_SS				XE_REG_MCR(0xe420)
+#define CACHE_MODE_SS				XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED)
 #define   ENABLE_EU_COUNT_FOR_TDL_FLUSH		REG_BIT(10)
 #define   DISABLE_ECC				REG_BIT(5)
 #define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
 
-#define ROW_CHICKEN4				XE_REG_MCR(0xe48c)
+#define ROW_CHICKEN4				XE_REG_MCR(0xe48c, XE_REG_OPTION_MASKED)
 #define   DISABLE_GRF_CLEAR			REG_BIT(13)
 #define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
 #define   DISABLE_TDL_PUSH			REG_BIT(9)
@@ -294,18 +294,18 @@
 #define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
 #define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
-#define ROW_CHICKEN				XE_REG_MCR(0xe4f0)
+#define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
 
-#define ROW_CHICKEN2				XE_REG_MCR(0xe4f4)
+#define ROW_CHICKEN2				XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
 #define   DISABLE_READ_SUPPRESSION		REG_BIT(15)
 #define   DISABLE_EARLY_READ			REG_BIT(14)
 #define   ENABLE_LARGE_GRF_MODE			REG_BIT(12)
 #define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
 #define   DISABLE_DOP_GATING			REG_BIT(0)
 
-#define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0)
+#define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
 
 #define RT_CTRL					XE_REG_MCR(0xe530)
-- 
GitLab


From 07fbd1f85df18a9a33556de76499fd3693639a7d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:55 -0700
Subject: [PATCH 1513/2340] drm/xe: Plumb xe_reg into WAs, rtp, etc

Now that struct xe_reg and struct xe_reg_mcr are types that can be used
by xe, convert more of the driver to use them. Some notes about the
conversions:

	- The RTP tables don't need the MASKED flags anymore in the
	  actions as that information now comes from the register
	  definition

	- There is no need for the _XE_RTP_REG/_XE_RTP_REG_MCR macros
	  and the register types on RTP infra: that comes from the
	  register definitions.

	- When declaring the RTP entries, there is no need anymore to
	  undef XE_REG and friends: the RTP macros deal with removing
	  the cast where needed due to not being able to use a compound
	  statement for initialization in the tables

	- The index in the reg-sr xarray is the register offset only.
	  Otherwise we wouldn't catch mistakes about adding both a
	  MCR-style and normal-style registers. For that, the register
	  is now also part of the entry, so the options can be compared
	  to check for compatible entries.

In order to be able to accomplish this, some improvements are needed on
the RTP macros. Change its implementation to concentrate on "pasting a prefix
to each argument" rather than the more general "call any macro for each
argument". Hopefully this will avoid trying to extend this infra and
making it more complex. With the use of tuples for building the
arguments, it's not possible to pass additional register fields and
using xe_reg in the RTP tables.

xe_mmio_* still need to be converted, from u32 to xe_reg, but that is
left for another change.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-10-lucas.demarchi@intel.com
Link: https://lore.kernel.org/r/20230427223256.1432787-6-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_rtp_test.c |  41 +++---
 drivers/gpu/drm/xe/xe_guc_ads.c        |   2 +-
 drivers/gpu/drm/xe/xe_reg_sr.c         |  51 ++++----
 drivers/gpu/drm/xe/xe_reg_sr.h         |   3 +-
 drivers/gpu/drm/xe/xe_reg_sr_types.h   |  10 +-
 drivers/gpu/drm/xe/xe_reg_whitelist.c  |   4 +-
 drivers/gpu/drm/xe/xe_rtp.c            |   7 +-
 drivers/gpu/drm/xe/xe_rtp.h            | 166 ++++++++++++++-----------
 drivers/gpu/drm/xe/xe_rtp_types.h      |  22 ++--
 drivers/gpu/drm/xe/xe_tuning.c         |   7 +-
 drivers/gpu/drm/xe/xe_wa.c             | 134 +++++++-------------
 11 files changed, 206 insertions(+), 241 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 51d215f08113d..ad2fe8a39a78b 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -18,25 +18,21 @@
 #include "xe_reg_sr.h"
 #include "xe_rtp.h"
 
-#undef XE_REG
-#undef XE_REG_MCR
-#define XE_REG(x, ...)		_XE_RTP_REG(x)
-#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
-
 #define REGULAR_REG1	XE_REG(1)
 #define REGULAR_REG2	XE_REG(2)
 #define REGULAR_REG3	XE_REG(3)
 #define MCR_REG1	XE_REG_MCR(1)
 #define MCR_REG2	XE_REG_MCR(2)
 #define MCR_REG3	XE_REG_MCR(3)
+#define MASKED_REG1	XE_REG(1, XE_REG_OPTION_MASKED)
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
 
 struct rtp_test_case {
 	const char *name;
-	struct {
-		u32 offset;
-		u32 type;
-	} expected_reg;
-        u32 expected_set_bits;
+	struct xe_reg expected_reg;
+	u32 expected_set_bits;
 	u32 expected_clr_bits;
 	unsigned long expected_count;
 	unsigned int expected_sr_errors;
@@ -56,7 +52,7 @@ static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
 static const struct rtp_test_case cases[] = {
 	{
 		.name = "coalesce-same-reg",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0) | REG_BIT(1),
 		.expected_clr_bits = REG_BIT(0) | REG_BIT(1),
 		.expected_count = 1,
@@ -75,7 +71,7 @@ static const struct rtp_test_case cases[] = {
 	},
 	{
 		.name = "no-match-no-add",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
@@ -94,7 +90,7 @@ static const struct rtp_test_case cases[] = {
 	},
 	{
 		.name = "no-match-no-add-multiple-rules",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
@@ -113,7 +109,7 @@ static const struct rtp_test_case cases[] = {
 	},
 	{
 		.name = "two-regs-two-entries",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 2,
@@ -132,7 +128,7 @@ static const struct rtp_test_case cases[] = {
 	},
 	{
 		.name = "clr-one-set-other",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
 		.expected_clr_bits = REG_BIT(1) | REG_BIT(0),
 		.expected_count = 1,
@@ -153,7 +149,7 @@ static const struct rtp_test_case cases[] = {
 #define TEMP_MASK	REG_GENMASK(10, 8)
 #define TEMP_FIELD	REG_FIELD_PREP(TEMP_MASK, 2)
 		.name = "set-field",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = TEMP_FIELD,
 		.expected_clr_bits = TEMP_MASK,
 		.expected_count = 1,
@@ -171,7 +167,7 @@ static const struct rtp_test_case cases[] = {
 	},
 	{
 		.name = "conflict-duplicate",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
@@ -191,7 +187,7 @@ static const struct rtp_test_case cases[] = {
 	},
 	{
 		.name = "conflict-not-disjoint",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
@@ -211,7 +207,7 @@ static const struct rtp_test_case cases[] = {
 	},
 	{
 		.name = "conflict-reg-type",
-		.expected_reg = { REGULAR_REG1 },
+		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
@@ -229,8 +225,7 @@ static const struct rtp_test_case cases[] = {
 			/* drop: regular vs masked */
 			{ XE_RTP_NAME("basic-3"),
 			  XE_RTP_RULES(FUNC(match_yes)),
-			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0),
-					     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			  XE_RTP_ACTIONS(SET(MASKED_REG1, REG_BIT(0)))
 			},
 			{}
 		},
@@ -249,7 +244,7 @@ static void xe_rtp_process_tests(struct kunit *test)
 	xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL);
 
 	xa_for_each(&reg_sr->xa, idx, sre) {
-		if (idx == param->expected_reg.offset)
+		if (idx == param->expected_reg.reg)
 			sr_entry = sre;
 
 		count++;
@@ -258,7 +253,7 @@ static void xe_rtp_process_tests(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, count, param->expected_count);
 	KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits);
 	KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits);
-	KUNIT_EXPECT_EQ(test, sr_entry->reg_type, param->expected_reg.type);
+	KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw);
 	KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 6a723bda2aa93..676137dcb5106 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -461,7 +461,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
 
 	xa_for_each(&hwe->reg_sr.xa, idx, entry) {
-		u32 flags = entry->masked_reg ? GUC_REGSET_MASKED : 0;
+		u32 flags = entry->reg.masked ? GUC_REGSET_MASKED : 0;
 
 		guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++);
 	}
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index b7bbba929170b..d129e6d7cb1ff 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -75,10 +75,7 @@ static bool compatible_entries(const struct xe_reg_sr_entry *e1,
 	    e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits)
 		return false;
 
-	if (e1->masked_reg != e2->masked_reg)
-		return false;
-
-	if (e1->reg_type != e2->reg_type)
+	if (e1->reg.raw != e2->reg.raw)
 		return false;
 
 	return true;
@@ -91,10 +88,10 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr)
 #endif
 }
 
-int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
+int xe_reg_sr_add(struct xe_reg_sr *sr,
 		  const struct xe_reg_sr_entry *e)
 {
-	unsigned long idx = reg;
+	unsigned long idx = e->reg.reg;
 	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
 	int ret;
 
@@ -125,18 +122,30 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
 	return 0;
 
 fail:
-	DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n",
+	DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",
 		  idx, e->clr_bits, e->set_bits,
-		  str_yes_no(e->masked_reg), ret);
+		  str_yes_no(e->reg.masked),
+		  str_yes_no(e->reg.mcr),
+		  ret);
 	reg_sr_inc_error(sr);
 
 	return ret;
 }
 
-static void apply_one_mmio(struct xe_gt *gt, u32 reg,
-			   struct xe_reg_sr_entry *entry)
+/*
+ * Convert back from encoded value to type-safe, only to be used when reg.mcr
+ * is true
+ */
+static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
+{
+	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
+}
+
+static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg reg = entry->reg;
+	struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
 	u32 val;
 
 	/*
@@ -147,12 +156,12 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg,
 	 * When it's not masked, we have to read it from hardware, unless we are
 	 * supposed to set all bits.
 	 */
-	if (entry->masked_reg)
+	if (reg.masked)
 		val = (entry->clr_bits ?: entry->set_bits) << 16;
 	else if (entry->clr_bits + 1)
-		val = (entry->reg_type == XE_RTP_REG_MCR ?
-		       xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(reg)) :
-		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+		val = (reg.mcr ?
+		       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
+		       xe_mmio_read32(gt, reg.reg)) & (~entry->clr_bits);
 	else
 		val = 0;
 
@@ -163,12 +172,12 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg,
 	 */
 	val |= entry->set_bits;
 
-	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val);
+	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.reg, val);
 
-	if (entry->reg_type == XE_RTP_REG_MCR)
-		xe_gt_mcr_multicast_write(gt, XE_REG_MCR(reg), val);
+	if (entry->reg.mcr)
+		xe_gt_mcr_multicast_write(gt, reg_mcr, val);
 	else
-		xe_mmio_write32(gt, reg, val);
+		xe_mmio_write32(gt, reg.reg, val);
 }
 
 void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
@@ -188,7 +197,7 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
 		goto err_force_wake;
 
 	xa_for_each(&sr->xa, reg, entry)
-		apply_one_mmio(gt, reg, entry);
+		apply_one_mmio(gt, entry);
 
 	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
 	XE_WARN_ON(err);
@@ -257,6 +266,6 @@ void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p)
 	xa_for_each(&sr->xa, reg, entry)
 		drm_printf(p, "\tREG[0x%lx] clr=0x%08x set=0x%08x masked=%s mcr=%s\n",
 			   reg, entry->clr_bits, entry->set_bits,
-			   str_yes_no(entry->masked_reg),
-			   str_yes_no(entry->reg_type == XE_RTP_REG_MCR));
+			   str_yes_no(entry->reg.masked),
+			   str_yes_no(entry->reg.mcr));
 }
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
index 3af369089faab..0bfe66ea29bfc 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -19,8 +19,7 @@ struct drm_printer;
 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
 void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p);
 
-int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
-		  const struct xe_reg_sr_entry *e);
+int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e);
 void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
 void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 			       struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
index 91469784fd907..ad48a52b824a1 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr_types.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -9,18 +9,14 @@
 #include <linux/types.h>
 #include <linux/xarray.h>
 
+#include "regs/xe_reg_defs.h"
+
 struct xe_reg_sr_entry {
+	struct xe_reg	reg;
 	u32		clr_bits;
 	u32		set_bits;
 	/* Mask for bits to consider when reading value back */
 	u32		read_mask;
-	/*
-	 * "Masked registers" are marked in spec as register with the upper 16
-	 * bits as a mask for the bits that is being updated on the lower 16
-	 * bits when writing to it.
-	 */
-	u8		masked_reg;
-	u8		reg_type;
 };
 
 struct xe_reg_sr {
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 310d5dfe30d57..7a2bb60ebd850 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -11,10 +11,8 @@
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#undef XE_REG
 #undef XE_REG_MCR
-#define XE_REG(x, ...)		_XE_RTP_REG(x)
-#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
 
 static bool match_not_render(const struct xe_gt *gt,
 			     const struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 20acd43cb60be..f2a0e8eb4936b 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -94,16 +94,15 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 			     u32 mmio_base,
 			     struct xe_reg_sr *sr)
 {
-	u32 reg = action->reg + mmio_base;
 	struct xe_reg_sr_entry sr_entry = {
+		.reg = action->reg,
 		.clr_bits = action->clr_bits,
 		.set_bits = action->set_bits,
 		.read_mask = action->read_mask,
-		.masked_reg = action->flags & XE_RTP_ACTION_FLAG_MASKED_REG,
-		.reg_type = action->reg_type,
 	};
 
-	xe_reg_sr_add(sr, reg, &sr_entry);
+	sr_entry.reg.reg += mmio_base;
+	xe_reg_sr_add(sr, &sr_entry);
 }
 
 static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index c0c587a739808..afbf5a2674f4f 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -22,43 +22,77 @@ struct xe_reg_sr;
 /*
  * Helper macros - not to be used outside this header.
  */
-/* This counts to 12. Any more, it will return 13th argument. */
-#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
-#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define _XE_ESC(...) __VA_ARGS__
+#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,))
+#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_
 
-#define __CONCAT(a, b) a ## b
-#define CONCATENATE(a, b) __CONCAT(a, b)
+#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
+#define __XE_FIRST(x_,...) x_
+#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
+#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__)
 
-#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x)
-#define __CALL_FOR_EACH_2(MACRO_, x, ...)					\
-	MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__)
-#define __CALL_FOR_EACH_3(MACRO_, x, ...)					\
-	MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__)
-#define __CALL_FOR_EACH_4(MACRO_, x, ...)					\
-	MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__)
+#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
 
-#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...)					\
-	CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__)
-#define CALL_FOR_EACH(MACRO_, x, ...)						\
-	_CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__)
+#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
+#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
 
-#define _XE_RTP_REG(x_)	(x_), XE_RTP_REG_REGULAR
-#define _XE_RTP_MCR_REG(x_) (x_), XE_RTP_REG_MCR
+#define __XE_RTP_PASTE_SEP_COMMA		,
+#define __XE_RTP_PASTE_SEP_BITWISE_OR		|
 
 /*
- * Helper macros for concatenating prefix - do not use them directly outside
- * this header
+ * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple
+ * @args, with the end result separated by @sep_. @sep must be one of the
+ * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such
+ * prefix.
+ *
+ * Examples:
+ *
+ * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
+ *
+ * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
+ *    expands to:
+ *
+ *	XE_RTP_TEST2_FOO
+ *
+ * 3) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
+ *
+ * 4) #define __XE_RTP_PASTE_SEP_MY_SEP	BANANA
+ *    XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
  */
-#define __ADD_XE_RTP_ENTRY_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ENTRY_FLAG_, x) |
-#define __ADD_XE_RTP_ACTION_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ACTION_FLAG_, x) |
-#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) ,
-#define __ADD_XE_RTP_ACTION_PREFIX(x) CONCATENATE(XE_RTP_ACTION_, x) ,
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_))
+#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
+#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
+
+
+/*
+ * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
+ *
+ * Example:
+ *
+ *	#define foo(a_)	((struct foo){ .a = a_ })
+ *	XE_RTP_DROP_CAST(foo(10))
+ *	expands to:
+ *
+ *	{ .a = 10 }
+ */
+#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
+
 
 /*
  * Macros to encode rules to match against platform, IP version, stepping, etc.
  * Shouldn't be used directly - see XE_RTP_RULES()
  */
-
 #define _XE_RTP_RULE_PLATFORM(plat__)						\
 	{ .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ }
 
@@ -197,7 +231,6 @@ struct xe_reg_sr;
  * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all
  *                    the bits
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Value to set
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -205,15 +238,14 @@ struct xe_reg_sr;
  *
  *	REGNAME = VALUE
  */
-#define XE_RTP_ACTION_WR(reg_, reg_type_, val_, ...)				\
-	{ .reg = (reg_), .reg_type = (reg_type_),				\
+#define XE_RTP_ACTION_WR(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
 	  .clr_bits = ~0u, .set_bits = (val_),					\
 	  .read_mask = (~0u), ##__VA_ARGS__ }
 
 /**
  * XE_RTP_ACTION_SET - Set bits from @val_ in the register.
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Bits to set in the register
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -224,15 +256,14 @@ struct xe_reg_sr;
  *	REGNAME[2] = 1
  *	REGNAME[5] = 1
  */
-#define XE_RTP_ACTION_SET(reg_, reg_type_, val_, ...)				\
-	{ .reg = (reg_), .reg_type = (reg_type_),				\
-	  .clr_bits = (val_), .set_bits = (val_),				\
-	  .read_mask = (val_), ##__VA_ARGS__ }
+#define XE_RTP_ACTION_SET(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = val_, .set_bits = val_,					\
+	  .read_mask = val_, ##__VA_ARGS__ }
 
 /**
  * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register.
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Bits to clear in the register
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
@@ -243,15 +274,14 @@ struct xe_reg_sr;
  *	REGNAME[2] = 0
  *	REGNAME[5] = 0
  */
-#define XE_RTP_ACTION_CLR(reg_, reg_type_, val_, ...)				\
-	{ .reg = (reg_), .reg_type = (reg_type_),				\
-	  .clr_bits = (val_), .set_bits = 0,					\
-	  .read_mask = (val_), ##__VA_ARGS__ }
+#define XE_RTP_ACTION_CLR(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = val_, .set_bits = 0,					\
+	  .read_mask = val_, ##__VA_ARGS__ }
 
 /**
  * XE_RTP_ACTION_FIELD_SET: Set a bit range
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by XE_REG
  * @mask_bits_: Mask of bits to be changed in the register, forming a field
  * @val_: Value to set in the field denoted by @mask_bits_
  * @...: Additional fields to override in the struct xe_rtp_action entry
@@ -261,29 +291,29 @@ struct xe_reg_sr;
  *
  *	REGNAME[<end>:<start>] = VALUE
  */
-#define XE_RTP_ACTION_FIELD_SET(reg_, reg_type_, mask_bits_, val_, ...)		\
-	{ .reg = (reg_), .reg_type = (reg_type_),				\
-	  .clr_bits = (mask_bits_), .set_bits = (val_),				\
-	  .read_mask = (mask_bits_), ##__VA_ARGS__ }
+#define XE_RTP_ACTION_FIELD_SET(reg_, mask_bits_, val_, ...)			\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = mask_bits_, .set_bits = val_,				\
+	  .read_mask = mask_bits_, ##__VA_ARGS__ }
 
-#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, reg_type_, mask_bits_, val_, ...)	\
-	{ .reg = (reg_), .reg_type = (reg_type_),				\
+#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...)	\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
 	  .clr_bits = (mask_bits_), .set_bits = (val_),				\
 	  .read_mask = 0, ##__VA_ARGS__ }
 
 /**
  * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist
  * @reg_: Register
- * @reg_type_: Register type - automatically expanded by XE_REG
  * @val_: Whitelist-specific flags to set
  * @...: Additional fields to override in the struct xe_rtp_action entry
  *
  * Add a register to the whitelist, allowing userspace to modify the ster with
  * regular user privileges.
  */
-#define XE_RTP_ACTION_WHITELIST(reg_, reg_type_, val_, ...)			\
+#define XE_RTP_ACTION_WHITELIST(reg_, val_, ...)				\
 	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
-	{ .reg = (reg_), .reg_type = (reg_type_), .set_bits = (val_),		\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .set_bits = val_,							\
 	  .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,				\
 	  ##__VA_ARGS__ }
 
@@ -297,11 +327,10 @@ struct xe_reg_sr;
 
 /**
  * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry
- * @f1_: Last part of a ``XE_RTP_ENTRY_FLAG_*``
- * @...: Additional flags, defined like @f1_
+ * @...: Entry flags, without the ``XE_RTP_ENTRY_FLAG_`` prefix
  *
- * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to @f1_ so it can
- * be easily used to define struct xe_rtp_action entries. Example:
+ * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_entry entries. Example:
  *
  * .. code-block:: c
  *
@@ -315,16 +344,15 @@ struct xe_reg_sr;
  *		...
  *	};
  */
-#define XE_RTP_ENTRY_FLAG(f1_, ...)						\
-	.flags = (CALL_FOR_EACH(__ADD_XE_RTP_ENTRY_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
+#define XE_RTP_ENTRY_FLAG(...)							\
+	.flags = (XE_RTP_PASTE_FOREACH(ENTRY_FLAG_, BITWISE_OR, (__VA_ARGS__)))
 
 /**
  * XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action
- * @f1_: Last part of a ``XE_RTP_ENTRY_*``
- * @...: Additional flags, defined like @f1_
+ * @...: Action flags, without the ``XE_RTP_ACTION_FLAG_`` prefix
  *
- * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to @f1_ so it
- * can be easily used to define struct xe_rtp_action entries. Example:
+ * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_action entries. Example:
  *
  * .. code-block:: c
  *
@@ -338,13 +366,12 @@ struct xe_reg_sr;
  *		...
  *	};
  */
-#define XE_RTP_ACTION_FLAG(f1_, ...)						\
-	.flags = (CALL_FOR_EACH(__ADD_XE_RTP_ACTION_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
+#define XE_RTP_ACTION_FLAG(...)							\
+	.flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__)))
 
 /**
  * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry
- * @r1: Last part of XE_RTP_MATCH_*
- * @...: Additional rules, defined like @r1
+ * @...: Rules
  *
  * At least one rule is needed and up to 4 are supported. Multiple rules are
  * AND'ed together, i.e. all the rules must evaluate to true for the entry to
@@ -361,16 +388,15 @@ struct xe_reg_sr;
  *		...
  *	};
  */
-#define XE_RTP_RULES(r1, ...)							\
-	.n_rules = COUNT_ARGS(r1, ##__VA_ARGS__),				\
+#define XE_RTP_RULES(...)							\
+	.n_rules = _XE_COUNT_ARGS(__VA_ARGS__),					\
 	.rules = (const struct xe_rtp_rule[]) {					\
-		CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__)	\
+		XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__))	\
 	}
 
 /**
  * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry
- * @a1: Action to take. Last part of XE_RTP_ACTION_*
- * @...: Additional rules, defined like @r1
+ * @...: Actions to be taken
  *
  * At least one rule is needed and up to 4 are supported. Multiple rules are
  * AND'ed together, i.e. all the rules must evaluate to true for the entry to
@@ -388,10 +414,10 @@ struct xe_reg_sr;
  *		...
  *	};
  */
-#define XE_RTP_ACTIONS(a1, ...)							\
-	.n_actions = COUNT_ARGS(a1, ##__VA_ARGS__),				\
+#define XE_RTP_ACTIONS(...)							\
+	.n_actions = _XE_COUNT_ARGS(__VA_ARGS__),				\
 	.actions = (const struct xe_rtp_action[]) {				\
-		CALL_FOR_EACH(__ADD_XE_RTP_ACTION_PREFIX, a1, ##__VA_ARGS__)	\
+		XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__))	\
 	}
 
 void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index e87f1b280d968..12df8a9e9c45f 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -8,14 +8,11 @@
 
 #include <linux/types.h>
 
+#include "regs/xe_reg_defs.h"
+
 struct xe_hw_engine;
 struct xe_gt;
 
-enum {
-	XE_RTP_REG_REGULAR,
-	XE_RTP_REG_MCR,
-};
-
 /**
  * struct xe_rtp_action - action to take for any matching rule
  *
@@ -24,20 +21,17 @@ enum {
  */
 struct xe_rtp_action {
 	/** @reg: Register */
-	u32		reg;
+	struct xe_reg		reg;
 	/** @clr_bits: bits to clear when updating register */
-	u32		clr_bits;
+	u32			clr_bits;
 	/** @set_bits: bits to set when updating register */
-	u32		set_bits;
+	u32			set_bits;
 #define XE_RTP_NOCHECK		.read_mask = 0
 	/** @read_mask: mask for bits to consider when reading value back */
-	u32		read_mask;
-#define XE_RTP_ACTION_FLAG_MASKED_REG		BIT(0)
-#define XE_RTP_ACTION_FLAG_ENGINE_BASE		BIT(1)
+	u32			read_mask;
+#define XE_RTP_ACTION_FLAG_ENGINE_BASE		BIT(0)
 	/** @flags: flags to apply on rule evaluation or action */
-	u8		flags;
-	/** @reg_type: register type, see ``XE_RTP_REG_*`` */
-	u8		reg_type;
+	u8			flags;
 };
 
 enum {
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index f6eefa951175f..5fc6a408429b2 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -12,10 +12,8 @@
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
 
-#undef XE_REG
 #undef XE_REG_MCR
-#define XE_REG(x, ...)		_XE_RTP_REG(x)
-#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
 
 static const struct xe_rtp_entry gt_tunings[] = {
 	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
@@ -54,8 +52,7 @@ static const struct xe_rtp_entry lrc_tunings[] = {
 	},
 	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
 	},
 	{}
 };
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index ed3fa51ccd246..b0bb2f4438f43 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -87,10 +87,8 @@
  *    a more declarative approach rather than procedural.
  */
 
-#undef XE_REG
 #undef XE_REG_MCR
-#define XE_REG(x, ...)		_XE_RTP_REG(x)
-#define XE_REG_MCR(x, ...)	_XE_RTP_MCR_REG(x)
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
 
 __diag_push();
 __diag_ignore_all("-Woverride-init", "Allow field overrides in table");
@@ -232,8 +230,7 @@ static const struct xe_rtp_entry gt_was[] = {
 	},
 	{ XE_RTP_NAME("16016694945"),
 	  XE_RTP_RULES(PLATFORM(PVC)),
-	  XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC))
 	},
 	{}
 };
@@ -248,36 +245,30 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS))
 	},
 	{ XE_RTP_NAME("14010229206, 1409085225"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
 		       ENGINE_CLASS(RENDER),
 		       IS_INTEGRATED),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
 	},
 	{ XE_RTP_NAME("1606931601"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ))
 	},
 	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE))
 	},
 	{ XE_RTP_NAME("1406941453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL))
 	},
 	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1,
-			     FFSC_PERCTX_PREEMPT_CTRL,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     FFSC_PERCTX_PREEMPT_CTRL))
 	},
 
 	/* TGL */
@@ -286,8 +277,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
 			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-			     RC_SEMA_IDLE_MSG_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     RC_SEMA_IDLE_MSG_DISABLE))
 	},
 
 	/* RKL */
@@ -296,8 +286,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
 			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-			     RC_SEMA_IDLE_MSG_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     RC_SEMA_IDLE_MSG_DISABLE))
 	},
 
 	/* ADL-P */
@@ -306,8 +295,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
 			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
-			     RC_SEMA_IDLE_MSG_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     RC_SEMA_IDLE_MSG_DISABLE))
 	},
 
 	/* DG2 */
@@ -324,8 +312,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("18017747507"),
 	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(VFG_PREEMPTION_CHICKEN,
-			     POLYGON_TRIFAN_LINELOOP_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     POLYGON_TRIFAN_LINELOOP_DISABLE))
 	},
 	{ XE_RTP_NAME("22012826095, 22013059131"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0),
@@ -366,15 +353,13 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("14015227452"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
 	},
 	{ XE_RTP_NAME("16015675438"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2,
-			     PERF_FIX_BALANCING_CFE_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     PERF_FIX_BALANCING_CFE_DISABLE))
 	},
 	{ XE_RTP_NAME("16011620976, 22015475538"),
 	  XE_RTP_RULES(PLATFORM(DG2),
@@ -385,7 +370,6 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
-			     XE_RTP_ACTION_FLAG(MASKED_REG),
 			     /*
 			      * Register can't be read back for verification on
 			      * DG2 due to Wa_14012342262
@@ -396,7 +380,6 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
-			     XE_RTP_ACTION_FLAG(MASKED_REG),
 			     /*
 			      * Register can't be read back for verification on
 			      * DG2 due to Wa_14012342262
@@ -405,55 +388,46 @@ static const struct xe_rtp_entry engine_was[] = {
 	},
 	{ XE_RTP_NAME("1509727124"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB))
 	},
 	{ XE_RTP_NAME("22012856258"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION))
 	},
 	{ XE_RTP_NAME("14013392000"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE))
 	},
 	{ XE_RTP_NAME("14012419201"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
-			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
 	},
 	{ XE_RTP_NAME("14012419201"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
-			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
 	},
 	{ XE_RTP_NAME("1308578152"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER),
 		       FUNC(xe_rtp_match_first_gslice_fused_off)),
 	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1,
-			     REPLAY_MODE_GRANULARITY,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     REPLAY_MODE_GRANULARITY))
 	},
 	{ XE_RTP_NAME("22010960976, 14013347512"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0,
-			     LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK))
 	},
 	{ XE_RTP_NAME("1608949956, 14010198302"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN,
-			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE))
 	},
 	{ XE_RTP_NAME("22010430635"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
-			     DISABLE_GRF_CLEAR,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DISABLE_GRF_CLEAR))
 	},
 	{ XE_RTP_NAME("14013202645"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER)),
@@ -466,21 +440,18 @@ static const struct xe_rtp_entry engine_was[] = {
 	{ XE_RTP_NAME("22012532006"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
-			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
 	},
 	{ XE_RTP_NAME("22012532006"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
-			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
 	},
 	{ XE_RTP_NAME("22014600077"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(B0, FOREVER),
 		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
 			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
-			     XE_RTP_ACTION_FLAG(MASKED_REG),
 			     /* 
 			      * Wa_14012342262 write-only reg, so skip
 			      * verification
@@ -491,7 +462,6 @@ static const struct xe_rtp_entry engine_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
 			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
-			     XE_RTP_ACTION_FLAG(MASKED_REG),
 			     /* 
 			      * Wa_14012342262 write-only reg, so skip
 			      * verification
@@ -507,18 +477,15 @@ static const struct xe_rtp_entry engine_was[] = {
 	},
 	{ XE_RTP_NAME("14015227452"),
 	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
 	},
 	{ XE_RTP_NAME("16015675438"),
 	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE))
 	},
 	{ XE_RTP_NAME("14014999345"),
 	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), STEP(B0, C0)),
-	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC))
 	},
 	{}
 };
@@ -527,25 +494,21 @@ static const struct xe_rtp_entry lrc_was[] = {
 	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3,
-			     DISABLE_CPS_AWARE_COLOR_PIPE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DISABLE_CPS_AWARE_COLOR_PIPE))
 	},
 	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1,
 				   PREEMPT_GPGPU_LEVEL_MASK,
-				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
-				   XE_RTP_ACTION_FLAG(MASKED_REG)))
+				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL))
 	},
 	{ XE_RTP_NAME("1806527549"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
-	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE))
 	},
 	{ XE_RTP_NAME("1606376872"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
-	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC))
 	},
 
 	/* DG1 */
@@ -553,65 +516,54 @@ static const struct xe_rtp_entry lrc_was[] = {
 	{ XE_RTP_NAME("1409044764"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
 	  XE_RTP_ACTIONS(CLR(COMMON_SLICE_CHICKEN3,
-			     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN))
 	},
 	{ XE_RTP_NAME("22010493298"),
 	  XE_RTP_RULES(PLATFORM(DG1)),
 	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN,
-			     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE))
 	},
 
 	/* DG2 */
 
 	{ XE_RTP_NAME("16011186671"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)),
-	  XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH,
-			     .flags = XE_RTP_ACTION_FLAG_MASKED_REG),
-			 SET(VFLSKPD, DIS_OVER_FETCH_CACHE,
-			     .flags = XE_RTP_ACTION_FLAG_MASKED_REG))
+	  XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH),
+			 SET(VFLSKPD, DIS_OVER_FETCH_CACHE))
 	},
 	{ XE_RTP_NAME("14010469329"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
-			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE))
 	},
 	{ XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
-			     DISABLE_CPS_AWARE_COLOR_PIPE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     DISABLE_CPS_AWARE_COLOR_PIPE))
 	},
 	{ XE_RTP_NAME("16013271637"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1,
-			     MSC_MSAA_REODER_BUF_BYPASS_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     MSC_MSAA_REODER_BUF_BYPASS_DISABLE))
 	},
 	{ XE_RTP_NAME("14014947963"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(FIELD_SET(VF_PREEMPTION,
 				   PREEMPTION_VERTEX_COUNT,
-				   0x4000,
-				   XE_RTP_ACTION_FLAG(MASKED_REG)))
+				   0x4000))
 	},
 	{ XE_RTP_NAME("18018764978"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2,
-			     SCOREBOARD_STALL_FLUSH_CONTROL,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+			     SCOREBOARD_STALL_FLUSH_CONTROL))
 	},
 	{ XE_RTP_NAME("15010599737"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN))
 	},
 	{ XE_RTP_NAME("18019271663"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE,
-			     XE_RTP_ACTION_FLAG(MASKED_REG)))
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
 	},
 	{}
 };
-- 
GitLab


From 9a56502fe1815f0032eea07ce3584acf17173ce1 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 15:32:56 -0700
Subject: [PATCH 1514/2340] drm/xe: Move helper macros to separate header

The macros to handle the RTP tables are very scary, but shouldn't be
used outside of the header adding the infra. Move it to a separate
header and make sure it's only included when it can be.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230427223256.1432787-11-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile         |  6 ++-
 drivers/gpu/drm/xe/xe_rtp.h         | 75 ++------------------------
 drivers/gpu/drm/xe/xe_rtp_helpers.h | 81 +++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+), 72 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_rtp_helpers.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 42459727e67ac..71c604ecff531 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -103,10 +103,12 @@ xe-y += xe_bb.o \
 
 obj-$(CONFIG_DRM_XE) += xe.o
 obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
-\
+
 # header test
+hdrtest_find_args := -not -path xe_rtp_helpers.h
+
 always-$(CONFIG_DRM_XE_WERROR) += \
-	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h'))
+	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args)))
 
 quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
       cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index afbf5a2674f4f..8a89ad45589a2 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -9,8 +9,13 @@
 #include <linux/types.h>
 #include <linux/xarray.h>
 
+#define _XE_RTP_INCLUDE_PRIVATE_HELPERS
+
+#include "xe_rtp_helpers.h"
 #include "xe_rtp_types.h"
 
+#undef _XE_RTP_INCLUDE_PRIVATE_HELPERS
+
 /*
  * Register table poke infrastructure
  */
@@ -19,76 +24,6 @@ struct xe_hw_engine;
 struct xe_gt;
 struct xe_reg_sr;
 
-/*
- * Helper macros - not to be used outside this header.
- */
-#define _XE_ESC(...) __VA_ARGS__
-#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,))
-#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_
-
-#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
-#define __XE_FIRST(x_,...) x_
-#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
-#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__)
-
-#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
-
-#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
-#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
-
-#define __XE_RTP_PASTE_SEP_COMMA		,
-#define __XE_RTP_PASTE_SEP_BITWISE_OR		|
-
-/*
- * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple
- * @args, with the end result separated by @sep_. @sep must be one of the
- * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such
- * prefix.
- *
- * Examples:
- *
- * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
- *    expands to:
- *
- *	XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
- *
- * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
- *    expands to:
- *
- *	XE_RTP_TEST2_FOO
- *
- * 3) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR))
- *    expands to:
- *
- *	XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
- *
- * 4) #define __XE_RTP_PASTE_SEP_MY_SEP	BANANA
- *    XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR))
- *    expands to:
- *
- *	XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
- */
-#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_))
-#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
-#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
-#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
-#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
-
-
-/*
- * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
- *
- * Example:
- *
- *	#define foo(a_)	((struct foo){ .a = a_ })
- *	XE_RTP_DROP_CAST(foo(10))
- *	expands to:
- *
- *	{ .a = 10 }
- */
-#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
-
-
 /*
  * Macros to encode rules to match against platform, IP version, stepping, etc.
  * Shouldn't be used directly - see XE_RTP_RULES()
diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h
new file mode 100644
index 0000000000000..1beea434d52d2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RTP_HELPERS_
+#define _XE_RTP_HELPERS_
+
+#ifndef _XE_RTP_INCLUDE_PRIVATE_HELPERS
+#error "This header is supposed to be included by xe_rtp.h only"
+#endif
+
+/*
+ * Helper macros - not to be used outside this header.
+ */
+#define _XE_ESC(...) __VA_ARGS__
+#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,))
+#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_
+
+#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
+#define __XE_FIRST(x_,...) x_
+#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
+#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__)
+
+#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
+
+#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
+#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
+
+#define __XE_RTP_PASTE_SEP_COMMA		,
+#define __XE_RTP_PASTE_SEP_BITWISE_OR		|
+
+/*
+ * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple
+ * @args, with the end result separated by @sep_. @sep must be one of the
+ * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such
+ * prefix.
+ *
+ * Examples:
+ *
+ * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
+ *
+ * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
+ *    expands to:
+ *
+ *	XE_RTP_TEST2_FOO
+ *
+ * 3) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
+ *
+ * 4) #define __XE_RTP_PASTE_SEP_MY_SEP	BANANA
+ *    XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
+ */
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_))
+#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
+#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
+
+/*
+ * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
+ *
+ * Example:
+ *
+ *	#define foo(a_)	((struct foo){ .a = a_ })
+ *	XE_RTP_DROP_CAST(foo(10))
+ *	expands to:
+ *
+ *	{ .a = 10 }
+ */
+#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
+
+#endif
-- 
GitLab


From ad799e4ace0dd8b81ff698dc92d6f1419fc49d4f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 27 Apr 2023 11:44:09 -0700
Subject: [PATCH 1515/2340] drm/xe: Fix media detection for pre-GMD_ID
 platforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reading the GMD_ID register on platforms before that register
became available is not reliable. The assumption was that since the
register was not allocated, it would return 0. But on PVC for example it
returns garbage (or a very specific number), triggering the following
error:

	xe 0000:8c:00.0: [drm] *ERROR* Hardware reports unknown media version 1025.55

Fix it by stop relying on the value returned by that registers on
platforms before GMD_ID. Instead this relies on the graphics description
struct being already pre-set on the device: this can only ever be true
for platforms before the GMD_ID support. In that case, GMD_ID is skipped
and the hardcoded values are used.  This should also help on early
bring-up in case the GMD_ID returns something not expected and we need to
temporarily hardcode values. With this, PVC doesn't trigger the error
and goes straight to:

	xe 0000:8c:00.0: [drm:xe_display_info_init [xe]] No display IP, skipping
	xe 0000:8c:00.0: [drm:xe_pci_probe [xe]] XE_PVC  0bd5:002f dgfx:1 gfx:Xe_HPC (12.60) media:none (0.00) dma_m_s:52 tc:2
	xe 0000:8c:00.0: [drm:xe_pci_probe [xe]] Stepping = (G:C0, M:**, D:**, B:B3)

Fixes: 5822bba943ad ("drm/xe: Select graphics/media descriptors from GMD_ID")
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://lore.kernel.org/r/20230427184408.1340988-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 113 ++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c1f2f63548d3d..473ee8df2db2b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -38,7 +38,9 @@ struct xe_gt_desc {
 };
 
 struct xe_device_desc {
+	/* Should only ever be set for platforms without GMD_ID */
 	const struct xe_graphics_desc *graphics;
+	/* Should only ever be set for platforms without GMD_ID */
 	const struct xe_media_desc *media;
 
 	const char *platform_name;
@@ -402,6 +404,30 @@ static u32 peek_gmdid(struct xe_device *xe, u32 gmdid_offset)
 		REG_FIELD_GET(GMD_ID_RELEASE_MASK, ver);
 }
 
+/*
+ * Pre-GMD_ID platform: device descriptor already points to the appropriate
+ * graphics descriptor. Simply forward the description and calculate the version
+ * appropriately. "graphics" should be present in all such platforms, while
+ * media is optional.
+ */
+static void handle_pre_gmdid(struct xe_device *xe,
+			     const struct xe_device_desc *desc,
+			     const struct xe_graphics_desc **graphics,
+			     const struct xe_media_desc **media)
+{
+	*graphics = desc->graphics;
+	xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel;
+
+	*media = desc->media;
+	if (*media)
+		xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel;
+
+}
+
+/*
+ * GMD_ID platform: read IP version from hardware and select graphics descriptor
+ * based on the result.
+ */
 static void handle_gmdid(struct xe_device *xe,
 			 const struct xe_device_desc *desc,
 			 const struct xe_graphics_desc **graphics,
@@ -409,69 +435,35 @@ static void handle_gmdid(struct xe_device *xe,
 {
 	u32 ver;
 
-	if (desc->graphics) {
-		/*
-		 * Pre-GMD_ID platform; device descriptor already points to
-		 * the appropriate graphics descriptor.
-		 */
-		*graphics = desc->graphics;
-		xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel;
-	} else {
-		/*
-		 * GMD_ID platform; read IP version from hardware and select
-		 * graphics descriptor based on the result.
-		 */
-		ver = peek_gmdid(xe, GMD_ID.reg);
-		for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
-			if (ver == graphics_ip_map[i].ver) {
-				xe->info.graphics_verx100 = ver;
-				*graphics = graphics_ip_map[i].ip;
-
-				break;
-			}
-		}
+	ver = peek_gmdid(xe, GMD_ID.reg);
+	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
+		if (ver == graphics_ip_map[i].ver) {
+			xe->info.graphics_verx100 = ver;
+			*graphics = graphics_ip_map[i].ip;
 
-		if (!xe->info.graphics_verx100) {
-			drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
-				ver / 100, ver % 100);
+			break;
 		}
 	}
 
-	if (desc->media) {
-		/*
-		 * Pre-GMD_ID platform; device descriptor already points to
-		 * the appropriate media descriptor.
-		 */
-		*media = desc->media;
-		xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel;
-	} else {
-		/*
-		 * GMD_ID platform; read IP version from hardware and select
-		 * media descriptor based on the result.
-		 *
-		 * desc->media can also be NULL for a pre-GMD_ID platform that
-		 * simply doesn't have media (e.g., PVC); in that case the
-		 * attempt to read GMD_ID will return 0 (since there's no
-		 * register at that location).
-		 */
-		ver = peek_gmdid(xe, GMD_ID.reg + 0x380000);
-		if (ver == 0)
-			return;
-
-		for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
-			if (ver == media_ip_map[i].ver) {
-				xe->info.media_verx100 = ver;
-				*media = media_ip_map[i].ip;
-
-				break;
-			}
-		}
+	if (!xe->info.graphics_verx100) {
+		drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
+			ver / 100, ver % 100);
+	}
+
+	ver = peek_gmdid(xe, GMD_ID.reg + 0x380000);
+	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
+		if (ver == media_ip_map[i].ver) {
+			xe->info.media_verx100 = ver;
+			*media = media_ip_map[i].ip;
 
-		if (!xe->info.media_verx100) {
-			drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
-				ver / 100, ver % 100);
+			break;
 		}
 	}
+
+	if (!xe->info.media_verx100) {
+		drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
+			ver / 100, ver % 100);
+	}
 }
 
 
@@ -486,9 +478,14 @@ static int xe_info_init(struct xe_device *xe,
 
 	/*
 	 * If this platform supports GMD_ID, we'll detect the proper IP
-	 * descriptor to use from hardware registers.
+	 * descriptor to use from hardware registers. desc->graphics will only
+	 * ever be set at this point for platforms before GMD_ID. In that case
+	 * the IP descriptions and versions are simply derived from that.
 	 */
-	handle_gmdid(xe, desc, &graphics_desc, &media_desc);
+	if (desc->graphics)
+		handle_pre_gmdid(xe, desc, &graphics_desc, &media_desc);
+	else
+		handle_gmdid(xe, desc, &graphics_desc, &media_desc);
 
 	/*
 	 * If we couldn't detect the graphics IP, that's considered a fatal
-- 
GitLab


From 4c69e4b4c60a855e6726034e68d0f23029c19301 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Tue, 25 Apr 2023 12:26:24 -0700
Subject: [PATCH 1516/2340] drm/xe: Enable Raptorlake-P
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Raptorlake-P was tested and it is working as the same as Alderlake-P.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 473ee8df2db2b..13a5ce18ee05d 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -312,6 +312,7 @@ static const struct pci_device_id pciidlist[] = {
 	XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
 	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
 	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
 	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
 	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
 	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
-- 
GitLab


From 9bc252522dbb0e6c34e9e0e26a599fa28555d907 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Wed, 3 May 2023 14:49:22 -0300
Subject: [PATCH 1517/2340] drm/xe: Include only relevant header in xe_module.h

Things defined in <linux/init.h> are not really used by that header.
Replace that with <linux/types.h>, to have bool and u32 available.

Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230503174922.252111-1-gustavo.sousa@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_module.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 86916c1763825..7169907c3365d 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -3,7 +3,7 @@
  * Copyright © 2023 Intel Corporation
  */
 
-#include <linux/init.h>
+#include <linux/types.h>
 
 /* Module modprobe variables */
 extern bool enable_guc;
-- 
GitLab


From e3ec5e75911b04b5e9ce67907024d7c5d9a6cb99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Fri, 14 Apr 2023 15:08:33 -0700
Subject: [PATCH 1518/2340] drm/xe: Set default MOCS value for cs instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CS instructions that dont have a explicit MOCS field will use this
default MOCS value.

To do this, it was necessary to initialize part of the mocs earlier
and add new function that loads another array of rtp entries set
during run-time.

This is still missing to handle of mocs read for platforms with
HAS_L3_CCS_READ(aka PVC).

v2:
- move to xe_hw_engine.c
- remove CMD_CCTL auxiliary macros

v3:
- rebased

Bspec: 45826
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 12 ++++++++
 drivers/gpu/drm/xe/xe_gt.c               |  2 ++
 drivers/gpu/drm/xe/xe_hw_engine.c        | 35 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_mocs.c             | 11 ++++++--
 drivers/gpu/drm/xe/xe_mocs.h             |  1 +
 5 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index f6b3b99a562a6..717d560626cec 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -44,6 +44,18 @@
 #define RING_EIR(base)				XE_REG((base) + 0xb0)
 #define RING_EMR(base)				XE_REG((base) + 0xb4)
 #define RING_ESR(base)				XE_REG((base) + 0xb8)
+
+#define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
+/*
+ * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
+ * The lsb of each can be considered a separate enabling bit for encryption.
+ * 6:0 == default MOCS value for reads  =>  6:1 == table index for reads.
+ * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
+ * 15:14 == Reserved => 31:30 are set to 0.
+ */
+#define   CMD_CCTL_WRITE_OVERRIDE_MASK		REG_GENMASK(13, 8)
+#define   CMD_CCTL_READ_OVERRIDE_MASK		REG_GENMASK(6, 1)
+
 #define RING_BBADDR(base)			XE_REG((base) + 0x140)
 #define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
 #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0d4664e344dac..603bb3ae3e370 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -390,6 +390,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	/* Rerun MCR init as we now have hw engine list */
 	xe_gt_mcr_init(gt);
 
+	xe_mocs_init_early(gt);
+
 	err = xe_hw_engines_init_early(gt);
 	if (err)
 		goto err_force_wake;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 795302bcd3aef..04ec276cfcf53 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -21,6 +21,7 @@
 #include "xe_macros.h"
 #include "xe_mmio.h"
 #include "xe_reg_sr.h"
+#include "xe_rtp.h"
 #include "xe_sched_job.h"
 #include "xe_wa.h"
 
@@ -267,6 +268,39 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);
 }
 
+static void
+hw_engine_setup_default_state(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	const u8 mocs_write_idx = gt->mocs.uc_index;
+	/* TODO: missing handling of HAS_L3_CCS_READ platforms */
+	const u8 mocs_read_idx = gt->mocs.uc_index;
+	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
+			        REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
+	const struct xe_rtp_entry engine_was[] = {
+		/*
+		 * RING_CMD_CCTL specifies the default MOCS entry that will be
+		 * used by the command streamer when executing commands that
+		 * don't have a way to explicitly specify a MOCS setting.
+		 * The default should usually reference whichever MOCS entry
+		 * corresponds to uncached behavior, although use of a WB cached
+		 * entry is recommended by the spec in certain circumstances on
+		 * specific platforms.
+		 */
+		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
+		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
+		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
+					   CMD_CCTL_WRITE_OVERRIDE_MASK |
+					   CMD_CCTL_READ_OVERRIDE_MASK,
+					   ring_cmd_cctl_val,
+					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		{}
+	};
+
+	xe_rtp_process(engine_was, &hwe->reg_sr, gt, hwe);
+}
+
 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 				 enum xe_hw_engine_id id)
 {
@@ -293,6 +327,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 
 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
 	xe_wa_process_engine(hwe);
+	hw_engine_setup_default_state(hwe);
 
 	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
 	xe_reg_whitelist_process_engine(hwe);
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index f2ceecd536ed0..0d07811a573f6 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -518,6 +518,15 @@ static void init_l3cc_table(struct xe_gt *gt,
 	}
 }
 
+void xe_mocs_init_early(struct xe_gt *gt)
+{
+	struct xe_mocs_info table;
+
+	get_mocs_settings(gt->xe, &table);
+	gt->mocs.uc_index = table.uc_index;
+	gt->mocs.wb_index = table.wb_index;
+}
+
 void xe_mocs_init(struct xe_gt *gt)
 {
 	struct xe_mocs_info table;
@@ -528,8 +537,6 @@ void xe_mocs_init(struct xe_gt *gt)
 	 */
 	flags = get_mocs_settings(gt->xe, &table);
 	mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags);
-	gt->mocs.uc_index = table.uc_index;
-	gt->mocs.wb_index = table.wb_index;
 
 	if (flags & HAS_GLOBAL_MOCS)
 		__init_mocs_table(gt, &table, GLOBAL_MOCS(0).reg);
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
index 63500a1d6660a..25f7b35a76daf 100644
--- a/drivers/gpu/drm/xe/xe_mocs.h
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -11,6 +11,7 @@
 struct xe_engine;
 struct xe_gt;
 
+void xe_mocs_init_early(struct xe_gt *gt);
 void xe_mocs_init(struct xe_gt *gt);
 
 /**
-- 
GitLab


From bb95a4f9f5c2e9b0a43590958ba1430519592909 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Fri, 14 Apr 2023 20:50:33 -0700
Subject: [PATCH 1519/2340] drm/xe: Set default MOCS value for copy cs
 instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

copy cs instructions that dont have a explict MOCS field will use this
default MOCS value.

v2:
- move to xe_hw_engine.c
- remove BLIT_CCTL auxiliary macros
- removed MASKED_REG

v3:
- rebased

v4:
- process workaround in hwe->reg_lrc

v5:
- add a new function and call it from xe_gt_record_default_lrcs()
because hwe->reg_lrc is initialized later

BSpec: 45807
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  6 +++++
 drivers/gpu/drm/xe/xe_gt.c               |  1 +
 drivers/gpu/drm/xe/xe_hw_engine.c        | 29 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_hw_engine.h        |  1 +
 4 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 717d560626cec..79873bf64e8dd 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -58,6 +58,12 @@
 
 #define RING_BBADDR(base)			XE_REG((base) + 0x140)
 #define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
+
+/* Handling MOCS value in BLIT_CCTL like it was done CMD_CCTL */
+#define BLIT_CCTL(base)				XE_REG((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK		REG_GENMASK(14, 9)
+#define   BLIT_CCTL_SRC_MOCS_MASK		REG_GENMASK(6, 1)
+
 #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
 #define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 603bb3ae3e370..3afca3dd9657d 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -225,6 +225,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 
 		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
 		xe_wa_process_lrc(hwe);
+		xe_hw_engine_setup_default_lrc_state(hwe);
 		xe_tuning_process_lrc(hwe);
 
 		default_lrc = drmm_kzalloc(&xe->drm,
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 04ec276cfcf53..a9adac0624f69 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -268,6 +268,35 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);
 }
 
+void
+xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	const u8 mocs_write_idx = gt->mocs.uc_index;
+	const u8 mocs_read_idx = gt->mocs.uc_index;
+	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
+			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
+	const struct xe_rtp_entry lrc_was[] = {
+		/*
+		 * Some blitter commands do not have a field for MOCS, those
+		 * commands will use MOCS index pointed by BLIT_CCTL.
+		 * BLIT_CCTL registers are needed to be programmed to un-cached.
+		 */
+		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
+		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
+			       ENGINE_CLASS(COPY)),
+		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
+				 BLIT_CCTL_DST_MOCS_MASK |
+				 BLIT_CCTL_SRC_MOCS_MASK,
+				 blit_cctl_val,
+				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		{}
+	};
+
+	xe_rtp_process(lrc_was, &hwe->reg_lrc, gt, hwe);
+}
+
 static void
 hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 {
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index ceab653972568..013efcd6d8c5b 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -17,6 +17,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
 void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p);
 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
 				enum xe_engine_class engine_class);
+void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
 
 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
 static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
-- 
GitLab


From 116d32515214910d8a34538dbd09ef26a878d5ae Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 4 May 2023 22:29:43 +0200
Subject: [PATCH 1520/2340] drm/xe: Fix splat during error dump

Allow xe_bo_addr without lock to print debug information, such
as from xe_analyze_vm.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 19 +++++++++++++++----
 drivers/gpu/drm/xe/xe_bo.h |  2 ++
 drivers/gpu/drm/xe/xe_vm.c |  2 +-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 9a565203deaca..4693372ec82e9 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1545,15 +1545,18 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+/*
+ * Resolve a BO address. There is no assert to check if the proper lock is held
+ * so it should only be used in cases where it is not fatal to get the wrong
+ * address, such as printing debug information, but not in cases where memory is
+ * written based on this result.
+ */
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset,
 		      size_t page_size, bool *is_vram)
 {
 	struct xe_res_cursor cur;
 	u64 page;
 
-	if (!READ_ONCE(bo->ttm.pin_count))
-		xe_bo_assert_held(bo);
-
 	XE_BUG_ON(page_size > PAGE_SIZE);
 	page = offset >> PAGE_SHIFT;
 	offset &= (PAGE_SIZE - 1);
@@ -1575,6 +1578,14 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
 	}
 }
 
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+		      size_t page_size, bool *is_vram)
+{
+	if (!READ_ONCE(bo->ttm.pin_count))
+		xe_bo_assert_held(bo);
+	return __xe_bo_addr(bo, offset, page_size, is_vram);
+}
+
 int xe_bo_vmap(struct xe_bo *bo)
 {
 	void *virtual;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8354d05ccdf37..7e111332c35aa 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -199,6 +199,8 @@ static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
 }
 
 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset,
+		      size_t page_size, bool *is_vram);
 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
 		      size_t page_size, bool *is_vram);
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index e634bb96f9ccd..06b559ff80bf5 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3421,7 +3421,7 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 					&cur);
 			addr = xe_res_dma(&cur);
 		} else {
-			addr = xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram);
+			addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram);
 		}
 		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
 			   vma->start, vma->end, vma->end - vma->start + 1ull,
-- 
GitLab


From 14dac5a5748cc477f5d8887a45ca32011b9ffea3 Mon Sep 17 00:00:00 2001
From: Christopher Snowhill <kode54@gmail.com>
Date: Mon, 24 Apr 2023 19:19:21 -0700
Subject: [PATCH 1521/2340] drm/xe: Enable the compat ioctl functionality
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required at the minimum for the DRM UAPI to function from
32-bit userspace with a 64-bit kernel.

Signed-off-by: Christopher Snowhill <kode54@gmail.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index b13bbdeeef51a..2f8777f365a42 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -112,7 +112,7 @@ static const struct file_operations xe_driver_fops = {
 	.mmap = drm_gem_mmap,
 	.poll = drm_poll,
 	.read = drm_read,
-//	.compat_ioctl = i915_ioc32_compat_ioctl,
+	.compat_ioctl = drm_compat_ioctl,
 	.llseek = noop_llseek,
 };
 
-- 
GitLab


From dbeb2bd25350c7e771547638e266ce16030ba91c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 4 May 2023 00:32:44 -0700
Subject: [PATCH 1522/2340] drm/xe: Do not mark 1809175790 as a WA

Additional programming annotated with Wa_<number> should be reserved to
those that have a official workaround. Just pointing to a bug or
additional reference can be done with something else. Copy what i915
does and refer to it as "hsdes: ....".

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230504073250.1436293-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 4c5f46f892416..c1b738e033c7f 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -199,7 +199,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
 
 	dw[i++] = preparser_disable(true);
-	/* Wa_1809175790 */
+
+	/* hsdes: 1809175790 */
 	if (!xe->info.has_flat_ccs) {
 		if (decode)
 			i = emit_aux_table_inv(gt, VD0_AUX_INV.reg, dw, i);
@@ -244,9 +245,11 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
 		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
 	i = emit_pipe_invalidate(mask_flags, dw, i);
-	/* Wa_1809175790 */
+
+	/* hsdes: 1809175790 */
 	if (!xe->info.has_flat_ccs)
 		i = emit_aux_table_inv(gt, CCS_AUX_INV.reg, dw, i);
+
 	dw[i++] = preparser_disable(false);
 
 	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-- 
GitLab


From 215bb2ce605bb182939e4dee445b6d95e0d1b843 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 4 May 2023 00:32:45 -0700
Subject: [PATCH 1523/2340] drm/xe: Fix comment on Wa_22013088509

On i915 the "see comment about Wa_22013088509" referred to the comment
in the graphics version >= 11 branch, where there were more details
about it. From the platforms supported by xe, only PVC needs
Wa_22013088509, but as the comment says, it's simpler to do it for all
platforms as there is no downside.  Bring the missing comment over from
i915 and reword it to fit xe better.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230504073250.1436293-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 55b240a5eaa79..02afb313bfea7 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -461,9 +461,16 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
 	}
 
 	/*
-	 * Always leave the hardware in multicast mode when doing reads
-	 * (see comment about Wa_22013088509 below) and only change it
-	 * to unicast mode when doing writes of a specific instance.
+	 * Always leave the hardware in multicast mode when doing reads and only
+	 * change it to unicast mode when doing writes of a specific instance.
+	 *
+	 * The setting of the multicast/unicast bit usually wouldn't matter for
+	 * read operations (which always return the value from a single register
+	 * instance regardless of how that bit is set), but some platforms may
+	 * have workarounds requiring us to remain in multicast mode for reads,
+	 * e.g. Wa_22013088509 on PVC.  There's no real downside to this, so
+	 * we'll just go ahead and do so on all platforms; we'll only clear the
+	 * multicast bit from the mask when explicitly doing a write operation.
 	 *
 	 * No need to save old steering reg value.
 	 */
-- 
GitLab


From a31153fcb1dc2baaf13e520f71f332d4eae28b52 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 4 May 2023 00:32:46 -0700
Subject: [PATCH 1524/2340] drm/xe/guc: Remove special handling for PVC A*

The rest of the driver doesn't really support PVC before B0 stepping.
Drop the special handling in xe_guc.c.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230504073250.1436293-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 89d20faced19d..62b4fcf84acfa 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -143,8 +143,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 
 	/* Wa_16011759253 */
 	/* Wa_22011383443 */
-	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) ||
-	    IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0))
 		flags |= GUC_WA_GAM_CREDITS;
 
 	/* Wa_14014475959 */
@@ -164,11 +163,8 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 
 	/*
 	 * Wa_2201180203
-	 * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe
-	 * errors. Disable this for PVC A0 steppings.
 	 */
-	if (GRAPHICS_VER(xe) <= 12 &&
-	    !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+	if (GRAPHICS_VER(xe) <= 12)
 		flags |= GUC_WA_PRE_PARSER;
 
 	/* Wa_16011777198 */
@@ -180,9 +176,6 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	/*
 	 * Wa_22012727170
 	 * Wa_22012727685
-	 *
-	 * This WA is applicable to PVC CT A0, but causes media regressions. 
-	 * Drop the WA for PVC.
 	 */
 	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
 	    IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
@@ -194,10 +187,9 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
 
 	/* Wa_1509372804 */
-	if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0))
+	if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_B0, STEP_C0))
 		flags |= GUC_WA_RENDER_RST_RC6_EXIT;
 
-
 	return flags;
 }
 
-- 
GitLab


From 98ce59e9ba5cd513bd57e0f4558a33833e07f7e8 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 28 Apr 2023 23:23:27 -0700
Subject: [PATCH 1525/2340] drm/xe/guc: Handle RCU_MODE as masked from
 definition

guc_mmio_regset_write() had a flags for the registers to be added to the
GuC's regset list. The only register actually using that was RCU_MODE,
but it was setting the flags to a bogus value. From
struct xe_guc_fwif.h,

	#define GUC_REGSET_MASKED               BIT(0)
	#define GUC_REGSET_MASKED_WITH_VALUE    BIT(2)
	#define GUC_REGSET_RESTORE_ONLY         BIT(3)

Cross checking with i915, the only flag to set in RCU_MODE is
GUC_REGSET_MASKED. That can be done automatically from the register, as
long as the definition is correct.

Add the XE_REG_OPTION_MASKED annotation to RCU_MODE and kill the "flags"
field in guc_mmio_regset_write(): guc_mmio_regset_write_one() can decide
that based on the register being passed.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230429062332.354139-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c      | 31 +++++++++++-----------------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 8dd3bf2f63772..4a38f78277b5e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -325,7 +325,7 @@
 #define SARB_CHICKEN1				XE_REG_MCR(0xe90c)
 #define   COMP_CKN_IN				REG_GENMASK(30, 29)
 
-#define RCU_MODE				XE_REG(0x14800)
+#define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
 #define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
 
 #define FORCEWAKE_ACK_GT			XE_REG(0x130044)
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 676137dcb5106..84c2d7c624c63 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -422,12 +422,12 @@ static void guc_capture_list_init(struct xe_guc_ads *ads)
 
 static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
 				      struct iosys_map *regset_map,
-				      u32 reg, u32 flags,
+				      struct xe_reg reg,
 				      unsigned int n_entry)
 {
 	struct guc_mmio_reg entry = {
-		.offset = reg,
-		.flags = flags,
+		.offset = reg.reg,
+		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
 		/* TODO: steering */
 	};
 
@@ -446,40 +446,33 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 	unsigned long idx;
 	unsigned count = 0;
 	const struct {
-		u32 reg;
-		u32 flags;
+		struct xe_reg reg;
 		bool skip;
 	} *e, extra_regs[] = {
-		{ .reg = RING_MODE(hwe->mmio_base).reg,		},
-		{ .reg = RING_HWS_PGA(hwe->mmio_base).reg,		},
-		{ .reg = RING_IMR(hwe->mmio_base).reg,			},
-		{ .reg = RCU_MODE.reg, .flags = 0x3,
-		  .skip = hwe != hwe_rcs_reset_domain			},
+		{ .reg = RING_MODE(hwe->mmio_base),			},
+		{ .reg = RING_HWS_PGA(hwe->mmio_base),			},
+		{ .reg = RING_IMR(hwe->mmio_base),			},
+		{ .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain	},
 	};
 	u32 i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
 
-	xa_for_each(&hwe->reg_sr.xa, idx, entry) {
-		u32 flags = entry->reg.masked ? GUC_REGSET_MASKED : 0;
-
-		guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++);
-	}
+	xa_for_each(&hwe->reg_sr.xa, idx, entry)
+		guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
 
 	for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
 		if (e->skip)
 			continue;
 
-		guc_mmio_regset_write_one(ads, regset_map,
-					  e->reg, e->flags, count++);
+		guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
 	}
 
 	/* Wa_1607983814 */
 	if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
 		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
 			guc_mmio_regset_write_one(ads, regset_map,
-						  LNCFCMOCS(i).reg, 0,
-						  count++);
+						  LNCFCMOCS(i), count++);
 		}
 	}
 
-- 
GitLab


From a56d8dabf134e30ed898128aae6ca830c03b6abb Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Fri, 5 May 2023 14:40:19 +0200
Subject: [PATCH 1526/2340] drm/xe: Do not sleep in atomic

Set atomic in xe_mmio_wait32() otherwise we would
be scheduling in atomic context.

Fixes: 7dc9b92dcfef ("drm/xe: Remove i915_utils dependency from xe_pcode.")
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 02afb313bfea7..125c63bdc9b5f 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -424,7 +424,7 @@ static void mcr_lock(struct xe_gt *gt)
 	 */
 	if (GRAPHICS_VERx100(xe) >= 1270)
 		ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL,
-				     false);
+				     true);
 
 	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
 }
-- 
GitLab


From a5cecbac92d5a50dd2f70a01dc53e19312f4081f Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Fri, 5 May 2023 15:34:33 +0200
Subject: [PATCH 1527/2340] drm/xe: Print GT info on TLB inv failure

Print GT info on TLB inv failure for better debugbility.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 604f189dbd703..9e7fe8d9bca41 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -34,8 +34,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 			break;
 
 		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
-		drm_err(&gt_to_xe(gt)->drm, "TLB invalidation fence timeout, seqno=%d",
-			fence->seqno);
+		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d",
+			gt->info.id, fence->seqno);
 
 		list_del(&fence->link);
 		fence->base.error = -ETIME;
@@ -285,8 +285,8 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 				 tlb_invalidation_seqno_past(gt, seqno),
 				 TLB_TIMEOUT);
 	if (!ret) {
-		drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
-			seqno, gt->tlb_invalidation.seqno_recv);
+		drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n",
+			gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
 		return -ETIME;
 	}
 
-- 
GitLab


From e3e4964d335c73e931ea21c8f318d419d3cdb4cc Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Wed, 12 Apr 2023 14:09:23 +0300
Subject: [PATCH 1528/2340] drm/xe: destroy clients engine and vm xarrays on
 close

xe_file_close cleanups the xarrays but forgets
to destroy them causing a memleak in xarray internals.
Found with kmemleak.

Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Christoph Manszewski <christoph.manszewski@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 2f8777f365a42..e686c25a0ad1d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -69,6 +69,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 		xe_engine_put(e);
 	}
 	mutex_unlock(&xef->engine.lock);
+	xa_destroy(&xef->engine.xa);
 	mutex_destroy(&xef->engine.lock);
 	device_kill_persistent_engines(xe, xef);
 
@@ -76,6 +77,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 	xa_for_each(&xef->vm.xa, idx, vm)
 		xe_vm_close_and_put(vm);
 	mutex_unlock(&xef->vm.lock);
+	xa_destroy(&xef->vm.xa);
 	mutex_destroy(&xef->vm.lock);
 
 	kfree(xef);
-- 
GitLab


From 9ca14f94d294862d6f5ee30a6b73f295cfaa5d08 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Mon, 8 May 2023 05:22:23 +0000
Subject: [PATCH 1529/2340] drm/xe: Handle -EDEADLK case in preempt worker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With multiple active VMs, under memory pressure, it is possible that
ttm_bo_validate() run into -EDEADLK in ttm_mem_evict_wait_busy() and
return -ENOMEM.

Until ttm properly handles locking in such scenarios, best thing the
driver can do is unwind the lock and retry.

Update preempt worker to retry validating BOs with a timeout upon
-ENOMEM.

v2: revert retry timeout upon -EAGAIN (Thomas)

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 06b559ff80bf5..d9579bf5002db 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -10,6 +10,7 @@
 #include <drm/ttm/ttm_execbuf_util.h>
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
+#include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -508,6 +509,8 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
 		kvfree(tv);
 }
 
+#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
+
 static void preempt_rebind_work_func(struct work_struct *w)
 {
 	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
@@ -519,6 +522,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	struct dma_fence *rebind_fence;
 	unsigned int fence_count = 0;
 	LIST_HEAD(preempt_fences);
+	ktime_t end = 0;
 	int err;
 	long wait;
 	int __maybe_unused tries = 0;
@@ -637,6 +641,24 @@ out_unlock_outer:
 		trace_xe_vm_rebind_worker_retry(vm);
 		goto retry;
 	}
+
+	/*
+	 * With multiple active VMs, under memory pressure, it is possible that
+	 * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
+	 * Until ttm properly handles locking in such scenarios, best thing the
+	 * driver can do is retry with a timeout. Killing the VM or putting it
+	 * in error state after timeout or other error scenarios is still TBD.
+	 */
+	if (err == -ENOMEM) {
+		ktime_t cur = ktime_get();
+
+		end = end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
+		if (ktime_before(cur, end)) {
+			msleep(20);
+			trace_xe_vm_rebind_worker_retry(vm);
+			goto retry;
+		}
+	}
 	up_write(&vm->lock);
 
 	free_preempt_fences(&preempt_fences);
-- 
GitLab


From 34f89ac8e66cd5121fb05c765acc3c67ddbef7a0 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Tue, 9 May 2023 05:08:24 +0000
Subject: [PATCH 1530/2340] drm/xe: Handle -EDEADLK case in exec ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With multiple active VMs, under memory pressure, it is possible that
ttm_bo_validate() run into -EDEADLK in ttm_mem_evict_wait_busy() and
return -ENOMEM.

Until ttm properly handles locking in such scenarios, best thing the
driver can do is unwind the lock and retry.

Update xe_exec_begin to retry validating BOs with a timeout upon
-ENOMEM.

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index ea869f2452efe..3db1b159586ee 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -8,6 +8,7 @@
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
 #include <drm/xe_drm.h>
+#include <linux/delay.h>
 
 #include "xe_bo.h"
 #include "xe_device.h"
@@ -91,6 +92,8 @@
  *	Unlock all
  */
 
+#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
+
 static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
 			 struct ttm_validate_buffer tv_onstack[],
 			 struct ttm_validate_buffer **tv,
@@ -99,12 +102,14 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
 	struct xe_vm *vm = e->vm;
 	struct xe_vma *vma;
 	LIST_HEAD(dups);
-	int err;
+	ktime_t end = 0;
+	int err = 0;
 
 	*tv = NULL;
 	if (xe_vm_no_dma_fences(e->vm))
 		return 0;
 
+retry:
 	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
 	if (err)
 		return err;
@@ -122,11 +127,27 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
 		if (err) {
 			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
 			*tv = NULL;
-			return err;
+			break;
+		}
+	}
+
+	/*
+	 * With multiple active VMs, under memory pressure, it is possible that
+	 * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
+	 * Until ttm properly handles locking in such scenarios, best thing the
+	 * driver can do is retry with a timeout.
+	 */
+	if (err == -ENOMEM) {
+		ktime_t cur = ktime_get();
+
+		end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS);
+		if (ktime_before(cur, end)) {
+			msleep(20);
+			goto retry;
 		}
 	}
 
-	return 0;
+	return err;
 }
 
 static void xe_exec_end(struct xe_engine *e,
-- 
GitLab


From ce8bf5bd059542431230eac216693a579dc09dba Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 8 May 2023 15:53:19 -0700
Subject: [PATCH 1531/2340] drm/xe/mmio: Use struct xe_reg

Convert all the callers to deal with xe_mmio_*() using struct xe_reg
instead of plain u32. In a few places there was also a rename
s/reg/reg_val/ when dealing with the value returned so it doesn't get
mixed up with the register address.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230508225322.2692066-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c           |   2 +-
 drivers/gpu/drm/xe/xe_execlist.c         |  18 +--
 drivers/gpu/drm/xe/xe_force_wake.c       |  25 ++--
 drivers/gpu/drm/xe/xe_force_wake_types.h |   6 +-
 drivers/gpu/drm/xe/xe_ggtt.c             |   6 +-
 drivers/gpu/drm/xe/xe_gt.c               |   4 +-
 drivers/gpu/drm/xe/xe_gt_clock.c         |   6 +-
 drivers/gpu/drm/xe/xe_gt_mcr.c           |  37 +++---
 drivers/gpu/drm/xe/xe_gt_topology.c      |  18 +--
 drivers/gpu/drm/xe/xe_guc.c              |  61 +++++-----
 drivers/gpu/drm/xe/xe_guc_ads.c          |   3 +-
 drivers/gpu/drm/xe/xe_guc_pc.c           |  32 +++---
 drivers/gpu/drm/xe/xe_guc_types.h        |   3 +-
 drivers/gpu/drm/xe/xe_huc.c              |   4 +-
 drivers/gpu/drm/xe/xe_hw_engine.c        |  85 +++++++-------
 drivers/gpu/drm/xe/xe_irq.c              | 138 +++++++++++------------
 drivers/gpu/drm/xe/xe_mmio.c             |  31 +++--
 drivers/gpu/drm/xe/xe_mmio.h             |  57 +++++-----
 drivers/gpu/drm/xe/xe_mocs.c             |   8 +-
 drivers/gpu/drm/xe/xe_pat.c              |  14 ++-
 drivers/gpu/drm/xe/xe_pcode.c            |  16 +--
 drivers/gpu/drm/xe/xe_reg_sr.c           |  14 ++-
 drivers/gpu/drm/xe/xe_ring_ops.c         |  11 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c   |   4 +-
 drivers/gpu/drm/xe/xe_uc_fw.c            |  16 +--
 drivers/gpu/drm/xe/xe_wopcm.c            |  12 +-
 26 files changed, 331 insertions(+), 300 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index e686c25a0ad1d..8039142ae1a1c 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -345,7 +345,7 @@ void xe_device_wmb(struct xe_device *xe)
 
 	wmb();
 	if (IS_DGFX(xe))
-		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33.reg, 0);
+		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
 }
 
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index d524ac5c7b579..b0ccc4ff84614 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -60,7 +60,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	}
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
-		xe_mmio_write32(hwe->gt, RCU_MODE.reg,
+		xe_mmio_write32(hwe->gt, RCU_MODE,
 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
 
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
@@ -78,17 +78,17 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	 */
 	wmb();
 
-	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg,
+	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
 			xe_bo_ggtt_addr(hwe->hwsp));
-	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg);
-	xe_mmio_write32(gt, RING_MODE(hwe->mmio_base).reg,
+	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
+	xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
 			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
 
-	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base).reg,
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
 			lower_32_bits(lrc_desc));
-	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base).reg,
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
 			upper_32_bits(lrc_desc));
-	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg,
+	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
 			EL_CTRL_LOAD);
 }
 
@@ -173,8 +173,8 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe)
 	struct xe_gt *gt = hwe->gt;
 	u32 hi, lo;
 
-	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg);
-	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg);
+	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
+	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
 
 	printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class,
 	       hwe->instance, hi, lo);
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 53d73f36a1216..363b81c3d7469 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -8,6 +8,7 @@
 #include <drm/drm_util.h>
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_reg_defs.h"
 #include "xe_gt.h"
 #include "xe_mmio.h"
 
@@ -27,7 +28,7 @@ fw_to_xe(struct xe_force_wake *fw)
 
 static void domain_init(struct xe_force_wake_domain *domain,
 			enum xe_force_wake_domain_id id,
-			u32 reg, u32 ack, u32 val, u32 mask)
+			struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask)
 {
 	domain->id = id;
 	domain->reg_ctl = reg;
@@ -49,14 +50,14 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
 	if (xe->info.graphics_verx100 >= 1270) {
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
 			    XE_FW_DOMAIN_ID_GT,
-			    FORCEWAKE_GT.reg,
-			    FORCEWAKE_ACK_GT_MTL.reg,
+			    FORCEWAKE_GT,
+			    FORCEWAKE_ACK_GT_MTL,
 			    BIT(0), BIT(16));
 	} else {
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
 			    XE_FW_DOMAIN_ID_GT,
-			    FORCEWAKE_GT.reg,
-			    FORCEWAKE_ACK_GT.reg,
+			    FORCEWAKE_GT,
+			    FORCEWAKE_ACK_GT,
 			    BIT(0), BIT(16));
 	}
 }
@@ -71,8 +72,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 	if (!xe_gt_is_media_type(gt))
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
 			    XE_FW_DOMAIN_ID_RENDER,
-			    FORCEWAKE_RENDER.reg,
-			    FORCEWAKE_ACK_RENDER.reg,
+			    FORCEWAKE_RENDER,
+			    FORCEWAKE_ACK_RENDER,
 			    BIT(0), BIT(16));
 
 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
@@ -81,8 +82,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
 			    XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
-			    FORCEWAKE_MEDIA_VDBOX(j).reg,
-			    FORCEWAKE_ACK_MEDIA_VDBOX(j).reg,
+			    FORCEWAKE_MEDIA_VDBOX(j),
+			    FORCEWAKE_ACK_MEDIA_VDBOX(j),
 			    BIT(0), BIT(16));
 	}
 
@@ -92,8 +93,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
 			    XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
-			    FORCEWAKE_MEDIA_VEBOX(j).reg,
-			    FORCEWAKE_ACK_MEDIA_VEBOX(j).reg,
+			    FORCEWAKE_MEDIA_VEBOX(j),
+			    FORCEWAKE_ACK_MEDIA_VEBOX(j),
 			    BIT(0), BIT(16));
 	}
 }
@@ -128,7 +129,7 @@ static int domain_sleep_wait(struct xe_gt *gt,
 	for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \
 		for_each_if((domain__ = ((fw__)->domains + \
 					 (ffs(tmp__) - 1))) && \
-					 domain__->reg_ctl)
+					 domain__->reg_ctl.reg)
 
 int xe_force_wake_get(struct xe_force_wake *fw,
 		      enum xe_force_wake_domains domains)
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
index 208dd629d7b12..cb782696855be 100644
--- a/drivers/gpu/drm/xe/xe_force_wake_types.h
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -9,6 +9,8 @@
 #include <linux/mutex.h>
 #include <linux/types.h>
 
+#include "regs/xe_reg_defs.h"
+
 enum xe_force_wake_domain_id {
 	XE_FW_DOMAIN_ID_GT = 0,
 	XE_FW_DOMAIN_ID_RENDER,
@@ -56,9 +58,9 @@ struct xe_force_wake_domain {
 	/** @id: domain force wake id */
 	enum xe_force_wake_domain_id id;
 	/** @reg_ctl: domain wake control register address */
-	u32 reg_ctl;
+	struct xe_reg reg_ctl;
 	/** @reg_ack: domain ack register address */
-	u32 reg_ack;
+	struct xe_reg reg_ack;
 	/** @val: domain wake write value */
 	u32 val;
 	/** @mask: domain mask */
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 4e5ad616063d7..98903354b4366 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -207,12 +207,12 @@ void xe_ggtt_invalidate(struct xe_gt *gt)
 		struct xe_device *xe = gt_to_xe(gt);
 
 		if (xe->info.platform == XE_PVC) {
-			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg,
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
 					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
-			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg,
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
 					PVC_GUC_TLB_INV_DESC0_VALID);
 		} else
-			xe_mmio_write32(gt, GUC_TLB_INV_CR.reg,
+			xe_mmio_write32(gt, GUC_TLB_INV_CR,
 					GUC_TLB_INV_CR_INVALIDATE);
 	}
 }
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3afca3dd9657d..cbe063a40aca8 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -544,8 +544,8 @@ static int do_gt_reset(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
-	xe_mmio_write32(gt, GDRST.reg, GRDOM_FULL);
-	err = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_FULL, 5000,
+	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
+	err = xe_mmio_wait32(gt, GDRST, 0, GRDOM_FULL, 5000,
 			     NULL, false);
 	if (err)
 		drm_err(&xe->drm,
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 49625d49bdcc1..7cf11078ff572 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -14,7 +14,7 @@
 
 static u32 read_reference_ts_freq(struct xe_gt *gt)
 {
-	u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE.reg);
+	u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE);
 	u32 base_freq, frac_freq;
 
 	base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK,
@@ -54,7 +54,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
 
 int xe_gt_clock_init(struct xe_gt *gt)
 {
-	u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE.reg);
+	u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE);
 	u32 freq = 0;
 
 	/* Assuming gen11+ so assert this assumption is correct */
@@ -63,7 +63,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
 		freq = read_reference_ts_freq(gt);
 	} else {
-		u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg);
+		u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0);
 
 		freq = get_crystal_clock_freq(c0);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 125c63bdc9b5f..c6b9e9869fee5 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -40,6 +40,8 @@
  * non-terminated instance.
  */
 
+#define STEER_SEMAPHORE		XE_REG(0xFD0)
+
 static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
 {
 	return reg_mcr.__reg;
@@ -183,9 +185,9 @@ static void init_steering_l3bank(struct xe_gt *gt)
 {
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
 		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
-						xe_mmio_read32(gt, MIRROR_FUSE3.reg));
+						xe_mmio_read32(gt, MIRROR_FUSE3));
 		u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
-					      xe_mmio_read32(gt, XEHP_FUSE4.reg));
+					      xe_mmio_read32(gt, XEHP_FUSE4));
 
 		/*
 		 * Group selects mslice, instance selects bank within mslice.
@@ -196,7 +198,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
 			bank_mask & BIT(0) ? 0 : 2;
 	} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
 		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
-						xe_mmio_read32(gt, MIRROR_FUSE3.reg));
+						xe_mmio_read32(gt, MIRROR_FUSE3));
 		u32 bank = __ffs(mslice_mask) * 8;
 
 		/*
@@ -208,7 +210,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
 		gt->steering[L3BANK].instance_target = bank & 0x3;
 	} else {
 		u32 fuse = REG_FIELD_GET(L3BANK_MASK,
-					 ~xe_mmio_read32(gt, MIRROR_FUSE3.reg));
+					 ~xe_mmio_read32(gt, MIRROR_FUSE3));
 
 		gt->steering[L3BANK].group_target = 0;	/* unused */
 		gt->steering[L3BANK].instance_target = __ffs(fuse);
@@ -218,7 +220,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
 static void init_steering_mslice(struct xe_gt *gt)
 {
 	u32 mask = REG_FIELD_GET(MEML3_EN_MASK,
-				 xe_mmio_read32(gt, MIRROR_FUSE3.reg));
+				 xe_mmio_read32(gt, MIRROR_FUSE3));
 
 	/*
 	 * mslice registers are valid (not terminated) if either the meml3
@@ -337,8 +339,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
 		u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
 			REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
 
-		xe_mmio_write32(gt, MCFG_MCR_SELECTOR.reg, steer_val);
-		xe_mmio_write32(gt, SF_MCR_SELECTOR.reg, steer_val);
+		xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val);
+		xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val);
 		/*
 		 * For GAM registers, all reads should be directed to instance 1
 		 * (unicast reads against other instances are not allowed),
@@ -376,7 +378,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 			continue;
 
 		for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
-			if (xe_mmio_in_range(&gt->steering[type].ranges[i], reg.reg)) {
+			if (xe_mmio_in_range(&gt->steering[type].ranges[i], reg)) {
 				*group = gt->steering[type].group_target;
 				*instance = gt->steering[type].instance_target;
 				return true;
@@ -387,7 +389,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 	implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
 	if (implicit_ranges)
 		for (int i = 0; implicit_ranges[i].end > 0; i++)
-			if (xe_mmio_in_range(&implicit_ranges[i], reg.reg))
+			if (xe_mmio_in_range(&implicit_ranges[i], reg))
 				return false;
 
 	/*
@@ -403,8 +405,6 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 	return true;
 }
 
-#define STEER_SEMAPHORE		0xFD0
-
 /*
  * Obtain exclusive access to MCR steering.  On MTL and beyond we also need
  * to synchronize with external clients (e.g., firmware), so a semaphore
@@ -446,16 +446,17 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
 				u8 rw_flag, int group, int instance, u32 value)
 {
 	const struct xe_reg reg = to_xe_reg(reg_mcr);
-	u32 steer_reg, steer_val, val = 0;
+	struct xe_reg steer_reg;
+	u32 steer_val, val = 0;
 
 	lockdep_assert_held(&gt->mcr_lock);
 
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
-		steer_reg = MTL_MCR_SELECTOR.reg;
+		steer_reg = MTL_MCR_SELECTOR;
 		steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
 			REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
 	} else {
-		steer_reg = MCR_SELECTOR.reg;
+		steer_reg = MCR_SELECTOR;
 		steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, group) |
 			REG_FIELD_PREP(MCR_SUBSLICE_MASK, instance);
 	}
@@ -480,9 +481,9 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
 	xe_mmio_write32(gt, steer_reg, steer_val);
 
 	if (rw_flag == MCR_OP_READ)
-		val = xe_mmio_read32(gt, reg.reg);
+		val = xe_mmio_read32(gt, reg);
 	else
-		xe_mmio_write32(gt, reg.reg, value);
+		xe_mmio_write32(gt, reg, value);
 
 	/*
 	 * If we turned off the multicast bit (during a write) we're required
@@ -524,7 +525,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
 					   group, instance, 0);
 		mcr_unlock(gt);
 	} else {
-		val = xe_mmio_read32(gt, reg.reg);
+		val = xe_mmio_read32(gt, reg);
 	}
 
 	return val;
@@ -591,7 +592,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
 	 * to touch the steering register.
 	 */
 	mcr_lock(gt);
-	xe_mmio_write32(gt, reg.reg, value);
+	xe_mmio_write32(gt, reg, value);
 	mcr_unlock(gt);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 14cf135fd648a..7c3e347e4d745 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -26,7 +26,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
 
 	va_start(argp, numregs);
 	for (i = 0; i < numregs; i++)
-		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32));
+		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
 	va_end(argp);
 
 	bitmap_from_arr32(mask, fuse_val, numregs * 32);
@@ -36,7 +36,7 @@ static void
 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
 {
 	struct xe_device *xe = gt_to_xe(gt);
-	u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE.reg);
+	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
 	u32 val = 0;
 	int i;
 
@@ -47,15 +47,15 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
 	 * of enable).
 	 */
 	if (GRAPHICS_VERx100(xe) < 1250)
-		reg = ~reg & XELP_EU_MASK;
+		reg_val = ~reg_val & XELP_EU_MASK;
 
 	/* On PVC, one bit = one EU */
 	if (GRAPHICS_VERx100(xe) == 1260) {
-		val = reg;
+		val = reg_val;
 	} else {
 		/* All other platforms, one bit = 2 EU */
-		for (i = 0; i < fls(reg); i++)
-			if (reg & BIT(i))
+		for (i = 0; i < fls(reg_val); i++)
+			if (reg_val & BIT(i))
 				val |= 0x3 << 2 * i;
 	}
 
@@ -95,10 +95,10 @@ xe_gt_topology_init(struct xe_gt *gt)
 
 	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
 		      num_geometry_regs,
-		      XELP_GT_GEOMETRY_DSS_ENABLE.reg);
+		      XELP_GT_GEOMETRY_DSS_ENABLE);
 	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
-		      XEHP_GT_COMPUTE_DSS_ENABLE.reg,
-		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT.reg);
+		      XEHP_GT_COMPUTE_DSS_ENABLE,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
 	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
 
 	xe_gt_topology_dump(gt, &p);
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 62b4fcf84acfa..e8a126ad400f2 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -232,10 +232,10 @@ static void guc_write_params(struct xe_guc *guc)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0);
+	xe_mmio_write32(gt, SOFT_SCRATCH(0), 0);
 
 	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
-		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]);
+		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
 }
 
 int xe_guc_init(struct xe_guc *guc)
@@ -268,9 +268,9 @@ int xe_guc_init(struct xe_guc *guc)
 	guc_init_params(guc);
 
 	if (xe_gt_is_media_type(gt))
-		guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg;
+		guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT;
 	else
-		guc->notify_reg = GUC_HOST_INTERRUPT.reg;
+		guc->notify_reg = GUC_HOST_INTERRUPT;
 
 	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
 
@@ -309,9 +309,9 @@ int xe_guc_reset(struct xe_guc *guc)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	xe_mmio_write32(gt, GDRST.reg, GRDOM_GUC);
+	xe_mmio_write32(gt, GDRST, GRDOM_GUC);
 
-	ret = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_GUC, 5000,
+	ret = xe_mmio_wait32(gt, GDRST, 0, GRDOM_GUC, 5000,
 			     &gdrst, false);
 	if (ret) {
 		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
@@ -319,7 +319,7 @@ int xe_guc_reset(struct xe_guc *guc)
 		goto err_out;
 	}
 
-	guc_status = xe_mmio_read32(gt, GUC_STATUS.reg);
+	guc_status = xe_mmio_read32(gt, GUC_STATUS);
 	if (!(guc_status & GS_MIA_IN_RESET)) {
 		drm_err(&xe->drm,
 			"GuC status: 0x%x, MIA core expected to be in reset\n",
@@ -352,9 +352,9 @@ static void guc_prepare_xfer(struct xe_guc *guc)
 		shim_flags |= PVC_GUC_MOCS_INDEX(PVC_GUC_MOCS_UC_INDEX);
 
 	/* Must program this register before loading the ucode with DMA */
-	xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags);
+	xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
 
-	xe_mmio_write32(gt, GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE);
+	xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
 }
 
 /*
@@ -370,7 +370,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
 	if (guc->fw.rsa_size > 256) {
 		u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
 				    xe_uc_fw_rsa_offset(&guc->fw);
-		xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr);
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
 		return 0;
 	}
 
@@ -379,7 +379,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
 		return -ENOMEM;
 
 	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
-		xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, rsa[i]);
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]);
 
 	return 0;
 }
@@ -407,7 +407,7 @@ static int guc_wait_ucode(struct xe_guc *guc)
 	 * 200ms. Even at slowest clock, this should be sufficient. And
 	 * in the working case, a larger timeout makes no difference.
 	 */
-	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg,
+	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS,
 			     FIELD_PREP(GS_UKERNEL_MASK,
 					XE_GUC_LOAD_STATUS_READY),
 			     GS_UKERNEL_MASK, 200000, &status, false);
@@ -435,7 +435,7 @@ static int guc_wait_ucode(struct xe_guc *guc)
 		    XE_GUC_LOAD_STATUS_EXCEPTION) {
 			drm_info(drm, "GuC firmware exception. EIP: %#x\n",
 				 xe_mmio_read32(guc_to_gt(guc),
-						SOFT_SCRATCH(13).reg));
+						SOFT_SCRATCH(13)));
 			ret = -ENXIO;
 		}
 
@@ -532,10 +532,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg);
+	msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
 	msg &= XE_GUC_RECV_MSG_EXCEPTION |
 		XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
-	xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0);
+	xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
 
 	if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
 		drm_err(&guc_to_xe(guc)->drm,
@@ -553,12 +553,12 @@ static void guc_enable_irq(struct xe_guc *guc)
 		REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST)  :
 		REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
 
-	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg,
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,
 			REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
 	if (xe_gt_is_media_type(gt))
-		xe_mmio_rmw32(gt, GUC_SG_INTR_MASK.reg, events, 0);
+		xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
 	else
-		xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~events);
+		xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~events);
 }
 
 int xe_guc_enable_communication(struct xe_guc *guc)
@@ -567,7 +567,7 @@ int xe_guc_enable_communication(struct xe_guc *guc)
 
 	guc_enable_irq(guc);
 
-	xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK.reg,
+	xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK,
 		      ARAT_EXPIRED_INTRMSK, 0);
 
 	err = xe_guc_ct_enable(&guc->ct);
@@ -620,8 +620,8 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
 	u32 header, reply;
-	u32 reply_reg = xe_gt_is_media_type(gt) ?
-		MED_VF_SW_FLAG(0).reg : VF_SW_FLAG(0).reg;
+	struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
+		MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
 	const u32 LAST_INDEX = VF_SW_FLAG_COUNT;
 	int ret;
 	int i;
@@ -641,14 +641,14 @@ retry:
 	/* Not in critical data-path, just do if else for GT type */
 	if (xe_gt_is_media_type(gt)) {
 		for (i = 0; i < len; ++i)
-			xe_mmio_write32(gt, MED_VF_SW_FLAG(i).reg,
+			xe_mmio_write32(gt, MED_VF_SW_FLAG(i),
 					request[i]);
-		xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX).reg);
+		xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX));
 	} else {
 		for (i = 0; i < len; ++i)
-			xe_mmio_write32(gt, VF_SW_FLAG(i).reg,
+			xe_mmio_write32(gt, VF_SW_FLAG(i),
 					request[i]);
-		xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX).reg);
+		xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX));
 	}
 
 	xe_guc_notify(guc);
@@ -712,9 +712,10 @@ proto:
 	if (response_buf) {
 		response_buf[0] = header;
 
-		for (i = 1; i < VF_SW_FLAG_COUNT; i++)
-			response_buf[i] =
-				xe_mmio_read32(gt, reply_reg + i * sizeof(u32));
+		for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
+			reply_reg.reg += i * sizeof(u32);
+			response_buf[i] = xe_mmio_read32(gt, reply_reg);
+		}
 	}
 
 	/* Use data from the GuC response as our return value */
@@ -836,7 +837,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
 	if (err)
 		return;
 
-	status = xe_mmio_read32(gt, GUC_STATUS.reg);
+	status = xe_mmio_read32(gt, GUC_STATUS);
 
 	drm_printf(p, "\nGuC status 0x%08x:\n", status);
 	drm_printf(p, "\tBootrom status = 0x%x\n",
@@ -851,7 +852,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
 	drm_puts(p, "\nScratch registers:\n");
 	for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
 		drm_printf(p, "\t%2d: \t0x%x\n",
-			   i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg));
+			   i, xe_mmio_read32(gt, SOFT_SCRATCH(i)));
 	}
 
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 84c2d7c624c63..683f2df09c49d 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -428,7 +428,6 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
 	struct guc_mmio_reg entry = {
 		.offset = reg.reg,
 		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
-		/* TODO: steering */
 	};
 
 	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
@@ -551,7 +550,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads)
 
 	if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
 		u32 distdbreg =
-			xe_mmio_read32(gt, DIST_DBS_POPULATED.reg);
+			xe_mmio_read32(gt, DIST_DBS_POPULATED);
 
 		ads_blob_write(ads,
 			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 72d460d5323bd..e799faa1c6b8c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -317,9 +317,9 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
 	u32 reg;
 
 	if (xe_gt_is_media_type(gt))
-		reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY.reg);
+		reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY);
 	else
-		reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY.reg);
+		reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY);
 
 	pc->rpe_freq = REG_FIELD_GET(MTL_RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 }
@@ -336,9 +336,9 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc)
 	 * PCODE at a different register
 	 */
 	if (xe->info.platform == XE_PVC)
-		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg);
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
 	else
-		reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg);
+		reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC);
 
 	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 }
@@ -380,10 +380,10 @@ static ssize_t freq_act_show(struct device *dev,
 		goto out;
 
 	if (xe->info.platform == XE_METEORLAKE) {
-		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1.reg);
+		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
 		freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
 	} else {
-		freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg);
+		freq = xe_mmio_read32(gt, GEN12_RPSTAT1);
 		freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
 	}
 
@@ -413,7 +413,7 @@ static ssize_t freq_cur_show(struct device *dev,
 	if (ret)
 		goto out;
 
-	freq = xe_mmio_read32(gt, RPNSWREQ.reg);
+	freq = xe_mmio_read32(gt, RPNSWREQ);
 
 	freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
 	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
@@ -588,7 +588,7 @@ static ssize_t rc_status_show(struct device *dev,
 	u32 reg;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
-	reg = xe_mmio_read32(gt, GT_CORE_STATUS.reg);
+	reg = xe_mmio_read32(gt, GT_CORE_STATUS);
 	xe_device_mem_access_put(gt_to_xe(gt));
 
 	switch (REG_FIELD_GET(RCN_MASK, reg)) {
@@ -615,7 +615,7 @@ static ssize_t rc6_residency_show(struct device *dev,
 	if (ret)
 		goto out;
 
-	reg = xe_mmio_read32(gt, GT_GFX_RC6.reg);
+	reg = xe_mmio_read32(gt, GT_GFX_RC6);
 	ret = sysfs_emit(buff, "%u\n", reg);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
@@ -646,9 +646,9 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
 	xe_device_assert_mem_access(pc_to_xe(pc));
 
 	if (xe_gt_is_media_type(gt))
-		reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP.reg);
+		reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP);
 	else
-		reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP.reg);
+		reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP);
 	pc->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, reg) *
 		GT_FREQUENCY_MULTIPLIER;
 	pc->rpn_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, reg) *
@@ -664,9 +664,9 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
 	xe_device_assert_mem_access(pc_to_xe(pc));
 
 	if (xe->info.platform == XE_PVC)
-		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg);
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
 	else
-		reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg);
+		reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP);
 	pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 	pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 }
@@ -745,9 +745,9 @@ static int pc_gucrc_disable(struct xe_guc_pc *pc)
 	if (ret)
 		return ret;
 
-	xe_mmio_write32(gt, PG_ENABLE.reg, 0);
-	xe_mmio_write32(gt, RC_CONTROL.reg, 0);
-	xe_mmio_write32(gt, RC_STATE.reg, 0);
+	xe_mmio_write32(gt, PG_ENABLE, 0);
+	xe_mmio_write32(gt, RC_CONTROL, 0);
+	xe_mmio_write32(gt, RC_STATE, 0);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index ac7eec28934d0..a304dce4e9f4d 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -9,6 +9,7 @@
 #include <linux/idr.h>
 #include <linux/xarray.h>
 
+#include "regs/xe_reg_defs.h"
 #include "xe_guc_ads_types.h"
 #include "xe_guc_ct_types.h"
 #include "xe_guc_fwif.h"
@@ -74,7 +75,7 @@ struct xe_guc {
 	/**
 	 * @notify_reg: Register which is written to notify GuC of H2G messages
 	 */
-	u32 notify_reg;
+	struct xe_reg notify_reg;
 	/** @params: Control params for fw initialization */
 	u32 params[GUC_CTL_MAX_DWORDS];
 };
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 55dcaab34ea46..e0377083d1f2d 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -84,7 +84,7 @@ int xe_huc_auth(struct xe_huc *huc)
 		goto fail;
 	}
 
-	ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO.reg,
+	ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO,
 			     HUC_LOAD_SUCCESSFUL,
 			     HUC_LOAD_SUCCESSFUL, 100000, NULL, false);
 	if (ret) {
@@ -126,7 +126,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
 		return;
 
 	drm_printf(p, "\nHuC status: 0x%08x\n",
-		   xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO.reg));
+		   xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO));
 
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index a9adac0624f69..5e275aff89742 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -233,20 +233,25 @@ static void hw_engine_fini(struct drm_device *drm, void *arg)
 	hwe->gt = NULL;
 }
 
-static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val)
+static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
+				   u32 val)
 {
-	XE_BUG_ON(reg & hwe->mmio_base);
+	XE_BUG_ON(reg.reg & hwe->mmio_base);
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
-	xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val);
+	reg.reg += hwe->mmio_base;
+
+	xe_mmio_write32(hwe->gt, reg, val);
 }
 
-static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg)
+static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
 {
-	XE_BUG_ON(reg & hwe->mmio_base);
+	XE_BUG_ON(reg.reg & hwe->mmio_base);
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
-	return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base);
+	reg.reg += hwe->mmio_base;
+
+	return xe_mmio_read32(hwe->gt, reg);
 }
 
 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
@@ -255,17 +260,17 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
-		xe_mmio_write32(hwe->gt, RCU_MODE.reg,
+		xe_mmio_write32(hwe->gt, RCU_MODE,
 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
 
-	hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0);
-	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg,
+	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
+	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
 			       xe_bo_ggtt_addr(hwe->hwsp));
-	hw_engine_mmio_write32(hwe, RING_MODE(0).reg,
+	hw_engine_mmio_write32(hwe, RING_MODE(0),
 			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
-	hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg,
+	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
 			       _MASKED_BIT_DISABLE(STOP_RING));
-	hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);
+	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
 }
 
 void
@@ -443,7 +448,7 @@ static void read_media_fuses(struct xe_gt *gt)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE.reg);
+	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
 
 	/*
 	 * Pre-Xe_HP platforms had register bits representing absent engines,
@@ -485,7 +490,7 @@ static void read_copy_fuses(struct xe_gt *gt)
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
-	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3.reg);
+	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
 	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
 
 	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
@@ -582,63 +587,63 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
 	drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base);
 
 	drm_printf(p, "\tHWSTAM: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg));
+		hw_engine_mmio_read32(hwe, RING_HWSTAM(0)));
 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg));
+		hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)));
 
 	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg));
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)));
 	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg));
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)));
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
 		hw_engine_mmio_read32(hwe,
-					 RING_EXECLIST_SQ_CONTENTS_LO(0).reg));
+					 RING_EXECLIST_SQ_CONTENTS_LO(0)));
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
 		hw_engine_mmio_read32(hwe,
-					 RING_EXECLIST_SQ_CONTENTS_HI(0).reg));
+					 RING_EXECLIST_SQ_CONTENTS_HI(0)));
 	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg));
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0)));
 
 	drm_printf(p, "\tRING_START: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_START(0).reg));
+		hw_engine_mmio_read32(hwe, RING_START(0)));
 	drm_printf(p, "\tRING_HEAD:  0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR);
+		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR);
 	drm_printf(p, "\tRING_TAIL:  0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR);
+		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR);
 	drm_printf(p, "\tRING_CTL: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_CTL(0).reg));
+		hw_engine_mmio_read32(hwe, RING_CTL(0)));
 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg));
+		hw_engine_mmio_read32(hwe, RING_MI_MODE(0)));
 	drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_MODE(0).reg));
+		hw_engine_mmio_read32(hwe, RING_MODE(0)));
 
 	drm_printf(p, "\tRING_IMR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_IMR(0).reg));
+		hw_engine_mmio_read32(hwe, RING_IMR(0)));
 	drm_printf(p, "\tRING_ESR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_ESR(0).reg));
+		hw_engine_mmio_read32(hwe, RING_ESR(0)));
 	drm_printf(p, "\tRING_EMR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EMR(0).reg));
+		hw_engine_mmio_read32(hwe, RING_EMR(0)));
 	drm_printf(p, "\tRING_EIR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EIR(0).reg));
+		hw_engine_mmio_read32(hwe, RING_EIR(0)));
 
         drm_printf(p, "\tACTHD:  0x%08x_%08x\n",
-		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg),
-		hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg));
+		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)),
+		hw_engine_mmio_read32(hwe, RING_ACTHD(0)));
         drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
-		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg),
-		hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg));
+		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)),
+		hw_engine_mmio_read32(hwe, RING_BBADDR(0)));
         drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
-		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg),
-		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg));
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)),
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)));
 
 	drm_printf(p, "\tIPEIR: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, IPEIR(0).reg));
+		hw_engine_mmio_read32(hwe, IPEIR(0)));
 	drm_printf(p, "\tIPEHR: 0x%08x\n\n",
-		hw_engine_mmio_read32(hwe, IPEHR(0).reg));
+		hw_engine_mmio_read32(hwe, IPEHR(0)));
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
-			xe_mmio_read32(hwe->gt, RCU_MODE.reg));
+			xe_mmio_read32(hwe->gt, RCU_MODE));
 
 }
 
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 2fffb2865cab6..7c58cf5269512 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -28,7 +28,7 @@
 
 static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg)
 {
-	u32 val = xe_mmio_read32(gt, reg.reg);
+	u32 val = xe_mmio_read32(gt, reg);
 
 	if (val == 0)
 		return;
@@ -36,10 +36,10 @@ static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg)
 	drm_WARN(&gt_to_xe(gt)->drm, 1,
 		 "Interrupt register 0x%x is not zero: 0x%08x\n",
 		 reg.reg, val);
-	xe_mmio_write32(gt, reg.reg, 0xffffffff);
-	xe_mmio_read32(gt, reg.reg);
-	xe_mmio_write32(gt, reg.reg, 0xffffffff);
-	xe_mmio_read32(gt, reg.reg);
+	xe_mmio_write32(gt, reg, 0xffffffff);
+	xe_mmio_read32(gt, reg);
+	xe_mmio_write32(gt, reg, 0xffffffff);
+	xe_mmio_read32(gt, reg);
 }
 
 /*
@@ -54,32 +54,32 @@ static void unmask_and_enable(struct xe_gt *gt, u32 irqregs, u32 bits)
 	 */
 	assert_iir_is_zero(gt, IIR(irqregs));
 
-	xe_mmio_write32(gt, IER(irqregs).reg, bits);
-	xe_mmio_write32(gt, IMR(irqregs).reg, ~bits);
+	xe_mmio_write32(gt, IER(irqregs), bits);
+	xe_mmio_write32(gt, IMR(irqregs), ~bits);
 
 	/* Posting read */
-	xe_mmio_read32(gt, IMR(irqregs).reg);
+	xe_mmio_read32(gt, IMR(irqregs));
 }
 
 /* Mask and disable all interrupts. */
 static void mask_and_disable(struct xe_gt *gt, u32 irqregs)
 {
-	xe_mmio_write32(gt, IMR(irqregs).reg, ~0);
+	xe_mmio_write32(gt, IMR(irqregs), ~0);
 	/* Posting read */
-	xe_mmio_read32(gt, IMR(irqregs).reg);
+	xe_mmio_read32(gt, IMR(irqregs));
 
-	xe_mmio_write32(gt, IER(irqregs).reg, 0);
+	xe_mmio_write32(gt, IER(irqregs), 0);
 
 	/* IIR can theoretically queue up two events. Be paranoid. */
-	xe_mmio_write32(gt, IIR(irqregs).reg, ~0);
-	xe_mmio_read32(gt, IIR(irqregs).reg);
-	xe_mmio_write32(gt, IIR(irqregs).reg, ~0);
-	xe_mmio_read32(gt, IIR(irqregs).reg);
+	xe_mmio_write32(gt, IIR(irqregs), ~0);
+	xe_mmio_read32(gt, IIR(irqregs));
+	xe_mmio_write32(gt, IIR(irqregs), ~0);
+	xe_mmio_read32(gt, IIR(irqregs));
 }
 
 static u32 xelp_intr_disable(struct xe_gt *gt)
 {
-	xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, 0);
+	xe_mmio_write32(gt, GFX_MSTR_IRQ, 0);
 
 	/*
 	 * Now with master disabled, get a sample of level indications
@@ -87,7 +87,7 @@ static u32 xelp_intr_disable(struct xe_gt *gt)
 	 * New indications can and will light up during processing,
 	 * and will generate new interrupt after enabling master.
 	 */
-	return xe_mmio_read32(gt, GFX_MSTR_IRQ.reg);
+	return xe_mmio_read32(gt, GFX_MSTR_IRQ);
 }
 
 static u32
@@ -98,18 +98,18 @@ gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
 	if (!(master_ctl & GU_MISC_IRQ))
 		return 0;
 
-	iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET).reg);
+	iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET));
 	if (likely(iir))
-		xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET).reg, iir);
+		xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET), iir);
 
 	return iir;
 }
 
 static inline void xelp_intr_enable(struct xe_gt *gt, bool stall)
 {
-	xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, MASTER_IRQ);
+	xe_mmio_write32(gt, GFX_MSTR_IRQ, MASTER_IRQ);
 	if (stall)
-		xe_mmio_read32(gt, GFX_MSTR_IRQ.reg);
+		xe_mmio_read32(gt, GFX_MSTR_IRQ);
 }
 
 static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
@@ -132,41 +132,41 @@ static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 	smask = irqs << 16;
 
 	/* Enable RCS, BCS, VCS and VECS class interrupts. */
-	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg, dmask);
-	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg, dmask);
+	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
+	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
 	if (ccs_mask)
-		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, smask);
+		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
 
 	/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
-	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg, ~smask);
-	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg, ~smask);
+	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
+	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
 	if (bcs_mask & (BIT(1)|BIT(2)))
-		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
 	if (bcs_mask & (BIT(3)|BIT(4)))
-		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
 	if (bcs_mask & (BIT(5)|BIT(6)))
-		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
 	if (bcs_mask & (BIT(7)|BIT(8)))
-		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask);
-	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~dmask);
-	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~dmask);
-	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
+	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
+	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
 	if (ccs_mask & (BIT(0)|BIT(1)))
-		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
 	if (ccs_mask & (BIT(2)|BIT(3)))
-		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK.reg, ~dmask);
+		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK, ~dmask);
 
 	/*
 	 * RPS interrupts will get enabled/disabled on demand when RPS itself
 	 * is enabled/disabled.
 	 */
 	/* TODO: gt->pm_ier, gt->pm_imr */
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK,  ~0);
 
 	/* Same thing for GuC interrupts */
-	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, 0);
-	xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg,  ~0);
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, 0);
+	xe_mmio_write32(gt, GUC_SG_INTR_MASK,  ~0);
 }
 
 static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
@@ -191,7 +191,7 @@ gt_engine_identity(struct xe_device *xe,
 
 	lockdep_assert_held(&xe->irq.lock);
 
-	xe_mmio_write32(gt, IIR_REG_SELECTOR(bank).reg, BIT(bit));
+	xe_mmio_write32(gt, IIR_REG_SELECTOR(bank), BIT(bit));
 
 	/*
 	 * NB: Specs do not specify how long to spin wait,
@@ -199,7 +199,7 @@ gt_engine_identity(struct xe_device *xe,
 	 */
 	timeout_ts = (local_clock() >> 10) + 100;
 	do {
-		ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank).reg);
+		ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank));
 	} while (!(ident & INTR_DATA_VALID) &&
 		 !time_after32(local_clock() >> 10, timeout_ts));
 
@@ -209,7 +209,7 @@ gt_engine_identity(struct xe_device *xe,
 		return 0;
 	}
 
-	xe_mmio_write32(gt, INTR_IDENTITY_REG(bank).reg, INTR_DATA_VALID);
+	xe_mmio_write32(gt, INTR_IDENTITY_REG(bank), INTR_DATA_VALID);
 
 	return ident;
 }
@@ -248,11 +248,11 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 
 		if (!xe_gt_is_media_type(gt)) {
 			intr_dw[bank] =
-				xe_mmio_read32(gt, GT_INTR_DW(bank).reg);
+				xe_mmio_read32(gt, GT_INTR_DW(bank));
 			for_each_set_bit(bit, intr_dw + bank, 32)
 				identity[bit] = gt_engine_identity(xe, gt,
 								   bank, bit);
-			xe_mmio_write32(gt, GT_INTR_DW(bank).reg,
+			xe_mmio_write32(gt, GT_INTR_DW(bank),
 					intr_dw[bank]);
 		}
 
@@ -310,14 +310,14 @@ static u32 dg1_intr_disable(struct xe_device *xe)
 	u32 val;
 
 	/* First disable interrupts */
-	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0);
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, 0);
 
 	/* Get the indication levels and ack the master unit */
-	val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+	val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR);
 	if (unlikely(!val))
 		return 0;
 
-	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, val);
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, val);
 
 	return val;
 }
@@ -326,9 +326,9 @@ static void dg1_intr_enable(struct xe_device *xe, bool stall)
 {
 	struct xe_gt *gt = xe_device_get_gt(xe, 0);
 
-	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ);
+	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
 	if (stall)
-		xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+		xe_mmio_read32(gt, DG1_MSTR_TILE_INTR);
 }
 
 static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
@@ -368,7 +368,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 			continue;
 
 		if (!xe_gt_is_media_type(gt))
-			master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ.reg);
+			master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ);
 
 		/*
 		 * We might be in irq handler just when PCIe DPC is initiated
@@ -382,7 +382,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 		}
 
 		if (!xe_gt_is_media_type(gt))
-			xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl);
+			xe_mmio_write32(gt, GFX_MSTR_IRQ, master_ctl);
 		gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
 
 		/*
@@ -407,34 +407,34 @@ static void gt_irq_reset(struct xe_gt *gt)
 	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
 
 	/* Disable RCS, BCS, VCS and VECS class engines. */
-	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg,	 0);
-	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg,	 0);
+	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE,	 0);
+	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE,	 0);
 	if (ccs_mask)
-		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, 0);
+		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, 0);
 
 	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
-	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg,	~0);
-	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg,	~0);
+	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK,	~0);
+	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK,	~0);
 	if (bcs_mask & (BIT(1)|BIT(2)))
-		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0);
+		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~0);
 	if (bcs_mask & (BIT(3)|BIT(4)))
-		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0);
+		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~0);
 	if (bcs_mask & (BIT(5)|BIT(6)))
-		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0);
+		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~0);
 	if (bcs_mask & (BIT(7)|BIT(8)))
-		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0);
-	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg,	~0);
-	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg,	~0);
-	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg,	~0);
+		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~0);
+	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK,	~0);
+	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK,	~0);
+	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK,	~0);
 	if (ccs_mask & (BIT(0)|BIT(1)))
-		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~0);
+		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~0);
 	if (ccs_mask & (BIT(2)|BIT(3)))
-		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK.reg, ~0);
+		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK, ~0);
 
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg,  ~0);
-	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg,	 0);
-	xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg,		~0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0);
+	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK,  ~0);
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,	 0);
+	xe_mmio_write32(gt, GUC_SG_INTR_MASK,		~0);
 }
 
 static void xelp_irq_reset(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 3b719c774efa6..0e91004fa06d4 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -153,13 +153,13 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si
 	struct xe_gt *gt = xe_device_get_gt(xe, 0);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	int err;
-	u32 reg;
+	u32 reg_val;
 
 	if (!xe->info.has_flat_ccs)  {
 		*vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 		if (usable_size)
 			*usable_size = min(*vram_size,
-					   xe_mmio_read64(gt, GSMBASE.reg));
+					   xe_mmio_read64(gt, GSMBASE));
 		return 0;
 	}
 
@@ -167,11 +167,11 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si
 	if (err)
 		return err;
 
-	reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
-	*vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+	reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
+	*vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg_val) * SZ_1G;
 	if (usable_size) {
-		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
-		*usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+		reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		*usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg_val) * SZ_64K;
 		drm_info(&xe->drm, "vram_size: 0x%llx usable_size: 0x%llx\n",
 			 *vram_size, *usable_size);
 	}
@@ -298,7 +298,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 	if (xe->info.tile_count == 1)
 		return;
 
-	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg);
+	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR);
 	adj_tile_count = xe->info.tile_count =
 		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
 	if (xe->info.media_verx100 >= 1300)
@@ -374,7 +374,7 @@ int xe_mmio_init(struct xe_device *xe)
 	 * keep the GT powered down; we won't be able to communicate with it
 	 * and we should not continue with driver initialization.
 	 */
-	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) {
+	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
 		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
 		return -ENODEV;
 	}
@@ -403,6 +403,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	struct xe_device *xe = to_xe_device(dev);
 	struct drm_xe_mmio *args = data;
 	unsigned int bits_flag, bytes;
+	struct xe_reg reg;
 	bool allowed;
 	int ret = 0;
 
@@ -435,6 +436,12 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size))
 		return -EINVAL;
 
+	/*
+	 * TODO: migrate to xe_gt_mcr to lookup the mmio range and handle
+	 * multicast registers. Steering would need uapi extension.
+	 */
+	reg = XE_REG(args->addr);
+
 	xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
 
 	if (args->flags & DRM_XE_MMIO_WRITE) {
@@ -444,10 +451,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 				ret = -EINVAL;
 				goto exit;
 			}
-			xe_mmio_write32(to_gt(xe), args->addr, args->value);
+			xe_mmio_write32(to_gt(xe), reg, args->value);
 			break;
 		case DRM_XE_MMIO_64BIT:
-			xe_mmio_write64(to_gt(xe), args->addr, args->value);
+			xe_mmio_write64(to_gt(xe), reg, args->value);
 			break;
 		default:
 			drm_dbg(&xe->drm, "Invalid MMIO bit size");
@@ -462,10 +469,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & DRM_XE_MMIO_READ) {
 		switch (bits_flag) {
 		case DRM_XE_MMIO_32BIT:
-			args->value = xe_mmio_read32(to_gt(xe), args->addr);
+			args->value = xe_mmio_read32(to_gt(xe), reg);
 			break;
 		case DRM_XE_MMIO_64BIT:
-			args->value = xe_mmio_read64(to_gt(xe), args->addr);
+			args->value = xe_mmio_read64(to_gt(xe), reg);
 			break;
 		default:
 			drm_dbg(&xe->drm, "Invalid MMIO bit size");
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index b72a0a75259fa..f9a23b4ef77dc 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 
+#include "regs/xe_reg_defs.h"
 #include "xe_gt_types.h"
 
 struct drm_device;
@@ -17,33 +18,33 @@ struct xe_device;
 
 int xe_mmio_init(struct xe_device *xe);
 
-static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg)
+static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
 {
-	if (reg < gt->mmio.adj_limit)
-		reg += gt->mmio.adj_offset;
+	if (reg.reg < gt->mmio.adj_limit)
+		reg.reg += gt->mmio.adj_offset;
 
-	return readb(gt->mmio.regs + reg);
+	return readb(gt->mmio.regs + reg.reg);
 }
 
 static inline void xe_mmio_write32(struct xe_gt *gt,
-				   u32 reg, u32 val)
+				   struct xe_reg reg, u32 val)
 {
-	if (reg < gt->mmio.adj_limit)
-		reg += gt->mmio.adj_offset;
+	if (reg.reg < gt->mmio.adj_limit)
+		reg.reg += gt->mmio.adj_offset;
 
-	writel(val, gt->mmio.regs + reg);
+	writel(val, gt->mmio.regs + reg.reg);
 }
 
-static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg)
+static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
 {
-	if (reg < gt->mmio.adj_limit)
-		reg += gt->mmio.adj_offset;
+	if (reg.reg < gt->mmio.adj_limit)
+		reg.reg += gt->mmio.adj_offset;
 
-	return readl(gt->mmio.regs + reg);
+	return readl(gt->mmio.regs + reg.reg);
 }
 
-static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 clr,
-				 u32 set)
+static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
+				u32 set)
 {
 	u32 old, reg_val;
 
@@ -55,24 +56,24 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 clr,
 }
 
 static inline void xe_mmio_write64(struct xe_gt *gt,
-				   u32 reg, u64 val)
+				   struct xe_reg reg, u64 val)
 {
-	if (reg < gt->mmio.adj_limit)
-		reg += gt->mmio.adj_offset;
+	if (reg.reg < gt->mmio.adj_limit)
+		reg.reg += gt->mmio.adj_offset;
 
-	writeq(val, gt->mmio.regs + reg);
+	writeq(val, gt->mmio.regs + reg.reg);
 }
 
-static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg)
+static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
 {
-	if (reg < gt->mmio.adj_limit)
-		reg += gt->mmio.adj_offset;
+	if (reg.reg < gt->mmio.adj_limit)
+		reg.reg += gt->mmio.adj_offset;
 
-	return readq(gt->mmio.regs + reg);
+	return readq(gt->mmio.regs + reg.reg);
 }
 
 static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
-					     u32 reg, u32 val,
+					     struct xe_reg reg, u32 val,
 					     u32 mask, u32 eval)
 {
 	u32 reg_val;
@@ -83,8 +84,9 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 	return (reg_val & mask) != eval ? -EINVAL : 0;
 }
 
-static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask,
-				 u32 timeout_us, u32 *out_val, bool atomic)
+static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 val,
+				 u32 mask, u32 timeout_us, u32 *out_val,
+				 bool atomic)
 {
 	ktime_t cur = ktime_get_raw();
 	const ktime_t end = ktime_add_us(cur, timeout_us);
@@ -122,9 +124,10 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask,
 int xe_mmio_ioctl(struct drm_device *dev, void *data,
 		  struct drm_file *file);
 
-static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg)
+static inline bool xe_mmio_in_range(const struct xe_mmio_range *range,
+				    struct xe_reg reg)
 {
-	return range && reg >= range->start && reg <= range->end;
+	return range && reg.reg >= range->start && reg.reg <= range->end;
 }
 
 int xe_mmio_probe_vram(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 0d07811a573f6..f30e1a0ce5dc0 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -477,8 +477,10 @@ static void __init_mocs_table(struct xe_gt *gt,
 	for (i = 0;
 	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, XE_REG(addr + i * 4).reg, mocs);
-		xe_mmio_write32(gt, XE_REG(addr + i * 4).reg, mocs);
+		struct xe_reg reg = XE_REG(addr + i * 4);
+
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, reg.reg, mocs);
+		xe_mmio_write32(gt, reg, mocs);
 	}
 }
 
@@ -514,7 +516,7 @@ static void init_l3cc_table(struct xe_gt *gt,
 	     i++) {
 		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).reg,
 			 l3cc);
-		xe_mmio_write32(gt, LNCFCMOCS(i).reg, l3cc);
+		xe_mmio_write32(gt, LNCFCMOCS(i), l3cc);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index abee41fa3cb9d..b56a65779d26b 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -64,14 +64,20 @@ static const u32 mtl_pat_table[] = {
 
 static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
 {
-	for (int i = 0; i < n_entries; i++)
-		xe_mmio_write32(gt, _PAT_INDEX(i), table[i]);
+	for (int i = 0; i < n_entries; i++) {
+		struct xe_reg reg = XE_REG(_PAT_INDEX(i));
+
+		xe_mmio_write32(gt, reg, table[i]);
+	}
 }
 
 static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries)
 {
-	for (int i = 0; i < n_entries; i++)
-		xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_INDEX(i)), table[i]);
+	for (int i = 0; i < n_entries; i++) {
+		struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i));
+
+		xe_gt_mcr_multicast_write(gt, reg_mcr, table[i]);
+	}
 }
 
 void xe_pat_init(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 99bb730684edc..7ab70a83f88d7 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -43,7 +43,7 @@ static int pcode_mailbox_status(struct xe_gt *gt)
 
 	lockdep_assert_held(&gt->pcode.lock);
 
-	err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK;
+	err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
 	if (err) {
 		drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
 			err_decode[err].str ?: "Unknown");
@@ -60,22 +60,22 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
 	int err;
 	lockdep_assert_held(&gt->pcode.lock);
 
-	if ((xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) != 0)
+	if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
 		return -EAGAIN;
 
-	xe_mmio_write32(gt, PCODE_DATA0.reg, *data0);
-	xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0);
-	xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox);
+	xe_mmio_write32(gt, PCODE_DATA0, *data0);
+	xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0);
+	xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox);
 
-	err = xe_mmio_wait32(gt, PCODE_MAILBOX.reg, 0, PCODE_READY,
+	err = xe_mmio_wait32(gt, PCODE_MAILBOX, 0, PCODE_READY,
 			     timeout_ms * 1000, NULL, atomic);
 	if (err)
 		return err;
 
 	if (return_data) {
-		*data0 = xe_mmio_read32(gt, PCODE_DATA0.reg);
+		*data0 = xe_mmio_read32(gt, PCODE_DATA0);
 		if (data1)
-			*data1 = xe_mmio_read32(gt, PCODE_DATA1.reg);
+			*data1 = xe_mmio_read32(gt, PCODE_DATA1);
 	}
 
 	return pcode_mailbox_status(gt);
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index d129e6d7cb1ff..f75ef8d7500ac 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -161,7 +161,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 	else if (entry->clr_bits + 1)
 		val = (reg.mcr ?
 		       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
-		       xe_mmio_read32(gt, reg.reg)) & (~entry->clr_bits);
+		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
 	else
 		val = 0;
 
@@ -177,7 +177,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 	if (entry->reg.mcr)
 		xe_gt_mcr_multicast_write(gt, reg_mcr, val);
 	else
-		xe_mmio_write32(gt, reg.reg, val);
+		xe_mmio_write32(gt, reg, val);
 }
 
 void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
@@ -230,15 +230,17 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 	p = drm_debug_printer(KBUILD_MODNAME);
 	xa_for_each(&sr->xa, reg, entry) {
 		xe_reg_whitelist_print_entry(&p, 0, reg, entry);
-		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
 				reg | entry->set_bits);
 		slot++;
 	}
 
 	/* And clear the rest just in case of garbage */
-	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++)
-		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
-				RING_NOPID(mmio_base).reg);
+	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
+		u32 addr = RING_NOPID(mmio_base).reg;
+
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
+	}
 
 	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
 	XE_WARN_ON(err);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index c1b738e033c7f..ce829bd488252 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -44,10 +44,11 @@ static u32 preparser_disable(bool state)
 	return MI_ARB_CHECK | BIT(8) | state;
 }
 
-static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i)
+static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
+			      u32 *dw, int i)
 {
 	dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
-	dw[i++] = addr + gt->mmio.adj_offset;
+	dw[i++] = reg.reg + gt->mmio.adj_offset;
 	dw[i++] = AUX_INV;
 	dw[i++] = MI_NOOP;
 
@@ -203,9 +204,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	/* hsdes: 1809175790 */
 	if (!xe->info.has_flat_ccs) {
 		if (decode)
-			i = emit_aux_table_inv(gt, VD0_AUX_INV.reg, dw, i);
+			i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
 		else
-			i = emit_aux_table_inv(gt, VE0_AUX_INV.reg, dw, i);
+			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
 	}
 	dw[i++] = preparser_disable(false);
 
@@ -248,7 +249,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 
 	/* hsdes: 1809175790 */
 	if (!xe->info.has_flat_ccs)
-		i = emit_aux_table_inv(gt, CCS_AUX_INV.reg, dw, i);
+		i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);
 
 	dw[i++] = preparser_disable(false);
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 9ce0a05855396..a3855870321f5 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -65,7 +65,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	}
 
 	/* Use DSM base address instead for stolen memory */
-	mgr->stolen_base = xe_mmio_read64(gt, DSMBASE.reg) & BDSM_MASK;
+	mgr->stolen_base = xe_mmio_read64(gt, DSMBASE) & BDSM_MASK;
 	if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base))
 		return 0;
 
@@ -88,7 +88,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 	u32 stolen_size;
 	u32 ggc, gms;
 
-	ggc = xe_mmio_read32(to_gt(xe), GGC.reg);
+	ggc = xe_mmio_read32(to_gt(xe), GGC);
 
 	/* check GGMS, should be fixed 0x3 (8MB) */
 	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index bb8d986453328..ed37437600f06 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -462,33 +462,33 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 
 	/* Set the source address for the uCode */
 	src_offset = uc_fw_ggtt_offset(uc_fw);
-	xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset));
-	xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset));
+	xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
+	xe_mmio_write32(gt, DMA_ADDR_0_HIGH, upper_32_bits(src_offset));
 
 	/* Set the DMA destination */
-	xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset);
-	xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM);
+	xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset);
+	xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
 
 	/*
 	 * Set the transfer size. The header plus uCode will be copied to WOPCM
 	 * via DMA, excluding any other components
 	 */
-	xe_mmio_write32(gt, DMA_COPY_SIZE.reg,
+	xe_mmio_write32(gt, DMA_COPY_SIZE,
 			sizeof(struct uc_css_header) + uc_fw->ucode_size);
 
 	/* Start the DMA */
-	xe_mmio_write32(gt, DMA_CTRL.reg,
+	xe_mmio_write32(gt, DMA_CTRL,
 			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
 
 	/* Wait for DMA to finish */
-	ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl,
+	ret = xe_mmio_wait32(gt, DMA_CTRL, 0, START_DMA, 100000, &dma_ctrl,
 			     false);
 	if (ret)
 		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
 			xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
 
 	/* Disable the bits once DMA is over */
-	xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags));
+	xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index 7b5014aea9c8b..11eea970c2076 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -124,8 +124,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
 static bool __wopcm_regs_locked(struct xe_gt *gt,
 				u32 *guc_wopcm_base, u32 *guc_wopcm_size)
 {
-	u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg);
-	u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg);
+	u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET);
+	u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE);
 
 	if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
 	    !(reg_base & GUC_WOPCM_OFFSET_VALID))
@@ -152,13 +152,13 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
 	XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
 
 	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
-	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask,
+	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
 					 size | GUC_WOPCM_SIZE_LOCKED);
 	if (err)
 		goto err_out;
 
 	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
-	err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg,
+	err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET,
 					 base | huc_agent, mask,
 					 base | huc_agent |
 					 GUC_WOPCM_OFFSET_VALID);
@@ -171,10 +171,10 @@ err_out:
 	drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
 	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
 		   DMA_GUC_WOPCM_OFFSET.reg,
-		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg));
+		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
 	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
 		   GUC_WOPCM_SIZE.reg,
-		   xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg));
+		   xe_mmio_read32(gt, GUC_WOPCM_SIZE));
 
 	return err;
 }
-- 
GitLab


From ee21379acc1a5c0de612097de74213aa7015471b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 8 May 2023 15:53:21 -0700
Subject: [PATCH 1532/2340] drm/xe: Rename reg field to addr

Rename the address field to "addr" rather than "reg" so it's easier to
understand what it is.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230508225322.2692066-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_reg_defs.h  |  6 ++---
 drivers/gpu/drm/xe/tests/xe_rtp_test.c |  2 +-
 drivers/gpu/drm/xe/xe_force_wake.c     |  2 +-
 drivers/gpu/drm/xe/xe_gt_mcr.c         |  2 +-
 drivers/gpu/drm/xe/xe_guc.c            |  2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c        |  2 +-
 drivers/gpu/drm/xe/xe_hw_engine.c      |  8 +++----
 drivers/gpu/drm/xe/xe_irq.c            |  2 +-
 drivers/gpu/drm/xe/xe_mmio.c           |  2 +-
 drivers/gpu/drm/xe/xe_mmio.h           | 32 +++++++++++++-------------
 drivers/gpu/drm/xe/xe_mocs.c           |  6 ++---
 drivers/gpu/drm/xe/xe_pci.c            |  4 ++--
 drivers/gpu/drm/xe/xe_reg_sr.c         |  6 ++---
 drivers/gpu/drm/xe/xe_ring_ops.c       |  2 +-
 drivers/gpu/drm/xe/xe_rtp.c            |  2 +-
 drivers/gpu/drm/xe/xe_wopcm.c          |  4 ++--
 16 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 787f223bc7275..478787c75e292 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -18,8 +18,8 @@
 struct xe_reg {
 	union {
 		struct {
-			/** @reg: address */
-			u32 reg:22;
+			/** @addr: address */
+			u32 addr:22;
 			/**
 			 * @masked: register is "masked", with upper 16bits used
 			 * to identify the bits that are updated on the lower
@@ -73,7 +73,7 @@ struct xe_reg_mcr {
  * object of the right type. However when initializing static const storage,
  * where a compound statement is not allowed, this can be used instead.
  */
-#define XE_REG_INITIALIZER(r_, ...)    { .reg = r_, __VA_ARGS__ }
+#define XE_REG_INITIALIZER(r_, ...)    { .addr = r_, __VA_ARGS__ }
 
 
 /**
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index ad2fe8a39a78b..4b2aac5ccf28a 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -244,7 +244,7 @@ static void xe_rtp_process_tests(struct kunit *test)
 	xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL);
 
 	xa_for_each(&reg_sr->xa, idx, sre) {
-		if (idx == param->expected_reg.reg)
+		if (idx == param->expected_reg.addr)
 			sr_entry = sre;
 
 		count++;
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 363b81c3d7469..f0f0592fc598a 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -129,7 +129,7 @@ static int domain_sleep_wait(struct xe_gt *gt,
 	for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \
 		for_each_if((domain__ = ((fw__)->domains + \
 					 (ffs(tmp__) - 1))) && \
-					 domain__->reg_ctl.reg)
+					 domain__->reg_ctl.addr)
 
 int xe_force_wake_get(struct xe_force_wake *fw,
 		      enum xe_force_wake_domains domains)
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index c6b9e9869fee5..3db550c85e32c 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -398,7 +398,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 	 */
 	drm_WARN(&gt_to_xe(gt)->drm, true,
 		 "Did not find MCR register %#x in any MCR steering table\n",
-		 reg.reg);
+		 reg.addr);
 	*group = 0;
 	*instance = 0;
 
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index e8a126ad400f2..eb4af4c711246 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -713,7 +713,7 @@ proto:
 		response_buf[0] = header;
 
 		for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
-			reply_reg.reg += i * sizeof(u32);
+			reply_reg.addr += i * sizeof(u32);
 			response_buf[i] = xe_mmio_read32(gt, reply_reg);
 		}
 	}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 683f2df09c49d..6d550d746909c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -426,7 +426,7 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
 				      unsigned int n_entry)
 {
 	struct guc_mmio_reg entry = {
-		.offset = reg.reg,
+		.offset = reg.addr,
 		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
 	};
 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 5e275aff89742..696b9d949163d 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -236,20 +236,20 @@ static void hw_engine_fini(struct drm_device *drm, void *arg)
 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
 				   u32 val)
 {
-	XE_BUG_ON(reg.reg & hwe->mmio_base);
+	XE_BUG_ON(reg.addr & hwe->mmio_base);
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
-	reg.reg += hwe->mmio_base;
+	reg.addr += hwe->mmio_base;
 
 	xe_mmio_write32(hwe->gt, reg, val);
 }
 
 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
 {
-	XE_BUG_ON(reg.reg & hwe->mmio_base);
+	XE_BUG_ON(reg.addr & hwe->mmio_base);
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
-	reg.reg += hwe->mmio_base;
+	reg.addr += hwe->mmio_base;
 
 	return xe_mmio_read32(hwe->gt, reg);
 }
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 7c58cf5269512..1c26ec5ab4f0a 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -35,7 +35,7 @@ static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg)
 
 	drm_WARN(&gt_to_xe(gt)->drm, 1,
 		 "Interrupt register 0x%x is not zero: 0x%08x\n",
-		 reg.reg, val);
+		 reg.addr, val);
 	xe_mmio_write32(gt, reg, 0xffffffff);
 	xe_mmio_read32(gt, reg);
 	xe_mmio_write32(gt, reg, 0xffffffff);
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 0e91004fa06d4..c7fbb1cc1f64a 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -421,7 +421,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 		unsigned int i;
 
 		for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
-			if (mmio_read_whitelist[i].reg == args->addr) {
+			if (mmio_read_whitelist[i].addr == args->addr) {
 				allowed = true;
 				break;
 			}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index f9a23b4ef77dc..1407f1189b0d2 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -20,27 +20,27 @@ int xe_mmio_init(struct xe_device *xe);
 
 static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
 {
-	if (reg.reg < gt->mmio.adj_limit)
-		reg.reg += gt->mmio.adj_offset;
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
 
-	return readb(gt->mmio.regs + reg.reg);
+	return readb(gt->mmio.regs + reg.addr);
 }
 
 static inline void xe_mmio_write32(struct xe_gt *gt,
 				   struct xe_reg reg, u32 val)
 {
-	if (reg.reg < gt->mmio.adj_limit)
-		reg.reg += gt->mmio.adj_offset;
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
 
-	writel(val, gt->mmio.regs + reg.reg);
+	writel(val, gt->mmio.regs + reg.addr);
 }
 
 static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
 {
-	if (reg.reg < gt->mmio.adj_limit)
-		reg.reg += gt->mmio.adj_offset;
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
 
-	return readl(gt->mmio.regs + reg.reg);
+	return readl(gt->mmio.regs + reg.addr);
 }
 
 static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
@@ -58,18 +58,18 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
 static inline void xe_mmio_write64(struct xe_gt *gt,
 				   struct xe_reg reg, u64 val)
 {
-	if (reg.reg < gt->mmio.adj_limit)
-		reg.reg += gt->mmio.adj_offset;
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
 
-	writeq(val, gt->mmio.regs + reg.reg);
+	writeq(val, gt->mmio.regs + reg.addr);
 }
 
 static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
 {
-	if (reg.reg < gt->mmio.adj_limit)
-		reg.reg += gt->mmio.adj_offset;
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
 
-	return readq(gt->mmio.regs + reg.reg);
+	return readq(gt->mmio.regs + reg.addr);
 }
 
 static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
@@ -127,7 +127,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 static inline bool xe_mmio_in_range(const struct xe_mmio_range *range,
 				    struct xe_reg reg)
 {
-	return range && reg.reg >= range->start && reg.reg <= range->end;
+	return range && reg.addr >= range->start && reg.addr <= range->end;
 }
 
 int xe_mmio_probe_vram(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index f30e1a0ce5dc0..817afd301d526 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -479,7 +479,7 @@ static void __init_mocs_table(struct xe_gt *gt,
 	     i++) {
 		struct xe_reg reg = XE_REG(addr + i * 4);
 
-		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, reg.reg, mocs);
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs);
 		xe_mmio_write32(gt, reg, mocs);
 	}
 }
@@ -514,7 +514,7 @@ static void init_l3cc_table(struct xe_gt *gt,
 	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
 				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).reg,
+		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr,
 			 l3cc);
 		xe_mmio_write32(gt, LNCFCMOCS(i), l3cc);
 	}
@@ -541,7 +541,7 @@ void xe_mocs_init(struct xe_gt *gt)
 	mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags);
 
 	if (flags & HAS_GLOBAL_MOCS)
-		__init_mocs_table(gt, &table, GLOBAL_MOCS(0).reg);
+		__init_mocs_table(gt, &table, GLOBAL_MOCS(0).addr);
 
 	/*
 	 * Initialize the L3CC table as part of mocs initalization to make
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 13a5ce18ee05d..2ad3ad275e8a6 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -436,7 +436,7 @@ static void handle_gmdid(struct xe_device *xe,
 {
 	u32 ver;
 
-	ver = peek_gmdid(xe, GMD_ID.reg);
+	ver = peek_gmdid(xe, GMD_ID.addr);
 	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
 		if (ver == graphics_ip_map[i].ver) {
 			xe->info.graphics_verx100 = ver;
@@ -451,7 +451,7 @@ static void handle_gmdid(struct xe_device *xe,
 			ver / 100, ver % 100);
 	}
 
-	ver = peek_gmdid(xe, GMD_ID.reg + 0x380000);
+	ver = peek_gmdid(xe, GMD_ID.addr + 0x380000);
 	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
 		if (ver == media_ip_map[i].ver) {
 			xe->info.media_verx100 = ver;
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index f75ef8d7500ac..434133444d741 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -91,7 +91,7 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr)
 int xe_reg_sr_add(struct xe_reg_sr *sr,
 		  const struct xe_reg_sr_entry *e)
 {
-	unsigned long idx = e->reg.reg;
+	unsigned long idx = e->reg.addr;
 	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
 	int ret;
 
@@ -172,7 +172,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 	 */
 	val |= entry->set_bits;
 
-	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.reg, val);
+	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.addr, val);
 
 	if (entry->reg.mcr)
 		xe_gt_mcr_multicast_write(gt, reg_mcr, val);
@@ -237,7 +237,7 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 
 	/* And clear the rest just in case of garbage */
 	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
-		u32 addr = RING_NOPID(mmio_base).reg;
+		u32 addr = RING_NOPID(mmio_base).addr;
 
 		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
 	}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index ce829bd488252..06364bb2e95bd 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -48,7 +48,7 @@ static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
 			      u32 *dw, int i)
 {
 	dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
-	dw[i++] = reg.reg + gt->mmio.adj_offset;
+	dw[i++] = reg.addr + gt->mmio.adj_offset;
 	dw[i++] = AUX_INV;
 	dw[i++] = MI_NOOP;
 
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index f2a0e8eb4936b..0c6a23e14a715 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -101,7 +101,7 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 		.read_mask = action->read_mask,
 	};
 
-	sr_entry.reg.reg += mmio_base;
+	sr_entry.reg.addr += mmio_base;
 	xe_reg_sr_add(sr, &sr_entry);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index 11eea970c2076..35fde8965bca9 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -170,10 +170,10 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
 err_out:
 	drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
 	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
-		   DMA_GUC_WOPCM_OFFSET.reg,
+		   DMA_GUC_WOPCM_OFFSET.addr,
 		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
 	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
-		   GUC_WOPCM_SIZE.reg,
+		   GUC_WOPCM_SIZE.addr,
 		   xe_mmio_read32(gt, GUC_WOPCM_SIZE));
 
 	return err;
-- 
GitLab


From 50f1f0591638ec43eb041e27ab5e4eae47882cbc Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 8 May 2023 15:53:22 -0700
Subject: [PATCH 1533/2340] drm/xe: Fix indent in xe_hw_engine_print_state()

Fix the indent to align with open parenthesis, following the coding
style.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230508225322.2692066-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 66 +++++++++++++++----------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 696b9d949163d..751f6c3bba173 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -580,70 +580,70 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
 		return;
 
 	drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
-		hwe->logical_instance);
+		   hwe->logical_instance);
 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
-		hwe->domain,
-		xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain));
+		   hwe->domain,
+		   xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain));
 	drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base);
 
 	drm_printf(p, "\tHWSTAM: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_HWSTAM(0)));
+		   hw_engine_mmio_read32(hwe, RING_HWSTAM(0)));
 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)));
+		   hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)));
 
 	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)));
+		   hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)));
 	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)));
+		   hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)));
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
-		hw_engine_mmio_read32(hwe,
+		   hw_engine_mmio_read32(hwe,
 					 RING_EXECLIST_SQ_CONTENTS_LO(0)));
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
-		hw_engine_mmio_read32(hwe,
+		   hw_engine_mmio_read32(hwe,
 					 RING_EXECLIST_SQ_CONTENTS_HI(0)));
 	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0)));
+		   hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0)));
 
 	drm_printf(p, "\tRING_START: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_START(0)));
+		   hw_engine_mmio_read32(hwe, RING_START(0)));
 	drm_printf(p, "\tRING_HEAD:  0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR);
+		   hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR);
 	drm_printf(p, "\tRING_TAIL:  0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR);
+		   hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR);
 	drm_printf(p, "\tRING_CTL: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_CTL(0)));
+		   hw_engine_mmio_read32(hwe, RING_CTL(0)));
 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_MI_MODE(0)));
+		   hw_engine_mmio_read32(hwe, RING_MI_MODE(0)));
 	drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_MODE(0)));
+		   hw_engine_mmio_read32(hwe, RING_MODE(0)));
 
 	drm_printf(p, "\tRING_IMR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_IMR(0)));
+		   hw_engine_mmio_read32(hwe, RING_IMR(0)));
 	drm_printf(p, "\tRING_ESR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_ESR(0)));
+		   hw_engine_mmio_read32(hwe, RING_ESR(0)));
 	drm_printf(p, "\tRING_EMR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EMR(0)));
+		   hw_engine_mmio_read32(hwe, RING_EMR(0)));
 	drm_printf(p, "\tRING_EIR:   0x%08x\n",
-		hw_engine_mmio_read32(hwe, RING_EIR(0)));
-
-        drm_printf(p, "\tACTHD:  0x%08x_%08x\n",
-		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)),
-		hw_engine_mmio_read32(hwe, RING_ACTHD(0)));
-        drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
-		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)),
-		hw_engine_mmio_read32(hwe, RING_BBADDR(0)));
-        drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
-		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)),
-		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)));
+		   hw_engine_mmio_read32(hwe, RING_EIR(0)));
+
+	drm_printf(p, "\tACTHD:  0x%08x_%08x\n",
+		   hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)),
+		   hw_engine_mmio_read32(hwe, RING_ACTHD(0)));
+	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
+		   hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)),
+		   hw_engine_mmio_read32(hwe, RING_BBADDR(0)));
+	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
+		   hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)),
+		   hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)));
 
 	drm_printf(p, "\tIPEIR: 0x%08x\n",
-		hw_engine_mmio_read32(hwe, IPEIR(0)));
+		   hw_engine_mmio_read32(hwe, IPEIR(0)));
 	drm_printf(p, "\tIPEHR: 0x%08x\n\n",
-		hw_engine_mmio_read32(hwe, IPEHR(0)));
+		   hw_engine_mmio_read32(hwe, IPEHR(0)));
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
-			xe_mmio_read32(hwe->gt, RCU_MODE));
+			   xe_mmio_read32(hwe->gt, RCU_MODE));
 
 }
 
-- 
GitLab


From a2db3192115d8cafa3dcae024873957929a4eae0 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 5 May 2023 15:49:10 +0100
Subject: [PATCH 1534/2340] drm/xe: fix tlb_invalidation_seqno_past()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Checking seqno_recv >= seqno looks like it will incorrectly report true
when the seqno has wrapped (not unlikely given
TLB_INVALIDATION_SEQNO_MAX). Calling xe_gt_tlb_invalidation_wait() might
then return before the flush has been completed by the GuC.

Fix this by treating a large negative delta as an indication that the
seqno has wrapped around. Similar to how we treat a large positive delta
as an indication that the seqno_recv must have wrapped around, but in
that case the seqno has likely also signalled.

It looks like we could also potentially make the seqno use the full
32bits as supported by the GuC.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 9e7fe8d9bca41..c815a42e2cdba 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -251,14 +251,15 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
 {
-	if (gt->tlb_invalidation.seqno_recv >= seqno)
-		return true;
+	if (seqno - gt->tlb_invalidation.seqno_recv <
+	    -(TLB_INVALIDATION_SEQNO_MAX / 2))
+		return false;
 
 	if (seqno - gt->tlb_invalidation.seqno_recv >
 	    (TLB_INVALIDATION_SEQNO_MAX / 2))
 		return true;
 
-	return false;
+	return gt->tlb_invalidation.seqno_recv >= seqno;
 }
 
 /**
-- 
GitLab


From 5737f74e294775b9fa7fb07f80212c5bdffd5476 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 19 Apr 2023 14:37:02 -0700
Subject: [PATCH 1535/2340] drm/xe/adlp: Add revid => step mapping

Setup the mapping from PCI revid to IP stepping for ADL-P (and its RPL-P
subplatform) in case this information becomes important for implementing
workarounds.

Bspec: 55376
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230419213703.3993439-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_step.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index ee927dfd3eb37..bcdb4601c2f6f 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -60,6 +60,17 @@ static const struct xe_step_info adls_revids[] = {
 	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
 };
 
+static const struct xe_step_info adlp_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info adlp_rpl_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 },
+};
+
 static const struct xe_step_info dg2_g10_revid_step_tbl[] = {
 	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
 	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 },
@@ -118,6 +129,12 @@ struct xe_step_info xe_step_get(struct xe_device *xe)
 	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) {
 		revids = dg2_g12_revid_step_tbl;
 		size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_ADLP_RPLU) {
+		revids = adlp_rpl_revids;
+		size = ARRAY_SIZE(adlp_rpl_revids);
+	} else if (xe->info.platform == XE_ALDERLAKE_P) {
+		revids = adlp_revids;
+		size = ARRAY_SIZE(adlp_revids);
 	} else if (xe->info.platform == XE_ALDERLAKE_S) {
 		revids = adls_revids;
 		size = ARRAY_SIZE(adls_revids);
-- 
GitLab


From 500f90620cce13e8fd9e7dfc19701d753c4b3625 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 19 Apr 2023 14:37:03 -0700
Subject: [PATCH 1536/2340] drm/xe/adln: Enable ADL-N

ADL-N is pretty much the same as ADL-P (i.e., Xe_LP graphics + Xe_M
media + Xe_LPD display).  However unlike ADL-P, there's no GuC hwconfig
support so the "tgl" GuC firmware should be loaded (i.e., the same
situation as ADL-S).

Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Ravi Kumar Vodapalli <ravi.kumar.vodapalli@intel.com>
Link: https://lore.kernel.org/r/20230419213703.3993439-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c           | 1 +
 drivers/gpu/drm/xe/xe_pci.c            | 9 +++++++++
 drivers/gpu/drm/xe/xe_platform_types.h | 1 +
 drivers/gpu/drm/xe/xe_step.c           | 7 +++++++
 drivers/gpu/drm/xe/xe_uc_fw.c          | 1 +
 5 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 817afd301d526..c7a9e733ef3b1 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -418,6 +418,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 	case XE_ROCKETLAKE:
 	case XE_ALDERLAKE_S:
 	case XE_ALDERLAKE_P:
+	case XE_ALDERLAKE_N:
 		info->size  = ARRAY_SIZE(gen12_mocs_desc);
 		info->table = gen12_mocs_desc;
 		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 2ad3ad275e8a6..f0d0e999aa56f 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -208,6 +208,14 @@ static const struct xe_device_desc adl_p_desc = {
 	},
 };
 
+static const struct xe_device_desc adl_n_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ALDERLAKE_N),
+	.has_llc = 1,
+	.require_force_probe = true,
+};
+
 #define DGFX_FEATURES \
 	.is_dgfx = 1
 
@@ -312,6 +320,7 @@ static const struct pci_device_id pciidlist[] = {
 	XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
 	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
 	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
 	XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
 	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
 	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 80c19bffe79c7..abbb8a1f29a8e 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -16,6 +16,7 @@ enum xe_platform {
 	XE_ROCKETLAKE,
 	XE_ALDERLAKE_S,
 	XE_ALDERLAKE_P,
+	XE_ALDERLAKE_N,
 	XE_DG1,
 	XE_DG2,
 	XE_PVC,
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index bcdb4601c2f6f..a443d9bd7bbbf 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -71,6 +71,10 @@ static const struct xe_step_info adlp_rpl_revids[] = {
 	[0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 },
 };
 
+static const struct xe_step_info adln_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 },
+};
+
 static const struct xe_step_info dg2_g10_revid_step_tbl[] = {
 	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
 	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 },
@@ -129,6 +133,9 @@ struct xe_step_info xe_step_get(struct xe_device *xe)
 	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) {
 		revids = dg2_g12_revid_step_tbl;
 		size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
+	} else if (xe->info.platform == XE_ALDERLAKE_N) {
+		revids = adln_revids;
+		size = ARRAY_SIZE(adln_revids);
 	} else if (xe->info.subplatform == XE_SUBPLATFORM_ADLP_RPLU) {
 		revids = adlp_rpl_revids;
 		size = ARRAY_SIZE(adlp_rpl_revids);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index ed37437600f06..609ca3f2ffa4b 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -104,6 +104,7 @@ struct fw_blobs_by_type {
 	fw_def(PVC,		mmp_ver(  xe,	guc,	pvc,	70, 6, 4))	\
 	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
 	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
+	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 5))		\
 	fw_def(ALDERLAKE_P,	major_ver(i915,	guc,	adlp,	70, 5))		\
 	fw_def(ALDERLAKE_S,	major_ver(i915,	guc,	tgl,	70, 5))		\
 	fw_def(ROCKETLAKE,	major_ver(i915,	guc,	tgl,	70, 5))		\
-- 
GitLab


From 85635f5d47d7304a44bc45b419f8f31423712ef8 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 12 May 2023 16:36:49 -0700
Subject: [PATCH 1537/2340] drm/xe: Load HuC on Alderlake P

Alderlake P uses TGL HuC and it was not added together with ADL-S,
because it was failing for unrelated reasons. Now that those are fixed,
allow it to load HuC.

	# cat /sys/kernel/debug/dri/0/gt0/uc/huc_info
	HuC firmware: i915/tgl_huc.bin
		status: RUNNING
		version: wanted 0.0, found 7.9
		uCode: 589504 bytes
		RSA: 256 bytes

	HuC status: 0x00090001

Reviewed-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Link: https://lore.kernel.org/r/20230512233649.3218736-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 609ca3f2ffa4b..5703213bdf1b5 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -111,6 +111,7 @@ struct fw_blobs_by_type {
 	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)				\
+	fw_def(ALDERLAKE_P,	no_ver(i915,	huc,	tgl))			\
 	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,	tgl))			\
 	fw_def(DG1,		no_ver(i915,	huc,	dg1))			\
 	fw_def(ROCKETLAKE,	no_ver(i915,	huc,	tgl))			\
-- 
GitLab


From d0e96f3d5255f62bc9721392b198acc4d302de32 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 12 May 2023 14:10:04 +0200
Subject: [PATCH 1538/2340] drm/xe: Remove unused define

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_drv.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
index 0377e5e4e35f4..d45b71426cc83 100644
--- a/drivers/gpu/drm/xe/xe_drv.h
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -11,7 +11,6 @@
 #define DRIVER_NAME		"xe"
 #define DRIVER_DESC		"Intel Xe Graphics"
 #define DRIVER_DATE		"20201103"
-#define DRIVER_TIMESTAMP	1604406085
 
 /* Interface history:
  *
-- 
GitLab


From a029aecaa42018a9ebc90fbf6e2920acfc4c6b3f Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Thu, 11 May 2023 16:48:21 -0300
Subject: [PATCH 1539/2340] drm/xe: Get rid of MAKE_INIT_EXIT_FUNCS

There is not much of a benefit from using that macro as of now and it
hurts grepability or other ways of cross-referencing.

Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_module.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index e8ee7a9b08781..e2a61aaf50b68 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -35,12 +35,16 @@ struct init_funcs {
 	int (*init)(void);
 	void (*exit)(void);
 };
-#define MAKE_INIT_EXIT_FUNCS(name)		\
-	{ .init = xe_##name##_module_init,	\
-	  .exit = xe_##name##_module_exit, }
+
 static const struct init_funcs init_funcs[] = {
-	MAKE_INIT_EXIT_FUNCS(hw_fence),
-	MAKE_INIT_EXIT_FUNCS(sched_job),
+	{
+		.init = xe_hw_fence_module_init,
+		.exit = xe_hw_fence_module_exit,
+	},
+	{
+		.init = xe_sched_job_module_init,
+		.exit = xe_sched_job_module_exit,
+	},
 };
 
 static int __init xe_init(void)
-- 
GitLab


From 9afd4b2d2a8df9023849ddd25d5e064b6555ee34 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Thu, 11 May 2023 16:48:22 -0300
Subject: [PATCH 1540/2340] drm/xe: Call exit functions when
 xe_register_pci_driver() fails

Move xe_register_pci_driver() and xe_unregister_pci_driver() to
init_funcs to make sure that exit functions are also called when
xe_register_pci_driver() fails.

Note that this also allows adding init functions to be run after
xe_register_pci_driver().

v2:
 - Move functions to init_funcs instead of having a special case for
   xe_register_pci_driver(). (Jani)

Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_module.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index e2a61aaf50b68..496a9001dc3e2 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -45,6 +45,10 @@ static const struct init_funcs init_funcs[] = {
 		.init = xe_sched_job_module_init,
 		.exit = xe_sched_job_module_exit,
 	},
+	{
+		.init = xe_register_pci_driver,
+		.exit = xe_unregister_pci_driver,
+	},
 };
 
 static int __init xe_init(void)
@@ -60,15 +64,13 @@ static int __init xe_init(void)
 		}
 	}
 
-	return xe_register_pci_driver();
+	return 0;
 }
 
 static void __exit xe_exit(void)
 {
 	int i;
 
-	xe_unregister_pci_driver();
-
 	for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
 		init_funcs[i].exit();
 }
-- 
GitLab


From ed1df9897434a1da3f86c868825450fef47def23 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Sun, 16 Apr 2023 16:17:43 -0700
Subject: [PATCH 1541/2340] drm/xe: Allow compute VMs to output dma-fences on
 binds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Binds are not long running jobs thus we can export dma-fences even if a
VM is in compute mode.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d9579bf5002db..62496a4008d2b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3166,7 +3166,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
 					  &syncs_user[num_syncs], false,
-					  xe_vm_no_dma_fences(vm));
+					  xe_vm_in_fault_mode(vm));
 		if (err)
 			goto free_syncs;
 	}
-- 
GitLab


From 7cabe5580cb9dc16dcda0a163dc718e069c4c199 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Sun, 16 Apr 2023 16:14:26 -0700
Subject: [PATCH 1542/2340] drm/xe: Allow dma-fences as in-syncs for compute /
 faulting VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is allowed and encouraged by the dma-fencing rules. This along with
allowing compute VMs to export dma-fences on binds will result in a
simpler compute UMD.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_sync.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 99f1ed87196d6..1e4e4acb2c4a2 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -105,6 +105,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 {
 	struct drm_xe_sync sync_in;
 	int err;
+	bool signal;
 
 	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
 		return -EFAULT;
@@ -113,9 +114,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)))
 		return -EINVAL;
 
+	signal = sync_in.flags & DRM_XE_SYNC_SIGNAL;
 	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
 	case DRM_XE_SYNC_SYNCOBJ:
-		if (XE_IOCTL_ERR(xe, no_dma_fences))
+		if (XE_IOCTL_ERR(xe, no_dma_fences && signal))
 			return -ENOTSUPP;
 
 		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
@@ -125,7 +127,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		if (XE_IOCTL_ERR(xe, !sync->syncobj))
 			return -ENOENT;
 
-		if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) {
+		if (!signal) {
 			sync->fence = drm_syncobj_fence_get(sync->syncobj);
 			if (XE_IOCTL_ERR(xe, !sync->fence))
 				return -EINVAL;
@@ -133,7 +135,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		break;
 
 	case DRM_XE_SYNC_TIMELINE_SYNCOBJ:
-		if (XE_IOCTL_ERR(xe, no_dma_fences))
+		if (XE_IOCTL_ERR(xe, no_dma_fences && signal))
 			return -ENOTSUPP;
 
 		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
@@ -146,7 +148,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		if (XE_IOCTL_ERR(xe, !sync->syncobj))
 			return -ENOENT;
 
-		if (sync_in.flags & DRM_XE_SYNC_SIGNAL) {
+		if (signal) {
 			sync->chain_fence = dma_fence_chain_alloc();
 			if (!sync->chain_fence)
 				return -ENOMEM;
@@ -168,7 +170,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		break;
 
 	case DRM_XE_SYNC_USER_FENCE:
-		if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL)))
+		if (XE_IOCTL_ERR(xe, !signal))
 			return -ENOTSUPP;
 
 		if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7))
-- 
GitLab


From 75a6aadb9ae71a046534fb781b7c832c6586131b Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Mon, 16 Jan 2023 20:52:57 +0100
Subject: [PATCH 1543/2340] drm/xe: Introduce GT oriented log messages

While debugging GT related problems, it's good to know which GT was
reporting problems. Introduce helper macros to allow prefix GT logs
with GT identifier. We will use them in upcoming patches.

v2: use xe_ prefix (Lucas)
v3: use correct include

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_printk.h | 46 +++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_gt_printk.h

diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
new file mode 100644
index 0000000000000..0b801429cf1a1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_PRINTK_H_
+#define _XE_GT_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define xe_gt_printk(_gt, _level, _fmt, ...) \
+	drm_##_level(&(_gt)->xe->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_err(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_warn(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_notice(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_info(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), info, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_dbg(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_WARN(_gt, _condition, _fmt, ...) \
+	drm_WARN(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
+	drm_WARN_ONCE(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ON(_gt, _condition) \
+	xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
+
+#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
+	xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+
+#endif
-- 
GitLab


From 3e535bd504057bab1970b2dd1b594908ca3de74d Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Sun, 12 Mar 2023 15:48:21 +0100
Subject: [PATCH 1544/2340] drm/xe: Use GT oriented log messages in xe_gt.c

Replace generic log messages with ones dedicated for the GT.
While around replace errno logs from plain %d to pretty %pe.

v2: rebased
v3: unify errno logs

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 49 +++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index cbe063a40aca8..80d42c7c7cfa9 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -20,6 +20,7 @@
 #include "xe_gt_clock.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
 #include "xe_gt_sysfs.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
@@ -239,16 +240,16 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 				     hwe, ENGINE_FLAG_WA);
 		if (IS_ERR(e)) {
 			err = PTR_ERR(e);
-			drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,e failed=%d",
-				gt->info.id, hwe->name, err);
+			xe_gt_err(gt, "hwe %s: xe_engine_create failed (%pe)\n",
+				  hwe->name, e);
 			goto put_vm;
 		}
 
 		/* Prime golden LRC with known good state */
 		err = emit_wa_job(gt, e);
 		if (err) {
-			drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_wa_job,e failed=%d",
-				gt->info.id, hwe->name, e->guc->id, err);
+			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), e->guc->id);
 			goto put_engine;
 		}
 
@@ -256,24 +257,24 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 					 1, hwe, ENGINE_FLAG_WA);
 		if (IS_ERR(nop_e)) {
 			err = PTR_ERR(nop_e);
-			drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,nop_e failed=%d",
-				gt->info.id, hwe->name, err);
+			xe_gt_err(gt, "hwe %s: nop xe_engine_create failed (%pe)\n",
+				  hwe->name, nop_e);
 			goto put_engine;
 		}
 
 		/* Switch to different LRC */
 		err = emit_nop_job(gt, nop_e);
 		if (err) {
-			drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,nop_e failed=%d",
-				gt->info.id, hwe->name, nop_e->guc->id, err);
+			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), nop_e->guc->id);
 			goto put_nop_e;
 		}
 
 		/* Reload golden LRC to record the effect of any indirect W/A */
 		err = emit_nop_job(gt, e);
 		if (err) {
-			drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,e failed=%d",
-				gt->info.id, hwe->name, e->guc->id, err);
+			xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), e->guc->id);
 			goto put_nop_e;
 		}
 
@@ -541,15 +542,14 @@ int xe_gt_init(struct xe_gt *gt)
 
 static int do_gt_reset(struct xe_gt *gt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
 	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
 	err = xe_mmio_wait32(gt, GDRST, 0, GRDOM_FULL, 5000,
 			     NULL, false);
 	if (err)
-		drm_err(&xe->drm,
-			"GT reset failed to clear GEN11_GRDOM_FULL\n");
+		xe_gt_err(gt, "failed to clear GEN11_GRDOM_FULL (%pe)\n",
+			  ERR_PTR(err));
 
 	return err;
 }
@@ -592,14 +592,13 @@ static int do_gt_restart(struct xe_gt *gt)
 
 static int gt_reset(struct xe_gt *gt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
 	/* We only support GT resets with GuC submission */
 	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
 		return -ENODEV;
 
-	drm_info(&xe->drm, "GT reset started\n");
+	xe_gt_info(gt, "reset started\n");
 
 	xe_gt_sanitize(gt);
 
@@ -628,7 +627,7 @@ static int gt_reset(struct xe_gt *gt)
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	XE_WARN_ON(err);
 
-	drm_info(&xe->drm, "GT reset done\n");
+	xe_gt_info(gt, "reset done\n");
 
 	return 0;
 
@@ -637,7 +636,7 @@ err_out:
 err_msg:
 	XE_WARN_ON(xe_uc_start(&gt->uc));
 	xe_device_mem_access_put(gt_to_xe(gt));
-	drm_err(&xe->drm, "GT reset failed, err=%d\n", err);
+	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
 
 	return err;
 }
@@ -651,15 +650,13 @@ static void gt_reset_worker(struct work_struct *w)
 
 void xe_gt_reset_async(struct xe_gt *gt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
-
-	drm_info(&xe->drm, "Try GT reset\n");
+	xe_gt_info(gt, "trying reset\n");
 
 	/* Don't do a reset while one is already in flight */
 	if (xe_uc_reset_prepare(&gt->uc))
 		return;
 
-	drm_info(&xe->drm, "Doing GT reset\n");
+	xe_gt_info(gt, "reset queued\n");
 	queue_work(gt->ordered_wq, &gt->reset.worker);
 }
 
@@ -676,7 +673,6 @@ void xe_gt_suspend_prepare(struct xe_gt *gt)
 
 int xe_gt_suspend(struct xe_gt *gt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
 	/* For now suspend/resume is only allowed with GuC */
@@ -696,7 +692,7 @@ int xe_gt_suspend(struct xe_gt *gt)
 
 	xe_device_mem_access_put(gt_to_xe(gt));
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-	drm_info(&xe->drm, "GT suspended\n");
+	xe_gt_info(gt, "suspended\n");
 
 	return 0;
 
@@ -704,14 +700,13 @@ err_force_wake:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 err_msg:
 	xe_device_mem_access_put(gt_to_xe(gt));
-	drm_err(&xe->drm, "GT suspend failed: %d\n", err);
+	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
 
 	return err;
 }
 
 int xe_gt_resume(struct xe_gt *gt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
@@ -725,7 +720,7 @@ int xe_gt_resume(struct xe_gt *gt)
 
 	xe_device_mem_access_put(gt_to_xe(gt));
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-	drm_info(&xe->drm, "GT resumed\n");
+	xe_gt_info(gt, "resumed\n");
 
 	return 0;
 
@@ -733,7 +728,7 @@ err_force_wake:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 err_msg:
 	xe_device_mem_access_put(gt_to_xe(gt));
-	drm_err(&xe->drm, "GT resume failed: %d\n", err);
+	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
 
 	return err;
 }
-- 
GitLab


From e799485044cb3c0019a226ff3a92a532ca2a4e7e Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 18 May 2023 17:12:39 -0400
Subject: [PATCH 1545/2340] drm/xe: Introduce the dev_coredump infrastructure.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The goal is to use devcoredump infrastructure to report error states
captured at the crash time.

The error state will contain useful information for GPU hang debug, such
as INSTDONE registers and the current buffers getting executed, as well
as any other information that helps user space and allow later replays of
the error.

The proposal here is to avoid a Xe only error_state like i915 and use
a standard dev_coredump infrastructure to expose the error state.

For our own case, the data is only useful if it is a snapshot of the
time when the GPU crash has happened, since we reset the GPU immediately
after and the registers might have changed. So the proposal here is to
have an internal snapshot to be printed out later.

Also, usually a subsequent GPU hang can be only a cause of the initial
one. So we only save the 'first' hang. The dev_coredump has a delayed
work queue where it remove the coredump and free all the data within a
few moments of the error. When that happens we also reset our capture
state and allow further snapshots.

Right now this infra only print out the time of the hang. More information
will be migrated here on subsequent work. Also, in order to organize the
dump better, the goal is to propose dev_coredump changes itself to allow
multiple files and different controls. But for now we start Xe usage of
it without any dependency on dev_coredump core changes.

v2: Add dma_fence annotation for capture that might happen during long
    running. (Thomas and Matt)
    Use xe->drm.primary->index on drm_info msg. (Jani)
v3: checkpatch fixes
v4: Fix building and locking issues found by Francois.
    Actually let's kill all of the locking in here. gt_reset serialization
    already guarantee that there will be only one capture at the same time.
    Also, the devcoredump has its own locking to protect the free and reads
    and drivers don't need to duplicate it.
    Besides this, the dma_fence locking was pushed to a following patch
    since it is not needed in this one.
    Fix a use after free identified by KASAN: Do not stash the faulty_engine
    since that will be freed somewhere else.
v5: Fix Uptime - ktime_get_boottime actually returns the Uptime. (Francois)

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/Kconfig                |   1 +
 drivers/gpu/drm/xe/Makefile               |   1 +
 drivers/gpu/drm/xe/xe_devcoredump.c       | 126 ++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_devcoredump.h       |  20 ++++
 drivers/gpu/drm/xe/xe_devcoredump_types.h |  45 ++++++++
 drivers/gpu/drm/xe/xe_device_types.h      |   4 +
 drivers/gpu/drm/xe/xe_guc_submit.c        |   2 +
 7 files changed, 199 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_devcoredump.c
 create mode 100644 drivers/gpu/drm/xe/xe_devcoredump.h
 create mode 100644 drivers/gpu/drm/xe/xe_devcoredump_types.h

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 62f54e6d62d93..0a4854a59c90f 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -23,6 +23,7 @@ config DRM_XE
 	select DRM_TTM_HELPER
 	select DRM_SCHED
 	select MMU_NOTIFIER
+	select WANT_DEV_COREDUMP
 	help
 	  Experimental driver for Intel Xe series GPUs
 
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 71c604ecff531..5d277d060eba9 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -37,6 +37,7 @@ xe-y += xe_bb.o \
 	xe_bo.o \
 	xe_bo_evict.o \
 	xe_debugfs.o \
+	xe_devcoredump.o \
 	xe_device.o \
 	xe_dma_buf.o \
 	xe_engine.o \
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
new file mode 100644
index 0000000000000..561db73a3c8c5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_devcoredump.h"
+#include "xe_devcoredump_types.h"
+
+#include <linux/devcoredump.h>
+#include <generated/utsrelease.h>
+
+#include "xe_engine.h"
+#include "xe_gt.h"
+
+/**
+ * DOC: Xe device coredump
+ *
+ * Devices overview:
+ * Xe uses dev_coredump infrastructure for exposing the crash errors in a
+ * standardized way.
+ * devcoredump exposes a temporary device under /sys/class/devcoredump/
+ * which is linked with our card device directly.
+ * The core dump can be accessed either from
+ * /sys/class/drm/card<n>/device/devcoredump/ or from
+ * /sys/class/devcoredump/devcd<m> where
+ * /sys/class/devcoredump/devcd<m>/failing_device is a link to
+ * /sys/class/drm/card<n>/device/.
+ *
+ * Snapshot at hang:
+ * The 'data' file is printed with a drm_printer pointer at devcoredump read
+ * time. For this reason, we need to take snapshots from when the hang has
+ * happened, and not only when the user is reading the file. Otherwise the
+ * information is outdated since the resets might have happened in between.
+ *
+ * 'First' failure snapshot:
+ * In general, the first hang is the most critical one since the following hangs
+ * can be a consequence of the initial hang. For this reason we only take the
+ * snapshot of the 'first' failure and ignore subsequent calls of this function,
+ * at least while the coredump device is alive. Dev_coredump has a delayed work
+ * queue that will eventually delete the device and free all the dump
+ * information.
+ */
+
+#ifdef CONFIG_DEV_COREDUMP
+
+static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump)
+{
+	return container_of(coredump, struct xe_device, devcoredump);
+}
+
+static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
+				   size_t count, void *data, size_t datalen)
+{
+	struct xe_devcoredump *coredump = data;
+	struct xe_devcoredump_snapshot *ss;
+	struct drm_printer p;
+	struct drm_print_iterator iter;
+	struct timespec64 ts;
+
+	iter.data = buffer;
+	iter.offset = 0;
+	iter.start = offset;
+	iter.remain = count;
+
+	ss = &coredump->snapshot;
+	p = drm_coredump_printer(&iter);
+
+	drm_printf(&p, "**** Xe Device Coredump ****\n");
+	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+
+	ts = ktime_to_timespec64(ss->snapshot_time);
+	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+	ts = ktime_to_timespec64(ss->boot_time);
+	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+
+	return count - iter.remain;
+}
+
+static void xe_devcoredump_free(void *data)
+{
+	struct xe_devcoredump *coredump = data;
+
+	coredump->captured = false;
+	drm_info(&coredump_to_xe(coredump)->drm,
+		 "Xe device coredump has been deleted.\n");
+}
+
+static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+				 struct xe_engine *e)
+{
+	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+
+	ss->snapshot_time = ktime_get_real();
+	ss->boot_time = ktime_get_boottime();
+}
+
+/**
+ * xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @e: The faulty xe_engine, where the issue was detected.
+ *
+ * This function should be called at the crash time within the serialized
+ * gt_reset. It is skipped if we still have the core dump device available
+ * with the information of the 'first' snapshot.
+ */
+void xe_devcoredump(struct xe_engine *e)
+{
+	struct xe_device *xe = gt_to_xe(e->gt);
+	struct xe_devcoredump *coredump = &xe->devcoredump;
+
+	if (coredump->captured) {
+		drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+		return;
+	}
+
+	coredump->captured = true;
+	devcoredump_snapshot(coredump, e);
+
+	drm_info(&xe->drm, "Xe device coredump has been created\n");
+	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+		 xe->drm.primary->index);
+
+	dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
+		      xe_devcoredump_read, xe_devcoredump_free);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
new file mode 100644
index 0000000000000..8548821292275
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_H_
+#define _XE_DEVCOREDUMP_H_
+
+struct xe_device;
+struct xe_engine;
+
+#ifdef CONFIG_DEV_COREDUMP
+void xe_devcoredump(struct xe_engine *e);
+#else
+static inline void xe_devcoredump(struct xe_engine *e)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
new file mode 100644
index 0000000000000..52bd27ca10369
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_TYPES_H_
+#define _XE_DEVCOREDUMP_TYPES_H_
+
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+
+struct xe_device;
+
+/**
+ * struct xe_devcoredump_snapshot - Crash snapshot
+ *
+ * This struct contains all the useful information quickly captured at the time
+ * of the crash. So, any subsequent reads of the coredump points to a data that
+ * shows the state of the GPU of when the issue has happened.
+ */
+struct xe_devcoredump_snapshot {
+	/** @snapshot_time:  Time of this capture. */
+	ktime_t snapshot_time;
+	/** @boot_time:  Relative boot time so the uptime can be calculated. */
+	ktime_t boot_time;
+};
+
+/**
+ * struct xe_devcoredump - Xe devcoredump main structure
+ *
+ * This struct represents the live and active dev_coredump node.
+ * It is created/populated at the time of a crash/error. Then it
+ * is read later when user access the device coredump data file
+ * for reading the information.
+ */
+struct xe_devcoredump {
+	/** @xe: Xe device. */
+	struct xe_device *xe;
+	/** @captured: The snapshot of the first hang has already been taken. */
+	bool captured;
+	/** @snapshot: Snapshot is captured at time of the first crash */
+	struct xe_devcoredump_snapshot snapshot;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index f3cf5a4e5ab23..91edbe4a3730e 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -12,6 +12,7 @@
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_device.h>
 
+#include "xe_devcoredump_types.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
 #include "xe_step_types.h"
@@ -49,6 +50,9 @@ struct xe_device {
 	/** @drm: drm device */
 	struct drm_device drm;
 
+	/** @devcoredump: device coredump */
+	struct xe_devcoredump devcoredump;
+
 	/** @info: device info */
 	struct intel_device_info {
 		/** @graphics_name: graphics IP name */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index d0b48c885fda1..55b51ff791b85 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -14,6 +14,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_lrc_layout.h"
+#include "xe_devcoredump.h"
 #include "xe_device.h"
 #include "xe_engine.h"
 #include "xe_force_wake.h"
@@ -800,6 +801,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job)
 		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
 			   xe_sched_job_seqno(job), e->guc->id, e->flags);
 		simple_error_capture(e);
+		xe_devcoredump(e);
 	} else {
 		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
 			 xe_sched_job_seqno(job), e->guc->id, e->flags);
-- 
GitLab


From 656d29506ca89b4af1d2380ff4cab15f40ae9e19 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:07 -0400
Subject: [PATCH 1546/2340] drm/xe: Do not take any action if our device was
 removed.

Unfortunately devcoredump infrastructure does not provide and
interface for us to force the device removal upon the pci_remove
time of our device.

The devcoredump is linked at the device level, so when in use
it will prevent the module removal, but it doesn't prevent the
call of the pci_remove callback. This callback cannot fail
anyway and we end up clearing and freeing the entire pci device.

Hence, after we removed the pci device, we shouldn't allow any
read or free operations to avoid segmentation fault.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 561db73a3c8c5..00b9cc44c773c 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -57,6 +57,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 	struct drm_print_iterator iter;
 	struct timespec64 ts;
 
+	/* Our device is gone already... */
+	if (!data || !coredump_to_xe(coredump))
+		return -ENODEV;
+
 	iter.data = buffer;
 	iter.offset = 0;
 	iter.start = offset;
@@ -81,6 +85,10 @@ static void xe_devcoredump_free(void *data)
 {
 	struct xe_devcoredump *coredump = data;
 
+	/* Our device is gone. Nothing to do... */
+	if (!data || !coredump_to_xe(coredump))
+		return;
+
 	coredump->captured = false;
 	drm_info(&coredump_to_xe(coredump)->drm,
 		 "Xe device coredump has been deleted.\n");
-- 
GitLab


From a7ca8157ec7b59b597ba47cb98eaa82cb0b1d4af Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:08 -0400
Subject: [PATCH 1547/2340] drm/xe: Extract non mapped regions out of GuC CTB
 into its own struct.

No functional change here. The goal is to have a clear split between
the mapped portions of the CTB and the static information, so we can
easily capture snapshots that will be used for later read out with
the devcoredump infrastructure.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c       | 155 ++++++++++++++-------------
 drivers/gpu/drm/xe/xe_guc_ct_types.h |  20 ++--
 2 files changed, 95 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 9055ff133a7c7..e16e5fe37ed49 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -172,13 +172,14 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
 				struct iosys_map *map)
 {
-	h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
-	h2g->resv_space = 0;
-	h2g->tail = 0;
-	h2g->head = 0;
-	h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) -
-		h2g->resv_space;
-	h2g->broken = false;
+	h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+	h2g->info.resv_space = 0;
+	h2g->info.tail = 0;
+	h2g->info.head = 0;
+	h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+				     h2g->info.size) -
+			  h2g->info.resv_space;
+	h2g->info.broken = false;
 
 	h2g->desc = *map;
 	xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
@@ -189,13 +190,14 @@ static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
 static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
 				struct iosys_map *map)
 {
-	g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
-	g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
-	g2h->head = 0;
-	g2h->tail = 0;
-	g2h->space = CIRC_SPACE(g2h->tail, g2h->head, g2h->size) -
-		g2h->resv_space;
-	g2h->broken = false;
+	g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+	g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+	g2h->info.head = 0;
+	g2h->info.tail = 0;
+	g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
+				     g2h->info.size) -
+			  g2h->info.resv_space;
+	g2h->info.broken = false;
 
 	g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
 	xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
@@ -212,7 +214,7 @@ static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
 
 	desc_addr = xe_bo_ggtt_addr(ct->bo);
 	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
-	size = ct->ctbs.h2g.size * sizeof(u32);
+	size = ct->ctbs.h2g.info.size * sizeof(u32);
 
 	err = xe_guc_self_cfg64(guc,
 				GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
@@ -240,7 +242,7 @@ static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
 	desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
 	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
 		CTB_H2G_BUFFER_SIZE;
-	size = ct->ctbs.g2h.size * sizeof(u32);
+	size = ct->ctbs.g2h.info.size * sizeof(u32);
 
 	err = xe_guc_self_cfg64(guc,
 				GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
@@ -329,11 +331,12 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
 
 	lockdep_assert_held(&ct->lock);
 
-	if (cmd_len > h2g->space) {
-		h2g->head = desc_read(ct_to_xe(ct), h2g, head);
-		h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) -
-			h2g->resv_space;
-		if (cmd_len > h2g->space)
+	if (cmd_len > h2g->info.space) {
+		h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+		h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+					     h2g->info.size) -
+				  h2g->info.resv_space;
+		if (cmd_len > h2g->info.space)
 			return false;
 	}
 
@@ -344,7 +347,7 @@ static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
 {
 	lockdep_assert_held(&ct->lock);
 
-	return ct->ctbs.g2h.space > g2h_len;
+	return ct->ctbs.g2h.info.space > g2h_len;
 }
 
 static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
@@ -360,16 +363,16 @@ static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
 static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
 {
 	lockdep_assert_held(&ct->lock);
-	ct->ctbs.h2g.space -= cmd_len;
+	ct->ctbs.h2g.info.space -= cmd_len;
 }
 
 static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 {
-	XE_BUG_ON(g2h_len > ct->ctbs.g2h.space);
+	XE_BUG_ON(g2h_len > ct->ctbs.g2h.info.space);
 
 	if (g2h_len) {
 		spin_lock_irq(&ct->fast_lock);
-		ct->ctbs.g2h.space -= g2h_len;
+		ct->ctbs.g2h.info.space -= g2h_len;
 		ct->g2h_outstanding += num_g2h;
 		spin_unlock_irq(&ct->fast_lock);
 	}
@@ -378,10 +381,10 @@ static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
 {
 	lockdep_assert_held(&ct->fast_lock);
-	XE_WARN_ON(ct->ctbs.g2h.space + g2h_len >
-		   ct->ctbs.g2h.size - ct->ctbs.g2h.resv_space);
+	XE_WARN_ON(ct->ctbs.g2h.info.space + g2h_len >
+		   ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
 
-	ct->ctbs.g2h.space += g2h_len;
+	ct->ctbs.g2h.info.space += g2h_len;
 	--ct->g2h_outstanding;
 }
 
@@ -400,20 +403,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
 	u32 cmd_len = len + GUC_CTB_HDR_LEN;
 	u32 cmd_idx = 0, i;
-	u32 tail = h2g->tail;
+	u32 tail = h2g->info.tail;
 	struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
 							 tail * sizeof(u32));
 
 	lockdep_assert_held(&ct->lock);
 	XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN);
-	XE_BUG_ON(tail > h2g->size);
+	XE_BUG_ON(tail > h2g->info.size);
 
 	/* Command will wrap, zero fill (NOPs), return and check credits again */
-	if (tail + cmd_len > h2g->size) {
-		xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32));
-		h2g_reserve_space(ct, (h2g->size - tail));
-		h2g->tail = 0;
-		desc_write(xe, h2g, tail, h2g->tail);
+	if (tail + cmd_len > h2g->info.size) {
+		xe_map_memset(xe, &map, 0, 0,
+			      (h2g->info.size - tail) * sizeof(u32));
+		h2g_reserve_space(ct, (h2g->info.size - tail));
+		h2g->info.tail = 0;
+		desc_write(xe, h2g, tail, h2g->info.tail);
 
 		return -EAGAIN;
 	}
@@ -445,11 +449,11 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	xe_device_wmb(ct_to_xe(ct));
 
 	/* Update local copies */
-	h2g->tail = (tail + cmd_len) % h2g->size;
+	h2g->info.tail = (tail + cmd_len) % h2g->info.size;
 	h2g_reserve_space(ct, cmd_len);
 
 	/* Update descriptor */
-	desc_write(xe, h2g, tail, h2g->tail);
+	desc_write(xe, h2g, tail, h2g->info.tail);
 
 	return 0;
 }
@@ -466,7 +470,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 	XE_BUG_ON(!g2h_len && num_g2h);
 	lockdep_assert_held(&ct->lock);
 
-	if (unlikely(ct->ctbs.h2g.broken)) {
+	if (unlikely(ct->ctbs.h2g.info.broken)) {
 		ret = -EPIPE;
 		goto out;
 	}
@@ -554,8 +558,9 @@ try_again:
 		if (sleep_period_ms == 1024)
 			goto broken;
 
-		trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail,
-						 h2g->size, h2g->space,
+		trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail,
+						 h2g->info.size,
+						 h2g->info.space,
 						 len + GUC_CTB_HDR_LEN);
 		msleep(sleep_period_ms);
 		sleep_period_ms <<= 1;
@@ -565,15 +570,16 @@ try_again:
 		struct xe_device *xe = ct_to_xe(ct);
 		struct guc_ctb *g2h = &ct->ctbs.g2h;
 
-		trace_xe_guc_ct_g2h_flow_control(g2h->head,
+		trace_xe_guc_ct_g2h_flow_control(g2h->info.head,
 						 desc_read(xe, g2h, tail),
-						 g2h->size, g2h->space,
+						 g2h->info.size,
+						 g2h->info.space,
 						 g2h_fence ?
 						 GUC_CTB_HXG_MSG_MAX_LEN :
 						 g2h_len);
 
 #define g2h_avail(ct)	\
-	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head)
+	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
 		if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
 					g2h_avail(ct), HZ))
 			goto broken;
@@ -590,7 +596,7 @@ try_again:
 broken:
 	drm_err(drm, "No forward process on H2G, reset required");
 	xe_guc_ct_print(ct, &p);
-	ct->ctbs.h2g.broken = true;
+	ct->ctbs.h2g.info.broken = true;
 
 	return -EDEADLK;
 }
@@ -656,7 +662,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
 		return false;
 
 #define ct_alive(ct)	\
-	(ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken)
+	(ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken)
 	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct),  HZ * 5))
 		return false;
 #undef ct_alive
@@ -821,7 +827,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 		drm_err(&xe->drm,
 			"G2H channel broken on read, origin=%d, reset required\n",
 			origin);
-		ct->ctbs.g2h.broken = true;
+		ct->ctbs.g2h.info.broken = true;
 
 		return -EPROTO;
 	}
@@ -840,7 +846,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 		drm_err(&xe->drm,
 			"G2H channel broken on read, type=%d, reset required\n",
 			type);
-		ct->ctbs.g2h.broken = true;
+		ct->ctbs.g2h.info.broken = true;
 
 		ret = -EOPNOTSUPP;
 	}
@@ -919,36 +925,37 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 	if (!ct->enabled)
 		return -ENODEV;
 
-	if (g2h->broken)
+	if (g2h->info.broken)
 		return -EPIPE;
 
 	/* Calculate DW available to read */
 	tail = desc_read(xe, g2h, tail);
-	avail = tail - g2h->head;
+	avail = tail - g2h->info.head;
 	if (unlikely(avail == 0))
 		return 0;
 
 	if (avail < 0)
-		avail += g2h->size;
+		avail += g2h->info.size;
 
 	/* Read header */
-	xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32));
+	xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
+			   sizeof(u32));
 	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
 	if (len > avail) {
 		drm_err(&xe->drm,
 			"G2H channel broken on read, avail=%d, len=%d, reset required\n",
 			avail, len);
-		g2h->broken = true;
+		g2h->info.broken = true;
 
 		return -EPROTO;
 	}
 
-	head = (g2h->head + 1) % g2h->size;
+	head = (g2h->info.head + 1) % g2h->info.size;
 	avail = len - 1;
 
 	/* Read G2H message */
-	if (avail + head > g2h->size) {
-		u32 avail_til_wrap = g2h->size - head;
+	if (avail + head > g2h->info.size) {
+		u32 avail_til_wrap = g2h->info.size - head;
 
 		xe_map_memcpy_from(xe, msg + 1,
 				   &g2h->cmds, sizeof(u32) * head,
@@ -983,8 +990,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 	}
 
 	/* Update local / descriptor header */
-	g2h->head = (head + avail) % g2h->size;
-	desc_write(xe, g2h, head, g2h->head);
+	g2h->info.head = (head + avail) % g2h->info.size;
+	desc_write(xe, g2h, head, g2h->info.head);
 
 	return len;
 }
@@ -1093,12 +1100,12 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
 {
 	u32 head, tail;
 
-	drm_printf(p, "\tsize: %d\n", ctb->size);
-	drm_printf(p, "\tresv_space: %d\n", ctb->resv_space);
-	drm_printf(p, "\thead: %d\n", ctb->head);
-	drm_printf(p, "\ttail: %d\n", ctb->tail);
-	drm_printf(p, "\tspace: %d\n", ctb->space);
-	drm_printf(p, "\tbroken: %d\n", ctb->broken);
+	drm_printf(p, "\tsize: %d\n", ctb->info.size);
+	drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space);
+	drm_printf(p, "\thead: %d\n", ctb->info.head);
+	drm_printf(p, "\ttail: %d\n", ctb->info.tail);
+	drm_printf(p, "\tspace: %d\n", ctb->info.space);
+	drm_printf(p, "\tbroken: %d\n", ctb->info.broken);
 
 	head = desc_read(xe, ctb, head);
 	tail = desc_read(xe, ctb, tail);
@@ -1114,7 +1121,7 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
 			drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
 				   xe_map_rd(xe, &map, 0, u32));
 			++head;
-			if (head == ctb->size) {
+			if (head == ctb->info.size) {
 				head = 0;
 				map = ctb->cmds;
 			} else {
@@ -1168,12 +1175,12 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
 	if (!ret) {
 		xe_guc_ct_irq_handler(ct);
 		msleep(200);
-		if (g2h->space !=
-		    CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+		if (g2h->info.space !=
+		    CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) {
 			drm_printf(p, "Mismatch on space %d, %d\n",
-				   g2h->space,
-				   CIRC_SPACE(0, 0, g2h->size) -
-				   g2h->resv_space);
+				   g2h->info.space,
+				   CIRC_SPACE(0, 0, g2h->info.size) -
+				   g2h->info.resv_space);
 			ret = -EIO;
 		}
 		if (ct->g2h_outstanding) {
@@ -1185,12 +1192,12 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
 
 	/* Check failure path for blocking CTs too */
 	xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action));
-	if (g2h->space !=
-	    CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+	if (g2h->info.space !=
+	    CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) {
 		drm_printf(p, "Mismatch on space %d, %d\n",
-			   g2h->space,
-			   CIRC_SPACE(0, 0, g2h->size) -
-			   g2h->resv_space);
+			   g2h->info.space,
+			   CIRC_SPACE(0, 0, g2h->info.size) -
+			   g2h->info.resv_space);
 		ret = -EIO;
 	}
 	if (ct->g2h_outstanding) {
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index fd27dacf00c59..64e3dd14d4b2b 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -19,13 +19,9 @@
 struct xe_bo;
 
 /**
- * struct guc_ctb - GuC command transport buffer (CTB)
+ * struct guc_ctb_info - GuC command transport buffer (CTB) info
  */
-struct guc_ctb {
-	/** @desc: dma buffer map for CTB descriptor */
-	struct iosys_map desc;
-	/** @cmds: dma buffer map for CTB commands */
-	struct iosys_map cmds;
+struct guc_ctb_info {
 	/** @size: size of CTB commands (DW) */
 	u32 size;
 	/** @resv_space: reserved space of CTB commands (DW) */
@@ -40,6 +36,18 @@ struct guc_ctb {
 	bool broken;
 };
 
+/**
+ * struct guc_ctb - GuC command transport buffer (CTB)
+ */
+struct guc_ctb {
+	/** @desc: dma buffer map for CTB descriptor */
+	struct iosys_map desc;
+	/** @cmds: dma buffer map for CTB commands */
+	struct iosys_map cmds;
+	/** @info: CTB info */
+	struct guc_ctb_info info;
+};
+
 /**
  * struct xe_guc_ct - GuC command transport (CT) layer
  *
-- 
GitLab


From 513260dfd150a49ad117f1b7c50097a1d74c0085 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:09 -0400
Subject: [PATCH 1548/2340] drm/xe: Convert GuC CT print to snapshot capture
 and print.

The goal is to allow for a snapshot capture to be taken at the time
of the crash, while the print out can happen at a later time through
the exposed devcoredump virtual device.

v2: Handle memory allocation failures. (Matthew)
    Do not use GFP_ATOMIC on cases like debugfs prints. (Matthew)
v3: checkpatch fixes
v4: Do not use atomic in the g2h_worker_func (Matthew)

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c          |   2 +-
 drivers/gpu/drm/xe/xe_guc_ct.c       | 166 +++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_guc_ct.h       |   8 +-
 drivers/gpu/drm/xe/xe_guc_ct_types.h |  26 +++++
 drivers/gpu/drm/xe/xe_guc_submit.c   |   2 +-
 5 files changed, 179 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index eb4af4c711246..b72407e24d097 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -857,6 +857,6 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
 
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 
-	xe_guc_ct_print(&guc->ct, p);
+	xe_guc_ct_print(&guc->ct, p, false);
 	xe_guc_submit_print(guc, p);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index e16e5fe37ed49..e8c2edb1359d9 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -595,7 +595,7 @@ try_again:
 
 broken:
 	drm_err(drm, "No forward process on H2G, reset required");
-	xe_guc_ct_print(ct, &p);
+	xe_guc_ct_print(ct, &p, true);
 	ct->ctbs.h2g.info.broken = true;
 
 	return -EDEADLK;
@@ -1088,38 +1088,40 @@ static void g2h_worker_func(struct work_struct *w)
 			struct drm_device *drm = &ct_to_xe(ct)->drm;
 			struct drm_printer p = drm_info_printer(drm->dev);
 
-			xe_guc_ct_print(ct, &p);
+			xe_guc_ct_print(ct, &p, false);
 			kick_reset(ct);
 		}
 	} while (ret == 1);
 	xe_device_mem_access_put(ct_to_xe(ct));
 }
 
-static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
-			     struct drm_printer *p)
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+				     struct guc_ctb_snapshot *snapshot,
+				     bool atomic)
 {
 	u32 head, tail;
 
-	drm_printf(p, "\tsize: %d\n", ctb->info.size);
-	drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space);
-	drm_printf(p, "\thead: %d\n", ctb->info.head);
-	drm_printf(p, "\ttail: %d\n", ctb->info.tail);
-	drm_printf(p, "\tspace: %d\n", ctb->info.space);
-	drm_printf(p, "\tbroken: %d\n", ctb->info.broken);
+	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+			   sizeof(struct guc_ct_buffer_desc));
+	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
 
-	head = desc_read(xe, ctb, head);
-	tail = desc_read(xe, ctb, tail);
-	drm_printf(p, "\thead (memory): %d\n", head);
-	drm_printf(p, "\ttail (memory): %d\n", tail);
-	drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
+	snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
+				       atomic ? GFP_ATOMIC : GFP_KERNEL);
+
+	if (!snapshot->cmds) {
+		drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
+		return;
+	}
+
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
 
 	if (head != tail) {
 		struct iosys_map map =
 			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
 
 		while (head != tail) {
-			drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
-				   xe_map_rd(xe, &map, 0, u32));
+			snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
 			++head;
 			if (head == ctb->info.size) {
 				head = 0;
@@ -1131,20 +1133,140 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
 	}
 }
 
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+				   struct drm_printer *p)
+{
+	u32 head, tail;
+
+	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+	drm_printf(p, "\tresv_space: %d\n", snapshot->info.space);
+	drm_printf(p, "\thead: %d\n", snapshot->info.head);
+	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+	drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+
+	if (!snapshot->cmds)
+		return;
+
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
+
+	while (head != tail) {
+		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
+			   snapshot->cmds[head]);
+		++head;
+		if (head == snapshot->info.size)
+			head = 0;
+	}
+}
+
+static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
 {
+	kfree(snapshot->cmds);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+						      bool atomic)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = kzalloc(sizeof(*snapshot),
+			   atomic ? GFP_ATOMIC : GFP_KERNEL);
+
+	if (!snapshot) {
+		drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
+		return NULL;
+	}
+
 	if (ct->enabled) {
+		snapshot->ct_enabled = true;
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
+					 &snapshot->h2g, atomic);
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
+					 &snapshot->g2h, atomic);
+	}
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p)
+{
+	if (!snapshot)
+		return;
+
+	if (snapshot->ct_enabled) {
 		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
-		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
+		guc_ctb_snapshot_print(&snapshot->h2g, p);
 
 		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
-		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
-		drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
+		guc_ctb_snapshot_print(&snapshot->g2h, p);
+
+		drm_printf(p, "\tg2h outstanding: %d\n",
+			   snapshot->g2h_outstanding);
 	} else {
 		drm_puts(p, "\nCT disabled\n");
 	}
 }
 
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	guc_ctb_snapshot_free(&snapshot->h2g);
+	guc_ctb_snapshot_free(&snapshot->g2h);
+	kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This function quickly capture a snapshot and immediately print it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
+{
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
+	xe_guc_ct_snapshot_print(snapshot, p);
+	xe_guc_ct_snapshot_free(snapshot);
+}
+
 #ifdef XE_GUC_CT_SELFTEST
 /*
  * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
@@ -1166,7 +1288,7 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
 		ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);
 		if (ret) {
 			drm_printf(p, "Aborted pass %d, ret %d\n", i, ret);
-			xe_guc_ct_print(ct, p);
+			xe_guc_ct_print(ct, p, true);
 			break;
 		}
 	}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 49fb74f91e4df..3e04ee64652c1 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -13,9 +13,15 @@ struct drm_printer;
 int xe_guc_ct_init(struct xe_guc_ct *ct);
 int xe_guc_ct_enable(struct xe_guc_ct *ct);
 void xe_guc_ct_disable(struct xe_guc_ct *ct);
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
 void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
 
+struct xe_guc_ct_snapshot *
+xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p);
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+
 static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
 {
 	wake_up_all(&ct->wq);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 64e3dd14d4b2b..93046d95b009e 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -48,6 +48,32 @@ struct guc_ctb {
 	struct guc_ctb_info info;
 };
 
+/**
+ * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
+ */
+struct guc_ctb_snapshot {
+	/** @desc: snapshot of the CTB descriptor */
+	struct guc_ct_buffer_desc desc;
+	/** @cmds: snapshot of the CTB commands */
+	u32 *cmds;
+	/** @info: snapshot of the CTB info */
+	struct guc_ctb_info info;
+};
+
+/**
+ * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
+ */
+struct xe_guc_ct_snapshot {
+	/** @ct_enabled: CT enabled info at capture time. */
+	bool ct_enabled;
+	/** @g2h_outstanding: G2H outstanding info at the capture time */
+	u32 g2h_outstanding;
+	/** @g2h: G2H CTB snapshot */
+	struct guc_ctb_snapshot g2h;
+	/** @h2g: H2G CTB snapshot */
+	struct guc_ctb_snapshot h2g;
+};
+
 /**
  * struct xe_guc_ct - GuC command transport (CT) layer
  *
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 55b51ff791b85..f587aa48c5bd1 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -764,7 +764,7 @@ static void simple_error_capture(struct xe_engine *e)
 		}
 
 		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
-		xe_guc_ct_print(&guc->ct, &p);
+		xe_guc_ct_print(&guc->ct, &p, true);
 		guc_engine_print(e, &p);
 		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
 			if (hwe->class != e->hwe->class ||
-- 
GitLab


From 5ed53446325475514b78f9072a2f85ca24fc9548 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:10 -0400
Subject: [PATCH 1549/2340] drm/xe: Add GuC CT snapshot to xe_devcoredump.

Let's start to move our existent logs to devcoredump one by
one. Any format change should come on follow-up work.

v2: Rebase and add the dma_fence locking annotation here.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump.c       | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_devcoredump_types.h |  4 ++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 00b9cc44c773c..b0e3db148ce26 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -11,6 +11,7 @@
 
 #include "xe_engine.h"
 #include "xe_gt.h"
+#include "xe_guc_ct.h"
 
 /**
  * DOC: Xe device coredump
@@ -48,6 +49,11 @@ static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump)
 	return container_of(coredump, struct xe_device, devcoredump);
 }
 
+static struct xe_guc *engine_to_guc(struct xe_engine *e)
+{
+	return &e->gt->uc.guc;
+}
+
 static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 				   size_t count, void *data, size_t datalen)
 {
@@ -78,6 +84,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 	ts = ktime_to_timespec64(ss->boot_time);
 	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
 
+	drm_printf(&p, "\n**** GuC CT ****\n");
+	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
+
 	return count - iter.remain;
 }
 
@@ -89,6 +98,8 @@ static void xe_devcoredump_free(void *data)
 	if (!data || !coredump_to_xe(coredump))
 		return;
 
+	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
+
 	coredump->captured = false;
 	drm_info(&coredump_to_xe(coredump)->drm,
 		 "Xe device coredump has been deleted.\n");
@@ -98,9 +109,15 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 				 struct xe_engine *e)
 {
 	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+	struct xe_guc *guc = engine_to_guc(e);
+	bool cookie;
 
 	ss->snapshot_time = ktime_get_real();
 	ss->boot_time = ktime_get_boottime();
+
+	cookie = dma_fence_begin_signalling();
+	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
+	dma_fence_end_signalling(cookie);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 52bd27ca10369..4e3371c7b9c5c 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -23,6 +23,10 @@ struct xe_devcoredump_snapshot {
 	ktime_t snapshot_time;
 	/** @boot_time:  Relative boot time so the uptime can be calculated. */
 	ktime_t boot_time;
+
+	/* GuC snapshots */
+	/** @ct_snapshot: GuC CT snapshot */
+	struct xe_guc_ct_snapshot *ct;
 };
 
 /**
-- 
GitLab


From 1825c492daafc39e2eaeacc0f05372aca4ab6f7f Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:11 -0400
Subject: [PATCH 1550/2340] drm/xe: Introduce guc_submit_types.h with relevant
 structs.

These structs and definitions are only used for the guc_submit
and they were added specifically for the parallel submission.

While doing that also delete the unused struct guc_wq_item.

v2: checkpatch fixes.

Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_fwif.h         | 29 -----------
 drivers/gpu/drm/xe/xe_guc_submit.c       | 42 +++++-----------
 drivers/gpu/drm/xe/xe_guc_submit_types.h | 64 ++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 59 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_guc_submit_types.h

diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 20155ba4ef073..27d132ce2087e 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -46,35 +46,6 @@
 #define GUC_MAX_ENGINE_CLASSES		16
 #define GUC_MAX_INSTANCES_PER_CLASS	32
 
-/* Work item for submitting workloads into work queue of GuC. */
-#define WQ_STATUS_ACTIVE		1
-#define WQ_STATUS_SUSPENDED		2
-#define WQ_STATUS_CMD_ERROR		3
-#define WQ_STATUS_ENGINE_ID_NOT_USED	4
-#define WQ_STATUS_SUSPENDED_FROM_RESET	5
-#define WQ_TYPE_NOOP			0x4
-#define WQ_TYPE_MULTI_LRC		0x5
-#define WQ_TYPE_MASK			GENMASK(7, 0)
-#define WQ_LEN_MASK			GENMASK(26, 16)
-
-#define WQ_GUC_ID_MASK			GENMASK(15, 0)
-#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
-
-struct guc_wq_item {
-	u32 header;
-	u32 context_desc;
-	u32 submit_element_info;
-	u32 fence_id;
-} __packed;
-
-struct guc_sched_wq_desc {
-	u32 head;
-	u32 tail;
-	u32 error_offset;
-	u32 wq_status;
-	u32 reserved[28];
-} __packed;
-
 /* Helper for context registration H2G */
 struct guc_ctxt_registration_info {
 	u32 flags;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index f587aa48c5bd1..914e9ffef43f7 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -23,6 +23,7 @@
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_engine_types.h"
+#include "xe_guc_submit_types.h"
 #include "xe_hw_engine.h"
 #include "xe_hw_fence.h"
 #include "xe_lrc.h"
@@ -379,32 +380,12 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
 		       __guc_engine_policy_action_size(&policy), 0, 0);
 }
 
-#define PARALLEL_SCRATCH_SIZE	2048
-#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
-#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
-#define CACHELINE_BYTES		64
-
-struct sync_semaphore {
-	u32 semaphore;
-	u8 unused[CACHELINE_BYTES - sizeof(u32)];
-};
-
-struct parallel_scratch {
-	struct guc_sched_wq_desc wq_desc;
-
-	struct sync_semaphore go;
-	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
-
-	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
-		sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];
-
-	u32 wq[WQ_SIZE / sizeof(u32)];
-};
-
 #define parallel_read(xe_, map_, field_) \
-	xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
+	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_)
 #define parallel_write(xe_, map_, field_, val_) \
-	xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)
+	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_, val_)
 
 static void __register_mlrc_engine(struct xe_guc *guc,
 				   struct xe_engine *e,
@@ -487,13 +468,13 @@ static void register_engine(struct xe_engine *e)
 		struct iosys_map map = xe_lrc_parallel_map(lrc);
 
 		info.wq_desc_lo = lower_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq_desc));
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
 		info.wq_desc_hi = upper_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq_desc));
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
 		info.wq_base_lo = lower_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq[0]));
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
 		info.wq_base_hi = upper_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq[0]));
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
 		info.wq_size = WQ_SIZE;
 
 		e->guc->wqi_head = 0;
@@ -595,8 +576,8 @@ static void wq_item_append(struct xe_engine *e)
 
 	XE_BUG_ON(i != wqi_size / sizeof(u32));
 
-	iosys_map_incr(&map, offsetof(struct parallel_scratch,
-					wq[e->guc->wqi_tail / sizeof(u32)]));
+	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
+				      wq[e->guc->wqi_tail / sizeof(u32)]));
 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
 	e->guc->wqi_tail += wqi_size;
 	XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);
@@ -1672,6 +1653,7 @@ static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
 		guc_engine_wq_print(e, p);
 
 	spin_lock(&sched->base.job_list_lock);
+
 	list_for_each_entry(job, &sched->base.pending_list, drm.list)
 		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
 			   xe_sched_job_seqno(job),
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
new file mode 100644
index 0000000000000..d23759959be9b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_TYPES_H_
+#define _XE_GUC_SUBMIT_TYPES_H_
+
+#include "xe_hw_engine_types.h"
+
+/* Work item for submitting workloads into work queue of GuC. */
+#define WQ_STATUS_ACTIVE		1
+#define WQ_STATUS_SUSPENDED		2
+#define WQ_STATUS_CMD_ERROR		3
+#define WQ_STATUS_ENGINE_ID_NOT_USED	4
+#define WQ_STATUS_SUSPENDED_FROM_RESET	5
+#define WQ_TYPE_NOOP			0x4
+#define WQ_TYPE_MULTI_LRC		0x5
+#define WQ_TYPE_MASK			GENMASK(7, 0)
+#define WQ_LEN_MASK			GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK			GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
+
+#define PARALLEL_SCRATCH_SIZE	2048
+#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
+#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
+#define CACHELINE_BYTES		64
+
+struct guc_sched_wq_desc {
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u32 wq_status;
+	u32 reserved[28];
+} __packed;
+
+struct sync_semaphore {
+	u32 semaphore;
+	u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+/**
+ * Struct guc_submit_parallel_scratch - A scratch shared mapped buffer.
+ */
+struct guc_submit_parallel_scratch {
+	/** @wq_desc: Guc scheduler workqueue descriptor */
+	struct guc_sched_wq_desc wq_desc;
+
+	/** @go: Go Semaphore */
+	struct sync_semaphore go;
+	/** @join: Joined semaphore for the relevant hw engine instances */
+	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
+
+	/** @unused: Unused/Reserved memory space */
+	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+		  sizeof(struct sync_semaphore) *
+		  (XE_HW_ENGINE_MAX_INSTANCE + 1)];
+
+	/** @wq: Workqueue info */
+	u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+#endif
-- 
GitLab


From bbdf97c140064975552bedb70b2b4329ab758f0b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:12 -0400
Subject: [PATCH 1551/2340] drm/xe: Convert GuC Engine print to snapshot
 capture and print.

The goal is to allow for a snapshot capture to be taken at the time
of the crash, while the print out can happen at a later time through
the exposed devcoredump virtual device.

v2: Handle memory allocation failures. (Matthew)
Do not use GFP_ATOMIC on cases like debugfs prints. (Matthew)
v3: checkpatch
v4: pending_list allocation needs to be atomic because of the
    spin_lock. (Matthew)
    get back to GFP_ATOMIC only. (lockdep).

Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c       | 237 +++++++++++++++++++----
 drivers/gpu/drm/xe/xe_guc_submit.h       |  10 +-
 drivers/gpu/drm/xe/xe_guc_submit_types.h |  91 +++++++++
 3 files changed, 298 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 914e9ffef43f7..3ff133e8463c8 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1594,75 +1594,234 @@ int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	return 0;
 }
 
-static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
+static void
+guc_engine_wq_snapshot_capture(struct xe_engine *e,
+			       struct xe_guc_submit_engine_snapshot *snapshot)
 {
 	struct xe_guc *guc = engine_to_guc(e);
 	struct xe_device *xe = guc_to_xe(guc);
 	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
 	int i;
 
+	snapshot->guc.wqi_head = e->guc->wqi_head;
+	snapshot->guc.wqi_tail = e->guc->wqi_tail;
+	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
+	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
+	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
+							  wq_desc.wq_status);
+
+	if (snapshot->parallel.wq_desc.head !=
+	    snapshot->parallel.wq_desc.tail) {
+		for (i = snapshot->parallel.wq_desc.head;
+		     i != snapshot->parallel.wq_desc.tail;
+		     i = (i + sizeof(u32)) % WQ_SIZE)
+			snapshot->parallel.wq[i / sizeof(u32)] =
+				parallel_read(xe, map, wq[i / sizeof(u32)]);
+	}
+}
+
+static void
+guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
+			     struct drm_printer *p)
+{
+	int i;
+
 	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
-		   e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
+		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
 	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
-		   e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
-	drm_printf(p, "\tWQ status: %u\n",
-		   parallel_read(xe, map, wq_desc.wq_status));
-	if (parallel_read(xe, map, wq_desc.head) !=
-	    parallel_read(xe, map, wq_desc.tail)) {
-		for (i = parallel_read(xe, map, wq_desc.head);
-		     i != parallel_read(xe, map, wq_desc.tail);
+		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
+	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
+
+	if (snapshot->parallel.wq_desc.head !=
+	    snapshot->parallel.wq_desc.tail) {
+		for (i = snapshot->parallel.wq_desc.head;
+		     i != snapshot->parallel.wq_desc.tail;
 		     i = (i + sizeof(u32)) % WQ_SIZE)
 			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
-				   parallel_read(xe, map, wq[i / sizeof(u32)]));
+				   snapshot->parallel.wq[i / sizeof(u32)]);
 	}
 }
 
-static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
+/**
+ * xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine.
+ * @e: Xe Engine.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC Submit Engine snapshot object that must be freed by the
+ * caller, using `xe_guc_engine_snapshot_free`.
+ */
+struct xe_guc_submit_engine_snapshot *
+xe_guc_engine_snapshot_capture(struct xe_engine *e)
 {
+	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gpu_scheduler *sched = &e->guc->sched;
 	struct xe_sched_job *job;
+	struct xe_guc_submit_engine_snapshot *snapshot;
+	int i;
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
+
+	if (!snapshot) {
+		drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
+		return NULL;
+	}
+
+	snapshot->guc.id = e->guc->id;
+	memcpy(&snapshot->name, &e->name, sizeof(snapshot->name));
+	snapshot->class = e->class;
+	snapshot->logical_mask = e->logical_mask;
+	snapshot->width = e->width;
+	snapshot->refcount = kref_read(&e->refcount);
+	snapshot->sched_timeout = sched->base.timeout;
+	snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us;
+	snapshot->sched_props.preempt_timeout_us =
+		e->sched_props.preempt_timeout_us;
+
+	snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot),
+				      GFP_ATOMIC);
+
+	if (!snapshot->lrc) {
+		drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
+	} else {
+		for (i = 0; i < e->width; ++i) {
+			struct xe_lrc *lrc = e->lrc + i;
+
+			snapshot->lrc[i].context_desc =
+				lower_32_bits(xe_lrc_ggtt_addr(lrc));
+			snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
+			snapshot->lrc[i].tail.internal = lrc->ring.tail;
+			snapshot->lrc[i].tail.memory =
+				xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
+			snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
+			snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
+		}
+	}
+
+	snapshot->schedule_state = atomic_read(&e->guc->state);
+	snapshot->engine_flags = e->flags;
+
+	snapshot->parallel_execution = xe_engine_is_parallel(e);
+	if (snapshot->parallel_execution)
+		guc_engine_wq_snapshot_capture(e, snapshot);
+
+	spin_lock(&sched->base.job_list_lock);
+	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
+	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
+					       sizeof(struct pending_list_snapshot),
+					       GFP_ATOMIC);
+
+	if (!snapshot->pending_list) {
+		drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
+	} else {
+		i = 0;
+		list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+			snapshot->pending_list[i].seqno =
+				xe_sched_job_seqno(job);
+			snapshot->pending_list[i].fence =
+				dma_fence_is_signaled(job->fence) ? 1 : 0;
+			snapshot->pending_list[i].finished =
+				dma_fence_is_signaled(&job->drm.s_fence->finished)
+				? 1 : 0;
+			i++;
+		}
+	}
+
+	spin_unlock(&sched->base.job_list_lock);
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot.
+ * @snapshot: GuC Submit Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC Submit Engine snapshot object.
+ */
+void
+xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
+			     struct drm_printer *p)
+{
 	int i;
 
-	drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
-	drm_printf(p, "\tName: %s\n", e->name);
-	drm_printf(p, "\tClass: %d\n", e->class);
-	drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
-	drm_printf(p, "\tWidth: %d\n", e->width);
-	drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
-	drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout);
-	drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
+	if (!snapshot)
+		return;
+
+	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
+	drm_printf(p, "\tName: %s\n", snapshot->name);
+	drm_printf(p, "\tClass: %d\n", snapshot->class);
+	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
+	drm_printf(p, "\tWidth: %d\n", snapshot->width);
+	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
+	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
+	drm_printf(p, "\tTimeslice: %u (us)\n",
+		   snapshot->sched_props.timeslice_us);
 	drm_printf(p, "\tPreempt timeout: %u (us)\n",
-		   e->sched_props.preempt_timeout_us);
-	for (i = 0; i < e->width; ++i ) {
-		struct xe_lrc *lrc = e->lrc + i;
+		   snapshot->sched_props.preempt_timeout_us);
 
+	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
 		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
-			   lower_32_bits(xe_lrc_ggtt_addr(lrc)));
+			   snapshot->lrc[i].context_desc);
 		drm_printf(p, "\tLRC Head: (memory) %u\n",
-			   xe_lrc_ring_head(lrc));
+			   snapshot->lrc[i].head);
 		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
-			   lrc->ring.tail,
-			   xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
+			   snapshot->lrc[i].tail.internal,
+			   snapshot->lrc[i].tail.memory);
 		drm_printf(p, "\tStart seqno: (memory) %d\n",
-			   xe_lrc_start_seqno(lrc));
-		drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
+			   snapshot->lrc[i].start_seqno);
+		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
 	}
-	drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
-	drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
-	if (xe_engine_is_parallel(e))
-		guc_engine_wq_print(e, p);
+	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
+	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags);
 
-	spin_lock(&sched->base.job_list_lock);
+	if (snapshot->parallel_execution)
+		guc_engine_wq_snapshot_print(snapshot, p);
 
-	list_for_each_entry(job, &sched->base.pending_list, drm.list)
+	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
+	     i++)
 		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
-			   xe_sched_job_seqno(job),
-			   dma_fence_is_signaled(job->fence) ? 1 : 0,
-			   dma_fence_is_signaled(&job->drm.s_fence->finished) ?
-			   1 : 0);
-	spin_unlock(&sched->base.job_list_lock);
+			   snapshot->pending_list[i].seqno,
+			   snapshot->pending_list[i].fence,
+			   snapshot->pending_list[i].finished);
+}
+
+/**
+ * xe_guc_engine_snapshot_free - Free all allocated objects for a given
+ * snapshot.
+ * @snapshot: GuC Submit Engine snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kfree(snapshot->lrc);
+	kfree(snapshot->pending_list);
+	kfree(snapshot);
 }
 
+static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
+{
+	struct xe_guc_submit_engine_snapshot *snapshot;
+
+	snapshot = xe_guc_engine_snapshot_capture(e);
+	xe_guc_engine_snapshot_print(snapshot, p);
+	xe_guc_engine_snapshot_free(snapshot);
+}
+
+/**
+ * xe_guc_submit_print - GuC Submit Print.
+ * @guc: GuC.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function capture and prints snapshots of **all** GuC Engines.
+ */
 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
 {
 	struct xe_engine *e;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 8002734d6f24d..4153c2d220130 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -13,7 +13,6 @@ struct xe_engine;
 struct xe_guc;
 
 int xe_guc_submit_init(struct xe_guc *guc);
-void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
 
 int xe_guc_submit_reset_prepare(struct xe_guc *guc);
 void xe_guc_submit_reset_wait(struct xe_guc *guc);
@@ -27,4 +26,13 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 					   u32 len);
 int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
+struct xe_guc_submit_engine_snapshot *
+xe_guc_engine_snapshot_capture(struct xe_engine *e);
+void
+xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
+			     struct drm_printer *p);
+void
+xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot);
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index d23759959be9b..88e855dae0563 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -61,4 +61,95 @@ struct guc_submit_parallel_scratch {
 	u32 wq[WQ_SIZE / sizeof(u32)];
 };
 
+struct lrc_snapshot {
+	u32 context_desc;
+	u32 head;
+	struct {
+		u32 internal;
+		u32 memory;
+	} tail;
+	u32 start_seqno;
+	u32 seqno;
+};
+
+struct pending_list_snapshot {
+	u32 seqno;
+	bool fence;
+	bool finished;
+};
+
+/**
+ * struct xe_guc_submit_engine_snapshot - Snapshot for devcoredump
+ */
+struct xe_guc_submit_engine_snapshot {
+	/** @name: name of this engine */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @class: class of this engine */
+	enum xe_engine_class class;
+	/**
+	 * @logical_mask: logical mask of where job submitted to engine can run
+	 */
+	u32 logical_mask;
+	/** @width: width (number BB submitted per exec) of this engine */
+	u16 width;
+	/** @refcount: ref count of this engine */
+	u32 refcount;
+	/**
+	 * @sched_timeout: the time after which a job is removed from the
+	 * scheduler.
+	 */
+	long sched_timeout;
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+	} sched_props;
+
+	/** @lrc: LRC Snapshot */
+	struct lrc_snapshot *lrc;
+
+	/** @schedule_state: Schedule State at the moment of Crash */
+	u32 schedule_state;
+	/** @engine_flags: Flags of the faulty engine */
+	unsigned long engine_flags;
+
+	/** @guc: GuC Engine Snapshot */
+	struct {
+		/** @wqi_head: work queue item head */
+		u32 wqi_head;
+		/** @wqi_tail: work queue item tail */
+		u32 wqi_tail;
+		/** @id: GuC id for this xe_engine */
+		u16 id;
+	} guc;
+
+	/**
+	 * @parallel_execution: Indication if the failure was during parallel
+	 * execution
+	 */
+	bool parallel_execution;
+	/** @parallel: snapshot of the useful parallel scratch */
+	struct {
+		/** @wq_desc: Workqueue description */
+		struct {
+			/** @head: Workqueue Head */
+			u32 head;
+			/** @tail: Workqueue Tail */
+			u32 tail;
+			/** @status: Workqueue Status */
+			u32 status;
+		} wq_desc;
+		/** @wq: Workqueue Items */
+		u32 wq[WQ_SIZE / sizeof(u32)];
+	} parallel;
+
+	/** @pending_list_size: Size of the pending list snapshot array */
+	int pending_list_size;
+	/** @pending_list: snapshot of the pending list info */
+	struct pending_list_snapshot *pending_list;
+};
+
 #endif
-- 
GitLab


From 3847ec03ddd4b688cd02929356ee979acddfa03f Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:13 -0400
Subject: [PATCH 1552/2340] drm/xe: Add GuC Submit Engine snapshot to
 xe_devcoredump.

Let's start to move our existent logs to devcoredump one by
one. Any format change should come on follow-up work.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump.c       | 4 ++++
 drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index b0e3db148ce26..7296c0137b475 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -12,6 +12,7 @@
 #include "xe_engine.h"
 #include "xe_gt.h"
 #include "xe_guc_ct.h"
+#include "xe_guc_submit.h"
 
 /**
  * DOC: Xe device coredump
@@ -86,6 +87,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 
 	drm_printf(&p, "\n**** GuC CT ****\n");
 	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
+	xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p);
 
 	return count - iter.remain;
 }
@@ -99,6 +101,7 @@ static void xe_devcoredump_free(void *data)
 		return;
 
 	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
+	xe_guc_engine_snapshot_free(coredump->snapshot.ge);
 
 	coredump->captured = false;
 	drm_info(&coredump_to_xe(coredump)->drm,
@@ -117,6 +120,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 
 	cookie = dma_fence_begin_signalling();
 	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
+	coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e);
 	dma_fence_end_signalling(cookie);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 4e3371c7b9c5c..7c64532241391 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -27,6 +27,8 @@ struct xe_devcoredump_snapshot {
 	/* GuC snapshots */
 	/** @ct_snapshot: GuC CT snapshot */
 	struct xe_guc_ct_snapshot *ct;
+	/** @ge: Guc Engine snapshot */
+	struct xe_guc_submit_engine_snapshot *ge;
 };
 
 /**
-- 
GitLab


From a4db55558785191a9ff0d295ccf181f18856cb58 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:14 -0400
Subject: [PATCH 1553/2340] drm/xe: Convert Xe HW Engine print to snapshot
 capture and print.

The goal is to allow for a snapshot capture to be taken at the time
of the crash, while the print out can happen at a later time through
the exposed devcoredump virtual device.

v2: Addressing these Matthew comments:
    - Handle memory allocation failures.
    - Do not use GFP_ATOMIC on cases like debugfs prints.
    - placement of @reg doc.
    - identation issues.
v3: checkpatch
v4: Rebase and get back to GFP_ATOMIC only.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c      |   2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c      |   2 +-
 drivers/gpu/drm/xe/xe_hw_engine.c       | 209 +++++++++++++++++-------
 drivers/gpu/drm/xe/xe_hw_engine.h       |   8 +-
 drivers/gpu/drm/xe/xe_hw_engine_types.h |  78 +++++++++
 5 files changed, 240 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index c45486c2015a9..8bf441e850a00 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -42,7 +42,7 @@ static int hw_engines(struct seq_file *m, void *data)
 	}
 
 	for_each_hw_engine(hwe, gt, id)
-		xe_hw_engine_print_state(hwe, &p);
+		xe_hw_engine_print(hwe, &p);
 
 	xe_device_mem_access_put(xe);
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 3ff133e8463c8..7be06320dbd72 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -751,7 +751,7 @@ static void simple_error_capture(struct xe_engine *e)
 			if (hwe->class != e->hwe->class ||
 			    !(BIT(hwe->logical_instance) & adj_logical_mask))
 				continue;
-			xe_hw_engine_print_state(hwe, &p);
+			xe_hw_engine_print(hwe, &p);
 		}
 		xe_analyze_vm(&p, e->vm, e->gt->info.id);
 		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 751f6c3bba173..71ac4defb947d 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -574,77 +574,174 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
 		xe_hw_fence_irq_run(hwe->fence_irq);
 }
 
-void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
+/**
+ * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
+ * @hwe: Xe HW Engine.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a Xe HW Engine snapshot object that must be freed by the
+ * caller, using `xe_hw_engine_snapshot_free`.
+ */
+struct xe_hw_engine_snapshot *
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 {
+	struct xe_hw_engine_snapshot *snapshot;
+	int len;
+
 	if (!xe_hw_engine_is_valid(hwe))
+		return NULL;
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
+
+	if (!snapshot)
+		return NULL;
+
+	len = strlen(hwe->name) + 1;
+	snapshot->name = kzalloc(len, GFP_ATOMIC);
+	if (snapshot->name)
+		strscpy(snapshot->name, hwe->name, len);
+
+	snapshot->class = hwe->class;
+	snapshot->logical_instance = hwe->logical_instance;
+	snapshot->forcewake.domain = hwe->domain;
+	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
+						    hwe->domain);
+	snapshot->mmio_base = hwe->mmio_base;
+
+	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
+	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe,
+							   RING_HWS_PGA(0));
+	snapshot->reg.ring_execlist_status_lo =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
+	snapshot->reg.ring_execlist_status_hi =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
+	snapshot->reg.ring_execlist_sq_contents_lo =
+		hw_engine_mmio_read32(hwe,
+				      RING_EXECLIST_SQ_CONTENTS_LO(0));
+	snapshot->reg.ring_execlist_sq_contents_hi =
+		hw_engine_mmio_read32(hwe,
+				      RING_EXECLIST_SQ_CONTENTS_HI(0));
+	snapshot->reg.ring_execlist_control =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0));
+	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
+	snapshot->reg.ring_head =
+		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
+	snapshot->reg.ring_tail =
+		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
+	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
+	snapshot->reg.ring_mi_mode =
+		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
+	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
+	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
+	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
+	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
+	snapshot->reg.ring_acthd_udw =
+		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
+	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
+	snapshot->reg.ring_bbaddr_udw =
+		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
+	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
+	snapshot->reg.ring_dma_fadd_udw =
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
+	snapshot->reg.ring_dma_fadd =
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
+	snapshot->reg.ipeir = hw_engine_mmio_read32(hwe, IPEIR(0));
+	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, IPEHR(0));
+
+	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
+		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
+
+	return snapshot;
+}
+
+/**
+ * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given Xe HW Engine snapshot object.
+ */
+void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	if (!snapshot)
 		return;
 
-	drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
-		   hwe->logical_instance);
+	drm_printf(p, "%s (physical), logical instance=%d\n",
+		   snapshot->name ? snapshot->name : "",
+		   snapshot->logical_instance);
 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
-		   hwe->domain,
-		   xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain));
-	drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base);
-
-	drm_printf(p, "\tHWSTAM: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_HWSTAM(0)));
-	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)));
-
+		   snapshot->forcewake.domain, snapshot->forcewake.ref);
+	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
+	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
 	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)));
+		   snapshot->reg.ring_execlist_status_lo);
 	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)));
+		   snapshot->reg.ring_execlist_status_hi);
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe,
-					 RING_EXECLIST_SQ_CONTENTS_LO(0)));
+		   snapshot->reg.ring_execlist_sq_contents_lo);
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe,
-					 RING_EXECLIST_SQ_CONTENTS_HI(0)));
+		   snapshot->reg.ring_execlist_sq_contents_hi);
 	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0)));
-
-	drm_printf(p, "\tRING_START: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_START(0)));
-	drm_printf(p, "\tRING_HEAD:  0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR);
-	drm_printf(p, "\tRING_TAIL:  0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR);
-	drm_printf(p, "\tRING_CTL: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_CTL(0)));
+		   snapshot->reg.ring_execlist_control);
+	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
+	drm_printf(p, "\tRING_HEAD:  0x%08x\n", snapshot->reg.ring_head);
+	drm_printf(p, "\tRING_TAIL:  0x%08x\n", snapshot->reg.ring_tail);
+	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
+	drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_MI_MODE(0)));
-	drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_MODE(0)));
-
-	drm_printf(p, "\tRING_IMR:   0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_IMR(0)));
-	drm_printf(p, "\tRING_ESR:   0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_ESR(0)));
-	drm_printf(p, "\tRING_EMR:   0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_EMR(0)));
-	drm_printf(p, "\tRING_EIR:   0x%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_EIR(0)));
-
-	drm_printf(p, "\tACTHD:  0x%08x_%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)),
-		   hw_engine_mmio_read32(hwe, RING_ACTHD(0)));
-	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)),
-		   hw_engine_mmio_read32(hwe, RING_BBADDR(0)));
+		   snapshot->reg.ring_mode);
+	drm_printf(p, "\tRING_IMR:   0x%08x\n", snapshot->reg.ring_imr);
+	drm_printf(p, "\tRING_ESR:   0x%08x\n", snapshot->reg.ring_esr);
+	drm_printf(p, "\tRING_EMR:   0x%08x\n", snapshot->reg.ring_emr);
+	drm_printf(p, "\tRING_EIR:   0x%08x\n", snapshot->reg.ring_eir);
+	drm_printf(p, "\tACTHD:  0x%08x_%08x\n", snapshot->reg.ring_acthd_udw,
+		   snapshot->reg.ring_acthd);
+	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw,
+		   snapshot->reg.ring_bbaddr);
 	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
-		   hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)),
-		   hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)));
+		   snapshot->reg.ring_dma_fadd_udw,
+		   snapshot->reg.ring_dma_fadd);
+	drm_printf(p, "\tIPEIR: 0x%08x\n", snapshot->reg.ipeir);
+	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
+	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
+		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
+			   snapshot->reg.rcu_mode);
+}
 
-	drm_printf(p, "\tIPEIR: 0x%08x\n",
-		   hw_engine_mmio_read32(hwe, IPEIR(0)));
-	drm_printf(p, "\tIPEHR: 0x%08x\n\n",
-		   hw_engine_mmio_read32(hwe, IPEHR(0)));
+/**
+ * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
 
-	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
-		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
-			   xe_mmio_read32(hwe->gt, RCU_MODE));
+	kfree(snapshot->name);
+	kfree(snapshot);
+}
+
+/**
+ * xe_hw_engine_print - Xe HW Engine Print.
+ * @hwe: Hardware Engine.
+ * @p: drm_printer.
+ *
+ * This function quickly capture a snapshot and immediately print it out.
+ */
+void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
+{
+	struct xe_hw_engine_snapshot *snapshot;
 
+	snapshot = xe_hw_engine_snapshot_capture(hwe);
+	xe_hw_engine_snapshot_print(snapshot, p);
+	xe_hw_engine_snapshot_free(snapshot);
 }
 
 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index 013efcd6d8c5b..7eca9d53c7b15 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -14,9 +14,15 @@ int xe_hw_engines_init_early(struct xe_gt *gt);
 int xe_hw_engines_init(struct xe_gt *gt);
 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
-void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p);
 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
 				enum xe_engine_class engine_class);
+
+struct xe_hw_engine_snapshot *
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe);
+void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot);
+void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
+				 struct drm_printer *p);
+void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
 void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
 
 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 2c40384957da3..d788e67312b99 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -109,4 +109,82 @@ struct xe_hw_engine {
 	enum xe_hw_engine_id engine_id;
 };
 
+/**
+ * struct xe_hw_engine_snapshot - Hardware engine snapshot
+ *
+ * Contains the snapshot of useful hardware engine info and registers.
+ */
+struct xe_hw_engine_snapshot {
+	/** @name: name of the hw engine */
+	char *name;
+	/** @class: class of this hw engine */
+	enum xe_engine_class class;
+	/** @logical_instance: logical instance of this hw engine */
+	u16 logical_instance;
+	/** @forcewake: Force Wake information snapshot */
+	struct {
+		/** @domain: force wake domain of this hw engine */
+		enum xe_force_wake_domains domain;
+		/** @ref: Forcewake ref for the above domain */
+		int ref;
+	} forcewake;
+	/** @mmio_base: MMIO base address of this hw engine*/
+	u32 mmio_base;
+	/** @reg: Useful MMIO register snapshot */
+	struct {
+		/** @ring_hwstam: RING_HWSTAM */
+		u32 ring_hwstam;
+		/** @ring_hws_pga: RING_HWS_PGA */
+		u32 ring_hws_pga;
+		/** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */
+		u32 ring_execlist_status_lo;
+		/** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */
+		u32 ring_execlist_status_hi;
+		/** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */
+		u32 ring_execlist_sq_contents_lo;
+		/** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */
+		u32 ring_execlist_sq_contents_hi;
+		/** @ring_execlist_control: RING_EXECLIST_CONTROL */
+		u32 ring_execlist_control;
+		/** @ring_start: RING_START */
+		u32 ring_start;
+		/** @ring_head: RING_HEAD */
+		u32 ring_head;
+		/** @ring_tail: RING_TAIL */
+		u32 ring_tail;
+		/** @ring_ctl: RING_CTL */
+		u32 ring_ctl;
+		/** @ring_mi_mode: RING_MI_MODE */
+		u32 ring_mi_mode;
+		/** @ring_mode: RING_MODE */
+		u32 ring_mode;
+		/** @ring_imr: RING_IMR */
+		u32 ring_imr;
+		/** @ring_esr: RING_ESR */
+		u32 ring_esr;
+		/** @ring_emr: RING_EMR */
+		u32 ring_emr;
+		/** @ring_eir: RING_EIR */
+		u32 ring_eir;
+		/** @ring_acthd_udw: RING_ACTHD_UDW */
+		u32 ring_acthd_udw;
+		/** @ring_acthd: RING_ACTHD */
+		u32 ring_acthd;
+		/** @ring_bbaddr_udw: RING_BBADDR_UDW */
+		u32 ring_bbaddr_udw;
+		/** @ring_bbaddr: RING_BBADDR */
+		u32 ring_bbaddr;
+		/** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */
+		u32 ring_dma_fadd_udw;
+		/** @ring_dma_fadd: RING_DMA_FADD */
+		u32 ring_dma_fadd;
+		/** @ipeir: IPEIR */
+		u32 ipeir;
+		/** @ipehr: IPEHR */
+		u32 ipehr;
+		/** @rcu_mode: RCU_MODE */
+		u32 rcu_mode;
+	} reg;
+};
+
 #endif
-- 
GitLab


From 01a87f3181caab1b5eca8ae5a7436c1031b6f5a8 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:15 -0400
Subject: [PATCH 1554/2340] drm/xe: Add HW Engine snapshot to xe_devcoredump.

Let's continue to add our existent simple logs to devcoredump one
by one. Any format change should come on follow-up work.

v2: remove unnecessary, and now duplicated, dma_fence annotation. (Matthew)
v3: avoid for_each with faulty_engine since that can be already freed at
    the time of the read/free. Instead, iterate in the full array of
    hw_engines. (Kasan)

Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump.c       | 41 +++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_devcoredump_types.h |  4 +++
 2 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 7296c0137b475..f53f4b51233ae 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -9,10 +9,13 @@
 #include <linux/devcoredump.h>
 #include <generated/utsrelease.h>
 
+#include "xe_device.h"
 #include "xe_engine.h"
+#include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_submit.h"
+#include "xe_hw_engine.h"
 
 /**
  * DOC: Xe device coredump
@@ -63,6 +66,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 	struct drm_printer p;
 	struct drm_print_iterator iter;
 	struct timespec64 ts;
+	int i;
 
 	/* Our device is gone already... */
 	if (!data || !coredump_to_xe(coredump))
@@ -89,12 +93,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
 	xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p);
 
+	drm_printf(&p, "\n**** HW Engines ****\n");
+	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+		if (coredump->snapshot.hwe[i])
+			xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
+						    &p);
+
 	return count - iter.remain;
 }
 
 static void xe_devcoredump_free(void *data)
 {
 	struct xe_devcoredump *coredump = data;
+	int i;
 
 	/* Our device is gone. Nothing to do... */
 	if (!data || !coredump_to_xe(coredump))
@@ -102,6 +113,9 @@ static void xe_devcoredump_free(void *data)
 
 	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
 	xe_guc_engine_snapshot_free(coredump->snapshot.ge);
+	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+		if (coredump->snapshot.hwe[i])
+			xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
 
 	coredump->captured = false;
 	drm_info(&coredump_to_xe(coredump)->drm,
@@ -113,14 +127,41 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 {
 	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
 	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = e->logical_mask;
+	u32 width_mask = (0x1 << e->width) - 1;
+	int i;
 	bool cookie;
 
 	ss->snapshot_time = ktime_get_real();
 	ss->boot_time = ktime_get_boottime();
 
 	cookie = dma_fence_begin_signalling();
+	for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+		if (adj_logical_mask & BIT(i)) {
+			adj_logical_mask |= width_mask << i;
+			i += e->width;
+		} else {
+			++i;
+		}
+	}
+
+	xe_force_wake_get(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
+
 	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
 	coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e);
+
+	for_each_hw_engine(hwe, e->gt, id) {
+		if (hwe->class != e->hwe->class ||
+		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
+			coredump->snapshot.hwe[id] = NULL;
+			continue;
+		}
+		coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
+	}
+
+	xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
 	dma_fence_end_signalling(cookie);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 7c64532241391..350b905d1797f 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -9,6 +9,8 @@
 #include <linux/ktime.h>
 #include <linux/mutex.h>
 
+#include "xe_hw_engine_types.h"
+
 struct xe_device;
 
 /**
@@ -29,6 +31,8 @@ struct xe_devcoredump_snapshot {
 	struct xe_guc_ct_snapshot *ct;
 	/** @ge: Guc Engine snapshot */
 	struct xe_guc_submit_engine_snapshot *ge;
+	/** @hwe: HW Engine snapshot array */
+	struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
 };
 
 /**
-- 
GitLab


From 328f3414b13c06a85e447d6f2d5abd70b547c3ee Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 16 May 2023 10:54:16 -0400
Subject: [PATCH 1555/2340] drm/xe: Limit CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE to
 itself.

There are multiple kind of config prints and with the upcoming
devcoredump there will be another layer. Let's limit the config
to the top level functions and leave the clean-up work for the
compilers so we don't create a spider-web of configs.

No functional change. Just a preparation for the devcoredump.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 62496a4008d2b..40295beea3a2e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3415,7 +3415,6 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 {
 	struct rb_node *node;
@@ -3453,9 +3452,3 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 
 	return 0;
 }
-#else
-int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
-{
-	return 0;
-}
-#endif
-- 
GitLab


From 5013ad8dd75fdc035ff068980c91cf2ea821d142 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Thu, 11 May 2023 20:19:09 +0200
Subject: [PATCH 1556/2340] drm/xe: Move Media GuC register definition to regs/

This GuC register can be moved together with the rest of the
GuC register definitions and be named in a similar way.

v2: fix placement

Bspec: 63363
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com> #v1
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_guc_regs.h | 2 ++
 drivers/gpu/drm/xe/xe_guc.c           | 4 +---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index 37e0ac5509317..b4f27cadb68f5 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -119,6 +119,8 @@ struct guc_doorbell_info {
 #define VF_SW_FLAG(n)				XE_REG(0x190240 + (n) * 4)
 #define VF_SW_FLAG_COUNT			4
 
+#define MED_GUC_HOST_INTERRUPT			XE_REG(0x190304)
+
 #define MED_VF_SW_FLAG(n)			XE_REG(0x190310 + (n) * 4)
 #define MED_VF_SW_FLAG_COUNT			4
 
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index b72407e24d097..92d7326902526 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -22,8 +22,6 @@
 #include "xe_uc_fw.h"
 #include "xe_wopcm.h"
 
-#define MEDIA_GUC_HOST_INTERRUPT        XE_REG(0x190304)
-
 static struct xe_gt *
 guc_to_gt(struct xe_guc *guc)
 {
@@ -268,7 +266,7 @@ int xe_guc_init(struct xe_guc *guc)
 	guc_init_params(guc);
 
 	if (xe_gt_is_media_type(gt))
-		guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT;
+		guc->notify_reg = MED_GUC_HOST_INTERRUPT;
 	else
 		guc->notify_reg = GUC_HOST_INTERRUPT;
 
-- 
GitLab


From 915757a6cbf1d77877374627a284cafe9c0de7cd Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Fri, 19 May 2023 11:19:02 +0200
Subject: [PATCH 1557/2340] drm/xe: Change GuC interrupt data

Both GUC_HOST_INTERRUPT and MED_GUC_HOST_INTERRUPT can pass
additional payload data to the GuC but this capability is not
used by the firmware yet.

Stop using value mandated by legacy GuC interrupt register and
use default notify value (zero) instead.

Bspec: 49813, 63363
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 92d7326902526..71f18b32d09b6 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -598,8 +598,14 @@ int xe_guc_suspend(struct xe_guc *guc)
 void xe_guc_notify(struct xe_guc *guc)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
+	const u32 default_notify_data = 0;
 
-	xe_mmio_write32(gt, guc->notify_reg, GUC_SEND_TRIGGER);
+	/*
+	 * Both GUC_HOST_INTERRUPT and MED_GUC_HOST_INTERRUPT can pass
+	 * additional payload data to the GuC but this capability is not
+	 * used by the firmware yet. Use default value in the meantime.
+	 */
+	xe_mmio_write32(gt, guc->notify_reg, default_notify_data);
 }
 
 int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
-- 
GitLab


From 82f428b627607cd4ae0355c09b3164961b041505 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 22 May 2023 11:52:52 +0100
Subject: [PATCH 1558/2340] drm/xe: fix kernel-doc issues

drivers/gpu/drm/xe/xe_guc_submit_types.h:47: warning: cannot understand
function prototype: 'struct guc_submit_parallel_scratch '

drivers/gpu/drm/xe/xe_devcoredump_types.h:38: warning: Function
parameter or member 'ct' not described in 'xe_devcoredump_snapshot'

CI doesn't appear to be running BAT anymore, assuming this is caused by
the CI.Hooks now failing due to above warnings.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 +-
 drivers/gpu/drm/xe/xe_guc_submit_types.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 350b905d1797f..c0d711eb6ab31 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -27,7 +27,7 @@ struct xe_devcoredump_snapshot {
 	ktime_t boot_time;
 
 	/* GuC snapshots */
-	/** @ct_snapshot: GuC CT snapshot */
+	/** @ct: GuC CT snapshot */
 	struct xe_guc_ct_snapshot *ct;
 	/** @ge: Guc Engine snapshot */
 	struct xe_guc_submit_engine_snapshot *ge;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index 88e855dae0563..6765b2c6eab15 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -41,7 +41,7 @@ struct sync_semaphore {
 };
 
 /**
- * Struct guc_submit_parallel_scratch - A scratch shared mapped buffer.
+ * struct guc_submit_parallel_scratch - A scratch shared mapped buffer.
  */
 struct guc_submit_parallel_scratch {
 	/** @wq_desc: Guc scheduler workqueue descriptor */
-- 
GitLab


From 6fedf8426d377ea9b57c91870d495006a683605e Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Fri, 19 May 2023 16:48:02 -0300
Subject: [PATCH 1559/2340] drm/xe: Do not forget to drm_dev_put() in
 xe_pci_probe()

The function drm_dev_put() should also be called if xe_device_probe()
fails.

v2:
  - Improve commit message. (Lucas)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230519194802.578182-1-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index f0d0e999aa56f..c7184e49b10b4 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -614,10 +614,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	subplatform_desc = find_subplatform(xe, desc);
 
 	err = xe_info_init(xe, desc, subplatform_desc);
-	if (err) {
-		drm_dev_put(&xe->drm);
-		return err;
-	}
+	if (err)
+		goto err_drm_put;
 
 	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d",
 		desc->platform_name,
@@ -640,10 +638,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_drvdata(pdev, xe);
 	err = pci_enable_device(pdev);
-	if (err) {
-		drm_dev_put(&xe->drm);
-		return err;
-	}
+	if (err)
+		goto err_drm_put;
 
 	pci_set_master(pdev);
 
@@ -651,14 +647,20 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		drm_dbg(&xe->drm, "can't enable MSI");
 
 	err = xe_device_probe(xe);
-	if (err) {
-		pci_disable_device(pdev);
-		return err;
-	}
+	if (err)
+		goto err_pci_disable;
 
 	xe_pm_runtime_init(xe);
 
 	return 0;
+
+err_pci_disable:
+	pci_disable_device(pdev);
+
+err_drm_put:
+	drm_dev_put(&xe->drm);
+
+	return err;
 }
 
 static void xe_pci_shutdown(struct pci_dev *pdev)
-- 
GitLab


From b67ece5b173375451de5c3a562c43aaf410001c5 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Thu, 18 May 2023 18:56:50 -0300
Subject: [PATCH 1560/2340] drm/xe: Call drmm_add_action_or_reset() early in
 xe_device_create()

Otherwise no cleanup is actually done if we branch to err_put.

This works for now: currently we do know that, once inside
xe_device_destroy(), ttm_device_init() was successful so we can safely
call ttm_device_fini(); and, for xe->ordered_wq, there is an upcoming
commit to check its value before calling destroy_workqueue().

However, we might need change this in the future if we have more
initializers called that can fail in a way that we can not know which
one was it once inside xe_device_destroy().

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230518215651.502159-2-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 8039142ae1a1c..42456d0448278 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -181,6 +181,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	if (WARN_ON(err))
 		goto err_put;
 
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+	if (err)
+		goto err_put;
+
 	xe->info.devid = pdev->device;
 	xe->info.revid = pdev->revision;
 	xe->info.enable_guc = enable_guc;
@@ -205,10 +209,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	drmm_mutex_init(&xe->drm, &xe->sb_lock);
 	xe->enabled_irq_mask = ~0;
 
-	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
-	if (err)
-		goto err_put;
-
 	return xe;
 
 err_put:
-- 
GitLab


From c93b6de7cc7610a269afe0e84a0b3e2b81a746cd Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Thu, 18 May 2023 18:56:51 -0300
Subject: [PATCH 1561/2340] drm/xe: Fail xe_device_create() if wq allocation
 fails

Let's make sure we give the driver a valid workqueue.

While at it, also make sure to call destroy_workqueue() only if the
workqueue is a valid one. That is necessary because xe_device_destroy()
is indirectly called as part of the cleanup process of a failed
xe_device_create().

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230518215651.502159-3-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 42456d0448278..2c65eb84e6e98 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -10,6 +10,7 @@
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_managed.h>
+#include <drm/drm_print.h>
 #include <drm/xe_drm.h>
 
 #include "regs/xe_regs.h"
@@ -157,7 +158,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
 {
 	struct xe_device *xe = to_xe_device(dev);
 
-	destroy_workqueue(xe->ordered_wq);
+	if (xe->ordered_wq)
+		destroy_workqueue(xe->ordered_wq);
+
 	ttm_device_fini(&xe->ttm);
 }
 
@@ -205,6 +208,11 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	INIT_LIST_HEAD(&xe->pinned.evicted);
 
 	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+	if (!xe->ordered_wq) {
+		drm_err(&xe->drm, "Failed to allocate xe-ordered-wq\n");
+		err = -ENOMEM;
+		goto err_put;
+	}
 
 	drmm_mutex_init(&xe->drm, &xe->sb_lock);
 	xe->enabled_irq_mask = ~0;
-- 
GitLab


From 6ed6ba32dba14ef851ecb7190597d6bac77618e2 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 24 May 2023 11:59:52 -0700
Subject: [PATCH 1562/2340] drm/xe: Add stepping support for GMD_ID platforms

For platforms with GMD_ID registers, the IP stepping should be
determined from the 'revid' field of those registers rather than from
the PCI revid.

The hardware teams have indicated that they plan to keep the revid =>
stepping mapping consistent across all GMD_ID platforms, with major
steppings (A0, B0, C0, etc.) having revids that are multiples of 4, and
minor steppings (A1, A2, A3, etc.) taking the intermediate values.  For
now we'll trust that hardware follows through on this plan; if they have
to change direction in the future (e.g., they wind up needing something
like an "A4" that doesn't fit this scheme), we can add a GMD_ID-based
lookup table when the time comes.

v2:
 - Set xe->info.platform before finding stepping; the pre-GMD_ID code
   relies on this value to pick a lookup table.
v3:
 - Also set xe->info.subplatform before picking the stepping for
   pre-GMD_ID lookup.

Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://lore.kernel.org/r/20230524185952.666158-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  2 +-
 drivers/gpu/drm/xe/xe_pci.c          | 28 +++++++++++------
 drivers/gpu/drm/xe/xe_step.c         | 45 +++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_step.h         |  5 +++-
 drivers/gpu/drm/xe/xe_step_types.h   | 13 ++++----
 5 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 4a38f78277b5e..5c239989608f0 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -24,7 +24,7 @@
 #define GMD_ID					XE_REG(0xd8c)
 #define   GMD_ID_ARCH_MASK			REG_GENMASK(31, 22)
 #define   GMD_ID_RELEASE_MASK			REG_GENMASK(21, 14)
-#define   GMD_ID_STEP				REG_GENMASK(5, 0)
+#define   GMD_ID_REVID				REG_GENMASK(5, 0)
 
 #define FORCEWAKE_ACK_GT_MTL			XE_REG(0xdfc)
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c7184e49b10b4..50027eb642ea6 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -441,11 +441,14 @@ static void handle_pre_gmdid(struct xe_device *xe,
 static void handle_gmdid(struct xe_device *xe,
 			 const struct xe_device_desc *desc,
 			 const struct xe_graphics_desc **graphics,
-			 const struct xe_media_desc **media)
+			 const struct xe_media_desc **media,
+			 u32 *graphics_revid,
+			 u32 *media_revid)
 {
 	u32 ver;
 
 	ver = peek_gmdid(xe, GMD_ID.addr);
+	*graphics_revid = REG_FIELD_GET(GMD_ID_REVID, ver);
 	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
 		if (ver == graphics_ip_map[i].ver) {
 			xe->info.graphics_verx100 = ver;
@@ -461,6 +464,7 @@ static void handle_gmdid(struct xe_device *xe,
 	}
 
 	ver = peek_gmdid(xe, GMD_ID.addr + 0x380000);
+	*media_revid = REG_FIELD_GET(GMD_ID_REVID, ver);
 	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
 		if (ver == media_ip_map[i].ver) {
 			xe->info.media_verx100 = ver;
@@ -483,19 +487,30 @@ static int xe_info_init(struct xe_device *xe,
 {
 	const struct xe_graphics_desc *graphics_desc = NULL;
 	const struct xe_media_desc *media_desc = NULL;
+	u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
 	struct xe_gt *gt;
 	u8 id;
 
+	xe->info.platform = desc->platform;
+	xe->info.subplatform = subplatform_desc ?
+		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
+
 	/*
 	 * If this platform supports GMD_ID, we'll detect the proper IP
 	 * descriptor to use from hardware registers. desc->graphics will only
 	 * ever be set at this point for platforms before GMD_ID. In that case
 	 * the IP descriptions and versions are simply derived from that.
 	 */
-	if (desc->graphics)
+	if (desc->graphics) {
 		handle_pre_gmdid(xe, desc, &graphics_desc, &media_desc);
-	else
-		handle_gmdid(xe, desc, &graphics_desc, &media_desc);
+		xe->info.step = xe_step_pre_gmdid_get(xe);
+	} else {
+		handle_gmdid(xe, desc, &graphics_desc, &media_desc,
+			     &graphics_gmdid_revid, &media_gmdid_revid);
+		xe->info.step = xe_step_gmdid_get(xe,
+						  graphics_gmdid_revid,
+						  media_gmdid_revid);
+	}
 
 	/*
 	 * If we couldn't detect the graphics IP, that's considered a fatal
@@ -506,7 +521,6 @@ static int xe_info_init(struct xe_device *xe,
 		return -ENODEV;
 
 	xe->info.is_dgfx = desc->is_dgfx;
-	xe->info.platform = desc->platform;
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_4tile = desc->has_4tile;
@@ -534,10 +548,6 @@ static int xe_info_init(struct xe_device *xe,
 	if (MEDIA_VER(xe) >= 13)
 		xe->info.tile_count++;
 
-	xe->info.subplatform = subplatform_desc ?
-		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
-	xe->info.step = xe_step_get(xe);
-
 	for (id = 0; id < xe->info.tile_count; ++id) {
 		gt = xe->gt + id;
 		gt->info.id = id;
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index a443d9bd7bbbf..1baf79ba02ad4 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -107,7 +107,14 @@ static const int pvc_basedie_subids[] = {
 
 __diag_pop();
 
-struct xe_step_info xe_step_get(struct xe_device *xe)
+/**
+ * xe_step_pre_gmdid_get - Determine IP steppings from PCI revid
+ * @xe: Xe device
+ *
+ * Convert the PCI revid into proper IP steppings.  This should only be
+ * used on platforms that do not have GMD_ID support.
+ */
+struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
 {
 	const struct xe_step_info *revids = NULL;
 	struct xe_step_info step = {};
@@ -198,6 +205,42 @@ struct xe_step_info xe_step_get(struct xe_device *xe)
 	return step;
 }
 
+/**
+ * xe_step_gmdid_get - Determine IP steppings from GMD_ID revid fields
+ * @xe: Xe device
+ * @graphics_gmdid_revid: value of graphics GMD_ID register's revid field
+ * @media_gmdid_revid: value of media GMD_ID register's revid field
+ *
+ * Convert the revid fields of the GMD_ID registers into proper IP steppings.
+ *
+ * GMD_ID revid values are currently expected to have consistent meanings on
+ * all platforms:  major steppings (A0, B0, etc.) are 4 apart, with minor
+ * steppings (A1, A2, etc.) taking the values in between.
+ */
+struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
+				      u32 graphics_gmdid_revid,
+				      u32 media_gmdid_revid)
+{
+	struct xe_step_info step = {
+		.graphics = STEP_A0 + graphics_gmdid_revid,
+		.media = STEP_A0 + media_gmdid_revid,
+	};
+
+	if (step.graphics >= STEP_FUTURE) {
+		step.graphics = STEP_FUTURE;
+		drm_dbg(&xe->drm, "Graphics GMD_ID revid value %d treated as future stepping\n",
+			graphics_gmdid_revid);
+	}
+
+	if (step.media >= STEP_FUTURE) {
+		step.media = STEP_FUTURE;
+		drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n",
+			graphics_gmdid_revid);
+	}
+
+	return step;
+}
+
 #define STEP_NAME_CASE(name)	\
 	case STEP_##name:	\
 		return #name;
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
index 0c596c8579fbe..a384b640f2af5 100644
--- a/drivers/gpu/drm/xe/xe_step.h
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -12,7 +12,10 @@
 
 struct xe_device;
 
-struct xe_step_info xe_step_get(struct xe_device *xe);
+struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe);
+struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
+				      u32 graphics_gmdid_revid,
+				      u32 media_gmdid_revid);
 const char *xe_step_name(enum xe_step step);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h
index b7859f9647caa..ccc9b4795e959 100644
--- a/drivers/gpu/drm/xe/xe_step_types.h
+++ b/drivers/gpu/drm/xe/xe_step_types.h
@@ -21,21 +21,20 @@ struct xe_step_info {
 	func(A0)			\
 	func(A1)			\
 	func(A2)			\
+	func(A3)			\
 	func(B0)			\
 	func(B1)			\
 	func(B2)			\
 	func(B3)			\
 	func(C0)			\
 	func(C1)			\
+	func(C2)			\
+	func(C3)			\
 	func(D0)			\
 	func(D1)			\
-	func(E0)			\
-	func(F0)			\
-	func(G0)			\
-	func(H0)			\
-	func(I0)			\
-	func(I1)			\
-	func(J0)
+	func(D2)			\
+	func(D3)			\
+	func(E0)
 
 /*
  * Symbolic steppings that do not match the hardware. These are valid both as gt
-- 
GitLab


From 1b1d3710380d5f0517dcaabe1b96b6401f68ec37 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Tue, 16 May 2023 03:26:53 +0000
Subject: [PATCH 1563/2340] drm/xe: Apply upper limit to sg element size

The iommu_dma_map_sg() function ensures iova allocation doesn't
cross dma segment boundary. It does so by padding some sg elements.
This can cause overflow, ending up with sg->length being set to 0.
Avoid this by halving the maximum segment size (rounded down to
PAGE_SIZE).

Specify maximum segment size for sg elements by using
sg_alloc_table_from_pages_segment() to allocate sg_table.

v2: Use correct max segment size in dma_set_max_seg_size() call

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c   |  8 +++++---
 drivers/gpu/drm/xe/xe_bo.h   | 24 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_mmio.c |  7 ++-----
 drivers/gpu/drm/xe/xe_vm.c   |  8 +++++---
 4 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4693372ec82e9..7a5118bf4dc02 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -251,9 +251,11 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
 	if (xe_tt->sg)
 		return 0;
 
-	ret = sg_alloc_table_from_pages(&xe_tt->sgt, tt->pages, num_pages,
-					0, (u64)num_pages << PAGE_SHIFT,
-					GFP_KERNEL);
+	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
+						num_pages, 0,
+						(u64)num_pages << PAGE_SHIFT,
+						xe_sg_segment_size(xe_tt->dev),
+						GFP_KERNEL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 7e111332c35aa..2d08622f58a7d 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -296,6 +296,30 @@ void xe_bo_put_commit(struct llist_head *deferred);
 
 struct sg_table *xe_bo_get_sg(struct xe_bo *bo);
 
+/*
+ * xe_sg_segment_size() - Provides upper limit for sg segment size.
+ * @dev: device pointer
+ *
+ * Returns the maximum segment size for the 'struct scatterlist'
+ * elements.
+ */
+static inline unsigned int xe_sg_segment_size(struct device *dev)
+{
+	struct scatterlist __maybe_unused sg;
+	size_t max = BIT_ULL(sizeof(sg.length) * 8) - 1;
+
+	max = min_t(size_t, max, dma_max_mapping_size(dev));
+
+	/*
+	 * The iommu_dma_map_sg() function ensures iova allocation doesn't
+	 * cross dma segment boundary. It does so by padding some sg elements.
+	 * This can cause overflow, ending up with sg->length being set to 0.
+	 * Avoid this by ensuring maximum segment size is half of 'max'
+	 * rounded down to PAGE_SIZE.
+	 */
+	return round_down(max / 2, PAGE_SIZE);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 /**
  * xe_bo_is_mem_type - Whether the bo currently resides in the given
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index c7fbb1cc1f64a..4c270a07136ed 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -11,6 +11,7 @@
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
+#include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
@@ -26,11 +27,7 @@ static int xe_set_dma_info(struct xe_device *xe)
 	unsigned int mask_size = xe->info.dma_mask_size;
 	int err;
 
-	/*
-	 * We don't have a max segment size, so set it to the max so sg's
-	 * debugging layer doesn't complain
-	 */
-	dma_set_max_seg_size(xe->drm.dev, UINT_MAX);
+	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
 
 	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 40295beea3a2e..25a61735aac81 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -117,9 +117,11 @@ mm_closed:
 	if (ret)
 		goto out;
 
-	ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned,
-					0, (u64)pinned << PAGE_SHIFT,
-					GFP_KERNEL);
+	ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
+						pinned, 0,
+						(u64)pinned << PAGE_SHIFT,
+						xe_sg_segment_size(xe->drm.dev),
+						GFP_KERNEL);
 	if (ret) {
 		vma->userptr.sg = NULL;
 		goto out;
-- 
GitLab


From e2bd81af05cb6dc9cbf7a367a48e43316207dd0e Mon Sep 17 00:00:00 2001
From: Christopher Snowhill <kode54@gmail.com>
Date: Wed, 24 May 2023 18:56:06 -0700
Subject: [PATCH 1564/2340] drm/xe: Add explicit padding to uAPI definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pad the uAPI definition so that it would align identically between
64-bit and 32-bit uarch, so consumers using this header will work
correctly from 32-bit compat userspace on a 64-bit kernel. Do it
in a minimally invasive way, so that 64-bit userspace will still
work with the previous header, and so that no fields suddenly
change sizes.

Originally inspired by mlankhorst.

Signed-off-by: Christopher Snowhill <kode54@gmail.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b3bcb71068506..34aff9e15fe6f 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -91,7 +91,7 @@ struct xe_user_extension {
 	 */
 	__u32 name;
 	/**
-	 * @flags: MBZ
+	 * @pad: MBZ
 	 *
 	 * All undefined bits must be zero.
 	 */
@@ -291,6 +291,9 @@ struct drm_xe_gem_create {
 	 */
 	__u32 handle;
 
+	/** @pad: MBZ */
+	__u32 pad;
+
 	/** @reserved: Reserved */
 	__u64 reserved[2];
 };
@@ -335,6 +338,9 @@ struct drm_xe_ext_vm_set_property {
 #define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS		0
 	__u32 property;
 
+	/** @pad: MBZ */
+	__u32 pad;
+
 	/** @value: property value */
 	__u64 value;
 
@@ -379,6 +385,9 @@ struct drm_xe_vm_bind_op {
 	 */
 	__u32 obj;
 
+	/** @pad: MBZ */
+	__u32 pad;
+
 	union {
 		/**
 		 * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE,
@@ -469,6 +478,9 @@ struct drm_xe_vm_bind {
 	/** @num_binds: number of binds in this IOCTL */
 	__u32 num_binds;
 
+	/** @pad: MBZ */
+	__u32 pad;
+
 	union {
 		/** @bind: used if num_binds == 1 */
 		struct drm_xe_vm_bind_op bind;
@@ -482,6 +494,9 @@ struct drm_xe_vm_bind {
 	/** @num_syncs: amount of syncs to wait on */
 	__u32 num_syncs;
 
+	/** @pad2: MBZ */
+	__u32 pad2;
+
 	/** @syncs: pointer to struct drm_xe_sync array */
 	__u64 syncs;
 
@@ -497,6 +512,9 @@ struct drm_xe_ext_engine_set_property {
 	/** @property: property to set */
 	__u32 property;
 
+	/** @pad: MBZ */
+	__u32 pad;
+
 	/** @value: property value */
 	__u64 value;
 };
@@ -612,6 +630,9 @@ struct drm_xe_sync {
 #define DRM_XE_SYNC_USER_FENCE		0x3
 #define DRM_XE_SYNC_SIGNAL		0x10
 
+	/** @pad: MBZ */
+	__u32 pad;
+
 	union {
 		__u32 handle;
 		/**
@@ -656,6 +677,9 @@ struct drm_xe_exec {
 	 */
 	__u16 num_batch_buffer;
 
+	/** @pad: MBZ */
+	__u16 pad[3];
+
 	/** @reserved: Reserved */
 	__u64 reserved[2];
 };
@@ -718,6 +742,8 @@ struct drm_xe_wait_user_fence {
 #define DRM_XE_UFENCE_WAIT_ABSTIME	(1 << 1)
 #define DRM_XE_UFENCE_WAIT_VM_ERROR	(1 << 2)
 	__u16 flags;
+	/** @pad: MBZ */
+	__u32 pad;
 	/** @value: compare value */
 	__u64 value;
 	/** @mask: comparison mask */
@@ -750,6 +776,9 @@ struct drm_xe_vm_madvise {
 	/** @vm_id: The ID VM in which the VMA exists */
 	__u32 vm_id;
 
+	/** @pad: MBZ */
+	__u32 pad;
+
 	/** @range: Number of bytes in the VMA */
 	__u64 range;
 
@@ -794,6 +823,9 @@ struct drm_xe_vm_madvise {
 	/** @property: property to set */
 	__u32 property;
 
+	/** @pad2: MBZ */
+	__u32 pad2;
+
 	/** @value: property value */
 	__u64 value;
 
-- 
GitLab


From 1799c761c48059366f081adeef718fa13d4bb133 Mon Sep 17 00:00:00 2001
From: Christopher Snowhill <kode54@gmail.com>
Date: Wed, 24 May 2023 18:56:07 -0700
Subject: [PATCH 1565/2340] drm/xe: Validate uAPI padding and reserved fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Padding and reserved fields are declared such that they must be
zeroed, so verify that they're all zero in the respective ioctl
functions.

Derived from original patch by mlankhorst.

v2:
	Removed extensions checks where there were none originally. (José)
	Moved extraneous parentheses to the correct places. (Lucas)

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Christopher Snowhill <kode54@gmail.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c              |  6 ++++--
 drivers/gpu/drm/xe/xe_engine.c          | 18 ++++++++++++++----
 drivers/gpu/drm/xe/xe_exec.c            |  4 +++-
 drivers/gpu/drm/xe/xe_mmio.c            |  3 ++-
 drivers/gpu/drm/xe/xe_query.c           |  3 ++-
 drivers/gpu/drm/xe/xe_sync.c            |  4 +++-
 drivers/gpu/drm/xe/xe_vm.c              | 22 +++++++++++++++++++---
 drivers/gpu/drm/xe/xe_vm_madvise.c      |  4 +++-
 drivers/gpu/drm/xe/xe_wait_user_fence.c |  3 ++-
 9 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 7a5118bf4dc02..798b9938e534c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1646,7 +1646,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	u32 handle;
 	int err;
 
-	if (XE_IOCTL_ERR(xe, args->extensions))
+	if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	if (XE_IOCTL_ERR(xe, args->flags &
@@ -1716,7 +1717,8 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_gem_mmap_offset *args = data;
 	struct drm_gem_object *gem_obj;
 
-	if (XE_IOCTL_ERR(xe, args->extensions))
+	if (XE_IOCTL_ERR(xe, args->extensions) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	if (XE_IOCTL_ERR(xe, args->flags))
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 5666c8e00c978..4fca422e9e7bb 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -348,7 +348,8 @@ static int engine_user_ext_set_property(struct xe_device *xe,
 		return -EFAULT;
 
 	if (XE_IOCTL_ERR(xe, ext.property >=
-			 ARRAY_SIZE(engine_set_property_funcs)))
+			 ARRAY_SIZE(engine_set_property_funcs)) ||
+	    XE_IOCTL_ERR(xe, ext.pad))
 		return -EINVAL;
 
 	idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
@@ -380,7 +381,8 @@ static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
 	if (XE_IOCTL_ERR(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, ext.name >=
+	if (XE_IOCTL_ERR(xe, ext.pad) ||
+	    XE_IOCTL_ERR(xe, ext.name >=
 			 ARRAY_SIZE(engine_user_extension_funcs)))
 		return -EINVAL;
 
@@ -523,7 +525,8 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 	int len;
 	int err;
 
-	if (XE_IOCTL_ERR(xe, args->flags))
+	if (XE_IOCTL_ERR(xe, args->flags) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	len = args->width * args->num_placements;
@@ -639,6 +642,9 @@ int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_engine_get_property *args = data;
 	struct xe_engine *e;
 
+	if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
 	mutex_lock(&xef->engine.lock);
 	e = xa_load(&xef->engine.xa, args->engine_id);
 	mutex_unlock(&xef->engine.lock);
@@ -718,7 +724,8 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_engine_destroy *args = data;
 	struct xe_engine *e;
 
-	if (XE_IOCTL_ERR(xe, args->pad))
+	if (XE_IOCTL_ERR(xe, args->pad) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	mutex_lock(&xef->engine.lock);
@@ -748,6 +755,9 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
 	int ret;
 	u32 idx;
 
+	if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
 	e = xe_engine_lookup(xef, args->engine_id);
 	if (XE_IOCTL_ERR(xe, !e))
 		return -ENOENT;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 3db1b159586ee..e44076ee2e115 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -181,7 +181,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	bool write_locked;
 	int err = 0;
 
-	if (XE_IOCTL_ERR(xe, args->extensions))
+	if (XE_IOCTL_ERR(xe, args->extensions) ||
+	    XE_IOCTL_ERR(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	engine = xe_engine_lookup(xef, args->engine_id);
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 4c270a07136ed..87dd417e3f08c 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -404,7 +404,8 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	bool allowed;
 	int ret = 0;
 
-	if (XE_IOCTL_ERR(xe, args->extensions))
+	if (XE_IOCTL_ERR(xe, args->extensions) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS))
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index dd64ff0d2a575..b10959fde43bf 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -374,7 +374,8 @@ int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct drm_xe_device_query *query = data;
 	u32 idx;
 
-	if (XE_IOCTL_ERR(xe, query->extensions != 0))
+	if (XE_IOCTL_ERR(xe, query->extensions) ||
+	    XE_IOCTL_ERR(xe, query->reserved[0] || query->reserved[1]))
 		return -EINVAL;
 
 	if (XE_IOCTL_ERR(xe, query->query > ARRAY_SIZE(xe_query_funcs)))
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 1e4e4acb2c4a2..5acb37a8b2ab1 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -111,7 +111,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		return -EFAULT;
 
 	if (XE_IOCTL_ERR(xe, sync_in.flags &
-			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)))
+			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)) ||
+	    XE_IOCTL_ERR(xe, sync_in.pad) ||
+	    XE_IOCTL_ERR(xe, sync_in.reserved[0] || sync_in.reserved[1]))
 		return -EINVAL;
 
 	signal = sync_in.flags & DRM_XE_SYNC_SIGNAL;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 25a61735aac81..ffa102870d1fd 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1856,7 +1856,9 @@ static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
 		return -EFAULT;
 
 	if (XE_IOCTL_ERR(xe, ext.property >=
-			 ARRAY_SIZE(vm_set_property_funcs)))
+			 ARRAY_SIZE(vm_set_property_funcs)) ||
+	    XE_IOCTL_ERR(xe, ext.pad) ||
+	    XE_IOCTL_ERR(xe, ext.reserved[0] || ext.reserved[1]))
 		return -EINVAL;
 
 	return vm_set_property_funcs[ext.property](xe, vm, ext.value);
@@ -1884,7 +1886,8 @@ static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
 	if (XE_IOCTL_ERR(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, ext.name >=
+	if (XE_IOCTL_ERR(xe, ext.pad) ||
+	    XE_IOCTL_ERR(xe, ext.name >=
 			 ARRAY_SIZE(vm_user_extension_funcs)))
 		return -EINVAL;
 
@@ -1915,6 +1918,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	int err;
 	u32 flags = 0;
 
+	if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
 	if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
 		return -EINVAL;
 
@@ -1998,7 +2004,8 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_vm_destroy *args = data;
 	struct xe_vm *vm;
 
-	if (XE_IOCTL_ERR(xe, args->pad))
+	if (XE_IOCTL_ERR(xe, args->pad) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	vm = xe_vm_lookup(xef, args->vm_id);
@@ -2914,6 +2921,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 	int i;
 
 	if (XE_IOCTL_ERR(xe, args->extensions) ||
+	    XE_IOCTL_ERR(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]) ||
 	    XE_IOCTL_ERR(xe, !args->num_binds) ||
 	    XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
 		return -EINVAL;
@@ -2946,6 +2955,13 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
 		u32 region = (*bind_ops)[i].region;
 
+		if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) ||
+		    XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] ||
+				     (*bind_ops)[i].reserved[1])) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
 		if (i == 0) {
 			*async = !!(op & XE_VM_BIND_FLAG_ASYNC);
 		} else if (XE_IOCTL_ERR(xe, !*async) ||
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 89a02c8e04247..6c196431a60eb 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -301,7 +301,9 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
 	struct xe_vma **vmas = NULL;
 	int num_vmas = 0, err = 0, idx;
 
-	if (XE_IOCTL_ERR(xe, args->extensions))
+	if (XE_IOCTL_ERR(xe, args->extensions) ||
+	    XE_IOCTL_ERR(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	if (XE_IOCTL_ERR(xe, args->property > ARRAY_SIZE(madvise_funcs)))
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 15c2e5aa08d2f..6c8a60c600871 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -100,7 +100,8 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 		args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
 	unsigned long timeout = args->timeout;
 
-	if (XE_IOCTL_ERR(xe, args->extensions))
+	if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) ||
+	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS))
-- 
GitLab


From 58e30342c75d38606e30e02ef125252b10829450 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 24 May 2023 12:26:35 -0700
Subject: [PATCH 1566/2340] drm/xe/pvc: Don't try to invalidate AuxCCS TLB

Generally !has_flatccs implies that a platform has AuxCCS compression
and thus needs to invalidate the AuxCCS TLB.  However PVC is a special
case because it has no compression of either type (FlatCCS or AuxCCS)
so we should avoid writing to non-existent AuxCCS registers.

Reviewed-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Link: https://lore.kernel.org/r/20230524192635.673293-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 06364bb2e95bd..a09ee8c736b53 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -190,6 +190,18 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
 
+static bool has_aux_ccs(struct xe_device *xe)
+{
+	/*
+	 * PVC is a special case that has no compression of either type
+	 * (FlatCCS or AuxCCS).
+	 */
+	if (xe->info.platform == XE_PVC)
+		return false;
+
+	return !xe->info.has_flat_ccs;
+}
+
 static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 				   u64 batch_addr, u32 seqno)
 {
@@ -202,7 +214,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	dw[i++] = preparser_disable(true);
 
 	/* hsdes: 1809175790 */
-	if (!xe->info.has_flat_ccs) {
+	if (has_aux_ccs(xe)) {
 		if (decode)
 			i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
 		else
@@ -248,7 +260,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	i = emit_pipe_invalidate(mask_flags, dw, i);
 
 	/* hsdes: 1809175790 */
-	if (!xe->info.has_flat_ccs)
+	if (has_aux_ccs(xe))
 		i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);
 
 	dw[i++] = preparser_disable(false);
-- 
GitLab


From a9bd807eb16be11e11f6c6d3921119381cc43135 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:38 -0700
Subject: [PATCH 1567/2340] drm/xe: Fix Wa_22011802037 annotation

It was missing one digit, so not showing up as a proper WA number. Add
the missing number and annotate it with a FIXME as there are more to be
implemented to consider this WA done: ensure CS is stop before doing a
reset, wait for pending.

Also, this WA applies to platforms up to graphics version 1270 (with the
exception of MTL A*, that are not supported in xe). Fix platform check.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/284
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 71f18b32d09b6..a8e249205bffa 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -160,9 +160,11 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 		flags |= GUC_WA_DUAL_QUEUE;
 
 	/*
-	 * Wa_2201180203
+	 * Wa_22011802037: FIXME - there's more to be done than simply setting
+	 * this flag: make sure each CS is stopped when preparing for GT reset
+	 * and wait for pending MI_FW.
 	 */
-	if (GRAPHICS_VER(xe) <= 12)
+	if (GRAPHICS_VERx100(xe) < 1270)
 		flags |= GUC_WA_PRE_PARSER;
 
 	/* Wa_16011777198 */
-- 
GitLab


From 72906d340b60f3dae545deef77376a0f598bece7 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:39 -0700
Subject: [PATCH 1568/2340] drm/xe/rtp: Split rtp process initialization

The selection between hwe and gt is exposed to the outside of rtp, by
the xe_rtp_process() function. However it doesn't make seense from the
caller point of view to pass a hwe and a gt as argument since the gt
should always be the one containing the hwe.

This clarifies the interface by separating the context creation into an
initializer. The initializer then passes the correct value and there
should never be a case with hwe and gt set: when hwe is passed, the gt
is the one containing it. Internally the functions continue receiving
the argument separately.

v2: Leave the device-only context to a separate patch if they are indeed
    needed later

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_rtp_test.c |  3 +-
 drivers/gpu/drm/xe/xe_hw_engine.c      |  8 ++--
 drivers/gpu/drm/xe/xe_reg_whitelist.c  |  4 +-
 drivers/gpu/drm/xe/xe_rtp.c            | 63 +++++++++++++++++++-------
 drivers/gpu/drm/xe/xe_rtp.h            |  9 +++-
 drivers/gpu/drm/xe/xe_rtp_types.h      | 13 ++++++
 drivers/gpu/drm/xe/xe_tuning.c         |  8 +++-
 drivers/gpu/drm/xe/xe_wa.c             | 12 +++--
 8 files changed, 91 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 4b2aac5ccf28a..f96ef19877193 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -238,10 +238,11 @@ static void xe_rtp_process_tests(struct kunit *test)
 	struct xe_device *xe = test->priv;
 	struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr;
 	const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(&xe->gt[0]);
 	unsigned long idx, count = 0;
 
 	xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
-	xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL);
+	xe_rtp_process(&ctx, param->entries, reg_sr);
 
 	xa_for_each(&reg_sr->xa, idx, sre) {
 		if (idx == param->expected_reg.addr)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 71ac4defb947d..25b96f40d5a78 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -281,6 +281,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 	const u8 mocs_read_idx = gt->mocs.uc_index;
 	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
 			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 	const struct xe_rtp_entry lrc_was[] = {
 		/*
 		 * Some blitter commands do not have a field for MOCS, those
@@ -299,7 +300,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 		{}
 	};
 
-	xe_rtp_process(lrc_was, &hwe->reg_lrc, gt, hwe);
+	xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc);
 }
 
 static void
@@ -311,7 +312,8 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 	const u8 mocs_read_idx = gt->mocs.uc_index;
 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
 			        REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
-	const struct xe_rtp_entry engine_was[] = {
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+	const struct xe_rtp_entry engine_entries[] = {
 		/*
 		 * RING_CMD_CCTL specifies the default MOCS entry that will be
 		 * used by the command streamer when executing commands that
@@ -332,7 +334,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 		{}
 	};
 
-	xe_rtp_process(engine_was, &hwe->reg_sr, gt, hwe);
+	xe_rtp_process(&ctx, engine_entries, &hwe->reg_sr);
 }
 
 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 7a2bb60ebd850..98f678d744459 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -63,7 +63,9 @@ static const struct xe_rtp_entry register_whitelist[] = {
  */
 void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
 {
-	xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process(&ctx, register_whitelist, &hwe->reg_whitelist);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 0c6a23e14a715..5be25fe0e8e48 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -23,11 +23,11 @@
  * the values to the registers that have matching rules.
  */
 
-static bool rule_matches(struct xe_gt *gt,
+static bool rule_matches(const struct xe_device *xe,
+			 struct xe_gt *gt,
 			 struct xe_hw_engine *hwe,
 			 const struct xe_rtp_entry *entry)
 {
-	const struct xe_device *xe = gt_to_xe(gt);
 	const struct xe_rtp_rule *r;
 	unsigned int i;
 	bool match;
@@ -62,22 +62,27 @@ static bool rule_matches(struct xe_gt *gt,
 			match = xe->info.step.graphics >= r->step_start &&
 				xe->info.step.graphics < r->step_end;
 			break;
+		case XE_RTP_MATCH_INTEGRATED:
+			match = !xe->info.is_dgfx;
+			break;
+		case XE_RTP_MATCH_DISCRETE:
+			match = xe->info.is_dgfx;
+			break;
 		case XE_RTP_MATCH_ENGINE_CLASS:
+			if (drm_WARN_ON(&xe->drm, !hwe))
+				return false;
+
 			match = hwe->class == r->engine_class;
 			break;
 		case XE_RTP_MATCH_NOT_ENGINE_CLASS:
+			if (drm_WARN_ON(&xe->drm, !hwe))
+				return false;
+
 			match = hwe->class != r->engine_class;
 			break;
 		case XE_RTP_MATCH_FUNC:
 			match = r->match_func(gt, hwe);
 			break;
-		case XE_RTP_MATCH_INTEGRATED:
-			match = !xe->info.is_dgfx;
-			break;
-		case XE_RTP_MATCH_DISCRETE:
-			match = xe->info.is_dgfx;
-			break;
-
 		default:
 			XE_WARN_ON(r->match_type);
 		}
@@ -105,14 +110,15 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 	xe_reg_sr_add(sr, &sr_entry);
 }
 
-static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt,
+static void rtp_process_one(const struct xe_rtp_entry *entry,
+			    struct xe_device *xe, struct xe_gt *gt,
 			    struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
 {
 	const struct xe_rtp_action *action;
 	u32 mmio_base;
 	unsigned int i;
 
-	if (!rule_matches(gt, hwe, entry))
+	if (!rule_matches(xe, gt, hwe, entry))
 		return;
 
 	for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
@@ -126,23 +132,46 @@ static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt,
 	}
 }
 
+static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
+			    struct xe_hw_engine **hwe,
+			    struct xe_gt **gt,
+			    struct xe_device **xe)
+{
+	switch (ctx->type) {
+	case XE_RTP_PROCESS_TYPE_GT:
+		*hwe = NULL;
+		*gt = ctx->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	case XE_RTP_PROCESS_TYPE_ENGINE:
+		*hwe = ctx->hwe;
+		*gt = (*hwe)->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	};
+}
+
 /**
  * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr
+ * @ctx: The context for processing the table, with one of device, gt or hwe
  * @entries: Table with RTP definitions
  * @sr: Where to add an entry to with the values for matching. This can be
  *      viewed as the "coalesced view" of multiple the tables. The bits for each
  *      register set are expected not to collide with previously added entries
- * @gt: The GT to be used for matching rules
- * @hwe: Engine instance to use for matching rules and as mmio base
  *
  * Walk the table pointed by @entries (with an empty sentinel) and add all
  * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
  * used to calculate the right register offset
  */
-void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
-		    struct xe_gt *gt, struct xe_hw_engine *hwe)
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries, struct xe_reg_sr *sr)
 {
 	const struct xe_rtp_entry *entry;
+	struct xe_hw_engine *hwe = NULL;
+	struct xe_gt *gt = NULL;
+	struct xe_device *xe = NULL;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
 
 	for (entry = entries; entry && entry->name; entry++) {
 		if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
@@ -150,9 +179,9 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
 			enum xe_hw_engine_id id;
 
 			for_each_hw_engine(each_hwe, gt, id)
-				rtp_process_one(entry, gt, each_hwe, sr);
+				rtp_process_one(entry, xe, gt, each_hwe, sr);
 		} else {
-			rtp_process_one(entry, gt, hwe, sr);
+			rtp_process_one(entry, xe, gt, hwe, sr);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 8a89ad45589a2..c4b718b9632ee 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -355,8 +355,13 @@ struct xe_reg_sr;
 		XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__))	\
 	}
 
-void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
-		    struct xe_gt *gt, struct xe_hw_engine *hwe);
+#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__),							\
+	struct xe_hw_engine *:	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE },	\
+	struct xe_gt *:		(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries,
+		    struct xe_reg_sr *sr);
 
 /* Match functions to be used with XE_RTP_MATCH_FUNC */
 
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 12df8a9e9c45f..5afacbd9083d8 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -95,4 +95,17 @@ struct xe_rtp_entry {
 	u8 flags;
 };
 
+enum xe_rtp_process_type {
+	XE_RTP_PROCESS_TYPE_GT,
+	XE_RTP_PROCESS_TYPE_ENGINE,
+};
+
+struct xe_rtp_process_ctx {
+	union {
+		struct xe_gt *gt;
+		struct xe_hw_engine *hwe;
+	};
+	enum xe_rtp_process_type type;
+};
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 5fc6a408429b2..c2810ede3a655 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -59,7 +59,9 @@ static const struct xe_rtp_entry lrc_tunings[] = {
 
 void xe_tuning_process_gt(struct xe_gt *gt)
 {
-	xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process(&ctx, gt_tunings, &gt->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
 
@@ -73,5 +75,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
  */
 void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
 {
-	xe_rtp_process(lrc_tunings, &hwe->reg_lrc, hwe->gt, hwe);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process(&ctx, lrc_tunings, &hwe->reg_lrc);
 }
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index b0bb2f4438f43..4b236b6f4c8ea 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -579,7 +579,9 @@ __diag_pop();
  */
 void xe_wa_process_gt(struct xe_gt *gt)
 {
-	xe_rtp_process(gt_was, &gt->reg_sr, gt, NULL);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process(&ctx, gt_was, &gt->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
 
@@ -593,7 +595,9 @@ EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
  */
 void xe_wa_process_engine(struct xe_hw_engine *hwe)
 {
-	xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process(&ctx, engine_was, &hwe->reg_sr);
 }
 
 /**
@@ -606,5 +610,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe)
  */
 void xe_wa_process_lrc(struct xe_hw_engine *hwe)
 {
-	xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc);
 }
-- 
GitLab


From cc982f0c168149def829f204b575fad546e9d043 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:40 -0700
Subject: [PATCH 1569/2340] drm/xe/rtp: Replace XE_WARN_ON

Now that rule_matches() always has an xe pointer, replace the XE_WARN_ON
with the more appropriate drm_warn().

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 5be25fe0e8e48..5dcdfe45f0cbd 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -84,7 +84,9 @@ static bool rule_matches(const struct xe_device *xe,
 			match = r->match_func(gt, hwe);
 			break;
 		default:
-			XE_WARN_ON(r->match_type);
+			drm_warn(&xe->drm, "Invalid RTP match %u\n",
+				 r->match_type);
+			match = false;
 		}
 
 		if (!match)
-- 
GitLab


From 91042671d9f3102c7e100d2e9275cae13eb63462 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:41 -0700
Subject: [PATCH 1570/2340] drm/xe/rtp: Add "_sr" to entry/function names

The xe_rtp_process() function and xe_rtp_entry depend on the
save-restore struct. In future it will be desired to process rtp rules,
regardless of adding them to a save-restore. Rename the struct and
function so the intent is clear and the name is freed for future uses.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_rtp_test.c | 22 +++++++++++-----------
 drivers/gpu/drm/xe/xe_hw_engine.c      |  8 ++++----
 drivers/gpu/drm/xe/xe_reg_whitelist.c  |  4 ++--
 drivers/gpu/drm/xe/xe_rtp.c            | 26 ++++++++++++++------------
 drivers/gpu/drm/xe/xe_rtp.h            | 20 ++++++++++----------
 drivers/gpu/drm/xe/xe_rtp_types.h      |  4 ++--
 drivers/gpu/drm/xe/xe_tuning.c         |  8 ++++----
 drivers/gpu/drm/xe/xe_wa.c             | 12 ++++++------
 8 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index f96ef19877193..ab6f7a47db502 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -36,7 +36,7 @@ struct rtp_test_case {
 	u32 expected_clr_bits;
 	unsigned long expected_count;
 	unsigned int expected_sr_errors;
-	const struct xe_rtp_entry *entries;
+	const struct xe_rtp_entry_sr *entries;
 };
 
 static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
@@ -57,7 +57,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(0) | REG_BIT(1),
 		.expected_count = 1,
 		/* Different bits on the same register: create a single entry */
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -76,7 +76,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
 		/* Don't coalesce second entry since rules don't match */
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -95,7 +95,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
 		/* Don't coalesce second entry due to one of the rules */
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -114,7 +114,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 2,
 		/* Same bits on different registers are not coalesced */
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -133,7 +133,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(1) | REG_BIT(0),
 		.expected_count = 1,
 		/* Check clr vs set actions on different bits */
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -154,7 +154,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = TEMP_MASK,
 		.expected_count = 1,
 		/* Check FIELD_SET works */
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1,
@@ -172,7 +172,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
 		.expected_sr_errors = 1,
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -192,7 +192,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
 		.expected_sr_errors = 1,
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -212,7 +212,7 @@ static const struct rtp_test_case cases[] = {
 		.expected_clr_bits = REG_BIT(0),
 		.expected_count = 1,
 		.expected_sr_errors = 2,
-		.entries = (const struct xe_rtp_entry[]) {
+		.entries = (const struct xe_rtp_entry_sr[]) {
 			{ XE_RTP_NAME("basic-1"),
 			  XE_RTP_RULES(FUNC(match_yes)),
 			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
@@ -242,7 +242,7 @@ static void xe_rtp_process_tests(struct kunit *test)
 	unsigned long idx, count = 0;
 
 	xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
-	xe_rtp_process(&ctx, param->entries, reg_sr);
+	xe_rtp_process_to_sr(&ctx, param->entries, reg_sr);
 
 	xa_for_each(&reg_sr->xa, idx, sre) {
 		if (idx == param->expected_reg.addr)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 25b96f40d5a78..7e4b0b465244d 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -282,7 +282,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
 			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
-	const struct xe_rtp_entry lrc_was[] = {
+	const struct xe_rtp_entry_sr lrc_was[] = {
 		/*
 		 * Some blitter commands do not have a field for MOCS, those
 		 * commands will use MOCS index pointed by BLIT_CCTL.
@@ -300,7 +300,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 		{}
 	};
 
-	xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc);
+	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
 }
 
 static void
@@ -313,7 +313,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
 			        REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
-	const struct xe_rtp_entry engine_entries[] = {
+	const struct xe_rtp_entry_sr engine_entries[] = {
 		/*
 		 * RING_CMD_CCTL specifies the default MOCS entry that will be
 		 * used by the command streamer when executing commands that
@@ -334,7 +334,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 		{}
 	};
 
-	xe_rtp_process(&ctx, engine_entries, &hwe->reg_sr);
+	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
 }
 
 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 98f678d744459..70892f1347189 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -20,7 +20,7 @@ static bool match_not_render(const struct xe_gt *gt,
 	return hwe->class != XE_ENGINE_CLASS_RENDER;
 }
 
-static const struct xe_rtp_entry register_whitelist[] = {
+static const struct xe_rtp_entry_sr register_whitelist[] = {
 	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT,
@@ -65,7 +65,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 
-	xe_rtp_process(&ctx, register_whitelist, &hwe->reg_whitelist);
+	xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 5dcdfe45f0cbd..0be1f4cfc4d58 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -26,7 +26,7 @@
 static bool rule_matches(const struct xe_device *xe,
 			 struct xe_gt *gt,
 			 struct xe_hw_engine *hwe,
-			 const struct xe_rtp_entry *entry)
+			 const struct xe_rtp_entry_sr *entry)
 {
 	const struct xe_rtp_rule *r;
 	unsigned int i;
@@ -112,9 +112,9 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 	xe_reg_sr_add(sr, &sr_entry);
 }
 
-static void rtp_process_one(const struct xe_rtp_entry *entry,
-			    struct xe_device *xe, struct xe_gt *gt,
-			    struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
+static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
+			       struct xe_device *xe, struct xe_gt *gt,
+			       struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
 {
 	const struct xe_rtp_action *action;
 	u32 mmio_base;
@@ -154,10 +154,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
 }
 
 /**
- * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr
+ * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to
+ *                        the save-restore argument.
  * @ctx: The context for processing the table, with one of device, gt or hwe
  * @entries: Table with RTP definitions
- * @sr: Where to add an entry to with the values for matching. This can be
+ * @sr: Save-restore struct where matching rules execute the action. This can be
  *      viewed as the "coalesced view" of multiple the tables. The bits for each
  *      register set are expected not to collide with previously added entries
  *
@@ -165,10 +166,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
  * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
  * used to calculate the right register offset
  */
-void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
-		    const struct xe_rtp_entry *entries, struct xe_reg_sr *sr)
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr)
 {
-	const struct xe_rtp_entry *entry;
+	const struct xe_rtp_entry_sr *entry;
 	struct xe_hw_engine *hwe = NULL;
 	struct xe_gt *gt = NULL;
 	struct xe_device *xe = NULL;
@@ -181,13 +183,13 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
 			enum xe_hw_engine_id id;
 
 			for_each_hw_engine(each_hwe, gt, id)
-				rtp_process_one(entry, xe, gt, each_hwe, sr);
+				rtp_process_one_sr(entry, xe, gt, each_hwe, sr);
 		} else {
-			rtp_process_one(entry, xe, gt, hwe, sr);
+			rtp_process_one_sr(entry, xe, gt, hwe, sr);
 		}
 	}
 }
-EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process);
+EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
 
 bool xe_rtp_match_even_instance(const struct xe_gt *gt,
 				const struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index c4b718b9632ee..179b497186d28 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -261,7 +261,7 @@ struct xe_reg_sr;
 #define XE_RTP_NAME(s_)	.name = (s_)
 
 /**
- * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry
+ * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry_sr
  * @...: Entry flags, without the ``XE_RTP_ENTRY_FLAG_`` prefix
  *
  * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to the flags
@@ -269,7 +269,7 @@ struct xe_reg_sr;
  *
  * .. code-block:: c
  *
- *	const struct xe_rtp_entry wa_entries[] = {
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
  *		...
  *		{ XE_RTP_NAME("test-entry"),
  *		  ...
@@ -291,7 +291,7 @@ struct xe_reg_sr;
  *
  * .. code-block:: c
  *
- *	const struct xe_rtp_entry wa_entries[] = {
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
  *		...
  *		{ XE_RTP_NAME("test-entry"),
  *		  ...
@@ -305,7 +305,7 @@ struct xe_reg_sr;
 	.flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__)))
 
 /**
- * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry
+ * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry
  * @...: Rules
  *
  * At least one rule is needed and up to 4 are supported. Multiple rules are
@@ -314,7 +314,7 @@ struct xe_reg_sr;
  *
  * .. code-block:: c
  *
- *	const struct xe_rtp_entry wa_entries[] = {
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
  *		...
  *		{ XE_RTP_NAME("test-entry"),
  *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
@@ -330,7 +330,7 @@ struct xe_reg_sr;
 	}
 
 /**
- * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry
+ * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr
  * @...: Actions to be taken
  *
  * At least one rule is needed and up to 4 are supported. Multiple rules are
@@ -339,7 +339,7 @@ struct xe_reg_sr;
  *
  * .. code-block:: c
  *
- *	const struct xe_rtp_entry wa_entries[] = {
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
  *		...
  *		{ XE_RTP_NAME("test-entry"),
  *		  XE_RTP_RULES(...),
@@ -359,9 +359,9 @@ struct xe_reg_sr;
 	struct xe_hw_engine *:	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE },	\
 	struct xe_gt *:		(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
 
-void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
-		    const struct xe_rtp_entry *entries,
-		    struct xe_reg_sr *sr);
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr);
 
 /* Match functions to be used with XE_RTP_MATCH_FUNC */
 
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 5afacbd9083d8..66d1cb0aaa081 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -84,8 +84,8 @@ struct xe_rtp_rule {
 	};
 };
 
-/** struct xe_rtp_entry - Entry in an rtp table */
-struct xe_rtp_entry {
+/** struct xe_rtp_entry_sr - Entry in an rtp table */
+struct xe_rtp_entry_sr {
 	const char *name;
 	const struct xe_rtp_action *actions;
 	const struct xe_rtp_rule *rules;
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index c2810ede3a655..412e59de98424 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -15,7 +15,7 @@
 #undef XE_REG_MCR
 #define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
 
-static const struct xe_rtp_entry gt_tunings[] = {
+static const struct xe_rtp_entry_sr gt_tunings[] = {
 	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
@@ -27,7 +27,7 @@ static const struct xe_rtp_entry gt_tunings[] = {
 	{}
 };
 
-static const struct xe_rtp_entry lrc_tunings[] = {
+static const struct xe_rtp_entry_sr lrc_tunings[] = {
 	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  /* read verification is ignored due to 1608008084. */
@@ -61,7 +61,7 @@ void xe_tuning_process_gt(struct xe_gt *gt)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
 
-	xe_rtp_process(&ctx, gt_tunings, &gt->reg_sr);
+	xe_rtp_process_to_sr(&ctx, gt_tunings, &gt->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
 
@@ -77,5 +77,5 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 
-	xe_rtp_process(&ctx, lrc_tunings, &hwe->reg_lrc);
+	xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc);
 }
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 4b236b6f4c8ea..557e90d79f0bf 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -93,7 +93,7 @@
 __diag_push();
 __diag_ignore_all("-Woverride-init", "Allow field overrides in table");
 
-static const struct xe_rtp_entry gt_was[] = {
+static const struct xe_rtp_entry_sr gt_was[] = {
 	{ XE_RTP_NAME("14011060649"),
 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
 		       ENGINE_CLASS(VIDEO_DECODE),
@@ -235,7 +235,7 @@ static const struct xe_rtp_entry gt_was[] = {
 	{}
 };
 
-static const struct xe_rtp_entry engine_was[] = {
+static const struct xe_rtp_entry_sr engine_was[] = {
 	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(FF_THREAD_MODE,
@@ -490,7 +490,7 @@ static const struct xe_rtp_entry engine_was[] = {
 	{}
 };
 
-static const struct xe_rtp_entry lrc_was[] = {
+static const struct xe_rtp_entry_sr lrc_was[] = {
 	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3,
@@ -581,7 +581,7 @@ void xe_wa_process_gt(struct xe_gt *gt)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
 
-	xe_rtp_process(&ctx, gt_was, &gt->reg_sr);
+	xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
 
@@ -597,7 +597,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 
-	xe_rtp_process(&ctx, engine_was, &hwe->reg_sr);
+	xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr);
 }
 
 /**
@@ -612,5 +612,5 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 
-	xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc);
+	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
 }
-- 
GitLab


From cefeb7634136b7273dff7fe20cedc95e01e51209 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:42 -0700
Subject: [PATCH 1571/2340] drm/xe/rtp: Allow to track active workarounds

Add the metadata in struct xe_rtp_process_ctx, to be set by
xe_rtp_process_ctx_enable_active_tracking(), so rtp knows how to mark
the active entries while processing the table. This can be used by the
WA infra to record what are the active workarounds.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-6-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c       | 48 ++++++++++++++++++++++++++++---
 drivers/gpu/drm/xe/xe_rtp.h       |  4 +++
 drivers/gpu/drm/xe/xe_rtp_types.h |  2 ++
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 0be1f4cfc4d58..29cf92f9b7b38 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -112,7 +112,7 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 	xe_reg_sr_add(sr, &sr_entry);
 }
 
-static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
+static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
 			       struct xe_device *xe, struct xe_gt *gt,
 			       struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
 {
@@ -121,7 +121,7 @@ static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
 	unsigned int i;
 
 	if (!rule_matches(xe, gt, hwe, entry))
-		return;
+		return false;
 
 	for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
 		if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
@@ -132,6 +132,8 @@ static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
 
 		rtp_add_sr_entry(action, gt, mmio_base, sr);
 	}
+
+	return true;
 }
 
 static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
@@ -153,6 +155,38 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
 	};
 }
 
+/**
+ * xe_rtp_process_ctx_enable_active_tracking - Enable tracking of active entries
+ *
+ * Set additional metadata to track what entries are considered "active", i.e.
+ * their rules match the condition. Bits are never cleared: entries with
+ * matching rules set the corresponding bit in the bitmap.
+ *
+ * @ctx: The context for processing the table
+ * @active_entries: bitmap to store the active entries
+ * @n_entries: number of entries to be processed
+ */
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries)
+{
+	ctx->active_entries = active_entries;
+	ctx->n_entries = n_entries;
+}
+
+static void rtp_mark_active(struct xe_device *xe,
+			    struct xe_rtp_process_ctx *ctx,
+			    unsigned int bit)
+{
+	if (!ctx->active_entries)
+		return;
+
+	if (drm_WARN_ON(&xe->drm, bit > ctx->n_entries))
+		return;
+
+	bitmap_set(ctx->active_entries, bit, 1);
+}
+
 /**
  * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to
  *                        the save-restore argument.
@@ -178,15 +212,21 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
 	rtp_get_context(ctx, &hwe, &gt, &xe);
 
 	for (entry = entries; entry && entry->name; entry++) {
+		bool match = false;
+
 		if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
 			struct xe_hw_engine *each_hwe;
 			enum xe_hw_engine_id id;
 
 			for_each_hw_engine(each_hwe, gt, id)
-				rtp_process_one_sr(entry, xe, gt, each_hwe, sr);
+				match |= rtp_process_one_sr(entry, xe, gt,
+							    each_hwe, sr);
 		} else {
-			rtp_process_one_sr(entry, xe, gt, hwe, sr);
+			match = rtp_process_one_sr(entry, xe, gt, hwe, sr);
 		}
+
+		if (match)
+			rtp_mark_active(xe, ctx, entry - entries);
 	}
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 179b497186d28..e69f514ee6c45 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -359,6 +359,10 @@ struct xe_reg_sr;
 	struct xe_hw_engine *:	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE },	\
 	struct xe_gt *:		(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
 
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries);
+
 void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
 			  const struct xe_rtp_entry_sr *entries,
 			  struct xe_reg_sr *sr);
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 66d1cb0aaa081..03d97b666c64c 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -106,6 +106,8 @@ struct xe_rtp_process_ctx {
 		struct xe_hw_engine *hwe;
 	};
 	enum xe_rtp_process_type type;
+	unsigned long *active_entries;
+	size_t n_entries;
 };
 
 #endif
-- 
GitLab


From 49d329a0824df79bb04d720ccdc9dbc257ec7e6b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:43 -0700
Subject: [PATCH 1572/2340] drm/xe/wa: Track gt/engine/lrc active workarounds

Allocate the data to track workarounds on each gt of the device,
and pass that to RTP so the active workarounds are tracked.

Even if the workarounds available until now are mostly device
or platform centric, with the different IP versions for media and
graphics starting with MTL, it's possible that some workarounds
need to be applied only on select GTs. Also, given the workaround
database is per IP block, for tracking purposes there is no need to
differentiate the workarounds per engine class. Hence the bitmask
to track active workarounds can be tracked per GT.

v2: Move the tracking from per-device to per-GT basis (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-7-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c       |  5 +++++
 drivers/gpu/drm/xe/xe_gt_types.h | 10 +++++++++
 drivers/gpu/drm/xe/xe_wa.c       | 37 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_wa.h       |  1 +
 4 files changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 80d42c7c7cfa9..d139554316d4e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -315,6 +315,11 @@ int xe_gt_init_early(struct xe_gt *gt)
 		return err;
 
 	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
+
+	err = xe_wa_init(gt);
+	if (err)
+		return err;
+
 	xe_wa_process_gt(gt);
 	xe_tuning_process_gt(gt);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7c47d67aa8be8..017ab60f2498e 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -359,6 +359,16 @@ struct xe_gt {
 	 *    of a steered operation
 	 */
 	spinlock_t mcr_lock;
+
+	/** @wa_active: keep track of active workarounds */
+	struct {
+		/** @gt: bitmap with active GT workarounds */
+		unsigned long *gt;
+		/** @engine: bitmap with active engine workarounds */
+		unsigned long *engine;
+		/** @lrc: bitmap with active LRC workarounds */
+		unsigned long *lrc;
+	} wa_active;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 557e90d79f0bf..665714abc5f00 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -5,6 +5,7 @@
 
 #include "xe_wa.h"
 
+#include <drm/drm_managed.h>
 #include <kunit/visibility.h>
 #include <linux/compiler_types.h>
 
@@ -581,6 +582,8 @@ void xe_wa_process_gt(struct xe_gt *gt)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
 
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt,
+						  ARRAY_SIZE(gt_was));
 	xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
@@ -597,6 +600,8 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine,
+						  ARRAY_SIZE(engine_was));
 	xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr);
 }
 
@@ -612,5 +617,37 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc,
+						  ARRAY_SIZE(lrc_was));
 	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
 }
+
+/**
+ * xe_wa_init - initialize gt with workaround bookkeeping
+ * @gt: GT instance to initialize
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_wa_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	size_t n_lrc, n_engine, n_gt, total;
+	unsigned long *p;
+
+	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was));
+	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was));
+	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was));
+	total = n_gt + n_engine + n_lrc;
+
+	p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	gt->wa_active.gt = p;
+	p += n_gt;
+	gt->wa_active.engine = p;
+	p += n_engine;
+	gt->wa_active.lrc = p;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index cd2307d587950..eae05bcecc68e 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -9,6 +9,7 @@
 struct xe_gt;
 struct xe_hw_engine;
 
+int xe_wa_init(struct xe_gt *gt);
 void xe_wa_process_gt(struct xe_gt *gt);
 void xe_wa_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_process_lrc(struct xe_hw_engine *hwe);
-- 
GitLab


From 40a627cafe02d44d24fa800b1d93c5d17b4649a5 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:44 -0700
Subject: [PATCH 1573/2340] drm/xe/debugfs: Dump active workarounds

Add a "workarounds" node in debugfs that can dump all the active
workarounds using the information recorded by rtp infra when those
workarounds were processed.

v2: move workarounds to be reported per-GT

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-8-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c | 12 ++++++++++++
 drivers/gpu/drm/xe/xe_wa.c         | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_wa.h         |  2 ++
 3 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 8bf441e850a00..339ecd5fad9b8 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -19,6 +19,7 @@
 #include "xe_reg_sr.h"
 #include "xe_reg_whitelist.h"
 #include "xe_uc_debugfs.h"
+#include "xe_wa.h"
 
 static struct xe_gt *node_to_gt(struct drm_info_node *node)
 {
@@ -127,6 +128,16 @@ static int register_save_restore(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int workarounds(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_wa_dump(gt, &p);
+
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
@@ -135,6 +146,7 @@ static const struct drm_info_list debugfs_list[] = {
 	{"steering", steering, 0},
 	{"ggtt", ggtt, 0},
 	{"register-save-restore", register_save_restore, 0},
+	{"workarounds", workarounds, 0},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 665714abc5f00..910579453316a 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -651,3 +651,20 @@ int xe_wa_init(struct xe_gt *gt)
 
 	return 0;
 }
+
+void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	size_t idx;
+
+	drm_printf(p, "GT Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was))
+		drm_printf_indent(p, 1, "%s\n", gt_was[idx].name);
+
+	drm_printf(p, "\nEngine Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was))
+		drm_printf_indent(p, 1, "%s\n", engine_was[idx].name);
+
+	drm_printf(p, "\nLRC Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was))
+		drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index eae05bcecc68e..defefa5d9611a 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -6,6 +6,7 @@
 #ifndef _XE_WA_
 #define _XE_WA_
 
+struct drm_printer;
 struct xe_gt;
 struct xe_hw_engine;
 
@@ -15,5 +16,6 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_process_lrc(struct xe_hw_engine *hwe);
 
 void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
 
 #endif
-- 
GitLab


From 00a5912c020df0bd4b752db714cb7256a83c0701 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:45 -0700
Subject: [PATCH 1574/2340] drm/xe/rtp: Rename STEP to GRAPHICS_STEP

Rename the RTP match in order to prepare the code base to check for the
media version. Up until MTL, the graphics vs media distinction wrt to
stepping was not ver relevant as they were the same GT. However, with
MTL this is no longer true.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-9-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c       |  2 +-
 drivers/gpu/drm/xe/xe_rtp.h       | 16 ++++----
 drivers/gpu/drm/xe/xe_rtp_types.h |  2 +-
 drivers/gpu/drm/xe/xe_wa.c        | 64 ++++++++++++++++++-------------
 4 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 29cf92f9b7b38..2ac7b47942fe2 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -57,7 +57,7 @@ static bool rule_matches(const struct xe_device *xe,
 			match = xe->info.media_verx100 >= r->ver_start &&
 				xe->info.media_verx100 <= r->ver_end;
 			break;
-		case XE_RTP_MATCH_STEP:
+		case XE_RTP_MATCH_GRAPHICS_STEP:
 			/* TODO: match media/display */
 			match = xe->info.step.graphics >= r->step_start &&
 				xe->info.step.graphics < r->step_end;
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index e69f514ee6c45..7ba9d2ecab926 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -35,8 +35,8 @@ struct xe_reg_sr;
 	{ .match_type = XE_RTP_MATCH_SUBPLATFORM,				\
 	  .platform = plat__, .subplatform = sub__ }
 
-#define _XE_RTP_RULE_STEP(start__, end__)					\
-	{ .match_type = XE_RTP_MATCH_STEP,					\
+#define _XE_RTP_RULE_GRAPHICS_STEP(start__, end__)				\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_STEP,				\
 	  .step_start = start__, .step_end = end__ }
 
 #define _XE_RTP_RULE_ENGINE_CLASS(cls__)					\
@@ -63,17 +63,17 @@ struct xe_reg_sr;
 	_XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_)
 
 /**
- * XE_RTP_RULE_STEP - Create rule matching platform stepping
+ * XE_RTP_RULE_GRAPHICS_STEP - Create rule matching graphics stepping
  * @start_: First stepping matching the rule
  * @end_: First stepping that does not match the rule
  *
- * Note that the range matching this rule [ @start_, @end_ ), i.e. inclusive on
- * the left, exclusive on the right.
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
  *
  * Refer to XE_RTP_RULES() for expected usage.
  */
-#define XE_RTP_RULE_STEP(start_, end_)						\
-	_XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_)
+#define XE_RTP_RULE_GRAPHICS_STEP(start_, end_)					\
+	_XE_RTP_RULE_GRAPHICS_STEP(STEP_##start_, STEP_##end_)
 
 /**
  * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class
@@ -317,7 +317,7 @@ struct xe_reg_sr;
  *	const struct xe_rtp_entry_sr wa_entries[] = {
  *		...
  *		{ XE_RTP_NAME("test-entry"),
- *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
  *		  ...
  *		},
  *		...
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 03d97b666c64c..52adbf7de752e 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -39,11 +39,11 @@ enum {
 	XE_RTP_MATCH_SUBPLATFORM,
 	XE_RTP_MATCH_GRAPHICS_VERSION,
 	XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,
+	XE_RTP_MATCH_GRAPHICS_STEP,
 	XE_RTP_MATCH_MEDIA_VERSION,
 	XE_RTP_MATCH_MEDIA_VERSION_RANGE,
 	XE_RTP_MATCH_INTEGRATED,
 	XE_RTP_MATCH_DISCRETE,
-	XE_RTP_MATCH_STEP,
 	XE_RTP_MATCH_ENGINE_CLASS,
 	XE_RTP_MATCH_NOT_ENGINE_CLASS,
 	XE_RTP_MATCH_FUNC,
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 910579453316a..d703dc0f7b21c 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -122,7 +122,7 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 
 	{ XE_RTP_NAME("16010515920"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
-		       STEP(A0, B0),
+		       GRAPHICS_STEP(A0, B0),
 		       ENGINE_CLASS(VIDEO_DECODE)),
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
@@ -136,27 +136,27 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14012362059"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
 	},
 	{ XE_RTP_NAME("14012362059"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
 	},
 	{ XE_RTP_NAME("14010948348"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011037102"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011371254"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011431319"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(UNSLCGCTL9440,
 			     GAMTLBOACS_CLKGATE_DIS |
 			     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
@@ -176,15 +176,15 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 			     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14010569222"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14011028019"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS))
 	},
 	{ XE_RTP_NAME("14010680813"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL,
 			     CONTROL_BLOCK_CLKGATE_DIS |
 			     EGRESS_BLOCK_CLKGATE_DIS |
@@ -316,7 +316,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			     POLYGON_TRIFAN_LINELOOP_DISABLE))
 	},
 	{ XE_RTP_NAME("22012826095, 22013059131"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
 				   MAXREQS_PER_BANK,
@@ -330,7 +330,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
 	},
 	{ XE_RTP_NAME("22013059131"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
 	},
@@ -368,7 +368,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8))
 	},
 	{ XE_RTP_NAME("22012654132"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
 			     /*
@@ -396,21 +396,25 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION))
 	},
 	{ XE_RTP_NAME("14013392000"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE))
 	},
 	{ XE_RTP_NAME("14012419201"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
 			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
 	},
 	{ XE_RTP_NAME("14012419201"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
 			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
 	},
 	{ XE_RTP_NAME("1308578152"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       ENGINE_CLASS(RENDER),
 		       FUNC(xe_rtp_match_first_gslice_fused_off)),
 	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1,
 			     REPLAY_MODE_GRANULARITY))
@@ -426,30 +430,35 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE))
 	},
 	{ XE_RTP_NAME("22010430635"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
 			     DISABLE_GRF_CLEAR))
 	},
 	{ XE_RTP_NAME("14013202645"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
 	},
 	{ XE_RTP_NAME("14013202645"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
 	},
 	{ XE_RTP_NAME("22012532006"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
 			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
 	},
 	{ XE_RTP_NAME("22012532006"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
 			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
 	},
 	{ XE_RTP_NAME("22014600077"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(B0, FOREVER),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(B0, FOREVER),
 		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
 			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
@@ -485,7 +494,8 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE))
 	},
 	{ XE_RTP_NAME("14014999345"),
-	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), STEP(B0, C0)),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE),
+		       GRAPHICS_STEP(B0, C0)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC))
 	},
 	{}
@@ -528,17 +538,17 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	/* DG2 */
 
 	{ XE_RTP_NAME("16011186671"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH),
 			 SET(VFLSKPD, DIS_OVER_FETCH_CACHE))
 	},
 	{ XE_RTP_NAME("14010469329"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
 			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE))
 	},
 	{ XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
 			     DISABLE_CPS_AWARE_COLOR_PIPE))
 	},
-- 
GitLab


From ed73d03c0803bdb70d7e56c7d8a2518fb9376047 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:46 -0700
Subject: [PATCH 1575/2340] drm/xe/rtp: Add check for media stepping

Start differentiating the media and graphics stepping as it will be
important for MTL. Note that RTP is still not prepared to handle the
different types of GT, i.e. checking for graphics version/range/stepping
on a media gt or vice versa still matches regardless of the gt being
passed as parameter. Changing it to accommodate MTL is left for a future
patch.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-10-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c       | 12 +++++++-----
 drivers/gpu/drm/xe/xe_rtp.h       | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_rtp_types.h |  1 +
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 2ac7b47942fe2..70769852a93da 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -43,13 +43,16 @@ static bool rule_matches(const struct xe_device *xe,
 				xe->info.subplatform == r->subplatform;
 			break;
 		case XE_RTP_MATCH_GRAPHICS_VERSION:
-			/* TODO: match display */
 			match = xe->info.graphics_verx100 == r->ver_start;
 			break;
 		case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
 			match = xe->info.graphics_verx100 >= r->ver_start &&
 				xe->info.graphics_verx100 <= r->ver_end;
 			break;
+		case XE_RTP_MATCH_GRAPHICS_STEP:
+			match = xe->info.step.graphics >= r->step_start &&
+				xe->info.step.graphics < r->step_end;
+			break;
 		case XE_RTP_MATCH_MEDIA_VERSION:
 			match = xe->info.media_verx100 == r->ver_start;
 			break;
@@ -57,10 +60,9 @@ static bool rule_matches(const struct xe_device *xe,
 			match = xe->info.media_verx100 >= r->ver_start &&
 				xe->info.media_verx100 <= r->ver_end;
 			break;
-		case XE_RTP_MATCH_GRAPHICS_STEP:
-			/* TODO: match media/display */
-			match = xe->info.step.graphics >= r->step_start &&
-				xe->info.step.graphics < r->step_end;
+		case XE_RTP_MATCH_MEDIA_STEP:
+			match = xe->info.step.media >= r->step_start &&
+				xe->info.step.media < r->step_end;
 			break;
 		case XE_RTP_MATCH_INTEGRATED:
 			match = !xe->info.is_dgfx;
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 7ba9d2ecab926..d55701d2f39b4 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -39,6 +39,10 @@ struct xe_reg_sr;
 	{ .match_type = XE_RTP_MATCH_GRAPHICS_STEP,				\
 	  .step_start = start__, .step_end = end__ }
 
+#define _XE_RTP_RULE_MEDIA_STEP(start__, end__)					\
+	{ .match_type = XE_RTP_MATCH_MEDIA_STEP,				\
+	  .step_start = start__, .step_end = end__ }
+
 #define _XE_RTP_RULE_ENGINE_CLASS(cls__)					\
 	{ .match_type = XE_RTP_MATCH_ENGINE_CLASS,				\
 	  .engine_class = (cls__) }
@@ -75,6 +79,19 @@ struct xe_reg_sr;
 #define XE_RTP_RULE_GRAPHICS_STEP(start_, end_)					\
 	_XE_RTP_RULE_GRAPHICS_STEP(STEP_##start_, STEP_##end_)
 
+/**
+ * XE_RTP_RULE_MEDIA_STEP - Create rule matching media stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_STEP(start_, end_)					\
+	_XE_RTP_RULE_MEDIA_STEP(STEP_##start_, STEP_##end_)
+
 /**
  * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class
  * @cls_: Engine class to match
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 52adbf7de752e..af49cbf98407e 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -42,6 +42,7 @@ enum {
 	XE_RTP_MATCH_GRAPHICS_STEP,
 	XE_RTP_MATCH_MEDIA_VERSION,
 	XE_RTP_MATCH_MEDIA_VERSION_RANGE,
+	XE_RTP_MATCH_MEDIA_STEP,
 	XE_RTP_MATCH_INTEGRATED,
 	XE_RTP_MATCH_DISCRETE,
 	XE_RTP_MATCH_ENGINE_CLASS,
-- 
GitLab


From fe19328b900cc2c92054259e16d99023111c57f3 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:47 -0700
Subject: [PATCH 1576/2340] drm/xe/rtp: Add support for entries with no action

Add a separate struct to hold entries in a table that has no action
associated with each of them. The goal is that the caller in future can
set a per-context callback, or just use the active entry marking
feature.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-11-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c       | 65 +++++++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_rtp.h       |  3 ++
 drivers/gpu/drm/xe/xe_rtp_types.h |  7 ++++
 3 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 70769852a93da..ebcfb04c391a0 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -26,14 +26,14 @@
 static bool rule_matches(const struct xe_device *xe,
 			 struct xe_gt *gt,
 			 struct xe_hw_engine *hwe,
-			 const struct xe_rtp_entry_sr *entry)
+			 const struct xe_rtp_rule *rules,
+			 unsigned int n_rules)
 {
 	const struct xe_rtp_rule *r;
 	unsigned int i;
 	bool match;
 
-	for (r = entry->rules, i = 0; i < entry->n_rules;
-	     r = &entry->rules[++i]) {
+	for (r = rules, i = 0; i < n_rules; r = &rules[++i]) {
 		switch (r->match_type) {
 		case XE_RTP_MATCH_PLATFORM:
 			match = xe->info.platform == r->platform;
@@ -122,7 +122,7 @@ static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
 	u32 mmio_base;
 	unsigned int i;
 
-	if (!rule_matches(xe, gt, hwe, entry))
+	if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
 		return false;
 
 	for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
@@ -178,15 +178,18 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
 
 static void rtp_mark_active(struct xe_device *xe,
 			    struct xe_rtp_process_ctx *ctx,
-			    unsigned int bit)
+			    unsigned int first, unsigned int last)
 {
 	if (!ctx->active_entries)
 		return;
 
-	if (drm_WARN_ON(&xe->drm, bit > ctx->n_entries))
+	if (drm_WARN_ON(&xe->drm, last > ctx->n_entries))
 		return;
 
-	bitmap_set(ctx->active_entries, bit, 1);
+	if (first == last)
+		bitmap_set(ctx->active_entries, first, 1);
+	else
+		bitmap_set(ctx->active_entries, first, last - first + 2);
 }
 
 /**
@@ -228,11 +231,57 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
 		}
 
 		if (match)
-			rtp_mark_active(xe, ctx, entry - entries);
+			rtp_mark_active(xe, ctx, entry - entries,
+					entry - entries);
 	}
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
 
+/**
+ * xe_rtp_process - Process all rtp @entries, without running any action
+ * @ctx: The context for processing the table, with one of device, gt or hwe
+ * @entries: Table with RTP definitions
+ *
+ * Walk the table pointed by @entries (with an empty sentinel), executing the
+ * rules. A few differences from xe_rtp_process_to_sr():
+ *
+ * 1. There is no action associated with each entry since this uses
+ *    struct xe_rtp_entry. Its main use is for marking active workarounds via
+ *    xe_rtp_process_ctx_enable_active_tracking().
+ * 2. There is support for OR operations by having entries with no name.
+ */
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries)
+{
+	const struct xe_rtp_entry *entry, *first_entry;
+	struct xe_hw_engine *hwe;
+	struct xe_gt *gt;
+	struct xe_device *xe;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
+
+	first_entry = entries;
+	if (drm_WARN_ON(&xe->drm, !first_entry->name))
+		return;
+
+	for (entry = entries; entry && entry->rules; entry++) {
+		if (entry->name)
+			first_entry = entry;
+
+		if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
+			continue;
+
+		/* Fast-forward entry, eliminating the OR'ed entries */
+		for (entry++; entry && entry->rules; entry++)
+			if (entry->name)
+				break;
+		entry--;
+
+		rtp_mark_active(xe, ctx, first_entry - entries,
+				entry - entries);
+	}
+}
+
 bool xe_rtp_match_even_instance(const struct xe_gt *gt,
 				const struct xe_hw_engine *hwe)
 {
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index d55701d2f39b4..8581bd9b14269 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -384,6 +384,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
 			  const struct xe_rtp_entry_sr *entries,
 			  struct xe_reg_sr *sr);
 
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries);
+
 /* Match functions to be used with XE_RTP_MATCH_FUNC */
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index af49cbf98407e..d170532a98a59 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -96,6 +96,13 @@ struct xe_rtp_entry_sr {
 	u8 flags;
 };
 
+/** struct xe_rtp_entry - Entry in an rtp table, with no action associated */
+struct xe_rtp_entry {
+	const char *name;
+	const struct xe_rtp_rule *rules;
+	u8 n_rules;
+};
+
 enum xe_rtp_process_type {
 	XE_RTP_PROCESS_TYPE_GT,
 	XE_RTP_PROCESS_TYPE_ENGINE,
-- 
GitLab


From 464f2243c1fb139d8200e96648131197bf50fb27 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:48 -0700
Subject: [PATCH 1577/2340] drm/xe: Include build directory

When doing out-of-tree builds with O= or KBUILD_OUTPUT=, it's important
to also add the directory where the target is saved. Otherwise any file
generated by the build system may not be available for other targets
depending on it.

The $(obj) is added automatically when building the entire kernel,
but it's not added when M=drivers/gpu/drm/xe is added.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-12-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 5d277d060eba9..db88c9d845693 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -27,7 +27,7 @@ subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
 # Fine grained warnings disable
 CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init)
 
-subdir-ccflags-y += -I$(srctree)/$(src)
+subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src)
 
 # Please keep these build lists sorted!
 
-- 
GitLab


From 9616e74b796c752ec29c3c83f3e33277d2b25b8e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:49 -0700
Subject: [PATCH 1578/2340] drm/xe: Add support for OOB workarounds

There are WAs that, due to their nature, cannot be applied from a
central place like xe_wa.c. Those are peppered around the rest of the
code, as needed. Now they have a new name:  "out-of-band workarounds".

These workarounds have their names and rules still grouped in xe_wa.c,
inside the xe_wa_oob array, which is generated at compile time by
xe_wa_oob.rules and the hostprog xe_gen_wa_oob. The code generation
guarantees that the header xe_wa_oob.h contains the IDs for the
workarounds that match the index in the table. This way the runtime
checks that are spread throughout the code are simple tests against the
bitmap saved during initialization.

v2: Fix prev_name tracking not working when it's empty, i.e. when there
    is more than 1 continuation rule.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-13-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/.gitignore      |   2 +
 drivers/gpu/drm/xe/Makefile        |  13 +++
 drivers/gpu/drm/xe/xe_gen_wa_oob.c | 165 +++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt.c         |   1 +
 drivers/gpu/drm/xe/xe_gt_types.h   |   2 +
 drivers/gpu/drm/xe/xe_wa.c         |  40 ++++++-
 drivers/gpu/drm/xe/xe_wa.h         |   9 ++
 drivers/gpu/drm/xe/xe_wa_oob.rules |   0
 8 files changed, 228 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_gen_wa_oob.c
 create mode 100644 drivers/gpu/drm/xe/xe_wa_oob.rules

diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore
index 81972dce1affa..8778bf132674d 100644
--- a/drivers/gpu/drm/xe/.gitignore
+++ b/drivers/gpu/drm/xe/.gitignore
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 *.hdrtest
+/generated
+/xe_gen_wa_oob
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index db88c9d845693..b9d3553ab4769 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -29,6 +29,19 @@ CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init)
 
 subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src)
 
+# generated sources
+hostprogs := xe_gen_wa_oob
+
+generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h
+
+quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
+      cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob)
+
+$(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
+	$(call cmd,wa_oob)
+
+$(obj)/xe_wa.o: $(generated_oob)
+
 # Please keep these build lists sorted!
 
 # core driver code
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
new file mode 100644
index 0000000000000..106ee2b027f00
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#define HEADER \
+	"// SPDX-License-Identifier: MIT\n" \
+	"\n" \
+	"/*\n" \
+	" * DO NOT MODIFY.\n" \
+	" *\n" \
+	" * This file was generated from rules: %s\n" \
+	" */\n" \
+	"#ifndef _GENERATED_XE_WA_OOB_\n" \
+	"#define _GENERATED_XE_WA_OOB_\n" \
+	"\n" \
+	"enum {\n"
+
+#define FOOTER \
+	"};\n" \
+	"\n" \
+	"#endif\n"
+
+static void print_usage(FILE *f)
+{
+	fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n",
+		program_invocation_short_name);
+}
+
+static void print_parse_error(const char *err_msg, const char *line,
+			      unsigned int lineno)
+{
+	fprintf(stderr, "ERROR: %s\nERROR: %u: %.60s\n",
+		err_msg, lineno, line);
+}
+
+static char *strip(char *line, size_t linelen)
+{
+	while (isspace(*(line + linelen)))
+		linelen--;
+
+	line[linelen - 1] = '\0';
+
+	return  line + strspn(line, " \f\n\r\t\v");
+}
+
+#define MAX_LINE_LEN 4096
+static int parse(FILE *input, FILE *csource, FILE *cheader)
+{
+	char line[MAX_LINE_LEN + 1];
+	char *name, *prev_name = NULL, *rules;
+	unsigned int lineno = 0, idx = 0;
+
+	while (fgets(line, sizeof(line), input)) {
+		size_t linelen;
+		bool is_continuation;
+
+		if (line[0] == '\0' || line[0] == '#' || line[0] == '\n') {
+			lineno++;
+			continue;
+		}
+
+		linelen = strlen(line);
+		if (linelen == MAX_LINE_LEN) {
+			print_parse_error("line too long", line, lineno);
+			return -EINVAL;
+		}
+
+		is_continuation = isspace(line[0]);
+		name = strip(line, linelen);
+
+		if (!is_continuation) {
+			name = strtok(name, " \t");
+			rules = strtok(NULL, "");
+		} else {
+			if (!prev_name) {
+				print_parse_error("invalid rule continuation",
+						  line, lineno);
+				return -EINVAL;
+			}
+
+			rules = name;
+			name = NULL;
+		}
+
+		if (rules[0] == '\0') {
+			print_parse_error("invalid empty rule\n", line, lineno);
+			return -EINVAL;
+		}
+
+		if (name) {
+			fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
+			fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n",
+				name, rules);
+		} else {
+			fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n",
+				rules);
+		}
+
+		idx++;
+		lineno++;
+		if (!is_continuation)
+			prev_name = name;
+	}
+
+	fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
+
+	return 0;
+}
+
+int main(int argc, const char *argv[])
+{
+	enum {
+		ARGS_INPUT,
+		ARGS_CSOURCE,
+		ARGS_CHEADER,
+		_ARGS_COUNT
+	};
+	struct {
+		const char *fn;
+		const char *mode;
+		FILE *f;
+	} args[] = {
+		[ARGS_INPUT] = { .fn = argv[1], .mode = "r" },
+		[ARGS_CSOURCE] = { .fn = argv[2], .mode = "w" },
+		[ARGS_CHEADER] = { .fn = argv[3], .mode = "w" },
+	};
+	int ret = 1;
+
+	if (argc < 3) {
+		fprintf(stderr, "ERROR: wrong arguments\n");
+		print_usage(stderr);
+		return 1;
+	}
+
+	for (int i = 0; i < _ARGS_COUNT; i++) {
+		args[i].f = fopen(args[i].fn, args[i].mode);
+		if (!args[i].f) {
+			fprintf(stderr, "ERROR: Can't open %s: %m\n",
+				args[i].fn);
+			goto err;
+		}
+	}
+
+	fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn);
+	ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f,
+		    args[ARGS_CHEADER].f);
+	if (!ret)
+		fprintf(args[ARGS_CHEADER].f, FOOTER);
+
+err:
+	for (int i = 0; i < _ARGS_COUNT; i++) {
+		if (args[i].f)
+			fclose(args[i].f);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d139554316d4e..18eda5b1377fd 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -321,6 +321,7 @@ int xe_gt_init_early(struct xe_gt *gt)
 		return err;
 
 	xe_wa_process_gt(gt);
+	xe_wa_process_oob(gt);
 	xe_tuning_process_gt(gt);
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 017ab60f2498e..b83c834e7ced3 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -368,6 +368,8 @@ struct xe_gt {
 		unsigned long *engine;
 		/** @lrc: bitmap with active LRC workarounds */
 		unsigned long *lrc;
+		/** @oob: bitmap with active OOB workaroudns */
+		unsigned long *oob;
 	} wa_active;
 };
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d703dc0f7b21c..d9906f326d387 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -9,6 +9,7 @@
 #include <kunit/visibility.h>
 #include <linux/compiler_types.h>
 
+#include "generated/xe_wa_oob.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
@@ -73,8 +74,8 @@
  *      engine registers are restored in a context restore sequence. This is
  *      currently not used in the driver.
  *
- * - Other:  There are WAs that, due to their nature, cannot be applied from a
- *   central place. Those are peppered around the rest of the code, as needed.
+ * - Other/OOB:  There are WAs that, due to their nature, cannot be applied from
+ *   a central place. Those are peppered around the rest of the code, as needed.
  *   Workarounds related to the display IP are the main example.
  *
  * .. [1] Technically, some registers are powercontext saved & restored, so they
@@ -579,8 +580,31 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	{}
 };
 
+static __maybe_unused const struct xe_rtp_entry oob_was[] = {
+#include <generated/xe_wa_oob.c>
+	{}
+};
+
+static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT);
+
 __diag_pop();
 
+/**
+ * xe_wa_process_oob - process OOB workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process OOB workaround table for this platform, marking in @gt the
+ * workarounds that are active.
+ */
+void xe_wa_process_oob(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob,
+						  ARRAY_SIZE(oob_was));
+	xe_rtp_process(&ctx, oob_was);
+}
+
 /**
  * xe_wa_process_gt - process GT workaround table
  * @gt: GT instance to process workarounds for
@@ -641,13 +665,14 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)
 int xe_wa_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
-	size_t n_lrc, n_engine, n_gt, total;
+	size_t n_oob, n_lrc, n_engine, n_gt, total;
 	unsigned long *p;
 
 	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was));
 	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was));
 	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was));
-	total = n_gt + n_engine + n_lrc;
+	n_oob = BITS_TO_LONGS(ARRAY_SIZE(oob_was));
+	total = n_gt + n_engine + n_lrc + n_oob;
 
 	p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
 	if (!p)
@@ -658,6 +683,8 @@ int xe_wa_init(struct xe_gt *gt)
 	gt->wa_active.engine = p;
 	p += n_engine;
 	gt->wa_active.lrc = p;
+	p += n_lrc;
+	gt->wa_active.oob = p;
 
 	return 0;
 }
@@ -677,4 +704,9 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
 	drm_printf(p, "\nLRC Workarounds\n");
 	for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was))
 		drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name);
+
+	drm_printf(p, "\nOOB Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was))
+		if (oob_was[idx].name)
+			drm_printf_indent(p, 1, "%s\n", oob_was[idx].name);
 }
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index defefa5d9611a..cfe6859895241 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -11,6 +11,7 @@ struct xe_gt;
 struct xe_hw_engine;
 
 int xe_wa_init(struct xe_gt *gt);
+void xe_wa_process_oob(struct xe_gt *gt);
 void xe_wa_process_gt(struct xe_gt *gt);
 void xe_wa_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_process_lrc(struct xe_hw_engine *hwe);
@@ -18,4 +19,12 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe);
 void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
 
+/**
+ * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any
+ *         other more specific type
+ * @gt__: gt instance
+ * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
+ */
+#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob)
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
new file mode 100644
index 0000000000000..e69de29bb2d1d
-- 
GitLab


From 7d356b25b32eec2a33bf2bc67974ef56f0778a7c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:50 -0700
Subject: [PATCH 1579/2340] drm/xe/guc: Port Wa_22012773006 to xe_wa

Let xe_guc.c start using XE_WA() for workarounds, starting from a simple
one: Wa_22012773006. It's also changed to start with graphics version
12, since that is the first supported by xe.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-14-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile        | 2 +-
 drivers/gpu/drm/xe/xe_guc.c        | 6 +++---
 drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b9d3553ab4769..a685e39d6b442 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
-$(obj)/xe_wa.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_wa.o: $(generated_oob)
 
 # Please keep these build lists sorted!
 
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index a8e249205bffa..08362db6a8860 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -5,6 +5,7 @@
 
 #include "xe_guc.h"
 
+#include "generated/xe_wa_oob.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_guc_regs.h"
 #include "xe_bo.h"
@@ -20,6 +21,7 @@
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
 #include "xe_uc_fw.h"
+#include "xe_wa.h"
 #include "xe_wopcm.h"
 
 static struct xe_gt *
@@ -134,9 +136,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	struct xe_gt *gt = guc_to_gt(guc);
 	u32 flags = 0;
 
-	/* Wa_22012773006:gen11,gen12 < XeHP */
-	if (GRAPHICS_VER(xe) >= 11 &&
-	    GRAPHICS_VERx100(xe) < 1250)
+	if (XE_WA(gt, 22012773006))
 		flags |= GUC_WA_POLLCS;
 
 	/* Wa_16011759253 */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index e69de29bb2d1d..b54f70eeccf42 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -0,0 +1 @@
+22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
-- 
GitLab


From fb395db74b91dc60d928d7bd3f1c4b845efd950a Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:51 -0700
Subject: [PATCH 1580/2340] drm/xe/guc: Port Wa_16011759253 to xe_wa

Port Wa_16011759253 to oob. Wa_22011383443, that has the same action,
doesn't need to be ported as it targets early PVC steppings.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-15-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c        | 4 +---
 drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 08362db6a8860..1b3fbbd749236 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -139,9 +139,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	if (XE_WA(gt, 22012773006))
 		flags |= GUC_WA_POLLCS;
 
-	/* Wa_16011759253 */
-	/* Wa_22011383443 */
-	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0))
+	if (XE_WA(gt, 16011759253))
 		flags |= GUC_WA_GAM_CREDITS;
 
 	/* Wa_14014475959 */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index b54f70eeccf42..9b29a0dd09341 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1 +1,2 @@
 22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
+16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
-- 
GitLab


From 57a148d63d0b67822c44ba7253625c8dd3c13531 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:52 -0700
Subject: [PATCH 1581/2340] drm/xe/guc: Port Wa_14012197797/Wa_22011391025 to
 xe_wa

Wa_14012197797 and Wa_22011391025 apply to DG2 using the same action.
They apply to slightly different conditions. Add both to the oob rules
so they are both reported as active.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-16-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c        | 9 +--------
 drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 1b3fbbd749236..54aaf6e6b5777 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -147,14 +147,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	    xe->info.platform == XE_DG2)
 		flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
 
-	/*
-	 * Wa_14012197797
-	 * Wa_22011391025
-	 *
-	 * The same WA bit is used for both and 22011391025 is applicable to
-	 * all DG2.
-	 */
-	if (xe->info.platform == XE_DG2)
+	if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797))
 		flags |= GUC_WA_DUAL_QUEUE;
 
 	/*
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 9b29a0dd09341..77ac4a4a3296c 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,2 +1,4 @@
 22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
 16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
+22011391025	PLATFORM(DG2)
+14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
-- 
GitLab


From bb0f2e05ad6c5a9f1fa325f847ea5a82002ede1d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:53 -0700
Subject: [PATCH 1582/2340] drm/xe/guc: Port Wa_16011777198 to xe_wa

Port Wa_16011777198 to xe_wa so it's reported as active.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-17-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c        | 5 +----
 drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 54aaf6e6b5777..5eccc4b67381d 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -158,10 +158,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	if (GRAPHICS_VERx100(xe) < 1270)
 		flags |= GUC_WA_PRE_PARSER;
 
-	/* Wa_16011777198 */
-	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
-	    IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
-				STEP_B0))
+	if (XE_WA(gt, 16011777198))
 		flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
 
 	/*
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 77ac4a4a3296c..3d4304b7111e4 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -2,3 +2,5 @@
 16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
 22011391025	PLATFORM(DG2)
 14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
+16011777198	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)
-- 
GitLab


From 63bbd800ff013d2e6053ce94524e3219cabd8315 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:54 -0700
Subject: [PATCH 1583/2340] drm/xe/guc: Port Wa_22012727170/Wa_22012727685 to
 xe_wa

Wa_22012727170 and Wa_22012727685 apply to DG2 using the same action and
conditions. Add both to the oob rules so they are both reported as
active.

Do not Wa_22012727170 to PVC and MTL since only early A* steppings are
affected.

v2: Remove DG2_G10 from Wa_22012727685 to match current WA database
    (Matt Roper)
v3: GRAPHICS_STEP(A0, FOREVER) can be left alone for DG2 as this means
    all steppings

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-18-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c        | 8 +-------
 drivers/gpu/drm/xe/xe_wa_oob.rules | 3 +++
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 5eccc4b67381d..1291f71348db7 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -161,13 +161,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	if (XE_WA(gt, 16011777198))
 		flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
 
-	/*
-	 * Wa_22012727170
-	 * Wa_22012727685
-	 */
-	if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
-	    IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
-				STEP_FOREVER))
+	if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
 		flags |= GUC_WA_CONTEXT_ISOLATION;
 
 	/* Wa_16015675438, Wa_18020744125 */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 3d4304b7111e4..5b1beb2cf19f7 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -4,3 +4,6 @@
 14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
 16011777198	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
 		SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)
+22012727170	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11)
+22012727685	SUBPLATFORM(DG2, G11)
-- 
GitLab


From 2b48b0df30cea3a617a69e44ca69bec7f01ed276 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:55 -0700
Subject: [PATCH 1584/2340] drm/xe/guc: Port Wa_16015675438/Wa_18020744125 to
 xe_wa

Wa_16015675438 and Wa_18020744125 apply to DG2 using the same action and
conditions. Add both to the oob rules so they are both reported as
active. Note that previously they were not checking by platform or IP
version, hence making them not future-proof.  Those workarounds should
only be active in PVC and DG2, besides the check for "no render engine".

v2: From current WA database, Wa_16015675438 applies to all DG2
    subplatforms except G11. Migrate condition to use subplatform and
    remove G11 from the match (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-19-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c        | 4 ++--
 drivers/gpu/drm/xe/xe_wa_oob.rules | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 1291f71348db7..cc58a20922367 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -164,8 +164,8 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
 		flags |= GUC_WA_CONTEXT_ISOLATION;
 
-	/* Wa_16015675438, Wa_18020744125 */
-	if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
+	if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) &&
+	    !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
 		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
 
 	/* Wa_1509372804 */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 5b1beb2cf19f7..ebb576f27b3ee 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -7,3 +7,7 @@
 22012727170	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
 		SUBPLATFORM(DG2, G11)
 22012727685	SUBPLATFORM(DG2, G11)
+16015675438	PLATFORM(PVC)
+		SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
+18020744125	PLATFORM(PVC)
-- 
GitLab


From 5e782507f67ab378046f6fcb9de03fd25693fdc4 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:56 -0700
Subject: [PATCH 1585/2340] drm/xe/guc: Port Wa_1509372804 to xe_wa

Port Wa_1509372804 to xe_wa so it's reported as active.

v2: Match workaround database, starting from A0 stepping (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-20-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c        | 3 +--
 drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index cc58a20922367..3ed460d3b6ca8 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -168,8 +168,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	    !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
 		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
 
-	/* Wa_1509372804 */
-	if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_B0, STEP_C0))
+	if (XE_WA(gt, 1509372804))
 		flags |= GUC_WA_RENDER_RST_RC6_EXIT;
 
 	return flags;
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index ebb576f27b3ee..f6c4a0e055e01 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -11,3 +11,4 @@
 		SUBPLATFORM(DG2, G10)
 		SUBPLATFORM(DG2, G12)
 18020744125	PLATFORM(PVC)
+1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
-- 
GitLab


From 3e488e98fb9eb4cd9220417e69e75c8271294a02 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:57 -0700
Subject: [PATCH 1586/2340] drm/xe/rtp: Also check gt type

When running rules on MTL and beyond that have media as a standalone GT,
the rule should only match if the gt passed as parameter match the
version/range/stepping that the rule is checking. This allows
workarounds affecting only the media GT to be applied only on that GT
and vice-versa.

For platforms before MTL, the GT will not be of media type, even if it
includes media engines. Make sure to cover that case by checking if the
platforma has standalone media.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-21-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index ebcfb04c391a0..43a86358efb68 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -23,6 +23,11 @@
  * the values to the registers that have matching rules.
  */
 
+static bool has_samedia(const struct xe_device *xe)
+{
+	return xe->info.media_verx100 >= 1300;
+}
+
 static bool rule_matches(const struct xe_device *xe,
 			 struct xe_gt *gt,
 			 struct xe_hw_engine *hwe,
@@ -43,26 +48,32 @@ static bool rule_matches(const struct xe_device *xe,
 				xe->info.subplatform == r->subplatform;
 			break;
 		case XE_RTP_MATCH_GRAPHICS_VERSION:
-			match = xe->info.graphics_verx100 == r->ver_start;
+			match = xe->info.graphics_verx100 == r->ver_start &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
 			break;
 		case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
 			match = xe->info.graphics_verx100 >= r->ver_start &&
-				xe->info.graphics_verx100 <= r->ver_end;
+				xe->info.graphics_verx100 <= r->ver_end &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
 			break;
 		case XE_RTP_MATCH_GRAPHICS_STEP:
 			match = xe->info.step.graphics >= r->step_start &&
-				xe->info.step.graphics < r->step_end;
+				xe->info.step.graphics < r->step_end &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
 			break;
 		case XE_RTP_MATCH_MEDIA_VERSION:
-			match = xe->info.media_verx100 == r->ver_start;
+			match = xe->info.media_verx100 == r->ver_start &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
 			break;
 		case XE_RTP_MATCH_MEDIA_VERSION_RANGE:
 			match = xe->info.media_verx100 >= r->ver_start &&
-				xe->info.media_verx100 <= r->ver_end;
+				xe->info.media_verx100 <= r->ver_end &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
 			break;
 		case XE_RTP_MATCH_MEDIA_STEP:
 			match = xe->info.step.media >= r->step_start &&
-				xe->info.step.media < r->step_end;
+				xe->info.step.media < r->step_end &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
 			break;
 		case XE_RTP_MATCH_INTEGRATED:
 			match = !xe->info.is_dgfx;
-- 
GitLab


From 87c299fa3a97740ddc0fa9b19ee4054004686f76 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 26 May 2023 09:43:58 -0700
Subject: [PATCH 1587/2340] drm/xe/guc: Port Wa_14014475959 to xe_wa and fix it

Port Wa_14014475959 to xe_wa fixing its condition. The workaround should
only be applied on the primary GT, not on media. So just checking by
MTL platform is not enough: checking GT is of the right type is also
needed.

Since the GRAPHICS_STEP() does checks the GT type, we could leave the
first check as a platform one: it'd would be easier to understand and
not go out of sync with the graphics_ip_map[] in
drivers/gpu/drm/xe/xe_pci.c. However it also means that new platforms
using the same IP wouldn't match. Prefer using the IP version.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230526164358.86393-22-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c        | 4 +---
 drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 3ed460d3b6ca8..ecc843d91f624 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -142,9 +142,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	if (XE_WA(gt, 16011759253))
 		flags |= GUC_WA_GAM_CREDITS;
 
-	/* Wa_14014475959 */
-	if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) ||
-	    xe->info.platform == XE_DG2)
+	if (XE_WA(gt, 14014475959))
 		flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
 
 	if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797))
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index f6c4a0e055e01..1ecb10390b280 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,5 +1,7 @@
 22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
 16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
+14014475959	GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
+		PLATFORM(DG2)
 22011391025	PLATFORM(DG2)
 14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
 16011777198	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
-- 
GitLab


From 9922bb40e2ef98c17fb142d22843c0c70ba35e5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 24 May 2023 16:51:42 +0000
Subject: [PATCH 1588/2340] drm/xe: Fix the migrate selftest for integrated
 GPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TTM resource cursor was set up incorrectly.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 0f4371ad1fd95..f8ee9b9fca990 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -303,9 +303,14 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
 	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
 		expected |= XE_PTE_PS64;
-	xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+	if (xe_bo_is_vram(pt))
+		xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+	else
+		xe_res_first_sg(xe_bo_get_sg(pt), 0, pt->size, &src_it);
+
 	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
 		 &src_it, XE_PAGE_SIZE, pt);
+
 	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
 
 	retval = xe_map_rd(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
-- 
GitLab


From 3690a01ba926e3f1314d805d1af500fcf3edef7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 24 May 2023 16:52:29 +0000
Subject: [PATCH 1589/2340] drm/xe: Support copying of data between system
 memory bos
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify the xe_migrate_copy() function somewhat to explicitly allow
copying of data between two buffer objects including system memory
buffer objects. Update the migrate test accordingly.

v2:
- Check that buffer object sizes match when copying (Matthew Auld)

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 16 ++++-------
 drivers/gpu/drm/xe/xe_bo.c            |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c       | 40 +++++++++++++++++----------
 drivers/gpu/drm/xe/xe_migrate.h       |  3 +-
 4 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index f8ee9b9fca990..4a3ca2960fd54 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -150,7 +150,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
 
 	expected = 0xc0c0c0c0c0c0c0c0;
-	fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource,
+	fence = xe_migrate_copy(m, sysmem, bo, sysmem->ttm.resource,
 				bo->ttm.resource);
 	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" :
 				 "Copying small bo sysmem -> vram", test)) {
@@ -167,7 +167,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
 	xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
 
-	fence = xe_migrate_copy(m, sysmem, bo->ttm.resource,
+	fence = xe_migrate_copy(m, bo, sysmem, bo->ttm.resource,
 				sysmem->ttm.resource);
 	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" :
 				 "Copying small bo vram -> sysmem", test)) {
@@ -347,10 +347,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
 	check(retval, expected, "Command clear small last value", test);
 
-	if (IS_DGFX(xe)) {
-		kunit_info(test, "Copying small buffer object to system\n");
-		test_copy(m, tiny, test);
-	}
+	kunit_info(test, "Copying small buffer object to system\n");
+	test_copy(m, tiny, test);
 
 	/* Clear a big bo */
 	kunit_info(test, "Clearing big buffer object\n");
@@ -366,10 +364,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
 	check(retval, expected, "Command clear big last value", test);
 
-	if (IS_DGFX(xe)) {
-		kunit_info(test, "Copying big buffer object to system\n");
-		test_copy(m, big, test);
-	}
+	kunit_info(test, "Copying big buffer object to system\n");
+	test_copy(m, big, test);
 
 	kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
 	test_pt_update(m, pt, test, false);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 798b9938e534c..71864ef95328e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -679,7 +679,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		if (move_lacks_source)
 			fence = xe_migrate_clear(gt->migrate, bo, new_mem);
 		else
-			fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
+			fence = xe_migrate_copy(gt->migrate, bo, bo, old_mem, new_mem);
 		if (IS_ERR(fence)) {
 			ret = PTR_ERR(fence);
 			xe_device_mem_access_put(xe);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index d7da5bf2d9842..7d0a23577d360 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -582,30 +582,31 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 /**
  * xe_migrate_copy() - Copy content of TTM resources.
  * @m: The migration context.
- * @bo: The buffer object @src is currently bound to.
+ * @src_bo: The buffer object @src is currently bound to.
+ * @dst_bo: If copying between resources created for the same bo, set this to
+ * the same value as @src_bo. If copying between buffer objects, set it to
+ * the buffer object @dst is currently bound to.
  * @src: The source TTM resource.
  * @dst: The dst TTM resource.
  *
  * Copies the contents of @src to @dst: On flat CCS devices,
  * the CCS metadata is copied as well if needed, or if not present,
  * the CCS metadata of @dst is cleared for security reasons.
- * It's currently not possible to copy between two system resources,
- * since that would require two TTM page-vectors.
- * TODO: Eliminate the @bo argument and supply two TTM page-vectors.
  *
  * Return: Pointer to a dma_fence representing the last copy batch, or
  * an error pointer on failure. If there is a failure, any copy operation
  * started by the function call has been synced.
  */
 struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
-				  struct xe_bo *bo,
+				  struct xe_bo *src_bo,
+				  struct xe_bo *dst_bo,
 				  struct ttm_resource *src,
 				  struct ttm_resource *dst)
 {
 	struct xe_gt *gt = m->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct dma_fence *fence = NULL;
-	u64 size = bo->size;
+	u64 size = src_bo->size;
 	struct xe_res_cursor src_it, dst_it, ccs_it;
 	u64 src_L0_ofs, dst_L0_ofs;
 	u32 src_L0_pt, dst_L0_pt;
@@ -614,20 +615,28 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 	int err;
 	bool src_is_vram = mem_type_is_vram(src->mem_type);
 	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
-	bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo);
+	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
+		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
 	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
 
+	/* Copying CCS between two different BOs is not supported yet. */
+	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
+		return ERR_PTR(-EINVAL);
+
+	if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size))
+		return ERR_PTR(-EINVAL);
+
 	if (!src_is_vram)
-		xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &src_it);
+		xe_res_first_sg(xe_bo_get_sg(src_bo), 0, size, &src_it);
 	else
 		xe_res_first(src, 0, size, &src_it);
 	if (!dst_is_vram)
-		xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &dst_it);
+		xe_res_first_sg(xe_bo_get_sg(dst_bo), 0, size, &dst_it);
 	else
 		xe_res_first(dst, 0, size, &dst_it);
 
 	if (copy_system_ccs)
-		xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo),
+		xe_res_first_sg(xe_bo_get_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
 				PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
 				&ccs_it);
 
@@ -681,18 +690,18 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 		if (!src_is_vram)
 			emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0,
-				 bo);
+				 src_bo);
 		else
 			xe_res_next(&src_it, src_L0);
 
 		if (!dst_is_vram)
 			emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0,
-				 bo);
+				 dst_bo);
 		else
 			xe_res_next(&dst_it, src_L0);
 
 		if (copy_system_ccs)
-			emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo);
+			emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, src_bo);
 
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
@@ -714,8 +723,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 		xe_sched_job_add_migrate_flush(job, flush_flags);
 		if (!fence) {
-			err = job_add_deps(job, bo->ttm.base.resv,
+			err = job_add_deps(job, src_bo->ttm.base.resv,
 					   DMA_RESV_USAGE_BOOKKEEP);
+			if (!err && src_bo != dst_bo)
+				err = job_add_deps(job, dst_bo->ttm.base.resv,
+						   DMA_RESV_USAGE_BOOKKEEP);
 			if (err)
 				goto err_job;
 		}
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 1ff6e0a90de50..c283b626c21c0 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -73,7 +73,8 @@ struct xe_migrate_pt_update {
 struct xe_migrate *xe_migrate_init(struct xe_gt *gt);
 
 struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
-				  struct xe_bo *bo,
+				  struct xe_bo *src_bo,
+				  struct xe_bo *dst_bo,
 				  struct ttm_resource *src,
 				  struct ttm_resource *dst);
 
-- 
GitLab


From 38453f826db89045d505c2122fd8e25cd6099007 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 25 May 2023 12:45:42 +0100
Subject: [PATCH 1590/2340] drm/xe/bo: further limit where CCS pages are needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No need to allocate extra pages for this if we know flat-ccs AUX state
is not even possible, like for normal system memory objects.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 71864ef95328e..394e4bfae6e19 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -285,6 +285,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 	struct xe_device *xe = xe_bo_device(bo);
 	struct xe_ttm_tt *tt;
+	unsigned long extra_pages;
 	int err;
 
 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -293,12 +294,15 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 
 	tt->dev = xe->drm.dev;
 
+	extra_pages = 0;
+	if (xe_bo_needs_ccs_pages(bo))
+		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
+					   PAGE_SIZE);
+
 	/* TODO: Select caching mode */
 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags,
 			  bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached,
-			  DIV_ROUND_UP(xe_device_ccs_bytes(xe_bo_device(bo),
-							   bo->ttm.base.size),
-				       PAGE_SIZE));
+			  extra_pages);
 	if (err) {
 		kfree(tt);
 		return NULL;
-- 
GitLab


From a2f9f4ff07aac81e80ff1e0913fdbfdde6ba6665 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 25 May 2023 12:45:43 +0100
Subject: [PATCH 1591/2340] drm/xe/migrate: retain CCS aux state for vram ->
 vram
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no mention that migrate_copy() will skip copying the CCS aux
state for all types of vram -> vram transfers.  Currently we don't need
such a facility but might be surprising if we ever do.

v2: (Lucas):
  - s/lmem/vram/ in the commit message
  - Tidy up the code a bit; use one emit_copy_ccs()
v3:
  - Reword the commit message

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7d0a23577d360..9a676287e7415 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -552,11 +552,19 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 
 	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
 		/*
-		 * If the bo doesn't have any CCS metadata attached, we still
-		 * need to clear it for security reasons.
+		 * If the src is already in vram, then it should already
+		 * have been cleared by us, or has been populated by the
+		 * user. Make sure we copy the CCS aux state as-is.
+		 *
+		 * Otherwise if the bo doesn't have any CCS metadata attached,
+		 * we still need to clear it for security reasons.
 		 */
-		emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false,
-			      dst_size);
+		u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_vram_ofs;
+
+		emit_copy_ccs(gt, bb,
+			      dst_ofs, true,
+			      ccs_src_ofs, src_is_vram, dst_size);
+
 		flush_flags = MI_FLUSH_DW_CCS;
 	} else if (copy_ccs) {
 		if (!src_is_vram)
-- 
GitLab


From 565ce72e1c2d540d36ade02e6a7479c4c6a7f2d4 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 24 May 2023 18:56:53 +0100
Subject: [PATCH 1592/2340] drm/xe: don't allocate under ct->lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Seems to be a sensitive lock, where ct->lock looks to be primed with
fs_reclaim, so holding that and then allocating memory will cause
lockdep to complain. We need to change the ordering wrt to grabbing the
ct->lock and potentially grabbing the runtime_pm, since some of the
runtime_pm routines can allocate memory (or at least that's what lockdep
seems to suggest).

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  4 ++++
 drivers/gpu/drm/xe/xe_guc_ct.c              | 13 +++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index c815a42e2cdba..20f8f0aae6b42 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -5,6 +5,7 @@
 
 #include "xe_gt_tlb_invalidation.h"
 
+#include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
@@ -112,6 +113,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	 * in order which they currently are, if that changes the algorithm will
 	 * need to be updated.
 	 */
+
+	xe_device_mem_access_get(gt->xe);
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
 	if (fence) {
@@ -140,6 +143,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	if (ret < 0 && fence)
 		invalidation_fence_signal(fence);
 	mutex_unlock(&guc->ct.lock);
+	xe_device_mem_access_put(gt->xe);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index e8c2edb1359d9..9dc906f2651a5 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -498,26 +498,22 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 		}
 	}
 
-	xe_device_mem_access_get(ct_to_xe(ct));
 retry:
 	ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
 	if (unlikely(ret))
-		goto put_wa;
+		goto out;
 
 	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
 			!!g2h_fence);
 	if (unlikely(ret)) {
 		if (ret == -EAGAIN)
 			goto retry;
-		goto put_wa;
+		goto out;
 	}
 
 	g2h_reserve_space(ct, g2h_len, num_g2h);
 	xe_guc_notify(ct_to_guc(ct));
-put_wa:
-	xe_device_mem_access_put(ct_to_xe(ct));
 out:
-
 	return ret;
 }
 
@@ -539,6 +535,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
 
 	XE_BUG_ON(g2h_len && g2h_fence);
 	lockdep_assert_held(&ct->lock);
+	xe_device_assert_mem_access(ct_to_xe(ct));
 
 try_again:
 	ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
@@ -608,10 +605,14 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
 
 	XE_BUG_ON(g2h_len && g2h_fence);
 
+	xe_device_mem_access_get(ct_to_xe(ct));
+
 	mutex_lock(&ct->lock);
 	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
 	mutex_unlock(&ct->lock);
 
+	xe_device_mem_access_put(ct_to_xe(ct));
+
 	return ret;
 }
 
-- 
GitLab


From 3af4365003971946fdd2cca44858d6d16929f2d3 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 24 May 2023 18:56:54 +0100
Subject: [PATCH 1593/2340] drm/xe: keep pulling mem_access_get further back
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lockdep is unhappy about ggtt->lock -> runtime_pm, where it seems
to think this can somehow get inverted. The ggtt->lock looks like a
potentially sensitive driver lock, so likely a sensible move to never
call the runtime_pm routines while holding it. Actually it looks like
d3cold wants to grab this, so perhaps this can indeed deadlock.

v2:
 - Don't forget about xe_gt_tlb_invalidation_vma(), which now needs
   explicit access_get.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c                |  6 ++++++
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 10 ++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 98903354b4366..1ed22b5f89adf 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -142,12 +142,14 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
 	u64 start, end;
 
 	/* Display may have allocated inside ggtt, so be careful with clearing here */
+	xe_device_mem_access_get(ggtt->gt->xe);
 	mutex_lock(&ggtt->lock);
 	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
 		xe_ggtt_clear(ggtt, start, end - start);
 
 	xe_ggtt_invalidate(ggtt->gt);
 	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(ggtt->gt->xe);
 }
 
 int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
@@ -284,12 +286,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 	if (err)
 		return err;
 
+	xe_device_mem_access_get(ggtt->gt->xe);
 	mutex_lock(&ggtt->lock);
 	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
 					  alignment, 0, start, end, 0);
 	if (!err)
 		xe_ggtt_map_bo(ggtt, bo);
 	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(ggtt->gt->xe);
 
 	return err;
 }
@@ -318,6 +322,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 {
+	xe_device_mem_access_get(ggtt->gt->xe);
 	mutex_lock(&ggtt->lock);
 
 	xe_ggtt_clear(ggtt, node->start, node->size);
@@ -327,6 +332,7 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 	xe_ggtt_invalidate(ggtt->gt);
 
 	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(ggtt->gt->xe);
 }
 
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 20f8f0aae6b42..44e442bf306ca 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -114,7 +114,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	 * need to be updated.
 	 */
 
-	xe_device_mem_access_get(gt->xe);
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
 	if (fence) {
@@ -143,7 +142,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	if (ret < 0 && fence)
 		invalidation_fence_signal(fence);
 	mutex_unlock(&guc->ct.lock);
-	xe_device_mem_access_put(gt->xe);
 
 	return ret;
 }
@@ -196,7 +194,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	struct xe_device *xe = gt_to_xe(gt);
 #define MAX_TLB_INVALIDATION_LEN	7
 	u32 action[MAX_TLB_INVALIDATION_LEN];
-	int len = 0;
+	int len = 0, ret;
 
 	XE_BUG_ON(!vma);
 
@@ -250,7 +248,11 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 
 	XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
 
-	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
+	xe_device_mem_access_get(gt->xe);
+	ret = send_tlb_invalidation(&gt->uc.guc, fence, action, len);
+	xe_device_mem_access_put(gt->xe);
+
+	return ret;
 }
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
-- 
GitLab


From 094d739f4dbb6322ae21b3dab8e6a7d272347dc7 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 25 May 2023 21:14:29 +0200
Subject: [PATCH 1594/2340] drm/xe: Prevent evicting for page tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When creating page tables from xe_exec_ioctl, we may end up freeing
memory we just validated. To be certain this does not happen, do not
allow the current reservation to be evicted from the ioctl.

Callchain:
[  109.008522]  xe_bo_move_notify+0x5c/0xf0 [xe]
[  109.008548]  xe_bo_move+0x90/0x510 [xe]
[  109.008573]  ttm_bo_handle_move_mem+0xb7/0x170 [ttm]
[  109.008581]  ttm_bo_swapout+0x15e/0x360 [ttm]
[  109.008586]  ttm_device_swapout+0xc2/0x110 [ttm]
[  109.008592]  ttm_global_swapout+0x47/0xc0 [ttm]
[  109.008598]  ttm_tt_populate+0x7a/0x130 [ttm]
[  109.008603]  ttm_bo_handle_move_mem+0x160/0x170 [ttm]
[  109.008609]  ttm_bo_validate+0xe5/0x1d0 [ttm]
[  109.008614]  ttm_bo_init_reserved+0xac/0x190 [ttm]
[  109.008620]  __xe_bo_create_locked+0x153/0x260 [xe]
[  109.008645]  xe_bo_create_locked_range+0x77/0x360 [xe]
[  109.008671]  xe_bo_create_pin_map_at+0x33/0x1f0 [xe]
[  109.008695]  xe_bo_create_pin_map+0x11/0x20 [xe]
[  109.008721]  xe_pt_create+0x69/0xf0 [xe]
[  109.008749]  xe_pt_stage_bind_entry+0x208/0x430 [xe]
[  109.008776]  xe_pt_walk_range+0xe9/0x2a0 [xe]
[  109.008802]  xe_pt_walk_range+0x223/0x2a0 [xe]
[  109.008828]  xe_pt_walk_range+0x223/0x2a0 [xe]
[  109.008853]  __xe_pt_bind_vma+0x28d/0xbd0 [xe]
[  109.008878]  xe_vm_bind_vma+0xc7/0x2f0 [xe]
[  109.008904]  xe_vm_rebind+0x72/0x160 [xe]
[  109.008930]  xe_exec_ioctl+0x22b/0xa70 [xe]
[  109.008955]  drm_ioctl_kernel+0xb9/0x150 [drm]
[  109.008972]  drm_ioctl+0x210/0x430 [drm]
[  109.008988]  __x64_sys_ioctl+0x85/0xb0
[  109.008990]  do_syscall_64+0x38/0x90
[  109.008991]  entry_SYSCALL_64_after_hwframe+0x72/0xdc

Original warning:
[ 5613.149126] WARNING: CPU: 3 PID: 45883 at drivers/gpu/drm/xe/xe_vm.c:504 xe_vm_unlock_dma_resv+0x43/0x50 [xe]
...
[ 5613.226398] RIP: 0010:xe_vm_unlock_dma_resv+0x43/0x50 [xe]
[ 5613.316098] Call Trace:
[ 5613.318595]  <TASK>
[ 5613.320743]  xe_exec_ioctl+0x383/0x8a0 [xe]
[ 5613.325278]  ? __is_insn_slot_addr+0x8e/0x110
[ 5613.329719]  ? __is_insn_slot_addr+0x8e/0x110
[ 5613.334116]  ? kernel_text_address+0x75/0xf0
[ 5613.338429]  ? __pfx_stack_trace_consume_entry+0x10/0x10
[ 5613.343778]  ? __kernel_text_address+0x9/0x40
[ 5613.348181]  ? unwind_get_return_address+0x1a/0x30
[ 5613.353013]  ? __pfx_stack_trace_consume_entry+0x10/0x10
[ 5613.358362]  ? arch_stack_walk+0x99/0xf0
[ 5613.362329]  ? rcu_read_lock_sched_held+0xb/0x70
[ 5613.366996]  ? lock_acquire+0x287/0x2f0
[ 5613.370873]  ? rcu_read_lock_sched_held+0xb/0x70
[ 5613.375530]  ? rcu_read_lock_sched_held+0xb/0x70
[ 5613.380181]  ? lock_release+0x225/0x2e0
[ 5613.384059]  ? __pfx_xe_exec_ioctl+0x10/0x10 [xe]
[ 5613.389092]  drm_ioctl_kernel+0xc0/0x170
[ 5613.393068]  drm_ioctl+0x1b7/0x490
[ 5613.396519]  ? __pfx_xe_exec_ioctl+0x10/0x10 [xe]
[ 5613.401547]  ? lock_release+0x225/0x2e0
[ 5613.405432]  __x64_sys_ioctl+0x8a/0xb0
[ 5613.409232]  do_syscall_64+0x37/0x90

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/239
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 2 +-
 drivers/gpu/drm/xe/xe_bo.h | 7 ++++---
 drivers/gpu/drm/xe/xe_pt.c | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 394e4bfae6e19..39b3b9aa7c270 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1134,7 +1134,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
 
 	if (resv) {
-		ctx.allow_res_evict = true;
+		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
 		ctx.resv = resv;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 2d08622f58a7d..7ede50f2cbf3d 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -27,9 +27,10 @@
 #define XE_BO_CREATE_GGTT_BIT		BIT(5)
 #define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
 #define XE_BO_CREATE_PINNED_BIT		BIT(7)
-#define XE_BO_DEFER_BACKING		BIT(8)
-#define XE_BO_SCANOUT_BIT		BIT(9)
-#define XE_BO_FIXED_PLACEMENT_BIT	BIT(10)
+#define XE_BO_CREATE_NO_RESV_EVICT	BIT(8)
+#define XE_BO_DEFER_BACKING		BIT(9)
+#define XE_BO_SCANOUT_BIT		BIT(10)
+#define XE_BO_FIXED_PLACEMENT_BIT	BIT(11)
 /* this one is trigger internally only */
 #define XE_BO_INTERNAL_TEST		BIT(30)
 #define XE_BO_INTERNAL_64K		BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index f15282996c3b1..30de6e902a8e5 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -219,7 +219,8 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
 				  ttm_bo_type_kernel,
 				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
 				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
-				  XE_BO_CREATE_PINNED_BIT);
+				  XE_BO_CREATE_PINNED_BIT |
+				  XE_BO_CREATE_NO_RESV_EVICT);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto err_kfree;
-- 
GitLab


From 61f288a8972253f4168f37331e26b6b0f7c9bc9d Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Thu, 25 May 2023 15:43:23 -0400
Subject: [PATCH 1595/2340] drm/xe: Rework size helper to be a little more
 correct

The _total_vram_size helper is device based and is not complete.

Teach the helper to be tile aware and add the ability to size
DG1 correctly.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h   |  2 +-
 drivers/gpu/drm/xe/xe_mmio.c           | 80 ++++++++++++++++----------
 drivers/gpu/drm/xe/xe_mmio.h           |  6 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 34 +++++------
 4 files changed, 74 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5c239989608f0..d8b480f69c5f0 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -74,7 +74,7 @@
 #define VE1_AUX_INV				XE_REG(0x42b8)
 #define   AUX_INV				REG_BIT(0)
 
-#define XEHP_TILE0_ADDR_RANGE			XE_REG_MCR(0x4900)
+#define XEHP_TILE_ADDR_RANGE(_idx)		XE_REG_MCR(0x4900 + (_idx) * 4)
 #define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
 
 #define CHICKEN_RASTER_1			XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED)
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 87dd417e3f08c..5e8791bd0b16d 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: MIT
 /*
- * Copyright © 2021 Intel Corporation
+ * Copyright © 2021-2023 Intel Corporation
  */
 
 #include "xe_mmio.h"
@@ -20,7 +20,6 @@
 
 #define XEHP_MTCFG_ADDR		XE_REG(0x101800)
 #define TILE_COUNT		REG_GENMASK(15, 8)
-#define GEN12_LMEM_BAR		2
 
 static int xe_set_dma_info(struct xe_device *xe)
 {
@@ -145,34 +144,56 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
 	return true;
 }
 
-int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_size)
+/**
+ * xe_mmio_tile_vram_size() - Collect vram size and offset information
+ * @gt: tile to get info for
+ * @vram_size: available vram (size - device reserved portions)
+ * @tile_size: actual vram size
+ * @tile_offset: physical start point in the vram address space
+ *
+ * There are 4 places for size information:
+ * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1)
+ * - TILEx size (actual vram size)
+ * - GSMBASE offset (TILEx - "stolen")
+ * - CSSBASE offset (TILEx - CSS space necessary)
+ *
+ * CSSBASE is always a lower/smaller offset then GSMBASE.
+ *
+ * The actual available size of memory is to the CCS or GSM base.
+ * NOTE: multi-tile bases will include the tile offset.
+ *
+ */
+int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_offset)
 {
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
-	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	u64 offset;
 	int err;
-	u32 reg_val;
-
-	if (!xe->info.has_flat_ccs)  {
-		*vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-		if (usable_size)
-			*usable_size = min(*vram_size,
-					   xe_mmio_read64(gt, GSMBASE));
-		return 0;
-	}
+	u32 reg;
 
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		return err;
 
-	reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
-	*vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg_val) * SZ_1G;
-	if (usable_size) {
-		reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
-		*usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg_val) * SZ_64K;
-		drm_info(&xe->drm, "vram_size: 0x%llx usable_size: 0x%llx\n",
-			 *vram_size, *usable_size);
+	/* actual size */
+	if (unlikely(gt->xe->info.platform == XE_DG1)) {
+		*tile_size = pci_resource_len(to_pci_dev(gt->xe->drm.dev), GEN12_LMEM_BAR);
+		*tile_offset = 0;
+	} else {
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
+		*tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+		*tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G;
 	}
 
+	/* minus device usage */
+	if (gt->xe->info.has_flat_ccs) {
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+	} else {
+		offset = xe_mmio_read64(gt, GSMBASE);
+	}
+
+	/* remove the tile offset so we have just the available size */
+	*vram_size = offset - *tile_offset;
+
 	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 }
 
@@ -180,11 +201,12 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct xe_gt *gt;
-	u8 id;
-	u64 vram_size;
 	u64 original_size;
-	u64 usable_size;
+	u64 tile_offset;
+	u64 tile_size;
+	u64 vram_size;
 	int err;
+	u8 id;
 
 	if (!IS_DGFX(xe)) {
 		xe->mem.vram.mapping = 0;
@@ -209,25 +231,25 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	gt = xe_device_get_gt(xe, 0);
 	original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 
-	err = xe_mmio_total_vram_size(xe, &vram_size, &usable_size);
+	err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset);
 	if (err)
 		return err;
 
 	xe_resize_vram_bar(xe, vram_size);
 	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
-	xe->mem.vram.io_size = min(usable_size,
+	xe->mem.vram.io_size = min(vram_size,
 				   pci_resource_len(pdev, GEN12_LMEM_BAR));
 	xe->mem.vram.size = xe->mem.vram.io_size;
 
 	if (!xe->mem.vram.size)
 		return -EIO;
 
-	if (usable_size > xe->mem.vram.io_size)
+	if (vram_size > xe->mem.vram.io_size)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (%lluMiB->%lluMiB)\n",
-			 (u64)usable_size >> 20, (u64)xe->mem.vram.io_size >> 20);
+			 (u64)vram_size >> 20, (u64)xe->mem.vram.io_size >> 20);
 
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
-	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
+	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, vram_size);
 
 	drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 1407f1189b0d2..da91729a38542 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Copyright © 2021 Intel Corporation
+ * Copyright © 2021-2023 Intel Corporation
  */
 
 #ifndef _XE_MMIO_H_
@@ -16,6 +16,8 @@ struct drm_device;
 struct drm_file;
 struct xe_device;
 
+#define GEN12_LMEM_BAR		2
+
 int xe_mmio_init(struct xe_device *xe);
 
 static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
@@ -131,6 +133,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range,
 }
 
 int xe_mmio_probe_vram(struct xe_device *xe);
-int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base);
+int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_base);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index a3855870321f5..d49b2cfeba925 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: MIT
 /*
- * Copyright © 2021-2022 Intel Corporation
+ * Copyright © 2021-2023 Intel Corporation
  * Copyright (C) 2021-2002 Red Hat
  */
 
@@ -51,27 +51,29 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
 	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
 }
 
-static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+static s64 detect_bar2_dgfx(struct xe_gt *gt, struct xe_ttm_stolen_mgr *mgr)
 {
-	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-	struct xe_gt *gt = to_gt(xe);
-	u64 vram_size, stolen_size;
-	int err;
-
-	err = xe_mmio_total_vram_size(xe, &vram_size, NULL);
-	if (err) {
-		drm_info(&xe->drm, "Querying total vram size failed\n");
+	struct pci_dev *pdev = to_pci_dev(gt->xe->drm.dev);
+	u64 stolen_size;
+	u64 tile_offset;
+	u64 tile_size;
+	u64 vram_size;
+
+	if (xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset)) {
+		drm_err(&gt->xe->drm, "Querying total vram size failed\n");
 		return 0;
 	}
 
 	/* Use DSM base address instead for stolen memory */
-	mgr->stolen_base = xe_mmio_read64(gt, DSMBASE) & BDSM_MASK;
-	if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base))
+	mgr->stolen_base = (xe_mmio_read64(gt, DSMBASE) & BDSM_MASK) - tile_offset;
+	if (drm_WARN_ON(&gt->xe->drm, tile_size < mgr->stolen_base))
 		return 0;
 
-	stolen_size = vram_size - mgr->stolen_base;
-	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, 2))
-		mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
+	stolen_size = tile_size - mgr->stolen_base;
+
+	/* Verify usage fits in the actual resource available */
+	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, GEN12_LMEM_BAR))
+		mgr->io_base = gt->mem.vram.io_start + mgr->stolen_base;
 
 	/*
 	 * There may be few KB of platform dependent reserved memory at the end
@@ -139,7 +141,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 	int err;
 
 	if (IS_DGFX(xe))
-		stolen_size = detect_bar2_dgfx(xe, mgr);
+		stolen_size = detect_bar2_dgfx(to_gt(xe), mgr);
 	else if (GRAPHICS_VERx100(xe) >= 1270)
 		stolen_size = detect_bar2_integrated(xe, mgr);
 	else
-- 
GitLab


From 7f075300a31829a6a5a388313f1a67e31eba012e Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Thu, 25 May 2023 15:43:24 -0400
Subject: [PATCH 1596/2340] drm/xe: Simplify rebar sizing

"Right sizing" the PCI BAR is not necessary.  If rebar is needed
size to the maximum available.

Preserve the force_vram_bar_size sizing.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  14 +--
 drivers/gpu/drm/xe/xe_gt_types.h     |  12 +--
 drivers/gpu/drm/xe/xe_mmio.c         | 131 +++++++++++++++------------
 3 files changed, 89 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 91edbe4a3730e..0c31b341162a0 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Copyright © 2022 Intel Corporation
+ * Copyright © 2022-2023 Intel Corporation
  */
 
 #ifndef _XE_DEVICE_TYPES_H_
@@ -133,11 +133,13 @@ struct xe_device {
 			/**
 			 * @io_size: IO size of VRAM.
 			 *
-			 * This represents how much of VRAM we can access via
-			 * the CPU through the VRAM BAR. This can be smaller
-			 * than @size, in which case only part of VRAM is CPU
-			 * accessible (typically the first 256M). This
-			 * configuration is known as small-bar.
+			 * This represents how much of VRAM the CPU can access
+			 * via the VRAM BAR.
+			 * On systems that do not support large BAR IO space,
+			 * this can be smaller than the actual memory size, in
+			 * which case only part of VRAM is CPU accessible
+			 * (typically the first 256M).  This configuration is
+			 * known as small-bar.
 			 */
 			resource_size_t io_size;
 			/** @size: Total size of VRAM */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index b83c834e7ced3..993f855025fd5 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 /*
- * Copyright © 2022 Intel Corporation
+ * Copyright © 2022-2023 Intel Corporation
  */
 
 #ifndef _XE_GT_TYPES_H_
@@ -148,11 +148,11 @@ struct xe_gt {
 			/**
 			 * @io_size: IO size of this VRAM instance
 			 *
-			 * This represents how much of this VRAM we can access
-			 * via the CPU through the VRAM BAR. This can be smaller
-			 * than @size, in which case only part of VRAM is CPU
-			 * accessible (typically the first 256M). This
-			 * configuration is known as small-bar.
+			 * This represents how much of the VRAM the CPU can access
+			 * via the VRAM BAR.
+			 * This can be smaller than the actual @size, in which
+			 * case only part of VRAM is CPU accessible (typically
+			 * the first 256M). This configuration is known as small-bar.
 			 */
 			resource_size_t io_size;
 			/** @size: size of VRAM. */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 5e8791bd0b16d..665fcb23bbbb0 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -3,6 +3,8 @@
  * Copyright © 2021-2023 Intel Corporation
  */
 
+#include <linux/minmax.h>
+
 #include "xe_mmio.h"
 
 #include <drm/drm_managed.h>
@@ -21,6 +23,8 @@
 #define XEHP_MTCFG_ADDR		XE_REG(0x101800)
 #define TILE_COUNT		REG_GENMASK(15, 8)
 
+#define BAR_SIZE_SHIFT 20
+
 static int xe_set_dma_info(struct xe_device *xe)
 {
 	unsigned int mask_size = xe->info.dma_mask_size;
@@ -57,49 +61,61 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 	if (ret) {
 		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
 			 resno, 1 << bar_size, ERR_PTR(ret));
-		return -1;
+		return ret;
 	}
 
 	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
-	return 1;
+	return ret;
 }
 
-static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size)
+/*
+ * if force_vram_bar_size is set, attempt to set to the requested size
+ * else set to maximum possible size
+ */
+static int xe_resize_vram_bar(struct xe_device *xe)
 {
+	u64 force_vram_bar_size = xe_force_vram_bar_size;
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct pci_bus *root = pdev->bus;
-	struct resource *root_res;
-	resource_size_t rebar_size;
 	resource_size_t current_size;
+	resource_size_t rebar_size;
+	struct resource *root_res;
+	u32 bar_size_mask;
 	u32 pci_cmd;
 	int i;
 	int ret;
-	u64 force_vram_bar_size = xe_force_vram_bar_size;
 
-	current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR));
+	/* gather some relevant info */
+	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+	bar_size_mask = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
 
+	if (!bar_size_mask)
+		return 0;
+
+	/* set to a specific size? */
 	if (force_vram_bar_size) {
-		u32 bar_sizes;
+		u32 bar_size_bit;
 
 		rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;
-		bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
 
-		if (rebar_size == current_size)
-			return 0;
+		bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size));
 
-		if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) ||
-		    rebar_size >= roundup_pow_of_two(vram_size)) {
-			rebar_size = vram_size;
+		if (!bar_size_bit) {
 			drm_info(&xe->drm,
-				 "Given bar size is not within supported size, setting it to default: %lluMiB\n",
-				 (u64)vram_size >> 20);
+				 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
+				 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
+			return 0;
 		}
+
+		rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);
+
+		if (rebar_size == current_size)
+			return 0;
 	} else {
-		rebar_size = current_size;
+		rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);
 
-		if (rebar_size != roundup_pow_of_two(vram_size))
-			rebar_size = vram_size;
-		else
+		/* only resize if larger than current */
+		if (rebar_size <= current_size)
 			return 0;
 	}
 
@@ -144,6 +160,31 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
 	return true;
 }
 
+static int xe_determine_lmem_bar_size(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int err;
+
+	if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) {
+		drm_err(&xe->drm, "pci resource is not valid\n");
+		return -ENXIO;
+	}
+
+	err = xe_resize_vram_bar(xe);
+	if (err)
+		return err;
+
+	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
+	xe->mem.vram.io_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+	if (!xe->mem.vram.io_size)
+		return -EIO;
+
+	/* set up a map to the total memory area. */
+	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
+
+	return 0;
+}
+
 /**
  * xe_mmio_tile_vram_size() - Collect vram size and offset information
  * @gt: tile to get info for
@@ -199,59 +240,37 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 
 int xe_mmio_probe_vram(struct xe_device *xe)
 {
-	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct xe_gt *gt;
-	u64 original_size;
 	u64 tile_offset;
 	u64 tile_size;
 	u64 vram_size;
 	int err;
 	u8 id;
 
-	if (!IS_DGFX(xe)) {
-		xe->mem.vram.mapping = 0;
-		xe->mem.vram.size = 0;
-		xe->mem.vram.io_start = 0;
-		xe->mem.vram.io_size = 0;
-
-		for_each_gt(gt, xe, id) {
-			gt->mem.vram.mapping = 0;
-			gt->mem.vram.size = 0;
-			gt->mem.vram.io_start = 0;
-			gt->mem.vram.io_size = 0;
-		}
+	if (!IS_DGFX(xe))
 		return 0;
-	}
-
-	if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) {
-		drm_err(&xe->drm, "pci resource is not valid\n");
-		return -ENXIO;
-	}
 
+	/* Get the size of the gt0 vram for later accessibility comparison */
 	gt = xe_device_get_gt(xe, 0);
-	original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-
 	err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset);
 	if (err)
 		return err;
 
-	xe_resize_vram_bar(xe, vram_size);
-	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
-	xe->mem.vram.io_size = min(vram_size,
-				   pci_resource_len(pdev, GEN12_LMEM_BAR));
-	xe->mem.vram.size = xe->mem.vram.io_size;
-
-	if (!xe->mem.vram.size)
-		return -EIO;
+	err = xe_determine_lmem_bar_size(xe);
+	if (err)
+		return err;
 
-	if (vram_size > xe->mem.vram.io_size)
-		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (%lluMiB->%lluMiB)\n",
-			 (u64)vram_size >> 20, (u64)xe->mem.vram.io_size >> 20);
+	/* small bar issues will only cover gt0 sizes */
+	if (xe->mem.vram.io_size < vram_size)
+		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
+			 vram_size, (u64)xe->mem.vram.io_size);
 
-	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
-	xe->mem.vram.size = min_t(u64, xe->mem.vram.size, vram_size);
+	/* Limit size to available memory to account for the current memory algorithm */
+	xe->mem.vram.io_size = min_t(u64, xe->mem.vram.io_size, vram_size);
+	xe->mem.vram.size = xe->mem.vram.io_size;
 
-	drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
+	drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &xe->mem.vram.io_size);
 
 	/* FIXME: Assuming equally partitioned VRAM, incorrect */
 	if (xe->info.tile_count > 1) {
-- 
GitLab


From 2d830096e41403ba67c9d066de2fb818f81d9591 Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Thu, 25 May 2023 15:43:25 -0400
Subject: [PATCH 1597/2340] drm/xe: Size GT device memory correctly

The current method of sizing GT device memory is not quite right.

Update the algorithm to use the relevant HW information and offsets
to set up the sizing correctly.

Update the stolen memory sizing to reflect the changes, and to be
GT specific.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  2 +
 drivers/gpu/drm/xe/xe_gt_types.h     |  2 +
 drivers/gpu/drm/xe/xe_mmio.c         | 93 ++++++++++++++--------------
 3 files changed, 51 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 0c31b341162a0..5b3f270bf7906 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -144,6 +144,8 @@ struct xe_device {
 			resource_size_t io_size;
 			/** @size: Total size of VRAM */
 			resource_size_t size;
+			/** @base: Offset to apply for Device Physical Address control */
+			resource_size_t base;
 			/** @mapping: pointer to VRAM mappable space */
 			void *__iomem mapping;
 		} vram;
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 993f855025fd5..093d650c35f4d 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -155,6 +155,8 @@ struct xe_gt {
 			 * the first 256M). This configuration is known as small-bar.
 			 */
 			resource_size_t io_size;
+			/** @base: offset of VRAM starting base */
+			resource_size_t base;
 			/** @size: size of VRAM. */
 			resource_size_t size;
 			/** @mapping: pointer to VRAM mappable space */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 665fcb23bbbb0..d3b57669c9a76 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -179,6 +179,8 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
 	if (!xe->mem.vram.io_size)
 		return -EIO;
 
+	xe->mem.vram.base = 0; /* DPA offset */
+
 	/* set up a map to the total memory area. */
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
 
@@ -240,6 +242,9 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 
 int xe_mmio_probe_vram(struct xe_device *xe)
 {
+	resource_size_t io_size;
+	u64 available_size = 0;
+	u64 total_size = 0;
 	struct xe_gt *gt;
 	u64 tile_offset;
 	u64 tile_size;
@@ -265,64 +270,60 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
 			 vram_size, (u64)xe->mem.vram.io_size);
 
-	/* Limit size to available memory to account for the current memory algorithm */
-	xe->mem.vram.io_size = min_t(u64, xe->mem.vram.io_size, vram_size);
-	xe->mem.vram.size = xe->mem.vram.io_size;
-
 	drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
 		 &xe->mem.vram.io_size);
 
-	/* FIXME: Assuming equally partitioned VRAM, incorrect */
-	if (xe->info.tile_count > 1) {
-		u8 adj_tile_count = xe->info.tile_count;
-		resource_size_t size, io_start, io_size;
+	io_size = xe->mem.vram.io_size;
 
-		for_each_gt(gt, xe, id)
-			if (xe_gt_is_media_type(gt))
-				--adj_tile_count;
+	/* gt specific ranges */
+	for_each_gt(gt, xe, id) {
+		if (xe_gt_is_media_type(gt))
+			continue;
 
-		XE_BUG_ON(!adj_tile_count);
+		err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset);
+		if (err)
+			return err;
 
-		size = xe->mem.vram.size / adj_tile_count;
-		io_start = xe->mem.vram.io_start;
-		io_size = xe->mem.vram.io_size;
+		gt->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
+		gt->mem.vram.io_size = min_t(u64, vram_size, io_size);
 
-		for_each_gt(gt, xe, id) {
-			if (id && !xe_gt_is_media_type(gt)) {
-				io_size -= min(io_size, size);
-				io_start += io_size;
-			}
+		if (!gt->mem.vram.io_size) {
+			drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
+			return -ENODEV;
+		}
 
-			gt->mem.vram.size = size;
-
-			/*
-			 * XXX: multi-tile small-bar might be wild. Hopefully
-			 * full tile without any mappable vram is not something
-			 * we care about.
-			 */
-
-			gt->mem.vram.io_size = min(size, io_size);
-			if (io_size) {
-				gt->mem.vram.io_start = io_start;
-				gt->mem.vram.mapping = xe->mem.vram.mapping +
-					(io_start - xe->mem.vram.io_start);
-			} else {
-				drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
-				return -ENODEV;
-			}
+		gt->mem.vram.base = tile_offset;
+
+		/* small bar can limit the visible size.  size accordingly */
+		gt->mem.vram.size = min_t(u64, vram_size, io_size);
+		gt->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
 
-			drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n",
-				 id, gt->info.vram_id, &gt->mem.vram.io_start,
-				 &gt->mem.vram.size);
+		drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, gt->info.vram_id,
+			 &gt->mem.vram.io_start, &gt->mem.vram.size);
+
+		if (gt->mem.vram.io_size < gt->mem.vram.size)
+			drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id,
+				 gt->info.vram_id, &gt->mem.vram.io_size);
+
+		/* calculate total size using tile size to get the correct HW sizing */
+		total_size += tile_size;
+		available_size += vram_size;
+
+		if (total_size > xe->mem.vram.io_size) {
+			drm_warn(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
+				 &total_size, &xe->mem.vram.io_size);
 		}
-	} else {
-		gt->mem.vram.size = xe->mem.vram.size;
-		gt->mem.vram.io_start = xe->mem.vram.io_start;
-		gt->mem.vram.io_size = xe->mem.vram.io_size;
-		gt->mem.vram.mapping = xe->mem.vram.mapping;
 
-		drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size);
+		io_size -= min_t(u64, tile_size, io_size);
 	}
+
+	xe->mem.vram.size = total_size;
+
+	drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &xe->mem.vram.size);
+	drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &available_size);
+
 	return 0;
 }
 
-- 
GitLab


From fb31517cd712f9a29608bc24fbcaf45d14e9c40e Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Thu, 25 May 2023 15:43:26 -0400
Subject: [PATCH 1598/2340] drm/xe: Rename GPU offset helper to reflect true
 usage

The _io_offset helper function is returning an offset into the GPU
address space.  Using the CPU address offset (io_) is not correct.

Rename to reflect usage.
Update to use GPU offset information.
Update PT dma_offset to use the helper

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c      | 8 ++++----
 drivers/gpu/drm/xe/xe_bo.h      | 2 +-
 drivers/gpu/drm/xe/xe_migrate.c | 4 ++--
 drivers/gpu/drm/xe/xe_pt.c      | 5 +----
 4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 39b3b9aa7c270..e766f8955718f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1341,7 +1341,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
  * XXX: This is in the VM bind data path, likely should calculate this once and
  * store, with a recalculation if the BO is moved.
  */
-uint64_t vram_region_io_offset(struct ttm_resource *res)
+uint64_t vram_region_gpu_offset(struct ttm_resource *res)
 {
 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
 	struct xe_gt *gt = mem_type_to_gt(xe, res->mem_type);
@@ -1349,7 +1349,7 @@ uint64_t vram_region_io_offset(struct ttm_resource *res)
 	if (res->mem_type == XE_PL_STOLEN)
 		return xe_ttm_stolen_gpu_offset(xe);
 
-	return gt->mem.vram.io_start - xe->mem.vram.io_start;
+	return xe->mem.vram.base + gt->mem.vram.base;
 }
 
 /**
@@ -1433,7 +1433,7 @@ int xe_bo_pin(struct xe_bo *bo)
 			XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
 
 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) -
-				       vram_region_io_offset(bo->ttm.resource)) >> PAGE_SHIFT;
+				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
 
 			spin_lock(&xe->pinned.lock);
@@ -1580,7 +1580,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset,
 
 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
 			     page_size, &cur);
-		return cur.start + offset + vram_region_io_offset(bo->ttm.resource);
+		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 7ede50f2cbf3d..e6d08fa9c992b 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -227,7 +227,7 @@ void xe_bo_vunmap(struct xe_bo *bo);
 bool mem_type_is_vram(u32 mem_type);
 bool xe_bo_is_vram(struct xe_bo *bo);
 bool xe_bo_is_stolen(struct xe_bo *bo);
-uint64_t vram_region_io_offset(struct ttm_resource *res);
+uint64_t vram_region_gpu_offset(struct ttm_resource *res);
 
 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9a676287e7415..7a2188f02a86d 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -419,7 +419,7 @@ static u32 pte_update_size(struct xe_migrate *m,
 	} else {
 		/* Offset into identity map. */
 		*L0_ofs = xe_migrate_vram_ofs(cur->start +
-					      vram_region_io_offset(res));
+					      vram_region_gpu_offset(res));
 		cmds += cmd_size;
 	}
 
@@ -469,7 +469,7 @@ static void emit_pte(struct xe_migrate *m,
 					addr |= XE_PTE_PS64;
 				}
 
-				addr += vram_region_io_offset(bo->ttm.resource);
+				addr += vram_region_gpu_offset(bo->ttm.resource);
 				addr |= XE_PPGTT_PTE_LM;
 			}
 			addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 30de6e902a8e5..2a5481111a5f3 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -759,13 +759,10 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
 	int ret;
 
 	if (is_vram) {
-		struct xe_gt *bo_gt = xe_bo_to_gt(bo);
-
 		xe_walk.default_pte = XE_PPGTT_PTE_LM;
 		if (vma && vma->use_atomic_access_pte_bit)
 			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-		xe_walk.dma_offset = bo_gt->mem.vram.io_start -
-			gt_to_xe(gt)->mem.vram.io_start;
+		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
 		xe_walk.cache = XE_CACHE_WB;
 	} else {
 		if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT)
-- 
GitLab


From 4e40483644098ef75ea1344e5cdc9285e30c28ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Tue, 23 May 2023 13:14:45 -0700
Subject: [PATCH 1599/2340] drm/xe: Replace PVC check by engine type check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__emit_job_gen12_render_compute() masks some PIPE_CONTROL bits that
do not exist in platforms without render engine.
So here replacing the PVC check by something more generic that will
support any future platforms without render engine.

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index a09ee8c736b53..a70fa6d9ae606 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -249,11 +249,11 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	u32 ppgtt_flag = get_ppgtt_flag(job);
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
-	bool pvc = xe->info.platform == XE_PVC;
+	bool lacks_render = !(xe->gt[0].info.engine_mask & XE_HW_ENGINE_RCS_MASK);
 	u32 mask_flags = 0;
 
 	dw[i++] = preparser_disable(true);
-	if (pvc)
+	if (lacks_render)
 		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
 	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
 		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
@@ -275,7 +275,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 						job->user_fence.value,
 						dw, i);
 
-	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i);
+	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);
 
 	i = emit_user_interrupt(dw, i);
 
-- 
GitLab


From dbd6c64c99a8eb5ed85adec5a24e30a62ace7b91 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 1 Jun 2023 13:35:05 +0100
Subject: [PATCH 1600/2340] drm/xe/vm: fix double list add
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It looks like the driver only wants to track one vma for each external
object per vm. However it looks like bo_has_vm_references_locked() will
ignore any vma that is marked as vma->destroyed (not actually destroyed
yet). If we then mark our externally tracked vma as destroyed and then
create a new vma for the same object and vm, we can have two externally
tracked vma for the same object and vm. When the destroy actually
happens it tries to move the external tracking to a different vma, but
in this case it is already being tracked, leading to double list add
errors. It should be safe to simply drop the destroyed check in
bo_has_vm_references(), since the actual destroy will switch the
external tracking to the next available vma.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/290
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ffa102870d1fd..5af370640fb15 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -964,7 +964,7 @@ bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
 	struct xe_vma *vma;
 
 	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		if (vma != ignore && vma->vm == vm && !vma->destroyed)
+		if (vma != ignore && vma->vm == vm)
 			return vma;
 	}
 
-- 
GitLab


From dbc4f5d15a8eecf0f5e7ba1a8e563c31237f6adb Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:14 -0700
Subject: [PATCH 1601/2340] drm/xe/mtl: Disable media GT

Xe incorrectly conflates the concept of 'tile' and 'GT.'  Since MTL's
media support is not yet functioning properly, let's just disable it
completely for now while we fix the fundamental driver design.  Support
for media GTs on platforms like MTL will be re-added later.

v2:
 - Drop some unrelated code cleanup that didn't belong in this patch.
   (Lucas)
v3:
 - Drop unnecessary xe_gt.h include.  (Gustavo)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c |  2 --
 drivers/gpu/drm/xe/xe_pci.c  | 12 ------------
 2 files changed, 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index d3b57669c9a76..ef2353eef6fec 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -340,8 +340,6 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR);
 	adj_tile_count = xe->info.tile_count =
 		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
-	if (xe->info.media_verx100 >= 1300)
-		xe->info.tile_count *= 2;
 
 	drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
 		 xe->info.tile_count, adj_tile_count);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 50027eb642ea6..c8784f41506ed 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -275,20 +275,10 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	.extra_gts = pvc_gts,
 };
 
-static const struct xe_gt_desc xelpmp_gts[] = {
-	{
-		.type = XE_GT_TYPE_MEDIA,
-		.vram_id = 0,
-		.mmio_adj_limit = 0x40000,
-		.mmio_adj_offset = 0x380000,
-	},
-};
-
 static const struct xe_device_desc mtl_desc = {
 	/* .graphics and .media determined via GMD_ID */
 	.require_force_probe = true,
 	PLATFORM(XE_METEORLAKE),
-	.extra_gts = xelpmp_gts,
 };
 
 #undef PLATFORM
@@ -545,8 +535,6 @@ static int xe_info_init(struct xe_device *xe,
 	 * treats it as the number of GTs rather than just the number of tiles.
 	 */
 	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
-	if (MEDIA_VER(xe) >= 13)
-		xe->info.tile_count++;
 
 	for (id = 0; id < xe->info.tile_count; ++id) {
 		gt = xe->gt + id;
-- 
GitLab


From a5edc7cdb3875115d1798f4d2057569cf257e7d2 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:15 -0700
Subject: [PATCH 1602/2340] drm/xe: Introduce xe_tile

Create a new xe_tile structure to begin separating the concept of "tile"
from "GT."  A tile is effectively a complete GPU, and a GT is just one
part of that.  On platforms like MTL, there's only a single full GPU
(tile) which has its IP blocks provided by two GTs.  In contrast, a
"multi-tile" platform like PVC is basically multiple complete GPUs
packed behind a single PCI device.

For now, just create xe_tile as a simple wrapper around xe_gt.  The
items in xe_gt that are truly tied to the tile rather than the GT will
be moved in future patches.  Support for multiple GTs per tile (i.e.,
the MTL standalone media case) will also be re-introduced in a future
patch.

v2:
 - Fix kunit test build
 - Move hunk from next patch to use local tile variable rather than
   direct xe->tiles[id] accesses.  (Lucas)
 - Mention compute in kerneldoc.  (Rodrigo)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c       |  2 +-
 drivers/gpu/drm/xe/tests/xe_rtp_test.c |  6 ++--
 drivers/gpu/drm/xe/xe_device.h         | 11 +++++--
 drivers/gpu/drm/xe/xe_device_types.h   | 40 ++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_gt_types.h       | 15 ++++++----
 drivers/gpu/drm/xe/xe_mmio.c           | 13 +++++----
 drivers/gpu/drm/xe/xe_pci.c            |  7 ++++-
 drivers/gpu/drm/xe/xe_ring_ops.c       |  2 +-
 drivers/gpu/drm/xe/xe_vm.c             |  2 +-
 drivers/gpu/drm/xe/xe_vm_types.h       |  8 +++---
 10 files changed, 79 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9bd381e5b7a6a..6075f12a19626 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -174,7 +174,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 	struct xe_bo *bo, *external;
 	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
 		XE_BO_CREATE_VRAM_IF_DGFX(gt);
-	struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate);
+	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->primary_gt.migrate);
 	struct ww_acquire_ctx ww;
 	int err, i;
 
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index ab6f7a47db502..45f2614f91ec1 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -13,6 +13,7 @@
 
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_reg_defs.h"
+#include "xe_device.h"
 #include "xe_device_types.h"
 #include "xe_pci_test.h"
 #include "xe_reg_sr.h"
@@ -236,9 +237,10 @@ static void xe_rtp_process_tests(struct kunit *test)
 {
 	const struct rtp_test_case *param = test->param_value;
 	struct xe_device *xe = test->priv;
-	struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr;
+	struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt;
+	struct xe_reg_sr *reg_sr = &gt->reg_sr;
 	const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
-	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(&xe->gt[0]);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
 	unsigned long idx, count = 0;
 
 	xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index cbae480a20922..f7acaf51a1fc7 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -48,12 +48,17 @@ static inline struct xe_file *to_xe_file(const struct drm_file *file)
 	return file->driver_priv;
 }
 
+static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
+{
+	return &xe->tiles[0];
+}
+
 static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
 {
 	struct xe_gt *gt;
 
-	XE_BUG_ON(gt_id > XE_MAX_GT);
-	gt = xe->gt + gt_id;
+	XE_BUG_ON(gt_id > XE_MAX_TILES_PER_DEVICE);
+	gt = &xe->tiles[gt_id].primary_gt;
 	XE_BUG_ON(gt->info.id != gt_id);
 	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
 
@@ -65,7 +70,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
  */
 static inline struct xe_gt *to_gt(struct xe_device *xe)
 {
-	return xe->gt;
+	return &xe_device_get_root_tile(xe)->primary_gt;
 }
 
 static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 5b3f270bf7906..b76344a9c33b0 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -29,7 +29,7 @@
 
 #define XE_GT0		0
 #define XE_GT1		1
-#define XE_MAX_GT	(XE_GT1 + 1)
+#define XE_MAX_TILES_PER_DEVICE	(XE_GT1 + 1)
 
 #define XE_MAX_ASID	(BIT(20))
 
@@ -43,6 +43,40 @@
 	 (_xe)->info.step.graphics >= (min_step) &&			\
 	 (_xe)->info.step.graphics < (max_step))
 
+#define tile_to_xe(tile__)								\
+	_Generic(tile__,								\
+		 const struct xe_tile *: (const struct xe_device *)((tile__)->xe),	\
+		 struct xe_tile *: (tile__)->xe)
+
+/**
+ * struct xe_tile - hardware tile structure
+ *
+ * From a driver perspective, a "tile" is effectively a complete GPU, containing
+ * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM.
+ *
+ * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI
+ * device and designate one "root" tile as being responsible for external PCI
+ * communication.  PCI BAR0 exposes the GGTT and MMIO register space for each
+ * tile in a stacked layout, and PCI BAR2 exposes the local memory associated
+ * with each tile similarly.  Device-wide interrupts can be enabled/disabled
+ * at the root tile, and the MSTR_TILE_INTR register will report which tiles
+ * have interrupts that need servicing.
+ */
+struct xe_tile {
+	/** @xe: Backpointer to tile's PCI device */
+	struct xe_device *xe;
+
+	/** @id: ID of the tile */
+	u8 id;
+
+	/**
+	 * @primary_gt: Primary GT
+	 */
+	struct xe_gt primary_gt;
+
+	/* TODO: Add media GT here */
+};
+
 /**
  * struct xe_device - Top level struct of XE device
  */
@@ -193,8 +227,8 @@ struct xe_device {
 	/** @ordered_wq: used to serialize compute mode resume */
 	struct workqueue_struct *ordered_wq;
 
-	/** @gt: graphics tile */
-	struct xe_gt gt[XE_MAX_GT];
+	/** @tiles: device tiles */
+	struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
 
 	/**
 	 * @mem_access: keep track of memory access in the device, possibly
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 093d650c35f4d..456e3e447a2ea 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -77,12 +77,17 @@ enum xe_steering_type {
 };
 
 /**
- * struct xe_gt - Top level struct of a graphics tile
+ * struct xe_gt - A "Graphics Technology" unit of the GPU
  *
- * A graphics tile may be a physical split (duplicate pieces of silicon,
- * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). Either way
- * this structure encapsulates of everything a GT is (MMIO, VRAM, memory
- * management, microcontrols, and a hardware set of engines).
+ * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
+ * for implementing the graphics, compute, and/or media IP.  It encapsulates
+ * the hardware engines, programmable execution units, and GuC.   Each GT has
+ * its own handling of power management (RC6+forcewake) and multicast register
+ * steering.
+ *
+ * A GPU/tile may have a single GT that supplies all graphics, compute, and
+ * media functionality, or the graphics/compute and media may be split into
+ * separate GTs within a tile.
  */
 struct xe_gt {
 	/** @xe: backpointer to XE device */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index ef2353eef6fec..9bc5715e9ebe3 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -438,6 +438,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 		  struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
+	struct xe_gt *gt = xe_device_get_gt(xe, 0);
 	struct drm_xe_mmio *args = data;
 	unsigned int bits_flag, bytes;
 	struct xe_reg reg;
@@ -480,7 +481,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	 */
 	reg = XE_REG(args->addr);
 
-	xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 
 	if (args->flags & DRM_XE_MMIO_WRITE) {
 		switch (bits_flag) {
@@ -489,10 +490,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 				ret = -EINVAL;
 				goto exit;
 			}
-			xe_mmio_write32(to_gt(xe), reg, args->value);
+			xe_mmio_write32(gt, reg, args->value);
 			break;
 		case DRM_XE_MMIO_64BIT:
-			xe_mmio_write64(to_gt(xe), reg, args->value);
+			xe_mmio_write64(gt, reg, args->value);
 			break;
 		default:
 			drm_dbg(&xe->drm, "Invalid MMIO bit size");
@@ -507,10 +508,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & DRM_XE_MMIO_READ) {
 		switch (bits_flag) {
 		case DRM_XE_MMIO_32BIT:
-			args->value = xe_mmio_read32(to_gt(xe), reg);
+			args->value = xe_mmio_read32(gt, reg);
 			break;
 		case DRM_XE_MMIO_64BIT:
-			args->value = xe_mmio_read64(to_gt(xe), reg);
+			args->value = xe_mmio_read64(gt, reg);
 			break;
 		default:
 			drm_dbg(&xe->drm, "Invalid MMIO bit size");
@@ -522,7 +523,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	}
 
 exit:
-	xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c8784f41506ed..d56b1c566d811 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -478,6 +478,7 @@ static int xe_info_init(struct xe_device *xe,
 	const struct xe_graphics_desc *graphics_desc = NULL;
 	const struct xe_media_desc *media_desc = NULL;
 	u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
+	struct xe_tile *tile;
 	struct xe_gt *gt;
 	u8 id;
 
@@ -537,7 +538,11 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
 
 	for (id = 0; id < xe->info.tile_count; ++id) {
-		gt = xe->gt + id;
+		tile = &xe->tiles[id];
+		tile->xe = xe;
+		tile->id = id;
+
+		gt = &tile->primary_gt;
 		gt->info.id = id;
 		gt->xe = xe;
 
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index a70fa6d9ae606..45117a2ab1a0c 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -249,7 +249,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	u32 ppgtt_flag = get_ppgtt_flag(job);
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
-	bool lacks_render = !(xe->gt[0].info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
 	u32 mask_flags = 0;
 
 	dw[i++] = preparser_disable(true);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 5af370640fb15..798cba1bda6bf 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3389,7 +3389,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	struct xe_device *xe = vma->vm->xe;
 	struct xe_gt *gt;
 	u32 gt_needs_invalidate = 0;
-	int seqno[XE_MAX_GT];
+	int seqno[XE_MAX_TILES_PER_DEVICE];
 	u8 id;
 	int ret;
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index fada7896867f3..203ba9d946b8d 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -159,7 +159,7 @@ struct xe_vm {
 	struct kref refcount;
 
 	/* engine used for (un)binding vma's */
-	struct xe_engine *eng[XE_MAX_GT];
+	struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE];
 
 	/** Protects @rebind_list and the page-table structures */
 	struct dma_resv resv;
@@ -167,9 +167,9 @@ struct xe_vm {
 	u64 size;
 	struct rb_root vmas;
 
-	struct xe_pt *pt_root[XE_MAX_GT];
-	struct xe_bo *scratch_bo[XE_MAX_GT];
-	struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL];
+	struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
+	struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
+	struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
 
 	/** @flags: flags for this VM, statically setup a creation time */
 #define XE_VM_FLAGS_64K			BIT(0)
-- 
GitLab


From f79ee3013ad57021f4557cd3aa964a14b5c94bd4 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:16 -0700
Subject: [PATCH 1603/2340] drm/xe: Add backpointer from gt to tile

Rather than a backpointer to the xe_device, a GT should have a
backpointer to its tile (which can then be used to lookup the device if
necessary).

The gt_to_xe() helper macro (which moves from xe_gt.h to xe_gt_types.h)
can and should still be used to jump directly from an xe_gt to
xe_device.

v2:
 - Fix kunit test build
 - Move a couple changes to the previous patch. (Lucas)

Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c            |  2 +-
 drivers/gpu/drm/xe/xe_bb.c                  |  2 +-
 drivers/gpu/drm/xe/xe_ggtt.c                | 12 ++++++------
 drivers/gpu/drm/xe/xe_gt.h                  |  5 -----
 drivers/gpu/drm/xe/xe_gt_printk.h           |  6 +++---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  8 ++++----
 drivers/gpu/drm/xe/xe_gt_types.h            | 14 ++++++++++++--
 drivers/gpu/drm/xe/xe_mmio.c                |  6 +++---
 drivers/gpu/drm/xe/xe_mocs.c                | 14 +++++++-------
 drivers/gpu/drm/xe/xe_pci.c                 |  2 +-
 drivers/gpu/drm/xe/xe_pt.c                  |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c      |  6 +++---
 12 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 6075f12a19626..8f3afdc6cca6a 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -90,7 +90,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
 	}
 
 	/* Check last CCS value, or at least last value in page. */
-	offset = xe_device_ccs_bytes(gt->xe, bo->size);
+	offset = xe_device_ccs_bytes(gt_to_xe(gt), bo->size);
 	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
 	if (cpu_map[offset] != get_val) {
 		KUNIT_FAIL(test,
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 3deb2d55f4218..bf7c94b769d7f 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -16,7 +16,7 @@
 
 static int bb_prefetch(struct xe_gt *gt)
 {
-	struct xe_device *xe = gt->xe;
+	struct xe_device *xe = gt_to_xe(gt);
 
 	if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
 		/*
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 1ed22b5f89adf..4eefb2b3166ce 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -142,14 +142,14 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
 	u64 start, end;
 
 	/* Display may have allocated inside ggtt, so be careful with clearing here */
-	xe_device_mem_access_get(ggtt->gt->xe);
+	xe_device_mem_access_get(gt_to_xe(ggtt->gt));
 	mutex_lock(&ggtt->lock);
 	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
 		xe_ggtt_clear(ggtt, start, end - start);
 
 	xe_ggtt_invalidate(ggtt->gt);
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(ggtt->gt->xe);
+	xe_device_mem_access_put(gt_to_xe(ggtt->gt));
 }
 
 int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
@@ -286,14 +286,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 	if (err)
 		return err;
 
-	xe_device_mem_access_get(ggtt->gt->xe);
+	xe_device_mem_access_get(gt_to_xe(ggtt->gt));
 	mutex_lock(&ggtt->lock);
 	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
 					  alignment, 0, start, end, 0);
 	if (!err)
 		xe_ggtt_map_bo(ggtt, bo);
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(ggtt->gt->xe);
+	xe_device_mem_access_put(gt_to_xe(ggtt->gt));
 
 	return err;
 }
@@ -322,7 +322,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 {
-	xe_device_mem_access_get(ggtt->gt->xe);
+	xe_device_mem_access_get(gt_to_xe(ggtt->gt));
 	mutex_lock(&ggtt->lock);
 
 	xe_ggtt_clear(ggtt, node->start, node->size);
@@ -332,7 +332,7 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 	xe_ggtt_invalidate(ggtt->gt);
 
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(ggtt->gt->xe);
+	xe_device_mem_access_put(gt_to_xe(ggtt->gt));
 }
 
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 086369f7ee6df..f4e98f499b360 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -49,11 +49,6 @@ static inline bool xe_gt_is_media_type(struct xe_gt *gt)
 	return gt->info.type == XE_GT_TYPE_MEDIA;
 }
 
-#define gt_to_xe(gt__)								\
-	_Generic(gt__,								\
-		 const struct xe_gt *: (const struct xe_device *)((gt__)->xe),	\
-		 struct xe_gt *: (gt__)->xe)
-
 static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
 {
 	struct xe_device *xe = gt_to_xe(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 0b801429cf1a1..5991bcadd47e0 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -11,7 +11,7 @@
 #include "xe_device_types.h"
 
 #define xe_gt_printk(_gt, _level, _fmt, ...) \
-	drm_##_level(&(_gt)->xe->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+	drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
 
 #define xe_gt_err(_gt, _fmt, ...) \
 	xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
@@ -32,10 +32,10 @@
 	xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
 
 #define xe_gt_WARN(_gt, _condition, _fmt, ...) \
-	drm_WARN(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+	drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
 
 #define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
-	drm_WARN_ONCE(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+	drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
 
 #define xe_gt_WARN_ON(_gt, _condition) \
 	xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 44e442bf306ca..2fcb477604e20 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -248,9 +248,9 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 
 	XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
 
-	xe_device_mem_access_get(gt->xe);
+	xe_device_mem_access_get(xe);
 	ret = send_tlb_invalidation(&gt->uc.guc, fence, action, len);
-	xe_device_mem_access_put(gt->xe);
+	xe_device_mem_access_put(xe);
 
 	return ret;
 }
@@ -328,8 +328,8 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		TLB_INVALIDATION_SEQNO_MAX;
 	if (!expected_seqno)
 		expected_seqno = 1;
-	if (drm_WARN_ON(&gt->xe->drm, expected_seqno != msg[0])) {
-		drm_err(&gt->xe->drm, "TLB expected_seqno(%d) != msg(%u)\n",
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, expected_seqno != msg[0])) {
+		drm_err(&gt_to_xe(gt)->drm, "TLB expected_seqno(%d) != msg(%u)\n",
 			expected_seqno, msg[0]);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 456e3e447a2ea..11605a99ad665 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -76,6 +76,16 @@ enum xe_steering_type {
 	NUM_STEERING_TYPES
 };
 
+#define gt_to_tile(gt__)							\
+	_Generic(gt__,								\
+		 const struct xe_gt *: (const struct xe_tile *)((gt__)->tile),	\
+		 struct xe_gt *: (gt__)->tile)
+
+#define gt_to_xe(gt__)										\
+	_Generic(gt__,										\
+		 const struct xe_gt *: (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
+		 struct xe_gt *: gt_to_tile(gt__)->xe)
+
 /**
  * struct xe_gt - A "Graphics Technology" unit of the GPU
  *
@@ -90,8 +100,8 @@ enum xe_steering_type {
  * separate GTs within a tile.
  */
 struct xe_gt {
-	/** @xe: backpointer to XE device */
-	struct xe_device *xe;
+	/** @tile: Backpointer to GT's tile */
+	struct xe_tile *tile;
 
 	/** @info: GT info */
 	struct {
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 9bc5715e9ebe3..79f902d2faea2 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -217,8 +217,8 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 		return err;
 
 	/* actual size */
-	if (unlikely(gt->xe->info.platform == XE_DG1)) {
-		*tile_size = pci_resource_len(to_pci_dev(gt->xe->drm.dev), GEN12_LMEM_BAR);
+	if (unlikely(gt_to_xe(gt)->info.platform == XE_DG1)) {
+		*tile_size = pci_resource_len(to_pci_dev(gt_to_xe(gt)->drm.dev), GEN12_LMEM_BAR);
 		*tile_offset = 0;
 	} else {
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
@@ -227,7 +227,7 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 	}
 
 	/* minus device usage */
-	if (gt->xe->info.has_flat_ccs) {
+	if (gt_to_xe(gt)->info.has_flat_ccs) {
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
 		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
 	} else {
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index c7a9e733ef3b1..86277ecb749bc 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -472,7 +472,7 @@ static void __init_mocs_table(struct xe_gt *gt,
 	unsigned int i;
 	u32 mocs;
 
-	mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
 	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
 		      "Unused entries index should have been defined\n");
 	for (i = 0;
@@ -480,7 +480,7 @@ static void __init_mocs_table(struct xe_gt *gt,
 	     i++) {
 		struct xe_reg reg = XE_REG(addr + i * 4);
 
-		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs);
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs);
 		xe_mmio_write32(gt, reg, mocs);
 	}
 }
@@ -509,13 +509,13 @@ static void init_l3cc_table(struct xe_gt *gt,
 	unsigned int i;
 	u32 l3cc;
 
-	mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
 	for (i = 0;
 	     i < (info->n_entries + 1) / 2 ?
 	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
 				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr,
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr,
 			 l3cc);
 		xe_mmio_write32(gt, LNCFCMOCS(i), l3cc);
 	}
@@ -525,7 +525,7 @@ void xe_mocs_init_early(struct xe_gt *gt)
 {
 	struct xe_mocs_info table;
 
-	get_mocs_settings(gt->xe, &table);
+	get_mocs_settings(gt_to_xe(gt), &table);
 	gt->mocs.uc_index = table.uc_index;
 	gt->mocs.wb_index = table.wb_index;
 }
@@ -538,8 +538,8 @@ void xe_mocs_init(struct xe_gt *gt)
 	/*
 	 * LLC and eDRAM control values are not applicable to dgfx
 	 */
-	flags = get_mocs_settings(gt->xe, &table);
-	mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags);
+	flags = get_mocs_settings(gt_to_xe(gt), &table);
+	mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags);
 
 	if (flags & HAS_GLOBAL_MOCS)
 		__init_mocs_table(gt, &table, GLOBAL_MOCS(0).addr);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index d56b1c566d811..f0db422def9d2 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -544,7 +544,7 @@ static int xe_info_init(struct xe_device *xe,
 
 		gt = &tile->primary_gt;
 		gt->info.id = id;
-		gt->xe = xe;
+		gt->tile = tile;
 
 		if (id == 0) {
 			gt->info.type = XE_GT_TYPE_MAIN;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 2a5481111a5f3..e2cd1946af5a4 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -696,7 +696,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		 * TODO: Suballocate the pt bo to avoid wasting a lot of
 		 * memory.
 		 */
-		if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 &&
+		if (GRAPHICS_VERx100(gt_to_xe(xe_walk->gt)) >= 1250 && level == 1 &&
 		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
 			walk->shifts = xe_compact_pt_shifts;
 			flags |= XE_PDE_64K;
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index d49b2cfeba925..49470f0722bd4 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -53,20 +53,20 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
 
 static s64 detect_bar2_dgfx(struct xe_gt *gt, struct xe_ttm_stolen_mgr *mgr)
 {
-	struct pci_dev *pdev = to_pci_dev(gt->xe->drm.dev);
+	struct pci_dev *pdev = to_pci_dev(gt_to_xe(gt)->drm.dev);
 	u64 stolen_size;
 	u64 tile_offset;
 	u64 tile_size;
 	u64 vram_size;
 
 	if (xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset)) {
-		drm_err(&gt->xe->drm, "Querying total vram size failed\n");
+		drm_err(&gt_to_xe(gt)->drm, "Querying total vram size failed\n");
 		return 0;
 	}
 
 	/* Use DSM base address instead for stolen memory */
 	mgr->stolen_base = (xe_mmio_read64(gt, DSMBASE) & BDSM_MASK) - tile_offset;
-	if (drm_WARN_ON(&gt->xe->drm, tile_size < mgr->stolen_base))
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, tile_size < mgr->stolen_base))
 		return 0;
 
 	stolen_size = tile_size - mgr->stolen_base;
-- 
GitLab


From 3643e6371542cc4782d3700f07130c9d250666d8 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:18 -0700
Subject: [PATCH 1604/2340] drm/xe: Add for_each_tile iterator

As we start splitting tile handling out from GT handling, we'll need to
be able to iterate over tiles separately from GTs.  This iterator will
be used in upcoming patches.

v2:
 - s/(id__++)/(id__)++/  (Gustavo)

Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-6-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.h | 4 ++++
 drivers/gpu/drm/xe/xe_pci.c    | 3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index f7acaf51a1fc7..3516ac1dcbc46 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -83,6 +83,10 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe)
 	xe->info.enable_guc = false;
 }
 
+#define for_each_tile(tile__, xe__, id__) \
+	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
+		for_each_if ((tile__) = &(xe__)->tiles[(id__)])
+
 #define for_each_gt(gt__, xe__, id__) \
 	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \
 		for_each_if ((gt__) = xe_device_get_gt((xe__), (id__)))
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index f0db422def9d2..e8931661c0041 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -537,8 +537,7 @@ static int xe_info_init(struct xe_device *xe,
 	 */
 	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
 
-	for (id = 0; id < xe->info.tile_count; ++id) {
-		tile = &xe->tiles[id];
+	for_each_tile(tile, xe, id) {
 		tile->xe = xe;
 		tile->id = id;
 
-- 
GitLab


From 3b0d4a5579968f1c42044142a4997bab9fe7ffed Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:19 -0700
Subject: [PATCH 1605/2340] drm/xe: Move register MMIO into xe_tile

Each tile has its own register region in the BAR, containing instances
of all registers for the platform.  In contrast, the multiple GTs within
a tile share the same MMIO space; there's just a small subset of
registers (the GSI registers) which have multiple copies at different
offsets (0x0 for primary GT, 0x380000 for media GT).  Move the register
MMIO region size/pointers to the tile structure, leaving just the GSI
offset information in the GT structure.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-7-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 16 ++++++++++++++++
 drivers/gpu/drm/xe/xe_ggtt.c         |  3 ++-
 drivers/gpu/drm/xe/xe_gt_types.h     |  9 +++------
 drivers/gpu/drm/xe/xe_mmio.c         | 26 ++++++++++++++------------
 drivers/gpu/drm/xe/xe_mmio.h         | 21 ++++++++++++++++-----
 5 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index b76344a9c33b0..107a947a73618 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -75,6 +75,22 @@ struct xe_tile {
 	struct xe_gt primary_gt;
 
 	/* TODO: Add media GT here */
+
+	/**
+	 * @mmio: MMIO info for a tile.
+	 *
+	 * Each tile has its own 16MB space in BAR0, laid out as:
+	 * * 0-4MB: registers
+	 * * 4MB-8MB: reserved
+	 * * 8MB-16MB: global GTT
+	 */
+	struct {
+		/** @size: size of tile's MMIO space */
+		size_t size;
+
+		/** @regs: pointer to tile's MMIO space (starting with registers) */
+		void *regs;
+	} mmio;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 4eefb2b3166ce..cd8ada94e688e 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -93,6 +93,7 @@ static void ggtt_fini_noalloc(struct drm_device *drm, void *arg)
 int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	unsigned int gsm_size;
 
@@ -106,7 +107,7 @@ int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
 		return -ENOMEM;
 	}
 
-	ggtt->gsm = gt->mmio.regs + SZ_8M;
+	ggtt->gsm = tile->mmio.regs + SZ_8M;
 	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;
 
 	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 11605a99ad665..81e6ab0c77e09 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -124,14 +124,11 @@ struct xe_gt {
 	} info;
 
 	/**
-	 * @mmio: mmio info for GT, can be subset of the global device mmio
-	 * space
+	 * @mmio: mmio info for GT.  All GTs within a tile share the same
+	 * register space, but have their own copy of GSI registers at a
+	 * specific offset, as well as their own forcewake handling.
 	 */
 	struct {
-		/** @size: size of MMIO space on GT */
-		size_t size;
-		/** @regs: pointer to MMIO space on GT */
-		void *regs;
 		/** @fw: force wake for GT */
 		struct xe_force_wake fw;
 		/**
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 79f902d2faea2..b27103080ca96 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -346,6 +346,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 
 	if (xe->info.tile_count > 1) {
 		const int mmio_bar = 0;
+		struct xe_tile *tile;
 		size_t size;
 		void *regs;
 
@@ -359,11 +360,11 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 		size = xe->mmio.size / adj_tile_count;
 		regs = xe->mmio.regs;
 
-		for_each_gt(gt, xe, id) {
-			if (id && !xe_gt_is_media_type(gt))
-				regs += size;
-			gt->mmio.size = size;
-			gt->mmio.regs = regs;
+		for_each_tile(tile, xe, id) {
+			tile->mmio.size = size;
+			tile->mmio.regs = regs;
+
+			regs += size;
 		}
 	}
 }
@@ -379,15 +380,16 @@ static void mmio_fini(struct drm_device *drm, void *arg)
 
 int xe_mmio_init(struct xe_device *xe)
 {
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
 	struct xe_gt *gt = xe_device_get_gt(xe, 0);
 	const int mmio_bar = 0;
 	int err;
 
 	/*
-	 * Map the entire BAR, which includes registers (0-4MB), reserved space
-	 * (4MB-8MB), and GGTT (8MB-16MB). Other parts of the driver (GTs,
-	 * GGTTs) will derive the pointers they need from the mapping in the
-	 * device structure.
+	 * Map the first 16MB of th BAR, which includes the registers (0-4MB),
+	 * reserved space (4MB-8MB), and GGTT (8MB-16MB) for a single tile.
+	 * This will get remapped later if we determine that we're running
+	 * on a multi-tile system.
 	 */
 	xe->mmio.size = SZ_16M;
 	xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar,
@@ -401,9 +403,9 @@ int xe_mmio_init(struct xe_device *xe)
 	if (err)
 		return err;
 
-	/* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */
-	gt->mmio.size = xe->mmio.size;
-	gt->mmio.regs = xe->mmio.regs;
+	/* Setup first tile; other tiles (if present) will be setup later. */
+	root_tile->mmio.size = xe->mmio.size;
+	root_tile->mmio.regs = xe->mmio.regs;
 
 	/*
 	 * The boot firmware initializes local memory and assesses its health.
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index da91729a38542..0ba7aa790f0b3 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -10,6 +10,7 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "regs/xe_reg_defs.h"
+#include "xe_device_types.h"
 #include "xe_gt_types.h"
 
 struct drm_device;
@@ -22,27 +23,33 @@ int xe_mmio_init(struct xe_device *xe);
 
 static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
 {
+	struct xe_tile *tile = gt_to_tile(gt);
+
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	return readb(gt->mmio.regs + reg.addr);
+	return readb(tile->mmio.regs + reg.addr);
 }
 
 static inline void xe_mmio_write32(struct xe_gt *gt,
 				   struct xe_reg reg, u32 val)
 {
+	struct xe_tile *tile = gt_to_tile(gt);
+
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	writel(val, gt->mmio.regs + reg.addr);
+	writel(val, tile->mmio.regs + reg.addr);
 }
 
 static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
 {
+	struct xe_tile *tile = gt_to_tile(gt);
+
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	return readl(gt->mmio.regs + reg.addr);
+	return readl(tile->mmio.regs + reg.addr);
 }
 
 static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
@@ -60,18 +67,22 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
 static inline void xe_mmio_write64(struct xe_gt *gt,
 				   struct xe_reg reg, u64 val)
 {
+	struct xe_tile *tile = gt_to_tile(gt);
+
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	writeq(val, gt->mmio.regs + reg.addr);
+	writeq(val, tile->mmio.regs + reg.addr);
 }
 
 static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
 {
+	struct xe_tile *tile = gt_to_tile(gt);
+
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	return readq(gt->mmio.regs + reg.addr);
+	return readq(tile->mmio.regs + reg.addr);
 }
 
 static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
-- 
GitLab


From ad703e06376d5d71acf61cac0c136b53959506bc Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:21 -0700
Subject: [PATCH 1606/2340] drm/xe: Move GGTT from GT to tile

The GGTT exists at the tile level.  When a tile contains multiple GTs,
they share the same GGTT.

v2:
 - Include some changes that were mis-squashed into the VRAM patch.
   (Gustavo)

Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-9-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |  1 +
 drivers/gpu/drm/xe/xe_bo.c           |  6 ++--
 drivers/gpu/drm/xe/xe_bo_evict.c     |  8 +++--
 drivers/gpu/drm/xe/xe_device.c       | 18 +++++++---
 drivers/gpu/drm/xe/xe_device_types.h |  8 +++++
 drivers/gpu/drm/xe/xe_ggtt.c         | 42 +++++++++++-----------
 drivers/gpu/drm/xe/xe_ggtt.h         |  6 ++--
 drivers/gpu/drm/xe/xe_ggtt_types.h   |  2 +-
 drivers/gpu/drm/xe/xe_gt.c           | 10 +-----
 drivers/gpu/drm/xe/xe_gt_debugfs.c   |  2 +-
 drivers/gpu/drm/xe/xe_gt_types.h     |  3 --
 drivers/gpu/drm/xe/xe_tile.c         | 52 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_tile.h         | 14 ++++++++
 13 files changed, 125 insertions(+), 47 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_tile.c
 create mode 100644 drivers/gpu/drm/xe/xe_tile.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index a685e39d6b442..c914d02d8a8c3 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -101,6 +101,7 @@ xe-y += xe_bb.o \
 	xe_sched_job.o \
 	xe_step.o \
 	xe_sync.o \
+	xe_tile.o \
 	xe_trace.o \
 	xe_ttm_sys_mgr.o \
 	xe_ttm_stolen_mgr.o \
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e766f8955718f..915d4c4b15c42 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -964,7 +964,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 	WARN_ON(!list_empty(&bo->vmas));
 
 	if (bo->ggtt_node.size)
-		xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo);
+		xe_ggtt_remove_bo(gt_to_tile(bo->gt)->mem.ggtt, bo);
 
 	if (bo->vm && xe_bo_is_user(bo))
 		xe_vm_put(bo->vm);
@@ -1242,9 +1242,9 @@ xe_bo_create_locked_range(struct xe_device *xe,
 
 		if (flags & XE_BO_CREATE_STOLEN_BIT &&
 		    flags & XE_BO_FIXED_PLACEMENT_BIT) {
-			err = xe_ggtt_insert_bo_at(gt->mem.ggtt, bo, start);
+			err = xe_ggtt_insert_bo_at(gt_to_tile(gt)->mem.ggtt, bo, start);
 		} else {
-			err = xe_ggtt_insert_bo(gt->mem.ggtt, bo);
+			err = xe_ggtt_insert_bo(gt_to_tile(gt)->mem.ggtt, bo);
 		}
 		if (err)
 			goto err_unlock_put_bo;
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 6642c5f520096..a72963c54bf32 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -149,9 +149,11 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 		}
 
 		if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
-			mutex_lock(&bo->gt->mem.ggtt->lock);
-			xe_ggtt_map_bo(bo->gt->mem.ggtt, bo);
-			mutex_unlock(&bo->gt->mem.ggtt->lock);
+			struct xe_tile *tile = gt_to_tile(bo->gt);
+
+			mutex_lock(&tile->mem.ggtt->lock);
+			xe_ggtt_map_bo(tile->mem.ggtt, bo);
+			mutex_unlock(&tile->mem.ggtt->lock);
 		}
 
 		/*
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 2c65eb84e6e98..0657842d8db2d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -27,6 +27,7 @@
 #include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_query.h"
+#include "xe_tile.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_sys_mgr.h"
 #include "xe_vm.h"
@@ -237,14 +238,19 @@ static void xe_device_sanitize(struct drm_device *drm, void *arg)
 
 int xe_device_probe(struct xe_device *xe)
 {
+	struct xe_tile *tile;
 	struct xe_gt *gt;
 	int err;
 	u8 id;
 
 	xe->info.mem_region_mask = 1;
 
-	for_each_gt(gt, xe, id) {
-		err = xe_gt_alloc(xe, gt);
+	for_each_tile(tile, xe, id) {
+		err = xe_tile_alloc(tile);
+		if (err)
+			return err;
+
+		err = xe_gt_alloc(xe, &tile->primary_gt);
 		if (err)
 			return err;
 	}
@@ -275,8 +281,12 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_ttm_sys_mgr_init(xe);
 
-	for_each_gt(gt, xe, id) {
-		err = xe_gt_init_noalloc(gt);
+	for_each_tile(tile, xe, id) {
+		err = xe_tile_init_noalloc(tile);
+		if (err)
+			goto err_irq_shutdown;
+
+		err = xe_gt_init_noalloc(&tile->primary_gt);
 		if (err)
 			goto err_irq_shutdown;
 	}
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 107a947a73618..358b70ae888dc 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -17,6 +17,8 @@
 #include "xe_platform_types.h"
 #include "xe_step_types.h"
 
+struct xe_ggtt;
+
 #define XE_BO_INVALID_OFFSET	LONG_MAX
 
 #define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100)
@@ -91,6 +93,12 @@ struct xe_tile {
 		/** @regs: pointer to tile's MMIO space (starting with registers) */
 		void *regs;
 	} mmio;
+
+	/** @mem: memory management info for tile */
+	struct {
+		/** @ggtt: Global graphics translation table */
+		struct xe_ggtt *ggtt;
+	} mem;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index cd8ada94e688e..ff70a01f15912 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -90,24 +90,19 @@ static void ggtt_fini_noalloc(struct drm_device *drm, void *arg)
 	xe_bo_unpin_map_no_vm(ggtt->scratch);
 }
 
-int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
+int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
-	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	unsigned int gsm_size;
 
-	XE_BUG_ON(xe_gt_is_media_type(gt));
-
-	ggtt->gt = gt;
-
 	gsm_size = probe_gsm_size(pdev);
 	if (gsm_size == 0) {
 		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
 		return -ENOMEM;
 	}
 
-	ggtt->gsm = tile->mmio.regs + SZ_8M;
+	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
 	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;
 
 	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
@@ -143,19 +138,20 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
 	u64 start, end;
 
 	/* Display may have allocated inside ggtt, so be careful with clearing here */
-	xe_device_mem_access_get(gt_to_xe(ggtt->gt));
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
 	mutex_lock(&ggtt->lock);
 	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
 		xe_ggtt_clear(ggtt, start, end - start);
 
-	xe_ggtt_invalidate(ggtt->gt);
+	xe_ggtt_invalidate(ggtt);
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(gt_to_xe(ggtt->gt));
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
 }
 
-int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
+int xe_ggtt_init(struct xe_ggtt *ggtt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+	struct xe_gt *gt = &ggtt->tile->primary_gt;
 	unsigned int flags;
 	int err;
 
@@ -195,8 +191,14 @@ err:
 #define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
 #define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
 
-void xe_ggtt_invalidate(struct xe_gt *gt)
+void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
 {
+	/*
+	 * TODO: Loop over each GT in tile once media GT support is
+	 * re-added
+	 */
+	struct xe_gt *gt = &ggtt->tile->primary_gt;
+
 	/* TODO: vfunc for GuC vs. non-GuC */
 
 	if (gt->uc.guc.submission_state.enabled) {
@@ -269,7 +271,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 		xe_ggtt_set_pte(ggtt, start + offset, pte);
 	}
 
-	xe_ggtt_invalidate(ggtt->gt);
+	xe_ggtt_invalidate(ggtt);
 }
 
 static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
@@ -287,14 +289,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 	if (err)
 		return err;
 
-	xe_device_mem_access_get(gt_to_xe(ggtt->gt));
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
 	mutex_lock(&ggtt->lock);
 	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
 					  alignment, 0, start, end, 0);
 	if (!err)
 		xe_ggtt_map_bo(ggtt, bo);
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(gt_to_xe(ggtt->gt));
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
 
 	return err;
 }
@@ -323,17 +325,17 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 {
-	xe_device_mem_access_get(gt_to_xe(ggtt->gt));
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
 	mutex_lock(&ggtt->lock);
 
 	xe_ggtt_clear(ggtt, node->start, node->size);
 	drm_mm_remove_node(node);
 	node->size = 0;
 
-	xe_ggtt_invalidate(ggtt->gt);
+	xe_ggtt_invalidate(ggtt);
 
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(gt_to_xe(ggtt->gt));
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
 }
 
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 3469aa2b1a02e..8e7360926bea9 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -12,9 +12,9 @@ struct drm_printer;
 
 u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset);
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
-void xe_ggtt_invalidate(struct xe_gt *gt);
-int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt);
-int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt);
+void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
+int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt);
+int xe_ggtt_init(struct xe_ggtt *ggtt);
 void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
 
 int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index ea70aaef4b317..d34b3e7339452 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -12,7 +12,7 @@ struct xe_bo;
 struct xe_gt;
 
 struct xe_ggtt {
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 
 	u64 size;
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 18eda5b1377fd..0f07f810bb1fc 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -67,11 +67,6 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
 	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
 
 	if (!xe_gt_is_media_type(gt)) {
-		gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt),
-					    GFP_KERNEL);
-		if (!gt->mem.ggtt)
-			return -ENOMEM;
-
 		gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr),
 						GFP_KERNEL);
 		if (!gt->mem.vram_mgr)
@@ -80,7 +75,6 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
 	} else {
 		struct xe_gt *full_gt = xe_find_full_gt(gt);
 
-		gt->mem.ggtt = full_gt->mem.ggtt;
 		gt->mem.vram_mgr = full_gt->mem.vram_mgr;
 	}
 
@@ -354,8 +348,6 @@ int xe_gt_init_noalloc(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
-	err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt);
-
 err_force_wake:
 	err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 	XE_WARN_ON(err2);
@@ -376,7 +368,7 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	xe_pat_init(gt);
 
 	if (!xe_gt_is_media_type(gt)) {
-		err = xe_ggtt_init(gt, gt->mem.ggtt);
+		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
 		if (err)
 			goto err_force_wake;
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 339ecd5fad9b8..1114254bc5193 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -98,7 +98,7 @@ static int ggtt(struct seq_file *m, void *data)
 	struct xe_gt *gt = node_to_gt(m->private);
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	return xe_ggtt_dump(gt->mem.ggtt, &p);
+	return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p);
 }
 
 static int register_save_restore(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 81e6ab0c77e09..c06a0b27d6fca 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -14,7 +14,6 @@
 #include "xe_uc_types.h"
 
 struct xe_engine_ops;
-struct xe_ggtt;
 struct xe_migrate;
 struct xe_ring_ops;
 struct xe_ttm_gtt_mgr;
@@ -176,8 +175,6 @@ struct xe_gt {
 		} vram;
 		/** @vram_mgr: VRAM TTM manager */
 		struct xe_ttm_vram_mgr *vram_mgr;
-		/** @ggtt: Global graphics translation table */
-		struct xe_ggtt *ggtt;
 	} mem;
 
 	/** @reset: state for GT resets */
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
new file mode 100644
index 0000000000000..7ef594f301ca7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_tile.h"
+#include "xe_ttm_vram_mgr.h"
+
+/**
+ * xe_tile_alloc - Perform per-tile memory allocation
+ * @tile: Tile to perform allocations for
+ *
+ * Allocates various per-tile data structures using DRM-managed allocations.
+ * Does not touch the hardware.
+ *
+ * Returns -ENOMEM if allocations fail, otherwise 0.
+ */
+int xe_tile_alloc(struct xe_tile *tile)
+{
+	struct drm_device *drm = &tile_to_xe(tile)->drm;
+
+	tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt),
+				      GFP_KERNEL);
+	if (!tile->mem.ggtt)
+		return -ENOMEM;
+	tile->mem.ggtt->tile = tile;
+
+	return 0;
+}
+
+/**
+ * xe_tile_init_noalloc - Init tile up to the point where allocations can happen.
+ * @tile: The tile to initialize.
+ *
+ * This function prepares the tile to allow memory allocations to VRAM, but is
+ * not allowed to allocate memory itself. This state is useful for display
+ * readout, because the inherited display framebuffer will otherwise be
+ * overwritten as it is usually put at the start of VRAM.
+ *
+ * Note that since this is tile initialization, it should not perform any
+ * GT-specific operations, and thus does not need to hold GT forcewake.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_tile_init_noalloc(struct xe_tile *tile)
+{
+	return xe_ggtt_init_noalloc(tile->mem.ggtt);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
new file mode 100644
index 0000000000000..77529ea136a66
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_H_
+#define _XE_TILE_H_
+
+struct xe_tile;
+
+int xe_tile_alloc(struct xe_tile *tile);
+int xe_tile_init_noalloc(struct xe_tile *tile);
+
+#endif
-- 
GitLab


From ebd288cba7db7097ad50a4736ded94cb0d92fadf Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:23 -0700
Subject: [PATCH 1607/2340] drm/xe: Move VRAM from GT to tile

On platforms with VRAM, the VRAM is associated with the tile, not the
GT.

v2:
 - Unsquash the GGTT handling back into its own patch.
 - Fix kunit test build
v3:
 - Tweak the "FIXME" comment to clarify that this function will be
   completely gone by the end of the series.  (Lucas)
v4:
 - Move a few changes that were supposed to be part of the GGTT patch
   back to that commit.  (Gustavo)
v5:
 - Kerneldoc parameter name fix.

Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-11-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c           |  6 +-
 drivers/gpu/drm/xe/xe_bo.c                 | 44 ++++++------
 drivers/gpu/drm/xe/xe_bo.h                 |  4 +-
 drivers/gpu/drm/xe/xe_device.c             |  4 --
 drivers/gpu/drm/xe/xe_device_types.h       | 30 ++++++++
 drivers/gpu/drm/xe/xe_gt.c                 | 82 ++--------------------
 drivers/gpu/drm/xe/xe_gt.h                 |  1 -
 drivers/gpu/drm/xe/xe_gt_pagefault.c       |  6 +-
 drivers/gpu/drm/xe/xe_gt_types.h           | 37 ----------
 drivers/gpu/drm/xe/xe_irq.c                |  2 +-
 drivers/gpu/drm/xe/xe_mmio.c               | 53 +++++++-------
 drivers/gpu/drm/xe/xe_mmio.h               |  2 +-
 drivers/gpu/drm/xe/xe_pci.c                |  2 -
 drivers/gpu/drm/xe/xe_query.c              |  4 +-
 drivers/gpu/drm/xe/xe_res_cursor.h         |  2 +-
 drivers/gpu/drm/xe/xe_tile.c               | 33 ++++++++-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c     | 18 ++---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c       | 16 ++---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h       |  4 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h |  6 +-
 20 files changed, 151 insertions(+), 205 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 8f3afdc6cca6a..6235a6c73a06d 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -115,9 +115,9 @@ static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt,
 	int ret;
 
 	/* TODO: Sanity check */
-	vram_bit = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id;
+	vram_bit = XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id;
 	kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id,
-		   gt->info.vram_id);
+		   gt_to_tile(gt)->id);
 
 	bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device,
 				 vram_bit);
@@ -179,7 +179,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 	int err, i;
 
 	kunit_info(test, "Testing device %s gt id %u vram id %u\n",
-		   dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id);
+		   dev_name(xe->drm.dev), gt->info.id, gt_to_tile(gt)->id);
 
 	for (i = 0; i < 2; ++i) {
 		xe_vm_lock(vm, &ww, 0, false);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 915d4c4b15c42..8ee6bad59a75d 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -71,25 +71,25 @@ static bool xe_bo_is_user(struct xe_bo *bo)
 	return bo->flags & XE_BO_CREATE_USER_BIT;
 }
 
-static struct xe_gt *
-mem_type_to_gt(struct xe_device *xe, u32 mem_type)
+static struct xe_tile *
+mem_type_to_tile(struct xe_device *xe, u32 mem_type)
 {
 	XE_BUG_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type));
 
-	return xe_device_get_gt(xe, mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0));
+	return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
 }
 
 /**
- * xe_bo_to_gt() - Get a GT from a BO's memory location
+ * xe_bo_to_tile() - Get a tile from a BO's memory location
  * @bo: The buffer object
  *
- * Get a GT from a BO's memory location, should be called on BOs in VRAM only.
+ * Get a tile from a BO's memory location, should be called on BOs in VRAM only.
  *
- * Return: xe_gt object which is closest to the BO
+ * Return: xe_tile object which is closest to the BO
  */
-struct xe_gt *xe_bo_to_gt(struct xe_bo *bo)
+struct xe_tile *xe_bo_to_tile(struct xe_bo *bo)
 {
-	return mem_type_to_gt(xe_bo_device(bo), bo->ttm.resource->mem_type);
+	return mem_type_to_tile(xe_bo_device(bo), bo->ttm.resource->mem_type);
 }
 
 static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
@@ -109,9 +109,9 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
 {
-	struct xe_gt *gt = mem_type_to_gt(xe, mem_type);
+	struct xe_tile *tile = mem_type_to_tile(xe, mem_type);
 
-	XE_BUG_ON(!gt->mem.vram.size);
+	XE_BUG_ON(!tile->mem.vram.size);
 
 	places[*c] = (struct ttm_place) {
 		.mem_type = mem_type,
@@ -362,7 +362,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 				 struct ttm_resource *mem)
 {
 	struct xe_device *xe = ttm_to_xe_device(bdev);
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 
 	switch (mem->mem_type) {
 	case XE_PL_SYSTEM:
@@ -370,15 +370,15 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 		return 0;
 	case XE_PL_VRAM0:
 	case XE_PL_VRAM1:
-		gt = mem_type_to_gt(xe, mem->mem_type);
+		tile = mem_type_to_tile(xe, mem->mem_type);
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 
-		if (gt->mem.vram.mapping &&
+		if (tile->mem.vram.mapping &&
 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-			mem->bus.addr = (u8 *)gt->mem.vram.mapping +
+			mem->bus.addr = (u8 *)tile->mem.vram.mapping +
 				mem->bus.offset;
 
-		mem->bus.offset += gt->mem.vram.io_start;
+		mem->bus.offset += tile->mem.vram.io_start;
 		mem->bus.is_iomem = true;
 
 #if  !defined(CONFIG_X86)
@@ -638,9 +638,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	if (bo->gt)
 		gt = bo->gt;
 	else if (resource_is_vram(new_mem))
-		gt = mem_type_to_gt(xe, new_mem->mem_type);
+		gt = &mem_type_to_tile(xe, new_mem->mem_type)->primary_gt;
 	else if (resource_is_vram(old_mem))
-		gt = mem_type_to_gt(xe, old_mem->mem_type);
+		gt = &mem_type_to_tile(xe, old_mem->mem_type)->primary_gt;
 
 	XE_BUG_ON(!gt);
 	XE_BUG_ON(!gt->migrate);
@@ -664,7 +664,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 			/* Create a new VMAP once kernel BO back in VRAM */
 			if (!ret && resource_is_vram(new_mem)) {
-				void *new_addr = gt->mem.vram.mapping +
+				void *new_addr = gt_to_tile(gt)->mem.vram.mapping +
 					(new_mem->start << PAGE_SHIFT);
 
 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
@@ -836,14 +836,14 @@ static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
 {
 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
-	struct xe_gt *gt = mem_type_to_gt(xe, ttm_bo->resource->mem_type);
+	struct xe_tile *tile = mem_type_to_tile(xe, ttm_bo->resource->mem_type);
 	struct xe_res_cursor cursor;
 
 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
 
 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
-	return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT;
+	return (tile->mem.vram.io_start + cursor.start) >> PAGE_SHIFT;
 }
 
 static void __xe_bo_vunmap(struct xe_bo *bo);
@@ -1344,12 +1344,12 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
 {
 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
-	struct xe_gt *gt = mem_type_to_gt(xe, res->mem_type);
+	struct xe_tile *tile = mem_type_to_tile(xe, res->mem_type);
 
 	if (res->mem_type == XE_PL_STOLEN)
 		return xe_ttm_stolen_gpu_offset(xe);
 
-	return xe->mem.vram.base + gt->mem.vram.base;
+	return xe->mem.vram.base + tile->mem.vram.base;
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index e6d08fa9c992b..6e29e45a90f24 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -22,7 +22,7 @@
 /* -- */
 #define XE_BO_CREATE_STOLEN_BIT		BIT(4)
 #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
-	(IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \
+	(IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id : \
 	 XE_BO_CREATE_SYSTEM_BIT)
 #define XE_BO_CREATE_GGTT_BIT		BIT(5)
 #define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
@@ -108,7 +108,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 			      u32 bo_flags);
 
-struct xe_gt *xe_bo_to_gt(struct xe_bo *bo);
+struct xe_tile *xe_bo_to_tile(struct xe_bo *bo);
 
 static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
 {
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 0657842d8db2d..50ce4e97299e4 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -285,10 +285,6 @@ int xe_device_probe(struct xe_device *xe)
 		err = xe_tile_init_noalloc(tile);
 		if (err)
 			goto err_irq_shutdown;
-
-		err = xe_gt_init_noalloc(&tile->primary_gt);
-		if (err)
-			goto err_irq_shutdown;
 	}
 
 	/* Allocate and map stolen after potential VRAM resize */
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 358b70ae888dc..9382d7f62f039 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -96,6 +96,36 @@ struct xe_tile {
 
 	/** @mem: memory management info for tile */
 	struct {
+		/**
+		 * @vram: VRAM info for tile.
+		 *
+		 * Although VRAM is associated with a specific tile, it can
+		 * still be accessed by all tiles' GTs.
+		 */
+		struct {
+			/** @io_start: IO start address of this VRAM instance */
+			resource_size_t io_start;
+			/**
+			 * @io_size: IO size of this VRAM instance
+			 *
+			 * This represents how much of this VRAM we can access
+			 * via the CPU through the VRAM BAR. This can be smaller
+			 * than @size, in which case only part of VRAM is CPU
+			 * accessible (typically the first 256M). This
+			 * configuration is known as small-bar.
+			 */
+			resource_size_t io_size;
+			/** @base: offset of VRAM starting base */
+			resource_size_t base;
+			/** @size: size of VRAM. */
+			resource_size_t size;
+			/** @mapping: pointer to VRAM mappable space */
+			void *__iomem mapping;
+		} vram;
+
+		/** @vram_mgr: VRAM TTM manager */
+		struct xe_ttm_vram_mgr *vram_mgr;
+
 		/** @ggtt: Global graphics translation table */
 		struct xe_ggtt *ggtt;
 	} mem;
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0f07f810bb1fc..419fc471053c0 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -37,7 +37,6 @@
 #include "xe_ring_ops.h"
 #include "xe_sa.h"
 #include "xe_sched_job.h"
-#include "xe_ttm_vram_mgr.h"
 #include "xe_tuning.h"
 #include "xe_uc.h"
 #include "xe_vm.h"
@@ -46,58 +45,22 @@
 
 struct xe_gt *xe_find_full_gt(struct xe_gt *gt)
 {
-	struct xe_gt *search;
-	u8 id;
-
-	XE_BUG_ON(!xe_gt_is_media_type(gt));
-
-	for_each_gt(search, gt_to_xe(gt), id) {
-		if (search->info.vram_id == gt->info.vram_id)
-			return search;
-	}
-
-	XE_BUG_ON("NOT POSSIBLE");
-	return NULL;
+	/*
+	 * FIXME: Once the code is prepared for re-enabling, this function will
+	 * be gone. Just return the only possible gt for now.
+	 */
+	return gt;
 }
 
 int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
 {
-	struct drm_device *drm = &xe->drm;
-
 	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
 
-	if (!xe_gt_is_media_type(gt)) {
-		gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr),
-						GFP_KERNEL);
-		if (!gt->mem.vram_mgr)
-			return -ENOMEM;
-
-	} else {
-		struct xe_gt *full_gt = xe_find_full_gt(gt);
-
-		gt->mem.vram_mgr = full_gt->mem.vram_mgr;
-	}
-
 	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
 
 	return 0;
 }
 
-static int gt_ttm_mgr_init(struct xe_gt *gt)
-{
-	struct xe_device *xe = gt_to_xe(gt);
-	int err;
-
-	if (gt->mem.vram.size) {
-		err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr);
-		if (err)
-			return err;
-		xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1;
-	}
-
-	return 0;
-}
-
 void xe_gt_sanitize(struct xe_gt *gt)
 {
 	/*
@@ -321,41 +284,6 @@ int xe_gt_init_early(struct xe_gt *gt)
 	return 0;
 }
 
-/**
- * xe_gt_init_noalloc - Init GT up to the point where allocations can happen.
- * @gt: The GT to initialize.
- *
- * This function prepares the GT to allow memory allocations to VRAM, but is not
- * allowed to allocate memory itself. This state is useful for display readout,
- * because the inherited display framebuffer will otherwise be overwritten as it
- * is usually put at the start of VRAM.
- *
- * Returns: 0 on success, negative error code on error.
- */
-int xe_gt_init_noalloc(struct xe_gt *gt)
-{
-	int err, err2;
-
-	if (xe_gt_is_media_type(gt))
-		return 0;
-
-	xe_device_mem_access_get(gt_to_xe(gt));
-	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
-	if (err)
-		goto err;
-
-	err = gt_ttm_mgr_init(gt);
-	if (err)
-		goto err_force_wake;
-
-err_force_wake:
-	err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-	XE_WARN_ON(err2);
-	xe_device_mem_access_put(gt_to_xe(gt));
-err:
-	return err;
-}
-
 static int gt_fw_domain_init(struct xe_gt *gt)
 {
 	int err, i;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index f4e98f499b360..27e913e9a43af 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -18,7 +18,6 @@
 
 int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt);
 int xe_gt_init_early(struct xe_gt *gt);
-int xe_gt_init_noalloc(struct xe_gt *gt);
 int xe_gt_init(struct xe_gt *gt);
 int xe_gt_record_default_lrcs(struct xe_gt *gt);
 void xe_gt_suspend_prepare(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 1677640e10753..f4f3d95ae6b1e 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -107,6 +107,7 @@ static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
 static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_vm *vm;
 	struct xe_vma *vma = NULL;
 	struct xe_bo *bo;
@@ -195,7 +196,7 @@ retry_userptr:
 		}
 
 		/* Migrate to VRAM, move should invalidate the VMA first */
-		ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id);
 		if (ret)
 			goto unlock_dma_resv;
 	} else if (bo) {
@@ -498,6 +499,7 @@ static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
 static int handle_acc(struct xe_gt *gt, struct acc *acc)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_vm *vm;
 	struct xe_vma *vma;
 	struct xe_bo *bo;
@@ -553,7 +555,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
 		goto unlock_vm;
 
 	/* Migrate to VRAM, move should invalidate the VMA first */
-	ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+	ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id);
 
 	if (only_needs_bo_lock(bo))
 		xe_bo_unlock(bo, &ww);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index c06a0b27d6fca..a040ec896e702 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -16,8 +16,6 @@
 struct xe_engine_ops;
 struct xe_migrate;
 struct xe_ring_ops;
-struct xe_ttm_gtt_mgr;
-struct xe_ttm_vram_mgr;
 
 enum xe_gt_type {
 	XE_GT_TYPE_UNINITIALIZED,
@@ -108,8 +106,6 @@ struct xe_gt {
 		enum xe_gt_type type;
 		/** @id: id of GT */
 		u8 id;
-		/** @vram: id of the VRAM for this GT */
-		u8 vram_id;
 		/** @clock_freq: clock frequency */
 		u32 clock_freq;
 		/** @engine_mask: mask of engines present on GT */
@@ -144,39 +140,6 @@ struct xe_gt {
 	 */
 	struct xe_reg_sr reg_sr;
 
-	/**
-	 * @mem: memory management info for GT, multiple GTs can point to same
-	 * objects (virtual split)
-	 */
-	struct {
-		/**
-		 * @vram: VRAM info for GT, multiple GTs can point to same info
-		 * (virtual split), can be subset of global device VRAM
-		 */
-		struct {
-			/** @io_start: IO start address of this VRAM instance */
-			resource_size_t io_start;
-			/**
-			 * @io_size: IO size of this VRAM instance
-			 *
-			 * This represents how much of the VRAM the CPU can access
-			 * via the VRAM BAR.
-			 * This can be smaller than the actual @size, in which
-			 * case only part of VRAM is CPU accessible (typically
-			 * the first 256M). This configuration is known as small-bar.
-			 */
-			resource_size_t io_size;
-			/** @base: offset of VRAM starting base */
-			resource_size_t base;
-			/** @size: size of VRAM. */
-			resource_size_t size;
-			/** @mapping: pointer to VRAM mappable space */
-			void *__iomem mapping;
-		} vram;
-		/** @vram_mgr: VRAM TTM manager */
-		struct xe_ttm_vram_mgr *vram_mgr;
-	} mem;
-
 	/** @reset: state for GT resets */
 	struct {
 		/**
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 1c26ec5ab4f0a..e9614e90efaf7 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -364,7 +364,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 	}
 
 	for_each_gt(gt, xe, id) {
-		if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0)
+		if ((master_tile_ctl & DG1_MSTR_TILE(gt_to_tile(gt)->id)) == 0)
 			continue;
 
 		if (!xe_gt_is_media_type(gt))
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index b27103080ca96..86f010ac9ccfb 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -189,7 +189,7 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
 
 /**
  * xe_mmio_tile_vram_size() - Collect vram size and offset information
- * @gt: tile to get info for
+ * @tile: tile to get info for
  * @vram_size: available vram (size - device reserved portions)
  * @tile_size: actual vram size
  * @tile_offset: physical start point in the vram address space
@@ -206,8 +206,10 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
  * NOTE: multi-tile bases will include the tile offset.
  *
  */
-int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_offset)
+int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_offset)
 {
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *gt = &tile->primary_gt;
 	u64 offset;
 	int err;
 	u32 reg;
@@ -217,8 +219,8 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 		return err;
 
 	/* actual size */
-	if (unlikely(gt_to_xe(gt)->info.platform == XE_DG1)) {
-		*tile_size = pci_resource_len(to_pci_dev(gt_to_xe(gt)->drm.dev), GEN12_LMEM_BAR);
+	if (unlikely(xe->info.platform == XE_DG1)) {
+		*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), GEN12_LMEM_BAR);
 		*tile_offset = 0;
 	} else {
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
@@ -227,7 +229,7 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 	}
 
 	/* minus device usage */
-	if (gt_to_xe(gt)->info.has_flat_ccs) {
+	if (xe->info.has_flat_ccs) {
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
 		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
 	} else {
@@ -242,10 +244,10 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 
 int xe_mmio_probe_vram(struct xe_device *xe)
 {
+	struct xe_tile *tile;
 	resource_size_t io_size;
 	u64 available_size = 0;
 	u64 total_size = 0;
-	struct xe_gt *gt;
 	u64 tile_offset;
 	u64 tile_size;
 	u64 vram_size;
@@ -255,9 +257,9 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	if (!IS_DGFX(xe))
 		return 0;
 
-	/* Get the size of the gt0 vram for later accessibility comparison */
-	gt = xe_device_get_gt(xe, 0);
-	err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset);
+	/* Get the size of the root tile's vram for later accessibility comparison */
+	tile = xe_device_get_root_tile(xe);
+	err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
 	if (err)
 		return err;
 
@@ -265,7 +267,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	if (err)
 		return err;
 
-	/* small bar issues will only cover gt0 sizes */
+	/* small bar issues will only cover root tile sizes */
 	if (xe->mem.vram.io_size < vram_size)
 		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
 			 vram_size, (u64)xe->mem.vram.io_size);
@@ -275,35 +277,32 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 
 	io_size = xe->mem.vram.io_size;
 
-	/* gt specific ranges */
-	for_each_gt(gt, xe, id) {
-		if (xe_gt_is_media_type(gt))
-			continue;
-
-		err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset);
+	/* tile specific ranges */
+	for_each_tile(tile, xe, id) {
+		err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
 		if (err)
 			return err;
 
-		gt->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
-		gt->mem.vram.io_size = min_t(u64, vram_size, io_size);
+		tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
+		tile->mem.vram.io_size = min_t(u64, vram_size, io_size);
 
-		if (!gt->mem.vram.io_size) {
+		if (!tile->mem.vram.io_size) {
 			drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
 			return -ENODEV;
 		}
 
-		gt->mem.vram.base = tile_offset;
+		tile->mem.vram.base = tile_offset;
 
 		/* small bar can limit the visible size.  size accordingly */
-		gt->mem.vram.size = min_t(u64, vram_size, io_size);
-		gt->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
+		tile->mem.vram.size = min_t(u64, vram_size, io_size);
+		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
 
-		drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, gt->info.vram_id,
-			 &gt->mem.vram.io_start, &gt->mem.vram.size);
+		drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id,
+			 &tile->mem.vram.io_start, &tile->mem.vram.size);
 
-		if (gt->mem.vram.io_size < gt->mem.vram.size)
+		if (tile->mem.vram.io_size < tile->mem.vram.size)
 			drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id,
-				 gt->info.vram_id, &gt->mem.vram.io_size);
+				 tile->id, &tile->mem.vram.io_size);
 
 		/* calculate total size using tile size to get the correct HW sizing */
 		total_size += tile_size;
@@ -329,7 +328,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 
 static void xe_mmio_probe_tiles(struct xe_device *xe)
 {
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt;
 	u32 mtcfg;
 	u8 adj_tile_count;
 	u8 id;
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 0ba7aa790f0b3..3c547d78afbaf 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -144,6 +144,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range,
 }
 
 int xe_mmio_probe_vram(struct xe_device *xe);
-int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_base);
+int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index e8931661c0041..b91d52205feb7 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -547,7 +547,6 @@ static int xe_info_init(struct xe_device *xe,
 
 		if (id == 0) {
 			gt->info.type = XE_GT_TYPE_MAIN;
-			gt->info.vram_id = id;
 
 			gt->info.__engine_mask = graphics_desc->hw_engine_mask;
 			if (MEDIA_VER(xe) < 13 && media_desc)
@@ -557,7 +556,6 @@ static int xe_info_init(struct xe_device *xe,
 			gt->mmio.adj_offset = 0;
 		} else {
 			gt->info.type = desc->extra_gts[id - 1].type;
-			gt->info.vram_id = desc->extra_gts[id - 1].vram_id;
 			gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ?
 				media_desc->hw_engine_mask :
 				graphics_desc->hw_engine_mask;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index b10959fde43bf..799bf68800e7c 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -182,7 +182,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	config->num_params = num_params;
 	config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
 		xe->info.devid | (xe->info.revid << 16);
-	if (to_gt(xe)->mem.vram.size)
+	if (xe_device_get_root_tile(xe)->mem.vram.size)
 		config->info[XE_QUERY_CONFIG_FLAGS] =
 			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
 	if (xe->info.enable_guc)
@@ -242,7 +242,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 			gts->gts[id].native_mem_regions = 0x1;
 		else
 			gts->gts[id].native_mem_regions =
-				BIT(gt->info.vram_id) << 1;
+				BIT(gt_to_tile(gt)->id) << 1;
 		gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^
 			gts->gts[id].native_mem_regions;
 	}
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 4e99fae26b4c0..f2ba609712d36 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -53,7 +53,7 @@ static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
 
 	if (res->mem_type != XE_PL_STOLEN) {
-		return &xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0)->mem.vram_mgr->mm;
+		return &xe->tiles[res->mem_type - XE_PL_VRAM0].mem.vram_mgr->mm;
 	} else {
 		struct ttm_resource_manager *mgr =
 			ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 7ef594f301ca7..5530a6b6ef314 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -29,6 +29,25 @@ int xe_tile_alloc(struct xe_tile *tile)
 		return -ENOMEM;
 	tile->mem.ggtt->tile = tile;
 
+	tile->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*tile->mem.vram_mgr), GFP_KERNEL);
+	if (!tile->mem.vram_mgr)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int tile_ttm_mgr_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	int err;
+
+	if (tile->mem.vram.size) {
+		err = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr);
+		if (err)
+			return err;
+		xe->info.mem_region_mask |= BIT(tile->id) << 1;
+	}
+
 	return 0;
 }
 
@@ -48,5 +67,17 @@ int xe_tile_alloc(struct xe_tile *tile)
  */
 int xe_tile_init_noalloc(struct xe_tile *tile)
 {
-	return xe_ggtt_init_noalloc(tile->mem.ggtt);
+	int err;
+
+	xe_device_mem_access_get(tile_to_xe(tile));
+
+	err = tile_ttm_mgr_init(tile);
+	if (err)
+		goto err_mem_access;
+
+	err = xe_ggtt_init_noalloc(tile->mem.ggtt);
+
+err_mem_access:
+	xe_device_mem_access_put(tile_to_xe(tile));
+	return err;
 }
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 49470f0722bd4..c68325161c199 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -51,29 +51,31 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
 	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
 }
 
-static s64 detect_bar2_dgfx(struct xe_gt *gt, struct xe_ttm_stolen_mgr *mgr)
+static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 {
-	struct pci_dev *pdev = to_pci_dev(gt_to_xe(gt)->drm.dev);
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
+	struct xe_gt *mmio = &tile->primary_gt;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	u64 stolen_size;
 	u64 tile_offset;
 	u64 tile_size;
 	u64 vram_size;
 
-	if (xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset)) {
-		drm_err(&gt_to_xe(gt)->drm, "Querying total vram size failed\n");
+	if (xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset)) {
+		drm_err(&xe->drm, "Querying total vram size failed\n");
 		return 0;
 	}
 
 	/* Use DSM base address instead for stolen memory */
-	mgr->stolen_base = (xe_mmio_read64(gt, DSMBASE) & BDSM_MASK) - tile_offset;
-	if (drm_WARN_ON(&gt_to_xe(gt)->drm, tile_size < mgr->stolen_base))
+	mgr->stolen_base = (xe_mmio_read64(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
+	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
 		return 0;
 
 	stolen_size = tile_size - mgr->stolen_base;
 
 	/* Verify usage fits in the actual resource available */
 	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, GEN12_LMEM_BAR))
-		mgr->io_base = gt->mem.vram.io_start + mgr->stolen_base;
+		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
 
 	/*
 	 * There may be few KB of platform dependent reserved memory at the end
@@ -141,7 +143,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 	int err;
 
 	if (IS_DGFX(xe))
-		stolen_size = detect_bar2_dgfx(to_gt(xe), mgr);
+		stolen_size = detect_bar2_dgfx(xe, mgr);
 	else if (GRAPHICS_VERx100(xe) >= 1270)
 		stolen_size = detect_bar2_integrated(xe, mgr);
 	else
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 73836b9b7fed2..1a84abd35fcff 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -353,16 +353,14 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
 	return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
 }
 
-int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
+int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr)
 {
-	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_device *xe = tile_to_xe(tile);
 
-	XE_BUG_ON(xe_gt_is_media_type(gt));
+	mgr->tile = tile;
 
-	mgr->gt = gt;
-
-	return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + gt->info.vram_id,
-				      gt->mem.vram.size, gt->mem.vram.io_size,
+	return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id,
+				      tile->mem.vram.size, tile->mem.vram.io_size,
 				      PAGE_SIZE);
 }
 
@@ -373,7 +371,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 			      enum dma_data_direction dir,
 			      struct sg_table **sgt)
 {
-	struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
+	struct xe_tile *tile = &xe->tiles[res->mem_type - XE_PL_VRAM0];
 	struct xe_res_cursor cursor;
 	struct scatterlist *sg;
 	int num_entries = 0;
@@ -406,7 +404,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 	 */
 	xe_res_first(res, offset, length, &cursor);
 	for_each_sgtable_sg((*sgt), sg, i) {
-		phys_addr_t phys = cursor.start + gt->mem.vram.io_start;
+		phys_addr_t phys = cursor.start + tile->mem.vram.io_start;
 		size_t size = cursor.size;
 		dma_addr_t addr;
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index 35e5367a79fbd..6e1d6033d7391 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -10,12 +10,12 @@
 
 enum dma_data_direction;
 struct xe_device;
-struct xe_gt;
+struct xe_tile;
 
 int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
 			   u32 mem_type, u64 size, u64 io_size,
 			   u64 default_page_size);
-int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr);
+int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr);
 int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 			      struct ttm_resource *res,
 			      u64 offset, u64 length,
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index 3d9417ff7434a..48bb991c14a56 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -9,7 +9,7 @@
 #include <drm/drm_buddy.h>
 #include <drm/ttm/ttm_device.h>
 
-struct xe_gt;
+struct xe_tile;
 
 /**
  * struct xe_ttm_vram_mgr - XE TTM VRAM manager
@@ -17,8 +17,8 @@ struct xe_gt;
  * Manages placement of TTM resource in VRAM.
  */
 struct xe_ttm_vram_mgr {
-	/** @gt: Graphics tile which the VRAM belongs to */
-	struct xe_gt *gt;
+	/** @tile: Tile which the VRAM belongs to */
+	struct xe_tile *tile;
 	/** @manager: Base TTM resource manager */
 	struct ttm_resource_manager manager;
 	/** @mm: DRM buddy allocator which manages the VRAM */
-- 
GitLab


From 876611c2b75689c6bea43bdbbbef9b358f71526a Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:25 -0700
Subject: [PATCH 1608/2340] drm/xe: Memory allocations are tile-based, not
 GT-based

Since memory and address spaces are a tile concept rather than a GT
concept, we need to plumb tile-based handling through lots of
memory-related code.

Note that one remaining shortcoming here that will need to be addressed
before media GT support can be re-enabled is that although the address
space is shared between a tile's GTs, each GT caches the PTEs
independently in their own TLB and thus TLB invalidation should be
handled at the GT level.

v2:
 - Fix kunit test build.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-13-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c      |   2 +-
 drivers/gpu/drm/xe/tests/xe_migrate.c |  21 ++--
 drivers/gpu/drm/xe/xe_bb.c            |   3 +-
 drivers/gpu/drm/xe/xe_bo.c            |  67 ++++++------
 drivers/gpu/drm/xe/xe_bo.h            |  18 +--
 drivers/gpu/drm/xe/xe_bo_evict.c      |   2 +-
 drivers/gpu/drm/xe/xe_bo_types.h      |   4 +-
 drivers/gpu/drm/xe/xe_device_types.h  |   7 ++
 drivers/gpu/drm/xe/xe_ggtt.c          |   5 +-
 drivers/gpu/drm/xe/xe_gt.c            |  21 +---
 drivers/gpu/drm/xe/xe_gt_debugfs.c    |   6 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c  |  14 +--
 drivers/gpu/drm/xe/xe_gt_types.h      |   7 --
 drivers/gpu/drm/xe/xe_guc_ads.c       |   5 +-
 drivers/gpu/drm/xe/xe_guc_ct.c        |   5 +-
 drivers/gpu/drm/xe/xe_guc_hwconfig.c  |   5 +-
 drivers/gpu/drm/xe/xe_guc_log.c       |   6 +-
 drivers/gpu/drm/xe/xe_guc_pc.c        |   5 +-
 drivers/gpu/drm/xe/xe_hw_engine.c     |   5 +-
 drivers/gpu/drm/xe/xe_lrc.c           |  13 +--
 drivers/gpu/drm/xe/xe_lrc_types.h     |   4 +-
 drivers/gpu/drm/xe/xe_migrate.c       |  23 ++--
 drivers/gpu/drm/xe/xe_migrate.h       |   5 +-
 drivers/gpu/drm/xe/xe_pt.c            | 144 ++++++++++++------------
 drivers/gpu/drm/xe/xe_pt.h            |  14 +--
 drivers/gpu/drm/xe/xe_sa.c            |  13 +--
 drivers/gpu/drm/xe/xe_sa.h            |   4 +-
 drivers/gpu/drm/xe/xe_tile.c          |   7 ++
 drivers/gpu/drm/xe/xe_uc_fw.c         |   5 +-
 drivers/gpu/drm/xe/xe_vm.c            | 152 +++++++++++++-------------
 drivers/gpu/drm/xe/xe_vm.h            |   2 +-
 drivers/gpu/drm/xe/xe_vm_types.h      |  12 +-
 include/uapi/drm/xe_drm.h             |   4 +-
 33 files changed, 298 insertions(+), 312 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 6235a6c73a06d..f933e5df6c121 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -173,7 +173,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 {
 	struct xe_bo *bo, *external;
 	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
-		XE_BO_CREATE_VRAM_IF_DGFX(gt);
+		XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt));
 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->primary_gt.migrate);
 	struct ww_acquire_ctx ww;
 	int err, i;
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 4a3ca2960fd54..85ef9bacfe52b 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -63,7 +63,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
 
 static void
 sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
-		   struct xe_gt *gt, struct iosys_map *map, void *dst,
+		   struct xe_tile *tile, struct iosys_map *map, void *dst,
 		   u32 qword_ofs, u32 num_qwords,
 		   const struct xe_vm_pgtable_update *update)
 {
@@ -76,7 +76,7 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
 	for (i = 0; i < num_qwords; i++) {
 		value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
 		if (map)
-			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
 				  sizeof(u64), u64, value);
 		else
 			ptr[i] = value;
@@ -108,7 +108,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	const char *str = big ? "Copying big bo" : "Copying small bo";
 	int err;
 
-	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
+	struct xe_bo *sysmem = xe_bo_create_locked(xe, gt_to_tile(m->gt), NULL,
 						   bo->size,
 						   ttm_bo_type_kernel,
 						   XE_BO_CREATE_SYSTEM_BIT);
@@ -240,6 +240,7 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
 static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 {
 	struct xe_gt *gt = m->gt;
+	struct xe_tile *tile = gt_to_tile(m->gt);
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
 	struct xe_res_cursor src_it;
@@ -256,18 +257,18 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		return;
 	}
 
-	big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M,
+	big = xe_bo_create_pin_map(xe, tile, m->eng->vm, SZ_4M,
 				   ttm_bo_type_kernel,
-				   XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				   XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				   XE_BO_CREATE_PINNED_BIT);
 	if (IS_ERR(big)) {
 		KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
 		goto vunmap;
 	}
 
-	pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, XE_PAGE_SIZE,
+	pt = xe_bo_create_pin_map(xe, tile, m->eng->vm, XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_PINNED_BIT);
 	if (IS_ERR(pt)) {
 		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
@@ -275,10 +276,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_big;
 	}
 
-	tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm,
+	tiny = xe_bo_create_pin_map(xe, tile, m->eng->vm,
 				    2 * SZ_4K,
 				    ttm_bo_type_kernel,
-				    XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				    XE_BO_CREATE_PINNED_BIT);
 	if (IS_ERR(tiny)) {
 		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
@@ -286,7 +287,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_pt;
 	}
 
-	bb = xe_bb_new(m->gt, 32, xe->info.supports_usm);
+	bb = xe_bb_new(gt, 32, xe->info.supports_usm);
 	if (IS_ERR(bb)) {
 		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
 			   PTR_ERR(bb));
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index bf7c94b769d7f..f9b6b7adf99ff 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -30,6 +30,7 @@ static int bb_prefetch(struct xe_gt *gt)
 
 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 {
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
 	int err;
 
@@ -42,7 +43,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 	 * space to accomodate the platform-specific hardware prefetch
 	 * requirements.
 	 */
-	bb->bo = xe_sa_bo_new(!usm ? gt->kernel_bb_pool : gt->usm.bb_pool,
+	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
 			      4 * (dwords + 1) + bb_prefetch(gt));
 	if (IS_ERR(bb->bo)) {
 		err = PTR_ERR(bb->bo);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8ee6bad59a75d..7c59487af86ad 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -458,7 +458,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 			}
 
 			xe_vm_assert_held(vm);
-			if (list_empty(&vma->rebind_link) && vma->gt_present)
+			if (list_empty(&vma->rebind_link) && vma->tile_present)
 				list_add_tail(&vma->rebind_link, &vm->rebind_list);
 
 			if (vm_resv_locked)
@@ -565,7 +565,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 	struct ttm_resource *old_mem = ttm_bo->resource;
 	struct ttm_tt *ttm = ttm_bo->ttm;
-	struct xe_gt *gt = NULL;
+	struct xe_tile *tile = NULL;
 	struct dma_fence *fence;
 	bool move_lacks_source;
 	bool needs_clear;
@@ -635,15 +635,15 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		goto out;
 	}
 
-	if (bo->gt)
-		gt = bo->gt;
+	if (bo->tile)
+		tile = bo->tile;
 	else if (resource_is_vram(new_mem))
-		gt = &mem_type_to_tile(xe, new_mem->mem_type)->primary_gt;
+		tile = mem_type_to_tile(xe, new_mem->mem_type);
 	else if (resource_is_vram(old_mem))
-		gt = &mem_type_to_tile(xe, old_mem->mem_type)->primary_gt;
+		tile = mem_type_to_tile(xe, old_mem->mem_type);
 
-	XE_BUG_ON(!gt);
-	XE_BUG_ON(!gt->migrate);
+	XE_BUG_ON(!tile);
+	XE_BUG_ON(!tile->primary_gt.migrate);
 
 	trace_xe_bo_move(bo);
 	xe_device_mem_access_get(xe);
@@ -664,7 +664,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 			/* Create a new VMAP once kernel BO back in VRAM */
 			if (!ret && resource_is_vram(new_mem)) {
-				void *new_addr = gt_to_tile(gt)->mem.vram.mapping +
+				void *new_addr = tile->mem.vram.mapping +
 					(new_mem->start << PAGE_SHIFT);
 
 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
@@ -681,9 +681,10 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		}
 	} else {
 		if (move_lacks_source)
-			fence = xe_migrate_clear(gt->migrate, bo, new_mem);
+			fence = xe_migrate_clear(tile->primary_gt.migrate, bo, new_mem);
 		else
-			fence = xe_migrate_copy(gt->migrate, bo, bo, old_mem, new_mem);
+			fence = xe_migrate_copy(tile->primary_gt.migrate,
+						bo, bo, old_mem, new_mem);
 		if (IS_ERR(fence)) {
 			ret = PTR_ERR(fence);
 			xe_device_mem_access_put(xe);
@@ -964,7 +965,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 	WARN_ON(!list_empty(&bo->vmas));
 
 	if (bo->ggtt_node.size)
-		xe_ggtt_remove_bo(gt_to_tile(bo->gt)->mem.ggtt, bo);
+		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
 
 	if (bo->vm && xe_bo_is_user(bo))
 		xe_vm_put(bo->vm);
@@ -1086,7 +1087,7 @@ void xe_bo_free(struct xe_bo *bo)
 }
 
 struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
-				    struct xe_gt *gt, struct dma_resv *resv,
+				    struct xe_tile *tile, struct dma_resv *resv,
 				    size_t size, enum ttm_bo_type type,
 				    u32 flags)
 {
@@ -1099,7 +1100,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	int err;
 
 	/* Only kernel objects should set GT */
-	XE_BUG_ON(gt && type != ttm_bo_type_kernel);
+	XE_BUG_ON(tile && type != ttm_bo_type_kernel);
 
 	if (XE_WARN_ON(!size))
 		return ERR_PTR(-EINVAL);
@@ -1120,7 +1121,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 		alignment = SZ_4K >> PAGE_SHIFT;
 	}
 
-	bo->gt = gt;
+	bo->tile = tile;
 	bo->size = size;
 	bo->flags = flags;
 	bo->ttm.base.funcs = &xe_gem_object_funcs;
@@ -1202,7 +1203,7 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 
 struct xe_bo *
 xe_bo_create_locked_range(struct xe_device *xe,
-			  struct xe_gt *gt, struct xe_vm *vm,
+			  struct xe_tile *tile, struct xe_vm *vm,
 			  size_t size, u64 start, u64 end,
 			  enum ttm_bo_type type, u32 flags)
 {
@@ -1225,7 +1226,7 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		}
 	}
 
-	bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL, size,
+	bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL, size,
 				   type, flags);
 	if (IS_ERR(bo))
 		return bo;
@@ -1235,16 +1236,16 @@ xe_bo_create_locked_range(struct xe_device *xe,
 	bo->vm = vm;
 
 	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
-		if (!gt && flags & XE_BO_CREATE_STOLEN_BIT)
-			gt = xe_device_get_gt(xe, 0);
+		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
+			tile = xe_device_get_root_tile(xe);
 
-		XE_BUG_ON(!gt);
+		XE_BUG_ON(!tile);
 
 		if (flags & XE_BO_CREATE_STOLEN_BIT &&
 		    flags & XE_BO_FIXED_PLACEMENT_BIT) {
-			err = xe_ggtt_insert_bo_at(gt_to_tile(gt)->mem.ggtt, bo, start);
+			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, start);
 		} else {
-			err = xe_ggtt_insert_bo(gt_to_tile(gt)->mem.ggtt, bo);
+			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
 		}
 		if (err)
 			goto err_unlock_put_bo;
@@ -1258,18 +1259,18 @@ err_unlock_put_bo:
 	return ERR_PTR(err);
 }
 
-struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
 				  struct xe_vm *vm, size_t size,
 				  enum ttm_bo_type type, u32 flags)
 {
-	return xe_bo_create_locked_range(xe, gt, vm, size, 0, ~0ULL, type, flags);
+	return xe_bo_create_locked_range(xe, tile, vm, size, 0, ~0ULL, type, flags);
 }
 
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
 			   struct xe_vm *vm, size_t size,
 			   enum ttm_bo_type type, u32 flags)
 {
-	struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
 
 	if (!IS_ERR(bo))
 		xe_bo_unlock_vm_held(bo);
@@ -1277,7 +1278,7 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
 	return bo;
 }
 
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
 				      struct xe_vm *vm,
 				      size_t size, u64 offset,
 				      enum ttm_bo_type type, u32 flags)
@@ -1291,7 +1292,7 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
 		flags |= XE_BO_CREATE_GGTT_BIT;
 
-	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
+	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, flags);
 	if (IS_ERR(bo))
 		return bo;
 
@@ -1315,18 +1316,18 @@ err_put:
 	return ERR_PTR(err);
 }
 
-struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
 				   struct xe_vm *vm, size_t size,
 				   enum ttm_bo_type type, u32 flags)
 {
-	return xe_bo_create_pin_map_at(xe, gt, vm, size, ~0ull, type, flags);
+	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
 }
 
-struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 				     const void *data, size_t size,
 				     enum ttm_bo_type type, u32 flags)
 {
-	struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL,
+	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
 						ALIGN(size, PAGE_SIZE),
 						type, flags);
 	if (IS_ERR(bo))
@@ -1957,7 +1958,7 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 			   page_size);
 
 	bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
-			  XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) |
+			  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
 			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 6e29e45a90f24..29eb7474f0185 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -21,8 +21,8 @@
 					 XE_BO_CREATE_VRAM1_BIT)
 /* -- */
 #define XE_BO_CREATE_STOLEN_BIT		BIT(4)
-#define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
-	(IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id : \
+#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \
+	(IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \
 	 XE_BO_CREATE_SYSTEM_BIT)
 #define XE_BO_CREATE_GGTT_BIT		BIT(5)
 #define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
@@ -81,27 +81,27 @@ struct xe_bo *xe_bo_alloc(void);
 void xe_bo_free(struct xe_bo *bo);
 
 struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
-				    struct xe_gt *gt, struct dma_resv *resv,
+				    struct xe_tile *tile, struct dma_resv *resv,
 				    size_t size, enum ttm_bo_type type,
 				    u32 flags);
 struct xe_bo *
 xe_bo_create_locked_range(struct xe_device *xe,
-			  struct xe_gt *gt, struct xe_vm *vm,
+			  struct xe_tile *tile, struct xe_vm *vm,
 			  size_t size, u64 start, u64 end,
 			  enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
 				  struct xe_vm *vm, size_t size,
 				  enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
 			   struct xe_vm *vm, size_t size,
 			   enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
 				   struct xe_vm *vm, size_t size,
 				   enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
 				      struct xe_vm *vm, size_t size, u64 offset,
 				      enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 				     const void *data, size_t size,
 				     enum ttm_bo_type type, u32 flags);
 
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index a72963c54bf32..9226195bd5608 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -149,7 +149,7 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 		}
 
 		if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
-			struct xe_tile *tile = gt_to_tile(bo->gt);
+			struct xe_tile *tile = bo->tile;
 
 			mutex_lock(&tile->mem.ggtt->lock);
 			xe_ggtt_map_bo(tile->mem.ggtt, bo);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 06de3330211d2..f6ee920303af9 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -29,8 +29,8 @@ struct xe_bo {
 	u32 flags;
 	/** @vm: VM this BO is attached to, for extobj this will be NULL */
 	struct xe_vm *vm;
-	/** @gt: GT this BO is attached to (kernel BO only) */
-	struct xe_gt *gt;
+	/** @tile: Tile this BO is attached to (kernel BO only) */
+	struct xe_tile *tile;
 	/** @vmas: List of VMAs for this BO */
 	struct list_head vmas;
 	/** @placements: valid placements for this BO */
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9382d7f62f039..ee050b4b4d779 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -128,6 +128,13 @@ struct xe_tile {
 
 		/** @ggtt: Global graphics translation table */
 		struct xe_ggtt *ggtt;
+
+		/**
+		 * @kernel_bb_pool: Pool from which batchbuffers are allocated.
+		 *
+		 * Media GT shares a pool with its primary GT.
+		 */
+		struct xe_sa_manager *kernel_bb_pool;
 	} mem;
 };
 
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index ff70a01f15912..d395d6fc1af66 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -151,7 +151,6 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
 int xe_ggtt_init(struct xe_ggtt *ggtt)
 {
 	struct xe_device *xe = tile_to_xe(ggtt->tile);
-	struct xe_gt *gt = &ggtt->tile->primary_gt;
 	unsigned int flags;
 	int err;
 
@@ -164,9 +163,9 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
 	if (ggtt->flags & XE_GGTT_FLAGS_64K)
 		flags |= XE_BO_CREATE_SYSTEM_BIT;
 	else
-		flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt);
+		flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);
 
-	ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, XE_PAGE_SIZE,
+	ggtt->scratch = xe_bo_create_pin_map(xe, ggtt->tile, NULL, XE_PAGE_SIZE,
 					     ttm_bo_type_kernel,
 					     flags);
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 419fc471053c0..74023a5dc8b2f 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -95,7 +95,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
-	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo);
+	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
 	job = xe_bb_create_wa_job(e, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
@@ -144,7 +144,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 		}
 	}
 
-	batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo);
+	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
 	job = xe_bb_create_wa_job(e, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
@@ -370,31 +370,16 @@ static int all_fw_domain_init(struct xe_gt *gt)
 		goto err_force_wake;
 
 	if (!xe_gt_is_media_type(gt)) {
-		gt->kernel_bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16);
-		if (IS_ERR(gt->kernel_bb_pool)) {
-			err = PTR_ERR(gt->kernel_bb_pool);
-			goto err_force_wake;
-		}
-
 		/*
 		 * USM has its only SA pool to non-block behind user operations
 		 */
 		if (gt_to_xe(gt)->info.supports_usm) {
-			gt->usm.bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16);
+			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16);
 			if (IS_ERR(gt->usm.bb_pool)) {
 				err = PTR_ERR(gt->usm.bb_pool);
 				goto err_force_wake;
 			}
 		}
-	} else {
-		struct xe_gt *full_gt = xe_find_full_gt(gt);
-
-		/*
-		 * Media GT's kernel_bb_pool is only used while recording the
-		 * default context during GT init.  The USM pool should never
-		 * be needed on the media GT.
-		 */
-		gt->kernel_bb_pool = full_gt->kernel_bb_pool;
 	}
 
 	if (!xe_gt_is_media_type(gt)) {
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 1114254bc5193..b5a5538ae6306 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -64,11 +64,11 @@ static int force_reset(struct seq_file *m, void *data)
 
 static int sa_info(struct seq_file *m, void *data)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
+	struct xe_tile *tile = gt_to_tile(node_to_gt(m->private));
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	drm_suballoc_dump_debug_info(&gt->kernel_bb_pool->base, &p,
-				     gt->kernel_bb_pool->gpu_addr);
+	drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p,
+				     tile->mem.kernel_bb_pool->gpu_addr);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index f4f3d95ae6b1e..1ec140aaf2a7c 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -69,10 +69,10 @@ static bool access_is_atomic(enum access_type access_type)
 	return access_type == ACCESS_TYPE_ATOMIC;
 }
 
-static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma)
+static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
 {
-	return BIT(gt->info.id) & vma->gt_present &&
-		!(BIT(gt->info.id) & vma->usm.gt_invalidated);
+	return BIT(tile->id) & vma->tile_present &&
+		!(BIT(tile->id) & vma->usm.tile_invalidated);
 }
 
 static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
@@ -152,7 +152,7 @@ retry_userptr:
 	atomic = access_is_atomic(pf->access_type);
 
 	/* Check if VMA is valid */
-	if (vma_is_valid(gt, vma) && !atomic)
+	if (vma_is_valid(tile, vma) && !atomic)
 		goto unlock_vm;
 
 	/* TODO: Validate fault */
@@ -208,8 +208,8 @@ retry_userptr:
 
 	/* Bind VMA only to the GT that has faulted */
 	trace_xe_vma_pf_bind(vma);
-	fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0,
-				 vma->gt_present & BIT(gt->info.id));
+	fence = __xe_pt_bind_vma(tile, vma, xe_gt_migrate_engine(gt), NULL, 0,
+				 vma->tile_present & BIT(tile->id));
 	if (IS_ERR(fence)) {
 		ret = PTR_ERR(fence);
 		goto unlock_dma_resv;
@@ -225,7 +225,7 @@ retry_userptr:
 
 	if (xe_vma_is_userptr(vma))
 		ret = xe_vma_userptr_check_repin(vma);
-	vma->usm.gt_invalidated &= ~BIT(gt->info.id);
+	vma->usm.tile_invalidated &= ~BIT(tile->id);
 
 unlock_dma_resv:
 	if (only_needs_bo_lock(bo))
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index a040ec896e702..c44560b6dc71f 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -278,13 +278,6 @@ struct xe_gt {
 	/** @hw_engines: hardware engines on the GT */
 	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
 
-	/**
-	 * @kernel_bb_pool: Pool from which batchbuffers are allocated.
-	 *
-	 * Media GT shares a pool with its primary GT.
-	 */
-	struct xe_sa_manager *kernel_bb_pool;
-
 	/** @migrate: Migration helper for vram blits and clearing */
 	struct xe_migrate *migrate;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 6d550d746909c..dd69d097b920d 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -273,16 +273,17 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
 {
 	struct xe_device *xe = ads_to_xe(ads);
 	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_bo *bo;
 	int err;
 
 	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
 	ads->regset_size = calculate_regset_size(gt);
 
-	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ads_size(ads) +
+	bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ads_size(ads) +
 				  MAX_GOLDEN_LRC_SIZE,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 9dc906f2651a5..137c184df4879 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -130,6 +130,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 {
 	struct xe_device *xe = ct_to_xe(ct);
 	struct xe_gt *gt = ct_to_gt(ct);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_bo *bo;
 	int err;
 
@@ -145,9 +146,9 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 
 	primelockdep(ct);
 
-	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(),
+	bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ct_size(),
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index a6982f323ed1f..c8f875e970ab8 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -70,6 +70,7 @@ int xe_guc_hwconfig_init(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_bo *bo;
 	u32 size;
 	int err;
@@ -94,9 +95,9 @@ int xe_guc_hwconfig_init(struct xe_guc *guc)
 	if (!size)
 		return -EINVAL;
 
-	bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size),
+	bo = xe_bo_create_pin_map(xe, tile, NULL, PAGE_ALIGN(size),
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 9a7b5d5906c1c..403aaafcaba6b 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -87,13 +87,13 @@ static void guc_log_fini(struct drm_device *drm, void *arg)
 int xe_guc_log_init(struct xe_guc_log *log)
 {
 	struct xe_device *xe = log_to_xe(log);
-	struct xe_gt *gt = log_to_gt(log);
+	struct xe_tile *tile = gt_to_tile(log_to_gt(log));
 	struct xe_bo *bo;
 	int err;
 
-	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(),
+	bo = xe_bo_create_pin_map(xe, tile, NULL, guc_log_size(),
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index e799faa1c6b8c..67faa9ee00063 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -888,6 +888,7 @@ static void pc_fini(struct drm_device *drm, void *arg)
 int xe_guc_pc_init(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_bo *bo;
 	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
@@ -895,9 +896,9 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 
 	mutex_init(&pc->freq_lock);
 
-	bo = xe_bo_create_pin_map(xe, gt, NULL, size,
+	bo = xe_bo_create_pin_map(xe, tile, NULL, size,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_GGTT_BIT);
 
 	if (IS_ERR(bo))
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 7e4b0b465244d..b12f65a2bab3e 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -373,6 +373,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 			  enum xe_hw_engine_id id)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
 	int err;
 
 	XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
@@ -381,8 +382,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
 	xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt);
 
-	hwe->hwsp = xe_bo_create_pin_map(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel,
-					 XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+	hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
+					 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 					 XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(hwe->hwsp)) {
 		err = PTR_ERR(hwe->hwsp);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index ae605e7805de4..8f25a38f36a57 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -592,7 +592,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
 
 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
 {
-	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt);
+	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
 
 	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
 	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
@@ -607,6 +607,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 		struct xe_engine *e, struct xe_vm *vm, u32 ring_size)
 {
 	struct xe_gt *gt = hwe->gt;
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_device *xe = gt_to_xe(gt);
 	struct iosys_map map;
 	void *init_data = NULL;
@@ -619,19 +620,15 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
 	 * via VM bind calls.
 	 */
-	lrc->bo = xe_bo_create_pin_map(xe, hwe->gt, vm,
+	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
 				      ring_size + xe_lrc_size(xe, hwe->class),
 				      ttm_bo_type_kernel,
-				      XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) |
+				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				      XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(lrc->bo))
 		return PTR_ERR(lrc->bo);
 
-	if (xe_gt_is_media_type(hwe->gt))
-		lrc->full_gt = xe_find_full_gt(hwe->gt);
-	else
-		lrc->full_gt = hwe->gt;
-
+	lrc->tile = gt_to_tile(hwe->gt);
 	lrc->ring.size = ring_size;
 	lrc->ring.tail = 0;
 
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 8fe08535873d6..78220336062cf 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -20,8 +20,8 @@ struct xe_lrc {
 	 */
 	struct xe_bo *bo;
 
-	/** @full_gt: full GT which this LRC belongs to */
-	struct xe_gt *full_gt;
+	/** @tile: tile which this LRC belongs to */
+	struct xe_tile *tile;
 
 	/** @flags: LRC flags */
 	u32 flags;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7a2188f02a86d..3031a45db4900 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -129,6 +129,7 @@ static u64 xe_migrate_vram_ofs(u64 addr)
 static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 {
 	struct xe_gt *gt = m->gt;
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_device *xe = vm->xe;
 	size_t cleared_size;
 	u64 vram_addr;
@@ -139,9 +140,9 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 
 	cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER);
 	cleared_size = PAGE_ALIGN(cleared_size);
-	m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size,
+	m->cleared_bo = xe_bo_create_pin_map(xe, tile, vm, cleared_size,
 					     ttm_bo_type_kernel,
-					     XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 					     XE_BO_CREATE_PINNED_BIT);
 	if (IS_ERR(m->cleared_bo))
 		return PTR_ERR(m->cleared_bo);
@@ -161,7 +162,8 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
 	u32 map_ofs, level, i;
 	struct xe_device *xe = gt_to_xe(m->gt);
-	struct xe_bo *bo, *batch = gt->kernel_bb_pool->bo;
+	struct xe_tile *tile = gt_to_tile(m->gt);
+	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
 	u64 entry;
 	int ret;
 
@@ -175,10 +177,10 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 	/* Need to be sure everything fits in the first PT, or create more */
 	XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M);
 
-	bo = xe_bo_create_pin_map(vm->xe, m->gt, vm,
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
 				  num_entries * XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_PINNED_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
@@ -984,7 +986,7 @@ err_sync:
 	return fence;
 }
 
-static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
+static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 			  const struct xe_vm_pgtable_update *update,
 			  struct xe_migrate_pt_update *pt_update)
 {
@@ -1023,7 +1025,7 @@ static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
 			(chunk * 2 + 1);
 		bb->cs[bb->len++] = lower_32_bits(addr);
 		bb->cs[bb->len++] = upper_32_bits(addr);
-		ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk,
+		ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk,
 			      update);
 
 		bb->len += chunk * 2;
@@ -1081,7 +1083,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 	for (i = 0; i < num_updates; i++) {
 		const struct xe_vm_pgtable_update *update = &updates[i];
 
-		ops->populate(pt_update, m->gt, &update->pt_bo->vmap, NULL,
+		ops->populate(pt_update, gt_to_tile(m->gt), &update->pt_bo->vmap, NULL,
 			      update->ofs, update->qwords, update);
 	}
 
@@ -1149,6 +1151,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 {
 	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
 	struct xe_gt *gt = m->gt;
+	struct xe_tile *tile = gt_to_tile(m->gt);
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_sched_job *job;
 	struct dma_fence *fence;
@@ -1243,7 +1246,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
 			(page_ofs / sizeof(u64)) * XE_PAGE_SIZE;
 		for (i = 0; i < num_updates; i++)
-			write_pgtable(m->gt, bb, addr + i * XE_PAGE_SIZE,
+			write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE,
 				      &updates[i], pt_update);
 	} else {
 		/* phys pages, no preamble required */
@@ -1253,7 +1256,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		/* Preemption is enabled again by the ring ops. */
 		emit_arb_clear(bb);
 		for (i = 0; i < num_updates; i++)
-			write_pgtable(m->gt, bb, 0, &updates[i], pt_update);
+			write_pgtable(tile, bb, 0, &updates[i], pt_update);
 	}
 
 	if (!eng)
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index c283b626c21c0..e627f4023d5a8 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -19,6 +19,7 @@ struct xe_migrate;
 struct xe_migrate_pt_update;
 struct xe_sync_entry;
 struct xe_pt;
+struct xe_tile;
 struct xe_vm;
 struct xe_vm_pgtable_update;
 struct xe_vma;
@@ -31,7 +32,7 @@ struct xe_migrate_pt_update_ops {
 	/**
 	 * @populate: Populate a command buffer or page-table with ptes.
 	 * @pt_update: Embeddable callback argument.
-	 * @gt: The gt for the current operation.
+	 * @tile: The tile for the current operation.
 	 * @map: struct iosys_map into the memory to be populated.
 	 * @pos: If @map is NULL, map into the memory to be populated.
 	 * @ofs: qword offset into @map, unused if @map is NULL.
@@ -43,7 +44,7 @@ struct xe_migrate_pt_update_ops {
 	 * page-tables with PTEs.
 	 */
 	void (*populate)(struct xe_migrate_pt_update *pt_update,
-			 struct xe_gt *gt, struct iosys_map *map,
+			 struct xe_tile *tile, struct iosys_map *map,
 			 void *pos, u32 ofs, u32 num_qwords,
 			 const struct xe_vm_pgtable_update *update);
 
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index e2cd1946af5a4..094058cb5f93a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -165,12 +165,10 @@ u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 	return __gen8_pte_encode(pte, cache, flags, pt_level);
 }
 
-static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm,
+static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 			     unsigned int level)
 {
-	u8 id = gt->info.id;
-
-	XE_BUG_ON(xe_gt_is_media_type(gt));
+	u8 id = tile->id;
 
 	if (!vm->scratch_bo[id])
 		return 0;
@@ -189,7 +187,7 @@ static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm,
 /**
  * xe_pt_create() - Create a page-table.
  * @vm: The vm to create for.
- * @gt: The gt to create for.
+ * @tile: The tile to create for.
  * @level: The page-table level.
  *
  * Allocate and initialize a single struct xe_pt metadata structure. Also
@@ -201,7 +199,7 @@ static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm,
  * Return: A valid struct xe_pt pointer on success, Pointer error code on
  * error.
  */
-struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 			   unsigned int level)
 {
 	struct xe_pt *pt;
@@ -215,9 +213,9 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
 	if (!pt)
 		return ERR_PTR(-ENOMEM);
 
-	bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K,
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
 				  XE_BO_CREATE_PINNED_BIT |
 				  XE_BO_CREATE_NO_RESV_EVICT);
@@ -241,30 +239,28 @@ err_kfree:
 /**
  * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
  * entries.
- * @gt: The gt the scratch pagetable of which to use.
+ * @tile: The tile the scratch pagetable of which to use.
  * @vm: The vm we populate for.
  * @pt: The pagetable the bo of which to initialize.
  *
- * Populate the page-table bo of @pt with entries pointing into the gt's
+ * Populate the page-table bo of @pt with entries pointing into the tile's
  * scratch page-table tree if any. Otherwise populate with zeros.
  */
-void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
 			  struct xe_pt *pt)
 {
 	struct iosys_map *map = &pt->bo->vmap;
 	u64 empty;
 	int i;
 
-	XE_BUG_ON(xe_gt_is_media_type(gt));
-
-	if (!vm->scratch_bo[gt->info.id]) {
+	if (!vm->scratch_bo[tile->id]) {
 		/*
 		 * FIXME: Some memory is allocated already allocated to zero?
 		 * Find out which memory that is and avoid this memset...
 		 */
 		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
 	} else {
-		empty = __xe_pt_empty_pte(gt, vm, pt->level);
+		empty = __xe_pt_empty_pte(tile, vm, pt->level);
 		for (i = 0; i < XE_PDES; i++)
 			xe_pt_write(vm->xe, map, i, empty);
 	}
@@ -318,9 +314,9 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
 
 /**
  * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the
- * given gt and vm.
+ * given tile and vm.
  * @xe: xe device.
- * @gt: gt to set up for.
+ * @tile: tile to set up for.
  * @vm: vm to set up for.
  *
  * Sets up a pagetable tree with one page-table per level and a single
@@ -329,10 +325,10 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
  *
  * Return: 0 on success, negative error code on error.
  */
-int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile,
 			 struct xe_vm *vm)
 {
-	u8 id = gt->info.id;
+	u8 id = tile->id;
 	unsigned int flags;
 	int i;
 
@@ -345,9 +341,9 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
 	if (vm->flags & XE_VM_FLAGS_64K)
 		flags |= XE_BO_CREATE_SYSTEM_BIT;
 	else
-		flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt);
+		flags |= XE_BO_CREATE_VRAM_IF_DGFX(tile);
 
-	vm->scratch_bo[id] = xe_bo_create_pin_map(xe, gt, vm, SZ_4K,
+	vm->scratch_bo[id] = xe_bo_create_pin_map(xe, tile, vm, SZ_4K,
 						  ttm_bo_type_kernel,
 						  flags);
 	if (IS_ERR(vm->scratch_bo[id]))
@@ -357,11 +353,11 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
 		      vm->scratch_bo[id]->size);
 
 	for (i = 0; i < vm->pt_root[id]->level; i++) {
-		vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i);
+		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
 		if (IS_ERR(vm->scratch_pt[id][i]))
 			return PTR_ERR(vm->scratch_pt[id][i]);
 
-		xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]);
+		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
 	}
 
 	return 0;
@@ -410,8 +406,8 @@ struct xe_pt_stage_bind_walk {
 	/* Input parameters for the walk */
 	/** @vm: The vm we're building for. */
 	struct xe_vm *vm;
-	/** @gt: The gt we're building for. */
-	struct xe_gt *gt;
+	/** @tile: The tile we're building for. */
+	struct xe_tile *tile;
 	/** @cache: Desired cache level for the ptes */
 	enum xe_cache_level cache;
 	/** @default_pte: PTE flag only template. No address is associated */
@@ -679,7 +675,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	if (covers || !*child) {
 		u64 flags = 0;
 
-		xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1);
+		xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
 		if (IS_ERR(xe_child))
 			return PTR_ERR(xe_child);
 
@@ -687,7 +683,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 			       round_down(addr, 1ull << walk->shifts[level]));
 
 		if (!covers)
-			xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child);
+			xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child);
 
 		*child = &xe_child->base;
 
@@ -696,7 +692,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		 * TODO: Suballocate the pt bo to avoid wasting a lot of
 		 * memory.
 		 */
-		if (GRAPHICS_VERx100(gt_to_xe(xe_walk->gt)) >= 1250 && level == 1 &&
+		if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 &&
 		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
 			walk->shifts = xe_compact_pt_shifts;
 			flags |= XE_PDE_64K;
@@ -719,7 +715,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
 /**
  * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
  * range.
- * @gt: The gt we're building for.
+ * @tile: The tile we're building for.
  * @vma: The vma indicating the address range.
  * @entries: Storage for the update entries used for connecting the tree to
  * the main tree at commit time.
@@ -735,7 +731,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
  * Return 0 on success, negative error code on error.
  */
 static int
-xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
+xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
 {
 	struct xe_bo *bo = vma->bo;
@@ -748,14 +744,14 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 		},
 		.vm = vma->vm,
-		.gt = gt,
+		.tile = tile,
 		.curs = &curs,
 		.va_curs_start = vma->start,
 		.pte_flags = vma->pte_flags,
 		.wupd.entries = entries,
 		.needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram,
 	};
-	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+	struct xe_pt *pt = vma->vm->pt_root[tile->id];
 	int ret;
 
 	if (is_vram) {
@@ -849,8 +845,8 @@ struct xe_pt_zap_ptes_walk {
 	struct xe_pt_walk base;
 
 	/* Input parameters for the walk */
-	/** @gt: The gt we're building for */
-	struct xe_gt *gt;
+	/** @tile: The tile we're building for */
+	struct xe_tile *tile;
 
 	/* Output */
 	/** @needs_invalidate: Whether we need to invalidate TLB*/
@@ -878,7 +874,7 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
 	 */
 	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
 				    &end_offset)) {
-		xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap,
+		xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap,
 			      offset * sizeof(u64), 0,
 			      (end_offset - offset) * sizeof(u64));
 		xe_walk->needs_invalidate = true;
@@ -893,7 +889,7 @@ static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
 
 /**
  * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
- * @gt: The gt we're zapping for.
+ * @tile: The tile we're zapping for.
  * @vma: GPU VMA detailing address range.
  *
  * Eviction and Userptr invalidation needs to be able to zap the
@@ -907,7 +903,7 @@ static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
  * Return: Whether ptes were actually updated and a TLB invalidation is
  * required.
  */
-bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma)
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
 {
 	struct xe_pt_zap_ptes_walk xe_walk = {
 		.base = {
@@ -915,11 +911,11 @@ bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma)
 			.shifts = xe_normal_pt_shifts,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 		},
-		.gt = gt,
+		.tile = tile,
 	};
-	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+	struct xe_pt *pt = vma->vm->pt_root[tile->id];
 
-	if (!(vma->gt_present & BIT(gt->info.id)))
+	if (!(vma->tile_present & BIT(tile->id)))
 		return false;
 
 	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
@@ -929,7 +925,7 @@ bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma)
 }
 
 static void
-xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt,
+xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
 		       struct iosys_map *map, void *data,
 		       u32 qword_ofs, u32 num_qwords,
 		       const struct xe_vm_pgtable_update *update)
@@ -938,11 +934,9 @@ xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt,
 	u64 *ptr = data;
 	u32 i;
 
-	XE_BUG_ON(xe_gt_is_media_type(gt));
-
 	for (i = 0; i < num_qwords; i++) {
 		if (map)
-			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
 				  sizeof(u64), u64, ptes[i].pte);
 		else
 			ptr[i] = ptes[i].pte;
@@ -1016,14 +1010,14 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
 }
 
 static int
-xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma,
+xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
 		   struct xe_vm_pgtable_update *entries, u32 *num_entries,
 		   bool rebind)
 {
 	int err;
 
 	*num_entries = 0;
-	err = xe_pt_stage_bind(gt, vma, entries, num_entries);
+	err = xe_pt_stage_bind(tile, vma, entries, num_entries);
 	if (!err)
 		BUG_ON(!*num_entries);
 	else /* abort! */
@@ -1250,7 +1244,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
 /**
  * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
  * address range.
- * @gt: The gt to bind for.
+ * @tile: The tile to bind for.
  * @vma: The vma to bind.
  * @e: The engine with which to do pipelined page-table updates.
  * @syncs: Entries to sync on before binding the built tree to the live vm tree.
@@ -1270,7 +1264,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
  * on success, an error pointer on error.
  */
 struct dma_fence *
-__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool rebind)
 {
@@ -1291,18 +1285,17 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 	bind_pt_update.locked = false;
 	xe_bo_assert_held(vma->bo);
 	xe_vm_assert_held(vm);
-	XE_BUG_ON(xe_gt_is_media_type(gt));
 
 	vm_dbg(&vma->vm->xe->drm,
 	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
 	       vma->start, vma->end, e);
 
-	err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind);
+	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
 	if (err)
 		goto err;
 	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
 
-	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 
 	if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
 		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
@@ -1310,9 +1303,9 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 			return ERR_PTR(-ENOMEM);
 	}
 
-	fence = xe_migrate_update_pgtables(gt->migrate,
+	fence = xe_migrate_update_pgtables(tile->primary_gt.migrate,
 					   vm, vma->bo,
-					   e ? e : vm->eng[gt->info.id],
+					   e ? e : vm->eng[tile->id],
 					   entries, num_entries,
 					   syncs, num_syncs,
 					   &bind_pt_update.base);
@@ -1321,7 +1314,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 
 		/* TLB invalidation must be done before signaling rebind */
 		if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
-			int err = invalidation_fence_init(gt, ifence, fence,
+			int err = invalidation_fence_init(&tile->primary_gt, ifence, fence,
 							  vma);
 			if (err) {
 				dma_fence_put(fence);
@@ -1344,7 +1337,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 				  bind_pt_update.locked ? &deferred : NULL);
 
 		/* This vma is live (again?) now */
-		vma->gt_present |= BIT(gt->info.id);
+		vma->tile_present |= BIT(tile->id);
 
 		if (bind_pt_update.locked) {
 			vma->userptr.initial_bind = true;
@@ -1373,8 +1366,8 @@ struct xe_pt_stage_unbind_walk {
 	struct xe_pt_walk base;
 
 	/* Input parameters for the walk */
-	/** @gt: The gt we're unbinding from. */
-	struct xe_gt *gt;
+	/** @tile: The tile we're unbinding from. */
+	struct xe_tile *tile;
 
 	/**
 	 * @modified_start: Walk range start, modified to include any
@@ -1479,7 +1472,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
 /**
  * xe_pt_stage_unbind() - Build page-table update structures for an unbind
  * operation
- * @gt: The gt we're unbinding for.
+ * @tile: The tile we're unbinding for.
  * @vma: The vma we're unbinding.
  * @entries: Caller-provided storage for the update structures.
  *
@@ -1490,7 +1483,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
  *
  * Return: The number of entries used.
  */
-static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
+static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
 				       struct xe_vm_pgtable_update *entries)
 {
 	struct xe_pt_stage_unbind_walk xe_walk = {
@@ -1499,12 +1492,12 @@ static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
 			.shifts = xe_normal_pt_shifts,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 		},
-		.gt = gt,
+		.tile = tile,
 		.modified_start = vma->start,
 		.modified_end = vma->end + 1,
 		.wupd.entries = entries,
 	};
-	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+	struct xe_pt *pt = vma->vm->pt_root[tile->id];
 
 	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
 				 &xe_walk.base);
@@ -1514,19 +1507,17 @@ static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
 
 static void
 xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
-				  struct xe_gt *gt, struct iosys_map *map,
+				  struct xe_tile *tile, struct iosys_map *map,
 				  void *ptr, u32 qword_ofs, u32 num_qwords,
 				  const struct xe_vm_pgtable_update *update)
 {
 	struct xe_vma *vma = pt_update->vma;
-	u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level);
+	u64 empty = __xe_pt_empty_pte(tile, vma->vm, update->pt->level);
 	int i;
 
-	XE_BUG_ON(xe_gt_is_media_type(gt));
-
 	if (map && map->is_iomem)
 		for (i = 0; i < num_qwords; ++i)
-			xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
 				  sizeof(u64), u64, empty);
 	else if (map)
 		memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
@@ -1577,7 +1568,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
 /**
  * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
  * address range.
- * @gt: The gt to unbind for.
+ * @tile: The tile to unbind for.
  * @vma: The vma to unbind.
  * @e: The engine with which to do pipelined page-table updates.
  * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
@@ -1595,7 +1586,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
  * on success, an error pointer on error.
  */
 struct dma_fence *
-__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		   struct xe_sync_entry *syncs, u32 num_syncs)
 {
 	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
@@ -1614,16 +1605,15 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 
 	xe_bo_assert_held(vma->bo);
 	xe_vm_assert_held(vm);
-	XE_BUG_ON(xe_gt_is_media_type(gt));
 
 	vm_dbg(&vma->vm->xe->drm,
 	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
 	       vma->start, vma->end, e);
 
-	num_entries = xe_pt_stage_unbind(gt, vma, entries);
+	num_entries = xe_pt_stage_unbind(tile, vma, entries);
 	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
 
-	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 
 	ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
 	if (!ifence)
@@ -1634,9 +1624,9 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 	 * clear again here. The eviction may have updated pagetables at a
 	 * lower level, because it needs to be more conservative.
 	 */
-	fence = xe_migrate_update_pgtables(gt->migrate,
+	fence = xe_migrate_update_pgtables(tile->primary_gt.migrate,
 					   vm, NULL, e ? e :
-					   vm->eng[gt->info.id],
+					   vm->eng[tile->id],
 					   entries, num_entries,
 					   syncs, num_syncs,
 					   &unbind_pt_update.base);
@@ -1644,7 +1634,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 		int err;
 
 		/* TLB invalidation must be done before signaling unbind */
-		err = invalidation_fence_init(gt, ifence, fence, vma);
+		err = invalidation_fence_init(&tile->primary_gt, ifence, fence, vma);
 		if (err) {
 			dma_fence_put(fence);
 			kfree(ifence);
@@ -1662,18 +1652,18 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 					   DMA_RESV_USAGE_BOOKKEEP);
 		xe_pt_commit_unbind(vma, entries, num_entries,
 				    unbind_pt_update.locked ? &deferred : NULL);
-		vma->gt_present &= ~BIT(gt->info.id);
+		vma->tile_present &= ~BIT(tile->id);
 	} else {
 		kfree(ifence);
 	}
 
-	if (!vma->gt_present)
+	if (!vma->tile_present)
 		list_del_init(&vma->rebind_link);
 
 	if (unbind_pt_update.locked) {
 		XE_WARN_ON(!xe_vma_is_userptr(vma));
 
-		if (!vma->gt_present) {
+		if (!vma->tile_present) {
 			spin_lock(&vm->userptr.invalidated_lock);
 			list_del_init(&vma->userptr.invalidate_link);
 			spin_unlock(&vm->userptr.invalidated_lock);
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 1152043e5c63c..10f334b9c0043 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -13,8 +13,8 @@ struct dma_fence;
 struct xe_bo;
 struct xe_device;
 struct xe_engine;
-struct xe_gt;
 struct xe_sync_entry;
+struct xe_tile;
 struct xe_vm;
 struct xe_vma;
 
@@ -23,27 +23,27 @@ struct xe_vma;
 
 unsigned int xe_pt_shift(unsigned int level);
 
-struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 			   unsigned int level);
 
-int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile,
 			 struct xe_vm *vm);
 
-void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
 			  struct xe_pt *pt);
 
 void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
 
 struct dma_fence *
-__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool rebind);
 
 struct dma_fence *
-__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		   struct xe_sync_entry *syncs, u32 num_syncs);
 
-bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma);
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
 
 u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
 		    const enum xe_cache_level level);
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index c16f7c14ff526..fee71080bd315 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -11,7 +11,6 @@
 
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_gt.h"
 #include "xe_map.h"
 
 static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
@@ -33,14 +32,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
 	sa_manager->bo = NULL;
 }
 
-struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align)
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
 {
-	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_device *xe = tile_to_xe(tile);
 	u32 managed_size = size - SZ_4K;
 	struct xe_bo *bo;
 	int ret;
 
-	struct xe_sa_manager *sa_manager = drmm_kzalloc(&gt_to_xe(gt)->drm,
+	struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm,
 							sizeof(*sa_manager),
 							GFP_KERNEL);
 	if (!sa_manager)
@@ -48,8 +47,8 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 alig
 
 	sa_manager->bo = NULL;
 
-	bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo)) {
 		drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
@@ -90,7 +89,7 @@ struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
 void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
 {
 	struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
-	struct xe_device *xe = gt_to_xe(sa_manager->bo->gt);
+	struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
 
 	if (!sa_manager->bo->vmap.is_iomem)
 		return;
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
index 3063fb34c720c..4e96483057d70 100644
--- a/drivers/gpu/drm/xe/xe_sa.h
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -9,9 +9,9 @@
 
 struct dma_fence;
 struct xe_bo;
-struct xe_gt;
+struct xe_tile;
 
-struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align);
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align);
 
 struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
 				  u32 size);
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 5530a6b6ef314..59d3e25ea5502 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -7,6 +7,7 @@
 
 #include "xe_device.h"
 #include "xe_ggtt.h"
+#include "xe_sa.h"
 #include "xe_tile.h"
 #include "xe_ttm_vram_mgr.h"
 
@@ -76,6 +77,12 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
 		goto err_mem_access;
 
 	err = xe_ggtt_init_noalloc(tile->mem.ggtt);
+	if (err)
+		goto err_mem_access;
+
+	tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
+	if (IS_ERR(tile->mem.kernel_bb_pool))
+		err = PTR_ERR(tile->mem.kernel_bb_pool);
 
 err_mem_access:
 	xe_device_mem_access_put(tile_to_xe(tile));
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 5703213bdf1b5..2b9b9b4a67115 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -322,6 +322,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct device *dev = xe->drm.dev;
 	const struct firmware *fw = NULL;
 	struct uc_css_header *css;
@@ -411,9 +412,9 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
 		guc_read_css_info(uc_fw, css);
 
-	obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size,
+	obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size,
 				     ttm_bo_type_kernel,
-				     XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+				     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				     XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(obj)) {
 		drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo",
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 798cba1bda6bf..ecfff4ffd00e1 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -465,7 +465,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 		xe_bo_assert_held(vma->bo);
 
 		list_del_init(&vma->notifier.rebind_link);
-		if (vma->gt_present && !vma->destroyed)
+		if (vma->tile_present && !vma->destroyed)
 			list_move_tail(&vma->rebind_link, &vm->rebind_list);
 	}
 	spin_unlock(&vm->notifier.list_lock);
@@ -703,7 +703,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 	 * Tell exec and rebind worker they need to repin and rebind this
 	 * userptr.
 	 */
-	if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) {
+	if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->tile_present) {
 		spin_lock(&vm->userptr.invalidated_lock);
 		list_move_tail(&vma->userptr.invalidate_link,
 			       &vm->userptr.invalidated);
@@ -821,7 +821,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 
 	xe_vm_assert_held(vm);
 	list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
-		XE_WARN_ON(!vma->gt_present);
+		XE_WARN_ON(!vma->tile_present);
 
 		list_del_init(&vma->rebind_link);
 		dma_fence_put(fence);
@@ -842,10 +842,10 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    u64 bo_offset_or_userptr,
 				    u64 start, u64 end,
 				    bool read_only,
-				    u64 gt_mask)
+				    u64 tile_mask)
 {
 	struct xe_vma *vma;
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	u8 id;
 
 	XE_BUG_ON(start >= end);
@@ -870,12 +870,11 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	if (read_only)
 		vma->pte_flags = XE_PTE_READ_ONLY;
 
-	if (gt_mask) {
-		vma->gt_mask = gt_mask;
+	if (tile_mask) {
+		vma->tile_mask = tile_mask;
 	} else {
-		for_each_gt(gt, vm->xe, id)
-			if (!xe_gt_is_media_type(gt))
-				vma->gt_mask |= 0x1 << id;
+		for_each_tile(tile, vm->xe, id)
+			vma->tile_mask |= 0x1 << id;
 	}
 
 	if (vm->xe->info.platform == XE_PVC)
@@ -1162,8 +1161,8 @@ static void vm_destroy_work_func(struct work_struct *w);
 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 {
 	struct xe_vm *vm;
-	int err, i = 0, number_gts = 0;
-	struct xe_gt *gt;
+	int err, i = 0, number_tiles = 0;
+	struct xe_tile *tile;
 	u8 id;
 
 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
@@ -1215,15 +1214,12 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
 		vm->flags |= XE_VM_FLAGS_64K;
 
-	for_each_gt(gt, xe, id) {
-		if (xe_gt_is_media_type(gt))
-			continue;
-
+	for_each_tile(tile, xe, id) {
 		if (flags & XE_VM_FLAG_MIGRATION &&
-		    gt->info.id != XE_VM_FLAG_GT_ID(flags))
+		    tile->id != XE_VM_FLAG_GT_ID(flags))
 			continue;
 
-		vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level);
+		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
 		if (IS_ERR(vm->pt_root[id])) {
 			err = PTR_ERR(vm->pt_root[id]);
 			vm->pt_root[id] = NULL;
@@ -1232,11 +1228,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	}
 
 	if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
-		for_each_gt(gt, xe, id) {
+		for_each_tile(tile, xe, id) {
 			if (!vm->pt_root[id])
 				continue;
 
-			err = xe_pt_create_scratch(xe, gt, vm);
+			err = xe_pt_create_scratch(xe, tile, vm);
 			if (err)
 				goto err_scratch_pt;
 		}
@@ -1253,17 +1249,18 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	}
 
 	/* Fill pt_root after allocating scratch tables */
-	for_each_gt(gt, xe, id) {
+	for_each_tile(tile, xe, id) {
 		if (!vm->pt_root[id])
 			continue;
 
-		xe_pt_populate_empty(gt, vm, vm->pt_root[id]);
+		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
 	}
 	dma_resv_unlock(&vm->resv);
 
 	/* Kernel migration VM shouldn't have a circular loop.. */
 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
-		for_each_gt(gt, xe, id) {
+		for_each_tile(tile, xe, id) {
+			struct xe_gt *gt = &tile->primary_gt;
 			struct xe_vm *migrate_vm;
 			struct xe_engine *eng;
 
@@ -1280,11 +1277,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 				return ERR_CAST(eng);
 			}
 			vm->eng[id] = eng;
-			number_gts++;
+			number_tiles++;
 		}
 	}
 
-	if (number_gts > 1)
+	if (number_tiles > 1)
 		vm->composite_fence_ctx = dma_fence_context_alloc(1);
 
 	mutex_lock(&xe->usm.lock);
@@ -1299,7 +1296,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	return vm;
 
 err_scratch_pt:
-	for_each_gt(gt, xe, id) {
+	for_each_tile(tile, xe, id) {
 		if (!vm->pt_root[id])
 			continue;
 
@@ -1312,7 +1309,7 @@ err_scratch_pt:
 		xe_bo_put(vm->scratch_bo[id]);
 	}
 err_destroy_root:
-	for_each_gt(gt, xe, id) {
+	for_each_tile(tile, xe, id) {
 		if (vm->pt_root[id])
 			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
 	}
@@ -1369,7 +1366,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	struct rb_root contested = RB_ROOT;
 	struct ww_acquire_ctx ww;
 	struct xe_device *xe = vm->xe;
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	u8 id;
 
 	XE_BUG_ON(vm->preempt.num_engines);
@@ -1380,7 +1377,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
 
-	for_each_gt(gt, xe, id) {
+	for_each_tile(tile, xe, id) {
 		if (vm->eng[id]) {
 			xe_engine_kill(vm->eng[id]);
 			xe_engine_put(vm->eng[id]);
@@ -1417,7 +1414,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	 * install a fence to resv. Hence it's safe to
 	 * destroy the pagetables immediately.
 	 */
-	for_each_gt(gt, xe, id) {
+	for_each_tile(tile, xe, id) {
 		if (vm->scratch_bo[id]) {
 			u32 i;
 
@@ -1467,7 +1464,7 @@ static void vm_destroy_work_func(struct work_struct *w)
 		container_of(w, struct xe_vm, destroy_work);
 	struct ww_acquire_ctx ww;
 	struct xe_device *xe = vm->xe;
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	u8 id;
 	void *lookup;
 
@@ -1492,7 +1489,7 @@ static void vm_destroy_work_func(struct work_struct *w)
 	 * can be moved to xe_vm_close_and_put.
 	 */
 	xe_vm_lock(vm, &ww, 0, false);
-	for_each_gt(gt, xe, id) {
+	for_each_tile(tile, xe, id) {
 		if (vm->pt_root[id]) {
 			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
 			vm->pt_root[id] = NULL;
@@ -1528,11 +1525,9 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
 	return vm;
 }
 
-u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt)
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 {
-	XE_BUG_ON(xe_gt_is_media_type(full_gt));
-
-	return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0,
+	return gen8_pde_encode(vm->pt_root[tile->id]->bo, 0,
 			       XE_CACHE_WB);
 }
 
@@ -1540,32 +1535,30 @@ static struct dma_fence *
 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
 		 struct xe_sync_entry *syncs, u32 num_syncs)
 {
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	struct dma_fence *fence = NULL;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
 	struct xe_vm *vm = vma->vm;
 	int cur_fence = 0, i;
-	int number_gts = hweight_long(vma->gt_present);
+	int number_tiles = hweight_long(vma->tile_present);
 	int err;
 	u8 id;
 
 	trace_xe_vma_unbind(vma);
 
-	if (number_gts > 1) {
-		fences = kmalloc_array(number_gts, sizeof(*fences),
+	if (number_tiles > 1) {
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
 				       GFP_KERNEL);
 		if (!fences)
 			return ERR_PTR(-ENOMEM);
 	}
 
-	for_each_gt(gt, vm->xe, id) {
-		if (!(vma->gt_present & BIT(id)))
+	for_each_tile(tile, vm->xe, id) {
+		if (!(vma->tile_present & BIT(id)))
 			goto next;
 
-		XE_BUG_ON(xe_gt_is_media_type(gt));
-
-		fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs);
+		fence = __xe_pt_unbind_vma(tile, vma, e, syncs, num_syncs);
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
 			goto err_fences;
@@ -1580,7 +1573,7 @@ next:
 	}
 
 	if (fences) {
-		cf = dma_fence_array_create(number_gts, fences,
+		cf = dma_fence_array_create(number_tiles, fences,
 					    vm->composite_fence_ctx,
 					    vm->composite_fence_seqno++,
 					    false);
@@ -1612,32 +1605,31 @@ static struct dma_fence *
 xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
 	       struct xe_sync_entry *syncs, u32 num_syncs)
 {
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	struct dma_fence *fence;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
 	struct xe_vm *vm = vma->vm;
 	int cur_fence = 0, i;
-	int number_gts = hweight_long(vma->gt_mask);
+	int number_tiles = hweight_long(vma->tile_mask);
 	int err;
 	u8 id;
 
 	trace_xe_vma_bind(vma);
 
-	if (number_gts > 1) {
-		fences = kmalloc_array(number_gts, sizeof(*fences),
+	if (number_tiles > 1) {
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
 				       GFP_KERNEL);
 		if (!fences)
 			return ERR_PTR(-ENOMEM);
 	}
 
-	for_each_gt(gt, vm->xe, id) {
-		if (!(vma->gt_mask & BIT(id)))
+	for_each_tile(tile, vm->xe, id) {
+		if (!(vma->tile_mask & BIT(id)))
 			goto next;
 
-		XE_BUG_ON(xe_gt_is_media_type(gt));
-		fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs,
-					 vma->gt_present & BIT(id));
+		fence = __xe_pt_bind_vma(tile, vma, e, syncs, num_syncs,
+					 vma->tile_present & BIT(id));
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
 			goto err_fences;
@@ -1652,7 +1644,7 @@ next:
 	}
 
 	if (fences) {
-		cf = dma_fence_array_create(number_gts, fences,
+		cf = dma_fence_array_create(number_tiles, fences,
 					    vm->composite_fence_ctx,
 					    vm->composite_fence_seqno++,
 					    false);
@@ -2047,7 +2039,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 			return err;
 	}
 
-	if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) {
+	if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
 		return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
 				  afence);
 	} else {
@@ -2649,7 +2641,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					  first->start,
 					  lookup->start - 1,
 					  (first->pte_flags & XE_PTE_READ_ONLY),
-					  first->gt_mask);
+					  first->tile_mask);
 		if (first->bo)
 			xe_bo_unlock(first->bo, &ww);
 		if (!new_first) {
@@ -2680,7 +2672,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					 last->start + chunk,
 					 last->end,
 					 (last->pte_flags & XE_PTE_READ_ONLY),
-					 last->gt_mask);
+					 last->tile_mask);
 		if (last->bo)
 			xe_bo_unlock(last->bo, &ww);
 		if (!new_last) {
@@ -2816,7 +2808,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
 					       struct xe_bo *bo,
 					       u64 bo_offset_or_userptr,
 					       u64 addr, u64 range, u32 op,
-					       u64 gt_mask, u32 region)
+					       u64 tile_mask, u32 region)
 {
 	struct ww_acquire_ctx ww;
 	struct xe_vma *vma, lookup;
@@ -2837,7 +2829,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
 		vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
 				    addr + range - 1,
 				    op & XE_VM_BIND_FLAG_READONLY,
-				    gt_mask);
+				    tile_mask);
 		xe_bo_unlock(bo, &ww);
 		if (!vma)
 			return ERR_PTR(-ENOMEM);
@@ -2877,7 +2869,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
 		vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
 				    addr + range - 1,
 				    op & XE_VM_BIND_FLAG_READONLY,
-				    gt_mask);
+				    tile_mask);
 		if (!vma)
 			return ERR_PTR(-ENOMEM);
 
@@ -3114,11 +3106,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto put_engine;
 		}
 
-		if (bind_ops[i].gt_mask) {
-			u64 valid_gts = BIT(xe->info.tile_count) - 1;
+		if (bind_ops[i].tile_mask) {
+			u64 valid_tiles = BIT(xe->info.tile_count) - 1;
 
-			if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask &
-					 ~valid_gts)) {
+			if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask &
+					 ~valid_tiles)) {
 				err = -EINVAL;
 				goto put_engine;
 			}
@@ -3209,11 +3201,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 addr = bind_ops[i].addr;
 		u32 op = bind_ops[i].op;
 		u64 obj_offset = bind_ops[i].obj_offset;
-		u64 gt_mask = bind_ops[i].gt_mask;
+		u64 tile_mask = bind_ops[i].tile_mask;
 		u32 region = bind_ops[i].region;
 
 		vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset,
-						   addr, range, op, gt_mask,
+						   addr, range, op, tile_mask,
 						   region);
 		if (IS_ERR(vmas[i])) {
 			err = PTR_ERR(vmas[i]);
@@ -3387,8 +3379,8 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
 int xe_vm_invalidate_vma(struct xe_vma *vma)
 {
 	struct xe_device *xe = vma->vm->xe;
-	struct xe_gt *gt;
-	u32 gt_needs_invalidate = 0;
+	struct xe_tile *tile;
+	u32 tile_needs_invalidate = 0;
 	int seqno[XE_MAX_TILES_PER_DEVICE];
 	u8 id;
 	int ret;
@@ -3410,25 +3402,29 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 		}
 	}
 
-	for_each_gt(gt, xe, id) {
-		if (xe_pt_zap_ptes(gt, vma)) {
-			gt_needs_invalidate |= BIT(id);
+	for_each_tile(tile, xe, id) {
+		if (xe_pt_zap_ptes(tile, vma)) {
+			tile_needs_invalidate |= BIT(id);
 			xe_device_wmb(xe);
-			seqno[id] = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
+			/*
+			 * FIXME: We potentially need to invalidate multiple
+			 * GTs within the tile
+			 */
+			seqno[id] = xe_gt_tlb_invalidation_vma(&tile->primary_gt, NULL, vma);
 			if (seqno[id] < 0)
 				return seqno[id];
 		}
 	}
 
-	for_each_gt(gt, xe, id) {
-		if (gt_needs_invalidate & BIT(id)) {
-			ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]);
+	for_each_tile(tile, xe, id) {
+		if (tile_needs_invalidate & BIT(id)) {
+			ret = xe_gt_tlb_invalidation_wait(&tile->primary_gt, seqno[id]);
 			if (ret < 0)
 				return ret;
 		}
 	}
 
-	vma->usm.gt_invalidated = vma->gt_mask;
+	vma->usm.tile_invalidated = vma->tile_mask;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 748dc16ebed9a..372f26153209b 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -54,7 +54,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma);
 
 #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
 
-u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt);
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);
 
 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 203ba9d946b8d..c45c5daeeaa78 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -37,17 +37,17 @@ struct xe_vma {
 	/** @bo_offset: offset into BO if not a userptr, unused for userptr */
 	u64 bo_offset;
 
-	/** @gt_mask: GT mask of where to create binding for this VMA */
-	u64 gt_mask;
+	/** @tile_mask: Tile mask of where to create binding for this VMA */
+	u64 tile_mask;
 
 	/**
-	 * @gt_present: GT mask of binding are present for this VMA.
+	 * @tile_present: GT mask of binding are present for this VMA.
 	 * protected by vm->lock, vm->resv and for userptrs,
 	 * vm->userptr.notifier_lock for writing. Needs either for reading,
 	 * but if reading is done under the vm->lock only, it needs to be held
 	 * in write mode.
 	 */
-	u64 gt_present;
+	u64 tile_present;
 
 	/**
 	 * @destroyed: VMA is destroyed, in the sense that it shouldn't be
@@ -132,8 +132,8 @@ struct xe_vma {
 
 	/** @usm: unified shared memory state */
 	struct {
-		/** @gt_invalidated: VMA has been invalidated */
-		u64 gt_invalidated;
+		/** @tile_invalidated: VMA has been invalidated */
+		u64 tile_invalidated;
 	} usm;
 
 	struct {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 34aff9e15fe6f..edd29e7f39eb3 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -407,10 +407,10 @@ struct drm_xe_vm_bind_op {
 	__u64 addr;
 
 	/**
-	 * @gt_mask: Mask for which GTs to create binds for, 0 == All GTs,
+	 * @tile_mask: Mask for which tiles to create binds for, 0 == All tiles,
 	 * only applies to creating new VMAs
 	 */
-	__u64 gt_mask;
+	__u64 tile_mask;
 
 	/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
 	__u32 op;
-- 
GitLab


From 08dea7674533cfd49764bcd09ba84de7143361ab Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:27 -0700
Subject: [PATCH 1609/2340] drm/xe: Move migration from GT to tile

Migration primarily focuses on the memory associated with a tile, so it
makes more sense to track this at the tile level (especially since the
driver was already skipping migration operations on media GTs).

Note that the blitter engine used to perform the migration always lives
in the tile's primary GT today.  In theory that could change if media
GTs ever start including blitter engines in the future, but we can
extend the design if/when that happens in the future.

v2:
 - Fix kunit test build
 - Kerneldoc parameter name update
v3:
 - Removed leftover prototype for removed function.  (Gustavo)
 - Remove unrelated / unwanted error handling change.  (Gustavo)

Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-15-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c      |  4 +-
 drivers/gpu/drm/xe/tests/xe_migrate.c | 25 ++++++-----
 drivers/gpu/drm/xe/xe_bo.c            |  6 +--
 drivers/gpu/drm/xe/xe_bo_evict.c      | 14 +++---
 drivers/gpu/drm/xe/xe_device_types.h  |  3 ++
 drivers/gpu/drm/xe/xe_engine.c        |  2 +-
 drivers/gpu/drm/xe/xe_gt.c            | 27 +++---------
 drivers/gpu/drm/xe/xe_gt.h            |  3 --
 drivers/gpu/drm/xe/xe_gt_pagefault.c  |  2 +-
 drivers/gpu/drm/xe/xe_gt_types.h      |  3 --
 drivers/gpu/drm/xe/xe_migrate.c       | 61 +++++++++++++--------------
 drivers/gpu/drm/xe/xe_migrate.h       |  4 +-
 drivers/gpu/drm/xe/xe_pt.c            |  4 +-
 drivers/gpu/drm/xe/xe_tile.c          |  6 +++
 drivers/gpu/drm/xe/xe_tile.h          |  2 +
 drivers/gpu/drm/xe/xe_vm.c            |  2 +-
 drivers/gpu/drm/xe/xe_vm_types.h      |  2 +-
 17 files changed, 79 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index f933e5df6c121..5309204d8d1b1 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
 
 	/* Optionally clear bo *and* CCS data in VRAM. */
 	if (clear) {
-		fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource);
+		fence = xe_migrate_clear(gt_to_tile(gt)->migrate, bo, bo->ttm.resource);
 		if (IS_ERR(fence)) {
 			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
 			return PTR_ERR(fence);
@@ -174,7 +174,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 	struct xe_bo *bo, *external;
 	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
 		XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt));
-	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->primary_gt.migrate);
+	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
 	struct ww_acquire_ctx ww;
 	int err, i;
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 85ef9bacfe52b..d9f1f31c92d27 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -101,14 +101,14 @@ static const struct xe_migrate_pt_update_ops sanity_ops = {
 static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 		      struct kunit *test)
 {
-	struct xe_device *xe = gt_to_xe(m->gt);
+	struct xe_device *xe = tile_to_xe(m->tile);
 	u64 retval, expected = 0;
 	bool big = bo->size >= SZ_2M;
 	struct dma_fence *fence;
 	const char *str = big ? "Copying big bo" : "Copying small bo";
 	int err;
 
-	struct xe_bo *sysmem = xe_bo_create_locked(xe, gt_to_tile(m->gt), NULL,
+	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL,
 						   bo->size,
 						   ttm_bo_type_kernel,
 						   XE_BO_CREATE_SYSTEM_BIT);
@@ -189,7 +189,7 @@ out_unlock:
 static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
 			   struct kunit *test, bool force_gpu)
 {
-	struct xe_device *xe = gt_to_xe(m->gt);
+	struct xe_device *xe = tile_to_xe(m->tile);
 	struct dma_fence *fence;
 	u64 retval, expected;
 	ktime_t then, now;
@@ -239,16 +239,15 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
 
 static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 {
-	struct xe_gt *gt = m->gt;
-	struct xe_tile *tile = gt_to_tile(m->gt);
-	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = m->tile;
+	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
 	struct xe_res_cursor src_it;
 	struct dma_fence *fence;
 	u64 retval, expected;
 	struct xe_bb *bb;
 	int err;
-	u8 id = gt->info.id;
+	u8 id = tile->id;
 
 	err = xe_bo_vmap(bo);
 	if (err) {
@@ -287,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_pt;
 	}
 
-	bb = xe_bb_new(gt, 32, xe->info.supports_usm);
+	bb = xe_bb_new(&tile->primary_gt, 32, xe->info.supports_usm);
 	if (IS_ERR(bb)) {
 		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
 			   PTR_ERR(bb));
@@ -324,7 +323,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
 	expected = 0;
 
-	emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
+	emit_clear(&tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
 		   IS_DGFX(xe));
 	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
 		       test);
@@ -391,14 +390,14 @@ vunmap:
 static int migrate_test_run_device(struct xe_device *xe)
 {
 	struct kunit *test = xe_cur_kunit();
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	int id;
 
-	for_each_gt(gt, xe, id) {
-		struct xe_migrate *m = gt->migrate;
+	for_each_tile(tile, xe, id) {
+		struct xe_migrate *m = tile->migrate;
 		struct ww_acquire_ctx ww;
 
-		kunit_info(test, "Testing gt id %d.\n", id);
+		kunit_info(test, "Testing tile id %d.\n", id);
 		xe_vm_lock(m->eng->vm, &ww, 0, true);
 		xe_device_mem_access_get(xe);
 		xe_migrate_sanity_test(m, test);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 7c59487af86ad..8bac1717ca78f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -643,7 +643,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		tile = mem_type_to_tile(xe, old_mem->mem_type);
 
 	XE_BUG_ON(!tile);
-	XE_BUG_ON(!tile->primary_gt.migrate);
+	XE_BUG_ON(!tile->migrate);
 
 	trace_xe_bo_move(bo);
 	xe_device_mem_access_get(xe);
@@ -681,9 +681,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		}
 	} else {
 		if (move_lacks_source)
-			fence = xe_migrate_clear(tile->primary_gt.migrate, bo, new_mem);
+			fence = xe_migrate_clear(tile->migrate, bo, new_mem);
 		else
-			fence = xe_migrate_copy(tile->primary_gt.migrate,
+			fence = xe_migrate_copy(tile->migrate,
 						bo, bo, old_mem, new_mem);
 		if (IS_ERR(fence)) {
 			ret = PTR_ERR(fence);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 9226195bd5608..f559a7f3eb3e5 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -8,7 +8,7 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_ggtt.h"
-#include "xe_gt.h"
+#include "xe_tile.h"
 
 /**
  * xe_bo_evict_all - evict all BOs from VRAM
@@ -29,7 +29,7 @@ int xe_bo_evict_all(struct xe_device *xe)
 	struct ttm_device *bdev = &xe->ttm;
 	struct ww_acquire_ctx ww;
 	struct xe_bo *bo;
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	struct list_head still_in_list;
 	u32 mem_type;
 	u8 id;
@@ -83,8 +83,8 @@ int xe_bo_evict_all(struct xe_device *xe)
 	 * Wait for all user BO to be evicted as those evictions depend on the
 	 * memory moved below.
 	 */
-	for_each_gt(gt, xe, id)
-		xe_gt_migrate_wait(gt);
+	for_each_tile(tile, xe, id)
+		xe_tile_migrate_wait(tile);
 
 	spin_lock(&xe->pinned.lock);
 	for (;;) {
@@ -186,7 +186,7 @@ int xe_bo_restore_user(struct xe_device *xe)
 {
 	struct ww_acquire_ctx ww;
 	struct xe_bo *bo;
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	struct list_head still_in_list;
 	u8 id;
 	int ret;
@@ -224,8 +224,8 @@ int xe_bo_restore_user(struct xe_device *xe)
 	spin_unlock(&xe->pinned.lock);
 
 	/* Wait for validate to complete */
-	for_each_gt(gt, xe, id)
-		xe_gt_migrate_wait(gt);
+	for_each_tile(tile, xe, id)
+		xe_tile_migrate_wait(tile);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index ee050b4b4d779..dfcf0a787e01a 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -136,6 +136,9 @@ struct xe_tile {
 		 */
 		struct xe_sa_manager *kernel_bb_pool;
 	} mem;
+
+	/** @migrate: Migration helper for vram blits and clearing */
+	struct xe_migrate *migrate;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 4fca422e9e7bb..fd39da8594426 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -560,7 +560,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			if (XE_IOCTL_ERR(xe, !hwe))
 				return -EINVAL;
 
-			migrate_vm = xe_migrate_get_vm(gt->migrate);
+			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
 			new = xe_engine_create(xe, migrate_vm, logical_mask,
 					       args->width, hwe,
 					       ENGINE_FLAG_PERSISTENT |
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 74023a5dc8b2f..aa047db4c937e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -43,15 +43,6 @@
 #include "xe_wa.h"
 #include "xe_wopcm.h"
 
-struct xe_gt *xe_find_full_gt(struct xe_gt *gt)
-{
-	/*
-	 * FIXME: Once the code is prepared for re-enabling, this function will
-	 * be gone. Just return the only possible gt for now.
-	 */
-	return gt;
-}
-
 int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
 {
 	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
@@ -169,6 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 int xe_gt_record_default_lrcs(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
 	int err = 0;
@@ -192,7 +184,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 		if (!default_lrc)
 			return -ENOMEM;
 
-		vm = xe_migrate_get_vm(gt->migrate);
+		vm = xe_migrate_get_vm(tile->migrate);
 		e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1,
 				     hwe, ENGINE_FLAG_WA);
 		if (IS_ERR(e)) {
@@ -383,13 +375,13 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	}
 
 	if (!xe_gt_is_media_type(gt)) {
-		gt->migrate = xe_migrate_init(gt);
-		if (IS_ERR(gt->migrate)) {
-			err = PTR_ERR(gt->migrate);
+		struct xe_tile *tile = gt_to_tile(gt);
+
+		tile->migrate = xe_migrate_init(tile);
+		if (IS_ERR(tile->migrate)) {
+			err = PTR_ERR(tile->migrate);
 			goto err_force_wake;
 		}
-	} else {
-		gt->migrate = xe_find_full_gt(gt)->migrate;
 	}
 
 	err = xe_uc_init_hw(&gt->uc);
@@ -644,11 +636,6 @@ err_msg:
 	return err;
 }
 
-void xe_gt_migrate_wait(struct xe_gt *gt)
-{
-	xe_migrate_wait(gt->migrate);
-}
-
 struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
 				     enum xe_engine_class class,
 				     u16 instance, bool logical)
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 27e913e9a43af..01c1d226faeb5 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -24,11 +24,8 @@ void xe_gt_suspend_prepare(struct xe_gt *gt);
 int xe_gt_suspend(struct xe_gt *gt);
 int xe_gt_resume(struct xe_gt *gt);
 void xe_gt_reset_async(struct xe_gt *gt);
-void xe_gt_migrate_wait(struct xe_gt *gt);
 void xe_gt_sanitize(struct xe_gt *gt);
 
-struct xe_gt *xe_find_full_gt(struct xe_gt *gt);
-
 /**
  * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
  * first that matches the same reset domain as @class
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 1ec140aaf2a7c..5436667ba82bf 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -208,7 +208,7 @@ retry_userptr:
 
 	/* Bind VMA only to the GT that has faulted */
 	trace_xe_vma_pf_bind(vma);
-	fence = __xe_pt_bind_vma(tile, vma, xe_gt_migrate_engine(gt), NULL, 0,
+	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
 				 vma->tile_present & BIT(tile->id));
 	if (IS_ERR(fence)) {
 		ret = PTR_ERR(fence);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index c44560b6dc71f..34d5dd98885e9 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -278,9 +278,6 @@ struct xe_gt {
 	/** @hw_engines: hardware engines on the GT */
 	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
 
-	/** @migrate: Migration helper for vram blits and clearing */
-	struct xe_migrate *migrate;
-
 	/** @pcode: GT's PCODE */
 	struct {
 		/** @lock: protecting GT's PCODE mailbox data */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 3031a45db4900..794c5c68589d8 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -36,8 +36,8 @@
 struct xe_migrate {
 	/** @eng: Default engine used for migration */
 	struct xe_engine *eng;
-	/** @gt: Backpointer to the gt this struct xe_migrate belongs to. */
-	struct xe_gt *gt;
+	/** @tile: Backpointer to the tile this struct xe_migrate belongs to. */
+	struct xe_tile *tile;
 	/** @job_mutex: Timeline mutex for @eng. */
 	struct mutex job_mutex;
 	/** @pt_bo: Page-table buffer object. */
@@ -70,17 +70,17 @@ struct xe_migrate {
 #define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M)
 
 /**
- * xe_gt_migrate_engine() - Get this gt's migrate engine.
- * @gt: The gt.
+ * xe_tile_migrate_engine() - Get this tile's migrate engine.
+ * @tile: The tile.
  *
- * Returns the default migrate engine of this gt.
+ * Returns the default migrate engine of this tile.
  * TODO: Perhaps this function is slightly misplaced, and even unneeded?
  *
  * Return: The default migrate engine
  */
-struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt)
+struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile)
 {
-	return gt->migrate->eng;
+	return tile->migrate->eng;
 }
 
 static void xe_migrate_fini(struct drm_device *dev, void *arg)
@@ -128,8 +128,7 @@ static u64 xe_migrate_vram_ofs(u64 addr)
  */
 static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 {
-	struct xe_gt *gt = m->gt;
-	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_tile *tile = m->tile;
 	struct xe_device *xe = vm->xe;
 	size_t cleared_size;
 	u64 vram_addr;
@@ -155,14 +154,13 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 	return 0;
 }
 
-static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
+static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 				 struct xe_vm *vm)
 {
-	u8 id = gt->info.id;
+	struct xe_device *xe = tile_to_xe(tile);
+	u8 id = tile->id;
 	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
 	u32 map_ofs, level, i;
-	struct xe_device *xe = gt_to_xe(m->gt);
-	struct xe_tile *tile = gt_to_tile(m->gt);
 	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
 	u64 entry;
 	int ret;
@@ -231,7 +229,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 
 		if (xe->info.supports_usm) {
-			batch = gt->usm.bb_pool->bo;
+			batch = tile->primary_gt.usm.bb_pool->bo;
 			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE,
 						&is_vram);
 			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
@@ -308,34 +306,33 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
 
 /**
  * xe_migrate_init() - Initialize a migrate context
- * @gt: Back-pointer to the gt we're initializing for.
+ * @tile: Back-pointer to the tile we're initializing for.
  *
  * Return: Pointer to a migrate context on success. Error pointer on error.
  */
-struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 {
-	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *primary_gt = &tile->primary_gt;
 	struct xe_migrate *m;
 	struct xe_vm *vm;
 	struct ww_acquire_ctx ww;
 	int err;
 
-	XE_BUG_ON(xe_gt_is_media_type(gt));
-
 	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
 	if (!m)
 		return ERR_PTR(-ENOMEM);
 
-	m->gt = gt;
+	m->tile = tile;
 
 	/* Special layout, prepared below.. */
 	vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
-			  XE_VM_FLAG_SET_GT_ID(gt));
+			  XE_VM_FLAG_SET_TILE_ID(tile));
 	if (IS_ERR(vm))
 		return ERR_CAST(vm);
 
 	xe_vm_lock(vm, &ww, 0, false);
-	err = xe_migrate_prepare_vm(gt, m, vm);
+	err = xe_migrate_prepare_vm(tile, m, vm);
 	xe_vm_unlock(vm, &ww);
 	if (err) {
 		xe_vm_close_and_put(vm);
@@ -343,9 +340,9 @@ struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
 	}
 
 	if (xe->info.supports_usm) {
-		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
 							   XE_ENGINE_CLASS_COPY,
-							   gt->usm.reserved_bcs_instance,
+							   primary_gt->usm.reserved_bcs_instance,
 							   false);
 		if (!hwe)
 			return ERR_PTR(-EINVAL);
@@ -354,7 +351,7 @@ struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
 					  BIT(hwe->logical_instance), 1,
 					  hwe, ENGINE_FLAG_KERNEL);
 	} else {
-		m->eng = xe_engine_create_class(xe, gt, vm,
+		m->eng = xe_engine_create_class(xe, primary_gt, vm,
 						XE_ENGINE_CLASS_COPY,
 						ENGINE_FLAG_KERNEL);
 	}
@@ -549,7 +546,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 			       u64 dst_ofs, bool dst_is_vram, u32 dst_size,
 			       u64 ccs_ofs, bool copy_ccs)
 {
-	struct xe_gt *gt = m->gt;
+	struct xe_gt *gt = &m->tile->primary_gt;
 	u32 flush_flags = 0;
 
 	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
@@ -613,7 +610,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct ttm_resource *src,
 				  struct ttm_resource *dst)
 {
-	struct xe_gt *gt = m->gt;
+	struct xe_gt *gt = &m->tile->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct dma_fence *fence = NULL;
 	u64 size = src_bo->size;
@@ -876,7 +873,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 				   struct ttm_resource *dst)
 {
 	bool clear_vram = mem_type_is_vram(dst->mem_type);
-	struct xe_gt *gt = m->gt;
+	struct xe_gt *gt = &m->tile->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct dma_fence *fence = NULL;
 	u64 size = bo->size;
@@ -1083,7 +1080,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 	for (i = 0; i < num_updates; i++) {
 		const struct xe_vm_pgtable_update *update = &updates[i];
 
-		ops->populate(pt_update, gt_to_tile(m->gt), &update->pt_bo->vmap, NULL,
+		ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL,
 			      update->ofs, update->qwords, update);
 	}
 
@@ -1150,9 +1147,9 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 			   struct xe_migrate_pt_update *pt_update)
 {
 	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
-	struct xe_gt *gt = m->gt;
-	struct xe_tile *tile = gt_to_tile(m->gt);
-	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = m->tile;
+	struct xe_gt *gt = &tile->primary_gt;
+	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_sched_job *job;
 	struct dma_fence *fence;
 	struct drm_suballoc *sa_bo = NULL;
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index e627f4023d5a8..204337ea3b4ed 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -71,7 +71,7 @@ struct xe_migrate_pt_update {
 	struct xe_vma *vma;
 };
 
-struct xe_migrate *xe_migrate_init(struct xe_gt *gt);
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
 
 struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct xe_bo *src_bo,
@@ -97,5 +97,5 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 void xe_migrate_wait(struct xe_migrate *m);
 
-struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt);
+struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 094058cb5f93a..41b2be028b8a4 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1303,7 +1303,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 			return ERR_PTR(-ENOMEM);
 	}
 
-	fence = xe_migrate_update_pgtables(tile->primary_gt.migrate,
+	fence = xe_migrate_update_pgtables(tile->migrate,
 					   vm, vma->bo,
 					   e ? e : vm->eng[tile->id],
 					   entries, num_entries,
@@ -1624,7 +1624,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 	 * clear again here. The eviction may have updated pagetables at a
 	 * lower level, because it needs to be more conservative.
 	 */
-	fence = xe_migrate_update_pgtables(tile->primary_gt.migrate,
+	fence = xe_migrate_update_pgtables(tile->migrate,
 					   vm, NULL, e ? e :
 					   vm->eng[tile->id],
 					   entries, num_entries,
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 59d3e25ea5502..fa56323aa9882 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -7,6 +7,7 @@
 
 #include "xe_device.h"
 #include "xe_ggtt.h"
+#include "xe_migrate.h"
 #include "xe_sa.h"
 #include "xe_tile.h"
 #include "xe_ttm_vram_mgr.h"
@@ -88,3 +89,8 @@ err_mem_access:
 	xe_device_mem_access_put(tile_to_xe(tile));
 	return err;
 }
+
+void xe_tile_migrate_wait(struct xe_tile *tile)
+{
+	xe_migrate_wait(tile->migrate);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
index 77529ea136a66..33bf412921953 100644
--- a/drivers/gpu/drm/xe/xe_tile.h
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -11,4 +11,6 @@ struct xe_tile;
 int xe_tile_alloc(struct xe_tile *tile);
 int xe_tile_init_noalloc(struct xe_tile *tile);
 
+void xe_tile_migrate_wait(struct xe_tile *tile);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ecfff4ffd00e1..7d4c7a66a35fe 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1267,7 +1267,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 			if (!vm->pt_root[id])
 				continue;
 
-			migrate_vm = xe_migrate_get_vm(gt->migrate);
+			migrate_vm = xe_migrate_get_vm(tile->migrate);
 			eng = xe_engine_create_class(xe, gt, migrate_vm,
 						     XE_ENGINE_CLASS_COPY,
 						     ENGINE_FLAG_VM);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index c45c5daeeaa78..76af6ac0fa846 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -179,7 +179,7 @@ struct xe_vm {
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
 #define XE_VM_FLAG_FAULT_MODE		BIT(5)
 #define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 6) & 0x3)
-#define XE_VM_FLAG_SET_GT_ID(gt)	((gt)->info.id << 6)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	((tile)->id << 6)
 	unsigned long flags;
 
 	/** @composite_fence_ctx: context composite fence */
-- 
GitLab


From ed006ba5e6e8334deb86fbc1e35d2411a4870281 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:28 -0700
Subject: [PATCH 1610/2340] drm/xe: Clarify 'gt' retrieval for primary tile

There are a bunch of places in the driver where we need to perform
non-GT MMIO against the platform's primary tile (display code, top-level
interrupt enable/disable, driver initialization, etc.).  Rename
'to_gt()' to 'xe_primary_mmio_gt()' to clarify that we're trying to get
a primary MMIO handle for these top-level operations.

In the future we need to move away from xe_gt as the target for MMIO
operations (most of which are completely unrelated to GT).

v2:
 - s/xe_primary_mmio_gt/xe_root_mmio_gt/ for more consistency with how
   we refer to tile 0.  (Lucas)
v3:
 - Tweak comment on xe_root_mmio_gt().  (Lucas)

Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-16-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c         |  2 +-
 drivers/gpu/drm/xe/xe_device.h         | 13 +++++++++++--
 drivers/gpu/drm/xe/xe_irq.c            |  6 +++---
 drivers/gpu/drm/xe/xe_mmio.c           |  6 +++---
 drivers/gpu/drm/xe/xe_query.c          |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c |  4 ++--
 6 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 50ce4e97299e4..0ff3b94bd6620 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -355,7 +355,7 @@ static void device_kill_persistent_engines(struct xe_device *xe,
 
 void xe_device_wmb(struct xe_device *xe)
 {
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 
 	wmb();
 	if (IS_DGFX(xe))
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 3516ac1dcbc46..e88f685f3f215 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -66,9 +66,18 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
 }
 
 /*
- * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function.
+ * Provide a GT structure suitable for performing non-GT MMIO operations against
+ * the primary tile.  Primarily intended for early tile initialization, display
+ * handling, top-most interrupt enable/disable, etc.  Since anything using the
+ * MMIO handle returned by this function doesn't need GSI offset translation,
+ * we'll return the primary GT from the root tile.
+ *
+ * FIXME: Fix the driver design so that 'gt' isn't the target of all MMIO
+ * operations.
+ *
+ * Returns the primary gt of the root tile.
  */
-static inline struct xe_gt *to_gt(struct xe_device *xe)
+static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe)
 {
 	return &xe_device_get_root_tile(xe)->primary_gt;
 }
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index e9614e90efaf7..6d95456643869 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -284,7 +284,7 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 static irqreturn_t xelp_irq_handler(int irq, void *arg)
 {
 	struct xe_device *xe = arg;
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);	/* Only 1 GT here */
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	u32 master_ctl, gu_misc_iir;
 	long unsigned int intr_dw[2];
 	u32 identity[32];
@@ -306,7 +306,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 
 static u32 dg1_intr_disable(struct xe_device *xe)
 {
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	u32 val;
 
 	/* First disable interrupts */
@@ -324,7 +324,7 @@ static u32 dg1_intr_disable(struct xe_device *xe)
 
 static void dg1_intr_enable(struct xe_device *xe, bool stall)
 {
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 
 	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
 	if (stall)
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 86f010ac9ccfb..7739282d364d7 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -328,7 +328,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 
 static void xe_mmio_probe_tiles(struct xe_device *xe)
 {
-	struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt;
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	u32 mtcfg;
 	u8 adj_tile_count;
 	u8 id;
@@ -380,7 +380,7 @@ static void mmio_fini(struct drm_device *drm, void *arg)
 int xe_mmio_init(struct xe_device *xe)
 {
 	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	const int mmio_bar = 0;
 	int err;
 
@@ -439,7 +439,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 		  struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	struct drm_xe_mmio *args = data;
 	unsigned int bits_flag, bytes;
 	struct xe_reg reg;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 799bf68800e7c..8087c94dd782a 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -259,7 +259,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 static int query_hwconfig(struct xe_device *xe,
 			  struct drm_xe_device_query *query)
 {
-	struct xe_gt *gt = xe_device_get_gt(xe, 0);
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
 	void __user *query_ptr = u64_to_user_ptr(query->data);
 	void *hwconfig;
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index c68325161c199..21ecc734f10a7 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -54,7 +54,7 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
 static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 {
 	struct xe_tile *tile = xe_device_get_root_tile(xe);
-	struct xe_gt *mmio = &tile->primary_gt;
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	u64 stolen_size;
 	u64 tile_offset;
@@ -92,7 +92,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 	u32 stolen_size;
 	u32 ggc, gms;
 
-	ggc = xe_mmio_read32(to_gt(xe), GGC);
+	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
 
 	/* check GGMS, should be fixed 0x3 (8MB) */
 	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
-- 
GitLab


From 68ccb9b2f71b5834b703b982a2a29d5bb3fabbe9 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:29 -0700
Subject: [PATCH 1611/2340] drm/xe: Drop vram_id

The VRAM ID is always the tile ID; there's no need to track it
separately within a GT.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-17-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index b91d52205feb7..d896d9fa25566 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -32,7 +32,6 @@ struct xe_subplatform_desc {
 
 struct xe_gt_desc {
 	enum xe_gt_type type;
-	u8 vram_id;
 	u32 mmio_adj_limit;
 	u32 mmio_adj_offset;
 };
@@ -261,7 +260,6 @@ static const struct xe_device_desc dg2_desc = {
 static const struct xe_gt_desc pvc_gts[] = {
 	{
 		.type = XE_GT_TYPE_REMOTE,
-		.vram_id = 1,
 		.mmio_adj_limit = 0,
 		.mmio_adj_offset = 0,
 	},
-- 
GitLab


From 1e6c20be6c83817cf68637eb334dafac3a4b2512 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:30 -0700
Subject: [PATCH 1612/2340] drm/xe: Drop extra_gts[] declarations and
 XE_GT_TYPE_REMOTE

Now that tiles and GTs are handled separately, extra_gts[] doesn't
really provide any useful information that we can't just infer directly.
The primary GT of the root tile and of the remote tiles behave the same
way and don't need independent handling.

When we re-add support for media GTs in a future patch, the presence of
media can be determined from MEDIA_VER() (i.e., >= 13) and media's GSI
offset handling is expected to remain constant for all forseeable future
platforms, so it won't need to be provided in a definition structure
either.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-18-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_types.h |  1 -
 drivers/gpu/drm/xe/xe_pci.c      | 36 ++++++--------------------------
 2 files changed, 6 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 34d5dd98885e9..99ab7ec99ccd5 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -20,7 +20,6 @@ struct xe_ring_ops;
 enum xe_gt_type {
 	XE_GT_TYPE_UNINITIALIZED,
 	XE_GT_TYPE_MAIN,
-	XE_GT_TYPE_REMOTE,
 	XE_GT_TYPE_MEDIA,
 };
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index d896d9fa25566..4fbcbfb8a93af 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -44,7 +44,6 @@ struct xe_device_desc {
 
 	const char *platform_name;
 	const struct xe_subplatform_desc *subplatforms;
-	const struct xe_gt_desc *extra_gts;
 
 	enum xe_platform platform;
 
@@ -257,20 +256,11 @@ static const struct xe_device_desc dg2_desc = {
 	DG2_FEATURES,
 };
 
-static const struct xe_gt_desc pvc_gts[] = {
-	{
-		.type = XE_GT_TYPE_REMOTE,
-		.mmio_adj_limit = 0,
-		.mmio_adj_offset = 0,
-	},
-};
-
 static const __maybe_unused struct xe_device_desc pvc_desc = {
 	.graphics = &graphics_xehpc,
 	DGFX_FEATURES,
 	PLATFORM(XE_PVC),
 	.require_force_probe = true,
-	.extra_gts = pvc_gts,
 };
 
 static const struct xe_device_desc mtl_desc = {
@@ -540,28 +530,14 @@ static int xe_info_init(struct xe_device *xe,
 		tile->id = id;
 
 		gt = &tile->primary_gt;
-		gt->info.id = id;
+		gt->info.id = id;	/* FIXME: Determine sensible numbering */
 		gt->tile = tile;
+		gt->info.type = XE_GT_TYPE_MAIN;
+		gt->info.__engine_mask = graphics_desc->hw_engine_mask;
+		if (MEDIA_VER(xe) < 13 && media_desc)
+			gt->info.__engine_mask |= media_desc->hw_engine_mask;
 
-		if (id == 0) {
-			gt->info.type = XE_GT_TYPE_MAIN;
-
-			gt->info.__engine_mask = graphics_desc->hw_engine_mask;
-			if (MEDIA_VER(xe) < 13 && media_desc)
-				gt->info.__engine_mask |= media_desc->hw_engine_mask;
-
-			gt->mmio.adj_limit = 0;
-			gt->mmio.adj_offset = 0;
-		} else {
-			gt->info.type = desc->extra_gts[id - 1].type;
-			gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ?
-				media_desc->hw_engine_mask :
-				graphics_desc->hw_engine_mask;
-			gt->mmio.adj_limit =
-				desc->extra_gts[id - 1].mmio_adj_limit;
-			gt->mmio.adj_offset =
-				desc->extra_gts[id - 1].mmio_adj_offset;
-		}
+		/* TODO: Init media GT, if present */
 	}
 
 	return 0;
-- 
GitLab


From f6929e80cdf540d7106764bda38c4ce0601fee7b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:31 -0700
Subject: [PATCH 1613/2340] drm/xe: Allocate GT dynamically

In preparation for re-adding media GT support, switch the primary GT
within the tile to a dynamic allocation.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-19-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c  |  4 ++--
 drivers/gpu/drm/xe/tests/xe_rtp_test.c |  2 +-
 drivers/gpu/drm/xe/xe_device.c         |  4 ----
 drivers/gpu/drm/xe/xe_device.h         |  8 ++++++--
 drivers/gpu/drm/xe/xe_device_types.h   |  2 +-
 drivers/gpu/drm/xe/xe_ggtt.c           |  2 +-
 drivers/gpu/drm/xe/xe_gt.c             | 11 ++++++++---
 drivers/gpu/drm/xe/xe_gt.h             |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c        | 12 ++++++------
 drivers/gpu/drm/xe/xe_mmio.c           |  2 +-
 drivers/gpu/drm/xe/xe_pci.c            |  8 ++++++--
 drivers/gpu/drm/xe/xe_pt.c             |  4 ++--
 drivers/gpu/drm/xe/xe_vm.c             |  6 +++---
 13 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index d9f1f31c92d27..60266fea7faa2 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -286,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_pt;
 	}
 
-	bb = xe_bb_new(&tile->primary_gt, 32, xe->info.supports_usm);
+	bb = xe_bb_new(tile->primary_gt, 32, xe->info.supports_usm);
 	if (IS_ERR(bb)) {
 		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
 			   PTR_ERR(bb));
@@ -323,7 +323,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
 	expected = 0;
 
-	emit_clear(&tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
+	emit_clear(tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
 		   IS_DGFX(xe));
 	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
 		       test);
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 45f2614f91ec1..b2beba0019cd7 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -237,7 +237,7 @@ static void xe_rtp_process_tests(struct kunit *test)
 {
 	const struct rtp_test_case *param = test->param_value;
 	struct xe_device *xe = test->priv;
-	struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt;
+	struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt;
 	struct xe_reg_sr *reg_sr = &gt->reg_sr;
 	const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 0ff3b94bd6620..a4fc5bc54d02d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -249,10 +249,6 @@ int xe_device_probe(struct xe_device *xe)
 		err = xe_tile_alloc(tile);
 		if (err)
 			return err;
-
-		err = xe_gt_alloc(xe, &tile->primary_gt);
-		if (err)
-			return err;
 	}
 
 	err = xe_mmio_init(xe);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index e88f685f3f215..f2d8479f6ff62 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -58,7 +58,11 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
 	struct xe_gt *gt;
 
 	XE_BUG_ON(gt_id > XE_MAX_TILES_PER_DEVICE);
-	gt = &xe->tiles[gt_id].primary_gt;
+
+	gt = xe->tiles[gt_id].primary_gt;
+	if (drm_WARN_ON(&xe->drm, !gt))
+		return NULL;
+
 	XE_BUG_ON(gt->info.id != gt_id);
 	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
 
@@ -79,7 +83,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
  */
 static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe)
 {
-	return &xe_device_get_root_tile(xe)->primary_gt;
+	return xe_device_get_root_tile(xe)->primary_gt;
 }
 
 static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index dfcf0a787e01a..5cb3fa9e8086a 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -74,7 +74,7 @@ struct xe_tile {
 	/**
 	 * @primary_gt: Primary GT
 	 */
-	struct xe_gt primary_gt;
+	struct xe_gt *primary_gt;
 
 	/* TODO: Add media GT here */
 
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index d395d6fc1af66..8d3638826860b 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -196,7 +196,7 @@ void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
 	 * TODO: Loop over each GT in tile once media GT support is
 	 * re-added
 	 */
-	struct xe_gt *gt = &ggtt->tile->primary_gt;
+	struct xe_gt *gt = ggtt->tile->primary_gt;
 
 	/* TODO: vfunc for GuC vs. non-GuC */
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index aa047db4c937e..f00b82e90106d 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -43,13 +43,18 @@
 #include "xe_wa.h"
 #include "xe_wopcm.h"
 
-int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
+struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
 {
-	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+	struct xe_gt *gt;
 
+	gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL);
+	if (!gt)
+		return ERR_PTR(-ENOMEM);
+
+	gt->tile = tile;
 	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
 
-	return 0;
+	return gt;
 }
 
 void xe_gt_sanitize(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 01c1d226faeb5..21d9044088def 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -16,7 +16,7 @@
 	     for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \
 			  xe_hw_engine_is_valid((hwe__)))
 
-int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt);
+struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
 int xe_gt_init_early(struct xe_gt *gt);
 int xe_gt_init(struct xe_gt *gt);
 int xe_gt_record_default_lrcs(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 794c5c68589d8..f50484759866d 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -229,7 +229,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 
 		if (xe->info.supports_usm) {
-			batch = tile->primary_gt.usm.bb_pool->bo;
+			batch = tile->primary_gt->usm.bb_pool->bo;
 			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE,
 						&is_vram);
 			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
@@ -313,7 +313,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 {
 	struct xe_device *xe = tile_to_xe(tile);
-	struct xe_gt *primary_gt = &tile->primary_gt;
+	struct xe_gt *primary_gt = tile->primary_gt;
 	struct xe_migrate *m;
 	struct xe_vm *vm;
 	struct ww_acquire_ctx ww;
@@ -546,7 +546,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 			       u64 dst_ofs, bool dst_is_vram, u32 dst_size,
 			       u64 ccs_ofs, bool copy_ccs)
 {
-	struct xe_gt *gt = &m->tile->primary_gt;
+	struct xe_gt *gt = m->tile->primary_gt;
 	u32 flush_flags = 0;
 
 	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
@@ -610,7 +610,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct ttm_resource *src,
 				  struct ttm_resource *dst)
 {
-	struct xe_gt *gt = &m->tile->primary_gt;
+	struct xe_gt *gt = m->tile->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct dma_fence *fence = NULL;
 	u64 size = src_bo->size;
@@ -873,7 +873,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 				   struct ttm_resource *dst)
 {
 	bool clear_vram = mem_type_is_vram(dst->mem_type);
-	struct xe_gt *gt = &m->tile->primary_gt;
+	struct xe_gt *gt = m->tile->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct dma_fence *fence = NULL;
 	u64 size = bo->size;
@@ -1148,7 +1148,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 {
 	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
 	struct xe_tile *tile = m->tile;
-	struct xe_gt *gt = &tile->primary_gt;
+	struct xe_gt *gt = tile->primary_gt;
 	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_sched_job *job;
 	struct dma_fence *fence;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 7739282d364d7..475b14fe43568 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -209,7 +209,7 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
 int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_offset)
 {
 	struct xe_device *xe = tile_to_xe(tile);
-	struct xe_gt *gt = &tile->primary_gt;
+	struct xe_gt *gt = tile->primary_gt;
 	u64 offset;
 	int err;
 	u32 reg;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 4fbcbfb8a93af..be51c9e97a796 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -18,6 +18,7 @@
 #include "regs/xe_gt_regs.h"
 #include "xe_device.h"
 #include "xe_drv.h"
+#include "xe_gt.h"
 #include "xe_macros.h"
 #include "xe_module.h"
 #include "xe_pci_types.h"
@@ -529,9 +530,12 @@ static int xe_info_init(struct xe_device *xe,
 		tile->xe = xe;
 		tile->id = id;
 
-		gt = &tile->primary_gt;
+		tile->primary_gt = xe_gt_alloc(tile);
+		if (IS_ERR(tile->primary_gt))
+			return PTR_ERR(tile->primary_gt);
+
+		gt = tile->primary_gt;
 		gt->info.id = id;	/* FIXME: Determine sensible numbering */
-		gt->tile = tile;
 		gt->info.type = XE_GT_TYPE_MAIN;
 		gt->info.__engine_mask = graphics_desc->hw_engine_mask;
 		if (MEDIA_VER(xe) < 13 && media_desc)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 41b2be028b8a4..bef265715000c 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1314,7 +1314,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 
 		/* TLB invalidation must be done before signaling rebind */
 		if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
-			int err = invalidation_fence_init(&tile->primary_gt, ifence, fence,
+			int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
 							  vma);
 			if (err) {
 				dma_fence_put(fence);
@@ -1634,7 +1634,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 		int err;
 
 		/* TLB invalidation must be done before signaling unbind */
-		err = invalidation_fence_init(&tile->primary_gt, ifence, fence, vma);
+		err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
 		if (err) {
 			dma_fence_put(fence);
 			kfree(ifence);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7d4c7a66a35fe..44ad457761415 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1260,7 +1260,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	/* Kernel migration VM shouldn't have a circular loop.. */
 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
 		for_each_tile(tile, xe, id) {
-			struct xe_gt *gt = &tile->primary_gt;
+			struct xe_gt *gt = tile->primary_gt;
 			struct xe_vm *migrate_vm;
 			struct xe_engine *eng;
 
@@ -3410,7 +3410,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 			 * FIXME: We potentially need to invalidate multiple
 			 * GTs within the tile
 			 */
-			seqno[id] = xe_gt_tlb_invalidation_vma(&tile->primary_gt, NULL, vma);
+			seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
 			if (seqno[id] < 0)
 				return seqno[id];
 		}
@@ -3418,7 +3418,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 
 	for_each_tile(tile, xe, id) {
 		if (tile_needs_invalidate & BIT(id)) {
-			ret = xe_gt_tlb_invalidation_wait(&tile->primary_gt, seqno[id]);
+			ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
 			if (ret < 0)
 				return ret;
 		}
-- 
GitLab


From e2682f616b91c0000a02019047605956c85dcca1 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:32 -0700
Subject: [PATCH 1614/2340] drm/xe: Add media GT to tile

This media_gt pointer isn't actually allocated yet.  Future patches will
start hooking it up at appropriate places in the code, and then creation
of the media GT will be added once those infrastructure changes are in
place.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-20-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 5cb3fa9e8086a..16a77703d4298 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -76,7 +76,12 @@ struct xe_tile {
 	 */
 	struct xe_gt *primary_gt;
 
-	/* TODO: Add media GT here */
+	/**
+	 * @media_gt: Media GT
+	 *
+	 * Only present on devices with media version >= 13.
+	 */
+	struct xe_gt *media_gt;
 
 	/**
 	 * @mmio: MMIO info for a tile.
-- 
GitLab


From 7e485d9816c134c6b54707143ee84f0adcd6c1d7 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:34 -0700
Subject: [PATCH 1615/2340] drm/xe: Interrupts are delivered per-tile, not
 per-GT

IRQ delivery and handling needs to be handled on a per-tile basis.  Note
that this is true even for the "GT interrupts" relating to engines and
GuCs --- the interrupts relating to both GTs get raised through a single
set of registers in the tile's sgunit range.

On true multi-tile platforms, interrupts on remote tiles are internally
forwarded to the root tile; the first thing the top-level interrupt
handler should do is consult the root tile's instance of
DG1_MSTR_TILE_INTR to determine which tile(s) had interrupts.  This
register is also responsible for enabling/disabling top-level reporting
of any interrupts to the OS.  Although this register technically exists
on all tiles, it should only be used on the root tile.

The (mis)use of struct xe_gt as a target for MMIO operations in the
driver makes the code somewhat confusing since we wind up needing a GT
pointer to handle programming that's unrelated to the GT.  To mitigate
this confusion, all of the xe_gt structures used solely as an MMIO
target in interrupt code are renamed to 'mmio' so that it's clear that
the structure being passed does not necessarily relate to any specific
GT (primary or media) that we might be dealing with interrupts for.
Reworking the driver's MMIO handling to not be dependent on xe_gt is
planned as a future patch series.

Note that GT initialization code currently calls xe_gt_irq_postinstall()
in an attempt to enable the HWE interrupts for the GT being initialized.
Unfortunately xe_gt_irq_postinstall() doesn't really match its name and
does a bunch of other stuff unrelated to the GT interrupts (such as
enabling the top-level device interrupts).  That will be addressed in
future patches.

v2:
 - Clarify commit message with explanation of why DG1_MSTR_TILE_INTR is
   only used on the root tile, even though it's an sgunit register that
   is technically present in each tile's MMIO space.  (Aravind)
 - Also clarify that the xe_gt used as a target for MMIO operations may
   or may not relate to the GT we're dealing with for interrupts.
   (Lucas)

Cc: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-22-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c  |   2 +-
 drivers/gpu/drm/xe/xe_irq.c | 327 ++++++++++++++++++++----------------
 drivers/gpu/drm/xe/xe_irq.h |   4 +-
 3 files changed, 184 insertions(+), 149 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index f00b82e90106d..071d4fbd3efc2 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -310,7 +310,7 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	gt->info.engine_mask = gt->info.__engine_mask;
 
 	/* Enables per hw engine IRQs */
-	xe_gt_irq_postinstall(gt);
+	xe_gt_irq_postinstall(gt_to_tile(gt));
 
 	/* Rerun MCR init as we now have hw engine list */
 	xe_gt_mcr_init(gt);
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 6d95456643869..4d3ea3b66a7b6 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -26,60 +26,66 @@
 #define IIR(offset)				XE_REG(offset + 0x8)
 #define IER(offset)				XE_REG(offset + 0xc)
 
-static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg)
+static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
 {
-	u32 val = xe_mmio_read32(gt, reg);
+	u32 val = xe_mmio_read32(mmio, reg);
 
 	if (val == 0)
 		return;
 
-	drm_WARN(&gt_to_xe(gt)->drm, 1,
+	drm_WARN(&gt_to_xe(mmio)->drm, 1,
 		 "Interrupt register 0x%x is not zero: 0x%08x\n",
 		 reg.addr, val);
-	xe_mmio_write32(gt, reg, 0xffffffff);
-	xe_mmio_read32(gt, reg);
-	xe_mmio_write32(gt, reg, 0xffffffff);
-	xe_mmio_read32(gt, reg);
+	xe_mmio_write32(mmio, reg, 0xffffffff);
+	xe_mmio_read32(mmio, reg);
+	xe_mmio_write32(mmio, reg, 0xffffffff);
+	xe_mmio_read32(mmio, reg);
 }
 
 /*
  * Unmask and enable the specified interrupts.  Does not check current state,
  * so any bits not specified here will become masked and disabled.
  */
-static void unmask_and_enable(struct xe_gt *gt, u32 irqregs, u32 bits)
+static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
 {
+	struct xe_gt *mmio = tile->primary_gt;
+
 	/*
 	 * If we're just enabling an interrupt now, it shouldn't already
 	 * be raised in the IIR.
 	 */
-	assert_iir_is_zero(gt, IIR(irqregs));
+	assert_iir_is_zero(mmio, IIR(irqregs));
 
-	xe_mmio_write32(gt, IER(irqregs), bits);
-	xe_mmio_write32(gt, IMR(irqregs), ~bits);
+	xe_mmio_write32(mmio, IER(irqregs), bits);
+	xe_mmio_write32(mmio, IMR(irqregs), ~bits);
 
 	/* Posting read */
-	xe_mmio_read32(gt, IMR(irqregs));
+	xe_mmio_read32(mmio, IMR(irqregs));
 }
 
 /* Mask and disable all interrupts. */
-static void mask_and_disable(struct xe_gt *gt, u32 irqregs)
+static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
 {
-	xe_mmio_write32(gt, IMR(irqregs), ~0);
+	struct xe_gt *mmio = tile->primary_gt;
+
+	xe_mmio_write32(mmio, IMR(irqregs), ~0);
 	/* Posting read */
-	xe_mmio_read32(gt, IMR(irqregs));
+	xe_mmio_read32(mmio, IMR(irqregs));
 
-	xe_mmio_write32(gt, IER(irqregs), 0);
+	xe_mmio_write32(mmio, IER(irqregs), 0);
 
 	/* IIR can theoretically queue up two events. Be paranoid. */
-	xe_mmio_write32(gt, IIR(irqregs), ~0);
-	xe_mmio_read32(gt, IIR(irqregs));
-	xe_mmio_write32(gt, IIR(irqregs), ~0);
-	xe_mmio_read32(gt, IIR(irqregs));
+	xe_mmio_write32(mmio, IIR(irqregs), ~0);
+	xe_mmio_read32(mmio, IIR(irqregs));
+	xe_mmio_write32(mmio, IIR(irqregs), ~0);
+	xe_mmio_read32(mmio, IIR(irqregs));
 }
 
-static u32 xelp_intr_disable(struct xe_gt *gt)
+static u32 xelp_intr_disable(struct xe_device *xe)
 {
-	xe_mmio_write32(gt, GFX_MSTR_IRQ, 0);
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0);
 
 	/*
 	 * Now with master disabled, get a sample of level indications
@@ -87,36 +93,41 @@ static u32 xelp_intr_disable(struct xe_gt *gt)
 	 * New indications can and will light up during processing,
 	 * and will generate new interrupt after enabling master.
 	 */
-	return xe_mmio_read32(gt, GFX_MSTR_IRQ);
+	return xe_mmio_read32(mmio, GFX_MSTR_IRQ);
 }
 
 static u32
-gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
+gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
 {
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
 	u32 iir;
 
 	if (!(master_ctl & GU_MISC_IRQ))
 		return 0;
 
-	iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET));
+	iir = xe_mmio_read32(mmio, IIR(GU_MISC_IRQ_OFFSET));
 	if (likely(iir))
-		xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET), iir);
+		xe_mmio_write32(mmio, IIR(GU_MISC_IRQ_OFFSET), iir);
 
 	return iir;
 }
 
-static inline void xelp_intr_enable(struct xe_gt *gt, bool stall)
+static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
 {
-	xe_mmio_write32(gt, GFX_MSTR_IRQ, MASTER_IRQ);
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ);
 	if (stall)
-		xe_mmio_read32(gt, GFX_MSTR_IRQ);
+		xe_mmio_read32(mmio, GFX_MSTR_IRQ);
 }
 
-static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+static void gt_irq_postinstall(struct xe_tile *tile)
 {
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *mmio = tile->primary_gt;
 	u32 irqs, dmask, smask;
-	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
-	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+	u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COPY);
 
 	if (xe_device_guc_submission_enabled(xe)) {
 		irqs = GT_RENDER_USER_INTERRUPT |
@@ -132,57 +143,57 @@ static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 	smask = irqs << 16;
 
 	/* Enable RCS, BCS, VCS and VECS class interrupts. */
-	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
-	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
+	xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask);
+	xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask);
 	if (ccs_mask)
-		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
+		xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask);
 
 	/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
-	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
-	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
+	xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask);
+	xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask);
 	if (bcs_mask & (BIT(1)|BIT(2)))
-		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+		xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
 	if (bcs_mask & (BIT(3)|BIT(4)))
-		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+		xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
 	if (bcs_mask & (BIT(5)|BIT(6)))
-		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+		xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
 	if (bcs_mask & (BIT(7)|BIT(8)))
-		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
-	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
-	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
-	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+		xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+	xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask);
+	xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask);
+	xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask);
 	if (ccs_mask & (BIT(0)|BIT(1)))
-		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
+		xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask);
 	if (ccs_mask & (BIT(2)|BIT(3)))
-		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK, ~dmask);
+		xe_mmio_write32(mmio,  CCS2_CCS3_INTR_MASK, ~dmask);
 
 	/*
 	 * RPS interrupts will get enabled/disabled on demand when RPS itself
 	 * is enabled/disabled.
 	 */
 	/* TODO: gt->pm_ier, gt->pm_imr */
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0);
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK,  ~0);
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK,  ~0);
 
 	/* Same thing for GuC interrupts */
-	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, 0);
-	xe_mmio_write32(gt, GUC_SG_INTR_MASK,  ~0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_MASK,  ~0);
 }
 
-static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+static void xelp_irq_postinstall(struct xe_device *xe, struct xe_tile *tile)
 {
 	/* TODO: PCH */
 
-	gt_irq_postinstall(xe, gt);
+	gt_irq_postinstall(tile);
 
-	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
+	unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 
-	xelp_intr_enable(gt, true);
+	xelp_intr_enable(xe, true);
 }
 
 static u32
 gt_engine_identity(struct xe_device *xe,
-		   struct xe_gt *gt,
+		   struct xe_gt *mmio,
 		   const unsigned int bank,
 		   const unsigned int bit)
 {
@@ -191,7 +202,7 @@ gt_engine_identity(struct xe_device *xe,
 
 	lockdep_assert_held(&xe->irq.lock);
 
-	xe_mmio_write32(gt, IIR_REG_SELECTOR(bank), BIT(bit));
+	xe_mmio_write32(mmio, IIR_REG_SELECTOR(bank), BIT(bit));
 
 	/*
 	 * NB: Specs do not specify how long to spin wait,
@@ -199,7 +210,7 @@ gt_engine_identity(struct xe_device *xe,
 	 */
 	timeout_ts = (local_clock() >> 10) + 100;
 	do {
-		ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank));
+		ident = xe_mmio_read32(mmio, INTR_IDENTITY_REG(bank));
 	} while (!(ident & INTR_DATA_VALID) &&
 		 !time_after32(local_clock() >> 10, timeout_ts));
 
@@ -209,7 +220,7 @@ gt_engine_identity(struct xe_device *xe,
 		return 0;
 	}
 
-	xe_mmio_write32(gt, INTR_IDENTITY_REG(bank), INTR_DATA_VALID);
+	xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), INTR_DATA_VALID);
 
 	return ident;
 }
@@ -231,10 +242,32 @@ gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
 	}
 }
 
-static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
+static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
+				    enum xe_engine_class class,
+				    unsigned int instance)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+
+	if (MEDIA_VER(xe) < 13)
+		return tile->primary_gt;
+
+	if (class == XE_ENGINE_CLASS_VIDEO_DECODE ||
+	    class == XE_ENGINE_CLASS_VIDEO_ENHANCE)
+		return tile->media_gt;
+
+	if (class == XE_ENGINE_CLASS_OTHER &&
+	    instance == OTHER_MEDIA_GUC_INSTANCE)
+		return tile->media_gt;
+
+	return tile->primary_gt;
+}
+
+static void gt_irq_handler(struct xe_tile *tile,
 			   u32 master_ctl, long unsigned int *intr_dw,
 			   u32 *identity)
 {
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *mmio = tile->primary_gt;
 	unsigned int bank, bit;
 	u16 instance, intr_vec;
 	enum xe_engine_class class;
@@ -246,27 +279,26 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 		if (!(master_ctl & GT_DW_IRQ(bank)))
 			continue;
 
-		if (!xe_gt_is_media_type(gt)) {
-			intr_dw[bank] =
-				xe_mmio_read32(gt, GT_INTR_DW(bank));
-			for_each_set_bit(bit, intr_dw + bank, 32)
-				identity[bit] = gt_engine_identity(xe, gt,
-								   bank, bit);
-			xe_mmio_write32(gt, GT_INTR_DW(bank),
-					intr_dw[bank]);
-		}
+		intr_dw[bank] = xe_mmio_read32(mmio, GT_INTR_DW(bank));
+		for_each_set_bit(bit, intr_dw + bank, 32)
+			identity[bit] = gt_engine_identity(xe, mmio, bank, bit);
+		xe_mmio_write32(mmio, GT_INTR_DW(bank), intr_dw[bank]);
 
 		for_each_set_bit(bit, intr_dw + bank, 32) {
+			struct xe_gt *engine_gt;
+
 			class = INTR_ENGINE_CLASS(identity[bit]);
 			instance = INTR_ENGINE_INSTANCE(identity[bit]);
 			intr_vec = INTR_ENGINE_INTR(identity[bit]);
 
+			engine_gt = pick_engine_gt(tile, class, instance);
+
 			if (class == XE_ENGINE_CLASS_OTHER) {
-				gt_other_irq_handler(gt, instance, intr_vec);
+				gt_other_irq_handler(engine_gt, instance, intr_vec);
 				continue;
 			}
 
-			hwe = xe_gt_hw_engine(gt, class, instance, false);
+			hwe = xe_gt_hw_engine(engine_gt, class, instance, false);
 			if (!hwe)
 				continue;
 
@@ -284,60 +316,60 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
 static irqreturn_t xelp_irq_handler(int irq, void *arg)
 {
 	struct xe_device *xe = arg;
-	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
 	u32 master_ctl, gu_misc_iir;
 	long unsigned int intr_dw[2];
 	u32 identity[32];
 
-	master_ctl = xelp_intr_disable(gt);
+	master_ctl = xelp_intr_disable(xe);
 	if (!master_ctl) {
-		xelp_intr_enable(gt, false);
+		xelp_intr_enable(xe, false);
 		return IRQ_NONE;
 	}
 
-	gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+	gt_irq_handler(tile, master_ctl, intr_dw, identity);
 
-	gu_misc_iir = gu_misc_irq_ack(gt, master_ctl);
+	gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
 
-	xelp_intr_enable(gt, false);
+	xelp_intr_enable(xe, false);
 
 	return IRQ_HANDLED;
 }
 
 static u32 dg1_intr_disable(struct xe_device *xe)
 {
-	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
 	u32 val;
 
 	/* First disable interrupts */
-	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, 0);
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, 0);
 
 	/* Get the indication levels and ack the master unit */
-	val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR);
+	val = xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR);
 	if (unlikely(!val))
 		return 0;
 
-	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, val);
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, val);
 
 	return val;
 }
 
 static void dg1_intr_enable(struct xe_device *xe, bool stall)
 {
-	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
 
-	xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
 	if (stall)
-		xe_mmio_read32(gt, DG1_MSTR_TILE_INTR);
+		xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR);
 }
 
-static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+static void dg1_irq_postinstall(struct xe_device *xe, struct xe_tile *tile)
 {
-	gt_irq_postinstall(xe, gt);
+	gt_irq_postinstall(tile);
 
-	unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
+	unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 
-	if (gt->info.id == XE_GT0)
+	if (tile->id == 0)
 		dg1_intr_enable(xe, true);
 }
 
@@ -349,8 +381,8 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
 static irqreturn_t dg1_irq_handler(int irq, void *arg)
 {
 	struct xe_device *xe = arg;
-	struct xe_gt *gt;
-	u32 master_tile_ctl, master_ctl = 0, tile0_master_ctl = 0, gu_misc_iir;
+	struct xe_tile *tile;
+	u32 master_tile_ctl, master_ctl = 0, gu_misc_iir = 0;
 	long unsigned int intr_dw[2];
 	u32 identity[32];
 	u8 id;
@@ -363,12 +395,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 	}
 
-	for_each_gt(gt, xe, id) {
-		if ((master_tile_ctl & DG1_MSTR_TILE(gt_to_tile(gt)->id)) == 0)
+	for_each_tile(tile, xe, id) {
+		struct xe_gt *mmio = tile->primary_gt;
+
+		if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0)
 			continue;
 
-		if (!xe_gt_is_media_type(gt))
-			master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ);
+		master_ctl = xe_mmio_read32(mmio, GFX_MSTR_IRQ);
 
 		/*
 		 * We might be in irq handler just when PCIe DPC is initiated
@@ -376,118 +409,120 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 		 * irq as device is inaccessible.
 		 */
 		if (master_ctl == REG_GENMASK(31, 0)) {
-			dev_dbg(gt_to_xe(gt)->drm.dev,
+			dev_dbg(tile_to_xe(tile)->drm.dev,
 				"Ignore this IRQ as device might be in DPC containment.\n");
 			return IRQ_HANDLED;
 		}
 
-		if (!xe_gt_is_media_type(gt))
-			xe_mmio_write32(gt, GFX_MSTR_IRQ, master_ctl);
-		gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+		xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl);
+
+		gt_irq_handler(tile, master_ctl, intr_dw, identity);
 
 		/*
-		 * Save primary tile's master interrupt register for display
-		 * processing below.
+		 * Display interrupts (including display backlight operations
+		 * that get reported as Gunit GSE) would only be hooked up to
+		 * the primary tile.
 		 */
 		if (id == 0)
-			tile0_master_ctl = master_ctl;
+			gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
 	}
 
-	/* Gunit GSE interrupts can trigger display backlight operations */
-	gu_misc_iir = gu_misc_irq_ack(gt, tile0_master_ctl);
-
 	dg1_intr_enable(xe, false);
 
 	return IRQ_HANDLED;
 }
 
-static void gt_irq_reset(struct xe_gt *gt)
+static void gt_irq_reset(struct xe_tile *tile)
 {
-	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
-	u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+	struct xe_gt *mmio = tile->primary_gt;
+
+	u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+						   XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+						   XE_ENGINE_CLASS_COPY);
 
 	/* Disable RCS, BCS, VCS and VECS class engines. */
-	xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE,	 0);
-	xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE,	 0);
+	xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, 0);
 	if (ccs_mask)
-		xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, 0);
+		xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, 0);
 
 	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
-	xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK,	~0);
-	xe_mmio_write32(gt, BCS_RSVD_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK,	~0);
 	if (bcs_mask & (BIT(1)|BIT(2)))
-		xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~0);
+		xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~0);
 	if (bcs_mask & (BIT(3)|BIT(4)))
-		xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~0);
+		xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~0);
 	if (bcs_mask & (BIT(5)|BIT(6)))
-		xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~0);
+		xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~0);
 	if (bcs_mask & (BIT(7)|BIT(8)))
-		xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~0);
-	xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK,	~0);
-	xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK,	~0);
-	xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK,	~0);
+		xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~0);
+	xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK,	~0);
 	if (ccs_mask & (BIT(0)|BIT(1)))
-		xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~0);
+		xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0);
 	if (ccs_mask & (BIT(2)|BIT(3)))
-		xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK, ~0);
+		xe_mmio_write32(mmio,  CCS2_CCS3_INTR_MASK, ~0);
 
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0);
-	xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK,  ~0);
-	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,	 0);
-	xe_mmio_write32(gt, GUC_SG_INTR_MASK,		~0);
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK,  ~0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE,	 0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_MASK,		~0);
 }
 
-static void xelp_irq_reset(struct xe_gt *gt)
+static void xelp_irq_reset(struct xe_tile *tile)
 {
-	xelp_intr_disable(gt);
+	xelp_intr_disable(tile_to_xe(tile));
 
-	gt_irq_reset(gt);
+	gt_irq_reset(tile);
 
-	mask_and_disable(gt, GU_MISC_IRQ_OFFSET);
-	mask_and_disable(gt, PCU_IRQ_OFFSET);
+	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
+	mask_and_disable(tile, PCU_IRQ_OFFSET);
 }
 
-static void dg1_irq_reset(struct xe_gt *gt)
+static void dg1_irq_reset(struct xe_tile *tile)
 {
-	if (gt->info.id == 0)
-		dg1_intr_disable(gt_to_xe(gt));
+	if (tile->id == 0)
+		dg1_intr_disable(tile_to_xe(tile));
 
-	gt_irq_reset(gt);
+	gt_irq_reset(tile);
 
-	mask_and_disable(gt, GU_MISC_IRQ_OFFSET);
-	mask_and_disable(gt, PCU_IRQ_OFFSET);
+	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
+	mask_and_disable(tile, PCU_IRQ_OFFSET);
 }
 
 static void xe_irq_reset(struct xe_device *xe)
 {
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	u8 id;
 
-	for_each_gt(gt, xe, id) {
+	for_each_tile(tile, xe, id) {
 		if (GRAPHICS_VERx100(xe) >= 1210)
-			dg1_irq_reset(gt);
+			dg1_irq_reset(tile);
 		else
-			xelp_irq_reset(gt);
+			xelp_irq_reset(tile);
 	}
 }
 
-void xe_gt_irq_postinstall(struct xe_gt *gt)
+void xe_gt_irq_postinstall(struct xe_tile *tile)
 {
-	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_device *xe = tile_to_xe(tile);
 
 	if (GRAPHICS_VERx100(xe) >= 1210)
-		dg1_irq_postinstall(xe, gt);
+		dg1_irq_postinstall(xe, tile);
 	else
-		xelp_irq_postinstall(xe, gt);
+		xelp_irq_postinstall(xe, tile);
 }
 
 static void xe_irq_postinstall(struct xe_device *xe)
 {
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	u8 id;
 
-	for_each_gt(gt, xe, id)
-		xe_gt_irq_postinstall(gt);
+	for_each_tile(tile, xe, id)
+		xe_gt_irq_postinstall(tile);
 }
 
 static irq_handler_t xe_irq_handler(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
index 34ecf22b32d36..69113c21e1cda 100644
--- a/drivers/gpu/drm/xe/xe_irq.h
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -7,10 +7,10 @@
 #define _XE_IRQ_H_
 
 struct xe_device;
-struct xe_gt;
+struct xe_tile;
 
 int xe_irq_install(struct xe_device *xe);
-void xe_gt_irq_postinstall(struct xe_gt *gt);
+void xe_gt_irq_postinstall(struct xe_tile *tile);
 void xe_irq_shutdown(struct xe_device *xe);
 void xe_irq_suspend(struct xe_device *xe);
 void xe_irq_resume(struct xe_device *xe);
-- 
GitLab


From 8e758225e52ec1acb5a0645b3750ea85cad82bbc Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:35 -0700
Subject: [PATCH 1616/2340] drm/xe/irq: Move ASLE backlight interrupt logic

Our only use of GUnit interrupts is to handle ASLE backlight operations
that are reported as GUnit GSE interrupts.  Move the enable/disable of
these interrupts to a more sensible place, in the same area where we
expect display interrupt code to be added by future patches.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-23-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 4d3ea3b66a7b6..601a54c60aef7 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -186,8 +186,6 @@ static void xelp_irq_postinstall(struct xe_device *xe, struct xe_tile *tile)
 
 	gt_irq_postinstall(tile);
 
-	unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
-
 	xelp_intr_enable(xe, true);
 }
 
@@ -367,8 +365,6 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_tile *tile)
 {
 	gt_irq_postinstall(tile);
 
-	unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
-
 	if (tile->id == 0)
 		dg1_intr_enable(xe, true);
 }
@@ -478,7 +474,6 @@ static void xelp_irq_reset(struct xe_tile *tile)
 
 	gt_irq_reset(tile);
 
-	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
 	mask_and_disable(tile, PCU_IRQ_OFFSET);
 }
 
@@ -489,7 +484,6 @@ static void dg1_irq_reset(struct xe_tile *tile)
 
 	gt_irq_reset(tile);
 
-	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
 	mask_and_disable(tile, PCU_IRQ_OFFSET);
 }
 
@@ -504,6 +498,9 @@ static void xe_irq_reset(struct xe_device *xe)
 		else
 			xelp_irq_reset(tile);
 	}
+
+	tile = xe_device_get_root_tile(xe);
+	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
 }
 
 void xe_gt_irq_postinstall(struct xe_tile *tile)
@@ -523,6 +520,13 @@ static void xe_irq_postinstall(struct xe_device *xe)
 
 	for_each_tile(tile, xe, id)
 		xe_gt_irq_postinstall(tile);
+
+	/*
+	 * ASLE backlight operations are reported via GUnit GSE interrupts
+	 * on the root tile.
+	 */
+	unmask_and_enable(xe_device_get_root_tile(xe),
+			  GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
 }
 
 static irq_handler_t xe_irq_handler(struct xe_device *xe)
-- 
GitLab


From 80d6e5874af2bb4a2fdc59029be64aa1d89a196b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:36 -0700
Subject: [PATCH 1617/2340] drm/xe/irq: Ensure primary GuC won't clobber media
 GuC's interrupt mask

Although primary and media GuC share a single interrupt enable bit, they
each have distinct bits in the mask register.  Although we always enable
interrupts for the primary GuC before the media GuC today (and never
disable either of them), this might not always be the case in the
future, so use a RMW when updating the mask register to ensure the other
GuC's mask doesn't get clobbered.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-24-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index ecc843d91f624..04a57af85d9e5 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -532,12 +532,15 @@ static void guc_enable_irq(struct xe_guc *guc)
 		REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST)  :
 		REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
 
+	/* Primary GuC and media GuC share a single enable bit */
 	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,
 			REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
-	if (xe_gt_is_media_type(gt))
-		xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
-	else
-		xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~events);
+
+	/*
+	 * There are separate mask bits for primary and media GuCs, so use
+	 * a RMW operation to avoid clobbering the other GuC's setting.
+	 */
+	xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
 }
 
 int xe_guc_enable_communication(struct xe_guc *guc)
-- 
GitLab


From 22a22236017631d98c8780cf03734e4383ae69d9 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:37 -0700
Subject: [PATCH 1618/2340] drm/xe/irq: Untangle postinstall functions

The xe_irq_postinstall() never actually gets called after installing the
interrupt handler.  This oversight seems to get papered over due to the
fact that the (misnamed) xe_gt_irq_postinstall does more than it really
should and gets called in the middle of the GT initialization.  The
callstack for postinstall is also a bit muddled with top-level device
interrupt enablement happening within platform-specific functions called
from the per-tile xe_gt_irq_postinstall() function.

Clean this all up by adding the missing call to xe_irq_postinstall()
after installing the interrupt handler and pull top-level irq enablement
up to xe_irq_postinstall where we'd expect it to be.

The xe_gt_irq_postinstall() function is still a bit misnamed here; an
upcoming patch will refocus its purpose and rename it.

v2:
 - Squash in patch to actually call xe_irq_postinstall() after
   installing the interrupt handler.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-25-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 37 +++++++++----------------------------
 1 file changed, 9 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 601a54c60aef7..09447d521a9f0 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -121,7 +121,7 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
 		xe_mmio_read32(mmio, GFX_MSTR_IRQ);
 }
 
-static void gt_irq_postinstall(struct xe_tile *tile)
+void xe_gt_irq_postinstall(struct xe_tile *tile)
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_gt *mmio = tile->primary_gt;
@@ -180,15 +180,6 @@ static void gt_irq_postinstall(struct xe_tile *tile)
 	xe_mmio_write32(mmio, GUC_SG_INTR_MASK,  ~0);
 }
 
-static void xelp_irq_postinstall(struct xe_device *xe, struct xe_tile *tile)
-{
-	/* TODO: PCH */
-
-	gt_irq_postinstall(tile);
-
-	xelp_intr_enable(xe, true);
-}
-
 static u32
 gt_engine_identity(struct xe_device *xe,
 		   struct xe_gt *mmio,
@@ -361,14 +352,6 @@ static void dg1_intr_enable(struct xe_device *xe, bool stall)
 		xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR);
 }
 
-static void dg1_irq_postinstall(struct xe_device *xe, struct xe_tile *tile)
-{
-	gt_irq_postinstall(tile);
-
-	if (tile->id == 0)
-		dg1_intr_enable(xe, true);
-}
-
 /*
  * Top-level interrupt handler for Xe_LP+ and beyond.  These platforms have
  * a "master tile" interrupt register which must be consulted before the
@@ -503,16 +486,6 @@ static void xe_irq_reset(struct xe_device *xe)
 	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
 }
 
-void xe_gt_irq_postinstall(struct xe_tile *tile)
-{
-	struct xe_device *xe = tile_to_xe(tile);
-
-	if (GRAPHICS_VERx100(xe) >= 1210)
-		dg1_irq_postinstall(xe, tile);
-	else
-		xelp_irq_postinstall(xe, tile);
-}
-
 static void xe_irq_postinstall(struct xe_device *xe)
 {
 	struct xe_tile *tile;
@@ -527,6 +500,12 @@ static void xe_irq_postinstall(struct xe_device *xe)
 	 */
 	unmask_and_enable(xe_device_get_root_tile(xe),
 			  GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
+
+	/* Enable top-level interrupts */
+	if (GRAPHICS_VERx100(xe) >= 1210)
+		dg1_intr_enable(xe, true);
+	else
+		xelp_intr_enable(xe, true);
 }
 
 static irq_handler_t xe_irq_handler(struct xe_device *xe)
@@ -577,6 +556,8 @@ int xe_irq_install(struct xe_device *xe)
 		return err;
 	}
 
+	xe_irq_postinstall(xe);
+
 	err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
 	if (err)
 		return err;
-- 
GitLab


From 3e29c149b3d813c25925636135c08bf5d51372b2 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:38 -0700
Subject: [PATCH 1619/2340] drm/xe: Replace xe_gt_irq_postinstall with
 xe_irq_enable_hwe

The majority of xe_gt_irq_postinstall() is really focused on the
hardware engine interrupts; other GT-related interrupts such as the GuC
are enabled/disabled independently.  Renaming the function and making it
truly GT-specific will make it more clear what the intended focus is.

Disabling/masking of other interrupts (such as GuC interrupts) is
unnecessary since that has already happened during the irq_reset stage,
and doing so will become harmful once the media GT is re-enabled since
calls to xe_gt_irq_postinstall during media GT initialization would
incorrectly disable the primary GT's GuC interrupts.

Also, since this function is called from gt_fw_domain_init(), it's not
necessary to also call it earlier during xe_irq_postinstall; just
xe_irq_resume to handle runtime resume should be sufficient.

v2:
 - Drop unnecessary !gt check.  (Lucas)
 - Reword some comments about enable/unmask for clarity.  (Lucas)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-26-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c        |  4 +-
 drivers/gpu/drm/xe/xe_hw_engine.c |  1 +
 drivers/gpu/drm/xe/xe_irq.c       | 89 +++++++++++++++----------------
 drivers/gpu/drm/xe/xe_irq.h       |  3 +-
 4 files changed, 49 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 071d4fbd3efc2..335148f1cd39c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -309,8 +309,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	/* XXX: Fake that we pull the engine mask from hwconfig blob */
 	gt->info.engine_mask = gt->info.__engine_mask;
 
-	/* Enables per hw engine IRQs */
-	xe_gt_irq_postinstall(gt_to_tile(gt));
+	/* Enable per hw engine IRQs */
+	xe_irq_enable_hwe(gt);
 
 	/* Rerun MCR init as we now have hw engine list */
 	xe_gt_mcr_init(gt);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b12f65a2bab3e..b42a0cb50159a 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -17,6 +17,7 @@
 #include "xe_gt.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_fence.h"
+#include "xe_irq.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 09447d521a9f0..d92f03870e597 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -121,13 +121,12 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
 		xe_mmio_read32(mmio, GFX_MSTR_IRQ);
 }
 
-void xe_gt_irq_postinstall(struct xe_tile *tile)
+/* Enable/unmask the HWE interrupts for a specific GT's engines. */
+void xe_irq_enable_hwe(struct xe_gt *gt)
 {
-	struct xe_device *xe = tile_to_xe(tile);
-	struct xe_gt *mmio = tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 ccs_mask, bcs_mask;
 	u32 irqs, dmask, smask;
-	u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COMPUTE);
-	u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COPY);
 
 	if (xe_device_guc_submission_enabled(xe)) {
 		irqs = GT_RENDER_USER_INTERRUPT |
@@ -139,45 +138,44 @@ void xe_gt_irq_postinstall(struct xe_tile *tile)
 		       GT_WAIT_SEMAPHORE_INTERRUPT;
 	}
 
+	ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+	bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
 	dmask = irqs << 16 | irqs;
 	smask = irqs << 16;
 
-	/* Enable RCS, BCS, VCS and VECS class interrupts. */
-	xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask);
-	xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask);
-	if (ccs_mask)
-		xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask);
-
-	/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
-	xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask);
-	xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask);
-	if (bcs_mask & (BIT(1)|BIT(2)))
-		xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
-	if (bcs_mask & (BIT(3)|BIT(4)))
-		xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
-	if (bcs_mask & (BIT(5)|BIT(6)))
-		xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
-	if (bcs_mask & (BIT(7)|BIT(8)))
-		xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
-	xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask);
-	xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask);
-	xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask);
-	if (ccs_mask & (BIT(0)|BIT(1)))
-		xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask);
-	if (ccs_mask & (BIT(2)|BIT(3)))
-		xe_mmio_write32(mmio,  CCS2_CCS3_INTR_MASK, ~dmask);
+	if (!xe_gt_is_media_type(gt)) {
+		/* Enable interrupts for each engine class */
+		xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
+		if (ccs_mask)
+			xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
+
+		/* Unmask interrupts for each engine instance */
+		xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
+		xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
+		if (bcs_mask & (BIT(1)|BIT(2)))
+			xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(3)|BIT(4)))
+			xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(5)|BIT(6)))
+			xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(7)|BIT(8)))
+			xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+		if (ccs_mask & (BIT(0)|BIT(1)))
+			xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
+		if (ccs_mask & (BIT(2)|BIT(3)))
+			xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK, ~dmask);
+	}
 
-	/*
-	 * RPS interrupts will get enabled/disabled on demand when RPS itself
-	 * is enabled/disabled.
-	 */
-	/* TODO: gt->pm_ier, gt->pm_imr */
-	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
-	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK,  ~0);
+	if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
+		/* Enable interrupts for each engine class */
+		xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
 
-	/* Same thing for GuC interrupts */
-	xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE, 0);
-	xe_mmio_write32(mmio, GUC_SG_INTR_MASK,  ~0);
+		/* Unmask interrupts for each engine instance */
+		xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
+		xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
+		xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+	}
 }
 
 static u32
@@ -488,12 +486,6 @@ static void xe_irq_reset(struct xe_device *xe)
 
 static void xe_irq_postinstall(struct xe_device *xe)
 {
-	struct xe_tile *tile;
-	u8 id;
-
-	for_each_tile(tile, xe, id)
-		xe_gt_irq_postinstall(tile);
-
 	/*
 	 * ASLE backlight operations are reported via GUnit GSE interrupts
 	 * on the root tile.
@@ -580,9 +572,16 @@ void xe_irq_suspend(struct xe_device *xe)
 
 void xe_irq_resume(struct xe_device *xe)
 {
+	struct xe_gt *gt;
+	int id;
+
 	spin_lock_irq(&xe->irq.lock);
 	xe->irq.enabled = true;
 	xe_irq_reset(xe);
 	xe_irq_postinstall(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_irq_enable_hwe(gt);
+
 	spin_unlock_irq(&xe->irq.lock);
 }
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
index 69113c21e1cda..bc42bc90d9676 100644
--- a/drivers/gpu/drm/xe/xe_irq.h
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -8,11 +8,12 @@
 
 struct xe_device;
 struct xe_tile;
+struct xe_gt;
 
 int xe_irq_install(struct xe_device *xe);
-void xe_gt_irq_postinstall(struct xe_tile *tile);
 void xe_irq_shutdown(struct xe_device *xe);
 void xe_irq_suspend(struct xe_device *xe);
 void xe_irq_resume(struct xe_device *xe);
+void xe_irq_enable_hwe(struct xe_gt *gt);
 
 #endif
-- 
GitLab


From d78a4778195079e0b2820550efeecb7b25fa764a Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:39 -0700
Subject: [PATCH 1620/2340] drm/xe: Invalidate TLB on all affected GTs during
 GGTT updates

The GGTT is part of the tile and is shared by the primary and media GTs
on platforms with a standalone media architecture.  However each of
these GTs has its own TLBs caching the page table lookups, and each
needs to be invalidated separately.

Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-27-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 8d3638826860b..d672494961131 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -190,13 +190,10 @@ err:
 #define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
 #define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
 
-void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
+static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 {
-	/*
-	 * TODO: Loop over each GT in tile once media GT support is
-	 * re-added
-	 */
-	struct xe_gt *gt = ggtt->tile->primary_gt;
+	if (!gt)
+		return;
 
 	/* TODO: vfunc for GuC vs. non-GuC */
 
@@ -221,6 +218,13 @@ void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
 	}
 }
 
+void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
+{
+	/* Each GT in a tile has its own TLB to cache GGTT lookups */
+	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
+	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
+}
+
 void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
 {
 	u64 addr, scratch_pte;
-- 
GitLab


From 933b78d678213f5c045c52cbc42bbee6653af250 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:40 -0700
Subject: [PATCH 1621/2340] drm/xe/tlb: Obtain forcewake when doing GGTT TLB
 invalidations

Updates to the GGTT can happen when there are no in-flight jobs keeping
the hardware awake.  If the GT is powered down when invalidation is
requested, we will not be able to communicate with the GuC (or MMIO) and
the invalidation request will go missing.  Explicitly grab GT forcewake
to ensure the GT and GuC are powered up during the TLB invalidation.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-28-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index d672494961131..14b6d68a63248 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -195,6 +195,13 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 	if (!gt)
 		return;
 
+	/*
+	 * Invalidation can happen when there's no in-flight work keeping the
+	 * GT awake.  We need to explicitly grab forcewake to ensure the GT
+	 * and GuC are accessible.
+	 */
+	xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+
 	/* TODO: vfunc for GuC vs. non-GuC */
 
 	if (gt->uc.guc.submission_state.enabled) {
@@ -216,6 +223,8 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 			xe_mmio_write32(gt, GUC_TLB_INV_CR,
 					GUC_TLB_INV_CR_INVALIDATE);
 	}
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 }
 
 void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
-- 
GitLab


From 37efea9ca2583990fbd706af0364ce9feb16bb1a Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:41 -0700
Subject: [PATCH 1622/2340] drm/xe: Allow GT looping and lookup on standalone
 media

Allow xe_device_get_gt() and for_each_gt() to operate as expected on
platforms with standalone media.

FIXME: We need to figure out a consistent ID scheme for GTs.  This patch
keeps the pre-existing behavior of 0/1 being the GT IDs for both PVC
(multi-tile) and MTL (multi-GT), but depending on the direction we
decide to go with uapi, we may change this in the future (e.g., to
return 0/1 on PVC and 0/2 on MTL).  Or if we decide we only need to
expose tiles to userspace and not GTs, we may not even need ID numbers
for the GTs anymore.

v2:
 - Restructure a bit to make the assertions more clear.
 - Clarify in commit message that the goal here is to preserve existing
   behavior; UAPI-visible changes may be introduced in the future once
   we settle on what we really want.
v3:
 - Store total GT count in xe_device for ease of lookup.  (Brian)
 - s/(id__++)/(id__)++/  (Gustavo)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
Acked-by: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-29-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.h       | 42 +++++++++++++++++++++++-----
 drivers/gpu/drm/xe/xe_device_types.h |  2 ++
 drivers/gpu/drm/xe/xe_pci.c          |  6 +++-
 3 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index f2d8479f6ff62..779f71d066e6e 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -53,18 +53,42 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
 	return &xe->tiles[0];
 }
 
+#define XE_MAX_GT_PER_TILE 2
+
+static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id)
+{
+	if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE))
+		gt_id = 0;
+
+	return gt_id ? tile->media_gt : tile->primary_gt;
+}
+
 static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
 {
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
 	struct xe_gt *gt;
 
-	XE_BUG_ON(gt_id > XE_MAX_TILES_PER_DEVICE);
-
-	gt = xe->tiles[gt_id].primary_gt;
-	if (drm_WARN_ON(&xe->drm, !gt))
+	/*
+	 * FIXME: This only works for now because multi-tile and standalone
+	 * media are mutually exclusive on the platforms we have today.
+	 *
+	 * id => GT mapping may change once we settle on how we want to handle
+	 * our UAPI.
+	 */
+	if (MEDIA_VER(xe) >= 13) {
+		gt = xe_tile_get_gt(root_tile, gt_id);
+	} else {
+		if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE))
+			gt_id = 0;
+
+		gt = xe->tiles[gt_id].primary_gt;
+	}
+
+	if (!gt)
 		return NULL;
 
-	XE_BUG_ON(gt->info.id != gt_id);
-	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+	drm_WARN_ON(&xe->drm, gt->info.id != gt_id);
+	drm_WARN_ON(&xe->drm, gt->info.type == XE_GT_TYPE_UNINITIALIZED);
 
 	return gt;
 }
@@ -100,8 +124,12 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe)
 	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
 		for_each_if ((tile__) = &(xe__)->tiles[(id__)])
 
+/*
+ * FIXME: This only works for now since multi-tile and standalone media
+ * happen to be mutually exclusive.  Future platforms may change this...
+ */
 #define for_each_gt(gt__, xe__, id__) \
-	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \
+	for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \
 		for_each_if ((gt__) = xe_device_get_gt((xe__), (id__)))
 
 static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 16a77703d4298..3b50134cdcc0d 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -184,6 +184,8 @@ struct xe_device {
 		u8 vram_flags;
 		/** @tile_count: Number of tiles */
 		u8 tile_count;
+		/** @gt_count: Total number of GTs for entire device */
+		u8 gt_count;
 		/** @vm_max_level: Max VM level */
 		u8 vm_max_level;
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index be51c9e97a796..abb2f13260079 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -535,7 +535,11 @@ static int xe_info_init(struct xe_device *xe,
 			return PTR_ERR(tile->primary_gt);
 
 		gt = tile->primary_gt;
-		gt->info.id = id;	/* FIXME: Determine sensible numbering */
+		/*
+		 * FIXME: GT numbering scheme may change depending on UAPI
+		 * decisions.
+		 */
+		gt->info.id = xe->info.gt_count++;
 		gt->info.type = XE_GT_TYPE_MAIN;
 		gt->info.__engine_mask = graphics_desc->hw_engine_mask;
 		if (MEDIA_VER(xe) < 13 && media_desc)
-- 
GitLab


From 1bc728dcb8adc9f9e88f34940a94bfa314d4f7c3 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:42 -0700
Subject: [PATCH 1623/2340] drm/xe: Update query uapi to support standalone
 media

Now that a higher GT count can result from either multiple tiles (with
one GT each) or an extra media GT within the root tile, we need to
update the query code slightly to stop looking at tile_count.

FIXME: As noted previously, we need to decide on a formal direction for
exposing tiles and/or GTs to userspace.

v2:
 - Drop num_gt() function in favor of stored xe->info.gt_count.  (Brian)
v3:
 - Keep XE_QUERY_GT_TYPE_REMOTE around for now.  Userspace probably
   doesn't actually need this, and we may remove it in the future, but
   for now let's avoid changing uapi.  (Brian)

Cc: Brian Welty <brian.welty@intel.com>
Reviewed-by: Brian Welty <brian.welty@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-30-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 8087c94dd782a..c4165fa3428e4 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -192,7 +192,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
 	config->info[XE_QUERY_CONFIG_VA_BITS] = 12 +
 		(9 * (xe->info.vm_max_level + 1));
-	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count;
+	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count;
 	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
 		hweight_long(xe->info.mem_region_mask);
 	config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] =
@@ -211,7 +211,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 {
 	struct xe_gt *gt;
 	size_t size = sizeof(struct drm_xe_query_gts) +
-		xe->info.tile_count * sizeof(struct drm_xe_query_gt);
+		xe->info.gt_count * sizeof(struct drm_xe_query_gt);
 	struct drm_xe_query_gts __user *query_ptr =
 		u64_to_user_ptr(query->data);
 	struct drm_xe_query_gts *gts;
@@ -228,14 +228,14 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 	if (XE_IOCTL_ERR(xe, !gts))
 		return -ENOMEM;
 
-	gts->num_gt = xe->info.tile_count;
+	gts->num_gt = xe->info.gt_count;
 	for_each_gt(gt, xe, id) {
-		if (id == 0)
-			gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
-		else if (xe_gt_is_media_type(gt))
+		if (xe_gt_is_media_type(gt))
 			gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA;
-		else
+		else if (gt_to_tile(gt)->id > 0)
 			gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE;
+		else
+			gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
 		gts->gts[id].instance = id;
 		gts->gts[id].clock_freq = gt->info.clock_freq;
 		if (!IS_DGFX(xe))
@@ -290,7 +290,7 @@ static int query_hwconfig(struct xe_device *xe,
 
 static size_t calc_topo_query_size(struct xe_device *xe)
 {
-	return xe->info.tile_count *
+	return xe->info.gt_count *
 		(3 * sizeof(struct drm_xe_query_topology_mask) +
 		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
 		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
-- 
GitLab


From 7bfbad97d38f1de4ffbc7d9dce6ee0128459293c Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:43 -0700
Subject: [PATCH 1624/2340] drm/xe: Reinstate media GT support

Now that tiles and GTs are handled separately and other prerequisite
changes are in place, we're ready to re-enable the media GT.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-31-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  8 ++++++++
 drivers/gpu/drm/xe/xe_pci.c          | 26 +++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d8b480f69c5f0..76c09526690ed 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -8,6 +8,14 @@
 
 #include "regs/xe_reg_defs.h"
 
+/*
+ * The GSI register range [0x0 - 0x40000) is replicated at a higher offset
+ * for the media GT.  xe_mmio and xe_gt_mcr functions will automatically
+ * translate offsets by MEDIA_GT_GSI_OFFSET when operating on the media GT.
+ */
+#define MEDIA_GT_GSI_OFFSET				0x380000
+#define MEDIA_GT_GSI_LENGTH				0x40000
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0					XE_REG(0xd00)
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK		REG_GENMASK(5, 3)
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index abb2f13260079..208dc7a63f88c 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -545,7 +545,31 @@ static int xe_info_init(struct xe_device *xe,
 		if (MEDIA_VER(xe) < 13 && media_desc)
 			gt->info.__engine_mask |= media_desc->hw_engine_mask;
 
-		/* TODO: Init media GT, if present */
+		if (MEDIA_VER(xe) < 13 || !media_desc)
+			continue;
+
+		/*
+		 * Allocate and setup media GT for platforms with standalone
+		 * media.
+		 */
+		tile->media_gt = xe_gt_alloc(tile);
+		if (IS_ERR(tile->media_gt))
+			return PTR_ERR(tile->media_gt);
+
+		gt = tile->media_gt;
+		gt->info.type = XE_GT_TYPE_MEDIA;
+		gt->info.__engine_mask = media_desc->hw_engine_mask;
+		gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
+		gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
+
+		/*
+		 * FIXME: At the moment multi-tile and standalone media are
+		 * mutually exclusive on current platforms.  We'll need to
+		 * come up with a better way to number GTs if we ever wind
+		 * up with platforms that support both together.
+		 */
+		drm_WARN_ON(&xe->drm, id != 0);
+		gt->info.id = 1;
 	}
 
 	return 0;
-- 
GitLab


From 08516de501fae647fb29bf3b62718de56cc24014 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 1 Jun 2023 14:52:44 -0700
Subject: [PATCH 1625/2340] drm/xe: Add kerneldoc description of multi-tile
 devices

v2:
 - Fix doubled word.  (Lucas)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230601215244.678611-32-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 Documentation/gpu/xe/index.rst   |  1 +
 Documentation/gpu/xe/xe_tile.rst | 14 ++++++++
 drivers/gpu/drm/xe/xe_tile.c     | 57 ++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+)
 create mode 100644 Documentation/gpu/xe/xe_tile.rst

diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
index 2fddf9ed251e7..5c4d6bb370f38 100644
--- a/Documentation/gpu/xe/index.rst
+++ b/Documentation/gpu/xe/index.rst
@@ -21,3 +21,4 @@ DG2, etc is provided to prototype the driver.
    xe_wa
    xe_rtp
    xe_firmware
+   xe_tile
diff --git a/Documentation/gpu/xe/xe_tile.rst b/Documentation/gpu/xe/xe_tile.rst
new file mode 100644
index 0000000000000..c33f68dd95b6b
--- /dev/null
+++ b/Documentation/gpu/xe/xe_tile.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Multi-tile Devices
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
+   :doc: Multi-tile Design
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
+   :internal:
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index fa56323aa9882..6414aa810355e 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -12,6 +12,63 @@
 #include "xe_tile.h"
 #include "xe_ttm_vram_mgr.h"
 
+/**
+ * DOC: Multi-tile Design
+ *
+ * Different vendors use the term "tile" a bit differently, but in the Intel
+ * world, a 'tile' is pretty close to what most people would think of as being
+ * a complete GPU.  When multiple GPUs are placed behind a single PCI device,
+ * that's what is referred to as a "multi-tile device."  In such cases, pretty
+ * much all hardware is replicated per-tile, although certain responsibilities
+ * like PCI communication, reporting of interrupts to the OS, etc. are handled
+ * solely by the "root tile."  A multi-tile platform takes care of tying the
+ * tiles together in a way such that interrupt notifications from remote tiles
+ * are forwarded to the root tile, the per-tile vram is combined into a single
+ * address space, etc.
+ *
+ * In contrast, a "GT" (which officially stands for "Graphics Technology") is
+ * the subset of a GPU/tile that is responsible for implementing graphics
+ * and/or media operations.  The GT is where a lot of the driver implementation
+ * happens since it's where the hardware engines, the execution units, and the
+ * GuC all reside.
+ *
+ * Historically most Intel devices were single-tile devices that contained a
+ * single GT.  PVC is an example of an Intel platform built on a multi-tile
+ * design (i.e., multiple GPUs behind a single PCI device); each PVC tile only
+ * has a single GT.  In contrast, platforms like MTL that have separate chips
+ * for render and media IP are still only a single logical GPU, but the
+ * graphics and media IP blocks are each exposed as a separate GT within that
+ * single GPU.  This is important from a software perspective because multi-GT
+ * platforms like MTL only replicate a subset of the GPU hardware and behave
+ * differently than multi-tile platforms like PVC where nearly everything is
+ * replicated.
+ *
+ * Per-tile functionality (shared by all GTs within the tile):
+ *  - Complete 4MB MMIO space (containing SGunit/SoC registers, GT
+ *    registers, display registers, etc.)
+ *  - Global GTT
+ *  - VRAM (if discrete)
+ *  - Interrupt flows
+ *  - Migration context
+ *  - kernel batchbuffer pool
+ *  - Primary GT
+ *  - Media GT (if media version >= 13)
+ *
+ * Per-GT functionality:
+ *  - GuC
+ *  - Hardware engines
+ *  - Programmable hardware units (subslices, EUs)
+ *  - GSI subset of registers (multiple copies of these registers reside
+ *    within the complete MMIO space provided by the tile, but at different
+ *    offsets --- 0 for render, 0x380000 for media)
+ *  - Multicast register steering
+ *  - TLBs to cache page table translations
+ *  - Reset capability
+ *  - Low-level power management (e.g., C6)
+ *  - Clock frequency
+ *  - MOCS and PAT programming
+ */
+
 /**
  * xe_tile_alloc - Perform per-tile memory allocation
  * @tile: Tile to perform allocations for
-- 
GitLab


From 437bcbab1023e06edd8dbca99f5c44e5d2b30133 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Thu, 1 Jun 2023 16:44:19 -0300
Subject: [PATCH 1626/2340] drm/xe: Replace deprecated DRM_ERROR()

DRM_ERROR() has been deprecated in favor of pr_err(). However, we should
prefer to use xe_gt_err() or drm_err() whenever possible so we get gt-
or device-specific output with the error message.

v2:
  - Prefer drm_err() over pr_err(). (Matt, Jani)
v3:
  - Prefer xe_gt_err() over drm_err() when possible. (Matt)
v4:
  - Use the already available dev variable instead of xe->drm as
    parameter to drm_err(). (Matt)

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230601194419.1179609-1-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c | 7 +++++--
 drivers/gpu/drm/xe/xe_reg_sr.h | 3 ++-
 drivers/gpu/drm/xe/xe_rtp.c    | 2 +-
 drivers/gpu/drm/xe/xe_vm.c     | 3 ++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 434133444d741..8580ff38b82ca 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -19,6 +19,7 @@
 #include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
 #include "xe_reg_whitelist.h"
@@ -89,7 +90,8 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr)
 }
 
 int xe_reg_sr_add(struct xe_reg_sr *sr,
-		  const struct xe_reg_sr_entry *e)
+		  const struct xe_reg_sr_entry *e,
+		  struct xe_gt *gt)
 {
 	unsigned long idx = e->reg.addr;
 	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
@@ -122,7 +124,8 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
 	return 0;
 
 fail:
-	DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",
+	xe_gt_err(gt,
+		  "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",
 		  idx, e->clr_bits, e->set_bits,
 		  str_yes_no(e->reg.masked),
 		  str_yes_no(e->reg.mcr),
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
index 0bfe66ea29bfc..c3001798d9e85 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -19,7 +19,8 @@ struct drm_printer;
 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
 void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p);
 
-int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e);
+int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e,
+		  struct xe_gt *gt);
 void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
 void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
 			       struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 43a86358efb68..956bd39fe1a0b 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -122,7 +122,7 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 	};
 
 	sr_entry.reg.addr += mmio_base;
-	xe_reg_sr_add(sr, &sr_entry);
+	xe_reg_sr_add(sr, &sr_entry, gt);
 }
 
 static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 44ad457761415..17b7d543ae494 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -7,6 +7,7 @@
 
 #include <linux/dma-fence-array.h>
 
+#include <drm/drm_print.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
@@ -3048,7 +3049,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	}
 
 	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
-		DRM_ERROR("VM closed while we began looking up?\n");
+		drm_err(dev, "VM closed while we began looking up?\n");
 		err = -ENOENT;
 		goto put_vm;
 	}
-- 
GitLab


From 066d0952489b6ea269823dbbbb85d580ee6d23e0 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 2 Jun 2023 16:52:08 -0700
Subject: [PATCH 1627/2340] drm/xe: Reformat xe_guc_regs.h

Reformat the GuC register header according to the same rules used by
other register headers:
 - Register definitions are ordered by offset
 - Value of #define's start on column 49
 - Lowercase used for hex values

No functional change.

This header has some things that aren't directly related to register
definitions (e.g., number of doorbells, doorbell info structure, GuC
interrupt vector layout, etc.  These items have been moved to the bottom
of the header.

Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230602235210.1314028-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_guc_regs.h | 186 +++++++++++++-------------
 1 file changed, 93 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index b4f27cadb68f5..ea8118f16722b 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -13,65 +13,37 @@
 
 /* Definitions of GuC H/W registers, bits, etc */
 
-#define GUC_STATUS			XE_REG(0xc000)
-#define   GS_AUTH_STATUS_MASK		REG_GENMASK(31, 30)
-#define   GS_AUTH_STATUS_BAD		REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1)
-#define   GS_AUTH_STATUS_GOOD		REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2)
-#define   GS_MIA_MASK			REG_GENMASK(18, 16)
-#define   GS_MIA_CORE_STATE		REG_FIELD_PREP(GS_MIA_MASK, 0x1)
-#define   GS_MIA_HALT_REQUESTED		REG_FIELD_PREP(GS_MIA_MASK, 0x2)
-#define   GS_MIA_ISR_ENTRY		REG_FIELD_PREP(GS_MIA_MASK, 0x4)
-#define   GS_UKERNEL_MASK		REG_GENMASK(15, 8)
-#define   GS_BOOTROM_MASK		REG_GENMASK(7, 1)
-#define   GS_BOOTROM_RSA_FAILED		REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50)
-#define   GS_BOOTROM_JUMP_PASSED	REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76)
-#define   GS_MIA_IN_RESET		REG_BIT(0)
-
-#define SOFT_SCRATCH(n)			XE_REG(0xc180 + (n) * 4)
-#define SOFT_SCRATCH_COUNT		16
-
-#define UOS_RSA_SCRATCH(i)		XE_REG(0xc200 + (i) * 4)
-#define UOS_RSA_SCRATCH_COUNT		64
-
-#define DMA_ADDR_0_LOW			XE_REG(0xc300)
-#define DMA_ADDR_0_HIGH			XE_REG(0xc304)
-#define DMA_ADDR_1_LOW			XE_REG(0xc308)
-#define DMA_ADDR_1_HIGH			XE_REG(0xc30c)
-#define   DMA_ADDR_SPACE_MASK		REG_GENMASK(20, 16)
-#define   DMA_ADDRESS_SPACE_WOPCM	REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7)
-#define DMA_COPY_SIZE			XE_REG(0xc310)
-#define DMA_CTRL			XE_REG(0xc314)
-#define   HUC_UKERNEL			REG_BIT(9)
-#define   UOS_MOVE			REG_BIT(4)
-#define   START_DMA			REG_BIT(0)
-#define DMA_GUC_WOPCM_OFFSET		XE_REG(0xc340)
-#define   GUC_WOPCM_OFFSET_SHIFT	14
-#define   GUC_WOPCM_OFFSET_MASK		REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT)
-#define   HUC_LOADING_AGENT_GUC		REG_BIT(1)
-#define   GUC_WOPCM_OFFSET_VALID	REG_BIT(0)
-#define GUC_MAX_IDLE_COUNT		XE_REG(0xc3e4)
-
-#define HUC_STATUS2			XE_REG(0xd3b0)
-#define   HUC_FW_VERIFIED		REG_BIT(7)
-
-#define HUC_KERNEL_LOAD_INFO		XE_REG(0xc1dc)
-#define   HUC_LOAD_SUCCESSFUL		REG_BIT(0)
-
-#define GUC_WOPCM_SIZE			XE_REG(0xc050)
-#define   GUC_WOPCM_SIZE_MASK		REG_GENMASK(31, 12)
-#define   GUC_WOPCM_SIZE_LOCKED		REG_BIT(0)
-
-#define GT_PM_CONFIG			XE_REG(0x13816c)
-#define   GT_DOORBELL_ENABLE		REG_BIT(0)
-
-#define GTCR				XE_REG(0x4274)
-#define   GTCR_INVALIDATE		REG_BIT(0)
-
-#define GUC_TLB_INV_CR			XE_REG(0xcee8)
-#define   GUC_TLB_INV_CR_INVALIDATE	REG_BIT(0)
+#define DIST_DBS_POPULATED			XE_REG(0xd08)
+#define   DOORBELLS_PER_SQIDI_MASK		REG_GENMASK(23, 16)
+#define   SQIDIS_DOORBELL_EXIST_MASK		REG_GENMASK(15, 0)
+
+#define DRBREGL(x)				XE_REG(0x1000 + (x) * 8)
+#define   DRB_VALID				REG_BIT(0)
+#define DRBREGU(x)				XE_REG(0x1000 + (x) * 8 + 4)
+
+#define GTCR					XE_REG(0x4274)
+#define   GTCR_INVALIDATE			REG_BIT(0)
 
 #define GUC_ARAT_C6DIS				XE_REG(0xa178)
 
+#define GUC_STATUS				XE_REG(0xc000)
+#define   GS_AUTH_STATUS_MASK			REG_GENMASK(31, 30)
+#define   GS_AUTH_STATUS_BAD			REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1)
+#define   GS_AUTH_STATUS_GOOD			REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2)
+#define   GS_MIA_MASK				REG_GENMASK(18, 16)
+#define   GS_MIA_CORE_STATE			REG_FIELD_PREP(GS_MIA_MASK, 0x1)
+#define   GS_MIA_HALT_REQUESTED			REG_FIELD_PREP(GS_MIA_MASK, 0x2)
+#define   GS_MIA_ISR_ENTRY			REG_FIELD_PREP(GS_MIA_MASK, 0x4)
+#define   GS_UKERNEL_MASK			REG_GENMASK(15, 8)
+#define   GS_BOOTROM_MASK			REG_GENMASK(7, 1)
+#define   GS_BOOTROM_RSA_FAILED			REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50)
+#define   GS_BOOTROM_JUMP_PASSED		REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76)
+#define   GS_MIA_IN_RESET			REG_BIT(0)
+
+#define GUC_WOPCM_SIZE				XE_REG(0xc050)
+#define   GUC_WOPCM_SIZE_MASK			REG_GENMASK(31, 12)
+#define   GUC_WOPCM_SIZE_LOCKED			REG_BIT(0)
+
 #define GUC_SHIM_CONTROL			XE_REG(0xc064)
 #define   PVC_GUC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
 #define   PVC_GUC_MOCS_UC_INDEX			1
@@ -86,35 +58,51 @@
 #define   GUC_ENABLE_READ_CACHE_LOGIC		REG_BIT(1)
 #define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	REG_BIT(0)
 
+#define SOFT_SCRATCH(n)				XE_REG(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT			16
+
+#define HUC_KERNEL_LOAD_INFO			XE_REG(0xc1dc)
+#define   HUC_LOAD_SUCCESSFUL			REG_BIT(0)
+
+#define UOS_RSA_SCRATCH(i)			XE_REG(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH_COUNT			64
+
+#define DMA_ADDR_0_LOW				XE_REG(0xc300)
+#define DMA_ADDR_0_HIGH				XE_REG(0xc304)
+#define DMA_ADDR_1_LOW				XE_REG(0xc308)
+#define DMA_ADDR_1_HIGH				XE_REG(0xc30c)
+#define   DMA_ADDR_SPACE_MASK			REG_GENMASK(20, 16)
+#define   DMA_ADDRESS_SPACE_WOPCM		REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7)
+#define DMA_COPY_SIZE				XE_REG(0xc310)
+#define DMA_CTRL				XE_REG(0xc314)
+#define   HUC_UKERNEL				REG_BIT(9)
+#define   UOS_MOVE				REG_BIT(4)
+#define   START_DMA				REG_BIT(0)
+#define DMA_GUC_WOPCM_OFFSET			XE_REG(0xc340)
+#define   GUC_WOPCM_OFFSET_SHIFT		14
+#define   GUC_WOPCM_OFFSET_MASK			REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT)
+#define   HUC_LOADING_AGENT_GUC			REG_BIT(1)
+#define   GUC_WOPCM_OFFSET_VALID		REG_BIT(0)
+#define GUC_MAX_IDLE_COUNT			XE_REG(0xc3e4)
 
 #define GUC_SEND_INTERRUPT			XE_REG(0xc4c8)
 #define   GUC_SEND_TRIGGER			REG_BIT(0)
-#define GUC_HOST_INTERRUPT			XE_REG(0x1901f0)
 
-#define GUC_NUM_DOORBELLS			256
+#define GUC_BCS_RCS_IER				XE_REG(0xc550)
+#define GUC_VCS2_VCS1_IER			XE_REG(0xc554)
+#define GUC_WD_VECS_IER				XE_REG(0xc558)
+#define GUC_PM_P24C_IER				XE_REG(0xc55c)
 
-/* format of the HW-monitored doorbell cacheline */
-struct guc_doorbell_info {
-	u32 db_status;
-#define GUC_DOORBELL_DISABLED			0
-#define GUC_DOORBELL_ENABLED			1
+#define GUC_TLB_INV_CR				XE_REG(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
 
-	u32 cookie;
-	u32 reserved[14];
-} __packed;
+#define HUC_STATUS2				XE_REG(0xd3b0)
+#define   HUC_FW_VERIFIED			REG_BIT(7)
 
-#define DRBREGL(x)				XE_REG(0x1000 + (x) * 8)
-#define   DRB_VALID				REG_BIT(0)
-#define DRBREGU(x)				XE_REG(0x1000 + (x) * 8 + 4)
-
-#define DIST_DBS_POPULATED			XE_REG(0xd08)
-#define   DOORBELLS_PER_SQIDI_MASK		REG_GENMASK(23, 16)
-#define   SQIDIS_DOORBELL_EXIST_MASK		REG_GENMASK(15, 0)
+#define GT_PM_CONFIG				XE_REG(0x13816c)
+#define   GT_DOORBELL_ENABLE			REG_BIT(0)
 
-#define GUC_BCS_RCS_IER				XE_REG(0xC550)
-#define GUC_VCS2_VCS1_IER			XE_REG(0xC554)
-#define GUC_WD_VECS_IER				XE_REG(0xC558)
-#define GUC_PM_P24C_IER				XE_REG(0xC55C)
+#define GUC_HOST_INTERRUPT			XE_REG(0x1901f0)
 
 #define VF_SW_FLAG(n)				XE_REG(0x190240 + (n) * 4)
 #define VF_SW_FLAG_COUNT			4
@@ -125,21 +113,33 @@ struct guc_doorbell_info {
 #define MED_VF_SW_FLAG_COUNT			4
 
 /* GuC Interrupt Vector */
-#define GUC_INTR_GUC2HOST			BIT(15)
-#define GUC_INTR_EXEC_ERROR			BIT(14)
-#define GUC_INTR_DISPLAY_EVENT			BIT(13)
-#define GUC_INTR_SEM_SIG			BIT(12)
-#define GUC_INTR_IOMMU2GUC			BIT(11)
-#define GUC_INTR_DOORBELL_RANG			BIT(10)
-#define GUC_INTR_DMA_DONE			BIT(9)
-#define GUC_INTR_FATAL_ERROR			BIT(8)
-#define GUC_INTR_NOTIF_ERROR			BIT(7)
-#define GUC_INTR_SW_INT_6			BIT(6)
-#define GUC_INTR_SW_INT_5			BIT(5)
-#define GUC_INTR_SW_INT_4			BIT(4)
-#define GUC_INTR_SW_INT_3			BIT(3)
-#define GUC_INTR_SW_INT_2			BIT(2)
-#define GUC_INTR_SW_INT_1			BIT(1)
-#define GUC_INTR_SW_INT_0			BIT(0)
+#define GUC_INTR_GUC2HOST			REG_BIT(15)
+#define GUC_INTR_EXEC_ERROR			REG_BIT(14)
+#define GUC_INTR_DISPLAY_EVENT			REG_BIT(13)
+#define GUC_INTR_SEM_SIG			REG_BIT(12)
+#define GUC_INTR_IOMMU2GUC			REG_BIT(11)
+#define GUC_INTR_DOORBELL_RANG			REG_BIT(10)
+#define GUC_INTR_DMA_DONE			REG_BIT(9)
+#define GUC_INTR_FATAL_ERROR			REG_BIT(8)
+#define GUC_INTR_NOTIF_ERROR			REG_BIT(7)
+#define GUC_INTR_SW_INT_6			REG_BIT(6)
+#define GUC_INTR_SW_INT_5			REG_BIT(5)
+#define GUC_INTR_SW_INT_4			REG_BIT(4)
+#define GUC_INTR_SW_INT_3			REG_BIT(3)
+#define GUC_INTR_SW_INT_2			REG_BIT(2)
+#define GUC_INTR_SW_INT_1			REG_BIT(1)
+#define GUC_INTR_SW_INT_0			REG_BIT(0)
+
+#define GUC_NUM_DOORBELLS			256
+
+/* format of the HW-monitored doorbell cacheline */
+struct guc_doorbell_info {
+	u32 db_status;
+#define GUC_DOORBELL_DISABLED			0
+#define GUC_DOORBELL_ENABLED			1
+
+	u32 cookie;
+	u32 reserved[14];
+} __packed;
 
 #endif
-- 
GitLab


From 17a6726c3d3040c0a47d7ec5bd8cc4056a379017 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 2 Jun 2023 16:52:09 -0700
Subject: [PATCH 1628/2340] drm/xe: Initialize MOCS earlier

xe_mocs_init_early doesn't touch the hardware, it just sets up internal
software state.  There's no need to perform this step in the "forcewake
held" region.  Moving the init earlier will also make the uc_index
values available earlier which will be important for an upcoming GuC
init patch.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230602235210.1314028-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 335148f1cd39c..3799e663bad35 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -315,8 +315,6 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	/* Rerun MCR init as we now have hw engine list */
 	xe_gt_mcr_init(gt);
 
-	xe_mocs_init_early(gt);
-
 	err = xe_hw_engines_init_early(gt);
 	if (err)
 		goto err_force_wake;
@@ -429,6 +427,8 @@ int xe_gt_init(struct xe_gt *gt)
 	if (err)
 		return err;
 
+	xe_mocs_init_early(gt);
+
 	xe_gt_sysfs_init(gt);
 
 	err = gt_fw_domain_init(gt);
-- 
GitLab


From 1fce9a6f69f57318842bd2771f761f203db6f49c Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 2 Jun 2023 16:52:10 -0700
Subject: [PATCH 1629/2340] drm/xe: Don't hardcode GuC's MOCS index in register
 header

Although PVC is currently the only platform that needs us to program a
GuC register with the index of an uncached MOCS entry, it's likely other
platforms will need this in the future.  Rather than hardcoding PVC's
index into the register header, we should just pull the appropriate
index from gt->mocs.uc_index to future-proof the code.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230602235210.1314028-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_guc_regs.h | 5 +----
 drivers/gpu/drm/xe/xe_guc.c           | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index ea8118f16722b..fcb747201bc1c 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -45,10 +45,7 @@
 #define   GUC_WOPCM_SIZE_LOCKED			REG_BIT(0)
 
 #define GUC_SHIM_CONTROL			XE_REG(0xc064)
-#define   PVC_GUC_MOCS_INDEX_MASK		REG_GENMASK(25, 24)
-#define   PVC_GUC_MOCS_UC_INDEX			1
-#define   PVC_GUC_MOCS_INDEX(index)		REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \
-							       index)
+#define   GUC_MOCS_INDEX_MASK			REG_GENMASK(25, 24)
 #define   GUC_SHIM_WC_ENABLE			REG_BIT(21)
 #define   GUC_ENABLE_MIA_CLOCK_GATING		REG_BIT(15)
 #define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	REG_BIT(10)
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 04a57af85d9e5..e51d8fb4a354a 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -328,7 +328,7 @@ static void guc_prepare_xfer(struct xe_guc *guc)
 				GUC_ENABLE_MIA_CACHING;
 
 	if (xe->info.platform == XE_PVC)
-		shim_flags |= PVC_GUC_MOCS_INDEX(PVC_GUC_MOCS_UC_INDEX);
+		shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
 
 	/* Must program this register before loading the ucode with DMA */
 	xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
-- 
GitLab


From 433002ca3670769270a2f8f3a5073e9f370b0562 Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Mon, 5 Jun 2023 12:08:56 -0400
Subject: [PATCH 1630/2340] drm/xe: REBAR resize should be best effort

The resizing of the PCI BAR is a best effort feature.  If it is
not available, it should not fail the driver probe.

Rework the resize to not exit on failure.

Fixes: 7f075300a318 ("drm/xe: Simplify rebar sizing")
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 475b14fe43568..f7a7f996b37f0 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -47,7 +47,7 @@ mask_err:
 	return err;
 }
 
-static int
+static void
 _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -61,18 +61,17 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 	if (ret) {
 		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
 			 resno, 1 << bar_size, ERR_PTR(ret));
-		return ret;
+		return;
 	}
 
 	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
-	return ret;
 }
 
 /*
  * if force_vram_bar_size is set, attempt to set to the requested size
  * else set to maximum possible size
  */
-static int xe_resize_vram_bar(struct xe_device *xe)
+static void xe_resize_vram_bar(struct xe_device *xe)
 {
 	u64 force_vram_bar_size = xe_force_vram_bar_size;
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -83,14 +82,13 @@ static int xe_resize_vram_bar(struct xe_device *xe)
 	u32 bar_size_mask;
 	u32 pci_cmd;
 	int i;
-	int ret;
 
 	/* gather some relevant info */
 	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
 	bar_size_mask = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
 
 	if (!bar_size_mask)
-		return 0;
+		return;
 
 	/* set to a specific size? */
 	if (force_vram_bar_size) {
@@ -104,22 +102,22 @@ static int xe_resize_vram_bar(struct xe_device *xe)
 			drm_info(&xe->drm,
 				 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
 				 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
-			return 0;
+			return;
 		}
 
 		rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);
 
 		if (rebar_size == current_size)
-			return 0;
+			return;
 	} else {
 		rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);
 
 		/* only resize if larger than current */
 		if (rebar_size <= current_size)
-			return 0;
+			return;
 	}
 
-	drm_info(&xe->drm, "Resizing bar from %lluMiB -> %lluMiB\n",
+	drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n",
 		 (u64)current_size >> 20, (u64)rebar_size >> 20);
 
 	while (root->parent)
@@ -133,17 +131,16 @@ static int xe_resize_vram_bar(struct xe_device *xe)
 
 	if (!root_res) {
 		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
-		return -1;
+		return;
 	}
 
 	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
 
-	ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size);
+	_resize_bar(xe, GEN12_LMEM_BAR, rebar_size);
 
 	pci_assign_unassigned_bus_resources(pdev->bus);
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
-	return ret;
 }
 
 static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
@@ -163,16 +160,13 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
 static int xe_determine_lmem_bar_size(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-	int err;
 
 	if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) {
 		drm_err(&xe->drm, "pci resource is not valid\n");
 		return -ENXIO;
 	}
 
-	err = xe_resize_vram_bar(xe);
-	if (err)
-		return err;
+	xe_resize_vram_bar(xe);
 
 	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
 	xe->mem.vram.io_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-- 
GitLab


From 882b5d00f96a3a02874da2ffee24508df6d6b860 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 2 Jun 2023 16:10:54 -0700
Subject: [PATCH 1631/2340] drm/xe/wa: Extend scope of Wa_14015795083
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wa_14015795083 was already implemented for DG2 and PVC, but the
workaround database has been updated to extend it to more platforms.  It
should now apply to all platforms with graphics versions 12.00 - 12.60,
as well as A-step of Xe_LPG (12.70 / 12.71).

Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://lore.kernel.org/r/20230602231054.1306865-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d9906f326d387..e5b3ff669465c 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -107,6 +107,10 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_ACTIONS(SET(DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
 	},
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260)),
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
+	},
 
 	/* DG1 */
 
@@ -195,10 +199,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN))
 	},
-	{ XE_RTP_NAME("14015795083"),
-	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
-	},
 	{ XE_RTP_NAME("18018781329"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
@@ -219,10 +219,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 
 	/* PVC */
 
-	{ XE_RTP_NAME("14015795083"),
-	  XE_RTP_RULES(PLATFORM(PVC)),
-	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
-	},
 	{ XE_RTP_NAME("18018781329"),
 	  XE_RTP_RULES(PLATFORM(PVC)),
 	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
@@ -234,6 +230,13 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_RULES(PLATFORM(PVC)),
 	  XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC))
 	},
+
+	/* Xe_LPG */
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
+	},
+
 	{}
 };
 
-- 
GitLab


From a4f08dbb712135680d086ffa9e8ee5c07e5fc661 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 31 May 2023 15:23:34 +0000
Subject: [PATCH 1632/2340] drm/xe: Use SPDX-License-Identifier instead of
 license text

Replace the license text with its SPDX-License-Identifier for
quick identification of the license and consistency with the
rest of the driver.

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index edd29e7f39eb3..4266760faf054 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1,26 +1,6 @@
+/* SPDX-License-Identifier: MIT */
 /*
- * Copyright 2021 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ * Copyright © 2023 Intel Corporation
  */
 
 #ifndef _UAPI_XE_DRM_H_
-- 
GitLab


From fcca94c69b9539ed741ba5875ab4f1157cd781f8 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 31 May 2023 15:23:35 +0000
Subject: [PATCH 1633/2340] drm/xe: Group engine related structs

Move the definition of drm_xe_engine_class_instance to group it with
other engine related structs and to follow the ioctls order.

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 4266760faf054..7d317b9564e93 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -116,24 +116,6 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_VM_MADVISE			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
 
-struct drm_xe_engine_class_instance {
-	__u16 engine_class;
-
-#define DRM_XE_ENGINE_CLASS_RENDER		0
-#define DRM_XE_ENGINE_CLASS_COPY		1
-#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
-#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
-#define DRM_XE_ENGINE_CLASS_COMPUTE		4
-	/*
-	 * Kernel only class (not actual hardware engine class). Used for
-	 * creating ordered queues of VM bind operations.
-	 */
-#define DRM_XE_ENGINE_CLASS_VM_BIND		5
-
-	__u16 engine_instance;
-	__u16 gt_id;
-};
-
 #define XE_MEM_REGION_CLASS_SYSMEM	0
 #define XE_MEM_REGION_CLASS_VRAM	1
 
@@ -536,6 +518,24 @@ struct drm_xe_engine_set_property {
 	__u64 reserved[2];
 };
 
+struct drm_xe_engine_class_instance {
+	__u16 engine_class;
+
+#define DRM_XE_ENGINE_CLASS_RENDER		0
+#define DRM_XE_ENGINE_CLASS_COPY		1
+#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
+#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
+#define DRM_XE_ENGINE_CLASS_COMPUTE		4
+	/*
+	 * Kernel only class (not actual hardware engine class). Used for
+	 * creating ordered queues of VM bind operations.
+	 */
+#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+
+	__u16 engine_instance;
+	__u16 gt_id;
+};
+
 struct drm_xe_engine_create {
 	/** @extensions: Pointer to the first extension struct, if any */
 #define XE_ENGINE_EXTENSION_SET_PROPERTY               0
-- 
GitLab


From a0385a840ca02585d16a1ed4b10b501d17853d33 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 8 Jun 2023 09:59:14 +0200
Subject: [PATCH 1634/2340] drm/xe: Fix some formatting issues in uAPI

Fix spacing, alignment, and repeated words in the documentation.

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 7d317b9564e93..83868af45984c 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -105,16 +105,16 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_GEM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
 #define DRM_IOCTL_XE_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
 #define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
-#define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
-#define DRM_IOCTL_XE_VM_BIND			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_VM_DESTROY			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
+#define DRM_IOCTL_XE_VM_BIND			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
 #define DRM_IOCTL_XE_ENGINE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create)
 #define DRM_IOCTL_XE_ENGINE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_GET_PROPERTY, struct drm_xe_engine_get_property)
-#define DRM_IOCTL_XE_ENGINE_DESTROY		DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy)
-#define DRM_IOCTL_XE_EXEC			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
+#define DRM_IOCTL_XE_ENGINE_DESTROY		 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy)
+#define DRM_IOCTL_XE_EXEC			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_MMIO			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio)
-#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY	DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property)
+#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY	 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
-#define DRM_IOCTL_XE_VM_MADVISE			DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
+#define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
 
 #define XE_MEM_REGION_CLASS_SYSMEM	0
 #define XE_MEM_REGION_CLASS_VRAM	1
@@ -147,7 +147,7 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_GT_COUNT		4
 #define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
 #define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY	6
-#define XE_QUERY_CONFIG_NUM_PARAM		XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1
+#define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1)
 	__u64 info[];
 };
 
@@ -399,8 +399,8 @@ struct drm_xe_vm_bind_op {
 	 * If this flag is clear and the IOCTL doesn't return an error, in
 	 * practice the bind op is good and will complete.
 	 *
-	 * If this flag is set and doesn't return return an error, the bind op
-	 * can still fail and recovery is needed. If configured, the bind op that
+	 * If this flag is set and doesn't return an error, the bind op can
+	 * still fail and recovery is needed. If configured, the bind op that
 	 * caused the error will be captured in drm_xe_vm_bind_op_error_capture.
 	 * Once the user sees the error (via a ufence +
 	 * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory
@@ -646,9 +646,9 @@ struct drm_xe_exec {
 	__u64 syncs;
 
 	/**
-	  * @address: address of batch buffer if num_batch_buffer == 1 or an
-	  * array of batch buffer addresses
-	  */
+	 * @address: address of batch buffer if num_batch_buffer == 1 or an
+	 * array of batch buffer addresses
+	 */
 	__u64 address;
 
 	/**
-- 
GitLab


From f1a5a9bf14182ae659cb3b5331021662c1ee1d9a Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 17 Jan 2023 18:34:34 -0800
Subject: [PATCH 1635/2340] drm/xe/guc: Read HXG fields from DW1 of G2H
 response
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The HXG fields are DW1 not DW0, fix this.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 137c184df4879..615cc4d4ad698 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -790,13 +790,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
 	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
 		g2h_fence->fail = true;
 		g2h_fence->error =
-			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]);
 		g2h_fence->hint =
-			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]);
 	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
 		g2h_fence->retry = true;
 		g2h_fence->reason =
-			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]);
+			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]);
 	} else if (g2h_fence->response_buffer) {
 		g2h_fence->response_len = response_len;
 		memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
-- 
GitLab


From 1011812c642c664b254986fb34264c2ee8d2bb50 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Fri, 9 Jun 2023 11:38:14 -0300
Subject: [PATCH 1636/2340] drm/xe/reg_sr: Use a single parameter for
 xe_reg_sr_apply_whitelist()

All other parameters can be extracted from a single struct xe_hw_engine
reference. This removes redundancy and simplifies the code.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230609143815.302540-2-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c        | 3 +--
 drivers/gpu/drm/xe/xe_hw_engine.c | 2 +-
 drivers/gpu/drm/xe/xe_reg_sr.c    | 7 +++++--
 drivers/gpu/drm/xe/xe_reg_sr.h    | 4 ++--
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3799e663bad35..2458397ce8af7 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -491,8 +491,7 @@ static int do_gt_restart(struct xe_gt *gt)
 
 	for_each_hw_engine(hwe, gt, id) {
 		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
-		xe_reg_sr_apply_whitelist(&hwe->reg_whitelist,
-					  hwe->mmio_base, gt);
+		xe_reg_sr_apply_whitelist(hwe);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b42a0cb50159a..68cd793cdfb56 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -381,7 +381,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	XE_BUG_ON(!(gt->info.engine_mask & BIT(id)));
 
 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
-	xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt);
+	xe_reg_sr_apply_whitelist(hwe);
 
 	hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
 					 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 8580ff38b82ca..65e6ad1906c63 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -20,6 +20,7 @@
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_printk.h"
+#include "xe_hw_engine_types.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
 #include "xe_reg_whitelist.h"
@@ -211,12 +212,14 @@ err_force_wake:
 	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
 }
 
-void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
-			       struct xe_gt *gt)
+void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
 {
+	struct xe_reg_sr *sr = &hwe->reg_whitelist;
+	struct xe_gt *gt = hwe->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_reg_sr_entry *entry;
 	struct drm_printer p;
+	u32 mmio_base = hwe->mmio_base;
 	unsigned long reg;
 	unsigned int slot = 0;
 	int err;
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
index c3001798d9e85..e3197c33afe29 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.h
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -14,6 +14,7 @@
 
 struct xe_device;
 struct xe_gt;
+struct xe_hw_engine;
 struct drm_printer;
 
 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
@@ -22,7 +23,6 @@ void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p);
 int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e,
 		  struct xe_gt *gt);
 void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
-void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
-			       struct xe_gt *gt);
+void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe);
 
 #endif
-- 
GitLab


From 5eeb8b443875f2a6f751ed2c77cc410fad6b2e61 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Fri, 9 Jun 2023 11:38:15 -0300
Subject: [PATCH 1637/2340] drm/xe/reg_sr: Apply limit to register whitelisting

If RING_MAX_NONPRIV_SLOTS denotes the maximum number of whitelisting
slots, then it makes sense to refuse going above it.

v2:
  - Use xe_gt_err() instead of drm_err() for more detailed info in the
    error message. (Matt)

Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230609143815.302540-3-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 65e6ad1906c63..7c88352636d23 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -235,6 +235,13 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
 
 	p = drm_debug_printer(KBUILD_MODNAME);
 	xa_for_each(&sr->xa, reg, entry) {
+		if (slot == RING_MAX_NONPRIV_SLOTS) {
+			xe_gt_err(gt,
+				  "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
+				  hwe->name, RING_MAX_NONPRIV_SLOTS);
+			break;
+		}
+
 		xe_reg_whitelist_print_entry(&p, 0, reg, entry);
 		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
 				reg | entry->set_bits);
-- 
GitLab


From 85dbfe47d07cddeac959ccc9352c4b0f1683225b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 5 Jun 2023 15:04:54 +0200
Subject: [PATCH 1638/2340] drm/xe: Invalidate TLB also on bind if in scratch
 page mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For scratch table mode we need to cover the case where a scratch PTE might
have been pre-fetched and cached and used instead of that of the newly
bound vma.
For compute vms, invalidate TLB globally using GuC before signalling
bind complete. For !long-running vms, invalidate TLB at batch start.

Also document how TLB invalidation works.

v2:
- Fix a pointer to the comment about TLB invalidation (Jose Souza).
- Add a bool to the vm whether we want to invalidate TLB at batch start.
- Invalidate TLB also on BCS- and video engines at batch start where
  needed.
- Use BIT() macro instead of explicit shift.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tested-by: José Roberto de Souza <jose.souza@intel.com> #v1
Reported-by: José Roberto de Souza <jose.souza@intel.com> #v1
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  1 +
 drivers/gpu/drm/xe/xe_pt.c                | 17 +++++++-
 drivers/gpu/drm/xe/xe_ring_ops.c          | 47 +++++++++++++++++------
 drivers/gpu/drm/xe/xe_vm.c                |  2 +
 drivers/gpu/drm/xe/xe_vm_types.h          |  3 ++
 5 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 0f9c5b0b8a3ba..1a744c508174e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -73,6 +73,7 @@
 #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET		(1<<19)
+#define	  PIPE_CONTROL_TLB_INVALIDATE			BIT(18)
 #define   PIPE_CONTROL_PSD_SYNC				(1<<17)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index bef265715000c..2c472fafc8112 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1297,7 +1297,20 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 
-	if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
+	/*
+	 * If rebind, we have to invalidate TLB on !LR vms to invalidate
+	 * cached PTEs point to freed memory. on LR vms this is done
+	 * automatically when the context is re-enabled by the rebind worker,
+	 * or in fault mode it was invalidated on PTE zapping.
+	 *
+	 * If !rebind, and scratch enabled VMs, there is a chance the scratch
+	 * PTE is already cached in the TLB so it needs to be invalidated.
+	 * on !LR VMs this is done in the ring ops preceding a batch, but on
+	 * non-faulting LR, in particular on user-space batch buffer chaining,
+	 * it needs to be done here.
+	 */
+	if ((rebind && !xe_vm_no_dma_fences(vm) && !vm->batch_invalidate_tlb) ||
+	    (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_compute_mode(vm))) {
 		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
 		if (!ifence)
 			return ERR_PTR(-ENOMEM);
@@ -1313,7 +1326,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		LLIST_HEAD(deferred);
 
 		/* TLB invalidation must be done before signaling rebind */
-		if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
+		if (ifence) {
 			int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
 							  vma);
 			if (err) {
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 45117a2ab1a0c..215606b5fae0e 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -15,6 +15,7 @@
 #include "xe_macros.h"
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
+#include "xe_vm.h"
 
 /*
  * 3D-related flags that can't be set on _engines_ that lack access to the 3D
@@ -74,9 +75,11 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
 	return i;
 }
 
-static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
+			       u32 *dw, int i)
 {
-	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW |
+		(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
 	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
 	dw[i++] = 0;
 	dw[i++] = value;
@@ -107,7 +110,8 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
 	return i;
 }
 
-static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
+static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
+				int i)
 {
 	u32 flags = PIPE_CONTROL_CS_STALL |
 		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
@@ -119,6 +123,9 @@ static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
 		PIPE_CONTROL_QW_WRITE |
 		PIPE_CONTROL_STORE_DATA_INDEX;
 
+	if (invalidate_tlb)
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+
 	flags &= ~mask_flags;
 
 	dw[i++] = GFX_OP_PIPE_CONTROL(6);
@@ -170,9 +177,17 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 {
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
-
-	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-				seqno, dw, i);
+	struct xe_vm *vm = job->engine->vm;
+
+	if (vm->batch_invalidate_tlb) {
+		dw[i++] = preparser_disable(true);
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+		dw[i++] = preparser_disable(false);
+	} else {
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
+	}
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
@@ -181,7 +196,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 						job->user_fence.value,
 						dw, i);
 
-	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
 	i = emit_user_interrupt(dw, i);
 
@@ -210,6 +225,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+	struct xe_vm *vm = job->engine->vm;
 
 	dw[i++] = preparser_disable(true);
 
@@ -220,10 +236,16 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 		else
 			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
 	}
+
+	if (vm->batch_invalidate_tlb)
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+
 	dw[i++] = preparser_disable(false);
 
-	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-				seqno, dw, i);
+	if (!vm->batch_invalidate_tlb)
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
@@ -232,7 +254,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 						job->user_fence.value,
 						dw, i);
 
-	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
 	i = emit_user_interrupt(dw, i);
 
@@ -250,6 +272,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	struct xe_vm *vm = job->engine->vm;
 	u32 mask_flags = 0;
 
 	dw[i++] = preparser_disable(true);
@@ -257,7 +280,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
 	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
 		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
-	i = emit_pipe_invalidate(mask_flags, dw, i);
+
+	/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
+	i = emit_pipe_invalidate(mask_flags, vm->batch_invalidate_tlb, dw, i);
 
 	/* hsdes: 1809175790 */
 	if (has_aux_ccs(xe))
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 17b7d543ae494..ea205244fcf9c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1237,11 +1237,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 			if (err)
 				goto err_scratch_pt;
 		}
+		vm->batch_invalidate_tlb = true;
 	}
 
 	if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
 		vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
+		vm->batch_invalidate_tlb = false;
 	}
 
 	if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 76af6ac0fa846..5242236b4b0e2 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -337,6 +337,9 @@ struct xe_vm {
 		/** @capture_once: capture only one error per VM */
 		bool capture_once;
 	} error_capture;
+
+	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
+	bool batch_invalidate_tlb;
 };
 
 #endif
-- 
GitLab


From 9f8f93bee3efdba3bf7853befe2219e3a300c305 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 2 Jun 2023 14:44:23 +0200
Subject: [PATCH 1639/2340] drm/xe: Emit a render cache flush after each
 rcs/ccs batch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to flush render caches before fence signalling, where we might
release the memory for reuse. We can't rely on userspace doing this,
so flush render caches after the batch, but before user fence- and
dma_fence signalling.

Copy the cache flush from i915, but omit PIPE_CONTROL_FLUSH_L3, since it
should be implied by the other flushes. Also omit
PIPE_CONTROL_TLB_INVALIDATE since there should be no apparent need to
invalidate TLB after batch completion.

v2:
- Update Makefile for OOB WA.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tested-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com> #1
Reported-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile               |  2 +-
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  3 ++
 drivers/gpu/drm/xe/xe_ring_ops.c          | 35 +++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_wa_oob.rules        |  1 +
 4 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index c914d02d8a8c3..73100c246a740 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
-$(obj)/xe_guc.o $(obj)/xe_wa.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o: $(generated_oob)
 
 # Please keep these build lists sorted!
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 1a744c508174e..12120dd37aa2a 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -66,6 +66,9 @@
 #define   PVC_MS_MOCS_INDEX_MASK	GENMASK(6, 1)
 
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+
+#define	  PIPE_CONTROL0_HDC_PIPELINE_FLUSH		BIT(9)	/* gen12 */
+
 #define   PIPE_CONTROL_COMMAND_CACHE_INVALIDATE		(1<<29)
 #define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
 #define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 215606b5fae0e..4cfd78e1ffa58 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -5,6 +5,7 @@
 
 #include "xe_ring_ops.h"
 
+#include "generated/xe_wa_oob.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
@@ -16,6 +17,7 @@
 #include "xe_sched_job.h"
 #include "xe_vm_types.h"
 #include "xe_vm.h"
+#include "xe_wa.h"
 
 /*
  * 3D-related flags that can't be set on _engines_ that lack access to the 3D
@@ -152,6 +154,37 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
 	return i;
 }
 
+static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
+{
+	struct xe_gt *gt = job->engine->gt;
+	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	u32 flags;
+
+	flags = (PIPE_CONTROL_CS_STALL |
+		 PIPE_CONTROL_TILE_CACHE_FLUSH |
+		 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_FLUSH_ENABLE);
+
+	if (XE_WA(gt, 1409600907))
+		flags |= PIPE_CONTROL_DEPTH_STALL;
+
+	if (lacks_render)
+		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+	dw[i++] = flags;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
 static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
 			      int i)
 {
@@ -295,6 +328,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
+	i = emit_render_cache_flush(job, dw, i);
+
 	if (job->user_fence.used)
 		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
 						job->user_fence.value,
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 1ecb10390b280..15c23813398a5 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -14,3 +14,4 @@
 		SUBPLATFORM(DG2, G12)
 18020744125	PLATFORM(PVC)
 1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
+1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
-- 
GitLab


From 790bdc7cb2e7dafbac0aafc016dcb7493c925bac Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 9 Jun 2023 11:09:37 -0700
Subject: [PATCH 1640/2340] drm/xe: Handle unmapped userptr in analyze VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A corner exists where a userptr may have no mapping when analyze VM is
called, handle this case.

Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ea205244fcf9c..fa4778bfd0639 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3455,9 +3455,13 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 		if (is_userptr) {
 			struct xe_res_cursor cur;
 
-			xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
-					&cur);
-			addr = xe_res_dma(&cur);
+			if (vma->userptr.sg) {
+				xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
+						&cur);
+				addr = xe_res_dma(&cur);
+			} else {
+				addr = 0;
+			}
 		} else {
 			addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram);
 		}
-- 
GitLab


From 5e3220de6c72349f77977c62a991748d4e0fea26 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 9 Jun 2023 11:19:30 -0700
Subject: [PATCH 1641/2340] drm/xe: Use Xe ordered workqueue for rebind worker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A mix of the system unbound wq and Xe ordered wq was used for the
rebind, only use the Xe ordered wq. This will ensure only 1 rebind is
occuring at a time providing a somewhat clunky work around for short
comings in TTM wrt to memory contention. Once the TTM memory contention
is resolved we should be able to use a dedicated non-ordered workqueue.

Also add helper to queue rebind worker to avoid using wrong workqueue
going forward.

Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_preempt_fence.c | 2 +-
 drivers/gpu/drm/xe/xe_vm.c            | 3 +--
 drivers/gpu/drm/xe/xe_vm.h            | 6 ++++++
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 78ad8c2098736..219eefeb90ff7 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -25,7 +25,7 @@ static void preempt_fence_work_func(struct work_struct *w)
 	dma_fence_signal(&pfence->base);
 	dma_fence_end_signalling(cookie);
 
-	queue_work(system_unbound_wq, &e->vm->preempt.rebind_work);
+	xe_vm_queue_rebind_worker(e->vm);
 
 	xe_engine_put(e);
 }
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index fa4778bfd0639..be629783050f1 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3086,8 +3086,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 			/* Rebinds may have been blocked, give worker a kick */
 			if (xe_vm_in_compute_mode(vm))
-				queue_work(vm->xe->ordered_wq,
-					   &vm->preempt.rebind_work);
+				xe_vm_queue_rebind_worker(vm);
 		}
 
 		goto put_engine;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 372f26153209b..bb29968568412 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -124,6 +124,12 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma);
 
 int xe_vma_userptr_check_repin(struct xe_vma *vma);
 
+static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
+{
+	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
+	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
+}
+
 /*
  * XE_ONSTACK_TV is used to size the tv_onstack array that is input
  * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
-- 
GitLab


From 3534b18c360525b4cff67b90db45d7b9e365bdf2 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 7 Jun 2023 11:43:52 -0700
Subject: [PATCH 1642/2340] drm/xe: s/XE_PTE_READ_ONLY/XE_PTE_FLAG_READ_ONLY

This define is for internal PTE flags rather than fields in the hardware
PTEs, rename as such. This will help in an upcoming patch to avoid
further confusion.

Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h |  2 +-
 drivers/gpu/drm/xe/xe_pt.c |  2 +-
 drivers/gpu/drm/xe/xe_vm.c | 12 +++++++-----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 29eb7474f0185..552fe073e9c5b 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -65,7 +65,7 @@
 #define XE_PAGE_PRESENT			BIT_ULL(0)
 #define XE_PAGE_RW			BIT_ULL(1)
 
-#define XE_PTE_READ_ONLY		BIT(0)
+#define XE_PTE_FLAG_READ_ONLY		BIT(0)
 
 #define XE_PL_SYSTEM		TTM_PL_SYSTEM
 #define XE_PL_TT		TTM_PL_TT
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 2c472fafc8112..1ba93c2861aba 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -102,7 +102,7 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
 {
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
 
-	if (unlikely(flags & XE_PTE_READ_ONLY))
+	if (unlikely(flags & XE_PTE_FLAG_READ_ONLY))
 		pte &= ~XE_PAGE_RW;
 
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index be629783050f1..51daa5fd78212 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -61,7 +61,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
 	bool in_kthread = !current->mm;
 	unsigned long notifier_seq;
 	int pinned, ret, i;
-	bool read_only = vma->pte_flags & XE_PTE_READ_ONLY;
+	bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
 
 	lockdep_assert_held(&vm->lock);
 	XE_BUG_ON(!xe_vma_is_userptr(vma));
@@ -869,7 +869,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	vma->start = start;
 	vma->end = end;
 	if (read_only)
-		vma->pte_flags = XE_PTE_READ_ONLY;
+		vma->pte_flags = XE_PTE_FLAG_READ_ONLY;
 
 	if (tile_mask) {
 		vma->tile_mask = tile_mask;
@@ -923,7 +923,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 {
 	struct xe_vm *vm = vma->vm;
 	struct xe_device *xe = vm->xe;
-	bool read_only = vma->pte_flags & XE_PTE_READ_ONLY;
+	bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
 
 	if (xe_vma_is_userptr(vma)) {
 		if (vma->userptr.sg) {
@@ -2643,7 +2643,8 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					  first->userptr.ptr,
 					  first->start,
 					  lookup->start - 1,
-					  (first->pte_flags & XE_PTE_READ_ONLY),
+					  (first->pte_flags &
+					   XE_PTE_FLAG_READ_ONLY),
 					  first->tile_mask);
 		if (first->bo)
 			xe_bo_unlock(first->bo, &ww);
@@ -2674,7 +2675,8 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					 last->userptr.ptr + chunk,
 					 last->start + chunk,
 					 last->end,
-					 (last->pte_flags & XE_PTE_READ_ONLY),
+					 (last->pte_flags &
+					  XE_PTE_FLAG_READ_ONLY),
 					 last->tile_mask);
 		if (last->bo)
 			xe_bo_unlock(last->bo, &ww);
-- 
GitLab


From 6713ee6ca19e3cd43798b4b40f8b13489c724a89 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 7 Jun 2023 11:51:36 -0700
Subject: [PATCH 1643/2340] drm/xe: Move XE_PTE_FLAG_READ_ONLY to xe_vm_types.h

XE_PTE_FLAG_READ_ONLY is specific to struct xe_vma, move it from xe_bo.h
to xe_vm_types.h to reflect that.

Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h       | 2 --
 drivers/gpu/drm/xe/xe_vm_types.h | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 552fe073e9c5b..dd3d448fee0bc 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -65,8 +65,6 @@
 #define XE_PAGE_PRESENT			BIT_ULL(0)
 #define XE_PAGE_RW			BIT_ULL(1)
 
-#define XE_PTE_FLAG_READ_ONLY		BIT(0)
-
 #define XE_PL_SYSTEM		TTM_PL_SYSTEM
 #define XE_PL_TT		TTM_PL_TT
 #define XE_PL_VRAM0		TTM_PL_VRAM
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 5242236b4b0e2..a51e84e584b43 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -30,6 +30,7 @@ struct xe_vma {
 	/** @end: end address of this VMA within its address domain */
 	u64 end;
 	/** @pte_flags: pte flags for this VMA */
+#define XE_PTE_FLAG_READ_ONLY		BIT(0)
 	u32 pte_flags;
 
 	/** @bo: BO if not a userptr, must be NULL is userptr */
-- 
GitLab


From a0ea91db616c386a9b5689dbbb7f57073f993368 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sun, 11 Jun 2023 15:24:43 -0700
Subject: [PATCH 1644/2340] drm/xe: Rename pte/pde encoding functions

Remove the leftover TODO by renameing the functions to use xe prefix.
Since the static __gen8_pte_encode() already has a double score,
just remove the prefix.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230611222447.2837573-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c       | 20 +++++++-------
 drivers/gpu/drm/xe/xe_pt.c            | 38 +++++++++++++--------------
 drivers/gpu/drm/xe/xe_pt.h            | 10 +++----
 drivers/gpu/drm/xe/xe_vm.c            |  4 +--
 5 files changed, 36 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 60266fea7faa2..4c79c1dfa772a 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -300,7 +300,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
-	expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
+	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
 	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index f50484759866d..a62bd7ec8a42c 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -189,15 +189,15 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		return ret;
 	}
 
-	entry = gen8_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
+	entry = xe_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
 	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
 
 	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
 
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
-		entry = gen8_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
-					XE_CACHE_WB, 0, 0);
+		entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
+				      XE_CACHE_WB, 0, 0);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
 
@@ -215,8 +215,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		for (i = 0; i < batch->size;
 		     i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
-			entry = gen8_pte_encode(NULL, batch, i,
-						XE_CACHE_WB, 0, 0);
+			entry = xe_pte_encode(NULL, batch, i,
+					      XE_CACHE_WB, 0, 0);
 
 			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
 				  entry);
@@ -242,7 +242,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		if (vm->flags & XE_VM_FLAGS_64K && level == 1)
 			flags = XE_PDE_64K;
 
-		entry = gen8_pde_encode(bo, map_ofs + (level - 1) *
+		entry = xe_pde_encode(bo, map_ofs + (level - 1) *
 					XE_PAGE_SIZE, XE_CACHE_WB);
 		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
 			  entry | flags);
@@ -250,8 +250,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
 	/* Write PDE's that point to our BO. */
 	for (i = 0; i < num_entries - num_level; i++) {
-		entry = gen8_pde_encode(bo, i * XE_PAGE_SIZE,
-					XE_CACHE_WB);
+		entry = xe_pde_encode(bo, i * XE_PAGE_SIZE,
+				      XE_CACHE_WB);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
 			  (i + 1) * 8, u64, entry);
@@ -1231,8 +1231,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 			BUG_ON(pt_bo->size != SZ_4K);
 
-			addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB,
-					       0, 0);
+			addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB,
+					     0, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 		}
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 1ba93c2861aba..29c1b1f0bd7c1 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -47,7 +47,7 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
 }
 
 /**
- * gen8_pde_encode() - Encode a page-table directory entry pointing to
+ * xe_pde_encode() - Encode a page-table directory entry pointing to
  * another page-table.
  * @bo: The page-table bo of the page-table to point to.
  * @bo_offset: Offset in the page-table bo to point to.
@@ -57,8 +57,8 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
  *
  * Return: An encoded page directory entry. No errors.
  */
-u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
-		    const enum xe_cache_level level)
+u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
+		  const enum xe_cache_level level)
 {
 	u64 pde;
 	bool is_vram;
@@ -97,8 +97,8 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 	}
 }
 
-static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
-			     u32 pt_level)
+static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
+			u32 pt_level)
 {
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
 
@@ -131,7 +131,7 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
 }
 
 /**
- * gen8_pte_encode() - Encode a page-table entry pointing to memory.
+ * xe_pte_encode() - Encode a page-table entry pointing to memory.
  * @vma: The vma representing the memory to point to.
  * @bo: If @vma is NULL, representing the memory to point to.
  * @offset: The offset into @vma or @bo.
@@ -140,13 +140,11 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
  * @pt_level: The page-table level of the page-table into which the entry
  * is to be inserted.
  *
- * TODO: Rename.
- *
  * Return: An encoded page-table entry. No errors.
  */
-u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
-		    u64 offset, enum xe_cache_level cache,
-		    u32 flags, u32 pt_level)
+u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+		  u64 offset, enum xe_cache_level cache,
+		  u32 flags, u32 pt_level)
 {
 	u64 pte;
 	bool is_vram;
@@ -162,7 +160,7 @@ u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 			pte |= XE_USM_PPGTT_PTE_AE;
 	}
 
-	return __gen8_pte_encode(pte, cache, flags, pt_level);
+	return __pte_encode(pte, cache, flags, pt_level);
 }
 
 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -174,13 +172,13 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 		return 0;
 
 	if (level == 0) {
-		u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0,
-					    XE_CACHE_WB, 0, 0);
+		u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0,
+					  XE_CACHE_WB, 0, 0);
 
 		return empty;
 	} else {
-		return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
-				       XE_CACHE_WB);
+		return xe_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
+				     XE_CACHE_WB);
 	}
 }
 
@@ -634,9 +632,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
-		pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset,
-					xe_walk->cache, xe_walk->pte_flags,
-					level);
+		pte = __pte_encode(xe_res_dma(curs) + xe_walk->dma_offset,
+				   xe_walk->cache, xe_walk->pte_flags,
+				   level);
 		pte |= xe_walk->default_pte;
 
 		/*
@@ -699,7 +697,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 			xe_child->is_compact = true;
 		}
 
-		pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
+		pte = xe_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
 		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
 					 pte);
 	}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 10f334b9c0043..54e8a043d353d 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -45,10 +45,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 
 bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
 
-u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
-		    const enum xe_cache_level level);
+u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
+		  const enum xe_cache_level level);
 
-u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
-		    u64 offset, enum xe_cache_level cache,
-		    u32 flags, u32 pt_level);
+u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+		  u64 offset, enum xe_cache_level cache,
+		  u32 flags, u32 pt_level);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 51daa5fd78212..eb2209d2d1cdb 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1530,8 +1530,8 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
 
 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 {
-	return gen8_pde_encode(vm->pt_root[tile->id]->bo, 0,
-			       XE_CACHE_WB);
+	return xe_pde_encode(vm->pt_root[tile->id]->bo, 0,
+			     XE_CACHE_WB);
 }
 
 static struct dma_fence *
-- 
GitLab


From 90738d86650729cafb6d92191e6568d4b425b20a Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sun, 11 Jun 2023 15:24:44 -0700
Subject: [PATCH 1645/2340] drm/xe/guc: Fix typo s/enabled/enable/

Fix the log message when it fails to enable CT.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230611222447.2837573-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 615cc4d4ad698..22bc9ce846db7 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -312,7 +312,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
 	return 0;
 
 err_out:
-	drm_err(&xe->drm, "Failed to enabled CT (%d)\n", err);
+	drm_err(&xe->drm, "Failed to enable CT (%d)\n", err);
 
 	return err;
 }
-- 
GitLab


From 6dc3a12fb8185f98b525dbdb02fa5b810c4ff0bc Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Sun, 11 Jun 2023 15:24:45 -0700
Subject: [PATCH 1646/2340] drm/xe/guc: Normalize error messages with %#x

One of the messages was printed without 0x prefix, so it was not clear
if it was decimal or hex: make sure to add the prefix by using %#x.
While at it, normalize the other messages in the same function to follow
the same pattern.

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230611222447.2837573-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index e51d8fb4a354a..43f862aaacbef 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -647,7 +647,7 @@ retry:
 			     GUC_HXG_MSG_0_ORIGIN, 50000, &reply, false);
 	if (ret) {
 timeout:
-		drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n",
+		drm_err(&xe->drm, "mmio request %#x: no reply %#x\n",
 			request[0], reply);
 		return ret;
 	}
@@ -673,7 +673,7 @@ timeout:
 	    GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
 		u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
 
-		drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n",
+		drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n",
 			request[0], reason);
 		goto retry;
 	}
@@ -683,7 +683,7 @@ timeout:
 		u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
 		u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
 
-		drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n",
+		drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n",
 			request[0], error, hint);
 		return -ENXIO;
 	}
-- 
GitLab


From 2846d10339a2cc304a1ae55ce75e61eb7f55eb0b Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Fri, 9 Jun 2023 08:19:54 +0530
Subject: [PATCH 1647/2340] drm/xe: Donot apply forcewake while reading actual
 frequency

RPSTAT1 is an sgunit register and thus doesn't need forcewake.
MTL_MIRROR_TARGET_WP1 is within an "always on" power domain and thus
doesn't require any forcewake to ensure the register is powered
up and usable. When GT is RC6 the actual frequency reported will be 0.

v2:
 - Add bspec index (Anshuman)
 - %s/GEN12_RPSTAT1/GT_PERF_STATUS as per bspec
v3: Update Fixes tag

Bspec: 51837, 67651
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230609024954.987039-1-badal.nilawar@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 67faa9ee00063..5d5cf4b0d508a 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -31,7 +31,7 @@
 #define GEN10_FREQ_INFO_REC	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK		REG_GENMASK(15, 8)
 
-#define GEN12_RPSTAT1		XE_REG(0x1381b4)
+#define GT_PERF_STATUS		XE_REG(0x1381b4)
 #define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
 
 #define MTL_MIRROR_TARGET_WP1	XE_REG(0xc60)
@@ -371,26 +371,18 @@ static ssize_t freq_act_show(struct device *dev,
 	ssize_t ret;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
-	/*
-	 * When in RC6, actual frequency is 0. Let's block RC6 so we are able
-	 * to verify that our freq requests are really happening.
-	 */
-	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	if (ret)
-		goto out;
 
+	/* When in RC6, actual frequency reported will be 0. */
 	if (xe->info.platform == XE_METEORLAKE) {
 		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
 		freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
 	} else {
-		freq = xe_mmio_read32(gt, GEN12_RPSTAT1);
+		freq = xe_mmio_read32(gt, GT_PERF_STATUS);
 		freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
 	}
 
 	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
 
-	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out:
 	xe_device_mem_access_put(gt_to_xe(gt));
 	return ret;
 }
-- 
GitLab


From 1e80d0c3c44806e6ff885102a937ea838a01f560 Mon Sep 17 00:00:00 2001
From: Riana Tauro <riana.tauro@intel.com>
Date: Tue, 13 Jun 2023 15:12:32 +0530
Subject: [PATCH 1648/2340] drm/xe: Fix GT looping for standalone media

gt_count is only being incremented when initializing the primary GT;
since the media GT sets the ID directly, gt_count is not incremented
again, resulting in an incorrect count on MTL.  Use autoincrement while
assigning the media GTs ID to ensure gt_count is correct on MTL and
other future platforms with standalone media.

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Link: https://lore.kernel.org/r/20230613094232.3703549-1-riana.tauro@intel.com
[mattrope: Tweaked commit message to focus on gt_count importance]
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 208dc7a63f88c..71be80274683d 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -569,7 +569,7 @@ static int xe_info_init(struct xe_device *xe,
 		 * up with platforms that support both together.
 		 */
 		drm_WARN_ON(&xe->drm, id != 0);
-		gt->info.id = 1;
+		gt->info.id = xe->info.gt_count++;
 	}
 
 	return 0;
-- 
GitLab


From 35cbfe561912874a1f0d4b2ceb5fe890f0f58e46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 23 May 2023 15:50:19 +0200
Subject: [PATCH 1649/2340] drm/xe: Fix uninitialized variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using uninitialized variables leads to undefined behavior.

Moreover, it causes the compiler to complain with:
../drivers/gpu/drm/xe/xe_vm.c:3265:40: error: variable 'vma' is uninitialized when used here [-Werror,-Wuninitialized]
../drivers/gpu/drm/xe/xe_rtp.c:118:36: error: variable 'i' is uninitialized when used here [-Werror,-Wuninitialized]
../drivers/gpu/drm/xe/xe_mocs.c:449:3: error: variable 'flags' is uninitialized when used here [-Werror,-Wuninitialized]

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230523135020.345596-1-michal@hardline.pl
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 2 +-
 drivers/gpu/drm/xe/xe_rtp.c  | 2 +-
 drivers/gpu/drm/xe/xe_vm.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 86277ecb749bc..ccc852500eda0 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -373,7 +373,7 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = {
 static unsigned int get_mocs_settings(struct xe_device *xe,
 				      struct xe_mocs_info *info)
 {
-	unsigned int flags;
+	unsigned int flags = 0;
 
 	memset(info, 0, sizeof(struct xe_mocs_info));
 
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 956bd39fe1a0b..8aae34df38018 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -136,7 +136,7 @@ static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
 	if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
 		return false;
 
-	for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
+	for (i = 0, action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
 		if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
 		    (action->flags & XE_RTP_ACTION_FLAG_ENGINE_BASE))
 			mmio_base = hwe->mmio_base;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index eb2209d2d1cdb..b8a0fe24d1d0c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3297,7 +3297,7 @@ destroy_vmas:
 		if (!vmas[i])
 			break;
 
-		list_for_each_entry_safe(vma, next, &vma->unbind_link,
+		list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link,
 					 unbind_link) {
 			list_del_init(&vma->unbind_link);
 			if (!vma->destroyed) {
-- 
GitLab


From d0e2dd764a6d55cff35e9f609b724fcc62469ba6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 23 May 2023 15:50:20 +0200
Subject: [PATCH 1650/2340] drm/xe: Fix check for platform without geometry
 pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's not possible for the condition checking if we're running on
platform without geometry pipeline to ever be true, since
gt->fuse_topo.g_dss_mask is an array.

It also breaks the build:
../drivers/gpu/drm/xe/xe_rtp.c:183:50: error: address of array 'gt->fuse_topo.g_dss_mask' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion]

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230523135020.345596-2-michal@hardline.pl
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_topology.c | 5 +++++
 drivers/gpu/drm/xe/xe_gt_topology.h | 2 ++
 drivers/gpu/drm/xe/xe_rtp.c         | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 7c3e347e4d745..d4bbd0a835c22 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -128,6 +128,11 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
 	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
 }
 
+bool xe_dss_mask_empty(const xe_dss_mask_t mask)
+{
+	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
 /**
  * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
  * @gt: GT to check
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index 5f35deed9128e..d1b54fb52ea6c 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -17,6 +17,8 @@ void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
 unsigned int
 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
 
+bool xe_dss_mask_empty(const xe_dss_mask_t mask);
+
 bool
 xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
 
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 8aae34df38018..fb44cc7521d8c 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -315,7 +315,7 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
 	unsigned int dss_per_gslice = 4;
 	unsigned int dss;
 
-	if (drm_WARN(&gt_to_xe(gt)->drm, !gt->fuse_topo.g_dss_mask,
+	if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask),
 		     "Checking gslice for platform without geometry pipeline\n"))
 		return false;
 
-- 
GitLab


From ff063430caa810f2195d2390e79a990eb101c527 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 8 Jun 2023 11:12:17 -0700
Subject: [PATCH 1651/2340] drm/xe/mtl: Add some initial MTL workarounds

This adds a handful of workarounds that apply to production steppings of
MTL:
 - Wa_14018575942
 - Wa_22016670082
 - Wa_14017856879
 - Wa_18019271663

Wa_22016670082 is currently only applied to the primary GT at the
moment, but may need to be extended to the media GT in the future if a
pending update to the workaround database gets finalized.

OOB workarounds will need to be implemented separately in future patches
for Wa_14016712196, Wa_16018063123, and Wa_18013179988.

Reviewed-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Link: https://lore.kernel.org/r/20230608181217.2385932-1-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  8 +++++++
 drivers/gpu/drm/xe/xe_wa.c           | 31 ++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 76c09526690ed..d2a0a5c8b02ae 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -133,6 +133,9 @@
 #define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4, XE_REG_OPTION_MASKED)
 #define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
 
+#define SQCNT1					XE_REG_MCR(0x8718)
+#define   ENFORCE_RAR				REG_BIT(23)
+
 #define XEHP_SQCM				XE_REG_MCR(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
@@ -268,7 +271,9 @@
 #define RENDER_MOD_CTRL				XE_REG_MCR(0xcf2c)
 #define COMP_MOD_CTRL				XE_REG_MCR(0xcf30)
 #define XEHP_VDBX_MOD_CTRL			XE_REG_MCR(0xcf34)
+#define XELPMP_VDBX_MOD_CTRL			XE_REG(0xcf34)
 #define XEHP_VEBX_MOD_CTRL			XE_REG_MCR(0xcf38)
+#define XELPMP_VEBX_MOD_CTRL			XE_REG(0xcf38)
 #define   FORCE_MISS_FTLB			REG_BIT(3)
 
 #define XEHP_GAMSTLB_CTRL			XE_REG_MCR(0xcf4c)
@@ -302,6 +307,9 @@
 #define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
 #define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
+#define ROW_CHICKEN3				XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED)
+#define   DIS_FIX_EOT1_FLUSH			REG_BIT(9)
+
 #define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index e5b3ff669465c..5eaa9bed9d128 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -236,6 +236,22 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
+	{ XE_RTP_NAME("14018575942"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("22016670082"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
+	},
+
+	/* Xe_LPM+ */
+	{ XE_RTP_NAME("14018575942"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300)),
+	  XE_RTP_ACTIONS(SET(XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
+	},
 
 	{}
 };
@@ -502,6 +518,14 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 		       GRAPHICS_STEP(B0, C0)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC))
 	},
+
+	/* Xe_LPG */
+	{ XE_RTP_NAME("14017856879"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH))
+	},
+
 	{}
 };
 
@@ -580,6 +604,13 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
 	},
+
+	/* Xe_LPG */
+	{ XE_RTP_NAME("18019271663"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
+	},
+
 	{}
 };
 
-- 
GitLab


From ab10e976fbda8349163ceee2ce99b2bfc97031b8 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 14 Jun 2023 10:47:54 -0700
Subject: [PATCH 1652/2340] drm/xe: limit GGTT size to GUC_GGTT_TOP

The GuC can't access addresses above GUC_GGTT_TOP, so any GuC-accessible
objects can't be mapped above that offset. Instead of checking each
object to see if GuC may access it or not before mapping it, we just
limit the GGTT size to GUC_GGTT_TOP. This wastes a bit of address space
(about ~18 MBs, which is in addition to what already removed at the bottom
of the GGTT), but it is a good tradeoff to keep the code simple.

The in-code comment has also been updated to explain the limitation.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://lore.kernel.org/r/20230615002521.2587250-1-daniele.ceraolospurio@intel.com/
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 14b6d68a63248..0722c49585a0d 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -24,6 +24,9 @@
 #define MTL_GGTT_PTE_PAT0	BIT_ULL(52)
 #define MTL_GGTT_PTE_PAT1	BIT_ULL(53)
 
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP	0xFEE00000
+
 u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
 {
 	struct xe_device *xe = xe_bo_device(bo);
@@ -111,12 +114,18 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 	/*
 	 * 8B per entry, each points to a 4KB page.
 	 *
-	 * The GuC owns the WOPCM space, thus we can't allocate GGTT address in
-	 * this area. Even though we likely configure the WOPCM to less than the
-	 * maximum value, to simplify the driver load (no need to fetch HuC +
-	 * GuC firmwares and determine there sizes before initializing the GGTT)
-	 * just start the GGTT allocation above the max WOPCM size. This might
-	 * waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can
+	 * The GuC address space is limited on both ends of the GGTT, because
+	 * the GuC shim HW redirects accesses to those addresses to other HW
+	 * areas instead of going through the GGTT. On the bottom end, the GuC
+	 * can't access offsets below the WOPCM size, while on the top side the
+	 * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of
+	 * checking each object to see if they are accessed by GuC or not, we
+	 * just exclude those areas from the allocator. Additionally, to
+	 * simplify the driver load, we use the maximum WOPCM size in this logic
+	 * instead of the programmed one, so we don't need to wait until the
+	 * actual size to be programmed is determined (which requires FW fetch)
+	 * before initializing the GGTT. These simplifications might waste space
+	 * in the GGTT (about 20-25 MBs depending on the platform) but we can
 	 * live with this.
 	 *
 	 * Another benifit of this is the GuC bootrom can't access anything
@@ -125,6 +134,9 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 	 * Starting the GGTT allocations above the WOPCM max give us the correct
 	 * placement for free.
 	 */
+	if (ggtt->size > GUC_GGTT_TOP)
+		ggtt->size = GUC_GGTT_TOP;
+
 	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
 		    ggtt->size - xe_wopcm_size(xe));
 	mutex_init(&ggtt->lock);
-- 
GitLab


From ee6ad13705286b19f5ffc19000b1d1574208efc9 Mon Sep 17 00:00:00 2001
From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
Date: Tue, 13 Jun 2023 15:07:40 +0530
Subject: [PATCH 1653/2340] drm/Xe: Use EOPNOTSUPP instead of ENOTSUPP

ENOTSUPP is not a standard Unix error should use
EOPNOTSUPP instead.

v2: Update commit description (Aravind)

Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c          | 2 +-
 drivers/gpu/drm/xe/xe_mmio.c            | 4 ++--
 drivers/gpu/drm/xe/xe_sync.c            | 6 +++---
 drivers/gpu/drm/xe/xe_vm.c              | 8 ++++----
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index fd39da8594426..49d92f089242c 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -612,7 +612,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 
 	if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
 			 !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
-		err = -ENOTSUPP;
+		err = -EOPNOTSUPP;
 		goto put_engine;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f7a7f996b37f0..f1336803b9157 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -495,7 +495,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 			fallthrough;
 		case DRM_XE_MMIO_8BIT: /* TODO */
 		case DRM_XE_MMIO_16BIT: /* TODO */
-			ret = -ENOTSUPP;
+			ret = -EOPNOTSUPP;
 			goto exit;
 		}
 	}
@@ -513,7 +513,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 			fallthrough;
 		case DRM_XE_MMIO_8BIT: /* TODO */
 		case DRM_XE_MMIO_16BIT: /* TODO */
-			ret = -ENOTSUPP;
+			ret = -EOPNOTSUPP;
 		}
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 5acb37a8b2ab1..7786b908a3fda 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -120,7 +120,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
 	case DRM_XE_SYNC_SYNCOBJ:
 		if (XE_IOCTL_ERR(xe, no_dma_fences && signal))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
 			return -EINVAL;
@@ -138,7 +138,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 
 	case DRM_XE_SYNC_TIMELINE_SYNCOBJ:
 		if (XE_IOCTL_ERR(xe, no_dma_fences && signal))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
 			return -EINVAL;
@@ -173,7 +173,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 
 	case DRM_XE_SYNC_USER_FENCE:
 		if (XE_IOCTL_ERR(xe, !signal))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7))
 			return -EINVAL;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index b8a0fe24d1d0c..6edac7d4af870 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1819,10 +1819,10 @@ static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
 		return -EINVAL;
 
 	if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	vm->async_ops.error_capture.mm = current->mm;
 	vm->async_ops.error_capture.addr = value;
@@ -3072,7 +3072,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
 		if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
-			err = -ENOTSUPP;
+			err = -EOPNOTSUPP;
 		if (XE_IOCTL_ERR(xe, !err && args->num_syncs))
 			err = EINVAL;
 		if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error))
@@ -3096,7 +3096,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
 			 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
-		err = -ENOTSUPP;
+		err = -EOPNOTSUPP;
 		goto put_engine;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 6c8a60c600871..3122374341d6a 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -143,7 +143,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 
 		if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) {
 			xe_vm_put(vm);
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 		}
 
 		addr = vm->async_ops.error_capture.addr;
-- 
GitLab


From 37430402618db90b53aa782a6c49f66ab0efced0 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 15 Jun 2023 11:22:36 -0700
Subject: [PATCH 1654/2340] drm/xe: NULL binding implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add uAPI and implementation for NULL bindings. A NULL binding is defined
as writes dropped and read zero. A single bit in the uAPI has been added
which results in a single bit in the PTEs being set.

NULL bindings are intendedd to be used to implement VK sparse bindings,
in particular residencyNonResidentStrict property.

v2: Fix BUG_ON shown in VK testing, fix check patch warning, fix
xe_pt_scan_64K, update __gen8_pte_encode to understand NULL bindings,
remove else if vma_addr

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Suggested-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h           |  1 +
 drivers/gpu/drm/xe/xe_exec.c         |  2 +
 drivers/gpu/drm/xe/xe_gt_pagefault.c |  4 +-
 drivers/gpu/drm/xe/xe_pt.c           | 54 ++++++++++-----
 drivers/gpu/drm/xe/xe_vm.c           | 99 ++++++++++++++++++----------
 drivers/gpu/drm/xe/xe_vm.h           | 12 +++-
 drivers/gpu/drm/xe/xe_vm_types.h     |  1 +
 include/uapi/drm/xe_drm.h            |  8 +++
 8 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index dd3d448fee0bc..3a148cc6e811e 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -61,6 +61,7 @@
 #define XE_PPGTT_PTE_LM			BIT_ULL(11)
 #define XE_PDE_64K			BIT_ULL(6)
 #define XE_PTE_PS64			BIT_ULL(8)
+#define XE_PTE_NULL			BIT_ULL(9)
 
 #define XE_PAGE_PRESENT			BIT_ULL(0)
 #define XE_PAGE_RW			BIT_ULL(1)
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index e44076ee2e115..4f7694a293488 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -120,6 +120,8 @@ retry:
 	 * to a location where the GPU can access it).
 	 */
 	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+		XE_WARN_ON(xe_vma_is_null(vma));
+
 		if (xe_vma_is_userptr(vma))
 			continue;
 
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 5436667ba82bf..9dd8e5097e65d 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -533,8 +533,8 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
 
 	trace_xe_vma_acc(vma);
 
-	/* Userptr can't be migrated, nothing to do */
-	if (xe_vma_is_userptr(vma))
+	/* Userptr or null can't be migrated, nothing to do */
+	if (xe_vma_has_no_bo(vma))
 		goto unlock_vm;
 
 	/* Lock VM and BOs dma-resv */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 29c1b1f0bd7c1..fe1c77b139e4e 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -81,6 +81,9 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
 static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 			   size_t page_size, bool *is_vram)
 {
+	if (xe_vma_is_null(vma))
+		return 0;
+
 	if (xe_vma_is_userptr(vma)) {
 		struct xe_res_cursor cur;
 		u64 page;
@@ -105,6 +108,9 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
 	if (unlikely(flags & XE_PTE_FLAG_READ_ONLY))
 		pte &= ~XE_PAGE_RW;
 
+	if (unlikely(flags & XE_PTE_FLAG_NULL))
+		pte |= XE_PTE_NULL;
+
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
 
 	switch (cache) {
@@ -557,6 +563,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
 	if (next - xe_walk->va_curs_start > xe_walk->curs->size)
 		return false;
 
+	/* null VMA's do not have dma addresses */
+	if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
+		return true;
+
 	/* Is the DMA address huge PTE size aligned? */
 	size = next - addr;
 	dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
@@ -579,6 +589,10 @@ xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
 	if (next > xe_walk->l0_end_addr)
 		return false;
 
+	/* null VMA's do not have dma addresses */
+	if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
+		return true;
+
 	xe_res_next(&curs, addr - xe_walk->va_curs_start);
 	for (; addr < next; addr += SZ_64K) {
 		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
@@ -629,10 +643,12 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	/* Is this a leaf entry ?*/
 	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
 		struct xe_res_cursor *curs = xe_walk->curs;
+		bool is_null = xe_walk->pte_flags & XE_PTE_FLAG_NULL;
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
-		pte = __pte_encode(xe_res_dma(curs) + xe_walk->dma_offset,
+		pte = __pte_encode(is_null ? 0 :
+				   xe_res_dma(curs) + xe_walk->dma_offset,
 				   xe_walk->cache, xe_walk->pte_flags,
 				   level);
 		pte |= xe_walk->default_pte;
@@ -652,7 +668,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		if (unlikely(ret))
 			return ret;
 
-		xe_res_next(curs, next - addr);
+		if (!is_null)
+			xe_res_next(curs, next - addr);
 		xe_walk->va_curs_start = next;
 		*action = ACTION_CONTINUE;
 
@@ -759,24 +776,29 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
 		xe_walk.cache = XE_CACHE_WB;
 	} else {
-		if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT)
+		if (!xe_vma_has_no_bo(vma) && bo->flags & XE_BO_SCANOUT_BIT)
 			xe_walk.cache = XE_CACHE_WT;
 		else
 			xe_walk.cache = XE_CACHE_WB;
 	}
-	if (!xe_vma_is_userptr(vma) && xe_bo_is_stolen(bo))
+	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
 		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
 
 	xe_bo_assert_held(bo);
-	if (xe_vma_is_userptr(vma))
-		xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1,
-				&curs);
-	else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
-		xe_res_first(bo->ttm.resource, vma->bo_offset,
-			     vma->end - vma->start + 1, &curs);
-	else
-		xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset,
-				vma->end - vma->start + 1, &curs);
+
+	if (!xe_vma_is_null(vma)) {
+		if (xe_vma_is_userptr(vma))
+			xe_res_first_sg(vma->userptr.sg, 0,
+					vma->end - vma->start + 1, &curs);
+		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
+			xe_res_first(bo->ttm.resource, vma->bo_offset,
+				     vma->end - vma->start + 1, &curs);
+		else
+			xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset,
+					vma->end - vma->start + 1, &curs);
+	} else {
+		curs.size = vma->end - vma->start + 1;
+	}
 
 	ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1,
 				&xe_walk.base);
@@ -965,7 +987,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
 
 	if (xe_vma_is_userptr(vma))
 		lockdep_assert_held_read(&vm->userptr.notifier_lock);
-	else
+	else if (!xe_vma_is_null(vma))
 		dma_resv_assert_held(vma->bo->ttm.base.resv);
 
 	dma_resv_assert_held(&vm->resv);
@@ -1341,7 +1363,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 				   DMA_RESV_USAGE_KERNEL :
 				   DMA_RESV_USAGE_BOOKKEEP);
 
-		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+		if (!xe_vma_has_no_bo(vma) && !vma->bo->vm)
 			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
 					   DMA_RESV_USAGE_BOOKKEEP);
 		xe_pt_commit_bind(vma, entries, num_entries, rebind,
@@ -1658,7 +1680,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 				   DMA_RESV_USAGE_BOOKKEEP);
 
 		/* This fence will be installed by caller when doing eviction */
-		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+		if (!xe_vma_has_no_bo(vma) && !vma->bo->vm)
 			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
 					   DMA_RESV_USAGE_BOOKKEEP);
 		xe_pt_commit_unbind(vma, entries, num_entries,
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 6edac7d4af870..5ac819a65cf14 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -590,7 +590,7 @@ retry:
 		goto out_unlock;
 
 	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
-		if (xe_vma_is_userptr(vma) || vma->destroyed)
+		if (xe_vma_has_no_bo(vma) || vma->destroyed)
 			continue;
 
 		err = xe_bo_validate(vma->bo, vm, false);
@@ -843,6 +843,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    u64 bo_offset_or_userptr,
 				    u64 start, u64 end,
 				    bool read_only,
+				    bool is_null,
 				    u64 tile_mask)
 {
 	struct xe_vma *vma;
@@ -868,8 +869,11 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	vma->vm = vm;
 	vma->start = start;
 	vma->end = end;
+	vma->pte_flags = 0;
 	if (read_only)
-		vma->pte_flags = XE_PTE_FLAG_READ_ONLY;
+		vma->pte_flags |= XE_PTE_FLAG_READ_ONLY;
+	if (is_null)
+		vma->pte_flags |= XE_PTE_FLAG_NULL;
 
 	if (tile_mask) {
 		vma->tile_mask = tile_mask;
@@ -886,23 +890,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 		vma->bo_offset = bo_offset_or_userptr;
 		vma->bo = xe_bo_get(bo);
 		list_add_tail(&vma->bo_link, &bo->vmas);
-	} else /* userptr */ {
-		u64 size = end - start + 1;
-		int err;
+	} else /* userptr or null */ {
+		if (!is_null) {
+			u64 size = end - start + 1;
+			int err;
 
-		vma->userptr.ptr = bo_offset_or_userptr;
+			vma->userptr.ptr = bo_offset_or_userptr;
 
-		err = mmu_interval_notifier_insert(&vma->userptr.notifier,
-						   current->mm,
-						   vma->userptr.ptr, size,
-						   &vma_userptr_notifier_ops);
-		if (err) {
-			kfree(vma);
-			vma = ERR_PTR(err);
-			return vma;
+			err = mmu_interval_notifier_insert(&vma->userptr.notifier,
+							   current->mm,
+							   vma->userptr.ptr, size,
+							   &vma_userptr_notifier_ops);
+			if (err) {
+				kfree(vma);
+				vma = ERR_PTR(err);
+				return vma;
+			}
+
+			vma->userptr.notifier_seq = LONG_MAX;
 		}
 
-		vma->userptr.notifier_seq = LONG_MAX;
 		xe_vm_get(vm);
 	}
 
@@ -942,6 +949,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 		 */
 		mmu_interval_notifier_remove(&vma->userptr.notifier);
 		xe_vm_put(vm);
+	} else if (xe_vma_is_null(vma)) {
+		xe_vm_put(vm);
 	} else {
 		xe_bo_put(vma->bo);
 	}
@@ -1024,7 +1033,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 		list_del_init(&vma->userptr.invalidate_link);
 		spin_unlock(&vm->userptr.invalidated_lock);
 		list_del(&vma->userptr_link);
-	} else {
+	} else if (!xe_vma_is_null(vma)) {
 		xe_bo_assert_held(vma->bo);
 		list_del(&vma->bo_link);
 
@@ -1393,7 +1402,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	while (vm->vmas.rb_node) {
 		struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
 
-		if (xe_vma_is_userptr(vma)) {
+		if (xe_vma_has_no_bo(vma)) {
 			down_read(&vm->userptr.notifier_lock);
 			vma->destroyed = true;
 			up_read(&vm->userptr.notifier_lock);
@@ -1402,7 +1411,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		rb_erase(&vma->vm_node, &vm->vmas);
 
 		/* easy case, remove from VMA? */
-		if (xe_vma_is_userptr(vma) || vma->bo->vm) {
+		if (xe_vma_has_no_bo(vma) || vma->bo->vm) {
 			xe_vma_destroy(vma, NULL);
 			continue;
 		}
@@ -2036,7 +2045,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 	XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type));
 
-	if (!xe_vma_is_userptr(vma)) {
+	if (!xe_vma_has_no_bo(vma)) {
 		err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
 		if (err)
 			return err;
@@ -2645,6 +2654,8 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					  lookup->start - 1,
 					  (first->pte_flags &
 					   XE_PTE_FLAG_READ_ONLY),
+					  (first->pte_flags &
+					   XE_PTE_FLAG_NULL),
 					  first->tile_mask);
 		if (first->bo)
 			xe_bo_unlock(first->bo, &ww);
@@ -2652,7 +2663,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 			err = -ENOMEM;
 			goto unwind;
 		}
-		if (!first->bo) {
+		if (xe_vma_is_userptr(first)) {
 			err = xe_vma_userptr_pin_pages(new_first);
 			if (err)
 				goto unwind;
@@ -2677,6 +2688,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 					 last->end,
 					 (last->pte_flags &
 					  XE_PTE_FLAG_READ_ONLY),
+					 (last->pte_flags & XE_PTE_FLAG_NULL),
 					 last->tile_mask);
 		if (last->bo)
 			xe_bo_unlock(last->bo, &ww);
@@ -2684,7 +2696,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 			err = -ENOMEM;
 			goto unwind;
 		}
-		if (!last->bo) {
+		if (xe_vma_is_userptr(last)) {
 			err = xe_vma_userptr_pin_pages(new_last);
 			if (err)
 				goto unwind;
@@ -2744,7 +2756,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
 		      *next;
 	struct rb_node *node;
 
-	if (!xe_vma_is_userptr(vma)) {
+	if (!xe_vma_has_no_bo(vma)) {
 		if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region]))
 			return ERR_PTR(-EINVAL);
 	}
@@ -2753,7 +2765,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
 	while ((node = rb_next(node))) {
 		if (!xe_vma_cmp_vma_cb(lookup, node)) {
 			__vma = to_xe_vma(node);
-			if (!xe_vma_is_userptr(__vma)) {
+			if (!xe_vma_has_no_bo(__vma)) {
 				if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
 					goto flush_list;
 			}
@@ -2767,7 +2779,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
 	while ((node = rb_prev(node))) {
 		if (!xe_vma_cmp_vma_cb(lookup, node)) {
 			__vma = to_xe_vma(node);
-			if (!xe_vma_is_userptr(__vma)) {
+			if (!xe_vma_has_no_bo(__vma)) {
 				if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
 					goto flush_list;
 			}
@@ -2826,21 +2838,23 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
 
 	switch (VM_BIND_OP(op)) {
 	case XE_VM_BIND_OP_MAP:
-		XE_BUG_ON(!bo);
-
-		err = xe_bo_lock(bo, &ww, 0, true);
-		if (err)
-			return ERR_PTR(err);
+		if (bo) {
+			err = xe_bo_lock(bo, &ww, 0, true);
+			if (err)
+				return ERR_PTR(err);
+		}
 		vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
 				    addr + range - 1,
 				    op & XE_VM_BIND_FLAG_READONLY,
+				    op & XE_VM_BIND_FLAG_NULL,
 				    tile_mask);
-		xe_bo_unlock(bo, &ww);
+		if (bo)
+			xe_bo_unlock(bo, &ww);
 		if (!vma)
 			return ERR_PTR(-ENOMEM);
 
 		xe_vm_insert_vma(vm, vma);
-		if (!bo->vm) {
+		if (bo && !bo->vm) {
 			vm_insert_extobj(vm, vma);
 			err = add_preempt_fences(vm, bo);
 			if (err) {
@@ -2874,6 +2888,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
 		vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
 				    addr + range - 1,
 				    op & XE_VM_BIND_FLAG_READONLY,
+				    op & XE_VM_BIND_FLAG_NULL,
 				    tile_mask);
 		if (!vma)
 			return ERR_PTR(-ENOMEM);
@@ -2899,11 +2914,12 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
 #ifdef TEST_VM_ASYNC_OPS_ERROR
 #define SUPPORTED_FLAGS	\
 	(FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
-	 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+	 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \
+	 XE_VM_BIND_FLAG_NULL | 0xffff)
 #else
 #define SUPPORTED_FLAGS	\
 	(XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
-	 XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+	 XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff)
 #endif
 #define XE_64K_PAGE_MASK 0xffffull
 
@@ -2951,6 +2967,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u32 obj = (*bind_ops)[i].obj;
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
 		u32 region = (*bind_ops)[i].region;
+		bool is_null = op &  XE_VM_BIND_FLAG_NULL;
 
 		if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) ||
 		    XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] ||
@@ -2984,8 +3001,13 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
 				 XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_ERR(xe, obj && is_null) ||
+		    XE_IOCTL_ERR(xe, obj_offset && is_null) ||
+		    XE_IOCTL_ERR(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
+				 is_null) ||
 		    XE_IOCTL_ERR(xe, !obj &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) ||
+				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP &&
+				 !is_null) ||
 		    XE_IOCTL_ERR(xe, !obj &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_ERR(xe, addr &&
@@ -3390,6 +3412,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	int ret;
 
 	XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm));
+	XE_WARN_ON(xe_vma_is_null(vma));
 	trace_xe_vma_usm_invalidate(vma);
 
 	/* Check that we don't race with page-table updates */
@@ -3452,8 +3475,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 	for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
 		struct xe_vma *vma = to_xe_vma(node);
 		bool is_userptr = xe_vma_is_userptr(vma);
+		bool is_null = xe_vma_is_null(vma);
 
-		if (is_userptr) {
+		if (is_null) {
+			addr = 0;
+		} else if (is_userptr) {
 			struct xe_res_cursor cur;
 
 			if (vma->userptr.sg) {
@@ -3468,7 +3494,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 		}
 		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
 			   vma->start, vma->end, vma->end - vma->start + 1ull,
-			   addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
+			   addr, is_null ? "NULL" : is_userptr ? "USR" :
+			   is_vram ? "VRAM" : "SYS");
 	}
 	up_read(&vm->lock);
 
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index bb29968568412..5edb7771629c6 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -115,11 +115,21 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
 	}
 }
 
-static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+static inline bool xe_vma_is_null(struct xe_vma *vma)
+{
+	return vma->pte_flags & XE_PTE_FLAG_NULL;
+}
+
+static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
 {
 	return !vma->bo;
 }
 
+static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+{
+	return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
+}
+
 int xe_vma_userptr_pin_pages(struct xe_vma *vma);
 
 int xe_vma_userptr_check_repin(struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index a51e84e584b43..9b39c5f64afad 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -31,6 +31,7 @@ struct xe_vma {
 	u64 end;
 	/** @pte_flags: pte flags for this VMA */
 #define XE_PTE_FLAG_READ_ONLY		BIT(0)
+#define XE_PTE_FLAG_NULL		BIT(1)
 	u32 pte_flags;
 
 	/** @bo: BO if not a userptr, must be NULL is userptr */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 83868af45984c..6a991afc563dd 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -418,6 +418,14 @@ struct drm_xe_vm_bind_op {
 	 * than differing the MAP to the page fault handler.
 	 */
 #define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 18)
+	/*
+	 * When the NULL flag is set, the page tables are setup with a special
+	 * bit which indicates writes are dropped and all reads return zero.  In
+	 * the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP
+	 * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
+	 * intended to implement VK sparse bindings.
+	 */
+#define XE_VM_BIND_FLAG_NULL		(0x1 << 19)
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
-- 
GitLab


From 8ae8a2e8dd21bd8bc94c9817874a97239aa867a2 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Sun, 21 May 2023 18:24:20 -0700
Subject: [PATCH 1655/2340] drm/xe: Long running job update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For long running (LR) jobs with the DRM scheduler we must return NULL in
run_job which results in signaling the job's finished fence immediately.
This prevents LR jobs from creating infinite dma-fences.

Signaling job's finished fence immediately breaks flow controlling ring
with the DRM scheduler. To work around this, the ring is flow controlled
and written in the exec IOCTL. Signaling job's finished fence
immediately also breaks the TDR which is used in reset / cleanup entity
paths so write a new path for LR entities.

v2: Better commit, white space, remove rmb(), better comment next to
emit_job()
v3 (Thomas): Change LR reference counting, fix working in commit

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c           | 32 ++++++++
 drivers/gpu/drm/xe/xe_engine.h           |  4 +
 drivers/gpu/drm/xe/xe_exec.c             |  8 ++
 drivers/gpu/drm/xe/xe_guc_engine_types.h |  2 +
 drivers/gpu/drm/xe/xe_guc_submit.c       | 96 ++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_trace.h            |  5 ++
 6 files changed, 134 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 49d92f089242c..1843e886a405b 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -18,6 +18,7 @@
 #include "xe_macros.h"
 #include "xe_migrate.h"
 #include "xe_pm.h"
+#include "xe_ring_ops_types.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
 
@@ -679,6 +680,37 @@ static void engine_kill_compute(struct xe_engine *e)
 	up_write(&e->vm->lock);
 }
 
+/**
+ * xe_engine_is_lr() - Whether an engine is long-running
+ * @e: The engine
+ *
+ * Return: True if the engine is long-running, false otherwise.
+ */
+bool xe_engine_is_lr(struct xe_engine *e)
+{
+	return e->vm && xe_vm_no_dma_fences(e->vm) &&
+		!(e->flags & ENGINE_FLAG_VM);
+}
+
+static s32 xe_engine_num_job_inflight(struct xe_engine *e)
+{
+	return e->lrc->fence_ctx.next_seqno - xe_lrc_seqno(e->lrc) - 1;
+}
+
+/**
+ * xe_engine_ring_full() - Whether an engine's ring is full
+ * @e: The engine
+ *
+ * Return: True if the engine's ring is full, false otherwise.
+ */
+bool xe_engine_ring_full(struct xe_engine *e)
+{
+	struct xe_lrc *lrc = e->lrc;
+	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
+
+	return xe_engine_num_job_inflight(e) >= max_job;
+}
+
 /**
  * xe_engine_is_idle() - Whether an engine is idle.
  * @engine: The engine
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
index b95d9b0408773..3017e4fe308d2 100644
--- a/drivers/gpu/drm/xe/xe_engine.h
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -42,6 +42,10 @@ static inline bool xe_engine_is_parallel(struct xe_engine *engine)
 	return engine->width > 1;
 }
 
+bool xe_engine_is_lr(struct xe_engine *e);
+
+bool xe_engine_ring_full(struct xe_engine *e);
+
 bool xe_engine_is_idle(struct xe_engine *engine);
 
 void xe_engine_kill(struct xe_engine *e);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 4f7694a293488..700f65b66d408 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -14,6 +14,7 @@
 #include "xe_device.h"
 #include "xe_engine.h"
 #include "xe_macros.h"
+#include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 #include "xe_sync.h"
 #include "xe_vm.h"
@@ -302,6 +303,11 @@ retry:
 		goto err_engine_end;
 	}
 
+	if (xe_engine_is_lr(engine) && xe_engine_ring_full(engine)) {
+		err = -EWOULDBLOCK;
+		goto err_engine_end;
+	}
+
 	job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
 				  addresses : &args->address);
 	if (IS_ERR(job)) {
@@ -388,6 +394,8 @@ retry:
 		xe_sync_entry_signal(&syncs[i], job,
 				     &job->drm.s_fence->finished);
 
+	if (xe_engine_is_lr(engine))
+		engine->ring_ops->emit_job(job);
 	xe_sched_job_push(job);
 	xe_vm_reactivate_rebind(vm);
 
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h
index 512615d1ce8c1..5565412fe7f11 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_engine_types.h
@@ -31,6 +31,8 @@ struct xe_guc_engine {
 	 */
 #define MAX_STATIC_MSG_TYPE	3
 	struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
+	/** @lr_tdr: long running TDR worker */
+	struct work_struct lr_tdr;
 	/** @fini_async: do final fini async from this worker */
 	struct work_struct fini_async;
 	/** @resume_time: time of last resume */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 7be06320dbd72..9c0fd1368b77a 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -483,6 +483,14 @@ static void register_engine(struct xe_engine *e)
 		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
 	}
 
+	/*
+	 * We must keep a reference for LR engines if engine is registered with
+	 * the GuC as jobs signal immediately and can't destroy an engine if the
+	 * GuC has a reference to it.
+	 */
+	if (xe_engine_is_lr(e))
+		xe_engine_get(e);
+
 	set_engine_registered(e);
 	trace_xe_engine_register(e);
 	if (xe_engine_is_parallel(e))
@@ -645,6 +653,7 @@ guc_engine_run_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
 	struct xe_engine *e = job->engine;
+	bool lr = xe_engine_is_lr(e);
 
 	XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
 		  !engine_banned(e) && !engine_suspended(e));
@@ -654,14 +663,19 @@ guc_engine_run_job(struct drm_sched_job *drm_job)
 	if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
 		if (!engine_registered(e))
 			register_engine(e);
-		e->ring_ops->emit_job(job);
+		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
+			e->ring_ops->emit_job(job);
 		submit_engine(e);
 	}
 
-	if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
+	if (lr) {
+		xe_sched_job_set_error(job, -EOPNOTSUPP);
+		return NULL;
+	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
 		return job->fence;
-	else
+	} else {
 		return dma_fence_get(job->fence);
+	}
 }
 
 static void guc_engine_free_job(struct drm_sched_job *drm_job)
@@ -764,6 +778,55 @@ static void simple_error_capture(struct xe_engine *e)
 }
 #endif
 
+static void xe_guc_engine_trigger_cleanup(struct xe_engine *e)
+{
+	struct xe_guc *guc = engine_to_guc(e);
+
+	if (xe_engine_is_lr(e))
+		queue_work(guc_to_gt(guc)->ordered_wq, &e->guc->lr_tdr);
+	else
+		xe_sched_tdr_queue_imm(&e->guc->sched);
+}
+
+static void xe_guc_engine_lr_cleanup(struct work_struct *w)
+{
+	struct xe_guc_engine *ge =
+		container_of(w, struct xe_guc_engine, lr_tdr);
+	struct xe_engine *e = ge->engine;
+	struct xe_gpu_scheduler *sched = &ge->sched;
+
+	XE_WARN_ON(!xe_engine_is_lr(e));
+	trace_xe_engine_lr_cleanup(e);
+
+	/* Kill the run_job / process_msg entry points */
+	xe_sched_submission_stop(sched);
+
+	/* Engine state now stable, disable scheduling / deregister if needed */
+	if (engine_registered(e)) {
+		struct xe_guc *guc = engine_to_guc(e);
+		int ret;
+
+		set_engine_banned(e);
+		disable_scheduling_deregister(guc, e);
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences, if GT broken the GT reset code should signal us.
+		 */
+		ret = wait_event_timeout(guc->ct.wq,
+					 !engine_pending_disable(e) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret) {
+			XE_WARN_ON("Schedule disable failed to respond");
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(e->gt);
+			return;
+		}
+	}
+
+	xe_sched_submission_start(sched);
+}
+
 static enum drm_gpu_sched_stat
 guc_engine_timedout_job(struct drm_sched_job *drm_job)
 {
@@ -815,7 +878,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job)
 			err = -EIO;
 		set_engine_banned(e);
 		xe_engine_get(e);
-		disable_scheduling_deregister(engine_to_guc(e), e);
+		disable_scheduling_deregister(guc, e);
 
 		/*
 		 * Must wait for scheduling to be disabled before signalling
@@ -848,7 +911,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job)
 	 */
 	xe_sched_add_pending_job(sched, job);
 	xe_sched_submission_start(sched);
-	xe_sched_tdr_queue_imm(&e->guc->sched);
+	xe_guc_engine_trigger_cleanup(e);
 
 	/* Mark all outstanding jobs as bad, thus completing them */
 	spin_lock(&sched->base.job_list_lock);
@@ -872,6 +935,8 @@ static void __guc_engine_fini_async(struct work_struct *w)
 
 	trace_xe_engine_destroy(e);
 
+	if (xe_engine_is_lr(e))
+		cancel_work_sync(&ge->lr_tdr);
 	if (e->flags & ENGINE_FLAG_PERSISTENT)
 		xe_device_remove_persistent_engines(gt_to_xe(e->gt), e);
 	release_guc_id(guc, e);
@@ -889,7 +954,7 @@ static void guc_engine_fini_async(struct xe_engine *e)
 	bool kernel = e->flags & ENGINE_FLAG_KERNEL;
 
 	INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
-	queue_work(system_unbound_wq, &e->guc->fini_async);
+	queue_work(system_wq, &e->guc->fini_async);
 
 	/* We must block on kernel engines so slabs are empty on driver unload */
 	if (kernel) {
@@ -1080,6 +1145,9 @@ static int guc_engine_init(struct xe_engine *e)
 		goto err_sched;
 	e->priority = XE_ENGINE_PRIORITY_NORMAL;
 
+	if (xe_engine_is_lr(e))
+		INIT_WORK(&e->guc->lr_tdr, xe_guc_engine_lr_cleanup);
+
 	mutex_lock(&guc->submission_state.lock);
 
 	err = alloc_guc_id(guc, e);
@@ -1131,7 +1199,7 @@ static void guc_engine_kill(struct xe_engine *e)
 {
 	trace_xe_engine_kill(e);
 	set_engine_killed(e);
-	xe_sched_tdr_queue_imm(&e->guc->sched);
+	xe_guc_engine_trigger_cleanup(e);
 }
 
 static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
@@ -1283,10 +1351,11 @@ static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
 	xe_sched_submission_stop(sched);
 
 	/* Clean up lost G2H + reset engine state */
-	if (engine_destroyed(e) && engine_registered(e)) {
-		if (engine_banned(e))
+	if (engine_registered(e)) {
+		if ((engine_banned(e) && engine_destroyed(e)) ||
+		    xe_engine_is_lr(e))
 			xe_engine_put(e);
-		else
+		else if (engine_destroyed(e))
 			__guc_engine_fini(guc, e);
 	}
 	if (e->guc->suspend_pending) {
@@ -1501,7 +1570,8 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	trace_xe_engine_deregister_done(e);
 
 	clear_engine_registered(e);
-	if (engine_banned(e))
+
+	if (engine_banned(e) || xe_engine_is_lr(e))
 		xe_engine_put(e);
 	else
 		__guc_engine_fini(guc, e);
@@ -1538,7 +1608,7 @@ int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	 */
 	set_engine_reset(e);
 	if (!engine_banned(e))
-		xe_sched_tdr_queue_imm(&e->guc->sched);
+		xe_guc_engine_trigger_cleanup(e);
 
 	return 0;
 }
@@ -1565,7 +1635,7 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 	/* Treat the same as engine reset */
 	set_engine_reset(e);
 	if (!engine_banned(e))
-		xe_sched_tdr_queue_imm(&e->guc->sched);
+		xe_guc_engine_trigger_cleanup(e);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 878ab4115d912..8a5d35f157919 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -220,6 +220,11 @@ DEFINE_EVENT(xe_engine, xe_engine_resubmit,
 	     TP_ARGS(e)
 );
 
+DEFINE_EVENT(xe_engine, xe_engine_lr_cleanup,
+	     TP_PROTO(struct xe_engine *e),
+	     TP_ARGS(e)
+);
+
 DECLARE_EVENT_CLASS(xe_sched_job,
 		    TP_PROTO(struct xe_sched_job *job),
 		    TP_ARGS(job),
-- 
GitLab


From 911cd9b3b4e1d10250987864fa19315c772edf9d Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 12 Apr 2023 18:48:41 -0700
Subject: [PATCH 1656/2340] drm/xe: Ensure LR engines are not persistent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With our ref counting scheme long running (LR) engines only close
properly if not persistent, ensure that LR engines are non-persistent.

v2: spell out LR

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 1843e886a405b..e72a94a944d00 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -599,7 +599,9 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			return -ENOENT;
 
 		e = xe_engine_create(xe, vm, logical_mask,
-				     args->width, hwe, ENGINE_FLAG_PERSISTENT);
+				     args->width, hwe,
+				     xe_vm_no_dma_fences(vm) ? 0 :
+				     ENGINE_FLAG_PERSISTENT);
 		xe_vm_put(vm);
 		if (IS_ERR(e))
 			return PTR_ERR(e);
-- 
GitLab


From 73c09901b0240bb6acdd957330e456e808ec52e6 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 27 Mar 2023 18:34:49 -0700
Subject: [PATCH 1657/2340] drm/xe: Only try to lock external BOs in VM bind

We only need to try to lock a BO if it's external as non-external BOs
share the dma-resv with the already locked VM. Trying to lock
non-external BOs caused an issue (list corruption) in an uncoming patch
which adds bulk LRU move. Since this code isn't needed, remove it.

v2: New commit message, s/mattthew/matthew/

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 5ac819a65cf14..c98801ee3f558 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2157,9 +2157,11 @@ static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
 		 */
 		xe_bo_get(vbo);
 
-		tv_bo.bo = &vbo->ttm;
-		tv_bo.num_shared = 1;
-		list_add(&tv_bo.head, &objs);
+		if (!vbo->vm) {
+			tv_bo.bo = &vbo->ttm;
+			tv_bo.num_shared = 1;
+			list_add(&tv_bo.head, &objs);
+		}
 	}
 
 again:
-- 
GitLab


From 7ba4c5f02763cc423bfa0c6a87a8dd5501dc3417 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 7 Jun 2023 08:45:20 -0700
Subject: [PATCH 1658/2340] drm/xe: VM LRU bulk move
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the TTM LRU bulk move for BOs tied to a VM. Update the bulk moves
LRU position on every exec.

v2: Bulk move for compute VMs, use WARN rather than BUG

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c       | 31 +++++++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_bo.h       |  4 ++--
 drivers/gpu/drm/xe/xe_dma_buf.c  |  2 +-
 drivers/gpu/drm/xe/xe_exec.c     |  6 ++++++
 drivers/gpu/drm/xe/xe_vm.c       |  4 ++++
 drivers/gpu/drm/xe/xe_vm_types.h |  3 +++
 6 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8bac1717ca78f..43801994f069d 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -992,6 +992,22 @@ static void xe_gem_object_free(struct drm_gem_object *obj)
 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
 }
 
+static void xe_gem_object_close(struct drm_gem_object *obj,
+				struct drm_file *file_priv)
+{
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
+		struct ww_acquire_ctx ww;
+
+		XE_WARN_ON(!xe_bo_is_user(bo));
+
+		xe_bo_lock(bo, &ww, 0, false);
+		ttm_bo_set_bulk_move(&bo->ttm, NULL);
+		xe_bo_unlock(bo, &ww);
+	}
+}
+
 static bool should_migrate_to_system(struct xe_bo *bo)
 {
 	struct xe_device *xe = xe_bo_device(bo);
@@ -1047,6 +1063,7 @@ static const struct vm_operations_struct xe_gem_vm_ops = {
 
 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
 	.free = xe_gem_object_free,
+	.close = xe_gem_object_close,
 	.mmap = drm_gem_ttm_mmap,
 	.export = xe_gem_prime_export,
 	.vm_ops = &xe_gem_vm_ops,
@@ -1088,8 +1105,8 @@ void xe_bo_free(struct xe_bo *bo)
 
 struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 				    struct xe_tile *tile, struct dma_resv *resv,
-				    size_t size, enum ttm_bo_type type,
-				    u32 flags)
+				    struct ttm_lru_bulk_move *bulk, size_t size,
+				    enum ttm_bo_type type, u32 flags)
 {
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
@@ -1156,7 +1173,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 		return ERR_PTR(err);
 
 	bo->created = true;
-	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+	if (bulk)
+		ttm_bo_set_bulk_move(&bo->ttm, bulk);
+	else
+		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
 
 	return bo;
 }
@@ -1226,7 +1246,10 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		}
 	}
 
-	bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL, size,
+	bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL,
+				   vm && !xe_vm_in_fault_mode(vm) &&
+				   flags & XE_BO_CREATE_USER_BIT ?
+				   &vm->lru_bulk_move : NULL, size,
 				   type, flags);
 	if (IS_ERR(bo))
 		return bo;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 3a148cc6e811e..08ca1d06bf772 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -81,8 +81,8 @@ void xe_bo_free(struct xe_bo *bo);
 
 struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 				    struct xe_tile *tile, struct dma_resv *resv,
-				    size_t size, enum ttm_bo_type type,
-				    u32 flags);
+				    struct ttm_lru_bulk_move *bulk, size_t size,
+				    enum ttm_bo_type type, u32 flags);
 struct xe_bo *
 xe_bo_create_locked_range(struct xe_device *xe,
 			  struct xe_tile *tile, struct xe_vm *vm,
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 9b252cc782b78..975dee1f770ff 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -199,7 +199,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 	int ret;
 
 	dma_resv_lock(resv, NULL);
-	bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size,
+	bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
 				   ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
 	if (IS_ERR(bo)) {
 		ret = PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 700f65b66d408..c52edff9a3584 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -399,6 +399,12 @@ retry:
 	xe_sched_job_push(job);
 	xe_vm_reactivate_rebind(vm);
 
+	if (!err && !xe_vm_no_dma_fences(vm)) {
+		spin_lock(&xe->ttm.lru_lock);
+		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+		spin_unlock(&xe->ttm.lru_lock);
+	}
+
 err_repin:
 	if (!xe_vm_no_dma_fences(vm))
 		up_read(&vm->userptr.notifier_lock);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c98801ee3f558..06ebc1cfc4f71 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -632,6 +632,10 @@ retry:
 
 #undef retry_required
 
+	spin_lock(&vm->xe->ttm.lru_lock);
+	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+	spin_unlock(&vm->xe->ttm.lru_lock);
+
 	/* Point of no return. */
 	arm_preempt_fences(vm, &preempt_fences);
 	resume_and_reinstall_preempt_fences(vm);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 9b39c5f64afad..c148dd49a6ca2 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -166,6 +166,9 @@ struct xe_vm {
 	/** Protects @rebind_list and the page-table structures */
 	struct dma_resv resv;
 
+	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
+	struct ttm_lru_bulk_move lru_bulk_move;
+
 	u64 size;
 	struct rb_root vmas;
 
-- 
GitLab


From 8489f30e0c8e47d2d654cfb31825ff37de7e5574 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 15 Jun 2023 18:20:52 +0100
Subject: [PATCH 1659/2340] drm/xe/bo: handle PL_TT -> PL_TT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When moving between PL_VRAM <-> PL_SYSTEM we have to have use PL_TT in
the middle as a temporary resource for the actual copy. In some GL
workloads it can be seen that once the resource has been moved to the
PL_TT we might have to bail out of the ttm_bo_validate(), before
finishing the final hop. If this happens the resource is left as
TTM_PL_FLAG_TEMPORARY, and when the ttm_bo_validate() is restarted the
current placement is always seen as incompatible, requiring us to
complete the move.  However if the BO allows PL_TT as a possible
placement we can end up attempting a PL_TT -> PL_TT move (like when
running out of VRAM) which leads to explosions in xe_bo_move(), like
triggering the XE_BUG_ON(!tile).

Going from TTM_PL_FLAG_TEMPORARY with PL_TT -> PL_VRAM should already
work as-is, so it looks like we only need to worry about PL_TT -> PL_TT
and it looks like we can just treat it as a dummy move, since no real
move is needed.

Reported-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 43801994f069d..ad0ea3014bfee 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -603,6 +603,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		goto out;
 	}
 
+	/*
+	 * Failed multi-hop where the old_mem is still marked as
+	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
+	 */
+	if (old_mem->mem_type == XE_PL_TT &&
+	    new_mem->mem_type == XE_PL_TT) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
 		ret = xe_bo_move_notify(bo, ctx);
 		if (ret)
-- 
GitLab


From 898f86c23c600c8f70bf1a03e81a7be97038a72d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 13 Jun 2023 11:03:55 -0700
Subject: [PATCH 1660/2340] drm/xe: Skip applying copy engine fuses

Like commit 69a3738ba57f ("drm/i915: Skip applying copy engine fuses"),
do not apply copy engine fuses for platforms where MEML3_EN is not
relevant for determining the presence of the copy engines.

Acked-by: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230613180356.2906441-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 68cd793cdfb56..b7b02c96e9982 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -492,6 +492,9 @@ static void read_copy_fuses(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 bcs_mask;
 
+	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
+		return;
+
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
 	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
-- 
GitLab


From 5db4afe1db56c10b6edef41ad9309bf0bed87f34 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Fri, 2 Jun 2023 20:27:32 +0300
Subject: [PATCH 1661/2340] drm/xe: Fix unreffed ptr leak on engine lookup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The engine xarray holds a ref to engine, guarded by the lock.
While we do lookup for engine, we need to take the ref inside
the lock to prevent unreffed pointer escaping and
causing potential use-after-free after.

v2: remove branch prediction hint (Thomas)

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230602172732.1001057-1-mika.kuoppala@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index e72a94a944d00..097a1ea06002f 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -162,10 +162,9 @@ struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
 
 	mutex_lock(&xef->engine.lock);
 	e = xa_load(&xef->engine.xa, id);
-	mutex_unlock(&xef->engine.lock);
-
 	if (e)
 		xe_engine_get(e);
+	mutex_unlock(&xef->engine.lock);
 
 	return e;
 }
@@ -644,26 +643,27 @@ int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
 	struct xe_file *xef = to_xe_file(file);
 	struct drm_xe_engine_get_property *args = data;
 	struct xe_engine *e;
+	int ret;
 
 	if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	mutex_lock(&xef->engine.lock);
-	e = xa_load(&xef->engine.xa, args->engine_id);
-	mutex_unlock(&xef->engine.lock);
-
+	e = xe_engine_lookup(xef, args->engine_id);
 	if (XE_IOCTL_ERR(xe, !e))
 		return -ENOENT;
 
 	switch (args->property) {
 	case XE_ENGINE_GET_PROPERTY_BAN:
 		args->value = !!(e->flags & ENGINE_FLAG_BANNED);
+		ret = 0;
 		break;
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
 	}
 
-	return 0;
+	xe_engine_put(e);
+
+	return ret;
 }
 
 static void engine_kill_compute(struct xe_engine *e)
-- 
GitLab


From ffd6620fb746c59ad82070f1975c4a0e3d30520e Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 9 Jun 2023 07:37:12 +0000
Subject: [PATCH 1662/2340] drm/xe: Document structures for device query

This adds documentation to the various structures used to query
memory, GTs, topology, engines, and so on. It includes a functional
code snippet to query engines.

v2:
  - Rebase on drm-xe-next
  - Also document structures related to drm_xe_device_query, changed
    pseudo code to snippet (Lucas De Marchi)
v3:
  - Move changelog to commit
  - Fix warnings showed only using dim checkpath

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Link: https://lists.freedesktop.org/archives/intel-xe/2023-May/004704.html
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 75 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 6a991afc563dd..445f7b7689dd8 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -119,8 +119,18 @@ struct xe_user_extension {
 #define XE_MEM_REGION_CLASS_SYSMEM	0
 #define XE_MEM_REGION_CLASS_VRAM	1
 
+/**
+ * struct drm_xe_query_mem_usage - describe memory regions and usage
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_MEM_USAGE, then the reply uses
+ * struct drm_xe_query_mem_usage in .data.
+ */
 struct drm_xe_query_mem_usage {
+	/** @num_params: number of memory regions returned in regions */
 	__u32 num_regions;
+
+	/** @pad: MBZ */
 	__u32 pad;
 
 	struct drm_xe_query_mem_region {
@@ -135,9 +145,20 @@ struct drm_xe_query_mem_usage {
 	} regions[];
 };
 
+/**
+ * struct drm_xe_query_config - describe the device configuration
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_CONFIG, then the reply uses
+ * struct drm_xe_query_config in .data.
+ */
 struct drm_xe_query_config {
+	/** @num_params: number of parameters returned in info */
 	__u32 num_params;
+
+	/** @pad: MBZ */
 	__u32 pad;
+
 #define XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
 #define XE_QUERY_CONFIG_FLAGS			1
 	#define XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
@@ -148,11 +169,22 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
 #define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY	6
 #define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1)
+	/** @info: array of elements containing the config info */
 	__u64 info[];
 };
 
+/**
+ * struct drm_xe_query_gts - describe GTs
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_GTS, then the reply uses struct
+ * drm_xe_query_gts in .data.
+ */
 struct drm_xe_query_gts {
+	/** @num_gt: number of GTs returned in gts */
 	__u32 num_gt;
+
+	/** @pad: MBZ */
 	__u32 pad;
 
 	/*
@@ -175,6 +207,13 @@ struct drm_xe_query_gts {
 	} gts[];
 };
 
+/**
+ * struct drm_xe_query_topology_mask - describe the topology mask of a GT
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, then the reply uses
+ * struct drm_xe_query_topology_mask in .data.
+ */
 struct drm_xe_query_topology_mask {
 	/** @gt_id: GT ID the mask is associated with */
 	__u16 gt_id;
@@ -192,6 +231,41 @@ struct drm_xe_query_topology_mask {
 	__u8 mask[];
 };
 
+/**
+ * struct drm_xe_device_query - main structure to query device information
+ *
+ * If size is set to 0, the driver fills it with the required size for the
+ * requested type of data to query. If size is equal to the required size,
+ * the queried information is copied into data.
+ *
+ * For example the following code snippet allows retrieving and printing
+ * information about the device engines with DRM_XE_DEVICE_QUERY_ENGINES:
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_engine_class_instance *hwe;
+ *	struct drm_xe_device_query query = {
+ *		.extensions = 0,
+ *		.query = DRM_XE_DEVICE_QUERY_ENGINES,
+ *		.size = 0,
+ *		.data = 0,
+ *	};
+ *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *	hwe = malloc(query.size);
+ *	query.data = (uintptr_t)hwe;
+ *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *	int num_engines = query.size / sizeof(*hwe);
+ *	for (int i = 0; i < num_engines; i++) {
+ *		printf("Engine %d: %s\n", i,
+ *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_RENDER ? "RENDER":
+ *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_COPY ? "COPY":
+ *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE ? "VIDEO_DECODE":
+ *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE ? "VIDEO_ENHANCE":
+ *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_COMPUTE ? "COMPUTE":
+ *			"UNKNOWN");
+ *	}
+ *	free(hwe);
+ */
 struct drm_xe_device_query {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -526,6 +600,7 @@ struct drm_xe_engine_set_property {
 	__u64 reserved[2];
 };
 
+/** struct drm_xe_engine_class_instance - instance of an engine class */
 struct drm_xe_engine_class_instance {
 	__u16 engine_class;
 
-- 
GitLab


From 4f082f2c3a37d1b2fb90e048cc61616885b69648 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 22 Jun 2023 13:59:20 +0200
Subject: [PATCH 1663/2340] drm/xe: Move defines before relevant fields

Align on same rule in the whole file: defines then doc then relevant
field, with an empty line to separate fields.

v2:
  - Rebase on drm-xe-next
  - Fix ordering of defines and fields in uAPI (Lucas De Marchi)
v3: Remove useless empty lines (Lucas De Marchi)
v4: Move changelog to commit
v5: Rebase

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Link: https://lists.freedesktop.org/archives/intel-xe/2023-May/004704.html
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 73 ++++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 445f7b7689dd8..be62b3a06db9d 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -60,6 +60,7 @@ struct xe_user_extension {
 	 * Pointer to the next struct xe_user_extension, or zero if the end.
 	 */
 	__u64 next_extension;
+
 	/**
 	 * @name: Name of the extension.
 	 *
@@ -70,6 +71,7 @@ struct xe_user_extension {
 	 * of uAPI which has embedded the struct xe_user_extension.
 	 */
 	__u32 name;
+
 	/**
 	 * @pad: MBZ
 	 *
@@ -218,11 +220,11 @@ struct drm_xe_query_topology_mask {
 	/** @gt_id: GT ID the mask is associated with */
 	__u16 gt_id;
 
-	/** @type: type of mask */
-	__u16 type;
 #define XE_TOPO_DSS_GEOMETRY	(1 << 0)
 #define XE_TOPO_DSS_COMPUTE	(1 << 1)
 #define XE_TOPO_EU_PER_DSS	(1 << 2)
+	/** @type: type of mask */
+	__u16 type;
 
 	/** @num_bytes: number of bytes in requested mask */
 	__u32 num_bytes;
@@ -270,15 +272,14 @@ struct drm_xe_device_query {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	/** @query: The type of data to query */
-	__u32 query;
-
 #define DRM_XE_DEVICE_QUERY_ENGINES	0
 #define DRM_XE_DEVICE_QUERY_MEM_USAGE	1
 #define DRM_XE_DEVICE_QUERY_CONFIG	2
 #define DRM_XE_DEVICE_QUERY_GTS		3
 #define DRM_XE_DEVICE_QUERY_HWCONFIG	4
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY	5
+	/** @query: The type of data to query */
+	__u32 query;
 
 	/** @size: Size of the queried data */
 	__u32 size;
@@ -301,12 +302,12 @@ struct drm_xe_gem_create {
 	 */
 	__u64 size;
 
+#define XE_GEM_CREATE_FLAG_DEFER_BACKING	(0x1 << 24)
+#define XE_GEM_CREATE_FLAG_SCANOUT		(0x1 << 25)
 	/**
 	 * @flags: Flags, currently a mask of memory instances of where BO can
 	 * be placed
 	 */
-#define XE_GEM_CREATE_FLAG_DEFER_BACKING	(0x1 << 24)
-#define XE_GEM_CREATE_FLAG_SCANOUT		(0x1 << 25)
 	__u32 flags;
 
 	/**
@@ -357,10 +358,13 @@ struct drm_xe_gem_mmap_offset {
 struct drm_xe_vm_bind_op_error_capture {
 	/** @error: errno that occured */
 	__s32 error;
+
 	/** @op: operation that encounter an error */
 	__u32 op;
+
 	/** @addr: address of bind op */
 	__u64 addr;
+
 	/** @size: size of bind */
 	__u64 size;
 };
@@ -370,8 +374,8 @@ struct drm_xe_ext_vm_set_property {
 	/** @base: base user extension */
 	struct xe_user_extension base;
 
-	/** @property: property to set */
 #define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS		0
+	/** @property: property to set */
 	__u32 property;
 
 	/** @pad: MBZ */
@@ -385,17 +389,16 @@ struct drm_xe_ext_vm_set_property {
 };
 
 struct drm_xe_vm_create {
-	/** @extensions: Pointer to the first extension struct, if any */
 #define XE_VM_EXTENSION_SET_PROPERTY	0
+	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	/** @flags: Flags */
-	__u32 flags;
-
 #define DRM_XE_VM_CREATE_SCRATCH_PAGE	(0x1 << 0)
 #define DRM_XE_VM_CREATE_COMPUTE_MODE	(0x1 << 1)
 #define DRM_XE_VM_CREATE_ASYNC_BIND_OPS	(0x1 << 2)
 #define DRM_XE_VM_CREATE_FAULT_MODE	(0x1 << 3)
+	/** @flags: Flags */
+	__u32 flags;
 
 	/** @vm_id: Returned VM ID */
 	__u32 vm_id;
@@ -430,6 +433,7 @@ struct drm_xe_vm_bind_op {
 		 * ignored for unbind
 		 */
 		__u64 obj_offset;
+
 		/** @userptr: user pointer to bind on */
 		__u64 userptr;
 	};
@@ -448,12 +452,6 @@ struct drm_xe_vm_bind_op {
 	 */
 	__u64 tile_mask;
 
-	/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
-	__u32 op;
-
-	/** @mem_region: Memory region to prefetch VMA to, instance not a mask */
-	__u32 region;
-
 #define XE_VM_BIND_OP_MAP		0x0
 #define XE_VM_BIND_OP_UNMAP		0x1
 #define XE_VM_BIND_OP_MAP_USERPTR	0x2
@@ -500,6 +498,11 @@ struct drm_xe_vm_bind_op {
 	 * intended to implement VK sparse bindings.
 	 */
 #define XE_VM_BIND_FLAG_NULL		(0x1 << 19)
+	/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
+	__u32 op;
+
+	/** @mem_region: Memory region to prefetch VMA to, instance not a mask */
+	__u32 region;
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
@@ -528,6 +531,7 @@ struct drm_xe_vm_bind {
 	union {
 		/** @bind: used if num_binds == 1 */
 		struct drm_xe_vm_bind_op bind;
+
 		/**
 		 * @vector_of_binds: userptr to array of struct
 		 * drm_xe_vm_bind_op if num_binds > 1
@@ -575,7 +579,6 @@ struct drm_xe_engine_set_property {
 	/** @engine_id: Engine ID */
 	__u32 engine_id;
 
-	/** @property: property to set */
 #define XE_ENGINE_SET_PROPERTY_PRIORITY			0
 #define XE_ENGINE_SET_PROPERTY_TIMESLICE		1
 #define XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
@@ -591,6 +594,7 @@ struct drm_xe_engine_set_property {
 #define XE_ENGINE_SET_PROPERTY_ACC_TRIGGER		6
 #define XE_ENGINE_SET_PROPERTY_ACC_NOTIFY		7
 #define XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY		8
+	/** @property: property to set */
 	__u32 property;
 
 	/** @value: property value */
@@ -602,8 +606,6 @@ struct drm_xe_engine_set_property {
 
 /** struct drm_xe_engine_class_instance - instance of an engine class */
 struct drm_xe_engine_class_instance {
-	__u16 engine_class;
-
 #define DRM_XE_ENGINE_CLASS_RENDER		0
 #define DRM_XE_ENGINE_CLASS_COPY		1
 #define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
@@ -614,14 +616,15 @@ struct drm_xe_engine_class_instance {
 	 * creating ordered queues of VM bind operations.
 	 */
 #define DRM_XE_ENGINE_CLASS_VM_BIND		5
+	__u16 engine_class;
 
 	__u16 engine_instance;
 	__u16 gt_id;
 };
 
 struct drm_xe_engine_create {
-	/** @extensions: Pointer to the first extension struct, if any */
 #define XE_ENGINE_EXTENSION_SET_PROPERTY               0
+	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
 	/** @width: submission width (number BB per exec) for this engine */
@@ -659,8 +662,8 @@ struct drm_xe_engine_get_property {
 	/** @engine_id: Engine ID */
 	__u32 engine_id;
 
-	/** @property: property to get */
 #define XE_ENGINE_GET_PROPERTY_BAN			0
+	/** @property: property to get */
 	__u32 property;
 
 	/** @value: property value */
@@ -685,19 +688,19 @@ struct drm_xe_sync {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	__u32 flags;
-
 #define DRM_XE_SYNC_SYNCOBJ		0x0
 #define DRM_XE_SYNC_TIMELINE_SYNCOBJ	0x1
 #define DRM_XE_SYNC_DMA_BUF		0x2
 #define DRM_XE_SYNC_USER_FENCE		0x3
 #define DRM_XE_SYNC_SIGNAL		0x10
+	__u32 flags;
 
 	/** @pad: MBZ */
 	__u32 pad;
 
 	union {
 		__u32 handle;
+
 		/**
 		 * @addr: Address of user fence. When sync passed in via exec
 		 * IOCTL this a GPU address in the VM. When sync passed in via
@@ -753,8 +756,6 @@ struct drm_xe_mmio {
 
 	__u32 addr;
 
-	__u32 flags;
-
 #define DRM_XE_MMIO_8BIT	0x0
 #define DRM_XE_MMIO_16BIT	0x1
 #define DRM_XE_MMIO_32BIT	0x2
@@ -762,6 +763,7 @@ struct drm_xe_mmio {
 #define DRM_XE_MMIO_BITS_MASK	0x3
 #define DRM_XE_MMIO_READ	0x4
 #define DRM_XE_MMIO_WRITE	0x8
+	__u32 flags;
 
 	__u64 value;
 
@@ -781,47 +783,57 @@ struct drm_xe_mmio {
 struct drm_xe_wait_user_fence {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
+
 	union {
 		/**
 		 * @addr: user pointer address to wait on, must qword aligned
 		 */
 		__u64 addr;
+
 		/**
 		 * @vm_id: The ID of the VM which encounter an error used with
 		 * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear.
 		 */
 		__u64 vm_id;
 	};
-	/** @op: wait operation (type of comparison) */
+
 #define DRM_XE_UFENCE_WAIT_EQ	0
 #define DRM_XE_UFENCE_WAIT_NEQ	1
 #define DRM_XE_UFENCE_WAIT_GT	2
 #define DRM_XE_UFENCE_WAIT_GTE	3
 #define DRM_XE_UFENCE_WAIT_LT	4
 #define DRM_XE_UFENCE_WAIT_LTE	5
+	/** @op: wait operation (type of comparison) */
 	__u16 op;
-	/** @flags: wait flags */
+
 #define DRM_XE_UFENCE_WAIT_SOFT_OP	(1 << 0)	/* e.g. Wait on VM bind */
 #define DRM_XE_UFENCE_WAIT_ABSTIME	(1 << 1)
 #define DRM_XE_UFENCE_WAIT_VM_ERROR	(1 << 2)
+	/** @flags: wait flags */
 	__u16 flags;
+
 	/** @pad: MBZ */
 	__u32 pad;
+
 	/** @value: compare value */
 	__u64 value;
-	/** @mask: comparison mask */
+
 #define DRM_XE_UFENCE_WAIT_U8		0xffu
 #define DRM_XE_UFENCE_WAIT_U16		0xffffu
 #define DRM_XE_UFENCE_WAIT_U32		0xffffffffu
 #define DRM_XE_UFENCE_WAIT_U64		0xffffffffffffffffu
+	/** @mask: comparison mask */
 	__u64 mask;
+
 	/** @timeout: how long to wait before bailing, value in jiffies */
 	__s64 timeout;
+
 	/**
 	 * @num_engines: number of engine instances to wait on, must be zero
 	 * when DRM_XE_UFENCE_WAIT_SOFT_OP set
 	 */
 	__u64 num_engines;
+
 	/**
 	 * @instances: user pointer to array of drm_xe_engine_class_instance to
 	 * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set
@@ -882,7 +894,6 @@ struct drm_xe_vm_madvise {
 #define		DRM_XE_VMA_PRIORITY_HIGH	2	/* Must be elevated user */
 	/* Pin the VMA in memory, must be elevated user */
 #define DRM_XE_VM_MADVISE_PIN			6
-
 	/** @property: property to set */
 	__u32 property;
 
-- 
GitLab


From 1bc56a934f11cc9bb859116d30e828ccf2df54cf Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 22 Jun 2023 14:32:03 +0200
Subject: [PATCH 1664/2340] drm/xe: Document topology mask query

Provide information on the types of topology masks that can be
queried and add some examples.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index be62b3a06db9d..fef5e26aad2af 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -212,6 +212,9 @@ struct drm_xe_query_gts {
 /**
  * struct drm_xe_query_topology_mask - describe the topology mask of a GT
  *
+ * This is the hardware topology which reflects the internal physical
+ * structure of the GPU.
+ *
  * If a query is made with a struct drm_xe_device_query where .query
  * is equal to DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, then the reply uses
  * struct drm_xe_query_topology_mask in .data.
@@ -220,8 +223,29 @@ struct drm_xe_query_topology_mask {
 	/** @gt_id: GT ID the mask is associated with */
 	__u16 gt_id;
 
+	/*
+	 * To query the mask of Dual Sub Slices (DSS) available for geometry
+	 * operations. For example a query response containing the following
+	 * in mask:
+	 *   DSS_GEOMETRY    ff ff ff ff 00 00 00 00
+	 * means 32 DSS are available for geometry.
+	 */
 #define XE_TOPO_DSS_GEOMETRY	(1 << 0)
+	/*
+	 * To query the mask of Dual Sub Slices (DSS) available for compute
+	 * operations. For example a query response containing the following
+	 * in mask:
+	 *   DSS_COMPUTE    ff ff ff ff 00 00 00 00
+	 * means 32 DSS are available for compute.
+	 */
 #define XE_TOPO_DSS_COMPUTE	(1 << 1)
+	/*
+	 * To query the mask of Execution Units (EU) available per Dual Sub
+	 * Slices (DSS). For example a query response containing the following
+	 * in mask:
+	 *   EU_PER_DSS    ff ff 00 00 00 00 00 00
+	 * means each DSS has 16 EU.
+	 */
 #define XE_TOPO_EU_PER_DSS	(1 << 2)
 	/** @type: type of mask */
 	__u16 type;
-- 
GitLab


From 1105ac15d2a151bc87c3fe0e79f95c5cde90f1eb Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 31 Mar 2023 09:46:24 +0100
Subject: [PATCH 1665/2340] drm/xe/uapi: restrict system wide accounting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since this is considered an info leak (system wide accounting), rather
hide behind perfmon_capable().

v2:
  - Without perfmon_capable() it likely makes more sense to report as zero,
    instead of reporting as used == total size. This should give similar
    behaviour as i915 which rather tracks free instead of used.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index c4165fa3428e4..15e171ca7e628 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -128,7 +128,8 @@ static int query_memory_usage(struct xe_device *xe,
 	usage->regions[0].min_page_size = PAGE_SIZE;
 	usage->regions[0].max_page_size = PAGE_SIZE;
 	usage->regions[0].total_size = man->size << PAGE_SHIFT;
-	usage->regions[0].used = ttm_resource_manager_usage(man);
+	if (perfmon_capable())
+		usage->regions[0].used = ttm_resource_manager_usage(man);
 	usage->num_regions = 1;
 
 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
@@ -145,8 +146,13 @@ static int query_memory_usage(struct xe_device *xe,
 				SZ_1G;
 			usage->regions[usage->num_regions].total_size =
 				man->size;
-			usage->regions[usage->num_regions++].used =
-				ttm_resource_manager_usage(man);
+
+			if (perfmon_capable()) {
+				usage->regions[usage->num_regions].used =
+					ttm_resource_manager_usage(man);
+			}
+
+			usage->num_regions++;
 		}
 	}
 
-- 
GitLab


From a9c4a069fbc3a1e115fead47145bc0257a7b3509 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 31 Mar 2023 09:46:25 +0100
Subject: [PATCH 1666/2340] drm/xe/uapi: add some kernel-doc for region query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we need to extend this, we should also take the time to add some
basic kernel-doc here for the existing bits. Note that this is all still
subject to change when upstreaming.

Also convert XE_MEM_REGION_CLASS_* into an enum, so we can more easily
create links to it from other parts of the uapi.

Suggested-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 86 ++++++++++++++++++++++++++++++++-------
 1 file changed, 71 insertions(+), 15 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index fef5e26aad2af..0808b21de29a9 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -118,8 +118,71 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
 
-#define XE_MEM_REGION_CLASS_SYSMEM	0
-#define XE_MEM_REGION_CLASS_VRAM	1
+/**
+ * enum drm_xe_memory_class - Supported memory classes.
+ */
+enum drm_xe_memory_class {
+	/** @XE_MEM_REGION_CLASS_SYSMEM: Represents system memory. */
+	XE_MEM_REGION_CLASS_SYSMEM = 0,
+	/**
+	 * @XE_MEM_REGION_CLASS_VRAM: On discrete platforms, this
+	 * represents the memory that is local to the device, which we
+	 * call VRAM. Not valid on integrated platforms.
+	 */
+	XE_MEM_REGION_CLASS_VRAM
+};
+
+/**
+ * struct drm_xe_query_mem_region - Describes some region as known to
+ * the driver.
+ */
+struct drm_xe_query_mem_region {
+	/**
+	 * @mem_class: The memory class describing this region.
+	 *
+	 * See enum drm_xe_memory_class for supported values.
+	 */
+	__u16 mem_class;
+	/**
+	 * @instance: The instance for this region.
+	 *
+	 * The @mem_class and @instance taken together will always give
+	 * a unique pair.
+	 */
+	__u16 instance;
+	/** @pad: MBZ */
+	__u32 pad;
+	/**
+	 * @min_page_size: Min page-size in bytes for this region.
+	 *
+	 * When the kernel allocates memory for this region, the
+	 * underlying pages will be at least @min_page_size in size.
+	 *
+	 * Important note: When userspace allocates a GTT address which
+	 * can point to memory allocated from this region, it must also
+	 * respect this minimum alignment. This is enforced by the
+	 * kernel.
+	 */
+	__u32 min_page_size;
+	/**
+	 * @max_page_size: Max page-size in bytes for this region.
+	 */
+	__u32 max_page_size;
+	/**
+	 * @total_size: The usable size in bytes for this region.
+	 */
+	__u64 total_size;
+	/**
+	 * @used: Estimate of the memory used in bytes for this region.
+	 *
+	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
+	 * accounting.  Without this the value here will always equal
+	 * zero.
+	 */
+	__u64 used;
+	/** @reserved: MBZ */
+	__u64 reserved[8];
+};
 
 /**
  * struct drm_xe_query_mem_usage - describe memory regions and usage
@@ -129,22 +192,12 @@ struct xe_user_extension {
  * struct drm_xe_query_mem_usage in .data.
  */
 struct drm_xe_query_mem_usage {
-	/** @num_params: number of memory regions returned in regions */
+	/** @num_regions: number of memory regions returned in @regions */
 	__u32 num_regions;
-
 	/** @pad: MBZ */
 	__u32 pad;
-
-	struct drm_xe_query_mem_region {
-		__u16 mem_class;
-		__u16 instance;	/* unique ID even among different classes */
-		__u32 pad;
-		__u32 min_page_size;
-		__u32 max_page_size;
-		__u64 total_size;
-		__u64 used;
-		__u64 reserved[8];
-	} regions[];
+	/** @regions: The returned regions for this device */
+	struct drm_xe_query_mem_region regions[];
 };
 
 /**
@@ -888,6 +941,9 @@ struct drm_xe_vm_madvise {
 	 * Setting the preferred location will trigger a migrate of the VMA
 	 * backing store to new location if the backing store is already
 	 * allocated.
+	 *
+	 * For DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS usage, see enum
+	 * drm_xe_memory_class.
 	 */
 #define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS	0
 #define DRM_XE_VM_MADVISE_PREFERRED_GT		1
-- 
GitLab


From 63f9c3cd36cad69d4422d86b2f86675f93df521a Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 26 Jun 2023 09:25:07 +0100
Subject: [PATCH 1667/2340] drm/xe/uapi: silence kernel-doc errors

./include/uapi/drm/xe_drm.h:263: warning: Function parameter or member
'gts' not described in 'drm_xe_query_gts'

./include/uapi/drm/xe_drm.h:854: WARNING: Inline emphasis start-string
without end-string.

With the idea to also include the uapi file in the pre-merge CI hooks
when building the kernel-doc, so first make sure it's clean:

https://gitlab.freedesktop.org/drm/xe/ci/-/merge_requests/16

v2: (Francois)
  - It makes more sense to just fix the kernel-doc for 'gts'

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 0808b21de29a9..8e7be15513332 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -242,11 +242,13 @@ struct drm_xe_query_gts {
 	/** @pad: MBZ */
 	__u32 pad;
 
-	/*
+	/**
+	 * @gts: The GTs returned for this device
+	 *
+	 * TODO: convert drm_xe_query_gt to proper kernel-doc.
 	 * TODO: Perhaps info about every mem region relative to this GT? e.g.
 	 * bandwidth between this GT and remote region?
 	 */
-
 	struct drm_xe_query_gt {
 #define XE_QUERY_GT_TYPE_MAIN		0
 #define XE_QUERY_GT_TYPE_REMOTE		1
@@ -852,8 +854,9 @@ struct drm_xe_mmio {
  * struct drm_xe_wait_user_fence - wait user fence
  *
  * Wait on user fence, XE will wakeup on every HW engine interrupt in the
- * instances list and check if user fence is complete:
- * (*addr & MASK) OP (VALUE & MASK)
+ * instances list and check if user fence is complete::
+ *
+ *	(*addr & MASK) OP (VALUE & MASK)
  *
  * Returns to user on user fence completion or timeout.
  */
-- 
GitLab


From 83ee6699b5964ff3b904a7064a61b453236296d6 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 26 Jun 2023 09:25:08 +0100
Subject: [PATCH 1668/2340] drm/doc: include xe_drm.h

Make sure the uapi gets picked up by the normal docs build.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 Documentation/gpu/driver-uapi.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst
index c08bcbb95fb30..e5070a0e95ab9 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -17,3 +17,8 @@ VM_BIND / EXEC uAPI
     :doc: Overview
 
 .. kernel-doc:: include/uapi/drm/nouveau_drm.h
+
+drm/xe uAPI
+===========
+
+.. kernel-doc:: include/uapi/drm/xe_drm.h
-- 
GitLab


From 513e82627931d0ac6b74b9c2595008b3573a5158 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 19 Jun 2023 12:00:20 +0100
Subject: [PATCH 1669/2340] drm/xe/bo: consider bo->flags in xe_bo_migrate()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For VRAM allocations the bo->flags can control some characteristics of
the underlying memory, like whether it needs to be contiguous, and in
the future whether it needs to be in the CPU visible portion. Rather use
add_vram() in xe_bo_migrate() which should take care of such things for
us.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index ad0ea3014bfee..f54fb7bd184a5 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1852,6 +1852,7 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
  */
 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
 {
+	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
@@ -1876,6 +1877,18 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
 	placement.placement = &requested;
 	placement.busy_placement = &requested;
 
+	/*
+	 * Stolen needs to be handled like below VRAM handling if we ever need
+	 * to support it.
+	 */
+	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
+
+	if (mem_type_is_vram(mem_type)) {
+		u32 c = 0;
+
+		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
+	}
+
 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
 }
 
-- 
GitLab


From 1c2097bbde107effe2183891f92c060aa64bfa8b Mon Sep 17 00:00:00 2001
From: Riana Tauro <riana.tauro@intel.com>
Date: Fri, 23 Jun 2023 10:54:30 +0530
Subject: [PATCH 1670/2340] drm/xe: add a new sysfs directory for gtidle
 properties

1) Add a new sysfs directory under devices/gt#/ called gtidle
   to contain idle properties of GT such as name, idle_status,
   idle_residency_ms

2) Remove forcewake calls for residency counter

v2:
    - abstract using function pointers (Anshuman)
    - remove forcewake calls for residency counter
    - use device_attr (Badal)
    - move rc functions to guc_pc
    - change name to gt_idle (Rodrigo)

v3:
    - return error for drmm_add_action_or_reset
    - replace file and functions with gt_idle prefix
      to gt_idle_sysfs (Himal)
    - use enum for gt idle state
    - move multiplier to gt idle and initialize (Anshuman)
    - correct doc annotation (Rodrigo)
    - remove return variable
    - use kobj_gt instead of new gtidle kobj
    - move residency_ms to gtidle file
    - retain xe_guc_pc prefix for functions in guc_rc file (Michal)

v4:
    - fix doc errors in xe_guc_pc file
    - change u64 to u32 for reading residency counter
    - keep gtidle states generic GT_IDLE_C[0/6] (Anshuman)

v5:
    - update commit message to include removal of
      forcewake calls (Anshuman)
    - return void from sysfs initialization function and add warnings
      (Andi)

v6:
    - remove extra lines (Anshuman)

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                 |   1 +
 drivers/gpu/drm/xe/xe_gt.c                  |   3 +
 drivers/gpu/drm/xe/xe_gt_idle_sysfs.c       | 162 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_idle_sysfs.h       |  13 ++
 drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h |  38 +++++
 drivers/gpu/drm/xe/xe_gt_types.h            |   4 +
 drivers/gpu/drm/xe/xe_guc_pc.c              |  45 ++----
 drivers/gpu/drm/xe/xe_guc_pc.h              |   2 +
 8 files changed, 239 insertions(+), 29 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 73100c246a740..8d6d3c070fc85 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -62,6 +62,7 @@ xe-y += xe_bb.o \
 	xe_gt.o \
 	xe_gt_clock.o \
 	xe_gt_debugfs.o \
+	xe_gt_idle_sysfs.o \
 	xe_gt_mcr.o \
 	xe_gt_pagefault.o \
 	xe_gt_sysfs.o \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 2458397ce8af7..bc76678a8276c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -18,6 +18,7 @@
 #include "xe_force_wake.h"
 #include "xe_ggtt.h"
 #include "xe_gt_clock.h"
+#include "xe_gt_idle_sysfs.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
 #include "xe_gt_printk.h"
@@ -306,6 +307,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
+	xe_gt_idle_sysfs_init(&gt->gtidle);
+
 	/* XXX: Fake that we pull the engine mask from hwconfig blob */
 	gt->info.engine_mask = gt->info.__engine_mask;
 
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
new file mode 100644
index 0000000000000..ec77349dea764
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_idle_sysfs.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_pc.h"
+
+/**
+ * DOC: Xe GT Idle
+ *
+ * Provides sysfs entries for idle properties of GT
+ *
+ * device/gt#/gtidle/name - name of the state
+ * device/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms
+ * device/gt#/gtidle/idle_status - Provides current idle state
+ */
+
+static struct xe_gt_idle *dev_to_gtidle(struct device *dev)
+{
+	struct kobject *kobj = &dev->kobj;
+
+	return &kobj_to_gt(kobj->parent)->gtidle;
+}
+
+static struct xe_gt *gtidle_to_gt(struct xe_gt_idle *gtidle)
+{
+	return container_of(gtidle, struct xe_gt, gtidle);
+}
+
+static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle)
+{
+	return &gtidle_to_gt(gtidle)->uc.guc.pc;
+}
+
+static const char *gt_idle_state_to_string(enum xe_gt_idle_state state)
+{
+	switch (state) {
+	case GT_IDLE_C0:
+		return "gt-c0";
+	case GT_IDLE_C6:
+		return "gt-c6";
+	default:
+		return "unknown";
+	}
+}
+
+static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
+{
+	u64 delta, overflow_residency, prev_residency;
+
+	overflow_residency = BIT_ULL(32);
+
+	/*
+	 * Counter wrap handling
+	 * Store previous hw counter values for counter wrap-around handling
+	 * Relying on sufficient frequency of queries otherwise counters can still wrap.
+	 */
+	prev_residency = gtidle->prev_residency;
+	gtidle->prev_residency = cur_residency;
+
+	/* delta */
+	if (cur_residency >= prev_residency)
+		delta = cur_residency - prev_residency;
+	else
+		delta = cur_residency + (overflow_residency - prev_residency);
+
+	/* Add delta to extended raw driver copy of idle residency */
+	cur_residency = gtidle->cur_residency + delta;
+	gtidle->cur_residency = cur_residency;
+
+	/* residency multiplier in ns, convert to ms */
+	cur_residency = mul_u64_u32_div(cur_residency, gtidle->residency_multiplier, 1e6);
+
+	return cur_residency;
+}
+
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+
+	return sysfs_emit(buff, gtidle->name);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t idle_status_show(struct device *dev,
+				struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+	enum xe_gt_idle_state state;
+
+	state = gtidle->idle_status(pc);
+
+	return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
+}
+static DEVICE_ATTR_RO(idle_status);
+
+static ssize_t idle_residency_ms_show(struct device *dev,
+				      struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+	u64 residency;
+
+	residency = gtidle->idle_residency(pc);
+	return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency));
+}
+static DEVICE_ATTR_RO(idle_residency_ms);
+
+static const struct attribute *gt_idle_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_idle_status.attr,
+	&dev_attr_idle_residency_ms.attr,
+	NULL,
+};
+
+static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, gt_idle_attrs);
+	kobject_put(kobj);
+}
+
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
+{
+	struct xe_gt *gt = gtidle_to_gt(gtidle);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct kobject *kobj;
+	int err;
+
+	kobj = kobject_create_and_add("gtidle", gt->sysfs);
+	if (!kobj) {
+		drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
+		return;
+	}
+
+	sprintf(gtidle->name, "gt%d-rc\n", gt->info.id);
+	/* Multiplier for  RC6 Residency counter in units of 1.28us */
+	gtidle->residency_multiplier = 1280;
+	gtidle->idle_residency = xe_guc_pc_rc6_residency;
+	gtidle->idle_status = xe_guc_pc_rc_status;
+
+	err = sysfs_create_files(kobj, gt_idle_attrs);
+	if (err) {
+		kobject_put(kobj);
+		drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h
new file mode 100644
index 0000000000000..b0973f96c7abe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_SYSFS_H_
+#define _XE_GT_IDLE_SYSFS_H_
+
+#include "xe_gt_idle_sysfs_types.h"
+
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
+
+#endif /* _XE_GT_IDLE_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h
new file mode 100644
index 0000000000000..f99b447534f35
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_
+#define _XE_GT_IDLE_SYSFS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_guc_pc;
+
+/* States of GT Idle */
+enum xe_gt_idle_state {
+	GT_IDLE_C0,
+	GT_IDLE_C6,
+	GT_IDLE_UNKNOWN,
+};
+
+/**
+ * struct xe_gt_idle - A struct that contains idle properties based of gt
+ */
+struct xe_gt_idle {
+	/** @name: name */
+	char name[16];
+	/** @residency_multiplier: residency multiplier in ns */
+	u32 residency_multiplier;
+	/** @cur_residency: raw driver copy of idle residency */
+	u64 cur_residency;
+	/** @prev_residency: previous residency counter */
+	u64 prev_residency;
+	/** @idle_status: get the current idle state */
+	enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc);
+	/** @idle_residency: get idle residency counter */
+	u64 (*idle_residency)(struct xe_guc_pc *pc);
+};
+
+#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 99ab7ec99ccd5..7d4de019f9a5e 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -7,6 +7,7 @@
 #define _XE_GT_TYPES_H_
 
 #include "xe_force_wake_types.h"
+#include "xe_gt_idle_sysfs_types.h"
 #include "xe_hw_engine_types.h"
 #include "xe_hw_fence_types.h"
 #include "xe_reg_sr_types.h"
@@ -260,6 +261,9 @@ struct xe_gt {
 	/** @uc: micro controllers on the GT */
 	struct xe_uc uc;
 
+	/** @gtidle: idle properties of GT */
+	struct xe_gt_idle gtidle;
+
 	/** @engine_ops: submission backend engine operations */
 	const struct xe_engine_ops *engine_ops;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 5d5cf4b0d508a..f02bf16413808 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -76,12 +76,7 @@
  *
  * Render-C states is also a GuC PC feature that is now enabled in Xe for
  * all platforms.
- * Xe's GuC PC provides a sysfs API for Render-C States:
  *
- * device/gt#/rc* *read-only* files:
- * - rc_status: Provide the actual immediate status of Render-C: (rc0 or rc6)
- * - rc6_residency: Provide the rc6_residency counter in units of 1.28 uSec.
- *                  Prone to overflows.
  */
 
 static struct xe_guc *
@@ -572,10 +567,12 @@ out:
 }
 static DEVICE_ATTR_RW(freq_max);
 
-static ssize_t rc_status_show(struct device *dev,
-			      struct device_attribute *attr, char *buff)
+/**
+ * xe_guc_pc_rc_status - get the current Render C state
+ * @pc: XE_GuC_PC instance
+ */
+enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
 	struct xe_gt *gt = pc_to_gt(pc);
 	u32 reg;
 
@@ -585,37 +582,29 @@ static ssize_t rc_status_show(struct device *dev,
 
 	switch (REG_FIELD_GET(RCN_MASK, reg)) {
 	case GT_RC6:
-		return sysfs_emit(buff, "rc6\n");
+		return GT_IDLE_C6;
 	case GT_RC0:
-		return sysfs_emit(buff, "rc0\n");
+		return GT_IDLE_C0;
 	default:
-		return -ENOENT;
+		return GT_IDLE_UNKNOWN;
 	}
 }
-static DEVICE_ATTR_RO(rc_status);
 
-static ssize_t rc6_residency_show(struct device *dev,
-				  struct device_attribute *attr, char *buff)
+/**
+ * xe_guc_pc_rc6_residency - rc6 residency counter
+ * @pc: Xe_GuC_PC instance
+ */
+u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
 	struct xe_gt *gt = pc_to_gt(pc);
 	u32 reg;
-	ssize_t ret;
-
-	xe_device_mem_access_get(pc_to_xe(pc));
-	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	if (ret)
-		goto out;
 
+	xe_device_mem_access_get(gt_to_xe(gt));
 	reg = xe_mmio_read32(gt, GT_GFX_RC6);
-	ret = sysfs_emit(buff, "%u\n", reg);
+	xe_device_mem_access_put(gt_to_xe(gt));
 
-	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out:
-	xe_device_mem_access_put(pc_to_xe(pc));
-	return ret;
+	return reg;
 }
-static DEVICE_ATTR_RO(rc6_residency);
 
 static const struct attribute *pc_attrs[] = {
 	&dev_attr_freq_act.attr,
@@ -625,8 +614,6 @@ static const struct attribute *pc_attrs[] = {
 	&dev_attr_freq_rpn.attr,
 	&dev_attr_freq_min.attr,
 	&dev_attr_freq_max.attr,
-	&dev_attr_rc_status.attr,
-	&dev_attr_rc6_residency.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index da29e49348688..976179dbc9a86 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -12,4 +12,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc);
 int xe_guc_pc_start(struct xe_guc_pc *pc);
 int xe_guc_pc_stop(struct xe_guc_pc *pc);
 
+enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc);
+u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
 #endif /* _XE_GUC_PC_H_ */
-- 
GitLab


From 7b076d14f21a48de572e5191614b3e6b2d6ab823 Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Fri, 23 Jun 2023 10:54:31 +0530
Subject: [PATCH 1671/2340] drm/xe/mtl: Add support to get C6 residency/status
 of MTL

Add the registers to get C6 residency of MTL SAMedia and
C6 status of MTL gts

v2:
   - move register definitions to regs header (Anshuman)
   - correct reg definition for mtl rc status
   - make idle_status function common (Badal)

v3:
   - remove extra line in commit message
   - use only media type check in initialization
   - use graphics ver check (Anshuman)

v4:
   - remove extra lines (Anshuman)

Bspec: 66300
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h  | 11 +++++--
 drivers/gpu/drm/xe/xe_gt_idle_sysfs.c | 14 ++++++---
 drivers/gpu/drm/xe/xe_guc_pc.c        | 41 ++++++++++++++++++++-------
 drivers/gpu/drm/xe/xe_guc_pc.h        |  3 +-
 4 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d2a0a5c8b02ae..55b0f70e19046 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -16,6 +16,11 @@
 #define MEDIA_GT_GSI_OFFSET				0x380000
 #define MEDIA_GT_GSI_LENGTH				0x40000
 
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1				XE_REG(0xc60)
+#define   MTL_CAGF_MASK					REG_GENMASK(8, 0)
+#define   MTL_CC_MASK					REG_GENMASK(12, 9)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0					XE_REG(0xd00)
 #define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK		REG_GENMASK(5, 3)
@@ -349,10 +354,12 @@
 #define   FORCEWAKE_USER			BIT(1)
 #define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 
+#define MTL_MEDIA_MC6				XE_REG(0x138048)
+
 #define GT_CORE_STATUS				XE_REG(0x138060)
 #define   RCN_MASK				REG_GENMASK(2, 0)
-#define   GT_RC0				0
-#define   GT_RC6				3
+#define   GT_C0					0
+#define   GT_C6					3
 
 #define GT_GFX_RC6_LOCKED			XE_REG(0x138104)
 #define GT_GFX_RC6				XE_REG(0x138108)
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
index ec77349dea764..7238e96a116cf 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
@@ -142,11 +142,17 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
 		return;
 	}
 
-	sprintf(gtidle->name, "gt%d-rc\n", gt->info.id);
-	/* Multiplier for  RC6 Residency counter in units of 1.28us */
+	if (xe_gt_is_media_type(gt)) {
+		sprintf(gtidle->name, "gt%d-mc\n", gt->info.id);
+		gtidle->idle_residency = xe_guc_pc_mc6_residency;
+	} else {
+		sprintf(gtidle->name, "gt%d-rc\n", gt->info.id);
+		gtidle->idle_residency = xe_guc_pc_rc6_residency;
+	}
+
+	/* Multiplier for Residency counter in units of 1.28us */
 	gtidle->residency_multiplier = 1280;
-	gtidle->idle_residency = xe_guc_pc_rc6_residency;
-	gtidle->idle_status = xe_guc_pc_rc_status;
+	gtidle->idle_status = xe_guc_pc_c_status;
 
 	err = sysfs_create_files(kobj, gt_idle_attrs);
 	if (err) {
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index f02bf16413808..3093cfeff0c2c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -34,9 +34,6 @@
 #define GT_PERF_STATUS		XE_REG(0x1381b4)
 #define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
 
-#define MTL_MIRROR_TARGET_WP1	XE_REG(0xc60)
-#define   MTL_CAGF_MASK		REG_GENMASK(8, 0)
-
 #define GT_FREQUENCY_MULTIPLIER	50
 #define GEN9_FREQ_SCALER	3
 
@@ -568,22 +565,30 @@ out:
 static DEVICE_ATTR_RW(freq_max);
 
 /**
- * xe_guc_pc_rc_status - get the current Render C state
+ * xe_guc_pc_c_status - get the current GT C state
  * @pc: XE_GuC_PC instance
  */
-enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc)
+enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
-	u32 reg;
+	u32 reg, gt_c_state;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
-	reg = xe_mmio_read32(gt, GT_CORE_STATUS);
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+		gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
+	} else {
+		reg = xe_mmio_read32(gt, GT_CORE_STATUS);
+		gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
+	}
+
 	xe_device_mem_access_put(gt_to_xe(gt));
 
-	switch (REG_FIELD_GET(RCN_MASK, reg)) {
-	case GT_RC6:
+	switch (gt_c_state) {
+	case GT_C6:
 		return GT_IDLE_C6;
-	case GT_RC0:
+	case GT_C0:
 		return GT_IDLE_C0;
 	default:
 		return GT_IDLE_UNKNOWN;
@@ -606,6 +611,22 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
 	return reg;
 }
 
+/**
+ * xe_guc_pc_mc6_residency - mc6 residency counter
+ * @pc: Xe_GuC_PC instance
+ */
+u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u64 reg;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return reg;
+}
+
 static const struct attribute *pc_attrs[] = {
 	&dev_attr_freq_act.attr,
 	&dev_attr_freq_cur.attr,
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 976179dbc9a86..370353a40a175 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -12,6 +12,7 @@ int xe_guc_pc_init(struct xe_guc_pc *pc);
 int xe_guc_pc_start(struct xe_guc_pc *pc);
 int xe_guc_pc_stop(struct xe_guc_pc *pc);
 
-enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc);
+enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
 u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
+u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
 #endif /* _XE_GUC_PC_H_ */
-- 
GitLab


From bc2e0215deeaa88dec44ff07e3a2b19283d53cdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 26 Jun 2023 20:17:38 +0200
Subject: [PATCH 1672/2340] drm/xe/bo: Fix swapin when moving to VRAM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a source system resource had been swapped out, we incorrectly
assumed that we were lacking source data for a move and therefore
cleared the destination instead of swapping in and copying the
swapped-out data. Fix this.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-2-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index f54fb7bd184a5..bdeef3bf40fce 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -568,6 +568,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	struct xe_tile *tile = NULL;
 	struct dma_fence *fence;
 	bool move_lacks_source;
+	bool tt_has_data;
 	bool needs_clear;
 	int ret = 0;
 
@@ -590,8 +591,10 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		goto out;
 	}
 
-	move_lacks_source = !resource_is_vram(old_mem) &&
-		(!ttm || !ttm_tt_is_populated(ttm));
+	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
+			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
+
+	move_lacks_source = !resource_is_vram(old_mem) && !tt_has_data;
 
 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
 		(!ttm && ttm_bo->type == ttm_bo_type_device);
-- 
GitLab


From 3439cc46619a3f31780cbd4f820384f9586d5ee1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 26 Jun 2023 20:17:39 +0200
Subject: [PATCH 1673/2340] drm/xe/bo: Avoid creating a system resource when
 allocating a fresh VRAM bo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When creating a new bo, on the first move the bo->resource is typically
NULL. Our move callback rejected that instructing TTM to create a system
resource. In addition a struct ttm_tt with a page-vector was created,
although not populated with pages. Similarly when the clearing of VRAM
was complete, the system resource was put on a ghost object and freed
using the TTM delayed destroy mechanism.

This is a lot of pointless work. So avoid creating the system resource and
instead change the code to cope with a NULL bo->resource.

v2:
- Add some code comments (Matthew Brost)
v3:
- Fix a dereference of old_mem which might be NULL.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-3-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 48 +++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index bdeef3bf40fce..8920405b0182d 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -479,7 +479,6 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
  * to unconditionally call unmap_attachment() when moving out to system.
  */
 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
-			     struct ttm_resource *old_res,
 			     struct ttm_resource *new_res)
 {
 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
@@ -564,6 +563,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 	struct ttm_resource *old_mem = ttm_bo->resource;
+	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
 	struct ttm_tt *ttm = ttm_bo->ttm;
 	struct xe_tile *tile = NULL;
 	struct dma_fence *fence;
@@ -572,35 +572,29 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	bool needs_clear;
 	int ret = 0;
 
-	if (!old_mem) {
-		if (new_mem->mem_type != TTM_PL_SYSTEM) {
-			hop->mem_type = TTM_PL_SYSTEM;
-			hop->flags = TTM_PL_FLAG_TEMPORARY;
-			ret = -EMULTIHOP;
-			goto out;
-		}
-
+	/* Bo creation path, moving to system or TT. No clearing required. */
+	if (!old_mem && ttm) {
 		ttm_bo_move_null(ttm_bo, new_mem);
-		goto out;
+		return 0;
 	}
 
 	if (ttm_bo->type == ttm_bo_type_sg) {
 		ret = xe_bo_move_notify(bo, ctx);
 		if (!ret)
-			ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem);
+			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
 		goto out;
 	}
 
 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
 
-	move_lacks_source = !resource_is_vram(old_mem) && !tt_has_data;
+	move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data;
 
 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
 		(!ttm && ttm_bo->type == ttm_bo_type_device);
 
 	if ((move_lacks_source && !needs_clear) ||
-	    (old_mem->mem_type == XE_PL_SYSTEM &&
+	    (old_mem_type == XE_PL_SYSTEM &&
 	     new_mem->mem_type == XE_PL_TT)) {
 		ttm_bo_move_null(ttm_bo, new_mem);
 		goto out;
@@ -610,7 +604,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	 * Failed multi-hop where the old_mem is still marked as
 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
 	 */
-	if (old_mem->mem_type == XE_PL_TT &&
+	if (old_mem_type == XE_PL_TT &&
 	    new_mem->mem_type == XE_PL_TT) {
 		ttm_bo_move_null(ttm_bo, new_mem);
 		goto out;
@@ -622,7 +616,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 			goto out;
 	}
 
-	if (old_mem->mem_type == XE_PL_TT &&
+	if (old_mem_type == XE_PL_TT &&
 	    new_mem->mem_type == XE_PL_SYSTEM) {
 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
 						     DMA_RESV_USAGE_BOOKKEEP,
@@ -637,8 +631,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	}
 
 	if (!move_lacks_source &&
-	    ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
-	     (resource_is_vram(old_mem) &&
+	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
+	     (mem_type_is_vram(old_mem_type) &&
 	      new_mem->mem_type == XE_PL_SYSTEM))) {
 		hop->fpfn = 0;
 		hop->lpfn = 0;
@@ -652,8 +646,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		tile = bo->tile;
 	else if (resource_is_vram(new_mem))
 		tile = mem_type_to_tile(xe, new_mem->mem_type);
-	else if (resource_is_vram(old_mem))
-		tile = mem_type_to_tile(xe, old_mem->mem_type);
+	else if (mem_type_is_vram(old_mem_type))
+		tile = mem_type_to_tile(xe, old_mem_type);
 
 	XE_BUG_ON(!tile);
 	XE_BUG_ON(!tile->migrate);
@@ -703,8 +697,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 			xe_device_mem_access_put(xe);
 			goto out;
 		}
-		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
-						new_mem);
+		if (!move_lacks_source) {
+			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
+							true, new_mem);
+		} else {
+			/*
+			 * ttm_bo_move_accel_cleanup() may blow up if
+			 * bo->resource == NULL, so just attach the
+			 * fence and set the new resource.
+			 */
+			dma_resv_add_fence(ttm_bo->base.resv, fence,
+					   DMA_RESV_USAGE_KERNEL);
+			ttm_bo_move_null(ttm_bo, new_mem);
+		}
+
 		dma_fence_put(fence);
 	}
 
-- 
GitLab


From 70ff6a999d7cae52b6b418c3110b6245dde9271c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 26 Jun 2023 20:17:40 +0200
Subject: [PATCH 1674/2340] drm/xe/bo: Gracefully handle errors from
 ttm_bo_move_accel_cleanup().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function ttm_bo_move_accel_cleanup() attempts to help pipeline a
move, and in doing so, needs memory allocations which may fail.

Rather than failing in a state where the new resource may freed while
accessed by the copy engine, sync uninterruptible and do a failsafe
cleanup.

v2:
- Don't try to attach the signaled fence on ttm_bo_move_accel_cleanup()
  error.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-4-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8920405b0182d..1f4d1790d57cd 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -700,6 +700,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		if (!move_lacks_source) {
 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
 							true, new_mem);
+			if (ret) {
+				dma_fence_wait(fence, false);
+				ttm_bo_move_null(ttm_bo, new_mem);
+				ret = 0;
+			}
 		} else {
 			/*
 			 * ttm_bo_move_accel_cleanup() may blow up if
-- 
GitLab


From a201c6ee37d63e7c0a2973fb7790e94211b7fa83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 26 Jun 2023 20:17:41 +0200
Subject: [PATCH 1675/2340] drm/xe/bo: Evict VRAM to TT rather than to system
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The main difference is that we don't bounce and sync on eviction, allowing
for pipelined eviction. Moving forward we also need to be careful with
dma mappings which can be released in SYSTEM but may remain in TT.

v2:
- Remove a stale comment (Matthew Brost)

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-5-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_dma_buf.c |  5 ++---
 drivers/gpu/drm/xe/xe_bo.c            | 17 ++++++++++++++++-
 drivers/gpu/drm/xe/xe_dma_buf.c       | 11 ++++-------
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 513a3b3362e9f..810a035bf7200 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -72,7 +72,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
 	}
 
 	/* Verify that also importer has been evicted to SYSTEM */
-	if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) {
+	if (exported != imported && !xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) {
 		KUNIT_FAIL(test, "Importer wasn't properly evicted.\n");
 		return;
 	}
@@ -91,8 +91,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
 	 * possible, saving a migration step as the transfer is just
 	 * likely as fast from system memory.
 	 */
-	if (params->force_different_devices &&
-	    params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
+	if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
 		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
 	else
 		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 1f4d1790d57cd..17c0c6c2ae65a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -40,6 +40,20 @@ static struct ttm_placement sys_placement = {
 	.busy_placement = &sys_placement_flags,
 };
 
+static const struct ttm_place tt_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = XE_PL_TT,
+	.flags = 0,
+};
+
+static struct ttm_placement tt_placement = {
+	.num_placement = 1,
+	.placement = &tt_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags,
+};
+
 bool mem_type_is_vram(u32 mem_type)
 {
 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
@@ -225,9 +239,10 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
 	case XE_PL_VRAM0:
 	case XE_PL_VRAM1:
 	case XE_PL_STOLEN:
+		*placement = tt_placement;
+		break;
 	case XE_PL_TT:
 	default:
-		/* for now kick out to system */
 		*placement = sys_placement;
 		break;
 	}
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 975dee1f770ff..b9bf4b4dd8a5e 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -81,13 +81,10 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
 		return ERR_PTR(-EOPNOTSUPP);
 
 	if (!xe_bo_is_pinned(bo)) {
-		if (!attach->peer2peer ||
-		    bo->ttm.resource->mem_type == XE_PL_SYSTEM) {
-			if (xe_bo_can_migrate(bo, XE_PL_TT))
-				r = xe_bo_migrate(bo, XE_PL_TT);
-			else
-				r = xe_bo_validate(bo, NULL, false);
-		}
+		if (!attach->peer2peer)
+			r = xe_bo_migrate(bo, XE_PL_TT);
+		else
+			r = xe_bo_validate(bo, NULL, false);
 		if (r)
 			return ERR_PTR(r);
 	}
-- 
GitLab


From 5835dc7fa6e419627e23015c7dbde120a77ce738 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 25 May 2023 09:41:44 +0200
Subject: [PATCH 1676/2340] drm/xe: Fix vm refcount races
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a race in xe_vm_lookup() where the vm could disappear after
the lookup mutex unlock but before the get. The xe_vm_get() call
must be inside the lookup mutex.

Also fix a vm close race where multiple callers could potentially
succeed in calling xe_vm_close_and_put().

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Link: https://lists.freedesktop.org/archives/intel-xe/2023-May/004704.html
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230525074144.178961-1-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 06ebc1cfc4f71..bd143acbde0e1 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1533,10 +1533,9 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
 
 	mutex_lock(&xef->vm.lock);
 	vm = xa_load(&xef->vm.xa, id);
-	mutex_unlock(&xef->vm.lock);
-
 	if (vm)
 		xe_vm_get(vm);
+	mutex_unlock(&xef->vm.lock);
 
 	return vm;
 }
@@ -2011,27 +2010,26 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	struct xe_file *xef = to_xe_file(file);
 	struct drm_xe_vm_destroy *args = data;
 	struct xe_vm *vm;
+	int err = 0;
 
 	if (XE_IOCTL_ERR(xe, args->pad) ||
 	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	vm = xe_vm_lookup(xef, args->vm_id);
-	if (XE_IOCTL_ERR(xe, !vm))
-		return -ENOENT;
-	xe_vm_put(vm);
-
-	/* FIXME: Extend this check to non-compute mode VMs */
-	if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
-		return -EBUSY;
-
 	mutex_lock(&xef->vm.lock);
-	xa_erase(&xef->vm.xa, args->vm_id);
+	vm = xa_load(&xef->vm.xa, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm))
+		err = -ENOENT;
+	else if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
+		err = -EBUSY;
+	else
+		xa_erase(&xef->vm.xa, args->vm_id);
 	mutex_unlock(&xef->vm.lock);
 
-	xe_vm_close_and_put(vm);
+	if (!err)
+		xe_vm_close_and_put(vm);
 
-	return 0;
+	return err;
 }
 
 static const u32 region_to_mem_type[] = {
-- 
GitLab


From c8a740775dfff4467c9dd9f1cad22d8bdc7cccfa Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Wed, 24 May 2023 14:36:53 +0530
Subject: [PATCH 1677/2340] drm/xe/pm: Disable PM on unbounded pcie parent
 bridge

Intel Discrete GFX cards gfx may have multiple PCIe endpoints,
they connects to root port via pcie upstream switch port(USP)
and virtual pcie switch port(VSP), sometimes VSP pcie devices
doesn't bind to pcieport driver. Without pcieport driver, pcie PM
comes without any warranty and with unbounded VSP gfx card won't
transition to low power pcie Device and Link states therefore
assert drm_warn on unbounded VSP and disable xe driver
PM support.

v2:
- Disable Xe PCI PM support. [Rodrigo]
v3:
- Changed subject and Rebase.
v4:
- %s/xe_pci_unbounded_bridge_disable_pm/xe_assert_on_unbounded_bridge.
  [Rodrigo]
- Use device_set_pm_not_required() instead of dev_pm_ops NULL assignment.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230524090653.1192566-1-anshuman.gupta@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c |  1 +
 drivers/gpu/drm/xe/xe_pm.c  | 14 ++++++++++++++
 drivers/gpu/drm/xe/xe_pm.h  |  1 +
 3 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 71be80274683d..96f1ee1ea17f4 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -617,6 +617,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (IS_ERR(xe))
 		return PTR_ERR(xe);
 
+	xe_pm_assert_unbounded_bridge(xe);
 	subplatform_desc = find_subplatform(xe, desc);
 
 	err = xe_info_init(xe, desc, subplatform_desc);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 9a74d15052c49..20e9e522ab805 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -214,3 +214,17 @@ int xe_pm_runtime_get_if_active(struct xe_device *xe)
 	WARN_ON(pm_runtime_suspended(xe->drm.dev));
 	return pm_runtime_get_if_active(xe->drm.dev, true);
 }
+
+void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_dev *bridge = pci_upstream_bridge(pdev);
+
+	if (!bridge)
+		return;
+
+	if (!bridge->driver) {
+		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
+		device_set_pm_not_required(&pdev->dev);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 6a885585f6534..8418ee6faac5f 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -21,5 +21,6 @@ int xe_pm_runtime_get(struct xe_device *xe);
 int xe_pm_runtime_put(struct xe_device *xe);
 bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
 int xe_pm_runtime_get_if_active(struct xe_device *xe);
+void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
 
 #endif
-- 
GitLab


From 64c9ae213d2ab1cce824841518e9539f597ee91e Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Tue, 13 Jun 2023 10:47:39 -0700
Subject: [PATCH 1678/2340] drm/xe/kunit: Handle fake device creation for all
 platform/subplatform cases

For platform like Alderlake P there are subplatforms and
just Alderlake P. Unlike DG2 in which every flavour is
either a G10,G11 or G12 variant. In this case(Alderlake P/S),
the Kunit test evaluates the subplatform to NONE and is
unable to create a fake device. Removing the condition
in xe_pci_fake_device_init() to support this corner case
so driver can proceed with the unit testing.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230613174740.786041-1-anusha.srivatsa@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 2178ad71c0da2..a40879da2fbe1 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -139,9 +139,6 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
 		if (subplatform_desc->subplatform == subplatform)
 			break;
 
-	if (subplatform == XE_SUBPLATFORM_NONE && subplatform_desc)
-		return -ENODEV;
-
 	if (subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc)
 		return -ENODEV;
 
-- 
GitLab


From 807e7cee6981d9c570f986bebc07829094acb3cb Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Tue, 13 Jun 2023 10:47:40 -0700
Subject: [PATCH 1679/2340] drm/xe: Add missing ADL entries to xe_test_wa

With the fake device creation fix in the previous patch,
adding Alderlake P platform in xe_wa_test.
With this, driver is able to run the kunit test for
ADLP properly.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230613174740.786041-2-anusha.srivatsa@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_wa_test.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 7a86be830b93e..16f7f157c8753 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -46,6 +46,9 @@ static const struct platform_test_case cases[] = {
 	PLATFORM_CASE(ALDERLAKE_S, B0),
 	PLATFORM_CASE(ALDERLAKE_S, C0),
 	PLATFORM_CASE(ALDERLAKE_S, D0),
+	PLATFORM_CASE(ALDERLAKE_P, A0),
+	PLATFORM_CASE(ALDERLAKE_P, B0),
+	PLATFORM_CASE(ALDERLAKE_P, C0),
 	SUBPLATFORM_CASE(DG2, G10, A0),
 	SUBPLATFORM_CASE(DG2, G10, A1),
 	SUBPLATFORM_CASE(DG2, G10, B0),
-- 
GitLab


From 420c6a6f65f4856f77dba278ae32e2701d8838f3 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 27 Jun 2023 15:28:56 -0700
Subject: [PATCH 1680/2340] drm/xe: fix HuC FW ordering for DG1

The firmware definitions must be ordered based on platform, from newer
to older, which means that the DG1 FW must come before the ADL one.

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8699
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230627222856.3165647-1-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2b9b9b4a67115..bc63c0d3e33ab 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -111,9 +111,9 @@ struct fw_blobs_by_type {
 	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)				\
+	fw_def(DG1,		no_ver(i915,	huc,	dg1))			\
 	fw_def(ALDERLAKE_P,	no_ver(i915,	huc,	tgl))			\
 	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,	tgl))			\
-	fw_def(DG1,		no_ver(i915,	huc,	dg1))			\
 	fw_def(ROCKETLAKE,	no_ver(i915,	huc,	tgl))			\
 	fw_def(TIGERLAKE,	no_ver(i915,	huc,	tgl))
 
-- 
GitLab


From f07d9a615b7b257bf2c2197262769286ddc75109 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 27 Jun 2023 17:16:42 -0700
Subject: [PATCH 1681/2340] drm/xe/slpc: Start SLPC before GuC submission on
 reset

The SLPC code has a strict 5ms timeout from when the start command is
queued to when we expect the reply to appear in memory. This works if
the CT channel is empty, but if the channel is busy there might be an
extra delay that causes the process to exceeded the timeout. We see
this issue when a reset occurs while userspace keeps submitting,
because the submission code is re-enabled first and it will start using
the channel to service those submissions.
To fix this, we can simply start SLPC before re-enabling submission.
This has also the benefit of not allowing submissions to go through with
an uninitialized SLPC.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/375
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230628001642.3170070-1-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 43f862aaacbef..8245bbc587701 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -802,14 +802,10 @@ int xe_guc_start(struct xe_guc *guc)
 {
 	int ret;
 
-	ret = xe_guc_submit_start(guc);
-	if (ret)
-		return ret;
-
 	ret = xe_guc_pc_start(&guc->pc);
 	XE_WARN_ON(ret);
 
-	return 0;
+	return xe_guc_submit_start(guc);
 }
 
 void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
-- 
GitLab


From 7f38e1e1063e1b9b2c8368c741ff5e679091e9f8 Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Mon, 26 Jun 2023 14:22:20 -0700
Subject: [PATCH 1682/2340] drm/xe: fix bounds checking for 'len' in
 xe_engine_create_ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's this shared machine running xe.ko and I often log in to see my
tmux corrupted by messages such as:

    usercopy: Kernel memory overwrite attempt detected to wrapped address (offset 0, size 18446660151965198754)!

I also sometimes see:

    kernel BUG at mm/usercopy.c:102!

Someone is running a program that's definitely submitting random
numbers to this ioctl. If you pass width=65535 and
num_placements=32769 then you get a negative 'len', which avoids the
EINVAL check, leading to the bug.

Switch 'len' to u32. It is the result of the multiplication of two u16
numbers, so it won't be able to overflow back into smaller numbers as
an u32.

v2: Make len u32 instead of checking for <=0 (José).

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230626212221.136640-1-paulo.r.zanoni@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 097a1ea06002f..f1b8b22e0216d 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -522,7 +522,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 	struct xe_engine *e = NULL;
 	u32 logical_mask;
 	u32 id;
-	int len;
+	u32 len;
 	int err;
 
 	if (XE_IOCTL_ERR(xe, args->flags) ||
-- 
GitLab


From 2e60442a4fef935c76cd70858775b92f565642cc Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Mon, 26 Jun 2023 14:22:21 -0700
Subject: [PATCH 1683/2340] drm/xe: properly check bounds for
 xe_wait_user_fence_ioctl()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If !no_engines, then we use copy_from_user to copy to the 'eci' array,
which has XE_HW_ENGINE_MAX_INSTANCE members. The amount of members
copied is given by the user in args->num_engines, so add code to check
that args->num_engines does not exceed XE_HW_ENGINE_MAX_INSTANCE. It's
an unsigned value so there's no need to check for negative values.

Fixes error messages such as:

    Buffer overflow detected (54 < 18446744073709551520)!

Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230626212221.136640-2-paulo.r.zanoni@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 3122374341d6a..098e2a4cff3fb 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -121,6 +121,9 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 			 addr & 0x7))
 		return -EINVAL;
 
+	if (XE_IOCTL_ERR(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE))
+		return -EINVAL;
+
 	if (!no_engines) {
 		err = copy_from_user(eci, user_eci,
 				     sizeof(struct drm_xe_engine_class_instance) *
-- 
GitLab


From 5572a004685770f8daad7661c5494b65148ede9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20Kempczy=C5=84ski?=
 <zbigniew.kempczynski@intel.com>
Date: Wed, 28 Jun 2023 07:51:41 +0200
Subject: [PATCH 1684/2340] drm/xe: Use nanoseconds instead of jiffies in uapi
 for user fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using jiffies as a timeout from userspace is weird even if
theoretically exists possiblity of acquiring jiffies via getconf.
Unfortunately this method is unreliable and the returned
value may vary from the one configured in the kernel config.

Now timeout is expressed in nanoseconds and its interpretation depends
on setting DRM_XE_UFENCE_WAIT_ABSTIME flag. Relative timeout (flag
is not set) means fence expire at now() + timeout. Absolute timeout
(flag is set) means that the fence expires at exact point of time.
Passing negative timeout means we will wait "forever" by setting
wait time to MAX_SCHEDULE_TIMEOUT.

Cc: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/20230628055141.398036-2-zbigniew.kempczynski@intel.com
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 47 +++++++++++++++++++------
 include/uapi/drm/xe_drm.h               | 16 +++++++--
 2 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 098e2a4cff3fb..c4420c0dbf9c2 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -7,6 +7,7 @@
 
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
+#include <drm/drm_utils.h>
 #include <drm/xe_drm.h>
 
 #include "xe_device.h"
@@ -84,6 +85,21 @@ static int check_hw_engines(struct xe_device *xe,
 			 DRM_XE_UFENCE_WAIT_VM_ERROR)
 #define MAX_OP		DRM_XE_UFENCE_WAIT_LTE
 
+static unsigned long to_jiffies_timeout(struct drm_xe_wait_user_fence *args)
+{
+	unsigned long timeout;
+
+	if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
+		return drm_timeout_abs_to_jiffies(args->timeout);
+
+	if (args->timeout == MAX_SCHEDULE_TIMEOUT || args->timeout == 0)
+		return args->timeout;
+
+	timeout = nsecs_to_jiffies(args->timeout);
+
+	return timeout ?: 1;
+}
+
 int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file)
 {
@@ -98,7 +114,8 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	int err;
 	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP ||
 		args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
-	unsigned long timeout = args->timeout;
+	unsigned long timeout;
+	ktime_t start;
 
 	if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) ||
 	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
@@ -152,8 +169,18 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 		addr = vm->async_ops.error_capture.addr;
 	}
 
-	if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT))
-		return -EINVAL;
+	/*
+	 * For negative timeout we want to wait "forever" by setting
+	 * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also
+	 * to args->timeout to avoid being zeroed on the signal delivery
+	 * (see arithmetics after wait).
+	 */
+	if (args->timeout < 0)
+		args->timeout = MAX_SCHEDULE_TIMEOUT;
+
+	timeout = to_jiffies_timeout(args);
+
+	start = ktime_get();
 
 	/*
 	 * FIXME: Very simple implementation at the moment, single wait queue
@@ -192,17 +219,17 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	} else {
 		remove_wait_queue(&xe->ufence_wq, &w_wait);
 	}
+
+	if (!(args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)) {
+		args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout < 0)
+			args->timeout = 0;
+	}
+
 	if (XE_IOCTL_ERR(xe, err < 0))
 		return err;
 	else if (XE_IOCTL_ERR(xe, !timeout))
 		return -ETIME;
 
-	/*
-	 * Again very simple, return the time in jiffies that has past, may need
-	 * a more precision
-	 */
-	if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
-		args->timeout = args->timeout - timeout;
-
 	return 0;
 }
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 8e7be15513332..347351a8f6184 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -904,8 +904,20 @@ struct drm_xe_wait_user_fence {
 #define DRM_XE_UFENCE_WAIT_U64		0xffffffffffffffffu
 	/** @mask: comparison mask */
 	__u64 mask;
-
-	/** @timeout: how long to wait before bailing, value in jiffies */
+	/**
+	 * @timeout: how long to wait before bailing, value in nanoseconds.
+	 * Without DRM_XE_UFENCE_WAIT_ABSTIME flag set (relative timeout)
+	 * it contains timeout expressed in nanoseconds to wait (fence will
+	 * expire at now() + timeout).
+	 * When DRM_XE_UFENCE_WAIT_ABSTIME flat is set (absolute timeout) wait
+	 * will end at timeout (uses system MONOTONIC_CLOCK).
+	 * Passing negative timeout leads to neverending wait.
+	 *
+	 * On relative timeout this value is updated with timeout left
+	 * (for restarting the call in case of signal delivery).
+	 * On absolute timeout this value stays intact (restarted call still
+	 * expire at the same point of time).
+	 */
 	__s64 timeout;
 
 	/**
-- 
GitLab


From e5a845fd8fa4ce61a99c87f37b63530fa4995750 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Wed, 28 Jun 2023 11:08:35 +0530
Subject: [PATCH 1685/2340] drm/xe: Add sysfs entry for tile

We have recently introduced tile for each gpu,
so lets add sysfs entry per tile for userspace
to provide required information specific to tile.

V5:
  - define ktype as const
V4:
  - Reorder headers - Aravind
V3:
  - Make API to return void and add drm_warn - Aravind
V2:
  - Add logs in failure path

Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile              |  1 +
 drivers/gpu/drm/xe/xe_device_types.h     |  3 ++
 drivers/gpu/drm/xe/xe_tile.c             |  3 ++
 drivers/gpu/drm/xe/xe_tile.h             |  2 +
 drivers/gpu/drm/xe/xe_tile_sysfs.c       | 59 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_tile_sysfs.h       | 19 ++++++++
 drivers/gpu/drm/xe/xe_tile_sysfs_types.h | 27 +++++++++++
 7 files changed, 114 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_tile_sysfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_tile_sysfs.h
 create mode 100644 drivers/gpu/drm/xe/xe_tile_sysfs_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 8d6d3c070fc85..3ade82cf244e9 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -103,6 +103,7 @@ xe-y += xe_bb.o \
 	xe_step.o \
 	xe_sync.o \
 	xe_tile.o \
+	xe_tile_sysfs.o \
 	xe_trace.o \
 	xe_ttm_sys_mgr.o \
 	xe_ttm_stolen_mgr.o \
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 3b50134cdcc0d..5517229bb505b 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -144,6 +144,9 @@ struct xe_tile {
 
 	/** @migrate: Migration helper for vram blits and clearing */
 	struct xe_migrate *migrate;
+
+	/** @sysfs: sysfs' kobj used by xe_tile_sysfs */
+	struct kobject *sysfs;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 6414aa810355e..ac70486d09c32 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -10,6 +10,7 @@
 #include "xe_migrate.h"
 #include "xe_sa.h"
 #include "xe_tile.h"
+#include "xe_tile_sysfs.h"
 #include "xe_ttm_vram_mgr.h"
 
 /**
@@ -142,6 +143,8 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
 	if (IS_ERR(tile->mem.kernel_bb_pool))
 		err = PTR_ERR(tile->mem.kernel_bb_pool);
 
+	xe_tile_sysfs_init(tile);
+
 err_mem_access:
 	xe_device_mem_access_put(tile_to_xe(tile));
 	return err;
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
index 33bf412921953..782c47f8bd450 100644
--- a/drivers/gpu/drm/xe/xe_tile.h
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -6,6 +6,8 @@
 #ifndef _XE_TILE_H_
 #define _XE_TILE_H_
 
+#include "xe_device_types.h"
+
 struct xe_tile;
 
 int xe_tile_alloc(struct xe_tile *tile);
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
new file mode 100644
index 0000000000000..2d64fa54b5a82
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_tile.h"
+#include "xe_tile_sysfs.h"
+
+static void xe_tile_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type xe_tile_sysfs_kobj_type = {
+	.release = xe_tile_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void tile_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_tile *tile = arg;
+
+	kobject_put(tile->sysfs);
+}
+
+void xe_tile_sysfs_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct device *dev = xe->drm.dev;
+	struct kobj_tile *kt;
+	int err;
+
+	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
+	if (!kt)
+		return;
+
+	kobject_init(&kt->base, &xe_tile_sysfs_kobj_type);
+	kt->tile = tile;
+
+	err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id);
+	if (err) {
+		kobject_put(&kt->base);
+		drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err);
+		return;
+	}
+
+	tile->sysfs = &kt->base;
+
+	err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
+	if (err) {
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+		return;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h
new file mode 100644
index 0000000000000..e4f065039ebae
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SYSFS_H_
+#define _XE_TILE_SYSFS_H_
+
+#include "xe_tile_sysfs_types.h"
+
+void xe_tile_sysfs_init(struct xe_tile *tile);
+
+static inline struct xe_tile *
+kobj_to_tile(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_tile, base)->tile;
+}
+
+#endif /* _XE_TILE_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs_types.h b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h
new file mode 100644
index 0000000000000..75906ba11a9ec
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SYSFS_TYPES_H_
+#define _XE_TILE_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_tile;
+
+/**
+ * struct kobj_tile - A tile's kobject struct that connects the kobject
+ * and the TILE
+ *
+ * When dealing with multiple TILEs, this struct helps to understand which
+ * TILE needs to be addressed on a given sysfs call.
+ */
+struct kobj_tile {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @tile: A pointer to the tile itself */
+	struct xe_tile *tile;
+};
+
+#endif	/* _XE_TILE_SYSFS_TYPES_H_ */
-- 
GitLab


From 8c82f914a302e394e2a037241d84ca3af6577f97 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Tue, 6 Jun 2023 15:48:38 +0530
Subject: [PATCH 1686/2340] drm/xe: Add GTs under respective tile sysfs

With the separation of xe_tile and xe_gt, We now consider
a PCI device (xe_device) to contain one or more tiles (struct xe_tile).
Each tile will contain one or more GTs (struct xe_gt).
So lets align sysfs paths accordingly.

TODO: Currently we have gt0 under tile0 and gt1 under tile1
on multi-tile. This GT indexing still under discussion, when
it is concluded we need to revisit this change.

Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index c01cc689058c3..13570987a7569 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -31,7 +31,7 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg)
 
 int xe_gt_sysfs_init(struct xe_gt *gt)
 {
-	struct device *dev = gt_to_xe(gt)->drm.dev;
+	struct xe_tile *tile = gt_to_tile(gt);
 	struct kobj_gt *kg;
 	int err;
 
@@ -42,7 +42,7 @@ int xe_gt_sysfs_init(struct xe_gt *gt)
 	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
 	kg->gt = gt;
 
-	err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id);
+	err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
 	if (err) {
 		kobject_put(&kg->base);
 		return err;
-- 
GitLab


From 9641df819772662429721f4b14141308fcf2d667 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Wed, 28 Jun 2023 11:36:16 +0530
Subject: [PATCH 1687/2340] drm/xe: Add sysfs entry to report per tile memory
 size

Add sysfs entry to read per tile physical memory
including stolen memory.

V5:
  - rename var name and make it part of vram struct - Lucas
V4:
  - %s/addr_range/physical_vram_size_byes, make it
    user readable name - Joonas/Aravind
  - Display in bytes - Joonas/Aravind
V3:
  - Exclude DG1, replace sysfs_create_file/files - Aravind
V2:
  - Use DEVICE_ATTR_RO - Aravind
  - Dont put kobj on sysfs_file_create fail - Himal
  - Skip addr_range sysfs create for non dgfx - Himal

Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  5 +++++
 drivers/gpu/drm/xe/xe_mmio.c         |  1 +
 drivers/gpu/drm/xe/xe_tile_sysfs.c   | 19 +++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 5517229bb505b..c404d250e4537 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -124,6 +124,11 @@ struct xe_tile {
 			resource_size_t base;
 			/** @size: size of VRAM. */
 			resource_size_t size;
+			/**
+			 * @actual_physical_size: Actual VRAM size
+			 * including stolen mem for tile
+			 */
+			resource_size_t actual_physical_size;
 			/** @mapping: pointer to VRAM mappable space */
 			void *__iomem mapping;
 		} vram;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f1336803b9157..7d92258cd35d0 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -277,6 +277,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		if (err)
 			return err;
 
+		tile->mem.vram.actual_physical_size = tile_size;
 		tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
 		tile->mem.vram.io_size = min_t(u64, vram_size, io_size);
 
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index 2d64fa54b5a82..16376607c68f9 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -20,6 +20,20 @@ static const struct kobj_type xe_tile_sysfs_kobj_type = {
 	.sysfs_ops = &kobj_sysfs_ops,
 };
 
+static ssize_t
+physical_vram_size_bytes_show(struct device *kdev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct xe_tile *tile = kobj_to_tile(&kdev->kobj);
+
+	return sysfs_emit(buf, "%llu\n", tile->mem.vram.actual_physical_size);
+}
+
+static DEVICE_ATTR_RO(physical_vram_size_bytes);
+
+static const struct attribute *physical_memsize_attr =
+	&dev_attr_physical_vram_size_bytes.attr;
+
 static void tile_sysfs_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_tile *tile = arg;
@@ -50,6 +64,11 @@ void xe_tile_sysfs_init(struct xe_tile *tile)
 
 	tile->sysfs = &kt->base;
 
+	if (IS_DGFX(xe) && xe->info.platform != XE_DG1 &&
+	    sysfs_create_file(tile->sysfs, physical_memsize_attr))
+		drm_warn(&xe->drm,
+			 "Sysfs creation to read addr_range per tile failed\n");
+
 	err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
 	if (err) {
 		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
-- 
GitLab


From e4b2893c17048aecb195553b60631fcb07360c4e Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Wed, 28 Jun 2023 11:53:16 +0530
Subject: [PATCH 1688/2340] drm/xe: Make usable size of VRAM readable

Current size member of vram struct does not give
complete information as what "size" contains. Does
it contain reserved portions or not. Name it usable
size and accordingly describe other size members as
well.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c           |  2 +-
 drivers/gpu/drm/xe/xe_device_types.h | 13 ++++++++++---
 drivers/gpu/drm/xe/xe_mmio.c         |  6 +++---
 drivers/gpu/drm/xe/xe_query.c        |  2 +-
 drivers/gpu/drm/xe/xe_tile.c         |  2 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c |  3 ++-
 6 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 17c0c6c2ae65a..86947a6fcc7ca 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -125,7 +125,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 {
 	struct xe_tile *tile = mem_type_to_tile(xe, mem_type);
 
-	XE_BUG_ON(!tile->mem.vram.size);
+	XE_BUG_ON(!tile->mem.vram.usable_size);
 
 	places[*c] = (struct ttm_place) {
 		.mem_type = mem_type,
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c404d250e4537..db08d64abce19 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -122,11 +122,18 @@ struct xe_tile {
 			resource_size_t io_size;
 			/** @base: offset of VRAM starting base */
 			resource_size_t base;
-			/** @size: size of VRAM. */
-			resource_size_t size;
+			/**
+			 * @usable_size: usable size of VRAM
+			 *
+			 * Usable size of VRAM excluding reserved portions
+			 * (e.g stolen mem)
+			 */
+			resource_size_t usable_size;
 			/**
 			 * @actual_physical_size: Actual VRAM size
-			 * including stolen mem for tile
+			 *
+			 * Actual VRAM size including reserved portions
+			 * (e.g stolen mem)
 			 */
 			resource_size_t actual_physical_size;
 			/** @mapping: pointer to VRAM mappable space */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 7d92258cd35d0..5effb21db9d4a 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -289,13 +289,13 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		tile->mem.vram.base = tile_offset;
 
 		/* small bar can limit the visible size.  size accordingly */
-		tile->mem.vram.size = min_t(u64, vram_size, io_size);
+		tile->mem.vram.usable_size = min_t(u64, vram_size, io_size);
 		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
 
 		drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id,
-			 &tile->mem.vram.io_start, &tile->mem.vram.size);
+			 &tile->mem.vram.io_start, &tile->mem.vram.usable_size);
 
-		if (tile->mem.vram.io_size < tile->mem.vram.size)
+		if (tile->mem.vram.io_size < tile->mem.vram.usable_size)
 			drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id,
 				 tile->id, &tile->mem.vram.io_size);
 
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 15e171ca7e628..9acbb27dfcab9 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -188,7 +188,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	config->num_params = num_params;
 	config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
 		xe->info.devid | (xe->info.revid << 16);
-	if (xe_device_get_root_tile(xe)->mem.vram.size)
+	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
 		config->info[XE_QUERY_CONFIG_FLAGS] =
 			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
 	if (xe->info.enable_guc)
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index ac70486d09c32..e0bc2b60ab09a 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -101,7 +101,7 @@ static int tile_ttm_mgr_init(struct xe_tile *tile)
 	struct xe_device *xe = tile_to_xe(tile);
 	int err;
 
-	if (tile->mem.vram.size) {
+	if (tile->mem.vram.usable_size) {
 		err = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr);
 		if (err)
 			return err;
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 1a84abd35fcff..a10fd0366da3c 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -360,7 +360,8 @@ int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr)
 	mgr->tile = tile;
 
 	return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id,
-				      tile->mem.vram.size, tile->mem.vram.io_size,
+				      tile->mem.vram.usable_size,
+				      tile->mem.vram.io_size,
 				      PAGE_SIZE);
 }
 
-- 
GitLab


From 413343584725f1fab9c4c676504cf6478dc3281b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 14 Jun 2023 13:51:59 -0700
Subject: [PATCH 1689/2340] drm/xe: Return GMD_ID revid properly

peek_gmdid() returns the IP version, not the raw value of the GMD_ID
register.  Make sure we extract and return the rev_id field as well so
that it can be used to determine the IP steppings properly.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230614205202.3376752-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 96f1ee1ea17f4..5c0c2e9b901eb 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -374,23 +374,27 @@ find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc)
 	return NULL;
 }
 
-static u32 peek_gmdid(struct xe_device *xe, u32 gmdid_offset)
+static void peek_gmdid(struct xe_device *xe, u32 gmdid_offset, u32 *ver, u32 *revid)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	void __iomem *map = pci_iomap_range(pdev, 0, gmdid_offset, sizeof(u32));
-	u32 ver;
+	u32 val;
 
 	if (!map) {
 		drm_err(&xe->drm, "Failed to read GMD_ID (%#x) from PCI BAR.\n",
 			gmdid_offset);
-		return 0;
+		*ver = 0;
+		*revid = 0;
+
+		return;
 	}
 
-	ver = ioread32(map);
+	val = ioread32(map);
 	pci_iounmap(pdev, map);
 
-	return REG_FIELD_GET(GMD_ID_ARCH_MASK, ver) * 100 +
-		REG_FIELD_GET(GMD_ID_RELEASE_MASK, ver);
+	*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 +
+		REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
+	*revid = REG_FIELD_GET(GMD_ID_REVID, val);
 }
 
 /*
@@ -426,8 +430,7 @@ static void handle_gmdid(struct xe_device *xe,
 {
 	u32 ver;
 
-	ver = peek_gmdid(xe, GMD_ID.addr);
-	*graphics_revid = REG_FIELD_GET(GMD_ID_REVID, ver);
+	peek_gmdid(xe, GMD_ID.addr, &ver, graphics_revid);
 	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
 		if (ver == graphics_ip_map[i].ver) {
 			xe->info.graphics_verx100 = ver;
@@ -442,8 +445,8 @@ static void handle_gmdid(struct xe_device *xe,
 			ver / 100, ver % 100);
 	}
 
-	ver = peek_gmdid(xe, GMD_ID.addr + 0x380000);
-	*media_revid = REG_FIELD_GET(GMD_ID_REVID, ver);
+	peek_gmdid(xe, GMD_ID.addr + 0x380000, &ver, media_revid);
+
 	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
 		if (ver == media_ip_map[i].ver) {
 			xe->info.media_verx100 = ver;
-- 
GitLab


From 54c5b74a06939bec61aa59421aa1073c0b666c2c Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 14 Jun 2023 13:52:00 -0700
Subject: [PATCH 1690/2340] drm/xe: Don't raise error on fused-off media

It's legitimate for the media GMD_ID register to read back as 0x0 if
media functionality is fused off or otherwise not present on the
platform.  Avoid printing an "unknown media version" error message for
this case.

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230614205202.3376752-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 5c0c2e9b901eb..749826548b4a9 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -447,6 +447,10 @@ static void handle_gmdid(struct xe_device *xe,
 
 	peek_gmdid(xe, GMD_ID.addr + 0x380000, &ver, media_revid);
 
+	/* Media may legitimately be fused off / not present */
+	if (ver == 0)
+		return;
+
 	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
 		if (ver == media_ip_map[i].ver) {
 			xe->info.media_verx100 = ver;
-- 
GitLab


From 98b6d092341128f753cff64b1bceda69c718b6af Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 14 Jun 2023 13:52:01 -0700
Subject: [PATCH 1691/2340] drm/xe: Print proper revid value for unknown media
 revision

If the GMD_ID register reports a higher media revision ID than we're
expecting, print the media revid, not the graphics revid, in the
debug message.

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230614205202.3376752-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_step.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index 1baf79ba02ad4..371cac951e0f4 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -235,7 +235,7 @@ struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
 	if (step.media >= STEP_FUTURE) {
 		step.media = STEP_FUTURE;
 		drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n",
-			graphics_gmdid_revid);
+			media_gmdid_revid);
 	}
 
 	return step;
-- 
GitLab


From c0ab10ee2ee6a2c423f95154e0842a1b19a4c13b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 14 Jun 2023 13:52:02 -0700
Subject: [PATCH 1692/2340] drm/xe: Enable PCI device earlier

Newer Intel platforms require that inspect the contents of the GMD_ID
registers very early in the driver initialization process to determine
the IP version (and proper init sequences), of the platform.  Move the
general PCI device setup and enablement slightly earlier, before we
start trying to peek at the GMD_ID registers.

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230614205202.3376752-5-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 749826548b4a9..e130ffe3ab559 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -627,10 +627,20 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	xe_pm_assert_unbounded_bridge(xe);
 	subplatform_desc = find_subplatform(xe, desc);
 
-	err = xe_info_init(xe, desc, subplatform_desc);
+	pci_set_drvdata(pdev, xe);
+	err = pci_enable_device(pdev);
 	if (err)
 		goto err_drm_put;
 
+	pci_set_master(pdev);
+
+	if (pci_enable_msi(pdev) < 0)
+		drm_dbg(&xe->drm, "can't enable MSI");
+
+	err = xe_info_init(xe, desc, subplatform_desc);
+	if (err)
+		goto err_pci_disable;
+
 	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d",
 		desc->platform_name,
 		subplatform_desc ? subplatform_desc->name : "",
@@ -650,16 +660,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		xe_step_name(xe->info.step.display),
 		xe_step_name(xe->info.step.basedie));
 
-	pci_set_drvdata(pdev, xe);
-	err = pci_enable_device(pdev);
-	if (err)
-		goto err_drm_put;
-
-	pci_set_master(pdev);
-
-	if (pci_enable_msi(pdev) < 0)
-		drm_dbg(&xe->drm, "can't enable MSI");
-
 	err = xe_device_probe(xe);
 	if (err)
 		goto err_pci_disable;
-- 
GitLab


From b747411964cd9011e05f4b9f5624be9ed71532c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 29 Jun 2023 22:51:33 +0200
Subject: [PATCH 1693/2340] drm/xe: Make page-table updates using the default
 engine happen in order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the default engine m->eng was used, there is no check for idle and
a cpu page-table update may thus happen in parallel with a gpu one.
Don't allow CPU page-table updates with the default engine until
the engine is idle.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230629205134.111849-2-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index a62bd7ec8a42c..be98690f2bc9b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1160,9 +1160,10 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	int err = 0;
 	bool usm = !eng && xe->info.supports_usm;
 	bool first_munmap_rebind = vma && vma->first_munmap_rebind;
+	struct xe_engine *eng_override = !eng ? m->eng : eng;
 
 	/* Use the CPU if no in syncs and engine is idle */
-	if (no_in_syncs(syncs, num_syncs) && (!eng || xe_engine_is_idle(eng))) {
+	if (no_in_syncs(syncs, num_syncs) && xe_engine_is_idle(eng_override)) {
 		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
 							num_updates,
 							first_munmap_rebind,
-- 
GitLab


From 44869c72e847e015649ffd4366df88fe529826bb Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 4 Jul 2023 18:32:41 +0300
Subject: [PATCH 1694/2340] drm/xe/mmio: add xe_mmio_read16()

Little by little, make stuff feature complete.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 3c547d78afbaf..4953a9a3f1fb3 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -31,6 +31,16 @@ static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
 	return readb(tile->mmio.regs + reg.addr);
 }
 
+static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return readw(tile->mmio.regs + reg.addr);
+}
+
 static inline void xe_mmio_write32(struct xe_gt *gt,
 				   struct xe_reg reg, u32 val)
 {
-- 
GitLab


From c7fac450dd865d2ad3400a1df0e8655df75a465f Mon Sep 17 00:00:00 2001
From: Alan Previn <alan.previn.teres.alexis@intel.com>
Date: Fri, 2 Jun 2023 11:16:50 -0700
Subject: [PATCH 1695/2340] drm/xe/guc: Fix h2g_write usage of
 GUC_CTB_MSG_MAX_LEN

In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because
GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification
where all command formats are defined in units of dwords so that '1'
is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255
dwords). However, h2g_write was incorrectly assuming that
GUC_CTB_MSG_MAX_LEN was in bytes. Fix this.

v3: Fix nit on #define location.(Matt)
v2: By correctly treating GUC_CTB_MSG_MAX_LEN as dwords, it causes
    a local array to consume 4x the stack size. Rework the function
    to avoid consuming stack even if the action size is large. (Matt)

Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 22bc9ce846db7..e71d069158dca 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -396,24 +396,27 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
 	spin_unlock_irq(&ct->fast_lock);
 }
 
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
+
 static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 		     u32 ct_fence_value, bool want_response)
 {
 	struct xe_device *xe = ct_to_xe(ct);
 	struct guc_ctb *h2g = &ct->ctbs.h2g;
-	u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
-	u32 cmd_len = len + GUC_CTB_HDR_LEN;
-	u32 cmd_idx = 0, i;
+	u32 cmd[H2G_CT_HEADERS];
 	u32 tail = h2g->info.tail;
+	u32 full_len;
 	struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
 							 tail * sizeof(u32));
 
+	full_len = len + GUC_CTB_HDR_LEN;
+
 	lockdep_assert_held(&ct->lock);
-	XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN);
+	XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
 	XE_BUG_ON(tail > h2g->info.size);
 
 	/* Command will wrap, zero fill (NOPs), return and check credits again */
-	if (tail + cmd_len > h2g->info.size) {
+	if (tail + full_len > h2g->info.size) {
 		xe_map_memset(xe, &map, 0, 0,
 			      (h2g->info.size - tail) * sizeof(u32));
 		h2g_reserve_space(ct, (h2g->info.size - tail));
@@ -428,30 +431,33 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	 * dw1: HXG header (including action code)
 	 * dw2+: action data
 	 */
-	cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+	cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
 		FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
 		FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
 	if (want_response) {
-		cmd[cmd_idx++] =
+		cmd[1] =
 			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
 			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
 				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
 	} else {
-		cmd[cmd_idx++] =
+		cmd[1] =
 			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
 			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
 				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
 	}
-	for (i = 1; i < len; ++i)
-		cmd[cmd_idx++] = action[i];
+
+	/* H2G header in cmd[1] replaces action[0] so: */
+	--len;
+	++action;
 
 	/* Write H2G ensuring visable before descriptor update */
-	xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
+	xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+	xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
 	xe_device_wmb(ct_to_xe(ct));
 
 	/* Update local copies */
-	h2g->info.tail = (tail + cmd_len) % h2g->info.size;
-	h2g_reserve_space(ct, cmd_len);
+	h2g->info.tail = (tail + full_len) % h2g->info.size;
+	h2g_reserve_space(ct, full_len);
 
 	/* Update descriptor */
 	desc_write(xe, h2g, tail, h2g->info.tail);
-- 
GitLab


From 43e82fb9ecf0009aeb95e284067a9a24a55a93ed Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Wed, 5 Jul 2023 14:06:33 +0530
Subject: [PATCH 1696/2340] drm/xe: make GT sysfs init return void

Currently return from xe_gt_sysfs_init() is ignored
and also a failure in xe_gt_sysfs_init() isn't fatal
so make it return void.

V2 :
   - add drm_warn in error paths - Himal
   - Edit commit message - Nirmoy

Acked-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sysfs.c | 23 ++++++++++++++---------
 drivers/gpu/drm/xe/xe_gt_sysfs.h |  2 +-
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index 13570987a7569..cdfe5995259b2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -29,30 +29,35 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg)
 	kobject_put(gt->sysfs);
 }
 
-int xe_gt_sysfs_init(struct xe_gt *gt)
+void xe_gt_sysfs_init(struct xe_gt *gt)
 {
 	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
 	struct kobj_gt *kg;
 	int err;
 
 	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
-	if (!kg)
-		return -ENOMEM;
+	if (!kg) {
+		drm_warn(&xe->drm, "Allocating kobject failed.\n");
+		return;
+	}
 
 	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
 	kg->gt = gt;
 
 	err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
 	if (err) {
+		drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);
 		kobject_put(&kg->base);
-		return err;
+		return;
 	}
 
 	gt->sysfs = &kg->base;
 
-	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_sysfs_fini, gt);
-	if (err)
-		return err;
-
-	return 0;
+	err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
+	if (err) {
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+		return;
+	}
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
index ecbfcc5c7d42c..e3ec278ca0be0 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -8,7 +8,7 @@
 
 #include "xe_gt_sysfs_types.h"
 
-int xe_gt_sysfs_init(struct xe_gt *gt);
+void xe_gt_sysfs_init(struct xe_gt *gt);
 
 static inline struct xe_gt *
 kobj_to_gt(struct kobject *kobj)
-- 
GitLab


From 55d8ac9631aaa8ae3794341c52009f635a0d3188 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Mon, 3 Jul 2023 14:36:10 +0530
Subject: [PATCH 1697/2340] drm/xe: make kobject type struct as constant

Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.")
the driver core allows the usage of const struct kobj_type.

Take advantage of this to constify the structure definition to prevent
modification at runtime.

Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index cdfe5995259b2..b955940e8dc6d 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -17,7 +17,7 @@ static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
 	kfree(kobj);
 }
 
-static struct kobj_type xe_gt_sysfs_kobj_type = {
+static const struct kobj_type xe_gt_sysfs_kobj_type = {
 	.release = xe_gt_sysfs_kobj_release,
 	.sysfs_ops = &kobj_sysfs_ops,
 };
-- 
GitLab


From 54c9fb7e64fd3f0da1570e3d1c5446605e83210e Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 10 Jul 2023 07:41:21 -0700
Subject: [PATCH 1698/2340] drm/xe: Use internal VM flags in xe_vm_create

xe_vm_create used the IOCTL create flags in a few places rather than the
internal VM flags and this just happened to work as these values
matched. This is risky (and incorrect) as the internal flag values are
free to change. Fix this and use the internal VM flag values.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index bd143acbde0e1..a9cf62f7aac6b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1253,13 +1253,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		vm->batch_invalidate_tlb = true;
 	}
 
-	if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
+	if (flags & XE_VM_FLAG_COMPUTE_MODE) {
 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
 		vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
 		vm->batch_invalidate_tlb = false;
 	}
 
-	if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
+	if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
 		vm->async_ops.fence.context = dma_fence_context_alloc(1);
 		vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
 	}
-- 
GitLab


From 9d858b69b0cfb56dd67943138c10d84eeb73380f Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 22 Jun 2023 12:39:48 -0700
Subject: [PATCH 1699/2340] drm/xe: Ban a VM if rebind worker hits an error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We cannot recover a VM if a rebind worker hits an error, ban the VM if
happens to ensure we do not attempt to place this VM on the hardware
again.

A follow up will inform the user if this happens.

v2: Return -ECANCELED in exec VM closed or banned, check for closed or
banned within VM lock.
v3: Fix lockdep splat by looking engine outside of vm->lock
v4: Fix error path when engine lookup fails
v5: Add debug message in rebind worker on error, update comments wrt
locking, add xe_vm_close helper

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c     |  13 ++++
 drivers/gpu/drm/xe/xe_exec.c       |   6 +-
 drivers/gpu/drm/xe/xe_trace.h      |   5 ++
 drivers/gpu/drm/xe/xe_vm.c         | 104 ++++++++++++++++++-----------
 drivers/gpu/drm/xe/xe_vm.h         |  13 +++-
 drivers/gpu/drm/xe/xe_vm_madvise.c |   2 +-
 drivers/gpu/drm/xe/xe_vm_types.h   |  10 ++-
 7 files changed, 107 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index f1b8b22e0216d..af75c9a0ea7bb 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -597,10 +597,23 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 		if (XE_IOCTL_ERR(xe, !vm))
 			return -ENOENT;
 
+		err = down_read_interruptible(&vm->lock);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+
+		if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+			up_read(&vm->lock);
+			xe_vm_put(vm);
+			return -ENOENT;
+		}
+
 		e = xe_engine_create(xe, vm, logical_mask,
 				     args->width, hwe,
 				     xe_vm_no_dma_fences(vm) ? 0 :
 				     ENGINE_FLAG_PERSISTENT);
+		up_read(&vm->lock);
 		xe_vm_put(vm);
 		if (IS_ERR(e))
 			return PTR_ERR(e);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index c52edff9a3584..bdf00e59e7a4c 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -297,9 +297,9 @@ retry:
 	if (err)
 		goto err_unlock_list;
 
-	if (xe_vm_is_closed(engine->vm)) {
-		drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
-		err = -EIO;
+	if (xe_vm_is_closed_or_banned(engine->vm)) {
+		drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
+		err = -ECANCELED;
 		goto err_engine_end;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 8a5d35f157919..d5894570f196c 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -478,6 +478,11 @@ DECLARE_EVENT_CLASS(xe_vm,
 			      __entry->asid)
 );
 
+DEFINE_EVENT(xe_vm, xe_vm_kill,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
 DEFINE_EVENT(xe_vm, xe_vm_create,
 	     TP_PROTO(struct xe_vm *vm),
 	     TP_ARGS(vm)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a9cf62f7aac6b..47e3d37b757c6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -514,6 +514,24 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
 
 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
 
+static void xe_vm_kill(struct xe_vm *vm)
+{
+	struct ww_acquire_ctx ww;
+	struct xe_engine *e;
+
+	lockdep_assert_held(&vm->lock);
+
+	xe_vm_lock(vm, &ww, 0, false);
+	vm->flags |= XE_VM_FLAG_BANNED;
+	trace_xe_vm_kill(vm);
+
+	list_for_each_entry(e, &vm->preempt.engines, compute.link)
+		e->ops->kill(e);
+	xe_vm_unlock(vm, &ww);
+
+	/* TODO: Inform user the VM is banned */
+}
+
 static void preempt_rebind_work_func(struct work_struct *w)
 {
 	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
@@ -533,13 +551,14 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
 	trace_xe_vm_rebind_worker_enter(vm);
 
-	if (xe_vm_is_closed(vm)) {
+	down_write(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm)) {
+		up_write(&vm->lock);
 		trace_xe_vm_rebind_worker_exit(vm);
 		return;
 	}
 
-	down_write(&vm->lock);
-
 retry:
 	if (vm->async_ops.error)
 		goto out_unlock_outer;
@@ -666,11 +685,14 @@ out_unlock_outer:
 			goto retry;
 		}
 	}
+	if (err) {
+		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+		xe_vm_kill(vm);
+	}
 	up_write(&vm->lock);
 
 	free_preempt_fences(&preempt_fences);
 
-	XE_WARN_ON(err < 0);	/* TODO: Kill VM or put in error state */
 	trace_xe_vm_rebind_worker_exit(vm);
 }
 
@@ -1140,11 +1162,12 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
 {
 	struct rb_node *node;
 
-	if (xe_vm_is_closed(vm))
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm))
 		return NULL;
 
 	XE_BUG_ON(vma->end >= vm->size);
-	lockdep_assert_held(&vm->lock);
 
 	node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
 
@@ -1377,6 +1400,13 @@ mm_closed:
 	wake_up_all(&vm->async_ops.error_capture.wq);
 }
 
+static void xe_vm_close(struct xe_vm *vm)
+{
+	down_write(&vm->lock);
+	vm->size = 0;
+	up_write(&vm->lock);
+}
+
 void xe_vm_close_and_put(struct xe_vm *vm)
 {
 	struct rb_root contested = RB_ROOT;
@@ -1387,8 +1417,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 
 	XE_BUG_ON(vm->preempt.num_engines);
 
-	vm->size = 0;
-	smp_mb();
+	xe_vm_close(vm);
+
 	flush_async_ops(vm);
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
@@ -3072,30 +3102,34 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		return err;
 
-	vm = xe_vm_lookup(xef, args->vm_id);
-	if (XE_IOCTL_ERR(xe, !vm)) {
-		err = -EINVAL;
-		goto free_objs;
-	}
-
-	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
-		drm_err(dev, "VM closed while we began looking up?\n");
-		err = -ENOENT;
-		goto put_vm;
-	}
-
 	if (args->engine_id) {
 		e = xe_engine_lookup(xef, args->engine_id);
 		if (XE_IOCTL_ERR(xe, !e)) {
 			err = -ENOENT;
-			goto put_vm;
+			goto free_objs;
 		}
+
 		if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
 			err = -EINVAL;
 			goto put_engine;
 		}
 	}
 
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_ERR(xe, !vm)) {
+		err = -EINVAL;
+		goto put_engine;
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto put_vm;
+
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+		err = -ENOENT;
+		goto release_vm_lock;
+	}
+
 	if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
 		if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
 			err = -EOPNOTSUPP;
@@ -3105,10 +3139,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			err = -EPROTO;
 
 		if (!err) {
-			down_write(&vm->lock);
 			trace_xe_vm_restart(vm);
 			vm_set_async_error(vm, 0);
-			up_write(&vm->lock);
 
 			queue_work(system_unbound_wq, &vm->async_ops.work);
 
@@ -3117,13 +3149,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 				xe_vm_queue_rebind_worker(vm);
 		}
 
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
 			 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
 		err = -EOPNOTSUPP;
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	for (i = 0; i < args->num_binds; ++i) {
@@ -3133,7 +3165,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		if (XE_IOCTL_ERR(xe, range > vm->size) ||
 		    XE_IOCTL_ERR(xe, addr > vm->size - range)) {
 			err = -EINVAL;
-			goto put_engine;
+			goto release_vm_lock;
 		}
 
 		if (bind_ops[i].tile_mask) {
@@ -3142,7 +3174,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask &
 					 ~valid_tiles)) {
 				err = -EINVAL;
-				goto put_engine;
+				goto release_vm_lock;
 			}
 		}
 	}
@@ -3150,13 +3182,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
 	if (!bos) {
 		err = -ENOMEM;
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL);
 	if (!vmas) {
 		err = -ENOMEM;
-		goto put_engine;
+		goto release_vm_lock;
 	}
 
 	for (i = 0; i < args->num_binds; ++i) {
@@ -3211,10 +3243,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto free_syncs;
 	}
 
-	err = down_write_killable(&vm->lock);
-	if (err)
-		goto free_syncs;
-
 	/* Do some error checking first to make the unwind easier */
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
@@ -3223,7 +3251,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 		err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
 		if (err)
-			goto release_vm_lock;
+			goto free_syncs;
 	}
 
 	for (i = 0; i < args->num_binds; ++i) {
@@ -3343,8 +3371,6 @@ destroy_vmas:
 			break;
 		}
 	}
-release_vm_lock:
-	up_write(&vm->lock);
 free_syncs:
 	while (num_syncs--) {
 		if (async && j &&
@@ -3357,11 +3383,13 @@ free_syncs:
 put_obj:
 	for (i = j; i < args->num_binds; ++i)
 		xe_bo_put(bos[i]);
+release_vm_lock:
+	up_write(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
 put_engine:
 	if (e)
 		xe_engine_put(e);
-put_vm:
-	xe_vm_put(vm);
 free_objs:
 	kfree(bos);
 	kfree(vmas);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 5edb7771629c6..02b409dd77d54 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -45,10 +45,21 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww);
 
 static inline bool xe_vm_is_closed(struct xe_vm *vm)
 {
-	/* Only guaranteed not to change when vm->resv is held */
+	/* Only guaranteed not to change when vm->lock is held */
 	return !vm->size;
 }
 
+static inline bool xe_vm_is_banned(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_BANNED;
+}
+
+static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
+{
+	lockdep_assert_held(&vm->lock);
+	return xe_vm_is_closed(vm) || xe_vm_is_banned(vm);
+}
+
 struct xe_vma *
 xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma);
 
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 6c196431a60eb..670c80c1f0a33 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -313,7 +313,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_ERR(xe, !vm))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
 		err = -ENOENT;
 		goto put_vm;
 	}
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index c148dd49a6ca2..3c885211a8d16 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -176,15 +176,19 @@ struct xe_vm {
 	struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
 	struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
 
-	/** @flags: flags for this VM, statically setup a creation time */
+	/**
+	 * @flags: flags for this VM, statically setup a creation time aside
+	 * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely
+	 */
 #define XE_VM_FLAGS_64K			BIT(0)
 #define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
 #define XE_VM_FLAG_ASYNC_BIND_OPS	BIT(2)
 #define XE_VM_FLAG_MIGRATION		BIT(3)
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
 #define XE_VM_FLAG_FAULT_MODE		BIT(5)
-#define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 6) & 0x3)
-#define XE_VM_FLAG_SET_TILE_ID(tile)	((tile)->id << 6)
+#define XE_VM_FLAG_BANNED		BIT(6)
+#define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 7) & 0x3)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	((tile)->id << 7)
 	unsigned long flags;
 
 	/** @composite_fence_ctx: context composite fence */
-- 
GitLab


From 21ed3327e388c24ddbdc3b2e8533f0c3ab99953b Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 22 Jun 2023 13:03:04 -0700
Subject: [PATCH 1700/2340] drm/xe: Add helpers to hide struct xe_vma internals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will help with the GPUVA port as the internals of struct xe_vma
will change.

v2: Update comment around helpers

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.kernel.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c                  |   4 +-
 drivers/gpu/drm/xe/xe_exec.c                |   2 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c        |   7 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  14 +-
 drivers/gpu/drm/xe/xe_pt.c                  |  88 +++++------
 drivers/gpu/drm/xe/xe_trace.h               |  10 +-
 drivers/gpu/drm/xe/xe_vm.c                  | 163 ++++++++++----------
 drivers/gpu/drm/xe/xe_vm.h                  |  76 +++++++--
 drivers/gpu/drm/xe/xe_vm_madvise.c          |  12 +-
 9 files changed, 211 insertions(+), 165 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 86947a6fcc7ca..fb351c36cdc2f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -426,7 +426,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 	}
 
 	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		struct xe_vm *vm = vma->vm;
+		struct xe_vm *vm = xe_vma_vm(vma);
 
 		trace_xe_vma_evict(vma);
 
@@ -454,7 +454,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 
 		} else {
 			bool vm_resv_locked = false;
-			struct xe_vm *vm = vma->vm;
+			struct xe_vm *vm = xe_vma_vm(vma);
 
 			/*
 			 * We need to put the vma on the vm's rebind_list,
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index bdf00e59e7a4c..ba13d20ed348a 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -126,7 +126,7 @@ retry:
 		if (xe_vma_is_userptr(vma))
 			continue;
 
-		err = xe_bo_validate(vma->bo, vm, false);
+		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
 		if (err) {
 			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
 			*tv = NULL;
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 9dd8e5097e65d..4d0f402cc6305 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -77,7 +77,8 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
 
 static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
 {
-	if (lookup->start > vma->end || lookup->end < vma->start)
+	if (xe_vma_start(lookup) > xe_vma_end(vma) - 1 ||
+	    xe_vma_end(lookup) - 1 < xe_vma_start(vma))
 		return false;
 
 	return true;
@@ -171,7 +172,7 @@ retry_userptr:
 	}
 
 	/* Lock VM and BOs dma-resv */
-	bo = vma->bo;
+	bo = xe_vma_bo(vma);
 	if (only_needs_bo_lock(bo)) {
 		/* This path ensures the BO's LRU is updated */
 		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
@@ -538,7 +539,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
 		goto unlock_vm;
 
 	/* Lock VM and BOs dma-resv */
-	bo = vma->bo;
+	bo = xe_vma_bo(vma);
 	if (only_needs_bo_lock(bo)) {
 		/* This path ensures the BO's LRU is updated */
 		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 2fcb477604e20..f77368a164092 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -203,8 +203,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	if (!xe->info.has_range_tlb_invalidation) {
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
 	} else {
-		u64 start = vma->start;
-		u64 length = vma->end - vma->start + 1;
+		u64 start = xe_vma_start(vma);
+		u64 length = xe_vma_size(vma);
 		u64 align, end;
 
 		if (length < SZ_4K)
@@ -217,12 +217,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 		 * address mask covering the required range.
 		 */
 		align = roundup_pow_of_two(length);
-		start = ALIGN_DOWN(vma->start, align);
-		end = ALIGN(vma->start + length, align);
+		start = ALIGN_DOWN(xe_vma_start(vma), align);
+		end = ALIGN(xe_vma_end(vma), align);
 		length = align;
 		while (start + length < end) {
 			length <<= 1;
-			start = ALIGN_DOWN(vma->start, length);
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
 		}
 
 		/*
@@ -231,7 +231,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 		 */
 		if (length >= SZ_2M) {
 			length = max_t(u64, SZ_16M, length);
-			start = ALIGN_DOWN(vma->start, length);
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
 		}
 
 		XE_BUG_ON(length < SZ_4K);
@@ -240,7 +240,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 		XE_BUG_ON(!IS_ALIGNED(start, length));
 
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
-		action[len++] = vma->vm->usm.asid;
+		action[len++] = xe_vma_vm(vma)->usm.asid;
 		action[len++] = lower_32_bits(start);
 		action[len++] = upper_32_bits(start);
 		action[len++] = ilog2(length) - ilog2(SZ_4K);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index fe1c77b139e4e..a697d43ec293a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -96,7 +96,7 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 				&cur);
 		return xe_res_dma(&cur) + offset;
 	} else {
-		return xe_bo_addr(vma->bo, offset, page_size, is_vram);
+		return xe_bo_addr(xe_vma_bo(vma), offset, page_size, is_vram);
 	}
 }
 
@@ -749,7 +749,7 @@ static int
 xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
 {
-	struct xe_bo *bo = vma->bo;
+	struct xe_bo *bo = xe_vma_bo(vma);
 	bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo);
 	struct xe_res_cursor curs;
 	struct xe_pt_stage_bind_walk xe_walk = {
@@ -758,15 +758,15 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 			.shifts = xe_normal_pt_shifts,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 		},
-		.vm = vma->vm,
+		.vm = xe_vma_vm(vma),
 		.tile = tile,
 		.curs = &curs,
-		.va_curs_start = vma->start,
+		.va_curs_start = xe_vma_start(vma),
 		.pte_flags = vma->pte_flags,
 		.wupd.entries = entries,
-		.needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram,
+		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) && is_vram,
 	};
-	struct xe_pt *pt = vma->vm->pt_root[tile->id];
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 	int ret;
 
 	if (is_vram) {
@@ -788,20 +788,20 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 
 	if (!xe_vma_is_null(vma)) {
 		if (xe_vma_is_userptr(vma))
-			xe_res_first_sg(vma->userptr.sg, 0,
-					vma->end - vma->start + 1, &curs);
+			xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma),
+					&curs);
 		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
-			xe_res_first(bo->ttm.resource, vma->bo_offset,
-				     vma->end - vma->start + 1, &curs);
+			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
+				     xe_vma_size(vma), &curs);
 		else
-			xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset,
-					vma->end - vma->start + 1, &curs);
+			xe_res_first_sg(xe_bo_get_sg(bo), xe_vma_bo_offset(vma),
+					xe_vma_size(vma), &curs);
 	} else {
-		curs.size = vma->end - vma->start + 1;
+		curs.size = xe_vma_size(vma);
 	}
 
-	ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1,
-				&xe_walk.base);
+	ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma),
+			       xe_vma_end(vma), &xe_walk.base);
 
 	*num_entries = xe_walk.wupd.num_used_entries;
 	return ret;
@@ -933,13 +933,13 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
 		},
 		.tile = tile,
 	};
-	struct xe_pt *pt = vma->vm->pt_root[tile->id];
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 
 	if (!(vma->tile_present & BIT(tile->id)))
 		return false;
 
-	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
-				 &xe_walk.base);
+	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
+				xe_vma_end(vma), &xe_walk.base);
 
 	return xe_walk.needs_invalidate;
 }
@@ -974,21 +974,21 @@ static void xe_pt_abort_bind(struct xe_vma *vma,
 			continue;
 
 		for (j = 0; j < entries[i].qwords; j++)
-			xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL);
+			xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL);
 		kfree(entries[i].pt_entries);
 	}
 }
 
 static void xe_pt_commit_locks_assert(struct xe_vma *vma)
 {
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 
 	lockdep_assert_held(&vm->lock);
 
 	if (xe_vma_is_userptr(vma))
 		lockdep_assert_held_read(&vm->userptr.notifier_lock);
 	else if (!xe_vma_is_null(vma))
-		dma_resv_assert_held(vma->bo->ttm.base.resv);
+		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
 
 	dma_resv_assert_held(&vm->resv);
 }
@@ -1021,7 +1021,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
 
 			if (xe_pt_entry(pt_dir, j_))
 				xe_pt_destroy(xe_pt_entry(pt_dir, j_),
-					      vma->vm->flags, deferred);
+					      xe_vma_vm(vma)->flags, deferred);
 
 			pt_dir->dir.entries[j_] = &newpte->base;
 		}
@@ -1082,7 +1082,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
 	static u32 count;
 
 	if (count++ % divisor == divisor - 1) {
-		struct xe_vm *vm = vma->vm;
+		struct xe_vm *vm = xe_vma_vm(vma);
 
 		vma->userptr.divisor = divisor << 1;
 		spin_lock(&vm->userptr.invalidated_lock);
@@ -1125,7 +1125,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
 		container_of(pt_update, typeof(*userptr_update), base);
 	struct xe_vma *vma = pt_update->vma;
 	unsigned long notifier_seq = vma->userptr.notifier_seq;
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 
 	userptr_update->locked = false;
 
@@ -1296,19 +1296,19 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		},
 		.bind = true,
 	};
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	u32 num_entries;
 	struct dma_fence *fence;
 	struct invalidation_fence *ifence = NULL;
 	int err;
 
 	bind_pt_update.locked = false;
-	xe_bo_assert_held(vma->bo);
+	xe_bo_assert_held(xe_vma_bo(vma));
 	xe_vm_assert_held(vm);
 
-	vm_dbg(&vma->vm->xe->drm,
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
-	       vma->start, vma->end, e);
+	       xe_vma_start(vma), xe_vma_end(vma) - 1, e);
 
 	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
 	if (err)
@@ -1337,7 +1337,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 	}
 
 	fence = xe_migrate_update_pgtables(tile->migrate,
-					   vm, vma->bo,
+					   vm, xe_vma_bo(vma),
 					   e ? e : vm->eng[tile->id],
 					   entries, num_entries,
 					   syncs, num_syncs,
@@ -1363,8 +1363,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 				   DMA_RESV_USAGE_KERNEL :
 				   DMA_RESV_USAGE_BOOKKEEP);
 
-		if (!xe_vma_has_no_bo(vma) && !vma->bo->vm)
-			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
 					   DMA_RESV_USAGE_BOOKKEEP);
 		xe_pt_commit_bind(vma, entries, num_entries, rebind,
 				  bind_pt_update.locked ? &deferred : NULL);
@@ -1526,14 +1526,14 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 		},
 		.tile = tile,
-		.modified_start = vma->start,
-		.modified_end = vma->end + 1,
+		.modified_start = xe_vma_start(vma),
+		.modified_end = xe_vma_end(vma),
 		.wupd.entries = entries,
 	};
-	struct xe_pt *pt = vma->vm->pt_root[tile->id];
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 
-	(void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
-				 &xe_walk.base);
+	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
+				xe_vma_end(vma), &xe_walk.base);
 
 	return xe_walk.wupd.num_used_entries;
 }
@@ -1545,7 +1545,7 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
 				  const struct xe_vm_pgtable_update *update)
 {
 	struct xe_vma *vma = pt_update->vma;
-	u64 empty = __xe_pt_empty_pte(tile, vma->vm, update->pt->level);
+	u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level);
 	int i;
 
 	if (map && map->is_iomem)
@@ -1581,7 +1581,7 @@ xe_pt_commit_unbind(struct xe_vma *vma,
 			     i++) {
 				if (xe_pt_entry(pt_dir, i))
 					xe_pt_destroy(xe_pt_entry(pt_dir, i),
-						      vma->vm->flags, deferred);
+						      xe_vma_vm(vma)->flags, deferred);
 
 				pt_dir->dir.entries[i] = NULL;
 			}
@@ -1630,18 +1630,18 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 			.vma = vma,
 		},
 	};
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	u32 num_entries;
 	struct dma_fence *fence = NULL;
 	struct invalidation_fence *ifence;
 	LLIST_HEAD(deferred);
 
-	xe_bo_assert_held(vma->bo);
+	xe_bo_assert_held(xe_vma_bo(vma));
 	xe_vm_assert_held(vm);
 
-	vm_dbg(&vma->vm->xe->drm,
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
-	       vma->start, vma->end, e);
+	       xe_vma_start(vma), xe_vma_end(vma) - 1, e);
 
 	num_entries = xe_pt_stage_unbind(tile, vma, entries);
 	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
@@ -1680,8 +1680,8 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 				   DMA_RESV_USAGE_BOOKKEEP);
 
 		/* This fence will be installed by caller when doing eviction */
-		if (!xe_vma_has_no_bo(vma) && !vma->bo->vm)
-			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
 					   DMA_RESV_USAGE_BOOKKEEP);
 		xe_pt_commit_unbind(vma, entries, num_entries,
 				    unbind_pt_update.locked ? &deferred : NULL);
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index d5894570f196c..82ca25d8d017f 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -19,7 +19,7 @@
 #include "xe_gt_types.h"
 #include "xe_guc_engine_types.h"
 #include "xe_sched_job.h"
-#include "xe_vm_types.h"
+#include "xe_vm.h"
 
 DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
 		    TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
@@ -374,10 +374,10 @@ DECLARE_EVENT_CLASS(xe_vma,
 
 		    TP_fast_assign(
 			   __entry->vma = (unsigned long)vma;
-			   __entry->asid = vma->vm->usm.asid;
-			   __entry->start = vma->start;
-			   __entry->end = vma->end;
-			   __entry->ptr = (u64)vma->userptr.ptr;
+			   __entry->asid = xe_vma_vm(vma)->usm.asid;
+			   __entry->start = xe_vma_start(vma);
+			   __entry->end = xe_vma_end(vma) - 1;
+			   __entry->ptr = xe_vma_userptr(vma);
 			   ),
 
 		    TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 47e3d37b757c6..cec96beef3344 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -53,15 +53,14 @@ int xe_vma_userptr_check_repin(struct xe_vma *vma)
 
 int xe_vma_userptr_pin_pages(struct xe_vma *vma)
 {
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_device *xe = vm->xe;
-	const unsigned long num_pages =
-		(vma->end - vma->start + 1) >> PAGE_SHIFT;
+	const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
 	struct page **pages;
 	bool in_kthread = !current->mm;
 	unsigned long notifier_seq;
 	int pinned, ret, i;
-	bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
+	bool read_only = xe_vma_read_only(vma);
 
 	lockdep_assert_held(&vm->lock);
 	XE_BUG_ON(!xe_vma_is_userptr(vma));
@@ -96,7 +95,8 @@ retry:
 	}
 
 	while (pinned < num_pages) {
-		ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE,
+		ret = get_user_pages_fast(xe_vma_userptr(vma) +
+					  pinned * PAGE_SIZE,
 					  num_pages - pinned,
 					  read_only ? 0 : FOLL_WRITE,
 					  &pages[pinned]);
@@ -299,7 +299,7 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
 	struct xe_vma *vma;
 
 	list_for_each_entry(vma, &vm->extobj.list, extobj.link)
-		dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage);
+		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage);
 }
 
 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
@@ -448,7 +448,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 	INIT_LIST_HEAD(objs);
 	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
 		tv_bo->num_shared = num_shared;
-		tv_bo->bo = &vma->bo->ttm;
+		tv_bo->bo = &xe_vma_bo(vma)->ttm;
 
 		list_add_tail(&tv_bo->head, objs);
 		tv_bo++;
@@ -463,7 +463,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 	spin_lock(&vm->notifier.list_lock);
 	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
 				 notifier.rebind_link) {
-		xe_bo_assert_held(vma->bo);
+		xe_bo_assert_held(xe_vma_bo(vma));
 
 		list_del_init(&vma->notifier.rebind_link);
 		if (vma->tile_present && !vma->destroyed)
@@ -612,7 +612,7 @@ retry:
 		if (xe_vma_has_no_bo(vma) || vma->destroyed)
 			continue;
 
-		err = xe_bo_validate(vma->bo, vm, false);
+		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
 		if (err)
 			goto out_unlock;
 	}
@@ -706,7 +706,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 				   unsigned long cur_seq)
 {
 	struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 	long err;
@@ -925,7 +925,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 
 			err = mmu_interval_notifier_insert(&vma->userptr.notifier,
 							   current->mm,
-							   vma->userptr.ptr, size,
+							   xe_vma_userptr(vma), size,
 							   &vma_userptr_notifier_ops);
 			if (err) {
 				kfree(vma);
@@ -945,7 +945,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 static bool vm_remove_extobj(struct xe_vma *vma)
 {
 	if (!list_empty(&vma->extobj.link)) {
-		vma->vm->extobj.entries--;
+		xe_vma_vm(vma)->extobj.entries--;
 		list_del_init(&vma->extobj.link);
 		return true;
 	}
@@ -954,9 +954,9 @@ static bool vm_remove_extobj(struct xe_vma *vma)
 
 static void xe_vma_destroy_late(struct xe_vma *vma)
 {
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_device *xe = vm->xe;
-	bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
+	bool read_only = xe_vma_read_only(vma);
 
 	if (xe_vma_is_userptr(vma)) {
 		if (vma->userptr.sg) {
@@ -978,7 +978,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 	} else if (xe_vma_is_null(vma)) {
 		xe_vm_put(vm);
 	} else {
-		xe_bo_put(vma->bo);
+		xe_bo_put(xe_vma_bo(vma));
 	}
 
 	kfree(vma);
@@ -999,7 +999,7 @@ bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
 	struct xe_vma *vma;
 
 	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		if (vma != ignore && vma->vm == vm)
+		if (vma != ignore && xe_vma_vm(vma) == vm)
 			return vma;
 	}
 
@@ -1027,7 +1027,7 @@ static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
 
 static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
 {
-	struct xe_bo *bo = vma->bo;
+	struct xe_bo *bo = xe_vma_bo(vma);
 
 	lockdep_assert_held_write(&vm->lock);
 
@@ -1048,7 +1048,7 @@ static void vma_destroy_cb(struct dma_fence *fence,
 
 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 {
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 
 	lockdep_assert_held_write(&vm->lock);
 	XE_BUG_ON(!list_empty(&vma->unbind_link));
@@ -1060,17 +1060,17 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 		spin_unlock(&vm->userptr.invalidated_lock);
 		list_del(&vma->userptr_link);
 	} else if (!xe_vma_is_null(vma)) {
-		xe_bo_assert_held(vma->bo);
+		xe_bo_assert_held(xe_vma_bo(vma));
 		list_del(&vma->bo_link);
 
 		spin_lock(&vm->notifier.list_lock);
 		list_del(&vma->notifier.rebind_link);
 		spin_unlock(&vm->notifier.list_lock);
 
-		if (!vma->bo->vm && vm_remove_extobj(vma)) {
+		if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) {
 			struct xe_vma *other;
 
-			other = bo_has_vm_references_locked(vma->bo, vm, NULL);
+			other = bo_has_vm_references_locked(xe_vma_bo(vma), vm, NULL);
 
 			if (other)
 				__vm_insert_extobj(vm, other);
@@ -1098,13 +1098,13 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
 {
 	struct ttm_validate_buffer tv[2];
 	struct ww_acquire_ctx ww;
-	struct xe_bo *bo = vma->bo;
+	struct xe_bo *bo = xe_vma_bo(vma);
 	LIST_HEAD(objs);
 	LIST_HEAD(dups);
 	int err;
 
 	memset(tv, 0, sizeof(tv));
-	tv[0].bo = xe_vm_ttm_bo(vma->vm);
+	tv[0].bo = xe_vm_ttm_bo(xe_vma_vm(vma));
 	list_add(&tv[0].head, &objs);
 
 	if (bo) {
@@ -1127,11 +1127,11 @@ static struct xe_vma *to_xe_vma(const struct rb_node *node)
 	return (struct xe_vma *)node;
 }
 
-static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
+static int xe_vma_cmp(struct xe_vma *a, struct xe_vma *b)
 {
-	if (a->end < b->start) {
+	if (xe_vma_end(a) - 1 < xe_vma_start(b)) {
 		return -1;
-	} else if (b->end < a->start) {
+	} else if (xe_vma_end(b) - 1 < xe_vma_start(a)) {
 		return 1;
 	} else {
 		return 0;
@@ -1146,19 +1146,19 @@ static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
 int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
 {
 	struct xe_vma *cmp = to_xe_vma(node);
-	const struct xe_vma *own = key;
+	struct xe_vma *own = (struct xe_vma *)key;
 
-	if (own->start > cmp->end)
+	if (xe_vma_start(own) > xe_vma_end(cmp) - 1)
 		return 1;
 
-	if (own->end < cmp->start)
+	if (xe_vma_end(own) - 1 < xe_vma_start(cmp))
 		return -1;
 
 	return 0;
 }
 
 struct xe_vma *
-xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
+xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
 	struct rb_node *node;
 
@@ -1167,7 +1167,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
 	if (xe_vm_is_closed_or_banned(vm))
 		return NULL;
 
-	XE_BUG_ON(vma->end >= vm->size);
+	XE_BUG_ON(xe_vma_end(vma) > vm->size);
 
 	node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
 
@@ -1176,7 +1176,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
 
 static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
-	XE_BUG_ON(vma->vm != vm);
+	XE_BUG_ON(xe_vma_vm(vma) != vm);
 	lockdep_assert_held(&vm->lock);
 
 	rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
@@ -1184,7 +1184,7 @@ static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 
 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
-	XE_BUG_ON(vma->vm != vm);
+	XE_BUG_ON(xe_vma_vm(vma) != vm);
 	lockdep_assert_held(&vm->lock);
 
 	rb_erase(&vma->vm_node, &vm->vmas);
@@ -1445,7 +1445,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		rb_erase(&vma->vm_node, &vm->vmas);
 
 		/* easy case, remove from VMA? */
-		if (xe_vma_has_no_bo(vma) || vma->bo->vm) {
+		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
 			xe_vma_destroy(vma, NULL);
 			continue;
 		}
@@ -1584,7 +1584,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
 	struct dma_fence *fence = NULL;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
 	int number_tiles = hweight_long(vma->tile_present);
 	int err;
@@ -1654,7 +1654,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
 	struct dma_fence *fence;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
-	struct xe_vm *vm = vma->vm;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
 	int number_tiles = hweight_long(vma->tile_mask);
 	int err;
@@ -1840,7 +1840,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	struct dma_fence *fence;
 
 	xe_vm_assert_held(vm);
-	xe_bo_assert_held(vma->bo);
+	xe_bo_assert_held(xe_vma_bo(vma));
 
 	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
 	if (IS_ERR(fence))
@@ -2078,13 +2078,13 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 	XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type));
 
 	if (!xe_vma_has_no_bo(vma)) {
-		err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
+		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
 		if (err)
 			return err;
 	}
 
 	if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
-		return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
+		return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs,
 				  afence);
 	} else {
 		int i;
@@ -2180,7 +2180,7 @@ static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
 
 	xe_vm_tv_populate(vm, &tv_vm);
 	list_add_tail(&tv_vm.head, &objs);
-	vbo = vma->bo;
+	vbo = xe_vma_bo(vma);
 	if (vbo) {
 		/*
 		 * An unbind can drop the last reference to the BO and
@@ -2540,7 +2540,7 @@ static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
 		} else {
 			bind_op->op = XE_VM_BIND_FLAG_ASYNC |
 				XE_VM_BIND_OP_MAP;
-			xe_bo_get(__vma->bo);
+			xe_bo_get(xe_vma_bo(__vma));
 		}
 
 		if (!last) {
@@ -2550,7 +2550,7 @@ static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
 		}
 
 		err = __vm_bind_ioctl_async(vm, __vma, e,
-					    __vma->bo, bind_op, last ?
+					    xe_vma_bo(__vma), bind_op, last ?
 					    out_syncs : NULL,
 					    last ? num_out_syncs : 0);
 		if (err) {
@@ -2597,8 +2597,8 @@ static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
 	case XE_VM_BIND_OP_PREFETCH:
 		vma = xe_vm_find_overlapping_vma(vm, &lookup);
 		if (XE_IOCTL_ERR(xe, !vma) ||
-		    XE_IOCTL_ERR(xe, (vma->start != addr ||
-				 vma->end != addr + range - 1) && !async))
+		    XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr ||
+				 xe_vma_end(vma) != addr + range) && !async))
 			return -EINVAL;
 		break;
 	case XE_VM_BIND_OP_UNMAP_ALL:
@@ -2623,9 +2623,9 @@ static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
 	int err;
 
-	if (vma->bo && !vma->bo->vm) {
+	if (xe_vma_bo(vma) && !xe_vma_bo(vma)->vm) {
 		vm_insert_extobj(vm, vma);
-		err = add_preempt_fences(vm, vma->bo);
+		err = add_preempt_fences(vm, xe_vma_bo(vma));
 		if (err)
 			return err;
 	}
@@ -2674,25 +2674,25 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 		}
 	}
 
-	if (first->start != lookup->start) {
+	if (xe_vma_start(first) != xe_vma_start(lookup)) {
 		struct ww_acquire_ctx ww;
 
-		if (first->bo)
-			err = xe_bo_lock(first->bo, &ww, 0, true);
+		if (xe_vma_bo(first))
+			err = xe_bo_lock(xe_vma_bo(first), &ww, 0, true);
 		if (err)
 			goto unwind;
-		new_first = xe_vma_create(first->vm, first->bo,
-					  first->bo ? first->bo_offset :
-					  first->userptr.ptr,
-					  first->start,
-					  lookup->start - 1,
-					  (first->pte_flags &
-					   XE_PTE_FLAG_READ_ONLY),
+		new_first = xe_vma_create(xe_vma_vm(first), xe_vma_bo(first),
+					  xe_vma_bo(first) ?
+					  xe_vma_bo_offset(first) :
+					  xe_vma_userptr(first),
+					  xe_vma_start(first),
+					  xe_vma_start(lookup) - 1,
+					  xe_vma_read_only(first),
 					  (first->pte_flags &
 					   XE_PTE_FLAG_NULL),
 					  first->tile_mask);
-		if (first->bo)
-			xe_bo_unlock(first->bo, &ww);
+		if (xe_vma_bo(first))
+			xe_bo_unlock(xe_vma_bo(first), &ww);
 		if (!new_first) {
 			err = -ENOMEM;
 			goto unwind;
@@ -2707,25 +2707,25 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
 			goto unwind;
 	}
 
-	if (last->end != lookup->end) {
+	if (xe_vma_end(last) != xe_vma_end(lookup)) {
 		struct ww_acquire_ctx ww;
-		u64 chunk = lookup->end + 1 - last->start;
+		u64 chunk = xe_vma_end(lookup) - xe_vma_start(last);
 
-		if (last->bo)
-			err = xe_bo_lock(last->bo, &ww, 0, true);
+		if (xe_vma_bo(last))
+			err = xe_bo_lock(xe_vma_bo(last), &ww, 0, true);
 		if (err)
 			goto unwind;
-		new_last = xe_vma_create(last->vm, last->bo,
-					 last->bo ? last->bo_offset + chunk :
-					 last->userptr.ptr + chunk,
-					 last->start + chunk,
-					 last->end,
-					 (last->pte_flags &
-					  XE_PTE_FLAG_READ_ONLY),
+		new_last = xe_vma_create(xe_vma_vm(last), xe_vma_bo(last),
+					 xe_vma_bo(last) ?
+					 xe_vma_bo_offset(last) + chunk :
+					 xe_vma_userptr(last) + chunk,
+					 xe_vma_start(last) + chunk,
+					 xe_vma_end(last) - 1,
+					 xe_vma_read_only(last),
 					 (last->pte_flags & XE_PTE_FLAG_NULL),
 					 last->tile_mask);
-		if (last->bo)
-			xe_bo_unlock(last->bo, &ww);
+		if (xe_vma_bo(last))
+			xe_bo_unlock(xe_vma_bo(last), &ww);
 		if (!new_last) {
 			err = -ENOMEM;
 			goto unwind;
@@ -2791,7 +2791,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
 	struct rb_node *node;
 
 	if (!xe_vma_has_no_bo(vma)) {
-		if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region]))
+		if (!xe_bo_can_migrate(xe_vma_bo(vma), region_to_mem_type[region]))
 			return ERR_PTR(-EINVAL);
 	}
 
@@ -2800,7 +2800,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
 		if (!xe_vma_cmp_vma_cb(lookup, node)) {
 			__vma = to_xe_vma(node);
 			if (!xe_vma_has_no_bo(__vma)) {
-				if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
+				if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region]))
 					goto flush_list;
 			}
 			list_add_tail(&__vma->unbind_link, &vma->unbind_link);
@@ -2814,7 +2814,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
 		if (!xe_vma_cmp_vma_cb(lookup, node)) {
 			__vma = to_xe_vma(node);
 			if (!xe_vma_has_no_bo(__vma)) {
-				if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
+				if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region]))
 					goto flush_list;
 			}
 			list_add(&__vma->unbind_link, &vma->unbind_link);
@@ -2842,7 +2842,7 @@ static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
 	xe_bo_assert_held(bo);
 
 	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		if (vma->vm != vm)
+		if (xe_vma_vm(vma) != vm)
 			continue;
 
 		prep_vma_destroy(vm, vma);
@@ -3436,14 +3436,14 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
  */
 int xe_vm_invalidate_vma(struct xe_vma *vma)
 {
-	struct xe_device *xe = vma->vm->xe;
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
 	struct xe_tile *tile;
 	u32 tile_needs_invalidate = 0;
 	int seqno[XE_MAX_TILES_PER_DEVICE];
 	u8 id;
 	int ret;
 
-	XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm));
+	XE_BUG_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma)));
 	XE_WARN_ON(xe_vma_is_null(vma));
 	trace_xe_vma_usm_invalidate(vma);
 
@@ -3453,11 +3453,11 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 			WARN_ON_ONCE(!mmu_interval_check_retry
 				     (&vma->userptr.notifier,
 				      vma->userptr.notifier_seq));
-			WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv,
+			WARN_ON_ONCE(!dma_resv_test_signaled(&xe_vma_vm(vma)->resv,
 							     DMA_RESV_USAGE_BOOKKEEP));
 
 		} else {
-			xe_bo_assert_held(vma->bo);
+			xe_bo_assert_held(xe_vma_bo(vma));
 		}
 	}
 
@@ -3522,10 +3522,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 				addr = 0;
 			}
 		} else {
-			addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram);
+			addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE, &is_vram);
 		}
 		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
-			   vma->start, vma->end, vma->end - vma->start + 1ull,
+			   xe_vma_start(vma), xe_vma_end(vma) - 1,
+			   xe_vma_size(vma),
 			   addr, is_null ? "NULL" : is_userptr ? "USR" :
 			   is_vram ? "VRAM" : "SYS");
 	}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 02b409dd77d54..644a8aa604e81 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -61,7 +61,66 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
 }
 
 struct xe_vma *
-xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma);
+xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma);
+
+/**
+ * DOC: Provide accessors for vma members to facilitate easy change of
+ * implementation.
+ */
+static inline u64 xe_vma_start(struct xe_vma *vma)
+{
+	return vma->start;
+}
+
+static inline u64 xe_vma_size(struct xe_vma *vma)
+{
+	return vma->end - vma->start + 1;
+}
+
+static inline u64 xe_vma_end(struct xe_vma *vma)
+{
+	return xe_vma_start(vma) + xe_vma_size(vma);
+}
+
+static inline u64 xe_vma_bo_offset(struct xe_vma *vma)
+{
+	return vma->bo_offset;
+}
+
+static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma)
+{
+	return vma->bo;
+}
+
+static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma)
+{
+	return vma->vm;
+}
+
+static inline bool xe_vma_read_only(struct xe_vma *vma)
+{
+	return vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
+}
+
+static inline u64 xe_vma_userptr(struct xe_vma *vma)
+{
+	return vma->userptr.ptr;
+}
+
+static inline bool xe_vma_is_null(struct xe_vma *vma)
+{
+	return vma->pte_flags & XE_PTE_FLAG_NULL;
+}
+
+static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
+{
+	return !xe_vma_bo(vma);
+}
+
+static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+{
+	return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
+}
 
 #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
 
@@ -126,21 +185,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
 	}
 }
 
-static inline bool xe_vma_is_null(struct xe_vma *vma)
-{
-	return vma->pte_flags & XE_PTE_FLAG_NULL;
-}
-
-static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
-{
-	return !vma->bo;
-}
-
-static inline bool xe_vma_is_userptr(struct xe_vma *vma)
-{
-	return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
-}
-
 int xe_vma_userptr_pin_pages(struct xe_vma *vma);
 
 int xe_vma_userptr_check_repin(struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 670c80c1f0a33..0e8d0c513ee96 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -30,7 +30,7 @@ static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
 		struct xe_bo *bo;
 		struct ww_acquire_ctx ww;
 
-		bo = vmas[i]->bo;
+		bo = xe_vma_bo(vmas[i]);
 
 		err = xe_bo_lock(bo, &ww, 0, true);
 		if (err)
@@ -55,7 +55,7 @@ static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm,
 		struct xe_bo *bo;
 		struct ww_acquire_ctx ww;
 
-		bo = vmas[i]->bo;
+		bo = xe_vma_bo(vmas[i]);
 
 		err = xe_bo_lock(bo, &ww, 0, true);
 		if (err)
@@ -91,7 +91,7 @@ static int madvise_preferred_mem_class_gt(struct xe_device *xe,
 		struct xe_bo *bo;
 		struct ww_acquire_ctx ww;
 
-		bo = vmas[i]->bo;
+		bo = xe_vma_bo(vmas[i]);
 
 		err = xe_bo_lock(bo, &ww, 0, true);
 		if (err)
@@ -114,7 +114,7 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
 		struct xe_bo *bo;
 		struct ww_acquire_ctx ww;
 
-		bo = vmas[i]->bo;
+		bo = xe_vma_bo(vmas[i]);
 		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
 			return -EINVAL;
 
@@ -145,7 +145,7 @@ static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm,
 		struct xe_bo *bo;
 		struct ww_acquire_ctx ww;
 
-		bo = vmas[i]->bo;
+		bo = xe_vma_bo(vmas[i]);
 		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
 				 !(bo->flags & XE_BO_CREATE_VRAM1_BIT)))
 			return -EINVAL;
@@ -176,7 +176,7 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
 		struct xe_bo *bo;
 		struct ww_acquire_ctx ww;
 
-		bo = vmas[i]->bo;
+		bo = xe_vma_bo(vmas[i]);
 
 		err = xe_bo_lock(bo, &ww, 0, true);
 		if (err)
-- 
GitLab


From 5cecdd0bb6bf4b8979b7d071017560daecfc9200 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 26 Jun 2023 14:55:37 -0700
Subject: [PATCH 1701/2340] drm/xe: Remove __xe_vm_bind forward declaration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not needed so remove it.

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index cec96beef3344..825abc3c5a456 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -696,11 +696,6 @@ out_unlock_outer:
 	trace_xe_vm_rebind_worker_exit(vm);
 }
 
-struct async_op_fence;
-static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
-			struct xe_engine *e, struct xe_sync_entry *syncs,
-			u32 num_syncs, struct async_op_fence *afence);
-
 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 				   const struct mmu_notifier_range *range,
 				   unsigned long cur_seq)
-- 
GitLab


From b06d47be7c83165d3b3e45e1d5f9520b79c7f5cc Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 7 Jul 2023 22:23:57 -0700
Subject: [PATCH 1702/2340] drm/xe: Port Xe to GPUVA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than open coding VM binds and VMA tracking, use the GPUVA
library. GPUVA provides a common infrastructure for VM binds to use mmap
/ munmap semantics and support for VK sparse bindings.

The concepts are:

1) xe_vm inherits from drm_gpuva_manager
2) xe_vma inherits from drm_gpuva
3) xe_vma_op inherits from drm_gpuva_op
4) VM bind operations (MAP, UNMAP, PREFETCH, UNMAP_ALL) call into the
GPUVA code to generate an VMA operations list which is parsed, committed,
and executed.

v2 (CI): Add break after default in case statement.
v3: Rebase
v4: Fix some error handling
v5: Use unlocked version VMA in error paths
v6: Rebase, address some review feedback mainly Thomas H
v7: Fix compile error in xe_vma_op_unwind, address checkpatch

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig            |    1 +
 drivers/gpu/drm/xe/tests/xe_migrate.c |    2 +-
 drivers/gpu/drm/xe/xe_bo.c            |   31 +-
 drivers/gpu/drm/xe/xe_bo.h            |   11 +-
 drivers/gpu/drm/xe/xe_device.c        |    2 +-
 drivers/gpu/drm/xe/xe_exec.c          |    4 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c  |   20 +-
 drivers/gpu/drm/xe/xe_migrate.c       |   14 +-
 drivers/gpu/drm/xe/xe_pt.c            |   46 +-
 drivers/gpu/drm/xe/xe_pt.h            |    2 +-
 drivers/gpu/drm/xe/xe_vm.c            | 1905 ++++++++++++-------------
 drivers/gpu/drm/xe/xe_vm.h            |   59 +-
 drivers/gpu/drm/xe/xe_vm_madvise.c    |   76 +-
 drivers/gpu/drm/xe/xe_vm_types.h      |  179 ++-
 14 files changed, 1175 insertions(+), 1177 deletions(-)

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 0a4854a59c90f..2a595bc92ca4b 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -21,6 +21,7 @@ config DRM_XE
 	select VMAP_PFN
 	select DRM_TTM
 	select DRM_TTM_HELPER
+	select DRM_GPUVM
 	select DRM_SCHED
 	select MMU_NOTIFIER
 	select WANT_DEV_COREDUMP
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 4c79c1dfa772a..aedfb3dd559ea 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -300,7 +300,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
-	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
+	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0);
 	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index fb351c36cdc2f..00b8b5e7f197c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -412,7 +412,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 {
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	struct xe_vma *vma;
+	struct drm_gpuva *gpuva;
+	struct drm_gem_object *obj = &bo->ttm.base;
+	struct drm_gpuvm_bo *vm_bo;
 	int ret = 0;
 
 	dma_resv_assert_held(bo->ttm.base.resv);
@@ -425,10 +427,12 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 		dma_resv_iter_end(&cursor);
 	}
 
-	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		struct xe_vm *vm = xe_vma_vm(vma);
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
+			struct xe_vma *vma = gpuva_to_vma(gpuva);
+			struct xe_vm *vm = xe_vma_vm(vma);
 
-		trace_xe_vma_evict(vma);
+			trace_xe_vma_evict(vma);
 
 		if (xe_vm_in_fault_mode(vm)) {
 			/* Wait for pending binds / unbinds. */
@@ -454,7 +458,6 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 
 		} else {
 			bool vm_resv_locked = false;
-			struct xe_vm *vm = xe_vma_vm(vma);
 
 			/*
 			 * We need to put the vma on the vm's rebind_list,
@@ -462,9 +465,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 			 * that we indeed have it locked, put the vma an the
 			 * vm's notifier.rebind_list instead and scoop later.
 			 */
-			if (dma_resv_trylock(&vm->resv))
+			if (dma_resv_trylock(xe_vm_resv(vm)))
 				vm_resv_locked = true;
-			else if (ctx->resv != &vm->resv) {
+			else if (ctx->resv != xe_vm_resv(vm)) {
 				spin_lock(&vm->notifier.list_lock);
 				list_move_tail(&vma->notifier.rebind_link,
 					       &vm->notifier.rebind_list);
@@ -477,7 +480,8 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 				list_add_tail(&vma->rebind_link, &vm->rebind_list);
 
 			if (vm_resv_locked)
-				dma_resv_unlock(&vm->resv);
+				dma_resv_unlock(xe_vm_resv(vm));
+		}
 		}
 	}
 
@@ -1285,7 +1289,7 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		}
 	}
 
-	bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL,
+	bo = __xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
 				   vm && !xe_vm_in_fault_mode(vm) &&
 				   flags & XE_BO_CREATE_USER_BIT ?
 				   &vm->lru_bulk_move : NULL, size,
@@ -1293,6 +1297,13 @@ xe_bo_create_locked_range(struct xe_device *xe,
 	if (IS_ERR(bo))
 		return bo;
 
+	/*
+	 * Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
+	 * to ensure the shared resv doesn't disappear under the bo, the bo
+	 * will keep a reference to the vm, and avoid circular references
+	 * by having all the vm's bo refereferences released at vm close
+	 * time.
+	 */
 	if (vm && xe_bo_is_user(bo))
 		xe_vm_get(vm);
 	bo->vm = vm;
@@ -1600,7 +1611,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
 		xe_vm_assert_held(vm);
 
 		ctx.allow_res_evict = allow_res_evict;
-		ctx.resv = &vm->resv;
+		ctx.resv = xe_vm_resv(vm);
 	}
 
 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 08ca1d06bf772..53a82ff7bce25 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -9,6 +9,15 @@
 #include "xe_bo_types.h"
 #include "xe_macros.h"
 #include "xe_vm_types.h"
+#include "xe_vm.h"
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
+
 
 #define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
 
@@ -149,7 +158,7 @@ void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww);
 static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
 {
 	if (bo) {
-		XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv);
+		XE_BUG_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm));
 		if (bo->vm)
 			xe_vm_assert_held(bo->vm);
 		else
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index a4fc5bc54d02d..6249eef752c58 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -134,7 +134,7 @@ static struct drm_driver driver = {
 	.driver_features =
 	    DRIVER_GEM |
 	    DRIVER_RENDER | DRIVER_SYNCOBJ |
-	    DRIVER_SYNCOBJ_TIMELINE,
+	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
 	.open = xe_file_open,
 	.postclose = xe_file_close,
 
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index ba13d20ed348a..07f4b2e8df16d 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -350,7 +350,7 @@ retry:
 	/* Wait behind munmap style rebinds */
 	if (!xe_vm_no_dma_fences(vm)) {
 		err = drm_sched_job_add_resv_dependencies(&job->drm,
-							  &vm->resv,
+							  xe_vm_resv(vm),
 							  DMA_RESV_USAGE_KERNEL);
 		if (err)
 			goto err_put_job;
@@ -378,7 +378,7 @@ retry:
 	xe_sched_job_arm(job);
 	if (!xe_vm_no_dma_fences(vm)) {
 		/* Block userptr invalidations / BO eviction */
-		dma_resv_add_fence(&vm->resv,
+		dma_resv_add_fence(xe_vm_resv(vm),
 				   &job->drm.s_fence->finished,
 				   DMA_RESV_USAGE_BOOKKEEP);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 4d0f402cc6305..d8ff05e25eda5 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -75,10 +75,10 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
 		!(BIT(tile->id) & vma->usm.tile_invalidated);
 }
 
-static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
 {
-	if (xe_vma_start(lookup) > xe_vma_end(vma) - 1 ||
-	    xe_vma_end(lookup) - 1 < xe_vma_start(vma))
+	if (page_addr > xe_vma_end(vma) - 1 ||
+	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
 		return false;
 
 	return true;
@@ -91,16 +91,14 @@ static bool only_needs_bo_lock(struct xe_bo *bo)
 
 static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
 {
-	struct xe_vma *vma = NULL, lookup;
+	struct xe_vma *vma = NULL;
 
-	lookup.start = page_addr;
-	lookup.end = lookup.start + SZ_4K - 1;
 	if (vm->usm.last_fault_vma) {   /* Fast lookup */
-		if (vma_matches(vm->usm.last_fault_vma, &lookup))
+		if (vma_matches(vm->usm.last_fault_vma, page_addr))
 			vma = vm->usm.last_fault_vma;
 	}
 	if (!vma)
-		vma = xe_vm_find_overlapping_vma(vm, &lookup);
+		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
 
 	return vma;
 }
@@ -489,12 +487,8 @@ static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
 {
 	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
 		sub_granularity_in_byte(acc->granularity);
-	struct xe_vma lookup;
-
-	lookup.start = page_va;
-	lookup.end = lookup.start + SZ_4K - 1;
 
-	return xe_vm_find_overlapping_vma(vm, &lookup);
+	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
 }
 
 static int handle_acc(struct xe_gt *gt, struct acc *acc)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index be98690f2bc9b..f05335b16a1a8 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -197,7 +197,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
 		entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
-				      XE_CACHE_WB, 0, 0);
+				      XE_CACHE_WB, 0);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
 
@@ -216,7 +216,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		     i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
 			entry = xe_pte_encode(NULL, batch, i,
-					      XE_CACHE_WB, 0, 0);
+					      XE_CACHE_WB, 0);
 
 			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
 				  entry);
@@ -1068,7 +1068,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 					  DMA_RESV_USAGE_KERNEL))
 		return ERR_PTR(-ETIME);
 
-	if (wait_vm && !dma_resv_test_signaled(&vm->resv,
+	if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm),
 					       DMA_RESV_USAGE_BOOKKEEP))
 		return ERR_PTR(-ETIME);
 
@@ -1159,7 +1159,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	u64 addr;
 	int err = 0;
 	bool usm = !eng && xe->info.supports_usm;
-	bool first_munmap_rebind = vma && vma->first_munmap_rebind;
+	bool first_munmap_rebind = vma &&
+		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
 	struct xe_engine *eng_override = !eng ? m->eng : eng;
 
 	/* Use the CPU if no in syncs and engine is idle */
@@ -1232,8 +1233,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 			BUG_ON(pt_bo->size != SZ_4K);
 
-			addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB,
-					     0, 0);
+			addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 		}
@@ -1281,7 +1281,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	 * trigger preempts before moving forward
 	 */
 	if (first_munmap_rebind) {
-		err = job_add_deps(job, &vm->resv,
+		err = job_add_deps(job, xe_vm_resv(vm),
 				   DMA_RESV_USAGE_BOOKKEEP);
 		if (err)
 			goto err_job;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index a697d43ec293a..030fd911d1898 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -100,15 +100,15 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 	}
 }
 
-static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
-			u32 pt_level)
+static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
+			struct xe_vma *vma, u32 pt_level)
 {
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
 
-	if (unlikely(flags & XE_PTE_FLAG_READ_ONLY))
+	if (unlikely(vma && xe_vma_read_only(vma)))
 		pte &= ~XE_PAGE_RW;
 
-	if (unlikely(flags & XE_PTE_FLAG_NULL))
+	if (unlikely(vma && xe_vma_is_null(vma)))
 		pte |= XE_PTE_NULL;
 
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
@@ -142,7 +142,6 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
  * @bo: If @vma is NULL, representing the memory to point to.
  * @offset: The offset into @vma or @bo.
  * @cache: The cache level indicating
- * @flags: Currently only supports PTE_READ_ONLY for read-only access.
  * @pt_level: The page-table level of the page-table into which the entry
  * is to be inserted.
  *
@@ -150,7 +149,7 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
  */
 u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 		  u64 offset, enum xe_cache_level cache,
-		  u32 flags, u32 pt_level)
+		  u32 pt_level)
 {
 	u64 pte;
 	bool is_vram;
@@ -162,11 +161,11 @@ u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 
 	if (is_vram) {
 		pte |= XE_PPGTT_PTE_LM;
-		if (vma && vma->use_atomic_access_pte_bit)
+		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
 			pte |= XE_USM_PPGTT_PTE_AE;
 	}
 
-	return __pte_encode(pte, cache, flags, pt_level);
+	return __pte_encode(pte, cache, vma, pt_level);
 }
 
 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -179,7 +178,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 
 	if (level == 0) {
 		u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0,
-					  XE_CACHE_WB, 0, 0);
+					  XE_CACHE_WB, 0);
 
 		return empty;
 	} else {
@@ -424,10 +423,9 @@ struct xe_pt_stage_bind_walk {
 	 */
 	bool needs_64K;
 	/**
-	 * @pte_flags: Flags determining PTE setup. These are not flags
-	 * encoded directly in the PTE. See @default_pte for those.
+	 * @vma: VMA being mapped
 	 */
-	u32 pte_flags;
+	struct xe_vma *vma;
 
 	/* Also input, but is updated during the walk*/
 	/** @curs: The DMA address cursor. */
@@ -564,7 +562,7 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
 		return false;
 
 	/* null VMA's do not have dma addresses */
-	if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
+	if (xe_vma_is_null(xe_walk->vma))
 		return true;
 
 	/* Is the DMA address huge PTE size aligned? */
@@ -590,7 +588,7 @@ xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
 		return false;
 
 	/* null VMA's do not have dma addresses */
-	if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
+	if (xe_vma_is_null(xe_walk->vma))
 		return true;
 
 	xe_res_next(&curs, addr - xe_walk->va_curs_start);
@@ -643,14 +641,13 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	/* Is this a leaf entry ?*/
 	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
 		struct xe_res_cursor *curs = xe_walk->curs;
-		bool is_null = xe_walk->pte_flags & XE_PTE_FLAG_NULL;
+		bool is_null = xe_vma_is_null(xe_walk->vma);
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
 		pte = __pte_encode(is_null ? 0 :
 				   xe_res_dma(curs) + xe_walk->dma_offset,
-				   xe_walk->cache, xe_walk->pte_flags,
-				   level);
+				   xe_walk->cache, xe_walk->vma, level);
 		pte |= xe_walk->default_pte;
 
 		/*
@@ -762,7 +759,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.tile = tile,
 		.curs = &curs,
 		.va_curs_start = xe_vma_start(vma),
-		.pte_flags = vma->pte_flags,
+		.vma = vma,
 		.wupd.entries = entries,
 		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) && is_vram,
 	};
@@ -771,7 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 
 	if (is_vram) {
 		xe_walk.default_pte = XE_PPGTT_PTE_LM;
-		if (vma && vma->use_atomic_access_pte_bit)
+		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
 			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
 		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
 		xe_walk.cache = XE_CACHE_WB;
@@ -990,7 +987,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
 	else if (!xe_vma_is_null(vma))
 		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
 
-	dma_resv_assert_held(&vm->resv);
+	xe_vm_assert_held(vm);
 }
 
 static void xe_pt_commit_bind(struct xe_vma *vma,
@@ -1343,6 +1340,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 					   syncs, num_syncs,
 					   &bind_pt_update.base);
 	if (!IS_ERR(fence)) {
+		bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND;
 		LLIST_HEAD(deferred);
 
 		/* TLB invalidation must be done before signaling rebind */
@@ -1358,8 +1356,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		}
 
 		/* add shared fence now for pagetable delayed destroy */
-		dma_resv_add_fence(&vm->resv, fence, !rebind &&
-				   vma->last_munmap_rebind ?
+		dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+				   last_munmap_rebind ?
 				   DMA_RESV_USAGE_KERNEL :
 				   DMA_RESV_USAGE_BOOKKEEP);
 
@@ -1377,7 +1375,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 			up_read(&vm->userptr.notifier_lock);
 			xe_bo_put_commit(&deferred);
 		}
-		if (!rebind && vma->last_munmap_rebind &&
+		if (!rebind && last_munmap_rebind &&
 		    xe_vm_in_compute_mode(vm))
 			queue_work(vm->xe->ordered_wq,
 				   &vm->preempt.rebind_work);
@@ -1676,7 +1674,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 		fence = &ifence->base.base;
 
 		/* add shared fence now for pagetable delayed destroy */
-		dma_resv_add_fence(&vm->resv, fence,
+		dma_resv_add_fence(xe_vm_resv(vm), fence,
 				   DMA_RESV_USAGE_BOOKKEEP);
 
 		/* This fence will be installed by caller when doing eviction */
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 54e8a043d353d..aaf4b7b851e28 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -50,5 +50,5 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
 
 u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 		  u64 offset, enum xe_cache_level cache,
-		  u32 flags, u32 pt_level);
+		  u32 pt_level);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 825abc3c5a456..297b7977ed87d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -65,7 +65,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
 	lockdep_assert_held(&vm->lock);
 	XE_BUG_ON(!xe_vma_is_userptr(vma));
 retry:
-	if (vma->destroyed)
+	if (vma->gpuva.flags & XE_VMA_DESTROYED)
 		return 0;
 
 	notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
@@ -312,7 +312,7 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
 	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
 		e->ops->resume(e);
 
-		dma_resv_add_fence(&vm->resv, e->compute.pfence,
+		dma_resv_add_fence(xe_vm_resv(vm), e->compute.pfence,
 				   DMA_RESV_USAGE_BOOKKEEP);
 		xe_vm_fence_all_extobjs(vm, e->compute.pfence,
 					DMA_RESV_USAGE_BOOKKEEP);
@@ -350,7 +350,7 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
 
 	down_read(&vm->userptr.notifier_lock);
 
-	dma_resv_add_fence(&vm->resv, pfence,
+	dma_resv_add_fence(xe_vm_resv(vm), pfence,
 			   DMA_RESV_USAGE_BOOKKEEP);
 
 	xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
@@ -466,7 +466,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 		xe_bo_assert_held(xe_vma_bo(vma));
 
 		list_del_init(&vma->notifier.rebind_link);
-		if (vma->tile_present && !vma->destroyed)
+		if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
 			list_move_tail(&vma->rebind_link, &vm->rebind_list);
 	}
 	spin_unlock(&vm->notifier.list_lock);
@@ -609,7 +609,8 @@ retry:
 		goto out_unlock;
 
 	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
-		if (xe_vma_has_no_bo(vma) || vma->destroyed)
+		if (xe_vma_has_no_bo(vma) ||
+		    vma->gpuva.flags & XE_VMA_DESTROYED)
 			continue;
 
 		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
@@ -629,7 +630,7 @@ retry:
 	}
 
 	/* Wait on munmap style VM unbinds */
-	wait = dma_resv_wait_timeout(&vm->resv,
+	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
 				     DMA_RESV_USAGE_KERNEL,
 				     false, MAX_SCHEDULE_TIMEOUT);
 	if (wait <= 0) {
@@ -725,7 +726,8 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 	 * Tell exec and rebind worker they need to repin and rebind this
 	 * userptr.
 	 */
-	if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->tile_present) {
+	if (!xe_vm_in_fault_mode(vm) &&
+	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
 		spin_lock(&vm->userptr.invalidated_lock);
 		list_move_tail(&vma->userptr.invalidate_link,
 			       &vm->userptr.invalidated);
@@ -740,13 +742,13 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 	 * unbinds to complete, and those are attached as BOOKMARK fences
 	 * to the vm.
 	 */
-	dma_resv_iter_begin(&cursor, &vm->resv,
+	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
 			    DMA_RESV_USAGE_BOOKKEEP);
 	dma_resv_for_each_fence_unlocked(&cursor, fence)
 		dma_fence_enable_sw_signaling(fence);
 	dma_resv_iter_end(&cursor);
 
-	err = dma_resv_wait_timeout(&vm->resv,
+	err = dma_resv_wait_timeout(xe_vm_resv(vm),
 				    DMA_RESV_USAGE_BOOKKEEP,
 				    false, MAX_SCHEDULE_TIMEOUT);
 	XE_WARN_ON(err <= 0);
@@ -792,7 +794,7 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 	}
 
 	/* Take lock and move to rebind_list for rebinding. */
-	err = dma_resv_lock_interruptible(&vm->resv, NULL);
+	err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
 	if (err)
 		goto out_err;
 
@@ -801,7 +803,7 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 		list_move_tail(&vma->rebind_link, &vm->rebind_list);
 	}
 
-	dma_resv_unlock(&vm->resv);
+	dma_resv_unlock(xe_vm_resv(vm));
 
 	return 0;
 
@@ -830,7 +832,8 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
 
 static struct dma_fence *
 xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
-	       struct xe_sync_entry *syncs, u32 num_syncs);
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       bool first_op, bool last_op);
 
 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 {
@@ -851,7 +854,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 			trace_xe_vma_rebind_worker(vma);
 		else
 			trace_xe_vma_rebind_exec(vma);
-		fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
+		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
 		if (IS_ERR(fence))
 			return fence;
 	}
@@ -887,14 +890,14 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	INIT_LIST_HEAD(&vma->notifier.rebind_link);
 	INIT_LIST_HEAD(&vma->extobj.link);
 
-	vma->vm = vm;
-	vma->start = start;
-	vma->end = end;
-	vma->pte_flags = 0;
+	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
+	vma->gpuva.vm = &vm->gpuvm;
+	vma->gpuva.va.addr = start;
+	vma->gpuva.va.range = end - start + 1;
 	if (read_only)
-		vma->pte_flags |= XE_PTE_FLAG_READ_ONLY;
+		vma->gpuva.flags |= XE_VMA_READ_ONLY;
 	if (is_null)
-		vma->pte_flags |= XE_PTE_FLAG_NULL;
+		vma->gpuva.flags |= DRM_GPUVA_SPARSE;
 
 	if (tile_mask) {
 		vma->tile_mask = tile_mask;
@@ -904,19 +907,30 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	}
 
 	if (vm->xe->info.platform == XE_PVC)
-		vma->use_atomic_access_pte_bit = true;
+		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
 
 	if (bo) {
+		struct drm_gpuvm_bo *vm_bo;
+
 		xe_bo_assert_held(bo);
-		vma->bo_offset = bo_offset_or_userptr;
-		vma->bo = xe_bo_get(bo);
-		list_add_tail(&vma->bo_link, &bo->vmas);
+
+		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
+		if (IS_ERR(vm_bo)) {
+			kfree(vma);
+			return ERR_CAST(vm_bo);
+		}
+
+		drm_gem_object_get(&bo->ttm.base);
+		vma->gpuva.gem.obj = &bo->ttm.base;
+		vma->gpuva.gem.offset = bo_offset_or_userptr;
+		drm_gpuva_link(&vma->gpuva, vm_bo);
+		drm_gpuvm_bo_put(vm_bo);
 	} else /* userptr or null */ {
 		if (!is_null) {
 			u64 size = end - start + 1;
 			int err;
 
-			vma->userptr.ptr = bo_offset_or_userptr;
+			vma->gpuva.gem.offset = bo_offset_or_userptr;
 
 			err = mmu_interval_notifier_insert(&vma->userptr.notifier,
 							   current->mm,
@@ -991,11 +1005,19 @@ static struct xe_vma *
 bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
 			    struct xe_vma *ignore)
 {
-	struct xe_vma *vma;
+	struct drm_gpuvm_bo *vm_bo;
+	struct drm_gpuva *va;
+	struct drm_gem_object *obj = &bo->ttm.base;
+
+	xe_bo_assert_held(bo);
+
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+		drm_gpuvm_bo_for_each_va(va, vm_bo) {
+			struct xe_vma *vma = gpuva_to_vma(va);
 
-	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		if (vma != ignore && xe_vma_vm(vma) == vm)
-			return vma;
+			if (vma != ignore && xe_vma_vm(vma) == vm)
+				return vma;
+		}
 	}
 
 	return NULL;
@@ -1016,6 +1038,8 @@ static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
 
 static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
 {
+	lockdep_assert_held_write(&vm->lock);
+
 	list_add(&vma->extobj.link, &vm->extobj.list);
 	vm->extobj.entries++;
 }
@@ -1049,19 +1073,21 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	XE_BUG_ON(!list_empty(&vma->unbind_link));
 
 	if (xe_vma_is_userptr(vma)) {
-		XE_WARN_ON(!vma->destroyed);
+		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
+
 		spin_lock(&vm->userptr.invalidated_lock);
 		list_del_init(&vma->userptr.invalidate_link);
 		spin_unlock(&vm->userptr.invalidated_lock);
 		list_del(&vma->userptr_link);
 	} else if (!xe_vma_is_null(vma)) {
 		xe_bo_assert_held(xe_vma_bo(vma));
-		list_del(&vma->bo_link);
 
 		spin_lock(&vm->notifier.list_lock);
 		list_del(&vma->notifier.rebind_link);
 		spin_unlock(&vm->notifier.list_lock);
 
+		drm_gpuva_unlink(&vma->gpuva);
+
 		if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) {
 			struct xe_vma *other;
 
@@ -1116,65 +1142,34 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
 		xe_bo_put(bo);
 }
 
-static struct xe_vma *to_xe_vma(const struct rb_node *node)
-{
-	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
-	return (struct xe_vma *)node;
-}
-
-static int xe_vma_cmp(struct xe_vma *a, struct xe_vma *b)
-{
-	if (xe_vma_end(a) - 1 < xe_vma_start(b)) {
-		return -1;
-	} else if (xe_vma_end(b) - 1 < xe_vma_start(a)) {
-		return 1;
-	} else {
-		return 0;
-	}
-}
-
-static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
-{
-	return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
-}
-
-int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
-{
-	struct xe_vma *cmp = to_xe_vma(node);
-	struct xe_vma *own = (struct xe_vma *)key;
-
-	if (xe_vma_start(own) > xe_vma_end(cmp) - 1)
-		return 1;
-
-	if (xe_vma_end(own) - 1 < xe_vma_start(cmp))
-		return -1;
-
-	return 0;
-}
-
 struct xe_vma *
-xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma)
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
 {
-	struct rb_node *node;
+	struct drm_gpuva *gpuva;
 
 	lockdep_assert_held(&vm->lock);
 
 	if (xe_vm_is_closed_or_banned(vm))
 		return NULL;
 
-	XE_BUG_ON(xe_vma_end(vma) > vm->size);
+	XE_BUG_ON(start + range > vm->size);
 
-	node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
+	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
 
-	return node ? to_xe_vma(node) : NULL;
+	return gpuva ? gpuva_to_vma(gpuva) : NULL;
 }
 
-static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
+	int err;
+
 	XE_BUG_ON(xe_vma_vm(vma) != vm);
 	lockdep_assert_held(&vm->lock);
 
-	rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
+	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+	XE_WARN_ON(err);	/* Shouldn't be possible */
+
+	return err;
 }
 
 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
@@ -1182,18 +1177,38 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
 	XE_BUG_ON(xe_vma_vm(vma) != vm);
 	lockdep_assert_held(&vm->lock);
 
-	rb_erase(&vma->vm_node, &vm->vmas);
+	drm_gpuva_remove(&vma->gpuva);
 	if (vm->usm.last_fault_vma == vma)
 		vm->usm.last_fault_vma = NULL;
 }
 
-static void async_op_work_func(struct work_struct *w);
+static struct drm_gpuva_op *xe_vm_op_alloc(void)
+{
+	struct xe_vma_op *op;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+
+	if (unlikely(!op))
+		return NULL;
+
+	return &op->base;
+}
+
+static void xe_vm_free(struct drm_gpuvm *gpuvm);
+
+static struct drm_gpuvm_ops gpuvm_ops = {
+	.op_alloc = xe_vm_op_alloc,
+	.vm_free = xe_vm_free,
+};
+
+static void xe_vma_op_work_func(struct work_struct *w);
 static void vm_destroy_work_func(struct work_struct *w);
 
 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 {
+	struct drm_gem_object *vm_resv_obj;
 	struct xe_vm *vm;
-	int err, i = 0, number_tiles = 0;
+	int err, number_tiles = 0;
 	struct xe_tile *tile;
 	u8 id;
 
@@ -1202,12 +1217,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		return ERR_PTR(-ENOMEM);
 
 	vm->xe = xe;
-	kref_init(&vm->refcount);
-	dma_resv_init(&vm->resv);
 
 	vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
 
-	vm->vmas = RB_ROOT;
 	vm->flags = flags;
 
 	init_rwsem(&vm->lock);
@@ -1223,7 +1235,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	spin_lock_init(&vm->notifier.list_lock);
 
 	INIT_LIST_HEAD(&vm->async_ops.pending);
-	INIT_WORK(&vm->async_ops.work, async_op_work_func);
+	INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
 	spin_lock_init(&vm->async_ops.lock);
 
 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
@@ -1239,9 +1251,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		xe_device_mem_access_get(xe);
 	}
 
-	err = dma_resv_lock_interruptible(&vm->resv, NULL);
+	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+	if (!vm_resv_obj) {
+		err = -ENOMEM;
+		goto err_no_resv;
+	}
+
+	drm_gpuvm_init(&vm->gpuvm, "Xe VM", 0, &xe->drm, vm_resv_obj,
+		       0, vm->size, 0, 0, &gpuvm_ops);
+
+	drm_gem_object_put(vm_resv_obj);
+
+	err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
 	if (err)
-		goto err_put;
+		goto err_close;
 
 	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
 		vm->flags |= XE_VM_FLAGS_64K;
@@ -1255,7 +1278,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		if (IS_ERR(vm->pt_root[id])) {
 			err = PTR_ERR(vm->pt_root[id]);
 			vm->pt_root[id] = NULL;
-			goto err_destroy_root;
+			goto err_unlock_close;
 		}
 	}
 
@@ -1266,7 +1289,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 
 			err = xe_pt_create_scratch(xe, tile, vm);
 			if (err)
-				goto err_scratch_pt;
+				goto err_unlock_close;
 		}
 		vm->batch_invalidate_tlb = true;
 	}
@@ -1289,7 +1312,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 
 		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
 	}
-	dma_resv_unlock(&vm->resv);
+	dma_resv_unlock(xe_vm_resv(vm));
 
 	/* Kernel migration VM shouldn't have a circular loop.. */
 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
@@ -1307,8 +1330,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 						     ENGINE_FLAG_VM);
 			xe_vm_put(migrate_vm);
 			if (IS_ERR(eng)) {
-				xe_vm_close_and_put(vm);
-				return ERR_CAST(eng);
+				err = PTR_ERR(eng);
+				goto err_close;
 			}
 			vm->eng[id] = eng;
 			number_tiles++;
@@ -1329,27 +1352,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 
 	return vm;
 
-err_scratch_pt:
-	for_each_tile(tile, xe, id) {
-		if (!vm->pt_root[id])
-			continue;
+err_unlock_close:
+	dma_resv_unlock(xe_vm_resv(vm));
+err_close:
+	xe_vm_close_and_put(vm);
+	return ERR_PTR(err);
 
-		i = vm->pt_root[id]->level;
-		while (i)
-			if (vm->scratch_pt[id][--i])
-				xe_pt_destroy(vm->scratch_pt[id][i],
-					      vm->flags, NULL);
-		xe_bo_unpin(vm->scratch_bo[id]);
-		xe_bo_put(vm->scratch_bo[id]);
-	}
-err_destroy_root:
-	for_each_tile(tile, xe, id) {
-		if (vm->pt_root[id])
-			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
-	}
-	dma_resv_unlock(&vm->resv);
-err_put:
-	dma_resv_fini(&vm->resv);
+err_no_resv:
 	kfree(vm);
 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
 		xe_device_mem_access_put(xe);
@@ -1404,16 +1413,17 @@ static void xe_vm_close(struct xe_vm *vm)
 
 void xe_vm_close_and_put(struct xe_vm *vm)
 {
-	struct rb_root contested = RB_ROOT;
+	LIST_HEAD(contested);
 	struct ww_acquire_ctx ww;
 	struct xe_device *xe = vm->xe;
 	struct xe_tile *tile;
+	struct xe_vma *vma, *next_vma;
+	struct drm_gpuva *gpuva, *next;
 	u8 id;
 
 	XE_BUG_ON(vm->preempt.num_engines);
 
 	xe_vm_close(vm);
-
 	flush_async_ops(vm);
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
@@ -1428,16 +1438,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 
 	down_write(&vm->lock);
 	xe_vm_lock(vm, &ww, 0, false);
-	while (vm->vmas.rb_node) {
-		struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
+	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
+		vma = gpuva_to_vma(gpuva);
 
 		if (xe_vma_has_no_bo(vma)) {
 			down_read(&vm->userptr.notifier_lock);
-			vma->destroyed = true;
+			vma->gpuva.flags |= XE_VMA_DESTROYED;
 			up_read(&vm->userptr.notifier_lock);
 		}
 
-		rb_erase(&vma->vm_node, &vm->vmas);
+		xe_vm_remove_vma(vm, vma);
 
 		/* easy case, remove from VMA? */
 		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
@@ -1445,7 +1455,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 			continue;
 		}
 
-		rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
+		list_add_tail(&vma->unbind_link, &contested);
 	}
 
 	/*
@@ -1465,22 +1475,21 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
 					      NULL);
 		}
+		if (vm->pt_root[id]) {
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+			vm->pt_root[id] = NULL;
+		}
 	}
 	xe_vm_unlock(vm, &ww);
 
-	if (contested.rb_node) {
-
-		/*
-		 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
-		 * Since we hold a refcount to the bo, we can remove and free
-		 * the members safely without locking.
-		 */
-		while (contested.rb_node) {
-			struct xe_vma *vma = to_xe_vma(contested.rb_node);
-
-			rb_erase(&vma->vm_node, &contested);
-			xe_vma_destroy_unlocked(vma);
-		}
+	/*
+	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
+	 * Since we hold a refcount to the bo, we can remove and free
+	 * the members safely without locking.
+	 */
+	list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) {
+		list_del_init(&vma->unbind_link);
+		xe_vma_destroy_unlocked(vma);
 	}
 
 	if (vm->async_ops.error_capture.addr)
@@ -1503,7 +1512,6 @@ static void vm_destroy_work_func(struct work_struct *w)
 {
 	struct xe_vm *vm =
 		container_of(w, struct xe_vm, destroy_work);
-	struct ww_acquire_ctx ww;
 	struct xe_device *xe = vm->xe;
 	struct xe_tile *tile;
 	u8 id;
@@ -1524,29 +1532,17 @@ static void vm_destroy_work_func(struct work_struct *w)
 		}
 	}
 
-	/*
-	 * XXX: We delay destroying the PT root until the VM if freed as PT root
-	 * is needed for xe_vm_lock to work. If we remove that dependency this
-	 * can be moved to xe_vm_close_and_put.
-	 */
-	xe_vm_lock(vm, &ww, 0, false);
-	for_each_tile(tile, xe, id) {
-		if (vm->pt_root[id]) {
-			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
-			vm->pt_root[id] = NULL;
-		}
-	}
-	xe_vm_unlock(vm, &ww);
+	for_each_tile(tile, xe, id)
+		XE_WARN_ON(vm->pt_root[id]);
 
 	trace_xe_vm_free(vm);
 	dma_fence_put(vm->rebind_fence);
-	dma_resv_fini(&vm->resv);
 	kfree(vm);
 }
 
-void xe_vm_free(struct kref *ref)
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
 {
-	struct xe_vm *vm = container_of(ref, struct xe_vm, refcount);
+	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
 
 	/* To destroy the VM we need to be able to sleep */
 	queue_work(system_unbound_wq, &vm->destroy_work);
@@ -1573,7 +1569,8 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 
 static struct dma_fence *
 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
-		 struct xe_sync_entry *syncs, u32 num_syncs)
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool first_op, bool last_op)
 {
 	struct xe_tile *tile;
 	struct dma_fence *fence = NULL;
@@ -1598,7 +1595,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
 		if (!(vma->tile_present & BIT(id)))
 			goto next;
 
-		fence = __xe_pt_unbind_vma(tile, vma, e, syncs, num_syncs);
+		fence = __xe_pt_unbind_vma(tile, vma, e, first_op ? syncs : NULL,
+					   first_op ? num_syncs : 0);
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
 			goto err_fences;
@@ -1624,8 +1622,11 @@ next:
 		}
 	}
 
-	for (i = 0; i < num_syncs; i++)
-		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
+	if (last_op) {
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     cf ? &cf->base : fence);
+	}
 
 	return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
 
@@ -1643,7 +1644,8 @@ err_fences:
 
 static struct dma_fence *
 xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
-	       struct xe_sync_entry *syncs, u32 num_syncs)
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       bool first_op, bool last_op)
 {
 	struct xe_tile *tile;
 	struct dma_fence *fence;
@@ -1668,7 +1670,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
 		if (!(vma->tile_mask & BIT(id)))
 			goto next;
 
-		fence = __xe_pt_bind_vma(tile, vma, e, syncs, num_syncs,
+		fence = __xe_pt_bind_vma(tile, vma, e, first_op ? syncs : NULL,
+					 first_op ? num_syncs : 0,
 					 vma->tile_present & BIT(id));
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
@@ -1695,8 +1698,11 @@ next:
 		}
 	}
 
-	for (i = 0; i < num_syncs; i++)
-		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
+	if (last_op) {
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     cf ? &cf->base : fence);
+	}
 
 	return cf ? &cf->base : fence;
 
@@ -1794,15 +1800,29 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence)
 
 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 			struct xe_engine *e, struct xe_sync_entry *syncs,
-			u32 num_syncs, struct async_op_fence *afence)
+			u32 num_syncs, struct async_op_fence *afence,
+			bool immediate, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
 
 	xe_vm_assert_held(vm);
 
-	fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
-	if (IS_ERR(fence))
-		return PTR_ERR(fence);
+	if (immediate) {
+		fence = xe_vm_bind_vma(vma, e, syncs, num_syncs, first_op,
+				       last_op);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+	} else {
+		int i;
+
+		XE_BUG_ON(!xe_vm_in_fault_mode(vm));
+
+		fence = dma_fence_get_stub();
+		if (last_op) {
+			for (i = 0; i < num_syncs; i++)
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+		}
+	}
 	if (afence)
 		add_async_op_fence_cb(vm, fence, afence);
 
@@ -1812,32 +1832,35 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 
 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
 		      struct xe_bo *bo, struct xe_sync_entry *syncs,
-		      u32 num_syncs, struct async_op_fence *afence)
+		      u32 num_syncs, struct async_op_fence *afence,
+		      bool immediate, bool first_op, bool last_op)
 {
 	int err;
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(bo);
 
-	if (bo) {
+	if (bo && immediate) {
 		err = xe_bo_validate(bo, vm, true);
 		if (err)
 			return err;
 	}
 
-	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
+	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence, immediate,
+			    first_op, last_op);
 }
 
 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 			struct xe_engine *e, struct xe_sync_entry *syncs,
-			u32 num_syncs, struct async_op_fence *afence)
+			u32 num_syncs, struct async_op_fence *afence,
+			bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
 
-	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
+	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op, last_op);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 	if (afence)
@@ -2066,7 +2089,8 @@ static const u32 region_to_mem_type[] = {
 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 			  struct xe_engine *e, u32 region,
 			  struct xe_sync_entry *syncs, u32 num_syncs,
-			  struct async_op_fence *afence)
+			  struct async_op_fence *afence, bool first_op,
+			  bool last_op)
 {
 	int err;
 
@@ -2080,14 +2104,16 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 	if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
 		return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs,
-				  afence);
+				  afence, true, first_op, last_op);
 	} else {
 		int i;
 
 		/* Nothing to do, signal fences now */
-		for (i = 0; i < num_syncs; i++)
-			xe_sync_entry_signal(&syncs[i], NULL,
-					     dma_fence_get_stub());
+		if (last_op) {
+			for (i = 0; i < num_syncs; i++)
+				xe_sync_entry_signal(&syncs[i], NULL,
+						     dma_fence_get_stub());
+		}
 		if (afence)
 			dma_fence_signal(&afence->fence);
 		return 0;
@@ -2096,29 +2122,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 #define VM_BIND_OP(op)	(op & 0xffff)
 
-static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
-			   struct xe_engine *e, struct xe_bo *bo, u32 op,
-			   u32 region, struct xe_sync_entry *syncs,
-			   u32 num_syncs, struct async_op_fence *afence)
-{
-	switch (VM_BIND_OP(op)) {
-	case XE_VM_BIND_OP_MAP:
-		return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence);
-	case XE_VM_BIND_OP_UNMAP:
-	case XE_VM_BIND_OP_UNMAP_ALL:
-		return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence);
-	case XE_VM_BIND_OP_MAP_USERPTR:
-		return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence);
-	case XE_VM_BIND_OP_PREFETCH:
-		return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs,
-				      afence);
-		break;
-	default:
-		XE_BUG_ON("NOT POSSIBLE");
-		return -EINVAL;
-	}
-}
-
 struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
 {
 	int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
@@ -2134,810 +2137,847 @@ static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
 	tv->bo = xe_vm_ttm_bo(vm);
 }
 
-static bool is_map_op(u32 op)
-{
-	return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP ||
-		VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR;
-}
-
-static bool is_unmap_op(u32 op)
+static void vm_set_async_error(struct xe_vm *vm, int err)
 {
-	return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP ||
-		VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL;
+	lockdep_assert_held(&vm->lock);
+	vm->async_ops.error = err;
 }
 
-static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
-			 struct xe_engine *e, struct xe_bo *bo,
-			 struct drm_xe_vm_bind_op *bind_op,
-			 struct xe_sync_entry *syncs, u32 num_syncs,
-			 struct async_op_fence *afence)
+static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
+				    u64 addr, u64 range, u32 op)
 {
-	LIST_HEAD(objs);
-	LIST_HEAD(dups);
-	struct ttm_validate_buffer tv_bo, tv_vm;
-	struct ww_acquire_ctx ww;
-	struct xe_bo *vbo;
-	int err, i;
+	struct xe_device *xe = vm->xe;
+	struct xe_vma *vma;
+	bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
 
 	lockdep_assert_held(&vm->lock);
-	XE_BUG_ON(!list_empty(&vma->unbind_link));
-
-	/* Binds deferred to faults, signal fences now */
-	if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) &&
-	    !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) {
-		for (i = 0; i < num_syncs; i++)
-			xe_sync_entry_signal(&syncs[i], NULL,
-					     dma_fence_get_stub());
-		if (afence)
-			dma_fence_signal(&afence->fence);
-		return 0;
-	}
-
-	xe_vm_tv_populate(vm, &tv_vm);
-	list_add_tail(&tv_vm.head, &objs);
-	vbo = xe_vma_bo(vma);
-	if (vbo) {
-		/*
-		 * An unbind can drop the last reference to the BO and
-		 * the BO is needed for ttm_eu_backoff_reservation so
-		 * take a reference here.
-		 */
-		xe_bo_get(vbo);
-
-		if (!vbo->vm) {
-			tv_bo.bo = &vbo->ttm;
-			tv_bo.num_shared = 1;
-			list_add(&tv_bo.head, &objs);
-		}
-	}
 
-again:
-	err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
-	if (!err) {
-		err = __vm_bind_ioctl(vm, vma, e, bo,
-				      bind_op->op, bind_op->region, syncs,
-				      num_syncs, afence);
-		ttm_eu_backoff_reservation(&ww, &objs);
-		if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
-			lockdep_assert_held_write(&vm->lock);
-			err = xe_vma_userptr_pin_pages(vma);
-			if (!err)
-				goto again;
-		}
+	switch (VM_BIND_OP(op)) {
+	case XE_VM_BIND_OP_MAP:
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		vma = xe_vm_find_overlapping_vma(vm, addr, range);
+		if (XE_IOCTL_ERR(xe, vma && !async))
+			return -EBUSY;
+		break;
+	case XE_VM_BIND_OP_UNMAP:
+	case XE_VM_BIND_OP_PREFETCH:
+		vma = xe_vm_find_overlapping_vma(vm, addr, range);
+		if (XE_IOCTL_ERR(xe, !vma))
+			return -ENODATA;	/* Not an actual error, IOCTL
+						   cleans up returns and 0 */
+		if (XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr ||
+				 xe_vma_end(vma) != addr + range) && !async))
+			return -EINVAL;
+		break;
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		if (XE_IOCTL_ERR(xe, list_empty(&bo->ttm.base.gpuva.list)))
+			return -ENODATA;	/* Not an actual error, IOCTL
+						   cleans up returns and 0 */
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		return -EINVAL;
 	}
-	xe_bo_put(vbo);
 
-	return err;
+	return 0;
 }
 
-struct async_op {
-	struct xe_vma *vma;
-	struct xe_engine *engine;
-	struct xe_bo *bo;
-	struct drm_xe_vm_bind_op bind_op;
-	struct xe_sync_entry *syncs;
-	u32 num_syncs;
-	struct list_head link;
-	struct async_op_fence *fence;
-};
-
-static void async_op_cleanup(struct xe_vm *vm, struct async_op *op)
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
+			     bool post_commit)
 {
-	while (op->num_syncs--)
-		xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
-	kfree(op->syncs);
-	xe_bo_put(op->bo);
-	if (op->engine)
-		xe_engine_put(op->engine);
-	xe_vm_put(vm);
-	if (op->fence)
-		dma_fence_put(&op->fence->fence);
-	kfree(op);
+	down_read(&vm->userptr.notifier_lock);
+	vma->gpuva.flags |= XE_VMA_DESTROYED;
+	up_read(&vm->userptr.notifier_lock);
+	if (post_commit)
+		xe_vm_remove_vma(vm, vma);
 }
 
-static struct async_op *next_async_op(struct xe_vm *vm)
+#undef ULL
+#define ULL	unsigned long long
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 {
-	return list_first_entry_or_null(&vm->async_ops.pending,
-					struct async_op, link);
-}
+	struct xe_vma *vma;
 
-static void vm_set_async_error(struct xe_vm *vm, int err)
+	switch (op->op) {
+	case DRM_GPUVA_OP_MAP:
+		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
+		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		vma = gpuva_to_vma(op->remap.unmap->va);
+		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->unmap.keep ? 1 : 0);
+		if (op->remap.prev)
+			vm_dbg(&xe->drm,
+			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.prev->va.addr,
+			       (ULL)op->remap.prev->va.range);
+		if (op->remap.next)
+			vm_dbg(&xe->drm,
+			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.next->va.addr,
+			       (ULL)op->remap.next->va.range);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		vma = gpuva_to_vma(op->unmap.va);
+		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->unmap.keep ? 1 : 0);
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+	}
+}
+#else
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 {
-	lockdep_assert_held(&vm->lock);
-	vm->async_ops.error = err;
 }
+#endif
 
-static void async_op_work_func(struct work_struct *w)
+/*
+ * Create operations list from IOCTL arguments, setup operations fields so parse
+ * and commit steps are decoupled from IOCTL arguments. This step can fail.
+ */
+static struct drm_gpuva_ops *
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
+			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+			 u32 operation, u64 tile_mask, u32 region)
 {
-	struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
+	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
+	struct ww_acquire_ctx ww;
+	struct drm_gpuva_ops *ops;
+	struct drm_gpuva_op *__op;
+	struct xe_vma_op *op;
+	struct drm_gpuvm_bo *vm_bo;
+	int err;
 
-	for (;;) {
-		struct async_op *op;
-		int err;
+	lockdep_assert_held_write(&vm->lock);
 
-		if (vm->async_ops.error && !xe_vm_is_closed(vm))
-			break;
+	vm_dbg(&vm->xe->drm,
+	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
+	       VM_BIND_OP(operation), (ULL)addr, (ULL)range,
+	       (ULL)bo_offset_or_userptr);
 
-		spin_lock_irq(&vm->async_ops.lock);
-		op = next_async_op(vm);
-		if (op)
-			list_del_init(&op->link);
-		spin_unlock_irq(&vm->async_ops.lock);
+	switch (VM_BIND_OP(operation)) {
+	case XE_VM_BIND_OP_MAP:
+	case XE_VM_BIND_OP_MAP_USERPTR:
+		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
+						  obj, bo_offset_or_userptr);
+		if (IS_ERR(ops))
+			return ops;
 
-		if (!op)
-			break;
+		drm_gpuva_for_each_op(__op, ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
-		if (!xe_vm_is_closed(vm)) {
-			bool first, last;
+			op->tile_mask = tile_mask;
+			op->map.immediate =
+				operation & XE_VM_BIND_FLAG_IMMEDIATE;
+			op->map.read_only =
+				operation & XE_VM_BIND_FLAG_READONLY;
+			op->map.is_null = operation & XE_VM_BIND_FLAG_NULL;
+		}
+		break;
+	case XE_VM_BIND_OP_UNMAP:
+		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
+		if (IS_ERR(ops))
+			return ops;
 
-			down_write(&vm->lock);
-again:
-			first = op->vma->first_munmap_rebind;
-			last = op->vma->last_munmap_rebind;
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-#define FORCE_ASYNC_OP_ERROR	BIT(31)
-			if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) {
-				err = vm_bind_ioctl(vm, op->vma, op->engine,
-						    op->bo, &op->bind_op,
-						    op->syncs, op->num_syncs,
-						    op->fence);
-			} else {
-				err = -ENOMEM;
-				op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR;
-			}
-#else
-			err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo,
-					    &op->bind_op, op->syncs,
-					    op->num_syncs, op->fence);
-#endif
-			/*
-			 * In order for the fencing to work (stall behind
-			 * existing jobs / prevent new jobs from running) all
-			 * the dma-resv slots need to be programmed in a batch
-			 * relative to execs / the rebind worker. The vm->lock
-			 * ensure this.
-			 */
-			if (!err && ((first && VM_BIND_OP(op->bind_op.op) ==
-				      XE_VM_BIND_OP_UNMAP) ||
-				     vm->async_ops.munmap_rebind_inflight)) {
-				if (last) {
-					op->vma->last_munmap_rebind = false;
-					vm->async_ops.munmap_rebind_inflight =
-						false;
-				} else {
-					vm->async_ops.munmap_rebind_inflight =
-						true;
-
-					async_op_cleanup(vm, op);
-
-					spin_lock_irq(&vm->async_ops.lock);
-					op = next_async_op(vm);
-					XE_BUG_ON(!op);
-					list_del_init(&op->link);
-					spin_unlock_irq(&vm->async_ops.lock);
-
-					goto again;
-				}
-			}
-			if (err) {
-				trace_xe_vma_fail(op->vma);
-				drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d",
-					 VM_BIND_OP(op->bind_op.op),
-					 err);
+		drm_gpuva_for_each_op(__op, ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
-				spin_lock_irq(&vm->async_ops.lock);
-				list_add(&op->link, &vm->async_ops.pending);
-				spin_unlock_irq(&vm->async_ops.lock);
+			op->tile_mask = tile_mask;
+		}
+		break;
+	case XE_VM_BIND_OP_PREFETCH:
+		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
+		if (IS_ERR(ops))
+			return ops;
 
-				vm_set_async_error(vm, err);
-				up_write(&vm->lock);
+		drm_gpuva_for_each_op(__op, ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
-				if (vm->async_ops.error_capture.addr)
-					vm_error_capture(vm, err,
-							 op->bind_op.op,
-							 op->bind_op.addr,
-							 op->bind_op.range);
-				break;
-			}
-			up_write(&vm->lock);
-		} else {
-			trace_xe_vma_flush(op->vma);
+			op->tile_mask = tile_mask;
+			op->prefetch.region = region;
+		}
+		break;
+	case XE_VM_BIND_OP_UNMAP_ALL:
+		XE_BUG_ON(!bo);
 
-			if (is_unmap_op(op->bind_op.op)) {
-				down_write(&vm->lock);
-				xe_vma_destroy_unlocked(op->vma);
-				up_write(&vm->lock);
-			}
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return ERR_PTR(err);
 
-			if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-						   &op->fence->fence.flags)) {
-				if (!xe_vm_no_dma_fences(vm)) {
-					op->fence->started = true;
-					smp_wmb();
-					wake_up_all(&op->fence->wq);
-				}
-				dma_fence_signal(&op->fence->fence);
-			}
+		vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
+		if (!vm_bo)
+			break;
+
+		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
+		drm_gpuvm_bo_put(vm_bo);
+		xe_bo_unlock(bo, &ww);
+		if (IS_ERR(ops))
+			return ops;
+
+		drm_gpuva_for_each_op(__op, ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+			op->tile_mask = tile_mask;
 		}
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
+		ops = ERR_PTR(-EINVAL);
+	}
 
-		async_op_cleanup(vm, op);
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	if (operation & FORCE_ASYNC_OP_ERROR) {
+		op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
+					      base.entry);
+		if (op)
+			op->inject_error = true;
 	}
+#endif
+
+	if (!IS_ERR(ops))
+		drm_gpuva_for_each_op(__op, ops)
+			print_op(vm->xe, __op);
+
+	return ops;
 }
 
-static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
-				 struct xe_engine *e, struct xe_bo *bo,
-				 struct drm_xe_vm_bind_op *bind_op,
-				 struct xe_sync_entry *syncs, u32 num_syncs)
+static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
+			      u64 tile_mask, bool read_only, bool is_null)
 {
-	struct async_op *op;
-	bool installed = false;
-	u64 seqno;
-	int i;
+	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+	struct xe_vma *vma;
+	struct ww_acquire_ctx ww;
+	int err;
 
-	lockdep_assert_held(&vm->lock);
+	lockdep_assert_held_write(&vm->lock);
 
-	op = kmalloc(sizeof(*op), GFP_KERNEL);
-	if (!op) {
-		return -ENOMEM;
+	if (bo) {
+		err = xe_bo_lock(bo, &ww, 0, true);
+		if (err)
+			return ERR_PTR(err);
 	}
+	vma = xe_vma_create(vm, bo, op->gem.offset,
+			    op->va.addr, op->va.addr +
+			    op->va.range - 1, read_only, is_null,
+			    tile_mask);
+	if (bo)
+		xe_bo_unlock(bo, &ww);
 
-	if (num_syncs) {
-		op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL);
-		if (!op->fence) {
-			kfree(op);
-			return -ENOMEM;
+	if (xe_vma_is_userptr(vma)) {
+		err = xe_vma_userptr_pin_pages(vma);
+		if (err) {
+			prep_vma_destroy(vm, vma, false);
+			xe_vma_destroy_unlocked(vma);
+			return ERR_PTR(err);
 		}
+	} else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
+		vm_insert_extobj(vm, vma);
+		err = add_preempt_fences(vm, bo);
+		if (err) {
+			prep_vma_destroy(vm, vma, false);
+			xe_vma_destroy_unlocked(vma);
+			return ERR_PTR(err);
+		}
+	}
+
+	return vma;
+}
+
+/*
+ * Parse operations list and create any resources needed for the operations
+ * prior to fully committing to the operations. This setup can fail.
+ */
+static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
+				   struct drm_gpuva_ops **ops, int num_ops_list,
+				   struct xe_sync_entry *syncs, u32 num_syncs,
+				   struct list_head *ops_list, bool async)
+{
+	struct xe_vma_op *last_op = NULL;
+	struct list_head *async_list = NULL;
+	struct async_op_fence *fence = NULL;
+	int err, i;
+
+	lockdep_assert_held_write(&vm->lock);
+	XE_BUG_ON(num_ops_list > 1 && !async);
+
+	if (num_syncs && async) {
+		u64 seqno;
+
+		fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+		if (!fence)
+			return -ENOMEM;
 
 		seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
-		dma_fence_init(&op->fence->fence, &async_op_fence_ops,
+		dma_fence_init(&fence->fence, &async_op_fence_ops,
 			       &vm->async_ops.lock, e ? e->bind.fence_ctx :
 			       vm->async_ops.fence.context, seqno);
 
 		if (!xe_vm_no_dma_fences(vm)) {
-			op->fence->vm = vm;
-			op->fence->started = false;
-			init_waitqueue_head(&op->fence->wq);
+			fence->vm = vm;
+			fence->started = false;
+			init_waitqueue_head(&fence->wq);
 		}
-	} else {
-		op->fence = NULL;
 	}
-	op->vma = vma;
-	op->engine = e;
-	op->bo = bo;
-	op->bind_op = *bind_op;
-	op->syncs = syncs;
-	op->num_syncs = num_syncs;
-	INIT_LIST_HEAD(&op->link);
 
-	for (i = 0; i < num_syncs; i++)
-		installed |= xe_sync_entry_signal(&syncs[i], NULL,
-						  &op->fence->fence);
+	for (i = 0; i < num_ops_list; ++i) {
+		struct drm_gpuva_ops *__ops = ops[i];
+		struct drm_gpuva_op *__op;
 
-	if (!installed && op->fence)
-		dma_fence_signal(&op->fence->fence);
+		drm_gpuva_for_each_op(__op, __ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+			bool first = !async_list;
 
-	spin_lock_irq(&vm->async_ops.lock);
-	list_add_tail(&op->link, &vm->async_ops.pending);
-	spin_unlock_irq(&vm->async_ops.lock);
+			XE_BUG_ON(!first && !async);
 
-	if (!vm->async_ops.error)
-		queue_work(system_unbound_wq, &vm->async_ops.work);
+			INIT_LIST_HEAD(&op->link);
+			if (first)
+				async_list = ops_list;
+			list_add_tail(&op->link, async_list);
 
-	return 0;
-}
-
-static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
-			       struct xe_engine *e, struct xe_bo *bo,
-			       struct drm_xe_vm_bind_op *bind_op,
-			       struct xe_sync_entry *syncs, u32 num_syncs)
-{
-	struct xe_vma *__vma, *next;
-	struct list_head rebind_list;
-	struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL;
-	u32 num_in_syncs = 0, num_out_syncs = 0;
-	bool first = true, last;
-	int err;
-	int i;
+			if (first) {
+				op->flags |= XE_VMA_OP_FIRST;
+				op->num_syncs = num_syncs;
+				op->syncs = syncs;
+			}
 
-	lockdep_assert_held(&vm->lock);
+			op->engine = e;
 
-	/* Not a linked list of unbinds + rebinds, easy */
-	if (list_empty(&vma->unbind_link))
-		return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op,
-					     syncs, num_syncs);
+			switch (op->base.op) {
+			case DRM_GPUVA_OP_MAP:
+			{
+				struct xe_vma *vma;
 
-	/*
-	 * Linked list of unbinds + rebinds, decompose syncs into 'in / out'
-	 * passing the 'in' to the first operation and 'out' to the last. Also
-	 * the reference counting is a little tricky, increment the VM / bind
-	 * engine ref count on all but the last operation and increment the BOs
-	 * ref count on each rebind.
-	 */
+				vma = new_vma(vm, &op->base.map,
+					      op->tile_mask, op->map.read_only,
+					      op->map.is_null);
+				if (IS_ERR(vma)) {
+					err = PTR_ERR(vma);
+					goto free_fence;
+				}
 
-	XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP &&
-		  VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL &&
-		  VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH);
+				op->map.vma = vma;
+				break;
+			}
+			case DRM_GPUVA_OP_REMAP:
+				if (op->base.remap.prev) {
+					struct xe_vma *vma;
+					bool read_only =
+						op->base.remap.unmap->va->flags &
+						XE_VMA_READ_ONLY;
+					bool is_null =
+						op->base.remap.unmap->va->flags &
+						DRM_GPUVA_SPARSE;
+
+					vma = new_vma(vm, op->base.remap.prev,
+						      op->tile_mask, read_only,
+						      is_null);
+					if (IS_ERR(vma)) {
+						err = PTR_ERR(vma);
+						goto free_fence;
+					}
+
+					op->remap.prev = vma;
+				}
 
-	/* Decompose syncs */
-	if (num_syncs) {
-		in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL);
-		out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL);
-		if (!in_syncs || !out_syncs) {
-			err = -ENOMEM;
-			goto out_error;
-		}
+				if (op->base.remap.next) {
+					struct xe_vma *vma;
+					bool read_only =
+						op->base.remap.unmap->va->flags &
+						XE_VMA_READ_ONLY;
+
+					bool is_null =
+						op->base.remap.unmap->va->flags &
+						DRM_GPUVA_SPARSE;
+
+					vma = new_vma(vm, op->base.remap.next,
+						      op->tile_mask, read_only,
+						      is_null);
+					if (IS_ERR(vma)) {
+						err = PTR_ERR(vma);
+						goto free_fence;
+					}
+
+					op->remap.next = vma;
+				}
 
-		for (i = 0; i < num_syncs; ++i) {
-			bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL;
+				/* XXX: Support no doing remaps */
+				op->remap.start =
+					xe_vma_start(gpuva_to_vma(op->base.remap.unmap->va));
+				op->remap.range =
+					xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va));
+				break;
+			case DRM_GPUVA_OP_UNMAP:
+				op->unmap.start =
+					xe_vma_start(gpuva_to_vma(op->base.unmap.va));
+				op->unmap.range =
+					xe_vma_size(gpuva_to_vma(op->base.unmap.va));
+				break;
+			case DRM_GPUVA_OP_PREFETCH:
+				/* Nothing to do */
+				break;
+			default:
+				XE_BUG_ON("NOT POSSIBLE");
+			}
 
-			if (signal)
-				out_syncs[num_out_syncs++] = syncs[i];
-			else
-				in_syncs[num_in_syncs++] = syncs[i];
+			last_op = op;
 		}
-	}
 
-	/* Do unbinds + move rebinds to new list */
-	INIT_LIST_HEAD(&rebind_list);
-	list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) {
-		if (__vma->destroyed ||
-		    VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) {
-			list_del_init(&__vma->unbind_link);
-			xe_bo_get(bo);
-			err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma,
-						    e ? xe_engine_get(e) : NULL,
-						    bo, bind_op, first ?
-						    in_syncs : NULL,
-						    first ? num_in_syncs : 0);
-			if (err) {
-				xe_bo_put(bo);
-				xe_vm_put(vm);
-				if (e)
-					xe_engine_put(e);
-				goto out_error;
-			}
-			in_syncs = NULL;
-			first = false;
-		} else {
-			list_move_tail(&__vma->unbind_link, &rebind_list);
-		}
+		last_op->ops = __ops;
 	}
-	last = list_empty(&rebind_list);
-	if (!last) {
-		xe_vm_get(vm);
-		if (e)
-			xe_engine_get(e);
-	}
-	err = __vm_bind_ioctl_async(vm, vma, e,
-				    bo, bind_op,
-				    first ? in_syncs :
-				    last ? out_syncs : NULL,
-				    first ? num_in_syncs :
-				    last ? num_out_syncs : 0);
-	if (err) {
-		if (!last) {
-			xe_vm_put(vm);
-			if (e)
-				xe_engine_put(e);
-		}
-		goto out_error;
-	}
-	in_syncs = NULL;
-
-	/* Do rebinds */
-	list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) {
-		list_del_init(&__vma->unbind_link);
-		last = list_empty(&rebind_list);
-
-		if (xe_vma_is_userptr(__vma)) {
-			bind_op->op = XE_VM_BIND_FLAG_ASYNC |
-				XE_VM_BIND_OP_MAP_USERPTR;
-		} else {
-			bind_op->op = XE_VM_BIND_FLAG_ASYNC |
-				XE_VM_BIND_OP_MAP;
-			xe_bo_get(xe_vma_bo(__vma));
-		}
 
-		if (!last) {
-			xe_vm_get(vm);
-			if (e)
-				xe_engine_get(e);
-		}
+	if (!last_op)
+		return -ENODATA;
 
-		err = __vm_bind_ioctl_async(vm, __vma, e,
-					    xe_vma_bo(__vma), bind_op, last ?
-					    out_syncs : NULL,
-					    last ? num_out_syncs : 0);
-		if (err) {
-			if (!last) {
-				xe_vm_put(vm);
-				if (e)
-					xe_engine_put(e);
-			}
-			goto out_error;
-		}
-	}
+	last_op->flags |= XE_VMA_OP_LAST;
+	last_op->num_syncs = num_syncs;
+	last_op->syncs = syncs;
+	last_op->fence = fence;
 
-	kfree(syncs);
 	return 0;
 
-out_error:
-	kfree(in_syncs);
-	kfree(out_syncs);
-	kfree(syncs);
-
+free_fence:
+	kfree(fence);
 	return err;
 }
 
-static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
-				      u64 addr, u64 range, u32 op)
+static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 {
-	struct xe_device *xe = vm->xe;
-	struct xe_vma *vma, lookup;
-	bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
-
-	lockdep_assert_held(&vm->lock);
+	int err = 0;
 
-	lookup.start = addr;
-	lookup.end = addr + range - 1;
+	lockdep_assert_held_write(&vm->lock);
 
-	switch (VM_BIND_OP(op)) {
-	case XE_VM_BIND_OP_MAP:
-	case XE_VM_BIND_OP_MAP_USERPTR:
-		vma = xe_vm_find_overlapping_vma(vm, &lookup);
-		if (XE_IOCTL_ERR(xe, vma))
-			return -EBUSY;
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err |= xe_vm_insert_vma(vm, op->map.vma);
 		break;
-	case XE_VM_BIND_OP_UNMAP:
-	case XE_VM_BIND_OP_PREFETCH:
-		vma = xe_vm_find_overlapping_vma(vm, &lookup);
-		if (XE_IOCTL_ERR(xe, !vma) ||
-		    XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr ||
-				 xe_vma_end(vma) != addr + range) && !async))
-			return -EINVAL;
+	case DRM_GPUVA_OP_REMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
+				 true);
+		if (op->remap.prev)
+			err |= xe_vm_insert_vma(vm, op->remap.prev);
+		if (op->remap.next)
+			err |= xe_vm_insert_vma(vm, op->remap.next);
 		break;
-	case XE_VM_BIND_OP_UNMAP_ALL:
+	case DRM_GPUVA_OP_UNMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		/* Nothing to do */
 		break;
 	default:
 		XE_BUG_ON("NOT POSSIBLE");
-		return -EINVAL;
 	}
 
-	return 0;
-}
-
-static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma)
-{
-	down_read(&vm->userptr.notifier_lock);
-	vma->destroyed = true;
-	up_read(&vm->userptr.notifier_lock);
-	xe_vm_remove_vma(vm, vma);
+	op->flags |= XE_VMA_OP_COMMITTED;
+	return err;
 }
 
-static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma)
+static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+			       struct xe_vma_op *op)
 {
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+	struct ttm_validate_buffer tv_bo, tv_vm;
+	struct ww_acquire_ctx ww;
+	struct xe_bo *vbo;
 	int err;
 
-	if (xe_vma_bo(vma) && !xe_vma_bo(vma)->vm) {
-		vm_insert_extobj(vm, vma);
-		err = add_preempt_fences(vm, xe_vma_bo(vma));
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
+	lockdep_assert_held_write(&vm->lock);
 
-/*
- * Find all overlapping VMAs in lookup range and add to a list in the returned
- * VMA, all of VMAs found will be unbound. Also possibly add 2 new VMAs that
- * need to be bound if first / last VMAs are not fully unbound. This is akin to
- * how munmap works.
- */
-static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
-					    struct xe_vma *lookup)
-{
-	struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
-	struct rb_node *node;
-	struct xe_vma *first = vma, *last = vma, *new_first = NULL,
-		      *new_last = NULL, *__vma, *next;
-	int err = 0;
-	bool first_munmap_rebind = false;
+	xe_vm_tv_populate(vm, &tv_vm);
+	list_add_tail(&tv_vm.head, &objs);
+	vbo = xe_vma_bo(vma);
+	if (vbo) {
+		/*
+		 * An unbind can drop the last reference to the BO and
+		 * the BO is needed for ttm_eu_backoff_reservation so
+		 * take a reference here.
+		 */
+		xe_bo_get(vbo);
 
-	lockdep_assert_held(&vm->lock);
-	XE_BUG_ON(!vma);
-
-	node = &vma->vm_node;
-	while ((node = rb_next(node))) {
-		if (!xe_vma_cmp_vma_cb(lookup, node)) {
-			__vma = to_xe_vma(node);
-			list_add_tail(&__vma->unbind_link, &vma->unbind_link);
-			last = __vma;
-		} else {
-			break;
+		if (!vbo->vm) {
+			tv_bo.bo = &vbo->ttm;
+			tv_bo.num_shared = 1;
+			list_add(&tv_bo.head, &objs);
 		}
 	}
 
-	node = &vma->vm_node;
-	while ((node = rb_prev(node))) {
-		if (!xe_vma_cmp_vma_cb(lookup, node)) {
-			__vma = to_xe_vma(node);
-			list_add(&__vma->unbind_link, &vma->unbind_link);
-			first = __vma;
-		} else {
-			break;
-		}
+again:
+	err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
+	if (err) {
+		xe_bo_put(vbo);
+		return err;
 	}
 
-	if (xe_vma_start(first) != xe_vma_start(lookup)) {
-		struct ww_acquire_ctx ww;
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(xe_vma_bo(vma));
 
-		if (xe_vma_bo(first))
-			err = xe_bo_lock(xe_vma_bo(first), &ww, 0, true);
-		if (err)
-			goto unwind;
-		new_first = xe_vma_create(xe_vma_vm(first), xe_vma_bo(first),
-					  xe_vma_bo(first) ?
-					  xe_vma_bo_offset(first) :
-					  xe_vma_userptr(first),
-					  xe_vma_start(first),
-					  xe_vma_start(lookup) - 1,
-					  xe_vma_read_only(first),
-					  (first->pte_flags &
-					   XE_PTE_FLAG_NULL),
-					  first->tile_mask);
-		if (xe_vma_bo(first))
-			xe_bo_unlock(xe_vma_bo(first), &ww);
-		if (!new_first) {
-			err = -ENOMEM;
-			goto unwind;
-		}
-		if (xe_vma_is_userptr(first)) {
-			err = xe_vma_userptr_pin_pages(new_first);
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err = xe_vm_bind(vm, vma, op->engine, xe_vma_bo(vma),
+				 op->syncs, op->num_syncs, op->fence,
+				 op->map.immediate || !xe_vm_in_fault_mode(vm),
+				 op->flags & XE_VMA_OP_FIRST,
+				 op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		bool prev = !!op->remap.prev;
+		bool next = !!op->remap.next;
+
+		if (!op->remap.unmap_done) {
+			vm->async_ops.munmap_rebind_inflight = true;
+			if (prev || next)
+				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
+			err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
+					   op->num_syncs,
+					   !prev && !next ? op->fence : NULL,
+					   op->flags & XE_VMA_OP_FIRST,
+					   op->flags & XE_VMA_OP_LAST && !prev &&
+					   !next);
 			if (err)
-				goto unwind;
+				break;
+			op->remap.unmap_done = true;
 		}
-		err = prep_replacement_vma(vm, new_first);
-		if (err)
-			goto unwind;
-	}
-
-	if (xe_vma_end(last) != xe_vma_end(lookup)) {
-		struct ww_acquire_ctx ww;
-		u64 chunk = xe_vma_end(lookup) - xe_vma_start(last);
 
-		if (xe_vma_bo(last))
-			err = xe_bo_lock(xe_vma_bo(last), &ww, 0, true);
-		if (err)
-			goto unwind;
-		new_last = xe_vma_create(xe_vma_vm(last), xe_vma_bo(last),
-					 xe_vma_bo(last) ?
-					 xe_vma_bo_offset(last) + chunk :
-					 xe_vma_userptr(last) + chunk,
-					 xe_vma_start(last) + chunk,
-					 xe_vma_end(last) - 1,
-					 xe_vma_read_only(last),
-					 (last->pte_flags & XE_PTE_FLAG_NULL),
-					 last->tile_mask);
-		if (xe_vma_bo(last))
-			xe_bo_unlock(xe_vma_bo(last), &ww);
-		if (!new_last) {
-			err = -ENOMEM;
-			goto unwind;
-		}
-		if (xe_vma_is_userptr(last)) {
-			err = xe_vma_userptr_pin_pages(new_last);
+		if (prev) {
+			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
+			err = xe_vm_bind(vm, op->remap.prev, op->engine,
+					 xe_vma_bo(op->remap.prev), op->syncs,
+					 op->num_syncs,
+					 !next ? op->fence : NULL, true, false,
+					 op->flags & XE_VMA_OP_LAST && !next);
+			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
 			if (err)
-				goto unwind;
+				break;
+			op->remap.prev = NULL;
 		}
-		err = prep_replacement_vma(vm, new_last);
-		if (err)
-			goto unwind;
-	}
 
-	prep_vma_destroy(vm, vma);
-	if (list_empty(&vma->unbind_link) && (new_first || new_last))
-		vma->first_munmap_rebind = true;
-	list_for_each_entry(__vma, &vma->unbind_link, unbind_link) {
-		if ((new_first || new_last) && !first_munmap_rebind) {
-			__vma->first_munmap_rebind = true;
-			first_munmap_rebind = true;
+		if (next) {
+			op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
+			err = xe_vm_bind(vm, op->remap.next, op->engine,
+					 xe_vma_bo(op->remap.next),
+					 op->syncs, op->num_syncs,
+					 op->fence, true, false,
+					 op->flags & XE_VMA_OP_LAST);
+			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
+			if (err)
+				break;
+			op->remap.next = NULL;
 		}
-		prep_vma_destroy(vm, __vma);
-	}
-	if (new_first) {
-		xe_vm_insert_vma(vm, new_first);
-		list_add_tail(&new_first->unbind_link, &vma->unbind_link);
-		if (!new_last)
-			new_first->last_munmap_rebind = true;
+		vm->async_ops.munmap_rebind_inflight = false;
+
+		break;
 	}
-	if (new_last) {
-		xe_vm_insert_vma(vm, new_last);
-		list_add_tail(&new_last->unbind_link, &vma->unbind_link);
-		new_last->last_munmap_rebind = true;
+	case DRM_GPUVA_OP_UNMAP:
+		err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
+				   op->num_syncs, op->fence,
+				   op->flags & XE_VMA_OP_FIRST,
+				   op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		err = xe_vm_prefetch(vm, vma, op->engine, op->prefetch.region,
+				     op->syncs, op->num_syncs, op->fence,
+				     op->flags & XE_VMA_OP_FIRST,
+				     op->flags & XE_VMA_OP_LAST);
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
 	}
 
-	return vma;
-
-unwind:
-	list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link)
-		list_del_init(&__vma->unbind_link);
-	if (new_last) {
-		prep_vma_destroy(vm, new_last);
-		xe_vma_destroy_unlocked(new_last);
-	}
-	if (new_first) {
-		prep_vma_destroy(vm, new_first);
-		xe_vma_destroy_unlocked(new_first);
+	ttm_eu_backoff_reservation(&ww, &objs);
+	if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+		lockdep_assert_held_write(&vm->lock);
+		err = xe_vma_userptr_pin_pages(vma);
+		if (!err)
+			goto again;
 	}
+	xe_bo_put(vbo);
 
-	return ERR_PTR(err);
+	if (err)
+		trace_xe_vma_fail(vma);
+
+	return err;
 }
 
-/*
- * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch
- */
-static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
-					      struct xe_vma *lookup,
-					      u32 region)
+static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 {
-	struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma,
-		      *next;
-	struct rb_node *node;
+	int ret = 0;
 
-	if (!xe_vma_has_no_bo(vma)) {
-		if (!xe_bo_can_migrate(xe_vma_bo(vma), region_to_mem_type[region]))
-			return ERR_PTR(-EINVAL);
-	}
+	lockdep_assert_held_write(&vm->lock);
 
-	node = &vma->vm_node;
-	while ((node = rb_next(node))) {
-		if (!xe_vma_cmp_vma_cb(lookup, node)) {
-			__vma = to_xe_vma(node);
-			if (!xe_vma_has_no_bo(__vma)) {
-				if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region]))
-					goto flush_list;
-			}
-			list_add_tail(&__vma->unbind_link, &vma->unbind_link);
-		} else {
-			break;
-		}
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	if (op->inject_error) {
+		op->inject_error = false;
+		return -ENOMEM;
 	}
+#endif
 
-	node = &vma->vm_node;
-	while ((node = rb_prev(node))) {
-		if (!xe_vma_cmp_vma_cb(lookup, node)) {
-			__vma = to_xe_vma(node);
-			if (!xe_vma_has_no_bo(__vma)) {
-				if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region]))
-					goto flush_list;
-			}
-			list_add(&__vma->unbind_link, &vma->unbind_link);
-		} else {
-			break;
-		}
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		ret = __xe_vma_op_execute(vm, op->map.vma, op);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma;
+
+		if (!op->remap.unmap_done)
+			vma = gpuva_to_vma(op->base.remap.unmap->va);
+		else if (op->remap.prev)
+			vma = op->remap.prev;
+		else
+			vma = op->remap.next;
+
+		ret = __xe_vma_op_execute(vm, vma, op);
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
+					  op);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		ret = __xe_vma_op_execute(vm,
+					  gpuva_to_vma(op->base.prefetch.va),
+					  op);
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
 	}
 
-	return vma;
+	return ret;
+}
 
-flush_list:
-	list_for_each_entry_safe(__vma, next, &vma->unbind_link,
-				 unbind_link)
-		list_del_init(&__vma->unbind_link);
+static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	bool last = op->flags & XE_VMA_OP_LAST;
 
-	return ERR_PTR(-EINVAL);
+	if (last) {
+		while (op->num_syncs--)
+			xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
+		kfree(op->syncs);
+		if (op->engine)
+			xe_engine_put(op->engine);
+		if (op->fence)
+			dma_fence_put(&op->fence->fence);
+	}
+	if (!list_empty(&op->link)) {
+		spin_lock_irq(&vm->async_ops.lock);
+		list_del(&op->link);
+		spin_unlock_irq(&vm->async_ops.lock);
+	}
+	if (op->ops)
+		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
+	if (last)
+		xe_vm_put(vm);
 }
 
-static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
-						struct xe_bo *bo)
+static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
+			     bool post_commit)
 {
-	struct xe_vma *first = NULL, *vma;
+	lockdep_assert_held_write(&vm->lock);
 
-	lockdep_assert_held(&vm->lock);
-	xe_bo_assert_held(bo);
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		if (op->map.vma) {
+			prep_vma_destroy(vm, op->map.vma, post_commit);
+			xe_vma_destroy_unlocked(op->map.vma);
+		}
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
 
-	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		if (xe_vma_vm(vma) != vm)
-			continue;
+		down_read(&vm->userptr.notifier_lock);
+		vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+		up_read(&vm->userptr.notifier_lock);
+		if (post_commit)
+			xe_vm_insert_vma(vm, vma);
+		break;
+	}
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
 
-		prep_vma_destroy(vm, vma);
-		if (!first)
-			first = vma;
-		else
-			list_add_tail(&vma->unbind_link, &first->unbind_link);
+		if (op->remap.prev) {
+			prep_vma_destroy(vm, op->remap.prev, post_commit);
+			xe_vma_destroy_unlocked(op->remap.prev);
+		}
+		if (op->remap.next) {
+			prep_vma_destroy(vm, op->remap.next, post_commit);
+			xe_vma_destroy_unlocked(op->remap.next);
+		}
+		down_read(&vm->userptr.notifier_lock);
+		vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+		up_read(&vm->userptr.notifier_lock);
+		if (post_commit)
+			xe_vm_insert_vma(vm, vma);
+		break;
+	}
+	case DRM_GPUVA_OP_PREFETCH:
+		/* Nothing to do */
+		break;
+	default:
+		XE_BUG_ON("NOT POSSIBLE");
 	}
+}
 
-	return first;
+static struct xe_vma_op *next_vma_op(struct xe_vm *vm)
+{
+	return list_first_entry_or_null(&vm->async_ops.pending,
+					struct xe_vma_op, link);
 }
 
-static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
-					       struct xe_bo *bo,
-					       u64 bo_offset_or_userptr,
-					       u64 addr, u64 range, u32 op,
-					       u64 tile_mask, u32 region)
+static void xe_vma_op_work_func(struct work_struct *w)
 {
-	struct ww_acquire_ctx ww;
-	struct xe_vma *vma, lookup;
-	int err;
+	struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
 
-	lockdep_assert_held(&vm->lock);
+	for (;;) {
+		struct xe_vma_op *op;
+		int err;
 
-	lookup.start = addr;
-	lookup.end = addr + range - 1;
+		if (vm->async_ops.error && !xe_vm_is_closed(vm))
+			break;
 
-	switch (VM_BIND_OP(op)) {
-	case XE_VM_BIND_OP_MAP:
-		if (bo) {
-			err = xe_bo_lock(bo, &ww, 0, true);
-			if (err)
-				return ERR_PTR(err);
-		}
-		vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
-				    addr + range - 1,
-				    op & XE_VM_BIND_FLAG_READONLY,
-				    op & XE_VM_BIND_FLAG_NULL,
-				    tile_mask);
-		if (bo)
-			xe_bo_unlock(bo, &ww);
-		if (!vma)
-			return ERR_PTR(-ENOMEM);
+		spin_lock_irq(&vm->async_ops.lock);
+		op = next_vma_op(vm);
+		spin_unlock_irq(&vm->async_ops.lock);
+
+		if (!op)
+			break;
 
-		xe_vm_insert_vma(vm, vma);
-		if (bo && !bo->vm) {
-			vm_insert_extobj(vm, vma);
-			err = add_preempt_fences(vm, bo);
+		if (!xe_vm_is_closed(vm)) {
+			down_write(&vm->lock);
+			err = xe_vma_op_execute(vm, op);
 			if (err) {
-				prep_vma_destroy(vm, vma);
+				drm_warn(&vm->xe->drm,
+					 "Async VM op(%d) failed with %d",
+					 op->base.op, err);
+				vm_set_async_error(vm, err);
+				up_write(&vm->lock);
+
+				if (vm->async_ops.error_capture.addr)
+					vm_error_capture(vm, err, 0, 0, 0);
+				break;
+			}
+			up_write(&vm->lock);
+		} else {
+			struct xe_vma *vma;
+
+			switch (op->base.op) {
+			case DRM_GPUVA_OP_REMAP:
+				vma = gpuva_to_vma(op->base.remap.unmap->va);
+				trace_xe_vma_flush(vma);
+
+				down_write(&vm->lock);
 				xe_vma_destroy_unlocked(vma);
+				up_write(&vm->lock);
+				break;
+			case DRM_GPUVA_OP_UNMAP:
+				vma = gpuva_to_vma(op->base.unmap.va);
+				trace_xe_vma_flush(vma);
+
+				down_write(&vm->lock);
+				xe_vma_destroy_unlocked(vma);
+				up_write(&vm->lock);
+				break;
+			default:
+				/* Nothing to do */
+				break;
+			}
 
-				return ERR_PTR(err);
+			if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+						   &op->fence->fence.flags)) {
+				if (!xe_vm_no_dma_fences(vm)) {
+					op->fence->started = true;
+					wake_up_all(&op->fence->wq);
+				}
+				dma_fence_signal(&op->fence->fence);
 			}
 		}
-		break;
-	case XE_VM_BIND_OP_UNMAP:
-		vma = vm_unbind_lookup_vmas(vm, &lookup);
-		break;
-	case XE_VM_BIND_OP_PREFETCH:
-		vma = vm_prefetch_lookup_vmas(vm, &lookup, region);
-		break;
-	case XE_VM_BIND_OP_UNMAP_ALL:
-		XE_BUG_ON(!bo);
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		xe_vma_op_cleanup(vm, op);
+	}
+}
+
+static int vm_bind_ioctl_ops_commit(struct xe_vm *vm,
+				    struct list_head *ops_list, bool async)
+{
+	struct xe_vma_op *op, *last_op, *next;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	list_for_each_entry(op, ops_list, link) {
+		last_op = op;
+		err = xe_vma_op_commit(vm, op);
 		if (err)
-			return ERR_PTR(err);
-		vma = vm_unbind_all_lookup_vmas(vm, bo);
-		if (!vma)
-			vma = ERR_PTR(-EINVAL);
-		xe_bo_unlock(bo, &ww);
-		break;
-	case XE_VM_BIND_OP_MAP_USERPTR:
-		XE_BUG_ON(bo);
-
-		vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
-				    addr + range - 1,
-				    op & XE_VM_BIND_FLAG_READONLY,
-				    op & XE_VM_BIND_FLAG_NULL,
-				    tile_mask);
-		if (!vma)
-			return ERR_PTR(-ENOMEM);
+			goto unwind;
+	}
 
-		err = xe_vma_userptr_pin_pages(vma);
-		if (err) {
-			prep_vma_destroy(vm, vma);
-			xe_vma_destroy_unlocked(vma);
+	if (!async) {
+		err = xe_vma_op_execute(vm, last_op);
+		if (err)
+			goto unwind;
+		xe_vma_op_cleanup(vm, last_op);
+	} else {
+		int i;
+		bool installed = false;
 
-			return ERR_PTR(err);
-		} else {
-			xe_vm_insert_vma(vm, vma);
-		}
-		break;
-	default:
-		XE_BUG_ON("NOT POSSIBLE");
-		vma = ERR_PTR(-EINVAL);
+		for (i = 0; i < last_op->num_syncs; i++)
+			installed |= xe_sync_entry_signal(&last_op->syncs[i],
+							  NULL,
+							  &last_op->fence->fence);
+		if (!installed && last_op->fence)
+			dma_fence_signal(&last_op->fence->fence);
+
+		spin_lock_irq(&vm->async_ops.lock);
+		list_splice_tail(ops_list, &vm->async_ops.pending);
+		spin_unlock_irq(&vm->async_ops.lock);
+
+		if (!vm->async_ops.error)
+			queue_work(system_unbound_wq, &vm->async_ops.work);
 	}
 
-	return vma;
+	return 0;
+
+unwind:
+	list_for_each_entry_reverse(op, ops_list, link)
+		xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED);
+	list_for_each_entry_safe(op, next, ops_list, link)
+		xe_vma_op_cleanup(vm, op);
+
+	return err;
+}
+
+/*
+ * Unwind operations list, called after a failure of vm_bind_ioctl_ops_create or
+ * vm_bind_ioctl_ops_parse.
+ */
+static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
+				     struct drm_gpuva_ops **ops,
+				     int num_ops_list)
+{
+	int i;
+
+	for (i = 0; i < num_ops_list; ++i) {
+		struct drm_gpuva_ops *__ops = ops[i];
+		struct drm_gpuva_op *__op;
+
+		if (!__ops)
+			continue;
+
+		drm_gpuva_for_each_op(__op, __ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+			xe_vma_op_unwind(vm, op, false);
+		}
+	}
 }
 
 #ifdef TEST_VM_ASYNC_OPS_ERROR
@@ -2963,8 +3003,6 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 	int i;
 
 	if (XE_IOCTL_ERR(xe, args->extensions) ||
-	    XE_IOCTL_ERR(xe, args->pad || args->pad2) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]) ||
 	    XE_IOCTL_ERR(xe, !args->num_binds) ||
 	    XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
 		return -EINVAL;
@@ -2996,14 +3034,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u32 obj = (*bind_ops)[i].obj;
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
 		u32 region = (*bind_ops)[i].region;
-		bool is_null = op &  XE_VM_BIND_FLAG_NULL;
-
-		if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) ||
-		    XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] ||
-				     (*bind_ops)[i].reserved[1])) {
-			err = -EINVAL;
-			goto free_bind_ops;
-		}
+		bool is_null = op & XE_VM_BIND_FLAG_NULL;
 
 		if (i == 0) {
 			*async = !!(op & XE_VM_BIND_FLAG_ASYNC);
@@ -3083,15 +3114,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct drm_xe_vm_bind *args = data;
 	struct drm_xe_sync __user *syncs_user;
 	struct xe_bo **bos = NULL;
-	struct xe_vma **vmas = NULL;
+	struct drm_gpuva_ops **ops = NULL;
 	struct xe_vm *vm;
 	struct xe_engine *e = NULL;
 	u32 num_syncs;
 	struct xe_sync_entry *syncs = NULL;
 	struct drm_xe_vm_bind_op *bind_ops;
+	LIST_HEAD(ops_list);
 	bool async;
 	int err;
-	int i, j = 0;
+	int i;
 
 	err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
 	if (err)
@@ -3180,8 +3212,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto release_vm_lock;
 	}
 
-	vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL);
-	if (!vmas) {
+	ops = kzalloc(sizeof(*ops) * args->num_binds, GFP_KERNEL);
+	if (!ops) {
 		err = -ENOMEM;
 		goto release_vm_lock;
 	}
@@ -3233,7 +3265,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
 					  &syncs_user[num_syncs], false,
-					  xe_vm_in_fault_mode(vm));
+					  xe_vm_no_dma_fences(vm));
 		if (err)
 			goto free_syncs;
 	}
@@ -3244,7 +3276,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 addr = bind_ops[i].addr;
 		u32 op = bind_ops[i].op;
 
-		err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
+		err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
 		if (err)
 			goto free_syncs;
 	}
@@ -3257,126 +3289,43 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 tile_mask = bind_ops[i].tile_mask;
 		u32 region = bind_ops[i].region;
 
-		vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset,
-						   addr, range, op, tile_mask,
-						   region);
-		if (IS_ERR(vmas[i])) {
-			err = PTR_ERR(vmas[i]);
-			vmas[i] = NULL;
-			goto destroy_vmas;
+		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+						  addr, range, op, tile_mask,
+						  region);
+		if (IS_ERR(ops[i])) {
+			err = PTR_ERR(ops[i]);
+			ops[i] = NULL;
+			goto unwind_ops;
 		}
 	}
 
-	for (j = 0; j < args->num_binds; ++j) {
-		struct xe_sync_entry *__syncs;
-		u32 __num_syncs = 0;
-		bool first_or_last = j == 0 || j == args->num_binds - 1;
-
-		if (args->num_binds == 1) {
-			__num_syncs = num_syncs;
-			__syncs = syncs;
-		} else if (first_or_last && num_syncs) {
-			bool first = j == 0;
-
-			__syncs = kmalloc(sizeof(*__syncs) * num_syncs,
-					  GFP_KERNEL);
-			if (!__syncs) {
-				err = ENOMEM;
-				break;
-			}
-
-			/* in-syncs on first bind, out-syncs on last bind */
-			for (i = 0; i < num_syncs; ++i) {
-				bool signal = syncs[i].flags &
-					DRM_XE_SYNC_SIGNAL;
-
-				if ((first && !signal) || (!first && signal))
-					__syncs[__num_syncs++] = syncs[i];
-			}
-		} else {
-			__num_syncs = 0;
-			__syncs = NULL;
-		}
-
-		if (async) {
-			bool last = j == args->num_binds - 1;
-
-			/*
-			 * Each pass of async worker drops the ref, take a ref
-			 * here, 1 set of refs taken above
-			 */
-			if (!last) {
-				if (e)
-					xe_engine_get(e);
-				xe_vm_get(vm);
-			}
-
-			err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j],
-						  bind_ops + j, __syncs,
-						  __num_syncs);
-			if (err && !last) {
-				if (e)
-					xe_engine_put(e);
-				xe_vm_put(vm);
-			}
-			if (err)
-				break;
-		} else {
-			XE_BUG_ON(j != 0);	/* Not supported */
-			err = vm_bind_ioctl(vm, vmas[j], e, bos[j],
-					    bind_ops + j, __syncs,
-					    __num_syncs, NULL);
-			break;	/* Needed so cleanup loops work */
-		}
-	}
+	err = vm_bind_ioctl_ops_parse(vm, e, ops, args->num_binds,
+				      syncs, num_syncs, &ops_list, async);
+	if (err)
+		goto unwind_ops;
 
-	/* Most of cleanup owned by the async bind worker */
-	if (async && !err) {
-		up_write(&vm->lock);
-		if (args->num_binds > 1)
-			kfree(syncs);
-		goto free_objs;
-	}
+	err = vm_bind_ioctl_ops_commit(vm, &ops_list, async);
+	up_write(&vm->lock);
 
-destroy_vmas:
-	for (i = j; err && i < args->num_binds; ++i) {
-		u32 op = bind_ops[i].op;
-		struct xe_vma *vma, *next;
+	for (i = 0; i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
 
-		if (!vmas[i])
-			break;
+	kfree(bos);
+	kfree(ops);
+	if (args->num_binds > 1)
+		kfree(bind_ops);
 
-		list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link,
-					 unbind_link) {
-			list_del_init(&vma->unbind_link);
-			if (!vma->destroyed) {
-				prep_vma_destroy(vm, vma);
-				xe_vma_destroy_unlocked(vma);
-			}
-		}
+	return err;
 
-		switch (VM_BIND_OP(op)) {
-		case XE_VM_BIND_OP_MAP:
-			prep_vma_destroy(vm, vmas[i]);
-			xe_vma_destroy_unlocked(vmas[i]);
-			break;
-		case XE_VM_BIND_OP_MAP_USERPTR:
-			prep_vma_destroy(vm, vmas[i]);
-			xe_vma_destroy_unlocked(vmas[i]);
-			break;
-		}
-	}
+unwind_ops:
+	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
 free_syncs:
-	while (num_syncs--) {
-		if (async && j &&
-		    !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL))
-			continue;	/* Still in async worker */
+	while (num_syncs--)
 		xe_sync_entry_cleanup(&syncs[num_syncs]);
-	}
 
 	kfree(syncs);
 put_obj:
-	for (i = j; i < args->num_binds; ++i)
+	for (i = 0; i < args->num_binds; ++i)
 		xe_bo_put(bos[i]);
 release_vm_lock:
 	up_write(&vm->lock);
@@ -3387,10 +3336,10 @@ put_engine:
 		xe_engine_put(e);
 free_objs:
 	kfree(bos);
-	kfree(vmas);
+	kfree(ops);
 	if (args->num_binds > 1)
 		kfree(bind_ops);
-	return err;
+	return err == -ENODATA ? 0 : err;
 }
 
 /*
@@ -3415,7 +3364,7 @@ int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 
 void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
 {
-	dma_resv_unlock(&vm->resv);
+	dma_resv_unlock(xe_vm_resv(vm));
 	ww_acquire_fini(ww);
 }
 
@@ -3448,7 +3397,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 			WARN_ON_ONCE(!mmu_interval_check_retry
 				     (&vma->userptr.notifier,
 				      vma->userptr.notifier_seq));
-			WARN_ON_ONCE(!dma_resv_test_signaled(&xe_vma_vm(vma)->resv,
+			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
 							     DMA_RESV_USAGE_BOOKKEEP));
 
 		} else {
@@ -3485,7 +3434,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 {
-	struct rb_node *node;
+	struct drm_gpuva *gpuva;
 	bool is_vram;
 	uint64_t addr;
 
@@ -3499,8 +3448,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
 	}
 
-	for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
-		struct xe_vma *vma = to_xe_vma(node);
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
 		bool is_userptr = xe_vma_is_userptr(vma);
 		bool is_null = xe_vma_is_null(vma);
 
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 644a8aa604e81..d386e72cb9749 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -6,6 +6,7 @@
 #ifndef _XE_VM_H_
 #define _XE_VM_H_
 
+#include "xe_bo_types.h"
 #include "xe_macros.h"
 #include "xe_map.h"
 #include "xe_vm_types.h"
@@ -22,20 +23,19 @@ struct xe_file;
 struct xe_sync_entry;
 
 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
-void xe_vm_free(struct kref *ref);
 
 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
 int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);
 
 static inline struct xe_vm *xe_vm_get(struct xe_vm *vm)
 {
-	kref_get(&vm->refcount);
+	drm_gpuvm_get(&vm->gpuvm);
 	return vm;
 }
 
 static inline void xe_vm_put(struct xe_vm *vm)
 {
-	kref_put(&vm->refcount, xe_vm_free);
+	drm_gpuvm_put(&vm->gpuvm);
 }
 
 int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
@@ -61,7 +61,22 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
 }
 
 struct xe_vma *
-xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma);
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
+
+static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva)
+{
+	return container_of(gpuva->vm, struct xe_vm, gpuvm);
+}
+
+static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
+{
+	return container_of(gpuva, struct xe_vma, gpuva);
+}
+
+static inline struct xe_vma_op *gpuva_op_to_vma_op(struct drm_gpuva_op *op)
+{
+	return container_of(op, struct xe_vma_op, base);
+}
 
 /**
  * DOC: Provide accessors for vma members to facilitate easy change of
@@ -69,12 +84,12 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma);
  */
 static inline u64 xe_vma_start(struct xe_vma *vma)
 {
-	return vma->start;
+	return vma->gpuva.va.addr;
 }
 
 static inline u64 xe_vma_size(struct xe_vma *vma)
 {
-	return vma->end - vma->start + 1;
+	return vma->gpuva.va.range;
 }
 
 static inline u64 xe_vma_end(struct xe_vma *vma)
@@ -84,32 +99,33 @@ static inline u64 xe_vma_end(struct xe_vma *vma)
 
 static inline u64 xe_vma_bo_offset(struct xe_vma *vma)
 {
-	return vma->bo_offset;
+	return vma->gpuva.gem.offset;
 }
 
 static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma)
 {
-	return vma->bo;
+	return !vma->gpuva.gem.obj ? NULL :
+		container_of(vma->gpuva.gem.obj, struct xe_bo, ttm.base);
 }
 
 static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma)
 {
-	return vma->vm;
+	return container_of(vma->gpuva.vm, struct xe_vm, gpuvm);
 }
 
 static inline bool xe_vma_read_only(struct xe_vma *vma)
 {
-	return vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
+	return vma->gpuva.flags & XE_VMA_READ_ONLY;
 }
 
 static inline u64 xe_vma_userptr(struct xe_vma *vma)
 {
-	return vma->userptr.ptr;
+	return vma->gpuva.gem.offset;
 }
 
 static inline bool xe_vma_is_null(struct xe_vma *vma)
 {
-	return vma->pte_flags & XE_PTE_FLAG_NULL;
+	return vma->gpuva.flags & DRM_GPUVA_SPARSE;
 }
 
 static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
@@ -122,8 +138,6 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
 	return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
 }
 
-#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
-
 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);
 
 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -218,6 +232,23 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
 
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
 
+/**
+ * xe_vm_resv() - Return's the vm's reservation object
+ * @vm: The vm
+ *
+ * Return: Pointer to the vm's reservation object.
+ */
+static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm)
+{
+	return drm_gpuvm_resv(&vm->gpuvm);
+}
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
 #define vm_dbg drm_dbg
 #else
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 0e8d0c513ee96..32f92743d851a 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -210,19 +210,12 @@ static const madvise_func madvise_funcs[] = {
 	[DRM_XE_VM_MADVISE_PIN] = madvise_pin,
 };
 
-static struct xe_vma *node_to_vma(const struct rb_node *node)
-{
-	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
-	return (struct xe_vma *)node;
-}
-
 static struct xe_vma **
 get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
 {
-	struct xe_vma **vmas;
-	struct xe_vma *vma, *__vma, lookup;
+	struct xe_vma **vmas, **__vmas;
+	struct drm_gpuva *gpuva;
 	int max_vmas = 8;
-	struct rb_node *node;
 
 	lockdep_assert_held(&vm->lock);
 
@@ -230,62 +223,23 @@ get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
 	if (!vmas)
 		return NULL;
 
-	lookup.start = addr;
-	lookup.end = addr + range - 1;
+	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
 
-	vma = xe_vm_find_overlapping_vma(vm, &lookup);
-	if (!vma)
-		return vmas;
+		if (xe_vma_is_userptr(vma))
+			continue;
 
-	if (!xe_vma_is_userptr(vma)) {
-		vmas[*num_vmas] = vma;
-		*num_vmas += 1;
-	}
-
-	node = &vma->vm_node;
-	while ((node = rb_next(node))) {
-		if (!xe_vma_cmp_vma_cb(&lookup, node)) {
-			__vma = node_to_vma(node);
-			if (xe_vma_is_userptr(__vma))
-				continue;
-
-			if (*num_vmas == max_vmas) {
-				struct xe_vma **__vmas =
-					krealloc(vmas, max_vmas * sizeof(*vmas),
-						 GFP_KERNEL);
-
-				if (!__vmas)
-					return NULL;
-				vmas = __vmas;
-			}
-			vmas[*num_vmas] = __vma;
-			*num_vmas += 1;
-		} else {
-			break;
+		if (*num_vmas == max_vmas) {
+			max_vmas <<= 1;
+			__vmas = krealloc(vmas, max_vmas * sizeof(*vmas),
+					  GFP_KERNEL);
+			if (!__vmas)
+				return NULL;
+			vmas = __vmas;
 		}
-	}
 
-	node = &vma->vm_node;
-	while ((node = rb_prev(node))) {
-		if (!xe_vma_cmp_vma_cb(&lookup, node)) {
-			__vma = node_to_vma(node);
-			if (xe_vma_is_userptr(__vma))
-				continue;
-
-			if (*num_vmas == max_vmas) {
-				struct xe_vma **__vmas =
-					krealloc(vmas, max_vmas * sizeof(*vmas),
-						 GFP_KERNEL);
-
-				if (!__vmas)
-					return NULL;
-				vmas = __vmas;
-			}
-			vmas[*num_vmas] = __vma;
-			*num_vmas += 1;
-		} else {
-			break;
-		}
+		vmas[*num_vmas] = vma;
+		*num_vmas += 1;
 	}
 
 	return vmas;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 3c885211a8d16..8aca079006ba7 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -6,6 +6,8 @@
 #ifndef _XE_VM_TYPES_H_
 #define _XE_VM_TYPES_H_
 
+#include <drm/drm_gpuvm.h>
+
 #include <linux/dma-resv.h>
 #include <linux/kref.h>
 #include <linux/mmu_notifier.h>
@@ -14,30 +16,23 @@
 #include "xe_device_types.h"
 #include "xe_pt_types.h"
 
+struct async_op_fence;
 struct xe_bo;
+struct xe_sync_entry;
 struct xe_vm;
 
-struct xe_vma {
-	struct rb_node vm_node;
-	/** @vm: VM which this VMA belongs to */
-	struct xe_vm *vm;
+#define TEST_VM_ASYNC_OPS_ERROR
+#define FORCE_ASYNC_OP_ERROR	BIT(31)
 
-	/**
-	 * @start: start address of this VMA within its address domain, end -
-	 * start + 1 == VMA size
-	 */
-	u64 start;
-	/** @end: end address of this VMA within its address domain */
-	u64 end;
-	/** @pte_flags: pte flags for this VMA */
-#define XE_PTE_FLAG_READ_ONLY		BIT(0)
-#define XE_PTE_FLAG_NULL		BIT(1)
-	u32 pte_flags;
-
-	/** @bo: BO if not a userptr, must be NULL is userptr */
-	struct xe_bo *bo;
-	/** @bo_offset: offset into BO if not a userptr, unused for userptr */
-	u64 bo_offset;
+#define XE_VMA_READ_ONLY	DRM_GPUVA_USERBITS
+#define XE_VMA_DESTROYED	(DRM_GPUVA_USERBITS << 1)
+#define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 2)
+#define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
+#define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
+
+struct xe_vma {
+	/** @gpuva: Base GPUVA object */
+	struct drm_gpuva gpuva;
 
 	/** @tile_mask: Tile mask of where to create binding for this VMA */
 	u64 tile_mask;
@@ -51,40 +46,8 @@ struct xe_vma {
 	 */
 	u64 tile_present;
 
-	/**
-	 * @destroyed: VMA is destroyed, in the sense that it shouldn't be
-	 * subject to rebind anymore. This field must be written under
-	 * the vm lock in write mode and the userptr.notifier_lock in
-	 * either mode. Read under the vm lock or the userptr.notifier_lock in
-	 * write mode.
-	 */
-	bool destroyed;
-
-	/**
-	 * @first_munmap_rebind: VMA is first in a sequence of ops that triggers
-	 * a rebind (munmap style VM unbinds). This indicates the operation
-	 * using this VMA must wait on all dma-resv slots (wait for pending jobs
-	 * / trigger preempt fences).
-	 */
-	bool first_munmap_rebind;
-
-	/**
-	 * @last_munmap_rebind: VMA is first in a sequence of ops that triggers
-	 * a rebind (munmap style VM unbinds). This indicates the operation
-	 * using this VMA must install itself into kernel dma-resv slot (blocks
-	 * future jobs) and kick the rebind work in compute mode.
-	 */
-	bool last_munmap_rebind;
-
-	/** @use_atomic_access_pte_bit: Set atomic access bit in PTE */
-	bool use_atomic_access_pte_bit;
-
-	union {
-		/** @bo_link: link into BO if not a userptr */
-		struct list_head bo_link;
-		/** @userptr_link: link into VM repin list if userptr */
-		struct list_head userptr_link;
-	};
+	/** @userptr_link: link into VM repin list if userptr */
+	struct list_head userptr_link;
 
 	/**
 	 * @rebind_link: link into VM if this VMA needs rebinding, and
@@ -107,8 +70,6 @@ struct xe_vma {
 
 	/** @userptr: user pointer state */
 	struct {
-		/** @ptr: user pointer */
-		uintptr_t ptr;
 		/** @invalidate_link: Link for the vm::userptr.invalidated list */
 		struct list_head invalidate_link;
 		/**
@@ -153,24 +114,19 @@ struct xe_vma {
 
 struct xe_device;
 
-#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
-
 struct xe_vm {
-	struct xe_device *xe;
+	/** @gpuvm: base GPUVM used to track VMAs */
+	struct drm_gpuvm gpuvm;
 
-	struct kref refcount;
+	struct xe_device *xe;
 
 	/* engine used for (un)binding vma's */
 	struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE];
 
-	/** Protects @rebind_list and the page-table structures */
-	struct dma_resv resv;
-
 	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
 	struct ttm_lru_bulk_move lru_bulk_move;
 
 	u64 size;
-	struct rb_root vmas;
 
 	struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
 	struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
@@ -351,4 +307,99 @@ struct xe_vm {
 	bool batch_invalidate_tlb;
 };
 
+/** struct xe_vma_op_map - VMA map operation */
+struct xe_vma_op_map {
+	/** @vma: VMA to map */
+	struct xe_vma *vma;
+	/** @immediate: Immediate bind */
+	bool immediate;
+	/** @read_only: Read only */
+	bool read_only;
+	/** @is_null: is NULL binding */
+	bool is_null;
+};
+
+/** struct xe_vma_op_unmap - VMA unmap operation */
+struct xe_vma_op_unmap {
+	/** @start: start of the VMA unmap */
+	u64 start;
+	/** @range: range of the VMA unmap */
+	u64 range;
+};
+
+/** struct xe_vma_op_remap - VMA remap operation */
+struct xe_vma_op_remap {
+	/** @prev: VMA preceding part of a split mapping */
+	struct xe_vma *prev;
+	/** @next: VMA subsequent part of a split mapping */
+	struct xe_vma *next;
+	/** @start: start of the VMA unmap */
+	u64 start;
+	/** @range: range of the VMA unmap */
+	u64 range;
+	/** @unmap_done: unmap operation in done */
+	bool unmap_done;
+};
+
+/** struct xe_vma_op_prefetch - VMA prefetch operation */
+struct xe_vma_op_prefetch {
+	/** @region: memory region to prefetch to */
+	u32 region;
+};
+
+/** enum xe_vma_op_flags - flags for VMA operation */
+enum xe_vma_op_flags {
+	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
+	XE_VMA_OP_FIRST		= (0x1 << 0),
+	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
+	XE_VMA_OP_LAST		= (0x1 << 1),
+	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
+	XE_VMA_OP_COMMITTED	= (0x1 << 2),
+};
+
+/** struct xe_vma_op - VMA operation */
+struct xe_vma_op {
+	/** @base: GPUVA base operation */
+	struct drm_gpuva_op base;
+	/**
+	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
+	 * operations is processed
+	 */
+	struct drm_gpuva_ops *ops;
+	/** @engine: engine for this operation */
+	struct xe_engine *engine;
+	/**
+	 * @syncs: syncs for this operation, only used on first and last
+	 * operation
+	 */
+	struct xe_sync_entry *syncs;
+	/** @num_syncs: number of syncs */
+	u32 num_syncs;
+	/** @link: async operation link */
+	struct list_head link;
+	/**
+	 * @fence: async operation fence, signaled on last operation complete
+	 */
+	struct async_op_fence *fence;
+	/** @tile_mask: gt mask for this operation */
+	u64 tile_mask;
+	/** @flags: operation flags */
+	enum xe_vma_op_flags flags;
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	/** @inject_error: inject error to test async op error handling */
+	bool inject_error;
+#endif
+
+	union {
+		/** @map: VMA map operation specific data */
+		struct xe_vma_op_map map;
+		/** @unmap: VMA unmap operation specific data */
+		struct xe_vma_op_unmap unmap;
+		/** @remap: VMA remap operation specific data */
+		struct xe_vma_op_remap remap;
+		/** @prefetch: VMA prefetch operation specific data */
+		struct xe_vma_op_prefetch prefetch;
+	};
+};
 #endif
-- 
GitLab


From 38fa29dc2b73b54299e973d292ec7fd507d3b8c0 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:39 +0100
Subject: [PATCH 1703/2340] drm/xe/tlb: drop unnecessary smp_wmb()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

wake_up_all() and wait_event_timeout() already have the correct barriers
as per https://www.kernel.org/doc/Documentation/memory-barriers.txt.
This should ensure that the seqno_recv write can't be re-ordered wrt to
the actual wake_up_all() i.e we get woken up but there is no write.  The
reader side with wait_event_timeout() also has the correct barriers.
With that drop the hand rolled smp_wmb(), which is anyway missing some
kind of matching barrier on the reader side.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index f77368a164092..48eb05f763e96 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -333,8 +333,11 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 			expected_seqno, msg[0]);
 	}
 
+	/*
+	 * wake_up_all() and wait_event_timeout() already have the correct
+	 * barriers.
+	 */
 	gt->tlb_invalidation.seqno_recv = msg[0];
-	smp_wmb();
 	wake_up_all(&guc->ct.wq);
 
 	fence = list_first_entry_or_null(&gt->tlb_invalidation.pending_fences,
-- 
GitLab


From 86ed09250e068faa840dadcd175d3cd8d174f998 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:40 +0100
Subject: [PATCH 1704/2340] drm/xe/tlb: ensure we access seqno_recv once
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure we load gt->tlb_invalidation.seqno_recv once, and use that for
our seqno checking. The gt->tlb_invalidation_seqno_past is a shared
global variable and can potentially change at any point here.  However
the checks here need to operate on a stable version of seqno_recv for
this to make any sense.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 48eb05f763e96..2b60251ea1c0f 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -257,15 +257,15 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
 {
-	if (seqno - gt->tlb_invalidation.seqno_recv <
-	    -(TLB_INVALIDATION_SEQNO_MAX / 2))
+	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
+
+	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
 		return false;
 
-	if (seqno - gt->tlb_invalidation.seqno_recv >
-	    (TLB_INVALIDATION_SEQNO_MAX / 2))
+	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
 		return true;
 
-	return gt->tlb_invalidation.seqno_recv >= seqno;
+	return seqno_recv >= seqno;
 }
 
 /**
@@ -337,7 +337,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	 * wake_up_all() and wait_event_timeout() already have the correct
 	 * barriers.
 	 */
-	gt->tlb_invalidation.seqno_recv = msg[0];
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
 	wake_up_all(&guc->ct.wq);
 
 	fence = list_first_entry_or_null(&gt->tlb_invalidation.pending_fences,
-- 
GitLab


From c4bbc32e09ab9f74c725a8719df2b509c8ad8780 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:41 +0100
Subject: [PATCH 1705/2340] drm/xe: hold mem_access.ref for CT fast-path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just checking xe_device_mem_access_ongoing() is not enough, we also need
to hold the reference otherwise the ref can transition from 1 -> 0 as we
enter g2h_read(), leading to warnings. While we can't do a full rpm sync
in the IRQ, we can keep the device awake if the ref is non-zero.
Introduce a new helper for this and set it to work in for the CT
fast-path.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 5 +++++
 drivers/gpu/drm/xe/xe_device.h | 1 +
 drivers/gpu/drm/xe/xe_guc_ct.c | 5 ++++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 6249eef752c58..bd2e10952989f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -364,6 +364,11 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
 }
 
+bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
+{
+	return atomic_inc_not_zero(&xe->mem_access.ref);
+}
+
 void xe_device_mem_access_get(struct xe_device *xe)
 {
 	bool resumed = xe_pm_runtime_resume_if_suspended(xe);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 779f71d066e6e..8e01bbadb1498 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -138,6 +138,7 @@ static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt)
 }
 
 void xe_device_mem_access_get(struct xe_device *xe);
+bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe);
 void xe_device_mem_access_put(struct xe_device *xe);
 
 static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index e71d069158dca..dcce6ed34370b 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1044,7 +1044,8 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
 	struct xe_device *xe = ct_to_xe(ct);
 	int len;
 
-	if (!xe_device_in_fault_mode(xe) || !xe_device_mem_access_ongoing(xe))
+	if (!xe_device_in_fault_mode(xe) ||
+	    !xe_device_mem_access_get_if_ongoing(xe))
 		return;
 
 	spin_lock(&ct->fast_lock);
@@ -1054,6 +1055,8 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
 			g2h_fast_path(ct, ct->fast_msg, len);
 	} while (len > 0);
 	spin_unlock(&ct->fast_lock);
+
+	xe_device_mem_access_put(xe);
 }
 
 /* Returns less than zero on error, 0 on done, 1 on more available */
-- 
GitLab


From dad33831d8d137ee28b21c3c2296463a01aa5b78 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:42 +0100
Subject: [PATCH 1706/2340] drm/xe/ct: hold fast_lock when reserving space for
 g2h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reserving and checking for space on the g2h side relies on the
fast_lock, and not the CT lock since we need to release space from the
fast CT path. Make sure we hold it when checking for space and reserving
it. The main concern is calling __g2h_release_space() as we are reserving
something and since the info.space and info.g2h_outstanding operations
are not atomic we can get some nonsense values back.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index dcce6ed34370b..ba89db1dcfdb1 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -346,7 +346,10 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
 
 static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
 {
-	lockdep_assert_held(&ct->lock);
+	if (!g2h_len)
+		return true;
+
+	lockdep_assert_held(&ct->fast_lock);
 
 	return ct->ctbs.g2h.info.space > g2h_len;
 }
@@ -367,15 +370,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
 	ct->ctbs.h2g.info.space -= cmd_len;
 }
 
-static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 {
 	XE_BUG_ON(g2h_len > ct->ctbs.g2h.info.space);
 
 	if (g2h_len) {
-		spin_lock_irq(&ct->fast_lock);
+		lockdep_assert_held(&ct->fast_lock);
+
 		ct->ctbs.g2h.info.space -= g2h_len;
 		ct->g2h_outstanding += num_g2h;
-		spin_unlock_irq(&ct->fast_lock);
 	}
 }
 
@@ -505,21 +508,26 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 		}
 	}
 
+	if (g2h_len)
+		spin_lock_irq(&ct->fast_lock);
 retry:
 	ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
 	if (unlikely(ret))
-		goto out;
+		goto out_unlock;
 
 	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
 			!!g2h_fence);
 	if (unlikely(ret)) {
 		if (ret == -EAGAIN)
 			goto retry;
-		goto out;
+		goto out_unlock;
 	}
 
-	g2h_reserve_space(ct, g2h_len, num_g2h);
+	__g2h_reserve_space(ct, g2h_len, num_g2h);
 	xe_guc_notify(ct_to_guc(ct));
+out_unlock:
+	if (g2h_len)
+		spin_unlock_irq(&ct->fast_lock);
 out:
 	return ret;
 }
-- 
GitLab


From 4803f6e26f1678b8b5af2924199bc137e7ec5fad Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:43 +0100
Subject: [PATCH 1707/2340] drm/xe/tlb: increment next seqno after successful
 CT send
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we are in the middle of a GT reset or similar the CT might be
disabled, such that the CT send fails. However we already incremented
gt->tlb_invalidation.seqno which might lead to warnings, since we
effectively just skipped a seqno:

    0000:00:02.0: drm_WARN_ON(expected_seqno != msg[0])

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 2b60251ea1c0f..b4d024bf10be1 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -124,10 +124,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 		trace_xe_gt_tlb_invalidation_fence_send(fence);
 	}
 	action[1] = seqno;
-	gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
-		TLB_INVALIDATION_SEQNO_MAX;
-	if (!gt->tlb_invalidation.seqno)
-		gt->tlb_invalidation.seqno = 1;
 	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
 	if (!ret && fence) {
@@ -137,8 +133,13 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 					   &gt->tlb_invalidation.fence_tdr,
 					   TLB_TIMEOUT);
 	}
-	if (!ret)
+	if (!ret) {
+		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
+			TLB_INVALIDATION_SEQNO_MAX;
+		if (!gt->tlb_invalidation.seqno)
+			gt->tlb_invalidation.seqno = 1;
 		ret = seqno;
+	}
 	if (ret < 0 && fence)
 		invalidation_fence_signal(fence);
 	mutex_unlock(&guc->ct.lock);
-- 
GitLab


From a4d362bbed8c86a632b5e22bf64d9c5564e3766e Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:44 +0100
Subject: [PATCH 1708/2340] drm/xe/ct: serialise fast_lock during CT disable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fast-path CT could be running as we enter a runtime-suspend or
potentially a GT reset, however here we only use the ct->fast_lock and
not the full ct->lock. Before disabling the CT, also serialise against
the fast_lock to ensure any in-progress work finishes before we start
nuking the CT related stuff. Once we disable ct->enabled and drop the
lock, any new work should fail gracefully, and anything that was in
progress should be finished.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index ba89db1dcfdb1..acf488b00b661 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -301,8 +301,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
 		goto err_out;
 
 	mutex_lock(&ct->lock);
+	spin_lock_irq(&ct->fast_lock);
 	ct->g2h_outstanding = 0;
 	ct->enabled = true;
+	spin_unlock_irq(&ct->fast_lock);
 	mutex_unlock(&ct->lock);
 
 	smp_mb();
@@ -319,8 +321,10 @@ err_out:
 
 void xe_guc_ct_disable(struct xe_guc_ct *ct)
 {
-	mutex_lock(&ct->lock);
-	ct->enabled = false;
+	mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+	spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+	ct->enabled = false; /* Finally disable CT communication */
+	spin_unlock_irq(&ct->fast_lock);
 	mutex_unlock(&ct->lock);
 
 	xa_destroy(&ct->fence_lookup);
-- 
GitLab


From 7b24cc3e309f31ad77b2ed136ce7606e0b3f67bb Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:45 +0100
Subject: [PATCH 1709/2340] drm/xe/gt: tweak placement for signalling TLB
 fences after GT reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Assumption here is that submission is disabled along with CT, and full
GT reset will also nuke TLBs, so should be safe to signal all in-flight
TLB fences, but only after the actual reset so move the placement
slightly.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index bc76678a8276c..a21d44bfe9e87 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -519,7 +519,6 @@ static int gt_reset(struct xe_gt *gt)
 
 	xe_uc_stop_prepare(&gt->uc);
 	xe_gt_pagefault_reset(gt);
-	xe_gt_tlb_invalidation_reset(gt);
 
 	err = xe_uc_stop(&gt->uc);
 	if (err)
@@ -529,6 +528,8 @@ static int gt_reset(struct xe_gt *gt)
 	if (err)
 		goto err_out;
 
+	xe_gt_tlb_invalidation_reset(gt);
+
 	err = do_gt_restart(gt);
 	if (err)
 		goto err_out;
-- 
GitLab


From 2ca01fe31b68bab12ccccef91196ea21cd93e065 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:46 +0100
Subject: [PATCH 1710/2340] drm/xe/tlb: also update seqno_recv during reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We might have various kworkers waiting for TLB flushes to complete which
are not tracked with an explicit TLB fence, however at this stage that
will never happen since the CT is already disabled, so make sure we
signal them here under the assumption that we have completed a full GT
reset.

v2:
  - We need to use seqno - 1 here. After acquiring ct->lock the seqno is
    actually the next users seqno and not the pending one.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 24 +++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index b4d024bf10be1..bf23d97d2fef6 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -86,13 +86,33 @@ invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
  *
  * Signal any pending invalidation fences, should be called during a GT reset
  */
- void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 {
 	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	struct xe_guc *guc = &gt->uc.guc;
+	int pending_seqno;
 
-	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+	/*
+	 * CT channel is already disabled at this point. No new TLB requests can
+	 * appear.
+	 */
 
 	mutex_lock(&gt->uc.guc.ct.lock);
+	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+	/*
+	 * We might have various kworkers waiting for TLB flushes to complete
+	 * which are not tracked with an explicit TLB fence, however at this
+	 * stage that will never happen since the CT is already disabled, so
+	 * make sure we signal them here under the assumption that we have
+	 * completed a full GT reset.
+	 */
+	if (gt->tlb_invalidation.seqno == 1)
+		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
+	else
+		pending_seqno = gt->tlb_invalidation.seqno - 1;
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
+	wake_up_all(&guc->ct.wq);
+
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link)
 		invalidation_fence_signal(fence);
-- 
GitLab


From 4aa5e3594f649d1bc202db302a8d5030d03c02fb Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:47 +0100
Subject: [PATCH 1711/2340] drm/xe/tlb: print seqno_recv on fence TLB timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To help debugging, sample the current seqno_recv and dump it out if we
encounter a TLB timeout for the fences path.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index bf23d97d2fef6..29819dc820c5d 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -35,8 +35,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 			break;
 
 		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
-		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d",
-			gt->info.id, fence->seqno);
+		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d",
+			gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);
 
 		list_del(&fence->link);
 		fence->base.error = -ETIME;
-- 
GitLab


From 0b688f9b2880c655a8b161ec46932a6fe8da9ea9 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:48 +0100
Subject: [PATCH 1712/2340] drm/xe/ct: update g2h outstanding for CTB capture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Looks to always to be zero when inspecting the CTB dump.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index acf488b00b661..0b086d17c083c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1219,6 +1219,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
 
 	if (ct->enabled) {
 		snapshot->ct_enabled = true;
+		snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
 		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
 					 &snapshot->h2g, atomic);
 		guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
-- 
GitLab


From 35c8a964398e1c57968cc94cd6f4e3a64c796357 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 10 Jul 2023 10:40:49 +0100
Subject: [PATCH 1713/2340] drm/xe: handle TLB invalidations from CT fast-path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In various test cases that put the system under a heavy load, we can
sometimes see errors with missed TLB invalidations. In such cases we see
the interrupt arrive for the invalidation from the GuC, however the
actual processing of the completion is pushed onto a workqueue and
handled with all the other CT stuff, which might take longer than
expected. Since we expect TLB invalidations to complete within a
reasonable amount of time (at most ~250ms), and they do seem pretty
critical, allow handling directly from the CT fast-path.

v2 (José):
  - Actually use the correct spinlock/unlock_irq, since pending_lock is
    grabbed from IRQ.
v3:
  - Don't publish the TLB fence on the list until after we fully
    initialize it and successfully do the CT send. The list is now only
    protected by the spin_lock pending_lock and we can't hold that
    across the entire TLB send operation.
v4 (Matt Brost):
  - Be careful with racing against fast CT path writing the seqno,
    before we have actually published the fence.

References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/297
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/320
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/449
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 134 ++++++++++++--------
 drivers/gpu/drm/xe/xe_gt_types.h            |   5 +
 drivers/gpu/drm/xe/xe_guc_ct.c              |  12 +-
 3 files changed, 91 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 29819dc820c5d..e2b85559257c7 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -25,7 +25,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 					tlb_invalidation.fence_tdr.work);
 	struct xe_gt_tlb_invalidation_fence *fence, *next;
 
-	mutex_lock(&gt->uc.guc.ct.lock);
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link) {
 		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
@@ -47,7 +47,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 		queue_delayed_work(system_wq,
 				   &gt->tlb_invalidation.fence_tdr,
 				   TLB_TIMEOUT);
-	mutex_unlock(&gt->uc.guc.ct.lock);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
 }
 
 /**
@@ -63,6 +63,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 {
 	gt->tlb_invalidation.seqno = 1;
 	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
+	spin_lock_init(&gt->tlb_invalidation.pending_lock);
 	spin_lock_init(&gt->tlb_invalidation.lock);
 	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
 	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
@@ -72,14 +73,20 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 }
 
 static void
-invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
 {
 	trace_xe_gt_tlb_invalidation_fence_signal(fence);
-	list_del(&fence->link);
 	dma_fence_signal(&fence->base);
 	dma_fence_put(&fence->base);
 }
 
+static void
+invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	list_del(&fence->link);
+	__invalidation_fence_signal(fence);
+}
+
 /**
  * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
  * @gt: graphics tile
@@ -98,6 +105,7 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 	 */
 
 	mutex_lock(&gt->uc.guc.ct.lock);
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
 	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
 	/*
 	 * We might have various kworkers waiting for TLB flushes to complete
@@ -116,9 +124,23 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link)
 		invalidation_fence_signal(fence);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
 	mutex_unlock(&gt->uc.guc.ct.lock);
 }
 
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
+
+	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
+		return false;
+
+	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
+		return true;
+
+	return seqno_recv >= seqno;
+}
+
 static int send_tlb_invalidation(struct xe_guc *guc,
 				 struct xe_gt_tlb_invalidation_fence *fence,
 				 u32 *action, int len)
@@ -126,7 +148,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	struct xe_gt *gt = guc_to_gt(guc);
 	int seqno;
 	int ret;
-	bool queue_work;
 
 	/*
 	 * XXX: The seqno algorithm relies on TLB invalidation being processed
@@ -137,21 +158,35 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
 	if (fence) {
-		queue_work = list_empty(&gt->tlb_invalidation.pending_fences);
 		fence->seqno = seqno;
-		list_add_tail(&fence->link,
-			      &gt->tlb_invalidation.pending_fences);
 		trace_xe_gt_tlb_invalidation_fence_send(fence);
 	}
 	action[1] = seqno;
 	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
 	if (!ret && fence) {
-		fence->invalidation_time = ktime_get();
-		if (queue_work)
-			queue_delayed_work(system_wq,
-					   &gt->tlb_invalidation.fence_tdr,
-					   TLB_TIMEOUT);
+		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+		/*
+		 * We haven't actually published the TLB fence as per
+		 * pending_fences, but in theory our seqno could have already
+		 * been written as we acquired the pending_lock. In such a case
+		 * we can just go ahead and signal the fence here.
+		 */
+		if (tlb_invalidation_seqno_past(gt, seqno)) {
+			__invalidation_fence_signal(fence);
+		} else {
+			fence->invalidation_time = ktime_get();
+			list_add_tail(&fence->link,
+				      &gt->tlb_invalidation.pending_fences);
+
+			if (list_is_singular(&gt->tlb_invalidation.pending_fences))
+				queue_delayed_work(system_wq,
+						   &gt->tlb_invalidation.fence_tdr,
+						   TLB_TIMEOUT);
+		}
+		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+	} else if (ret < 0 && fence) {
+		__invalidation_fence_signal(fence);
 	}
 	if (!ret) {
 		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
@@ -160,8 +195,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 			gt->tlb_invalidation.seqno = 1;
 		ret = seqno;
 	}
-	if (ret < 0 && fence)
-		invalidation_fence_signal(fence);
 	mutex_unlock(&guc->ct.lock);
 
 	return ret;
@@ -276,19 +309,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	return ret;
 }
 
-static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
-{
-	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
-
-	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
-		return false;
-
-	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
-		return true;
-
-	return seqno_recv >= seqno;
-}
-
 /**
  * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
  * @gt: graphics tile
@@ -336,22 +356,31 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
-	struct xe_gt_tlb_invalidation_fence *fence;
-	int expected_seqno;
-
-	lockdep_assert_held(&guc->ct.lock);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	unsigned long flags;
 
 	if (unlikely(len != 1))
 		return -EPROTO;
 
-	/* Sanity check on seqno */
-	expected_seqno = (gt->tlb_invalidation.seqno_recv + 1) %
-		TLB_INVALIDATION_SEQNO_MAX;
-	if (!expected_seqno)
-		expected_seqno = 1;
-	if (drm_WARN_ON(&gt_to_xe(gt)->drm, expected_seqno != msg[0])) {
-		drm_err(&gt_to_xe(gt)->drm, "TLB expected_seqno(%d) != msg(%u)\n",
-			expected_seqno, msg[0]);
+	/*
+	 * This can also be run both directly from the IRQ handler and also in
+	 * process_g2h_msg(). Only one may process any individual CT message,
+	 * however the order they are processed here could result in skipping a
+	 * seqno. To handle that we just process all the seqnos from the last
+	 * seqno_recv up to and including the one in msg[0]. The delta should be
+	 * very small so there shouldn't be much of pending_fences we actually
+	 * need to iterate over here.
+	 *
+	 * From GuC POV we expect the seqnos to always appear in-order, so if we
+	 * see something later in the timeline we can be sure that anything
+	 * appearing earlier has already signalled, just that we have yet to
+	 * officially process the CT message like if racing against
+	 * process_g2h_msg().
+	 */
+	spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
+	if (tlb_invalidation_seqno_past(gt, msg[0])) {
+		spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+		return 0;
 	}
 
 	/*
@@ -361,19 +390,24 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
 	wake_up_all(&guc->ct.wq);
 
-	fence = list_first_entry_or_null(&gt->tlb_invalidation.pending_fences,
-					 typeof(*fence), link);
-	if (fence)
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
 		trace_xe_gt_tlb_invalidation_fence_recv(fence);
-	if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) {
+
+		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
+			break;
+
 		invalidation_fence_signal(fence);
-		if (!list_empty(&gt->tlb_invalidation.pending_fences))
-			mod_delayed_work(system_wq,
-					 &gt->tlb_invalidation.fence_tdr,
-					 TLB_TIMEOUT);
-		else
-			cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
 	}
 
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		mod_delayed_work(system_wq,
+				 &gt->tlb_invalidation.fence_tdr,
+				 TLB_TIMEOUT);
+	else
+		cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+
+	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7d4de019f9a5e..28b8e8a86fc90 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -163,6 +163,11 @@ struct xe_gt {
 		 * invaliations, protected by CT lock
 		 */
 		struct list_head pending_fences;
+		/**
+		 * @pending_lock: protects @pending_fences and updating
+		 * @seqno_recv.
+		 */
+		spinlock_t pending_lock;
 		/**
 		 * @fence_tdr: schedules a delayed call to
 		 * xe_gt_tlb_fence_timeout after the timeut interval is over.
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 0b086d17c083c..9fb5fd4391d2c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -994,15 +994,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 			return 0;
 
 		switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) {
-		/*
-		 * FIXME: We really should process
-		 * XE_GUC_ACTION_TLB_INVALIDATION_DONE here in the fast-path as
-		 * these critical for page fault performance. We currently can't
-		 * due to TLB invalidation done algorithm expecting the seqno
-		 * returned in-order. With some small changes to the algorithm
-		 * and locking we should be able to support out-of-order seqno.
-		 */
 		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
 			break;	/* Process these in fast-path */
 		default:
 			return 0;
@@ -1056,8 +1049,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
 	struct xe_device *xe = ct_to_xe(ct);
 	int len;
 
-	if (!xe_device_in_fault_mode(xe) ||
-	    !xe_device_mem_access_get_if_ongoing(xe))
+	if (!xe_device_mem_access_get_if_ongoing(xe))
 		return;
 
 	spin_lock(&ct->fast_lock);
-- 
GitLab


From 356010a1a0c9fbe55d6c7e5dbd273a0fd224469e Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 26 Jun 2023 18:20:40 +0100
Subject: [PATCH 1714/2340] drm/xe/mmio: update gt_count when probing
 multi-tile

It looks like the single-tile PVC in CI dies during module load when doing
the pcode init. From the logs we try to access the address
0000000000138124 which doesn't map to anything, however 0x138124 also
looks to be the PCODE_MAILBOX register. So looks like the per-tile
mmio register mapping is NULL.

During probe the tile count is potentially trimmed, since we don't know
the real count until we actually probe the device. This seems to be
the case for single-tile PVC or similar devices.  However it looks like
the gt_count is never adjusted to respect this updated tile count. As a
result when later doing some for_each_gt() loop, like we do for the
pcode, we can get back some GT that maps to some non-existent tile
which hasn't been properly set up, leading to crashes.

Try to fix this by adjusting the gt_count after probing the tiles for
real.

v2: Fix typo so it actually builds

References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/383
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 5effb21db9d4a..779f0a18a815a 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -335,6 +335,12 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 	adj_tile_count = xe->info.tile_count =
 		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
 
+	/*
+	 * FIXME: Needs some work for standalone media, but should be impossible
+	 * with multi-tile for now.
+	 */
+	xe->info.gt_count = xe->info.tile_count;
+
 	drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
 		 xe->info.tile_count, adj_tile_count);
 
-- 
GitLab


From b1f8f4b5eec62173955c04d98723a75f2cfd8f42 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Wed, 12 Jul 2023 18:25:21 -0700
Subject: [PATCH 1715/2340] drm/xe: Fix BUG_ON during bind with prefetch

It was missed that print_op needs to include DRM_GPUVA_OP_PREFETCH.

Else we hit the impossible BUG_ON:
[  886.371040] ------------[ cut here ]------------
[  886.371047] kernel BUG at drivers/gpu/drm/xe/xe_vm.c:2234!
[  886.371216] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[  886.371229] CPU: 1 PID: 3132 Comm: xe_exec_fault_m
[  886.371257] RIP: 0010:vm_bind_ioctl_ops_create+0x45f/0x470 [xe]
...
[  886.371517] Call Trace:
[  886.371525]  <TASK>
[  886.371531]  ? __die_body+0x1a/0x60
[  886.371546]  ? die+0x38/0x60
[  886.371557]  ? do_trap+0x10a/0x120
[  886.371568]  ? vm_bind_ioctl_ops_create+0x45f/0x470 [xe]

v2: add debug print for PREFETCH in print_op

Fixes: b06d47be7c83 ("drm/xe: Port Xe to GPUVA")
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 297b7977ed87d..ee67b4fd7320c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2227,6 +2227,11 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
 		       op->unmap.keep ? 1 : 0);
 		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		vma = gpuva_to_vma(op->prefetch.va);
+		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
+		break;
 	default:
 		XE_BUG_ON("NOT POSSIBLE");
 	}
-- 
GitLab


From 04194a4f780895799cf83c86d5bb8bc11560a536 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Wed, 12 Jul 2023 18:25:42 -0700
Subject: [PATCH 1716/2340] drm/xe: Fix lockdep warning from xe_vm_madvise

We need to hold vm->lock before the xe_vm_is_closed_or_banned().

Else we get this splat:
[  802.555227] ------------[ cut here ]------------
[  802.555234] WARNING: CPU: 33 PID: 3122 at drivers/gpu/drm/xe/xe_vm.h:60
[  802.555515] CPU: 33 PID: 3122 Comm: xe_exec_fault_m Tainted:
...
[  802.555709] Call Trace:
[  802.555714]  <TASK>
[  802.555720]  ? __warn+0x81/0x170
[  802.555737]  ? xe_vm_madvise_ioctl+0x2de/0x440 [xe]

Fixes: 9d858b69b0cf ("drm/xe: Ban a VM if rebind worker hits an error")
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm_madvise.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 32f92743d851a..5b775f7422337 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -267,11 +267,6 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_ERR(xe, !vm))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
-		err = -ENOENT;
-		goto put_vm;
-	}
-
 	if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) {
 		err = -EINVAL;
 		goto put_vm;
@@ -279,6 +274,11 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
 
 	down_read(&vm->lock);
 
+	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+		err = -ENOENT;
+		goto unlock_vm;
+	}
+
 	vmas = get_vmas(vm, &num_vmas, args->addr, args->range);
 	if (XE_IOCTL_ERR(xe, err))
 		goto unlock_vm;
-- 
GitLab


From 4cd6d492595fdcbb158def8b175ca1558363e742 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 11 Jul 2023 16:24:30 +0200
Subject: [PATCH 1717/2340] drm/xe: Cleanup SPACING style issues

Remove almost all existing style issues of type SPACING reported
by checkpatch.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.h       |  4 ++--
 drivers/gpu/drm/xe/xe_device_types.h |  4 ++--
 drivers/gpu/drm/xe/xe_force_wake.c   |  2 +-
 drivers/gpu/drm/xe/xe_gt.h           |  2 +-
 drivers/gpu/drm/xe/xe_gt_clock.c     |  8 ++++----
 drivers/gpu/drm/xe/xe_gt_types.h     |  8 ++++----
 drivers/gpu/drm/xe/xe_rtp.h          |  4 ++--
 drivers/gpu/drm/xe/xe_rtp_helpers.h  | 10 +++++-----
 drivers/gpu/drm/xe/xe_sched_job.c    |  2 +-
 drivers/gpu/drm/xe/xe_uc_fw.c        |  4 ++--
 10 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 8e01bbadb1498..5c827beaea284 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -122,7 +122,7 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe)
 
 #define for_each_tile(tile__, xe__, id__) \
 	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
-		for_each_if ((tile__) = &(xe__)->tiles[(id__)])
+		for_each_if((tile__) = &(xe__)->tiles[(id__)])
 
 /*
  * FIXME: This only works for now since multi-tile and standalone media
@@ -130,7 +130,7 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe)
  */
 #define for_each_gt(gt__, xe__, id__) \
 	for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \
-		for_each_if ((gt__) = xe_device_get_gt((xe__), (id__)))
+		for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))
 
 static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt)
 {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index db08d64abce19..fb2329ccce065 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -47,8 +47,8 @@ struct xe_ggtt;
 
 #define tile_to_xe(tile__)								\
 	_Generic(tile__,								\
-		 const struct xe_tile *: (const struct xe_device *)((tile__)->xe),	\
-		 struct xe_tile *: (tile__)->xe)
+		 const struct xe_tile * : (const struct xe_device *)((tile__)->xe),	\
+		 struct xe_tile * : (tile__)->xe)
 
 /**
  * struct xe_tile - hardware tile structure
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index f0f0592fc598a..7403673d532dc 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -87,7 +87,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 			    BIT(0), BIT(16));
 	}
 
-	for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
 		if (!(gt->info.engine_mask & BIT(i)))
 			continue;
 
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 21d9044088def..a523d7941afe8 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -13,7 +13,7 @@
 
 #define for_each_hw_engine(hwe__, gt__, id__) \
 	for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
-	     for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \
+	     for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
 			  xe_hw_engine_is_valid((hwe__)))
 
 struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 7cf11078ff572..932b61e0cf673 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -38,13 +38,13 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
 					  rpm_config_reg);
 
 	switch (crystal_clock) {
-	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ :
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
 		return f24_mhz;
-	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ :
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
 		return f19_2_mhz;
-	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ :
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
 		return f38_4_mhz;
-	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ :
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
 		return f25_mhz;
 	default:
 		XE_BUG_ON("NOT_POSSIBLE");
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 28b8e8a86fc90..78a9fe9f0bd36 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -75,13 +75,13 @@ enum xe_steering_type {
 
 #define gt_to_tile(gt__)							\
 	_Generic(gt__,								\
-		 const struct xe_gt *: (const struct xe_tile *)((gt__)->tile),	\
-		 struct xe_gt *: (gt__)->tile)
+		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile),	\
+		 struct xe_gt * : (gt__)->tile)
 
 #define gt_to_xe(gt__)										\
 	_Generic(gt__,										\
-		 const struct xe_gt *: (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
-		 struct xe_gt *: gt_to_tile(gt__)->xe)
+		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
+		 struct xe_gt * : gt_to_tile(gt__)->xe)
 
 /**
  * struct xe_gt - A "Graphics Technology" unit of the GPU
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 8581bd9b14269..04ccb26452ad4 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -373,8 +373,8 @@ struct xe_reg_sr;
 	}
 
 #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__),							\
-	struct xe_hw_engine *:	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE },	\
-	struct xe_gt *:		(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+	struct xe_hw_engine * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE },	\
+	struct xe_gt * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
 
 void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
 					       unsigned long *active_entries,
diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h
index 1beea434d52d2..181b6290fac32 100644
--- a/drivers/gpu/drm/xe/xe_rtp_helpers.h
+++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h
@@ -14,13 +14,13 @@
  * Helper macros - not to be used outside this header.
  */
 #define _XE_ESC(...) __VA_ARGS__
-#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,))
-#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_
+#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,))
+#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_
 
 #define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
-#define __XE_FIRST(x_,...) x_
+#define __XE_FIRST(x_, ...) x_
 #define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
-#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__)
+#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__)
 
 #define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
 
@@ -59,7 +59,7 @@
  *
  *	XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
  */
-#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_))
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_))
 #define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
 #define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
 #define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 795146dfd663c..c87f65c98b3da 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -35,7 +35,7 @@ int __init xe_sched_job_module_init(void)
 		kmem_cache_create("xe_sched_job_parallel",
 				  sizeof(struct xe_sched_job) +
 				  sizeof(u64) *
-				  XE_HW_ENGINE_MAX_INSTANCE , 0,
+				  XE_HW_ENGINE_MAX_INSTANCE, 0,
 				  SLAB_HWCACHE_ALIGN, NULL);
 	if (!xe_sched_job_parallel_slab) {
 		kmem_cache_destroy(xe_sched_job_slab);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index bc63c0d3e33ab..84df4ce45e03a 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -100,8 +100,8 @@ struct fw_blobs_by_type {
 };
 
 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
-	fw_def(METEORLAKE,	mmp_ver(  i915,	guc,	mtl,	70, 6, 4))	\
-	fw_def(PVC,		mmp_ver(  xe,	guc,	pvc,	70, 6, 4))	\
+	fw_def(METEORLAKE,	mmp_ver(i915,	guc,	mtl,	70, 6, 4))	\
+	fw_def(PVC,		mmp_ver(xe,	guc,	pvc,	70, 6, 4))	\
 	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
 	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
 	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 5))		\
-- 
GitLab


From fb1d55efdfcbfd8711f7b8db65267f370fa0e49b Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 11 Jul 2023 16:58:20 +0200
Subject: [PATCH 1718/2340] drm/xe: Cleanup OPEN_BRACE style issues

Remove almost all existing style issues of type OPEN_BRACE reported
by checkpatch.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 3db550c85e32c..ff40753875649 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -429,7 +429,8 @@ static void mcr_lock(struct xe_gt *gt)
 	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
 }
 
-static void mcr_unlock(struct xe_gt *gt) {
+static void mcr_unlock(struct xe_gt *gt)
+{
 	/* Release hardware semaphore */
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
 		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
-- 
GitLab


From 4ab5901cc0ed8951ae58b01740d0037dbbca8558 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 11 Jul 2023 17:33:55 +0200
Subject: [PATCH 1719/2340] drm/xe: Cleanup POINTER_LOCATION style issues

Remove all existing style issues of type POINTER_LOCATION reported
by checkpatch.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.h | 2 +-
 drivers/gpu/drm/xe/xe_lrc.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 5c827beaea284..a64828bc6ad24 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -132,7 +132,7 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe)
 	for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \
 		for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))
 
-static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt)
+static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
 {
 	return &gt->mmio.fw;
 }
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 8f25a38f36a57..ddb1b1d6d00d8 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -441,7 +441,7 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
 	}
 }
 
-static void set_context_control(u32 * regs, struct xe_hw_engine *hwe)
+static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
 {
 	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
 				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-- 
GitLab


From 763931d25c7f40226c5e5edd8dcf90f2f2dfcddf Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 11 Jul 2023 17:35:57 +0200
Subject: [PATCH 1720/2340] drm/xe: Cleanup CODE_INDENT style issues

Remove all existing style issues of type CODE_INDENT reported
by checkpatch.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c     |  4 ++--
 drivers/gpu/drm/xe/xe_guc_fwif.h   | 12 ++++++------
 drivers/gpu/drm/xe/xe_guc_submit.c | 14 +++++++-------
 drivers/gpu/drm/xe/xe_hw_engine.c  |  2 +-
 drivers/gpu/drm/xe/xe_uc.c         | 10 +++++-----
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index af75c9a0ea7bb..bd800eaa37a68 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -173,7 +173,7 @@ enum xe_engine_priority
 xe_engine_device_get_max_priority(struct xe_device *xe)
 {
 	return capable(CAP_SYS_NICE) ? XE_ENGINE_PRIORITY_HIGH :
-			               XE_ENGINE_PRIORITY_NORMAL;
+				       XE_ENGINE_PRIORITY_NORMAL;
 }
 
 static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
@@ -540,7 +540,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 		return -EFAULT;
 
 	if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
-	       return -EINVAL;
+		return -EINVAL;
 
 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
 		for_each_gt(gt, xe, id) {
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 27d132ce2087e..e215e8b2c17a7 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -64,19 +64,19 @@ struct guc_ctxt_registration_info {
 
 /* 32-bit KLV structure as used by policy updates and others */
 struct guc_klv_generic_dw_t {
-        u32 kl;
-        u32 value;
+	u32 kl;
+	u32 value;
 } __packed;
 
 /* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
 struct guc_update_engine_policy_header {
-        u32 action;
-        u32 guc_id;
+	u32 action;
+	u32 guc_id;
 } __packed;
 
 struct guc_update_engine_policy {
-        struct guc_update_engine_policy_header header;
-        struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
+	struct guc_update_engine_policy_header header;
+	struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
 } __packed;
 
 /* GUC_CTL_* - Parameters for loading the GuC */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 9c0fd1368b77a..0c07cd4ad204f 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -353,17 +353,17 @@ static const int xe_engine_prio_to_guc[] = {
 
 static void init_policies(struct xe_guc *guc, struct xe_engine *e)
 {
-        struct engine_policy policy;
+	struct engine_policy policy;
 	enum xe_engine_priority prio = e->priority;
 	u32 timeslice_us = e->sched_props.timeslice_us;
 	u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;
 
 	XE_BUG_ON(!engine_registered(e));
 
-        __guc_engine_policy_start_klv(&policy, e->guc->id);
-        __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
-        __guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
-        __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);
+	__guc_engine_policy_start_klv(&policy, e->guc->id);
+	__guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
+	__guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
+	__guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);
 
 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
 		       __guc_engine_policy_action_size(&policy), 0, 0);
@@ -373,8 +373,8 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
 {
 	struct engine_policy policy;
 
-        __guc_engine_policy_start_klv(&policy, e->guc->id);
-        __guc_engine_policy_add_preemption_timeout(&policy, 1);
+	__guc_engine_policy_start_klv(&policy, e->guc->id);
+	__guc_engine_policy_add_preemption_timeout(&policy, 1);
 
 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
 		       __guc_engine_policy_action_size(&policy), 0, 0);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b7b02c96e9982..1af5cccd11426 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -312,7 +312,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 	/* TODO: missing handling of HAS_L3_CCS_READ platforms */
 	const u8 mocs_read_idx = gt->mocs.uc_index;
 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
-			        REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
+				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 	const struct xe_rtp_entry_sr engine_entries[] = {
 		/*
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 70eabf5671568..e244d27b55d5c 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -201,14 +201,14 @@ int xe_uc_start(struct xe_uc *uc)
 
 static void uc_reset_wait(struct xe_uc *uc)
 {
-       int ret;
+	int ret;
 
 again:
-       xe_guc_reset_wait(&uc->guc);
+	xe_guc_reset_wait(&uc->guc);
 
-       ret = xe_uc_reset_prepare(uc);
-       if (ret)
-               goto again;
+	ret = xe_uc_reset_prepare(uc);
+	if (ret)
+		goto again;
 }
 
 int xe_uc_suspend(struct xe_uc *uc)
-- 
GitLab


From 80c58bdf0ea28ccb2e78647d53524ef86486e3ec Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 13 Jul 2023 15:38:48 +0200
Subject: [PATCH 1721/2340] drm/xe: Cleanup TRAILING_WHITESPACE style issues

Remove all existing style issues of type TRAILING_WHITESPACE reported
by checkpatch.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 5eaa9bed9d128..21087f7a4609f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -482,7 +482,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
 			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
-			     /* 
+			     /*
 			      * Wa_14012342262 write-only reg, so skip
 			      * verification
 			      */
@@ -492,7 +492,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
 			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
-			     /* 
+			     /*
 			      * Wa_14012342262 write-only reg, so skip
 			      * verification
 			      */
-- 
GitLab


From f5b85ab62b0ae0e6b5817312eeb252effaea2453 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 13 Jul 2023 16:20:20 +0200
Subject: [PATCH 1722/2340] drm/xe: Cleanup COMPLEX_MACRO style issues

Remove some style issues of type COMPLEX_MACRO reported by checkpatch.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c  | 2 +-
 drivers/gpu/drm/xe/xe_guc_debugfs.c | 2 +-
 drivers/gpu/drm/xe/xe_huc_debugfs.c | 2 +-
 drivers/gpu/drm/xe/xe_lrc.c         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index b5a5538ae6306..d0092d714ffe8 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -171,7 +171,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	 * entry and drm_debugfs_create_files just references the drm_info_list
 	 * passed in (e.g. can't define this on the stack).
 	 */
-#define DEBUGFS_SIZE	ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
 	local = drmm_kmalloc(&gt_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL);
 	if (!local) {
 		XE_WARN_ON("Couldn't allocate memory");
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index 6b72db4d5bb23..0178b1a2d367f 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -87,7 +87,7 @@ void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
 	struct drm_info_list *local;
 	int i;
 
-#define DEBUGFS_SIZE	ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
 	local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
 	if (!local) {
 		XE_WARN_ON("Couldn't allocate memory");
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
index ee3d8315036a0..ae3c21315d594 100644
--- a/drivers/gpu/drm/xe/xe_huc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -53,7 +53,7 @@ void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
 	struct drm_info_list *local;
 	int i;
 
-#define DEBUGFS_SIZE	ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
 	local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
 	if (!local) {
 		XE_WARN_ON("Couldn't allocate memory");
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index ddb1b1d6d00d8..d5f782f8d2a6c 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -483,7 +483,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
 
 #define LRC_SEQNO_PPHWSP_OFFSET 512
-#define LRC_START_SEQNO_PPHWSP_OFFSET LRC_SEQNO_PPHWSP_OFFSET + 8
+#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
 #define LRC_PARALLEL_PPHWSP_OFFSET 2048
 #define LRC_PPHWSP_SIZE SZ_4K
 
-- 
GitLab


From 5ce58303440b7efb21c554cb0b6614482aab8fe9 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 13 Jul 2023 16:50:35 +0200
Subject: [PATCH 1723/2340] drm/xe: Fix typos

Fix minor issues: remove extra ';' and s/Initialise/Initialize/.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 2 +-
 drivers/gpu/drm/xe/xe_vm.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 8245bbc587701..ce8b35dcbc519 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -200,7 +200,7 @@ static void guc_init_params(struct xe_guc *guc)
 }
 
 /*
- * Initialise the GuC parameter block before starting the firmware
+ * Initialize the GuC parameter block before starting the firmware
  * transfer. These parameters are read by the firmware on startup
  * and cannot be changed thereafter.
  */
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ee67b4fd7320c..2052f1edc1ea4 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3361,7 +3361,7 @@ int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 	XE_BUG_ON(!ww);
 
 	tv_vm.num_shared = num_resv;
-	tv_vm.bo = xe_vm_ttm_bo(vm);;
+	tv_vm.bo = xe_vm_ttm_bo(vm);
 	list_add_tail(&tv_vm.head, &objs);
 
 	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
-- 
GitLab


From b8c1ba831e675005ff871cd4a4e04ff90326b4ae Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Mon, 17 Jul 2023 10:20:18 +0200
Subject: [PATCH 1724/2340] drm/xe: Prevent flooding the kernel log with
 XE_IOCTL_ERR

Lower log level of XE_IOCTL_ERR macro to debug in order to prevent flooding
kernel log.

v2: Rename XE_IOCTL_ERR to XE_IOCTL_DBG (Rodrigo Vivi)
v3: Rebase
v4: Fix style, remove unrelated change about __FILE__ and __LINE__

Link: https://lists.freedesktop.org/archives/intel-xe/2023-May/004704.html
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c              |  26 ++---
 drivers/gpu/drm/xe/xe_engine.c          | 114 +++++++++---------
 drivers/gpu/drm/xe/xe_exec.c            |  14 +--
 drivers/gpu/drm/xe/xe_macros.h          |   4 +-
 drivers/gpu/drm/xe/xe_mmio.c            |  14 +--
 drivers/gpu/drm/xe/xe_query.c           |  30 ++---
 drivers/gpu/drm/xe/xe_sync.c            |  32 +++---
 drivers/gpu/drm/xe/xe_vm.c              | 146 ++++++++++++------------
 drivers/gpu/drm/xe/xe_vm_madvise.c      |  40 +++----
 drivers/gpu/drm/xe/xe_wait_user_fence.c |  30 ++---
 10 files changed, 225 insertions(+), 225 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 00b8b5e7f197c..1031cb69219d3 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1724,35 +1724,35 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	u32 handle;
 	int err;
 
-	if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags &
+	if (XE_IOCTL_DBG(xe, args->flags &
 			 ~(XE_GEM_CREATE_FLAG_DEFER_BACKING |
 			   XE_GEM_CREATE_FLAG_SCANOUT |
 			   xe->info.mem_region_mask)))
 		return -EINVAL;
 
 	/* at least one memory type must be specified */
-	if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask)))
+	if (XE_IOCTL_DBG(xe, !(args->flags & xe->info.mem_region_mask)))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->handle))
+	if (XE_IOCTL_DBG(xe, args->handle))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !args->size))
+	if (XE_IOCTL_DBG(xe, !args->size))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX))
+	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK))
+	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
 		return -EINVAL;
 
 	if (args->vm_id) {
 		vm = xe_vm_lookup(xef, args->vm_id);
-		if (XE_IOCTL_ERR(xe, !vm))
+		if (XE_IOCTL_DBG(xe, !vm))
 			return -ENOENT;
 		err = xe_vm_lock(vm, &ww, 0, true);
 		if (err) {
@@ -1795,15 +1795,15 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_gem_mmap_offset *args = data;
 	struct drm_gem_object *gem_obj;
 
-	if (XE_IOCTL_ERR(xe, args->extensions) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags))
+	if (XE_IOCTL_DBG(xe, args->flags))
 		return -EINVAL;
 
 	gem_obj = drm_gem_object_lookup(file, args->handle);
-	if (XE_IOCTL_ERR(xe, !gem_obj))
+	if (XE_IOCTL_DBG(xe, !gem_obj))
 		return -ENOENT;
 
 	/* The mmap offset was set up at BO allocation time. */
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index bd800eaa37a68..c31e55c10a33e 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -179,10 +179,10 @@ xe_engine_device_get_max_priority(struct xe_device *xe)
 static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
 			       u64 value, bool create)
 {
-	if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH))
+	if (XE_IOCTL_DBG(xe, value > XE_ENGINE_PRIORITY_HIGH))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, value > xe_engine_device_get_max_priority(xe)))
+	if (XE_IOCTL_DBG(xe, value > xe_engine_device_get_max_priority(xe)))
 		return -EPERM;
 
 	return e->ops->set_priority(e, value);
@@ -210,33 +210,33 @@ static int engine_set_preemption_timeout(struct xe_device *xe,
 static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
 				   u64 value, bool create)
 {
-	if (XE_IOCTL_ERR(xe, !create))
+	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+	if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM))
+	if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_VM))
 		return -EINVAL;
 
 	if (value) {
 		struct xe_vm *vm = e->vm;
 		int err;
 
-		if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm)))
+		if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm)))
 			return -EOPNOTSUPP;
 
-		if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm)))
+		if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm)))
 			return -EOPNOTSUPP;
 
-		if (XE_IOCTL_ERR(xe, e->width != 1))
+		if (XE_IOCTL_DBG(xe, e->width != 1))
 			return -EINVAL;
 
 		e->compute.context = dma_fence_context_alloc(1);
 		spin_lock_init(&e->compute.lock);
 
 		err = xe_vm_add_compute_engine(vm, e);
-		if (XE_IOCTL_ERR(xe, err))
+		if (XE_IOCTL_DBG(xe, err))
 			return err;
 
 		e->flags |= ENGINE_FLAG_COMPUTE_MODE;
@@ -249,10 +249,10 @@ static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
 static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
 				  u64 value, bool create)
 {
-	if (XE_IOCTL_ERR(xe, !create))
+	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+	if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
 		return -EINVAL;
 
 	if (value)
@@ -266,7 +266,7 @@ static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
 static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
 				  u64 value, bool create)
 {
-	if (XE_IOCTL_ERR(xe, !create))
+	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
 	if (!capable(CAP_SYS_NICE))
@@ -278,10 +278,10 @@ static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
 static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
 				  u64 value, bool create)
 {
-	if (XE_IOCTL_ERR(xe, !create))
+	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
 	e->usm.acc_trigger = value;
@@ -292,10 +292,10 @@ static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
 static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
 				 u64 value, bool create)
 {
-	if (XE_IOCTL_ERR(xe, !create))
+	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
 	e->usm.acc_notify = value;
@@ -306,10 +306,10 @@ static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
 static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
 				      u64 value, bool create)
 {
-	if (XE_IOCTL_ERR(xe, !create))
+	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
 	e->usm.acc_granularity = value;
@@ -344,12 +344,12 @@ static int engine_user_ext_set_property(struct xe_device *xe,
 	u32 idx;
 
 	err = __copy_from_user(&ext, address, sizeof(ext));
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, ext.property >=
+	if (XE_IOCTL_DBG(xe, ext.property >=
 			 ARRAY_SIZE(engine_set_property_funcs)) ||
-	    XE_IOCTL_ERR(xe, ext.pad))
+	    XE_IOCTL_DBG(xe, ext.pad))
 		return -EINVAL;
 
 	idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
@@ -374,22 +374,22 @@ static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
 	int err;
 	u32 idx;
 
-	if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
 		return -E2BIG;
 
 	err = __copy_from_user(&ext, address, sizeof(ext));
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, ext.pad) ||
-	    XE_IOCTL_ERR(xe, ext.name >=
+	if (XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.name >=
 			 ARRAY_SIZE(engine_user_extension_funcs)))
 		return -EINVAL;
 
 	idx = array_index_nospec(ext.name,
 				 ARRAY_SIZE(engine_user_extension_funcs));
 	err = engine_user_extension_funcs[idx](xe, e, extensions, create);
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		return err;
 
 	if (ext.next_extension)
@@ -435,11 +435,11 @@ static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 	enum xe_hw_engine_id id;
 	u32 logical_mask = 0;
 
-	if (XE_IOCTL_ERR(xe, width != 1))
+	if (XE_IOCTL_DBG(xe, width != 1))
 		return 0;
-	if (XE_IOCTL_ERR(xe, num_placements != 1))
+	if (XE_IOCTL_DBG(xe, num_placements != 1))
 		return 0;
-	if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0))
+	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
 		return 0;
 
 	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
@@ -466,7 +466,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 	u16 gt_id;
 	u32 return_mask = 0, prev_mask;
 
-	if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) &&
+	if (XE_IOCTL_DBG(xe, !xe_device_guc_submission_enabled(xe) &&
 			 len > 1))
 		return 0;
 
@@ -479,14 +479,14 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 			n = j * width + i;
 
 			hwe = find_hw_engine(xe, eci[n]);
-			if (XE_IOCTL_ERR(xe, !hwe))
+			if (XE_IOCTL_DBG(xe, !hwe))
 				return 0;
 
-			if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe)))
+			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
 				return 0;
 
-			if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) ||
-			    XE_IOCTL_ERR(xe, n && eci[n].engine_class != class))
+			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
+			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
 				return 0;
 
 			class = eci[n].engine_class;
@@ -498,7 +498,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 		}
 
 		/* Parallel submissions must be logically contiguous */
-		if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1))
+		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
 			return 0;
 
 		prev_mask = current_mask;
@@ -525,21 +525,21 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 	u32 len;
 	int err;
 
-	if (XE_IOCTL_ERR(xe, args->flags) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->flags) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	len = args->width * args->num_placements;
-	if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
+	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
 		return -EINVAL;
 
 	err = __copy_from_user(eci, user_eci,
 			       sizeof(struct drm_xe_engine_class_instance) *
 			       len);
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
+	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.tile_count))
 		return -EINVAL;
 
 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
@@ -553,11 +553,11 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			logical_mask = bind_engine_logical_mask(xe, gt, eci,
 								args->width,
 								args->num_placements);
-			if (XE_IOCTL_ERR(xe, !logical_mask))
+			if (XE_IOCTL_DBG(xe, !logical_mask))
 				return -EINVAL;
 
 			hwe = find_hw_engine(xe, eci[0]);
-			if (XE_IOCTL_ERR(xe, !hwe))
+			if (XE_IOCTL_DBG(xe, !hwe))
 				return -EINVAL;
 
 			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
@@ -586,15 +586,15 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 		logical_mask = calc_validate_logical_mask(xe, gt, eci,
 							  args->width,
 							  args->num_placements);
-		if (XE_IOCTL_ERR(xe, !logical_mask))
+		if (XE_IOCTL_DBG(xe, !logical_mask))
 			return -EINVAL;
 
 		hwe = find_hw_engine(xe, eci[0]);
-		if (XE_IOCTL_ERR(xe, !hwe))
+		if (XE_IOCTL_DBG(xe, !hwe))
 			return -EINVAL;
 
 		vm = xe_vm_lookup(xef, args->vm_id);
-		if (XE_IOCTL_ERR(xe, !vm))
+		if (XE_IOCTL_DBG(xe, !vm))
 			return -ENOENT;
 
 		err = down_read_interruptible(&vm->lock);
@@ -603,7 +603,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			return err;
 		}
 
-		if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
 			up_read(&vm->lock);
 			xe_vm_put(vm);
 			return -ENOENT;
@@ -621,11 +621,11 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 
 	if (args->extensions) {
 		err = engine_user_extensions(xe, e, args->extensions, 0, true);
-		if (XE_IOCTL_ERR(xe, err))
+		if (XE_IOCTL_DBG(xe, err))
 			goto put_engine;
 	}
 
-	if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
+	if (XE_IOCTL_DBG(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
 			 !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
 		err = -EOPNOTSUPP;
 		goto put_engine;
@@ -658,11 +658,11 @@ int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
 	struct xe_engine *e;
 	int ret;
 
-	if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	e = xe_engine_lookup(xef, args->engine_id);
-	if (XE_IOCTL_ERR(xe, !e))
+	if (XE_IOCTL_DBG(xe, !e))
 		return -ENOENT;
 
 	switch (args->property) {
@@ -771,14 +771,14 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_engine_destroy *args = data;
 	struct xe_engine *e;
 
-	if (XE_IOCTL_ERR(xe, args->pad) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	mutex_lock(&xef->engine.lock);
 	e = xa_erase(&xef->engine.xa, args->engine_id);
 	mutex_unlock(&xef->engine.lock);
-	if (XE_IOCTL_ERR(xe, !e))
+	if (XE_IOCTL_DBG(xe, !e))
 		return -ENOENT;
 
 	if (!(e->flags & ENGINE_FLAG_PERSISTENT))
@@ -802,14 +802,14 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
 	int ret;
 	u32 idx;
 
-	if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	e = xe_engine_lookup(xef, args->engine_id);
-	if (XE_IOCTL_ERR(xe, !e))
+	if (XE_IOCTL_DBG(xe, !e))
 		return -ENOENT;
 
-	if (XE_IOCTL_ERR(xe, args->property >=
+	if (XE_IOCTL_DBG(xe, args->property >=
 			 ARRAY_SIZE(engine_set_property_funcs))) {
 		ret = -EINVAL;
 		goto out;
@@ -818,7 +818,7 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
 	idx = array_index_nospec(args->property,
 				 ARRAY_SIZE(engine_set_property_funcs));
 	ret = engine_set_property_funcs[idx](xe, e, args->value, false);
-	if (XE_IOCTL_ERR(xe, ret))
+	if (XE_IOCTL_DBG(xe, ret))
 		goto out;
 
 	if (args->extensions)
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 07f4b2e8df16d..ff9fa02b53955 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -184,22 +184,22 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	bool write_locked;
 	int err = 0;
 
-	if (XE_IOCTL_ERR(xe, args->extensions) ||
-	    XE_IOCTL_ERR(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	engine = xe_engine_lookup(xef, args->engine_id);
-	if (XE_IOCTL_ERR(xe, !engine))
+	if (XE_IOCTL_DBG(xe, !engine))
 		return -ENOENT;
 
-	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM))
+	if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_VM))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer))
+	if (XE_IOCTL_DBG(xe, engine->width != args->num_batch_buffer))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) {
+	if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_BANNED)) {
 		err = -ECANCELED;
 		goto err_engine;
 	}
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
index 0d24c124d2025..038cf28604add 100644
--- a/drivers/gpu/drm/xe/xe_macros.h
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -12,8 +12,8 @@
 #define XE_WARN_ON WARN_ON
 #define XE_BUG_ON BUG_ON
 
-#define XE_IOCTL_ERR(xe, cond) \
-	((cond) && (drm_info(&(xe)->drm, \
+#define XE_IOCTL_DBG(xe, cond) \
+	((cond) && (drm_dbg(&(xe)->drm, \
 			    "Ioctl argument check failed at %s:%d: %s", \
 			    __FILE__, __LINE__, #cond), 1))
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 779f0a18a815a..448b874c7a3c7 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -447,14 +447,14 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	bool allowed;
 	int ret = 0;
 
-	if (XE_IOCTL_ERR(xe, args->extensions) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS))
+	if (XE_IOCTL_DBG(xe, args->flags & ~VALID_MMIO_FLAGS))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value))
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value))
 		return -EINVAL;
 
 	allowed = capable(CAP_SYS_ADMIN);
@@ -469,12 +469,12 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	if (XE_IOCTL_ERR(xe, !allowed))
+	if (XE_IOCTL_DBG(xe, !allowed))
 		return -EPERM;
 
 	bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
 	bytes = 1 << bits_flag;
-	if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size))
+	if (XE_IOCTL_DBG(xe, args->addr + bytes > xe->mmio.size))
 		return -EINVAL;
 
 	/*
@@ -488,7 +488,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & DRM_XE_MMIO_WRITE) {
 		switch (bits_flag) {
 		case DRM_XE_MMIO_32BIT:
-			if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) {
+			if (XE_IOCTL_DBG(xe, args->value > U32_MAX)) {
 				ret = -EINVAL;
 				goto exit;
 			}
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 9acbb27dfcab9..4b7869596ba86 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -60,12 +60,12 @@ static int query_engines(struct xe_device *xe,
 	if (query->size == 0) {
 		query->size = size;
 		return 0;
-	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
 		return -EINVAL;
 	}
 
 	hw_engine_info = kmalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_ERR(xe, !hw_engine_info))
+	if (XE_IOCTL_DBG(xe, !hw_engine_info))
 		return -ENOMEM;
 
 	for_each_gt(gt, xe, gt_id)
@@ -114,12 +114,12 @@ static int query_memory_usage(struct xe_device *xe,
 	if (query->size == 0) {
 		query->size = size;
 		return 0;
-	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
 		return -EINVAL;
 	}
 
 	usage = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_ERR(xe, !usage))
+	if (XE_IOCTL_DBG(xe, !usage))
 		return -ENOMEM;
 
 	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
@@ -177,12 +177,12 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	if (query->size == 0) {
 		query->size = size;
 		return 0;
-	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
 		return -EINVAL;
 	}
 
 	config = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_ERR(xe, !config))
+	if (XE_IOCTL_DBG(xe, !config))
 		return -ENOMEM;
 
 	config->num_params = num_params;
@@ -226,12 +226,12 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 	if (query->size == 0) {
 		query->size = size;
 		return 0;
-	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
 		return -EINVAL;
 	}
 
 	gts = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_ERR(xe, !gts))
+	if (XE_IOCTL_DBG(xe, !gts))
 		return -ENOMEM;
 
 	gts->num_gt = xe->info.gt_count;
@@ -273,12 +273,12 @@ static int query_hwconfig(struct xe_device *xe,
 	if (query->size == 0) {
 		query->size = size;
 		return 0;
-	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
 		return -EINVAL;
 	}
 
 	hwconfig = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_ERR(xe, !hwconfig))
+	if (XE_IOCTL_DBG(xe, !hwconfig))
 		return -ENOMEM;
 
 	xe_device_mem_access_get(xe);
@@ -332,7 +332,7 @@ static int query_gt_topology(struct xe_device *xe,
 	if (query->size == 0) {
 		query->size = size;
 		return 0;
-	} else if (XE_IOCTL_ERR(xe, query->size != size)) {
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
 		return -EINVAL;
 	}
 
@@ -380,15 +380,15 @@ int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct drm_xe_device_query *query = data;
 	u32 idx;
 
-	if (XE_IOCTL_ERR(xe, query->extensions) ||
-	    XE_IOCTL_ERR(xe, query->reserved[0] || query->reserved[1]))
+	if (XE_IOCTL_DBG(xe, query->extensions) ||
+	    XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, query->query > ARRAY_SIZE(xe_query_funcs)))
+	if (XE_IOCTL_DBG(xe, query->query > ARRAY_SIZE(xe_query_funcs)))
 		return -EINVAL;
 
 	idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
-	if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx]))
+	if (XE_IOCTL_DBG(xe, !xe_query_funcs[idx]))
 		return -EINVAL;
 
 	return xe_query_funcs[idx](xe, query);
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 7786b908a3fda..9fcd7802ba30b 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -110,44 +110,44 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, sync_in.flags &
+	if (XE_IOCTL_DBG(xe, sync_in.flags &
 			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)) ||
-	    XE_IOCTL_ERR(xe, sync_in.pad) ||
-	    XE_IOCTL_ERR(xe, sync_in.reserved[0] || sync_in.reserved[1]))
+	    XE_IOCTL_DBG(xe, sync_in.pad) ||
+	    XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1]))
 		return -EINVAL;
 
 	signal = sync_in.flags & DRM_XE_SYNC_SIGNAL;
 	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
 	case DRM_XE_SYNC_SYNCOBJ:
-		if (XE_IOCTL_ERR(xe, no_dma_fences && signal))
+		if (XE_IOCTL_DBG(xe, no_dma_fences && signal))
 			return -EOPNOTSUPP;
 
-		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
 			return -EINVAL;
 
 		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
-		if (XE_IOCTL_ERR(xe, !sync->syncobj))
+		if (XE_IOCTL_DBG(xe, !sync->syncobj))
 			return -ENOENT;
 
 		if (!signal) {
 			sync->fence = drm_syncobj_fence_get(sync->syncobj);
-			if (XE_IOCTL_ERR(xe, !sync->fence))
+			if (XE_IOCTL_DBG(xe, !sync->fence))
 				return -EINVAL;
 		}
 		break;
 
 	case DRM_XE_SYNC_TIMELINE_SYNCOBJ:
-		if (XE_IOCTL_ERR(xe, no_dma_fences && signal))
+		if (XE_IOCTL_DBG(xe, no_dma_fences && signal))
 			return -EOPNOTSUPP;
 
-		if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
 			return -EINVAL;
 
-		if (XE_IOCTL_ERR(xe, sync_in.timeline_value == 0))
+		if (XE_IOCTL_DBG(xe, sync_in.timeline_value == 0))
 			return -EINVAL;
 
 		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
-		if (XE_IOCTL_ERR(xe, !sync->syncobj))
+		if (XE_IOCTL_DBG(xe, !sync->syncobj))
 			return -ENOENT;
 
 		if (signal) {
@@ -156,7 +156,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 				return -ENOMEM;
 		} else {
 			sync->fence = drm_syncobj_fence_get(sync->syncobj);
-			if (XE_IOCTL_ERR(xe, !sync->fence))
+			if (XE_IOCTL_DBG(xe, !sync->fence))
 				return -EINVAL;
 
 			err = dma_fence_chain_find_seqno(&sync->fence,
@@ -167,15 +167,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		break;
 
 	case DRM_XE_SYNC_DMA_BUF:
-		if (XE_IOCTL_ERR(xe, "TODO"))
+		if (XE_IOCTL_DBG(xe, "TODO"))
 			return -EINVAL;
 		break;
 
 	case DRM_XE_SYNC_USER_FENCE:
-		if (XE_IOCTL_ERR(xe, !signal))
+		if (XE_IOCTL_DBG(xe, !signal))
 			return -EOPNOTSUPP;
 
-		if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7))
+		if (XE_IOCTL_DBG(xe, sync_in.addr & 0x7))
 			return -EINVAL;
 
 		if (exec) {
@@ -183,7 +183,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		} else {
 			sync->ufence = user_fence_create(xe, sync_in.addr,
 							 sync_in.timeline_value);
-			if (XE_IOCTL_ERR(xe, !sync->ufence))
+			if (XE_IOCTL_DBG(xe, !sync->ufence))
 				return -ENOMEM;
 		}
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 2052f1edc1ea4..7f2f17c3b86e6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1875,13 +1875,13 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
 					u64 value)
 {
-	if (XE_IOCTL_ERR(xe, !value))
+	if (XE_IOCTL_DBG(xe, !value))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+	if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
 		return -EOPNOTSUPP;
 
-	if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr))
+	if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr))
 		return -EOPNOTSUPP;
 
 	vm->async_ops.error_capture.mm = current->mm;
@@ -1907,13 +1907,13 @@ static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
 	int err;
 
 	err = __copy_from_user(&ext, address, sizeof(ext));
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, ext.property >=
+	if (XE_IOCTL_DBG(xe, ext.property >=
 			 ARRAY_SIZE(vm_set_property_funcs)) ||
-	    XE_IOCTL_ERR(xe, ext.pad) ||
-	    XE_IOCTL_ERR(xe, ext.reserved[0] || ext.reserved[1]))
+	    XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1]))
 		return -EINVAL;
 
 	return vm_set_property_funcs[ext.property](xe, vm, ext.value);
@@ -1934,20 +1934,20 @@ static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
 	struct xe_user_extension ext;
 	int err;
 
-	if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
 		return -E2BIG;
 
 	err = __copy_from_user(&ext, address, sizeof(ext));
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_ERR(xe, ext.pad) ||
-	    XE_IOCTL_ERR(xe, ext.name >=
+	if (XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.name >=
 			 ARRAY_SIZE(vm_user_extension_funcs)))
 		return -EINVAL;
 
 	err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		return err;
 
 	if (ext.next_extension)
@@ -1973,29 +1973,29 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	int err;
 	u32 flags = 0;
 
-	if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
 			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
 			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
 			 xe_device_in_non_fault_mode(xe)))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
 			 xe_device_in_fault_mode(xe)))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
 			 !xe->info.supports_usm))
 		return -EINVAL;
 
@@ -2014,7 +2014,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 
 	if (args->extensions) {
 		err = vm_user_extensions(xe, vm, args->extensions, 0);
-		if (XE_IOCTL_ERR(xe, err)) {
+		if (XE_IOCTL_DBG(xe, err)) {
 			xe_vm_close_and_put(vm);
 			return err;
 		}
@@ -2060,15 +2060,15 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	struct xe_vm *vm;
 	int err = 0;
 
-	if (XE_IOCTL_ERR(xe, args->pad) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
 	mutex_lock(&xef->vm.lock);
 	vm = xa_load(&xef->vm.xa, args->vm_id);
-	if (XE_IOCTL_ERR(xe, !vm))
+	if (XE_IOCTL_DBG(xe, !vm))
 		err = -ENOENT;
-	else if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
+	else if (XE_IOCTL_DBG(xe, vm->preempt.num_engines))
 		err = -EBUSY;
 	else
 		xa_erase(&xef->vm.xa, args->vm_id);
@@ -2156,21 +2156,21 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
 	case XE_VM_BIND_OP_MAP:
 	case XE_VM_BIND_OP_MAP_USERPTR:
 		vma = xe_vm_find_overlapping_vma(vm, addr, range);
-		if (XE_IOCTL_ERR(xe, vma && !async))
+		if (XE_IOCTL_DBG(xe, vma && !async))
 			return -EBUSY;
 		break;
 	case XE_VM_BIND_OP_UNMAP:
 	case XE_VM_BIND_OP_PREFETCH:
 		vma = xe_vm_find_overlapping_vma(vm, addr, range);
-		if (XE_IOCTL_ERR(xe, !vma))
+		if (XE_IOCTL_DBG(xe, !vma))
 			return -ENODATA;	/* Not an actual error, IOCTL
 						   cleans up returns and 0 */
-		if (XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr ||
-				 xe_vma_end(vma) != addr + range) && !async))
+		if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr ||
+				      xe_vma_end(vma) != addr + range) && !async))
 			return -EINVAL;
 		break;
 	case XE_VM_BIND_OP_UNMAP_ALL:
-		if (XE_IOCTL_ERR(xe, list_empty(&bo->ttm.base.gpuva.list)))
+		if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list)))
 			return -ENODATA;	/* Not an actual error, IOCTL
 						   cleans up returns and 0 */
 		break;
@@ -3007,9 +3007,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 	int err;
 	int i;
 
-	if (XE_IOCTL_ERR(xe, args->extensions) ||
-	    XE_IOCTL_ERR(xe, !args->num_binds) ||
-	    XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, !args->num_binds) ||
+	    XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
 		return -EINVAL;
 
 	if (args->num_binds > 1) {
@@ -3024,7 +3024,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		err = __copy_from_user(*bind_ops, bind_user,
 				       sizeof(struct drm_xe_vm_bind_op) *
 				       args->num_binds);
-		if (XE_IOCTL_ERR(xe, err)) {
+		if (XE_IOCTL_DBG(xe, err)) {
 			err = -EFAULT;
 			goto free_bind_ops;
 		}
@@ -3043,60 +3043,60 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 
 		if (i == 0) {
 			*async = !!(op & XE_VM_BIND_FLAG_ASYNC);
-		} else if (XE_IOCTL_ERR(xe, !*async) ||
-			   XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
-			   XE_IOCTL_ERR(xe, VM_BIND_OP(op) ==
+		} else if (XE_IOCTL_DBG(xe, !*async) ||
+			   XE_IOCTL_DBG(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
+			   XE_IOCTL_DBG(xe, VM_BIND_OP(op) ==
 					XE_VM_BIND_OP_RESTART)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
-		if (XE_IOCTL_ERR(xe, !*async &&
+		if (XE_IOCTL_DBG(xe, !*async &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
-		if (XE_IOCTL_ERR(xe, !*async &&
+		if (XE_IOCTL_DBG(xe, !*async &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
-		if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
+		if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) >
 				 XE_VM_BIND_OP_PREFETCH) ||
-		    XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
-		    XE_IOCTL_ERR(xe, obj && is_null) ||
-		    XE_IOCTL_ERR(xe, obj_offset && is_null) ||
-		    XE_IOCTL_ERR(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
+		    XE_IOCTL_DBG(xe, op & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_DBG(xe, obj && is_null) ||
+		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
+		    XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
 				 is_null) ||
-		    XE_IOCTL_ERR(xe, !obj &&
+		    XE_IOCTL_DBG(xe, !obj &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP &&
 				 !is_null) ||
-		    XE_IOCTL_ERR(xe, !obj &&
+		    XE_IOCTL_DBG(xe, !obj &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
-		    XE_IOCTL_ERR(xe, addr &&
+		    XE_IOCTL_DBG(xe, addr &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
-		    XE_IOCTL_ERR(xe, range &&
+		    XE_IOCTL_DBG(xe, range &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
-		    XE_IOCTL_ERR(xe, obj &&
+		    XE_IOCTL_DBG(xe, obj &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
-		    XE_IOCTL_ERR(xe, obj &&
+		    XE_IOCTL_DBG(xe, obj &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
-		    XE_IOCTL_ERR(xe, region &&
+		    XE_IOCTL_DBG(xe, region &&
 				 VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
-		    XE_IOCTL_ERR(xe, !(BIT(region) &
+		    XE_IOCTL_DBG(xe, !(BIT(region) &
 				       xe->info.mem_region_mask)) ||
-		    XE_IOCTL_ERR(xe, obj &&
+		    XE_IOCTL_DBG(xe, obj &&
 				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
-		if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) ||
-		    XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) ||
-		    XE_IOCTL_ERR(xe, range & ~PAGE_MASK) ||
-		    XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) !=
+		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, !range && VM_BIND_OP(op) !=
 				 XE_VM_BIND_OP_RESTART &&
 				 VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
 			err = -EINVAL;
@@ -3136,19 +3136,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	if (args->engine_id) {
 		e = xe_engine_lookup(xef, args->engine_id);
-		if (XE_IOCTL_ERR(xe, !e)) {
+		if (XE_IOCTL_DBG(xe, !e)) {
 			err = -ENOENT;
 			goto free_objs;
 		}
 
-		if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
+		if (XE_IOCTL_DBG(xe, !(e->flags & ENGINE_FLAG_VM))) {
 			err = -EINVAL;
 			goto put_engine;
 		}
 	}
 
 	vm = xe_vm_lookup(xef, args->vm_id);
-	if (XE_IOCTL_ERR(xe, !vm)) {
+	if (XE_IOCTL_DBG(xe, !vm)) {
 		err = -EINVAL;
 		goto put_engine;
 	}
@@ -3157,17 +3157,17 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		goto put_vm;
 
-	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
 		err = -ENOENT;
 		goto release_vm_lock;
 	}
 
 	if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
-		if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+		if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
 			err = -EOPNOTSUPP;
-		if (XE_IOCTL_ERR(xe, !err && args->num_syncs))
+		if (XE_IOCTL_DBG(xe, !err && args->num_syncs))
 			err = EINVAL;
-		if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error))
+		if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error))
 			err = -EPROTO;
 
 		if (!err) {
@@ -3184,7 +3184,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto release_vm_lock;
 	}
 
-	if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
+	if (XE_IOCTL_DBG(xe, !vm->async_ops.error &&
 			 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
 		err = -EOPNOTSUPP;
 		goto release_vm_lock;
@@ -3194,8 +3194,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
 
-		if (XE_IOCTL_ERR(xe, range > vm->size) ||
-		    XE_IOCTL_ERR(xe, addr > vm->size - range)) {
+		if (XE_IOCTL_DBG(xe, range > vm->size) ||
+		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
 			err = -EINVAL;
 			goto release_vm_lock;
 		}
@@ -3203,7 +3203,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		if (bind_ops[i].tile_mask) {
 			u64 valid_tiles = BIT(xe->info.tile_count) - 1;
 
-			if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask &
+			if (XE_IOCTL_DBG(xe, bind_ops[i].tile_mask &
 					 ~valid_tiles)) {
 				err = -EINVAL;
 				goto release_vm_lock;
@@ -3234,24 +3234,24 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			continue;
 
 		gem_obj = drm_gem_object_lookup(file, obj);
-		if (XE_IOCTL_ERR(xe, !gem_obj)) {
+		if (XE_IOCTL_DBG(xe, !gem_obj)) {
 			err = -ENOENT;
 			goto put_obj;
 		}
 		bos[i] = gem_to_xe_bo(gem_obj);
 
-		if (XE_IOCTL_ERR(xe, range > bos[i]->size) ||
-		    XE_IOCTL_ERR(xe, obj_offset >
+		if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
+		    XE_IOCTL_DBG(xe, obj_offset >
 				 bos[i]->size - range)) {
 			err = -EINVAL;
 			goto put_obj;
 		}
 
 		if (bos[i]->flags & XE_BO_INTERNAL_64K) {
-			if (XE_IOCTL_ERR(xe, obj_offset &
+			if (XE_IOCTL_DBG(xe, obj_offset &
 					 XE_64K_PAGE_MASK) ||
-			    XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) ||
-			    XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) {
+			    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
 				err = -EINVAL;
 				goto put_obj;
 			}
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 5b775f7422337..9abcd742c8339 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -19,10 +19,10 @@ static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
 {
 	int i, err;
 
-	if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM))
+	if (XE_IOCTL_DBG(xe, value > XE_MEM_REGION_CLASS_VRAM))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM &&
+	if (XE_IOCTL_DBG(xe, value == XE_MEM_REGION_CLASS_VRAM &&
 			 !xe->info.is_dgfx))
 		return -EINVAL;
 
@@ -48,7 +48,7 @@ static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm,
 {
 	int i, err;
 
-	if (XE_IOCTL_ERR(xe, value > xe->info.tile_count))
+	if (XE_IOCTL_DBG(xe, value > xe->info.tile_count))
 		return -EINVAL;
 
 	for (i = 0; i < num_vmas; ++i) {
@@ -77,14 +77,14 @@ static int madvise_preferred_mem_class_gt(struct xe_device *xe,
 	u32 gt_id = upper_32_bits(value);
 	u32 mem_class = lower_32_bits(value);
 
-	if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM))
+	if (XE_IOCTL_DBG(xe, mem_class > XE_MEM_REGION_CLASS_VRAM))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, mem_class == XE_MEM_REGION_CLASS_VRAM &&
+	if (XE_IOCTL_DBG(xe, mem_class == XE_MEM_REGION_CLASS_VRAM &&
 			 !xe->info.is_dgfx))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count))
+	if (XE_IOCTL_DBG(xe, gt_id > xe->info.tile_count))
 		return -EINVAL;
 
 	for (i = 0; i < num_vmas; ++i) {
@@ -115,7 +115,7 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
 		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
-		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
+		if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
 			return -EINVAL;
 
 		err = xe_bo_lock(bo, &ww, 0, true);
@@ -146,7 +146,7 @@ static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm,
 		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
-		if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
+		if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
 				 !(bo->flags & XE_BO_CREATE_VRAM1_BIT)))
 			return -EINVAL;
 
@@ -165,10 +165,10 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
 {
 	int i, err;
 
-	if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH))
+	if (XE_IOCTL_DBG(xe, value > DRM_XE_VMA_PRIORITY_HIGH))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH &&
+	if (XE_IOCTL_DBG(xe, value == DRM_XE_VMA_PRIORITY_HIGH &&
 			 !capable(CAP_SYS_NICE)))
 		return -EPERM;
 
@@ -255,40 +255,40 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
 	struct xe_vma **vmas = NULL;
 	int num_vmas = 0, err = 0, idx;
 
-	if (XE_IOCTL_ERR(xe, args->extensions) ||
-	    XE_IOCTL_ERR(xe, args->pad || args->pad2) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->property > ARRAY_SIZE(madvise_funcs)))
+	if (XE_IOCTL_DBG(xe, args->property > ARRAY_SIZE(madvise_funcs)))
 		return -EINVAL;
 
 	vm = xe_vm_lookup(xef, args->vm_id);
-	if (XE_IOCTL_ERR(xe, !vm))
+	if (XE_IOCTL_DBG(xe, !vm))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) {
+	if (XE_IOCTL_DBG(xe, !xe_vm_in_fault_mode(vm))) {
 		err = -EINVAL;
 		goto put_vm;
 	}
 
 	down_read(&vm->lock);
 
-	if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) {
+	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
 		err = -ENOENT;
 		goto unlock_vm;
 	}
 
 	vmas = get_vmas(vm, &num_vmas, args->addr, args->range);
-	if (XE_IOCTL_ERR(xe, err))
+	if (XE_IOCTL_DBG(xe, err))
 		goto unlock_vm;
 
-	if (XE_IOCTL_ERR(xe, !vmas)) {
+	if (XE_IOCTL_DBG(xe, !vmas)) {
 		err = -ENOMEM;
 		goto unlock_vm;
 	}
 
-	if (XE_IOCTL_ERR(xe, !num_vmas)) {
+	if (XE_IOCTL_DBG(xe, !num_vmas)) {
 		err = -EINVAL;
 		goto unlock_vm;
 	}
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index c4420c0dbf9c2..c4202df1d4f03 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -117,51 +117,51 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	unsigned long timeout;
 	ktime_t start;
 
-	if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) ||
-	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
+	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS))
+	if (XE_IOCTL_DBG(xe, args->flags & ~VALID_FLAGS))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->op > MAX_OP))
+	if (XE_IOCTL_DBG(xe, args->op > MAX_OP))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, no_engines &&
+	if (XE_IOCTL_DBG(xe, no_engines &&
 			 (args->num_engines || args->instances)))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !no_engines && !args->num_engines))
+	if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) &&
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) &&
 			 addr & 0x7))
 		return -EINVAL;
 
-	if (XE_IOCTL_ERR(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE))
+	if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE))
 		return -EINVAL;
 
 	if (!no_engines) {
 		err = copy_from_user(eci, user_eci,
 				     sizeof(struct drm_xe_engine_class_instance) *
 			     args->num_engines);
-		if (XE_IOCTL_ERR(xe, err))
+		if (XE_IOCTL_DBG(xe, err))
 			return -EFAULT;
 
-		if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci,
+		if (XE_IOCTL_DBG(xe, check_hw_engines(xe, eci,
 						      args->num_engines)))
 			return -EINVAL;
 	}
 
 	if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) {
-		if (XE_IOCTL_ERR(xe, args->vm_id >> 32))
+		if (XE_IOCTL_DBG(xe, args->vm_id >> 32))
 			return -EINVAL;
 
 		vm = xe_vm_lookup(to_xe_file(file), args->vm_id);
-		if (XE_IOCTL_ERR(xe, !vm))
+		if (XE_IOCTL_DBG(xe, !vm))
 			return -ENOENT;
 
-		if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) {
+		if (XE_IOCTL_DBG(xe, !vm->async_ops.error_capture.addr)) {
 			xe_vm_put(vm);
 			return -EOPNOTSUPP;
 		}
@@ -226,9 +226,9 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 			args->timeout = 0;
 	}
 
-	if (XE_IOCTL_ERR(xe, err < 0))
+	if (XE_IOCTL_DBG(xe, err < 0))
 		return err;
-	else if (XE_IOCTL_ERR(xe, !timeout))
+	else if (XE_IOCTL_DBG(xe, !timeout))
 		return -ETIME;
 
 	return 0;
-- 
GitLab


From 3e8e7ee6a375217c4f6a9a96d50e3ae711832d37 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Mon, 17 Jul 2023 16:53:55 +0200
Subject: [PATCH 1725/2340] drm/xe: Cleanup style warnings

Reduce the number of warnings reported by checkpatch.pl from 118 to 48 by
addressing those warnings types:

  LEADING_SPACE
  LINE_SPACING
  BRACES
  TRAILING_SEMICOLON
  CONSTANT_COMPARISON
  BLOCK_COMMENT_STYLE
  RETURN_VOID
  ONE_SEMICOLON
  SUSPECT_CODE_INDENT
  LINE_CONTINUATIONS
  UNNECESSARY_ELSE
  UNSPECIFIED_INT
  UNNECESSARY_INT
  MISORDERED_TYPE

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c            |  2 +-
 drivers/gpu/drm/xe/xe_exec.c          |  1 +
 drivers/gpu/drm/xe/xe_execlist.h      |  2 +-
 drivers/gpu/drm/xe/xe_gt.h            |  2 +-
 drivers/gpu/drm/xe/xe_guc.c           |  2 +-
 drivers/gpu/drm/xe/xe_guc_ads.c       |  2 +-
 drivers/gpu/drm/xe/xe_guc_ct.c        |  3 +-
 drivers/gpu/drm/xe/xe_guc_fwif.h      | 12 ++--
 drivers/gpu/drm/xe/xe_guc_submit.c    |  2 +-
 drivers/gpu/drm/xe/xe_huc.c           |  1 +
 drivers/gpu/drm/xe/xe_irq.c           | 11 ++--
 drivers/gpu/drm/xe/xe_lrc.c           | 80 +++++++++++++--------------
 drivers/gpu/drm/xe/xe_migrate.c       |  3 +-
 drivers/gpu/drm/xe/xe_pcode.c         |  1 +
 drivers/gpu/drm/xe/xe_reg_whitelist.c |  2 +-
 drivers/gpu/drm/xe/xe_res_cursor.h    |  1 -
 drivers/gpu/drm/xe/xe_sa.c            |  2 +-
 drivers/gpu/drm/xe/xe_uc_fw.c         |  4 +-
 drivers/gpu/drm/xe/xe_vm.c            |  8 +--
 drivers/gpu/drm/xe/xe_vm_doc.h        |  4 +-
 20 files changed, 75 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 1031cb69219d3..9ad5cf3e24633 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1720,7 +1720,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	struct ww_acquire_ctx ww;
 	struct xe_vm *vm = NULL;
 	struct xe_bo *bo;
-	unsigned bo_flags = XE_BO_CREATE_USER_BIT;
+	unsigned int bo_flags = XE_BO_CREATE_USER_BIT;
 	u32 handle;
 	int err;
 
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index ff9fa02b53955..0209f325dda0e 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -243,6 +243,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	    vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
 		for (i = 0; i < args->num_syncs; i++) {
 			struct dma_fence *fence = syncs[i].fence;
+
 			if (fence) {
 				err = xe_vm_async_fence_wait_start(fence);
 				if (err)
diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h
index 6a0442a6eff62..26f600ac85522 100644
--- a/drivers/gpu/drm/xe/xe_execlist.h
+++ b/drivers/gpu/drm/xe/xe_execlist.h
@@ -11,7 +11,7 @@
 struct xe_device;
 struct xe_gt;
 
-#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock);
+#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock)
 
 int xe_execlist_init(struct xe_gt *gt);
 struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index a523d7941afe8..7298653a73de3 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -13,7 +13,7 @@
 
 #define for_each_hw_engine(hwe__, gt__, id__) \
 	for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
-	     for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
+		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
 			  xe_hw_engine_is_valid((hwe__)))
 
 struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index ce8b35dcbc519..d44537abf7daa 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -186,7 +186,7 @@ static void guc_init_params(struct xe_guc *guc)
 	int i;
 
 	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
-	BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2);
+	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
 
 	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
 	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index dd69d097b920d..d4c3a5ce32525 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -444,7 +444,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
 	struct xe_reg_sr_entry *entry;
 	unsigned long idx;
-	unsigned count = 0;
+	unsigned int count = 0;
 	const struct {
 		struct xe_reg reg;
 		bool skip;
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 9fb5fd4391d2c..c7992a8667e50 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -716,9 +716,8 @@ retry_same_fence:
 		ptr = xa_store(&ct->fence_lookup,
 			       g2h_fence.seqno,
 			       &g2h_fence, GFP_KERNEL);
-		if (IS_ERR(ptr)) {
+		if (IS_ERR(ptr))
 			return PTR_ERR(ptr);
-		}
 
 		goto retry_same_fence;
 	} else if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index e215e8b2c17a7..7515d7fbb723d 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -140,16 +140,20 @@ struct guc_update_engine_policy {
 
 struct guc_policies {
 	u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
-	/* In micro seconds. How much time to allow before DPC processing is
+	/*
+	 * In micro seconds. How much time to allow before DPC processing is
 	 * called back via interrupt (to prevent DPC queue drain starving).
-	 * Typically 1000s of micro seconds (example only, not granularity). */
+	 * Typically 1000s of micro seconds (example only, not granularity).
+	 */
 	u32 dpc_promote_time;
 
 	/* Must be set to take these new values. */
 	u32 is_valid;
 
-	/* Max number of WIs to process per call. A large value may keep CS
-	 * idle. */
+	/*
+	 * Max number of WIs to process per call. A large value may keep CS
+	 * idle.
+	 */
 	u32 max_num_work_items;
 
 	u32 global_flags;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 0c07cd4ad204f..99c9b7139195e 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -330,7 +330,7 @@ static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
 					   u32 data) \
 { \
 	XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
- \
+\
 	policy->h2g.klv[policy->count].kl = \
 		FIELD_PREP(GUC_KLV_0_KEY, \
 			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index e0377083d1f2d..373a65c77946b 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -68,6 +68,7 @@ int xe_huc_auth(struct xe_huc *huc)
 	struct xe_gt *gt = huc_to_gt(huc);
 	struct xe_guc *guc = huc_to_guc(huc);
 	int ret;
+
 	if (xe_uc_fw_is_disabled(&huc->fw))
 		return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index d92f03870e597..ca63532433269 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -250,7 +250,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
 }
 
 static void gt_irq_handler(struct xe_tile *tile,
-			   u32 master_ctl, long unsigned int *intr_dw,
+			   u32 master_ctl, unsigned long *intr_dw,
 			   u32 *identity)
 {
 	struct xe_device *xe = tile_to_xe(tile);
@@ -305,7 +305,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 	struct xe_device *xe = arg;
 	struct xe_tile *tile = xe_device_get_root_tile(xe);
 	u32 master_ctl, gu_misc_iir;
-	long unsigned int intr_dw[2];
+	unsigned long intr_dw[2];
 	u32 identity[32];
 
 	master_ctl = xelp_intr_disable(xe);
@@ -360,7 +360,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 	struct xe_device *xe = arg;
 	struct xe_tile *tile;
 	u32 master_tile_ctl, master_ctl = 0, gu_misc_iir = 0;
-	long unsigned int intr_dw[2];
+	unsigned long intr_dw[2];
 	u32 identity[32];
 	u8 id;
 
@@ -502,11 +502,10 @@ static void xe_irq_postinstall(struct xe_device *xe)
 
 static irq_handler_t xe_irq_handler(struct xe_device *xe)
 {
-	if (GRAPHICS_VERx100(xe) >= 1210) {
+	if (GRAPHICS_VERx100(xe) >= 1210)
 		return dg1_irq_handler;
-	} else {
+	else
 		return xelp_irq_handler;
-	}
 }
 
 static void irq_uninstall(struct drm_device *drm, void *arg)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index d5f782f8d2a6c..b726599f62284 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -374,46 +374,46 @@ static const u8 dg2_rcs_offsets[] = {
 };
 
 static const u8 mtl_rcs_offsets[] = {
-       NOP(1),
-       LRI(15, POSTED),
-       REG16(0x244),
-       REG(0x034),
-       REG(0x030),
-       REG(0x038),
-       REG(0x03c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x1c0),
-       REG(0x1c4),
-       REG(0x1c8),
-       REG(0x180),
-       REG16(0x2b4),
-       REG(0x120),
-       REG(0x124),
-
-       NOP(1),
-       LRI(9, POSTED),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       NOP(2),
-       LRI(2, POSTED),
-       REG16(0x5a8),
-       REG16(0x5ac),
-
-       NOP(6),
-       LRI(1, 0),
-       REG(0x0c8),
-
-       END
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(2),
+	LRI(2, POSTED),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
 };
 
 #undef END
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index f05335b16a1a8..0c233380d4f25 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -511,7 +511,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 #define EMIT_COPY_DW 10
 static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 		      u64 src_ofs, u64 dst_ofs, unsigned int size,
-		      unsigned pitch)
+		      unsigned int pitch)
 {
 	XE_BUG_ON(size / pitch > S16_MAX);
 	XE_BUG_ON(pitch / 4 > S16_MAX);
@@ -1012,6 +1012,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 
 	do {
 		u64 addr = ppgtt_ofs + ofs * 8;
+
 		chunk = min(update->qwords, 0x1ffU);
 
 		/* Ensure populatefn can do memset64 by aligning bb->cs */
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 7ab70a83f88d7..e3ab1d3a367fe 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -58,6 +58,7 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
 			    bool atomic)
 {
 	int err;
+
 	lockdep_assert_held(&gt->pcode.lock);
 
 	if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 70892f1347189..ea6dd7d71b591 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -82,7 +82,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
 {
 	u32 val = entry->set_bits;
 	const char *access_str = "(invalid)";
-	unsigned range_bit = 2;
+	unsigned int range_bit = 2;
 	u32 range_start, range_end;
 	bool deny;
 
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index f2ba609712d36..2a6fdd284395b 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -130,7 +130,6 @@ fallback:
 	cur->node = NULL;
 	cur->mem_type = XE_PL_TT;
 	XE_WARN_ON(res && start + size > res->size);
-	return;
 }
 
 static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index fee71080bd315..2c4632259edd0 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -81,7 +81,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 }
 
 struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
-				  unsigned size)
+				  unsigned int size)
 {
 	return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
 }
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 84df4ce45e03a..75f7a4cf6cbe8 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -147,9 +147,9 @@ struct fw_blobs_by_type {
 		entry__,							\
 	},
 
-XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,					\
+XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
 		     fw_filename_mmp_ver, fw_filename_major_ver)
-XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,					\
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
 		     fw_filename_mmp_ver, fw_filename_no_ver)
 
 static struct xe_gt *
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7f2f17c3b86e6..2b9a7618b169c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2163,16 +2163,16 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
 	case XE_VM_BIND_OP_PREFETCH:
 		vma = xe_vm_find_overlapping_vma(vm, addr, range);
 		if (XE_IOCTL_DBG(xe, !vma))
-			return -ENODATA;	/* Not an actual error, IOCTL
-						   cleans up returns and 0 */
+			/* Not an actual error, IOCTL cleans up returns and 0 */
+			return -ENODATA;
 		if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr ||
 				      xe_vma_end(vma) != addr + range) && !async))
 			return -EINVAL;
 		break;
 	case XE_VM_BIND_OP_UNMAP_ALL:
 		if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list)))
-			return -ENODATA;	/* Not an actual error, IOCTL
-						   cleans up returns and 0 */
+			/* Not an actual error, IOCTL cleans up returns and 0 */
+			return -ENODATA;
 		break;
 	default:
 		XE_BUG_ON("NOT POSSIBLE");
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index 5b6216964c45d..b1b2dc4a6089c 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -428,8 +428,8 @@
  * the list of userptrs mapped in the VM, the list of engines using this VM, and
  * the array of external BOs mapped in the VM. When adding or removing any of the
  * aforemented state from the VM should acquire this lock in write mode. The VM
- * bind path also acquires this lock in write while while the exec / compute
- * mode rebind worker acquire this lock in read mode.
+ * bind path also acquires this lock in write while the exec / compute mode
+ * rebind worker acquire this lock in read mode.
  *
  * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
  * slots which is shared with any private BO in the VM. Expected to be acquired
-- 
GitLab


From 1737785ae5313e4941181025858fc90ed4acd314 Mon Sep 17 00:00:00 2001
From: Riana Tauro <riana.tauro@intel.com>
Date: Mon, 17 Jul 2023 15:29:00 +0530
Subject: [PATCH 1726/2340] drm/xe: remove gucrc disable from suspend path

Currently GuCRC is disabled in suspend path for xe.
Rc6 is a prerequiste to enable s0ix and
should not be disabled for s2idle. There is no requirement
to disable GuCRC for S3+.

Remove it from xe_guc_pc_stop, thus removing from suspend path.
Retain the call in other places where xe_guc_pc_stop is
called.

v2: add description and return statement to kernel-doc (Rodrigo)
v3: update commit message (Rodrigo)
v4: add mem_access_get to the gucrc disable function

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c     |  1 +
 drivers/gpu/drm/xe/xe_guc_pc.c | 20 +++++++++++++-------
 drivers/gpu/drm/xe/xe_guc_pc.h |  1 +
 drivers/gpu/drm/xe/xe_uc.c     |  5 +++++
 drivers/gpu/drm/xe/xe_uc.h     |  1 +
 5 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index a21d44bfe9e87..b31ef2a8ff17d 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -517,6 +517,7 @@ static int gt_reset(struct xe_gt *gt)
 	if (err)
 		goto err_msg;
 
+	xe_uc_gucrc_disable(&gt->uc);
 	xe_uc_stop_prepare(&gt->uc);
 	xe_gt_pagefault_reset(gt);
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 3093cfeff0c2c..03dfbde29fe51 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -730,12 +730,20 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
 	return ret;
 }
 
-static int pc_gucrc_disable(struct xe_guc_pc *pc)
+/**
+ * xe_guc_pc_gucrc_disable - Disable GuC RC
+ * @pc: Xe_GuC_PC instance
+ *
+ * Disables GuC RC by taking control of RC6 back from GuC.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
 	int ret;
 
-	xe_device_assert_mem_access(pc_to_xe(pc));
+	xe_device_mem_access_get(pc_to_xe(pc));
 
 	ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
 	if (ret)
@@ -750,6 +758,7 @@ static int pc_gucrc_disable(struct xe_guc_pc *pc)
 	xe_mmio_write32(gt, RC_STATE, 0);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(pc_to_xe(pc));
 	return 0;
 }
 
@@ -827,7 +836,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 		goto out;
 
 	if (xe->info.platform == XE_PVC) {
-		pc_gucrc_disable(pc);
+		xe_guc_pc_gucrc_disable(pc);
 		ret = 0;
 		goto out;
 	}
@@ -850,10 +859,6 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc)
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 
-	ret = pc_gucrc_disable(pc);
-	if (ret)
-		goto out;
-
 	mutex_lock(&pc->freq_lock);
 	pc->freq_ready = false;
 	mutex_unlock(&pc->freq_lock);
@@ -876,6 +881,7 @@ static void pc_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc_pc *pc = arg;
 
+	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
 	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
 	xe_bo_unpin_map_no_vm(pc->bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 370353a40a175..81833a53b3c92 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -11,6 +11,7 @@
 int xe_guc_pc_init(struct xe_guc_pc *pc);
 int xe_guc_pc_start(struct xe_guc_pc *pc);
 int xe_guc_pc_stop(struct xe_guc_pc *pc);
+int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
 
 enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
 u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index e244d27b55d5c..addd6f2681b94 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -176,6 +176,11 @@ int xe_uc_reset_prepare(struct xe_uc *uc)
 	return xe_guc_reset_prepare(&uc->guc);
 }
 
+void xe_uc_gucrc_disable(struct xe_uc *uc)
+{
+	XE_WARN_ON(xe_guc_pc_gucrc_disable(&uc->guc.pc));
+}
+
 void xe_uc_stop_prepare(struct xe_uc *uc)
 {
 	xe_guc_stop_prepare(&uc->guc);
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index d6efc9ef00d3d..42219b361df55 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -12,6 +12,7 @@ int xe_uc_init(struct xe_uc *uc);
 int xe_uc_init_hwconfig(struct xe_uc *uc);
 int xe_uc_init_post_hwconfig(struct xe_uc *uc);
 int xe_uc_init_hw(struct xe_uc *uc);
+void xe_uc_gucrc_disable(struct xe_uc *uc);
 int xe_uc_reset_prepare(struct xe_uc *uc);
 void xe_uc_stop_prepare(struct xe_uc *uc);
 int xe_uc_stop(struct xe_uc *uc);
-- 
GitLab


From ac0be3b5b28ecf4890b3fc3ebaec18e7ce5fcc86 Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Tue, 18 Jul 2023 13:36:59 +0530
Subject: [PATCH 1727/2340] drm/xe/pm: Add pci d3cold_capable support

Adding pci d3cold_capable check in order to initialize
d3cold_allowed as false statically.
It avoids vram save/restore latency during runtime
suspend/resume

v2:
- Added else block to xe_pci_runtime_idle. [Rodrigo]

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-2-anshuman.gupta@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  3 +++
 drivers/gpu/drm/xe/xe_pci.c          | 29 ++++++++++++++++------------
 drivers/gpu/drm/xe/xe_pm.c           | 22 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pm.h           |  1 +
 4 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index fb2329ccce065..7a62c54939a9e 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -327,6 +327,9 @@ struct xe_device {
 		bool hold_rpm;
 	} mem_access;
 
+	/** d3cold_capable: Indicates if root port is d3cold capable */
+	bool d3cold_capable;
+
 	/** @d3cold_allowed: Indicates if d3cold is a valid device state */
 	bool d3cold_allowed;
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index e130ffe3ab559..4ff7be058e4e4 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -665,6 +665,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_pci_disable;
 
 	xe_pm_runtime_init(xe);
+	xe_pm_init(xe);
 
 	return 0;
 
@@ -777,18 +778,22 @@ static int xe_pci_runtime_idle(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct xe_device *xe = pdev_to_xe_device(pdev);
 
-	/*
-	 * TODO: d3cold should be allowed (true) if
-	 * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
-	 * but maybe include some other conditions. So, before
-	 * we can re-enable the D3cold, we need to:
-	 * 1. rewrite the VRAM save / restore to avoid buffer object locks
-	 * 2. block D3cold if we have a big amount of device memory in use
-	 *    in order to reduce the latency.
-	 * 3. at resume, detect if we really lost power and avoid memory
-	 *    restoration if we were only up to d3cold
-	 */
-	 xe->d3cold_allowed = false;
+	if (!xe->d3cold_capable) {
+		xe->d3cold_allowed = false;
+	} else {
+		/*
+		 * TODO: d3cold should be allowed (true) if
+		 * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
+		 * but maybe include some other conditions. So, before
+		 * we can re-enable the D3cold, we need to:
+		 * 1. rewrite the VRAM save / restore to avoid buffer object locks
+		 * 2. block D3cold if we have a big amount of device memory in use
+		 *    in order to reduce the latency.
+		 * 3. at resume, detect if we really lost power and avoid memory
+		 *    restoration if we were only up to d3cold
+		 */
+		xe->d3cold_allowed = false;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 20e9e522ab805..2f553dcd6139d 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -106,6 +106,21 @@ int xe_pm_resume(struct xe_device *xe)
 	return 0;
 }
 
+static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
+{
+	struct pci_dev *root_pdev;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (!root_pdev)
+		return false;
+
+	/* D3Cold requires PME capability and _PR3 power resource */
+	if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev))
+		return false;
+
+	return true;
+}
+
 void xe_pm_runtime_init(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
@@ -118,6 +133,13 @@ void xe_pm_runtime_init(struct xe_device *xe)
 	pm_runtime_put_autosuspend(dev);
 }
 
+void xe_pm_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	xe->d3cold_capable = xe_pm_pci_d3cold_capable(pdev);
+}
+
 void xe_pm_runtime_fini(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 8418ee6faac5f..864cd0be014a4 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -14,6 +14,7 @@ int xe_pm_suspend(struct xe_device *xe);
 int xe_pm_resume(struct xe_device *xe);
 
 void xe_pm_runtime_init(struct xe_device *xe);
+void xe_pm_init(struct xe_device *xe);
 void xe_pm_runtime_fini(struct xe_device *xe);
 int xe_pm_runtime_suspend(struct xe_device *xe);
 int xe_pm_runtime_resume(struct xe_device *xe);
-- 
GitLab


From fddebcbf7a47d661f3eb475de0b75be11c7c3bb8 Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Tue, 18 Jul 2023 13:37:00 +0530
Subject: [PATCH 1728/2340] drm/xe/pm: Refactor xe_pm_runtime_init

Wrap xe_pm_runtime_init inside xe_pm_init.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-3-anshuman.gupta@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 1 -
 drivers/gpu/drm/xe/xe_pm.c  | 3 ++-
 drivers/gpu/drm/xe/xe_pm.h  | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 4ff7be058e4e4..bc894b3546bf6 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -664,7 +664,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_pci_disable;
 
-	xe_pm_runtime_init(xe);
 	xe_pm_init(xe);
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 2f553dcd6139d..03d71dcf23935 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -121,7 +121,7 @@ static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
 	return true;
 }
 
-void xe_pm_runtime_init(struct xe_device *xe)
+static void xe_pm_runtime_init(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
 
@@ -137,6 +137,7 @@ void xe_pm_init(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
+	xe_pm_runtime_init(xe);
 	xe->d3cold_capable = xe_pm_pci_d3cold_capable(pdev);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 864cd0be014a4..193e5d6873537 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -13,7 +13,6 @@ struct xe_device;
 int xe_pm_suspend(struct xe_device *xe);
 int xe_pm_resume(struct xe_device *xe);
 
-void xe_pm_runtime_init(struct xe_device *xe);
 void xe_pm_init(struct xe_device *xe);
 void xe_pm_runtime_fini(struct xe_device *xe);
 int xe_pm_runtime_suspend(struct xe_device *xe);
-- 
GitLab


From b2d756199be822f4de8dd18fe4e3a939e4a06e7a Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Tue, 18 Jul 2023 13:37:01 +0530
Subject: [PATCH 1729/2340] drm/xe/pm: Add vram_d3cold_threshold Sysfs

Add per pci device vram_d3cold_threshold Sysfs to
control the d3cold allowed knob.
Adding a d3cold structure embedded in xe_device to encapsulate
d3cold related stuff.

v2:
- Check total vram before initializing default threshold. [Riana]
- Add static scope to vram_d3cold_threshold DEVICE_ATTR. [Riana]
v3:
- Fixed cosmetics review comment. [Riana]
- Fixed CI Hook failures.
- Used drmm_mutex_init().
v4:
- Fixed kernel-doc warnings.
v5:
- Added doc explaining need for the device sysfs. [Rodrigo]
- Removed TODO comment.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-4-anshuman.gupta@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |  1 +
 drivers/gpu/drm/xe/xe_device_sysfs.c | 89 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_device_sysfs.h | 13 ++++
 drivers/gpu/drm/xe/xe_device_types.h | 24 ++++++--
 drivers/gpu/drm/xe/xe_pci.c          | 10 ++--
 drivers/gpu/drm/xe/xe_pm.c           | 37 ++++++++++--
 drivers/gpu/drm/xe/xe_pm.h           |  3 +
 7 files changed, 164 insertions(+), 13 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_device_sysfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_device_sysfs.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 3ade82cf244e9..e5fb874a7aaf6 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -52,6 +52,7 @@ xe-y += xe_bb.o \
 	xe_debugfs.o \
 	xe_devcoredump.o \
 	xe_device.o \
+	xe_device_sysfs.o \
 	xe_dma_buf.o \
 	xe_engine.o \
 	xe_exec.o \
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
new file mode 100644
index 0000000000000..99113a5a2b849
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_device_sysfs.h"
+#include "xe_pm.h"
+
+/**
+ * DOC: Xe device sysfs
+ * Xe driver requires exposing certain tunable knobs controlled by user space for
+ * each graphics device. Considering this, we need to add sysfs attributes at device
+ * level granularity.
+ * These sysfs attributes will be available under pci device kobj directory.
+ *
+ * vram_d3cold_threshold - Report/change vram used threshold(in MB) below
+ * which vram save/restore is permissible during runtime D3cold entry/exit.
+ */
+
+static ssize_t
+vram_d3cold_threshold_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int ret;
+
+	if (!xe)
+		return -EINVAL;
+
+	ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold);
+
+	return ret;
+}
+
+static ssize_t
+vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
+			    const char *buff, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	u32 vram_d3cold_threshold;
+	int ret;
+
+	if (!xe)
+		return -EINVAL;
+
+	ret = kstrtou32(buff, 0, &vram_d3cold_threshold);
+	if (ret)
+		return ret;
+
+	drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold);
+
+	ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold);
+
+	return ret ?: count;
+}
+
+static DEVICE_ATTR_RW(vram_d3cold_threshold);
+
+static void xe_device_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+}
+
+void xe_device_sysfs_init(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	int ret;
+
+	ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+	if (ret) {
+		drm_warn(&xe->drm, "Failed to create sysfs file\n");
+		return;
+	}
+
+	ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
+	if (ret)
+		drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n");
+}
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h
new file mode 100644
index 0000000000000..38b240684beea
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_SYSFS_H_
+#define _XE_DEVICE_SYSFS_H_
+
+struct xe_device;
+
+void xe_device_sysfs_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 7a62c54939a9e..14b15ecc56178 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -327,11 +327,27 @@ struct xe_device {
 		bool hold_rpm;
 	} mem_access;
 
-	/** d3cold_capable: Indicates if root port is d3cold capable */
-	bool d3cold_capable;
+	/** @d3cold: Encapsulate d3cold related stuff */
+	struct {
+		/** capable: Indicates if root port is d3cold capable */
+		bool capable;
+
+		/** @allowed: Indicates if d3cold is a valid device state */
+		bool allowed;
 
-	/** @d3cold_allowed: Indicates if d3cold is a valid device state */
-	bool d3cold_allowed;
+		/**
+		 * @vram_threshold:
+		 *
+		 * This represents the permissible threshold(in megabytes)
+		 * for vram save/restore. d3cold will be disallowed,
+		 * when vram_usages is above or equals the threshold value
+		 * to avoid the vram save/restore latency.
+		 * Default threshold value is 300mb.
+		 */
+		u32 vram_threshold;
+		/** @lock: protect vram_threshold */
+		struct mutex lock;
+	} d3cold;
 
 	/* For pcode */
 	struct mutex sb_lock;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index bc894b3546bf6..74aba4f09f7da 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -738,7 +738,7 @@ static int xe_pci_runtime_suspend(struct device *dev)
 
 	pci_save_state(pdev);
 
-	if (xe->d3cold_allowed) {
+	if (xe->d3cold.allowed) {
 		pci_disable_device(pdev);
 		pci_ignore_hotplug(pdev);
 		pci_set_power_state(pdev, PCI_D3cold);
@@ -761,7 +761,7 @@ static int xe_pci_runtime_resume(struct device *dev)
 
 	pci_restore_state(pdev);
 
-	if (xe->d3cold_allowed) {
+	if (xe->d3cold.allowed) {
 		err = pci_enable_device(pdev);
 		if (err)
 			return err;
@@ -777,8 +777,8 @@ static int xe_pci_runtime_idle(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct xe_device *xe = pdev_to_xe_device(pdev);
 
-	if (!xe->d3cold_capable) {
-		xe->d3cold_allowed = false;
+	if (!xe->d3cold.capable) {
+		xe->d3cold.allowed = false;
 	} else {
 		/*
 		 * TODO: d3cold should be allowed (true) if
@@ -791,7 +791,7 @@ static int xe_pci_runtime_idle(struct device *dev)
 		 * 3. at resume, detect if we really lost power and avoid memory
 		 *    restoration if we were only up to d3cold
 		 */
-		xe->d3cold_allowed = false;
+		xe->d3cold.allowed = false;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 03d71dcf23935..261c0ad57b634 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -7,11 +7,13 @@
 
 #include <linux/pm_runtime.h>
 
+#include <drm/drm_managed.h>
 #include <drm/ttm/ttm_placement.h>
 
 #include "xe_bo.h"
 #include "xe_bo_evict.h"
 #include "xe_device.h"
+#include "xe_device_sysfs.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_irq.h"
@@ -137,8 +139,11 @@ void xe_pm_init(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
+	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
 	xe_pm_runtime_init(xe);
-	xe->d3cold_capable = xe_pm_pci_d3cold_capable(pdev);
+	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
+	xe_device_sysfs_init(xe);
+	xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
 }
 
 void xe_pm_runtime_fini(struct xe_device *xe)
@@ -155,7 +160,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 	u8 id;
 	int err;
 
-	if (xe->d3cold_allowed) {
+	if (xe->d3cold.allowed) {
 		if (xe_device_mem_access_ongoing(xe))
 			return -EBUSY;
 
@@ -181,7 +186,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	u8 id;
 	int err;
 
-	if (xe->d3cold_allowed) {
+	if (xe->d3cold.allowed) {
 		for_each_gt(gt, xe, id) {
 			err = xe_pcode_init(gt);
 			if (err)
@@ -202,7 +207,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	for_each_gt(gt, xe, id)
 		xe_gt_resume(gt);
 
-	if (xe->d3cold_allowed) {
+	if (xe->d3cold.allowed) {
 		err = xe_bo_restore_user(xe);
 		if (err)
 			return err;
@@ -251,3 +256,27 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
 		device_set_pm_not_required(&pdev->dev);
 	}
 }
+
+int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
+{
+	struct ttm_resource_manager *man;
+	u32 vram_total_mb = 0;
+	int i;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man)
+			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
+	}
+
+	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);
+
+	if (threshold > vram_total_mb)
+		return -EINVAL;
+
+	mutex_lock(&xe->d3cold.lock);
+	xe->d3cold.vram_threshold = threshold;
+	mutex_unlock(&xe->d3cold.lock);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 193e5d6873537..bbd91a5855cdf 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -8,6 +8,8 @@
 
 #include <linux/pm_runtime.h>
 
+#define DEFAULT_VRAM_THRESHOLD 300 /* in MB */
+
 struct xe_device;
 
 int xe_pm_suspend(struct xe_device *xe);
@@ -22,5 +24,6 @@ int xe_pm_runtime_put(struct xe_device *xe);
 bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
 int xe_pm_runtime_get_if_active(struct xe_device *xe);
 void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
+int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
 
 #endif
-- 
GitLab


From 2ef08b98025bd09b74f68d1801995b0b068afbe7 Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Tue, 18 Jul 2023 13:37:02 +0530
Subject: [PATCH 1730/2340] drm/xe/pm: Toggle d3cold_allowed using vram_usages

Adding support to control d3cold by using vram_usages metric from
ttm resource manager.
When root port  is capable of d3cold but xe has disallowed d3cold
due to vram_usages above vram_d3ccold_threshol. It is required to
disable d3cold to avoid any resume failure because root port can
still transition to d3cold when all of pcie endpoints and
{upstream, virtual} switch ports will transition to d3hot.
Also cleaning up the TODO code comment.

v2:
- Modify d3cold.allowed in xe_pm_d3cold_allowed_toggle. [Riana]
- Cond changed (total_vram_used_mb < xe->d3cold.vram_threshold)
  according to doc comment.
v3:
- Added enum instead of true/false argument in
  d3cold_toggle(). [Rodrigo]
- Removed TODO comment. [Rodrigo]

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-5-anshuman.gupta@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 36 +++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/xe/xe_pm.c  | 25 +++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pm.h  |  1 +
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 74aba4f09f7da..6d04e570735af 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -25,6 +25,11 @@
 #include "xe_pm.h"
 #include "xe_step.h"
 
+enum toggle_d3cold {
+	D3COLD_DISABLE,
+	D3COLD_ENABLE,
+};
+
 struct xe_subplatform_desc {
 	enum xe_subplatform subplatform;
 	const char *name;
@@ -726,6 +731,28 @@ static int xe_pci_resume(struct device *dev)
 	return 0;
 }
 
+static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct pci_dev *root_pdev;
+
+	if (!xe->d3cold.capable)
+		return;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (!root_pdev)
+		return;
+
+	switch (toggle) {
+	case D3COLD_DISABLE:
+		pci_d3cold_disable(root_pdev);
+		break;
+	case D3COLD_ENABLE:
+		pci_d3cold_enable(root_pdev);
+		break;
+	}
+}
+
 static int xe_pci_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -743,6 +770,7 @@ static int xe_pci_runtime_suspend(struct device *dev)
 		pci_ignore_hotplug(pdev);
 		pci_set_power_state(pdev, PCI_D3cold);
 	} else {
+		d3cold_toggle(pdev, D3COLD_DISABLE);
 		pci_set_power_state(pdev, PCI_D3hot);
 	}
 
@@ -767,6 +795,8 @@ static int xe_pci_runtime_resume(struct device *dev)
 			return err;
 
 		pci_set_master(pdev);
+	} else {
+		d3cold_toggle(pdev, D3COLD_ENABLE);
 	}
 
 	return xe_pm_runtime_resume(xe);
@@ -780,15 +810,15 @@ static int xe_pci_runtime_idle(struct device *dev)
 	if (!xe->d3cold.capable) {
 		xe->d3cold.allowed = false;
 	} else {
+		xe_pm_d3cold_allowed_toggle(xe);
+
 		/*
 		 * TODO: d3cold should be allowed (true) if
 		 * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
 		 * but maybe include some other conditions. So, before
 		 * we can re-enable the D3cold, we need to:
 		 * 1. rewrite the VRAM save / restore to avoid buffer object locks
-		 * 2. block D3cold if we have a big amount of device memory in use
-		 *    in order to reduce the latency.
-		 * 3. at resume, detect if we really lost power and avoid memory
+		 * 2. at resume, detect if we really lost power and avoid memory
 		 *    restoration if we were only up to d3cold
 		 */
 		xe->d3cold.allowed = false;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 261c0ad57b634..21964e4d09f87 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -280,3 +280,28 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
 
 	return 0;
 }
+
+void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
+{
+	struct ttm_resource_manager *man;
+	u32 total_vram_used_mb = 0;
+	u64 vram_used;
+	int i;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man) {
+			vram_used = ttm_resource_manager_usage(man);
+			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
+		}
+	}
+
+	mutex_lock(&xe->d3cold.lock);
+
+	if (total_vram_used_mb < xe->d3cold.vram_threshold)
+		xe->d3cold.allowed = true;
+	else
+		xe->d3cold.allowed = false;
+
+	mutex_unlock(&xe->d3cold.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index bbd91a5855cdf..ee30cf025f646 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -25,5 +25,6 @@ bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
 int xe_pm_runtime_get_if_active(struct xe_device *xe);
 void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
+void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
 
 #endif
-- 
GitLab


From 09d88e3beb64b8d2e3043fef72dda0df62487e44 Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Tue, 18 Jul 2023 13:37:03 +0530
Subject: [PATCH 1731/2340] drm/xe/pm: Init pcode and restore vram on power
 lost

Don't init pcode and restore VRAM objects in vain.
We can rely on primary GT GUC_STATUS to detect whether
card has really lost power even when d3cold is allowed by xe.
Adding d3cold.lost_power flag to avoid pcode init and vram
restoration.
Also cleaning up the TODO code comment.

v2:
- %s/xe_guc_has_lost_power()/xe_guc_in_reset().
- Used existing gt instead of new variable. [Rodrigo]
- Added kernel-doc function comment. [Rodrigo]
- xe_guc_in_reset() return true if failed to get fw.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-6-anshuman.gupta@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  3 +++
 drivers/gpu/drm/xe/xe_guc.c          | 27 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_guc.h          |  1 +
 drivers/gpu/drm/xe/xe_pci.c          |  2 --
 drivers/gpu/drm/xe/xe_pm.c           | 13 +++++++++++--
 5 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 14b15ecc56178..0897719751e98 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -335,6 +335,9 @@ struct xe_device {
 		/** @allowed: Indicates if d3cold is a valid device state */
 		bool allowed;
 
+		/** @power_lost: Indicates if card has really lost power. */
+		bool power_lost;
+
 		/**
 		 * @vram_threshold:
 		 *
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index d44537abf7daa..ed90d738d6739 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -844,3 +844,30 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
 	xe_guc_ct_print(&guc->ct, p, false);
 	xe_guc_submit_print(guc, p);
 }
+
+/**
+ * xe_guc_in_reset() - Detect if GuC MIA is in reset.
+ * @guc: The GuC object
+ *
+ * This function detects runtime resume from d3cold by leveraging
+ * GUC_STATUS, GUC doesn't get reset during d3hot,
+ * it strictly to be called from RPM resume handler.
+ *
+ * Return: true if failed to get forcewake or GuC MIA is in Reset,
+ * otherwise false.
+ */
+bool xe_guc_in_reset(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 status;
+	int err;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return true;
+
+	status = xe_mmio_read32(gt, GUC_STATUS);
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+	return  status & GS_MIA_IN_RESET;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 74a74051f354e..f64f22e971697 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -35,6 +35,7 @@ void xe_guc_reset_wait(struct xe_guc *guc);
 void xe_guc_stop_prepare(struct xe_guc *guc);
 int xe_guc_stop(struct xe_guc *guc);
 int xe_guc_start(struct xe_guc *guc);
+bool xe_guc_in_reset(struct xe_guc *guc);
 
 static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
 {
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6d04e570735af..ae6e1394ff31f 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -818,8 +818,6 @@ static int xe_pci_runtime_idle(struct device *dev)
 		 * but maybe include some other conditions. So, before
 		 * we can re-enable the D3cold, we need to:
 		 * 1. rewrite the VRAM save / restore to avoid buffer object locks
-		 * 2. at resume, detect if we really lost power and avoid memory
-		 *    restoration if we were only up to d3cold
 		 */
 		xe->d3cold.allowed = false;
 	}
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 21964e4d09f87..f336aec7085dd 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -16,6 +16,7 @@
 #include "xe_device_sysfs.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
+#include "xe_guc.h"
 #include "xe_irq.h"
 #include "xe_pcode.h"
 
@@ -186,7 +187,15 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	u8 id;
 	int err;
 
-	if (xe->d3cold.allowed) {
+	/*
+	 * It can be possible that xe has allowed d3cold but other pcie devices
+	 * in gfx card soc would have blocked d3cold, therefore card has not
+	 * really lost power. Detecting primary Gt power is sufficient.
+	 */
+	gt = xe_device_get_gt(xe, 0);
+	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
+
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
 		for_each_gt(gt, xe, id) {
 			err = xe_pcode_init(gt);
 			if (err)
@@ -207,7 +216,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	for_each_gt(gt, xe, id)
 		xe_gt_resume(gt);
 
-	if (xe->d3cold.allowed) {
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
 		err = xe_bo_restore_user(xe);
 		if (err)
 			return err;
-- 
GitLab


From a00b8f1aae43c46658de0f7f55d8a65acb002159 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:03 +0100
Subject: [PATCH 1732/2340] drm/xe: fix xe_device_mem_access_get() races
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It looks like there is at least one race here, given that the
pm_runtime_suspended() check looks to return false if we are in the
process of suspending the device (RPM_SUSPENDING vs RPM_SUSPENDED).  We
later also do xe_pm_runtime_get_if_active(), but since the device is
suspending or has now suspended, this doesn't do anything either.
Following from this we can potentially return from
xe_device_mem_access_get() with the device suspended or about to be,
leading to broken behaviour.

Attempt to fix this by always grabbing the runtime ref when our internal
ref transitions from 0 -> 1. The hard part is then dealing with the
runtime_pm callbacks also calling xe_device_mem_access_get() and
deadlocking, which the pm_runtime_suspended() check prevented.

v2:
 - ct->lock looks to be primed with fs_reclaim, so holding that and then
   allocating memory will cause lockdep to complain. Now that we
   unconditionally grab the mem_access.lock around mem_access_{get,put}, we
   need to change the ordering wrt to grabbing the ct->lock, since some of
   the runtime_pm routines can allocate memory (or at least that's what
   lockdep seems to suggest). Hopefully not a big deal.  It might be that
   there were already issues with this, just that the atomics where
   "hiding" the potential issues.
v3:
 - Use Thomas Hellström' idea with tracking the active task that is
   executing in the resume or suspend callback, in order to avoid
   recursive resume/suspend calls deadlocking on itself.
 - Split the ct->lock change.
v4:
 - Add smb_mb() around accessing the pm_callback_task for extra safety.
   (Thomas Hellström)
v5:
 - Clarify the kernel-doc for the mem_access.lock, given that it is quite
   strange in what it protects (data vs code). The real motivation is to
   aid lockdep. (Rodrigo Vivi)
v6:
 - Split out the lock change. We still want this as a lockdep aid but
   only for the xe_device_mem_access_get() path. Sticking a lock on the
   put() looks be a no-go, also the runtime_put() there is always async.
 - Now that the lock is gone move to atomics and rely on the pm code
   serialising multiple callers on the 0 -> 1 transition.
 - g2h_worker_func() looks to be the next issue, given that
   suspend-resume callbacks are using CT, so try to handle that.
v7:
 - Add xe_device_mem_access_get_if_ongoing(), and use it in
   g2h_worker_func().
v8 (Anshuman):
 - Just always grab the rpm, instead of just on the 0 -> 1 transition,
   which is a lot clearer and simplifies the code quite a bit.
v9:
 - Make sure we also adjust the CT fast-path with if-active.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/258
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Acked-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c       | 60 ++++++++++++++++++------
 drivers/gpu/drm/xe/xe_device.h       | 11 +----
 drivers/gpu/drm/xe/xe_device_types.h |  8 +++-
 drivers/gpu/drm/xe/xe_guc_ct.c       | 41 +++++++++++++++--
 drivers/gpu/drm/xe/xe_pm.c           | 68 ++++++++++++++++++----------
 drivers/gpu/drm/xe/xe_pm.h           |  2 +-
 6 files changed, 136 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index bd2e10952989f..47401bb49958f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -364,33 +364,67 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
 }
 
+bool xe_device_mem_access_ongoing(struct xe_device *xe)
+{
+	if (xe_pm_read_callback_task(xe) != NULL)
+		return true;
+
+	return atomic_read(&xe->mem_access.ref);
+}
+
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
+}
+
 bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
 {
-	return atomic_inc_not_zero(&xe->mem_access.ref);
+	bool active;
+
+	if (xe_pm_read_callback_task(xe) == current)
+		return true;
+
+	active = xe_pm_runtime_get_if_active(xe);
+	if (active) {
+		int ref = atomic_inc_return(&xe->mem_access.ref);
+
+		XE_WARN_ON(ref == S32_MAX);
+	}
+
+	return active;
 }
 
 void xe_device_mem_access_get(struct xe_device *xe)
 {
-	bool resumed = xe_pm_runtime_resume_if_suspended(xe);
-	int ref = atomic_inc_return(&xe->mem_access.ref);
-
-	if (ref == 1)
-		xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
+	int ref;
+
+	/*
+	 * This looks racy, but should be fine since the pm_callback_task only
+	 * transitions from NULL -> current (and back to NULL again), during the
+	 * runtime_resume() or runtime_suspend() callbacks, for which there can
+	 * only be a single one running for our device. We only need to prevent
+	 * recursively calling the runtime_get or runtime_put from those
+	 * callbacks, as well as preventing triggering any access_ongoing
+	 * asserts.
+	 */
+	if (xe_pm_read_callback_task(xe) == current)
+		return;
 
-	/* The usage counter increased if device was immediately resumed */
-	if (resumed)
-		xe_pm_runtime_put(xe);
+	xe_pm_runtime_get(xe);
+	ref = atomic_inc_return(&xe->mem_access.ref);
 
 	XE_WARN_ON(ref == S32_MAX);
 }
 
 void xe_device_mem_access_put(struct xe_device *xe)
 {
-	bool hold = xe->mem_access.hold_rpm;
-	int ref = atomic_dec_return(&xe->mem_access.ref);
+	int ref;
+
+	if (xe_pm_read_callback_task(xe) == current)
+		return;
 
-	if (!ref && hold)
-		xe_pm_runtime_put(xe);
+	ref = atomic_dec_return(&xe->mem_access.ref);
+	xe_pm_runtime_put(xe);
 
 	XE_WARN_ON(ref < 0);
 }
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index a64828bc6ad24..8b085ffdc5f8d 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -141,15 +141,8 @@ void xe_device_mem_access_get(struct xe_device *xe);
 bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe);
 void xe_device_mem_access_put(struct xe_device *xe);
 
-static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
-{
-	return atomic_read(&xe->mem_access.ref);
-}
-
-static inline void xe_device_assert_mem_access(struct xe_device *xe)
-{
-	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
-}
+void xe_device_assert_mem_access(struct xe_device *xe);
+bool xe_device_mem_access_ongoing(struct xe_device *xe);
 
 static inline bool xe_device_in_fault_mode(struct xe_device *xe)
 {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 0897719751e98..52c5f7ded7ce6 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -323,8 +323,6 @@ struct xe_device {
 	struct {
 		/** @ref: ref count of memory accesses */
 		atomic_t ref;
-		/** @hold_rpm: need to put rpm ref back at the end */
-		bool hold_rpm;
 	} mem_access;
 
 	/** @d3cold: Encapsulate d3cold related stuff */
@@ -352,6 +350,12 @@ struct xe_device {
 		struct mutex lock;
 	} d3cold;
 
+	/**
+	 * @pm_callback_task: Track the active task that is running in either
+	 * the runtime_suspend or runtime_resume callbacks.
+	 */
+	struct task_struct *pm_callback_task;
+
 	/* For pcode */
 	struct mutex sb_lock;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index c7992a8667e50..5d9ed5de5dbb8 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -19,6 +19,7 @@
 #include "xe_guc.h"
 #include "xe_guc_submit.h"
 #include "xe_map.h"
+#include "xe_pm.h"
 #include "xe_trace.h"
 
 /* Used when a CT send wants to block and / or receive data */
@@ -1046,9 +1047,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
 void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
 {
 	struct xe_device *xe = ct_to_xe(ct);
+	bool ongoing;
 	int len;
 
-	if (!xe_device_mem_access_get_if_ongoing(xe))
+	ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
 		return;
 
 	spin_lock(&ct->fast_lock);
@@ -1059,7 +1062,8 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
 	} while (len > 0);
 	spin_unlock(&ct->fast_lock);
 
-	xe_device_mem_access_put(xe);
+	if (ongoing)
+		xe_device_mem_access_put(xe);
 }
 
 /* Returns less than zero on error, 0 on done, 1 on more available */
@@ -1090,9 +1094,36 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct)
 static void g2h_worker_func(struct work_struct *w)
 {
 	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+	bool ongoing;
 	int ret;
 
-	xe_device_mem_access_get(ct_to_xe(ct));
+	/*
+	 * Normal users must always hold mem_access.ref around CT calls. However
+	 * during the runtime pm callbacks we rely on CT to talk to the GuC, but
+	 * at this stage we can't rely on mem_access.ref and even the
+	 * callback_task will be different than current.  For such cases we just
+	 * need to ensure we always process the responses from any blocking
+	 * ct_send requests or where we otherwise expect some response when
+	 * initiated from those callbacks (which will need to wait for the below
+	 * dequeue_one_g2h()).  The dequeue_one_g2h() will gracefully fail if
+	 * the device has suspended to the point that the CT communication has
+	 * been disabled.
+	 *
+	 * If we are inside the runtime pm callback, we can be the only task
+	 * still issuing CT requests (since that requires having the
+	 * mem_access.ref).  It seems like it might in theory be possible to
+	 * receive unsolicited events from the GuC just as we are
+	 * suspending-resuming, but those will currently anyway be lost when
+	 * eventually exiting from suspend, hence no need to wake up the device
+	 * here. If we ever need something stronger than get_if_ongoing() then
+	 * we need to be careful with blocking the pm callbacks from getting CT
+	 * responses, if the worker here is blocked on those callbacks
+	 * completing, creating a deadlock.
+	 */
+	ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+		return;
+
 	do {
 		mutex_lock(&ct->lock);
 		ret = dequeue_one_g2h(ct);
@@ -1106,7 +1137,9 @@ static void g2h_worker_func(struct work_struct *w)
 			kick_reset(ct);
 		}
 	} while (ret == 1);
-	xe_device_mem_access_put(ct_to_xe(ct));
+
+	if (ongoing)
+		xe_device_mem_access_put(ct_to_xe(ct));
 }
 
 static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index f336aec7085dd..04b995aa848f4 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -155,37 +155,65 @@ void xe_pm_runtime_fini(struct xe_device *xe)
 	pm_runtime_forbid(dev);
 }
 
+static void xe_pm_write_callback_task(struct xe_device *xe,
+				      struct task_struct *task)
+{
+	WRITE_ONCE(xe->pm_callback_task, task);
+
+	/*
+	 * Just in case it's somehow possible for our writes to be reordered to
+	 * the extent that something else re-uses the task written in
+	 * pm_callback_task. For example after returning from the callback, but
+	 * before the reordered write that resets pm_callback_task back to NULL.
+	 */
+	smp_mb(); /* pairs with xe_pm_read_callback_task */
+}
+
+struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
+{
+	smp_mb(); /* pairs with xe_pm_write_callback_task */
+
+	return READ_ONCE(xe->pm_callback_task);
+}
+
 int xe_pm_runtime_suspend(struct xe_device *xe)
 {
 	struct xe_gt *gt;
 	u8 id;
-	int err;
+	int err = 0;
 
-	if (xe->d3cold.allowed) {
-		if (xe_device_mem_access_ongoing(xe))
-			return -EBUSY;
+	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
+		return -EBUSY;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
 
+	if (xe->d3cold.allowed) {
 		err = xe_bo_evict_all(xe);
 		if (err)
-			return err;
+			goto out;
 	}
 
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_suspend(gt);
 		if (err)
-			return err;
+			goto out;
 	}
 
 	xe_irq_suspend(xe);
-
-	return 0;
+out:
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
 }
 
 int xe_pm_runtime_resume(struct xe_device *xe)
 {
 	struct xe_gt *gt;
 	u8 id;
-	int err;
+	int err = 0;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
 
 	/*
 	 * It can be possible that xe has allowed d3cold but other pcie devices
@@ -199,7 +227,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 		for_each_gt(gt, xe, id) {
 			err = xe_pcode_init(gt);
 			if (err)
-				return err;
+				goto out;
 		}
 
 		/*
@@ -208,7 +236,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 		 */
 		err = xe_bo_restore_kernel(xe);
 		if (err)
-			return err;
+			goto out;
 	}
 
 	xe_irq_resume(xe);
@@ -219,10 +247,11 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
 		err = xe_bo_restore_user(xe);
 		if (err)
-			return err;
+			goto out;
 	}
-
-	return 0;
+out:
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
 }
 
 int xe_pm_runtime_get(struct xe_device *xe)
@@ -236,19 +265,8 @@ int xe_pm_runtime_put(struct xe_device *xe)
 	return pm_runtime_put_autosuspend(xe->drm.dev);
 }
 
-/* Return true if resume operation happened and usage count was increased */
-bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe)
-{
-	/* In case we are suspended we need to immediately wake up */
-	if (pm_runtime_suspended(xe->drm.dev))
-		return !pm_runtime_resume_and_get(xe->drm.dev);
-
-	return false;
-}
-
 int xe_pm_runtime_get_if_active(struct xe_device *xe)
 {
-	WARN_ON(pm_runtime_suspended(xe->drm.dev));
 	return pm_runtime_get_if_active(xe->drm.dev, true);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index ee30cf025f646..08a633ce5145d 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -21,10 +21,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe);
 int xe_pm_runtime_resume(struct xe_device *xe);
 int xe_pm_runtime_get(struct xe_device *xe);
 int xe_pm_runtime_put(struct xe_device *xe);
-bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
 int xe_pm_runtime_get_if_active(struct xe_device *xe);
 void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
+struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
 
 #endif
-- 
GitLab


From 2d30332a5ec004effe24d669003bf94e7f167387 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:04 +0100
Subject: [PATCH 1733/2340] drm/xe/vm: tidy up xe_runtime_pm usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xe_device_mem_access_get() should be all that's needed here and
should now work as expected, without any strange races. In theory should
be no functional changes here.

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 2b9a7618b169c..d4c6ddfce1ea6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1245,11 +1245,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 
 	INIT_LIST_HEAD(&vm->extobj.list);
 
-	if (!(flags & XE_VM_FLAG_MIGRATION)) {
-		/* We need to immeditatelly exit from any D3 state */
-		xe_pm_runtime_get(xe);
+	if (!(flags & XE_VM_FLAG_MIGRATION))
 		xe_device_mem_access_get(xe);
-	}
 
 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
 	if (!vm_resv_obj) {
@@ -1360,10 +1357,8 @@ err_close:
 
 err_no_resv:
 	kfree(vm);
-	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+	if (!(flags & XE_VM_FLAG_MIGRATION))
 		xe_device_mem_access_put(xe);
-		xe_pm_runtime_put(xe);
-	}
 	return ERR_PTR(err);
 }
 
@@ -1522,7 +1517,6 @@ static void vm_destroy_work_func(struct work_struct *w)
 
 	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
 		xe_device_mem_access_put(xe);
-		xe_pm_runtime_put(xe);
 
 		if (xe->info.has_asid) {
 			mutex_lock(&xe->usm.lock);
-- 
GitLab


From 6bfbd0c589bb89581bb89d2776924c3853296cfc Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:05 +0100
Subject: [PATCH 1734/2340] drm/xe/debugfs: grab mem_access around forcewake
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need keep the device awake when performing any kind of mmio operation.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/279
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 7827a785b0204..047341d5689ab 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -70,6 +70,8 @@ static int forcewake_open(struct inode *inode, struct file *file)
 	struct xe_gt *gt;
 	u8 id;
 
+	xe_device_mem_access_get(xe);
+
 	for_each_gt(gt, xe, id)
 		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 
@@ -85,6 +87,8 @@ static int forcewake_release(struct inode *inode, struct file *file)
 	for_each_gt(gt, xe, id)
 		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 
+	xe_device_mem_access_put(xe);
+
 	return 0;
 }
 
-- 
GitLab


From 2d3ab1fa3195d2b0291625fcd0062796aaf15794 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:06 +0100
Subject: [PATCH 1735/2340] drm/xe/guc_pc: add missing mem_access for
 freq_rpe_show

The mem_access is meant to cover any kind of device level memory access,
mmio included.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 03dfbde29fe51..0927cb669603c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -422,8 +422,12 @@ static ssize_t freq_rpe_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
 
+	xe_device_mem_access_get(xe);
 	pc_update_rp_values(pc);
+	xe_device_mem_access_put(xe);
 	return sysfs_emit(buf, "%d\n", pc->rpe_freq);
 }
 static DEVICE_ATTR_RO(freq_rpe);
-- 
GitLab


From 7da1d76ff647cc08d9400562a75a92e41ba6d7bc Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:07 +0100
Subject: [PATCH 1736/2340] drm/xe/mmio: grab mem_access in xe_mmio_ioctl

Any kind of device memory access should first ensure the device is not
suspended, mmio included.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 448b874c7a3c7..8d0f07261bfd8 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -483,6 +483,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 	 */
 	reg = XE_REG(args->addr);
 
+	xe_device_mem_access_get(xe);
 	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 
 	if (args->flags & DRM_XE_MMIO_WRITE) {
@@ -526,6 +527,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 
 exit:
 	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(xe);
 
 	return ret;
 }
-- 
GitLab


From 03af26c9c9767b096cf4b69544f0140898530531 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:08 +0100
Subject: [PATCH 1737/2340] drm/xe: ensure correct access_put ordering

Only call access_put after dropping the forcewake. In theory the device
could suspend, but really we want to start asserting that we have a
mem_access.ref when touching mmio.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c         | 8 ++++----
 drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +-
 drivers/gpu/drm/xe/xe_guc_pc.c     | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index b31ef2a8ff17d..3e32d38aeeea5 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -535,8 +535,8 @@ static int gt_reset(struct xe_gt *gt)
 	if (err)
 		goto err_out;
 
-	xe_device_mem_access_put(gt_to_xe(gt));
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(gt_to_xe(gt));
 	XE_WARN_ON(err);
 
 	xe_gt_info(gt, "reset done\n");
@@ -579,8 +579,8 @@ void xe_gt_suspend_prepare(struct xe_gt *gt)
 
 	xe_uc_stop_prepare(&gt->uc);
 
-	xe_device_mem_access_put(gt_to_xe(gt));
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
 }
 
 int xe_gt_suspend(struct xe_gt *gt)
@@ -602,8 +602,8 @@ int xe_gt_suspend(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
-	xe_device_mem_access_put(gt_to_xe(gt));
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
 	xe_gt_info(gt, "suspended\n");
 
 	return 0;
@@ -630,8 +630,8 @@ int xe_gt_resume(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
-	xe_device_mem_access_put(gt_to_xe(gt));
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
 	xe_gt_info(gt, "resumed\n");
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index d0092d714ffe8..f9f653243f206 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -45,8 +45,8 @@ static int hw_engines(struct seq_file *m, void *data)
 	for_each_hw_engine(hwe, gt, id)
 		xe_hw_engine_print(hwe, &p);
 
-	xe_device_mem_access_put(xe);
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(xe);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 0927cb669603c..91a3967fd7998 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -848,8 +848,8 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 	ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL);
 
 out:
-	xe_device_mem_access_put(pc_to_xe(pc));
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret;
 }
 
-- 
GitLab


From 7eed01a926838d4f6b8c655801e6af5366ccec46 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:09 +0100
Subject: [PATCH 1738/2340] drm/xe: drop xe_device_mem_access_get() from
 guc_ct_send
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The callers should already be holding the mem_access reference, before
calling into this.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 5d9ed5de5dbb8..cb75db30800ce 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -625,14 +625,10 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
 
 	XE_BUG_ON(g2h_len && g2h_fence);
 
-	xe_device_mem_access_get(ct_to_xe(ct));
-
 	mutex_lock(&ct->lock);
 	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
 	mutex_unlock(&ct->lock);
 
-	xe_device_mem_access_put(ct_to_xe(ct));
-
 	return ret;
 }
 
-- 
GitLab


From e018f44b29ed2de0a09186c728f173d0daaac448 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:10 +0100
Subject: [PATCH 1739/2340] drm/xe/ggtt: prime ggtt->lock against FS_RECLAIM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Increase the sensitivity of the ggtt->lock by priming it against
FS_RECLAIM, such that allocating memory while holding will result in
lockdep splats.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 0722c49585a0d..4468c0ae0f6fd 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -93,6 +93,16 @@ static void ggtt_fini_noalloc(struct drm_device *drm, void *arg)
 	xe_bo_unpin_map_no_vm(ggtt->scratch);
 }
 
+static void primelockdep(struct xe_ggtt *ggtt)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&ggtt->lock);
+	fs_reclaim_release(GFP_KERNEL);
+}
+
 int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 {
 	struct xe_device *xe = tile_to_xe(ggtt->tile);
@@ -140,6 +150,7 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
 		    ggtt->size - xe_wopcm_size(xe));
 	mutex_init(&ggtt->lock);
+	primelockdep(ggtt);
 
 	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt);
 }
-- 
GitLab


From 7d623575a34539c0302a3ed3ec7321efcb281e37 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:11 +0100
Subject: [PATCH 1740/2340] drm/xe: drop xe_device_mem_access_get() from
 invalidation_vma
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lockdep gives the following splat:

[  594.158863] ffff888140da53f0 (&vm->userptr.notifier_lock){++++}-{3:3}, at: vma_userptr_invalidate+0xeb/0x330 [xe]
[  594.158921]
               but task is already holding lock:
[  594.158926] ffffffff82761940
(mmu_notifier_invalidate_range_start){+.+.}-{0:0}, at: unmap_vmas+0x0/0x1c0
[  594.158941]
               which lock already depends on the new lock.

[  594.158947]
               the existing dependency chain (in reverse order) is:
[  594.158953]
               -> #5 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}:
[  594.158961]        fs_reclaim_acquire+0x68/0xd0
[  594.158969]        __kmem_cache_alloc_node+0x2c/0x1b0
[  594.158975]        kmalloc_node_trace+0x1d/0xb0
[  594.158983]        alloc_worker+0x18/0x50
[  594.158989]        init_rescuer.part.0+0x13/0xa0
[  594.158995]        workqueue_init+0xdf/0x210
[  594.159001]        kernel_init_freeable+0x5c/0x2f0
[  594.159009]        kernel_init+0x11/0x1a0
[  594.159017]        ret_from_fork+0x29/0x50
[  594.159023]
               -> #4 (fs_reclaim){+.+.}-{0:0}:
[  594.159031]        fs_reclaim_acquire+0xa0/0xd0
[  594.159037]        __kmem_cache_alloc_node+0x2c/0x1b0
[  594.159042]        kmalloc_trace+0x20/0xb0
[  594.159048]        acpi_device_add+0x25a/0x3f0
[  594.159056]        acpi_add_single_object+0x387/0x750
[  594.159063]        acpi_bus_check_add+0x108/0x280
[  594.159069]        acpi_bus_scan+0x34/0xf0
[  594.159075]        acpi_scan_init+0xed/0x2b0
[  594.159082]        acpi_init+0x21e/0x520
[  594.159087]        do_one_initcall+0x53/0x260
[  594.159092]        kernel_init_freeable+0x18a/0x2f0
[  594.159099]        kernel_init+0x11/0x1a0
[  594.159105]        ret_from_fork+0x29/0x50
[  594.159110]
               -> #3 (acpi_device_lock){+.+.}-{3:3}:
[  594.159117]        __mutex_lock+0x95/0xd10
[  594.159122]        acpi_enable_wakeup_device_power+0x30/0x120
[  594.159130]        __acpi_device_wakeup_enable+0x34/0x110
[  594.159138]        acpi_pm_set_device_wakeup+0x55/0x140
[  594.159143]        __pci_enable_wake+0x56/0xb0
[  594.159150]        pci_finish_runtime_suspend+0x35/0x80
[  594.159157]        pci_pm_runtime_suspend+0xb5/0x1a0
[  594.159162]        __rpm_callback+0x3c/0x110
[  594.159170]        rpm_callback+0x58/0x70
[  594.159176]        rpm_suspend+0x15c/0x6f0
[  594.159182]        pm_runtime_work+0x9b/0xb0
[  594.159188]        process_one_work+0x263/0x520
[  594.159195]        worker_thread+0x4d/0x3b0
[  594.159200]        kthread+0xeb/0x120
[  594.159206]        ret_from_fork+0x29/0x50
[  594.159211]
               -> #2 (acpi_wakeup_lock){+.+.}-{3:3}:
[  594.159218]        __mutex_lock+0x95/0xd10
[  594.159223]        acpi_pm_set_device_wakeup+0x7a/0x140
[  594.159228]        __pci_enable_wake+0x77/0xb0
[  594.159234]        pci_pm_runtime_resume+0x70/0xd0
[  594.159240]        __rpm_callback+0x3c/0x110
[  594.159246]        rpm_callback+0x58/0x70
[  594.159252]        rpm_resume+0x50d/0x7a0
[  594.159258]        rpm_resume+0x267/0x7a0
[  594.159264]        __pm_runtime_resume+0x45/0x90
[  594.159270]        xe_pm_runtime_resume_and_get+0x12/0x50 [xe]
[  594.159314]        xe_device_mem_access_get+0x97/0xc0 [xe]
[  594.159346]        hw_engines+0x65/0xf0 [xe]
[  594.159380]        seq_read_iter+0x10d/0x4b0
[  594.159385]        seq_read+0x9e/0xd0
[  594.159390]        full_proxy_read+0x4e/0x80
[  594.159396]        vfs_read+0xb6/0x310
[  594.159401]        ksys_read+0x60/0xe0
[  594.159406]        do_syscall_64+0x38/0x90
[  594.159413]        entry_SYSCALL_64_after_hwframe+0x72/0xdc
[  594.159419]
               -> #1 (&xe->mem_access.lock){+.+.}-{3:3}:
[  594.159427]        xe_device_mem_access_get+0x43/0xc0 [xe]
[  594.159457]        xe_gt_tlb_invalidation_vma+0x53/0x190 [xe]
[  594.159490]        invalidation_fence_init+0x1d2/0x2c0 [xe]
[  594.159529]        __xe_pt_unbind_vma+0x151/0x4e0 [xe]
[  594.159564]        vm_bind_ioctl+0x48a/0xae0 [xe]
[  594.159602]        async_op_work_func+0x20c/0x530 [xe]
[  594.159634]        process_one_work+0x263/0x520
[  594.159640]        worker_thread+0x4d/0x3b0
[  594.159646]        kthread+0xeb/0x120
[  594.159650]        ret_from_fork+0x29/0x50
[  594.159655]
               -> #0 (&vm->userptr.notifier_lock){++++}-{3:3}:
[  594.159663]        __lock_acquire+0x16fa/0x2850
[  594.159670]        lock_acquire+0xd2/0x2e0
[  594.159676]        down_write+0x36/0xd0
[  594.159681]        vma_userptr_invalidate+0xeb/0x330 [xe]
[  594.159714]        __mmu_notifier_invalidate_range_start+0x239/0x2a0
[  594.159722]        unmap_vmas+0x1ac/0x1c0
[  594.159727]        unmap_region+0xb5/0x120
[  594.159732]        do_vmi_align_munmap+0x2be/0x430
[  594.159739]        do_vmi_munmap+0xea/0x120
[  594.159744]        __vm_munmap+0x9c/0x160
[  594.159750]        __x64_sys_munmap+0x12/0x20
[  594.159756]        do_syscall_64+0x38/0x90
[  594.159761]        entry_SYSCALL_64_after_hwframe+0x72/0xdc
[  594.159768]
               other info that might help us debug this:

[  594.159773] Chain exists of:
                 &vm->userptr.notifier_lock --> fs_reclaim -->
mmu_notifier_invalidate_range_start

[  594.159785]  Possible unsafe locking scenario:

[  594.159790]        CPU0                    CPU1
[  594.159794]        ----                    ----
[  594.159797]   lock(mmu_notifier_invalidate_range_start);
[  594.159802]                                lock(fs_reclaim);
[  594.159808]
lock(mmu_notifier_invalidate_range_start);
[  594.159814]   lock(&vm->userptr.notifier_lock);
[  594.159819]

The VM should be holding a mem_access.ref so this looks like it should
be a false positive and we can just drop the explicit mem_access in
xe_gt_tlb_invalidation().  The GGTT invalidation path also takes care to
hold mem_access.ref so should be fine there also, and we already assert
that we hold access.ref for the GuC communication underneath.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index e2b85559257c7..cad0ade595ec9 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -248,7 +248,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	struct xe_device *xe = gt_to_xe(gt);
 #define MAX_TLB_INVALIDATION_LEN	7
 	u32 action[MAX_TLB_INVALIDATION_LEN];
-	int len = 0, ret;
+	int len = 0;
 
 	XE_BUG_ON(!vma);
 
@@ -302,11 +302,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 
 	XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
 
-	xe_device_mem_access_get(xe);
-	ret = send_tlb_invalidation(&gt->uc.guc, fence, action, len);
-	xe_device_mem_access_put(xe);
-
-	return ret;
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
 
 /**
-- 
GitLab


From e3d2309250d49e4558b0abe95924b18f74995607 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 19 Jul 2023 09:38:12 +0100
Subject: [PATCH 1741/2340] drm/xe: add lockdep annotation for
 xe_device_mem_access_get()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The atomics here might hide potential issues, also rpm core is not
holding any lock when calling our rpm resume callback, so add a dummy lock
with the idea that xe_pm_runtime_resume() is eventually going to be
called when we are holding it. This only needs to happen once and then
lockdep can validate all callers and their locks.

v2: (Thomas Hellström)
 - Prefer static lockdep_map instead of full blown mutex.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 47401bb49958f..b1f36c986f0d0 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -34,6 +34,12 @@
 #include "xe_vm_madvise.h"
 #include "xe_wait_user_fence.h"
 
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map xe_device_mem_access_lockdep_map = {
+	.name = "xe_device_mem_access_lockdep_map"
+};
+#endif
+
 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct xe_file *xef;
@@ -410,10 +416,28 @@ void xe_device_mem_access_get(struct xe_device *xe)
 	if (xe_pm_read_callback_task(xe) == current)
 		return;
 
+	/*
+	 * Since the resume here is synchronous it can be quite easy to deadlock
+	 * if we are not careful. Also in practice it might be quite timing
+	 * sensitive to ever see the 0 -> 1 transition with the callers locks
+	 * held, so deadlocks might exist but are hard for lockdep to ever see.
+	 * With this in mind, help lockdep learn about the potentially scary
+	 * stuff that can happen inside the runtime_resume callback by acquiring
+	 * a dummy lock (it doesn't protect anything and gets compiled out on
+	 * non-debug builds).  Lockdep then only needs to see the
+	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
+	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
+	 * For example if the (callers_locks) are ever grabbed in the
+	 * runtime_resume callback, lockdep should give us a nice splat.
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
 	xe_pm_runtime_get(xe);
 	ref = atomic_inc_return(&xe->mem_access.ref);
 
 	XE_WARN_ON(ref == S32_MAX);
+
+	lock_map_release(&xe_device_mem_access_lockdep_map);
 }
 
 void xe_device_mem_access_put(struct xe_device *xe)
-- 
GitLab


From 6a0612aeabcce6c951788384b94d503b99eefaca Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 12 Jul 2023 17:28:39 +0100
Subject: [PATCH 1742/2340] drm/xe/selftests: hold rpm for
 evict_test_run_device()

We are calling fairly low level things like xe_bo_restore_kernel() which
expect caller to be holding mem_access.ref. Since we are doing stuff
like evict_all we likely don't want to race with rpm suspend, since that
potentially wants to do the same thing, so just wrap the whole test.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 5309204d8d1b1..a63f7a447ca45 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -12,6 +12,7 @@
 
 #include "xe_bo_evict.h"
 #include "xe_pci.h"
+#include "xe_pm.h"
 
 static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
 			    bool clear, u64 get_val, u64 assign_val,
@@ -295,9 +296,13 @@ static int evict_test_run_device(struct xe_device *xe)
 		return 0;
 	}
 
+	xe_device_mem_access_get(xe);
+
 	for_each_gt(gt, xe, id)
 		evict_test_run_gt(xe, gt, test);
 
+	xe_device_mem_access_put(xe);
+
 	return 0;
 }
 
-- 
GitLab


From 939902913a25a0feaa9ca34969dd7e5b43fc2502 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 12 Jul 2023 16:27:21 +0100
Subject: [PATCH 1743/2340] drm/xe/selftests: hold rpm for ccs_test_migrate()

The GPU job will keep the device awake, however assumption here is that
caller of xe_migrate_clear() is also holding mem_access.ref otherwise we
hit the asserts in xe_sa_bo_flush_write() prior to the job construction.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index a63f7a447ca45..16e92400e5104 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -158,9 +158,13 @@ static int ccs_test_run_device(struct xe_device *xe)
 		return 0;
 	}
 
+	xe_device_mem_access_get(xe);
+
 	for_each_gt(gt, xe, id)
 		ccs_test_run_gt(xe, gt, test);
 
+	xe_device_mem_access_put(xe);
+
 	return 0;
 }
 
-- 
GitLab


From 5a142f9c675ab524a5f18457859ed2002507ea74 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 13 Jul 2023 10:13:33 +0100
Subject: [PATCH 1744/2340] drm/xe/selftests: restart GT after
 xe_bo_restore_kernel()

Test seems to be failing badly after calling xe_bo_restore_kernel().
Taking a snapshot of the CTB and copying back a potentially old version
seems risky, depending on what might have been inflight. Also it seems
snapshotting the ADS object and copying back results in serious
breakage. Normally when calling xe_bo_restore_kernel() we always fully
restart the GT, which re-intializes such things.  We could potentially
skip saving and restoring such objects in xe_bo_evict_all() however
seems quite fragile not to also restart the GT. Try to do that here by
triggering a GT reset.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 16e92400e5104..5d60dc6bfe711 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -218,7 +218,21 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 			goto cleanup_all;
 		}
 
+		xe_gt_sanitize(gt);
 		err = xe_bo_restore_kernel(xe);
+		/*
+		 * Snapshotting the CTB and copying back a potentially old
+		 * version seems risky, depending on what might have been
+		 * inflight. Also it seems snapshotting the ADS object and
+		 * copying back results in serious breakage. Normally when
+		 * calling xe_bo_restore_kernel() we always fully restart the
+		 * GT, which re-intializes such things.  We could potentially
+		 * skip saving and restoring such objects in xe_bo_evict_all()
+		 * however seems quite fragile not to also restart the GT. Try
+		 * to do that here by triggering a GT reset.
+		 */
+		xe_gt_reset_async(gt);
+		flush_work(&gt->reset.worker);
 		if (err) {
 			KUNIT_FAIL(test, "restore kernel err=%pe\n",
 				   ERR_PTR(err));
-- 
GitLab


From ee82d2da9c8ac13486550b2c86068e1d6edddf51 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 13 Jul 2023 10:00:49 +0100
Subject: [PATCH 1745/2340] drm/xe: add missing bulk_move reset

It looks like bulk_move is set during object construction, but is only
removed on object close, however in various places we might not yet have
an actual fd to close, like on the error paths for the gem_create ioctl,
and also one internal user for the evict_test_run_gt() selftest. Try to
handle those cases by manually resetting the bulk_move. This should
prevent triggering:

WARNING: CPU: 7 PID: 8252 at drivers/gpu/drm/ttm/ttm_bo.c:327
ttm_bo_release+0x25e/0x2a0 [ttm]

v2 (Nirmoy):
  - It should be safe to just unconditionally call
    __xe_bo_unset_bulk_move() in most places.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c |  7 +++++++
 drivers/gpu/drm/xe/xe_bo.c       | 26 +++++++++++++++++---------
 drivers/gpu/drm/xe/xe_bo.h       |  6 ++++++
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 5d60dc6bfe711..b32a9068d76c8 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -283,6 +283,10 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 		xe_bo_unlock(external, &ww);
 
 		xe_bo_put(external);
+
+		xe_bo_lock(bo, &ww, 0, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		continue;
 
@@ -293,6 +297,9 @@ cleanup_all:
 cleanup_external:
 		xe_bo_put(external);
 cleanup_bo:
+		xe_bo_lock(bo, &ww, 0, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_bo_unlock(bo, &ww);
 		xe_bo_put(bo);
 		break;
 	}
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 9ad5cf3e24633..a3bb14aa22345 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1327,6 +1327,7 @@ xe_bo_create_locked_range(struct xe_device *xe,
 	return bo;
 
 err_unlock_put_bo:
+	__xe_bo_unset_bulk_move(bo);
 	xe_bo_unlock_vm_held(bo);
 	xe_bo_put(bo);
 	return ERR_PTR(err);
@@ -1770,22 +1771,29 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
 	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
 			  bo_flags);
-	if (vm) {
-		xe_vm_unlock(vm, &ww);
-		xe_vm_put(vm);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto out_vm;
 	}
 
-	if (IS_ERR(bo))
-		return PTR_ERR(bo);
-
 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
-	xe_bo_put(bo);
 	if (err)
-		return err;
+		goto out_bulk;
 
 	args->handle = handle;
+	goto out_put;
 
-	return 0;
+out_bulk:
+	if (vm && !xe_vm_in_fault_mode(vm))
+		__xe_bo_unset_bulk_move(bo);
+out_put:
+	xe_bo_put(bo);
+out_vm:
+	if (vm) {
+		xe_vm_unlock(vm, &ww);
+		xe_vm_put(vm);
+	}
+	return err;
 }
 
 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 53a82ff7bce25..3e98f3c0b85e4 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -144,6 +144,12 @@ static inline void xe_bo_put(struct xe_bo *bo)
 		drm_gem_object_put(&bo->ttm.base);
 }
 
+static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo)
+{
+	if (bo)
+		ttm_bo_set_bulk_move(&bo->ttm, NULL);
+}
+
 static inline void xe_bo_assert_held(struct xe_bo *bo)
 {
 	if (bo)
-- 
GitLab


From 0d39b6daa5455354c485cb4d521b08740456758e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 18 Jul 2023 12:39:23 -0700
Subject: [PATCH 1746/2340] drm/xe: Normalize XE_VM_FLAG* names

Rename XE_VM_FLAGS_64K to XE_VM_FLAG_64K to follow the other names and
s/GT/TILE/ that got missed in commit 08dea7674533 ("drm/xe: Move
migration from GT to tile").

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230718193924.3084759-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +-
 drivers/gpu/drm/xe/xe_migrate.c       | 8 ++++----
 drivers/gpu/drm/xe/xe_pt.c            | 4 ++--
 drivers/gpu/drm/xe/xe_vm.c            | 6 +++---
 drivers/gpu/drm/xe/xe_vm_types.h      | 4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index aedfb3dd559ea..30e5fdf6ca636 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
 	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0);
-	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
+	if (m->eng->vm->flags & XE_VM_FLAG_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))
 		xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 0c233380d4f25..0515fbef8eece 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -201,7 +201,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
 
-		if (vm->flags & XE_VM_FLAGS_64K)
+		if (vm->flags & XE_VM_FLAG_64K)
 			i += 16;
 		else
 			i += 1;
@@ -213,7 +213,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		/* Write out batch too */
 		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
 		for (i = 0; i < batch->size;
-		     i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE :
+		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
 			entry = xe_pte_encode(NULL, batch, i,
 					      XE_CACHE_WB, 0);
@@ -239,7 +239,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	for (level = 1; level < num_level; level++) {
 		u32 flags = 0;
 
-		if (vm->flags & XE_VM_FLAGS_64K && level == 1)
+		if (vm->flags & XE_VM_FLAG_64K && level == 1)
 			flags = XE_PDE_64K;
 
 		entry = xe_pde_encode(bo, map_ofs + (level - 1) *
@@ -462,7 +462,7 @@ static void emit_pte(struct xe_migrate *m,
 			addr = xe_res_dma(cur) & PAGE_MASK;
 			if (is_vram) {
 				/* Is this a 64K PTE entry? */
-				if ((m->eng->vm->flags & XE_VM_FLAGS_64K) &&
+				if ((m->eng->vm->flags & XE_VM_FLAG_64K) &&
 				    !(cur_ofs & (16 * 8 - 1))) {
 					XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K));
 					addr |= XE_PTE_PS64;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 030fd911d1898..134c74545e8bc 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -341,7 +341,7 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile,
 	 * platforms where 64K pages are needed for VRAM.
 	 */
 	flags = XE_BO_CREATE_PINNED_BIT;
-	if (vm->flags & XE_VM_FLAGS_64K)
+	if (vm->flags & XE_VM_FLAG_64K)
 		flags |= XE_BO_CREATE_SYSTEM_BIT;
 	else
 		flags |= XE_BO_CREATE_VRAM_IF_DGFX(tile);
@@ -761,7 +761,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.va_curs_start = xe_vma_start(vma),
 		.vma = vma,
 		.wupd.entries = entries,
-		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) && is_vram,
+		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_vram,
 	};
 	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 	int ret;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d4c6ddfce1ea6..668774081be7f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1264,11 +1264,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		goto err_close;
 
 	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
-		vm->flags |= XE_VM_FLAGS_64K;
+		vm->flags |= XE_VM_FLAG_64K;
 
 	for_each_tile(tile, xe, id) {
 		if (flags & XE_VM_FLAG_MIGRATION &&
-		    tile->id != XE_VM_FLAG_GT_ID(flags))
+		    tile->id != XE_VM_FLAG_TILE_ID(flags))
 			continue;
 
 		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
@@ -2119,7 +2119,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
 {
 	int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
-		XE_VM_FLAG_GT_ID(vm->flags) : 0;
+		XE_VM_FLAG_TILE_ID(vm->flags) : 0;
 
 	/* Safe to use index 0 as all BO in the VM share a single dma-resv lock */
 	return &vm->pt_root[idx]->bo->ttm;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 8aca079006ba7..c1af0cd6aced1 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -136,14 +136,14 @@ struct xe_vm {
 	 * @flags: flags for this VM, statically setup a creation time aside
 	 * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely
 	 */
-#define XE_VM_FLAGS_64K			BIT(0)
+#define XE_VM_FLAG_64K			BIT(0)
 #define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
 #define XE_VM_FLAG_ASYNC_BIND_OPS	BIT(2)
 #define XE_VM_FLAG_MIGRATION		BIT(3)
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
 #define XE_VM_FLAG_FAULT_MODE		BIT(5)
 #define XE_VM_FLAG_BANNED		BIT(6)
-#define XE_VM_FLAG_GT_ID(flags)		(((flags) >> 7) & 0x3)
+#define XE_VM_FLAG_TILE_ID(flags)	(((flags) >> 7) & 0x3)
 #define XE_VM_FLAG_SET_TILE_ID(tile)	((tile)->id << 7)
 	unsigned long flags;
 
-- 
GitLab


From 4d18eac03212fc2d8c3d9715e2261ac50e989403 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 18 Jul 2023 12:39:24 -0700
Subject: [PATCH 1747/2340] drm/xe: Use FIELD_PREP/FIELD_GET for tile id
 encoding

Use FIELD_PREP()/FIELD_GET() to encode the tile id into flags. Besides
protecting for eventual overflow it also makes it easier to see a new
flag can't be added as BIT(7).

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230718193924.3084759-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm_types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index c1af0cd6aced1..1e3c7b98d775f 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -143,8 +143,8 @@ struct xe_vm {
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
 #define XE_VM_FLAG_FAULT_MODE		BIT(5)
 #define XE_VM_FLAG_BANNED		BIT(6)
-#define XE_VM_FLAG_TILE_ID(flags)	(((flags) >> 7) & 0x3)
-#define XE_VM_FLAG_SET_TILE_ID(tile)	((tile)->id << 7)
+#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(8, 7), flags)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(8, 7), (tile)->id)
 	unsigned long flags;
 
 	/** @composite_fence_ctx: context composite fence */
-- 
GitLab


From 955c09e2cc4894b5997f548de1bd3bdfa18e60e4 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 19 Jul 2023 13:20:59 +0000
Subject: [PATCH 1748/2340] drm/xe: Rely on kmalloc/kzalloc log message

Those messages are unnecessary because a generic message is already
produced in case of allocation failure. Besides, this also removes a
misuse of the XE_IOCTL_DBG macro.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c  | 4 +---
 drivers/gpu/drm/xe/xe_guc_debugfs.c | 4 +---
 drivers/gpu/drm/xe/xe_huc_debugfs.c | 4 +---
 drivers/gpu/drm/xe/xe_query.c       | 8 ++++----
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index f9f653243f206..e622174a866de 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -173,10 +173,8 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	 */
 #define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
 	local = drmm_kmalloc(&gt_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL);
-	if (!local) {
-		XE_WARN_ON("Couldn't allocate memory");
+	if (!local)
 		return;
-	}
 
 	memcpy(local, debugfs_list, DEBUGFS_SIZE);
 #undef DEBUGFS_SIZE
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index 0178b1a2d367f..b43c70de7e373 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -89,10 +89,8 @@ void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
 
 #define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
 	local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
-	if (!local) {
-		XE_WARN_ON("Couldn't allocate memory");
+	if (!local)
 		return;
-	}
 
 	memcpy(local, debugfs_list, DEBUGFS_SIZE);
 #undef DEBUGFS_SIZE
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
index ae3c21315d594..18585a7eeb9d4 100644
--- a/drivers/gpu/drm/xe/xe_huc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -55,10 +55,8 @@ void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
 
 #define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
 	local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
-	if (!local) {
-		XE_WARN_ON("Couldn't allocate memory");
+	if (!local)
 		return;
-	}
 
 	memcpy(local, debugfs_list, DEBUGFS_SIZE);
 #undef DEBUGFS_SIZE
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 4b7869596ba86..f880c9af1651a 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -65,7 +65,7 @@ static int query_engines(struct xe_device *xe,
 	}
 
 	hw_engine_info = kmalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_DBG(xe, !hw_engine_info))
+	if (!hw_engine_info)
 		return -ENOMEM;
 
 	for_each_gt(gt, xe, gt_id)
@@ -182,7 +182,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	}
 
 	config = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_DBG(xe, !config))
+	if (!config)
 		return -ENOMEM;
 
 	config->num_params = num_params;
@@ -231,7 +231,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 	}
 
 	gts = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_DBG(xe, !gts))
+	if (!gts)
 		return -ENOMEM;
 
 	gts->num_gt = xe->info.gt_count;
@@ -278,7 +278,7 @@ static int query_hwconfig(struct xe_device *xe,
 	}
 
 	hwconfig = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_DBG(xe, !hwconfig))
+	if (!hwconfig)
 		return -ENOMEM;
 
 	xe_device_mem_access_get(xe);
-- 
GitLab


From ea82d5aab53f8f13fa0834d0b4341ca0788c2a8f Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 19 Jul 2023 13:51:07 +0000
Subject: [PATCH 1749/2340] drm/xe/execlist: Remove leftover printk messages

Those look like leftover debug and are not even being used. If they were
real debug/info, they should be using the drm helpers.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_execlist.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index b0ccc4ff84614..b45594a45fe24 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -47,8 +47,6 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	struct xe_device *xe = gt_to_xe(gt);
 	u64 lrc_desc;
 
-	printk(KERN_INFO "__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id);
-
 	lrc_desc = xe_lrc_descriptor(lrc);
 
 	if (GRAPHICS_VERx100(xe) >= 1250) {
@@ -125,9 +123,6 @@ static void __xe_execlist_port_idle(struct xe_execlist_port *port)
 	if (!port->running_exl)
 		return;
 
-	printk(KERN_INFO "__xe_execlist_port_idle(%d:%d)\n", port->hwe->class,
-	       port->hwe->instance);
-
 	xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
 	__start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
 	port->running_exl = NULL;
@@ -176,9 +171,6 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe)
 	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
 	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
 
-	printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class,
-	       hwe->instance, hi, lo);
-
 	return lo | (u64)hi << 32;
 }
 
-- 
GitLab


From 72e8d73b712d2232019b33d2331099d3071ea94a Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 19 Jul 2023 13:51:08 +0000
Subject: [PATCH 1750/2340] drm/xe: Cleanup style warnings and errors

Fix 6 errors and 20 warnings reported by checkpatch.pl.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c              |  2 +-
 drivers/gpu/drm/xe/xe_gt_mcr.c          |  2 +-
 drivers/gpu/drm/xe/xe_gt_sysfs.c        |  4 +---
 drivers/gpu/drm/xe/xe_guc.c             | 14 ++++++--------
 drivers/gpu/drm/xe/xe_hw_engine_types.h |  2 +-
 drivers/gpu/drm/xe/xe_map.h             |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c         |  2 +-
 drivers/gpu/drm/xe/xe_reg_whitelist.c   | 24 ++++++++++++++++++------
 drivers/gpu/drm/xe/xe_res_cursor.h      | 11 +++++------
 drivers/gpu/drm/xe/xe_wopcm.c           | 10 ++++------
 10 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a3bb14aa22345..49c80e95222b0 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1831,7 +1831,7 @@ int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
 	XE_BUG_ON(!ww);
 
 	tv_bo.num_shared = num_resv;
-	tv_bo.bo = &bo->ttm;;
+	tv_bo.bo = &bo->ttm;
 	list_add_tail(&tv_bo.head, &objs);
 
 	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index ff40753875649..c56815af06866 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -273,7 +273,7 @@ static void init_steering_inst0(struct xe_gt *gt)
 
 static const struct {
 	const char *name;
-	void (*init)(struct xe_gt *);
+	void (*init)(struct xe_gt *gt);
 } xe_steering_types[] = {
 	[L3BANK] =	{ "L3BANK",	init_steering_l3bank },
 	[MSLICE] =	{ "MSLICE",	init_steering_mslice },
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index b955940e8dc6d..c69d2e8a0fe1e 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -37,10 +37,8 @@ void xe_gt_sysfs_init(struct xe_gt *gt)
 	int err;
 
 	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
-	if (!kg) {
-		drm_warn(&xe->drm, "Allocating kobject failed.\n");
+	if (!kg)
 		return;
-	}
 
 	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
 	kg->gt = gt;
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index ed90d738d6739..8ae0268387020 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -396,14 +396,12 @@ static int guc_wait_ucode(struct xe_guc *guc)
 		struct drm_printer p = drm_info_printer(drm->dev);
 
 		drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
-		drm_info(drm, "GuC load failed: status: Reset = %d, "
-			"BootROM = 0x%02X, UKernel = 0x%02X, "
-			"MIA = 0x%02X, Auth = 0x%02X\n",
-			REG_FIELD_GET(GS_MIA_IN_RESET, status),
-			REG_FIELD_GET(GS_BOOTROM_MASK, status),
-			REG_FIELD_GET(GS_UKERNEL_MASK, status),
-			REG_FIELD_GET(GS_MIA_MASK, status),
-			REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+		drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+			 REG_FIELD_GET(GS_MIA_IN_RESET, status),
+			 REG_FIELD_GET(GS_BOOTROM_MASK, status),
+			 REG_FIELD_GET(GS_UKERNEL_MASK, status),
+			 REG_FIELD_GET(GS_MIA_MASK, status),
+			 REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
 
 		if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
 			drm_info(drm, "GuC firmware signature verification failed\n");
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index d788e67312b99..803d557cf5aa7 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -104,7 +104,7 @@ struct xe_hw_engine {
 	/** @fence_irq: fence IRQ to run when a hw engine IRQ is received */
 	struct xe_hw_fence_irq *fence_irq;
 	/** @irq_handler: IRQ handler to run when hw engine IRQ is received */
-	void (*irq_handler)(struct xe_hw_engine *, u16);
+	void (*irq_handler)(struct xe_hw_engine *hwe, u16 intr_vec);
 	/** @engine_id: id  for this hw engine */
 	enum xe_hw_engine_id engine_id;
 };
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
index 032c2e8b54386..f62e0c8b67aba 100644
--- a/drivers/gpu/drm/xe/xe_map.h
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: MIT
+/* SPDX-License-Identifier: MIT */
 /*
  * Copyright © 2022 Intel Corporation
  */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 0515fbef8eece..827938b666c55 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -475,7 +475,7 @@ static void emit_pte(struct xe_migrate *m,
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 
-			xe_res_next(cur, min(size, (u32)PAGE_SIZE));
+			xe_res_next(cur, min_t(u32, size, PAGE_SIZE));
 			cur_ofs += 8;
 		}
 	}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index ea6dd7d71b591..e83781f9a516b 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -89,18 +89,30 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
 	deny = val & RING_FORCE_TO_NONPRIV_DENY;
 
 	switch (val & RING_FORCE_TO_NONPRIV_RANGE_MASK) {
-	case RING_FORCE_TO_NONPRIV_RANGE_4: range_bit = 4; break;
-	case RING_FORCE_TO_NONPRIV_RANGE_16: range_bit = 6; break;
-	case RING_FORCE_TO_NONPRIV_RANGE_64: range_bit = 8; break;
+	case RING_FORCE_TO_NONPRIV_RANGE_4:
+		range_bit = 4;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_16:
+		range_bit = 6;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_64:
+		range_bit = 8;
+		break;
 	}
 
 	range_start = reg & REG_GENMASK(25, range_bit);
 	range_end = range_start | REG_GENMASK(range_bit, 0);
 
 	switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
-	case RING_FORCE_TO_NONPRIV_ACCESS_RW: access_str = "rw"; break;
-	case RING_FORCE_TO_NONPRIV_ACCESS_RD: access_str = "read"; break;
-	case RING_FORCE_TO_NONPRIV_ACCESS_WR: access_str = "write"; break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_RW:
+		access_str = "rw";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_RD:
+		access_str = "read";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_WR:
+		access_str = "write";
+		break;
 	}
 
 	drm_printf_indent(p, indent, "REG[0x%x-0x%x]: %s %s access\n",
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 2a6fdd284395b..dda963fe33009 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -51,15 +51,14 @@ struct xe_res_cursor {
 static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
 {
 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+	struct ttm_resource_manager *mgr;
 
-	if (res->mem_type != XE_PL_STOLEN) {
+	if (res->mem_type != XE_PL_STOLEN)
 		return &xe->tiles[res->mem_type - XE_PL_VRAM0].mem.vram_mgr->mm;
-	} else {
-		struct ttm_resource_manager *mgr =
-			ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
 
-		return &to_xe_ttm_vram_mgr(mgr)->mm;
-	}
+	mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+
+	return &to_xe_ttm_vram_mgr(mgr)->mm;
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index 35fde8965bca9..d9acf8783b838 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -47,12 +47,10 @@
  */
 
 /* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
-#define DGFX_WOPCM_SIZE			SZ_4M	/* FIXME: Larger size require
-						   for 2 tile PVC, do a proper
-						   probe sooner or later */
-#define MTL_WOPCM_SIZE			SZ_4M	/* FIXME: Larger size require
-						   for MTL, do a proper probe
-						   sooner or later */
+/* FIXME: Larger size require for 2 tile PVC, do a proper probe sooner or later */
+#define DGFX_WOPCM_SIZE			SZ_4M
+/* FIXME: Larger size require for MTL, do a proper probe sooner or later */
+#define MTL_WOPCM_SIZE			SZ_4M
 #define GEN11_WOPCM_SIZE		SZ_2M
 
 #define GEN12_MAX_WOPCM_SIZE            SZ_8M
-- 
GitLab


From 0043a3e8a1f57e3aca91d4a99ff49031416119b6 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 19 Jul 2023 18:57:07 +0000
Subject: [PATCH 1751/2340] drm/xe/execlist: Log when using execlist submission

Make explicit in the log that execlist submission is used to prevent from
silently using it over GuC submission.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_execlist.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index b45594a45fe24..b15d095b395bd 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -318,9 +318,12 @@ static int execlist_engine_init(struct xe_engine *e)
 {
 	struct drm_gpu_scheduler *sched;
 	struct xe_execlist_engine *exl;
+	struct xe_device *xe = gt_to_xe(e->gt);
 	int err;
 
-	XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+	XE_BUG_ON(xe_device_guc_submission_enabled(xe));
+
+	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
 
 	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
 	if (!exl)
-- 
GitLab


From 845f64bdbfc96cefd7070621b18ff8f50c7857fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Sun, 9 Jul 2023 09:54:59 -0700
Subject: [PATCH 1752/2340] drm/xe: Introduce a range-fence utility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add generic utility to track range conflicts signaled by a dma-fence.
Tracking implemented via an interval tree. An example use case being
tracking conflicts for pending (un)binds from multiple bind engines. By
being generic ths idea would this could moved to the DRM level and used
in multiple drivers for similar problems.

v2: Make interval tree functions static (CI)
v3: Remove non-static cleanup function (CI)

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile         |   1 +
 drivers/gpu/drm/xe/xe_range_fence.c | 156 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_range_fence.h |  75 +++++++++++++
 3 files changed, 232 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_range_fence.c
 create mode 100644 drivers/gpu/drm/xe/xe_range_fence.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e5fb874a7aaf6..312e643397bad 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -95,6 +95,7 @@ xe-y += xe_bb.o \
 	xe_pt.o \
 	xe_pt_walk.o \
 	xe_query.o \
+	xe_range_fence.o \
 	xe_reg_sr.o \
 	xe_reg_whitelist.o \
 	xe_rtp.o \
diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c
new file mode 100644
index 0000000000000..d35d9ec58e86f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/slab.h>
+
+#include "xe_macros.h"
+#include "xe_range_fence.h"
+
+#define XE_RANGE_TREE_START(_node)	((_node)->start)
+#define XE_RANGE_TREE_LAST(_node)	((_node)->last)
+
+INTERVAL_TREE_DEFINE(struct xe_range_fence, rb, u64, __subtree_last,
+		     XE_RANGE_TREE_START, XE_RANGE_TREE_LAST, static,
+		     xe_range_fence_tree);
+
+static void
+xe_range_fence_signal_notify(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct xe_range_fence *rfence = container_of(cb, typeof(*rfence), cb);
+	struct xe_range_fence_tree *tree = rfence->tree;
+
+	llist_add(&rfence->link, &tree->list);
+}
+
+static bool __xe_range_fence_tree_cleanup(struct xe_range_fence_tree *tree)
+{
+	struct llist_node *node = llist_del_all(&tree->list);
+	struct xe_range_fence *rfence, *next;
+
+	llist_for_each_entry_safe(rfence, next, node, link) {
+		xe_range_fence_tree_remove(rfence, &tree->root);
+		dma_fence_put(rfence->fence);
+		kfree(rfence);
+	}
+
+	return !!node;
+}
+
+/**
+ * xe_range_fence_insert() - range fence insert
+ * @tree: range fence tree to insert intoi
+ * @rfence: range fence
+ * @ops: range fence ops
+ * @start: start address of range fence
+ * @last: last address of range fence
+ * @fence: dma fence which signals range fence can be removed + freed
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int xe_range_fence_insert(struct xe_range_fence_tree *tree,
+			  struct xe_range_fence *rfence,
+			  const struct xe_range_fence_ops *ops,
+			  u64 start, u64 last, struct dma_fence *fence)
+{
+	int err = 0;
+
+	__xe_range_fence_tree_cleanup(tree);
+
+	if (dma_fence_is_signaled(fence))
+		goto free;
+
+	rfence->ops = ops;
+	rfence->start = start;
+	rfence->last = last;
+	rfence->tree = tree;
+	rfence->fence = dma_fence_get(fence);
+	err = dma_fence_add_callback(fence, &rfence->cb,
+				     xe_range_fence_signal_notify);
+	if (err == -ENOENT) {
+		dma_fence_put(fence);
+		err = 0;
+		goto free;
+	} else if (err == 0) {
+		xe_range_fence_tree_insert(rfence, &tree->root);
+		return 0;
+	}
+
+free:
+	if (ops->free)
+		ops->free(rfence);
+
+	return err;
+}
+
+static void xe_range_fence_tree_remove_all(struct xe_range_fence_tree *tree)
+{
+	struct xe_range_fence *rfence;
+	bool retry = true;
+
+	rfence = xe_range_fence_tree_iter_first(&tree->root, 0, U64_MAX);
+	while (rfence) {
+		/* Should be ok with the minimalistic callback */
+		if (dma_fence_remove_callback(rfence->fence, &rfence->cb))
+			llist_add(&rfence->link, &tree->list);
+		rfence = xe_range_fence_tree_iter_next(rfence, 0, U64_MAX);
+	}
+
+	while (retry)
+		retry = __xe_range_fence_tree_cleanup(tree);
+}
+
+/**
+ * xe_range_fence_tree_init() - Init range fence tree
+ * @tree: range fence tree
+ */
+void xe_range_fence_tree_init(struct xe_range_fence_tree *tree)
+{
+	memset(tree, 0, sizeof(*tree));
+}
+
+/**
+ * xe_range_fence_tree_fini() - Fini range fence tree
+ * @tree: range fence tree
+ */
+void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree)
+{
+	xe_range_fence_tree_remove_all(tree);
+	XE_WARN_ON(!RB_EMPTY_ROOT(&tree->root.rb_root));
+}
+
+/**
+ * xe_range_fence_tree_first() - range fence tree iterator first
+ * @tree: range fence tree
+ * @start: start address of range fence
+ * @last: last address of range fence
+ *
+ * Return: first range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last)
+{
+	return xe_range_fence_tree_iter_first(&tree->root, start, last);
+}
+
+/**
+ * xe_range_fence_tree_next() - range fence tree iterator next
+ * @rfence: current range fence
+ * @start: start address of range fence
+ * @last: last address of range fence
+ *
+ * Return: next range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last)
+{
+	return xe_range_fence_tree_iter_next(rfence, start, last);
+}
+
+const struct xe_range_fence_ops xe_range_fence_kfree_ops = {
+	.free = (void (*)(struct xe_range_fence *rfence)) kfree,
+};
diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h
new file mode 100644
index 0000000000000..edd58b34f5c0b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RANGE_FENCE_H_
+#define _XE_RANGE_FENCE_H_
+
+#include <linux/dma-fence.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct xe_range_fence_tree;
+struct xe_range_fence;
+
+/** struct xe_range_fence_ops - XE range fence ops */
+struct xe_range_fence_ops {
+	/** @free: free range fence op */
+	void (*free)(struct xe_range_fence *rfence);
+};
+
+/** struct xe_range_fence - XE range fence (address conflict tracking) */
+struct xe_range_fence {
+	/** @rb: RB tree node inserted into interval tree */
+	struct rb_node rb;
+	/** @start: start address of range fence is interval tree */
+	u64 start;
+	/** @last: last address (inclusive) of range fence is interval tree */
+	u64 last;
+	/** @__subtree_last: interval tree internal usage */
+	u64 __subtree_last;
+	/**
+	 * @fence: fence signals address in range fence no longer has conflict
+	 */
+	struct dma_fence *fence;
+	/** @tree: interval tree which range fence belongs to */
+	struct xe_range_fence_tree *tree;
+	/**
+	 * @cb: callback when fence signals to remove range fence free from interval tree
+	 */
+	struct dma_fence_cb cb;
+	/** @link: used to defer free of range fence to non-irq context */
+	struct llist_node link;
+	/** @ops: range fence ops */
+	const struct xe_range_fence_ops *ops;
+};
+
+/** struct xe_range_fence_tree - interval tree to store range fences */
+struct xe_range_fence_tree {
+	/** @root: interval tree root */
+	struct rb_root_cached root;
+	/** @list: list of pending range fences to be freed */
+	struct llist_head list;
+};
+
+extern const struct xe_range_fence_ops xe_range_fence_kfree_ops;
+
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last);
+
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last);
+
+void xe_range_fence_tree_init(struct xe_range_fence_tree *tree);
+
+void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree);
+
+int xe_range_fence_insert(struct xe_range_fence_tree *tree,
+			  struct xe_range_fence *rfence,
+			  const struct xe_range_fence_ops *ops,
+			  u64 start, u64 end,
+			  struct dma_fence *fence);
+
+#endif
-- 
GitLab


From fd84041d094ce8feb730911ca9c7fdfff1d4fb94 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 14:10:11 -0700
Subject: [PATCH 1753/2340] drm/xe: Make bind engines safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We currently have a race between bind engines which can result in
corrupted page tables leading to faults.

A simple example:
bind A 0x0000-0x1000, engine A, has unsatisfied in-fence
bind B 0x1000-0x2000, engine B, no in-fences
exec A uses 0x1000-0x2000

Bind B will pass bind A and exec A will fault. This occurs as bind A
programs the root of the page table in a bind job which is held up by an
in-fence. Bind B in this case just programs a leaf entry of the
structure.

To fix use range-fence utility to track cross bind engine conflicts. In
the above example bind A would insert an dependency into the range-fence
tree with a key of 0x0-0x7fffffffff, bind B would find that dependency
and its bind job would scheduled behind the unsatisfied in-fence and
bind A's job.

Reviewed-by: Maarten Lankhorst<maarten.lankhorst@linux.intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c  |   2 +
 drivers/gpu/drm/xe/xe_migrate.h  |   8 +++
 drivers/gpu/drm/xe/xe_pt.c       | 115 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_vm.c       |   8 +++
 drivers/gpu/drm/xe/xe_vm_types.h |   7 ++
 5 files changed, 140 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 827938b666c55..d64567403068f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1074,6 +1074,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 		return ERR_PTR(-ETIME);
 
 	if (ops->pre_commit) {
+		pt_update->job = NULL;
 		err = ops->pre_commit(pt_update);
 		if (err)
 			return ERR_PTR(err);
@@ -1295,6 +1296,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		goto err_job;
 
 	if (ops->pre_commit) {
+		pt_update->job = job;
 		err = ops->pre_commit(pt_update);
 		if (err)
 			goto err_job;
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 204337ea3b4ed..0d62aff6421c5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -69,6 +69,14 @@ struct xe_migrate_pt_update {
 	const struct xe_migrate_pt_update_ops *ops;
 	/** @vma: The vma we're updating the pagetable for. */
 	struct xe_vma *vma;
+	/** @job: The job if a GPU page-table update. NULL otherwise */
+	struct xe_sched_job *job;
+	/** @start: Start of update for the range fence */
+	u64 start;
+	/** @last: Last of update for the range fence */
+	u64 last;
+	/** @tile_id: Tile ID of the update */
+	u8 tile_id;
 };
 
 struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 134c74545e8bc..0ca7853fc70a9 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1116,6 +1116,53 @@ struct xe_pt_migrate_pt_update {
 	bool locked;
 };
 
+/*
+ * This function adds the needed dependencies to a page-table update job
+ * to make sure racing jobs for separate bind engines don't race writing
+ * to the same page-table range, wreaking havoc. Initially use a single
+ * fence for the entire VM. An optimization would use smaller granularity.
+ */
+static int xe_pt_vm_dependencies(struct xe_sched_job *job,
+				 struct xe_range_fence_tree *rftree,
+				 u64 start, u64 last)
+{
+	struct xe_range_fence *rtfence;
+	struct dma_fence *fence;
+	int err;
+
+	rtfence = xe_range_fence_tree_first(rftree, start, last);
+	while (rtfence) {
+		fence = rtfence->fence;
+
+		if (!dma_fence_is_signaled(fence)) {
+			/*
+			 * Is this a CPU update? GPU is busy updating, so return
+			 * an error
+			 */
+			if (!job)
+				return -ETIME;
+
+			dma_fence_get(fence);
+			err = drm_sched_job_add_dependency(&job->drm, fence);
+			if (err)
+				return err;
+		}
+
+		rtfence = xe_range_fence_tree_next(rtfence, start, last);
+	}
+
+	return 0;
+}
+
+static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_range_fence_tree *rftree =
+		&xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id];
+
+	return xe_pt_vm_dependencies(pt_update->job, rftree,
+				     pt_update->start, pt_update->last);
+}
+
 static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
 {
 	struct xe_pt_migrate_pt_update *userptr_update =
@@ -1123,6 +1170,13 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
 	struct xe_vma *vma = pt_update->vma;
 	unsigned long notifier_seq = vma->userptr.notifier_seq;
 	struct xe_vm *vm = xe_vma_vm(vma);
+	int err = xe_pt_vm_dependencies(pt_update->job,
+					&vm->rftree[pt_update->tile_id],
+					pt_update->start,
+					pt_update->last);
+
+	if (err)
+		return err;
 
 	userptr_update->locked = false;
 
@@ -1161,6 +1215,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
 
 static const struct xe_migrate_pt_update_ops bind_ops = {
 	.populate = xe_vm_populate_pgtable,
+	.pre_commit = xe_pt_pre_commit,
 };
 
 static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
@@ -1258,6 +1313,27 @@ static int invalidation_fence_init(struct xe_gt *gt,
 	return ret && ret != -ENOENT ? ret : 0;
 }
 
+static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
+				       struct xe_pt_migrate_pt_update *update,
+				       struct xe_vm_pgtable_update *entries,
+				       u32 num_entries)
+{
+	int i, level = 0;
+
+	for (i = 0; i < num_entries; i++) {
+		const struct xe_vm_pgtable_update *entry = &entries[i];
+
+		if (entry->pt->level > level)
+			level = entry->pt->level;
+	}
+
+	/* Greedy (non-optimal) calculation but simple */
+	update->base.start = ALIGN_DOWN(xe_vma_start(vma),
+					0x1ull << xe_pt_shift(level));
+	update->base.last = ALIGN(xe_vma_end(vma),
+				  0x1ull << xe_pt_shift(level)) - 1;
+}
+
 /**
  * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
  * address range.
@@ -1290,6 +1366,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		.base = {
 			.ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
 			.vma = vma,
+			.tile_id = tile->id,
 		},
 		.bind = true,
 	};
@@ -1297,6 +1374,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 	u32 num_entries;
 	struct dma_fence *fence;
 	struct invalidation_fence *ifence = NULL;
+	struct xe_range_fence *rfence;
 	int err;
 
 	bind_pt_update.locked = false;
@@ -1313,6 +1391,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
+	xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
+				   num_entries);
 
 	/*
 	 * If rebind, we have to invalidate TLB on !LR vms to invalidate
@@ -1333,6 +1413,12 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 			return ERR_PTR(-ENOMEM);
 	}
 
+	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
+	if (!rfence) {
+		kfree(ifence);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	fence = xe_migrate_update_pgtables(tile->migrate,
 					   vm, xe_vma_bo(vma),
 					   e ? e : vm->eng[tile->id],
@@ -1342,6 +1428,14 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 	if (!IS_ERR(fence)) {
 		bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND;
 		LLIST_HEAD(deferred);
+		int err;
+
+		err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+					    &xe_range_fence_kfree_ops,
+					    bind_pt_update.base.start,
+					    bind_pt_update.base.last, fence);
+		if (err)
+			dma_fence_wait(fence, false);
 
 		/* TLB invalidation must be done before signaling rebind */
 		if (ifence) {
@@ -1380,6 +1474,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 			queue_work(vm->xe->ordered_wq,
 				   &vm->preempt.rebind_work);
 	} else {
+		kfree(rfence);
 		kfree(ifence);
 		if (bind_pt_update.locked)
 			up_read(&vm->userptr.notifier_lock);
@@ -1589,6 +1684,7 @@ xe_pt_commit_unbind(struct xe_vma *vma,
 
 static const struct xe_migrate_pt_update_ops unbind_ops = {
 	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_pre_commit,
 };
 
 static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
@@ -1626,12 +1722,15 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 			.ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
 			&unbind_ops,
 			.vma = vma,
+			.tile_id = tile->id,
 		},
 	};
 	struct xe_vm *vm = xe_vma_vm(vma);
 	u32 num_entries;
 	struct dma_fence *fence = NULL;
 	struct invalidation_fence *ifence;
+	struct xe_range_fence *rfence;
+
 	LLIST_HEAD(deferred);
 
 	xe_bo_assert_held(xe_vma_bo(vma));
@@ -1645,11 +1744,19 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
+	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
+				   num_entries);
 
 	ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
 	if (!ifence)
 		return ERR_PTR(-ENOMEM);
 
+	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
+	if (!rfence) {
+		kfree(ifence);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/*
 	 * Even if we were already evicted and unbind to destroy, we need to
 	 * clear again here. The eviction may have updated pagetables at a
@@ -1664,6 +1771,13 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 	if (!IS_ERR(fence)) {
 		int err;
 
+		err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+					    &xe_range_fence_kfree_ops,
+					    unbind_pt_update.base.start,
+					    unbind_pt_update.base.last, fence);
+		if (err)
+			dma_fence_wait(fence, false);
+
 		/* TLB invalidation must be done before signaling unbind */
 		err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
 		if (err) {
@@ -1685,6 +1799,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 				    unbind_pt_update.locked ? &deferred : NULL);
 		vma->tile_present &= ~BIT(tile->id);
 	} else {
+		kfree(rfence);
 		kfree(ifence);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 668774081be7f..8f1ad906d4b0a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1243,6 +1243,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	INIT_LIST_HEAD(&vm->preempt.engines);
 	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
 
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_init(&vm->rftree[id]);
+
 	INIT_LIST_HEAD(&vm->extobj.list);
 
 	if (!(flags & XE_VM_FLAG_MIGRATION))
@@ -1356,6 +1359,8 @@ err_close:
 	return ERR_PTR(err);
 
 err_no_resv:
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
 	kfree(vm);
 	if (!(flags & XE_VM_FLAG_MIGRATION))
 		xe_device_mem_access_put(xe);
@@ -1500,6 +1505,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		xe->usm.num_vm_in_non_fault_mode--;
 	mutex_unlock(&xe->usm.lock);
 
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+
 	xe_vm_put(vm);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1e3c7b98d775f..f571571d966ac 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -15,6 +15,7 @@
 
 #include "xe_device_types.h"
 #include "xe_pt_types.h"
+#include "xe_range_fence.h"
 
 struct async_op_fence;
 struct xe_bo;
@@ -175,6 +176,12 @@ struct xe_vm {
 	 */
 	struct work_struct destroy_work;
 
+	/**
+	 * @rftree: range fence tree to track updates to page table structure.
+	 * Used to implement conflict tracking between independent bind engines.
+	 */
+	struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
+
 	/** @extobj: bookkeeping for external objects. Protected by the vm lock */
 	struct {
 		/** @enties: number of external BOs attached this VM */
-- 
GitLab


From 3188c0f4c893ce1b232cdf8a3e26ff6139079908 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 14:31:21 -0700
Subject: [PATCH 1754/2340] drm/xe: Remove xe_vma_op_unmap

xe_vma_op_unmap isn't used, remove it.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       |  5 -----
 drivers/gpu/drm/xe/xe_vm_types.h | 10 ----------
 2 files changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 8f1ad906d4b0a..c2e8d231b9eb0 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2520,11 +2520,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 					xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va));
 				break;
 			case DRM_GPUVA_OP_UNMAP:
-				op->unmap.start =
-					xe_vma_start(gpuva_to_vma(op->base.unmap.va));
-				op->unmap.range =
-					xe_vma_size(gpuva_to_vma(op->base.unmap.va));
-				break;
 			case DRM_GPUVA_OP_PREFETCH:
 				/* Nothing to do */
 				break;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index f571571d966ac..21dd0433a6a07 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -326,14 +326,6 @@ struct xe_vma_op_map {
 	bool is_null;
 };
 
-/** struct xe_vma_op_unmap - VMA unmap operation */
-struct xe_vma_op_unmap {
-	/** @start: start of the VMA unmap */
-	u64 start;
-	/** @range: range of the VMA unmap */
-	u64 range;
-};
-
 /** struct xe_vma_op_remap - VMA remap operation */
 struct xe_vma_op_remap {
 	/** @prev: VMA preceding part of a split mapping */
@@ -401,8 +393,6 @@ struct xe_vma_op {
 	union {
 		/** @map: VMA map operation specific data */
 		struct xe_vma_op_map map;
-		/** @unmap: VMA unmap operation specific data */
-		struct xe_vma_op_unmap unmap;
 		/** @remap: VMA remap operation specific data */
 		struct xe_vma_op_remap remap;
 		/** @prefetch: VMA prefetch operation specific data */
-- 
GitLab


From 8f33b4f054fc29a4774d8d10116ef460faeb84a8 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 14:46:01 -0700
Subject: [PATCH 1755/2340] drm/xe: Avoid doing rebinds

If we dont change page sizes we can avoid doing rebinds rather just do a
partial unbind. The algorithm to determine its page size is greedy as we
assume all pages in the removed VMA are the largest page used in the
VMA.

v2: Don't exceed 100 lines
v3: struct xe_vma_op_unmap remove in different patch, remove XXX comment

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c       |  1 +
 drivers/gpu/drm/xe/xe_vm.c       | 72 +++++++++++++++++++++++++++-----
 drivers/gpu/drm/xe/xe_vm_types.h |  7 ++++
 3 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 0ca7853fc70a9..bc7fa5a850e28 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -668,6 +668,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		if (!is_null)
 			xe_res_next(curs, next - addr);
 		xe_walk->va_curs_start = next;
+		xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
 		*action = ACTION_CONTINUE;
 
 		return ret;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c2e8d231b9eb0..bdff1999058f9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2396,6 +2396,16 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 	return vma;
 }
 
+static u64 xe_vma_max_pte_size(struct xe_vma *vma)
+{
+	if (vma->gpuva.flags & XE_VMA_PTE_1G)
+		return SZ_1G;
+	else if (vma->gpuva.flags & XE_VMA_PTE_2M)
+		return SZ_2M;
+
+	return SZ_4K;
+}
+
 /*
  * Parse operations list and create any resources needed for the operations
  * prior to fully committing to the operations. This setup can fail.
@@ -2472,6 +2482,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 				break;
 			}
 			case DRM_GPUVA_OP_REMAP:
+			{
+				struct xe_vma *old =
+					gpuva_to_vma(op->base.remap.unmap->va);
+
+				op->remap.start = xe_vma_start(old);
+				op->remap.range = xe_vma_size(old);
+
 				if (op->base.remap.prev) {
 					struct xe_vma *vma;
 					bool read_only =
@@ -2490,6 +2507,20 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 					}
 
 					op->remap.prev = vma;
+
+					/*
+					 * Userptr creates a new SG mapping so
+					 * we must also rebind.
+					 */
+					op->remap.skip_prev = !xe_vma_is_userptr(old) &&
+						IS_ALIGNED(xe_vma_end(vma),
+							   xe_vma_max_pte_size(old));
+					if (op->remap.skip_prev) {
+						op->remap.range -=
+							xe_vma_end(vma) -
+							xe_vma_start(old);
+						op->remap.start = xe_vma_end(vma);
+					}
 				}
 
 				if (op->base.remap.next) {
@@ -2511,14 +2542,21 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 					}
 
 					op->remap.next = vma;
-				}
 
-				/* XXX: Support no doing remaps */
-				op->remap.start =
-					xe_vma_start(gpuva_to_vma(op->base.remap.unmap->va));
-				op->remap.range =
-					xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va));
+					/*
+					 * Userptr creates a new SG mapping so
+					 * we must also rebind.
+					 */
+					op->remap.skip_next = !xe_vma_is_userptr(old) &&
+						IS_ALIGNED(xe_vma_start(vma),
+							   xe_vma_max_pte_size(old));
+					if (op->remap.skip_next)
+						op->remap.range -=
+							xe_vma_end(old) -
+							xe_vma_start(vma);
+				}
 				break;
+			}
 			case DRM_GPUVA_OP_UNMAP:
 			case DRM_GPUVA_OP_PREFETCH:
 				/* Nothing to do */
@@ -2561,10 +2599,23 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 	case DRM_GPUVA_OP_REMAP:
 		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
 				 true);
-		if (op->remap.prev)
+
+		if (op->remap.prev) {
 			err |= xe_vm_insert_vma(vm, op->remap.prev);
-		if (op->remap.next)
+			if (!err && op->remap.skip_prev)
+				op->remap.prev = NULL;
+		}
+		if (op->remap.next) {
 			err |= xe_vm_insert_vma(vm, op->remap.next);
+			if (!err && op->remap.skip_next)
+				op->remap.next = NULL;
+		}
+
+		/* Adjust for partial unbind after removin VMA from VM */
+		if (!err) {
+			op->base.remap.unmap->va->va.addr = op->remap.start;
+			op->base.remap.unmap->va->va.range = op->remap.range;
+		}
 		break;
 	case DRM_GPUVA_OP_UNMAP:
 		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
@@ -2634,9 +2685,10 @@ again:
 		bool next = !!op->remap.next;
 
 		if (!op->remap.unmap_done) {
-			vm->async_ops.munmap_rebind_inflight = true;
-			if (prev || next)
+			if (prev || next) {
+				vm->async_ops.munmap_rebind_inflight = true;
 				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
+			}
 			err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
 					   op->num_syncs,
 					   !prev && !next ? op->fence : NULL,
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 21dd0433a6a07..a7386e8c28702 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -30,6 +30,9 @@ struct xe_vm;
 #define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 2)
 #define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
 #define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
+#define XE_VMA_PTE_4K		(DRM_GPUVA_USERBITS << 5)
+#define XE_VMA_PTE_2M		(DRM_GPUVA_USERBITS << 6)
+#define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 7)
 
 struct xe_vma {
 	/** @gpuva: Base GPUVA object */
@@ -336,6 +339,10 @@ struct xe_vma_op_remap {
 	u64 start;
 	/** @range: range of the VMA unmap */
 	u64 range;
+	/** @skip_prev: skip prev rebind */
+	bool skip_prev;
+	/** @skip_next: skip next rebind */
+	bool skip_next;
 	/** @unmap_done: unmap operation in done */
 	bool unmap_done;
 };
-- 
GitLab


From 1655c893af08997175e3404039e79f384c925ee3 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 20:44:25 -0700
Subject: [PATCH 1756/2340] drm/xe: Reduce the number list links in xe_vma

Combine the userptr, rebind, and destroy links into a union as
the lists these links belong to are mutually exclusive.

v2: Adjust which lists are combined (Thomas H)
v3: Add kernel doc why this is safe (Thomas H), remove related change
of list_del_init -> list_del (Rodrigo)

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c       |  6 +++--
 drivers/gpu/drm/xe/xe_exec.c     |  2 +-
 drivers/gpu/drm/xe/xe_pt.c       |  2 +-
 drivers/gpu/drm/xe/xe_vm.c       | 43 ++++++++++++++++----------------
 drivers/gpu/drm/xe/xe_vm_types.h | 37 ++++++++++++++++-----------
 5 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 49c80e95222b0..a78ac158e967e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -476,8 +476,10 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 			}
 
 			xe_vm_assert_held(vm);
-			if (list_empty(&vma->rebind_link) && vma->tile_present)
-				list_add_tail(&vma->rebind_link, &vm->rebind_list);
+			if (list_empty(&vma->combined_links.rebind) &&
+			    vma->tile_present)
+				list_add_tail(&vma->combined_links.rebind,
+					      &vm->rebind_list);
 
 			if (vm_resv_locked)
 				dma_resv_unlock(xe_vm_resv(vm));
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 0209f325dda0e..89d167a432f67 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -120,7 +120,7 @@ retry:
 	 * BOs have valid placements possibly moving an evicted BO back
 	 * to a location where the GPU can access it).
 	 */
-	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
 		XE_WARN_ON(xe_vma_is_null(vma));
 
 		if (xe_vma_is_userptr(vma))
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index bc7fa5a850e28..b67144768af05 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1805,7 +1805,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 	}
 
 	if (!vma->tile_present)
-		list_del_init(&vma->rebind_link);
+		list_del_init(&vma->combined_links.rebind);
 
 	if (unbind_pt_update.locked) {
 		XE_WARN_ON(!xe_vma_is_userptr(vma));
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index bdff1999058f9..29a950a02a36b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -467,7 +467,8 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 
 		list_del_init(&vma->notifier.rebind_link);
 		if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
-			list_move_tail(&vma->rebind_link, &vm->rebind_list);
+			list_move_tail(&vma->combined_links.rebind,
+				       &vm->rebind_list);
 	}
 	spin_unlock(&vm->notifier.list_lock);
 
@@ -608,7 +609,7 @@ retry:
 	if (err)
 		goto out_unlock;
 
-	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
 		if (xe_vma_has_no_bo(vma) ||
 		    vma->gpuva.flags & XE_VMA_DESTROYED)
 			continue;
@@ -780,17 +781,20 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
 				 userptr.invalidate_link) {
 		list_del_init(&vma->userptr.invalidate_link);
-		list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
+		if (list_empty(&vma->combined_links.userptr))
+			list_move_tail(&vma->combined_links.userptr,
+				       &vm->userptr.repin_list);
 	}
 	spin_unlock(&vm->userptr.invalidated_lock);
 
 	/* Pin and move to temporary list */
-	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
+	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
+				 combined_links.userptr) {
 		err = xe_vma_userptr_pin_pages(vma);
 		if (err < 0)
 			goto out_err;
 
-		list_move_tail(&vma->userptr_link, &tmp_evict);
+		list_move_tail(&vma->combined_links.userptr, &tmp_evict);
 	}
 
 	/* Take lock and move to rebind_list for rebinding. */
@@ -798,10 +802,8 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 	if (err)
 		goto out_err;
 
-	list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
-		list_del_init(&vma->userptr_link);
-		list_move_tail(&vma->rebind_link, &vm->rebind_list);
-	}
+	list_for_each_entry_safe(vma, next, &tmp_evict, combined_links.userptr)
+		list_move_tail(&vma->combined_links.rebind, &vm->rebind_list);
 
 	dma_resv_unlock(xe_vm_resv(vm));
 
@@ -845,10 +847,11 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 		return NULL;
 
 	xe_vm_assert_held(vm);
-	list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
+	list_for_each_entry_safe(vma, next, &vm->rebind_list,
+				 combined_links.rebind) {
 		XE_WARN_ON(!vma->tile_present);
 
-		list_del_init(&vma->rebind_link);
+		list_del_init(&vma->combined_links.rebind);
 		dma_fence_put(fence);
 		if (rebind_worker)
 			trace_xe_vma_rebind_worker(vma);
@@ -883,9 +886,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 		return vma;
 	}
 
-	INIT_LIST_HEAD(&vma->rebind_link);
-	INIT_LIST_HEAD(&vma->unbind_link);
-	INIT_LIST_HEAD(&vma->userptr_link);
+	INIT_LIST_HEAD(&vma->combined_links.rebind);
 	INIT_LIST_HEAD(&vma->userptr.invalidate_link);
 	INIT_LIST_HEAD(&vma->notifier.rebind_link);
 	INIT_LIST_HEAD(&vma->extobj.link);
@@ -1070,7 +1071,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	struct xe_vm *vm = xe_vma_vm(vma);
 
 	lockdep_assert_held_write(&vm->lock);
-	XE_BUG_ON(!list_empty(&vma->unbind_link));
+	XE_BUG_ON(!list_empty(&vma->combined_links.destroy));
 
 	if (xe_vma_is_userptr(vma)) {
 		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
@@ -1078,7 +1079,6 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 		spin_lock(&vm->userptr.invalidated_lock);
 		list_del_init(&vma->userptr.invalidate_link);
 		spin_unlock(&vm->userptr.invalidated_lock);
-		list_del(&vma->userptr_link);
 	} else if (!xe_vma_is_null(vma)) {
 		xe_bo_assert_held(xe_vma_bo(vma));
 
@@ -1099,9 +1099,6 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	}
 
 	xe_vm_assert_held(vm);
-	if (!list_empty(&vma->rebind_link))
-		list_del(&vma->rebind_link);
-
 	if (fence) {
 		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
 						 vma_destroy_cb);
@@ -1451,11 +1448,12 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 
 		/* easy case, remove from VMA? */
 		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
+			list_del_init(&vma->combined_links.rebind);
 			xe_vma_destroy(vma, NULL);
 			continue;
 		}
 
-		list_add_tail(&vma->unbind_link, &contested);
+		list_move_tail(&vma->combined_links.destroy, &contested);
 	}
 
 	/*
@@ -1487,8 +1485,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	 * Since we hold a refcount to the bo, we can remove and free
 	 * the members safely without locking.
 	 */
-	list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) {
-		list_del_init(&vma->unbind_link);
+	list_for_each_entry_safe(vma, next_vma, &contested,
+				 combined_links.destroy) {
+		list_del_init(&vma->combined_links.destroy);
 		xe_vma_destroy_unlocked(vma);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index a7386e8c28702..00bf0065d5140 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -50,21 +50,28 @@ struct xe_vma {
 	 */
 	u64 tile_present;
 
-	/** @userptr_link: link into VM repin list if userptr */
-	struct list_head userptr_link;
-
-	/**
-	 * @rebind_link: link into VM if this VMA needs rebinding, and
-	 * if it's a bo (not userptr) needs validation after a possible
-	 * eviction. Protected by the vm's resv lock.
-	 */
-	struct list_head rebind_link;
-
-	/**
-	 * @unbind_link: link or list head if an unbind of multiple VMAs, in
-	 * single unbind op, is being done.
-	 */
-	struct list_head unbind_link;
+	/** @combined_links: links into lists which are mutually exclusive */
+	union {
+		/**
+		 * @userptr: link into VM repin list if userptr. Protected by
+		 * vm->lock in write mode.
+		 */
+		struct list_head userptr;
+		/**
+		 * @rebind: link into VM if this VMA needs rebinding, and
+		 * if it's a bo (not userptr) needs validation after a possible
+		 * eviction. Protected by the vm's resv lock and typically
+		 * vm->lock is also held in write mode. The only place where
+		 * vm->lock isn't held is the BO eviction path which has
+		 * mutually exclusive execution with userptr.
+		 */
+		struct list_head rebind;
+		/**
+		 * @destroy: link to contested list when VM is being closed.
+		 * Protected by vm->lock in write mode and vm's resv lock.
+		 */
+		struct list_head destroy;
+	} combined_links;
 
 	/** @destroy_cb: callback to destroy VMA when unbind job is done */
 	struct dma_fence_cb destroy_cb;
-- 
GitLab


From 3daf694ccf8afb936e3508c98738d52b13941397 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 20:50:24 -0700
Subject: [PATCH 1757/2340] drm/xe: Replace list_del_init with list_del for
 userptr.invalidate_link cleanup

This list isn't used again, list_del is the proper call.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 29a950a02a36b..1ea4f20ac7238 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1077,7 +1077,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
 
 		spin_lock(&vm->userptr.invalidated_lock);
-		list_del_init(&vma->userptr.invalidate_link);
+		list_del(&vma->userptr.invalidate_link);
 		spin_unlock(&vm->userptr.invalidated_lock);
 	} else if (!xe_vma_is_null(vma)) {
 		xe_bo_assert_held(xe_vma_bo(vma));
-- 
GitLab


From 63412a5a6718771214900aec51fc9253b36efcc5 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 21:00:51 -0700
Subject: [PATCH 1758/2340] drm/xe: Change tile masks from u64 to u8

This will save us a few bytes in the xe_vma structure.

v2: Use hweight8 rather than hweight_long (Rodrigo)

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 12 ++++++------
 drivers/gpu/drm/xe/xe_vm_types.h | 28 ++++++++++++++--------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 1ea4f20ac7238..a01504ecd2afb 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -871,7 +871,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    u64 start, u64 end,
 				    bool read_only,
 				    bool is_null,
-				    u64 tile_mask)
+				    u8 tile_mask)
 {
 	struct xe_vma *vma;
 	struct xe_tile *tile;
@@ -1579,7 +1579,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
 	struct dma_fence_array *cf = NULL;
 	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
-	int number_tiles = hweight_long(vma->tile_present);
+	int number_tiles = hweight8(vma->tile_present);
 	int err;
 	u8 id;
 
@@ -1654,7 +1654,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
 	struct dma_fence_array *cf = NULL;
 	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
-	int number_tiles = hweight_long(vma->tile_mask);
+	int number_tiles = hweight8(vma->tile_mask);
 	int err;
 	u8 id;
 
@@ -2250,7 +2250,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 static struct drm_gpuva_ops *
 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			 u64 bo_offset_or_userptr, u64 addr, u64 range,
-			 u32 operation, u64 tile_mask, u32 region)
+			 u32 operation, u8 tile_mask, u32 region)
 {
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
 	struct ww_acquire_ctx ww;
@@ -2354,7 +2354,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 }
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
-			      u64 tile_mask, bool read_only, bool is_null)
+			      u8 tile_mask, bool read_only, bool is_null)
 {
 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
 	struct xe_vma *vma;
@@ -3339,7 +3339,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 addr = bind_ops[i].addr;
 		u32 op = bind_ops[i].op;
 		u64 obj_offset = bind_ops[i].obj_offset;
-		u64 tile_mask = bind_ops[i].tile_mask;
+		u8 tile_mask = bind_ops[i].tile_mask;
 		u32 region = bind_ops[i].region;
 
 		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 00bf0065d5140..4783a460d671f 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -38,18 +38,6 @@ struct xe_vma {
 	/** @gpuva: Base GPUVA object */
 	struct drm_gpuva gpuva;
 
-	/** @tile_mask: Tile mask of where to create binding for this VMA */
-	u64 tile_mask;
-
-	/**
-	 * @tile_present: GT mask of binding are present for this VMA.
-	 * protected by vm->lock, vm->resv and for userptrs,
-	 * vm->userptr.notifier_lock for writing. Needs either for reading,
-	 * but if reading is done under the vm->lock only, it needs to be held
-	 * in write mode.
-	 */
-	u64 tile_present;
-
 	/** @combined_links: links into lists which are mutually exclusive */
 	union {
 		/**
@@ -107,9 +95,21 @@ struct xe_vma {
 	/** @usm: unified shared memory state */
 	struct {
 		/** @tile_invalidated: VMA has been invalidated */
-		u64 tile_invalidated;
+		u8 tile_invalidated;
 	} usm;
 
+	/** @tile_mask: Tile mask of where to create binding for this VMA */
+	u8 tile_mask;
+
+	/**
+	 * @tile_present: GT mask of binding are present for this VMA.
+	 * protected by vm->lock, vm->resv and for userptrs,
+	 * vm->userptr.notifier_lock for writing. Needs either for reading,
+	 * but if reading is done under the vm->lock only, it needs to be held
+	 * in write mode.
+	 */
+	u8 tile_present;
+
 	struct {
 		struct list_head rebind_link;
 	} notifier;
@@ -395,7 +395,7 @@ struct xe_vma_op {
 	 */
 	struct async_op_fence *fence;
 	/** @tile_mask: gt mask for this operation */
-	u64 tile_mask;
+	u8 tile_mask;
 	/** @flags: operation flags */
 	enum xe_vma_op_flags flags;
 
-- 
GitLab


From eae553cbe03a7918f2b5dc9bda0dc35f7a7a308d Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 21:04:01 -0700
Subject: [PATCH 1759/2340] drm/xe: Combine destroy_cb and destroy_work in
 xe_vma into union

The callback kicks the worker thus mutually exclusive execution,
combining saves a bit of space in xe_vma.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm_types.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 4783a460d671f..b6657b6feb3c6 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -61,11 +61,12 @@ struct xe_vma {
 		struct list_head destroy;
 	} combined_links;
 
-	/** @destroy_cb: callback to destroy VMA when unbind job is done */
-	struct dma_fence_cb destroy_cb;
-
-	/** @destroy_work: worker to destroy this BO */
-	struct work_struct destroy_work;
+	union {
+		/** @destroy_cb: callback to destroy VMA when unbind job is done */
+		struct dma_fence_cb destroy_cb;
+		/** @destroy_work: worker to destroy this BO */
+		struct work_struct destroy_work;
+	};
 
 	/** @userptr: user pointer state */
 	struct {
-- 
GitLab


From a4cc60a55fd9a6bb8b50375d404f317ac2030941 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 19 Jul 2023 21:05:42 -0700
Subject: [PATCH 1760/2340] drm/xe: Only alloc userptr part of xe_vma for
 userptrs

Only alloc userptr part of xe_vma for userptrs, this will save on space
in the common BO case.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       |  8 +++--
 drivers/gpu/drm/xe/xe_vm_types.h | 56 ++++++++++++++++++--------------
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a01504ecd2afb..a07dc4f846b1b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -880,14 +880,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	XE_BUG_ON(start >= end);
 	XE_BUG_ON(end >= vm->size);
 
-	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (!bo && !is_null)	/* userptr */
+		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	else
+		vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
+			      GFP_KERNEL);
 	if (!vma) {
 		vma = ERR_PTR(-ENOMEM);
 		return vma;
 	}
 
 	INIT_LIST_HEAD(&vma->combined_links.rebind);
-	INIT_LIST_HEAD(&vma->userptr.invalidate_link);
 	INIT_LIST_HEAD(&vma->notifier.rebind_link);
 	INIT_LIST_HEAD(&vma->extobj.link);
 
@@ -931,6 +934,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 			u64 size = end - start + 1;
 			int err;
 
+			INIT_LIST_HEAD(&vma->userptr.invalidate_link);
 			vma->gpuva.gem.offset = bo_offset_or_userptr;
 
 			err = mmu_interval_notifier_insert(&vma->userptr.notifier,
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index b6657b6feb3c6..f7522f9ca40ee 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -34,6 +34,31 @@ struct xe_vm;
 #define XE_VMA_PTE_2M		(DRM_GPUVA_USERBITS << 6)
 #define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 7)
 
+/** struct xe_userptr - User pointer */
+struct xe_userptr {
+	/** @invalidate_link: Link for the vm::userptr.invalidated list */
+	struct list_head invalidate_link;
+	/**
+	 * @notifier: MMU notifier for user pointer (invalidation call back)
+	 */
+	struct mmu_interval_notifier notifier;
+	/** @sgt: storage for a scatter gather table */
+	struct sg_table sgt;
+	/** @sg: allocated scatter gather table */
+	struct sg_table *sg;
+	/** @notifier_seq: notifier sequence number */
+	unsigned long notifier_seq;
+	/**
+	 * @initial_bind: user pointer has been bound at least once.
+	 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
+	 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+	 */
+	bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+	u32 divisor;
+#endif
+};
+
 struct xe_vma {
 	/** @gpuva: Base GPUVA object */
 	struct drm_gpuva gpuva;
@@ -68,31 +93,6 @@ struct xe_vma {
 		struct work_struct destroy_work;
 	};
 
-	/** @userptr: user pointer state */
-	struct {
-		/** @invalidate_link: Link for the vm::userptr.invalidated list */
-		struct list_head invalidate_link;
-		/**
-		 * @notifier: MMU notifier for user pointer (invalidation call back)
-		 */
-		struct mmu_interval_notifier notifier;
-		/** @sgt: storage for a scatter gather table */
-		struct sg_table sgt;
-		/** @sg: allocated scatter gather table */
-		struct sg_table *sg;
-		/** @notifier_seq: notifier sequence number */
-		unsigned long notifier_seq;
-		/**
-		 * @initial_bind: user pointer has been bound at least once.
-		 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
-		 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
-		 */
-		bool initial_bind;
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-		u32 divisor;
-#endif
-	} userptr;
-
 	/** @usm: unified shared memory state */
 	struct {
 		/** @tile_invalidated: VMA has been invalidated */
@@ -122,6 +122,12 @@ struct xe_vma {
 		 */
 		struct list_head link;
 	} extobj;
+
+	/**
+	 * @userptr: user pointer state, only allocated for VMAs that are
+	 * user pointers
+	 */
+	struct xe_userptr userptr;
 };
 
 struct xe_device;
-- 
GitLab


From 7ead33156483f5e7a699002f2480757aaa34ab08 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 21 Jul 2023 12:16:13 -0700
Subject: [PATCH 1761/2340] drm/xe: Use migrate engine for page fault binds

We must use migrate engine for page fault binds in order to avoid a
deadlock as the migrate engine has a reserved BCS instance which cannot
be stuck on a fault. To use the migrate engine the engine argument to
xe_migrate_update_pgtables must be NULL, this was incorrectly wired up
so vm->eng[tile_id] was always being used. Fix this.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 3 +--
 drivers/gpu/drm/xe/xe_vm.c | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index b67144768af05..d2df519100108 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1421,8 +1421,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 	}
 
 	fence = xe_migrate_update_pgtables(tile->migrate,
-					   vm, xe_vma_bo(vma),
-					   e ? e : vm->eng[tile->id],
+					   vm, xe_vma_bo(vma), e,
 					   entries, num_entries,
 					   syncs, num_syncs,
 					   &bind_pt_update.base);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a07dc4f846b1b..f3f6f01046a94 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1675,7 +1675,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
 		if (!(vma->tile_mask & BIT(id)))
 			goto next;
 
-		fence = __xe_pt_bind_vma(tile, vma, e, first_op ? syncs : NULL,
+		fence = __xe_pt_bind_vma(tile, vma, e ? e : vm->eng[id],
+					 first_op ? syncs : NULL,
 					 first_op ? num_syncs : 0,
 					 vma->tile_present & BIT(id));
 		if (IS_ERR(fence)) {
-- 
GitLab


From 9700a1df0a5568a3eb8483de103d4078e273b36b Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 24 Jul 2023 11:47:44 +0100
Subject: [PATCH 1762/2340] drm/xe: add lockdep annotation for
 xe_device_mem_access_put()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The main motivation is with d3cold which will make the suspend and
resume callbacks even more scary, but is useful regardless. We already
have the needed annotation on the acquire side with
xe_device_mem_access_get(), and by adding the annotation on the release
side we should have a lot more confidence that our locking hierarchy is
correct.

v2:
  - Move the annotation into both callbacks for better symmetry. Also
    don't hold over the entire mem_access_get(); we only need to lockep
    to understand what is being held upon entering mem_access_get(), and
    how that matches up with locks in the callbacks.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c |  4 ++--
 drivers/gpu/drm/xe/xe_device.h |  4 ++++
 drivers/gpu/drm/xe/xe_pm.c     | 27 +++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index b1f36c986f0d0..f948a358f53e5 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -35,7 +35,7 @@
 #include "xe_wait_user_fence.h"
 
 #ifdef CONFIG_LOCKDEP
-static struct lockdep_map xe_device_mem_access_lockdep_map = {
+struct lockdep_map xe_device_mem_access_lockdep_map = {
 	.name = "xe_device_mem_access_lockdep_map"
 };
 #endif
@@ -431,13 +431,13 @@ void xe_device_mem_access_get(struct xe_device *xe)
 	 * runtime_resume callback, lockdep should give us a nice splat.
 	 */
 	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+	lock_map_release(&xe_device_mem_access_lockdep_map);
 
 	xe_pm_runtime_get(xe);
 	ref = atomic_inc_return(&xe->mem_access.ref);
 
 	XE_WARN_ON(ref == S32_MAX);
 
-	lock_map_release(&xe_device_mem_access_lockdep_map);
 }
 
 void xe_device_mem_access_put(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 8b085ffdc5f8d..593accb68281f 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -16,6 +16,10 @@ struct xe_file;
 #include "xe_force_wake.h"
 #include "xe_macros.h"
 
+#ifdef CONFIG_LOCKDEP
+extern struct lockdep_map xe_device_mem_access_lockdep_map;
+#endif
+
 static inline struct xe_device *to_xe_device(const struct drm_device *dev)
 {
 	return container_of(dev, struct xe_device, drm);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 04b995aa848f4..cb2a00ea28e30 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -188,6 +188,29 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 	/* Disable access_ongoing asserts and prevent recursive pm calls */
 	xe_pm_write_callback_task(xe, current);
 
+	/*
+	 * The actual xe_device_mem_access_put() is always async underneath, so
+	 * exactly where that is called should makes no difference to us. However
+	 * we still need to be very careful with the locks that this callback
+	 * acquires and the locks that are acquired and held by any callers of
+	 * xe_device_mem_access_get(). We already have the matching annotation
+	 * on that side, but we also need it here. For example lockdep should be
+	 * able to tell us if the following scenario is in theory possible:
+	 *
+	 * CPU0                          | CPU1 (kworker)
+	 * lock(A)                       |
+	 *                               | xe_pm_runtime_suspend()
+	 *                               |      lock(A)
+	 * xe_device_mem_access_get()    |
+	 *
+	 * This will clearly deadlock since rpm core needs to wait for
+	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
+	 * on CPU0 which prevents CPU1 making forward progress.  With the
+	 * annotation here and in xe_device_mem_access_get() lockdep will see
+	 * the potential lock inversion and give us a nice splat.
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
 	if (xe->d3cold.allowed) {
 		err = xe_bo_evict_all(xe);
 		if (err)
@@ -202,6 +225,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 
 	xe_irq_suspend(xe);
 out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
 	xe_pm_write_callback_task(xe, NULL);
 	return err;
 }
@@ -215,6 +239,8 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	/* Disable access_ongoing asserts and prevent recursive pm calls */
 	xe_pm_write_callback_task(xe, current);
 
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
 	/*
 	 * It can be possible that xe has allowed d3cold but other pcie devices
 	 * in gfx card soc would have blocked d3cold, therefore card has not
@@ -250,6 +276,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 			goto out;
 	}
 out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
 	xe_pm_write_callback_task(xe, NULL);
 	return err;
 }
-- 
GitLab


From 7a060d786cc1d75ffa04256826d805686b8f1043 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 24 Jul 2023 17:34:34 -0700
Subject: [PATCH 1763/2340] drm/xe/mtl: Map PPGTT as CPU:WC

On MTL and beyond, the GPU performs non-coherent accesses to the PPGTT
page tables.  These page tables should be mapped as CPU:WC.

Removes CAT errors triggered by xe_exec_basic@once-basic on MTL:

   xe 0000:00:02.0: [drm:__xe_pt_bind_vma [xe]] Preparing bind, with range [1a0000...1a0fff) engine 0000000000000000.
   xe 0000:00:02.0: [drm:xe_vm_dbg_print_entries [xe]] 1 entries to update
   xe 0000:00:02.0: [drm:xe_vm_dbg_print_entries [xe]]  0: Update level 3 at (0 + 1) [0...8000000000) f:0
   xe 0000:00:02.0: [drm] Engine memory cat error: guc_id=2
   xe 0000:00:02.0: [drm] Engine memory cat error: guc_id=2
   xe 0000:00:02.0: [drm] Timedout job: seqno=4294967169, guc_id=2, flags=0x4

v2:
 - Rename to XE_BO_PAGETABLE to make it more clear that this BO is the
   pagetable itself, rather than just being bound in the PPGTT.  (Lucas)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://lore.kernel.org/r/20230725003433.1992137-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 16 ++++++++++++----
 drivers/gpu/drm/xe/xe_bo.h |  1 +
 drivers/gpu/drm/xe/xe_pt.c |  3 ++-
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a78ac158e967e..4b7678db88f39 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -301,6 +301,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 	struct xe_device *xe = xe_bo_device(bo);
 	struct xe_ttm_tt *tt;
 	unsigned long extra_pages;
+	enum ttm_caching caching = ttm_cached;
 	int err;
 
 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -314,10 +315,17 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 					   PAGE_SIZE);
 
-	/* TODO: Select caching mode */
-	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags,
-			  bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached,
-			  extra_pages);
+	/*
+	 * Display scanout is always non-coherent with the CPU cache.
+	 *
+	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
+	 * require a CPU:WC mapping.
+	 */
+	if (bo->flags & XE_BO_SCANOUT_BIT ||
+	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
+		caching = ttm_write_combined;
+
+	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
 	if (err) {
 		kfree(tt);
 		return NULL;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 3e98f3c0b85e4..12a291925fa95 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -40,6 +40,7 @@
 #define XE_BO_DEFER_BACKING		BIT(9)
 #define XE_BO_SCANOUT_BIT		BIT(10)
 #define XE_BO_FIXED_PLACEMENT_BIT	BIT(11)
+#define XE_BO_PAGETABLE			BIT(12)
 /* this one is trigger internally only */
 #define XE_BO_INTERNAL_TEST		BIT(30)
 #define XE_BO_INTERNAL_64K		BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index d2df519100108..48a87b50a040d 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -221,7 +221,8 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
 				  XE_BO_CREATE_PINNED_BIT |
-				  XE_BO_CREATE_NO_RESV_EVICT);
+				  XE_BO_CREATE_NO_RESV_EVICT |
+				  XE_BO_PAGETABLE);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto err_kfree;
-- 
GitLab


From 2a6d871bd97722e899780a8e429b0fb5f11dadc6 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 24 Jul 2023 17:34:35 -0700
Subject: [PATCH 1764/2340] drm/xe: xe_engine_create_ioctl should check
 gt_count, not tile_count

Platforms like MTL only have a single tile, but multiple GTs.
Ensure XE_ENGINE_CREATE accepts engine creation on gt1 on such
platforms.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230725003433.1992137-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index c31e55c10a33e..71f7787852267 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -416,7 +416,7 @@ find_hw_engine(struct xe_device *xe,
 	if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
 		return NULL;
 
-	if (eci.gt_id >= xe->info.tile_count)
+	if (eci.gt_id >= xe->info.gt_count)
 		return NULL;
 
 	idx = array_index_nospec(eci.engine_class,
@@ -539,7 +539,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, err))
 		return -EFAULT;
 
-	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.tile_count))
+	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
 		return -EINVAL;
 
 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
-- 
GitLab


From 6a024f1bfdfe3b535786780f67c38429df17e857 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 31 Mar 2023 09:46:26 +0100
Subject: [PATCH 1765/2340] drm/xe/bo: support tiered vram allocation for
 small-bar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the new flag XE_BO_NEEDS_CPU_ACCESS, to force allocating in the
mappable part of vram. If no flag is specified we do a topdown
allocation, to limit the chances of stealing the precious mappable part,
if we don't need it. If this is a full-bar system, then this all gets
nooped.

For kernel users, it looks like xe_bo_create_pin_map() is the central
place which users should call if they want CPU access to the object, so
add the flag there.

We still need to plumb this through for userspace allocations. Also it
looks like page-tables are using pin_map(), which is less than ideal. If
we can already use the GPU to do page-table management, then maybe we
should just force that for small-bar.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  3 +-
 drivers/gpu/drm/xe/xe_bo.c            | 48 ++++++++++++++++++---------
 drivers/gpu/drm/xe/xe_bo.h            |  1 +
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |  4 +++
 4 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 30e5fdf6ca636..c332dc54cb70e 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -111,7 +111,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL,
 						   bo->size,
 						   ttm_bo_type_kernel,
-						   XE_BO_CREATE_SYSTEM_BIT);
+						   XE_BO_CREATE_SYSTEM_BIT |
+						   XE_BO_NEEDS_CPU_ACCESS);
 	if (IS_ERR(sysmem)) {
 		KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
 			   str, PTR_ERR(sysmem));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4b7678db88f39..fa3fc825b7309 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -124,19 +124,28 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
 {
 	struct xe_tile *tile = mem_type_to_tile(xe, mem_type);
+	struct ttm_place place = { .mem_type = mem_type };
+	u64 io_size = tile->mem.vram.io_size;
 
 	XE_BUG_ON(!tile->mem.vram.usable_size);
 
-	places[*c] = (struct ttm_place) {
-		.mem_type = mem_type,
-		/*
-		 * For eviction / restore on suspend / resume objects
-		 * pinned in VRAM must be contiguous
-		 */
-		.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
-				     XE_BO_CREATE_GGTT_BIT) ?
-			TTM_PL_FLAG_CONTIGUOUS : 0,
-	};
+	/*
+	 * For eviction / restore on suspend / resume objects
+	 * pinned in VRAM must be contiguous
+	 */
+	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
+			XE_BO_CREATE_GGTT_BIT))
+		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+	if (io_size < tile->mem.vram.usable_size) {
+		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+			place.fpfn = 0;
+			place.lpfn = io_size >> PAGE_SHIFT;
+		} else {
+			place.flags |= TTM_PL_FLAG_TOPDOWN;
+		}
+	}
+	places[*c] = place;
 	*c += 1;
 
 	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
@@ -385,15 +394,20 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 				 struct ttm_resource *mem)
 {
 	struct xe_device *xe = ttm_to_xe_device(bdev);
-	struct xe_tile *tile;
 
 	switch (mem->mem_type) {
 	case XE_PL_SYSTEM:
 	case XE_PL_TT:
 		return 0;
 	case XE_PL_VRAM0:
-	case XE_PL_VRAM1:
-		tile = mem_type_to_tile(xe, mem->mem_type);
+	case XE_PL_VRAM1: {
+		struct xe_tile *tile = mem_type_to_tile(xe, mem->mem_type);
+		struct xe_ttm_vram_mgr_resource *vres =
+			to_xe_ttm_vram_mgr_resource(mem);
+
+		if (vres->used_visible_size < mem->size)
+			return -EINVAL;
+
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 
 		if (tile->mem.vram.mapping &&
@@ -408,7 +422,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 		mem->bus.caching = ttm_write_combined;
 #endif
 		return 0;
-	case XE_PL_STOLEN:
+	} case XE_PL_STOLEN:
 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
 	default:
 		return -EINVAL;
@@ -1376,7 +1390,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
 		flags |= XE_BO_CREATE_GGTT_BIT;
 
-	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, flags);
+	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
+				       flags | XE_BO_NEEDS_CPU_ACCESS);
 	if (IS_ERR(bo))
 		return bo;
 
@@ -1685,6 +1700,9 @@ int xe_bo_vmap(struct xe_bo *bo)
 
 	xe_bo_assert_held(bo);
 
+	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+		return -EINVAL;
+
 	if (!iosys_map_is_null(&bo->vmap))
 		return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 12a291925fa95..e231b2829bef2 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -41,6 +41,7 @@
 #define XE_BO_SCANOUT_BIT		BIT(10)
 #define XE_BO_FIXED_PLACEMENT_BIT	BIT(11)
 #define XE_BO_PAGETABLE			BIT(12)
+#define XE_BO_NEEDS_CPU_ACCESS		BIT(13)
 /* this one is trigger internally only */
 #define XE_BO_INTERNAL_TEST		BIT(30)
 #define XE_BO_INTERNAL_64K		BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index a10fd0366da3c..27e0d40daca82 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -373,11 +373,15 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 			      struct sg_table **sgt)
 {
 	struct xe_tile *tile = &xe->tiles[res->mem_type - XE_PL_VRAM0];
+	struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
 	struct xe_res_cursor cursor;
 	struct scatterlist *sg;
 	int num_entries = 0;
 	int i, r;
 
+	if (vres->used_visible_size < res->size)
+		return -EOPNOTSUPP;
+
 	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
 	if (!*sgt)
 		return -ENOMEM;
-- 
GitLab


From cd928fced9968558f1c7d724c23b1f8868c39774 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 31 Mar 2023 09:46:27 +0100
Subject: [PATCH 1766/2340] drm/xe/uapi: add the userspace bits for small-bar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mostly the same as i915. We add a new hint for userspace to force an
object into the mappable part of vram.

We also need to tell userspace how large the mappable part is. In Vulkan
for example, there will be two vram heaps for small-bar systems. And
here the size of each heap needs to be known. Likewise the used/avail
tracking needs to account for the mappable part.

We also limit the available tracking going forward, such that we limit
to privileged users only, since these values are system wide and are
technically considered an info leak.

v2 (Maarten):
  - s/NEEDS_CPU_ACCESS/NEEDS_VISIBLE_VRAM/ in the uapi. We also no
    longer require smem as an extra placement. This is more flexible,
    and lets us use this for clear-color surfaces, since we need CPU access
    there but we don't want to attach smem, since that effectively disables
    CCS from kernel pov.
  - Reject clear-color CCS buffers where NEEDS_VISIBLE_VRAM is not set,
    instead of migrating it behind the scenes.
v3 (José):
  - Split the changes that limit the accounting for perfmon_capable()
    into a separate patch.
  - Use XE_BO_CREATE_VRAM_MASK.
v4 (Gwan-gyeong Mun):
  - Add some kernel-doc for the query bits.
v5:
  - One small kernel-doc correction. The cpu_visible_size and
    corresponding used tracking are always zero for non
    XE_MEM_REGION_CLASS_VRAM.
v6:
  - Without perfmon_capable() it likely makes more sense to report as
    zero, instead of reporting as used == total size. This should give
    similar behaviour as i915 which rather tracks free instead of used.
  - Only enforce NEEDS_VISIBLE_VRAM on rc_ccs_cc_plane surfaces when the
    device is actually small-bar.

Testcase: igt/tests/xe_query
Testcase: igt/tests/xe_mmap@small-bar
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c           | 13 ++++++--
 drivers/gpu/drm/xe/xe_query.c        |  8 +++--
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 18 +++++++++++
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h |  4 +++
 include/uapi/drm/xe_drm.h            | 47 +++++++++++++++++++++++++++-
 5 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index fa3fc825b7309..d89cf93acb61f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1109,7 +1109,6 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 			ret = ttm_bo_vm_fault_reserved(vmf,
 						       vmf->vma->vm_page_prot,
 						       TTM_BO_VM_NUM_PREFAULT);
-
 		drm_dev_exit(idx);
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
@@ -1760,6 +1759,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, args->flags &
 			 ~(XE_GEM_CREATE_FLAG_DEFER_BACKING |
 			   XE_GEM_CREATE_FLAG_SCANOUT |
+			   XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
 			   xe->info.mem_region_mask)))
 		return -EINVAL;
 
@@ -1797,6 +1797,14 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		bo_flags |= XE_BO_SCANOUT_BIT;
 
 	bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+
+	if (args->flags & XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
+		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
+			return -EINVAL;
+
+		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
+	}
+
 	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
 			  bo_flags);
 	if (IS_ERR(bo)) {
@@ -2081,7 +2089,8 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 
 	bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
 			  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT);
+			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
+			  XE_BO_NEEDS_CPU_ACCESS);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index f880c9af1651a..3997c644f8fc8 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -17,6 +17,7 @@
 #include "xe_gt.h"
 #include "xe_guc_hwconfig.h"
 #include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
 
 static const enum xe_engine_class xe_to_user_engine_class[] = {
 	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
@@ -148,10 +149,13 @@ static int query_memory_usage(struct xe_device *xe,
 				man->size;
 
 			if (perfmon_capable()) {
-				usage->regions[usage->num_regions].used =
-					ttm_resource_manager_usage(man);
+				xe_ttm_vram_get_used(man,
+						     &usage->regions[usage->num_regions].used,
+						     &usage->regions[usage->num_regions].cpu_visible_used);
 			}
 
+			usage->regions[usage->num_regions].cpu_visible_size =
+				xe_ttm_vram_get_cpu_visible_size(man);
 			usage->num_regions++;
 		}
 	}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 27e0d40daca82..06a54c8bd46f1 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -457,3 +457,21 @@ void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
 	sg_free_table(sgt);
 	kfree(sgt);
 }
+
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+
+	return mgr->visible_size;
+}
+
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
+			  u64 *used, u64 *used_visible)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+
+	mutex_lock(&mgr->lock);
+	*used = mgr->mm.size - mgr->mm.avail;
+	*used_visible = mgr->visible_size - mgr->visible_avail;
+	mutex_unlock(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index 6e1d6033d7391..d184e19a92305 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -25,6 +25,10 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
 			      struct sg_table *sgt);
 
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man);
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
+			  u64 *used, u64 *used_visible);
+
 static inline struct xe_ttm_vram_mgr_resource *
 to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
 {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 347351a8f6184..7f29c58f87a3b 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -180,8 +180,37 @@ struct drm_xe_query_mem_region {
 	 * zero.
 	 */
 	__u64 used;
+	/**
+	 * @cpu_visible_size: How much of this region can be CPU
+	 * accessed, in bytes.
+	 *
+	 * This will always be <= @total_size, and the remainder (if
+	 * any) will not be CPU accessible. If the CPU accessible part
+	 * is smaller than @total_size then this is referred to as a
+	 * small BAR system.
+	 *
+	 * On systems without small BAR (full BAR), the probed_size will
+	 * always equal the @total_size, since all of it will be CPU
+	 * accessible.
+	 *
+	 * Note this is only tracked for XE_MEM_REGION_CLASS_VRAM
+	 * regions (for other types the value here will always equal
+	 * zero).
+	 */
+	__u64 cpu_visible_size;
+	/**
+	 * @cpu_visible_used: Estimate of CPU visible memory used, in
+	 * bytes.
+	 *
+	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
+	 * accounting. Without this the value here will always equal
+	 * zero.  Note this is only currently tracked for
+	 * XE_MEM_REGION_CLASS_VRAM regions (for other types the value
+	 * here will always be zero).
+	 */
+	__u64 cpu_visible_used;
 	/** @reserved: MBZ */
-	__u64 reserved[8];
+	__u64 reserved[6];
 };
 
 /**
@@ -383,6 +412,22 @@ struct drm_xe_gem_create {
 
 #define XE_GEM_CREATE_FLAG_DEFER_BACKING	(0x1 << 24)
 #define XE_GEM_CREATE_FLAG_SCANOUT		(0x1 << 25)
+/*
+ * When using VRAM as a possible placement, ensure that the corresponding VRAM
+ * allocation will always use the CPU accessible part of VRAM. This is important
+ * for small-bar systems (on full-bar systems this gets turned into a noop).
+ *
+ * Note: System memory can be used as an extra placement if the kernel should
+ * spill the allocation to system memory, if space can't be made available in
+ * the CPU accessible part of VRAM (giving the same behaviour as the i915
+ * interface, see I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS).
+ *
+ * Note: For clear-color CCS surfaces the kernel needs to read the clear-color
+ * value stored in the buffer, and on discrete platforms we need to use VRAM for
+ * display surfaces, therefore the kernel requires setting this flag for such
+ * objects, otherwise an error is thrown on small-bar systems.
+ */
+#define XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM	(0x1 << 26)
 	/**
 	 * @flags: Flags, currently a mask of memory instances of where BO can
 	 * be placed
-- 
GitLab


From c00ce7f22317006a3f14465637093ae3d2e53463 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 31 Mar 2023 09:46:28 +0100
Subject: [PATCH 1767/2340] drm/xe: fully turn on small-bar support

This allows vram_size > io_size, instead of just clamping the vram size
to the BAR size, now that the driver supports it.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 8d0f07261bfd8..aa9c573b12432 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -261,11 +261,6 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	if (err)
 		return err;
 
-	/* small bar issues will only cover root tile sizes */
-	if (xe->mem.vram.io_size < vram_size)
-		drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
-			 vram_size, (u64)xe->mem.vram.io_size);
-
 	drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
 		 &xe->mem.vram.io_size);
 
@@ -287,9 +282,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		}
 
 		tile->mem.vram.base = tile_offset;
-
-		/* small bar can limit the visible size.  size accordingly */
-		tile->mem.vram.usable_size = min_t(u64, vram_size, io_size);
+		tile->mem.vram.usable_size = vram_size;
 		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
 
 		drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id,
@@ -304,7 +297,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		available_size += vram_size;
 
 		if (total_size > xe->mem.vram.io_size) {
-			drm_warn(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
+			drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
 				 &total_size, &xe->mem.vram.io_size);
 		}
 
-- 
GitLab


From c856cc138bf39aa38f1b97def8927c71b2a057c2 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 21 Jul 2023 15:44:50 -0400
Subject: [PATCH 1768/2340] drm/xe/uapi: Remove XE_QUERY_CONFIG_FLAGS_USE_GUC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This config is the only real one. If execlist remains in the
code it will forever be experimental and we shouldn't maintain
an uapi like that for that experimental piece of code that
should never be used by real users.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 3 ---
 include/uapi/drm/xe_drm.h     | 1 -
 2 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 3997c644f8fc8..6ba7baf7c7771 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -195,9 +195,6 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
 		config->info[XE_QUERY_CONFIG_FLAGS] =
 			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
-	if (xe->info.enable_guc)
-		config->info[XE_QUERY_CONFIG_FLAGS] |=
-			XE_QUERY_CONFIG_FLAGS_USE_GUC;
 	config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] =
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
 	config->info[XE_QUERY_CONFIG_VA_BITS] = 12 +
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 7f29c58f87a3b..259de80376b4a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -246,7 +246,6 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
 #define XE_QUERY_CONFIG_FLAGS			1
 	#define XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
-	#define XE_QUERY_CONFIG_FLAGS_USE_GUC		(0x1 << 1)
 #define XE_QUERY_CONFIG_MIN_ALIGNEMENT		2
 #define XE_QUERY_CONFIG_VA_BITS			3
 #define XE_QUERY_CONFIG_GT_COUNT		4
-- 
GitLab


From c8dc15464880d725a18593bdfe6651bd235574c3 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 21 Jul 2023 15:56:36 -0400
Subject: [PATCH 1769/2340] drm/xe: Invert guc vs execlists parameters and
 info.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The module parameter should reflect the name of the optional,
experimental and unsafe option, rather than the default one.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c      | 2 +-
 drivers/gpu/drm/xe/xe_device.c       | 2 +-
 drivers/gpu/drm/xe/xe_device.h       | 7 +------
 drivers/gpu/drm/xe/xe_device_types.h | 4 ++--
 drivers/gpu/drm/xe/xe_module.c       | 6 +++---
 drivers/gpu/drm/xe/xe_module.h       | 2 +-
 6 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 047341d5689ab..491506a1e12e5 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -47,7 +47,7 @@ static int info(struct seq_file *m, void *data)
 	drm_printf(&p, "revid %d\n", xe->info.revid);
 	drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
 	drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
-	drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc));
+	drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist));
 	drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm));
 	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
 	for_each_gt(gt, xe, id) {
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index f948a358f53e5..63ed59c61c840 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -197,7 +197,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 
 	xe->info.devid = pdev->device;
 	xe->info.revid = pdev->revision;
-	xe->info.enable_guc = enable_guc;
+	xe->info.force_execlist = force_execlist;
 
 	spin_lock_init(&xe->irq.lock);
 
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 593accb68281f..61a5cf1f73006 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -116,12 +116,7 @@ static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe)
 
 static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
 {
-	return xe->info.enable_guc;
-}
-
-static inline void xe_device_guc_submission_disable(struct xe_device *xe)
-{
-	xe->info.enable_guc = false;
+	return !xe->info.force_execlist;
 }
 
 #define for_each_tile(tile__, xe__, id__) \
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 52c5f7ded7ce6..c521ffaf38719 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -210,8 +210,8 @@ struct xe_device {
 		u8 supports_usm:1;
 		/** @has_asid: Has address space ID */
 		u8 has_asid:1;
-		/** @enable_guc: GuC submission enabled */
-		u8 enable_guc:1;
+		/** @force_execlist: Forced execlist submission */
+		u8 force_execlist:1;
 		/** @has_flat_ccs: Whether flat CCS metadata is used */
 		u8 has_flat_ccs:1;
 		/** @has_4tile: Whether tile-4 tiling is supported */
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 496a9001dc3e2..ed3772a697628 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -14,9 +14,9 @@
 #include "xe_pci.h"
 #include "xe_sched_job.h"
 
-bool enable_guc = true;
-module_param_named_unsafe(enable_guc, enable_guc, bool, 0444);
-MODULE_PARM_DESC(enable_guc, "Enable GuC submission");
+bool force_execlist = false;
+module_param_named_unsafe(force_execlist, force_execlist, bool, 0444);
+MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
 
 u32 xe_force_vram_bar_size;
 module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600);
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 7169907c3365d..2c1f9199f909e 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -6,7 +6,7 @@
 #include <linux/types.h>
 
 /* Module modprobe variables */
-extern bool enable_guc;
+extern bool force_execlist;
 extern bool enable_display;
 extern u32 xe_force_vram_bar_size;
 extern int xe_guc_log_level;
-- 
GitLab


From 342206b7cc064b8b004474c0baab2c67ced646d0 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 26 Jul 2023 09:33:48 -0700
Subject: [PATCH 1770/2340] drm/xe: Always use xe_vm_queue_rebind_worker helper

Do not queue the rebind worker directly, rather use the helper
xe_vm_queue_rebind_worker. This ensures we use the correct work queue.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c |  3 +--
 drivers/gpu/drm/xe/xe_vm.h | 14 +++++++-------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 48a87b50a040d..8b75a6145f9bc 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1472,8 +1472,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		}
 		if (!rebind && last_munmap_rebind &&
 		    xe_vm_in_compute_mode(vm))
-			queue_work(vm->xe->ordered_wq,
-				   &vm->preempt.rebind_work);
+			xe_vm_queue_rebind_worker(vm);
 	} else {
 		kfree(rfence);
 		kfree(ifence);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index d386e72cb9749..a1d30de37d203 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -182,6 +182,12 @@ extern struct ttm_device_funcs xe_ttm_funcs;
 
 struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
 
+static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
+{
+	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
+	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
+}
+
 /**
  * xe_vm_reactivate_rebind() - Reactivate the rebind functionality on compute
  * vms.
@@ -195,7 +201,7 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
 {
 	if (xe_vm_in_compute_mode(vm) && vm->preempt.rebind_deactivated) {
 		vm->preempt.rebind_deactivated = false;
-		queue_work(system_unbound_wq, &vm->preempt.rebind_work);
+		xe_vm_queue_rebind_worker(vm);
 	}
 }
 
@@ -203,12 +209,6 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma);
 
 int xe_vma_userptr_check_repin(struct xe_vma *vma);
 
-static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
-{
-	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
-	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
-}
-
 /*
  * XE_ONSTACK_TV is used to size the tv_onstack array that is input
  * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
-- 
GitLab


From 70748acb7fb4c9bba5364de0d6fe0801f2addebb Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 26 Jul 2023 09:41:43 -0700
Subject: [PATCH 1771/2340] drm/xe: Signal out-syncs on VM binds if no
 operations

If no operations are generated for VM binds the out-syncs must still be
signaled.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index f3f6f01046a94..787008bf85e19 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3378,6 +3378,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 unwind_ops:
 	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
 free_syncs:
+	for (i = 0; err == -ENODATA && i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());
 	while (num_syncs--)
 		xe_sync_entry_cleanup(&syncs[num_syncs]);
 
-- 
GitLab


From 6aa26f6eb829fb208c569b92837a13e889891db4 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 26 Jul 2023 10:23:49 +0100
Subject: [PATCH 1772/2340] drm/xe/engine: add missing rpm for bind engines

Bind engines need to use the migration vm, however we don't have any rpm
for such a vm, otherwise the kernel would prevent rpm suspend-resume.
There are two issues here, first is the actual engine create which needs
to touch the lrc, but since that is in VRAM we trigger loads of missing
mem_access asserts. The second issue is when destroying the actual
engine, which requires GuC CT to deregister the context.

v2 (Rodrigo):
  - Just use ENGINE_FLAG_VM as the indicator that we need to hold an rpm
    ref. This also handles the case in xe_vm_create() where we create
    default bind engines.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/499
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/504
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_engine.c       | 19 +++++++++++++++++++
 drivers/gpu/drm/xe/xe_engine_types.h |  1 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 71f7787852267..f60d29b2b5066 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -76,6 +76,17 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe,
 	if (err)
 		goto err_lrc;
 
+	/*
+	 * Normally the user vm holds an rpm ref to keep the device
+	 * awake, and the context holds a ref for the vm, however for
+	 * some engines we use the kernels migrate vm underneath which
+	 * offers no such rpm ref. Make sure we keep a ref here, so we
+	 * can perform GuC CT actions when needed. Caller is expected to
+	 * have already grabbed the rpm ref outside any sensitive locks.
+	 */
+	if (e->flags & ENGINE_FLAG_VM)
+		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
+
 	return e;
 
 err_lrc:
@@ -152,6 +163,8 @@ void xe_engine_fini(struct xe_engine *e)
 		xe_lrc_finish(e->lrc + i);
 	if (e->vm)
 		xe_vm_put(e->vm);
+	if (e->flags & ENGINE_FLAG_VM)
+		xe_device_mem_access_put(gt_to_xe(e->gt));
 
 	kfree(e);
 }
@@ -560,6 +573,9 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			if (XE_IOCTL_DBG(xe, !hwe))
 				return -EINVAL;
 
+			/* The migration vm doesn't hold rpm ref */
+			xe_device_mem_access_get(xe);
+
 			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
 			new = xe_engine_create(xe, migrate_vm, logical_mask,
 					       args->width, hwe,
@@ -568,6 +584,9 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 					       (id ?
 					       ENGINE_FLAG_BIND_ENGINE_CHILD :
 					       0));
+
+			xe_device_mem_access_put(xe); /* now held by engine */
+
 			xe_vm_put(migrate_vm);
 			if (IS_ERR(new)) {
 				err = PTR_ERR(new);
diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
index a0cd80cb9d7b1..f1d531735f6d2 100644
--- a/drivers/gpu/drm/xe/xe_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_engine_types.h
@@ -69,6 +69,7 @@ struct xe_engine {
 #define ENGINE_FLAG_KERNEL		BIT(1)
 #define ENGINE_FLAG_PERSISTENT		BIT(2)
 #define ENGINE_FLAG_COMPUTE_MODE	BIT(3)
+/* Caller needs to hold rpm ref when creating engine with ENGINE_FLAG_VM */
 #define ENGINE_FLAG_VM			BIT(4)
 #define ENGINE_FLAG_BIND_ENGINE_CHILD	BIT(5)
 #define ENGINE_FLAG_WA			BIT(6)
-- 
GitLab


From 0e34fdb4a01a3e615c109694b5adc53590ccda19 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Jul 2023 09:07:01 -0700
Subject: [PATCH 1773/2340] drm/xe: Fix checking for unset value

Commit 37430402618d ("drm/xe: NULL binding implementation") introduced
the NULL binding implementation, but left a case in which the out value
is_vram is not set and the caller will use whatever was on stack.
Eventually the is_vram out could be removed, but this should at least
fix the current bug.

Fixes: 37430402618d ("drm/xe: NULL binding implementation")
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230726160708.3967790-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 8b75a6145f9bc..612de787c19e6 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -81,8 +81,10 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
 static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 			   size_t page_size, bool *is_vram)
 {
-	if (xe_vma_is_null(vma))
+	if (xe_vma_is_null(vma)) {
+		*is_vram = 0;
 		return 0;
+	}
 
 	if (xe_vma_is_userptr(vma)) {
 		struct xe_res_cursor cur;
-- 
GitLab


From 43b5d81e04773d08df1ed3ff8a40936dca726fda Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 26 Jul 2023 15:25:28 -0700
Subject: [PATCH 1774/2340] drm/xe: fix mcr semaphore locking for MTL

in commit 81593af6c88d ("drm/xe: Convert xe_mmio_wait32 to us so we can
stop using wait_for_us.") the mcr semaphore register read was
accidentally switched from waiting for the register to go to 1 to
waiting for the register to go to 0, so we need to flip it back.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index c56815af06866..9eb7a6a1348d6 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -420,10 +420,11 @@ static void mcr_lock(struct xe_gt *gt)
 	/*
 	 * Starting with MTL we also need to grab a semaphore register
 	 * to synchronize with external agents (e.g., firmware) that now
-	 * shares the same steering control register.
+	 * shares the same steering control register. The semaphore is obtained
+	 * when a read to the relevant register returns 1.
 	 */
 	if (GRAPHICS_VERx100(xe) >= 1270)
-		ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL,
+		ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
 				     true);
 
 	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
@@ -431,7 +432,7 @@ static void mcr_lock(struct xe_gt *gt)
 
 static void mcr_unlock(struct xe_gt *gt)
 {
-	/* Release hardware semaphore */
+	/* Release hardware semaphore - this is done by writing 1 to the register */
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
 		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
 
-- 
GitLab


From 621c1fbd9b83fb6a731e0063ad4ea2d89ec20a9c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Jul 2023 09:07:02 -0700
Subject: [PATCH 1775/2340] drm/xe: Remove vma arg from xe_pte_encode()

All the callers pass a NULL vma, so the buffer is always the BO. Remove
the argument and the side effects of dealing with it.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230726160708.3967790-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c       |  8 ++---
 drivers/gpu/drm/xe/xe_pt.c            | 47 ++++-----------------------
 drivers/gpu/drm/xe/xe_pt.h            |  4 +--
 4 files changed, 13 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index c332dc54cb70e..9e9b228fe3153 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
-	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0);
+	expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0);
 	if (m->eng->vm->flags & XE_VM_FLAG_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index d64567403068f..0e369a05a5e71 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -196,8 +196,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
-		entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
-				      XE_CACHE_WB, 0);
+		entry = xe_pte_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
 
@@ -215,8 +214,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		for (i = 0; i < batch->size;
 		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
-			entry = xe_pte_encode(NULL, batch, i,
-					      XE_CACHE_WB, 0);
+			entry = xe_pte_encode(batch, i, XE_CACHE_WB, 0);
 
 			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
 				  entry);
@@ -1235,7 +1233,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 			BUG_ON(pt_bo->size != SZ_4K);
 
-			addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, 0);
+			addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 		}
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 612de787c19e6..ef7c258bdd90b 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -78,30 +78,6 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
 	return pde;
 }
 
-static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
-			   size_t page_size, bool *is_vram)
-{
-	if (xe_vma_is_null(vma)) {
-		*is_vram = 0;
-		return 0;
-	}
-
-	if (xe_vma_is_userptr(vma)) {
-		struct xe_res_cursor cur;
-		u64 page;
-
-		*is_vram = false;
-		page = offset >> PAGE_SHIFT;
-		offset &= (PAGE_SIZE - 1);
-
-		xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size,
-				&cur);
-		return xe_res_dma(&cur) + offset;
-	} else {
-		return xe_bo_addr(xe_vma_bo(vma), offset, page_size, is_vram);
-	}
-}
-
 static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
 			struct xe_vma *vma, u32 pt_level)
 {
@@ -140,34 +116,25 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
 
 /**
  * xe_pte_encode() - Encode a page-table entry pointing to memory.
- * @vma: The vma representing the memory to point to.
- * @bo: If @vma is NULL, representing the memory to point to.
- * @offset: The offset into @vma or @bo.
+ * @bo: The BO representing the memory to point to.
+ * @offset: The offset into @bo.
  * @cache: The cache level indicating
  * @pt_level: The page-table level of the page-table into which the entry
  * is to be inserted.
  *
  * Return: An encoded page-table entry. No errors.
  */
-u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
-		  u64 offset, enum xe_cache_level cache,
+u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
 		  u32 pt_level)
 {
 	u64 pte;
 	bool is_vram;
 
-	if (vma)
-		pte = vma_addr(vma, offset, XE_PAGE_SIZE, &is_vram);
-	else
-		pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram);
-
-	if (is_vram) {
+	pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram);
+	if (is_vram)
 		pte |= XE_PPGTT_PTE_LM;
-		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
-			pte |= XE_USM_PPGTT_PTE_AE;
-	}
 
-	return __pte_encode(pte, cache, vma, pt_level);
+	return __pte_encode(pte, cache, NULL, pt_level);
 }
 
 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -179,7 +146,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 		return 0;
 
 	if (level == 0) {
-		u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0,
+		u64 empty = xe_pte_encode(vm->scratch_bo[id], 0,
 					  XE_CACHE_WB, 0);
 
 		return empty;
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index aaf4b7b851e28..bbb00d6461ffd 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -48,7 +48,7 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
 u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
 		  const enum xe_cache_level level);
 
-u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
-		  u64 offset, enum xe_cache_level cache,
+u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
 		  u32 pt_level);
+
 #endif
-- 
GitLab


From 937b4be72baaba00fa71a02adac3716332876fa3 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Jul 2023 09:07:03 -0700
Subject: [PATCH 1776/2340] drm/xe: Decouple vram check from xe_bo_addr()

The output arg is_vram in xe_bo_addr() is unused by several callers.
It's also not what the function is mainly doing. Remove the argument and
let the interested callers to call xe_bo_is_vram().

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230726160708.3967790-6-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c      | 15 +++++----------
 drivers/gpu/drm/xe/xe_bo.h      | 10 +++-------
 drivers/gpu/drm/xe/xe_ggtt.c    |  5 ++---
 drivers/gpu/drm/xe/xe_migrate.c | 16 ++++------------
 drivers/gpu/drm/xe/xe_pt.c      | 10 +++-------
 drivers/gpu/drm/xe/xe_vm.c      | 10 ++++++----
 6 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index d89cf93acb61f..d4e60a96ed64a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1526,12 +1526,11 @@ int xe_bo_pin(struct xe_bo *bo)
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
 	    bo->flags & XE_BO_INTERNAL_TEST)) {
 		struct ttm_place *place = &(bo->placements[0]);
-		bool vram;
 
 		if (mem_type_is_vram(place->mem_type)) {
 			XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
 
-			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) -
+			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
 
@@ -1656,8 +1655,7 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
  * address, such as printing debug information, but not in cases where memory is
  * written based on this result.
  */
-dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset,
-		      size_t page_size, bool *is_vram)
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
 {
 	struct xe_res_cursor cur;
 	u64 page;
@@ -1666,9 +1664,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset,
 	page = offset >> PAGE_SHIFT;
 	offset &= (PAGE_SIZE - 1);
 
-	*is_vram = xe_bo_is_vram(bo);
-
-	if (!*is_vram && !xe_bo_is_stolen(bo)) {
+	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
 		XE_BUG_ON(!bo->ttm.ttm);
 
 		xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
@@ -1683,12 +1679,11 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset,
 	}
 }
 
-dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
-		      size_t page_size, bool *is_vram)
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
 {
 	if (!READ_ONCE(bo->ttm.pin_count))
 		xe_bo_assert_held(bo);
-	return __xe_bo_addr(bo, offset, page_size, is_vram);
+	return __xe_bo_addr(bo, offset, page_size);
 }
 
 int xe_bo_vmap(struct xe_bo *bo)
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index e231b2829bef2..b8817e13aeeb6 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -216,17 +216,13 @@ static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
 }
 
 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
-dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset,
-		      size_t page_size, bool *is_vram);
-dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
-		      size_t page_size, bool *is_vram);
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size);
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size);
 
 static inline dma_addr_t
 xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
 {
-	bool is_vram;
-
-	return xe_bo_addr(bo, 0, page_size, &is_vram);
+	return xe_bo_addr(bo, 0, page_size);
 }
 
 static inline u32
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 4468c0ae0f6fd..3eea65bd1bcda 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -31,12 +31,11 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 	u64 pte;
-	bool is_vram;
 
-	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram);
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pte |= XE_PAGE_PRESENT;
 
-	if (is_vram)
+	if (xe_bo_is_vram(bo))
 		pte |= XE_GGTT_PTE_LM;
 
 	/* FIXME: vfunc + pass in caching rules */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 0e369a05a5e71..03f50a14c5c2d 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -132,7 +132,6 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 	struct xe_device *xe = vm->xe;
 	size_t cleared_size;
 	u64 vram_addr;
-	bool is_vram;
 
 	if (!xe_device_has_flat_ccs(xe))
 		return 0;
@@ -147,8 +146,7 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 		return PTR_ERR(m->cleared_bo);
 
 	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
-	vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE, &is_vram);
-	XE_BUG_ON(!is_vram);
+	vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE);
 	m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr);
 
 	return 0;
@@ -221,15 +219,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 			level++;
 		}
 	} else {
-		bool is_vram;
-		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, &is_vram);
+		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
 
 		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 
 		if (xe->info.supports_usm) {
 			batch = tile->primary_gt->usm.bb_pool->bo;
-			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE,
-						&is_vram);
+			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
 			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
 		}
 	}
@@ -1000,12 +996,8 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 	 */
 	XE_BUG_ON(update->qwords > 0x1ff);
 	if (!ppgtt_ofs) {
-		bool is_vram;
-
 		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
-							   XE_PAGE_SIZE,
-							   &is_vram));
-		XE_BUG_ON(!is_vram);
+							   XE_PAGE_SIZE));
 	}
 
 	do {
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ef7c258bdd90b..f69f7dbaca558 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -61,13 +61,10 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
 		  const enum xe_cache_level level)
 {
 	u64 pde;
-	bool is_vram;
 
-	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram);
+	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
 
-	XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_vram);
-
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
 
 	if (level != XE_CACHE_NONE)
@@ -128,10 +125,9 @@ u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
 		  u32 pt_level)
 {
 	u64 pte;
-	bool is_vram;
 
-	pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram);
-	if (is_vram)
+	pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE);
+	if (xe_bo_is_vram(bo))
 		pte |= XE_PPGTT_PTE_LM;
 
 	return __pte_encode(pte, cache, NULL, pt_level);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 787008bf85e19..205795823555c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3503,9 +3503,10 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 		return 0;
 	}
 	if (vm->pt_root[gt_id]) {
-		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE,
-				  &is_vram);
-		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
+		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
+		is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
+		drm_printf(p, " VM root: A:0x%llx %s\n", addr,
+			   is_vram ? "VRAM" : "SYS");
 	}
 
 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
@@ -3526,7 +3527,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 				addr = 0;
 			}
 		} else {
-			addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE, &is_vram);
+			addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
+			is_vram = xe_bo_is_vram(xe_vma_bo(vma));
 		}
 		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
 			   xe_vma_start(vma), xe_vma_end(vma) - 1,
-- 
GitLab


From b23ebae7ab4142ffa53a3d80ba1189d0631994e8 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Jul 2023 09:07:04 -0700
Subject: [PATCH 1777/2340] drm/xe: Set PTE_DM bit for stolen on MTL

Integrated graphics 1270 and beyond should set the PTE_LM bit in the PTE
when it's stolen memory. Add a new function, xe_bo_is_stolen_devmem(),
and use it when encoding the PTE.

In some places in the spec the PTE bit is called "Local Memory",
abbreviated as LM, and in others it's called "Device Memory" (DM). Since
we moved away from "Local Memory" and preferred the "vram" terminology,
also rename the macros as DM to follow the name of the new function.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230726160708.3967790-7-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c      | 15 +++++++++++++++
 drivers/gpu/drm/xe/xe_bo.h      |  5 +++--
 drivers/gpu/drm/xe/xe_ggtt.c    |  4 ++--
 drivers/gpu/drm/xe/xe_migrate.c |  4 ++--
 drivers/gpu/drm/xe/xe_pt.c      | 13 +++++++------
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index d4e60a96ed64a..65b56e7a2fde5 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -80,6 +80,21 @@ bool xe_bo_is_stolen(struct xe_bo *bo)
 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
 }
 
+/**
+ * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
+ * @bo: The BO
+ *
+ * The stolen memory is accessed through the PCI BAR for both DGFX and some
+ * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
+ *
+ * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
+ */
+bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
+{
+	return xe_bo_is_stolen(bo) &&
+		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
+}
+
 static bool xe_bo_is_user(struct xe_bo *bo)
 {
 	return bo->flags & XE_BO_CREATE_USER_BIT;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index b8817e13aeeb6..a9a32d6802084 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -67,9 +67,9 @@
 #define XE_PDPE_PS_1G			BIT_ULL(7)
 #define XE_PDE_IPS_64K			BIT_ULL(11)
 
-#define XE_GGTT_PTE_LM			BIT_ULL(1)
+#define XE_GGTT_PTE_DM			BIT_ULL(1)
 #define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
-#define XE_PPGTT_PTE_LM			BIT_ULL(11)
+#define XE_PPGTT_PTE_DM			BIT_ULL(11)
 #define XE_PDE_64K			BIT_ULL(6)
 #define XE_PTE_PS64			BIT_ULL(8)
 #define XE_PTE_NULL			BIT_ULL(9)
@@ -239,6 +239,7 @@ void xe_bo_vunmap(struct xe_bo *bo);
 bool mem_type_is_vram(u32 mem_type);
 bool xe_bo_is_vram(struct xe_bo *bo);
 bool xe_bo_is_stolen(struct xe_bo *bo);
+bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
 uint64_t vram_region_gpu_offset(struct ttm_resource *res);
 
 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 3eea65bd1bcda..bf46b90a76ad7 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -35,8 +35,8 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pte |= XE_PAGE_PRESENT;
 
-	if (xe_bo_is_vram(bo))
-		pte |= XE_GGTT_PTE_LM;
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		pte |= XE_GGTT_PTE_DM;
 
 	/* FIXME: vfunc + pass in caching rules */
 	if (xe->info.platform == XE_METEORLAKE) {
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 03f50a14c5c2d..0405136bc0b1f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -258,7 +258,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		level = 2;
 		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
 		flags = XE_PAGE_RW | XE_PAGE_PRESENT | PPAT_CACHED |
-			XE_PPGTT_PTE_LM | XE_PDPE_PS_1G;
+			XE_PPGTT_PTE_DM | XE_PDPE_PS_1G;
 
 		/*
 		 * Use 1GB pages, it shouldn't matter the physical amount of
@@ -463,7 +463,7 @@ static void emit_pte(struct xe_migrate *m,
 				}
 
 				addr += vram_region_gpu_offset(bo->ttm.resource);
-				addr |= XE_PPGTT_PTE_LM;
+				addr |= XE_PPGTT_PTE_DM;
 			}
 			addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW;
 			bb->cs[bb->len++] = lower_32_bits(addr);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index f69f7dbaca558..d9192bf503624 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -127,8 +127,8 @@ u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
 	u64 pte;
 
 	pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE);
-	if (xe_bo_is_vram(bo))
-		pte |= XE_PPGTT_PTE_LM;
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		pte |= XE_PPGTT_PTE_DM;
 
 	return __pte_encode(pte, cache, NULL, pt_level);
 }
@@ -714,7 +714,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
 {
 	struct xe_bo *bo = xe_vma_bo(vma);
-	bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo);
+	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
+		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
 	struct xe_res_cursor curs;
 	struct xe_pt_stage_bind_walk xe_walk = {
 		.base = {
@@ -728,13 +729,13 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.va_curs_start = xe_vma_start(vma),
 		.vma = vma,
 		.wupd.entries = entries,
-		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_vram,
+		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
 	};
 	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 	int ret;
 
-	if (is_vram) {
-		xe_walk.default_pte = XE_PPGTT_PTE_LM;
+	if (is_devmem) {
+		xe_walk.default_pte = XE_PPGTT_PTE_DM;
 		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
 			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
 		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
-- 
GitLab


From 58052eb70cdeaaa2a48ec4369e702d097fee13f6 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Jul 2023 09:07:06 -0700
Subject: [PATCH 1778/2340] drm/xe: Fix MTL+ stolen memory mapping

Based on commit 8d8d062be6b9 ("drm/i915/mtl: Fix MTL stolen memory GGTT
mapping"). For stolen on MTL and beyond, the address in the PTE is the
offset from DSM base. While at it, update the comments explaining each
part of the calculation.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230726160708.3967790-9-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 21ecc734f10a7..271b3fba41296 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -94,11 +94,22 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 
 	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
 
-	/* check GGMS, should be fixed 0x3 (8MB) */
+	/*
+	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
+	 * GTT size
+	 */
 	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
 		return 0;
 
-	mgr->stolen_base = mgr->io_base = pci_resource_start(pdev, 2) + SZ_8M;
+	/*
+	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
+	 * PTEs, together with the DM flag being set. Previously there was no
+	 * such flag so the address was the io_base.
+	 *
+	 * DSMBASE = GSMBASE + 8MB
+	 */
+	mgr->stolen_base = SZ_8M;
+	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
 
 	/* return valid GMS value, -EIO if invalid */
 	gms = REG_FIELD_GET(GMS_MASK, ggc);
-- 
GitLab


From c0d6b6163fd99c5e73eca3b747e704877e070acc Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Jul 2023 09:07:07 -0700
Subject: [PATCH 1779/2340] drm/xe: Carve out top of DSM as reserved

Top of DSM contains the WOPCM where kernel driver shouldn't access as
it contains data from other HW agents. Carve it out from the stolen
memory. On a MTL system, the output now matches the expected values:

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230726160708.3967790-10-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h      | 3 +++
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8 +++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index e0734c8f922c6..b344796bb868e 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -97,4 +97,7 @@
 #define DSMBASE					XE_REG(0x1080C0)
 #define   BDSM_MASK				REG_GENMASK64(63, 20)
 
+#define STOLEN_RESERVED				XE_REG(0x1082c0)
+#define   WOPCM_SIZE_MASK			REG_GENMASK64(8, 7)
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 271b3fba41296..7bba8fff5a5d9 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -89,7 +89,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-	u32 stolen_size;
+	u32 stolen_size, wopcm_size;
 	u32 ggc, gms;
 
 	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
@@ -124,6 +124,12 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 		return 0;
 	}
 
+	/* Carve out the top of DSM as it contains the reserved WOPCM region */
+	wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK,
+				     xe_mmio_read64(xe_root_mmio_gt(xe),
+						    STOLEN_RESERVED));
+	stolen_size -= (1U << wopcm_size) * SZ_1M;
+
 	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
 		return 0;
 
-- 
GitLab


From fda48d15a4eade29a41d46d5a6f0bfa7556ccb72 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 26 Jul 2023 09:07:08 -0700
Subject: [PATCH 1780/2340] drm/xe: Sort xe_regs.h

Sort it by register address to make it easy to update when needed.

v2: Do not create exception for registers with same functionality.
Always sort it.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230726160708.3967790-11-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h | 63 ++++++++++++++++---------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index b344796bb868e..25275a36b2808 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -7,9 +7,6 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define GU_CNTL					XE_REG(0x101010)
-#define   LMEM_INIT				REG_BIT(7)
-
 #define RENDER_RING_BASE			0x02000
 #define BSD_RING_BASE				0x1c0000
 #define BSD2_RING_BASE				0x1c4000
@@ -45,16 +42,13 @@
 #define FF_THREAD_MODE				XE_REG(0x20a0)
 #define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
 
-#define PVC_RP_STATE_CAP			XE_REG(0x281014)
-#define MTL_RP_STATE_CAP			XE_REG(0x138000)
-
-#define MTL_MEDIAP_STATE_CAP			XE_REG(0x138020)
-#define   MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)
-#define   MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
+#define TIMESTAMP_OVERRIDE					XE_REG(0x44074)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
 
-#define MTL_GT_RPE_FREQUENCY			XE_REG(0x13800c)
-#define MTL_MPE_FREQUENCY			XE_REG(0x13802c)
-#define   MTL_RPE_MASK				REG_GENMASK(8, 0)
+#define PCU_IRQ_OFFSET				0x444e0
+#define GU_MISC_IRQ_OFFSET			0x444f0
+#define   GU_MISC_GSE				REG_BIT(27)
 
 #define TRANSCODER_A_OFFSET			0x60000
 #define TRANSCODER_B_OFFSET			0x61000
@@ -71,33 +65,42 @@
 
 #define SOFTWARE_FLAGS_SPR33			XE_REG(0x4f084)
 
-#define PCU_IRQ_OFFSET				0x444e0
-#define GU_MISC_IRQ_OFFSET			0x444f0
-#define   GU_MISC_GSE				REG_BIT(27)
-
-#define GFX_MSTR_IRQ				XE_REG(0x190010)
-#define   MASTER_IRQ				REG_BIT(31)
-#define   GU_MISC_IRQ				REG_BIT(29)
-#define   DISPLAY_IRQ				REG_BIT(16)
-#define   GT_DW_IRQ(x)				REG_BIT(x)
-
-#define DG1_MSTR_TILE_INTR			XE_REG(0x190008)
-#define   DG1_MSTR_IRQ				REG_BIT(31)
-#define   DG1_MSTR_TILE(t)			REG_BIT(t)
-
-#define TIMESTAMP_OVERRIDE					XE_REG(0x44074)
-#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
-#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
+#define GU_CNTL					XE_REG(0x101010)
+#define   LMEM_INIT				REG_BIT(7)
 
 #define GGC					XE_REG(0x108040)
 #define   GMS_MASK				REG_GENMASK(15, 8)
 #define   GGMS_MASK				REG_GENMASK(7, 6)
 
-#define GSMBASE					XE_REG(0x108100)
 #define DSMBASE					XE_REG(0x1080C0)
 #define   BDSM_MASK				REG_GENMASK64(63, 20)
 
+#define GSMBASE					XE_REG(0x108100)
+
 #define STOLEN_RESERVED				XE_REG(0x1082c0)
 #define   WOPCM_SIZE_MASK			REG_GENMASK64(8, 7)
 
+#define MTL_RP_STATE_CAP			XE_REG(0x138000)
+
+#define MTL_GT_RPE_FREQUENCY			XE_REG(0x13800c)
+
+#define MTL_MEDIAP_STATE_CAP			XE_REG(0x138020)
+#define   MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
+#define   MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)
+
+#define MTL_MPE_FREQUENCY			XE_REG(0x13802c)
+#define   MTL_RPE_MASK				REG_GENMASK(8, 0)
+
+#define DG1_MSTR_TILE_INTR			XE_REG(0x190008)
+#define   DG1_MSTR_IRQ				REG_BIT(31)
+#define   DG1_MSTR_TILE(t)			REG_BIT(t)
+
+#define GFX_MSTR_IRQ				XE_REG(0x190010)
+#define   MASTER_IRQ				REG_BIT(31)
+#define   GU_MISC_IRQ				REG_BIT(29)
+#define   DISPLAY_IRQ				REG_BIT(16)
+#define   GT_DW_IRQ(x)				REG_BIT(x)
+
+#define PVC_RP_STATE_CAP			XE_REG(0x281014)
+
 #endif
-- 
GitLab


From f83a30f466ebbd56355b1f65ec9bcd5087840ffc Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 26 Jul 2023 17:30:42 -0400
Subject: [PATCH 1781/2340] drm/xe: Fix an invalid locking wait context bug

We cannot have spin locks around xe_irq_reset, since it will
call the intel_display_power_is_enabled() function, and
that needs a mutex lock. Hence causing the undesired
"[ BUG: Invalid wait context ]"

We cannot convert i915's power domain lock to spin lock
due to the nested dependency of non-atomic context waits.

So, let's move the xe_irq_reset functions from the
critical area, while still ensuring that we are protecting
the irq.enabled and ensuring the right serialization
in the irq handlers.

v2: On the first version, I had missed the fact that
irq.enabled is checked on the xe/display glue layer,
and that i915 display code is actually using the irq
spin lock properly. So, this got changed to a version
suggested by Matthew Auld.

v3: do not use lockdep_assert for display glue.
    do not save restore irq from inside IRQ or we can
    get bogus irq restore warnings

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/463
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index ca63532433269..69629be07de2b 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -308,6 +308,13 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 	unsigned long intr_dw[2];
 	u32 identity[32];
 
+	spin_lock(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock(&xe->irq.lock);
+
 	master_ctl = xelp_intr_disable(xe);
 	if (!master_ctl) {
 		xelp_intr_enable(xe, false);
@@ -366,6 +373,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 
 	/* TODO: This really shouldn't be copied+pasted */
 
+	spin_lock(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock(&xe->irq.lock);
+
 	master_tile_ctl = dg1_intr_disable(xe);
 	if (!master_tile_ctl) {
 		dg1_intr_enable(xe, false);
@@ -563,10 +577,14 @@ void xe_irq_shutdown(struct xe_device *xe)
 
 void xe_irq_suspend(struct xe_device *xe)
 {
+	int irq = to_pci_dev(xe->drm.dev)->irq;
+
 	spin_lock_irq(&xe->irq.lock);
-	xe->irq.enabled = false;
-	xe_irq_reset(xe);
+	xe->irq.enabled = false; /* no new irqs */
 	spin_unlock_irq(&xe->irq.lock);
+
+	synchronize_irq(irq); /* flush irqs */
+	xe_irq_reset(xe); /* turn irqs off */
 }
 
 void xe_irq_resume(struct xe_device *xe)
@@ -574,13 +592,15 @@ void xe_irq_resume(struct xe_device *xe)
 	struct xe_gt *gt;
 	int id;
 
-	spin_lock_irq(&xe->irq.lock);
+	/*
+	 * lock not needed:
+	 * 1. no irq will arrive before the postinstall
+	 * 2. display is not yet resumed
+	 */
 	xe->irq.enabled = true;
 	xe_irq_reset(xe);
-	xe_irq_postinstall(xe);
+	xe_irq_postinstall(xe); /* turn irqs on */
 
 	for_each_gt(gt, xe, id)
 		xe_irq_enable_hwe(gt);
-
-	spin_unlock_irq(&xe->irq.lock);
 }
-- 
GitLab


From 063e09af6e1d9a4f26cdd0eb896c19526cb0afd3 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 26 Jul 2023 17:03:52 -0400
Subject: [PATCH 1782/2340] drm/xe: Invert mask and val in xe_mmio_wait32.

The order: 'offset, mask, val'; is more common in other
drivers and in special in i915, where any dev could copy
a sequence and end up with unexpected behavior.

Done with coccinelle:
@rule1@
expression gt, reg, val, mask, timeout, out, atomic;
@@
- xe_mmio_wait32(gt, reg, val, mask, timeout, out, atomic)
+ xe_mmio_wait32(gt, reg, mask, val, timeout, out, atomic)

spatch -sp_file mmio.cocci *.c *.h compat-i915-headers/intel_uncore.h \
       --in-place

v2: Rebased after changes on xe_guc_mcr usage of xe_mmio_wait32.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c |  2 +-
 drivers/gpu/drm/xe/xe_gt.c         |  3 +--
 drivers/gpu/drm/xe/xe_guc.c        | 25 ++++++++++---------------
 drivers/gpu/drm/xe/xe_huc.c        |  3 +--
 drivers/gpu/drm/xe/xe_mmio.h       |  4 ++--
 drivers/gpu/drm/xe/xe_pcode.c      |  2 +-
 drivers/gpu/drm/xe/xe_uc_fw.c      |  2 +-
 7 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 7403673d532dc..aba0784b608ee 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -120,7 +120,7 @@ static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
 static int domain_sleep_wait(struct xe_gt *gt,
 			     struct xe_force_wake_domain *domain)
 {
-	return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val,
+	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0,
 			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
 			      NULL, false);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3e32d38aeeea5..bb7794cf2c1a4 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -456,8 +456,7 @@ static int do_gt_reset(struct xe_gt *gt)
 	int err;
 
 	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
-	err = xe_mmio_wait32(gt, GDRST, 0, GRDOM_FULL, 5000,
-			     NULL, false);
+	err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
 	if (err)
 		xe_gt_err(gt, "failed to clear GEN11_GRDOM_FULL (%pe)\n",
 			  ERR_PTR(err));
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 8ae0268387020..2530b6243661b 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -290,8 +290,7 @@ int xe_guc_reset(struct xe_guc *guc)
 
 	xe_mmio_write32(gt, GDRST, GRDOM_GUC);
 
-	ret = xe_mmio_wait32(gt, GDRST, 0, GRDOM_GUC, 5000,
-			     &gdrst, false);
+	ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
 	if (ret) {
 		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
 			gdrst);
@@ -386,10 +385,9 @@ static int guc_wait_ucode(struct xe_guc *guc)
 	 * 200ms. Even at slowest clock, this should be sufficient. And
 	 * in the working case, a larger timeout makes no difference.
 	 */
-	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS,
-			     FIELD_PREP(GS_UKERNEL_MASK,
-					XE_GUC_LOAD_STATUS_READY),
-			     GS_UKERNEL_MASK, 200000, &status, false);
+	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK,
+			     FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY),
+			     200000, &status, false);
 
 	if (ret) {
 		struct drm_device *drm = &xe->drm;
@@ -639,10 +637,9 @@ retry:
 
 	xe_guc_notify(guc);
 
-	ret = xe_mmio_wait32(gt, reply_reg,
-			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
-					GUC_HXG_ORIGIN_GUC),
-			     GUC_HXG_MSG_0_ORIGIN, 50000, &reply, false);
+	ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN,
+			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC),
+			     50000, &reply, false);
 	if (ret) {
 timeout:
 		drm_err(&xe->drm, "mmio request %#x: no reply %#x\n",
@@ -654,11 +651,9 @@ timeout:
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
 	    GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
 
-		ret = xe_mmio_wait32(gt, reply_reg,
-				     FIELD_PREP(GUC_HXG_MSG_0_TYPE,
-						GUC_HXG_TYPE_RESPONSE_SUCCESS),
-				     GUC_HXG_MSG_0_TYPE, 1000000, &header,
-				     false);
+		ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_TYPE,
+				     FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+				     1000000, &header, false);
 
 		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
 			     GUC_HXG_ORIGIN_GUC))
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 373a65c77946b..dc1708b4e94a3 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -85,8 +85,7 @@ int xe_huc_auth(struct xe_huc *huc)
 		goto fail;
 	}
 
-	ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO,
-			     HUC_LOAD_SUCCESSFUL,
+	ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO, HUC_LOAD_SUCCESSFUL,
 			     HUC_LOAD_SUCCESSFUL, 100000, NULL, false);
 	if (ret) {
 		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 4953a9a3f1fb3..d24badca86771 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -107,8 +107,8 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 	return (reg_val & mask) != eval ? -EINVAL : 0;
 }
 
-static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 val,
-				 u32 mask, u32 timeout_us, u32 *out_val,
+static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask,
+				 u32 val, u32 timeout_us, u32 *out_val,
 				 bool atomic)
 {
 	ktime_t cur = ktime_get_raw();
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index e3ab1d3a367fe..7f1bf2297f51e 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -68,7 +68,7 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
 	xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0);
 	xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox);
 
-	err = xe_mmio_wait32(gt, PCODE_MAILBOX, 0, PCODE_READY,
+	err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0,
 			     timeout_ms * 1000, NULL, atomic);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 75f7a4cf6cbe8..5801c10f3ccc4 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -484,7 +484,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
 
 	/* Wait for DMA to finish */
-	ret = xe_mmio_wait32(gt, DMA_CTRL, 0, START_DMA, 100000, &dma_ctrl,
+	ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
 			     false);
 	if (ret)
 		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
-- 
GitLab


From 4f027e304a6c7ae77150965d10b8a1edee0398a2 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Thu, 27 Jul 2023 04:56:49 +0530
Subject: [PATCH 1783/2340] drm/xe: Notify Userspace when gt reset fails

Send uevent in case of gt reset failure. This intimation can be used by
userspace monitoring tool to do the device level reset/reboot
when GT reset fails. udevadm can be used to monitor the uevents.

v2:
- Support only gt failure notification (Rodrigo)

v3
- Rectify the comments in header file.

v4
- Use pci kobj instead of drm kobj for notification.(Rodrigo)
- Cleanup (Badal)

v5
- Add tile id and gt id as additional info provided by uevent.
- Provide code documentation for the uevent. (Rodrigo)

Cc: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 19 +++++++++++++++++++
 include/uapi/drm/xe_drm.h  | 10 ++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index bb7794cf2c1a4..82b9874040706 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -8,6 +8,7 @@
 #include <linux/minmax.h>
 
 #include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
 
 #include "regs/xe_gt_regs.h"
 #include "xe_bb.h"
@@ -499,6 +500,20 @@ static int do_gt_restart(struct xe_gt *gt)
 	return 0;
 }
 
+static void xe_uevent_gt_reset_failure(struct pci_dev *pdev, u8 tile_id, u8 gt_id)
+{
+	char *reset_event[4];
+
+	reset_event[0] = XE_RESET_FAILED_UEVENT "=NEEDS_RESET";
+	reset_event[1] = kasprintf(GFP_KERNEL, "TILE_ID=%d", tile_id);
+	reset_event[2] = kasprintf(GFP_KERNEL, "GT_ID=%d", gt_id);
+	reset_event[3] = NULL;
+	kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, reset_event);
+
+	kfree(reset_event[1]);
+	kfree(reset_event[2]);
+}
+
 static int gt_reset(struct xe_gt *gt)
 {
 	int err;
@@ -549,6 +564,10 @@ err_msg:
 	xe_device_mem_access_put(gt_to_xe(gt));
 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
 
+	/* Notify userspace about gt reset failure */
+	xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev),
+				   gt_to_tile(gt)->id, gt->info.id);
+
 	return err;
 }
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 259de80376b4a..3d09e9e9267b5 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -16,6 +16,16 @@ extern "C" {
  * subject to backwards-compatibility constraints.
  */
 
+/**
+ * DOC: uevent generated by xe on it's pci node.
+ *
+ * XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt
+ * fails. The value supplied with the event is always "NEEDS_RESET".
+ * Additional information supplied is tile id and gt id of the gt unit for
+ * which reset has failed.
+ */
+#define XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
+
 /**
  * struct xe_user_extension - Base class for defining a chain of extensions
  *
-- 
GitLab


From 8f3013e0b22206b27f37dcf1b96ce68df3393040 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Thu, 27 Jul 2023 04:56:50 +0530
Subject: [PATCH 1784/2340] drm/xe: Introduce fault injection for gt reset

To trigger gt reset failure:
 echo 100 >  /sys/kernel/debug/dri/<cardX>/fail_gt_reset/probability
 echo 2 >  /sys/kernel/debug/dri/<cardX>/fail_gt_reset/times

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c | 10 ++++++++++
 drivers/gpu/drm/xe/xe_gt.c      |  8 +++++++-
 drivers/gpu/drm/xe/xe_gt.h      | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 491506a1e12e5..2de8a0b9da183 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -5,6 +5,7 @@
 
 #include "xe_debugfs.h"
 
+#include <linux/fault-inject.h>
 #include <linux/string_helpers.h>
 
 #include <drm/drm_debugfs.h>
@@ -20,6 +21,10 @@
 #include "xe_vm.h"
 #endif
 
+#ifdef CONFIG_FAULT_INJECTION
+DECLARE_FAULT_ATTR(gt_reset_failure);
+#endif
+
 static struct xe_device *node_to_xe(struct drm_info_node *node)
 {
 	return to_xe_device(node->minor->dev);
@@ -135,4 +140,9 @@ void xe_debugfs_register(struct xe_device *xe)
 
 	for_each_gt(gt, xe, id)
 		xe_gt_debugfs_register(gt);
+
+#ifdef CONFIG_FAULT_INJECTION
+	fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
+#endif
+
 }
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 82b9874040706..28bf577c7bf2e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -524,6 +524,11 @@ static int gt_reset(struct xe_gt *gt)
 
 	xe_gt_info(gt, "reset started\n");
 
+	if (xe_fault_inject_gt_reset()) {
+		err = -ECANCELED;
+		goto err_fail;
+	}
+
 	xe_gt_sanitize(gt);
 
 	xe_device_mem_access_get(gt_to_xe(gt));
@@ -562,6 +567,7 @@ err_out:
 err_msg:
 	XE_WARN_ON(xe_uc_start(&gt->uc));
 	xe_device_mem_access_put(gt_to_xe(gt));
+err_fail:
 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
 
 	/* Notify userspace about gt reset failure */
@@ -583,7 +589,7 @@ void xe_gt_reset_async(struct xe_gt *gt)
 	xe_gt_info(gt, "trying reset\n");
 
 	/* Don't do a reset while one is already in flight */
-	if (xe_uc_reset_prepare(&gt->uc))
+	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
 		return;
 
 	xe_gt_info(gt, "reset queued\n");
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 7298653a73de3..caded203a8a03 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -7,6 +7,7 @@
 #define _XE_GT_H_
 
 #include <drm/drm_util.h>
+#include <linux/fault-inject.h>
 
 #include "xe_device_types.h"
 #include "xe_hw_engine.h"
@@ -16,6 +17,19 @@
 		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
 			  xe_hw_engine_is_valid((hwe__)))
 
+#ifdef CONFIG_FAULT_INJECTION
+extern struct fault_attr gt_reset_failure;
+static inline bool xe_fault_inject_gt_reset(void)
+{
+	return should_fail(&gt_reset_failure, 1);
+}
+#else
+static inline bool xe_fault_inject_gt_reset(void)
+{
+	return false;
+}
+#endif
+
 struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
 int xe_gt_init_early(struct xe_gt *gt);
 int xe_gt_init(struct xe_gt *gt);
-- 
GitLab


From f026520367be5f7e05531d6e601c822596ebe65f Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 25 Jul 2023 18:11:55 -0400
Subject: [PATCH 1785/2340] drm/xe: Only set PCI d3cold_allowed when we are
 really allowing.

First of all it was strange to see:
if (allowed) {
...
} else {
   D3COLD_ENABLE
}

But besides this misalignment, let's also use the pci
d3cold_allowed useful to us and know that we are not really
allowing d3cold.

Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index ae6e1394ff31f..1a79c6a7dd5ee 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -766,6 +766,7 @@ static int xe_pci_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 
 	if (xe->d3cold.allowed) {
+		d3cold_toggle(pdev, D3COLD_ENABLE);
 		pci_disable_device(pdev);
 		pci_ignore_hotplug(pdev);
 		pci_set_power_state(pdev, PCI_D3cold);
@@ -795,8 +796,6 @@ static int xe_pci_runtime_resume(struct device *dev)
 			return err;
 
 		pci_set_master(pdev);
-	} else {
-		d3cold_toggle(pdev, D3COLD_ENABLE);
 	}
 
 	return xe_pm_runtime_resume(xe);
-- 
GitLab


From e07aa913161b0338708887a5e78bf57ffdfe67fa Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 25 Jul 2023 18:11:56 -0400
Subject: [PATCH 1786/2340] drm/xe: Move d3cold_allowed decision all together.

And let's use the VRAM threshold to keep d3cold temporarily disabled.

With this we have the ability to run D3Cold experiments just by
touching the vram_d3cold_threshold sysfs entry.

Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 15 +--------------
 drivers/gpu/drm/xe/xe_pm.c  |  5 +++++
 drivers/gpu/drm/xe/xe_pm.h  |  7 ++++++-
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 1a79c6a7dd5ee..4cbacc80594bc 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -806,20 +806,7 @@ static int xe_pci_runtime_idle(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct xe_device *xe = pdev_to_xe_device(pdev);
 
-	if (!xe->d3cold.capable) {
-		xe->d3cold.allowed = false;
-	} else {
-		xe_pm_d3cold_allowed_toggle(xe);
-
-		/*
-		 * TODO: d3cold should be allowed (true) if
-		 * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
-		 * but maybe include some other conditions. So, before
-		 * we can re-enable the D3cold, we need to:
-		 * 1. rewrite the VRAM save / restore to avoid buffer object locks
-		 */
-		xe->d3cold.allowed = false;
-	}
+	xe_pm_d3cold_allowed_toggle(xe);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index cb2a00ea28e30..1c62900d29d88 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -342,6 +342,11 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
 	u64 vram_used;
 	int i;
 
+	if (!xe->d3cold.capable) {
+		xe->d3cold.allowed = false;
+		return;
+	}
+
 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
 		man = ttm_manager_type(&xe->ttm, i);
 		if (man) {
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 08a633ce5145d..6b9031f7af243 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -8,7 +8,12 @@
 
 #include <linux/pm_runtime.h>
 
-#define DEFAULT_VRAM_THRESHOLD 300 /* in MB */
+/*
+ * TODO: Threshold = 0 will block D3Cold.
+ *       Before we can move this to a higher value (like 300), we need to:
+ *           1. rewrite the VRAM save / restore to avoid buffer object locks
+ */
+#define DEFAULT_VRAM_THRESHOLD 0 /* in MB */
 
 struct xe_device;
 
-- 
GitLab


From bba2ec4144f5a7683d9a26cafffca6031361ee66 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 25 Jul 2023 18:11:57 -0400
Subject: [PATCH 1787/2340] drm/xe: Fix the runtime_idle call and
 d3cold.allowed decision.

According to Documentation/power/runtime_pm.txt:

int pm_runtime_put(struct device *dev);
    - decrement the device's usage counter; if the result is 0 then run
      pm_request_idle(dev) and return its result

int pm_runtime_put_autosuspend(struct device *dev);
    - decrement the device's usage counter; if the result is 0 then run
      pm_request_autosuspend(dev) and return its result

We need to ensure that the idle function is called before suspending
so we take the right d3cold.allowed decision and respect the values
set on vram_d3cold_threshold sysfs. So we need pm_runtime_put()
instead of pm_runtime_put_autosuspend().

Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Tested-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 1c62900d29d88..310e413c91a43 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -133,7 +133,7 @@ static void xe_pm_runtime_init(struct xe_device *xe)
 	pm_runtime_set_active(dev);
 	pm_runtime_allow(dev);
 	pm_runtime_mark_last_busy(dev);
-	pm_runtime_put_autosuspend(dev);
+	pm_runtime_put(dev);
 }
 
 void xe_pm_init(struct xe_device *xe)
@@ -289,7 +289,7 @@ int xe_pm_runtime_get(struct xe_device *xe)
 int xe_pm_runtime_put(struct xe_device *xe)
 {
 	pm_runtime_mark_last_busy(xe->drm.dev);
-	return pm_runtime_put_autosuspend(xe->drm.dev);
+	return pm_runtime_put(xe->drm.dev);
 }
 
 int xe_pm_runtime_get_if_active(struct xe_device *xe)
-- 
GitLab


From a32d82b4cfd63a9bc198bd9faa54844b8d04c5d3 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 25 Jul 2023 18:11:58 -0400
Subject: [PATCH 1788/2340] drm/xe: Only init runtime PM after all d3cold
 config is in place.

We cannot allow runtime pm suspend after we configured the
d3cold capable and threshold.

Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 310e413c91a43..a20a2fb34a7df 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -141,10 +141,12 @@ void xe_pm_init(struct xe_device *xe)
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
 	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
-	xe_pm_runtime_init(xe);
+
 	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
 	xe_device_sysfs_init(xe);
 	xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+
+	xe_pm_runtime_init(xe);
 }
 
 void xe_pm_runtime_fini(struct xe_device *xe)
-- 
GitLab


From d87c424afaf62f11ded6e66b4bdfbd5f5da8b330 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 25 Jul 2023 18:11:59 -0400
Subject: [PATCH 1789/2340] drm/xe: Ensure memory eviction on s2idle.

On discrete cards we cannot allow the pci subsystem to skip
the regular suspend and we need to unblock the d3cold.

Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 54 ++++++++++++++++++++++---------------
 drivers/gpu/drm/xe/xe_pm.c  | 11 ++++++++
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 4cbacc80594bc..6e31b596683e7 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -30,6 +30,28 @@ enum toggle_d3cold {
 	D3COLD_ENABLE,
 };
 
+static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct pci_dev *root_pdev;
+
+	if (!xe->d3cold.capable)
+		return;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (!root_pdev)
+		return;
+
+	switch (toggle) {
+	case D3COLD_DISABLE:
+		pci_d3cold_disable(root_pdev);
+		break;
+	case D3COLD_ENABLE:
+		pci_d3cold_enable(root_pdev);
+		break;
+	}
+}
+
 struct xe_subplatform_desc {
 	enum xe_subplatform subplatform;
 	const char *name;
@@ -697,6 +719,13 @@ static int xe_pci_suspend(struct device *dev)
 	if (err)
 		return err;
 
+	/*
+	 * Enabling D3Cold is needed for S2Idle/S0ix.
+	 * It is save to allow here since xe_pm_suspend has evicted
+	 * the local memory and the direct complete optimization is disabled.
+	 */
+	d3cold_toggle(pdev, D3COLD_ENABLE);
+
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 
@@ -712,6 +741,9 @@ static int xe_pci_resume(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	int err;
 
+	/* Give back the D3Cold decision to the runtime P M*/
+	d3cold_toggle(pdev, D3COLD_DISABLE);
+
 	err = pci_set_power_state(pdev, PCI_D0);
 	if (err)
 		return err;
@@ -731,28 +763,6 @@ static int xe_pci_resume(struct device *dev)
 	return 0;
 }
 
-static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
-{
-	struct xe_device *xe = pdev_to_xe_device(pdev);
-	struct pci_dev *root_pdev;
-
-	if (!xe->d3cold.capable)
-		return;
-
-	root_pdev = pcie_find_root_port(pdev);
-	if (!root_pdev)
-		return;
-
-	switch (toggle) {
-	case D3COLD_DISABLE:
-		pci_d3cold_disable(root_pdev);
-		break;
-	case D3COLD_ENABLE:
-		pci_d3cold_enable(root_pdev);
-		break;
-	}
-}
-
 static int xe_pci_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index a20a2fb34a7df..cdde0d87fd9f4 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -128,6 +128,17 @@ static void xe_pm_runtime_init(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
 
+	/*
+	 * Disable the system suspend direct complete optimization.
+	 * We need to ensure that the regular device suspend/resume functions
+	 * are called since our runtime_pm cannot guarantee local memory
+	 * eviction for d3cold.
+	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
+	 *       this option to integrated graphics as well.
+	 */
+	if (IS_DGFX(xe))
+		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+
 	pm_runtime_use_autosuspend(dev);
 	pm_runtime_set_autosuspend_delay(dev, 1000);
 	pm_runtime_set_active(dev);
-- 
GitLab


From fe58a2432b0d07cf56704ecf1ca5e52e6c1e8fff Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 28 Jul 2023 10:56:02 -0700
Subject: [PATCH 1790/2340] drm/xe/mtl: Reduce Wa_14018575942 scope to the CCS
 engine

The MTL version of Wa_14018575942 has been updated to suggest only
applying the register change on the CCS engine.

Note that DG2 and PVC have a functionally equivalent workaround with
Wa_18018781329; for now that one is still applying to all engines,
although we'll keep an eye on it in case it changes to be CCS-specific
too.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20230728175601.2343755-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 21087f7a4609f..36c80e9fb7586 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -238,21 +238,13 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	},
 	{ XE_RTP_NAME("14018575942"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
-	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
-			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB))
+	  XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB))
 	},
 	{ XE_RTP_NAME("22016670082"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
 	  XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
 	},
 
-	/* Xe_LPM+ */
-	{ XE_RTP_NAME("14018575942"),
-	  XE_RTP_RULES(MEDIA_VERSION(1300)),
-	  XE_RTP_ACTIONS(SET(XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
-			 SET(XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
-	},
-
 	{}
 };
 
-- 
GitLab


From 757d9fdfe3db4de6ed5ef9961a301e5be7b2cd74 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 27 Jul 2023 19:00:14 -0700
Subject: [PATCH 1791/2340] drm/xe: Remove XE_GUC_CT_SELFTEST

XE_GUC_CT_SELFTEST enabled a debugfs entry to which ran a very simple
selftest ensuring the GuC CT code worked. This was added before the
kunit framework was available and before submissions were working too.
This test isn't worth porting over to the kunit frame as if the GuC CT
didn't work, literally almost nothing would work so just remove this.

Suggested-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c       | 65 ----------------------------
 drivers/gpu/drm/xe/xe_guc_ct.h       |  9 ----
 drivers/gpu/drm/xe/xe_guc_ct_types.h |  6 ---
 drivers/gpu/drm/xe/xe_guc_debugfs.c  | 18 --------
 4 files changed, 98 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index cb75db30800ce..3f5084c6ffc86 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1310,68 +1310,3 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
 	xe_guc_ct_snapshot_print(snapshot, p);
 	xe_guc_ct_snapshot_free(snapshot);
 }
-
-#ifdef XE_GUC_CT_SELFTEST
-/*
- * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
- * control if enough sent, 8k sends is enough. Verify forward process, verify
- * credits expected values on exit.
- */
-void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
-{
-	struct guc_ctb *g2h = &ct->ctbs.g2h;
-	u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, };
-	u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, };
-	int ret;
-	int i;
-
-	ct->suppress_irq_handler = true;
-	drm_puts(p, "Starting GuC CT selftest\n");
-
-	for (i = 0; i < 8192; ++i) {
-		ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);
-		if (ret) {
-			drm_printf(p, "Aborted pass %d, ret %d\n", i, ret);
-			xe_guc_ct_print(ct, p, true);
-			break;
-		}
-	}
-
-	ct->suppress_irq_handler = false;
-	if (!ret) {
-		xe_guc_ct_irq_handler(ct);
-		msleep(200);
-		if (g2h->info.space !=
-		    CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) {
-			drm_printf(p, "Mismatch on space %d, %d\n",
-				   g2h->info.space,
-				   CIRC_SPACE(0, 0, g2h->info.size) -
-				   g2h->info.resv_space);
-			ret = -EIO;
-		}
-		if (ct->g2h_outstanding) {
-			drm_printf(p, "Outstanding G2H, %d\n",
-				   ct->g2h_outstanding);
-			ret = -EIO;
-		}
-	}
-
-	/* Check failure path for blocking CTs too */
-	xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action));
-	if (g2h->info.space !=
-	    CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) {
-		drm_printf(p, "Mismatch on space %d, %d\n",
-			   g2h->info.space,
-			   CIRC_SPACE(0, 0, g2h->info.size) -
-			   g2h->info.resv_space);
-		ret = -EIO;
-	}
-	if (ct->g2h_outstanding) {
-		drm_printf(p, "Outstanding G2H, %d\n",
-			   ct->g2h_outstanding);
-		ret = -EIO;
-	}
-
-	drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS");
-}
-#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 3e04ee64652c1..f15f8a4857e07 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -25,13 +25,8 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
 static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
 {
 	wake_up_all(&ct->wq);
-#ifdef XE_GUC_CT_SELFTEST
-	if (!ct->suppress_irq_handler && ct->enabled)
-		queue_work(system_unbound_wq, &ct->g2h_worker);
-#else
 	if (ct->enabled)
 		queue_work(system_unbound_wq, &ct->g2h_worker);
-#endif
 	xe_guc_ct_fast_path(ct);
 }
 
@@ -61,8 +56,4 @@ xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
 	return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
 }
 
-#ifdef XE_GUC_CT_SELFTEST
-void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p);
-#endif
-
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 93046d95b009e..dedbd686428ba 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -14,8 +14,6 @@
 
 #include "abi/guc_communication_ctb_abi.h"
 
-#define XE_GUC_CT_SELFTEST
-
 struct xe_bo;
 
 /**
@@ -110,10 +108,6 @@ struct xe_guc_ct {
 	wait_queue_head_t wq;
 	/** @g2h_fence_wq: wait queue used for G2H fencing */
 	wait_queue_head_t g2h_fence_wq;
-#ifdef XE_GUC_CT_SELFTEST
-	/** @suppress_irq_handler: force flow control to sender */
-	bool suppress_irq_handler;
-#endif
 	/** @msg: Message buffer */
 	u32 msg[GUC_CTB_MSG_MAX_LEN];
 	/** @fast_msg: Message buffer */
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index b43c70de7e373..167eb5593e03c 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -58,27 +58,9 @@ static int guc_log(struct seq_file *m, void *data)
 	return 0;
 }
 
-#ifdef XE_GUC_CT_SELFTEST
-static int guc_ct_selftest(struct seq_file *m, void *data)
-{
-	struct xe_guc *guc = node_to_guc(m->private);
-	struct xe_device *xe = guc_to_xe(guc);
-	struct drm_printer p = drm_seq_file_printer(m);
-
-	xe_device_mem_access_get(xe);
-	xe_guc_ct_selftest(&guc->ct, &p);
-	xe_device_mem_access_put(xe);
-
-	return 0;
-}
-#endif
-
 static const struct drm_info_list debugfs_list[] = {
 	{"guc_info", guc_info, 0},
 	{"guc_log", guc_log, 0},
-#ifdef XE_GUC_CT_SELFTEST
-	{"guc_ct_selftest", guc_ct_selftest, 0},
-#endif
 };
 
 void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
-- 
GitLab


From 8d7a91fe58c982a7709fabb53a51d87dbf94f6e9 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 27 Jul 2023 19:10:51 -0700
Subject: [PATCH 1792/2340] drm/xe: Remove ct->fence_context

This is unused, remove it.

Suggested-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c       | 1 -
 drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 --
 2 files changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 3f5084c6ffc86..d322eadbe75a5 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -140,7 +140,6 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 	mutex_init(&ct->lock);
 	spin_lock_init(&ct->fast_lock);
 	xa_init(&ct->fence_lookup);
-	ct->fence_context = dma_fence_context_alloc(1);
 	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
 	init_waitqueue_head(&ct->wq);
 	init_waitqueue_head(&ct->g2h_fence_wq);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index dedbd686428ba..d814d4ee3fc65 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -100,8 +100,6 @@ struct xe_guc_ct {
 	bool enabled;
 	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
 	u32 fence_seqno;
-	/** @fence_context: context for G2H fence */
-	u64 fence_context;
 	/** @fence_lookup: G2H fence lookup */
 	struct xarray fence_lookup;
 	/** @wq: wait queue used for reliable CT sends and freeing G2H credits */
-- 
GitLab


From e3828ebf6cde583b76143e283f8c4a4e8a252145 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 27 Jul 2023 19:36:00 -0700
Subject: [PATCH 1793/2340] drm/xe: Add define WQ_HEADER_SIZE

Previously used a a magic '+ 3', use define instead.

Suggested-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 99c9b7139195e..a2eeb3ffe5489 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -558,8 +558,9 @@ static void wq_item_append(struct xe_engine *e)
 	struct xe_guc *guc = engine_to_guc(e);
 	struct xe_device *xe = guc_to_xe(guc);
 	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
-	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
-	u32 wqi_size = (e->width + 3) * sizeof(u32);
+#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
+	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
+	u32 wqi_size = (e->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
 	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
 	int i = 0, j;
 
-- 
GitLab


From 3207a32163cdf7b3345a44e255aae614859ea0d6 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 27 Jul 2023 14:55:28 +0000
Subject: [PATCH 1794/2340] drm/xe/macro: Remove unused constant

Remove XE_EXTRA_DEBUG for cleanup as it is not used.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_macros.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
index 038cf28604add..a7105050bce02 100644
--- a/drivers/gpu/drm/xe/xe_macros.h
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -8,7 +8,6 @@
 
 #include <linux/bug.h>
 
-#define XE_EXTRA_DEBUG 1
 #define XE_WARN_ON WARN_ON
 #define XE_BUG_ON BUG_ON
 
-- 
GitLab


From 99fea6828879381405dba598627aea79fa6edd78 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Thu, 27 Jul 2023 14:55:29 +0000
Subject: [PATCH 1795/2340] drm/xe: Prefer WARN() over BUG() to avoid crashing
 the kernel

Replace calls to XE_BUG_ON() with calls XE_WARN_ON() which in turn calls
WARN() instead of BUG(). BUG() crashes the kernel and should only be
used when it is absolutely unavoidable in case of catastrophic and
unrecoverable failures, which is not the case here.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bb.c                  |  8 +--
 drivers/gpu/drm/xe/xe_bo.c                  | 52 +++++++++----------
 drivers/gpu/drm/xe/xe_bo.h                  | 14 +++---
 drivers/gpu/drm/xe/xe_bo_evict.c            |  4 +-
 drivers/gpu/drm/xe/xe_execlist.c            | 14 +++---
 drivers/gpu/drm/xe/xe_force_wake.c          |  4 +-
 drivers/gpu/drm/xe/xe_force_wake.h          |  4 +-
 drivers/gpu/drm/xe/xe_ggtt.c                | 12 ++---
 drivers/gpu/drm/xe/xe_gt_clock.c            |  4 +-
 drivers/gpu/drm/xe/xe_gt_debugfs.c          |  2 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++---
 drivers/gpu/drm/xe/xe_guc.c                 | 22 ++++----
 drivers/gpu/drm/xe/xe_guc_ads.c             | 14 +++---
 drivers/gpu/drm/xe/xe_guc_ct.c              | 22 ++++----
 drivers/gpu/drm/xe/xe_guc_hwconfig.c        |  2 +-
 drivers/gpu/drm/xe/xe_guc_log.c             |  4 +-
 drivers/gpu/drm/xe/xe_guc_submit.c          | 48 +++++++++---------
 drivers/gpu/drm/xe/xe_huc.c                 |  2 +-
 drivers/gpu/drm/xe/xe_hw_engine.c           | 10 ++--
 drivers/gpu/drm/xe/xe_hw_fence.c            |  2 +-
 drivers/gpu/drm/xe/xe_lrc.c                 |  8 +--
 drivers/gpu/drm/xe/xe_macros.h              |  1 -
 drivers/gpu/drm/xe/xe_migrate.c             | 30 +++++------
 drivers/gpu/drm/xe/xe_pt.c                  | 22 ++++----
 drivers/gpu/drm/xe/xe_res_cursor.h          | 12 ++---
 drivers/gpu/drm/xe/xe_ring_ops.c            |  8 +--
 drivers/gpu/drm/xe/xe_sched_job.c           |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c      |  4 +-
 drivers/gpu/drm/xe/xe_uc_fw.c               | 16 +++---
 drivers/gpu/drm/xe/xe_uc_fw.h               |  2 +-
 drivers/gpu/drm/xe/xe_vm.c                  | 56 ++++++++++-----------
 drivers/gpu/drm/xe/xe_wait_user_fence.c     |  2 +-
 drivers/gpu/drm/xe/xe_wopcm.c               | 18 +++----
 33 files changed, 218 insertions(+), 219 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index f9b6b7adf99ff..b15a7cb7db4cb 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -78,7 +78,7 @@ struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
 {
 	u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);
 
-	XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+	XE_WARN_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));
 
 	return __xe_bb_create_job(wa_eng, bb, &addr);
 }
@@ -94,8 +94,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
 		4 * second_idx,
 	};
 
-	BUG_ON(second_idx > bb->len);
-	BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+	XE_WARN_ON(second_idx > bb->len);
+	XE_WARN_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));
 
 	return __xe_bb_create_job(kernel_eng, bb, addr);
 }
@@ -105,7 +105,7 @@ struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
 {
 	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
 
-	BUG_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION);
+	XE_WARN_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION);
 	return __xe_bb_create_job(kernel_eng, bb, &addr);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65b56e7a2fde5..cf0faaefd03d4 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -103,7 +103,7 @@ static bool xe_bo_is_user(struct xe_bo *bo)
 static struct xe_tile *
 mem_type_to_tile(struct xe_device *xe, u32 mem_type)
 {
-	XE_BUG_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type));
+	XE_WARN_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type));
 
 	return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
 }
@@ -142,7 +142,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 	struct ttm_place place = { .mem_type = mem_type };
 	u64 io_size = tile->mem.vram.io_size;
 
-	XE_BUG_ON(!tile->mem.vram.usable_size);
+	XE_WARN_ON(!tile->mem.vram.usable_size);
 
 	/*
 	 * For eviction / restore on suspend / resume objects
@@ -285,7 +285,7 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
 	unsigned long num_pages = tt->num_pages;
 	int ret;
 
-	XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
 
 	if (xe_tt->sg)
 		return 0;
@@ -544,8 +544,8 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
 					       ttm);
 	struct sg_table *sg;
 
-	XE_BUG_ON(!attach);
-	XE_BUG_ON(!ttm_bo->ttm);
+	XE_WARN_ON(!attach);
+	XE_WARN_ON(!ttm_bo->ttm);
 
 	if (new_res->mem_type == XE_PL_SYSTEM)
 		goto out;
@@ -707,8 +707,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	else if (mem_type_is_vram(old_mem_type))
 		tile = mem_type_to_tile(xe, old_mem_type);
 
-	XE_BUG_ON(!tile);
-	XE_BUG_ON(!tile->migrate);
+	XE_WARN_ON(!tile);
+	XE_WARN_ON(!tile->migrate);
 
 	trace_xe_bo_move(bo);
 	xe_device_mem_access_get(xe);
@@ -738,7 +738,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 					goto out;
 				}
 
-				XE_BUG_ON(new_mem->start !=
+				XE_WARN_ON(new_mem->start !=
 					  bo->placements->fpfn);
 
 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
@@ -1198,7 +1198,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	int err;
 
 	/* Only kernel objects should set GT */
-	XE_BUG_ON(tile && type != ttm_bo_type_kernel);
+	XE_WARN_ON(tile && type != ttm_bo_type_kernel);
 
 	if (XE_WARN_ON(!size))
 		return ERR_PTR(-EINVAL);
@@ -1350,7 +1350,7 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
 			tile = xe_device_get_root_tile(xe);
 
-		XE_BUG_ON(!tile);
+		XE_WARN_ON(!tile);
 
 		if (flags & XE_BO_CREATE_STOLEN_BIT &&
 		    flags & XE_BO_FIXED_PLACEMENT_BIT) {
@@ -1481,8 +1481,8 @@ int xe_bo_pin_external(struct xe_bo *bo)
 	struct xe_device *xe = xe_bo_device(bo);
 	int err;
 
-	XE_BUG_ON(bo->vm);
-	XE_BUG_ON(!xe_bo_is_user(bo));
+	XE_WARN_ON(bo->vm);
+	XE_WARN_ON(!xe_bo_is_user(bo));
 
 	if (!xe_bo_is_pinned(bo)) {
 		err = xe_bo_validate(bo, NULL, false);
@@ -1514,20 +1514,20 @@ int xe_bo_pin(struct xe_bo *bo)
 	int err;
 
 	/* We currently don't expect user BO to be pinned */
-	XE_BUG_ON(xe_bo_is_user(bo));
+	XE_WARN_ON(xe_bo_is_user(bo));
 
 	/* Pinned object must be in GGTT or have pinned flag */
-	XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT |
+	XE_WARN_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT |
 				 XE_BO_CREATE_GGTT_BIT)));
 
 	/*
 	 * No reason we can't support pinning imported dma-bufs we just don't
 	 * expect to pin an imported dma-buf.
 	 */
-	XE_BUG_ON(bo->ttm.base.import_attach);
+	XE_WARN_ON(bo->ttm.base.import_attach);
 
 	/* We only expect at most 1 pin */
-	XE_BUG_ON(xe_bo_is_pinned(bo));
+	XE_WARN_ON(xe_bo_is_pinned(bo));
 
 	err = xe_bo_validate(bo, NULL, false);
 	if (err)
@@ -1543,7 +1543,7 @@ int xe_bo_pin(struct xe_bo *bo)
 		struct ttm_place *place = &(bo->placements[0]);
 
 		if (mem_type_is_vram(place->mem_type)) {
-			XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
+			XE_WARN_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
 
 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
@@ -1580,9 +1580,9 @@ void xe_bo_unpin_external(struct xe_bo *bo)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 
-	XE_BUG_ON(bo->vm);
-	XE_BUG_ON(!xe_bo_is_pinned(bo));
-	XE_BUG_ON(!xe_bo_is_user(bo));
+	XE_WARN_ON(bo->vm);
+	XE_WARN_ON(!xe_bo_is_pinned(bo));
+	XE_WARN_ON(!xe_bo_is_user(bo));
 
 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
 		spin_lock(&xe->pinned.lock);
@@ -1603,15 +1603,15 @@ void xe_bo_unpin(struct xe_bo *bo)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 
-	XE_BUG_ON(bo->ttm.base.import_attach);
-	XE_BUG_ON(!xe_bo_is_pinned(bo));
+	XE_WARN_ON(bo->ttm.base.import_attach);
+	XE_WARN_ON(!xe_bo_is_pinned(bo));
 
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
 	    bo->flags & XE_BO_INTERNAL_TEST)) {
 		struct ttm_place *place = &(bo->placements[0]);
 
 		if (mem_type_is_vram(place->mem_type)) {
-			XE_BUG_ON(list_empty(&bo->pinned_link));
+			XE_WARN_ON(list_empty(&bo->pinned_link));
 
 			spin_lock(&xe->pinned.lock);
 			list_del_init(&bo->pinned_link);
@@ -1675,12 +1675,12 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
 	struct xe_res_cursor cur;
 	u64 page;
 
-	XE_BUG_ON(page_size > PAGE_SIZE);
+	XE_WARN_ON(page_size > PAGE_SIZE);
 	page = offset >> PAGE_SHIFT;
 	offset &= (PAGE_SIZE - 1);
 
 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
-		XE_BUG_ON(!bo->ttm.ttm);
+		XE_WARN_ON(!bo->ttm.ttm);
 
 		xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
 				page_size, &cur);
@@ -1874,7 +1874,7 @@ int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
 	LIST_HEAD(objs);
 	LIST_HEAD(dups);
 
-	XE_BUG_ON(!ww);
+	XE_WARN_ON(!ww);
 
 	tv_bo.num_shared = num_resv;
 	tv_bo.bo = &bo->ttm;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index a9a32d6802084..76b8c836deb74 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -166,7 +166,7 @@ void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww);
 static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
 {
 	if (bo) {
-		XE_BUG_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm));
+		XE_WARN_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm));
 		if (bo->vm)
 			xe_vm_assert_held(bo->vm);
 		else
@@ -178,8 +178,8 @@ static inline void xe_bo_lock_no_vm(struct xe_bo *bo,
 				    struct ww_acquire_ctx *ctx)
 {
 	if (bo) {
-		XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
-				     bo->ttm.base.resv != &bo->ttm.base._resv));
+		XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+				      bo->ttm.base.resv != &bo->ttm.base._resv));
 		dma_resv_lock(bo->ttm.base.resv, ctx);
 	}
 }
@@ -187,8 +187,8 @@ static inline void xe_bo_lock_no_vm(struct xe_bo *bo,
 static inline void xe_bo_unlock_no_vm(struct xe_bo *bo)
 {
 	if (bo) {
-		XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
-				     bo->ttm.base.resv != &bo->ttm.base._resv));
+		XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+				      bo->ttm.base.resv != &bo->ttm.base._resv));
 		dma_resv_unlock(bo->ttm.base.resv);
 	}
 }
@@ -228,8 +228,8 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
 static inline u32
 xe_bo_ggtt_addr(struct xe_bo *bo)
 {
-	XE_BUG_ON(bo->ggtt_node.size > bo->size);
-	XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
+	XE_WARN_ON(bo->ggtt_node.size > bo->size);
+	XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
 	return bo->ggtt_node.start;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index f559a7f3eb3e5..0d5c3a208ab4e 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -160,8 +160,8 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 		 * We expect validate to trigger a move VRAM and our move code
 		 * should setup the iosys map.
 		 */
-		XE_BUG_ON(iosys_map_is_null(&bo->vmap));
-		XE_BUG_ON(!xe_bo_is_vram(bo));
+		XE_WARN_ON(iosys_map_is_null(&bo->vmap));
+		XE_WARN_ON(!xe_bo_is_vram(bo));
 
 		xe_bo_put(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index b15d095b395bd..a4d9531e3516c 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -50,10 +50,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	lrc_desc = xe_lrc_descriptor(lrc);
 
 	if (GRAPHICS_VERx100(xe) >= 1250) {
-		XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
+		XE_WARN_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
 		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
 	} else {
-		XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
+		XE_WARN_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
 		lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id);
 	}
 
@@ -213,9 +213,9 @@ static void xe_execlist_make_active(struct xe_execlist_engine *exl)
 	struct xe_execlist_port *port = exl->port;
 	enum xe_engine_priority priority = exl->active_priority;
 
-	XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET);
-	XE_BUG_ON(priority < 0);
-	XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active));
+	XE_WARN_ON(priority == XE_ENGINE_PRIORITY_UNSET);
+	XE_WARN_ON(priority < 0);
+	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
 
 	spin_lock_irq(&port->lock);
 
@@ -321,7 +321,7 @@ static int execlist_engine_init(struct xe_engine *e)
 	struct xe_device *xe = gt_to_xe(e->gt);
 	int err;
 
-	XE_BUG_ON(xe_device_guc_submission_enabled(xe));
+	XE_WARN_ON(xe_device_guc_submission_enabled(xe));
 
 	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
 
@@ -387,7 +387,7 @@ static void execlist_engine_fini_async(struct work_struct *w)
 	struct xe_execlist_engine *exl = e->execlist;
 	unsigned long flags;
 
-	XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+	XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
 
 	spin_lock_irqsave(&exl->port->lock, flags);
 	if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET))
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index aba0784b608ee..e563de8625814 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -45,7 +45,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
 	mutex_init(&fw->lock);
 
 	/* Assuming gen11+ so assert this assumption is correct */
-	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+	XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
 
 	if (xe->info.graphics_verx100 >= 1270) {
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
@@ -67,7 +67,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 	int i, j;
 
 	/* Assuming gen11+ so assert this assumption is correct */
-	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+	XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
 
 	if (!xe_gt_is_media_type(gt))
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index 7c534cdd5fe93..7f304704190e5 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -24,7 +24,7 @@ static inline int
 xe_force_wake_ref(struct xe_force_wake *fw,
 		  enum xe_force_wake_domains domain)
 {
-	XE_BUG_ON(!domain);
+	XE_WARN_ON(!domain);
 	return fw->domains[ffs(domain) - 1].ref;
 }
 
@@ -32,7 +32,7 @@ static inline void
 xe_force_wake_assert_held(struct xe_force_wake *fw,
 			  enum xe_force_wake_domains domain)
 {
-	XE_BUG_ON(!(fw->awake_domains & domain));
+	XE_WARN_ON(!(fw->awake_domains & domain));
 }
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index bf46b90a76ad7..286f36b9e2292 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -58,8 +58,8 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
 
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
 {
-	XE_BUG_ON(addr & XE_PTE_MASK);
-	XE_BUG_ON(addr >= ggtt->size);
+	XE_WARN_ON(addr & XE_PTE_MASK);
+	XE_WARN_ON(addr >= ggtt->size);
 
 	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
 }
@@ -69,7 +69,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
 	u64 end = start + size - 1;
 	u64 scratch_pte;
 
-	XE_BUG_ON(start >= end);
+	XE_WARN_ON(start >= end);
 
 	if (ggtt->scratch)
 		scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
@@ -266,7 +266,7 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
 	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
 		unsigned int i = addr / XE_PAGE_SIZE;
 
-		XE_BUG_ON(addr > U32_MAX);
+		XE_WARN_ON(addr > U32_MAX);
 		if (ggtt->gsm[i] == scratch_pte)
 			continue;
 
@@ -315,7 +315,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 
 	if (XE_WARN_ON(bo->ggtt_node.size)) {
 		/* Someone's already inserted this BO in the GGTT */
-		XE_BUG_ON(bo->ggtt_node.size != bo->size);
+		XE_WARN_ON(bo->ggtt_node.size != bo->size);
 		return 0;
 	}
 
@@ -378,7 +378,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 		return;
 
 	/* This BO is not currently in the GGTT */
-	XE_BUG_ON(bo->ggtt_node.size != bo->size);
+	XE_WARN_ON(bo->ggtt_node.size != bo->size);
 
 	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 932b61e0cf673..2f77b8bbcf539 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -47,7 +47,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
 	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
 		return f25_mhz;
 	default:
-		XE_BUG_ON("NOT_POSSIBLE");
+		XE_WARN_ON("NOT_POSSIBLE");
 		return 0;
 	}
 }
@@ -58,7 +58,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	u32 freq = 0;
 
 	/* Assuming gen11+ so assert this assumption is correct */
-	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+	XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
 
 	if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
 		freq = read_reference_ts_freq(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index e622174a866de..b871e45af813d 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -157,7 +157,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	char name[8];
 	int i;
 
-	XE_BUG_ON(!minor->debugfs_root);
+	XE_WARN_ON(!minor->debugfs_root);
 
 	sprintf(name, "gt%d", gt->info.id);
 	root = debugfs_create_dir(name, minor->debugfs_root);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index cad0ade595ec9..bcbeea62d5105 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -250,7 +250,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	u32 action[MAX_TLB_INVALIDATION_LEN];
 	int len = 0;
 
-	XE_BUG_ON(!vma);
+	XE_WARN_ON(!vma);
 
 	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
 	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
@@ -288,10 +288,10 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			start = ALIGN_DOWN(xe_vma_start(vma), length);
 		}
 
-		XE_BUG_ON(length < SZ_4K);
-		XE_BUG_ON(!is_power_of_2(length));
-		XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
-		XE_BUG_ON(!IS_ALIGNED(start, length));
+		XE_WARN_ON(length < SZ_4K);
+		XE_WARN_ON(!is_power_of_2(length));
+		XE_WARN_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
+		XE_WARN_ON(!IS_ALIGNED(start, length));
 
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
 		action[len++] = xe_vma_vm(vma)->usm.asid;
@@ -300,7 +300,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 		action[len++] = ilog2(length) - ilog2(SZ_4K);
 	}
 
-	XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
+	XE_WARN_ON(len > MAX_TLB_INVALIDATION_LEN);
 
 	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 2530b6243661b..2493c58599488 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -43,9 +43,9 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
 {
 	u32 addr = xe_bo_ggtt_addr(bo);
 
-	XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc)));
-	XE_BUG_ON(addr >= GUC_GGTT_TOP);
-	XE_BUG_ON(bo->size > GUC_GGTT_TOP - addr);
+	XE_WARN_ON(addr < xe_wopcm_size(guc_to_xe(guc)));
+	XE_WARN_ON(addr >= GUC_GGTT_TOP);
+	XE_WARN_ON(bo->size > GUC_GGTT_TOP - addr);
 
 	return addr;
 }
@@ -612,13 +612,13 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
 
 	BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
 
-	XE_BUG_ON(guc->ct.enabled);
-	XE_BUG_ON(!len);
-	XE_BUG_ON(len > VF_SW_FLAG_COUNT);
-	XE_BUG_ON(len > MED_VF_SW_FLAG_COUNT);
-	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) !=
+	XE_WARN_ON(guc->ct.enabled);
+	XE_WARN_ON(!len);
+	XE_WARN_ON(len > VF_SW_FLAG_COUNT);
+	XE_WARN_ON(len > MED_VF_SW_FLAG_COUNT);
+	XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) !=
 		  GUC_HXG_ORIGIN_HOST);
-	XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) !=
+	XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) !=
 		  GUC_HXG_TYPE_REQUEST);
 
 retry:
@@ -724,8 +724,8 @@ static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
 	};
 	int ret;
 
-	XE_BUG_ON(len > 2);
-	XE_BUG_ON(len == 1 && upper_32_bits(val));
+	XE_WARN_ON(len > 2);
+	XE_WARN_ON(len == 1 && upper_32_bits(val));
 
 	/* Self config must go over MMIO */
 	ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index d4c3a5ce32525..a7da29be2e514 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -118,7 +118,7 @@ struct __guc_ads_blob {
 
 static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
 {
-	XE_BUG_ON(!ads->regset_size);
+	XE_WARN_ON(!ads->regset_size);
 
 	return ads->regset_size;
 }
@@ -312,7 +312,7 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
 	struct xe_gt *gt = ads_to_gt(ads);
 	u32 prev_regset_size = ads->regset_size;
 
-	XE_BUG_ON(!ads->bo);
+	XE_WARN_ON(!ads->bo);
 
 	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
 	ads->regset_size = calculate_regset_size(gt);
@@ -518,7 +518,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
 		regset_used += count * sizeof(struct guc_mmio_reg);
 	}
 
-	XE_BUG_ON(regset_used > ads->regset_size);
+	XE_WARN_ON(regset_used > ads->regset_size);
 }
 
 static void guc_um_init_params(struct xe_guc_ads *ads)
@@ -573,7 +573,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
 			offsetof(struct __guc_ads_blob, system_info));
 	u32 base = xe_bo_ggtt_addr(ads->bo);
 
-	XE_BUG_ON(!ads->bo);
+	XE_WARN_ON(!ads->bo);
 
 	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
 	guc_policies_init(ads);
@@ -597,7 +597,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
 			offsetof(struct __guc_ads_blob, system_info));
 	u32 base = xe_bo_ggtt_addr(ads->bo);
 
-	XE_BUG_ON(!ads->bo);
+	XE_WARN_ON(!ads->bo);
 
 	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
 	guc_policies_init(ads);
@@ -647,7 +647,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
 				   engine_enabled_masks[guc_class]))
 			continue;
 
-		XE_BUG_ON(!gt->default_lrc[class]);
+		XE_WARN_ON(!gt->default_lrc[class]);
 
 		real_size = xe_lrc_size(xe, class);
 		alloc_size = PAGE_ALIGN(real_size);
@@ -676,7 +676,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
 		offset += alloc_size;
 	}
 
-	XE_BUG_ON(total_size != ads->golden_lrc_size);
+	XE_WARN_ON(total_size != ads->golden_lrc_size);
 }
 
 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index d322eadbe75a5..7fb2690425f85 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -135,7 +135,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 	struct xe_bo *bo;
 	int err;
 
-	XE_BUG_ON(guc_ct_size() % PAGE_SIZE);
+	XE_WARN_ON(guc_ct_size() % PAGE_SIZE);
 
 	mutex_init(&ct->lock);
 	spin_lock_init(&ct->fast_lock);
@@ -283,7 +283,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
 	struct xe_device *xe = ct_to_xe(ct);
 	int err;
 
-	XE_BUG_ON(ct->enabled);
+	XE_WARN_ON(ct->enabled);
 
 	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
 	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -376,7 +376,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
 
 static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 {
-	XE_BUG_ON(g2h_len > ct->ctbs.g2h.info.space);
+	XE_WARN_ON(g2h_len > ct->ctbs.g2h.info.space);
 
 	if (g2h_len) {
 		lockdep_assert_held(&ct->fast_lock);
@@ -419,8 +419,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	full_len = len + GUC_CTB_HDR_LEN;
 
 	lockdep_assert_held(&ct->lock);
-	XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
-	XE_BUG_ON(tail > h2g->info.size);
+	XE_WARN_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
+	XE_WARN_ON(tail > h2g->info.size);
 
 	/* Command will wrap, zero fill (NOPs), return and check credits again */
 	if (tail + full_len > h2g->info.size) {
@@ -478,10 +478,10 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 {
 	int ret;
 
-	XE_BUG_ON(g2h_len && g2h_fence);
-	XE_BUG_ON(num_g2h && g2h_fence);
-	XE_BUG_ON(g2h_len && !num_g2h);
-	XE_BUG_ON(!g2h_len && num_g2h);
+	XE_WARN_ON(g2h_len && g2h_fence);
+	XE_WARN_ON(num_g2h && g2h_fence);
+	XE_WARN_ON(g2h_len && !num_g2h);
+	XE_WARN_ON(!g2h_len && num_g2h);
 	lockdep_assert_held(&ct->lock);
 
 	if (unlikely(ct->ctbs.h2g.info.broken)) {
@@ -552,7 +552,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	unsigned int sleep_period_ms = 1;
 	int ret;
 
-	XE_BUG_ON(g2h_len && g2h_fence);
+	XE_WARN_ON(g2h_len && g2h_fence);
 	lockdep_assert_held(&ct->lock);
 	xe_device_assert_mem_access(ct_to_xe(ct));
 
@@ -622,7 +622,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
 {
 	int ret;
 
-	XE_BUG_ON(g2h_len && g2h_fence);
+	XE_WARN_ON(g2h_len && g2h_fence);
 
 	mutex_lock(&ct->lock);
 	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index c8f875e970ab8..76aed9c348abc 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -120,7 +120,7 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 
-	XE_BUG_ON(!guc->hwconfig.bo);
+	XE_WARN_ON(!guc->hwconfig.bo);
 
 	xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0,
 			   guc->hwconfig.size);
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 403aaafcaba6b..63904007af0af 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -55,12 +55,12 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
 	size_t size;
 	int i, j;
 
-	XE_BUG_ON(!log->bo);
+	XE_WARN_ON(!log->bo);
 
 	size = log->bo->size;
 
 #define DW_PER_READ		128
-	XE_BUG_ON(size % (DW_PER_READ * sizeof(u32)));
+	XE_WARN_ON(size % (DW_PER_READ * sizeof(u32)));
 	for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
 		u32 read[DW_PER_READ];
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index a2eeb3ffe5489..9a4c96cb3f420 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -329,7 +329,7 @@ static void __guc_engine_policy_start_klv(struct engine_policy *policy,
 static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
 					   u32 data) \
 { \
-	XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
 \
 	policy->h2g.klv[policy->count].kl = \
 		FIELD_PREP(GUC_KLV_0_KEY, \
@@ -358,7 +358,7 @@ static void init_policies(struct xe_guc *guc, struct xe_engine *e)
 	u32 timeslice_us = e->sched_props.timeslice_us;
 	u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;
 
-	XE_BUG_ON(!engine_registered(e));
+	XE_WARN_ON(!engine_registered(e));
 
 	__guc_engine_policy_start_klv(&policy, e->guc->id);
 	__guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
@@ -396,7 +396,7 @@ static void __register_mlrc_engine(struct xe_guc *guc,
 	int len = 0;
 	int i;
 
-	XE_BUG_ON(!xe_engine_is_parallel(e));
+	XE_WARN_ON(!xe_engine_is_parallel(e));
 
 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
 	action[len++] = info->flags;
@@ -419,7 +419,7 @@ static void __register_mlrc_engine(struct xe_guc *guc,
 		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
 	}
 
-	XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
+	XE_WARN_ON(len > MAX_MLRC_REG_SIZE);
 #undef MAX_MLRC_REG_SIZE
 
 	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
@@ -453,7 +453,7 @@ static void register_engine(struct xe_engine *e)
 	struct xe_lrc *lrc = e->lrc;
 	struct guc_ctxt_registration_info info;
 
-	XE_BUG_ON(engine_registered(e));
+	XE_WARN_ON(engine_registered(e));
 
 	memset(&info, 0, sizeof(info));
 	info.context_idx = e->guc->id;
@@ -543,7 +543,7 @@ static int wq_noop_append(struct xe_engine *e)
 	if (wq_wait_for_space(e, wq_space_until_wrap(e)))
 		return -ENODEV;
 
-	XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+	XE_WARN_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
 
 	parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
 		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
@@ -583,13 +583,13 @@ static void wq_item_append(struct xe_engine *e)
 		wqi[i++] = lrc->ring.tail / sizeof(u64);
 	}
 
-	XE_BUG_ON(i != wqi_size / sizeof(u32));
+	XE_WARN_ON(i != wqi_size / sizeof(u32));
 
 	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
 				      wq[e->guc->wqi_tail / sizeof(u32)]));
 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
 	e->guc->wqi_tail += wqi_size;
-	XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);
+	XE_WARN_ON(e->guc->wqi_tail > WQ_SIZE);
 
 	xe_device_wmb(xe);
 
@@ -608,7 +608,7 @@ static void submit_engine(struct xe_engine *e)
 	int len = 0;
 	bool extra_submit = false;
 
-	XE_BUG_ON(!engine_registered(e));
+	XE_WARN_ON(!engine_registered(e));
 
 	if (xe_engine_is_parallel(e))
 		wq_item_append(e);
@@ -656,8 +656,8 @@ guc_engine_run_job(struct drm_sched_job *drm_job)
 	struct xe_engine *e = job->engine;
 	bool lr = xe_engine_is_lr(e);
 
-	XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
-		  !engine_banned(e) && !engine_suspended(e));
+	XE_WARN_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
+		   !engine_banned(e) && !engine_suspended(e));
 
 	trace_xe_sched_job_run(job);
 
@@ -984,7 +984,7 @@ static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
 	struct xe_engine *e = msg->private_data;
 	struct xe_guc *guc = engine_to_guc(e);
 
-	XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
+	XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
 	trace_xe_engine_cleanup_entity(e);
 
 	if (engine_registered(e))
@@ -1012,9 +1012,9 @@ static void suspend_fence_signal(struct xe_engine *e)
 {
 	struct xe_guc *guc = engine_to_guc(e);
 
-	XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
-		  !guc_read_stopped(guc));
-	XE_BUG_ON(!e->guc->suspend_pending);
+	XE_WARN_ON(!engine_suspended(e) && !engine_killed(e) &&
+		   !guc_read_stopped(guc));
+	XE_WARN_ON(!e->guc->suspend_pending);
 
 	e->guc->suspend_pending = false;
 	smp_wmb();
@@ -1100,7 +1100,7 @@ static void guc_engine_process_msg(struct xe_sched_msg *msg)
 		__guc_engine_process_msg_resume(msg);
 		break;
 	default:
-		XE_BUG_ON("Unknown message type");
+		XE_WARN_ON("Unknown message type");
 	}
 }
 
@@ -1122,7 +1122,7 @@ static int guc_engine_init(struct xe_engine *e)
 	long timeout;
 	int err;
 
-	XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
+	XE_WARN_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
 
 	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
 	if (!ge)
@@ -1286,9 +1286,9 @@ static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
 {
 	struct xe_gpu_scheduler *sched = &e->guc->sched;
 
-	XE_BUG_ON(engine_registered(e));
-	XE_BUG_ON(engine_banned(e));
-	XE_BUG_ON(engine_killed(e));
+	XE_WARN_ON(engine_registered(e));
+	XE_WARN_ON(engine_banned(e));
+	XE_WARN_ON(engine_killed(e));
 
 	sched->base.timeout = job_timeout_ms;
 
@@ -1320,7 +1320,7 @@ static void guc_engine_resume(struct xe_engine *e)
 {
 	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;
 
-	XE_BUG_ON(e->guc->suspend_pending);
+	XE_WARN_ON(e->guc->suspend_pending);
 
 	guc_engine_add_msg(e, msg, RESUME);
 }
@@ -1416,7 +1416,7 @@ int xe_guc_submit_stop(struct xe_guc *guc)
 	struct xe_engine *e;
 	unsigned long index;
 
-	XE_BUG_ON(guc_read_stopped(guc) != 1);
+	XE_WARN_ON(guc_read_stopped(guc) != 1);
 
 	mutex_lock(&guc->submission_state.lock);
 
@@ -1454,7 +1454,7 @@ int xe_guc_submit_start(struct xe_guc *guc)
 	struct xe_engine *e;
 	unsigned long index;
 
-	XE_BUG_ON(guc_read_stopped(guc) != 1);
+	XE_WARN_ON(guc_read_stopped(guc) != 1);
 
 	mutex_lock(&guc->submission_state.lock);
 	atomic_dec(&guc->submission_state.stopped);
@@ -1484,7 +1484,7 @@ g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
 		return NULL;
 	}
 
-	XE_BUG_ON(e->guc->id != guc_id);
+	XE_WARN_ON(e->guc->id != guc_id);
 
 	return e;
 }
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index dc1708b4e94a3..177cda14864ed 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -72,7 +72,7 @@ int xe_huc_auth(struct xe_huc *huc)
 	if (xe_uc_fw_is_disabled(&huc->fw))
 		return 0;
 
-	XE_BUG_ON(xe_uc_fw_is_running(&huc->fw));
+	XE_WARN_ON(xe_uc_fw_is_running(&huc->fw));
 
 	if (!xe_uc_fw_is_loaded(&huc->fw))
 		return -ENOEXEC;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 1af5cccd11426..ead5aa2856195 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -237,7 +237,7 @@ static void hw_engine_fini(struct drm_device *drm, void *arg)
 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
 				   u32 val)
 {
-	XE_BUG_ON(reg.addr & hwe->mmio_base);
+	XE_WARN_ON(reg.addr & hwe->mmio_base);
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
 	reg.addr += hwe->mmio_base;
@@ -247,7 +247,7 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
 
 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
 {
-	XE_BUG_ON(reg.addr & hwe->mmio_base);
+	XE_WARN_ON(reg.addr & hwe->mmio_base);
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
 	reg.addr += hwe->mmio_base;
@@ -351,7 +351,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 
 	info = &engine_infos[id];
 
-	XE_BUG_ON(hwe->gt);
+	XE_WARN_ON(hwe->gt);
 
 	hwe->gt = gt;
 	hwe->class = info->class;
@@ -377,8 +377,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	struct xe_tile *tile = gt_to_tile(gt);
 	int err;
 
-	XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
-	XE_BUG_ON(!(gt->info.engine_mask & BIT(id)));
+	XE_WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
+	XE_WARN_ON(!(gt->info.engine_mask & BIT(id)));
 
 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
 	xe_reg_sr_apply_whitelist(hwe);
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index ffe1a3992ef54..a6094c81f2ad0 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -188,7 +188,7 @@ static void xe_hw_fence_release(struct dma_fence *dma_fence)
 	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
 
 	trace_xe_hw_fence_free(fence);
-	XE_BUG_ON(!list_empty(&fence->irq_link));
+	XE_WARN_ON(!list_empty(&fence->irq_link));
 	call_rcu(&dma_fence->rcu, fence_free);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index b726599f62284..05f3d8d683793 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -108,7 +108,7 @@ static void set_offsets(u32 *regs,
 		*regs |= MI_LRI_LRM_CS_MMIO;
 		regs++;
 
-		XE_BUG_ON(!count);
+		XE_WARN_ON(!count);
 		do {
 			u32 offset = 0;
 			u8 v;
@@ -528,7 +528,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
 { \
 	struct iosys_map map = lrc->bo->vmap; \
 \
-	XE_BUG_ON(iosys_map_is_null(&map)); \
+	XE_WARN_ON(iosys_map_is_null(&map)); \
 	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
 	return map; \
 } \
@@ -759,12 +759,12 @@ void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
 	u32 rhs;
 	size_t aligned_size;
 
-	XE_BUG_ON(!IS_ALIGNED(size, 4));
+	XE_WARN_ON(!IS_ALIGNED(size, 4));
 	aligned_size = ALIGN(size, 8);
 
 	ring = __xe_lrc_ring_map(lrc);
 
-	XE_BUG_ON(lrc->ring.tail >= lrc->ring.size);
+	XE_WARN_ON(lrc->ring.tail >= lrc->ring.size);
 	rhs = lrc->ring.size - lrc->ring.tail;
 	if (size > rhs) {
 		__xe_lrc_write_ring(lrc, ring, data, rhs);
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
index a7105050bce02..daf56c846d033 100644
--- a/drivers/gpu/drm/xe/xe_macros.h
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -9,7 +9,6 @@
 #include <linux/bug.h>
 
 #define XE_WARN_ON WARN_ON
-#define XE_BUG_ON BUG_ON
 
 #define XE_IOCTL_DBG(xe, cond) \
 	((cond) && (drm_dbg(&(xe)->drm, \
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 0405136bc0b1f..9c4b432d496f5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -106,7 +106,7 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg)
 
 static u64 xe_migrate_vm_addr(u64 slot, u32 level)
 {
-	XE_BUG_ON(slot >= NUM_PT_SLOTS);
+	XE_WARN_ON(slot >= NUM_PT_SLOTS);
 
 	/* First slot is reserved for mapping of PT bo and bb, start from 1 */
 	return (slot + 1ULL) << xe_pt_shift(level + 1);
@@ -171,7 +171,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
 
 	/* Need to be sure everything fits in the first PT, or create more */
-	XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M);
+	XE_WARN_ON(m->batch_base_ofs + batch->size >= SZ_2M);
 
 	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
 				  num_entries * XE_PAGE_SIZE,
@@ -205,7 +205,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	}
 
 	if (!IS_DGFX(xe)) {
-		XE_BUG_ON(xe->info.supports_usm);
+		XE_WARN_ON(xe->info.supports_usm);
 
 		/* Write out batch too */
 		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
@@ -487,7 +487,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 
 	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
 				    NUM_CCS_BYTES_PER_BLOCK);
-	XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
+	XE_WARN_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
 	*cs++ = XY_CTRL_SURF_COPY_BLT |
 		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
 		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
@@ -507,9 +507,9 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 		      u64 src_ofs, u64 dst_ofs, unsigned int size,
 		      unsigned int pitch)
 {
-	XE_BUG_ON(size / pitch > S16_MAX);
-	XE_BUG_ON(pitch / 4 > S16_MAX);
-	XE_BUG_ON(pitch > U16_MAX);
+	XE_WARN_ON(size / pitch > S16_MAX);
+	XE_WARN_ON(pitch / 4 > S16_MAX);
+	XE_WARN_ON(pitch > U16_MAX);
 
 	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
 	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
@@ -569,7 +569,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 		 * At the moment, we don't support copying CCS metadata from
 		 * system to system.
 		 */
-		XE_BUG_ON(!src_is_vram && !dst_is_vram);
+		XE_WARN_ON(!src_is_vram && !dst_is_vram);
 
 		emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
 			      src_is_vram, dst_size);
@@ -781,7 +781,7 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
 	*cs++ = upper_32_bits(src_ofs);
 	*cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
 
-	XE_BUG_ON(cs - bb->cs != len + bb->len);
+	XE_WARN_ON(cs - bb->cs != len + bb->len);
 
 	bb->len += len;
 }
@@ -819,7 +819,7 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
 		*cs++ = 0;
 	}
 
-	XE_BUG_ON(cs - bb->cs != len + bb->len);
+	XE_WARN_ON(cs - bb->cs != len + bb->len);
 
 	bb->len += len;
 }
@@ -992,9 +992,9 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 	 * PDE. This requires a BO that is almost vm->size big.
 	 *
 	 * This shouldn't be possible in practice.. might change when 16K
-	 * pages are used. Hence the BUG_ON.
+	 * pages are used. Hence the XE_WARN_ON.
 	 */
-	XE_BUG_ON(update->qwords > 0x1ff);
+	XE_WARN_ON(update->qwords > 0x1ff);
 	if (!ppgtt_ofs) {
 		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
 							   XE_PAGE_SIZE));
@@ -1184,7 +1184,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
 	 * Should be reasonably bound..
 	 */
-	XE_BUG_ON(batch_size >= SZ_128K);
+	XE_WARN_ON(batch_size >= SZ_128K);
 
 	bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm);
 	if (IS_ERR(bb))
@@ -1194,7 +1194,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	if (!IS_DGFX(xe)) {
 		ppgtt_ofs = NUM_KERNEL_PDE - 1;
 		if (eng) {
-			XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT);
+			XE_WARN_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT);
 
 			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
 						 GFP_KERNEL, true, 0);
@@ -1223,7 +1223,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		for (i = 0; i < num_updates; i++) {
 			struct xe_bo *pt_bo = updates[i].pt_bo;
 
-			BUG_ON(pt_bo->size != SZ_4K);
+			XE_WARN_ON(pt_bo->size != SZ_4K);
 
 			addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index d9192bf503624..b82ce01cc4cb9 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -106,7 +106,7 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
 		pte |= XE_PDPE_PS_1G;
 
 	/* XXX: Does hw support 1 GiB pages? */
-	XE_BUG_ON(pt_level > 2);
+	XE_WARN_ON(pt_level > 2);
 
 	return pte;
 }
@@ -196,7 +196,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 	pt->level = level;
 	pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
 
-	XE_BUG_ON(level > XE_VM_MAX_LEVEL);
+	XE_WARN_ON(level > XE_VM_MAX_LEVEL);
 
 	return pt;
 
@@ -265,7 +265,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
 	if (!pt)
 		return;
 
-	XE_BUG_ON(!list_empty(&pt->bo->vmas));
+	XE_WARN_ON(!list_empty(&pt->bo->vmas));
 	xe_bo_unpin(pt->bo);
 	xe_bo_put_deferred(pt->bo, deferred);
 
@@ -849,8 +849,8 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
 	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
 	pgoff_t end_offset;
 
-	XE_BUG_ON(!*child);
-	XE_BUG_ON(!level && xe_child->is_compact);
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level && xe_child->is_compact);
 
 	/*
 	 * Note that we're called from an entry callback, and we're dealing
@@ -1004,7 +1004,7 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
 	*num_entries = 0;
 	err = xe_pt_stage_bind(tile, vma, entries, num_entries);
 	if (!err)
-		BUG_ON(!*num_entries);
+		XE_WARN_ON(!*num_entries);
 	else /* abort! */
 		xe_pt_abort_bind(vma, entries, *num_entries);
 
@@ -1026,7 +1026,7 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe,
 		u64 end;
 		u64 start;
 
-		XE_BUG_ON(entry->pt->is_compact);
+		XE_WARN_ON(entry->pt->is_compact);
 		start = entry->ofs * page_size;
 		end = start + page_size * entry->qwords;
 		vm_dbg(&xe->drm,
@@ -1356,7 +1356,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
 	if (err)
 		goto err;
-	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+	XE_WARN_ON(num_entries > ARRAY_SIZE(entries));
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 	xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
@@ -1515,8 +1515,8 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
 {
 	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
 
-	XE_BUG_ON(!*child);
-	XE_BUG_ON(!level && xe_child->is_compact);
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level && xe_child->is_compact);
 
 	xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
 
@@ -1707,7 +1707,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 	       xe_vma_start(vma), xe_vma_end(vma) - 1, e);
 
 	num_entries = xe_pt_stage_unbind(tile, vma, entries);
-	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+	XE_WARN_ON(num_entries > ARRAY_SIZE(entries));
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index dda963fe33009..5cb4b66a5d748 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -79,7 +79,7 @@ static inline void xe_res_first(struct ttm_resource *res,
 	if (!res)
 		goto fallback;
 
-	XE_BUG_ON(start + size > res->size);
+	XE_WARN_ON(start + size > res->size);
 
 	cur->mem_type = res->mem_type;
 
@@ -139,7 +139,7 @@ static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
 	while (start >= sg_dma_len(sgl)) {
 		start -= sg_dma_len(sgl);
 		sgl = sg_next(sgl);
-		XE_BUG_ON(!sgl);
+		XE_WARN_ON(!sgl);
 	}
 
 	cur->start = start;
@@ -161,9 +161,9 @@ static inline void xe_res_first_sg(const struct sg_table *sg,
 				   u64 start, u64 size,
 				   struct xe_res_cursor *cur)
 {
-	XE_BUG_ON(!sg);
-	XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
-		  !IS_ALIGNED(size, PAGE_SIZE));
+	XE_WARN_ON(!sg);
+	XE_WARN_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
+		   !IS_ALIGNED(size, PAGE_SIZE));
 	cur->node = NULL;
 	cur->start = start;
 	cur->remaining = size;
@@ -187,7 +187,7 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
 	struct list_head *next;
 	u64 start;
 
-	XE_BUG_ON(size > cur->remaining);
+	XE_WARN_ON(size > cur->remaining);
 
 	cur->remaining -= size;
 	if (!cur->remaining)
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 4cfd78e1ffa58..2d0d392cd6916 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -233,7 +233,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
@@ -291,7 +291,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
@@ -339,7 +339,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
@@ -369,7 +369,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index c87f65c98b3da..85fd5980191c0 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -142,7 +142,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
 
 		/* Sanity check */
 		for (j = 0; j < e->width; ++j)
-			XE_BUG_ON(cf->base.seqno != fences[j]->seqno);
+			XE_WARN_ON(cf->base.seqno != fences[j]->seqno);
 
 		job->fence = &cf->base;
 	}
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 7bba8fff5a5d9..be0a25e239291 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -205,7 +205,7 @@ u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
 	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
 	struct xe_res_cursor cur;
 
-	XE_BUG_ON(!mgr->io_base);
+	XE_WARN_ON(!mgr->io_base);
 
 	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
 		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
@@ -245,7 +245,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
 #ifdef CONFIG_X86
 	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);
 
-	XE_BUG_ON(IS_DGFX(xe));
+	XE_WARN_ON(IS_DGFX(xe));
 
 	/* XXX: Require BO to be mapped to GGTT? */
 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 5801c10f3ccc4..4b04f6e5388d4 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -158,7 +158,7 @@ __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
 	if (type == XE_UC_FW_TYPE_GUC)
 		return container_of(uc_fw, struct xe_gt, uc.guc.fw);
 
-	XE_BUG_ON(type != XE_UC_FW_TYPE_HUC);
+	XE_WARN_ON(type != XE_UC_FW_TYPE_HUC);
 	return container_of(uc_fw, struct xe_gt, uc.huc.fw);
 }
 
@@ -194,7 +194,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 	u32 count;
 	int i;
 
-	XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
+	XE_WARN_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
 	entries = blobs_all[uc_fw->type].entries;
 	count = blobs_all[uc_fw->type].count;
 
@@ -223,8 +223,8 @@ size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
 	u32 size = min_t(u32, uc_fw->rsa_size, max_len);
 
-	XE_BUG_ON(size % 4);
-	XE_BUG_ON(!xe_uc_fw_is_available(uc_fw));
+	XE_WARN_ON(size % 4);
+	XE_WARN_ON(!xe_uc_fw_is_available(uc_fw));
 
 	xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
 			   xe_uc_fw_rsa_offset(uc_fw), size);
@@ -248,7 +248,7 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
 	struct xe_guc *guc = &gt->uc.guc;
 
-	XE_BUG_ON(uc_fw->type != XE_UC_FW_TYPE_GUC);
+	XE_WARN_ON(uc_fw->type != XE_UC_FW_TYPE_GUC);
 	XE_WARN_ON(uc_fw->major_ver_found < 70);
 
 	if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) {
@@ -335,8 +335,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	 * before we're looked at the HW caps to see if we have uc support
 	 */
 	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
-	XE_BUG_ON(uc_fw->status);
-	XE_BUG_ON(uc_fw->path);
+	XE_WARN_ON(uc_fw->status);
+	XE_WARN_ON(uc_fw->path);
 
 	uc_fw_auto_select(xe, uc_fw);
 	xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
@@ -502,7 +502,7 @@ int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 	int err;
 
 	/* make sure the status was cleared the last time we reset the uc */
-	XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw));
+	XE_WARN_ON(xe_uc_fw_is_loaded(uc_fw));
 
 	if (!xe_uc_fw_is_loadable(uc_fw))
 		return -ENOEXEC;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index e16267e712807..a519c77d49621 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -104,7 +104,7 @@ static inline enum xe_uc_fw_status
 __xe_uc_fw_status(struct xe_uc_fw *uc_fw)
 {
 	/* shouldn't call this before checking hw/blob availability */
-	XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
+	XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
 	return uc_fw->status;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 205795823555c..0bebdac2287c8 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -63,7 +63,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
 	bool read_only = xe_vma_read_only(vma);
 
 	lockdep_assert_held(&vm->lock);
-	XE_BUG_ON(!xe_vma_is_userptr(vma));
+	XE_WARN_ON(!xe_vma_is_userptr(vma));
 retry:
 	if (vma->gpuva.flags & XE_VMA_DESTROYED)
 		return 0;
@@ -252,7 +252,7 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
 		struct dma_fence *fence;
 
 		link = list->next;
-		XE_BUG_ON(link == list);
+		XE_WARN_ON(link == list);
 
 		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
 					     e, e->compute.context,
@@ -329,7 +329,7 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
 	int err;
 	bool wait;
 
-	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
 
 	down_write(&vm->lock);
 
@@ -549,7 +549,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	long wait;
 	int __maybe_unused tries = 0;
 
-	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
 	trace_xe_vm_rebind_worker_enter(vm);
 
 	down_write(&vm->lock);
@@ -708,7 +708,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 	struct dma_fence *fence;
 	long err;
 
-	XE_BUG_ON(!xe_vma_is_userptr(vma));
+	XE_WARN_ON(!xe_vma_is_userptr(vma));
 	trace_xe_vma_userptr_invalidate(vma);
 
 	if (!mmu_notifier_range_blockable(range))
@@ -877,8 +877,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	struct xe_tile *tile;
 	u8 id;
 
-	XE_BUG_ON(start >= end);
-	XE_BUG_ON(end >= vm->size);
+	XE_WARN_ON(start >= end);
+	XE_WARN_ON(end >= vm->size);
 
 	if (!bo && !is_null)	/* userptr */
 		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
@@ -1075,7 +1075,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	struct xe_vm *vm = xe_vma_vm(vma);
 
 	lockdep_assert_held_write(&vm->lock);
-	XE_BUG_ON(!list_empty(&vma->combined_links.destroy));
+	XE_WARN_ON(!list_empty(&vma->combined_links.destroy));
 
 	if (xe_vma_is_userptr(vma)) {
 		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
@@ -1153,7 +1153,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
 	if (xe_vm_is_closed_or_banned(vm))
 		return NULL;
 
-	XE_BUG_ON(start + range > vm->size);
+	XE_WARN_ON(start + range > vm->size);
 
 	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
 
@@ -1164,7 +1164,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
 	int err;
 
-	XE_BUG_ON(xe_vma_vm(vma) != vm);
+	XE_WARN_ON(xe_vma_vm(vma) != vm);
 	lockdep_assert_held(&vm->lock);
 
 	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
@@ -1175,7 +1175,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 
 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
-	XE_BUG_ON(xe_vma_vm(vma) != vm);
+	XE_WARN_ON(xe_vma_vm(vma) != vm);
 	lockdep_assert_held(&vm->lock);
 
 	drm_gpuva_remove(&vma->gpuva);
@@ -1422,7 +1422,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	struct drm_gpuva *gpuva, *next;
 	u8 id;
 
-	XE_BUG_ON(vm->preempt.num_engines);
+	XE_WARN_ON(vm->preempt.num_engines);
 
 	xe_vm_close(vm);
 	flush_async_ops(vm);
@@ -1795,7 +1795,7 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence)
 		struct async_op_fence *afence =
 			container_of(fence, struct async_op_fence, fence);
 
-		XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
+		XE_WARN_ON(xe_vm_no_dma_fences(afence->vm));
 
 		smp_rmb();
 		return wait_event_interruptible(afence->wq, afence->started);
@@ -1821,7 +1821,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 	} else {
 		int i;
 
-		XE_BUG_ON(!xe_vm_in_fault_mode(vm));
+		XE_WARN_ON(!xe_vm_in_fault_mode(vm));
 
 		fence = dma_fence_get_stub();
 		if (last_op) {
@@ -2100,7 +2100,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 {
 	int err;
 
-	XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type));
+	XE_WARN_ON(region > ARRAY_SIZE(region_to_mem_type));
 
 	if (!xe_vma_has_no_bo(vma)) {
 		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
@@ -2181,7 +2181,7 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
 			return -ENODATA;
 		break;
 	default:
-		XE_BUG_ON("NOT POSSIBLE");
+		XE_WARN_ON("NOT POSSIBLE");
 		return -EINVAL;
 	}
 
@@ -2239,7 +2239,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
 		break;
 	default:
-		XE_BUG_ON("NOT POSSIBLE");
+		XE_WARN_ON("NOT POSSIBLE");
 	}
 }
 #else
@@ -2315,7 +2315,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		}
 		break;
 	case XE_VM_BIND_OP_UNMAP_ALL:
-		XE_BUG_ON(!bo);
+		XE_WARN_ON(!bo);
 
 		err = xe_bo_lock(bo, &ww, 0, true);
 		if (err)
@@ -2338,7 +2338,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		}
 		break;
 	default:
-		XE_BUG_ON("NOT POSSIBLE");
+		XE_WARN_ON("NOT POSSIBLE");
 		ops = ERR_PTR(-EINVAL);
 	}
 
@@ -2425,7 +2425,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 	int err, i;
 
 	lockdep_assert_held_write(&vm->lock);
-	XE_BUG_ON(num_ops_list > 1 && !async);
+	XE_WARN_ON(num_ops_list > 1 && !async);
 
 	if (num_syncs && async) {
 		u64 seqno;
@@ -2454,7 +2454,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 			bool first = !async_list;
 
-			XE_BUG_ON(!first && !async);
+			XE_WARN_ON(!first && !async);
 
 			INIT_LIST_HEAD(&op->link);
 			if (first)
@@ -2566,7 +2566,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 				/* Nothing to do */
 				break;
 			default:
-				XE_BUG_ON("NOT POSSIBLE");
+				XE_WARN_ON("NOT POSSIBLE");
 			}
 
 			last_op = op;
@@ -2628,7 +2628,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 		/* Nothing to do */
 		break;
 	default:
-		XE_BUG_ON("NOT POSSIBLE");
+		XE_WARN_ON("NOT POSSIBLE");
 	}
 
 	op->flags |= XE_VMA_OP_COMMITTED;
@@ -2746,7 +2746,7 @@ again:
 				     op->flags & XE_VMA_OP_LAST);
 		break;
 	default:
-		XE_BUG_ON("NOT POSSIBLE");
+		XE_WARN_ON("NOT POSSIBLE");
 	}
 
 	ttm_eu_backoff_reservation(&ww, &objs);
@@ -2805,7 +2805,7 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 					  op);
 		break;
 	default:
-		XE_BUG_ON("NOT POSSIBLE");
+		XE_WARN_ON("NOT POSSIBLE");
 	}
 
 	return ret;
@@ -2881,7 +2881,7 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
 		/* Nothing to do */
 		break;
 	default:
-		XE_BUG_ON("NOT POSSIBLE");
+		XE_WARN_ON("NOT POSSIBLE");
 	}
 }
 
@@ -3413,7 +3413,7 @@ int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 	LIST_HEAD(objs);
 	LIST_HEAD(dups);
 
-	XE_BUG_ON(!ww);
+	XE_WARN_ON(!ww);
 
 	tv_vm.num_shared = num_resv;
 	tv_vm.bo = xe_vm_ttm_bo(vm);
@@ -3447,7 +3447,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	u8 id;
 	int ret;
 
-	XE_BUG_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma)));
+	XE_WARN_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma)));
 	XE_WARN_ON(xe_vma_is_null(vma));
 	trace_xe_vma_usm_invalidate(vma);
 
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index c4202df1d4f03..761eed3a022f7 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -45,7 +45,7 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
 		passed = (rvalue & mask) <= (value & mask);
 		break;
 	default:
-		XE_BUG_ON("Not possible");
+		XE_WARN_ON("Not possible");
 	}
 
 	return passed ? 0 : 1;
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index d9acf8783b838..9a85bcc188303 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -144,10 +144,10 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
 	u32 mask;
 	int err;
 
-	XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK));
-	XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK);
-	XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
-	XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
+	XE_WARN_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+	XE_WARN_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+	XE_WARN_ON(!(size & GUC_WOPCM_SIZE_MASK));
+	XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK);
 
 	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
 	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
@@ -213,9 +213,9 @@ int xe_wopcm_init(struct xe_wopcm *wopcm)
 	drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K);
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
-	XE_BUG_ON(guc_fw_size >= wopcm->size);
-	XE_BUG_ON(huc_fw_size >= wopcm->size);
-	XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+	XE_WARN_ON(guc_fw_size >= wopcm->size);
+	XE_WARN_ON(huc_fw_size >= wopcm->size);
+	XE_WARN_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
 
 	locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size);
 	if (locked) {
@@ -256,8 +256,8 @@ check:
 			   guc_fw_size, huc_fw_size)) {
 		wopcm->guc.base = guc_wopcm_base;
 		wopcm->guc.size = guc_wopcm_size;
-		XE_BUG_ON(!wopcm->guc.base);
-		XE_BUG_ON(!wopcm->guc.size);
+		XE_WARN_ON(!wopcm->guc.base);
+		XE_WARN_ON(!wopcm->guc.size);
 	} else {
 		drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n");
 		return -E2BIG;
-- 
GitLab


From f82686ef74b96a51ba6c38f3ce119ba7f7995210 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 28 Jul 2023 20:53:41 -0700
Subject: [PATCH 1796/2340] drm/xe: remove header variable from parse_g2h_msg

The header variable is unused, remove it.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Suggested-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 7fb2690425f85..fb1d63ffaee49 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -829,12 +829,11 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
 static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 {
 	struct xe_device *xe = ct_to_xe(ct);
-	u32 header, hxg, origin, type;
+	u32 hxg, origin, type;
 	int ret;
 
 	lockdep_assert_held(&ct->lock);
 
-	header = msg[0];
 	hxg = msg[1];
 
 	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
-- 
GitLab


From 2a368a09ae1c3f7aebe6210927a1335186d3c6f7 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 25 Jul 2023 17:12:39 +0200
Subject: [PATCH 1797/2340] drm/xe: Fix error paths of __xe_bo_create_locked

ttm_bo_init_reserved() calls the destroy() callback if it fails.

Because of this, __xe_bo_create_locked is required to be responsible
for freeing the bo even when it's passed in as argument.

Additionally, if the placement check fails, the bo was kept alive.
Fix it too.

Reported-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index cf0faaefd03d4..a126130027669 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1200,8 +1200,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	/* Only kernel objects should set GT */
 	XE_WARN_ON(tile && type != ttm_bo_type_kernel);
 
-	if (XE_WARN_ON(!size))
+	if (XE_WARN_ON(!size)) {
+		xe_bo_free(bo);
 		return ERR_PTR(-EINVAL);
+	}
 
 	if (!bo) {
 		bo = xe_bo_alloc();
@@ -1239,8 +1241,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 
 	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
-		if (WARN_ON(err))
+		if (WARN_ON(err)) {
+			xe_ttm_bo_destroy(&bo->ttm);
 			return ERR_PTR(err);
+		}
 	}
 
 	/* Defer populating type_sg bos */
-- 
GitLab


From c22a4ed0c325cd29d7baf07d4cf2c127550b8859 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 1 Aug 2023 12:28:14 +0200
Subject: [PATCH 1798/2340] drm/xe: Rename xe_engine.[ch] to xe_exec_queue.[ch]

This is a preparation commit for a larger renaming of engine to exec queue.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                         | 2 +-
 drivers/gpu/drm/xe/xe_devcoredump.c                 | 2 +-
 drivers/gpu/drm/xe/xe_device.c                      | 2 +-
 drivers/gpu/drm/xe/xe_exec.c                        | 2 +-
 drivers/gpu/drm/xe/{xe_engine.c => xe_exec_queue.c} | 2 +-
 drivers/gpu/drm/xe/{xe_engine.h => xe_exec_queue.h} | 0
 drivers/gpu/drm/xe/xe_execlist.c                    | 2 +-
 drivers/gpu/drm/xe/xe_gt.c                          | 2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c                  | 2 +-
 drivers/gpu/drm/xe/xe_migrate.c                     | 2 +-
 drivers/gpu/drm/xe/xe_mocs.c                        | 2 +-
 drivers/gpu/drm/xe/xe_preempt_fence.c               | 2 +-
 drivers/gpu/drm/xe/xe_query.c                       | 2 +-
 drivers/gpu/drm/xe/xe_sched_job.c                   | 2 +-
 drivers/gpu/drm/xe/xe_vm.c                          | 2 +-
 15 files changed, 14 insertions(+), 14 deletions(-)
 rename drivers/gpu/drm/xe/{xe_engine.c => xe_exec_queue.c} (99%)
 rename drivers/gpu/drm/xe/{xe_engine.h => xe_exec_queue.h} (100%)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 312e643397bad..f8d63c9b97d5b 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -54,9 +54,9 @@ xe-y += xe_bb.o \
 	xe_device.o \
 	xe_device_sysfs.o \
 	xe_dma_buf.o \
-	xe_engine.o \
 	xe_exec.o \
 	xe_execlist.o \
+	xe_exec_queue.o \
 	xe_force_wake.o \
 	xe_ggtt.o \
 	xe_gpu_scheduler.o \
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index f53f4b51233ae..61ff97ea76593 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -10,7 +10,7 @@
 #include <generated/utsrelease.h>
 
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_guc_ct.h"
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 63ed59c61c840..a8ab86379ed67 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -18,7 +18,7 @@
 #include "xe_debugfs.h"
 #include "xe_dma_buf.h"
 #include "xe_drv.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_exec.h"
 #include "xe_gt.h"
 #include "xe_irq.h"
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 89d167a432f67..a043c649249b5 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -12,7 +12,7 @@
 
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_macros.h"
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_exec_queue.c
similarity index 99%
rename from drivers/gpu/drm/xe/xe_engine.c
rename to drivers/gpu/drm/xe/xe_exec_queue.c
index f60d29b2b5066..f1cfc4b604d48 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -3,7 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 
 #include <linux/nospec.h>
 
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_exec_queue.h
similarity index 100%
rename from drivers/gpu/drm/xe/xe_engine.h
rename to drivers/gpu/drm/xe/xe_exec_queue.h
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index a4d9531e3516c..5b6748e1a37fa 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -14,7 +14,7 @@
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_hw_fence.h"
 #include "xe_lrc.h"
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 28bf577c7bf2e..543b085723c58 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -14,7 +14,7 @@
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_execlist.h"
 #include "xe_force_wake.h"
 #include "xe_ggtt.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 9a4c96cb3f420..5198e91eeefbe 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -16,7 +16,7 @@
 #include "regs/xe_lrc_layout.h"
 #include "xe_devcoredump.h"
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_force_wake.h"
 #include "xe_gpu_scheduler.h"
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9c4b432d496f5..60f7226c92ff3 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -16,7 +16,7 @@
 #include "tests/xe_test.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_hw_engine.h"
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index ccc852500eda0..c9653978fc9f5 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -8,7 +8,7 @@
 #include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 219eefeb90ff7..e86604e0174da 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -7,7 +7,7 @@
 
 #include <linux/slab.h>
 
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_vm.h"
 
 static void preempt_fence_work_func(struct work_struct *w)
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 6ba7baf7c7771..382851f436b79 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -12,7 +12,7 @@
 
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_guc_hwconfig.h"
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 85fd5980191c0..9944858de4d2d 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -9,7 +9,7 @@
 #include <linux/slab.h>
 
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_hw_engine_types.h"
 #include "xe_hw_fence.h"
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0bebdac2287c8..d3e82c4aed42b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -18,7 +18,7 @@
 
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_engine.h"
+#include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_gt_pagefault.h"
 #include "xe_gt_tlb_invalidation.h"
-- 
GitLab


From 9b9529ce379a08e68d65231497dd6bad94281902 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Mon, 31 Jul 2023 17:30:02 +0200
Subject: [PATCH 1799/2340] drm/xe: Rename engine to exec_queue

Engine was inappropriately used to refer to execution queues and it
also created some confusion with hardware engines. Where it applies
the exec_queue variable name is changed to q and comments are also
updated.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/162
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c         |   18 +-
 drivers/gpu/drm/xe/xe_bb.c                    |   26 +-
 drivers/gpu/drm/xe/xe_bb.h                    |    8 +-
 drivers/gpu/drm/xe/xe_devcoredump.c           |   38 +-
 drivers/gpu/drm/xe/xe_devcoredump.h           |    6 +-
 drivers/gpu/drm/xe/xe_devcoredump_types.h     |    2 +-
 drivers/gpu/drm/xe/xe_device.c                |   60 +-
 drivers/gpu/drm/xe/xe_device.h                |    8 +-
 drivers/gpu/drm/xe/xe_device_types.h          |    4 +-
 drivers/gpu/drm/xe/xe_engine_types.h          |  209 ----
 drivers/gpu/drm/xe/xe_exec.c                  |   60 +-
 drivers/gpu/drm/xe/xe_exec_queue.c            |  526 ++++----
 drivers/gpu/drm/xe/xe_exec_queue.h            |   64 +-
 drivers/gpu/drm/xe/xe_exec_queue_types.h      |  209 ++++
 drivers/gpu/drm/xe/xe_execlist.c              |  142 +--
 drivers/gpu/drm/xe/xe_execlist_types.h        |   14 +-
 drivers/gpu/drm/xe/xe_gt.c                    |   70 +-
 drivers/gpu/drm/xe/xe_gt_types.h              |    6 +-
 drivers/gpu/drm/xe/xe_guc_ads.c               |    2 +-
 drivers/gpu/drm/xe/xe_guc_ct.c                |   10 +-
 ...gine_types.h => xe_guc_exec_queue_types.h} |   26 +-
 drivers/gpu/drm/xe/xe_guc_fwif.h              |    6 +-
 drivers/gpu/drm/xe/xe_guc_submit.c            | 1088 ++++++++---------
 drivers/gpu/drm/xe/xe_guc_submit.h            |   20 +-
 drivers/gpu/drm/xe/xe_guc_submit_types.h      |   20 +-
 drivers/gpu/drm/xe/xe_guc_types.h             |    4 +-
 drivers/gpu/drm/xe/xe_lrc.c                   |   10 +-
 drivers/gpu/drm/xe/xe_lrc.h                   |    4 +-
 drivers/gpu/drm/xe/xe_migrate.c               |   64 +-
 drivers/gpu/drm/xe/xe_migrate.h               |    6 +-
 drivers/gpu/drm/xe/xe_mocs.h                  |    2 +-
 drivers/gpu/drm/xe/xe_preempt_fence.c         |   30 +-
 drivers/gpu/drm/xe/xe_preempt_fence.h         |    4 +-
 drivers/gpu/drm/xe/xe_preempt_fence_types.h   |    7 +-
 drivers/gpu/drm/xe/xe_pt.c                    |   18 +-
 drivers/gpu/drm/xe/xe_pt.h                    |    6 +-
 drivers/gpu/drm/xe/xe_query.c                 |    2 +-
 drivers/gpu/drm/xe/xe_ring_ops.c              |   38 +-
 drivers/gpu/drm/xe/xe_sched_job.c             |   74 +-
 drivers/gpu/drm/xe/xe_sched_job.h             |    4 +-
 drivers/gpu/drm/xe/xe_sched_job_types.h       |    6 +-
 drivers/gpu/drm/xe/xe_trace.h                 |  140 +--
 drivers/gpu/drm/xe/xe_vm.c                    |  192 +--
 drivers/gpu/drm/xe/xe_vm.h                    |    4 +-
 drivers/gpu/drm/xe/xe_vm_types.h              |   16 +-
 include/uapi/drm/xe_drm.h                     |   86 +-
 46 files changed, 1679 insertions(+), 1680 deletions(-)
 delete mode 100644 drivers/gpu/drm/xe/xe_engine_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_exec_queue_types.h
 rename drivers/gpu/drm/xe/{xe_guc_engine_types.h => xe_guc_exec_queue_types.h} (59%)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 9e9b228fe3153..5c8d5e78d9bc4 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -38,7 +38,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
 			  struct kunit *test)
 {
 	u64 batch_base = xe_migrate_batch_base(m, xe->info.supports_usm);
-	struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb,
+	struct xe_sched_job *job = xe_bb_create_migration_job(m->q, bb,
 							      batch_base,
 							      second_idx);
 	struct dma_fence *fence;
@@ -215,7 +215,7 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
 	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
 
 	then = ktime_get();
-	fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1,
+	fence = xe_migrate_update_pgtables(m, NULL, NULL, m->q, &update, 1,
 					   NULL, 0, &pt_update);
 	now = ktime_get();
 	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
@@ -257,7 +257,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		return;
 	}
 
-	big = xe_bo_create_pin_map(xe, tile, m->eng->vm, SZ_4M,
+	big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
 				   ttm_bo_type_kernel,
 				   XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				   XE_BO_CREATE_PINNED_BIT);
@@ -266,7 +266,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto vunmap;
 	}
 
-	pt = xe_bo_create_pin_map(xe, tile, m->eng->vm, XE_PAGE_SIZE,
+	pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
 				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
 				  XE_BO_CREATE_PINNED_BIT);
@@ -276,7 +276,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_big;
 	}
 
-	tiny = xe_bo_create_pin_map(xe, tile, m->eng->vm,
+	tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
 				    2 * SZ_4K,
 				    ttm_bo_type_kernel,
 				    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
@@ -295,14 +295,14 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	}
 
 	kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n",
-		   (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, XE_PAGE_SIZE),
+		   (unsigned long)xe_bo_main_addr(m->q->vm->pt_root[id]->bo, XE_PAGE_SIZE),
 		   (unsigned long)xe_bo_main_addr(m->pt_bo, XE_PAGE_SIZE));
 
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
 	expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0);
-	if (m->eng->vm->flags & XE_VM_FLAG_64K)
+	if (m->q->vm->flags & XE_VM_FLAG_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))
 		xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
@@ -399,11 +399,11 @@ static int migrate_test_run_device(struct xe_device *xe)
 		struct ww_acquire_ctx ww;
 
 		kunit_info(test, "Testing tile id %d.\n", id);
-		xe_vm_lock(m->eng->vm, &ww, 0, true);
+		xe_vm_lock(m->q->vm, &ww, 0, true);
 		xe_device_mem_access_get(xe);
 		xe_migrate_sanity_test(m, test);
 		xe_device_mem_access_put(xe);
-		xe_vm_unlock(m->eng->vm, &ww);
+		xe_vm_unlock(m->q->vm, &ww);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index b15a7cb7db4cb..38f4ce83a207e 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -7,7 +7,7 @@
 
 #include "regs/xe_gpu_commands.h"
 #include "xe_device.h"
-#include "xe_engine_types.h"
+#include "xe_exec_queue_types.h"
 #include "xe_gt.h"
 #include "xe_hw_fence.h"
 #include "xe_sa.h"
@@ -60,30 +60,30 @@ err:
 }
 
 static struct xe_sched_job *
-__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
+__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
 {
 	u32 size = drm_suballoc_size(bb->bo);
 
 	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 
-	WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt) > size);
+	WARN_ON(bb->len * 4 + bb_prefetch(q->gt) > size);
 
 	xe_sa_bo_flush_write(bb->bo);
 
-	return xe_sched_job_create(kernel_eng, addr);
+	return xe_sched_job_create(q, addr);
 }
 
-struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q,
 					 struct xe_bb *bb, u64 batch_base_ofs)
 {
 	u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);
 
-	XE_WARN_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+	XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION));
 
-	return __xe_bb_create_job(wa_eng, bb, &addr);
+	return __xe_bb_create_job(q, bb, &addr);
 }
 
-struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
 						struct xe_bb *bb,
 						u64 batch_base_ofs,
 						u32 second_idx)
@@ -95,18 +95,18 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
 	};
 
 	XE_WARN_ON(second_idx > bb->len);
-	XE_WARN_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+	XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION));
 
-	return __xe_bb_create_job(kernel_eng, bb, addr);
+	return __xe_bb_create_job(q, bb, addr);
 }
 
-struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
 				      struct xe_bb *bb)
 {
 	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
 
-	XE_WARN_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION);
-	return __xe_bb_create_job(kernel_eng, bb, &addr);
+	XE_WARN_ON(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION);
+	return __xe_bb_create_job(q, bb, &addr);
 }
 
 void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index 0cc9260c96347..c5ae0770bab5c 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -11,16 +11,16 @@
 struct dma_fence;
 
 struct xe_gt;
-struct xe_engine;
+struct xe_exec_queue;
 struct xe_sched_job;
 
 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
-struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
 				      struct xe_bb *bb);
-struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
 						struct xe_bb *bb, u64 batch_ofs,
 						u32 second_idx);
-struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q,
 					 struct xe_bb *bb, u64 batch_ofs);
 void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
 
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 61ff97ea76593..68abc0b195beb 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -53,9 +53,9 @@ static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump)
 	return container_of(coredump, struct xe_device, devcoredump);
 }
 
-static struct xe_guc *engine_to_guc(struct xe_engine *e)
+static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
 {
-	return &e->gt->uc.guc;
+	return &q->gt->uc.guc;
 }
 
 static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
@@ -91,7 +91,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 
 	drm_printf(&p, "\n**** GuC CT ****\n");
 	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
-	xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p);
+	xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p);
 
 	drm_printf(&p, "\n**** HW Engines ****\n");
 	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
@@ -112,7 +112,7 @@ static void xe_devcoredump_free(void *data)
 		return;
 
 	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
-	xe_guc_engine_snapshot_free(coredump->snapshot.ge);
+	xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
 	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
 		if (coredump->snapshot.hwe[i])
 			xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
@@ -123,14 +123,14 @@ static void xe_devcoredump_free(void *data)
 }
 
 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
-				 struct xe_engine *e)
+				 struct xe_exec_queue *q)
 {
 	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
-	u32 adj_logical_mask = e->logical_mask;
-	u32 width_mask = (0x1 << e->width) - 1;
+	u32 adj_logical_mask = q->logical_mask;
+	u32 width_mask = (0x1 << q->width) - 1;
 	int i;
 	bool cookie;
 
@@ -138,22 +138,22 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 	ss->boot_time = ktime_get_boottime();
 
 	cookie = dma_fence_begin_signalling();
-	for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+	for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
 		if (adj_logical_mask & BIT(i)) {
 			adj_logical_mask |= width_mask << i;
-			i += e->width;
+			i += q->width;
 		} else {
 			++i;
 		}
 	}
 
-	xe_force_wake_get(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
+	xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
 
 	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
-	coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e);
+	coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
 
-	for_each_hw_engine(hwe, e->gt, id) {
-		if (hwe->class != e->hwe->class ||
+	for_each_hw_engine(hwe, q->gt, id) {
+		if (hwe->class != q->hwe->class ||
 		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
 			coredump->snapshot.hwe[id] = NULL;
 			continue;
@@ -161,21 +161,21 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 		coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
 	}
 
-	xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
+	xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
 	dma_fence_end_signalling(cookie);
 }
 
 /**
  * xe_devcoredump - Take the required snapshots and initialize coredump device.
- * @e: The faulty xe_engine, where the issue was detected.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
  *
  * This function should be called at the crash time within the serialized
  * gt_reset. It is skipped if we still have the core dump device available
  * with the information of the 'first' snapshot.
  */
-void xe_devcoredump(struct xe_engine *e)
+void xe_devcoredump(struct xe_exec_queue *q)
 {
-	struct xe_device *xe = gt_to_xe(e->gt);
+	struct xe_device *xe = gt_to_xe(q->gt);
 	struct xe_devcoredump *coredump = &xe->devcoredump;
 
 	if (coredump->captured) {
@@ -184,7 +184,7 @@ void xe_devcoredump(struct xe_engine *e)
 	}
 
 	coredump->captured = true;
-	devcoredump_snapshot(coredump, e);
+	devcoredump_snapshot(coredump, q);
 
 	drm_info(&xe->drm, "Xe device coredump has been created\n");
 	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index 8548821292275..6ac218a5c1945 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -7,12 +7,12 @@
 #define _XE_DEVCOREDUMP_H_
 
 struct xe_device;
-struct xe_engine;
+struct xe_exec_queue;
 
 #ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_engine *e);
+void xe_devcoredump(struct xe_exec_queue *q);
 #else
-static inline void xe_devcoredump(struct xe_engine *e)
+static inline void xe_devcoredump(struct xe_exec_queue *q)
 {
 }
 #endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index c0d711eb6ab31..7fdad9c3d3dde 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -30,7 +30,7 @@ struct xe_devcoredump_snapshot {
 	/** @ct: GuC CT snapshot */
 	struct xe_guc_ct_snapshot *ct;
 	/** @ge: Guc Engine snapshot */
-	struct xe_guc_submit_engine_snapshot *ge;
+	struct xe_guc_submit_exec_queue_snapshot *ge;
 	/** @hwe: HW Engine snapshot array */
 	struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
 };
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index a8ab86379ed67..df1953759c670 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -53,33 +53,33 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 	mutex_init(&xef->vm.lock);
 	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
 
-	mutex_init(&xef->engine.lock);
-	xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1);
+	mutex_init(&xef->exec_queue.lock);
+	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
 
 	file->driver_priv = xef;
 	return 0;
 }
 
-static void device_kill_persistent_engines(struct xe_device *xe,
-					   struct xe_file *xef);
+static void device_kill_persistent_exec_queues(struct xe_device *xe,
+					       struct xe_file *xef);
 
 static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = file->driver_priv;
 	struct xe_vm *vm;
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	unsigned long idx;
 
-	mutex_lock(&xef->engine.lock);
-	xa_for_each(&xef->engine.xa, idx, e) {
-		xe_engine_kill(e);
-		xe_engine_put(e);
+	mutex_lock(&xef->exec_queue.lock);
+	xa_for_each(&xef->exec_queue.xa, idx, q) {
+		xe_exec_queue_kill(q);
+		xe_exec_queue_put(q);
 	}
-	mutex_unlock(&xef->engine.lock);
-	xa_destroy(&xef->engine.xa);
-	mutex_destroy(&xef->engine.lock);
-	device_kill_persistent_engines(xe, xef);
+	mutex_unlock(&xef->exec_queue.lock);
+	xa_destroy(&xef->exec_queue.xa);
+	mutex_destroy(&xef->exec_queue.lock);
+	device_kill_persistent_exec_queues(xe, xef);
 
 	mutex_lock(&xef->vm.lock);
 	xa_for_each(&xef->vm.xa, idx, vm)
@@ -99,15 +99,15 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl,
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_ENGINE_GET_PROPERTY, xe_engine_get_property_ioctl,
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl,
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl,
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
 			  DRM_RENDER_ALLOW),
@@ -324,33 +324,33 @@ void xe_device_shutdown(struct xe_device *xe)
 {
 }
 
-void xe_device_add_persistent_engines(struct xe_device *xe, struct xe_engine *e)
+void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q)
 {
 	mutex_lock(&xe->persistent_engines.lock);
-	list_add_tail(&e->persistent.link, &xe->persistent_engines.list);
+	list_add_tail(&q->persistent.link, &xe->persistent_engines.list);
 	mutex_unlock(&xe->persistent_engines.lock);
 }
 
-void xe_device_remove_persistent_engines(struct xe_device *xe,
-					 struct xe_engine *e)
+void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
+					     struct xe_exec_queue *q)
 {
 	mutex_lock(&xe->persistent_engines.lock);
-	if (!list_empty(&e->persistent.link))
-		list_del(&e->persistent.link);
+	if (!list_empty(&q->persistent.link))
+		list_del(&q->persistent.link);
 	mutex_unlock(&xe->persistent_engines.lock);
 }
 
-static void device_kill_persistent_engines(struct xe_device *xe,
-					   struct xe_file *xef)
+static void device_kill_persistent_exec_queues(struct xe_device *xe,
+					       struct xe_file *xef)
 {
-	struct xe_engine *e, *next;
+	struct xe_exec_queue *q, *next;
 
 	mutex_lock(&xe->persistent_engines.lock);
-	list_for_each_entry_safe(e, next, &xe->persistent_engines.list,
+	list_for_each_entry_safe(q, next, &xe->persistent_engines.list,
 				 persistent.link)
-		if (e->persistent.xef == xef) {
-			xe_engine_kill(e);
-			list_del_init(&e->persistent.link);
+		if (q->persistent.xef == xef) {
+			xe_exec_queue_kill(q);
+			list_del_init(&q->persistent.link);
 		}
 	mutex_unlock(&xe->persistent_engines.lock);
 }
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 61a5cf1f73006..71582094834c6 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -6,7 +6,7 @@
 #ifndef _XE_DEVICE_H_
 #define _XE_DEVICE_H_
 
-struct xe_engine;
+struct xe_exec_queue;
 struct xe_file;
 
 #include <drm/drm_util.h>
@@ -41,9 +41,9 @@ int xe_device_probe(struct xe_device *xe);
 void xe_device_remove(struct xe_device *xe);
 void xe_device_shutdown(struct xe_device *xe);
 
-void xe_device_add_persistent_engines(struct xe_device *xe, struct xe_engine *e);
-void xe_device_remove_persistent_engines(struct xe_device *xe,
-					 struct xe_engine *e);
+void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q);
+void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
+					     struct xe_exec_queue *q);
 
 void xe_device_wmb(struct xe_device *xe);
 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c521ffaf38719..128e0a9536929 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -377,13 +377,13 @@ struct xe_file {
 		struct mutex lock;
 	} vm;
 
-	/** @engine: Submission engine state for file */
+	/** @exec_queue: Submission exec queue state for file */
 	struct {
 		/** @xe: xarray to store engines */
 		struct xarray xa;
 		/** @lock: protects file engine state */
 		struct mutex lock;
-	} engine;
+	} exec_queue;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
deleted file mode 100644
index f1d531735f6d2..0000000000000
--- a/drivers/gpu/drm/xe/xe_engine_types.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _XE_ENGINE_TYPES_H_
-#define _XE_ENGINE_TYPES_H_
-
-#include <linux/kref.h>
-
-#include <drm/gpu_scheduler.h>
-
-#include "xe_gpu_scheduler_types.h"
-#include "xe_hw_engine_types.h"
-#include "xe_hw_fence_types.h"
-#include "xe_lrc_types.h"
-
-struct xe_execlist_engine;
-struct xe_gt;
-struct xe_guc_engine;
-struct xe_hw_engine;
-struct xe_vm;
-
-enum xe_engine_priority {
-	XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */
-	XE_ENGINE_PRIORITY_LOW = 0,
-	XE_ENGINE_PRIORITY_NORMAL,
-	XE_ENGINE_PRIORITY_HIGH,
-	XE_ENGINE_PRIORITY_KERNEL,
-
-	XE_ENGINE_PRIORITY_COUNT
-};
-
-/**
- * struct xe_engine - Submission engine
- *
- * Contains all state necessary for submissions. Can either be a user object or
- * a kernel object.
- */
-struct xe_engine {
-	/** @gt: graphics tile this engine can submit to */
-	struct xe_gt *gt;
-	/**
-	 * @hwe: A hardware of the same class. May (physical engine) or may not
-	 * (virtual engine) be where jobs actual engine up running. Should never
-	 * really be used for submissions.
-	 */
-	struct xe_hw_engine *hwe;
-	/** @refcount: ref count of this engine */
-	struct kref refcount;
-	/** @vm: VM (address space) for this engine */
-	struct xe_vm *vm;
-	/** @class: class of this engine */
-	enum xe_engine_class class;
-	/** @priority: priority of this exec queue */
-	enum xe_engine_priority priority;
-	/**
-	 * @logical_mask: logical mask of where job submitted to engine can run
-	 */
-	u32 logical_mask;
-	/** @name: name of this engine */
-	char name[MAX_FENCE_NAME_LEN];
-	/** @width: width (number BB submitted per exec) of this engine */
-	u16 width;
-	/** @fence_irq: fence IRQ used to signal job completion */
-	struct xe_hw_fence_irq *fence_irq;
-
-#define ENGINE_FLAG_BANNED		BIT(0)
-#define ENGINE_FLAG_KERNEL		BIT(1)
-#define ENGINE_FLAG_PERSISTENT		BIT(2)
-#define ENGINE_FLAG_COMPUTE_MODE	BIT(3)
-/* Caller needs to hold rpm ref when creating engine with ENGINE_FLAG_VM */
-#define ENGINE_FLAG_VM			BIT(4)
-#define ENGINE_FLAG_BIND_ENGINE_CHILD	BIT(5)
-#define ENGINE_FLAG_WA			BIT(6)
-
-	/**
-	 * @flags: flags for this engine, should statically setup aside from ban
-	 * bit
-	 */
-	unsigned long flags;
-
-	union {
-		/** @multi_gt_list: list head for VM bind engines if multi-GT */
-		struct list_head multi_gt_list;
-		/** @multi_gt_link: link for VM bind engines if multi-GT */
-		struct list_head multi_gt_link;
-	};
-
-	union {
-		/** @execlist: execlist backend specific state for engine */
-		struct xe_execlist_engine *execlist;
-		/** @guc: GuC backend specific state for engine */
-		struct xe_guc_engine *guc;
-	};
-
-	/**
-	 * @persistent: persistent engine state
-	 */
-	struct {
-		/** @xef: file which this engine belongs to */
-		struct xe_file *xef;
-		/** @link: link in list of persistent engines */
-		struct list_head link;
-	} persistent;
-
-	union {
-		/**
-		 * @parallel: parallel submission state
-		 */
-		struct {
-			/** @composite_fence_ctx: context composite fence */
-			u64 composite_fence_ctx;
-			/** @composite_fence_seqno: seqno for composite fence */
-			u32 composite_fence_seqno;
-		} parallel;
-		/**
-		 * @bind: bind submission state
-		 */
-		struct {
-			/** @fence_ctx: context bind fence */
-			u64 fence_ctx;
-			/** @fence_seqno: seqno for bind fence */
-			u32 fence_seqno;
-		} bind;
-	};
-
-	/** @sched_props: scheduling properties */
-	struct {
-		/** @timeslice_us: timeslice period in micro-seconds */
-		u32 timeslice_us;
-		/** @preempt_timeout_us: preemption timeout in micro-seconds */
-		u32 preempt_timeout_us;
-	} sched_props;
-
-	/** @compute: compute engine state */
-	struct {
-		/** @pfence: preemption fence */
-		struct dma_fence *pfence;
-		/** @context: preemption fence context */
-		u64 context;
-		/** @seqno: preemption fence seqno */
-		u32 seqno;
-		/** @link: link into VM's list of engines */
-		struct list_head link;
-		/** @lock: preemption fences lock */
-		spinlock_t lock;
-	} compute;
-
-	/** @usm: unified shared memory state */
-	struct {
-		/** @acc_trigger: access counter trigger */
-		u32 acc_trigger;
-		/** @acc_notify: access counter notify */
-		u32 acc_notify;
-		/** @acc_granularity: access counter granularity */
-		u32 acc_granularity;
-	} usm;
-
-	/** @ops: submission backend engine operations */
-	const struct xe_engine_ops *ops;
-
-	/** @ring_ops: ring operations for this engine */
-	const struct xe_ring_ops *ring_ops;
-	/** @entity: DRM sched entity for this engine (1 to 1 relationship) */
-	struct drm_sched_entity *entity;
-	/** @lrc: logical ring context for this engine */
-	struct xe_lrc lrc[];
-};
-
-/**
- * struct xe_engine_ops - Submission backend engine operations
- */
-struct xe_engine_ops {
-	/** @init: Initialize engine for submission backend */
-	int (*init)(struct xe_engine *e);
-	/** @kill: Kill inflight submissions for backend */
-	void (*kill)(struct xe_engine *e);
-	/** @fini: Fini engine for submission backend */
-	void (*fini)(struct xe_engine *e);
-	/** @set_priority: Set priority for engine */
-	int (*set_priority)(struct xe_engine *e,
-			    enum xe_engine_priority priority);
-	/** @set_timeslice: Set timeslice for engine */
-	int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us);
-	/** @set_preempt_timeout: Set preemption timeout for engine */
-	int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us);
-	/** @set_job_timeout: Set job timeout for engine */
-	int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms);
-	/**
-	 * @suspend: Suspend engine from executing, allowed to be called
-	 * multiple times in a row before resume with the caveat that
-	 * suspend_wait returns before calling suspend again.
-	 */
-	int (*suspend)(struct xe_engine *e);
-	/**
-	 * @suspend_wait: Wait for an engine to suspend executing, should be
-	 * call after suspend.
-	 */
-	void (*suspend_wait)(struct xe_engine *e);
-	/**
-	 * @resume: Resume engine execution, engine must be in a suspended
-	 * state and dma fence returned from most recent suspend call must be
-	 * signalled when this function is called.
-	 */
-	void (*resume)(struct xe_engine *e);
-};
-
-#endif
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index a043c649249b5..629d81a789e77 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -95,19 +95,19 @@
 
 #define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
 
-static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
+static int xe_exec_begin(struct xe_exec_queue *q, struct ww_acquire_ctx *ww,
 			 struct ttm_validate_buffer tv_onstack[],
 			 struct ttm_validate_buffer **tv,
 			 struct list_head *objs)
 {
-	struct xe_vm *vm = e->vm;
+	struct xe_vm *vm = q->vm;
 	struct xe_vma *vma;
 	LIST_HEAD(dups);
 	ktime_t end = 0;
 	int err = 0;
 
 	*tv = NULL;
-	if (xe_vm_no_dma_fences(e->vm))
+	if (xe_vm_no_dma_fences(q->vm))
 		return 0;
 
 retry:
@@ -153,14 +153,14 @@ retry:
 	return err;
 }
 
-static void xe_exec_end(struct xe_engine *e,
+static void xe_exec_end(struct xe_exec_queue *q,
 			struct ttm_validate_buffer *tv_onstack,
 			struct ttm_validate_buffer *tv,
 			struct ww_acquire_ctx *ww,
 			struct list_head *objs)
 {
-	if (!xe_vm_no_dma_fences(e->vm))
-		xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
+	if (!xe_vm_no_dma_fences(q->vm))
+		xe_vm_unlock_dma_resv(q->vm, tv_onstack, tv, ww, objs);
 }
 
 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -170,7 +170,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct drm_xe_exec *args = data;
 	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
 	u64 __user *addresses_user = u64_to_user_ptr(args->address);
-	struct xe_engine *engine;
+	struct xe_exec_queue *q;
 	struct xe_sync_entry *syncs = NULL;
 	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
 	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
@@ -189,30 +189,30 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	engine = xe_engine_lookup(xef, args->engine_id);
-	if (XE_IOCTL_DBG(xe, !engine))
+	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+	if (XE_IOCTL_DBG(xe, !q))
 		return -ENOENT;
 
-	if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_VM))
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, engine->width != args->num_batch_buffer))
+	if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_BANNED)) {
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
 		err = -ECANCELED;
-		goto err_engine;
+		goto err_exec_queue;
 	}
 
 	if (args->num_syncs) {
 		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
 		if (!syncs) {
 			err = -ENOMEM;
-			goto err_engine;
+			goto err_exec_queue;
 		}
 	}
 
-	vm = engine->vm;
+	vm = q->vm;
 
 	for (i = 0; i < args->num_syncs; i++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
@@ -222,9 +222,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto err_syncs;
 	}
 
-	if (xe_engine_is_parallel(engine)) {
+	if (xe_exec_queue_is_parallel(q)) {
 		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
-				       engine->width);
+				       q->width);
 		if (err) {
 			err = -EFAULT;
 			goto err_syncs;
@@ -294,26 +294,26 @@ retry:
 			goto err_unlock_list;
 	}
 
-	err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
+	err = xe_exec_begin(q, &ww, tv_onstack, &tv, &objs);
 	if (err)
 		goto err_unlock_list;
 
-	if (xe_vm_is_closed_or_banned(engine->vm)) {
+	if (xe_vm_is_closed_or_banned(q->vm)) {
 		drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
 		err = -ECANCELED;
-		goto err_engine_end;
+		goto err_exec_queue_end;
 	}
 
-	if (xe_engine_is_lr(engine) && xe_engine_ring_full(engine)) {
+	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
 		err = -EWOULDBLOCK;
-		goto err_engine_end;
+		goto err_exec_queue_end;
 	}
 
-	job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
+	job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ?
 				  addresses : &args->address);
 	if (IS_ERR(job)) {
 		err = PTR_ERR(job);
-		goto err_engine_end;
+		goto err_exec_queue_end;
 	}
 
 	/*
@@ -395,8 +395,8 @@ retry:
 		xe_sync_entry_signal(&syncs[i], job,
 				     &job->drm.s_fence->finished);
 
-	if (xe_engine_is_lr(engine))
-		engine->ring_ops->emit_job(job);
+	if (xe_exec_queue_is_lr(q))
+		q->ring_ops->emit_job(job);
 	xe_sched_job_push(job);
 	xe_vm_reactivate_rebind(vm);
 
@@ -412,8 +412,8 @@ err_repin:
 err_put_job:
 	if (err)
 		xe_sched_job_put(job);
-err_engine_end:
-	xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
+err_exec_queue_end:
+	xe_exec_end(q, tv_onstack, tv, &ww, &objs);
 err_unlock_list:
 	if (write_locked)
 		up_write(&vm->lock);
@@ -425,8 +425,8 @@ err_syncs:
 	for (i = 0; i < num_syncs; i++)
 		xe_sync_entry_cleanup(&syncs[i]);
 	kfree(syncs);
-err_engine:
-	xe_engine_put(engine);
+err_exec_queue:
+	xe_exec_queue_put(q);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index f1cfc4b604d48..1371829b9e356 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -22,57 +22,57 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
-static struct xe_engine *__xe_engine_create(struct xe_device *xe,
-					    struct xe_vm *vm,
-					    u32 logical_mask,
-					    u16 width, struct xe_hw_engine *hwe,
-					    u32 flags)
+static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
+						    struct xe_vm *vm,
+						    u32 logical_mask,
+						    u16 width, struct xe_hw_engine *hwe,
+						    u32 flags)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	struct xe_gt *gt = hwe->gt;
 	int err;
 	int i;
 
-	e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
-	if (!e)
+	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+	if (!q)
 		return ERR_PTR(-ENOMEM);
 
-	kref_init(&e->refcount);
-	e->flags = flags;
-	e->hwe = hwe;
-	e->gt = gt;
+	kref_init(&q->refcount);
+	q->flags = flags;
+	q->hwe = hwe;
+	q->gt = gt;
 	if (vm)
-		e->vm = xe_vm_get(vm);
-	e->class = hwe->class;
-	e->width = width;
-	e->logical_mask = logical_mask;
-	e->fence_irq = &gt->fence_irq[hwe->class];
-	e->ring_ops = gt->ring_ops[hwe->class];
-	e->ops = gt->engine_ops;
-	INIT_LIST_HEAD(&e->persistent.link);
-	INIT_LIST_HEAD(&e->compute.link);
-	INIT_LIST_HEAD(&e->multi_gt_link);
+		q->vm = xe_vm_get(vm);
+	q->class = hwe->class;
+	q->width = width;
+	q->logical_mask = logical_mask;
+	q->fence_irq = &gt->fence_irq[hwe->class];
+	q->ring_ops = gt->ring_ops[hwe->class];
+	q->ops = gt->exec_queue_ops;
+	INIT_LIST_HEAD(&q->persistent.link);
+	INIT_LIST_HEAD(&q->compute.link);
+	INIT_LIST_HEAD(&q->multi_gt_link);
 
 	/* FIXME: Wire up to configurable default value */
-	e->sched_props.timeslice_us = 1 * 1000;
-	e->sched_props.preempt_timeout_us = 640 * 1000;
+	q->sched_props.timeslice_us = 1 * 1000;
+	q->sched_props.preempt_timeout_us = 640 * 1000;
 
-	if (xe_engine_is_parallel(e)) {
-		e->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
-		e->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
+	if (xe_exec_queue_is_parallel(q)) {
+		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
+		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
 	}
-	if (e->flags & ENGINE_FLAG_VM) {
-		e->bind.fence_ctx = dma_fence_context_alloc(1);
-		e->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
+	if (q->flags & EXEC_QUEUE_FLAG_VM) {
+		q->bind.fence_ctx = dma_fence_context_alloc(1);
+		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
 	}
 
 	for (i = 0; i < width; ++i) {
-		err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K);
+		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
 		if (err)
 			goto err_lrc;
 	}
 
-	err = e->ops->init(e);
+	err = q->ops->init(q);
 	if (err)
 		goto err_lrc;
 
@@ -84,24 +84,24 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe,
 	 * can perform GuC CT actions when needed. Caller is expected to
 	 * have already grabbed the rpm ref outside any sensitive locks.
 	 */
-	if (e->flags & ENGINE_FLAG_VM)
+	if (q->flags & EXEC_QUEUE_FLAG_VM)
 		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
 
-	return e;
+	return q;
 
 err_lrc:
 	for (i = i - 1; i >= 0; --i)
-		xe_lrc_finish(e->lrc + i);
-	kfree(e);
+		xe_lrc_finish(q->lrc + i);
+	kfree(q);
 	return ERR_PTR(err);
 }
 
-struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
-				   u32 logical_mask, u16 width,
-				   struct xe_hw_engine *hwe, u32 flags)
+struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
+					   u32 logical_mask, u16 width,
+					   struct xe_hw_engine *hwe, u32 flags)
 {
 	struct ww_acquire_ctx ww;
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	int err;
 
 	if (vm) {
@@ -109,16 +109,16 @@ struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
 		if (err)
 			return ERR_PTR(err);
 	}
-	e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags);
+	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
 	if (vm)
 		xe_vm_unlock(vm, &ww);
 
-	return e;
+	return q;
 }
 
-struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
-					 struct xe_vm *vm,
-					 enum xe_engine_class class, u32 flags)
+struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
+						 struct xe_vm *vm,
+						 enum xe_engine_class class, u32 flags)
 {
 	struct xe_hw_engine *hwe, *hwe0 = NULL;
 	enum xe_hw_engine_id id;
@@ -138,102 +138,102 @@ struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
 	if (!logical_mask)
 		return ERR_PTR(-ENODEV);
 
-	return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags);
+	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
 }
 
-void xe_engine_destroy(struct kref *ref)
+void xe_exec_queue_destroy(struct kref *ref)
 {
-	struct xe_engine *e = container_of(ref, struct xe_engine, refcount);
-	struct xe_engine *engine, *next;
+	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
+	struct xe_exec_queue *eq, *next;
 
-	if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) {
-		list_for_each_entry_safe(engine, next, &e->multi_gt_list,
+	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
+		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
 					 multi_gt_link)
-			xe_engine_put(engine);
+			xe_exec_queue_put(eq);
 	}
 
-	e->ops->fini(e);
+	q->ops->fini(q);
 }
 
-void xe_engine_fini(struct xe_engine *e)
+void xe_exec_queue_fini(struct xe_exec_queue *q)
 {
 	int i;
 
-	for (i = 0; i < e->width; ++i)
-		xe_lrc_finish(e->lrc + i);
-	if (e->vm)
-		xe_vm_put(e->vm);
-	if (e->flags & ENGINE_FLAG_VM)
-		xe_device_mem_access_put(gt_to_xe(e->gt));
+	for (i = 0; i < q->width; ++i)
+		xe_lrc_finish(q->lrc + i);
+	if (q->vm)
+		xe_vm_put(q->vm);
+	if (q->flags & EXEC_QUEUE_FLAG_VM)
+		xe_device_mem_access_put(gt_to_xe(q->gt));
 
-	kfree(e);
+	kfree(q);
 }
 
-struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
+struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
-	mutex_lock(&xef->engine.lock);
-	e = xa_load(&xef->engine.xa, id);
-	if (e)
-		xe_engine_get(e);
-	mutex_unlock(&xef->engine.lock);
+	mutex_lock(&xef->exec_queue.lock);
+	q = xa_load(&xef->exec_queue.xa, id);
+	if (q)
+		xe_exec_queue_get(q);
+	mutex_unlock(&xef->exec_queue.lock);
 
-	return e;
+	return q;
 }
 
-enum xe_engine_priority
-xe_engine_device_get_max_priority(struct xe_device *xe)
+enum xe_exec_queue_priority
+xe_exec_queue_device_get_max_priority(struct xe_device *xe)
 {
-	return capable(CAP_SYS_NICE) ? XE_ENGINE_PRIORITY_HIGH :
-				       XE_ENGINE_PRIORITY_NORMAL;
+	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
+				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
 }
 
-static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
-			       u64 value, bool create)
+static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
+				   u64 value, bool create)
 {
-	if (XE_IOCTL_DBG(xe, value > XE_ENGINE_PRIORITY_HIGH))
+	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, value > xe_engine_device_get_max_priority(xe)))
+	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
 		return -EPERM;
 
-	return e->ops->set_priority(e, value);
+	return q->ops->set_priority(q, value);
 }
 
-static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e,
-				u64 value, bool create)
+static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
+				    u64 value, bool create)
 {
 	if (!capable(CAP_SYS_NICE))
 		return -EPERM;
 
-	return e->ops->set_timeslice(e, value);
+	return q->ops->set_timeslice(q, value);
 }
 
-static int engine_set_preemption_timeout(struct xe_device *xe,
-					 struct xe_engine *e, u64 value,
-					 bool create)
+static int exec_queue_set_preemption_timeout(struct xe_device *xe,
+					     struct xe_exec_queue *q, u64 value,
+					     bool create)
 {
 	if (!capable(CAP_SYS_NICE))
 		return -EPERM;
 
-	return e->ops->set_preempt_timeout(e, value);
+	return q->ops->set_preempt_timeout(q, value);
 }
 
-static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
-				   u64 value, bool create)
+static int exec_queue_set_compute_mode(struct xe_device *xe, struct xe_exec_queue *q,
+				       u64 value, bool create)
 {
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_VM))
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
 		return -EINVAL;
 
 	if (value) {
-		struct xe_vm *vm = e->vm;
+		struct xe_vm *vm = q->vm;
 		int err;
 
 		if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm)))
@@ -242,42 +242,42 @@ static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
 		if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm)))
 			return -EOPNOTSUPP;
 
-		if (XE_IOCTL_DBG(xe, e->width != 1))
+		if (XE_IOCTL_DBG(xe, q->width != 1))
 			return -EINVAL;
 
-		e->compute.context = dma_fence_context_alloc(1);
-		spin_lock_init(&e->compute.lock);
+		q->compute.context = dma_fence_context_alloc(1);
+		spin_lock_init(&q->compute.lock);
 
-		err = xe_vm_add_compute_engine(vm, e);
+		err = xe_vm_add_compute_exec_queue(vm, q);
 		if (XE_IOCTL_DBG(xe, err))
 			return err;
 
-		e->flags |= ENGINE_FLAG_COMPUTE_MODE;
-		e->flags &= ~ENGINE_FLAG_PERSISTENT;
+		q->flags |= EXEC_QUEUE_FLAG_COMPUTE_MODE;
+		q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;
 	}
 
 	return 0;
 }
 
-static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
-				  u64 value, bool create)
+static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 value, bool create)
 {
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))
 		return -EINVAL;
 
 	if (value)
-		e->flags |= ENGINE_FLAG_PERSISTENT;
+		q->flags |= EXEC_QUEUE_FLAG_PERSISTENT;
 	else
-		e->flags &= ~ENGINE_FLAG_PERSISTENT;
+		q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;
 
 	return 0;
 }
 
-static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
-				  u64 value, bool create)
+static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 value, bool create)
 {
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
@@ -285,11 +285,11 @@ static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
 	if (!capable(CAP_SYS_NICE))
 		return -EPERM;
 
-	return e->ops->set_job_timeout(e, value);
+	return q->ops->set_job_timeout(q, value);
 }
 
-static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
-				  u64 value, bool create)
+static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 value, bool create)
 {
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
@@ -297,13 +297,13 @@ static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
 	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
-	e->usm.acc_trigger = value;
+	q->usm.acc_trigger = value;
 
 	return 0;
 }
 
-static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
-				 u64 value, bool create)
+static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
+				     u64 value, bool create)
 {
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
@@ -311,13 +311,13 @@ static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
 	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
-	e->usm.acc_notify = value;
+	q->usm.acc_notify = value;
 
 	return 0;
 }
 
-static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
-				      u64 value, bool create)
+static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
+					  u64 value, bool create)
 {
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
@@ -325,34 +325,34 @@ static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
 	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
-	e->usm.acc_granularity = value;
+	q->usm.acc_granularity = value;
 
 	return 0;
 }
 
-typedef int (*xe_engine_set_property_fn)(struct xe_device *xe,
-					 struct xe_engine *e,
-					 u64 value, bool create);
-
-static const xe_engine_set_property_fn engine_set_property_funcs[] = {
-	[XE_ENGINE_SET_PROPERTY_PRIORITY] = engine_set_priority,
-	[XE_ENGINE_SET_PROPERTY_TIMESLICE] = engine_set_timeslice,
-	[XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
-	[XE_ENGINE_SET_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
-	[XE_ENGINE_SET_PROPERTY_PERSISTENCE] = engine_set_persistence,
-	[XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
-	[XE_ENGINE_SET_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
-	[XE_ENGINE_SET_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
-	[XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
+typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
+					     struct xe_exec_queue *q,
+					     u64 value, bool create);
+
+static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
+	[XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
+	[XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
+	[XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
+	[XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE] = exec_queue_set_compute_mode,
+	[XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
+	[XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
+	[XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
+	[XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
+	[XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
 };
 
-static int engine_user_ext_set_property(struct xe_device *xe,
-					struct xe_engine *e,
-					u64 extension,
-					bool create)
+static int exec_queue_user_ext_set_property(struct xe_device *xe,
+					    struct xe_exec_queue *q,
+					    u64 extension,
+					    bool create)
 {
 	u64 __user *address = u64_to_user_ptr(extension);
-	struct drm_xe_ext_engine_set_property ext;
+	struct drm_xe_ext_exec_queue_set_property ext;
 	int err;
 	u32 idx;
 
@@ -361,26 +361,26 @@ static int engine_user_ext_set_property(struct xe_device *xe,
 		return -EFAULT;
 
 	if (XE_IOCTL_DBG(xe, ext.property >=
-			 ARRAY_SIZE(engine_set_property_funcs)) ||
+			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
 	    XE_IOCTL_DBG(xe, ext.pad))
 		return -EINVAL;
 
-	idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
-	return engine_set_property_funcs[idx](xe, e, ext.value,  create);
+	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
+	return exec_queue_set_property_funcs[idx](xe, q, ext.value,  create);
 }
 
-typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe,
-					   struct xe_engine *e,
-					   u64 extension,
-					   bool create);
+typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
+					       struct xe_exec_queue *q,
+					       u64 extension,
+					       bool create);
 
-static const xe_engine_set_property_fn engine_user_extension_funcs[] = {
-	[XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property,
+static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = {
+	[XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
 };
 
 #define MAX_USER_EXTENSIONS	16
-static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
-				  u64 extensions, int ext_number, bool create)
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 extensions, int ext_number, bool create)
 {
 	u64 __user *address = u64_to_user_ptr(extensions);
 	struct xe_user_extension ext;
@@ -396,17 +396,17 @@ static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
 
 	if (XE_IOCTL_DBG(xe, ext.pad) ||
 	    XE_IOCTL_DBG(xe, ext.name >=
-			 ARRAY_SIZE(engine_user_extension_funcs)))
+			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
 		return -EINVAL;
 
 	idx = array_index_nospec(ext.name,
-				 ARRAY_SIZE(engine_user_extension_funcs));
-	err = engine_user_extension_funcs[idx](xe, e, extensions, create);
+				 ARRAY_SIZE(exec_queue_user_extension_funcs));
+	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
 	if (XE_IOCTL_DBG(xe, err))
 		return err;
 
 	if (ext.next_extension)
-		return engine_user_extensions(xe, e, ext.next_extension,
+		return exec_queue_user_extensions(xe, q, ext.next_extension,
 					      ++ext_number, create);
 
 	return 0;
@@ -440,9 +440,9 @@ find_hw_engine(struct xe_device *xe,
 			       eci.engine_instance, true);
 }
 
-static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt,
-				    struct drm_xe_engine_class_instance *eci,
-				    u16 width, u16 num_placements)
+static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+					struct drm_xe_engine_class_instance *eci,
+					u16 width, u16 num_placements)
 {
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
@@ -520,19 +520,19 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 	return return_mask;
 }
 
-int xe_engine_create_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file)
+int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = to_xe_file(file);
-	struct drm_xe_engine_create *args = data;
+	struct drm_xe_exec_queue_create *args = data;
 	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
 	struct drm_xe_engine_class_instance __user *user_eci =
 		u64_to_user_ptr(args->instances);
 	struct xe_hw_engine *hwe;
 	struct xe_vm *vm, *migrate_vm;
 	struct xe_gt *gt;
-	struct xe_engine *e = NULL;
+	struct xe_exec_queue *q = NULL;
 	u32 logical_mask;
 	u32 id;
 	u32 len;
@@ -557,15 +557,15 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 
 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
 		for_each_gt(gt, xe, id) {
-			struct xe_engine *new;
+			struct xe_exec_queue *new;
 
 			if (xe_gt_is_media_type(gt))
 				continue;
 
 			eci[0].gt_id = gt->info.id;
-			logical_mask = bind_engine_logical_mask(xe, gt, eci,
-								args->width,
-								args->num_placements);
+			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
+								    args->width,
+								    args->num_placements);
 			if (XE_IOCTL_DBG(xe, !logical_mask))
 				return -EINVAL;
 
@@ -577,28 +577,28 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			xe_device_mem_access_get(xe);
 
 			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
-			new = xe_engine_create(xe, migrate_vm, logical_mask,
-					       args->width, hwe,
-					       ENGINE_FLAG_PERSISTENT |
-					       ENGINE_FLAG_VM |
-					       (id ?
-					       ENGINE_FLAG_BIND_ENGINE_CHILD :
-					       0));
+			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
+						   args->width, hwe,
+						   EXEC_QUEUE_FLAG_PERSISTENT |
+						   EXEC_QUEUE_FLAG_VM |
+						   (id ?
+						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
+						    0));
 
 			xe_device_mem_access_put(xe); /* now held by engine */
 
 			xe_vm_put(migrate_vm);
 			if (IS_ERR(new)) {
 				err = PTR_ERR(new);
-				if (e)
-					goto put_engine;
+				if (q)
+					goto put_exec_queue;
 				return err;
 			}
 			if (id == 0)
-				e = new;
+				q = new;
 			else
 				list_add_tail(&new->multi_gt_list,
-					      &e->multi_gt_link);
+					      &q->multi_gt_link);
 		}
 	} else {
 		gt = xe_device_get_gt(xe, eci[0].gt_id);
@@ -628,223 +628,223 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
 			return -ENOENT;
 		}
 
-		e = xe_engine_create(xe, vm, logical_mask,
-				     args->width, hwe,
-				     xe_vm_no_dma_fences(vm) ? 0 :
-				     ENGINE_FLAG_PERSISTENT);
+		q = xe_exec_queue_create(xe, vm, logical_mask,
+					 args->width, hwe,
+					 xe_vm_no_dma_fences(vm) ? 0 :
+					 EXEC_QUEUE_FLAG_PERSISTENT);
 		up_read(&vm->lock);
 		xe_vm_put(vm);
-		if (IS_ERR(e))
-			return PTR_ERR(e);
+		if (IS_ERR(q))
+			return PTR_ERR(q);
 	}
 
 	if (args->extensions) {
-		err = engine_user_extensions(xe, e, args->extensions, 0, true);
+		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
 		if (XE_IOCTL_DBG(xe, err))
-			goto put_engine;
+			goto put_exec_queue;
 	}
 
-	if (XE_IOCTL_DBG(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
-			 !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
+	if (XE_IOCTL_DBG(xe, q->vm && xe_vm_in_compute_mode(q->vm) !=
+			 !!(q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))) {
 		err = -EOPNOTSUPP;
-		goto put_engine;
+		goto put_exec_queue;
 	}
 
-	e->persistent.xef = xef;
+	q->persistent.xef = xef;
 
-	mutex_lock(&xef->engine.lock);
-	err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL);
-	mutex_unlock(&xef->engine.lock);
+	mutex_lock(&xef->exec_queue.lock);
+	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->exec_queue.lock);
 	if (err)
-		goto put_engine;
+		goto put_exec_queue;
 
-	args->engine_id = id;
+	args->exec_queue_id = id;
 
 	return 0;
 
-put_engine:
-	xe_engine_kill(e);
-	xe_engine_put(e);
+put_exec_queue:
+	xe_exec_queue_kill(q);
+	xe_exec_queue_put(q);
 	return err;
 }
 
-int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file)
+int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = to_xe_file(file);
-	struct drm_xe_engine_get_property *args = data;
-	struct xe_engine *e;
+	struct drm_xe_exec_queue_get_property *args = data;
+	struct xe_exec_queue *q;
 	int ret;
 
 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	e = xe_engine_lookup(xef, args->engine_id);
-	if (XE_IOCTL_DBG(xe, !e))
+	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+	if (XE_IOCTL_DBG(xe, !q))
 		return -ENOENT;
 
 	switch (args->property) {
-	case XE_ENGINE_GET_PROPERTY_BAN:
-		args->value = !!(e->flags & ENGINE_FLAG_BANNED);
+	case XE_EXEC_QUEUE_GET_PROPERTY_BAN:
+		args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED);
 		ret = 0;
 		break;
 	default:
 		ret = -EINVAL;
 	}
 
-	xe_engine_put(e);
+	xe_exec_queue_put(q);
 
 	return ret;
 }
 
-static void engine_kill_compute(struct xe_engine *e)
+static void exec_queue_kill_compute(struct xe_exec_queue *q)
 {
-	if (!xe_vm_in_compute_mode(e->vm))
+	if (!xe_vm_in_compute_mode(q->vm))
 		return;
 
-	down_write(&e->vm->lock);
-	list_del(&e->compute.link);
-	--e->vm->preempt.num_engines;
-	if (e->compute.pfence) {
-		dma_fence_enable_sw_signaling(e->compute.pfence);
-		dma_fence_put(e->compute.pfence);
-		e->compute.pfence = NULL;
+	down_write(&q->vm->lock);
+	list_del(&q->compute.link);
+	--q->vm->preempt.num_exec_queues;
+	if (q->compute.pfence) {
+		dma_fence_enable_sw_signaling(q->compute.pfence);
+		dma_fence_put(q->compute.pfence);
+		q->compute.pfence = NULL;
 	}
-	up_write(&e->vm->lock);
+	up_write(&q->vm->lock);
 }
 
 /**
- * xe_engine_is_lr() - Whether an engine is long-running
- * @e: The engine
+ * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
+ * @q: The exec_queue
  *
- * Return: True if the engine is long-running, false otherwise.
+ * Return: True if the exec_queue is long-running, false otherwise.
  */
-bool xe_engine_is_lr(struct xe_engine *e)
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
 {
-	return e->vm && xe_vm_no_dma_fences(e->vm) &&
-		!(e->flags & ENGINE_FLAG_VM);
+	return q->vm && xe_vm_no_dma_fences(q->vm) &&
+		!(q->flags & EXEC_QUEUE_FLAG_VM);
 }
 
-static s32 xe_engine_num_job_inflight(struct xe_engine *e)
+static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
 {
-	return e->lrc->fence_ctx.next_seqno - xe_lrc_seqno(e->lrc) - 1;
+	return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;
 }
 
 /**
- * xe_engine_ring_full() - Whether an engine's ring is full
- * @e: The engine
+ * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
+ * @q: The exec_queue
  *
- * Return: True if the engine's ring is full, false otherwise.
+ * Return: True if the exec_queue's ring is full, false otherwise.
  */
-bool xe_engine_ring_full(struct xe_engine *e)
+bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
 {
-	struct xe_lrc *lrc = e->lrc;
+	struct xe_lrc *lrc = q->lrc;
 	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
 
-	return xe_engine_num_job_inflight(e) >= max_job;
+	return xe_exec_queue_num_job_inflight(q) >= max_job;
 }
 
 /**
- * xe_engine_is_idle() - Whether an engine is idle.
- * @engine: The engine
+ * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
+ * @q: The exec_queue
  *
  * FIXME: Need to determine what to use as the short-lived
- * timeline lock for the engines, so that the return value
+ * timeline lock for the exec_queues, so that the return value
  * of this function becomes more than just an advisory
  * snapshot in time. The timeline lock must protect the
- * seqno from racing submissions on the same engine.
+ * seqno from racing submissions on the same exec_queue.
  * Typically vm->resv, but user-created timeline locks use the migrate vm
  * and never grabs the migrate vm->resv so we have a race there.
  *
- * Return: True if the engine is idle, false otherwise.
+ * Return: True if the exec_queue is idle, false otherwise.
  */
-bool xe_engine_is_idle(struct xe_engine *engine)
+bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
 {
-	if (XE_WARN_ON(xe_engine_is_parallel(engine)))
+	if (XE_WARN_ON(xe_exec_queue_is_parallel(q)))
 		return false;
 
-	return xe_lrc_seqno(&engine->lrc[0]) ==
-		engine->lrc[0].fence_ctx.next_seqno - 1;
+	return xe_lrc_seqno(&q->lrc[0]) ==
+		q->lrc[0].fence_ctx.next_seqno - 1;
 }
 
-void xe_engine_kill(struct xe_engine *e)
+void xe_exec_queue_kill(struct xe_exec_queue *q)
 {
-	struct xe_engine *engine = e, *next;
+	struct xe_exec_queue *eq = q, *next;
 
-	list_for_each_entry_safe(engine, next, &engine->multi_gt_list,
+	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
 				 multi_gt_link) {
-		e->ops->kill(engine);
-		engine_kill_compute(engine);
+		q->ops->kill(eq);
+		exec_queue_kill_compute(eq);
 	}
 
-	e->ops->kill(e);
-	engine_kill_compute(e);
+	q->ops->kill(q);
+	exec_queue_kill_compute(q);
 }
 
-int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file)
+int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = to_xe_file(file);
-	struct drm_xe_engine_destroy *args = data;
-	struct xe_engine *e;
+	struct drm_xe_exec_queue_destroy *args = data;
+	struct xe_exec_queue *q;
 
 	if (XE_IOCTL_DBG(xe, args->pad) ||
 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	mutex_lock(&xef->engine.lock);
-	e = xa_erase(&xef->engine.xa, args->engine_id);
-	mutex_unlock(&xef->engine.lock);
-	if (XE_IOCTL_DBG(xe, !e))
+	mutex_lock(&xef->exec_queue.lock);
+	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+	mutex_unlock(&xef->exec_queue.lock);
+	if (XE_IOCTL_DBG(xe, !q))
 		return -ENOENT;
 
-	if (!(e->flags & ENGINE_FLAG_PERSISTENT))
-		xe_engine_kill(e);
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT))
+		xe_exec_queue_kill(q);
 	else
-		xe_device_add_persistent_engines(xe, e);
+		xe_device_add_persistent_exec_queues(xe, q);
 
-	trace_xe_engine_close(e);
-	xe_engine_put(e);
+	trace_xe_exec_queue_close(q);
+	xe_exec_queue_put(q);
 
 	return 0;
 }
 
-int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file)
+int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = to_xe_file(file);
-	struct drm_xe_engine_set_property *args = data;
-	struct xe_engine *e;
+	struct drm_xe_exec_queue_set_property *args = data;
+	struct xe_exec_queue *q;
 	int ret;
 	u32 idx;
 
 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	e = xe_engine_lookup(xef, args->engine_id);
-	if (XE_IOCTL_DBG(xe, !e))
+	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+	if (XE_IOCTL_DBG(xe, !q))
 		return -ENOENT;
 
 	if (XE_IOCTL_DBG(xe, args->property >=
-			 ARRAY_SIZE(engine_set_property_funcs))) {
+			 ARRAY_SIZE(exec_queue_set_property_funcs))) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	idx = array_index_nospec(args->property,
-				 ARRAY_SIZE(engine_set_property_funcs));
-	ret = engine_set_property_funcs[idx](xe, e, args->value, false);
+				 ARRAY_SIZE(exec_queue_set_property_funcs));
+	ret = exec_queue_set_property_funcs[idx](xe, q, args->value, false);
 	if (XE_IOCTL_DBG(xe, ret))
 		goto out;
 
 	if (args->extensions)
-		ret = engine_user_extensions(xe, e, args->extensions, 0,
-					     false);
+		ret = exec_queue_user_extensions(xe, q, args->extensions, 0,
+						 false);
 out:
-	xe_engine_put(e);
+	xe_exec_queue_put(q);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 3017e4fe308d2..94a6abee38a60 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -3,10 +3,10 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#ifndef _XE_ENGINE_H_
-#define _XE_ENGINE_H_
+#ifndef _XE_EXEC_QUEUE_H_
+#define _XE_EXEC_QUEUE_H_
 
-#include "xe_engine_types.h"
+#include "xe_exec_queue_types.h"
 #include "xe_vm_types.h"
 
 struct drm_device;
@@ -14,50 +14,50 @@ struct drm_file;
 struct xe_device;
 struct xe_file;
 
-struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
-				   u32 logical_mask, u16 width,
-				   struct xe_hw_engine *hw_engine, u32 flags);
-struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
-					 struct xe_vm *vm,
-					 enum xe_engine_class class, u32 flags);
+struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
+					   u32 logical_mask, u16 width,
+					   struct xe_hw_engine *hw_engine, u32 flags);
+struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
+						 struct xe_vm *vm,
+						 enum xe_engine_class class, u32 flags);
 
-void xe_engine_fini(struct xe_engine *e);
-void xe_engine_destroy(struct kref *ref);
+void xe_exec_queue_fini(struct xe_exec_queue *q);
+void xe_exec_queue_destroy(struct kref *ref);
 
-struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id);
+struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id);
 
-static inline struct xe_engine *xe_engine_get(struct xe_engine *engine)
+static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q)
 {
-	kref_get(&engine->refcount);
-	return engine;
+	kref_get(&q->refcount);
+	return q;
 }
 
-static inline void xe_engine_put(struct xe_engine *engine)
+static inline void xe_exec_queue_put(struct xe_exec_queue *q)
 {
-	kref_put(&engine->refcount, xe_engine_destroy);
+	kref_put(&q->refcount, xe_exec_queue_destroy);
 }
 
-static inline bool xe_engine_is_parallel(struct xe_engine *engine)
+static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q)
 {
-	return engine->width > 1;
+	return q->width > 1;
 }
 
-bool xe_engine_is_lr(struct xe_engine *e);
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
 
-bool xe_engine_ring_full(struct xe_engine *e);
+bool xe_exec_queue_ring_full(struct xe_exec_queue *q);
 
-bool xe_engine_is_idle(struct xe_engine *engine);
+bool xe_exec_queue_is_idle(struct xe_exec_queue *q);
 
-void xe_engine_kill(struct xe_engine *e);
+void xe_exec_queue_kill(struct xe_exec_queue *q);
 
-int xe_engine_create_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file);
-int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file);
-int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file);
-int xe_engine_get_property_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file);
-enum xe_engine_priority xe_engine_device_get_max_priority(struct xe_device *xe);
+int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file);
+int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file);
+enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
new file mode 100644
index 0000000000000..4506289b8b7bd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_QUEUE_TYPES_H_
+#define _XE_EXEC_QUEUE_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_lrc_types.h"
+
+struct xe_execlist_exec_queue;
+struct xe_gt;
+struct xe_guc_exec_queue;
+struct xe_hw_engine;
+struct xe_vm;
+
+enum xe_exec_queue_priority {
+	XE_EXEC_QUEUE_PRIORITY_UNSET = -2, /* For execlist usage only */
+	XE_EXEC_QUEUE_PRIORITY_LOW = 0,
+	XE_EXEC_QUEUE_PRIORITY_NORMAL,
+	XE_EXEC_QUEUE_PRIORITY_HIGH,
+	XE_EXEC_QUEUE_PRIORITY_KERNEL,
+
+	XE_EXEC_QUEUE_PRIORITY_COUNT
+};
+
+/**
+ * struct xe_exec_queue - Execution queue
+ *
+ * Contains all state necessary for submissions. Can either be a user object or
+ * a kernel object.
+ */
+struct xe_exec_queue {
+	/** @gt: graphics tile this exec queue can submit to */
+	struct xe_gt *gt;
+	/**
+	 * @hwe: A hardware of the same class. May (physical engine) or may not
+	 * (virtual engine) be where jobs actual engine up running. Should never
+	 * really be used for submissions.
+	 */
+	struct xe_hw_engine *hwe;
+	/** @refcount: ref count of this exec queue */
+	struct kref refcount;
+	/** @vm: VM (address space) for this exec queue */
+	struct xe_vm *vm;
+	/** @class: class of this exec queue */
+	enum xe_engine_class class;
+	/** @priority: priority of this exec queue */
+	enum xe_exec_queue_priority priority;
+	/**
+	 * @logical_mask: logical mask of where job submitted to exec queue can run
+	 */
+	u32 logical_mask;
+	/** @name: name of this exec queue */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @width: width (number BB submitted per exec) of this exec queue */
+	u16 width;
+	/** @fence_irq: fence IRQ used to signal job completion */
+	struct xe_hw_fence_irq *fence_irq;
+
+#define EXEC_QUEUE_FLAG_BANNED		BIT(0)
+#define EXEC_QUEUE_FLAG_KERNEL		BIT(1)
+#define EXEC_QUEUE_FLAG_PERSISTENT		BIT(2)
+#define EXEC_QUEUE_FLAG_COMPUTE_MODE	BIT(3)
+/* Caller needs to hold rpm ref when creating engine with EXEC_QUEUE_FLAG_VM */
+#define EXEC_QUEUE_FLAG_VM			BIT(4)
+#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+#define EXEC_QUEUE_FLAG_WA			BIT(6)
+
+	/**
+	 * @flags: flags for this exec queue, should statically setup aside from ban
+	 * bit
+	 */
+	unsigned long flags;
+
+	union {
+		/** @multi_gt_list: list head for VM bind engines if multi-GT */
+		struct list_head multi_gt_list;
+		/** @multi_gt_link: link for VM bind engines if multi-GT */
+		struct list_head multi_gt_link;
+	};
+
+	union {
+		/** @execlist: execlist backend specific state for exec queue */
+		struct xe_execlist_exec_queue *execlist;
+		/** @guc: GuC backend specific state for exec queue */
+		struct xe_guc_exec_queue *guc;
+	};
+
+	/**
+	 * @persistent: persistent exec queue state
+	 */
+	struct {
+		/** @xef: file which this exec queue belongs to */
+		struct xe_file *xef;
+		/** @link: link in list of persistent exec queues */
+		struct list_head link;
+	} persistent;
+
+	union {
+		/**
+		 * @parallel: parallel submission state
+		 */
+		struct {
+			/** @composite_fence_ctx: context composite fence */
+			u64 composite_fence_ctx;
+			/** @composite_fence_seqno: seqno for composite fence */
+			u32 composite_fence_seqno;
+		} parallel;
+		/**
+		 * @bind: bind submission state
+		 */
+		struct {
+			/** @fence_ctx: context bind fence */
+			u64 fence_ctx;
+			/** @fence_seqno: seqno for bind fence */
+			u32 fence_seqno;
+		} bind;
+	};
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+	} sched_props;
+
+	/** @compute: compute exec queue state */
+	struct {
+		/** @pfence: preemption fence */
+		struct dma_fence *pfence;
+		/** @context: preemption fence context */
+		u64 context;
+		/** @seqno: preemption fence seqno */
+		u32 seqno;
+		/** @link: link into VM's list of exec queues */
+		struct list_head link;
+		/** @lock: preemption fences lock */
+		spinlock_t lock;
+	} compute;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @acc_trigger: access counter trigger */
+		u32 acc_trigger;
+		/** @acc_notify: access counter notify */
+		u32 acc_notify;
+		/** @acc_granularity: access counter granularity */
+		u32 acc_granularity;
+	} usm;
+
+	/** @ops: submission backend exec queue operations */
+	const struct xe_exec_queue_ops *ops;
+
+	/** @ring_ops: ring operations for this exec queue */
+	const struct xe_ring_ops *ring_ops;
+	/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
+	struct drm_sched_entity *entity;
+	/** @lrc: logical ring context for this exec queue */
+	struct xe_lrc lrc[];
+};
+
+/**
+ * struct xe_exec_queue_ops - Submission backend exec queue operations
+ */
+struct xe_exec_queue_ops {
+	/** @init: Initialize exec queue for submission backend */
+	int (*init)(struct xe_exec_queue *q);
+	/** @kill: Kill inflight submissions for backend */
+	void (*kill)(struct xe_exec_queue *q);
+	/** @fini: Fini exec queue for submission backend */
+	void (*fini)(struct xe_exec_queue *q);
+	/** @set_priority: Set priority for exec queue */
+	int (*set_priority)(struct xe_exec_queue *q,
+			    enum xe_exec_queue_priority priority);
+	/** @set_timeslice: Set timeslice for exec queue */
+	int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us);
+	/** @set_preempt_timeout: Set preemption timeout for exec queue */
+	int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us);
+	/** @set_job_timeout: Set job timeout for exec queue */
+	int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms);
+	/**
+	 * @suspend: Suspend exec queue from executing, allowed to be called
+	 * multiple times in a row before resume with the caveat that
+	 * suspend_wait returns before calling suspend again.
+	 */
+	int (*suspend)(struct xe_exec_queue *q);
+	/**
+	 * @suspend_wait: Wait for an exec queue to suspend executing, should be
+	 * call after suspend.
+	 */
+	void (*suspend_wait)(struct xe_exec_queue *q);
+	/**
+	 * @resume: Resume exec queue execution, exec queue must be in a suspended
+	 * state and dma fence returned from most recent suspend call must be
+	 * signalled when this function is called.
+	 */
+	void (*resume)(struct xe_exec_queue *q);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 5b6748e1a37fa..3b8be55fe19c9 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -91,7 +91,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 }
 
 static void __xe_execlist_port_start(struct xe_execlist_port *port,
-				     struct xe_execlist_engine *exl)
+				     struct xe_execlist_exec_queue *exl)
 {
 	struct xe_device *xe = gt_to_xe(port->hwe->gt);
 	int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID);
@@ -109,7 +109,7 @@ static void __xe_execlist_port_start(struct xe_execlist_port *port,
 			port->last_ctx_id = 1;
 	}
 
-	__start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id);
+	__start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id);
 	port->running_exl = exl;
 	exl->has_run = true;
 }
@@ -128,16 +128,16 @@ static void __xe_execlist_port_idle(struct xe_execlist_port *port)
 	port->running_exl = NULL;
 }
 
-static bool xe_execlist_is_idle(struct xe_execlist_engine *exl)
+static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
 {
-	struct xe_lrc *lrc = exl->engine->lrc;
+	struct xe_lrc *lrc = exl->q->lrc;
 
 	return lrc->ring.tail == lrc->ring.old_tail;
 }
 
 static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
 {
-	struct xe_execlist_engine *exl = NULL;
+	struct xe_execlist_exec_queue *exl = NULL;
 	int i;
 
 	xe_execlist_port_assert_held(port);
@@ -145,12 +145,12 @@ static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
 	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
 		while (!list_empty(&port->active[i])) {
 			exl = list_first_entry(&port->active[i],
-					       struct xe_execlist_engine,
+					       struct xe_execlist_exec_queue,
 					       active_link);
 			list_del(&exl->active_link);
 
 			if (xe_execlist_is_idle(exl)) {
-				exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
 				continue;
 			}
 
@@ -198,7 +198,7 @@ static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
 }
 
 static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
-					 enum xe_engine_priority priority)
+					 enum xe_exec_queue_priority priority)
 {
 	xe_execlist_port_assert_held(port);
 
@@ -208,25 +208,25 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
 	__xe_execlist_port_start_next_active(port);
 }
 
-static void xe_execlist_make_active(struct xe_execlist_engine *exl)
+static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
 {
 	struct xe_execlist_port *port = exl->port;
-	enum xe_engine_priority priority = exl->active_priority;
+	enum xe_exec_queue_priority priority = exl->active_priority;
 
-	XE_WARN_ON(priority == XE_ENGINE_PRIORITY_UNSET);
+	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
 	XE_WARN_ON(priority < 0);
 	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
 
 	spin_lock_irq(&port->lock);
 
 	if (exl->active_priority != priority &&
-	    exl->active_priority != XE_ENGINE_PRIORITY_UNSET) {
+	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
 		/* Priority changed, move it to the right list */
 		list_del(&exl->active_link);
-		exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
 	}
 
-	if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) {
+	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
 		exl->active_priority = priority;
 		list_add_tail(&exl->active_link, &port->active[priority]);
 	}
@@ -293,10 +293,10 @@ static struct dma_fence *
 execlist_run_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
-	struct xe_engine *e = job->engine;
-	struct xe_execlist_engine *exl = job->engine->execlist;
+	struct xe_exec_queue *q = job->q;
+	struct xe_execlist_exec_queue *exl = job->q->execlist;
 
-	e->ring_ops->emit_job(job);
+	q->ring_ops->emit_job(job);
 	xe_execlist_make_active(exl);
 
 	return dma_fence_get(job->fence);
@@ -314,11 +314,11 @@ static const struct drm_sched_backend_ops drm_sched_ops = {
 	.free_job = execlist_job_free,
 };
 
-static int execlist_engine_init(struct xe_engine *e)
+static int execlist_exec_queue_init(struct xe_exec_queue *q)
 {
 	struct drm_gpu_scheduler *sched;
-	struct xe_execlist_engine *exl;
-	struct xe_device *xe = gt_to_xe(e->gt);
+	struct xe_execlist_exec_queue *exl;
+	struct xe_device *xe = gt_to_xe(q->gt);
 	int err;
 
 	XE_WARN_ON(xe_device_guc_submission_enabled(xe));
@@ -329,13 +329,13 @@ static int execlist_engine_init(struct xe_engine *e)
 	if (!exl)
 		return -ENOMEM;
 
-	exl->engine = e;
+	exl->q = q;
 
 	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
-			     e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
 			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
-			     NULL, NULL, e->hwe->name,
-			     gt_to_xe(e->gt)->drm.dev);
+			     NULL, NULL, q->hwe->name,
+			     gt_to_xe(q->gt)->drm.dev);
 	if (err)
 		goto err_free;
 
@@ -344,30 +344,30 @@ static int execlist_engine_init(struct xe_engine *e)
 	if (err)
 		goto err_sched;
 
-	exl->port = e->hwe->exl_port;
+	exl->port = q->hwe->exl_port;
 	exl->has_run = false;
-	exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
-	e->execlist = exl;
-	e->entity = &exl->entity;
+	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
+	q->execlist = exl;
+	q->entity = &exl->entity;
 
-	switch (e->class) {
+	switch (q->class) {
 	case XE_ENGINE_CLASS_RENDER:
-		sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1);
+		sprintf(q->name, "rcs%d", ffs(q->logical_mask) - 1);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_DECODE:
-		sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1);
+		sprintf(q->name, "vcs%d", ffs(q->logical_mask) - 1);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
-		sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1);
+		sprintf(q->name, "vecs%d", ffs(q->logical_mask) - 1);
 		break;
 	case XE_ENGINE_CLASS_COPY:
-		sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1);
+		sprintf(q->name, "bcs%d", ffs(q->logical_mask) - 1);
 		break;
 	case XE_ENGINE_CLASS_COMPUTE:
-		sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1);
+		sprintf(q->name, "ccs%d", ffs(q->logical_mask) - 1);
 		break;
 	default:
-		XE_WARN_ON(e->class);
+		XE_WARN_ON(q->class);
 	}
 
 	return 0;
@@ -379,96 +379,96 @@ err_free:
 	return err;
 }
 
-static void execlist_engine_fini_async(struct work_struct *w)
+static void execlist_exec_queue_fini_async(struct work_struct *w)
 {
-	struct xe_execlist_engine *ee =
-		container_of(w, struct xe_execlist_engine, fini_async);
-	struct xe_engine *e = ee->engine;
-	struct xe_execlist_engine *exl = e->execlist;
+	struct xe_execlist_exec_queue *ee =
+		container_of(w, struct xe_execlist_exec_queue, fini_async);
+	struct xe_exec_queue *q = ee->q;
+	struct xe_execlist_exec_queue *exl = q->execlist;
 	unsigned long flags;
 
-	XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+	XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(q->gt)));
 
 	spin_lock_irqsave(&exl->port->lock, flags);
-	if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET))
+	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
 		list_del(&exl->active_link);
 	spin_unlock_irqrestore(&exl->port->lock, flags);
 
-	if (e->flags & ENGINE_FLAG_PERSISTENT)
-		xe_device_remove_persistent_engines(gt_to_xe(e->gt), e);
+	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
+		xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
 	drm_sched_entity_fini(&exl->entity);
 	drm_sched_fini(&exl->sched);
 	kfree(exl);
 
-	xe_engine_fini(e);
+	xe_exec_queue_fini(q);
 }
 
-static void execlist_engine_kill(struct xe_engine *e)
+static void execlist_exec_queue_kill(struct xe_exec_queue *q)
 {
 	/* NIY */
 }
 
-static void execlist_engine_fini(struct xe_engine *e)
+static void execlist_exec_queue_fini(struct xe_exec_queue *q)
 {
-	INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async);
-	queue_work(system_unbound_wq, &e->execlist->fini_async);
+	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
+	queue_work(system_unbound_wq, &q->execlist->fini_async);
 }
 
-static int execlist_engine_set_priority(struct xe_engine *e,
-					enum xe_engine_priority priority)
+static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
+					    enum xe_exec_queue_priority priority)
 {
 	/* NIY */
 	return 0;
 }
 
-static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
 {
 	/* NIY */
 	return 0;
 }
 
-static int execlist_engine_set_preempt_timeout(struct xe_engine *e,
-					       u32 preempt_timeout_us)
+static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
+						   u32 preempt_timeout_us)
 {
 	/* NIY */
 	return 0;
 }
 
-static int execlist_engine_set_job_timeout(struct xe_engine *e,
-					   u32 job_timeout_ms)
+static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q,
+					       u32 job_timeout_ms)
 {
 	/* NIY */
 	return 0;
 }
 
-static int execlist_engine_suspend(struct xe_engine *e)
+static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
 {
 	/* NIY */
 	return 0;
 }
 
-static void execlist_engine_suspend_wait(struct xe_engine *e)
+static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
 
 {
 	/* NIY */
 }
 
-static void execlist_engine_resume(struct xe_engine *e)
+static void execlist_exec_queue_resume(struct xe_exec_queue *q)
 {
 	/* NIY */
 }
 
-static const struct xe_engine_ops execlist_engine_ops = {
-	.init = execlist_engine_init,
-	.kill = execlist_engine_kill,
-	.fini = execlist_engine_fini,
-	.set_priority = execlist_engine_set_priority,
-	.set_timeslice = execlist_engine_set_timeslice,
-	.set_preempt_timeout = execlist_engine_set_preempt_timeout,
-	.set_job_timeout = execlist_engine_set_job_timeout,
-	.suspend = execlist_engine_suspend,
-	.suspend_wait = execlist_engine_suspend_wait,
-	.resume = execlist_engine_resume,
+static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
+	.init = execlist_exec_queue_init,
+	.kill = execlist_exec_queue_kill,
+	.fini = execlist_exec_queue_fini,
+	.set_priority = execlist_exec_queue_set_priority,
+	.set_timeslice = execlist_exec_queue_set_timeslice,
+	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
+	.set_job_timeout = execlist_exec_queue_set_job_timeout,
+	.suspend = execlist_exec_queue_suspend,
+	.suspend_wait = execlist_exec_queue_suspend_wait,
+	.resume = execlist_exec_queue_resume,
 };
 
 int xe_execlist_init(struct xe_gt *gt)
@@ -477,7 +477,7 @@ int xe_execlist_init(struct xe_gt *gt)
 	if (xe_device_guc_submission_enabled(gt_to_xe(gt)))
 		return 0;
 
-	gt->engine_ops = &execlist_engine_ops;
+	gt->exec_queue_ops = &execlist_exec_queue_ops;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
index 9b1239b47292b..f94bbf4c53e43 100644
--- a/drivers/gpu/drm/xe/xe_execlist_types.h
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -10,27 +10,27 @@
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
 
-#include "xe_engine_types.h"
+#include "xe_exec_queue_types.h"
 
 struct xe_hw_engine;
-struct xe_execlist_engine;
+struct xe_execlist_exec_queue;
 
 struct xe_execlist_port {
 	struct xe_hw_engine *hwe;
 
 	spinlock_t lock;
 
-	struct list_head active[XE_ENGINE_PRIORITY_COUNT];
+	struct list_head active[XE_EXEC_QUEUE_PRIORITY_COUNT];
 
 	u32 last_ctx_id;
 
-	struct xe_execlist_engine *running_exl;
+	struct xe_execlist_exec_queue *running_exl;
 
 	struct timer_list irq_fail;
 };
 
-struct xe_execlist_engine {
-	struct xe_engine *engine;
+struct xe_execlist_exec_queue {
+	struct xe_exec_queue *q;
 
 	struct drm_gpu_scheduler sched;
 
@@ -42,7 +42,7 @@ struct xe_execlist_engine {
 
 	struct work_struct fini_async;
 
-	enum xe_engine_priority active_priority;
+	enum xe_exec_queue_priority active_priority;
 	struct list_head active_link;
 };
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 543b085723c58..3077faa1e7923 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -26,7 +26,7 @@
 #include "xe_gt_sysfs.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
-#include "xe_guc_engine_types.h"
+#include "xe_guc_exec_queue_types.h"
 #include "xe_hw_fence.h"
 #include "xe_irq.h"
 #include "xe_lrc.h"
@@ -81,7 +81,7 @@ static void gt_fini(struct drm_device *drm, void *arg)
 
 static void gt_reset_worker(struct work_struct *w);
 
-static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
+static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
 {
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
@@ -94,7 +94,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 		return PTR_ERR(bb);
 
 	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
-	job = xe_bb_create_wa_job(e, bb, batch_ofs);
+	job = xe_bb_create_wa_job(q, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
 		return PTR_ERR(job);
@@ -115,9 +115,9 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
 	return 0;
 }
 
-static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
+static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 {
-	struct xe_reg_sr *sr = &e->hwe->reg_lrc;
+	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
 	struct xe_reg_sr_entry *entry;
 	unsigned long reg;
 	struct xe_sched_job *job;
@@ -143,7 +143,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
 	}
 
 	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
-	job = xe_bb_create_wa_job(e, bb, batch_ofs);
+	job = xe_bb_create_wa_job(q, bb, batch_ofs);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
 		return PTR_ERR(job);
@@ -173,7 +173,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 	int err = 0;
 
 	for_each_hw_engine(hwe, gt, id) {
-		struct xe_engine *e, *nop_e;
+		struct xe_exec_queue *q, *nop_q;
 		struct xe_vm *vm;
 		void *default_lrc;
 
@@ -192,58 +192,58 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 			return -ENOMEM;
 
 		vm = xe_migrate_get_vm(tile->migrate);
-		e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1,
-				     hwe, ENGINE_FLAG_WA);
-		if (IS_ERR(e)) {
-			err = PTR_ERR(e);
-			xe_gt_err(gt, "hwe %s: xe_engine_create failed (%pe)\n",
-				  hwe->name, e);
+		q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1,
+					 hwe, EXEC_QUEUE_FLAG_WA);
+		if (IS_ERR(q)) {
+			err = PTR_ERR(q);
+			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
+				  hwe->name, q);
 			goto put_vm;
 		}
 
 		/* Prime golden LRC with known good state */
-		err = emit_wa_job(gt, e);
+		err = emit_wa_job(gt, q);
 		if (err) {
 			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
-				  hwe->name, ERR_PTR(err), e->guc->id);
-			goto put_engine;
+				  hwe->name, ERR_PTR(err), q->guc->id);
+			goto put_exec_queue;
 		}
 
-		nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance),
-					 1, hwe, ENGINE_FLAG_WA);
-		if (IS_ERR(nop_e)) {
-			err = PTR_ERR(nop_e);
-			xe_gt_err(gt, "hwe %s: nop xe_engine_create failed (%pe)\n",
-				  hwe->name, nop_e);
-			goto put_engine;
+		nop_q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance),
+					     1, hwe, EXEC_QUEUE_FLAG_WA);
+		if (IS_ERR(nop_q)) {
+			err = PTR_ERR(nop_q);
+			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
+				  hwe->name, nop_q);
+			goto put_exec_queue;
 		}
 
 		/* Switch to different LRC */
-		err = emit_nop_job(gt, nop_e);
+		err = emit_nop_job(gt, nop_q);
 		if (err) {
 			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
-				  hwe->name, ERR_PTR(err), nop_e->guc->id);
-			goto put_nop_e;
+				  hwe->name, ERR_PTR(err), nop_q->guc->id);
+			goto put_nop_q;
 		}
 
 		/* Reload golden LRC to record the effect of any indirect W/A */
-		err = emit_nop_job(gt, e);
+		err = emit_nop_job(gt, q);
 		if (err) {
 			xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
-				  hwe->name, ERR_PTR(err), e->guc->id);
-			goto put_nop_e;
+				  hwe->name, ERR_PTR(err), q->guc->id);
+			goto put_nop_q;
 		}
 
 		xe_map_memcpy_from(xe, default_lrc,
-				   &e->lrc[0].bo->vmap,
-				   xe_lrc_pphwsp_offset(&e->lrc[0]),
+				   &q->lrc[0].bo->vmap,
+				   xe_lrc_pphwsp_offset(&q->lrc[0]),
 				   xe_lrc_size(xe, hwe->class));
 
 		gt->default_lrc[hwe->class] = default_lrc;
-put_nop_e:
-		xe_engine_put(nop_e);
-put_engine:
-		xe_engine_put(e);
+put_nop_q:
+		xe_exec_queue_put(nop_q);
+put_exec_queue:
+		xe_exec_queue_put(q);
 put_vm:
 		xe_vm_put(vm);
 		if (err)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 78a9fe9f0bd36..c326932e53d79 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -14,7 +14,7 @@
 #include "xe_sa_types.h"
 #include "xe_uc_types.h"
 
-struct xe_engine_ops;
+struct xe_exec_queue_ops;
 struct xe_migrate;
 struct xe_ring_ops;
 
@@ -269,8 +269,8 @@ struct xe_gt {
 	/** @gtidle: idle properties of GT */
 	struct xe_gt_idle gtidle;
 
-	/** @engine_ops: submission backend engine operations */
-	const struct xe_engine_ops *engine_ops;
+	/** @exec_queue_ops: submission backend exec queue operations */
+	const struct xe_exec_queue_ops *exec_queue_ops;
 
 	/**
 	 * @ring_ops: ring operations for this hw engine (1 per engine class)
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index a7da29be2e514..7d1244df959df 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -495,7 +495,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
 		u8 gc;
 
 		/*
-		 * 1. Write all MMIO entries for this engine to the table. No
+		 * 1. Write all MMIO entries for this exec queue to the table. No
 		 * need to worry about fused-off engines and when there are
 		 * entries in the regset: the reg_state_list has been zero'ed
 		 * by xe_guc_ads_populate()
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index fb1d63ffaee49..59136b6a7c6f4 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -888,11 +888,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 		ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
 		break;
 	case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
-		ret = xe_guc_engine_reset_handler(guc, payload, adj_len);
+		ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
 		break;
 	case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
-		ret = xe_guc_engine_reset_failure_handler(guc, payload,
-							  adj_len);
+		ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
+							      adj_len);
 		break;
 	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
 		/* Selftest only at the moment */
@@ -902,8 +902,8 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 		/* FIXME: Handle this */
 		break;
 	case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
-		ret = xe_guc_engine_memory_cat_error_handler(guc, payload,
-							     adj_len);
+		ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
+								 adj_len);
 		break;
 	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
 		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
similarity index 59%
rename from drivers/gpu/drm/xe/xe_guc_engine_types.h
rename to drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
index 5565412fe7f11..4c39f01e4f528 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
@@ -12,22 +12,22 @@
 #include "xe_gpu_scheduler_types.h"
 
 struct dma_fence;
-struct xe_engine;
+struct xe_exec_queue;
 
 /**
- * struct xe_guc_engine - GuC specific state for an xe_engine
+ * struct xe_guc_exec_queue - GuC specific state for an xe_exec_queue
  */
-struct xe_guc_engine {
-	/** @engine: Backpointer to parent xe_engine */
-	struct xe_engine *engine;
-	/** @sched: GPU scheduler for this xe_engine */
+struct xe_guc_exec_queue {
+	/** @q: Backpointer to parent xe_exec_queue */
+	struct xe_exec_queue *q;
+	/** @sched: GPU scheduler for this xe_exec_queue */
 	struct xe_gpu_scheduler sched;
-	/** @entity: Scheduler entity for this xe_engine */
+	/** @entity: Scheduler entity for this xe_exec_queue */
 	struct xe_sched_entity entity;
 	/**
-	 * @static_msgs: Static messages for this xe_engine, used when a message
-	 * needs to sent through the GPU scheduler but memory allocations are
-	 * not allowed.
+	 * @static_msgs: Static messages for this xe_exec_queue, used when
+	 * a message needs to sent through the GPU scheduler but memory
+	 * allocations are not allowed.
 	 */
 #define MAX_STATIC_MSG_TYPE	3
 	struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
@@ -37,17 +37,17 @@ struct xe_guc_engine {
 	struct work_struct fini_async;
 	/** @resume_time: time of last resume */
 	u64 resume_time;
-	/** @state: GuC specific state for this xe_engine */
+	/** @state: GuC specific state for this xe_exec_queue */
 	atomic_t state;
 	/** @wqi_head: work queue item tail */
 	u32 wqi_head;
 	/** @wqi_tail: work queue item tail */
 	u32 wqi_tail;
-	/** @id: GuC id for this xe_engine */
+	/** @id: GuC id for this exec_queue */
 	u16 id;
 	/** @suspend_wait: wait queue used to wait on pending suspends */
 	wait_queue_head_t suspend_wait;
-	/** @suspend_pending: a suspend of the engine is pending */
+	/** @suspend_pending: a suspend of the exec_queue is pending */
 	bool suspend_pending;
 };
 
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 7515d7fbb723d..4216a6d9e4787 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -69,13 +69,13 @@ struct guc_klv_generic_dw_t {
 } __packed;
 
 /* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
-struct guc_update_engine_policy_header {
+struct guc_update_exec_queue_policy_header {
 	u32 action;
 	u32 guc_id;
 } __packed;
 
-struct guc_update_engine_policy {
-	struct guc_update_engine_policy_header header;
+struct guc_update_exec_queue_policy {
+	struct guc_update_exec_queue_policy_header header;
 	struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
 } __packed;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 5198e91eeefbe..42454c12efb30 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -22,7 +22,7 @@
 #include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
-#include "xe_guc_engine_types.h"
+#include "xe_guc_exec_queue_types.h"
 #include "xe_guc_submit_types.h"
 #include "xe_hw_engine.h"
 #include "xe_hw_fence.h"
@@ -48,9 +48,9 @@ guc_to_xe(struct xe_guc *guc)
 }
 
 static struct xe_guc *
-engine_to_guc(struct xe_engine *e)
+exec_queue_to_guc(struct xe_exec_queue *q)
 {
-	return &e->gt->uc.guc;
+	return &q->gt->uc.guc;
 }
 
 /*
@@ -58,140 +58,140 @@ engine_to_guc(struct xe_engine *e)
  * as the same time (e.g. a suspend can be happning at the same time as schedule
  * engine done being processed).
  */
-#define ENGINE_STATE_REGISTERED		(1 << 0)
+#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
 #define ENGINE_STATE_ENABLED		(1 << 1)
-#define ENGINE_STATE_PENDING_ENABLE	(1 << 2)
-#define ENGINE_STATE_PENDING_DISABLE	(1 << 3)
-#define ENGINE_STATE_DESTROYED		(1 << 4)
+#define EXEC_QUEUE_STATE_PENDING_ENABLE	(1 << 2)
+#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
+#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
 #define ENGINE_STATE_SUSPENDED		(1 << 5)
-#define ENGINE_STATE_RESET		(1 << 6)
+#define EXEC_QUEUE_STATE_RESET		(1 << 6)
 #define ENGINE_STATE_KILLED		(1 << 7)
 
-static bool engine_registered(struct xe_engine *e)
+static bool exec_queue_registered(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
 }
 
-static void set_engine_registered(struct xe_engine *e)
+static void set_exec_queue_registered(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
+	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
 }
 
-static void clear_engine_registered(struct xe_engine *e)
+static void clear_exec_queue_registered(struct xe_exec_queue *q)
 {
-	atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
+	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
 }
 
-static bool engine_enabled(struct xe_engine *e)
+static bool exec_queue_enabled(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
+	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
 }
 
-static void set_engine_enabled(struct xe_engine *e)
+static void set_exec_queue_enabled(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
+	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
 }
 
-static void clear_engine_enabled(struct xe_engine *e)
+static void clear_exec_queue_enabled(struct xe_exec_queue *q)
 {
-	atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
+	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
 }
 
-static bool engine_pending_enable(struct xe_engine *e)
+static bool exec_queue_pending_enable(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
 }
 
-static void set_engine_pending_enable(struct xe_engine *e)
+static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
 }
 
-static void clear_engine_pending_enable(struct xe_engine *e)
+static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
 {
-	atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
 }
 
-static bool engine_pending_disable(struct xe_engine *e)
+static bool exec_queue_pending_disable(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
 }
 
-static void set_engine_pending_disable(struct xe_engine *e)
+static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
 }
 
-static void clear_engine_pending_disable(struct xe_engine *e)
+static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
 {
-	atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
 }
 
-static bool engine_destroyed(struct xe_engine *e)
+static bool exec_queue_destroyed(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
 }
 
-static void set_engine_destroyed(struct xe_engine *e)
+static void set_exec_queue_destroyed(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
+	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
 }
 
-static bool engine_banned(struct xe_engine *e)
+static bool exec_queue_banned(struct xe_exec_queue *q)
 {
-	return (e->flags & ENGINE_FLAG_BANNED);
+	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
 }
 
-static void set_engine_banned(struct xe_engine *e)
+static void set_exec_queue_banned(struct xe_exec_queue *q)
 {
-	e->flags |= ENGINE_FLAG_BANNED;
+	q->flags |= EXEC_QUEUE_FLAG_BANNED;
 }
 
-static bool engine_suspended(struct xe_engine *e)
+static bool exec_queue_suspended(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
+	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
 }
 
-static void set_engine_suspended(struct xe_engine *e)
+static void set_exec_queue_suspended(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
+	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
 }
 
-static void clear_engine_suspended(struct xe_engine *e)
+static void clear_exec_queue_suspended(struct xe_exec_queue *q)
 {
-	atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
+	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
 }
 
-static bool engine_reset(struct xe_engine *e)
+static bool exec_queue_reset(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
 }
 
-static void set_engine_reset(struct xe_engine *e)
+static void set_exec_queue_reset(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_RESET, &e->guc->state);
+	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
 }
 
-static bool engine_killed(struct xe_engine *e)
+static bool exec_queue_killed(struct xe_exec_queue *q)
 {
-	return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
+	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
 }
 
-static void set_engine_killed(struct xe_engine *e)
+static void set_exec_queue_killed(struct xe_exec_queue *q)
 {
-	atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
+	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
 }
 
-static bool engine_killed_or_banned(struct xe_engine *e)
+static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
 {
-	return engine_killed(e) || engine_banned(e);
+	return exec_queue_killed(q) || exec_queue_banned(q);
 }
 
 static void guc_submit_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc *guc = arg;
 
-	xa_destroy(&guc->submission_state.engine_lookup);
+	xa_destroy(&guc->submission_state.exec_queue_lookup);
 	ida_destroy(&guc->submission_state.guc_ids);
 	bitmap_free(guc->submission_state.guc_ids_bitmap);
 }
@@ -201,7 +201,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
 #define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
 #define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC
 
-static const struct xe_engine_ops guc_engine_ops;
+static const struct xe_exec_queue_ops guc_exec_queue_ops;
 
 static void primelockdep(struct xe_guc *guc)
 {
@@ -228,10 +228,10 @@ int xe_guc_submit_init(struct xe_guc *guc)
 	if (!guc->submission_state.guc_ids_bitmap)
 		return -ENOMEM;
 
-	gt->engine_ops = &guc_engine_ops;
+	gt->exec_queue_ops = &guc_exec_queue_ops;
 
 	mutex_init(&guc->submission_state.lock);
-	xa_init(&guc->submission_state.engine_lookup);
+	xa_init(&guc->submission_state.exec_queue_lookup);
 	ida_init(&guc->submission_state.guc_ids);
 
 	spin_lock_init(&guc->submission_state.suspend.lock);
@@ -246,7 +246,7 @@ int xe_guc_submit_init(struct xe_guc *guc)
 	return 0;
 }
 
-static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
+static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	int ret;
 	void *ptr;
@@ -260,11 +260,11 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
 	 */
 	lockdep_assert_held(&guc->submission_state.lock);
 
-	if (xe_engine_is_parallel(e)) {
+	if (xe_exec_queue_is_parallel(q)) {
 		void *bitmap = guc->submission_state.guc_ids_bitmap;
 
 		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
-					      order_base_2(e->width));
+					      order_base_2(q->width));
 	} else {
 		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
 				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
@@ -272,12 +272,12 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
 	if (ret < 0)
 		return ret;
 
-	e->guc->id = ret;
-	if (xe_engine_is_parallel(e))
-		e->guc->id += GUC_ID_START_MLRC;
+	q->guc->id = ret;
+	if (xe_exec_queue_is_parallel(q))
+		q->guc->id += GUC_ID_START_MLRC;
 
-	ptr = xa_store(&guc->submission_state.engine_lookup,
-		       e->guc->id, e, GFP_NOWAIT);
+	ptr = xa_store(&guc->submission_state.exec_queue_lookup,
+		       q->guc->id, q, GFP_NOWAIT);
 	if (IS_ERR(ptr)) {
 		ret = PTR_ERR(ptr);
 		goto err_release;
@@ -286,29 +286,29 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
 	return 0;
 
 err_release:
-	ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+	ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
 	return ret;
 }
 
-static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
+static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	mutex_lock(&guc->submission_state.lock);
-	xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
-	if (xe_engine_is_parallel(e))
+	xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id);
+	if (xe_exec_queue_is_parallel(q))
 		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
-				      e->guc->id - GUC_ID_START_MLRC,
-				      order_base_2(e->width));
+				      q->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(q->width));
 	else
-		ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
 	mutex_unlock(&guc->submission_state.lock);
 }
 
-struct engine_policy {
+struct exec_queue_policy {
 	u32 count;
-	struct guc_update_engine_policy h2g;
+	struct guc_update_exec_queue_policy h2g;
 };
 
-static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
+static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
 {
 	size_t bytes = sizeof(policy->h2g.header) +
 		       (sizeof(policy->h2g.klv[0]) * policy->count);
@@ -316,8 +316,8 @@ static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
 	return bytes / sizeof(u32);
 }
 
-static void __guc_engine_policy_start_klv(struct engine_policy *policy,
-					  u16 guc_id)
+static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
+					      u16 guc_id)
 {
 	policy->h2g.header.action =
 		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
@@ -325,8 +325,8 @@ static void __guc_engine_policy_start_klv(struct engine_policy *policy,
 	policy->count = 0;
 }
 
-#define MAKE_ENGINE_POLICY_ADD(func, id) \
-static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
+#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
+static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
 					   u32 data) \
 { \
 	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
@@ -339,45 +339,45 @@ static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
 	policy->count++; \
 }
 
-MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
-MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
-MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
-#undef MAKE_ENGINE_POLICY_ADD
+MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+#undef MAKE_EXEC_QUEUE_POLICY_ADD
 
-static const int xe_engine_prio_to_guc[] = {
-	[XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
-	[XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
-	[XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
-	[XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
+static const int xe_exec_queue_prio_to_guc[] = {
+	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
+	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
+	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
+	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
 };
 
-static void init_policies(struct xe_guc *guc, struct xe_engine *e)
+static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
 {
-	struct engine_policy policy;
-	enum xe_engine_priority prio = e->priority;
-	u32 timeslice_us = e->sched_props.timeslice_us;
-	u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;
+	struct exec_queue_policy policy;
+	enum xe_exec_queue_priority prio = q->priority;
+	u32 timeslice_us = q->sched_props.timeslice_us;
+	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
 
-	XE_WARN_ON(!engine_registered(e));
+	XE_WARN_ON(!exec_queue_registered(q));
 
-	__guc_engine_policy_start_klv(&policy, e->guc->id);
-	__guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
-	__guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
-	__guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);
+	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
+	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
+	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
+	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
 
 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
-		       __guc_engine_policy_action_size(&policy), 0, 0);
+		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
 }
 
-static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
+static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
 {
-	struct engine_policy policy;
+	struct exec_queue_policy policy;
 
-	__guc_engine_policy_start_klv(&policy, e->guc->id);
-	__guc_engine_policy_add_preemption_timeout(&policy, 1);
+	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
+	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
 
 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
-		       __guc_engine_policy_action_size(&policy), 0, 0);
+		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
 }
 
 #define parallel_read(xe_, map_, field_) \
@@ -388,7 +388,7 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
 			field_, val_)
 
 static void __register_mlrc_engine(struct xe_guc *guc,
-				   struct xe_engine *e,
+				   struct xe_exec_queue *q,
 				   struct guc_ctxt_registration_info *info)
 {
 #define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
@@ -396,7 +396,7 @@ static void __register_mlrc_engine(struct xe_guc *guc,
 	int len = 0;
 	int i;
 
-	XE_WARN_ON(!xe_engine_is_parallel(e));
+	XE_WARN_ON(!xe_exec_queue_is_parallel(q));
 
 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
 	action[len++] = info->flags;
@@ -408,12 +408,12 @@ static void __register_mlrc_engine(struct xe_guc *guc,
 	action[len++] = info->wq_base_lo;
 	action[len++] = info->wq_base_hi;
 	action[len++] = info->wq_size;
-	action[len++] = e->width;
+	action[len++] = q->width;
 	action[len++] = info->hwlrca_lo;
 	action[len++] = info->hwlrca_hi;
 
-	for (i = 1; i < e->width; ++i) {
-		struct xe_lrc *lrc = e->lrc + i;
+	for (i = 1; i < q->width; ++i) {
+		struct xe_lrc *lrc = q->lrc + i;
 
 		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
 		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
@@ -446,24 +446,24 @@ static void __register_engine(struct xe_guc *guc,
 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
 }
 
-static void register_engine(struct xe_engine *e)
+static void register_engine(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_lrc *lrc = e->lrc;
+	struct xe_lrc *lrc = q->lrc;
 	struct guc_ctxt_registration_info info;
 
-	XE_WARN_ON(engine_registered(e));
+	XE_WARN_ON(exec_queue_registered(q));
 
 	memset(&info, 0, sizeof(info));
-	info.context_idx = e->guc->id;
-	info.engine_class = xe_engine_class_to_guc_class(e->class);
-	info.engine_submit_mask = e->logical_mask;
+	info.context_idx = q->guc->id;
+	info.engine_class = xe_engine_class_to_guc_class(q->class);
+	info.engine_submit_mask = q->logical_mask;
 	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
 	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
 	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
 
-	if (xe_engine_is_parallel(e)) {
+	if (xe_exec_queue_is_parallel(q)) {
 		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
 		struct iosys_map map = xe_lrc_parallel_map(lrc);
 
@@ -477,8 +477,8 @@ static void register_engine(struct xe_engine *e)
 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
 		info.wq_size = WQ_SIZE;
 
-		e->guc->wqi_head = 0;
-		e->guc->wqi_tail = 0;
+		q->guc->wqi_head = 0;
+		q->guc->wqi_tail = 0;
 		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
 		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
 	}
@@ -488,38 +488,38 @@ static void register_engine(struct xe_engine *e)
 	 * the GuC as jobs signal immediately and can't destroy an engine if the
 	 * GuC has a reference to it.
 	 */
-	if (xe_engine_is_lr(e))
-		xe_engine_get(e);
+	if (xe_exec_queue_is_lr(q))
+		xe_exec_queue_get(q);
 
-	set_engine_registered(e);
-	trace_xe_engine_register(e);
-	if (xe_engine_is_parallel(e))
-		__register_mlrc_engine(guc, e, &info);
+	set_exec_queue_registered(q);
+	trace_xe_exec_queue_register(q);
+	if (xe_exec_queue_is_parallel(q))
+		__register_mlrc_engine(guc, q, &info);
 	else
 		__register_engine(guc, &info);
-	init_policies(guc, e);
+	init_policies(guc, q);
 }
 
-static u32 wq_space_until_wrap(struct xe_engine *e)
+static u32 wq_space_until_wrap(struct xe_exec_queue *q)
 {
-	return (WQ_SIZE - e->guc->wqi_tail);
+	return (WQ_SIZE - q->guc->wqi_tail);
 }
 
-static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
+static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
-	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
 	unsigned int sleep_period_ms = 1;
 
 #define AVAILABLE_SPACE \
-	CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
+	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
 	if (wqi_size > AVAILABLE_SPACE) {
 try_again:
-		e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
+		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
 		if (wqi_size > AVAILABLE_SPACE) {
 			if (sleep_period_ms == 1024) {
-				xe_gt_reset_async(e->gt);
+				xe_gt_reset_async(q->gt);
 				return -ENODEV;
 			}
 
@@ -533,52 +533,52 @@ try_again:
 	return 0;
 }
 
-static int wq_noop_append(struct xe_engine *e)
+static int wq_noop_append(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
-	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
-	u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
 
-	if (wq_wait_for_space(e, wq_space_until_wrap(e)))
+	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
 		return -ENODEV;
 
 	XE_WARN_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
 
-	parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
+	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
 		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
 		       FIELD_PREP(WQ_LEN_MASK, len_dw));
-	e->guc->wqi_tail = 0;
+	q->guc->wqi_tail = 0;
 
 	return 0;
 }
 
-static void wq_item_append(struct xe_engine *e)
+static void wq_item_append(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
-	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
 #define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
 	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
-	u32 wqi_size = (e->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
+	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
 	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
 	int i = 0, j;
 
-	if (wqi_size > wq_space_until_wrap(e)) {
-		if (wq_noop_append(e))
+	if (wqi_size > wq_space_until_wrap(q)) {
+		if (wq_noop_append(q))
 			return;
 	}
-	if (wq_wait_for_space(e, wqi_size))
+	if (wq_wait_for_space(q, wqi_size))
 		return;
 
 	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
 		FIELD_PREP(WQ_LEN_MASK, len_dw);
-	wqi[i++] = xe_lrc_descriptor(e->lrc);
-	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
-		FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
+	wqi[i++] = xe_lrc_descriptor(q->lrc);
+	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
+		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
 	wqi[i++] = 0;
-	for (j = 1; j < e->width; ++j) {
-		struct xe_lrc *lrc = e->lrc + j;
+	for (j = 1; j < q->width; ++j) {
+		struct xe_lrc *lrc = q->lrc + j;
 
 		wqi[i++] = lrc->ring.tail / sizeof(u64);
 	}
@@ -586,55 +586,55 @@ static void wq_item_append(struct xe_engine *e)
 	XE_WARN_ON(i != wqi_size / sizeof(u32));
 
 	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
-				      wq[e->guc->wqi_tail / sizeof(u32)]));
+				      wq[q->guc->wqi_tail / sizeof(u32)]));
 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
-	e->guc->wqi_tail += wqi_size;
-	XE_WARN_ON(e->guc->wqi_tail > WQ_SIZE);
+	q->guc->wqi_tail += wqi_size;
+	XE_WARN_ON(q->guc->wqi_tail > WQ_SIZE);
 
 	xe_device_wmb(xe);
 
-	map = xe_lrc_parallel_map(e->lrc);
-	parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
+	map = xe_lrc_parallel_map(q->lrc);
+	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
 }
 
 #define RESUME_PENDING	~0x0ull
-static void submit_engine(struct xe_engine *e)
+static void submit_exec_queue(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
-	struct xe_lrc *lrc = e->lrc;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_lrc *lrc = q->lrc;
 	u32 action[3];
 	u32 g2h_len = 0;
 	u32 num_g2h = 0;
 	int len = 0;
 	bool extra_submit = false;
 
-	XE_WARN_ON(!engine_registered(e));
+	XE_WARN_ON(!exec_queue_registered(q));
 
-	if (xe_engine_is_parallel(e))
-		wq_item_append(e);
+	if (xe_exec_queue_is_parallel(q))
+		wq_item_append(q);
 	else
 		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
 
-	if (engine_suspended(e) && !xe_engine_is_parallel(e))
+	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
 		return;
 
-	if (!engine_enabled(e) && !engine_suspended(e)) {
+	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
-		action[len++] = e->guc->id;
+		action[len++] = q->guc->id;
 		action[len++] = GUC_CONTEXT_ENABLE;
 		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
 		num_g2h = 1;
-		if (xe_engine_is_parallel(e))
+		if (xe_exec_queue_is_parallel(q))
 			extra_submit = true;
 
-		e->guc->resume_time = RESUME_PENDING;
-		set_engine_pending_enable(e);
-		set_engine_enabled(e);
-		trace_xe_engine_scheduling_enable(e);
+		q->guc->resume_time = RESUME_PENDING;
+		set_exec_queue_pending_enable(q);
+		set_exec_queue_enabled(q);
+		trace_xe_exec_queue_scheduling_enable(q);
 	} else {
 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
-		action[len++] = e->guc->id;
-		trace_xe_engine_submit(e);
+		action[len++] = q->guc->id;
+		trace_xe_exec_queue_submit(q);
 	}
 
 	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
@@ -642,31 +642,31 @@ static void submit_engine(struct xe_engine *e)
 	if (extra_submit) {
 		len = 0;
 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
-		action[len++] = e->guc->id;
-		trace_xe_engine_submit(e);
+		action[len++] = q->guc->id;
+		trace_xe_exec_queue_submit(q);
 
 		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
 	}
 }
 
 static struct dma_fence *
-guc_engine_run_job(struct drm_sched_job *drm_job)
+guc_exec_queue_run_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
-	struct xe_engine *e = job->engine;
-	bool lr = xe_engine_is_lr(e);
+	struct xe_exec_queue *q = job->q;
+	bool lr = xe_exec_queue_is_lr(q);
 
-	XE_WARN_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
-		   !engine_banned(e) && !engine_suspended(e));
+	XE_WARN_ON((exec_queue_destroyed(q) || exec_queue_pending_disable(q)) &&
+		   !exec_queue_banned(q) && !exec_queue_suspended(q));
 
 	trace_xe_sched_job_run(job);
 
-	if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
-		if (!engine_registered(e))
-			register_engine(e);
+	if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
+		if (!exec_queue_registered(q))
+			register_engine(q);
 		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
-			e->ring_ops->emit_job(job);
-		submit_engine(e);
+			q->ring_ops->emit_job(job);
+		submit_exec_queue(q);
 	}
 
 	if (lr) {
@@ -679,7 +679,7 @@ guc_engine_run_job(struct drm_sched_job *drm_job)
 	}
 }
 
-static void guc_engine_free_job(struct drm_sched_job *drm_job)
+static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
 
@@ -692,37 +692,37 @@ static int guc_read_stopped(struct xe_guc *guc)
 	return atomic_read(&guc->submission_state.stopped);
 }
 
-#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable)			\
+#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
 	u32 action[] = {						\
 		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
-		e->guc->id,						\
+		q->guc->id,						\
 		GUC_CONTEXT_##enable_disable,				\
 	}
 
 static void disable_scheduling_deregister(struct xe_guc *guc,
-					  struct xe_engine *e)
+					  struct xe_exec_queue *q)
 {
-	MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
 	int ret;
 
-	set_min_preemption_timeout(guc, e);
+	set_min_preemption_timeout(guc, q);
 	smp_rmb();
-	ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
+	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
 				 guc_read_stopped(guc), HZ * 5);
 	if (!ret) {
-		struct xe_gpu_scheduler *sched = &e->guc->sched;
+		struct xe_gpu_scheduler *sched = &q->guc->sched;
 
 		XE_WARN_ON("Pending enable failed to respond");
 		xe_sched_submission_start(sched);
-		xe_gt_reset_async(e->gt);
+		xe_gt_reset_async(q->gt);
 		xe_sched_tdr_queue_imm(sched);
 		return;
 	}
 
-	clear_engine_enabled(e);
-	set_engine_pending_disable(e);
-	set_engine_destroyed(e);
-	trace_xe_engine_scheduling_disable(e);
+	clear_exec_queue_enabled(q);
+	set_exec_queue_pending_disable(q);
+	set_exec_queue_destroyed(q);
+	trace_xe_exec_queue_scheduling_disable(q);
 
 	/*
 	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
@@ -733,27 +733,27 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
 		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
 }
 
-static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);
 
 #if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
-static void simple_error_capture(struct xe_engine *e)
+static void simple_error_capture(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct drm_printer p = drm_err_printer("");
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
-	u32 adj_logical_mask = e->logical_mask;
-	u32 width_mask = (0x1 << e->width) - 1;
+	u32 adj_logical_mask = q->logical_mask;
+	u32 width_mask = (0x1 << q->width) - 1;
 	int i;
 	bool cookie;
 
-	if (e->vm && !e->vm->error_capture.capture_once) {
-		e->vm->error_capture.capture_once = true;
+	if (q->vm && !q->vm->error_capture.capture_once) {
+		q->vm->error_capture.capture_once = true;
 		cookie = dma_fence_begin_signalling();
-		for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
 			if (adj_logical_mask & BIT(i)) {
 				adj_logical_mask |= width_mask << i;
-				i += e->width;
+				i += q->width;
 			} else {
 				++i;
 			}
@@ -761,66 +761,66 @@ static void simple_error_capture(struct xe_engine *e)
 
 		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
 		xe_guc_ct_print(&guc->ct, &p, true);
-		guc_engine_print(e, &p);
+		guc_exec_queue_print(q, &p);
 		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
-			if (hwe->class != e->hwe->class ||
+			if (hwe->class != q->hwe->class ||
 			    !(BIT(hwe->logical_instance) & adj_logical_mask))
 				continue;
 			xe_hw_engine_print(hwe, &p);
 		}
-		xe_analyze_vm(&p, e->vm, e->gt->info.id);
+		xe_analyze_vm(&p, q->vm, q->gt->info.id);
 		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
 		dma_fence_end_signalling(cookie);
 	}
 }
 #else
-static void simple_error_capture(struct xe_engine *e)
+static void simple_error_capture(struct xe_exec_queue *q)
 {
 }
 #endif
 
-static void xe_guc_engine_trigger_cleanup(struct xe_engine *e)
+static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	if (xe_engine_is_lr(e))
-		queue_work(guc_to_gt(guc)->ordered_wq, &e->guc->lr_tdr);
+	if (xe_exec_queue_is_lr(q))
+		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
 	else
-		xe_sched_tdr_queue_imm(&e->guc->sched);
+		xe_sched_tdr_queue_imm(&q->guc->sched);
 }
 
-static void xe_guc_engine_lr_cleanup(struct work_struct *w)
+static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 {
-	struct xe_guc_engine *ge =
-		container_of(w, struct xe_guc_engine, lr_tdr);
-	struct xe_engine *e = ge->engine;
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, lr_tdr);
+	struct xe_exec_queue *q = ge->q;
 	struct xe_gpu_scheduler *sched = &ge->sched;
 
-	XE_WARN_ON(!xe_engine_is_lr(e));
-	trace_xe_engine_lr_cleanup(e);
+	XE_WARN_ON(!xe_exec_queue_is_lr(q));
+	trace_xe_exec_queue_lr_cleanup(q);
 
 	/* Kill the run_job / process_msg entry points */
 	xe_sched_submission_stop(sched);
 
 	/* Engine state now stable, disable scheduling / deregister if needed */
-	if (engine_registered(e)) {
-		struct xe_guc *guc = engine_to_guc(e);
+	if (exec_queue_registered(q)) {
+		struct xe_guc *guc = exec_queue_to_guc(q);
 		int ret;
 
-		set_engine_banned(e);
-		disable_scheduling_deregister(guc, e);
+		set_exec_queue_banned(q);
+		disable_scheduling_deregister(guc, q);
 
 		/*
 		 * Must wait for scheduling to be disabled before signalling
 		 * any fences, if GT broken the GT reset code should signal us.
 		 */
 		ret = wait_event_timeout(guc->ct.wq,
-					 !engine_pending_disable(e) ||
+					 !exec_queue_pending_disable(q) ||
 					 guc_read_stopped(guc), HZ * 5);
 		if (!ret) {
 			XE_WARN_ON("Schedule disable failed to respond");
 			xe_sched_submission_start(sched);
-			xe_gt_reset_async(e->gt);
+			xe_gt_reset_async(q->gt);
 			return;
 		}
 	}
@@ -829,27 +829,27 @@ static void xe_guc_engine_lr_cleanup(struct work_struct *w)
 }
 
 static enum drm_gpu_sched_stat
-guc_engine_timedout_job(struct drm_sched_job *drm_job)
+guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
 	struct xe_sched_job *tmp_job;
-	struct xe_engine *e = job->engine;
-	struct xe_gpu_scheduler *sched = &e->guc->sched;
-	struct xe_device *xe = guc_to_xe(engine_to_guc(e));
+	struct xe_exec_queue *q = job->q;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
 	int err = -ETIME;
 	int i = 0;
 
 	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
-		XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
-		XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));
+		XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL);
+		XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q));
 
 		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
-			   xe_sched_job_seqno(job), e->guc->id, e->flags);
-		simple_error_capture(e);
-		xe_devcoredump(e);
+			   xe_sched_job_seqno(job), q->guc->id, q->flags);
+		simple_error_capture(q);
+		xe_devcoredump(q);
 	} else {
 		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
-			 xe_sched_job_seqno(job), e->guc->id, e->flags);
+			 xe_sched_job_seqno(job), q->guc->id, q->flags);
 	}
 	trace_xe_sched_job_timedout(job);
 
@@ -860,26 +860,26 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job)
 	 * Kernel jobs should never fail, nor should VM jobs if they do
 	 * somethings has gone wrong and the GT needs a reset
 	 */
-	if (e->flags & ENGINE_FLAG_KERNEL ||
-	    (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
+	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
 		if (!xe_sched_invalidate_job(job, 2)) {
 			xe_sched_add_pending_job(sched, job);
 			xe_sched_submission_start(sched);
-			xe_gt_reset_async(e->gt);
+			xe_gt_reset_async(q->gt);
 			goto out;
 		}
 	}
 
 	/* Engine state now stable, disable scheduling if needed */
-	if (engine_enabled(e)) {
-		struct xe_guc *guc = engine_to_guc(e);
+	if (exec_queue_enabled(q)) {
+		struct xe_guc *guc = exec_queue_to_guc(q);
 		int ret;
 
-		if (engine_reset(e))
+		if (exec_queue_reset(q))
 			err = -EIO;
-		set_engine_banned(e);
-		xe_engine_get(e);
-		disable_scheduling_deregister(guc, e);
+		set_exec_queue_banned(q);
+		xe_exec_queue_get(q);
+		disable_scheduling_deregister(guc, q);
 
 		/*
 		 * Must wait for scheduling to be disabled before signalling
@@ -891,20 +891,20 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job)
 		 */
 		smp_rmb();
 		ret = wait_event_timeout(guc->ct.wq,
-					 !engine_pending_disable(e) ||
+					 !exec_queue_pending_disable(q) ||
 					 guc_read_stopped(guc), HZ * 5);
 		if (!ret) {
 			XE_WARN_ON("Schedule disable failed to respond");
 			xe_sched_add_pending_job(sched, job);
 			xe_sched_submission_start(sched);
-			xe_gt_reset_async(e->gt);
+			xe_gt_reset_async(q->gt);
 			xe_sched_tdr_queue_imm(sched);
 			goto out;
 		}
 	}
 
 	/* Stop fence signaling */
-	xe_hw_fence_irq_stop(e->fence_irq);
+	xe_hw_fence_irq_stop(q->fence_irq);
 
 	/*
 	 * Fence state now stable, stop / start scheduler which cleans up any
@@ -912,7 +912,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job)
 	 */
 	xe_sched_add_pending_job(sched, job);
 	xe_sched_submission_start(sched);
-	xe_guc_engine_trigger_cleanup(e);
+	xe_guc_exec_queue_trigger_cleanup(q);
 
 	/* Mark all outstanding jobs as bad, thus completing them */
 	spin_lock(&sched->base.job_list_lock);
@@ -921,53 +921,53 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job)
 	spin_unlock(&sched->base.job_list_lock);
 
 	/* Start fence signaling */
-	xe_hw_fence_irq_start(e->fence_irq);
+	xe_hw_fence_irq_start(q->fence_irq);
 
 out:
 	return DRM_GPU_SCHED_STAT_NOMINAL;
 }
 
-static void __guc_engine_fini_async(struct work_struct *w)
+static void __guc_exec_queue_fini_async(struct work_struct *w)
 {
-	struct xe_guc_engine *ge =
-		container_of(w, struct xe_guc_engine, fini_async);
-	struct xe_engine *e = ge->engine;
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, fini_async);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	trace_xe_engine_destroy(e);
+	trace_xe_exec_queue_destroy(q);
 
-	if (xe_engine_is_lr(e))
+	if (xe_exec_queue_is_lr(q))
 		cancel_work_sync(&ge->lr_tdr);
-	if (e->flags & ENGINE_FLAG_PERSISTENT)
-		xe_device_remove_persistent_engines(gt_to_xe(e->gt), e);
-	release_guc_id(guc, e);
+	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
+		xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
+	release_guc_id(guc, q);
 	xe_sched_entity_fini(&ge->entity);
 	xe_sched_fini(&ge->sched);
 
-	if (!(e->flags & ENGINE_FLAG_KERNEL)) {
+	if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL)) {
 		kfree(ge);
-		xe_engine_fini(e);
+		xe_exec_queue_fini(q);
 	}
 }
 
-static void guc_engine_fini_async(struct xe_engine *e)
+static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
 {
-	bool kernel = e->flags & ENGINE_FLAG_KERNEL;
+	bool kernel = q->flags & EXEC_QUEUE_FLAG_KERNEL;
 
-	INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
-	queue_work(system_wq, &e->guc->fini_async);
+	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+	queue_work(system_wq, &q->guc->fini_async);
 
 	/* We must block on kernel engines so slabs are empty on driver unload */
 	if (kernel) {
-		struct xe_guc_engine *ge = e->guc;
+		struct xe_guc_exec_queue *ge = q->guc;
 
 		flush_work(&ge->fini_async);
 		kfree(ge);
-		xe_engine_fini(e);
+		xe_exec_queue_fini(q);
 	}
 }
 
-static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
+static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	/*
 	 * Might be done from within the GPU scheduler, need to do async as we
@@ -976,104 +976,104 @@ static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
 	 * this we and don't really care when everything is fini'd, just that it
 	 * is.
 	 */
-	guc_engine_fini_async(e);
+	guc_exec_queue_fini_async(q);
 }
 
-static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
+static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
 {
-	struct xe_engine *e = msg->private_data;
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
-	trace_xe_engine_cleanup_entity(e);
+	XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL);
+	trace_xe_exec_queue_cleanup_entity(q);
 
-	if (engine_registered(e))
-		disable_scheduling_deregister(guc, e);
+	if (exec_queue_registered(q))
+		disable_scheduling_deregister(guc, q);
 	else
-		__guc_engine_fini(guc, e);
+		__guc_exec_queue_fini(guc, q);
 }
 
-static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
+static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
 {
-	return !engine_killed_or_banned(e) && engine_registered(e);
+	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
 }
 
-static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
+static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
 {
-	struct xe_engine *e = msg->private_data;
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	if (guc_engine_allowed_to_change_state(e))
-		init_policies(guc, e);
+	if (guc_exec_queue_allowed_to_change_state(q))
+		init_policies(guc, q);
 	kfree(msg);
 }
 
-static void suspend_fence_signal(struct xe_engine *e)
+static void suspend_fence_signal(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	XE_WARN_ON(!engine_suspended(e) && !engine_killed(e) &&
+	XE_WARN_ON(!exec_queue_suspended(q) && !exec_queue_killed(q) &&
 		   !guc_read_stopped(guc));
-	XE_WARN_ON(!e->guc->suspend_pending);
+	XE_WARN_ON(!q->guc->suspend_pending);
 
-	e->guc->suspend_pending = false;
+	q->guc->suspend_pending = false;
 	smp_wmb();
-	wake_up(&e->guc->suspend_wait);
+	wake_up(&q->guc->suspend_wait);
 }
 
-static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
+static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
 {
-	struct xe_engine *e = msg->private_data;
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
-	    engine_enabled(e)) {
-		wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
+	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
+	    exec_queue_enabled(q)) {
+		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
 			   guc_read_stopped(guc));
 
 		if (!guc_read_stopped(guc)) {
-			MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
 			s64 since_resume_ms =
 				ktime_ms_delta(ktime_get(),
-					       e->guc->resume_time);
-			s64 wait_ms = e->vm->preempt.min_run_period_ms -
+					       q->guc->resume_time);
+			s64 wait_ms = q->vm->preempt.min_run_period_ms -
 				since_resume_ms;
 
-			if (wait_ms > 0 && e->guc->resume_time)
+			if (wait_ms > 0 && q->guc->resume_time)
 				msleep(wait_ms);
 
-			set_engine_suspended(e);
-			clear_engine_enabled(e);
-			set_engine_pending_disable(e);
-			trace_xe_engine_scheduling_disable(e);
+			set_exec_queue_suspended(q);
+			clear_exec_queue_enabled(q);
+			set_exec_queue_pending_disable(q);
+			trace_xe_exec_queue_scheduling_disable(q);
 
 			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
 				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
 		}
-	} else if (e->guc->suspend_pending) {
-		set_engine_suspended(e);
-		suspend_fence_signal(e);
+	} else if (q->guc->suspend_pending) {
+		set_exec_queue_suspended(q);
+		suspend_fence_signal(q);
 	}
 }
 
-static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
+static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
 {
-	struct xe_engine *e = msg->private_data;
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	if (guc_engine_allowed_to_change_state(e)) {
-		MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);
+	if (guc_exec_queue_allowed_to_change_state(q)) {
+		MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
 
-		e->guc->resume_time = RESUME_PENDING;
-		clear_engine_suspended(e);
-		set_engine_pending_enable(e);
-		set_engine_enabled(e);
-		trace_xe_engine_scheduling_enable(e);
+		q->guc->resume_time = RESUME_PENDING;
+		clear_exec_queue_suspended(q);
+		set_exec_queue_pending_enable(q);
+		set_exec_queue_enabled(q);
+		trace_xe_exec_queue_scheduling_enable(q);
 
 		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
 			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
 	} else {
-		clear_engine_suspended(e);
+		clear_exec_queue_suspended(q);
 	}
 }
 
@@ -1082,22 +1082,22 @@ static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
 #define SUSPEND		3
 #define RESUME		4
 
-static void guc_engine_process_msg(struct xe_sched_msg *msg)
+static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
 {
 	trace_xe_sched_msg_recv(msg);
 
 	switch (msg->opcode) {
 	case CLEANUP:
-		__guc_engine_process_msg_cleanup(msg);
+		__guc_exec_queue_process_msg_cleanup(msg);
 		break;
 	case SET_SCHED_PROPS:
-		__guc_engine_process_msg_set_sched_props(msg);
+		__guc_exec_queue_process_msg_set_sched_props(msg);
 		break;
 	case SUSPEND:
-		__guc_engine_process_msg_suspend(msg);
+		__guc_exec_queue_process_msg_suspend(msg);
 		break;
 	case RESUME:
-		__guc_engine_process_msg_resume(msg);
+		__guc_exec_queue_process_msg_resume(msg);
 		break;
 	default:
 		XE_WARN_ON("Unknown message type");
@@ -1105,20 +1105,20 @@ static void guc_engine_process_msg(struct xe_sched_msg *msg)
 }
 
 static const struct drm_sched_backend_ops drm_sched_ops = {
-	.run_job = guc_engine_run_job,
-	.free_job = guc_engine_free_job,
-	.timedout_job = guc_engine_timedout_job,
+	.run_job = guc_exec_queue_run_job,
+	.free_job = guc_exec_queue_free_job,
+	.timedout_job = guc_exec_queue_timedout_job,
 };
 
 static const struct xe_sched_backend_ops xe_sched_ops = {
-	.process_msg = guc_engine_process_msg,
+	.process_msg = guc_exec_queue_process_msg,
 };
 
-static int guc_engine_init(struct xe_engine *e)
+static int guc_exec_queue_init(struct xe_exec_queue *q)
 {
 	struct xe_gpu_scheduler *sched;
-	struct xe_guc *guc = engine_to_guc(e);
-	struct xe_guc_engine *ge;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_guc_exec_queue *ge;
 	long timeout;
 	int err;
 
@@ -1128,15 +1128,15 @@ static int guc_engine_init(struct xe_engine *e)
 	if (!ge)
 		return -ENOMEM;
 
-	e->guc = ge;
-	ge->engine = e;
+	q->guc = ge;
+	ge->q = q;
 	init_waitqueue_head(&ge->suspend_wait);
 
-	timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
+	timeout = xe_vm_no_dma_fences(q->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
-			     e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
 			     64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
-			     e->name, gt_to_xe(e->gt)->drm.dev);
+			     q->name, gt_to_xe(q->gt)->drm.dev);
 	if (err)
 		goto err_free;
 
@@ -1144,45 +1144,45 @@ static int guc_engine_init(struct xe_engine *e)
 	err = xe_sched_entity_init(&ge->entity, sched);
 	if (err)
 		goto err_sched;
-	e->priority = XE_ENGINE_PRIORITY_NORMAL;
+	q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 
-	if (xe_engine_is_lr(e))
-		INIT_WORK(&e->guc->lr_tdr, xe_guc_engine_lr_cleanup);
+	if (xe_exec_queue_is_lr(q))
+		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
 
 	mutex_lock(&guc->submission_state.lock);
 
-	err = alloc_guc_id(guc, e);
+	err = alloc_guc_id(guc, q);
 	if (err)
 		goto err_entity;
 
-	e->entity = &ge->entity;
+	q->entity = &ge->entity;
 
 	if (guc_read_stopped(guc))
 		xe_sched_stop(sched);
 
 	mutex_unlock(&guc->submission_state.lock);
 
-	switch (e->class) {
+	switch (q->class) {
 	case XE_ENGINE_CLASS_RENDER:
-		sprintf(e->name, "rcs%d", e->guc->id);
+		sprintf(q->name, "rcs%d", q->guc->id);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_DECODE:
-		sprintf(e->name, "vcs%d", e->guc->id);
+		sprintf(q->name, "vcs%d", q->guc->id);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
-		sprintf(e->name, "vecs%d", e->guc->id);
+		sprintf(q->name, "vecs%d", q->guc->id);
 		break;
 	case XE_ENGINE_CLASS_COPY:
-		sprintf(e->name, "bcs%d", e->guc->id);
+		sprintf(q->name, "bcs%d", q->guc->id);
 		break;
 	case XE_ENGINE_CLASS_COMPUTE:
-		sprintf(e->name, "ccs%d", e->guc->id);
+		sprintf(q->name, "ccs%d", q->guc->id);
 		break;
 	default:
-		XE_WARN_ON(e->class);
+		XE_WARN_ON(q->class);
 	}
 
-	trace_xe_engine_create(e);
+	trace_xe_exec_queue_create(q);
 
 	return 0;
 
@@ -1196,133 +1196,133 @@ err_free:
 	return err;
 }
 
-static void guc_engine_kill(struct xe_engine *e)
+static void guc_exec_queue_kill(struct xe_exec_queue *q)
 {
-	trace_xe_engine_kill(e);
-	set_engine_killed(e);
-	xe_guc_engine_trigger_cleanup(e);
+	trace_xe_exec_queue_kill(q);
+	set_exec_queue_killed(q);
+	xe_guc_exec_queue_trigger_cleanup(q);
 }
 
-static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
-			       u32 opcode)
+static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
+				   u32 opcode)
 {
 	INIT_LIST_HEAD(&msg->link);
 	msg->opcode = opcode;
-	msg->private_data = e;
+	msg->private_data = q;
 
 	trace_xe_sched_msg_add(msg);
-	xe_sched_add_msg(&e->guc->sched, msg);
+	xe_sched_add_msg(&q->guc->sched, msg);
 }
 
 #define STATIC_MSG_CLEANUP	0
 #define STATIC_MSG_SUSPEND	1
 #define STATIC_MSG_RESUME	2
-static void guc_engine_fini(struct xe_engine *e)
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
 {
-	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP;
+	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
 
-	if (!(e->flags & ENGINE_FLAG_KERNEL))
-		guc_engine_add_msg(e, msg, CLEANUP);
+	if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL))
+		guc_exec_queue_add_msg(q, msg, CLEANUP);
 	else
-		__guc_engine_fini(engine_to_guc(e), e);
+		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
 }
 
-static int guc_engine_set_priority(struct xe_engine *e,
-				   enum xe_engine_priority priority)
+static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
+				       enum xe_exec_queue_priority priority)
 {
 	struct xe_sched_msg *msg;
 
-	if (e->priority == priority || engine_killed_or_banned(e))
+	if (q->priority == priority || exec_queue_killed_or_banned(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
-	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
-	e->priority = priority;
+	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
+	q->priority = priority;
 
 	return 0;
 }
 
-static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
 {
 	struct xe_sched_msg *msg;
 
-	if (e->sched_props.timeslice_us == timeslice_us ||
-	    engine_killed_or_banned(e))
+	if (q->sched_props.timeslice_us == timeslice_us ||
+	    exec_queue_killed_or_banned(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
-	e->sched_props.timeslice_us = timeslice_us;
-	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+	q->sched_props.timeslice_us = timeslice_us;
+	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
 
 	return 0;
 }
 
-static int guc_engine_set_preempt_timeout(struct xe_engine *e,
-					  u32 preempt_timeout_us)
+static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
+					      u32 preempt_timeout_us)
 {
 	struct xe_sched_msg *msg;
 
-	if (e->sched_props.preempt_timeout_us == preempt_timeout_us ||
-	    engine_killed_or_banned(e))
+	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
+	    exec_queue_killed_or_banned(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
-	e->sched_props.preempt_timeout_us = preempt_timeout_us;
-	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+	q->sched_props.preempt_timeout_us = preempt_timeout_us;
+	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
 
 	return 0;
 }
 
-static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
+static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
 {
-	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
 
-	XE_WARN_ON(engine_registered(e));
-	XE_WARN_ON(engine_banned(e));
-	XE_WARN_ON(engine_killed(e));
+	XE_WARN_ON(exec_queue_registered(q));
+	XE_WARN_ON(exec_queue_banned(q));
+	XE_WARN_ON(exec_queue_killed(q));
 
 	sched->base.timeout = job_timeout_ms;
 
 	return 0;
 }
 
-static int guc_engine_suspend(struct xe_engine *e)
+static int guc_exec_queue_suspend(struct xe_exec_queue *q)
 {
-	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND;
+	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
 
-	if (engine_killed_or_banned(e) || e->guc->suspend_pending)
+	if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
 		return -EINVAL;
 
-	e->guc->suspend_pending = true;
-	guc_engine_add_msg(e, msg, SUSPEND);
+	q->guc->suspend_pending = true;
+	guc_exec_queue_add_msg(q, msg, SUSPEND);
 
 	return 0;
 }
 
-static void guc_engine_suspend_wait(struct xe_engine *e)
+static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	wait_event(e->guc->suspend_wait, !e->guc->suspend_pending ||
+	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
 		   guc_read_stopped(guc));
 }
 
-static void guc_engine_resume(struct xe_engine *e)
+static void guc_exec_queue_resume(struct xe_exec_queue *q)
 {
-	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;
+	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
 
-	XE_WARN_ON(e->guc->suspend_pending);
+	XE_WARN_ON(q->guc->suspend_pending);
 
-	guc_engine_add_msg(e, msg, RESUME);
+	guc_exec_queue_add_msg(q, msg, RESUME);
 }
 
 /*
@@ -1331,49 +1331,49 @@ static void guc_engine_resume(struct xe_engine *e)
  * really shouldn't do much other than trap into the DRM scheduler which
  * synchronizes these operations.
  */
-static const struct xe_engine_ops guc_engine_ops = {
-	.init = guc_engine_init,
-	.kill = guc_engine_kill,
-	.fini = guc_engine_fini,
-	.set_priority = guc_engine_set_priority,
-	.set_timeslice = guc_engine_set_timeslice,
-	.set_preempt_timeout = guc_engine_set_preempt_timeout,
-	.set_job_timeout = guc_engine_set_job_timeout,
-	.suspend = guc_engine_suspend,
-	.suspend_wait = guc_engine_suspend_wait,
-	.resume = guc_engine_resume,
+static const struct xe_exec_queue_ops guc_exec_queue_ops = {
+	.init = guc_exec_queue_init,
+	.kill = guc_exec_queue_kill,
+	.fini = guc_exec_queue_fini,
+	.set_priority = guc_exec_queue_set_priority,
+	.set_timeslice = guc_exec_queue_set_timeslice,
+	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
+	.set_job_timeout = guc_exec_queue_set_job_timeout,
+	.suspend = guc_exec_queue_suspend,
+	.suspend_wait = guc_exec_queue_suspend_wait,
+	.resume = guc_exec_queue_resume,
 };
 
-static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
+static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
 {
-	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
 
 	/* Stop scheduling + flush any DRM scheduler operations */
 	xe_sched_submission_stop(sched);
 
 	/* Clean up lost G2H + reset engine state */
-	if (engine_registered(e)) {
-		if ((engine_banned(e) && engine_destroyed(e)) ||
-		    xe_engine_is_lr(e))
-			xe_engine_put(e);
-		else if (engine_destroyed(e))
-			__guc_engine_fini(guc, e);
+	if (exec_queue_registered(q)) {
+		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
+		    xe_exec_queue_is_lr(q))
+			xe_exec_queue_put(q);
+		else if (exec_queue_destroyed(q))
+			__guc_exec_queue_fini(guc, q);
 	}
-	if (e->guc->suspend_pending) {
-		set_engine_suspended(e);
-		suspend_fence_signal(e);
+	if (q->guc->suspend_pending) {
+		set_exec_queue_suspended(q);
+		suspend_fence_signal(q);
 	}
-	atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
-		   &e->guc->state);
-	e->guc->resume_time = 0;
-	trace_xe_engine_stop(e);
+	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+		   &q->guc->state);
+	q->guc->resume_time = 0;
+	trace_xe_exec_queue_stop(q);
 
 	/*
 	 * Ban any engine (aside from kernel and engines used for VM ops) with a
 	 * started but not complete job or if a job has gone through a GT reset
 	 * more than twice.
 	 */
-	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
 
 		if (job) {
@@ -1381,8 +1381,8 @@ static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
 			    !xe_sched_job_completed(job)) ||
 			    xe_sched_invalidate_job(job, 2)) {
 				trace_xe_sched_job_ban(job);
-				xe_sched_tdr_queue_imm(&e->guc->sched);
-				set_engine_banned(e);
+				xe_sched_tdr_queue_imm(&q->guc->sched);
+				set_exec_queue_banned(q);
 			}
 		}
 	}
@@ -1413,15 +1413,15 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc)
 
 int xe_guc_submit_stop(struct xe_guc *guc)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	unsigned long index;
 
 	XE_WARN_ON(guc_read_stopped(guc) != 1);
 
 	mutex_lock(&guc->submission_state.lock);
 
-	xa_for_each(&guc->submission_state.engine_lookup, index, e)
-		guc_engine_stop(guc, e);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_stop(guc, q);
 
 	mutex_unlock(&guc->submission_state.lock);
 
@@ -1433,16 +1433,16 @@ int xe_guc_submit_stop(struct xe_guc *guc)
 	return 0;
 }
 
-static void guc_engine_start(struct xe_engine *e)
+static void guc_exec_queue_start(struct xe_exec_queue *q)
 {
-	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
 
-	if (!engine_killed_or_banned(e)) {
+	if (!exec_queue_killed_or_banned(q)) {
 		int i;
 
-		trace_xe_engine_resubmit(e);
-		for (i = 0; i < e->width; ++i)
-			xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail);
+		trace_xe_exec_queue_resubmit(q);
+		for (i = 0; i < q->width; ++i)
+			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
 		xe_sched_resubmit_jobs(sched);
 	}
 
@@ -1451,15 +1451,15 @@ static void guc_engine_start(struct xe_engine *e)
 
 int xe_guc_submit_start(struct xe_guc *guc)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	unsigned long index;
 
 	XE_WARN_ON(guc_read_stopped(guc) != 1);
 
 	mutex_lock(&guc->submission_state.lock);
 	atomic_dec(&guc->submission_state.stopped);
-	xa_for_each(&guc->submission_state.engine_lookup, index, e)
-		guc_engine_start(e);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_start(q);
 	mutex_unlock(&guc->submission_state.lock);
 
 	wake_up_all(&guc->ct.wq);
@@ -1467,36 +1467,36 @@ int xe_guc_submit_start(struct xe_guc *guc)
 	return 0;
 }
 
-static struct xe_engine *
-g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
+static struct xe_exec_queue *
+g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
 	if (unlikely(guc_id >= GUC_ID_MAX)) {
 		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
 		return NULL;
 	}
 
-	e = xa_load(&guc->submission_state.engine_lookup, guc_id);
-	if (unlikely(!e)) {
+	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
+	if (unlikely(!q)) {
 		drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
 		return NULL;
 	}
 
-	XE_WARN_ON(e->guc->id != guc_id);
+	XE_WARN_ON(q->guc->id != guc_id);
 
-	return e;
+	return q;
 }
 
-static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
+static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	u32 action[] = {
 		XE_GUC_ACTION_DEREGISTER_CONTEXT,
-		e->guc->id,
+		q->guc->id,
 	};
 
-	trace_xe_engine_deregister(e);
+	trace_xe_exec_queue_deregister(q);
 
 	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
 }
@@ -1504,7 +1504,7 @@ static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	u32 guc_id = msg[0];
 
 	if (unlikely(len < 2)) {
@@ -1512,34 +1512,34 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		return -EPROTO;
 	}
 
-	e = g2h_engine_lookup(guc, guc_id);
-	if (unlikely(!e))
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
 		return -EPROTO;
 
-	if (unlikely(!engine_pending_enable(e) &&
-		     !engine_pending_disable(e))) {
+	if (unlikely(!exec_queue_pending_enable(q) &&
+		     !exec_queue_pending_disable(q))) {
 		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
-			atomic_read(&e->guc->state));
+			atomic_read(&q->guc->state));
 		return -EPROTO;
 	}
 
-	trace_xe_engine_scheduling_done(e);
+	trace_xe_exec_queue_scheduling_done(q);
 
-	if (engine_pending_enable(e)) {
-		e->guc->resume_time = ktime_get();
-		clear_engine_pending_enable(e);
+	if (exec_queue_pending_enable(q)) {
+		q->guc->resume_time = ktime_get();
+		clear_exec_queue_pending_enable(q);
 		smp_wmb();
 		wake_up_all(&guc->ct.wq);
 	} else {
-		clear_engine_pending_disable(e);
-		if (e->guc->suspend_pending) {
-			suspend_fence_signal(e);
+		clear_exec_queue_pending_disable(q);
+		if (q->guc->suspend_pending) {
+			suspend_fence_signal(q);
 		} else {
-			if (engine_banned(e)) {
+			if (exec_queue_banned(q)) {
 				smp_wmb();
 				wake_up_all(&guc->ct.wq);
 			}
-			deregister_engine(guc, e);
+			deregister_exec_queue(guc, q);
 		}
 	}
 
@@ -1549,7 +1549,7 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	u32 guc_id = msg[0];
 
 	if (unlikely(len < 1)) {
@@ -1557,33 +1557,33 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		return -EPROTO;
 	}
 
-	e = g2h_engine_lookup(guc, guc_id);
-	if (unlikely(!e))
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
 		return -EPROTO;
 
-	if (!engine_destroyed(e) || engine_pending_disable(e) ||
-	    engine_pending_enable(e) || engine_enabled(e)) {
+	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
+	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
 		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
-			atomic_read(&e->guc->state));
+			atomic_read(&q->guc->state));
 		return -EPROTO;
 	}
 
-	trace_xe_engine_deregister_done(e);
+	trace_xe_exec_queue_deregister_done(q);
 
-	clear_engine_registered(e);
+	clear_exec_queue_registered(q);
 
-	if (engine_banned(e) || xe_engine_is_lr(e))
-		xe_engine_put(e);
+	if (exec_queue_banned(q) || xe_exec_queue_is_lr(q))
+		xe_exec_queue_put(q);
 	else
-		__guc_engine_fini(guc, e);
+		__guc_exec_queue_fini(guc, q);
 
 	return 0;
 }
 
-int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
+int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	u32 guc_id = msg[0];
 
 	if (unlikely(len < 1)) {
@@ -1591,34 +1591,34 @@ int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		return -EPROTO;
 	}
 
-	e = g2h_engine_lookup(guc, guc_id);
-	if (unlikely(!e))
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
 		return -EPROTO;
 
 	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
 
 	/* FIXME: Do error capture, most likely async */
 
-	trace_xe_engine_reset(e);
+	trace_xe_exec_queue_reset(q);
 
 	/*
 	 * A banned engine is a NOP at this point (came from
-	 * guc_engine_timedout_job). Otherwise, kick drm scheduler to cancel
+	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
 	 * jobs by setting timeout of the job to the minimum value kicking
-	 * guc_engine_timedout_job.
+	 * guc_exec_queue_timedout_job.
 	 */
-	set_engine_reset(e);
-	if (!engine_banned(e))
-		xe_guc_engine_trigger_cleanup(e);
+	set_exec_queue_reset(q);
+	if (!exec_queue_banned(q))
+		xe_guc_exec_queue_trigger_cleanup(q);
 
 	return 0;
 }
 
-int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
-					   u32 len)
+int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					       u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	u32 guc_id = msg[0];
 
 	if (unlikely(len < 1)) {
@@ -1626,22 +1626,22 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 		return -EPROTO;
 	}
 
-	e = g2h_engine_lookup(guc, guc_id);
-	if (unlikely(!e))
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
 		return -EPROTO;
 
 	drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
-	trace_xe_engine_memory_cat_error(e);
+	trace_xe_exec_queue_memory_cat_error(q);
 
 	/* Treat the same as engine reset */
-	set_engine_reset(e);
-	if (!engine_banned(e))
-		xe_guc_engine_trigger_cleanup(e);
+	set_exec_queue_reset(q);
+	if (!exec_queue_banned(q))
+		xe_guc_exec_queue_trigger_cleanup(q);
 
 	return 0;
 }
 
-int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
+int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	u8 guc_class, instance;
@@ -1666,16 +1666,16 @@ int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
 }
 
 static void
-guc_engine_wq_snapshot_capture(struct xe_engine *e,
-			       struct xe_guc_submit_engine_snapshot *snapshot)
+guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
+				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
-	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
 	int i;
 
-	snapshot->guc.wqi_head = e->guc->wqi_head;
-	snapshot->guc.wqi_tail = e->guc->wqi_tail;
+	snapshot->guc.wqi_head = q->guc->wqi_head;
+	snapshot->guc.wqi_tail = q->guc->wqi_tail;
 	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
 	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
 	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
@@ -1692,8 +1692,8 @@ guc_engine_wq_snapshot_capture(struct xe_engine *e,
 }
 
 static void
-guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
-			     struct drm_printer *p)
+guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p)
 {
 	int i;
 
@@ -1714,23 +1714,23 @@ guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
 }
 
 /**
- * xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine.
- * @e: Xe Engine.
+ * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
+ * @q: Xe exec queue.
  *
  * This can be printed out in a later stage like during dev_coredump
  * analysis.
  *
  * Returns: a GuC Submit Engine snapshot object that must be freed by the
- * caller, using `xe_guc_engine_snapshot_free`.
+ * caller, using `xe_guc_exec_queue_snapshot_free`.
  */
-struct xe_guc_submit_engine_snapshot *
-xe_guc_engine_snapshot_capture(struct xe_engine *e)
+struct xe_guc_submit_exec_queue_snapshot *
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = engine_to_guc(e);
+	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_gpu_scheduler *sched = &e->guc->sched;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
 	struct xe_sched_job *job;
-	struct xe_guc_submit_engine_snapshot *snapshot;
+	struct xe_guc_submit_exec_queue_snapshot *snapshot;
 	int i;
 
 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
@@ -1740,25 +1740,25 @@ xe_guc_engine_snapshot_capture(struct xe_engine *e)
 		return NULL;
 	}
 
-	snapshot->guc.id = e->guc->id;
-	memcpy(&snapshot->name, &e->name, sizeof(snapshot->name));
-	snapshot->class = e->class;
-	snapshot->logical_mask = e->logical_mask;
-	snapshot->width = e->width;
-	snapshot->refcount = kref_read(&e->refcount);
+	snapshot->guc.id = q->guc->id;
+	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
+	snapshot->class = q->class;
+	snapshot->logical_mask = q->logical_mask;
+	snapshot->width = q->width;
+	snapshot->refcount = kref_read(&q->refcount);
 	snapshot->sched_timeout = sched->base.timeout;
-	snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us;
+	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
 	snapshot->sched_props.preempt_timeout_us =
-		e->sched_props.preempt_timeout_us;
+		q->sched_props.preempt_timeout_us;
 
-	snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot),
+	snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
 				      GFP_ATOMIC);
 
 	if (!snapshot->lrc) {
 		drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
 	} else {
-		for (i = 0; i < e->width; ++i) {
-			struct xe_lrc *lrc = e->lrc + i;
+		for (i = 0; i < q->width; ++i) {
+			struct xe_lrc *lrc = q->lrc + i;
 
 			snapshot->lrc[i].context_desc =
 				lower_32_bits(xe_lrc_ggtt_addr(lrc));
@@ -1771,12 +1771,12 @@ xe_guc_engine_snapshot_capture(struct xe_engine *e)
 		}
 	}
 
-	snapshot->schedule_state = atomic_read(&e->guc->state);
-	snapshot->engine_flags = e->flags;
+	snapshot->schedule_state = atomic_read(&q->guc->state);
+	snapshot->exec_queue_flags = q->flags;
 
-	snapshot->parallel_execution = xe_engine_is_parallel(e);
+	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
 	if (snapshot->parallel_execution)
-		guc_engine_wq_snapshot_capture(e, snapshot);
+		guc_exec_queue_wq_snapshot_capture(q, snapshot);
 
 	spin_lock(&sched->base.job_list_lock);
 	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
@@ -1806,15 +1806,15 @@ xe_guc_engine_snapshot_capture(struct xe_engine *e)
 }
 
 /**
- * xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot.
+ * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
  * @snapshot: GuC Submit Engine snapshot object.
  * @p: drm_printer where it will be printed out.
  *
  * This function prints out a given GuC Submit Engine snapshot object.
  */
 void
-xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
-			     struct drm_printer *p)
+xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p)
 {
 	int i;
 
@@ -1846,10 +1846,10 @@ xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
 		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
 	}
 	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
-	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags);
+	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
 
 	if (snapshot->parallel_execution)
-		guc_engine_wq_snapshot_print(snapshot, p);
+		guc_exec_queue_wq_snapshot_print(snapshot, p);
 
 	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
 	     i++)
@@ -1860,14 +1860,14 @@ xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
 }
 
 /**
- * xe_guc_engine_snapshot_free - Free all allocated objects for a given
+ * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
  * snapshot.
  * @snapshot: GuC Submit Engine snapshot object.
  *
  * This function free all the memory that needed to be allocated at capture
  * time.
  */
-void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot)
+void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
 {
 	if (!snapshot)
 		return;
@@ -1877,13 +1877,13 @@ void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot)
 	kfree(snapshot);
 }
 
-static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
 {
-	struct xe_guc_submit_engine_snapshot *snapshot;
+	struct xe_guc_submit_exec_queue_snapshot *snapshot;
 
-	snapshot = xe_guc_engine_snapshot_capture(e);
-	xe_guc_engine_snapshot_print(snapshot, p);
-	xe_guc_engine_snapshot_free(snapshot);
+	snapshot = xe_guc_exec_queue_snapshot_capture(q);
+	xe_guc_exec_queue_snapshot_print(snapshot, p);
+	xe_guc_exec_queue_snapshot_free(snapshot);
 }
 
 /**
@@ -1895,14 +1895,14 @@ static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
  */
 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	unsigned long index;
 
 	if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
 		return;
 
 	mutex_lock(&guc->submission_state.lock);
-	xa_for_each(&guc->submission_state.engine_lookup, index, e)
-		guc_engine_print(e, p);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_print(q, p);
 	mutex_unlock(&guc->submission_state.lock);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 4153c2d220130..fc97869c5b865 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -9,7 +9,7 @@
 #include <linux/types.h>
 
 struct drm_printer;
-struct xe_engine;
+struct xe_exec_queue;
 struct xe_guc;
 
 int xe_guc_submit_init(struct xe_guc *guc);
@@ -21,18 +21,18 @@ int xe_guc_submit_start(struct xe_guc *guc);
 
 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
-int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
-int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
-					   u32 len);
-int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					       u32 len);
+int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
-struct xe_guc_submit_engine_snapshot *
-xe_guc_engine_snapshot_capture(struct xe_engine *e);
+struct xe_guc_submit_exec_queue_snapshot *
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
 void
-xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
-			     struct drm_printer *p);
+xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p);
 void
-xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot);
+xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index 6765b2c6eab15..649b0a8526921 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -79,20 +79,20 @@ struct pending_list_snapshot {
 };
 
 /**
- * struct xe_guc_submit_engine_snapshot - Snapshot for devcoredump
+ * struct xe_guc_submit_exec_queue_snapshot - Snapshot for devcoredump
  */
-struct xe_guc_submit_engine_snapshot {
-	/** @name: name of this engine */
+struct xe_guc_submit_exec_queue_snapshot {
+	/** @name: name of this exec queue */
 	char name[MAX_FENCE_NAME_LEN];
-	/** @class: class of this engine */
+	/** @class: class of this exec queue */
 	enum xe_engine_class class;
 	/**
-	 * @logical_mask: logical mask of where job submitted to engine can run
+	 * @logical_mask: logical mask of where job submitted to exec queue can run
 	 */
 	u32 logical_mask;
-	/** @width: width (number BB submitted per exec) of this engine */
+	/** @width: width (number BB submitted per exec) of this exec queue */
 	u16 width;
-	/** @refcount: ref count of this engine */
+	/** @refcount: ref count of this exec queue */
 	u32 refcount;
 	/**
 	 * @sched_timeout: the time after which a job is removed from the
@@ -113,8 +113,8 @@ struct xe_guc_submit_engine_snapshot {
 
 	/** @schedule_state: Schedule State at the moment of Crash */
 	u32 schedule_state;
-	/** @engine_flags: Flags of the faulty engine */
-	unsigned long engine_flags;
+	/** @exec_queue_flags: Flags of the faulty exec_queue */
+	unsigned long exec_queue_flags;
 
 	/** @guc: GuC Engine Snapshot */
 	struct {
@@ -122,7 +122,7 @@ struct xe_guc_submit_engine_snapshot {
 		u32 wqi_head;
 		/** @wqi_tail: work queue item tail */
 		u32 wqi_tail;
-		/** @id: GuC id for this xe_engine */
+		/** @id: GuC id for this exec_queue */
 		u16 id;
 	} guc;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index a304dce4e9f4d..a5e58917a4994 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -33,8 +33,8 @@ struct xe_guc {
 	struct xe_guc_pc pc;
 	/** @submission_state: GuC submission state */
 	struct {
-		/** @engine_lookup: Lookup an xe_engine from guc_id */
-		struct xarray engine_lookup;
+		/** @exec_queue_lookup: Lookup an xe_engine from guc_id */
+		struct xarray exec_queue_lookup;
 		/** @guc_ids: used to allocate new guc_ids, single-lrc */
 		struct ida guc_ids;
 		/** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 05f3d8d683793..09db8da261a39 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -12,7 +12,7 @@
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
-#include "xe_engine_types.h"
+#include "xe_exec_queue_types.h"
 #include "xe_gt.h"
 #include "xe_hw_fence.h"
 #include "xe_map.h"
@@ -604,7 +604,7 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
 #define ACC_NOTIFY_S            16
 
 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
-		struct xe_engine *e, struct xe_vm *vm, u32 ring_size)
+		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
 {
 	struct xe_gt *gt = hwe->gt;
 	struct xe_tile *tile = gt_to_tile(gt);
@@ -669,12 +669,12 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
 	if (xe->info.has_asid && vm)
 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
-				     (e->usm.acc_granularity <<
+				     (q->usm.acc_granularity <<
 				      ACC_GRANULARITY_S) | vm->usm.asid);
 	if (xe->info.supports_usm && vm)
 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
-				     (e->usm.acc_notify << ACC_NOTIFY_S) |
-				     e->usm.acc_trigger);
+				     (q->usm.acc_notify << ACC_NOTIFY_S) |
+				     q->usm.acc_trigger);
 
 	lrc->desc = GEN8_CTX_VALID;
 	lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index e37f89e75ef80..3a6e8fc5a8370 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -8,7 +8,7 @@
 #include "xe_lrc_types.h"
 
 struct xe_device;
-struct xe_engine;
+struct xe_exec_queue;
 enum xe_engine_class;
 struct xe_hw_engine;
 struct xe_vm;
@@ -16,7 +16,7 @@ struct xe_vm;
 #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
 
 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
-		struct xe_engine *e, struct xe_vm *vm, u32 ring_size);
+		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size);
 void xe_lrc_finish(struct xe_lrc *lrc);
 
 size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 60f7226c92ff3..d0816d2090f07 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -34,8 +34,8 @@
  * struct xe_migrate - migrate context.
  */
 struct xe_migrate {
-	/** @eng: Default engine used for migration */
-	struct xe_engine *eng;
+	/** @q: Default exec queue used for migration */
+	struct xe_exec_queue *q;
 	/** @tile: Backpointer to the tile this struct xe_migrate belongs to. */
 	struct xe_tile *tile;
 	/** @job_mutex: Timeline mutex for @eng. */
@@ -78,9 +78,9 @@ struct xe_migrate {
  *
  * Return: The default migrate engine
  */
-struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile)
+struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile)
 {
-	return tile->migrate->eng;
+	return tile->migrate->q;
 }
 
 static void xe_migrate_fini(struct drm_device *dev, void *arg)
@@ -88,11 +88,11 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg)
 	struct xe_migrate *m = arg;
 	struct ww_acquire_ctx ww;
 
-	xe_vm_lock(m->eng->vm, &ww, 0, false);
+	xe_vm_lock(m->q->vm, &ww, 0, false);
 	xe_bo_unpin(m->pt_bo);
 	if (m->cleared_bo)
 		xe_bo_unpin(m->cleared_bo);
-	xe_vm_unlock(m->eng->vm, &ww);
+	xe_vm_unlock(m->q->vm, &ww);
 
 	dma_fence_put(m->fence);
 	if (m->cleared_bo)
@@ -100,8 +100,8 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg)
 	xe_bo_put(m->pt_bo);
 	drm_suballoc_manager_fini(&m->vm_update_sa);
 	mutex_destroy(&m->job_mutex);
-	xe_vm_close_and_put(m->eng->vm);
-	xe_engine_put(m->eng);
+	xe_vm_close_and_put(m->q->vm);
+	xe_exec_queue_put(m->q);
 }
 
 static u64 xe_migrate_vm_addr(u64 slot, u32 level)
@@ -341,20 +341,20 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		if (!hwe)
 			return ERR_PTR(-EINVAL);
 
-		m->eng = xe_engine_create(xe, vm,
-					  BIT(hwe->logical_instance), 1,
-					  hwe, ENGINE_FLAG_KERNEL);
+		m->q = xe_exec_queue_create(xe, vm,
+					    BIT(hwe->logical_instance), 1,
+					    hwe, EXEC_QUEUE_FLAG_KERNEL);
 	} else {
-		m->eng = xe_engine_create_class(xe, primary_gt, vm,
-						XE_ENGINE_CLASS_COPY,
-						ENGINE_FLAG_KERNEL);
+		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
+						  XE_ENGINE_CLASS_COPY,
+						  EXEC_QUEUE_FLAG_KERNEL);
 	}
-	if (IS_ERR(m->eng)) {
+	if (IS_ERR(m->q)) {
 		xe_vm_close_and_put(vm);
-		return ERR_CAST(m->eng);
+		return ERR_CAST(m->q);
 	}
 	if (xe->info.supports_usm)
-		m->eng->priority = XE_ENGINE_PRIORITY_KERNEL;
+		m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
 
 	mutex_init(&m->job_mutex);
 
@@ -456,7 +456,7 @@ static void emit_pte(struct xe_migrate *m,
 			addr = xe_res_dma(cur) & PAGE_MASK;
 			if (is_vram) {
 				/* Is this a 64K PTE entry? */
-				if ((m->eng->vm->flags & XE_VM_FLAG_64K) &&
+				if ((m->q->vm->flags & XE_VM_FLAG_64K) &&
 				    !(cur_ofs & (16 * 8 - 1))) {
 					XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K));
 					addr |= XE_PTE_PS64;
@@ -714,7 +714,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 						  src_L0, ccs_ofs, copy_ccs);
 
 		mutex_lock(&m->job_mutex);
-		job = xe_bb_create_migration_job(m->eng, bb,
+		job = xe_bb_create_migration_job(m->q, bb,
 						 xe_migrate_batch_base(m, usm),
 						 update_idx);
 		if (IS_ERR(job)) {
@@ -938,7 +938,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		}
 
 		mutex_lock(&m->job_mutex);
-		job = xe_bb_create_migration_job(m->eng, bb,
+		job = xe_bb_create_migration_job(m->q, bb,
 						 xe_migrate_batch_base(m, usm),
 						 update_idx);
 		if (IS_ERR(job)) {
@@ -1024,7 +1024,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 
 struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
 {
-	return xe_vm_get(m->eng->vm);
+	return xe_vm_get(m->q->vm);
 }
 
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
@@ -1106,7 +1106,7 @@ static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
  * @m: The migrate context.
  * @vm: The vm we'll be updating.
  * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr.
- * @eng: The engine to be used for the update or NULL if the default
+ * @q: The exec queue to be used for the update or NULL if the default
  * migration engine is to be used.
  * @updates: An array of update descriptors.
  * @num_updates: Number of descriptors in @updates.
@@ -1132,7 +1132,7 @@ struct dma_fence *
 xe_migrate_update_pgtables(struct xe_migrate *m,
 			   struct xe_vm *vm,
 			   struct xe_bo *bo,
-			   struct xe_engine *eng,
+			   struct xe_exec_queue *q,
 			   const struct xe_vm_pgtable_update *updates,
 			   u32 num_updates,
 			   struct xe_sync_entry *syncs, u32 num_syncs,
@@ -1150,13 +1150,13 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
 	u64 addr;
 	int err = 0;
-	bool usm = !eng && xe->info.supports_usm;
+	bool usm = !q && xe->info.supports_usm;
 	bool first_munmap_rebind = vma &&
 		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
-	struct xe_engine *eng_override = !eng ? m->eng : eng;
+	struct xe_exec_queue *q_override = !q ? m->q : q;
 
 	/* Use the CPU if no in syncs and engine is idle */
-	if (no_in_syncs(syncs, num_syncs) && xe_engine_is_idle(eng_override)) {
+	if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
 		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
 							num_updates,
 							first_munmap_rebind,
@@ -1186,14 +1186,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	 */
 	XE_WARN_ON(batch_size >= SZ_128K);
 
-	bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm);
+	bb = xe_bb_new(gt, batch_size, !q && xe->info.supports_usm);
 	if (IS_ERR(bb))
 		return ERR_CAST(bb);
 
 	/* For sysmem PTE's, need to map them in our hole.. */
 	if (!IS_DGFX(xe)) {
 		ppgtt_ofs = NUM_KERNEL_PDE - 1;
-		if (eng) {
+		if (q) {
 			XE_WARN_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT);
 
 			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
@@ -1249,10 +1249,10 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 			write_pgtable(tile, bb, 0, &updates[i], pt_update);
 	}
 
-	if (!eng)
+	if (!q)
 		mutex_lock(&m->job_mutex);
 
-	job = xe_bb_create_migration_job(eng ?: m->eng, bb,
+	job = xe_bb_create_migration_job(q ?: m->q, bb,
 					 xe_migrate_batch_base(m, usm),
 					 update_idx);
 	if (IS_ERR(job)) {
@@ -1295,7 +1295,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	fence = dma_fence_get(&job->drm.s_fence->finished);
 	xe_sched_job_push(job);
 
-	if (!eng)
+	if (!q)
 		mutex_unlock(&m->job_mutex);
 
 	xe_bb_free(bb, fence);
@@ -1306,7 +1306,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 err_job:
 	xe_sched_job_put(job);
 err_bb:
-	if (!eng)
+	if (!q)
 		mutex_unlock(&m->job_mutex);
 	xe_bb_free(bb, NULL);
 err:
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 0d62aff6421c5..c729241776adf 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -14,7 +14,7 @@ struct ttm_resource;
 
 struct xe_bo;
 struct xe_gt;
-struct xe_engine;
+struct xe_exec_queue;
 struct xe_migrate;
 struct xe_migrate_pt_update;
 struct xe_sync_entry;
@@ -97,7 +97,7 @@ struct dma_fence *
 xe_migrate_update_pgtables(struct xe_migrate *m,
 			   struct xe_vm *vm,
 			   struct xe_bo *bo,
-			   struct xe_engine *eng,
+			   struct xe_exec_queue *q,
 			   const struct xe_vm_pgtable_update *updates,
 			   u32 num_updates,
 			   struct xe_sync_entry *syncs, u32 num_syncs,
@@ -105,5 +105,5 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 void xe_migrate_wait(struct xe_migrate *m);
 
-struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile);
+struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
index 25f7b35a76daf..d0f1ec4b0336a 100644
--- a/drivers/gpu/drm/xe/xe_mocs.h
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -8,7 +8,7 @@
 
 #include <linux/types.h>
 
-struct xe_engine;
+struct xe_exec_queue;
 struct xe_gt;
 
 void xe_mocs_init_early(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index e86604e0174da..7bce2a332603c 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -15,19 +15,19 @@ static void preempt_fence_work_func(struct work_struct *w)
 	bool cookie = dma_fence_begin_signalling();
 	struct xe_preempt_fence *pfence =
 		container_of(w, typeof(*pfence), preempt_work);
-	struct xe_engine *e = pfence->engine;
+	struct xe_exec_queue *q = pfence->q;
 
 	if (pfence->error)
 		dma_fence_set_error(&pfence->base, pfence->error);
 	else
-		e->ops->suspend_wait(e);
+		q->ops->suspend_wait(q);
 
 	dma_fence_signal(&pfence->base);
 	dma_fence_end_signalling(cookie);
 
-	xe_vm_queue_rebind_worker(e->vm);
+	xe_vm_queue_rebind_worker(q->vm);
 
-	xe_engine_put(e);
+	xe_exec_queue_put(q);
 }
 
 static const char *
@@ -46,9 +46,9 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
 {
 	struct xe_preempt_fence *pfence =
 		container_of(fence, typeof(*pfence), base);
-	struct xe_engine *e = pfence->engine;
+	struct xe_exec_queue *q = pfence->q;
 
-	pfence->error = e->ops->suspend(e);
+	pfence->error = q->ops->suspend(q);
 	queue_work(system_unbound_wq, &pfence->preempt_work);
 	return true;
 }
@@ -104,43 +104,43 @@ void xe_preempt_fence_free(struct xe_preempt_fence *pfence)
  * xe_preempt_fence_alloc().
  * @pfence: The struct xe_preempt_fence pointer returned from
  *          xe_preempt_fence_alloc().
- * @e: The struct xe_engine used for arming.
+ * @q: The struct xe_exec_queue used for arming.
  * @context: The dma-fence context used for arming.
  * @seqno: The dma-fence seqno used for arming.
  *
  * Inserts the preempt fence into @context's timeline, takes @link off any
- * list, and registers the struct xe_engine as the xe_engine to be preempted.
+ * list, and registers the struct xe_exec_queue as the xe_engine to be preempted.
  *
  * Return: A pointer to a struct dma_fence embedded into the preempt fence.
  * This function doesn't error.
  */
 struct dma_fence *
-xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
 		     u64 context, u32 seqno)
 {
 	list_del_init(&pfence->link);
-	pfence->engine = xe_engine_get(e);
+	pfence->q = xe_exec_queue_get(q);
 	dma_fence_init(&pfence->base, &preempt_fence_ops,
-		      &e->compute.lock, context, seqno);
+		      &q->compute.lock, context, seqno);
 
 	return &pfence->base;
 }
 
 /**
  * xe_preempt_fence_create() - Helper to create and arm a preempt fence.
- * @e: The struct xe_engine used for arming.
+ * @q: The struct xe_exec_queue used for arming.
  * @context: The dma-fence context used for arming.
  * @seqno: The dma-fence seqno used for arming.
  *
  * Allocates and inserts the preempt fence into @context's timeline,
- * and registers @e as the struct xe_engine to be preempted.
+ * and registers @e as the struct xe_exec_queue to be preempted.
  *
  * Return: A pointer to the resulting struct dma_fence on success. An error
  * pointer on error. In particular if allocation fails it returns
  * ERR_PTR(-ENOMEM);
  */
 struct dma_fence *
-xe_preempt_fence_create(struct xe_engine *e,
+xe_preempt_fence_create(struct xe_exec_queue *q,
 			u64 context, u32 seqno)
 {
 	struct xe_preempt_fence *pfence;
@@ -149,7 +149,7 @@ xe_preempt_fence_create(struct xe_engine *e,
 	if (IS_ERR(pfence))
 		return ERR_CAST(pfence);
 
-	return xe_preempt_fence_arm(pfence, e, context, seqno);
+	return xe_preempt_fence_arm(pfence, q, context, seqno);
 }
 
 bool xe_fence_is_xe_preempt(const struct dma_fence *fence)
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h
index 4f3966103203c..9406c6fea525a 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.h
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.h
@@ -11,7 +11,7 @@
 struct list_head;
 
 struct dma_fence *
-xe_preempt_fence_create(struct xe_engine *e,
+xe_preempt_fence_create(struct xe_exec_queue *q,
 			u64 context, u32 seqno);
 
 struct xe_preempt_fence *xe_preempt_fence_alloc(void);
@@ -19,7 +19,7 @@ struct xe_preempt_fence *xe_preempt_fence_alloc(void);
 void xe_preempt_fence_free(struct xe_preempt_fence *pfence);
 
 struct dma_fence *
-xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
 		     u64 context, u32 seqno);
 
 static inline struct xe_preempt_fence *
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
index 9d9efd8ff0ed2..b54b5c29b5331 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -9,12 +9,11 @@
 #include <linux/dma-fence.h>
 #include <linux/workqueue.h>
 
-struct xe_engine;
+struct xe_exec_queue;
 
 /**
  * struct xe_preempt_fence - XE preempt fence
  *
- * A preemption fence which suspends the execution of an xe_engine on the
  * hardware and triggers a callback once the xe_engine is complete.
  */
 struct xe_preempt_fence {
@@ -22,8 +21,8 @@ struct xe_preempt_fence {
 	struct dma_fence base;
 	/** @link: link into list of pending preempt fences */
 	struct list_head link;
-	/** @engine: xe engine for this preempt fence */
-	struct xe_engine *engine;
+	/** @q: exec queue for this preempt fence */
+	struct xe_exec_queue *q;
 	/** @preempt_work: work struct which issues preemption */
 	struct work_struct preempt_work;
 	/** @error: preempt fence is in error state */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index b82ce01cc4cb9..c21d2681b4196 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1307,7 +1307,7 @@ static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
  * address range.
  * @tile: The tile to bind for.
  * @vma: The vma to bind.
- * @e: The engine with which to do pipelined page-table updates.
+ * @q: The exec_queue with which to do pipelined page-table updates.
  * @syncs: Entries to sync on before binding the built tree to the live vm tree.
  * @num_syncs: Number of @sync entries.
  * @rebind: Whether we're rebinding this vma to the same address range without
@@ -1325,7 +1325,7 @@ static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
  * on success, an error pointer on error.
  */
 struct dma_fence *
-__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool rebind)
 {
@@ -1351,7 +1351,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
-	       xe_vma_start(vma), xe_vma_end(vma) - 1, e);
+	       xe_vma_start(vma), xe_vma_end(vma) - 1, q);
 
 	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
 	if (err)
@@ -1388,7 +1388,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 	}
 
 	fence = xe_migrate_update_pgtables(tile->migrate,
-					   vm, xe_vma_bo(vma), e,
+					   vm, xe_vma_bo(vma), q,
 					   entries, num_entries,
 					   syncs, num_syncs,
 					   &bind_pt_update.base);
@@ -1663,7 +1663,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
  * address range.
  * @tile: The tile to unbind for.
  * @vma: The vma to unbind.
- * @e: The engine with which to do pipelined page-table updates.
+ * @q: The exec_queue with which to do pipelined page-table updates.
  * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
  * @num_syncs: Number of @sync entries.
  *
@@ -1679,7 +1679,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
  * on success, an error pointer on error.
  */
 struct dma_fence *
-__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
 		   struct xe_sync_entry *syncs, u32 num_syncs)
 {
 	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
@@ -1704,7 +1704,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
-	       xe_vma_start(vma), xe_vma_end(vma) - 1, e);
+	       xe_vma_start(vma), xe_vma_end(vma) - 1, q);
 
 	num_entries = xe_pt_stage_unbind(tile, vma, entries);
 	XE_WARN_ON(num_entries > ARRAY_SIZE(entries));
@@ -1729,8 +1729,8 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 	 * lower level, because it needs to be more conservative.
 	 */
 	fence = xe_migrate_update_pgtables(tile->migrate,
-					   vm, NULL, e ? e :
-					   vm->eng[tile->id],
+					   vm, NULL, q ? q :
+					   vm->q[tile->id],
 					   entries, num_entries,
 					   syncs, num_syncs,
 					   &unbind_pt_update.base);
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index bbb00d6461ffd..01be7ab08f87e 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -12,7 +12,7 @@
 struct dma_fence;
 struct xe_bo;
 struct xe_device;
-struct xe_engine;
+struct xe_exec_queue;
 struct xe_sync_entry;
 struct xe_tile;
 struct xe_vm;
@@ -35,12 +35,12 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
 void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
 
 struct dma_fence *
-__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool rebind);
 
 struct dma_fence *
-__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
 		   struct xe_sync_entry *syncs, u32 num_syncs);
 
 bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 382851f436b79..7ea235c71385f 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -203,7 +203,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
 		hweight_long(xe->info.mem_region_mask);
 	config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] =
-		xe_engine_device_get_max_priority(xe);
+		xe_exec_queue_device_get_max_priority(xe);
 
 	if (copy_to_user(query_ptr, config, size)) {
 		kfree(config);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 2d0d392cd6916..6346ed24e2793 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -10,7 +10,7 @@
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
 #include "regs/xe_regs.h"
-#include "xe_engine_types.h"
+#include "xe_exec_queue_types.h"
 #include "xe_gt.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
@@ -156,7 +156,7 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
 
 static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
 {
-	struct xe_gt *gt = job->engine->gt;
+	struct xe_gt *gt = job->q->gt;
 	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
 	u32 flags;
 
@@ -172,7 +172,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
 
 	if (lacks_render)
 		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
-	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
 		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
 	dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
@@ -202,7 +202,7 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
 
 static u32 get_ppgtt_flag(struct xe_sched_job *job)
 {
-	return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0;
+	return !(job->q->flags & EXEC_QUEUE_FLAG_WA) ? BIT(8) : 0;
 }
 
 static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
@@ -210,7 +210,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 {
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
-	struct xe_vm *vm = job->engine->vm;
+	struct xe_vm *vm = job->q->vm;
 
 	if (vm->batch_invalidate_tlb) {
 		dw[i++] = preparser_disable(true);
@@ -255,10 +255,10 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 {
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
-	struct xe_gt *gt = job->engine->gt;
+	struct xe_gt *gt = job->q->gt;
 	struct xe_device *xe = gt_to_xe(gt);
-	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
-	struct xe_vm *vm = job->engine->vm;
+	bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+	struct xe_vm *vm = job->q->vm;
 
 	dw[i++] = preparser_disable(true);
 
@@ -302,16 +302,16 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 {
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
-	struct xe_gt *gt = job->engine->gt;
+	struct xe_gt *gt = job->q->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
-	struct xe_vm *vm = job->engine->vm;
+	struct xe_vm *vm = job->q->vm;
 	u32 mask_flags = 0;
 
 	dw[i++] = preparser_disable(true);
 	if (lacks_render)
 		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
-	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
 		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
 
 	/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
@@ -378,14 +378,14 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
 {
 	int i;
 
-	if (xe_sched_job_is_migration(job->engine)) {
-		emit_migration_job_gen12(job, job->engine->lrc,
+	if (xe_sched_job_is_migration(job->q)) {
+		emit_migration_job_gen12(job, job->q->lrc,
 					 xe_sched_job_seqno(job));
 		return;
 	}
 
-	for (i = 0; i < job->engine->width; ++i)
-		__emit_job_gen12_copy(job, job->engine->lrc + i,
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_copy(job, job->q->lrc + i,
 				      job->batch_addr[i],
 				      xe_sched_job_seqno(job));
 }
@@ -395,8 +395,8 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
 	int i;
 
 	/* FIXME: Not doing parallel handshake for now */
-	for (i = 0; i < job->engine->width; ++i)
-		__emit_job_gen12_video(job, job->engine->lrc + i,
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_video(job, job->q->lrc + i,
 				       job->batch_addr[i],
 				       xe_sched_job_seqno(job));
 }
@@ -405,8 +405,8 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
 {
 	int i;
 
-	for (i = 0; i < job->engine->width; ++i)
-		__emit_job_gen12_render_compute(job, job->engine->lrc + i,
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_render_compute(job, job->q->lrc + i,
 						job->batch_addr[i],
 						xe_sched_job_seqno(job));
 }
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 9944858de4d2d..de2851d24c968 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -57,58 +57,58 @@ static struct xe_sched_job *job_alloc(bool parallel)
 				 xe_sched_job_slab, GFP_KERNEL);
 }
 
-bool xe_sched_job_is_migration(struct xe_engine *e)
+bool xe_sched_job_is_migration(struct xe_exec_queue *q)
 {
-	return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) &&
-		!(e->flags & ENGINE_FLAG_WA);
+	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION) &&
+		!(q->flags & EXEC_QUEUE_FLAG_WA);
 }
 
 static void job_free(struct xe_sched_job *job)
 {
-	struct xe_engine *e = job->engine;
-	bool is_migration = xe_sched_job_is_migration(e);
+	struct xe_exec_queue *q = job->q;
+	bool is_migration = xe_sched_job_is_migration(q);
 
-	kmem_cache_free(xe_engine_is_parallel(job->engine) || is_migration ?
+	kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
 			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
 }
 
 static struct xe_device *job_to_xe(struct xe_sched_job *job)
 {
-	return gt_to_xe(job->engine->gt);
+	return gt_to_xe(job->q->gt);
 }
 
-struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 					 u64 *batch_addr)
 {
 	struct xe_sched_job *job;
 	struct dma_fence **fences;
-	bool is_migration = xe_sched_job_is_migration(e);
+	bool is_migration = xe_sched_job_is_migration(q);
 	int err;
 	int i, j;
 	u32 width;
 
 	/* Migration and kernel engines have their own locking */
-	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM |
-			  ENGINE_FLAG_WA))) {
-		lockdep_assert_held(&e->vm->lock);
-		if (!xe_vm_no_dma_fences(e->vm))
-			xe_vm_assert_held(e->vm);
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM |
+			  EXEC_QUEUE_FLAG_WA))) {
+		lockdep_assert_held(&q->vm->lock);
+		if (!xe_vm_no_dma_fences(q->vm))
+			xe_vm_assert_held(q->vm);
 	}
 
-	job = job_alloc(xe_engine_is_parallel(e) || is_migration);
+	job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
 	if (!job)
 		return ERR_PTR(-ENOMEM);
 
-	job->engine = e;
+	job->q = q;
 	kref_init(&job->refcount);
-	xe_engine_get(job->engine);
+	xe_exec_queue_get(job->q);
 
-	err = drm_sched_job_init(&job->drm, e->entity, 1, NULL);
+	err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
 	if (err)
 		goto err_free;
 
-	if (!xe_engine_is_parallel(e)) {
-		job->fence = xe_lrc_create_seqno_fence(e->lrc);
+	if (!xe_exec_queue_is_parallel(q)) {
+		job->fence = xe_lrc_create_seqno_fence(q->lrc);
 		if (IS_ERR(job->fence)) {
 			err = PTR_ERR(job->fence);
 			goto err_sched_job;
@@ -116,38 +116,38 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
 	} else {
 		struct dma_fence_array *cf;
 
-		fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL);
+		fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
 		if (!fences) {
 			err = -ENOMEM;
 			goto err_sched_job;
 		}
 
-		for (j = 0; j < e->width; ++j) {
-			fences[j] = xe_lrc_create_seqno_fence(e->lrc + j);
+		for (j = 0; j < q->width; ++j) {
+			fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
 			if (IS_ERR(fences[j])) {
 				err = PTR_ERR(fences[j]);
 				goto err_fences;
 			}
 		}
 
-		cf = dma_fence_array_create(e->width, fences,
-					    e->parallel.composite_fence_ctx,
-					    e->parallel.composite_fence_seqno++,
+		cf = dma_fence_array_create(q->width, fences,
+					    q->parallel.composite_fence_ctx,
+					    q->parallel.composite_fence_seqno++,
 					    false);
 		if (!cf) {
-			--e->parallel.composite_fence_seqno;
+			--q->parallel.composite_fence_seqno;
 			err = -ENOMEM;
 			goto err_fences;
 		}
 
 		/* Sanity check */
-		for (j = 0; j < e->width; ++j)
+		for (j = 0; j < q->width; ++j)
 			XE_WARN_ON(cf->base.seqno != fences[j]->seqno);
 
 		job->fence = &cf->base;
 	}
 
-	width = e->width;
+	width = q->width;
 	if (is_migration)
 		width = 2;
 
@@ -155,7 +155,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
 		job->batch_addr[i] = batch_addr[i];
 
 	/* All other jobs require a VM to be open which has a ref */
-	if (unlikely(e->flags & ENGINE_FLAG_KERNEL))
+	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
 		xe_device_mem_access_get(job_to_xe(job));
 	xe_device_assert_mem_access(job_to_xe(job));
 
@@ -164,14 +164,14 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
 
 err_fences:
 	for (j = j - 1; j >= 0; --j) {
-		--e->lrc[j].fence_ctx.next_seqno;
+		--q->lrc[j].fence_ctx.next_seqno;
 		dma_fence_put(fences[j]);
 	}
 	kfree(fences);
 err_sched_job:
 	drm_sched_job_cleanup(&job->drm);
 err_free:
-	xe_engine_put(e);
+	xe_exec_queue_put(q);
 	job_free(job);
 	return ERR_PTR(err);
 }
@@ -188,9 +188,9 @@ void xe_sched_job_destroy(struct kref *ref)
 	struct xe_sched_job *job =
 		container_of(ref, struct xe_sched_job, refcount);
 
-	if (unlikely(job->engine->flags & ENGINE_FLAG_KERNEL))
+	if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
 		xe_device_mem_access_put(job_to_xe(job));
-	xe_engine_put(job->engine);
+	xe_exec_queue_put(job->q);
 	dma_fence_put(job->fence);
 	drm_sched_job_cleanup(&job->drm);
 	job_free(job);
@@ -222,12 +222,12 @@ void xe_sched_job_set_error(struct xe_sched_job *job, int error)
 	trace_xe_sched_job_set_error(job);
 
 	dma_fence_enable_sw_signaling(job->fence);
-	xe_hw_fence_irq_run(job->engine->fence_irq);
+	xe_hw_fence_irq_run(job->q->fence_irq);
 }
 
 bool xe_sched_job_started(struct xe_sched_job *job)
 {
-	struct xe_lrc *lrc = job->engine->lrc;
+	struct xe_lrc *lrc = job->q->lrc;
 
 	return !__dma_fence_is_later(xe_sched_job_seqno(job),
 				     xe_lrc_start_seqno(lrc),
@@ -236,7 +236,7 @@ bool xe_sched_job_started(struct xe_sched_job *job)
 
 bool xe_sched_job_completed(struct xe_sched_job *job)
 {
-	struct xe_lrc *lrc = job->engine->lrc;
+	struct xe_lrc *lrc = job->q->lrc;
 
 	/*
 	 * Can safely check just LRC[0] seqno as that is last seqno written when
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 5315ad8656c24..6ca1d426c036f 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -14,7 +14,7 @@
 int xe_sched_job_module_init(void);
 void xe_sched_job_module_exit(void);
 
-struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 					 u64 *batch_addr);
 void xe_sched_job_destroy(struct kref *ref);
 
@@ -71,6 +71,6 @@ xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
 	job->migrate_flush_flags = flags;
 }
 
-bool xe_sched_job_is_migration(struct xe_engine *e);
+bool xe_sched_job_is_migration(struct xe_exec_queue *q);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 5534bfacaa168..71213ba9735bc 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -10,7 +10,7 @@
 
 #include <drm/gpu_scheduler.h>
 
-struct xe_engine;
+struct xe_exec_queue;
 
 /**
  * struct xe_sched_job - XE schedule job (batch buffer tracking)
@@ -18,8 +18,8 @@ struct xe_engine;
 struct xe_sched_job {
 	/** @drm: base DRM scheduler job */
 	struct drm_sched_job drm;
-	/** @engine: XE submission engine */
-	struct xe_engine *engine;
+	/** @q: Exec queue */
+	struct xe_exec_queue *q;
 	/** @refcount: ref count of this job */
 	struct kref refcount;
 	/**
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 82ca25d8d017f..5ea458dadf699 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -13,11 +13,11 @@
 #include <linux/types.h>
 
 #include "xe_bo_types.h"
-#include "xe_engine_types.h"
+#include "xe_exec_queue_types.h"
 #include "xe_gpu_scheduler_types.h"
 #include "xe_gt_tlb_invalidation_types.h"
 #include "xe_gt_types.h"
-#include "xe_guc_engine_types.h"
+#include "xe_guc_exec_queue_types.h"
 #include "xe_sched_job.h"
 #include "xe_vm.h"
 
@@ -105,9 +105,9 @@ DEFINE_EVENT(xe_bo, xe_bo_move,
 	     TP_ARGS(bo)
 );
 
-DECLARE_EVENT_CLASS(xe_engine,
-		    TP_PROTO(struct xe_engine *e),
-		    TP_ARGS(e),
+DECLARE_EVENT_CLASS(xe_exec_queue,
+		    TP_PROTO(struct xe_exec_queue *q),
+		    TP_ARGS(q),
 
 		    TP_STRUCT__entry(
 			     __field(enum xe_engine_class, class)
@@ -120,13 +120,13 @@ DECLARE_EVENT_CLASS(xe_engine,
 			     ),
 
 		    TP_fast_assign(
-			   __entry->class = e->class;
-			   __entry->logical_mask = e->logical_mask;
-			   __entry->gt_id = e->gt->info.id;
-			   __entry->width = e->width;
-			   __entry->guc_id = e->guc->id;
-			   __entry->guc_state = atomic_read(&e->guc->state);
-			   __entry->flags = e->flags;
+			   __entry->class = q->class;
+			   __entry->logical_mask = q->logical_mask;
+			   __entry->gt_id = q->gt->info.id;
+			   __entry->width = q->width;
+			   __entry->guc_id = q->guc->id;
+			   __entry->guc_state = atomic_read(&q->guc->state);
+			   __entry->flags = q->flags;
 			   ),
 
 		    TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
@@ -135,94 +135,94 @@ DECLARE_EVENT_CLASS(xe_engine,
 			      __entry->guc_state, __entry->flags)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_create,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_create,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_supress_resume,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_supress_resume,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_submit,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_submit,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_enable,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_disable,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_scheduling_done,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_done,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_register,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_register,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_deregister,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_deregister_done,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister_done,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_close,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_close,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_kill,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_kill,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cleanup_entity,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_destroy,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_destroy,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_reset,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_reset,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_stop,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_resubmit,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_resubmit,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
-DEFINE_EVENT(xe_engine, xe_engine_lr_cleanup,
-	     TP_PROTO(struct xe_engine *e),
-	     TP_ARGS(e)
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_lr_cleanup,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
 );
 
 DECLARE_EVENT_CLASS(xe_sched_job,
@@ -241,10 +241,10 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 
 		    TP_fast_assign(
 			   __entry->seqno = xe_sched_job_seqno(job);
-			   __entry->guc_id = job->engine->guc->id;
+			   __entry->guc_id = job->q->guc->id;
 			   __entry->guc_state =
-			   atomic_read(&job->engine->guc->state);
-			   __entry->flags = job->engine->flags;
+			   atomic_read(&job->q->guc->state);
+			   __entry->flags = job->q->flags;
 			   __entry->error = job->fence->error;
 			   __entry->fence = (unsigned long)job->fence;
 			   __entry->batch_addr = (u64)job->batch_addr[0];
@@ -303,7 +303,7 @@ DECLARE_EVENT_CLASS(xe_sched_msg,
 		    TP_fast_assign(
 			   __entry->opcode = msg->opcode;
 			   __entry->guc_id =
-			   ((struct xe_engine *)msg->private_data)->guc->id;
+			   ((struct xe_exec_queue *)msg->private_data)->guc->id;
 			   ),
 
 		    TP_printk("guc_id=%d, opcode=%u", __entry->guc_id,
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d3e82c4aed42b..374f111eea9c5 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -165,15 +165,15 @@ out:
 
 static bool preempt_fences_waiting(struct xe_vm *vm)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
 	lockdep_assert_held(&vm->lock);
 	xe_vm_assert_held(vm);
 
-	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
-		if (!e->compute.pfence || (e->compute.pfence &&
-		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
-			     &e->compute.pfence->flags))) {
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (!q->compute.pfence ||
+		    (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+						   &q->compute.pfence->flags))) {
 			return true;
 		}
 	}
@@ -195,10 +195,10 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
 	lockdep_assert_held(&vm->lock);
 	xe_vm_assert_held(vm);
 
-	if (*count >= vm->preempt.num_engines)
+	if (*count >= vm->preempt.num_exec_queues)
 		return 0;
 
-	for (; *count < vm->preempt.num_engines; ++(*count)) {
+	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
 		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
 
 		if (IS_ERR(pfence))
@@ -212,18 +212,18 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
 
 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
 	xe_vm_assert_held(vm);
 
-	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
-		if (e->compute.pfence) {
-			long timeout = dma_fence_wait(e->compute.pfence, false);
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (q->compute.pfence) {
+			long timeout = dma_fence_wait(q->compute.pfence, false);
 
 			if (timeout < 0)
 				return -ETIME;
-			dma_fence_put(e->compute.pfence);
-			e->compute.pfence = NULL;
+			dma_fence_put(q->compute.pfence);
+			q->compute.pfence = NULL;
 		}
 	}
 
@@ -232,11 +232,11 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm)
 
 static bool xe_vm_is_idle(struct xe_vm *vm)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
 	xe_vm_assert_held(vm);
-	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
-		if (!xe_engine_is_idle(e))
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (!xe_exec_queue_is_idle(q))
 			return false;
 	}
 
@@ -246,36 +246,36 @@ static bool xe_vm_is_idle(struct xe_vm *vm)
 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
 {
 	struct list_head *link;
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
-	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
 		struct dma_fence *fence;
 
 		link = list->next;
 		XE_WARN_ON(link == list);
 
 		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
-					     e, e->compute.context,
-					     ++e->compute.seqno);
-		dma_fence_put(e->compute.pfence);
-		e->compute.pfence = fence;
+					     q, q->compute.context,
+					     ++q->compute.seqno);
+		dma_fence_put(q->compute.pfence);
+		q->compute.pfence = fence;
 	}
 }
 
 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 	struct ww_acquire_ctx ww;
 	int err;
 
-	err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true);
+	err = xe_bo_lock(bo, &ww, vm->preempt.num_exec_queues, true);
 	if (err)
 		return err;
 
-	list_for_each_entry(e, &vm->preempt.engines, compute.link)
-		if (e->compute.pfence) {
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
+		if (q->compute.pfence) {
 			dma_resv_add_fence(bo->ttm.base.resv,
-					   e->compute.pfence,
+					   q->compute.pfence,
 					   DMA_RESV_USAGE_BOOKKEEP);
 		}
 
@@ -304,22 +304,22 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
 
 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
 {
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
 	lockdep_assert_held(&vm->lock);
 	xe_vm_assert_held(vm);
 
-	list_for_each_entry(e, &vm->preempt.engines, compute.link) {
-		e->ops->resume(e);
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		q->ops->resume(q);
 
-		dma_resv_add_fence(xe_vm_resv(vm), e->compute.pfence,
+		dma_resv_add_fence(xe_vm_resv(vm), q->compute.pfence,
 				   DMA_RESV_USAGE_BOOKKEEP);
-		xe_vm_fence_all_extobjs(vm, e->compute.pfence,
+		xe_vm_fence_all_extobjs(vm, q->compute.pfence,
 					DMA_RESV_USAGE_BOOKKEEP);
 	}
 }
 
-int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 {
 	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
 	struct ttm_validate_buffer *tv;
@@ -337,16 +337,16 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
 	if (err)
 		goto out_unlock_outer;
 
-	pfence = xe_preempt_fence_create(e, e->compute.context,
-					 ++e->compute.seqno);
+	pfence = xe_preempt_fence_create(q, q->compute.context,
+					 ++q->compute.seqno);
 	if (!pfence) {
 		err = -ENOMEM;
 		goto out_unlock;
 	}
 
-	list_add(&e->compute.link, &vm->preempt.engines);
-	++vm->preempt.num_engines;
-	e->compute.pfence = pfence;
+	list_add(&q->compute.link, &vm->preempt.exec_queues);
+	++vm->preempt.num_exec_queues;
+	q->compute.pfence = pfence;
 
 	down_read(&vm->userptr.notifier_lock);
 
@@ -518,7 +518,7 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
 static void xe_vm_kill(struct xe_vm *vm)
 {
 	struct ww_acquire_ctx ww;
-	struct xe_engine *e;
+	struct xe_exec_queue *q;
 
 	lockdep_assert_held(&vm->lock);
 
@@ -526,8 +526,8 @@ static void xe_vm_kill(struct xe_vm *vm)
 	vm->flags |= XE_VM_FLAG_BANNED;
 	trace_xe_vm_kill(vm);
 
-	list_for_each_entry(e, &vm->preempt.engines, compute.link)
-		e->ops->kill(e);
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
+		q->ops->kill(q);
 	xe_vm_unlock(vm, &ww);
 
 	/* TODO: Inform user the VM is banned */
@@ -584,7 +584,7 @@ retry:
 	}
 
 	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
-				  false, vm->preempt.num_engines);
+				  false, vm->preempt.num_exec_queues);
 	if (err)
 		goto out_unlock_outer;
 
@@ -833,7 +833,7 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
 }
 
 static struct dma_fence *
-xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	       struct xe_sync_entry *syncs, u32 num_syncs,
 	       bool first_op, bool last_op);
 
@@ -1241,7 +1241,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 
 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
 
-	INIT_LIST_HEAD(&vm->preempt.engines);
+	INIT_LIST_HEAD(&vm->preempt.exec_queues);
 	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
 
 	for_each_tile(tile, xe, id)
@@ -1320,21 +1320,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		for_each_tile(tile, xe, id) {
 			struct xe_gt *gt = tile->primary_gt;
 			struct xe_vm *migrate_vm;
-			struct xe_engine *eng;
+			struct xe_exec_queue *q;
 
 			if (!vm->pt_root[id])
 				continue;
 
 			migrate_vm = xe_migrate_get_vm(tile->migrate);
-			eng = xe_engine_create_class(xe, gt, migrate_vm,
-						     XE_ENGINE_CLASS_COPY,
-						     ENGINE_FLAG_VM);
+			q = xe_exec_queue_create_class(xe, gt, migrate_vm,
+						       XE_ENGINE_CLASS_COPY,
+						       EXEC_QUEUE_FLAG_VM);
 			xe_vm_put(migrate_vm);
-			if (IS_ERR(eng)) {
-				err = PTR_ERR(eng);
+			if (IS_ERR(q)) {
+				err = PTR_ERR(q);
 				goto err_close;
 			}
-			vm->eng[id] = eng;
+			vm->q[id] = q;
 			number_tiles++;
 		}
 	}
@@ -1422,7 +1422,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	struct drm_gpuva *gpuva, *next;
 	u8 id;
 
-	XE_WARN_ON(vm->preempt.num_engines);
+	XE_WARN_ON(vm->preempt.num_exec_queues);
 
 	xe_vm_close(vm);
 	flush_async_ops(vm);
@@ -1430,10 +1430,10 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		flush_work(&vm->preempt.rebind_work);
 
 	for_each_tile(tile, xe, id) {
-		if (vm->eng[id]) {
-			xe_engine_kill(vm->eng[id]);
-			xe_engine_put(vm->eng[id]);
-			vm->eng[id] = NULL;
+		if (vm->q[id]) {
+			xe_exec_queue_kill(vm->q[id]);
+			xe_exec_queue_put(vm->q[id]);
+			vm->q[id] = NULL;
 		}
 	}
 
@@ -1573,7 +1573,7 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 }
 
 static struct dma_fence *
-xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
+xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool first_op, bool last_op)
 {
@@ -1600,7 +1600,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
 		if (!(vma->tile_present & BIT(id)))
 			goto next;
 
-		fence = __xe_pt_unbind_vma(tile, vma, e, first_op ? syncs : NULL,
+		fence = __xe_pt_unbind_vma(tile, vma, q, first_op ? syncs : NULL,
 					   first_op ? num_syncs : 0);
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
@@ -1611,8 +1611,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
 			fences[cur_fence++] = fence;
 
 next:
-		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
-			e = list_next_entry(e, multi_gt_list);
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+			q = list_next_entry(q, multi_gt_list);
 	}
 
 	if (fences) {
@@ -1648,7 +1648,7 @@ err_fences:
 }
 
 static struct dma_fence *
-xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	       struct xe_sync_entry *syncs, u32 num_syncs,
 	       bool first_op, bool last_op)
 {
@@ -1675,7 +1675,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
 		if (!(vma->tile_mask & BIT(id)))
 			goto next;
 
-		fence = __xe_pt_bind_vma(tile, vma, e ? e : vm->eng[id],
+		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
 					 first_op ? syncs : NULL,
 					 first_op ? num_syncs : 0,
 					 vma->tile_present & BIT(id));
@@ -1688,8 +1688,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
 			fences[cur_fence++] = fence;
 
 next:
-		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
-			e = list_next_entry(e, multi_gt_list);
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+			q = list_next_entry(q, multi_gt_list);
 	}
 
 	if (fences) {
@@ -1805,7 +1805,7 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence)
 }
 
 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
-			struct xe_engine *e, struct xe_sync_entry *syncs,
+			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
 			u32 num_syncs, struct async_op_fence *afence,
 			bool immediate, bool first_op, bool last_op)
 {
@@ -1814,7 +1814,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 	xe_vm_assert_held(vm);
 
 	if (immediate) {
-		fence = xe_vm_bind_vma(vma, e, syncs, num_syncs, first_op,
+		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
 				       last_op);
 		if (IS_ERR(fence))
 			return PTR_ERR(fence);
@@ -1836,7 +1836,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 	return 0;
 }
 
-static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
+static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
 		      struct xe_bo *bo, struct xe_sync_entry *syncs,
 		      u32 num_syncs, struct async_op_fence *afence,
 		      bool immediate, bool first_op, bool last_op)
@@ -1852,12 +1852,12 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
 			return err;
 	}
 
-	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence, immediate,
+	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, afence, immediate,
 			    first_op, last_op);
 }
 
 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
-			struct xe_engine *e, struct xe_sync_entry *syncs,
+			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
 			u32 num_syncs, struct async_op_fence *afence,
 			bool first_op, bool last_op)
 {
@@ -1866,7 +1866,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
 
-	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op, last_op);
+	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 	if (afence)
@@ -2074,7 +2074,7 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	vm = xa_load(&xef->vm.xa, args->vm_id);
 	if (XE_IOCTL_DBG(xe, !vm))
 		err = -ENOENT;
-	else if (XE_IOCTL_DBG(xe, vm->preempt.num_engines))
+	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
 		err = -EBUSY;
 	else
 		xa_erase(&xef->vm.xa, args->vm_id);
@@ -2093,7 +2093,7 @@ static const u32 region_to_mem_type[] = {
 };
 
 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
-			  struct xe_engine *e, u32 region,
+			  struct xe_exec_queue *q, u32 region,
 			  struct xe_sync_entry *syncs, u32 num_syncs,
 			  struct async_op_fence *afence, bool first_op,
 			  bool last_op)
@@ -2109,7 +2109,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 	}
 
 	if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
-		return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs,
+		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
 				  afence, true, first_op, last_op);
 	} else {
 		int i;
@@ -2414,7 +2414,7 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma)
  * Parse operations list and create any resources needed for the operations
  * prior to fully committing to the operations. This setup can fail.
  */
-static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
+static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				   struct drm_gpuva_ops **ops, int num_ops_list,
 				   struct xe_sync_entry *syncs, u32 num_syncs,
 				   struct list_head *ops_list, bool async)
@@ -2434,9 +2434,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 		if (!fence)
 			return -ENOMEM;
 
-		seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
+		seqno = q ? ++q->bind.fence_seqno : ++vm->async_ops.fence.seqno;
 		dma_fence_init(&fence->fence, &async_op_fence_ops,
-			       &vm->async_ops.lock, e ? e->bind.fence_ctx :
+			       &vm->async_ops.lock, q ? q->bind.fence_ctx :
 			       vm->async_ops.fence.context, seqno);
 
 		if (!xe_vm_no_dma_fences(vm)) {
@@ -2467,7 +2467,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
 				op->syncs = syncs;
 			}
 
-			op->engine = e;
+			op->q = q;
 
 			switch (op->base.op) {
 			case DRM_GPUVA_OP_MAP:
@@ -2677,7 +2677,7 @@ again:
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		err = xe_vm_bind(vm, vma, op->engine, xe_vma_bo(vma),
+		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
 				 op->syncs, op->num_syncs, op->fence,
 				 op->map.immediate || !xe_vm_in_fault_mode(vm),
 				 op->flags & XE_VMA_OP_FIRST,
@@ -2693,7 +2693,7 @@ again:
 				vm->async_ops.munmap_rebind_inflight = true;
 				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
 			}
-			err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
+			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
 					   op->num_syncs,
 					   !prev && !next ? op->fence : NULL,
 					   op->flags & XE_VMA_OP_FIRST,
@@ -2706,7 +2706,7 @@ again:
 
 		if (prev) {
 			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
-			err = xe_vm_bind(vm, op->remap.prev, op->engine,
+			err = xe_vm_bind(vm, op->remap.prev, op->q,
 					 xe_vma_bo(op->remap.prev), op->syncs,
 					 op->num_syncs,
 					 !next ? op->fence : NULL, true, false,
@@ -2719,7 +2719,7 @@ again:
 
 		if (next) {
 			op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
-			err = xe_vm_bind(vm, op->remap.next, op->engine,
+			err = xe_vm_bind(vm, op->remap.next, op->q,
 					 xe_vma_bo(op->remap.next),
 					 op->syncs, op->num_syncs,
 					 op->fence, true, false,
@@ -2734,13 +2734,13 @@ again:
 		break;
 	}
 	case DRM_GPUVA_OP_UNMAP:
-		err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
+		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
 				   op->num_syncs, op->fence,
 				   op->flags & XE_VMA_OP_FIRST,
 				   op->flags & XE_VMA_OP_LAST);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
-		err = xe_vm_prefetch(vm, vma, op->engine, op->prefetch.region,
+		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
 				     op->syncs, op->num_syncs, op->fence,
 				     op->flags & XE_VMA_OP_FIRST,
 				     op->flags & XE_VMA_OP_LAST);
@@ -2819,8 +2819,8 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
 		while (op->num_syncs--)
 			xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
 		kfree(op->syncs);
-		if (op->engine)
-			xe_engine_put(op->engine);
+		if (op->q)
+			xe_exec_queue_put(op->q);
 		if (op->fence)
 			dma_fence_put(&op->fence->fence);
 	}
@@ -3174,7 +3174,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_bo **bos = NULL;
 	struct drm_gpuva_ops **ops = NULL;
 	struct xe_vm *vm;
-	struct xe_engine *e = NULL;
+	struct xe_exec_queue *q = NULL;
 	u32 num_syncs;
 	struct xe_sync_entry *syncs = NULL;
 	struct drm_xe_vm_bind_op *bind_ops;
@@ -3187,23 +3187,23 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		return err;
 
-	if (args->engine_id) {
-		e = xe_engine_lookup(xef, args->engine_id);
-		if (XE_IOCTL_DBG(xe, !e)) {
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q)) {
 			err = -ENOENT;
 			goto free_objs;
 		}
 
-		if (XE_IOCTL_DBG(xe, !(e->flags & ENGINE_FLAG_VM))) {
+		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
 			err = -EINVAL;
-			goto put_engine;
+			goto put_exec_queue;
 		}
 	}
 
 	vm = xe_vm_lookup(xef, args->vm_id);
 	if (XE_IOCTL_DBG(xe, !vm)) {
 		err = -EINVAL;
-		goto put_engine;
+		goto put_exec_queue;
 	}
 
 	err = down_write_killable(&vm->lock);
@@ -3357,7 +3357,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}
 
-	err = vm_bind_ioctl_ops_parse(vm, e, ops, args->num_binds,
+	err = vm_bind_ioctl_ops_parse(vm, q, ops, args->num_binds,
 				      syncs, num_syncs, &ops_list, async);
 	if (err)
 		goto unwind_ops;
@@ -3391,9 +3391,9 @@ release_vm_lock:
 	up_write(&vm->lock);
 put_vm:
 	xe_vm_put(vm);
-put_engine:
-	if (e)
-		xe_engine_put(e);
+put_exec_queue:
+	if (q)
+		xe_exec_queue_put(q);
 free_objs:
 	kfree(bos);
 	kfree(ops);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index a1d30de37d203..805236578140c 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -18,7 +18,7 @@ struct drm_file;
 struct ttm_buffer_object;
 struct ttm_validate_buffer;
 
-struct xe_engine;
+struct xe_exec_queue;
 struct xe_file;
 struct xe_sync_entry;
 
@@ -164,7 +164,7 @@ static inline bool xe_vm_no_dma_fences(struct xe_vm *vm)
 	return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm);
 }
 
-int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e);
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
 
 int xe_vm_userptr_pin(struct xe_vm *vm);
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index f7522f9ca40ee..f8675c3da3b14 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -138,8 +138,8 @@ struct xe_vm {
 
 	struct xe_device *xe;
 
-	/* engine used for (un)binding vma's */
-	struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE];
+	/* exec queue used for (un)binding vma's */
+	struct xe_exec_queue *q[XE_MAX_TILES_PER_DEVICE];
 
 	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
 	struct ttm_lru_bulk_move lru_bulk_move;
@@ -278,10 +278,10 @@ struct xe_vm {
 		 * an engine again
 		 */
 		s64 min_run_period_ms;
-		/** @engines: list of engines attached to this VM */
-		struct list_head engines;
-		/** @num_engines: number user engines attached to this VM */
-		int num_engines;
+		/** @exec_queues: list of exec queues attached to this VM */
+		struct list_head exec_queues;
+		/** @num_exec_queues: number exec queues attached to this VM */
+		int num_exec_queues;
 		/**
 		 * @rebind_deactivated: Whether rebind has been temporarily deactivated
 		 * due to no work available. Protected by the vm resv.
@@ -386,8 +386,8 @@ struct xe_vma_op {
 	 * operations is processed
 	 */
 	struct drm_gpuva_ops *ops;
-	/** @engine: engine for this operation */
-	struct xe_engine *engine;
+	/** @q: exec queue for this operation */
+	struct xe_exec_queue *q;
 	/**
 	 * @syncs: syncs for this operation, only used on first and last
 	 * operation
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 3d09e9e9267b5..86f16d50e9ccc 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -103,14 +103,14 @@ struct xe_user_extension {
 #define DRM_XE_VM_CREATE		0x03
 #define DRM_XE_VM_DESTROY		0x04
 #define DRM_XE_VM_BIND			0x05
-#define DRM_XE_ENGINE_CREATE		0x06
-#define DRM_XE_ENGINE_DESTROY		0x07
+#define DRM_XE_EXEC_QUEUE_CREATE		0x06
+#define DRM_XE_EXEC_QUEUE_DESTROY		0x07
 #define DRM_XE_EXEC			0x08
 #define DRM_XE_MMIO			0x09
-#define DRM_XE_ENGINE_SET_PROPERTY	0x0a
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY	0x0a
 #define DRM_XE_WAIT_USER_FENCE		0x0b
 #define DRM_XE_VM_MADVISE		0x0c
-#define DRM_XE_ENGINE_GET_PROPERTY	0x0d
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x0d
 
 /* Must be kept compact -- no holes */
 #define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -119,12 +119,12 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
 #define DRM_IOCTL_XE_VM_DESTROY			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
 #define DRM_IOCTL_XE_VM_BIND			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
-#define DRM_IOCTL_XE_ENGINE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create)
-#define DRM_IOCTL_XE_ENGINE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_GET_PROPERTY, struct drm_xe_engine_get_property)
-#define DRM_IOCTL_XE_ENGINE_DESTROY		 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy)
+#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
+#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
+#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
 #define DRM_IOCTL_XE_EXEC			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_MMIO			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio)
-#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY	 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property)
+#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY	 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
 
@@ -649,11 +649,11 @@ struct drm_xe_vm_bind {
 	__u32 vm_id;
 
 	/**
-	 * @engine_id: engine_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
-	 * and engine must have same vm_id. If zero, the default VM bind engine
+	 * @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
+	 * and exec queue must have same vm_id. If zero, the default VM bind engine
 	 * is used.
 	 */
-	__u32 engine_id;
+	__u32 exec_queue_id;
 
 	/** @num_binds: number of binds in this IOCTL */
 	__u32 num_binds;
@@ -685,8 +685,8 @@ struct drm_xe_vm_bind {
 	__u64 reserved[2];
 };
 
-/** struct drm_xe_ext_engine_set_property - engine set property extension */
-struct drm_xe_ext_engine_set_property {
+/** struct drm_xe_ext_exec_queue_set_property - exec queue set property extension */
+struct drm_xe_ext_exec_queue_set_property {
 	/** @base: base user extension */
 	struct xe_user_extension base;
 
@@ -701,32 +701,32 @@ struct drm_xe_ext_engine_set_property {
 };
 
 /**
- * struct drm_xe_engine_set_property - engine set property
+ * struct drm_xe_exec_queue_set_property - exec queue set property
  *
- * Same namespace for extensions as drm_xe_engine_create
+ * Same namespace for extensions as drm_xe_exec_queue_create
  */
-struct drm_xe_engine_set_property {
+struct drm_xe_exec_queue_set_property {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	/** @engine_id: Engine ID */
-	__u32 engine_id;
+	/** @exec_queue_id: Exec queue ID */
+	__u32 exec_queue_id;
 
-#define XE_ENGINE_SET_PROPERTY_PRIORITY			0
-#define XE_ENGINE_SET_PROPERTY_TIMESLICE		1
-#define XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
+#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY			0
+#define XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE		1
+#define XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
 	/*
 	 * Long running or ULLS engine mode. DMA fences not allowed in this
 	 * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves
 	 * as a sanity check the UMD knows what it is doing. Can only be set at
 	 * engine create time.
 	 */
-#define XE_ENGINE_SET_PROPERTY_COMPUTE_MODE		3
-#define XE_ENGINE_SET_PROPERTY_PERSISTENCE		4
-#define XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT		5
-#define XE_ENGINE_SET_PROPERTY_ACC_TRIGGER		6
-#define XE_ENGINE_SET_PROPERTY_ACC_NOTIFY		7
-#define XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY		8
+#define XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE		3
+#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		4
+#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		5
+#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		6
+#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		7
+#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY		8
 	/** @property: property to set */
 	__u32 property;
 
@@ -755,25 +755,25 @@ struct drm_xe_engine_class_instance {
 	__u16 gt_id;
 };
 
-struct drm_xe_engine_create {
-#define XE_ENGINE_EXTENSION_SET_PROPERTY               0
+struct drm_xe_exec_queue_create {
+#define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	/** @width: submission width (number BB per exec) for this engine */
+	/** @width: submission width (number BB per exec) for this exec queue */
 	__u16 width;
 
-	/** @num_placements: number of valid placements for this engine */
+	/** @num_placements: number of valid placements for this exec queue */
 	__u16 num_placements;
 
-	/** @vm_id: VM to use for this engine */
+	/** @vm_id: VM to use for this exec queue */
 	__u32 vm_id;
 
 	/** @flags: MBZ */
 	__u32 flags;
 
-	/** @engine_id: Returned engine ID */
-	__u32 engine_id;
+	/** @exec_queue_id: Returned exec queue ID */
+	__u32 exec_queue_id;
 
 	/**
 	 * @instances: user pointer to a 2-d array of struct
@@ -788,14 +788,14 @@ struct drm_xe_engine_create {
 	__u64 reserved[2];
 };
 
-struct drm_xe_engine_get_property {
+struct drm_xe_exec_queue_get_property {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	/** @engine_id: Engine ID */
-	__u32 engine_id;
+	/** @exec_queue_id: Exec queue ID */
+	__u32 exec_queue_id;
 
-#define XE_ENGINE_GET_PROPERTY_BAN			0
+#define XE_EXEC_QUEUE_GET_PROPERTY_BAN			0
 	/** @property: property to get */
 	__u32 property;
 
@@ -806,9 +806,9 @@ struct drm_xe_engine_get_property {
 	__u64 reserved[2];
 };
 
-struct drm_xe_engine_destroy {
-	/** @engine_id: Engine ID */
-	__u32 engine_id;
+struct drm_xe_exec_queue_destroy {
+	/** @exec_queue_id: Exec queue ID */
+	__u32 exec_queue_id;
 
 	/** @pad: MBZ */
 	__u32 pad;
@@ -855,8 +855,8 @@ struct drm_xe_exec {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	/** @engine_id: Engine ID for the batch buffer */
-	__u32 engine_id;
+	/** @exec_queue_id: Exec queue ID for the batch buffer */
+	__u32 exec_queue_id;
 
 	/** @num_syncs: Amount of struct drm_xe_sync in array. */
 	__u32 num_syncs;
-- 
GitLab


From 038ff941afe2b05273d5f07b12e976dae195d8b8 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Fri, 4 Aug 2023 17:17:56 +0530
Subject: [PATCH 1800/2340] drm/xe: Add sysfs entries for engines under its GT

Add engines sysfs directory under its GT and
create sub directory for all engine class
(note its not per instance) present on GT.

For example,
DUT# cat /sys/class/drm/cardX/device/tileN/gtN/engines/
bcs/ ccs/

V9 :
   - Add missing drmm_add_action_or_reset
V8 :
   - Rebase
V7 :
   - Remove xe_gt.h from .h and include in .c - Matt
V6 :
   - Add kernel doc and arrange file in make file by alphabet - Matt
V5 :
   - replace xe_engine with xe_hw_engine - Matt
V4 :
   - Rebase to resolve conflicts - CI
V3 :
   - Move code in its own file
   - Rename API name
V2 :
   - Correct class mask logic - Himal
   - Remove extra parenthesis

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                   |   1 +
 drivers/gpu/drm/xe/xe_gt.c                    |   7 +
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 153 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h |  13 ++
 4 files changed, 174 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index f8d63c9b97d5b..2373832f932ec 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -78,6 +78,7 @@ xe-y += xe_bb.o \
 	xe_guc_pc.o \
 	xe_guc_submit.o \
 	xe_hw_engine.o \
+	xe_hw_engine_class_sysfs.o \
 	xe_hw_fence.o \
 	xe_huc.o \
 	xe_huc_debugfs.o \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3077faa1e7923..13320af4ddd3e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -28,6 +28,7 @@
 #include "xe_gt_topology.h"
 #include "xe_guc_exec_queue_types.h"
 #include "xe_hw_fence.h"
+#include "xe_hw_engine_class_sysfs.h"
 #include "xe_irq.h"
 #include "xe_lrc.h"
 #include "xe_map.h"
@@ -323,6 +324,12 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
+	err = xe_hw_engine_class_sysfs_init(gt);
+	if (err)
+		drm_warn(&gt_to_xe(gt)->drm,
+			 "failed to register engines sysfs directory, err: %d\n",
+			 err);
+
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 	XE_WARN_ON(err);
 	xe_device_mem_access_put(gt_to_xe(gt));
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
new file mode 100644
index 0000000000000..470a8c356abd9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "xe_gt.h"
+#include "xe_hw_engine_class_sysfs.h"
+
+#define MAX_ENGINE_CLASS_NAME_LEN    16
+static void kobj_xe_hw_engine_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type kobj_xe_hw_engine_type = {
+	.release = kobj_xe_hw_engine_release,
+	.sysfs_ops = &kobj_sysfs_ops
+};
+
+static void kobj_xe_hw_engine_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	kobject_put(kobj);
+}
+
+	static struct kobject *
+kobj_xe_hw_engine(struct xe_device *xe, struct kobject *parent, char *name)
+{
+	struct kobject *kobj;
+	int err = 0;
+
+	kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+	if (!kobj)
+		return NULL;
+
+	kobject_init(kobj, &kobj_xe_hw_engine_type);
+	if (kobject_add(kobj, parent, "%s", name)) {
+		kobject_put(kobj);
+		return NULL;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_fini,
+				       kobj);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+
+	return kobj;
+}
+
+static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
+	.release = xe_hw_engine_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	kobject_put(kobj);
+}
+
+/**
+ * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT.
+ * @gt: Xe GT.
+ *
+ * This routine creates sysfs for HW engine classes and adds methods
+ * to get/set different scheduling properties for HW engines class.
+ *
+ * Returns: Returns error value for failure and 0 for success.
+ */
+int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct kobject *kobj;
+	u16 class_mask = 0;
+	int err = 0;
+
+	kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+	if (!kobj)
+		return -ENOMEM;
+
+	kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type);
+
+	err = kobject_add(kobj, gt->sysfs, "engines");
+	if (err) {
+		kobject_put(kobj);
+		return err;
+	}
+
+	for_each_hw_engine(hwe, gt, id) {
+		char name[MAX_ENGINE_CLASS_NAME_LEN];
+		struct kobject *khwe;
+
+		if (hwe->class == XE_ENGINE_CLASS_OTHER ||
+		    hwe->class == XE_ENGINE_CLASS_MAX)
+			continue;
+
+		if ((class_mask >> hwe->class) & 1)
+			continue;
+
+		class_mask |= 1 << hwe->class;
+
+		switch (hwe->class) {
+		case XE_ENGINE_CLASS_RENDER:
+			strcpy(name, "rcs");
+			break;
+		case XE_ENGINE_CLASS_VIDEO_DECODE:
+			strcpy(name, "vcs");
+			break;
+		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+			strcpy(name, "vecs");
+			break;
+		case XE_ENGINE_CLASS_COPY:
+			strcpy(name, "bcs");
+			break;
+		case XE_ENGINE_CLASS_COMPUTE:
+			strcpy(name, "ccs");
+			break;
+		default:
+			kobject_put(kobj);
+			return -EINVAL;
+		}
+
+		khwe = kobj_xe_hw_engine(xe, kobj, name);
+		if (!khwe) {
+			kobject_put(kobj);
+			return -EINVAL;
+		}
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini,
+				       kobj);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
new file mode 100644
index 0000000000000..b3916c3cf5b31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_CLASS_SYSFS_H_
+#define _XE_ENGINE_CLASS_SYSFS_H__
+
+struct xe_gt;
+
+int xe_hw_engine_class_sysfs_init(struct xe_gt *gt);
+
+#endif
-- 
GitLab


From eef55700f302b9af3228f74997e82eaca8635d14 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Fri, 4 Aug 2023 17:36:25 +0530
Subject: [PATCH 1801/2340] drm/xe: Add sysfs for default engine scheduler
 properties

For each HW engine under GT we are adding defaults sysfs
entry to list all engine scheduler properties and its
default values. So that it will be easier for user to
fetch default values of these properties anytime to go
back to default.

For example,
DUT# cat /sys/class/drm/card1/device/tileN/gtN/engines/bcs/.defaults/
job_timeout_ms         preempt_timeout_us     timeslice_duration_us

where,
@job_timeout_ms: The time after which a job is removed from the scheduler.
@preempt_timeout_us: How long to wait (in microseconds) for a preemption
                     event to occur when submitting a new context.
@timeslice_duration_us: Each context is scheduled for execution for the
                        timeslice duration, before switching to the next
                        context.

V12:
   - Add missing drmm_add_action_or_reset and remove sysfs files
V11:
   - Rebase
V10:
   - Remove xe_gt.h inclusion from .h - Matt
V9 :
   - Remove jiffies for job_timeout_ms - Matt
V8 :
   - replace xe_engine with xe_hw_engine - Matt
V7 :
   - Push all errors to one error path at every places - Niranjana
   - Describe struct member to resolve kernel doc err - CI hooks
V6 :
   - Use engine class interface instead of hw engine
     in sysfs for better interfacing readability - Niranjana
V5 :
   - Scheduling props should apply per class engine not per hardware engine - Matt
   - Do not record value of job_timeout_ms if changed based on dma_fence - Matt
V4 :
   - Resolve merge conflicts - CI
V3 :
   - Rearrange code in its own file
   - Rebase
   - Update commit message to reflect tile addition
V2 :
   - Use sysfs_create_files in this patch - Niranjana
   - Handle prototype error for xe_add_engine_defaults - CI hooks
   - Remove unused member sysfs_hwe - Niranjana

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c            |   6 +-
 drivers/gpu/drm/xe/xe_gt_types.h              |   3 +
 drivers/gpu/drm/xe/xe_guc_submit.c            |   3 +-
 drivers/gpu/drm/xe/xe_hw_engine.c             |   9 ++
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 136 +++++++++++++++---
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h |  22 +++
 drivers/gpu/drm/xe/xe_hw_engine_types.h       |  37 +++++
 7 files changed, 190 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 1371829b9e356..41a7ae1d1a53c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -53,9 +53,9 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	INIT_LIST_HEAD(&q->compute.link);
 	INIT_LIST_HEAD(&q->multi_gt_link);
 
-	/* FIXME: Wire up to configurable default value */
-	q->sched_props.timeslice_us = 1 * 1000;
-	q->sched_props.preempt_timeout_us = 640 * 1000;
+	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
+	q->sched_props.preempt_timeout_us =
+				hwe->eclass->sched_props.preempt_timeout_us;
 
 	if (xe_exec_queue_is_parallel(q)) {
 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index c326932e53d79..35b8c19fa8bf5 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -286,6 +286,9 @@ struct xe_gt {
 	/** @hw_engines: hardware engines on the GT */
 	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
 
+	/** @eclass: per hardware engine class interface on the GT */
+	struct xe_hw_engine_class_intf  eclass[XE_ENGINE_CLASS_MAX];
+
 	/** @pcode: GT's PCODE */
 	struct {
 		/** @lock: protecting GT's PCODE mailbox data */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 42454c12efb30..e12cd4285e5dc 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1132,7 +1132,8 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	ge->q = q;
 	init_waitqueue_head(&ge->suspend_wait);
 
-	timeout = xe_vm_no_dma_fences(q->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
+	timeout = xe_vm_no_dma_fences(q->vm) ? MAX_SCHEDULE_TIMEOUT :
+		  q->hwe->eclass->sched_props.job_timeout_ms;
 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
 			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
 			     64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index ead5aa2856195..b7be7b0acb35e 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -362,6 +362,15 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	hwe->fence_irq = &gt->fence_irq[info->class];
 	hwe->engine_id = id;
 
+	if (!gt->eclass[hwe->class].sched_props.job_timeout_ms) {
+		gt->eclass[hwe->class].sched_props.job_timeout_ms = 5 * 1000;
+		gt->eclass[hwe->class].sched_props.timeslice_us = 1 * 1000;
+		gt->eclass[hwe->class].sched_props.preempt_timeout_us = 640 * 1000;
+		/* Record default props */
+		gt->eclass[hwe->class].defaults = gt->eclass[hwe->class].sched_props;
+	}
+	hwe->eclass = &gt->eclass[hwe->class];
+
 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
 	xe_wa_process_engine(hwe);
 	hw_engine_setup_default_state(hwe);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index 470a8c356abd9..99a8197765bda 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -11,6 +11,9 @@
 #include "xe_hw_engine_class_sysfs.h"
 
 #define MAX_ENGINE_CLASS_NAME_LEN    16
+static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+					   struct kobject *parent);
+
 static void kobj_xe_hw_engine_release(struct kobject *kobj)
 {
 	kfree(kobj);
@@ -21,37 +24,116 @@ static const struct kobj_type kobj_xe_hw_engine_type = {
 	.sysfs_ops = &kobj_sysfs_ops
 };
 
-static void kobj_xe_hw_engine_fini(struct drm_device *drm, void *arg)
+static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
 {
 	struct kobject *kobj = arg;
 
 	kobject_put(kobj);
 }
 
-	static struct kobject *
-kobj_xe_hw_engine(struct xe_device *xe, struct kobject *parent, char *name)
+	static struct kobj_eclass *
+kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name)
+{
+	struct kobj_eclass *keclass;
+	int err = 0;
+
+	keclass = kzalloc(sizeof(*keclass), GFP_KERNEL);
+	if (!keclass)
+		return NULL;
+
+	kobject_init(&keclass->base, &kobj_xe_hw_engine_type);
+	if (kobject_add(&keclass->base, parent, "%s", name)) {
+		kobject_put(&keclass->base);
+		return NULL;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
+				       &keclass->base);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+	return keclass;
+}
+
+static ssize_t job_timeout_default(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms);
+}
+
+static struct kobj_attribute job_timeout_def =
+__ATTR(job_timeout_ms, 0444, job_timeout_default, NULL);
+
+static ssize_t timeslice_default(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.timeslice_us);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_us, 0444, timeslice_default, NULL);
+
+static ssize_t preempt_timeout_default(struct kobject *kobj,
+				       struct kobj_attribute *attr,
+				       char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us);
+}
+
+static struct kobj_attribute preempt_timeout_def =
+__ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL);
+
+static const struct attribute *defaults[] = {
+	&job_timeout_def.attr,
+	&timeslice_duration_def.attr,
+	&preempt_timeout_def.attr,
+	NULL
+};
+
+static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, defaults);
+	kobject_put(kobj);
+}
+
+static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+					   struct kobject *parent)
 {
 	struct kobject *kobj;
 	int err = 0;
 
 	kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
 	if (!kobj)
-		return NULL;
+		return -ENOMEM;
 
 	kobject_init(kobj, &kobj_xe_hw_engine_type);
-	if (kobject_add(kobj, parent, "%s", name)) {
-		kobject_put(kobj);
-		return NULL;
-	}
+	err = kobject_add(kobj, parent, "%s", ".defaults");
+	if (err)
+		goto err_object;
+
+	err = sysfs_create_files(kobj, defaults);
+	if (err)
+		goto err_object;
 
-	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_fini,
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini,
 				       kobj);
 	if (err)
 		drm_warn(&xe->drm,
 			 "%s: drmm_add_action_or_reset failed, err: %d\n",
 			 __func__, err);
-
-	return kobj;
+	return err;
+err_object:
+	kobject_put(kobj);
+	return err;
 }
 
 static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
@@ -96,14 +178,12 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 	kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type);
 
 	err = kobject_add(kobj, gt->sysfs, "engines");
-	if (err) {
-		kobject_put(kobj);
-		return err;
-	}
+	if (err)
+		goto err_object;
 
 	for_each_hw_engine(hwe, gt, id) {
 		char name[MAX_ENGINE_CLASS_NAME_LEN];
-		struct kobject *khwe;
+		struct kobj_eclass *keclass;
 
 		if (hwe->class == XE_ENGINE_CLASS_OTHER ||
 		    hwe->class == XE_ENGINE_CLASS_MAX)
@@ -131,14 +211,23 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 			strcpy(name, "ccs");
 			break;
 		default:
-			kobject_put(kobj);
-			return -EINVAL;
+			err = -EINVAL;
+			goto err_object;
+		}
+
+		keclass = kobj_xe_hw_engine_class(xe, kobj, name);
+		if (!keclass) {
+			err = -EINVAL;
+			goto err_object;
 		}
 
-		khwe = kobj_xe_hw_engine(xe, kobj, name);
-		if (!khwe) {
-			kobject_put(kobj);
-			return -EINVAL;
+		keclass->eclass = hwe->eclass;
+		err = xe_add_hw_engine_class_defaults(xe, &keclass->base);
+		if (err) {
+			drm_warn(&xe->drm,
+				 "Add .defaults to engines failed!, err: %d\n",
+				 err);
+			goto err_object;
 		}
 	}
 
@@ -150,4 +239,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 			 __func__, err);
 
 	return err;
+err_object:
+	kobject_put(kobj);
+	return err;
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
index b3916c3cf5b31..c093f381abbe5 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -6,8 +6,30 @@
 #ifndef _XE_ENGINE_CLASS_SYSFS_H_
 #define _XE_ENGINE_CLASS_SYSFS_H__
 
+#include <linux/kobject.h>
+
 struct xe_gt;
+struct xe_hw_engine_class_intf;
 
 int xe_hw_engine_class_sysfs_init(struct xe_gt *gt);
 
+/**
+ * struct kobj_eclass - A eclass's kobject struct that connects the kobject and the
+ * eclass.
+ *
+ * When dealing with multiple eclass, this struct helps to understand which eclass
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_eclass {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @eclass: A pointer to the hw engine class interface */
+	struct xe_hw_engine_class_intf *eclass;
+};
+
+static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_eclass, base)->eclass;
+}
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 803d557cf5aa7..97d9ba31b5fc7 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -63,6 +63,41 @@ struct xe_bo;
 struct xe_execlist_port;
 struct xe_gt;
 
+/**
+ * struct xe_hw_engine_class_intf - per hw engine class struct interface
+ *
+ * Contains all the hw engine properties per engine class.
+ *
+ * @sched_props: scheduling properties
+ * @defaults: default scheduling properties
+ */
+struct xe_hw_engine_class_intf {
+	/**
+	 * @sched_props: scheduling properties
+	 * @defaults: default scheduling properties
+	 */
+	struct {
+		/** @set_job_timeout: Set job timeout in ms for engine */
+		u32 job_timeout_ms;
+		/** @job_timeout_min: Min job timeout in ms for engine */
+		u32 job_timeout_min;
+		/** @job_timeout_max: Max job timeout in ms for engine */
+		u32 job_timeout_max;
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @timeslice_min: min timeslice period in micro-seconds */
+		u32 timeslice_min;
+		/** @timeslice_max: max timeslice period in micro-seconds */
+		u32 timeslice_max;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+		/** @preempt_timeout_min: min preemption timeout in micro-seconds */
+		u32 preempt_timeout_min;
+		/** @preempt_timeout_max: max preemption timeout in micro-seconds */
+		u32 preempt_timeout_max;
+	} sched_props, defaults;
+};
+
 /**
  * struct xe_hw_engine - Hardware engine
  *
@@ -107,6 +142,8 @@ struct xe_hw_engine {
 	void (*irq_handler)(struct xe_hw_engine *hwe, u16 intr_vec);
 	/** @engine_id: id  for this hw engine */
 	enum xe_hw_engine_id engine_id;
+	/** @eclass: pointer to per hw engine class interface */
+	struct xe_hw_engine_class_intf *eclass;
 };
 
 /**
-- 
GitLab


From e91a989ce151f022a7977c1ae4f21ac6d814d632 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Fri, 4 Aug 2023 18:08:25 +0530
Subject: [PATCH 1802/2340] drm/xe: Add job timeout engine property to sysfs

The time after which a job is removed from the scheduler.
Add sysfs entry to provide user defined job timeout to
scheduler.

The job timeout can be adjusted per-engine class using,

/sys/class/drm/cardX/device/tileN/gtN/engines/ccs/job_timeout_ms

V8:
  - Rebase
V7:
  - Rebase to use s/xe_engine/xe_hw_engine/ - Matt
V6:
  - Remove timeout validation, not relevant - Niranjana
  - Rebase to use common error path
V5:
  - Rebase to use engine class interface instead of hw engine
V4:
  - Rebase to per class engine props interface
V3:
  - Rebase
  - Update commit message to reflect tile update
V2:
  - Use sysfs_create_files as part of this patch

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 86 +++++++++++++------
 1 file changed, 62 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index 99a8197765bda..03e0c29445b7a 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -24,38 +24,34 @@ static const struct kobj_type kobj_xe_hw_engine_type = {
 	.sysfs_ops = &kobj_sysfs_ops
 };
 
-static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
+static ssize_t job_timeout_store(struct kobject *kobj,
+				 struct kobj_attribute *attr,
+				 const char *buf, size_t count)
 {
-	struct kobject *kobj = arg;
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
 
-	kobject_put(kobj);
-}
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
 
-	static struct kobj_eclass *
-kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name)
-{
-	struct kobj_eclass *keclass;
-	int err = 0;
+	WRITE_ONCE(eclass->sched_props.job_timeout_ms, timeout);
 
-	keclass = kzalloc(sizeof(*keclass), GFP_KERNEL);
-	if (!keclass)
-		return NULL;
+	return count;
+}
 
-	kobject_init(&keclass->base, &kobj_xe_hw_engine_type);
-	if (kobject_add(&keclass->base, parent, "%s", name)) {
-		kobject_put(&keclass->base);
-		return NULL;
-	}
+static ssize_t job_timeout_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
-				       &keclass->base);
-	if (err)
-		drm_warn(&xe->drm,
-			 "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
-	return keclass;
+	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms);
 }
 
+static struct kobj_attribute job_timeout_attr =
+__ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store);
+
 static ssize_t job_timeout_default(struct kobject *kobj,
 				   struct kobj_attribute *attr, char *buf)
 {
@@ -97,6 +93,44 @@ static const struct attribute *defaults[] = {
 	NULL
 };
 
+static const struct attribute *files[] = {
+	&job_timeout_attr.attr,
+	NULL
+};
+
+static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, files);
+	kobject_put(kobj);
+}
+
+	static struct kobj_eclass *
+kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name)
+{
+	struct kobj_eclass *keclass;
+	int err = 0;
+
+	keclass = kzalloc(sizeof(*keclass), GFP_KERNEL);
+	if (!keclass)
+		return NULL;
+
+	kobject_init(&keclass->base, &kobj_xe_hw_engine_type);
+	if (kobject_add(&keclass->base, parent, "%s", name)) {
+		kobject_put(&keclass->base);
+		return NULL;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
+				       &keclass->base);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+	return keclass;
+}
+
 static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg)
 {
 	struct kobject *kobj = arg;
@@ -229,6 +263,10 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 				 err);
 			goto err_object;
 		}
+
+		err = sysfs_create_files(&keclass->base, files);
+		if (err)
+			goto err_object;
 	}
 
 	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini,
-- 
GitLab


From bc3a06ead1cd49d3a5e0f707cbd6c8e173307388 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 3 Aug 2023 18:14:04 +0530
Subject: [PATCH 1803/2340] drm/xe: Add timeslice duration engine property to
 sysfs

Timeslices between multiple context is supported via
guc scheduling. Add sysfs entry to provide user defined
timeslice duration to guc scheduling.

The timeslice duration can be adjusted per-engine class using,

/sys/class/drm/cardX/device/tileN/gtN/engines/ccs/timeslice_duration_us

V8:
  - Rebase
V7:
  - Rebase to use s/xe_engine/xe_hw_engine/ - Matt
V6:
  - Remove duration validation, not relevant - Niranjana
V5:
  - Rebase to replace hw engine with eclass interface
V4:
  - Rebase to per class engine props interface
V3:
  - Rebase
  - Update commit messge to add tile
V2:
  - Rebase

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index 03e0c29445b7a..01d5282d2b245 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -63,6 +63,35 @@ static ssize_t job_timeout_default(struct kobject *kobj,
 static struct kobj_attribute job_timeout_def =
 __ATTR(job_timeout_ms, 0444, job_timeout_default, NULL);
 
+static ssize_t timeslice_duration_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 duration;
+	int err;
+
+	err = kstrtou32(buf, 0, &duration);
+	if (err)
+		return err;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_us, duration);
+
+	return count;
+}
+
+static ssize_t timeslice_duration_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us);
+}
+
+static struct kobj_attribute timeslice_duration_attr =
+	__ATTR(timeslice_duration_us, 0644, timeslice_duration_show,
+	       timeslice_duration_store);
+
 static ssize_t timeslice_default(struct kobject *kobj,
 				 struct kobj_attribute *attr, char *buf)
 {
@@ -95,6 +124,7 @@ static const struct attribute *defaults[] = {
 
 static const struct attribute *files[] = {
 	&job_timeout_attr.attr,
+	&timeslice_duration_attr.attr,
 	NULL
 };
 
-- 
GitLab


From 69838d6330a7cc11de4f06f55122bfdb60693e70 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 3 Aug 2023 18:18:03 +0530
Subject: [PATCH 1804/2340] drm/xe: Add sysfs for preempt reset timeout

The preemption request and timeout is used for
higher priority context or kill hung context and reset
hardware engine.

The preempt timeout can be adjusted per-engine class using,

/sys/class/drm/cardX/device/tileN/gtN/engines/ccs/preempt_timeout_us

and can be disabled by setting it to 0.

V7:
  - Rebase
V6:
  - Rebase to use s/xe_engine/xe_hw_engine/ - Matt
V5:
  - Remove timeout validation, not relevant - Niranjana
V4:
  - Rebase to replace hw engine with eclass interface
V3:
  - Rebase to per class engine props interface
V2:
  - Rebase
  - Update commit message to add tile

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index 01d5282d2b245..adbaaee1decab 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -103,6 +103,34 @@ static ssize_t timeslice_default(struct kobject *kobj,
 static struct kobj_attribute timeslice_duration_def =
 __ATTR(timeslice_duration_us, 0444, timeslice_default, NULL);
 
+static ssize_t preempt_timeout_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_us, timeout);
+
+	return count;
+}
+
+static ssize_t preempt_timeout_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
+}
+
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store);
+
 static ssize_t preempt_timeout_default(struct kobject *kobj,
 				       struct kobj_attribute *attr,
 				       char *buf)
@@ -125,6 +153,7 @@ static const struct attribute *defaults[] = {
 static const struct attribute *files[] = {
 	&job_timeout_attr.attr,
 	&timeslice_duration_attr.attr,
+	&preempt_timeout_attr.attr,
 	NULL
 };
 
-- 
GitLab


From d2776564729739f459e108b5ac83bcea57c44bca Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Fri, 4 Aug 2023 11:54:23 +0530
Subject: [PATCH 1805/2340] drm/xe: Add min/max cap for engine scheduler
 properties

Add sysfs entries for the min, max, and defaults for each of
engine scheduler controls for every hardware engine class.

Non-elevated user IOCTLs to set these controls must be within
the min-max ranges of the sysfs entries, elevated user can set
these controls to any value. However, introduced compile time
CONFIG min-max values which restricts elevated user to be in
compile time min-max range if at all sysfs min/max are violated.

Sysfs entries examples are,
DUT# cat /sys/class/drm/cardX/device/tileN/gtN/engines/ccs/.defaults/
job_timeout_max         job_timeout_ms          preempt_timeout_min     timeslice_duration_max  timeslice_duration_us
job_timeout_min         preempt_timeout_max     preempt_timeout_us      timeslice_duration_min

DUT# cat /sys/class/drm/card1/device/tileN/gtN/engines/ccs/
.defaults/              job_timeout_min         preempt_timeout_max     preempt_timeout_us      timeslice_duration_min
job_timeout_max         job_timeout_ms          preempt_timeout_min     timeslice_duration_max  timeslice_duration_us

V12:
   - Rebase
V11:
   - Make engine_get_prop_minmax and enforce_sched_limit static - Matt
   - use enum in place of string in engine_get_prop_minmax - Matt
   - no need to use enforce_sched_limit or no need to filter min/max
     per user type in sysfs - Matt
V10:
   - Add kernel doc for non-static func
   - Make helper to get min/max for range validation - Matt
   - Filter min/max per user type
V9 :
   - Rebase to use s/xe_engine/xe_hw_engine/ - Matt
V8 :
   - fix enforce_sched_limit and avoid code duplication - Niranjana
   - Make sure min < max - Niranjana
V7 :
   - Rebase to replace hw engine with eclass interface
   - return EINVAL in place of EPERM
   - Use some APIs to avoid code duplication
V6 :
   - Rebase changes to reflect per engine class props interface - MattB
   - Use #if ENABLED - MattB
   - Remove MAX_SCHED_TIMEOUT check as range validation is enough
V5 :
   - Rebase to resolve conflicts - CI
V4 :
   - Rebase
   - Update commit to reflect tile addition
   - Use XE_HW macro directly as they are already filtered
     for CONFIG checks - Niranjana
   - Add CONFIG for enable/disable min/max limitation
     on elevated user. Default is enable - Matt/Joonas
V3 :
   - Resolve CI hooks warning for kernel-doc
V2 :
   - Restric min/max setting to #define default min/max for
     elevated user - Himal
   - Remove unrelated changes from patch - Niranjana

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig                    |   6 +
 drivers/gpu/drm/xe/Kconfig.profile            |  46 +++
 drivers/gpu/drm/xe/xe_exec_queue.c            |  90 ++++-
 drivers/gpu/drm/xe/xe_hw_engine.c             |   8 +
 drivers/gpu/drm/xe/xe_hw_engine.h             |  31 ++
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 333 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h |   1 +
 7 files changed, 509 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/Kconfig.profile

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 2a595bc92ca4b..6742ed4feecd0 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -63,3 +63,9 @@ depends on DRM_XE
 depends on EXPERT
 source "drivers/gpu/drm/xe/Kconfig.debug"
 endmenu
+
+menu "drm/xe Profile Guided Optimisation"
+	visible if EXPERT
+	depends on DRM_XE
+	source "drivers/gpu/drm/xe/Kconfig.profile"
+endmenu
diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile
new file mode 100644
index 0000000000000..51951c8149a17
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.profile
@@ -0,0 +1,46 @@
+config DRM_XE_JOB_TIMEOUT_MAX
+	int "Default max job timeout (ms)"
+	default 10000 # milliseconds
+	help
+	  Configures the default max job timeout after which job will
+	  be forcefully taken away from scheduler.
+config DRM_XE_JOB_TIMEOUT_MIN
+	int "Default min job timeout (ms)"
+	default 1 # milliseconds
+	help
+	  Configures the default min job timeout after which job will
+	  be forcefully taken away from scheduler.
+config DRM_XE_TIMESLICE_MAX
+	int "Default max timeslice duration (us)"
+	default 10000000 # microseconds
+	help
+	  Configures the default max timeslice duration between multiple
+	  contexts by guc scheduling.
+config DRM_XE_TIMESLICE_MIN
+	int "Default min timeslice duration (us)"
+	default 1 # microseconds
+	help
+	  Configures the default min timeslice duration between multiple
+	  contexts by guc scheduling.
+config DRM_XE_PREEMPT_TIMEOUT_MAX
+	int "Default max preempt timeout (us)"
+	default 10000000 # microseconds
+	help
+	  Configures the default max preempt timeout after which context
+	  will be forcefully taken away and higher priority context will
+	  run.
+config DRM_XE_PREEMPT_TIMEOUT_MIN
+	int "Default min preempt timeout (us)"
+	default 1 # microseconds
+	help
+	  Configures the default min preempt timeout after which context
+	  will be forcefully taken away and higher priority context will
+	  run.
+config DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT
+	bool "Default configuration of limitation on scheduler timeout"
+	default y
+	help
+	  Configures the enablement of limitation on scheduler timeout
+	  to apply to applicable user. For elevated user, all above MIN
+	  and MAX values will apply when this configuration is enable to
+	  apply limitation. By default limitation is applied.
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 41a7ae1d1a53c..901c609a657e6 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -13,6 +13,7 @@
 
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_hw_engine_class_sysfs.h"
 #include "xe_hw_fence.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
@@ -22,6 +23,13 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
+enum xe_exec_queue_sched_prop {
+	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
+	XE_EXEC_QUEUE_TIMESLICE = 1,
+	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
+	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
+};
+
 static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 						    struct xe_vm *vm,
 						    u32 logical_mask,
@@ -201,11 +209,69 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q
 	return q->ops->set_priority(q, value);
 }
 
+static bool xe_exec_queue_enforce_schedule_limit(void)
+{
+#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
+	return true;
+#else
+	return !capable(CAP_SYS_NICE);
+#endif
+}
+
+static void
+xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
+			      enum xe_exec_queue_sched_prop prop,
+			      u32 *min, u32 *max)
+{
+	switch (prop) {
+	case XE_EXEC_QUEUE_JOB_TIMEOUT:
+		*min = eclass->sched_props.job_timeout_min;
+		*max = eclass->sched_props.job_timeout_max;
+		break;
+	case XE_EXEC_QUEUE_TIMESLICE:
+		*min = eclass->sched_props.timeslice_min;
+		*max = eclass->sched_props.timeslice_max;
+		break;
+	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+		*min = eclass->sched_props.preempt_timeout_min;
+		*max = eclass->sched_props.preempt_timeout_max;
+		break;
+	default:
+		break;
+	}
+#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
+	if (capable(CAP_SYS_NICE)) {
+		switch (prop) {
+		case XE_EXEC_QUEUE_JOB_TIMEOUT:
+			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
+			break;
+		case XE_EXEC_QUEUE_TIMESLICE:
+			*min = XE_HW_ENGINE_TIMESLICE_MIN;
+			*max = XE_HW_ENGINE_TIMESLICE_MAX;
+			break;
+		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+			break;
+		default:
+			break;
+		}
+	}
+#endif
+}
+
 static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
 				    u64 value, bool create)
 {
-	if (!capable(CAP_SYS_NICE))
-		return -EPERM;
+	u32 min = 0, max = 0;
+
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;
 
 	return q->ops->set_timeslice(q, value);
 }
@@ -214,8 +280,14 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe,
 					     struct xe_exec_queue *q, u64 value,
 					     bool create)
 {
-	if (!capable(CAP_SYS_NICE))
-		return -EPERM;
+	u32 min = 0, max = 0;
+
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;
 
 	return q->ops->set_preempt_timeout(q, value);
 }
@@ -279,11 +351,17 @@ static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue
 static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
 				      u64 value, bool create)
 {
+	u32 min = 0, max = 0;
+
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_NICE))
-		return -EPERM;
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;
 
 	return q->ops->set_job_timeout(q, value);
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b7be7b0acb35e..b8fcc6e985cfc 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -364,8 +364,16 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 
 	if (!gt->eclass[hwe->class].sched_props.job_timeout_ms) {
 		gt->eclass[hwe->class].sched_props.job_timeout_ms = 5 * 1000;
+		gt->eclass[hwe->class].sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+		gt->eclass[hwe->class].sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
 		gt->eclass[hwe->class].sched_props.timeslice_us = 1 * 1000;
+		gt->eclass[hwe->class].sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
+		gt->eclass[hwe->class].sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
 		gt->eclass[hwe->class].sched_props.preempt_timeout_us = 640 * 1000;
+		gt->eclass[hwe->class].sched_props.preempt_timeout_min =
+								XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+		gt->eclass[hwe->class].sched_props.preempt_timeout_max =
+								XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
 		/* Record default props */
 		gt->eclass[hwe->class].defaults = gt->eclass[hwe->class].sched_props;
 	}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index 7eca9d53c7b15..3d37d6d44261d 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -10,6 +10,37 @@
 
 struct drm_printer;
 
+#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
+#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
+#else
+#define XE_HW_ENGINE_JOB_TIMEOUT_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MAX
+#define XE_HW_ENGINE_JOB_TIMEOUT_MAX CONFIG_DRM_XE_JOB_TIMEOUT_MAX
+#else
+#define XE_HW_ENGINE_JOB_TIMEOUT_MAX (10 * 1000)
+#endif
+#ifdef CONFIG_DRM_XE_TIMESLICE_MIN
+#define XE_HW_ENGINE_TIMESLICE_MIN CONFIG_DRM_XE_TIMESLICE_MIN
+#else
+#define XE_HW_ENGINE_TIMESLICE_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_TIMESLICE_MAX
+#define XE_HW_ENGINE_TIMESLICE_MAX CONFIG_DRM_XE_TIMESLICE_MAX
+#else
+#define XE_HW_ENGINE_TIMESLICE_MAX (10 * 1000 * 1000)
+#endif
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX (10 * 1000 * 1000)
+#endif
+
 int xe_hw_engines_init_early(struct xe_gt *gt);
 int xe_hw_engines_init(struct xe_gt *gt);
 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index adbaaee1decab..e49bc14f0ecf0 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -14,6 +14,22 @@
 static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
 					   struct kobject *parent);
 
+/**
+ * xe_hw_engine_timeout_in_range - Helper to check if timeout is in range
+ * @timeout: timeout to validate
+ * @min: min value of valid range
+ * @max: max value of valid range
+ *
+ * This helper helps to validate if timeout is in min-max range of HW engine
+ * scheduler.
+ *
+ * Returns: Returns false value for failure and true for success.
+ */
+bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max)
+{
+	return timeout >= min && timeout <= max;
+}
+
 static void kobj_xe_hw_engine_release(struct kobject *kobj)
 {
 	kfree(kobj);
@@ -24,11 +40,85 @@ static const struct kobj_type kobj_xe_hw_engine_type = {
 	.sysfs_ops = &kobj_sysfs_ops
 };
 
+static ssize_t job_timeout_max_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout < eclass->sched_props.job_timeout_min)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MIN,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.job_timeout_max, timeout);
+
+	return count;
+}
+
+static ssize_t job_timeout_max_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max);
+}
+
+static struct kobj_attribute job_timeout_max_attr =
+__ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store);
+
+static ssize_t job_timeout_min_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout > eclass->sched_props.job_timeout_max)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MIN,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.job_timeout_min, timeout);
+
+	return count;
+}
+
+static ssize_t job_timeout_min_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min);
+}
+
+static struct kobj_attribute job_timeout_min_attr =
+__ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store);
+
 static ssize_t job_timeout_store(struct kobject *kobj,
 				 struct kobj_attribute *attr,
 				 const char *buf, size_t count)
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 min = eclass->sched_props.job_timeout_min;
+	u32 max = eclass->sched_props.job_timeout_max;
 	u32 timeout;
 	int err;
 
@@ -36,6 +126,9 @@ static ssize_t job_timeout_store(struct kobject *kobj,
 	if (err)
 		return err;
 
+	if (!xe_hw_engine_timeout_in_range(timeout, min, max))
+		return -EINVAL;
+
 	WRITE_ONCE(eclass->sched_props.job_timeout_ms, timeout);
 
 	return count;
@@ -63,11 +156,35 @@ static ssize_t job_timeout_default(struct kobject *kobj,
 static struct kobj_attribute job_timeout_def =
 __ATTR(job_timeout_ms, 0444, job_timeout_default, NULL);
 
+static ssize_t job_timeout_min_default(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min);
+}
+
+static struct kobj_attribute job_timeout_min_def =
+__ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL);
+
+static ssize_t job_timeout_max_default(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max);
+}
+
+static struct kobj_attribute job_timeout_max_def =
+__ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL);
+
 static ssize_t timeslice_duration_store(struct kobject *kobj,
 					struct kobj_attribute *attr,
 					const char *buf, size_t count)
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 min = eclass->sched_props.timeslice_min;
+	u32 max = eclass->sched_props.timeslice_max;
 	u32 duration;
 	int err;
 
@@ -75,11 +192,90 @@ static ssize_t timeslice_duration_store(struct kobject *kobj,
 	if (err)
 		return err;
 
+	if (!xe_hw_engine_timeout_in_range(duration, min, max))
+		return -EINVAL;
+
 	WRITE_ONCE(eclass->sched_props.timeslice_us, duration);
 
 	return count;
 }
 
+static ssize_t timeslice_duration_max_store(struct kobject *kobj,
+					    struct kobj_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 duration;
+	int err;
+
+	err = kstrtou32(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration < eclass->sched_props.timeslice_min)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(duration,
+					   XE_HW_ENGINE_TIMESLICE_MIN,
+					   XE_HW_ENGINE_TIMESLICE_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_max, duration);
+
+	return count;
+}
+
+static ssize_t timeslice_duration_max_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max);
+}
+
+static struct kobj_attribute timeslice_duration_max_attr =
+	__ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show,
+	       timeslice_duration_max_store);
+
+static ssize_t timeslice_duration_min_store(struct kobject *kobj,
+					    struct kobj_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 duration;
+	int err;
+
+	err = kstrtou32(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration > eclass->sched_props.timeslice_max)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(duration,
+					   XE_HW_ENGINE_TIMESLICE_MIN,
+					   XE_HW_ENGINE_TIMESLICE_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_min, duration);
+
+	return count;
+}
+
+static ssize_t timeslice_duration_min_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min);
+}
+
+static struct kobj_attribute timeslice_duration_min_attr =
+	__ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show,
+	       timeslice_duration_min_store);
+
 static ssize_t timeslice_duration_show(struct kobject *kobj,
 				       struct kobj_attribute *attr, char *buf)
 {
@@ -103,11 +299,35 @@ static ssize_t timeslice_default(struct kobject *kobj,
 static struct kobj_attribute timeslice_duration_def =
 __ATTR(timeslice_duration_us, 0444, timeslice_default, NULL);
 
+static ssize_t timeslice_min_default(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.timeslice_min);
+}
+
+static struct kobj_attribute timeslice_duration_min_def =
+__ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL);
+
+static ssize_t timeslice_max_default(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.timeslice_max);
+}
+
+static struct kobj_attribute timeslice_duration_max_def =
+__ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL);
+
 static ssize_t preempt_timeout_store(struct kobject *kobj,
 				     struct kobj_attribute *attr,
 				     const char *buf, size_t count)
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 min = eclass->sched_props.preempt_timeout_min;
+	u32 max = eclass->sched_props.preempt_timeout_max;
 	u32 timeout;
 	int err;
 
@@ -115,6 +335,9 @@ static ssize_t preempt_timeout_store(struct kobject *kobj,
 	if (err)
 		return err;
 
+	if (!xe_hw_engine_timeout_in_range(timeout, min, max))
+		return -EINVAL;
+
 	WRITE_ONCE(eclass->sched_props.preempt_timeout_us, timeout);
 
 	return count;
@@ -143,17 +366,127 @@ static ssize_t preempt_timeout_default(struct kobject *kobj,
 static struct kobj_attribute preempt_timeout_def =
 __ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL);
 
+static ssize_t preempt_timeout_min_default(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min);
+}
+
+static struct kobj_attribute preempt_timeout_min_def =
+__ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL);
+
+static ssize_t preempt_timeout_max_default(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max);
+}
+
+static struct kobj_attribute preempt_timeout_max_def =
+__ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL);
+
+static ssize_t preempt_timeout_max_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout < eclass->sched_props.preempt_timeout_min)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_max, timeout);
+
+	return count;
+}
+
+static ssize_t preempt_timeout_max_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
+}
+
+static struct kobj_attribute preempt_timeout_max_attr =
+	__ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show,
+	       preempt_timeout_max_store);
+
+static ssize_t preempt_timeout_min_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout > eclass->sched_props.preempt_timeout_max)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_min, timeout);
+
+	return count;
+}
+
+static ssize_t preempt_timeout_min_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
+}
+
+static struct kobj_attribute preempt_timeout_min_attr =
+	__ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show,
+	       preempt_timeout_min_store);
+
 static const struct attribute *defaults[] = {
 	&job_timeout_def.attr,
+	&job_timeout_min_def.attr,
+	&job_timeout_max_def.attr,
 	&timeslice_duration_def.attr,
+	&timeslice_duration_min_def.attr,
+	&timeslice_duration_max_def.attr,
 	&preempt_timeout_def.attr,
+	&preempt_timeout_min_def.attr,
+	&preempt_timeout_max_def.attr,
 	NULL
 };
 
 static const struct attribute *files[] = {
 	&job_timeout_attr.attr,
+	&job_timeout_min_attr.attr,
+	&job_timeout_max_attr.attr,
 	&timeslice_duration_attr.attr,
+	&timeslice_duration_min_attr.attr,
+	&timeslice_duration_max_attr.attr,
 	&preempt_timeout_attr.attr,
+	&preempt_timeout_min_attr.attr,
+	&preempt_timeout_max_attr.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
index c093f381abbe5..60469fde41478 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -12,6 +12,7 @@ struct xe_gt;
 struct xe_hw_engine_class_intf;
 
 int xe_hw_engine_class_sysfs_init(struct xe_gt *gt);
+bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max);
 
 /**
  * struct kobj_eclass - A eclass's kobject struct that connects the kobject and the
-- 
GitLab


From 0c005429005228d7a82e4e8d5d8f24b6192e7aa6 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 3 Aug 2023 16:42:08 -0700
Subject: [PATCH 1806/2340] drm/xe: Fix error path in xe_guc_pc_gucrc_disable()

Make sure to always call xe_device_mem_access_put(), even on error.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230803234209.881924-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 91a3967fd7998..19d743f92f438 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -745,25 +745,27 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
-	int ret;
+	int ret = 0;
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 
 	ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
 	if (ret)
-		return ret;
+		goto out;
 
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		return ret;
+		goto out;
 
 	xe_mmio_write32(gt, PG_ENABLE, 0);
 	xe_mmio_write32(gt, RC_CONTROL, 0);
 	xe_mmio_write32(gt, RC_STATE, 0);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+out:
 	xe_device_mem_access_put(pc_to_xe(pc));
-	return 0;
+	return ret;
 }
 
 static void pc_init_pcode_freq(struct xe_guc_pc *pc)
-- 
GitLab


From d8b4494bf184d43295b89156d7656d69f931e418 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 3 Aug 2023 16:42:09 -0700
Subject: [PATCH 1807/2340] drm/xe: Fix error path in xe_guc_pc_start()

If the forcewake failed, put xe_device_mem_access.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230803234209.881924-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 19d743f92f438..c03bb58e70497 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -825,7 +825,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		return ret;
+		goto out_fail_force_wake;
 
 	ret = pc_action_reset(pc);
 	if (ret)
@@ -851,6 +851,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 
 out:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out_fail_force_wake:
 	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret;
 }
-- 
GitLab


From 31b57683de2c98ac6a3de7223ef0afd47731265c Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 3 Aug 2023 18:38:50 +0100
Subject: [PATCH 1808/2340] drm/xe/guc_submit: prevent repeated unregister

It seems that various things can trigger the lr cleanup worker,
including CAT error, engine reset and destroying the actual engine, so
seems plausible to end up triggering the worker more than once in some
cases. If that does happen we can race with an ongoing engine deregister
before it has completed, thus triggering it again and also changing the
state back into pending_disable. Checking if the engine has been marked
as destroyed looks like it should prevent this.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index e12cd4285e5dc..19df4b67bfbbc 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -802,8 +802,18 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 	/* Kill the run_job / process_msg entry points */
 	xe_sched_submission_stop(sched);
 
-	/* Engine state now stable, disable scheduling / deregister if needed */
-	if (exec_queue_registered(q)) {
+	/*
+	 * Engine state now mostly stable, disable scheduling / deregister if
+	 * needed. This cleanup routine might be called multiple times, where
+	 * the actual async engine deregister drops the final engine ref.
+	 * Calling disable_scheduling_deregister will mark the engine as
+	 * destroyed and fire off the CT requests to disable scheduling /
+	 * deregister, which we only want to do once. We also don't want to mark
+	 * the engine as pending_disable again as this may race with the
+	 * xe_guc_deregister_done_handler() which treats it as an unexpected
+	 * state.
+	 */
+	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
 		struct xe_guc *guc = exec_queue_to_guc(q);
 		int ret;
 
-- 
GitLab


From c47794bdd63d8304fa5d410039e81c6387388340 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 2 Aug 2023 20:15:38 -0700
Subject: [PATCH 1809/2340] drm/xe: Set max pte size when skipping rebinds

When a rebind is skipped, we must set the max pte size of the newly
created vma to value of the old vma as we do not pte walk for the new
vma. Without this future rebinds may be incorrectly skipped due to the
wrong max pte size. Null binds are more likely to expose this bug as
larger ptes are more frequently used compared to normal bindings.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Testcase: dEQP-VK.sparse_resources.buffer.ssbo.sparse_residency.buffer_size_2_24
Reported-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Fixes: 8f33b4f054fc ("drm/xe: Avoid doing rebinds")
Reference: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23045
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 374f111eea9c5..ff7fafe1315bf 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2410,6 +2410,20 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma)
 	return SZ_4K;
 }
 
+static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+{
+	switch (size) {
+	case SZ_1G:
+		vma->gpuva.flags |= XE_VMA_PTE_1G;
+		break;
+	case SZ_2M:
+		vma->gpuva.flags |= XE_VMA_PTE_2M;
+		break;
+	}
+
+	return SZ_4K;
+}
+
 /*
  * Parse operations list and create any resources needed for the operations
  * prior to fully committing to the operations. This setup can fail.
@@ -2520,6 +2534,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 						IS_ALIGNED(xe_vma_end(vma),
 							   xe_vma_max_pte_size(old));
 					if (op->remap.skip_prev) {
+						xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
 						op->remap.range -=
 							xe_vma_end(vma) -
 							xe_vma_start(old);
@@ -2554,10 +2569,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 					op->remap.skip_next = !xe_vma_is_userptr(old) &&
 						IS_ALIGNED(xe_vma_start(vma),
 							   xe_vma_max_pte_size(old));
-					if (op->remap.skip_next)
+					if (op->remap.skip_next) {
+						xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
 						op->remap.range -=
 							xe_vma_end(old) -
 							xe_vma_start(vma);
+					}
 				}
 				break;
 			}
-- 
GitLab


From de4651d6dd04d173b50fa8631a9a3cdd897434c4 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 4 Aug 2023 16:17:09 -0700
Subject: [PATCH 1810/2340] drm/xe: Update ARL-S DevIDs to the latest BSpec

BSpec changed with regard the DevIDs for ARL-S. Update the define
accordingly.

Bspec: 55420
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Link: https://lore.kernel.org/r/20230804231709.1065087-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/xe_pciids.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index b9e9f5b2b0ac2..0d0cf80eb0ba3 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -183,8 +183,7 @@
 	MACRO__(0x7DE0, ## __VA_ARGS__)
 
 #define XE_ARL_IDS(MACRO__, ...)		\
-	MACRO__(0x7D66, ## __VA_ARGS__),	\
-	MACRO__(0x7D76, ## __VA_ARGS__)
+	MACRO__(0x7D67, ## __VA_ARGS__)
 
 #define XE_MTL_IDS(MACRO__, ...)		\
 	XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__),	\
-- 
GitLab


From 7f6c6e5085bd4e02f0fd555be76cf7f105c201e7 Mon Sep 17 00:00:00 2001
From: Oak Zeng <oak.zeng@intel.com>
Date: Thu, 3 Aug 2023 14:44:04 -0400
Subject: [PATCH 1811/2340] drm/xe: Implement HW workaround 14016763929
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To workaround a HW bug on DG2, driver is required to map the whole
ppgtt virtual address space before GPU workload submission. Thus
set the XE_VM_FLAG_SCRATCH_PAGE flag during vm create so the whole
address space is mapped to point to scratch page.

v1:
  - Move the workaround implementation from xe_vm_create to
    xe_vm_create_ioctl - Brian
  - Reorder error checking in xe_vm_create_ioctl - Jose
  - Implement WA only for DG2-G10 and DG2-G12

Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile        |  2 +-
 drivers/gpu/drm/xe/xe_vm.c         | 13 +++++++++----
 drivers/gpu/drm/xe/xe_wa_oob.rules |  2 ++
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 2373832f932ec..b470c2394476f 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
-$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o: $(generated_oob)
 
 # Please keep these build lists sorted!
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ff7fafe1315bf..8b75595b39a22 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -29,6 +29,8 @@
 #include "xe_res_cursor.h"
 #include "xe_sync.h"
 #include "xe_trace.h"
+#include "generated/xe_wa_oob.h"
+#include "xe_wa.h"
 
 #define TEST_VM_ASYNC_OPS_ERROR
 
@@ -1979,6 +1981,13 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	int err;
 	u32 flags = 0;
 
+	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
+		args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+			 !xe->info.supports_usm))
+		return -EINVAL;
+
 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
@@ -2001,10 +2010,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 			 xe_device_in_fault_mode(xe)))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
-			 !xe->info.supports_usm))
-		return -EINVAL;
-
 	if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
 	if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 15c23813398a5..ea90dcc933b59 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -15,3 +15,5 @@
 18020744125	PLATFORM(PVC)
 1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
 1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
+14016763929	SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
-- 
GitLab


From 17d28aa8bdb11ba77d86a7ff228b1963afb7941d Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 9 Aug 2023 09:44:24 +0100
Subject: [PATCH 1812/2340] drm/xe: don't warn for bogus pagefaults

This appears to be easily user triggerable so warning is perhaps too
much. Rather just make it debug print.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/534
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 4 ++--
 drivers/gpu/drm/xe/xe_guc_submit.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index d8ff05e25eda5..21e0e9c7b6340 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -265,7 +265,7 @@ static int send_pagefault_reply(struct xe_guc *guc,
 
 static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
 {
-	drm_warn(&xe->drm, "\n\tASID: %d\n"
+	drm_dbg(&xe->drm, "\n\tASID: %d\n"
 		 "\tVFID: %d\n"
 		 "\tPDATA: 0x%04x\n"
 		 "\tFaulted Address: 0x%08x%08x\n"
@@ -370,7 +370,7 @@ static void pf_queue_work_func(struct work_struct *w)
 	if (unlikely(ret)) {
 		print_pagefault(xe, &pf);
 		pf.fault_unsuccessful = 1;
-		drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+		drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
 	}
 
 	reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 19df4b67bfbbc..b89964d6562ed 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1641,7 +1641,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 	if (unlikely(!q))
 		return -EPROTO;
 
-	drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
+	drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
 	trace_xe_exec_queue_memory_cat_error(q);
 
 	/* Treat the same as engine reset */
-- 
GitLab


From c7e4a611f35c064ed7bf3f1614647941b0228334 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 27 Jul 2023 15:09:21 -0700
Subject: [PATCH 1813/2340] drm/xe: Add Wa_14015150844 for DG2 and Xe_LPG

The workaround database tells us to set this bit, even though the bspec
indicates the bit doesn't exist on these platforms.  Since this is a
write-only register, we also can't read back its value to verify whether
it's actually working or not.  For now we'll trust that the workaround
database knows what it's talking about; if not, the hardware will just
ignore the attempt to write to a non-existent bit and it shouldn't cause
any problems.

Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Link: https://lore.kernel.org/r/20230727220920.2291913-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  1 +
 drivers/gpu/drm/xe/xe_wa.c           | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 55b0f70e19046..12cfcf918b870 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -328,6 +328,7 @@
 
 #define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
+#define   DIS_ATOMIC_CHAINING_TYPED_WRITES	REG_BIT(3)
 
 #define RT_CTRL					XE_REG_MCR(0xe530)
 #define   DIS_NULL_QUERY			REG_BIT(10)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 36c80e9fb7586..5cf84a0963021 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -490,6 +490,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			      */
 			     .read_mask = 0))
 	},
+	{ XE_RTP_NAME("14015150844"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
+			     XE_RTP_NOCHECK))
+	},
 
 	/* PVC */
 
@@ -517,6 +522,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH))
 	},
+	{ XE_RTP_NAME("14015150844"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
+			     XE_RTP_NOCHECK))
+	},
 
 	{}
 };
-- 
GitLab


From 3d4b0bfcd97fbb43d4848bafbf605f6d95afa7c8 Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Wed, 2 Aug 2023 12:34:49 +0530
Subject: [PATCH 1814/2340] drm/xe/pm: Add vram_d3cold_threshold for d3cold
 capable device

Do not register vram_d3cold_threshold device sysfs universally
for each gfx device, only register sysfs and set the threshold
value for d3cold capable devices.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/all/20230802070449.2426563-1-anshuman.gupta@intel.com/
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index cdde0d87fd9f4..5e992e62d0fb8 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -154,8 +154,11 @@ void xe_pm_init(struct xe_device *xe)
 	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
 
 	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
-	xe_device_sysfs_init(xe);
-	xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+
+	if (xe->d3cold.capable) {
+		xe_device_sysfs_init(xe);
+		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+	}
 
 	xe_pm_runtime_init(xe);
 }
-- 
GitLab


From ef6ea97228e1a742be64a76991686b7e98592c02 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 9 Aug 2023 09:16:18 +0100
Subject: [PATCH 1815/2340] drm/xe/guc_submit: fixup deregister in job timeout

Rather check if the engine is still registered before proceeding with
deregister steps. Also the engine being marked as disabled doesn't mean
the engine has been disabled or deregistered from GuC pov, and here we
are signalling fences so we need to be sure GuC is not still using this
context.

v2:
 - Drop the read_stopped() for this path. Since we are signalling
   fences on error here, best play it safe and wait for the GT reset to
   mark the engine as disabled, rather than it just being queued.
v3 (Matt Brost):
 - Keep the read_stopped() on the wait event, since there is no need to
   wait for an already scheduled GT reset. If it is set we can then just
   bail without signalling anything.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index b89964d6562ed..b3d765ee47f17 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -881,15 +881,17 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	}
 
 	/* Engine state now stable, disable scheduling if needed */
-	if (exec_queue_enabled(q)) {
+	if (exec_queue_registered(q)) {
 		struct xe_guc *guc = exec_queue_to_guc(q);
 		int ret;
 
 		if (exec_queue_reset(q))
 			err = -EIO;
 		set_exec_queue_banned(q);
-		xe_exec_queue_get(q);
-		disable_scheduling_deregister(guc, q);
+		if (!exec_queue_destroyed(q)) {
+			xe_exec_queue_get(q);
+			disable_scheduling_deregister(guc, q);
+		}
 
 		/*
 		 * Must wait for scheduling to be disabled before signalling
@@ -903,7 +905,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 		ret = wait_event_timeout(guc->ct.wq,
 					 !exec_queue_pending_disable(q) ||
 					 guc_read_stopped(guc), HZ * 5);
-		if (!ret) {
+		if (!ret || guc_read_stopped(guc)) {
 			XE_WARN_ON("Schedule disable failed to respond");
 			xe_sched_add_pending_job(sched, job);
 			xe_sched_submission_start(sched);
-- 
GitLab


From ca8656a2eb0930b991151588fd04e60c75465543 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 8 Aug 2023 10:12:09 +0100
Subject: [PATCH 1816/2340] drm/xe: skip rebind_list if vma destroyed

If we are closing a vm, mark each vma as XE_VMA_DESTROYED and skip
touching the rebind_list if this is seen on the eviction path. That way
we can safely drop the vm dma-resv lock on the close path without
needing to worry about racing with the eviction path trying to add stuff
to the rebind_list which can corrupt our contended list, since the
destroy and rebind links are the same list entry underneath.

References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/514
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 10 ++++++----
 drivers/gpu/drm/xe/xe_vm.c |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a126130027669..81870522a394c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -506,15 +506,17 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 				vm_resv_locked = true;
 			else if (ctx->resv != xe_vm_resv(vm)) {
 				spin_lock(&vm->notifier.list_lock);
-				list_move_tail(&vma->notifier.rebind_link,
-					       &vm->notifier.rebind_list);
+				if (!(vma->gpuva.flags & XE_VMA_DESTROYED))
+					list_move_tail(&vma->notifier.rebind_link,
+						       &vm->notifier.rebind_list);
 				spin_unlock(&vm->notifier.list_lock);
 				continue;
 			}
 
 			xe_vm_assert_held(vm);
-			if (list_empty(&vma->combined_links.rebind) &&
-			    vma->tile_present)
+			if (vma->tile_present &&
+			    !(vma->gpuva.flags & XE_VMA_DESTROYED) &&
+			    list_empty(&vma->combined_links.rebind))
 				list_add_tail(&vma->combined_links.rebind,
 					      &vm->rebind_list);
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 8b75595b39a22..d683418b817db 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1460,6 +1460,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		}
 
 		list_move_tail(&vma->combined_links.destroy, &contested);
+		vma->gpuva.flags |= XE_VMA_DESTROYED;
 	}
 
 	/*
-- 
GitLab


From a20c75dba192af6ba63d618514a064268dbbe7db Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 11 Aug 2023 06:27:34 -0700
Subject: [PATCH 1817/2340] drm/xe: Call __guc_exec_queue_fini_async direct for
 KERNEL exec_queues

Usually we call __guc_exec_queue_fini_async via a worker as the
exec_queue fini can be done from within the GPU scheduler which creates
a circular dependency without a worker. Kernel exec_queues are fini'd at
driver unload (not from within the GPU scheduler) so it is safe to
directly call __guc_exec_queue_fini_async.

Suggested-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index b3d765ee47f17..8ecfe2b15e284 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -956,27 +956,19 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
 	xe_sched_entity_fini(&ge->entity);
 	xe_sched_fini(&ge->sched);
 
-	if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL)) {
-		kfree(ge);
-		xe_exec_queue_fini(q);
-	}
+	kfree(ge);
+	xe_exec_queue_fini(q);
 }
 
 static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
 {
-	bool kernel = q->flags & EXEC_QUEUE_FLAG_KERNEL;
-
 	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
-	queue_work(system_wq, &q->guc->fini_async);
 
 	/* We must block on kernel engines so slabs are empty on driver unload */
-	if (kernel) {
-		struct xe_guc_exec_queue *ge = q->guc;
-
-		flush_work(&ge->fini_async);
-		kfree(ge);
-		xe_exec_queue_fini(q);
-	}
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL)
+		__guc_exec_queue_fini_async(&q->guc->fini_async);
+	else
+		queue_work(system_wq, &q->guc->fini_async);
 }
 
 static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
-- 
GitLab


From 0887a2e7ab620510093d55f4587c407362363b6d Mon Sep 17 00:00:00 2001
From: Oak Zeng <oak.zeng@intel.com>
Date: Tue, 11 Jul 2023 17:46:09 -0400
Subject: [PATCH 1818/2340] drm/xe: Make xe_mem_region struct

Make a xe_mem_region structure which will be used in the
coming patches. The new structure is used in both xe device
level (xe->mem.vram) and xe_tile level (tile->vram).

Make the definition of xe_mem_region.dpa_base to be the DPA
base of this memory region and change codes according to
this new definition.

v1:
  - rename xe_mem_region.base to dpa_base per conversation with Mike
    Ruhl

Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c           |  2 +-
 drivers/gpu/drm/xe/xe_device_types.h | 94 ++++++++++++----------------
 drivers/gpu/drm/xe/xe_migrate.c      |  2 +-
 drivers/gpu/drm/xe/xe_mmio.c         |  9 +--
 4 files changed, 47 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 81870522a394c..80c5d1a7d41a2 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1469,7 +1469,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
 	if (res->mem_type == XE_PL_STOLEN)
 		return xe_ttm_stolen_gpu_offset(xe);
 
-	return xe->mem.vram.base + tile->mem.vram.base;
+	return tile->mem.vram.dpa_base;
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 128e0a9536929..5575d13395fea 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -50,6 +50,44 @@ struct xe_ggtt;
 		 const struct xe_tile * : (const struct xe_device *)((tile__)->xe),	\
 		 struct xe_tile * : (tile__)->xe)
 
+/**
+ * struct xe_mem_region - memory region structure
+ * This is used to describe a memory region in xe
+ * device, such as HBM memory or CXL extension memory.
+ */
+struct xe_mem_region {
+	/** @io_start: IO start address of this VRAM instance */
+	resource_size_t io_start;
+	/**
+	 * @io_size: IO size of this VRAM instance
+	 *
+	 * This represents how much of this VRAM we can access
+	 * via the CPU through the VRAM BAR. This can be smaller
+	 * than @usable_size, in which case only part of VRAM is CPU
+	 * accessible (typically the first 256M). This
+	 * configuration is known as small-bar.
+	 */
+	resource_size_t io_size;
+	/** @dpa_base: This memory regions's DPA (device physical address) base */
+	resource_size_t dpa_base;
+	/**
+	 * @usable_size: usable size of VRAM
+	 *
+	 * Usable size of VRAM excluding reserved portions
+	 * (e.g stolen mem)
+	 */
+	resource_size_t usable_size;
+	/**
+	 * @actual_physical_size: Actual VRAM size
+	 *
+	 * Actual VRAM size including reserved portions
+	 * (e.g stolen mem)
+	 */
+	resource_size_t actual_physical_size;
+	/** @mapping: pointer to VRAM mappable space */
+	void *__iomem mapping;
+};
+
 /**
  * struct xe_tile - hardware tile structure
  *
@@ -107,38 +145,7 @@ struct xe_tile {
 		 * Although VRAM is associated with a specific tile, it can
 		 * still be accessed by all tiles' GTs.
 		 */
-		struct {
-			/** @io_start: IO start address of this VRAM instance */
-			resource_size_t io_start;
-			/**
-			 * @io_size: IO size of this VRAM instance
-			 *
-			 * This represents how much of this VRAM we can access
-			 * via the CPU through the VRAM BAR. This can be smaller
-			 * than @size, in which case only part of VRAM is CPU
-			 * accessible (typically the first 256M). This
-			 * configuration is known as small-bar.
-			 */
-			resource_size_t io_size;
-			/** @base: offset of VRAM starting base */
-			resource_size_t base;
-			/**
-			 * @usable_size: usable size of VRAM
-			 *
-			 * Usable size of VRAM excluding reserved portions
-			 * (e.g stolen mem)
-			 */
-			resource_size_t usable_size;
-			/**
-			 * @actual_physical_size: Actual VRAM size
-			 *
-			 * Actual VRAM size including reserved portions
-			 * (e.g stolen mem)
-			 */
-			resource_size_t actual_physical_size;
-			/** @mapping: pointer to VRAM mappable space */
-			void *__iomem mapping;
-		} vram;
+		struct xe_mem_region vram;
 
 		/** @vram_mgr: VRAM TTM manager */
 		struct xe_ttm_vram_mgr *vram_mgr;
@@ -247,28 +254,7 @@ struct xe_device {
 	/** @mem: memory info for device */
 	struct {
 		/** @vram: VRAM info for device */
-		struct {
-			/** @io_start: IO start address of VRAM */
-			resource_size_t io_start;
-			/**
-			 * @io_size: IO size of VRAM.
-			 *
-			 * This represents how much of VRAM the CPU can access
-			 * via the VRAM BAR.
-			 * On systems that do not support large BAR IO space,
-			 * this can be smaller than the actual memory size, in
-			 * which case only part of VRAM is CPU accessible
-			 * (typically the first 256M).  This configuration is
-			 * known as small-bar.
-			 */
-			resource_size_t io_size;
-			/** @size: Total size of VRAM */
-			resource_size_t size;
-			/** @base: Offset to apply for Device Physical Address control */
-			resource_size_t base;
-			/** @mapping: pointer to VRAM mappable space */
-			void *__iomem mapping;
-		} vram;
+		struct xe_mem_region vram;
 		/** @sys_mgr: system TTM manager */
 		struct ttm_resource_manager sys_mgr;
 	} mem;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index d0816d2090f07..06e85f7162d41 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -264,7 +264,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		 * Use 1GB pages, it shouldn't matter the physical amount of
 		 * vram is less, when we don't access it.
 		 */
-		for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8)
+		for (pos = 0; pos < xe->mem.vram.actual_physical_size; pos += SZ_1G, ofs += 8)
 			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index aa9c573b12432..41ee89247ddb7 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -173,7 +173,8 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
 	if (!xe->mem.vram.io_size)
 		return -EIO;
 
-	xe->mem.vram.base = 0; /* DPA offset */
+	/* XXX: Need to change when xe link code is ready */
+	xe->mem.vram.dpa_base = 0;
 
 	/* set up a map to the total memory area. */
 	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
@@ -281,7 +282,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 			return -ENODEV;
 		}
 
-		tile->mem.vram.base = tile_offset;
+		tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset;
 		tile->mem.vram.usable_size = vram_size;
 		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
 
@@ -304,10 +305,10 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		io_size -= min_t(u64, tile_size, io_size);
 	}
 
-	xe->mem.vram.size = total_size;
+	xe->mem.vram.actual_physical_size = total_size;
 
 	drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
-		 &xe->mem.vram.size);
+		 &xe->mem.vram.actual_physical_size);
 	drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
 		 &available_size);
 
-- 
GitLab


From 286089ce692907c48a375676a0c828ac912856c9 Mon Sep 17 00:00:00 2001
From: Oak Zeng <oak.zeng@intel.com>
Date: Fri, 14 Jul 2023 10:42:07 -0400
Subject: [PATCH 1819/2340] drm/xe: Improve vram info debug printing

Print both device physical address range and CPU io range
of vram. Also print vram's actual size, usable size excluding
stolen memory, and CPU io accessible size.
V1:
  - Add back small BAR device info (Matt)

Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 41ee89247ddb7..bb6823db14d49 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -286,12 +286,13 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 		tile->mem.vram.usable_size = vram_size;
 		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
 
-		drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id,
-			 &tile->mem.vram.io_start, &tile->mem.vram.usable_size);
-
 		if (tile->mem.vram.io_size < tile->mem.vram.usable_size)
-			drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id,
-				 tile->id, &tile->mem.vram.io_size);
+			drm_info(&xe->drm, "Small BAR device\n");
+		drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id,
+			 tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size);
+		drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id,
+			 &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size,
+			 &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size);
 
 		/* calculate total size using tile size to get the correct HW sizing */
 		total_size += tile_size;
-- 
GitLab


From 0955d3be8b53971e4e72667918092674a233e329 Mon Sep 17 00:00:00 2001
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Date: Mon, 14 Aug 2023 20:33:23 +0530
Subject: [PATCH 1820/2340] drm/xe/dg2: Remove Wa_15010599737

Since this is specific to DirectX, we don't need it on Linux.

Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230814150323.874033-1-shekhar.chauhan@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 ---
 drivers/gpu/drm/xe/xe_wa.c           | 4 ----
 2 files changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 12cfcf918b870..1179bbd16a97b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -90,9 +90,6 @@
 #define XEHP_TILE_ADDR_RANGE(_idx)		XE_REG_MCR(0x4900 + (_idx) * 4)
 #define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
 
-#define CHICKEN_RASTER_1			XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED)
-#define   DIS_SF_ROUND_NEAREST_EVEN		REG_BIT(8)
-
 #define CHICKEN_RASTER_2			XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
 #define   TBIMR_FAST_CLIP			REG_BIT(5)
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 5cf84a0963021..e2b6e17d7ec41 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -599,10 +599,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2,
 			     SCOREBOARD_STALL_FLUSH_CONTROL))
 	},
-	{ XE_RTP_NAME("15010599737"),
-	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN))
-	},
 	{ XE_RTP_NAME("18019271663"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
-- 
GitLab


From 50b099030bb493604601a985b5fb3a8c5962aab9 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Mon, 7 Aug 2023 15:43:35 +0000
Subject: [PATCH 1821/2340] drm/xe: Simplify engine class sched_props setting

Shortens the too long code lines.

Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b8fcc6e985cfc..c445406844622 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -362,22 +362,20 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	hwe->fence_irq = &gt->fence_irq[info->class];
 	hwe->engine_id = id;
 
-	if (!gt->eclass[hwe->class].sched_props.job_timeout_ms) {
-		gt->eclass[hwe->class].sched_props.job_timeout_ms = 5 * 1000;
-		gt->eclass[hwe->class].sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
-		gt->eclass[hwe->class].sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
-		gt->eclass[hwe->class].sched_props.timeslice_us = 1 * 1000;
-		gt->eclass[hwe->class].sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
-		gt->eclass[hwe->class].sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
-		gt->eclass[hwe->class].sched_props.preempt_timeout_us = 640 * 1000;
-		gt->eclass[hwe->class].sched_props.preempt_timeout_min =
-								XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
-		gt->eclass[hwe->class].sched_props.preempt_timeout_max =
-								XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+	hwe->eclass = &gt->eclass[hwe->class];
+	if (!hwe->eclass->sched_props.job_timeout_ms) {
+		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
+		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
+		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
+		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
+		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
+		hwe->eclass->sched_props.preempt_timeout_us = 640 * 1000;
+		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
 		/* Record default props */
-		gt->eclass[hwe->class].defaults = gt->eclass[hwe->class].sched_props;
+		hwe->eclass->defaults = hwe->eclass->sched_props;
 	}
-	hwe->eclass = &gt->eclass[hwe->class];
 
 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
 	xe_wa_process_engine(hwe);
-- 
GitLab


From a863b4163ab9d3f173aef0f1191a0c0b8ea41634 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Mon, 7 Aug 2023 14:58:38 +0000
Subject: [PATCH 1822/2340] drm/xe: Add CONFIG_DRM_XE_PREEMPT_TIMEOUT

Allow preemption timeout to be specified as a config option.

v2: Change unit to microseconds (Tejas)
v3: Remove get_default_preempt_timeout()

Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig.profile | 8 ++++++++
 drivers/gpu/drm/xe/xe_hw_engine.c  | 5 ++++-
 drivers/gpu/drm/xe/xe_hw_engine.h  | 5 +++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile
index 51951c8149a17..ba17a25e8db3b 100644
--- a/drivers/gpu/drm/xe/Kconfig.profile
+++ b/drivers/gpu/drm/xe/Kconfig.profile
@@ -22,6 +22,14 @@ config DRM_XE_TIMESLICE_MIN
 	help
 	  Configures the default min timeslice duration between multiple
 	  contexts by guc scheduling.
+config DRM_XE_PREEMPT_TIMEOUT
+	int "Preempt timeout (us, jiffy granularity)"
+	default 640000 # microseconds
+	help
+	  How long to wait (in microseconds) for a preemption event to occur
+	  when submitting a new context. If the current context does not hit
+	  an arbitration point and yield to HW before the timer expires, the
+	  HW will be reset to allow the more important context to execute.
 config DRM_XE_PREEMPT_TIMEOUT_MAX
 	int "Default max preempt timeout (us)"
 	default 10000000 # microseconds
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index c445406844622..4c812d04e1823 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -370,7 +370,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
 		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
 		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
-		hwe->eclass->sched_props.preempt_timeout_us = 640 * 1000;
+		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
 		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
 		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
 		/* Record default props */
@@ -562,6 +562,9 @@ int xe_hw_engines_init_early(struct xe_gt *gt)
 	read_copy_fuses(gt);
 	read_compute_fuses(gt);
 
+	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
+	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
+
 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index 3d37d6d44261d..71968ee2f600d 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -30,6 +30,11 @@ struct drm_printer;
 #else
 #define XE_HW_ENGINE_TIMESLICE_MAX (10 * 1000 * 1000)
 #endif
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT CONFIG_DRM_XE_PREEMPT_TIMEOUT
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT (640 * 1000)
+#endif
 #ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
 #define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
 #else
-- 
GitLab


From 0b1d1473b355ff3a1447048db24822eb7016c1c2 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 17 Aug 2023 13:18:25 -0700
Subject: [PATCH 1823/2340] drm/xe: common function to assign queue name

The queue name assignment is identical in both GuC and execlists
backends, so we can move it to a common function. This will make adding
a new entry in the next patch slightly cleaner.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230817201831.1583172-2-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c | 23 +++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_exec_queue.h |  1 +
 drivers/gpu/drm/xe/xe_execlist.c   | 20 +-------------------
 drivers/gpu/drm/xe/xe_guc_submit.c | 20 +-------------------
 4 files changed, 26 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 901c609a657e6..c2adff7706142 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -177,6 +177,29 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 	kfree(q);
 }
 
+void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
+{
+	switch (q->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		sprintf(q->name, "rcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		sprintf(q->name, "vcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		sprintf(q->name, "vecs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		sprintf(q->name, "bcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		sprintf(q->name, "ccs%d", instance);
+		break;
+	default:
+		XE_WARN_ON(q->class);
+	}
+}
+
 struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
 {
 	struct xe_exec_queue *q;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 94a6abee38a60..22499a2f522b4 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -23,6 +23,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
 
 void xe_exec_queue_fini(struct xe_exec_queue *q);
 void xe_exec_queue_destroy(struct kref *ref);
+void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance);
 
 struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id);
 
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 3b8be55fe19c9..df91780d8b9f9 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -350,25 +350,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q)
 	q->execlist = exl;
 	q->entity = &exl->entity;
 
-	switch (q->class) {
-	case XE_ENGINE_CLASS_RENDER:
-		sprintf(q->name, "rcs%d", ffs(q->logical_mask) - 1);
-		break;
-	case XE_ENGINE_CLASS_VIDEO_DECODE:
-		sprintf(q->name, "vcs%d", ffs(q->logical_mask) - 1);
-		break;
-	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
-		sprintf(q->name, "vecs%d", ffs(q->logical_mask) - 1);
-		break;
-	case XE_ENGINE_CLASS_COPY:
-		sprintf(q->name, "bcs%d", ffs(q->logical_mask) - 1);
-		break;
-	case XE_ENGINE_CLASS_COMPUTE:
-		sprintf(q->name, "ccs%d", ffs(q->logical_mask) - 1);
-		break;
-	default:
-		XE_WARN_ON(q->class);
-	}
+	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
 
 	return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 8ecfe2b15e284..55c7b13d15ecc 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1167,25 +1167,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 
 	mutex_unlock(&guc->submission_state.lock);
 
-	switch (q->class) {
-	case XE_ENGINE_CLASS_RENDER:
-		sprintf(q->name, "rcs%d", q->guc->id);
-		break;
-	case XE_ENGINE_CLASS_VIDEO_DECODE:
-		sprintf(q->name, "vcs%d", q->guc->id);
-		break;
-	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
-		sprintf(q->name, "vecs%d", q->guc->id);
-		break;
-	case XE_ENGINE_CLASS_COPY:
-		sprintf(q->name, "bcs%d", q->guc->id);
-		break;
-	case XE_ENGINE_CLASS_COMPUTE:
-		sprintf(q->name, "ccs%d", q->guc->id);
-		break;
-	default:
-		XE_WARN_ON(q->class);
-	}
+	xe_exec_queue_assign_name(q, q->guc->id);
 
 	trace_xe_exec_queue_create(q);
 
-- 
GitLab


From 296549107e4766bb927debd016527c71fb6faf36 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 17 Aug 2023 13:18:26 -0700
Subject: [PATCH 1824/2340] drm/xe: base definitions for the GSCCS

The first step in introducing the GSCCS is to add all the basic defs for
it (name, mmio base, class/instance, lrc size etc).

Bspec: 60149, 60421, 63752
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230817201831.1583172-3-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h    | 1 +
 drivers/gpu/drm/xe/regs/xe_regs.h       | 1 +
 drivers/gpu/drm/xe/xe_exec_queue.c      | 3 +++
 drivers/gpu/drm/xe/xe_guc.h             | 1 +
 drivers/gpu/drm/xe/xe_guc_ads.c         | 8 ++------
 drivers/gpu/drm/xe/xe_hw_engine.c       | 7 +++++++
 drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 ++
 drivers/gpu/drm/xe/xe_lrc.c             | 1 +
 8 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 1179bbd16a97b..5f635682ce5ad 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -376,6 +376,7 @@
 #define   INTR_ENGINE_CLASS(x)			REG_FIELD_GET(GENMASK(18, 16), x)
 #define   INTR_ENGINE_INTR(x)			REG_FIELD_GET(GENMASK(15, 0), x)
 #define   OTHER_GUC_INSTANCE			0
+#define   OTHER_GSC_INSTANCE			6
 
 #define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
 #define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 25275a36b2808..be496a3946d83 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -33,6 +33,7 @@
 #define XEHPC_BCS6_RING_BASE			0x3ea000
 #define XEHPC_BCS7_RING_BASE			0x3ec000
 #define XEHPC_BCS8_RING_BASE			0x3ee000
+#define GSCCS_RING_BASE				0x11a000
 #define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
 #define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
 #define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index c2adff7706142..f6619688f92fd 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -195,6 +195,9 @@ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
 	case XE_ENGINE_CLASS_COMPUTE:
 		sprintf(q->name, "ccs%d", instance);
 		break;
+	case XE_ENGINE_CLASS_OTHER:
+		sprintf(q->name, "gsccs%d", instance);
+		break;
 	default:
 		XE_WARN_ON(q->class);
 	}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index f64f22e971697..3addd8fc674af 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -51,6 +51,7 @@ static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
 	case XE_ENGINE_CLASS_COMPUTE:
 		return GUC_COMPUTE_CLASS;
 	case XE_ENGINE_CLASS_OTHER:
+		return GUC_GSC_OTHER_CLASS;
 	default:
 		XE_WARN_ON(class);
 		return -1;
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 7d1244df959df..5edee24b97c96 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -253,9 +253,6 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
 	int class;
 
 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
-		if (class == XE_ENGINE_CLASS_OTHER)
-			continue;
-
 		if (!engine_enable_mask(gt, class))
 			continue;
 
@@ -350,6 +347,8 @@ static void fill_engine_enable_masks(struct xe_gt *gt,
 		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
 	info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
 		       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
 }
 
 static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
@@ -638,9 +637,6 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
 		u8 guc_class;
 
-		if (class == XE_ENGINE_CLASS_OTHER)
-			continue;
-
 		guc_class = xe_engine_class_to_guc_class(class);
 
 		if (!info_map_read(xe, &info_map,
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 4c812d04e1823..32a5bd9d8e6b6 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -219,6 +219,13 @@ static const struct engine_info engine_infos[] = {
 		.domain = XE_FW_RENDER,
 		.mmio_base = COMPUTE3_RING_BASE,
 	},
+	[XE_HW_ENGINE_GSCCS0] = {
+		.name = "gsccs0",
+		.class = XE_ENGINE_CLASS_OTHER,
+		.instance = OTHER_GSC_INSTANCE,
+		.domain = XE_FW_GSC,
+		.mmio_base = GSCCS_RING_BASE,
+	},
 };
 
 static void hw_engine_fini(struct drm_device *drm, void *arg)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 97d9ba31b5fc7..cd4bc1412a3ff 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -53,6 +53,8 @@ enum xe_hw_engine_id {
 	XE_HW_ENGINE_CCS2,
 	XE_HW_ENGINE_CCS3,
 #define XE_HW_ENGINE_CCS_MASK	GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
+	XE_HW_ENGINE_GSCCS0,
+#define XE_HW_ENGINE_GSCCS_MASK	GENMASK_ULL(XE_HW_ENGINE_GSCCS0, XE_HW_ENGINE_GSCCS0)
 	XE_NUM_HW_ENGINES,
 };
 
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 09db8da261a39..7a7fdcdadf372 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -46,6 +46,7 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
 	case XE_ENGINE_CLASS_COPY:
 	case XE_ENGINE_CLASS_VIDEO_DECODE:
 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+	case XE_ENGINE_CLASS_OTHER:
 		return 2 * SZ_4K;
 	}
 }
-- 
GitLab


From 3d2b5d4e28d9c58ea97704fe1eb663aee2556449 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 17 Aug 2023 13:18:27 -0700
Subject: [PATCH 1825/2340] drm/xe: add GSCCS irq support

The GSCCS has its own enable and mask registers. The interrupt identity
for the GSCCS shows OTHER_CLASS instance 6.

Bspec: 54029, 54030
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230817201831.1583172-4-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  2 ++
 drivers/gpu/drm/xe/xe_irq.c          | 25 ++++++++++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5f635682ce5ad..b6e870302cc7b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -380,6 +380,7 @@
 
 #define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
 #define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
+#define GUNIT_GSC_INTR_ENABLE			XE_REG(0x190044)
 #define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048)
 #define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4))
 #define RCS0_RSVD_INTR_MASK			XE_REG(0x190090)
@@ -389,6 +390,7 @@
 #define VECS0_VECS1_INTR_MASK			XE_REG(0x1900d0)
 #define GUC_SG_INTR_MASK			XE_REG(0x1900e8)
 #define GPM_WGBOXPERF_INTR_MASK			XE_REG(0x1900ec)
+#define GUNIT_GSC_INTR_MASK			XE_REG(0x1900f4)
 #define CCS0_CCS1_INTR_MASK			XE_REG(0x190100)
 #define CCS2_CCS3_INTR_MASK			XE_REG(0x190104)
 #define XEHPC_BCS1_BCS2_INTR_MASK		XE_REG(0x190110)
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 69629be07de2b..ef434142bcd92 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -175,6 +175,11 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
 		xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
 		xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
 		xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+
+		if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, irqs);
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~irqs);
+		}
 	}
 }
 
@@ -243,7 +248,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
 		return tile->media_gt;
 
 	if (class == XE_ENGINE_CLASS_OTHER &&
-	    instance == OTHER_MEDIA_GUC_INSTANCE)
+	    (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE))
 		return tile->media_gt;
 
 	return tile->primary_gt;
@@ -280,16 +285,16 @@ static void gt_irq_handler(struct xe_tile *tile,
 
 			engine_gt = pick_engine_gt(tile, class, instance);
 
-			if (class == XE_ENGINE_CLASS_OTHER) {
-				gt_other_irq_handler(engine_gt, instance, intr_vec);
+			hwe = xe_gt_hw_engine(engine_gt, class, instance, false);
+			if (hwe) {
+				xe_hw_engine_handle_irq(hwe, intr_vec);
 				continue;
 			}
 
-			hwe = xe_gt_hw_engine(engine_gt, class, instance, false);
-			if (!hwe)
+			if (class == XE_ENGINE_CLASS_OTHER) {
+				gt_other_irq_handler(engine_gt, instance, intr_vec);
 				continue;
-
-			xe_hw_engine_handle_irq(hwe, intr_vec);
+			}
 		}
 	}
 
@@ -457,6 +462,12 @@ static void gt_irq_reset(struct xe_tile *tile)
 	if (ccs_mask & (BIT(2)|BIT(3)))
 		xe_mmio_write32(mmio,  CCS2_CCS3_INTR_MASK, ~0);
 
+	if (tile->media_gt &&
+	    xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) {
+		xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
+		xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
+	}
+
 	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
 	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK,  ~0);
 	xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE,	 0);
-- 
GitLab


From aef61349ef1bf01badfa3ea955ba84048467f691 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 17 Aug 2023 13:18:28 -0700
Subject: [PATCH 1826/2340] drm/xe: add GSCCS ring ops

Like the BCS, the GSCCS doesn't have any special HW that needs handling
when emitting commands, so we can re-use the same emit_job code. To make
it clear that this is now a shared low-level function, it has been
renamed to use the "simple" postfix, instead of "copy", to indicate that
it applies to all engines that don't need any additional engine-specific
handling.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230817201831.1583172-5-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 6346ed24e2793..36058600e231b 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -205,8 +205,9 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)
 	return !(job->q->flags & EXEC_QUEUE_FLAG_WA) ? BIT(8) : 0;
 }
 
-static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
-				  u64 batch_addr, u32 seqno)
+/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
+static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
+				    u64 batch_addr, u32 seqno)
 {
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
@@ -374,6 +375,15 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
 
+static void emit_job_gen12_gsc(struct xe_sched_job *job)
+{
+	XE_WARN_ON(job->q->width > 1); /* no parallel submission for GSCCS */
+
+	__emit_job_gen12_simple(job, job->q->lrc,
+				job->batch_addr[0],
+				xe_sched_job_seqno(job));
+}
+
 static void emit_job_gen12_copy(struct xe_sched_job *job)
 {
 	int i;
@@ -385,9 +395,9 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
 	}
 
 	for (i = 0; i < job->q->width; ++i)
-		__emit_job_gen12_copy(job, job->q->lrc + i,
-				      job->batch_addr[i],
-				      xe_sched_job_seqno(job));
+		__emit_job_gen12_simple(job, job->q->lrc + i,
+				        job->batch_addr[i],
+				        xe_sched_job_seqno(job));
 }
 
 static void emit_job_gen12_video(struct xe_sched_job *job)
@@ -411,6 +421,10 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
 						xe_sched_job_seqno(job));
 }
 
+static const struct xe_ring_ops ring_ops_gen12_gsc = {
+	.emit_job = emit_job_gen12_gsc,
+};
+
 static const struct xe_ring_ops ring_ops_gen12_copy = {
 	.emit_job = emit_job_gen12_copy,
 };
@@ -427,6 +441,8 @@ const struct xe_ring_ops *
 xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
 {
 	switch (class) {
+	case XE_ENGINE_CLASS_OTHER:
+		return &ring_ops_gen12_gsc;
 	case XE_ENGINE_CLASS_COPY:
 		return &ring_ops_gen12_copy;
 	case XE_ENGINE_CLASS_VIDEO_DECODE:
-- 
GitLab


From f4c33ae8eca2fa459d0d58baa1a26234598e6b32 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 17 Aug 2023 13:18:29 -0700
Subject: [PATCH 1827/2340] drm/xe: GSC forcewake support

The ID for the GSC forcewake domain already exists, but we're missing
the register definitions and the domain intialization, so add that in.

v2: move reg definition to be in address order (Matt)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230817201831.1583172-6-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++
 drivers/gpu/drm/xe/xe_force_wake.c   | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index b6e870302cc7b..51d59e1229be7 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -39,6 +39,7 @@
 #define   GMD_ID_RELEASE_MASK			REG_GENMASK(21, 14)
 #define   GMD_ID_REVID				REG_GENMASK(5, 0)
 
+#define FORCEWAKE_ACK_GSC			XE_REG(0xdf8)
 #define FORCEWAKE_ACK_GT_MTL			XE_REG(0xdfc)
 
 /* L3 Cache Control */
@@ -256,6 +257,7 @@
 #define FORCEWAKE_RENDER			XE_REG(0xa278)
 #define FORCEWAKE_MEDIA_VDBOX(n)		XE_REG(0xa540 + (n) * 4)
 #define FORCEWAKE_MEDIA_VEBOX(n)		XE_REG(0xa560 + (n) * 4)
+#define FORCEWAKE_GSC				XE_REG(0xa618)
 
 #define XEHPC_LNCFMISCCFGREG0			XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED)
 #define   XEHPC_OVRLSCCC			REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index e563de8625814..ef7279e0b006c 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -97,6 +97,13 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 			    FORCEWAKE_ACK_MEDIA_VEBOX(j),
 			    BIT(0), BIT(16));
 	}
+
+	if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC],
+			    XE_FW_DOMAIN_ID_GSC,
+			    FORCEWAKE_GSC,
+			    FORCEWAKE_ACK_GSC,
+			    BIT(0), BIT(16));
 }
 
 static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain)
-- 
GitLab


From 92939935f478c5a0cc43f87652360ac5c70063b9 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 17 Aug 2023 13:18:30 -0700
Subject: [PATCH 1828/2340] drm/xe: don't expose the GSCCS to users

The kernel is the only expected user of the GSCCS, so we don't want to
expose it to userspace.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230817201831.1583172-7-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 32a5bd9d8e6b6..81281e9c02eb5 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -795,6 +795,9 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
 	struct xe_gt *gt = hwe->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 
+	if (hwe->class == XE_ENGINE_CLASS_OTHER)
+		return true;
+
 	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
 		hwe->instance == gt->usm.reserved_bcs_instance;
 }
-- 
GitLab


From 07d7ba13d80aa9a047ac4fa83f59f161ca5f0453 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 17 Aug 2023 15:17:07 -0700
Subject: [PATCH 1829/2340] drm/xe: enable idle msg and set hysteresis for
 GSCCS

On MTL (and only on MTL) the GSCCS defaults with idle messaging
disabled. This means that, once awoken, the GSCCS will never signal its
idleness to the GT. To allow the GT to enter the proper low-power state,
we need therefore to turn idle messaging on. As part of this, we also
need to set a proper hysteresis value for the engine.

v2: use MEDIA_VERSION() and CLR() for the RTP rule and action, add reg
bit define in descending order (Matt)

Bspec: 71496
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230817221707.1602873-1-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  4 ++++
 drivers/gpu/drm/xe/xe_hw_engine.c        | 16 ++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 79873bf64e8dd..d57fd855086ac 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -24,6 +24,10 @@
 #define RING_PSMI_CTL(base)			XE_REG((base) + 0x50, XE_REG_OPTION_MASKED)
 #define   RC_SEMA_IDLE_MSG_DISABLE		REG_BIT(12)
 #define   WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
+#define   IDLE_MSG_DISABLE			REG_BIT(0)
+
+#define RING_PWRCTX_MAXCNT(base)		XE_REG((base) + 0x54)
+#define   IDLE_WAIT_TIME			REG_GENMASK(19, 0)
 
 #define RING_ACTHD_UDW(base)			XE_REG((base) + 0x5c)
 #define RING_DMA_FADD_UDW(base)			XE_REG((base) + 0x60)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 81281e9c02eb5..24b5226f14335 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -339,6 +339,22 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 					   ring_cmd_cctl_val,
 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 		},
+		/*
+		 * To allow the GSC engine to go idle on MTL we need to enable
+		 * idle messaging and set the hysteresis value (we use 0xA=5us
+		 * as recommended in spec). On platforms after MTL this is
+		 * enabled by default.
+		 */
+		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
+		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
+		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
+				     IDLE_MSG_DISABLE,
+				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
+				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
+					   IDLE_WAIT_TIME,
+					   0xA,
+					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
 		{}
 	};
 
-- 
GitLab


From 0aef9ff75204485ae6bcc9f7a54f16b3a3536b49 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 17 Aug 2023 16:04:12 -0700
Subject: [PATCH 1830/2340] drm/xe: Stop tracking 4-tile support

The choice of Y-major tiling format (either the legacy "TileY" or the
newer "Tile4") is based on graphics IP version (12.50 and beyond have
Tile4, earlier platforms have TileY).  The tracking in xe was originally
added to allow re-using display from i915.  However as of i915 commit
4ebf43d0488f ("drm/i915: Eliminate has_4tile feature flag"), the display
code determines TileY vs Tile4 itself, so this can be removed from xe.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230817230407.909816-10-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  2 --
 drivers/gpu/drm/xe/xe_pci.c          | 10 +---------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 5575d13395fea..6e852809d3ac9 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -221,8 +221,6 @@ struct xe_device {
 		u8 force_execlist:1;
 		/** @has_flat_ccs: Whether flat CCS metadata is used */
 		u8 has_flat_ccs:1;
-		/** @has_4tile: Whether tile-4 tiling is supported */
-		u8 has_4tile:1;
 		/** @has_llc: Device has a shared CPU+GPU last level cache */
 		u8 has_llc:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6e31b596683e7..32adeda3520cb 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -77,12 +77,6 @@ struct xe_device_desc {
 
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
-	/*
-	 * FIXME: Xe doesn't care about presence/lack of 4tile since we can
-	 * already determine that from the graphics IP version.  This flag
-	 * should eventually move entirely into the display code's own logic.
-	 */
-	u8 has_4tile:1;
 	u8 has_llc:1;
 };
 
@@ -265,8 +259,7 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
 		{ XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
 		{ XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
 		{ } \
-	}, \
-	.has_4tile = 1
+	}
 
 static const struct xe_device_desc ats_m_desc = {
 	.graphics = &graphics_xehpg,
@@ -537,7 +530,6 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
-	xe->info.has_4tile = desc->has_4tile;
 	xe->info.has_llc = desc->has_llc;
 
 	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
-- 
GitLab


From 13a3398b927b1578440740f7684bc20883a08521 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:04 -0700
Subject: [PATCH 1831/2340] drm/xe/xe2: Update render/compute context image
 sizes

The render and compute context are significantly smaller on Xe2 than on
previous platforms.

Registers:
 - Render:  3008 dwords -> 12032 bytes -> round to 3 pages
 - Compute: 1424 dwords ->  5696 bytes -> round to 2 pages

We also allocate one additional page for the HWSP, so the total
allocation sizes for render and compute are 4 and 3 pages respectively.

Bspec: 65182, 56578, 55793
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_lrc.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 7a7fdcdadf372..7c15c55964a8b 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -37,9 +37,16 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
 {
 	switch (class) {
 	case XE_ENGINE_CLASS_RENDER:
+		if (GRAPHICS_VER(xe) >= 20)
+			return 4 * SZ_4K;
+		else
+			return 14 * SZ_4K;
 	case XE_ENGINE_CLASS_COMPUTE:
 		/* 14 pages since graphics_ver == 11 */
-		return 14 * SZ_4K;
+		if (GRAPHICS_VER(xe) >= 20)
+			return 3 * SZ_4K;
+		else
+			return 14 * SZ_4K;
 	default:
 		WARN(1, "Unknown engine class: %d", class);
 		fallthrough;
-- 
GitLab


From 015906fff123a3d0c6a44b69663d3041bfaca928 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:05 -0700
Subject: [PATCH 1832/2340] drm/xe/xe2: Add GT topology readout

Xe2 platforms have three DSS fuse registers for both geometry and
compute.

Bspec: 67171, 67537, 67401, 67536
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  3 +++
 drivers/gpu/drm/xe/xe_gt_topology.c  | 16 +++++++++++-----
 drivers/gpu/drm/xe/xe_gt_types.h     |  2 +-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 51d59e1229be7..c4458671893ec 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -162,6 +162,9 @@
 #define XELP_GT_GEOMETRY_DSS_ENABLE		XE_REG(0x913c)
 #define XEHP_GT_COMPUTE_DSS_ENABLE		XE_REG(0x9144)
 #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		XE_REG(0x9148)
+#define XE2_GT_COMPUTE_DSS_2			XE_REG(0x914c)
+#define XE2_GT_GEOMETRY_DSS_1			XE_REG(0x9150)
+#define XE2_GT_GEOMETRY_DSS_2			XE_REG(0x9154)
 
 #define GDRST					XE_REG(0x941c)
 #define   GRDOM_GUC				REG_BIT(3)
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index d4bbd0a835c22..a8d7f272c30a0 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -65,7 +65,10 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
 static void
 get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
 {
-	if (GRAPHICS_VERx100(xe) == 1260) {
+	if (GRAPHICS_VER(xe) > 20) {
+		*geometry_regs = 3;
+		*compute_regs = 3;
+	} else if (GRAPHICS_VERx100(xe) == 1260) {
 		*geometry_regs = 0;
 		*compute_regs = 2;
 	} else if (GRAPHICS_VERx100(xe) >= 1250) {
@@ -90,15 +93,18 @@ xe_gt_topology_init(struct xe_gt *gt)
 	 * Register counts returned shouldn't exceed the number of registers
 	 * passed as parameters below.
 	 */
-	drm_WARN_ON(&xe->drm, num_geometry_regs > 1);
-	drm_WARN_ON(&xe->drm, num_compute_regs > 2);
+	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
+	drm_WARN_ON(&xe->drm, num_compute_regs > 3);
 
 	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
 		      num_geometry_regs,
-		      XELP_GT_GEOMETRY_DSS_ENABLE);
+		      XELP_GT_GEOMETRY_DSS_ENABLE,
+		      XE2_GT_GEOMETRY_DSS_1,
+		      XE2_GT_GEOMETRY_DSS_2);
 	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
 		      XEHP_GT_COMPUTE_DSS_ENABLE,
-		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
+		      XE2_GT_COMPUTE_DSS_2);
 	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
 
 	xe_gt_topology_dump(gt, &p);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 35b8c19fa8bf5..48fd698ff62aa 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -24,7 +24,7 @@ enum xe_gt_type {
 	XE_GT_TYPE_MEDIA,
 };
 
-#define XE_MAX_DSS_FUSE_REGS	2
+#define XE_MAX_DSS_FUSE_REGS	3
 #define XE_MAX_EU_FUSE_REGS	1
 
 typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
-- 
GitLab


From 5c82000f54716685791f54330098dc93512d1716 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:06 -0700
Subject: [PATCH 1833/2340] drm/xe/xe2: Add MCR register steering for primary
 GT

Xe2 uses the same steering control register and steering semaphore
register as MTL.  As with recent platforms, group/instance 0,0 is
sufficient to target a non-terminated instance for most classes of MCR
registers; the only types of ranges that need to consider platform
fusing to find a non-terminated instance are SLICE/DSS ranges and a new
SQIDI_PSMI type of range.

Note that the range of valid bits in XE2_NODE_ENABLE_MASK may be reduced
for some Xe2 SKUs.  However the lowest bits are always valid and only
the lowest instance is obtained via __ffs(), so there's no need to
complicate the masking with extra platform/subplatform checks.

Also note that Wa_14017387313 suggests skipping MCR lock acquisition
around GAM and GAMWKR registers to prevent MCR register accesses in an
interrupt handler from deadlocking when the steering semaphore is
already held outside the interrupt context.  At this time Xe never
issues MCR accesses from within an interrupt handler so the workaround
is not currently needed.

v2:
  - [0x008700-0x0087FF] range to extend up to 0x887F (Matt Attwood)
  - [0x00EF00-0x00F4FF] -> [0x00F000, 0xFFFF] to follow latest
    bspec version (Bala)

Bspec: 71185
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  1 +
 drivers/gpu/drm/xe/xe_gt_mcr.c       | 50 +++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_gt_types.h     |  1 +
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index c4458671893ec..33830cd0df661 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -143,6 +143,7 @@
 #define   EN_32B_ACCESS				REG_BIT(30)
 
 #define	MIRROR_FUSE3				XE_REG(0x9118)
+#define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
 #define   L3BANK_PAIR_COUNT			4
 #define   L3BANK_MASK				REG_GENMASK(3, 0)
 /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 9eb7a6a1348d6..26f69e52b1208 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -181,6 +181,39 @@ static const struct xe_mmio_range dg2_implicit_steering_table[] = {
 	{},
 };
 
+static const struct xe_mmio_range xe2lpg_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },         /* SLICE */
+	{ 0x005500, 0x007FFF },         /* SLICE */
+	{ 0x008140, 0x00815F },         /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },         /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },         /* DSS */
+	{ 0x00D800, 0x00D87F },         /* SLICE */
+	{ 0x00DC00, 0x00DCFF },         /* SLICE */
+	{ 0x00DE80, 0x00E8FF },         /* DSS (0xE000-0xE0FF reserved) */
+	{ 0x00E980, 0x00E9FF },         /* SLICE */
+	{ 0x013000, 0x0133FF },         /* DSS (0x13000-0x131FF), SLICE (0x13200-0x133FF) */
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_sqidi_psmi_steering_table[] = {
+	{ 0x000B00, 0x000BFF },
+	{ 0x001000, 0x001FFF },
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },         /* GAM, rsvd, GAMWKR */
+	{ 0x008700, 0x00887F },         /* SQIDI, MEMPIPE */
+	{ 0x00B000, 0x00B3FF },         /* NODE, L3BANK */
+	{ 0x00C800, 0x00CFFF },         /* GAM */
+	{ 0x00D880, 0x00D8FF },         /* NODE */
+	{ 0x00DD00, 0x00DDFF },         /* MEMPIPE */
+	{ 0x00E900, 0x00E97F },         /* MEMPIPE */
+	{ 0x00F000, 0x00FFFF },         /* GAM, GAMWKR */
+	{ 0x013400, 0x0135FF },         /* MEMPIPE */
+	{},
+};
+
 static void init_steering_l3bank(struct xe_gt *gt)
 {
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
@@ -265,6 +298,16 @@ static void init_steering_oaddrm(struct xe_gt *gt)
 	gt->steering[DSS].instance_target = 0;		/* unused */
 }
 
+static void init_steering_sqidi_psmi(struct xe_gt *gt)
+{
+	u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
+				 xe_mmio_read32(gt, MIRROR_FUSE3));
+	u32 select = __ffs(mask);
+
+	gt->steering[SQIDI_PSMI].group_target = select >> 1;
+	gt->steering[SQIDI_PSMI].instance_target = select & 0x1;
+}
+
 static void init_steering_inst0(struct xe_gt *gt)
 {
 	gt->steering[DSS].group_target = 0;		/* unused */
@@ -280,6 +323,7 @@ static const struct {
 	[LNCF] =	{ "LNCF",	NULL }, /* initialized by mslice init */
 	[DSS] =		{ "DSS",	init_steering_dss },
 	[OADDRM] =	{ "OADDRM",	init_steering_oaddrm },
+	[SQIDI_PSMI] =  { "SQIDI_PSMI", init_steering_sqidi_psmi },
 	[INSTANCE0] =	{ "INSTANCE 0",	init_steering_inst0 },
 	[IMPLICIT_STEERING] = { "IMPLICIT", NULL },
 };
@@ -298,7 +342,11 @@ void xe_gt_mcr_init(struct xe_gt *gt)
 
 		gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
 	} else {
-		if (GRAPHICS_VERx100(xe) >= 1270) {
+		if (GRAPHICS_VER(xe) >= 20) {
+			gt->steering[DSS].ranges = xe2lpg_dss_steering_table;
+			gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table;
+			gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table;
+		} else if (GRAPHICS_VERx100(xe) >= 1270) {
 			gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
 			gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
 			gt->steering[DSS].ranges = xelpg_dss_steering_table;
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 48fd698ff62aa..d4310be3e1e7c 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -55,6 +55,7 @@ enum xe_steering_type {
 	LNCF,
 	DSS,
 	OADDRM,
+	SQIDI_PSMI,
 
 	/*
 	 * On some platforms there are multiple types of MCR registers that
-- 
GitLab


From 8e99b54508d6fb1a8d1c8d04128ea6634c00cb19 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:07 -0700
Subject: [PATCH 1834/2340] drm/xe/xe2: Add MCR register steering for media GT

Xe2 media has a few types of MCR registers, but all except for "GPMXMT"
can safely steer to instance 0,0.  GPMXMT follows the same rules that
MTL's OADDRM ranges did, so it can re-use the same enum value.

Bspec: 71186
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 26f69e52b1208..e74d3c5743c85 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -214,6 +214,23 @@ static const struct xe_mmio_range xe2lpg_instance0_steering_table[] = {
 	{},
 };
 
+static const struct xe_mmio_range xe2lpm_gpmxmt_steering_table[] = {
+	{ 0x388160, 0x38817F },
+	{ 0x389480, 0x3894CF },
+	{},
+};
+
+static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = {
+	{ 0x384000, 0x3847DF },         /* GAM, rsvd, GAM */
+	{ 0x384900, 0x384AFF },         /* GAM */
+	{ 0x389560, 0x3895FF },         /* MEDIAINF */
+	{ 0x38B600, 0x38B8FF },         /* L3BANK */
+	{ 0x38C800, 0x38D07F },         /* GAM, MEDIAINF */
+	{ 0x38F000, 0x38F0FF },         /* GAM */
+	{ 0x393C00, 0x393C7F },         /* MEDIAINF */
+	{},
+};
+
 static void init_steering_l3bank(struct xe_gt *gt)
 {
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
@@ -322,7 +339,7 @@ static const struct {
 	[MSLICE] =	{ "MSLICE",	init_steering_mslice },
 	[LNCF] =	{ "LNCF",	NULL }, /* initialized by mslice init */
 	[DSS] =		{ "DSS",	init_steering_dss },
-	[OADDRM] =	{ "OADDRM",	init_steering_oaddrm },
+	[OADDRM] =	{ "OADDRM / GPMXMT", init_steering_oaddrm },
 	[SQIDI_PSMI] =  { "SQIDI_PSMI", init_steering_sqidi_psmi },
 	[INSTANCE0] =	{ "INSTANCE 0",	init_steering_inst0 },
 	[IMPLICIT_STEERING] = { "IMPLICIT", NULL },
@@ -340,7 +357,12 @@ void xe_gt_mcr_init(struct xe_gt *gt)
 	if (gt->info.type == XE_GT_TYPE_MEDIA) {
 		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
 
-		gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+		if (MEDIA_VER(xe) >= 20) {
+			gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
+			gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
+		} else {
+			gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+		}
 	} else {
 		if (GRAPHICS_VER(xe) >= 20) {
 			gt->steering[DSS].ranges = xe2lpg_dss_steering_table;
-- 
GitLab


From c5fa58146ee0e55ef3e8b28c1aed705c97968336 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:08 -0700
Subject: [PATCH 1835/2340] drm/xe/xe2: Update context image layouts

Engine register state layout has changed a bit on Xe2.  We'll also
explicitly define a BCS layout to ensure BLIT_SWCTL and BLIT_CCTL are
included.

Bspec: 65182, 60184, 55793
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_lrc.c | 76 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 7c15c55964a8b..2b4219c383593 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -424,6 +424,69 @@ static const u8 mtl_rcs_offsets[] = {
 	END
 };
 
+#define XE2_CTX_COMMON \
+	NOP(1),                 /* [0x00] */ \
+	LRI(15, POSTED),        /* [0x01] */ \
+	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
+	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
+	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
+	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
+	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
+	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
+	REG(0x140),             /* [0x0e] BB_ADDR */ \
+	REG(0x110),             /* [0x10] BB_STATE */ \
+	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
+	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
+	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
+	REG(0x180),             /* [0x18] CCID */ \
+	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
+	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
+	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
+	\
+	NOP(1),                 /* [0x20] */ \
+	LRI(9, POSTED),         /* [0x21] */ \
+	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
+	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
+	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
+	REG16(0x284),           /* [0x28] dummy reg */ \
+	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
+	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
+	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
+	REG16(0x274),           /* [0x30] PTBP_UDW */ \
+	REG16(0x270)            /* [0x32] PTBP_LDW */
+
+static const u8 xe2_rcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	NOP(2),                 /* [0x34] */
+	LRI(2, POSTED),         /* [0x36] */
+	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
+	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */
+
+	NOP(6),                 /* [0x41] */
+	LRI(1, 0),              /* [0x47] */
+	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */
+
+	END
+};
+
+static const u8 xe2_bcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	NOP(4 + 8 + 1),         /* [0x34] */
+	LRI(2, POSTED),         /* [0x41] */
+	REG16(0x200),           /* [0x42] BCS_SWCTRL */
+	REG16(0x204),           /* [0x44] BLIT_CCTL */
+
+	END
+};
+
+static const u8 xe2_xcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	END
+};
+
 #undef END
 #undef REG16
 #undef REG
@@ -433,7 +496,9 @@ static const u8 mtl_rcs_offsets[] = {
 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
 {
 	if (class == XE_ENGINE_CLASS_RENDER) {
-		if (GRAPHICS_VERx100(xe) >= 1270)
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1270)
 			return mtl_rcs_offsets;
 		else if (GRAPHICS_VERx100(xe) >= 1255)
 			return dg2_rcs_offsets;
@@ -441,8 +506,15 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
 			return xehp_rcs_offsets;
 		else
 			return gen12_rcs_offsets;
+	} else if (class == XE_ENGINE_CLASS_COPY) {
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_bcs_offsets;
+		else
+			return gen12_xcs_offsets;
 	} else {
-		if (GRAPHICS_VERx100(xe) >= 1255)
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_xcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1255)
 			return dg2_xcs_offsets;
 		else
 			return gen12_xcs_offsets;
-- 
GitLab


From 53497182ddf7a98fc33049d51ac3692c2f8097da Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:09 -0700
Subject: [PATCH 1836/2340] drm/xe/xe2: Handle fused-off CCS engines

On Xe2 platforms, availability of the CCS engines is reflected in the
FUSE4 register.

Bspec: 62483
Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  1 +
 drivers/gpu/drm/xe/xe_hw_engine.c    | 29 +++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 33830cd0df661..271ed0cdbe21e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -152,6 +152,7 @@
 
 /* Fuse readout registers for GT */
 #define XEHP_FUSE4				XE_REG(0x9114)
+#define   CCS_EN_MASK				REG_GENMASK(19, 16)
 #define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
 
 #define GT_VEBOX_VDBOX_DISABLE			XE_REG(0x9140)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 24b5226f14335..dd673a684b709 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -550,7 +550,7 @@ static void read_copy_fuses(struct xe_gt *gt)
 	}
 }
 
-static void read_compute_fuses(struct xe_gt *gt)
+static void read_compute_fuses_from_dss(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
@@ -577,6 +577,33 @@ static void read_compute_fuses(struct xe_gt *gt)
 	}
 }
 
+static void read_compute_fuses_from_reg(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 ccs_mask;
+
+	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
+	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
+
+	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if ((ccs_mask & BIT(j)) == 0) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "ccs%u fused off\n", j);
+		}
+	}
+}
+
+static void read_compute_fuses(struct xe_gt *gt)
+{
+	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
+		read_compute_fuses_from_reg(gt);
+	else
+		read_compute_fuses_from_dss(gt);
+}
+
 int xe_hw_engines_init_early(struct xe_gt *gt)
 {
 	int i;
-- 
GitLab


From be6dd3c8e884f7b1a9f76c3ad1efd068b981f7d5 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:10 -0700
Subject: [PATCH 1837/2340] drm/xe/xe2: AuxCCS is no longer used

Starting with Xe2, all platforms (including igpu platforms) use FlatCCS
compression rather than AuxCCS.  Similar to PVC, any future platforms
that don't support FlatCCS should not attempt to fall back to AuxCCS
programming.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 36058600e231b..9e23293ec4d3b 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -243,9 +243,11 @@ static bool has_aux_ccs(struct xe_device *xe)
 {
 	/*
 	 * PVC is a special case that has no compression of either type
-	 * (FlatCCS or AuxCCS).
+	 * (FlatCCS or AuxCCS).  Also, AuxCCS is no longer used from Xe2
+	 * onward, so any future platforms with no FlatCCS will not have
+	 * AuxCCS either.
 	 */
-	if (xe->info.platform == XE_PVC)
+	if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
 		return false;
 
 	return !xe->info.has_flat_ccs;
-- 
GitLab


From 2985bedc1c59441f4b0d4724a1c2211e0b6b4a19 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:11 -0700
Subject: [PATCH 1838/2340] drm/xe/xe2: Define Xe2_LPG IP features

Define a common set of Xe2 graphics feature flags and definitions that
will be used for all platforms in this family.

Several of the feature flags are inherited unchanged from Xe_HP and/or
Xe_HPC platforms:
 - dma_mask_size remains 46   (Bspec 70817)
 - supports_usm=1             (Bspec 59651)
 - has_flatccs=1              (Bspec 58797)
 - has_asid=1                 (Bspec 59654, 59265, 60288)
 - has_range_tlb_invalidate=1 (Bspec 71126)

However some of them still need proper implementation in the driver to
be used, so they are disabled.

Notable Xe2-specific changes:
 - All Xe2 platforms use a five-level page table, regardless of the
   virtual address space for the platform.  (Bspec 59505)

The graphics engine mask represents the Xe2 architecture engines (Bspec
60149), but individual platforms may have a reduced set of usable
engines, as reflected by their fusing.

Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 32adeda3520cb..46145340ae90b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -164,6 +164,24 @@ static const struct xe_graphics_desc graphics_xelpg = {
 	.has_flat_ccs = 0,
 };
 
+#define XE2_GFX_FEATURES \
+	.dma_mask_size = 46, \
+	.has_asid = 1, \
+	.has_flat_ccs = 0 /* FIXME: implementation missing */, \
+	.has_range_tlb_invalidation = 1, \
+	.supports_usm = 0 /* FIXME: implementation missing */, \
+	.vm_max_level = 4, \
+	.hw_engine_mask = \
+		BIT(XE_HW_ENGINE_RCS0) | \
+		BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \
+		GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
+
+static const struct xe_graphics_desc graphics_xe2 = {
+	.name = "Xe2_LPG",
+
+	XE2_GFX_FEATURES,
+};
+
 static const struct xe_media_desc media_xem = {
 	.name = "Xe_M",
 	.ver = 12,
@@ -296,6 +314,7 @@ static const struct xe_device_desc mtl_desc = {
 static struct gmdid_map graphics_ip_map[] = {
 	{ 1270, &graphics_xelpg },
 	{ 1271, &graphics_xelpg },
+	{ 2004, &graphics_xe2 },
 };
 
 /* Map of GMD_ID values to media IP */
-- 
GitLab


From 595e4a3aade359f8e3bc84bd30746cb5826c4e67 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:12 -0700
Subject: [PATCH 1839/2340] drm/xe/xe2: Define Xe2_LPM IP features

Xe2_LPM media is represented by GMD_ID value 20.00.
It provides 1 VD + 1 VE + 1 SFC.

Bspec: 70821, 70819
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 46145340ae90b..467b0cef6d21b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -209,6 +209,12 @@ static const struct xe_media_desc media_xelpmp = {
 		BIT(XE_HW_ENGINE_VECS0),	/* TODO: add GSC0 */
 };
 
+static const struct xe_media_desc media_xe2 = {
+	.name = "Xe2_LPM",
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
+};
+
 static const struct xe_device_desc tgl_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
@@ -320,6 +326,7 @@ static struct gmdid_map graphics_ip_map[] = {
 /* Map of GMD_ID values to media IP */
 static struct gmdid_map media_ip_map[] = {
 	{ 1300, &media_xelpmp },
+	{ 2000, &media_xe2 },
 };
 
 #define INTEL_VGA_DEVICE(id, info) {			\
-- 
GitLab


From e9bb0891e69055cdfc1053f297b1b8b033372975 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:13 -0700
Subject: [PATCH 1840/2340] drm/xe/xe2: Track VA bits independently of max page
 table level

Starting with Xe2, a 5-level page table is always used, regardless of
the actual virtual address range supported by the platform.  The two
values need to be tracked separately in the device descriptor since Xe2
platforms only have a 48 bit virtual address range.

Bspec: 59505, 65637, 70817
Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 drivers/gpu/drm/xe/xe_pci.c          | 6 ++++++
 drivers/gpu/drm/xe/xe_pci_types.h    | 1 +
 drivers/gpu/drm/xe/xe_query.c        | 3 +--
 drivers/gpu/drm/xe/xe_vm.c           | 2 +-
 5 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 6e852809d3ac9..552e8a343d8f0 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -210,6 +210,8 @@ struct xe_device {
 		u8 gt_count;
 		/** @vm_max_level: Max VM level */
 		u8 vm_max_level;
+		/** @va_bits: Maximum bits of a virtual address */
+		u8 va_bits;
 
 		/** @is_dgfx: is discrete device */
 		u8 is_dgfx:1;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 467b0cef6d21b..8512cd4518877 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -94,6 +94,7 @@ static const struct xe_graphics_desc graphics_xelp = {
 	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
 
 	.dma_mask_size = 39,
+	.va_bits = 48,
 	.vm_max_level = 3,
 };
 
@@ -105,6 +106,7 @@ static const struct xe_graphics_desc graphics_xelpp = {
 	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
 
 	.dma_mask_size = 39,
+	.va_bits = 48,
 	.vm_max_level = 3,
 };
 
@@ -112,6 +114,7 @@ static const struct xe_graphics_desc graphics_xelpp = {
 	.has_range_tlb_invalidation = true, \
 	.has_flat_ccs = true, \
 	.dma_mask_size = 46, \
+	.va_bits = 48, \
 	.vm_max_level = 3
 
 static const struct xe_graphics_desc graphics_xehpg = {
@@ -145,6 +148,7 @@ static const struct xe_graphics_desc graphics_xehpc = {
 	XE_HP_FEATURES,
 	.dma_mask_size = 52,
 	.max_remote_tiles = 1,
+	.va_bits = 57,
 	.vm_max_level = 4,
 	.vram_flags = XE_VRAM_FLAGS_NEED64K,
 
@@ -170,6 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
 	.has_flat_ccs = 0 /* FIXME: implementation missing */, \
 	.has_range_tlb_invalidation = 1, \
 	.supports_usm = 0 /* FIXME: implementation missing */, \
+	.va_bits = 48, \
 	.vm_max_level = 4, \
 	.hw_engine_mask = \
 		BIT(XE_HW_ENGINE_RCS0) | \
@@ -560,6 +565,7 @@ static int xe_info_init(struct xe_device *xe,
 
 	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
 	xe->info.vram_flags = graphics_desc->vram_flags;
+	xe->info.va_bits = graphics_desc->va_bits;
 	xe->info.vm_max_level = graphics_desc->vm_max_level;
 	xe->info.supports_usm = graphics_desc->supports_usm;
 	xe->info.has_asid = graphics_desc->has_asid;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index ba31b933eb8e0..df6ddbc52d7f6 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -14,6 +14,7 @@ struct xe_graphics_desc {
 	u8 rel;
 
 	u8 dma_mask_size;	/* available DMA address bits */
+	u8 va_bits;
 	u8 vm_max_level;
 	u8 vram_flags;
 
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 7ea235c71385f..1db77a7c90398 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -197,8 +197,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
 	config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] =
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
-	config->info[XE_QUERY_CONFIG_VA_BITS] = 12 +
-		(9 * (xe->info.vm_max_level + 1));
+	config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
 	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count;
 	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
 		hweight_long(xe->info.mem_region_mask);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d683418b817db..a774f9632ddae 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1221,7 +1221,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 
 	vm->xe = xe;
 
-	vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
+	vm->size = 1ull << xe->info.va_bits;
 
 	vm->flags = flags;
 
-- 
GitLab


From e4751ab5d2fef45d666e64a8766e08e9d60eccfd Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Fri, 11 Aug 2023 09:06:14 -0700
Subject: [PATCH 1841/2340] drm/xe/xe2: Add MOCS table

Additional minor change to remove L4_2_RESERVED, which will never be
required.

v2: Make L3/L4 names consistent for GLOB_MOCS defines (Matt Roper)

Bspec: 71582
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index c9653978fc9f5..c120090ef9b40 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -62,13 +62,15 @@ struct xe_mocs_info {
 #define L3_LKUP(value)		((value) << 7)
 
 /* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
-#define _L4_CACHEABILITY	REG_GENMASK(3, 2)
-#define IG_PAT			REG_BIT(8)
+#define IG_PAT				REG_BIT(8)
+#define L3_CACHE_POLICY_MASK		REG_GENMASK(5, 4)
+#define L4_CACHE_POLICY_MASK		REG_GENMASK(3, 2)
 
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
 #define MTL_NUM_MOCS_ENTRIES    16
+#define XE2_NUM_MOCS_ENTRIES	16
 
 /* (e)LLC caching options */
 /*
@@ -93,10 +95,14 @@ struct xe_mocs_info {
 #define L3_3_WB			_L3_CACHEABILITY(3)
 
 /* L4 caching options */
-#define L4_0_WB                 REG_FIELD_PREP(_L4_CACHEABILITY, 0)
-#define L4_1_WT                 REG_FIELD_PREP(_L4_CACHEABILITY, 1)
-#define L4_2_RESERVED           REG_FIELD_PREP(_L4_CACHEABILITY, 2)
-#define L4_3_UC                 REG_FIELD_PREP(_L4_CACHEABILITY, 3)
+#define L4_0_WB                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0)
+#define L4_1_WT                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 1)
+#define L4_3_UC                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 3)
+
+#define XE2_L3_0_WB		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 0)
+/* XD: WB Transient Display */
+#define XE2_L3_1_XD		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1)
+#define XE2_L3_3_UC		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3)
 
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
 	[__idx] = { \
@@ -370,6 +376,17 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = {
 		   L3_GLBGO(1) | L3_1_UC),
 };
 
+static const struct xe_mocs_entry xe2_mocs_table[] = {
+	/* Defer to PAT */
+	MOCS_ENTRY(0, XE2_L3_0_WB | L4_0_WB, 0),
+	/* Cached L3 + L4 */
+	MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
+	/* Uncached L3, Cached L4 */
+	MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0),
+	/* Uncached L3 + L4 */
+	MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0),
+};
+
 static unsigned int get_mocs_settings(struct xe_device *xe,
 				      struct xe_mocs_info *info)
 {
-- 
GitLab


From 0993b22f93f867b4ed1c1fc3f077fa7e736353d6 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:15 -0700
Subject: [PATCH 1842/2340] drm/xe/xe2: Program GuC's MOCS on Xe2 and beyond

As with PVC, Xe2 platforms require that the index of an uncached MOCS
entry be programmed into the GUC_SHIM_CONTROL register.  This will
likely be needed on future platforms as well.

Xe2 also extends the size of the MOCS index register field from two bits
to four bits.  Since these extra bits were unused on PVC, it should be
safe to just increase the size of the mask.

Bspec: 60592
Cc: Haridhar Kalvala <haridhar.kalvala@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_guc_regs.h | 2 +-
 drivers/gpu/drm/xe/xe_guc.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index fcb747201bc1c..ba375fc51a87a 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -45,7 +45,7 @@
 #define   GUC_WOPCM_SIZE_LOCKED			REG_BIT(0)
 
 #define GUC_SHIM_CONTROL			XE_REG(0xc064)
-#define   GUC_MOCS_INDEX_MASK			REG_GENMASK(25, 24)
+#define   GUC_MOCS_INDEX_MASK			REG_GENMASK(27, 24)
 #define   GUC_SHIM_WC_ENABLE			REG_BIT(21)
 #define   GUC_ENABLE_MIA_CLOCK_GATING		REG_BIT(15)
 #define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	REG_BIT(10)
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 2493c58599488..e102637c06953 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -326,7 +326,7 @@ static void guc_prepare_xfer(struct xe_guc *guc)
 		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
 				GUC_ENABLE_MIA_CACHING;
 
-	if (xe->info.platform == XE_PVC)
+	if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
 		shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
 
 	/* Must program this register before loading the ucode with DMA */
-- 
GitLab


From 3330361543fca2a60b71ebf02cd5e56bb417b159 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:16 -0700
Subject: [PATCH 1843/2340] drm/xe/lnl: Add LNL platform definition

LNL is an integrated GPU based on the Xe2 architecture.

Bspec: 70821
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c            | 6 ++++++
 drivers/gpu/drm/xe/xe_platform_types.h | 1 +
 include/drm/xe_pciids.h                | 5 +++++
 3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 8512cd4518877..6c2c6723b1b26 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -319,6 +319,11 @@ static const struct xe_device_desc mtl_desc = {
 	PLATFORM(XE_METEORLAKE),
 };
 
+static const struct xe_device_desc lnl_desc = {
+	PLATFORM(XE_LUNARLAKE),
+	.require_force_probe = true,
+};
+
 #undef PLATFORM
 
 /* Map of GMD_ID values to graphics IP */
@@ -356,6 +361,7 @@ static const struct pci_device_id pciidlist[] = {
 	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
 	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
 	XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+	XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index abbb8a1f29a8e..e378a64a0f863 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -21,6 +21,7 @@ enum xe_platform {
 	XE_DG2,
 	XE_PVC,
 	XE_METEORLAKE,
+	XE_LUNARLAKE,
 };
 
 enum xe_subplatform {
diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index 0d0cf80eb0ba3..f6ac6d9772ee1 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -191,4 +191,9 @@
 	XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__),	\
 	XE_ARL_IDS(MACRO__, ## __VA_ARGS__)
 
+#define XE_LNL_IDS(MACRO__, ...) \
+	MACRO__(0x6420, ## __VA_ARGS__), \
+	MACRO__(0x64A0, ## __VA_ARGS__), \
+	MACRO__(0x64B0, ## __VA_ARGS__)
+
 #endif
-- 
GitLab


From 943c01b72f3e9332d7a52ecffa35ef7152e18c5c Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 11 Aug 2023 09:06:17 -0700
Subject: [PATCH 1844/2340] drm/xe/lnl: Add GuC firmware definition

Define the GuC firmware to load on the platform.

Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 4b04f6e5388d4..1802b280cd8c6 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -100,6 +100,7 @@ struct fw_blobs_by_type {
 };
 
 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
+	fw_def(LUNARLAKE,	mmp_ver(xe,	guc,	lnl,	70, 6, 8))	\
 	fw_def(METEORLAKE,	mmp_ver(i915,	guc,	mtl,	70, 6, 4))	\
 	fw_def(PVC,		mmp_ver(xe,	guc,	pvc,	70, 6, 4))	\
 	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
-- 
GitLab


From 770576f1e1c001ba069e552e08893d56a64015c4 Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Fri, 11 Aug 2023 09:06:18 -0700
Subject: [PATCH 1845/2340] drm/xe/lnl: Hook up MOCS table

LNL uses the Xe2 MOCS table introduced in an earlier patch.

Bspec: 71582
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index c120090ef9b40..75d025c54eb84 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -395,6 +395,14 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 	memset(info, 0, sizeof(struct xe_mocs_info));
 
 	switch (xe->info.platform) {
+	case XE_LUNARLAKE:
+		info->size = ARRAY_SIZE(xe2_mocs_table);
+		info->table = xe2_mocs_table;
+		info->n_entries = XE2_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		info->wb_index = 1;
+		info->unused_entries_index = 1;
+		break;
 	case XE_PVC:
 		info->size = ARRAY_SIZE(pvc_mocs_desc);
 		info->table = pvc_mocs_desc;
-- 
GitLab


From 07431945d8ae805746bbd01b052eeefb919911db Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 22 Aug 2023 17:33:13 -0700
Subject: [PATCH 1846/2340] drm/xe: Avoid 64-bit register reads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Intel hardware officially only supports GTTMMADR register accesses of
32-bits or less (although 64-bit accesses to device memory and PTEs in
the GSM are fine).  Even though we do usually seem to get back
reasonable values when performing readq() operations on registers in
BAR0, we shouldn't rely on this violation of the spec working
consistently.  It's likely that even when we do get proper register
values back the hardware is internally satisfying the request via a
non-atomic sequence of two 32-bit reads, which can be problematic for
timestamps and counters if rollover of the lower bits is not considered.

Replace xe_mmio_read64() with xe_mmio_read64_2x32() that implements
64-bit register reads as two 32-bit reads and attempts to ensure that
the upper dword has stabilized to avoid problematic rollovers for
counter and timestamp registers.

v2:
 - Move function from xe_mmio.h to xe_mmio.c.  (Lucas)
 - Convert comment to kerneldoc and note that it shouldn't be used on
   registers where reads may trigger side effects.  (Lucas)

Bspec: 60027
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://lore.kernel.org/r/20230823003312.1356779-3-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c           | 56 ++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_mmio.h           | 12 +-----
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c |  6 +--
 3 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index bb6823db14d49..c2ec52eefb2e4 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -228,7 +228,7 @@ int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size,
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
 		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
 	} else {
-		offset = xe_mmio_read64(gt, GSMBASE);
+		offset = xe_mmio_read64_2x32(gt, GSMBASE);
 	}
 
 	/* remove the tile offset so we have just the available size */
@@ -326,7 +326,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 	if (xe->info.tile_count == 1)
 		return;
 
-	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR);
+	mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
 	adj_tile_count = xe->info.tile_count =
 		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
 
@@ -509,7 +509,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 			args->value = xe_mmio_read32(gt, reg);
 			break;
 		case DRM_XE_MMIO_64BIT:
-			args->value = xe_mmio_read64(gt, reg);
+			args->value = xe_mmio_read64_2x32(gt, reg);
 			break;
 		default:
 			drm_dbg(&xe->drm, "Invalid MMIO bit size");
@@ -526,3 +526,53 @@ exit:
 
 	return ret;
 }
+
+/**
+ * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
+ * @gt: MMIO target GT
+ * @reg: register to read value from
+ *
+ * Although Intel GPUs have some 64-bit registers, the hardware officially
+ * only supports GTTMMADR register reads of 32 bits or smaller.  Even if
+ * a readq operation may return a reasonable value, that violation of the
+ * spec shouldn't be relied upon and all 64-bit register reads should be
+ * performed as two 32-bit reads of the upper and lower dwords.
+ *
+ * When reading registers that may be changing (such as
+ * counters), a rollover of the lower dword between the two 32-bit reads
+ * can be problematic.  This function attempts to ensure the upper dword has
+ * stabilized before returning the 64-bit value.
+ *
+ * Note that because this function may re-read the register multiple times
+ * while waiting for the value to stabilize it should not be used to read
+ * any registers where read operations have side effects.
+ *
+ * Returns the value of the 64-bit register.
+ */
+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
+	u32 ldw, udw, oldudw, retries;
+
+	if (reg.addr < gt->mmio.adj_limit) {
+		reg.addr += gt->mmio.adj_offset;
+		reg_udw.addr += gt->mmio.adj_offset;
+	}
+
+	oldudw = xe_mmio_read32(gt, reg_udw);
+	for (retries = 5; retries; --retries) {
+		ldw = xe_mmio_read32(gt, reg);
+		udw = xe_mmio_read32(gt, reg_udw);
+
+		if (udw == oldudw)
+			break;
+
+		oldudw = udw;
+	}
+
+	xe_gt_WARN(gt, retries == 0,
+		   "64-bit read of %#x did not stabilize\n", reg.addr);
+
+	return (u64)udw << 32 | ldw;
+}
+
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index d24badca86771..f72c34c7d1d08 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -11,6 +11,7 @@
 
 #include "regs/xe_reg_defs.h"
 #include "xe_device_types.h"
+#include "xe_gt_printk.h"
 #include "xe_gt_types.h"
 
 struct drm_device;
@@ -85,16 +86,6 @@ static inline void xe_mmio_write64(struct xe_gt *gt,
 	writeq(val, tile->mmio.regs + reg.addr);
 }
 
-static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
-{
-	struct xe_tile *tile = gt_to_tile(gt);
-
-	if (reg.addr < gt->mmio.adj_limit)
-		reg.addr += gt->mmio.adj_offset;
-
-	return readq(tile->mmio.regs + reg.addr);
-}
-
 static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 					     struct xe_reg reg, u32 val,
 					     u32 mask, u32 eval)
@@ -155,5 +146,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range,
 
 int xe_mmio_probe_vram(struct xe_device *xe);
 int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base);
+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index be0a25e239291..6ba6b1b7f34b6 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -67,7 +67,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	}
 
 	/* Use DSM base address instead for stolen memory */
-	mgr->stolen_base = (xe_mmio_read64(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
+	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
 	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
 		return 0;
 
@@ -126,8 +126,8 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 
 	/* Carve out the top of DSM as it contains the reserved WOPCM region */
 	wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK,
-				     xe_mmio_read64(xe_root_mmio_gt(xe),
-						    STOLEN_RESERVED));
+				     xe_mmio_read64_2x32(xe_root_mmio_gt(xe),
+							 STOLEN_RESERVED));
 	stolen_size -= (1U << wopcm_size) * SZ_1M;
 
 	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
-- 
GitLab


From 486b2ef2768222bb4210709ccf5443c3e381346e Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 22 Aug 2023 17:33:14 -0700
Subject: [PATCH 1847/2340] drm/xe: Drop xe_mmio_write64()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only possible 64-bit register writes in the driver come from the
highly questionable MMIO ioctl.  That ioctl's register write support
only operates for userspace running as root and cannot be used by any
real userspace; it exists solely to support the "xe_reg" debug tool in
IGT.  Since the spec indicates that hardware does not officially support
64-bit register accesses, there's no reason to allow such 64-bit writes,
even for debugging.

Bspec: 60027
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://lore.kernel.org/r/20230823003312.1356779-4-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c |  3 ---
 drivers/gpu/drm/xe/xe_mmio.h | 11 -----------
 2 files changed, 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index c2ec52eefb2e4..3ccc0af4430be 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -490,9 +490,6 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 			}
 			xe_mmio_write32(gt, reg, args->value);
 			break;
-		case DRM_XE_MMIO_64BIT:
-			xe_mmio_write64(gt, reg, args->value);
-			break;
 		default:
 			drm_dbg(&xe->drm, "Invalid MMIO bit size");
 			fallthrough;
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index f72c34c7d1d08..cd9fe08ccf4ad 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -75,17 +75,6 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
 	return old;
 }
 
-static inline void xe_mmio_write64(struct xe_gt *gt,
-				   struct xe_reg reg, u64 val)
-{
-	struct xe_tile *tile = gt_to_tile(gt);
-
-	if (reg.addr < gt->mmio.adj_limit)
-		reg.addr += gt->mmio.adj_offset;
-
-	writeq(val, tile->mmio.regs + reg.addr);
-}
-
 static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 					     struct xe_reg reg, u32 val,
 					     u32 mask, u32 eval)
-- 
GitLab


From 1c66c0f391da32534cf143e6a0f6391776aa9bf8 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 22 Aug 2023 10:33:32 -0700
Subject: [PATCH 1848/2340] drm/xe: fix submissions without vm

Kernel queues can submit privileged batches directly in GGTT, so they
don't always need a vm. The submission front-end already supports
creating and submitting jobs without a vm, but some parts of the
back-end assume the vm is always there. Fix this by handling a lack of
vm in the back-end as well.

v2: s/XE_BUG_ON/XE_WARN_ON, s/engine/exec_queue

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230822173334.1664332-2-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 2 +-
 drivers/gpu/drm/xe/xe_ring_ops.c   | 8 ++++----
 drivers/gpu/drm/xe/xe_sched_job.c  | 3 +++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 55c7b13d15ecc..87f2972b7c208 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1136,7 +1136,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	ge->q = q;
 	init_waitqueue_head(&ge->suspend_wait);
 
-	timeout = xe_vm_no_dma_fences(q->vm) ? MAX_SCHEDULE_TIMEOUT :
+	timeout = (q->vm && xe_vm_no_dma_fences(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
 		  q->hwe->eclass->sched_props.job_timeout_ms;
 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
 			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 9e23293ec4d3b..2b4127ea1eabf 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -213,7 +213,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 	u32 ppgtt_flag = get_ppgtt_flag(job);
 	struct xe_vm *vm = job->q->vm;
 
-	if (vm->batch_invalidate_tlb) {
+	if (vm && vm->batch_invalidate_tlb) {
 		dw[i++] = preparser_disable(true);
 		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
 					seqno, true, dw, i);
@@ -273,13 +273,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
 	}
 
-	if (vm->batch_invalidate_tlb)
+	if (vm && vm->batch_invalidate_tlb)
 		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
 					seqno, true, dw, i);
 
 	dw[i++] = preparser_disable(false);
 
-	if (!vm->batch_invalidate_tlb)
+	if (!vm || !vm->batch_invalidate_tlb)
 		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
 					seqno, dw, i);
 
@@ -318,7 +318,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
 
 	/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
-	i = emit_pipe_invalidate(mask_flags, vm->batch_invalidate_tlb, dw, i);
+	i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
 
 	/* hsdes: 1809175790 */
 	if (has_aux_ccs(xe))
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index de2851d24c968..0479d059dc77a 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -87,6 +87,9 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 	int i, j;
 	u32 width;
 
+	/* only a kernel context can submit a vm-less job */
+	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
+
 	/* Migration and kernel engines have their own locking */
 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM |
 			  EXEC_QUEUE_FLAG_WA))) {
-- 
GitLab


From 923e42381745f55ba27a8805a055b51139af6830 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 22 Aug 2023 10:33:33 -0700
Subject: [PATCH 1849/2340] drm/xe: split kernel vs permanent engine flags

If an engine is only destroyed on driver unload, we can skip its
clean-up steps with the GuC because the GuC is going to be tuned off as
well, so it doesn't matter if we're in sync with it or not. Currently,
we apply this optimization to all engines marked as kernel, but this
stops us to supporting kernel engines that don't stick around until
unload. To remove this limitation, add a separate flag to indicate if
the engine is expected to only be destryed on driver unload and use that
to trigger the optimzation.

While at it, add a small comment to explain what each engine flag
represents.

v2: s/XE_BUG_ON/XE_WARN_ON, s/ENGINE/EXEC_QUEUE
v3: rebased

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230822173334.1664332-3-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c       |  3 +++
 drivers/gpu/drm/xe/xe_exec_queue_types.h | 24 ++++++++++++++++--------
 drivers/gpu/drm/xe/xe_guc_submit.c       |  6 +++---
 drivers/gpu/drm/xe/xe_migrate.c          |  7 +++++--
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index f6619688f92fd..867465b0c57ba 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -41,6 +41,9 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	int err;
 	int i;
 
+	/* only kernel queues can be permanent */
+	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
+
 	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
 	if (!q)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 4506289b8b7bd..1f0051a91dae0 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -65,14 +65,22 @@ struct xe_exec_queue {
 	/** @fence_irq: fence IRQ used to signal job completion */
 	struct xe_hw_fence_irq *fence_irq;
 
-#define EXEC_QUEUE_FLAG_BANNED		BIT(0)
-#define EXEC_QUEUE_FLAG_KERNEL		BIT(1)
-#define EXEC_QUEUE_FLAG_PERSISTENT		BIT(2)
-#define EXEC_QUEUE_FLAG_COMPUTE_MODE	BIT(3)
-/* Caller needs to hold rpm ref when creating engine with EXEC_QUEUE_FLAG_VM */
-#define EXEC_QUEUE_FLAG_VM			BIT(4)
-#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
-#define EXEC_QUEUE_FLAG_WA			BIT(6)
+/* queue no longer allowed to submit */
+#define EXEC_QUEUE_FLAG_BANNED			BIT(0)
+/* queue used for kernel submission only */
+#define EXEC_QUEUE_FLAG_KERNEL			BIT(1)
+/* kernel engine only destroyed at driver unload */
+#define EXEC_QUEUE_FLAG_PERMANENT		BIT(2)
+/* queue keeps running pending jobs after destroy ioctl */
+#define EXEC_QUEUE_FLAG_PERSISTENT		BIT(3)
+/* queue for use with compute VMs */
+#define EXEC_QUEUE_FLAG_COMPUTE_MODE		BIT(4)
+/* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */
+#define EXEC_QUEUE_FLAG_VM			BIT(5)
+/* child of VM queue for multi-tile VM jobs */
+#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(6)
+/* queue used for WA setup */
+#define EXEC_QUEUE_FLAG_WA			BIT(7)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 87f2972b7c208..832e79fb0a025 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -965,7 +965,7 @@ static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
 	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
 
 	/* We must block on kernel engines so slabs are empty on driver unload */
-	if (q->flags & EXEC_QUEUE_FLAG_KERNEL)
+	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
 		__guc_exec_queue_fini_async(&q->guc->fini_async);
 	else
 		queue_work(system_wq, &q->guc->fini_async);
@@ -988,7 +988,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
 	struct xe_exec_queue *q = msg->private_data;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL);
+	XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_PERMANENT);
 	trace_xe_exec_queue_cleanup_entity(q);
 
 	if (exec_queue_registered(q))
@@ -1208,7 +1208,7 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q)
 {
 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
 
-	if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL))
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
 		guc_exec_queue_add_msg(q, msg, CLEANUP);
 	else
 		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 06e85f7162d41..6e0d4e2c497ab 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -343,11 +343,14 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 
 		m->q = xe_exec_queue_create(xe, vm,
 					    BIT(hwe->logical_instance), 1,
-					    hwe, EXEC_QUEUE_FLAG_KERNEL);
+					    hwe,
+					    EXEC_QUEUE_FLAG_KERNEL |
+					    EXEC_QUEUE_FLAG_PERMANENT);
 	} else {
 		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
 						  XE_ENGINE_CLASS_COPY,
-						  EXEC_QUEUE_FLAG_KERNEL);
+						  EXEC_QUEUE_FLAG_KERNEL |
+						  EXEC_QUEUE_FLAG_PERMANENT);
 	}
 	if (IS_ERR(m->q)) {
 		xe_vm_close_and_put(vm);
-- 
GitLab


From 9e9526352d6f7f94a4348cebce9859dfebed1dea Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 22 Aug 2023 10:33:34 -0700
Subject: [PATCH 1850/2340] drm/xe: standardize vm-less kernel submissions

The current only submission in the driver that doesn't use a vm is the
WA setup. We still pass a vm structure (the migration one), but we don't
actually use it at submission time and we instead have an hack to use
GGTT for this particular engine.
Instead of special-casing the WA engine, we can skip providing a VM and
use that as selector for whether to use GGTT or PPGTT. As part of this
change, we can drop the special engine flag for the WA engine and switch
the WA submission to use the standard job functions instead of dedicated
ones.

v2: rebased on s/engine/exec_queue

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230822173334.1664332-4-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bb.c               | 10 ----------
 drivers/gpu/drm/xe/xe_bb.h               |  2 --
 drivers/gpu/drm/xe/xe_exec_queue.c       |  4 ++--
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  2 --
 drivers/gpu/drm/xe/xe_gt.c               | 23 +++++++----------------
 drivers/gpu/drm/xe/xe_ring_ops.c         |  2 +-
 drivers/gpu/drm/xe/xe_sched_job.c        |  6 ++----
 7 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 38f4ce83a207e..1fbc2fcddc96b 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -73,16 +73,6 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
 	return xe_sched_job_create(q, addr);
 }
 
-struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q,
-					 struct xe_bb *bb, u64 batch_base_ofs)
-{
-	u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);
-
-	XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION));
-
-	return __xe_bb_create_job(q, bb, &addr);
-}
-
 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
 						struct xe_bb *bb,
 						u64 batch_base_ofs,
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index c5ae0770bab5c..fafacd73dcc38 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -20,8 +20,6 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
 						struct xe_bb *bb, u64 batch_ofs,
 						u32 second_idx);
-struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q,
-					 struct xe_bb *bb, u64 batch_ofs);
 void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 867465b0c57ba..f28bceceb99ae 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -95,7 +95,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	 * can perform GuC CT actions when needed. Caller is expected to
 	 * have already grabbed the rpm ref outside any sensitive locks.
 	 */
-	if (q->flags & EXEC_QUEUE_FLAG_VM)
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM))
 		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
 
 	return q;
@@ -174,7 +174,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 		xe_lrc_finish(q->lrc + i);
 	if (q->vm)
 		xe_vm_put(q->vm);
-	if (q->flags & EXEC_QUEUE_FLAG_VM)
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM))
 		xe_device_mem_access_put(gt_to_xe(q->gt));
 
 	kfree(q);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 1f0051a91dae0..4f4190971dcfc 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -79,8 +79,6 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_VM			BIT(5)
 /* child of VM queue for multi-tile VM jobs */
 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(6)
-/* queue used for WA setup */
-#define EXEC_QUEUE_FLAG_WA			BIT(7)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 13320af4ddd3e..3d6a7c11bac19 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -87,15 +87,13 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
 	struct dma_fence *fence;
-	u64 batch_ofs;
 	long timeout;
 
 	bb = xe_bb_new(gt, 4, false);
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
-	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
-	job = xe_bb_create_wa_job(q, bb, batch_ofs);
+	job = xe_bb_create_job(q, bb);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
 		return PTR_ERR(job);
@@ -124,7 +122,6 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
 	struct dma_fence *fence;
-	u64 batch_ofs;
 	long timeout;
 	int count = 0;
 
@@ -143,8 +140,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 		}
 	}
 
-	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
-	job = xe_bb_create_wa_job(q, bb, batch_ofs);
+	job = xe_bb_create_job(q, bb);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
 		return PTR_ERR(job);
@@ -168,14 +164,12 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 int xe_gt_record_default_lrcs(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
-	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
 	int err = 0;
 
 	for_each_hw_engine(hwe, gt, id) {
 		struct xe_exec_queue *q, *nop_q;
-		struct xe_vm *vm;
 		void *default_lrc;
 
 		if (gt->default_lrc[hwe->class])
@@ -192,14 +186,13 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 		if (!default_lrc)
 			return -ENOMEM;
 
-		vm = xe_migrate_get_vm(tile->migrate);
-		q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1,
-					 hwe, EXEC_QUEUE_FLAG_WA);
+		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
+					 hwe, EXEC_QUEUE_FLAG_KERNEL);
 		if (IS_ERR(q)) {
 			err = PTR_ERR(q);
 			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
 				  hwe->name, q);
-			goto put_vm;
+			return err;
 		}
 
 		/* Prime golden LRC with known good state */
@@ -210,8 +203,8 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 			goto put_exec_queue;
 		}
 
-		nop_q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance),
-					     1, hwe, EXEC_QUEUE_FLAG_WA);
+		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
+					     1, hwe, EXEC_QUEUE_FLAG_KERNEL);
 		if (IS_ERR(nop_q)) {
 			err = PTR_ERR(nop_q);
 			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
@@ -245,8 +238,6 @@ put_nop_q:
 		xe_exec_queue_put(nop_q);
 put_exec_queue:
 		xe_exec_queue_put(q);
-put_vm:
-		xe_vm_put(vm);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 2b4127ea1eabf..2238a40b7e8e6 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -202,7 +202,7 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
 
 static u32 get_ppgtt_flag(struct xe_sched_job *job)
 {
-	return !(job->q->flags & EXEC_QUEUE_FLAG_WA) ? BIT(8) : 0;
+	return job->q->vm ? BIT(8) : 0;
 }
 
 /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 0479d059dc77a..b02183147e8ea 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -59,8 +59,7 @@ static struct xe_sched_job *job_alloc(bool parallel)
 
 bool xe_sched_job_is_migration(struct xe_exec_queue *q)
 {
-	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION) &&
-		!(q->flags & EXEC_QUEUE_FLAG_WA);
+	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
 }
 
 static void job_free(struct xe_sched_job *job)
@@ -91,8 +90,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
 
 	/* Migration and kernel engines have their own locking */
-	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM |
-			  EXEC_QUEUE_FLAG_WA))) {
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
 		lockdep_assert_held(&q->vm->lock);
 		if (!xe_vm_no_dma_fences(q->vm))
 			xe_vm_assert_held(q->vm);
-- 
GitLab


From 429d56a6b12c4a00d22dcc8a1ac0394906c92b67 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 23 Aug 2023 18:55:52 +0100
Subject: [PATCH 1851/2340] drm/xe/ct: fix resv_space print

Actually print the info.resv_space.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 59136b6a7c6f4..b92e04ba8f63c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1180,7 +1180,7 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
 	u32 head, tail;
 
 	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
-	drm_printf(p, "\tresv_space: %d\n", snapshot->info.space);
+	drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
 	drm_printf(p, "\thead: %d\n", snapshot->info.head);
 	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
 	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
-- 
GitLab


From 25063811d9c1f32c3223c27cafc0a95e7a86be26 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Thu, 17 Aug 2023 09:20:44 +0000
Subject: [PATCH 1852/2340] drm/xe/pvc: Blacklist BCS_SWCTRL register

Wa_16017236439 requires the BCS_SWCTRL to be privileged.

v2: Define and use BCS_SWCTRL()

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 ++
 drivers/gpu/drm/xe/xe_reg_whitelist.c    | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index d57fd855086ac..1a366d8070f39 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -63,6 +63,8 @@
 #define RING_BBADDR(base)			XE_REG((base) + 0x140)
 #define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
 
+#define BCS_SWCTRL(base)			XE_REG((base) + 0x200, XE_REG_OPTION_MASKED)
+
 /* Handling MOCS value in BLIT_CCTL like it was done CMD_CCTL */
 #define BLIT_CCTL(base)				XE_REG((base) + 0x204)
 #define   BLIT_CCTL_DST_MOCS_MASK		REG_GENMASK(14, 9)
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index e83781f9a516b..e66ae1bdaf9c0 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -50,6 +50,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
 				   RING_FORCE_TO_NONPRIV_DENY |
 				   RING_FORCE_TO_NONPRIV_RANGE_64))
 	},
+	{ XE_RTP_NAME("16017236439"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY)),
+	  XE_RTP_ACTIONS(WHITELIST(BCS_SWCTRL(0),
+				   RING_FORCE_TO_NONPRIV_DENY,
+				   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
 	{}
 };
 
-- 
GitLab


From 7407f2e5c356a73ec4a6d7f379e91f205025165c Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Tue, 18 Jul 2023 10:45:28 +0000
Subject: [PATCH 1853/2340] drm/xe/pvc: Force even num engines to use 64B

Wa_16017236439 requires that we update BCS_SWCTRL
(via indirect context batch buffer) to set 64B
transfers when running on an even-numbered BCS
engine and 256B on an odd-numbered BCS engine.

v2: Move WA from engine_was[] to lrc_was[]

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  1 +
 drivers/gpu/drm/xe/xe_wa.c               | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 1a366d8070f39..692213d09ceaa 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -64,6 +64,7 @@
 #define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
 
 #define BCS_SWCTRL(base)			XE_REG((base) + 0x200, XE_REG_OPTION_MASKED)
+#define   BCS_SWCTRL_DISABLE_256B		REG_BIT(2)
 
 /* Handling MOCS value in BLIT_CCTL like it was done CMD_CCTL */
 #define BLIT_CCTL(base)				XE_REG((base) + 0x204)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index e2b6e17d7ec41..f45e9452ba0ee 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -604,6 +604,16 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
 	},
 
+	/* PVC */
+
+	{ XE_RTP_NAME("16017236439"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY),
+		       FUNC(xe_rtp_match_even_instance)),
+	  XE_RTP_ACTIONS(SET(BCS_SWCTRL(0),
+			     BCS_SWCTRL_DISABLE_256B,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE))),
+	},
+
 	/* Xe_LPG */
 	{ XE_RTP_NAME("18019271663"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
-- 
GitLab


From a043fbab7af54c64017269dc96f43f441ed4bcaf Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Wed, 16 Aug 2023 22:14:10 -0700
Subject: [PATCH 1854/2340] drm/xe/pvc: Use fast copy engines as migrate engine
 on PVC

Some copy hardware engine instances are faster than others on PVC.
Use a virtual engine of these plus the reserved instance for the migrate
engine on PVC. The idea being if a fast instance is available it will be
used and the throughput of kernel copies, clears, and pagefault
servicing will be higher.

v2: Use OOB WA, use all copy engines if no WA is required

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile        |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c    | 36 ++++++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_wa_oob.rules |  1 +
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b470c2394476f..be93745e8a30c 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
-$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o: $(generated_oob)
 
 # Please keep these build lists sorted!
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 6e0d4e2c497ab..799ad02092795 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -12,6 +12,7 @@
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
 
+#include "generated/xe_wa_oob.h"
 #include "regs/xe_gpu_commands.h"
 #include "tests/xe_test.h"
 #include "xe_bb.h"
@@ -29,6 +30,7 @@
 #include "xe_sync.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
+#include "xe_wa.h"
 
 /**
  * struct xe_migrate - migrate context.
@@ -298,6 +300,32 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	return 0;
 }
 
+/*
+ * Due to workaround 16017236439, odd instance hardware copy engines are
+ * faster than even instance ones.
+ * This function returns the mask involving all fast copy engines and the
+ * reserved copy engine to be used as logical mask for migrate engine.
+ * Including the reserved copy engine is required to avoid deadlocks due to
+ * migrate jobs servicing the faults gets stuck behind the job that faulted.
+ */
+static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
+{
+	u32 logical_mask = 0;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (hwe->class != XE_ENGINE_CLASS_COPY)
+			continue;
+
+		if (!XE_WA(gt, 16017236439) ||
+		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+			logical_mask |= BIT(hwe->logical_instance);
+	}
+
+	return logical_mask;
+}
+
 /**
  * xe_migrate_init() - Initialize a migrate context
  * @tile: Back-pointer to the tile we're initializing for.
@@ -338,12 +366,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 							   XE_ENGINE_CLASS_COPY,
 							   primary_gt->usm.reserved_bcs_instance,
 							   false);
-		if (!hwe)
+		u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
+
+		if (!hwe || !logical_mask)
 			return ERR_PTR(-EINVAL);
 
-		m->q = xe_exec_queue_create(xe, vm,
-					    BIT(hwe->logical_instance), 1,
-					    hwe,
+		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
 					    EXEC_QUEUE_FLAG_KERNEL |
 					    EXEC_QUEUE_FLAG_PERMANENT);
 	} else {
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index ea90dcc933b59..599e67169dae6 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -17,3 +17,4 @@
 1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
 14016763929	SUBPLATFORM(DG2, G10)
 		SUBPLATFORM(DG2, G12)
+16017236439	PLATFORM(PVC)
-- 
GitLab


From 14ec22408d2fa1d8671b619474381344b2bc859a Mon Sep 17 00:00:00 2001
From: Zhanjun Dong <zhanjun.dong@intel.com>
Date: Thu, 17 Aug 2023 14:30:28 -0700
Subject: [PATCH 1855/2340] drm/xe: Add patch version on guc firmware init

Add patch version info on GuC firmware init. This is required info for
GuC log decoder.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://lore.kernel.org/r/20230817213028.838531-1-zhanjun.dong@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c       | 13 +++++++------
 drivers/gpu/drm/xe/xe_uc_fw_types.h |  2 ++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 1802b280cd8c6..37ad238148b0c 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -400,11 +400,12 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 					   css->sw_version);
 	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
 					   css->sw_version);
+	uc_fw->patch_ver_found = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
+					   css->sw_version);
 
-	drm_info(&xe->drm, "Using %s firmware (%u.%u) from %s\n",
-		 xe_uc_fw_type_repr(uc_fw->type),
-		 uc_fw->major_ver_found, uc_fw->minor_ver_found,
-		 uc_fw->path);
+	drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u\n",
+		 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+		 uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found);
 
 	err = uc_fw_check_version_requirements(uc_fw);
 	if (err)
@@ -531,9 +532,9 @@ void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
 		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
 	drm_printf(p, "\tstatus: %s\n",
 		   xe_uc_fw_status_repr(uc_fw->status));
-	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
+	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u.%u\n",
 		   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
-		   uc_fw->major_ver_found, uc_fw->minor_ver_found);
+		   uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found);
 	drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
 	drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 837f49a2347ef..444bff83cdbe5 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -106,6 +106,8 @@ struct xe_uc_fw {
 	u16 major_ver_found;
 	/** @minor_ver_found: major version found in firmware blob */
 	u16 minor_ver_found;
+	/** @patch_ver_found: patch version found in firmware blob */
+	u16 patch_ver_found;
 
 	/** @rsa_size: RSA size */
 	u32 rsa_size;
-- 
GitLab


From 9c0d779fc67bd1810f74c22e219f4af24a4e1e29 Mon Sep 17 00:00:00 2001
From: Pallavi Mishra <pallavi.mishra@intel.com>
Date: Thu, 31 Aug 2023 04:55:58 +0530
Subject: [PATCH 1856/2340] drm/xe: Prevent return with locked vm

Reorder vm_id check after the one for VISIBLE_VRAM. This should
prevent returning with locked vm in error scenario.

Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 80c5d1a7d41a2..3cfd3f37c81e3 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1795,17 +1795,6 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
 		return -EINVAL;
 
-	if (args->vm_id) {
-		vm = xe_vm_lookup(xef, args->vm_id);
-		if (XE_IOCTL_DBG(xe, !vm))
-			return -ENOENT;
-		err = xe_vm_lock(vm, &ww, 0, true);
-		if (err) {
-			xe_vm_put(vm);
-			return err;
-		}
-	}
-
 	if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING)
 		bo_flags |= XE_BO_DEFER_BACKING;
 
@@ -1821,6 +1810,17 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
 	}
 
+	if (args->vm_id) {
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_DBG(xe, !vm))
+			return -ENOENT;
+		err = xe_vm_lock(vm, &ww, 0, true);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+	}
+
 	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
 			  bo_flags);
 	if (IS_ERR(bo)) {
-- 
GitLab


From 1da0702c1701c2e1441d86facd9fbb5e73fa374b Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 24 Aug 2023 17:04:45 +0100
Subject: [PATCH 1857/2340] drm/xe: nuke GuC on unload

On PVC unloading followed by reloading the module often results in a
completely dead machine (seems to be plaguing CI). Resetting the GuC
like we do at load seems to cure it at least when locally testing this.

v2:
  - Move pc_fini into guc_fini. We want to do the GuC reset just after
    calling pc_fini, otherwise we encounter communication failures. It
    also seems like a good idea to do the reset before we start releasing
    the various other GuC resources. In the case of pc_fini there is an
    explicit stop, but for other stuff like logs, ads, ctb there is not.

References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/542
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/597
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c    | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_guc_pc.c |  8 +-------
 drivers/gpu/drm/xe/xe_guc_pc.h |  1 +
 drivers/gpu/drm/xe/xe_uc.c     |  5 +++++
 drivers/gpu/drm/xe/xe_uc.h     |  1 +
 5 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index e102637c06953..5d32bcee28b6e 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -5,6 +5,8 @@
 
 #include "xe_guc.h"
 
+#include <drm/drm_managed.h>
+
 #include "generated/xe_wa_oob.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_guc_regs.h"
@@ -20,6 +22,7 @@
 #include "xe_guc_submit.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
+#include "xe_uc.h"
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
 #include "xe_wopcm.h"
@@ -217,6 +220,16 @@ static void guc_write_params(struct xe_guc *guc)
 		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
 }
 
+static void guc_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+	xe_guc_pc_fini(&guc->pc);
+	xe_uc_fini_hw(&guc_to_gt(guc)->uc);
+	xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+}
+
 int xe_guc_init(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
@@ -244,6 +257,10 @@ int xe_guc_init(struct xe_guc *guc)
 	if (ret)
 		goto out;
 
+	ret = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, guc_fini, guc);
+	if (ret)
+		goto out;
+
 	guc_init_params(guc);
 
 	if (xe_gt_is_media_type(gt))
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index c03bb58e70497..87de1ce40e07c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -884,10 +884,8 @@ out:
 	return ret;
 }
 
-static void pc_fini(struct drm_device *drm, void *arg)
+void xe_guc_pc_fini(struct xe_guc_pc *pc)
 {
-	struct xe_guc_pc *pc = arg;
-
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
 	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
@@ -925,9 +923,5 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 	if (err)
 		return err;
 
-	err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc);
-	if (err)
-		return err;
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 81833a53b3c92..43ea582545b57 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -9,6 +9,7 @@
 #include "xe_guc_pc_types.h"
 
 int xe_guc_pc_init(struct xe_guc_pc *pc);
+void xe_guc_pc_fini(struct xe_guc_pc *pc);
 int xe_guc_pc_start(struct xe_guc_pc *pc);
 int xe_guc_pc_stop(struct xe_guc_pc *pc);
 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index addd6f2681b94..9c8ce504f4da6 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -167,6 +167,11 @@ int xe_uc_init_hw(struct xe_uc *uc)
 	return 0;
 }
 
+int xe_uc_fini_hw(struct xe_uc *uc)
+{
+	return xe_uc_sanitize_reset(uc);
+}
+
 int xe_uc_reset_prepare(struct xe_uc *uc)
 {
 	/* GuC submission not enabled, nothing to do */
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 42219b361df55..4109ae7028af5 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -12,6 +12,7 @@ int xe_uc_init(struct xe_uc *uc);
 int xe_uc_init_hwconfig(struct xe_uc *uc);
 int xe_uc_init_post_hwconfig(struct xe_uc *uc);
 int xe_uc_init_hw(struct xe_uc *uc);
+int xe_uc_fini_hw(struct xe_uc *uc);
 void xe_uc_gucrc_disable(struct xe_uc *uc);
 int xe_uc_reset_prepare(struct xe_uc *uc);
 void xe_uc_stop_prepare(struct xe_uc *uc);
-- 
GitLab


From 9a4566d5e0ae9dd38ef20fab00990e6958c421b4 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 29 Aug 2023 17:28:43 +0100
Subject: [PATCH 1858/2340] drm/xe: fix has_llc on rkl

Matches i915. Assumption going forward is that non-llc + igpu is only a
thing on MTL+ which should have explicit coherency pat_index settings
for COH_NONE, 1WAY and 2WAY.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6c2c6723b1b26..08c1edc74d962 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -232,6 +232,7 @@ static const struct xe_device_desc rkl_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ROCKETLAKE),
+	.has_llc = true,
 	.require_force_probe = true,
 };
 
-- 
GitLab


From 35dfb48462d92ce5514f883c461857ca55bdb499 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 31 Aug 2023 07:54:21 -0700
Subject: [PATCH 1859/2340] drm/xe: Convert xe_vma_op_flags to BIT macros

Rather than open code the shift for values, use BIT macros.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm_types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index f8675c3da3b14..40ce8953bacbb 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -370,11 +370,11 @@ struct xe_vma_op_prefetch {
 /** enum xe_vma_op_flags - flags for VMA operation */
 enum xe_vma_op_flags {
 	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
-	XE_VMA_OP_FIRST		= (0x1 << 0),
+	XE_VMA_OP_FIRST		= BIT(0),
 	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
-	XE_VMA_OP_LAST		= (0x1 << 1),
+	XE_VMA_OP_LAST		= BIT(1),
 	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
-	XE_VMA_OP_COMMITTED	= (0x1 << 2),
+	XE_VMA_OP_COMMITTED	= BIT(2),
 };
 
 /** struct xe_vma_op - VMA operation */
-- 
GitLab


From 5ef091fc32a4fe7116a4ecc778369f161de9c11a Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Sun, 13 Aug 2023 20:19:20 -0700
Subject: [PATCH 1860/2340] drm/xe: Fixup unwind on VM ops errors

Remap ops have 3 parts: unmap, prev, and next. The commit step can fail
on any of these. Add a flag for each to these so the unwind is only done
the steps that have been committed.

v2: (Rodrigo) Use bit macros

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 24 +++++++++++++++++-------
 drivers/gpu/drm/xe/xe_vm_types.h | 10 +++++++---
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a774f9632ddae..71f61806df77b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2622,18 +2622,25 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
 		err |= xe_vm_insert_vma(vm, op->map.vma);
+		if (!err)
+			op->flags |= XE_VMA_OP_COMMITTED;
 		break;
 	case DRM_GPUVA_OP_REMAP:
 		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
 				 true);
+		op->flags |= XE_VMA_OP_COMMITTED;
 
 		if (op->remap.prev) {
 			err |= xe_vm_insert_vma(vm, op->remap.prev);
+			if (!err)
+				op->flags |= XE_VMA_OP_PREV_COMMITTED;
 			if (!err && op->remap.skip_prev)
 				op->remap.prev = NULL;
 		}
 		if (op->remap.next) {
 			err |= xe_vm_insert_vma(vm, op->remap.next);
+			if (!err)
+				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
 			if (!err && op->remap.skip_next)
 				op->remap.next = NULL;
 		}
@@ -2646,15 +2653,15 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 		break;
 	case DRM_GPUVA_OP_UNMAP:
 		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
+		op->flags |= XE_VMA_OP_COMMITTED;
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
-		/* Nothing to do */
+		op->flags |= XE_VMA_OP_COMMITTED;
 		break;
 	default:
 		XE_WARN_ON("NOT POSSIBLE");
 	}
 
-	op->flags |= XE_VMA_OP_COMMITTED;
 	return err;
 }
 
@@ -2859,7 +2866,8 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
 }
 
 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
-			     bool post_commit)
+			     bool post_commit, bool prev_post_commit,
+			     bool next_post_commit)
 {
 	lockdep_assert_held_write(&vm->lock);
 
@@ -2886,11 +2894,11 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
 		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
 
 		if (op->remap.prev) {
-			prep_vma_destroy(vm, op->remap.prev, post_commit);
+			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
 			xe_vma_destroy_unlocked(op->remap.prev);
 		}
 		if (op->remap.next) {
-			prep_vma_destroy(vm, op->remap.next, post_commit);
+			prep_vma_destroy(vm, op->remap.next, next_post_commit);
 			xe_vma_destroy_unlocked(op->remap.next);
 		}
 		down_read(&vm->userptr.notifier_lock);
@@ -3029,7 +3037,9 @@ static int vm_bind_ioctl_ops_commit(struct xe_vm *vm,
 
 unwind:
 	list_for_each_entry_reverse(op, ops_list, link)
-		xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED);
+		xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED,
+				 op->flags & XE_VMA_OP_PREV_COMMITTED,
+				 op->flags & XE_VMA_OP_NEXT_COMMITTED);
 	list_for_each_entry_safe(op, next, ops_list, link)
 		xe_vma_op_cleanup(vm, op);
 
@@ -3056,7 +3066,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 		drm_gpuva_for_each_op(__op, __ops) {
 			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
-			xe_vma_op_unwind(vm, op, false);
+			xe_vma_op_unwind(vm, op, false, false, false);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 40ce8953bacbb..dfbc53e56a869 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -370,11 +370,15 @@ struct xe_vma_op_prefetch {
 /** enum xe_vma_op_flags - flags for VMA operation */
 enum xe_vma_op_flags {
 	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
-	XE_VMA_OP_FIRST		= BIT(0),
+	XE_VMA_OP_FIRST			= BIT(0),
 	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
-	XE_VMA_OP_LAST		= BIT(1),
+	XE_VMA_OP_LAST			= BIT(1),
 	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
-	XE_VMA_OP_COMMITTED	= BIT(2),
+	XE_VMA_OP_COMMITTED		= BIT(2),
+	/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */
+	XE_VMA_OP_PREV_COMMITTED	= BIT(3),
+	/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */
+	XE_VMA_OP_NEXT_COMMITTED	= BIT(4),
 };
 
 /** struct xe_vma_op - VMA operation */
-- 
GitLab


From bbd52b6153731908e52f68d7c797bef7c42af4f7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 31 Aug 2023 07:58:44 -0700
Subject: [PATCH 1861/2340] drm/gpuva: Add drm_gpuva_for_each_op_reverse

Add a helper to walk op list in reverse. Xe will make use of this when
unwinding GPUVA operations.

v2: (Rodrigo) reword commit message

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/drm_gpuvm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 6258849382e16..48311e6d664c8 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -1007,6 +1007,16 @@ struct drm_gpuva_ops {
 #define drm_gpuva_for_each_op_from_reverse(op, ops) \
 	list_for_each_entry_from_reverse(op, &(ops)->list, entry)
 
+/**
+ * drm_gpuva_for_each_op_reverse - iterator to walk over &drm_gpuva_ops in reverse
+ * @op: &drm_gpuva_op to assign in each iteration step
+ * @ops: &drm_gpuva_ops to walk
+ *
+ * This iterator walks over all ops within a given list of operations in reverse
+ */
+#define drm_gpuva_for_each_op_reverse(op, ops) \
+	list_for_each_entry_reverse(op, &(ops)->list, entry)
+
 /**
  * drm_gpuva_first_op() - returns the first &drm_gpuva_op from &drm_gpuva_ops
  * @ops: the &drm_gpuva_ops to get the fist &drm_gpuva_op from
-- 
GitLab


From 617eebb9c4807be77ca6f02eee7469e5e111861d Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 16 Aug 2023 20:15:38 -0700
Subject: [PATCH 1862/2340] drm/xe: Fix array of binds

If multiple bind ops in an array of binds touch the same address range
invalid GPUVA operations are generated as each GPUVA operation is
generated based on the orignal GPUVA state. To fix this, after each
GPUVA operations is generated, commit the GPUVA operation updating the
GPUVA state so subsequent bind ops can see a current GPUVA state.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 418 +++++++++++++++++++------------------
 1 file changed, 212 insertions(+), 206 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 71f61806df77b..24ee74f62385c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2430,24 +2430,73 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
 	return SZ_4K;
 }
 
-/*
- * Parse operations list and create any resources needed for the operations
- * prior to fully committing to the operations. This setup can fail.
- */
+static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err |= xe_vm_insert_vma(vm, op->map.vma);
+		if (!err)
+			op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
+				 true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+
+		if (op->remap.prev) {
+			err |= xe_vm_insert_vma(vm, op->remap.prev);
+			if (!err)
+				op->flags |= XE_VMA_OP_PREV_COMMITTED;
+			if (!err && op->remap.skip_prev)
+				op->remap.prev = NULL;
+		}
+		if (op->remap.next) {
+			err |= xe_vm_insert_vma(vm, op->remap.next);
+			if (!err)
+				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
+			if (!err && op->remap.skip_next)
+				op->remap.next = NULL;
+		}
+
+		/* Adjust for partial unbind after removin VMA from VM */
+		if (!err) {
+			op->base.remap.unmap->va->va.addr = op->remap.start;
+			op->base.remap.unmap->va->va.range = op->remap.range;
+		}
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	default:
+		XE_WARN_ON("NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+
 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
-				   struct drm_gpuva_ops **ops, int num_ops_list,
+				   struct drm_gpuva_ops *ops,
 				   struct xe_sync_entry *syncs, u32 num_syncs,
-				   struct list_head *ops_list, bool async)
+				   struct list_head *ops_list, bool last,
+				   bool async)
 {
 	struct xe_vma_op *last_op = NULL;
-	struct list_head *async_list = NULL;
 	struct async_op_fence *fence = NULL;
-	int err, i;
+	struct drm_gpuva_op *__op;
+	int err = 0;
 
 	lockdep_assert_held_write(&vm->lock);
-	XE_WARN_ON(num_ops_list > 1 && !async);
 
-	if (num_syncs && async) {
+	if (last && num_syncs && async) {
 		u64 seqno;
 
 		fence = kmalloc(sizeof(*fence), GFP_KERNEL);
@@ -2466,145 +2515,145 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 		}
 	}
 
-	for (i = 0; i < num_ops_list; ++i) {
-		struct drm_gpuva_ops *__ops = ops[i];
-		struct drm_gpuva_op *__op;
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+		bool first = list_empty(ops_list);
 
-		drm_gpuva_for_each_op(__op, __ops) {
-			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
-			bool first = !async_list;
+		XE_WARN_ON(!first && !async);
+
+		INIT_LIST_HEAD(&op->link);
+		list_add_tail(&op->link, ops_list);
 
-			XE_WARN_ON(!first && !async);
+		if (first) {
+			op->flags |= XE_VMA_OP_FIRST;
+			op->num_syncs = num_syncs;
+			op->syncs = syncs;
+		}
 
-			INIT_LIST_HEAD(&op->link);
-			if (first)
-				async_list = ops_list;
-			list_add_tail(&op->link, async_list);
+		op->q = q;
+
+		switch (op->base.op) {
+		case DRM_GPUVA_OP_MAP:
+		{
+			struct xe_vma *vma;
 
-			if (first) {
-				op->flags |= XE_VMA_OP_FIRST;
-				op->num_syncs = num_syncs;
-				op->syncs = syncs;
+			vma = new_vma(vm, &op->base.map,
+				      op->tile_mask, op->map.read_only,
+				      op->map.is_null);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto free_fence;
 			}
 
-			op->q = q;
+			op->map.vma = vma;
+			break;
+		}
+		case DRM_GPUVA_OP_REMAP:
+		{
+			struct xe_vma *old =
+				gpuva_to_vma(op->base.remap.unmap->va);
 
-			switch (op->base.op) {
-			case DRM_GPUVA_OP_MAP:
-			{
-				struct xe_vma *vma;
+			op->remap.start = xe_vma_start(old);
+			op->remap.range = xe_vma_size(old);
 
-				vma = new_vma(vm, &op->base.map,
-					      op->tile_mask, op->map.read_only,
-					      op->map.is_null);
+			if (op->base.remap.prev) {
+				struct xe_vma *vma;
+				bool read_only =
+					op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY;
+				bool is_null =
+					op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE;
+
+				vma = new_vma(vm, op->base.remap.prev,
+					      op->tile_mask, read_only,
+					      is_null);
 				if (IS_ERR(vma)) {
 					err = PTR_ERR(vma);
 					goto free_fence;
 				}
 
-				op->map.vma = vma;
-				break;
+				op->remap.prev = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_end(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_prev) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(vma) -
+						xe_vma_start(old);
+					op->remap.start = xe_vma_end(vma);
+				}
 			}
-			case DRM_GPUVA_OP_REMAP:
-			{
-				struct xe_vma *old =
-					gpuva_to_vma(op->base.remap.unmap->va);
-
-				op->remap.start = xe_vma_start(old);
-				op->remap.range = xe_vma_size(old);
-
-				if (op->base.remap.prev) {
-					struct xe_vma *vma;
-					bool read_only =
-						op->base.remap.unmap->va->flags &
-						XE_VMA_READ_ONLY;
-					bool is_null =
-						op->base.remap.unmap->va->flags &
-						DRM_GPUVA_SPARSE;
-
-					vma = new_vma(vm, op->base.remap.prev,
-						      op->tile_mask, read_only,
-						      is_null);
-					if (IS_ERR(vma)) {
-						err = PTR_ERR(vma);
-						goto free_fence;
-					}
-
-					op->remap.prev = vma;
-
-					/*
-					 * Userptr creates a new SG mapping so
-					 * we must also rebind.
-					 */
-					op->remap.skip_prev = !xe_vma_is_userptr(old) &&
-						IS_ALIGNED(xe_vma_end(vma),
-							   xe_vma_max_pte_size(old));
-					if (op->remap.skip_prev) {
-						xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
-						op->remap.range -=
-							xe_vma_end(vma) -
-							xe_vma_start(old);
-						op->remap.start = xe_vma_end(vma);
-					}
+
+			if (op->base.remap.next) {
+				struct xe_vma *vma;
+				bool read_only =
+					op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY;
+
+				bool is_null =
+					op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE;
+
+				vma = new_vma(vm, op->base.remap.next,
+					      op->tile_mask, read_only,
+					      is_null);
+				if (IS_ERR(vma)) {
+					err = PTR_ERR(vma);
+					goto free_fence;
 				}
 
-				if (op->base.remap.next) {
-					struct xe_vma *vma;
-					bool read_only =
-						op->base.remap.unmap->va->flags &
-						XE_VMA_READ_ONLY;
-
-					bool is_null =
-						op->base.remap.unmap->va->flags &
-						DRM_GPUVA_SPARSE;
-
-					vma = new_vma(vm, op->base.remap.next,
-						      op->tile_mask, read_only,
-						      is_null);
-					if (IS_ERR(vma)) {
-						err = PTR_ERR(vma);
-						goto free_fence;
-					}
-
-					op->remap.next = vma;
-
-					/*
-					 * Userptr creates a new SG mapping so
-					 * we must also rebind.
-					 */
-					op->remap.skip_next = !xe_vma_is_userptr(old) &&
-						IS_ALIGNED(xe_vma_start(vma),
-							   xe_vma_max_pte_size(old));
-					if (op->remap.skip_next) {
-						xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
-						op->remap.range -=
-							xe_vma_end(old) -
-							xe_vma_start(vma);
-					}
+				op->remap.next = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_next = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_start(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_next) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(old) -
+						xe_vma_start(vma);
 				}
-				break;
-			}
-			case DRM_GPUVA_OP_UNMAP:
-			case DRM_GPUVA_OP_PREFETCH:
-				/* Nothing to do */
-				break;
-			default:
-				XE_WARN_ON("NOT POSSIBLE");
 			}
-
-			last_op = op;
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP:
+		case DRM_GPUVA_OP_PREFETCH:
+			/* Nothing to do */
+			break;
+		default:
+			XE_WARN_ON("NOT POSSIBLE");
 		}
 
-		last_op->ops = __ops;
+		last_op = op;
+
+		err = xe_vma_op_commit(vm, op);
+		if (err)
+			goto free_fence;
 	}
 
-	if (!last_op)
-		return -ENODATA;
+	/* FIXME: Unhandled corner case */
+	XE_WARN_ON(!last_op && last && !list_empty(ops_list));
 
-	last_op->flags |= XE_VMA_OP_LAST;
-	last_op->num_syncs = num_syncs;
-	last_op->syncs = syncs;
-	last_op->fence = fence;
+	if (!last_op)
+		goto free_fence;
+	last_op->ops = ops;
+	if (last) {
+		last_op->flags |= XE_VMA_OP_LAST;
+		last_op->num_syncs = num_syncs;
+		last_op->syncs = syncs;
+		last_op->fence = fence;
+	}
 
 	return 0;
 
@@ -2613,58 +2662,6 @@ free_fence:
 	return err;
 }
 
-static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
-{
-	int err = 0;
-
-	lockdep_assert_held_write(&vm->lock);
-
-	switch (op->base.op) {
-	case DRM_GPUVA_OP_MAP:
-		err |= xe_vm_insert_vma(vm, op->map.vma);
-		if (!err)
-			op->flags |= XE_VMA_OP_COMMITTED;
-		break;
-	case DRM_GPUVA_OP_REMAP:
-		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
-				 true);
-		op->flags |= XE_VMA_OP_COMMITTED;
-
-		if (op->remap.prev) {
-			err |= xe_vm_insert_vma(vm, op->remap.prev);
-			if (!err)
-				op->flags |= XE_VMA_OP_PREV_COMMITTED;
-			if (!err && op->remap.skip_prev)
-				op->remap.prev = NULL;
-		}
-		if (op->remap.next) {
-			err |= xe_vm_insert_vma(vm, op->remap.next);
-			if (!err)
-				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
-			if (!err && op->remap.skip_next)
-				op->remap.next = NULL;
-		}
-
-		/* Adjust for partial unbind after removin VMA from VM */
-		if (!err) {
-			op->base.remap.unmap->va->va.addr = op->remap.start;
-			op->base.remap.unmap->va->va.range = op->remap.range;
-		}
-		break;
-	case DRM_GPUVA_OP_UNMAP:
-		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
-		op->flags |= XE_VMA_OP_COMMITTED;
-		break;
-	case DRM_GPUVA_OP_PREFETCH:
-		op->flags |= XE_VMA_OP_COMMITTED;
-		break;
-	default:
-		XE_WARN_ON("NOT POSSIBLE");
-	}
-
-	return err;
-}
-
 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
 			       struct xe_vma_op *op)
 {
@@ -2882,11 +2879,13 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
 	{
 		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
 
-		down_read(&vm->userptr.notifier_lock);
-		vma->gpuva.flags &= ~XE_VMA_DESTROYED;
-		up_read(&vm->userptr.notifier_lock);
-		if (post_commit)
-			xe_vm_insert_vma(vm, vma);
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
 		break;
 	}
 	case DRM_GPUVA_OP_REMAP:
@@ -2901,11 +2900,13 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
 			prep_vma_destroy(vm, op->remap.next, next_post_commit);
 			xe_vma_destroy_unlocked(op->remap.next);
 		}
-		down_read(&vm->userptr.notifier_lock);
-		vma->gpuva.flags &= ~XE_VMA_DESTROYED;
-		up_read(&vm->userptr.notifier_lock);
-		if (post_commit)
-			xe_vm_insert_vma(vm, vma);
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
 		break;
 	}
 	case DRM_GPUVA_OP_PREFETCH:
@@ -2994,20 +2995,16 @@ static void xe_vma_op_work_func(struct work_struct *w)
 	}
 }
 
-static int vm_bind_ioctl_ops_commit(struct xe_vm *vm,
-				    struct list_head *ops_list, bool async)
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+				     struct list_head *ops_list, bool async)
 {
 	struct xe_vma_op *op, *last_op, *next;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
 
-	list_for_each_entry(op, ops_list, link) {
+	list_for_each_entry(op, ops_list, link)
 		last_op = op;
-		err = xe_vma_op_commit(vm, op);
-		if (err)
-			goto unwind;
-	}
 
 	if (!async) {
 		err = xe_vma_op_execute(vm, last_op);
@@ -3046,28 +3043,29 @@ unwind:
 	return err;
 }
 
-/*
- * Unwind operations list, called after a failure of vm_bind_ioctl_ops_create or
- * vm_bind_ioctl_ops_parse.
- */
 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 				     struct drm_gpuva_ops **ops,
 				     int num_ops_list)
 {
 	int i;
 
-	for (i = 0; i < num_ops_list; ++i) {
+	for (i = num_ops_list - 1; i; ++i) {
 		struct drm_gpuva_ops *__ops = ops[i];
 		struct drm_gpuva_op *__op;
 
 		if (!__ops)
 			continue;
 
-		drm_gpuva_for_each_op(__op, __ops) {
+		drm_gpuva_for_each_op_reverse(__op, __ops) {
 			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
-			xe_vma_op_unwind(vm, op, false, false, false);
+			xe_vma_op_unwind(vm, op,
+					 op->flags & XE_VMA_OP_COMMITTED,
+					 op->flags & XE_VMA_OP_PREV_COMMITTED,
+					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
 		}
+
+		drm_gpuva_ops_free(&vm->gpuvm, __ops);
 	}
 }
 
@@ -3388,14 +3386,22 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			ops[i] = NULL;
 			goto unwind_ops;
 		}
+
+		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
+					      &ops_list,
+					      i == args->num_binds - 1,
+					      async);
+		if (err)
+			goto unwind_ops;
 	}
 
-	err = vm_bind_ioctl_ops_parse(vm, q, ops, args->num_binds,
-				      syncs, num_syncs, &ops_list, async);
-	if (err)
+	/* Nothing to do */
+	if (list_empty(&ops_list)) {
+		err = -ENODATA;
 		goto unwind_ops;
+	}
 
-	err = vm_bind_ioctl_ops_commit(vm, &ops_list, async);
+	err = vm_bind_ioctl_ops_execute(vm, &ops_list, async);
 	up_write(&vm->lock);
 
 	for (i = 0; i < args->num_binds; ++i)
-- 
GitLab


From 621fd7dc38b7c18d4946a05051f674fcab82d4dd Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 23 Aug 2023 09:10:20 +0000
Subject: [PATCH 1863/2340] drm/xe/pm: Use PM functions only if CONFIG_PM_SLEEP
 is enabled

This fixes the build without CONFIG_PM_SLEEP such as for riscv.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 48 +++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 08c1edc74d962..b72d9f568768b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -30,28 +30,6 @@ enum toggle_d3cold {
 	D3COLD_ENABLE,
 };
 
-static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
-{
-	struct xe_device *xe = pdev_to_xe_device(pdev);
-	struct pci_dev *root_pdev;
-
-	if (!xe->d3cold.capable)
-		return;
-
-	root_pdev = pcie_find_root_port(pdev);
-	if (!root_pdev)
-		return;
-
-	switch (toggle) {
-	case D3COLD_DISABLE:
-		pci_d3cold_disable(root_pdev);
-		break;
-	case D3COLD_ENABLE:
-		pci_d3cold_enable(root_pdev);
-		break;
-	}
-}
-
 struct xe_subplatform_desc {
 	enum xe_subplatform subplatform;
 	const char *name;
@@ -741,6 +719,28 @@ static void xe_pci_shutdown(struct pci_dev *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct pci_dev *root_pdev;
+
+	if (!xe->d3cold.capable)
+		return;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (!root_pdev)
+		return;
+
+	switch (toggle) {
+	case D3COLD_DISABLE:
+		pci_d3cold_disable(root_pdev);
+		break;
+	case D3COLD_ENABLE:
+		pci_d3cold_enable(root_pdev);
+		break;
+	}
+}
+
 static int xe_pci_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -851,12 +851,12 @@ static int xe_pci_runtime_idle(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static const struct dev_pm_ops xe_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume)
 	SET_RUNTIME_PM_OPS(xe_pci_runtime_suspend, xe_pci_runtime_resume, xe_pci_runtime_idle)
 };
+#endif
 
 static struct pci_driver xe_pci_driver = {
 	.name = DRIVER_NAME,
@@ -864,7 +864,9 @@ static struct pci_driver xe_pci_driver = {
 	.probe = xe_pci_probe,
 	.remove = xe_pci_remove,
 	.shutdown = xe_pci_shutdown,
+#ifdef CONFIG_PM_SLEEP
 	.driver.pm = &xe_pm_ops,
+#endif
 };
 
 int xe_register_pci_driver(void)
-- 
GitLab


From fba153b0d0b769bb2379c9e78968036d17bdfb6b Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 1 Sep 2023 15:28:25 +0100
Subject: [PATCH 1864/2340] drm/xe/selftests: consider multi-GT for eviction
 test

We need to sanitize and reset each GT, since xe_bo_evict_all() will
evict everything regardless of GT, which can leave other GTs in a broken
state.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index b32a9068d76c8..0e4ec22c5667e 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -181,7 +181,8 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 		XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt));
 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
 	struct ww_acquire_ctx ww;
-	int err, i;
+	struct xe_gt *__gt;
+	int err, i, id;
 
 	kunit_info(test, "Testing device %s gt id %u vram id %u\n",
 		   dev_name(xe->drm.dev), gt->info.id, gt_to_tile(gt)->id);
@@ -218,7 +219,8 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 			goto cleanup_all;
 		}
 
-		xe_gt_sanitize(gt);
+		for_each_gt(__gt, xe, id)
+			xe_gt_sanitize(__gt);
 		err = xe_bo_restore_kernel(xe);
 		/*
 		 * Snapshotting the CTB and copying back a potentially old
@@ -231,8 +233,10 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
 		 * however seems quite fragile not to also restart the GT. Try
 		 * to do that here by triggering a GT reset.
 		 */
-		xe_gt_reset_async(gt);
-		flush_work(&gt->reset.worker);
+		for_each_gt(__gt, xe, id) {
+			xe_gt_reset_async(__gt);
+			flush_work(&__gt->reset.worker);
+		}
 		if (err) {
 			KUNIT_FAIL(test, "restore kernel err=%pe\n",
 				   ERR_PTR(err));
-- 
GitLab


From e6a373dc3d1267f828a3e6523fe2e46c6824d3e4 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 1 Sep 2023 15:28:26 +0100
Subject: [PATCH 1865/2340] drm/xe/selftests: make eviction test tile centric

The concern here is that we may have platforms with dedicated media GT,
and we anyway allocate the object on the tile, which just means running
the same test twice (i.e primary vs media GT).

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 0e4ec22c5667e..c448b00a569cf 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -174,18 +174,18 @@ void xe_ccs_migrate_kunit(struct kunit *test)
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
 
-static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test)
+static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
 {
 	struct xe_bo *bo, *external;
 	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
-		XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt));
+		XE_BO_CREATE_VRAM_IF_DGFX(tile);
 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
 	struct ww_acquire_ctx ww;
 	struct xe_gt *__gt;
 	int err, i, id;
 
-	kunit_info(test, "Testing device %s gt id %u vram id %u\n",
-		   dev_name(xe->drm.dev), gt->info.id, gt_to_tile(gt)->id);
+	kunit_info(test, "Testing device %s vram id %u\n",
+		   dev_name(xe->drm.dev), tile->id);
 
 	for (i = 0; i < 2; ++i) {
 		xe_vm_lock(vm, &ww, 0, false);
@@ -316,7 +316,7 @@ cleanup_bo:
 static int evict_test_run_device(struct xe_device *xe)
 {
 	struct kunit *test = xe_cur_kunit();
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	int id;
 
 	if (!IS_DGFX(xe)) {
@@ -327,8 +327,8 @@ static int evict_test_run_device(struct xe_device *xe)
 
 	xe_device_mem_access_get(xe);
 
-	for_each_gt(gt, xe, id)
-		evict_test_run_gt(xe, gt, test);
+	for_each_tile(tile, xe, id)
+		evict_test_run_tile(xe, tile, test);
 
 	xe_device_mem_access_put(xe);
 
-- 
GitLab


From 8bc454baf4036f4684bf30951dc3f6d96eb93f5f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 6 Sep 2023 12:30:09 -0700
Subject: [PATCH 1866/2340] drm/xe/pat: Use 0 instead of space on error

Use 0 in format string instead of space so it shows as

	[drm] *ERROR* Missing PAT table for platform with graphics version 20.04!

instead of

	[drm] *ERROR* Missing PAT table for platform with graphics version 20. 4!

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20230906193009.1912129-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index b56a65779d26b..71e0e047fff3b 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -107,7 +107,7 @@ void xe_pat_init(struct xe_gt *gt)
 		 * raise an error rather than trying to silently inherit the
 		 * most recent platform's behavior.
 		 */
-		drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%2d!\n",
+		drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n",
 			GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
 	}
 }
-- 
GitLab


From a2112949e5f96c1b95aedfb9e2f0401e6c4f864f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Sep 2023 18:20:50 -0700
Subject: [PATCH 1867/2340] drm/xe/reg_sr: Simplify check for masked registers

For all RTP actions, clr_bits is a superset of the bits being modified.
That's also why the check for "changing all bits" can be done with
`clr_bits + 1`. So always use clr_bits for setting the upper bits of a
masked register.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://lore.kernel.org/r/20230906012053.1733755-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c    | 8 ++++----
 drivers/gpu/drm/xe/xe_rtp_types.h | 5 ++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 7c88352636d23..2645200158612 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -153,15 +153,15 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 	u32 val;
 
 	/*
-	 * If this is a masked register, need to figure what goes on the upper
-	 * 16 bits: it's either the clr_bits (when using FIELD_SET and WR) or
-	 * the set_bits, when using SET.
+	 * If this is a masked register, need to set the upper 16 bits.
+	 * Set them to clr_bits since that is always a superset of the bits
+	 * being modified.
 	 *
 	 * When it's not masked, we have to read it from hardware, unless we are
 	 * supposed to set all bits.
 	 */
 	if (reg.masked)
-		val = (entry->clr_bits ?: entry->set_bits) << 16;
+		val = entry->clr_bits << 16;
 	else if (entry->clr_bits + 1)
 		val = (reg.mcr ?
 		       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index d170532a98a59..637acc7626a4d 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -22,7 +22,10 @@ struct xe_gt;
 struct xe_rtp_action {
 	/** @reg: Register */
 	struct xe_reg		reg;
-	/** @clr_bits: bits to clear when updating register */
+	/**
+	 * @clr_bits: bits to clear when updating register. It's always a
+	 * superset of bits being modified
+	 */
 	u32			clr_bits;
 	/** @set_bits: bits to set when updating register */
 	u32			set_bits;
-- 
GitLab


From 46c63b6485b9029aae0a79a82c8c3e03548abc1b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Sep 2023 18:20:51 -0700
Subject: [PATCH 1868/2340] drm/xe/reg_sr: Use xe_gt_dbg

Use xe_gt_dbg() instead of drm_dbg() so the GT is added to the log for
easy identification.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230906012053.1733755-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 2645200158612..87adefb56024e 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -147,7 +147,6 @@ static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
 
 static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_reg reg = entry->reg;
 	struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
 	u32 val;
@@ -176,7 +175,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 	 */
 	val |= entry->set_bits;
 
-	drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.addr, val);
+	xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
 
 	if (entry->reg.mcr)
 		xe_gt_mcr_multicast_write(gt, reg_mcr, val);
@@ -186,7 +185,6 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
 
 void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_reg_sr_entry *entry;
 	unsigned long reg;
 	int err;
@@ -194,7 +192,7 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
 	if (xa_empty(&sr->xa))
 		return;
 
-	drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name);
+	xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
 
 	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
 	if (err)
@@ -209,7 +207,7 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
 	return;
 
 err_force_wake:
-	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+	xe_gt_err(gt, "Failed to apply, err=%d\n", err);
 }
 
 void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
-- 
GitLab


From 12a66a47018aa2fbe60ea34a4de85a43c0799fb5 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Sep 2023 18:20:52 -0700
Subject: [PATCH 1869/2340] drm/xe: Add dbg messages for LRC WAs

Just like the GT and engine workarounds, add debug message with the
final value being written to the register for easy debugging.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230906012053.1733755-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3d6a7c11bac19..9e226b8a005af 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -133,10 +133,14 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 		++count;
 
 	if (count) {
+		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
+
 		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count);
 		xa_for_each(&sr->xa, reg, entry) {
 			bb->cs[bb->len++] = reg;
 			bb->cs[bb->len++] = entry->set_bits;
+			xe_gt_dbg(gt, "REG[0x%lx] = 0x%08x", reg,
+				  entry->set_bits);
 		}
 	}
 
-- 
GitLab


From 278c35822d61ae53d3a1d162b29adda671b11e3b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Sep 2023 18:20:53 -0700
Subject: [PATCH 1870/2340] drm/xe: Fix LRC workarounds

Fix 2 issues when writing LRC workarounds by copying the same handling
done when processing other RTP entries:

For masked registers, it was not correctly setting the upper 16bits.
Differently than i915, the entry itself doesn't set the upper bits
for masked registers: this is done when applying them. Testing on ADL-P:

Before:
	[drm:xe_gt_record_default_lrcs [xe]] LRC WA rcs0 save-restore MMIOs
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x2580] = 0x00000002
	...
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x7018] = 0x00002000
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x7300] = 0x00000040
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x7304] = 0x00000200

After:
	[drm:xe_gt_record_default_lrcs [xe]] LRC WA rcs0 save-restore MMIOs
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x2580] = 0x00060002
	...
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x7018] = 0x20002000
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x7300] = 0x00400040
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x7304] = 0x02000200

All of these registers are masked registers, so writing to them without
the relevant bits in the upper 16b doesn't have any effect.

Also, this adds support to regular registers; previously it was assumed
that LRC entries would only contain masked registers. However this is
not true. 0x6604 is not a masked register, but used in workarounds for
e.g.  ADL-P. See commit 28cf243a341a ("drm/i915/gt: Fix context
workarounds with non-masked regs"). In the same test with ADL-P as
above:

Before:
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x6604] = 0xe0000000
After:
	[drm:xe_gt_record_default_lrcs [xe]] REG[0x6604] = 0xe0efef6f

As can be seen, now it will read what was in the register rather than
completely overwrite the other bits.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230906012053.1733755-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 41 +++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 9e226b8a005af..678a276a25dc4 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -114,11 +114,20 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
 	return 0;
 }
 
+/*
+ * Convert back from encoded value to type-safe, only to be used when reg.mcr
+ * is true
+ */
+static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
+{
+	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
+}
+
 static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 {
 	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
 	struct xe_reg_sr_entry *entry;
-	unsigned long reg;
+	unsigned long idx;
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
 	struct dma_fence *fence;
@@ -129,18 +138,36 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
-	xa_for_each(&sr->xa, reg, entry)
+	xa_for_each(&sr->xa, idx, entry)
 		++count;
 
 	if (count) {
 		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
 
 		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count);
-		xa_for_each(&sr->xa, reg, entry) {
-			bb->cs[bb->len++] = reg;
-			bb->cs[bb->len++] = entry->set_bits;
-			xe_gt_dbg(gt, "REG[0x%lx] = 0x%08x", reg,
-				  entry->set_bits);
+
+		xa_for_each(&sr->xa, idx, entry) {
+			struct xe_reg reg = entry->reg;
+			struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
+			u32 val;
+
+			/*
+			 * Skip reading the register if it's not really needed
+			 */
+			if (reg.masked)
+				val = entry->clr_bits << 16;
+			else if (entry->clr_bits + 1)
+				val = (reg.mcr ?
+				       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
+				       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+			else
+				val = 0;
+
+			val |= entry->set_bits;
+
+			bb->cs[bb->len++] = reg.addr;
+			bb->cs[bb->len++] = val;
+			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
 		}
 	}
 
-- 
GitLab


From 2793fac1dbe068da5965acd9a78a181b33ad469b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 30 Aug 2023 17:47:14 -0400
Subject: [PATCH 1871/2340] drm/xe/uapi: Typo lingo and other small backwards
 compatible fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix typos, lingo and other small things identified during uapi
review.

v2: Also fix ALIGNMENT typo at xe_query.c
v3: Do not touch property to get/set. (Francois)

Link: https://lore.kernel.org/all/863bebd0c624d6fc2b38c0a06b63e468b4185128.camel@linux.intel.com/
Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c |  2 +-
 include/uapi/drm/xe_drm.h     | 19 ++++++++++---------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 1db77a7c90398..c3d396904c7b6 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -195,7 +195,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
 		config->info[XE_QUERY_CONFIG_FLAGS] =
 			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
-	config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] =
+	config->info[XE_QUERY_CONFIG_MIN_ALIGNMENT] =
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
 	config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
 	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 86f16d50e9ccc..902b5c4f3f5c6 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -256,7 +256,7 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
 #define XE_QUERY_CONFIG_FLAGS			1
 	#define XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
-#define XE_QUERY_CONFIG_MIN_ALIGNEMENT		2
+#define XE_QUERY_CONFIG_MIN_ALIGNMENT		2
 #define XE_QUERY_CONFIG_VA_BITS			3
 #define XE_QUERY_CONFIG_GT_COUNT		4
 #define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
@@ -449,7 +449,6 @@ struct drm_xe_gem_create {
 	 * If a VM is specified, this BO must:
 	 *
 	 *  1. Only ever be bound to that VM.
-	 *
 	 *  2. Cannot be exported as a PRIME fd.
 	 */
 	__u32 vm_id;
@@ -489,7 +488,7 @@ struct drm_xe_gem_mmap_offset {
  * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture
  */
 struct drm_xe_vm_bind_op_error_capture {
-	/** @error: errno that occured */
+	/** @error: errno that occurred */
 	__s32 error;
 
 	/** @op: operation that encounter an error */
@@ -609,7 +608,7 @@ struct drm_xe_vm_bind_op {
 	 * caused the error will be captured in drm_xe_vm_bind_op_error_capture.
 	 * Once the user sees the error (via a ufence +
 	 * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory
-	 * via non-async unbinds, and then restart all queue'd async binds op via
+	 * via non-async unbinds, and then restart all queued async binds op via
 	 * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the
 	 * VM.
 	 *
@@ -620,7 +619,7 @@ struct drm_xe_vm_bind_op {
 #define XE_VM_BIND_FLAG_ASYNC		(0x1 << 17)
 	/*
 	 * Valid on a faulting VM only, do the MAP operation immediately rather
-	 * than differing the MAP to the page fault handler.
+	 * than deferring the MAP to the page fault handler.
 	 */
 #define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 18)
 	/*
@@ -907,7 +906,7 @@ struct drm_xe_mmio {
 /**
  * struct drm_xe_wait_user_fence - wait user fence
  *
- * Wait on user fence, XE will wakeup on every HW engine interrupt in the
+ * Wait on user fence, XE will wake-up on every HW engine interrupt in the
  * instances list and check if user fence is complete::
  *
  *	(*addr & MASK) OP (VALUE & MASK)
@@ -1039,9 +1038,11 @@ struct drm_xe_vm_madvise {
 	 */
 #define DRM_XE_VM_MADVISE_PRIORITY		5
 #define		DRM_XE_VMA_PRIORITY_LOW		0
-#define		DRM_XE_VMA_PRIORITY_NORMAL	1	/* Default */
-#define		DRM_XE_VMA_PRIORITY_HIGH	2	/* Must be elevated user */
-	/* Pin the VMA in memory, must be elevated user */
+		/* Default */
+#define		DRM_XE_VMA_PRIORITY_NORMAL	1
+		/* Must be user with elevated privileges */
+#define		DRM_XE_VMA_PRIORITY_HIGH	2
+	/* Pin the VMA in memory, must be user with elevated privileges */
 #define DRM_XE_VM_MADVISE_PIN			6
 	/** @property: property to set */
 	__u32 property;
-- 
GitLab


From 9e6fe003d8c7e35bcd93f0a962b8fdc8889db35b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 30 Aug 2023 17:47:15 -0400
Subject: [PATCH 1872/2340] drm/xe/uapi: Remove useless max_page_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The min_page_size is useful information to ensure alignment and it is
an API actually in use. However max_page_size doesn't bring any useful
information to the userspace hence being not used at all.

So, let's remove and only bring it back if that ever gets used.

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 3 ---
 include/uapi/drm/xe_drm.h     | 4 ----
 2 files changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index c3d396904c7b6..a951205100fea 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -127,7 +127,6 @@ static int query_memory_usage(struct xe_device *xe,
 	usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM;
 	usage->regions[0].instance = 0;
 	usage->regions[0].min_page_size = PAGE_SIZE;
-	usage->regions[0].max_page_size = PAGE_SIZE;
 	usage->regions[0].total_size = man->size << PAGE_SHIFT;
 	if (perfmon_capable())
 		usage->regions[0].used = ttm_resource_manager_usage(man);
@@ -143,8 +142,6 @@ static int query_memory_usage(struct xe_device *xe,
 			usage->regions[usage->num_regions].min_page_size =
 				xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
 				SZ_64K : PAGE_SIZE;
-			usage->regions[usage->num_regions].max_page_size =
-				SZ_1G;
 			usage->regions[usage->num_regions].total_size =
 				man->size;
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 902b5c4f3f5c6..00d5cb4ef85e7 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -174,10 +174,6 @@ struct drm_xe_query_mem_region {
 	 * kernel.
 	 */
 	__u32 min_page_size;
-	/**
-	 * @max_page_size: Max page-size in bytes for this region.
-	 */
-	__u32 max_page_size;
 	/**
 	 * @total_size: The usable size in bytes for this region.
 	 */
-- 
GitLab


From 9fa81f914a1ce8ee7a5a0ce6f275a636a15bb109 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 8 Sep 2023 15:52:27 -0700
Subject: [PATCH 1873/2340] drm/xe/mmio: Account for GSI offset when checking
 ranges

Change xe_mmio_in_range() to use the same logic to account for the GT's
adj_offset as the read and write functions. This is needed when checking
ranges for the MCR registers if the GT has an offset to adjust.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230908225227.1276610-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_mcr.c | 4 ++--
 drivers/gpu/drm/xe/xe_mmio.h   | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index e74d3c5743c85..77925b35cf8dc 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -448,7 +448,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 			continue;
 
 		for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
-			if (xe_mmio_in_range(&gt->steering[type].ranges[i], reg)) {
+			if (xe_mmio_in_range(gt, &gt->steering[type].ranges[i], reg)) {
 				*group = gt->steering[type].group_target;
 				*instance = gt->steering[type].instance_target;
 				return true;
@@ -459,7 +459,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 	implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
 	if (implicit_ranges)
 		for (int i = 0; implicit_ranges[i].end > 0; i++)
-			if (xe_mmio_in_range(&implicit_ranges[i], reg))
+			if (xe_mmio_in_range(gt, &implicit_ranges[i], reg))
 				return false;
 
 	/*
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index cd9fe08ccf4ad..9e0fd4a6fb29c 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -127,9 +127,13 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask,
 int xe_mmio_ioctl(struct drm_device *dev, void *data,
 		  struct drm_file *file);
 
-static inline bool xe_mmio_in_range(const struct xe_mmio_range *range,
+static inline bool xe_mmio_in_range(const struct xe_gt *gt,
+				    const struct xe_mmio_range *range,
 				    struct xe_reg reg)
 {
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
 	return range && reg.addr >= range->start && reg.addr <= range->end;
 }
 
-- 
GitLab


From 08a4f00e62bc96eabf7d876933f84600a3dc5e69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 8 Sep 2023 11:17:11 +0200
Subject: [PATCH 1874/2340] drm/xe/bo: Simplify xe_bo_lock()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xe_bo_lock() was, although it only grabbed a single lock, unnecessarily
using ttm_eu_reserve_buffers(). Simplify and document the interface.

v2:
- Update also the xe_display subsystem.
v4:
- Reinstate a lost dma_resv_reserve_fences().
- Improve on xe_bo_lock() documentation (Matthew Brost)

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-2-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c     | 24 ++++++++--------
 drivers/gpu/drm/xe/xe_bo.c           | 43 +++++++++++++++++-----------
 drivers/gpu/drm/xe/xe_bo.h           |  5 ++--
 drivers/gpu/drm/xe/xe_bo_evict.c     | 19 ++++++------
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 41 ++++++++++----------------
 drivers/gpu/drm/xe/xe_vm.c           | 27 ++++++++---------
 drivers/gpu/drm/xe/xe_vm_madvise.c   | 30 ++++++++-----------
 7 files changed, 89 insertions(+), 100 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index c448b00a569cf..97788432a122e 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -204,9 +204,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 			goto cleanup_bo;
 		}
 
-		xe_bo_lock(external, &ww, 0, false);
+		xe_bo_lock(external, false);
 		err = xe_bo_pin_external(external);
-		xe_bo_unlock(external, &ww);
+		xe_bo_unlock(external);
 		if (err) {
 			KUNIT_FAIL(test, "external bo pin err=%pe\n",
 				   ERR_PTR(err));
@@ -272,9 +272,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 					   ERR_PTR(err));
 				goto cleanup_all;
 			}
-			xe_bo_lock(external, &ww, 0, false);
+			xe_bo_lock(external, false);
 			err = xe_bo_validate(external, NULL, false);
-			xe_bo_unlock(external, &ww);
+			xe_bo_unlock(external);
 			if (err) {
 				KUNIT_FAIL(test, "external bo valid err=%pe\n",
 					   ERR_PTR(err));
@@ -282,28 +282,28 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 			}
 		}
 
-		xe_bo_lock(external, &ww, 0, false);
+		xe_bo_lock(external, false);
 		xe_bo_unpin_external(external);
-		xe_bo_unlock(external, &ww);
+		xe_bo_unlock(external);
 
 		xe_bo_put(external);
 
-		xe_bo_lock(bo, &ww, 0, false);
+		xe_bo_lock(bo, false);
 		__xe_bo_unset_bulk_move(bo);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 		xe_bo_put(bo);
 		continue;
 
 cleanup_all:
-		xe_bo_lock(external, &ww, 0, false);
+		xe_bo_lock(external, false);
 		xe_bo_unpin_external(external);
-		xe_bo_unlock(external, &ww);
+		xe_bo_unlock(external);
 cleanup_external:
 		xe_bo_put(external);
 cleanup_bo:
-		xe_bo_lock(bo, &ww, 0, false);
+		xe_bo_lock(bo, false);
 		__xe_bo_unset_bulk_move(bo);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 		xe_bo_put(bo);
 		break;
 	}
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3cfd3f37c81e3..ee8e3c940cf4e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1082,13 +1082,11 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
 	struct xe_bo *bo = gem_to_xe_bo(obj);
 
 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
-		struct ww_acquire_ctx ww;
-
 		XE_WARN_ON(!xe_bo_is_user(bo));
 
-		xe_bo_lock(bo, &ww, 0, false);
+		xe_bo_lock(bo, false);
 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 	}
 }
 
@@ -1873,26 +1871,37 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
-	       int num_resv, bool intr)
+/**
+ * xe_bo_lock() - Lock the buffer object's dma_resv object
+ * @bo: The struct xe_bo whose lock is to be taken
+ * @intr: Whether to perform any wait interruptible
+ *
+ * Locks the buffer object's dma_resv object. If the buffer object is
+ * pointing to a shared dma_resv object, that shared lock is locked.
+ *
+ * Return: 0 on success, -EINTR if @intr is true and the wait for a
+ * contended lock was interrupted. If @intr is set to false, the
+ * function always returns 0.
+ */
+int xe_bo_lock(struct xe_bo *bo, bool intr)
 {
-	struct ttm_validate_buffer tv_bo;
-	LIST_HEAD(objs);
-	LIST_HEAD(dups);
+	if (intr)
+		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
 
-	XE_WARN_ON(!ww);
+	dma_resv_lock(bo->ttm.base.resv, NULL);
 
-	tv_bo.num_shared = num_resv;
-	tv_bo.bo = &bo->ttm;
-	list_add_tail(&tv_bo.head, &objs);
-
-	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+	return 0;
 }
 
-void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww)
+/**
+ * xe_bo_unlock() - Unlock the buffer object's dma_resv object
+ * @bo: The struct xe_bo whose lock is to be released.
+ *
+ * Unlock a buffer object lock that was locked by xe_bo_lock().
+ */
+void xe_bo_unlock(struct xe_bo *bo)
 {
 	dma_resv_unlock(bo->ttm.base.resv);
-	ww_acquire_fini(ww);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 76b8c836deb74..c06dafcc93ec1 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -158,10 +158,9 @@ static inline void xe_bo_assert_held(struct xe_bo *bo)
 		dma_resv_assert_held((bo)->ttm.base.resv);
 }
 
-int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
-	       int num_resv, bool intr);
+int xe_bo_lock(struct xe_bo *bo, bool intr);
 
-void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww);
+void xe_bo_unlock(struct xe_bo *bo);
 
 static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
 {
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 0d5c3a208ab4e..49c05ddea1645 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -27,7 +27,6 @@
 int xe_bo_evict_all(struct xe_device *xe)
 {
 	struct ttm_device *bdev = &xe->ttm;
-	struct ww_acquire_ctx ww;
 	struct xe_bo *bo;
 	struct xe_tile *tile;
 	struct list_head still_in_list;
@@ -62,9 +61,9 @@ int xe_bo_evict_all(struct xe_device *xe)
 		list_move_tail(&bo->pinned_link, &still_in_list);
 		spin_unlock(&xe->pinned.lock);
 
-		xe_bo_lock(bo, &ww, 0, false);
+		xe_bo_lock(bo, false);
 		ret = xe_bo_evict_pinned(bo);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 		xe_bo_put(bo);
 		if (ret) {
 			spin_lock(&xe->pinned.lock);
@@ -96,9 +95,9 @@ int xe_bo_evict_all(struct xe_device *xe)
 		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
 		spin_unlock(&xe->pinned.lock);
 
-		xe_bo_lock(bo, &ww, 0, false);
+		xe_bo_lock(bo, false);
 		ret = xe_bo_evict_pinned(bo);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 		xe_bo_put(bo);
 		if (ret)
 			return ret;
@@ -123,7 +122,6 @@ int xe_bo_evict_all(struct xe_device *xe)
  */
 int xe_bo_restore_kernel(struct xe_device *xe)
 {
-	struct ww_acquire_ctx ww;
 	struct xe_bo *bo;
 	int ret;
 
@@ -140,9 +138,9 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
 		spin_unlock(&xe->pinned.lock);
 
-		xe_bo_lock(bo, &ww, 0, false);
+		xe_bo_lock(bo, false);
 		ret = xe_bo_restore_pinned(bo);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 		if (ret) {
 			xe_bo_put(bo);
 			return ret;
@@ -184,7 +182,6 @@ int xe_bo_restore_kernel(struct xe_device *xe)
  */
 int xe_bo_restore_user(struct xe_device *xe)
 {
-	struct ww_acquire_ctx ww;
 	struct xe_bo *bo;
 	struct xe_tile *tile;
 	struct list_head still_in_list;
@@ -206,9 +203,9 @@ int xe_bo_restore_user(struct xe_device *xe)
 		xe_bo_get(bo);
 		spin_unlock(&xe->pinned.lock);
 
-		xe_bo_lock(bo, &ww, 0, false);
+		xe_bo_lock(bo, false);
 		ret = xe_bo_restore_pinned(bo);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 		xe_bo_put(bo);
 		if (ret) {
 			spin_lock(&xe->pinned.lock);
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 21e0e9c7b6340..bdef4b76028b1 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -171,20 +171,18 @@ retry_userptr:
 
 	/* Lock VM and BOs dma-resv */
 	bo = xe_vma_bo(vma);
-	if (only_needs_bo_lock(bo)) {
-		/* This path ensures the BO's LRU is updated */
-		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
-	} else {
+	if (!only_needs_bo_lock(bo)) {
 		tv_vm.num_shared = xe->info.tile_count;
 		tv_vm.bo = xe_vm_ttm_bo(vm);
 		list_add(&tv_vm.head, &objs);
-		if (bo) {
-			tv_bo.bo = &bo->ttm;
-			tv_bo.num_shared = xe->info.tile_count;
-			list_add(&tv_bo.head, &objs);
-		}
-		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
 	}
+	if (bo) {
+		tv_bo.bo = &bo->ttm;
+		tv_bo.num_shared = xe->info.tile_count;
+		list_add(&tv_bo.head, &objs);
+	}
+
+	ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
 	if (ret)
 		goto unlock_vm;
 
@@ -227,10 +225,7 @@ retry_userptr:
 	vma->usm.tile_invalidated &= ~BIT(tile->id);
 
 unlock_dma_resv:
-	if (only_needs_bo_lock(bo))
-		xe_bo_unlock(bo, &ww);
-	else
-		ttm_eu_backoff_reservation(&ww, &objs);
+	ttm_eu_backoff_reservation(&ww, &objs);
 unlock_vm:
 	if (!ret)
 		vm->usm.last_fault_vma = vma;
@@ -534,28 +529,22 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
 
 	/* Lock VM and BOs dma-resv */
 	bo = xe_vma_bo(vma);
-	if (only_needs_bo_lock(bo)) {
-		/* This path ensures the BO's LRU is updated */
-		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
-	} else {
+	if (!only_needs_bo_lock(bo)) {
 		tv_vm.num_shared = xe->info.tile_count;
 		tv_vm.bo = xe_vm_ttm_bo(vm);
 		list_add(&tv_vm.head, &objs);
-		tv_bo.bo = &bo->ttm;
-		tv_bo.num_shared = xe->info.tile_count;
-		list_add(&tv_bo.head, &objs);
-		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
 	}
+	tv_bo.bo = &bo->ttm;
+	tv_bo.num_shared = xe->info.tile_count;
+	list_add(&tv_bo.head, &objs);
+	ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
 	if (ret)
 		goto unlock_vm;
 
 	/* Migrate to VRAM, move should invalidate the VMA first */
 	ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id);
 
-	if (only_needs_bo_lock(bo))
-		xe_bo_unlock(bo, &ww);
-	else
-		ttm_eu_backoff_reservation(&ww, &objs);
+	ttm_eu_backoff_reservation(&ww, &objs);
 unlock_vm:
 	up_read(&vm->lock);
 	xe_vm_put(vm);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 24ee74f62385c..2bd1fa34256a4 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -267,13 +267,16 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
 {
 	struct xe_exec_queue *q;
-	struct ww_acquire_ctx ww;
 	int err;
 
-	err = xe_bo_lock(bo, &ww, vm->preempt.num_exec_queues, true);
+	err = xe_bo_lock(bo, true);
 	if (err)
 		return err;
 
+	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
+	if (err)
+		goto out_unlock;
+
 	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
 		if (q->compute.pfence) {
 			dma_resv_add_fence(bo->ttm.base.resv,
@@ -281,8 +284,9 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
 					   DMA_RESV_USAGE_BOOKKEEP);
 		}
 
-	xe_bo_unlock(bo, &ww);
-	return 0;
+out_unlock:
+	xe_bo_unlock(bo);
+	return err;
 }
 
 /**
@@ -1033,12 +1037,11 @@ bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
 static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
 				 struct xe_vma *ignore)
 {
-	struct ww_acquire_ctx ww;
 	bool ret;
 
-	xe_bo_lock(bo, &ww, 0, false);
+	xe_bo_lock(bo, false);
 	ret = !!bo_has_vm_references_locked(bo, vm, ignore);
-	xe_bo_unlock(bo, &ww);
+	xe_bo_unlock(bo);
 
 	return ret;
 }
@@ -2264,7 +2267,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			 u32 operation, u8 tile_mask, u32 region)
 {
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
-	struct ww_acquire_ctx ww;
 	struct drm_gpuva_ops *ops;
 	struct drm_gpuva_op *__op;
 	struct xe_vma_op *op;
@@ -2323,7 +2325,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	case XE_VM_BIND_OP_UNMAP_ALL:
 		XE_WARN_ON(!bo);
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return ERR_PTR(err);
 
@@ -2333,7 +2335,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 
 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
 		drm_gpuvm_bo_put(vm_bo);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 		if (IS_ERR(ops))
 			return ops;
 
@@ -2369,13 +2371,12 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 {
 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
 	struct xe_vma *vma;
-	struct ww_acquire_ctx ww;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
 
 	if (bo) {
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return ERR_PTR(err);
 	}
@@ -2384,7 +2385,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 			    op->va.range - 1, read_only, is_null,
 			    tile_mask);
 	if (bo)
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 
 	if (xe_vma_is_userptr(vma)) {
 		err = xe_vma_userptr_pin_pages(vma);
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 9abcd742c8339..0648274b90b96 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -28,16 +28,15 @@ static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
 
 	for (i = 0; i < num_vmas; ++i) {
 		struct xe_bo *bo;
-		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return err;
 		bo->props.preferred_mem_class = value;
 		xe_bo_placement_for_flags(xe, bo, bo->flags);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 	}
 
 	return 0;
@@ -53,16 +52,15 @@ static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm,
 
 	for (i = 0; i < num_vmas; ++i) {
 		struct xe_bo *bo;
-		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return err;
 		bo->props.preferred_gt = value;
 		xe_bo_placement_for_flags(xe, bo, bo->flags);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 	}
 
 	return 0;
@@ -89,17 +87,16 @@ static int madvise_preferred_mem_class_gt(struct xe_device *xe,
 
 	for (i = 0; i < num_vmas; ++i) {
 		struct xe_bo *bo;
-		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return err;
 		bo->props.preferred_mem_class = mem_class;
 		bo->props.preferred_gt = gt_id;
 		xe_bo_placement_for_flags(xe, bo, bo->flags);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 	}
 
 	return 0;
@@ -112,13 +109,12 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
 
 	for (i = 0; i < num_vmas; ++i) {
 		struct xe_bo *bo;
-		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
 		if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
 			return -EINVAL;
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return err;
 		bo->props.cpu_atomic = !!value;
@@ -130,7 +126,7 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
 		 */
 		if (bo->props.cpu_atomic)
 			ttm_bo_unmap_virtual(&bo->ttm);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 	}
 
 	return 0;
@@ -143,18 +139,17 @@ static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm,
 
 	for (i = 0; i < num_vmas; ++i) {
 		struct xe_bo *bo;
-		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
 		if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
 				 !(bo->flags & XE_BO_CREATE_VRAM1_BIT)))
 			return -EINVAL;
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return err;
 		bo->props.device_atomic = !!value;
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 	}
 
 	return 0;
@@ -174,16 +169,15 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
 
 	for (i = 0; i < num_vmas; ++i) {
 		struct xe_bo *bo;
-		struct ww_acquire_ctx ww;
 
 		bo = xe_vma_bo(vmas[i]);
 
-		err = xe_bo_lock(bo, &ww, 0, true);
+		err = xe_bo_lock(bo, true);
 		if (err)
 			return err;
 		bo->ttm.priority = value;
 		ttm_bo_move_to_lru_tail(&bo->ttm);
-		xe_bo_unlock(bo, &ww);
+		xe_bo_unlock(bo);
 	}
 
 	return 0;
-- 
GitLab


From d00e9cc28e1e42108618e7a146969a26679170a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 8 Sep 2023 11:17:12 +0200
Subject: [PATCH 1875/2340] drm/xe/vm: Simplify and document xe_vm_lock()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xe_vm_lock() function was unnecessarily using ttm_eu_reserve_buffers().
Simplify and document the interface.

v4:
- Improve on xe_vm_lock() documentation (Matthew Brost)
v5:
- Rebase conflict.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-3-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c      |  9 +++---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  5 ++-
 drivers/gpu/drm/xe/xe_bo.c            |  5 ++-
 drivers/gpu/drm/xe/xe_exec_queue.c    |  5 ++-
 drivers/gpu/drm/xe/xe_lrc.c           |  6 ++--
 drivers/gpu/drm/xe/xe_migrate.c       | 10 +++---
 drivers/gpu/drm/xe/xe_vm.c            | 46 +++++++++++++--------------
 drivers/gpu/drm/xe/xe_vm.h            |  5 ++-
 8 files changed, 41 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 97788432a122e..ad6dd6fae853b 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -180,7 +180,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
 		XE_BO_CREATE_VRAM_IF_DGFX(tile);
 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
-	struct ww_acquire_ctx ww;
 	struct xe_gt *__gt;
 	int err, i, id;
 
@@ -188,10 +187,10 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 		   dev_name(xe->drm.dev), tile->id);
 
 	for (i = 0; i < 2; ++i) {
-		xe_vm_lock(vm, &ww, 0, false);
+		xe_vm_lock(vm, false);
 		bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device,
 				  bo_flags);
-		xe_vm_unlock(vm, &ww);
+		xe_vm_unlock(vm);
 		if (IS_ERR(bo)) {
 			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
 			break;
@@ -263,9 +262,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 
 		if (i) {
 			down_read(&vm->lock);
-			xe_vm_lock(vm, &ww, 0, false);
+			xe_vm_lock(vm, false);
 			err = xe_bo_validate(bo, bo->vm, false);
-			xe_vm_unlock(vm, &ww);
+			xe_vm_unlock(vm);
 			up_read(&vm->lock);
 			if (err) {
 				KUNIT_FAIL(test, "bo valid err=%pe\n",
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 5c8d5e78d9bc4..8bb081086ca2e 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -396,14 +396,13 @@ static int migrate_test_run_device(struct xe_device *xe)
 
 	for_each_tile(tile, xe, id) {
 		struct xe_migrate *m = tile->migrate;
-		struct ww_acquire_ctx ww;
 
 		kunit_info(test, "Testing tile id %d.\n", id);
-		xe_vm_lock(m->q->vm, &ww, 0, true);
+		xe_vm_lock(m->q->vm, true);
 		xe_device_mem_access_get(xe);
 		xe_migrate_sanity_test(m, test);
 		xe_device_mem_access_put(xe);
-		xe_vm_unlock(m->q->vm, &ww);
+		xe_vm_unlock(m->q->vm);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index ee8e3c940cf4e..c33a204200224 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1759,7 +1759,6 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = to_xe_file(file);
 	struct drm_xe_gem_create *args = data;
-	struct ww_acquire_ctx ww;
 	struct xe_vm *vm = NULL;
 	struct xe_bo *bo;
 	unsigned int bo_flags = XE_BO_CREATE_USER_BIT;
@@ -1812,7 +1811,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		vm = xe_vm_lookup(xef, args->vm_id);
 		if (XE_IOCTL_DBG(xe, !vm))
 			return -ENOENT;
-		err = xe_vm_lock(vm, &ww, 0, true);
+		err = xe_vm_lock(vm, true);
 		if (err) {
 			xe_vm_put(vm);
 			return err;
@@ -1840,7 +1839,7 @@ out_put:
 	xe_bo_put(bo);
 out_vm:
 	if (vm) {
-		xe_vm_unlock(vm, &ww);
+		xe_vm_unlock(vm);
 		xe_vm_put(vm);
 	}
 	return err;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index f28bceceb99ae..a0b5647923acf 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -111,18 +111,17 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
 					   u32 logical_mask, u16 width,
 					   struct xe_hw_engine *hwe, u32 flags)
 {
-	struct ww_acquire_ctx ww;
 	struct xe_exec_queue *q;
 	int err;
 
 	if (vm) {
-		err = xe_vm_lock(vm, &ww, 0, true);
+		err = xe_vm_lock(vm, true);
 		if (err)
 			return ERR_PTR(err);
 	}
 	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
 	if (vm)
-		xe_vm_unlock(vm, &ww);
+		xe_vm_unlock(vm);
 
 	return q;
 }
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 2b4219c383593..434fbb364b4bf 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -789,16 +789,14 @@ err_lrc_finish:
 
 void xe_lrc_finish(struct xe_lrc *lrc)
 {
-	struct ww_acquire_ctx ww;
-
 	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
 	if (lrc->bo->vm)
-		xe_vm_lock(lrc->bo->vm, &ww, 0, false);
+		xe_vm_lock(lrc->bo->vm, false);
 	else
 		xe_bo_lock_no_vm(lrc->bo, NULL);
 	xe_bo_unpin(lrc->bo);
 	if (lrc->bo->vm)
-		xe_vm_unlock(lrc->bo->vm, &ww);
+		xe_vm_unlock(lrc->bo->vm);
 	else
 		xe_bo_unlock_no_vm(lrc->bo);
 	xe_bo_put(lrc->bo);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 799ad02092795..8291798e1aaa5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -88,13 +88,12 @@ struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile)
 static void xe_migrate_fini(struct drm_device *dev, void *arg)
 {
 	struct xe_migrate *m = arg;
-	struct ww_acquire_ctx ww;
 
-	xe_vm_lock(m->q->vm, &ww, 0, false);
+	xe_vm_lock(m->q->vm, false);
 	xe_bo_unpin(m->pt_bo);
 	if (m->cleared_bo)
 		xe_bo_unpin(m->cleared_bo);
-	xe_vm_unlock(m->q->vm, &ww);
+	xe_vm_unlock(m->q->vm);
 
 	dma_fence_put(m->fence);
 	if (m->cleared_bo)
@@ -338,7 +337,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	struct xe_gt *primary_gt = tile->primary_gt;
 	struct xe_migrate *m;
 	struct xe_vm *vm;
-	struct ww_acquire_ctx ww;
 	int err;
 
 	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
@@ -353,9 +351,9 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	if (IS_ERR(vm))
 		return ERR_CAST(vm);
 
-	xe_vm_lock(vm, &ww, 0, false);
+	xe_vm_lock(vm, false);
 	err = xe_migrate_prepare_vm(tile, m, vm);
-	xe_vm_unlock(vm, &ww);
+	xe_vm_unlock(vm);
 	if (err) {
 		xe_vm_close_and_put(vm);
 		return ERR_PTR(err);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 2bd1fa34256a4..0ac421c4e1847 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -523,18 +523,17 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
 
 static void xe_vm_kill(struct xe_vm *vm)
 {
-	struct ww_acquire_ctx ww;
 	struct xe_exec_queue *q;
 
 	lockdep_assert_held(&vm->lock);
 
-	xe_vm_lock(vm, &ww, 0, false);
+	xe_vm_lock(vm, false);
 	vm->flags |= XE_VM_FLAG_BANNED;
 	trace_xe_vm_kill(vm);
 
 	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
 		q->ops->kill(q);
-	xe_vm_unlock(vm, &ww);
+	xe_vm_unlock(vm);
 
 	/* TODO: Inform user the VM is banned */
 }
@@ -1420,7 +1419,6 @@ static void xe_vm_close(struct xe_vm *vm)
 void xe_vm_close_and_put(struct xe_vm *vm)
 {
 	LIST_HEAD(contested);
-	struct ww_acquire_ctx ww;
 	struct xe_device *xe = vm->xe;
 	struct xe_tile *tile;
 	struct xe_vma *vma, *next_vma;
@@ -1443,7 +1441,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	}
 
 	down_write(&vm->lock);
-	xe_vm_lock(vm, &ww, 0, false);
+	xe_vm_lock(vm, false);
 	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
 		vma = gpuva_to_vma(gpuva);
 
@@ -1488,7 +1486,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 			vm->pt_root[id] = NULL;
 		}
 	}
-	xe_vm_unlock(vm, &ww);
+	xe_vm_unlock(vm);
 
 	/*
 	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
@@ -3442,30 +3440,32 @@ free_objs:
 	return err == -ENODATA ? 0 : err;
 }
 
-/*
- * XXX: Using the TTM wrappers for now, likely can call into dma-resv code
- * directly to optimize. Also this likely should be an inline function.
+/**
+ * xe_vm_lock() - Lock the vm's dma_resv object
+ * @vm: The struct xe_vm whose lock is to be locked
+ * @intr: Whether to perform any wait interruptible
+ *
+ * Return: 0 on success, -EINTR if @intr is true and the wait for a
+ * contended lock was interrupted. If @intr is false, the function
+ * always returns 0.
  */
-int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
-	       int num_resv, bool intr)
+int xe_vm_lock(struct xe_vm *vm, bool intr)
 {
-	struct ttm_validate_buffer tv_vm;
-	LIST_HEAD(objs);
-	LIST_HEAD(dups);
+	if (intr)
+		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
 
-	XE_WARN_ON(!ww);
-
-	tv_vm.num_shared = num_resv;
-	tv_vm.bo = xe_vm_ttm_bo(vm);
-	list_add_tail(&tv_vm.head, &objs);
-
-	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+	return dma_resv_lock(xe_vm_resv(vm), NULL);
 }
 
-void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
+/**
+ * xe_vm_unlock() - Unlock the vm's dma_resv object
+ * @vm: The struct xe_vm whose lock is to be released.
+ *
+ * Unlock a buffer object lock that was locked by xe_vm_lock().
+ */
+void xe_vm_unlock(struct xe_vm *vm)
 {
 	dma_resv_unlock(xe_vm_resv(vm));
-	ww_acquire_fini(ww);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 805236578140c..dd20e5c8106f0 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -38,10 +38,9 @@ static inline void xe_vm_put(struct xe_vm *vm)
 	drm_gpuvm_put(&vm->gpuvm);
 }
 
-int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
-	       int num_resv, bool intr);
+int xe_vm_lock(struct xe_vm *vm, bool intr);
 
-void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww);
+void xe_vm_unlock(struct xe_vm *vm);
 
 static inline bool xe_vm_is_closed(struct xe_vm *vm)
 {
-- 
GitLab


From b7ab8c4f028f87b8c79c9f99e12b891fd5430483 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 8 Sep 2023 11:17:13 +0200
Subject: [PATCH 1876/2340] drm/xe/bo: Remove the lock_no_vm()/unlock_no_vm()
 interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apart from asserts, it's essentially the same as
xe_bo_lock()/xe_bo_unlock(), and the usage intentions of this interface
was unclear. Remove it.

v2:
- Update the xe_display subsystem as well.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-4-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c      |  2 +-
 drivers/gpu/drm/xe/tests/xe_dma_buf.c |  4 ++--
 drivers/gpu/drm/xe/tests/xe_migrate.c |  2 +-
 drivers/gpu/drm/xe/xe_bo.h            | 23 ++---------------------
 drivers/gpu/drm/xe/xe_dma_buf.c       |  5 +++--
 drivers/gpu/drm/xe/xe_lrc.c           | 10 ++--------
 6 files changed, 11 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index ad6dd6fae853b..2c04357377abc 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -143,7 +143,7 @@ static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt,
 	ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test);
 
 out_unlock:
-	xe_bo_unlock_no_vm(bo);
+	xe_bo_unlock(bo);
 	xe_bo_put(bo);
 }
 
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 810a035bf7200..1c4d8751be696 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -147,14 +147,14 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 			int err;
 
 			/* Is everything where we expect it to be? */
-			xe_bo_lock_no_vm(import_bo, NULL);
+			xe_bo_lock(import_bo, false);
 			err = xe_bo_validate(import_bo, NULL, false);
 			if (err && err != -EINTR && err != -ERESTARTSYS)
 				KUNIT_FAIL(test,
 					   "xe_bo_validate() failed with err=%d\n", err);
 
 			check_residency(test, bo, import_bo, dmabuf);
-			xe_bo_unlock_no_vm(import_bo);
+			xe_bo_unlock(import_bo);
 		}
 		drm_gem_object_put(import);
 	} else if (PTR_ERR(import) != -EOPNOTSUPP) {
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 8bb081086ca2e..f58cd1da1a341 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -183,7 +183,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 
 	xe_bo_vunmap(sysmem);
 out_unlock:
-	xe_bo_unlock_no_vm(sysmem);
+	xe_bo_unlock(sysmem);
 	xe_bo_put(sysmem);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index c06dafcc93ec1..d22b2ae7db729 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -173,25 +173,6 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
 	}
 }
 
-static inline void xe_bo_lock_no_vm(struct xe_bo *bo,
-				    struct ww_acquire_ctx *ctx)
-{
-	if (bo) {
-		XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
-				      bo->ttm.base.resv != &bo->ttm.base._resv));
-		dma_resv_lock(bo->ttm.base.resv, ctx);
-	}
-}
-
-static inline void xe_bo_unlock_no_vm(struct xe_bo *bo)
-{
-	if (bo) {
-		XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
-				      bo->ttm.base.resv != &bo->ttm.base._resv));
-		dma_resv_unlock(bo->ttm.base.resv);
-	}
-}
-
 int xe_bo_pin_external(struct xe_bo *bo);
 int xe_bo_pin(struct xe_bo *bo);
 void xe_bo_unpin_external(struct xe_bo *bo);
@@ -206,9 +187,9 @@ static inline bool xe_bo_is_pinned(struct xe_bo *bo)
 static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
 {
 	if (likely(bo)) {
-		xe_bo_lock_no_vm(bo, NULL);
+		xe_bo_lock(bo, false);
 		xe_bo_unpin(bo);
-		xe_bo_unlock_no_vm(bo);
+		xe_bo_unlock(bo);
 
 		xe_bo_put(bo);
 	}
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index b9bf4b4dd8a5e..8ce1b582402a6 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -150,9 +150,10 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 	if (!reads)
 		return 0;
 
-	xe_bo_lock_no_vm(bo, NULL);
+	/* Can we do interruptible lock here? */
+	xe_bo_lock(bo, false);
 	(void)xe_bo_migrate(bo, XE_PL_TT);
-	xe_bo_unlock_no_vm(bo);
+	xe_bo_unlock(bo);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 434fbb364b4bf..6f899b6a48774 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -790,15 +790,9 @@ err_lrc_finish:
 void xe_lrc_finish(struct xe_lrc *lrc)
 {
 	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
-	if (lrc->bo->vm)
-		xe_vm_lock(lrc->bo->vm, false);
-	else
-		xe_bo_lock_no_vm(lrc->bo, NULL);
+	xe_bo_lock(lrc->bo, false);
 	xe_bo_unpin(lrc->bo);
-	if (lrc->bo->vm)
-		xe_vm_unlock(lrc->bo->vm);
-	else
-		xe_bo_unlock_no_vm(lrc->bo);
+	xe_bo_unlock(lrc->bo);
 	xe_bo_put(lrc->bo);
 }
 
-- 
GitLab


From d490ecf577903ce5a9e6a3bb3bd08b5a550719c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 8 Sep 2023 11:17:14 +0200
Subject: [PATCH 1877/2340] drm/xe: Rework xe_exec and the VM rebind worker to
 use the drm_exec helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the calls to ttm_eu_reserve_buffers() by using the drm_exec
helper instead. Also make sure the locking loop covers any calls to
xe_bo_validate() / ttm_bo_validate() so that these function calls may
easily benefit from being called from within an unsealed locking
transaction and may thus perform blocking dma_resv locks in the future.

For the unlock we remove an assert that the vm->rebind_list is empty
when locks are released. Since if the error path is hit with a partly
locked list, that assert may no longer hold true we chose to remove it.

v3:
- Don't accept duplicate bo locks in the rebind worker.
v5:
- Loop over drm_exec objects in reverse when unlocking.
v6:
- We can't keep the WW ticket when retrying validation on OOM. Fix.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-5-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig   |   2 +
 drivers/gpu/drm/xe/xe_exec.c |  77 +++-------
 drivers/gpu/drm/xe/xe_vm.c   | 271 ++++++++++++++++-------------------
 drivers/gpu/drm/xe/xe_vm.h   |  22 +--
 4 files changed, 153 insertions(+), 219 deletions(-)

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 6742ed4feecd0..7bffc039d63f0 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -8,6 +8,7 @@ config DRM_XE
 	select SHMEM
 	select TMPFS
 	select DRM_BUDDY
+	select DRM_EXEC
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_SUBALLOC_HELPER
@@ -21,6 +22,7 @@ config DRM_XE
 	select VMAP_PFN
 	select DRM_TTM
 	select DRM_TTM_HELPER
+	select DRM_EXEC
 	select DRM_GPUVM
 	select DRM_SCHED
 	select MMU_NOTIFIER
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 629d81a789e77..eb7fc3192c222 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -6,6 +6,7 @@
 #include "xe_exec.h"
 
 #include <drm/drm_device.h>
+#include <drm/drm_exec.h>
 #include <drm/drm_file.h>
 #include <drm/xe_drm.h>
 #include <linux/delay.h>
@@ -93,25 +94,16 @@
  *	Unlock all
  */
 
-#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
-
-static int xe_exec_begin(struct xe_exec_queue *q, struct ww_acquire_ctx *ww,
-			 struct ttm_validate_buffer tv_onstack[],
-			 struct ttm_validate_buffer **tv,
-			 struct list_head *objs)
+static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm)
 {
-	struct xe_vm *vm = q->vm;
 	struct xe_vma *vma;
 	LIST_HEAD(dups);
-	ktime_t end = 0;
 	int err = 0;
 
-	*tv = NULL;
-	if (xe_vm_no_dma_fences(q->vm))
+	if (xe_vm_no_dma_fences(vm))
 		return 0;
 
-retry:
-	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
+	err = xe_vm_lock_dma_resv(vm, exec, 1, true);
 	if (err)
 		return err;
 
@@ -127,42 +119,13 @@ retry:
 			continue;
 
 		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
-		if (err) {
-			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
-			*tv = NULL;
+		if (err)
 			break;
-		}
-	}
-
-	/*
-	 * With multiple active VMs, under memory pressure, it is possible that
-	 * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
-	 * Until ttm properly handles locking in such scenarios, best thing the
-	 * driver can do is retry with a timeout.
-	 */
-	if (err == -ENOMEM) {
-		ktime_t cur = ktime_get();
-
-		end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS);
-		if (ktime_before(cur, end)) {
-			msleep(20);
-			goto retry;
-		}
 	}
 
 	return err;
 }
 
-static void xe_exec_end(struct xe_exec_queue *q,
-			struct ttm_validate_buffer *tv_onstack,
-			struct ttm_validate_buffer *tv,
-			struct ww_acquire_ctx *ww,
-			struct list_head *objs)
-{
-	if (!xe_vm_no_dma_fences(q->vm))
-		xe_vm_unlock_dma_resv(q->vm, tv_onstack, tv, ww, objs);
-}
-
 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
@@ -173,15 +136,13 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_exec_queue *q;
 	struct xe_sync_entry *syncs = NULL;
 	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
-	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
-	struct ttm_validate_buffer *tv = NULL;
+	struct drm_exec exec;
 	u32 i, num_syncs = 0;
 	struct xe_sched_job *job;
 	struct dma_fence *rebind_fence;
 	struct xe_vm *vm;
-	struct ww_acquire_ctx ww;
-	struct list_head objs;
 	bool write_locked;
+	ktime_t end = 0;
 	int err = 0;
 
 	if (XE_IOCTL_DBG(xe, args->extensions) ||
@@ -294,26 +255,34 @@ retry:
 			goto err_unlock_list;
 	}
 
-	err = xe_exec_begin(q, &ww, tv_onstack, &tv, &objs);
-	if (err)
-		goto err_unlock_list;
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_until_all_locked(&exec) {
+		err = xe_exec_begin(&exec, vm);
+		drm_exec_retry_on_contention(&exec);
+		if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
+			err = -EAGAIN;
+			goto err_unlock_list;
+		}
+		if (err)
+			goto err_exec;
+	}
 
 	if (xe_vm_is_closed_or_banned(q->vm)) {
 		drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
 		err = -ECANCELED;
-		goto err_exec_queue_end;
+		goto err_exec;
 	}
 
 	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
 		err = -EWOULDBLOCK;
-		goto err_exec_queue_end;
+		goto err_exec;
 	}
 
 	job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ?
 				  addresses : &args->address);
 	if (IS_ERR(job)) {
 		err = PTR_ERR(job);
-		goto err_exec_queue_end;
+		goto err_exec;
 	}
 
 	/*
@@ -412,8 +381,8 @@ err_repin:
 err_put_job:
 	if (err)
 		xe_sched_job_put(job);
-err_exec_queue_end:
-	xe_exec_end(q, tv_onstack, tv, &ww, &objs);
+err_exec:
+	drm_exec_fini(&exec);
 err_unlock_list:
 	if (write_locked)
 		up_write(&vm->lock);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0ac421c4e1847..80b374b9cdd1e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -7,6 +7,7 @@
 
 #include <linux/dma-fence-array.h>
 
+#include <drm/drm_exec.h>
 #include <drm/drm_print.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 #include <drm/ttm/ttm_tt.h>
@@ -327,10 +328,7 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
 
 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 {
-	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
-	struct ttm_validate_buffer *tv;
-	struct ww_acquire_ctx ww;
-	struct list_head objs;
+	struct drm_exec exec;
 	struct dma_fence *pfence;
 	int err;
 	bool wait;
@@ -338,10 +336,13 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
 
 	down_write(&vm->lock);
-
-	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
-	if (err)
-		goto out_unlock_outer;
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_until_all_locked(&exec) {
+		err = xe_vm_lock_dma_resv(vm, &exec, 1, true);
+		drm_exec_retry_on_contention(&exec);
+		if (err)
+			goto out_unlock;
+	}
 
 	pfence = xe_preempt_fence_create(q, q->compute.context,
 					 ++q->compute.seqno);
@@ -373,8 +374,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	up_read(&vm->userptr.notifier_lock);
 
 out_unlock:
-	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
-out_unlock_outer:
+	drm_exec_fini(&exec);
 	up_write(&vm->lock);
 
 	return err;
@@ -403,68 +403,35 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
  * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
  * objects of the vm's external buffer objects.
  * @vm: The vm.
- * @ww: Pointer to a struct ww_acquire_ctx locking context.
- * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
- * ttm_validate_buffers used for locking.
- * @tv: Pointer to a pointer that on output contains the actual storage used.
- * @objs: List head for the buffer objects locked.
- * @intr: Whether to lock interruptible.
+ * @exec: Pointer to a struct drm_exec locking context.
  * @num_shared: Number of dma-fence slots to reserve in the locked objects.
+ * @lock_vm: Lock also the vm's dma_resv.
  *
  * Locks the vm dma-resv objects and all the dma-resv objects of the
- * buffer objects on the vm external object list. The TTM utilities require
- * a list of struct ttm_validate_buffers pointing to the actual buffer
- * objects to lock. Storage for those struct ttm_validate_buffers should
- * be provided in @tv_onstack, and is typically reserved on the stack
- * of the caller. If the size of @tv_onstack isn't sufficient, then
- * storage will be allocated internally using kvmalloc().
- *
- * The function performs deadlock handling internally, and after a
- * successful return the ww locking transaction should be considered
- * sealed.
+ * buffer objects on the vm external object list.
  *
  * Return: 0 on success, Negative error code on error. In particular if
- * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
- * of error, any locking performed has been reverted.
+ * @intr is set to true, -EINTR or -ERESTARTSYS may be returned.
  */
-int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
-			struct ttm_validate_buffer *tv_onstack,
-			struct ttm_validate_buffer **tv,
-			struct list_head *objs,
-			bool intr,
-			unsigned int num_shared)
-{
-	struct ttm_validate_buffer *tv_vm, *tv_bo;
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
+			unsigned int num_shared, bool lock_vm)
+{
 	struct xe_vma *vma, *next;
-	LIST_HEAD(dups);
-	int err;
+	int err = 0;
 
 	lockdep_assert_held(&vm->lock);
 
-	if (vm->extobj.entries < XE_ONSTACK_TV) {
-		tv_vm = tv_onstack;
-	} else {
-		tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
-				       GFP_KERNEL);
-		if (!tv_vm)
-			return -ENOMEM;
+	if (lock_vm) {
+		err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base, num_shared);
+		if (err)
+			return err;
 	}
-	tv_bo = tv_vm + 1;
 
-	INIT_LIST_HEAD(objs);
 	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
-		tv_bo->num_shared = num_shared;
-		tv_bo->bo = &xe_vma_bo(vma)->ttm;
-
-		list_add_tail(&tv_bo->head, objs);
-		tv_bo++;
+		err = drm_exec_prepare_obj(exec, &xe_vma_bo(vma)->ttm.base, num_shared);
+		if (err)
+			return err;
 	}
-	tv_vm->num_shared = num_shared;
-	tv_vm->bo = xe_vm_ttm_bo(vm);
-	list_add_tail(&tv_vm->head, objs);
-	err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
-	if (err)
-		goto out_err;
 
 	spin_lock(&vm->notifier.list_lock);
 	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
@@ -478,45 +445,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
 	}
 	spin_unlock(&vm->notifier.list_lock);
 
-	*tv = tv_vm;
 	return 0;
-
-out_err:
-	if (tv_vm != tv_onstack)
-		kvfree(tv_vm);
-
-	return err;
-}
-
-/**
- * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
- * xe_vm_lock_dma_resv()
- * @vm: The vm.
- * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
- * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
- * @ww: The ww_acquire_context used for locking.
- * @objs: The list returned from xe_vm_lock_dma_resv().
- *
- * Unlocks the reservation objects and frees any memory allocated by
- * xe_vm_lock_dma_resv().
- */
-void xe_vm_unlock_dma_resv(struct xe_vm *vm,
-			   struct ttm_validate_buffer *tv_onstack,
-			   struct ttm_validate_buffer *tv,
-			   struct ww_acquire_ctx *ww,
-			   struct list_head *objs)
-{
-	/*
-	 * Nothing should've been able to enter the list while we were locked,
-	 * since we've held the dma-resvs of all the vm's external objects,
-	 * and holding the dma_resv of an object is required for list
-	 * addition, and we shouldn't add ourselves.
-	 */
-	XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));
-
-	ttm_eu_backoff_reservation(ww, objs);
-	if (tv && tv != tv_onstack)
-		kvfree(tv);
 }
 
 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
@@ -538,14 +467,94 @@ static void xe_vm_kill(struct xe_vm *vm)
 	/* TODO: Inform user the VM is banned */
 }
 
+/**
+ * xe_vm_validate_should_retry() - Whether to retry after a validate error.
+ * @exec: The drm_exec object used for locking before validation.
+ * @err: The error returned from ttm_bo_validate().
+ * @end: A ktime_t cookie that should be set to 0 before first use and
+ * that should be reused on subsequent calls.
+ *
+ * With multiple active VMs, under memory pressure, it is possible that
+ * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
+ * Until ttm properly handles locking in such scenarios, best thing the
+ * driver can do is retry with a timeout. Check if that is necessary, and
+ * if so unlock the drm_exec's objects while keeping the ticket to prepare
+ * for a rerun.
+ *
+ * Return: true if a retry after drm_exec_init() is recommended;
+ * false otherwise.
+ */
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
+{
+	ktime_t cur;
+
+	if (err != -ENOMEM)
+		return false;
+
+	cur = ktime_get();
+	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
+	if (!ktime_before(cur, *end))
+		return false;
+
+	/*
+	 * We would like to keep the ticket here with
+	 * drm_exec_unlock_all(), but WW mutex asserts currently
+	 * stop us from that. In any case this function could go away
+	 * with proper TTM -EDEADLK handling.
+	 */
+	drm_exec_fini(exec);
+
+	msleep(20);
+	return true;
+}
+
+static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
+				 bool *done)
+{
+	struct xe_vma *vma;
+	int err;
+
+	err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base,
+				   vm->preempt.num_exec_queues);
+	if (err)
+		return err;
+
+	if (xe_vm_is_idle(vm)) {
+		vm->preempt.rebind_deactivated = true;
+		*done = true;
+		return 0;
+	}
+
+	if (!preempt_fences_waiting(vm)) {
+		*done = true;
+		return 0;
+	}
+
+	err = xe_vm_lock_dma_resv(vm, exec, vm->preempt.num_exec_queues, false);
+	if (err)
+		return err;
+
+	err = wait_for_existing_preempt_fences(vm);
+	if (err)
+		return err;
+
+	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
+		if (xe_vma_has_no_bo(vma) ||
+		    vma->gpuva.flags & XE_VMA_DESTROYED)
+			continue;
+
+		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
 static void preempt_rebind_work_func(struct work_struct *w)
 {
 	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
-	struct xe_vma *vma;
-	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
-	struct ttm_validate_buffer *tv;
-	struct ww_acquire_ctx ww;
-	struct list_head objs;
+	struct drm_exec exec;
 	struct dma_fence *rebind_fence;
 	unsigned int fence_count = 0;
 	LIST_HEAD(preempt_fences);
@@ -588,42 +597,25 @@ retry:
 			goto out_unlock_outer;
 	}
 
-	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
-				  false, vm->preempt.num_exec_queues);
-	if (err)
-		goto out_unlock_outer;
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
 
-	if (xe_vm_is_idle(vm)) {
-		vm->preempt.rebind_deactivated = true;
-		goto out_unlock;
-	}
-
-	/* Fresh preempt fences already installed. Everyting is running. */
-	if (!preempt_fences_waiting(vm))
-		goto out_unlock;
+	drm_exec_until_all_locked(&exec) {
+		bool done = false;
 
-	/*
-	 * This makes sure vm is completely suspended and also balances
-	 * xe_engine suspend- and resume; we resume *all* vm engines below.
-	 */
-	err = wait_for_existing_preempt_fences(vm);
-	if (err)
-		goto out_unlock;
+		err = xe_preempt_work_begin(&exec, vm, &done);
+		drm_exec_retry_on_contention(&exec);
+		if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
+			err = -EAGAIN;
+			goto out_unlock_outer;
+		}
+		if (err || done)
+			goto out_unlock;
+	}
 
 	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
 	if (err)
 		goto out_unlock;
 
-	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
-		if (xe_vma_has_no_bo(vma) ||
-		    vma->gpuva.flags & XE_VMA_DESTROYED)
-			continue;
-
-		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
-		if (err)
-			goto out_unlock;
-	}
-
 	rebind_fence = xe_vm_rebind(vm, true);
 	if (IS_ERR(rebind_fence)) {
 		err = PTR_ERR(rebind_fence);
@@ -668,30 +660,13 @@ retry:
 	up_read(&vm->userptr.notifier_lock);
 
 out_unlock:
-	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+	drm_exec_fini(&exec);
 out_unlock_outer:
 	if (err == -EAGAIN) {
 		trace_xe_vm_rebind_worker_retry(vm);
 		goto retry;
 	}
 
-	/*
-	 * With multiple active VMs, under memory pressure, it is possible that
-	 * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
-	 * Until ttm properly handles locking in such scenarios, best thing the
-	 * driver can do is retry with a timeout. Killing the VM or putting it
-	 * in error state after timeout or other error scenarios is still TBD.
-	 */
-	if (err == -ENOMEM) {
-		ktime_t cur = ktime_get();
-
-		end = end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
-		if (ktime_before(cur, end)) {
-			msleep(20);
-			trace_xe_vm_rebind_worker_retry(vm);
-			goto retry;
-		}
-	}
 	if (err) {
 		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
 		xe_vm_kill(vm);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index dd20e5c8106f0..a26e84c742f11 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -21,6 +21,7 @@ struct ttm_validate_buffer;
 struct xe_exec_queue;
 struct xe_file;
 struct xe_sync_entry;
+struct drm_exec;
 
 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
 
@@ -208,23 +209,10 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma);
 
 int xe_vma_userptr_check_repin(struct xe_vma *vma);
 
-/*
- * XE_ONSTACK_TV is used to size the tv_onstack array that is input
- * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
- */
-#define XE_ONSTACK_TV 20
-int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
-			struct ttm_validate_buffer *tv_onstack,
-			struct ttm_validate_buffer **tv,
-			struct list_head *objs,
-			bool intr,
-			unsigned int num_shared);
-
-void xe_vm_unlock_dma_resv(struct xe_vm *vm,
-			   struct ttm_validate_buffer *tv_onstack,
-			   struct ttm_validate_buffer *tv,
-			   struct ww_acquire_ctx *ww,
-			   struct list_head *objs);
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
+
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
+			unsigned int num_shared, bool lock_vm);
 
 void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
 			     enum dma_resv_usage usage);
-- 
GitLab


From 2714d50936200a65ae52f431b0c004b31655239f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 8 Sep 2023 11:17:15 +0200
Subject: [PATCH 1878/2340] drm/xe: Convert pagefaulting code to use drm_exec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the calls into ttm_eu_reserve_buffers with the drm_exec helpers.
Also reuse some code.

v4:
- Kerneldoc xe_vm_prepare_vma().

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-6-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 107 ++++++++++++---------------
 drivers/gpu/drm/xe/xe_vm.c           |  36 ++++++++-
 drivers/gpu/drm/xe/xe_vm.h           |   3 +
 3 files changed, 84 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index bdef4b76028b1..e1e067d3bb873 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -8,6 +8,7 @@
 #include <linux/bitfield.h>
 #include <linux/circ_buf.h>
 
+#include <drm/drm_exec.h>
 #include <drm/drm_managed.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 
@@ -84,11 +85,6 @@ static bool vma_matches(struct xe_vma *vma, u64 page_addr)
 	return true;
 }
 
-static bool only_needs_bo_lock(struct xe_bo *bo)
-{
-	return bo && bo->vm;
-}
-
 static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
 {
 	struct xe_vma *vma = NULL;
@@ -103,17 +99,45 @@ static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
 	return vma;
 }
 
+static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
+		       bool atomic, unsigned int id)
+{
+	struct xe_bo *bo = xe_vma_bo(vma);
+	struct xe_vm *vm = xe_vma_vm(vma);
+	unsigned int num_shared = 2; /* slots for bind + move */
+	int err;
+
+	err = xe_vm_prepare_vma(exec, vma, num_shared);
+	if (err)
+		return err;
+
+	if (atomic) {
+		if (xe_vma_is_userptr(vma)) {
+			err = -EACCES;
+			return err;
+		}
+
+		/* Migrate to VRAM, move should invalidate the VMA first */
+		err = xe_bo_migrate(bo, XE_PL_VRAM0 + id);
+		if (err)
+			return err;
+	} else if (bo) {
+		/* Create backing store if needed */
+		err = xe_bo_validate(bo, vm, true);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_tile *tile = gt_to_tile(gt);
+	struct drm_exec exec;
 	struct xe_vm *vm;
 	struct xe_vma *vma = NULL;
-	struct xe_bo *bo;
-	LIST_HEAD(objs);
-	LIST_HEAD(dups);
-	struct ttm_validate_buffer tv_bo, tv_vm;
-	struct ww_acquire_ctx ww;
 	struct dma_fence *fence;
 	bool write_locked;
 	int ret = 0;
@@ -170,35 +194,10 @@ retry_userptr:
 	}
 
 	/* Lock VM and BOs dma-resv */
-	bo = xe_vma_bo(vma);
-	if (!only_needs_bo_lock(bo)) {
-		tv_vm.num_shared = xe->info.tile_count;
-		tv_vm.bo = xe_vm_ttm_bo(vm);
-		list_add(&tv_vm.head, &objs);
-	}
-	if (bo) {
-		tv_bo.bo = &bo->ttm;
-		tv_bo.num_shared = xe->info.tile_count;
-		list_add(&tv_bo.head, &objs);
-	}
-
-	ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
-	if (ret)
-		goto unlock_vm;
-
-	if (atomic) {
-		if (xe_vma_is_userptr(vma)) {
-			ret = -EACCES;
-			goto unlock_dma_resv;
-		}
-
-		/* Migrate to VRAM, move should invalidate the VMA first */
-		ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id);
-		if (ret)
-			goto unlock_dma_resv;
-	} else if (bo) {
-		/* Create backing store if needed */
-		ret = xe_bo_validate(bo, vm, true);
+	drm_exec_init(&exec, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = xe_pf_begin(&exec, vma, atomic, tile->id);
+		drm_exec_retry_on_contention(&exec);
 		if (ret)
 			goto unlock_dma_resv;
 	}
@@ -225,7 +224,7 @@ retry_userptr:
 	vma->usm.tile_invalidated &= ~BIT(tile->id);
 
 unlock_dma_resv:
-	ttm_eu_backoff_reservation(&ww, &objs);
+	drm_exec_fini(&exec);
 unlock_vm:
 	if (!ret)
 		vm->usm.last_fault_vma = vma;
@@ -490,13 +489,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_tile *tile = gt_to_tile(gt);
+	struct drm_exec exec;
 	struct xe_vm *vm;
 	struct xe_vma *vma;
-	struct xe_bo *bo;
-	LIST_HEAD(objs);
-	LIST_HEAD(dups);
-	struct ttm_validate_buffer tv_bo, tv_vm;
-	struct ww_acquire_ctx ww;
 	int ret = 0;
 
 	/* We only support ACC_TRIGGER at the moment */
@@ -528,23 +523,15 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
 		goto unlock_vm;
 
 	/* Lock VM and BOs dma-resv */
-	bo = xe_vma_bo(vma);
-	if (!only_needs_bo_lock(bo)) {
-		tv_vm.num_shared = xe->info.tile_count;
-		tv_vm.bo = xe_vm_ttm_bo(vm);
-		list_add(&tv_vm.head, &objs);
+	drm_exec_init(&exec, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = xe_pf_begin(&exec, vma, true, tile->id);
+		drm_exec_retry_on_contention(&exec);
+		if (ret)
+			break;
 	}
-	tv_bo.bo = &bo->ttm;
-	tv_bo.num_shared = xe->info.tile_count;
-	list_add(&tv_bo.head, &objs);
-	ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
-	if (ret)
-		goto unlock_vm;
-
-	/* Migrate to VRAM, move should invalidate the VMA first */
-	ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id);
 
-	ttm_eu_backoff_reservation(&ww, &objs);
+	drm_exec_fini(&exec);
 unlock_vm:
 	up_read(&vm->lock);
 	xe_vm_put(vm);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 80b374b9cdd1e..52c5235677c51 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -35,6 +35,11 @@
 
 #define TEST_VM_ASYNC_OPS_ERROR
 
+static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
+{
+	return vm->gpuvm.r_obj;
+}
+
 /**
  * xe_vma_userptr_check_repin() - Advisory check for repin needed
  * @vma: The userptr vma
@@ -422,7 +427,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
 	lockdep_assert_held(&vm->lock);
 
 	if (lock_vm) {
-		err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base, num_shared);
+		err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
 		if (err)
 			return err;
 	}
@@ -514,7 +519,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 	struct xe_vma *vma;
 	int err;
 
-	err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base,
+	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm),
 				   vm->preempt.num_exec_queues);
 	if (err)
 		return err;
@@ -1095,6 +1100,33 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	}
 }
 
+/**
+ * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * @exec: The drm_exec object we're currently locking for.
+ * @vma: The vma for witch we want to lock the vm resv and any attached
+ * object's resv.
+ * @num_shared: The number of dma-fence slots to pre-allocate in the
+ * objects' reservation objects.
+ *
+ * Return: 0 on success, negative error code on error. In particular
+ * may return -EDEADLK on WW transaction contention and -EINTR if
+ * an interruptible wait is terminated by a signal.
+ */
+int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
+		      unsigned int num_shared)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int err;
+
+	XE_WARN_ON(!vm);
+	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
+	if (!err && bo && !bo->vm)
+		err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+
+	return err;
+}
+
 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
 {
 	struct ttm_validate_buffer tv[2];
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index a26e84c742f11..ad9ff2b39a306 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -219,6 +219,9 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
 
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
 
+int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
+		      unsigned int num_shared);
+
 /**
  * xe_vm_resv() - Return's the vm's reservation object
  * @vm: The vm
-- 
GitLab


From 1f72718215ff2763653a82d9cbc41bfed3186caa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 8 Sep 2023 11:17:16 +0200
Subject: [PATCH 1879/2340] drm/xe: Convert remaining instances of
 ttm_eu_reserve_buffers to drm_exec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The VM_BIND functionality and vma destruction was locking
potentially multiple dma_resv objects using the
ttm_eu_reserve_buffers() function. Rework those to use the drm_exec
helper, taking care that any calls to xe_bo_validate() ends up
inside an unsealed locking transaction.

v4:
- Remove an unbalanced xe_bo_put() (igt and Matthew Brost)
v5:
- Rebase conflict

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-7-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 103 +++++++++++++------------------------
 drivers/gpu/drm/xe/xe_vm.h |   2 -
 2 files changed, 36 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 52c5235677c51..4def60249381b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1129,29 +1129,20 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
 
 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
 {
-	struct ttm_validate_buffer tv[2];
-	struct ww_acquire_ctx ww;
-	struct xe_bo *bo = xe_vma_bo(vma);
-	LIST_HEAD(objs);
-	LIST_HEAD(dups);
+	struct drm_exec exec;
 	int err;
 
-	memset(tv, 0, sizeof(tv));
-	tv[0].bo = xe_vm_ttm_bo(xe_vma_vm(vma));
-	list_add(&tv[0].head, &objs);
-
-	if (bo) {
-		tv[1].bo = &xe_bo_get(bo)->ttm;
-		list_add(&tv[1].head, &objs);
+	drm_exec_init(&exec, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = xe_vm_prepare_vma(&exec, vma, 0);
+		drm_exec_retry_on_contention(&exec);
+		if (XE_WARN_ON(err))
+			break;
 	}
-	err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
-	XE_WARN_ON(err);
 
 	xe_vma_destroy(vma, NULL);
 
-	ttm_eu_backoff_reservation(&ww, &objs);
-	if (bo)
-		xe_bo_put(bo);
+	drm_exec_fini(&exec);
 }
 
 struct xe_vma *
@@ -2142,21 +2133,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 #define VM_BIND_OP(op)	(op & 0xffff)
 
-struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
-{
-	int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
-		XE_VM_FLAG_TILE_ID(vm->flags) : 0;
-
-	/* Safe to use index 0 as all BO in the VM share a single dma-resv lock */
-	return &vm->pt_root[idx]->bo->ttm;
-}
-
-static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
-{
-	tv->num_shared = 1;
-	tv->bo = xe_vm_ttm_bo(vm);
-}
-
 static void vm_set_async_error(struct xe_vm *vm, int err)
 {
 	lockdep_assert_held(&vm->lock);
@@ -2668,42 +2644,16 @@ free_fence:
 	return err;
 }
 
-static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
-			       struct xe_vma_op *op)
+static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
+		      struct xe_vma *vma, struct xe_vma_op *op)
 {
-	LIST_HEAD(objs);
-	LIST_HEAD(dups);
-	struct ttm_validate_buffer tv_bo, tv_vm;
-	struct ww_acquire_ctx ww;
-	struct xe_bo *vbo;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
 
-	xe_vm_tv_populate(vm, &tv_vm);
-	list_add_tail(&tv_vm.head, &objs);
-	vbo = xe_vma_bo(vma);
-	if (vbo) {
-		/*
-		 * An unbind can drop the last reference to the BO and
-		 * the BO is needed for ttm_eu_backoff_reservation so
-		 * take a reference here.
-		 */
-		xe_bo_get(vbo);
-
-		if (!vbo->vm) {
-			tv_bo.bo = &vbo->ttm;
-			tv_bo.num_shared = 1;
-			list_add(&tv_bo.head, &objs);
-		}
-	}
-
-again:
-	err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
-	if (err) {
-		xe_bo_put(vbo);
+	err = xe_vm_prepare_vma(exec, vma, 1);
+	if (err)
 		return err;
-	}
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
@@ -2782,17 +2732,36 @@ again:
 		XE_WARN_ON("NOT POSSIBLE");
 	}
 
-	ttm_eu_backoff_reservation(&ww, &objs);
+	if (err)
+		trace_xe_vma_fail(vma);
+
+	return err;
+}
+
+static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+			       struct xe_vma_op *op)
+{
+	struct drm_exec exec;
+	int err;
+
+retry_userptr:
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_until_all_locked(&exec) {
+		err = op_execute(&exec, vm, vma, op);
+		drm_exec_retry_on_contention(&exec);
+		if (err)
+			break;
+	}
+	drm_exec_fini(&exec);
+
 	if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
 		lockdep_assert_held_write(&vm->lock);
 		err = xe_vma_userptr_pin_pages(vma);
 		if (!err)
-			goto again;
-	}
-	xe_bo_put(vbo);
+			goto retry_userptr;
 
-	if (err)
 		trace_xe_vma_fail(vma);
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index ad9ff2b39a306..cc9dfd8cb7701 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -180,8 +180,6 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence);
 
 extern struct ttm_device_funcs xe_ttm_funcs;
 
-struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
-
 static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
 {
 	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
-- 
GitLab


From 30278e299646a1a8f9c1fd1da33768440f71bb42 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 11 Sep 2023 14:10:32 -0700
Subject: [PATCH 1880/2340] drm/xe: Fix fence reservation accouting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both execs and the preempt rebind worker can issue rebinds. Rebinds
require a fence, per tile, inserted into dma-resv slots of the VM and
BO (if external). The fence reservation accouting did not take into
account the number of fences required for rebinds, fix this.

v2: Rebase

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reported-by: Christopher Snowhill <kode54@gmail.com>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/518
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c | 6 +++++-
 drivers/gpu/drm/xe/xe_vm.c   | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index eb7fc3192c222..293960efcd22c 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -103,7 +103,11 @@ static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm)
 	if (xe_vm_no_dma_fences(vm))
 		return 0;
 
-	err = xe_vm_lock_dma_resv(vm, exec, 1, true);
+	/*
+	 * 1 fence for job from exec plus a fence for each tile from a possible
+	 * rebind
+	 */
+	err = xe_vm_lock_dma_resv(vm, exec, 1 + vm->xe->info.tile_count, true);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4def60249381b..89f7428f00d79 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -519,8 +519,13 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 	struct xe_vma *vma;
 	int err;
 
+	/*
+	 * 1 fence for each preempt fence plus a fence for each tile from a
+	 * possible rebind
+	 */
 	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm),
-				   vm->preempt.num_exec_queues);
+				   vm->preempt.num_exec_queues +
+				   vm->xe->info.tile_count);
 	if (err)
 		return err;
 
-- 
GitLab


From 5c0553cdc811bb6af4f1bfef178bd07fc16a797e Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 12 Sep 2023 08:36:33 +0000
Subject: [PATCH 1881/2340] drm/xe: Replace XE_WARN_ON with drm_warn when just
 printing a string

Use the generic drm_warn instead of the driver-specific XE_WARN_ON
in cases where XE_WARN_ON is used to unconditionally print a debug
message.

v2: Rebase

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c   |  5 +++--
 drivers/gpu/drm/xe/xe_gt_pagefault.c |  3 ++-
 drivers/gpu/drm/xe/xe_guc_ct.c       |  2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c   |  9 ++++++---
 drivers/gpu/drm/xe/xe_vm.c           | 18 +++++++++---------
 drivers/gpu/drm/xe/xe_vm_madvise.c   |  2 +-
 6 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index b871e45af813d..9229fd5b01cc1 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -151,6 +151,7 @@ static const struct drm_info_list debugfs_list[] = {
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
 	struct dentry *root;
 	struct drm_info_list *local;
@@ -162,7 +163,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	sprintf(name, "gt%d", gt->info.id);
 	root = debugfs_create_dir(name, minor->debugfs_root);
 	if (IS_ERR(root)) {
-		XE_WARN_ON("Create GT directory failed");
+		drm_warn(&xe->drm, "Create GT directory failed");
 		return;
 	}
 
@@ -172,7 +173,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	 * passed in (e.g. can't define this on the stack).
 	 */
 #define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
-	local = drmm_kmalloc(&gt_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL);
 	if (!local)
 		return;
 
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index e1e067d3bb873..4e33ef8c9d6ac 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -322,6 +322,7 @@ static bool pf_queue_full(struct pf_queue *pf_queue)
 int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = gt_to_xe(gt);
 	struct pf_queue *pf_queue;
 	unsigned long flags;
 	u32 asid;
@@ -340,7 +341,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
 		queue_work(gt->usm.pf_wq, &pf_queue->worker);
 	} else {
-		XE_WARN_ON("PF Queue full, shouldn't be possible");
+		drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
 	}
 	spin_unlock_irqrestore(&pf_queue->lock, flags);
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index b92e04ba8f63c..13f2bd586f6a5 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1022,7 +1022,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
 							   adj_len);
 		break;
 	default:
-		XE_WARN_ON("NOT_POSSIBLE");
+		drm_warn(&xe->drm, "NOT_POSSIBLE");
 	}
 
 	if (ret)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 832e79fb0a025..50509891a2880 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -703,6 +703,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
 					  struct xe_exec_queue *q)
 {
 	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
+	struct xe_device *xe = guc_to_xe(guc);
 	int ret;
 
 	set_min_preemption_timeout(guc, q);
@@ -712,7 +713,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
 	if (!ret) {
 		struct xe_gpu_scheduler *sched = &q->guc->sched;
 
-		XE_WARN_ON("Pending enable failed to respond");
+		drm_warn(&xe->drm, "Pending enable failed to respond");
 		xe_sched_submission_start(sched);
 		xe_gt_reset_async(q->gt);
 		xe_sched_tdr_queue_imm(sched);
@@ -794,6 +795,8 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 	struct xe_guc_exec_queue *ge =
 		container_of(w, struct xe_guc_exec_queue, lr_tdr);
 	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gpu_scheduler *sched = &ge->sched;
 
 	XE_WARN_ON(!xe_exec_queue_is_lr(q));
@@ -828,7 +831,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 					 !exec_queue_pending_disable(q) ||
 					 guc_read_stopped(guc), HZ * 5);
 		if (!ret) {
-			XE_WARN_ON("Schedule disable failed to respond");
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
 			xe_sched_submission_start(sched);
 			xe_gt_reset_async(q->gt);
 			return;
@@ -906,7 +909,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 					 !exec_queue_pending_disable(q) ||
 					 guc_read_stopped(guc), HZ * 5);
 		if (!ret || guc_read_stopped(guc)) {
-			XE_WARN_ON("Schedule disable failed to respond");
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
 			xe_sched_add_pending_job(sched, job);
 			xe_sched_submission_start(sched);
 			xe_gt_reset_async(q->gt);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 89f7428f00d79..66e8aeb203c96 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1401,7 +1401,7 @@ static void vm_error_capture(struct xe_vm *vm, int err,
 	}
 
 	if (copy_to_user(address, &capture, sizeof(capture)))
-		XE_WARN_ON("Copy to user failed");
+		drm_warn(&vm->xe->drm, "Copy to user failed");
 
 	if (in_kthread) {
 		kthread_unuse_mm(vm->async_ops.error_capture.mm);
@@ -2176,7 +2176,7 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
 			return -ENODATA;
 		break;
 	default:
-		XE_WARN_ON("NOT POSSIBLE");
+		drm_warn(&xe->drm, "NOT POSSIBLE");
 		return -EINVAL;
 	}
 
@@ -2234,7 +2234,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
 		break;
 	default:
-		XE_WARN_ON("NOT POSSIBLE");
+		drm_warn(&xe->drm, "NOT POSSIBLE");
 	}
 }
 #else
@@ -2332,7 +2332,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		}
 		break;
 	default:
-		XE_WARN_ON("NOT POSSIBLE");
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 		ops = ERR_PTR(-EINVAL);
 	}
 
@@ -2463,7 +2463,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 		op->flags |= XE_VMA_OP_COMMITTED;
 		break;
 	default:
-		XE_WARN_ON("NOT POSSIBLE");
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
 
 	return err;
@@ -2619,7 +2619,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 			/* Nothing to do */
 			break;
 		default:
-			XE_WARN_ON("NOT POSSIBLE");
+			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 		}
 
 		last_op = op;
@@ -2734,7 +2734,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 				     op->flags & XE_VMA_OP_LAST);
 		break;
 	default:
-		XE_WARN_ON("NOT POSSIBLE");
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
 
 	if (err)
@@ -2812,7 +2812,7 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 					  op);
 		break;
 	default:
-		XE_WARN_ON("NOT POSSIBLE");
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
 
 	return ret;
@@ -2893,7 +2893,7 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
 		/* Nothing to do */
 		break;
 	default:
-		XE_WARN_ON("NOT POSSIBLE");
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 0648274b90b96..0ef7d483d0501 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -186,7 +186,7 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
 static int madvise_pin(struct xe_device *xe, struct xe_vm *vm,
 		       struct xe_vma **vmas, int num_vmas, u64 value)
 {
-	XE_WARN_ON("NIY");
+	drm_warn(&xe->drm, "NIY");
 	return 0;
 }
 
-- 
GitLab


From 1975b5917a94429096f6a2cccc97ed91e0425708 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 12 Sep 2023 20:29:56 +0200
Subject: [PATCH 1882/2340] drm/xe: Introduce Xe assert macros

As we are moving away from the controversial XE_BUG_ON macro,
relying just on WARN_ON or drm_err does not cover the cases
where we want to annotate functions with additional detailed
debug checks to assert that all prerequisites are satisfied,
without paying footprint or performance penalty on non-debug
builds, where all misuses introduced during code integration
were already fixed.

Introduce family of Xe assert macros that try to follow classic
assert() utility and can be compiled out on non-debug builds.

Macros are based on drm_WARN, but unlikely to origin, disallow
use in expressions since we will compile that code out.

As we are operating on the xe pointers, we can print additional
information about the device, like tile or GT identifier, that
is not available from generic WARN report:

[ ] xe 0000:00:02.0: [drm] Assertion `true == false` failed!
    platform: 1 subplatform: 1
    graphics: Xe_LP 12.00 step B0
    media: Xe_M 12.00 step B0
    display: enabled step D0
    tile: 0 VRAM 0 B
    GT: 0 type 1

[ ] xe 0000:b3:00.0: [drm] Assertion `true == false` failed!
    platform: 7 subplatform: 3
    graphics: Xe_HPG 12.55 step A1
    media: Xe_HPM 12.55 step A1
    display: disabled step **
    tile: 0 VRAM 14.0 GiB
    GT: 0 type 1

[ ] WARNING: CPU: 0 PID: 2687 at drivers/gpu/drm/xe/xe_device.c:281 xe_device_probe+0x374/0x520 [xe]
[ ] RIP: 0010:xe_device_probe+0x374/0x520 [xe]
[ ] Call Trace:
[ ]  ? __warn+0x7b/0x160
[ ]  ? xe_device_probe+0x374/0x520 [xe]
[ ]  ? report_bug+0x1c3/0x1d0
[ ]  ? handle_bug+0x42/0x70
[ ]  ? exc_invalid_op+0x14/0x70
[ ]  ? asm_exc_invalid_op+0x16/0x20
[ ]  ? xe_device_probe+0x374/0x520 [xe]
[ ]  ? xe_device_probe+0x374/0x520 [xe]
[ ]  xe_pci_probe+0x6e3/0x950 [xe]
[ ]  ? lockdep_hardirqs_on+0xc7/0x140
[ ]  pci_device_probe+0x9e/0x160
[ ]  really_probe+0x19d/0x400

v2: use lowercase names
v3: apply xe coding style
v4: fix non-debug build and improve kernel-doc

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Oded Gabbay <ogabbay@kernel.org>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_assert.h | 174 +++++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_assert.h

diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
new file mode 100644
index 0000000000000..962aac1bc7648
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ASSERT_H_
+#define _XE_ASSERT_H_
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_step.h"
+
+/**
+ * DOC: Xe ASSERTs
+ *
+ * While Xe driver aims to be simpler than legacy i915 driver it is still
+ * complex enough that some changes introduced while adding new functionality
+ * could break the existing code.
+ *
+ * Adding &drm_WARN or &drm_err to catch unwanted programming usage could lead
+ * to undesired increased driver footprint and may impact production driver
+ * performance as this additional code will be always present.
+ *
+ * To allow annotate functions with additional detailed debug checks to assert
+ * that all prerequisites are satisfied, without worrying about footprint or
+ * performance penalty on production builds where all potential misuses
+ * introduced during code integration were already fixed, we introduce family
+ * of Xe assert macros that try to follow classic assert() utility:
+ *
+ *  * xe_assert()
+ *  * xe_tile_assert()
+ *  * xe_gt_assert()
+ *
+ * These macros are implemented on top of &drm_WARN, but unlikely to the origin,
+ * warning is triggered when provided condition is false. Additionally all above
+ * assert macros cannot be used in expressions or as a condition, since
+ * underlying code will be compiled out on non-debug builds.
+ *
+ * Note that these macros are not intended for use to cover known gaps in the
+ * implementation; for such cases use regular &drm_WARN or &drm_err and provide
+ * valid safe fallback.
+ *
+ * Also in cases where performance or footprint is not an issue, developers
+ * should continue to use the regular &drm_WARN or &drm_err to ensure that bug
+ * reports from production builds will contain meaningful diagnostics data.
+ *
+ * Below code shows how asserts could help in debug to catch unplanned use::
+ *
+ *	static void one_igfx(struct xe_device *xe)
+ *	{
+ *		xe_assert(xe, xe->info.is_dgfx == false);
+ *		xe_assert(xe, xe->info.tile_count == 1);
+ *	}
+ *
+ *	static void two_dgfx(struct xe_device *xe)
+ *	{
+ *		xe_assert(xe, xe->info.is_dgfx);
+ *		xe_assert(xe, xe->info.tile_count == 2);
+ *	}
+ *
+ *	void foo(struct xe_device *xe)
+ *	{
+ *		if (xe->info.dgfx)
+ *			return two_dgfx(xe);
+ *		return one_igfx(xe);
+ *	}
+ *
+ *	void bar(struct xe_device *xe)
+ *	{
+ *		if (drm_WARN_ON(xe->drm, xe->info.tile_count > 2))
+ *			return;
+ *
+ *		if (xe->info.tile_count == 2)
+ *			return two_dgfx(xe);
+ *		return one_igfx(xe);
+ *	}
+ */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define __xe_assert_msg(xe, condition, msg, arg...) ({						\
+	(void)drm_WARN(&(xe)->drm, !(condition), "[" DRM_NAME "] Assertion `%s` failed!\n" msg,	\
+		       __stringify(condition), ## arg);						\
+})
+#else
+#define __xe_assert_msg(xe, condition, msg, arg...) ({						\
+	typecheck(struct xe_device *, xe);							\
+	BUILD_BUG_ON_INVALID(condition);							\
+})
+#endif
+
+/**
+ * xe_assert - warn if condition is false when debugging.
+ * @xe: the &struct xe_device pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_assert() uses &drm_WARN to emit a warning and print additional information
+ * that could be read from the &xe pointer if provided &condition is false.
+ *
+ * Contrary to &drm_WARN, xe_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_assert(xe, condition) xe_assert_msg((xe), condition, "")
+#define xe_assert_msg(xe, condition, msg, arg...) ({						\
+	struct xe_device *__xe = (xe);								\
+	__xe_assert_msg(__xe, condition,							\
+			"platform: %d subplatform: %d\n"					\
+			"graphics: %s %u.%02u step %s\n"					\
+			"media: %s %u.%02u step %s\n"						\
+			msg,									\
+			__xe->info.platform, __xe->info.subplatform,				\
+			__xe->info.graphics_name,						\
+			__xe->info.graphics_verx100 / 100,					\
+			__xe->info.graphics_verx100 % 100,					\
+			xe_step_name(__xe->info.step.graphics),					\
+			__xe->info.media_name,							\
+			__xe->info.media_verx100 / 100,						\
+			__xe->info.media_verx100 % 100,						\
+			xe_step_name(__xe->info.step.media),					\
+			## arg);								\
+})
+
+/**
+ * xe_tile_assert - warn if condition is false when debugging.
+ * @tile: the &struct xe_tile pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_tile_assert() uses &drm_WARN to emit a warning and print additional
+ * information that could be read from the &tile pointer if provided &condition
+ * is false.
+ *
+ * Contrary to &drm_WARN, xe_tile_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "")
+#define xe_tile_assert_msg(tile, condition, msg, arg...) ({					\
+	struct xe_tile *__tile = (tile);							\
+	char __buf[10] __maybe_unused;								\
+	xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg,			\
+		      __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1,	\
+				     STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg);	\
+})
+
+/**
+ * xe_gt_assert - warn if condition is false when debugging.
+ * @gt: the &struct xe_gt pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_gt_assert() uses &drm_WARN to emit a warning and print additional
+ * information that could be safetely read from the &gt pointer if provided
+ * &condition is false.
+ *
+ * Contrary to &drm_WARN, xe_gt_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "")
+#define xe_gt_assert_msg(gt, condition, msg, arg...) ({						\
+	struct xe_gt *__gt = (gt);								\
+	xe_tile_assert_msg(gt_to_tile(__gt), condition, "GT: %u type %d\n" msg,			\
+			   __gt->info.id, __gt->info.type, ## arg);				\
+})
+
+#endif
-- 
GitLab


From c73acc1eeba5e380a367087cb7b933b946613ee7 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 12 Sep 2023 08:36:35 +0000
Subject: [PATCH 1883/2340] drm/xe: Use Xe assert macros instead of XE_WARN_ON
 macro

The XE_WARN_ON macro maps to WARN_ON which is not justified
in many cases where only a simple debug check is needed.
Replace the use of the XE_WARN_ON macro with the new xe_assert
macros which relies on drm_*. This takes a struct drm_device
argument, which is one of the main changes in this commit. The
other main change is that the condition is reversed, as with
XE_WARN_ON a message is displayed if the condition is true,
whereas with xe_assert it is if the condition is false.

v2:
- Rebase
- Keep WARN splats in xe_wopcm.c (Matt Roper)

v3:
- Rebase

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bb.c                  |  8 +--
 drivers/gpu/drm/xe/xe_bo.c                  | 64 ++++++++++----------
 drivers/gpu/drm/xe/xe_bo_evict.c            |  4 +-
 drivers/gpu/drm/xe/xe_device.c              |  6 +-
 drivers/gpu/drm/xe/xe_exec.c                |  2 +-
 drivers/gpu/drm/xe/xe_execlist.c            | 12 ++--
 drivers/gpu/drm/xe/xe_force_wake.c          |  4 +-
 drivers/gpu/drm/xe/xe_force_wake.h          |  6 +-
 drivers/gpu/drm/xe/xe_ggtt.c                | 14 ++---
 drivers/gpu/drm/xe/xe_gt.c                  |  1 +
 drivers/gpu/drm/xe/xe_gt_clock.c            |  2 +-
 drivers/gpu/drm/xe/xe_gt_debugfs.c          |  2 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++--
 drivers/gpu/drm/xe/xe_guc.c                 | 24 ++++----
 drivers/gpu/drm/xe/xe_guc_ads.c             | 22 +++----
 drivers/gpu/drm/xe/xe_guc_ct.c              | 29 ++++-----
 drivers/gpu/drm/xe/xe_guc_log.c             |  4 +-
 drivers/gpu/drm/xe/xe_guc_pc.c              |  2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c          | 65 +++++++++++++--------
 drivers/gpu/drm/xe/xe_huc.c                 |  3 +-
 drivers/gpu/drm/xe/xe_hw_engine.c           | 11 ++--
 drivers/gpu/drm/xe/xe_lrc.c                 |  9 +--
 drivers/gpu/drm/xe/xe_migrate.c             | 31 +++++-----
 drivers/gpu/drm/xe/xe_mocs.c                |  2 +-
 drivers/gpu/drm/xe/xe_pt.c                  | 14 ++---
 drivers/gpu/drm/xe/xe_ring_ops.c            | 13 +++--
 drivers/gpu/drm/xe/xe_sched_job.c           |  2 +-
 drivers/gpu/drm/xe/xe_uc.c                  |  2 +-
 drivers/gpu/drm/xe/xe_uc_fw.c               | 16 ++---
 drivers/gpu/drm/xe/xe_vm.c                  | 48 +++++++--------
 drivers/gpu/drm/xe/xe_vm.h                  |  2 +-
 31 files changed, 235 insertions(+), 201 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 1fbc2fcddc96b..f871ba82bc9ba 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -66,7 +66,7 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
 
 	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 
-	WARN_ON(bb->len * 4 + bb_prefetch(q->gt) > size);
+	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
 
 	xe_sa_bo_flush_write(bb->bo);
 
@@ -84,8 +84,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
 		4 * second_idx,
 	};
 
-	XE_WARN_ON(second_idx > bb->len);
-	XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION));
+	xe_gt_assert(q->gt, second_idx <= bb->len);
+	xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION);
 
 	return __xe_bb_create_job(q, bb, addr);
 }
@@ -95,7 +95,7 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
 {
 	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
 
-	XE_WARN_ON(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION);
+	xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION));
 	return __xe_bb_create_job(q, bb, &addr);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index c33a204200224..998efceb84a4c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -103,7 +103,7 @@ static bool xe_bo_is_user(struct xe_bo *bo)
 static struct xe_tile *
 mem_type_to_tile(struct xe_device *xe, u32 mem_type)
 {
-	XE_WARN_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type));
+	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
 
 	return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
 }
@@ -142,7 +142,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 	struct ttm_place place = { .mem_type = mem_type };
 	u64 io_size = tile->mem.vram.io_size;
 
-	XE_WARN_ON(!tile->mem.vram.usable_size);
+	xe_assert(xe, tile->mem.vram.usable_size);
 
 	/*
 	 * For eviction / restore on suspend / resume objects
@@ -544,10 +544,11 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
 					       ttm);
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 	struct sg_table *sg;
 
-	XE_WARN_ON(!attach);
-	XE_WARN_ON(!ttm_bo->ttm);
+	xe_assert(xe, attach);
+	xe_assert(xe, ttm_bo->ttm);
 
 	if (new_res->mem_type == XE_PL_SYSTEM)
 		goto out;
@@ -709,8 +710,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	else if (mem_type_is_vram(old_mem_type))
 		tile = mem_type_to_tile(xe, old_mem_type);
 
-	XE_WARN_ON(!tile);
-	XE_WARN_ON(!tile->migrate);
+	xe_assert(xe, tile);
+	xe_tile_assert(tile, tile->migrate);
 
 	trace_xe_bo_move(bo);
 	xe_device_mem_access_get(xe);
@@ -740,7 +741,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 					goto out;
 				}
 
-				XE_WARN_ON(new_mem->start !=
+				xe_assert(xe, new_mem->start ==
 					  bo->placements->fpfn);
 
 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
@@ -939,9 +940,10 @@ static void __xe_bo_vunmap(struct xe_bo *bo);
  */
 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
 {
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 	bool locked;
 
-	XE_WARN_ON(kref_read(&ttm_bo->kref));
+	xe_assert(xe, !kref_read(&ttm_bo->kref));
 
 	/*
 	 * We can typically only race with TTM trylocking under the
@@ -952,7 +954,7 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
 	spin_lock(&ttm_bo->bdev->lru_lock);
 	locked = dma_resv_trylock(ttm_bo->base.resv);
 	spin_unlock(&ttm_bo->bdev->lru_lock);
-	XE_WARN_ON(!locked);
+	xe_assert(xe, locked);
 
 	return locked;
 }
@@ -968,7 +970,7 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
 		return;
 
 	bo = ttm_to_xe_bo(ttm_bo);
-	XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount));
+	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
 
 	/*
 	 * Corner case where TTM fails to allocate memory and this BOs resv
@@ -1041,12 +1043,13 @@ struct ttm_device_funcs xe_ttm_funcs = {
 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 {
 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 
 	if (bo->ttm.base.import_attach)
 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
 	drm_gem_object_release(&bo->ttm.base);
 
-	WARN_ON(!list_empty(&bo->vmas));
+	xe_assert(xe, list_empty(&bo->vmas));
 
 	if (bo->ggtt_node.size)
 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
@@ -1082,7 +1085,7 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
 	struct xe_bo *bo = gem_to_xe_bo(obj);
 
 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
-		XE_WARN_ON(!xe_bo_is_user(bo));
+		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
 
 		xe_bo_lock(bo, false);
 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
@@ -1198,7 +1201,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	int err;
 
 	/* Only kernel objects should set GT */
-	XE_WARN_ON(tile && type != ttm_bo_type_kernel);
+	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
 
 	if (XE_WARN_ON(!size)) {
 		xe_bo_free(bo);
@@ -1354,7 +1357,7 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
 			tile = xe_device_get_root_tile(xe);
 
-		XE_WARN_ON(!tile);
+		xe_assert(xe, tile);
 
 		if (flags & XE_BO_CREATE_STOLEN_BIT &&
 		    flags & XE_BO_FIXED_PLACEMENT_BIT) {
@@ -1485,8 +1488,8 @@ int xe_bo_pin_external(struct xe_bo *bo)
 	struct xe_device *xe = xe_bo_device(bo);
 	int err;
 
-	XE_WARN_ON(bo->vm);
-	XE_WARN_ON(!xe_bo_is_user(bo));
+	xe_assert(xe, !bo->vm);
+	xe_assert(xe, xe_bo_is_user(bo));
 
 	if (!xe_bo_is_pinned(bo)) {
 		err = xe_bo_validate(bo, NULL, false);
@@ -1518,20 +1521,20 @@ int xe_bo_pin(struct xe_bo *bo)
 	int err;
 
 	/* We currently don't expect user BO to be pinned */
-	XE_WARN_ON(xe_bo_is_user(bo));
+	xe_assert(xe, !xe_bo_is_user(bo));
 
 	/* Pinned object must be in GGTT or have pinned flag */
-	XE_WARN_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT |
-				 XE_BO_CREATE_GGTT_BIT)));
+	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
+				   XE_BO_CREATE_GGTT_BIT));
 
 	/*
 	 * No reason we can't support pinning imported dma-bufs we just don't
 	 * expect to pin an imported dma-buf.
 	 */
-	XE_WARN_ON(bo->ttm.base.import_attach);
+	xe_assert(xe, !bo->ttm.base.import_attach);
 
 	/* We only expect at most 1 pin */
-	XE_WARN_ON(xe_bo_is_pinned(bo));
+	xe_assert(xe, !xe_bo_is_pinned(bo));
 
 	err = xe_bo_validate(bo, NULL, false);
 	if (err)
@@ -1547,7 +1550,7 @@ int xe_bo_pin(struct xe_bo *bo)
 		struct ttm_place *place = &(bo->placements[0]);
 
 		if (mem_type_is_vram(place->mem_type)) {
-			XE_WARN_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
+			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
 
 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
@@ -1584,9 +1587,9 @@ void xe_bo_unpin_external(struct xe_bo *bo)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 
-	XE_WARN_ON(bo->vm);
-	XE_WARN_ON(!xe_bo_is_pinned(bo));
-	XE_WARN_ON(!xe_bo_is_user(bo));
+	xe_assert(xe, !bo->vm);
+	xe_assert(xe, xe_bo_is_pinned(bo));
+	xe_assert(xe, xe_bo_is_user(bo));
 
 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
 		spin_lock(&xe->pinned.lock);
@@ -1607,15 +1610,15 @@ void xe_bo_unpin(struct xe_bo *bo)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 
-	XE_WARN_ON(bo->ttm.base.import_attach);
-	XE_WARN_ON(!xe_bo_is_pinned(bo));
+	xe_assert(xe, !bo->ttm.base.import_attach);
+	xe_assert(xe, xe_bo_is_pinned(bo));
 
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
 	    bo->flags & XE_BO_INTERNAL_TEST)) {
 		struct ttm_place *place = &(bo->placements[0]);
 
 		if (mem_type_is_vram(place->mem_type)) {
-			XE_WARN_ON(list_empty(&bo->pinned_link));
+			xe_assert(xe, !list_empty(&bo->pinned_link));
 
 			spin_lock(&xe->pinned.lock);
 			list_del_init(&bo->pinned_link);
@@ -1676,15 +1679,16 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
  */
 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
 {
+	struct xe_device *xe = xe_bo_device(bo);
 	struct xe_res_cursor cur;
 	u64 page;
 
-	XE_WARN_ON(page_size > PAGE_SIZE);
+	xe_assert(xe, page_size <= PAGE_SIZE);
 	page = offset >> PAGE_SHIFT;
 	offset &= (PAGE_SIZE - 1);
 
 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
-		XE_WARN_ON(!bo->ttm.ttm);
+		xe_assert(xe, bo->ttm.ttm);
 
 		xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
 				page_size, &cur);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 49c05ddea1645..7a264a9ca06e2 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -158,8 +158,8 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 		 * We expect validate to trigger a move VRAM and our move code
 		 * should setup the iosys map.
 		 */
-		XE_WARN_ON(iosys_map_is_null(&bo->vmap));
-		XE_WARN_ON(!xe_bo_is_vram(bo));
+		xe_assert(xe, !iosys_map_is_null(&bo->vmap));
+		xe_assert(xe, xe_bo_is_vram(bo));
 
 		xe_bo_put(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index df1953759c670..986a02a661661 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -394,7 +394,7 @@ bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
 	if (active) {
 		int ref = atomic_inc_return(&xe->mem_access.ref);
 
-		XE_WARN_ON(ref == S32_MAX);
+		xe_assert(xe, ref != S32_MAX);
 	}
 
 	return active;
@@ -436,7 +436,7 @@ void xe_device_mem_access_get(struct xe_device *xe)
 	xe_pm_runtime_get(xe);
 	ref = atomic_inc_return(&xe->mem_access.ref);
 
-	XE_WARN_ON(ref == S32_MAX);
+	xe_assert(xe, ref != S32_MAX);
 
 }
 
@@ -450,5 +450,5 @@ void xe_device_mem_access_put(struct xe_device *xe)
 	ref = atomic_dec_return(&xe->mem_access.ref);
 	xe_pm_runtime_put(xe);
 
-	XE_WARN_ON(ref < 0);
+	xe_assert(xe, ref >= 0);
 }
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 293960efcd22c..7cf4215b2b2e7 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -117,7 +117,7 @@ static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm)
 	 * to a location where the GPU can access it).
 	 */
 	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
-		XE_WARN_ON(xe_vma_is_null(vma));
+		xe_assert(vm->xe, !xe_vma_is_null(vma));
 
 		if (xe_vma_is_userptr(vma))
 			continue;
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index df91780d8b9f9..5b26b6e35afcf 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -12,6 +12,7 @@
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
 #include "regs/xe_regs.h"
+#include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_exec_queue.h"
@@ -50,10 +51,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 	lrc_desc = xe_lrc_descriptor(lrc);
 
 	if (GRAPHICS_VERx100(xe) >= 1250) {
-		XE_WARN_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
+		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
 		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
 	} else {
-		XE_WARN_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
+		xe_gt_assert(hwe->gt, FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
 		lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id);
 	}
 
@@ -321,7 +322,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q)
 	struct xe_device *xe = gt_to_xe(q->gt);
 	int err;
 
-	XE_WARN_ON(xe_device_guc_submission_enabled(xe));
+	xe_assert(xe, !xe_device_guc_submission_enabled(xe));
 
 	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
 
@@ -367,9 +368,10 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
 		container_of(w, struct xe_execlist_exec_queue, fini_async);
 	struct xe_exec_queue *q = ee->q;
 	struct xe_execlist_exec_queue *exl = q->execlist;
+	struct xe_device *xe = gt_to_xe(q->gt);
 	unsigned long flags;
 
-	XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(q->gt)));
+	xe_assert(xe, !xe_device_guc_submission_enabled(xe));
 
 	spin_lock_irqsave(&exl->port->lock, flags);
 	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
@@ -377,7 +379,7 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
 	spin_unlock_irqrestore(&exl->port->lock, flags);
 
 	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
-		xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
+		xe_device_remove_persistent_exec_queues(xe, q);
 	drm_sched_entity_fini(&exl->entity);
 	drm_sched_fini(&exl->sched);
 	kfree(exl);
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index ef7279e0b006c..ed2ecb20ce8e6 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -45,7 +45,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
 	mutex_init(&fw->lock);
 
 	/* Assuming gen11+ so assert this assumption is correct */
-	XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
 
 	if (xe->info.graphics_verx100 >= 1270) {
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
@@ -67,7 +67,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
 	int i, j;
 
 	/* Assuming gen11+ so assert this assumption is correct */
-	XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
 
 	if (!xe_gt_is_media_type(gt))
 		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index 7f304704190e5..83cb157da7cc6 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -6,8 +6,8 @@
 #ifndef _XE_FORCE_WAKE_H_
 #define _XE_FORCE_WAKE_H_
 
+#include "xe_assert.h"
 #include "xe_force_wake_types.h"
-#include "xe_macros.h"
 
 struct xe_gt;
 
@@ -24,7 +24,7 @@ static inline int
 xe_force_wake_ref(struct xe_force_wake *fw,
 		  enum xe_force_wake_domains domain)
 {
-	XE_WARN_ON(!domain);
+	xe_gt_assert(fw->gt, domain);
 	return fw->domains[ffs(domain) - 1].ref;
 }
 
@@ -32,7 +32,7 @@ static inline void
 xe_force_wake_assert_held(struct xe_force_wake *fw,
 			  enum xe_force_wake_domains domain)
 {
-	XE_WARN_ON(!(fw->awake_domains & domain));
+	xe_gt_assert(fw->gt, fw->awake_domains & domain);
 }
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 286f36b9e2292..03097f1b7f71f 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -58,8 +58,8 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
 
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
 {
-	XE_WARN_ON(addr & XE_PTE_MASK);
-	XE_WARN_ON(addr >= ggtt->size);
+	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
+	xe_tile_assert(ggtt->tile, addr < ggtt->size);
 
 	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
 }
@@ -69,7 +69,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
 	u64 end = start + size - 1;
 	u64 scratch_pte;
 
-	XE_WARN_ON(start >= end);
+	xe_tile_assert(ggtt->tile, start < end);
 
 	if (ggtt->scratch)
 		scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
@@ -230,7 +230,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 		int seqno;
 
 		seqno = xe_gt_tlb_invalidation_guc(gt);
-		XE_WARN_ON(seqno <= 0);
+		xe_gt_assert(gt, seqno > 0);
 		if (seqno > 0)
 			xe_gt_tlb_invalidation_wait(gt, seqno);
 	} else if (xe_device_guc_submission_enabled(gt_to_xe(gt))) {
@@ -266,7 +266,7 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
 	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
 		unsigned int i = addr / XE_PAGE_SIZE;
 
-		XE_WARN_ON(addr > U32_MAX);
+		xe_tile_assert(ggtt->tile, addr <= U32_MAX);
 		if (ggtt->gsm[i] == scratch_pte)
 			continue;
 
@@ -315,7 +315,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 
 	if (XE_WARN_ON(bo->ggtt_node.size)) {
 		/* Someone's already inserted this BO in the GGTT */
-		XE_WARN_ON(bo->ggtt_node.size != bo->size);
+		xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
 		return 0;
 	}
 
@@ -378,7 +378,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 		return;
 
 	/* This BO is not currently in the GGTT */
-	XE_WARN_ON(bo->ggtt_node.size != bo->size);
+	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
 
 	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 678a276a25dc4..5d86bb2bb94d6 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -11,6 +11,7 @@
 #include <drm/xe_drm.h>
 
 #include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 2f77b8bbcf539..9136937324f34 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -58,7 +58,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	u32 freq = 0;
 
 	/* Assuming gen11+ so assert this assumption is correct */
-	XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
 
 	if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
 		freq = read_reference_ts_freq(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 9229fd5b01cc1..ec1ae00f6bfcf 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -158,7 +158,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	char name[8];
 	int i;
 
-	XE_WARN_ON(!minor->debugfs_root);
+	xe_gt_assert(gt, minor->debugfs_root);
 
 	sprintf(name, "gt%d", gt->info.id);
 	root = debugfs_create_dir(name, minor->debugfs_root);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index bcbeea62d5105..bd6005b9d4986 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -250,7 +250,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	u32 action[MAX_TLB_INVALIDATION_LEN];
 	int len = 0;
 
-	XE_WARN_ON(!vma);
+	xe_gt_assert(gt, vma);
 
 	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
 	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
@@ -288,10 +288,10 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			start = ALIGN_DOWN(xe_vma_start(vma), length);
 		}
 
-		XE_WARN_ON(length < SZ_4K);
-		XE_WARN_ON(!is_power_of_2(length));
-		XE_WARN_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
-		XE_WARN_ON(!IS_ALIGNED(start, length));
+		xe_gt_assert(gt, length >= SZ_4K);
+		xe_gt_assert(gt, is_power_of_2(length));
+		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+		xe_gt_assert(gt, IS_ALIGNED(start, length));
 
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
 		action[len++] = xe_vma_vm(vma)->usm.asid;
@@ -300,7 +300,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 		action[len++] = ilog2(length) - ilog2(SZ_4K);
 	}
 
-	XE_WARN_ON(len > MAX_TLB_INVALIDATION_LEN);
+	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
 
 	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 5d32bcee28b6e..134019fdda7eb 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -44,11 +44,12 @@ guc_to_xe(struct xe_guc *guc)
 static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
 			    struct xe_bo *bo)
 {
+	struct xe_device *xe = guc_to_xe(guc);
 	u32 addr = xe_bo_ggtt_addr(bo);
 
-	XE_WARN_ON(addr < xe_wopcm_size(guc_to_xe(guc)));
-	XE_WARN_ON(addr >= GUC_GGTT_TOP);
-	XE_WARN_ON(bo->size > GUC_GGTT_TOP - addr);
+	xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc)));
+	xe_assert(xe, addr < GUC_GGTT_TOP);
+	xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr);
 
 	return addr;
 }
@@ -629,13 +630,13 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
 
 	BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
 
-	XE_WARN_ON(guc->ct.enabled);
-	XE_WARN_ON(!len);
-	XE_WARN_ON(len > VF_SW_FLAG_COUNT);
-	XE_WARN_ON(len > MED_VF_SW_FLAG_COUNT);
-	XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) !=
+	xe_assert(xe, !guc->ct.enabled);
+	xe_assert(xe, len);
+	xe_assert(xe, len <= VF_SW_FLAG_COUNT);
+	xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
+	xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) ==
 		  GUC_HXG_ORIGIN_HOST);
-	XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) !=
+	xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) ==
 		  GUC_HXG_TYPE_REQUEST);
 
 retry:
@@ -727,6 +728,7 @@ int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len)
 
 static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
 {
+	struct xe_device *xe = guc_to_xe(guc);
 	u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
@@ -741,8 +743,8 @@ static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
 	};
 	int ret;
 
-	XE_WARN_ON(len > 2);
-	XE_WARN_ON(len == 1 && upper_32_bits(val));
+	xe_assert(xe, len <= 2);
+	xe_assert(xe, len != 1 || !upper_32_bits(val));
 
 	/* Self config must go over MMIO */
 	ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 5edee24b97c96..efa4d25424b84 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -118,7 +118,9 @@ struct __guc_ads_blob {
 
 static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
 {
-	XE_WARN_ON(!ads->regset_size);
+	struct xe_device *xe = ads_to_xe(ads);
+
+	xe_assert(xe, ads->regset_size);
 
 	return ads->regset_size;
 }
@@ -309,14 +311,14 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
 	struct xe_gt *gt = ads_to_gt(ads);
 	u32 prev_regset_size = ads->regset_size;
 
-	XE_WARN_ON(!ads->bo);
+	xe_gt_assert(gt, ads->bo);
 
 	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
 	ads->regset_size = calculate_regset_size(gt);
 
-	XE_WARN_ON(ads->golden_lrc_size +
-		   (ads->regset_size - prev_regset_size) >
-		   MAX_GOLDEN_LRC_SIZE);
+	xe_gt_assert(gt, ads->golden_lrc_size +
+		     (ads->regset_size - prev_regset_size) <=
+		     MAX_GOLDEN_LRC_SIZE);
 
 	return 0;
 }
@@ -517,7 +519,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
 		regset_used += count * sizeof(struct guc_mmio_reg);
 	}
 
-	XE_WARN_ON(regset_used > ads->regset_size);
+	xe_gt_assert(gt, regset_used <= ads->regset_size);
 }
 
 static void guc_um_init_params(struct xe_guc_ads *ads)
@@ -572,7 +574,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
 			offsetof(struct __guc_ads_blob, system_info));
 	u32 base = xe_bo_ggtt_addr(ads->bo);
 
-	XE_WARN_ON(!ads->bo);
+	xe_gt_assert(gt, ads->bo);
 
 	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
 	guc_policies_init(ads);
@@ -596,7 +598,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
 			offsetof(struct __guc_ads_blob, system_info));
 	u32 base = xe_bo_ggtt_addr(ads->bo);
 
-	XE_WARN_ON(!ads->bo);
+	xe_gt_assert(gt, ads->bo);
 
 	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
 	guc_policies_init(ads);
@@ -643,7 +645,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
 				   engine_enabled_masks[guc_class]))
 			continue;
 
-		XE_WARN_ON(!gt->default_lrc[class]);
+		xe_gt_assert(gt, gt->default_lrc[class]);
 
 		real_size = xe_lrc_size(xe, class);
 		alloc_size = PAGE_ALIGN(real_size);
@@ -672,7 +674,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
 		offset += alloc_size;
 	}
 
-	XE_WARN_ON(total_size != ads->golden_lrc_size);
+	xe_gt_assert(gt, total_size == ads->golden_lrc_size);
 }
 
 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 13f2bd586f6a5..2046bd269bbdd 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -135,7 +135,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 	struct xe_bo *bo;
 	int err;
 
-	XE_WARN_ON(guc_ct_size() % PAGE_SIZE);
+	xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
 
 	mutex_init(&ct->lock);
 	spin_lock_init(&ct->fast_lock);
@@ -283,7 +283,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
 	struct xe_device *xe = ct_to_xe(ct);
 	int err;
 
-	XE_WARN_ON(ct->enabled);
+	xe_assert(xe, !ct->enabled);
 
 	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
 	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -376,7 +376,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
 
 static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 {
-	XE_WARN_ON(g2h_len > ct->ctbs.g2h.info.space);
+	xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space);
 
 	if (g2h_len) {
 		lockdep_assert_held(&ct->fast_lock);
@@ -389,8 +389,8 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
 {
 	lockdep_assert_held(&ct->fast_lock);
-	XE_WARN_ON(ct->ctbs.g2h.info.space + g2h_len >
-		   ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
+	xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <=
+		  ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
 
 	ct->ctbs.g2h.info.space += g2h_len;
 	--ct->g2h_outstanding;
@@ -419,8 +419,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	full_len = len + GUC_CTB_HDR_LEN;
 
 	lockdep_assert_held(&ct->lock);
-	XE_WARN_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
-	XE_WARN_ON(tail > h2g->info.size);
+	xe_assert(xe, full_len <= (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
+	xe_assert(xe, tail <= h2g->info.size);
 
 	/* Command will wrap, zero fill (NOPs), return and check credits again */
 	if (tail + full_len > h2g->info.size) {
@@ -476,12 +476,13 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 				u32 len, u32 g2h_len, u32 num_g2h,
 				struct g2h_fence *g2h_fence)
 {
+	struct xe_device *xe = ct_to_xe(ct);
 	int ret;
 
-	XE_WARN_ON(g2h_len && g2h_fence);
-	XE_WARN_ON(num_g2h && g2h_fence);
-	XE_WARN_ON(g2h_len && !num_g2h);
-	XE_WARN_ON(!g2h_len && num_g2h);
+	xe_assert(xe, !g2h_len || !g2h_fence);
+	xe_assert(xe, !num_g2h || !g2h_fence);
+	xe_assert(xe, !g2h_len || num_g2h);
+	xe_assert(xe, g2h_len || !num_g2h);
 	lockdep_assert_held(&ct->lock);
 
 	if (unlikely(ct->ctbs.h2g.info.broken)) {
@@ -552,7 +553,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	unsigned int sleep_period_ms = 1;
 	int ret;
 
-	XE_WARN_ON(g2h_len && g2h_fence);
+	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
 	lockdep_assert_held(&ct->lock);
 	xe_device_assert_mem_access(ct_to_xe(ct));
 
@@ -622,7 +623,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
 {
 	int ret;
 
-	XE_WARN_ON(g2h_len && g2h_fence);
+	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
 
 	mutex_lock(&ct->lock);
 	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
@@ -798,7 +799,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
 		return 0;
 	}
 
-	XE_WARN_ON(fence != g2h_fence->seqno);
+	xe_assert(xe, fence == g2h_fence->seqno);
 
 	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
 		g2h_fence->fail = true;
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 63904007af0af..45c60a9c631c3 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -55,12 +55,12 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
 	size_t size;
 	int i, j;
 
-	XE_WARN_ON(!log->bo);
+	xe_assert(xe, log->bo);
 
 	size = log->bo->size;
 
 #define DW_PER_READ		128
-	XE_WARN_ON(size % (DW_PER_READ * sizeof(u32)));
+	xe_assert(xe, !(size % (DW_PER_READ * sizeof(u32))));
 	for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
 		u32 read[DW_PER_READ];
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 87de1ce40e07c..99d8556808944 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -816,7 +816,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
 	int ret;
 
-	XE_WARN_ON(!xe_device_guc_submission_enabled(xe));
+	xe_gt_assert(gt, xe_device_guc_submission_enabled(xe));
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 50509891a2880..f6b630f539284 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -14,6 +14,7 @@
 #include <drm/drm_managed.h>
 
 #include "regs/xe_lrc_layout.h"
+#include "xe_assert.h"
 #include "xe_devcoredump.h"
 #include "xe_device.h"
 #include "xe_exec_queue.h"
@@ -354,11 +355,12 @@ static const int xe_exec_queue_prio_to_guc[] = {
 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	struct exec_queue_policy policy;
+	struct xe_device *xe = guc_to_xe(guc);
 	enum xe_exec_queue_priority prio = q->priority;
 	u32 timeslice_us = q->sched_props.timeslice_us;
 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
 
-	XE_WARN_ON(!exec_queue_registered(q));
+	xe_assert(xe, exec_queue_registered(q));
 
 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
 	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
@@ -392,11 +394,12 @@ static void __register_mlrc_engine(struct xe_guc *guc,
 				   struct guc_ctxt_registration_info *info)
 {
 #define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
+	struct xe_device *xe = guc_to_xe(guc);
 	u32 action[MAX_MLRC_REG_SIZE];
 	int len = 0;
 	int i;
 
-	XE_WARN_ON(!xe_exec_queue_is_parallel(q));
+	xe_assert(xe, xe_exec_queue_is_parallel(q));
 
 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
 	action[len++] = info->flags;
@@ -419,7 +422,7 @@ static void __register_mlrc_engine(struct xe_guc *guc,
 		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
 	}
 
-	XE_WARN_ON(len > MAX_MLRC_REG_SIZE);
+	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
 #undef MAX_MLRC_REG_SIZE
 
 	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
@@ -453,7 +456,7 @@ static void register_engine(struct xe_exec_queue *q)
 	struct xe_lrc *lrc = q->lrc;
 	struct guc_ctxt_registration_info info;
 
-	XE_WARN_ON(exec_queue_registered(q));
+	xe_assert(xe, !exec_queue_registered(q));
 
 	memset(&info, 0, sizeof(info));
 	info.context_idx = q->guc->id;
@@ -543,7 +546,7 @@ static int wq_noop_append(struct xe_exec_queue *q)
 	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
 		return -ENODEV;
 
-	XE_WARN_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
 
 	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
 		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
@@ -583,13 +586,13 @@ static void wq_item_append(struct xe_exec_queue *q)
 		wqi[i++] = lrc->ring.tail / sizeof(u64);
 	}
 
-	XE_WARN_ON(i != wqi_size / sizeof(u32));
+	xe_assert(xe, i == wqi_size / sizeof(u32));
 
 	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
 				      wq[q->guc->wqi_tail / sizeof(u32)]));
 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
 	q->guc->wqi_tail += wqi_size;
-	XE_WARN_ON(q->guc->wqi_tail > WQ_SIZE);
+	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
 
 	xe_device_wmb(xe);
 
@@ -601,6 +604,7 @@ static void wq_item_append(struct xe_exec_queue *q)
 static void submit_exec_queue(struct xe_exec_queue *q)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_lrc *lrc = q->lrc;
 	u32 action[3];
 	u32 g2h_len = 0;
@@ -608,7 +612,7 @@ static void submit_exec_queue(struct xe_exec_queue *q)
 	int len = 0;
 	bool extra_submit = false;
 
-	XE_WARN_ON(!exec_queue_registered(q));
+	xe_assert(xe, exec_queue_registered(q));
 
 	if (xe_exec_queue_is_parallel(q))
 		wq_item_append(q);
@@ -654,10 +658,12 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
 	struct xe_exec_queue *q = job->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 	bool lr = xe_exec_queue_is_lr(q);
 
-	XE_WARN_ON((exec_queue_destroyed(q) || exec_queue_pending_disable(q)) &&
-		   !exec_queue_banned(q) && !exec_queue_suspended(q));
+	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
+		  exec_queue_banned(q) || exec_queue_suspended(q));
 
 	trace_xe_sched_job_run(job);
 
@@ -799,7 +805,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gpu_scheduler *sched = &ge->sched;
 
-	XE_WARN_ON(!xe_exec_queue_is_lr(q));
+	xe_assert(xe, xe_exec_queue_is_lr(q));
 	trace_xe_exec_queue_lr_cleanup(q);
 
 	/* Kill the run_job / process_msg entry points */
@@ -853,8 +859,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	int i = 0;
 
 	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
-		XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL);
-		XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q));
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));
 
 		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
 			   xe_sched_job_seqno(job), q->guc->id, q->flags);
@@ -990,8 +996,9 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
 {
 	struct xe_exec_queue *q = msg->private_data;
 	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 
-	XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_PERMANENT);
+	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
 	trace_xe_exec_queue_cleanup_entity(q);
 
 	if (exec_queue_registered(q))
@@ -1018,10 +1025,11 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms
 static void suspend_fence_signal(struct xe_exec_queue *q)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 
-	XE_WARN_ON(!exec_queue_suspended(q) && !exec_queue_killed(q) &&
-		   !guc_read_stopped(guc));
-	XE_WARN_ON(!q->guc->suspend_pending);
+	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
+		  guc_read_stopped(guc));
+	xe_assert(xe, q->guc->suspend_pending);
 
 	q->guc->suspend_pending = false;
 	smp_wmb();
@@ -1125,11 +1133,12 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 {
 	struct xe_gpu_scheduler *sched;
 	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_guc_exec_queue *ge;
 	long timeout;
 	int err;
 
-	XE_WARN_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
+	xe_assert(xe, xe_device_guc_submission_enabled(guc_to_xe(guc)));
 
 	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
 	if (!ge)
@@ -1275,10 +1284,12 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
 static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
 {
 	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 
-	XE_WARN_ON(exec_queue_registered(q));
-	XE_WARN_ON(exec_queue_banned(q));
-	XE_WARN_ON(exec_queue_killed(q));
+	xe_assert(xe, !exec_queue_registered(q));
+	xe_assert(xe, !exec_queue_banned(q));
+	xe_assert(xe, !exec_queue_killed(q));
 
 	sched->base.timeout = job_timeout_ms;
 
@@ -1309,8 +1320,10 @@ static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
 static void guc_exec_queue_resume(struct xe_exec_queue *q)
 {
 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
 
-	XE_WARN_ON(q->guc->suspend_pending);
+	xe_assert(xe, !q->guc->suspend_pending);
 
 	guc_exec_queue_add_msg(q, msg, RESUME);
 }
@@ -1405,8 +1418,9 @@ int xe_guc_submit_stop(struct xe_guc *guc)
 {
 	struct xe_exec_queue *q;
 	unsigned long index;
+	struct xe_device *xe = guc_to_xe(guc);
 
-	XE_WARN_ON(guc_read_stopped(guc) != 1);
+	xe_assert(xe, guc_read_stopped(guc) == 1);
 
 	mutex_lock(&guc->submission_state.lock);
 
@@ -1443,8 +1457,9 @@ int xe_guc_submit_start(struct xe_guc *guc)
 {
 	struct xe_exec_queue *q;
 	unsigned long index;
+	struct xe_device *xe = guc_to_xe(guc);
 
-	XE_WARN_ON(guc_read_stopped(guc) != 1);
+	xe_assert(xe, guc_read_stopped(guc) == 1);
 
 	mutex_lock(&guc->submission_state.lock);
 	atomic_dec(&guc->submission_state.stopped);
@@ -1474,7 +1489,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
 		return NULL;
 	}
 
-	XE_WARN_ON(q->guc->id != guc_id);
+	xe_assert(xe, q->guc->id == guc_id);
 
 	return q;
 }
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 177cda14864ed..c856da1e94229 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -6,6 +6,7 @@
 #include "xe_huc.h"
 
 #include "regs/xe_guc_regs.h"
+#include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
@@ -72,7 +73,7 @@ int xe_huc_auth(struct xe_huc *huc)
 	if (xe_uc_fw_is_disabled(&huc->fw))
 		return 0;
 
-	XE_WARN_ON(xe_uc_fw_is_running(&huc->fw));
+	xe_assert(xe, !xe_uc_fw_is_running(&huc->fw));
 
 	if (!xe_uc_fw_is_loaded(&huc->fw))
 		return -ENOEXEC;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index dd673a684b709..9c2e212fa4cfd 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -10,6 +10,7 @@
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
+#include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_execlist.h"
@@ -244,7 +245,7 @@ static void hw_engine_fini(struct drm_device *drm, void *arg)
 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
 				   u32 val)
 {
-	XE_WARN_ON(reg.addr & hwe->mmio_base);
+	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
 	reg.addr += hwe->mmio_base;
@@ -254,7 +255,7 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
 
 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
 {
-	XE_WARN_ON(reg.addr & hwe->mmio_base);
+	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 
 	reg.addr += hwe->mmio_base;
@@ -374,7 +375,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 
 	info = &engine_infos[id];
 
-	XE_WARN_ON(hwe->gt);
+	xe_gt_assert(gt, !hwe->gt);
 
 	hwe->gt = gt;
 	hwe->class = info->class;
@@ -415,8 +416,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	struct xe_tile *tile = gt_to_tile(gt);
 	int err;
 
-	XE_WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
-	XE_WARN_ON(!(gt->info.engine_mask & BIT(id)));
+	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
+	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
 
 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
 	xe_reg_sr_apply_whitelist(hwe);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 6f899b6a48774..1410dcab3d90e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -116,7 +116,7 @@ static void set_offsets(u32 *regs,
 		*regs |= MI_LRI_LRM_CS_MMIO;
 		regs++;
 
-		XE_WARN_ON(!count);
+		xe_gt_assert(hwe->gt, count);
 		do {
 			u32 offset = 0;
 			u8 v;
@@ -608,7 +608,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
 { \
 	struct iosys_map map = lrc->bo->vmap; \
 \
-	XE_WARN_ON(iosys_map_is_null(&map)); \
+	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
 	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
 	return map; \
 } \
@@ -827,16 +827,17 @@ static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
 
 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
 {
+	struct xe_device *xe = lrc_to_xe(lrc);
 	struct iosys_map ring;
 	u32 rhs;
 	size_t aligned_size;
 
-	XE_WARN_ON(!IS_ALIGNED(size, 4));
+	xe_assert(xe, IS_ALIGNED(size, 4));
 	aligned_size = ALIGN(size, 8);
 
 	ring = __xe_lrc_ring_map(lrc);
 
-	XE_WARN_ON(lrc->ring.tail >= lrc->ring.size);
+	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
 	rhs = lrc->ring.size - lrc->ring.tail;
 	if (size > rhs) {
 		__xe_lrc_write_ring(lrc, ring, data, rhs);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 8291798e1aaa5..713f98baf0ee7 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -15,6 +15,7 @@
 #include "generated/xe_wa_oob.h"
 #include "regs/xe_gpu_commands.h"
 #include "tests/xe_test.h"
+#include "xe_assert.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_exec_queue.h"
@@ -172,7 +173,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
 
 	/* Need to be sure everything fits in the first PT, or create more */
-	XE_WARN_ON(m->batch_base_ofs + batch->size >= SZ_2M);
+	xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M);
 
 	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
 				  num_entries * XE_PAGE_SIZE,
@@ -206,7 +207,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	}
 
 	if (!IS_DGFX(xe)) {
-		XE_WARN_ON(xe->info.supports_usm);
+		xe_tile_assert(tile, !xe->info.supports_usm);
 
 		/* Write out batch too */
 		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
@@ -487,7 +488,7 @@ static void emit_pte(struct xe_migrate *m,
 				/* Is this a 64K PTE entry? */
 				if ((m->q->vm->flags & XE_VM_FLAG_64K) &&
 				    !(cur_ofs & (16 * 8 - 1))) {
-					XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K));
+					xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K));
 					addr |= XE_PTE_PS64;
 				}
 
@@ -516,7 +517,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 
 	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
 				    NUM_CCS_BYTES_PER_BLOCK);
-	XE_WARN_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
+	xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
 	*cs++ = XY_CTRL_SURF_COPY_BLT |
 		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
 		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
@@ -536,9 +537,9 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 		      u64 src_ofs, u64 dst_ofs, unsigned int size,
 		      unsigned int pitch)
 {
-	XE_WARN_ON(size / pitch > S16_MAX);
-	XE_WARN_ON(pitch / 4 > S16_MAX);
-	XE_WARN_ON(pitch > U16_MAX);
+	xe_gt_assert(gt, size / pitch <= S16_MAX);
+	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
+	xe_gt_assert(gt, pitch <= U16_MAX);
 
 	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
 	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
@@ -598,7 +599,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 		 * At the moment, we don't support copying CCS metadata from
 		 * system to system.
 		 */
-		XE_WARN_ON(!src_is_vram && !dst_is_vram);
+		xe_gt_assert(gt, src_is_vram || dst_is_vram);
 
 		emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
 			      src_is_vram, dst_size);
@@ -810,7 +811,7 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
 	*cs++ = upper_32_bits(src_ofs);
 	*cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
 
-	XE_WARN_ON(cs - bb->cs != len + bb->len);
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
 
 	bb->len += len;
 }
@@ -848,7 +849,7 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
 		*cs++ = 0;
 	}
 
-	XE_WARN_ON(cs - bb->cs != len + bb->len);
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
 
 	bb->len += len;
 }
@@ -1021,9 +1022,9 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 	 * PDE. This requires a BO that is almost vm->size big.
 	 *
 	 * This shouldn't be possible in practice.. might change when 16K
-	 * pages are used. Hence the XE_WARN_ON.
+	 * pages are used. Hence the assert.
 	 */
-	XE_WARN_ON(update->qwords > 0x1ff);
+	xe_tile_assert(tile, update->qwords <= 0x1ff);
 	if (!ppgtt_ofs) {
 		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
 							   XE_PAGE_SIZE));
@@ -1213,7 +1214,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
 	 * Should be reasonably bound..
 	 */
-	XE_WARN_ON(batch_size >= SZ_128K);
+	xe_tile_assert(tile, batch_size < SZ_128K);
 
 	bb = xe_bb_new(gt, batch_size, !q && xe->info.supports_usm);
 	if (IS_ERR(bb))
@@ -1223,7 +1224,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	if (!IS_DGFX(xe)) {
 		ppgtt_ofs = NUM_KERNEL_PDE - 1;
 		if (q) {
-			XE_WARN_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT);
+			xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT);
 
 			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
 						 GFP_KERNEL, true, 0);
@@ -1252,7 +1253,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		for (i = 0; i < num_updates; i++) {
 			struct xe_bo *pt_bo = updates[i].pt_bo;
 
-			XE_WARN_ON(pt_bo->size != SZ_4K);
+			xe_tile_assert(tile, pt_bo->size == SZ_4K);
 
 			addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 75d025c54eb84..ada3114be4fa1 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -463,7 +463,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 	 * is still 0 at this point, we'll assume that it was omitted by
 	 * mistake in the switch statement above.
 	 */
-	XE_WARN_ON(info->unused_entries_index == 0);
+	xe_assert(xe, info->unused_entries_index != 0);
 
 	if (XE_WARN_ON(info->size > info->n_entries)) {
 		info->table = NULL;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c21d2681b4196..680fbe6f38a65 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -196,7 +196,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 	pt->level = level;
 	pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
 
-	XE_WARN_ON(level > XE_VM_MAX_LEVEL);
+	xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);
 
 	return pt;
 
@@ -1004,7 +1004,7 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
 	*num_entries = 0;
 	err = xe_pt_stage_bind(tile, vma, entries, num_entries);
 	if (!err)
-		XE_WARN_ON(!*num_entries);
+		xe_tile_assert(tile, *num_entries);
 	else /* abort! */
 		xe_pt_abort_bind(vma, entries, *num_entries);
 
@@ -1026,7 +1026,7 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe,
 		u64 end;
 		u64 start;
 
-		XE_WARN_ON(entry->pt->is_compact);
+		xe_assert(xe, !entry->pt->is_compact);
 		start = entry->ofs * page_size;
 		end = start + page_size * entry->qwords;
 		vm_dbg(&xe->drm,
@@ -1276,7 +1276,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
 		dma_fence_put(&ifence->base.base);	/* Creation ref */
 	}
 
-	XE_WARN_ON(ret && ret != -ENOENT);
+	xe_gt_assert(gt, !ret || ret == -ENOENT);
 
 	return ret && ret != -ENOENT ? ret : 0;
 }
@@ -1356,7 +1356,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
 	if (err)
 		goto err;
-	XE_WARN_ON(num_entries > ARRAY_SIZE(entries));
+	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 	xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
@@ -1707,7 +1707,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 	       xe_vma_start(vma), xe_vma_end(vma) - 1, q);
 
 	num_entries = xe_pt_stage_unbind(tile, vma, entries);
-	XE_WARN_ON(num_entries > ARRAY_SIZE(entries));
+	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
@@ -1773,7 +1773,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 		list_del_init(&vma->combined_links.rebind);
 
 	if (unbind_pt_update.locked) {
-		XE_WARN_ON(!xe_vma_is_userptr(vma));
+		xe_tile_assert(tile, xe_vma_is_userptr(vma));
 
 		if (!vma->tile_present) {
 			spin_lock(&vm->userptr.invalidated_lock);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 2238a40b7e8e6..6eec7c7e4bc56 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -212,6 +212,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
 	struct xe_vm *vm = job->q->vm;
+	struct xe_gt *gt = job->q->gt;
 
 	if (vm && vm->batch_invalidate_tlb) {
 		dw[i++] = preparser_disable(true);
@@ -234,7 +235,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
@@ -294,7 +295,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
@@ -342,7 +343,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
@@ -372,14 +373,16 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 
 	i = emit_user_interrupt(dw, i);
 
-	XE_WARN_ON(i > MAX_JOB_SIZE_DW);
+	xe_gt_assert(job->q->gt, i <= MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
 }
 
 static void emit_job_gen12_gsc(struct xe_sched_job *job)
 {
-	XE_WARN_ON(job->q->width > 1); /* no parallel submission for GSCCS */
+	struct xe_gt *gt = job->q->gt;
+
+	xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */
 
 	__emit_job_gen12_simple(job, job->q->lrc,
 				job->batch_addr[0],
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index b02183147e8ea..84c700aed8ac7 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -143,7 +143,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 
 		/* Sanity check */
 		for (j = 0; j < q->width; ++j)
-			XE_WARN_ON(cf->base.seqno != fences[j]->seqno);
+			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
 
 		job->fence = &cf->base;
 	}
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 9c8ce504f4da6..a8ecb5c6e01aa 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -162,7 +162,7 @@ int xe_uc_init_hw(struct xe_uc *uc)
 
 	/* We don't fail the driver load if HuC fails to auth, but let's warn */
 	ret = xe_huc_auth(&uc->huc);
-	XE_WARN_ON(ret);
+	xe_gt_assert(uc_to_gt(uc), !ret);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 37ad238148b0c..8aa3ae1384db2 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -195,7 +195,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 	u32 count;
 	int i;
 
-	XE_WARN_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
+	xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all));
 	entries = blobs_all[uc_fw->type].entries;
 	count = blobs_all[uc_fw->type].count;
 
@@ -224,8 +224,8 @@ size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
 	u32 size = min_t(u32, uc_fw->rsa_size, max_len);
 
-	XE_WARN_ON(size % 4);
-	XE_WARN_ON(!xe_uc_fw_is_available(uc_fw));
+	xe_assert(xe, !(size % 4));
+	xe_assert(xe, xe_uc_fw_is_available(uc_fw));
 
 	xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
 			   xe_uc_fw_rsa_offset(uc_fw), size);
@@ -249,8 +249,8 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
 	struct xe_guc *guc = &gt->uc.guc;
 
-	XE_WARN_ON(uc_fw->type != XE_UC_FW_TYPE_GUC);
-	XE_WARN_ON(uc_fw->major_ver_found < 70);
+	xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC);
+	xe_gt_assert(gt, uc_fw->major_ver_found >= 70);
 
 	if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) {
 		/* v70.6.0 adds CSS header support */
@@ -336,8 +336,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	 * before we're looked at the HW caps to see if we have uc support
 	 */
 	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
-	XE_WARN_ON(uc_fw->status);
-	XE_WARN_ON(uc_fw->path);
+	xe_assert(xe, !uc_fw->status);
+	xe_assert(xe, !uc_fw->path);
 
 	uc_fw_auto_select(xe, uc_fw);
 	xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
@@ -504,7 +504,7 @@ int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 	int err;
 
 	/* make sure the status was cleared the last time we reset the uc */
-	XE_WARN_ON(xe_uc_fw_is_loaded(uc_fw));
+	xe_assert(xe, !xe_uc_fw_is_loaded(uc_fw));
 
 	if (!xe_uc_fw_is_loadable(uc_fw))
 		return -ENOEXEC;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 66e8aeb203c96..53add99d4186f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -71,7 +71,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
 	bool read_only = xe_vma_read_only(vma);
 
 	lockdep_assert_held(&vm->lock);
-	XE_WARN_ON(!xe_vma_is_userptr(vma));
+	xe_assert(xe, xe_vma_is_userptr(vma));
 retry:
 	if (vma->gpuva.flags & XE_VMA_DESTROYED)
 		return 0;
@@ -260,7 +260,7 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
 		struct dma_fence *fence;
 
 		link = list->next;
-		XE_WARN_ON(link == list);
+		xe_assert(vm->xe, link != list);
 
 		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
 					     q, q->compute.context,
@@ -338,7 +338,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	int err;
 	bool wait;
 
-	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
+	xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
 
 	down_write(&vm->lock);
 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
@@ -573,7 +573,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	long wait;
 	int __maybe_unused tries = 0;
 
-	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
+	xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
 	trace_xe_vm_rebind_worker_enter(vm);
 
 	down_write(&vm->lock);
@@ -698,7 +698,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 	struct dma_fence *fence;
 	long err;
 
-	XE_WARN_ON(!xe_vma_is_userptr(vma));
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
 	trace_xe_vma_userptr_invalidate(vma);
 
 	if (!mmu_notifier_range_blockable(range))
@@ -839,7 +839,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 	xe_vm_assert_held(vm);
 	list_for_each_entry_safe(vma, next, &vm->rebind_list,
 				 combined_links.rebind) {
-		XE_WARN_ON(!vma->tile_present);
+		xe_assert(vm->xe, vma->tile_present);
 
 		list_del_init(&vma->combined_links.rebind);
 		dma_fence_put(fence);
@@ -867,8 +867,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	struct xe_tile *tile;
 	u8 id;
 
-	XE_WARN_ON(start >= end);
-	XE_WARN_ON(end >= vm->size);
+	xe_assert(vm->xe, start < end);
+	xe_assert(vm->xe, end < vm->size);
 
 	if (!bo && !is_null)	/* userptr */
 		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
@@ -1064,10 +1064,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	struct xe_vm *vm = xe_vma_vm(vma);
 
 	lockdep_assert_held_write(&vm->lock);
-	XE_WARN_ON(!list_empty(&vma->combined_links.destroy));
+	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
 
 	if (xe_vma_is_userptr(vma)) {
-		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
+		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
 
 		spin_lock(&vm->userptr.invalidated_lock);
 		list_del(&vma->userptr.invalidate_link);
@@ -1160,7 +1160,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
 	if (xe_vm_is_closed_or_banned(vm))
 		return NULL;
 
-	XE_WARN_ON(start + range > vm->size);
+	xe_assert(vm->xe, start + range <= vm->size);
 
 	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
 
@@ -1171,7 +1171,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
 	int err;
 
-	XE_WARN_ON(xe_vma_vm(vma) != vm);
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
 	lockdep_assert_held(&vm->lock);
 
 	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
@@ -1182,7 +1182,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 
 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
 {
-	XE_WARN_ON(xe_vma_vm(vma) != vm);
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
 	lockdep_assert_held(&vm->lock);
 
 	drm_gpuva_remove(&vma->gpuva);
@@ -1428,7 +1428,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	struct drm_gpuva *gpuva, *next;
 	u8 id;
 
-	XE_WARN_ON(vm->preempt.num_exec_queues);
+	xe_assert(xe, !vm->preempt.num_exec_queues);
 
 	xe_vm_close(vm);
 	flush_async_ops(vm);
@@ -1505,7 +1505,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	if (vm->async_ops.error_capture.addr)
 		wake_up_all(&vm->async_ops.error_capture.wq);
 
-	XE_WARN_ON(!list_empty(&vm->extobj.list));
+	xe_assert(xe, list_empty(&vm->extobj.list));
 	up_write(&vm->lock);
 
 	mutex_lock(&xe->usm.lock);
@@ -1531,7 +1531,7 @@ static void vm_destroy_work_func(struct work_struct *w)
 	void *lookup;
 
 	/* xe_vm_close_and_put was not called? */
-	XE_WARN_ON(vm->size);
+	xe_assert(xe, !vm->size);
 
 	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
 		xe_device_mem_access_put(xe);
@@ -1539,7 +1539,7 @@ static void vm_destroy_work_func(struct work_struct *w)
 		if (xe->info.has_asid) {
 			mutex_lock(&xe->usm.lock);
 			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
-			XE_WARN_ON(lookup != vm);
+			xe_assert(xe, lookup == vm);
 			mutex_unlock(&xe->usm.lock);
 		}
 	}
@@ -1802,7 +1802,7 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence)
 		struct async_op_fence *afence =
 			container_of(fence, struct async_op_fence, fence);
 
-		XE_WARN_ON(xe_vm_no_dma_fences(afence->vm));
+		xe_assert(afence->vm->xe, !xe_vm_no_dma_fences(afence->vm));
 
 		smp_rmb();
 		return wait_event_interruptible(afence->wq, afence->started);
@@ -1828,7 +1828,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 	} else {
 		int i;
 
-		XE_WARN_ON(!xe_vm_in_fault_mode(vm));
+		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
 
 		fence = dma_fence_get_stub();
 		if (last_op) {
@@ -2110,7 +2110,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 {
 	int err;
 
-	XE_WARN_ON(region > ARRAY_SIZE(region_to_mem_type));
+	xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
 
 	if (!xe_vma_has_no_bo(vma)) {
 		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
@@ -2309,7 +2309,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		}
 		break;
 	case XE_VM_BIND_OP_UNMAP_ALL:
-		XE_WARN_ON(!bo);
+		xe_assert(vm->xe, bo);
 
 		err = xe_bo_lock(bo, true);
 		if (err)
@@ -2506,7 +2506,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 		bool first = list_empty(ops_list);
 
-		XE_WARN_ON(!first && !async);
+		xe_assert(vm->xe, first || async);
 
 		INIT_LIST_HEAD(&op->link);
 		list_add_tail(&op->link, ops_list);
@@ -3468,8 +3468,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	u8 id;
 	int ret;
 
-	XE_WARN_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma)));
-	XE_WARN_ON(xe_vma_is_null(vma));
+	xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
+	xe_assert(xe, !xe_vma_is_null(vma));
 	trace_xe_vma_usm_invalidate(vma);
 
 	/* Check that we don't race with page-table updates */
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index cc9dfd8cb7701..694f9e689b058 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -182,7 +182,7 @@ extern struct ttm_device_funcs xe_ttm_funcs;
 
 static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
 {
-	XE_WARN_ON(!xe_vm_in_compute_mode(vm));
+	xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
 	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
 }
 
-- 
GitLab


From 430003b85ce36e6f9dd6799b6cd5690f9b6c8a2a Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 6 Sep 2023 16:00:10 -0700
Subject: [PATCH 1884/2340] drm/xe/guc: Switch to major-only GuC FW tracking
 for MTL

Newer HuC binaries for MTL (8.5.1+) require GuC 70.7 or newer, so we
need to move on from 70.6.4. Given that the MTL GuC uses major-only
version matching in i915, we can do the same here instead of just
bumping the version (and having to push the versioned binaries,
because they're not there already for i915).

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 8aa3ae1384db2..efc70836453d0 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -101,7 +101,7 @@ struct fw_blobs_by_type {
 
 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
 	fw_def(LUNARLAKE,	mmp_ver(xe,	guc,	lnl,	70, 6, 8))	\
-	fw_def(METEORLAKE,	mmp_ver(i915,	guc,	mtl,	70, 6, 4))	\
+	fw_def(METEORLAKE,	major_ver(i915,	guc,	mtl,	70, 7))		\
 	fw_def(PVC,		mmp_ver(xe,	guc,	pvc,	70, 6, 4))	\
 	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
 	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
-- 
GitLab


From 8d07691c35bfd08fe16f865b9df04204604b36d5 Mon Sep 17 00:00:00 2001
From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Date: Wed, 30 Aug 2023 08:46:50 +0530
Subject: [PATCH 1885/2340] drm/xe: Get GT clock to nanosecs

Helper to convert GT clock cycles to nanoseconds.

v2: Use DIV_ROUND_CLOSEST_ULL helper(Ashutosh)
v3: rename xe_gt_clock_interval_to_ns to xe_gt_clock_cycles_to_ns

Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_clock.c | 5 +++++
 drivers/gpu/drm/xe/xe_gt_clock.h | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 9136937324f34..25a18eaad9c4b 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -78,3 +78,8 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	gt->info.clock_freq = freq;
 	return 0;
 }
+
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count)
+{
+	return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.clock_freq);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
index 511923afd224a..aa162722f8596 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.h
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -6,8 +6,10 @@
 #ifndef _XE_GT_CLOCK_H_
 #define _XE_GT_CLOCK_H_
 
+#include <linux/types.h>
+
 struct xe_gt;
 
 int xe_gt_clock_init(struct xe_gt *gt);
-
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count);
 #endif
-- 
GitLab


From cd8534193a4b4e4e0f8c8ee99d96293035e0ffba Mon Sep 17 00:00:00 2001
From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Date: Wed, 30 Aug 2023 08:38:33 +0530
Subject: [PATCH 1886/2340] drm/xe: Use spinlock in forcewake instead of mutex

In PMU we need to access certain registers which fall under GT power
domain for which we need to take forcewake. But as PMU being an atomic
context can't expect to have any sleeping calls.

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c       | 14 +++++++-------
 drivers/gpu/drm/xe/xe_force_wake_types.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index ed2ecb20ce8e6..32d6c4dd28079 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -42,7 +42,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
 	struct xe_device *xe = gt_to_xe(gt);
 
 	fw->gt = gt;
-	mutex_init(&fw->lock);
+	spin_lock_init(&fw->lock);
 
 	/* Assuming gen11+ so assert this assumption is correct */
 	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
@@ -116,7 +116,7 @@ static int domain_wake_wait(struct xe_gt *gt,
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
 			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
-			      NULL, false);
+			      NULL, true);
 }
 
 static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
@@ -129,7 +129,7 @@ static int domain_sleep_wait(struct xe_gt *gt,
 {
 	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0,
 			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
-			      NULL, false);
+			      NULL, true);
 }
 
 #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
@@ -147,7 +147,7 @@ int xe_force_wake_get(struct xe_force_wake *fw,
 	enum xe_force_wake_domains tmp, woken = 0;
 	int ret, ret2 = 0;
 
-	mutex_lock(&fw->lock);
+	spin_lock(&fw->lock);
 	for_each_fw_domain_masked(domain, domains, fw, tmp) {
 		if (!domain->ref++) {
 			woken |= BIT(domain->id);
@@ -162,7 +162,7 @@ int xe_force_wake_get(struct xe_force_wake *fw,
 				   domain->id, ret);
 	}
 	fw->awake_domains |= woken;
-	mutex_unlock(&fw->lock);
+	spin_unlock(&fw->lock);
 
 	return ret2;
 }
@@ -176,7 +176,7 @@ int xe_force_wake_put(struct xe_force_wake *fw,
 	enum xe_force_wake_domains tmp, sleep = 0;
 	int ret, ret2 = 0;
 
-	mutex_lock(&fw->lock);
+	spin_lock(&fw->lock);
 	for_each_fw_domain_masked(domain, domains, fw, tmp) {
 		if (!--domain->ref) {
 			sleep |= BIT(domain->id);
@@ -191,7 +191,7 @@ int xe_force_wake_put(struct xe_force_wake *fw,
 				   domain->id, ret);
 	}
 	fw->awake_domains &= ~sleep;
-	mutex_unlock(&fw->lock);
+	spin_unlock(&fw->lock);
 
 	return ret2;
 }
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
index cb782696855be..ed0edc2cdf9f8 100644
--- a/drivers/gpu/drm/xe/xe_force_wake_types.h
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -76,7 +76,7 @@ struct xe_force_wake {
 	/** @gt: back pointers to GT */
 	struct xe_gt *gt;
 	/** @lock: protects everything force wake struct */
-	struct mutex lock;
+	spinlock_t lock;
 	/** @awake_domains: mask of all domains awake */
 	enum xe_force_wake_domains awake_domains;
 	/** @domains: force wake domains */
-- 
GitLab


From 3856b0f71f52b8397887c1765e14d0245d722233 Mon Sep 17 00:00:00 2001
From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Date: Wed, 30 Aug 2023 08:48:53 +0530
Subject: [PATCH 1887/2340] drm/xe/pmu: Enable PMU interface

There are a set of engine group busyness counters provided by HW which are
perfect fit to be exposed via PMU perf events.

BSPEC: 46559, 46560, 46722, 46729, 52071, 71028

events can be listed using:
perf list
  xe_0000_03_00.0/any-engine-group-busy-gt0/         [Kernel PMU event]
  xe_0000_03_00.0/copy-group-busy-gt0/               [Kernel PMU event]
  xe_0000_03_00.0/interrupts/                        [Kernel PMU event]
  xe_0000_03_00.0/media-group-busy-gt0/              [Kernel PMU event]
  xe_0000_03_00.0/render-group-busy-gt0/             [Kernel PMU event]

and can be read using:

perf stat -e "xe_0000_8c_00.0/render-group-busy-gt0/" -I 1000
           time             counts unit events
     1.001139062                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     2.003294678                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     3.005199582                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     4.007076497                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     5.008553068                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     6.010531563              43520 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     7.012468029              44800 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     8.013463515                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
     9.015300183                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
    10.017233010                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/
    10.971934120                  0 ns  xe_0000_8c_00.0/render-group-busy-gt0/

The pmu base implementation is taken from i915.

v2:
Store last known value when device is awake return that while the GT is
suspended and then update the driver copy when read during awake.

v3:
1. drop init_samples, as storing counters before going to suspend should
be sufficient.
2. ported the "drm/i915/pmu: Make PMU sample array two-dimensional" and
dropped helpers to store and read samples.
3. use xe_device_mem_access_get_if_ongoing to check if device is active
before reading the OA registers.
4. dropped format attr as no longer needed
5. introduce xe_pmu_suspend to call engine_group_busyness_store
6. few other nits.

v4: minor nits.

v5: take forcewake when accessing the OAG registers

v6:
1. drop engine_busyness_sample_type
2. update UAPI documentation

v7:
1. update UAPI documentation
2. drop MEDIA_GT specific change for media busyness counter.

Co-developed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Co-developed-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |   2 +
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |   5 +
 drivers/gpu/drm/xe/xe_device.c       |   2 +
 drivers/gpu/drm/xe/xe_device_types.h |   4 +
 drivers/gpu/drm/xe/xe_gt.c           |   2 +
 drivers/gpu/drm/xe/xe_irq.c          |  18 +
 drivers/gpu/drm/xe/xe_module.c       |   5 +
 drivers/gpu/drm/xe/xe_pmu.c          | 654 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pmu.h          |  25 +
 drivers/gpu/drm/xe/xe_pmu_types.h    |  76 ++++
 include/uapi/drm/xe_drm.h            |  40 ++
 11 files changed, 833 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_pmu.c
 create mode 100644 drivers/gpu/drm/xe/xe_pmu.h
 create mode 100644 drivers/gpu/drm/xe/xe_pmu_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index be93745e8a30c..d3b97bc11af7d 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -124,6 +124,8 @@ xe-y += xe_bb.o \
 obj-$(CONFIG_DRM_XE) += xe.o
 obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
 
+xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o
+
 # header test
 hdrtest_find_args := -not -path xe_rtp_helpers.h
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 271ed0cdbe21e..e13fbbdf6929e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -294,6 +294,11 @@
 #define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
 #define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
 
+#define XE_OAG_RC0_ANY_ENGINE_BUSY_FREE		XE_REG(0xdb80)
+#define XE_OAG_ANY_MEDIA_FF_BUSY_FREE		XE_REG(0xdba0)
+#define XE_OAG_BLT_BUSY_FREE			XE_REG(0xdbbc)
+#define XE_OAG_RENDER_BUSY_FREE			XE_REG(0xdbdc)
+
 #define SAMPLER_MODE				XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 986a02a661661..89bf926bc0f35 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -304,6 +304,8 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_debugfs_register(xe);
 
+	xe_pmu_register(&xe->pmu);
+
 	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 552e8a343d8f0..496d7f3fb897e 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -15,6 +15,7 @@
 #include "xe_devcoredump_types.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
+#include "xe_pmu.h"
 #include "xe_step_types.h"
 
 struct xe_ggtt;
@@ -342,6 +343,9 @@ struct xe_device {
 	 */
 	struct task_struct *pm_callback_task;
 
+	/** @pmu: performance monitoring unit */
+	struct xe_pmu pmu;
+
 	/* For pcode */
 	struct mutex sb_lock;
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 5d86bb2bb94d6..06147f26384fc 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -652,6 +652,8 @@ int xe_gt_suspend(struct xe_gt *gt)
 	if (err)
 		goto err_msg;
 
+	xe_pmu_suspend(gt);
+
 	err = xe_uc_suspend(&gt->uc);
 	if (err)
 		goto err_force_wake;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index ef434142bcd92..772b8006d98f7 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -26,6 +26,20 @@
 #define IIR(offset)				XE_REG(offset + 0x8)
 #define IER(offset)				XE_REG(offset + 0xc)
 
+/*
+ * Interrupt statistic for PMU. Increments the counter only if the
+ * interrupt originated from the GPU so interrupts from a device which
+ * shares the interrupt line are not accounted.
+ */
+static __always_inline void xe_pmu_irq_stats(struct xe_device *xe)
+{
+	/*
+	 * A clever compiler translates that into INC. A not so clever one
+	 * should at least prevent store tearing.
+	 */
+	WRITE_ONCE(xe->pmu.irq_count, xe->pmu.irq_count + 1);
+}
+
 static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
 {
 	u32 val = xe_mmio_read32(mmio, reg);
@@ -332,6 +346,8 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 
 	xelp_intr_enable(xe, false);
 
+	xe_pmu_irq_stats(xe);
+
 	return IRQ_HANDLED;
 }
 
@@ -425,6 +441,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 
 	dg1_intr_enable(xe, false);
 
+	xe_pmu_irq_stats(xe);
+
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index ed3772a697628..d76fabe056d00 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -12,6 +12,7 @@
 #include "xe_hw_fence.h"
 #include "xe_module.h"
 #include "xe_pci.h"
+#include "xe_pmu.h"
 #include "xe_sched_job.h"
 
 bool force_execlist = false;
@@ -45,6 +46,10 @@ static const struct init_funcs init_funcs[] = {
 		.init = xe_sched_job_module_init,
 		.exit = xe_sched_job_module_exit,
 	},
+	{
+		.init = xe_pmu_init,
+		.exit = xe_pmu_exit,
+	},
 	{
 		.init = xe_register_pci_driver,
 		.exit = xe_unregister_pci_driver,
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
new file mode 100644
index 0000000000000..abfc0b3aeac41
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_device.h"
+#include "xe_gt_clock.h"
+#include "xe_mmio.h"
+
+static cpumask_t xe_pmu_cpumask;
+static unsigned int xe_pmu_target_cpu = -1;
+
+static unsigned int config_gt_id(const u64 config)
+{
+	return config >> __XE_PMU_GT_SHIFT;
+}
+
+static u64 config_counter(const u64 config)
+{
+	return config & ~(~0ULL << __XE_PMU_GT_SHIFT);
+}
+
+static void xe_pmu_event_destroy(struct perf_event *event)
+{
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+
+	drm_WARN_ON(&xe->drm, event->parent);
+
+	drm_dev_put(&xe->drm);
+}
+
+static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type)
+{
+	u64 val;
+
+	switch (sample_type) {
+	case __XE_SAMPLE_RENDER_GROUP_BUSY:
+		val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE);
+		break;
+	case __XE_SAMPLE_COPY_GROUP_BUSY:
+		val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE);
+		break;
+	case __XE_SAMPLE_MEDIA_GROUP_BUSY:
+		val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE);
+		break;
+	case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY:
+		val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE);
+		break;
+	default:
+		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+	}
+
+	return xe_gt_clock_cycles_to_ns(gt, val * 16);
+}
+
+static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config)
+{
+	int sample_type = config_counter(config) - 1;
+	const unsigned int gt_id = gt->info.id;
+	struct xe_device *xe = gt->tile->xe;
+	struct xe_pmu *pmu = &xe->pmu;
+	unsigned long flags;
+	bool device_awake;
+	u64 val;
+
+	device_awake = xe_device_mem_access_get_if_ongoing(xe);
+	if (device_awake) {
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+		val = __engine_group_busyness_read(gt, sample_type);
+		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+		xe_device_mem_access_put(xe);
+	}
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (device_awake)
+		pmu->sample[gt_id][sample_type] = val;
+	else
+		val = pmu->sample[gt_id][sample_type];
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return val;
+}
+
+static void engine_group_busyness_store(struct xe_gt *gt)
+{
+	struct xe_pmu *pmu = &gt->tile->xe->pmu;
+	unsigned int gt_id = gt->info.id;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++)
+		pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i);
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static int
+config_status(struct xe_device *xe, u64 config)
+{
+	unsigned int gt_id = config_gt_id(config);
+	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+
+	if (gt_id >= XE_PMU_MAX_GT)
+		return -ENOENT;
+
+	switch (config_counter(config)) {
+	case XE_PMU_INTERRUPTS(0):
+		if (gt_id)
+			return -ENOENT;
+		break;
+	case XE_PMU_RENDER_GROUP_BUSY(0):
+	case XE_PMU_COPY_GROUP_BUSY(0):
+	case XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
+		if (gt->info.type == XE_GT_TYPE_MEDIA)
+			return -ENOENT;
+		break;
+	case XE_PMU_MEDIA_GROUP_BUSY(0):
+		if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
+			return -ENOENT;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int xe_pmu_event_init(struct perf_event *event)
+{
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+	struct xe_pmu *pmu = &xe->pmu;
+	int ret;
+
+	if (pmu->closed)
+		return -ENODEV;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* unsupported modes and filters */
+	if (event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* only allow running on one cpu at a time */
+	if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
+		return -EINVAL;
+
+	ret = config_status(xe, event->attr.config);
+	if (ret)
+		return ret;
+
+	if (!event->parent) {
+		drm_dev_get(&xe->drm);
+		event->destroy = xe_pmu_event_destroy;
+	}
+
+	return 0;
+}
+
+static u64 __xe_pmu_event_read(struct perf_event *event)
+{
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+	const unsigned int gt_id = config_gt_id(event->attr.config);
+	const u64 config = event->attr.config;
+	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+	struct xe_pmu *pmu = &xe->pmu;
+	u64 val;
+
+	switch (config_counter(config)) {
+	case XE_PMU_INTERRUPTS(0):
+		val = READ_ONCE(pmu->irq_count);
+		break;
+	case XE_PMU_RENDER_GROUP_BUSY(0):
+	case XE_PMU_COPY_GROUP_BUSY(0):
+	case XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
+	case XE_PMU_MEDIA_GROUP_BUSY(0):
+		val = engine_group_busyness_read(gt, config);
+		break;
+	default:
+		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
+	}
+
+	return val;
+}
+
+static void xe_pmu_event_read(struct perf_event *event)
+{
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+	struct hw_perf_event *hwc = &event->hw;
+	struct xe_pmu *pmu = &xe->pmu;
+	u64 prev, new;
+
+	if (pmu->closed) {
+		event->hw.state = PERF_HES_STOPPED;
+		return;
+	}
+again:
+	prev = local64_read(&hwc->prev_count);
+	new = __xe_pmu_event_read(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
+		goto again;
+
+	local64_add(new - prev, &event->count);
+}
+
+static void xe_pmu_enable(struct perf_event *event)
+{
+	/*
+	 * Store the current counter value so we can report the correct delta
+	 * for all listeners. Even when the event was already enabled and has
+	 * an existing non-zero value.
+	 */
+	local64_set(&event->hw.prev_count, __xe_pmu_event_read(event));
+}
+
+static void xe_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+	struct xe_pmu *pmu = &xe->pmu;
+
+	if (pmu->closed)
+		return;
+
+	xe_pmu_enable(event);
+	event->hw.state = 0;
+}
+
+static void xe_pmu_event_stop(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_UPDATE)
+		xe_pmu_event_read(event);
+
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static int xe_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct xe_device *xe =
+		container_of(event->pmu, typeof(*xe), pmu.base);
+	struct xe_pmu *pmu = &xe->pmu;
+
+	if (pmu->closed)
+		return -ENODEV;
+
+	if (flags & PERF_EF_START)
+		xe_pmu_event_start(event, flags);
+
+	return 0;
+}
+
+static void xe_pmu_event_del(struct perf_event *event, int flags)
+{
+	xe_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int xe_pmu_event_event_idx(struct perf_event *event)
+{
+	return 0;
+}
+
+struct xe_ext_attribute {
+	struct device_attribute attr;
+	unsigned long val;
+};
+
+static ssize_t xe_pmu_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct xe_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct xe_ext_attribute, attr);
+	return sprintf(buf, "config=0x%lx\n", eattr->val);
+}
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask);
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *xe_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group xe_pmu_cpumask_attr_group = {
+	.attrs = xe_cpumask_attrs,
+};
+
+#define __event(__counter, __name, __unit) \
+{ \
+	.counter = (__counter), \
+	.name = (__name), \
+	.unit = (__unit), \
+	.global = false, \
+}
+
+#define __global_event(__counter, __name, __unit) \
+{ \
+	.counter = (__counter), \
+	.name = (__name), \
+	.unit = (__unit), \
+	.global = true, \
+}
+
+static struct xe_ext_attribute *
+add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
+{
+	sysfs_attr_init(&attr->attr.attr);
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = xe_pmu_event_show;
+	attr->val = config;
+
+	return ++attr;
+}
+
+static struct perf_pmu_events_attr *
+add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
+	     const char *str)
+{
+	sysfs_attr_init(&attr->attr.attr);
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = perf_event_sysfs_show;
+	attr->event_str = str;
+
+	return ++attr;
+}
+
+static struct attribute **
+create_event_attributes(struct xe_pmu *pmu)
+{
+	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+	static const struct {
+		unsigned int counter;
+		const char *name;
+		const char *unit;
+		bool global;
+	} events[] = {
+		__global_event(0, "interrupts", NULL),
+		__event(1, "render-group-busy", "ns"),
+		__event(2, "copy-group-busy", "ns"),
+		__event(3, "media-group-busy", "ns"),
+		__event(4, "any-engine-group-busy", "ns"),
+	};
+
+	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
+	struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
+	struct attribute **attr = NULL, **attr_iter;
+	unsigned int count = 0;
+	unsigned int i, j;
+	struct xe_gt *gt;
+
+	/* Count how many counters we will be exposing. */
+	for_each_gt(gt, xe, j) {
+		for (i = 0; i < ARRAY_SIZE(events); i++) {
+			u64 config = ___XE_PMU_OTHER(j, events[i].counter);
+
+			if (!config_status(xe, config))
+				count++;
+		}
+	}
+
+	/* Allocate attribute objects and table. */
+	xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
+	if (!xe_attr)
+		goto err_alloc;
+
+	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
+	if (!pmu_attr)
+		goto err_alloc;
+
+	/* Max one pointer of each attribute type plus a termination entry. */
+	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		goto err_alloc;
+
+	xe_iter = xe_attr;
+	pmu_iter = pmu_attr;
+	attr_iter = attr;
+
+	for_each_gt(gt, xe, j) {
+		for (i = 0; i < ARRAY_SIZE(events); i++) {
+			u64 config = ___XE_PMU_OTHER(j, events[i].counter);
+			char *str;
+
+			if (config_status(xe, config))
+				continue;
+
+			if (events[i].global)
+				str = kstrdup(events[i].name, GFP_KERNEL);
+			else
+				str = kasprintf(GFP_KERNEL, "%s-gt%u",
+						events[i].name, j);
+			if (!str)
+				goto err;
+
+			*attr_iter++ = &xe_iter->attr.attr;
+			xe_iter = add_xe_attr(xe_iter, str, config);
+
+			if (events[i].unit) {
+				if (events[i].global)
+					str = kasprintf(GFP_KERNEL, "%s.unit",
+							events[i].name);
+				else
+					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
+							events[i].name, j);
+				if (!str)
+					goto err;
+
+				*attr_iter++ = &pmu_iter->attr.attr;
+				pmu_iter = add_pmu_attr(pmu_iter, str,
+							events[i].unit);
+			}
+		}
+	}
+
+	pmu->xe_attr = xe_attr;
+	pmu->pmu_attr = pmu_attr;
+
+	return attr;
+
+err:
+	for (attr_iter = attr; *attr_iter; attr_iter++)
+		kfree((*attr_iter)->name);
+
+err_alloc:
+	kfree(attr);
+	kfree(xe_attr);
+	kfree(pmu_attr);
+
+	return NULL;
+}
+
+static void free_event_attributes(struct xe_pmu *pmu)
+{
+	struct attribute **attr_iter = pmu->events_attr_group.attrs;
+
+	for (; *attr_iter; attr_iter++)
+		kfree((*attr_iter)->name);
+
+	kfree(pmu->events_attr_group.attrs);
+	kfree(pmu->xe_attr);
+	kfree(pmu->pmu_attr);
+
+	pmu->events_attr_group.attrs = NULL;
+	pmu->xe_attr = NULL;
+	pmu->pmu_attr = NULL;
+}
+
+static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
+
+	/* Select the first online CPU as a designated reader. */
+	if (cpumask_empty(&xe_pmu_cpumask))
+		cpumask_set_cpu(cpu, &xe_pmu_cpumask);
+
+	return 0;
+}
+
+static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
+	unsigned int target = xe_pmu_target_cpu;
+
+	/*
+	 * Unregistering an instance generates a CPU offline event which we must
+	 * ignore to avoid incorrectly modifying the shared xe_pmu_cpumask.
+	 */
+	if (pmu->closed)
+		return 0;
+
+	if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) {
+		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+
+		/* Migrate events if there is a valid target */
+		if (target < nr_cpu_ids) {
+			cpumask_set_cpu(target, &xe_pmu_cpumask);
+			xe_pmu_target_cpu = target;
+		}
+	}
+
+	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
+		perf_pmu_migrate_context(&pmu->base, cpu, target);
+		pmu->cpuhp.cpu = target;
+	}
+
+	return 0;
+}
+
+static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
+
+int xe_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/x86/intel/xe:online",
+				      xe_pmu_cpu_online,
+				      xe_pmu_cpu_offline);
+	if (ret < 0)
+		pr_notice("Failed to setup cpuhp state for xe PMU! (%d)\n",
+			  ret);
+	else
+		cpuhp_slot = ret;
+
+	return 0;
+}
+
+void xe_pmu_exit(void)
+{
+	if (cpuhp_slot != CPUHP_INVALID)
+		cpuhp_remove_multi_state(cpuhp_slot);
+}
+
+static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu)
+{
+	if (cpuhp_slot == CPUHP_INVALID)
+		return -EINVAL;
+
+	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
+}
+
+static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
+{
+	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
+}
+
+void xe_pmu_suspend(struct xe_gt *gt)
+{
+	engine_group_busyness_store(gt);
+}
+
+static void xe_pmu_unregister(struct drm_device *device, void *arg)
+{
+	struct xe_pmu *pmu = arg;
+
+	if (!pmu->base.event_init)
+		return;
+
+	/*
+	 * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
+	 * ensures all currently executing ones will have exited before we
+	 * proceed with unregistration.
+	 */
+	pmu->closed = true;
+	synchronize_rcu();
+
+	xe_pmu_unregister_cpuhp_state(pmu);
+
+	perf_pmu_unregister(&pmu->base);
+	pmu->base.event_init = NULL;
+	kfree(pmu->base.attr_groups);
+	kfree(pmu->name);
+	free_event_attributes(pmu);
+}
+
+void xe_pmu_register(struct xe_pmu *pmu)
+{
+	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+	const struct attribute_group *attr_groups[] = {
+		&pmu->events_attr_group,
+		&xe_pmu_cpumask_attr_group,
+		NULL
+	};
+
+	int ret = -ENOMEM;
+
+	spin_lock_init(&pmu->lock);
+	pmu->cpuhp.cpu = -1;
+
+	pmu->name = kasprintf(GFP_KERNEL,
+			      "xe_%s",
+			      dev_name(xe->drm.dev));
+	if (pmu->name)
+		/* tools/perf reserves colons as special. */
+		strreplace((char *)pmu->name, ':', '_');
+
+	if (!pmu->name)
+		goto err;
+
+	pmu->events_attr_group.name = "events";
+	pmu->events_attr_group.attrs = create_event_attributes(pmu);
+	if (!pmu->events_attr_group.attrs)
+		goto err_name;
+
+	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+					GFP_KERNEL);
+	if (!pmu->base.attr_groups)
+		goto err_attr;
+
+	pmu->base.module	= THIS_MODULE;
+	pmu->base.task_ctx_nr	= perf_invalid_context;
+	pmu->base.event_init	= xe_pmu_event_init;
+	pmu->base.add		= xe_pmu_event_add;
+	pmu->base.del		= xe_pmu_event_del;
+	pmu->base.start		= xe_pmu_event_start;
+	pmu->base.stop		= xe_pmu_event_stop;
+	pmu->base.read		= xe_pmu_event_read;
+	pmu->base.event_idx	= xe_pmu_event_event_idx;
+
+	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
+	if (ret)
+		goto err_groups;
+
+	ret = xe_pmu_register_cpuhp_state(pmu);
+	if (ret)
+		goto err_unreg;
+
+	ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
+	if (ret)
+		goto err_cpuhp;
+
+	return;
+
+err_cpuhp:
+	xe_pmu_unregister_cpuhp_state(pmu);
+err_unreg:
+	perf_pmu_unregister(&pmu->base);
+err_groups:
+	kfree(pmu->base.attr_groups);
+err_attr:
+	pmu->base.event_init = NULL;
+	free_event_attributes(pmu);
+err_name:
+	kfree(pmu->name);
+err:
+	drm_notice(&xe->drm, "Failed to register PMU!\n");
+}
diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h
new file mode 100644
index 0000000000000..a99d4ddd023e9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pmu.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PMU_H_
+#define _XE_PMU_H_
+
+#include "xe_gt_types.h"
+#include "xe_pmu_types.h"
+
+#if IS_ENABLED(CONFIG_PERF_EVENTS)
+int xe_pmu_init(void);
+void xe_pmu_exit(void);
+void xe_pmu_register(struct xe_pmu *pmu);
+void xe_pmu_suspend(struct xe_gt *gt);
+#else
+static inline int xe_pmu_init(void) { return 0; }
+static inline void xe_pmu_exit(void) {}
+static inline void xe_pmu_register(struct xe_pmu *pmu) {}
+static inline void xe_pmu_suspend(struct xe_gt *gt) {}
+#endif
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
new file mode 100644
index 0000000000000..4ccc7e9042f64
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pmu_types.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PMU_TYPES_H_
+#define _XE_PMU_TYPES_H_
+
+#include <linux/perf_event.h>
+#include <linux/spinlock_types.h>
+#include <uapi/drm/xe_drm.h>
+
+enum {
+	__XE_SAMPLE_RENDER_GROUP_BUSY,
+	__XE_SAMPLE_COPY_GROUP_BUSY,
+	__XE_SAMPLE_MEDIA_GROUP_BUSY,
+	__XE_SAMPLE_ANY_ENGINE_GROUP_BUSY,
+	__XE_NUM_PMU_SAMPLERS
+};
+
+#define XE_PMU_MAX_GT 2
+
+struct xe_pmu {
+	/**
+	 * @cpuhp: Struct used for CPU hotplug handling.
+	 */
+	struct {
+		struct hlist_node node;
+		unsigned int cpu;
+	} cpuhp;
+	/**
+	 * @base: PMU base.
+	 */
+	struct pmu base;
+	/**
+	 * @closed: xe is unregistering.
+	 */
+	bool closed;
+	/**
+	 * @name: Name as registered with perf core.
+	 */
+	const char *name;
+	/**
+	 * @lock: Lock protecting enable mask and ref count handling.
+	 */
+	spinlock_t lock;
+	/**
+	 * @sample: Current and previous (raw) counters.
+	 *
+	 * These counters are updated when the device is awake.
+	 *
+	 */
+	u64 sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS];
+	/**
+	 * @irq_count: Number of interrupts
+	 *
+	 * Intentionally unsigned long to avoid atomics or heuristics on 32bit.
+	 * 4e9 interrupts are a lot and postprocessing can really deal with an
+	 * occasional wraparound easily. It's 32bit after all.
+	 */
+	unsigned long irq_count;
+	/**
+	 * @events_attr_group: Device events attribute group.
+	 */
+	struct attribute_group events_attr_group;
+	/**
+	 * @xe_attr: Memory block holding device attributes.
+	 */
+	void *xe_attr;
+	/**
+	 * @pmu_attr: Memory block holding device attributes.
+	 */
+	void *pmu_attr;
+};
+
+#endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 00d5cb4ef85e7..d48d8e3c898ce 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1053,6 +1053,46 @@ struct drm_xe_vm_madvise {
 	__u64 reserved[2];
 };
 
+/**
+ * DOC: XE PMU event config IDs
+ *
+ * Check 'man perf_event_open' to use the ID's XE_PMU_XXXX listed in xe_drm.h
+ * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
+ * particular event.
+ *
+ * For example to open the XE_PMU_INTERRUPTS(0):
+ *
+ * .. code-block:: C
+ *
+ *	struct perf_event_attr attr;
+ *	long long count;
+ *	int cpu = 0;
+ *	int fd;
+ *
+ *	memset(&attr, 0, sizeof(struct perf_event_attr));
+ *	attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_56_00.0/type
+ *	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
+ *	attr.use_clockid = 1;
+ *	attr.clockid = CLOCK_MONOTONIC;
+ *	attr.config = XE_PMU_INTERRUPTS(0);
+ *
+ *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+ */
+
+/*
+ * Top bits of every counter are GT id.
+ */
+#define __XE_PMU_GT_SHIFT (56)
+
+#define ___XE_PMU_OTHER(gt, x) \
+	(((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT))
+
+#define XE_PMU_INTERRUPTS(gt)			___XE_PMU_OTHER(gt, 0)
+#define XE_PMU_RENDER_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 1)
+#define XE_PMU_COPY_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 2)
+#define XE_PMU_MEDIA_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 3)
+#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___XE_PMU_OTHER(gt, 4)
+
 #if defined(__cplusplus)
 }
 #endif
-- 
GitLab


From c4991ee01d480c45c789b43eb001a978bf016f58 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 13 Sep 2023 16:28:35 -0700
Subject: [PATCH 1888/2340] drm/xe/uc: Rename guc_submission_enabled() to
 uc_enabled()

The guc_submission_enabled() function is being used as a boolean toggle
for all firmwares and all related features, not just GuC submission. We
could add additional flags/functions to distinguish and allow different
use-cases (e.g. loading HuC but not using GuC submission), but given
that not using GuC is a debug-only scenario having a global switch for
all FWs is enough. However, we want to make it clear that this switch
turns off everything, so rename it to uc_enabled().

v2: rebase on s/XE_WARN_ON/xe_assert

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.h     |  2 +-
 drivers/gpu/drm/xe/xe_exec_queue.c |  2 +-
 drivers/gpu/drm/xe/xe_execlist.c   |  6 +++---
 drivers/gpu/drm/xe/xe_ggtt.c       |  2 +-
 drivers/gpu/drm/xe/xe_gt.c         |  4 ++--
 drivers/gpu/drm/xe/xe_guc_pc.c     |  2 +-
 drivers/gpu/drm/xe/xe_guc_submit.c |  4 ++--
 drivers/gpu/drm/xe/xe_hw_engine.c  |  4 ++--
 drivers/gpu/drm/xe/xe_irq.c        |  2 +-
 drivers/gpu/drm/xe/xe_uc.c         | 16 ++++++++--------
 10 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 71582094834c6..c4232de40ae08 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -114,7 +114,7 @@ static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe)
 	return xe_device_get_root_tile(xe)->primary_gt;
 }
 
-static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
+static inline bool xe_device_uc_enabled(struct xe_device *xe)
 {
 	return !xe->info.force_execlist;
 }
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index a0b5647923acf..23789122b5b1c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -585,7 +585,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 	u16 gt_id;
 	u32 return_mask = 0, prev_mask;
 
-	if (XE_IOCTL_DBG(xe, !xe_device_guc_submission_enabled(xe) &&
+	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
 			 len > 1))
 		return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 5b26b6e35afcf..22dfe91b2b836 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -322,7 +322,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q)
 	struct xe_device *xe = gt_to_xe(q->gt);
 	int err;
 
-	xe_assert(xe, !xe_device_guc_submission_enabled(xe));
+	xe_assert(xe, !xe_device_uc_enabled(xe));
 
 	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
 
@@ -371,7 +371,7 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
 	struct xe_device *xe = gt_to_xe(q->gt);
 	unsigned long flags;
 
-	xe_assert(xe, !xe_device_guc_submission_enabled(xe));
+	xe_assert(xe, !xe_device_uc_enabled(xe));
 
 	spin_lock_irqsave(&exl->port->lock, flags);
 	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
@@ -458,7 +458,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
 int xe_execlist_init(struct xe_gt *gt)
 {
 	/* GuC submission enabled, nothing to do */
-	if (xe_device_guc_submission_enabled(gt_to_xe(gt)))
+	if (xe_device_uc_enabled(gt_to_xe(gt)))
 		return 0;
 
 	gt->exec_queue_ops = &execlist_exec_queue_ops;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 03097f1b7f71f..ba34b8784572e 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -233,7 +233,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 		xe_gt_assert(gt, seqno > 0);
 		if (seqno > 0)
 			xe_gt_tlb_invalidation_wait(gt, seqno);
-	} else if (xe_device_guc_submission_enabled(gt_to_xe(gt))) {
+	} else if (xe_device_uc_enabled(gt_to_xe(gt))) {
 		struct xe_device *xe = gt_to_xe(gt);
 
 		if (xe->info.platform == XE_PVC) {
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 06147f26384fc..1aa44d4f9ac1c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -549,7 +549,7 @@ static int gt_reset(struct xe_gt *gt)
 	int err;
 
 	/* We only support GT resets with GuC submission */
-	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+	if (!xe_device_uc_enabled(gt_to_xe(gt)))
 		return -ENODEV;
 
 	xe_gt_info(gt, "reset started\n");
@@ -642,7 +642,7 @@ int xe_gt_suspend(struct xe_gt *gt)
 	int err;
 
 	/* For now suspend/resume is only allowed with GuC */
-	if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+	if (!xe_device_uc_enabled(gt_to_xe(gt)))
 		return -ENODEV;
 
 	xe_gt_sanitize(gt);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 99d8556808944..8a4d299d6cb02 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -816,7 +816,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
 	int ret;
 
-	xe_gt_assert(gt, xe_device_guc_submission_enabled(xe));
+	xe_gt_assert(gt, xe_device_uc_enabled(xe));
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index f6b630f539284..fd120d8c0af2a 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1138,7 +1138,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	long timeout;
 	int err;
 
-	xe_assert(xe, xe_device_guc_submission_enabled(guc_to_xe(guc)));
+	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
 
 	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
 	if (!ge)
@@ -1903,7 +1903,7 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
 	struct xe_exec_queue *q;
 	unsigned long index;
 
-	if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
+	if (!xe_device_uc_enabled(guc_to_xe(guc)))
 		return;
 
 	mutex_lock(&guc->submission_state.lock);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 9c2e212fa4cfd..a8681089fb60a 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -434,7 +434,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	if (err)
 		goto err_hwsp;
 
-	if (!xe_device_guc_submission_enabled(xe)) {
+	if (!xe_device_uc_enabled(xe)) {
 		hwe->exl_port = xe_execlist_port_create(xe, hwe);
 		if (IS_ERR(hwe->exl_port)) {
 			err = PTR_ERR(hwe->exl_port);
@@ -442,7 +442,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 		}
 	}
 
-	if (xe_device_guc_submission_enabled(xe))
+	if (xe_device_uc_enabled(xe))
 		xe_hw_engine_enable_ring(hwe);
 
 	/* We reserve the highest BCS instance for USM */
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 772b8006d98f7..ff71a3aa08ce4 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -142,7 +142,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
 	u32 ccs_mask, bcs_mask;
 	u32 irqs, dmask, smask;
 
-	if (xe_device_guc_submission_enabled(xe)) {
+	if (xe_device_uc_enabled(xe)) {
 		irqs = GT_RENDER_USER_INTERRUPT |
 			GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
 	} else {
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index a8ecb5c6e01aa..5b7d6177c1c26 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -32,7 +32,7 @@ int xe_uc_init(struct xe_uc *uc)
 	int ret;
 
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	ret = xe_guc_init(&uc->guc);
@@ -66,7 +66,7 @@ err:
 int xe_uc_init_post_hwconfig(struct xe_uc *uc)
 {
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	return xe_guc_init_post_hwconfig(&uc->guc);
@@ -110,7 +110,7 @@ int xe_uc_init_hwconfig(struct xe_uc *uc)
 	int ret;
 
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	ret = xe_guc_min_load_for_hwconfig(&uc->guc);
@@ -129,7 +129,7 @@ int xe_uc_init_hw(struct xe_uc *uc)
 	int ret;
 
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	ret = xe_uc_sanitize_reset(uc);
@@ -175,7 +175,7 @@ int xe_uc_fini_hw(struct xe_uc *uc)
 int xe_uc_reset_prepare(struct xe_uc *uc)
 {
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	return xe_guc_reset_prepare(&uc->guc);
@@ -194,7 +194,7 @@ void xe_uc_stop_prepare(struct xe_uc *uc)
 int xe_uc_stop(struct xe_uc *uc)
 {
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	return xe_guc_stop(&uc->guc);
@@ -203,7 +203,7 @@ int xe_uc_stop(struct xe_uc *uc)
 int xe_uc_start(struct xe_uc *uc)
 {
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	return xe_guc_start(&uc->guc);
@@ -226,7 +226,7 @@ int xe_uc_suspend(struct xe_uc *uc)
 	int ret;
 
 	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
 	uc_reset_wait(uc);
-- 
GitLab


From 757308471dbe9aba28cdaf40848936923216a1f2 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 13 Sep 2023 16:28:36 -0700
Subject: [PATCH 1889/2340] drm/xe/uc: Fix uC status tracking

The current uC status tracking has a few issues:

1) the HuC is moved to "disabled" instead of "not supported"

2) the status is left uninitialized instead of "disabled" when the
   modparam is used to disable support

3) due to #1, a number of checks are done against "disabled" instead of
   the appropriate status.

Address all of those by making sure to follow the appropriate state
transition and checking against the required state.

v2: rebase on s/guc_submission_enabled/uc_enabled/

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c   |  3 +++
 drivers/gpu/drm/xe/xe_huc.c   | 15 +++++++--------
 drivers/gpu/drm/xe/xe_uc.c    | 10 +++++++---
 drivers/gpu/drm/xe/xe_uc_fw.c | 14 ++++++++------
 drivers/gpu/drm/xe/xe_wopcm.c |  3 +--
 5 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 134019fdda7eb..84f0b5488783b 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -242,6 +242,9 @@ int xe_guc_init(struct xe_guc *guc)
 	if (ret)
 		goto out;
 
+	if (!xe_uc_fw_is_enabled(&guc->fw))
+		return 0;
+
 	ret = xe_guc_log_init(&guc->log);
 	if (ret)
 		goto out;
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index c856da1e94229..293403d16f25c 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -43,22 +43,21 @@ int xe_huc_init(struct xe_huc *huc)
 	if (ret)
 		goto out;
 
+	if (!xe_uc_fw_is_enabled(&huc->fw))
+		return 0;
+
 	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
 
 	return 0;
 
 out:
-	if (xe_uc_fw_is_disabled(&huc->fw)) {
-		drm_info(&xe->drm, "HuC disabled\n");
-		return 0;
-	}
 	drm_err(&xe->drm, "HuC init failed with %d", ret);
 	return ret;
 }
 
 int xe_huc_upload(struct xe_huc *huc)
 {
-	if (xe_uc_fw_is_disabled(&huc->fw))
+	if (!xe_uc_fw_is_loadable(&huc->fw))
 		return 0;
 	return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
 }
@@ -70,7 +69,7 @@ int xe_huc_auth(struct xe_huc *huc)
 	struct xe_guc *guc = huc_to_guc(huc);
 	int ret;
 
-	if (xe_uc_fw_is_disabled(&huc->fw))
+	if (!xe_uc_fw_is_loadable(&huc->fw))
 		return 0;
 
 	xe_assert(xe, !xe_uc_fw_is_running(&huc->fw));
@@ -107,7 +106,7 @@ fail:
 
 void xe_huc_sanitize(struct xe_huc *huc)
 {
-	if (xe_uc_fw_is_disabled(&huc->fw))
+	if (!xe_uc_fw_is_loadable(&huc->fw))
 		return;
 	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
 }
@@ -119,7 +118,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
 
 	xe_uc_fw_print(&huc->fw, p);
 
-	if (xe_uc_fw_is_disabled(&huc->fw))
+	if (!xe_uc_fw_is_enabled(&huc->fw))
 		return;
 
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 5b7d6177c1c26..784f53c5f282f 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -31,9 +31,10 @@ int xe_uc_init(struct xe_uc *uc)
 {
 	int ret;
 
-	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_uc_enabled(uc_to_xe(uc)))
-		return 0;
+	/*
+	 * We call the GuC/HuC init functions even if GuC submission is off to
+	 * correctly move our tracking of the FW state to "disabled".
+	 */
 
 	ret = xe_guc_init(&uc->guc);
 	if (ret)
@@ -43,6 +44,9 @@ int xe_uc_init(struct xe_uc *uc)
 	if (ret)
 		goto err;
 
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
 	ret = xe_wopcm_init(&uc->wopcm);
 	if (ret)
 		goto err;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index efc70836453d0..a890ece72f34e 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -340,17 +340,19 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	xe_assert(xe, !uc_fw->path);
 
 	uc_fw_auto_select(xe, uc_fw);
-	xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
+	xe_uc_fw_change_status(uc_fw, uc_fw->path ?
 			       XE_UC_FIRMWARE_SELECTED :
-			       XE_UC_FIRMWARE_DISABLED :
 			       XE_UC_FIRMWARE_NOT_SUPPORTED);
 
-	/* Transform no huc in the list into firmware disabled */
-	if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) {
+	if (!xe_uc_fw_is_supported(uc_fw))
+		return 0;
+
+	if (!xe_device_uc_enabled(xe)) {
 		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
-		err = -ENOPKG;
-		return err;
+		drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type));
+		return 0;
 	}
+
 	err = request_firmware(&fw, uc_fw->path, dev);
 	if (err)
 		goto fail;
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index 9a85bcc188303..bf85d4fa56ccc 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -139,8 +139,7 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
 {
 	u32 base = wopcm->guc.base;
 	u32 size = wopcm->guc.size;
-	u32 huc_agent = xe_uc_fw_is_disabled(&gt->uc.huc.fw) ? 0 :
-		HUC_LOADING_AGENT_GUC;
+	u32 huc_agent = xe_uc_fw_is_available(&gt->uc.huc.fw) ? HUC_LOADING_AGENT_GUC : 0;
 	u32 mask;
 	int err;
 
-- 
GitLab


From a455ed04669f03bbb1f22267f1237983e026739f Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 13 Sep 2023 16:28:37 -0700
Subject: [PATCH 1890/2340] drm/xe/uc: Add GuC/HuC firmware path overrides

When testing a new binary and/or debugging binary-related issues, it is
useful to have the option to change which binary is loaded without
having to update and re-compile the kernel. To support this option, this
patch adds 2 new modparams to override the FW path for GuC and HuC. The
HuC modparam can also be set to an empty string to disable HuC loading.

Note that those modparams only take effect on platforms where we already
have a default FW, so we're sure there is support for FW loading and the
kernel isn't going to explode in an undefined path.

v2: simplify comment (John),
    rebase on s/guc_submission_enabled/uc_enabled

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_module.c | 10 ++++++++++
 drivers/gpu/drm/xe/xe_module.h |  2 ++
 drivers/gpu/drm/xe/xe_uc_fw.c  | 30 +++++++++++++++++++++++++++++-
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index d76fabe056d00..82817a46f8875 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -27,6 +27,16 @@ int xe_guc_log_level = 5;
 module_param_named(guc_log_level, xe_guc_log_level, int, 0600);
 MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
 
+char *xe_guc_firmware_path;
+module_param_named_unsafe(guc_firmware_path, xe_guc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(guc_firmware_path,
+		 "GuC firmware path to use instead of the default one");
+
+char *xe_huc_firmware_path;
+module_param_named_unsafe(huc_firmware_path, xe_huc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(huc_firmware_path,
+		 "HuC firmware path to use instead of the default one - empty string disables");
+
 char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE;
 module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400);
 MODULE_PARM_DESC(force_probe,
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 2c1f9199f909e..e1da1e9ca5cba 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -10,4 +10,6 @@ extern bool force_execlist;
 extern bool enable_display;
 extern u32 xe_force_vram_bar_size;
 extern int xe_guc_log_level;
+extern char *xe_guc_firmware_path;
+extern char *xe_huc_firmware_path;
 extern char *xe_param_force_probe;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index a890ece72f34e..2ba0466bc45ae 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -15,6 +15,7 @@
 #include "xe_gt.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
+#include "xe_module.h"
 #include "xe_uc_fw.h"
 
 /*
@@ -210,6 +211,30 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 	}
 }
 
+static void
+uc_fw_override(struct xe_uc_fw *uc_fw)
+{
+	char *path_override = NULL;
+
+	/* empty string disables, but it's not allowed for GuC */
+	switch (uc_fw->type) {
+	case XE_UC_FW_TYPE_GUC:
+		if (xe_guc_firmware_path && *xe_guc_firmware_path)
+			path_override = xe_guc_firmware_path;
+		break;
+	case XE_UC_FW_TYPE_HUC:
+		path_override = xe_huc_firmware_path;
+		break;
+	default:
+		break;
+	}
+
+	if (path_override) {
+		uc_fw->path = path_override;
+		uc_fw->user_overridden = true;
+	}
+}
+
 /**
  * xe_uc_fw_copy_rsa - copy fw RSA to buffer
  *
@@ -347,7 +372,10 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	if (!xe_uc_fw_is_supported(uc_fw))
 		return 0;
 
-	if (!xe_device_uc_enabled(xe)) {
+	uc_fw_override(uc_fw);
+
+	/* an empty path means the firmware is disabled */
+	if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
 		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
 		drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type));
 		return 0;
-- 
GitLab


From fc678ec7c2e037fcc1bb678403036a9772e61dbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 15 Sep 2023 19:26:06 +0200
Subject: [PATCH 1891/2340] drm/xe: Reinstate pipelined fence enable_signaling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the GPUVA conversion, the xe_bo::vmas member became replaced with
drm_gem_object::gpuva.list, however there was a couple of usage instances
left using the old member. Most notably the pipelined fence
enable_signaling.

Remove the xe_bo::vmas member completely, fix usage instances and
also enable this pipelined fence enable_signaling even for faulting
VM:s since we actually wait for bind fences to complete.

v2:
- Rebase.
v3:
- Fix display code build error.

Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230915172606.14436-1-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c       | 5 ++---
 drivers/gpu/drm/xe/xe_bo_types.h | 2 --
 drivers/gpu/drm/xe/xe_pt.c       | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 998efceb84a4c..e812f2b7d5b9c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -456,7 +456,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 
 	dma_resv_assert_held(bo->ttm.base.resv);
 
-	if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) {
+	if (!list_empty(&bo->ttm.base.gpuva.list)) {
 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
 				    DMA_RESV_USAGE_BOOKKEEP);
 		dma_resv_for_each_fence_unlocked(&cursor, fence)
@@ -1049,7 +1049,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
 	drm_gem_object_release(&bo->ttm.base);
 
-	xe_assert(xe, list_empty(&bo->vmas));
+	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
 
 	if (bo->ggtt_node.size)
 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
@@ -1232,7 +1232,6 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
 	bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL;
-	INIT_LIST_HEAD(&bo->vmas);
 	INIT_LIST_HEAD(&bo->pinned_link);
 
 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index f6ee920303af9..27fe72129ee69 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -31,8 +31,6 @@ struct xe_bo {
 	struct xe_vm *vm;
 	/** @tile: Tile this BO is attached to (kernel BO only) */
 	struct xe_tile *tile;
-	/** @vmas: List of VMAs for this BO */
-	struct list_head vmas;
 	/** @placements: valid placements for this BO */
 	struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
 	/** @placement: current placement for this BO */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 680fbe6f38a65..21a3dfe99e05a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -265,7 +265,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
 	if (!pt)
 		return;
 
-	XE_WARN_ON(!list_empty(&pt->bo->vmas));
+	XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
 	xe_bo_unpin(pt->bo);
 	xe_bo_put_deferred(pt->bo, deferred);
 
-- 
GitLab


From d435a039646eee712f4d5da2405181015c30bb1a Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Fri, 15 Sep 2023 19:02:33 -0300
Subject: [PATCH 1892/2340] drm/xe: Simplify final return from xe_irq_install()

At the end of the function, we will always return err no matter it's
value. Simplify this by just returning the result of
drmm_add_action_or_reset().

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230915220233.59736-1-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index ff71a3aa08ce4..dec3d518b3fc4 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -592,11 +592,7 @@ int xe_irq_install(struct xe_device *xe)
 
 	xe_irq_postinstall(xe);
 
-	err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
-	if (err)
-		return err;
-
-	return err;
+	return drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
 }
 
 void xe_irq_shutdown(struct xe_device *xe)
-- 
GitLab


From 7764222d54b71a9577cff9296420bf0a780b0c5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 20 Sep 2023 11:50:01 +0200
Subject: [PATCH 1893/2340] drm/xe: Disallow pinning dma-bufs in VRAM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For now only support pinning in TT memory, for two reasons:
1) Avoid pinning in a placement not accessible to some importers.
2) Pinning in VRAM requires PIN accounting which is a to-do.

v2:
- Adjust the dma-buf kunit test accordingly.

Suggested-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230920095001.5539-1-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_dma_buf.c | 16 ++++++++++++----
 drivers/gpu/drm/xe/xe_dma_buf.c       | 25 +++++++++++++++++++++----
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 1c4d8751be696..18c00bc03024d 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -149,11 +149,19 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 			/* Is everything where we expect it to be? */
 			xe_bo_lock(import_bo, false);
 			err = xe_bo_validate(import_bo, NULL, false);
-			if (err && err != -EINTR && err != -ERESTARTSYS)
-				KUNIT_FAIL(test,
-					   "xe_bo_validate() failed with err=%d\n", err);
 
-			check_residency(test, bo, import_bo, dmabuf);
+			/* Pinning in VRAM is not allowed. */
+			if (!is_dynamic(params) &&
+			    params->force_different_devices &&
+			    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+				KUNIT_EXPECT_EQ(test, err, -EINVAL);
+			/* Otherwise only expect interrupts or success. */
+			else if (err && err != -EINTR && err != -ERESTARTSYS)
+				KUNIT_EXPECT_TRUE(test, !err || err == -EINTR ||
+						  err == -ERESTARTSYS);
+
+			if (!err)
+				check_residency(test, bo, import_bo, dmabuf);
 			xe_bo_unlock(import_bo);
 		}
 		drm_gem_object_put(import);
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 8ce1b582402a6..cfde3be3b0dc9 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -49,13 +49,30 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
 {
 	struct drm_gem_object *obj = attach->dmabuf->priv;
 	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct xe_device *xe = xe_bo_device(bo);
+	int ret;
 
 	/*
-	 * Migrate to TT first to increase the chance of non-p2p clients
-	 * can attach.
+	 * For now only support pinning in TT memory, for two reasons:
+	 * 1) Avoid pinning in a placement not accessible to some importers.
+	 * 2) Pinning in VRAM requires PIN accounting which is a to-do.
 	 */
-	(void)xe_bo_migrate(bo, XE_PL_TT);
-	xe_bo_pin_external(bo);
+	if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) {
+		drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n");
+		return -EINVAL;
+	}
+
+	ret = xe_bo_migrate(bo, XE_PL_TT);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			drm_dbg(&xe->drm,
+				"Failed migrating dma-buf to TT memory: %pe\n",
+				ERR_PTR(ret));
+		return ret;
+	}
+
+	ret = xe_bo_pin_external(bo);
+	xe_assert(xe, !ret);
 
 	return 0;
 }
-- 
GitLab


From 0d0534750f9d4575abf0da3b41a78e5643e6c8dd Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 13 Sep 2023 16:14:17 -0700
Subject: [PATCH 1894/2340] drm/xe/wa: Apply tile workarounds at probe/resume

Although the vast majority of workarounds the driver needs to implement
are either GT-based or display-based, there are occasionally workarounds
that reside outside those parts of the hardware (i.e., in they target
registers in the sgunit/soc); we can consider these to be "tile"
workarounds since there will be instance of these registers per tile.
The registers in question should only lose their values during a
function-level reset, so they only need to be applied during probe and
resume; the registers will not be affected by GT/engine resets.

Tile workarounds are rare (there's only one, 22010954014, that's
relevant to Xe at the moment) so it's probably not worth updating the
xe_rtp design to handle tile-level workarounds yet, although we may want
to consider that in the future if/when more of these show up on future
platforms.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Link: https://lore.kernel.org/r/20230913231411.291933-13-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h  |  3 +++
 drivers/gpu/drm/xe/xe_pm.c         |  5 +++++
 drivers/gpu/drm/xe/xe_tile.c       |  3 +++
 drivers/gpu/drm/xe/xe_wa.c         | 20 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_wa.h         |  2 ++
 drivers/gpu/drm/xe/xe_wa_oob.rules |  1 +
 6 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index be496a3946d83..d62555757d0fa 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -69,6 +69,9 @@
 #define GU_CNTL					XE_REG(0x101010)
 #define   LMEM_INIT				REG_BIT(7)
 
+#define XEHP_CLOCK_GATE_DIS			XE_REG(0x101014)
+#define   SGSI_SIDECLK_DIS			REG_BIT(17)
+
 #define GGC					XE_REG(0x108040)
 #define   GMS_MASK				REG_GENMASK(15, 8)
 #define   GGMS_MASK				REG_GENMASK(7, 6)
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 5e992e62d0fb8..2c2745f862237 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -19,6 +19,7 @@
 #include "xe_guc.h"
 #include "xe_irq.h"
 #include "xe_pcode.h"
+#include "xe_wa.h"
 
 /**
  * DOC: Xe Power Management
@@ -79,10 +80,14 @@ int xe_pm_suspend(struct xe_device *xe)
  */
 int xe_pm_resume(struct xe_device *xe)
 {
+	struct xe_tile *tile;
 	struct xe_gt *gt;
 	u8 id;
 	int err;
 
+	for_each_tile(tile, xe, id)
+		xe_wa_apply_tile_workarounds(tile);
+
 	for_each_gt(gt, xe, id) {
 		err = xe_pcode_init(gt);
 		if (err)
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index e0bc2b60ab09a..131752a57f652 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -12,6 +12,7 @@
 #include "xe_tile.h"
 #include "xe_tile_sysfs.h"
 #include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
 
 /**
  * DOC: Multi-tile Design
@@ -143,6 +144,8 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
 	if (IS_ERR(tile->mem.kernel_bb_pool))
 		err = PTR_ERR(tile->mem.kernel_bb_pool);
 
+	xe_wa_apply_tile_workarounds(tile);
+
 	xe_tile_sysfs_init(tile);
 
 err_mem_access:
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index f45e9452ba0ee..d84e67a9af075 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -753,3 +753,23 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
 		if (oob_was[idx].name)
 			drm_printf_indent(p, 1, "%s\n", oob_was[idx].name);
 }
+
+/*
+ * Apply tile (non-GT, non-display) workarounds.  Think very carefully before
+ * adding anything to this function; most workarounds should be implemented
+ * elsewhere.  The programming here is primarily for sgunit/soc workarounds,
+ * which are relatively rare.  Since the registers these workarounds target are
+ * outside the GT, they should only need to be applied once at device
+ * probe/resume; they will not lose their values on any kind of GT or engine
+ * reset.
+ *
+ * TODO:  We may want to move this over to xe_rtp in the future once we have
+ * enough workarounds to justify the work.
+ */
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	if (XE_WA(mmio, 22010954014))
+		xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index cfe6859895241..1b24d66f9d80e 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -9,12 +9,14 @@
 struct drm_printer;
 struct xe_gt;
 struct xe_hw_engine;
+struct xe_tile;
 
 int xe_wa_init(struct xe_gt *gt);
 void xe_wa_process_oob(struct xe_gt *gt);
 void xe_wa_process_gt(struct xe_gt *gt);
 void xe_wa_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_process_lrc(struct xe_hw_engine *hwe);
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
 
 void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 599e67169dae6..f3ff774dc4aa6 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -18,3 +18,4 @@
 14016763929	SUBPLATFORM(DG2, G10)
 		SUBPLATFORM(DG2, G12)
 16017236439	PLATFORM(PVC)
+22010954014	PLATFORM(DG2)
-- 
GitLab


From cb90d469183cc8335d646484d66bd3c3643683cc Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Thu, 14 Sep 2023 14:48:02 -0700
Subject: [PATCH 1895/2340] drm/xe: Add child contexts to the GuC context
 lookup

The CAT_ERROR message from the GuC provides the guc id of the context
that caused the problem, which can be a child context. We therefore
need to be able to match that id to the exec_queue that owns it, which
we do by adding child context to the context lookup.

While at it, fix the error path of the guc id allocation code to
correctly free the ids allocated for parallel queues.

v2: rebase on s/XE_WARN_ON/xe_assert

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/590
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 44 ++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index fd120d8c0af2a..3e136b60196ef 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -247,10 +247,28 @@ int xe_guc_submit_init(struct xe_guc *guc)
 	return 0;
 }
 
+static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
+{
+	int i;
+
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	for (i = 0; i < xa_count; ++i)
+		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
+
+	if (xe_exec_queue_is_parallel(q))
+		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+				      q->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(q->width));
+	else
+		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+}
+
 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	int ret;
 	void *ptr;
+	int i;
 
 	/*
 	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
@@ -277,30 +295,27 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 	if (xe_exec_queue_is_parallel(q))
 		q->guc->id += GUC_ID_START_MLRC;
 
-	ptr = xa_store(&guc->submission_state.exec_queue_lookup,
-		       q->guc->id, q, GFP_NOWAIT);
-	if (IS_ERR(ptr)) {
-		ret = PTR_ERR(ptr);
-		goto err_release;
+	for (i = 0; i < q->width; ++i) {
+		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
+			       q->guc->id + i, q, GFP_NOWAIT);
+		if (IS_ERR(ptr)) {
+			ret = PTR_ERR(ptr);
+			goto err_release;
+		}
 	}
 
 	return 0;
 
 err_release:
-	ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+	__release_guc_id(guc, q, i);
+
 	return ret;
 }
 
 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	mutex_lock(&guc->submission_state.lock);
-	xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id);
-	if (xe_exec_queue_is_parallel(q))
-		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
-				      q->guc->id - GUC_ID_START_MLRC,
-				      order_base_2(q->width));
-	else
-		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+	__release_guc_id(guc, q, q->width);
 	mutex_unlock(&guc->submission_state.lock);
 }
 
@@ -1489,7 +1504,8 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
 		return NULL;
 	}
 
-	xe_assert(xe, q->guc->id == guc_id);
+	xe_assert(xe, guc_id >= q->guc->id);
+	xe_assert(xe, guc_id < (q->guc->id + q->width));
 
 	return q;
 }
-- 
GitLab


From 8f965392c4d915195307979640295189eec94df4 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 14 Sep 2023 17:25:14 +0530
Subject: [PATCH 1896/2340] drm/xe: Add drm-client infrastructure

Add drm-client infrastructure to record stats of consumption
done by individual drm client.

V2:
  - Typo - CI

Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |  1 +
 drivers/gpu/drm/xe/xe_device.c       | 15 +++++++-
 drivers/gpu/drm/xe/xe_device_types.h |  6 ++++
 drivers/gpu/drm/xe/xe_drm_client.c   | 52 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_drm_client.h   | 43 +++++++++++++++++++++++
 5 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/xe/xe_drm_client.c
 create mode 100644 drivers/gpu/drm/xe/xe_drm_client.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index d3b97bc11af7d..9be0848ea370a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -54,6 +54,7 @@ xe-y += xe_bb.o \
 	xe_device.o \
 	xe_device_sysfs.o \
 	xe_dma_buf.o \
+	xe_drm_client.o \
 	xe_exec.o \
 	xe_execlist.o \
 	xe_exec_queue.o \
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 89bf926bc0f35..612dfc92e948a 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -17,6 +17,7 @@
 #include "xe_bo.h"
 #include "xe_debugfs.h"
 #include "xe_dma_buf.h"
+#include "xe_drm_client.h"
 #include "xe_drv.h"
 #include "xe_exec_queue.h"
 #include "xe_exec.h"
@@ -42,13 +43,24 @@ struct lockdep_map xe_device_mem_access_lockdep_map = {
 
 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 {
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_drm_client *client;
 	struct xe_file *xef;
+	int ret = -ENOMEM;
 
 	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
 	if (!xef)
-		return -ENOMEM;
+		return ret;
+
+	client = xe_drm_client_alloc();
+	if (!client) {
+		kfree(xef);
+		return ret;
+	}
 
 	xef->drm = file;
+	xef->client = client;
+	xef->xe = xe;
 
 	mutex_init(&xef->vm.lock);
 	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
@@ -88,6 +100,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 	xa_destroy(&xef->vm.xa);
 	mutex_destroy(&xef->vm.lock);
 
+	xe_drm_client_put(xef->client);
 	kfree(xef);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 496d7f3fb897e..d748d71bb536b 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -356,6 +356,9 @@ struct xe_device {
  * struct xe_file - file handle for XE driver
  */
 struct xe_file {
+	/** @xe: xe DEVICE **/
+	struct xe_device *xe;
+
 	/** @drm: base DRM file */
 	struct drm_file *drm;
 
@@ -374,6 +377,9 @@ struct xe_file {
 		/** @lock: protects file engine state */
 		struct mutex lock;
 	} exec_queue;
+
+	/** @client: drm client */
+	struct xe_drm_client *client;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
new file mode 100644
index 0000000000000..1558ca4e0eb72
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "xe_device_types.h"
+#include "xe_drm_client.h"
+
+/**
+ * xe_drm_client_alloc() - Allocate drm client
+ * @void: No arg
+ *
+ * Allocate drm client struct to track client memory against
+ * same till client life. Call this API whenever new client
+ * has opened xe device.
+ *
+ * Return: pointer to client struct or NULL if can't allocate
+ */
+struct xe_drm_client *xe_drm_client_alloc(void)
+{
+	struct xe_drm_client *client;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return NULL;
+
+	kref_init(&client->kref);
+
+	return client;
+}
+
+/**
+ * __xe_drm_client_free() - Free client struct
+ * @kref: The reference
+ *
+ * This frees client struct. Call this API when xe device is closed
+ * by drm client.
+ *
+ * Return: void
+ */
+void __xe_drm_client_free(struct kref *kref)
+{
+	struct xe_drm_client *client =
+		container_of(kref, typeof(*client), kref);
+
+	kfree(client);
+}
diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
new file mode 100644
index 0000000000000..be097cdf5d12f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DRM_CLIENT_H_
+#define _XE_DRM_CLIENT_H_
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/pid.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+struct drm_file;
+struct drm_printer;
+
+struct xe_drm_client {
+	struct kref kref;
+	unsigned int id;
+};
+
+	static inline struct xe_drm_client *
+xe_drm_client_get(struct xe_drm_client *client)
+{
+	kref_get(&client->kref);
+	return client;
+}
+
+void __xe_drm_client_free(struct kref *kref);
+
+static inline void xe_drm_client_put(struct xe_drm_client *client)
+{
+	kref_put(&client->kref, __xe_drm_client_free);
+}
+
+struct xe_drm_client *xe_drm_client_alloc(void);
+static inline struct xe_drm_client *
+xe_drm_client_get(struct xe_drm_client *client);
+static inline void xe_drm_client_put(struct xe_drm_client *client);
+
+#endif
-- 
GitLab


From 85c6ad1aa263a852d51d980575e7c1c305f1697e Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 14 Sep 2023 15:38:47 +0530
Subject: [PATCH 1897/2340] drm/xe: Interface xe drm client with fdinfo
 interface

DRM core driver has introduced recently fdinfo interface to
show memory stats of individual drm client. Lets interface
xe drm client to fdinfo interface.

V2:
  - cover call to xe_drm_client_fdinfo under PROC_FS

Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c     |  6 ++++++
 drivers/gpu/drm/xe/xe_drm_client.c | 17 +++++++++++++++++
 drivers/gpu/drm/xe/xe_drm_client.h |  4 +++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 612dfc92e948a..67ec55810ca35 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -137,6 +137,9 @@ static const struct file_operations xe_driver_fops = {
 	.read = drm_read,
 	.compat_ioctl = drm_compat_ioctl,
 	.llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo = drm_show_fdinfo,
+#endif
 };
 
 static void xe_driver_release(struct drm_device *dev)
@@ -161,6 +164,9 @@ static struct drm_driver driver = {
 
 	.dumb_create = xe_bo_dumb_create,
 	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo = xe_drm_client_fdinfo,
+#endif
 	.release = &xe_driver_release,
 
 	.ioctls = xe_ioctls,
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 1558ca4e0eb72..98c8a0cf2f565 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -50,3 +50,20 @@ void __xe_drm_client_free(struct kref *kref)
 
 	kfree(client);
 }
+
+#ifdef CONFIG_PROC_FS
+/**
+ * xe_drm_client_fdinfo() - Callback for fdinfo interface
+ * @p: The drm_printer ptr
+ * @file: The drm_file ptr
+ *
+ * This is callabck for drm fdinfo interface. Register this callback
+ * in drm driver ops for show_fdinfo.
+ *
+ * Return: void
+ */
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	/* show_meminfo() will be developed here */
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
index be097cdf5d12f..dbe3a083c9df8 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.h
+++ b/drivers/gpu/drm/xe/xe_drm_client.h
@@ -39,5 +39,7 @@ struct xe_drm_client *xe_drm_client_alloc(void);
 static inline struct xe_drm_client *
 xe_drm_client_get(struct xe_drm_client *client);
 static inline void xe_drm_client_put(struct xe_drm_client *client);
-
+#ifdef CONFIG_PROC_FS
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
+#endif
 #endif
-- 
GitLab


From b27970f3e11c616c7a5121537502f6e21a460881 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 21 Sep 2023 17:11:34 +0530
Subject: [PATCH 1898/2340] drm/xe: Add tracking support for bos per client

In order to show per client memory consumption, we
need tracking support APIs to add at every bo consumption
and removal. Adding APIs here to add tracking calls at
places wherever it is applicable.

V5:
  - Rebase
V4:
  - remove client bo before vm_put
  - spin_lock_irqsave not required - Auld
V3:
  - update .h to return xe_drm_client_remove_bo void
  - protect xe_drm_client_remove_bo under CONFIG_PROC_FS check - Himal
  - Fixed Checkpatch error - CI
V2:
  - make xe_drm_client_remove_bo return void - Himal

Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c         |  9 ++++++
 drivers/gpu/drm/xe/xe_bo_types.h   | 10 +++++++
 drivers/gpu/drm/xe/xe_drm_client.c | 48 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_drm_client.h | 25 ++++++++++++++++
 4 files changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e812f2b7d5b9c..eb08a89547422 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -16,6 +16,7 @@
 
 #include "xe_device.h"
 #include "xe_dma_buf.h"
+#include "xe_drm_client.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_map.h"
@@ -1054,6 +1055,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 	if (bo->ggtt_node.size)
 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
 
+#ifdef CONFIG_PROC_FS
+	if (bo->client)
+		xe_drm_client_remove_bo(bo);
+#endif
+
 	if (bo->vm && xe_bo_is_user(bo))
 		xe_vm_put(bo->vm);
 
@@ -1233,6 +1239,9 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
 	bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL;
 	INIT_LIST_HEAD(&bo->pinned_link);
+#ifdef CONFIG_PROC_FS
+	INIT_LIST_HEAD(&bo->client_link);
+#endif
 
 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
 
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 27fe72129ee69..c628625c8a899 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -43,6 +43,16 @@ struct xe_bo {
 	struct ttm_bo_kmap_obj kmap;
 	/** @pinned_link: link to present / evicted list of pinned BO */
 	struct list_head pinned_link;
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @client: @xe_drm_client which created the bo
+	 */
+	struct xe_drm_client *client;
+	/**
+	 * @client_link: Link into @xe_drm_client.objects_list
+	 */
+	struct list_head client_link;
+#endif
 	/** @props: BO user controlled properties */
 	struct {
 		/** @preferred_mem: preferred memory class for this BO */
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 98c8a0cf2f565..b5ac9bc1f685a 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -8,8 +8,10 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
+#include "xe_bo_types.h"
 #include "xe_device_types.h"
 #include "xe_drm_client.h"
+#include "xe_trace.h"
 
 /**
  * xe_drm_client_alloc() - Allocate drm client
@@ -31,6 +33,10 @@ struct xe_drm_client *xe_drm_client_alloc(void)
 
 	kref_init(&client->kref);
 
+#ifdef CONFIG_PROC_FS
+	spin_lock_init(&client->bos_lock);
+	INIT_LIST_HEAD(&client->bos_list);
+#endif
 	return client;
 }
 
@@ -52,6 +58,48 @@ void __xe_drm_client_free(struct kref *kref)
 }
 
 #ifdef CONFIG_PROC_FS
+/**
+ * xe_drm_client_add_bo() - Add BO for tracking client mem usage
+ * @client: The drm client ptr
+ * @bo: The xe BO ptr
+ *
+ * Add all BO created by individual drm client by calling this function.
+ * This helps in tracking client memory usage.
+ *
+ * Return: void
+ */
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo)
+{
+	XE_WARN_ON(bo->client);
+	XE_WARN_ON(!list_empty(&bo->client_link));
+
+	spin_lock(&client->bos_lock);
+	bo->client = xe_drm_client_get(client);
+	list_add_tail_rcu(&bo->client_link, &client->bos_list);
+	spin_unlock(&client->bos_lock);
+}
+
+/**
+ * xe_drm_client_remove_bo() - Remove BO for tracking client mem usage
+ * @bo: The xe BO ptr
+ *
+ * Remove all BO removed by individual drm client by calling this function.
+ * This helps in tracking client memory usage.
+ *
+ * Return: void
+ */
+void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+	struct xe_drm_client *client = bo->client;
+
+	spin_lock(&client->bos_lock);
+	list_del_rcu(&bo->client_link);
+	spin_unlock(&client->bos_lock);
+
+	xe_drm_client_put(client);
+}
+
 /**
  * xe_drm_client_fdinfo() - Callback for fdinfo interface
  * @p: The drm_printer ptr
diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
index dbe3a083c9df8..a9649aa360110 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.h
+++ b/drivers/gpu/drm/xe/xe_drm_client.h
@@ -15,10 +15,23 @@
 
 struct drm_file;
 struct drm_printer;
+struct xe_bo;
 
 struct xe_drm_client {
 	struct kref kref;
 	unsigned int id;
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @bos_lock: lock protecting @bos_list
+	 */
+	spinlock_t bos_lock;
+	/**
+	 * @bos_list: list of bos created by this client
+	 *
+	 * Protected by @bos_lock.
+	 */
+	struct list_head bos_list;
+#endif
 };
 
 	static inline struct xe_drm_client *
@@ -41,5 +54,17 @@ xe_drm_client_get(struct xe_drm_client *client);
 static inline void xe_drm_client_put(struct xe_drm_client *client);
 #ifdef CONFIG_PROC_FS
 void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo);
+void xe_drm_client_remove_bo(struct xe_bo *bo);
+#else
+static inline void xe_drm_client_add_bo(struct xe_drm_client *client,
+					struct xe_bo *bo)
+{
+}
+
+static inline void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+}
 #endif
 #endif
-- 
GitLab


From 9e4e9761e64ea1086629852d30c08307538154ec Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 10 Aug 2023 11:33:24 +0530
Subject: [PATCH 1899/2340] drm/xe: Record each drm client with its VM

Enable accounting of indirect client memory usage.

Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 1 +
 drivers/gpu/drm/xe/xe_vm_types.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 53add99d4186f..92d682ee60306 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2058,6 +2058,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	}
 
 	args->vm_id = id;
+	vm->xef = xef;
 
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
 	/* Warning: Security issue - never enable by default */
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index dfbc53e56a869..b0f183d00416b 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -329,6 +329,8 @@ struct xe_vm {
 
 	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
 	bool batch_invalidate_tlb;
+	/** @xef: XE file handle for tracking this VM's drm client */
+	struct xe_file *xef;
 };
 
 /** struct xe_vma_op_map - VMA map operation */
-- 
GitLab


From 2ff00c4f77ab68e04f381c721117f98fb3228a11 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 14 Sep 2023 15:25:16 +0530
Subject: [PATCH 1900/2340] drm/xe: Track page table memory usage for client

Account page table memory usage in the owning client
memory usage stats.

V2:
  - Minor tweak to if (vm->pt_root[id]) check - Himal

Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 3 +++
 drivers/gpu/drm/xe/xe_vm.c | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 21a3dfe99e05a..b3e23d8ed8eef 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -7,6 +7,7 @@
 
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_drm_client.h"
 #include "xe_gt.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
@@ -196,6 +197,8 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 	pt->level = level;
 	pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
 
+	if (vm->xef)
+		xe_drm_client_add_bo(vm->xef->client, pt->bo);
 	xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);
 
 	return pt;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 92d682ee60306..fac722074004b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -19,6 +19,7 @@
 
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_drm_client.h"
 #include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_gt_pagefault.h"
@@ -1981,6 +1982,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_file *xef = to_xe_file(file);
 	struct drm_xe_vm_create *args = data;
+	struct xe_tile *tile;
 	struct xe_vm *vm;
 	u32 id, asid;
 	int err;
@@ -2060,6 +2062,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	args->vm_id = id;
 	vm->xef = xef;
 
+	/* Record BO memory for VM pagetable created against client */
+	for_each_tile(tile, xe, id)
+		if (vm->pt_root[id])
+			xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
+
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
 	/* Warning: Security issue - never enable by default */
 	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
-- 
GitLab


From 303fb1165765e1629e2a82bd1ebbea676c86b33e Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Tue, 29 Aug 2023 10:52:23 +0530
Subject: [PATCH 1901/2340] drm/xe: Account ring buffer and context state
 storage

Account ring buffers and logical context space against the owning client
memory usage stats.

Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_lrc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 1410dcab3d90e..35ae6e531d8a5 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -12,6 +12,7 @@
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_drm_client.h"
 #include "xe_exec_queue_types.h"
 #include "xe_gt.h"
 #include "xe_hw_fence.h"
@@ -739,9 +740,13 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 		kfree(init_data);
 	}
 
-	if (vm)
+	if (vm) {
 		xe_lrc_set_ppgtt(lrc, vm);
 
+		if (vm->xef)
+			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
+	}
+
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
-- 
GitLab


From 0845233388f8a26d00acf9bf230cfd4f36aa4c30 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Fri, 15 Sep 2023 23:39:01 +0530
Subject: [PATCH 1902/2340] drm/xe: Implement fdinfo memory stats printing

Use the newly added drm_print_memory_stats helper to show memory
utilisation of our objects in drm/driver specific fdinfo output.

To collect the stats we walk the per memory regions object lists
and accumulate object size into the respective drm_memory_stats
categories.

Objects with multiple possible placements are reported in multiple
regions for total and shared sizes, while other categories are
counted only for the currently active region.

V4:
  - Remove rcu lock - Auld/Thomas
  - take refcnt only if its non-zero - Auld
  - DMA_RESV_USAGE_BOOKKEEP covers all fences - Auld
  - covert to xe_bo for public objects
V3:
  - dont use xe_bo_get/put, not needed
  - use designated initializer - Jani
  - use list_for_each_entry_rcu
  - Fix Checkpatch err - CI
V2:
  - Use static initializer for mem_type - Himal/Jani

Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h         | 11 ++++
 drivers/gpu/drm/xe/xe_drm_client.c | 89 +++++++++++++++++++++++++++++-
 2 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index d22b2ae7db729..23f1b9e74e710 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -6,6 +6,8 @@
 #ifndef _XE_BO_H_
 #define _XE_BO_H_
 
+#include <drm/ttm/ttm_tt.h>
+
 #include "xe_bo_types.h"
 #include "xe_macros.h"
 #include "xe_vm_types.h"
@@ -247,6 +249,15 @@ static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
 	return PAGE_ALIGN(bo->ttm.base.size);
 }
 
+static inline bool xe_bo_has_pages(struct xe_bo *bo)
+{
+	if ((bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) ||
+	    xe_bo_is_vram(bo))
+		return true;
+
+	return false;
+}
+
 void __xe_bo_release_dummy(struct kref *kref);
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index b5ac9bc1f685a..82d1305e831f2 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -4,10 +4,12 @@
  */
 
 #include <drm/drm_print.h>
+#include <drm/xe_drm.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 
+#include "xe_bo.h"
 #include "xe_bo_types.h"
 #include "xe_device_types.h"
 #include "xe_drm_client.h"
@@ -100,6 +102,91 @@ void xe_drm_client_remove_bo(struct xe_bo *bo)
 	xe_drm_client_put(client);
 }
 
+static void bo_meminfo(struct xe_bo *bo,
+		       struct drm_memory_stats stats[TTM_NUM_MEM_TYPES])
+{
+	u64 sz = bo->size;
+	u32 mem_type;
+
+	if (bo->placement.placement)
+		mem_type = bo->placement.placement->mem_type;
+	else
+		mem_type = XE_PL_TT;
+
+	if (bo->ttm.base.handle_count > 1)
+		stats[mem_type].shared += sz;
+	else
+		stats[mem_type].private += sz;
+
+	if (xe_bo_has_pages(bo)) {
+		stats[mem_type].resident += sz;
+
+		if (!dma_resv_test_signaled(bo->ttm.base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP))
+			stats[mem_type].active += sz;
+		else if (mem_type == XE_PL_SYSTEM)
+			stats[mem_type].purgeable += sz;
+	}
+}
+
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+	static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
+		[XE_PL_SYSTEM] = "system",
+		[XE_PL_TT] = "gtt",
+		[XE_PL_VRAM0] = "vram0",
+		[XE_PL_VRAM1] = "vram1",
+		[4 ... 6] = NULL,
+		[XE_PL_STOLEN] = "stolen"
+	};
+	struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {};
+	struct xe_file *xef = file->driver_priv;
+	struct ttm_device *bdev = &xef->xe->ttm;
+	struct ttm_resource_manager *man;
+	struct xe_drm_client *client;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+	unsigned int id;
+	u32 mem_type;
+
+	client = xef->client;
+
+	/* Public objects. */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, obj, id) {
+		struct xe_bo *bo = gem_to_xe_bo(obj);
+
+		bo_meminfo(bo, stats);
+	}
+	spin_unlock(&file->table_lock);
+
+	/* Internal objects. */
+	spin_lock(&client->bos_lock);
+	list_for_each_entry_rcu(bo, &client->bos_list, client_link) {
+		if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount))
+			continue;
+		bo_meminfo(bo, stats);
+		xe_bo_put(bo);
+	}
+	spin_unlock(&client->bos_lock);
+
+	for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
+		if (!mem_type_to_name[mem_type])
+			continue;
+
+		man = ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			drm_print_memory_stats(p,
+					       &stats[mem_type],
+					       DRM_GEM_OBJECT_RESIDENT |
+					       (mem_type != XE_PL_SYSTEM ? 0 :
+					       DRM_GEM_OBJECT_PURGEABLE),
+					       mem_type_to_name[mem_type]);
+		}
+	}
+}
+
 /**
  * xe_drm_client_fdinfo() - Callback for fdinfo interface
  * @p: The drm_printer ptr
@@ -112,6 +199,6 @@ void xe_drm_client_remove_bo(struct xe_bo *bo)
  */
 void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 {
-	/* show_meminfo() will be developed here */
+	show_meminfo(p, file);
 }
 #endif
-- 
GitLab


From dbac286d8529d6debc0f56fa9a3ea26f78826997 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 18 Sep 2023 14:48:46 +0300
Subject: [PATCH 1903/2340] drm/xe: proper setting of irq enabled flag

IRQ enabled flag should be set only after request irq succeeds.

Reviewed-by: Ohad Sharabi <osharabi@habana.ai>
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index dec3d518b3fc4..e1126eccb50e6 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -579,16 +579,14 @@ int xe_irq_install(struct xe_device *xe)
 		return -EINVAL;
 	}
 
-	xe->irq.enabled = true;
-
 	xe_irq_reset(xe);
 
 	err = request_irq(irq, irq_handler,
 			  IRQF_SHARED, DRIVER_NAME, xe);
-	if (err < 0) {
-		xe->irq.enabled = false;
+	if (err < 0)
 		return err;
-	}
+
+	xe->irq.enabled = true;
 
 	xe_irq_postinstall(xe);
 
-- 
GitLab


From 14d25d8d684d0196d160653659c5afbf5af777f0 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 18 Sep 2023 14:48:47 +0300
Subject: [PATCH 1904/2340] drm/xe: change old msi irq api to a new one

As a preparation for msix support, changing for new msi irq api
which supports both msi and msix.

Reviewed-by: Ohad Sharabi <osharabi@habana.ai>
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rebase fixes by Rodrigo]
---
 drivers/gpu/drm/xe/xe_irq.c | 38 ++++++++++++++++++++++++++++---------
 drivers/gpu/drm/xe/xe_pci.c |  3 ---
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index e1126eccb50e6..82ddf9d84a56f 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -555,23 +555,24 @@ static void irq_uninstall(struct drm_device *drm, void *arg)
 {
 	struct xe_device *xe = arg;
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-	int irq = pdev->irq;
+	int irq;
 
 	if (!xe->irq.enabled)
 		return;
 
 	xe->irq.enabled = false;
 	xe_irq_reset(xe);
+
+	irq = pci_irq_vector(pdev, 0);
 	free_irq(irq, xe);
-	if (pdev->msi_enabled)
-		pci_disable_msi(pdev);
+	pci_free_irq_vectors(pdev);
 }
 
 int xe_irq_install(struct xe_device *xe)
 {
-	int irq = to_pci_dev(xe->drm.dev)->irq;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	irq_handler_t irq_handler;
-	int err;
+	int err, irq;
 
 	irq_handler = xe_irq_handler(xe);
 	if (!irq_handler) {
@@ -581,16 +582,35 @@ int xe_irq_install(struct xe_device *xe)
 
 	xe_irq_reset(xe);
 
-	err = request_irq(irq, irq_handler,
-			  IRQF_SHARED, DRIVER_NAME, xe);
-	if (err < 0)
+	err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+	if (err < 0) {
+		drm_err(&xe->drm, "MSI: Failed to enable support %d\n", err);
 		return err;
+	}
+
+	irq = pci_irq_vector(pdev, 0);
+	err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
+	if (err < 0) {
+		drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err);
+		goto free_pci_irq_vectors;
+	}
 
 	xe->irq.enabled = true;
 
 	xe_irq_postinstall(xe);
 
-	return drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
+	err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
+	if (err)
+		goto free_irq_handler;
+
+	return 0;
+
+free_irq_handler:
+	free_irq(irq, xe);
+free_pci_irq_vectors:
+	pci_free_irq_vectors(pdev);
+
+	return err;
 }
 
 void xe_irq_shutdown(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index b72d9f568768b..7e018ee9db406 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -670,9 +670,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_master(pdev);
 
-	if (pci_enable_msi(pdev) < 0)
-		drm_dbg(&xe->drm, "can't enable MSI");
-
 	err = xe_info_init(xe, desc, subplatform_desc);
 	if (err)
 		goto err_pci_disable;
-- 
GitLab


From bc18dae50f165bc1c18284fe59d77dd00617b530 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 18 Sep 2023 14:48:48 +0300
Subject: [PATCH 1905/2340] drm/xe: add msix support

In future devices we will need to support msix interrupts.

Reviewed-by: Ohad Sharabi <osharabi@habana.ai>
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 82ddf9d84a56f..a91e782e06ebf 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -582,16 +582,16 @@ int xe_irq_install(struct xe_device *xe)
 
 	xe_irq_reset(xe);
 
-	err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+	err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
 	if (err < 0) {
-		drm_err(&xe->drm, "MSI: Failed to enable support %d\n", err);
+		drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
 		return err;
 	}
 
 	irq = pci_irq_vector(pdev, 0);
 	err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
 	if (err < 0) {
-		drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err);
+		drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err);
 		goto free_pci_irq_vectors;
 	}
 
-- 
GitLab


From babba646785d6855cba64fb0480beb8d3421cc52 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 22 Sep 2023 10:43:20 -0700
Subject: [PATCH 1906/2340] drm/xe: Accept a const xe device

Depending on the context, it's preferred to have a const pointer to make
sure nothing is modified underneath. The assert macros only ever read
data from xe/tile/gt for printing, so they can be made const by default.

Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://lore.kernel.org/r/20230922174320.2372617-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_assert.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
index 962aac1bc7648..34c142e6cfb09 100644
--- a/drivers/gpu/drm/xe/xe_assert.h
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -86,7 +86,7 @@
 })
 #else
 #define __xe_assert_msg(xe, condition, msg, arg...) ({						\
-	typecheck(struct xe_device *, xe);							\
+	typecheck(const struct xe_device *, xe);						\
 	BUILD_BUG_ON_INVALID(condition);							\
 })
 #endif
@@ -107,7 +107,7 @@
  */
 #define xe_assert(xe, condition) xe_assert_msg((xe), condition, "")
 #define xe_assert_msg(xe, condition, msg, arg...) ({						\
-	struct xe_device *__xe = (xe);								\
+	const struct xe_device *__xe = (xe);							\
 	__xe_assert_msg(__xe, condition,							\
 			"platform: %d subplatform: %d\n"					\
 			"graphics: %s %u.%02u step %s\n"					\
@@ -142,7 +142,7 @@
  */
 #define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "")
 #define xe_tile_assert_msg(tile, condition, msg, arg...) ({					\
-	struct xe_tile *__tile = (tile);							\
+	const struct xe_tile *__tile = (tile);							\
 	char __buf[10] __maybe_unused;								\
 	xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg,			\
 		      __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1,	\
@@ -166,7 +166,7 @@
  */
 #define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "")
 #define xe_gt_assert_msg(gt, condition, msg, arg...) ({						\
-	struct xe_gt *__gt = (gt);								\
+	const struct xe_gt *__gt = (gt);							\
 	xe_tile_assert_msg(gt_to_tile(__gt), condition, "GT: %u type %d\n" msg,			\
 			   __gt->info.id, __gt->info.type, ## arg);				\
 })
-- 
GitLab


From 02cadbb5d123204ce193672007868d18db762172 Mon Sep 17 00:00:00 2001
From: Pallavi Mishra <pallavi.mishra@intel.com>
Date: Thu, 21 Sep 2023 03:02:59 +0530
Subject: [PATCH 1907/2340] drm/xe: Align size to PAGE_SIZE

Ensure alignment with PAGE_SIZE for the size parameter
passed to __xe_bo_create_locked()

v2: move size alignment under else condition (Lucas)

Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230920213259.3458968-1-pallavi.mishra@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index eb08a89547422..1a10d9324a07e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1227,6 +1227,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 		flags |= XE_BO_INTERNAL_64K;
 		alignment = SZ_64K >> PAGE_SHIFT;
 	} else {
+		size = ALIGN(size, PAGE_SIZE);
 		alignment = SZ_4K >> PAGE_SHIFT;
 	}
 
-- 
GitLab


From 5fdd4b21aed8a33fd8e8f8fb3dc2f0c8f659918b Mon Sep 17 00:00:00 2001
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Date: Mon, 25 Sep 2023 21:35:43 +0530
Subject: [PATCH 1908/2340] drm/xe: Add Wa_18028616096

Drop UGM per set fragment threshold to 3

BSpec: 54833
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230925160543.915217-1-shekhar.chauhan@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  1 +
 drivers/gpu/drm/xe/xe_wa.c           | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index e13fbbdf6929e..3a4c9bcf793ff 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -347,6 +347,7 @@
 #define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
 
 #define LSC_CHICKEN_BIT_0_UDW			XE_REG_MCR(0xe7c8 + 4)
+#define   UGM_FRAGMENT_THRESHOLD_TO_3		REG_BIT(58 - 32)
 #define   DIS_CHAIN_2XSIMD8			REG_BIT(55 - 32)
 #define   FORCE_SLM_FENCE_SCOPE_TO_TILE		REG_BIT(42 - 32)
 #define   FORCE_UGM_FENCE_SCOPE_TO_TILE		REG_BIT(41 - 32)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d84e67a9af075..1450af6cab344 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -374,6 +374,16 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2,
 			     PERF_FIX_BALANCING_CFE_DISABLE))
 	},
+	{ XE_RTP_NAME("18028616096"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
+	},
+	{ XE_RTP_NAME("18028616096"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G12),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
+	},
 	{ XE_RTP_NAME("16011620976, 22015475538"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
-- 
GitLab


From 51a5d656090e0a865d91f1e6ce0c7a09d71a4b70 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Tue, 26 Sep 2023 19:19:15 -0300
Subject: [PATCH 1909/2340] drm/xe/irq: Clear GFX_MSTR_IRQ as part of IRQ reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starting with Xe_LP+, GFX_MSTR_IRQ contains status bits that have W1C
behavior. If we do not properly reset them, we would miss delivery of
interrupts if a pending bit is set when enabling IRQs.

As an example, the display part of our probe routine contains paths
where we wait for vblank interrupts. If a display interrupt was already
pending when enabling IRQs, we would time out waiting for the vblank.

That in fact happened recently when modprobing Xe on a Lunar Lake with a
specific configuration; and that's how we found out we were missing this
step in the IRQ enabling logic.

Fix the issue by clearing GFX_MSTR_IRQ as part of the IRQ reset.

v2:
  - Make resetting GFX_MSTR_IRQ be the last step to avoid bit
    re-latching. (Ville)
v3:
  - Swap nesting order: guard loop with the IP version check instead of
    doing the check at each iteration. (Lucas)
v4:
  - Add braces for the "if" statement guarding the loop to make the
    compiler happy. (Gustavo)

BSpec: 50875, 54028, 62357
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230926221914.106843-2-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index a91e782e06ebf..4cc5f7086b4ce 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -511,6 +511,13 @@ static void dg1_irq_reset(struct xe_tile *tile)
 	mask_and_disable(tile, PCU_IRQ_OFFSET);
 }
 
+static void dg1_irq_reset_mstr(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
+}
+
 static void xe_irq_reset(struct xe_device *xe)
 {
 	struct xe_tile *tile;
@@ -525,6 +532,16 @@ static void xe_irq_reset(struct xe_device *xe)
 
 	tile = xe_device_get_root_tile(xe);
 	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
+
+	/*
+	 * The tile's top-level status register should be the last one
+	 * to be reset to avoid possible bit re-latching from lower
+	 * level interrupts.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1210) {
+		for_each_tile(tile, xe, id)
+			dg1_irq_reset_mstr(tile);
+	}
 }
 
 static void xe_irq_postinstall(struct xe_device *xe)
-- 
GitLab


From 1951dad5347e8b618f545d2c14f8d2816be61b1f Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 27 Sep 2023 13:51:44 -0700
Subject: [PATCH 1910/2340] drm/xe: Infer service copy functionality from
 engine list

On platforms with multiple BCS engines (i.e., PVC and Xe2), not all BCS
engines are created equal.  The BCS0 engine is what the specs refer to
as a "resource copy engine," which supports the platform's full set of
copy/fill instructions.  In contast, the non-BCS0 "service copy" engines
are more streamlined and only support a subset of the GPU instructions
supported by the resource copy engine.  Platforms with both types of
copy engines always support the MEM_COPY and MEM_SET instructions which
can be used for simple copy and fill operations on either type of BCS
engine.  Since the simple MEM_SET instruction meets the needs of Xe's
migrate code (and since the more elaborate XY_FAST_COLOR_BLT instruction
isn't available to use on service copy engines), we always prefer to use
MEM_SET for clearing buffers on our newer platforms.

We've been using a 'has_link_copy_engine' feature flag to keep track of
which platforms should use MEM_SET for fills.  However a feature flag
like this is unnecessary since we can already derive the presence of
service copy engines (and in turn the MEM_SET instruction) just by
looking at the platform's pre-fusing engine list.  Utilizing the engine
list for this also avoids mistakes like we've made on Xe2 where we
forget to set the feature flag in the IP definition.

For clarity, "service copy" is a general term that covers any blitter
engines that support a limited subset of the overall blitter instruction
set (in practice this is any non-BCS0 blitter engine).  The "link copy
engines" introduced on PVC and the "paging copy engine" present in Xe2
are both instances of service copy engines.

v2:
 - Rewrite / expand the commit message.  (Bala)
 - Fix checkpatch whitespace error.

Bspec: 65019
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Link: https://lore.kernel.org/r/20230927205143.2695089-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  2 --
 drivers/gpu/drm/xe/xe_migrate.c      | 34 +++++++++++++++++-----------
 drivers/gpu/drm/xe/xe_pci.c          |  2 --
 drivers/gpu/drm/xe/xe_pci_types.h    |  1 -
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index d748d71bb536b..aa9935ff6d84e 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -228,8 +228,6 @@ struct xe_device {
 		u8 has_llc:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
-		/** @has_link_copy_engines: Whether the platform has link copy engines */
-		u8 has_link_copy_engine:1;
 	} info;
 
 	/** @irq: device interrupt state */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 713f98baf0ee7..2d31e6ff72ea4 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -854,28 +854,36 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
 	bb->len += len;
 }
 
-static u32 emit_clear_cmd_len(struct xe_device *xe)
+static bool has_service_copy_support(struct xe_gt *gt)
 {
-	if (xe->info.has_link_copy_engine)
+	/*
+	 * What we care about is whether the architecture was designed with
+	 * service copy functionality (specifically the new MEM_SET / MEM_COPY
+	 * instructions) so check the architectural engine list rather than the
+	 * actual list since these instructions are usable on BCS0 even if
+	 * all of the actual service copy engines (BCS1-BCS8) have been fused
+	 * off.
+	 */
+	return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
+						XE_HW_ENGINE_BCS1);
+}
+
+static u32 emit_clear_cmd_len(struct xe_gt *gt)
+{
+	if (has_service_copy_support(gt))
 		return PVC_MEM_SET_CMD_LEN_DW;
 	else
 		return XY_FAST_COLOR_BLT_DW;
 }
 
-static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
-		      u32 size, u32 pitch, bool is_vram)
+static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+		       u32 size, u32 pitch, bool is_vram)
 {
-	struct xe_device *xe = gt_to_xe(gt);
-
-	if (xe->info.has_link_copy_engine) {
+	if (has_service_copy_support(gt))
 		emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
-
-	} else {
+	else
 		emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
 				     is_vram);
-	}
-
-	return 0;
 }
 
 /**
@@ -928,7 +936,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		batch_size = 2 +
 			pte_update_size(m, clear_vram, src, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
-					emit_clear_cmd_len(xe), 0,
+					emit_clear_cmd_len(gt), 0,
 					NUM_PT_PER_BLIT);
 		if (xe_device_has_flat_ccs(xe) && clear_vram)
 			batch_size += EMIT_COPY_CCS_DW;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 7e018ee9db406..fd8d7eddd6f6d 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -132,7 +132,6 @@ static const struct xe_graphics_desc graphics_xehpc = {
 
 	.has_asid = 1,
 	.has_flat_ccs = 0,
-	.has_link_copy_engine = 1,
 	.supports_usm = 1,
 };
 
@@ -556,7 +555,6 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.has_asid = graphics_desc->has_asid;
 	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
 	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
-	xe->info.has_link_copy_engine = graphics_desc->has_link_copy_engine;
 
 	/*
 	 * All platforms have at least one primary GT.  Any platform with media
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index df6ddbc52d7f6..bd0b0d87413e6 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -24,7 +24,6 @@ struct xe_graphics_desc {
 
 	u8 has_asid:1;
 	u8 has_flat_ccs:1;
-	u8 has_link_copy_engine:1;
 	u8 has_range_tlb_invalidation:1;
 	u8 supports_usm:1;
 };
-- 
GitLab


From f24081cd6275748d4f7c5925645436ed406cec12 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:52 -0700
Subject: [PATCH 1911/2340] drm/xe: Normalize pte/pde encoding

Split functions that do only part of the pde/pte encoding and that can
be called by the different places. This normalizes how pde/pte are
encoded so they can be moved elsewhere in a subsequent change.

xe_pte_encode() was calling __pte_encode() with a NULL vma, which is the
opposite of what xe_pt_stage_bind_entry() does. Stop passing a NULL vma
and just split another function that deals with a vma rather than a bo.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 119 +++++++++++++++++++++----------------
 1 file changed, 67 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index b3e23d8ed8eef..01e7c8815e7d0 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -47,91 +47,106 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
 	return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
 }
 
-/**
- * xe_pde_encode() - Encode a page-table directory entry pointing to
- * another page-table.
- * @bo: The page-table bo of the page-table to point to.
- * @bo_offset: Offset in the page-table bo to point to.
- * @level: The cache level indicating the caching of @bo.
- *
- * TODO: Rename.
- *
- * Return: An encoded page directory entry. No errors.
- */
-u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
-		  const enum xe_cache_level level)
+static u64 pde_encode_cache(enum xe_cache_level cache)
 {
-	u64 pde;
-
-	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
-	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
-
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
 
-	if (level != XE_CACHE_NONE)
-		pde |= PPAT_CACHED_PDE;
-	else
-		pde |= PPAT_UNCACHED;
+	if (cache != XE_CACHE_NONE)
+		return PPAT_CACHED_PDE;
 
-	return pde;
+	return PPAT_UNCACHED;
 }
 
-static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
-			struct xe_vma *vma, u32 pt_level)
+static u64 pte_encode_cache(enum xe_cache_level cache)
 {
-	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-
-	if (unlikely(vma && xe_vma_read_only(vma)))
-		pte &= ~XE_PAGE_RW;
-
-	if (unlikely(vma && xe_vma_is_null(vma)))
-		pte |= XE_PTE_NULL;
-
 	/* FIXME: I don't think the PPAT handling is correct for MTL */
-
 	switch (cache) {
 	case XE_CACHE_NONE:
-		pte |= PPAT_UNCACHED;
-		break;
+		return PPAT_UNCACHED;
 	case XE_CACHE_WT:
-		pte |= PPAT_DISPLAY_ELLC;
-		break;
+		return PPAT_DISPLAY_ELLC;
 	default:
-		pte |= PPAT_CACHED;
-		break;
+		return PPAT_CACHED;
 	}
+}
+
+static u64 pte_encode_ps(u32 pt_level)
+{
+	/* XXX: Does hw support 1 GiB pages? */
+	XE_WARN_ON(pt_level > 2);
 
 	if (pt_level == 1)
-		pte |= XE_PDE_PS_2M;
+		return XE_PDE_PS_2M;
 	else if (pt_level == 2)
-		pte |= XE_PDPE_PS_1G;
+		return XE_PDPE_PS_1G;
 
-	/* XXX: Does hw support 1 GiB pages? */
-	XE_WARN_ON(pt_level > 2);
+	return 0;
+}
 
-	return pte;
+/**
+ * xe_pde_encode() - Encode a page-table directory entry pointing to
+ * another page-table.
+ * @bo: The page-table bo of the page-table to point to.
+ * @bo_offset: Offset in the page-table bo to point to.
+ * @cache: The cache level indicating the caching of @bo.
+ *
+ * TODO: Rename.
+ *
+ * Return: An encoded page directory entry. No errors.
+ */
+u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
+		  const enum xe_cache_level cache)
+{
+	u64 pde;
+
+	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pde |= pde_encode_cache(cache);
+
+	return pde;
 }
 
 /**
  * xe_pte_encode() - Encode a page-table entry pointing to memory.
  * @bo: The BO representing the memory to point to.
- * @offset: The offset into @bo.
+ * @bo_offset: The offset into @bo.
  * @cache: The cache level indicating
  * @pt_level: The page-table level of the page-table into which the entry
  * is to be inserted.
  *
  * Return: An encoded page-table entry. No errors.
  */
-u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
+u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache,
 		  u32 pt_level)
 {
 	u64 pte;
 
-	pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE);
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_ps(pt_level);
+
 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
 		pte |= XE_PPGTT_PTE_DM;
 
-	return __pte_encode(pte, cache, NULL, pt_level);
+	return pte;
+}
+
+/* Like xe_pte_encode(), but with a vma and a partially-encoded pte */
+static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma,
+			    enum xe_cache_level cache, u32 pt_level)
+{
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_ps(pt_level);
+
+	if (unlikely(vma && xe_vma_read_only(vma)))
+		pte &= ~XE_PAGE_RW;
+
+	if (unlikely(vma && xe_vma_is_null(vma)))
+		pte |= XE_PTE_NULL;
+
+	return pte;
 }
 
 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -614,9 +629,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
-		pte = __pte_encode(is_null ? 0 :
-				   xe_res_dma(curs) + xe_walk->dma_offset,
-				   xe_walk->cache, xe_walk->vma, level);
+		pte = __vma_pte_encode(is_null ? 0 :
+				       xe_res_dma(curs) + xe_walk->dma_offset,
+				       xe_walk->vma, xe_walk->cache, level);
 		pte |= xe_walk->default_pte;
 
 		/*
-- 
GitLab


From b3bb7d9c561d664707717f8887b665ce8fef69ff Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:53 -0700
Subject: [PATCH 1912/2340] drm/xe: Remove check for vma == NULL

vma at this point can never be NULL as otherwise it would crash earlier
in the only caller, xe_pt_stage_bind_entry(). Remove the extra check and
avoid adding and removing the bits from the pte.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 01e7c8815e7d0..c39e3b46df3e3 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -136,14 +136,15 @@ u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache,
 static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma,
 			    enum xe_cache_level cache, u32 pt_level)
 {
-	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= XE_PAGE_PRESENT;
+
+	if (likely(!xe_vma_read_only(vma)))
+		pte |= XE_PAGE_RW;
+
 	pte |= pte_encode_cache(cache);
 	pte |= pte_encode_ps(pt_level);
 
-	if (unlikely(vma && xe_vma_read_only(vma)))
-		pte &= ~XE_PAGE_RW;
-
-	if (unlikely(vma && xe_vma_is_null(vma)))
+	if (unlikely(xe_vma_is_null(vma)))
 		pte |= XE_PTE_NULL;
 
 	return pte;
-- 
GitLab


From 0e5e77bd9704edf1713ebed37e2da1b4faa25a52 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:54 -0700
Subject: [PATCH 1913/2340] drm/xe: Use vfunc for pte/pde ppgtt encoding

Move the function to encode pte/pde to be vfuncs inside struct xe_vm.
This will allow to easily extend to platforms that don't have a
compatible encoding.

v2: Fix kunit build

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |   2 +-
 drivers/gpu/drm/xe/xe_migrate.c       |  18 ++--
 drivers/gpu/drm/xe/xe_pt.c            | 125 +++-----------------------
 drivers/gpu/drm/xe/xe_pt.h            |   6 --
 drivers/gpu/drm/xe/xe_pt_types.h      |  14 +++
 drivers/gpu/drm/xe/xe_vm.c            |  93 ++++++++++++++++++-
 drivers/gpu/drm/xe/xe_vm_types.h      |   2 +
 7 files changed, 128 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index f58cd1da1a341..6906ff9d9c310 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
-	expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0);
+	expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, XE_CACHE_WB, 0);
 	if (m->q->vm->flags & XE_VM_FLAG_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 2d31e6ff72ea4..6884e79199d54 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -189,14 +189,15 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		return ret;
 	}
 
-	entry = xe_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
+	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
 	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
 
 	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
 
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
-		entry = xe_pte_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0);
+		entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE,
+						  XE_CACHE_WB, 0);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
 
@@ -214,7 +215,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		for (i = 0; i < batch->size;
 		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
-			entry = xe_pte_encode(batch, i, XE_CACHE_WB, 0);
+			entry = vm->pt_ops->pte_encode_bo(batch, i,
+							  XE_CACHE_WB, 0);
 
 			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
 				  entry);
@@ -238,16 +240,16 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		if (vm->flags & XE_VM_FLAG_64K && level == 1)
 			flags = XE_PDE_64K;
 
-		entry = xe_pde_encode(bo, map_ofs + (level - 1) *
-					XE_PAGE_SIZE, XE_CACHE_WB);
+		entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+						  XE_PAGE_SIZE, XE_CACHE_WB);
 		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
 			  entry | flags);
 	}
 
 	/* Write PDE's that point to our BO. */
 	for (i = 0; i < num_entries - num_level; i++) {
-		entry = xe_pde_encode(bo, i * XE_PAGE_SIZE,
-				      XE_CACHE_WB);
+		entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+						  XE_CACHE_WB);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
 			  (i + 1) * 8, u64, entry);
@@ -1263,7 +1265,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 			xe_tile_assert(tile, pt_bo->size == SZ_4K);
 
-			addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0);
+			addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, XE_CACHE_WB, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 		}
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c39e3b46df3e3..d5f721efdc3cc 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -47,109 +47,6 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
 	return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
 }
 
-static u64 pde_encode_cache(enum xe_cache_level cache)
-{
-	/* FIXME: I don't think the PPAT handling is correct for MTL */
-
-	if (cache != XE_CACHE_NONE)
-		return PPAT_CACHED_PDE;
-
-	return PPAT_UNCACHED;
-}
-
-static u64 pte_encode_cache(enum xe_cache_level cache)
-{
-	/* FIXME: I don't think the PPAT handling is correct for MTL */
-	switch (cache) {
-	case XE_CACHE_NONE:
-		return PPAT_UNCACHED;
-	case XE_CACHE_WT:
-		return PPAT_DISPLAY_ELLC;
-	default:
-		return PPAT_CACHED;
-	}
-}
-
-static u64 pte_encode_ps(u32 pt_level)
-{
-	/* XXX: Does hw support 1 GiB pages? */
-	XE_WARN_ON(pt_level > 2);
-
-	if (pt_level == 1)
-		return XE_PDE_PS_2M;
-	else if (pt_level == 2)
-		return XE_PDPE_PS_1G;
-
-	return 0;
-}
-
-/**
- * xe_pde_encode() - Encode a page-table directory entry pointing to
- * another page-table.
- * @bo: The page-table bo of the page-table to point to.
- * @bo_offset: Offset in the page-table bo to point to.
- * @cache: The cache level indicating the caching of @bo.
- *
- * TODO: Rename.
- *
- * Return: An encoded page directory entry. No errors.
- */
-u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
-		  const enum xe_cache_level cache)
-{
-	u64 pde;
-
-	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
-	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pde |= pde_encode_cache(cache);
-
-	return pde;
-}
-
-/**
- * xe_pte_encode() - Encode a page-table entry pointing to memory.
- * @bo: The BO representing the memory to point to.
- * @bo_offset: The offset into @bo.
- * @cache: The cache level indicating
- * @pt_level: The page-table level of the page-table into which the entry
- * is to be inserted.
- *
- * Return: An encoded page-table entry. No errors.
- */
-u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache,
-		  u32 pt_level)
-{
-	u64 pte;
-
-	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
-	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_cache(cache);
-	pte |= pte_encode_ps(pt_level);
-
-	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
-		pte |= XE_PPGTT_PTE_DM;
-
-	return pte;
-}
-
-/* Like xe_pte_encode(), but with a vma and a partially-encoded pte */
-static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma,
-			    enum xe_cache_level cache, u32 pt_level)
-{
-	pte |= XE_PAGE_PRESENT;
-
-	if (likely(!xe_vma_read_only(vma)))
-		pte |= XE_PAGE_RW;
-
-	pte |= pte_encode_cache(cache);
-	pte |= pte_encode_ps(pt_level);
-
-	if (unlikely(xe_vma_is_null(vma)))
-		pte |= XE_PTE_NULL;
-
-	return pte;
-}
-
 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 			     unsigned int level)
 {
@@ -158,15 +55,11 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 	if (!vm->scratch_bo[id])
 		return 0;
 
-	if (level == 0) {
-		u64 empty = xe_pte_encode(vm->scratch_bo[id], 0,
-					  XE_CACHE_WB, 0);
+	if (level > 0)
+		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
+						 0, XE_CACHE_WB);
 
-		return empty;
-	} else {
-		return xe_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
-				     XE_CACHE_WB);
-	}
+	return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, XE_CACHE_WB, 0);
 }
 
 /**
@@ -618,6 +511,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	struct xe_pt_stage_bind_walk *xe_walk =
 		container_of(walk, typeof(*xe_walk), base);
 	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+	struct xe_vm *vm = xe_walk->vm;
 	struct xe_pt *xe_child;
 	bool covers;
 	int ret = 0;
@@ -630,9 +524,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
-		pte = __vma_pte_encode(is_null ? 0 :
-				       xe_res_dma(curs) + xe_walk->dma_offset,
-				       xe_walk->vma, xe_walk->cache, level);
+		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+						 xe_res_dma(curs) + xe_walk->dma_offset,
+						 xe_walk->vma, xe_walk->cache, level);
 		pte |= xe_walk->default_pte;
 
 		/*
@@ -697,7 +591,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 			xe_child->is_compact = true;
 		}
 
-		pte = xe_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
+		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0,
+						xe_walk->cache) | flags;
 		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
 					 pte);
 	}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 01be7ab08f87e..d5460e58dbbf9 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -45,10 +45,4 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 
 bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
 
-u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
-		  const enum xe_cache_level level);
-
-u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
-		  u32 pt_level);
-
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 2ed64c0a44857..c58f6926fabfa 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -6,8 +6,13 @@
 #ifndef _XE_PT_TYPES_H_
 #define _XE_PT_TYPES_H_
 
+#include <linux/types.h>
+
 #include "xe_pt_walk.h"
 
+struct xe_bo;
+struct xe_vma;
+
 enum xe_cache_level {
 	XE_CACHE_NONE,
 	XE_CACHE_WT,
@@ -29,6 +34,15 @@ struct xe_pt {
 #endif
 };
 
+struct xe_pt_ops {
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     enum xe_cache_level cache, u32 pt_level);
+	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
+			      enum xe_cache_level cache, u32 pt_level);
+	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     const enum xe_cache_level cache);
+};
+
 struct xe_pt_entry {
 	struct xe_pt *pt;
 	u64 pte;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index fac722074004b..72e27e6809f95 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1210,6 +1210,93 @@ static struct drm_gpuvm_ops gpuvm_ops = {
 	.vm_free = xe_vm_free,
 };
 
+static u64 pde_encode_cache(enum xe_cache_level cache)
+{
+	/* FIXME: I don't think the PPAT handling is correct for MTL */
+
+	if (cache != XE_CACHE_NONE)
+		return PPAT_CACHED_PDE;
+
+	return PPAT_UNCACHED;
+}
+
+static u64 pte_encode_cache(enum xe_cache_level cache)
+{
+	/* FIXME: I don't think the PPAT handling is correct for MTL */
+	switch (cache) {
+	case XE_CACHE_NONE:
+		return PPAT_UNCACHED;
+	case XE_CACHE_WT:
+		return PPAT_DISPLAY_ELLC;
+	default:
+		return PPAT_CACHED;
+	}
+}
+
+static u64 pte_encode_ps(u32 pt_level)
+{
+	/* XXX: Does hw support 1 GiB pages? */
+	XE_WARN_ON(pt_level > 2);
+
+	if (pt_level == 1)
+		return XE_PDE_PS_2M;
+	else if (pt_level == 2)
+		return XE_PDPE_PS_1G;
+
+	return 0;
+}
+
+static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      const enum xe_cache_level cache)
+{
+	u64 pde;
+
+	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pde |= pde_encode_cache(cache);
+
+	return pde;
+}
+
+static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      enum xe_cache_level cache, u32 pt_level)
+{
+	u64 pte;
+
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_ps(pt_level);
+
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		pte |= XE_PPGTT_PTE_DM;
+
+	return pte;
+}
+
+static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
+			       enum xe_cache_level cache, u32 pt_level)
+{
+	pte |= XE_PAGE_PRESENT;
+
+	if (likely(!xe_vma_read_only(vma)))
+		pte |= XE_PAGE_RW;
+
+	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_ps(pt_level);
+
+	if (unlikely(xe_vma_is_null(vma)))
+		pte |= XE_PTE_NULL;
+
+	return pte;
+}
+
+static const struct xe_pt_ops xelp_pt_ops = {
+	.pte_encode_bo = xelp_pte_encode_bo,
+	.pte_encode_vma = xelp_pte_encode_vma,
+	.pde_encode_bo = xelp_pde_encode_bo,
+};
+
 static void xe_vma_op_work_func(struct work_struct *w);
 static void vm_destroy_work_func(struct work_struct *w);
 
@@ -1257,6 +1344,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 
 	INIT_LIST_HEAD(&vm->extobj.list);
 
+	vm->pt_ops = &xelp_pt_ops;
+
 	if (!(flags & XE_VM_FLAG_MIGRATION))
 		xe_device_mem_access_get(xe);
 
@@ -1576,8 +1665,8 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
 
 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 {
-	return xe_pde_encode(vm->pt_root[tile->id]->bo, 0,
-			     XE_CACHE_WB);
+	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
+					 XE_CACHE_WB);
 }
 
 static struct dma_fence *
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index b0f183d00416b..9a1075a75606d 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -242,6 +242,8 @@ struct xe_vm {
 		bool munmap_rebind_inflight;
 	} async_ops;
 
+	const struct xe_pt_ops *pt_ops;
+
 	/** @userptr: user pointer state */
 	struct {
 		/**
-- 
GitLab


From 23c8495efeed0d83657de89b44a569ac406bdfad Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:55 -0700
Subject: [PATCH 1914/2340] drm/xe/migrate: Do not hand-encode pte

Instead of encoding the pte, call a new vfunc from xe_vm to handle that.
The encoding may not be the same on every platform, so keeping it in one
place helps to better support them.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c  | 14 ++++++++------
 drivers/gpu/drm/xe/xe_pt_types.h |  2 ++
 drivers/gpu/drm/xe/xe_vm.c       | 23 ++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 6884e79199d54..cd4dbbf6c3835 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -261,8 +261,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
 		level = 2;
 		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
-		flags = XE_PAGE_RW | XE_PAGE_PRESENT | PPAT_CACHED |
-			XE_PPGTT_PTE_DM | XE_PDPE_PS_1G;
+		flags = vm->pt_ops->pte_encode_addr(0, XE_CACHE_WB, level, true, 0);
 
 		/*
 		 * Use 1GB pages, it shouldn't matter the physical amount of
@@ -483,7 +482,8 @@ static void emit_pte(struct xe_migrate *m,
 		ptes -= chunk;
 
 		while (chunk--) {
-			u64 addr;
+			u64 addr, flags = 0;
+			bool devmem = false;
 
 			addr = xe_res_dma(cur) & PAGE_MASK;
 			if (is_vram) {
@@ -491,13 +491,15 @@ static void emit_pte(struct xe_migrate *m,
 				if ((m->q->vm->flags & XE_VM_FLAG_64K) &&
 				    !(cur_ofs & (16 * 8 - 1))) {
 					xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K));
-					addr |= XE_PTE_PS64;
+					flags |= XE_PTE_PS64;
 				}
 
 				addr += vram_region_gpu_offset(bo->ttm.resource);
-				addr |= XE_PPGTT_PTE_DM;
+				devmem = true;
 			}
-			addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW;
+
+			addr = m->q->vm->pt_ops->pte_encode_addr(addr, XE_CACHE_WB,
+								 0, devmem, flags);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index c58f6926fabfa..64e3921a0f468 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -39,6 +39,8 @@ struct xe_pt_ops {
 			     enum xe_cache_level cache, u32 pt_level);
 	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
 			      enum xe_cache_level cache, u32 pt_level);
+	u64 (*pte_encode_addr)(u64 addr, enum xe_cache_level cache,
+			       u32 pt_level, bool devmem, u64 flags);
 	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
 			     const enum xe_cache_level cache);
 };
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 72e27e6809f95..1d3569097e5f6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1235,7 +1235,6 @@ static u64 pte_encode_cache(enum xe_cache_level cache)
 
 static u64 pte_encode_ps(u32 pt_level)
 {
-	/* XXX: Does hw support 1 GiB pages? */
 	XE_WARN_ON(pt_level > 2);
 
 	if (pt_level == 1)
@@ -1291,9 +1290,31 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
 	return pte;
 }
 
+static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache,
+				u32 pt_level, bool devmem, u64 flags)
+{
+	u64 pte;
+
+	/* Avoid passing random bits directly as flags */
+	XE_WARN_ON(flags & ~XE_PTE_PS64);
+
+	pte = addr;
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_ps(pt_level);
+
+	if (devmem)
+		pte |= XE_PPGTT_PTE_DM;
+
+	pte |= flags;
+
+	return pte;
+}
+
 static const struct xe_pt_ops xelp_pt_ops = {
 	.pte_encode_bo = xelp_pte_encode_bo,
 	.pte_encode_vma = xelp_pte_encode_vma,
+	.pte_encode_addr = xelp_pte_encode_addr,
 	.pde_encode_bo = xelp_pde_encode_bo,
 };
 
-- 
GitLab


From b445be5710200501bba693fe6f9c614895412b94 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:56 -0700
Subject: [PATCH 1915/2340] drm/xe: Use vfunc to initialize PAT

Split the PAT initialization between SW-only and HW. The _early() only
sets up the ops and data structure that are used later to program the
tables. This allows the PAT to be easily extended to other platforms.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-6-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c       |  3 ++
 drivers/gpu/drm/xe/xe_device_types.h | 13 ++++++
 drivers/gpu/drm/xe/xe_pat.c          | 59 +++++++++++++++++++++-------
 drivers/gpu/drm/xe/xe_pat.h          | 11 ++++++
 4 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 67ec55810ca35..113fca462141f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -25,6 +25,7 @@
 #include "xe_irq.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
+#include "xe_pat.h"
 #include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_query.h"
@@ -268,6 +269,8 @@ int xe_device_probe(struct xe_device *xe)
 	int err;
 	u8 id;
 
+	xe_pat_init_early(xe);
+
 	xe->info.mem_region_mask = 1;
 
 	for_each_tile(tile, xe, id) {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index aa9935ff6d84e..c4920631677bd 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -19,6 +19,7 @@
 #include "xe_step_types.h"
 
 struct xe_ggtt;
+struct xe_pat_ops;
 
 #define XE_BO_INVALID_OFFSET	LONG_MAX
 
@@ -310,6 +311,18 @@ struct xe_device {
 		atomic_t ref;
 	} mem_access;
 
+	/**
+	 * @pat: Encapsulate PAT related stuff
+	 */
+	struct {
+		/** Internal operations to abstract platforms */
+		const struct xe_pat_ops *ops;
+		/** PAT table to program in the HW */
+		const u32 *table;
+		/** Number of PAT entries */
+		int n_entries;
+	} pat;
+
 	/** @d3cold: Encapsulate d3cold related stuff */
 	struct {
 		/** capable: Indicates if root port is d3cold capable */
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 71e0e047fff3b..28f401c500d8f 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -32,6 +32,11 @@
 #define TGL_PAT_WC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1)
 #define TGL_PAT_UC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0)
 
+struct xe_pat_ops {
+	void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries);
+	void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries);
+};
+
 static const u32 tgl_pat_table[] = {
 	[0] = TGL_PAT_WB,
 	[1] = TGL_PAT_WC,
@@ -80,24 +85,37 @@ static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries)
 	}
 }
 
-void xe_pat_init(struct xe_gt *gt)
-{
-	struct xe_device *xe = gt_to_xe(gt);
+static const struct xe_pat_ops tgl_pat_ops = {
+	.program_graphics = program_pat,
+};
+
+static const struct xe_pat_ops dg2_pat_ops = {
+	.program_graphics = program_pat_mcr,
+};
+
+/*
+ * SAMedia register offsets are adjusted by the write methods and they target
+ * registers that are not MCR, while for normal GT they are MCR
+ */
+static const struct xe_pat_ops mtl_pat_ops = {
+	.program_graphics = program_pat,
+	.program_media = program_pat_mcr,
+};
 
+void xe_pat_init_early(struct xe_device *xe)
+{
 	if (xe->info.platform == XE_METEORLAKE) {
-		/*
-		 * SAMedia register offsets are adjusted by the write methods
-		 * and they target registers that are not MCR, while for normal
-		 * GT they are MCR
-		 */
-		if (xe_gt_is_media_type(gt))
-			program_pat(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table));
-		else
-			program_pat_mcr(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table));
+		xe->pat.ops = &mtl_pat_ops;
+		xe->pat.table = mtl_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(mtl_pat_table);
 	} else if (xe->info.platform == XE_PVC || xe->info.platform == XE_DG2) {
-		program_pat_mcr(gt, pvc_pat_table, ARRAY_SIZE(pvc_pat_table));
+		xe->pat.ops = &dg2_pat_ops;
+		xe->pat.table = pvc_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(pvc_pat_table);
 	} else if (GRAPHICS_VERx100(xe) <= 1210) {
-		program_pat(gt, tgl_pat_table, ARRAY_SIZE(tgl_pat_table));
+		xe->pat.ops = &tgl_pat_ops;
+		xe->pat.table = tgl_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table);
 	} else {
 		/*
 		 * Going forward we expect to need new PAT settings for most
@@ -111,3 +129,16 @@ void xe_pat_init(struct xe_gt *gt)
 			GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
 	}
 }
+
+void xe_pat_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!xe->pat.ops)
+		return;
+
+	if (xe_gt_is_media_type(gt))
+		xe->pat.ops->program_media(gt, xe->pat.table, xe->pat.n_entries);
+	else
+		xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries);
+}
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index 659de40081316..168e80e63809a 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -7,7 +7,18 @@
 #define _XE_PAT_H_
 
 struct xe_gt;
+struct xe_device;
 
+/**
+ * xe_pat_init_early - SW initialization, setting up data based on device
+ * @xe: xe device
+ */
+void xe_pat_init_early(struct xe_device *xe);
+
+/**
+ * xe_pat_init - Program HW PAT table
+ * @gt: GT structure
+ */
 void xe_pat_init(struct xe_gt *gt);
 
 #endif
-- 
GitLab


From 194bdb859950a4223305ee766a3b9d90c398d158 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:57 -0700
Subject: [PATCH 1916/2340] drm/xe/dg2: Fix using wrong PAT table

DG2 should use the MCR variant to program the PAT registers, like PVC,
but shouldn't use the same table as PVC.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-7-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 28f401c500d8f..a4bebdedbbd96 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -108,10 +108,18 @@ void xe_pat_init_early(struct xe_device *xe)
 		xe->pat.ops = &mtl_pat_ops;
 		xe->pat.table = mtl_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(mtl_pat_table);
-	} else if (xe->info.platform == XE_PVC || xe->info.platform == XE_DG2) {
+	} else if (xe->info.platform == XE_PVC) {
 		xe->pat.ops = &dg2_pat_ops;
 		xe->pat.table = pvc_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(pvc_pat_table);
+	} else if (xe->info.platform == XE_DG2) {
+		/*
+		 * Table is the same as previous platforms, but programming
+		 * method has changed.
+		 */
+		xe->pat.ops = &dg2_pat_ops;
+		xe->pat.table = tgl_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table);
 	} else if (GRAPHICS_VERx100(xe) <= 1210) {
 		xe->pat.ops = &tgl_pat_ops;
 		xe->pat.table = tgl_pat_table;
-- 
GitLab


From 451028644775a5e07aaab3f147fda583e7054de6 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:58 -0700
Subject: [PATCH 1917/2340] drm/xe/pat: Prefer the arch/IP names

Both DG2 and PVC are derived from XeHP, but DG2 should not really
re-use something introduced by PVC, so it's odd to have DG2 re-using the
PVC programming for PAT. Let's prefer using the architecture and/or IP
names.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-8-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 112 ++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index a4bebdedbbd96..4ab1b3dc4d5d2 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -14,57 +14,57 @@
 								   0x4800, 0x4804, \
 								   0x4848, 0x484c)
 
-#define MTL_L4_POLICY_MASK			REG_GENMASK(3, 2)
-#define MTL_PAT_3_UC				REG_FIELD_PREP(MTL_L4_POLICY_MASK, 3)
-#define MTL_PAT_1_WT				REG_FIELD_PREP(MTL_L4_POLICY_MASK, 1)
-#define MTL_PAT_0_WB				REG_FIELD_PREP(MTL_L4_POLICY_MASK, 0)
-#define MTL_INDEX_COH_MODE_MASK			REG_GENMASK(1, 0)
-#define MTL_3_COH_2W				REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 3)
-#define MTL_2_COH_1W				REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 2)
-#define MTL_0_COH_NON				REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 0)
-
-#define PVC_CLOS_LEVEL_MASK			REG_GENMASK(3, 2)
-#define PVC_PAT_CLOS(x)				REG_FIELD_PREP(PVC_CLOS_LEVEL_MASK, x)
-
-#define TGL_MEM_TYPE_MASK			REG_GENMASK(1, 0)
-#define TGL_PAT_WB				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 3)
-#define TGL_PAT_WT				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 2)
-#define TGL_PAT_WC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1)
-#define TGL_PAT_UC				REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0)
+#define XELPG_L4_POLICY_MASK			REG_GENMASK(3, 2)
+#define XELPG_PAT_3_UC				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 3)
+#define XELPG_PAT_1_WT				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 1)
+#define XELPG_PAT_0_WB				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 0)
+#define XELPG_INDEX_COH_MODE_MASK		REG_GENMASK(1, 0)
+#define XELPG_3_COH_2W				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 3)
+#define XELPG_2_COH_1W				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 2)
+#define XELPG_0_COH_NON				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 0)
+
+#define XEHPC_CLOS_LEVEL_MASK			REG_GENMASK(3, 2)
+#define XEHPC_PAT_CLOS(x)			REG_FIELD_PREP(XEHPC_CLOS_LEVEL_MASK, x)
+
+#define XELP_MEM_TYPE_MASK			REG_GENMASK(1, 0)
+#define XELP_PAT_WB				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 3)
+#define XELP_PAT_WT				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 2)
+#define XELP_PAT_WC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1)
+#define XELP_PAT_UC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0)
 
 struct xe_pat_ops {
 	void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries);
 	void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries);
 };
 
-static const u32 tgl_pat_table[] = {
-	[0] = TGL_PAT_WB,
-	[1] = TGL_PAT_WC,
-	[2] = TGL_PAT_WT,
-	[3] = TGL_PAT_UC,
-	[4] = TGL_PAT_WB,
-	[5] = TGL_PAT_WB,
-	[6] = TGL_PAT_WB,
-	[7] = TGL_PAT_WB,
+static const u32 xelp_pat_table[] = {
+	[0] = XELP_PAT_WB,
+	[1] = XELP_PAT_WC,
+	[2] = XELP_PAT_WT,
+	[3] = XELP_PAT_UC,
+	[4] = XELP_PAT_WB,
+	[5] = XELP_PAT_WB,
+	[6] = XELP_PAT_WB,
+	[7] = XELP_PAT_WB,
 };
 
-static const u32 pvc_pat_table[] = {
-	[0] = TGL_PAT_UC,
-	[1] = TGL_PAT_WC,
-	[2] = TGL_PAT_WT,
-	[3] = TGL_PAT_WB,
-	[4] = PVC_PAT_CLOS(1) | TGL_PAT_WT,
-	[5] = PVC_PAT_CLOS(1) | TGL_PAT_WB,
-	[6] = PVC_PAT_CLOS(2) | TGL_PAT_WT,
-	[7] = PVC_PAT_CLOS(2) | TGL_PAT_WB,
+static const u32 xehpc_pat_table[] = {
+	[0] = XELP_PAT_UC,
+	[1] = XELP_PAT_WC,
+	[2] = XELP_PAT_WT,
+	[3] = XELP_PAT_WB,
+	[4] = XEHPC_PAT_CLOS(1) | XELP_PAT_WT,
+	[5] = XEHPC_PAT_CLOS(1) | XELP_PAT_WB,
+	[6] = XEHPC_PAT_CLOS(2) | XELP_PAT_WT,
+	[7] = XEHPC_PAT_CLOS(2) | XELP_PAT_WB,
 };
 
-static const u32 mtl_pat_table[] = {
-	[0] = MTL_PAT_0_WB,
-	[1] = MTL_PAT_1_WT,
-	[2] = MTL_PAT_3_UC,
-	[3] = MTL_PAT_0_WB | MTL_2_COH_1W,
-	[4] = MTL_PAT_0_WB | MTL_3_COH_2W,
+static const u32 xelpg_pat_table[] = {
+	[0] = XELPG_PAT_0_WB,
+	[1] = XELPG_PAT_1_WT,
+	[2] = XELPG_PAT_3_UC,
+	[3] = XELPG_PAT_0_WB | XELPG_2_COH_1W,
+	[4] = XELPG_PAT_0_WB | XELPG_3_COH_2W,
 };
 
 static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
@@ -85,11 +85,11 @@ static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries)
 	}
 }
 
-static const struct xe_pat_ops tgl_pat_ops = {
+static const struct xe_pat_ops xelp_pat_ops = {
 	.program_graphics = program_pat,
 };
 
-static const struct xe_pat_ops dg2_pat_ops = {
+static const struct xe_pat_ops xehp_pat_ops = {
 	.program_graphics = program_pat_mcr,
 };
 
@@ -97,7 +97,7 @@ static const struct xe_pat_ops dg2_pat_ops = {
  * SAMedia register offsets are adjusted by the write methods and they target
  * registers that are not MCR, while for normal GT they are MCR
  */
-static const struct xe_pat_ops mtl_pat_ops = {
+static const struct xe_pat_ops xelpg_pat_ops = {
 	.program_graphics = program_pat,
 	.program_media = program_pat_mcr,
 };
@@ -105,25 +105,25 @@ static const struct xe_pat_ops mtl_pat_ops = {
 void xe_pat_init_early(struct xe_device *xe)
 {
 	if (xe->info.platform == XE_METEORLAKE) {
-		xe->pat.ops = &mtl_pat_ops;
-		xe->pat.table = mtl_pat_table;
-		xe->pat.n_entries = ARRAY_SIZE(mtl_pat_table);
+		xe->pat.ops = &xelpg_pat_ops;
+		xe->pat.table = xelpg_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table);
 	} else if (xe->info.platform == XE_PVC) {
-		xe->pat.ops = &dg2_pat_ops;
-		xe->pat.table = pvc_pat_table;
-		xe->pat.n_entries = ARRAY_SIZE(pvc_pat_table);
+		xe->pat.ops = &xehp_pat_ops;
+		xe->pat.table = xehpc_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table);
 	} else if (xe->info.platform == XE_DG2) {
 		/*
 		 * Table is the same as previous platforms, but programming
 		 * method has changed.
 		 */
-		xe->pat.ops = &dg2_pat_ops;
-		xe->pat.table = tgl_pat_table;
-		xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table);
+		xe->pat.ops = &xehp_pat_ops;
+		xe->pat.table = xelp_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
 	} else if (GRAPHICS_VERx100(xe) <= 1210) {
-		xe->pat.ops = &tgl_pat_ops;
-		xe->pat.table = tgl_pat_table;
-		xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table);
+		xe->pat.ops = &xelp_pat_ops;
+		xe->pat.table = xelp_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
 	} else {
 		/*
 		 * Going forward we expect to need new PAT settings for most
-- 
GitLab


From 0d68247efcdbf7791122071323719310207354f3 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:38:59 -0700
Subject: [PATCH 1918/2340] drm/xe/pat: Keep track of relevant indexes

Some of the PAT entries are relevant for internal driver use, which
varies per platform. Let the PAT early initialization set what they
should point to so the rest of the driver can use them where needed.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-9-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  2 ++
 drivers/gpu/drm/xe/xe_pat.c          | 12 ++++++++++++
 drivers/gpu/drm/xe/xe_pt_types.h     |  1 +
 3 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c4920631677bd..1ee8410ec3b12 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -15,6 +15,7 @@
 #include "xe_devcoredump_types.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
+#include "xe_pt_types.h"
 #include "xe_pmu.h"
 #include "xe_step_types.h"
 
@@ -321,6 +322,7 @@ struct xe_device {
 		const u32 *table;
 		/** Number of PAT entries */
 		int n_entries;
+		u32 idx[__XE_CACHE_LEVEL_COUNT];
 	} pat;
 
 	/** @d3cold: Encapsulate d3cold related stuff */
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 4ab1b3dc4d5d2..4668ca3932c55 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -108,10 +108,16 @@ void xe_pat_init_early(struct xe_device *xe)
 		xe->pat.ops = &xelpg_pat_ops;
 		xe->pat.table = xelpg_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 2;
+		xe->pat.idx[XE_CACHE_WT] = 1;
+		xe->pat.idx[XE_CACHE_WB] = 3;
 	} else if (xe->info.platform == XE_PVC) {
 		xe->pat.ops = &xehp_pat_ops;
 		xe->pat.table = xehpc_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 0;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 3;
 	} else if (xe->info.platform == XE_DG2) {
 		/*
 		 * Table is the same as previous platforms, but programming
@@ -120,10 +126,16 @@ void xe_pat_init_early(struct xe_device *xe)
 		xe->pat.ops = &xehp_pat_ops;
 		xe->pat.table = xelp_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 0;
 	} else if (GRAPHICS_VERx100(xe) <= 1210) {
 		xe->pat.ops = &xelp_pat_ops;
 		xe->pat.table = xelp_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 0;
 	} else {
 		/*
 		 * Going forward we expect to need new PAT settings for most
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 64e3921a0f468..bf50004992518 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -17,6 +17,7 @@ enum xe_cache_level {
 	XE_CACHE_NONE,
 	XE_CACHE_WT,
 	XE_CACHE_WB,
+	__XE_CACHE_LEVEL_COUNT,
 };
 
 #define XE_VM_MAX_LEVEL 4
-- 
GitLab


From fcd75139cd3c76467c8495c750fd6e27787f7e37 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:39:00 -0700
Subject: [PATCH 1919/2340] drm/xe: Use pat_index to encode pde/pte

Change the xelp_pte_encode() and xelp_pde_encode() functions to use the
platform-dependent pat_index.  The same function can be used for all
platforms as they only need to encode the pat_index bits in the same
pte/pde layout. For platforms that don't have the most significant bit,
as long as they don't return a bogus index they should be fine.

v2: Use the same logic to encode pde as it's compatible with previous
    logic, it's more future proof and also fixes the cache setting for
    PVC (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-10-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h       |  8 ++---
 drivers/gpu/drm/xe/xe_migrate.c  |  6 ++--
 drivers/gpu/drm/xe/xe_pt_types.h |  4 ++-
 drivers/gpu/drm/xe/xe_vm.c       | 56 ++++++++++++++++++++------------
 4 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 23f1b9e74e710..5666fd6d7f11c 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -48,10 +48,10 @@
 #define XE_BO_INTERNAL_TEST		BIT(30)
 #define XE_BO_INTERNAL_64K		BIT(31)
 
-#define PPAT_UNCACHED                   GENMASK_ULL(4, 3)
-#define PPAT_CACHED_PDE                 0
-#define PPAT_CACHED                     BIT_ULL(7)
-#define PPAT_DISPLAY_ELLC               BIT_ULL(4)
+#define XELPG_PPGTT_PTE_PAT3		BIT_ULL(62)
+#define XE_PPGTT_PTE_PAT2		BIT_ULL(7)
+#define XE_PPGTT_PTE_PAT1		BIT_ULL(4)
+#define XE_PPGTT_PTE_PAT0		BIT_ULL(3)
 
 #define XE_PTE_SHIFT			12
 #define XE_PAGE_SIZE			(1 << XE_PTE_SHIFT)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index cd4dbbf6c3835..77a2468ca85c5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -261,7 +261,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
 		level = 2;
 		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
-		flags = vm->pt_ops->pte_encode_addr(0, XE_CACHE_WB, level, true, 0);
+		flags = vm->pt_ops->pte_encode_addr(xe, 0, XE_CACHE_WB, level,
+						    true, 0);
 
 		/*
 		 * Use 1GB pages, it shouldn't matter the physical amount of
@@ -498,7 +499,8 @@ static void emit_pte(struct xe_migrate *m,
 				devmem = true;
 			}
 
-			addr = m->q->vm->pt_ops->pte_encode_addr(addr, XE_CACHE_WB,
+			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+								 addr, XE_CACHE_WB,
 								 0, devmem, flags);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index bf50004992518..bd6645295fe6d 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -11,6 +11,7 @@
 #include "xe_pt_walk.h"
 
 struct xe_bo;
+struct xe_device;
 struct xe_vma;
 
 enum xe_cache_level {
@@ -40,7 +41,8 @@ struct xe_pt_ops {
 			     enum xe_cache_level cache, u32 pt_level);
 	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
 			      enum xe_cache_level cache, u32 pt_level);
-	u64 (*pte_encode_addr)(u64 addr, enum xe_cache_level cache,
+	u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
+			       enum xe_cache_level cache,
 			       u32 pt_level, bool devmem, u64 flags);
 	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
 			     const enum xe_cache_level cache);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 1d3569097e5f6..6ab115df9c4e4 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1210,27 +1210,38 @@ static struct drm_gpuvm_ops gpuvm_ops = {
 	.vm_free = xe_vm_free,
 };
 
-static u64 pde_encode_cache(enum xe_cache_level cache)
+static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
 {
-	/* FIXME: I don't think the PPAT handling is correct for MTL */
+	u32 pat_index = xe->pat.idx[cache];
+	u64 pte = 0;
 
-	if (cache != XE_CACHE_NONE)
-		return PPAT_CACHED_PDE;
+	if (pat_index & BIT(0))
+		pte |= XE_PPGTT_PTE_PAT0;
 
-	return PPAT_UNCACHED;
+	if (pat_index & BIT(1))
+		pte |= XE_PPGTT_PTE_PAT1;
+
+	return pte;
 }
 
-static u64 pte_encode_cache(enum xe_cache_level cache)
+static u64 pte_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
 {
-	/* FIXME: I don't think the PPAT handling is correct for MTL */
-	switch (cache) {
-	case XE_CACHE_NONE:
-		return PPAT_UNCACHED;
-	case XE_CACHE_WT:
-		return PPAT_DISPLAY_ELLC;
-	default:
-		return PPAT_CACHED;
-	}
+	u32 pat_index = xe->pat.idx[cache];
+	u64 pte = 0;
+
+	if (pat_index & BIT(0))
+		pte |= XE_PPGTT_PTE_PAT0;
+
+	if (pat_index & BIT(1))
+		pte |= XE_PPGTT_PTE_PAT1;
+
+	if (pat_index & BIT(2))
+		pte |= XE_PPGTT_PTE_PAT2;
+
+	if (pat_index & BIT(3))
+		pte |= XELPG_PPGTT_PTE_PAT3;
+
+	return pte;
 }
 
 static u64 pte_encode_ps(u32 pt_level)
@@ -1248,11 +1259,12 @@ static u64 pte_encode_ps(u32 pt_level)
 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
 			      const enum xe_cache_level cache)
 {
+	struct xe_device *xe = xe_bo_device(bo);
 	u64 pde;
 
 	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pde |= pde_encode_cache(cache);
+	pde |= pde_encode_cache(xe, cache);
 
 	return pde;
 }
@@ -1260,11 +1272,12 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 			      enum xe_cache_level cache, u32 pt_level)
 {
+	struct xe_device *xe = xe_bo_device(bo);
 	u64 pte;
 
 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_cache(xe, cache);
 	pte |= pte_encode_ps(pt_level);
 
 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
@@ -1276,12 +1289,14 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
 			       enum xe_cache_level cache, u32 pt_level)
 {
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+
 	pte |= XE_PAGE_PRESENT;
 
 	if (likely(!xe_vma_read_only(vma)))
 		pte |= XE_PAGE_RW;
 
-	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_cache(xe, cache);
 	pte |= pte_encode_ps(pt_level);
 
 	if (unlikely(xe_vma_is_null(vma)))
@@ -1290,7 +1305,8 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
 	return pte;
 }
 
-static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache,
+static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
+				enum xe_cache_level cache,
 				u32 pt_level, bool devmem, u64 flags)
 {
 	u64 pte;
@@ -1300,7 +1316,7 @@ static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache,
 
 	pte = addr;
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_cache(cache);
+	pte |= pte_encode_cache(xe, cache);
 	pte |= pte_encode_ps(pt_level);
 
 	if (devmem)
-- 
GitLab


From 1464f56b47d8db63ad95dad3fd8845ec412dc8d5 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 27 Sep 2023 12:39:01 -0700
Subject: [PATCH 1920/2340] drm/xe: Use vfunc for ggtt pte encoding

Use 2 different functions for encoding the ggtt's pte, assigning them
during initialization. Main difference is that before Xe-LPG, the pte
didn't have the cache bits.

v2: Re-use xelp_ggtt_pte_encode_bo() for the common part with
    xelpg_ggtt_pte_encode_bo() (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230927193902.2849159-11-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c       | 53 ++++++++++++++++++++++--------
 drivers/gpu/drm/xe/xe_ggtt.h       |  1 -
 drivers/gpu/drm/xe/xe_ggtt_types.h |  9 +++++
 3 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index ba34b8784572e..ec7bbb1dc2958 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -20,16 +20,15 @@
 #include "xe_mmio.h"
 #include "xe_wopcm.h"
 
-/* FIXME: Common file, preferably auto-gen */
-#define MTL_GGTT_PTE_PAT0	BIT_ULL(52)
-#define MTL_GGTT_PTE_PAT1	BIT_ULL(53)
+#define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
+#define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)
 
 /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
 #define GUC_GGTT_TOP	0xFEE00000
 
-u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
+static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+				   enum xe_cache_level cache)
 {
-	struct xe_device *xe = xe_bo_device(bo);
 	u64 pte;
 
 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
@@ -38,11 +37,25 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
 		pte |= XE_GGTT_PTE_DM;
 
-	/* FIXME: vfunc + pass in caching rules */
-	if (xe->info.platform == XE_METEORLAKE) {
-		pte |= MTL_GGTT_PTE_PAT0;
-		pte |= MTL_GGTT_PTE_PAT1;
-	}
+	return pte;
+}
+
+static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+				    enum xe_cache_level cache)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u32 pat_index = xe->pat.idx[cache];
+	u64 pte;
+
+	pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, cache);
+
+	xe_assert(xe, pat_index <= 3);
+
+	if (pat_index & BIT(0))
+		pte |= XELPG_GGTT_PTE_PAT0;
+
+	if (pat_index & BIT(1))
+		pte |= XELPG_GGTT_PTE_PAT1;
 
 	return pte;
 }
@@ -72,7 +85,8 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
 	xe_tile_assert(ggtt->tile, start < end);
 
 	if (ggtt->scratch)
-		scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+		scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0,
+							  XE_CACHE_WB);
 	else
 		scratch_pte = 0;
 
@@ -102,6 +116,14 @@ static void primelockdep(struct xe_ggtt *ggtt)
 	fs_reclaim_release(GFP_KERNEL);
 }
 
+static const struct xe_ggtt_pt_ops xelp_pt_ops = {
+	.pte_encode_bo = xelp_ggtt_pte_encode_bo,
+};
+
+static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
+	.pte_encode_bo = xelpg_ggtt_pte_encode_bo,
+};
+
 int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 {
 	struct xe_device *xe = tile_to_xe(ggtt->tile);
@@ -146,6 +168,11 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 	if (ggtt->size > GUC_GGTT_TOP)
 		ggtt->size = GUC_GGTT_TOP;
 
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		ggtt->pt_ops = &xelpg_pt_ops;
+	else
+		ggtt->pt_ops = &xelp_pt_ops;
+
 	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
 		    ggtt->size - xe_wopcm_size(xe));
 	mutex_init(&ggtt->lock);
@@ -260,7 +287,7 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
 {
 	u64 addr, scratch_pte;
 
-	scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+	scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, XE_CACHE_WB);
 
 	printk("%sGlobal GTT:", prefix);
 	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
@@ -301,7 +328,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	u64 offset, pte;
 
 	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
-		pte = xe_ggtt_pte_encode(bo, offset);
+		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, XE_CACHE_WB);
 		xe_ggtt_set_pte(ggtt, start + offset, pte);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 8e7360926bea9..04bb26b0938e2 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -10,7 +10,6 @@
 
 struct drm_printer;
 
-u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset);
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
 void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
 int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt);
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index d34b3e7339452..486016ea5b67d 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -8,9 +8,16 @@
 
 #include <drm/drm_mm.h>
 
+#include "xe_pt_types.h"
+
 struct xe_bo;
 struct xe_gt;
 
+struct xe_ggtt_pt_ops {
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     enum xe_cache_level cache);
+};
+
 struct xe_ggtt {
 	struct xe_tile *tile;
 
@@ -25,6 +32,8 @@ struct xe_ggtt {
 
 	u64 __iomem *gsm;
 
+	const struct xe_ggtt_pt_ops *pt_ops;
+
 	struct drm_mm mm;
 };
 
-- 
GitLab


From 5349bb76d62048e73f6e4a863b40a309c62dc47f Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Thu, 28 Sep 2023 16:56:21 +0300
Subject: [PATCH 1921/2340] drm/xe: do not register to PM if GuC is disabled

When working without GuC (i.e. working with execlists), the flow
attempts to perform suspend operation which is failing due to a
lack of support without GuC.

If PM ops are not supported without GuC we may as well avoid PM
registration rather than returning errors from various PM flows.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 4 ----
 drivers/gpu/drm/xe/xe_pm.c | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 1aa44d4f9ac1c..68cd9a7ee0872 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -641,10 +641,6 @@ int xe_gt_suspend(struct xe_gt *gt)
 {
 	int err;
 
-	/* For now suspend/resume is only allowed with GuC */
-	if (!xe_device_uc_enabled(gt_to_xe(gt)))
-		return -ENODEV;
-
 	xe_gt_sanitize(gt);
 
 	xe_device_mem_access_get(gt_to_xe(gt));
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 2c2745f862237..93a7658da324c 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -156,6 +156,10 @@ void xe_pm_init(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
+	/* For now suspend/resume is only allowed with GuC */
+	if (!xe_device_uc_enabled(xe))
+		return;
+
 	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
 
 	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
-- 
GitLab


From 28b1d9155c3c1651a6e184e1286cebb63ec6b51c Mon Sep 17 00:00:00 2001
From: Bommithi Sakeena <bommithi.sakeena@intel.com>
Date: Wed, 27 Sep 2023 16:50:11 +0000
Subject: [PATCH 1922/2340] drm/xe: Ensure mutex are destroyed

Add missing mutex_destroy calls to fini functions or convert to
drmm_mutex_init where fini function is not available.

Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Bommithi Sakeena <bommithi.sakeena@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c     | 2 +-
 drivers/gpu/drm/xe/xe_guc_pc.c     | 1 +
 drivers/gpu/drm/xe/xe_guc_submit.c | 1 +
 drivers/gpu/drm/xe/xe_pcode.c      | 4 +++-
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 2046bd269bbdd..8b686c8b33390 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -137,7 +137,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 
 	xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
 
-	mutex_init(&ct->lock);
+	drmm_mutex_init(&xe->drm, &ct->lock);
 	spin_lock_init(&ct->fast_lock);
 	xa_init(&ct->fence_lookup);
 	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 8a4d299d6cb02..d9375d1d582ff 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -890,6 +890,7 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc)
 	XE_WARN_ON(xe_guc_pc_stop(pc));
 	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
 	xe_bo_unpin_map_no_vm(pc->bo);
+	mutex_destroy(&pc->freq_lock);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 3e136b60196ef..d0e60349fc5a1 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -195,6 +195,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
 	xa_destroy(&guc->submission_state.exec_queue_lookup);
 	ida_destroy(&guc->submission_state.guc_ids);
 	bitmap_free(guc->submission_state.guc_ids_bitmap);
+	mutex_destroy(&guc->submission_state.lock);
 }
 
 #define GUC_ID_MAX		65535
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 7f1bf2297f51e..4a240acf76252 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -8,6 +8,8 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 
+#include <drm/drm_managed.h>
+
 #include "xe_gt.h"
 #include "xe_mmio.h"
 #include "xe_pcode_api.h"
@@ -276,7 +278,7 @@ int xe_pcode_init(struct xe_gt *gt)
  */
 int xe_pcode_probe(struct xe_gt *gt)
 {
-	mutex_init(&gt->pcode.lock);
+	drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
 
 	if (!IS_DGFX(gt_to_xe(gt)))
 		return 0;
-- 
GitLab


From 909faaa66c5ec0d789b6620127329f2b17b01602 Mon Sep 17 00:00:00 2001
From: Bommithi Sakeena <bommithi.sakeena@intel.com>
Date: Wed, 27 Sep 2023 16:50:12 +0000
Subject: [PATCH 1923/2340] drm/xe: Add a missing mutex_destroy to
 xe_ttm_vram_mgr

Ensure that the mutex is destroyed at fini function.

Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Bommithi Sakeena <bommithi.sakeena@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 06a54c8bd46f1..285791eb4a796 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -328,6 +328,8 @@ static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
 	ttm_resource_manager_cleanup(&mgr->manager);
 
 	ttm_set_driver_manager(&xe->ttm, mgr->mem_type, NULL);
+
+	mutex_destroy(&mgr->lock);
 }
 
 int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
-- 
GitLab


From 9be79251813d113f9157e92cd8b0eb8563253a09 Mon Sep 17 00:00:00 2001
From: Fei Yang <fei.yang@intel.com>
Date: Wed, 27 Sep 2023 21:43:35 -0700
Subject: [PATCH 1924/2340] drm/xe: set PTE_AE for all platforms supporting it

Atomic access is supported by PVC, and became a common feature for all
platforms starting from Xe2. To enable that XE_VMA_ATOMIC_PTE_BIT needs
to be set, then pte encode will eventually set PTE_AE for devmem.

Signed-off-by: Fei Yang <fei.yang@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230928044335.1474903-2-fei.yang@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 6ab115df9c4e4..2bb0a1669a961 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -901,7 +901,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 			vma->tile_mask |= 0x1 << id;
 	}
 
-	if (vm->xe->info.platform == XE_PVC)
+	if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
 		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
 
 	if (bo) {
-- 
GitLab


From e2e2d9633706f79e6efaa826cf72cbc12cf531f8 Mon Sep 17 00:00:00 2001
From: Fei Yang <fei.yang@intel.com>
Date: Thu, 21 Sep 2023 15:05:00 -0700
Subject: [PATCH 1925/2340] drm/xe: timeout needs to be a signed value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In xe_wait_user_fence_ioctl, the timeout is currently defined as
unsigned long. That could potentially pass a negative value to
the schedule_timeout() call because nsecs_to_jiffies() returns an
unsigned long which gets used as signed long.

[ 187.732238] schedule_timeout: wrong timeout value fffffffffffffc18
[ 187.733180] CPU: 0 PID: 792 Comm: test_thread_dim Tainted: G U 6.4.0-xe #1
[ 187.734251] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[ 187.735019] Call Trace:
[ 187.735373] <TASK>
[ 187.735687] dump_stack_lvl+0x92/0xb0
[ 187.736193] schedule_timeout+0x348/0x430
[ 187.736739] ? __might_fault+0x67/0xd0
[ 187.737255] ? check_chain_key+0x224/0x2d0
[ 187.737812] ? __pfx_schedule_timeout+0x10/0x10
[ 187.738429] ? __might_fault+0x6b/0xd0
[ 187.738946] ? __pfx_lock_release+0x10/0x10
[ 187.739512] ? __pfx_lock_release+0x10/0x10
[ 187.740080] wait_woken+0x86/0x100
[ 187.740556] xe_wait_user_fence_ioctl+0x34b/0xe00 [xe]
[ 187.741281] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe]
[ 187.742075] ? lock_acquire+0x169/0x3d0
[ 187.742601] ? check_chain_key+0x224/0x2d0
[ 187.743158] ? drm_dev_enter+0x9/0xe0 [drm]
[ 187.743740] ? __pfx_woken_wake_function+0x10/0x10
[ 187.744388] ? drm_dev_exit+0x11/0x50 [drm]
[ 187.744969] ? __pfx_lock_release+0x10/0x10
[ 187.745536] ? __might_fault+0x67/0xd0
[ 187.746052] ? check_chain_key+0x224/0x2d0
[ 187.746610] drm_ioctl_kernel+0x172/0x250 [drm]
[ 187.747242] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe]
[ 187.748037] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
[ 187.748729] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe]
[ 187.749524] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe]
[ 187.750319] drm_ioctl+0x35e/0x620 [drm]
[ 187.750871] ? __pfx_drm_ioctl+0x10/0x10 [drm]
[ 187.751495] ? restore_fpregs_from_fpstate+0x99/0x140
[ 187.752172] ? __pfx_restore_fpregs_from_fpstate+0x10/0x10
[ 187.752901] ? mark_held_locks+0x24/0x90
[ 187.753438] __x64_sys_ioctl+0xb4/0xf0
[ 187.753954] do_syscall_64+0x3f/0x90
[ 187.754450] entry_SYSCALL_64_after_hwframe+0x72/0xdc
[ 187.755127] RIP: 0033:0x7f4e6651aaff
[ 187.755623] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00
[ 187.757995] RSP: 002b:00007fff05f37a50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 187.758995] RAX: ffffffffffffffda RBX: 000055eca47c8130 RCX: 00007f4e6651aaff
[ 187.759935] RDX: 00007fff05f37b60 RSI: 00000000c050644b RDI: 0000000000000004
[ 187.760874] RBP: 0000000000000017 R08: 0000000000000017 R09: 7fffffffffffffff
[ 187.761814] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[ 187.762753] R13: 0000000000000000 R14: 0000000000000000 R15: 00007f4e65d19ce0
[ 187.763694] </TASK>

Fixes: 5572a0046857 ("drm/xe: Use nanoseconds instead of jiffies in uapi for user fence")
Signed-off-by: Fei Yang <fei.yang@intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/20230921220500.994558-2-fei.yang@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 55 +++++++++++++++++--------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 761eed3a022f7..3ac4cd24d5b4b 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -85,17 +85,45 @@ static int check_hw_engines(struct xe_device *xe,
 			 DRM_XE_UFENCE_WAIT_VM_ERROR)
 #define MAX_OP		DRM_XE_UFENCE_WAIT_LTE
 
-static unsigned long to_jiffies_timeout(struct drm_xe_wait_user_fence *args)
+static long to_jiffies_timeout(struct xe_device *xe,
+			       struct drm_xe_wait_user_fence *args)
 {
-	unsigned long timeout;
+	unsigned long long t;
+	long timeout;
 
-	if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
-		return drm_timeout_abs_to_jiffies(args->timeout);
+	/*
+	 * For negative timeout we want to wait "forever" by setting
+	 * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also
+	 * to args->timeout to avoid being zeroed on the signal delivery
+	 * (see arithmetics after wait).
+	 */
+	if (args->timeout < 0) {
+		args->timeout = MAX_SCHEDULE_TIMEOUT;
+		return MAX_SCHEDULE_TIMEOUT;
+	}
 
-	if (args->timeout == MAX_SCHEDULE_TIMEOUT || args->timeout == 0)
-		return args->timeout;
+	if (args->timeout == 0)
+		return 0;
 
-	timeout = nsecs_to_jiffies(args->timeout);
+	/*
+	 * Save the timeout to an u64 variable because nsecs_to_jiffies
+	 * might return a value that overflows s32 variable.
+	 */
+	if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
+		t = drm_timeout_abs_to_jiffies(args->timeout);
+	else
+		t = nsecs_to_jiffies(args->timeout);
+
+	/*
+	 * Anything greater then MAX_SCHEDULE_TIMEOUT is meaningless,
+	 * also we don't want to cap it at MAX_SCHEDULE_TIMEOUT because
+	 * apparently user doesn't mean to wait forever, otherwise the
+	 * args->timeout should have been set to a negative value.
+	 */
+	if (t > MAX_SCHEDULE_TIMEOUT)
+		timeout = MAX_SCHEDULE_TIMEOUT - 1;
+	else
+		timeout = t;
 
 	return timeout ?: 1;
 }
@@ -114,7 +142,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	int err;
 	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP ||
 		args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
-	unsigned long timeout;
+	long timeout;
 	ktime_t start;
 
 	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
@@ -169,16 +197,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 		addr = vm->async_ops.error_capture.addr;
 	}
 
-	/*
-	 * For negative timeout we want to wait "forever" by setting
-	 * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also
-	 * to args->timeout to avoid being zeroed on the signal delivery
-	 * (see arithmetics after wait).
-	 */
-	if (args->timeout < 0)
-		args->timeout = MAX_SCHEDULE_TIMEOUT;
-
-	timeout = to_jiffies_timeout(args);
+	timeout = to_jiffies_timeout(xe, args);
 
 	start = ktime_get();
 
-- 
GitLab


From c489925a154e164a46e4d1f9c62da3332e496edd Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 29 Sep 2023 16:03:33 -0700
Subject: [PATCH 1926/2340] drm/xe/tuning: Add missing engine class rules for
 LRC tuning

The LRC tuning settings we have today are modifying registers that are
part of the RCS engine's context; they're not part of the general CSFE
context that would apply to all engines.  Add ENGINE_CLASS(RENDER) to
the RTP rules to properly restrict these to the RCS.

Bspec: 46255, 46261
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20230929230332.3348841-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_tuning.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 412e59de98424..08174dda9bc73 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -29,7 +29,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 
 static const struct xe_rtp_entry_sr lrc_tunings[] = {
 	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
 	  /* read verification is ignored due to 1608008084. */
 	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2,
 						FF_MODE2_GS_TIMER_MASK,
@@ -39,19 +39,19 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
 	/* DG2 */
 
 	{ XE_RTP_NAME("Tuning: L3 cache"),
-	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
 	},
 	{ XE_RTP_NAME("Tuning: TDS gang timer"),
-	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  /* read verification is ignored as in i915 - need to check enabling */
 	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
 						FF_MODE2_TDS_TIMER_MASK,
 						FF_MODE2_TDS_TIMER_128))
 	},
 	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
-	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
 	},
 	{}
-- 
GitLab


From 0dcac63649e37e176224f11f69a3c85653d0d887 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 28 Sep 2023 21:49:59 -0700
Subject: [PATCH 1927/2340] drm/xe/xe2: Extend reserved stolen sizes

For xe2, besides the previous sizes, the reserved portion of stolen can
also have 16MB and 32MB.

Bspec: 53148
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929044959.3149265-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h      |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 32 ++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index d62555757d0fa..1574d11d4e143 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -82,7 +82,7 @@
 #define GSMBASE					XE_REG(0x108100)
 
 #define STOLEN_RESERVED				XE_REG(0x1082c0)
-#define   WOPCM_SIZE_MASK			REG_GENMASK64(8, 7)
+#define   WOPCM_SIZE_MASK			REG_GENMASK64(9, 7)
 
 #define MTL_RP_STATE_CAP			XE_REG(0x138000)
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 6ba6b1b7f34b6..79fbd74a39446 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -86,6 +86,29 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	return ALIGN_DOWN(stolen_size, SZ_1M);
 }
 
+static u32 get_wopcm_size(struct xe_device *xe)
+{
+	u32 wopcm_size;
+	u64 val;
+
+	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
+	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
+
+	switch (val) {
+	case 0x5 ... 0x6:
+		val--;
+		fallthrough;
+	case 0x0 ... 0x3:
+		wopcm_size = (1U << val) * SZ_1M;
+		break;
+	default:
+		WARN(1, "Missing case wopcm_size=%llx\n", val);
+		wopcm_size = 0;
+	}
+
+	return wopcm_size;
+}
+
 static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -125,10 +148,11 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 	}
 
 	/* Carve out the top of DSM as it contains the reserved WOPCM region */
-	wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK,
-				     xe_mmio_read64_2x32(xe_root_mmio_gt(xe),
-							 STOLEN_RESERVED));
-	stolen_size -= (1U << wopcm_size) * SZ_1M;
+	wopcm_size = get_wopcm_size(xe);
+	if (drm_WARN_ON(&xe->drm, !wopcm_size))
+		return 0;
+
+	stolen_size -= wopcm_size;
 
 	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
 		return 0;
-- 
GitLab


From 2c0ac321d9975d670541eb3da19064f67b3f995b Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Fri, 29 Sep 2023 14:36:37 -0700
Subject: [PATCH 1928/2340] drm/xe: Adjust mocs field mask definitions

Instead of using xe_mocs_index_to_value(), simply define the bitmask
with the shift left applied. This will make it easier to adapt to new
platforms that simply use the index.

This also fixes PVC bug in emit_clear_link_copy() where the MOCS was
getting shifted both by PVC_MS_MOCS_INDEX_MASK definition and by the
xe_moc_index_to_value function.

Bspec: 44509
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929213640.3189912-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  4 ++--
 drivers/gpu/drm/xe/xe_migrate.c           |  6 +++---
 drivers/gpu/drm/xe/xe_mocs.h              | 12 ------------
 3 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 12120dd37aa2a..1ee8d46d92d95 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -44,7 +44,7 @@
 #define   DST_ACCESS_TYPE_SHIFT		20
 #define   CCS_SIZE_MASK			0x3FF
 #define   CCS_SIZE_SHIFT		8
-#define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 25)
+#define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 26)
 #define   NUM_CCS_BYTES_PER_BLOCK	256
 #define   NUM_BYTES_PER_CCS_BYTE	256
 #define   NUM_CCS_BLKS_PER_XFER		1024
@@ -52,7 +52,7 @@
 #define XY_FAST_COLOR_BLT_CMD		(2 << 29 | 0x44 << 22)
 #define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
 #define   XY_FAST_COLOR_BLT_DW		16
-#define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 21)
+#define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 22)
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 
 #define XY_FAST_COPY_BLT_CMD		(2 << 29 | 0x42 << 22)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 77a2468ca85c5..52c3a040c606a 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -519,7 +519,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 {
 	u32 *cs = bb->cs + bb->len;
 	u32 num_ccs_blks;
-	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+	u32 mocs = gt->mocs.uc_index;
 
 	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
 				    NUM_CCS_BYTES_PER_BLOCK);
@@ -806,7 +806,7 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
 				 u32 size, u32 pitch)
 {
 	u32 *cs = bb->cs + bb->len;
-	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+	u32 mocs = gt->mocs.uc_index;
 	u32 len = PVC_MEM_SET_CMD_LEN_DW;
 
 	*cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
@@ -828,7 +828,7 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 len = XY_FAST_COLOR_BLT_DW;
-	u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+	u32 mocs = gt->mocs.uc_index;
 
 	if (GRAPHICS_VERx100(xe) < 1250)
 		len = 11;
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
index d0f1ec4b0336a..053754c5a94ee 100644
--- a/drivers/gpu/drm/xe/xe_mocs.h
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -14,16 +14,4 @@ struct xe_gt;
 void xe_mocs_init_early(struct xe_gt *gt);
 void xe_mocs_init(struct xe_gt *gt);
 
-/**
- * xe_mocs_index_to_value - Translate mocs index to the mocs value exected by
- * most blitter commands.
- * @mocs_index: index into the mocs tables
- *
- * Return: The corresponding mocs value to be programmed.
- */
-static inline u32 xe_mocs_index_to_value(u32 mocs_index)
-{
-	return mocs_index << 1;
-}
-
 #endif
-- 
GitLab


From c690f0e6b7e61826535eb91a28bf99197345faf2 Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Fri, 29 Sep 2023 14:36:38 -0700
Subject: [PATCH 1929/2340] drm/xe: Rename MEM_SET instruction

PVC_MS_* doesn't reflect the real name of the instruction. Rename
it to follow the name used in the bspec.

Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929213640.3189912-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 6 +++---
 drivers/gpu/drm/xe/xe_migrate.c           | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 1ee8d46d92d95..1fdf2e4f1c9fe 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -60,10 +60,10 @@
 
 #define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
 #define   PVC_MEM_SET_CMD_LEN_DW	7
-#define   PVC_MS_MATRIX			REG_BIT(17)
-#define   PVC_MS_DATA_FIELD		GENMASK(31, 24)
+#define   PVC_MEM_SET_MATRIX		REG_BIT(17)
+#define   PVC_MEM_SET_DATA_FIELD	GENMASK(31, 24)
 /* Bspec lists field as [6:0], but index alone is from [6:1] */
-#define   PVC_MS_MOCS_INDEX_MASK	GENMASK(6, 1)
+#define   PVC_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 1)
 
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 52c3a040c606a..313e3c0a6e90b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -809,13 +809,13 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
 	u32 mocs = gt->mocs.uc_index;
 	u32 len = PVC_MEM_SET_CMD_LEN_DW;
 
-	*cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
+	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
 	*cs++ = pitch - 1;
 	*cs++ = (size / pitch) - 1;
 	*cs++ = pitch - 1;
 	*cs++ = lower_32_bits(src_ofs);
 	*cs++ = upper_32_bits(src_ofs);
-	*cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
+	*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs);
 
 	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
 
-- 
GitLab


From 4bdd8c2ed9572b757521e981cfb35a3581c112c8 Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Fri, 29 Sep 2023 14:36:39 -0700
Subject: [PATCH 1930/2340] drm/xe/xe2: Set tile y type in XY_FAST_COPY_BLT to
 Tile4

Set bits 30 and 31 of XY_FAST_COPY_BLT's dword1 for XeHP and above.

Destination or source being Y-Major is selected on dword0 and there's
nothing to set on dword1. According to the bspec for Xe2,
"Behavior is undefined when programmed the value 0". Also for XeHP,
the only value allowed in those bits is 0b11, not being possible to
select "Legacy Tile-Y" anymore, only the newer Tile4.

So, unconditionally set those bits for graphics IP 12.50 and above.

v2: Reword commit message and extend it to graphics version >= 12.50
    (Matt Roper)

Bspec: 57567
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929213640.3189912-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 2 ++
 drivers/gpu/drm/xe/xe_migrate.c           | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 1fdf2e4f1c9fe..cc7b56763f100 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -57,6 +57,8 @@
 
 #define XY_FAST_COPY_BLT_CMD		(2 << 29 | 0x42 << 22)
 #define   XY_FAST_COPY_BLT_DEPTH_32	(3<<24)
+#define   XY_FAST_COPY_BLT_D1_SRC_TILE4	REG_BIT(31)
+#define   XY_FAST_COPY_BLT_D1_DST_TILE4	REG_BIT(30)
 
 #define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
 #define   PVC_MEM_SET_CMD_LEN_DW	7
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 313e3c0a6e90b..69488a0fada47 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -543,12 +543,19 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 		      u64 src_ofs, u64 dst_ofs, unsigned int size,
 		      unsigned int pitch)
 {
+	struct xe_device *xe = gt_to_xe(gt);
+
 	xe_gt_assert(gt, size / pitch <= S16_MAX);
 	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
 	xe_gt_assert(gt, pitch <= U16_MAX);
 
 	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
-	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch |
+				    XY_FAST_COPY_BLT_D1_SRC_TILE4 |
+				    XY_FAST_COPY_BLT_D1_DST_TILE4;
+	else
+		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
 	bb->cs[bb->len++] = 0;
 	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
 	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
-- 
GitLab


From 30603b5b0f8678fff799f4e3e2b45b8c08648575 Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Fri, 29 Sep 2023 14:36:40 -0700
Subject: [PATCH 1931/2340] drm/xe/xe2: Update MOCS fields in blitter
 instructions

Xe2 changes or adds bits for mocs in a few BLT instructions:
XY_CTRL_SURF_COPY_BLT, XY_FAST_COLOR_BLT, XY_FAST_COPY_BLT, and MEM_SET.
Modify the code to deal with the new location. Unlike Xe1, the MOCS
field in those instructions is only the MOCS index and not the
Structure_MEMORY_OBJECT_CONTROL_STATE anymore. The pxp bit is now
explicitly documented separately.

Bspec: 57567,57566,57565,57562
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929213640.3189912-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  4 ++
 drivers/gpu/drm/xe/xe_migrate.c           | 48 +++++++++++++++--------
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index cc7b56763f100..21738281bdd04 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -45,6 +45,7 @@
 #define   CCS_SIZE_MASK			0x3FF
 #define   CCS_SIZE_SHIFT		8
 #define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 26)
+#define   XE2_XY_CTRL_SURF_MOCS_INDEX_MASK	GENMASK(31, 28)
 #define   NUM_CCS_BYTES_PER_BLOCK	256
 #define   NUM_BYTES_PER_CCS_BYTE	256
 #define   NUM_CCS_BLKS_PER_XFER		1024
@@ -53,12 +54,14 @@
 #define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
 #define   XY_FAST_COLOR_BLT_DW		16
 #define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 22)
+#define   XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK	GENMASK(27, 24)
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 
 #define XY_FAST_COPY_BLT_CMD		(2 << 29 | 0x42 << 22)
 #define   XY_FAST_COPY_BLT_DEPTH_32	(3<<24)
 #define   XY_FAST_COPY_BLT_D1_SRC_TILE4	REG_BIT(31)
 #define   XY_FAST_COPY_BLT_D1_DST_TILE4	REG_BIT(30)
+#define   XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK	GENMASK(23, 20)
 
 #define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
 #define   PVC_MEM_SET_CMD_LEN_DW	7
@@ -66,6 +69,7 @@
 #define   PVC_MEM_SET_DATA_FIELD	GENMASK(31, 24)
 /* Bspec lists field as [6:0], but index alone is from [6:1] */
 #define   PVC_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 1)
+#define   XE2_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 3)
 
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 69488a0fada47..4b7210c793f5f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -517,23 +517,28 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 			  u64 src_ofs, bool src_is_indirect,
 			  u32 size)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 num_ccs_blks;
-	u32 mocs = gt->mocs.uc_index;
+	u32 mocs;
 
 	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
 				    NUM_CCS_BYTES_PER_BLOCK);
 	xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
+
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
+
 	*cs++ = XY_CTRL_SURF_COPY_BLT |
 		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
 		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
 		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
 	*cs++ = lower_32_bits(src_ofs);
-	*cs++ = upper_32_bits(src_ofs) |
-		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+	*cs++ = upper_32_bits(src_ofs) | mocs;
 	*cs++ = lower_32_bits(dst_ofs);
-	*cs++ = upper_32_bits(dst_ofs) |
-		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+	*cs++ = upper_32_bits(dst_ofs) | mocs;
 
 	bb->len = cs - bb->cs;
 }
@@ -544,24 +549,27 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
 		      unsigned int pitch)
 {
 	struct xe_device *xe = gt_to_xe(gt);
+	u32 mocs = 0;
+	u32 tile_y = 0;
 
 	xe_gt_assert(gt, size / pitch <= S16_MAX);
 	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
 	xe_gt_assert(gt, pitch <= U16_MAX);
 
-	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+	if (GRAPHICS_VER(xe) >= 20)
+		mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
 	if (GRAPHICS_VERx100(xe) >= 1250)
-		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch |
-				    XY_FAST_COPY_BLT_D1_SRC_TILE4 |
-				    XY_FAST_COPY_BLT_D1_DST_TILE4;
-	else
-		bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
+		tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
+
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
 	bb->cs[bb->len++] = 0;
 	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
 	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
 	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
 	bb->cs[bb->len++] = 0;
-	bb->cs[bb->len++] = pitch;
+	bb->cs[bb->len++] = pitch | mocs;
 	bb->cs[bb->len++] = lower_32_bits(src_ofs);
 	bb->cs[bb->len++] = upper_32_bits(src_ofs);
 }
@@ -812,8 +820,8 @@ err_sync:
 static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
 				 u32 size, u32 pitch)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
-	u32 mocs = gt->mocs.uc_index;
 	u32 len = PVC_MEM_SET_CMD_LEN_DW;
 
 	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
@@ -822,7 +830,10 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
 	*cs++ = pitch - 1;
 	*cs++ = lower_32_bits(src_ofs);
 	*cs++ = upper_32_bits(src_ofs);
-	*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
 
 	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
 
@@ -835,15 +846,18 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 len = XY_FAST_COLOR_BLT_DW;
-	u32 mocs = gt->mocs.uc_index;
 
 	if (GRAPHICS_VERx100(xe) < 1250)
 		len = 11;
 
 	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
 		(len - 2);
-	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
-		(pitch - 1);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
+	else
+		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
 	*cs++ = 0;
 	*cs++ = (size / pitch) << 16 | pitch / 4;
 	*cs++ = lower_32_bits(src_ofs);
-- 
GitLab


From 328e089bfb376a9817a260542fbea0fe9e0975ac Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Thu, 28 Sep 2023 22:15:39 -0700
Subject: [PATCH 1932/2340] drm/xe: Leverage ComputeCS read L3 caching

On platforms that support read L3 caching, set the default mocs index in
CCS RING_CMD_CTL to leverage the read caching in L3.

Currently PVC and Xe2 platforms have the support.

Bspec: 72161
Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929051539.3157441-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index a8681089fb60a..49128f640e15d 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -316,22 +316,25 @@ static void
 hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 {
 	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	/*
+	 * RING_CMD_CCTL specifies the default MOCS entry that will be
+	 * used by the command streamer when executing commands that
+	 * don't have a way to explicitly specify a MOCS setting.
+	 * The default should usually reference whichever MOCS entry
+	 * corresponds to uncached behavior, although use of a WB cached
+	 * entry is recommended by the spec in certain circumstances on
+	 * specific platforms.
+	 * Bspec: 72161
+	 */
 	const u8 mocs_write_idx = gt->mocs.uc_index;
-	/* TODO: missing handling of HAS_L3_CCS_READ platforms */
-	const u8 mocs_read_idx = gt->mocs.uc_index;
+	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
+				 gt->mocs.wb_index : gt->mocs.uc_index;
 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
 				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 	const struct xe_rtp_entry_sr engine_entries[] = {
-		/*
-		 * RING_CMD_CCTL specifies the default MOCS entry that will be
-		 * used by the command streamer when executing commands that
-		 * don't have a way to explicitly specify a MOCS setting.
-		 * The default should usually reference whichever MOCS entry
-		 * corresponds to uncached behavior, although use of a WB cached
-		 * entry is recommended by the spec in certain circumstances on
-		 * specific platforms.
-		 */
 		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
 		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
-- 
GitLab


From 9a674bef6cf0ad2e7653381cacda9fbc9c1ea67e Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 29 Sep 2023 13:02:54 -0700
Subject: [PATCH 1933/2340] drm/xe: Fix exec queue usage for unbinds

Passing in a NULL exec queue to __xe_pt_unbind_vma results in the
migrate exec queue being used. This is not the intent from the VM bind
IOCTL, rather a NULL exec queue should use default VM exec queue.

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 2bb0a1669a961..42a5d912e775d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1734,7 +1734,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 		if (!(vma->tile_present & BIT(id)))
 			goto next;
 
-		fence = __xe_pt_unbind_vma(tile, vma, q, first_op ? syncs : NULL,
+		fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
+					   first_op ? syncs : NULL,
 					   first_op ? num_syncs : 0);
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
-- 
GitLab


From fb1b70607f73af5e5c9d02af203197191ab7abae Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Mon, 25 Sep 2023 13:48:38 +0530
Subject: [PATCH 1934/2340] drm/xe/hwmon: Expose power attributes

Expose Card reactive sustained (pl1) power limit as power_max and
card default power limit (tdp) as power_rated_max.

v2:
  - Fix review comments (Riana)
v3:
  - Use drmm_mutex_init (Matt Brost)
  - Print error value (Matt Brost)
  - Convert enums to uppercase (Matt Brost)
  - Avoid extra reg read in hwmon_is_visible function (Riana)
  - Use xe_device_assert_mem_access when applicable (Matt Brost)
  - Add intel-xe@lists.freedesktop.org in Documentation (Matt Brost)
v4:
  - Use prefix xe_hwmon prefix for all functions (Matt Brost/Andi)
  - %s/hwmon_reg/xe_hwmon_reg (Andi)
  - Fix review comments (Guenter/Andi)
v5:
  - Fix review comments (Riana)
v6:
  - Use drm_warn in default case (Rodrigo)
  - s/ENODEV/EOPNOTSUPP (Andi)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/20230925081842.3566834-2-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../ABI/testing/sysfs-driver-intel-xe-hwmon   |  22 ++
 drivers/gpu/drm/xe/Makefile                   |   3 +
 drivers/gpu/drm/xe/regs/xe_gt_regs.h          |   4 +
 drivers/gpu/drm/xe/regs/xe_mchbar_regs.h      |  33 ++
 drivers/gpu/drm/xe/xe_device.c                |   3 +
 drivers/gpu/drm/xe/xe_device_types.h          |   3 +
 drivers/gpu/drm/xe/xe_hwmon.c                 | 358 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_hwmon.h                 |  19 +
 8 files changed, 445 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
 create mode 100644 drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
 create mode 100644 drivers/gpu/drm/xe/xe_hwmon.c
 create mode 100644 drivers/gpu/drm/xe/xe_hwmon.h

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
new file mode 100644
index 0000000000000..da0197a29fe44
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -0,0 +1,22 @@
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_max
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Card reactive sustained  (PL1) power limit in microwatts.
+
+		The power controller will throttle the operating frequency
+		if the power averaged over a window (typically seconds)
+		exceeds this limit. A read value of 0 means that the PL1
+		power limit is disabled, writing 0 disables the
+		limit. Writing values > 0 and <= TDP will enable the power limit.
+
+		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RO. Card default power limit (default TDP setting).
+
+		Only supported for particular Intel xe graphics platforms.
+
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 9be0848ea370a..a0e7896a4ef7c 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -122,6 +122,9 @@ xe-y += xe_bb.o \
 	xe_wa.o \
 	xe_wopcm.o
 
+# graphics hardware monitoring (HWMON) support
+xe-$(CONFIG_HWMON) += xe_hwmon.o
+
 obj-$(CONFIG_DRM_XE) += xe.o
 obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 3a4c9bcf793ff..b32b8132f8bf2 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -411,4 +411,8 @@
 #define XEHPC_BCS5_BCS6_INTR_MASK		XE_REG(0x190118)
 #define XEHPC_BCS7_BCS8_INTR_MASK		XE_REG(0x19011c)
 
+#define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
+#define PVC_GT0_PACKAGE_POWER_SKU_UNIT		XE_REG(0x281068)
+#define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
+
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
new file mode 100644
index 0000000000000..27f1d42baf6d5
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MCHBAR_REGS_H_
+#define _XE_MCHBAR_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * MCHBAR mirror.
+ *
+ * This mirrors the MCHBAR MMIO space whose location is determined by
+ * device 0 function 0's pci config register 0x44 or 0x48 and matches it in
+ * every way.
+ */
+
+#define MCHBAR_MIRROR_BASE_SNB			0x140000
+
+#define PCU_CR_PACKAGE_POWER_SKU		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5930)
+#define   PKG_TDP				GENMASK_ULL(14, 0)
+#define   PKG_MIN_PWR				GENMASK_ULL(30, 16)
+#define   PKG_MAX_PWR				GENMASK_ULL(46, 32)
+
+#define PCU_CR_PACKAGE_POWER_SKU_UNIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938)
+#define   PKG_PWR_UNIT				REG_GENMASK(3, 0)
+
+#define PCU_CR_PACKAGE_RAPL_LIMIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
+#define   PKG_PWR_LIM_1				REG_GENMASK(14, 0)
+#define   PKG_PWR_LIM_1_EN			REG_BIT(15)
+
+#endif /* _XE_MCHBAR_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 113fca462141f..283fc5990000e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -35,6 +35,7 @@
 #include "xe_vm.h"
 #include "xe_vm_madvise.h"
 #include "xe_wait_user_fence.h"
+#include "xe_hwmon.h"
 
 #ifdef CONFIG_LOCKDEP
 struct lockdep_map xe_device_mem_access_lockdep_map = {
@@ -328,6 +329,8 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_pmu_register(&xe->pmu);
 
+	xe_hwmon_register(xe);
+
 	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 1ee8410ec3b12..dd52d112d58fa 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -359,6 +359,9 @@ struct xe_device {
 	/** @pmu: performance monitoring unit */
 	struct xe_pmu pmu;
 
+	/** @hwmon: hwmon subsystem integration */
+	struct xe_hwmon *hwmon;
+
 	/* For pcode */
 	struct mutex sb_lock;
 
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
new file mode 100644
index 0000000000000..997ffe0d0451d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/hwmon.h>
+
+#include <drm/drm_managed.h>
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_mchbar_regs.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hwmon.h"
+#include "xe_mmio.h"
+
+enum xe_hwmon_reg {
+	REG_PKG_RAPL_LIMIT,
+	REG_PKG_POWER_SKU,
+	REG_PKG_POWER_SKU_UNIT,
+};
+
+enum xe_hwmon_reg_operation {
+	REG_READ,
+	REG_WRITE,
+	REG_RMW,
+};
+
+/*
+ * SF_* - scale factors for particular quantities according to hwmon spec.
+ */
+#define SF_POWER	1000000		/* microwatts */
+
+struct xe_hwmon {
+	struct device *hwmon_dev;
+	struct xe_gt *gt;
+	struct mutex hwmon_lock; /* rmw operations*/
+	int scl_shift_power;
+};
+
+static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
+{
+	struct xe_device *xe = gt_to_xe(hwmon->gt);
+	struct xe_reg reg = XE_REG(0);
+
+	switch (hwmon_reg) {
+	case REG_PKG_RAPL_LIMIT:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_RAPL_LIMIT;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
+		break;
+	case REG_PKG_POWER_SKU:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_POWER_SKU;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_POWER_SKU;
+		break;
+	case REG_PKG_POWER_SKU_UNIT:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
+		break;
+	default:
+		drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
+		break;
+	}
+
+	return reg.raw;
+}
+
+static int xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
+				enum xe_hwmon_reg_operation operation, u32 *value,
+				u32 clr, u32 set)
+{
+	struct xe_reg reg;
+
+	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
+
+	if (!reg.raw)
+		return -EOPNOTSUPP;
+
+	switch (operation) {
+	case REG_READ:
+		*value = xe_mmio_read32(hwmon->gt, reg);
+		return 0;
+	case REG_WRITE:
+		xe_mmio_write32(hwmon->gt, reg, *value);
+		return 0;
+	case REG_RMW:
+		*value = xe_mmio_rmw32(hwmon->gt, reg, clr, set);
+		return 0;
+	default:
+		drm_warn(&gt_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n",
+			 operation);
+		return -EOPNOTSUPP;
+	}
+}
+
+static int xe_hwmon_process_reg_read64(struct xe_hwmon *hwmon,
+				       enum xe_hwmon_reg hwmon_reg, u64 *value)
+{
+	struct xe_reg reg;
+
+	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
+
+	if (!reg.raw)
+		return -EOPNOTSUPP;
+
+	*value = xe_mmio_read64_2x32(hwmon->gt, reg);
+
+	return 0;
+}
+
+#define PL1_DISABLE 0
+
+/*
+ * HW allows arbitrary PL1 limits to be set but silently clamps these values to
+ * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the
+ * same pattern for sysfs, allow arbitrary PL1 limits to be set but display
+ * clamped values when read.
+ */
+static int xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
+{
+	u32 reg_val;
+	u64 reg_val64, min, max;
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, &reg_val, 0, 0);
+	/* Check if PL1 limit is disabled */
+	if (!(reg_val & PKG_PWR_LIM_1_EN)) {
+		*value = PL1_DISABLE;
+		return 0;
+	}
+
+	reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
+	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+
+	xe_hwmon_process_reg_read64(hwmon, REG_PKG_POWER_SKU, &reg_val64);
+	min = REG_FIELD_GET(PKG_MIN_PWR, reg_val64);
+	min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+	max = REG_FIELD_GET(PKG_MAX_PWR, reg_val64);
+	max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+
+	if (min && max)
+		*value = clamp_t(u64, *value, min, max);
+
+	return 0;
+}
+
+static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
+{
+	u32 reg_val;
+
+	/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
+	if (value == PL1_DISABLE) {
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, &reg_val,
+				     PKG_PWR_LIM_1_EN, 0);
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, &reg_val,
+				     PKG_PWR_LIM_1_EN, 0);
+
+		if (reg_val & PKG_PWR_LIM_1_EN)
+			return -EOPNOTSUPP;
+	}
+
+	/* Computation in 64-bits to avoid overflow. Round to nearest. */
+	reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
+	reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, &reg_val,
+			     PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
+
+	return 0;
+}
+
+static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
+{
+	u32 reg_val;
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ, &reg_val, 0, 0);
+	reg_val = REG_FIELD_GET(PKG_TDP, reg_val);
+	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+
+	return 0;
+}
+
+static const struct hwmon_channel_info *hwmon_info[] = {
+	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX),
+	NULL
+};
+
+static umode_t
+xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
+{
+	switch (attr) {
+	case hwmon_power_max:
+		return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0;
+	case hwmon_power_rated_max:
+		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
+{
+	switch (attr) {
+	case hwmon_power_max:
+		return xe_hwmon_power_max_read(hwmon, val);
+	case hwmon_power_rated_max:
+		return xe_hwmon_power_rated_max_read(hwmon, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val)
+{
+	switch (attr) {
+	case hwmon_power_max:
+		return xe_hwmon_power_max_write(hwmon, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t
+xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+		    u32 attr, int channel)
+{
+	struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata;
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	switch (type) {
+	case hwmon_power:
+		ret = xe_hwmon_power_is_visible(hwmon, attr, channel);
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static int
+xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	      int channel, long *val)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	switch (type) {
+	case hwmon_power:
+		ret = xe_hwmon_power_read(hwmon, attr, channel, val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static int
+xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	       int channel, long val)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	switch (type) {
+	case hwmon_power:
+		ret = xe_hwmon_power_write(hwmon, attr, channel, val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static const struct hwmon_ops hwmon_ops = {
+	.is_visible = xe_hwmon_is_visible,
+	.read = xe_hwmon_read,
+	.write = xe_hwmon_write,
+};
+
+static const struct hwmon_chip_info hwmon_chip_info = {
+	.ops = &hwmon_ops,
+	.info = hwmon_info,
+};
+
+static void
+xe_hwmon_get_preregistration_info(struct xe_device *xe)
+{
+	struct xe_hwmon *hwmon = xe->hwmon;
+	u32 val_sku_unit = 0;
+	int ret;
+
+	ret = xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, REG_READ, &val_sku_unit, 0, 0);
+	/*
+	 * The contents of register PKG_POWER_SKU_UNIT do not change,
+	 * so read it once and store the shift values.
+	 */
+	if (!ret)
+		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
+}
+
+void xe_hwmon_register(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	struct xe_hwmon *hwmon;
+
+	/* hwmon is available only for dGfx */
+	if (!IS_DGFX(xe))
+		return;
+
+	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+	if (!hwmon)
+		return;
+
+	xe->hwmon = hwmon;
+
+	drmm_mutex_init(&xe->drm, &hwmon->hwmon_lock);
+
+	/* primary GT to access device level properties */
+	hwmon->gt = xe->tiles[0].primary_gt;
+
+	xe_hwmon_get_preregistration_info(xe);
+
+	drm_dbg(&xe->drm, "Register xe hwmon interface\n");
+
+	/*  hwmon_dev points to device hwmon<i> */
+	hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon,
+								&hwmon_chip_info,
+								NULL);
+	if (IS_ERR(hwmon->hwmon_dev)) {
+		drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
+		xe->hwmon = NULL;
+		return;
+	}
+}
+
diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
new file mode 100644
index 0000000000000..c42a1de2cd7a2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_HWMON_H_
+#define _XE_HWMON_H_
+
+#include <linux/types.h>
+
+struct xe_device;
+
+#if IS_REACHABLE(CONFIG_HWMON)
+void xe_hwmon_register(struct xe_device *xe);
+#else
+static inline void xe_hwmon_register(struct xe_device *xe) { };
+#endif
+
+#endif /* _XE_HWMON_H_ */
-- 
GitLab


From 92d44a422d0d9e08ed9020cbf11915909e1f2ad3 Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Mon, 25 Sep 2023 13:48:39 +0530
Subject: [PATCH 1935/2340] drm/xe/hwmon: Expose card reactive critical power

Expose the card reactive critical (I1) power. I1 is exposed as
power1_crit in microwatts (typically for client products) or as
curr1_crit in milliamperes (typically for server).

v2: Move PCODE_MBOX macro to pcode file (Riana)
v3: s/IS_DG2/(gt_to_xe(gt)->info.platform == XE_DG2)
v4: Fix review comments (Andi)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/20230925081842.3566834-3-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../ABI/testing/sysfs-driver-intel-xe-hwmon   |  26 +++++
 drivers/gpu/drm/xe/xe_hwmon.c                 | 105 +++++++++++++++++-
 drivers/gpu/drm/xe/xe_pcode.h                 |   5 +
 drivers/gpu/drm/xe/xe_pcode_api.h             |   7 ++
 4 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index da0197a29fe44..37263b09b6e43 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -20,3 +20,29 @@ Description:	RO. Card default power limit (default TDP setting).
 
 		Only supported for particular Intel xe graphics platforms.
 
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_crit
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Card reactive critical (I1) power limit in microwatts.
+
+		Card reactive critical (I1) power limit in microwatts is exposed
+		for client products. The power controller will throttle the
+		operating frequency if the power averaged over a window exceeds
+		this limit.
+
+		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/curr1_crit
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Card reactive critical (I1) power limit in milliamperes.
+
+		Card reactive critical (I1) power limit in milliamperes is
+		exposed for server products. The power controller will throttle
+		the operating frequency if the power averaged over a window
+		exceeds this limit.
+
+		Only supported for particular Intel xe graphics platforms.
+
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 997ffe0d0451d..8dff0f90c5439 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -12,6 +12,8 @@
 #include "xe_gt.h"
 #include "xe_hwmon.h"
 #include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
 
 enum xe_hwmon_reg {
 	REG_PKG_RAPL_LIMIT,
@@ -29,6 +31,7 @@ enum xe_hwmon_reg_operation {
  * SF_* - scale factors for particular quantities according to hwmon spec.
  */
 #define SF_POWER	1000000		/* microwatts */
+#define SF_CURR		1000		/* milliamperes */
 
 struct xe_hwmon {
 	struct device *hwmon_dev;
@@ -184,18 +187,43 @@ static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
 }
 
 static const struct hwmon_channel_info *hwmon_info[] = {
-	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX),
+	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
+	HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
 	NULL
 };
 
+/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
+static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval)
+{
+	/* Avoid Illegal Subcommand error */
+	if (gt_to_xe(gt)->info.platform == XE_DG2)
+		return -ENXIO;
+
+	return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP,
+			     POWER_SETUP_SUBCOMMAND_READ_I1, 0),
+			     uval, 0);
+}
+
+static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
+{
+	return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP,
+			      POWER_SETUP_SUBCOMMAND_WRITE_I1, 0),
+			      uval);
+}
+
 static umode_t
 xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
 {
+	u32 uval;
+
 	switch (attr) {
 	case hwmon_power_max:
 		return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0;
 	case hwmon_power_rated_max:
 		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0;
+	case hwmon_power_crit:
+		return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+			!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
 	default:
 		return 0;
 	}
@@ -204,11 +232,23 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
 static int
 xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
 {
+	int ret;
+	u32 uval;
+
 	switch (attr) {
 	case hwmon_power_max:
 		return xe_hwmon_power_max_read(hwmon, val);
 	case hwmon_power_rated_max:
 		return xe_hwmon_power_rated_max_read(hwmon, val);
+	case hwmon_power_crit:
+		ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval);
+		if (ret)
+			return ret;
+		if (!(uval & POWER_SETUP_I1_WATTS))
+			return -ENODEV;
+		*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
+				       SF_POWER, POWER_SETUP_I1_SHIFT);
+		return 0;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -217,9 +257,63 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
 static int
 xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val)
 {
+	u32 uval;
+
 	switch (attr) {
 	case hwmon_power_max:
 		return xe_hwmon_power_max_write(hwmon, val);
+	case hwmon_power_crit:
+		uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER);
+		return xe_hwmon_pcode_write_i1(hwmon->gt, uval);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t
+xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr)
+{
+	u32 uval;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+			(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	int ret;
+	u32 uval;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval);
+		if (ret)
+			return ret;
+		if (uval & POWER_SETUP_I1_WATTS)
+			return -ENODEV;
+		*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
+				       SF_CURR, POWER_SETUP_I1_SHIFT);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val)
+{
+	u32 uval;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR);
+		return xe_hwmon_pcode_write_i1(hwmon->gt, uval);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -238,6 +332,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
 	case hwmon_power:
 		ret = xe_hwmon_power_is_visible(hwmon, attr, channel);
 		break;
+	case hwmon_curr:
+		ret = xe_hwmon_curr_is_visible(hwmon, attr);
+		break;
 	default:
 		ret = 0;
 		break;
@@ -261,6 +358,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 	case hwmon_power:
 		ret = xe_hwmon_power_read(hwmon, attr, channel, val);
 		break;
+	case hwmon_curr:
+		ret = xe_hwmon_curr_read(hwmon, attr, val);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
@@ -284,6 +384,9 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 	case hwmon_power:
 		ret = xe_hwmon_power_write(hwmon, attr, channel, val);
 		break;
+	case hwmon_curr:
+		ret = xe_hwmon_curr_write(hwmon, attr, val);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
index 3b4aa8c1a3bad..08cb1d047cba2 100644
--- a/drivers/gpu/drm/xe/xe_pcode.h
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -22,4 +22,9 @@ int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val,
 int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
 		     u32 reply_mask, u32 reply, int timeout_ms);
 
+#define PCODE_MBOX(mbcmd, param1, param2)\
+	(FIELD_PREP(PCODE_MB_COMMAND, mbcmd)\
+	| FIELD_PREP(PCODE_MB_PARAM1, param1)\
+	| FIELD_PREP(PCODE_MB_PARAM2, param2))
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 837ff7c71280e..5935cfe302044 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -35,6 +35,13 @@
 #define     DGFX_GET_INIT_STATUS	0x0
 #define     DGFX_INIT_STATUS_COMPLETE	0x1
 
+#define   PCODE_POWER_SETUP			0x7C
+#define     POWER_SETUP_SUBCOMMAND_READ_I1	0x4
+#define     POWER_SETUP_SUBCOMMAND_WRITE_I1	0x5
+#define	    POWER_SETUP_I1_WATTS		REG_BIT(31)
+#define	    POWER_SETUP_I1_SHIFT		6	/* 10.6 fixed point format */
+#define	    POWER_SETUP_I1_DATA_MASK		REG_GENMASK(15, 0)
+
 struct pcode_err_decode {
 	int errno;
 	const char *str;
-- 
GitLab


From fbcdc9d3bf586c459cc66ffe802b0d4ba92e8406 Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Mon, 25 Sep 2023 13:48:40 +0530
Subject: [PATCH 1936/2340] drm/xe/hwmon: Expose input voltage attribute

Use Xe HWMON subsystem to display the input voltage.

v2:
  - Rename hwm_get_vltg to hwm_get_voltage (Riana)
  - Use scale factor SF_VOLTAGE (Riana)
v3:
  - %s/gt_perf_status/REG_GT_PERF_STATUS/
  - Remove platform check from hwmon_get_voltage()
v4:
  - Fix review comments (Andi)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/20230925081842.3566834-4-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../ABI/testing/sysfs-driver-intel-xe-hwmon   |  6 ++
 drivers/gpu/drm/xe/regs/xe_gt_regs.h          |  3 +
 drivers/gpu/drm/xe/xe_hwmon.c                 | 58 +++++++++++++++++++
 3 files changed, 67 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 37263b09b6e43..7f9407c208646 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -44,5 +44,11 @@ Description:	RW. Card reactive critical (I1) power limit in milliamperes.
 		the operating frequency if the power averaged over a window
 		exceeds this limit.
 
+What:		/sys/devices/.../hwmon/hwmon<i>/in0_input
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RO. Current Voltage in millivolt.
+
 		Only supported for particular Intel xe graphics platforms.
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index b32b8132f8bf2..a9a91195c6d56 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -375,6 +375,9 @@
 #define GT_GFX_RC6_LOCKED			XE_REG(0x138104)
 #define GT_GFX_RC6				XE_REG(0x138108)
 
+#define GT_PERF_STATUS				XE_REG(0x1381b4)
+#define   VOLTAGE_MASK				REG_GENMASK(10, 0)
+
 #define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4))
 
 #define GUC_SG_INTR_ENABLE			XE_REG(0x190038)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 8dff0f90c5439..d89345b85e76b 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -3,7 +3,9 @@
  * Copyright © 2023 Intel Corporation
  */
 
+#include <linux/hwmon-sysfs.h>
 #include <linux/hwmon.h>
+#include <linux/types.h>
 
 #include <drm/drm_managed.h>
 #include "regs/xe_gt_regs.h"
@@ -19,6 +21,7 @@ enum xe_hwmon_reg {
 	REG_PKG_RAPL_LIMIT,
 	REG_PKG_POWER_SKU,
 	REG_PKG_POWER_SKU_UNIT,
+	REG_GT_PERF_STATUS,
 };
 
 enum xe_hwmon_reg_operation {
@@ -32,6 +35,7 @@ enum xe_hwmon_reg_operation {
  */
 #define SF_POWER	1000000		/* microwatts */
 #define SF_CURR		1000		/* milliamperes */
+#define SF_VOLTAGE	1000		/* millivolts */
 
 struct xe_hwmon {
 	struct device *hwmon_dev;
@@ -64,6 +68,10 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
 		else if (xe->info.platform == XE_PVC)
 			reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
 		break;
+	case REG_GT_PERF_STATUS:
+		if (xe->info.platform == XE_DG2)
+			reg = GT_PERF_STATUS;
+		break;
 	default:
 		drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
 		break;
@@ -189,6 +197,7 @@ static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
 static const struct hwmon_channel_info *hwmon_info[] = {
 	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
 	HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
+	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
 	NULL
 };
 
@@ -211,6 +220,18 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
 			      uval);
 }
 
+static int xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value)
+{
+	u32 reg_val;
+
+	xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS,
+			     REG_READ, &reg_val, 0, 0);
+	/* HW register value in units of 2.5 millivolt */
+	*value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE);
+
+	return 0;
+}
+
 static umode_t
 xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
 {
@@ -319,6 +340,37 @@ xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val)
 	}
 }
 
+static umode_t
+xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr)
+{
+	switch (attr) {
+	case hwmon_in_input:
+		return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 0444 : 0;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	switch (attr) {
+	case hwmon_in_input:
+		ret = xe_hwmon_get_voltage(hwmon, val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
 static umode_t
 xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
 		    u32 attr, int channel)
@@ -335,6 +387,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
 	case hwmon_curr:
 		ret = xe_hwmon_curr_is_visible(hwmon, attr);
 		break;
+	case hwmon_in:
+		ret = xe_hwmon_in_is_visible(hwmon, attr);
+		break;
 	default:
 		ret = 0;
 		break;
@@ -361,6 +416,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 	case hwmon_curr:
 		ret = xe_hwmon_curr_read(hwmon, attr, val);
 		break;
+	case hwmon_in:
+		ret = xe_hwmon_in_read(hwmon, attr, val);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
-- 
GitLab


From 71d0a32524f98ebb5034d74f204b613bf06e6925 Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Mon, 25 Sep 2023 13:48:41 +0530
Subject: [PATCH 1937/2340] drm/xe/hwmon: Expose hwmon energy attribute

Expose hwmon energy attribute to show device level energy usage

v2:
  - %s/hwm_/hwmon_/
  - Convert enums to upper case
v3:
  - %s/hwmon_/xe_hwmon
  - Remove gt specific hwmon attributes
v4:
 - %s/REG_PKG_ENERGY_STATUS/REG_ENERGY_STATUS_ALL (Riana)
 - %s/hwmon_energy_info/xe_hwmon_energy_info (Riana)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/20230925081842.3566834-5-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../ABI/testing/sysfs-driver-intel-xe-hwmon   |   7 ++
 drivers/gpu/drm/xe/regs/xe_gt_regs.h          |   2 +
 drivers/gpu/drm/xe/regs/xe_mchbar_regs.h      |   3 +
 drivers/gpu/drm/xe/xe_hwmon.c                 | 105 +++++++++++++++++-
 4 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 7f9407c208646..1a7a6c23e1417 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -52,3 +52,10 @@ Description:	RO. Current Voltage in millivolt.
 
 		Only supported for particular Intel xe graphics platforms.
 
+What:		/sys/devices/.../hwmon/hwmon<i>/energy1_input
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RO. Energy input of device in microjoules.
+
+		Only supported for particular Intel xe graphics platforms.
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index a9a91195c6d56..83519a424aab1 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -414,8 +414,10 @@
 #define XEHPC_BCS5_BCS6_INTR_MASK		XE_REG(0x190118)
 #define XEHPC_BCS7_BCS8_INTR_MASK		XE_REG(0x19011c)
 
+#define PVC_GT0_PACKAGE_ENERGY_STATUS		XE_REG(0x281004)
 #define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
 #define PVC_GT0_PACKAGE_POWER_SKU_UNIT		XE_REG(0x281068)
+#define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
 #define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
 
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
index 27f1d42baf6d5..d8ecbe1858d12 100644
--- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -25,6 +25,9 @@
 
 #define PCU_CR_PACKAGE_POWER_SKU_UNIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938)
 #define   PKG_PWR_UNIT				REG_GENMASK(3, 0)
+#define   PKG_ENERGY_UNIT			REG_GENMASK(12, 8)
+
+#define PCU_CR_PACKAGE_ENERGY_STATUS		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c)
 
 #define PCU_CR_PACKAGE_RAPL_LIMIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
 #define   PKG_PWR_LIM_1				REG_GENMASK(14, 0)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index d89345b85e76b..734fcca9f71fa 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -22,6 +22,7 @@ enum xe_hwmon_reg {
 	REG_PKG_POWER_SKU,
 	REG_PKG_POWER_SKU_UNIT,
 	REG_GT_PERF_STATUS,
+	REG_PKG_ENERGY_STATUS,
 };
 
 enum xe_hwmon_reg_operation {
@@ -36,12 +37,20 @@ enum xe_hwmon_reg_operation {
 #define SF_POWER	1000000		/* microwatts */
 #define SF_CURR		1000		/* milliamperes */
 #define SF_VOLTAGE	1000		/* millivolts */
+#define SF_ENERGY	1000000		/* microjoules */
+
+struct xe_hwmon_energy_info {
+	u32 reg_val_prev;
+	long accum_energy;		/* Accumulated energy for energy1_input */
+};
 
 struct xe_hwmon {
 	struct device *hwmon_dev;
 	struct xe_gt *gt;
 	struct mutex hwmon_lock; /* rmw operations*/
 	int scl_shift_power;
+	int scl_shift_energy;
+	struct xe_hwmon_energy_info ei;	/*  Energy info for energy1_input */
 };
 
 static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
@@ -72,6 +81,12 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
 		if (xe->info.platform == XE_DG2)
 			reg = GT_PERF_STATUS;
 		break;
+	case REG_PKG_ENERGY_STATUS:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_ENERGY_STATUS;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
+		break;
 	default:
 		drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
 		break;
@@ -194,10 +209,59 @@ static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
 	return 0;
 }
 
+/*
+ * xe_hwmon_energy_get - Obtain energy value
+ *
+ * The underlying energy hardware register is 32-bits and is subject to
+ * overflow. How long before overflow? For example, with an example
+ * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and
+ * a power draw of 1000 watts, the 32-bit counter will overflow in
+ * approximately 4.36 minutes.
+ *
+ * Examples:
+ *    1 watt:  (2^32 >> 14) /    1 W / (60 * 60 * 24) secs/day -> 3 days
+ * 1000 watts: (2^32 >> 14) / 1000 W / 60             secs/min -> 4.36 minutes
+ *
+ * The function significantly increases overflow duration (from 4.36
+ * minutes) by accumulating the energy register into a 'long' as allowed by
+ * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
+ * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
+ * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
+ * energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
+ */
+static void
+xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
+{
+	struct xe_hwmon_energy_info *ei = &hwmon->ei;
+	u32 reg_val;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ,
+			     &reg_val, 0, 0);
+
+	if (reg_val >= ei->reg_val_prev)
+		ei->accum_energy += reg_val - ei->reg_val_prev;
+	else
+		ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+
+	ei->reg_val_prev = reg_val;
+
+	*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+				  hwmon->scl_shift_energy);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+}
+
 static const struct hwmon_channel_info *hwmon_info[] = {
 	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
 	HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
 	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
+	HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
 	NULL
 };
 
@@ -371,6 +435,29 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
 	return ret;
 }
 
+static umode_t
+xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr)
+{
+	switch (attr) {
+	case hwmon_energy_input:
+		return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 0444 : 0;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	switch (attr) {
+	case hwmon_energy_input:
+		xe_hwmon_energy_get(hwmon, val);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static umode_t
 xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
 		    u32 attr, int channel)
@@ -390,6 +477,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
 	case hwmon_in:
 		ret = xe_hwmon_in_is_visible(hwmon, attr);
 		break;
+	case hwmon_energy:
+		ret = xe_hwmon_energy_is_visible(hwmon, attr);
+		break;
 	default:
 		ret = 0;
 		break;
@@ -419,6 +509,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 	case hwmon_in:
 		ret = xe_hwmon_in_read(hwmon, attr, val);
 		break;
+	case hwmon_energy:
+		ret = xe_hwmon_energy_read(hwmon, attr, val);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
@@ -470,6 +563,7 @@ static void
 xe_hwmon_get_preregistration_info(struct xe_device *xe)
 {
 	struct xe_hwmon *hwmon = xe->hwmon;
+	long energy;
 	u32 val_sku_unit = 0;
 	int ret;
 
@@ -478,8 +572,17 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
 	 * The contents of register PKG_POWER_SKU_UNIT do not change,
 	 * so read it once and store the shift values.
 	 */
-	if (!ret)
+	if (!ret) {
 		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
+		hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
+	}
+
+	/*
+	 * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the
+	 * first value of the energy register read
+	 */
+	if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0))
+		xe_hwmon_energy_get(hwmon, &energy);
 }
 
 void xe_hwmon_register(struct xe_device *xe)
-- 
GitLab


From 5f01a35b10f3d2f55634a471c43e59e3c6f239fd Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Fri, 29 Sep 2023 10:31:02 -0700
Subject: [PATCH 1938/2340] drm/xe/vm: print the correct 'keep' when printing
 gpuva ops

Unions are cool, until they aren't.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 42a5d912e775d..ae5578a3e1219 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2345,7 +2345,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 		vma = gpuva_to_vma(op->remap.unmap->va);
 		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
-		       op->unmap.keep ? 1 : 0);
+		       op->remap.unmap->keep ? 1 : 0);
 		if (op->remap.prev)
 			vm_dbg(&xe->drm,
 			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
-- 
GitLab


From 66aca8f04bb982b9f429fbce384beaa4badae21a Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Fri, 29 Sep 2023 10:31:03 -0700
Subject: [PATCH 1939/2340] drm/xe/vm: use list_last_entry() to fetch last_op

I would imagine that it's more efficient to fetch ops_list->prev than
to walk the whole list forward.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ae5578a3e1219..b61ed51b503da 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3118,8 +3118,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 
 	lockdep_assert_held_write(&vm->lock);
 
-	list_for_each_entry(op, ops_list, link)
-		last_op = op;
+	last_op = list_last_entry(ops_list, struct xe_vma_op, link);
 
 	if (!async) {
 		err = xe_vma_op_execute(vm, last_op);
-- 
GitLab


From 0e1a234618a86cd4f920a09cfe9ac35f87e8c3f6 Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Fri, 29 Sep 2023 10:31:04 -0700
Subject: [PATCH 1940/2340] drm/xe: fix range printing for debug messages

We're already using the half-open interval notation "[A, B)", that "-
1" there makes it wrong. Also, getting rid of the "-1" makes it much
easier to grep for the logs when you're looking for an address that's
the end of a vma and the start of another.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index d5f721efdc3cc..a7249b2d807ca 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1265,7 +1265,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
-	       xe_vma_start(vma), xe_vma_end(vma) - 1, q);
+	       xe_vma_start(vma), xe_vma_end(vma), q);
 
 	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
 	if (err)
@@ -1618,7 +1618,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
 	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
-	       xe_vma_start(vma), xe_vma_end(vma) - 1, q);
+	       xe_vma_start(vma), xe_vma_end(vma), q);
 
 	num_entries = xe_pt_stage_unbind(tile, vma, entries);
 	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
-- 
GitLab


From f24cf6cea519cd5c8110ac8dcbdad70e9f2dfb22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Tue, 3 Oct 2023 10:21:30 -0700
Subject: [PATCH 1941/2340] drm/xe: Fix RING_MI_MODE label in devcoredump
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a typo in RING_MI_MODE label.

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 49128f640e15d..dc9dd83d99c52 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -770,7 +770,7 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
 	drm_printf(p, "\tRING_HEAD:  0x%08x\n", snapshot->reg.ring_head);
 	drm_printf(p, "\tRING_TAIL:  0x%08x\n", snapshot->reg.ring_tail);
 	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
-	drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
+	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
 		   snapshot->reg.ring_mode);
 	drm_printf(p, "\tRING_IMR:   0x%08x\n", snapshot->reg.ring_imr);
-- 
GitLab


From d32c49e318df0a3f334c2d2ff95ce4600df2d6bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Tue, 3 Oct 2023 13:16:28 -0700
Subject: [PATCH 1942/2340] drm/xe: Fix devcoredump readout of IPEHR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It was reading (base) + 0x8c but that is not a valid register
and instead it should read (base) + 0x68.
So here reading the correct register and removing the wrong and
duplicated.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 -
 drivers/gpu/drm/xe/xe_hw_engine.c        | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 692213d09ceaa..792d431161c68 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -37,7 +37,6 @@
 #define RING_DMA_FADD(base)			XE_REG((base) + 0x78)
 #define RING_HWS_PGA(base)			XE_REG((base) + 0x80)
 #define IPEIR(base)				XE_REG((base) + 0x88)
-#define IPEHR(base)				XE_REG((base) + 0x8c)
 #define RING_HWSTAM(base)			XE_REG((base) + 0x98)
 #define RING_MI_MODE(base)			XE_REG((base) + 0x9c)
 #define RING_NOPID(base)			XE_REG((base) + 0x94)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index dc9dd83d99c52..c2db391cf2674 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -728,7 +728,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 	snapshot->reg.ring_dma_fadd =
 		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
 	snapshot->reg.ipeir = hw_engine_mmio_read32(hwe, IPEIR(0));
-	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, IPEHR(0));
+	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
 
 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
 		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
-- 
GitLab


From a8e2e0d7fab79b83cdc3bb2dd192c94564fa4298 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Tue, 3 Oct 2023 13:19:06 -0700
Subject: [PATCH 1943/2340] drm/xe: Remove devcoredump readout of IPEIR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This register don't exist in gfx12+, so here dropping the readout
and print in devcoredump.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 --
 drivers/gpu/drm/xe/xe_hw_engine.c        | 2 --
 drivers/gpu/drm/xe/xe_hw_engine_types.h  | 2 --
 3 files changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 792d431161c68..35dd4837dd75f 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -31,12 +31,10 @@
 
 #define RING_ACTHD_UDW(base)			XE_REG((base) + 0x5c)
 #define RING_DMA_FADD_UDW(base)			XE_REG((base) + 0x60)
-#define RING_IPEIR(base)			XE_REG((base) + 0x64)
 #define RING_IPEHR(base)			XE_REG((base) + 0x68)
 #define RING_ACTHD(base)			XE_REG((base) + 0x74)
 #define RING_DMA_FADD(base)			XE_REG((base) + 0x78)
 #define RING_HWS_PGA(base)			XE_REG((base) + 0x80)
-#define IPEIR(base)				XE_REG((base) + 0x88)
 #define RING_HWSTAM(base)			XE_REG((base) + 0x98)
 #define RING_MI_MODE(base)			XE_REG((base) + 0x9c)
 #define RING_NOPID(base)			XE_REG((base) + 0x94)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index c2db391cf2674..f63c821baeb77 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -727,7 +727,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
 	snapshot->reg.ring_dma_fadd =
 		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
-	snapshot->reg.ipeir = hw_engine_mmio_read32(hwe, IPEIR(0));
 	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
 
 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
@@ -784,7 +783,6 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
 	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
 		   snapshot->reg.ring_dma_fadd_udw,
 		   snapshot->reg.ring_dma_fadd);
-	drm_printf(p, "\tIPEIR: 0x%08x\n", snapshot->reg.ipeir);
 	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index cd4bc1412a3ff..5d4ee29042407 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -217,8 +217,6 @@ struct xe_hw_engine_snapshot {
 		u32 ring_dma_fadd_udw;
 		/** @ring_dma_fadd: RING_DMA_FADD */
 		u32 ring_dma_fadd;
-		/** @ipeir: IPEIR */
-		u32 ipeir;
 		/** @ipehr: IPEHR */
 		u32 ipehr;
 		/** @rcu_mode: RCU_MODE */
-- 
GitLab


From 5708a1080a2e455ca9f35e372f107d0c030358de Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 4 Oct 2023 08:03:17 -0700
Subject: [PATCH 1944/2340] drm/xe/xe2: Add missing mocs entry

Add index 4 so WB on both L3 and L4 can be used by userspace.

Bspec: 71582
Link: https://lore.kernel.org/all/7oqovb356dx2hm5muop3xjqr4kv7m5fzjisch3vmsmxm33ygtv@eib4jielia35/
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231004150317.3473731-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index ada3114be4fa1..19a8146ded9a7 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -385,6 +385,8 @@ static const struct xe_mocs_entry xe2_mocs_table[] = {
 	MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0),
 	/* Uncached L3 + L4 */
 	MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0),
+	/* Cached L3 + L4 */
+	MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
 };
 
 static unsigned int get_mocs_settings(struct xe_device *xe,
-- 
GitLab


From 3a13c2de442d6bfaef9c102cd1092e6cae22b753 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 5 Oct 2023 17:38:55 +0100
Subject: [PATCH 1945/2340] drm/xe/hwmon: fix uaf on unload

It doesn't look like you can mix and match devm_ and drmmm_ for a
managed resource. For drmmm the resources are all tracked in drm with
its own list, and there is only one devm_ resource for the entire list.
If the driver itself also adds some of its own devm resources, then
those will be released first. In the case of hwmon the devm_kzalloc will
be freed before the drmmm_ action to destroy the mutex allocated within,
leading to uaf.

Since hwmon itself wants to use devm, rather use that for the mutex
destroy.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/766
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Badal Nilawar <badal.nilawar@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hwmon.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 734fcca9f71fa..9ac05994a967d 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -585,6 +585,13 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
 		xe_hwmon_energy_get(hwmon, &energy);
 }
 
+static void xe_hwmon_mutex_destroy(void *arg)
+{
+	struct xe_hwmon *hwmon = arg;
+
+	mutex_destroy(&hwmon->hwmon_lock);
+}
+
 void xe_hwmon_register(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
@@ -600,7 +607,9 @@ void xe_hwmon_register(struct xe_device *xe)
 
 	xe->hwmon = hwmon;
 
-	drmm_mutex_init(&xe->drm, &hwmon->hwmon_lock);
+	mutex_init(&hwmon->hwmon_lock);
+	if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon))
+		return;
 
 	/* primary GT to access device level properties */
 	hwmon->gt = xe->tiles[0].primary_gt;
-- 
GitLab


From a617b3048abea1cb424963f4354941b335d5a911 Mon Sep 17 00:00:00 2001
From: Matt Atwood <matthew.s.atwood@intel.com>
Date: Fri, 6 Oct 2023 09:11:47 -0700
Subject: [PATCH 1946/2340] drm/xe: Add infrastructure for per engine tuning

Add the infrastructure for per engine tuning in preparation for disable
indirect state.

v3: Rebase
v4: Fix rebasing issues

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c |  2 ++
 drivers/gpu/drm/xe/xe_tuning.c    | 12 ++++++++++++
 drivers/gpu/drm/xe/xe_tuning.h    |  1 +
 3 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index f63c821baeb77..b5b0845908887 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -25,6 +25,7 @@
 #include "xe_reg_sr.h"
 #include "xe_rtp.h"
 #include "xe_sched_job.h"
+#include "xe_tuning.h"
 #include "xe_wa.h"
 
 #define MAX_MMIO_BASES 3
@@ -405,6 +406,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	}
 
 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
+	xe_tuning_process_engine(hwe);
 	xe_wa_process_engine(hwe);
 	hw_engine_setup_default_state(hwe);
 
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 08174dda9bc73..3ad11c2593003 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -27,6 +27,10 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 	{}
 };
 
+static const struct xe_rtp_entry_sr engine_tunings[] = {
+	{}
+};
+
 static const struct xe_rtp_entry_sr lrc_tunings[] = {
 	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
@@ -65,6 +69,14 @@ void xe_tuning_process_gt(struct xe_gt *gt)
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
 
+void xe_tuning_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);
+
 /**
  * xe_tuning_process_lrc - process lrc tunings
  * @hwe: engine instance to process tunings for
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
index 2b95b0c8effc4..4f9c3ac3b5162 100644
--- a/drivers/gpu/drm/xe/xe_tuning.h
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -10,6 +10,7 @@ struct xe_gt;
 struct xe_hw_engine;
 
 void xe_tuning_process_gt(struct xe_gt *gt);
+void xe_tuning_process_engine(struct xe_hw_engine *hwe);
 void xe_tuning_process_lrc(struct xe_hw_engine *hwe);
 
 #endif
-- 
GitLab


From f8ebadd0df248d7f0b5060fd8a0d956e773d9d78 Mon Sep 17 00:00:00 2001
From: Matt Atwood <matthew.s.atwood@intel.com>
Date: Fri, 6 Oct 2023 09:47:59 -0700
Subject: [PATCH 1947/2340] drm/xe: add gt tuning for indirect state

Force indirect state sampler data to only be in the dynamic state pool,
which is more convienent for the UMD. Behavior change mirrors similar
change for i915 in commit 16fc9c08f0ec ("drm/i915: disable sampler
indirect state in bindless heap")

v2: split out per engine tuning into separate patch, commit message
(Lucas)
v3: rebase
v4: Change to match render only, g.ver 1200 to 1271 (MattR)

Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 +
 drivers/gpu/drm/xe/xe_tuning.c       | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 83519a424aab1..cd1821d96a5d8 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -303,6 +303,7 @@
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
 #define   SAMPLER_ENABLE_HEADLESS_MSG		REG_BIT(5)
+#define   INDIRECT_STATE_BASE_ADDR_OVERRIDE	REG_BIT(0)
 
 #define HALF_SLICE_CHICKEN7				XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED)
 #define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 3ad11c2593003..d705198165220 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -28,6 +28,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 };
 
 static const struct xe_rtp_entry_sr engine_tunings[] = {
+	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
+	},
 	{}
 };
 
-- 
GitLab


From 285230832eb794dfd1c9dc63d80367a714dbf75f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 28 Sep 2023 22:02:48 -0700
Subject: [PATCH 1948/2340] drm/xe/vm: Prefer xe_assert() over XE_WARN_ON()

When xelp_pte_encode_addr() was added in commit 23c8495efeed
("drm/xe/migrate: Do not hand-encode pte"), there was no xe pointer for
using xe_assert(). This is not the case anymore, so prefer it over
XE_WARN_ON().

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index b61ed51b503da..10ed72228946b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 
+#include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_drm_client.h"
@@ -1312,7 +1313,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
 	u64 pte;
 
 	/* Avoid passing random bits directly as flags */
-	XE_WARN_ON(flags & ~XE_PTE_PS64);
+	xe_assert(xe, !(flags & ~XE_PTE_PS64));
 
 	pte = addr;
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-- 
GitLab


From dfc83d4293f3f0b26d38952b3e491c1ed5f36b38 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 28 Sep 2023 22:02:49 -0700
Subject: [PATCH 1949/2340] drm/xe/xe2: Follow XeHPC for TLB invalidation

Register GUC_TLB_INV_CR is gone in xe2. When GuC submission is not yet
enabled, make sure to follow the same path as XeHPC.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ggtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index ec7bbb1dc2958..06732461246dc 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -263,7 +263,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 	} else if (xe_device_uc_enabled(gt_to_xe(gt))) {
 		struct xe_device *xe = gt_to_xe(gt);
 
-		if (xe->info.platform == XE_PVC) {
+		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
 			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
 					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
 			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
-- 
GitLab


From d9e85dd5c24d9503391440c65a09fdc69d486d55 Mon Sep 17 00:00:00 2001
From: David Kershner <david.kershner@intel.com>
Date: Thu, 5 Oct 2023 18:00:39 -0400
Subject: [PATCH 1950/2340] drm/xe/xe_migrate.c: Use DPA offset for page table
 entries.

Device Physical Address (DPA) is the starting offset device memory.

Update xe_migrate identity map base PTE entries to start at dpa_base
instead of 0.

The VM offset value should be 0 relative instead of DPA relative.

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Signed-off-by: David Kershner <david.kershner@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 4b7210c793f5f..4dc52ac26d526 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -114,8 +114,13 @@ static u64 xe_migrate_vm_addr(u64 slot, u32 level)
 	return (slot + 1ULL) << xe_pt_shift(level + 1);
 }
 
-static u64 xe_migrate_vram_ofs(u64 addr)
+static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
 {
+	/*
+	 * Remove the DPA to get a correct offset into identity table for the
+	 * migrate offset
+	 */
+	addr -= xe->mem.vram.dpa_base;
 	return addr + (256ULL << xe_pt_shift(2));
 }
 
@@ -149,7 +154,7 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
 
 	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
 	vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE);
-	m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr);
+	m->cleared_vram_ofs = xe_migrate_vram_ofs(xe, vram_addr);
 
 	return 0;
 }
@@ -225,12 +230,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	} else {
 		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
 
-		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
+		m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
 
 		if (xe->info.supports_usm) {
 			batch = tile->primary_gt->usm.bb_pool->bo;
 			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
-			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
+			m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
 		}
 	}
 
@@ -268,7 +273,9 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		 * Use 1GB pages, it shouldn't matter the physical amount of
 		 * vram is less, when we don't access it.
 		 */
-		for (pos = 0; pos < xe->mem.vram.actual_physical_size; pos += SZ_1G, ofs += 8)
+		for (pos = xe->mem.vram.dpa_base;
+		     pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base;
+		     pos += SZ_1G, ofs += 8)
 			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
 	}
 
@@ -443,8 +450,8 @@ static u32 pte_update_size(struct xe_migrate *m,
 		cmds += cmd_size;
 	} else {
 		/* Offset into identity map. */
-		*L0_ofs = xe_migrate_vram_ofs(cur->start +
-					      vram_region_gpu_offset(res));
+		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
+					      cur->start + vram_region_gpu_offset(res));
 		cmds += cmd_size;
 	}
 
@@ -1060,10 +1067,10 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 	 * pages are used. Hence the assert.
 	 */
 	xe_tile_assert(tile, update->qwords <= 0x1ff);
-	if (!ppgtt_ofs) {
-		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
+	if (!ppgtt_ofs)
+		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
+						xe_bo_addr(update->pt_bo, 0,
 							   XE_PAGE_SIZE));
-	}
 
 	do {
 		u64 addr = ppgtt_ofs + ofs * 8;
-- 
GitLab


From cf0b9e94c8c755ae94787d638c655bb38e7a8048 Mon Sep 17 00:00:00 2001
From: David Kershner <david.kershner@intel.com>
Date: Thu, 5 Oct 2023 18:00:40 -0400
Subject: [PATCH 1951/2340] drm/xe/tests/xe_migrate.c: Add vram to vram KUNIT
 test

Add missing kunit test to migrate a bo from vram to vram

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Signed-off-by: David Kershner <david.kershner@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 103 +++++++++++++++++---------
 1 file changed, 66 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 6906ff9d9c310..0db4b651ff1a5 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -99,7 +99,7 @@ static const struct xe_migrate_pt_update_ops sanity_ops = {
 		} } while (0)
 
 static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
-		      struct kunit *test)
+		      struct kunit *test, u32 region)
 {
 	struct xe_device *xe = tile_to_xe(m->tile);
 	u64 retval, expected = 0;
@@ -108,83 +108,104 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	const char *str = big ? "Copying big bo" : "Copying small bo";
 	int err;
 
-	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL,
+	struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL,
 						   bo->size,
 						   ttm_bo_type_kernel,
-						   XE_BO_CREATE_SYSTEM_BIT |
+						   region |
 						   XE_BO_NEEDS_CPU_ACCESS);
-	if (IS_ERR(sysmem)) {
-		KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
-			   str, PTR_ERR(sysmem));
+	if (IS_ERR(remote)) {
+		KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n",
+			   str, PTR_ERR(remote));
 		return;
 	}
 
-	err = xe_bo_validate(sysmem, NULL, false);
+	err = xe_bo_validate(remote, NULL, false);
 	if (err) {
 		KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n",
 			   str, err);
 		goto out_unlock;
 	}
 
-	err = xe_bo_vmap(sysmem);
+	err = xe_bo_vmap(remote);
 	if (err) {
 		KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n",
 			   str, err);
 		goto out_unlock;
 	}
 
-	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
-	fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource);
-	if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" :
-				 "Clearing sysmem small bo", test)) {
-		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
-		check(retval, expected, "sysmem first offset should be cleared",
+	xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
+	fence = xe_migrate_clear(m, remote, remote->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" :
+				 "Clearing remote small bo", test)) {
+		retval = xe_map_rd(xe, &remote->vmap, 0, u64);
+		check(retval, expected, "remote first offset should be cleared",
 		      test);
-		retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64);
-		check(retval, expected, "sysmem last offset should be cleared",
+		retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64);
+		check(retval, expected, "remote last offset should be cleared",
 		      test);
 	}
 	dma_fence_put(fence);
 
-	/* Try to copy 0xc0 from sysmem to vram with 2MB or 64KiB/4KiB pages */
-	xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size);
+	/* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */
+	xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size);
 	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
 
 	expected = 0xc0c0c0c0c0c0c0c0;
-	fence = xe_migrate_copy(m, sysmem, bo, sysmem->ttm.resource,
+	fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
 				bo->ttm.resource);
-	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" :
-				 "Copying small bo sysmem -> vram", test)) {
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" :
+				 "Copying small bo remote -> vram", test)) {
 		retval = xe_map_rd(xe, &bo->vmap, 0, u64);
 		check(retval, expected,
-		      "sysmem -> vram bo first offset should be copied", test);
+		      "remote -> vram bo first offset should be copied", test);
 		retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
 		check(retval, expected,
-		      "sysmem -> vram bo offset should be copied", test);
+		      "remote -> vram bo offset should be copied", test);
 	}
 	dma_fence_put(fence);
 
 	/* And other way around.. slightly hacky.. */
-	xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
+	xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
 	xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
 
-	fence = xe_migrate_copy(m, bo, sysmem, bo->ttm.resource,
-				sysmem->ttm.resource);
-	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" :
-				 "Copying small bo vram -> sysmem", test)) {
-		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
+	fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
+				remote->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> remote" :
+				 "Copying small bo vram -> remote", test)) {
+		retval = xe_map_rd(xe, &remote->vmap, 0, u64);
 		check(retval, expected,
-		      "vram -> sysmem bo first offset should be copied", test);
-		retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64);
+		      "vram -> remote bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64);
 		check(retval, expected,
-		      "vram -> sysmem bo last offset should be copied", test);
+		      "vram -> remote bo last offset should be copied", test);
 	}
 	dma_fence_put(fence);
 
-	xe_bo_vunmap(sysmem);
+	xe_bo_vunmap(remote);
 out_unlock:
-	xe_bo_unlock(sysmem);
-	xe_bo_put(sysmem);
+	xe_bo_unlock(remote);
+	xe_bo_put(remote);
+}
+
+static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
+			     struct kunit *test)
+{
+	test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT);
+}
+
+static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
+			   struct kunit *test)
+{
+	u32 region;
+
+	if (bo->ttm.resource->mem_type == XE_PL_SYSTEM)
+		return;
+
+	if (bo->ttm.resource->mem_type == XE_PL_VRAM0)
+		region = XE_BO_CREATE_VRAM1_BIT;
+	else
+		region = XE_BO_CREATE_VRAM0_BIT;
+	test_copy(m, bo, test, region);
 }
 
 static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
@@ -349,7 +370,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	check(retval, expected, "Command clear small last value", test);
 
 	kunit_info(test, "Copying small buffer object to system\n");
-	test_copy(m, tiny, test);
+	test_copy_sysmem(m, tiny, test);
+	if (xe->info.tile_count > 1) {
+		kunit_info(test, "Copying small buffer object to other vram\n");
+		test_copy_vram(m, tiny, test);
+	}
 
 	/* Clear a big bo */
 	kunit_info(test, "Clearing big buffer object\n");
@@ -366,7 +391,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	check(retval, expected, "Command clear big last value", test);
 
 	kunit_info(test, "Copying big buffer object to system\n");
-	test_copy(m, big, test);
+	test_copy_sysmem(m, big, test);
+	if (xe->info.tile_count > 1) {
+		kunit_info(test, "Copying big buffer object to other vram\n");
+		test_copy_vram(m, big, test);
+	}
 
 	kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
 	test_pt_update(m, pt, test, false);
-- 
GitLab


From 811aa4d2074a9e64baeaa4bbc2773ead6247f101 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 6 Oct 2023 11:23:22 -0700
Subject: [PATCH 1952/2340] drm/xe/xe2: Program PAT tables

The PAT tables become significantly more complicated on Xe2 platforms.
They now control L3, L4, and coherency settings, as well as additional
characteristics such as compression.

Aside from the main PAT table, there's an additional register that
also needs to be programmed with PAT settings for PCI Address
Translation Services.

Bspec: 71582
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://lore.kernel.org/r/20231006182325.3617685-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 92 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 4668ca3932c55..296763594370b 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -10,10 +10,18 @@
 #include "xe_gt_mcr.h"
 #include "xe_mmio.h"
 
+#define _PAT_ATS				0x47fc
 #define _PAT_INDEX(index)			_PICK_EVEN_2RANGES(index, 8, \
 								   0x4800, 0x4804, \
 								   0x4848, 0x484c)
 
+#define XE2_NO_PROMOTE				REG_BIT(10)
+#define XE2_COMP_EN				REG_BIT(9)
+#define XE2_L3_CLOS				REG_GENMASK(7, 6)
+#define XE2_L3_POLICY				REG_GENMASK(5, 4)
+#define XE2_L4_POLICY				REG_GENMASK(3, 2)
+#define XE2_COH_MODE				REG_GENMASK(1, 0)
+
 #define XELPG_L4_POLICY_MASK			REG_GENMASK(3, 2)
 #define XELPG_PAT_3_UC				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 3)
 #define XELPG_PAT_1_WT				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 1)
@@ -67,6 +75,64 @@ static const u32 xelpg_pat_table[] = {
 	[4] = XELPG_PAT_0_WB | XELPG_3_COH_2W,
 };
 
+/*
+ * The Xe2 table is getting large/complicated so it's easier to review if
+ * provided in a form that exactly matches the bspec's formatting.  The meaning
+ * of the fields here are:
+ *   - no_promote:  0=promotable, 1=no promote
+ *   - comp_en:     0=disable, 1=enable
+ *   - l3clos:      L3 class of service (0-3)
+ *   - l3_policy:   0=WB, 1=XD ("WB - Transient Display"), 3=UC
+ *   - l4_policy:   0=WB, 1=WT, 3=UC
+ *   - coh_mode:    0=no snoop, 2=1-way coherent, 3=2-way coherent
+ *
+ * Reserved entries should be programmed with the maximum caching, minimum
+ * coherency (which matches an all-0's encoding), so we can just omit them
+ * in the table.
+ */
+#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, coh_mode) \
+	(no_promote ? XE2_NO_PROMOTE : 0) | \
+	(comp_en ? XE2_COMP_EN : 0) | \
+	REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \
+	REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
+	REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
+	REG_FIELD_PREP(XE2_COH_MODE, coh_mode)
+
+static const u32 xe2_pat_table[] = {
+	[ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ),
+	[ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ),
+	[ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ),
+	[ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ),
+	[ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ),
+	[ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ),
+	[ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ),
+	[ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ),
+	[ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ),
+	[ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ),
+	[10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ),
+	[11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ),
+	[12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ),
+	[13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ),
+	[14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ),
+	[15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ),
+	/* 16..19 are reserved; leave set to all 0's */
+	[20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ),
+	[21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ),
+	[22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ),
+	[23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ),
+	[24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ),
+	[25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ),
+	[26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ),
+	[27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ),
+	[28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ),
+	[29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ),
+	[30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ),
+	[31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ),
+};
+
+/* Special PAT values programmed outside the main table */
+#define XE2_PAT_ATS	XE2_PAT( 0, 0, 0, 0, 3, 3 )
+
 static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
 {
 	for (int i = 0; i < n_entries; i++) {
@@ -102,9 +168,33 @@ static const struct xe_pat_ops xelpg_pat_ops = {
 	.program_media = program_pat_mcr,
 };
 
+static void xe2lpg_program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
+{
+	program_pat_mcr(gt, table, n_entries);
+	xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), XE2_PAT_ATS);
+}
+
+static void xe2lpm_program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
+{
+	program_pat(gt, table, n_entries);
+	xe_mmio_write32(gt, XE_REG(_PAT_ATS), XE2_PAT_ATS);
+}
+
+static const struct xe_pat_ops xe2_pat_ops = {
+	.program_graphics = xe2lpg_program_pat,
+	.program_media = xe2lpm_program_pat,
+};
+
 void xe_pat_init_early(struct xe_device *xe)
 {
-	if (xe->info.platform == XE_METEORLAKE) {
+	if (GRAPHICS_VER(xe) == 20) {
+		xe->pat.ops = &xe2_pat_ops;
+		xe->pat.table = xe2_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 15;
+		xe->pat.idx[XE_CACHE_WB] = 2;
+	} else if (xe->info.platform == XE_METEORLAKE) {
 		xe->pat.ops = &xelpg_pat_ops;
 		xe->pat.table = xelpg_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table);
-- 
GitLab


From 5803bdc8ad6f0320b3147de7e565c24b3afe31fb Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 6 Oct 2023 11:23:23 -0700
Subject: [PATCH 1953/2340] drm/xe/xe2: Add one more bit to encode PAT to ppgtt
 entries

Xe2 adds one more bit to cover all the possible 32 entries. Although
those entries are not used by internal kernel code paths, it's expected
that userspace will make use of it.

Bspec: 59510, 67095
Reviewed-by: Pallavi Mishra <pallavi.mishra@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231006182325.3617685-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h | 1 +
 drivers/gpu/drm/xe/xe_vm.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 5666fd6d7f11c..ba6ffd359ff70 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -49,6 +49,7 @@
 #define XE_BO_INTERNAL_64K		BIT(31)
 
 #define XELPG_PPGTT_PTE_PAT3		BIT_ULL(62)
+#define XE2_PPGTT_PTE_PAT4		BIT_ULL(61)
 #define XE_PPGTT_PTE_PAT2		BIT_ULL(7)
 #define XE_PPGTT_PTE_PAT1		BIT_ULL(4)
 #define XE_PPGTT_PTE_PAT0		BIT_ULL(3)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 10ed72228946b..665af2646243a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1242,6 +1242,9 @@ static u64 pte_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
 	if (pat_index & BIT(3))
 		pte |= XELPG_PPGTT_PTE_PAT3;
 
+	if (pat_index & (BIT(4)))
+		pte |= XE2_PPGTT_PTE_PAT4;
+
 	return pte;
 }
 
-- 
GitLab


From 34803f9a4b3ab20dbc09ad13ed5fa98263896b37 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 6 Oct 2023 11:23:24 -0700
Subject: [PATCH 1954/2340] drm/xe/pat: Add debugfs node to dump PAT

This is useful to debug cache issues, to double check if the PAT
indexes match what they were supposed to be set to from spec.

v2: Add separate functions for XeHP, XeHPC and XeLPG so it correctly
    reads the index based on MCR/REG registers and also decodes the
    fields (Matt Roper)
v3: Starting with XeHPC, do not translate values to human-readable
    formats as the main goal is to make it easy to compare the table
    with the spec. Also, share a single array for xelp/xehp str map
    (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231006182325.3617685-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_debugfs.c |  12 ++
 drivers/gpu/drm/xe/xe_pat.c        | 192 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_pat.h        |   8 ++
 3 files changed, 211 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index ec1ae00f6bfcf..cd6d28c7b923a 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -16,6 +16,7 @@
 #include "xe_gt_topology.h"
 #include "xe_hw_engine.h"
 #include "xe_macros.h"
+#include "xe_pat.h"
 #include "xe_reg_sr.h"
 #include "xe_reg_whitelist.h"
 #include "xe_uc_debugfs.h"
@@ -138,6 +139,16 @@ static int workarounds(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int pat(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_pat_dump(gt, &p);
+
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
@@ -147,6 +158,7 @@ static const struct drm_info_list debugfs_list[] = {
 	{"ggtt", ggtt, 0},
 	{"register-save-restore", register_save_restore, 0},
 	{"workarounds", workarounds, 0},
+	{"pat", pat, 0},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 296763594370b..31565ccbb0ec1 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -6,6 +6,8 @@
 #include "xe_pat.h"
 
 #include "regs/xe_reg_defs.h"
+#include "xe_assert.h"
+#include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_mmio.h"
@@ -14,6 +16,7 @@
 #define _PAT_INDEX(index)			_PICK_EVEN_2RANGES(index, 8, \
 								   0x4800, 0x4804, \
 								   0x4848, 0x484c)
+#define _PAT_PTA				0x4820
 
 #define XE2_NO_PROMOTE				REG_BIT(10)
 #define XE2_COMP_EN				REG_BIT(9)
@@ -40,9 +43,12 @@
 #define XELP_PAT_WC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1)
 #define XELP_PAT_UC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0)
 
+static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" };
+
 struct xe_pat_ops {
 	void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries);
 	void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries);
+	void (*dump)(struct xe_gt *gt, struct drm_printer *p);
 };
 
 static const u32 xelp_pat_table[] = {
@@ -151,14 +157,132 @@ static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries)
 	}
 }
 
+static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+		u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
+
+		drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
+			   XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
 static const struct xe_pat_ops xelp_pat_ops = {
 	.program_graphics = program_pat,
+	.dump = xelp_dump,
 };
 
+static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+		u8 mem_type;
+
+		mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
+
+		drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
+			   XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
 static const struct xe_pat_ops xehp_pat_ops = {
 	.program_graphics = program_pat_mcr,
+	.dump = xehp_dump,
+};
+
+static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i,
+			   REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat),
+			   REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xehpc_pat_ops = {
+	.program_graphics = program_pat_mcr,
+	.dump = xehpc_dump,
 };
 
+static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat;
+
+		if (xe_gt_is_media_type(gt))
+			pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+		else
+			pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i,
+			   REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat),
+			   REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
 /*
  * SAMedia register offsets are adjusted by the write methods and they target
  * registers that are not MCR, while for normal GT they are MCR
@@ -166,6 +290,7 @@ static const struct xe_pat_ops xehp_pat_ops = {
 static const struct xe_pat_ops xelpg_pat_ops = {
 	.program_graphics = program_pat,
 	.program_media = program_pat_mcr,
+	.dump = xelpg_dump,
 };
 
 static void xe2lpg_program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
@@ -180,9 +305,64 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const u32 table[], int n_entrie
 	xe_mmio_write32(gt, XE_REG(_PAT_ATS), XE2_PAT_ATS);
 }
 
+static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+	u32 pat;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		if (xe_gt_is_media_type(gt))
+			pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+		else
+			pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ]  (%#8x)\n", i,
+			   !!(pat & XE2_NO_PROMOTE),
+			   !!(pat & XE2_COMP_EN),
+			   REG_FIELD_GET(XE2_L3_CLOS, pat),
+			   REG_FIELD_GET(XE2_L3_POLICY, pat),
+			   REG_FIELD_GET(XE2_L4_POLICY, pat),
+			   REG_FIELD_GET(XE2_COH_MODE, pat),
+			   pat);
+	}
+
+	/*
+	 * Also print PTA_MODE, which describes how the hardware accesses
+	 * PPGTT entries.
+	 */
+	if (xe_gt_is_media_type(gt))
+		pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA));
+	else
+		pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
+
+	drm_printf(p, "Page Table Access:\n");
+	drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u, %u ]  (%#8x)\n",
+		   !!(pat & XE2_NO_PROMOTE),
+		   !!(pat & XE2_COMP_EN),
+		   REG_FIELD_GET(XE2_L3_CLOS, pat),
+		   REG_FIELD_GET(XE2_L3_POLICY, pat),
+		   REG_FIELD_GET(XE2_L4_POLICY, pat),
+		   REG_FIELD_GET(XE2_COH_MODE, pat),
+		   pat);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
 static const struct xe_pat_ops xe2_pat_ops = {
 	.program_graphics = xe2lpg_program_pat,
 	.program_media = xe2lpm_program_pat,
+	.dump = xe2_dump,
 };
 
 void xe_pat_init_early(struct xe_device *xe)
@@ -202,7 +382,7 @@ void xe_pat_init_early(struct xe_device *xe)
 		xe->pat.idx[XE_CACHE_WT] = 1;
 		xe->pat.idx[XE_CACHE_WB] = 3;
 	} else if (xe->info.platform == XE_PVC) {
-		xe->pat.ops = &xehp_pat_ops;
+		xe->pat.ops = &xehpc_pat_ops;
 		xe->pat.table = xehpc_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table);
 		xe->pat.idx[XE_CACHE_NONE] = 0;
@@ -252,3 +432,13 @@ void xe_pat_init(struct xe_gt *gt)
 	else
 		xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries);
 }
+
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!xe->pat.ops->dump)
+		return;
+
+	xe->pat.ops->dump(gt, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index 168e80e63809a..09c491ab9f15d 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -6,6 +6,7 @@
 #ifndef _XE_PAT_H_
 #define _XE_PAT_H_
 
+struct drm_printer;
 struct xe_gt;
 struct xe_device;
 
@@ -21,4 +22,11 @@ void xe_pat_init_early(struct xe_device *xe);
  */
 void xe_pat_init(struct xe_gt *gt);
 
+/**
+ * xe_pat_dump - Dump PAT table
+ * @gt: GT structure
+ * @p: Printer to dump info to
+ */
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
+
 #endif
-- 
GitLab


From d2300987cf5a483acde519d671421b646f8d5390 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 6 Oct 2023 11:23:25 -0700
Subject: [PATCH 1955/2340] drm/xe/gt: Dump PAT table when failing to
 initialize

When failing on early initialization, one cause may be that the PAT
configuration is not correct. Dump it for ease of debugging.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231006182325.3617685-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 68cd9a7ee0872..c63e2e4750b1e 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -307,6 +307,17 @@ int xe_gt_init_early(struct xe_gt *gt)
 	return 0;
 }
 
+static void dump_pat_on_error(struct xe_gt *gt)
+{
+	struct drm_printer p;
+	char prefix[32];
+
+	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
+	p = drm_debug_printer(prefix);
+
+	xe_pat_dump(gt, &p);
+}
+
 static int gt_fw_domain_init(struct xe_gt *gt)
 {
 	int err, i;
@@ -360,6 +371,7 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	return 0;
 
 err_force_wake:
+	dump_pat_on_error(gt);
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 err_hw_fence_irq:
 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
-- 
GitLab


From 1db6f9d4134ec242d294061cdde475d824e1e9ba Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Wed, 4 Oct 2023 10:08:24 -0300
Subject: [PATCH 1956/2340] drm/xe/rtp: Fix doc for XE_RTP_ACTIONS

Replace the paragraph that was meant for XE_RTP_RULES with one proper
for XE_RTP_ACTIONS.

Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231004130824.13909-1-gustavo.sousa@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_rtp.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 04ccb26452ad4..c56fedd126e6a 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -350,9 +350,8 @@ struct xe_reg_sr;
  * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr
  * @...: Actions to be taken
  *
- * At least one rule is needed and up to 4 are supported. Multiple rules are
- * AND'ed together, i.e. all the rules must evaluate to true for the entry to
- * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_*
+ * for the possible actions. Example:
  *
  * .. code-block:: c
  *
-- 
GitLab


From 30e3b2cfb576f6ddf098f6de2a264b1ed75caa4c Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 5 Oct 2023 13:54:47 -0700
Subject: [PATCH 1957/2340] drm/xe/rplu: s/ADLP/ALDERLAKE_P

i915 now uses full names for platforms. So we now have
ALDERLAKE instead of ADL. Extend this to xe driver as well.
This will make it easier for macro magic usages.

v2: Do not make changes to compat-i915-headers/i915_drv.h
file with the rest of the changes (Jani)

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231005205450.3177354-3-anusha.srivatsa@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c            | 2 +-
 drivers/gpu/drm/xe/xe_platform_types.h | 2 +-
 drivers/gpu/drm/xe/xe_step.c           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index fd8d7eddd6f6d..2652e7426258a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -230,7 +230,7 @@ static const struct xe_device_desc adl_p_desc = {
 	.has_llc = 1,
 	.require_force_probe = true,
 	.subplatforms = (const struct xe_subplatform_desc[]) {
-		{ XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids },
+		{ XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids },
 		{},
 	},
 };
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index e378a64a0f863..b6fe4342f9f6a 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -27,7 +27,7 @@ enum xe_platform {
 enum xe_subplatform {
 	XE_SUBPLATFORM_UNINITIALIZED = 0,
 	XE_SUBPLATFORM_NONE,
-	XE_SUBPLATFORM_ADLP_RPLU,
+	XE_SUBPLATFORM_ALDERLAKE_P_RPLU,
 	XE_SUBPLATFORM_DG2_G10,
 	XE_SUBPLATFORM_DG2_G11,
 	XE_SUBPLATFORM_DG2_G12,
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index 371cac951e0f4..903c65405d3a1 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -143,7 +143,7 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
 	} else if (xe->info.platform == XE_ALDERLAKE_N) {
 		revids = adln_revids;
 		size = ARRAY_SIZE(adln_revids);
-	} else if (xe->info.subplatform == XE_SUBPLATFORM_ADLP_RPLU) {
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) {
 		revids = adlp_rpl_revids;
 		size = ARRAY_SIZE(adlp_rpl_revids);
 	} else if (xe->info.platform == XE_ALDERLAKE_P) {
-- 
GitLab


From fcb33ca6d6296d2bd45550e26271797801aeb640 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 5 Oct 2023 13:54:48 -0700
Subject: [PATCH 1958/2340] drm/xe/rpls: Add RPLS Support

Add RPLS support that was missing apart from the PCI IDs.

v2: Also add the support in xe_wa_test kunit
v3: rebased.

Cc: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>(v1)
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231005205450.3177354-4-anusha.srivatsa@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_wa_test.c  | 1 +
 drivers/gpu/drm/xe/xe_pci.c            | 7 +++++++
 drivers/gpu/drm/xe/xe_platform_types.h | 1 +
 3 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 16f7f157c8753..69c9ea1fa82bc 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -49,6 +49,7 @@ static const struct platform_test_case cases[] = {
 	PLATFORM_CASE(ALDERLAKE_P, A0),
 	PLATFORM_CASE(ALDERLAKE_P, B0),
 	PLATFORM_CASE(ALDERLAKE_P, C0),
+	SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
 	SUBPLATFORM_CASE(DG2, G10, A0),
 	SUBPLATFORM_CASE(DG2, G10, A1),
 	SUBPLATFORM_CASE(DG2, G10, B0),
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 2652e7426258a..0efe01885cf8b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -213,12 +213,18 @@ static const struct xe_device_desc rkl_desc = {
 	.require_force_probe = true,
 };
 
+static const u16 adls_rpls_ids[] = { XE_RPLS_IDS(NOP), 0 };
+
 static const struct xe_device_desc adl_s_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_S),
 	.has_llc = 1,
 	.require_force_probe = true,
+	.subplatforms = (const struct xe_subplatform_desc[]) {
+		{ XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids },
+		{},
+	},
 };
 
 static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
@@ -335,6 +341,7 @@ static const struct pci_device_id pciidlist[] = {
 	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
 	XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
 	XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
 	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
 	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
 	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index b6fe4342f9f6a..553f53dbd093e 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -28,6 +28,7 @@ enum xe_subplatform {
 	XE_SUBPLATFORM_UNINITIALIZED = 0,
 	XE_SUBPLATFORM_NONE,
 	XE_SUBPLATFORM_ALDERLAKE_P_RPLU,
+	XE_SUBPLATFORM_ALDERLAKE_S_RPLS,
 	XE_SUBPLATFORM_DG2_G10,
 	XE_SUBPLATFORM_DG2_G11,
 	XE_SUBPLATFORM_DG2_G12,
-- 
GitLab


From 93b1b5f59d34d86f3debc35693c47e99935c4429 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 5 Oct 2023 13:54:49 -0700
Subject: [PATCH 1959/2340] drm/xe/rpls: Add Stepping info for RPLS

Add stepping-substepping info. Though it looks
weird, the revision ID for the newer stepping
is indeed backwards and is in accordance to the spec.

v2: s/RPLS/RAPTORLAKE_S (Anusha)
v3: rebase (Anusha)

Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>(v1)
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231005205450.3177354-5-anusha.srivatsa@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_step.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index 903c65405d3a1..eaf1b718f26c9 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -60,6 +60,11 @@ static const struct xe_step_info adls_revids[] = {
 	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
 };
 
+static const struct xe_step_info adls_rpls_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
 static const struct xe_step_info adlp_revids[] = {
 	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
 	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
@@ -143,6 +148,9 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
 	} else if (xe->info.platform == XE_ALDERLAKE_N) {
 		revids = adln_revids;
 		size = ARRAY_SIZE(adln_revids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_S_RPLS) {
+		revids = adls_rpls_revids;
+		size = ARRAY_SIZE(adls_rpls_revids);
 	} else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) {
 		revids = adlp_rpl_revids;
 		size = ARRAY_SIZE(adlp_rpl_revids);
-- 
GitLab


From e3fee3aa7a8911b60776127cb2e1c25ef8584f42 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 5 Oct 2023 13:54:50 -0700
Subject: [PATCH 1960/2340] drm/xe: Add missing ADL entries to xe_test_wa

With all ADl platforms and subplatforms added,
also add support to xe_wa_test kunit tests for checking
their WAs.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231005205450.3177354-6-anusha.srivatsa@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_wa_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 69c9ea1fa82bc..6e1127b276ea1 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -50,6 +50,7 @@ static const struct platform_test_case cases[] = {
 	PLATFORM_CASE(ALDERLAKE_P, B0),
 	PLATFORM_CASE(ALDERLAKE_P, C0),
 	SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
+	SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
 	SUBPLATFORM_CASE(DG2, G10, A0),
 	SUBPLATFORM_CASE(DG2, G10, A1),
 	SUBPLATFORM_CASE(DG2, G10, B0),
-- 
GitLab


From 406be3cc186eec67367b87a2af91cb598ff8e239 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 6 Oct 2023 09:46:15 +0100
Subject: [PATCH 1961/2340] drm/xe/pat: trim the xelp PAT table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't seem to use the 4-7 pat indexes, even though they are defined
by the HW. In a future patch userspace will be able to directly set the
pat_index as part of vm_bind and we don't want to allow setting 4-7.
Simplest is to just ignore them here.

Suggested-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pat.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 31565ccbb0ec1..7c1078707aa0e 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -56,10 +56,6 @@ static const u32 xelp_pat_table[] = {
 	[1] = XELP_PAT_WC,
 	[2] = XELP_PAT_WT,
 	[3] = XELP_PAT_UC,
-	[4] = XELP_PAT_WB,
-	[5] = XELP_PAT_WB,
-	[6] = XELP_PAT_WB,
-	[7] = XELP_PAT_WB,
 };
 
 static const u32 xehpc_pat_table[] = {
-- 
GitLab


From e814389ff180514001df424f48645cf30f4a2a1e Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 6 Oct 2023 09:46:16 +0100
Subject: [PATCH 1962/2340] drm/xe: directly use pat_index for pte_encode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In a future patch userspace will be able to directly set the pat_index
as part of vm_bind. To support this we need to get away from using
xe_cache_level in the low level routines and rather just use the
pat_index directly.

v2: Rebase
v3: Some missed conversions, also prefer tile_to_xe() (Niranjana)
v4: remove leftover const (Lucas)

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Cc: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  2 +-
 drivers/gpu/drm/xe/xe_ggtt.c          | 16 +++++++++-------
 drivers/gpu/drm/xe/xe_ggtt_types.h    |  3 +--
 drivers/gpu/drm/xe/xe_migrate.c       | 19 +++++++++++--------
 drivers/gpu/drm/xe/xe_pt.c            | 11 ++++++-----
 drivers/gpu/drm/xe/xe_pt_types.h      |  8 ++++----
 drivers/gpu/drm/xe/xe_vm.c            | 24 +++++++++++-------------
 7 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 0db4b651ff1a5..c984307560eec 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -322,7 +322,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
-	expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, XE_CACHE_WB, 0);
+	expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, xe->pat.idx[XE_CACHE_WB], 0);
 	if (m->q->vm->flags & XE_VM_FLAG_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 06732461246dc..1368616f20fa4 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -27,7 +27,7 @@
 #define GUC_GGTT_TOP	0xFEE00000
 
 static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
-				   enum xe_cache_level cache)
+				   u16 pat_index)
 {
 	u64 pte;
 
@@ -41,13 +41,12 @@ static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 }
 
 static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
-				    enum xe_cache_level cache)
+				    u16 pat_index)
 {
 	struct xe_device *xe = xe_bo_device(bo);
-	u32 pat_index = xe->pat.idx[cache];
 	u64 pte;
 
-	pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, cache);
+	pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index);
 
 	xe_assert(xe, pat_index <= 3);
 
@@ -79,6 +78,7 @@ void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
 
 static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
 {
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
 	u64 end = start + size - 1;
 	u64 scratch_pte;
 
@@ -86,7 +86,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
 
 	if (ggtt->scratch)
 		scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0,
-							  XE_CACHE_WB);
+							  pat_index);
 	else
 		scratch_pte = 0;
 
@@ -285,9 +285,10 @@ void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
 
 void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
 {
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
 	u64 addr, scratch_pte;
 
-	scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, XE_CACHE_WB);
+	scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index);
 
 	printk("%sGlobal GTT:", prefix);
 	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
@@ -324,11 +325,12 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 
 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
 	u64 start = bo->ggtt_node.start;
 	u64 offset, pte;
 
 	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
-		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, XE_CACHE_WB);
+		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
 		xe_ggtt_set_pte(ggtt, start + offset, pte);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index 486016ea5b67d..d8c584d9a8c3f 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -14,8 +14,7 @@ struct xe_bo;
 struct xe_gt;
 
 struct xe_ggtt_pt_ops {
-	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
-			     enum xe_cache_level cache);
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
 };
 
 struct xe_ggtt {
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 4dc52ac26d526..134b078b6feef 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -163,6 +163,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 				 struct xe_vm *vm)
 {
 	struct xe_device *xe = tile_to_xe(tile);
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
 	u8 id = tile->id;
 	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
 	u32 map_ofs, level, i;
@@ -194,7 +195,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		return ret;
 	}
 
-	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
+	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
 	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
 
 	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
@@ -202,7 +203,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
 		entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE,
-						  XE_CACHE_WB, 0);
+						  pat_index, 0);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
 
@@ -221,7 +222,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
 			entry = vm->pt_ops->pte_encode_bo(batch, i,
-							  XE_CACHE_WB, 0);
+							  pat_index, 0);
 
 			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
 				  entry);
@@ -246,7 +247,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 			flags = XE_PDE_64K;
 
 		entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
-						  XE_PAGE_SIZE, XE_CACHE_WB);
+						  XE_PAGE_SIZE, pat_index);
 		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
 			  entry | flags);
 	}
@@ -254,7 +255,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	/* Write PDE's that point to our BO. */
 	for (i = 0; i < num_entries - num_level; i++) {
 		entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
-						  XE_CACHE_WB);
+						  pat_index);
 
 		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
 			  (i + 1) * 8, u64, entry);
@@ -266,7 +267,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
 		level = 2;
 		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
-		flags = vm->pt_ops->pte_encode_addr(xe, 0, XE_CACHE_WB, level,
+		flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
 						    true, 0);
 
 		/*
@@ -464,6 +465,7 @@ static void emit_pte(struct xe_migrate *m,
 		     struct xe_res_cursor *cur,
 		     u32 size, struct xe_bo *bo)
 {
+	u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
 	u32 ptes;
 	u64 ofs = at_pt * XE_PAGE_SIZE;
 	u64 cur_ofs;
@@ -507,7 +509,7 @@ static void emit_pte(struct xe_migrate *m,
 			}
 
 			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
-								 addr, XE_CACHE_WB,
+								 addr, pat_index,
 								 0, devmem, flags);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
@@ -1226,6 +1228,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	bool first_munmap_rebind = vma &&
 		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
 	struct xe_exec_queue *q_override = !q ? m->q : q;
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
 
 	/* Use the CPU if no in syncs and engine is idle */
 	if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
@@ -1297,7 +1300,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 			xe_tile_assert(tile, pt_bo->size == SZ_4K);
 
-			addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, XE_CACHE_WB, 0);
+			addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 		}
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index a7249b2d807ca..15f7c9d5b311b 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -50,6 +50,7 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 			     unsigned int level)
 {
+	u16 pat_index = tile_to_xe(tile)->pat.idx[XE_CACHE_WB];
 	u8 id = tile->id;
 
 	if (!vm->scratch_bo[id])
@@ -57,9 +58,9 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 
 	if (level > 0)
 		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
-						 0, XE_CACHE_WB);
+						 0, pat_index);
 
-	return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, XE_CACHE_WB, 0);
+	return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, pat_index, 0);
 }
 
 /**
@@ -510,6 +511,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 {
 	struct xe_pt_stage_bind_walk *xe_walk =
 		container_of(walk, typeof(*xe_walk), base);
+	u16 pat_index = tile_to_xe(xe_walk->tile)->pat.idx[xe_walk->cache];
 	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
 	struct xe_vm *vm = xe_walk->vm;
 	struct xe_pt *xe_child;
@@ -526,7 +528,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 
 		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
 						 xe_res_dma(curs) + xe_walk->dma_offset,
-						 xe_walk->vma, xe_walk->cache, level);
+						 xe_walk->vma, pat_index, level);
 		pte |= xe_walk->default_pte;
 
 		/*
@@ -591,8 +593,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 			xe_child->is_compact = true;
 		}
 
-		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0,
-						xe_walk->cache) | flags;
+		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
 		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
 					 pte);
 	}
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index bd6645295fe6d..82cbf1ef8e578 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -38,14 +38,14 @@ struct xe_pt {
 
 struct xe_pt_ops {
 	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
-			     enum xe_cache_level cache, u32 pt_level);
+			     u16 pat_index, u32 pt_level);
 	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
-			      enum xe_cache_level cache, u32 pt_level);
+			      u16 pat_index, u32 pt_level);
 	u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
-			       enum xe_cache_level cache,
+			       u16 pat_index,
 			       u32 pt_level, bool devmem, u64 flags);
 	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
-			     const enum xe_cache_level cache);
+			     u16 pat_index);
 };
 
 struct xe_pt_entry {
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 665af2646243a..035f3232e3b9d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1211,9 +1211,8 @@ static struct drm_gpuvm_ops gpuvm_ops = {
 	.vm_free = xe_vm_free,
 };
 
-static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
+static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
 {
-	u32 pat_index = xe->pat.idx[cache];
 	u64 pte = 0;
 
 	if (pat_index & BIT(0))
@@ -1225,9 +1224,8 @@ static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
 	return pte;
 }
 
-static u64 pte_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
+static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index)
 {
-	u32 pat_index = xe->pat.idx[cache];
 	u64 pte = 0;
 
 	if (pat_index & BIT(0))
@@ -1261,27 +1259,27 @@ static u64 pte_encode_ps(u32 pt_level)
 }
 
 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
-			      const enum xe_cache_level cache)
+			      const u16 pat_index)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 	u64 pde;
 
 	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pde |= pde_encode_cache(xe, cache);
+	pde |= pde_encode_pat_index(xe, pat_index);
 
 	return pde;
 }
 
 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
-			      enum xe_cache_level cache, u32 pt_level)
+			      u16 pat_index, u32 pt_level)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 	u64 pte;
 
 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_cache(xe, cache);
+	pte |= pte_encode_pat_index(xe, pat_index);
 	pte |= pte_encode_ps(pt_level);
 
 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
@@ -1291,7 +1289,7 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 }
 
 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
-			       enum xe_cache_level cache, u32 pt_level)
+			       u16 pat_index, u32 pt_level)
 {
 	struct xe_device *xe = xe_vma_vm(vma)->xe;
 
@@ -1300,7 +1298,7 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
 	if (likely(!xe_vma_read_only(vma)))
 		pte |= XE_PAGE_RW;
 
-	pte |= pte_encode_cache(xe, cache);
+	pte |= pte_encode_pat_index(xe, pat_index);
 	pte |= pte_encode_ps(pt_level);
 
 	if (unlikely(xe_vma_is_null(vma)))
@@ -1310,7 +1308,7 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
 }
 
 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
-				enum xe_cache_level cache,
+				u16 pat_index,
 				u32 pt_level, bool devmem, u64 flags)
 {
 	u64 pte;
@@ -1320,7 +1318,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
 
 	pte = addr;
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_cache(xe, cache);
+	pte |= pte_encode_pat_index(xe, pat_index);
 	pte |= pte_encode_ps(pt_level);
 
 	if (devmem)
@@ -1707,7 +1705,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 {
 	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
-					 XE_CACHE_WB);
+					 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
 }
 
 static struct dma_fence *
-- 
GitLab


From 399a13323f0d148bf00eff7e9156efe8a97063c0 Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Thu, 5 Oct 2023 11:06:14 -0400
Subject: [PATCH 1963/2340] drm/xe: add 28-bit address support in struct xe_reg

Xe driver currently supports 22-bit addresses for MMIO access.
Future platforms will have additional MMIO extension with
larger address spaces, and to access them, the driver will
have to support wider address representation.
Please note that while the XE_REG macro is used for MMIO access,
XE_REG_EXT macro will be used for MMIO-extension access.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Moti Haimovski <mhaimovski@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_reg_defs.h | 16 +++++++++++++++-
 drivers/gpu/drm/xe/xe_device_types.h  | 13 +++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 478787c75e292..5078a9e698599 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -19,7 +19,7 @@ struct xe_reg {
 	union {
 		struct {
 			/** @addr: address */
-			u32 addr:22;
+			u32 addr:28;
 			/**
 			 * @masked: register is "masked", with upper 16bits used
 			 * to identify the bits that are updated on the lower
@@ -35,6 +35,10 @@ struct xe_reg {
 			 * value can inspect it.
 			 */
 			u32 mcr:1;
+			/**
+			 * @ext: access MMIO extension space for current register.
+			 */
+			u32 ext:1;
 		};
 		/** @raw: Raw value with both address and options */
 		u32 raw;
@@ -84,6 +88,16 @@ struct xe_reg_mcr {
  */
 #define XE_REG(r_, ...)		((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__))
 
+/**
+ * XE_REG_EXT - Create a struct xe_reg from extension offset and additional
+ * flags
+ * @r_: Register extension offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG_EXT(r_, ...)	\
+	((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1))
+
 /**
  * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags
  * @r_: Register offset
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index dd52d112d58fa..f7c7d44a64652 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -140,6 +140,19 @@ struct xe_tile {
 		void *regs;
 	} mmio;
 
+	/**
+	 * @mmio_ext: MMIO-extension info for a tile.
+	 *
+	 * Each tile has its own additional 256MB (28-bit) MMIO-extension space.
+	 */
+	struct {
+		/** @size: size of tile's additional MMIO-extension space */
+		size_t size;
+
+		/** @regs: pointer to tile's additional MMIO-extension space */
+		void *regs;
+	} mmio_ext;
+
 	/** @mem: memory management info for tile */
 	struct {
 		/**
-- 
GitLab


From 6360ebd1a12384efa984b44b057b79edce6484df Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Thu, 5 Oct 2023 11:06:15 -0400
Subject: [PATCH 1964/2340] drm/xe: add read/write support for MMIO extension
 space

A distinction has to be made when addressing the MMIO space or
the additional MMIO extension space.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Moti Haimovski <mhaimovski@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 9e0fd4a6fb29c..ae09f777d711d 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -29,7 +29,7 @@ static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	return readb(tile->mmio.regs + reg.addr);
+	return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
 }
 
 static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
@@ -39,7 +39,7 @@ static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	return readw(tile->mmio.regs + reg.addr);
+	return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
 }
 
 static inline void xe_mmio_write32(struct xe_gt *gt,
@@ -50,7 +50,7 @@ static inline void xe_mmio_write32(struct xe_gt *gt,
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	writel(val, tile->mmio.regs + reg.addr);
+	writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
 }
 
 static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
@@ -60,7 +60,7 @@ static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
 	if (reg.addr < gt->mmio.adj_limit)
 		reg.addr += gt->mmio.adj_offset;
 
-	return readl(tile->mmio.regs + reg.addr);
+	return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
 }
 
 static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
-- 
GitLab


From fdef72e02e20d7bc3c4b25607a2f8afa99d509eb Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Thu, 5 Oct 2023 11:06:16 -0400
Subject: [PATCH 1965/2340] drm/xe: add a flag to bypass multi-tile config from
 MTCFG reg

Skip reading this register as it is not relevant in the new devices.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Moti Haimovski <mhaimovski@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  2 ++
 drivers/gpu/drm/xe/xe_mmio.c         | 24 +++++++++++++-----------
 drivers/gpu/drm/xe/xe_pci.c          |  2 ++
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index f7c7d44a64652..a9bf9c784b7c5 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -243,6 +243,8 @@ struct xe_device {
 		u8 has_llc:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
+		/** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */
+		u8 bypass_mtcfg:1;
 	} info;
 
 	/** @irq: device interrupt state */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 3ccc0af4430be..054ad752303f6 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -318,26 +318,28 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 
 static void xe_mmio_probe_tiles(struct xe_device *xe)
 {
+	u8 adj_tile_count = xe->info.tile_count;
 	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	u32 mtcfg;
-	u8 adj_tile_count;
 	u8 id;
 
 	if (xe->info.tile_count == 1)
 		return;
 
-	mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
-	adj_tile_count = xe->info.tile_count =
-		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+	if (!xe->info.bypass_mtcfg) {
+		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
+		adj_tile_count = xe->info.tile_count =
+			REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
 
-	/*
-	 * FIXME: Needs some work for standalone media, but should be impossible
-	 * with multi-tile for now.
-	 */
-	xe->info.gt_count = xe->info.tile_count;
+		/*
+		 * FIXME: Needs some work for standalone media, but should be impossible
+		 * with multi-tile for now.
+		 */
+		xe->info.gt_count = xe->info.tile_count;
 
-	drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
-		 xe->info.tile_count, adj_tile_count);
+		drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
+			 xe->info.tile_count, adj_tile_count);
+	}
 
 	if (xe->info.tile_count > 1) {
 		const int mmio_bar = 0;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 0efe01885cf8b..f8e813e174580 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -56,6 +56,7 @@ struct xe_device_desc {
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
 	u8 has_llc:1;
+	u8 bypass_mtcfg:1;
 };
 
 #define PLATFORM(x)		\
@@ -553,6 +554,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_llc = desc->has_llc;
+	xe->info.bypass_mtcfg = desc->bypass_mtcfg;
 
 	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
 	xe->info.vram_flags = graphics_desc->vram_flags;
-- 
GitLab


From 866b2b1764341ada0611f54c6b19285c32d20efa Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Thu, 5 Oct 2023 11:06:17 -0400
Subject: [PATCH 1966/2340] drm/xe: add MMIO extension support flags

Besides the regular MMIO space that exists by default, MMIO
extension support & MMIO extension tile size should both be
defined per device, and updated from the device's descriptor.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Moti Haimovski <mhaimovski@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 4 ++++
 drivers/gpu/drm/xe/xe_pci.c          | 3 +++
 drivers/gpu/drm/xe/xe_pci_types.h    | 2 ++
 3 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index a9bf9c784b7c5..b64f810189ff3 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -200,6 +200,8 @@ struct xe_device {
 		const char *graphics_name;
 		/** @media_name: media IP name */
 		const char *media_name;
+		/** @tile_mmio_ext_size: size of MMIO extension space, per-tile */
+		u32 tile_mmio_ext_size;
 		/** @graphics_verx100: graphics IP version */
 		u32 graphics_verx100;
 		/** @media_verx100: media IP version */
@@ -245,6 +247,8 @@ struct xe_device {
 		u8 has_range_tlb_invalidation:1;
 		/** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */
 		u8 bypass_mtcfg:1;
+		/** @supports_mmio_ext: supports MMIO extension/s */
+		u8 supports_mmio_ext:1;
 	} info;
 
 	/** @irq: device interrupt state */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index f8e813e174580..8161982796dd2 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -57,6 +57,7 @@ struct xe_device_desc {
 	u8 is_dgfx:1;
 	u8 has_llc:1;
 	u8 bypass_mtcfg:1;
+	u8 supports_mmio_ext:1;
 };
 
 #define PLATFORM(x)		\
@@ -555,6 +556,8 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_llc = desc->has_llc;
 	xe->info.bypass_mtcfg = desc->bypass_mtcfg;
+	xe->info.supports_mmio_ext = desc->supports_mmio_ext;
+	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
 
 	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
 	xe->info.vram_flags = graphics_desc->vram_flags;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index bd0b0d87413e6..dd3546ba6f905 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -20,6 +20,8 @@ struct xe_graphics_desc {
 
 	u64 hw_engine_mask;	/* hardware engines provided by graphics IP */
 
+	u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */
+
 	u8 max_remote_tiles:2;
 
 	u8 has_asid:1;
-- 
GitLab


From ef29b390c7345f081412454538ab94c395068153 Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Thu, 5 Oct 2023 11:06:18 -0400
Subject: [PATCH 1967/2340] drm/xe: map MMIO BAR according to the num of tiles
 in device desc

When MMIO BAR is initially mapped, the driver assumes a single tile device.
However, former memory allocations take all tiles into account.
First, a common standard for resource usage is needed here.
Second, with the next (6th) patch in this series, the MMIO BAR remapping
will be done only if a reduced-tile device is attached.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Moti Haimovski <mhaimovski@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 054ad752303f6..52e4572e3c4ac 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -383,14 +383,13 @@ int xe_mmio_init(struct xe_device *xe)
 	int err;
 
 	/*
-	 * Map the first 16MB of th BAR, which includes the registers (0-4MB),
-	 * reserved space (4MB-8MB), and GGTT (8MB-16MB) for a single tile.
-	 * This will get remapped later if we determine that we're running
-	 * on a multi-tile system.
+	 * Map the maximum expected BAR size, which will get remapped later
+	 * if we determine that we're running on a reduced-tile system.
+	 * The first 16MB of the BAR, belong to the root tile, and include:
+	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
 	 */
-	xe->mmio.size = SZ_16M;
-	xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar,
-				  xe->mmio.size);
+	xe->mmio.size = (SZ_16M + xe->info.tile_mmio_ext_size) * xe->info.tile_count;
+	xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size);
 	if (xe->mmio.regs == NULL) {
 		drm_err(&xe->drm, "failed to map registers\n");
 		return -EIO;
-- 
GitLab


From a4e2f3a299ea1c9c4b6d0e51048273eac28256b9 Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Thu, 5 Oct 2023 11:06:19 -0400
Subject: [PATCH 1968/2340] drm/xe: refactor xe_mmio_probe_tiles to support
 MMIO extension

In future ASICs, there will be an additional MMIO extension space
for all tiles altogether, residing on top of MMIO address space.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Moti Haimovski <mhaimovski@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 70 +++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 52e4572e3c4ac..e4cf9bfec422e 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -318,50 +318,56 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 
 static void xe_mmio_probe_tiles(struct xe_device *xe)
 {
-	u8 adj_tile_count = xe->info.tile_count;
+	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
+	u8 id, tile_count = xe->info.tile_count;
 	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	const int mmio_bar = 0;
+	struct xe_tile *tile;
+	void *regs;
 	u32 mtcfg;
-	u8 id;
 
-	if (xe->info.tile_count == 1)
-		return;
+	if (tile_count == 1)
+		goto add_mmio_ext;
 
 	if (!xe->info.bypass_mtcfg) {
 		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
-		adj_tile_count = xe->info.tile_count =
-			REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
-
-		/*
-		 * FIXME: Needs some work for standalone media, but should be impossible
-		 * with multi-tile for now.
-		 */
-		xe->info.gt_count = xe->info.tile_count;
-
-		drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
-			 xe->info.tile_count, adj_tile_count);
-	}
-
-	if (xe->info.tile_count > 1) {
-		const int mmio_bar = 0;
-		struct xe_tile *tile;
-		size_t size;
-		void *regs;
-
-		if (adj_tile_count > 1) {
+		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+		if (tile_count < xe->info.tile_count) {
+			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
+					xe->info.tile_count, tile_count);
 			pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
-			xe->mmio.size = SZ_16M * adj_tile_count;
-			xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev),
-						  mmio_bar, xe->mmio.size);
+			xe->mmio.size = (tile_mmio_size + tile_mmio_ext_size) * tile_count;
+			xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size);
+			xe->info.tile_count = tile_count;
+
+			/*
+			 * FIXME: Needs some work for standalone media, but should be impossible
+			 * with multi-tile for now.
+			 */
+			xe->info.gt_count = xe->info.tile_count;
 		}
+	}
 
-		size = xe->mmio.size / adj_tile_count;
-		regs = xe->mmio.regs;
+	regs = xe->mmio.regs;
+	for_each_tile(tile, xe, id) {
+		tile->mmio.size = tile_mmio_size;
+		tile->mmio.regs = regs;
+		regs += tile_mmio_size;
+	}
+
+add_mmio_ext:
+	/* By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
+	 * When supported, there could be an additional contiguous multi-tile MMIO extension
+	 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
+	 */
+	if (xe->info.supports_mmio_ext) {
+		regs = xe->mmio.regs + tile_mmio_size * tile_count;
 
 		for_each_tile(tile, xe, id) {
-			tile->mmio.size = size;
-			tile->mmio.regs = regs;
+			tile->mmio_ext.size = tile_mmio_ext_size;
+			tile->mmio_ext.regs = regs;
 
-			regs += size;
+			regs += tile_mmio_ext_size;
 		}
 	}
 }
-- 
GitLab


From b6f45db5d08ac6ac1827ed64d009f3a25ad293c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Wed, 4 Oct 2023 14:01:42 -0700
Subject: [PATCH 1969/2340] drm/xe: Set PTE_AE for smem allocations in
 integrated devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without this if a atomic operation is executed in Xe2 integrated GPUs
it causes engine memory catastrophic error.

This fixes at least 3 failures in piglit sanity and 2 failures in
crucible for LNL.

v3:
- only add PTE_AE to smem in integrated

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 15f7c9d5b311b..ab08e46445295 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -628,6 +628,7 @@ static int
 xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
 {
+	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_bo *bo = xe_vma_bo(vma);
 	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
 		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
@@ -649,10 +650,12 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 	int ret;
 
+	if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
+	    (is_devmem || !IS_DGFX(xe)))
+		xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+
 	if (is_devmem) {
-		xe_walk.default_pte = XE_PPGTT_PTE_DM;
-		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
-			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
 		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
 		xe_walk.cache = XE_CACHE_WB;
 	} else {
-- 
GitLab


From 3445166655cdcdcf18f10ffa124e6ae0ee3018c6 Mon Sep 17 00:00:00 2001
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Date: Wed, 11 Oct 2023 10:44:18 +0530
Subject: [PATCH 1970/2340] drm/xe: Add new DG2 PCI IDs

Add recently added PCI IDs for DG2

BSpec: 44477
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231011051418.2767145-1-shekhar.chauhan@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/xe_pciids.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index f6ac6d9772ee1..d105527df0c46 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -138,7 +138,11 @@
 	MACRO__(0x56A5, ## __VA_ARGS__),	\
 	MACRO__(0x56A6, ## __VA_ARGS__),	\
 	MACRO__(0x56B0, ## __VA_ARGS__),	\
-	MACRO__(0x56B1, ## __VA_ARGS__)
+	MACRO__(0x56B1, ## __VA_ARGS__),	\
+	MACRO__(0x56BA, ## __VA_ARGS__),	\
+	MACRO__(0x56BB, ## __VA_ARGS__),	\
+	MACRO__(0x56BC, ## __VA_ARGS__),	\
+	MACRO__(0x56BD, ## __VA_ARGS__)
 
 #define XE_DG2_G12_IDS(MACRO__, ...)		\
 	MACRO__(0x5696, ## __VA_ARGS__),	\
-- 
GitLab


From cd0adf746527dc2d1410adf5bf09ee6f4cd22a79 Mon Sep 17 00:00:00 2001
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Date: Wed, 11 Oct 2023 21:15:26 +0530
Subject: [PATCH 1971/2340] drm/xe/dg2: Remove one PCI ID

The bspec was recently updated to remove PCI ID
0x5698; this ID is actually reserved for future
use and should not be treated as DG2-G11.

BSpec: 44477
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231011154526.2819754-1-shekhar.chauhan@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/xe_pciids.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index d105527df0c46..11deefceffd0a 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -134,7 +134,6 @@
 	MACRO__(0x5693, ## __VA_ARGS__),	\
 	MACRO__(0x5694, ## __VA_ARGS__),	\
 	MACRO__(0x5695, ## __VA_ARGS__),	\
-	MACRO__(0x5698, ## __VA_ARGS__),	\
 	MACRO__(0x56A5, ## __VA_ARGS__),	\
 	MACRO__(0x56A6, ## __VA_ARGS__),	\
 	MACRO__(0x56B0, ## __VA_ARGS__),	\
-- 
GitLab


From 5120243bfb0dabc9f16924a5fc66e8ef26f0f8d3 Mon Sep 17 00:00:00 2001
From: Vitaly Lubart <vitaly.lubart@intel.com>
Date: Mon, 28 Aug 2023 13:07:07 +0300
Subject: [PATCH 1972/2340] drm/xe/gsc: add HECI2 register offsets

Add HECI2 register offsets for DG1 and DG2 to regs/xe_regs.h

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vitaly Lubart <vitaly.lubart@intel.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 1574d11d4e143..e4408473e802b 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -33,6 +33,10 @@
 #define XEHPC_BCS6_RING_BASE			0x3ea000
 #define XEHPC_BCS7_RING_BASE			0x3ec000
 #define XEHPC_BCS8_RING_BASE			0x3ee000
+
+#define DG1_GSC_HECI2_BASE                      0x00259000
+#define DG2_GSC_HECI2_BASE                      0x00374000
+
 #define GSCCS_RING_BASE				0x11a000
 #define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
 #define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
-- 
GitLab


From 437d7a84ada7a4cfeab2d9555c446936c3fb09f4 Mon Sep 17 00:00:00 2001
From: Vitaly Lubart <vitaly.lubart@intel.com>
Date: Wed, 30 Aug 2023 13:05:40 +0300
Subject: [PATCH 1973/2340] drm/xe/gsc: add has_heci_gscfi indication to device

Mark support of MEI-GSC interaction per device.
Add has_heci_gscfi indication to xe_device and xe_pci structures.
Mark DG1 and DG2 devices as supported.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vitaly Lubart <vitaly.lubart@intel.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  3 +++
 drivers/gpu/drm/xe/xe_pci.c          | 10 ++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index b64f810189ff3..f7822dd9f1e4f 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -29,6 +29,7 @@ struct xe_pat_ops;
 #define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
 #define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
 #define IS_DGFX(xe) ((xe)->info.is_dgfx)
+#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
 
 #define XE_VRAM_FLAGS_NEED64K		BIT(0)
 
@@ -249,6 +250,8 @@ struct xe_device {
 		u8 bypass_mtcfg:1;
 		/** @supports_mmio_ext: supports MMIO extension/s */
 		u8 supports_mmio_ext:1;
+		/** @has_heci_gscfi: device has heci gscfi */
+		u8 has_heci_gscfi:1;
 	} info;
 
 	/** @irq: device interrupt state */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 8161982796dd2..9963772caabbd 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -55,6 +55,8 @@ struct xe_device_desc {
 
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
+	u8 has_heci_gscfi:1;
+
 	u8 has_llc:1;
 	u8 bypass_mtcfg:1;
 	u8 supports_mmio_ext:1;
@@ -260,6 +262,7 @@ static const struct xe_device_desc dg1_desc = {
 	DGFX_FEATURES,
 	PLATFORM(XE_DG1),
 	.require_force_probe = true,
+	.has_heci_gscfi = 1,
 };
 
 static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
@@ -269,6 +272,7 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
 #define DG2_FEATURES \
 	DGFX_FEATURES, \
 	PLATFORM(XE_DG2), \
+	.has_heci_gscfi = 1, \
 	.subplatforms = (const struct xe_subplatform_desc[]) { \
 		{ XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
 		{ XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
@@ -552,6 +556,7 @@ static int xe_info_init(struct xe_device *xe,
 		return -ENODEV;
 
 	xe->info.is_dgfx = desc->is_dgfx;
+	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_llc = desc->has_llc;
@@ -684,7 +689,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_pci_disable;
 
-	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d",
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d gscfi:%d",
 		desc->platform_name,
 		subplatform_desc ? subplatform_desc->name : "",
 		xe->info.devid, xe->info.revid,
@@ -695,7 +700,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		xe->info.media_name,
 		xe->info.media_verx100 / 100,
 		xe->info.media_verx100 % 100,
-		xe->info.dma_mask_size, xe->info.tile_count);
+		xe->info.dma_mask_size, xe->info.tile_count,
+		xe->info.has_heci_gscfi);
 
 	drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
 		xe_step_name(xe->info.step.graphics),
-- 
GitLab


From 87a4c85d3a3ed579c86fd2612715ccb94c4001ff Mon Sep 17 00:00:00 2001
From: Vitaly Lubart <vitaly.lubart@intel.com>
Date: Mon, 28 Aug 2023 19:24:03 +0300
Subject: [PATCH 1974/2340] drm/xe/gsc: add gsc device support

Create mei-gscfi auxiliary device and configure interrupts
to be consumed by mei-gsc device driver.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vitaly Lubart <vitaly.lubart@intel.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig           |   1 +
 drivers/gpu/drm/xe/Makefile          |   1 +
 drivers/gpu/drm/xe/xe_device.c       |   4 +
 drivers/gpu/drm/xe/xe_device_types.h |   4 +
 drivers/gpu/drm/xe/xe_heci_gsc.c     | 222 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_heci_gsc.h     |  35 +++++
 drivers/gpu/drm/xe/xe_irq.c          |  22 ++-
 7 files changed, 283 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_heci_gsc.c
 create mode 100644 drivers/gpu/drm/xe/xe_heci_gsc.h

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 7bffc039d63f0..cfa6420b9915a 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -27,6 +27,7 @@ config DRM_XE
 	select DRM_SCHED
 	select MMU_NOTIFIER
 	select WANT_DEV_COREDUMP
+	select AUXILIARY_BUS
 	help
 	  Experimental driver for Intel Xe series GPUs
 
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index a0e7896a4ef7c..32eee57b41846 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -78,6 +78,7 @@ xe-y += xe_bb.o \
 	xe_guc_log.o \
 	xe_guc_pc.o \
 	xe_guc_submit.o \
+	xe_heci_gsc.o \
 	xe_hw_engine.o \
 	xe_hw_engine_class_sysfs.o \
 	xe_hw_fence.o \
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 283fc5990000e..2bbd3aa2809b6 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -321,6 +321,8 @@ int xe_device_probe(struct xe_device *xe)
 			goto err_irq_shutdown;
 	}
 
+	xe_heci_gsc_init(xe);
+
 	err = drm_dev_register(&xe->drm, 0);
 	if (err)
 		goto err_irq_shutdown;
@@ -344,6 +346,8 @@ err_irq_shutdown:
 
 void xe_device_remove(struct xe_device *xe)
 {
+	xe_heci_gsc_fini(xe);
+
 	xe_irq_shutdown(xe);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index f7822dd9f1e4f..4bc668ff8615f 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -13,6 +13,7 @@
 #include <drm/ttm/ttm_device.h>
 
 #include "xe_devcoredump_types.h"
+#include "xe_heci_gsc.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
 #include "xe_pt_types.h"
@@ -384,6 +385,9 @@ struct xe_device {
 	/** @hwmon: hwmon subsystem integration */
 	struct xe_hwmon *hwmon;
 
+	/** @heci_gsc: graphics security controller */
+	struct xe_heci_gsc heci_gsc;
+
 	/* For pcode */
 	struct mutex sb_lock;
 
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
new file mode 100644
index 0000000000000..3328ddca42d03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2023, Intel Corporation. All rights reserved.
+ */
+
+#include <linux/irq.h>
+#include <linux/mei_aux.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+
+#include "regs/xe_regs.h"
+#include "xe_device_types.h"
+#include "xe_drv.h"
+#include "xe_heci_gsc.h"
+#include "xe_platform_types.h"
+
+#define GSC_BAR_LENGTH  0x00000FFC
+
+static void heci_gsc_irq_mask(struct irq_data *d)
+{
+	/* generic irq handling */
+}
+
+static void heci_gsc_irq_unmask(struct irq_data *d)
+{
+	/* generic irq handling */
+}
+
+static struct irq_chip heci_gsc_irq_chip = {
+	.name = "gsc_irq_chip",
+	.irq_mask = heci_gsc_irq_mask,
+	.irq_unmask = heci_gsc_irq_unmask,
+};
+
+static int heci_gsc_irq_init(int irq)
+{
+	irq_set_chip_and_handler_name(irq, &heci_gsc_irq_chip,
+				      handle_simple_irq, "heci_gsc_irq_handler");
+
+	return irq_set_chip_data(irq, NULL);
+}
+
+/**
+ * struct heci_gsc_def - graphics security controller heci interface definitions
+ *
+ * @name: name of the heci device
+ * @bar: address of the mmio bar
+ * @bar_size: size of the mmio bar
+ * @use_polling: indication of using polling mode for the device
+ * @slow_firmware: indication of whether the device is slow (needs longer timeouts)
+ */
+struct heci_gsc_def {
+	const char *name;
+	unsigned long bar;
+	size_t bar_size;
+	bool use_polling;
+	bool slow_firmware;
+};
+
+/* gsc resources and definitions */
+static const struct heci_gsc_def heci_gsc_def_dg1 = {
+	.name = "mei-gscfi",
+	.bar = DG1_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+};
+
+static const struct heci_gsc_def heci_gsc_def_dg2 = {
+	.name = "mei-gscfi",
+	.bar = DG2_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+};
+
+static void heci_gsc_release_dev(struct device *dev)
+{
+	struct auxiliary_device *aux_dev = to_auxiliary_dev(dev);
+	struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev);
+
+	kfree(adev);
+}
+
+void xe_heci_gsc_fini(struct xe_device *xe)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+
+	if (!HAS_HECI_GSCFI(xe))
+		return;
+
+	if (heci_gsc->adev) {
+		struct auxiliary_device *aux_dev = &heci_gsc->adev->aux_dev;
+
+		auxiliary_device_delete(aux_dev);
+		auxiliary_device_uninit(aux_dev);
+		heci_gsc->adev = NULL;
+	}
+
+	if (heci_gsc->irq >= 0)
+		irq_free_desc(heci_gsc->irq);
+	heci_gsc->irq = -1;
+}
+
+static int heci_gsc_irq_setup(struct xe_device *xe)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+	int ret;
+
+	heci_gsc->irq = irq_alloc_desc(0);
+	if (heci_gsc->irq < 0) {
+		drm_err(&xe->drm, "gsc irq error %d\n", heci_gsc->irq);
+		return heci_gsc->irq;
+	}
+
+	ret = heci_gsc_irq_init(heci_gsc->irq);
+	if (ret < 0)
+		drm_err(&xe->drm, "gsc irq init failed %d\n", ret);
+
+	return ret;
+}
+
+static int heci_gsc_add_device(struct xe_device *xe, const struct heci_gsc_def *def)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct auxiliary_device *aux_dev;
+	struct mei_aux_device *adev;
+	int ret;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+	adev->irq = heci_gsc->irq;
+	adev->bar.parent = &pdev->resource[0];
+	adev->bar.start = def->bar + pdev->resource[0].start;
+	adev->bar.end = adev->bar.start + def->bar_size - 1;
+	adev->bar.flags = IORESOURCE_MEM;
+	adev->bar.desc = IORES_DESC_NONE;
+	adev->slow_firmware = def->slow_firmware;
+
+	aux_dev = &adev->aux_dev;
+	aux_dev->name = def->name;
+	aux_dev->id = (pci_domain_nr(pdev->bus) << 16) |
+		      PCI_DEVID(pdev->bus->number, pdev->devfn);
+	aux_dev->dev.parent = &pdev->dev;
+	aux_dev->dev.release = heci_gsc_release_dev;
+
+	ret = auxiliary_device_init(aux_dev);
+	if (ret < 0) {
+		drm_err(&xe->drm, "gsc aux init failed %d\n", ret);
+		kfree(adev);
+		return ret;
+	}
+
+	heci_gsc->adev = adev; /* needed by the notifier */
+	ret = auxiliary_device_add(aux_dev);
+	if (ret < 0) {
+		drm_err(&xe->drm, "gsc aux add failed %d\n", ret);
+		heci_gsc->adev = NULL;
+
+		/* adev will be freed with the put_device() and .release sequence */
+		auxiliary_device_uninit(aux_dev);
+	}
+	return ret;
+}
+
+void xe_heci_gsc_init(struct xe_device *xe)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+	const struct heci_gsc_def *def;
+	int ret;
+
+	if (!HAS_HECI_GSCFI(xe))
+		return;
+
+	heci_gsc->irq = -1;
+
+	if (xe->info.platform == XE_DG2) {
+		def = &heci_gsc_def_dg2;
+	} else if (xe->info.platform == XE_DG1) {
+		def = &heci_gsc_def_dg1;
+	} else {
+		drm_warn_once(&xe->drm, "Unknown platform\n");
+		return;
+	}
+
+	if (!def->name) {
+		drm_warn_once(&xe->drm, "HECI is not implemented!\n");
+		return;
+	}
+
+	if (!def->use_polling) {
+		ret = heci_gsc_irq_setup(xe);
+		if (ret)
+			goto fail;
+	}
+
+	ret = heci_gsc_add_device(xe, def);
+	if (ret)
+		goto fail;
+
+	return;
+fail:
+	xe_heci_gsc_fini(xe);
+}
+
+void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
+{
+	int ret;
+
+	if ((iir & GSC_IRQ_INTF(1)) == 0)
+		return;
+
+	if (!HAS_HECI_GSCFI(xe)) {
+		drm_warn_once(&xe->drm, "GSC irq: not supported");
+		return;
+	}
+
+	if (xe->heci_gsc.irq < 0)
+		return;
+
+	ret = generic_handle_irq(xe->heci_gsc.irq);
+	if (ret)
+		drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
+}
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h
new file mode 100644
index 0000000000000..9db454478faeb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2023, Intel Corporation. All rights reserved.
+ */
+#ifndef __XE_HECI_GSC_DEV_H__
+#define __XE_HECI_GSC_DEV_H__
+
+#include <linux/types.h>
+
+struct xe_device;
+struct mei_aux_device;
+
+/*
+ * The HECI1 bit corresponds to bit15 and HECI2 to bit14.
+ * The reason for this is to allow growth for more interfaces in the future.
+ */
+#define GSC_IRQ_INTF(_x)  BIT(15 - (_x))
+
+/**
+ * struct xe_heci_gsc - graphics security controller for xe, HECI interface
+ *
+ * @adev : pointer to mei auxiliary device structure
+ * @irq : irq number
+ *
+ */
+struct xe_heci_gsc {
+	struct mei_aux_device *adev;
+	int irq;
+};
+
+void xe_heci_gsc_init(struct xe_device *xe);
+void xe_heci_gsc_fini(struct xe_device *xe);
+void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir);
+
+#endif /* __XE_HECI_GSC_DEV_H__ */
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 4cc5f7086b4ce..5631e5e1ea206 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -141,6 +141,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 ccs_mask, bcs_mask;
 	u32 irqs, dmask, smask;
+	u32 gsc_mask = 0;
 
 	if (xe_device_uc_enabled(xe)) {
 		irqs = GT_RENDER_USER_INTERRUPT |
@@ -190,9 +191,13 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
 		xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
 		xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
 
-		if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
-			xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, irqs);
-			xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~irqs);
+		if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER))
+			gsc_mask = irqs;
+		else if (HAS_HECI_GSCFI(xe))
+			gsc_mask = GSC_IRQ_INTF(1);
+		if (gsc_mask) {
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask);
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
 		}
 	}
 }
@@ -306,7 +311,11 @@ static void gt_irq_handler(struct xe_tile *tile,
 			}
 
 			if (class == XE_ENGINE_CLASS_OTHER) {
-				gt_other_irq_handler(engine_gt, instance, intr_vec);
+				/* HECI GSCFI interrupts come from outside of GT */
+				if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE)
+					xe_heci_gsc_irq_handler(xe, intr_vec);
+				else
+					gt_other_irq_handler(engine_gt, instance, intr_vec);
 				continue;
 			}
 		}
@@ -480,8 +489,9 @@ static void gt_irq_reset(struct xe_tile *tile)
 	if (ccs_mask & (BIT(2)|BIT(3)))
 		xe_mmio_write32(mmio,  CCS2_CCS3_INTR_MASK, ~0);
 
-	if (tile->media_gt &&
-	    xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) {
+	if ((tile->media_gt &&
+	     xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
+	    HAS_HECI_GSCFI(tile_to_xe(tile))) {
 		xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
 		xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
 	}
-- 
GitLab


From de54bb81d9d43d0b66a63d839963e9d359e0467d Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 16 Oct 2023 09:34:51 -0700
Subject: [PATCH 1975/2340] drm/xe: Make MI_FLUSH_DW immediate size more
 explicit

Despite its name, MI_FLUSH_DW instruction can write an immediate value
of either dword size or qword size, depending on the 'length' field of
the instruction.  Since "length" excludes the first two dwords of the
instruction, a value of 2 in the length field implies a dword write and
a value of 3 implies a qword write.  Even in cases where the flush
instruction's post-sync operation is set to "no write" we're still
expected to size the overall instruction as if we were doing a dword or
qword write (i.e., a length of 1 shouldn't be used on modern platforms).

Rather than baking a size of "1" into the #define and then adding
another unexplained "+ 1" at all the spots where the definition gets
used, lets just create MI_FLUSH_IMM_DW and MI_FLUSH_IMM_QW definitions
that should be OR'd into the instruction header to make it more explicit
what behavior we're requesting.

Bspec: 60229
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231016163449.1300701-9-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  5 ++++-
 drivers/gpu/drm/xe/xe_ring_ops.c          | 10 +++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 21738281bdd04..9432a960346b4 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -30,12 +30,15 @@
 #define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
 
-#define MI_FLUSH_DW		MI_INSTR(0x26, 1)
+#define MI_FLUSH_DW		MI_INSTR(0x26, 0)
 #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
 #define   MI_INVALIDATE_TLB		(1<<18)
 #define   MI_FLUSH_DW_CCS		(1<<16)
 #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
 #define   MI_FLUSH_DW_USE_GTT		(1<<2)
+#define   MI_FLUSH_LENGTH		GENMASK(5, 0)
+#define   MI_FLUSH_IMM_DW		REG_FIELD_PREP(MI_FLUSH_LENGTH, 2)
+#define   MI_FLUSH_IMM_QW		REG_FIELD_PREP(MI_FLUSH_LENGTH, 3)
 
 #define MI_BATCH_BUFFER_START		MI_INSTR(0x31, 1)
 
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 6eec7c7e4bc56..b95cc7713ff93 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -80,7 +80,7 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
 static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
 			       u32 *dw, int i)
 {
-	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW |
+	dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
 		(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
 	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
 	dw[i++] = 0;
@@ -100,9 +100,9 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
 
 static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
 {
-	dw[i] = MI_FLUSH_DW + 1;
+	dw[i] = MI_FLUSH_DW;
 	dw[i] |= flag;
-	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
+	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
 		MI_FLUSH_DW_STORE_INDEX;
 
 	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
@@ -365,8 +365,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 
 	i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
 
-	dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
-		   MI_FLUSH_DW_OP_STOREDW) + 1;
+	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
+		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW;
 	dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
 	dw[i++] = 0;
 	dw[i++] = seqno; /* value */
-- 
GitLab


From e12a64881e97a78694012646cabd211399db8753 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 16 Oct 2023 09:34:52 -0700
Subject: [PATCH 1976/2340] drm/xe: Separate number of registers from MI_LRI
 opcode

Keeping the number of registers to be loaded as a separate macro from
the instruction opcode will simplify some upcoming LRC parsing code.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231016163449.1300701-10-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 4 +++-
 drivers/gpu/drm/xe/xe_gt.c                | 2 +-
 drivers/gpu/drm/xe/xe_lrc.c               | 2 +-
 drivers/gpu/drm/xe/xe_ring_ops.c          | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 9432a960346b4..ad1e5466671ba 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -25,9 +25,11 @@
 #define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
 #define MI_STORE_DATA_IMM	MI_INSTR(0x20, 0)
 
-#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
+#define MI_LOAD_REGISTER_IMM	MI_INSTR(0x22, 0)
 #define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
 #define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
+#define   MI_LRI_LENGTH			GENMASK(5, 0)
+#define   MI_LRI_NUM_REGS(x)		REG_FIELD_PREP(MI_LRI_LENGTH, 2 * (x) - 1)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
 
 #define MI_FLUSH_DW		MI_INSTR(0x26, 0)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index c63e2e4750b1e..a42ee3b9b8c71 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -145,7 +145,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 	if (count) {
 		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
 
-		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count);
+		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
 
 		xa_for_each(&sr->xa, idx, entry) {
 			struct xe_reg reg = entry->reg;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 35ae6e531d8a5..81463bd5e4904 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -111,7 +111,7 @@ static void set_offsets(u32 *regs,
 		flags = *data >> 6;
 		data++;
 
-		*regs = MI_LOAD_REGISTER_IMM(count);
+		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
 		if (flags & POSTED)
 			*regs |= MI_LRI_FORCE_POSTED;
 		*regs |= MI_LRI_LRM_CS_MMIO;
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index b95cc7713ff93..1e36b07d3e010 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -50,7 +50,7 @@ static u32 preparser_disable(bool state)
 static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
 			      u32 *dw, int i)
 {
-	dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
+	dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
 	dw[i++] = reg.addr + gt->mmio.adj_offset;
 	dw[i++] = AUX_INV;
 	dw[i++] = MI_NOOP;
-- 
GitLab


From 14a1e6a4a460fceae50fc1cf6b50d36c4ba96a7b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 16 Oct 2023 09:34:53 -0700
Subject: [PATCH 1977/2340] drm/xe: Clarify number of dwords/qwords stored by
 MI_STORE_DATA_IMM

MI_STORE_DATA_IMM can store either dword values or qword values, and can
store more than one value if the instruction's length field is large
enough.  Create explicit defines to specify the number of dwords/qwords
to be stored, which will set the instruction length correctly and, if
necessary, turn on the 'store qword' bit.

While we're here, also replace an open-coded version of
MI_STORE_DATA_IMM with the common macros.

Bspec: 60246
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231016163449.1300701-11-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 3 +++
 drivers/gpu/drm/xe/xe_migrate.c           | 9 +++------
 drivers/gpu/drm/xe/xe_ring_ops.c          | 6 ++----
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index ad1e5466671ba..8c2e0da694d82 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -24,6 +24,9 @@
 
 #define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
 #define MI_STORE_DATA_IMM	MI_INSTR(0x20, 0)
+#define   MI_SDI_GGTT		REG_BIT(22)
+#define   MI_SDI_NUM_DW(x)	((x) + 1)
+#define   MI_SDI_NUM_QW(x)	(REG_BIT(21) | (2 * (x) + 1))
 
 #define MI_LOAD_REGISTER_IMM	MI_INSTR(0x22, 0)
 #define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 134b078b6feef..b81ef1bdecc6a 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -482,8 +482,7 @@ static void emit_pte(struct xe_migrate *m,
 	while (ptes) {
 		u32 chunk = min(0x1ffU, ptes);
 
-		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
-			(chunk * 2 + 1);
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
 		bb->cs[bb->len++] = ofs;
 		bb->cs[bb->len++] = 0;
 
@@ -1083,8 +1082,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 		if (!(bb->len & 1))
 			bb->cs[bb->len++] = MI_NOOP;
 
-		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
-			(chunk * 2 + 1);
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
 		bb->cs[bb->len++] = lower_32_bits(addr);
 		bb->cs[bb->len++] = upper_32_bits(addr);
 		ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk,
@@ -1290,8 +1288,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		emit_arb_clear(bb);
 
 		/* Map our PT's to gtt */
-		bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
-			(num_updates * 2 + 1);
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates);
 		bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
 		bb->cs[bb->len++] = 0; /* upper_32_bits */
 
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 1e36b07d3e010..da13cc7ba6af7 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -69,7 +69,7 @@ static int emit_user_interrupt(u32 *dw, int i)
 
 static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
 {
-	dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
+	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
 	dw[i++] = addr;
 	dw[i++] = 0;
 	dw[i++] = value;
@@ -140,12 +140,10 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
 	return i;
 }
 
-#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))
-
 static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
 				       u32 *dw, int i)
 {
-	dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED;
+	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1);
 	dw[i++] = lower_32_bits(addr);
 	dw[i++] = upper_32_bits(addr);
 	dw[i++] = lower_32_bits(value);
-- 
GitLab


From 0134f130e76ad6e323e15ccb00624586c8763075 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 16 Oct 2023 09:34:54 -0700
Subject: [PATCH 1978/2340] drm/xe: Extract MI_* instructions to their own
 header

Extracting the common MI_* instructions that can be used with any engine
to their own header will make it easier as we add additional engine
instructions in upcoming patches.

Also, since the majority of GPU instructions (both MI and non-MI) have
a "length" field in bits 7:0 of the instruction header, a common define
is added for that.  Instruction-specific length fields are still defined
for special case instructions that have larger/smaller length fields.

v2:
 - Use "instr" instead of "inst" as the short form of "instruction"
   everywhere.  (Lucas)
 - Include xe_reg_defs.h instead of the i915 compat header.  (Lucas)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231016163449.1300701-12-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gpu/drm/xe/instructions/xe_instr_defs.h   | 31 ++++++++++
 .../gpu/drm/xe/instructions/xe_mi_commands.h  | 58 +++++++++++++++++++
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h     | 41 -------------
 drivers/gpu/drm/xe/xe_bb.c                    |  1 +
 drivers/gpu/drm/xe/xe_execlist.c              |  1 +
 drivers/gpu/drm/xe/xe_gt.c                    |  1 +
 drivers/gpu/drm/xe/xe_lrc.c                   |  1 +
 drivers/gpu/drm/xe/xe_migrate.c               |  1 +
 drivers/gpu/drm/xe/xe_ring_ops.c              |  3 +-
 9 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/instructions/xe_instr_defs.h
 create mode 100644 drivers/gpu/drm/xe/instructions/xe_mi_commands.h

diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
new file mode 100644
index 0000000000000..a7ec46395786f
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_INSTR_DEFS_H_
+#define _XE_INSTR_DEFS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * The first dword of any GPU instruction is the "instruction header."  Bits
+ * 31:29 identify the general type of the command and determine how exact
+ * opcodes and sub-opcodes will be encoded in the remaining bits.
+ */
+#define XE_INSTR_CMD_TYPE		GENMASK(31, 29)
+#define   XE_INSTR_MI			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
+
+/*
+ * Most (but not all) instructions have a "length" field in the instruction
+ * header.  The value expected is the total number of dwords for the
+ * instruction, minus two.
+ *
+ * Some instructions have length fields longer or shorter than 8 bits, but
+ * those are rare.  This definition can be used for the common case where
+ * the length field is from 7:0.
+ */
+#define XE_INSTR_LEN_MASK		GENMASK(7, 0)
+#define XE_INSTR_NUM_DW(x)		REG_FIELD_PREP(XE_INSTR_LEN_MASK, (x) - 2)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
new file mode 100644
index 0000000000000..753ebf1efa78c
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MI_COMMANDS_H_
+#define _XE_MI_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/*
+ * MI (Memory Interface) commands are supported by all GT engines.  They
+ * provide general memory operations and command streamer control.  MI commands
+ * have a command type of 0x0 (MI_COMMAND) in bits 31:29 of the instruction
+ * header dword and a specific MI opcode in bits 28:23.
+ */
+
+#define MI_OPCODE			REG_GENMASK(28, 23)
+#define MI_SUBOPCODE			REG_GENMASK(22, 17)  /* used with MI_EXPANSION */
+
+#define __MI_INSTR(opcode) \
+	(XE_INSTR_MI | REG_FIELD_PREP(MI_OPCODE, opcode))
+
+#define MI_NOOP				__MI_INSTR(0x0)
+#define MI_USER_INTERRUPT		__MI_INSTR(0x2)
+#define MI_ARB_CHECK			__MI_INSTR(0x5)
+
+#define MI_ARB_ON_OFF			__MI_INSTR(0x8)
+#define   MI_ARB_ENABLE			REG_BIT(0)
+#define   MI_ARB_DISABLE		0x0
+
+#define MI_BATCH_BUFFER_END		__MI_INSTR(0xA)
+#define MI_STORE_DATA_IMM		__MI_INSTR(0x20)
+#define   MI_SDI_GGTT			REG_BIT(22)
+#define   MI_SDI_LEN_DW			GENMASK(9, 0)
+#define   MI_SDI_NUM_DW(x)		REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
+#define   MI_SDI_NUM_QW(x)		(REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
+					 REG_BIT(21))
+
+#define MI_LOAD_REGISTER_IMM		__MI_INSTR(0x22)
+#define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
+#define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
+#define   MI_LRI_NUM_REGS(x)		XE_INSTR_NUM_DW(2 * (x) + 1)
+#define   MI_LRI_FORCE_POSTED		REG_BIT(12)
+
+#define MI_FLUSH_DW			__MI_INSTR(0x26)
+#define   MI_FLUSH_DW_STORE_INDEX	REG_BIT(21)
+#define   MI_INVALIDATE_TLB		REG_BIT(18)
+#define   MI_FLUSH_DW_CCS		REG_BIT(16)
+#define   MI_FLUSH_DW_OP_STOREDW	REG_BIT(14)
+#define   MI_FLUSH_DW_LEN_DW		REG_GENMASK(5, 0)
+#define   MI_FLUSH_IMM_DW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 4 - 2)
+#define   MI_FLUSH_IMM_QW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2)
+#define   MI_FLUSH_DW_USE_GTT		REG_BIT(2)
+
+#define MI_BATCH_BUFFER_START		__MI_INSTR(0x31)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 8c2e0da694d82..4402f72481dcd 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -8,45 +8,6 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define INSTR_CLIENT_SHIFT      29
-#define   INSTR_MI_CLIENT       0x0
-#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
-
-#define MI_INSTR(opcode, flags) \
-	(__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
-
-#define MI_NOOP			MI_INSTR(0, 0)
-#define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
-
-#define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
-#define   MI_ARB_ENABLE			(1<<0)
-#define   MI_ARB_DISABLE		(0<<0)
-
-#define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
-#define MI_STORE_DATA_IMM	MI_INSTR(0x20, 0)
-#define   MI_SDI_GGTT		REG_BIT(22)
-#define   MI_SDI_NUM_DW(x)	((x) + 1)
-#define   MI_SDI_NUM_QW(x)	(REG_BIT(21) | (2 * (x) + 1))
-
-#define MI_LOAD_REGISTER_IMM	MI_INSTR(0x22, 0)
-#define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
-#define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
-#define   MI_LRI_LENGTH			GENMASK(5, 0)
-#define   MI_LRI_NUM_REGS(x)		REG_FIELD_PREP(MI_LRI_LENGTH, 2 * (x) - 1)
-#define   MI_LRI_FORCE_POSTED		(1<<12)
-
-#define MI_FLUSH_DW		MI_INSTR(0x26, 0)
-#define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
-#define   MI_INVALIDATE_TLB		(1<<18)
-#define   MI_FLUSH_DW_CCS		(1<<16)
-#define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
-#define   MI_FLUSH_DW_USE_GTT		(1<<2)
-#define   MI_FLUSH_LENGTH		GENMASK(5, 0)
-#define   MI_FLUSH_IMM_DW		REG_FIELD_PREP(MI_FLUSH_LENGTH, 2)
-#define   MI_FLUSH_IMM_QW		REG_FIELD_PREP(MI_FLUSH_LENGTH, 3)
-
-#define MI_BATCH_BUFFER_START		MI_INSTR(0x31, 1)
-
 #define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
 #define   SRC_ACCESS_TYPE_SHIFT		21
 #define   DST_ACCESS_TYPE_SHIFT		20
@@ -106,6 +67,4 @@
 #define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
 #define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 
-#define MI_ARB_CHECK            MI_INSTR(0x05, 0)
-
 #endif
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index f871ba82bc9ba..7c124475c4289 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -5,6 +5,7 @@
 
 #include "xe_bb.h"
 
+#include "instructions/xe_mi_commands.h"
 #include "regs/xe_gpu_commands.h"
 #include "xe_device.h"
 #include "xe_exec_queue_types.h"
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 22dfe91b2b836..1541fb64949c6 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -7,6 +7,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "instructions/xe_mi_commands.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index a42ee3b9b8c71..74e1f47bd4011 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -10,6 +10,7 @@
 #include <drm/drm_managed.h>
 #include <drm/xe_drm.h>
 
+#include "instructions/xe_mi_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "xe_assert.h"
 #include "xe_bb.h"
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 81463bd5e4904..a048671318395 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -5,6 +5,7 @@
 
 #include "xe_lrc.h"
 
+#include "instructions/xe_mi_commands.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index b81ef1bdecc6a..731beb622fe87 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -13,6 +13,7 @@
 #include <drm/xe_drm.h>
 
 #include "generated/xe_wa_oob.h"
+#include "instructions/xe_mi_commands.h"
 #include "regs/xe_gpu_commands.h"
 #include "tests/xe_test.h"
 #include "xe_assert.h"
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index da13cc7ba6af7..58676f4b989f3 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -6,6 +6,7 @@
 #include "xe_ring_ops.h"
 
 #include "generated/xe_wa_oob.h"
+#include "instructions/xe_mi_commands.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
@@ -91,7 +92,7 @@ static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
 
 static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
 {
-	dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag;
+	dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag | XE_INSTR_NUM_DW(3);
 	dw[i++] = lower_32_bits(batch_addr);
 	dw[i++] = upper_32_bits(batch_addr);
 
-- 
GitLab


From 0f60547f7d2c3db16b151540e6697c7d90a9f93b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 16 Oct 2023 09:34:55 -0700
Subject: [PATCH 1979/2340] drm/xe/debugfs: Add dump of default LRCs' MI
 instructions

For non-RCS engines, nearly all of the LRC state is composed of MI
instructions (specifically MI_LOAD_REGISTER_IMM).  Providing a dump
interface allows us to verify that the context image layout matches
what's documented in the bspec, and also allows us to check whether LRC
workarounds are being properly captured by the default state we record
at startup.

For now, the non-MI instructions found in the RCS and CCS engines will
dump as "unknown;" parsing of those will be added in a follow-up patch.

v2:
 - Add raw instruction header as well as decoded meaning.  (Lucas)
 - Check that num_dw isn't greater than remaining_dw for instructions
   that have a "# dwords" field.  (Lucas)
 - Clarify comment about skipping over ppHWSP.  (Lucas)

Bspec: 64993
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231016163449.1300701-13-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gpu/drm/xe/instructions/xe_instr_defs.h   |  1 +
 .../gpu/drm/xe/instructions/xe_mi_commands.h  |  3 +
 drivers/gpu/drm/xe/xe_gt_debugfs.c            | 46 +++++++++
 drivers/gpu/drm/xe/xe_lrc.c                   | 97 +++++++++++++++++++
 drivers/gpu/drm/xe/xe_lrc.h                   |  5 +
 5 files changed, 152 insertions(+)

diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
index a7ec46395786f..e403b4fcc20a8 100644
--- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -15,6 +15,7 @@
  */
 #define XE_INSTR_CMD_TYPE		GENMASK(31, 29)
 #define   XE_INSTR_MI			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
+#define   XE_INSTR_GFXPIPE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
 
 /*
  * Most (but not all) instructions have a "length" field in the instruction
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 753ebf1efa78c..1cfa96167fde3 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -30,6 +30,9 @@
 #define   MI_ARB_DISABLE		0x0
 
 #define MI_BATCH_BUFFER_END		__MI_INSTR(0xA)
+#define MI_TOPOLOGY_FILTER		__MI_INSTR(0xD)
+#define MI_FORCE_WAKEUP			__MI_INSTR(0x1D)
+
 #define MI_STORE_DATA_IMM		__MI_INSTR(0x20)
 #define   MI_SDI_GGTT			REG_BIT(22)
 #define   MI_SDI_LEN_DW			GENMASK(9, 0)
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index cd6d28c7b923a..c4b67cf09f8f2 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -15,6 +15,7 @@
 #include "xe_gt_mcr.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_engine.h"
+#include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_pat.h"
 #include "xe_reg_sr.h"
@@ -149,6 +150,46 @@ static int pat(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int rcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER);
+	return 0;
+}
+
+static int ccs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE);
+	return 0;
+}
+
+static int bcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY);
+	return 0;
+}
+
+static int vcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE);
+	return 0;
+}
+
+static int vecs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE);
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
@@ -159,6 +200,11 @@ static const struct drm_info_list debugfs_list[] = {
 	{"register-save-restore", register_save_restore, 0},
 	{"workarounds", workarounds, 0},
 	{"pat", pat, 0},
+	{"default_lrc_rcs", rcs_default_lrc},
+	{"default_lrc_ccs", ccs_default_lrc},
+	{"default_lrc_bcs", bcs_default_lrc},
+	{"default_lrc_vcs", vcs_default_lrc},
+	{"default_lrc_vecs", vecs_default_lrc},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index a048671318395..38e98c54464b6 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -16,6 +16,7 @@
 #include "xe_drm_client.h"
 #include "xe_exec_queue_types.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_hw_fence.h"
 #include "xe_map.h"
 #include "xe_vm.h"
@@ -903,3 +904,99 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
 {
 	return __xe_lrc_parallel_map(lrc);
 }
+
+static int instr_dw(u32 cmd_header)
+{
+	/* Most instructions have the # of dwords (minus 2) in 7:0 */
+	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
+}
+
+static int dump_mi_command(struct drm_printer *p,
+			   struct xe_gt *gt,
+			   u32 *dw,
+			   int remaining_dw)
+{
+	u32 inst_header = *dw;
+	u32 numdw = instr_dw(inst_header);
+	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
+	int num_noop;
+
+	/* First check for commands that don't have/use a '# DW' field */
+	switch (inst_header & MI_OPCODE) {
+	case MI_NOOP:
+		num_noop = 1;
+		while (num_noop < remaining_dw &&
+		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
+			num_noop++;
+		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
+		return num_noop;
+
+	case MI_TOPOLOGY_FILTER:
+		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
+		return 1;
+
+	case MI_BATCH_BUFFER_END:
+		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
+		/* Return 'remaining_dw' to consume the rest of the LRC */
+		return remaining_dw;
+	}
+
+	/*
+	 * Any remaining commands include a # of dwords.  We should make sure
+	 * it doesn't exceed the remaining size of the LRC.
+	 */
+	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+		numdw = remaining_dw;
+
+	switch (inst_header & MI_OPCODE) {
+	case MI_LOAD_REGISTER_IMM:
+		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
+			   inst_header, (numdw - 1) / 2);
+		for (int i = 1; i < numdw; i += 2)
+			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
+		return numdw;
+
+	case MI_FORCE_WAKEUP:
+		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
+		return numdw;
+
+	default:
+		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
+			   inst_header, opcode, numdw);
+		return numdw;
+	}
+}
+
+void xe_lrc_dump_default(struct drm_printer *p,
+			 struct xe_gt *gt,
+			 enum xe_engine_class hwe_class)
+{
+	u32 *dw;
+	int remaining_dw, num_dw;
+
+	if (!gt->default_lrc[hwe_class]) {
+		drm_printf(p, "No default LRC for class %d\n", hwe_class);
+		return;
+	}
+
+	/*
+	 * Skip the beginning of the LRC since it contains the per-process
+	 * hardware status page.
+	 */
+	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
+	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
+
+	while (remaining_dw > 0) {
+		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
+			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
+		} else {
+			num_dw = min(instr_dw(*dw), remaining_dw);
+			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
+				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
+				   num_dw);
+		}
+
+		dw += num_dw;
+		remaining_dw -= num_dw;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 3a6e8fc5a8370..a7056eda5e0c4 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -7,6 +7,7 @@
 
 #include "xe_lrc_types.h"
 
+struct drm_printer;
 struct xe_device;
 struct xe_exec_queue;
 enum xe_engine_class;
@@ -47,4 +48,8 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
 
 size_t xe_lrc_skip_size(struct xe_device *xe);
 
+void xe_lrc_dump_default(struct drm_printer *p,
+			 struct xe_gt *gt,
+			 enum xe_engine_class);
+
 #endif
-- 
GitLab


From 6de492ae5f5ee6edccf1e1fae472bc5f95cec8e6 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 16 Oct 2023 09:34:56 -0700
Subject: [PATCH 1980/2340] drm/xe/debugfs: Include GFXPIPE commands in LRC
 dump

RCS and CCS engines include several non-register gfxpipe commands in
their LRC images.  Include these in the dump output so that we can see
exactly what's inside the context snapshot.

v2:
 - Include raw instruction header in output
 - Add 3DSTATE_AMFS_TEXTURE_POINTERS and 3DSTATE_MONOFILTER_SIZE.  The
   first was supposed to be removed in Xe_HPG, and the second by
   gen12, but both still show up in the RCS LRC.

v3:
 - Sanity check that we don't have numdw > remaining_dw.  (Lucas)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231016163449.1300701-14-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../drm/xe/instructions/xe_gfxpipe_commands.h | 108 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_lrc.c                   | 108 ++++++++++++++++++
 2 files changed, 216 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h

diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
new file mode 100644
index 0000000000000..b5fbc761265c1
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GFXPIPE_COMMANDS_H_
+#define _XE_GFXPIPE_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+#define GFXPIPE_PIPELINE		REG_GENMASK(28, 27)
+#define   PIPELINE_COMMON		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x0)
+#define   PIPELINE_SINGLE_DW		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x1)
+#define   PIPELINE_COMPUTE		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x2)
+#define   PIPELINE_3D			REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x3)
+
+#define GFXPIPE_OPCODE			REG_GENMASK(26, 24)
+#define GFXPIPE_SUBOPCODE		REG_GENMASK(23, 16)
+
+#define GFXPIPE_MATCH_MASK		(XE_INSTR_CMD_TYPE | \
+					 GFXPIPE_PIPELINE | \
+					 GFXPIPE_OPCODE | \
+					 GFXPIPE_SUBOPCODE)
+
+#define GFXPIPE_COMMON_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_COMMON | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_SINGLE_DW_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_SINGLE_DW | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_3D_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_3D | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_COMPUTE_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_COMPUTE | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define STATE_BASE_ADDRESS			GFXPIPE_COMMON_CMD(0x1, 0x1)
+#define STATE_SIP				GFXPIPE_COMMON_CMD(0x1, 0x2)
+#define GPGPU_CSR_BASE_ADDRESS			GFXPIPE_COMMON_CMD(0x1, 0x4)
+#define STATE_COMPUTE_MODE			GFXPIPE_COMMON_CMD(0x1, 0x5)
+#define CMD_3DSTATE_BTD				GFXPIPE_COMMON_CMD(0x1, 0x6)
+
+#define CMD_3DSTATE_VF_STATISTICS		GFXPIPE_SINGLE_DW_CMD(0x0, 0xB)
+
+#define PIPELINE_SELECT				GFXPIPE_SINGLE_DW_CMD(0x1, 0x4)
+
+#define CMD_3DSTATE_CLEAR_PARAMS		GFXPIPE_3D_CMD(0x0, 0x4)
+#define CMD_3DSTATE_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x5)
+#define CMD_3DSTATE_STENCIL_BUFFER		GFXPIPE_3D_CMD(0x0, 0x6)
+#define CMD_3DSTATE_HIER_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x7)
+#define CMD_3DSTATE_VERTEX_BUFFERS		GFXPIPE_3D_CMD(0x0, 0x8)
+#define CMD_3DSTATE_INDEX_BUFFER		GFXPIPE_3D_CMD(0x0, 0xA)
+#define CMD_3DSTATE_VF				GFXPIPE_3D_CMD(0x0, 0xC)
+#define CMD_3DSTATE_CC_STATE_POINTERS		GFXPIPE_3D_CMD(0x0, 0xE)
+#define CMD_3DSTATE_WM				GFXPIPE_3D_CMD(0x0, 0x14)
+#define CMD_3DSTATE_SAMPLE_MASK			GFXPIPE_3D_CMD(0x0, 0x18)
+#define CMD_3DSTATE_SBE				GFXPIPE_3D_CMD(0x0, 0x1F)
+#define CMD_3DSTATE_PS				GFXPIPE_3D_CMD(0x0, 0x20)
+#define CMD_3DSTATE_CPS_POINTERS		GFXPIPE_3D_CMD(0x0, 0x22)
+#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC	GFXPIPE_3D_CMD(0x0, 0x23)
+#define CMD_3DSTATE_BLEND_STATE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x24)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2A)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2F)
+#define CMD_3DSTATE_VF_INSTANCING		GFXPIPE_3D_CMD(0x0, 0x49)
+#define CMD_3DSTATE_VF_TOPOLOGY			GFXPIPE_3D_CMD(0x0, 0x4B)
+#define CMD_3DSTATE_WM_CHROMAKEY		GFXPIPE_3D_CMD(0x0, 0x4C)
+#define CMD_3DSTATE_PS_BLEND			GFXPIPE_3D_CMD(0x0, 0x4D)
+#define CMD_3DSTATE_WM_DEPTH_STENCIL		GFXPIPE_3D_CMD(0x0, 0x4E)
+#define CMD_3DSTATE_PS_EXTRA			GFXPIPE_3D_CMD(0x0, 0x4F)
+#define CMD_3DSTATE_SBE_SWIZ			GFXPIPE_3D_CMD(0x0, 0x51)
+#define CMD_3DSTATE_VFG				GFXPIPE_3D_CMD(0x0, 0x57)
+#define CMD_3DSTATE_AMFS			GFXPIPE_3D_CMD(0x0, 0x6F)
+#define CMD_3DSTATE_DEPTH_BOUNDS		GFXPIPE_3D_CMD(0x0, 0x71)
+#define CMD_3DSTATE_AMFS_TEXTURE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x72)
+#define CMD_3DSTATE_CONSTANT_TS_POINTER		GFXPIPE_3D_CMD(0x0, 0x73)
+#define CMD_3DSTATE_MESH_DISTRIB		GFXPIPE_3D_CMD(0x0, 0x78)
+#define CMD_3DSTATE_SBE_MESH			GFXPIPE_3D_CMD(0x0, 0x82)
+#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER	GFXPIPE_3D_CMD(0x0, 0x83)
+
+#define CMD_3DSTATE_CHROMA_KEY			GFXPIPE_3D_CMD(0x1, 0x4)
+#define CMD_3DSTATE_POLY_STIPPLE_OFFSET		GFXPIPE_3D_CMD(0x1, 0x6)
+#define CMD_3DSTATE_POLY_STIPPLE_PATTERN	GFXPIPE_3D_CMD(0x1, 0x7)
+#define CMD_3DSTATE_LINE_STIPPLE		GFXPIPE_3D_CMD(0x1, 0x8)
+#define CMD_3DSTATE_AA_LINE_PARAMETERS		GFXPIPE_3D_CMD(0x1, 0xA)
+#define CMD_3DSTATE_MONOFILTER_SIZE		GFXPIPE_3D_CMD(0x1, 0x11)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_VS	GFXPIPE_3D_CMD(0x1, 0x12)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_HS	GFXPIPE_3D_CMD(0x1, 0x13)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_DS	GFXPIPE_3D_CMD(0x1, 0x14)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_GS	GFXPIPE_3D_CMD(0x1, 0x15)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_PS	GFXPIPE_3D_CMD(0x1, 0x16)
+#define CMD_3DSTATE_SO_DECL_LIST		GFXPIPE_3D_CMD(0x1, 0x17)
+#define   CMD_3DSTATE_SO_DECL_LIST_DW_LEN	REG_GENMASK(8, 0)
+#define CMD_3DSTATE_BINDING_TABLE_POOL_ALLOC	GFXPIPE_3D_CMD(0x1, 0x19)
+#define CMD_3DSTATE_SAMPLE_PATTERN		GFXPIPE_3D_CMD(0x1, 0x1C)
+#define CMD_3DSTATE_3D_MODE			GFXPIPE_3D_CMD(0x1, 0x1E)
+#define CMD_3DSTATE_SUBSLICE_HASH_TABLE		GFXPIPE_3D_CMD(0x1, 0x1F)
+#define CMD_3DSTATE_SLICE_TABLE_STATE_POINTERS	GFXPIPE_3D_CMD(0x1, 0x20)
+#define CMD_3DSTATE_PTBR_TILE_PASS_INFO		GFXPIPE_3D_CMD(0x1, 0x22)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 38e98c54464b6..332fc0602074e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -6,6 +6,7 @@
 #include "xe_lrc.h"
 
 #include "instructions/xe_mi_commands.h"
+#include "instructions/xe_gfxpipe_commands.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
@@ -907,6 +908,15 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
 
 static int instr_dw(u32 cmd_header)
 {
+	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
+	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
+	    GFXPIPE_SINGLE_DW_CMD(0, 0))
+		return 1;
+
+	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
+	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
+		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
+
 	/* Most instructions have the # of dwords (minus 2) in 7:0 */
 	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
 }
@@ -967,6 +977,102 @@ static int dump_mi_command(struct drm_printer *p,
 	}
 }
 
+static int dump_gfxpipe_command(struct drm_printer *p,
+				struct xe_gt *gt,
+				u32 *dw,
+				int remaining_dw)
+{
+	u32 numdw = instr_dw(*dw);
+	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
+	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
+	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
+
+	/*
+	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
+	 * remaining size of the LRC.
+	 */
+	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+		numdw = remaining_dw;
+
+	switch (*dw & GFXPIPE_MATCH_MASK) {
+#define MATCH(cmd) \
+	case cmd: \
+		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
+		return numdw
+#define MATCH3D(cmd) \
+	case CMD_##cmd: \
+		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
+		return numdw
+
+	MATCH(STATE_BASE_ADDRESS);
+	MATCH(STATE_SIP);
+	MATCH(GPGPU_CSR_BASE_ADDRESS);
+	MATCH(STATE_COMPUTE_MODE);
+	MATCH3D(3DSTATE_BTD);
+
+	MATCH3D(3DSTATE_VF_STATISTICS);
+
+	MATCH(PIPELINE_SELECT);
+
+	MATCH3D(3DSTATE_CLEAR_PARAMS);
+	MATCH3D(3DSTATE_DEPTH_BUFFER);
+	MATCH3D(3DSTATE_STENCIL_BUFFER);
+	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
+	MATCH3D(3DSTATE_VERTEX_BUFFERS);
+	MATCH3D(3DSTATE_INDEX_BUFFER);
+	MATCH3D(3DSTATE_VF);
+	MATCH3D(3DSTATE_CC_STATE_POINTERS);
+	MATCH3D(3DSTATE_WM);
+	MATCH3D(3DSTATE_SAMPLE_MASK);
+	MATCH3D(3DSTATE_SBE);
+	MATCH3D(3DSTATE_PS);
+	MATCH3D(3DSTATE_CPS_POINTERS);
+	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
+	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
+	MATCH3D(3DSTATE_VF_INSTANCING);
+	MATCH3D(3DSTATE_VF_TOPOLOGY);
+	MATCH3D(3DSTATE_WM_CHROMAKEY);
+	MATCH3D(3DSTATE_PS_BLEND);
+	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
+	MATCH3D(3DSTATE_PS_EXTRA);
+	MATCH3D(3DSTATE_SBE_SWIZ);
+	MATCH3D(3DSTATE_VFG);
+	MATCH3D(3DSTATE_AMFS);
+	MATCH3D(3DSTATE_DEPTH_BOUNDS);
+	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
+	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
+	MATCH3D(3DSTATE_MESH_DISTRIB);
+	MATCH3D(3DSTATE_SBE_MESH);
+	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
+
+	MATCH3D(3DSTATE_CHROMA_KEY);
+	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
+	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
+	MATCH3D(3DSTATE_LINE_STIPPLE);
+	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
+	MATCH3D(3DSTATE_MONOFILTER_SIZE);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+	MATCH3D(3DSTATE_SO_DECL_LIST);
+	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
+	MATCH3D(3DSTATE_SAMPLE_PATTERN);
+	MATCH3D(3DSTATE_3D_MODE);
+	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
+	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
+	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
+
+	default:
+		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
+			   *dw, pipeline, opcode, subopcode, numdw);
+		return numdw;
+	}
+}
+
 void xe_lrc_dump_default(struct drm_printer *p,
 			 struct xe_gt *gt,
 			 enum xe_engine_class hwe_class)
@@ -989,6 +1095,8 @@ void xe_lrc_dump_default(struct drm_printer *p,
 	while (remaining_dw > 0) {
 		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
 			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
+		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
+			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
 		} else {
 			num_dw = min(instr_dw(*dw), remaining_dw);
 			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
-- 
GitLab


From fd47ded2379265b58dd5ae699fa1f5a14e65fdfc Mon Sep 17 00:00:00 2001
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Date: Wed, 20 Sep 2023 15:29:21 -0400
Subject: [PATCH 1981/2340] drm/xe: Fix array bounds check for queries

Queries are 0-indexed, so a query with value N is invalid if the
ARRAY_SIZE is N. Modify the check to account for that.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index a951205100fea..e0c2203e428e1 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -381,7 +381,7 @@ int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	    XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1]))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, query->query > ARRAY_SIZE(xe_query_funcs)))
+	if (XE_IOCTL_DBG(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
 		return -EINVAL;
 
 	idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
-- 
GitLab


From 61d63a59f68c7ab558b020cc675b9f94ef403c5f Mon Sep 17 00:00:00 2001
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Date: Wed, 20 Sep 2023 15:29:22 -0400
Subject: [PATCH 1982/2340] drm/xe: Set the correct type for
 xe_to_user_engine_class

User engine class is of type u16. Set the same type for the array used to
map xe engines to user engines.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index e0c2203e428e1..cbccd5c3dbc8d 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -19,7 +19,7 @@
 #include "xe_macros.h"
 #include "xe_ttm_vram_mgr.h"
 
-static const enum xe_engine_class xe_to_user_engine_class[] = {
+static const u16 xe_to_user_engine_class[] = {
 	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
 	[XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
 	[XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
-- 
GitLab


From 7793d00d1bf5923e77bbe7ace8089bfdfa19dc38 Mon Sep 17 00:00:00 2001
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Date: Mon, 14 Aug 2023 15:37:34 -0700
Subject: [PATCH 1983/2340] drm/xe: Correlate engine and cpu timestamps with
 better accuracy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Perf measurements rely on CPU and engine timestamps to correlate
events of interest across these time domains. Current mechanisms get
these timestamps separately and the calculated delta between these
timestamps lack enough accuracy.

To improve the accuracy of these time measurements to within a few us,
add a query that returns the engine and cpu timestamps captured as
close to each other as possible.

Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24591

v2:
- Fix kernel-doc warnings (CI)
- Document input params and group them together (Jose)
- s/cs/engine/ (Jose)
- Remove padding in the query (Ashutosh)

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo finished the s/cs/engine renaming]
---
 drivers/gpu/drm/xe/xe_query.c | 138 ++++++++++++++++++++++++++++++++++
 include/uapi/drm/xe_drm.h     | 104 +++++++++++++++++++------
 2 files changed, 218 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index cbccd5c3dbc8d..cd3e0f3208a6c 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -6,10 +6,12 @@
 #include "xe_query.h"
 
 #include <linux/nospec.h>
+#include <linux/sched/clock.h>
 
 #include <drm/ttm/ttm_placement.h>
 #include <drm/xe_drm.h>
 
+#include "regs/xe_engine_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_exec_queue.h"
@@ -17,6 +19,7 @@
 #include "xe_gt.h"
 #include "xe_guc_hwconfig.h"
 #include "xe_macros.h"
+#include "xe_mmio.h"
 #include "xe_ttm_vram_mgr.h"
 
 static const u16 xe_to_user_engine_class[] = {
@@ -27,6 +30,14 @@ static const u16 xe_to_user_engine_class[] = {
 	[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
 };
 
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
 static size_t calc_hw_engine_info_size(struct xe_device *xe)
 {
 	struct xe_hw_engine *hwe;
@@ -45,6 +56,132 @@ static size_t calc_hw_engine_info_size(struct xe_device *xe)
 	return i * sizeof(struct drm_xe_engine_class_instance);
 }
 
+typedef u64 (*__ktime_func_t)(void);
+static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
+{
+	/*
+	 * Use logic same as the perf subsystem to allow user to select the
+	 * reference clock id to be used for timestamps.
+	 */
+	switch (clk_id) {
+	case CLOCK_MONOTONIC:
+		return &ktime_get_ns;
+	case CLOCK_MONOTONIC_RAW:
+		return &ktime_get_raw_ns;
+	case CLOCK_REALTIME:
+		return &ktime_get_real_ns;
+	case CLOCK_BOOTTIME:
+		return &ktime_get_boottime_ns;
+	case CLOCK_TAI:
+		return &ktime_get_clocktai_ns;
+	default:
+		return NULL;
+	}
+}
+
+static void
+__read_timestamps(struct xe_gt *gt,
+		  struct xe_reg lower_reg,
+		  struct xe_reg upper_reg,
+		  u64 *engine_ts,
+		  u64 *cpu_ts,
+		  u64 *cpu_delta,
+		  __ktime_func_t cpu_clock)
+{
+	u32 upper, lower, old_upper, loop = 0;
+
+	upper = xe_mmio_read32(gt, upper_reg);
+	do {
+		*cpu_delta = local_clock();
+		*cpu_ts = cpu_clock();
+		lower = xe_mmio_read32(gt, lower_reg);
+		*cpu_delta = local_clock() - *cpu_delta;
+		old_upper = upper;
+		upper = xe_mmio_read32(gt, upper_reg);
+	} while (upper != old_upper && loop++ < 2);
+
+	*engine_ts = (u64)upper << 32 | lower;
+}
+
+static int
+query_engine_cycles(struct xe_device *xe,
+		    struct drm_xe_device_query *query)
+{
+	struct drm_xe_query_engine_cycles __user *query_ptr;
+	struct drm_xe_engine_class_instance *eci;
+	struct drm_xe_query_engine_cycles resp;
+	size_t size = sizeof(resp);
+	__ktime_func_t cpu_clock;
+	struct xe_hw_engine *hwe;
+	struct xe_gt *gt;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	query_ptr = u64_to_user_ptr(query->data);
+	if (copy_from_user(&resp, query_ptr, size))
+		return -EFAULT;
+
+	cpu_clock = __clock_id_to_func(resp.clockid);
+	if (!cpu_clock)
+		return -EINVAL;
+
+	eci = &resp.eci;
+	if (eci->gt_id > XE_MAX_GT_PER_TILE)
+		return -EINVAL;
+
+	gt = xe_device_get_gt(xe, eci->gt_id);
+	if (!gt)
+		return -EINVAL;
+
+	if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+		return -EINVAL;
+
+	hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
+			      eci->engine_instance, true);
+	if (!hwe)
+		return -EINVAL;
+
+	resp.engine_frequency = gt->info.clock_freq;
+
+	xe_device_mem_access_get(xe);
+	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+	__read_timestamps(gt,
+			  RING_TIMESTAMP(hwe->mmio_base),
+			  RING_TIMESTAMP_UDW(hwe->mmio_base),
+			  &resp.engine_cycles,
+			  &resp.cpu_timestamp,
+			  &resp.cpu_delta,
+			  cpu_clock);
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(xe);
+	resp.width = 36;
+
+	/* Only write to the output fields of user query */
+	if (put_user(resp.engine_frequency, &query_ptr->engine_frequency))
+		return -EFAULT;
+
+	if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
+		return -EFAULT;
+
+	if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
+		return -EFAULT;
+
+	if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
+		return -EFAULT;
+
+	if (put_user(resp.width, &query_ptr->width))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int query_engines(struct xe_device *xe,
 			 struct drm_xe_device_query *query)
 {
@@ -369,6 +506,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
 	query_gts,
 	query_hwconfig,
 	query_gt_topology,
+	query_engine_cycles,
 };
 
 int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index d48d8e3c898ce..079213a3df550 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -128,6 +128,25 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
 
+/** struct drm_xe_engine_class_instance - instance of an engine class */
+struct drm_xe_engine_class_instance {
+#define DRM_XE_ENGINE_CLASS_RENDER		0
+#define DRM_XE_ENGINE_CLASS_COPY		1
+#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
+#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
+#define DRM_XE_ENGINE_CLASS_COMPUTE		4
+	/*
+	 * Kernel only class (not actual hardware engine class). Used for
+	 * creating ordered queues of VM bind operations.
+	 */
+#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+	__u16 engine_class;
+
+	__u16 engine_instance;
+	__u16 gt_id;
+	__u16 rsvd;
+};
+
 /**
  * enum drm_xe_memory_class - Supported memory classes.
  */
@@ -219,6 +238,60 @@ struct drm_xe_query_mem_region {
 	__u64 reserved[6];
 };
 
+/**
+ * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
+ *
+ * If a query is made with a struct drm_xe_device_query where .query is equal to
+ * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
+ * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
+ * .data points to this allocated structure.
+ *
+ * The query returns the engine cycles and the frequency that can
+ * be used to calculate the engine timestamp. In addition the
+ * query returns a set of cpu timestamps that indicate when the command
+ * streamer cycle count was captured.
+ */
+struct drm_xe_query_engine_cycles {
+	/**
+	 * @eci: This is input by the user and is the engine for which command
+	 * streamer cycles is queried.
+	 */
+	struct drm_xe_engine_class_instance eci;
+
+	/**
+	 * @clockid: This is input by the user and is the reference clock id for
+	 * CPU timestamp. For definition, see clock_gettime(2) and
+	 * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
+	 * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
+	 */
+	__s32 clockid;
+
+	/** @width: Width of the engine cycle counter in bits. */
+	__u32 width;
+
+	/**
+	 * @engine_cycles: Engine cycles as read from its register
+	 * at 0x358 offset.
+	 */
+	__u64 engine_cycles;
+
+	/** @engine_frequency: Frequency of the engine cycles in Hz. */
+	__u64 engine_frequency;
+
+	/**
+	 * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
+	 * reading the engine_cycles register using the reference clockid set by the
+	 * user.
+	 */
+	__u64 cpu_timestamp;
+
+	/**
+	 * @cpu_delta: Time delta in ns captured around reading the lower dword
+	 * of the engine_cycles register.
+	 */
+	__u64 cpu_delta;
+};
+
 /**
  * struct drm_xe_query_mem_usage - describe memory regions and usage
  *
@@ -385,12 +458,13 @@ struct drm_xe_device_query {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-#define DRM_XE_DEVICE_QUERY_ENGINES	0
-#define DRM_XE_DEVICE_QUERY_MEM_USAGE	1
-#define DRM_XE_DEVICE_QUERY_CONFIG	2
-#define DRM_XE_DEVICE_QUERY_GTS		3
-#define DRM_XE_DEVICE_QUERY_HWCONFIG	4
-#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY	5
+#define DRM_XE_DEVICE_QUERY_ENGINES		0
+#define DRM_XE_DEVICE_QUERY_MEM_USAGE		1
+#define DRM_XE_DEVICE_QUERY_CONFIG		2
+#define DRM_XE_DEVICE_QUERY_GTS			3
+#define DRM_XE_DEVICE_QUERY_HWCONFIG		4
+#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY		5
+#define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
 	/** @query: The type of data to query */
 	__u32 query;
 
@@ -732,24 +806,6 @@ struct drm_xe_exec_queue_set_property {
 	__u64 reserved[2];
 };
 
-/** struct drm_xe_engine_class_instance - instance of an engine class */
-struct drm_xe_engine_class_instance {
-#define DRM_XE_ENGINE_CLASS_RENDER		0
-#define DRM_XE_ENGINE_CLASS_COPY		1
-#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
-#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
-#define DRM_XE_ENGINE_CLASS_COMPUTE		4
-	/*
-	 * Kernel only class (not actual hardware engine class). Used for
-	 * creating ordered queues of VM bind operations.
-	 */
-#define DRM_XE_ENGINE_CLASS_VM_BIND		5
-	__u16 engine_class;
-
-	__u16 engine_instance;
-	__u16 gt_id;
-};
-
 struct drm_xe_exec_queue_create {
 #define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
 	/** @extensions: Pointer to the first extension struct, if any */
-- 
GitLab


From ea0640fc6971f555c8f921e2060376d768685805 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 20 Sep 2023 15:29:24 -0400
Subject: [PATCH 1984/2340] drm/xe/uapi: Separate VM_BIND's operation and flag

Use different members in the drm_xe_vm_bind_op for op and for flags as
it is done in other structures.

Type is left to u32 to leave enough room for future operations and flags.

v2: Remove the XE_VM_BIND_* flags shift (Rodrigo Vivi)

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/303
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 29 ++++++++++++++++-------------
 include/uapi/drm/xe_drm.h  | 14 ++++++++------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 035f3232e3b9d..3ae911ade7e48 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2282,11 +2282,11 @@ static void vm_set_async_error(struct xe_vm *vm, int err)
 }
 
 static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
-				    u64 addr, u64 range, u32 op)
+				    u64 addr, u64 range, u32 op, u32 flags)
 {
 	struct xe_device *xe = vm->xe;
 	struct xe_vma *vma;
-	bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+	bool async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
 
 	lockdep_assert_held(&vm->lock);
 
@@ -2387,7 +2387,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 static struct drm_gpuva_ops *
 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			 u64 bo_offset_or_userptr, u64 addr, u64 range,
-			 u32 operation, u8 tile_mask, u32 region)
+			 u32 operation, u32 flags, u8 tile_mask, u32 region)
 {
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
 	struct drm_gpuva_ops *ops;
@@ -2416,10 +2416,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 
 			op->tile_mask = tile_mask;
 			op->map.immediate =
-				operation & XE_VM_BIND_FLAG_IMMEDIATE;
+				flags & XE_VM_BIND_FLAG_IMMEDIATE;
 			op->map.read_only =
-				operation & XE_VM_BIND_FLAG_READONLY;
-			op->map.is_null = operation & XE_VM_BIND_FLAG_NULL;
+				flags & XE_VM_BIND_FLAG_READONLY;
+			op->map.is_null = flags & XE_VM_BIND_FLAG_NULL;
 		}
 		break;
 	case XE_VM_BIND_OP_UNMAP:
@@ -3236,15 +3236,16 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u64 range = (*bind_ops)[i].range;
 		u64 addr = (*bind_ops)[i].addr;
 		u32 op = (*bind_ops)[i].op;
+		u32 flags = (*bind_ops)[i].flags;
 		u32 obj = (*bind_ops)[i].obj;
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
 		u32 region = (*bind_ops)[i].region;
-		bool is_null = op & XE_VM_BIND_FLAG_NULL;
+		bool is_null = flags & XE_VM_BIND_FLAG_NULL;
 
 		if (i == 0) {
-			*async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+			*async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
 		} else if (XE_IOCTL_DBG(xe, !*async) ||
-			   XE_IOCTL_DBG(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
+			   XE_IOCTL_DBG(xe, !(flags & XE_VM_BIND_FLAG_ASYNC)) ||
 			   XE_IOCTL_DBG(xe, VM_BIND_OP(op) ==
 					XE_VM_BIND_OP_RESTART)) {
 			err = -EINVAL;
@@ -3265,7 +3266,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 
 		if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) >
 				 XE_VM_BIND_OP_PREFETCH) ||
-		    XE_IOCTL_DBG(xe, op & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
 		    XE_IOCTL_DBG(xe, obj && is_null) ||
 		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
 		    XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
@@ -3480,8 +3481,9 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
 		u32 op = bind_ops[i].op;
+		u32 flags = bind_ops[i].flags;
 
-		err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
+		err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op, flags);
 		if (err)
 			goto free_syncs;
 	}
@@ -3490,13 +3492,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
 		u32 op = bind_ops[i].op;
+		u32 flags = bind_ops[i].flags;
 		u64 obj_offset = bind_ops[i].obj_offset;
 		u8 tile_mask = bind_ops[i].tile_mask;
 		u32 region = bind_ops[i].region;
 
 		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
-						  addr, range, op, tile_mask,
-						  region);
+						  addr, range, op, flags,
+						  tile_mask, region);
 		if (IS_ERR(ops[i])) {
 			err = PTR_ERR(ops[i]);
 			ops[i] = NULL;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 079213a3df550..46db9334159be 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -660,8 +660,10 @@ struct drm_xe_vm_bind_op {
 #define XE_VM_BIND_OP_RESTART		0x3
 #define XE_VM_BIND_OP_UNMAP_ALL		0x4
 #define XE_VM_BIND_OP_PREFETCH		0x5
+	/** @op: Bind operation to perform */
+	__u32 op;
 
-#define XE_VM_BIND_FLAG_READONLY	(0x1 << 16)
+#define XE_VM_BIND_FLAG_READONLY	(0x1 << 0)
 	/*
 	 * A bind ops completions are always async, hence the support for out
 	 * sync. This flag indicates the allocation of the memory for new page
@@ -686,12 +688,12 @@ struct drm_xe_vm_bind_op {
 	 * configured in the VM and must be set if the VM is configured with
 	 * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state.
 	 */
-#define XE_VM_BIND_FLAG_ASYNC		(0x1 << 17)
+#define XE_VM_BIND_FLAG_ASYNC		(0x1 << 1)
 	/*
 	 * Valid on a faulting VM only, do the MAP operation immediately rather
 	 * than deferring the MAP to the page fault handler.
 	 */
-#define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 18)
+#define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 2)
 	/*
 	 * When the NULL flag is set, the page tables are setup with a special
 	 * bit which indicates writes are dropped and all reads return zero.  In
@@ -699,9 +701,9 @@ struct drm_xe_vm_bind_op {
 	 * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
 	 * intended to implement VK sparse bindings.
 	 */
-#define XE_VM_BIND_FLAG_NULL		(0x1 << 19)
-	/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
-	__u32 op;
+#define XE_VM_BIND_FLAG_NULL		(0x1 << 3)
+	/** @flags: Bind flags */
+	__u32 flags;
 
 	/** @mem_region: Memory region to prefetch VMA to, instance not a mask */
 	__u32 region;
-- 
GitLab


From 78ddc872c6a91d8973ca89209793323efaa86345 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 20 Sep 2023 15:29:25 -0400
Subject: [PATCH 1985/2340] drm/xe/vm: Remove VM_BIND_OP macro

This macro was necessary when bind operations were shifted but this
is no longer the case, so removing to simplify code.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 42 +++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 3ae911ade7e48..1a0546beef878 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2273,8 +2273,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 	}
 }
 
-#define VM_BIND_OP(op)	(op & 0xffff)
-
 static void vm_set_async_error(struct xe_vm *vm, int err)
 {
 	lockdep_assert_held(&vm->lock);
@@ -2290,7 +2288,7 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
 
 	lockdep_assert_held(&vm->lock);
 
-	switch (VM_BIND_OP(op)) {
+	switch (op) {
 	case XE_VM_BIND_OP_MAP:
 	case XE_VM_BIND_OP_MAP_USERPTR:
 		vma = xe_vm_find_overlapping_vma(vm, addr, range);
@@ -2400,10 +2398,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 
 	vm_dbg(&vm->xe->drm,
 	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
-	       VM_BIND_OP(operation), (ULL)addr, (ULL)range,
+	       operation, (ULL)addr, (ULL)range,
 	       (ULL)bo_offset_or_userptr);
 
-	switch (VM_BIND_OP(operation)) {
+	switch (operation) {
 	case XE_VM_BIND_OP_MAP:
 	case XE_VM_BIND_OP_MAP_USERPTR:
 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
@@ -3246,50 +3244,48 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 			*async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
 		} else if (XE_IOCTL_DBG(xe, !*async) ||
 			   XE_IOCTL_DBG(xe, !(flags & XE_VM_BIND_FLAG_ASYNC)) ||
-			   XE_IOCTL_DBG(xe, VM_BIND_OP(op) ==
-					XE_VM_BIND_OP_RESTART)) {
+			   XE_IOCTL_DBG(xe, op == XE_VM_BIND_OP_RESTART)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
 		if (XE_IOCTL_DBG(xe, !*async &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
+				 op == XE_VM_BIND_OP_UNMAP_ALL)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
 		if (XE_IOCTL_DBG(xe, !*async &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
+				 op == XE_VM_BIND_OP_PREFETCH)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
-		if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) >
-				 XE_VM_BIND_OP_PREFETCH) ||
+		if (XE_IOCTL_DBG(xe, op > XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
 		    XE_IOCTL_DBG(xe, obj && is_null) ||
 		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
-		    XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
+		    XE_IOCTL_DBG(xe, op != XE_VM_BIND_OP_MAP &&
 				 is_null) ||
 		    XE_IOCTL_DBG(xe, !obj &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP &&
+				 op == XE_VM_BIND_OP_MAP &&
 				 !is_null) ||
 		    XE_IOCTL_DBG(xe, !obj &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+				 op == XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, addr &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+				 op == XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, range &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+				 op == XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, obj &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
+				 op == XE_VM_BIND_OP_MAP_USERPTR) ||
 		    XE_IOCTL_DBG(xe, obj &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
+				 op == XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, region &&
-				 VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
+				 op != XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, !(BIT(region) &
 				       xe->info.mem_region_mask)) ||
 		    XE_IOCTL_DBG(xe, obj &&
-				 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
+				 op == XE_VM_BIND_OP_UNMAP)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
@@ -3297,9 +3293,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
-		    XE_IOCTL_DBG(xe, !range && VM_BIND_OP(op) !=
+		    XE_IOCTL_DBG(xe, !range && op !=
 				 XE_VM_BIND_OP_RESTART &&
-				 VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
+				 op != XE_VM_BIND_OP_UNMAP_ALL)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
@@ -3363,7 +3359,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto release_vm_lock;
 	}
 
-	if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
+	if (bind_ops[0].op == XE_VM_BIND_OP_RESTART) {
 		if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
 			err = -EOPNOTSUPP;
 		if (XE_IOCTL_DBG(xe, !err && args->num_syncs))
-- 
GitLab


From 924e6a9789a05ef01ffdf849aa3a3c75f5a29a8b Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 20 Sep 2023 15:29:26 -0400
Subject: [PATCH 1986/2340] drm/xe/uapi: Remove MMIO ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was previously used in UMD for timestamp correlation, which can now
be done with DRM_XE_QUERY_CS_CYCLES.

Link: https://lore.kernel.org/all/20230706042044.GR6953@mdroper-desk1.amr.corp.intel.com/
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/636
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c |   1 -
 drivers/gpu/drm/xe/xe_mmio.c   | 102 ---------------------------------
 drivers/gpu/drm/xe/xe_mmio.h   |   3 -
 include/uapi/drm/xe_drm.h      |  31 ++--------
 4 files changed, 4 insertions(+), 133 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 2bbd3aa2809b6..ae0b7349c3e31 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -121,7 +121,6 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index e4cf9bfec422e..0da4f75c07bf6 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -429,108 +429,6 @@ int xe_mmio_init(struct xe_device *xe)
 	return 0;
 }
 
-#define VALID_MMIO_FLAGS (\
-	DRM_XE_MMIO_BITS_MASK |\
-	DRM_XE_MMIO_READ |\
-	DRM_XE_MMIO_WRITE)
-
-static const struct xe_reg mmio_read_whitelist[] = {
-	RING_TIMESTAMP(RENDER_RING_BASE),
-};
-
-int xe_mmio_ioctl(struct drm_device *dev, void *data,
-		  struct drm_file *file)
-{
-	struct xe_device *xe = to_xe_device(dev);
-	struct xe_gt *gt = xe_root_mmio_gt(xe);
-	struct drm_xe_mmio *args = data;
-	unsigned int bits_flag, bytes;
-	struct xe_reg reg;
-	bool allowed;
-	int ret = 0;
-
-	if (XE_IOCTL_DBG(xe, args->extensions) ||
-	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, args->flags & ~VALID_MMIO_FLAGS))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value))
-		return -EINVAL;
-
-	allowed = capable(CAP_SYS_ADMIN);
-	if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) {
-		unsigned int i;
-
-		for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
-			if (mmio_read_whitelist[i].addr == args->addr) {
-				allowed = true;
-				break;
-			}
-		}
-	}
-
-	if (XE_IOCTL_DBG(xe, !allowed))
-		return -EPERM;
-
-	bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
-	bytes = 1 << bits_flag;
-	if (XE_IOCTL_DBG(xe, args->addr + bytes > xe->mmio.size))
-		return -EINVAL;
-
-	/*
-	 * TODO: migrate to xe_gt_mcr to lookup the mmio range and handle
-	 * multicast registers. Steering would need uapi extension.
-	 */
-	reg = XE_REG(args->addr);
-
-	xe_device_mem_access_get(xe);
-	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-
-	if (args->flags & DRM_XE_MMIO_WRITE) {
-		switch (bits_flag) {
-		case DRM_XE_MMIO_32BIT:
-			if (XE_IOCTL_DBG(xe, args->value > U32_MAX)) {
-				ret = -EINVAL;
-				goto exit;
-			}
-			xe_mmio_write32(gt, reg, args->value);
-			break;
-		default:
-			drm_dbg(&xe->drm, "Invalid MMIO bit size");
-			fallthrough;
-		case DRM_XE_MMIO_8BIT: /* TODO */
-		case DRM_XE_MMIO_16BIT: /* TODO */
-			ret = -EOPNOTSUPP;
-			goto exit;
-		}
-	}
-
-	if (args->flags & DRM_XE_MMIO_READ) {
-		switch (bits_flag) {
-		case DRM_XE_MMIO_32BIT:
-			args->value = xe_mmio_read32(gt, reg);
-			break;
-		case DRM_XE_MMIO_64BIT:
-			args->value = xe_mmio_read64_2x32(gt, reg);
-			break;
-		default:
-			drm_dbg(&xe->drm, "Invalid MMIO bit size");
-			fallthrough;
-		case DRM_XE_MMIO_8BIT: /* TODO */
-		case DRM_XE_MMIO_16BIT: /* TODO */
-			ret = -EOPNOTSUPP;
-		}
-	}
-
-exit:
-	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	xe_device_mem_access_put(xe);
-
-	return ret;
-}
-
 /**
  * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
  * @gt: MMIO target GT
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index ae09f777d711d..24a23dad7dce6 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -124,9 +124,6 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask,
 	return ret;
 }
 
-int xe_mmio_ioctl(struct drm_device *dev, void *data,
-		  struct drm_file *file);
-
 static inline bool xe_mmio_in_range(const struct xe_gt *gt,
 				    const struct xe_mmio_range *range,
 				    struct xe_reg reg)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 46db9334159be..ad21ba1d6e0b8 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -106,11 +106,10 @@ struct xe_user_extension {
 #define DRM_XE_EXEC_QUEUE_CREATE		0x06
 #define DRM_XE_EXEC_QUEUE_DESTROY		0x07
 #define DRM_XE_EXEC			0x08
-#define DRM_XE_MMIO			0x09
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY	0x0a
-#define DRM_XE_WAIT_USER_FENCE		0x0b
-#define DRM_XE_VM_MADVISE		0x0c
-#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x0d
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY	0x09
+#define DRM_XE_WAIT_USER_FENCE		0x0a
+#define DRM_XE_VM_MADVISE		0x0b
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x0c
 
 /* Must be kept compact -- no holes */
 #define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -123,7 +122,6 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
 #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
 #define DRM_IOCTL_XE_EXEC			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
-#define DRM_IOCTL_XE_MMIO			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio)
 #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY	 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
@@ -936,27 +934,6 @@ struct drm_xe_exec {
 	__u64 reserved[2];
 };
 
-struct drm_xe_mmio {
-	/** @extensions: Pointer to the first extension struct, if any */
-	__u64 extensions;
-
-	__u32 addr;
-
-#define DRM_XE_MMIO_8BIT	0x0
-#define DRM_XE_MMIO_16BIT	0x1
-#define DRM_XE_MMIO_32BIT	0x2
-#define DRM_XE_MMIO_64BIT	0x3
-#define DRM_XE_MMIO_BITS_MASK	0x3
-#define DRM_XE_MMIO_READ	0x4
-#define DRM_XE_MMIO_WRITE	0x8
-	__u32 flags;
-
-	__u64 value;
-
-	/** @reserved: Reserved */
-	__u64 reserved[2];
-};
-
 /**
  * struct drm_xe_wait_user_fence - wait user fence
  *
-- 
GitLab


From 5009d554e0d501741de1411db797a593a6fa94bb Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 20 Sep 2023 15:29:27 -0400
Subject: [PATCH 1987/2340] drm/xe: Fix xe_exec_queue_is_idle for parallel exec
 queues

Last little piece to support parallel exec queue is compute mode.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 23789122b5b1c..c88acecc3a90f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -867,8 +867,17 @@ bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
  */
 bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
 {
-	if (XE_WARN_ON(xe_exec_queue_is_parallel(q)))
-		return false;
+	if (xe_exec_queue_is_parallel(q)) {
+		int i;
+
+		for (i = 0; i < q->width; ++i) {
+			if (xe_lrc_seqno(&q->lrc[i]) !=
+			    q->lrc[i].fence_ctx.next_seqno - 1)
+				return false;
+		}
+
+		return true;
+	}
 
 	return xe_lrc_seqno(&q->lrc[0]) ==
 		q->lrc[0].fence_ctx.next_seqno - 1;
-- 
GitLab


From e05c6c9774630702143bf4d35f2a753e61a57622 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 20 Sep 2023 15:29:28 -0400
Subject: [PATCH 1988/2340] drm/xe: Deprecate
 XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE implementation

We are going to remove XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE from the
uAPI, deprecate the implementation first by making
XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE a NOP. After removal of
XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE the proper is simply inherented
from the VM.

v2:
 - Update commit message with explaination of removal (Niranjana)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c       | 57 ++++++------------------
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  6 +--
 2 files changed, 16 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index c88acecc3a90f..b9e645e86b4f4 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -323,39 +323,6 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe,
 static int exec_queue_set_compute_mode(struct xe_device *xe, struct xe_exec_queue *q,
 				       u64 value, bool create)
 {
-	if (XE_IOCTL_DBG(xe, !create))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
-		return -EINVAL;
-
-	if (value) {
-		struct xe_vm *vm = q->vm;
-		int err;
-
-		if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm)))
-			return -EOPNOTSUPP;
-
-		if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm)))
-			return -EOPNOTSUPP;
-
-		if (XE_IOCTL_DBG(xe, q->width != 1))
-			return -EINVAL;
-
-		q->compute.context = dma_fence_context_alloc(1);
-		spin_lock_init(&q->compute.lock);
-
-		err = xe_vm_add_compute_exec_queue(vm, q);
-		if (XE_IOCTL_DBG(xe, err))
-			return err;
-
-		q->flags |= EXEC_QUEUE_FLAG_COMPUTE_MODE;
-		q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;
-	}
-
 	return 0;
 }
 
@@ -365,7 +332,7 @@ static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))
+	if (XE_IOCTL_DBG(xe, xe_vm_in_compute_mode(q->vm)))
 		return -EINVAL;
 
 	if (value)
@@ -742,18 +709,21 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		xe_vm_put(vm);
 		if (IS_ERR(q))
 			return PTR_ERR(q);
+
+		if (xe_vm_in_compute_mode(vm)) {
+			q->compute.context = dma_fence_context_alloc(1);
+			spin_lock_init(&q->compute.lock);
+
+			err = xe_vm_add_compute_exec_queue(vm, q);
+			if (XE_IOCTL_DBG(xe, err))
+				goto put_exec_queue;
+		}
 	}
 
 	if (args->extensions) {
 		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
 		if (XE_IOCTL_DBG(xe, err))
-			goto put_exec_queue;
-	}
-
-	if (XE_IOCTL_DBG(xe, q->vm && xe_vm_in_compute_mode(q->vm) !=
-			 !!(q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))) {
-		err = -EOPNOTSUPP;
-		goto put_exec_queue;
+			goto kill_exec_queue;
 	}
 
 	q->persistent.xef = xef;
@@ -762,14 +732,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
 	mutex_unlock(&xef->exec_queue.lock);
 	if (err)
-		goto put_exec_queue;
+		goto kill_exec_queue;
 
 	args->exec_queue_id = id;
 
 	return 0;
 
-put_exec_queue:
+kill_exec_queue:
 	xe_exec_queue_kill(q);
+put_exec_queue:
 	xe_exec_queue_put(q);
 	return err;
 }
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 4f4190971dcfc..c4813944b017b 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -73,12 +73,10 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_PERMANENT		BIT(2)
 /* queue keeps running pending jobs after destroy ioctl */
 #define EXEC_QUEUE_FLAG_PERSISTENT		BIT(3)
-/* queue for use with compute VMs */
-#define EXEC_QUEUE_FLAG_COMPUTE_MODE		BIT(4)
 /* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */
-#define EXEC_QUEUE_FLAG_VM			BIT(5)
+#define EXEC_QUEUE_FLAG_VM			BIT(4)
 /* child of VM queue for multi-tile VM jobs */
-#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(6)
+#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
-- 
GitLab


From abce4e4b0742f0a0773213144601ea7e18389228 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 20 Sep 2023 15:29:29 -0400
Subject: [PATCH 1989/2340] drm/xe: Rename exec_queue_kill_compute to
 xe_vm_remove_compute_exec_queue

Much better name and aligns with xe_vm_add_compute_exec_queue. As part
of the rename, move the implementation from xe_exec_queue.c to xe_vm.c.

Suggested-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c | 20 ++------------------
 drivers/gpu/drm/xe/xe_vm.c         | 21 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_vm.h         |  1 +
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index b9e645e86b4f4..d400e2bb37856 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -775,22 +775,6 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
-static void exec_queue_kill_compute(struct xe_exec_queue *q)
-{
-	if (!xe_vm_in_compute_mode(q->vm))
-		return;
-
-	down_write(&q->vm->lock);
-	list_del(&q->compute.link);
-	--q->vm->preempt.num_exec_queues;
-	if (q->compute.pfence) {
-		dma_fence_enable_sw_signaling(q->compute.pfence);
-		dma_fence_put(q->compute.pfence);
-		q->compute.pfence = NULL;
-	}
-	up_write(&q->vm->lock);
-}
-
 /**
  * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
  * @q: The exec_queue
@@ -861,11 +845,11 @@ void xe_exec_queue_kill(struct xe_exec_queue *q)
 	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
 				 multi_gt_link) {
 		q->ops->kill(eq);
-		exec_queue_kill_compute(eq);
+		xe_vm_remove_compute_exec_queue(q->vm, eq);
 	}
 
 	q->ops->kill(q);
-	exec_queue_kill_compute(q);
+	xe_vm_remove_compute_exec_queue(q->vm, q);
 }
 
 int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 1a0546beef878..d02c0db5e2ae1 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -387,6 +387,27 @@ out_unlock:
 	return err;
 }
 
+/**
+ * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
+ * @vm: The VM.
+ * @q: The exec_queue
+ */
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	if (!xe_vm_in_compute_mode(vm))
+		return;
+
+	down_write(&vm->lock);
+	list_del(&q->compute.link);
+	--vm->preempt.num_exec_queues;
+	if (q->compute.pfence) {
+		dma_fence_enable_sw_signaling(q->compute.pfence);
+		dma_fence_put(q->compute.pfence);
+		q->compute.pfence = NULL;
+	}
+	up_write(&vm->lock);
+}
+
 /**
  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
  * that need repinning.
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 694f9e689b058..59dcbd1adf15a 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -165,6 +165,7 @@ static inline bool xe_vm_no_dma_fences(struct xe_vm *vm)
 }
 
 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
 
 int xe_vm_userptr_pin(struct xe_vm *vm);
 
-- 
GitLab


From bffb2573726beabc8ad70532d5655a976f9053d8 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 20 Sep 2023 15:29:30 -0400
Subject: [PATCH 1990/2340] drm/xe: Remove
 XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE from uAPI

Functionality of XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE deprecated in a
previous patch, drop from uAPI. The property is just simply inherented
from the VM.

v2:
 - Update commit message (Niranjana)

Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c |  7 -------
 include/uapi/drm/xe_drm.h          | 19 ++++++-------------
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index d400e2bb37856..5714a7195349d 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -320,12 +320,6 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe,
 	return q->ops->set_preempt_timeout(q, value);
 }
 
-static int exec_queue_set_compute_mode(struct xe_device *xe, struct xe_exec_queue *q,
-				       u64 value, bool create)
-{
-	return 0;
-}
-
 static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
 				      u64 value, bool create)
 {
@@ -411,7 +405,6 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
 	[XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
 	[XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
 	[XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
-	[XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE] = exec_queue_set_compute_mode,
 	[XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
 	[XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
 	[XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index ad21ba1d6e0b8..2a9e040247234 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -781,21 +781,14 @@ struct drm_xe_exec_queue_set_property {
 	/** @exec_queue_id: Exec queue ID */
 	__u32 exec_queue_id;
 
-#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY			0
+#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY		0
 #define XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE		1
 #define XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
-	/*
-	 * Long running or ULLS engine mode. DMA fences not allowed in this
-	 * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves
-	 * as a sanity check the UMD knows what it is doing. Can only be set at
-	 * engine create time.
-	 */
-#define XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE		3
-#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		4
-#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		5
-#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		6
-#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		7
-#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY		8
+#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		3
+#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		4
+#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		5
+#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		6
+#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY	7
 	/** @property: property to set */
 	__u32 property;
 
-- 
GitLab


From 5dc079d1a8e5e880ae18b4f4585d7dc28e51e68e Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Wed, 20 Sep 2023 15:29:31 -0400
Subject: [PATCH 1991/2340] drm/xe/uapi: Use common drm_xe_ext_set_property
 extension

There really is no difference between 'struct drm_xe_ext_vm_set_property'
and 'struct drm_xe_ext_exec_queue_set_property', they are extensions which
specify a <property, value> pair. Replace the two extensions with a single
common 'struct drm_xe_ext_set_property' extension. The rationale is that
rather than have each XE module (including future modules) invent their own
property/value extensions, all XE modules use a common set_property
extension when possible.

Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c |  2 +-
 drivers/gpu/drm/xe/xe_vm.c         |  2 +-
 include/uapi/drm/xe_drm.h          | 21 +++------------------
 3 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 5714a7195349d..38ce777d0ba88 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -418,7 +418,7 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
 					    bool create)
 {
 	u64 __user *address = u64_to_user_ptr(extension);
-	struct drm_xe_ext_exec_queue_set_property ext;
+	struct drm_xe_ext_set_property ext;
 	int err;
 	u32 idx;
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d02c0db5e2ae1..3d350b27732ff 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2067,7 +2067,7 @@ static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
 				    u64 extension)
 {
 	u64 __user *address = u64_to_user_ptr(extension);
-	struct drm_xe_ext_vm_set_property ext;
+	struct drm_xe_ext_set_property ext;
 	int err;
 
 	err = __copy_from_user(&ext, address, sizeof(ext));
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 2a9e040247234..4987a634afc74 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -569,12 +569,11 @@ struct drm_xe_vm_bind_op_error_capture {
 	__u64 size;
 };
 
-/** struct drm_xe_ext_vm_set_property - VM set property extension */
-struct drm_xe_ext_vm_set_property {
+/** struct drm_xe_ext_set_property - XE set property extension */
+struct drm_xe_ext_set_property {
 	/** @base: base user extension */
 	struct xe_user_extension base;
 
-#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS		0
 	/** @property: property to set */
 	__u32 property;
 
@@ -590,6 +589,7 @@ struct drm_xe_ext_vm_set_property {
 
 struct drm_xe_vm_create {
 #define XE_VM_EXTENSION_SET_PROPERTY	0
+#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS		0
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
@@ -754,21 +754,6 @@ struct drm_xe_vm_bind {
 	__u64 reserved[2];
 };
 
-/** struct drm_xe_ext_exec_queue_set_property - exec queue set property extension */
-struct drm_xe_ext_exec_queue_set_property {
-	/** @base: base user extension */
-	struct xe_user_extension base;
-
-	/** @property: property to set */
-	__u32 property;
-
-	/** @pad: MBZ */
-	__u32 pad;
-
-	/** @value: property value */
-	__u64 value;
-};
-
 /**
  * struct drm_xe_exec_queue_set_property - exec queue set property
  *
-- 
GitLab


From 7224788f675632956cb9177c039645d72d887cf8 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 20 Sep 2023 15:29:32 -0400
Subject: [PATCH 1992/2340] drm/xe: Kill
 XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS extension
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This extension is currently not used and it is not aligned with
the error handling on async VM_BIND. Let's remove it and along with
that, since it was the only extension for the vm_create, remove VM
extension entirely.

v2: rebase on top of the removal of drm_xe_ext_exec_queue_set_property

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 129 +------------------------------------
 include/uapi/drm/xe_drm.h  |  23 +------
 2 files changed, 4 insertions(+), 148 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 3d350b27732ff..c7e3b1fbd931f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1531,37 +1531,6 @@ static void flush_async_ops(struct xe_vm *vm)
 	flush_work(&vm->async_ops.work);
 }
 
-static void vm_error_capture(struct xe_vm *vm, int err,
-			     u32 op, u64 addr, u64 size)
-{
-	struct drm_xe_vm_bind_op_error_capture capture;
-	u64 __user *address =
-		u64_to_user_ptr(vm->async_ops.error_capture.addr);
-	bool in_kthread = !current->mm;
-
-	capture.error = err;
-	capture.op = op;
-	capture.addr = addr;
-	capture.size = size;
-
-	if (in_kthread) {
-		if (!mmget_not_zero(vm->async_ops.error_capture.mm))
-			goto mm_closed;
-		kthread_use_mm(vm->async_ops.error_capture.mm);
-	}
-
-	if (copy_to_user(address, &capture, sizeof(capture)))
-		drm_warn(&vm->xe->drm, "Copy to user failed");
-
-	if (in_kthread) {
-		kthread_unuse_mm(vm->async_ops.error_capture.mm);
-		mmput(vm->async_ops.error_capture.mm);
-	}
-
-mm_closed:
-	wake_up_all(&vm->async_ops.error_capture.wq);
-}
-
 static void xe_vm_close(struct xe_vm *vm)
 {
 	down_write(&vm->lock);
@@ -2036,91 +2005,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	return 0;
 }
 
-static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
-					u64 value)
-{
-	if (XE_IOCTL_DBG(xe, !value))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
-		return -EOPNOTSUPP;
-
-	if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr))
-		return -EOPNOTSUPP;
-
-	vm->async_ops.error_capture.mm = current->mm;
-	vm->async_ops.error_capture.addr = value;
-	init_waitqueue_head(&vm->async_ops.error_capture.wq);
-
-	return 0;
-}
-
-typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
-				     u64 value);
-
-static const xe_vm_set_property_fn vm_set_property_funcs[] = {
-	[XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
-		vm_set_error_capture_address,
-};
-
-static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
-				    u64 extension)
-{
-	u64 __user *address = u64_to_user_ptr(extension);
-	struct drm_xe_ext_set_property ext;
-	int err;
-
-	err = __copy_from_user(&ext, address, sizeof(ext));
-	if (XE_IOCTL_DBG(xe, err))
-		return -EFAULT;
-
-	if (XE_IOCTL_DBG(xe, ext.property >=
-			 ARRAY_SIZE(vm_set_property_funcs)) ||
-	    XE_IOCTL_DBG(xe, ext.pad) ||
-	    XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1]))
-		return -EINVAL;
-
-	return vm_set_property_funcs[ext.property](xe, vm, ext.value);
-}
-
-typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
-				       u64 extension);
-
-static const xe_vm_set_property_fn vm_user_extension_funcs[] = {
-	[XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
-};
-
-#define MAX_USER_EXTENSIONS	16
-static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
-			      u64 extensions, int ext_number)
-{
-	u64 __user *address = u64_to_user_ptr(extensions);
-	struct xe_user_extension ext;
-	int err;
-
-	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
-		return -E2BIG;
-
-	err = __copy_from_user(&ext, address, sizeof(ext));
-	if (XE_IOCTL_DBG(xe, err))
-		return -EFAULT;
-
-	if (XE_IOCTL_DBG(xe, ext.pad) ||
-	    XE_IOCTL_DBG(xe, ext.name >=
-			 ARRAY_SIZE(vm_user_extension_funcs)))
-		return -EINVAL;
-
-	err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
-	if (XE_IOCTL_DBG(xe, err))
-		return err;
-
-	if (ext.next_extension)
-		return vm_user_extensions(xe, vm, ext.next_extension,
-					  ++ext_number);
-
-	return 0;
-}
-
 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
 				    DRM_XE_VM_CREATE_COMPUTE_MODE | \
 				    DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
@@ -2138,6 +2022,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	int err;
 	u32 flags = 0;
 
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
 	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
 		args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE;
 
@@ -2180,14 +2067,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(vm))
 		return PTR_ERR(vm);
 
-	if (args->extensions) {
-		err = vm_user_extensions(xe, vm, args->extensions, 0);
-		if (XE_IOCTL_DBG(xe, err)) {
-			xe_vm_close_and_put(vm);
-			return err;
-		}
-	}
-
 	mutex_lock(&xef->vm.lock);
 	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
 	mutex_unlock(&xef->vm.lock);
@@ -3087,8 +2966,6 @@ static void xe_vma_op_work_func(struct work_struct *w)
 				vm_set_async_error(vm, err);
 				up_write(&vm->lock);
 
-				if (vm->async_ops.error_capture.addr)
-					vm_error_capture(vm, err, 0, 0, 0);
 				break;
 			}
 			up_write(&vm->lock);
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 4987a634afc74..e7cf42c7234bf 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -552,23 +552,6 @@ struct drm_xe_gem_mmap_offset {
 	__u64 reserved[2];
 };
 
-/**
- * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture
- */
-struct drm_xe_vm_bind_op_error_capture {
-	/** @error: errno that occurred */
-	__s32 error;
-
-	/** @op: operation that encounter an error */
-	__u32 op;
-
-	/** @addr: address of bind op */
-	__u64 addr;
-
-	/** @size: size of bind */
-	__u64 size;
-};
-
 /** struct drm_xe_ext_set_property - XE set property extension */
 struct drm_xe_ext_set_property {
 	/** @base: base user extension */
@@ -589,7 +572,6 @@ struct drm_xe_ext_set_property {
 
 struct drm_xe_vm_create {
 #define XE_VM_EXTENSION_SET_PROPERTY	0
-#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS		0
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
@@ -674,10 +656,7 @@ struct drm_xe_vm_bind_op {
 	 * practice the bind op is good and will complete.
 	 *
 	 * If this flag is set and doesn't return an error, the bind op can
-	 * still fail and recovery is needed. If configured, the bind op that
-	 * caused the error will be captured in drm_xe_vm_bind_op_error_capture.
-	 * Once the user sees the error (via a ufence +
-	 * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory
+	 * still fail and recovery is needed. It should free memory
 	 * via non-async unbinds, and then restart all queued async binds op via
 	 * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the
 	 * VM.
-- 
GitLab


From b21ae51dcf41ce12bb8e2a7c989863ee9d04ae4b Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 14 Sep 2023 13:40:49 -0700
Subject: [PATCH 1993/2340] drm/xe/uapi: Kill DRM_XE_UFENCE_WAIT_VM_ERROR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is not used nor does it align VM async document, kill this.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c              |  3 --
 drivers/gpu/drm/xe/xe_vm_types.h        | 11 -------
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 43 +++----------------------
 include/uapi/drm/xe_drm.h               | 17 +++-------
 4 files changed, 9 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c7e3b1fbd931f..3132114d187fb 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1621,9 +1621,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		xe_vma_destroy_unlocked(vma);
 	}
 
-	if (vm->async_ops.error_capture.addr)
-		wake_up_all(&vm->async_ops.error_capture.wq);
-
 	xe_assert(xe, list_empty(&vm->extobj.list));
 	up_write(&vm->lock);
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 9a1075a75606d..828ed0fa7e606 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -215,17 +215,6 @@ struct xe_vm {
 		struct work_struct work;
 		/** @lock: protects list of pending async VM ops and fences */
 		spinlock_t lock;
-		/** @error_capture: error capture state */
-		struct {
-			/** @mm: user MM */
-			struct mm_struct *mm;
-			/**
-			 * @addr: user pointer to copy error capture state too
-			 */
-			u64 addr;
-			/** @wq: user fence wait queue for VM errors */
-			wait_queue_head_t wq;
-		} error_capture;
 		/** @fence: fence state */
 		struct {
 			/** @context: context of async fence */
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 3ac4cd24d5b4b..78686908f7fbb 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -13,7 +13,6 @@
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_macros.h"
-#include "xe_vm.h"
 
 static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
 {
@@ -81,8 +80,7 @@ static int check_hw_engines(struct xe_device *xe,
 }
 
 #define VALID_FLAGS	(DRM_XE_UFENCE_WAIT_SOFT_OP | \
-			 DRM_XE_UFENCE_WAIT_ABSTIME | \
-			 DRM_XE_UFENCE_WAIT_VM_ERROR)
+			 DRM_XE_UFENCE_WAIT_ABSTIME)
 #define MAX_OP		DRM_XE_UFENCE_WAIT_LTE
 
 static long to_jiffies_timeout(struct xe_device *xe,
@@ -137,11 +135,9 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
 	struct drm_xe_engine_class_instance __user *user_eci =
 		u64_to_user_ptr(args->instances);
-	struct xe_vm *vm = NULL;
 	u64 addr = args->addr;
 	int err;
-	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP ||
-		args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
+	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP;
 	long timeout;
 	ktime_t start;
 
@@ -162,8 +158,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) &&
-			 addr & 0x7))
+	if (XE_IOCTL_DBG(xe, addr & 0x7))
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE))
@@ -181,22 +176,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 	}
 
-	if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) {
-		if (XE_IOCTL_DBG(xe, args->vm_id >> 32))
-			return -EINVAL;
-
-		vm = xe_vm_lookup(to_xe_file(file), args->vm_id);
-		if (XE_IOCTL_DBG(xe, !vm))
-			return -ENOENT;
-
-		if (XE_IOCTL_DBG(xe, !vm->async_ops.error_capture.addr)) {
-			xe_vm_put(vm);
-			return -EOPNOTSUPP;
-		}
-
-		addr = vm->async_ops.error_capture.addr;
-	}
-
 	timeout = to_jiffies_timeout(xe, args);
 
 	start = ktime_get();
@@ -207,15 +186,8 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	 * hardware engine. Open coding as 'do_compare' can sleep which doesn't
 	 * work with the wait_event_* macros.
 	 */
-	if (vm)
-		add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
-	else
-		add_wait_queue(&xe->ufence_wq, &w_wait);
+	add_wait_queue(&xe->ufence_wq, &w_wait);
 	for (;;) {
-		if (vm && xe_vm_is_closed(vm)) {
-			err = -ENODEV;
-			break;
-		}
 		err = do_compare(addr, args->value, args->mask, args->op);
 		if (err <= 0)
 			break;
@@ -232,12 +204,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 
 		timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
 	}
-	if (vm) {
-		remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
-		xe_vm_put(vm);
-	} else {
-		remove_wait_queue(&xe->ufence_wq, &w_wait);
-	}
+	remove_wait_queue(&xe->ufence_wq, &w_wait);
 
 	if (!(args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)) {
 		args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start));
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index e7cf42c7234bf..f13974f17be9d 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -905,18 +905,10 @@ struct drm_xe_wait_user_fence {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	union {
-		/**
-		 * @addr: user pointer address to wait on, must qword aligned
-		 */
-		__u64 addr;
-
-		/**
-		 * @vm_id: The ID of the VM which encounter an error used with
-		 * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear.
-		 */
-		__u64 vm_id;
-	};
+	/**
+	 * @addr: user pointer address to wait on, must qword aligned
+	 */
+	__u64 addr;
 
 #define DRM_XE_UFENCE_WAIT_EQ	0
 #define DRM_XE_UFENCE_WAIT_NEQ	1
@@ -929,7 +921,6 @@ struct drm_xe_wait_user_fence {
 
 #define DRM_XE_UFENCE_WAIT_SOFT_OP	(1 << 0)	/* e.g. Wait on VM bind */
 #define DRM_XE_UFENCE_WAIT_ABSTIME	(1 << 1)
-#define DRM_XE_UFENCE_WAIT_VM_ERROR	(1 << 2)
 	/** @flags: wait flags */
 	__u16 flags;
 
-- 
GitLab


From f3e9b1f43458746e7e0211dbe4289412e5c0d16a Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 14 Sep 2023 13:40:50 -0700
Subject: [PATCH 1994/2340] drm/xe: Remove async worker and rework sync binds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Async worker is gone. All jobs and memory allocations done in IOCTL to
align with dma fencing rules.

Async vs. sync now means when do bind operations complete relative to
the IOCTL. Async completes when out-syncs signal while sync completes
when the IOCTL returns. In-syncs and out-syncs are only allowed in async
mode.

If memory allocations fail in the job creation step the VM is killed.
This is temporary, eventually a proper unwind will be done and VM will
be usable.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c             |  43 --
 drivers/gpu/drm/xe/xe_exec_queue.c       |   7 +-
 drivers/gpu/drm/xe/xe_exec_queue_types.h |   2 +
 drivers/gpu/drm/xe/xe_sync.c             |  14 +-
 drivers/gpu/drm/xe/xe_sync.h             |   2 +-
 drivers/gpu/drm/xe/xe_vm.c               | 535 +++++------------------
 drivers/gpu/drm/xe/xe_vm.h               |   2 -
 drivers/gpu/drm/xe/xe_vm_types.h         |   7 +-
 include/uapi/drm/xe_drm.h                |  33 +-
 9 files changed, 127 insertions(+), 518 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 7cf4215b2b2e7..85a8a793f527b 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -196,27 +196,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}
 
-	/*
-	 * We can't install a job into the VM dma-resv shared slot before an
-	 * async VM bind passed in as a fence without the risk of deadlocking as
-	 * the bind can trigger an eviction which in turn depends on anything in
-	 * the VM dma-resv shared slots. Not an ideal solution, but we wait for
-	 * all dependent async VM binds to start (install correct fences into
-	 * dma-resv slots) before moving forward.
-	 */
-	if (!xe_vm_no_dma_fences(vm) &&
-	    vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
-		for (i = 0; i < args->num_syncs; i++) {
-			struct dma_fence *fence = syncs[i].fence;
-
-			if (fence) {
-				err = xe_vm_async_fence_wait_start(fence);
-				if (err)
-					goto err_syncs;
-			}
-		}
-	}
-
 retry:
 	if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
 		err = down_write_killable(&vm->lock);
@@ -229,28 +208,6 @@ retry:
 	if (err)
 		goto err_syncs;
 
-	/* We don't allow execs while the VM is in error state */
-	if (vm->async_ops.error) {
-		err = vm->async_ops.error;
-		goto err_unlock_list;
-	}
-
-	/*
-	 * Extreme corner where we exit a VM error state with a munmap style VM
-	 * unbind inflight which requires a rebind. In this case the rebind
-	 * needs to install some fences into the dma-resv slots. The worker to
-	 * do this queued, let that worker make progress by dropping vm->lock,
-	 * flushing the worker and retrying the exec.
-	 */
-	if (vm->async_ops.munmap_rebind_inflight) {
-		if (write_locked)
-			up_write(&vm->lock);
-		else
-			up_read(&vm->lock);
-		flush_work(&vm->async_ops.work);
-		goto retry;
-	}
-
 	if (write_locked) {
 		err = xe_vm_userptr_pin(vm);
 		downgrade_write(&vm->lock);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 38ce777d0ba88..9b373b9ea4724 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -621,7 +621,10 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
 		return -EINVAL;
 
-	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+	if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) {
+		bool sync = eci[0].engine_class ==
+			DRM_XE_ENGINE_CLASS_VM_BIND_SYNC;
+
 		for_each_gt(gt, xe, id) {
 			struct xe_exec_queue *new;
 
@@ -647,6 +650,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 						   args->width, hwe,
 						   EXEC_QUEUE_FLAG_PERSISTENT |
 						   EXEC_QUEUE_FLAG_VM |
+						   (sync ? 0 :
+						    EXEC_QUEUE_FLAG_VM_ASYNC) |
 						   (id ?
 						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
 						    0));
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index c4813944b017b..4e382304010eb 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -77,6 +77,8 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_VM			BIT(4)
 /* child of VM queue for multi-tile VM jobs */
 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+/* VM jobs for this queue are asynchronous */
+#define EXEC_QUEUE_FLAG_VM_ASYNC		BIT(6)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 9fcd7802ba30b..73ef259aa3873 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -18,7 +18,6 @@
 #include "xe_sched_job_types.h"
 
 #define SYNC_FLAGS_TYPE_MASK 0x3
-#define SYNC_FLAGS_FENCE_INSTALLED	0x10000
 
 struct user_fence {
 	struct xe_device *xe;
@@ -223,12 +222,11 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
 	return 0;
 }
 
-bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
+void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 			  struct dma_fence *fence)
 {
-	if (!(sync->flags & DRM_XE_SYNC_SIGNAL) ||
-	    sync->flags & SYNC_FLAGS_FENCE_INSTALLED)
-		return false;
+	if (!(sync->flags & DRM_XE_SYNC_SIGNAL))
+		return;
 
 	if (sync->chain_fence) {
 		drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
@@ -260,12 +258,6 @@ bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 		job->user_fence.addr = sync->addr;
 		job->user_fence.value = sync->timeline_value;
 	}
-
-	/* TODO: external BO? */
-
-	sync->flags |= SYNC_FLAGS_FENCE_INSTALLED;
-
-	return true;
 }
 
 void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 4cbcf7a199110..30958ddc4cdc3 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -19,7 +19,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 int xe_sync_entry_wait(struct xe_sync_entry *sync);
 int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
 			   struct xe_sched_job *job);
-bool xe_sync_entry_signal(struct xe_sync_entry *sync,
+void xe_sync_entry_signal(struct xe_sync_entry *sync,
 			  struct xe_sched_job *job,
 			  struct dma_fence *fence);
 void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 3132114d187fb..89df50f49e11f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -592,7 +592,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	unsigned int fence_count = 0;
 	LIST_HEAD(preempt_fences);
 	ktime_t end = 0;
-	int err;
+	int err = 0;
 	long wait;
 	int __maybe_unused tries = 0;
 
@@ -608,22 +608,6 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	}
 
 retry:
-	if (vm->async_ops.error)
-		goto out_unlock_outer;
-
-	/*
-	 * Extreme corner where we exit a VM error state with a munmap style VM
-	 * unbind inflight which requires a rebind. In this case the rebind
-	 * needs to install some fences into the dma-resv slots. The worker to
-	 * do this queued, let that worker make progress by dropping vm->lock
-	 * and trying this again.
-	 */
-	if (vm->async_ops.munmap_rebind_inflight) {
-		up_write(&vm->lock);
-		flush_work(&vm->async_ops.work);
-		goto retry;
-	}
-
 	if (xe_vm_userptr_check_repin(vm)) {
 		err = xe_vm_userptr_pin(vm);
 		if (err)
@@ -1357,7 +1341,6 @@ static const struct xe_pt_ops xelp_pt_ops = {
 	.pde_encode_bo = xelp_pde_encode_bo,
 };
 
-static void xe_vma_op_work_func(struct work_struct *w);
 static void vm_destroy_work_func(struct work_struct *w);
 
 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
@@ -1390,10 +1373,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	INIT_LIST_HEAD(&vm->notifier.rebind_list);
 	spin_lock_init(&vm->notifier.list_lock);
 
-	INIT_LIST_HEAD(&vm->async_ops.pending);
-	INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
-	spin_lock_init(&vm->async_ops.lock);
-
 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
 
 	INIT_LIST_HEAD(&vm->preempt.exec_queues);
@@ -1458,11 +1437,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		vm->batch_invalidate_tlb = false;
 	}
 
-	if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
-		vm->async_ops.fence.context = dma_fence_context_alloc(1);
-		vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
-	}
-
 	/* Fill pt_root after allocating scratch tables */
 	for_each_tile(tile, xe, id) {
 		if (!vm->pt_root[id])
@@ -1478,6 +1452,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 			struct xe_gt *gt = tile->primary_gt;
 			struct xe_vm *migrate_vm;
 			struct xe_exec_queue *q;
+			u32 create_flags = EXEC_QUEUE_FLAG_VM |
+				((flags & XE_VM_FLAG_ASYNC_DEFAULT) ?
+				EXEC_QUEUE_FLAG_VM_ASYNC : 0);
 
 			if (!vm->pt_root[id])
 				continue;
@@ -1485,7 +1462,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 			migrate_vm = xe_migrate_get_vm(tile->migrate);
 			q = xe_exec_queue_create_class(xe, gt, migrate_vm,
 						       XE_ENGINE_CLASS_COPY,
-						       EXEC_QUEUE_FLAG_VM);
+						       create_flags);
 			xe_vm_put(migrate_vm);
 			if (IS_ERR(q)) {
 				err = PTR_ERR(q);
@@ -1525,12 +1502,6 @@ err_no_resv:
 	return ERR_PTR(err);
 }
 
-static void flush_async_ops(struct xe_vm *vm)
-{
-	queue_work(system_unbound_wq, &vm->async_ops.work);
-	flush_work(&vm->async_ops.work);
-}
-
 static void xe_vm_close(struct xe_vm *vm)
 {
 	down_write(&vm->lock);
@@ -1550,7 +1521,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	xe_assert(xe, !vm->preempt.num_exec_queues);
 
 	xe_vm_close(vm);
-	flush_async_ops(vm);
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
 
@@ -1761,10 +1731,8 @@ next:
 
 err_fences:
 	if (fences) {
-		while (cur_fence) {
-			/* FIXME: Rewind the previous binds? */
+		while (cur_fence)
 			dma_fence_put(fences[--cur_fence]);
-		}
 		kfree(fences);
 	}
 
@@ -1838,100 +1806,24 @@ next:
 
 err_fences:
 	if (fences) {
-		while (cur_fence) {
-			/* FIXME: Rewind the previous binds? */
+		while (cur_fence)
 			dma_fence_put(fences[--cur_fence]);
-		}
 		kfree(fences);
 	}
 
 	return ERR_PTR(err);
 }
 
-struct async_op_fence {
-	struct dma_fence fence;
-	struct dma_fence *wait_fence;
-	struct dma_fence_cb cb;
-	struct xe_vm *vm;
-	wait_queue_head_t wq;
-	bool started;
-};
-
-static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
-{
-	return "xe";
-}
-
-static const char *
-async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
-{
-	return "async_op_fence";
-}
-
-static const struct dma_fence_ops async_op_fence_ops = {
-	.get_driver_name = async_op_fence_get_driver_name,
-	.get_timeline_name = async_op_fence_get_timeline_name,
-};
-
-static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
-{
-	struct async_op_fence *afence =
-		container_of(cb, struct async_op_fence, cb);
-
-	afence->fence.error = afence->wait_fence->error;
-	dma_fence_signal(&afence->fence);
-	xe_vm_put(afence->vm);
-	dma_fence_put(afence->wait_fence);
-	dma_fence_put(&afence->fence);
-}
-
-static void add_async_op_fence_cb(struct xe_vm *vm,
-				  struct dma_fence *fence,
-				  struct async_op_fence *afence)
+static bool xe_vm_sync_mode(struct xe_vm *vm, struct xe_exec_queue *q)
 {
-	int ret;
-
-	if (!xe_vm_no_dma_fences(vm)) {
-		afence->started = true;
-		smp_wmb();
-		wake_up_all(&afence->wq);
-	}
-
-	afence->wait_fence = dma_fence_get(fence);
-	afence->vm = xe_vm_get(vm);
-	dma_fence_get(&afence->fence);
-	ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
-	if (ret == -ENOENT) {
-		afence->fence.error = afence->wait_fence->error;
-		dma_fence_signal(&afence->fence);
-	}
-	if (ret) {
-		xe_vm_put(vm);
-		dma_fence_put(afence->wait_fence);
-		dma_fence_put(&afence->fence);
-	}
-	XE_WARN_ON(ret && ret != -ENOENT);
-}
-
-int xe_vm_async_fence_wait_start(struct dma_fence *fence)
-{
-	if (fence->ops == &async_op_fence_ops) {
-		struct async_op_fence *afence =
-			container_of(fence, struct async_op_fence, fence);
-
-		xe_assert(afence->vm->xe, !xe_vm_no_dma_fences(afence->vm));
-
-		smp_rmb();
-		return wait_event_interruptible(afence->wq, afence->started);
-	}
-
-	return 0;
+	return q ? !(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC) :
+		!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT);
 }
 
 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
-			u32 num_syncs, struct async_op_fence *afence,
-			bool immediate, bool first_op, bool last_op)
+			u32 num_syncs, bool immediate, bool first_op,
+			bool last_op)
 {
 	struct dma_fence *fence;
 
@@ -1953,17 +1845,18 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 				xe_sync_entry_signal(&syncs[i], NULL, fence);
 		}
 	}
-	if (afence)
-		add_async_op_fence_cb(vm, fence, afence);
 
+	if (last_op && xe_vm_sync_mode(vm, q))
+		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
+
 	return 0;
 }
 
 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
 		      struct xe_bo *bo, struct xe_sync_entry *syncs,
-		      u32 num_syncs, struct async_op_fence *afence,
-		      bool immediate, bool first_op, bool last_op)
+		      u32 num_syncs, bool immediate, bool first_op,
+		      bool last_op)
 {
 	int err;
 
@@ -1976,14 +1869,13 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue
 			return err;
 	}
 
-	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, afence, immediate,
-			    first_op, last_op);
+	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
+			    last_op);
 }
 
 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
-			u32 num_syncs, struct async_op_fence *afence,
-			bool first_op, bool last_op)
+			u32 num_syncs, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
 
@@ -1993,10 +1885,10 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
-	if (afence)
-		add_async_op_fence_cb(vm, fence, afence);
 
 	xe_vma_destroy(vma, fence);
+	if (last_op && xe_vm_sync_mode(vm, q))
+		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
 
 	return 0;
@@ -2004,7 +1896,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 
 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
 				    DRM_XE_VM_CREATE_COMPUTE_MODE | \
-				    DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
+				    DRM_XE_VM_CREATE_ASYNC_DEFAULT | \
 				    DRM_XE_VM_CREATE_FAULT_MODE)
 
 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -2051,12 +1943,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 			 xe_device_in_fault_mode(xe)))
 		return -EINVAL;
 
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
 	if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
 	if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
 		flags |= XE_VM_FLAG_COMPUTE_MODE;
-	if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
-		flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
+	if (args->flags & DRM_XE_VM_CREATE_ASYNC_DEFAULT)
+		flags |= XE_VM_FLAG_ASYNC_DEFAULT;
 	if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
 		flags |= XE_VM_FLAG_FAULT_MODE;
 
@@ -2139,8 +2034,7 @@ static const u32 region_to_mem_type[] = {
 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 			  struct xe_exec_queue *q, u32 region,
 			  struct xe_sync_entry *syncs, u32 num_syncs,
-			  struct async_op_fence *afence, bool first_op,
-			  bool last_op)
+			  bool first_op, bool last_op)
 {
 	int err;
 
@@ -2154,7 +2048,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 	if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
 		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
-				  afence, true, first_op, last_op);
+				  true, first_op, last_op);
 	} else {
 		int i;
 
@@ -2164,55 +2058,9 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 				xe_sync_entry_signal(&syncs[i], NULL,
 						     dma_fence_get_stub());
 		}
-		if (afence)
-			dma_fence_signal(&afence->fence);
-		return 0;
-	}
-}
-
-static void vm_set_async_error(struct xe_vm *vm, int err)
-{
-	lockdep_assert_held(&vm->lock);
-	vm->async_ops.error = err;
-}
-
-static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
-				    u64 addr, u64 range, u32 op, u32 flags)
-{
-	struct xe_device *xe = vm->xe;
-	struct xe_vma *vma;
-	bool async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
-
-	lockdep_assert_held(&vm->lock);
 
-	switch (op) {
-	case XE_VM_BIND_OP_MAP:
-	case XE_VM_BIND_OP_MAP_USERPTR:
-		vma = xe_vm_find_overlapping_vma(vm, addr, range);
-		if (XE_IOCTL_DBG(xe, vma && !async))
-			return -EBUSY;
-		break;
-	case XE_VM_BIND_OP_UNMAP:
-	case XE_VM_BIND_OP_PREFETCH:
-		vma = xe_vm_find_overlapping_vma(vm, addr, range);
-		if (XE_IOCTL_DBG(xe, !vma))
-			/* Not an actual error, IOCTL cleans up returns and 0 */
-			return -ENODATA;
-		if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr ||
-				      xe_vma_end(vma) != addr + range) && !async))
-			return -EINVAL;
-		break;
-	case XE_VM_BIND_OP_UNMAP_ALL:
-		if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list)))
-			/* Not an actual error, IOCTL cleans up returns and 0 */
-			return -ENODATA;
-		break;
-	default:
-		drm_warn(&xe->drm, "NOT POSSIBLE");
-		return -EINVAL;
+		return 0;
 	}
-
-	return 0;
 }
 
 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
@@ -2509,37 +2357,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				   bool async)
 {
 	struct xe_vma_op *last_op = NULL;
-	struct async_op_fence *fence = NULL;
 	struct drm_gpuva_op *__op;
 	int err = 0;
 
 	lockdep_assert_held_write(&vm->lock);
 
-	if (last && num_syncs && async) {
-		u64 seqno;
-
-		fence = kmalloc(sizeof(*fence), GFP_KERNEL);
-		if (!fence)
-			return -ENOMEM;
-
-		seqno = q ? ++q->bind.fence_seqno : ++vm->async_ops.fence.seqno;
-		dma_fence_init(&fence->fence, &async_op_fence_ops,
-			       &vm->async_ops.lock, q ? q->bind.fence_ctx :
-			       vm->async_ops.fence.context, seqno);
-
-		if (!xe_vm_no_dma_fences(vm)) {
-			fence->vm = vm;
-			fence->started = false;
-			init_waitqueue_head(&fence->wq);
-		}
-	}
-
 	drm_gpuva_for_each_op(__op, ops) {
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 		bool first = list_empty(ops_list);
 
-		xe_assert(vm->xe, first || async);
-
 		INIT_LIST_HEAD(&op->link);
 		list_add_tail(&op->link, ops_list);
 
@@ -2559,10 +2385,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 			vma = new_vma(vm, &op->base.map,
 				      op->tile_mask, op->map.read_only,
 				      op->map.is_null);
-			if (IS_ERR(vma)) {
-				err = PTR_ERR(vma);
-				goto free_fence;
-			}
+			if (IS_ERR(vma))
+				return PTR_ERR(vma);
 
 			op->map.vma = vma;
 			break;
@@ -2587,10 +2411,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				vma = new_vma(vm, op->base.remap.prev,
 					      op->tile_mask, read_only,
 					      is_null);
-				if (IS_ERR(vma)) {
-					err = PTR_ERR(vma);
-					goto free_fence;
-				}
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
 
 				op->remap.prev = vma;
 
@@ -2623,10 +2445,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				vma = new_vma(vm, op->base.remap.next,
 					      op->tile_mask, read_only,
 					      is_null);
-				if (IS_ERR(vma)) {
-					err = PTR_ERR(vma);
-					goto free_fence;
-				}
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
 
 				op->remap.next = vma;
 
@@ -2658,27 +2478,23 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
 		err = xe_vma_op_commit(vm, op);
 		if (err)
-			goto free_fence;
+			return err;
 	}
 
 	/* FIXME: Unhandled corner case */
 	XE_WARN_ON(!last_op && last && !list_empty(ops_list));
 
 	if (!last_op)
-		goto free_fence;
+		return 0;
+
 	last_op->ops = ops;
 	if (last) {
 		last_op->flags |= XE_VMA_OP_LAST;
 		last_op->num_syncs = num_syncs;
 		last_op->syncs = syncs;
-		last_op->fence = fence;
 	}
 
 	return 0;
-
-free_fence:
-	kfree(fence);
-	return err;
 }
 
 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
@@ -2698,7 +2514,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
 		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
-				 op->syncs, op->num_syncs, op->fence,
+				 op->syncs, op->num_syncs,
 				 op->map.immediate || !xe_vm_in_fault_mode(vm),
 				 op->flags & XE_VMA_OP_FIRST,
 				 op->flags & XE_VMA_OP_LAST);
@@ -2709,16 +2525,13 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 		bool next = !!op->remap.next;
 
 		if (!op->remap.unmap_done) {
-			if (prev || next) {
-				vm->async_ops.munmap_rebind_inflight = true;
+			if (prev || next)
 				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
-			}
 			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
 					   op->num_syncs,
-					   !prev && !next ? op->fence : NULL,
 					   op->flags & XE_VMA_OP_FIRST,
-					   op->flags & XE_VMA_OP_LAST && !prev &&
-					   !next);
+					   op->flags & XE_VMA_OP_LAST &&
+					   !prev && !next);
 			if (err)
 				break;
 			op->remap.unmap_done = true;
@@ -2728,8 +2541,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
 			err = xe_vm_bind(vm, op->remap.prev, op->q,
 					 xe_vma_bo(op->remap.prev), op->syncs,
-					 op->num_syncs,
-					 !next ? op->fence : NULL, true, false,
+					 op->num_syncs, true, false,
 					 op->flags & XE_VMA_OP_LAST && !next);
 			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
 			if (err)
@@ -2742,26 +2554,24 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 			err = xe_vm_bind(vm, op->remap.next, op->q,
 					 xe_vma_bo(op->remap.next),
 					 op->syncs, op->num_syncs,
-					 op->fence, true, false,
+					 true, false,
 					 op->flags & XE_VMA_OP_LAST);
 			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
 			if (err)
 				break;
 			op->remap.next = NULL;
 		}
-		vm->async_ops.munmap_rebind_inflight = false;
 
 		break;
 	}
 	case DRM_GPUVA_OP_UNMAP:
 		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
-				   op->num_syncs, op->fence,
-				   op->flags & XE_VMA_OP_FIRST,
+				   op->num_syncs, op->flags & XE_VMA_OP_FIRST,
 				   op->flags & XE_VMA_OP_LAST);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
 		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
-				     op->syncs, op->num_syncs, op->fence,
+				     op->syncs, op->num_syncs,
 				     op->flags & XE_VMA_OP_FIRST,
 				     op->flags & XE_VMA_OP_LAST);
 		break;
@@ -2860,14 +2670,9 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
 		kfree(op->syncs);
 		if (op->q)
 			xe_exec_queue_put(op->q);
-		if (op->fence)
-			dma_fence_put(&op->fence->fence);
 	}
-	if (!list_empty(&op->link)) {
-		spin_lock_irq(&vm->async_ops.lock);
+	if (!list_empty(&op->link))
 		list_del(&op->link);
-		spin_unlock_irq(&vm->async_ops.lock);
-	}
 	if (op->ops)
 		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
 	if (last)
@@ -2929,129 +2734,6 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
 	}
 }
 
-static struct xe_vma_op *next_vma_op(struct xe_vm *vm)
-{
-	return list_first_entry_or_null(&vm->async_ops.pending,
-					struct xe_vma_op, link);
-}
-
-static void xe_vma_op_work_func(struct work_struct *w)
-{
-	struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
-
-	for (;;) {
-		struct xe_vma_op *op;
-		int err;
-
-		if (vm->async_ops.error && !xe_vm_is_closed(vm))
-			break;
-
-		spin_lock_irq(&vm->async_ops.lock);
-		op = next_vma_op(vm);
-		spin_unlock_irq(&vm->async_ops.lock);
-
-		if (!op)
-			break;
-
-		if (!xe_vm_is_closed(vm)) {
-			down_write(&vm->lock);
-			err = xe_vma_op_execute(vm, op);
-			if (err) {
-				drm_warn(&vm->xe->drm,
-					 "Async VM op(%d) failed with %d",
-					 op->base.op, err);
-				vm_set_async_error(vm, err);
-				up_write(&vm->lock);
-
-				break;
-			}
-			up_write(&vm->lock);
-		} else {
-			struct xe_vma *vma;
-
-			switch (op->base.op) {
-			case DRM_GPUVA_OP_REMAP:
-				vma = gpuva_to_vma(op->base.remap.unmap->va);
-				trace_xe_vma_flush(vma);
-
-				down_write(&vm->lock);
-				xe_vma_destroy_unlocked(vma);
-				up_write(&vm->lock);
-				break;
-			case DRM_GPUVA_OP_UNMAP:
-				vma = gpuva_to_vma(op->base.unmap.va);
-				trace_xe_vma_flush(vma);
-
-				down_write(&vm->lock);
-				xe_vma_destroy_unlocked(vma);
-				up_write(&vm->lock);
-				break;
-			default:
-				/* Nothing to do */
-				break;
-			}
-
-			if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-						   &op->fence->fence.flags)) {
-				if (!xe_vm_no_dma_fences(vm)) {
-					op->fence->started = true;
-					wake_up_all(&op->fence->wq);
-				}
-				dma_fence_signal(&op->fence->fence);
-			}
-		}
-
-		xe_vma_op_cleanup(vm, op);
-	}
-}
-
-static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
-				     struct list_head *ops_list, bool async)
-{
-	struct xe_vma_op *op, *last_op, *next;
-	int err;
-
-	lockdep_assert_held_write(&vm->lock);
-
-	last_op = list_last_entry(ops_list, struct xe_vma_op, link);
-
-	if (!async) {
-		err = xe_vma_op_execute(vm, last_op);
-		if (err)
-			goto unwind;
-		xe_vma_op_cleanup(vm, last_op);
-	} else {
-		int i;
-		bool installed = false;
-
-		for (i = 0; i < last_op->num_syncs; i++)
-			installed |= xe_sync_entry_signal(&last_op->syncs[i],
-							  NULL,
-							  &last_op->fence->fence);
-		if (!installed && last_op->fence)
-			dma_fence_signal(&last_op->fence->fence);
-
-		spin_lock_irq(&vm->async_ops.lock);
-		list_splice_tail(ops_list, &vm->async_ops.pending);
-		spin_unlock_irq(&vm->async_ops.lock);
-
-		if (!vm->async_ops.error)
-			queue_work(system_unbound_wq, &vm->async_ops.work);
-	}
-
-	return 0;
-
-unwind:
-	list_for_each_entry_reverse(op, ops_list, link)
-		xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED,
-				 op->flags & XE_VMA_OP_PREV_COMMITTED,
-				 op->flags & XE_VMA_OP_NEXT_COMMITTED);
-	list_for_each_entry_safe(op, next, ops_list, link)
-		xe_vma_op_cleanup(vm, op);
-
-	return err;
-}
-
 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 				     struct drm_gpuva_ops **ops,
 				     int num_ops_list)
@@ -3078,6 +2760,31 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 	}
 }
 
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+				     struct list_head *ops_list)
+{
+	struct xe_vma_op *op, *next;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	list_for_each_entry_safe(op, next, ops_list, link) {
+		err = xe_vma_op_execute(vm, op);
+		if (err) {
+			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
+				 op->base.op, err);
+			/*
+			 * FIXME: Killing VM rather than proper error handling
+			 */
+			xe_vm_kill(vm);
+			return -ENOSPC;
+		}
+		xe_vma_op_cleanup(vm, op);
+	}
+
+	return 0;
+}
+
 #ifdef TEST_VM_ASYNC_OPS_ERROR
 #define SUPPORTED_FLAGS	\
 	(FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
@@ -3086,7 +2793,8 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 #else
 #define SUPPORTED_FLAGS	\
 	(XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
-	 XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff)
+	 XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | \
+	 0xffff)
 #endif
 #define XE_64K_PAGE_MASK 0xffffull
 
@@ -3137,21 +2845,12 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 
 		if (i == 0) {
 			*async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
-		} else if (XE_IOCTL_DBG(xe, !*async) ||
-			   XE_IOCTL_DBG(xe, !(flags & XE_VM_BIND_FLAG_ASYNC)) ||
-			   XE_IOCTL_DBG(xe, op == XE_VM_BIND_OP_RESTART)) {
-			err = -EINVAL;
-			goto free_bind_ops;
-		}
-
-		if (XE_IOCTL_DBG(xe, !*async &&
-				 op == XE_VM_BIND_OP_UNMAP_ALL)) {
-			err = -EINVAL;
-			goto free_bind_ops;
-		}
-
-		if (XE_IOCTL_DBG(xe, !*async &&
-				 op == XE_VM_BIND_OP_PREFETCH)) {
+			if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) {
+				err = -EINVAL;
+				goto free_bind_ops;
+			}
+		} else if (XE_IOCTL_DBG(xe, *async !=
+					!!(flags & XE_VM_BIND_FLAG_ASYNC))) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
@@ -3188,8 +2887,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
-		    XE_IOCTL_DBG(xe, !range && op !=
-				 XE_VM_BIND_OP_RESTART &&
+		    XE_IOCTL_DBG(xe, !range &&
 				 op != XE_VM_BIND_OP_UNMAP_ALL)) {
 			err = -EINVAL;
 			goto free_bind_ops;
@@ -3237,6 +2935,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			err = -EINVAL;
 			goto put_exec_queue;
 		}
+
+		if (XE_IOCTL_DBG(xe, async !=
+				 !!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC))) {
+			err = -EINVAL;
+			goto put_exec_queue;
+		}
 	}
 
 	vm = xe_vm_lookup(xef, args->vm_id);
@@ -3245,6 +2949,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto put_exec_queue;
 	}
 
+	if (!args->exec_queue_id) {
+		if (XE_IOCTL_DBG(xe, async !=
+				 !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) {
+			err = -EINVAL;
+			goto put_vm;
+		}
+	}
+
 	err = down_write_killable(&vm->lock);
 	if (err)
 		goto put_vm;
@@ -3254,34 +2966,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto release_vm_lock;
 	}
 
-	if (bind_ops[0].op == XE_VM_BIND_OP_RESTART) {
-		if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
-			err = -EOPNOTSUPP;
-		if (XE_IOCTL_DBG(xe, !err && args->num_syncs))
-			err = EINVAL;
-		if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error))
-			err = -EPROTO;
-
-		if (!err) {
-			trace_xe_vm_restart(vm);
-			vm_set_async_error(vm, 0);
-
-			queue_work(system_unbound_wq, &vm->async_ops.work);
-
-			/* Rebinds may have been blocked, give worker a kick */
-			if (xe_vm_in_compute_mode(vm))
-				xe_vm_queue_rebind_worker(vm);
-		}
-
-		goto release_vm_lock;
-	}
-
-	if (XE_IOCTL_DBG(xe, !vm->async_ops.error &&
-			 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
-		err = -EOPNOTSUPP;
-		goto release_vm_lock;
-	}
-
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
@@ -3367,18 +3051,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto free_syncs;
 	}
 
-	/* Do some error checking first to make the unwind easier */
-	for (i = 0; i < args->num_binds; ++i) {
-		u64 range = bind_ops[i].range;
-		u64 addr = bind_ops[i].addr;
-		u32 op = bind_ops[i].op;
-		u32 flags = bind_ops[i].flags;
-
-		err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op, flags);
-		if (err)
-			goto free_syncs;
-	}
-
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
@@ -3411,10 +3083,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto unwind_ops;
 	}
 
-	err = vm_bind_ioctl_ops_execute(vm, &ops_list, async);
+	xe_vm_get(vm);
+	if (q)
+		xe_exec_queue_get(q);
+
+	err = vm_bind_ioctl_ops_execute(vm, &ops_list);
+
 	up_write(&vm->lock);
 
-	for (i = 0; i < args->num_binds; ++i)
+	if (q)
+		xe_exec_queue_put(q);
+	xe_vm_put(vm);
+
+	for (i = 0; bos && i < args->num_binds; ++i)
 		xe_bo_put(bos[i]);
 
 	kfree(bos);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 59dcbd1adf15a..45b70ba865535 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -177,8 +177,6 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
 
 int xe_vm_invalidate_vma(struct xe_vma *vma);
 
-int xe_vm_async_fence_wait_start(struct dma_fence *fence);
-
 extern struct ttm_device_funcs xe_ttm_funcs;
 
 static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 828ed0fa7e606..97d779d8a7d38 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -17,7 +17,6 @@
 #include "xe_pt_types.h"
 #include "xe_range_fence.h"
 
-struct async_op_fence;
 struct xe_bo;
 struct xe_sync_entry;
 struct xe_vm;
@@ -156,7 +155,7 @@ struct xe_vm {
 	 */
 #define XE_VM_FLAG_64K			BIT(0)
 #define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
-#define XE_VM_FLAG_ASYNC_BIND_OPS	BIT(2)
+#define XE_VM_FLAG_ASYNC_DEFAULT	BIT(2)
 #define XE_VM_FLAG_MIGRATION		BIT(3)
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
 #define XE_VM_FLAG_FAULT_MODE		BIT(5)
@@ -394,10 +393,6 @@ struct xe_vma_op {
 	u32 num_syncs;
 	/** @link: async operation link */
 	struct list_head link;
-	/**
-	 * @fence: async operation fence, signaled on last operation complete
-	 */
-	struct async_op_fence *fence;
 	/** @tile_mask: gt mask for this operation */
 	u8 tile_mask;
 	/** @flags: operation flags */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index f13974f17be9d..4dc103aa00f11 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -134,10 +134,11 @@ struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
 #define DRM_XE_ENGINE_CLASS_COMPUTE		4
 	/*
-	 * Kernel only class (not actual hardware engine class). Used for
+	 * Kernel only classes (not actual hardware engine class). Used for
 	 * creating ordered queues of VM bind operations.
 	 */
-#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC	5
+#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC	6
 	__u16 engine_class;
 
 	__u16 engine_instance;
@@ -577,7 +578,7 @@ struct drm_xe_vm_create {
 
 #define DRM_XE_VM_CREATE_SCRATCH_PAGE	(0x1 << 0)
 #define DRM_XE_VM_CREATE_COMPUTE_MODE	(0x1 << 1)
-#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS	(0x1 << 2)
+#define DRM_XE_VM_CREATE_ASYNC_DEFAULT	(0x1 << 2)
 #define DRM_XE_VM_CREATE_FAULT_MODE	(0x1 << 3)
 	/** @flags: Flags */
 	__u32 flags;
@@ -637,34 +638,12 @@ struct drm_xe_vm_bind_op {
 #define XE_VM_BIND_OP_MAP		0x0
 #define XE_VM_BIND_OP_UNMAP		0x1
 #define XE_VM_BIND_OP_MAP_USERPTR	0x2
-#define XE_VM_BIND_OP_RESTART		0x3
-#define XE_VM_BIND_OP_UNMAP_ALL		0x4
-#define XE_VM_BIND_OP_PREFETCH		0x5
+#define XE_VM_BIND_OP_UNMAP_ALL		0x3
+#define XE_VM_BIND_OP_PREFETCH		0x4
 	/** @op: Bind operation to perform */
 	__u32 op;
 
 #define XE_VM_BIND_FLAG_READONLY	(0x1 << 0)
-	/*
-	 * A bind ops completions are always async, hence the support for out
-	 * sync. This flag indicates the allocation of the memory for new page
-	 * tables and the job to program the pages tables is asynchronous
-	 * relative to the IOCTL. That part of a bind operation can fail under
-	 * memory pressure, the job in practice can't fail unless the system is
-	 * totally shot.
-	 *
-	 * If this flag is clear and the IOCTL doesn't return an error, in
-	 * practice the bind op is good and will complete.
-	 *
-	 * If this flag is set and doesn't return an error, the bind op can
-	 * still fail and recovery is needed. It should free memory
-	 * via non-async unbinds, and then restart all queued async binds op via
-	 * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the
-	 * VM.
-	 *
-	 * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is
-	 * configured in the VM and must be set if the VM is configured with
-	 * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state.
-	 */
 #define XE_VM_BIND_FLAG_ASYNC		(0x1 << 1)
 	/*
 	 * Valid on a faulting VM only, do the MAP operation immediately rather
-- 
GitLab


From e669f10cd3182943058fa84b1e81f3727f6e0520 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Thu, 14 Sep 2023 13:40:51 -0700
Subject: [PATCH 1995/2340] drm/xe: Fix VM bind out-sync signaling ordering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A case existed where an out-sync of a later VM bind operation could
signal before a previous one if the later operation results in a NOP
(e.g. a unbind or prefetch to a VA range without any mappings). This
breaks the ordering rules, fix this. This patch also lays the groundwork
for users to pass in num_binds == 0 and out-syncs.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c       | 75 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_exec_queue.h       |  7 +++
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  6 ++
 drivers/gpu/drm/xe/xe_vm.c               | 45 +++++++++++---
 4 files changed, 125 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 9b373b9ea4724..8e0620cb89e5d 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -156,6 +156,7 @@ void xe_exec_queue_destroy(struct kref *ref)
 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
 	struct xe_exec_queue *eq, *next;
 
+	xe_exec_queue_last_fence_put_unlocked(q);
 	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
 		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
 					 multi_gt_link)
@@ -916,3 +917,77 @@ out:
 
 	return ret;
 }
+
+static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
+						    struct xe_vm *vm)
+{
+	lockdep_assert_held_write(&vm->lock);
+}
+
+/**
+ * xe_exec_queue_last_fence_put() - Drop ref to last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ */
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
+ * @q: The exec queue
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
+{
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_get() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, does not take a ref
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
+					       struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put(q, vm);
+
+	return q->last_fence ? q->last_fence : dma_fence_get_stub();
+}
+
+/**
+ * xe_exec_queue_last_fence_set() - Set last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ * @fence: The fence
+ *
+ * Set the last fence for the engine. Increases reference count for fence, when
+ * closing engine xe_exec_queue_last_fence_put should be called.
+ */
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
+				  struct dma_fence *fence)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	xe_exec_queue_last_fence_put(q, vm);
+	q->last_fence = dma_fence_get(fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 22499a2f522b4..533da1b0c457a 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -61,4 +61,11 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file);
 enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
 
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e);
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
+					       struct xe_vm *vm);
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
+				  struct dma_fence *fence);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 4e382304010eb..6826feb650f34 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -65,6 +65,12 @@ struct xe_exec_queue {
 	/** @fence_irq: fence IRQ used to signal job completion */
 	struct xe_hw_fence_irq *fence_irq;
 
+	/**
+	 * @last_fence: last fence on engine, protected by vm->lock in write
+	 * mode if bind engine
+	 */
+	struct dma_fence *last_fence;
+
 /* queue no longer allowed to submit */
 #define EXEC_QUEUE_FLAG_BANNED			BIT(0)
 /* queue used for kernel submission only */
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 89df50f49e11f..4c8d77c4c7c0d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1524,6 +1524,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
 
+	down_write(&vm->lock);
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id])
+			xe_exec_queue_last_fence_put(vm->q[id], vm);
+	}
+	up_write(&vm->lock);
+
 	for_each_tile(tile, xe, id) {
 		if (vm->q[id]) {
 			xe_exec_queue_kill(vm->q[id]);
@@ -1665,16 +1672,23 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 					 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
 }
 
+static struct xe_exec_queue *
+to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	return q ? q : vm->q[0];
+}
+
 static struct dma_fence *
 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool first_op, bool last_op)
 {
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	struct xe_tile *tile;
 	struct dma_fence *fence = NULL;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
-	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
 	int number_tiles = hweight8(vma->tile_present);
 	int err;
@@ -1727,7 +1741,8 @@ next:
 					     cf ? &cf->base : fence);
 	}
 
-	return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
+	return cf ? &cf->base : !fence ?
+		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
 
 err_fences:
 	if (fences) {
@@ -1826,6 +1841,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 			bool last_op)
 {
 	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 
 	xe_vm_assert_held(vm);
 
@@ -1839,13 +1855,15 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 
 		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
 
-		fence = dma_fence_get_stub();
+		fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 		if (last_op) {
 			for (i = 0; i < num_syncs; i++)
 				xe_sync_entry_signal(&syncs[i], NULL, fence);
 		}
 	}
 
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
 	if (last_op && xe_vm_sync_mode(vm, q))
 		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
@@ -1878,6 +1896,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 			u32 num_syncs, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
@@ -1887,6 +1906,8 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 		return PTR_ERR(fence);
 
 	xe_vma_destroy(vma, fence);
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
 	if (last_op && xe_vm_sync_mode(vm, q))
 		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
@@ -2036,6 +2057,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 			  struct xe_sync_entry *syncs, u32 num_syncs,
 			  bool first_op, bool last_op)
 {
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	int err;
 
 	xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
@@ -2054,9 +2076,12 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 		/* Nothing to do, signal fences now */
 		if (last_op) {
-			for (i = 0; i < num_syncs; i++)
-				xe_sync_entry_signal(&syncs[i], NULL,
-						     dma_fence_get_stub());
+			for (i = 0; i < num_syncs; i++) {
+				struct dma_fence *fence =
+					xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+			}
 		}
 
 		return 0;
@@ -3108,8 +3133,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 unwind_ops:
 	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
 free_syncs:
-	for (i = 0; err == -ENODATA && i < num_syncs; i++)
-		xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());
+	for (i = 0; err == -ENODATA && i < num_syncs; i++) {
+		struct dma_fence *fence =
+			xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
+
+		xe_sync_entry_signal(&syncs[i], NULL, fence);
+	}
 	while (num_syncs--)
 		xe_sync_entry_cleanup(&syncs[num_syncs]);
 
-- 
GitLab


From 25f656f534f4b4eb95140efce37328efbda13af7 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 20 Sep 2023 15:29:33 -0400
Subject: [PATCH 1996/2340] drm/xe/uapi: Document drm_xe_query_gt

Split drm_xe_query_gt out of the gt list one in order to better
document it.

No functional change at this point. Any actual change to the
uapi should come in follow-up additions.

v2: s/maks/mask

Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 include/uapi/drm/xe_drm.h | 65 ++++++++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 4dc103aa00f11..53b7b2ddf3042 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -334,6 +334,47 @@ struct drm_xe_query_config {
 	__u64 info[];
 };
 
+/**
+ * struct drm_xe_query_gt - describe an individual GT.
+ *
+ * To be used with drm_xe_query_gts, which will return a list with all the
+ * existing GT individual descriptions.
+ * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for
+ * implementing graphics and/or media operations.
+ */
+struct drm_xe_query_gt {
+#define XE_QUERY_GT_TYPE_MAIN		0
+#define XE_QUERY_GT_TYPE_REMOTE		1
+#define XE_QUERY_GT_TYPE_MEDIA		2
+	/** @type: GT type: Main, Remote, or Media */
+	__u16 type;
+	/** @instance: Instance of this GT in the GT list */
+	__u16 instance;
+	/** @clock_freq: A clock frequency for timestamp */
+	__u32 clock_freq;
+	/** @features: Reserved for future information about GT features */
+	__u64 features;
+	/**
+	 * @native_mem_regions: Bit mask of instances from
+	 * drm_xe_query_mem_usage that lives on the same GPU/Tile and have
+	 * direct access.
+	 */
+	__u64 native_mem_regions;
+	/**
+	 * @slow_mem_regions: Bit mask of instances from
+	 * drm_xe_query_mem_usage that this GT can indirectly access, although
+	 * they live on a different GPU/Tile.
+	 */
+	__u64 slow_mem_regions;
+	/**
+	 * @inaccessible_mem_regions: Bit mask of instances from
+	 * drm_xe_query_mem_usage that is not accessible by this GT at all.
+	 */
+	__u64 inaccessible_mem_regions;
+	/** @reserved: Reserved */
+	__u64 reserved[8];
+};
+
 /**
  * struct drm_xe_query_gts - describe GTs
  *
@@ -344,30 +385,10 @@ struct drm_xe_query_config {
 struct drm_xe_query_gts {
 	/** @num_gt: number of GTs returned in gts */
 	__u32 num_gt;
-
 	/** @pad: MBZ */
 	__u32 pad;
-
-	/**
-	 * @gts: The GTs returned for this device
-	 *
-	 * TODO: convert drm_xe_query_gt to proper kernel-doc.
-	 * TODO: Perhaps info about every mem region relative to this GT? e.g.
-	 * bandwidth between this GT and remote region?
-	 */
-	struct drm_xe_query_gt {
-#define XE_QUERY_GT_TYPE_MAIN		0
-#define XE_QUERY_GT_TYPE_REMOTE		1
-#define XE_QUERY_GT_TYPE_MEDIA		2
-		__u16 type;
-		__u16 instance;
-		__u32 clock_freq;
-		__u64 features;
-		__u64 native_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
-		__u64 slow_mem_regions;		/* bit mask of instances from drm_xe_query_mem_usage */
-		__u64 inaccessible_mem_regions;	/* bit mask of instances from drm_xe_query_mem_usage */
-		__u64 reserved[8];
-	} gts[];
+	/** @gts: The GT list returned for this device */
+	struct drm_xe_query_gt gts[];
 };
 
 /**
-- 
GitLab


From 2519450aaa31948d27db0715c24398b2590517f1 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 20 Sep 2023 15:29:34 -0400
Subject: [PATCH 1997/2340] drm/xe/uapi: Replace useless 'instance' per unique
 gt_id

Let's have a single GT ID per GT within the PCI Device Card.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_types.h | 2 +-
 drivers/gpu/drm/xe/xe_pci.c      | 4 ----
 drivers/gpu/drm/xe/xe_query.c    | 2 +-
 include/uapi/drm/xe_drm.h        | 4 ++--
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index d4310be3e1e7c..d3f2793684e2d 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -105,7 +105,7 @@ struct xe_gt {
 	struct {
 		/** @type: type of GT */
 		enum xe_gt_type type;
-		/** @id: id of GT */
+		/** @id: Unique ID of this GT within the PCI Device */
 		u8 id;
 		/** @clock_freq: clock frequency */
 		u32 clock_freq;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 9963772caabbd..eec2b852c7aaa 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -593,10 +593,6 @@ static int xe_info_init(struct xe_device *xe,
 			return PTR_ERR(tile->primary_gt);
 
 		gt = tile->primary_gt;
-		/*
-		 * FIXME: GT numbering scheme may change depending on UAPI
-		 * decisions.
-		 */
 		gt->info.id = xe->info.gt_count++;
 		gt->info.type = XE_GT_TYPE_MAIN;
 		gt->info.__engine_mask = graphics_desc->hw_engine_mask;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index cd3e0f3208a6c..3bff06299e652 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -376,7 +376,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 			gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE;
 		else
 			gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
-		gts->gts[id].instance = id;
+		gts->gts[id].gt_id = gt->info.id;
 		gts->gts[id].clock_freq = gt->info.clock_freq;
 		if (!IS_DGFX(xe))
 			gts->gts[id].native_mem_regions = 0x1;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 53b7b2ddf3042..11bc4dc2c78cf 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -348,8 +348,8 @@ struct drm_xe_query_gt {
 #define XE_QUERY_GT_TYPE_MEDIA		2
 	/** @type: GT type: Main, Remote, or Media */
 	__u16 type;
-	/** @instance: Instance of this GT in the GT list */
-	__u16 instance;
+	/** @gt_id: Unique ID of this GT within the PCI Device */
+	__u16 gt_id;
 	/** @clock_freq: A clock frequency for timestamp */
 	__u32 clock_freq;
 	/** @features: Reserved for future information about GT features */
-- 
GitLab


From 92296571546460bf9f4faf5e288d63f91d838968 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 20 Sep 2023 15:29:35 -0400
Subject: [PATCH 1998/2340] drm/xe/uapi: Remove unused field of drm_xe_query_gt

We already have many bits reserved at the end already.
Let's kill the unused ones.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 include/uapi/drm/xe_drm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 11bc4dc2c78cf..538873361d17f 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -352,8 +352,6 @@ struct drm_xe_query_gt {
 	__u16 gt_id;
 	/** @clock_freq: A clock frequency for timestamp */
 	__u32 clock_freq;
-	/** @features: Reserved for future information about GT features */
-	__u64 features;
 	/**
 	 * @native_mem_regions: Bit mask of instances from
 	 * drm_xe_query_mem_usage that lives on the same GPU/Tile and have
-- 
GitLab


From e16b48378527dbe2f200b792922f59a2bf038507 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 20 Sep 2023 15:29:36 -0400
Subject: [PATCH 1999/2340] drm/xe/uapi: Rename gts to gt_list

During the uapi review it was identified a possible confusion
with the plural of acronym with a new acronym. So the
recommendation is to go with gt_list instead.

Suggested-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 40 +++++++++++++++++------------------
 include/uapi/drm/xe_drm.h     | 18 ++++++++--------
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 3bff06299e652..d37c75a0b0286 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -347,14 +347,14 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	return 0;
 }
 
-static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
+static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query)
 {
 	struct xe_gt *gt;
-	size_t size = sizeof(struct drm_xe_query_gts) +
+	size_t size = sizeof(struct drm_xe_query_gt_list) +
 		xe->info.gt_count * sizeof(struct drm_xe_query_gt);
-	struct drm_xe_query_gts __user *query_ptr =
+	struct drm_xe_query_gt_list __user *query_ptr =
 		u64_to_user_ptr(query->data);
-	struct drm_xe_query_gts *gts;
+	struct drm_xe_query_gt_list *gt_list;
 	u8 id;
 
 	if (query->size == 0) {
@@ -364,34 +364,34 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
 		return -EINVAL;
 	}
 
-	gts = kzalloc(size, GFP_KERNEL);
-	if (!gts)
+	gt_list = kzalloc(size, GFP_KERNEL);
+	if (!gt_list)
 		return -ENOMEM;
 
-	gts->num_gt = xe->info.gt_count;
+	gt_list->num_gt = xe->info.gt_count;
 	for_each_gt(gt, xe, id) {
 		if (xe_gt_is_media_type(gt))
-			gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA;
+			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MEDIA;
 		else if (gt_to_tile(gt)->id > 0)
-			gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE;
+			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_REMOTE;
 		else
-			gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
-		gts->gts[id].gt_id = gt->info.id;
-		gts->gts[id].clock_freq = gt->info.clock_freq;
+			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MAIN;
+		gt_list->gt_list[id].gt_id = gt->info.id;
+		gt_list->gt_list[id].clock_freq = gt->info.clock_freq;
 		if (!IS_DGFX(xe))
-			gts->gts[id].native_mem_regions = 0x1;
+			gt_list->gt_list[id].native_mem_regions = 0x1;
 		else
-			gts->gts[id].native_mem_regions =
+			gt_list->gt_list[id].native_mem_regions =
 				BIT(gt_to_tile(gt)->id) << 1;
-		gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^
-			gts->gts[id].native_mem_regions;
+		gt_list->gt_list[id].slow_mem_regions = xe->info.mem_region_mask ^
+			gt_list->gt_list[id].native_mem_regions;
 	}
 
-	if (copy_to_user(query_ptr, gts, size)) {
-		kfree(gts);
+	if (copy_to_user(query_ptr, gt_list, size)) {
+		kfree(gt_list);
 		return -EFAULT;
 	}
-	kfree(gts);
+	kfree(gt_list);
 
 	return 0;
 }
@@ -503,7 +503,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
 	query_engines,
 	query_memory_usage,
 	query_config,
-	query_gts,
+	query_gt_list,
 	query_hwconfig,
 	query_gt_topology,
 	query_engine_cycles,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 538873361d17f..b02a632709722 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -337,7 +337,7 @@ struct drm_xe_query_config {
 /**
  * struct drm_xe_query_gt - describe an individual GT.
  *
- * To be used with drm_xe_query_gts, which will return a list with all the
+ * To be used with drm_xe_query_gt_list, which will return a list with all the
  * existing GT individual descriptions.
  * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for
  * implementing graphics and/or media operations.
@@ -374,19 +374,19 @@ struct drm_xe_query_gt {
 };
 
 /**
- * struct drm_xe_query_gts - describe GTs
+ * struct drm_xe_query_gt_list - A list with GT description items.
  *
  * If a query is made with a struct drm_xe_device_query where .query
- * is equal to DRM_XE_DEVICE_QUERY_GTS, then the reply uses struct
- * drm_xe_query_gts in .data.
+ * is equal to DRM_XE_DEVICE_QUERY_GT_LIST, then the reply uses struct
+ * drm_xe_query_gt_list in .data.
  */
-struct drm_xe_query_gts {
-	/** @num_gt: number of GTs returned in gts */
+struct drm_xe_query_gt_list {
+	/** @num_gt: number of GT items returned in gt_list */
 	__u32 num_gt;
 	/** @pad: MBZ */
 	__u32 pad;
-	/** @gts: The GT list returned for this device */
-	struct drm_xe_query_gt gts[];
+	/** @gt_list: The GT list returned for this device */
+	struct drm_xe_query_gt gt_list[];
 };
 
 /**
@@ -479,7 +479,7 @@ struct drm_xe_device_query {
 #define DRM_XE_DEVICE_QUERY_ENGINES		0
 #define DRM_XE_DEVICE_QUERY_MEM_USAGE		1
 #define DRM_XE_DEVICE_QUERY_CONFIG		2
-#define DRM_XE_DEVICE_QUERY_GTS			3
+#define DRM_XE_DEVICE_QUERY_GT_LIST		3
 #define DRM_XE_DEVICE_QUERY_HWCONFIG		4
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY		5
 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
-- 
GitLab


From e48d146456e34625c6edafd6350bfaac5004727c Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 20 Sep 2023 15:29:37 -0400
Subject: [PATCH 2000/2340] drm/xe/uapi: Fix naming of
 XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY

This is used for the priority of an exec queue (not an engine) and
should be named accordingly.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 2 +-
 include/uapi/drm/xe_drm.h     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index d37c75a0b0286..10b9878ec95a3 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -335,7 +335,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count;
 	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
 		hweight_long(xe->info.mem_region_mask);
-	config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] =
+	config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
 		xe_exec_queue_device_get_max_priority(xe);
 
 	if (copy_to_user(query_ptr, config, size)) {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b02a632709722..24bf8f0f52e82 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -328,8 +328,8 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_VA_BITS			3
 #define XE_QUERY_CONFIG_GT_COUNT		4
 #define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
-#define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY	6
-#define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1)
+#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	6
+#define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1)
 	/** @info: array of elements containing the config info */
 	__u64 info[];
 };
-- 
GitLab


From 9b49762740e3f2c240877437116635e73718cd47 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 4 Oct 2023 10:33:41 -0700
Subject: [PATCH 2001/2340] drm/xe/guc: Bump PVC GuC version to 70.9.1

The PVC GuC version that we're currently using (70.6.4) has a known
issue that leads to dropping the disabling of contexts that have
pending page faults. This is fixed in newer blobs, so we need to
update to a more recent release.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/696
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2ba0466bc45ae..32782a52c07f7 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -103,7 +103,7 @@ struct fw_blobs_by_type {
 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
 	fw_def(LUNARLAKE,	mmp_ver(xe,	guc,	lnl,	70, 6, 8))	\
 	fw_def(METEORLAKE,	major_ver(i915,	guc,	mtl,	70, 7))		\
-	fw_def(PVC,		mmp_ver(xe,	guc,	pvc,	70, 6, 4))	\
+	fw_def(PVC,		mmp_ver(xe,	guc,	pvc,	70, 9, 1))	\
 	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
 	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
 	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 5))		\
-- 
GitLab


From bf6d941c06c9681d0f3d8380e7093d7f79d3eef6 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 18 Oct 2023 13:34:24 +0100
Subject: [PATCH 2002/2340] drm/xe: fix pat[2] programming with 2M/1G pages

Bit 7 in the leaf node is normally programmed with pat[2], however with
2M/1G pages that same bit in the PDE/PDPE also toggles 2M/1G pages. For
2M/1G entries the pat[2] is rather moved to bit 12, which is now free
given that the address must be aligned to 2M or 1G, leaving bit 7 for
toggling 2M/1G pages.

Bspec: 59510, 45038
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h |  1 +
 drivers/gpu/drm/xe/xe_vm.c | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index ba6ffd359ff70..3f4e2818f92c5 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -50,6 +50,7 @@
 
 #define XELPG_PPGTT_PTE_PAT3		BIT_ULL(62)
 #define XE2_PPGTT_PTE_PAT4		BIT_ULL(61)
+#define XE_PPGTT_PDE_PDPE_PAT2		BIT_ULL(12)
 #define XE_PPGTT_PTE_PAT2		BIT_ULL(7)
 #define XE_PPGTT_PTE_PAT1		BIT_ULL(4)
 #define XE_PPGTT_PTE_PAT0		BIT_ULL(3)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4c8d77c4c7c0d..05f8c691f5fb2 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1229,7 +1229,8 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
 	return pte;
 }
 
-static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index)
+static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
+				u32 pt_level)
 {
 	u64 pte = 0;
 
@@ -1239,8 +1240,12 @@ static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index)
 	if (pat_index & BIT(1))
 		pte |= XE_PPGTT_PTE_PAT1;
 
-	if (pat_index & BIT(2))
-		pte |= XE_PPGTT_PTE_PAT2;
+	if (pat_index & BIT(2)) {
+		if (pt_level)
+			pte |= XE_PPGTT_PDE_PDPE_PAT2;
+		else
+			pte |= XE_PPGTT_PTE_PAT2;
+	}
 
 	if (pat_index & BIT(3))
 		pte |= XELPG_PPGTT_PTE_PAT3;
@@ -1284,7 +1289,7 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 
 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_pat_index(xe, pat_index);
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
 	pte |= pte_encode_ps(pt_level);
 
 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
@@ -1303,7 +1308,7 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
 	if (likely(!xe_vma_read_only(vma)))
 		pte |= XE_PAGE_RW;
 
-	pte |= pte_encode_pat_index(xe, pat_index);
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
 	pte |= pte_encode_ps(pt_level);
 
 	if (unlikely(xe_vma_is_null(vma)))
@@ -1323,7 +1328,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
 
 	pte = addr;
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
-	pte |= pte_encode_pat_index(xe, pat_index);
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
 	pte |= pte_encode_ps(pt_level);
 
 	if (devmem)
-- 
GitLab


From c85d36be2993d65cfd678e01659ff69a4a803cad Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Tue, 26 Sep 2023 13:59:37 -0700
Subject: [PATCH 2003/2340] drm/xe: Simplify xe_res_get_buddy()

We can remove the unnecessary indirection thru xe->tiles[] to get
the TTM VRAM manager.  This code can be common for VRAM and STOLEN.

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_res_cursor.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 5cb4b66a5d748..006fc1361967e 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -50,14 +50,9 @@ struct xe_res_cursor {
 
 static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
 {
-	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
 	struct ttm_resource_manager *mgr;
 
-	if (res->mem_type != XE_PL_STOLEN)
-		return &xe->tiles[res->mem_type - XE_PL_VRAM0].mem.vram_mgr->mm;
-
-	mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
-
+	mgr = ttm_manager_type(res->bo->bdev, res->mem_type);
 	return &to_xe_ttm_vram_mgr(mgr)->mm;
 }
 
-- 
GitLab


From bad3644dd8d5b118cdf64dfc71ef9540ee288ddc Mon Sep 17 00:00:00 2001
From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Date: Tue, 24 Oct 2023 15:07:37 -0700
Subject: [PATCH 2004/2340] drm/xe/xe2: Add initial workarounds

Add the initial collection of gt/engine/lrc workarounds.
While at it, add some newlines around the platform/IP comments to make
them consistent across all workarounds.

v2:
  - FF_MODE is an MCR register (Matt Roper)
  - Group 18032247524 with other Xe2 workarounds (Matt Roper)
  - Move WA changing PSS_CHICKEN to lrc_was[] as for Xe2 that register
    is part of the render context image (Matt Roper)
  - Apply WA 16020518922 only on render engine (Matt Roper)

Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231024220739.224251-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 24 +++++++++
 drivers/gpu/drm/xe/xe_wa.c           | 74 ++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index cd1821d96a5d8..5ad75011aa70e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -94,7 +94,14 @@
 #define CHICKEN_RASTER_2			XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
 #define   TBIMR_FAST_CLIP			REG_BIT(5)
 
+#define FF_MODE					XE_REG_MCR(0x6210)
+#define   DIS_TE_AUTOSTRIP			REG_BIT(31)
+#define   DIS_MESH_PARTIAL_AUTOSTRIP		REG_BIT(16)
+#define   DIS_MESH_AUTOSTRIP			REG_BIT(15)
+
 #define VFLSKPD					XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED)
+#define   DIS_PARTIAL_AUTOSTRIP			REG_BIT(9)
+#define   DIS_AUTOSTRIP				REG_BIT(6)
 #define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
 
@@ -111,6 +118,9 @@
 #define XEHP_PSS_MODE2				XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
+#define XEHP_PSS_CHICKEN			XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED)
+#define   FD_END_COLLECT			REG_BIT(5)
+
 #define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
 #define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
@@ -133,6 +143,9 @@
 #define VF_PREEMPTION				XE_REG(0x83a4, XE_REG_OPTION_MASKED)
 #define   PREEMPTION_VERTEX_COUNT		REG_GENMASK(15, 0)
 
+#define VF_SCRATCHPAD				XE_REG(0x83a8, XE_REG_OPTION_MASKED)
+#define   XE2_VFG_TED_CREDIT_INTERFACE_DISABLE	REG_BIT(13)
+
 #define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4, XE_REG_OPTION_MASKED)
 #define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
 
@@ -225,6 +238,7 @@
 #define   MSCUNIT_CLKGATE_DIS			REG_BIT(10)
 #define   RCCUNIT_CLKGATE_DIS			REG_BIT(7)
 #define   SARBUNIT_CLKGATE_DIS			REG_BIT(5)
+#define   SBEUNIT_CLKGATE_DIS			REG_BIT(4)
 
 #define UNSLICE_UNIT_LEVEL_CLKGATE2		XE_REG(0x94e4)
 #define   VSUNIT_CLKGATE2_DIS			REG_BIT(19)
@@ -276,6 +290,8 @@
 #define XEHP_L3SCQREG7				XE_REG_MCR(0xb188)
 #define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
 
+#define XEHPC_L3CLOS_MASK(i)			XE_REG_MCR(0xb194 + (i) * 8)
+
 #define XEHP_MERT_MOD_CTRL			XE_REG_MCR(0xcf28)
 #define RENDER_MOD_CTRL				XE_REG_MCR(0xcf2c)
 #define COMP_MOD_CTRL				XE_REG_MCR(0xcf30)
@@ -299,6 +315,9 @@
 #define XE_OAG_BLT_BUSY_FREE			XE_REG(0xdbbc)
 #define XE_OAG_RENDER_BUSY_FREE			XE_REG(0xdbdc)
 
+#define HALF_SLICE_CHICKEN5			XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
+#define   DISABLE_SAMPLE_G_PERFORMANCE		REG_BIT(0)
+
 #define SAMPLER_MODE				XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
 #define   ENABLE_SMALLPL			REG_BIT(15)
 #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
@@ -328,6 +347,7 @@
 #define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
+#define   EARLY_EOT_DIS				REG_BIT(1)
 
 #define ROW_CHICKEN2				XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
 #define   DISABLE_READ_SUPPRESSION		REG_BIT(15)
@@ -345,11 +365,15 @@
 
 #define LSC_CHICKEN_BIT_0			XE_REG_MCR(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
+#define   TGM_WRITE_EOM_FORCE			REG_BIT(17)
 #define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
+#define   SEQUENTIAL_ACCESS_UPGRADE_DISABLE	REG_BIT(13)
 
 #define LSC_CHICKEN_BIT_0_UDW			XE_REG_MCR(0xe7c8 + 4)
 #define   UGM_FRAGMENT_THRESHOLD_TO_3		REG_BIT(58 - 32)
 #define   DIS_CHAIN_2XSIMD8			REG_BIT(55 - 32)
+#define   XE2_ALLOC_DPA_STARVE_FIX_DIS		REG_BIT(47 - 32)
+#define   ENABLE_SMP_LD_RENDER_SURFACE_CONTROL	REG_BIT(44 - 32)
 #define   FORCE_SLM_FENCE_SCOPE_TO_TILE		REG_BIT(42 - 32)
 #define   FORCE_UGM_FENCE_SCOPE_TO_TILE		REG_BIT(41 - 32)
 #define   MAXREQS_PER_BANK			REG_GENMASK(39 - 32, 37 - 32)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 1450af6cab344..ccb075aac7da9 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -232,6 +232,7 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	},
 
 	/* Xe_LPG */
+
 	{ XE_RTP_NAME("14015795083"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
@@ -245,6 +246,20 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
 	},
 
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("16020975621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14018157293"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0),
+			 SET(XEHPC_L3CLOS_MASK(1), ~0),
+			 SET(XEHPC_L3CLOS_MASK(2), ~0),
+			 SET(XEHPC_L3CLOS_MASK(3), ~0))
+	},
+
 	{}
 };
 
@@ -527,6 +542,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	},
 
 	/* Xe_LPG */
+
 	{ XE_RTP_NAME("14017856879"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
@@ -539,6 +555,41 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			     XE_RTP_NOCHECK))
 	},
 
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("18032247524"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
+	},
+	{ XE_RTP_NAME("16018712365"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
+	},
+	{ XE_RTP_NAME("14018957109"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
+	},
+	{ XE_RTP_NAME("16021540221"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+	},
+	{ XE_RTP_NAME("14019322943"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE))
+	},
+	{ XE_RTP_NAME("14018471104"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
+	},
+	{ XE_RTP_NAME("16018737384"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
+	},
+
 	{}
 };
 
@@ -625,11 +676,34 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	},
 
 	/* Xe_LPG */
+
 	{ XE_RTP_NAME("18019271663"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
 	},
 
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("16020518922"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(FF_MODE,
+			     DIS_TE_AUTOSTRIP |
+			     DIS_MESH_PARTIAL_AUTOSTRIP |
+			     DIS_MESH_AUTOSTRIP),
+			 SET(VFLSKPD,
+			     DIS_PARTIAL_AUTOSTRIP |
+			     DIS_AUTOSTRIP))
+	},
+	{ XE_RTP_NAME("14019386621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
+	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
+
 	{}
 };
 
-- 
GitLab


From f6c39feed02117db5dfe988321a1a4dee2a9a3e2 Mon Sep 17 00:00:00 2001
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Date: Tue, 24 Oct 2023 15:07:38 -0700
Subject: [PATCH 2005/2340] drm/xe: Add performance tuning settings for MTL and
 Xe2

Add L3SQCREG5 as part of HW recommended settings. The recommended value
in Bspec is 00e0007f. For Xe2-LPG, bits 23:21 don't exist anymore, but
it's confirmed with HW engineers that setting them doesn't do anything.
They still exist on the media GT, Xe2-LPM, but they are already they are
already set as per HW default value. So for Xe2 platform, the only bits
that need to be set are 9:0 since HW's default is 0x1ff and the
recommended value is 0x7f.

Unlike most registers, which have the same relative offset on both
the primary and media GT, this register has a different base offset
on the media GT.

On MTL the register only exists for the primary (graphics) GT, so
there's no need to program it on the media gt. Also, it's part of the
RCS engine's context, so it needs to be added as a LRC workaround.

Bspec: 72161
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231024220739.224251-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  2 ++
 drivers/gpu/drm/xe/xe_tuning.c       | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5ad75011aa70e..55ceadfc30b0b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -292,6 +292,8 @@
 
 #define XEHPC_L3CLOS_MASK(i)			XE_REG_MCR(0xb194 + (i) * 8)
 
+#define XE2LPM_L3SQCREG5			XE_REG_MCR(0xb658)
+
 #define XEHP_MERT_MOD_CTRL			XE_REG_MCR(0xcf28)
 #define RENDER_MOD_CTRL				XE_REG_MCR(0xcf2c)
 #define COMP_MOD_CTRL				XE_REG_MCR(0xcf30)
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index d705198165220..53ccd338fd8c0 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -24,6 +24,20 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
 	},
+
+	/* Xe2 */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+	{ XE_RTP_NAME("Tuning: L3 cache - media"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000)),
+	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+
 	{}
 };
 
@@ -63,6 +77,15 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
 	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+
 	{}
 };
 
-- 
GitLab


From 8656ea9ae8b488ac25fdd332c60e6fd805cde171 Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Thu, 19 Oct 2023 15:01:39 +0530
Subject: [PATCH 2006/2340] drm/xe: Add event tracing for CTB

Event tracing enabled for CTB submissions.

Additional minor refactor - Removed a unnecessary ct_to_xe() call.

v2: Remove a unwanted comment (Hari)
    Add missing change to commit message

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Link: https://lore.kernel.org/intel-xe/20231019093140.1901665-2-balasubramani.vivekanandan@intel.com/
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 13 +++++++++--
 drivers/gpu/drm/xe/xe_trace.h  | 41 ++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 8b686c8b33390..a84e111bb36ad 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -460,7 +460,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	/* Write H2G ensuring visable before descriptor update */
 	xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
 	xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
-	xe_device_wmb(ct_to_xe(ct));
+	xe_device_wmb(xe);
 
 	/* Update local copies */
 	h2g->info.tail = (tail + full_len) % h2g->info.size;
@@ -469,6 +469,9 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	/* Update descriptor */
 	desc_write(xe, h2g, tail, h2g->info.tail);
 
+	trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len,
+			     desc_read(xe, h2g, head), h2g->info.tail);
+
 	return 0;
 }
 
@@ -934,6 +937,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 	struct guc_ctb *g2h = &ct->ctbs.g2h;
 	u32 tail, head, len;
 	s32 avail;
+	u32 action;
 
 	lockdep_assert_held(&ct->fast_lock);
 
@@ -984,11 +988,13 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 				   avail * sizeof(u32));
 	}
 
+	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+
 	if (fast_path) {
 		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
 			return 0;
 
-		switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) {
+		switch (action) {
 		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
 		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
 			break;	/* Process these in fast-path */
@@ -1001,6 +1007,9 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 	g2h->info.head = (head + avail) % g2h->info.size;
 	desc_write(xe, g2h, head, g2h->info.head);
 
+	trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len,
+			     g2h->info.head, tail);
+
 	return len;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 5ea458dadf699..c43bb42aca5b5 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -521,6 +521,7 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
 TRACE_EVENT(xe_guc_ct_h2g_flow_control,
 	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
 	    TP_ARGS(_head, _tail, size, space, len),
+/* GuC */
 
 	    TP_STRUCT__entry(
 		     __field(u32, _head)
@@ -568,6 +569,46 @@ TRACE_EVENT(xe_guc_ct_g2h_flow_control,
 		      __entry->space, __entry->len)
 );
 
+DECLARE_EVENT_CLASS(xe_guc_ctb,
+		    TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+		    TP_ARGS(gt_id, action, len, _head, tail),
+
+		    TP_STRUCT__entry(
+				__field(u8, gt_id)
+				__field(u32, action)
+				__field(u32, len)
+				__field(u32, tail)
+				__field(u32, _head)
+		    ),
+
+		    TP_fast_assign(
+			    __entry->gt_id = gt_id;
+			    __entry->action = action;
+			    __entry->len = len;
+			    __entry->tail = tail;
+			    __entry->_head = _head;
+		    ),
+
+		    TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n",
+			      __entry->gt_id, __entry->action, __entry->len,
+			      __entry->tail, __entry->_head)
+);
+
+DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g,
+	     TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+	     TP_ARGS(gt_id, action, len, _head, tail)
+);
+
+DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
+		   TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+		   TP_ARGS(gt_id, action, len, _head, tail),
+
+		   TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n",
+			     __entry->gt_id, __entry->action, __entry->len,
+			     __entry->tail, __entry->_head)
+
+);
+
 #endif
 
 /* This part must be outside protection */
-- 
GitLab


From 0d0dda27cf066d1e7537a815fb3990be04cff6bd Mon Sep 17 00:00:00 2001
From: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Date: Thu, 19 Oct 2023 15:01:40 +0530
Subject: [PATCH 2007/2340] drm/xe/trace: Optimize trace definition

Make use of EVENT_CLASS to group similar trace events

Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Link: https://lore.kernel.org/intel-xe/20231019093140.1901665-3-balasubramani.vivekanandan@intel.com/
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_trace.h | 83 +++++++++++++++--------------------
 1 file changed, 36 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index c43bb42aca5b5..f13cc9a35e918 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -518,55 +518,44 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
 	     TP_ARGS(vm)
 );
 
-TRACE_EVENT(xe_guc_ct_h2g_flow_control,
-	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
-	    TP_ARGS(_head, _tail, size, space, len),
 /* GuC */
+DECLARE_EVENT_CLASS(xe_guc_ct_flow_control,
+		    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+		    TP_ARGS(_head, _tail, size, space, len),
 
-	    TP_STRUCT__entry(
-		     __field(u32, _head)
-		     __field(u32, _tail)
-		     __field(u32, size)
-		     __field(u32, space)
-		     __field(u32, len)
-		     ),
-
-	    TP_fast_assign(
-		   __entry->_head = _head;
-		   __entry->_tail = _tail;
-		   __entry->size = size;
-		   __entry->space = space;
-		   __entry->len = len;
-		   ),
-
-	    TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
-		      __entry->_head, __entry->_tail, __entry->size,
-		      __entry->space, __entry->len)
-);
-
-TRACE_EVENT(xe_guc_ct_g2h_flow_control,
-	    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
-	    TP_ARGS(_head, _tail, size, space, len),
-
-	    TP_STRUCT__entry(
-		     __field(u32, _head)
-		     __field(u32, _tail)
-		     __field(u32, size)
-		     __field(u32, space)
-		     __field(u32, len)
-		     ),
-
-	    TP_fast_assign(
-		   __entry->_head = _head;
-		   __entry->_tail = _tail;
-		   __entry->size = size;
-		   __entry->space = space;
-		   __entry->len = len;
-		   ),
-
-	    TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
-		      __entry->_head, __entry->_tail, __entry->size,
-		      __entry->space, __entry->len)
+		    TP_STRUCT__entry(
+			     __field(u32, _head)
+			     __field(u32, _tail)
+			     __field(u32, size)
+			     __field(u32, space)
+			     __field(u32, len)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->_head = _head;
+			   __entry->_tail = _tail;
+			   __entry->size = size;
+			   __entry->space = space;
+			   __entry->len = len;
+			   ),
+
+		    TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u",
+			      __entry->_head, __entry->_tail, __entry->size,
+			      __entry->space, __entry->len)
+);
+
+DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control,
+	     TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+	     TP_ARGS(_head, _tail, size, space, len)
+);
+
+DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control,
+		   TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+		   TP_ARGS(_head, _tail, size, space, len),
+
+		   TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u",
+			     __entry->_head, __entry->_tail, __entry->size,
+			     __entry->space, __entry->len)
 );
 
 DECLARE_EVENT_CLASS(xe_guc_ctb,
-- 
GitLab


From b1543a494c52102f9f5ad29d3dc38d29c7fcfcc4 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 25 Oct 2023 08:17:34 -0700
Subject: [PATCH 2008/2340] drm/xe: Prepare to emit non-register state while
 recording default LRC

On some platforms we need to emit some non-register state while
recording an engine class' default LRC.  Add the infrastructure to
support this; actual per-platform tables will be added in future
patches.

v2:
 - Checkpatch whitespace fix
 - Add extra assertion to ensure num_dw != 0.  (Bala)

Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://lore.kernel.org/r/20231025151732.3461842-6-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c  | 10 ++++++++-
 drivers/gpu/drm/xe/xe_lrc.c | 45 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_lrc.h |  3 +++
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 74e1f47bd4011..8618275b1877f 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -136,7 +136,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 	long timeout;
 	int count = 0;
 
-	bb = xe_bb_new(gt, SZ_4K, false);	/* Just pick a large BB size */
+	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+		/* Big enough to emit all of the context's 3DSTATE */
+		bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false);
+	else
+		/* Just pick a large BB size */
+		bb = xe_bb_new(gt, SZ_4K, false);
+
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
@@ -173,6 +179,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
 		}
 	}
 
+	xe_lrc_emit_hwe_state_instructions(q, bb);
+
 	job = xe_bb_create_job(q, bb);
 	if (IS_ERR(job)) {
 		xe_bb_free(bb, NULL);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 332fc0602074e..184707223098c 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -12,6 +12,7 @@
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
 #include "regs/xe_regs.h"
+#include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_drm_client.h"
@@ -1108,3 +1109,47 @@ void xe_lrc_dump_default(struct drm_printer *p,
 		remaining_dw -= num_dw;
 	}
 }
+
+struct instr_state {
+	u32 instr;
+	u16 num_dw;
+};
+
+void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
+{
+	struct xe_gt *gt = q->hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct instr_state *state_table = NULL;
+	int state_table_size = 0;
+
+	/*
+	 * At the moment we only need to emit non-register state for the RCS
+	 * engine.
+	 */
+	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
+		return;
+
+	switch (GRAPHICS_VERx100(xe)) {
+	default:
+		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
+			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
+		return;
+	}
+
+	for (int i = 0; i < state_table_size; i++) {
+		u32 instr = state_table[i].instr;
+		u16 num_dw = state_table[i].num_dw;
+		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
+
+		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
+		xe_gt_assert(gt, num_dw != 0);
+		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
+
+		bb->cs[bb->len] = instr;
+		if (!is_single_dw)
+			bb->cs[bb->len] |= (num_dw - 2);
+
+		bb->len += num_dw;
+	}
+}
+
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index a7056eda5e0c4..28b1d3f404d4f 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -8,6 +8,7 @@
 #include "xe_lrc_types.h"
 
 struct drm_printer;
+struct xe_bb;
 struct xe_device;
 struct xe_exec_queue;
 enum xe_engine_class;
@@ -52,4 +53,6 @@ void xe_lrc_dump_default(struct drm_printer *p,
 			 struct xe_gt *gt,
 			 enum xe_engine_class);
 
+void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb);
+
 #endif
-- 
GitLab


From 72ac304769dde2b84a5471e5db817a29d071fd73 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 25 Oct 2023 08:17:35 -0700
Subject: [PATCH 2009/2340] drm/xe: Emit SVG state on RCS during driver load on
 DG2 and MTL

When recording the default LRC, the expectation is that the hardware's
original state settings (both register and instruction) will be written
out to the LRC upon first context switch.  For many 3DSTATE_* state
instructions that don't truly have "default" values, this translates to
a simple instruction header (opcodes + dword length) being written to
the LRC, followed by an appropriate number of blank dwords as a place
holder.  When userspace creates a context (which starts as a copy of the
default LRC), they'll generally emit real 3DSTATE_* as part of their
initialization to select the settings they desire.  If they don't emit
one of the 3DSTATE instructions, then the zeroed dwords that remain in
their LRC image generally translate to various state remaining disabled.
This will either be what userspace wants or will lead to very
reproducible and easily-debugged problems (rendering glitches, engine
hangs).

It turns out that a subset of the 3DSTATE instructions, specifically
those belonging to the SVG (State Variable - Global) unit, are not only
emitting 0's for the instruction's "body" dwords, but also for the
instruction header dword if no specific state has been explicitly set
before context switch.  This means that when the hardware switches to a
context that hasn't explicitly provided an appropriate state setting,
the hardware will just see a sequence of NOOPs in the spot reserved for
that 3DSTATE instruction while executing the LRC, and the actual
hardware state setting will unintentionally inherit the configuration
used by the previously running context.  Now when userspace makes a
mistake and forgets to emit an important state instruction they no
longer get consistent, easily-reproducible corruption/hangs, but rather
erratic behavior where the presence/absence of a problem depends on what
other workloads are running on the system and what order the contexts
are scheduled on the engine.

A specific example of this that came up recently related to mesh shading
The OpenGL driver was not specifically emitting a 3DSTATE_MESH_CONTROL
to disable mesh shading at context init, so on context switch, mesh
shading would either be on or off depending on what the previous context
had been doing.  Vulkan apps _were_ enabling mesh shading, so running a
Vulkan app and then context switching to an OpenGL app resulted in mesh
shading still unexpectedly being enabled during OpenGL operation, and
since other Mesh-related state was not properly initialized for that
context a GPU hang was seen.  Due to the specific ordering requirements
(Vulkan app runs first, followed by OpenGL app), it took additional
debug effort to track down the cause of the problem.

There are various workarounds related to this behavior, with current
implementations handled in the userspace drivers.  E.g., Wa_14019789679
and Wa_22018402687.  However it's been suggested that the kernel driver
can help simplify things here by emitting zeroed SVG state with proper
instruction headers as part of our default context creation (i.e., at
the same point we apply LRC workarounds).  This will help ensure that
any future cases where a userspace driver does not emit an important
state setting will result in consistent behavior.

Bspec: 46261
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://lore.kernel.org/r/20231025151732.3461842-7-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../drm/xe/instructions/xe_gfxpipe_commands.h |  51 ++++++++
 drivers/gpu/drm/xe/xe_gt.c                    |   1 +
 drivers/gpu/drm/xe/xe_lrc.c                   | 111 +++++++++++++++++-
 3 files changed, 162 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
index b5fbc761265c1..7be9614347195 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -57,34 +57,84 @@
 #define CMD_3DSTATE_STENCIL_BUFFER		GFXPIPE_3D_CMD(0x0, 0x6)
 #define CMD_3DSTATE_HIER_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x7)
 #define CMD_3DSTATE_VERTEX_BUFFERS		GFXPIPE_3D_CMD(0x0, 0x8)
+#define CMD_3DSTATE_VERTEX_ELEMENTS		GFXPIPE_3D_CMD(0x0, 0x9)
 #define CMD_3DSTATE_INDEX_BUFFER		GFXPIPE_3D_CMD(0x0, 0xA)
 #define CMD_3DSTATE_VF				GFXPIPE_3D_CMD(0x0, 0xC)
+#define CMD_3DSTATE_MULTISAMPLE			GFXPIPE_3D_CMD(0x0, 0xD)
 #define CMD_3DSTATE_CC_STATE_POINTERS		GFXPIPE_3D_CMD(0x0, 0xE)
+#define CMD_3DSTATE_SCISSOR_STATE_POINTERS	GFXPIPE_3D_CMD(0x0, 0xF)
+#define CMD_3DSTATE_VS				GFXPIPE_3D_CMD(0x0, 0x10)
+#define CMD_3DSTATE_GS				GFXPIPE_3D_CMD(0x0, 0x11)
+#define CMD_3DSTATE_CLIP			GFXPIPE_3D_CMD(0x0, 0x12)
+#define CMD_3DSTATE_SF				GFXPIPE_3D_CMD(0x0, 0x13)
 #define CMD_3DSTATE_WM				GFXPIPE_3D_CMD(0x0, 0x14)
+#define CMD_3DSTATE_CONSTANT_VS			GFXPIPE_3D_CMD(0x0, 0x15)
+#define CMD_3DSTATE_CONSTANT_GS			GFXPIPE_3D_CMD(0x0, 0x16)
 #define CMD_3DSTATE_SAMPLE_MASK			GFXPIPE_3D_CMD(0x0, 0x18)
+#define CMD_3DSTATE_CONSTANT_HS			GFXPIPE_3D_CMD(0x0, 0x19)
+#define CMD_3DSTATE_CONSTANT_DS			GFXPIPE_3D_CMD(0x0, 0x1A)
+#define CMD_3DSTATE_HS				GFXPIPE_3D_CMD(0x0, 0x1B)
+#define CMD_3DSTATE_TE				GFXPIPE_3D_CMD(0x0, 0x1C)
+#define CMD_3DSTATE_DS				GFXPIPE_3D_CMD(0x0, 0x1D)
+#define CMD_3DSTATE_STREAMOUT			GFXPIPE_3D_CMD(0x0, 0x1E)
 #define CMD_3DSTATE_SBE				GFXPIPE_3D_CMD(0x0, 0x1F)
 #define CMD_3DSTATE_PS				GFXPIPE_3D_CMD(0x0, 0x20)
+#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	GFXPIPE_3D_CMD(0x0, 0x21)
 #define CMD_3DSTATE_CPS_POINTERS		GFXPIPE_3D_CMD(0x0, 0x22)
 #define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC	GFXPIPE_3D_CMD(0x0, 0x23)
 #define CMD_3DSTATE_BLEND_STATE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x24)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_VS	GFXPIPE_3D_CMD(0x0, 0x26)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_HS	GFXPIPE_3D_CMD(0x0, 0x27)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_DS	GFXPIPE_3D_CMD(0x0, 0x28)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_GS	GFXPIPE_3D_CMD(0x0, 0x29)
 #define CMD_3DSTATE_BINDING_TABLE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2A)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS	GFXPIPE_3D_CMD(0x0, 0x2B)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS	GFXPIPE_3D_CMD(0x0, 0x2C)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS	GFXPIPE_3D_CMD(0x0, 0x2D)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS	GFXPIPE_3D_CMD(0x0, 0x2E)
 #define CMD_3DSTATE_SAMPLER_STATE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2F)
 #define CMD_3DSTATE_VF_INSTANCING		GFXPIPE_3D_CMD(0x0, 0x49)
+#define CMD_3DSTATE_VF_SGVS			GFXPIPE_3D_CMD(0x0, 0x4A)
 #define CMD_3DSTATE_VF_TOPOLOGY			GFXPIPE_3D_CMD(0x0, 0x4B)
 #define CMD_3DSTATE_WM_CHROMAKEY		GFXPIPE_3D_CMD(0x0, 0x4C)
 #define CMD_3DSTATE_PS_BLEND			GFXPIPE_3D_CMD(0x0, 0x4D)
 #define CMD_3DSTATE_WM_DEPTH_STENCIL		GFXPIPE_3D_CMD(0x0, 0x4E)
 #define CMD_3DSTATE_PS_EXTRA			GFXPIPE_3D_CMD(0x0, 0x4F)
+#define CMD_3DSTATE_RASTER			GFXPIPE_3D_CMD(0x0, 0x50)
 #define CMD_3DSTATE_SBE_SWIZ			GFXPIPE_3D_CMD(0x0, 0x51)
+#define CMD_3DSTATE_WM_HZ_OP			GFXPIPE_3D_CMD(0x0, 0x52)
+#define CMD_3DSTATE_VF_COMPONENT_PACKING	GFXPIPE_3D_CMD(0x0, 0x55)
+#define CMD_3DSTATE_VF_SGVS_2			GFXPIPE_3D_CMD(0x0, 0x56)
 #define CMD_3DSTATE_VFG				GFXPIPE_3D_CMD(0x0, 0x57)
+#define CMD_3DSTATE_URB_ALLOC_VS		GFXPIPE_3D_CMD(0x0, 0x58)
+#define CMD_3DSTATE_URB_ALLOC_HS		GFXPIPE_3D_CMD(0x0, 0x59)
+#define CMD_3DSTATE_URB_ALLOC_DS		GFXPIPE_3D_CMD(0x0, 0x5A)
+#define CMD_3DSTATE_URB_ALLOC_GS		GFXPIPE_3D_CMD(0x0, 0x5B)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_0		GFXPIPE_3D_CMD(0x0, 0x60)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_1		GFXPIPE_3D_CMD(0x0, 0x61)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_2		GFXPIPE_3D_CMD(0x0, 0x62)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_3		GFXPIPE_3D_CMD(0x0, 0x63)
+#define CMD_3DSTATE_PRIMITIVE_REPLICATION	GFXPIPE_3D_CMD(0x0, 0x6C)
+#define CMD_3DSTATE_TBIMR_TILE_PASS_INFO	GFXPIPE_3D_CMD(0x0, 0x6E)
 #define CMD_3DSTATE_AMFS			GFXPIPE_3D_CMD(0x0, 0x6F)
 #define CMD_3DSTATE_DEPTH_BOUNDS		GFXPIPE_3D_CMD(0x0, 0x71)
 #define CMD_3DSTATE_AMFS_TEXTURE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x72)
 #define CMD_3DSTATE_CONSTANT_TS_POINTER		GFXPIPE_3D_CMD(0x0, 0x73)
+#define CMD_3DSTATE_MESH_CONTROL		GFXPIPE_3D_CMD(0x0, 0x77)
 #define CMD_3DSTATE_MESH_DISTRIB		GFXPIPE_3D_CMD(0x0, 0x78)
+#define CMD_3DSTATE_TASK_REDISTRIB		GFXPIPE_3D_CMD(0x0, 0x79)
+#define CMD_3DSTATE_MESH_SHADER			GFXPIPE_3D_CMD(0x0, 0x7A)
+#define CMD_3DSTATE_MESH_SHADER_DATA		GFXPIPE_3D_CMD(0x0, 0x7B)
+#define CMD_3DSTATE_TASK_CONTROL		GFXPIPE_3D_CMD(0x0, 0x7C)
+#define CMD_3DSTATE_TASK_SHADER			GFXPIPE_3D_CMD(0x0, 0x7D)
+#define CMD_3DSTATE_TASK_SHADER_DATA		GFXPIPE_3D_CMD(0x0, 0x7E)
+#define CMD_3DSTATE_URB_ALLOC_MESH		GFXPIPE_3D_CMD(0x0, 0x7F)
+#define CMD_3DSTATE_URB_ALLOC_TASK		GFXPIPE_3D_CMD(0x0, 0x80)
+#define CMD_3DSTATE_CLIP_MESH			GFXPIPE_3D_CMD(0x0, 0x81)
 #define CMD_3DSTATE_SBE_MESH			GFXPIPE_3D_CMD(0x0, 0x82)
 #define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER	GFXPIPE_3D_CMD(0x0, 0x83)
 
+#define CMD_3DSTATE_DRAWING_RECTANGLE		GFXPIPE_3D_CMD(0x1, 0x0)
 #define CMD_3DSTATE_CHROMA_KEY			GFXPIPE_3D_CMD(0x1, 0x4)
 #define CMD_3DSTATE_POLY_STIPPLE_OFFSET		GFXPIPE_3D_CMD(0x1, 0x6)
 #define CMD_3DSTATE_POLY_STIPPLE_PATTERN	GFXPIPE_3D_CMD(0x1, 0x7)
@@ -98,6 +148,7 @@
 #define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_PS	GFXPIPE_3D_CMD(0x1, 0x16)
 #define CMD_3DSTATE_SO_DECL_LIST		GFXPIPE_3D_CMD(0x1, 0x17)
 #define   CMD_3DSTATE_SO_DECL_LIST_DW_LEN	REG_GENMASK(8, 0)
+#define CMD_3DSTATE_SO_BUFFER			GFXPIPE_3D_CMD(0x1, 0x18)
 #define CMD_3DSTATE_BINDING_TABLE_POOL_ALLOC	GFXPIPE_3D_CMD(0x1, 0x19)
 #define CMD_3DSTATE_SAMPLE_PATTERN		GFXPIPE_3D_CMD(0x1, 0x1C)
 #define CMD_3DSTATE_3D_MODE			GFXPIPE_3D_CMD(0x1, 0x1E)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 8618275b1877f..d380f67b33659 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -10,6 +10,7 @@
 #include <drm/drm_managed.h>
 #include <drm/xe_drm.h>
 
+#include "instructions/xe_gfxpipe_commands.h"
 #include "instructions/xe_mi_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "xe_assert.h"
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 184707223098c..944bb2f646608 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -1020,34 +1020,84 @@ static int dump_gfxpipe_command(struct drm_printer *p,
 	MATCH3D(3DSTATE_STENCIL_BUFFER);
 	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
 	MATCH3D(3DSTATE_VERTEX_BUFFERS);
+	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
 	MATCH3D(3DSTATE_INDEX_BUFFER);
 	MATCH3D(3DSTATE_VF);
+	MATCH3D(3DSTATE_MULTISAMPLE);
 	MATCH3D(3DSTATE_CC_STATE_POINTERS);
+	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
+	MATCH3D(3DSTATE_VS);
+	MATCH3D(3DSTATE_GS);
+	MATCH3D(3DSTATE_CLIP);
+	MATCH3D(3DSTATE_SF);
 	MATCH3D(3DSTATE_WM);
+	MATCH3D(3DSTATE_CONSTANT_VS);
+	MATCH3D(3DSTATE_CONSTANT_GS);
 	MATCH3D(3DSTATE_SAMPLE_MASK);
+	MATCH3D(3DSTATE_CONSTANT_HS);
+	MATCH3D(3DSTATE_CONSTANT_DS);
+	MATCH3D(3DSTATE_HS);
+	MATCH3D(3DSTATE_TE);
+	MATCH3D(3DSTATE_DS);
+	MATCH3D(3DSTATE_STREAMOUT);
 	MATCH3D(3DSTATE_SBE);
 	MATCH3D(3DSTATE_PS);
+	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
 	MATCH3D(3DSTATE_CPS_POINTERS);
 	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
 	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
 	MATCH3D(3DSTATE_VF_INSTANCING);
+	MATCH3D(3DSTATE_VF_SGVS);
 	MATCH3D(3DSTATE_VF_TOPOLOGY);
 	MATCH3D(3DSTATE_WM_CHROMAKEY);
 	MATCH3D(3DSTATE_PS_BLEND);
 	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
 	MATCH3D(3DSTATE_PS_EXTRA);
+	MATCH3D(3DSTATE_RASTER);
 	MATCH3D(3DSTATE_SBE_SWIZ);
+	MATCH3D(3DSTATE_WM_HZ_OP);
+	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
+	MATCH3D(3DSTATE_VF_SGVS_2);
 	MATCH3D(3DSTATE_VFG);
+	MATCH3D(3DSTATE_URB_ALLOC_VS);
+	MATCH3D(3DSTATE_URB_ALLOC_HS);
+	MATCH3D(3DSTATE_URB_ALLOC_DS);
+	MATCH3D(3DSTATE_URB_ALLOC_GS);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
+	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
+	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
 	MATCH3D(3DSTATE_AMFS);
 	MATCH3D(3DSTATE_DEPTH_BOUNDS);
 	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
 	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
+	MATCH3D(3DSTATE_MESH_CONTROL);
 	MATCH3D(3DSTATE_MESH_DISTRIB);
+	MATCH3D(3DSTATE_TASK_REDISTRIB);
+	MATCH3D(3DSTATE_MESH_SHADER);
+	MATCH3D(3DSTATE_MESH_SHADER_DATA);
+	MATCH3D(3DSTATE_TASK_CONTROL);
+	MATCH3D(3DSTATE_TASK_SHADER);
+	MATCH3D(3DSTATE_TASK_SHADER_DATA);
+	MATCH3D(3DSTATE_URB_ALLOC_MESH);
+	MATCH3D(3DSTATE_URB_ALLOC_TASK);
+	MATCH3D(3DSTATE_CLIP_MESH);
 	MATCH3D(3DSTATE_SBE_MESH);
 	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
 
+	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
 	MATCH3D(3DSTATE_CHROMA_KEY);
 	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
 	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
@@ -1060,6 +1110,7 @@ static int dump_gfxpipe_command(struct drm_printer *p,
 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
 	MATCH3D(3DSTATE_SO_DECL_LIST);
+	MATCH3D(3DSTATE_SO_BUFFER);
 	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
 	MATCH3D(3DSTATE_SAMPLE_PATTERN);
 	MATCH3D(3DSTATE_3D_MODE);
@@ -1115,6 +1166,59 @@ struct instr_state {
 	u16 num_dw;
 };
 
+static const struct instr_state xe_hpg_svg_state[] = {
+	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
+	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
+	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
+	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
+	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
+};
+
 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
 {
 	struct xe_gt *gt = q->hwe->gt;
@@ -1130,6 +1234,12 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
 		return;
 
 	switch (GRAPHICS_VERx100(xe)) {
+	case 1255:
+	case 1270:
+	case 1271:
+		state_table = xe_hpg_svg_state;
+		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
+		break;
 	default:
 		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
 			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
@@ -1152,4 +1262,3 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
 		bb->len += num_dw;
 	}
 }
-
-- 
GitLab


From fb24b858a20d720b7ee4396225569ff33a8a4fe3 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 25 Oct 2023 08:17:36 -0700
Subject: [PATCH 2010/2340] drm/xe/xe2: Update SVG state handling

As with DG2/MTL, Xe2 also fails to emit instruction headers for SVG
state instructions if no explicit state has been set.  The SVG part of
the LRC is nearly identical to DG2/MTL; the only change is that
3DSTATE_DRAWING_RECTANGLE has been replaced by
3DSTATE_DRAWING_RECTANGLE_FAST, so we can just re-use the same state
table and handle that single instruction when we encounter it.

Bspec: 65182
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://lore.kernel.org/r/20231025151732.3461842-8-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gpu/drm/xe/instructions/xe_gfxpipe_commands.h |  1 +
 drivers/gpu/drm/xe/xe_lrc.c                       | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
index 7be9614347195..8e6dd061f2aea 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -52,6 +52,7 @@
 
 #define PIPELINE_SELECT				GFXPIPE_SINGLE_DW_CMD(0x1, 0x4)
 
+#define CMD_3DSTATE_DRAWING_RECTANGLE_FAST	GFXPIPE_3D_CMD(0x0, 0x0)
 #define CMD_3DSTATE_CLEAR_PARAMS		GFXPIPE_3D_CMD(0x0, 0x4)
 #define CMD_3DSTATE_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x5)
 #define CMD_3DSTATE_STENCIL_BUFFER		GFXPIPE_3D_CMD(0x0, 0x6)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 944bb2f646608..f8754f0615999 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -1015,6 +1015,7 @@ static int dump_gfxpipe_command(struct drm_printer *p,
 
 	MATCH(PIPELINE_SELECT);
 
+	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
 	MATCH3D(3DSTATE_CLEAR_PARAMS);
 	MATCH3D(3DSTATE_DEPTH_BUFFER);
 	MATCH3D(3DSTATE_STENCIL_BUFFER);
@@ -1235,8 +1236,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
 
 	switch (GRAPHICS_VERx100(xe)) {
 	case 1255:
-	case 1270:
-	case 1271:
+	case 1270 ... 2004:
 		state_table = xe_hpg_svg_state;
 		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
 		break;
@@ -1255,6 +1255,17 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
 		xe_gt_assert(gt, num_dw != 0);
 		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
 
+		/*
+		 * Xe2's SVG context is the same as the one on DG2 / MTL
+		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
+		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
+		 * Just make the replacement here rather than defining a
+		 * whole separate table for the single trivial change.
+		 */
+		if (GRAPHICS_VER(xe) >= 20 &&
+		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
+			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
+
 		bb->cs[bb->len] = instr;
 		if (!is_single_dw)
 			bb->cs[bb->len] |= (num_dw - 2);
-- 
GitLab


From 83af834e711ce779afb1ee6a28977b3e4b164354 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 23 Oct 2023 13:41:13 -0700
Subject: [PATCH 2011/2340] drm/xe/mocs: MOCS registers are multicast on Xe_HP
 and beyond

The MOCS registers should be written in an MCR-specific manner on Xe_HP
and beyond to prevent any other driver threads or external firmware from
putting the hardware into unicast mode while we initialize the MOCS
table.

Bspec: 66534, 67609, 71185
Cc: Ruthuvikas Ravikumar <ruthuvikas.ravikumar@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20231023204112.2856331-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  6 ++++--
 drivers/gpu/drm/xe/xe_guc_ads.c      |  2 +-
 drivers/gpu/drm/xe/xe_mocs.c         | 23 +++++++++++++++--------
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 55ceadfc30b0b..b00fe089525a5 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -43,7 +43,8 @@
 #define FORCEWAKE_ACK_GT_MTL			XE_REG(0xdfc)
 
 /* L3 Cache Control */
-#define LNCFCMOCS(i)				XE_REG(0xb020 + (i) * 4)
+#define XELP_LNCFCMOCS(i)			XE_REG(0xb020 + (i) * 4)
+#define XEHP_LNCFCMOCS(i)			XE_REG_MCR(0xb020 + (i) * 4)
 #define LNCFCMOCS_REG_COUNT			32
 
 #define MCFG_MCR_SELECTOR			XE_REG(0xfd0)
@@ -79,7 +80,8 @@
 #define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
 #define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
 
-#define GLOBAL_MOCS(i)				XE_REG(0x4000 + (i) * 4) /* Global MOCS regs */
+#define XELP_GLOBAL_MOCS(i)			XE_REG(0x4000 + (i) * 4)
+#define XEHP_GLOBAL_MOCS(i)			XE_REG_MCR(0x4000 + (i) * 4)
 #define CCS_AUX_INV				XE_REG(0x4208)
 
 #define VD0_AUX_INV				XE_REG(0x4218)
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index efa4d25424b84..88789826e7817 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -473,7 +473,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 	if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
 		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
 			guc_mmio_regset_write_one(ads, regset_map,
-						  LNCFCMOCS(i), count++);
+						  XELP_LNCFCMOCS(i), count++);
 		}
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 19a8146ded9a7..21972bbef8fd7 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -10,6 +10,7 @@
 #include "xe_device.h"
 #include "xe_exec_queue.h"
 #include "xe_gt.h"
+#include "xe_gt_mcr.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
 #include "xe_step_types.h"
@@ -491,8 +492,7 @@ static u32 get_entry_control(const struct xe_mocs_info *info,
 }
 
 static void __init_mocs_table(struct xe_gt *gt,
-			      const struct xe_mocs_info *info,
-			      u32 addr)
+			      const struct xe_mocs_info *info)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
@@ -505,10 +505,13 @@ static void __init_mocs_table(struct xe_gt *gt,
 	for (i = 0;
 	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
 	     i++) {
-		struct xe_reg reg = XE_REG(addr + i * 4);
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i,
+			 XELP_GLOBAL_MOCS(i).addr, mocs);
 
-		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs);
-		xe_mmio_write32(gt, reg, mocs);
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
+		else
+			xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
 	}
 }
 
@@ -542,9 +545,13 @@ static void init_l3cc_table(struct xe_gt *gt,
 	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
 				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr,
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
 			 l3cc);
-		xe_mmio_write32(gt, LNCFCMOCS(i), l3cc);
+
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
+		else
+			xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
 	}
 }
 
@@ -569,7 +576,7 @@ void xe_mocs_init(struct xe_gt *gt)
 	mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags);
 
 	if (flags & HAS_GLOBAL_MOCS)
-		__init_mocs_table(gt, &table, GLOBAL_MOCS(0).addr);
+		__init_mocs_table(gt, &table);
 
 	/*
 	 * Initialize the L3CC table as part of mocs initalization to make
-- 
GitLab


From 8a93b0b4d1105b7d03b4768f1a08145b24cbd52a Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Wed, 25 Oct 2023 21:42:01 +0530
Subject: [PATCH 2012/2340] drm/xe: Extend rpX values extraction for future
 platforms

In existing code flow for future platforms i.e. >1270, the rpX
(rp0,rpn and rpe) fused values are read from gen 6 registers.
Which is not correct. Unless specified gen 1270 regs should be valid
for gen 1270+ platforms as well.

Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index d9375d1d582ff..74247e0d36742 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -340,7 +340,7 @@ static void pc_update_rp_values(struct xe_guc_pc *pc)
 	struct xe_gt *gt = pc_to_gt(pc);
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (xe->info.platform == XE_METEORLAKE)
+	if (GRAPHICS_VERx100(xe) >= 1270)
 		mtl_update_rpe_value(pc);
 	else
 		tgl_update_rpe_value(pc);
@@ -365,7 +365,7 @@ static ssize_t freq_act_show(struct device *dev,
 	xe_device_mem_access_get(gt_to_xe(gt));
 
 	/* When in RC6, actual frequency reported will be 0. */
-	if (xe->info.platform == XE_METEORLAKE) {
+	if (GRAPHICS_VERx100(xe) >= 1270) {
 		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
 		freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
 	} else {
@@ -680,7 +680,7 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
 	struct xe_gt *gt = pc_to_gt(pc);
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (xe->info.platform == XE_METEORLAKE)
+	if (GRAPHICS_VERx100(xe) >= 1270)
 		mtl_init_fused_rp_values(pc);
 	else
 		tgl_init_fused_rp_values(pc);
-- 
GitLab


From a9a95523c84957b7863796b5d1df2f3f5dca4519 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 25 Oct 2023 10:57:41 -0700
Subject: [PATCH 2013/2340] drm/xe/uc: Prepare for parsing of different header
 types

GSC binaries and newer HuC ones use GSC-style headers instead of the
CSS. In preparation for adding support for such parsing, split out the
current parsing code to its own function, to make it cleaner to add the
new paths. The existing doc section has also been renamed to narrow it
to CSS-based binaries.

v2: new patch in series, split out from next patch for easier reviewing
v3: drop unneeded include (Lucas)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 Documentation/gpu/xe/xe_firmware.rst |   2 +-
 drivers/gpu/drm/xe/xe_uc_fw.c        | 116 +++++++++++++++------------
 drivers/gpu/drm/xe/xe_uc_fw_abi.h    |   7 +-
 3 files changed, 71 insertions(+), 54 deletions(-)

diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst
index c01246ae99f53..f1ac6f6089305 100644
--- a/Documentation/gpu/xe/xe_firmware.rst
+++ b/Documentation/gpu/xe/xe_firmware.rst
@@ -8,7 +8,7 @@ Firmware Layout
 ===============
 
 .. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
-   :doc: Firmware Layout
+   :doc: CSS-based Firmware Layout
 
 Write Once Protected Content Memory (WOPCM) Layout
 ==================================================
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 32782a52c07f7..189a298e54790 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -344,57 +344,22 @@ fail:
 	return -ENOEXEC;
 }
 
-int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+/* Refer to the "CSS-based Firmware Layout" documentation entry for details */
+static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t fw_size)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
-	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
-	struct xe_tile *tile = gt_to_tile(gt);
-	struct device *dev = xe->drm.dev;
-	const struct firmware *fw = NULL;
 	struct uc_css_header *css;
-	struct xe_bo *obj;
 	size_t size;
-	int err;
-
-	/*
-	 * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
-	 * before we're looked at the HW caps to see if we have uc support
-	 */
-	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
-	xe_assert(xe, !uc_fw->status);
-	xe_assert(xe, !uc_fw->path);
-
-	uc_fw_auto_select(xe, uc_fw);
-	xe_uc_fw_change_status(uc_fw, uc_fw->path ?
-			       XE_UC_FIRMWARE_SELECTED :
-			       XE_UC_FIRMWARE_NOT_SUPPORTED);
-
-	if (!xe_uc_fw_is_supported(uc_fw))
-		return 0;
-
-	uc_fw_override(uc_fw);
-
-	/* an empty path means the firmware is disabled */
-	if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
-		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
-		drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type));
-		return 0;
-	}
-
-	err = request_firmware(&fw, uc_fw->path, dev);
-	if (err)
-		goto fail;
 
 	/* Check the size of the blob before examining buffer contents */
-	if (unlikely(fw->size < sizeof(struct uc_css_header))) {
+	if (unlikely(fw_size < sizeof(struct uc_css_header))) {
 		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
 			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-			 fw->size, sizeof(struct uc_css_header));
-		err = -ENODATA;
-		goto fail;
+			 fw_size, sizeof(struct uc_css_header));
+		return -ENODATA;
 	}
 
-	css = (struct uc_css_header *)fw->data;
+	css = (struct uc_css_header *)fw_data;
 
 	/* Check integrity of size values inside CSS header */
 	size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
@@ -403,9 +368,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 		drm_warn(&xe->drm,
 			 "%s firmware %s: unexpected header size: %zu != %zu\n",
 			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-			 fw->size, sizeof(struct uc_css_header));
-		err = -EPROTO;
-		goto fail;
+			 fw_size, sizeof(struct uc_css_header));
+		return -EPROTO;
 	}
 
 	/* uCode size must calculated from other sizes */
@@ -417,12 +381,11 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	/* At least, it should have header, uCode and RSA. Size of all three. */
 	size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
 		uc_fw->rsa_size;
-	if (unlikely(fw->size < size)) {
+	if (unlikely(fw_size < size)) {
 		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
 			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-			 fw->size, size);
-		err = -ENOEXEC;
-		goto fail;
+			 fw_size, size);
+		return -ENOEXEC;
 	}
 
 	/* Get version numbers from the CSS header */
@@ -433,6 +396,60 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	uc_fw->patch_ver_found = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
 					   css->sw_version);
 
+	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
+		guc_read_css_info(uc_fw, css);
+
+	return 0;
+}
+
+static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw)
+{
+	return parse_css_header(uc_fw, fw->data, fw->size);
+}
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct device *dev = xe->drm.dev;
+	const struct firmware *fw = NULL;
+	struct xe_bo *obj;
+	int err;
+
+	/*
+	 * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
+	 * before we're looked at the HW caps to see if we have uc support
+	 */
+	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
+	xe_assert(xe, !uc_fw->status);
+	xe_assert(xe, !uc_fw->path);
+
+	uc_fw_auto_select(xe, uc_fw);
+	xe_uc_fw_change_status(uc_fw, uc_fw->path ?
+			       XE_UC_FIRMWARE_SELECTED :
+			       XE_UC_FIRMWARE_NOT_SUPPORTED);
+
+	if (!xe_uc_fw_is_supported(uc_fw))
+		return 0;
+
+	uc_fw_override(uc_fw);
+
+	/* an empty path means the firmware is disabled */
+	if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
+		drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type));
+		return 0;
+	}
+
+	err = request_firmware(&fw, uc_fw->path, dev);
+	if (err)
+		goto fail;
+
+	err = parse_headers(uc_fw, fw);
+	if (err)
+		goto fail;
+
 	drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u\n",
 		 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
 		 uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found);
@@ -441,9 +458,6 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	if (err)
 		goto fail;
 
-	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
-		guc_read_css_info(uc_fw, css);
-
 	obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size,
 				     ttm_bo_type_kernel,
 				     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index 89e994ed4e009..edae7bb3cd720 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -10,9 +10,12 @@
 #include <linux/types.h>
 
 /**
- * DOC: Firmware Layout
+ * DOC: CSS-based Firmware Layout
  *
- * The GuC/HuC firmware layout looks like this::
+ * The CSS-based firmware structure is used for GuC releases on all platforms
+ * and for HuC releases up to DG1. Starting from DG2/MTL the HuC uses the GSC
+ * layout instead.
+ * The CSS firmware layout looks like this::
  *
  *      +======================================================================+
  *      |  Firmware blob                                                       |
-- 
GitLab


From 484ecffac91067e44273afa727fb1b9855058c9a Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 25 Oct 2023 10:57:42 -0700
Subject: [PATCH 2014/2340] drm/xe/huc: Extract version and binary offset from
 new HuC headers

The GSC-enabled HuC binary starts with a GSC header, which is followed
by the legacy-style CSS header and the binary itself. We can parse the
GSC headers to find the HuC version and the location of the binary to
be used for the DMA transfer.

The parsing function has been designed to be re-used for the GSC binary,
so the entry names are external parameters (because the GSC uses
different ones) and the CSS entry is optional (because the GSC doesn't
have it).

v2: move new code to uc_fw.c, better comments and error checking, split
    old code move to separate patch (Lucas), move headers and
    documentation to uc_fw_abi.h.

v3: use 2 separate loops, rework marker check (Lucas)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 Documentation/gpu/xe/xe_firmware.rst |   3 +
 drivers/gpu/drm/xe/xe_uc_fw.c        | 120 ++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_uc_fw.h        |   2 +-
 drivers/gpu/drm/xe/xe_uc_fw_abi.h    | 120 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_uc_fw_types.h  |   2 +
 5 files changed, 244 insertions(+), 3 deletions(-)

diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst
index f1ac6f6089305..afcb561cd37db 100644
--- a/Documentation/gpu/xe/xe_firmware.rst
+++ b/Documentation/gpu/xe/xe_firmware.rst
@@ -10,6 +10,9 @@ Firmware Layout
 .. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
    :doc: CSS-based Firmware Layout
 
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+   :doc: GSC-based Firmware Layout
+
 Write Once Protected Content Memory (WOPCM) Layout
 ==================================================
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 189a298e54790..d1cc99f86ec01 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -402,9 +402,125 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
 	return 0;
 }
 
+static bool is_cpd_header(const void *data)
+{
+	const u32 *marker = data;
+
+	return *marker == GSC_CPD_HEADER_MARKER;
+}
+
+static u32 entry_offset(const struct gsc_cpd_header_v2 *header, const char *name)
+{
+	const struct gsc_cpd_entry *entry;
+	int i;
+
+	entry = (void *)header + header->header_length;
+
+	for (i = 0; i < header->num_of_entries; i++, entry++)
+		if (strcmp(entry->name, name) == 0)
+			return entry->offset & GSC_CPD_ENTRY_OFFSET_MASK;
+
+	return 0;
+}
+
+/* Refer to the "GSC-based Firmware Layout" documentation entry for details */
+static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t size,
+			    const char *manifest_entry, const char *css_entry)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct gsc_cpd_header_v2 *header = data;
+	const struct gsc_manifest_header *manifest;
+	size_t min_size = sizeof(*header);
+	u32 offset;
+
+	/* manifest_entry is mandatory, css_entry is optional */
+	xe_assert(xe, manifest_entry);
+
+	if (size < min_size || !is_cpd_header(header))
+		return -ENOENT;
+
+	if (header->header_length < sizeof(struct gsc_cpd_header_v2)) {
+		xe_gt_err(gt, "invalid CPD header length %u!\n", header->header_length);
+		return -EINVAL;
+	}
+
+	min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries;
+	if (size < min_size) {
+		xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	/* Look for the manifest first */
+	offset = entry_offset(header, manifest_entry);
+	if (!offset) {
+		xe_gt_err(gt, "Failed to find %s manifest!\n",
+			  xe_uc_fw_type_repr(uc_fw->type));
+		return -ENODATA;
+	}
+
+	min_size = offset + sizeof(struct gsc_manifest_header);
+	if (size < min_size) {
+		xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	manifest = data + offset;
+
+	uc_fw->major_ver_found = manifest->fw_version.major;
+	uc_fw->minor_ver_found = manifest->fw_version.minor;
+	uc_fw->patch_ver_found = manifest->fw_version.hotfix;
+
+	/* then optionally look for the css header */
+	if (css_entry) {
+		int ret;
+
+		/*
+		 * This section does not contain a CSS entry on DG2. We
+		 * don't support DG2 HuC right now, so no need to handle
+		 * it, just add a reminder in case that changes.
+		 */
+		xe_assert(xe, xe->info.platform != XE_DG2);
+
+		offset = entry_offset(header, css_entry);
+
+		/* the CSS header parser will check that the CSS header fits */
+		if (offset > size) {
+			xe_gt_err(gt, "FW too small! %zu < %u\n", size, offset);
+			return -ENODATA;
+		}
+
+		ret = parse_css_header(uc_fw, data + offset, size - offset);
+		if (ret)
+			return ret;
+
+		uc_fw->css_offset = offset;
+	}
+
+	return 0;
+}
+
 static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw)
 {
-	return parse_css_header(uc_fw, fw->data, fw->size);
+	int ret;
+
+	/*
+	 * All GuC releases and older HuC ones use CSS headers, while newer HuC
+	 * releases use GSC CPD headers.
+	 */
+	switch (uc_fw->type) {
+	case XE_UC_FW_TYPE_HUC:
+		ret = parse_cpd_header(uc_fw, fw->data, fw->size, "HUCP.man", "huc_fw");
+		if (!ret || ret != -ENOENT)
+			return ret;
+		fallthrough;
+	case XE_UC_FW_TYPE_GUC:
+		return parse_css_header(uc_fw, fw->data, fw->size);
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
@@ -510,7 +626,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
 	/* Set the source address for the uCode */
-	src_offset = uc_fw_ggtt_offset(uc_fw);
+	src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset;
 	xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
 	xe_mmio_write32(gt, DMA_ADDR_0_HIGH, upper_32_bits(src_offset));
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index a519c77d49621..1d1a0c156cdfd 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -21,7 +21,7 @@ void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p);
 
 static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw)
 {
-	return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->css_offset;
 }
 
 static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw,
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index edae7bb3cd720..d6725c9632517 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -85,4 +85,124 @@ struct uc_css_header {
 } __packed;
 static_assert(sizeof(struct uc_css_header) == 128);
 
+/**
+ * DOC: GSC-based Firmware Layout
+ *
+ * The GSC-based firmware structure is used for GSC releases on all platforms
+ * and for HuC releases starting from DG2/MTL. Older HuC releases use the
+ * CSS-based layout instead. Differently from the CSS headers, the GSC headers
+ * uses a directory + entries structure (i.e., there is array of addresses
+ * pointing to specific header extensions identified by a name). Although the
+ * header structures are the same, some of the entries are specific to GSC while
+ * others are specific to HuC. The manifest header entry, which includes basic
+ * information about the binary (like the version) is always present, but it is
+ * named differently based on the binary type.
+ *
+ * The HuC binary starts with a Code Partition Directory (CPD) header. The
+ * entries we're interested in for use in the driver are:
+ *
+ * 1. "HUCP.man": points to the manifest header for the HuC.
+ * 2. "huc_fw": points to the FW code. On platforms that support load via DMA
+ *    and 2-step HuC authentication (i.e. MTL+) this is a full CSS-based binary,
+ *    while if the GSC is the one doing the load (which only happens on DG2)
+ *    this section only contains the uCode.
+ *
+ * The GSC-based HuC firmware layout looks like this::
+ *
+ *	+================================================+
+ *	|  CPD Header                                    |
+ *	+================================================+
+ *	|  CPD entries[]                                 |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          "HUCP.man"                            |
+ *	|           ...                                  |
+ *	|           offset  >----------------------------|------o
+ *	|      ...                                       |      |
+ *	|      entryY                                    |      |
+ *	|          "huc_fw"                              |      |
+ *	|           ...                                  |      |
+ *	|           offset  >----------------------------|----------o
+ *	+================================================+      |   |
+ *	                                                        |   |
+ *	+================================================+      |   |
+ *	|  Manifest Header                               |<-----o   |
+ *	|      ...                                       |          |
+ *	|      FW version                                |          |
+ *	|      ...                                       |          |
+ *	+================================================+          |
+ *	                                                            |
+ *	+================================================+          |
+ *	|  FW binary                                     |<---------o
+ *	|      CSS (MTL+ only)                           |
+ *	|      uCode                                     |
+ *	|      RSA Key (MTL+ only)                       |
+ *	|      ...                                       |
+ *	+================================================+
+ */
+
+struct gsc_version {
+	u16 major;
+	u16 minor;
+	u16 hotfix;
+	u16 build;
+} __packed;
+
+/* Code partition directory (CPD) structures */
+struct gsc_cpd_header_v2 {
+	u32 header_marker;
+#define GSC_CPD_HEADER_MARKER 0x44504324
+
+	u32 num_of_entries;
+	u8 header_version;
+	u8 entry_version;
+	u8 header_length; /* in bytes */
+	u8 flags;
+	u32 partition_name;
+	u32 crc32;
+} __packed;
+
+struct gsc_cpd_entry {
+	u8 name[12];
+
+	/*
+	 * Bits 0-24: offset from the beginning of the code partition
+	 * Bit 25: huffman compressed
+	 * Bits 26-31: reserved
+	 */
+	u32 offset;
+#define GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0)
+#define GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25)
+
+	/*
+	 * Module/Item length, in bytes. For Huffman-compressed modules, this
+	 * refers to the uncompressed size. For software-compressed modules,
+	 * this refers to the compressed size.
+	 */
+	u32 length;
+
+	u8 reserved[4];
+} __packed;
+
+struct gsc_manifest_header {
+	u32 header_type; /* 0x4 for manifest type */
+	u32 header_length; /* in dwords */
+	u32 header_version;
+	u32 flags;
+	u32 vendor;
+	u32 date;
+	u32 size; /* In dwords, size of entire manifest (header + extensions) */
+	u32 header_id;
+	u32 internal_data;
+	struct gsc_version fw_version;
+	u32 security_version;
+	struct gsc_version meu_kit_version;
+	u32 meu_manifest_version;
+	u8 general_data[4];
+	u8 reserved3[56];
+	u32 modulus_size; /* in dwords */
+	u32 exponent_size; /* in dwords */
+} __packed;
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 444bff83cdbe5..1650599303c85 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -113,6 +113,8 @@ struct xe_uc_fw {
 	u32 rsa_size;
 	/** @ucode_size: micro kernel size */
 	u32 ucode_size;
+	/** @css_offset: offset within the blob at which the CSS is located */
+	u32 css_offset;
 
 	/** @private_data_size: size of private data found in uC css header */
 	u32 private_data_size;
-- 
GitLab


From b77d8b5c5ec0673086f565f2c07ed6da081483b8 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 25 Oct 2023 10:57:43 -0700
Subject: [PATCH 2015/2340] drm/xe/huc: HuC is not supported on GTs that don't
 have video engines

On MTL-style multi-gt platforms, the HuC is only available on the media
GT, so we need to consider it as not supported on the render GT.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_huc.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 293403d16f25c..386efa180c1c3 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -35,10 +35,19 @@ huc_to_guc(struct xe_huc *huc)
 
 int xe_huc_init(struct xe_huc *huc)
 {
-	struct xe_device *xe = huc_to_xe(huc);
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
 	int ret;
 
 	huc->fw.type = XE_UC_FW_TYPE_HUC;
+
+	/* On platforms with a media GT the HuC is only available there */
+	if (tile->media_gt && (gt != tile->media_gt)) {
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
+		return 0;
+	}
+
 	ret = xe_uc_fw_init(&huc->fw);
 	if (ret)
 		goto out;
-- 
GitLab


From 185f93f3041fe520c6df16a58bea116077d3f848 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 25 Oct 2023 10:57:44 -0700
Subject: [PATCH 2016/2340] drm/xe/huc: Don't re-auth HuC if it's already
 authenticated

On newer platforms the HuC survives reset and stays authenticated, so no
need to re-authenticate it.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_huc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 386efa180c1c3..2f176badab261 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -83,6 +83,12 @@ int xe_huc_auth(struct xe_huc *huc)
 
 	xe_assert(xe, !xe_uc_fw_is_running(&huc->fw));
 
+	/* On newer platforms the HuC survives reset, so no need to re-auth */
+	if (xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO) & HUC_LOAD_SUCCESSFUL) {
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+		return 0;
+	}
+
 	if (!xe_uc_fw_is_loaded(&huc->fw))
 		return -ENOEXEC;
 
-- 
GitLab


From bfeb4ac55565f527f72e97020a244f8c3585154a Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 25 Oct 2023 10:57:45 -0700
Subject: [PATCH 2017/2340] drm/xe/huc: Define HuC for MTL

MTL uses a versionless GSC-enabled binary.

v2: don't use the filename to identify the header type (Lucas)
v3: fix commit msg (Lucas)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index d1cc99f86ec01..91d4a2272ee73 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -112,12 +112,13 @@ struct fw_blobs_by_type {
 	fw_def(ROCKETLAKE,	major_ver(i915,	guc,	tgl,	70, 5))		\
 	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
 
-#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)				\
-	fw_def(DG1,		no_ver(i915,	huc,	dg1))			\
-	fw_def(ALDERLAKE_P,	no_ver(i915,	huc,	tgl))			\
-	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,	tgl))			\
-	fw_def(ROCKETLAKE,	no_ver(i915,	huc,	tgl))			\
-	fw_def(TIGERLAKE,	no_ver(i915,	huc,	tgl))
+#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)		\
+	fw_def(METEORLAKE,	no_ver(i915,	huc_gsc,	mtl))		\
+	fw_def(DG1,		no_ver(i915,	huc,		dg1))		\
+	fw_def(ALDERLAKE_P,	no_ver(i915,	huc,		tgl))		\
+	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,		tgl))		\
+	fw_def(ROCKETLAKE,	no_ver(i915,	huc,		tgl))		\
+	fw_def(TIGERLAKE,	no_ver(i915,	huc,		tgl))
 
 #define MAKE_FW_PATH(dir__, uc__, shortname__, version__)			\
 	__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
-- 
GitLab


From 65e95735882329632559cf71c9efbb4981473b07 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 24 Oct 2023 15:04:12 -0700
Subject: [PATCH 2018/2340] drm/xe: Fix WA 14010918519 write to wrong register

FORCE_SLM_FENCE_SCOPE_TO_TILE and FORCE_UGM_FENCE_SCOPE_TO_TILE are in
the up dword of LSC_CHICKEN_BIT_0 register. Also, the 14010918519
workaround only applies to early steppings, A*. Eventually those should
be dropped, like they were in commit eaeb4b361452 ("drm/i915/dg2: Drop
pre-production GT workarounds"), so let's make sure they are annotated
appropriately.

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20231024220412.223868-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index ccb075aac7da9..ce61609b001cb 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -367,9 +367,9 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
 	},
 	{ XE_RTP_NAME("14010918519"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0,
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
 			     FORCE_SLM_FENCE_SCOPE_TO_TILE |
 			     FORCE_UGM_FENCE_SCOPE_TO_TILE,
 			     /*
-- 
GitLab


From b8d70702def26d7597eded092fe43cc584c0d064 Mon Sep 17 00:00:00 2001
From: Priyanka Dandamudi <priyanka.dandamudi@intel.com>
Date: Fri, 27 Oct 2023 10:55:07 +0530
Subject: [PATCH 2019/2340] drm/xe/xe_exec_queue: Add check for access counter
 granularity

Add conditional check for access counter granularity.
This check will return -EINVAL if granularity is beyond 64M
which is a hardware limitation.

v2: Defined
XE_ACC_GRANULARITY_128K 0
XE_ACC_GRANULARITY_2M 1
XE_ACC_GRANULARITY_16M 2
XE_ACC_GRANULARITY_64M 3
as part of uAPI.
So, that user can also use it.(Oak)

v3: Move uAPI to proper location and give proper
documentation.(Brian, Oak)

Cc: Oak Zeng <oak.zeng@intel.com>
Cc: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
Signed-off-by: Priyanka Dandamudi <priyanka.dandamudi@intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c |  3 +++
 include/uapi/drm/xe_drm.h          | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 8e0620cb89e5d..f67a6dee4a6f5 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -393,6 +393,9 @@ static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_q
 	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
+	if (value > XE_ACC_GRANULARITY_64M)
+		return -EINVAL;
+
 	q->usm.acc_granularity = value;
 
 	return 0;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 24bf8f0f52e82..9bd7092a7ea42 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -731,6 +731,20 @@ struct drm_xe_vm_bind {
 	__u64 reserved[2];
 };
 
+/* For use with XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */
+
+/* Monitor 128KB contiguous region with 4K sub-granularity */
+#define XE_ACC_GRANULARITY_128K 0
+
+/* Monitor 2MB contiguous region with 64KB sub-granularity */
+#define XE_ACC_GRANULARITY_2M 1
+
+/* Monitor 16MB contiguous region with 512KB sub-granularity */
+#define XE_ACC_GRANULARITY_16M 2
+
+/* Monitor 64MB contiguous region with 2M sub-granularity */
+#define XE_ACC_GRANULARITY_64M 3
+
 /**
  * struct drm_xe_exec_queue_set_property - exec queue set property
  *
-- 
GitLab


From 7eea3fb67a30a81c1751097753885657a1ace021 Mon Sep 17 00:00:00 2001
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Date: Mon, 30 Oct 2023 20:37:56 +0530
Subject: [PATCH 2020/2340] drm/xe/xelpmp: Extend Wa_22016670082 to Xe_LPM+

Add Xe_LPM+ support to an existing workaround.

BSpec: 51762
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Link: https://lore.kernel.org/r/20231030150756.1011777-1-shekhar.chauhan@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 +
 drivers/gpu/drm/xe/xe_wa.c           | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index b00fe089525a5..7a6407e38265a 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -152,6 +152,7 @@
 #define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
 
 #define SQCNT1					XE_REG_MCR(0x8718)
+#define XELPMP_SQCNT1				XE_REG(0x8718)
 #define   ENFORCE_RAR				REG_BIT(23)
 
 #define XEHP_SQCM				XE_REG_MCR(0x8724)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index ce61609b001cb..2f1782db267b6 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -246,6 +246,13 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
 	},
 
+	/* Xe_LPM+ */
+
+	{ XE_RTP_NAME("22016670082"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300)),
+	  XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR))
+	},
+
 	/* Xe2_LPG */
 
 	{ XE_RTP_NAME("16020975621"),
-- 
GitLab


From 4202dd9fc43e9d9dba54e1b72a301108cdec84fb Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 25 Oct 2023 18:39:39 +0100
Subject: [PATCH 2021/2340] drm/xe/migrate: fix MI_ARB_ON_OFF usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Spec says: "This is a privileged command; it will not be effective (will
be converted to a no-op) if executed from within a non-privileged batch
buffer." However here it looks like we are just emitting it inside some
bb which was jumped to via the ppGTT, which should be considered
a non-privileged address space.

It looks like we just need some way of preventing things like the
emit_pte() and later copy/clear being preempted in-between so rather
just emit directly in the ring for migration jobs.

Bspec: 45716
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c  | 16 ----------------
 drivers/gpu/drm/xe/xe_ring_ops.c |  2 ++
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 731beb622fe87..67b71244b1f2e 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -406,12 +406,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	return m;
 }
 
-static void emit_arb_clear(struct xe_bb *bb)
-{
-	/* 1 dword */
-	bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-}
-
 static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur)
 {
 	/*
@@ -745,10 +739,6 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 			goto err_sync;
 		}
 
-		/* Preemption is enabled again by the ring ops. */
-		if (!src_is_vram || !dst_is_vram)
-			emit_arb_clear(bb);
-
 		if (!src_is_vram)
 			emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0,
 				 src_bo);
@@ -994,7 +984,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		/* Preemption is enabled again by the ring ops. */
 		if (!clear_vram) {
-			emit_arb_clear(bb);
 			emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
 				 bo);
 		} else {
@@ -1285,9 +1274,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 				VM_SA_UPDATE_UNIT_SIZE;
 		}
 
-		/* Preemption is enabled again by the ring ops. */
-		emit_arb_clear(bb);
-
 		/* Map our PT's to gtt */
 		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates);
 		bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
@@ -1316,8 +1302,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
-		/* Preemption is enabled again by the ring ops. */
-		emit_arb_clear(bb);
 		for (i = 0; i < num_updates; i++)
 			write_pgtable(tile, bb, 0, &updates[i], pt_update);
 	}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 58676f4b989f3..59e0aa2d6a4c5 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -355,6 +355,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
 				seqno, dw, i);
 
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */
+
 	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
 
 	/* XXX: Do we need this? Leaving for now. */
-- 
GitLab


From a667cf56dbd64e35f8f34ec47549888fa28878fb Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 25 Oct 2023 18:39:40 +0100
Subject: [PATCH 2022/2340] drm/xe/bo: consider dma-resv fences for clear job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There could be active fences already in the dma-resv for the object
prior to clearing. Make sure to input them as dependencies for the clear
job.

v2 (Matt B):
  - We can use USAGE_KERNEL here, since it's only the move fences we
    care about here. Also add a comment.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 67b71244b1f2e..53b5b36aca664 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -980,8 +980,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		size -= clear_L0;
 
-		/* TODO: Add dependencies here */
-
 		/* Preemption is enabled again by the ring ops. */
 		if (!clear_vram) {
 			emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
@@ -1010,6 +1008,18 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		}
 
 		xe_sched_job_add_migrate_flush(job, flush_flags);
+		if (!fence) {
+			/*
+			 * There can't be anything userspace related at this
+			 * point, so we just need to respect any potential move
+			 * fences, which are always tracked as
+			 * DMA_RESV_USAGE_KERNEL.
+			 */
+			err = job_add_deps(job, bo->ttm.base.resv,
+					   DMA_RESV_USAGE_KERNEL);
+			if (err)
+				goto err_job;
+		}
 
 		xe_sched_job_arm(job);
 		dma_fence_put(fence);
@@ -1024,6 +1034,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		xe_bb_free(bb, fence);
 		continue;
 
+err_job:
+		xe_sched_job_put(job);
 err:
 		mutex_unlock(&m->job_mutex);
 		xe_bb_free(bb, NULL);
-- 
GitLab


From 503a6f4e4f961acbbcac8d36f51226f3d3cfa7b7 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 25 Oct 2023 18:39:41 +0100
Subject: [PATCH 2023/2340] drm/xe/bo: sync kernel fences for KMD buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With things like pipelined evictions, VRAM pages can be marked as free
and yet still have some active kernel fences, with the idea that the
next caller to allocate the memory will respect them. However it looks
like we are missing synchronisation for KMD internal buffers, like
page-tables, lrc etc. For userspace objects we should already have the
required synchronisation for CPU access via the fault handler, and
likewise for GPU access when vm_binding them.

To fix this synchronise against any kernel fences for all KMD objects at
creation. This should resolve some severe corruption seen during
evictions.

v2 (Matt B):
  - Revamp the comment explaining this. Also mention why USAGE_KERNEL is
    correct here.
v3 (Thomas):
  - Make sure to use ctx.interruptible for the wait.

Testcase: igt@xe-evict-ccs
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/853
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/855
Reported-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tested-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 1a10d9324a07e..4467d711aa1f5 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1269,6 +1269,37 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	if (err)
 		return ERR_PTR(err);
 
+	/*
+	 * The VRAM pages underneath are potentially still being accessed by the
+	 * GPU, as per async GPU clearing and async evictions. However TTM makes
+	 * sure to add any corresponding move/clear fences into the objects
+	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
+	 *
+	 * For KMD internal buffers we don't care about GPU clearing, however we
+	 * still need to handle async evictions, where the VRAM is still being
+	 * accessed by the GPU. Most internal callers are not expecting this,
+	 * since they are missing the required synchronisation before accessing
+	 * the memory. To keep things simple just sync wait any kernel fences
+	 * here, if the buffer is designated KMD internal.
+	 *
+	 * For normal userspace objects we should already have the required
+	 * pipelining or sync waiting elsewhere, since we already have to deal
+	 * with things like async GPU clearing.
+	 */
+	if (type == ttm_bo_type_kernel) {
+		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+						     DMA_RESV_USAGE_KERNEL,
+						     ctx.interruptible,
+						     MAX_SCHEDULE_TIMEOUT);
+
+		if (timeout < 0) {
+			if (!resv)
+				dma_resv_unlock(bo->ttm.base.resv);
+			xe_bo_put(bo);
+			return ERR_PTR(timeout);
+		}
+	}
+
 	bo->created = true;
 	if (bulk)
 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
-- 
GitLab


From b42ff0462d9eb7b84e31152c63c9809b6f743bf8 Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Mon, 30 Oct 2023 17:26:16 +0530
Subject: [PATCH 2024/2340] drm/xe/hwmon: Add kernel doc and refactor xe hwmon

Add kernel doc and refactor some of the hwmon functions, there is
no functionality change.

Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030115618.1382200-2-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hwmon.c | 201 +++++++++++++++-------------------
 1 file changed, 91 insertions(+), 110 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 9ac05994a967d..9b7773441f627 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -26,9 +26,9 @@ enum xe_hwmon_reg {
 };
 
 enum xe_hwmon_reg_operation {
-	REG_READ,
-	REG_WRITE,
-	REG_RMW,
+	REG_READ32,
+	REG_RMW32,
+	REG_READ64,
 };
 
 /*
@@ -39,18 +39,32 @@ enum xe_hwmon_reg_operation {
 #define SF_VOLTAGE	1000		/* millivolts */
 #define SF_ENERGY	1000000		/* microjoules */
 
+/**
+ * struct xe_hwmon_energy_info - to accumulate energy
+ */
 struct xe_hwmon_energy_info {
+	/** @reg_val_prev: previous energy reg val */
 	u32 reg_val_prev;
-	long accum_energy;		/* Accumulated energy for energy1_input */
+	/** @accum_energy: accumulated energy */
+	long accum_energy;
 };
 
+/**
+ * struct xe_hwmon - xe hwmon data structure
+ */
 struct xe_hwmon {
+	/** @hwmon_dev: hwmon device for xe */
 	struct device *hwmon_dev;
+	/** @gt: primary gt */
 	struct xe_gt *gt;
-	struct mutex hwmon_lock; /* rmw operations*/
+	/** @hwmon_lock: lock for rmw operations */
+	struct mutex hwmon_lock;
+	/** @scl_shift_power: pkg power unit */
 	int scl_shift_power;
+	/** @scl_shift_energy: pkg energy unit */
 	int scl_shift_energy;
-	struct xe_hwmon_energy_info ei;	/*  Energy info for energy1_input */
+	/** @ei: Energy info for energy1_input */
+	struct xe_hwmon_energy_info ei;
 };
 
 static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
@@ -95,49 +109,34 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
 	return reg.raw;
 }
 
-static int xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
-				enum xe_hwmon_reg_operation operation, u32 *value,
-				u32 clr, u32 set)
+static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
+				 enum xe_hwmon_reg_operation operation, u64 *value,
+				 u32 clr, u32 set)
 {
 	struct xe_reg reg;
 
 	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
 
 	if (!reg.raw)
-		return -EOPNOTSUPP;
+		return;
 
 	switch (operation) {
-	case REG_READ:
+	case REG_READ32:
 		*value = xe_mmio_read32(hwmon->gt, reg);
-		return 0;
-	case REG_WRITE:
-		xe_mmio_write32(hwmon->gt, reg, *value);
-		return 0;
-	case REG_RMW:
+		break;
+	case REG_RMW32:
 		*value = xe_mmio_rmw32(hwmon->gt, reg, clr, set);
-		return 0;
+		break;
+	case REG_READ64:
+		*value = xe_mmio_read64_2x32(hwmon->gt, reg);
+		break;
 	default:
 		drm_warn(&gt_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n",
 			 operation);
-		return -EOPNOTSUPP;
+		break;
 	}
 }
 
-static int xe_hwmon_process_reg_read64(struct xe_hwmon *hwmon,
-				       enum xe_hwmon_reg hwmon_reg, u64 *value)
-{
-	struct xe_reg reg;
-
-	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
-
-	if (!reg.raw)
-		return -EOPNOTSUPP;
-
-	*value = xe_mmio_read64_2x32(hwmon->gt, reg);
-
-	return 0;
-}
-
 #define PL1_DISABLE 0
 
 /*
@@ -146,42 +145,39 @@ static int xe_hwmon_process_reg_read64(struct xe_hwmon *hwmon,
  * same pattern for sysfs, allow arbitrary PL1 limits to be set but display
  * clamped values when read.
  */
-static int xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
 {
-	u32 reg_val;
-	u64 reg_val64, min, max;
+	u64 reg_val, min, max;
 
-	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, &reg_val, 0, 0);
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0);
 	/* Check if PL1 limit is disabled */
 	if (!(reg_val & PKG_PWR_LIM_1_EN)) {
 		*value = PL1_DISABLE;
-		return 0;
+		return;
 	}
 
 	reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
 	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
 
-	xe_hwmon_process_reg_read64(hwmon, REG_PKG_POWER_SKU, &reg_val64);
-	min = REG_FIELD_GET(PKG_MIN_PWR, reg_val64);
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0);
+	min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
 	min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
-	max = REG_FIELD_GET(PKG_MAX_PWR, reg_val64);
+	max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
 	max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
 
 	if (min && max)
 		*value = clamp_t(u64, *value, min, max);
-
-	return 0;
 }
 
 static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
 {
-	u32 reg_val;
+	u64 reg_val;
 
 	/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
 	if (value == PL1_DISABLE) {
-		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, &reg_val,
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
 				     PKG_PWR_LIM_1_EN, 0);
-		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, &reg_val,
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val,
 				     PKG_PWR_LIM_1_EN, 0);
 
 		if (reg_val & PKG_PWR_LIM_1_EN)
@@ -192,21 +188,19 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
 	reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
 	reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
 
-	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, &reg_val,
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
 			     PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
 
 	return 0;
 }
 
-static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
 {
-	u32 reg_val;
+	u64 reg_val;
 
-	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ, &reg_val, 0, 0);
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0);
 	reg_val = REG_FIELD_GET(PKG_TDP, reg_val);
 	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
-
-	return 0;
 }
 
 /*
@@ -233,13 +227,11 @@ static void
 xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
 {
 	struct xe_hwmon_energy_info *ei = &hwmon->ei;
-	u32 reg_val;
-
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+	u64 reg_val;
 
 	mutex_lock(&hwmon->hwmon_lock);
 
-	xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ,
+	xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32,
 			     &reg_val, 0, 0);
 
 	if (reg_val >= ei->reg_val_prev)
@@ -253,8 +245,6 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
 				  hwmon->scl_shift_energy);
 
 	mutex_unlock(&hwmon->hwmon_lock);
-
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
 }
 
 static const struct hwmon_channel_info *hwmon_info[] = {
@@ -284,16 +274,39 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
 			      uval);
 }
 
-static int xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value)
+static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor)
 {
-	u32 reg_val;
+	int ret;
+	u32 uval;
+
+	ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval);
+	if (ret)
+		return ret;
+
+	*value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
+				 scale_factor, POWER_SETUP_I1_SHIFT);
+	return ret;
+}
+
+static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor)
+{
+	int ret;
+	u32 uval;
+
+	uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor);
+	ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval);
+
+	return ret;
+}
+
+static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value)
+{
+	u64 reg_val;
 
 	xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS,
-			     REG_READ, &reg_val, 0, 0);
+			     REG_READ32, &reg_val, 0, 0);
 	/* HW register value in units of 2.5 millivolt */
 	*value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE);
-
-	return 0;
 }
 
 static umode_t
@@ -317,23 +330,15 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
 static int
 xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
 {
-	int ret;
-	u32 uval;
-
 	switch (attr) {
 	case hwmon_power_max:
-		return xe_hwmon_power_max_read(hwmon, val);
+		xe_hwmon_power_max_read(hwmon, val);
+		return 0;
 	case hwmon_power_rated_max:
-		return xe_hwmon_power_rated_max_read(hwmon, val);
-	case hwmon_power_crit:
-		ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval);
-		if (ret)
-			return ret;
-		if (!(uval & POWER_SETUP_I1_WATTS))
-			return -ENODEV;
-		*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
-				       SF_POWER, POWER_SETUP_I1_SHIFT);
+		xe_hwmon_power_rated_max_read(hwmon, val);
 		return 0;
+	case hwmon_power_crit:
+		return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -342,14 +347,11 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
 static int
 xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val)
 {
-	u32 uval;
-
 	switch (attr) {
 	case hwmon_power_max:
 		return xe_hwmon_power_max_write(hwmon, val);
 	case hwmon_power_crit:
-		uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER);
-		return xe_hwmon_pcode_write_i1(hwmon->gt, uval);
+		return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -372,19 +374,9 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr)
 static int
 xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val)
 {
-	int ret;
-	u32 uval;
-
 	switch (attr) {
 	case hwmon_curr_crit:
-		ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval);
-		if (ret)
-			return ret;
-		if (uval & POWER_SETUP_I1_WATTS)
-			return -ENODEV;
-		*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
-				       SF_CURR, POWER_SETUP_I1_SHIFT);
-		return 0;
+		return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -393,12 +385,9 @@ xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val)
 static int
 xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val)
 {
-	u32 uval;
-
 	switch (attr) {
 	case hwmon_curr_crit:
-		uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR);
-		return xe_hwmon_pcode_write_i1(hwmon->gt, uval);
+		return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -418,21 +407,13 @@ xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr)
 static int
 xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
 {
-	int ret;
-
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
-
 	switch (attr) {
 	case hwmon_in_input:
-		ret = xe_hwmon_get_voltage(hwmon, val);
-		break;
+		xe_hwmon_get_voltage(hwmon, val);
+		return 0;
 	default:
-		ret = -EOPNOTSUPP;
+		return -EOPNOTSUPP;
 	}
-
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
-
-	return ret;
 }
 
 static umode_t
@@ -564,15 +545,15 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
 {
 	struct xe_hwmon *hwmon = xe->hwmon;
 	long energy;
-	u32 val_sku_unit = 0;
-	int ret;
+	u64 val_sku_unit = 0;
 
-	ret = xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, REG_READ, &val_sku_unit, 0, 0);
 	/*
 	 * The contents of register PKG_POWER_SKU_UNIT do not change,
 	 * so read it once and store the shift values.
 	 */
-	if (!ret) {
+	if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) {
+		xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
+				     REG_READ32, &val_sku_unit, 0, 0);
 		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
 		hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
 	}
-- 
GitLab


From fef6dd12b45a1a15c24c9df30fb2c27e68984665 Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Mon, 30 Oct 2023 17:26:17 +0530
Subject: [PATCH 2025/2340] drm/xe/hwmon: Protect hwmon rw attributes with
 hwmon_lock

Take hwmon_lock while accessing hwmon rw attributes. For readonly
attributes its not required to take lock as reads are protected
by sysfs layer and therefore sequential.

Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030115618.1382200-3-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hwmon.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 9b7773441f627..7b4f88fdf647a 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -57,7 +57,7 @@ struct xe_hwmon {
 	struct device *hwmon_dev;
 	/** @gt: primary gt */
 	struct xe_gt *gt;
-	/** @hwmon_lock: lock for rmw operations */
+	/** @hwmon_lock: lock for rw attributes*/
 	struct mutex hwmon_lock;
 	/** @scl_shift_power: pkg power unit */
 	int scl_shift_power;
@@ -149,11 +149,13 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
 {
 	u64 reg_val, min, max;
 
+	mutex_lock(&hwmon->hwmon_lock);
+
 	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0);
 	/* Check if PL1 limit is disabled */
 	if (!(reg_val & PKG_PWR_LIM_1_EN)) {
 		*value = PL1_DISABLE;
-		return;
+		goto unlock;
 	}
 
 	reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
@@ -167,12 +169,17 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
 
 	if (min && max)
 		*value = clamp_t(u64, *value, min, max);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
 }
 
 static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
 {
+	int ret = 0;
 	u64 reg_val;
 
+	mutex_lock(&hwmon->hwmon_lock);
+
 	/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
 	if (value == PL1_DISABLE) {
 		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
@@ -180,8 +187,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
 		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val,
 				     PKG_PWR_LIM_1_EN, 0);
 
-		if (reg_val & PKG_PWR_LIM_1_EN)
-			return -EOPNOTSUPP;
+		if (reg_val & PKG_PWR_LIM_1_EN) {
+			ret = -EOPNOTSUPP;
+			goto unlock;
+		}
 	}
 
 	/* Computation in 64-bits to avoid overflow. Round to nearest. */
@@ -190,8 +199,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
 
 	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
 			     PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
-
-	return 0;
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
+	return ret;
 }
 
 static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
@@ -229,8 +239,6 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
 	struct xe_hwmon_energy_info *ei = &hwmon->ei;
 	u64 reg_val;
 
-	mutex_lock(&hwmon->hwmon_lock);
-
 	xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32,
 			     &reg_val, 0, 0);
 
@@ -243,8 +251,6 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
 
 	*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
 				  hwmon->scl_shift_energy);
-
-	mutex_unlock(&hwmon->hwmon_lock);
 }
 
 static const struct hwmon_channel_info *hwmon_info[] = {
@@ -279,12 +285,16 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u3
 	int ret;
 	u32 uval;
 
+	mutex_lock(&hwmon->hwmon_lock);
+
 	ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval);
 	if (ret)
-		return ret;
+		goto unlock;
 
 	*value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
 				 scale_factor, POWER_SETUP_I1_SHIFT);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
 	return ret;
 }
 
@@ -293,9 +303,12 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u3
 	int ret;
 	u32 uval;
 
+	mutex_lock(&hwmon->hwmon_lock);
+
 	uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor);
 	ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval);
 
+	mutex_unlock(&hwmon->hwmon_lock);
 	return ret;
 }
 
-- 
GitLab


From 4446fcf220ceab4f6d0cc4ae3b1338a0ceeeb72e Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Mon, 30 Oct 2023 17:26:18 +0530
Subject: [PATCH 2026/2340] drm/xe/hwmon: Expose power1_max_interval

Expose power1_max_interval, that is the tau corresponding to PL1, as a
custom hwmon attribute. Some bit manipulation is needed because of the
format of PKG_PWR_LIM_1_TIME in
PACKAGE_RAPL_LIMIT register (1.x * power(2,y))

v2: Get rpm wake ref while accessing power1_max_interval
v3: %s/hwmon/xe_hwmon/
v4:
 - As power1_max_interval is rw attr take lock in read function as well
 - Refine comment about val to fix point conversion (Andi)
 - Update kernel version and date in doc
v5: Fix review comments (Anshuman)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030115618.1382200-4-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../ABI/testing/sysfs-driver-intel-xe-hwmon   |   9 ++
 drivers/gpu/drm/xe/regs/xe_mchbar_regs.h      |   8 +
 drivers/gpu/drm/xe/xe_hwmon.c                 | 153 +++++++++++++++++-
 3 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 1a7a6c23e1417..8c321bc9dc044 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -59,3 +59,12 @@ Contact:	intel-xe@lists.freedesktop.org
 Description:	RO. Energy input of device in microjoules.
 
 		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+Date:		October 2023
+KernelVersion:	6.6
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Sustained power limit interval (Tau in PL1/Tau) in
+		milliseconds over which sustained power is averaged.
+
+		Only supported for particular Intel xe graphics platforms.
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
index d8ecbe1858d12..519dd1067a198 100644
--- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -22,15 +22,23 @@
 #define   PKG_TDP				GENMASK_ULL(14, 0)
 #define   PKG_MIN_PWR				GENMASK_ULL(30, 16)
 #define   PKG_MAX_PWR				GENMASK_ULL(46, 32)
+#define   PKG_MAX_WIN				GENMASK_ULL(54, 48)
+#define     PKG_MAX_WIN_X			GENMASK_ULL(54, 53)
+#define     PKG_MAX_WIN_Y			GENMASK_ULL(52, 48)
+
 
 #define PCU_CR_PACKAGE_POWER_SKU_UNIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938)
 #define   PKG_PWR_UNIT				REG_GENMASK(3, 0)
 #define   PKG_ENERGY_UNIT			REG_GENMASK(12, 8)
+#define   PKG_TIME_UNIT				REG_GENMASK(19, 16)
 
 #define PCU_CR_PACKAGE_ENERGY_STATUS		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c)
 
 #define PCU_CR_PACKAGE_RAPL_LIMIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
 #define   PKG_PWR_LIM_1				REG_GENMASK(14, 0)
 #define   PKG_PWR_LIM_1_EN			REG_BIT(15)
+#define   PKG_PWR_LIM_1_TIME			REG_GENMASK(23, 17)
+#define   PKG_PWR_LIM_1_TIME_X			REG_GENMASK(23, 22)
+#define   PKG_PWR_LIM_1_TIME_Y			REG_GENMASK(21, 17)
 
 #endif /* _XE_MCHBAR_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 7b4f88fdf647a..6ef2aa1eae8b0 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -38,6 +38,7 @@ enum xe_hwmon_reg_operation {
 #define SF_CURR		1000		/* milliamperes */
 #define SF_VOLTAGE	1000		/* millivolts */
 #define SF_ENERGY	1000000		/* microjoules */
+#define SF_TIME		1000		/* milliseconds */
 
 /**
  * struct xe_hwmon_energy_info - to accumulate energy
@@ -63,6 +64,8 @@ struct xe_hwmon {
 	int scl_shift_power;
 	/** @scl_shift_energy: pkg energy unit */
 	int scl_shift_energy;
+	/** @scl_shift_time: pkg time unit */
+	int scl_shift_time;
 	/** @ei: Energy info for energy1_input */
 	struct xe_hwmon_energy_info ei;
 };
@@ -253,6 +256,152 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
 				  hwmon->scl_shift_energy);
 }
 
+static ssize_t
+xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+				  char *buf)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	u32 x, y, x_w = 2; /* 2 bits */
+	u64 r, tau4, out;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT,
+			     REG_READ32, &r, 0, 0);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
+	y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+
+	/*
+	 * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+	 *     = (4 | x) << (y - 2)
+	 *
+	 * Here (y - 2) ensures a 1.x fixed point representation of 1.x
+	 * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75
+	 *
+	 * As y can be < 2, we compute tau4 = (4 | x) << y
+	 * and then add 2 when doing the final right shift to account for units
+	 */
+	tau4 = ((1 << x_w) | x) << y;
+
+	/* val in hwmon interface units (millisec) */
+	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	return sysfs_emit(buf, "%llu\n", out);
+}
+
+static ssize_t
+xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	u32 x, y, rxy, x_w = 2; /* 2 bits */
+	u64 tau4, r, max_win;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	/*
+	 * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12.
+	 * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds.
+	 *
+	 * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register.
+	 * However, it is observed that existing discrete GPUs does not provide correct
+	 * PKG_MAX_WIN value, therefore a using default constant value. For future discrete GPUs
+	 * this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU.
+	 */
+#define PKG_MAX_WIN_DEFAULT 0x12ull
+
+	/*
+	 * val must be < max in hwmon interface units. The steps below are
+	 * explained in xe_hwmon_power1_max_interval_show()
+	 */
+	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
+	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
+	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
+	tau4 = ((1 << x_w) | x) << y;
+	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	if (val > max_win)
+		return -EINVAL;
+
+	/* val in hw units */
+	val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
+
+	/*
+	 * Convert val to 1.x * power(2,y)
+	 * y = ilog2(val)
+	 * x = (val - (1 << y)) >> (y - 2)
+	 */
+	if (!val) {
+		y = 0;
+		x = 0;
+	} else {
+		y = ilog2(val);
+		x = (val - (1ul << y)) << x_w >> y;
+	}
+
+	rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r,
+			     PKG_PWR_LIM_1_TIME, rxy);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return count;
+}
+
+static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
+			  xe_hwmon_power1_max_interval_show,
+			  xe_hwmon_power1_max_interval_store, 0);
+
+static struct attribute *hwmon_attributes[] = {
+	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
+	NULL
+};
+
+static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
+					   struct attribute *attr, int index)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	int ret = 0;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
+		ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0;
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static const struct attribute_group hwmon_attrgroup = {
+	.attrs = hwmon_attributes,
+	.is_visible = xe_hwmon_attributes_visible,
+};
+
+static const struct attribute_group *hwmon_groups[] = {
+	&hwmon_attrgroup,
+	NULL
+};
+
 static const struct hwmon_channel_info *hwmon_info[] = {
 	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
 	HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
@@ -569,6 +718,7 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
 				     REG_READ32, &val_sku_unit, 0, 0);
 		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
 		hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
+		hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
 	}
 
 	/*
@@ -615,7 +765,8 @@ void xe_hwmon_register(struct xe_device *xe)
 	/*  hwmon_dev points to device hwmon<i> */
 	hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon,
 								&hwmon_chip_info,
-								NULL);
+								hwmon_groups);
+
 	if (IS_ERR(hwmon->hwmon_dev)) {
 		drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
 		xe->hwmon = NULL;
-- 
GitLab


From 4e002016a1e5b5d0b29191a82d4f561f175f3d33 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Mon, 25 Sep 2023 14:02:32 -0700
Subject: [PATCH 2027/2340] drm/xe: Replace xe_ttm_vram_mgr.tile with
 xe_mem_region

Replace the xe_ttm_vram_mgr.tile pointer with a xe_mem_region pointer
instead. The former is currently unused.
TTM VRAM regions are exposing device vram and is better to store a pointer
directly to the xe_mem_region instead of the tile. This allows to cleanup
unnecessary usage of xe_tile from xe_bo.c in later patch.

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c       | 7 +++----
 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 285791eb4a796..953e5dc0fd808 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -358,12 +358,11 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
 int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr)
 {
 	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_mem_region *vram = &tile->mem.vram;
 
-	mgr->tile = tile;
-
+	mgr->vram = vram;
 	return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id,
-				      tile->mem.vram.usable_size,
-				      tile->mem.vram.io_size,
+				      vram->usable_size, vram->io_size,
 				      PAGE_SIZE);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index 48bb991c14a56..2d75cf1262893 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -9,7 +9,7 @@
 #include <drm/drm_buddy.h>
 #include <drm/ttm/ttm_device.h>
 
-struct xe_tile;
+struct xe_mem_region;
 
 /**
  * struct xe_ttm_vram_mgr - XE TTM VRAM manager
@@ -17,12 +17,12 @@ struct xe_tile;
  * Manages placement of TTM resource in VRAM.
  */
 struct xe_ttm_vram_mgr {
-	/** @tile: Tile which the VRAM belongs to */
-	struct xe_tile *tile;
 	/** @manager: Base TTM resource manager */
 	struct ttm_resource_manager manager;
 	/** @mm: DRM buddy allocator which manages the VRAM */
 	struct drm_buddy mm;
+	/** @vram: ptr to details of associated VRAM region */
+	struct xe_mem_region *vram;
 	/** @visible_size: Proped size of the CPU visible portion */
 	u64 visible_size;
 	/** @visible_avail: CPU visible portion still unallocated */
-- 
GitLab


From 4e11a1411ab41416be7f29716a767eb135f7aa74 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Tue, 26 Sep 2023 12:10:40 -0700
Subject: [PATCH 2028/2340] drm/xe: Remove unused xe_bo_to_tile

Unused and would like to remove the memtype_to_tile() which it calls.

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 13 -------------
 drivers/gpu/drm/xe/xe_bo.h |  2 --
 2 files changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4467d711aa1f5..bdd23090c0ade 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -109,19 +109,6 @@ mem_type_to_tile(struct xe_device *xe, u32 mem_type)
 	return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
 }
 
-/**
- * xe_bo_to_tile() - Get a tile from a BO's memory location
- * @bo: The buffer object
- *
- * Get a tile from a BO's memory location, should be called on BOs in VRAM only.
- *
- * Return: xe_tile object which is closest to the BO
- */
-struct xe_tile *xe_bo_to_tile(struct xe_bo *bo)
-{
-	return mem_type_to_tile(xe_bo_device(bo), bo->ttm.resource->mem_type);
-}
-
 static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
 			   u32 bo_flags, u32 *c)
 {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 3f4e2818f92c5..6283e27bc4251 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -122,8 +122,6 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 			      u32 bo_flags);
 
-struct xe_tile *xe_bo_to_tile(struct xe_bo *bo);
-
 static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
 {
 	return container_of(bo, struct xe_bo, ttm);
-- 
GitLab


From fd0975b7cfee7d3e6db6771193b0cff230b7eec8 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Mon, 25 Sep 2023 17:12:48 -0700
Subject: [PATCH 2029/2340] drm/xe: Replace usage of mem_type_to_tile

Currently mem_type_to_tile() is being used to access the tile's underlying
tile.mem.vram. However, this function makes the assumption that a mem_type
will only ever map to a single tile. Now that the TTM vram manager contains
a pointer to the memory_region, make use of this in xe_bo.c.

As such, introduce a helper function res_to_mem_region() to get the
ttm_vram_mgr->vram from the BO's resource, and use this to replace usage
of mem_type_to_tile().

xe_tile is still needed to choose the migration context, so this part is
unchanged. But as this is only renaming usage, function is renamed now to
mem_type_to_migrate().

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 63 +++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index bdd23090c0ade..b96d1e7b9badb 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -101,12 +101,24 @@ static bool xe_bo_is_user(struct xe_bo *bo)
 	return bo->flags & XE_BO_CREATE_USER_BIT;
 }
 
-static struct xe_tile *
-mem_type_to_tile(struct xe_device *xe, u32 mem_type)
+static struct xe_migrate *
+mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
 {
+	struct xe_tile *tile;
+
 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
+	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
+	return tile->migrate;
+}
+
+static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
+{
+	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+	struct ttm_resource_manager *mgr;
 
-	return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
+	xe_assert(xe, resource_is_vram(res));
+	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
+	return to_xe_ttm_vram_mgr(mgr)->vram;
 }
 
 static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
@@ -126,11 +138,13 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
 {
-	struct xe_tile *tile = mem_type_to_tile(xe, mem_type);
 	struct ttm_place place = { .mem_type = mem_type };
-	u64 io_size = tile->mem.vram.io_size;
+	struct xe_mem_region *vram;
+	u64 io_size;
 
-	xe_assert(xe, tile->mem.vram.usable_size);
+	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
+	xe_assert(xe, vram && vram->usable_size);
+	io_size = vram->io_size;
 
 	/*
 	 * For eviction / restore on suspend / resume objects
@@ -140,7 +154,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 			XE_BO_CREATE_GGTT_BIT))
 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
 
-	if (io_size < tile->mem.vram.usable_size) {
+	if (io_size < vram->usable_size) {
 		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
 			place.fpfn = 0;
 			place.lpfn = io_size >> PAGE_SHIFT;
@@ -404,21 +418,21 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 		return 0;
 	case XE_PL_VRAM0:
 	case XE_PL_VRAM1: {
-		struct xe_tile *tile = mem_type_to_tile(xe, mem->mem_type);
 		struct xe_ttm_vram_mgr_resource *vres =
 			to_xe_ttm_vram_mgr_resource(mem);
+		struct xe_mem_region *vram = res_to_mem_region(mem);
 
 		if (vres->used_visible_size < mem->size)
 			return -EINVAL;
 
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 
-		if (tile->mem.vram.mapping &&
+		if (vram->mapping &&
 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-			mem->bus.addr = (u8 *)tile->mem.vram.mapping +
+			mem->bus.addr = (u8 *)vram->mapping +
 				mem->bus.offset;
 
-		mem->bus.offset += tile->mem.vram.io_start;
+		mem->bus.offset += vram->io_start;
 		mem->bus.is_iomem = true;
 
 #if  !defined(CONFIG_X86)
@@ -614,7 +628,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	struct ttm_resource *old_mem = ttm_bo->resource;
 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
 	struct ttm_tt *ttm = ttm_bo->ttm;
-	struct xe_tile *tile = NULL;
+	struct xe_migrate *migrate = NULL;
 	struct dma_fence *fence;
 	bool move_lacks_source;
 	bool tt_has_data;
@@ -692,14 +706,13 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	}
 
 	if (bo->tile)
-		tile = bo->tile;
+		migrate = bo->tile->migrate;
 	else if (resource_is_vram(new_mem))
-		tile = mem_type_to_tile(xe, new_mem->mem_type);
+		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
 	else if (mem_type_is_vram(old_mem_type))
-		tile = mem_type_to_tile(xe, old_mem_type);
+		migrate = mem_type_to_migrate(xe, old_mem_type);
 
-	xe_assert(xe, tile);
-	xe_tile_assert(tile, tile->migrate);
+	xe_assert(xe, migrate);
 
 	trace_xe_bo_move(bo);
 	xe_device_mem_access_get(xe);
@@ -720,7 +733,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 			/* Create a new VMAP once kernel BO back in VRAM */
 			if (!ret && resource_is_vram(new_mem)) {
-				void *new_addr = tile->mem.vram.mapping +
+				struct xe_mem_region *vram = res_to_mem_region(new_mem);
+				void *new_addr = vram->mapping +
 					(new_mem->start << PAGE_SHIFT);
 
 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
@@ -737,9 +751,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		}
 	} else {
 		if (move_lacks_source)
-			fence = xe_migrate_clear(tile->migrate, bo, new_mem);
+			fence = xe_migrate_clear(migrate, bo, new_mem);
 		else
-			fence = xe_migrate_copy(tile->migrate,
+			fence = xe_migrate_copy(migrate,
 						bo, bo, old_mem, new_mem);
 		if (IS_ERR(fence)) {
 			ret = PTR_ERR(fence);
@@ -908,16 +922,16 @@ err_res_free:
 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
 				       unsigned long page_offset)
 {
-	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
-	struct xe_tile *tile = mem_type_to_tile(xe, ttm_bo->resource->mem_type);
 	struct xe_res_cursor cursor;
+	struct xe_mem_region *vram;
 
 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
 
+	vram = res_to_mem_region(ttm_bo->resource);
 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
-	return (tile->mem.vram.io_start + cursor.start) >> PAGE_SHIFT;
+	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
 }
 
 static void __xe_bo_vunmap(struct xe_bo *bo);
@@ -1492,12 +1506,11 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
 {
 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
-	struct xe_tile *tile = mem_type_to_tile(xe, res->mem_type);
 
 	if (res->mem_type == XE_PL_STOLEN)
 		return xe_ttm_stolen_gpu_offset(xe);
 
-	return tile->mem.vram.dpa_base;
+	return res_to_mem_region(res)->dpa_base;
 }
 
 /**
-- 
GitLab


From ebb00b285bef8bcdc46ac4e344d5748539bdd213 Mon Sep 17 00:00:00 2001
From: Pallavi Mishra <pallavi.mishra@intel.com>
Date: Wed, 1 Nov 2023 04:38:38 +0530
Subject: [PATCH 2030/2340] drm/xe: Dump CTB during TLB timeout

Print CTB info during TLB invalidation
timeout event.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index bd6005b9d4986..b5c39c55e1fa0 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -319,6 +319,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_guc *guc = &gt->uc.guc;
+	struct drm_printer p = drm_err_printer(__func__);
 	int ret;
 
 	/*
@@ -331,6 +332,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 	if (!ret) {
 		drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n",
 			gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
+		xe_guc_ct_print(&guc->ct, &p, true);
 		return -ETIME;
 	}
 
-- 
GitLab


From 81d11b9d6625d3c2a9ecf68f41f3575e653c0ac7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 31 Oct 2023 11:46:45 -0700
Subject: [PATCH 2031/2340] drm/xe: Adjust tile_present mask when skipping
 rebinds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a rebind is skipped the tile_present mask needs to be updated for the
newly created vma to properly reflect the state of the vma.

Reported-by: <christoph.manszewski@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 05f8c691f5fb2..7aefa6aa66a15 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2340,6 +2340,10 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 			op->flags |= XE_VMA_OP_COMMITTED;
 		break;
 	case DRM_GPUVA_OP_REMAP:
+	{
+		u8 tile_present =
+			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
+
 		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
 				 true);
 		op->flags |= XE_VMA_OP_COMMITTED;
@@ -2348,15 +2352,21 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 			err |= xe_vm_insert_vma(vm, op->remap.prev);
 			if (!err)
 				op->flags |= XE_VMA_OP_PREV_COMMITTED;
-			if (!err && op->remap.skip_prev)
+			if (!err && op->remap.skip_prev) {
+				op->remap.prev->tile_present =
+					tile_present;
 				op->remap.prev = NULL;
+			}
 		}
 		if (op->remap.next) {
 			err |= xe_vm_insert_vma(vm, op->remap.next);
 			if (!err)
 				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
-			if (!err && op->remap.skip_next)
+			if (!err && op->remap.skip_next) {
+				op->remap.next->tile_present =
+					tile_present;
 				op->remap.next = NULL;
+			}
 		}
 
 		/* Adjust for partial unbind after removin VMA from VM */
@@ -2365,6 +2375,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 			op->base.remap.unmap->va->va.range = op->remap.range;
 		}
 		break;
+	}
 	case DRM_GPUVA_OP_UNMAP:
 		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
 		op->flags |= XE_VMA_OP_COMMITTED;
-- 
GitLab


From 74a6c6438ee7b53e7711fc0b7000ed42edd7dad5 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Tue, 31 Oct 2023 14:12:16 -0700
Subject: [PATCH 2032/2340] drm/xe: Fix dequeue of access counter work item

The access counters worker function is fixed to advance the head pointer
when dequeuing from the acc_queue.  This now matches the similar logic in
get_pagefault().

Signed-off-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 4e33ef8c9d6ac..018edf731ae5e 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -542,6 +542,8 @@ unlock_vm:
 
 #define make_u64(hi__, low__)  ((u64)(hi__) << 32 | (u64)(low__))
 
+#define ACC_MSG_LEN_DW        4
+
 static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
 {
 	const struct xe_guc_acc_desc *desc;
@@ -562,6 +564,9 @@ static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
 		acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
 		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
 					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
+
+		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
+				  ACC_QUEUE_NUM_DW;
 	} else {
 		ret = -1;
 	}
@@ -589,8 +594,6 @@ static void acc_queue_work_func(struct work_struct *w)
 	}
 }
 
-#define ACC_MSG_LEN_DW	4
-
 static bool acc_queue_full(struct acc_queue *acc_queue)
 {
 	lockdep_assert_held(&acc_queue->lock);
-- 
GitLab


From 4d5252b4ca1dc973b8b368c88f9d1e348f9c1906 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 31 Oct 2023 07:05:37 -0700
Subject: [PATCH 2033/2340] drm/xe/xe2: Program correct MOCS registers

The LNCFCMOCS registers no longer exist on Xe2 so there's no need to
attempt to program them.  Since GLOB_MOCS is the only set of MOCS
registers now, it's expected to be used for all platforms (both igpu and
dgpu) going forward, so adjust the MOCS programming flags accordingly.

v2:
 - Fix typo (global mocs condition is >=, not >)

Bspec: 71582
Reviewed-by: Pallavi Mishra <pallavi.mishra@intel.com>
Link: https://lore.kernel.org/r/20231031140536.303746-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 21972bbef8fd7..46e9992578629 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -26,6 +26,7 @@ static inline void mocs_dbg(const struct drm_device *dev,
 
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
+	HAS_LNCF_MOCS = BIT(1),
 };
 
 struct xe_mocs_entry {
@@ -473,8 +474,10 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		return 0;
 	}
 
-	if (!IS_DGFX(xe))
+	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) >= 20)
 		flags |= HAS_GLOBAL_MOCS;
+	if (GRAPHICS_VER(xe) < 20)
+		flags |= HAS_LNCF_MOCS;
 
 	return flags;
 }
@@ -505,7 +508,7 @@ static void __init_mocs_table(struct xe_gt *gt,
 	for (i = 0;
 	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i,
+		mocs_dbg(&gt_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
 			 XELP_GLOBAL_MOCS(i).addr, mocs);
 
 		if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250)
@@ -545,7 +548,7 @@ static void init_l3cc_table(struct xe_gt *gt,
 	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
 				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
 	     i++) {
-		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
+		mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
 			 l3cc);
 
 		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
@@ -570,19 +573,18 @@ void xe_mocs_init(struct xe_gt *gt)
 	unsigned int flags;
 
 	/*
-	 * LLC and eDRAM control values are not applicable to dgfx
+	 * MOCS settings are split between "GLOB_MOCS" and/or "LNCFCMOCS"
+	 * registers depending on platform.
+	 *
+	 * These registers should be programmed before GuC initialization
+	 * since their values will affect some of the memory transactions
+	 * performed by the GuC.
 	 */
 	flags = get_mocs_settings(gt_to_xe(gt), &table);
 	mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags);
 
 	if (flags & HAS_GLOBAL_MOCS)
 		__init_mocs_table(gt, &table);
-
-	/*
-	 * Initialize the L3CC table as part of mocs initalization to make
-	 * sure the LNCFCMOCSx registers are programmed for the subsequent
-	 * memory transactions including guc transactions
-	 */
-	if (table.table)
+	if (flags & HAS_LNCF_MOCS)
 		init_l3cc_table(gt, &table);
 }
-- 
GitLab


From effc560d7a36b8c59219dd5374d9725a9edd85c4 Mon Sep 17 00:00:00 2001
From: Badal Nilawar <badal.nilawar@intel.com>
Date: Wed, 1 Nov 2023 22:02:12 +0530
Subject: [PATCH 2034/2340] drm/xe/mtl: Use 16.67 Mhz freq scale factor to get
 rpX

For mtl and above 16.67 Mhz is the scale factor to calculate
rpX frequencies.

v2: Fix review comment (Ashutosh)

Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231101163212.1629685-1-badal.nilawar@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 74247e0d36742..020c6597cd78f 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -313,7 +313,7 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
 	else
 		reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY);
 
-	pc->rpe_freq = REG_FIELD_GET(MTL_RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+	pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
 }
 
 static void tgl_update_rpe_value(struct xe_guc_pc *pc)
@@ -653,10 +653,10 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
 		reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP);
 	else
 		reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP);
-	pc->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, reg) *
-		GT_FREQUENCY_MULTIPLIER;
-	pc->rpn_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, reg) *
-		GT_FREQUENCY_MULTIPLIER;
+
+	pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg));
+
+	pc->rpn_freq = decode_freq(REG_FIELD_GET(MTL_RPN_CAP_MASK, reg));
 }
 
 static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
-- 
GitLab


From 5d30cfe003a98d2f4ad28fe27226f3f2e6784c65 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 2 Nov 2023 05:48:55 -0700
Subject: [PATCH 2035/2340] drm/xe: Add Wa_14019821291

This workaround is primarily implemented by the BIOS.  However if the
BIOS applies the workaround it will reserve a small piece of our DSM
(which should be at the top, right below the WOPCM); we just need to
keep that region reserved so that nothing else attempts to re-use it.

v2 (Gustavo):
  - Check for NULL media_gt
  - Mask bits [5:0] to avoid potential issues in future platforms

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20231102124855.1940491-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile            |  2 +-
 drivers/gpu/drm/xe/regs/xe_gt_regs.h   |  2 ++
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 25 +++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_wa_oob.rules     |  1 +
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 32eee57b41846..1d39784e92fd9 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
-$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o $(obj)/xe_ttm_stolen_mgr.o: $(generated_oob)
 
 # Please keep these build lists sorted!
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 7a6407e38265a..902c60543de07 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -158,6 +158,8 @@
 #define XEHP_SQCM				XE_REG_MCR(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
+#define GSCPSMI_BASE				XE_REG(0x880c)
+
 #define	MIRROR_FUSE3				XE_REG(0x9118)
 #define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
 #define   L3BANK_PAIR_COUNT			4
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 79fbd74a39446..0c533d36791d9 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -11,6 +11,8 @@
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_range_manager.h>
 
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
@@ -19,6 +21,7 @@
 #include "xe_res_cursor.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
 
 struct xe_ttm_stolen_mgr {
 	struct xe_ttm_vram_mgr base;
@@ -112,6 +115,7 @@ static u32 get_wopcm_size(struct xe_device *xe)
 static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
 	u32 stolen_size, wopcm_size;
 	u32 ggc, gms;
 
@@ -154,6 +158,27 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 
 	stolen_size -= wopcm_size;
 
+	if (media_gt && XE_WA(media_gt, 14019821291)) {
+		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
+			& ~GENMASK_ULL(5, 0);
+
+		/*
+		 * This workaround is primarily implemented by the BIOS.  We
+		 * just need to figure out whether the BIOS has applied the
+		 * workaround (meaning the programmed address falls within
+		 * the DSM) and, if so, reserve that part of the DSM to
+		 * prevent accidental reuse.  The DSM location should be just
+		 * below the WOPCM.
+		 */
+		if (gscpsmi_base >= mgr->io_base &&
+		    gscpsmi_base < mgr->io_base + stolen_size) {
+			xe_gt_dbg(media_gt,
+				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
+				  mgr->io_base + stolen_size - gscpsmi_base);
+			stolen_size = gscpsmi_base - mgr->io_base;
+		}
+	}
+
 	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
 		return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index f3ff774dc4aa6..752842d734be0 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -19,3 +19,4 @@
 		SUBPLATFORM(DG2, G12)
 16017236439	PLATFORM(PVC)
 22010954014	PLATFORM(DG2)
+14019821291	MEDIA_VERSION_RANGE(1300, 2000)
-- 
GitLab


From 27a1a1e2e47d6f12a784413c194a94b6c0d7fdcb Mon Sep 17 00:00:00 2001
From: Carlos Santa <carlos.santa@intel.com>
Date: Thu, 26 Oct 2023 15:01:27 -0700
Subject: [PATCH 2036/2340] drm/xe: stringify the argument to avoid potential
 vulnerability

This error gets printed inside a sandbox with warnings turned on.

/mnt/host/source/src/third_party/kernel/v5.15/drivers/
gpu/drm/xe/xe_gt_idle_sysfs.c:87:26: error: format string is
not a string literal (potentially insecure) [-Werror,-Wformat-security]
        return sysfs_emit(buff, gtidle->name);
                                ^~~~~~~~~~~~
/mnt/host/source/src/third_party/kernel/v5.15/drivers/
gpu/drm/xe/xe_gt_idle_sysfs.c:87:26: note: treat the string
as an argument to avoid this
        return sysfs_emit(buff, gtidle->name);
                                ^
                                "%s",
1 error generated.

CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Carlos Santa <carlos.santa@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_idle_sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
index 7238e96a116cf..8df9840811cda 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
@@ -85,7 +85,7 @@ static ssize_t name_show(struct device *dev,
 {
 	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
 
-	return sysfs_emit(buff, gtidle->name);
+	return sysfs_emit(buff, "%s\n", gtidle->name);
 }
 static DEVICE_ATTR_RO(name);
 
-- 
GitLab


From 571622740288f801042a28598440a098249213fa Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Fri, 27 Oct 2023 11:42:55 +0200
Subject: [PATCH 2037/2340] drm/xe: implement driver initiated function-reset

Driver initiated function-reset (FLR) is the highest level of reset
that we can trigger from within the driver. In contrast to PCI FLR it
doesn't require re-enumeration of PCI BAR. It can be useful in case
GT fails to reset. It is also the only way to trigger GSC reset from
the driver and can be used in future addition of GSC support.

v2:
  - use regs from xe_regs.h
  - move the flag to xe.mmio
  - call flr only on root gt
  - use BIOS protection check
  - copy/paste comments from i915
v3:
  - flr code moved to xe_device.c
v4:
  - needs_flr_on_fini moved to xe_device

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h    |  7 +++
 drivers/gpu/drm/xe/xe_device.c       | 78 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_device_types.h |  3 ++
 drivers/gpu/drm/xe/xe_gt.c           |  2 +
 4 files changed, 90 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index e4408473e802b..7202084198bdb 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -70,8 +70,15 @@
 
 #define SOFTWARE_FLAGS_SPR33			XE_REG(0x4f084)
 
+#define GU_CNTL_PROTECTED			XE_REG(0x10100C)
+#define   DRIVERINT_FLR_DIS			REG_BIT(31)
+
 #define GU_CNTL					XE_REG(0x101010)
 #define   LMEM_INIT				REG_BIT(7)
+#define   DRIVERFLR				REG_BIT(31)
+
+#define GU_DEBUG				XE_REG(0x101018)
+#define   DRIVERFLR_STATUS			REG_BIT(31)
 
 #define XEHP_CLOCK_GATE_DIS			XE_REG(0x101014)
 #define   SGSI_SIDECLK_DIS			REG_BIT(17)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index ae0b7349c3e31..5869ba7e0cdc1 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -5,6 +5,8 @@
 
 #include "xe_device.h"
 
+#include <linux/units.h>
+
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_gem_ttm_helper.h>
@@ -252,6 +254,78 @@ err_put:
 	return ERR_PTR(err);
 }
 
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void xe_driver_flr(struct xe_device *xe)
+{
+	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	int ret;
+
+	if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
+		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+		return;
+	}
+
+	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+	/*
+	 * Make sure any pending FLR requests have cleared by waiting for the
+	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+	 * to make sure it's not still set from a prior attempt (it's a write to
+	 * clear bit).
+	 * Note that we should never be in a situation where a previous attempt
+	 * is still pending (unless the HW is totally dead), but better to be
+	 * safe in case something unexpected happens
+	 */
+	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+		return;
+	}
+	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+
+	/* Trigger the actual Driver-FLR */
+	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+
+	/* Wait for hardware teardown to complete */
+	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+		return;
+	}
+
+	/* Wait for hardware/firmware re-init to complete */
+	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+			     flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+		return;
+	}
+
+	/* Clear sticky completion status */
+	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	if (xe->needs_flr_on_fini)
+		xe_driver_flr(xe);
+}
+
 static void xe_device_sanitize(struct drm_device *drm, void *arg)
 {
 	struct xe_device *xe = arg;
@@ -283,6 +357,10 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		return err;
 
+	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
+	if (err)
+		return err;
+
 	for_each_gt(gt, xe, id) {
 		err = xe_pcode_probe(gt);
 		if (err)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 4bc668ff8615f..4425c2484a02b 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -388,6 +388,9 @@ struct xe_device {
 	/** @heci_gsc: graphics security controller */
 	struct xe_heci_gsc heci_gsc;
 
+	/** @needs_flr_on_fini: requests function-reset on fini */
+	bool needs_flr_on_fini;
+
 	/* For pcode */
 	struct mutex sb_lock;
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d380f67b33659..73c090762771b 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -626,6 +626,8 @@ err_fail:
 	xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev),
 				   gt_to_tile(gt)->id, gt->info.id);
 
+	gt_to_xe(gt)->needs_flr_on_fini = true;
+
 	return err;
 }
 
-- 
GitLab


From e4e4268d950034dc97fbeba480dd4741d72a8df3 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Tue, 31 Oct 2023 13:32:24 -0700
Subject: [PATCH 2038/2340] drm/xe: Fix pagefault and access counter worker
 functions

When processing G2H messages for pagefault or access counters, we queue a
work item and call queue_work(). This fails if the worker thread is already
queued to run.
The expectation is that the worker function will do more than process a
single item and return. It needs to either process all pending items or
requeue itself if items are pending. But requeuing will add latency and
potential context switch can occur.

We don't want to add unnecessary latency and so the worker should process
as many faults as it can within a reasonable duration of time.
We also do not want to hog the cpu core, so here we execute in a loop
and requeue if still running after more than 20 ms.
This seems reasonable framework and easy to tune this futher if needed.

This resolves issues seen with several igt@xe_exec_fault_mode subtests
where the GPU will hang when KMD ignores a pending pagefault.

v2: requeue the worker instead of having an internal processing loop.
v3: implement hybrid model of v1 and v2
    now, run for 20 msec before we will requeue if still running
v4: only requeue in worker if queue is non-empty (Matt B)

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 82 ++++++++++++++++------------
 1 file changed, 48 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 018edf731ae5e..9fcbf8773b8bb 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
 
 #define PF_MSG_LEN_DW	4
 
-static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
 {
 	const struct xe_guc_pagefault_desc *desc;
-	int ret = 0;
+	bool ret = false;
 
 	spin_lock_irq(&pf_queue->lock);
 	if (pf_queue->head != pf_queue->tail) {
@@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
 
 		pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
 			PF_QUEUE_NUM_DW;
-	} else {
-		ret = -1;
+		ret = true;
 	}
 	spin_unlock_irq(&pf_queue->lock);
 
@@ -348,6 +347,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	return full ? -ENOSPC : 0;
 }
 
+#define USM_QUEUE_MAX_RUNTIME_MS	20
+
 static void pf_queue_work_func(struct work_struct *w)
 {
 	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
@@ -355,31 +356,38 @@ static void pf_queue_work_func(struct work_struct *w)
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_guc_pagefault_reply reply = {};
 	struct pagefault pf = {};
+	unsigned long threshold;
 	int ret;
 
-	ret = get_pagefault(pf_queue, &pf);
-	if (ret)
-		return;
+	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
 
-	ret = handle_pagefault(gt, &pf);
-	if (unlikely(ret)) {
-		print_pagefault(xe, &pf);
-		pf.fault_unsuccessful = 1;
-		drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
-	}
+	while (get_pagefault(pf_queue, &pf)) {
+		ret = handle_pagefault(gt, &pf);
+		if (unlikely(ret)) {
+			print_pagefault(xe, &pf);
+			pf.fault_unsuccessful = 1;
+			drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+		}
+
+		reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+			FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+			FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+			FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+			FIELD_PREP(PFR_ASID, pf.asid);
 
-	reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
-		FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
-		FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
-		FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
-		FIELD_PREP(PFR_ASID, pf.asid);
+		reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+			FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+			FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+			FIELD_PREP(PFR_PDATA, pf.pdata);
 
-	reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
-		FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
-		FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
-		FIELD_PREP(PFR_PDATA, pf.pdata);
+		send_pagefault_reply(&gt->uc.guc, &reply);
 
-	send_pagefault_reply(&gt->uc.guc, &reply);
+		if (time_after(jiffies, threshold) &&
+		    pf_queue->head != pf_queue->tail) {
+			queue_work(gt->usm.pf_wq, w);
+			break;
+		}
+	}
 }
 
 static void acc_queue_work_func(struct work_struct *w);
@@ -544,10 +552,10 @@ unlock_vm:
 
 #define ACC_MSG_LEN_DW        4
 
-static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
+static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
 {
 	const struct xe_guc_acc_desc *desc;
-	int ret = 0;
+	bool ret = false;
 
 	spin_lock(&acc_queue->lock);
 	if (acc_queue->head != acc_queue->tail) {
@@ -567,8 +575,7 @@ static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
 
 		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
 				  ACC_QUEUE_NUM_DW;
-	} else {
-		ret = -1;
+		ret = true;
 	}
 	spin_unlock(&acc_queue->lock);
 
@@ -581,16 +588,23 @@ static void acc_queue_work_func(struct work_struct *w)
 	struct xe_gt *gt = acc_queue->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct acc acc = {};
+	unsigned long threshold;
 	int ret;
 
-	ret = get_acc(acc_queue, &acc);
-	if (ret)
-		return;
+	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
 
-	ret = handle_acc(gt, &acc);
-	if (unlikely(ret)) {
-		print_acc(xe, &acc);
-		drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
+	while (get_acc(acc_queue, &acc)) {
+		ret = handle_acc(gt, &acc);
+		if (unlikely(ret)) {
+			print_acc(xe, &acc);
+			drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
+		}
+
+		if (time_after(jiffies, threshold) &&
+		    acc_queue->head != acc_queue->tail) {
+			queue_work(gt->usm.acc_wq, w);
+			break;
+		}
 	}
 }
 
-- 
GitLab


From 670e811d1fd6aaab485b33081a8b97fa62ff2095 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 7 Nov 2023 09:24:40 +0100
Subject: [PATCH 2039/2340] drm/xe: Update SPDX deprecated license identifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "GPL-2.0" SPDX license identifier is deprecated. Update the
code to use "GPL-2.0-only" instead. Choose this identifier over
"GPL-2.0-or-later" since it's the most restrictive of the two and it's
not fully clear that "GPL-2.0" also allows "GPL-2.0-or-later".

Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231107082440.7568-1-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt_walk.c    | 2 +-
 drivers/gpu/drm/xe/xe_pt_walk.h    | 2 +-
 drivers/gpu/drm/xe/xe_res_cursor.h | 2 +-
 drivers/gpu/drm/xe/xe_trace.c      | 2 +-
 drivers/gpu/drm/xe/xe_trace.h      | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
index 0def89af43729..8f6c8d063f39f 100644
--- a/drivers/gpu/drm/xe/xe_pt_walk.c
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright © 2022 Intel Corporation
  */
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h
index 42c51fa601ecb..ec3d1e9efa6d5 100644
--- a/drivers/gpu/drm/xe/xe_pt_walk.h
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright © 2022 Intel Corporation
  */
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 006fc1361967e..0a306963aa8e5 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
 /*
  * Copyright 2020 Advanced Micro Devices, Inc.
  *
diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c
index 2570c0b859c45..2527c556bff14 100644
--- a/drivers/gpu/drm/xe/xe_trace.c
+++ b/drivers/gpu/drm/xe/xe_trace.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright © 2022 Intel Corporation
  */
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index f13cc9a35e918..95163c303f3e1 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright © 2022 Intel Corporation
  */
-- 
GitLab


From 6ffef7b6991b4e302dd0aa86f67a0d00b0b8e542 Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Mon, 6 Nov 2023 18:06:57 -0300
Subject: [PATCH 2040/2340] drm/xe/xelpmp: Add Wa_16021867713

This workaround applies to all steppings of Xe_LPM+. Implement the KMD
part.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231106210655.175109-3-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3 +++
 drivers/gpu/drm/xe/xe_wa.c               | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 35dd4837dd75f..b57dec17eb2dd 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -118,4 +118,7 @@
 #define VDBOX_CGCTL3F18(base)			XE_REG((base) + 0x3f18)
 #define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
 
+#define VDBOX_CGCTL3F1C(base)			XE_REG((base) + 0x3f1c)
+#define   MFXPIPE_CLKGATE_DIS			REG_BIT(3)
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 2f1782db267b6..614e114a5342a 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -248,6 +248,12 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 
 	/* Xe_LPM+ */
 
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
 	{ XE_RTP_NAME("22016670082"),
 	  XE_RTP_RULES(MEDIA_VERSION(1300)),
 	  XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR))
-- 
GitLab


From 04dfef5b41afc85e8de7b0397050cdb51db35eda Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Thu, 2 Nov 2023 16:04:53 -0700
Subject: [PATCH 2041/2340] drm/xe: Fix unbind of unaccessed VMA (fault mode)

In fault mode, page table binding is deferred until fault handler.
Thus vma->tile_present will be unset unless the VMA is accessed by GPU.

During a later unbind, the logic doesn't account for the fact that local
fence variable will be NULL in this case, leading to pass NULL into
dma_fence_add_callback() and causing few WARN_ONs to print to console.
The fix is already present in the code, just hoist the fence variable
computation to be done earlier.

Resolves warnings seen with igt@xe_exec_fault_mode@once-invalid-fault

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7aefa6aa66a15..b4a4ed28019c8 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1740,14 +1740,14 @@ next:
 		}
 	}
 
+	fence = cf ? &cf->base : !fence ?
+		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
 	if (last_op) {
 		for (i = 0; i < num_syncs; i++)
-			xe_sync_entry_signal(&syncs[i], NULL,
-					     cf ? &cf->base : fence);
+			xe_sync_entry_signal(&syncs[i], NULL, fence);
 	}
 
-	return cf ? &cf->base : !fence ?
-		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
+	return fence;
 
 err_fences:
 	if (fences) {
-- 
GitLab


From 37d1eaab34ab9cdd6022a188ce6b77a88f81c7e2 Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Sun, 29 Oct 2023 19:53:26 +0200
Subject: [PATCH 2042/2340] drm/xe: move the lmem verification code into a
 separate function

If lmem (VRAM) is not fully initialized, the punit will power down
the GT, which will prevent register access from the driver side.
That code moved into a corresponding function (xe_verify_lmem_ready)
to make the code clearer.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231029175326.626745-1-kelbaz@habana.ai
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 0da4f75c07bf6..d8f9fabf715e9 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -381,10 +381,27 @@ static void mmio_fini(struct drm_device *drm, void *arg)
 		iounmap(xe->mem.vram.mapping);
 }
 
+static int xe_verify_lmem_ready(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	/*
+	 * The boot firmware initializes local memory and assesses its health.
+	 * If memory training fails, the punit will have been instructed to
+	 * keep the GT powered down; we won't be able to communicate with it
+	 * and we should not continue with driver initialization.
+	 */
+	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
+		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 int xe_mmio_init(struct xe_device *xe)
 {
 	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
-	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	const int mmio_bar = 0;
 	int err;
 
@@ -409,16 +426,9 @@ int xe_mmio_init(struct xe_device *xe)
 	root_tile->mmio.size = xe->mmio.size;
 	root_tile->mmio.regs = xe->mmio.regs;
 
-	/*
-	 * The boot firmware initializes local memory and assesses its health.
-	 * If memory training fails, the punit will have been instructed to
-	 * keep the GT powered down; we won't be able to communicate with it
-	 * and we should not continue with driver initialization.
-	 */
-	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
-		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
-		return -ENODEV;
-	}
+	err = xe_verify_lmem_ready(xe);
+	if (err)
+		return err;
 
 	err = xe_set_dma_info(xe);
 	if (err)
-- 
GitLab


From d7925d04c062b8adcbbff9604422f979e9dbedb7 Mon Sep 17 00:00:00 2001
From: Jonathan Cavitt <jonathan.cavitt@intel.com>
Date: Fri, 3 Nov 2023 14:03:24 -0700
Subject: [PATCH 2043/2340] drm/xe: clear the serviced bits on
 INTR_IDENTITY_REG

The spec for this register, like many other interrupt related ones,
asks software to write back '1' to clear the serviced bits. Let's
respect the spec.

v2:
- Update commit message
- Add missing CC

Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
CC: Daniele Spurio Ceraolo <daniele.ceraolospurio@intel.com>
CC: Lucas De Marchi <lucas.demarchi@intel.com>
CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
CC: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 5631e5e1ea206..ef26120e7aa42 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -231,7 +231,7 @@ gt_engine_identity(struct xe_device *xe,
 		return 0;
 	}
 
-	xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), INTR_DATA_VALID);
+	xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), ident);
 
 	return ident;
 }
-- 
GitLab


From 047d1f6a2f171fc9ea4c286edd6ee0dfef41a298 Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Wed, 8 Nov 2023 13:03:51 +0530
Subject: [PATCH 2044/2340] drm/xe: Add Wa_14019877138

Enable Force Dispatch Ends Collection for DG2.

BSpec: 46001
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231108073351.3998413-1-haridhar.kalvala@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 614e114a5342a..d03e6674519f1 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -677,6 +677,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
 	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
 
 	/* PVC */
 
-- 
GitLab


From 86017f3898d4ac0ab6c01376ef734c23347b38e7 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Tue, 7 Nov 2023 13:55:58 +0200
Subject: [PATCH 2045/2340] drm/xe/gsc: enable pvc support

Configure and enable PVC HECI GSC support.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h |  1 +
 drivers/gpu/drm/xe/xe_heci_gsc.c  | 11 ++++++++++-
 drivers/gpu/drm/xe/xe_pci.c       |  1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 7202084198bdb..924f7c949d558 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -35,6 +35,7 @@
 #define XEHPC_BCS8_RING_BASE			0x3ee000
 
 #define DG1_GSC_HECI2_BASE                      0x00259000
+#define PVC_GSC_HECI2_BASE                      0x00285000
 #define DG2_GSC_HECI2_BASE                      0x00374000
 
 #define GSCCS_RING_BASE				0x11a000
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 3328ddca42d03..d8e982e3d9a2c 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -70,6 +70,13 @@ static const struct heci_gsc_def heci_gsc_def_dg2 = {
 	.bar_size = GSC_BAR_LENGTH,
 };
 
+static const struct heci_gsc_def heci_gsc_def_pvc = {
+	.name = "mei-gscfi",
+	.bar = PVC_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+	.slow_firmware = true,
+};
+
 static void heci_gsc_release_dev(struct device *dev)
 {
 	struct auxiliary_device *aux_dev = to_auxiliary_dev(dev);
@@ -172,7 +179,9 @@ void xe_heci_gsc_init(struct xe_device *xe)
 
 	heci_gsc->irq = -1;
 
-	if (xe->info.platform == XE_DG2) {
+	if (xe->info.platform == XE_PVC) {
+		def = &heci_gsc_def_pvc;
+	} else if (xe->info.platform == XE_DG2) {
 		def = &heci_gsc_def_dg2;
 	} else if (xe->info.platform == XE_DG1) {
 		def = &heci_gsc_def_dg1;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index eec2b852c7aaa..40d89d4df87c4 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -301,6 +301,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	DGFX_FEATURES,
 	PLATFORM(XE_PVC),
 	.require_force_probe = true,
+	.has_heci_gscfi = 1,
 };
 
 static const struct xe_device_desc mtl_desc = {
-- 
GitLab


From fa85b083733abaef81eecd8693a065657d18e733 Mon Sep 17 00:00:00 2001
From: Pallavi Mishra <pallavi.mishra@intel.com>
Date: Wed, 8 Nov 2023 00:54:24 +0530
Subject: [PATCH 2046/2340] drm/xe/tests: Fix migrate test

Pass a valid vm to xe_migrate_update_pgtables.

Resolves NPD crash seen with igt@xe_live_ktest@migrate

Reviewed-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index c984307560eec..7aad09140d7ed 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -236,7 +236,7 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
 	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
 
 	then = ktime_get();
-	fence = xe_migrate_update_pgtables(m, NULL, NULL, m->q, &update, 1,
+	fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1,
 					   NULL, 0, &pt_update);
 	now = ktime_get();
 	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
-- 
GitLab


From 80103a23da50bb3fc5c3c626ca7bc4d45b28340b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 9 Nov 2023 11:46:07 -0800
Subject: [PATCH 2047/2340] drm/xe: Drop EXECLIST_CONTROL from error state dump

EXECLIST_CONTROL ($enginebase + 0x550) is a write-only register; we
shouldn't be trying to read or report it as part of the device error
state.

Bspec: 45910, 60335
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20231109194606.1835284-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine.c       | 4 ----
 drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 --
 2 files changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index b5b0845908887..e831e63c5e485 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -704,8 +704,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 	snapshot->reg.ring_execlist_sq_contents_hi =
 		hw_engine_mmio_read32(hwe,
 				      RING_EXECLIST_SQ_CONTENTS_HI(0));
-	snapshot->reg.ring_execlist_control =
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0));
 	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
 	snapshot->reg.ring_head =
 		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
@@ -765,8 +763,6 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
 		   snapshot->reg.ring_execlist_sq_contents_lo);
 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
 		   snapshot->reg.ring_execlist_sq_contents_hi);
-	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
-		   snapshot->reg.ring_execlist_control);
 	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
 	drm_printf(p, "\tRING_HEAD:  0x%08x\n", snapshot->reg.ring_head);
 	drm_printf(p, "\tRING_TAIL:  0x%08x\n", snapshot->reg.ring_tail);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 5d4ee29042407..39908dec042a4 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -183,8 +183,6 @@ struct xe_hw_engine_snapshot {
 		u32 ring_execlist_sq_contents_lo;
 		/** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */
 		u32 ring_execlist_sq_contents_hi;
-		/** @ring_execlist_control: RING_EXECLIST_CONTROL */
-		u32 ring_execlist_control;
 		/** @ring_start: RING_START */
 		u32 ring_start;
 		/** @ring_head: RING_HEAD */
-- 
GitLab


From 08987a8b68207e782decb0f4037964ef036a9de4 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 9 Nov 2023 09:51:32 -0800
Subject: [PATCH 2048/2340] drm/xe: Fix build with KUNIT=m

Due to the current integration between "live" xe kunit tests and kunit,
it's not possible to have a build with the following combination:

	CONFIG_DRM_XE=y
	CONFIG_KUNIT=m

... even if kconfig doesn't block it. The reason for the failure is that
some compilation units are pulled in xe.ko:

	drivers/gpu/drm/xe/xe_bo.c:#include "tests/xe_bo.c"
	drivers/gpu/drm/xe/xe_dma_buf.c:#include "tests/xe_dma_buf.c"
	drivers/gpu/drm/xe/xe_migrate.c:#include "tests/xe_migrate.c"
	drivers/gpu/drm/xe/xe_pci.c:#include "tests/xe_pci.c"

Those files shouldn't use symbols from kunit, which should be reserved
to the tests/*_test.c files. Detangling this dependency doesn't seem
very straightforward, so fix the immediate issue instructing kconfig to
block the problematic configuration.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20231109175132.3084142-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index cfa6420b9915a..46325c64ff226 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE
 	tristate "Intel Xe Graphics"
-	depends on DRM && PCI && MMU
+	depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
 	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
-- 
GitLab


From 43efd3ba9f44c46fdb31c8b0f257cf9a2d1b58ae Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Mon, 13 Nov 2023 11:44:02 -0800
Subject: [PATCH 2049/2340] drm/xe: Raise GT frequency before GuC/HuC load

Starting GT freq is usually RPn. Raising freq to RP0 will
help speed up GuC load times. As an example, this data was
collected on DG2-

GuC Load time @RPn ~ 41 ms
GuC Load time @RP0 ~ 11 ms

v2: Raise GT freq before hwconfig init. This will speed up
both HuC and GuC loads. Address review comments (Rodrigo).
Also add a small usleep after requesting frequency which gives
pcode some time to react.

v3: Address checkpatch issue

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  5 +++
 drivers/gpu/drm/xe/xe_gt.c           |  4 +++
 drivers/gpu/drm/xe/xe_guc_pc.c       | 49 ++++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_guc_pc.h       |  1 +
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 902c60543de07..cc27fe8fc3633 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -263,6 +263,11 @@
 
 #define RPNSWREQ				XE_REG(0xa008)
 #define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
+
+#define RP_CONTROL				XE_REG(0xa024)
+#define   RPSWCTL_MASK				REG_GENMASK(10, 9)
+#define   RPSWCTL_ENABLE			REG_FIELD_PREP(RPSWCTL_MASK, 2)
+#define   RPSWCTL_DISABLE			REG_FIELD_PREP(RPSWCTL_MASK, 0)
 #define RC_CONTROL				XE_REG(0xa090)
 #define RC_STATE				XE_REG(0xa094)
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 73c090762771b..6c885dde5d59c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -30,6 +30,7 @@
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
 #include "xe_guc_exec_queue_types.h"
+#include "xe_guc_pc.h"
 #include "xe_hw_fence.h"
 #include "xe_hw_engine_class_sysfs.h"
 #include "xe_irq.h"
@@ -349,6 +350,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
+	/* Raise GT freq to speed up HuC/GuC load */
+	xe_guc_pc_init_early(&gt->uc.guc.pc);
+
 	err = xe_uc_init_hwconfig(&gt->uc);
 	if (err)
 		goto err_force_wake;
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 020c6597cd78f..f4ac76d6b2ddc 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -247,6 +247,12 @@ static u32 decode_freq(u32 raw)
 				 GEN9_FREQ_SCALER);
 }
 
+static u32 encode_freq(u32 freq)
+{
+	return DIV_ROUND_CLOSEST(freq * GEN9_FREQ_SCALER,
+				 GT_FREQUENCY_MULTIPLIER);
+}
+
 static u32 pc_get_min_freq(struct xe_guc_pc *pc)
 {
 	u32 freq;
@@ -257,6 +263,32 @@ static u32 pc_get_min_freq(struct xe_guc_pc *pc)
 	return decode_freq(freq);
 }
 
+static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
+
+	/* Allow/Disallow punit to process software freq requests */
+	xe_mmio_write32(gt, RP_CONTROL, state);
+}
+
+static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 rpnswreq;
+
+	pc_set_manual_rp_ctrl(pc, true);
+
+	/* Req freq is in units of 16.66 Mhz */
+	rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq));
+	xe_mmio_write32(gt, RPNSWREQ, rpnswreq);
+
+	/* Sleep for a small time to allow pcode to respond */
+	usleep_range(100, 300);
+
+	pc_set_manual_rp_ctrl(pc, false);
+}
+
 static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
 {
 	/*
@@ -685,6 +717,21 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
 	else
 		tgl_init_fused_rp_values(pc);
 }
+
+/**
+ * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
+ * frequency to allow faster GuC load times
+ * @pc: Xe_GuC_PC instance
+ */
+void xe_guc_pc_init_early(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	pc_init_fused_rp_values(pc);
+	pc_set_cur_freq(pc, pc->rp0_freq);
+}
+
 static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
 {
 	int ret;
@@ -918,8 +965,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 
 	pc->bo = bo;
 
-	pc_init_fused_rp_values(pc);
-
 	err = sysfs_create_files(gt->sysfs, pc_attrs);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 43ea582545b57..054788e006f32 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -17,4 +17,5 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
 enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
 u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
 u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
+void xe_guc_pc_init_early(struct xe_guc_pc *pc);
 #endif /* _XE_GUC_PC_H_ */
-- 
GitLab


From a839e365ac88f0fa9f8c7ae92b9e7e66bbd9e4d7 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 6 Nov 2023 10:39:38 -0800
Subject: [PATCH 2050/2340] drm/xe: Use pool of ordered wq for GuC submission

To appease lockdep, use a pool of ordered wq for GuC submission rather
tha leaving the ordered wq allocation to the drm sched. Without this change
eventually lockdep runs out of hash entries (MAX_LOCKDEP_CHAINS is
exceeded) as each user allocated exec queue adds more hash table entries
to lockdep. A pool old of 256 ordered wq should be enough to have
similar behavior with and without lockdep enabled.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 68 ++++++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_guc_types.h  |  7 +++
 2 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index d0e60349fc5a1..8d5af11fb80d3 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -188,6 +188,58 @@ static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
 	return exec_queue_killed(q) || exec_queue_banned(q);
 }
 
+#ifdef CONFIG_PROVE_LOCKING
+static int alloc_submit_wq(struct xe_guc *guc)
+{
+	int i;
+
+	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
+		guc->submission_state.submit_wq_pool[i] =
+			alloc_ordered_workqueue("submit_wq", 0);
+		if (!guc->submission_state.submit_wq_pool[i])
+			goto err_free;
+	}
+
+	return 0;
+
+err_free:
+	while (i)
+		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);
+
+	return -ENOMEM;
+}
+
+static void free_submit_wq(struct xe_guc *guc)
+{
+	int i;
+
+	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
+		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
+}
+
+static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
+{
+	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;
+
+	return guc->submission_state.submit_wq_pool[idx];
+}
+#else
+static int alloc_submit_wq(struct xe_guc *guc)
+{
+	return 0;
+}
+
+static void free_submit_wq(struct xe_guc *guc)
+{
+
+}
+
+static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
+{
+	return NULL;
+}
+#endif
+
 static void guc_submit_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc *guc = arg;
@@ -195,6 +247,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
 	xa_destroy(&guc->submission_state.exec_queue_lookup);
 	ida_destroy(&guc->submission_state.guc_ids);
 	bitmap_free(guc->submission_state.guc_ids_bitmap);
+	free_submit_wq(guc);
 	mutex_destroy(&guc->submission_state.lock);
 }
 
@@ -230,6 +283,12 @@ int xe_guc_submit_init(struct xe_guc *guc)
 	if (!guc->submission_state.guc_ids_bitmap)
 		return -ENOMEM;
 
+	err = alloc_submit_wq(guc);
+	if (err) {
+		bitmap_free(guc->submission_state.guc_ids_bitmap);
+		return err;
+	}
+
 	gt->exec_queue_ops = &guc_exec_queue_ops;
 
 	mutex_init(&guc->submission_state.lock);
@@ -1166,10 +1225,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 
 	timeout = (q->vm && xe_vm_no_dma_fences(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
 		  q->hwe->eclass->sched_props.job_timeout_ms;
-	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
-			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
-			     64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
-			     q->name, gt_to_xe(q->gt)->drm.dev);
+	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
+			    get_submit_wq(guc),
+			    q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
+			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
+			    q->name, gt_to_xe(q->gt)->drm.dev);
 	if (err)
 		goto err_free;
 
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index a5e58917a4994..0fdcc05dc16a6 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -61,6 +61,13 @@ struct xe_guc {
 			/** @patch: patch version of GuC submission */
 			u32 patch;
 		} version;
+#ifdef CONFIG_PROVE_LOCKING
+#define NUM_SUBMIT_WQ	256
+		/** @submit_wq_pool: submission ordered workqueues pool */
+		struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ];
+		/** @submit_wq_idx: submission ordered workqueue index */
+		int submit_wq_idx;
+#endif
 		/** @enabled: submission is enabled */
 		bool enabled;
 	} submission_state;
-- 
GitLab


From 44e694958b95395bd1c41508c88c8ca141bf9bd7 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 17 Aug 2023 16:30:41 -0400
Subject: [PATCH 2051/2340] drm/xe/display: Implement display support

As for display, the intent is to share the display code with the i915
driver so that there is maximum reuse there.

We do this by recompiling i915/display code twice.
Now that i915 has been adapted to support the Xe build, we can add
the xe/display support.

This initial work is a collaboration of many people and unfortunately
this squashed patch won't fully honor the proper credits.
But let's try to add a few from the squashed patches:

Co-developed-by: Matthew Brost <matthew.brost@intel.com>
Co-developed-by: Jani Nikula <jani.nikula@intel.com>
Co-developed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Co-developed-by: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Co-developed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Co-developed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/xe/.kunitconfig               |   1 +
 drivers/gpu/drm/xe/Kconfig                    |  22 +
 drivers/gpu/drm/xe/Makefile                   | 141 +++++-
 .../compat-i915-headers/gem/i915_gem_lmem.h   |   1 +
 .../compat-i915-headers/gem/i915_gem_mman.h   |  17 +
 .../compat-i915-headers/gem/i915_gem_object.h |  65 +++
 .../drm/xe/compat-i915-headers/gt/intel_rps.h |  11 +
 .../compat-i915-headers/i915_active_types.h   |   0
 .../drm/xe/compat-i915-headers/i915_config.h  |  19 +
 .../drm/xe/compat-i915-headers/i915_debugfs.h |  14 +
 .../gpu/drm/xe/compat-i915-headers/i915_drv.h | 230 ++++++++++
 .../drm/xe/compat-i915-headers/i915_fixed.h   |   6 +
 .../xe/compat-i915-headers/i915_gpu_error.h   |  17 +
 .../gpu/drm/xe/compat-i915-headers/i915_irq.h |   6 +
 .../gpu/drm/xe/compat-i915-headers/i915_reg.h |   6 +
 .../xe/compat-i915-headers/i915_reg_defs.h    |   6 +
 .../drm/xe/compat-i915-headers/i915_trace.h   |   6 +
 .../drm/xe/compat-i915-headers/i915_utils.h   |   6 +
 .../drm/xe/compat-i915-headers/i915_vgpu.h    |  44 ++
 .../gpu/drm/xe/compat-i915-headers/i915_vma.h |  31 ++
 .../xe/compat-i915-headers/i915_vma_types.h   |  74 ++++
 .../compat-i915-headers/intel_clock_gating.h  |   6 +
 .../compat-i915-headers/intel_mchbar_regs.h   |   6 +
 .../xe/compat-i915-headers/intel_pci_config.h |   6 +
 .../drm/xe/compat-i915-headers/intel_pcode.h  |  42 ++
 .../xe/compat-i915-headers/intel_runtime_pm.h |  22 +
 .../drm/xe/compat-i915-headers/intel_step.h   |  20 +
 .../drm/xe/compat-i915-headers/intel_uc_fw.h  |  11 +
 .../drm/xe/compat-i915-headers/intel_uncore.h | 175 ++++++++
 .../xe/compat-i915-headers/intel_wakeref.h    |   8 +
 .../xe/compat-i915-headers/pxp/intel_pxp.h    |  28 ++
 .../xe/compat-i915-headers/soc/intel_dram.h   |   6 +
 .../xe/compat-i915-headers/soc/intel_gmch.h   |   6 +
 .../xe/compat-i915-headers/soc/intel_pch.h    |   6 +
 .../drm/xe/compat-i915-headers/vlv_sideband.h | 132 ++++++
 .../xe/compat-i915-headers/vlv_sideband_reg.h |   6 +
 drivers/gpu/drm/xe/display/ext/i915_irq.c     |  77 ++++
 drivers/gpu/drm/xe/display/ext/i915_utils.c   |  22 +
 drivers/gpu/drm/xe/display/intel_fb_bo.c      |  74 ++++
 drivers/gpu/drm/xe/display/intel_fb_bo.h      |  24 +
 drivers/gpu/drm/xe/display/intel_fbdev_fb.c   | 104 +++++
 drivers/gpu/drm/xe/display/intel_fbdev_fb.h   |  21 +
 drivers/gpu/drm/xe/display/xe_display_rps.c   |  17 +
 drivers/gpu/drm/xe/display/xe_fb_pin.c        | 326 ++++++++++++++
 drivers/gpu/drm/xe/display/xe_hdcp_gsc.c      |  24 +
 drivers/gpu/drm/xe/display/xe_plane_initial.c | 291 +++++++++++++
 drivers/gpu/drm/xe/regs/xe_reg_defs.h         |   2 +-
 drivers/gpu/drm/xe/regs/xe_regs.h             |  13 -
 drivers/gpu/drm/xe/xe_bo.c                    |   6 +-
 drivers/gpu/drm/xe/xe_device.c                |  58 ++-
 drivers/gpu/drm/xe/xe_device_types.h          |  86 ++++
 drivers/gpu/drm/xe/xe_display.c               | 411 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_display.h               |  72 +++
 drivers/gpu/drm/xe/xe_ggtt.c                  |  25 +-
 drivers/gpu/drm/xe/xe_ggtt.h                  |   3 +-
 drivers/gpu/drm/xe/xe_irq.c                   |  13 +-
 drivers/gpu/drm/xe/xe_module.c                |   4 +
 drivers/gpu/drm/xe/xe_pci.c                   |  35 +-
 drivers/gpu/drm/xe/xe_pm.c                    |  13 +-
 59 files changed, 2873 insertions(+), 51 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_config.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_step.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
 create mode 100644 drivers/gpu/drm/xe/display/ext/i915_irq.c
 create mode 100644 drivers/gpu/drm/xe/display/ext/i915_utils.c
 create mode 100644 drivers/gpu/drm/xe/display/intel_fb_bo.c
 create mode 100644 drivers/gpu/drm/xe/display/intel_fb_bo.h
 create mode 100644 drivers/gpu/drm/xe/display/intel_fbdev_fb.c
 create mode 100644 drivers/gpu/drm/xe/display/intel_fbdev_fb.h
 create mode 100644 drivers/gpu/drm/xe/display/xe_display_rps.c
 create mode 100644 drivers/gpu/drm/xe/display/xe_fb_pin.c
 create mode 100644 drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
 create mode 100644 drivers/gpu/drm/xe/display/xe_plane_initial.c
 create mode 100644 drivers/gpu/drm/xe/xe_display.c
 create mode 100644 drivers/gpu/drm/xe/xe_display.h

diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
index 06ed30420a8df..3769af94e3918 100644
--- a/drivers/gpu/drm/xe/.kunitconfig
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -6,6 +6,7 @@ CONFIG_DRM=y
 CONFIG_DRM_FBDEV_EMULATION=y
 CONFIG_DRM_KMS_HELPER=y
 CONFIG_DRM_XE=y
+CONFIG_DRM_XE_DISPLAY=n
 CONFIG_EXPERT=y
 CONFIG_FB=y
 CONFIG_DRM_XE_KUNIT_TEST=y
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 46325c64ff226..5b3da06e7ba30 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -12,8 +12,20 @@ config DRM_XE
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_SUBALLOC_HELPER
+	select DRM_DISPLAY_DP_HELPER
+	select DRM_DISPLAY_HDCP_HELPER
+	select DRM_DISPLAY_HDMI_HELPER
+	select DRM_DISPLAY_HELPER
+	select DRM_MIPI_DSI
 	select RELAY
 	select IRQ_WORK
+	# i915 depends on ACPI_VIDEO when ACPI is enabled
+	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
+	select BACKLIGHT_CLASS_DEVICE if ACPI
+	select INPUT if ACPI
+	select ACPI_VIDEO if X86 && ACPI
+	select ACPI_BUTTON if ACPI
+	select ACPI_WMI if ACPI
 	select SYNC_FILE
 	select IOSF_MBI
 	select CRC32
@@ -33,6 +45,16 @@ config DRM_XE
 
 	  If "M" is selected, the module will be called xe.
 
+config DRM_XE_DISPLAY
+	bool "Enable display support"
+	depends on DRM_XE && EXPERT && DRM_XE=m
+	select FB_IOMEM_HELPERS
+	select I2C
+	select I2C_ALGOBIT
+	default y
+	help
+	  Disable this option only if you want to compile out display support.
+
 config DRM_XE_FORCE_PROBE
 	string "Force probe xe for selected Intel hardware IDs"
 	depends on DRM_XE
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1d39784e92fd9..2777cbf07cc6e 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -24,9 +24,6 @@ subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
 subdir-ccflags-y += $(call cc-disable-warning, frame-address)
 subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
 
-# Fine grained warnings disable
-CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init)
-
 subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src)
 
 # generated sources
@@ -126,13 +123,147 @@ xe-y += xe_bb.o \
 # graphics hardware monitoring (HWMON) support
 xe-$(CONFIG_HWMON) += xe_hwmon.o
 
-obj-$(CONFIG_DRM_XE) += xe.o
-obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+# i915 Display compat #defines and #includes
+subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
+	-I$(srctree)/$(src)/display/ext \
+	-I$(srctree)/$(src)/compat-i915-headers \
+	-I$(srctree)/drivers/gpu/drm/xe/display/ \
+	-I$(srctree)/drivers/gpu/drm/i915/display/ \
+	-Ddrm_i915_gem_object=xe_bo \
+	-Ddrm_i915_private=xe_device
+
+CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init)
+
+# Rule to build SOC code shared with i915
+$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE
+	$(call cmd,force_checksrc)
+	$(call if_changed_rule,cc_o_c)
+
+# Rule to build display code shared with i915
+$(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
+	$(call cmd,force_checksrc)
+	$(call if_changed_rule,cc_o_c)
+
+# Display code specific to xe
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	xe_display.o \
+	display/xe_fb_pin.o \
+	display/xe_hdcp_gsc.o \
+	display/xe_plane_initial.o \
+	display/xe_display_rps.o \
+	display/intel_fbdev_fb.o \
+	display/intel_fb_bo.o \
+	display/ext/i915_irq.o \
+	display/ext/i915_utils.o
+
+# SOC code shared with i915
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	i915-soc/intel_dram.o \
+	i915-soc/intel_pch.o
+
+# Display code shared with i915
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	i915-display/icl_dsi.o \
+	i915-display/intel_atomic.o \
+	i915-display/intel_atomic_plane.o \
+	i915-display/intel_audio.o \
+	i915-display/intel_backlight.o \
+	i915-display/intel_bios.o \
+	i915-display/intel_bw.o \
+	i915-display/intel_cdclk.o \
+	i915-display/intel_color.o \
+	i915-display/intel_combo_phy.o \
+	i915-display/intel_connector.o \
+	i915-display/intel_crtc.o \
+	i915-display/intel_crtc_state_dump.o \
+	i915-display/intel_cursor.o \
+	i915-display/intel_cx0_phy.o \
+	i915-display/intel_ddi.o \
+	i915-display/intel_ddi_buf_trans.o \
+	i915-display/intel_display.o \
+	i915-display/intel_display_debugfs.o \
+	i915-display/intel_display_debugfs_params.o \
+	i915-display/intel_display_device.o \
+	i915-display/intel_display_driver.o \
+	i915-display/intel_display_irq.o \
+	i915-display/intel_display_params.o \
+	i915-display/intel_display_power.o \
+	i915-display/intel_display_power_map.o \
+	i915-display/intel_display_power_well.o \
+	i915-display/intel_display_trace.o \
+	i915-display/intel_display_wa.o \
+	i915-display/intel_dkl_phy.o \
+	i915-display/intel_dmc.o \
+	i915-display/intel_dp.o \
+	i915-display/intel_dp_aux.o \
+	i915-display/intel_dp_aux_backlight.o \
+	i915-display/intel_dp_hdcp.o \
+	i915-display/intel_dp_link_training.o \
+	i915-display/intel_dp_mst.o \
+	i915-display/intel_dpll.o \
+	i915-display/intel_dpll_mgr.o \
+	i915-display/intel_dpt_common.o \
+	i915-display/intel_drrs.o \
+	i915-display/intel_dsb.o \
+	i915-display/intel_dsi.o \
+	i915-display/intel_dsi_dcs_backlight.o \
+	i915-display/intel_dsi_vbt.o \
+	i915-display/intel_fb.o \
+	i915-display/intel_fbc.o \
+	i915-display/intel_fdi.o \
+	i915-display/intel_fifo_underrun.o \
+	i915-display/intel_frontbuffer.o \
+	i915-display/intel_global_state.o \
+	i915-display/intel_gmbus.o \
+	i915-display/intel_hdcp.o \
+	i915-display/intel_hdmi.o \
+	i915-display/intel_hotplug.o \
+	i915-display/intel_hotplug_irq.o \
+	i915-display/intel_hti.o \
+	i915-display/intel_link_bw.o \
+	i915-display/intel_lspcon.o \
+	i915-display/intel_modeset_lock.o \
+	i915-display/intel_modeset_setup.o \
+	i915-display/intel_modeset_verify.o \
+	i915-display/intel_panel.o \
+	i915-display/intel_pipe_crc.o \
+	i915-display/intel_pmdemand.o \
+	i915-display/intel_pps.o \
+	i915-display/intel_psr.o \
+	i915-display/intel_qp_tables.o \
+	i915-display/intel_quirks.o \
+	i915-display/intel_snps_phy.o \
+	i915-display/intel_tc.o \
+	i915-display/intel_vblank.o \
+	i915-display/intel_vdsc.o \
+	i915-display/intel_vga.o \
+	i915-display/intel_vrr.o \
+	i915-display/intel_wm.o \
+	i915-display/skl_scaler.o \
+	i915-display/skl_universal_plane.o \
+	i915-display/skl_watermark.o
 
 xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o
 
+ifeq ($(CONFIG_ACPI),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += \
+		i915-display/intel_acpi.o \
+		i915-display/intel_opregion.o
+endif
+
+ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o
+endif
+
+obj-$(CONFIG_DRM_XE) += xe.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+
 # header test
 hdrtest_find_args := -not -path xe_rtp_helpers.h
+ifneq ($(CONFIG_DRM_XE_DISPLAY),y)
+	hdrtest_find_args += -not -path display/\* -not -path compat-i915-headers/\* -not -path xe_display.h
+endif
 
 always-$(CONFIG_DRM_XE_WERROR) += \
 	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args)))
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
new file mode 100644
index 0000000000000..710cecca972da
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
@@ -0,0 +1 @@
+/* Empty */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
new file mode 100644
index 0000000000000..650ea2803a97c
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_GEM_MMAN_H_
+#define _I915_GEM_MMAN_H_
+
+#include "xe_bo_types.h"
+#include <drm/drm_prime.h>
+
+static inline int i915_gem_fb_mmap(struct xe_bo *bo, struct vm_area_struct *vma)
+{
+	return drm_gem_prime_mmap(&bo->ttm.base, vma);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
new file mode 100644
index 0000000000000..5f19550cc8453
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_H_
+#define _I915_GEM_OBJECT_H_
+
+#include <linux/types.h>
+
+#include "xe_bo.h"
+
+#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT)
+
+static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
+{
+	/* Should never be called */
+	WARN_ON(1);
+	return n;
+}
+
+static inline bool i915_gem_object_is_tiled(const struct xe_bo *bo)
+{
+	/* legacy tiling is unused */
+	return false;
+}
+
+static inline bool i915_gem_object_is_userptr(const struct xe_bo *bo)
+{
+	/* legacy tiling is unused */
+	return false;
+}
+
+static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
+					  u32 ofs, u64 *ptr, u32 size)
+{
+	struct ttm_bo_kmap_obj map;
+	void *virtual;
+	bool is_iomem;
+	int ret;
+
+	XE_WARN_ON(size != 8);
+
+	ret = xe_bo_lock(bo, true);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_kmap(&bo->ttm, ofs >> PAGE_SHIFT, 1, &map);
+	if (ret)
+		goto out_unlock;
+
+	ofs &= ~PAGE_MASK;
+	virtual = ttm_kmap_obj_virtual(&map, &is_iomem);
+	if (is_iomem)
+		*ptr = readq((void __iomem *)(virtual + ofs));
+	else
+		*ptr = *(u64 *)(virtual + ofs);
+
+	ttm_bo_kunmap(&map);
+out_unlock:
+	xe_bo_unlock(bo);
+	return ret;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h
new file mode 100644
index 0000000000000..21fec9cc837c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_RPS_H__
+#define __INTEL_RPS_H__
+
+#define gen5_rps_irq_handler(x) ({})
+
+#endif /* __INTEL_RPS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h
new file mode 100644
index 0000000000000..e835bea08d1bd
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_CONFIG_H__
+#define __I915_CONFIG_H__
+
+#include <linux/sched.h>
+
+struct drm_i915_private;
+
+static inline unsigned long
+i915_fence_timeout(const struct drm_i915_private *i915)
+{
+	return MAX_SCHEDULE_TIMEOUT;
+}
+
+#endif /* __I915_CONFIG_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
new file mode 100644
index 0000000000000..b4c47617b64bb
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_DEBUGFS_H__
+#define __I915_DEBUGFS_H__
+
+struct drm_i915_gem_object;
+struct seq_file;
+
+static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {}
+
+#endif /* __I915_DEBUGFS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
new file mode 100644
index 0000000000000..c3aa5936667a3
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#ifndef _XE_I915_DRV_H_
+#define _XE_I915_DRV_H_
+
+/*
+ * "Adaptation header" to allow i915 display to also build for xe driver.
+ * TODO: refactor i915 and xe so this can cease to exist
+ */
+
+#include <drm/drm_drv.h>
+
+#include "gem/i915_gem_object.h"
+
+#include "soc/intel_pch.h"
+#include "xe_device.h"
+#include "xe_bo.h"
+#include "xe_pm.h"
+#include "xe_step.h"
+#include "i915_gpu_error.h"
+#include "i915_reg_defs.h"
+#include "i915_utils.h"
+#include "intel_step.h"
+#include "intel_uc_fw.h"
+#include "intel_uncore.h"
+#include "intel_runtime_pm.h"
+#include <linux/pm_runtime.h>
+
+static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
+{
+	return container_of(dev, struct drm_i915_private, drm);
+}
+
+static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
+{
+	return dev_get_drvdata(kdev);
+}
+
+
+#define INTEL_JASPERLAKE 0
+#define INTEL_ELKHARTLAKE 0
+#define IS_PLATFORM(xe, x) ((xe)->info.platform == x)
+#define INTEL_INFO(dev_priv)	(&((dev_priv)->info))
+#define INTEL_DEVID(dev_priv)	((dev_priv)->info.devid)
+#define IS_I830(dev_priv)	(dev_priv && 0)
+#define IS_I845G(dev_priv)	(dev_priv && 0)
+#define IS_I85X(dev_priv)	(dev_priv && 0)
+#define IS_I865G(dev_priv)	(dev_priv && 0)
+#define IS_I915G(dev_priv)	(dev_priv && 0)
+#define IS_I915GM(dev_priv)	(dev_priv && 0)
+#define IS_I945G(dev_priv)	(dev_priv && 0)
+#define IS_I945GM(dev_priv)	(dev_priv && 0)
+#define IS_I965G(dev_priv)	(dev_priv && 0)
+#define IS_I965GM(dev_priv)	(dev_priv && 0)
+#define IS_G45(dev_priv)	(dev_priv && 0)
+#define IS_GM45(dev_priv)	(dev_priv && 0)
+#define IS_G4X(dev_priv)	(dev_priv && 0)
+#define IS_PINEVIEW(dev_priv)	(dev_priv && 0)
+#define IS_G33(dev_priv)	(dev_priv && 0)
+#define IS_IRONLAKE(dev_priv)	(dev_priv && 0)
+#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0)
+#define IS_SANDYBRIDGE(dev_priv)	(dev_priv && 0)
+#define IS_IVYBRIDGE(dev_priv)	(dev_priv && 0)
+#define IS_IVB_GT1(dev_priv)	(dev_priv && 0)
+#define IS_VALLEYVIEW(dev_priv)	(dev_priv && 0)
+#define IS_CHERRYVIEW(dev_priv)	(dev_priv && 0)
+#define IS_HASWELL(dev_priv)	(dev_priv && 0)
+#define IS_BROADWELL(dev_priv)	(dev_priv && 0)
+#define IS_SKYLAKE(dev_priv)	(dev_priv && 0)
+#define IS_BROXTON(dev_priv)	(dev_priv && 0)
+#define IS_KABYLAKE(dev_priv)	(dev_priv && 0)
+#define IS_GEMINILAKE(dev_priv)	(dev_priv && 0)
+#define IS_COFFEELAKE(dev_priv)	(dev_priv && 0)
+#define IS_COMETLAKE(dev_priv)	(dev_priv && 0)
+#define IS_ICELAKE(dev_priv)	(dev_priv && 0)
+#define IS_JASPERLAKE(dev_priv)	(dev_priv && 0)
+#define IS_ELKHARTLAKE(dev_priv)	(dev_priv && 0)
+#define IS_TIGERLAKE(dev_priv)	IS_PLATFORM(dev_priv, XE_TIGERLAKE)
+#define IS_ROCKETLAKE(dev_priv)	IS_PLATFORM(dev_priv, XE_ROCKETLAKE)
+#define IS_DG1(dev_priv)        IS_PLATFORM(dev_priv, XE_DG1)
+#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S)
+#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P)
+#define IS_XEHPSDV(dev_priv) (dev_priv && 0)
+#define IS_DG2(dev_priv)	IS_PLATFORM(dev_priv, XE_DG2)
+#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC)
+#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
+#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
+
+#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
+#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
+#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0)
+
+#define IP_VER(ver, rel)                ((ver) << 8 | (rel))
+
+#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe)))
+
+#define IS_GRAPHICS_VER(xe, first, last) \
+	((xe)->info.graphics_verx100 >= first * 100 && \
+	 (xe)->info.graphics_verx100 <= (last*100 + 99))
+#define IS_MOBILE(xe) (xe && 0)
+#define HAS_LLC(xe) (!IS_DGFX((xe)))
+
+#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270
+
+/* Workarounds not handled yet */
+#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; })
+#define IS_GRAPHICS_STEP(xe, first, last) ({u8 __step = (xe)->info.step.graphics; first <= __step && __step <= last; })
+
+#define IS_LP(xe) (0)
+#define IS_GEN9_LP(xe) (0)
+#define IS_GEN9_BC(xe) (0)
+
+#define IS_TIGERLAKE_UY(xe) (xe && 0)
+#define IS_COMETLAKE_ULX(xe) (xe && 0)
+#define IS_COFFEELAKE_ULX(xe) (xe && 0)
+#define IS_KABYLAKE_ULX(xe) (xe && 0)
+#define IS_SKYLAKE_ULX(xe) (xe && 0)
+#define IS_HASWELL_ULX(xe) (xe && 0)
+#define IS_COMETLAKE_ULT(xe) (xe && 0)
+#define IS_COFFEELAKE_ULT(xe) (xe && 0)
+#define IS_KABYLAKE_ULT(xe) (xe && 0)
+#define IS_SKYLAKE_ULT(xe) (xe && 0)
+
+#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last))
+#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \
+	((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \
+	 IS_GRAPHICS_STEP(xe, first, last))
+#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last))
+
+/* XXX: No basedie stepping support yet */
+#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe))
+
+#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) (IS_JSL_EHL(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+
+/* FIXME: Add subplatform here */
+#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+
+#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10)
+#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11)
+#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12)
+#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU)
+#define IS_ICL_WITH_PORT_F(xe) (xe && 0)
+#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe))
+#define to_intel_bo(x) gem_to_xe_bo((x))
+#define mkwrite_device_info(xe) (INTEL_INFO(xe))
+
+#define HAS_128_BYTE_Y_TILING(xe) (xe || 1)
+
+#define intel_has_gpu_reset(a) (a && 0)
+
+#include "intel_wakeref.h"
+
+static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	if (xe_pm_runtime_get(xe) < 0) {
+		xe_pm_runtime_put(xe);
+		return false;
+	}
+	return true;
+}
+
+static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	return xe_pm_runtime_get_if_active(xe);
+}
+
+static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	xe_pm_runtime_put(xe);
+}
+
+static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref)
+{
+	if (wakeref)
+		intel_runtime_pm_put_unchecked(pm);
+}
+
+#define intel_runtime_pm_get_raw intel_runtime_pm_get
+#define intel_runtime_pm_put_raw intel_runtime_pm_put
+#define assert_rpm_wakelock_held(x) do { } while (0)
+#define assert_rpm_raw_wakeref_held(x) do { } while (0)
+
+#define intel_uncore_forcewake_get(x, y) do { } while (0)
+#define intel_uncore_forcewake_put(x, y) do { } while (0)
+
+#define intel_uncore_arm_unclaimed_mmio_detection(x) do { } while (0)
+
+#define I915_PRIORITY_DISPLAY 0
+struct i915_sched_attr {
+	int priority;
+};
+#define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0)
+
+#define with_intel_runtime_pm(rpm, wf) \
+	for ((wf) = intel_runtime_pm_get(rpm); (wf); \
+	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+
+#define pdev_to_i915 pdev_to_xe_device
+#define RUNTIME_INFO(xe)		(&(xe)->info.i915_runtime)
+
+#define FORCEWAKE_ALL XE_FORCEWAKE_ALL
+#define HPD_STORM_DEFAULT_THRESHOLD 50
+
+#ifdef CONFIG_ARM64
+/*
+ * arm64 indirectly includes linux/rtc.h,
+ * which defines a irq_lock, so include it
+ * here before #define-ing it
+ */
+#include <linux/rtc.h>
+#endif
+
+#define irq_lock irq.lock
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
new file mode 100644
index 0000000000000..12c671fd52351
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_fixed.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
new file mode 100644
index 0000000000000..98e9dd78f670b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_GPU_ERROR_H_
+#define _I915_GPU_ERROR_H_
+
+struct drm_i915_error_state_buf;
+
+__printf(2, 3)
+static inline void
+i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
+{
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h
new file mode 100644
index 0000000000000..61707a07f91fb
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_irq.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h
new file mode 100644
index 0000000000000..8619ec015ad43
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_reg.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h
new file mode 100644
index 0000000000000..723279c975b11
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_reg_defs.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
new file mode 100644
index 0000000000000..d429d421ac701
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#define trace_i915_reg_rw(a...) do { } while (0)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
new file mode 100644
index 0000000000000..1d7c4360e5c0a
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_utils.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
new file mode 100644
index 0000000000000..80b024d435dc4
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_VGPU_H_
+#define _I915_VGPU_H_
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct i915_ggtt;
+
+static inline void intel_vgpu_detect(struct drm_i915_private *i915)
+{
+}
+static inline bool intel_vgpu_active(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline void intel_vgpu_register(struct drm_i915_private *i915)
+{
+}
+static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline int intel_vgt_balloon(struct i915_ggtt *ggtt)
+{
+	return 0;
+}
+static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt)
+{
+}
+
+#endif /* _I915_VGPU_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
new file mode 100644
index 0000000000000..e4bbdffcd5f5a
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef I915_VMA_H
+#define I915_VMA_H
+
+#include <uapi/drm/i915_drm.h>
+#include <drm/drm_mm.h>
+
+/* We don't want these from i915_drm.h in case of Xe */
+#undef I915_TILING_X
+#undef I915_TILING_Y
+#define I915_TILING_X 0
+#define I915_TILING_Y 0
+
+struct xe_bo;
+
+struct i915_vma {
+	struct xe_bo *bo, *dpt;
+	struct drm_mm_node node;
+};
+
+
+static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
+{
+	return vma->node.start;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h
new file mode 100644
index 0000000000000..e7aaf50f54852
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/* XX: Figure out how to handle this vma mapping in xe */
+struct intel_remapped_plane_info {
+	/* in gtt pages */
+	u32 offset:31;
+	u32 linear:1;
+	union {
+		/* in gtt pages for !linear */
+		struct {
+			u16 width;
+			u16 height;
+			u16 src_stride;
+			u16 dst_stride;
+		};
+
+		/* in gtt pages for linear */
+		u32 size;
+	};
+} __packed;
+
+struct intel_remapped_info {
+	struct intel_remapped_plane_info plane[4];
+	/* in gtt pages */
+	u32 plane_alignment;
+} __packed;
+
+struct intel_rotation_info {
+	struct intel_remapped_plane_info plane[2];
+} __packed;
+
+enum i915_gtt_view_type {
+	I915_GTT_VIEW_NORMAL = 0,
+	I915_GTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
+	I915_GTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
+};
+
+static inline void assert_i915_gem_gtt_types(void)
+{
+	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 2 * sizeof(u32) + 8 * sizeof(u16));
+	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 5 * sizeof(u32) + 16 * sizeof(u16));
+
+	/* Check that rotation/remapped shares offsets for simplicity */
+	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
+		     offsetof(struct intel_rotation_info, plane[0]));
+	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
+		     offsetofend(struct intel_rotation_info, plane[1]));
+
+	/* As we encode the size of each branch inside the union into its type,
+	 * we have to be careful that each branch has a unique size.
+	 */
+	switch ((enum i915_gtt_view_type)0) {
+	case I915_GTT_VIEW_NORMAL:
+	case I915_GTT_VIEW_ROTATED:
+	case I915_GTT_VIEW_REMAPPED:
+		/* gcc complains if these are identical cases */
+		break;
+	}
+}
+
+struct i915_gtt_view {
+	enum i915_gtt_view_type type;
+	union {
+		/* Members need to contain no holes/padding */
+		struct intel_rotation_info rotated;
+		struct intel_remapped_info remapped;
+	};
+};
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h
new file mode 100644
index 0000000000000..ce986f0e8f38d
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_clock_gating.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h
new file mode 100644
index 0000000000000..55b3169853401
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_mchbar_regs.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h
new file mode 100644
index 0000000000000..8c15867fd613d
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_pci_config.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
new file mode 100644
index 0000000000000..0c47661bdc6a6
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_PCODE_H__
+#define __INTEL_PCODE_H__
+
+#include "intel_uncore.h"
+#include "xe_pcode.h"
+
+static inline int
+snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val,
+			int fast_timeout_us, int slow_timeout_ms)
+{
+	return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val,
+				      slow_timeout_ms ?: 1);
+}
+
+static inline int
+snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val)
+{
+
+	return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val);
+}
+
+static inline int
+snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1)
+{
+	return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1);
+}
+
+static inline int
+skl_pcode_request(struct intel_uncore *uncore, u32 mbox,
+		  u32 request, u32 reply_mask, u32 reply,
+		  int timeout_base_ms)
+{
+	return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply,
+				timeout_base_ms);
+}
+
+#endif /* __INTEL_PCODE_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
new file mode 100644
index 0000000000000..f7f3286e2c534
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_wakeref.h"
+
+enum i915_drm_suspend_mode {
+	I915_DRM_SUSPEND_IDLE,
+	I915_DRM_SUSPEND_MEM,
+	I915_DRM_SUSPEND_HIBERNATE,
+};
+
+#define intel_runtime_pm xe_runtime_pm
+
+static inline void disable_rpm_wakeref_asserts(void *rpm)
+{
+}
+
+static inline void enable_rpm_wakeref_asserts(void *rpm)
+{
+}
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h
new file mode 100644
index 0000000000000..0006ef812346b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_STEP_H__
+#define __INTEL_STEP_H__
+
+#include "xe_device_types.h"
+#include "xe_step.h"
+
+#define intel_display_step_name xe_display_step_name
+
+static inline
+const char *xe_display_step_name(struct xe_device *xe)
+{
+	return xe_step_name(xe->info.step.display);
+}
+
+#endif /* __INTEL_STEP_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
new file mode 100644
index 0000000000000..0097453289926
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_UC_FW_H_
+#define _INTEL_UC_FW_H_
+
+#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git"
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
new file mode 100644
index 0000000000000..cd26ddc0f69e2
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_UNCORE_H__
+#define __INTEL_UNCORE_H__
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_mmio.h"
+
+static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+
+	return xe_root_mmio_gt(xe);
+}
+
+static inline u32 intel_uncore_read(struct intel_uncore *uncore,
+				    i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u32 intel_uncore_read8(struct intel_uncore *uncore,
+				     i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u32 intel_uncore_read16(struct intel_uncore *uncore,
+				      i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u64
+intel_uncore_read64_2x32(struct intel_uncore *uncore,
+			 i915_reg_t i915_lower_reg, i915_reg_t i915_upper_reg)
+{
+	struct xe_reg lower_reg = XE_REG(i915_mmio_reg_offset(i915_lower_reg));
+	struct xe_reg upper_reg = XE_REG(i915_mmio_reg_offset(i915_upper_reg));
+	u32 upper, lower, old_upper;
+	int loop = 0;
+
+	upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+	do {
+		old_upper = upper;
+		lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg);
+		upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+	} while (upper != old_upper && loop++ < 2);
+
+	return (u64)upper << 32 | lower;
+}
+
+static inline void intel_uncore_posting_read(struct intel_uncore *uncore,
+					     i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write(struct intel_uncore *uncore,
+				      i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
+				   i915_reg_t i915_reg, u32 clear, u32 set)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set);
+}
+
+static inline int intel_wait_for_register(struct intel_uncore *uncore,
+					  i915_reg_t i915_reg, u32 mask,
+					  u32 value, unsigned int timeout)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      timeout * USEC_PER_MSEC, NULL, false);
+}
+
+static inline int intel_wait_for_register_fw(struct intel_uncore *uncore,
+					     i915_reg_t i915_reg, u32 mask,
+					     u32 value, unsigned int timeout)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      timeout * USEC_PER_MSEC, NULL, false);
+}
+
+static inline int
+__intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
+			  u32 mask, u32 value, unsigned int fast_timeout_us,
+			  unsigned int slow_timeout_ms, u32 *out_value)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      fast_timeout_us + 1000 * slow_timeout_ms,
+			      out_value, false);
+}
+
+static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
+				       i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
+					 i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
+					    i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
+					      i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+
+	return xe_device_get_root_tile(xe)->mmio.regs;
+}
+
+/*
+ * The raw_reg_{read,write} macros are intended as a micro-optimization for
+ * interrupt handlers so that the pointer indirection on uncore->regs can
+ * be computed once (and presumably cached in a register) instead of generating
+ * extra load instructions for each MMIO access.
+ *
+ * Given that these macros are only intended for non-GSI interrupt registers
+ * (and the goal is to avoid extra instructions generated by the compiler),
+ * these macros do not account for uncore->gsi_offset.  Any caller that needs
+ * to use these macros on a GSI register is responsible for adding the
+ * appropriate GSI offset to the 'base' parameter.
+ */
+#define raw_reg_read(base, reg) \
+	readl(base + i915_mmio_reg_offset(reg))
+#define raw_reg_write(base, reg, value) \
+	writel(value, base + i915_mmio_reg_offset(reg))
+
+#endif /* __INTEL_UNCORE_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
new file mode 100644
index 0000000000000..1c5e30cf10caa
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+typedef bool intel_wakeref_t;
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
new file mode 100644
index 0000000000000..c2c30ece8f779
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_PXP_H__
+#define __INTEL_PXP_H__
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct intel_pxp;
+
+static inline int intel_pxp_key_check(struct intel_pxp *pxp,
+				      struct drm_i915_gem_object *obj,
+				      bool assign)
+{
+	return -ENODEV;
+}
+
+static inline bool
+i915_gem_object_is_protected(const struct drm_i915_gem_object *obj)
+{
+	return false;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h
new file mode 100644
index 0000000000000..65707e20c5576
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_dram.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h
new file mode 100644
index 0000000000000..33c5257b3a71e
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_gmch.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h
new file mode 100644
index 0000000000000..9c46556d33a40
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_pch.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
new file mode 100644
index 0000000000000..ec6f12de57274
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2013-2021 Intel Corporation
+ */
+
+#ifndef _VLV_SIDEBAND_H_
+#define _VLV_SIDEBAND_H_
+
+#include <linux/types.h>
+
+#include "vlv_sideband_reg.h"
+
+enum pipe;
+struct drm_i915_private;
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports)
+{
+}
+static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_iosf_sb_write(struct drm_i915_private *i915,
+				     u8 port, u32 reg, u32 val)
+{
+}
+static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports)
+{
+}
+static inline void vlv_bunit_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_bunit_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_cck_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_cck_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_ccu_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_ccu_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_dpio_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg)
+{
+	return 0;
+}
+static inline void vlv_dpio_write(struct drm_i915_private *i915,
+				  int pipe, int reg, u32 val)
+{
+}
+static inline void vlv_dpio_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_flisdsi_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_flisdsi_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_nc_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr)
+{
+	return 0;
+}
+static inline void vlv_nc_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_punit_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr)
+{
+	return 0;
+}
+static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val)
+{
+	return 0;
+}
+static inline void vlv_punit_put(struct drm_i915_private *i915)
+{
+}
+
+#endif /* _VLV_SIDEBAND_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
new file mode 100644
index 0000000000000..949f134ce3cf3
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/vlv_sideband_reg.h"
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
new file mode 100644
index 0000000000000..bee191a4a97df
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "intel_uncore.h"
+
+void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
+		    i915_reg_t iir, i915_reg_t ier)
+{
+	intel_uncore_write(uncore, imr, 0xffffffff);
+	intel_uncore_posting_read(uncore, imr);
+
+	intel_uncore_write(uncore, ier, 0);
+
+	/* IIR can theoretically queue up two events. Be paranoid. */
+	intel_uncore_write(uncore, iir, 0xffffffff);
+	intel_uncore_posting_read(uncore, iir);
+	intel_uncore_write(uncore, iir, 0xffffffff);
+	intel_uncore_posting_read(uncore, iir);
+}
+
+/*
+ * We should clear IMR at preinstall/uninstall, and just check at postinstall.
+ */
+void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+	u32 val = intel_uncore_read(uncore, reg);
+
+	if (val == 0)
+		return;
+
+	drm_WARN(&xe->drm, 1,
+		 "Interrupt register 0x%x is not zero: 0x%08x\n",
+		 i915_mmio_reg_offset(reg), val);
+	intel_uncore_write(uncore, reg, 0xffffffff);
+	intel_uncore_posting_read(uncore, reg);
+	intel_uncore_write(uncore, reg, 0xffffffff);
+	intel_uncore_posting_read(uncore, reg);
+}
+
+void gen3_irq_init(struct intel_uncore *uncore,
+		   i915_reg_t imr, u32 imr_val,
+		   i915_reg_t ier, u32 ier_val,
+		   i915_reg_t iir)
+{
+	gen3_assert_iir_is_zero(uncore, iir);
+
+	intel_uncore_write(uncore, ier, ier_val);
+	intel_uncore_write(uncore, imr, imr_val);
+	intel_uncore_posting_read(uncore, imr);
+}
+
+bool intel_irqs_enabled(struct xe_device *xe)
+{
+	/*
+	 * XXX: i915 has a racy handling of the irq.enabled, since it doesn't
+	 * lock its transitions. Because of that, the irq.enabled sometimes
+	 * is not read with the irq.lock in place.
+	 * However, the most critical cases like vblank and page flips are
+	 * properly using the locks.
+	 * We cannot take the lock in here or run any kind of assert because
+	 * of i915 inconsistency.
+	 * But at this point the xe irq is better protected against races,
+	 * although the full solution would be protecting the i915 side.
+	 */
+	return xe->irq.enabled;
+}
+
+void intel_synchronize_irq(struct xe_device *xe)
+{
+	synchronize_irq(to_pci_dev(xe->drm.dev)->irq);
+}
diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c
new file mode 100644
index 0000000000000..981edc2788bc3
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+bool i915_vtd_active(struct drm_i915_private *i915)
+{
+	if (device_iommu_mapped(i915->drm.dev))
+		return true;
+
+	/* Running as a guest, we assume the host is enforcing VT'd */
+	return i915_run_as_guest();
+}
+
+/* i915 specific, just put here for shutting it up */
+int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
+			      const char *func, int line)
+{
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
new file mode 100644
index 0000000000000..b21da7b745a5e
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_modeset_helper.h>
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_fb_bo.h"
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
+{
+	if (bo->flags & XE_BO_CREATE_PINNED_BIT) {
+		/* Unpin our kernel fb first */
+		xe_bo_lock(bo, false);
+		xe_bo_unpin(bo);
+		xe_bo_unlock(bo);
+	}
+	xe_bo_put(bo);
+}
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct xe_bo *bo,
+				 struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_private *i915 = to_i915(bo->ttm.base.dev);
+	int ret;
+
+	xe_bo_get(bo);
+
+	ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
+	if (ret)
+		return ret;
+
+	if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
+		/*
+		 * XE_BO_SCANOUT_BIT should ideally be set at creation, or is
+		 * automatically set when creating FB. We cannot change caching
+		 * mode when the boect is VM_BINDed, so we can only set
+		 * coherency with display when unbound.
+		 */
+		if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
+			ttm_bo_unreserve(&bo->ttm);
+			return -EINVAL;
+		}
+		bo->flags |= XE_BO_SCANOUT_BIT;
+	}
+	ttm_bo_unreserve(&bo->ttm);
+
+	return ret;
+}
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_gem_object *bo;
+	struct drm_gem_object *gem = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
+
+	if (!gem)
+		return ERR_PTR(-ENOENT);
+
+	bo = gem_to_xe_bo(gem);
+	/* Require vram placement or dma-buf import */
+	if (IS_DGFX(i915) &&
+	    !xe_bo_can_migrate(gem_to_xe_bo(gem), XE_PL_VRAM0) &&
+	    bo->ttm.type != ttm_bo_type_sg) {
+		drm_gem_object_put(gem);
+		return ERR_PTR(-EREMOTE);
+	}
+
+	return bo;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.h b/drivers/gpu/drm/xe/display/intel_fb_bo.h
new file mode 100644
index 0000000000000..5d365b925b7ad
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_private;
+struct intel_framebuffer;
+struct xe_bo;
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo);
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct xe_bo *bo,
+				 struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd);
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
new file mode 100644
index 0000000000000..51ae3561fd0de
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_fbdev_fb.h"
+
+#include <drm/drm_fb_helper.h>
+
+#include "xe_gt.h"
+#include "xe_ttm_stolen_mgr.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+			 struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_framebuffer *fb;
+	struct drm_device *dev = helper->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct drm_i915_gem_object *obj;
+	int size;
+
+	/* we don't do packed 24bpp */
+	if (sizes->surface_bpp == 24)
+		sizes->surface_bpp = 32;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+				    DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE);
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	size = PAGE_ALIGN(size);
+	obj = ERR_PTR(-ENODEV);
+
+	if (!IS_DGFX(dev_priv)) {
+		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv),
+					   NULL, size,
+					   ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+					   XE_BO_CREATE_STOLEN_BIT |
+					   XE_BO_CREATE_PINNED_BIT);
+		if (!IS_ERR(obj))
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n");
+		else
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen failed: %li\n", PTR_ERR(obj));
+	}
+	if (IS_ERR(obj)) {
+		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size,
+					  ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+					  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
+					  XE_BO_CREATE_PINNED_BIT);
+	}
+
+	if (IS_ERR(obj)) {
+		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
+		fb = ERR_PTR(-ENOMEM);
+		goto err;
+	}
+
+	fb = intel_framebuffer_create(obj, &mode_cmd);
+	if (IS_ERR(fb)) {
+		xe_bo_unpin_map_no_vm(obj);
+		goto err;
+	}
+
+	drm_gem_object_put(intel_bo_to_drm_bo(obj));
+	return fb;
+
+err:
+	return fb;
+}
+
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			      struct drm_i915_gem_object *obj, struct i915_vma *vma)
+{
+	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+
+	if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) {
+		if (obj->flags & XE_BO_CREATE_STOLEN_BIT)
+			info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0);
+		else
+			info->fix.smem_start =
+				pci_resource_start(pdev, 2) +
+				xe_bo_addr(obj, 0, XE_PAGE_SIZE);
+
+		info->fix.smem_len = obj->ttm.base.size;
+	} else {
+		/* XXX: Pure fiction, as the BO may not be physically accessible.. */
+		info->fix.smem_start = 0;
+		info->fix.smem_len = obj->ttm.base.size;
+	}
+	XE_WARN_ON(iosys_map_is_null(&obj->vmap));
+
+	info->screen_base = obj->vmap.vaddr_iomem;
+	info->screen_size = intel_bo_to_drm_bo(obj)->size;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.h b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h
new file mode 100644
index 0000000000000..ea186772e0bb0
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_FBDEV_FB_H__
+#define __INTEL_FBDEV_FB_H__
+
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct fb_info;
+struct i915_vma;
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+			 struct drm_fb_helper_surface_size *sizes);
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			      struct drm_i915_gem_object *obj, struct i915_vma *vma);
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c
new file mode 100644
index 0000000000000..ab21c581c1920
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_rps.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_display_rps.h"
+
+void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc,
+					  struct dma_fence *fence)
+{
+}
+
+void intel_display_rps_mark_interactive(struct drm_i915_private *i915,
+					struct intel_atomic_state *state,
+					bool interactive)
+{
+}
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
new file mode 100644
index 0000000000000..67b956a6da8d3
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_dpt.h"
+#include "intel_fb.h"
+#include "intel_fb_pin.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+
+#include <drm/ttm/ttm_bo.h>
+
+static void
+write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs,
+		  u32 width, u32 height, u32 src_stride, u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u32 column, row;
+
+	/* TODO: Maybe rewrite so we can traverse the bo addresses sequentially,
+	 * by writing dpt/ggtt in a different order?
+	 */
+
+	for (column = 0; column < width; column++) {
+		u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
+
+		for (row = 0; row < height; row++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			iosys_map_wr(map, *dpt_ofs, u64, pte);
+			*dpt_ofs += 8;
+			src_idx -= src_stride;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*dpt_ofs += (dst_stride - height) * 8;
+	}
+
+	/* Align to next page */
+	*dpt_ofs = ALIGN(*dpt_ofs, 4096);
+}
+
+static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
+			       const struct i915_gtt_view *view,
+			       struct i915_vma *vma)
+{
+	struct xe_device *xe = to_xe_device(fb->base.dev);
+	struct xe_tile *tile0 = xe_device_get_root_tile(xe);
+	struct xe_ggtt *ggtt = tile0->mem.ggtt;
+	struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt;
+	u32 dpt_size, size = bo->ttm.base.size;
+
+	if (view->type == I915_GTT_VIEW_NORMAL)
+		dpt_size = ALIGN(size / XE_PAGE_SIZE * 8, XE_PAGE_SIZE);
+	else
+		/* display uses 4K tiles instead of bytes here, convert to entries.. */
+		dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8,
+				 XE_PAGE_SIZE);
+
+	dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM0_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(dpt))
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_STOLEN_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(dpt))
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_SYSTEM_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(dpt))
+		return PTR_ERR(dpt);
+
+	if (view->type == I915_GTT_VIEW_NORMAL) {
+		u32 x;
+
+		for (x = 0; x < size / XE_PAGE_SIZE; x++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
+		}
+	} else {
+		const struct intel_rotation_info *rot_info = &view->rotated;
+		u32 i, dpt_ofs = 0;
+
+		for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
+			write_dpt_rotated(bo, &dpt->vmap, &dpt_ofs,
+					  rot_info->plane[i].offset,
+					  rot_info->plane[i].width,
+					  rot_info->plane[i].height,
+					  rot_info->plane[i].src_stride,
+					  rot_info->plane[i].dst_stride);
+	}
+
+	vma->dpt = dpt;
+	vma->node = dpt->ggtt_node;
+	return 0;
+}
+
+static void
+write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo_ofs,
+		   u32 width, u32 height, u32 src_stride, u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u32 column, row;
+
+	for (column = 0; column < width; column++) {
+		u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
+
+		for (row = 0; row < height; row++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte);
+			*ggtt_ofs += XE_PAGE_SIZE;
+			src_idx -= src_stride;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*ggtt_ofs += (dst_stride - height) * XE_PAGE_SIZE;
+	}
+}
+
+static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
+				const struct i915_gtt_view *view,
+				struct i915_vma *vma)
+{
+	struct xe_bo *bo = intel_fb_obj(&fb->base);
+	struct xe_device *xe = to_xe_device(fb->base.dev);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u32 align;
+	int ret;
+
+	/* TODO: Consider sharing framebuffer mapping?
+	 * embed i915_vma inside intel_framebuffer
+	 */
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	ret = mutex_lock_interruptible(&ggtt->lock);
+	if (ret)
+		goto out;
+
+	align = XE_PAGE_SIZE;
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		align = max_t(u32, align, SZ_64K);
+
+	if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
+		vma->node = bo->ggtt_node;
+	} else if (view->type == I915_GTT_VIEW_NORMAL) {
+		u32 x, size = bo->ttm.base.size;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		for (x = 0; x < size; x += XE_PAGE_SIZE) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			xe_ggtt_set_pte(ggtt, vma->node.start + x, pte);
+		}
+	} else {
+		u32 i, ggtt_ofs;
+		const struct intel_rotation_info *rot_info = &view->rotated;
+
+		/* display seems to use tiles instead of bytes here, so convert it back.. */
+		u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		ggtt_ofs = vma->node.start;
+
+		for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
+			write_ggtt_rotated(bo, ggtt, &ggtt_ofs,
+					   rot_info->plane[i].offset,
+					   rot_info->plane[i].width,
+					   rot_info->plane[i].height,
+					   rot_info->plane[i].src_stride,
+					   rot_info->plane[i].dst_stride);
+	}
+
+	xe_ggtt_invalidate(ggtt);
+out_unlock:
+	mutex_unlock(&ggtt->lock);
+out:
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	return ret;
+}
+
+static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
+					const struct i915_gtt_view *view)
+{
+	struct drm_device *dev = fb->base.dev;
+	struct xe_device *xe = to_xe_device(dev);
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	struct xe_bo *bo = intel_fb_obj(&fb->base);
+	int ret;
+
+	if (!vma)
+		return ERR_PTR(-ENODEV);
+
+	/* Remapped view is only required on ADL-P, which xe doesn't support. */
+	if (XE_WARN_ON(view->type == I915_GTT_VIEW_REMAPPED)) {
+		ret = -ENODEV;
+		goto err;
+	}
+
+	/*
+	 * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
+	 * assumptions are incorrect for framebuffers
+	 */
+	ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	if (ret)
+		goto err;
+
+	if (IS_DGFX(xe))
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0);
+	else
+		ret = xe_bo_validate(bo, NULL, true);
+	if (!ret)
+		ttm_bo_pin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+	if (ret)
+		goto err;
+
+	vma->bo = bo;
+	if (intel_fb_uses_dpt(&fb->base))
+		ret = __xe_pin_fb_vma_dpt(fb, view, vma);
+	else
+		ret = __xe_pin_fb_vma_ggtt(fb, view, vma);
+	if (ret)
+		goto err_unpin;
+
+	return vma;
+
+err_unpin:
+	ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+err:
+	kfree(vma);
+	return ERR_PTR(ret);
+}
+
+static void __xe_unpin_fb_vma(struct i915_vma *vma)
+{
+	struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+
+	if (vma->dpt)
+		xe_bo_unpin_map_no_vm(vma->dpt);
+	else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
+		 vma->bo->ggtt_node.start != vma->node.start)
+		xe_ggtt_remove_node(ggtt, &vma->node);
+
+	ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&vma->bo->ttm);
+	ttm_bo_unreserve(&vma->bo->ttm);
+	kfree(vma);
+}
+
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
+			   bool phys_cursor,
+			   const struct i915_gtt_view *view,
+			   bool uses_fence,
+			   unsigned long *out_flags)
+{
+	*out_flags = 0;
+
+	return __xe_pin_fb_vma(to_intel_framebuffer(fb), view);
+}
+
+void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
+{
+	__xe_unpin_fb_vma(vma);
+}
+
+int intel_plane_pin_fb(struct intel_plane_state *plane_state)
+{
+	struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct xe_bo *bo = intel_fb_obj(fb);
+	struct i915_vma *vma;
+
+	/* We reject creating !SCANOUT fb's, so this is weird.. */
+	drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT));
+
+	vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	plane_state->ggtt_vma = vma;
+	return 0;
+}
+
+void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
+{
+	__xe_unpin_fb_vma(old_plane_state->ggtt_vma);
+	old_plane_state->ggtt_vma = NULL;
+}
+
+/*
+ * For Xe introduce dummy intel_dpt_create which just return NULL and
+ * intel_dpt_destroy which does nothing.
+ */
+struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
+{
+	return NULL;
+}
+
+void intel_dpt_destroy(struct i915_address_space *vm)
+{
+	return;
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
new file mode 100644
index 0000000000000..0453293af8efe
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023, Intel Corporation.
+ */
+
+#include "i915_drv.h"
+#include "intel_hdcp_gsc.h"
+
+int intel_hdcp_gsc_init(struct drm_i915_private *i915)
+{
+	drm_info(&i915->drm, "HDCP support not yet implemented\n");
+	return -ENODEV;
+}
+
+void intel_hdcp_gsc_fini(struct drm_i915_private *i915)
+{
+}
+
+ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in,
+				size_t msg_in_len, u8 *msg_out,
+				size_t msg_out_len)
+{
+	return -ENODEV;
+}
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
new file mode 100644
index 0000000000000..ccf83c12b545a
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+/* for ioread64 */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "xe_ggtt.h"
+
+#include "i915_drv.h"
+#include "intel_atomic_plane.h"
+#include "intel_display.h"
+#include "intel_display_types.h"
+#include "intel_fb.h"
+#include "intel_fb_pin.h"
+#include "intel_frontbuffer.h"
+#include "intel_plane_initial.h"
+
+static bool
+intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
+			      const struct intel_initial_plane_config *plane_config,
+			      struct drm_framebuffer **fb)
+{
+	struct intel_crtc *crtc;
+
+	for_each_intel_crtc(&i915->drm, crtc) {
+		struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
+		struct intel_plane *plane =
+			to_intel_plane(crtc->base.primary);
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
+
+		if (!crtc_state->uapi.active)
+			continue;
+
+		if (!plane_state->ggtt_vma)
+			continue;
+
+		if (intel_plane_ggtt_offset(plane_state) == plane_config->base) {
+			*fb = plane_state->hw.fb;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static struct xe_bo *
+initial_plane_bo(struct xe_device *xe,
+		 struct intel_initial_plane_config *plane_config)
+{
+	struct xe_tile *tile0 = xe_device_get_root_tile(xe);
+	struct xe_bo *bo;
+	resource_size_t phys_base;
+	u32 base, size, flags;
+	u64 page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+
+	if (plane_config->size == 0)
+		return NULL;
+
+	flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT;
+
+	base = round_down(plane_config->base, page_size);
+	if (IS_DGFX(xe)) {
+		u64 __iomem *gte = tile0->mem.ggtt->gsm;
+		u64 pte;
+
+		gte += base / XE_PAGE_SIZE;
+
+		pte = ioread64(gte);
+		if (!(pte & XE_GGTT_PTE_DM)) {
+			drm_err(&xe->drm,
+				"Initial plane programming missing DM bit\n");
+			return NULL;
+		}
+
+		phys_base = pte & ~(page_size - 1);
+		flags |= XE_BO_CREATE_VRAM0_BIT;
+
+		/*
+		 * We don't currently expect this to ever be placed in the
+		 * stolen portion.
+		 */
+		if (phys_base >= tile0->mem.vram.usable_size) {
+			drm_err(&xe->drm,
+				"Initial plane programming using invalid range, phys_base=%pa\n",
+				&phys_base);
+			return NULL;
+		}
+
+		drm_dbg(&xe->drm,
+			"Using phys_base=%pa, based on initial plane programming\n",
+			&phys_base);
+	} else {
+		struct ttm_resource_manager *stolen = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+
+		if (!stolen)
+			return NULL;
+		phys_base = base;
+		flags |= XE_BO_CREATE_STOLEN_BIT;
+
+		/*
+		 * If the FB is too big, just don't use it since fbdev is not very
+		 * important and we should probably use that space with FBC or other
+		 * features.
+		 */
+		if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) &&
+		    plane_config->size * 2 >> PAGE_SHIFT >= stolen->size)
+			return NULL;
+	}
+
+	size = round_up(plane_config->base + plane_config->size,
+			page_size);
+	size -= base;
+
+	bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base,
+				     ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo)) {
+		drm_dbg(&xe->drm,
+			"Failed to create bo phys_base=%pa size %u with flags %x: %li\n",
+			&phys_base, size, flags, PTR_ERR(bo));
+		return NULL;
+	}
+
+	return bo;
+}
+
+static bool
+intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
+			      struct intel_initial_plane_config *plane_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
+	struct drm_framebuffer *fb = &plane_config->fb->base;
+	struct xe_bo *bo;
+
+	switch (fb->modifier) {
+	case DRM_FORMAT_MOD_LINEAR:
+	case I915_FORMAT_MOD_X_TILED:
+	case I915_FORMAT_MOD_Y_TILED:
+	case I915_FORMAT_MOD_4_TILED:
+		break;
+	default:
+		drm_dbg(&dev_priv->drm,
+			"Unsupported modifier for initial FB: 0x%llx\n",
+			fb->modifier);
+		return false;
+	}
+
+	mode_cmd.pixel_format = fb->format->format;
+	mode_cmd.width = fb->width;
+	mode_cmd.height = fb->height;
+	mode_cmd.pitches[0] = fb->pitches[0];
+	mode_cmd.modifier[0] = fb->modifier;
+	mode_cmd.flags = DRM_MODE_FB_MODIFIERS;
+
+	bo = initial_plane_bo(dev_priv, plane_config);
+	if (!bo)
+		return false;
+
+	if (intel_framebuffer_init(to_intel_framebuffer(fb),
+				   bo, &mode_cmd)) {
+		drm_dbg_kms(&dev_priv->drm, "intel fb init failed\n");
+		goto err_bo;
+	}
+	/* Reference handed over to fb */
+	xe_bo_put(bo);
+
+	return true;
+
+err_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return false;
+}
+
+static void
+intel_find_initial_plane_obj(struct intel_crtc *crtc,
+			     struct intel_initial_plane_config *plane_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *plane =
+		to_intel_plane(crtc->base.primary);
+	struct intel_plane_state *plane_state =
+		to_intel_plane_state(plane->base.state);
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct drm_framebuffer *fb;
+	struct i915_vma *vma;
+
+	/*
+	 * TODO:
+	 *   Disable planes if get_initial_plane_config() failed.
+	 *   Make sure things work if the surface base is not page aligned.
+	 */
+	if (!plane_config->fb)
+		return;
+
+	if (intel_alloc_initial_plane_obj(crtc, plane_config))
+		fb = &plane_config->fb->base;
+	else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb))
+		goto nofb;
+
+	plane_state->uapi.rotation = plane_config->rotation;
+	intel_fb_fill_view(to_intel_framebuffer(fb),
+			   plane_state->uapi.rotation, &plane_state->view);
+
+	vma = intel_pin_and_fence_fb_obj(fb, false, &plane_state->view.gtt,
+					 false, &plane_state->flags);
+	if (IS_ERR(vma))
+		goto nofb;
+
+	plane_state->ggtt_vma = vma;
+	plane_state->uapi.src_x = 0;
+	plane_state->uapi.src_y = 0;
+	plane_state->uapi.src_w = fb->width << 16;
+	plane_state->uapi.src_h = fb->height << 16;
+
+	plane_state->uapi.crtc_x = 0;
+	plane_state->uapi.crtc_y = 0;
+	plane_state->uapi.crtc_w = fb->width;
+	plane_state->uapi.crtc_h = fb->height;
+
+	plane_state->uapi.fb = fb;
+	drm_framebuffer_get(fb);
+
+	plane_state->uapi.crtc = &crtc->base;
+	intel_plane_copy_uapi_to_hw_state(plane_state, plane_state, crtc);
+
+	atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits);
+
+	plane_config->vma = vma;
+
+	/*
+	 * Flip to the newly created mapping ASAP, so we can re-use the
+	 * first part of GGTT for WOPCM, prevent flickering, and prevent
+	 * the lookup of sysmem scratch pages.
+	 */
+	plane->check_plane(crtc_state, plane_state);
+	plane->async_flip(plane, crtc_state, plane_state, true);
+	return;
+
+nofb:
+	/*
+	 * We've failed to reconstruct the BIOS FB.  Current display state
+	 * indicates that the primary plane is visible, but has a NULL FB,
+	 * which will lead to problems later if we don't fix it up.  The
+	 * simplest solution is to just disable the primary plane now and
+	 * pretend the BIOS never had it enabled.
+	 */
+	intel_plane_disable_noatomic(crtc, plane);
+}
+
+static void plane_config_fini(struct intel_initial_plane_config *plane_config)
+{
+	if (plane_config->fb) {
+		struct drm_framebuffer *fb = &plane_config->fb->base;
+
+		/* We may only have the stub and not a full framebuffer */
+		if (drm_framebuffer_read_refcount(fb))
+			drm_framebuffer_put(fb);
+		else
+			kfree(fb);
+	}
+}
+
+void intel_crtc_initial_plane_config(struct intel_crtc *crtc)
+{
+	struct xe_device *xe = to_xe_device(crtc->base.dev);
+	struct intel_initial_plane_config plane_config = {};
+
+	/*
+	 * Note that reserving the BIOS fb up front prevents us
+	 * from stuffing other stolen allocations like the ring
+	 * on top.  This prevents some ugliness at boot time, and
+	 * can even allow for smooth boot transitions if the BIOS
+	 * fb is large enough for the active pipe configuration.
+	 */
+	xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config);
+
+	/*
+	 * If the fb is shared between multiple heads, we'll
+	 * just get the first one.
+	 */
+	intel_find_initial_plane_obj(crtc, &plane_config);
+
+	plane_config_fini(&plane_config);
+}
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 5078a9e698599..6e20fc2de9ffc 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -6,7 +6,7 @@
 #ifndef _XE_REG_DEFS_H_
 #define _XE_REG_DEFS_H_
 
-#include "../../i915/i915_reg_defs.h"
+#include "compat-i915-headers/i915_reg_defs.h"
 
 /**
  * struct xe_reg - Register definition
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 924f7c949d558..ec9372aa739f0 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -56,19 +56,6 @@
 #define GU_MISC_IRQ_OFFSET			0x444f0
 #define   GU_MISC_GSE				REG_BIT(27)
 
-#define TRANSCODER_A_OFFSET			0x60000
-#define TRANSCODER_B_OFFSET			0x61000
-#define TRANSCODER_C_OFFSET			0x62000
-#define TRANSCODER_D_OFFSET			0x63000
-#define TRANSCODER_DSI0_OFFSET			0x6b000
-#define TRANSCODER_DSI1_OFFSET			0x6b800
-#define PIPE_A_OFFSET				0x70000
-#define PIPE_B_OFFSET				0x71000
-#define PIPE_C_OFFSET				0x72000
-#define PIPE_D_OFFSET				0x73000
-#define PIPE_DSI0_OFFSET			0x7b000
-#define PIPE_DSI1_OFFSET			0x7b800
-
 #define SOFTWARE_FLAGS_SPR33			XE_REG(0x4f084)
 
 #define GU_CNTL_PROTECTED			XE_REG(0x10100C)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index b96d1e7b9badb..c23a5694a788e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1400,9 +1400,9 @@ xe_bo_create_locked_range(struct xe_device *xe,
 
 		xe_assert(xe, tile);
 
-		if (flags & XE_BO_CREATE_STOLEN_BIT &&
-		    flags & XE_BO_FIXED_PLACEMENT_BIT) {
-			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, start);
+		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
+			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
+						   start + bo->size, U64_MAX);
 		} else {
 			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
 		}
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5869ba7e0cdc1..98d7e7fa12d84 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -18,6 +18,7 @@
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_debugfs.h"
+#include "xe_display.h"
 #include "xe_dma_buf.h"
 #include "xe_drm_client.h"
 #include "xe_drv.h"
@@ -190,6 +191,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
 	if (xe->ordered_wq)
 		destroy_workqueue(xe->ordered_wq);
 
+	if (xe->unordered_wq)
+		destroy_workqueue(xe->unordered_wq);
+
 	ttm_device_fini(&xe->ttm);
 }
 
@@ -199,6 +203,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	struct xe_device *xe;
 	int err;
 
+	xe_display_driver_set_hooks(&driver);
+
 	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
 	if (err)
 		return ERR_PTR(err);
@@ -237,14 +243,16 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	INIT_LIST_HEAD(&xe->pinned.evicted);
 
 	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
-	if (!xe->ordered_wq) {
-		drm_err(&xe->drm, "Failed to allocate xe-ordered-wq\n");
+	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+	if (!xe->ordered_wq || !xe->unordered_wq) {
+		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
 		err = -ENOMEM;
 		goto err_put;
 	}
 
-	drmm_mutex_init(&xe->drm, &xe->sb_lock);
-	xe->enabled_irq_mask = ~0;
+	err = xe_display_create(xe);
+	if (WARN_ON(err))
+		goto err_put;
 
 	return xe;
 
@@ -346,6 +354,9 @@ int xe_device_probe(struct xe_device *xe)
 	xe_pat_init_early(xe);
 
 	xe->info.mem_region_mask = 1;
+	err = xe_display_init_nommio(xe);
+	if (err)
+		return err;
 
 	for_each_tile(tile, xe, id) {
 		err = xe_tile_alloc(tile);
@@ -367,10 +378,14 @@ int xe_device_probe(struct xe_device *xe)
 			return err;
 	}
 
-	err = xe_irq_install(xe);
+	err = xe_display_init_noirq(xe);
 	if (err)
 		return err;
 
+	err = xe_irq_install(xe);
+	if (err)
+		goto err;
+
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_init_early(gt);
 		if (err)
@@ -392,6 +407,16 @@ int xe_device_probe(struct xe_device *xe)
 	/* Allocate and map stolen after potential VRAM resize */
 	xe_ttm_stolen_mgr_init(xe);
 
+	/*
+	 * Now that GT is initialized (TTM in particular),
+	 * we can try to init display, and inherit the initial fb.
+	 * This is the reason the first allocation needs to be done
+	 * inside display.
+	 */
+	err = xe_display_init_noaccel(xe);
+	if (err)
+		goto err_irq_shutdown;
+
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_init(gt);
 		if (err)
@@ -400,10 +425,16 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_heci_gsc_init(xe);
 
+	err = xe_display_init(xe);
+	if (err)
+		goto err_fini_display;
+
 	err = drm_dev_register(&xe->drm, 0);
 	if (err)
 		goto err_irq_shutdown;
 
+	xe_display_register(xe);
+
 	xe_debugfs_register(xe);
 
 	xe_pmu_register(&xe->pmu);
@@ -416,13 +447,30 @@ int xe_device_probe(struct xe_device *xe)
 
 	return 0;
 
+err_fini_display:
+	xe_display_driver_remove(xe);
+
 err_irq_shutdown:
 	xe_irq_shutdown(xe);
+err:
+	xe_display_fini(xe);
 	return err;
 }
 
+static void xe_device_remove_display(struct xe_device *xe)
+{
+	xe_display_unregister(xe);
+
+	drm_dev_unplug(&xe->drm);
+	xe_display_driver_remove(xe);
+}
+
 void xe_device_remove(struct xe_device *xe)
 {
+	xe_device_remove_display(xe);
+
+	xe_display_fini(xe);
+
 	xe_heci_gsc_fini(xe);
 
 	xe_irq_shutdown(xe);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 4425c2484a02b..7607ee373605e 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -20,6 +20,12 @@
 #include "xe_pmu.h"
 #include "xe_step_types.h"
 
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+#include "soc/intel_pch.h"
+#include "intel_display_core.h"
+#include "intel_display_device.h"
+#endif
+
 struct xe_ggtt;
 struct xe_pat_ops;
 
@@ -247,12 +253,20 @@ struct xe_device {
 		u8 has_llc:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
+		/** @enable_display: display enabled */
+		u8 enable_display:1;
 		/** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */
 		u8 bypass_mtcfg:1;
 		/** @supports_mmio_ext: supports MMIO extension/s */
 		u8 supports_mmio_ext:1;
 		/** @has_heci_gscfi: device has heci gscfi */
 		u8 has_heci_gscfi:1;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+		struct {
+			u32 rawclk_freq;
+		} i915_runtime;
+#endif
 	} info;
 
 	/** @irq: device interrupt state */
@@ -323,6 +337,9 @@ struct xe_device {
 	/** @ordered_wq: used to serialize compute mode resume */
 	struct workqueue_struct *ordered_wq;
 
+	/** @unordered_wq: used to serialize unordered work, mostly display */
+	struct workqueue_struct *unordered_wq;
+
 	/** @tiles: device tiles */
 	struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
 
@@ -391,10 +408,79 @@ struct xe_device {
 	/** @needs_flr_on_fini: requests function-reset on fini */
 	bool needs_flr_on_fini;
 
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+	/*
+	 * Any fields below this point are the ones used by display.
+	 * They are temporarily added here so xe_device can be desguised as
+	 * drm_i915_private during build. After cleanup these should go away,
+	 * migrating to the right sub-structs
+	 */
+	struct intel_display display;
+	enum intel_pch pch_type;
+	u16 pch_id;
+
+	struct dram_info {
+		bool wm_lv_0_adjust_needed;
+		u8 num_channels;
+		bool symmetric_memory;
+		enum intel_dram_type {
+			INTEL_DRAM_UNKNOWN,
+			INTEL_DRAM_DDR3,
+			INTEL_DRAM_DDR4,
+			INTEL_DRAM_LPDDR3,
+			INTEL_DRAM_LPDDR4,
+			INTEL_DRAM_DDR5,
+			INTEL_DRAM_LPDDR5,
+		} type;
+		u8 num_qgv_points;
+		u8 num_psf_gv_points;
+	} dram_info;
+
+	/*
+	 * edram size in MB.
+	 * Cannot be determined by PCIID. You must always read a register.
+	 */
+	u32 edram_size_mb;
+
+	/* To shut up runtime pm macros.. */
+	struct xe_runtime_pm {} runtime_pm;
+
 	/* For pcode */
 	struct mutex sb_lock;
 
+	/* Should be in struct intel_display */
+	u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state;
+	u8 snps_phy_failed_calibration;
+	struct drm_atomic_state *modeset_restore_state;
+	struct list_head global_obj_list;
+
+	union {
+		/* only to allow build, not used functionally */
+		u32 irq_mask;
+		u32 de_irq_mask[I915_MAX_PIPES];
+	};
+	u32 pipestat_irq_mask[I915_MAX_PIPES];
+
+	bool display_irqs_enabled;
 	u32 enabled_irq_mask;
+
+	struct intel_uncore {
+		spinlock_t lock;
+	} uncore;
+
+	/* only to allow build, not used functionally */
+	struct {
+		unsigned int hpll_freq;
+		unsigned int czclk_freq;
+		unsigned int fsb_freq, mem_freq, is_ddr3;
+		u8 vblank_enabled;
+	};
+	struct {
+		const char *dmc_firmware_path;
+	} params;
+
+	void *pxp;
+#endif
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
new file mode 100644
index 0000000000000..edfc7fce1ed31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_display.h"
+#include "regs/xe_regs.h"
+
+#include <linux/fb.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "soc/intel_dram.h"
+#include "i915_drv.h"		/* FIXME: HAS_DISPLAY() depends on this */
+#include "intel_acpi.h"
+#include "intel_audio.h"
+#include "intel_bw.h"
+#include "intel_display.h"
+#include "intel_display_driver.h"
+#include "intel_display_irq.h"
+#include "intel_display_types.h"
+#include "intel_dmc.h"
+#include "intel_dp.h"
+#include "intel_fbdev.h"
+#include "intel_hdcp.h"
+#include "intel_hotplug.h"
+#include "intel_opregion.h"
+#include "xe_module.h"
+
+/* Xe device functions */
+
+static bool has_display(struct xe_device *xe)
+{
+	return HAS_DISPLAY(xe);
+}
+
+/**
+ * xe_display_driver_probe_defer - Detect if we need to wait for other drivers
+ *				   early on
+ * @pdev: PCI device
+ *
+ * Returns: true if probe needs to be deferred, false otherwise
+ */
+bool xe_display_driver_probe_defer(struct pci_dev *pdev)
+{
+	if (!enable_display)
+		return 0;
+
+	return intel_display_driver_probe_defer(pdev);
+}
+
+static void xe_display_last_close(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (xe->info.enable_display)
+		intel_fbdev_restore_mode(to_xe_device(dev));
+}
+
+/**
+ * xe_display_driver_set_hooks - Add driver flags and hooks for display
+ * @driver: DRM device driver
+ *
+ * Set features and function hooks in @driver that are needed for driving the
+ * display IP. This sets the driver's capability of driving display, regardless
+ * if the device has it enabled
+ */
+void xe_display_driver_set_hooks(struct drm_driver *driver)
+{
+	if (!enable_display)
+		return;
+
+	driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
+	driver->lastclose = xe_display_last_close;
+}
+
+static void unset_display_features(struct xe_device *xe)
+{
+	xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+}
+
+static void display_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	destroy_workqueue(xe->display.hotplug.dp_wq);
+}
+
+/**
+ * xe_display_create - create display struct
+ * @xe: XE device instance
+ *
+ * Initialize all fields used by the display part.
+ *
+ * TODO: once everything can be inside a single struct, make the struct opaque
+ * to the rest of xe and return it to be xe->display.
+ *
+ * Returns: 0 on success
+ */
+int xe_display_create(struct xe_device *xe)
+{
+	int err;
+
+	spin_lock_init(&xe->display.fb_tracking.lock);
+
+	xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+
+	drmm_mutex_init(&xe->drm, &xe->sb_lock);
+	drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
+	drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
+	xe->enabled_irq_mask = ~0;
+
+	err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_power_domains_cleanup(xe);
+}
+
+int xe_display_init_nommio(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	/* Fake uncore lock */
+	spin_lock_init(&xe->uncore.lock);
+
+	/* This must be called before any calls to HAS_PCH_* */
+	intel_detect_pch(xe);
+
+	err = intel_power_domains_init(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
+}
+
+static void xe_display_fini_noirq(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_noirq(xe);
+	intel_power_domains_driver_remove(xe);
+}
+
+int xe_display_init_noirq(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	intel_display_driver_early_probe(xe);
+
+	/* Early display init.. */
+	intel_opregion_setup(xe);
+
+	/*
+	 * Fill the dram structure to get the system dram info. This will be
+	 * used for memory latency calculation.
+	 */
+	intel_dram_detect(xe);
+
+	intel_bw_init_hw(xe);
+
+	intel_display_device_info_runtime_init(xe);
+
+	err = intel_display_driver_probe_noirq(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noirq, NULL);
+}
+
+static void xe_display_fini_noaccel(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_nogem(xe);
+}
+
+int xe_display_init_noaccel(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	err = intel_display_driver_probe_nogem(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noaccel, NULL);
+}
+
+int xe_display_init(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return 0;
+
+	return intel_display_driver_probe(xe);
+}
+
+void xe_display_fini(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	/* poll work can call into fbdev, hence clean that up afterwards */
+	intel_hpd_poll_fini(xe);
+	intel_fbdev_fini(xe);
+
+	intel_hdcp_component_fini(xe);
+	intel_audio_deinit(xe);
+}
+
+void xe_display_register(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_register(xe);
+	intel_register_dsm_handler();
+	intel_power_domains_enable(xe);
+}
+
+void xe_display_unregister(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_unregister_dsm_handler();
+	intel_power_domains_disable(xe);
+	intel_display_driver_unregister(xe);
+}
+
+void xe_display_driver_remove(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove(xe);
+
+	intel_display_device_remove(xe);
+}
+
+/* IRQ-related functions */
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (master_ctl & DISPLAY_IRQ)
+		gen11_display_irq_handler(xe);
+}
+
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gu_misc_iir & GU_MISC_GSE)
+		intel_opregion_asle_intr(xe);
+}
+
+void xe_display_irq_reset(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	gen11_display_irq_reset(xe);
+}
+
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gt->info.id == XE_GT0)
+		gen11_de_irq_postinstall(xe);
+}
+
+static void intel_suspend_encoders(struct xe_device *xe)
+{
+	struct drm_device *dev = &xe->drm;
+	struct intel_encoder *encoder;
+
+	if (has_display(xe))
+		return;
+
+	drm_modeset_lock_all(dev);
+	for_each_intel_encoder(dev, encoder)
+		if (encoder->suspend)
+			encoder->suspend(encoder);
+	drm_modeset_unlock_all(dev);
+}
+
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	/*
+	 * We do a lot of poking in a lot of registers, make sure they work
+	 * properly.
+	 */
+	intel_power_domains_disable(xe);
+	if (has_display(xe))
+		drm_kms_helper_poll_disable(&xe->drm);
+
+	intel_display_driver_suspend(xe);
+
+	intel_dp_mst_suspend(xe);
+
+	intel_hpd_cancel_work(xe);
+
+	intel_suspend_encoders(xe);
+
+	intel_opregion_suspend(xe, PCI_D3cold);
+
+	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+
+	intel_dmc_suspend(xe);
+}
+
+void xe_display_pm_suspend_late(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_power_domains_suspend(xe, I915_DRM_SUSPEND_MEM);
+
+	intel_display_power_suspend_late(xe);
+}
+
+void xe_display_pm_resume_early(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_power_resume_early(xe);
+
+	intel_power_domains_resume(xe);
+}
+
+void xe_display_pm_resume(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_dmc_resume(xe);
+
+	if (has_display(xe))
+		drm_mode_config_reset(&xe->drm);
+
+	intel_display_driver_init_hw(xe);
+	intel_hpd_init(xe);
+
+	/* MST sideband requires HPD interrupts enabled */
+	intel_dp_mst_resume(xe);
+	intel_display_driver_resume(xe);
+
+	intel_hpd_poll_disable(xe);
+	if (has_display(xe))
+		drm_kms_helper_poll_enable(&xe->drm);
+
+	intel_opregion_resume(xe);
+
+	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
+
+	intel_power_domains_enable(xe);
+}
+
+void xe_display_probe(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		goto no_display;
+
+	intel_display_device_probe(xe);
+
+	if (has_display(xe))
+		return;
+
+no_display:
+	xe->info.enable_display = false;
+	unset_display_features(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/xe_display.h
new file mode 100644
index 0000000000000..710e56180b52d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_display.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DISPLAY_H_
+#define _XE_DISPLAY_H_
+
+#include "xe_device.h"
+
+struct drm_driver;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+
+bool xe_display_driver_probe_defer(struct pci_dev *pdev);
+void xe_display_driver_set_hooks(struct drm_driver *driver);
+void xe_display_driver_remove(struct xe_device *xe);
+
+int xe_display_create(struct xe_device *xe);
+
+void xe_display_probe(struct xe_device *xe);
+
+int xe_display_init_nommio(struct xe_device *xe);
+int xe_display_init_noirq(struct xe_device *xe);
+int xe_display_init_noaccel(struct xe_device *xe);
+int xe_display_init(struct xe_device *xe);
+void xe_display_fini(struct xe_device *xe);
+
+void xe_display_register(struct xe_device *xe);
+void xe_display_unregister(struct xe_device *xe);
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl);
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
+void xe_display_irq_reset(struct xe_device *xe);
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
+
+void xe_display_pm_suspend(struct xe_device *xe);
+void xe_display_pm_suspend_late(struct xe_device *xe);
+void xe_display_pm_resume_early(struct xe_device *xe);
+void xe_display_pm_resume(struct xe_device *xe);
+
+#else
+
+static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0; }
+static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { }
+static inline void xe_display_driver_remove(struct xe_device *xe) {}
+
+static inline int xe_display_create(struct xe_device *xe) { return 0; }
+
+static inline void xe_display_probe(struct xe_device *xe) { }
+
+static inline int xe_display_init_nommio(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noirq(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noaccel(struct xe_device *xe) { return 0; }
+static inline int xe_display_init(struct xe_device *xe) { return 0; }
+static inline void xe_display_fini(struct xe_device *xe) {}
+
+static inline void xe_display_register(struct xe_device *xe) {}
+static inline void xe_display_unregister(struct xe_device *xe) {}
+
+static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {}
+static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {}
+static inline void xe_display_irq_reset(struct xe_device *xe) {}
+static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
+
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
+static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
+
+#endif /* CONFIG_DRM_XE_DISPLAY */
+#endif /* _XE_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 1368616f20fa4..0e2a41837f169 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -338,9 +338,13 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 }
 
 static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
-				  u64 start, u64 end, u64 alignment)
+				  u64 start, u64 end)
 {
 	int err;
+	u64 alignment = XE_PAGE_SIZE;
+
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		alignment = SZ_64K;
 
 	if (XE_WARN_ON(bo->ggtt_node.size)) {
 		/* Someone's already inserted this BO in the GGTT */
@@ -364,26 +368,15 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 	return err;
 }
 
-int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs)
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+			 u64 start, u64 end)
 {
-	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) {
-		if (XE_WARN_ON(!IS_ALIGNED(ofs, SZ_64K)) ||
-		    XE_WARN_ON(!IS_ALIGNED(bo->size, SZ_64K)))
-			return -EINVAL;
-	}
-
-	return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size, 0);
+	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
 }
 
 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
-	u64 alignment;
-
-	alignment = XE_PAGE_SIZE;
-	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
-		alignment = SZ_64K;
-
-	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, alignment);
+	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
 }
 
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 04bb26b0938e2..3faa3c6d0375f 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -24,7 +24,8 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
-int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs);
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+			 u64 start, u64 end);
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 
 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index ef26120e7aa42..c5315e02fc5bb 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -12,6 +12,7 @@
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_device.h"
+#include "xe_display.h"
 #include "xe_drv.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
@@ -351,10 +352,14 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 
 	gt_irq_handler(tile, master_ctl, intr_dw, identity);
 
+	xe_display_irq_handler(xe, master_ctl);
+
 	gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
 
 	xelp_intr_enable(xe, false);
 
+	xe_display_irq_enable(xe, gu_misc_iir);
+
 	xe_pmu_irq_stats(xe);
 
 	return IRQ_HANDLED;
@@ -444,11 +449,14 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 		 * that get reported as Gunit GSE) would only be hooked up to
 		 * the primary tile.
 		 */
-		if (id == 0)
+		if (id == 0) {
+			xe_display_irq_handler(xe, master_ctl);
 			gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
+		}
 	}
 
 	dg1_intr_enable(xe, false);
+	xe_display_irq_enable(xe, gu_misc_iir);
 
 	xe_pmu_irq_stats(xe);
 
@@ -542,6 +550,7 @@ static void xe_irq_reset(struct xe_device *xe)
 
 	tile = xe_device_get_root_tile(xe);
 	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
+	xe_display_irq_reset(xe);
 
 	/*
 	 * The tile's top-level status register should be the last one
@@ -556,6 +565,8 @@ static void xe_irq_reset(struct xe_device *xe)
 
 static void xe_irq_postinstall(struct xe_device *xe)
 {
+	xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
+
 	/*
 	 * ASLE backlight operations are reported via GUnit GSE interrupts
 	 * on the root tile.
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 82817a46f8875..7194595e7f312 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -19,6 +19,10 @@ bool force_execlist = false;
 module_param_named_unsafe(force_execlist, force_execlist, bool, 0444);
 MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
 
+bool enable_display = true;
+module_param_named(enable_display, enable_display, bool, 0444);
+MODULE_PARM_DESC(enable_display, "Enable display");
+
 u32 xe_force_vram_bar_size;
 module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600);
 MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 40d89d4df87c4..05c6b6df8210b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -17,6 +17,7 @@
 #include "regs/xe_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "xe_device.h"
+#include "xe_display.h"
 #include "xe_drv.h"
 #include "xe_gt.h"
 #include "xe_macros.h"
@@ -55,6 +56,7 @@ struct xe_device_desc {
 
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
+	u8 has_display:1;
 	u8 has_heci_gscfi:1;
 
 	u8 has_llc:1;
@@ -62,6 +64,9 @@ struct xe_device_desc {
 	u8 supports_mmio_ext:1;
 };
 
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
 #define PLATFORM(x)		\
 	.platform = (x),	\
 	.platform_name = #x
@@ -205,7 +210,8 @@ static const struct xe_device_desc tgl_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_TIGERLAKE),
-	.has_llc = 1,
+	.has_display = true,
+	.has_llc = true,
 	.require_force_probe = true,
 };
 
@@ -213,6 +219,7 @@ static const struct xe_device_desc rkl_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ROCKETLAKE),
+	.has_display = true,
 	.has_llc = true,
 	.require_force_probe = true,
 };
@@ -223,7 +230,8 @@ static const struct xe_device_desc adl_s_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_S),
-	.has_llc = 1,
+	.has_display = true,
+	.has_llc = true,
 	.require_force_probe = true,
 	.subplatforms = (const struct xe_subplatform_desc[]) {
 		{ XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids },
@@ -237,7 +245,8 @@ static const struct xe_device_desc adl_p_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_P),
-	.has_llc = 1,
+	.has_display = true,
+	.has_llc = true,
 	.require_force_probe = true,
 	.subplatforms = (const struct xe_subplatform_desc[]) {
 		{ XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids },
@@ -249,7 +258,8 @@ static const struct xe_device_desc adl_n_desc = {
 	.graphics = &graphics_xelp,
 	.media = &media_xem,
 	PLATFORM(XE_ALDERLAKE_N),
-	.has_llc = 1,
+	.has_display = true,
+	.has_llc = true,
 	.require_force_probe = true,
 };
 
@@ -261,6 +271,7 @@ static const struct xe_device_desc dg1_desc = {
 	.media = &media_xem,
 	DGFX_FEATURES,
 	PLATFORM(XE_DG1),
+	.has_display = true,
 	.require_force_probe = true,
 	.has_heci_gscfi = 1,
 };
@@ -286,6 +297,7 @@ static const struct xe_device_desc ats_m_desc = {
 	.require_force_probe = true,
 
 	DG2_FEATURES,
+	.has_display = false,
 };
 
 static const struct xe_device_desc dg2_desc = {
@@ -294,12 +306,14 @@ static const struct xe_device_desc dg2_desc = {
 	.require_force_probe = true,
 
 	DG2_FEATURES,
+	.has_display = true,
 };
 
 static const __maybe_unused struct xe_device_desc pvc_desc = {
 	.graphics = &graphics_xehpc,
 	DGFX_FEATURES,
 	PLATFORM(XE_PVC),
+	.has_display = false,
 	.require_force_probe = true,
 	.has_heci_gscfi = 1,
 };
@@ -308,6 +322,7 @@ static const struct xe_device_desc mtl_desc = {
 	/* .graphics and .media determined via GMD_ID */
 	.require_force_probe = true,
 	PLATFORM(XE_METEORLAKE),
+	.has_display = true,
 };
 
 static const struct xe_device_desc lnl_desc = {
@@ -316,6 +331,7 @@ static const struct xe_device_desc lnl_desc = {
 };
 
 #undef PLATFORM
+__diag_pop();
 
 /* Map of GMD_ID values to graphics IP */
 static struct gmdid_map graphics_ip_map[] = {
@@ -574,6 +590,9 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
 	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
 
+	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
+				  enable_display &&
+				  desc->has_display;
 	/*
 	 * All platforms have at least one primary GT.  Any platform with media
 	 * version 13 or higher has an additional dedicated media GT.  And
@@ -668,6 +687,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return -ENODEV;
 	}
 
+	if (xe_display_driver_probe_defer(pdev))
+		return -EPROBE_DEFER;
+
 	xe = xe_device_create(pdev, ent);
 	if (IS_ERR(xe))
 		return PTR_ERR(xe);
@@ -686,7 +708,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_pci_disable;
 
-	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d gscfi:%d",
+	xe_display_probe(xe);
+
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d",
 		desc->platform_name,
 		subplatform_desc ? subplatform_desc->name : "",
 		xe->info.devid, xe->info.revid,
@@ -697,6 +721,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		xe->info.media_name,
 		xe->info.media_verx100 / 100,
 		xe->info.media_verx100 % 100,
+		str_yes_no(xe->info.enable_display),
 		xe->info.dma_mask_size, xe->info.tile_count,
 		xe->info.has_heci_gscfi);
 
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 93a7658da324c..e31a91cf311c4 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -14,6 +14,7 @@
 #include "xe_bo_evict.h"
 #include "xe_device.h"
 #include "xe_device_sysfs.h"
+#include "xe_display.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
@@ -61,14 +62,20 @@ int xe_pm_suspend(struct xe_device *xe)
 	if (err)
 		return err;
 
+	xe_display_pm_suspend(xe);
+
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_suspend(gt);
-		if (err)
+		if (err) {
+			xe_display_pm_resume(xe);
 			return err;
+		}
 	}
 
 	xe_irq_suspend(xe);
 
+	xe_display_pm_suspend_late(xe);
+
 	return 0;
 }
 
@@ -94,6 +101,8 @@ int xe_pm_resume(struct xe_device *xe)
 			return err;
 	}
 
+	xe_display_pm_resume_early(xe);
+
 	/*
 	 * This only restores pinned memory which is the memory required for the
 	 * GT(s) to resume.
@@ -104,6 +113,8 @@ int xe_pm_resume(struct xe_device *xe)
 
 	xe_irq_resume(xe);
 
+	xe_display_pm_resume(xe);
+
 	for_each_gt(gt, xe, id)
 		xe_gt_resume(gt);
 
-- 
GitLab


From f6761c68c0ace6f4e3df6b03209fab09d472b727 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Fri, 28 Jul 2023 16:13:22 +0200
Subject: [PATCH 2052/2340] drm/xe/display: Improve s2idle handling.

We accidentally always pass true as s2idle argument, instead of
calculating it in the same way as i915.

Suspend modes were removed to achieve compatibility with i915, but
accidentally left in the source code.

While at it, fix all other cases too, s2idle will go into a D1 state and
setting a lower power state should be handled by PCI core.

Maybe my laptop stops draining so much power during suspend now? I can
only hope..

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h | 6 ------
 drivers/gpu/drm/xe/xe_display.c                           | 6 ++++--
 drivers/gpu/drm/xe/xe_pci.c                               | 6 ------
 3 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
index f7f3286e2c534..89da3cc62f39a 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
@@ -5,12 +5,6 @@
 
 #include "intel_wakeref.h"
 
-enum i915_drm_suspend_mode {
-	I915_DRM_SUSPEND_IDLE,
-	I915_DRM_SUSPEND_MEM,
-	I915_DRM_SUSPEND_HIBERNATE,
-};
-
 #define intel_runtime_pm xe_runtime_pm
 
 static inline void disable_rpm_wakeref_asserts(void *rpm)
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
index edfc7fce1ed31..fa20faf3cc83b 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -321,6 +321,7 @@ static void intel_suspend_encoders(struct xe_device *xe)
 
 void xe_display_pm_suspend(struct xe_device *xe)
 {
+	bool s2idle = acpi_target_system_state() < ACPI_STATE_S3;
 	if (!xe->info.enable_display)
 		return;
 
@@ -340,7 +341,7 @@ void xe_display_pm_suspend(struct xe_device *xe)
 
 	intel_suspend_encoders(xe);
 
-	intel_opregion_suspend(xe, PCI_D3cold);
+	intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold);
 
 	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
 
@@ -349,10 +350,11 @@ void xe_display_pm_suspend(struct xe_device *xe)
 
 void xe_display_pm_suspend_late(struct xe_device *xe)
 {
+	bool s2idle = acpi_target_system_state() < ACPI_STATE_S3;
 	if (!xe->info.enable_display)
 		return;
 
-	intel_power_domains_suspend(xe, I915_DRM_SUSPEND_MEM);
+	intel_power_domains_suspend(xe, s2idle);
 
 	intel_display_power_suspend_late(xe);
 }
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 05c6b6df8210b..097b685981912 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -795,10 +795,6 @@ static int xe_pci_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 
-	err = pci_set_power_state(pdev, PCI_D3hot);
-	if (err)
-		return err;
-
 	return 0;
 }
 
@@ -814,8 +810,6 @@ static int xe_pci_resume(struct device *dev)
 	if (err)
 		return err;
 
-	pci_restore_state(pdev);
-
 	err = pci_enable_device(pdev);
 	if (err)
 		return err;
-- 
GitLab


From e5b6e616c63f0d931e1be0d1c17cc80ec0fd3ea3 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 14 Mar 2023 17:49:02 -0700
Subject: [PATCH 2053/2340] drm/xe/display: Silence kernel-doc warnings related
 to display

Add a "private:" comment to the part of the struct that is not expected
to be documented, the one with display-related fields. This silence the
following warnings:

	$ find drivers/gpu/drm/xe -name '*.[ch]' -not -path 'drivers/gpu/drm/xe/display/*'  |  xargs ./scripts/kernel-doc -Werror  -none
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'display' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'pch_type' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'pch_id' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'wm_lv_0_adjust_needed' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'num_channels' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'symmetric_memory' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'type' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'num_qgv_points' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'num_psf_gv_points' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'dram_info' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'runtime_pm' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'sb_lock' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'skl_preferred_vco_freq' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'max_dotclk_freq' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'hti_state' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'snps_phy_failed_calibration' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'modeset_restore_state' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'global_obj_list' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'de_irq_mask' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'display_irqs_enabled' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'enabled_irq_mask' not described in 'xe_device'
	drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'params' not described in 'xe_device'
	22 warnings as Errors

Fixes: 44e694958b95 ("drm/xe/display: Implement display support")
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Link: https://lore.kernel.org/r/20230315004902.2622613-1-lucas.demarchi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 7607ee373605e..be11cadccbd44 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -408,6 +408,8 @@ struct xe_device {
 	/** @needs_flr_on_fini: requests function-reset on fini */
 	bool needs_flr_on_fini;
 
+	/* private: */
+
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
 	/*
 	 * Any fields below this point are the ones used by display.
-- 
GitLab


From f02d48b881e2c0138f570884f8ead14d3f86ba21 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 31 Mar 2023 09:46:27 +0100
Subject: [PATCH 2054/2340] drm/xe/display: ensure clear-color surfaces are cpu
 mappable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The KMD needs to access the clear-color value stored in the buffer via
the CPU. On small-bar systems reject any buffers that are potentially
not CPU accessible.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
[ Split display-related changes from small-bar support ]
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/display/xe_fb_pin.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 67b956a6da8d3..16e04b24daee8 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -217,6 +217,23 @@ static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
 		goto err;
 	}
 
+	if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
+	    intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
+	    !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) {
+		struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+		/*
+		 * If we need to able to access the clear-color value stored in
+		 * the buffer, then we require that such buffers are also CPU
+		 * accessible.  This is important on small-bar systems where
+		 * only some subset of VRAM is CPU accessible.
+		 */
+		if (tile->mem.vram.io_size < tile->mem.vram.usable_size) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
 	/*
 	 * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
 	 * assumptions are incorrect for framebuffers
-- 
GitLab


From 9914e19cc215d339b618ccae993e16ed7aafb54e Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Wed, 30 Aug 2023 09:33:32 +0300
Subject: [PATCH 2055/2340] drm/xe/display: fix error handling flow when device
 probing fails

Upon device probe failure, rolling back the initialization
should be done in reversed order.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Ohad Sharabi <osharabi@habana.ai>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 98d7e7fa12d84..1202f8007f795 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -427,11 +427,11 @@ int xe_device_probe(struct xe_device *xe)
 
 	err = xe_display_init(xe);
 	if (err)
-		goto err_fini_display;
+		goto err_irq_shutdown;
 
 	err = drm_dev_register(&xe->drm, 0);
 	if (err)
-		goto err_irq_shutdown;
+		goto err_fini_display;
 
 	xe_display_register(xe);
 
-- 
GitLab


From 04316b4ae6e094569737bababac6f2ef130c0020 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 5 Sep 2023 19:49:42 +0000
Subject: [PATCH 2056/2340] drm/xe/display: Use acpi_target_system_state only
 if ACPI_SLEEP is enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the build error below with CONFIG_ACPI_SLEEP=n:

	drivers/gpu/drm/xe/xe_display.c:334:23: error: implicit declaration of function ‘acpi_target_system_state’; did you mean ‘acpi_get_system_info’? [-Werror=implicit-function-declaration]
	  334 |         bool s2idle = acpi_target_system_state() < ACPI_STATE_S3;

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_display.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
index fa20faf3cc83b..da10f16e1c125 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -319,9 +319,18 @@ static void intel_suspend_encoders(struct xe_device *xe)
 	drm_modeset_unlock_all(dev);
 }
 
+static bool suspend_to_idle(void)
+{
+#if IS_ENABLED(CONFIG_ACPI_SLEEP)
+	if (acpi_target_system_state() < ACPI_STATE_S3)
+		return true;
+#endif
+	return false;
+}
+
 void xe_display_pm_suspend(struct xe_device *xe)
 {
-	bool s2idle = acpi_target_system_state() < ACPI_STATE_S3;
+	bool s2idle = suspend_to_idle();
 	if (!xe->info.enable_display)
 		return;
 
@@ -350,7 +359,7 @@ void xe_display_pm_suspend(struct xe_device *xe)
 
 void xe_display_pm_suspend_late(struct xe_device *xe)
 {
-	bool s2idle = acpi_target_system_state() < ACPI_STATE_S3;
+	bool s2idle = suspend_to_idle();
 	if (!xe->info.enable_display)
 		return;
 
-- 
GitLab


From 9aab7851ff1922930558274fd3983d047d1dfe22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 12 Sep 2023 09:47:07 +0300
Subject: [PATCH 2057/2340] drm/xe/display: Add struct i915_active for Xe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add empty definition for struct i915_active to kill ifdefs from
frontbuffer tracking code.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../drm/xe/compat-i915-headers/i915_active_types.h  | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
index e69de29bb2d1d..8c31f9a8b1687 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_TYPES_H_
+#define _I915_ACTIVE_TYPES_H_
+
+struct i915_active {};
+#define I915_ACTIVE_RETIRE_SLEEPS 0
+
+#endif /* _I915_ACTIVE_TYPES_H_ */
-- 
GitLab


From 1be5ff7f82063dab2e1d86bc21f2deb4cf4908bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 12 Sep 2023 09:47:01 +0300
Subject: [PATCH 2058/2340] drm/xe/display: Add macro to get i915 device from
 xe_bo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add helper macro to kill couple of #ifdefs

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo_types.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index c628625c8a899..051fe990c1338 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -78,4 +78,7 @@ struct xe_bo {
 	bool created;
 };
 
+#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
+#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)
+
 #endif
-- 
GitLab


From cd494efdb8433f4a78f9bedb3e67d7505690f141 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 12 Sep 2023 09:47:03 +0300
Subject: [PATCH 2059/2340] drm/xe/display: Add frontbuffer setter/getter for
 xe_bo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Xe is not carrying frontbuffer pointer in xe_bo. Define it's getter as
NULL.  Setter simply returns pointer which was provided as a
parameter.

v3: Do not take any references
v2: Handle xe_bo_put as well

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gem/i915_gem_object_frontbuffer.h                | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
new file mode 100644
index 0000000000000..2a3f12d2978c5
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_FRONTBUFFER_H_
+#define _I915_GEM_OBJECT_FRONTBUFFER_H_
+
+#define i915_gem_object_get_frontbuffer(obj)		NULL
+#define i915_gem_object_set_frontbuffer(obj, front)	(front)
+
+#endif
-- 
GitLab


From 0071f1713dab8656e6c939d7be980f2ad3e8d312 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 12 Sep 2023 09:47:05 +0300
Subject: [PATCH 2060/2340] drm/xe/display: Add i915_active.h compatibility
 header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add empty definitions for i915_active_init/fini to kill ifdefs from
frontbuffer tracking code.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../drm/xe/compat-i915-headers/i915_active.h  | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_active.h

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h
new file mode 100644
index 0000000000000..6f0ab3753563b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include "i915_active_types.h"
+
+static inline void i915_active_init(struct i915_active *ref,
+				    int (*active)(struct i915_active *ref),
+				    void (*retire)(struct i915_active *ref),
+				    unsigned long flags)
+{
+	(void) active;
+	(void) retire;
+}
+
+#define i915_active_fini(active) do { } while (0)
+
+#endif
-- 
GitLab


From fb764a35c7f45a378ae064016c321d61532113b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Mon, 2 Oct 2023 13:23:56 +0300
Subject: [PATCH 2061/2340] drm/xe/display: Add empty def for
 i915_gem_object_flush_if_display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't need i915_gem_object_flush_if_display on Xe side. Add empty
define to tackle compilation errors with display code where it's used.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 6283e27bc4251..9d3b704a10303 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -325,6 +325,8 @@ static inline unsigned int xe_sg_segment_size(struct device *dev)
 	return round_down(max / 2, PAGE_SIZE);
 }
 
+#define i915_gem_object_flush_if_display(obj)		((void)(obj))
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 /**
  * xe_bo_is_mem_type - Whether the bo currently resides in the given
-- 
GitLab


From c3744ceb99e54e41f9f4a7a8938f2e12e0be23f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 12 Sep 2023 09:47:09 +0300
Subject: [PATCH 2062/2340] drm/xe/display: Add empty define for
 i915_ggtt_clear_scanout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add empty define for i915_ggtt_clear_scanout to avoid build failure.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
index e4bbdffcd5f5a..88771f5e03cc3 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
@@ -22,6 +22,7 @@ struct i915_vma {
 	struct drm_mm_node node;
 };
 
+#define i915_ggtt_clear_scanout(bo) do { } while (0)
 
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
-- 
GitLab


From c5a2eadd729ba3538f77ea2e055ca1f2efe82092 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 13 Sep 2023 12:54:10 +0300
Subject: [PATCH 2063/2340] drm/xe/display: Xe stolen memory handling for fbc
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Xe stolen memory handling for fbc.

v3:
  - v2: Add parenthesis around parameter in i915_gem_stolen_node_allocated
v2:
  - define i915_gem_stolen_area_address/size as !WARN_ON(1)
  - squash common type addition into this patch

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gpu/drm/xe/compat-i915-headers/i915_drv.h |  1 +
 .../xe/compat-i915-headers/i915_gem_stolen.h  | 79 +++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index c3aa5936667a3..738573ae5f109 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -19,6 +19,7 @@
 #include "xe_bo.h"
 #include "xe_pm.h"
 #include "xe_step.h"
+#include "i915_gem_stolen.h"
 #include "i915_gpu_error.h"
 #include "i915_reg_defs.h"
 #include "i915_utils.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
new file mode 100644
index 0000000000000..888e7a87a9257
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -0,0 +1,79 @@
+#ifndef _I915_GEM_STOLEN_H_
+#define _I915_GEM_STOLEN_H_
+
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_res_cursor.h"
+
+struct xe_bo;
+
+struct i915_stolen_fb {
+	struct xe_bo *bo;
+};
+
+static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
+						       struct i915_stolen_fb *fb,
+						       u32 size, u32 align,
+						       u32 start, u32 end)
+{
+	struct xe_bo *bo;
+	int err;
+	u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
+
+	bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
+				       NULL, size, start, end,
+				       ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		bo = NULL;
+		return err;
+	}
+	err = xe_bo_pin(bo);
+	xe_bo_unlock_vm_held(bo);
+
+	if (err) {
+		xe_bo_put(fb->bo);
+		bo = NULL;
+	}
+
+	fb->bo = bo;
+
+	return err;
+}
+
+static inline int i915_gem_stolen_insert_node(struct xe_device *xe,
+					      struct i915_stolen_fb *fb,
+					      u32 size, u32 align)
+{
+	/* Not used on xe */
+	BUG_ON(1);
+	return -ENODEV;
+}
+
+static inline void i915_gem_stolen_remove_node(struct xe_device *xe,
+					       struct i915_stolen_fb *fb)
+{
+	xe_bo_unpin_map_no_vm(fb->bo);
+	fb->bo = NULL;
+}
+
+#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN))
+#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo))
+
+static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb)
+{
+	struct xe_res_cursor res;
+
+	xe_res_first(fb->bo->ttm.resource, 0, 4096, &res);
+	return res.start;
+}
+
+/* Used for < gen4. These are not supported by Xe */
+#define i915_gem_stolen_area_address(xe) (!WARN_ON(1))
+/* Used for gen9 specific WA. Gen9 is not supported by Xe */
+#define i915_gem_stolen_area_size(xe) (!WARN_ON(1))
+
+#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \
+					 i915_gem_stolen_node_offset(fb))
+#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size))
+
+#endif
-- 
GitLab


From c890be73933a3c124ffa08411d8d279aeede4384 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 13 Sep 2023 12:54:11 +0300
Subject: [PATCH 2064/2340] drm/xe/display: Add i915_gem.h compatibility header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add i915_gem.h compatibility header and include it in i915_drv.h. Add
empty GEM_BUG_ON definition for fbc code.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 +
 drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h | 9 +++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index 738573ae5f109..e5d1a4a3d8b42 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -19,6 +19,7 @@
 #include "xe_bo.h"
 #include "xe_pm.h"
 #include "xe_step.h"
+#include "i915_gem.h"
 #include "i915_gem_stolen.h"
 #include "i915_gpu_error.h"
 #include "i915_reg_defs.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
new file mode 100644
index 0000000000000..06b723a479c51
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_GEM_H__
+#define __I915_GEM_H__
+#define GEM_BUG_ON
+#endif
-- 
GitLab


From 08ea5ea2e890e8fbc9875294e6087179574a3057 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 13 Sep 2023 12:54:12 +0300
Subject: [PATCH 2065/2340] drm/xe/display: Add Xe implementation for fence
 checks used by fbc code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Xe doesn't support legacy fences. Implement legacy fence and fence
id checks accordingly.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h     |  1 +
 drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h     |  2 ++
 .../gpu/drm/xe/compat-i915-headers/intel_gt_types.h   | 11 +++++++++++
 3 files changed, 14 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h

diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index e5d1a4a3d8b42..5d2a77b52db41 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -24,6 +24,7 @@
 #include "i915_gpu_error.h"
 #include "i915_reg_defs.h"
 #include "i915_utils.h"
+#include "intel_gt_types.h"
 #include "intel_step.h"
 #include "intel_uc_fw.h"
 #include "intel_uncore.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
index 88771f5e03cc3..a20d2638ea7a6 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
@@ -24,6 +24,8 @@ struct i915_vma {
 
 #define i915_ggtt_clear_scanout(bo) do { } while (0)
 
+#define i915_vma_fence_id(vma) -1
+
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	return vma->node.start;
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h
new file mode 100644
index 0000000000000..c15806d6c4f78
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_TYPES__
+#define __INTEL_GT_TYPES__
+
+#define intel_gt_support_legacy_fencing(gt) 0
+
+#endif
-- 
GitLab


From c79802d100d1dd8b1748ea7dc232f5e059bdc7c5 Mon Sep 17 00:00:00 2001
From: Uma Shankar <uma.shankar@intel.com>
Date: Fri, 6 Oct 2023 17:26:45 +0530
Subject: [PATCH 2066/2340] drm/xe/display: Create a dummy version for vga
 decode

This introduces an exclusive version of vga decode for xe.
Rest of the display changes will be re-used from i915.

Currently it adds just a dummy implementation. VGA decode
needs to be handled correctly in i915, proper implementation
will be adopted once the i915 changes are finalized and merged
in upstream.

v2: Addressed Arun's review comments

Signed-off-by: Uma Shankar <uma.shankar@intel.com>
Reviewed-by: Arun R Murthy <arun.r.mruthy@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                  |  1 +
 drivers/gpu/drm/xe/display/xe_display_misc.c | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/display/xe_display_misc.c

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 2777cbf07cc6e..41d92014a45c6 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -152,6 +152,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	display/xe_hdcp_gsc.o \
 	display/xe_plane_initial.o \
 	display/xe_display_rps.o \
+	display/xe_display_misc.o \
 	display/intel_fbdev_fb.o \
 	display/intel_fb_bo.o \
 	display/ext/i915_irq.o \
diff --git a/drivers/gpu/drm/xe/display/xe_display_misc.c b/drivers/gpu/drm/xe/display/xe_display_misc.c
new file mode 100644
index 0000000000000..242c2ef4ca937
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_misc.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_display_types.h"
+
+struct pci_dev;
+
+unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode);
+
+unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode)
+{
+	/* ToDo: Implement the actual handling of vga decode */
+	return 0;
+}
-- 
GitLab


From 95ab70f134d837a566f2d998b3090f40227a1b60 Mon Sep 17 00:00:00 2001
From: Suraj Kandpal <suraj.kandpal@intel.com>
Date: Mon, 16 Oct 2023 14:31:41 +0530
Subject: [PATCH 2067/2340] drm/xe/hdcp: Define intel_hdcp_gsc_check_status in
 Xe

Define intel_hdcp_gsc_check_status in Xe to account
for changes in i915 and Xe.
intel_hdcp_check_status always returns false as gsc cs
interface is not yet ported.
intel_hdcp_gsc_cs_required always returns true as going
forward gsc cs will always be required by upcoming
platforms

--v5
-Define intel_hdcp_gsc_cs_required()

--v6
-Explain reasons for the return values [Chaitanya]

Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 0453293af8efe..0f11a39333e21 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -6,6 +6,16 @@
 #include "i915_drv.h"
 #include "intel_hdcp_gsc.h"
 
+bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915)
+{
+	return true;
+}
+
+bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915)
+{
+	return false;
+}
+
 int intel_hdcp_gsc_init(struct drm_i915_private *i915)
 {
 	drm_info(&i915->drm, "HDCP support not yet implemented\n");
-- 
GitLab


From ff180adfb923b2619f6a46c5a369d833b543a9f1 Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Date: Thu, 12 Oct 2023 16:52:08 +0300
Subject: [PATCH 2068/2340] drm/xe/display: Don't try to use vram if not
 available

Trying to get bo from vram when vram not available will cause
WARN_ON() hence avoid touching vram if not available.

Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/display/xe_fb_pin.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 16e04b24daee8..9fc2147a2f106 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -62,11 +62,12 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
 		dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8,
 				 XE_PAGE_SIZE);
 
-	dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
-				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM0_BIT |
-				  XE_BO_CREATE_GGTT_BIT);
-	if (IS_ERR(dpt))
+	if (IS_DGFX(xe))
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_VRAM0_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	else
 		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
 					   ttm_bo_type_kernel,
 					   XE_BO_CREATE_STOLEN_BIT |
-- 
GitLab


From 216d62bb241a73b43dc89f67cdb60304f032956c Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Date: Thu, 12 Oct 2023 16:52:09 +0300
Subject: [PATCH 2069/2340] drm/xe/display: Add writing of remapped dpt

Xe need to use remapped display page table for tiled framebuffers
on anywhere else than DG2. Here add function to write such dpt and
enable usage of remapped display page tables where needed.

Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/display/xe_fb_pin.c | 52 +++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 9fc2147a2f106..722c84a566073 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -45,6 +45,37 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_
 	*dpt_ofs = ALIGN(*dpt_ofs, 4096);
 }
 
+static void
+write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
+		   u32 bo_ofs, u32 width, u32 height, u32 src_stride,
+		   u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index)
+		= ggtt->pt_ops->pte_encode_bo;
+	u32 column, row;
+
+	for (row = 0; row < height; row++) {
+		u32 src_idx = src_stride * row + bo_ofs;
+
+		for (column = 0; column < width; column++) {
+			iosys_map_wr(map, *dpt_ofs, u64,
+				     pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+				     xe->pat.idx[XE_CACHE_WB]));
+
+			*dpt_ofs += 8;
+			src_idx++;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*dpt_ofs += (dst_stride - width) * 8;
+	}
+
+	/* Align to next page */
+	*dpt_ofs = ALIGN(*dpt_ofs, 4096);
+}
+
 static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
 			       const struct i915_gtt_view *view,
 			       struct i915_vma *vma)
@@ -57,6 +88,9 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
 
 	if (view->type == I915_GTT_VIEW_NORMAL)
 		dpt_size = ALIGN(size / XE_PAGE_SIZE * 8, XE_PAGE_SIZE);
+	else if (view->type == I915_GTT_VIEW_REMAPPED)
+		dpt_size = ALIGN(intel_remapped_info_size(&fb->remapped_view.gtt.remapped) * 8,
+				 XE_PAGE_SIZE);
 	else
 		/* display uses 4K tiles instead of bytes here, convert to entries.. */
 		dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8,
@@ -89,6 +123,18 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
 
 			iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
 		}
+	} else if (view->type == I915_GTT_VIEW_REMAPPED) {
+		const struct intel_remapped_info *remap_info = &view->remapped;
+		u32 i, dpt_ofs = 0;
+
+		for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++)
+			write_dpt_remapped(bo, &dpt->vmap, &dpt_ofs,
+					   remap_info->plane[i].offset,
+					   remap_info->plane[i].width,
+					   remap_info->plane[i].height,
+					   remap_info->plane[i].src_stride,
+					   remap_info->plane[i].dst_stride);
+
 	} else {
 		const struct intel_rotation_info *rot_info = &view->rotated;
 		u32 i, dpt_ofs = 0;
@@ -212,12 +258,6 @@ static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
 	if (!vma)
 		return ERR_PTR(-ENODEV);
 
-	/* Remapped view is only required on ADL-P, which xe doesn't support. */
-	if (XE_WARN_ON(view->type == I915_GTT_VIEW_REMAPPED)) {
-		ret = -ENODEV;
-		goto err;
-	}
-
 	if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
 	    intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
 	    !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) {
-- 
GitLab


From 3d78923bd07ad99a33b06eaa69194b35ac1637f1 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Wed, 15 Nov 2023 15:15:23 +0100
Subject: [PATCH 2070/2340] drm/xe/guc: Promote guc_to_gt/xe helpers to .h

Duplicating these helpers in almost every .c file is a bad idea.
Define them as inlines in .h file to allow proper reuse.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c        |  6 ------
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  6 ------
 drivers/gpu/drm/xe/xe_guc.c                 | 12 ------------
 drivers/gpu/drm/xe/xe_guc.h                 | 11 +++++++++++
 drivers/gpu/drm/xe/xe_guc_debugfs.c         | 12 ------------
 drivers/gpu/drm/xe/xe_guc_hwconfig.c        | 12 ------------
 drivers/gpu/drm/xe/xe_guc_submit.c          | 12 ------------
 7 files changed, 11 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 9fcbf8773b8bb..1e083dda06798 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -59,12 +59,6 @@ struct acc {
 	u8 engine_instance;
 };
 
-static struct xe_gt *
-guc_to_gt(struct xe_guc *guc)
-{
-	return container_of(guc, struct xe_gt, uc.guc);
-}
-
 static bool access_is_atomic(enum access_type access_type)
 {
 	return access_type == ACCESS_TYPE_ATOMIC;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index b5c39c55e1fa0..a28f31c05b1b0 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -13,12 +13,6 @@
 
 #define TLB_TIMEOUT	(HZ / 4)
 
-static struct xe_gt *
-guc_to_gt(struct xe_guc *guc)
-{
-	return container_of(guc, struct xe_gt, uc.guc);
-}
-
 static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 {
 	struct xe_gt *gt = container_of(work, struct xe_gt,
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 84f0b5488783b..311c5d539423f 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -27,18 +27,6 @@
 #include "xe_wa.h"
 #include "xe_wopcm.h"
 
-static struct xe_gt *
-guc_to_gt(struct xe_guc *guc)
-{
-	return container_of(guc, struct xe_gt, uc.guc);
-}
-
-static struct xe_device *
-guc_to_xe(struct xe_guc *guc)
-{
-	return gt_to_xe(guc_to_gt(guc));
-}
-
 /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
 #define GUC_GGTT_TOP    0xFEE00000
 static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 3addd8fc674af..d3e49e7fd7c34 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -6,6 +6,7 @@
 #ifndef _XE_GUC_H_
 #define _XE_GUC_H_
 
+#include "xe_gt.h"
 #include "xe_guc_types.h"
 #include "xe_hw_engine_types.h"
 #include "xe_macros.h"
@@ -58,4 +59,14 @@ static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
 	}
 }
 
+static inline struct xe_gt *guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static inline struct xe_device *guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index 167eb5593e03c..ffd7d53bcc42b 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -15,18 +15,6 @@
 #include "xe_guc_log.h"
 #include "xe_macros.h"
 
-static struct xe_gt *
-guc_to_gt(struct xe_guc *guc)
-{
-	return container_of(guc, struct xe_gt, uc.guc);
-}
-
-static struct xe_device *
-guc_to_xe(struct xe_guc *guc)
-{
-	return gt_to_xe(guc_to_gt(guc));
-}
-
 static struct xe_guc *node_to_guc(struct drm_info_node *node)
 {
 	return node->info_ent->data;
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index 76aed9c348abc..57d325ec8ce32 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -13,18 +13,6 @@
 #include "xe_guc.h"
 #include "xe_map.h"
 
-static struct xe_gt *
-guc_to_gt(struct xe_guc *guc)
-{
-	return container_of(guc, struct xe_gt, uc.guc);
-}
-
-static struct xe_device *
-guc_to_xe(struct xe_guc *guc)
-{
-	return gt_to_xe(guc_to_gt(guc));
-}
-
 static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
 {
 	u32 action[] = {
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 8d5af11fb80d3..b13c925c56ee9 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -36,18 +36,6 @@
 #include "xe_trace.h"
 #include "xe_vm.h"
 
-static struct xe_gt *
-guc_to_gt(struct xe_guc *guc)
-{
-	return container_of(guc, struct xe_gt, uc.guc);
-}
-
-static struct xe_device *
-guc_to_xe(struct xe_guc *guc)
-{
-	return gt_to_xe(guc_to_gt(guc));
-}
-
 static struct xe_guc *
 exec_queue_to_guc(struct xe_exec_queue *q)
 {
-- 
GitLab


From de84aa96e4427125d00af1706b59584b2cbb0085 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 10 Nov 2023 15:41:50 +0000
Subject: [PATCH 2071/2340] drm/xe/uapi: Remove useless
 XE_QUERY_CONFIG_NUM_PARAM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

num_params can be used to retrieve the size of the info array
for the specific version of the kernel being used.

v2: Also remove XE_QUERY_CONFIG_NUM_PARAM (José Roberto de Souza)

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 2 +-
 include/uapi/drm/xe_drm.h     | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 10b9878ec95a3..58fb06a63db2c 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -305,7 +305,7 @@ static int query_memory_usage(struct xe_device *xe,
 
 static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 {
-	u32 num_params = XE_QUERY_CONFIG_NUM_PARAM;
+	const u32 num_params = XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1;
 	size_t size =
 		sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
 	struct drm_xe_query_config __user *query_ptr =
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 9bd7092a7ea42..b9a68f8b69f3e 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -329,7 +329,6 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_GT_COUNT		4
 #define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
 #define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	6
-#define XE_QUERY_CONFIG_NUM_PARAM		(XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1)
 	/** @info: array of elements containing the config info */
 	__u64 info[];
 };
-- 
GitLab


From 1a912c90a278177423128e5b82673575821d0c35 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 10 Nov 2023 15:41:51 +0000
Subject: [PATCH 2072/2340] drm/xe/uapi: Remove GT_TYPE_REMOTE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the split between tile and gt, this is currently unused.
Also it is bringing confusion because main vs remote would be
more a concept of the tile itself and not about GT.

So, the MAIN one is the traditional GT used for every operation
in older platforms, and for render/graphics and compute on platforms
that contains the stand-alone Media GT.

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 2 --
 include/uapi/drm/xe_drm.h     | 5 ++---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 58fb06a63db2c..cb3461971dc90 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -372,8 +372,6 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 	for_each_gt(gt, xe, id) {
 		if (xe_gt_is_media_type(gt))
 			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MEDIA;
-		else if (gt_to_tile(gt)->id > 0)
-			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_REMOTE;
 		else
 			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MAIN;
 		gt_list->gt_list[id].gt_id = gt->info.id;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b9a68f8b69f3e..8154cecf6f0d7 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -343,9 +343,8 @@ struct drm_xe_query_config {
  */
 struct drm_xe_query_gt {
 #define XE_QUERY_GT_TYPE_MAIN		0
-#define XE_QUERY_GT_TYPE_REMOTE		1
-#define XE_QUERY_GT_TYPE_MEDIA		2
-	/** @type: GT type: Main, Remote, or Media */
+#define XE_QUERY_GT_TYPE_MEDIA		1
+	/** @type: GT type: Main or Media */
 	__u16 type;
 	/** @gt_id: Unique ID of this GT within the PCI Device */
 	__u16 gt_id;
-- 
GitLab


From ddfa2d6a846a571edb4dc6ed29d94b38558ae088 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 10 Nov 2023 15:41:52 +0000
Subject: [PATCH 2073/2340] drm/xe/uapi: Kill VM_MADVISE IOCTL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove unused IOCTL.
Without any userspace using it we need to remove before we
can be accepted upstream.

At this point we are breaking the compatibility for good,
so we don't need to break when we are in-tree. So, let's
also use this breakage to sort out the IOCTL entries and
fix all the small indentation and line issues.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/Makefile        |   1 -
 drivers/gpu/drm/xe/xe_bo.c         |   2 +-
 drivers/gpu/drm/xe/xe_bo_types.h   |   3 +
 drivers/gpu/drm/xe/xe_device.c     |   8 +-
 drivers/gpu/drm/xe/xe_vm_madvise.c | 299 -----------------------------
 drivers/gpu/drm/xe/xe_vm_madvise.h |  15 --
 include/uapi/drm/xe_drm.h          |  92 ++-------
 7 files changed, 18 insertions(+), 402 deletions(-)
 delete mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.c
 delete mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 41d92014a45c6..a29b92080c850 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -115,7 +115,6 @@ xe-y += xe_bb.o \
 	xe_uc_debugfs.o \
 	xe_uc_fw.o \
 	xe_vm.o \
-	xe_vm_madvise.o \
 	xe_wait_user_fence.o \
 	xe_wa.o \
 	xe_wopcm.o
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index c23a5694a788e..5b5f764586fee 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1239,7 +1239,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
-	bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL;
+	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
 	INIT_LIST_HEAD(&bo->pinned_link);
 #ifdef CONFIG_PROC_FS
 	INIT_LIST_HEAD(&bo->client_link);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 051fe990c1338..4bff609961689 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -19,6 +19,9 @@ struct xe_vm;
 
 #define XE_BO_MAX_PLACEMENTS	3
 
+/* TODO: To be selected with VM_MADVISE */
+#define	XE_BO_PRIORITY_NORMAL	1
+
 /** @xe_bo: XE buffer object */
 struct xe_bo {
 	/** @ttm: TTM base buffer object */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 1202f8007f795..8be765adf702d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -36,7 +36,6 @@
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_sys_mgr.h"
 #include "xe_vm.h"
-#include "xe_vm_madvise.h"
 #include "xe_wait_user_fence.h"
 #include "xe_hwmon.h"
 
@@ -117,18 +116,17 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
-			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
 			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
 };
 
 static const struct file_operations xe_driver_fops = {
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
deleted file mode 100644
index 0ef7d483d0501..0000000000000
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ /dev/null
@@ -1,299 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include "xe_vm_madvise.h"
-
-#include <linux/nospec.h>
-
-#include <drm/ttm/ttm_tt.h>
-#include <drm/xe_drm.h>
-
-#include "xe_bo.h"
-#include "xe_vm.h"
-
-static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
-				       struct xe_vma **vmas, int num_vmas,
-				       u64 value)
-{
-	int i, err;
-
-	if (XE_IOCTL_DBG(xe, value > XE_MEM_REGION_CLASS_VRAM))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, value == XE_MEM_REGION_CLASS_VRAM &&
-			 !xe->info.is_dgfx))
-		return -EINVAL;
-
-	for (i = 0; i < num_vmas; ++i) {
-		struct xe_bo *bo;
-
-		bo = xe_vma_bo(vmas[i]);
-
-		err = xe_bo_lock(bo, true);
-		if (err)
-			return err;
-		bo->props.preferred_mem_class = value;
-		xe_bo_placement_for_flags(xe, bo, bo->flags);
-		xe_bo_unlock(bo);
-	}
-
-	return 0;
-}
-
-static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm,
-				struct xe_vma **vmas, int num_vmas, u64 value)
-{
-	int i, err;
-
-	if (XE_IOCTL_DBG(xe, value > xe->info.tile_count))
-		return -EINVAL;
-
-	for (i = 0; i < num_vmas; ++i) {
-		struct xe_bo *bo;
-
-		bo = xe_vma_bo(vmas[i]);
-
-		err = xe_bo_lock(bo, true);
-		if (err)
-			return err;
-		bo->props.preferred_gt = value;
-		xe_bo_placement_for_flags(xe, bo, bo->flags);
-		xe_bo_unlock(bo);
-	}
-
-	return 0;
-}
-
-static int madvise_preferred_mem_class_gt(struct xe_device *xe,
-					  struct xe_vm *vm,
-					  struct xe_vma **vmas, int num_vmas,
-					  u64 value)
-{
-	int i, err;
-	u32 gt_id = upper_32_bits(value);
-	u32 mem_class = lower_32_bits(value);
-
-	if (XE_IOCTL_DBG(xe, mem_class > XE_MEM_REGION_CLASS_VRAM))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, mem_class == XE_MEM_REGION_CLASS_VRAM &&
-			 !xe->info.is_dgfx))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, gt_id > xe->info.tile_count))
-		return -EINVAL;
-
-	for (i = 0; i < num_vmas; ++i) {
-		struct xe_bo *bo;
-
-		bo = xe_vma_bo(vmas[i]);
-
-		err = xe_bo_lock(bo, true);
-		if (err)
-			return err;
-		bo->props.preferred_mem_class = mem_class;
-		bo->props.preferred_gt = gt_id;
-		xe_bo_placement_for_flags(xe, bo, bo->flags);
-		xe_bo_unlock(bo);
-	}
-
-	return 0;
-}
-
-static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
-			      struct xe_vma **vmas, int num_vmas, u64 value)
-{
-	int i, err;
-
-	for (i = 0; i < num_vmas; ++i) {
-		struct xe_bo *bo;
-
-		bo = xe_vma_bo(vmas[i]);
-		if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
-			return -EINVAL;
-
-		err = xe_bo_lock(bo, true);
-		if (err)
-			return err;
-		bo->props.cpu_atomic = !!value;
-
-		/*
-		 * All future CPU accesses must be from system memory only, we
-		 * just invalidate the CPU page tables which will trigger a
-		 * migration on next access.
-		 */
-		if (bo->props.cpu_atomic)
-			ttm_bo_unmap_virtual(&bo->ttm);
-		xe_bo_unlock(bo);
-	}
-
-	return 0;
-}
-
-static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm,
-				 struct xe_vma **vmas, int num_vmas, u64 value)
-{
-	int i, err;
-
-	for (i = 0; i < num_vmas; ++i) {
-		struct xe_bo *bo;
-
-		bo = xe_vma_bo(vmas[i]);
-		if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
-				 !(bo->flags & XE_BO_CREATE_VRAM1_BIT)))
-			return -EINVAL;
-
-		err = xe_bo_lock(bo, true);
-		if (err)
-			return err;
-		bo->props.device_atomic = !!value;
-		xe_bo_unlock(bo);
-	}
-
-	return 0;
-}
-
-static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
-			    struct xe_vma **vmas, int num_vmas, u64 value)
-{
-	int i, err;
-
-	if (XE_IOCTL_DBG(xe, value > DRM_XE_VMA_PRIORITY_HIGH))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, value == DRM_XE_VMA_PRIORITY_HIGH &&
-			 !capable(CAP_SYS_NICE)))
-		return -EPERM;
-
-	for (i = 0; i < num_vmas; ++i) {
-		struct xe_bo *bo;
-
-		bo = xe_vma_bo(vmas[i]);
-
-		err = xe_bo_lock(bo, true);
-		if (err)
-			return err;
-		bo->ttm.priority = value;
-		ttm_bo_move_to_lru_tail(&bo->ttm);
-		xe_bo_unlock(bo);
-	}
-
-	return 0;
-}
-
-static int madvise_pin(struct xe_device *xe, struct xe_vm *vm,
-		       struct xe_vma **vmas, int num_vmas, u64 value)
-{
-	drm_warn(&xe->drm, "NIY");
-	return 0;
-}
-
-typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
-			    struct xe_vma **vmas, int num_vmas, u64 value);
-
-static const madvise_func madvise_funcs[] = {
-	[DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class,
-	[DRM_XE_VM_MADVISE_PREFERRED_GT] = madvise_preferred_gt,
-	[DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] =
-		madvise_preferred_mem_class_gt,
-	[DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic,
-	[DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic,
-	[DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority,
-	[DRM_XE_VM_MADVISE_PIN] = madvise_pin,
-};
-
-static struct xe_vma **
-get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
-{
-	struct xe_vma **vmas, **__vmas;
-	struct drm_gpuva *gpuva;
-	int max_vmas = 8;
-
-	lockdep_assert_held(&vm->lock);
-
-	vmas = kmalloc(max_vmas * sizeof(*vmas), GFP_KERNEL);
-	if (!vmas)
-		return NULL;
-
-	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
-		struct xe_vma *vma = gpuva_to_vma(gpuva);
-
-		if (xe_vma_is_userptr(vma))
-			continue;
-
-		if (*num_vmas == max_vmas) {
-			max_vmas <<= 1;
-			__vmas = krealloc(vmas, max_vmas * sizeof(*vmas),
-					  GFP_KERNEL);
-			if (!__vmas)
-				return NULL;
-			vmas = __vmas;
-		}
-
-		vmas[*num_vmas] = vma;
-		*num_vmas += 1;
-	}
-
-	return vmas;
-}
-
-int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file)
-{
-	struct xe_device *xe = to_xe_device(dev);
-	struct xe_file *xef = to_xe_file(file);
-	struct drm_xe_vm_madvise *args = data;
-	struct xe_vm *vm;
-	struct xe_vma **vmas = NULL;
-	int num_vmas = 0, err = 0, idx;
-
-	if (XE_IOCTL_DBG(xe, args->extensions) ||
-	    XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
-	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, args->property > ARRAY_SIZE(madvise_funcs)))
-		return -EINVAL;
-
-	vm = xe_vm_lookup(xef, args->vm_id);
-	if (XE_IOCTL_DBG(xe, !vm))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, !xe_vm_in_fault_mode(vm))) {
-		err = -EINVAL;
-		goto put_vm;
-	}
-
-	down_read(&vm->lock);
-
-	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
-		err = -ENOENT;
-		goto unlock_vm;
-	}
-
-	vmas = get_vmas(vm, &num_vmas, args->addr, args->range);
-	if (XE_IOCTL_DBG(xe, err))
-		goto unlock_vm;
-
-	if (XE_IOCTL_DBG(xe, !vmas)) {
-		err = -ENOMEM;
-		goto unlock_vm;
-	}
-
-	if (XE_IOCTL_DBG(xe, !num_vmas)) {
-		err = -EINVAL;
-		goto unlock_vm;
-	}
-
-	idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs));
-	err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value);
-
-unlock_vm:
-	up_read(&vm->lock);
-put_vm:
-	xe_vm_put(vm);
-	kfree(vmas);
-	return err;
-}
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
deleted file mode 100644
index eecd33acd2481..0000000000000
--- a/drivers/gpu/drm/xe/xe_vm_madvise.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#ifndef _XE_VM_MADVISE_H_
-#define _XE_VM_MADVISE_H_
-
-struct drm_device;
-struct drm_file;
-
-int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file);
-
-#endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 8154cecf6f0d7..808d92262bcd0 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -103,28 +103,26 @@ struct xe_user_extension {
 #define DRM_XE_VM_CREATE		0x03
 #define DRM_XE_VM_DESTROY		0x04
 #define DRM_XE_VM_BIND			0x05
-#define DRM_XE_EXEC_QUEUE_CREATE		0x06
-#define DRM_XE_EXEC_QUEUE_DESTROY		0x07
-#define DRM_XE_EXEC			0x08
+#define DRM_XE_EXEC			0x06
+#define DRM_XE_EXEC_QUEUE_CREATE	0x07
+#define DRM_XE_EXEC_QUEUE_DESTROY	0x08
 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY	0x09
-#define DRM_XE_WAIT_USER_FENCE		0x0a
-#define DRM_XE_VM_MADVISE		0x0b
-#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x0c
-
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x0a
+#define DRM_XE_WAIT_USER_FENCE		0x0b
 /* Must be kept compact -- no holes */
+
 #define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
 #define DRM_IOCTL_XE_GEM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
 #define DRM_IOCTL_XE_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
 #define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
-#define DRM_IOCTL_XE_VM_DESTROY			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
-#define DRM_IOCTL_XE_VM_BIND			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
+#define DRM_IOCTL_XE_VM_BIND			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_EXEC_QUEUE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
+#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
+#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY	DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
-#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
-#define DRM_IOCTL_XE_EXEC			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
-#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY	 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
-#define DRM_IOCTL_XE_VM_MADVISE			 DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
 
 /** struct drm_xe_engine_class_instance - instance of an engine class */
 struct drm_xe_engine_class_instance {
@@ -978,74 +976,6 @@ struct drm_xe_wait_user_fence {
 	__u64 reserved[2];
 };
 
-struct drm_xe_vm_madvise {
-	/** @extensions: Pointer to the first extension struct, if any */
-	__u64 extensions;
-
-	/** @vm_id: The ID VM in which the VMA exists */
-	__u32 vm_id;
-
-	/** @pad: MBZ */
-	__u32 pad;
-
-	/** @range: Number of bytes in the VMA */
-	__u64 range;
-
-	/** @addr: Address of the VMA to operation on */
-	__u64 addr;
-
-	/*
-	 * Setting the preferred location will trigger a migrate of the VMA
-	 * backing store to new location if the backing store is already
-	 * allocated.
-	 *
-	 * For DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS usage, see enum
-	 * drm_xe_memory_class.
-	 */
-#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS	0
-#define DRM_XE_VM_MADVISE_PREFERRED_GT		1
-	/*
-	 * In this case lower 32 bits are mem class, upper 32 are GT.
-	 * Combination provides a single IOCTL plus migrate VMA to preferred
-	 * location.
-	 */
-#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT	2
-	/*
-	 * The CPU will do atomic memory operations to this VMA. Must be set on
-	 * some devices for atomics to behave correctly.
-	 */
-#define DRM_XE_VM_MADVISE_CPU_ATOMIC		3
-	/*
-	 * The device will do atomic memory operations to this VMA. Must be set
-	 * on some devices for atomics to behave correctly.
-	 */
-#define DRM_XE_VM_MADVISE_DEVICE_ATOMIC		4
-	/*
-	 * Priority WRT to eviction (moving from preferred memory location due
-	 * to memory pressure). The lower the priority, the more likely to be
-	 * evicted.
-	 */
-#define DRM_XE_VM_MADVISE_PRIORITY		5
-#define		DRM_XE_VMA_PRIORITY_LOW		0
-		/* Default */
-#define		DRM_XE_VMA_PRIORITY_NORMAL	1
-		/* Must be user with elevated privileges */
-#define		DRM_XE_VMA_PRIORITY_HIGH	2
-	/* Pin the VMA in memory, must be user with elevated privileges */
-#define DRM_XE_VM_MADVISE_PIN			6
-	/** @property: property to set */
-	__u32 property;
-
-	/** @pad2: MBZ */
-	__u32 pad2;
-
-	/** @value: property value */
-	__u64 value;
-
-	/** @reserved: Reserved */
-	__u64 reserved[2];
-};
-
 /**
  * DOC: XE PMU event config IDs
  *
-- 
GitLab


From 34f0cf6dc4c79a915c7e1022f232f592bfa6c078 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 10 Nov 2023 15:41:53 +0000
Subject: [PATCH 2074/2340] drm/xe/uapi: Remove unused inaccessible memory
 region
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is not used and also the negative of the other 2 regions:
native_mem_regions and slow_mem_regions.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 808d92262bcd0..0f8c5afd35842 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -360,11 +360,6 @@ struct drm_xe_query_gt {
 	 * they live on a different GPU/Tile.
 	 */
 	__u64 slow_mem_regions;
-	/**
-	 * @inaccessible_mem_regions: Bit mask of instances from
-	 * drm_xe_query_mem_usage that is not accessible by this GT at all.
-	 */
-	__u64 inaccessible_mem_regions;
 	/** @reserved: Reserved */
 	__u64 reserved[8];
 };
-- 
GitLab


From 4195e5e5e3d544a90a1edac1e21cd53a5117bd1f Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 10 Nov 2023 15:41:54 +0000
Subject: [PATCH 2075/2340] drm/xe/uapi: Remove unused
 QUERY_CONFIG_MEM_REGION_COUNT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As part of uAPI cleanup, remove this constant which is not used. Memory
regions can be queried with DRM_XE_DEVICE_QUERY_MEM_USAGE.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 2 --
 include/uapi/drm/xe_drm.h     | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index cb3461971dc90..b5cd980f81f94 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -333,8 +333,6 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
 	config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
 	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count;
-	config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
-		hweight_long(xe->info.mem_region_mask);
 	config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
 		xe_exec_queue_device_get_max_priority(xe);
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 0f8c5afd35842..1ac9ae0591dea 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -311,6 +311,7 @@ struct drm_xe_query_mem_usage {
  * If a query is made with a struct drm_xe_device_query where .query
  * is equal to DRM_XE_DEVICE_QUERY_CONFIG, then the reply uses
  * struct drm_xe_query_config in .data.
+ *
  */
 struct drm_xe_query_config {
 	/** @num_params: number of parameters returned in info */
@@ -325,8 +326,7 @@ struct drm_xe_query_config {
 #define XE_QUERY_CONFIG_MIN_ALIGNMENT		2
 #define XE_QUERY_CONFIG_VA_BITS			3
 #define XE_QUERY_CONFIG_GT_COUNT		4
-#define XE_QUERY_CONFIG_MEM_REGION_COUNT	5
-#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	6
+#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	5
 	/** @info: array of elements containing the config info */
 	__u64 info[];
 };
-- 
GitLab


From 60f3c7fc5c2464f73a7d64a4cc2dd4707a0d1831 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 10 Nov 2023 15:41:55 +0000
Subject: [PATCH 2076/2340] drm/xe/uapi: Remove unused QUERY_CONFIG_GT_COUNT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As part of uAPI cleanup, remove this constant which is not used. Number
of GTs are provided as num_gt in drm_xe_query_gt_list.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 1 -
 include/uapi/drm/xe_drm.h     | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index b5cd980f81f94..e9c8c97a030f2 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -332,7 +332,6 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 	config->info[XE_QUERY_CONFIG_MIN_ALIGNMENT] =
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
 	config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
-	config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count;
 	config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
 		xe_exec_queue_device_get_max_priority(xe);
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 1ac9ae0591dea..097d045d04445 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -325,8 +325,7 @@ struct drm_xe_query_config {
 	#define XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
 #define XE_QUERY_CONFIG_MIN_ALIGNMENT		2
 #define XE_QUERY_CONFIG_VA_BITS			3
-#define XE_QUERY_CONFIG_GT_COUNT		4
-#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	5
+#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
 	/** @info: array of elements containing the config info */
 	__u64 info[];
 };
-- 
GitLab


From be13336e07b5cc26c8b971a50ff6dc60d7050417 Mon Sep 17 00:00:00 2001
From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Date: Fri, 10 Nov 2023 15:41:56 +0000
Subject: [PATCH 2077/2340] drm/xe/pmu: Drop interrupt pmu event

Drop interrupt event from PMU as that is not useful and not being used
by any UMD.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c       | 18 ------------------
 drivers/gpu/drm/xe/xe_pmu.c       | 19 +++++--------------
 drivers/gpu/drm/xe/xe_pmu_types.h |  8 --------
 include/uapi/drm/xe_drm.h         | 13 ++++++-------
 4 files changed, 11 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index c5315e02fc5bb..25ba5167c1b92 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -27,20 +27,6 @@
 #define IIR(offset)				XE_REG(offset + 0x8)
 #define IER(offset)				XE_REG(offset + 0xc)
 
-/*
- * Interrupt statistic for PMU. Increments the counter only if the
- * interrupt originated from the GPU so interrupts from a device which
- * shares the interrupt line are not accounted.
- */
-static __always_inline void xe_pmu_irq_stats(struct xe_device *xe)
-{
-	/*
-	 * A clever compiler translates that into INC. A not so clever one
-	 * should at least prevent store tearing.
-	 */
-	WRITE_ONCE(xe->pmu.irq_count, xe->pmu.irq_count + 1);
-}
-
 static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
 {
 	u32 val = xe_mmio_read32(mmio, reg);
@@ -360,8 +346,6 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 
 	xe_display_irq_enable(xe, gu_misc_iir);
 
-	xe_pmu_irq_stats(xe);
-
 	return IRQ_HANDLED;
 }
 
@@ -458,8 +442,6 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 	dg1_intr_enable(xe, false);
 	xe_display_irq_enable(xe, gu_misc_iir);
 
-	xe_pmu_irq_stats(xe);
-
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index abfc0b3aeac41..b843259578fd2 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -61,7 +61,7 @@ static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type)
 
 static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config)
 {
-	int sample_type = config_counter(config) - 1;
+	int sample_type = config_counter(config);
 	const unsigned int gt_id = gt->info.id;
 	struct xe_device *xe = gt->tile->xe;
 	struct xe_pmu *pmu = &xe->pmu;
@@ -114,10 +114,6 @@ config_status(struct xe_device *xe, u64 config)
 		return -ENOENT;
 
 	switch (config_counter(config)) {
-	case XE_PMU_INTERRUPTS(0):
-		if (gt_id)
-			return -ENOENT;
-		break;
 	case XE_PMU_RENDER_GROUP_BUSY(0):
 	case XE_PMU_COPY_GROUP_BUSY(0):
 	case XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
@@ -181,13 +177,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
 	const unsigned int gt_id = config_gt_id(event->attr.config);
 	const u64 config = event->attr.config;
 	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
-	struct xe_pmu *pmu = &xe->pmu;
 	u64 val;
 
 	switch (config_counter(config)) {
-	case XE_PMU_INTERRUPTS(0):
-		val = READ_ONCE(pmu->irq_count);
-		break;
 	case XE_PMU_RENDER_GROUP_BUSY(0):
 	case XE_PMU_COPY_GROUP_BUSY(0):
 	case XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
@@ -361,11 +353,10 @@ create_event_attributes(struct xe_pmu *pmu)
 		const char *unit;
 		bool global;
 	} events[] = {
-		__global_event(0, "interrupts", NULL),
-		__event(1, "render-group-busy", "ns"),
-		__event(2, "copy-group-busy", "ns"),
-		__event(3, "media-group-busy", "ns"),
-		__event(4, "any-engine-group-busy", "ns"),
+		__event(0, "render-group-busy", "ns"),
+		__event(1, "copy-group-busy", "ns"),
+		__event(2, "media-group-busy", "ns"),
+		__event(3, "any-engine-group-busy", "ns"),
 	};
 
 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
index 4ccc7e9042f64..9cadbd243f577 100644
--- a/drivers/gpu/drm/xe/xe_pmu_types.h
+++ b/drivers/gpu/drm/xe/xe_pmu_types.h
@@ -51,14 +51,6 @@ struct xe_pmu {
 	 *
 	 */
 	u64 sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS];
-	/**
-	 * @irq_count: Number of interrupts
-	 *
-	 * Intentionally unsigned long to avoid atomics or heuristics on 32bit.
-	 * 4e9 interrupts are a lot and postprocessing can really deal with an
-	 * occasional wraparound easily. It's 32bit after all.
-	 */
-	unsigned long irq_count;
 	/**
 	 * @events_attr_group: Device events attribute group.
 	 */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 097d045d04445..e007dbefd6274 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -977,7 +977,7 @@ struct drm_xe_wait_user_fence {
  * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
  * particular event.
  *
- * For example to open the XE_PMU_INTERRUPTS(0):
+ * For example to open the XE_PMU_RENDER_GROUP_BUSY(0):
  *
  * .. code-block:: C
  *
@@ -991,7 +991,7 @@ struct drm_xe_wait_user_fence {
  *	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
  *	attr.use_clockid = 1;
  *	attr.clockid = CLOCK_MONOTONIC;
- *	attr.config = XE_PMU_INTERRUPTS(0);
+ *	attr.config = XE_PMU_RENDER_GROUP_BUSY(0);
  *
  *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
  */
@@ -1004,11 +1004,10 @@ struct drm_xe_wait_user_fence {
 #define ___XE_PMU_OTHER(gt, x) \
 	(((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT))
 
-#define XE_PMU_INTERRUPTS(gt)			___XE_PMU_OTHER(gt, 0)
-#define XE_PMU_RENDER_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 1)
-#define XE_PMU_COPY_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 2)
-#define XE_PMU_MEDIA_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 3)
-#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___XE_PMU_OTHER(gt, 4)
+#define XE_PMU_RENDER_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 0)
+#define XE_PMU_COPY_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 1)
+#define XE_PMU_MEDIA_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 2)
+#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___XE_PMU_OTHER(gt, 3)
 
 #if defined(__cplusplus)
 }
-- 
GitLab


From b646ce9ce99f74d3dee8fd56303b9255d3c278ec Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Mon, 13 Nov 2023 16:49:43 -0800
Subject: [PATCH 2078/2340] drm/xe: Make xe_mmio_tile_vram_size() static

During xe_mmio_probe_vram(), we already store the values returned from
xe_mmio_tile_vram_size() into the xe_tile structures.
There is no need to call xe_mmio_tile_vram_size() again later during
setup of the STOLEN region. Just use the values stored in the root tile.

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c           | 3 ++-
 drivers/gpu/drm/xe/xe_mmio.h           | 1 -
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 7 ++-----
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index d8f9fabf715e9..0baaef53f3a72 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -201,7 +201,8 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
  * NOTE: multi-tile bases will include the tile offset.
  *
  */
-int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_offset)
+static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
+				  u64 *tile_size, u64 *tile_offset)
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_gt *gt = tile->primary_gt;
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 24a23dad7dce6..b244e9063caaf 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -135,7 +135,6 @@ static inline bool xe_mmio_in_range(const struct xe_gt *gt,
 }
 
 int xe_mmio_probe_vram(struct xe_device *xe);
-int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base);
 u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 0c533d36791d9..837b522cb91fe 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -62,12 +62,9 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	u64 stolen_size;
 	u64 tile_offset;
 	u64 tile_size;
-	u64 vram_size;
 
-	if (xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset)) {
-		drm_err(&xe->drm, "Querying total vram size failed\n");
-		return 0;
-	}
+	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
+	tile_size = tile->mem.vram.actual_physical_size;
 
 	/* Use DSM base address instead for stolen memory */
 	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
-- 
GitLab


From d5dc73dbd148ef38dbe35f18d2908d2ff343c208 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 14 Nov 2023 13:34:27 +0000
Subject: [PATCH 2079/2340] drm/xe/uapi: Add missing DRM_ prefix in uAPI
 constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most constants defined in xe_drm.h use DRM_XE_ as prefix which is
helpful to identify the name space. Make this systematic and add
this prefix where it was missing.

v2:
- fix vertical alignment of define values
- remove double DRM_ in some variables (José Roberto de Souza)

v3: Rebase

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c         |  14 ++--
 drivers/gpu/drm/xe/xe_exec_queue.c |  22 ++---
 drivers/gpu/drm/xe/xe_gt.c         |   2 +-
 drivers/gpu/drm/xe/xe_pmu.c        |  24 +++---
 drivers/gpu/drm/xe/xe_query.c      |  28 +++----
 drivers/gpu/drm/xe/xe_vm.c         |  54 ++++++-------
 drivers/gpu/drm/xe/xe_vm_doc.h     |  12 +--
 include/uapi/drm/xe_drm.h          | 124 ++++++++++++++---------------
 8 files changed, 140 insertions(+), 140 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 5b5f764586fee..e8c89b6e06dc1 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -209,7 +209,7 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 
 	/* The order of placements should indicate preferred location */
 
-	if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) {
+	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
 		try_add_system(bo, places, bo_flags, &c);
 		try_add_vram(xe, bo, places, bo_flags, &c);
 	} else {
@@ -1814,9 +1814,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, args->flags &
-			 ~(XE_GEM_CREATE_FLAG_DEFER_BACKING |
-			   XE_GEM_CREATE_FLAG_SCANOUT |
-			   XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
+			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
+			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
+			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
 			   xe->info.mem_region_mask)))
 		return -EINVAL;
 
@@ -1836,15 +1836,15 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
 		return -EINVAL;
 
-	if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING)
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
 		bo_flags |= XE_BO_DEFER_BACKING;
 
-	if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT)
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
 		bo_flags |= XE_BO_SCANOUT_BIT;
 
 	bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
 
-	if (args->flags & XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
 			return -EINVAL;
 
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index f67a6dee4a6f5..fbb4d3cca9f68 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -393,7 +393,7 @@ static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_q
 	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
 		return -EINVAL;
 
-	if (value > XE_ACC_GRANULARITY_64M)
+	if (value > DRM_XE_ACC_GRANULARITY_64M)
 		return -EINVAL;
 
 	q->usm.acc_granularity = value;
@@ -406,14 +406,14 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
 					     u64 value, bool create);
 
 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
-	[XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
-	[XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
-	[XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
-	[XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
-	[XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
-	[XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
-	[XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
-	[XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
 };
 
 static int exec_queue_user_ext_set_property(struct xe_device *xe,
@@ -445,7 +445,7 @@ typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
 					       bool create);
 
 static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = {
-	[XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
+	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
 };
 
 #define MAX_USER_EXTENSIONS	16
@@ -764,7 +764,7 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	switch (args->property) {
-	case XE_EXEC_QUEUE_GET_PROPERTY_BAN:
+	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
 		args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED);
 		ret = 0;
 		break;
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 6c885dde5d59c..53b39fe916019 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -560,7 +560,7 @@ static void xe_uevent_gt_reset_failure(struct pci_dev *pdev, u8 tile_id, u8 gt_i
 {
 	char *reset_event[4];
 
-	reset_event[0] = XE_RESET_FAILED_UEVENT "=NEEDS_RESET";
+	reset_event[0] = DRM_XE_RESET_FAILED_UEVENT "=NEEDS_RESET";
 	reset_event[1] = kasprintf(GFP_KERNEL, "TILE_ID=%d", tile_id);
 	reset_event[2] = kasprintf(GFP_KERNEL, "GT_ID=%d", gt_id);
 	reset_event[3] = NULL;
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index b843259578fd2..9d0b7887cfc45 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -17,12 +17,12 @@ static unsigned int xe_pmu_target_cpu = -1;
 
 static unsigned int config_gt_id(const u64 config)
 {
-	return config >> __XE_PMU_GT_SHIFT;
+	return config >> __DRM_XE_PMU_GT_SHIFT;
 }
 
 static u64 config_counter(const u64 config)
 {
-	return config & ~(~0ULL << __XE_PMU_GT_SHIFT);
+	return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
 }
 
 static void xe_pmu_event_destroy(struct perf_event *event)
@@ -114,13 +114,13 @@ config_status(struct xe_device *xe, u64 config)
 		return -ENOENT;
 
 	switch (config_counter(config)) {
-	case XE_PMU_RENDER_GROUP_BUSY(0):
-	case XE_PMU_COPY_GROUP_BUSY(0):
-	case XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
+	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
+	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
+	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
 		if (gt->info.type == XE_GT_TYPE_MEDIA)
 			return -ENOENT;
 		break;
-	case XE_PMU_MEDIA_GROUP_BUSY(0):
+	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
 		if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
 			return -ENOENT;
 		break;
@@ -180,10 +180,10 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
 	u64 val;
 
 	switch (config_counter(config)) {
-	case XE_PMU_RENDER_GROUP_BUSY(0):
-	case XE_PMU_COPY_GROUP_BUSY(0):
-	case XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
-	case XE_PMU_MEDIA_GROUP_BUSY(0):
+	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
+	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
+	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
+	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
 		val = engine_group_busyness_read(gt, config);
 		break;
 	default:
@@ -369,7 +369,7 @@ create_event_attributes(struct xe_pmu *pmu)
 	/* Count how many counters we will be exposing. */
 	for_each_gt(gt, xe, j) {
 		for (i = 0; i < ARRAY_SIZE(events); i++) {
-			u64 config = ___XE_PMU_OTHER(j, events[i].counter);
+			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);
 
 			if (!config_status(xe, config))
 				count++;
@@ -396,7 +396,7 @@ create_event_attributes(struct xe_pmu *pmu)
 
 	for_each_gt(gt, xe, j) {
 		for (i = 0; i < ARRAY_SIZE(events); i++) {
-			u64 config = ___XE_PMU_OTHER(j, events[i].counter);
+			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);
 			char *str;
 
 			if (config_status(xe, config))
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index e9c8c97a030f2..565a716302bbb 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -261,7 +261,7 @@ static int query_memory_usage(struct xe_device *xe,
 		return -ENOMEM;
 
 	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
-	usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM;
+	usage->regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
 	usage->regions[0].instance = 0;
 	usage->regions[0].min_page_size = PAGE_SIZE;
 	usage->regions[0].total_size = man->size << PAGE_SHIFT;
@@ -273,7 +273,7 @@ static int query_memory_usage(struct xe_device *xe,
 		man = ttm_manager_type(&xe->ttm, i);
 		if (man) {
 			usage->regions[usage->num_regions].mem_class =
-				XE_MEM_REGION_CLASS_VRAM;
+				DRM_XE_MEM_REGION_CLASS_VRAM;
 			usage->regions[usage->num_regions].instance =
 				usage->num_regions;
 			usage->regions[usage->num_regions].min_page_size =
@@ -305,7 +305,7 @@ static int query_memory_usage(struct xe_device *xe,
 
 static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 {
-	const u32 num_params = XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1;
+	const u32 num_params = DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1;
 	size_t size =
 		sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
 	struct drm_xe_query_config __user *query_ptr =
@@ -324,15 +324,15 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 		return -ENOMEM;
 
 	config->num_params = num_params;
-	config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
+	config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
 		xe->info.devid | (xe->info.revid << 16);
 	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
-		config->info[XE_QUERY_CONFIG_FLAGS] =
-			XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
-	config->info[XE_QUERY_CONFIG_MIN_ALIGNMENT] =
+		config->info[DRM_XE_QUERY_CONFIG_FLAGS] =
+			DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
+	config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
-	config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
-	config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
+	config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
+	config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
 		xe_exec_queue_device_get_max_priority(xe);
 
 	if (copy_to_user(query_ptr, config, size)) {
@@ -368,9 +368,9 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 	gt_list->num_gt = xe->info.gt_count;
 	for_each_gt(gt, xe, id) {
 		if (xe_gt_is_media_type(gt))
-			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MEDIA;
+			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
 		else
-			gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MAIN;
+			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
 		gt_list->gt_list[id].gt_id = gt->info.id;
 		gt_list->gt_list[id].clock_freq = gt->info.clock_freq;
 		if (!IS_DGFX(xe))
@@ -468,21 +468,21 @@ static int query_gt_topology(struct xe_device *xe,
 	for_each_gt(gt, xe, id) {
 		topo.gt_id = id;
 
-		topo.type = XE_TOPO_DSS_GEOMETRY;
+		topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
 		query_ptr = copy_mask(query_ptr, &topo,
 				      gt->fuse_topo.g_dss_mask,
 				      sizeof(gt->fuse_topo.g_dss_mask));
 		if (IS_ERR(query_ptr))
 			return PTR_ERR(query_ptr);
 
-		topo.type = XE_TOPO_DSS_COMPUTE;
+		topo.type = DRM_XE_TOPO_DSS_COMPUTE;
 		query_ptr = copy_mask(query_ptr, &topo,
 				      gt->fuse_topo.c_dss_mask,
 				      sizeof(gt->fuse_topo.c_dss_mask));
 		if (IS_ERR(query_ptr))
 			return PTR_ERR(query_ptr);
 
-		topo.type = XE_TOPO_EU_PER_DSS;
+		topo.type = DRM_XE_TOPO_EU_PER_DSS;
 		query_ptr = copy_mask(query_ptr, &topo,
 				      gt->fuse_topo.eu_mask_per_dss,
 				      sizeof(gt->fuse_topo.eu_mask_per_dss));
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index b4a4ed28019c8..66d878bc464a5 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2177,8 +2177,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	       (ULL)bo_offset_or_userptr);
 
 	switch (operation) {
-	case XE_VM_BIND_OP_MAP:
-	case XE_VM_BIND_OP_MAP_USERPTR:
+	case DRM_XE_VM_BIND_OP_MAP:
+	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
 						  obj, bo_offset_or_userptr);
 		if (IS_ERR(ops))
@@ -2189,13 +2189,13 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 
 			op->tile_mask = tile_mask;
 			op->map.immediate =
-				flags & XE_VM_BIND_FLAG_IMMEDIATE;
+				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
 			op->map.read_only =
-				flags & XE_VM_BIND_FLAG_READONLY;
-			op->map.is_null = flags & XE_VM_BIND_FLAG_NULL;
+				flags & DRM_XE_VM_BIND_FLAG_READONLY;
+			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
 		}
 		break;
-	case XE_VM_BIND_OP_UNMAP:
+	case DRM_XE_VM_BIND_OP_UNMAP:
 		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
 		if (IS_ERR(ops))
 			return ops;
@@ -2206,7 +2206,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			op->tile_mask = tile_mask;
 		}
 		break;
-	case XE_VM_BIND_OP_PREFETCH:
+	case DRM_XE_VM_BIND_OP_PREFETCH:
 		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
 		if (IS_ERR(ops))
 			return ops;
@@ -2218,7 +2218,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			op->prefetch.region = region;
 		}
 		break;
-	case XE_VM_BIND_OP_UNMAP_ALL:
+	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
 		xe_assert(vm->xe, bo);
 
 		err = xe_bo_lock(bo, true);
@@ -2828,13 +2828,13 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 
 #ifdef TEST_VM_ASYNC_OPS_ERROR
 #define SUPPORTED_FLAGS	\
-	(FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
-	 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \
-	 XE_VM_BIND_FLAG_NULL | 0xffff)
+	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_ASYNC | \
+	 DRM_XE_VM_BIND_FLAG_READONLY | DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
+	 DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
 #else
 #define SUPPORTED_FLAGS	\
-	(XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
-	 XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | \
+	(DRM_XE_VM_BIND_FLAG_ASYNC | DRM_XE_VM_BIND_FLAG_READONLY | \
+	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
 	 0xffff)
 #endif
 #define XE_64K_PAGE_MASK 0xffffull
@@ -2882,45 +2882,45 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u32 obj = (*bind_ops)[i].obj;
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
 		u32 region = (*bind_ops)[i].region;
-		bool is_null = flags & XE_VM_BIND_FLAG_NULL;
+		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
 
 		if (i == 0) {
-			*async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
+			*async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
 			if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) {
 				err = -EINVAL;
 				goto free_bind_ops;
 			}
 		} else if (XE_IOCTL_DBG(xe, *async !=
-					!!(flags & XE_VM_BIND_FLAG_ASYNC))) {
+					!!(flags & DRM_XE_VM_BIND_FLAG_ASYNC))) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
 
-		if (XE_IOCTL_DBG(xe, op > XE_VM_BIND_OP_PREFETCH) ||
+		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
 		    XE_IOCTL_DBG(xe, obj && is_null) ||
 		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
-		    XE_IOCTL_DBG(xe, op != XE_VM_BIND_OP_MAP &&
+		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
 				 is_null) ||
 		    XE_IOCTL_DBG(xe, !obj &&
-				 op == XE_VM_BIND_OP_MAP &&
+				 op == DRM_XE_VM_BIND_OP_MAP &&
 				 !is_null) ||
 		    XE_IOCTL_DBG(xe, !obj &&
-				 op == XE_VM_BIND_OP_UNMAP_ALL) ||
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, addr &&
-				 op == XE_VM_BIND_OP_UNMAP_ALL) ||
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, range &&
-				 op == XE_VM_BIND_OP_UNMAP_ALL) ||
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, obj &&
-				 op == XE_VM_BIND_OP_MAP_USERPTR) ||
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
 		    XE_IOCTL_DBG(xe, obj &&
-				 op == XE_VM_BIND_OP_PREFETCH) ||
+				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, region &&
-				 op != XE_VM_BIND_OP_PREFETCH) ||
+				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, !(BIT(region) &
 				       xe->info.mem_region_mask)) ||
 		    XE_IOCTL_DBG(xe, obj &&
-				 op == XE_VM_BIND_OP_UNMAP)) {
+				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
@@ -2929,7 +2929,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
 		    XE_IOCTL_DBG(xe, !range &&
-				 op != XE_VM_BIND_OP_UNMAP_ALL)) {
+				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
 			err = -EINVAL;
 			goto free_bind_ops;
 		}
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index b1b2dc4a6089c..516f4dc972236 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -32,9 +32,9 @@
  * Operations
  * ----------
  *
- * XE_VM_BIND_OP_MAP		- Create mapping for a BO
- * XE_VM_BIND_OP_UNMAP		- Destroy mapping for a BO / userptr
- * XE_VM_BIND_OP_MAP_USERPTR	- Create mapping for userptr
+ * DRM_XE_VM_BIND_OP_MAP		- Create mapping for a BO
+ * DRM_XE_VM_BIND_OP_UNMAP		- Destroy mapping for a BO / userptr
+ * DRM_XE_VM_BIND_OP_MAP_USERPTR	- Create mapping for userptr
  *
  * Implementation details
  * ~~~~~~~~~~~~~~~~~~~~~~
@@ -113,7 +113,7 @@
  * VM uses to report errors to. The ufence wait interface can be used to wait on
  * a VM going into an error state. Once an error is reported the VM's async
  * worker is paused. While the VM's async worker is paused sync,
- * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
+ * DRM_XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
  * uses believe the error state is fixed, the async worker can be resumed via
  * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the
  * first operation processed is the operation that caused the original error.
@@ -193,7 +193,7 @@
  * In a VM is in fault mode (TODO: link to fault mode), new bind operations that
  * create mappings are by default are deferred to the page fault handler (first
  * use). This behavior can be overriden by setting the flag
- * XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
+ * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
  * immediately.
  *
  * User pointer
@@ -322,7 +322,7 @@
  *
  * By default, on a faulting VM binds just allocate the VMA and the actual
  * updating of the page tables is defered to the page fault handler. This
- * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in
+ * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in
  * the VM bind which will then do the bind immediately.
  *
  * Page fault handler
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index e007dbefd6274..3ef49e3baaed5 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -19,12 +19,12 @@ extern "C" {
 /**
  * DOC: uevent generated by xe on it's pci node.
  *
- * XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt
+ * DRM_XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt
  * fails. The value supplied with the event is always "NEEDS_RESET".
  * Additional information supplied is tile id and gt id of the gt unit for
  * which reset has failed.
  */
-#define XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
+#define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
 
 /**
  * struct xe_user_extension - Base class for defining a chain of extensions
@@ -148,14 +148,14 @@ struct drm_xe_engine_class_instance {
  * enum drm_xe_memory_class - Supported memory classes.
  */
 enum drm_xe_memory_class {
-	/** @XE_MEM_REGION_CLASS_SYSMEM: Represents system memory. */
-	XE_MEM_REGION_CLASS_SYSMEM = 0,
+	/** @DRM_XE_MEM_REGION_CLASS_SYSMEM: Represents system memory. */
+	DRM_XE_MEM_REGION_CLASS_SYSMEM = 0,
 	/**
-	 * @XE_MEM_REGION_CLASS_VRAM: On discrete platforms, this
+	 * @DRM_XE_MEM_REGION_CLASS_VRAM: On discrete platforms, this
 	 * represents the memory that is local to the device, which we
 	 * call VRAM. Not valid on integrated platforms.
 	 */
-	XE_MEM_REGION_CLASS_VRAM
+	DRM_XE_MEM_REGION_CLASS_VRAM
 };
 
 /**
@@ -215,7 +215,7 @@ struct drm_xe_query_mem_region {
 	 * always equal the @total_size, since all of it will be CPU
 	 * accessible.
 	 *
-	 * Note this is only tracked for XE_MEM_REGION_CLASS_VRAM
+	 * Note this is only tracked for DRM_XE_MEM_REGION_CLASS_VRAM
 	 * regions (for other types the value here will always equal
 	 * zero).
 	 */
@@ -227,7 +227,7 @@ struct drm_xe_query_mem_region {
 	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
 	 * accounting. Without this the value here will always equal
 	 * zero.  Note this is only currently tracked for
-	 * XE_MEM_REGION_CLASS_VRAM regions (for other types the value
+	 * DRM_XE_MEM_REGION_CLASS_VRAM regions (for other types the value
 	 * here will always be zero).
 	 */
 	__u64 cpu_visible_used;
@@ -320,12 +320,12 @@ struct drm_xe_query_config {
 	/** @pad: MBZ */
 	__u32 pad;
 
-#define XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
-#define XE_QUERY_CONFIG_FLAGS			1
-	#define XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
-#define XE_QUERY_CONFIG_MIN_ALIGNMENT		2
-#define XE_QUERY_CONFIG_VA_BITS			3
-#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
+#define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
+#define DRM_XE_QUERY_CONFIG_FLAGS			1
+	#define DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
+#define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT		2
+#define DRM_XE_QUERY_CONFIG_VA_BITS			3
+#define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
 	/** @info: array of elements containing the config info */
 	__u64 info[];
 };
@@ -339,8 +339,8 @@ struct drm_xe_query_config {
  * implementing graphics and/or media operations.
  */
 struct drm_xe_query_gt {
-#define XE_QUERY_GT_TYPE_MAIN		0
-#define XE_QUERY_GT_TYPE_MEDIA		1
+#define DRM_XE_QUERY_GT_TYPE_MAIN		0
+#define DRM_XE_QUERY_GT_TYPE_MEDIA		1
 	/** @type: GT type: Main or Media */
 	__u16 type;
 	/** @gt_id: Unique ID of this GT within the PCI Device */
@@ -400,7 +400,7 @@ struct drm_xe_query_topology_mask {
 	 *   DSS_GEOMETRY    ff ff ff ff 00 00 00 00
 	 * means 32 DSS are available for geometry.
 	 */
-#define XE_TOPO_DSS_GEOMETRY	(1 << 0)
+#define DRM_XE_TOPO_DSS_GEOMETRY	(1 << 0)
 	/*
 	 * To query the mask of Dual Sub Slices (DSS) available for compute
 	 * operations. For example a query response containing the following
@@ -408,7 +408,7 @@ struct drm_xe_query_topology_mask {
 	 *   DSS_COMPUTE    ff ff ff ff 00 00 00 00
 	 * means 32 DSS are available for compute.
 	 */
-#define XE_TOPO_DSS_COMPUTE	(1 << 1)
+#define DRM_XE_TOPO_DSS_COMPUTE		(1 << 1)
 	/*
 	 * To query the mask of Execution Units (EU) available per Dual Sub
 	 * Slices (DSS). For example a query response containing the following
@@ -416,7 +416,7 @@ struct drm_xe_query_topology_mask {
 	 *   EU_PER_DSS    ff ff 00 00 00 00 00 00
 	 * means each DSS has 16 EU.
 	 */
-#define XE_TOPO_EU_PER_DSS	(1 << 2)
+#define DRM_XE_TOPO_EU_PER_DSS		(1 << 2)
 	/** @type: type of mask */
 	__u16 type;
 
@@ -497,8 +497,8 @@ struct drm_xe_gem_create {
 	 */
 	__u64 size;
 
-#define XE_GEM_CREATE_FLAG_DEFER_BACKING	(0x1 << 24)
-#define XE_GEM_CREATE_FLAG_SCANOUT		(0x1 << 25)
+#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING		(0x1 << 24)
+#define DRM_XE_GEM_CREATE_FLAG_SCANOUT			(0x1 << 25)
 /*
  * When using VRAM as a possible placement, ensure that the corresponding VRAM
  * allocation will always use the CPU accessible part of VRAM. This is important
@@ -514,7 +514,7 @@ struct drm_xe_gem_create {
  * display surfaces, therefore the kernel requires setting this flag for such
  * objects, otherwise an error is thrown on small-bar systems.
  */
-#define XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM	(0x1 << 26)
+#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM	(0x1 << 26)
 	/**
 	 * @flags: Flags, currently a mask of memory instances of where BO can
 	 * be placed
@@ -581,14 +581,14 @@ struct drm_xe_ext_set_property {
 };
 
 struct drm_xe_vm_create {
-#define XE_VM_EXTENSION_SET_PROPERTY	0
+#define DRM_XE_VM_EXTENSION_SET_PROPERTY	0
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-#define DRM_XE_VM_CREATE_SCRATCH_PAGE	(0x1 << 0)
-#define DRM_XE_VM_CREATE_COMPUTE_MODE	(0x1 << 1)
-#define DRM_XE_VM_CREATE_ASYNC_DEFAULT	(0x1 << 2)
-#define DRM_XE_VM_CREATE_FAULT_MODE	(0x1 << 3)
+#define DRM_XE_VM_CREATE_SCRATCH_PAGE		(0x1 << 0)
+#define DRM_XE_VM_CREATE_COMPUTE_MODE		(0x1 << 1)
+#define DRM_XE_VM_CREATE_ASYNC_DEFAULT		(0x1 << 2)
+#define DRM_XE_VM_CREATE_FAULT_MODE		(0x1 << 3)
 	/** @flags: Flags */
 	__u32 flags;
 
@@ -644,29 +644,29 @@ struct drm_xe_vm_bind_op {
 	 */
 	__u64 tile_mask;
 
-#define XE_VM_BIND_OP_MAP		0x0
-#define XE_VM_BIND_OP_UNMAP		0x1
-#define XE_VM_BIND_OP_MAP_USERPTR	0x2
-#define XE_VM_BIND_OP_UNMAP_ALL		0x3
-#define XE_VM_BIND_OP_PREFETCH		0x4
+#define DRM_XE_VM_BIND_OP_MAP		0x0
+#define DRM_XE_VM_BIND_OP_UNMAP		0x1
+#define DRM_XE_VM_BIND_OP_MAP_USERPTR	0x2
+#define DRM_XE_VM_BIND_OP_UNMAP_ALL	0x3
+#define DRM_XE_VM_BIND_OP_PREFETCH	0x4
 	/** @op: Bind operation to perform */
 	__u32 op;
 
-#define XE_VM_BIND_FLAG_READONLY	(0x1 << 0)
-#define XE_VM_BIND_FLAG_ASYNC		(0x1 << 1)
+#define DRM_XE_VM_BIND_FLAG_READONLY	(0x1 << 0)
+#define DRM_XE_VM_BIND_FLAG_ASYNC	(0x1 << 1)
 	/*
 	 * Valid on a faulting VM only, do the MAP operation immediately rather
 	 * than deferring the MAP to the page fault handler.
 	 */
-#define XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 2)
+#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 2)
 	/*
 	 * When the NULL flag is set, the page tables are setup with a special
 	 * bit which indicates writes are dropped and all reads return zero.  In
-	 * the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP
+	 * the future, the NULL flags will only be valid for DRM_XE_VM_BIND_OP_MAP
 	 * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
 	 * intended to implement VK sparse bindings.
 	 */
-#define XE_VM_BIND_FLAG_NULL		(0x1 << 3)
+#define DRM_XE_VM_BIND_FLAG_NULL	(0x1 << 3)
 	/** @flags: Bind flags */
 	__u32 flags;
 
@@ -721,19 +721,19 @@ struct drm_xe_vm_bind {
 	__u64 reserved[2];
 };
 
-/* For use with XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */
+/* For use with DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */
 
 /* Monitor 128KB contiguous region with 4K sub-granularity */
-#define XE_ACC_GRANULARITY_128K 0
+#define DRM_XE_ACC_GRANULARITY_128K 0
 
 /* Monitor 2MB contiguous region with 64KB sub-granularity */
-#define XE_ACC_GRANULARITY_2M 1
+#define DRM_XE_ACC_GRANULARITY_2M 1
 
 /* Monitor 16MB contiguous region with 512KB sub-granularity */
-#define XE_ACC_GRANULARITY_16M 2
+#define DRM_XE_ACC_GRANULARITY_16M 2
 
 /* Monitor 64MB contiguous region with 2M sub-granularity */
-#define XE_ACC_GRANULARITY_64M 3
+#define DRM_XE_ACC_GRANULARITY_64M 3
 
 /**
  * struct drm_xe_exec_queue_set_property - exec queue set property
@@ -747,14 +747,14 @@ struct drm_xe_exec_queue_set_property {
 	/** @exec_queue_id: Exec queue ID */
 	__u32 exec_queue_id;
 
-#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY		0
-#define XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE		1
-#define XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
-#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		3
-#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		4
-#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		5
-#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		6
-#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY	7
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY			0
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE		1
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		3
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		4
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		5
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		6
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY		7
 	/** @property: property to set */
 	__u32 property;
 
@@ -766,7 +766,7 @@ struct drm_xe_exec_queue_set_property {
 };
 
 struct drm_xe_exec_queue_create {
-#define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
+#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
@@ -805,7 +805,7 @@ struct drm_xe_exec_queue_get_property {
 	/** @exec_queue_id: Exec queue ID */
 	__u32 exec_queue_id;
 
-#define XE_EXEC_QUEUE_GET_PROPERTY_BAN			0
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN	0
 	/** @property: property to get */
 	__u32 property;
 
@@ -973,11 +973,11 @@ struct drm_xe_wait_user_fence {
 /**
  * DOC: XE PMU event config IDs
  *
- * Check 'man perf_event_open' to use the ID's XE_PMU_XXXX listed in xe_drm.h
+ * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
  * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
  * particular event.
  *
- * For example to open the XE_PMU_RENDER_GROUP_BUSY(0):
+ * For example to open the DRMXE_PMU_RENDER_GROUP_BUSY(0):
  *
  * .. code-block:: C
  *
@@ -991,7 +991,7 @@ struct drm_xe_wait_user_fence {
  *	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
  *	attr.use_clockid = 1;
  *	attr.clockid = CLOCK_MONOTONIC;
- *	attr.config = XE_PMU_RENDER_GROUP_BUSY(0);
+ *	attr.config = DRM_XE_PMU_RENDER_GROUP_BUSY(0);
  *
  *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
  */
@@ -999,15 +999,15 @@ struct drm_xe_wait_user_fence {
 /*
  * Top bits of every counter are GT id.
  */
-#define __XE_PMU_GT_SHIFT (56)
+#define __DRM_XE_PMU_GT_SHIFT (56)
 
-#define ___XE_PMU_OTHER(gt, x) \
-	(((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT))
+#define ___DRM_XE_PMU_OTHER(gt, x) \
+	(((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
 
-#define XE_PMU_RENDER_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 0)
-#define XE_PMU_COPY_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 1)
-#define XE_PMU_MEDIA_GROUP_BUSY(gt)		___XE_PMU_OTHER(gt, 2)
-#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___XE_PMU_OTHER(gt, 3)
+#define DRM_XE_PMU_RENDER_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 0)
+#define DRM_XE_PMU_COPY_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 1)
+#define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 2)
+#define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 3)
 
 #if defined(__cplusplus)
 }
-- 
GitLab


From 3ac4a7896d1c02918ee76acaf7e8160f3d11fa75 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 14 Nov 2023 13:34:28 +0000
Subject: [PATCH 2080/2340] drm/xe/uapi: Add _FLAG to uAPI constants usable for
 flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most constants defined in xe_drm.h which can be used for flags are
named DRM_XE_*_FLAG_*, which is helpful to identify them. Make this
systematic and add _FLAG where it was missing.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_sync.c            | 16 ++++++-------
 drivers/gpu/drm/xe/xe_vm.c              | 32 ++++++++++++-------------
 drivers/gpu/drm/xe/xe_vm_doc.h          |  2 +-
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 10 ++++----
 include/uapi/drm/xe_drm.h               | 30 +++++++++++------------
 5 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 73ef259aa3873..eafe53c2f55df 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -110,14 +110,14 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		return -EFAULT;
 
 	if (XE_IOCTL_DBG(xe, sync_in.flags &
-			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)) ||
+			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_FLAG_SIGNAL)) ||
 	    XE_IOCTL_DBG(xe, sync_in.pad) ||
 	    XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1]))
 		return -EINVAL;
 
-	signal = sync_in.flags & DRM_XE_SYNC_SIGNAL;
+	signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL;
 	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
-	case DRM_XE_SYNC_SYNCOBJ:
+	case DRM_XE_SYNC_FLAG_SYNCOBJ:
 		if (XE_IOCTL_DBG(xe, no_dma_fences && signal))
 			return -EOPNOTSUPP;
 
@@ -135,7 +135,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		}
 		break;
 
-	case DRM_XE_SYNC_TIMELINE_SYNCOBJ:
+	case DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ:
 		if (XE_IOCTL_DBG(xe, no_dma_fences && signal))
 			return -EOPNOTSUPP;
 
@@ -165,12 +165,12 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		}
 		break;
 
-	case DRM_XE_SYNC_DMA_BUF:
+	case DRM_XE_SYNC_FLAG_DMA_BUF:
 		if (XE_IOCTL_DBG(xe, "TODO"))
 			return -EINVAL;
 		break;
 
-	case DRM_XE_SYNC_USER_FENCE:
+	case DRM_XE_SYNC_FLAG_USER_FENCE:
 		if (XE_IOCTL_DBG(xe, !signal))
 			return -EOPNOTSUPP;
 
@@ -225,7 +225,7 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
 void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 			  struct dma_fence *fence)
 {
-	if (!(sync->flags & DRM_XE_SYNC_SIGNAL))
+	if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
 		return;
 
 	if (sync->chain_fence) {
@@ -253,7 +253,7 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 			dma_fence_put(fence);
 		}
 	} else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) ==
-		   DRM_XE_SYNC_USER_FENCE) {
+		   DRM_XE_SYNC_FLAG_USER_FENCE) {
 		job->user_fence.used = true;
 		job->user_fence.addr = sync->addr;
 		job->user_fence.value = sync->timeline_value;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 66d878bc464a5..e8dd467895377 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1920,10 +1920,10 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	return 0;
 }
 
-#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
-				    DRM_XE_VM_CREATE_COMPUTE_MODE | \
-				    DRM_XE_VM_CREATE_ASYNC_DEFAULT | \
-				    DRM_XE_VM_CREATE_FAULT_MODE)
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
+				    DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE | \
+				    DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \
+				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
 
 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file)
@@ -1941,9 +1941,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
-		args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE;
+		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
 
-	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
 			 !xe->info.supports_usm))
 		return -EINVAL;
 
@@ -1953,32 +1953,32 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
-			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
-			 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
 			 xe_device_in_non_fault_mode(xe)))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
 			 xe_device_in_fault_mode(xe)))
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, args->extensions))
 		return -EINVAL;
 
-	if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
-	if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE)
 		flags |= XE_VM_FLAG_COMPUTE_MODE;
-	if (args->flags & DRM_XE_VM_CREATE_ASYNC_DEFAULT)
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT)
 		flags |= XE_VM_FLAG_ASYNC_DEFAULT;
-	if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
 		flags |= XE_VM_FLAG_FAULT_MODE;
 
 	vm = xe_vm_create(xe, flags);
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index 516f4dc972236..bdc6659891a57 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -18,7 +18,7 @@
  * Scratch page
  * ------------
  *
- * If the VM is created with the flag, DRM_XE_VM_CREATE_SCRATCH_PAGE, set the
+ * If the VM is created with the flag, DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE, set the
  * entire page table structure defaults pointing to blank page allocated by the
  * VM. Invalid memory access rather than fault just read / write to this page.
  *
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 78686908f7fbb..13562db6c07f9 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -79,8 +79,8 @@ static int check_hw_engines(struct xe_device *xe,
 	return 0;
 }
 
-#define VALID_FLAGS	(DRM_XE_UFENCE_WAIT_SOFT_OP | \
-			 DRM_XE_UFENCE_WAIT_ABSTIME)
+#define VALID_FLAGS	(DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP | \
+			 DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
 #define MAX_OP		DRM_XE_UFENCE_WAIT_LTE
 
 static long to_jiffies_timeout(struct xe_device *xe,
@@ -107,7 +107,7 @@ static long to_jiffies_timeout(struct xe_device *xe,
 	 * Save the timeout to an u64 variable because nsecs_to_jiffies
 	 * might return a value that overflows s32 variable.
 	 */
-	if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
+	if (args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
 		t = drm_timeout_abs_to_jiffies(args->timeout);
 	else
 		t = nsecs_to_jiffies(args->timeout);
@@ -137,7 +137,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 		u64_to_user_ptr(args->instances);
 	u64 addr = args->addr;
 	int err;
-	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP;
+	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP;
 	long timeout;
 	ktime_t start;
 
@@ -206,7 +206,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	}
 	remove_wait_queue(&xe->ufence_wq, &w_wait);
 
-	if (!(args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)) {
+	if (!(args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)) {
 		args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start));
 		if (args->timeout < 0)
 			args->timeout = 0;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 3ef49e3baaed5..f6346a8351e49 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -585,10 +585,10 @@ struct drm_xe_vm_create {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-#define DRM_XE_VM_CREATE_SCRATCH_PAGE		(0x1 << 0)
-#define DRM_XE_VM_CREATE_COMPUTE_MODE		(0x1 << 1)
-#define DRM_XE_VM_CREATE_ASYNC_DEFAULT		(0x1 << 2)
-#define DRM_XE_VM_CREATE_FAULT_MODE		(0x1 << 3)
+#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE	(0x1 << 0)
+#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE	(0x1 << 1)
+#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT	(0x1 << 2)
+#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(0x1 << 3)
 	/** @flags: Flags */
 	__u32 flags;
 
@@ -831,11 +831,11 @@ struct drm_xe_sync {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-#define DRM_XE_SYNC_SYNCOBJ		0x0
-#define DRM_XE_SYNC_TIMELINE_SYNCOBJ	0x1
-#define DRM_XE_SYNC_DMA_BUF		0x2
-#define DRM_XE_SYNC_USER_FENCE		0x3
-#define DRM_XE_SYNC_SIGNAL		0x10
+#define DRM_XE_SYNC_FLAG_SYNCOBJ		0x0
+#define DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ	0x1
+#define DRM_XE_SYNC_FLAG_DMA_BUF		0x2
+#define DRM_XE_SYNC_FLAG_USER_FENCE		0x3
+#define DRM_XE_SYNC_FLAG_SIGNAL		0x10
 	__u32 flags;
 
 	/** @pad: MBZ */
@@ -921,8 +921,8 @@ struct drm_xe_wait_user_fence {
 	/** @op: wait operation (type of comparison) */
 	__u16 op;
 
-#define DRM_XE_UFENCE_WAIT_SOFT_OP	(1 << 0)	/* e.g. Wait on VM bind */
-#define DRM_XE_UFENCE_WAIT_ABSTIME	(1 << 1)
+#define DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP	(1 << 0)	/* e.g. Wait on VM bind */
+#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME	(1 << 1)
 	/** @flags: wait flags */
 	__u16 flags;
 
@@ -940,10 +940,10 @@ struct drm_xe_wait_user_fence {
 	__u64 mask;
 	/**
 	 * @timeout: how long to wait before bailing, value in nanoseconds.
-	 * Without DRM_XE_UFENCE_WAIT_ABSTIME flag set (relative timeout)
+	 * Without DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flag set (relative timeout)
 	 * it contains timeout expressed in nanoseconds to wait (fence will
 	 * expire at now() + timeout).
-	 * When DRM_XE_UFENCE_WAIT_ABSTIME flat is set (absolute timeout) wait
+	 * When DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flat is set (absolute timeout) wait
 	 * will end at timeout (uses system MONOTONIC_CLOCK).
 	 * Passing negative timeout leads to neverending wait.
 	 *
@@ -956,13 +956,13 @@ struct drm_xe_wait_user_fence {
 
 	/**
 	 * @num_engines: number of engine instances to wait on, must be zero
-	 * when DRM_XE_UFENCE_WAIT_SOFT_OP set
+	 * when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
 	 */
 	__u64 num_engines;
 
 	/**
 	 * @instances: user pointer to array of drm_xe_engine_class_instance to
-	 * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set
+	 * wait on, must be NULL when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
 	 */
 	__u64 instances;
 
-- 
GitLab


From 5ca2c4b800194b55a863882273b8ca34b56afb35 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Tue, 14 Nov 2023 13:34:29 +0000
Subject: [PATCH 2081/2340] drm/xe/uapi: Change rsvd to pad in struct
 drm_xe_class_instance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change rsvd to pad in struct drm_xe_class_instance to prevent the field
from being used in future.

v2: Change from fixup to regular commit because this touches the
    uAPI (Francois Dugast)

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 5 ++++-
 include/uapi/drm/xe_drm.h     | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 565a716302bbb..48befd9f08120 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -215,7 +215,10 @@ static int query_engines(struct xe_device *xe,
 				xe_to_user_engine_class[hwe->class];
 			hw_engine_info[i].engine_instance =
 				hwe->logical_instance;
-			hw_engine_info[i++].gt_id = gt->info.id;
+			hw_engine_info[i].gt_id = gt->info.id;
+			hw_engine_info[i].pad = 0;
+
+			i++;
 		}
 
 	if (copy_to_user(query_ptr, hw_engine_info, size)) {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index f6346a8351e49..a8d351c9fa7cd 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -141,7 +141,8 @@ struct drm_xe_engine_class_instance {
 
 	__u16 engine_instance;
 	__u16 gt_id;
-	__u16 rsvd;
+	/** @pad: MBZ */
+	__u16 pad;
 };
 
 /**
-- 
GitLab


From 45c30d80008264d55915f4b87c6f9bbb3261071c Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 14 Nov 2023 13:34:30 +0000
Subject: [PATCH 2082/2340] drm/xe/uapi: Rename *_mem_regions masks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 'native' doesn't make much sense on integrated devices.
- 'slow' is not necessarily true and doesn't go well with opposition
  to 'native'.

Instead, let's use 'near' vs 'far'. It makes sense with all the current
Intel GPUs and it is future proof. Right now, there's absolutely no need
to define among the 'far' memory, which ones are slower, either in terms
of latency, nunmber of hops or bandwidth.

In case of this might become a requirement in the future, a new query
could be added to indicate the certain 'distance' between a given engine
and a memory_region. But for now, this fulfill all of the current
requirements in the most straightforward way for the userspace drivers.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c |  8 ++++----
 include/uapi/drm/xe_drm.h     | 18 ++++++++++--------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 48befd9f08120..8b5136460ea66 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -377,12 +377,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 		gt_list->gt_list[id].gt_id = gt->info.id;
 		gt_list->gt_list[id].clock_freq = gt->info.clock_freq;
 		if (!IS_DGFX(xe))
-			gt_list->gt_list[id].native_mem_regions = 0x1;
+			gt_list->gt_list[id].near_mem_regions = 0x1;
 		else
-			gt_list->gt_list[id].native_mem_regions =
+			gt_list->gt_list[id].near_mem_regions =
 				BIT(gt_to_tile(gt)->id) << 1;
-		gt_list->gt_list[id].slow_mem_regions = xe->info.mem_region_mask ^
-			gt_list->gt_list[id].native_mem_regions;
+		gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
+			gt_list->gt_list[id].near_mem_regions;
 	}
 
 	if (copy_to_user(query_ptr, gt_list, size)) {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index a8d351c9fa7cd..30567500e6cd7 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -349,17 +349,19 @@ struct drm_xe_query_gt {
 	/** @clock_freq: A clock frequency for timestamp */
 	__u32 clock_freq;
 	/**
-	 * @native_mem_regions: Bit mask of instances from
-	 * drm_xe_query_mem_usage that lives on the same GPU/Tile and have
-	 * direct access.
+	 * @near_mem_regions: Bit mask of instances from
+	 * drm_xe_query_mem_usage that are nearest to the current engines
+	 * of this GT.
 	 */
-	__u64 native_mem_regions;
+	__u64 near_mem_regions;
 	/**
-	 * @slow_mem_regions: Bit mask of instances from
-	 * drm_xe_query_mem_usage that this GT can indirectly access, although
-	 * they live on a different GPU/Tile.
+	 * @far_mem_regions: Bit mask of instances from
+	 * drm_xe_query_mem_usage that are far from the engines of this GT.
+	 * In general, they have extra indirections when compared to the
+	 * @near_mem_regions. For a discrete device this could mean system
+	 * memory and memory living in a different tile.
 	 */
-	__u64 slow_mem_regions;
+	__u64 far_mem_regions;
 	/** @reserved: Reserved */
 	__u64 reserved[8];
 };
-- 
GitLab


From b02606d32376b8d51b33211f8c069b16165390eb Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 14 Nov 2023 13:34:31 +0000
Subject: [PATCH 2083/2340] drm/xe/uapi: Rename query's mem_usage to
 mem_regions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'Usage' gives an impression of telemetry information where someone
would query to see how the memory is currently used and available
size, etc. However this API is more than this. It is about a global
view of all the memory regions available in the system and user
space needs to have this information so they can then use the
mem_region masks that are returned for the engine access.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 16 ++++++++--------
 include/uapi/drm/xe_drm.h     | 14 +++++++-------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 8b5136460ea66..d495716b2c963 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -230,7 +230,7 @@ static int query_engines(struct xe_device *xe,
 	return 0;
 }
 
-static size_t calc_memory_usage_size(struct xe_device *xe)
+static size_t calc_mem_regions_size(struct xe_device *xe)
 {
 	u32 num_managers = 1;
 	int i;
@@ -239,15 +239,15 @@ static size_t calc_memory_usage_size(struct xe_device *xe)
 		if (ttm_manager_type(&xe->ttm, i))
 			num_managers++;
 
-	return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]);
+	return offsetof(struct drm_xe_query_mem_regions, regions[num_managers]);
 }
 
-static int query_memory_usage(struct xe_device *xe,
-			      struct drm_xe_device_query *query)
+static int query_mem_regions(struct xe_device *xe,
+			     struct drm_xe_device_query *query)
 {
-	size_t size = calc_memory_usage_size(xe);
-	struct drm_xe_query_mem_usage *usage;
-	struct drm_xe_query_mem_usage __user *query_ptr =
+	size_t size = calc_mem_regions_size(xe);
+	struct drm_xe_query_mem_regions *usage;
+	struct drm_xe_query_mem_regions __user *query_ptr =
 		u64_to_user_ptr(query->data);
 	struct ttm_resource_manager *man;
 	int ret, i;
@@ -499,7 +499,7 @@ static int query_gt_topology(struct xe_device *xe,
 static int (* const xe_query_funcs[])(struct xe_device *xe,
 				      struct drm_xe_device_query *query) = {
 	query_engines,
-	query_memory_usage,
+	query_mem_regions,
 	query_config,
 	query_gt_list,
 	query_hwconfig,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 30567500e6cd7..8ec12f9f41326 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -291,13 +291,13 @@ struct drm_xe_query_engine_cycles {
 };
 
 /**
- * struct drm_xe_query_mem_usage - describe memory regions and usage
+ * struct drm_xe_query_mem_regions - describe memory regions
  *
  * If a query is made with a struct drm_xe_device_query where .query
- * is equal to DRM_XE_DEVICE_QUERY_MEM_USAGE, then the reply uses
- * struct drm_xe_query_mem_usage in .data.
+ * is equal to DRM_XE_DEVICE_QUERY_MEM_REGIONS, then the reply uses
+ * struct drm_xe_query_mem_regions in .data.
  */
-struct drm_xe_query_mem_usage {
+struct drm_xe_query_mem_regions {
 	/** @num_regions: number of memory regions returned in @regions */
 	__u32 num_regions;
 	/** @pad: MBZ */
@@ -350,13 +350,13 @@ struct drm_xe_query_gt {
 	__u32 clock_freq;
 	/**
 	 * @near_mem_regions: Bit mask of instances from
-	 * drm_xe_query_mem_usage that are nearest to the current engines
+	 * drm_xe_query_mem_regions that are nearest to the current engines
 	 * of this GT.
 	 */
 	__u64 near_mem_regions;
 	/**
 	 * @far_mem_regions: Bit mask of instances from
-	 * drm_xe_query_mem_usage that are far from the engines of this GT.
+	 * drm_xe_query_mem_regions that are far from the engines of this GT.
 	 * In general, they have extra indirections when compared to the
 	 * @near_mem_regions. For a discrete device this could mean system
 	 * memory and memory living in a different tile.
@@ -470,7 +470,7 @@ struct drm_xe_device_query {
 	__u64 extensions;
 
 #define DRM_XE_DEVICE_QUERY_ENGINES		0
-#define DRM_XE_DEVICE_QUERY_MEM_USAGE		1
+#define DRM_XE_DEVICE_QUERY_MEM_REGIONS		1
 #define DRM_XE_DEVICE_QUERY_CONFIG		2
 #define DRM_XE_DEVICE_QUERY_GT_LIST		3
 #define DRM_XE_DEVICE_QUERY_HWCONFIG		4
-- 
GitLab


From 9ad743515cc59275653f719886d1b93fa7a824ab Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 14 Nov 2023 13:34:32 +0000
Subject: [PATCH 2084/2340] drm/xe/uapi: Standardize the FLAG naming and
 assignment

Only cosmetic things. No functional change on this patch.
Define every flag with (1 << n) and use singular FLAG name.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c |  2 +-
 include/uapi/drm/xe_drm.h     | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index d495716b2c963..61a7d92b7e886 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -331,7 +331,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
 		xe->info.devid | (xe->info.revid << 16);
 	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
 		config->info[DRM_XE_QUERY_CONFIG_FLAGS] =
-			DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
+			DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
 	config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
 	config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 8ec12f9f41326..236e643be69ae 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -323,7 +323,7 @@ struct drm_xe_query_config {
 
 #define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
 #define DRM_XE_QUERY_CONFIG_FLAGS			1
-	#define DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM		(0x1 << 0)
+	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM	(1 << 0)
 #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT		2
 #define DRM_XE_QUERY_CONFIG_VA_BITS			3
 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
@@ -588,10 +588,10 @@ struct drm_xe_vm_create {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE	(0x1 << 0)
-#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE	(0x1 << 1)
-#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT	(0x1 << 2)
-#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(0x1 << 3)
+#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE	(1 << 0)
+#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE	(1 << 1)
+#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT	(1 << 2)
+#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 3)
 	/** @flags: Flags */
 	__u32 flags;
 
@@ -655,13 +655,13 @@ struct drm_xe_vm_bind_op {
 	/** @op: Bind operation to perform */
 	__u32 op;
 
-#define DRM_XE_VM_BIND_FLAG_READONLY	(0x1 << 0)
-#define DRM_XE_VM_BIND_FLAG_ASYNC	(0x1 << 1)
+#define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
+#define DRM_XE_VM_BIND_FLAG_ASYNC	(1 << 1)
 	/*
 	 * Valid on a faulting VM only, do the MAP operation immediately rather
 	 * than deferring the MAP to the page fault handler.
 	 */
-#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(0x1 << 2)
+#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 2)
 	/*
 	 * When the NULL flag is set, the page tables are setup with a special
 	 * bit which indicates writes are dropped and all reads return zero.  In
@@ -669,7 +669,7 @@ struct drm_xe_vm_bind_op {
 	 * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
 	 * intended to implement VK sparse bindings.
 	 */
-#define DRM_XE_VM_BIND_FLAG_NULL	(0x1 << 3)
+#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 3)
 	/** @flags: Bind flags */
 	__u32 flags;
 
-- 
GitLab


From 4a349c86110a6fab26ce5f4fcb545acf214efed5 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 14 Nov 2023 13:34:33 +0000
Subject: [PATCH 2085/2340] drm/xe/uapi: Differentiate WAIT_OP from WAIT_MASK

On one hand the WAIT_OP represents the operation use for waiting such
as ==, !=, > and so on. On the other hand, the mask is applied to the
value used for comparision. Split those two to bring clarity to the uapi.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 14 +++++++-------
 include/uapi/drm/xe_drm.h               | 21 +++++++++++----------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 13562db6c07f9..4d5c2555ce41b 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -25,22 +25,22 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
 		return -EFAULT;
 
 	switch (op) {
-	case DRM_XE_UFENCE_WAIT_EQ:
+	case DRM_XE_UFENCE_WAIT_OP_EQ:
 		passed = (rvalue & mask) == (value & mask);
 		break;
-	case DRM_XE_UFENCE_WAIT_NEQ:
+	case DRM_XE_UFENCE_WAIT_OP_NEQ:
 		passed = (rvalue & mask) != (value & mask);
 		break;
-	case DRM_XE_UFENCE_WAIT_GT:
+	case DRM_XE_UFENCE_WAIT_OP_GT:
 		passed = (rvalue & mask) > (value & mask);
 		break;
-	case DRM_XE_UFENCE_WAIT_GTE:
+	case DRM_XE_UFENCE_WAIT_OP_GTE:
 		passed = (rvalue & mask) >= (value & mask);
 		break;
-	case DRM_XE_UFENCE_WAIT_LT:
+	case DRM_XE_UFENCE_WAIT_OP_LT:
 		passed = (rvalue & mask) < (value & mask);
 		break;
-	case DRM_XE_UFENCE_WAIT_LTE:
+	case DRM_XE_UFENCE_WAIT_OP_LTE:
 		passed = (rvalue & mask) <= (value & mask);
 		break;
 	default:
@@ -81,7 +81,7 @@ static int check_hw_engines(struct xe_device *xe,
 
 #define VALID_FLAGS	(DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP | \
 			 DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
-#define MAX_OP		DRM_XE_UFENCE_WAIT_LTE
+#define MAX_OP		DRM_XE_UFENCE_WAIT_OP_LTE
 
 static long to_jiffies_timeout(struct xe_device *xe,
 			       struct drm_xe_wait_user_fence *args)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 236e643be69ae..b2bd76efd9402 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -915,12 +915,12 @@ struct drm_xe_wait_user_fence {
 	 */
 	__u64 addr;
 
-#define DRM_XE_UFENCE_WAIT_EQ	0
-#define DRM_XE_UFENCE_WAIT_NEQ	1
-#define DRM_XE_UFENCE_WAIT_GT	2
-#define DRM_XE_UFENCE_WAIT_GTE	3
-#define DRM_XE_UFENCE_WAIT_LT	4
-#define DRM_XE_UFENCE_WAIT_LTE	5
+#define DRM_XE_UFENCE_WAIT_OP_EQ	0x0
+#define DRM_XE_UFENCE_WAIT_OP_NEQ	0x1
+#define DRM_XE_UFENCE_WAIT_OP_GT	0x2
+#define DRM_XE_UFENCE_WAIT_OP_GTE	0x3
+#define DRM_XE_UFENCE_WAIT_OP_LT	0x4
+#define DRM_XE_UFENCE_WAIT_OP_LTE	0x5
 	/** @op: wait operation (type of comparison) */
 	__u16 op;
 
@@ -935,12 +935,13 @@ struct drm_xe_wait_user_fence {
 	/** @value: compare value */
 	__u64 value;
 
-#define DRM_XE_UFENCE_WAIT_U8		0xffu
-#define DRM_XE_UFENCE_WAIT_U16		0xffffu
-#define DRM_XE_UFENCE_WAIT_U32		0xffffffffu
-#define DRM_XE_UFENCE_WAIT_U64		0xffffffffffffffffu
+#define DRM_XE_UFENCE_WAIT_MASK_U8	0xffu
+#define DRM_XE_UFENCE_WAIT_MASK_U16	0xffffu
+#define DRM_XE_UFENCE_WAIT_MASK_U32	0xffffffffu
+#define DRM_XE_UFENCE_WAIT_MASK_U64	0xffffffffffffffffu
 	/** @mask: comparison mask */
 	__u64 mask;
+
 	/**
 	 * @timeout: how long to wait before bailing, value in nanoseconds.
 	 * Without DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flag set (relative timeout)
-- 
GitLab


From aaa115ffaa467782b01cfa81711424315823bdb5 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 14 Nov 2023 13:34:34 +0000
Subject: [PATCH 2086/2340] drm/xe/uapi: Be more specific about the vm_bind
 prefetch region

Let's bring a bit of clarity on this 'region' field that is
part of vm_bind operation struct. Rename and document to make
it more than obvious that it is a region instance and not a
mask and also that it should only be used with the prefetch
operation itself.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 15 ++++++++-------
 include/uapi/drm/xe_drm.h  |  8 ++++++--
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index e8dd467895377..174441c4ca5a0 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2160,7 +2160,8 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 static struct drm_gpuva_ops *
 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			 u64 bo_offset_or_userptr, u64 addr, u64 range,
-			 u32 operation, u32 flags, u8 tile_mask, u32 region)
+			 u32 operation, u32 flags, u8 tile_mask,
+			 u32 prefetch_region)
 {
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
 	struct drm_gpuva_ops *ops;
@@ -2215,7 +2216,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
 			op->tile_mask = tile_mask;
-			op->prefetch.region = region;
+			op->prefetch.region = prefetch_region;
 		}
 		break;
 	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
@@ -2881,7 +2882,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u32 flags = (*bind_ops)[i].flags;
 		u32 obj = (*bind_ops)[i].obj;
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
-		u32 region = (*bind_ops)[i].region;
+		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
 		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
 
 		if (i == 0) {
@@ -2915,9 +2916,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
 		    XE_IOCTL_DBG(xe, obj &&
 				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
-		    XE_IOCTL_DBG(xe, region &&
+		    XE_IOCTL_DBG(xe, prefetch_region &&
 				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
-		    XE_IOCTL_DBG(xe, !(BIT(region) &
+		    XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
 				       xe->info.mem_region_mask)) ||
 		    XE_IOCTL_DBG(xe, obj &&
 				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
@@ -3099,11 +3100,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u32 flags = bind_ops[i].flags;
 		u64 obj_offset = bind_ops[i].obj_offset;
 		u8 tile_mask = bind_ops[i].tile_mask;
-		u32 region = bind_ops[i].region;
+		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
 
 		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
 						  addr, range, op, flags,
-						  tile_mask, region);
+						  tile_mask, prefetch_region);
 		if (IS_ERR(ops[i])) {
 			err = PTR_ERR(ops[i]);
 			ops[i] = NULL;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b2bd76efd9402..88f3aca02b085 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -673,8 +673,12 @@ struct drm_xe_vm_bind_op {
 	/** @flags: Bind flags */
 	__u32 flags;
 
-	/** @mem_region: Memory region to prefetch VMA to, instance not a mask */
-	__u32 region;
+	/**
+	 * @prefetch_mem_region_instance: Memory region to prefetch VMA to.
+	 * It is a region instance, not a mask.
+	 * To be used only with %DRM_XE_VM_BIND_OP_PREFETCH operation.
+	 */
+	__u32 prefetch_mem_region_instance;
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
-- 
GitLab


From 32dd40fb48c56265ab08d379fecb8bbf62e3c427 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 15 Nov 2023 10:30:30 -0800
Subject: [PATCH 2087/2340] drm/xe/dg2: Wa_18028616096 now applies to all DG2

The workaround database was just updated to extend this workaround to
DG2-G11 (whereas previously it applied only to G10 and G12).

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20231115183029.2649992-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d03e6674519f1..6572715dfc093 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -403,12 +403,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			     PERF_FIX_BALANCING_CFE_DISABLE))
 	},
 	{ XE_RTP_NAME("18028616096"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
-		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
-	},
-	{ XE_RTP_NAME("18028616096"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G12),
+	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
 	},
-- 
GitLab


From 4a9b7d29c117fc6e49690728f35b6a16454556f2 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Thu, 16 Nov 2023 16:12:39 +0100
Subject: [PATCH 2088/2340] drm/xe/guc: Fix wrong assert about full_len

This variable holds full length of the message, including header
length so it should be checked against GUC_CTB_MSG_MAX_LEN.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index a84e111bb36ad..c44e750746958 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -419,7 +419,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	full_len = len + GUC_CTB_HDR_LEN;
 
 	lockdep_assert_held(&ct->lock);
-	xe_assert(xe, full_len <= (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
+	xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN);
 	xe_assert(xe, tail <= h2g->info.size);
 
 	/* Command will wrap, zero fill (NOPs), return and check credits again */
-- 
GitLab


From cd1c9c54c34b3a2540fdf49eafd49a61747a6342 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Thu, 16 Nov 2023 16:12:40 +0100
Subject: [PATCH 2089/2340] drm/xe/guc: Copy response data from proper
 registers

While copying GuC response from the scratch registers to the buffer,
formula to identify next scratch register is broken. Fix it.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 311c5d539423f..aa4af536c1e98 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -703,7 +703,7 @@ proto:
 		response_buf[0] = header;
 
 		for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
-			reply_reg.addr += i * sizeof(u32);
+			reply_reg.addr += sizeof(u32);
 			response_buf[i] = xe_mmio_read32(gt, reply_reg);
 		}
 	}
-- 
GitLab


From 1d087cb7d81f9a17760154eef5ac8b894428cdbe Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Thu, 16 Nov 2023 16:12:41 +0100
Subject: [PATCH 2090/2340] drm/xe/guc: Fix handling of
 GUC_HXG_TYPE_NO_RESPONSE_BUSY

If GuC responds with the NO_RESPONSE_BUSY message, we extend
our timeout while waiting for the actual response, but we wrongly
assumed that the next message will be RESPONSE_SUCCESS, missing
that we still can get RESPONSE_FAILURE.

Change the condition for the expected message type, using only
common bits from RESPONSE_SUCCESS and RESPONSE_FAILURE (as they
differ, by ABI design, only by the last bit).

v2: add comment/checks to the code (Matt)

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index aa4af536c1e98..56edcb2b0e452 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -659,9 +659,20 @@ timeout:
 	header = xe_mmio_read32(gt, reply_reg);
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
 	    GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
-
-		ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_TYPE,
-				     FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+		/*
+		 * Once we got a BUSY reply we must wait again for the final
+		 * response but this time we can't use ORIGIN mask anymore.
+		 * To spot a right change in the reply, we take advantage that
+		 * response SUCCESS and FAILURE differ only by the single bit
+		 * and all other bits are set and can be used as a new mask.
+		 */
+		u32 resp_bits = GUC_HXG_TYPE_RESPONSE_SUCCESS & GUC_HXG_TYPE_RESPONSE_FAILURE;
+		u32 resp_mask = FIELD_PREP(GUC_HXG_MSG_0_TYPE, resp_bits);
+
+		BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS);
+		BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
+
+		ret = xe_mmio_wait32(gt, reply_reg,  resp_mask, resp_mask,
 				     1000000, &header, false);
 
 		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
-- 
GitLab


From cac74742faea603b288592be118b4f100ed2c863 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Thu, 16 Nov 2023 16:12:42 +0100
Subject: [PATCH 2091/2340] drm/xe/guc: Use valid scratch register for posting
 read

There are only 4 scratch registers VF_SW_FLAG(0..3) on each GuC.
We shouldn't use non-existing register VF_SW_FLAG(4) for posting
read.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 56edcb2b0e452..6de2ab05bf4e1 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -615,7 +615,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
 	u32 header, reply;
 	struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
 		MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
-	const u32 LAST_INDEX = VF_SW_FLAG_COUNT;
+	const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1;
 	int ret;
 	int i;
 
-- 
GitLab


From e7b4ebd7c6b3d25557aa83b43c3497e31ac89101 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 9 Oct 2023 10:00:38 +0100
Subject: [PATCH 2092/2340] drm/xe/bo: don't hold dma-resv lock over
 drm_gem_handle_create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This seems to create a locking inversion with object_name_lock. The lock
is held by drm_prime_fd_to_handle when calling our xe_gem_prime_import
hook, which might eventually go on to grab the dma-resv lock during the
attach. However we also have the opposite locking order in
xe_gem_create_ioctl which is holding the dma-resv lock when calling
drm_gem_handle_create, which wants to eventually grab object_name_lock:

-> #1 (reservation_ww_class_mutex){+.+.}-{3:3}:
<4> [635.739288]        lock_acquire+0x169/0x3d0
<4> [635.739294]        __ww_mutex_lock.constprop.0+0x164/0x1e60
<4> [635.739300]        ww_mutex_lock_interruptible+0x42/0x1a0
<4> [635.739305]        drm_gem_shmem_pin+0x4b/0x140 [drm_shmem_helper]
<4> [635.739317]        dma_buf_dynamic_attach+0x101/0x430
<4> [635.739323]        xe_gem_prime_import+0xcc/0x2e0 [xe]
<4> [635.739499]        drm_prime_fd_to_handle_ioctl+0x184/0x2e0 [drm]
<4> [635.739594]        drm_ioctl_kernel+0x16f/0x250 [drm]
<4> [635.739693]        drm_ioctl+0x35e/0x620 [drm]
<4> [635.739789]        __x64_sys_ioctl+0xb7/0xf0
<4> [635.739794]        do_syscall_64+0x3c/0x90
<4> [635.739799]        entry_SYSCALL_64_after_hwframe+0x6e/0xd8
<4> [635.739805]
-> #0 (&dev->object_name_lock){+.+.}-{3:3}:
<4> [635.739813]        check_prev_add+0x1ba/0x14a0
<4> [635.739818]        __lock_acquire+0x203e/0x2ff0
<4> [635.739823]        lock_acquire+0x169/0x3d0
<4> [635.739827]        __mutex_lock+0x124/0x1310
<4> [635.739832]        drm_gem_handle_create+0x32/0x50 [drm]
<4> [635.739927]        xe_gem_create_ioctl+0x1d3/0x550 [xe]
<4> [635.740102]        drm_ioctl_kernel+0x16f/0x250 [drm]
<4> [635.740197]        drm_ioctl+0x35e/0x620 [drm]
<4> [635.740293]        __x64_sys_ioctl+0xb7/0xf0
<4> [635.740297]        do_syscall_64+0x3c/0x90
<4> [635.740302]        entry_SYSCALL_64_after_hwframe+0x6e/0xd8
<4> [635.740307]

It looks like it should be safe to simply drop the dma-resv lock prior
to publishing the object when calling drm_gem_handle_create.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/743
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e8c89b6e06dc1..835eab6432633 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1856,14 +1856,16 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		if (XE_IOCTL_DBG(xe, !vm))
 			return -ENOENT;
 		err = xe_vm_lock(vm, true);
-		if (err) {
-			xe_vm_put(vm);
-			return err;
-		}
+		if (err)
+			goto out_vm;
 	}
 
 	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
 			  bo_flags);
+
+	if (vm)
+		xe_vm_unlock(vm);
+
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto out_vm;
@@ -1877,15 +1879,17 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	goto out_put;
 
 out_bulk:
-	if (vm && !xe_vm_in_fault_mode(vm))
+	if (vm && !xe_vm_in_fault_mode(vm)) {
+		xe_vm_lock(vm, false);
 		__xe_bo_unset_bulk_move(bo);
+		xe_vm_unlock(vm);
+	}
 out_put:
 	xe_bo_put(bo);
 out_vm:
-	if (vm) {
-		xe_vm_unlock(vm);
+	if (vm)
 		xe_vm_put(vm);
-	}
+
 	return err;
 }
 
-- 
GitLab


From 8735f8616d65816fd80a4958e570d8f448a6590f Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 17 Nov 2023 09:40:47 -0800
Subject: [PATCH 2093/2340] drm/xe: Fold GEN11_MOCS_ENTRIES into
 gen12_mocs_desc

GEN11_MOCS_ENTRIES dates back from importing the table from the i915
module. The macro was used so the it could be maintained in a single
place and platforms would just override with additional entries.

With the platforms supported by xe, each of them is just defining
individual tables without re-using this define. Move it inside
gen12_mocs_desc that is the only user.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231117174049.527192-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 228 +++++++++++++++++------------------
 1 file changed, 112 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 46e9992578629..a072fbccc6936 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -149,123 +149,87 @@ struct xe_mocs_info {
  *       For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
  */
 
-#define GEN11_MOCS_ENTRIES \
-	/* Entries 0 and 1 are defined per-platform */ \
-	/* Base - L3 + LLC */ \
-	MOCS_ENTRY(2, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
-		L3_3_WB), \
-	/* Base - Uncached */ \
-	MOCS_ENTRY(3, \
-		LE_1_UC | LE_TC_1_LLC, \
-		L3_1_UC), \
-	/* Base - L3 */ \
-	MOCS_ENTRY(4, \
-		LE_1_UC | LE_TC_1_LLC, \
-		L3_3_WB), \
-	/* Base - LLC */ \
-	MOCS_ENTRY(5, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
-		L3_1_UC), \
-	/* Age 0 - LLC */ \
-	MOCS_ENTRY(6, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
-		L3_1_UC), \
-	/* Age 0 - L3 + LLC */ \
-	MOCS_ENTRY(7, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
-		L3_3_WB), \
-	/* Age: Don't Chg. - LLC */ \
-	MOCS_ENTRY(8, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
-		L3_1_UC), \
-	/* Age: Don't Chg. - L3 + LLC */ \
-	MOCS_ENTRY(9, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
-		L3_3_WB), \
-	/* No AOM - LLC */ \
-	MOCS_ENTRY(10, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
-		L3_1_UC), \
-	/* No AOM - L3 + LLC */ \
-	MOCS_ENTRY(11, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
-		L3_3_WB), \
-	/* No AOM; Age 0 - LLC */ \
-	MOCS_ENTRY(12, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
-		L3_1_UC), \
-	/* No AOM; Age 0 - L3 + LLC */ \
-	MOCS_ENTRY(13, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
-		L3_3_WB), \
-	/* No AOM; Age:DC - LLC */ \
-	MOCS_ENTRY(14, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
-		L3_1_UC), \
-	/* No AOM; Age:DC - L3 + LLC */ \
-	MOCS_ENTRY(15, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
-		L3_3_WB), \
-	/* Self-Snoop - L3 + LLC */ \
-	MOCS_ENTRY(18, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
-		L3_3_WB), \
-	/* Skip Caching - L3 + LLC(12.5%) */ \
-	MOCS_ENTRY(19, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
-		L3_3_WB), \
-	/* Skip Caching - L3 + LLC(25%) */ \
-	MOCS_ENTRY(20, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
-		L3_3_WB), \
-	/* Skip Caching - L3 + LLC(50%) */ \
-	MOCS_ENTRY(21, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
-		L3_3_WB), \
-	/* Skip Caching - L3 + LLC(75%) */ \
-	MOCS_ENTRY(22, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
-		L3_3_WB), \
-	/* Skip Caching - L3 + LLC(87.5%) */ \
-	MOCS_ENTRY(23, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
-		L3_3_WB), \
-	/* HW Reserved - SW program but never use */ \
-	MOCS_ENTRY(62, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
-		L3_1_UC), \
-	/* HW Reserved - SW program but never use */ \
-	MOCS_ENTRY(63, \
-		LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
-		L3_1_UC)
-
-static const struct xe_mocs_entry dg1_mocs_desc[] = {
-	/* UC */
-	MOCS_ENTRY(1, 0, L3_1_UC),
-	/* WB - L3 */
-	MOCS_ENTRY(5, 0, L3_3_WB),
-	/* WB - L3 50% */
-	MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
-	/* WB - L3 25% */
-	MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
-	/* WB - L3 12.5% */
-	MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
-
-	/* HDC:L1 + L3 */
-	MOCS_ENTRY(48, 0, L3_3_WB),
-	/* HDC:L1 */
-	MOCS_ENTRY(49, 0, L3_1_UC),
-
-	/* HW Reserved */
-	MOCS_ENTRY(60, 0, L3_1_UC),
-	MOCS_ENTRY(61, 0, L3_1_UC),
-	MOCS_ENTRY(62, 0, L3_1_UC),
-	MOCS_ENTRY(63, 0, L3_1_UC),
-};
-
 static const struct xe_mocs_entry gen12_mocs_desc[] = {
-	GEN11_MOCS_ENTRIES,
+	/* Base - L3 + LLC */
+	MOCS_ENTRY(2,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Base - Uncached */
+	MOCS_ENTRY(3,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* Base - L3 */
+	MOCS_ENTRY(4,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Base - LLC */
+	MOCS_ENTRY(5,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Age 0 - LLC */
+	MOCS_ENTRY(6,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1),
+		   L3_1_UC),
+	/* Age 0 - L3 + LLC */
+	MOCS_ENTRY(7,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1),
+		   L3_3_WB),
+	/* Age: Don't Chg. - LLC */
+	MOCS_ENTRY(8,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2),
+		   L3_1_UC),
+	/* Age: Don't Chg. - L3 + LLC */
+	MOCS_ENTRY(9,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2),
+		   L3_3_WB),
+	/* No AOM - LLC */
+	MOCS_ENTRY(10,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM - L3 + LLC */
+	MOCS_ENTRY(11,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1),
+		   L3_3_WB),
+	/* No AOM; Age 0 - LLC */
+	MOCS_ENTRY(12,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM; Age 0 - L3 + LLC */
+	MOCS_ENTRY(13,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1),
+		   L3_3_WB),
+	/* No AOM; Age:DC - LLC */
+	MOCS_ENTRY(14,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM; Age:DC - L3 + LLC */
+	MOCS_ENTRY(15,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1),
+		   L3_3_WB),
+	/* Self-Snoop - L3 + LLC */
+	MOCS_ENTRY(18,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(12.5%) */
+	MOCS_ENTRY(19,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(25%) */
+	MOCS_ENTRY(20,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(50%) */
+	MOCS_ENTRY(21,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(75%) */
+	MOCS_ENTRY(22,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(87.5%) */
+	MOCS_ENTRY(23,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7),
+		   L3_3_WB),
 	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
 	MOCS_ENTRY(48,
 		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
@@ -290,6 +254,38 @@ static const struct xe_mocs_entry gen12_mocs_desc[] = {
 	MOCS_ENTRY(61,
 		   LE_1_UC | LE_TC_1_LLC,
 		   L3_3_WB),
+	/* HW Reserved - SW program but never use */
+	MOCS_ENTRY(62,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Reserved - SW program but never use */
+	MOCS_ENTRY(63,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC)
+};
+
+static const struct xe_mocs_entry dg1_mocs_desc[] = {
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+	/* WB - L3 */
+	MOCS_ENTRY(5, 0, L3_3_WB),
+	/* WB - L3 50% */
+	MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
+	/* WB - L3 25% */
+	MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
+	/* WB - L3 12.5% */
+	MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
+
+	/* HDC:L1 + L3 */
+	MOCS_ENTRY(48, 0, L3_3_WB),
+	/* HDC:L1 */
+	MOCS_ENTRY(49, 0, L3_1_UC),
+
+	/* HW Reserved */
+	MOCS_ENTRY(60, 0, L3_1_UC),
+	MOCS_ENTRY(61, 0, L3_1_UC),
+	MOCS_ENTRY(62, 0, L3_1_UC),
+	MOCS_ENTRY(63, 0, L3_1_UC),
 };
 
 static const struct xe_mocs_entry dg2_mocs_desc[] = {
-- 
GitLab


From 4399e95102edfceb7a7dd7eb72cd27b776e7d38b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 17 Nov 2023 09:40:48 -0800
Subject: [PATCH 2094/2340] drm/xe/mocs: Bring comment about mocs back to
 reality

The mocs documentation was copied from i915 and doesn't match the
reality in xe. Reword it so it matches what the code is doing.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231117174049.527192-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index a072fbccc6936..2d380053b477f 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -123,30 +123,20 @@ struct xe_mocs_info {
  *
  * These tables are intended to be kept reasonably consistent across
  * HW platforms, and for ICL+, be identical across OSes. To achieve
- * that, for Icelake and above, list of entries is published as part
- * of bspec.
+ * that, the list of entries is published as part of bspec.
  *
- * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For Gen < 12
- * they will be initialized to PTE. Gen >= 12 don't have a setting for
- * PTE and those platforms except TGL/RKL will be initialized L3 WB to
- * catch accidental use of reserved and unused mocs indexes.
+ * Entries not part of the following tables are undefined as far as userspace is
+ * concerned and shouldn't be relied upon. The last few entries are reserved by
+ * the hardware. They should be initialized according to bspec and never used.
  *
- * The last few entries are reserved by the hardware. For ICL+ they
- * should be initialized according to bspec and never used, for older
- * platforms they should never be written to.
+ * NOTE1: These tables are part of bspec and defined as part of the hardware
+ * interface. It is expected that, for specific hardware platform, existing
+ * entries will remain constant and the table will only be updated by adding new
+ * entries, filling unused positions.
  *
- * NOTE1: These tables are part of bspec and defined as part of hardware
- *       interface for ICL+. For older platforms, they are part of kernel
- *       ABI. It is expected that, for specific hardware platform, existing
- *       entries will remain constant and the table will only be updated by
- *       adding new entries, filling unused positions.
- *
- * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
- *       indices have been set to L3 WB. These reserved entries should never
- *       be used, they may be changed to low performant variants with better
- *       coherency in the future if more entries are needed.
- *       For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
+ * NOTE2: Reserved and unspecified MOCS indices have been set to L3 WB. These
+ * reserved entries should never be used. They may be changed to low performant
+ * variants with better coherency in the future if more entries are needed.
  */
 
 static const struct xe_mocs_entry gen12_mocs_desc[] = {
-- 
GitLab


From 0bc519d20ffa7a450bfa21c644c2de95ae8027dc Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 17 Nov 2023 09:40:49 -0800
Subject: [PATCH 2095/2340] drm/xe: Remove GEN[0-9]*_ prefixes

After noticing in logs there were still mentions to GEN6 registers, it
was clear commit d9b79ad275e7 ("drm/xe: Drop gen afixes from registers")
didn't take care of all the afixes. Some were added later, but there are
also constants and strings still using that.  Continue the cleanup
removing the remaining ones.

To keep it consistent with code nearby, a few other changes are made:

	- Remove prefix in INTEL_LEGACY_64B_CONTEXT
	- Remove GEN8_CTX_L3LLC_COHERENT since it's unused
	- Rename GEN9_FREQ_SCALER to GT_FREQUENCY_SCALER

v2: Use XELP_ as prefix for NUM_MOCS_ENTRIES and remove changes to
    MOCS_ENTRIES as this is now done as part of a previous commit
    (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231117174049.527192-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_execlist.c       | 16 ++++++++--------
 drivers/gpu/drm/xe/xe_gt.c             |  2 +-
 drivers/gpu/drm/xe/xe_guc.c            |  2 +-
 drivers/gpu/drm/xe/xe_guc_pc.c         | 18 +++++++++---------
 drivers/gpu/drm/xe/xe_lrc.c            | 23 +++++++++++------------
 drivers/gpu/drm/xe/xe_mmio.c           | 14 +++++++-------
 drivers/gpu/drm/xe/xe_mmio.h           |  2 +-
 drivers/gpu/drm/xe/xe_mocs.c           |  8 ++++----
 drivers/gpu/drm/xe/xe_pt.c             |  2 +-
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c |  2 +-
 drivers/gpu/drm/xe/xe_wopcm.c          | 15 ++++++++-------
 11 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 1541fb64949c6..d82b50de144ea 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -28,14 +28,14 @@
 
 #define XE_EXECLIST_HANG_LIMIT 1
 
-#define GEN11_SW_CTX_ID_SHIFT 37
-#define GEN11_SW_CTX_ID_WIDTH 11
+#define SW_CTX_ID_SHIFT 37
+#define SW_CTX_ID_WIDTH 11
 #define XEHP_SW_CTX_ID_SHIFT  39
 #define XEHP_SW_CTX_ID_WIDTH  16
 
-#define GEN11_SW_CTX_ID \
-	GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \
-		    GEN11_SW_CTX_ID_SHIFT)
+#define SW_CTX_ID \
+	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
+		    SW_CTX_ID_SHIFT)
 
 #define XEHP_SW_CTX_ID \
 	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
@@ -55,8 +55,8 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
 		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
 		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
 	} else {
-		xe_gt_assert(hwe->gt, FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
-		lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id);
+		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
 	}
 
 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
@@ -96,7 +96,7 @@ static void __xe_execlist_port_start(struct xe_execlist_port *port,
 				     struct xe_execlist_exec_queue *exl)
 {
 	struct xe_device *xe = gt_to_xe(port->hwe->gt);
-	int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID);
+	int max_ctx = FIELD_MAX(SW_CTX_ID);
 
 	if (GRAPHICS_VERx100(xe) >= 1250)
 		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 53b39fe916019..0dddb751c6a48 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -515,7 +515,7 @@ static int do_gt_reset(struct xe_gt *gt)
 	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
 	err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
 	if (err)
-		xe_gt_err(gt, "failed to clear GEN11_GRDOM_FULL (%pe)\n",
+		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
 			  ERR_PTR(err));
 
 	return err;
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 6de2ab05bf4e1..08142d8ee0523 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -301,7 +301,7 @@ int xe_guc_reset(struct xe_guc *guc)
 
 	ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
 	if (ret) {
-		drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
+		drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n",
 			gdrst);
 		goto err_out;
 	}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index f4ac76d6b2ddc..e9dd6c3d750bd 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -23,19 +23,19 @@
 
 #define MCHBAR_MIRROR_BASE_SNB	0x140000
 
-#define GEN6_RP_STATE_CAP	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define RP_STATE_CAP		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998)
 #define   RP0_MASK		REG_GENMASK(7, 0)
 #define   RP1_MASK		REG_GENMASK(15, 8)
 #define   RPN_MASK		REG_GENMASK(23, 16)
 
-#define GEN10_FREQ_INFO_REC	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
+#define FREQ_INFO_REC	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
 #define   RPE_MASK		REG_GENMASK(15, 8)
 
 #define GT_PERF_STATUS		XE_REG(0x1381b4)
-#define   GEN12_CAGF_MASK	REG_GENMASK(19, 11)
+#define   CAGF_MASK	REG_GENMASK(19, 11)
 
 #define GT_FREQUENCY_MULTIPLIER	50
-#define GEN9_FREQ_SCALER	3
+#define GT_FREQUENCY_SCALER	3
 
 /**
  * DOC: GuC Power Conservation (PC)
@@ -244,12 +244,12 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
 static u32 decode_freq(u32 raw)
 {
 	return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER,
-				 GEN9_FREQ_SCALER);
+				 GT_FREQUENCY_SCALER);
 }
 
 static u32 encode_freq(u32 freq)
 {
-	return DIV_ROUND_CLOSEST(freq * GEN9_FREQ_SCALER,
+	return DIV_ROUND_CLOSEST(freq * GT_FREQUENCY_SCALER,
 				 GT_FREQUENCY_MULTIPLIER);
 }
 
@@ -362,7 +362,7 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc)
 	if (xe->info.platform == XE_PVC)
 		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
 	else
-		reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC);
+		reg = xe_mmio_read32(gt, FREQ_INFO_REC);
 
 	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 }
@@ -402,7 +402,7 @@ static ssize_t freq_act_show(struct device *dev,
 		freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
 	} else {
 		freq = xe_mmio_read32(gt, GT_PERF_STATUS);
-		freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
+		freq = REG_FIELD_GET(CAGF_MASK, freq);
 	}
 
 	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
@@ -702,7 +702,7 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
 	if (xe->info.platform == XE_PVC)
 		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
 	else
-		reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP);
+		reg = xe_mmio_read32(gt, RP_STATE_CAP);
 	pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 	pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
 }
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index f8754f0615999..17c0eb9e62cfb 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -23,14 +23,13 @@
 #include "xe_map.h"
 #include "xe_vm.h"
 
-#define GEN8_CTX_VALID				(1 << 0)
-#define GEN8_CTX_L3LLC_COHERENT			(1 << 5)
-#define GEN8_CTX_PRIVILEGE			(1 << 8)
-#define GEN8_CTX_ADDRESSING_MODE_SHIFT		3
-#define INTEL_LEGACY_64B_CONTEXT		3
+#define CTX_VALID				(1 << 0)
+#define CTX_PRIVILEGE				(1 << 8)
+#define CTX_ADDRESSING_MODE_SHIFT		3
+#define LEGACY_64B_CONTEXT			3
 
-#define GEN11_ENGINE_CLASS_SHIFT		61
-#define GEN11_ENGINE_INSTANCE_SHIFT		48
+#define ENGINE_CLASS_SHIFT			61
+#define ENGINE_INSTANCE_SHIFT			48
 
 static struct xe_device *
 lrc_to_xe(struct xe_lrc *lrc)
@@ -765,19 +764,19 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 				     (q->usm.acc_notify << ACC_NOTIFY_S) |
 				     q->usm.acc_trigger);
 
-	lrc->desc = GEN8_CTX_VALID;
-	lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	lrc->desc = CTX_VALID;
+	lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT;
 	/* TODO: Priority */
 
 	/* While this appears to have something about privileged batches or
 	 * some such, it really just means PPGTT mode.
 	 */
 	if (vm)
-		lrc->desc |= GEN8_CTX_PRIVILEGE;
+		lrc->desc |= CTX_PRIVILEGE;
 
 	if (GRAPHICS_VERx100(xe) < 1250) {
-		lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT;
-		lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT;
+		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
+		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
 	}
 
 	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 0baaef53f3a72..5b24c592dde5f 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -84,8 +84,8 @@ static void xe_resize_vram_bar(struct xe_device *xe)
 	int i;
 
 	/* gather some relevant info */
-	current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-	bar_size_mask = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
+	current_size = pci_resource_len(pdev, LMEM_BAR);
+	bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR);
 
 	if (!bar_size_mask)
 		return;
@@ -137,7 +137,7 @@ static void xe_resize_vram_bar(struct xe_device *xe)
 	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
 
-	_resize_bar(xe, GEN12_LMEM_BAR, rebar_size);
+	_resize_bar(xe, LMEM_BAR, rebar_size);
 
 	pci_assign_unassigned_bus_resources(pdev->bus);
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
@@ -161,15 +161,15 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
-	if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) {
+	if (!xe_pci_resource_valid(pdev, LMEM_BAR)) {
 		drm_err(&xe->drm, "pci resource is not valid\n");
 		return -ENXIO;
 	}
 
 	xe_resize_vram_bar(xe);
 
-	xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
-	xe->mem.vram.io_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+	xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR);
+	xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR);
 	if (!xe->mem.vram.io_size)
 		return -EIO;
 
@@ -216,7 +216,7 @@ static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
 
 	/* actual size */
 	if (unlikely(xe->info.platform == XE_DG1)) {
-		*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), GEN12_LMEM_BAR);
+		*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR);
 		*tile_offset = 0;
 	} else {
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index b244e9063caaf..afc503c97eed3 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -18,7 +18,7 @@ struct drm_device;
 struct drm_file;
 struct xe_device;
 
-#define GEN12_LMEM_BAR		2
+#define LMEM_BAR		2
 
 int xe_mmio_init(struct xe_device *xe);
 
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 2d380053b477f..cbb1619f17e0a 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -69,7 +69,7 @@ struct xe_mocs_info {
 #define L4_CACHE_POLICY_MASK		REG_GENMASK(3, 2)
 
 /* Helper defines */
-#define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+#define XELP_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
 #define MTL_NUM_MOCS_ENTRIES    16
 #define XE2_NUM_MOCS_ENTRIES	16
@@ -419,14 +419,14 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 			info->table = dg2_mocs_desc;
 		}
 		info->uc_index = 1;
-		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
 		info->unused_entries_index = 3;
 		break;
 	case XE_DG1:
 		info->size = ARRAY_SIZE(dg1_mocs_desc);
 		info->table = dg1_mocs_desc;
 		info->uc_index = 1;
-		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
 		info->unused_entries_index = 5;
 		break;
 	case XE_TIGERLAKE:
@@ -436,7 +436,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 	case XE_ALDERLAKE_N:
 		info->size  = ARRAY_SIZE(gen12_mocs_desc);
 		info->table = gen12_mocs_desc;
-		info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
 		info->uc_index = 3;
 		info->unused_entries_index = 2;
 		break;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ab08e46445295..55c37f11ddb46 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -532,7 +532,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		pte |= xe_walk->default_pte;
 
 		/*
-		 * Set the GEN12_PTE_PS64 hint if possible, otherwise if
+		 * Set the XE_PTE_PS64 hint if possible, otherwise if
 		 * this device *requires* 64K PTE size for VRAM, fail.
 		 */
 		if (level == 0 && !xe_parent->is_compact) {
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 837b522cb91fe..d2b00d0bf1e20 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -74,7 +74,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	stolen_size = tile_size - mgr->stolen_base;
 
 	/* Verify usage fits in the actual resource available */
-	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, GEN12_LMEM_BAR))
+	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
 		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
 
 	/*
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index bf85d4fa56ccc..d3a99157e5233 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -51,9 +51,10 @@
 #define DGFX_WOPCM_SIZE			SZ_4M
 /* FIXME: Larger size require for MTL, do a proper probe sooner or later */
 #define MTL_WOPCM_SIZE			SZ_4M
-#define GEN11_WOPCM_SIZE		SZ_2M
+#define WOPCM_SIZE			SZ_2M
+
+#define MAX_WOPCM_SIZE			SZ_8M
 
-#define GEN12_MAX_WOPCM_SIZE            SZ_8M
 /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
 #define WOPCM_RESERVED_SIZE		SZ_16K
 
@@ -65,8 +66,8 @@
 /* GuC WOPCM Offset value needs to be aligned to 16KB. */
 #define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
 
-/* 36KB WOPCM reserved at the end of WOPCM on GEN11. */
-#define GEN11_WOPCM_HW_CTX_RESERVED	(SZ_32K + SZ_4K)
+/* 36KB WOPCM reserved at the end of WOPCM */
+#define WOPCM_HW_CTX_RESERVED		(SZ_32K + SZ_4K)
 
 static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm)
 {
@@ -80,7 +81,7 @@ static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm)
 
 static u32 context_reserved_size(void)
 {
-	return GEN11_WOPCM_HW_CTX_RESERVED;
+	return WOPCM_HW_CTX_RESERVED;
 }
 
 static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
@@ -180,7 +181,7 @@ u32 xe_wopcm_size(struct xe_device *xe)
 {
 	return IS_DGFX(xe) ? DGFX_WOPCM_SIZE :
 		xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE :
-		GEN11_WOPCM_SIZE;
+		WOPCM_SIZE;
 }
 
 /**
@@ -225,7 +226,7 @@ int xe_wopcm_init(struct xe_wopcm *wopcm)
 		 * BIOS/IFWI, check against the max allowed wopcm size to
 		 * validate if the programmed values align to the wopcm layout.
 		 */
-		wopcm->size = GEN12_MAX_WOPCM_SIZE;
+		wopcm->size = MAX_WOPCM_SIZE;
 
 		goto check;
 	}
-- 
GitLab


From 40709aa761acbc78fe6c0405720d79cbf8345095 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 20 Nov 2023 12:08:48 -0800
Subject: [PATCH 2096/2340] drm/xe: Only set xe_vma_op.map fields for GPUVA map
 operations

DRM_XE_VM_BIND_OP_MAP_* IOCTL operations can result in GPUVA unmap, remap,
or map operations in vm_bind_ioctl_ops_create. The xe_vma_op.map fields
are blindly set which is incorrect for GPUVA unmap or remap operations.
Fix this by only setting xe_vma_op.map for GPUVA map operations. Also
restructure a bit vm_bind_ioctl_ops_create to make the code a bit more
readable.

Reported-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 59 ++++++++++++--------------------------
 1 file changed, 18 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 174441c4ca5a0..1b4d340d06047 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2182,42 +2182,12 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
 						  obj, bo_offset_or_userptr);
-		if (IS_ERR(ops))
-			return ops;
-
-		drm_gpuva_for_each_op(__op, ops) {
-			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
-
-			op->tile_mask = tile_mask;
-			op->map.immediate =
-				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
-			op->map.read_only =
-				flags & DRM_XE_VM_BIND_FLAG_READONLY;
-			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
-		}
 		break;
 	case DRM_XE_VM_BIND_OP_UNMAP:
 		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
-		if (IS_ERR(ops))
-			return ops;
-
-		drm_gpuva_for_each_op(__op, ops) {
-			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
-
-			op->tile_mask = tile_mask;
-		}
 		break;
 	case DRM_XE_VM_BIND_OP_PREFETCH:
 		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
-		if (IS_ERR(ops))
-			return ops;
-
-		drm_gpuva_for_each_op(__op, ops) {
-			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
-
-			op->tile_mask = tile_mask;
-			op->prefetch.region = prefetch_region;
-		}
 		break;
 	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
 		xe_assert(vm->xe, bo);
@@ -2233,19 +2203,13 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
 		drm_gpuvm_bo_put(vm_bo);
 		xe_bo_unlock(bo);
-		if (IS_ERR(ops))
-			return ops;
-
-		drm_gpuva_for_each_op(__op, ops) {
-			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
-
-			op->tile_mask = tile_mask;
-		}
 		break;
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 		ops = ERR_PTR(-EINVAL);
 	}
+	if (IS_ERR(ops))
+		return ops;
 
 #ifdef TEST_VM_ASYNC_OPS_ERROR
 	if (operation & FORCE_ASYNC_OP_ERROR) {
@@ -2256,9 +2220,22 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	}
 #endif
 
-	if (!IS_ERR(ops))
-		drm_gpuva_for_each_op(__op, ops)
-			print_op(vm->xe, __op);
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		op->tile_mask = tile_mask;
+		if (__op->op == DRM_GPUVA_OP_MAP) {
+			op->map.immediate =
+				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
+			op->map.read_only =
+				flags & DRM_XE_VM_BIND_FLAG_READONLY;
+			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
+			op->prefetch.region = prefetch_region;
+		}
+
+		print_op(vm->xe, __op);
+	}
 
 	return ops;
 }
-- 
GitLab


From baf9089c800c46f224f14e2a681ba3a7c1b09374 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Fri, 17 Nov 2023 09:29:28 -0800
Subject: [PATCH 2097/2340] drm/xe: Include RPL-U to pciidlist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RPL-U is defined as a subplatform but those PCI ids were
not included in pciidlist so Xe KMD would never probe device with
those ids.
This is following what i915 does to include RPL-U to PCI ids
probe list.

v2:
- change order to match i915

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/xe_pciids.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index 11deefceffd0a..47ff7472c8bd1 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -117,6 +117,7 @@
 
 /* RPL-P */
 #define XE_RPLP_IDS(MACRO__, ...)		\
+	XE_RPLU_IDS(MACRO__, ## __VA_ARGS__),	\
 	MACRO__(0xA720, ## __VA_ARGS__),	\
 	MACRO__(0xA7A0, ## __VA_ARGS__),	\
 	MACRO__(0xA7A8, ## __VA_ARGS__)
-- 
GitLab


From 1bec833316fffa110259093671d27be137be454d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Mon, 20 Nov 2023 10:00:35 -0800
Subject: [PATCH 2098/2340] drm/xe: Add missing RPL and ADL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those are ids present in i915 but missing in Xe.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/xe_pciids.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index 47ff7472c8bd1..84fb596ab8f53 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -67,6 +67,7 @@
 	MACRO__(0x4682, ## __VA_ARGS__),	\
 	MACRO__(0x4688, ## __VA_ARGS__),	\
 	MACRO__(0x468A, ## __VA_ARGS__),	\
+	MACRO__(0x468B, ## __VA_ARGS__),	\
 	MACRO__(0x4690, ## __VA_ARGS__),	\
 	MACRO__(0x4692, ## __VA_ARGS__),	\
 	MACRO__(0x4693, ## __VA_ARGS__)
@@ -113,14 +114,18 @@
 #define XE_RPLU_IDS(MACRO__, ...)		\
 	MACRO__(0xA721, ## __VA_ARGS__),	\
 	MACRO__(0xA7A1, ## __VA_ARGS__),	\
-	MACRO__(0xA7A9, ## __VA_ARGS__)
+	MACRO__(0xA7A9, ## __VA_ARGS__),	\
+	MACRO__(0xA7AC, ## __VA_ARGS__),	\
+	MACRO__(0xA7AD, ## __VA_ARGS__)
 
 /* RPL-P */
 #define XE_RPLP_IDS(MACRO__, ...)		\
 	XE_RPLU_IDS(MACRO__, ## __VA_ARGS__),	\
 	MACRO__(0xA720, ## __VA_ARGS__),	\
 	MACRO__(0xA7A0, ## __VA_ARGS__),	\
-	MACRO__(0xA7A8, ## __VA_ARGS__)
+	MACRO__(0xA7A8, ## __VA_ARGS__),	\
+	MACRO__(0xA7AA, ## __VA_ARGS__),	\
+	MACRO__(0xA7AB, ## __VA_ARGS__)
 
 /* DG2 */
 #define XE_DG2_G10_IDS(MACRO__, ...)		\
-- 
GitLab


From 1a3d4d76bacee545c620f5935a5bf4677ad88d4c Mon Sep 17 00:00:00 2001
From: Haridhar Kalvala <haridhar.kalvala@intel.com>
Date: Mon, 20 Nov 2023 12:25:07 +0530
Subject: [PATCH 2099/2340] drm/xe: ATS-M device ID update

ATS-M device ID update.

BSpec: 44477

Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231120065507.1543676-1-haridhar.kalvala@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/xe_pciids.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index 84fb596ab8f53..6c3ac3b5ec94f 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -163,7 +163,8 @@
 	XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
 
 #define XE_ATS_M150_IDS(MACRO__, ...)		\
-	MACRO__(0x56C0, ## __VA_ARGS__)
+	MACRO__(0x56C0, ## __VA_ARGS__),	\
+	MACRO__(0x56C2, ## __VA_ARGS__)
 
 #define XE_ATS_M75_IDS(MACRO__, ...)		\
 	MACRO__(0x56C1, ## __VA_ARGS__)
-- 
GitLab


From 1d425066f15faa6965fa6361da4c52e4020fd8d0 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 20 Nov 2023 14:19:04 -0800
Subject: [PATCH 2100/2340] drm/xe: Fix modpost warning on kunit modules

When built with W=1, the following warnings show up on modpost:

	  MODPOST drivers/gpu/drm/xe/Module.symvers
	WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_bo_test.o
	WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_dma_buf_test.o
	WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_migrate_test.o
	WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_pci_test.o
	WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_rtp_test.o
	WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_wa_test.o

Add the module description for each of these to fix the warning.

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20231120221904.695630-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo_test.c      | 1 +
 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 1 +
 drivers/gpu/drm/xe/tests/xe_migrate_test.c | 1 +
 drivers/gpu/drm/xe/tests/xe_pci_test.c     | 2 +-
 drivers/gpu/drm/xe/tests/xe_rtp_test.c     | 1 +
 drivers/gpu/drm/xe/tests/xe_wa_test.c      | 1 +
 6 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
index 1c868e3635bc3..f408f17f21642 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -22,4 +22,5 @@ kunit_test_suite(xe_bo_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_bo kunit test");
 MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
index 35312bfd5fb72..9f5a9cda8c0f9 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -21,4 +21,5 @@ kunit_test_suite(xe_dma_buf_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_dma_buf kunit test");
 MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index 39179eae890b0..cf0c173b945f6 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -21,4 +21,5 @@ kunit_test_suite(xe_migrate_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_migrate kunit test");
 MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
index 9c6f6c2c6c6eb..daf6525176088 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -70,5 +70,5 @@ kunit_test_suite(xe_pci_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_pci kunit test");
 MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
-
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index b2beba0019cd7..a1d204133cc16 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -313,4 +313,5 @@ kunit_test_suite(xe_rtp_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_rtp kunit test");
 MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 6e1127b276ea1..01ea974591ea9 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -138,4 +138,5 @@ kunit_test_suite(xe_rtp_test_suite);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_wa kunit test");
 MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
-- 
GitLab


From a6a4ea6d7d37cea9132e67a4d3321a455a6b0736 Mon Sep 17 00:00:00 2001
From: Ruthuvikas Ravikumar <ruthuvikas.ravikumar@intel.com>
Date: Fri, 17 Nov 2023 03:21:52 +0530
Subject: [PATCH 2101/2340] drm/xe: Add mocs kunit

This kunit verifies the hardware values of mocs and
l3cc registers with the KMD programmed values.

v14: Fix CHECK.

v13: Remove ret after forcewake.

v11: Add KUNIT_ASSERT_EQ_MSG for Forcewake.

v9/v10: Add Forcewake Fail.

v8: Remove xe_bo.h and xe_pm.h
    Remove mocs and l3cc from live_mocs.
    Pull debug and err msg for mocs/l3cc out of if else block.
    Add HAS_LNCF_MOCS.

v7: correct checkpath

v6: Change ssize_t type.
    Change forcewake domain to XE_FW_GT.
    Update change of MOCS registers are multicast on Xe_HP and beyond
    patch.

v5: Release forcewake.
    Remove single statement braces.
    Fix debug statements.

v4: Drop stratch and vaddr.
    Fix debug statements.
    Fix indentation.

v3: Fix checkpath.

v2: Fix checkpath.

Cc: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Cc: Mathew D Roper <matthew.d.roper@intel.com>
Reviewed-by: Mathew D Roper <matthew.d.roper@intel.com>
Signed-off-by: Ruthuvikas Ravikumar <ruthuvikas.ravikumar@intel.com>
Link: https://lore.kernel.org/r/20231116215152.2248859-1-ruthuvikas.ravikumar@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/Makefile       |   1 +
 drivers/gpu/drm/xe/tests/xe_mocs.c      | 130 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_mocs_test.c |  24 +++++
 drivers/gpu/drm/xe/tests/xe_mocs_test.h |  13 +++
 drivers/gpu/drm/xe/xe_mocs.c            |   4 +
 5 files changed, 172 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/tests/xe_mocs.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_mocs_test.c
 create mode 100644 drivers/gpu/drm/xe/tests/xe_mocs_test.h

diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 51f1a7f017d4d..39d8a0892274a 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
 	xe_bo_test.o \
 	xe_dma_buf_test.o \
 	xe_migrate_test.o \
+	xe_mocs_test.o \
 	xe_pci_test.o \
 	xe_rtp_test.o \
 	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
new file mode 100644
index 0000000000000..7dd34f94e8094
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_mocs_test.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_pci.h"
+#include "xe_gt.h"
+#include "xe_mocs.h"
+#include "xe_device.h"
+
+struct live_mocs {
+	struct xe_mocs_info table;
+};
+
+static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
+{
+	unsigned int flags;
+	struct kunit *test = xe_cur_kunit();
+
+	memset(arg, 0, sizeof(*arg));
+
+	flags = get_mocs_settings(gt_to_xe(gt), &arg->table);
+
+	kunit_info(test, "table size %d", arg->table.size);
+	kunit_info(test, "table uc_index %d", arg->table.uc_index);
+	kunit_info(test, "table n_entries %d", arg->table.n_entries);
+
+	return flags;
+}
+
+static void read_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	unsigned int i;
+	u32 l3cc;
+	u32 reg_val;
+	u32 ret;
+
+	struct kunit *test = xe_cur_kunit();
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+	mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries);
+	for (i = 0;
+	     i < (info->n_entries + 1) / 2 ?
+	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
+	     i++) {
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
+		else
+			reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
+			 XELP_LNCFCMOCS(i).addr, reg_val, l3cc);
+		if (reg_val != l3cc)
+			KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n",
+				   XELP_LNCFCMOCS(i).addr);
+	}
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+static void read_mocs_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	unsigned int i;
+	u32 mocs;
+	u32 reg_val;
+	u32 ret;
+
+	struct kunit *test = xe_cur_kunit();
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+	mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+	for (i = 0;
+	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
+	     i++) {
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
+		else
+			reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
+			 XELP_GLOBAL_MOCS(i).addr, reg_val, mocs);
+		if (reg_val != mocs)
+			KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n",
+				   XELP_GLOBAL_MOCS(i).addr);
+	}
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+static int mocs_kernel_test_run_device(struct xe_device *xe)
+{
+	/* Basic check the system is configured with the expected mocs table */
+
+	struct live_mocs mocs;
+	struct xe_gt *gt;
+
+	unsigned int flags;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		flags = live_mocs_init(&mocs, gt);
+		if (flags & HAS_GLOBAL_MOCS)
+			read_mocs_table(gt, &mocs.table);
+		if (flags & HAS_LNCF_MOCS)
+			read_l3cc_table(gt, &mocs.table);
+	}
+	return 0;
+}
+
+void xe_live_mocs_kernel_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(mocs_kernel_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
new file mode 100644
index 0000000000000..ef56bd517b28c
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_mocs_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_mocs_tests[] = {
+	KUNIT_CASE(xe_live_mocs_kernel_kunit),
+	{}
+};
+
+static struct kunit_suite xe_mocs_test_suite = {
+	.name = "xe_mocs",
+	.test_cases = xe_mocs_tests,
+};
+
+kunit_test_suite(xe_mocs_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
new file mode 100644
index 0000000000000..7faa3575e6c3a
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_TEST_H_
+#define _XE_MOCS_TEST_H_
+
+struct kunit;
+
+void xe_live_mocs_kernel_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index cbb1619f17e0a..12a6d39fcd4a4 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -574,3 +574,7 @@ void xe_mocs_init(struct xe_gt *gt)
 	if (flags & HAS_LNCF_MOCS)
 		init_l3cc_table(gt, &table);
 }
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_mocs.c"
+#endif
-- 
GitLab


From 5c09bd6ccd418f9dc221fd2544d613e3180b928e Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Thu, 16 Nov 2023 18:39:59 -0300
Subject: [PATCH 2102/2340] drm/xe/mmio: Move xe_mmio_wait32() to xe_mmio.c

This function is big enough, let's move it to a shared compilation unit.

While at it, document it.

Here is the output of running bloat-o-metter on the new and old module
(execution provided by Lucas):

    $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/xe/xe.ko{.old,}
    add/remove: 2/0 grow/shrink: 0/58 up/down: 554/-15645 (-15091)
    (...) # Lines in between omitted
    Total: Before=2181322, After=2166231, chg -0.69%

The overall reduction in the size is not that significant. Nevertheless,
keeping the function as inline arguably does not bring too much benefit
as well.

As noted by Lucas, we would probably benefit from an inline
function that did the fast-path check: do an optimistic first check
before entering the wait-logic, which itself would go to a compilation
unit. We might come back to implement this in the future if we have data
to justify it.

v2:
  - Add note in documentation for @timeout_us regarding the exponential
    backoff strategy. (Lucas)
  - Share output of bloat-o-meter in the commit message. (Lucas)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231116214000.70573-2-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 54 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_mmio.h | 39 ++------------------------
 2 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 5b24c592dde5f..6c035b293bb6f 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -489,3 +489,57 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
 	return (u64)udw << 32 | ldw;
 }
 
+/**
+ * xe_mmio_wait32() - Wait for a register to match the desired masked value
+ * @gt: MMIO target GT
+ * @reg: register to read value from
+ * @mask: mask to be applied to the value read from the register
+ * @val: desired value after applying the mask
+ * @timeout_us: time out after this period of time. Wait logic tries to be
+ * smart, applying an exponential backoff until @timeout_us is reached.
+ * @out_val: if not NULL, points where to store the last unmasked value
+ * @atomic: needs to be true if calling from an atomic context
+ *
+ * This function polls for the desired masked value and returns zero on success
+ * or -ETIMEDOUT if timed out.
+ *
+ * Note that @timeout_us represents the minimum amount of time to wait before
+ * giving up. The actual time taken by this function can be a little more than
+ * @timeout_us for different reasons, specially in non-atomic contexts. Thus,
+ * it is possible that this function succeeds even after @timeout_us has passed.
+ */
+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+		   u32 *out_val, bool atomic)
+{
+	ktime_t cur = ktime_get_raw();
+	const ktime_t end = ktime_add_us(cur, timeout_us);
+	int ret = -ETIMEDOUT;
+	s64 wait = 10;
+	u32 read;
+
+	for (;;) {
+		read = xe_mmio_read32(gt, reg);
+		if ((read & mask) == val) {
+			ret = 0;
+			break;
+		}
+
+		cur = ktime_get_raw();
+		if (!ktime_before(cur, end))
+			break;
+
+		if (ktime_after(ktime_add_us(cur, wait), end))
+			wait = ktime_us_delta(end, cur);
+
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
+		wait <<= 1;
+	}
+
+	if (out_val)
+		*out_val = read;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index afc503c97eed3..218b796629adc 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -87,43 +87,6 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
 	return (reg_val & mask) != eval ? -EINVAL : 0;
 }
 
-static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask,
-				 u32 val, u32 timeout_us, u32 *out_val,
-				 bool atomic)
-{
-	ktime_t cur = ktime_get_raw();
-	const ktime_t end = ktime_add_us(cur, timeout_us);
-	int ret = -ETIMEDOUT;
-	s64 wait = 10;
-	u32 read;
-
-	for (;;) {
-		read = xe_mmio_read32(gt, reg);
-		if ((read & mask) == val) {
-			ret = 0;
-			break;
-		}
-
-		cur = ktime_get_raw();
-		if (!ktime_before(cur, end))
-			break;
-
-		if (ktime_after(ktime_add_us(cur, wait), end))
-			wait = ktime_us_delta(end, cur);
-
-		if (atomic)
-			udelay(wait);
-		else
-			usleep_range(wait, wait << 1);
-		wait <<= 1;
-	}
-
-	if (out_val)
-		*out_val = read;
-
-	return ret;
-}
-
 static inline bool xe_mmio_in_range(const struct xe_gt *gt,
 				    const struct xe_mmio_range *range,
 				    struct xe_reg reg)
@@ -136,5 +99,7 @@ static inline bool xe_mmio_in_range(const struct xe_gt *gt,
 
 int xe_mmio_probe_vram(struct xe_device *xe);
 u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+		   u32 *out_val, bool atomic);
 
 #endif
-- 
GitLab


From b3f0654f55859cfcd87d4ea5440247451902924b Mon Sep 17 00:00:00 2001
From: Gustavo Sousa <gustavo.sousa@intel.com>
Date: Thu, 16 Nov 2023 18:40:00 -0300
Subject: [PATCH 2103/2340] drm/xe/mmio: Make xe_mmio_wait32() aware of
 interrupts

With the current implementation, a preemption or other kind of interrupt
might happen between xe_mmio_read32() and ktime_get_raw(). Such an
interruption (specially in the case of preemption) might be long enough
to cause a timeout without giving a chance of a new check on the
register value on a next iteration, which would have happened otherwise.

This issue causes some sporadic timeouts in some code paths. As an
example, we were experiencing some rare timeouts when waiting for PLL
unlock for C10/C20 PHYs (see intel_cx0pll_disable()). After debugging,
we found out that the PLL unlock was happening within the expected time
period (20us), which suggested a bug in xe_mmio_wait32().

To fix the issue, ensure that we do a last check out of the loop if
necessary.

This change was tested with the aforementioned PLL unlocking code path.
Experiments showed that, before this change, we observed reported
timeouts in 54 of 5000 runs; and, after this change, no timeouts were
reported in 5000 runs.

v2:
  - Prefer an implementation without a barrier (v1 switched the order of
    xe_mmio_read32() and ktime_get_raw() calls and added a barrier() in
    between). (Lucas, Rodrigo)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231116214000.70573-3-gustavo.sousa@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 6c035b293bb6f..d0a36600e52bc 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -538,6 +538,12 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t
 		wait <<= 1;
 	}
 
+	if (ret != 0) {
+		read = xe_mmio_read32(gt, reg);
+		if ((read & mask) == val)
+			ret = 0;
+	}
+
 	if (out_val)
 		*out_val = read;
 
-- 
GitLab


From 11ea758c145f8340d5ffd7b3831c2bd0e98f8024 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Mon, 20 Nov 2023 16:41:25 +0530
Subject: [PATCH 2104/2340] drm/xe/xe2: Add workaround 14017421178

This workaround applies to Xe2_LPM

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 6572715dfc093..13962b8a677a7 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -273,6 +273,15 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 			 SET(XEHPC_L3CLOS_MASK(3), ~0))
 	},
 
+	/* Xe2_LPM */
+
+	{ XE_RTP_NAME("14017421178"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+
 	{}
 };
 
-- 
GitLab


From f25d8291aca1ccfb0118ec4c0e98f6301bff15ec Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 23 Nov 2023 16:12:11 +0530
Subject: [PATCH 2105/2340] drm/xe/xe2: Add workaround 16021867713

This workaround applies to Xe2_LPM as well

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 13962b8a677a7..5ab5529d9624c 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -281,6 +281,12 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
 
 	{}
 };
-- 
GitLab


From 8bfbe174d7fabf4c6d26e90a133b3129c4e98cbe Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 23 Nov 2023 16:09:00 +0530
Subject: [PATCH 2106/2340] drm/xe/xe2: Add workaround 14019449301

This workaround applies to Xe2_LPM

V3(MattR):
  - Reorder reg and wa placement
  - Add base parameter to reg macro for better definition
V2(MattR):
  - Change name of register
  - Loop for all engines
  - Driver permanent WA, applies to all steps

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3 +++
 drivers/gpu/drm/xe/xe_wa.c               | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index b57dec17eb2dd..444ff9b83bb1b 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -112,6 +112,9 @@
 #define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
 #define	  EL_CTRL_LOAD				REG_BIT(0)
 
+#define VDBOX_CGCTL3F08(base)                  XE_REG((base) + 0x3f08)
+#define   CG3DDISHRS_CLKGATE_DIS               REG_BIT(5)
+
 #define VDBOX_CGCTL3F10(base)			XE_REG((base) + 0x3f10)
 #define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 5ab5529d9624c..81ae0232146e8 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -287,6 +287,11 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
+	{ XE_RTP_NAME("14019449301"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
 
 	{}
 };
-- 
GitLab


From d6d14854ddf362633fbcf050ce19bd0d7b0d9a3a Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Wed, 15 Nov 2023 08:38:02 +0100
Subject: [PATCH 2107/2340] drm/xe: Add device flag to indicate SR-IOV support

The Single Root I/O Virtualization (SR-IOV) extension to
the PCI Express (PCIe) specification suite is supported
starting from 12th generation of Intel Graphics processors.

Add a device flag that we will use to enable SR-IOV specific
code paths and to indicate our readiness to support SR-IOV.

We will enable this flag for the specific platforms once all
required changes and additions will be ready and merged.

Bspec: 52391
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231115073804.1861-1-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.h       | 5 +++++
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 drivers/gpu/drm/xe/xe_pci.c          | 3 ++-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index c4232de40ae08..54694f98c91a2 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -158,6 +158,11 @@ static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
 	return xe->info.has_flat_ccs;
 }
 
+static inline bool xe_device_has_sriov(struct xe_device *xe)
+{
+	return xe->info.has_sriov;
+}
+
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index be11cadccbd44..f2ba4f746fa22 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -253,6 +253,8 @@ struct xe_device {
 		u8 has_llc:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
+		/** @has_sriov: Supports SR-IOV */
+		u8 has_sriov:1;
 		/** @enable_display: display enabled */
 		u8 enable_display:1;
 		/** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 097b685981912..0a4b83a9fc0b7 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -60,6 +60,7 @@ struct xe_device_desc {
 	u8 has_heci_gscfi:1;
 
 	u8 has_llc:1;
+	u8 has_sriov:1;
 	u8 bypass_mtcfg:1;
 	u8 supports_mmio_ext:1;
 };
@@ -531,7 +532,6 @@ static void handle_gmdid(struct xe_device *xe,
 	}
 }
 
-
 static int xe_info_init(struct xe_device *xe,
 			const struct xe_device_desc *desc,
 			const struct xe_subplatform_desc *subplatform_desc)
@@ -577,6 +577,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_llc = desc->has_llc;
+	xe->info.has_sriov = desc->has_sriov;
 	xe->info.bypass_mtcfg = desc->bypass_mtcfg;
 	xe->info.supports_mmio_ext = desc->supports_mmio_ext;
 	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
-- 
GitLab


From 13e5c32c849ace3dd0af9049fc19ce910591db8b Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Wed, 15 Nov 2023 08:38:03 +0100
Subject: [PATCH 2108/2340] drm/xe: Prepare for running in different SR-IOV
 modes

We will be adding support for the SR-IOV and driver might be then
running, in addition to existing non-virtualized bare-metal mode,
also in Physical Function (PF) or Virtual Function (VF) mode.

Since these additional modes require some changes to the driver,
define enum flag to represent different SR-IOV modes and add a
function where we will detect the actual mode in the runtime.

We start with a forced bare-metal mode as it is sufficient to
enable basic functionality and ensures no impact to existing code.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231115073804.1861-2-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |  3 ++
 drivers/gpu/drm/xe/xe_device_types.h |  7 ++++
 drivers/gpu/drm/xe/xe_pci.c          |  3 ++
 drivers/gpu/drm/xe/xe_sriov.c        | 55 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_sriov.h        | 42 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_sriov_types.h  | 28 ++++++++++++++
 6 files changed, 138 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_sriov.c
 create mode 100644 drivers/gpu/drm/xe/xe_sriov.h
 create mode 100644 drivers/gpu/drm/xe/xe_sriov_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index a29b92080c850..05a90fd6c3c92 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -122,6 +122,9 @@ xe-y += xe_bb.o \
 # graphics hardware monitoring (HWMON) support
 xe-$(CONFIG_HWMON) += xe_hwmon.o
 
+# graphics virtualization (SR-IOV) support
+xe-y += xe_sriov.o
+
 # i915 Display compat #defines and #includes
 subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
 	-I$(srctree)/$(src)/display/ext \
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index f2ba4f746fa22..2712905c7a91c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -18,6 +18,7 @@
 #include "xe_platform_types.h"
 #include "xe_pt_types.h"
 #include "xe_pmu.h"
+#include "xe_sriov_types.h"
 #include "xe_step_types.h"
 
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
@@ -299,6 +300,12 @@ struct xe_device {
 		struct ttm_resource_manager sys_mgr;
 	} mem;
 
+	/** @sriov: device level virtualization data */
+	struct {
+		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
+		enum xe_sriov_mode __mode;
+	} sriov;
+
 	/** @usm: unified memory state */
 	struct {
 		/** @asid: convert a ASID to VM */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 0a4b83a9fc0b7..3d163cb3dd8cd 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -24,6 +24,7 @@
 #include "xe_module.h"
 #include "xe_pci_types.h"
 #include "xe_pm.h"
+#include "xe_sriov.h"
 #include "xe_step.h"
 
 enum toggle_d3cold {
@@ -705,6 +706,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_master(pdev);
 
+	xe_sriov_probe_early(xe, desc->has_sriov);
+
 	err = xe_info_init(xe, desc, subplatform_desc);
 	if (err)
 		goto err_pci_disable;
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
new file mode 100644
index 0000000000000..42a0e0c917a09
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_assert.h"
+#include "xe_sriov.h"
+
+/**
+ * xe_sriov_mode_to_string - Convert enum value to string.
+ * @mode: the &xe_sriov_mode to convert
+ *
+ * Returns: SR-IOV mode as a user friendly string.
+ */
+const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
+{
+	switch (mode) {
+	case XE_SRIOV_MODE_NONE:
+		return "none";
+	case XE_SRIOV_MODE_PF:
+		return "SR-IOV PF";
+	case XE_SRIOV_MODE_VF:
+		return "SR-IOV VF";
+	default:
+		return "<invalid>";
+	}
+}
+
+/**
+ * xe_sriov_probe_early - Probe a SR-IOV mode.
+ * @xe: the &xe_device to probe mode on
+ * @has_sriov: flag indicating hardware support for SR-IOV
+ *
+ * This function should be called only once and as soon as possible during
+ * driver probe to detect whether we are running a SR-IOV Physical Function
+ * (PF) or a Virtual Function (VF) device.
+ *
+ * SR-IOV PF mode detection is based on PCI @dev_is_pf() function.
+ * SR-IOV VF mode detection is based on dedicated MMIO register read.
+ */
+void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov)
+{
+	enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE;
+
+	/* TODO: replace with proper mode detection */
+	xe_assert(xe, !has_sriov);
+
+	xe_assert(xe, !xe->sriov.__mode);
+	xe->sriov.__mode = mode;
+	xe_assert(xe, xe->sriov.__mode);
+
+	if (has_sriov)
+		drm_info(&xe->drm, "Running in %s mode\n",
+			 xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
new file mode 100644
index 0000000000000..5af73a3172b03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_H_
+#define _XE_SRIOV_H_
+
+#include "xe_assert.h"
+#include "xe_device_types.h"
+#include "xe_sriov_types.h"
+
+const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode);
+
+void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov);
+
+static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe)
+{
+	xe_assert(xe, xe->sriov.__mode);
+	return xe->sriov.__mode;
+}
+
+static inline bool xe_device_is_sriov_pf(struct xe_device *xe)
+{
+	return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF;
+}
+
+static inline bool xe_device_is_sriov_vf(struct xe_device *xe)
+{
+	return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF;
+}
+
+#ifdef CONFIG_PCI_IOV
+#define IS_SRIOV_PF(xe) xe_device_is_sriov_pf(xe)
+#else
+#define IS_SRIOV_PF(xe) (typecheck(struct xe_device *, (xe)) && false)
+#endif
+#define IS_SRIOV_VF(xe) xe_device_is_sriov_vf(xe)
+
+#define IS_SRIOV(xe) (IS_SRIOV_PF(xe) || IS_SRIOV_VF(xe))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
new file mode 100644
index 0000000000000..999a4311b98bf
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_TYPES_H_
+#define _XE_SRIOV_TYPES_H_
+
+#include <linux/build_bug.h>
+
+/**
+ * enum xe_sriov_mode - SR-IOV mode
+ * @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized)
+ * @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode
+ * @XE_SRIOV_MODE_VF: SR-IOV Virtual Function (VF) mode
+ */
+enum xe_sriov_mode {
+	/*
+	 * Note: We don't use default enum value 0 to allow catch any too early
+	 * attempt of checking the SR-IOV mode prior to the actual mode probe.
+	 */
+	XE_SRIOV_MODE_NONE = 1,
+	XE_SRIOV_MODE_PF,
+	XE_SRIOV_MODE_VF,
+};
+static_assert(XE_SRIOV_MODE_NONE);
+
+#endif
-- 
GitLab


From 2475ac27df597679ca0426d358877d6f1483d50f Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Wed, 15 Nov 2023 08:38:04 +0100
Subject: [PATCH 2109/2340] drm/xe: Print virtualization mode during probe

We already print some basic information about the device, add
virtualization information, until we expose that elsewhere.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231115073804.1861-3-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 3d163cb3dd8cd..066a223a341a8 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -735,6 +735,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		xe_step_name(xe->info.step.display),
 		xe_step_name(xe->info.step.basedie));
 
+	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n",
+		str_yes_no(xe_device_has_sriov(xe)),
+		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+
 	err = xe_device_probe(xe);
 	if (err)
 		goto err_pci_disable;
-- 
GitLab


From 8c54ee8a8606a453a2c907989372aa6f004b7bec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Thu, 23 Nov 2023 16:31:55 +0100
Subject: [PATCH 2110/2340] drm/xe: Ensure that we don't access the placements
 array out-of-bounds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure, using xe_assert that the various try_add_<placement> functions
don't access the bo placements array out-of-bounds.

v2:
- Remove the places argument to make sure the xe_assert operates on
  the array we're actually populating. (Matthew Auld)

Suggested-by: Ohad Sharabi <osharabi@habana.ai>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Ohad Sharabi <osharabi@habana.ai> #v1
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231123153158.12779-2-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 39 +++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 835eab6432633..c12430cf9f289 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -121,11 +121,13 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
 	return to_xe_ttm_vram_mgr(mgr)->vram;
 }
 
-static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
+static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
-		places[*c] = (struct ttm_place) {
+		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_TT,
 		};
 		*c += 1;
@@ -170,26 +172,30 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 }
 
 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
-			 struct ttm_place *places, u32 bo_flags, u32 *c)
+			 u32 bo_flags, u32 *c)
 {
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 	if (bo->props.preferred_gt == XE_GT1) {
 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
-			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c);
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
-			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c);
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
 	} else {
 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
-			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c);
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
-			add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c);
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
 	}
 }
 
 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
-			   struct ttm_place *places, u32 bo_flags, u32 *c)
+			   u32 bo_flags, u32 *c)
 {
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
-		places[*c] = (struct ttm_place) {
+		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_STOLEN,
 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
 					     XE_BO_CREATE_GGTT_BIT) ?
@@ -202,7 +208,6 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 				       u32 bo_flags)
 {
-	struct ttm_place *places = bo->placements;
 	u32 c = 0;
 
 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
@@ -210,22 +215,22 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 	/* The order of placements should indicate preferred location */
 
 	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
-		try_add_system(bo, places, bo_flags, &c);
-		try_add_vram(xe, bo, places, bo_flags, &c);
+		try_add_system(xe, bo, bo_flags, &c);
+		try_add_vram(xe, bo, bo_flags, &c);
 	} else {
-		try_add_vram(xe, bo, places, bo_flags, &c);
-		try_add_system(bo, places, bo_flags, &c);
+		try_add_vram(xe, bo, bo_flags, &c);
+		try_add_system(xe, bo, bo_flags, &c);
 	}
-	try_add_stolen(xe, bo, places, bo_flags, &c);
+	try_add_stolen(xe, bo, bo_flags, &c);
 
 	if (!c)
 		return -EINVAL;
 
 	bo->placement = (struct ttm_placement) {
 		.num_placement = c,
-		.placement = places,
+		.placement = bo->placements,
 		.num_busy_placement = c,
-		.busy_placement = places,
+		.busy_placement = bo->placements,
 	};
 
 	return 0;
-- 
GitLab


From a21fe5ee598109793546b67a32398076ddea2660 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 22 Nov 2023 12:03:57 +0100
Subject: [PATCH 2111/2340] drm/xe/bo: Rename xe_bo_get_sg() to xe_bo_sg()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using "get" typically refers to obtaining a refcount, which we don't do
here so rename to xe_bo_sg().

Suggested-by: Ohad Sharabi <osharabi@habana.ai>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Ohad Sharabi<osharabi@habana.ai>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122110359.4087-3-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +-
 drivers/gpu/drm/xe/xe_bo.c            | 4 ++--
 drivers/gpu/drm/xe/xe_bo.h            | 2 +-
 drivers/gpu/drm/xe/xe_migrate.c       | 8 ++++----
 drivers/gpu/drm/xe/xe_pt.c            | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 7aad09140d7ed..83d6a66ed3695 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -328,7 +328,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	if (xe_bo_is_vram(pt))
 		xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
 	else
-		xe_res_first_sg(xe_bo_get_sg(pt), 0, pt->size, &src_it);
+		xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
 
 	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
 		 &src_it, XE_PAGE_SIZE, pt);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index c12430cf9f289..4c827fd4e382d 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -317,7 +317,7 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
 	return 0;
 }
 
-struct sg_table *xe_bo_get_sg(struct xe_bo *bo)
+struct sg_table *xe_bo_sg(struct xe_bo *bo)
 {
 	struct ttm_tt *tt = bo->ttm.ttm;
 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
@@ -1735,7 +1735,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
 		xe_assert(xe, bo->ttm.ttm);
 
-		xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
+		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
 				page_size, &cur);
 		return xe_res_dma(&cur) + offset;
 	} else {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 9d3b704a10303..f8bae873418dd 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -299,7 +299,7 @@ xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
 
 void xe_bo_put_commit(struct llist_head *deferred);
 
-struct sg_table *xe_bo_get_sg(struct xe_bo *bo);
+struct sg_table *xe_bo_sg(struct xe_bo *bo);
 
 /*
  * xe_sg_segment_size() - Provides upper limit for sg segment size.
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 53b5b36aca664..4aea748c984b4 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -682,16 +682,16 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		return ERR_PTR(-EINVAL);
 
 	if (!src_is_vram)
-		xe_res_first_sg(xe_bo_get_sg(src_bo), 0, size, &src_it);
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
 	else
 		xe_res_first(src, 0, size, &src_it);
 	if (!dst_is_vram)
-		xe_res_first_sg(xe_bo_get_sg(dst_bo), 0, size, &dst_it);
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
 	else
 		xe_res_first(dst, 0, size, &dst_it);
 
 	if (copy_system_ccs)
-		xe_res_first_sg(xe_bo_get_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
+		xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
 				PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
 				&ccs_it);
 
@@ -941,7 +941,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 	int pass = 0;
 
 	if (!clear_vram)
-		xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
 	else
 		xe_res_first(src, 0, bo->size, &src_it);
 
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 55c37f11ddb46..1fd461fb426e1 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -677,7 +677,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
 				     xe_vma_size(vma), &curs);
 		else
-			xe_res_first_sg(xe_bo_get_sg(bo), xe_vma_bo_offset(vma),
+			xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
 					xe_vma_size(vma), &curs);
 	} else {
 		curs.size = xe_vma_size(vma);
-- 
GitLab


From e7c9e049e0ad256214d8c50454e7289174ffa33b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 22 Nov 2023 12:03:58 +0100
Subject: [PATCH 2112/2340] drm/xe/bo: Remove leftover trace_printk()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

trace_printk() is not intended for production code. Remove it.

Suggested-by: Ohad Sharabi <osharabi@habana.ai>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Ohad Sharabi <osharabi@habana.ai>
Link: https://patchwork.freedesktop.org/patch/msgid/20231122110359.4087-4-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4c827fd4e382d..e193373908123 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -788,7 +788,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	}
 
 	xe_device_mem_access_put(xe);
-	trace_printk("new_mem->mem_type=%d\n", new_mem->mem_type);
 
 out:
 	return ret;
-- 
GitLab


From d2f51c50b941f89850c9a9561486938b71c0b9f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 24 Nov 2023 16:33:45 +0100
Subject: [PATCH 2113/2340] drm/xe/vm: Fix ASID XA usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xa_alloc_cyclic() returns 1 on successful allocation, if wrapping occurs,
but the code incorrectly treats that as an error. Fix that.
Also, xa_alloc_cyclic() requires xa_init_flags(..., XA_FLAGS_ALLOC), so
fix that, and assuming we don't want a zero ASID, instead of using
XA_FLAGS_ALLOC1, adjust the xa limits at alloc_cyclic time.

v2:
- On CONFIG_DRM_XE_DEBUG, Initialize the cyclic ASID allocation in such a
  way that the next allocated ASID will be the maximum one, and the one
  following will cause an ASID wrap, (all to have CI test high ASIDs
  and ASID wraps).
v3:
- Stricter return value checking from xa_alloc_cyclic() (Matthew Auld)

Suggested-by: Ohad Sharabi <osharabi@habana.ai>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Ohad Sharabi <osharabi@habana.ai> #v1
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231124153345.97385-5-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 15 ++++++++++++++-
 drivers/gpu/drm/xe/xe_vm.c     |  5 +++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 8be765adf702d..d60379d844d2a 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -230,7 +230,20 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	init_waitqueue_head(&xe->ufence_wq);
 
 	drmm_mutex_init(&xe->drm, &xe->usm.lock);
-	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);
+	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		/* Trigger a large asid and an early asid wrap. */
+		u32 asid;
+
+		BUILD_BUG_ON(XE_MAX_ASID < 2);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		drm_WARN_ON(&xe->drm, err);
+		if (err >= 0)
+			xa_erase(&xe->usm.asid_to_vm, asid);
+	}
 
 	drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
 	INIT_LIST_HEAD(&xe->persistent_engines.list);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 1b4d340d06047..7c0ae43731803 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1996,13 +1996,14 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (xe->info.has_asid) {
 		mutex_lock(&xe->usm.lock);
 		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
-				      XA_LIMIT(0, XE_MAX_ASID - 1),
+				      XA_LIMIT(1, XE_MAX_ASID - 1),
 				      &xe->usm.next_asid, GFP_KERNEL);
 		mutex_unlock(&xe->usm.lock);
-		if (err) {
+		if (err < 0) {
 			xe_vm_close_and_put(vm);
 			return err;
 		}
+		err = 0;
 		vm->usm.asid = asid;
 	}
 
-- 
GitLab


From fdb6a05383fab3952c9a56ac716e460134990a69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 27 Nov 2023 13:33:49 +0100
Subject: [PATCH 2114/2340] drm/xe: Internally change the compute_mode and
 no_dma_fence mode naming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The name "compute_mode" can be confusing since compute uses either this
mode or fault_mode to achieve the long-running semantics, and compute_mode
can, moving forward, enable fault_mode under the hood to work around
hardware limitations.

Also the name no_dma_fence_mode really refers to what we elsewhere call
long-running mode and the mode contrary to what its name suggests allows
dma-fences as in-fences.

So in an attempt to be more consistent, rename
no_dma_fence_mode -> lr_mode
compute_mode      -> preempt_fence_mode

And adjust flags so that

preempt_fence_mode sets XE_VM_FLAG_LR_MODE
fault_mode sets XE_VM_FLAG_LR_MODE | XE_VM_FLAG_FAULT_MODE

v2:
- Fix a typo in the commit message (Oak Zeng)

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231127123349.23698-1-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c       | 16 ++++++++--------
 drivers/gpu/drm/xe/xe_exec_queue.c |  8 ++++----
 drivers/gpu/drm/xe/xe_guc_submit.c |  2 +-
 drivers/gpu/drm/xe/xe_pt.c         |  6 +++---
 drivers/gpu/drm/xe/xe_sched_job.c  |  2 +-
 drivers/gpu/drm/xe/xe_sync.c       |  6 +++---
 drivers/gpu/drm/xe/xe_vm.c         | 20 ++++++++++----------
 drivers/gpu/drm/xe/xe_vm.h         | 16 ++++++++--------
 drivers/gpu/drm/xe/xe_vm_types.h   |  2 +-
 9 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 85a8a793f527b..5ec37df33afe4 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -100,7 +100,7 @@ static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm)
 	LIST_HEAD(dups);
 	int err = 0;
 
-	if (xe_vm_no_dma_fences(vm))
+	if (xe_vm_in_lr_mode(vm))
 		return 0;
 
 	/*
@@ -182,7 +182,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	for (i = 0; i < args->num_syncs; i++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
 					  &syncs_user[i], true,
-					  xe_vm_no_dma_fences(vm));
+					  xe_vm_in_lr_mode(vm));
 		if (err)
 			goto err_syncs;
 	}
@@ -197,7 +197,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	}
 
 retry:
-	if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
+	if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) {
 		err = down_write_killable(&vm->lock);
 		write_locked = true;
 	} else {
@@ -279,7 +279,7 @@ retry:
 	}
 
 	/* Wait behind munmap style rebinds */
-	if (!xe_vm_no_dma_fences(vm)) {
+	if (!xe_vm_in_lr_mode(vm)) {
 		err = drm_sched_job_add_resv_dependencies(&job->drm,
 							  xe_vm_resv(vm),
 							  DMA_RESV_USAGE_KERNEL);
@@ -292,7 +292,7 @@ retry:
 	if (err)
 		goto err_put_job;
 
-	if (!xe_vm_no_dma_fences(vm)) {
+	if (!xe_vm_in_lr_mode(vm)) {
 		err = down_read_interruptible(&vm->userptr.notifier_lock);
 		if (err)
 			goto err_put_job;
@@ -307,7 +307,7 @@ retry:
 	 * the job and let the DRM scheduler / backend clean up the job.
 	 */
 	xe_sched_job_arm(job);
-	if (!xe_vm_no_dma_fences(vm)) {
+	if (!xe_vm_in_lr_mode(vm)) {
 		/* Block userptr invalidations / BO eviction */
 		dma_resv_add_fence(xe_vm_resv(vm),
 				   &job->drm.s_fence->finished,
@@ -330,14 +330,14 @@ retry:
 	xe_sched_job_push(job);
 	xe_vm_reactivate_rebind(vm);
 
-	if (!err && !xe_vm_no_dma_fences(vm)) {
+	if (!err && !xe_vm_in_lr_mode(vm)) {
 		spin_lock(&xe->ttm.lru_lock);
 		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
 		spin_unlock(&xe->ttm.lru_lock);
 	}
 
 err_repin:
-	if (!xe_vm_no_dma_fences(vm))
+	if (!xe_vm_in_lr_mode(vm))
 		up_read(&vm->userptr.notifier_lock);
 err_put_job:
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index fbb4d3cca9f68..98fc13c89a4da 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -327,7 +327,7 @@ static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, xe_vm_in_compute_mode(q->vm)))
+	if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm)))
 		return -EINVAL;
 
 	if (value)
@@ -705,14 +705,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 
 		q = xe_exec_queue_create(xe, vm, logical_mask,
 					 args->width, hwe,
-					 xe_vm_no_dma_fences(vm) ? 0 :
+					 xe_vm_in_lr_mode(vm) ? 0 :
 					 EXEC_QUEUE_FLAG_PERSISTENT);
 		up_read(&vm->lock);
 		xe_vm_put(vm);
 		if (IS_ERR(q))
 			return PTR_ERR(q);
 
-		if (xe_vm_in_compute_mode(vm)) {
+		if (xe_vm_in_preempt_fence_mode(vm)) {
 			q->compute.context = dma_fence_context_alloc(1);
 			spin_lock_init(&q->compute.lock);
 
@@ -785,7 +785,7 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
  */
 bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
 {
-	return q->vm && xe_vm_no_dma_fences(q->vm) &&
+	return q->vm && xe_vm_in_lr_mode(q->vm) &&
 		!(q->flags & EXEC_QUEUE_FLAG_VM);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index b13c925c56ee9..32c234d753fdd 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1211,7 +1211,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	ge->q = q;
 	init_waitqueue_head(&ge->suspend_wait);
 
-	timeout = (q->vm && xe_vm_no_dma_fences(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
+	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
 		  q->hwe->eclass->sched_props.job_timeout_ms;
 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
 			    get_submit_wq(guc),
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 1fd461fb426e1..c6c9b723db5ac 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1292,8 +1292,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 	 * non-faulting LR, in particular on user-space batch buffer chaining,
 	 * it needs to be done here.
 	 */
-	if ((rebind && !xe_vm_no_dma_fences(vm) && !vm->batch_invalidate_tlb) ||
-	    (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_compute_mode(vm))) {
+	if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
+	    (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_preempt_fence_mode(vm))) {
 		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
 		if (!ifence)
 			return ERR_PTR(-ENOMEM);
@@ -1355,7 +1355,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 			xe_bo_put_commit(&deferred);
 		}
 		if (!rebind && last_munmap_rebind &&
-		    xe_vm_in_compute_mode(vm))
+		    xe_vm_in_preempt_fence_mode(vm))
 			xe_vm_queue_rebind_worker(vm);
 	} else {
 		kfree(rfence);
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 84c700aed8ac7..a9c7ae815bec5 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -92,7 +92,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 	/* Migration and kernel engines have their own locking */
 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
 		lockdep_assert_held(&q->vm->lock);
-		if (!xe_vm_no_dma_fences(q->vm))
+		if (!xe_vm_in_lr_mode(q->vm))
 			xe_vm_assert_held(q->vm);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index eafe53c2f55df..ea96ba4b41da4 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -100,7 +100,7 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 			struct xe_sync_entry *sync,
 			struct drm_xe_sync __user *sync_user,
-			bool exec, bool no_dma_fences)
+			bool exec, bool in_lr_mode)
 {
 	struct drm_xe_sync sync_in;
 	int err;
@@ -118,7 +118,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 	signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL;
 	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
 	case DRM_XE_SYNC_FLAG_SYNCOBJ:
-		if (XE_IOCTL_DBG(xe, no_dma_fences && signal))
+		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
 			return -EOPNOTSUPP;
 
 		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
@@ -136,7 +136,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		break;
 
 	case DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ:
-		if (XE_IOCTL_DBG(xe, no_dma_fences && signal))
+		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
 			return -EOPNOTSUPP;
 
 		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7c0ae43731803..c33ae4db4e02a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -340,7 +340,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	int err;
 	bool wait;
 
-	xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
 
 	down_write(&vm->lock);
 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
@@ -394,7 +394,7 @@ out_unlock:
  */
 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 {
-	if (!xe_vm_in_compute_mode(vm))
+	if (!xe_vm_in_preempt_fence_mode(vm))
 		return;
 
 	down_write(&vm->lock);
@@ -596,7 +596,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
 	long wait;
 	int __maybe_unused tries = 0;
 
-	xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
 	trace_xe_vm_rebind_worker_enter(vm);
 
 	down_write(&vm->lock);
@@ -840,7 +840,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 	struct xe_vma *vma, *next;
 
 	lockdep_assert_held(&vm->lock);
-	if (xe_vm_no_dma_fences(vm) && !rebind_worker)
+	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
 		return NULL;
 
 	xe_vm_assert_held(vm);
@@ -1436,9 +1436,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		vm->batch_invalidate_tlb = true;
 	}
 
-	if (flags & XE_VM_FLAG_COMPUTE_MODE) {
+	if (flags & XE_VM_FLAG_LR_MODE) {
 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
-		vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
+		vm->flags |= XE_VM_FLAG_LR_MODE;
 		vm->batch_invalidate_tlb = false;
 	}
 
@@ -1526,7 +1526,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	xe_assert(xe, !vm->preempt.num_exec_queues);
 
 	xe_vm_close(vm);
-	if (xe_vm_in_compute_mode(vm))
+	if (xe_vm_in_preempt_fence_mode(vm))
 		flush_work(&vm->preempt.rebind_work);
 
 	down_write(&vm->lock);
@@ -1975,11 +1975,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE)
-		flags |= XE_VM_FLAG_COMPUTE_MODE;
+		flags |= XE_VM_FLAG_LR_MODE;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT)
 		flags |= XE_VM_FLAG_ASYNC_DEFAULT;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
-		flags |= XE_VM_FLAG_FAULT_MODE;
+		flags |= XE_VM_FLAG_LR_MODE | XE_VM_FLAG_FAULT_MODE;
 
 	vm = xe_vm_create(xe, flags);
 	if (IS_ERR(vm))
@@ -3066,7 +3066,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
 					  &syncs_user[num_syncs], false,
-					  xe_vm_no_dma_fences(vm));
+					  xe_vm_in_lr_mode(vm));
 		if (err)
 			goto free_syncs;
 	}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 45b70ba865535..12bb5d79487f2 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -149,19 +149,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
 
 void xe_vm_close_and_put(struct xe_vm *vm);
 
-static inline bool xe_vm_in_compute_mode(struct xe_vm *vm)
+static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
 {
-	return vm->flags & XE_VM_FLAG_COMPUTE_MODE;
+	return vm->flags & XE_VM_FLAG_FAULT_MODE;
 }
 
-static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
+static inline bool xe_vm_in_lr_mode(struct xe_vm *vm)
 {
-	return vm->flags & XE_VM_FLAG_FAULT_MODE;
+	return vm->flags & XE_VM_FLAG_LR_MODE;
 }
 
-static inline bool xe_vm_no_dma_fences(struct xe_vm *vm)
+static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm)
 {
-	return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm);
+	return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm);
 }
 
 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
@@ -181,7 +181,7 @@ extern struct ttm_device_funcs xe_ttm_funcs;
 
 static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
 {
-	xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
 	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
 }
 
@@ -196,7 +196,7 @@ static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
  */
 static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
 {
-	if (xe_vm_in_compute_mode(vm) && vm->preempt.rebind_deactivated) {
+	if (xe_vm_in_preempt_fence_mode(vm) && vm->preempt.rebind_deactivated) {
 		vm->preempt.rebind_deactivated = false;
 		xe_vm_queue_rebind_worker(vm);
 	}
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 97d779d8a7d38..fc2645e07578d 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -154,7 +154,7 @@ struct xe_vm {
 	 * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely
 	 */
 #define XE_VM_FLAG_64K			BIT(0)
-#define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
+#define XE_VM_FLAG_LR_MODE		BIT(1)
 #define XE_VM_FLAG_ASYNC_DEFAULT	BIT(2)
 #define XE_VM_FLAG_MIGRATION		BIT(3)
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
-- 
GitLab


From 812ec747a354e00f5e789f3cdcfbc80f98f1d71d Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 21 Nov 2023 11:52:09 -0800
Subject: [PATCH 2115/2340] drm/xe: Sync MTL PCI IDs with i915

For Xe1 platforms, it's better to follow the way i915 adds the PCI IDs
to the header, so it's easier to catch up when there is an update. This
brings the same logic applied in commit 2e3c369f23a7 ("drm/i915/mtl:
Eliminate subplatforms") to the equivalent xe header.

The end result of this header for Xe1 platforms is now in sync with i915
as of commit 5032c607e886 ("drm/i915: ATS-M device ID update"). This can
be seen by

	$ git show 5032c607e886:include/drm/i915_pciids.h > a.h
	$ git diff --color-words  --no-index a.h include/drm/xe_pciids.h

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231121195209.802235-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/xe_pciids.h | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index 6c3ac3b5ec94f..de1a344737bc9 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -173,33 +173,14 @@
 	XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
 	XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
 
-/* MTL */
-#define XE_MTL_M_IDS(MACRO__, ...)		\
+/* MTL / ARL */
+#define XE_MTL_IDS(MACRO__, ...)		\
 	MACRO__(0x7D40, ## __VA_ARGS__),	\
-	MACRO__(0x7D43, ## __VA_ARGS__),	\
-	MACRO__(0x7DC0, ## __VA_ARGS__)
-
-#define XE_MTL_P_IDS(MACRO__, ...)		\
 	MACRO__(0x7D45, ## __VA_ARGS__),	\
-	MACRO__(0x7D47, ## __VA_ARGS__),	\
-	MACRO__(0x7D50, ## __VA_ARGS__),	\
 	MACRO__(0x7D55, ## __VA_ARGS__),	\
-	MACRO__(0x7DC5, ## __VA_ARGS__),	\
-	MACRO__(0x7DD0, ## __VA_ARGS__),	\
-	MACRO__(0x7DD5, ## __VA_ARGS__)
-
-#define XE_MTL_S_IDS(MACRO__, ...)		\
 	MACRO__(0x7D60, ## __VA_ARGS__),	\
-	MACRO__(0x7DE0, ## __VA_ARGS__)
-
-#define XE_ARL_IDS(MACRO__, ...)		\
-	MACRO__(0x7D67, ## __VA_ARGS__)
-
-#define XE_MTL_IDS(MACRO__, ...)		\
-	XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__),	\
-	XE_MTL_P_IDS(MACRO__, ## __VA_ARGS__),	\
-	XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__),	\
-	XE_ARL_IDS(MACRO__, ## __VA_ARGS__)
+	MACRO__(0x7D67, ## __VA_ARGS__),	\
+	MACRO__(0x7DD5, ## __VA_ARGS__)
 
 #define XE_LNL_IDS(MACRO__, ...) \
 	MACRO__(0x6420, ## __VA_ARGS__), \
-- 
GitLab


From f91bacce8dbb5dcb395e1ab9750977fa70ad485e Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 27 Nov 2023 11:03:33 -0800
Subject: [PATCH 2116/2340] drm/xe/dg2: Drop Wa_22014600077

The workaround database has been updated to drop this workaround for all
DG2 variants.

Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20231127190332.4099519-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  1 -
 drivers/gpu/drm/xe/xe_wa.c           | 21 ---------------------
 2 files changed, 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index cc27fe8fc3633..686930aba77e8 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -340,7 +340,6 @@
 #define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
 
 #define CACHE_MODE_SS				XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED)
-#define   ENABLE_EU_COUNT_FOR_TDL_FLUSH		REG_BIT(10)
 #define   DISABLE_ECC				REG_BIT(5)
 #define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 81ae0232146e8..e0853ab30c009 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -522,27 +522,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
 			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
 	},
-	{ XE_RTP_NAME("22014600077"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(B0, FOREVER),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
-			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
-			     /*
-			      * Wa_14012342262 write-only reg, so skip
-			      * verification
-			      */
-			     .read_mask = 0))
-	},
-	{ XE_RTP_NAME("22014600077"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS,
-			     ENABLE_EU_COUNT_FOR_TDL_FLUSH,
-			     /*
-			      * Wa_14012342262 write-only reg, so skip
-			      * verification
-			      */
-			     .read_mask = 0))
-	},
 	{ XE_RTP_NAME("14015150844"),
 	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
-- 
GitLab


From a409901f516cf5e25180d98a510708013b33b8ee Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Mon, 20 Nov 2023 10:51:45 +0530
Subject: [PATCH 2117/2340] drm/xe/xe2: Add workaround 14020013138

This workaround applies to Xe2_LPG A0

V3:
  - Apply rule RENDER class
V2(Matt):
  - Apply WA in lrc context

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++
 drivers/gpu/drm/xe/xe_wa.c           | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 686930aba77e8..18b13224480d2 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -93,6 +93,9 @@
 #define XEHP_TILE_ADDR_RANGE(_idx)		XE_REG_MCR(0x4900 + (_idx) * 4)
 #define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
 
+#define WM_CHICKEN3				XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED)
+#define   HIZ_PLANE_COMPRESSION_DIS		REG_BIT(10)
+
 #define CHICKEN_RASTER_2			XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
 #define   TBIMR_FAST_CLIP			REG_BIT(5)
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index e0853ab30c009..63bd4bb1af039 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -714,6 +714,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
 	},
+	{ XE_RTP_NAME("14020013138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
+	},
 
 	{}
 };
-- 
GitLab


From adce1b393f90c349820cb0cb907f94ce9b3a4485 Mon Sep 17 00:00:00 2001
From: Bommithi Sakeena <bommithi.sakeena@intel.com>
Date: Fri, 17 Nov 2023 16:06:18 +0000
Subject: [PATCH 2118/2340] drm/xe: Encapsulate all the module parameters

Encapsulate all the module parameters in one single global struct
variable. This also removes the extra xe_module.h from includes.

v2: naming consistency as suggested by Jani and Lucas
v3: fix checkpatch errors/warnings
v4: adding blank line after struct declaration

Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Bommithi Sakeena <bommithi.sakeena@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c  |  2 +-
 drivers/gpu/drm/xe/xe_display.c |  4 ++--
 drivers/gpu/drm/xe/xe_guc_log.c |  2 +-
 drivers/gpu/drm/xe/xe_mmio.c    |  2 +-
 drivers/gpu/drm/xe/xe_module.c  | 29 ++++++++++++++---------------
 drivers/gpu/drm/xe/xe_module.h  | 24 +++++++++++++++++-------
 drivers/gpu/drm/xe/xe_pci.c     |  6 +++---
 drivers/gpu/drm/xe/xe_uc_fw.c   |  6 +++---
 8 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d60379d844d2a..54202623e2553 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -223,7 +223,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 
 	xe->info.devid = pdev->device;
 	xe->info.revid = pdev->revision;
-	xe->info.force_execlist = force_execlist;
+	xe->info.force_execlist = xe_modparam.force_execlist;
 
 	spin_lock_init(&xe->irq.lock);
 
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
index da10f16e1c125..74391d9b11ae0 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -45,7 +45,7 @@ static bool has_display(struct xe_device *xe)
  */
 bool xe_display_driver_probe_defer(struct pci_dev *pdev)
 {
-	if (!enable_display)
+	if (!xe_modparam.enable_display)
 		return 0;
 
 	return intel_display_driver_probe_defer(pdev);
@@ -69,7 +69,7 @@ static void xe_display_last_close(struct drm_device *dev)
  */
 void xe_display_driver_set_hooks(struct drm_driver *driver)
 {
-	if (!enable_display)
+	if (!xe_modparam.enable_display)
 		return;
 
 	driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 45c60a9c631c3..27c3827bfd054 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -100,7 +100,7 @@ int xe_guc_log_init(struct xe_guc_log *log)
 
 	xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
 	log->bo = bo;
-	log->level = xe_guc_log_level;
+	log->level = xe_modparam.guc_log_level;
 
 	err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index d0a36600e52bc..0f846272e39c7 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -73,7 +73,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
  */
 static void xe_resize_vram_bar(struct xe_device *xe)
 {
-	u64 force_vram_bar_size = xe_force_vram_bar_size;
+	u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct pci_bus *root = pdev->bus;
 	resource_size_t current_size;
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 7194595e7f312..1ea883f48c63e 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -10,39 +10,38 @@
 
 #include "xe_drv.h"
 #include "xe_hw_fence.h"
-#include "xe_module.h"
 #include "xe_pci.h"
 #include "xe_pmu.h"
 #include "xe_sched_job.h"
 
-bool force_execlist = false;
-module_param_named_unsafe(force_execlist, force_execlist, bool, 0444);
+struct xe_modparam xe_modparam = {
+	.enable_display = true,
+	.guc_log_level = 5,
+	.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
+	/* the rest are 0 by default */
+};
+
+module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
 MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
 
-bool enable_display = true;
-module_param_named(enable_display, enable_display, bool, 0444);
+module_param_named(enable_display, xe_modparam.enable_display, bool, 0444);
 MODULE_PARM_DESC(enable_display, "Enable display");
 
-u32 xe_force_vram_bar_size;
-module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600);
+module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600);
 MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
 
-int xe_guc_log_level = 5;
-module_param_named(guc_log_level, xe_guc_log_level, int, 0600);
+module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600);
 MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
 
-char *xe_guc_firmware_path;
-module_param_named_unsafe(guc_firmware_path, xe_guc_firmware_path, charp, 0400);
+module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400);
 MODULE_PARM_DESC(guc_firmware_path,
 		 "GuC firmware path to use instead of the default one");
 
-char *xe_huc_firmware_path;
-module_param_named_unsafe(huc_firmware_path, xe_huc_firmware_path, charp, 0400);
+module_param_named_unsafe(huc_firmware_path, xe_modparam.huc_firmware_path, charp, 0400);
 MODULE_PARM_DESC(huc_firmware_path,
 		 "HuC firmware path to use instead of the default one - empty string disables");
 
-char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE;
-module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400);
+module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
 MODULE_PARM_DESC(force_probe,
 		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
 
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index e1da1e9ca5cba..51d75ff123765 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -3,13 +3,23 @@
  * Copyright © 2023 Intel Corporation
  */
 
+#ifndef _XE_MODULE_H_
+#define _XE_MODULE_H_
+
 #include <linux/types.h>
 
 /* Module modprobe variables */
-extern bool force_execlist;
-extern bool enable_display;
-extern u32 xe_force_vram_bar_size;
-extern int xe_guc_log_level;
-extern char *xe_guc_firmware_path;
-extern char *xe_huc_firmware_path;
-extern char *xe_param_force_probe;
+struct xe_modparam {
+	bool force_execlist;
+	bool enable_display;
+	u32 force_vram_bar_size;
+	int guc_log_level;
+	char *guc_firmware_path;
+	char *huc_firmware_path;
+	char *force_probe;
+};
+
+extern struct xe_modparam xe_modparam;
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 066a223a341a8..6d062478c1f2f 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -419,12 +419,12 @@ static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
 
 static bool id_forced(u16 device_id)
 {
-	return device_id_in_list(device_id, xe_param_force_probe, false);
+	return device_id_in_list(device_id, xe_modparam.force_probe, false);
 }
 
 static bool id_blocked(u16 device_id)
 {
-	return device_id_in_list(device_id, xe_param_force_probe, true);
+	return device_id_in_list(device_id, xe_modparam.force_probe, true);
 }
 
 static const struct xe_subplatform_desc *
@@ -593,7 +593,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
 
 	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
-				  enable_display &&
+				  xe_modparam.enable_display &&
 				  desc->has_display;
 	/*
 	 * All platforms have at least one primary GT.  Any platform with media
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 91d4a2272ee73..2a5f361e82706 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -220,11 +220,11 @@ uc_fw_override(struct xe_uc_fw *uc_fw)
 	/* empty string disables, but it's not allowed for GuC */
 	switch (uc_fw->type) {
 	case XE_UC_FW_TYPE_GUC:
-		if (xe_guc_firmware_path && *xe_guc_firmware_path)
-			path_override = xe_guc_firmware_path;
+		if (xe_modparam.guc_firmware_path && *xe_modparam.guc_firmware_path)
+			path_override = xe_modparam.guc_firmware_path;
 		break;
 	case XE_UC_FW_TYPE_HUC:
-		path_override = xe_huc_firmware_path;
+		path_override = xe_modparam.huc_firmware_path;
 		break;
 	default:
 		break;
-- 
GitLab


From 2e7227b4b733223a0a5b6a7a2685c7ff089c21c5 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:44 -0800
Subject: [PATCH 2119/2340] drm/xe/uc: Rework uC version tracking

The GSC firmware, support for which is coming soon for Xe, has both a
release version (updated on every release) and a compatibility version
(update only on interface changes). The GuC has something similar, with
a global release version and a submission version (which is also known
as the VF compatibility version). The main difference is that for the
GuC we still want to check the driver requirement against the release
version, while for the GSC we'll need to check against the compatibility
version.
Instead of special casing the GSC, this patch reworks the FW logic so
that we store both versions at the uc_fw level for all binaries and we
allow checking against either of the versions. Initially, we'll use it
to support GSC, but the logic could be re-used to allow VFs to check
against the GuC compatibility version.
Note that the GSC version has 4 numbers (major, minor, hotfix, build),
so support for that has been added as part of the rework and will be
used in follow-up patches.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_types.h   |   9 --
 drivers/gpu/drm/xe/xe_uc_fw.c       | 141 +++++++++++++++++-----------
 drivers/gpu/drm/xe/xe_uc_fw_types.h |  39 ++++++--
 3 files changed, 114 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 0fdcc05dc16a6..cd80802e89189 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -52,15 +52,6 @@ struct xe_guc {
 			/** @seqno: suspend fences seqno */
 			u32 seqno;
 		} suspend;
-		/** @version: submission version */
-		struct {
-			/** @major: major version of GuC submission */
-			u32 major;
-			/** @minor: minor version of GuC submission */
-			u32 minor;
-			/** @patch: patch version of GuC submission */
-			u32 patch;
-		} version;
 #ifdef CONFIG_PROVE_LOCKING
 #define NUM_SUBMIT_WQ	256
 		/** @submit_wq_pool: submission ordered workqueues pool */
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2a5f361e82706..376fbc10c5ea4 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -204,9 +204,12 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 	for (i = 0; i < count && p <= entries[i].platform; i++) {
 		if (p == entries[i].platform) {
 			uc_fw->path = entries[i].path;
-			uc_fw->major_ver_wanted = entries[i].major;
-			uc_fw->minor_ver_wanted = entries[i].minor;
+			uc_fw->versions.wanted.major = entries[i].major;
+			uc_fw->versions.wanted.minor = entries[i].minor;
 			uc_fw->full_ver_required = entries[i].full_ver_required;
+
+			/* compatibility version checking coming soon */
+			uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
 			break;
 		}
 	}
@@ -273,32 +276,30 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
 static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
 {
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
-	struct xe_guc *guc = &gt->uc.guc;
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+	struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
 
 	xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC);
-	xe_gt_assert(gt, uc_fw->major_ver_found >= 70);
+	xe_gt_assert(gt, release->major >= 70);
 
-	if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) {
+	if (release->major > 70 || release->minor >= 6) {
 		/* v70.6.0 adds CSS header support */
-		guc->submission_state.version.major =
-			FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
-				  css->submission_version);
-		guc->submission_state.version.minor =
-			FIELD_GET(CSS_SW_VERSION_UC_MINOR,
-				  css->submission_version);
-		guc->submission_state.version.patch =
-			FIELD_GET(CSS_SW_VERSION_UC_PATCH,
-				  css->submission_version);
-	} else if (uc_fw->minor_ver_found >= 3) {
+		compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+						 css->submission_version);
+		compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+						 css->submission_version);
+		compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
+						 css->submission_version);
+	} else if (release->minor >= 3) {
 		/* v70.3.0 introduced v1.1.0 */
-		guc->submission_state.version.major = 1;
-		guc->submission_state.version.minor = 1;
-		guc->submission_state.version.patch = 0;
+		compatibility->major = 1;
+		compatibility->minor = 1;
+		compatibility->patch = 0;
 	} else {
 		/* v70.0.0 introduced v1.0.0 */
-		guc->submission_state.version.major = 1;
-		guc->submission_state.version.minor = 0;
-		guc->submission_state.version.patch = 0;
+		compatibility->major = 1;
+		compatibility->minor = 0;
+		compatibility->patch = 0;
 	}
 
 	uc_fw->private_data_size = css->private_data_size;
@@ -307,30 +308,31 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
 static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
+	struct xe_uc_fw_version *found = &uc_fw->versions.found[uc_fw->versions.wanted_type];
 
 	/* Driver has no requirement on any version, any is good. */
-	if (!uc_fw->major_ver_wanted)
+	if (!wanted->major)
 		return 0;
 
 	/*
 	 * If full version is required, both major and minor should match.
 	 * Otherwise, at least the major version.
 	 */
-	if (uc_fw->major_ver_wanted != uc_fw->major_ver_found ||
-	    (uc_fw->full_ver_required &&
-	     uc_fw->minor_ver_wanted != uc_fw->minor_ver_found)) {
+	if (wanted->major != found->major ||
+	    (uc_fw->full_ver_required && wanted->minor != found->minor)) {
 		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
 			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
-			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+			   found->major, found->minor,
+			   wanted->major, wanted->minor);
 		goto fail;
 	}
 
-	if (uc_fw->minor_ver_wanted > uc_fw->minor_ver_found) {
+	if (wanted->minor > found->minor) {
 		drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
 			   xe_uc_fw_type_repr(uc_fw->type),
-			   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
-			   uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			   wanted->major, wanted->minor,
+			   found->major, found->minor,
 			   uc_fw->path);
 		drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
 			 XE_UC_FIRMWARE_URL);
@@ -349,6 +351,7 @@ fail:
 static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t fw_size)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
 	struct uc_css_header *css;
 	size_t size;
 
@@ -390,12 +393,9 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
 	}
 
 	/* Get version numbers from the CSS header */
-	uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
-					   css->sw_version);
-	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
-					   css->sw_version);
-	uc_fw->patch_ver_found = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
-					   css->sw_version);
+	release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
+	release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
+	release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
 
 	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
 		guc_read_css_info(uc_fw, css);
@@ -431,6 +431,7 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
 	struct xe_device *xe = gt_to_xe(gt);
 	const struct gsc_cpd_header_v2 *header = data;
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
 	const struct gsc_manifest_header *manifest;
 	size_t min_size = sizeof(*header);
 	u32 offset;
@@ -468,9 +469,9 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz
 
 	manifest = data + offset;
 
-	uc_fw->major_ver_found = manifest->fw_version.major;
-	uc_fw->minor_ver_found = manifest->fw_version.minor;
-	uc_fw->patch_ver_found = manifest->fw_version.hotfix;
+	release->major = manifest->fw_version.major;
+	release->minor = manifest->fw_version.minor;
+	release->patch = manifest->fw_version.hotfix;
 
 	/* then optionally look for the css header */
 	if (css_entry) {
@@ -524,12 +525,25 @@ static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw)
 	return 0;
 }
 
+#define print_uc_fw_version(p_, version_, prefix_, ...) \
+do { \
+	struct xe_uc_fw_version *ver_ = (version_); \
+	if (ver_->build) \
+		drm_printf(p_, prefix_ " version %u.%u.%u.%u\n", ##__VA_ARGS__, \
+			   ver_->major, ver_->minor, \
+			   ver_->patch, ver_->build); \
+	else \
+		drm_printf(p_, prefix_ " version %u.%u.%u\n", ##__VA_ARGS__, \
+			  ver_->major, ver_->minor, ver_->patch); \
+} while (0)
+
 int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
 	struct xe_tile *tile = gt_to_tile(gt);
 	struct device *dev = xe->drm.dev;
+	struct drm_printer p = drm_info_printer(dev);
 	const struct firmware *fw = NULL;
 	struct xe_bo *obj;
 	int err;
@@ -567,9 +581,10 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	if (err)
 		goto fail;
 
-	drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u\n",
-		 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-		 uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found);
+	print_uc_fw_version(&p,
+			    &uc_fw->versions.found[XE_UC_FW_VER_RELEASE],
+			    "Using %s firmware from %s",
+			    xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
 
 	err = uc_fw_check_version_requirements(uc_fw);
 	if (err)
@@ -686,26 +701,40 @@ fail:
 	return err;
 }
 
+static const char *version_type_repr(enum xe_uc_fw_version_types type)
+{
+	switch (type) {
+	case XE_UC_FW_VER_RELEASE:
+		return "release";
+	case XE_UC_FW_VER_COMPATIBILITY:
+		return "compatibility";
+	default:
+		return "Unknown version type";
+	}
+}
 
 void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
 {
+	int i;
+
 	drm_printf(p, "%s firmware: %s\n",
 		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
 	drm_printf(p, "\tstatus: %s\n",
 		   xe_uc_fw_status_repr(uc_fw->status));
-	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u.%u\n",
-		   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
-		   uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found);
-	drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
-	drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
-
-	if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
-		struct xe_gt *gt = uc_fw_to_gt(uc_fw);
-		struct xe_guc *guc = &gt->uc.guc;
-
-		drm_printf(p, "\tSubmit version: %u.%u.%u\n",
-			   guc->submission_state.version.major,
-			   guc->submission_state.version.minor,
-			   guc->submission_state.version.patch);
+
+	print_uc_fw_version(p, &uc_fw->versions.wanted, "\twanted %s",
+			    version_type_repr(uc_fw->versions.wanted_type));
+
+	for (i = 0; i < XE_UC_FW_VER_TYPE_COUNT; i++) {
+		struct xe_uc_fw_version *ver = &uc_fw->versions.found[i];
+
+		if (ver->major)
+			print_uc_fw_version(p, ver, "\tfound %s",
+					    version_type_repr(i));
 	}
+
+	if (uc_fw->ucode_size)
+		drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
+	if (uc_fw->rsa_size)
+		drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
 }
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 1650599303c85..46c801d8e954f 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -59,6 +59,26 @@ enum xe_uc_fw_type {
 };
 #define XE_UC_FW_NUM_TYPES 2
 
+/**
+ * struct xe_uc_fw_version - Version for XE micro controller firmware
+ */
+struct xe_uc_fw_version {
+	/** @major: major version of the FW */
+	u16 major;
+	/** @minor: minor version of the FW */
+	u16 minor;
+	/** @patch: patch version of the FW */
+	u16 patch;
+	/** @build: build version of the FW (not always available) */
+	u16 build;
+};
+
+enum xe_uc_fw_version_types {
+	XE_UC_FW_VER_RELEASE,
+	XE_UC_FW_VER_COMPATIBILITY,
+	XE_UC_FW_VER_TYPE_COUNT
+};
+
 /**
  * struct xe_uc_fw - XE micro controller firmware
  */
@@ -98,16 +118,15 @@ struct xe_uc_fw {
 	 * version required per platform.
 	 */
 
-	/** @major_ver_wanted: major firmware version wanted by platform */
-	u16 major_ver_wanted;
-	/** @minor_ver_wanted: minor firmware version wanted by platform */
-	u16 minor_ver_wanted;
-	/** @major_ver_found: major version found in firmware blob */
-	u16 major_ver_found;
-	/** @minor_ver_found: major version found in firmware blob */
-	u16 minor_ver_found;
-	/** @patch_ver_found: patch version found in firmware blob */
-	u16 patch_ver_found;
+	/** @versions: FW versions wanted and found */
+	struct {
+		/** @wanted: firmware version wanted by platform */
+		struct xe_uc_fw_version wanted;
+		/** @wanted_type: type of firmware version wanted (release vs compatibility) */
+		enum xe_uc_fw_version_types wanted_type;
+		/** @found: fw versions found in firmware blob */
+		struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
+	} versions;
 
 	/** @rsa_size: RSA size */
 	u32 rsa_size;
-- 
GitLab


From 0d1caff4a367e0cbc28622fab7e39576bac82bb9 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:45 -0800
Subject: [PATCH 2120/2340] drm/xe/gsc: Introduce GSC FW

Add the basic definitions and init function. Same as HuC, GSC is only
supported on the media GT on MTL and newer platforms.
Note that the GSC requires submission resources which can't be allocated
during init (because we don't have the hwconfig yet), so it can't be
marked as loadable at the end of the init function. The allocation of
those resources will come in the patch that makes use of them to load
the FW.

v2: better comment, move num FWs define inside the enum (John)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile         |  1 +
 drivers/gpu/drm/xe/xe_gsc.c         | 52 +++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gsc.h         | 13 ++++++++
 drivers/gpu/drm/xe/xe_gsc_types.h   | 19 +++++++++++
 drivers/gpu/drm/xe/xe_uc.c          |  9 +++--
 drivers/gpu/drm/xe/xe_uc_fw.c       | 23 ++++++++++---
 drivers/gpu/drm/xe/xe_uc_fw.h       |  5 ++-
 drivers/gpu/drm/xe/xe_uc_fw_types.h |  5 +--
 drivers/gpu/drm/xe/xe_uc_types.h    |  3 ++
 9 files changed, 121 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_gsc.c
 create mode 100644 drivers/gpu/drm/xe/xe_gsc.h
 create mode 100644 drivers/gpu/drm/xe/xe_gsc_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 05a90fd6c3c92..184e2724ce7ba 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -58,6 +58,7 @@ xe-y += xe_bb.o \
 	xe_force_wake.o \
 	xe_ggtt.o \
 	xe_gpu_scheduler.o \
+	xe_gsc.o \
 	xe_gt.o \
 	xe_gt_clock.o \
 	xe_gt_debugfs.o \
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
new file mode 100644
index 0000000000000..216f36cee0f71
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc.h"
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_uc_fw.h"
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+int xe_gsc_init(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	int ret;
+
+	gsc->fw.type = XE_UC_FW_TYPE_GSC;
+
+	/* The GSC uC is only available on the media GT */
+	if (tile->media_gt && (gt != tile->media_gt)) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
+		return 0;
+	}
+
+	/*
+	 * Some platforms can have GuC but not GSC. That would cause
+	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
+	 * all firmware loading. So check for GSC being enabled before
+	 * propagating the failure back up. That way the higher level will keep
+	 * going and load GuC as appropriate.
+	 */
+	ret = xe_uc_fw_init(&gsc->fw);
+	if (!xe_uc_fw_is_enabled(&gsc->fw))
+		return 0;
+	else if (ret)
+		goto out;
+
+	return 0;
+
+out:
+	xe_gt_err(gt, "GSC init failed with %d", ret);
+	return ret;
+}
+
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
new file mode 100644
index 0000000000000..baa7f21f42049
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_H_
+#define _XE_GSC_H_
+
+#include "xe_gsc_types.h"
+
+int xe_gsc_init(struct xe_gsc *gsc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
new file mode 100644
index 0000000000000..135f156e37367
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_TYPES_H_
+#define _XE_GSC_TYPES_H_
+
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_gsc - GSC
+ */
+struct xe_gsc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 784f53c5f282f..b67154c78dff5 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -6,6 +6,7 @@
 #include "xe_uc.h"
 
 #include "xe_device.h"
+#include "xe_gsc.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_guc_pc.h"
@@ -32,8 +33,8 @@ int xe_uc_init(struct xe_uc *uc)
 	int ret;
 
 	/*
-	 * We call the GuC/HuC init functions even if GuC submission is off to
-	 * correctly move our tracking of the FW state to "disabled".
+	 * We call the GuC/HuC/GSC init functions even if GuC submission is off
+	 * to correctly move our tracking of the FW state to "disabled".
 	 */
 
 	ret = xe_guc_init(&uc->guc);
@@ -44,6 +45,10 @@ int xe_uc_init(struct xe_uc *uc)
 	if (ret)
 		goto err;
 
+	ret = xe_gsc_init(&uc->gsc);
+	if (ret)
+		goto err;
+
 	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 376fbc10c5ea4..5eaf6ce0d0259 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -158,11 +158,18 @@ XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
 static struct xe_gt *
 __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
 {
-	if (type == XE_UC_FW_TYPE_GUC)
-		return container_of(uc_fw, struct xe_gt, uc.guc.fw);
+	XE_WARN_ON(type >= XE_UC_FW_NUM_TYPES);
 
-	XE_WARN_ON(type != XE_UC_FW_TYPE_HUC);
-	return container_of(uc_fw, struct xe_gt, uc.huc.fw);
+	switch (type) {
+	case XE_UC_FW_TYPE_GUC:
+		return container_of(uc_fw, struct xe_gt, uc.guc.fw);
+	case XE_UC_FW_TYPE_HUC:
+		return container_of(uc_fw, struct xe_gt, uc.huc.fw);
+	case XE_UC_FW_TYPE_GSC:
+		return container_of(uc_fw, struct xe_gt, uc.gsc.fw);
+	default:
+		return NULL;
+	}
 }
 
 static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw)
@@ -197,6 +204,14 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 	u32 count;
 	int i;
 
+	/*
+	 * GSC FW support is still not fully in place, so we're not defining
+	 * the FW blob yet because we don't want the driver to attempt to load
+	 * it until we're ready for it.
+	 */
+	if (uc_fw->type == XE_UC_FW_TYPE_GSC)
+		return;
+
 	xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all));
 	entries = blobs_all[uc_fw->type].entries;
 	count = blobs_all[uc_fw->type].count;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index 1d1a0c156cdfd..7feafe1695f91 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -96,8 +96,11 @@ static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
 		return "GuC";
 	case XE_UC_FW_TYPE_HUC:
 		return "HuC";
+	case XE_UC_FW_TYPE_GSC:
+		return "GSC";
+	default:
+		return "uC";
 	}
-	return "uC";
 }
 
 static inline enum xe_uc_fw_status
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 46c801d8e954f..fc1de0cc93247 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -55,9 +55,10 @@ enum xe_uc_fw_status {
 
 enum xe_uc_fw_type {
 	XE_UC_FW_TYPE_GUC = 0,
-	XE_UC_FW_TYPE_HUC
+	XE_UC_FW_TYPE_HUC,
+	XE_UC_FW_TYPE_GSC,
+	XE_UC_FW_NUM_TYPES
 };
-#define XE_UC_FW_NUM_TYPES 2
 
 /**
  * struct xe_uc_fw_version - Version for XE micro controller firmware
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
index 49bef6498b85e..9924e44848663 100644
--- a/drivers/gpu/drm/xe/xe_uc_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -6,6 +6,7 @@
 #ifndef _XE_UC_TYPES_H_
 #define _XE_UC_TYPES_H_
 
+#include "xe_gsc_types.h"
 #include "xe_guc_types.h"
 #include "xe_huc_types.h"
 #include "xe_wopcm_types.h"
@@ -18,6 +19,8 @@ struct xe_uc {
 	struct xe_guc guc;
 	/** @huc: HuC */
 	struct xe_huc huc;
+	/** @gsc: Graphics Security Controller */
+	struct xe_gsc gsc;
 	/** @wopcm: WOPCM */
 	struct xe_wopcm wopcm;
 };
-- 
GitLab


From 985d5a49e8454d64a01ab362e9091788eeed1839 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:46 -0800
Subject: [PATCH 2121/2340] drm/xe/gsc: Parse GSC FW header

The GSC blob starts with a layout header, from which we can move to the
boot directory, which in turns allows us to find the CPD. The CPD uses
the same format as the one in the HuC binary, so we can re-use the same
parsing code to get to the manifest, which contains the release and
security versions of the FW.

v2: Fix comments in struct definition (John)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gsc_types.h |   3 +
 drivers/gpu/drm/xe/xe_uc_fw.c     |  77 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_uc_fw_abi.h | 113 ++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
index 135f156e37367..1bc50583fe586 100644
--- a/drivers/gpu/drm/xe/xe_gsc_types.h
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -14,6 +14,9 @@
 struct xe_gsc {
 	/** @fw: Generic uC firmware management */
 	struct xe_uc_fw fw;
+
+	/** @security_version: SVN found in the fetched blob */
+	u32 security_version;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 5eaf6ce0d0259..2d0fb7058d663 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -12,6 +12,7 @@
 #include "xe_bo.h"
 #include "xe_device_types.h"
 #include "xe_force_wake.h"
+#include "xe_gsc.h"
 #include "xe_gt.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
@@ -488,6 +489,13 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz
 	release->minor = manifest->fw_version.minor;
 	release->patch = manifest->fw_version.hotfix;
 
+	if (uc_fw->type == XE_UC_FW_TYPE_GSC) {
+		struct xe_gsc *gsc = container_of(uc_fw, struct xe_gsc, fw);
+
+		release->build = manifest->fw_version.build;
+		gsc->security_version = manifest->security_version;
+	}
+
 	/* then optionally look for the css header */
 	if (css_entry) {
 		int ret;
@@ -517,6 +525,73 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz
 	return 0;
 }
 
+static int parse_gsc_layout(struct xe_uc_fw *uc_fw, const void *data, size_t size)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	const struct gsc_layout_pointers *layout = data;
+	const struct gsc_bpdt_header *bpdt_header = NULL;
+	const struct gsc_bpdt_entry *bpdt_entry = NULL;
+	size_t min_size = sizeof(*layout);
+	int i;
+
+	if (size < min_size) {
+		xe_gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	min_size = layout->boot1.offset + layout->boot1.size;
+	if (size < min_size) {
+		xe_gt_err(gt, "GSC FW too small for boot section! %zu < %zu\n",
+			  size, min_size);
+		return -ENODATA;
+	}
+
+	min_size = sizeof(*bpdt_header);
+	if (layout->boot1.size < min_size) {
+		xe_gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n",
+			  layout->boot1.size, min_size);
+		return -ENODATA;
+	}
+
+	bpdt_header = data + layout->boot1.offset;
+	if (bpdt_header->signature != GSC_BPDT_HEADER_SIGNATURE) {
+		xe_gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n",
+			  bpdt_header->signature);
+		return -EINVAL;
+	}
+
+	min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count;
+	if (layout->boot1.size < min_size) {
+		xe_gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n",
+			  layout->boot1.size, min_size);
+		return -ENODATA;
+	}
+
+	bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header);
+	for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) {
+		if ((bpdt_entry->type & GSC_BPDT_ENTRY_TYPE_MASK) !=
+		    GSC_BPDT_ENTRY_TYPE_GSC_RBE)
+			continue;
+
+		min_size = bpdt_entry->sub_partition_offset;
+
+		/* the CPD header parser will check that the CPD header fits */
+		if (layout->boot1.size < min_size) {
+			xe_gt_err(gt, "GSC FW boot section too small for CPD offset: %u < %zu\n",
+				  layout->boot1.size, min_size);
+			return -ENODATA;
+		}
+
+		return parse_cpd_header(uc_fw,
+					(void *)bpdt_header + min_size,
+					layout->boot1.size - min_size,
+					"RBEP.man", NULL);
+	}
+
+	xe_gt_err(gt, "couldn't find CPD header in GSC binary!\n");
+	return -ENODATA;
+}
+
 static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw)
 {
 	int ret;
@@ -526,6 +601,8 @@ static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw)
 	 * releases use GSC CPD headers.
 	 */
 	switch (uc_fw->type) {
+	case XE_UC_FW_TYPE_GSC:
+		return parse_gsc_layout(uc_fw, fw->data, fw->size);
 	case XE_UC_FW_TYPE_HUC:
 		ret = parse_cpd_header(uc_fw, fw->data, fw->size, "HUCP.man", "huc_fw");
 		if (!ret || ret != -ENOENT)
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index d6725c9632517..87ade41209d0d 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -140,6 +140,58 @@ static_assert(sizeof(struct uc_css_header) == 128);
  *	|      RSA Key (MTL+ only)                       |
  *	|      ...                                       |
  *	+================================================+
+ *
+ * The GSC binary starts instead with a layout header, which contains the
+ * locations of the various partitions of the binary. The one we're interested
+ * in is the boot1 partition, where we can find a BPDT header followed by
+ * entries, one of which points to the RBE sub-section of the partition, which
+ * contains the CPD. The GSC blob does not contain a CSS-based binary, so we
+ * only need to look for the manifest, which is under the "RBEP.man" CPD entry.
+ * Note that we have no need to find where the actual FW code is inside the
+ * image because the GSC ROM will itself parse the headers to find it and load
+ * it.
+ * The GSC firmware header layout looks like this::
+ *
+ *	+================================================+
+ *	|  Layout Pointers                               |
+ *	|      ...                                       |
+ *	|      Boot1 offset  >---------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	|  BPDT header                                   |<-----o
+ *	+================================================+
+ *	|  BPDT entries[]                                |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          type == GSC_RBE                       |
+ *	|          offset  >-----------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	|  CPD Header                                    |<-----o
+ *	+================================================+
+ *	|  CPD entries[]                                 |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          "RBEP.man"                            |
+ *	|           ...                                  |
+ *	|           offset  >----------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	| Manifest Header                                |<-----o
+ *	|  ...                                           |
+ *	|  FW version                                    |
+ *	|  ...                                           |
+ *	|  Security version                              |
+ *	|  ...                                           |
+ *	+================================================+
  */
 
 struct gsc_version {
@@ -149,6 +201,67 @@ struct gsc_version {
 	u16 build;
 } __packed;
 
+struct gsc_partition {
+	u32 offset;
+	u32 size;
+} __packed;
+
+struct gsc_layout_pointers {
+	u8 rom_bypass_vector[16];
+
+	/* size of this header section, not including ROM bypass vector */
+	u16 size;
+
+	/*
+	 * bit0: Backup copy of layout pointers exists
+	 * bits1-15: reserved
+	 */
+	u8 flags;
+
+	u8 reserved;
+
+	u32 crc32;
+
+	struct gsc_partition datap;
+	struct gsc_partition boot1;
+	struct gsc_partition boot2;
+	struct gsc_partition boot3;
+	struct gsc_partition boot4;
+	struct gsc_partition boot5;
+	struct gsc_partition temp_pages;
+} __packed;
+
+/* Boot partition structures */
+struct gsc_bpdt_header {
+	u32 signature;
+#define GSC_BPDT_HEADER_SIGNATURE 0x000055AA
+
+	u16 descriptor_count; /* num of entries after the header */
+
+	u8 version;
+	u8 configuration;
+
+	u32 crc32;
+
+	u32 build_version;
+	struct gsc_version tool_version;
+} __packed;
+
+struct gsc_bpdt_entry {
+	/*
+	 * Bits 0-15: BPDT entry type
+	 * Bits 16-17: reserved
+	 * Bit 18: code sub-partition
+	 * Bits 19-31: reserved
+	 */
+	u32 type;
+#define GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0)
+#define GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1
+
+	u32 sub_partition_offset; /* from the base of the BPDT header */
+	u32 sub_partition_size;
+} __packed;
+
 /* Code partition directory (CPD) structures */
 struct gsc_cpd_header_v2 {
 	u32 header_marker;
-- 
GitLab


From dd0e89e5edc20d3875ed7ded48e7e97118cdfbc8 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:47 -0800
Subject: [PATCH 2122/2340] drm/xe/gsc: GSC FW load

The GSC FW must be copied in a 4MB stolen memory allocation, whose GGTT
address is then passed as a parameter to a dedicated load instruction
submitted via the GSC engine.

Since the GSC load is relatively slow (up to 250ms), we perform it
asynchronously via a worker. This requires us to make sure that the
worker has stopped before suspending/unloading.

Note that we can't yet use xe_migrate_copy for the copy because it
doesn't work with stolen memory right now, so we do a memcpy from the
CPU side instead.

v2: add comment about timeout value, fix GSC status checking
    before load (John)

Bspec: 65306, 65346
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gpu/drm/xe/instructions/xe_gsc_commands.h |  34 +++
 .../gpu/drm/xe/instructions/xe_instr_defs.h   |   1 +
 drivers/gpu/drm/xe/regs/xe_gsc_regs.h         |  29 ++
 drivers/gpu/drm/xe/xe_gsc.c                   | 250 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_gsc.h                   |   3 +
 drivers/gpu/drm/xe/xe_gsc_types.h             |  17 ++
 drivers/gpu/drm/xe/xe_uc.c                    |  12 +-
 7 files changed, 345 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
 create mode 100644 drivers/gpu/drm/xe/regs/xe_gsc_regs.h

diff --git a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
new file mode 100644
index 0000000000000..c7a833d7f9656
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_COMMANDS_H_
+#define _XE_GSC_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/*
+ * All GSCCS-specific commands have fixed length, so we can include it in the
+ * defines. Note that the generic GSC command header structure includes an
+ * optional data field in bits 9-21, but there are no commands that actually use
+ * it; some of the commands are instead defined as having an extended length
+ * field spanning bits 0-15, even if the extra bits are not required because the
+ * longest GSCCS command is only 8 dwords. To handle this, the defines below use
+ * a single field for both data and len. If we ever get a commands that does
+ * actually have data and this approach doesn't work for it we can re-work it
+ * at that point.
+ */
+
+#define GSC_OPCODE		REG_GENMASK(28, 22)
+#define GSC_CMD_DATA_AND_LEN	REG_GENMASK(21, 0)
+
+#define __GSC_INSTR(op, dl) \
+	(XE_INSTR_GSC | \
+	REG_FIELD_PREP(GSC_OPCODE, op) | \
+	REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, dl))
+
+#define GSC_FW_LOAD __GSC_INSTR(1, 2)
+#define   GSC_FW_LOAD_LIMIT_VALID REG_BIT(31)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
index e403b4fcc20a8..04179b2a48e11 100644
--- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -15,6 +15,7 @@
  */
 #define XE_INSTR_CMD_TYPE		GENMASK(31, 29)
 #define   XE_INSTR_MI			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
+#define   XE_INSTR_GSC			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
 #define   XE_INSTR_GFXPIPE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
 
 /*
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
new file mode 100644
index 0000000000000..22d2ad9cb64dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_REGS_H_
+#define _XE_GSC_REGS_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+/* Definitions of GSC H/W registers, bits, etc */
+
+#define MTL_GSC_HECI1_BASE	0x00116000
+#define MTL_GSC_HECI2_BASE	0x00117000
+
+/*
+ * The FWSTS register values are FW defined and can be different between
+ * HECI1 and HECI2
+ */
+#define HECI_FWSTS1(base)				XE_REG((base) + 0xc40)
+#define   HECI1_FWSTS1_CURRENT_STATE			REG_GENMASK(3, 0)
+#define   HECI1_FWSTS1_CURRENT_STATE_RESET		0
+#define   HECI1_FWSTS1_PROXY_STATE_NORMAL		5
+#define   HECI1_FWSTS1_INIT_COMPLETE			REG_BIT(9)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 216f36cee0f71..e014b829bc8af 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -5,10 +5,20 @@
 
 #include "xe_gsc.h"
 
+#include <drm/drm_managed.h>
+
+#include "xe_bb.h"
+#include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_gt_printk.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_sched_job.h"
 #include "xe_uc_fw.h"
+#include "instructions/xe_gsc_commands.h"
+#include "regs/xe_gsc_regs.h"
 
 static struct xe_gt *
 gsc_to_gt(struct xe_gsc *gsc)
@@ -16,6 +26,145 @@ gsc_to_gt(struct xe_gsc *gsc)
 	return container_of(gsc, struct xe_gt, uc.gsc);
 }
 
+static int memcpy_fw(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 fw_size = gsc->fw.size;
+	void *storage;
+
+	/*
+	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
+	 * a memcpy for now.
+	 */
+	storage = kmalloc(fw_size, GFP_KERNEL);
+	if (!storage)
+		return -ENOMEM;
+
+	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
+	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
+	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
+
+	kfree(storage);
+
+	return 0;
+}
+
+static int emit_gsc_upload(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	u64 offset = xe_bo_ggtt_addr(gsc->private);
+	struct xe_bb *bb;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	long timeout;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	bb->cs[bb->len++] = GSC_FW_LOAD;
+	bb->cs[bb->len++] = lower_32_bits(offset);
+	bb->cs[bb->len++] = upper_32_bits(offset);
+	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
+
+	job = xe_bb_create_job(gsc->q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+static int gsc_fw_is_loaded(struct xe_gt *gt)
+{
+	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
+			      HECI1_FWSTS1_INIT_COMPLETE;
+}
+
+static int gsc_fw_wait(struct xe_gt *gt)
+{
+	/*
+	 * GSC load can take up to 250ms from the moment the instruction is
+	 * executed by the GSCCS. To account for possible submission delays or
+	 * other issues, we use a 500ms timeout in the wait here.
+	 */
+	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
+			      HECI1_FWSTS1_INIT_COMPLETE,
+			      HECI1_FWSTS1_INIT_COMPLETE,
+			      500 * USEC_PER_MSEC, NULL, false);
+}
+
+static int gsc_upload(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* we should only be here if the init step were successful */
+	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);
+
+	if (gsc_fw_is_loaded(gt)) {
+		xe_gt_err(gt, "GSC already loaded at upload time\n");
+		return -EEXIST;
+	}
+
+	err = memcpy_fw(gsc);
+	if (err) {
+		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
+		return err;
+	}
+
+	err = emit_gsc_upload(gsc);
+	if (err) {
+		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	err = gsc_fw_wait(gt);
+	if (err) {
+		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	xe_gt_dbg(gt, "GSC FW async load completed\n");
+
+	return 0;
+}
+
+static void gsc_work(struct work_struct *work)
+{
+	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret;
+
+	xe_device_mem_access_get(xe);
+	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+
+	ret = gsc_upload(gsc);
+	if (ret && ret != -EEXIST)
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	else
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+	xe_device_mem_access_put(xe);
+}
+
 int xe_gsc_init(struct xe_gsc *gsc)
 {
 	struct xe_gt *gt = gsc_to_gt(gsc);
@@ -23,6 +172,7 @@ int xe_gsc_init(struct xe_gsc *gsc)
 	int ret;
 
 	gsc->fw.type = XE_UC_FW_TYPE_GSC;
+	INIT_WORK(&gsc->work, gsc_work);
 
 	/* The GSC uC is only available on the media GT */
 	if (tile->media_gt && (gt != tile->media_gt)) {
@@ -50,3 +200,103 @@ out:
 	return ret;
 }
 
+static void free_resources(struct drm_device *drm, void *arg)
+{
+	struct xe_gsc *gsc = arg;
+
+	if (gsc->wq) {
+		destroy_workqueue(gsc->wq);
+		gsc->wq = NULL;
+	}
+
+	if (gsc->q) {
+		xe_exec_queue_put(gsc->q);
+		gsc->q = NULL;
+	}
+
+	if (gsc->private) {
+		xe_bo_unpin_map_no_vm(gsc->private);
+		gsc->private = NULL;
+	}
+}
+
+int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
+	struct xe_exec_queue *q;
+	struct workqueue_struct *wq;
+	struct xe_bo *bo;
+	int err;
+
+	if (!xe_uc_fw_is_available(&gsc->fw))
+		return 0;
+
+	if (!hwe)
+		return -ENODEV;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_STOLEN_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	q = xe_exec_queue_create(xe, NULL,
+				 BIT(hwe->logical_instance), 1, hwe,
+				 EXEC_QUEUE_FLAG_KERNEL |
+				 EXEC_QUEUE_FLAG_PERMANENT);
+	if (IS_ERR(q)) {
+		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
+		err = PTR_ERR(q);
+		goto out_bo;
+	}
+
+	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
+	if (!wq) {
+		err = -ENOMEM;
+		goto out_q;
+	}
+
+	gsc->private = bo;
+	gsc->q = q;
+	gsc->wq = wq;
+
+	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
+	if (err)
+		return err;
+
+	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out_q:
+	xe_exec_queue_put(q);
+out_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return err;
+}
+
+void xe_gsc_load_start(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+
+	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
+		return;
+
+	/* GSC FW survives GT reset and D3Hot */
+	if (gsc_fw_is_loaded(gt)) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+		return;
+	}
+
+	queue_work(gsc->wq, &gsc->work);
+}
+
+void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
+{
+	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
+		flush_work(&gsc->work);
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
index baa7f21f42049..f870eddc77d4e 100644
--- a/drivers/gpu/drm/xe/xe_gsc.h
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -9,5 +9,8 @@
 #include "xe_gsc_types.h"
 
 int xe_gsc_init(struct xe_gsc *gsc);
+int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
+void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
+void xe_gsc_load_start(struct xe_gsc *gsc);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
index 1bc50583fe586..57fefd66a7ea2 100644
--- a/drivers/gpu/drm/xe/xe_gsc_types.h
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -6,8 +6,13 @@
 #ifndef _XE_GSC_TYPES_H_
 #define _XE_GSC_TYPES_H_
 
+#include <linux/workqueue.h>
+
 #include "xe_uc_fw_types.h"
 
+struct xe_bo;
+struct xe_exec_queue;
+
 /**
  * struct xe_gsc - GSC
  */
@@ -17,6 +22,18 @@ struct xe_gsc {
 
 	/** @security_version: SVN found in the fetched blob */
 	u32 security_version;
+
+	/** @private: Private data for use by the GSC FW */
+	struct xe_bo *private;
+
+	/** @q: Default queue used for submissions to GSC FW */
+	struct xe_exec_queue *q;
+
+	/** @wq: workqueue to handle jobs for delayed load and proxy handling */
+	struct workqueue_struct *wq;
+
+	/** @work: delayed load and proxy handling work */
+	struct work_struct work;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index b67154c78dff5..15dcd1f91e9c4 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -74,11 +74,17 @@ err:
  */
 int xe_uc_init_post_hwconfig(struct xe_uc *uc)
 {
+	int err;
+
 	/* GuC submission not enabled, nothing to do */
 	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
-	return xe_guc_init_post_hwconfig(&uc->guc);
+	err = xe_guc_init_post_hwconfig(&uc->guc);
+	if (err)
+		return err;
+
+	return xe_gsc_init_post_hwconfig(&uc->gsc);
 }
 
 static int uc_reset(struct xe_uc *uc)
@@ -173,6 +179,9 @@ int xe_uc_init_hw(struct xe_uc *uc)
 	ret = xe_huc_auth(&uc->huc);
 	xe_gt_assert(uc_to_gt(uc), !ret);
 
+	/* GSC load is async */
+	xe_gsc_load_start(&uc->gsc);
+
 	return 0;
 }
 
@@ -197,6 +206,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc)
 
 void xe_uc_stop_prepare(struct xe_uc *uc)
 {
+	xe_gsc_wait_for_worker_completion(&uc->gsc);
 	xe_guc_stop_prepare(&uc->guc);
 }
 
-- 
GitLab


From aae84bf1cd96889a7d80b6b50131f60aa63899d7 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:48 -0800
Subject: [PATCH 2123/2340] drm/xe/gsc: Implement WA 14015076503

When the GSC FW is loaded, we need to inform it when a GSCCS reset is
coming and then wait 200ms for it to get ready to process the reset.

v2: move WA code to GSC file, use variable in Makefile (John)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <john.c.harrison@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile           | 11 +++++++++-
 drivers/gpu/drm/xe/regs/xe_gsc_regs.h | 10 +++++++++
 drivers/gpu/drm/xe/xe_gsc.c           | 29 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gsc.h           |  4 ++++
 drivers/gpu/drm/xe/xe_gt.c            |  5 +++++
 drivers/gpu/drm/xe/xe_wa_oob.rules    |  1 +
 6 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 184e2724ce7ba..5806ebb0256ae 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -37,7 +37,16 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
-$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o $(obj)/xe_ttm_stolen_mgr.o: $(generated_oob)
+uses_generated_oob := \
+	$(obj)/xe_gsc.o \
+	$(obj)/xe_guc.o \
+	$(obj)/xe_migrate.o \
+	$(obj)/xe_ring_ops.o \
+	$(obj)/xe_vm.o \
+	$(obj)/xe_wa.o \
+	$(obj)/xe_ttm_stolen_mgr.o
+
+$(uses_generated_oob): $(generated_oob)
 
 # Please keep these build lists sorted!
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
index 22d2ad9cb64dc..9a84b55d66eee 100644
--- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -16,6 +16,13 @@
 #define MTL_GSC_HECI1_BASE	0x00116000
 #define MTL_GSC_HECI2_BASE	0x00117000
 
+#define HECI_H_CSR(base)	XE_REG((base) + 0x4)
+#define   HECI_H_CSR_IE		REG_BIT(0)
+#define   HECI_H_CSR_IS		REG_BIT(1)
+#define   HECI_H_CSR_IG		REG_BIT(2)
+#define   HECI_H_CSR_RDY	REG_BIT(3)
+#define   HECI_H_CSR_RST	REG_BIT(4)
+
 /*
  * The FWSTS register values are FW defined and can be different between
  * HECI1 and HECI2
@@ -26,4 +33,7 @@
 #define   HECI1_FWSTS1_PROXY_STATE_NORMAL		5
 #define   HECI1_FWSTS1_INIT_COMPLETE			REG_BIT(9)
 
+#define HECI_H_GS1(base)	XE_REG((base) + 0xc4c)
+#define   HECI_H_GS1_ER_PREP	REG_BIT(0)
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index e014b829bc8af..5731c026a77a1 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -7,6 +7,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "generated/xe_wa_oob.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
@@ -17,6 +18,7 @@
 #include "xe_mmio.h"
 #include "xe_sched_job.h"
 #include "xe_uc_fw.h"
+#include "xe_wa.h"
 #include "instructions/xe_gsc_commands.h"
 #include "regs/xe_gsc_regs.h"
 
@@ -300,3 +302,30 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
 	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
 		flush_work(&gsc->work);
 }
+
+/*
+ * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
+ * GSC engine reset by writing a notification bit in the GS1 register and then
+ * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
+ * for the FW to get prepare for the reset, so we need to wait for that amount
+ * of time.
+ * After the reset is complete we need to then clear the GS1 register.
+ */
+void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
+{
+	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
+	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;
+
+	/* WA only applies if the GSC is loaded */
+	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
+		return;
+
+	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
+
+	if (prep) {
+		/* make sure the reset bit is clear when writing the CSR reg */
+		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+			      HECI_H_CSR_RST, HECI_H_CSR_IG);
+		msleep(200);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
index f870eddc77d4e..bc1ef7f31ea2c 100644
--- a/drivers/gpu/drm/xe/xe_gsc.h
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -8,9 +8,13 @@
 
 #include "xe_gsc_types.h"
 
+struct xe_gt;
+
 int xe_gsc_init(struct xe_gsc *gsc);
 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
 void xe_gsc_load_start(struct xe_gsc *gsc);
 
+void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0dddb751c6a48..00193b02a7e54 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -21,6 +21,7 @@
 #include "xe_execlist.h"
 #include "xe_force_wake.h"
 #include "xe_ggtt.h"
+#include "xe_gsc.h"
 #include "xe_gt_clock.h"
 #include "xe_gt_idle_sysfs.h"
 #include "xe_gt_mcr.h"
@@ -512,12 +513,16 @@ static int do_gt_reset(struct xe_gt *gt)
 {
 	int err;
 
+	xe_gsc_wa_14015076503(gt, true);
+
 	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
 	err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
 	if (err)
 		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
 			  ERR_PTR(err));
 
+	xe_gsc_wa_14015076503(gt, false);
+
 	return err;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 752842d734be0..c7b7d40b5d575 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -20,3 +20,4 @@
 16017236439	PLATFORM(PVC)
 22010954014	PLATFORM(DG2)
 14019821291	MEDIA_VERSION_RANGE(1300, 2000)
+14015076503	MEDIA_VERSION(1300)
-- 
GitLab


From f63182b45d67e1ff1e9c65f08adb4d803a5d861f Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:49 -0800
Subject: [PATCH 2124/2340] drm/xe/gsc: Trigger a driver flr to cleanup the GSC
 on unload

GSC is only killed by an FLR, so we need to trigger one on unload to
make sure we stop it. This is because we assign a chunk of memory to
the GSC as part of the FW load, so we need to make sure it stops
using it when we release it to the system on driver unload. Note that
this is not a problem of the unload per-se, because the GSC will not
touch that memory unless there are requests for it coming from the
driver; therefore, no accesses will happen while Xe is not loaded,
but if we re-load the driver then the GSC might wake up and try to
access that old memory location again.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gsc.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 5731c026a77a1..907585846a894 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -130,6 +130,23 @@ static int gsc_upload(struct xe_gsc *gsc)
 		return err;
 	}
 
+	/*
+	 * GSC is only killed by an FLR, so we need to trigger one on unload to
+	 * make sure we stop it. This is because we assign a chunk of memory to
+	 * the GSC as part of the FW load, so we need to make sure it stops
+	 * using it when we release it to the system on driver unload. Note that
+	 * this is not a problem of the unload per-se, because the GSC will not
+	 * touch that memory unless there are requests for it coming from the
+	 * driver; therefore, no accesses will happen while Xe is not loaded,
+	 * but if we re-load the driver then the GSC might wake up and try to
+	 * access that old memory location again.
+	 * Given that an FLR is a very disruptive action (see the FLR function
+	 * for details), we want to do it as the last action before releasing
+	 * the access to the MMIO bar, which means we need to do it as part of
+	 * mmio cleanup.
+	 */
+	xe->needs_flr_on_fini = true;
+
 	err = emit_gsc_upload(gsc);
 	if (err) {
 		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
-- 
GitLab


From 0881cbe04077785f98496c236386099d20854ad7 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:50 -0800
Subject: [PATCH 2125/2340] drm/xe/gsc: Query GSC compatibility version

The version is obtained via a dedicated MKHI GSC HECI command.
The compatibility version is what we want to match against for the GSC,
so we need to call the FW version checker after obtaining the version.

Since this is the first time we send a GSC HECI command via the GSCCS,
this patch also introduces common infrastructure to send such commands
to the GSC. Communication with the GSC FW is done via input/output
buffers, whose addresses are provided via a GSCCS command. The buffers
contain a generic header and a client-specific packet (e.g. PXP, HDCP);
the clients don't care about the header format and/or the GSCCS command
in the batch, they only care about their client-specific header. This
patch therefore introduces helpers that allow the callers to
automatically fill in the input header, submit the GSCCS job and decode
the output header, to make it so that the caller only needs to worry about
their client-specific input and output messages.

v3: squash of 2 separate patches ahead of merge, so that the common
functions and their first user are added at the same time

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: Suraj Kandpal <suraj.kandpal@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.Com> #v1
Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                   |   1 +
 .../gpu/drm/xe/abi/gsc_command_header_abi.h   |  46 +++++
 .../gpu/drm/xe/abi/gsc_mkhi_commands_abi.h    |  39 ++++
 .../gpu/drm/xe/instructions/xe_gsc_commands.h |   2 +
 drivers/gpu/drm/xe/xe_gsc.c                   |  82 ++++++++
 drivers/gpu/drm/xe/xe_gsc_submit.c            | 184 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_gsc_submit.h            |  30 +++
 drivers/gpu/drm/xe/xe_uc_fw.c                 |  18 +-
 drivers/gpu/drm/xe/xe_uc_fw.h                 |   1 +
 9 files changed, 397 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/abi/gsc_command_header_abi.h
 create mode 100644 drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h
 create mode 100644 drivers/gpu/drm/xe/xe_gsc_submit.c
 create mode 100644 drivers/gpu/drm/xe/xe_gsc_submit.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 5806ebb0256ae..161e8ead9114b 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -68,6 +68,7 @@ xe-y += xe_bb.o \
 	xe_ggtt.o \
 	xe_gpu_scheduler.o \
 	xe_gsc.o \
+	xe_gsc_submit.o \
 	xe_gt.o \
 	xe_gt_clock.o \
 	xe_gt_debugfs.o \
diff --git a/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h
new file mode 100644
index 0000000000000..a4c2646803b57
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_COMMAND_HEADER_ABI_H
+#define _ABI_GSC_COMMAND_HEADER_ABI_H
+
+#include <linux/types.h>
+
+struct intel_gsc_mtl_header {
+	u32 validity_marker;
+#define GSC_HECI_VALIDITY_MARKER 0xA578875A
+
+	u8 heci_client_id;
+
+	u8 reserved1;
+
+	u16 header_version;
+#define MTL_GSC_HEADER_VERSION 1
+
+	/* FW allows host to decide host_session handle as it sees fit. */
+	u64 host_session_handle;
+
+	/* handle generated by FW for messages that need to be re-submitted */
+	u64 gsc_message_handle;
+
+	u32 message_size; /* lower 20 bits only, upper 12 are reserved */
+
+	/*
+	 * Flags mask:
+	 * Bit 0: Pending
+	 * Bit 1: Session Cleanup;
+	 * Bits 2-15: Flags
+	 * Bits 16-31: Extension Size
+	 * According to internal spec flags are either input or output
+	 * we distinguish the flags using OUTFLAG or INFLAG
+	 */
+	u32 flags;
+#define GSC_OUTFLAG_MSG_PENDING	BIT(0)
+#define GSC_INFLAG_MSG_CLEANUP	BIT(1)
+
+	u32 status;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h
new file mode 100644
index 0000000000000..ad4d041873abd
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_MKHI_COMMANDS_ABI_H
+#define _ABI_GSC_MKHI_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for MKHI commands */
+#define HECI_MEADDRESS_MKHI 7
+
+/* Generic MKHI header */
+struct gsc_mkhi_header {
+	u8  group_id;
+	u8  command;
+	u8  reserved;
+	u8  result;
+} __packed;
+
+/* GFX_SRV commands */
+#define MKHI_GROUP_ID_GFX_SRV 0x30
+
+#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42)
+
+struct gsc_get_compatibility_version_in {
+	struct gsc_mkhi_header header;
+} __packed;
+
+struct gsc_get_compatibility_version_out {
+	struct gsc_mkhi_header header;
+	u16 proj_major;
+	u16 compat_major;
+	u16 compat_minor;
+	u16 reserved[5];
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
index c7a833d7f9656..f8949cad9d0fc 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
@@ -28,6 +28,8 @@
 	REG_FIELD_PREP(GSC_OPCODE, op) | \
 	REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, dl))
 
+#define GSC_HECI_CMD_PKT __GSC_INSTR(0, 6)
+
 #define GSC_FW_LOAD __GSC_INSTR(1, 2)
 #define   GSC_FW_LOAD_LIMIT_VALID REG_BIT(31)
 
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 907585846a894..d8ec04e3c0065 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -7,11 +7,13 @@
 
 #include <drm/drm_managed.h>
 
+#include "abi/gsc_mkhi_commands_abi.h"
 #include "generated/xe_wa_oob.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_exec_queue.h"
+#include "xe_gsc_submit.h"
 #include "xe_gt.h"
 #include "xe_gt_printk.h"
 #include "xe_map.h"
@@ -91,6 +93,78 @@ static int emit_gsc_upload(struct xe_gsc *gsc)
 	return 0;
 }
 
+#define version_query_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
+#define version_query_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)
+
+static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
+{
+	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));
+
+	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
+	version_query_wr(xe, map, wr_offset, header.command,
+			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);
+
+	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
+}
+
+#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
+static int query_compatibility_version(struct xe_gsc *gsc)
+{
+	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	u32 wr_offset;
+	u32 rd_offset;
+	u64 ggtt_offset;
+	int err;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_SYSTEM_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
+		return PTR_ERR(bo);
+	}
+
+	ggtt_offset = xe_bo_ggtt_addr(bo);
+
+	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
+				       sizeof(struct gsc_get_compatibility_version_in));
+	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);
+
+	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
+				       ggtt_offset + GSC_VER_PKT_SZ,
+				       GSC_VER_PKT_SZ);
+	if (err) {
+		xe_gt_err(gt,
+			  "failed to submit GSC request for compatibility version: %d\n",
+			  err);
+		goto out_bo;
+	}
+
+	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
+				     sizeof(struct gsc_get_compatibility_version_out),
+				     &rd_offset);
+	if (err) {
+		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
+		return err;
+	}
+
+	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
+	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);
+
+	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);
+
+out_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return err;
+}
+
 static int gsc_fw_is_loaded(struct xe_gt *gt)
 {
 	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
@@ -159,6 +233,14 @@ static int gsc_upload(struct xe_gsc *gsc)
 		return err;
 	}
 
+	err = query_compatibility_version(gsc);
+	if (err)
+		return err;
+
+	err = xe_uc_fw_check_version_requirements(&gsc->fw);
+	if (err)
+		return err;
+
 	xe_gt_dbg(gt, "GSC FW async load completed\n");
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c
new file mode 100644
index 0000000000000..8c5381e5913fc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc_submit.h"
+
+#include "abi/gsc_command_header_abi.h"
+#include "xe_bb.h"
+#include "xe_exec_queue.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_types.h"
+#include "xe_map.h"
+#include "xe_sched_job.h"
+#include "instructions/xe_gsc_commands.h"
+#include "regs/xe_gsc_regs.h"
+
+#define GSC_HDR_SIZE (sizeof(struct intel_gsc_mtl_header)) /* shorthand define */
+
+#define mtl_gsc_header_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_, val_)
+
+#define mtl_gsc_header_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_)
+
+/*
+ * GSC FW allows us to define the host_session_handle as we see fit, as long
+ * as we use unique identifier for each user, with handle 0 being reserved for
+ * kernel usage.
+ * To be able to differentiate which client subsystem owns the given session, we
+ * include the client id in the top 8 bits of the handle.
+ */
+#define HOST_SESSION_CLIENT_MASK GENMASK_ULL(63, 56)
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+/**
+ * xe_gsc_emit_header - write the MTL GSC header in memory
+ * @xe: the Xe device
+ * @map: the iosys map to write to
+ * @offset: offset from the start of the map at which to write the header
+ * @heci_client_id: client id identifying the type of command (see abi for values)
+ * @host_session_id: host session ID of the caller
+ * @payload_size: size of the payload that follows the header
+ *
+ * Returns: offset memory location following the header
+ */
+u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
+		       u8 heci_client_id, u64 host_session_id, u32 payload_size)
+{
+	xe_assert(xe, !(host_session_id & HOST_SESSION_CLIENT_MASK));
+
+	if (host_session_id)
+		host_session_id |= FIELD_PREP(HOST_SESSION_CLIENT_MASK, heci_client_id);
+
+	xe_map_memset(xe, map, offset, 0, GSC_HDR_SIZE);
+
+	mtl_gsc_header_wr(xe, map, offset, validity_marker, GSC_HECI_VALIDITY_MARKER);
+	mtl_gsc_header_wr(xe, map, offset, heci_client_id, heci_client_id);
+	mtl_gsc_header_wr(xe, map, offset, host_session_handle, host_session_id);
+	mtl_gsc_header_wr(xe, map, offset, header_version, MTL_GSC_HEADER_VERSION);
+	mtl_gsc_header_wr(xe, map, offset, message_size, payload_size + GSC_HDR_SIZE);
+
+	return offset + GSC_HDR_SIZE;
+};
+
+/**
+ * xe_gsc_check_and_update_pending - check the pending bit and update the input
+ * header with the retry handle from the output header
+ * @xe: the Xe device
+ * @in: the iosys map containing the input buffer
+ * @offset_in: offset within the iosys at which the input buffer is located
+ * @out: the iosys map containing the output buffer
+ * @offset_out: offset within the iosys at which the output buffer is located
+ *
+ * Returns: true if the pending bit was set, false otherwise
+ */
+bool xe_gsc_check_and_update_pending(struct xe_device *xe,
+				     struct iosys_map *in, u32 offset_in,
+				     struct iosys_map *out, u32 offset_out)
+{
+	if (mtl_gsc_header_rd(xe, out, offset_out, flags) & GSC_OUTFLAG_MSG_PENDING) {
+		u64 handle = mtl_gsc_header_rd(xe, out, offset_out, gsc_message_handle);
+
+		mtl_gsc_header_wr(xe, in, offset_in, gsc_message_handle, handle);
+
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * xe_gsc_read_out_header - reads and validates the output header and returns
+ * the offset of the reply following the header
+ * @xe: the Xe device
+ * @map: the iosys map containing the output buffer
+ * @offset: offset within the iosys at which the output buffer is located
+ * @min_payload_size: minimum size of the message excluding the gsc header
+ * @payload_offset: optional pointer to be set to the payload offset
+ *
+ * Returns: -errno value on failure, 0 otherwise
+ */
+int xe_gsc_read_out_header(struct xe_device *xe,
+			   struct iosys_map *map, u32 offset,
+			   u32 min_payload_size,
+			   u32 *payload_offset)
+{
+	u32 marker = mtl_gsc_header_rd(xe, map, offset, validity_marker);
+	u32 size = mtl_gsc_header_rd(xe, map, offset, message_size);
+	u32 payload_size = size - GSC_HDR_SIZE;
+
+	if (marker != GSC_HECI_VALIDITY_MARKER)
+		return -EPROTO;
+
+	if (size < GSC_HDR_SIZE || payload_size < min_payload_size)
+		return -ENODATA;
+
+	if (payload_offset)
+		*payload_offset = offset + GSC_HDR_SIZE;
+
+	return 0;
+}
+
+/**
+ * xe_gsc_pkt_submit_kernel - submit a kernel heci pkt to the GSC
+ * @gsc: the GSC uC
+ * @addr_in: GGTT address of the message to send to the GSC
+ * @size_in: size of the message to send to the GSC
+ * @addr_out: GGTT address for the GSC to write the reply to
+ * @size_out: size of the memory reserved for the reply
+ */
+int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
+			     u64 addr_out, u32 size_out)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_bb *bb;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	long timeout;
+
+	if (size_in < GSC_HDR_SIZE)
+		return -ENODATA;
+
+	if (size_out < GSC_HDR_SIZE)
+		return -ENOMEM;
+
+	bb = xe_bb_new(gt, 8, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	bb->cs[bb->len++] = GSC_HECI_CMD_PKT;
+	bb->cs[bb->len++] = lower_32_bits(addr_in);
+	bb->cs[bb->len++] = upper_32_bits(addr_in);
+	bb->cs[bb->len++] = size_in;
+	bb->cs[bb->len++] = lower_32_bits(addr_out);
+	bb->cs[bb->len++] = upper_32_bits(addr_out);
+	bb->cs[bb->len++] = size_out;
+	bb->cs[bb->len++] = 0;
+
+	job = xe_bb_create_job(gsc->q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h
new file mode 100644
index 0000000000000..0801da5d446a9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_SUBMIT_H_
+#define _XE_GSC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct iosys_map;
+struct xe_device;
+struct xe_gsc;
+
+u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
+		       u8 heci_client_id, u64 host_session_id, u32 payload_size);
+
+bool xe_gsc_check_and_update_pending(struct xe_device *xe,
+				     struct iosys_map *in, u32 offset_in,
+				     struct iosys_map *out, u32 offset_out);
+
+int xe_gsc_read_out_header(struct xe_device *xe,
+			   struct iosys_map *map, u32 offset,
+			   u32 min_payload_size,
+			   u32 *payload_offset);
+
+int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
+			     u64 addr_out, u32 size_out);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2d0fb7058d663..74f00fdcd9681 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -224,8 +224,11 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 			uc_fw->versions.wanted.minor = entries[i].minor;
 			uc_fw->full_ver_required = entries[i].full_ver_required;
 
-			/* compatibility version checking coming soon */
-			uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
+			if (uc_fw->type == XE_UC_FW_TYPE_GSC)
+				uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
+			else
+				uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
+
 			break;
 		}
 	}
@@ -321,7 +324,7 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
 	uc_fw->private_data_size = css->private_data_size;
 }
 
-static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
+int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
 	struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
@@ -678,9 +681,12 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 			    "Using %s firmware from %s",
 			    xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
 
-	err = uc_fw_check_version_requirements(uc_fw);
-	if (err)
-		goto fail;
+	/* for GSC FW we want the compatibility version, which we query after load */
+	if (uc_fw->type != XE_UC_FW_TYPE_GSC) {
+		err = xe_uc_fw_check_version_requirements(uc_fw);
+		if (err)
+			goto fail;
+	}
 
 	obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size,
 				     ttm_bo_type_kernel,
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index 7feafe1695f91..85c20795d1f8c 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -17,6 +17,7 @@ struct drm_printer;
 int xe_uc_fw_init(struct xe_uc_fw *uc_fw);
 size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len);
 int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags);
+int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw);
 void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p);
 
 static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw)
-- 
GitLab


From 9897eb855544f0ef0921a5cc4517deb1fcf06c6f Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:51 -0800
Subject: [PATCH 2126/2340] drm/xe/gsc: Define GSCCS for MTL

Add the GSCCS to the media_xelpmp engine list. Note that since the
GSCCS is only used with the GSC FW, we can consider it disabled if we
don't have the FW available.

v2: mark GSCCS as allowed on the media IP in kunit tests

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci_test.c |  9 +++------
 drivers/gpu/drm/xe/xe_hw_engine.c      | 20 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pci.c            |  2 +-
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
index daf6525176088..171e4180f1aa9 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -33,13 +33,10 @@ static void check_media_ip(const struct xe_media_desc *media)
 	struct kunit *test = xe_cur_kunit();
 	u64 mask = media->hw_engine_mask;
 
-	/*
-	 * VCS and VECS engines are allowed on the media IP
-	 *
-	 * TODO:  Add GSCCS once support is added to the driver.
-	 */
+	/* VCS, VECS and GSCCS engines are allowed on the media IP */
 	mask &= ~(XE_HW_ENGINE_VCS_MASK |
-		  XE_HW_ENGINE_VECS_MASK);
+		  XE_HW_ENGINE_VECS_MASK |
+		  XE_HW_ENGINE_GSCCS_MASK);
 
 	/* Any remaining engines are an error */
 	KUNIT_ASSERT_EQ(test, mask, 0);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index e831e63c5e485..c52c26c395a7d 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -26,6 +26,7 @@
 #include "xe_rtp.h"
 #include "xe_sched_job.h"
 #include "xe_tuning.h"
+#include "xe_uc_fw.h"
 #include "xe_wa.h"
 
 #define MAX_MMIO_BASES 3
@@ -610,6 +611,24 @@ static void read_compute_fuses(struct xe_gt *gt)
 		read_compute_fuses_from_dss(gt);
 }
 
+static void check_gsc_availability(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
+		return;
+
+	/*
+	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
+	 * have the FW there is nothing we need the engine for and can therefore
+	 * skip its initialization.
+	 */
+	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
+		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
+		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
+	}
+}
+
 int xe_hw_engines_init_early(struct xe_gt *gt)
 {
 	int i;
@@ -617,6 +636,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt)
 	read_media_fuses(gt);
 	read_copy_fuses(gt);
 	read_compute_fuses(gt);
+	check_gsc_availability(gt);
 
 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6d062478c1f2f..bbf2f5b64bac8 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -199,7 +199,7 @@ static const struct xe_media_desc media_xelpmp = {
 	.name = "Xe_LPM+",
 	.hw_engine_mask =
 		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
-		BIT(XE_HW_ENGINE_VECS0),	/* TODO: add GSC0 */
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0)
 };
 
 static const struct xe_media_desc media_xe2 = {
-- 
GitLab


From 5152234e2e7a1d5b0897733f84597df23cde98b1 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Fri, 17 Nov 2023 14:51:52 -0800
Subject: [PATCH 2127/2340] drm/xe/gsc: Define GSC FW for MTL

We track GSC FW based on its compatibility version, which is what
determines the interface it supports.
Also add a modparam override like the ones for GuC and HuC.

v2: fix module param description (John)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_module.c |  4 ++++
 drivers/gpu/drm/xe/xe_module.h |  1 +
 drivers/gpu/drm/xe/xe_uc_fw.c  | 20 ++++++++++++--------
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 1ea883f48c63e..51bf69b7ab222 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -41,6 +41,10 @@ module_param_named_unsafe(huc_firmware_path, xe_modparam.huc_firmware_path, char
 MODULE_PARM_DESC(huc_firmware_path,
 		 "HuC firmware path to use instead of the default one - empty string disables");
 
+module_param_named_unsafe(gsc_firmware_path, xe_modparam.gsc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(gsc_firmware_path,
+		 "GSC firmware path to use instead of the default one - empty string disables");
+
 module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
 MODULE_PARM_DESC(force_probe,
 		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 51d75ff123765..88ef0e8b2bfd8 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -16,6 +16,7 @@ struct xe_modparam {
 	int guc_log_level;
 	char *guc_firmware_path;
 	char *huc_firmware_path;
+	char *gsc_firmware_path;
 	char *force_probe;
 };
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 74f00fdcd9681..2fcec528a1d14 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -121,6 +121,10 @@ struct fw_blobs_by_type {
 	fw_def(ROCKETLAKE,	no_ver(i915,	huc,		tgl))		\
 	fw_def(TIGERLAKE,	no_ver(i915,	huc,		tgl))
 
+/* for the GSC FW we match the compatibility version and not the release one */
+#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver)		\
+	fw_def(METEORLAKE,	major_ver(i915,	gsc,	mtl,	1, 0))
+
 #define MAKE_FW_PATH(dir__, uc__, shortname__, version__)			\
 	__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
 
@@ -155,6 +159,7 @@ XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
 		     fw_filename_mmp_ver, fw_filename_major_ver)
 XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
 		     fw_filename_mmp_ver, fw_filename_no_ver)
+XE_GSC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_major_ver)
 
 static struct xe_gt *
 __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
@@ -196,23 +201,19 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 				     uc_fw_entry_mmp_ver,
 				     uc_fw_entry_no_ver)
 	};
+	static const struct uc_fw_entry entries_gsc[] = {
+		XE_GSC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, uc_fw_entry_major_ver)
+	};
 	static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
 		[XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) },
 		[XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) },
+		[XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) },
 	};
 	static const struct uc_fw_entry *entries;
 	enum xe_platform p = xe->info.platform;
 	u32 count;
 	int i;
 
-	/*
-	 * GSC FW support is still not fully in place, so we're not defining
-	 * the FW blob yet because we don't want the driver to attempt to load
-	 * it until we're ready for it.
-	 */
-	if (uc_fw->type == XE_UC_FW_TYPE_GSC)
-		return;
-
 	xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all));
 	entries = blobs_all[uc_fw->type].entries;
 	count = blobs_all[uc_fw->type].count;
@@ -248,6 +249,9 @@ uc_fw_override(struct xe_uc_fw *uc_fw)
 	case XE_UC_FW_TYPE_HUC:
 		path_override = xe_modparam.huc_firmware_path;
 		break;
+	case XE_UC_FW_TYPE_GSC:
+		path_override = xe_modparam.gsc_firmware_path;
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From fcf98d68c00216b61b034f4d164e5c3074db636a Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 27 Nov 2023 09:44:59 +0000
Subject: [PATCH 2128/2340] drm/xe: fix mem_access for early lrc generation

We spawn some hw queues during device probe to generate the default LRC
for every engine type, however the queue destruction step is typically
async. Queue destruction needs to do stuff like GuC context deregister
which requires GuC CT, which in turn requires an active mem_access ref.
The caller during probe is meant to hold the mem_access token, however
due to the async destruction it might have already been dropped if we
are unlucky.

Similar to how we already handle migrate VMs for which there is no
mem_access ref, fix this by keeping the callers token alive, releasing
it only when destroying the queue. We can treat a NULL vm as indication
that we need to grab our own extra ref.

Fixes the following splat sometimes seen during load:

[ 1682.899930] WARNING: CPU: 1 PID: 8642 at drivers/gpu/drm/xe/xe_device.c:537 xe_device_assert_mem_access+0x27/0x30 [xe]
[ 1682.900209] CPU: 1 PID: 8642 Comm: kworker/u24:97 Tainted: G     U  W   E    N 6.6.0-rc3+ #6
[ 1682.900214] Workqueue: submit_wq xe_sched_process_msg_work [xe]
[ 1682.900303] RIP: 0010:xe_device_assert_mem_access+0x27/0x30 [xe]
[ 1682.900388] Code: 90 90 90 66 0f 1f 00 0f 1f 44 00 00 53 48 89 fb e8 1e 6c 03 00 48 85 c0 74 06 5b c3 cc cc cc cc 8b 83 28 23 00 00 85 c0 75 f0 <0f> 0b 5b c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90
[ 1682.900390] RSP: 0018:ffffc900021cfb68 EFLAGS: 00010246
[ 1682.900394] RAX: 0000000000000000 RBX: ffff8886a96d8000 RCX: 0000000000000000
[ 1682.900396] RDX: 0000000000000001 RSI: ffff8886a6311a00 RDI: ffff8886a96d8000
[ 1682.900398] RBP: ffffc900021cfcc0 R08: 0000000000000001 R09: 0000000000000000
[ 1682.900400] R10: ffffc900021cfcd0 R11: 0000000000000002 R12: 0000000000000004
[ 1682.900402] R13: 0000000000000000 R14: ffff8886a6311990 R15: ffffc900021cfd74
[ 1682.900405] FS:  0000000000000000(0000) GS:ffff888829880000(0000) knlGS:0000000000000000
[ 1682.900407] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1682.900409] CR2: 000055f70bad3fb0 CR3: 000000025243a004 CR4: 00000000003706e0
[ 1682.900412] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1682.900413] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1682.900415] Call Trace:
[ 1682.900418]  <TASK>
[ 1682.900420]  ? xe_device_assert_mem_access+0x27/0x30 [xe]
[ 1682.900504]  ? __warn+0x85/0x170
[ 1682.900510]  ? xe_device_assert_mem_access+0x27/0x30 [xe]
[ 1682.900596]  ? report_bug+0x171/0x1a0
[ 1682.900604]  ? handle_bug+0x3c/0x80
[ 1682.900608]  ? exc_invalid_op+0x17/0x70
[ 1682.900612]  ? asm_exc_invalid_op+0x1a/0x20
[ 1682.900621]  ? xe_device_assert_mem_access+0x27/0x30 [xe]
[ 1682.900706]  ? xe_device_assert_mem_access+0x12/0x30 [xe]
[ 1682.900790]  guc_ct_send_locked+0xb9/0x1550 [xe]
[ 1682.900882]  ? lock_acquire+0xca/0x2b0
[ 1682.900885]  ? guc_ct_send+0x3c/0x1a0 [xe]
[ 1682.900977]  ? lock_is_held_type+0x9b/0x110
[ 1682.900984]  ? __mutex_lock+0xc0/0xb90
[ 1682.900989]  ? __pfx___drm_printfn_info+0x10/0x10
[ 1682.900999]  guc_ct_send+0x53/0x1a0 [xe]
[ 1682.901090]  ? __lock_acquire+0xf22/0x21b0
[ 1682.901097]  ? process_one_work+0x1a0/0x500
[ 1682.901109]  xe_guc_ct_send+0x19/0x50 [xe]
[ 1682.901202]  set_min_preemption_timeout+0x75/0xa0 [xe]
[ 1682.901294]  disable_scheduling_deregister+0x55/0x250 [xe]
[ 1682.901383]  ? xe_sched_process_msg_work+0x76/0xd0 [xe]
[ 1682.901467]  ? lock_release+0xc9/0x260
[ 1682.901474]  xe_sched_process_msg_work+0x82/0xd0 [xe]
[ 1682.901559]  process_one_work+0x20a/0x500

v2: Add the splat

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 98fc13c89a4da..2bab6fbd82f5a 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -90,12 +90,12 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	/*
 	 * Normally the user vm holds an rpm ref to keep the device
 	 * awake, and the context holds a ref for the vm, however for
-	 * some engines we use the kernels migrate vm underneath which
-	 * offers no such rpm ref. Make sure we keep a ref here, so we
-	 * can perform GuC CT actions when needed. Caller is expected to
-	 * have already grabbed the rpm ref outside any sensitive locks.
+	 * some engines we use the kernels migrate vm underneath which offers no
+	 * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we
+	 * can perform GuC CT actions when needed. Caller is expected to have
+	 * already grabbed the rpm ref outside any sensitive locks.
 	 */
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM))
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
 		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
 
 	return q;
@@ -172,10 +172,10 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 
 	for (i = 0; i < q->width; ++i)
 		xe_lrc_finish(q->lrc + i);
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
+		xe_device_mem_access_put(gt_to_xe(q->gt));
 	if (q->vm)
 		xe_vm_put(q->vm);
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM))
-		xe_device_mem_access_put(gt_to_xe(q->gt));
 
 	kfree(q);
 }
-- 
GitLab


From 5bb83841a3b9cecc49ae1f02e85909b426a6facc Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Wed, 15 Nov 2023 12:58:16 +0100
Subject: [PATCH 2129/2340] drm/xe/kunit: Return number of iterated devices

In xe_call_for_each_device() we are already counting number of
iterated devices. Lets make that available to the caller too.
We will use that functionality in upcoming patches.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231115115816.1993-1-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index a40879da2fbe1..306ff8cb35cb1 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -42,8 +42,8 @@ static int dev_to_xe_device_fn(struct device *dev, void *__data)
  * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
  * called for the corresponding drm device.
  *
- * Return: Zero or the error code of a call to @xe_fn returning an error
- * code.
+ * Return: Number of devices iterated or
+ *         the error code of a call to @xe_fn returning an error code.
  */
 int xe_call_for_each_device(xe_device_fn xe_fn)
 {
@@ -59,7 +59,7 @@ int xe_call_for_each_device(xe_device_fn xe_fn)
 	if (!data.ndevs)
 		kunit_skip(current->kunit_test, "test runs only on hardware\n");
 
-	return ret;
+	return ret ?: data.ndevs;
 }
 
 /**
-- 
GitLab


From 622f709ca6297d838d9bd8b33196b388909d5951 Mon Sep 17 00:00:00 2001
From: Pallavi Mishra <pallavi.mishra@intel.com>
Date: Fri, 11 Aug 2023 01:36:43 +0530
Subject: [PATCH 2130/2340] drm/xe/uapi: Add support for CPU caching mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow userspace to specify the CPU caching mode at object creation.
Modify gem create handler and introduce xe_bo_create_user to replace
xe_bo_create. In a later patch we will support setting the pat_index as
part of vm_bind, where expectation is that the coherency mode extracted
from the pat_index must be least 1way coherent if using cpu_caching=wb.

v2
  - s/smem_caching/smem_cpu_caching/ and
    s/XE_GEM_CACHING/XE_GEM_CPU_CACHING/. (Matt Roper)
  - Drop COH_2WAY and just use COH_NONE + COH_AT_LEAST_1WAY; KMD mostly
    just cares that zeroing/swap-in can't be bypassed with the given
    smem_caching mode. (Matt Roper)
  - Fix broken range check for coh_mode and smem_cpu_caching and also
    don't use constant value, but the already defined macros. (José)
  - Prefer switch statement for smem_cpu_caching -> ttm_caching. (José)
  - Add note in kernel-doc for dgpu and coherency modes for system
    memory. (José)
v3 (José):
  - Make sure to reject coh_mode == 0 for VRAM-only.
  - Also make sure to actually pass along the (start, end) for
    __xe_bo_create_locked.
v4
  - Drop UC caching mode. Can be added back if we need it. (Matt Roper)
  - s/smem_cpu_caching/cpu_caching. Idea is that VRAM is always WC, but
    that is currently implicit and KMD controlled. Make it explicit in
    the uapi with the limitation that it currently must be WC. For VRAM
    + SYS objects userspace must now select WC. (José)
  - Make sure to initialize bo_flags. (José)
v5
  - Make to align with the other uapi and prefix uapi constants with
    DRM_ (José)
v6:
  - Make it clear that zero cpu_caching is only allowed for kernel
    objects. (José)
v7: (Oak)
  - With all the changes from the original design, it looks we can
    further simplify here and drop the explicit coh_mode. We can just
    infer the coh_mode from the cpu_caching. i.e reject cpu_caching=wb +
    coh_none. It's one less thing for userspace to maintain so seems
    worth it.
v8:
  - Make sure to also update the kselftests.

Testcase: igt@xe_mmap@cpu-caching
Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Co-developed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Cc: Zhengguo Xu <zhengguo.xu@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Oak Zeng <oak.zeng@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Zhengguo Xu <zhengguo.xu@intel.com>
Acked-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c      |  14 ++--
 drivers/gpu/drm/xe/tests/xe_dma_buf.c |   4 +-
 drivers/gpu/drm/xe/xe_bo.c            | 100 +++++++++++++++++++-------
 drivers/gpu/drm/xe/xe_bo.h            |  14 ++--
 drivers/gpu/drm/xe/xe_bo_types.h      |   5 ++
 drivers/gpu/drm/xe/xe_dma_buf.c       |   5 +-
 include/uapi/drm/xe_drm.h             |  19 ++++-
 7 files changed, 122 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 2c04357377abc..549ab343de803 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -177,8 +177,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
 static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
 {
 	struct xe_bo *bo, *external;
-	unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
-		XE_BO_CREATE_VRAM_IF_DGFX(tile);
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
 	struct xe_gt *__gt;
 	int err, i, id;
@@ -188,16 +187,19 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
 
 	for (i = 0; i < 2; ++i) {
 		xe_vm_lock(vm, false);
-		bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device,
-				  bo_flags);
+		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
+				       DRM_XE_GEM_CPU_CACHING_WC,
+				       ttm_bo_type_device,
+				       bo_flags);
 		xe_vm_unlock(vm);
 		if (IS_ERR(bo)) {
 			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
 			break;
 		}
 
-		external = xe_bo_create(xe, NULL, NULL, 0x10000,
-					ttm_bo_type_device, bo_flags);
+		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
+					     DRM_XE_GEM_CPU_CACHING_WC,
+					     ttm_bo_type_device, bo_flags);
 		if (IS_ERR(external)) {
 			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
 			goto cleanup_bo;
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 18c00bc03024d..81f12422a5873 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -116,8 +116,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 		return;
 
 	kunit_info(test, "running %s\n", __func__);
-	bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device,
-			  XE_BO_CREATE_USER_BIT | params->mem_mask);
+	bo = xe_bo_create_user(xe, NULL, NULL, PAGE_SIZE, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, params->mem_mask);
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
 			   PTR_ERR(bo));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e193373908123..dc1ad3b4dc2af 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -332,7 +332,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 	struct xe_device *xe = xe_bo_device(bo);
 	struct xe_ttm_tt *tt;
 	unsigned long extra_pages;
-	enum ttm_caching caching = ttm_cached;
+	enum ttm_caching caching;
 	int err;
 
 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -346,13 +346,24 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 					   PAGE_SIZE);
 
+	switch (bo->cpu_caching) {
+	case DRM_XE_GEM_CPU_CACHING_WC:
+		caching = ttm_write_combined;
+		break;
+	default:
+		caching = ttm_cached;
+		break;
+	}
+
+	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
+
 	/*
 	 * Display scanout is always non-coherent with the CPU cache.
 	 *
 	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
 	 * require a CPU:WC mapping.
 	 */
-	if (bo->flags & XE_BO_SCANOUT_BIT ||
+	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
 	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
 		caching = ttm_write_combined;
 
@@ -1198,10 +1209,11 @@ void xe_bo_free(struct xe_bo *bo)
 	kfree(bo);
 }
 
-struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
-				    struct xe_tile *tile, struct dma_resv *resv,
-				    struct ttm_lru_bulk_move *bulk, size_t size,
-				    enum ttm_bo_type type, u32 flags)
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags)
 {
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
@@ -1239,6 +1251,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	bo->tile = tile;
 	bo->size = size;
 	bo->flags = flags;
+	bo->cpu_caching = cpu_caching;
 	bo->ttm.base.funcs = &xe_gem_object_funcs;
 	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
@@ -1354,11 +1367,11 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 	return 0;
 }
 
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
-			  struct xe_tile *tile, struct xe_vm *vm,
-			  size_t size, u64 start, u64 end,
-			  enum ttm_bo_type type, u32 flags)
+static struct xe_bo *
+__xe_bo_create_locked(struct xe_device *xe,
+		      struct xe_tile *tile, struct xe_vm *vm,
+		      size_t size, u64 start, u64 end,
+		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
 {
 	struct xe_bo *bo = NULL;
 	int err;
@@ -1379,11 +1392,11 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		}
 	}
 
-	bo = __xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
-				   vm && !xe_vm_in_fault_mode(vm) &&
-				   flags & XE_BO_CREATE_USER_BIT ?
-				   &vm->lru_bulk_move : NULL, size,
-				   type, flags);
+	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+				    vm && !xe_vm_in_fault_mode(vm) &&
+				    flags & XE_BO_CREATE_USER_BIT ?
+				    &vm->lru_bulk_move : NULL, size,
+				    cpu_caching, type, flags);
 	if (IS_ERR(bo))
 		return bo;
 
@@ -1423,11 +1436,35 @@ err_unlock_put_bo:
 	return ERR_PTR(err);
 }
 
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_tile *tile, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+}
+
 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
 				  struct xe_vm *vm, size_t size,
 				  enum ttm_bo_type type, u32 flags)
 {
-	return xe_bo_create_locked_range(xe, tile, vm, size, 0, ~0ULL, type, flags);
+	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+}
+
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags)
+{
+	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
+						 cpu_caching, type,
+						 flags | XE_BO_CREATE_USER_BIT);
+	if (!IS_ERR(bo))
+		xe_bo_unlock_vm_held(bo);
+
+	return bo;
 }
 
 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
@@ -1809,7 +1846,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	struct drm_xe_gem_create *args = data;
 	struct xe_vm *vm = NULL;
 	struct xe_bo *bo;
-	unsigned int bo_flags = XE_BO_CREATE_USER_BIT;
+	unsigned int bo_flags;
 	u32 handle;
 	int err;
 
@@ -1840,6 +1877,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
 		return -EINVAL;
 
+	bo_flags = 0;
 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
 		bo_flags |= XE_BO_DEFER_BACKING;
 
@@ -1855,6 +1893,18 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
 	}
 
+	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
+			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
+			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
+			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+		return -EINVAL;
+
 	if (args->vm_id) {
 		vm = xe_vm_lookup(xef, args->vm_id);
 		if (XE_IOCTL_DBG(xe, !vm))
@@ -1864,8 +1914,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 			goto out_vm;
 	}
 
-	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
-			  bo_flags);
+	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
+			       ttm_bo_type_device, bo_flags);
 
 	if (vm)
 		xe_vm_unlock(vm);
@@ -2163,10 +2213,12 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
 			   page_size);
 
-	bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
-			  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
-			  XE_BO_NEEDS_CPU_ACCESS);
+	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
+			       DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device,
+			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
+			       XE_BO_NEEDS_CPU_ACCESS);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index f8bae873418dd..6f183568f76dc 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -94,10 +94,11 @@ struct sg_table;
 struct xe_bo *xe_bo_alloc(void);
 void xe_bo_free(struct xe_bo *bo);
 
-struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
-				    struct xe_tile *tile, struct dma_resv *resv,
-				    struct ttm_lru_bulk_move *bulk, size_t size,
-				    enum ttm_bo_type type, u32 flags);
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags);
 struct xe_bo *
 xe_bo_create_locked_range(struct xe_device *xe,
 			  struct xe_tile *tile, struct xe_vm *vm,
@@ -109,6 +110,11 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
 			   struct xe_vm *vm, size_t size,
 			   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags);
 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
 				   struct xe_vm *vm, size_t size,
 				   enum ttm_bo_type type, u32 flags);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 4bff609961689..f71dbc5189586 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -79,6 +79,11 @@ struct xe_bo {
 	struct llist_node freed;
 	/** @created: Whether the bo has passed initial creation */
 	bool created;
+	/**
+	 * @cpu_caching: CPU caching mode. Currently only used for userspace
+	 * objects.
+	 */
+	u16 cpu_caching;
 };
 
 #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index cfde3be3b0dc9..64ed303728fda 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -214,8 +214,9 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 	int ret;
 
 	dma_resv_lock(resv, NULL);
-	bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
-				   ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+	bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+				    0, /* Will require 1way or 2way for vm_bind */
+				    ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
 	if (IS_ERR(bo)) {
 		ret = PTR_ERR(bo);
 		goto error;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 88f3aca02b085..ab7d1b26c7732 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -541,8 +541,25 @@ struct drm_xe_gem_create {
 	 */
 	__u32 handle;
 
+	/**
+	 * @cpu_caching: The CPU caching mode to select for this object. If
+	 * mmaping the object the mode selected here will also be used.
+	 *
+	 * Supported values:
+	 *
+	 * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
+	 * caching.  On iGPU this can't be used for scanout surfaces. Currently
+	 * not allowed for objects placed in VRAM.
+	 *
+	 * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
+	 * is uncached. Scanout surfaces should likely use this. All objects
+	 * that can be placed in VRAM must use this.
+	 */
+#define DRM_XE_GEM_CPU_CACHING_WB                      1
+#define DRM_XE_GEM_CPU_CACHING_WC                      2
+	__u16 cpu_caching;
 	/** @pad: MBZ */
-	__u32 pad;
+	__u16 pad;
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
-- 
GitLab


From f6a22e6862737e31d2c0693d2a4f986e71d32da6 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 17 Aug 2023 10:27:43 +0100
Subject: [PATCH 2131/2340] drm/xe/pat: annotate pat_index with coherency mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Future uapi needs to give userspace the ability to select the pat_index
for a given vm_bind. However we need to be able to extract the coherency
mode from the provided pat_index to ensure it's compatible with the
cpu_caching mode set at object creation. There are various security
reasons for why this matters.  However the pat_index itself is very
platform specific, so seems reasonable to annotate each platform
definition of the pat table.  On some older platforms there is no
explicit coherency mode, so we just pick whatever makes sense.

v2:
  - Simplify with COH_AT_LEAST_1_WAY
  - Add some kernel-doc
v3 (Matt Roper):
  - Some small tweaks
v4:
  - Rebase
v5:
  - Rebase on Xe2 PAT additions
v6:
  - Rebase on removal of coh_mode from uapi

Bspec: 45101, 44235 #xe
Bspec: 70552, 71582, 59400 #xe2
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Cc: Zhengguo Xu <zhengguo.xu@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  2 +-
 drivers/gpu/drm/xe/xe_pat.c          | 98 ++++++++++++++++------------
 drivers/gpu/drm/xe/xe_pat.h          | 31 ++++++++-
 3 files changed, 89 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 2712905c7a91c..ea53f9dfc199c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -368,7 +368,7 @@ struct xe_device {
 		/** Internal operations to abstract platforms */
 		const struct xe_pat_ops *ops;
 		/** PAT table to program in the HW */
-		const u32 *table;
+		const struct xe_pat_table_entry *table;
 		/** Number of PAT entries */
 		int n_entries;
 		u32 idx[__XE_CACHE_LEVEL_COUNT];
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 7c1078707aa0e..1892ff81086f4 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -5,6 +5,8 @@
 
 #include "xe_pat.h"
 
+#include <drm/xe_drm.h>
+
 #include "regs/xe_reg_defs.h"
 #include "xe_assert.h"
 #include "xe_device.h"
@@ -46,35 +48,37 @@
 static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" };
 
 struct xe_pat_ops {
-	void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries);
-	void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries);
+	void (*program_graphics)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+				 int n_entries);
+	void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			      int n_entries);
 	void (*dump)(struct xe_gt *gt, struct drm_printer *p);
 };
 
-static const u32 xelp_pat_table[] = {
-	[0] = XELP_PAT_WB,
-	[1] = XELP_PAT_WC,
-	[2] = XELP_PAT_WT,
-	[3] = XELP_PAT_UC,
+static const struct xe_pat_table_entry xelp_pat_table[] = {
+	[0] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[1] = { XELP_PAT_WC, XE_COH_NONE },
+	[2] = { XELP_PAT_WT, XE_COH_NONE },
+	[3] = { XELP_PAT_UC, XE_COH_NONE },
 };
 
-static const u32 xehpc_pat_table[] = {
-	[0] = XELP_PAT_UC,
-	[1] = XELP_PAT_WC,
-	[2] = XELP_PAT_WT,
-	[3] = XELP_PAT_WB,
-	[4] = XEHPC_PAT_CLOS(1) | XELP_PAT_WT,
-	[5] = XEHPC_PAT_CLOS(1) | XELP_PAT_WB,
-	[6] = XEHPC_PAT_CLOS(2) | XELP_PAT_WT,
-	[7] = XEHPC_PAT_CLOS(2) | XELP_PAT_WB,
+static const struct xe_pat_table_entry xehpc_pat_table[] = {
+	[0] = { XELP_PAT_UC, XE_COH_NONE },
+	[1] = { XELP_PAT_WC, XE_COH_NONE },
+	[2] = { XELP_PAT_WT, XE_COH_NONE },
+	[3] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[4] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WT, XE_COH_NONE },
+	[5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[6] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WT, XE_COH_NONE },
+	[7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
 };
 
-static const u32 xelpg_pat_table[] = {
-	[0] = XELPG_PAT_0_WB,
-	[1] = XELPG_PAT_1_WT,
-	[2] = XELPG_PAT_3_UC,
-	[3] = XELPG_PAT_0_WB | XELPG_2_COH_1W,
-	[4] = XELPG_PAT_0_WB | XELPG_3_COH_2W,
+static const struct xe_pat_table_entry xelpg_pat_table[] = {
+	[0] = { XELPG_PAT_0_WB, XE_COH_NONE },
+	[1] = { XELPG_PAT_1_WT, XE_COH_NONE },
+	[2] = { XELPG_PAT_3_UC, XE_COH_NONE },
+	[3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_AT_LEAST_1WAY },
+	[4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_AT_LEAST_1WAY },
 };
 
 /*
@@ -92,15 +96,18 @@ static const u32 xelpg_pat_table[] = {
  * coherency (which matches an all-0's encoding), so we can just omit them
  * in the table.
  */
-#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, coh_mode) \
-	(no_promote ? XE2_NO_PROMOTE : 0) | \
-	(comp_en ? XE2_COMP_EN : 0) | \
-	REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \
-	REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
-	REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
-	REG_FIELD_PREP(XE2_COH_MODE, coh_mode)
-
-static const u32 xe2_pat_table[] = {
+#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \
+	{ \
+		.value = (no_promote ? XE2_NO_PROMOTE : 0) | \
+			(comp_en ? XE2_COMP_EN : 0) | \
+			REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \
+			REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
+			REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
+			REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
+		.coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+	}
+
+static const struct xe_pat_table_entry xe2_pat_table[] = {
 	[ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ),
 	[ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ),
 	[ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ),
@@ -133,23 +140,31 @@ static const u32 xe2_pat_table[] = {
 };
 
 /* Special PAT values programmed outside the main table */
-#define XE2_PAT_ATS	XE2_PAT( 0, 0, 0, 0, 3, 3 )
+static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 );
+
+u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
+{
+	WARN_ON(pat_index >= xe->pat.n_entries);
+	return xe->pat.table[pat_index].coh_mode;
+}
 
-static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
+static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			int n_entries)
 {
 	for (int i = 0; i < n_entries; i++) {
 		struct xe_reg reg = XE_REG(_PAT_INDEX(i));
 
-		xe_mmio_write32(gt, reg, table[i]);
+		xe_mmio_write32(gt, reg, table[i].value);
 	}
 }
 
-static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries)
+static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			    int n_entries)
 {
 	for (int i = 0; i < n_entries; i++) {
 		struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i));
 
-		xe_gt_mcr_multicast_write(gt, reg_mcr, table[i]);
+		xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value);
 	}
 }
 
@@ -289,16 +304,18 @@ static const struct xe_pat_ops xelpg_pat_ops = {
 	.dump = xelpg_dump,
 };
 
-static void xe2lpg_program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
+static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			       int n_entries)
 {
 	program_pat_mcr(gt, table, n_entries);
-	xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), XE2_PAT_ATS);
+	xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value);
 }
 
-static void xe2lpm_program_pat(struct xe_gt *gt, const u32 table[], int n_entries)
+static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			       int n_entries)
 {
 	program_pat(gt, table, n_entries);
-	xe_mmio_write32(gt, XE_REG(_PAT_ATS), XE2_PAT_ATS);
+	xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
 }
 
 static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
@@ -396,6 +413,7 @@ void xe_pat_init_early(struct xe_device *xe)
 		xe->pat.idx[XE_CACHE_WT] = 2;
 		xe->pat.idx[XE_CACHE_WB] = 0;
 	} else if (GRAPHICS_VERx100(xe) <= 1210) {
+		WARN_ON_ONCE(!IS_DGFX(xe) && !xe->info.has_llc);
 		xe->pat.ops = &xelp_pat_ops;
 		xe->pat.table = xelp_pat_table;
 		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index 09c491ab9f15d..fa0dfbe525cd6 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -6,9 +6,30 @@
 #ifndef _XE_PAT_H_
 #define _XE_PAT_H_
 
+#include <linux/types.h>
+
 struct drm_printer;
-struct xe_gt;
 struct xe_device;
+struct xe_gt;
+
+/**
+ * struct xe_pat_table_entry - The pat_index encoding and other meta information.
+ */
+struct xe_pat_table_entry {
+	/**
+	 * @value: The platform specific value encoding the various memory
+	 * attributes (this maps to some fixed pat_index). So things like
+	 * caching, coherency, compression etc can be encoded here.
+	 */
+	u32 value;
+
+	/**
+	 * @coh_mode: The GPU coherency mode that @value maps to.
+	 */
+#define XE_COH_NONE          1
+#define XE_COH_AT_LEAST_1WAY 2
+	u16 coh_mode;
+};
 
 /**
  * xe_pat_init_early - SW initialization, setting up data based on device
@@ -29,4 +50,12 @@ void xe_pat_init(struct xe_gt *gt);
  */
 void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
 
+/**
+ * xe_pat_index_get_coh_mode - Extract the coherency mode for the given
+ * pat_index.
+ * @xe: xe device
+ * @pat_index: The pat_index to query
+ */
+u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
+
 #endif
-- 
GitLab


From e1fbc4f18d5b4405271e964670b9b054c4397127 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 25 Sep 2023 12:42:18 +0100
Subject: [PATCH 2132/2340] drm/xe/uapi: support pat_index selection with
 vm_bind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow userspace to directly control the pat_index for a given vm
binding. This should allow directly controlling the coherency, caching
behaviour, compression and potentially other stuff in the future for the
ppGTT binding.

The exact meaning behind the pat_index is very platform specific (see
BSpec or PRMs) but effectively maps to some predefined memory
attributes. From the KMD pov we only care about the coherency that is
provided by the pat_index, which falls into either NONE, 1WAY or 2WAY.
The vm_bind coherency mode for the given pat_index needs to be at least
1way coherent when using cpu_caching with DRM_XE_GEM_CPU_CACHING_WB. For
platforms that lack the explicit coherency mode attribute, we treat
UC/WT/WC as NONE and WB as AT_LEAST_1WAY.

For userptr mappings we lack a corresponding gem object, so the expected
coherency mode is instead implicit and must fall into either 1WAY or
2WAY. Trying to use NONE will be rejected by the kernel. For imported
dma-buf (from a different device) the coherency mode is also implicit
and must also be either 1WAY or 2WAY.

v2:
  - Undefined coh_mode(pat_index) can now be treated as programmer
    error. (Matt Roper)
  - We now allow gem_create.coh_mode <= coh_mode(pat_index), rather than
    having to match exactly. This ensures imported dma-buf can always
    just use 1way (or even 2way), now that we also bundle 1way/2way into
    at_least_1way. We still require 1way/2way for external dma-buf, but
    the policy can now be the same for self-import, if desired.
  - Use u16 for pat_index in uapi. u32 is massive overkill. (José)
  - Move as much of the pat_index validation as we can into
    vm_bind_ioctl_check_args. (José)
v3 (Matt Roper):
  - Split the pte_encode() refactoring into separate patch.
v4:
  - Rebase
v5:
  - Check for and reject !coh_mode which would indicate hw reserved
    pat_index on xe2.
v6:
  - Rebase on removal of coh_mode from uapi. We just need to reject
    cpu_caching=wb + pat_index with coh_none.

Testcase: igt@xe_pat
Bspec: 45101, 44235 #xe
Bspec: 70552, 71582, 59400 #xe2
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Cc: Zhengguo Xu <zhengguo.xu@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Tested-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Zhengguo Xu <zhengguo.xu@intel.com>
Acked-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c       | 11 +-----
 drivers/gpu/drm/xe/xe_vm.c       | 67 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_vm_types.h |  7 ++++
 include/uapi/drm/xe_drm.h        | 48 ++++++++++++++++++++++-
 4 files changed, 115 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c6c9b723db5ac..3b485313804aa 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -290,8 +290,6 @@ struct xe_pt_stage_bind_walk {
 	struct xe_vm *vm;
 	/** @tile: The tile we're building for. */
 	struct xe_tile *tile;
-	/** @cache: Desired cache level for the ptes */
-	enum xe_cache_level cache;
 	/** @default_pte: PTE flag only template. No address is associated */
 	u64 default_pte;
 	/** @dma_offset: DMA offset to add to the PTE. */
@@ -511,7 +509,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 {
 	struct xe_pt_stage_bind_walk *xe_walk =
 		container_of(walk, typeof(*xe_walk), base);
-	u16 pat_index = tile_to_xe(xe_walk->tile)->pat.idx[xe_walk->cache];
+	u16 pat_index = xe_walk->vma->pat_index;
 	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
 	struct xe_vm *vm = xe_walk->vm;
 	struct xe_pt *xe_child;
@@ -657,13 +655,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 	if (is_devmem) {
 		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
 		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
-		xe_walk.cache = XE_CACHE_WB;
-	} else {
-		if (!xe_vma_has_no_bo(vma) && bo->flags & XE_BO_SCANOUT_BIT)
-			xe_walk.cache = XE_CACHE_WT;
-		else
-			xe_walk.cache = XE_CACHE_WB;
 	}
+
 	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
 		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c33ae4db4e02a..a97a310123fcb 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -6,6 +6,7 @@
 #include "xe_vm.h"
 
 #include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
 
 #include <drm/drm_exec.h>
 #include <drm/drm_print.h>
@@ -26,6 +27,7 @@
 #include "xe_gt_pagefault.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
+#include "xe_pat.h"
 #include "xe_pm.h"
 #include "xe_preempt_fence.h"
 #include "xe_pt.h"
@@ -868,7 +870,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    u64 start, u64 end,
 				    bool read_only,
 				    bool is_null,
-				    u8 tile_mask)
+				    u8 tile_mask,
+				    u16 pat_index)
 {
 	struct xe_vma *vma;
 	struct xe_tile *tile;
@@ -910,6 +913,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
 		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
 
+	vma->pat_index = pat_index;
+
 	if (bo) {
 		struct drm_gpuvm_bo *vm_bo;
 
@@ -2162,7 +2167,7 @@ static struct drm_gpuva_ops *
 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			 u64 bo_offset_or_userptr, u64 addr, u64 range,
 			 u32 operation, u32 flags, u8 tile_mask,
-			 u32 prefetch_region)
+			 u32 prefetch_region, u16 pat_index)
 {
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
 	struct drm_gpuva_ops *ops;
@@ -2231,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			op->map.read_only =
 				flags & DRM_XE_VM_BIND_FLAG_READONLY;
 			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.pat_index = pat_index;
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
 			op->prefetch.region = prefetch_region;
 		}
@@ -2242,7 +2248,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 }
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
-			      u8 tile_mask, bool read_only, bool is_null)
+			      u8 tile_mask, bool read_only, bool is_null,
+			      u16 pat_index)
 {
 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
 	struct xe_vma *vma;
@@ -2258,7 +2265,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 	vma = xe_vma_create(vm, bo, op->gem.offset,
 			    op->va.addr, op->va.addr +
 			    op->va.range - 1, read_only, is_null,
-			    tile_mask);
+			    tile_mask, pat_index);
 	if (bo)
 		xe_bo_unlock(bo);
 
@@ -2404,7 +2411,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
 			vma = new_vma(vm, &op->base.map,
 				      op->tile_mask, op->map.read_only,
-				      op->map.is_null);
+				      op->map.is_null, op->map.pat_index);
 			if (IS_ERR(vma))
 				return PTR_ERR(vma);
 
@@ -2430,7 +2437,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
 				vma = new_vma(vm, op->base.remap.prev,
 					      op->tile_mask, read_only,
-					      is_null);
+					      is_null, old->pat_index);
 				if (IS_ERR(vma))
 					return PTR_ERR(vma);
 
@@ -2464,7 +2471,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
 				vma = new_vma(vm, op->base.remap.next,
 					      op->tile_mask, read_only,
-					      is_null);
+					      is_null, old->pat_index);
 				if (IS_ERR(vma))
 					return PTR_ERR(vma);
 
@@ -2862,6 +2869,26 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		u64 obj_offset = (*bind_ops)[i].obj_offset;
 		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
 		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+		u16 pat_index = (*bind_ops)[i].pat_index;
+		u16 coh_mode;
+
+		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+		(*bind_ops)[i].pat_index = pat_index;
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
 
 		if (i == 0) {
 			*async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
@@ -2892,6 +2919,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
 		    XE_IOCTL_DBG(xe, obj &&
 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
 		    XE_IOCTL_DBG(xe, obj &&
 				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, prefetch_region &&
@@ -3025,6 +3054,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 addr = bind_ops[i].addr;
 		u32 obj = bind_ops[i].obj;
 		u64 obj_offset = bind_ops[i].obj_offset;
+		u16 pat_index = bind_ops[i].pat_index;
+		u16 coh_mode;
 
 		if (!obj)
 			continue;
@@ -3052,6 +3083,24 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 				goto put_obj;
 			}
 		}
+
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (bos[i]->cpu_caching) {
+			if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+					 bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+				err = -EINVAL;
+				goto put_obj;
+			}
+		} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+			/*
+			 * Imported dma-buf from a different device should
+			 * require 1way or 2way coherency since we don't know
+			 * how it was mapped on the CPU. Just assume is it
+			 * potentially cached on CPU side.
+			 */
+			err = -EINVAL;
+			goto put_obj;
+		}
 	}
 
 	if (args->num_syncs) {
@@ -3079,10 +3128,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u64 obj_offset = bind_ops[i].obj_offset;
 		u8 tile_mask = bind_ops[i].tile_mask;
 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+		u16 pat_index = bind_ops[i].pat_index;
 
 		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
 						  addr, range, op, flags,
-						  tile_mask, prefetch_region);
+						  tile_mask, prefetch_region,
+						  pat_index);
 		if (IS_ERR(ops[i])) {
 			err = PTR_ERR(ops[i]);
 			ops[i] = NULL;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index fc2645e07578d..74cdf16a42adb 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -110,6 +110,11 @@ struct xe_vma {
 	 */
 	u8 tile_present;
 
+	/**
+	 * @pat_index: The pat index to use when encoding the PTEs for this vma.
+	 */
+	u16 pat_index;
+
 	struct {
 		struct list_head rebind_link;
 	} notifier;
@@ -333,6 +338,8 @@ struct xe_vma_op_map {
 	bool read_only;
 	/** @is_null: is NULL binding */
 	bool is_null;
+	/** @pat_index: The pat index to use for this operation. */
+	u16 pat_index;
 };
 
 /** struct xe_vma_op_remap - VMA remap operation */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index ab7d1b26c7732..1a844fa7af8a8 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -636,8 +636,54 @@ struct drm_xe_vm_bind_op {
 	 */
 	__u32 obj;
 
+	/**
+	 * @pat_index: The platform defined @pat_index to use for this mapping.
+	 * The index basically maps to some predefined memory attributes,
+	 * including things like caching, coherency, compression etc.  The exact
+	 * meaning of the pat_index is platform specific and defined in the
+	 * Bspec and PRMs.  When the KMD sets up the binding the index here is
+	 * encoded into the ppGTT PTE.
+	 *
+	 * For coherency the @pat_index needs to be at least 1way coherent when
+	 * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
+	 * will extract the coherency mode from the @pat_index and reject if
+	 * there is a mismatch (see note below for pre-MTL platforms).
+	 *
+	 * Note: On pre-MTL platforms there is only a caching mode and no
+	 * explicit coherency mode, but on such hardware there is always a
+	 * shared-LLC (or is dgpu) so all GT memory accesses are coherent with
+	 * CPU caches even with the caching mode set as uncached.  It's only the
+	 * display engine that is incoherent (on dgpu it must be in VRAM which
+	 * is always mapped as WC on the CPU). However to keep the uapi somewhat
+	 * consistent with newer platforms the KMD groups the different cache
+	 * levels into the following coherency buckets on all pre-MTL platforms:
+	 *
+	 *	ppGTT UC -> COH_NONE
+	 *	ppGTT WC -> COH_NONE
+	 *	ppGTT WT -> COH_NONE
+	 *	ppGTT WB -> COH_AT_LEAST_1WAY
+	 *
+	 * In practice UC/WC/WT should only ever used for scanout surfaces on
+	 * such platforms (or perhaps in general for dma-buf if shared with
+	 * another device) since it is only the display engine that is actually
+	 * incoherent.  Everything else should typically use WB given that we
+	 * have a shared-LLC.  On MTL+ this completely changes and the HW
+	 * defines the coherency mode as part of the @pat_index, where
+	 * incoherent GT access is possible.
+	 *
+	 * Note: For userptr and externally imported dma-buf the kernel expects
+	 * either 1WAY or 2WAY for the @pat_index.
+	 *
+	 * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions
+	 * on the @pat_index. For such mappings there is no actual memory being
+	 * mapped (the address in the PTE is invalid), so the various PAT memory
+	 * attributes likely do not apply.  Simply leaving as zero is one
+	 * option (still a valid pat_index).
+	 */
+	__u16 pat_index;
+
 	/** @pad: MBZ */
-	__u32 pad;
+	__u16 pad;
 
 	union {
 		/**
-- 
GitLab


From 473b62763b76e8bb0793ac5f030779c43ecd79e6 Mon Sep 17 00:00:00 2001
From: Fei Yang <fei.yang@intel.com>
Date: Wed, 22 Nov 2023 12:45:01 -0800
Subject: [PATCH 2133/2340] drm/xe: explicitly set GGTT access for GuC DMA

Confirmed with hardware that setting GGTT memory access for GuC
firmware loading is correct for all platforms and required for
new platforms going forward.

Signed-off-by: Fei Yang <fei.yang@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231122204501.1353325-2-fei.yang@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_guc_regs.h | 1 +
 drivers/gpu/drm/xe/xe_uc_fw.c         | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index ba375fc51a87a..92320bbc9d3d9 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -70,6 +70,7 @@
 #define DMA_ADDR_1_HIGH				XE_REG(0xc30c)
 #define   DMA_ADDR_SPACE_MASK			REG_GENMASK(20, 16)
 #define   DMA_ADDRESS_SPACE_WOPCM		REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7)
+#define   DMA_ADDRESS_SPACE_GGTT		REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 8)
 #define DMA_COPY_SIZE				XE_REG(0xc310)
 #define DMA_CTRL				XE_REG(0xc314)
 #define   HUC_UKERNEL				REG_BIT(9)
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2fcec528a1d14..e806e7b6ae424 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -746,7 +746,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 	/* Set the source address for the uCode */
 	src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset;
 	xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
-	xe_mmio_write32(gt, DMA_ADDR_0_HIGH, upper_32_bits(src_offset));
+	xe_mmio_write32(gt, DMA_ADDR_0_HIGH,
+			upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT);
 
 	/* Set the DMA destination */
 	xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset);
-- 
GitLab


From e784f352f8a1142065a738f544a6566c873d73f6 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 21:32:01 +0100
Subject: [PATCH 2134/2340] drm/xe/guc: Drop ancient GuC CTB definitions

Those definitions were applicable for old GuC firmwares only.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/741
Link: https://lore.kernel.org/r/20231128203203.1147-1-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../drm/xe/abi/guc_communication_ctb_abi.h    | 21 -------------------
 1 file changed, 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 41244055cc0c7..d34f91002789a 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -165,25 +165,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
  * - **flags**, holds various bits to control message handling
  */
 
-/*
- * Definition of the command transport message header (DW0)
- *
- * bit[4..0]	message len (in dwords)
- * bit[7..5]	reserved
- * bit[8]	response (G2H only)
- * bit[8]	write fence to desc (H2G only)
- * bit[9]	write status to H2G buff (H2G only)
- * bit[10]	send status back via G2H (H2G only)
- * bit[15..11]	reserved
- * bit[31..16]	action code
- */
-#define GUC_CT_MSG_LEN_SHIFT			0
-#define GUC_CT_MSG_LEN_MASK			0x1F
-#define GUC_CT_MSG_IS_RESPONSE			(1 << 8)
-#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
-#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
-#define GUC_CT_MSG_SEND_STATUS			(1 << 10)
-#define GUC_CT_MSG_ACTION_SHIFT			16
-#define GUC_CT_MSG_ACTION_MASK			0xFFFF
-
 #endif
-- 
GitLab


From 0a39ad21796f2f67b7d384c0f0ec0ac901f76519 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 21:32:02 +0100
Subject: [PATCH 2135/2340] drm/xe/guc: Remove obsolete GuC CTB documentation

Refer to already described CTB Descriptor and CTB HXG Message.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231128203203.1147-2-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../drm/xe/abi/guc_communication_ctb_abi.h    | 45 +------------------
 1 file changed, 2 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index d34f91002789a..3b83f907ece46 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -120,49 +120,8 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
  * is based on u32 data stream written to the shared buffer. One buffer can
  * be used to transmit data only in one direction (one-directional channel).
  *
- * Current status of the each buffer is stored in the buffer descriptor.
- * Buffer descriptor holds tail and head fields that represents active data
- * stream. The tail field is updated by the data producer (sender), and head
- * field is updated by the data consumer (receiver)::
- *
- *      +------------+
- *      | DESCRIPTOR |          +=================+============+========+
- *      +============+          |                 | MESSAGE(s) |        |
- *      | address    |--------->+=================+============+========+
- *      +------------+
- *      | head       |          ^-----head--------^
- *      +------------+
- *      | tail       |          ^---------tail-----------------^
- *      +------------+
- *      | size       |          ^---------------size--------------------^
- *      +------------+
- *
- * Each message in data stream starts with the single u32 treated as a header,
- * followed by optional set of u32 data that makes message specific payload::
- *
- *      +------------+---------+---------+---------+
- *      |         MESSAGE                          |
- *      +------------+---------+---------+---------+
- *      |   msg[0]   |   [1]   |   ...   |  [n-1]  |
- *      +------------+---------+---------+---------+
- *      |   MESSAGE  |       MESSAGE PAYLOAD       |
- *      +   HEADER   +---------+---------+---------+
- *      |            |    0    |   ...   |    n    |
- *      +======+=====+=========+=========+=========+
- *      | 31:16| code|         |         |         |
- *      +------+-----+         |         |         |
- *      |  15:5|flags|         |         |         |
- *      +------+-----+         |         |         |
- *      |   4:0|  len|         |         |         |
- *      +------+-----+---------+---------+---------+
- *
- *                   ^-------------len-------------^
- *
- * The message header consists of:
- *
- * - **len**, indicates length of the message payload (in u32)
- * - **code**, indicates message code
- * - **flags**, holds various bits to control message handling
+ * Current status of the each buffer is maintained in the `CTB Descriptor`_.
+ * Each message in data stream is encoded as `CTB HXG Message`_.
  */
 
 #endif
-- 
GitLab


From b67cb798e4227d312fd221deb6a3f0b88b51fc6b Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 21:32:03 +0100
Subject: [PATCH 2136/2340] drm/xe/guc: Include only required GuC ABI headers

On i915 we were adding new GuC ABI headers directly to guc_fwif.h
file since we were replacing old definitions from that file.

On xe driver we could do more and better by including ABI headers
only in files that need those definitions.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/741
Cc: Jani Nikula <jani.nikula@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231128203203.1147-3-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c        | 1 +
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 1 +
 drivers/gpu/drm/xe/xe_guc.c                 | 2 ++
 drivers/gpu/drm/xe/xe_guc_ct.c              | 2 ++
 drivers/gpu/drm/xe/xe_guc_fwif.h            | 6 ------
 drivers/gpu/drm/xe/xe_guc_hwconfig.c        | 1 +
 drivers/gpu/drm/xe/xe_guc_pc.c              | 2 ++
 drivers/gpu/drm/xe/xe_guc_submit.c          | 2 ++
 8 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 1e083dda06798..0a278df6a97f2 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -12,6 +12,7 @@
 #include <drm/drm_managed.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 
+#include "abi/guc_actions_abi.h"
 #include "xe_bo.h"
 #include "xe_gt.h"
 #include "xe_gt_tlb_invalidation.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index a28f31c05b1b0..7eef23a00d77e 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -5,6 +5,7 @@
 
 #include "xe_gt_tlb_invalidation.h"
 
+#include "abi/guc_actions_abi.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 08142d8ee0523..e6f680efb29ef 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -7,6 +7,8 @@
 
 #include <drm/drm_managed.h>
 
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_errors_abi.h"
 #include "generated/xe_wa_oob.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_guc_regs.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index c44e750746958..6295d916e39f6 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -11,6 +11,8 @@
 
 #include <drm/drm_managed.h>
 
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_klvs_abi.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 4216a6d9e4787..4dd5a88a78265 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -8,13 +8,7 @@
 
 #include <linux/bits.h>
 
-#include "abi/guc_actions_abi.h"
-#include "abi/guc_actions_slpc_abi.h"
-#include "abi/guc_communication_ctb_abi.h"
-#include "abi/guc_communication_mmio_abi.h"
-#include "abi/guc_errors_abi.h"
 #include "abi/guc_klvs_abi.h"
-#include "abi/guc_messages_abi.h"
 
 #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET	4
 #define G2H_LEN_DW_DEREGISTER_CONTEXT		3
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index 57d325ec8ce32..98bb9bb307051 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -7,6 +7,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "abi/guc_actions_abi.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index e9dd6c3d750bd..2919c6aea4030 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -9,6 +9,8 @@
 
 #include <drm/drm_managed.h>
 
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_slpc_abi.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 32c234d753fdd..ad5e19ecd33c9 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -13,6 +13,8 @@
 
 #include <drm/drm_managed.h>
 
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_klvs_abi.h"
 #include "regs/xe_lrc_layout.h"
 #include "xe_assert.h"
 #include "xe_devcoredump.h"
-- 
GitLab


From 8cdcef1c2f82d207aa8b2a02298fbc17191c6261 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Wed, 15 Nov 2023 12:29:21 +0100
Subject: [PATCH 2137/2340] drm/xe/doc: Include documentation about xe_assert()

Our xe_assert() macros are well documented.
Include that in master documentation.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231115112921.1905-1-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 Documentation/gpu/xe/index.rst        | 1 +
 Documentation/gpu/xe/xe_debugging.rst | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 Documentation/gpu/xe/xe_debugging.rst

diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
index 5c4d6bb370f38..c224ecaee81e1 100644
--- a/Documentation/gpu/xe/index.rst
+++ b/Documentation/gpu/xe/index.rst
@@ -22,3 +22,4 @@ DG2, etc is provided to prototype the driver.
    xe_rtp
    xe_firmware
    xe_tile
+   xe_debugging
diff --git a/Documentation/gpu/xe/xe_debugging.rst b/Documentation/gpu/xe/xe_debugging.rst
new file mode 100644
index 0000000000000..d65e56ff35003
--- /dev/null
+++ b/Documentation/gpu/xe/xe_debugging.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Debugging
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_assert.h
-- 
GitLab


From c550f64f082b9da305ab7d07b8716389a80b641a Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Fri, 17 Nov 2023 18:06:16 -0800
Subject: [PATCH 2138/2340] drm/xe: Rename xe_gt_idle_sysfs to xe_gt_idle

Prep this file to contain C6 toggling as well instead
of just sysfs related stuff.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                               | 2 +-
 drivers/gpu/drm/xe/xe_gt.c                                | 2 +-
 drivers/gpu/drm/xe/{xe_gt_idle_sysfs.c => xe_gt_idle.c}   | 4 ++--
 drivers/gpu/drm/xe/{xe_gt_idle_sysfs.h => xe_gt_idle.h}   | 8 ++++----
 .../xe/{xe_gt_idle_sysfs_types.h => xe_gt_idle_types.h}   | 0
 drivers/gpu/drm/xe/xe_gt_types.h                          | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)
 rename drivers/gpu/drm/xe/{xe_gt_idle_sysfs.c => xe_gt_idle.c} (98%)
 rename drivers/gpu/drm/xe/{xe_gt_idle_sysfs.h => xe_gt_idle.h} (52%)
 rename drivers/gpu/drm/xe/{xe_gt_idle_sysfs_types.h => xe_gt_idle_types.h} (100%)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 161e8ead9114b..b8ad42fcbea2a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -72,7 +72,7 @@ xe-y += xe_bb.o \
 	xe_gt.o \
 	xe_gt_clock.o \
 	xe_gt_debugfs.o \
-	xe_gt_idle_sysfs.o \
+	xe_gt_idle.o \
 	xe_gt_mcr.o \
 	xe_gt_pagefault.o \
 	xe_gt_sysfs.o \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 00193b02a7e54..8a6fb9641cd69 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -23,7 +23,7 @@
 #include "xe_ggtt.h"
 #include "xe_gsc.h"
 #include "xe_gt_clock.h"
-#include "xe_gt_idle_sysfs.h"
+#include "xe_gt_idle.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
 #include "xe_gt_printk.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle.c
similarity index 98%
rename from drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
rename to drivers/gpu/drm/xe/xe_gt_idle.c
index 8df9840811cda..e5b7e5d38e76c 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -7,14 +7,14 @@
 
 #include "xe_device.h"
 #include "xe_gt.h"
-#include "xe_gt_idle_sysfs.h"
+#include "xe_gt_idle.h"
 #include "xe_gt_sysfs.h"
 #include "xe_guc_pc.h"
 
 /**
  * DOC: Xe GT Idle
  *
- * Provides sysfs entries for idle properties of GT
+ * Contains functions that init GT idle features like C6
  *
  * device/gt#/gtidle/name - name of the state
  * device/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_idle.h
similarity index 52%
rename from drivers/gpu/drm/xe/xe_gt_idle_sysfs.h
rename to drivers/gpu/drm/xe/xe_gt_idle.h
index b0973f96c7abe..9b36bf7db3a75 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -3,11 +3,11 @@
  * Copyright © 2023 Intel Corporation
  */
 
-#ifndef _XE_GT_IDLE_SYSFS_H_
-#define _XE_GT_IDLE_SYSFS_H_
+#ifndef _XE_GT_IDLE_H_
+#define _XE_GT_IDLE_H_
 
-#include "xe_gt_idle_sysfs_types.h"
+#include "xe_gt_idle_types.h"
 
 void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
 
-#endif /* _XE_GT_IDLE_SYSFS_H_ */
+#endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
similarity index 100%
rename from drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h
rename to drivers/gpu/drm/xe/xe_gt_idle_types.h
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index d3f2793684e2d..a96ee7d028aa0 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -7,7 +7,7 @@
 #define _XE_GT_TYPES_H_
 
 #include "xe_force_wake_types.h"
-#include "xe_gt_idle_sysfs_types.h"
+#include "xe_gt_idle_types.h"
 #include "xe_hw_engine_types.h"
 #include "xe_hw_fence_types.h"
 #include "xe_reg_sr_types.h"
-- 
GitLab


From f1cb5f647e8959a1034941d85b311d7485a7095f Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Fri, 17 Nov 2023 16:02:01 -0800
Subject: [PATCH 2139/2340] drm/xe: Add skip_guc_pc flag

This flag can be used to disable GuC based power management. This
could be used for debug or comparison to host based C6.

v2: Fix missing definition

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 drivers/gpu/drm/xe/xe_pci.c          | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index ea53f9dfc199c..c4b6aa8fcec17 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -264,6 +264,8 @@ struct xe_device {
 		u8 supports_mmio_ext:1;
 		/** @has_heci_gscfi: device has heci gscfi */
 		u8 has_heci_gscfi:1;
+		/** @skip_guc_pc: Skip GuC based PM feature init */
+		u8 skip_guc_pc:1;
 
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
 		struct {
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index bbf2f5b64bac8..1c4f9081e84c2 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -64,6 +64,7 @@ struct xe_device_desc {
 	u8 has_sriov:1;
 	u8 bypass_mtcfg:1;
 	u8 supports_mmio_ext:1;
+	u8 skip_guc_pc:1;
 };
 
 __diag_push();
@@ -591,6 +592,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.has_asid = graphics_desc->has_asid;
 	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
 	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
+	xe->info.skip_guc_pc = desc->skip_guc_pc;
 
 	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
 				  xe_modparam.enable_display &&
-- 
GitLab


From 975e4a3795d4f1373be538177525c0b714e0e65e Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Fri, 17 Nov 2023 16:14:49 -0800
Subject: [PATCH 2140/2340] drm/xe: Manually setup C6 when skip_guc_pc is set

Skip the init/start/stop GuC PC functions and toggle C6 using
register writes instead. Also request max possible frequency
as dynamic freq management is disabled.

v2: Fix compile warning

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  4 ++++
 drivers/gpu/drm/xe/xe_gt_idle.c      | 24 +++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_idle.h      |  4 ++++
 drivers/gpu/drm/xe/xe_guc_pc.c       | 35 +++++++++++++++++++++++++---
 4 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 18b13224480d2..d318ec0efd7db 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -272,7 +272,11 @@
 #define   RPSWCTL_ENABLE			REG_FIELD_PREP(RPSWCTL_MASK, 2)
 #define   RPSWCTL_DISABLE			REG_FIELD_PREP(RPSWCTL_MASK, 0)
 #define RC_CONTROL				XE_REG(0xa090)
+#define   RC_CTL_HW_ENABLE			REG_BIT(31)
+#define   RC_CTL_TO_MODE			REG_BIT(28)
+#define   RC_CTL_RC6_ENABLE			REG_BIT(18)
 #define RC_STATE				XE_REG(0xa094)
+#define RC_IDLE_HYSTERSIS			XE_REG(0xa0ac)
 
 #define PMINTRMSK				XE_REG(0xa168)
 #define   PMINTR_DISABLE_REDIRECT_TO_GUC	REG_BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index e5b7e5d38e76c..9358f73368896 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -10,6 +10,8 @@
 #include "xe_gt_idle.h"
 #include "xe_gt_sysfs.h"
 #include "xe_guc_pc.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_mmio.h"
 
 /**
  * DOC: Xe GT Idle
@@ -166,3 +168,25 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
 		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
 			 __func__, err);
 }
+
+void xe_gt_idle_enable_c6(struct xe_gt *gt)
+{
+	xe_device_assert_mem_access(gt_to_xe(gt));
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/* Units of 1280 ns for a total of 5s */
+	xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA);
+	/* Enable RC6 */
+	xe_mmio_write32(gt, RC_CONTROL,
+			RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE);
+}
+
+void xe_gt_idle_disable_c6(struct xe_gt *gt)
+{
+	xe_device_assert_mem_access(gt_to_xe(gt));
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+	xe_mmio_write32(gt, PG_ENABLE, 0);
+	xe_mmio_write32(gt, RC_CONTROL, 0);
+	xe_mmio_write32(gt, RC_STATE, 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 9b36bf7db3a75..69280fd16b035 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -8,6 +8,10 @@
 
 #include "xe_gt_idle_types.h"
 
+struct xe_gt;
+
 void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
+void xe_gt_idle_enable_c6(struct xe_gt *gt);
+void xe_gt_idle_disable_c6(struct xe_gt *gt);
 
 #endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 2919c6aea4030..1943893a3fd7d 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -16,6 +16,7 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_idle.h"
 #include "xe_gt_sysfs.h"
 #include "xe_gt_types.h"
 #include "xe_guc_ct.h"
@@ -869,13 +870,24 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 
-	memset(pc->bo->vmap.vaddr, 0, size);
-	slpc_shared_data_write(pc, header.size, size);
-
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
 		goto out_fail_force_wake;
 
+	if (xe->info.skip_guc_pc) {
+		if (xe->info.platform != XE_PVC)
+			xe_gt_idle_enable_c6(gt);
+
+		/* Request max possible since dynamic freq mgmt is not enabled */
+		pc_set_cur_freq(pc, UINT_MAX);
+
+		ret = 0;
+		goto out;
+	}
+
+	memset(pc->bo->vmap.vaddr, 0, size);
+	slpc_shared_data_write(pc, header.size, size);
+
 	ret = pc_action_reset(pc);
 	if (ret)
 		goto out;
@@ -911,10 +923,17 @@ out_fail_force_wake:
  */
 int xe_guc_pc_stop(struct xe_guc_pc *pc)
 {
+	struct xe_device *xe = pc_to_xe(pc);
 	int ret;
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 
+	if (xe->info.skip_guc_pc) {
+		xe_gt_idle_disable_c6(pc_to_gt(pc));
+		ret = 0;
+		goto out;
+	}
+
 	mutex_lock(&pc->freq_lock);
 	pc->freq_ready = false;
 	mutex_unlock(&pc->freq_lock);
@@ -935,6 +954,13 @@ out:
 
 void xe_guc_pc_fini(struct xe_guc_pc *pc)
 {
+	struct xe_device *xe = pc_to_xe(pc);
+
+	if (xe->info.skip_guc_pc) {
+		xe_gt_idle_disable_c6(pc_to_gt(pc));
+		return;
+	}
+
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
 	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
@@ -955,6 +981,9 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
 	int err;
 
+	if (xe->info.skip_guc_pc)
+		return 0;
+
 	mutex_init(&pc->freq_lock);
 
 	bo = xe_bo_create_pin_map(xe, tile, NULL, size,
-- 
GitLab


From 8e35780233cee1b2d257e6adf4d82b08ded15e88 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 29 Nov 2023 10:37:07 +0000
Subject: [PATCH 2141/2340] drm/xe/mocs: update MOCS table for xe2

Looks like there were some changes at some point here for preferring L4
uncached for some of the indexes. Triple checked the PAT settings also,
but that looks all correct as per current BSpec.

BSpec: 71582
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mocs.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 12a6d39fcd4a4..ef79552e4f2fe 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -366,9 +366,9 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = {
 
 static const struct xe_mocs_entry xe2_mocs_table[] = {
 	/* Defer to PAT */
-	MOCS_ENTRY(0, XE2_L3_0_WB | L4_0_WB, 0),
-	/* Cached L3 + L4 */
-	MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
+	MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0),
+	/* Cached L3, Uncached L4 */
+	MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0),
 	/* Uncached L3, Cached L4 */
 	MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0),
 	/* Uncached L3 + L4 */
@@ -390,8 +390,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		info->table = xe2_mocs_table;
 		info->n_entries = XE2_NUM_MOCS_ENTRIES;
 		info->uc_index = 3;
-		info->wb_index = 1;
-		info->unused_entries_index = 1;
+		info->wb_index = 4;
+		info->unused_entries_index = 4;
 		break;
 	case XE_PVC:
 		info->size = ARRAY_SIZE(pvc_mocs_desc);
-- 
GitLab


From 082802a3ee09e764bc1513988d6f5889712fe88f Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Tue, 28 Nov 2023 18:53:15 +0200
Subject: [PATCH 2142/2340] drm/xe: add skip_pcode flag

Per device, set this flag to enable access to the PCODE uC or to skip it.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 2 ++
 drivers/gpu/drm/xe/xe_pci.c          | 2 ++
 drivers/gpu/drm/xe/xe_pcode.c        | 9 +++++++++
 3 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c4b6aa8fcec17..1a95b1587c86b 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -260,6 +260,8 @@ struct xe_device {
 		u8 enable_display:1;
 		/** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */
 		u8 bypass_mtcfg:1;
+		/** @skip_pcode: skip access to PCODE uC */
+		u8 skip_pcode:1;
 		/** @supports_mmio_ext: supports MMIO extension/s */
 		u8 supports_mmio_ext:1;
 		/** @has_heci_gscfi: device has heci gscfi */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 1c4f9081e84c2..57eecaac53b53 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -63,6 +63,7 @@ struct xe_device_desc {
 	u8 has_llc:1;
 	u8 has_sriov:1;
 	u8 bypass_mtcfg:1;
+	u8 skip_pcode:1;
 	u8 supports_mmio_ext:1;
 	u8 skip_guc_pc:1;
 };
@@ -581,6 +582,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.has_llc = desc->has_llc;
 	xe->info.has_sriov = desc->has_sriov;
 	xe->info.bypass_mtcfg = desc->bypass_mtcfg;
+	xe->info.skip_pcode = desc->skip_pcode;
 	xe->info.supports_mmio_ext = desc->supports_mmio_ext;
 	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
 
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index 4a240acf76252..b324dc2a5debe 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -61,6 +61,9 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
 {
 	int err;
 
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
 	lockdep_assert_held(&gt->pcode.lock);
 
 	if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
@@ -249,6 +252,9 @@ int xe_pcode_init(struct xe_gt *gt)
 	int timeout_us = 180000000; /* 3 min */
 	int ret;
 
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
 	if (!IS_DGFX(gt_to_xe(gt)))
 		return 0;
 
@@ -280,6 +286,9 @@ int xe_pcode_probe(struct xe_gt *gt)
 {
 	drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
 
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
 	if (!IS_DGFX(gt_to_xe(gt)))
 		return 0;
 
-- 
GitLab


From 0c923a68abbfe6d7b4fd2ee37c237aba9d870eaf Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Tue, 28 Nov 2023 18:53:16 +0200
Subject: [PATCH 2143/2340] drm/xe: rename bypass_mtcfg to skip_mtcfg

Per device, set this flag to access the MTCFG register or to skip it.
This is done to standardise Xe driver naming if an access to any HW
should be avoided.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 4 ++--
 drivers/gpu/drm/xe/xe_mmio.c         | 2 +-
 drivers/gpu/drm/xe/xe_pci.c          | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 1a95b1587c86b..9a212dbdb8a49 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -258,8 +258,8 @@ struct xe_device {
 		u8 has_sriov:1;
 		/** @enable_display: display enabled */
 		u8 enable_display:1;
-		/** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */
-		u8 bypass_mtcfg:1;
+		/** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
+		u8 skip_mtcfg:1;
 		/** @skip_pcode: skip access to PCODE uC */
 		u8 skip_pcode:1;
 		/** @supports_mmio_ext: supports MMIO extension/s */
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 0f846272e39c7..883610fc0abf2 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -330,7 +330,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 	if (tile_count == 1)
 		goto add_mmio_ext;
 
-	if (!xe->info.bypass_mtcfg) {
+	if (!xe->info.skip_mtcfg) {
 		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
 		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
 		if (tile_count < xe->info.tile_count) {
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 57eecaac53b53..1f3b2ce7c0445 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -62,7 +62,7 @@ struct xe_device_desc {
 
 	u8 has_llc:1;
 	u8 has_sriov:1;
-	u8 bypass_mtcfg:1;
+	u8 skip_mtcfg:1;
 	u8 skip_pcode:1;
 	u8 supports_mmio_ext:1;
 	u8 skip_guc_pc:1;
@@ -581,7 +581,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.media_name = media_desc ? media_desc->name : "none";
 	xe->info.has_llc = desc->has_llc;
 	xe->info.has_sriov = desc->has_sriov;
-	xe->info.bypass_mtcfg = desc->bypass_mtcfg;
+	xe->info.skip_mtcfg = desc->skip_mtcfg;
 	xe->info.skip_pcode = desc->skip_pcode;
 	xe->info.supports_mmio_ext = desc->supports_mmio_ext;
 	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
-- 
GitLab


From 33acfc7172ab7f9690536710f0938b787f16a46e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Wed, 29 Nov 2023 22:44:56 +0100
Subject: [PATCH 2144/2340] drm/xe: Fix header guard warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Additional underscore in the header guard causes the build to fail with:

drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h:6:9: error: '_XE_ENGINE_CLASS_SYSFS_H_' is used as a header guard here, followed by #define of a different macro [-Werror,-Wheader-guard]

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20231129214509.1174116-1-michal.winiarski@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
index 60469fde41478..ec5ba673b314b 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -4,7 +4,7 @@
  */
 
 #ifndef _XE_ENGINE_CLASS_SYSFS_H_
-#define _XE_ENGINE_CLASS_SYSFS_H__
+#define _XE_ENGINE_CLASS_SYSFS_H_
 
 #include <linux/kobject.h>
 
-- 
GitLab


From 0d29a76c639900747fd33b0774764aa78c9667da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Wed, 29 Nov 2023 22:44:57 +0100
Subject: [PATCH 2145/2340] drm/xe: Skip calling drm_dev_put on probe error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DRM device used by Xe is managed, which means that final ref will be
dropped on driver detach.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231129214509.1174116-2-michal.winiarski@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 12 +++++-------
 drivers/gpu/drm/xe/xe_pci.c    |  5 +----
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 54202623e2553..296260f142dcb 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -215,11 +215,11 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 			      xe->drm.anon_inode->i_mapping,
 			      xe->drm.vma_offset_manager, false, false);
 	if (WARN_ON(err))
-		goto err_put;
+		goto err;
 
 	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
 	if (err)
-		goto err_put;
+		goto err;
 
 	xe->info.devid = pdev->device;
 	xe->info.revid = pdev->revision;
@@ -258,18 +258,16 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	if (!xe->ordered_wq || !xe->unordered_wq) {
 		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
 		err = -ENOMEM;
-		goto err_put;
+		goto err;
 	}
 
 	err = xe_display_create(xe);
 	if (WARN_ON(err))
-		goto err_put;
+		goto err;
 
 	return xe;
 
-err_put:
-	drm_dev_put(&xe->drm);
-
+err:
 	return ERR_PTR(err);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 1f3b2ce7c0445..a38fea281eb00 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -706,7 +706,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_drvdata(pdev, xe);
 	err = pci_enable_device(pdev);
 	if (err)
-		goto err_drm_put;
+		return err;
 
 	pci_set_master(pdev);
 
@@ -754,9 +754,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 err_pci_disable:
 	pci_disable_device(pdev);
 
-err_drm_put:
-	drm_dev_put(&xe->drm);
-
 	return err;
 }
 
-- 
GitLab


From f321ef042e69859536ba6c97b9f25a2a8f761ef9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Wed, 29 Nov 2023 22:44:58 +0100
Subject: [PATCH 2146/2340] drm/xe: Use managed pci_enable_device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Xe uses devres for most of its driver-lifetime resources, use it for pci
device as well.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231129214509.1174116-3-michal.winiarski@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index a38fea281eb00..6aaa16b15058a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -696,25 +696,26 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (xe_display_driver_probe_defer(pdev))
 		return -EPROBE_DEFER;
 
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
 	xe = xe_device_create(pdev, ent);
 	if (IS_ERR(xe))
 		return PTR_ERR(xe);
 
+	pci_set_drvdata(pdev, xe);
+
 	xe_pm_assert_unbounded_bridge(xe);
 	subplatform_desc = find_subplatform(xe, desc);
 
-	pci_set_drvdata(pdev, xe);
-	err = pci_enable_device(pdev);
-	if (err)
-		return err;
-
 	pci_set_master(pdev);
 
 	xe_sriov_probe_early(xe, desc->has_sriov);
 
 	err = xe_info_init(xe, desc, subplatform_desc);
 	if (err)
-		goto err_pci_disable;
+		return err;
 
 	xe_display_probe(xe);
 
@@ -745,16 +746,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	err = xe_device_probe(xe);
 	if (err)
-		goto err_pci_disable;
+		return err;
 
 	xe_pm_init(xe);
 
 	return 0;
-
-err_pci_disable:
-	pci_disable_device(pdev);
-
-	return err;
 }
 
 static void xe_pci_shutdown(struct pci_dev *pdev)
-- 
GitLab


From 604f7e7777d663033063886b6a5362d0e6092e3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Wed, 29 Nov 2023 22:44:59 +0100
Subject: [PATCH 2147/2340] drm/xe/irq: Don't call pci_free_irq_vectors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For devres managed devices, pci_alloc_irq_vectors is also managed (see
pci_setup_msi_context for reference).
PCI device used by Xe is devres managed (it was enabled with
pcim_enable_device), which means that calls to pci_free_irq_vectors are
redundant and can be safely removed.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231129214509.1174116-4-michal.winiarski@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_irq.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 25ba5167c1b92..d1f5ba4bb745a 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -585,7 +585,6 @@ static void irq_uninstall(struct drm_device *drm, void *arg)
 
 	irq = pci_irq_vector(pdev, 0);
 	free_irq(irq, xe);
-	pci_free_irq_vectors(pdev);
 }
 
 int xe_irq_install(struct xe_device *xe)
@@ -612,7 +611,7 @@ int xe_irq_install(struct xe_device *xe)
 	err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
 	if (err < 0) {
 		drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err);
-		goto free_pci_irq_vectors;
+		return err;
 	}
 
 	xe->irq.enabled = true;
@@ -627,8 +626,6 @@ int xe_irq_install(struct xe_device *xe)
 
 free_irq_handler:
 	free_irq(irq, xe);
-free_pci_irq_vectors:
-	pci_free_irq_vectors(pdev);
 
 	return err;
 }
-- 
GitLab


From 01c2413a5bc2c66ab54b4aebd3078823a148e69e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Wed, 29 Nov 2023 22:45:00 +0100
Subject: [PATCH 2148/2340] drm/xe: Move xe_set_dma_info outside of MMIO setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MMIO is going to be setup earlier during probe. Move xe_set_dma_info
outside of MMIO setup.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231129214509.1174116-5-michal.winiarski@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 26 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_mmio.c   | 26 --------------------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 296260f142dcb..dcb0871083931 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -353,6 +353,28 @@ static void xe_device_sanitize(struct drm_device *drm, void *arg)
 		xe_gt_sanitize(gt);
 }
 
+static int xe_set_dma_info(struct xe_device *xe)
+{
+	unsigned int mask_size = xe->info.dma_mask_size;
+	int err;
+
+	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	return 0;
+
+mask_err:
+	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+	return err;
+}
+
 int xe_device_probe(struct xe_device *xe)
 {
 	struct xe_tile *tile;
@@ -367,6 +389,10 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		return err;
 
+	err = xe_set_dma_info(xe);
+	if (err)
+		return err;
+
 	for_each_tile(tile, xe, id) {
 		err = xe_tile_alloc(tile);
 		if (err)
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 883610fc0abf2..43f322cd30a1d 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -25,28 +25,6 @@
 
 #define BAR_SIZE_SHIFT 20
 
-static int xe_set_dma_info(struct xe_device *xe)
-{
-	unsigned int mask_size = xe->info.dma_mask_size;
-	int err;
-
-	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
-
-	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
-	if (err)
-		goto mask_err;
-
-	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
-	if (err)
-		goto mask_err;
-
-	return 0;
-
-mask_err:
-	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
-	return err;
-}
-
 static void
 _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
 {
@@ -431,10 +409,6 @@ int xe_mmio_init(struct xe_device *xe)
 	if (err)
 		return err;
 
-	err = xe_set_dma_info(xe);
-	if (err)
-		return err;
-
 	xe_mmio_probe_tiles(xe);
 
 	return 0;
-- 
GitLab


From f4a0a113f103e23adb4f3ba8a0e02ce4973fdedf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Wed, 29 Nov 2023 22:45:01 +0100
Subject: [PATCH 2149/2340] drm/xe: Move xe_mmio_probe_tiles outside of MMIO
 setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MMIO is going to be setup earlier during probe. Move xe_mmio_probe_tiles
outside of MMIO setup.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231129214509.1174116-6-michal.winiarski@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 2 ++
 drivers/gpu/drm/xe/xe_mmio.c   | 4 +---
 drivers/gpu/drm/xe/xe_mmio.h   | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index dcb0871083931..785bf2e610b7c 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -403,6 +403,8 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		return err;
 
+	xe_mmio_probe_tiles(xe);
+
 	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 43f322cd30a1d..ab91be1405c96 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -295,7 +295,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
 	return 0;
 }
 
-static void xe_mmio_probe_tiles(struct xe_device *xe)
+void xe_mmio_probe_tiles(struct xe_device *xe)
 {
 	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
 	u8 id, tile_count = xe->info.tile_count;
@@ -409,8 +409,6 @@ int xe_mmio_init(struct xe_device *xe)
 	if (err)
 		return err;
 
-	xe_mmio_probe_tiles(xe);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 218b796629adc..c054c27f6925d 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -21,6 +21,7 @@ struct xe_device;
 #define LMEM_BAR		2
 
 int xe_mmio_init(struct xe_device *xe);
+void xe_mmio_probe_tiles(struct xe_device *xe);
 
 static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
 {
-- 
GitLab


From a682b6a42d4de68419f23d73afa57fc931fed3c6 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Mon, 20 Nov 2023 10:44:51 -0800
Subject: [PATCH 2150/2340] drm/xe: Support device page faults on integrated
 platforms

Update xe_migrate_prepare_vm() to use the usm batch buffer even for
servicing device page faults on integrated platforms. And as we have
no VRAM on integrated platforms, device pagefault handler should not
attempt to migrate into VRAM.
LNL is first integrated platform to support device pagefaults.

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +-
 drivers/gpu/drm/xe/xe_migrate.c      | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 0a278df6a97f2..ccf5a6671faf8 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -106,7 +106,7 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
 	if (err)
 		return err;
 
-	if (atomic) {
+	if (atomic && IS_DGFX(vm->xe)) {
 		if (xe_vma_is_userptr(vma)) {
 			err = -EACCES;
 			return err;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 4aea748c984b4..84e138df01722 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -215,10 +215,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	}
 
 	if (!IS_DGFX(xe)) {
-		xe_tile_assert(tile, !xe->info.supports_usm);
-
 		/* Write out batch too */
 		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
+		if (xe->info.supports_usm) {
+			batch = tile->primary_gt->usm.bb_pool->bo;
+			m->usm_batch_base_ofs = m->batch_base_ofs;
+		}
+
 		for (i = 0; i < batch->size;
 		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
-- 
GitLab


From 594b46ba0c8239f9531ac23a4c6eae5c0fad4cf3 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Tue, 21 Nov 2023 12:10:37 -0800
Subject: [PATCH 2151/2340] drm/xe/xe2: Respond to TRTT faults as unsuccessful
 page fault

SW is not expected to handle TRTT faults and should report these as
unsuccessful page fault in the reply, such that HW can respond by
raising a CAT error.

Signed-off-by: Brian Welty <brian.welty@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index ccf5a6671faf8..a5358064a4e0f 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -34,6 +34,7 @@ struct pagefault {
 	u8 engine_class;
 	u8 engine_instance;
 	u8 fault_unsuccessful;
+	bool trva_fault;
 };
 
 enum access_type {
@@ -138,6 +139,10 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
 	int ret = 0;
 	bool atomic;
 
+	/* SW isn't expected to handle TRTT faults */
+	if (pf->trva_fault)
+		return -EFAULT;
+
 	/* ASID to VM */
 	mutex_lock(&xe->usm.lock);
 	vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
@@ -282,6 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
 			(pf_queue->data + pf_queue->head);
 
 		pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+		pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
 		pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
 		pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
 		pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
-- 
GitLab


From 0ac3d319cbdd25839c5034da65d57e3f82b53f6c Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Thu, 30 Nov 2023 23:29:41 +0530
Subject: [PATCH 2152/2340] drm/xe/xe2: Add workaround 16020292621

Workaround applies to Graphics 20.04 as part of ring
submission

V4(MattR):
  - Rule for engine in oob WA not supported, add explicitly
V3(MattR):
  - Pass hwe and rename API name to hint end of ring work
  - Use existing RING_NOPID API
V2:
  - Marking this WA for 20.04 instead of 20.00

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  1 +
 drivers/gpu/drm/xe/xe_ring_ops.c          | 20 ++++++++++++++++++++
 drivers/gpu/drm/xe/xe_wa_oob.rules        |  1 +
 3 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 4402f72481dcd..f1c5bf203b3dc 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -48,6 +48,7 @@
 #define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
 #define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24)
+#define   PIPE_CONTROL_LRI_POST_SYNC			BIT(23)
 #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET		(1<<19)
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 59e0aa2d6a4c5..1201e42ef823c 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -7,6 +7,7 @@
 
 #include "generated/xe_wa_oob.h"
 #include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
@@ -184,6 +185,23 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
 	return i;
 }
 
+static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i)
+{
+	if (hwe->class != XE_ENGINE_CLASS_RENDER)
+		return i;
+
+	if (XE_WA(hwe->gt, 16020292621)) {
+		dw[i++] = GFX_OP_PIPE_CONTROL(6);
+		dw[i++] = PIPE_CONTROL_LRI_POST_SYNC;
+		dw[i++] = RING_NOPID(hwe->mmio_base).addr;
+		dw[i++] = 0;
+		dw[i++] = 0;
+		dw[i++] = 0;
+	}
+
+	return i;
+}
+
 static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
 			      int i)
 {
@@ -342,6 +360,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 
 	i = emit_user_interrupt(dw, i);
 
+	i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i);
+
 	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
 
 	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index c7b7d40b5d575..727bdc4292124 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -21,3 +21,4 @@
 22010954014	PLATFORM(DG2)
 14019821291	MEDIA_VERSION_RANGE(1300, 2000)
 14015076503	MEDIA_VERSION(1300)
+16020292621	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
-- 
GitLab


From a754391f9c0e16f7ef82c90210da7a12b00dd70d Mon Sep 17 00:00:00 2001
From: Animesh Manna <animesh.manna@intel.com>
Date: Fri, 1 Dec 2023 14:44:05 +0530
Subject: [PATCH 2153/2340] drm/xe/dsb: DSB implementation for xe

Add xe specific DSB buffer handling methods.

v1: Initial version.
v2: Add null check after dynamic memory allocation of vma. [Uma]

Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                |  1 +
 drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 71 ++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/display/xe_dsb_buffer.c

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b8ad42fcbea2a..e6f98d8077839 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -166,6 +166,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	display/xe_plane_initial.o \
 	display/xe_display_rps.o \
 	display/xe_display_misc.o \
+	display/xe_dsb_buffer.o \
 	display/intel_fbdev_fb.o \
 	display/intel_fb_bo.o \
 	display/ext/i915_irq.o \
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
new file mode 100644
index 0000000000000..27c2fb1c002a1
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023, Intel Corporation.
+ */
+
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_display_types.h"
+#include "intel_dsb_buffer.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+
+u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
+{
+	return xe_bo_ggtt_addr(dsb_buf->vma->bo);
+}
+
+void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
+{
+	iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
+}
+
+u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
+{
+	return iosys_map_rd(&dsb_buf->vma->bo->vmap, idx * 4, u32);
+}
+
+void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
+{
+	WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
+
+	iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
+}
+
+bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
+{
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (!vma)
+		return false;
+
+	obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915),
+				   NULL, PAGE_ALIGN(size),
+				   ttm_bo_type_kernel,
+				   XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) |
+				   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(obj)) {
+		kfree(vma);
+		return false;
+	}
+
+	vma->bo = obj;
+	dsb_buf->vma = vma;
+	dsb_buf->buf_size = size;
+
+	return true;
+}
+
+void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
+{
+	xe_bo_unpin_map_no_vm(dsb_buf->vma->bo);
+	kfree(dsb_buf->vma);
+}
+
+void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
+{
+	/* TODO: add xe specific flush_map() for dsb buffer object. */
+}
-- 
GitLab


From 4f5ee007f62a1825cec8140b14b28ef532f570f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:32:56 +0100
Subject: [PATCH 2154/2340] drm/xe: Split xe_info_init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parts of xe_info_init are only dealing with processing driver_data.
Extract it into xe_info_init_early to be able to use it earlier during
probe.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci.c |  3 +-
 drivers/gpu/drm/xe/xe_pci.c       | 78 ++++++++++++++++++-------------
 2 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 306ff8cb35cb1..d850dca85af17 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -143,7 +143,8 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
 		return -ENODEV;
 
 done:
-	xe_info_init(xe, desc, subplatform_desc);
+	xe_info_init_early(xe, desc, subplatform_desc);
+	xe_info_init(xe, desc->graphics, desc->media);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6aaa16b15058a..9e35ebfb3341e 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -473,16 +473,13 @@ static void peek_gmdid(struct xe_device *xe, u32 gmdid_offset, u32 *ver, u32 *re
  * media is optional.
  */
 static void handle_pre_gmdid(struct xe_device *xe,
-			     const struct xe_device_desc *desc,
-			     const struct xe_graphics_desc **graphics,
-			     const struct xe_media_desc **media)
+			     const struct xe_graphics_desc *graphics,
+			     const struct xe_media_desc *media)
 {
-	*graphics = desc->graphics;
-	xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel;
+	xe->info.graphics_verx100 = graphics->ver * 100 + graphics->rel;
 
-	*media = desc->media;
-	if (*media)
-		xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel;
+	if (media)
+		xe->info.media_verx100 = media->ver * 100 + media->rel;
 
 }
 
@@ -491,7 +488,6 @@ static void handle_pre_gmdid(struct xe_device *xe,
  * based on the result.
  */
 static void handle_gmdid(struct xe_device *xe,
-			 const struct xe_device_desc *desc,
 			 const struct xe_graphics_desc **graphics,
 			 const struct xe_media_desc **media,
 			 u32 *graphics_revid,
@@ -535,32 +531,59 @@ static void handle_gmdid(struct xe_device *xe,
 	}
 }
 
+/*
+ * Initialize device info content that only depends on static driver_data
+ * passed to the driver at probe time from PCI ID table.
+ */
+static void xe_info_init_early(struct xe_device *xe,
+			       const struct xe_device_desc *desc,
+			       const struct xe_subplatform_desc *subplatform_desc)
+{
+	xe->info.platform = desc->platform;
+	xe->info.subplatform = subplatform_desc ?
+		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
+
+	xe->info.is_dgfx = desc->is_dgfx;
+	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
+	xe->info.has_llc = desc->has_llc;
+	xe->info.has_sriov = desc->has_sriov;
+	xe->info.skip_mtcfg = desc->skip_mtcfg;
+	xe->info.skip_pcode = desc->skip_pcode;
+	xe->info.supports_mmio_ext = desc->supports_mmio_ext;
+	xe->info.skip_guc_pc = desc->skip_guc_pc;
+
+	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
+				  xe_modparam.enable_display &&
+				  desc->has_display;
+}
+
+/*
+ * Initialize device info content that does require knowledge about
+ * graphics / media IP version.
+ * Make sure that GT / tile structures allocated by the driver match the data
+ * present in device info.
+ */
 static int xe_info_init(struct xe_device *xe,
-			const struct xe_device_desc *desc,
-			const struct xe_subplatform_desc *subplatform_desc)
+			const struct xe_graphics_desc *graphics_desc,
+			const struct xe_media_desc *media_desc)
 {
-	const struct xe_graphics_desc *graphics_desc = NULL;
-	const struct xe_media_desc *media_desc = NULL;
 	u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
 	struct xe_tile *tile;
 	struct xe_gt *gt;
 	u8 id;
 
-	xe->info.platform = desc->platform;
-	xe->info.subplatform = subplatform_desc ?
-		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
-
 	/*
 	 * If this platform supports GMD_ID, we'll detect the proper IP
 	 * descriptor to use from hardware registers. desc->graphics will only
 	 * ever be set at this point for platforms before GMD_ID. In that case
 	 * the IP descriptions and versions are simply derived from that.
 	 */
-	if (desc->graphics) {
-		handle_pre_gmdid(xe, desc, &graphics_desc, &media_desc);
+	if (graphics_desc) {
+		handle_pre_gmdid(xe, graphics_desc, media_desc);
 		xe->info.step = xe_step_pre_gmdid_get(xe);
 	} else {
-		handle_gmdid(xe, desc, &graphics_desc, &media_desc,
+		xe_assert(xe, !media_desc);
+		handle_gmdid(xe, &graphics_desc, &media_desc,
 			     &graphics_gmdid_revid, &media_gmdid_revid);
 		xe->info.step = xe_step_gmdid_get(xe,
 						  graphics_gmdid_revid,
@@ -575,15 +598,8 @@ static int xe_info_init(struct xe_device *xe,
 	if (!graphics_desc)
 		return -ENODEV;
 
-	xe->info.is_dgfx = desc->is_dgfx;
-	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
 	xe->info.graphics_name = graphics_desc->name;
 	xe->info.media_name = media_desc ? media_desc->name : "none";
-	xe->info.has_llc = desc->has_llc;
-	xe->info.has_sriov = desc->has_sriov;
-	xe->info.skip_mtcfg = desc->skip_mtcfg;
-	xe->info.skip_pcode = desc->skip_pcode;
-	xe->info.supports_mmio_ext = desc->supports_mmio_ext;
 	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
 
 	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
@@ -594,11 +610,7 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.has_asid = graphics_desc->has_asid;
 	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
 	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
-	xe->info.skip_guc_pc = desc->skip_guc_pc;
 
-	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
-				  xe_modparam.enable_display &&
-				  desc->has_display;
 	/*
 	 * All platforms have at least one primary GT.  Any platform with media
 	 * version 13 or higher has an additional dedicated media GT.  And
@@ -711,9 +723,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_master(pdev);
 
+	xe_info_init_early(xe, desc, subplatform_desc);
+
 	xe_sriov_probe_early(xe, desc->has_sriov);
 
-	err = xe_info_init(xe, desc, subplatform_desc);
+	err = xe_info_init(xe, desc->graphics, desc->media);
 	if (err)
 		return err;
 
-- 
GitLab


From 7e4ce4518b906a960122f29e8f3426ca95ebee0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:32:57 +0100
Subject: [PATCH 2155/2340] drm/xe: Introduce xe_tile_init_early and use at
 earlier point in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It also merges the GT (which is part of tile) initialization happening
at xe_info_init with allocating other per-tile data structures into a
common helper function.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c |  6 ------
 drivers/gpu/drm/xe/xe_pci.c    | 10 +++++-----
 drivers/gpu/drm/xe/xe_tile.c   | 32 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_tile.h   |  2 +-
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 785bf2e610b7c..5e1f73c8c77ad 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -393,12 +393,6 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		return err;
 
-	for_each_tile(tile, xe, id) {
-		err = xe_tile_alloc(tile);
-		if (err)
-			return err;
-	}
-
 	err = xe_mmio_init(xe);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 9e35ebfb3341e..87257716b93eb 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -26,6 +26,7 @@
 #include "xe_pm.h"
 #include "xe_sriov.h"
 #include "xe_step.h"
+#include "xe_tile.h"
 
 enum toggle_d3cold {
 	D3COLD_DISABLE,
@@ -623,12 +624,11 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
 
 	for_each_tile(tile, xe, id) {
-		tile->xe = xe;
-		tile->id = id;
+		int err;
 
-		tile->primary_gt = xe_gt_alloc(tile);
-		if (IS_ERR(tile->primary_gt))
-			return PTR_ERR(tile->primary_gt);
+		err = xe_tile_init_early(tile, xe, id);
+		if (err)
+			return err;
 
 		gt = tile->primary_gt;
 		gt->info.id = xe->info.gt_count++;
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 131752a57f652..c74a4f840d846 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -7,6 +7,7 @@
 
 #include "xe_device.h"
 #include "xe_ggtt.h"
+#include "xe_gt.h"
 #include "xe_migrate.h"
 #include "xe_sa.h"
 #include "xe_tile.h"
@@ -80,7 +81,7 @@
  *
  * Returns -ENOMEM if allocations fail, otherwise 0.
  */
-int xe_tile_alloc(struct xe_tile *tile)
+static int xe_tile_alloc(struct xe_tile *tile)
 {
 	struct drm_device *drm = &tile_to_xe(tile)->drm;
 
@@ -97,6 +98,35 @@ int xe_tile_alloc(struct xe_tile *tile)
 	return 0;
 }
 
+/**
+ * xe_tile_init_early - Initialize the tile and primary GT
+ * @tile: Tile to initialize
+ * @xe: Parent Xe device
+ * @id: Tile ID
+ *
+ * Initializes per-tile resources that don't require any interactions with the
+ * hardware or any knowledge about the Graphics/Media IP version.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
+{
+	int err;
+
+	tile->xe = xe;
+	tile->id = id;
+
+	err = xe_tile_alloc(tile);
+	if (err)
+		return err;
+
+	tile->primary_gt = xe_gt_alloc(tile);
+	if (IS_ERR(tile->primary_gt))
+		return PTR_ERR(tile->primary_gt);
+
+	return 0;
+}
+
 static int tile_ttm_mgr_init(struct xe_tile *tile)
 {
 	struct xe_device *xe = tile_to_xe(tile);
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
index 782c47f8bd450..1c9e42ade6b05 100644
--- a/drivers/gpu/drm/xe/xe_tile.h
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -10,7 +10,7 @@
 
 struct xe_tile;
 
-int xe_tile_alloc(struct xe_tile *tile);
+int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
 int xe_tile_init_noalloc(struct xe_tile *tile);
 
 void xe_tile_migrate_wait(struct xe_tile *tile);
-- 
GitLab


From 99e4b1aa8dbe2e23c73229ac1bbd9dc3e6b30c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:32:58 +0100
Subject: [PATCH 2156/2340] drm/xe: Map the entire BAR0 and hold onto the
 initial mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both MMIO registers and GGTT for root tile will need to be used earlier
during probe. Don't rely on tile count to compute the mapping size.
Further more, there's no need to remap after figuring out the real
resource size.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index ab91be1405c96..2e6c94731a5a0 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -300,7 +300,6 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
 	u8 id, tile_count = xe->info.tile_count;
 	struct xe_gt *gt = xe_root_mmio_gt(xe);
-	const int mmio_bar = 0;
 	struct xe_tile *tile;
 	void *regs;
 	u32 mtcfg;
@@ -314,9 +313,6 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 		if (tile_count < xe->info.tile_count) {
 			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
 					xe->info.tile_count, tile_count);
-			pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
-			xe->mmio.size = (tile_mmio_size + tile_mmio_ext_size) * tile_count;
-			xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size);
 			xe->info.tile_count = tile_count;
 
 			/*
@@ -381,17 +377,17 @@ static int xe_verify_lmem_ready(struct xe_device *xe)
 int xe_mmio_init(struct xe_device *xe)
 {
 	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	const int mmio_bar = 0;
 	int err;
 
 	/*
-	 * Map the maximum expected BAR size, which will get remapped later
-	 * if we determine that we're running on a reduced-tile system.
+	 * Map the entire BAR.
 	 * The first 16MB of the BAR, belong to the root tile, and include:
 	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
 	 */
-	xe->mmio.size = (SZ_16M + xe->info.tile_mmio_ext_size) * xe->info.tile_count;
-	xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size);
+	xe->mmio.size = pci_resource_len(pdev, mmio_bar);
+	xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0);
 	if (xe->mmio.regs == NULL) {
 		drm_err(&xe->drm, "failed to map registers\n");
 		return -EIO;
-- 
GitLab


From 4f122766f9043c30b879b44f7dc2ca540b5422cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:32:59 +0100
Subject: [PATCH 2157/2340] drm/xe/device: Introduce xe_device_probe_early
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SR-IOV VF doesn't have access to MMIO registers used to determine
graphics/media ID. It can however communicate with GuC.
Introduce xe_device_probe_early, which initializes enough HW to use
MMIO GuC communication.
This will allow both VF and PF/native driver to have unified probe
ordering.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 22 ++++++++++++++++++----
 drivers/gpu/drm/xe/xe_device.h |  5 +++++
 drivers/gpu/drm/xe/xe_mmio.c   | 16 ++++++++++------
 drivers/gpu/drm/xe/xe_mmio.h   |  1 +
 drivers/gpu/drm/xe/xe_pci.c    | 26 +++++++++++++++++++++-----
 5 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5e1f73c8c77ad..f4be4b13a506e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -375,6 +375,24 @@ mask_err:
 	return err;
 }
 
+/*
+ * Initialize MMIO resources that don't require any knowledge about tile count.
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+	int err;
+
+	err = xe_mmio_init(xe);
+	if (err)
+		return err;
+
+	err = xe_mmio_root_tile_init(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 int xe_device_probe(struct xe_device *xe)
 {
 	struct xe_tile *tile;
@@ -393,10 +411,6 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		return err;
 
-	err = xe_mmio_init(xe);
-	if (err)
-		return err;
-
 	xe_mmio_probe_tiles(xe);
 
 	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 54694f98c91a2..3da83b2332063 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -37,6 +37,7 @@ static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
 
 struct xe_device *xe_device_create(struct pci_dev *pdev,
 				   const struct pci_device_id *ent);
+int xe_device_probe_early(struct xe_device *xe);
 int xe_device_probe(struct xe_device *xe);
 void xe_device_remove(struct xe_device *xe);
 void xe_device_shutdown(struct xe_device *xe);
@@ -123,6 +124,10 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
 	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
 		for_each_if((tile__) = &(xe__)->tiles[(id__)])
 
+#define for_each_remote_tile(tile__, xe__, id__) \
+	for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \
+		for_each_if((tile__) = &(xe__)->tiles[(id__)])
+
 /*
  * FIXME: This only works for now since multi-tile and standalone media
  * happen to be mutually exclusive.  Future platforms may change this...
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 2e6c94731a5a0..35aeb50b71589 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -15,10 +15,12 @@
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_macros.h"
 #include "xe_module.h"
+#include "xe_tile.h"
 
 #define XEHP_MTCFG_ADDR		XE_REG(0x101800)
 #define TILE_COUNT		REG_GENMASK(15, 8)
@@ -376,10 +378,8 @@ static int xe_verify_lmem_ready(struct xe_device *xe)
 
 int xe_mmio_init(struct xe_device *xe)
 {
-	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	const int mmio_bar = 0;
-	int err;
 
 	/*
 	 * Map the entire BAR.
@@ -393,12 +393,16 @@ int xe_mmio_init(struct xe_device *xe)
 		return -EIO;
 	}
 
-	err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
-	if (err)
-		return err;
+	return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
+}
+
+int xe_mmio_root_tile_init(struct xe_device *xe)
+{
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	int err;
 
 	/* Setup first tile; other tiles (if present) will be setup later. */
-	root_tile->mmio.size = xe->mmio.size;
+	root_tile->mmio.size = SZ_16M;
 	root_tile->mmio.regs = xe->mmio.regs;
 
 	err = xe_verify_lmem_ready(xe);
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index c054c27f6925d..98de5c13c89ba 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -21,6 +21,7 @@ struct xe_device;
 #define LMEM_BAR		2
 
 int xe_mmio_init(struct xe_device *xe);
+int xe_mmio_root_tile_init(struct xe_device *xe);
 void xe_mmio_probe_tiles(struct xe_device *xe);
 
 static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 87257716b93eb..d1b8f268ce090 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -536,10 +536,12 @@ static void handle_gmdid(struct xe_device *xe,
  * Initialize device info content that only depends on static driver_data
  * passed to the driver at probe time from PCI ID table.
  */
-static void xe_info_init_early(struct xe_device *xe,
-			       const struct xe_device_desc *desc,
-			       const struct xe_subplatform_desc *subplatform_desc)
+static int xe_info_init_early(struct xe_device *xe,
+			      const struct xe_device_desc *desc,
+			      const struct xe_subplatform_desc *subplatform_desc)
 {
+	int err;
+
 	xe->info.platform = desc->platform;
 	xe->info.subplatform = subplatform_desc ?
 		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
@@ -556,6 +558,12 @@ static void xe_info_init_early(struct xe_device *xe,
 	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
 				  xe_modparam.enable_display &&
 				  desc->has_display;
+
+	err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0);
+	if (err)
+		return err;
+
+	return 0;
 }
 
 /*
@@ -623,13 +631,15 @@ static int xe_info_init(struct xe_device *xe,
 	 */
 	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
 
-	for_each_tile(tile, xe, id) {
+	for_each_remote_tile(tile, xe, id) {
 		int err;
 
 		err = xe_tile_init_early(tile, xe, id);
 		if (err)
 			return err;
+	}
 
+	for_each_tile(tile, xe, id) {
 		gt = tile->primary_gt;
 		gt->info.id = xe->info.gt_count++;
 		gt->info.type = XE_GT_TYPE_MAIN;
@@ -723,10 +733,16 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_master(pdev);
 
-	xe_info_init_early(xe, desc, subplatform_desc);
+	err = xe_info_init_early(xe, desc, subplatform_desc);
+	if (err)
+		return err;
 
 	xe_sriov_probe_early(xe, desc->has_sriov);
 
+	err = xe_device_probe_early(xe);
+	if (err)
+		return err;
+
 	err = xe_info_init(xe, desc->graphics, desc->media);
 	if (err)
 		return err;
-- 
GitLab


From 51fb5ef209b988a3acee3bc7de04bb70aec51ff5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:00 +0100
Subject: [PATCH 2158/2340] drm/xe: Don't "peek" into GMD_ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that MMIO init got moved to device early, we can use regular
xe_mmio_read helpers to get to GMD_ID register.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index d1b8f268ce090..b2e87746e8d87 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -21,6 +21,7 @@
 #include "xe_drv.h"
 #include "xe_gt.h"
 #include "xe_macros.h"
+#include "xe_mmio.h"
 #include "xe_module.h"
 #include "xe_pci_types.h"
 #include "xe_pm.h"
@@ -444,26 +445,22 @@ find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc)
 	return NULL;
 }
 
-static void peek_gmdid(struct xe_device *xe, u32 gmdid_offset, u32 *ver, u32 *revid)
+enum xe_gmdid_type {
+	GMDID_GRAPHICS,
+	GMDID_MEDIA
+};
+
+static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
 {
-	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-	void __iomem *map = pci_iomap_range(pdev, 0, gmdid_offset, sizeof(u32));
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_reg gmdid_reg = GMD_ID;
 	u32 val;
 
-	if (!map) {
-		drm_err(&xe->drm, "Failed to read GMD_ID (%#x) from PCI BAR.\n",
-			gmdid_offset);
-		*ver = 0;
-		*revid = 0;
+	if (type == GMDID_MEDIA)
+		gmdid_reg.addr += MEDIA_GT_GSI_OFFSET;
 
-		return;
-	}
-
-	val = ioread32(map);
-	pci_iounmap(pdev, map);
-
-	*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 +
-		REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
+	val = xe_mmio_read32(gt, gmdid_reg);
+	*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
 	*revid = REG_FIELD_GET(GMD_ID_REVID, val);
 }
 
@@ -496,7 +493,8 @@ static void handle_gmdid(struct xe_device *xe,
 {
 	u32 ver;
 
-	peek_gmdid(xe, GMD_ID.addr, &ver, graphics_revid);
+	read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
+
 	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
 		if (ver == graphics_ip_map[i].ver) {
 			xe->info.graphics_verx100 = ver;
@@ -511,7 +509,7 @@ static void handle_gmdid(struct xe_device *xe,
 			ver / 100, ver % 100);
 	}
 
-	peek_gmdid(xe, GMD_ID.addr + 0x380000, &ver, media_revid);
+	read_gmdid(xe, GMDID_MEDIA, &ver, media_revid);
 
 	/* Media may legitimately be fused off / not present */
 	if (ver == 0)
-- 
GitLab


From 1ccd68e967f13a584bf3d45a58865afb0abbf2a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:01 +0100
Subject: [PATCH 2159/2340] drm/xe: Move system memory management init to
 earlier point in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GuC will need to be loaded earlier during probe. And in order to load
GuC, we will need the ability to create system memory allocations.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index f4be4b13a506e..dd56a8c3f80d4 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -413,6 +413,8 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_mmio_probe_tiles(xe);
 
+	xe_ttm_sys_mgr_init(xe);
+
 	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
 	if (err)
 		return err;
@@ -441,8 +443,6 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		goto err_irq_shutdown;
 
-	xe_ttm_sys_mgr_init(xe);
-
 	for_each_tile(tile, xe, id) {
 		err = xe_tile_init_noalloc(tile);
 		if (err)
-- 
GitLab


From b62f828a8368de59eb5b353788ace58fb6154495 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:02 +0100
Subject: [PATCH 2160/2340] drm/xe: Move force_wake init to earlier point in
 probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GuC will need to be loaded earlier during probe. And in order to load
GuC, being able to take the forcewake is going to be needed.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 3 +++
 drivers/gpu/drm/xe/xe_gt.c     | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index dd56a8c3f80d4..bcc10b7f23ab7 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -415,6 +415,9 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_ttm_sys_mgr_init(xe);
 
+	for_each_gt(gt, xe, id)
+		xe_force_wake_init_gt(gt, gt_to_fw(gt));
+
 	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 8a6fb9641cd69..4db94344bbde7 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -293,8 +293,6 @@ int xe_gt_init_early(struct xe_gt *gt)
 {
 	int err;
 
-	xe_force_wake_init_gt(gt, gt_to_fw(gt));
-
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		return err;
-- 
GitLab


From 791d0362a9e2d47352ee6b35cc8999cb3404e27c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:03 +0100
Subject: [PATCH 2161/2340] drm/xe: Reorder GGTT init to earlier point in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GuC will need to be loaded earlier during probe. Having functional GGTT
is one of the prerequisites.
Also rename xe_ggtt_init_noalloc to xe_ggtt_init_early to match the new
call site.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c |  7 +++++++
 drivers/gpu/drm/xe/xe_ggtt.c   | 22 ++++++++++++++++++----
 drivers/gpu/drm/xe/xe_ggtt.h   |  2 +-
 drivers/gpu/drm/xe/xe_tile.c   |  4 ----
 4 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index bcc10b7f23ab7..65e9aa5e6c31e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -24,6 +24,7 @@
 #include "xe_drv.h"
 #include "xe_exec_queue.h"
 #include "xe_exec.h"
+#include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_irq.h"
 #include "xe_mmio.h"
@@ -418,6 +419,12 @@ int xe_device_probe(struct xe_device *xe)
 	for_each_gt(gt, xe, id)
 		xe_force_wake_init_gt(gt, gt_to_fw(gt));
 
+	for_each_tile(tile, xe, id) {
+		err = xe_ggtt_init_early(tile->mem.ggtt);
+		if (err)
+			return err;
+	}
+
 	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 0e2a41837f169..f8bdbd6010f7f 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -96,14 +96,20 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
 	}
 }
 
-static void ggtt_fini_noalloc(struct drm_device *drm, void *arg)
+static void ggtt_fini_early(struct drm_device *drm, void *arg)
 {
 	struct xe_ggtt *ggtt = arg;
 
 	mutex_destroy(&ggtt->lock);
 	drm_mm_takedown(&ggtt->mm);
+}
+
+static void ggtt_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_ggtt *ggtt = arg;
 
 	xe_bo_unpin_map_no_vm(ggtt->scratch);
+	ggtt->scratch = NULL;
 }
 
 static void primelockdep(struct xe_ggtt *ggtt)
@@ -124,7 +130,14 @@ static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
 	.pte_encode_bo = xelpg_ggtt_pte_encode_bo,
 };
 
-int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
+/*
+ * Early GGTT initialization, which allows to create new mappings usable by the
+ * GuC.
+ * Mappings are not usable by the HW engines, as it doesn't have scratch /
+ * initial clear done to it yet. That will happen in the regular, non-early
+ * GGTT init.
+ */
+int xe_ggtt_init_early(struct xe_ggtt *ggtt)
 {
 	struct xe_device *xe = tile_to_xe(ggtt->tile);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -178,7 +191,7 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt)
 	mutex_init(&ggtt->lock);
 	primelockdep(ggtt);
 
-	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt);
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
 }
 
 static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
@@ -226,7 +239,8 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
 	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size);
 
 	xe_ggtt_initial_clear(ggtt);
-	return 0;
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt);
 err:
 	ggtt->scratch = NULL;
 	return err;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 3faa3c6d0375f..a09c166dff701 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -12,7 +12,7 @@ struct drm_printer;
 
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
 void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
-int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt);
+int xe_ggtt_init_early(struct xe_ggtt *ggtt);
 int xe_ggtt_init(struct xe_ggtt *ggtt);
 void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
 
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index c74a4f840d846..044c20881de7e 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -166,10 +166,6 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
 	if (err)
 		goto err_mem_access;
 
-	err = xe_ggtt_init_noalloc(tile->mem.ggtt);
-	if (err)
-		goto err_mem_access;
-
 	tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
 	if (IS_ERR(tile->mem.kernel_bb_pool))
 		err = PTR_ERR(tile->mem.kernel_bb_pool);
-- 
GitLab


From 0e1a47fcabc8ffa6f460c60c2caa04e51170fa22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:04 +0100
Subject: [PATCH 2162/2340] drm/xe: Add a helper for DRM device-lifetime BO
 create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A helper for managed BO allocations makes it possible to remove specific
"fini" actions and will simplify the following patches adding ability to
execute a release action for specific BO directly.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c           | 36 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_bo.h           |  4 ++++
 drivers/gpu/drm/xe/xe_ggtt.c         |  6 +----
 drivers/gpu/drm/xe/xe_guc_ads.c      | 20 +++-------------
 drivers/gpu/drm/xe/xe_guc_ct.c       |  8 +++----
 drivers/gpu/drm/xe/xe_guc_hwconfig.c | 18 +++-----------
 drivers/gpu/drm/xe/xe_guc_log.c      | 19 +++------------
 drivers/gpu/drm/xe/xe_guc_pc.c       |  9 +++----
 drivers/gpu/drm/xe/xe_hw_engine.c    |  8 +++----
 drivers/gpu/drm/xe/xe_uc_fw.c        |  9 ++++---
 10 files changed, 63 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index dc1ad3b4dc2af..5e3493f21b59f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -9,6 +9,7 @@
 
 #include <drm/drm_drv.h>
 #include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_managed.h>
 #include <drm/ttm/ttm_device.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_tt.h>
@@ -1540,6 +1541,41 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 	return bo;
 }
 
+static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
+{
+	xe_bo_unpin_map_no_vm(arg);
+}
+
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+					   size_t size, u32 flags)
+{
+	struct xe_bo *bo;
+	int ret;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return bo;
+}
+
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+					     const void *data, size_t size, u32 flags)
+{
+	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, size, flags);
+
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+	return bo;
+}
+
 /*
  * XXX: This is in the VM bind data path, likely should calculate this once and
  * store, with a recalculation if the BO is moved.
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 6f183568f76dc..9b1279aca1272 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -124,6 +124,10 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 				     const void *data, size_t size,
 				     enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+					   size_t size, u32 flags);
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+					     const void *data, size_t size, u32 flags);
 
 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 			      u32 bo_flags);
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index f8bdbd6010f7f..374ae4289fa09 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -108,7 +108,6 @@ static void ggtt_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_ggtt *ggtt = arg;
 
-	xe_bo_unpin_map_no_vm(ggtt->scratch);
 	ggtt->scratch = NULL;
 }
 
@@ -227,10 +226,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
 	else
 		flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);
 
-	ggtt->scratch = xe_bo_create_pin_map(xe, ggtt->tile, NULL, XE_PAGE_SIZE,
-					     ttm_bo_type_kernel,
-					     flags);
-
+	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
 	if (IS_ERR(ggtt->scratch)) {
 		err = PTR_ERR(ggtt->scratch);
 		goto err;
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 88789826e7817..2f5ff090aa6bd 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -202,13 +202,6 @@ static size_t guc_ads_size(struct xe_guc_ads *ads)
 		guc_ads_private_data_size(ads);
 }
 
-static void guc_ads_fini(struct drm_device *drm, void *arg)
-{
-	struct xe_guc_ads *ads = arg;
-
-	xe_bo_unpin_map_no_vm(ads->bo);
-}
-
 static bool needs_wa_1607983814(struct xe_device *xe)
 {
 	return GRAPHICS_VERx100(xe) < 1250;
@@ -274,25 +267,18 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
 	struct xe_gt *gt = ads_to_gt(ads);
 	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_bo *bo;
-	int err;
 
 	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
 	ads->regset_size = calculate_regset_size(gt);
 
-	bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ads_size(ads) +
-				  MAX_GOLDEN_LRC_SIZE,
-				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_GGTT_BIT);
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
 	ads->bo = bo;
 
-	err = drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads);
-	if (err)
-		return err;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 6295d916e39f6..24a33fa36496a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -114,7 +114,6 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
 	struct xe_guc_ct *ct = arg;
 
 	xa_destroy(&ct->fence_lookup);
-	xe_bo_unpin_map_no_vm(ct->bo);
 }
 
 static void g2h_worker_func(struct work_struct *w);
@@ -148,10 +147,9 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 
 	primelockdep(ct);
 
-	bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ct_size(),
-				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_GGTT_BIT);
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index 98bb9bb307051..2a13a00917f8c 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -48,13 +48,6 @@ static int guc_hwconfig_copy(struct xe_guc *guc)
 	return 0;
 }
 
-static void guc_hwconfig_fini(struct drm_device *drm, void *arg)
-{
-	struct xe_guc *guc = arg;
-
-	xe_bo_unpin_map_no_vm(guc->hwconfig.bo);
-}
-
 int xe_guc_hwconfig_init(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
@@ -84,19 +77,14 @@ int xe_guc_hwconfig_init(struct xe_guc *guc)
 	if (!size)
 		return -EINVAL;
 
-	bo = xe_bo_create_pin_map(xe, tile, NULL, PAGE_ALIGN(size),
-				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_GGTT_BIT);
+	bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 	guc->hwconfig.bo = bo;
 	guc->hwconfig.size = size;
 
-	err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc);
-	if (err)
-		return err;
-
 	return guc_hwconfig_copy(guc);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 27c3827bfd054..bcd2f4d34081d 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -77,24 +77,15 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
 	}
 }
 
-static void guc_log_fini(struct drm_device *drm, void *arg)
-{
-	struct xe_guc_log *log = arg;
-
-	xe_bo_unpin_map_no_vm(log->bo);
-}
-
 int xe_guc_log_init(struct xe_guc_log *log)
 {
 	struct xe_device *xe = log_to_xe(log);
 	struct xe_tile *tile = gt_to_tile(log_to_gt(log));
 	struct xe_bo *bo;
-	int err;
 
-	bo = xe_bo_create_pin_map(xe, tile, NULL, guc_log_size(),
-				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_GGTT_BIT);
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
@@ -102,9 +93,5 @@ int xe_guc_log_init(struct xe_guc_log *log)
 	log->bo = bo;
 	log->level = xe_modparam.guc_log_level;
 
-	err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log);
-	if (err)
-		return err;
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 1943893a3fd7d..d2605a684b1c5 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -964,7 +964,6 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc)
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
 	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
-	xe_bo_unpin_map_no_vm(pc->bo);
 	mutex_destroy(&pc->freq_lock);
 }
 
@@ -986,11 +985,9 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 
 	mutex_init(&pc->freq_lock);
 
-	bo = xe_bo_create_pin_map(xe, tile, NULL, size,
-				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_GGTT_BIT);
-
+	bo = xe_managed_bo_create_pin_map(xe, tile, size,
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index c52c26c395a7d..108ecbfe593ee 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -239,8 +239,6 @@ static void hw_engine_fini(struct drm_device *drm, void *arg)
 		xe_execlist_port_destroy(hwe->exl_port);
 	xe_lrc_finish(&hwe->kernel_lrc);
 
-	xe_bo_unpin_map_no_vm(hwe->hwsp);
-
 	hwe->gt = NULL;
 }
 
@@ -428,9 +426,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
 	xe_reg_sr_apply_whitelist(hwe);
 
-	hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
-					 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					 XE_BO_CREATE_GGTT_BIT);
+	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
+						 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+						 XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(hwe->hwsp)) {
 		err = PTR_ERR(hwe->hwsp);
 		goto err_name;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index e806e7b6ae424..8ad4bcabb8b5e 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -292,7 +292,6 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
 	if (!xe_uc_fw_is_available(uc_fw))
 		return;
 
-	xe_bo_unpin_map_no_vm(uc_fw->bo);
 	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
 }
 
@@ -692,10 +691,9 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 			goto fail;
 	}
 
-	obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size,
-				     ttm_bo_type_kernel,
-				     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				     XE_BO_CREATE_GGTT_BIT);
+	obj = xe_managed_bo_create_from_data(xe, tile, fw->data, fw->size,
+					     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					     XE_BO_CREATE_GGTT_BIT);
 	if (IS_ERR(obj)) {
 		drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo",
 			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
@@ -726,6 +724,7 @@ fail:
 		 xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
 
 	release_firmware(fw);		/* OK even if fw is NULL */
+
 	return err;
 }
 
-- 
GitLab


From c93ea05191c5b67ecaa784085f8a73e02abcfc76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:05 +0100
Subject: [PATCH 2163/2340] drm/xe/uc: Split xe_uc_fw_init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function does a driver specific "request firmware" step that
includes validating the input, followed by wrapping the firmware binary
into a buffer object. Split it into smaller parts.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 80 ++++++++++++++++++++++++++---------
 1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 8ad4bcabb8b5e..f258eb44fe31c 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -635,15 +635,12 @@ do { \
 			  ver_->major, ver_->minor, ver_->patch); \
 } while (0)
 
-int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
-	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
-	struct xe_tile *tile = gt_to_tile(gt);
 	struct device *dev = xe->drm.dev;
 	struct drm_printer p = drm_info_printer(dev);
 	const struct firmware *fw = NULL;
-	struct xe_bo *obj;
 	int err;
 
 	/*
@@ -691,9 +688,39 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 			goto fail;
 	}
 
-	obj = xe_managed_bo_create_from_data(xe, tile, fw->data, fw->size,
-					     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					     XE_BO_CREATE_GGTT_BIT);
+	*firmware_p = fw;
+
+	return 0;
+
+fail:
+	xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
+			       XE_UC_FIRMWARE_MISSING :
+			       XE_UC_FIRMWARE_ERROR);
+
+	drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+	drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
+		 xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
+
+	release_firmware(fw);		/* OK even if fw is NULL */
+
+	return err;
+}
+
+static void uc_fw_release(const struct firmware *fw)
+{
+	release_firmware(fw);
+}
+
+static int uc_fw_copy(struct xe_uc_fw *uc_fw, const void *data, size_t size, u32 flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *obj;
+	int err;
+
+	obj = xe_managed_bo_create_from_data(xe, tile, data, size, flags);
 	if (IS_ERR(obj)) {
 		drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo",
 			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
@@ -702,28 +729,43 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 	}
 
 	uc_fw->bo = obj;
-	uc_fw->size = fw->size;
-	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);
+	uc_fw->size = size;
 
-	release_firmware(fw);
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);
 
 	err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw);
 	if (err)
-		return err;
+		goto fail;
 
 	return 0;
 
 fail:
-	xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
-			       XE_UC_FIRMWARE_MISSING :
-			       XE_UC_FIRMWARE_ERROR);
-
-	drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_ERROR);
+	drm_notice(&xe->drm, "%s firmware %s: copy failed with error %d\n",
 		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
-	drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
-		 xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
 
-	release_firmware(fw);		/* OK even if fw is NULL */
+	return err;
+}
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+{
+	const struct firmware *fw = NULL;
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_tile *tile = gt_to_tile(gt);
+	int err;
+
+	err = uc_fw_request(uc_fw, &fw);
+	if (err)
+		return err;
+
+	/* no error and no firmware means nothing to copy */
+	if (!fw)
+		return 0;
+
+	err = uc_fw_copy(uc_fw, fw->data, fw->size,
+			 XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT);
+
+	uc_fw_release(fw);
 
 	return err;
 }
-- 
GitLab


From bf2d0d88c3b8d325eee670b2e0b4545de6d30998 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:06 +0100
Subject: [PATCH 2164/2340] drm/xe/uc: Store firmware binary in system-memory
 backed BO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The firmware loading for GuC is about to be moved, and will happen much
earlier in the probe process, when local-memory is not yet available.
While this has the potential to make the firmware loading process
slower, this is only happening during probe and full device reset.
Since both are not hot-paths - store all UC-like firmware in system
memory.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index f258eb44fe31c..9abae65c6b236 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -750,8 +750,6 @@ fail:
 int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 {
 	const struct firmware *fw = NULL;
-	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
-	struct xe_tile *tile = gt_to_tile(gt);
 	int err;
 
 	err = uc_fw_request(uc_fw, &fw);
@@ -763,7 +761,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 		return 0;
 
 	err = uc_fw_copy(uc_fw, fw->data, fw->size,
-			 XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT);
+			 XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT);
 
 	uc_fw_release(fw);
 
-- 
GitLab


From 7704f32c93cff69d8d0e842638f30e4dc9d93b2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:07 +0100
Subject: [PATCH 2165/2340] drm/xe/uc: Extract xe_uc_sanitize_reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Earlier GuC load will require more fine-grained control over reset.
Extract it outside of xe_uc_init_hw.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c |  4 ++++
 drivers/gpu/drm/xe/xe_uc.c | 12 ++++++------
 drivers/gpu/drm/xe/xe_uc.h |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 4db94344bbde7..a9c71da985d30 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -542,6 +542,10 @@ static int do_gt_restart(struct xe_gt *gt)
 	for_each_hw_engine(hwe, gt, id)
 		xe_hw_engine_enable_ring(hwe);
 
+	err = xe_uc_sanitize_reset(&gt->uc);
+	if (err)
+		return err;
+
 	err = xe_uc_init_hw(&gt->uc);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 15dcd1f91e9c4..72a7b3c2577dd 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -80,6 +80,10 @@ int xe_uc_init_post_hwconfig(struct xe_uc *uc)
 	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
+	err = xe_uc_sanitize_reset(uc);
+	if (err)
+		return err;
+
 	err = xe_guc_init_post_hwconfig(&uc->guc);
 	if (err)
 		return err;
@@ -101,13 +105,13 @@ static int uc_reset(struct xe_uc *uc)
 	return 0;
 }
 
-void xe_uc_sanitize(struct xe_uc *uc)
+static void xe_uc_sanitize(struct xe_uc *uc)
 {
 	xe_huc_sanitize(&uc->huc);
 	xe_guc_sanitize(&uc->guc);
 }
 
-static int xe_uc_sanitize_reset(struct xe_uc *uc)
+int xe_uc_sanitize_reset(struct xe_uc *uc)
 {
 	xe_uc_sanitize(uc);
 
@@ -147,10 +151,6 @@ int xe_uc_init_hw(struct xe_uc *uc)
 	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
-	ret = xe_uc_sanitize_reset(uc);
-	if (ret)
-		return ret;
-
 	ret = xe_huc_upload(&uc->huc);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 4109ae7028af5..5d5110c0c834b 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -19,6 +19,6 @@ void xe_uc_stop_prepare(struct xe_uc *uc);
 int xe_uc_stop(struct xe_uc *uc);
 int xe_uc_start(struct xe_uc *uc);
 int xe_uc_suspend(struct xe_uc *uc);
-void xe_uc_sanitize(struct xe_uc *uc);
+int xe_uc_sanitize_reset(struct xe_uc *uc);
 
 #endif
-- 
GitLab


From 4d637a1de2e4da212c1fee505a213a158d6bee1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 5 Dec 2023 02:33:08 +0100
Subject: [PATCH 2166/2340] drm/xe/guc: Split GuC params used for "hwconfig"
 and "post-hwconfig"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move params that are not used for initial "hwconfig" load to
"post-hwconfig" phase.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index e6f680efb29ef..482cb0df9f15b 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -182,6 +182,26 @@ static void guc_init_params(struct xe_guc *guc)
 	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
 	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
 
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+	params[GUC_CTL_FEATURE] = 0;
+	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = 0;
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+static void guc_init_params_post_hwconfig(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 *params = guc->params;
+	int i;
+
+	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
+
 	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
 	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
 	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
@@ -279,6 +299,8 @@ out:
  */
 int xe_guc_init_post_hwconfig(struct xe_guc *guc)
 {
+	guc_init_params_post_hwconfig(guc);
+
 	return xe_guc_ads_init_post_hwconfig(&guc->ads);
 }
 
-- 
GitLab


From ff765b7771d874efd3089f90a8944a958ab05874 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 4 Dec 2023 10:51:27 +0000
Subject: [PATCH 2167/2340] drm/xe: add some debug info for d3cold

From the CI logs we want to easily know if the machine is capable and
allowed to enter d3cold, and can therefore potentially trigger the
d3cold RPM suspend and resume path.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Cc: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 3 +++
 drivers/gpu/drm/xe/xe_pm.c  | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index b2e87746e8d87..b85193d1dcc23 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -778,6 +778,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	xe_pm_init(xe);
 
+	drm_dbg(&xe->drm, "d3cold: capable=%s\n",
+		str_yes_no(xe->d3cold.capable));
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index e31a91cf311c4..b429c2876a764 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -399,4 +399,7 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
 		xe->d3cold.allowed = false;
 
 	mutex_unlock(&xe->d3cold.lock);
+
+	drm_dbg(&xe->drm,
+		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
 }
-- 
GitLab


From c4ad3710f51e8f0f2e169315e07e9e0c62dcded3 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Wed, 22 Nov 2023 14:38:20 +0000
Subject: [PATCH 2168/2340] drm/xe: Extend drm_xe_vm_bind_op
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bind api is extensible but for a single bind op, there
is not a mechanism to extend. Add extensions field to
struct drm_xe_vm_bind_op.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dominik Grzegorzek <dominik.grzegorzek@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 1a844fa7af8a8..4c906ff2429e4 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -631,6 +631,9 @@ struct drm_xe_vm_destroy {
 };
 
 struct drm_xe_vm_bind_op {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
 	/**
 	 * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
 	 */
-- 
GitLab


From 6b8c1edc4f698d7e7e3cd5852bb5b20e93ab01b8 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 22 Nov 2023 14:38:21 +0000
Subject: [PATCH 2169/2340] drm/xe/uapi: Separate bo_create placement from
 flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Although the flags are about the creation, the memory placement
of the BO deserves a proper dedicated field in the uapi.

Besides getting more clear, it also allows to remove the
'magic' shifts from the flags that was a concern during the
uapi reviews.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 14 +++++++-------
 include/uapi/drm/xe_drm.h  |  9 ++++++---
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 5e3493f21b59f..fd516ad7478c8 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1890,15 +1890,15 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
+	/* at least one valid memory placement must be specified */
+	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
+			 !args->placement))
+		return -EINVAL;
+
 	if (XE_IOCTL_DBG(xe, args->flags &
 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
-			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
-			   xe->info.mem_region_mask)))
-		return -EINVAL;
-
-	/* at least one memory type must be specified */
-	if (XE_IOCTL_DBG(xe, !(args->flags & xe->info.mem_region_mask)))
+			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, args->handle))
@@ -1920,7 +1920,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
 		bo_flags |= XE_BO_SCANOUT_BIT;
 
-	bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
 
 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 4c906ff2429e4..6edbcd81c1950 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -500,8 +500,11 @@ struct drm_xe_gem_create {
 	 */
 	__u64 size;
 
-#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING		(0x1 << 24)
-#define DRM_XE_GEM_CREATE_FLAG_SCANOUT			(0x1 << 25)
+	/** @placement: A mask of memory instances of where BO can be placed. */
+	__u32 placement;
+
+#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING		(1 << 0)
+#define DRM_XE_GEM_CREATE_FLAG_SCANOUT			(1 << 1)
 /*
  * When using VRAM as a possible placement, ensure that the corresponding VRAM
  * allocation will always use the CPU accessible part of VRAM. This is important
@@ -517,7 +520,7 @@ struct drm_xe_gem_create {
  * display surfaces, therefore the kernel requires setting this flag for such
  * objects, otherwise an error is thrown on small-bar systems.
  */
-#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM	(0x1 << 26)
+#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM	(1 << 2)
 	/**
 	 * @flags: Flags, currently a mask of memory instances of where BO can
 	 * be placed
-- 
GitLab


From 2bec30715435824c2ea03714038f0ee7a4b5c698 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Wed, 22 Nov 2023 14:38:22 +0000
Subject: [PATCH 2170/2340] drm/xe: Make DRM_XE_DEVICE_QUERY_ENGINES future
 proof
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have at least 2 future features(OA and future media engines
capabilities) that will require Xe to provide more information about
engines to UMDs.

But this information should not just be added to
drm_xe_engine_class_instance for a couple of reasons:
- drm_xe_engine_class_instance is used as input to other structs/uAPIs
and those uAPIs don't care about any of these future new engine fields
- those new fields are useless information after initialization for
some UMDs, so it should not need to carry that around

So here my proposal is to make DRM_XE_DEVICE_QUERY_ENGINES return an
array of drm_xe_query_engine_info that contain
drm_xe_engine_class_instance and 3 u64s to be used for future features.

Reference OA:
https://patchwork.freedesktop.org/patch/558362/?series=121084&rev=6

v2: Reduce reserved[] to 3 u64 (Matthew Brost)

Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo Rebased]
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 15 ++++++++-------
 include/uapi/drm/xe_drm.h     | 27 +++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 61a7d92b7e886..0cbfeaeb1330c 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -53,7 +53,7 @@ static size_t calc_hw_engine_info_size(struct xe_device *xe)
 			i++;
 		}
 
-	return i * sizeof(struct drm_xe_engine_class_instance);
+	return i * sizeof(struct drm_xe_query_engine_info);
 }
 
 typedef u64 (*__ktime_func_t)(void);
@@ -186,9 +186,9 @@ static int query_engines(struct xe_device *xe,
 			 struct drm_xe_device_query *query)
 {
 	size_t size = calc_hw_engine_info_size(xe);
-	struct drm_xe_engine_class_instance __user *query_ptr =
+	struct drm_xe_query_engine_info __user *query_ptr =
 		u64_to_user_ptr(query->data);
-	struct drm_xe_engine_class_instance *hw_engine_info;
+	struct drm_xe_query_engine_info *hw_engine_info;
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
 	struct xe_gt *gt;
@@ -211,12 +211,13 @@ static int query_engines(struct xe_device *xe,
 			if (xe_hw_engine_is_reserved(hwe))
 				continue;
 
-			hw_engine_info[i].engine_class =
+			hw_engine_info[i].instance.engine_class =
 				xe_to_user_engine_class[hwe->class];
-			hw_engine_info[i].engine_instance =
+			hw_engine_info[i].instance.engine_instance =
 				hwe->logical_instance;
-			hw_engine_info[i].gt_id = gt->info.id;
-			hw_engine_info[i].pad = 0;
+			hw_engine_info[i].instance.gt_id = gt->info.id;
+			hw_engine_info[i].instance.pad = 0;
+			memset(hw_engine_info->reserved, 0, sizeof(hw_engine_info->reserved));
 
 			i++;
 		}
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 6edbcd81c1950..dc657ae9db182 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -124,7 +124,13 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 
-/** struct drm_xe_engine_class_instance - instance of an engine class */
+/**
+ * struct drm_xe_engine_class_instance - instance of an engine class
+ *
+ * It is returned as part of the @drm_xe_query_engine_info, but it also is
+ * used as the input of engine selection for both @drm_xe_exec_queue_create
+ * and @drm_xe_query_engine_cycles
+ */
 struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_RENDER		0
 #define DRM_XE_ENGINE_CLASS_COPY		1
@@ -137,14 +143,31 @@ struct drm_xe_engine_class_instance {
 	 */
 #define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC	5
 #define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC	6
+	/** @engine_class: engine class id */
 	__u16 engine_class;
-
+	/** @engine_instance: engine instance id */
 	__u16 engine_instance;
+	/** @gt_id: Unique ID of this GT within the PCI Device */
 	__u16 gt_id;
 	/** @pad: MBZ */
 	__u16 pad;
 };
 
+/**
+ * struct drm_xe_query_engine_info - describe hardware engine
+ *
+ * If a query is made with a struct @drm_xe_device_query where .query
+ * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply uses an array of
+ * struct @drm_xe_query_engine_info in .data.
+ */
+struct drm_xe_query_engine_info {
+	/** @instance: The @drm_xe_engine_class_instance */
+	struct drm_xe_engine_class_instance instance;
+
+	/** @reserved: Reserved */
+	__u64 reserved[3];
+};
+
 /**
  * enum drm_xe_memory_class - Supported memory classes.
  */
-- 
GitLab


From 4e03b584143e18eabd091061a1716515da928dcb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mauro.chehab@linux.intel.com>
Date: Wed, 22 Nov 2023 14:38:23 +0000
Subject: [PATCH 2171/2340] drm/xe/uapi: Reject bo creation of unaligned size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For xe bo creation we request passing size which matches system or
vram minimum page alignment. This way we want to ensure userspace
is aware of region constraints and not aligned allocations will be
rejected returning EINVAL.

v2:
- Rebase, Update uAPI documentation. (Thomas)
v3:
- Adjust the dma-buf kunit test accordingly. (Thomas)
v4:
- Fixed rebase conflicts and updated commit message. (Francois)

Signed-off-by: Mauro Carvalho Chehab <mauro.chehab@linux.intel.com>
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_dma_buf.c | 10 ++++++++--
 drivers/gpu/drm/xe/xe_bo.c            | 26 +++++++++++++++++---------
 include/uapi/drm/xe_drm.h             | 17 +++++++++--------
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 81f12422a5873..bb6f6424e06f6 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -109,15 +109,21 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 	struct drm_gem_object *import;
 	struct dma_buf *dmabuf;
 	struct xe_bo *bo;
+	size_t size;
 
 	/* No VRAM on this device? */
 	if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
 	    (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
 		return;
 
+	size = PAGE_SIZE;
+	if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) &&
+	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		size = SZ_64K;
+
 	kunit_info(test, "running %s\n", __func__);
-	bo = xe_bo_create_user(xe, NULL, NULL, PAGE_SIZE, DRM_XE_GEM_CPU_CACHING_WC,
-			       ttm_bo_type_device, params->mem_mask);
+	bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask);
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
 			   PTR_ERR(bo));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index fd516ad7478c8..0bd1b3581945a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1222,6 +1222,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	};
 	struct ttm_placement *placement;
 	uint32_t alignment;
+	size_t aligned_size;
 	int err;
 
 	/* Only kernel objects should set GT */
@@ -1232,23 +1233,30 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (!bo) {
-		bo = xe_bo_alloc();
-		if (IS_ERR(bo))
-			return bo;
-	}
-
 	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
 	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
-		size = ALIGN(size, SZ_64K);
+		aligned_size = ALIGN(size, SZ_64K);
+		if (type != ttm_bo_type_device)
+			size = ALIGN(size, SZ_64K);
 		flags |= XE_BO_INTERNAL_64K;
 		alignment = SZ_64K >> PAGE_SHIFT;
+
 	} else {
-		size = ALIGN(size, PAGE_SIZE);
+		aligned_size = ALIGN(size, SZ_4K);
+		flags &= ~XE_BO_INTERNAL_64K;
 		alignment = SZ_4K >> PAGE_SHIFT;
 	}
 
+	if (type == ttm_bo_type_device && aligned_size != size)
+		return ERR_PTR(-EINVAL);
+
+	if (!bo) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+	}
+
 	bo->tile = tile;
 	bo->size = size;
 	bo->flags = flags;
@@ -1566,7 +1574,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 					     const void *data, size_t size, u32 flags)
 {
-	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, size, flags);
+	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
 
 	if (IS_ERR(bo))
 		return bo;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index dc657ae9db182..d7918f6e760fa 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -207,11 +207,13 @@ struct drm_xe_query_mem_region {
 	 *
 	 * When the kernel allocates memory for this region, the
 	 * underlying pages will be at least @min_page_size in size.
-	 *
-	 * Important note: When userspace allocates a GTT address which
-	 * can point to memory allocated from this region, it must also
-	 * respect this minimum alignment. This is enforced by the
-	 * kernel.
+	 * Buffer objects with an allowable placement in this region must be
+	 * created with a size aligned to this value.
+	 * GPU virtual address mappings of (parts of) buffer objects that
+	 * may be placed in this region must also have their GPU virtual
+	 * address and range aligned to this value.
+	 * Affected IOCTLS will return %-EINVAL if alignment restrictions are
+	 * not met.
 	 */
 	__u32 min_page_size;
 	/**
@@ -517,9 +519,8 @@ struct drm_xe_gem_create {
 	__u64 extensions;
 
 	/**
-	 * @size: Requested size for the object
-	 *
-	 * The (page-aligned) allocated size for the object will be returned.
+	 * @size: Size of the object to be created, must match region
+	 * (system or vram) minimum alignment (&min_page_size).
 	 */
 	__u64 size;
 
-- 
GitLab


From 4bc9dd98e0a7e8a14386fc8341379ee09e594987 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 22 Nov 2023 14:38:24 +0000
Subject: [PATCH 2172/2340] drm/xe/uapi: Align on a common way to return arrays
 (memory regions)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The uAPI provides queries which return arrays of elements. As of now
the format used in the struct is different depending on which element
is queried. Fix this for memory regions by applying the pattern below:

    struct drm_xe_query_Xs {
       __u32 num_Xs;
       struct drm_xe_X Xs[];
       ...
    }

This removes "query" in the name of struct drm_xe_query_mem_region
as it is not returned from the query IOCTL. There is no functional
change.

v2: Only rename drm_xe_query_mem_region to drm_xe_mem_region
    (José Roberto de Souza)

v3: Rename usage to mem_regions in xe_query.c (José Roberto de Souza)

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 46 ++++++++++++++++++-----------------
 include/uapi/drm/xe_drm.h     | 12 ++++-----
 2 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 0cbfeaeb1330c..34474f8b97f60 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -240,14 +240,14 @@ static size_t calc_mem_regions_size(struct xe_device *xe)
 		if (ttm_manager_type(&xe->ttm, i))
 			num_managers++;
 
-	return offsetof(struct drm_xe_query_mem_regions, regions[num_managers]);
+	return offsetof(struct drm_xe_query_mem_regions, mem_regions[num_managers]);
 }
 
 static int query_mem_regions(struct xe_device *xe,
-			     struct drm_xe_device_query *query)
+			    struct drm_xe_device_query *query)
 {
 	size_t size = calc_mem_regions_size(xe);
-	struct drm_xe_query_mem_regions *usage;
+	struct drm_xe_query_mem_regions *mem_regions;
 	struct drm_xe_query_mem_regions __user *query_ptr =
 		u64_to_user_ptr(query->data);
 	struct ttm_resource_manager *man;
@@ -260,50 +260,52 @@ static int query_mem_regions(struct xe_device *xe,
 		return -EINVAL;
 	}
 
-	usage = kzalloc(size, GFP_KERNEL);
-	if (XE_IOCTL_DBG(xe, !usage))
+	mem_regions = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_DBG(xe, !mem_regions))
 		return -ENOMEM;
 
 	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
-	usage->regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
-	usage->regions[0].instance = 0;
-	usage->regions[0].min_page_size = PAGE_SIZE;
-	usage->regions[0].total_size = man->size << PAGE_SHIFT;
+	mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
+	mem_regions->mem_regions[0].instance = 0;
+	mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
+	mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
 	if (perfmon_capable())
-		usage->regions[0].used = ttm_resource_manager_usage(man);
-	usage->num_regions = 1;
+		mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+	mem_regions->num_mem_regions = 1;
 
 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
 		man = ttm_manager_type(&xe->ttm, i);
 		if (man) {
-			usage->regions[usage->num_regions].mem_class =
+			mem_regions->mem_regions[mem_regions->num_mem_regions].mem_class =
 				DRM_XE_MEM_REGION_CLASS_VRAM;
-			usage->regions[usage->num_regions].instance =
-				usage->num_regions;
-			usage->regions[usage->num_regions].min_page_size =
+			mem_regions->mem_regions[mem_regions->num_mem_regions].instance =
+				mem_regions->num_mem_regions;
+			mem_regions->mem_regions[mem_regions->num_mem_regions].min_page_size =
 				xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
 				SZ_64K : PAGE_SIZE;
-			usage->regions[usage->num_regions].total_size =
+			mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
 				man->size;
 
 			if (perfmon_capable()) {
 				xe_ttm_vram_get_used(man,
-						     &usage->regions[usage->num_regions].used,
-						     &usage->regions[usage->num_regions].cpu_visible_used);
+					&mem_regions->mem_regions
+					[mem_regions->num_mem_regions].used,
+					&mem_regions->mem_regions
+					[mem_regions->num_mem_regions].cpu_visible_used);
 			}
 
-			usage->regions[usage->num_regions].cpu_visible_size =
+			mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
 				xe_ttm_vram_get_cpu_visible_size(man);
-			usage->num_regions++;
+			mem_regions->num_mem_regions++;
 		}
 	}
 
-	if (!copy_to_user(query_ptr, usage, size))
+	if (!copy_to_user(query_ptr, mem_regions, size))
 		ret = 0;
 	else
 		ret = -ENOSPC;
 
-	kfree(usage);
+	kfree(mem_regions);
 	return ret;
 }
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index d7918f6e760fa..863963168dc35 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -183,10 +183,10 @@ enum drm_xe_memory_class {
 };
 
 /**
- * struct drm_xe_query_mem_region - Describes some region as known to
+ * struct drm_xe_mem_region - Describes some region as known to
  * the driver.
  */
-struct drm_xe_query_mem_region {
+struct drm_xe_mem_region {
 	/**
 	 * @mem_class: The memory class describing this region.
 	 *
@@ -323,12 +323,12 @@ struct drm_xe_query_engine_cycles {
  * struct drm_xe_query_mem_regions in .data.
  */
 struct drm_xe_query_mem_regions {
-	/** @num_regions: number of memory regions returned in @regions */
-	__u32 num_regions;
+	/** @num_mem_regions: number of memory regions returned in @mem_regions */
+	__u32 num_mem_regions;
 	/** @pad: MBZ */
 	__u32 pad;
-	/** @regions: The returned regions for this device */
-	struct drm_xe_query_mem_region regions[];
+	/** @mem_regions: The returned memory regions for this device */
+	struct drm_xe_mem_region mem_regions[];
 };
 
 /**
-- 
GitLab


From 71c625aa770d4bd2b0901a9da3820fb89636e1a1 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 22 Nov 2023 14:38:25 +0000
Subject: [PATCH 2173/2340] drm/xe/uapi: Align on a common way to return arrays
 (gt)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The uAPI provides queries which return arrays of elements. As of now
the format used in the struct is different depending on which element
is queried. However, aligning on the new common pattern:

    struct drm_xe_query_Xs {
       __u32 num_Xs;
       struct drm_xe_X Xs[];
       ...
    }

... would mean bringing back the name "gts" which is avoided per commit
fca54ba12470 ("drm/xe/uapi: Rename gts to gt_list") so make an exception
for gt and leave gt_list. Also, this change removes "query" in the
name of struct drm_xe_query_gt as it is not returned from the query
IOCTL. There is no functional change.

v2: Leave gt_list (Matt Roper)

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 2 +-
 include/uapi/drm/xe_drm.h     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 34474f8b97f60..a0e3b0c163f9c 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -354,7 +354,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 {
 	struct xe_gt *gt;
 	size_t size = sizeof(struct drm_xe_query_gt_list) +
-		xe->info.gt_count * sizeof(struct drm_xe_query_gt);
+		xe->info.gt_count * sizeof(struct drm_xe_gt);
 	struct drm_xe_query_gt_list __user *query_ptr =
 		u64_to_user_ptr(query->data);
 	struct drm_xe_query_gt_list *gt_list;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 863963168dc35..a8ae845d0c741 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -357,14 +357,14 @@ struct drm_xe_query_config {
 };
 
 /**
- * struct drm_xe_query_gt - describe an individual GT.
+ * struct drm_xe_gt - describe an individual GT.
  *
  * To be used with drm_xe_query_gt_list, which will return a list with all the
  * existing GT individual descriptions.
  * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for
  * implementing graphics and/or media operations.
  */
-struct drm_xe_query_gt {
+struct drm_xe_gt {
 #define DRM_XE_QUERY_GT_TYPE_MAIN		0
 #define DRM_XE_QUERY_GT_TYPE_MEDIA		1
 	/** @type: GT type: Main or Media */
@@ -404,7 +404,7 @@ struct drm_xe_query_gt_list {
 	/** @pad: MBZ */
 	__u32 pad;
 	/** @gt_list: The GT list returned for this device */
-	struct drm_xe_query_gt gt_list[];
+	struct drm_xe_gt gt_list[];
 };
 
 /**
-- 
GitLab


From 60a6a849fcb338b8a3f3d1ec9ec50c002add925a Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Wed, 22 Nov 2023 14:38:26 +0000
Subject: [PATCH 2174/2340] drm/xe/uapi: Align on a common way to return arrays
 (engines)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The uAPI provides queries which return arrays of elements. As of now
the format used in the struct is different depending on which element
is queried. Fix this for engines by applying the pattern below:

        struct drm_xe_query_Xs {
           __u32 num_Xs;
           struct drm_xe_X Xs[];
           ...
        }

Instead of directly returning an array of struct
drm_xe_query_engine_info, a new struct drm_xe_query_engines is
introduced. It contains itself an array of struct drm_xe_engine
which holds the information about each engine.

v2: Use plural for struct drm_xe_query_engines as multiple engines
    are returned (José Roberto de Souza)

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 31 ++++++++------
 include/uapi/drm/xe_drm.h     | 78 +++++++++++++++++++++--------------
 2 files changed, 65 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index a0e3b0c163f9c..ad9f23e439204 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -53,7 +53,8 @@ static size_t calc_hw_engine_info_size(struct xe_device *xe)
 			i++;
 		}
 
-	return i * sizeof(struct drm_xe_query_engine_info);
+	return sizeof(struct drm_xe_query_engines) +
+		i * sizeof(struct drm_xe_engine);
 }
 
 typedef u64 (*__ktime_func_t)(void);
@@ -186,9 +187,9 @@ static int query_engines(struct xe_device *xe,
 			 struct drm_xe_device_query *query)
 {
 	size_t size = calc_hw_engine_info_size(xe);
-	struct drm_xe_query_engine_info __user *query_ptr =
+	struct drm_xe_query_engines __user *query_ptr =
 		u64_to_user_ptr(query->data);
-	struct drm_xe_query_engine_info *hw_engine_info;
+	struct drm_xe_query_engines *engines;
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
 	struct xe_gt *gt;
@@ -202,8 +203,8 @@ static int query_engines(struct xe_device *xe,
 		return -EINVAL;
 	}
 
-	hw_engine_info = kmalloc(size, GFP_KERNEL);
-	if (!hw_engine_info)
+	engines = kmalloc(size, GFP_KERNEL);
+	if (!engines)
 		return -ENOMEM;
 
 	for_each_gt(gt, xe, gt_id)
@@ -211,22 +212,26 @@ static int query_engines(struct xe_device *xe,
 			if (xe_hw_engine_is_reserved(hwe))
 				continue;
 
-			hw_engine_info[i].instance.engine_class =
+			engines->engines[i].instance.engine_class =
 				xe_to_user_engine_class[hwe->class];
-			hw_engine_info[i].instance.engine_instance =
+			engines->engines[i].instance.engine_instance =
 				hwe->logical_instance;
-			hw_engine_info[i].instance.gt_id = gt->info.id;
-			hw_engine_info[i].instance.pad = 0;
-			memset(hw_engine_info->reserved, 0, sizeof(hw_engine_info->reserved));
+			engines->engines[i].instance.gt_id = gt->info.id;
+			engines->engines[i].instance.pad = 0;
+			memset(engines->engines[i].reserved, 0,
+			       sizeof(engines->engines[i].reserved));
 
 			i++;
 		}
 
-	if (copy_to_user(query_ptr, hw_engine_info, size)) {
-		kfree(hw_engine_info);
+	engines->pad = 0;
+	engines->num_engines = i;
+
+	if (copy_to_user(query_ptr, engines, size)) {
+		kfree(engines);
 		return -EFAULT;
 	}
-	kfree(hw_engine_info);
+	kfree(engines);
 
 	return 0;
 }
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index a8ae845d0c741..2e58ddcf92f5e 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -127,9 +127,9 @@ struct xe_user_extension {
 /**
  * struct drm_xe_engine_class_instance - instance of an engine class
  *
- * It is returned as part of the @drm_xe_query_engine_info, but it also is
- * used as the input of engine selection for both @drm_xe_exec_queue_create
- * and @drm_xe_query_engine_cycles
+ * It is returned as part of the @drm_xe_engine, but it also is used as
+ * the input of engine selection for both @drm_xe_exec_queue_create and
+ * @drm_xe_query_engine_cycles
  */
 struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_RENDER		0
@@ -154,13 +154,9 @@ struct drm_xe_engine_class_instance {
 };
 
 /**
- * struct drm_xe_query_engine_info - describe hardware engine
- *
- * If a query is made with a struct @drm_xe_device_query where .query
- * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply uses an array of
- * struct @drm_xe_query_engine_info in .data.
+ * struct drm_xe_engine - describe hardware engine
  */
-struct drm_xe_query_engine_info {
+struct drm_xe_engine {
 	/** @instance: The @drm_xe_engine_class_instance */
 	struct drm_xe_engine_class_instance instance;
 
@@ -168,6 +164,22 @@ struct drm_xe_query_engine_info {
 	__u64 reserved[3];
 };
 
+/**
+ * struct drm_xe_query_engines - describe engines
+ *
+ * If a query is made with a struct @drm_xe_device_query where .query
+ * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply uses an array of
+ * struct @drm_xe_query_engines in .data.
+ */
+struct drm_xe_query_engines {
+	/** @num_engines: number of engines returned in @engines */
+	__u32 num_engines;
+	/** @pad: MBZ */
+	__u32 pad;
+	/** @engines: The returned engines for this device */
+	struct drm_xe_engine engines[];
+};
+
 /**
  * enum drm_xe_memory_class - Supported memory classes.
  */
@@ -467,28 +479,32 @@ struct drm_xe_query_topology_mask {
  *
  * .. code-block:: C
  *
- *	struct drm_xe_engine_class_instance *hwe;
- *	struct drm_xe_device_query query = {
- *		.extensions = 0,
- *		.query = DRM_XE_DEVICE_QUERY_ENGINES,
- *		.size = 0,
- *		.data = 0,
- *	};
- *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
- *	hwe = malloc(query.size);
- *	query.data = (uintptr_t)hwe;
- *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
- *	int num_engines = query.size / sizeof(*hwe);
- *	for (int i = 0; i < num_engines; i++) {
- *		printf("Engine %d: %s\n", i,
- *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_RENDER ? "RENDER":
- *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_COPY ? "COPY":
- *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE ? "VIDEO_DECODE":
- *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE ? "VIDEO_ENHANCE":
- *			hwe[i].engine_class == DRM_XE_ENGINE_CLASS_COMPUTE ? "COMPUTE":
- *			"UNKNOWN");
- *	}
- *	free(hwe);
+ *     struct drm_xe_query_engines *engines;
+ *     struct drm_xe_device_query query = {
+ *         .extensions = 0,
+ *         .query = DRM_XE_DEVICE_QUERY_ENGINES,
+ *         .size = 0,
+ *         .data = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *     engines = malloc(query.size);
+ *     query.data = (uintptr_t)engines;
+ *     ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *     for (int i = 0; i < engines->num_engines; i++) {
+ *         printf("Engine %d: %s\n", i,
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_RENDER ? "RENDER":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_COPY ? "COPY":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_VIDEO_DECODE ? "VIDEO_DECODE":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE ? "VIDEO_ENHANCE":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_COMPUTE ? "COMPUTE":
+ *             "UNKNOWN");
+ *     }
+ *     free(engines);
  */
 struct drm_xe_device_query {
 	/** @extensions: Pointer to the first extension struct, if any */
-- 
GitLab


From 37d078e51b4cba30f90667a2b35e16725d649956 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 22 Nov 2023 14:38:27 +0000
Subject: [PATCH 2175/2340] drm/xe/uapi: Split xe_sync types from flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Let's continue on the uapi clean-up with more splits
with stuff into their own exclusive fields instead of
reusing stuff.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_sync.c       | 23 +++++++----------------
 drivers/gpu/drm/xe/xe_sync_types.h |  1 +
 include/uapi/drm/xe_drm.h          | 16 ++++++++--------
 3 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index ea96ba4b41da4..936227e794835 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -17,8 +17,6 @@
 #include "xe_macros.h"
 #include "xe_sched_job_types.h"
 
-#define SYNC_FLAGS_TYPE_MASK 0x3
-
 struct user_fence {
 	struct xe_device *xe;
 	struct kref refcount;
@@ -109,15 +107,13 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
 		return -EFAULT;
 
-	if (XE_IOCTL_DBG(xe, sync_in.flags &
-			 ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_FLAG_SIGNAL)) ||
-	    XE_IOCTL_DBG(xe, sync_in.pad) ||
+	if (XE_IOCTL_DBG(xe, sync_in.flags & ~DRM_XE_SYNC_FLAG_SIGNAL) ||
 	    XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1]))
 		return -EINVAL;
 
 	signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL;
-	switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
-	case DRM_XE_SYNC_FLAG_SYNCOBJ:
+	switch (sync_in.type) {
+	case DRM_XE_SYNC_TYPE_SYNCOBJ:
 		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
 			return -EOPNOTSUPP;
 
@@ -135,7 +131,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		}
 		break;
 
-	case DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ:
+	case DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ:
 		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
 			return -EOPNOTSUPP;
 
@@ -165,12 +161,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		}
 		break;
 
-	case DRM_XE_SYNC_FLAG_DMA_BUF:
-		if (XE_IOCTL_DBG(xe, "TODO"))
-			return -EINVAL;
-		break;
-
-	case DRM_XE_SYNC_FLAG_USER_FENCE:
+	case DRM_XE_SYNC_TYPE_USER_FENCE:
 		if (XE_IOCTL_DBG(xe, !signal))
 			return -EOPNOTSUPP;
 
@@ -192,6 +183,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		return -EINVAL;
 	}
 
+	sync->type = sync_in.type;
 	sync->flags = sync_in.flags;
 	sync->timeline_value = sync_in.timeline_value;
 
@@ -252,8 +244,7 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 			user_fence_put(sync->ufence);
 			dma_fence_put(fence);
 		}
-	} else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) ==
-		   DRM_XE_SYNC_FLAG_USER_FENCE) {
+	} else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) {
 		job->user_fence.used = true;
 		job->user_fence.addr = sync->addr;
 		job->user_fence.value = sync->timeline_value;
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
index 24fccc26cb530..852db5e7884fc 100644
--- a/drivers/gpu/drm/xe/xe_sync_types.h
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -21,6 +21,7 @@ struct xe_sync_entry {
 	struct user_fence *ufence;
 	u64 addr;
 	u64 timeline_value;
+	u32 type;
 	u32 flags;
 };
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 2e58ddcf92f5e..978fca7bb235f 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -947,16 +947,16 @@ struct drm_xe_sync {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-#define DRM_XE_SYNC_FLAG_SYNCOBJ		0x0
-#define DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ	0x1
-#define DRM_XE_SYNC_FLAG_DMA_BUF		0x2
-#define DRM_XE_SYNC_FLAG_USER_FENCE		0x3
-#define DRM_XE_SYNC_FLAG_SIGNAL		0x10
+#define DRM_XE_SYNC_TYPE_SYNCOBJ		0x0
+#define DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ	0x1
+#define DRM_XE_SYNC_TYPE_USER_FENCE		0x2
+	/** @type: Type of the this sync object */
+	__u32 type;
+
+#define DRM_XE_SYNC_FLAG_SIGNAL	(1 << 0)
+	/** @flags: Sync Flags */
 	__u32 flags;
 
-	/** @pad: MBZ */
-	__u32 pad;
-
 	union {
 		__u32 handle;
 
-- 
GitLab


From cad4a0d6af146e14a82a0f7d43613450dc56ff80 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 22 Nov 2023 14:38:28 +0000
Subject: [PATCH 2176/2340] drm/xe/uapi: Kill tile_mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is currently unused, so by the rules it cannot go upstream.
Also there was the desire to convert that to align with the
engine_class_instance selection, but the consensus on that one
is to remain with the global gt_id. So we are keeping the gt_id
there, not converting to a generic sched_group and also killing
this tile_mask and only using the default behavior of 0 that is
to create a mapping / page_table entry on every tile, similar
to what i915.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 40 +++++++-------------------------
 drivers/gpu/drm/xe/xe_vm_types.h |  2 --
 include/uapi/drm/xe_drm.h        |  8 +------
 3 files changed, 10 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a97a310123fcb..ff22eddc25785 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -870,7 +870,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    u64 start, u64 end,
 				    bool read_only,
 				    bool is_null,
-				    u8 tile_mask,
 				    u16 pat_index)
 {
 	struct xe_vma *vma;
@@ -903,12 +902,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	if (is_null)
 		vma->gpuva.flags |= DRM_GPUVA_SPARSE;
 
-	if (tile_mask) {
-		vma->tile_mask = tile_mask;
-	} else {
-		for_each_tile(tile, vm->xe, id)
-			vma->tile_mask |= 0x1 << id;
-	}
+	for_each_tile(tile, vm->xe, id)
+		vma->tile_mask |= 0x1 << id;
 
 	if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
 		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
@@ -2166,7 +2161,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
 static struct drm_gpuva_ops *
 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			 u64 bo_offset_or_userptr, u64 addr, u64 range,
-			 u32 operation, u32 flags, u8 tile_mask,
+			 u32 operation, u32 flags,
 			 u32 prefetch_region, u16 pat_index)
 {
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
@@ -2229,7 +2224,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	drm_gpuva_for_each_op(__op, ops) {
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
-		op->tile_mask = tile_mask;
 		if (__op->op == DRM_GPUVA_OP_MAP) {
 			op->map.immediate =
 				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
@@ -2248,8 +2242,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 }
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
-			      u8 tile_mask, bool read_only, bool is_null,
-			      u16 pat_index)
+			      bool read_only, bool is_null, u16 pat_index)
 {
 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
 	struct xe_vma *vma;
@@ -2265,7 +2258,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 	vma = xe_vma_create(vm, bo, op->gem.offset,
 			    op->va.addr, op->va.addr +
 			    op->va.range - 1, read_only, is_null,
-			    tile_mask, pat_index);
+			    pat_index);
 	if (bo)
 		xe_bo_unlock(bo);
 
@@ -2409,8 +2402,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 		{
 			struct xe_vma *vma;
 
-			vma = new_vma(vm, &op->base.map,
-				      op->tile_mask, op->map.read_only,
+			vma = new_vma(vm, &op->base.map, op->map.read_only,
 				      op->map.is_null, op->map.pat_index);
 			if (IS_ERR(vma))
 				return PTR_ERR(vma);
@@ -2435,8 +2427,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 					op->base.remap.unmap->va->flags &
 					DRM_GPUVA_SPARSE;
 
-				vma = new_vma(vm, op->base.remap.prev,
-					      op->tile_mask, read_only,
+				vma = new_vma(vm, op->base.remap.prev, read_only,
 					      is_null, old->pat_index);
 				if (IS_ERR(vma))
 					return PTR_ERR(vma);
@@ -2469,8 +2460,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 					op->base.remap.unmap->va->flags &
 					DRM_GPUVA_SPARSE;
 
-				vma = new_vma(vm, op->base.remap.next,
-					      op->tile_mask, read_only,
+				vma = new_vma(vm, op->base.remap.next, read_only,
 					      is_null, old->pat_index);
 				if (IS_ERR(vma))
 					return PTR_ERR(vma);
@@ -3024,16 +3014,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			err = -EINVAL;
 			goto release_vm_lock;
 		}
-
-		if (bind_ops[i].tile_mask) {
-			u64 valid_tiles = BIT(xe->info.tile_count) - 1;
-
-			if (XE_IOCTL_DBG(xe, bind_ops[i].tile_mask &
-					 ~valid_tiles)) {
-				err = -EINVAL;
-				goto release_vm_lock;
-			}
-		}
 	}
 
 	bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
@@ -3126,14 +3106,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u32 op = bind_ops[i].op;
 		u32 flags = bind_ops[i].flags;
 		u64 obj_offset = bind_ops[i].obj_offset;
-		u8 tile_mask = bind_ops[i].tile_mask;
 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
 		u16 pat_index = bind_ops[i].pat_index;
 
 		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
 						  addr, range, op, flags,
-						  tile_mask, prefetch_region,
-						  pat_index);
+						  prefetch_region, pat_index);
 		if (IS_ERR(ops[i])) {
 			err = PTR_ERR(ops[i]);
 			ops[i] = NULL;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 74cdf16a42adb..e70ec6b2fabed 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -400,8 +400,6 @@ struct xe_vma_op {
 	u32 num_syncs;
 	/** @link: async operation link */
 	struct list_head link;
-	/** @tile_mask: gt mask for this operation */
-	u8 tile_mask;
 	/** @flags: operation flags */
 	enum xe_vma_op_flags flags;
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 978fca7bb235f..77d54926e18f8 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -750,12 +750,6 @@ struct drm_xe_vm_bind_op {
 	/** @addr: Address to operate on, MBZ for UNMAP_ALL */
 	__u64 addr;
 
-	/**
-	 * @tile_mask: Mask for which tiles to create binds for, 0 == All tiles,
-	 * only applies to creating new VMAs
-	 */
-	__u64 tile_mask;
-
 #define DRM_XE_VM_BIND_OP_MAP		0x0
 #define DRM_XE_VM_BIND_OP_UNMAP		0x1
 #define DRM_XE_VM_BIND_OP_MAP_USERPTR	0x2
@@ -790,7 +784,7 @@ struct drm_xe_vm_bind_op {
 	__u32 prefetch_mem_region_instance;
 
 	/** @reserved: Reserved */
-	__u64 reserved[2];
+	__u64 reserved[3];
 };
 
 struct drm_xe_vm_bind {
-- 
GitLab


From 4016d6bf368c4894c834e0652aecd93f7d2a2fab Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 22 Nov 2023 14:38:29 +0000
Subject: [PATCH 2177/2340] drm/xe/uapi: Crystal Reference Clock updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First of all, let's remove the duplication.
But also, let's rename it to remove the word 'frequency'
out of it. In general, the first thing people think of frequency
is the frequency in which the GTs are operating to execute the
GPU instructions.

While this frequency here is a crystal reference clock frequency
which is the base of everything else, and in this case of this
uAPI it is used to calculate a better and precise timestamp.

v2: (Suggested by Jose) Remove the engine_cs and keep the GT info one
since it might be useful for other SRIOV cases where the engine_cs
will be zeroed. So, grabbing from the GT_LIST should be cleaner.

v3: Keep comment on put_user() call (José Roberto de Souza)

Cc: Matt Roper <matthew.d.roper@intel.com>
Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Cc: Jose Souza <jose.souza@intel.com>

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_clock.c |  4 ++--
 drivers/gpu/drm/xe/xe_gt_types.h |  4 ++--
 drivers/gpu/drm/xe/xe_query.c    |  7 +------
 include/uapi/drm/xe_drm.h        | 11 ++++-------
 4 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 25a18eaad9c4b..937054e31d72f 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -75,11 +75,11 @@ int xe_gt_clock_init(struct xe_gt *gt)
 		freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
 	}
 
-	gt->info.clock_freq = freq;
+	gt->info.reference_clock = freq;
 	return 0;
 }
 
 u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count)
 {
-	return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.clock_freq);
+	return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index a96ee7d028aa0..a7263738308ec 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -107,8 +107,8 @@ struct xe_gt {
 		enum xe_gt_type type;
 		/** @id: Unique ID of this GT within the PCI Device */
 		u8 id;
-		/** @clock_freq: clock frequency */
-		u32 clock_freq;
+		/** @reference_clock: clock frequency */
+		u32 reference_clock;
 		/** @engine_mask: mask of engines present on GT */
 		u64 engine_mask;
 		/**
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index ad9f23e439204..3316eab118b1d 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -147,8 +147,6 @@ query_engine_cycles(struct xe_device *xe,
 	if (!hwe)
 		return -EINVAL;
 
-	resp.engine_frequency = gt->info.clock_freq;
-
 	xe_device_mem_access_get(xe);
 	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 
@@ -165,9 +163,6 @@ query_engine_cycles(struct xe_device *xe,
 	resp.width = 36;
 
 	/* Only write to the output fields of user query */
-	if (put_user(resp.engine_frequency, &query_ptr->engine_frequency))
-		return -EFAULT;
-
 	if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
 		return -EFAULT;
 
@@ -383,7 +378,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 		else
 			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
 		gt_list->gt_list[id].gt_id = gt->info.id;
-		gt_list->gt_list[id].clock_freq = gt->info.clock_freq;
+		gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
 		if (!IS_DGFX(xe))
 			gt_list->gt_list[id].near_mem_regions = 0x1;
 		else
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 77d54926e18f8..df3e6fcf9b8b7 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -281,8 +281,8 @@ struct drm_xe_mem_region {
  * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
  * .data points to this allocated structure.
  *
- * The query returns the engine cycles and the frequency that can
- * be used to calculate the engine timestamp. In addition the
+ * The query returns the engine cycles, which along with GT's @reference_clock,
+ * can be used to calculate the engine timestamp. In addition the
  * query returns a set of cpu timestamps that indicate when the command
  * streamer cycle count was captured.
  */
@@ -310,9 +310,6 @@ struct drm_xe_query_engine_cycles {
 	 */
 	__u64 engine_cycles;
 
-	/** @engine_frequency: Frequency of the engine cycles in Hz. */
-	__u64 engine_frequency;
-
 	/**
 	 * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
 	 * reading the engine_cycles register using the reference clockid set by the
@@ -383,8 +380,8 @@ struct drm_xe_gt {
 	__u16 type;
 	/** @gt_id: Unique ID of this GT within the PCI Device */
 	__u16 gt_id;
-	/** @clock_freq: A clock frequency for timestamp */
-	__u32 clock_freq;
+	/** @reference_clock: A clock frequency for timestamp */
+	__u32 reference_clock;
 	/**
 	 * @near_mem_regions: Bit mask of instances from
 	 * drm_xe_query_mem_regions that are nearest to the current engines
-- 
GitLab


From c3fca1077b9a19e679ec59ff2d2c5f4069e375ae Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 22 Nov 2023 14:38:31 +0000
Subject: [PATCH 2178/2340] drm/xe/uapi: Add Tile ID information to the GT info
 query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As an information only. So Userspace can use this information
and be able to correlate different GTs.

Make API symmetric between Engine and GT info.

There's no need right now to include a tile_query entry
since there's no other information that we need from tile
that is not already exposed through different queries.

However, this could be added later if we have different Tile
information that could matter to userspace. But let's keep
the API ready for a direct reference to Tile ID based on
the GT entry.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 1 +
 include/uapi/drm/xe_drm.h     | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 3316eab118b1d..4461dd1c9e403 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -377,6 +377,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
 		else
 			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
+		gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id;
 		gt_list->gt_list[id].gt_id = gt->info.id;
 		gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
 		if (!IS_DGFX(xe))
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index df3e6fcf9b8b7..584fe08e775c7 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -378,6 +378,8 @@ struct drm_xe_gt {
 #define DRM_XE_QUERY_GT_TYPE_MEDIA		1
 	/** @type: GT type: Main or Media */
 	__u16 type;
+	/** @tile_id: Tile ID where this GT lives (Information only) */
+	__u16 tile_id;
 	/** @gt_id: Unique ID of this GT within the PCI Device */
 	__u16 gt_id;
 	/** @reference_clock: A clock frequency for timestamp */
-- 
GitLab


From 7a56bd0cfbeafab33030c782c40b009e39c4bbc0 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 22 Nov 2023 14:38:32 +0000
Subject: [PATCH 2179/2340] drm/xe/uapi: Fix various struct padding for 64b
 alignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Let's respect Documentation/process/botching-up-ioctls.rst
and add the proper padding for a 64b alignment with all as
well as all the required checks and settings for the pads
and the reserved entries.

v2: Fix remaining holes and double check with pahole (Jose)
    Ensure with pahole that both 32b and 64b have exact same
    layout (Thomas)
    Do not set query's pad and reserved bits to zero since it
    is redundant and already done by kzalloc (Matt)

v3: Fix alignment after rebase (José Roberto de Souza)

v4: Fix pad check (Francois Dugast)

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c    |  3 ++-
 drivers/gpu/drm/xe/xe_query.c |  1 +
 drivers/gpu/drm/xe/xe_vm.c    |  8 ++++++++
 include/uapi/drm/xe_drm.h     | 21 ++++++++++++---------
 4 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 0bd1b3581945a..9cc78986dbd3f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1894,7 +1894,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	u32 handle;
 	int err;
 
-	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 4461dd1c9e403..56d61bf596b2b 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -372,6 +372,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 		return -ENOMEM;
 
 	gt_list->num_gt = xe->info.gt_count;
+
 	for_each_gt(gt, xe, id) {
 		if (xe_gt_is_media_type(gt))
 			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ff22eddc25785..622a869fd18e9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2825,6 +2825,10 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 	int err;
 	int i;
 
+	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
 	if (XE_IOCTL_DBG(xe, args->extensions) ||
 	    XE_IOCTL_DBG(xe, !args->num_binds) ||
 	    XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
@@ -2963,6 +2967,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		return err;
 
+	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
 	if (args->exec_queue_id) {
 		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
 		if (XE_IOCTL_DBG(xe, !q)) {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 584fe08e775c7..512c39ea5d503 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -212,8 +212,6 @@ struct drm_xe_mem_region {
 	 * a unique pair.
 	 */
 	__u16 instance;
-	/** @pad: MBZ */
-	__u32 pad;
 	/**
 	 * @min_page_size: Min page-size in bytes for this region.
 	 *
@@ -382,6 +380,8 @@ struct drm_xe_gt {
 	__u16 tile_id;
 	/** @gt_id: Unique ID of this GT within the PCI Device */
 	__u16 gt_id;
+	/** @pad: MBZ */
+	__u16 pad[3];
 	/** @reference_clock: A clock frequency for timestamp */
 	__u32 reference_clock;
 	/**
@@ -601,7 +601,7 @@ struct drm_xe_gem_create {
 #define DRM_XE_GEM_CPU_CACHING_WC                      2
 	__u16 cpu_caching;
 	/** @pad: MBZ */
-	__u16 pad;
+	__u16 pad[3];
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
@@ -782,6 +782,9 @@ struct drm_xe_vm_bind_op {
 	 */
 	__u32 prefetch_mem_region_instance;
 
+	/** @pad: MBZ */
+	__u32 pad2;
+
 	/** @reserved: Reserved */
 	__u64 reserved[3];
 };
@@ -800,12 +803,12 @@ struct drm_xe_vm_bind {
 	 */
 	__u32 exec_queue_id;
 
-	/** @num_binds: number of binds in this IOCTL */
-	__u32 num_binds;
-
 	/** @pad: MBZ */
 	__u32 pad;
 
+	/** @num_binds: number of binds in this IOCTL */
+	__u32 num_binds;
+
 	union {
 		/** @bind: used if num_binds == 1 */
 		struct drm_xe_vm_bind_op bind;
@@ -817,12 +820,12 @@ struct drm_xe_vm_bind {
 		__u64 vector_of_binds;
 	};
 
+	/** @pad: MBZ */
+	__u32 pad2;
+
 	/** @num_syncs: amount of syncs to wait on */
 	__u32 num_syncs;
 
-	/** @pad2: MBZ */
-	__u32 pad2;
-
 	/** @syncs: pointer to struct drm_xe_sync array */
 	__u64 syncs;
 
-- 
GitLab


From 926ad2c38007bd490958164be2b30db80be59993 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 22 Nov 2023 14:38:33 +0000
Subject: [PATCH 2180/2340] drm/xe/uapi: Move xe_exec after xe_exec_queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Although the exec ioctl is a very important one, it makes no sense
to explain xe_exec before explaining the exec_queue. So, let's
move this down to help bring a better flow on the documentation
and code readability.

It is important to highlight that this patch is changing all
the ioctl numbers in a non-backward compatible way. However, we
are doing this final uapi clean-up before we submit our first
pull-request to be part of the upstream Kernel. Once we get
there, no other change like this will ever happen and all the
backward compatibility will be respected.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 include/uapi/drm/xe_drm.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 512c39ea5d503..1be67d6bfd951 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -103,11 +103,11 @@ struct xe_user_extension {
 #define DRM_XE_VM_CREATE		0x03
 #define DRM_XE_VM_DESTROY		0x04
 #define DRM_XE_VM_BIND			0x05
-#define DRM_XE_EXEC			0x06
-#define DRM_XE_EXEC_QUEUE_CREATE	0x07
-#define DRM_XE_EXEC_QUEUE_DESTROY	0x08
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY	0x09
-#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x0a
+#define DRM_XE_EXEC_QUEUE_CREATE	0x06
+#define DRM_XE_EXEC_QUEUE_DESTROY	0x07
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY	0x08
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x09
+#define DRM_XE_EXEC			0x0a
 #define DRM_XE_WAIT_USER_FENCE		0x0b
 /* Must be kept compact -- no holes */
 
@@ -117,11 +117,11 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
 #define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
 #define DRM_IOCTL_XE_VM_BIND			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
-#define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_EXEC_QUEUE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
 #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
 #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY	DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
+#define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 
 /**
-- 
GitLab


From 9329f0667215a5c22d650f870f8a9f5839a5bc5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Mon, 27 Nov 2023 16:03:30 +0100
Subject: [PATCH 2181/2340] drm/xe/uapi: Use LR abbrev for long-running vms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we're using "compute mode" for long running VMs using
preempt-fences for memory management, and "fault mode" for long
running VMs using page faults.

Change this to use the terminology "long-running" abbreviated as LR for
long-running VMs. These VMs can then either be in preempt-fence mode or
fault mode. The user can force fault mode at creation time, but otherwise
the driver can choose to use fault- or preempt-fence mode for long-running
vms depending on the device capabilities. Initially unless fault-mode is
specified, the driver uses preempt-fence mode.

v2:
- Fix commit message wording and the documentation around
  CREATE_FLAG_LR_MODE and CREATE_FLAG_FAULT_MODE

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c |  8 ++++----
 include/uapi/drm/xe_drm.h  | 23 ++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 622a869fd18e9..f71285e8ef108 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1921,7 +1921,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 }
 
 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
-				    DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE | \
+				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
 				    DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \
 				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
 
@@ -1957,7 +1957,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE &&
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
 		return -EINVAL;
 
@@ -1974,12 +1974,12 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
-	if (args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE)
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
 		flags |= XE_VM_FLAG_LR_MODE;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT)
 		flags |= XE_VM_FLAG_ASYNC_DEFAULT;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
-		flags |= XE_VM_FLAG_LR_MODE | XE_VM_FLAG_FAULT_MODE;
+		flags |= XE_VM_FLAG_FAULT_MODE;
 
 	vm = xe_vm_create(xe, flags);
 	if (IS_ERR(vm))
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 1be67d6bfd951..28230a0cd1ba6 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -648,8 +648,29 @@ struct drm_xe_vm_create {
 	__u64 extensions;
 
 #define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE	(1 << 0)
-#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE	(1 << 1)
+	/*
+	 * An LR, or Long Running VM accepts exec submissions
+	 * to its exec_queues that don't have an upper time limit on
+	 * the job execution time. But exec submissions to these
+	 * don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ,
+	 * DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF,
+	 * used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL.
+	 * LR VMs can be created in recoverable page-fault mode using
+	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it.
+	 * If that flag is omitted, the UMD can not rely on the slightly
+	 * different per-VM overcommit semantics that are enabled by
+	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may
+	 * still enable recoverable pagefaults if supported by the device.
+	 */
+#define DRM_XE_VM_CREATE_FLAG_LR_MODE	        (1 << 1)
 #define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT	(1 << 2)
+	/*
+	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also
+	 * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated
+	 * on demand when accessed, and also allows per-VM overcommit of memory.
+	 * The xe driver internally uses recoverable pagefaults to implement
+	 * this.
+	 */
 #define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 3)
 	/** @flags: Flags */
 	__u32 flags;
-- 
GitLab


From 9209fbede74f202168f0b525060feb6bf67924ba Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 29 Nov 2023 11:29:00 -0500
Subject: [PATCH 2182/2340] drm/xe: Remove unused extension definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The vm_create ioctl function doesn't accept any extension.
Remove this left over.
A backward compatible change.

Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
---
 include/uapi/drm/xe_drm.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 28230a0cd1ba6..2ab5ee299be0c 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -643,7 +643,6 @@ struct drm_xe_ext_set_property {
 };
 
 struct drm_xe_vm_create {
-#define DRM_XE_VM_EXTENSION_SET_PROPERTY	0
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-- 
GitLab


From 0f1d88f2786458a8986920669bd8fb3fec6e618d Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 29 Nov 2023 11:41:15 -0500
Subject: [PATCH 2183/2340] drm/xe/uapi: Kill exec_queue_set_property
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All the properties should be immutable and set upon exec_queue creation
using the existent extension. So, let's kill this useless and dangerous
uapi.

Cc: Francois Dugast <francois.dugast@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c     |  2 --
 drivers/gpu/drm/xe/xe_exec_queue.c | 38 -----------------------
 drivers/gpu/drm/xe/xe_exec_queue.h |  2 --
 include/uapi/drm/xe_drm.h          | 48 ++++++++----------------------
 4 files changed, 13 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 65e9aa5e6c31e..8423c817111bf 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -122,8 +122,6 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
-			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 2bab6fbd82f5a..985807d6abbb6 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -883,44 +883,6 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
-				     struct drm_file *file)
-{
-	struct xe_device *xe = to_xe_device(dev);
-	struct xe_file *xef = to_xe_file(file);
-	struct drm_xe_exec_queue_set_property *args = data;
-	struct xe_exec_queue *q;
-	int ret;
-	u32 idx;
-
-	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
-		return -EINVAL;
-
-	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
-	if (XE_IOCTL_DBG(xe, !q))
-		return -ENOENT;
-
-	if (XE_IOCTL_DBG(xe, args->property >=
-			 ARRAY_SIZE(exec_queue_set_property_funcs))) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	idx = array_index_nospec(args->property,
-				 ARRAY_SIZE(exec_queue_set_property_funcs));
-	ret = exec_queue_set_property_funcs[idx](xe, q, args->value, false);
-	if (XE_IOCTL_DBG(xe, ret))
-		goto out;
-
-	if (args->extensions)
-		ret = exec_queue_user_extensions(xe, q, args->extensions, 0,
-						 false);
-out:
-	xe_exec_queue_put(q);
-
-	return ret;
-}
-
 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
 						    struct xe_vm *vm)
 {
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 533da1b0c457a..d959cc4a1a82a 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -55,8 +55,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file);
-int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
-				     struct drm_file *file);
 int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file);
 enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 2ab5ee299be0c..0895e4d2a9815 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -105,10 +105,9 @@ struct xe_user_extension {
 #define DRM_XE_VM_BIND			0x05
 #define DRM_XE_EXEC_QUEUE_CREATE	0x06
 #define DRM_XE_EXEC_QUEUE_DESTROY	0x07
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY	0x08
-#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x09
-#define DRM_XE_EXEC			0x0a
-#define DRM_XE_WAIT_USER_FENCE		0x0b
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x08
+#define DRM_XE_EXEC			0x09
+#define DRM_XE_WAIT_USER_FENCE		0x0a
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -867,38 +866,17 @@ struct drm_xe_vm_bind {
 /* Monitor 64MB contiguous region with 2M sub-granularity */
 #define DRM_XE_ACC_GRANULARITY_64M 3
 
-/**
- * struct drm_xe_exec_queue_set_property - exec queue set property
- *
- * Same namespace for extensions as drm_xe_exec_queue_create
- */
-struct drm_xe_exec_queue_set_property {
-	/** @extensions: Pointer to the first extension struct, if any */
-	__u64 extensions;
-
-	/** @exec_queue_id: Exec queue ID */
-	__u32 exec_queue_id;
-
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY			0
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE		1
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		3
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		4
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		5
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		6
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY		7
-	/** @property: property to set */
-	__u32 property;
-
-	/** @value: property value */
-	__u64 value;
-
-	/** @reserved: Reserved */
-	__u64 reserved[2];
-};
-
 struct drm_xe_exec_queue_create {
-#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
+#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY		0
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY		0
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE		1
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		3
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		4
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		5
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		6
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY	7
+
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-- 
GitLab


From c3ab84efbd05936cfac87ef6801e03534dc4b0b7 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Dec 2023 07:58:20 -0800
Subject: [PATCH 2184/2340] drm/xe: Expand XE_REG_OPTION_MASKED documentation

Expand documentation and add an example to make clear this isn't about
generic masks in registers. Also, fix the doc regarding read operations:
the mask part has no effect on them.

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231205155820.2133813-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_reg_defs.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 6e20fc2de9ffc..c50e7650c09a4 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -60,7 +60,16 @@ struct xe_reg_mcr {
 
 /**
  * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits marking the
- * read/written bits on the lower 16 bits.
+ * written bits on the lower 16 bits.
+ *
+ * It only applies to registers explicitly marked in bspec with
+ * "Access: Masked". Registers with this option can have write operations to
+ * specific lower bits by setting the corresponding upper bits. Other bits will
+ * not be affected. This allows register writes without needing a RMW cycle and
+ * without caching in software the register value.
+ *
+ * Example: a write with value 0x00010001 will set bit 0 and all other bits
+ * retain their previous values.
  *
  * To be used with XE_REG(). XE_REG_MCR() and XE_REG_INITIALIZER()
  */
-- 
GitLab


From 1da0e581983c6f212499d44573b23ae48c1a4d00 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Dec 2023 05:39:51 -0800
Subject: [PATCH 2185/2340] drm/xe/kunit: Remove handling of
 XE_TEST_SUBPLATFORM_ANY

The only user passing XE_TEST_SUBPLATFORM_ANY is
xe_pci_fake_device_init_any(), but then the function would return
earlier when handling XE_TEST_PLATFORM_ANY. Platforms without a
subplatform use XE_SUBPLATFORM_NONE.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231129232807.1499826-3-lucas.demarchi@intel.com
Link: https://lore.kernel.org/r/20231129232807.1499826-6-lucas.demarchi@intel.com
Link: https://lore.kernel.org/r/20231205133954.2089546-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index d850dca85af17..c1aa785cac185 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -128,11 +128,6 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
 	if (!ent->device)
 		return -ENODEV;
 
-	if (subplatform == XE_TEST_SUBPLATFORM_ANY) {
-		subplatform_desc = desc->subplatforms;
-		goto done;
-	}
-
 	for (subplatform_desc = desc->subplatforms;
 	     subplatform_desc && subplatform_desc->subplatform;
 	     subplatform_desc++)
-- 
GitLab


From 5b2a63b40d5620ce453f2a509334ae6feb7b884e Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Dec 2023 05:39:52 -0800
Subject: [PATCH 2186/2340] drm/xe/kunit: Move fake pci data to test-priv

Instead of passing as parameter to xe_pci_fake_device_init(), use
test->priv to pass parameters down the call stack. The main advantage is
that then the data is readily available on other functions by using
kunit_get_current_test().

This is a preparation to fix the initialization of fake devices when
they were supposed to be using GMD_ID.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231129232807.1499826-4-lucas.demarchi@intel.com
Link: https://lore.kernel.org/r/20231205133954.2089546-2-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci.c      | 14 ++++++++------
 drivers/gpu/drm/xe/tests/xe_pci_test.h | 17 +++++------------
 drivers/gpu/drm/xe/tests/xe_rtp_test.c |  4 +++-
 drivers/gpu/drm/xe/tests/xe_wa_test.c  |  7 ++++++-
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index c1aa785cac185..b93cb1e961080 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -7,6 +7,7 @@
 
 #include "tests/xe_test.h"
 
+#include <kunit/test-bug.h>
 #include <kunit/test.h>
 #include <kunit/visibility.h>
 
@@ -106,14 +107,15 @@ void xe_call_for_each_media_ip(xe_media_fn xe_fn)
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip);
 
-int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
-			    enum xe_subplatform subplatform)
+int xe_pci_fake_device_init(struct xe_device *xe)
 {
+	struct kunit *test = kunit_get_current_test();
+	struct xe_pci_fake_data *data = test->priv;
 	const struct pci_device_id *ent = pciidlist;
 	const struct xe_device_desc *desc;
 	const struct xe_subplatform_desc *subplatform_desc;
 
-	if (platform == XE_TEST_PLATFORM_ANY) {
+	if (!data) {
 		desc = (const void *)ent->driver_data;
 		subplatform_desc = NULL;
 		goto done;
@@ -121,7 +123,7 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
 
 	for (ent = pciidlist; ent->device; ent++) {
 		desc = (const void *)ent->driver_data;
-		if (desc->platform == platform)
+		if (desc->platform == data->platform)
 			break;
 	}
 
@@ -131,10 +133,10 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
 	for (subplatform_desc = desc->subplatforms;
 	     subplatform_desc && subplatform_desc->subplatform;
 	     subplatform_desc++)
-		if (subplatform_desc->subplatform == subplatform)
+		if (subplatform_desc->subplatform == data->subplatform)
 			break;
 
-	if (subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc)
+	if (data->subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc)
 		return -ENODEV;
 
 done:
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index cc0f1d141a4d7..b4b3fb2df09ca 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -12,13 +12,6 @@ struct xe_device;
 struct xe_graphics_desc;
 struct xe_media_desc;
 
-/*
- * Some defines just for clarity: these mean the test doesn't care about what
- * platform it will get since it doesn't depend on any platform-specific bits
- */
-#define XE_TEST_PLATFORM_ANY	XE_PLATFORM_UNINITIALIZED
-#define XE_TEST_SUBPLATFORM_ANY	XE_SUBPLATFORM_UNINITIALIZED
-
 typedef int (*xe_device_fn)(struct xe_device *);
 typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *);
 typedef void (*xe_media_fn)(const struct xe_media_desc *);
@@ -27,11 +20,11 @@ int xe_call_for_each_device(xe_device_fn xe_fn);
 void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
 void xe_call_for_each_media_ip(xe_media_fn xe_fn);
 
-int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform,
-			    enum xe_subplatform subplatform);
+struct xe_pci_fake_data {
+	enum xe_platform platform;
+	enum xe_subplatform subplatform;
+};
 
-#define xe_pci_fake_device_init_any(xe__)					\
-	xe_pci_fake_device_init(xe__, XE_TEST_PLATFORM_ANY,			\
-				XE_TEST_SUBPLATFORM_ANY)
+int xe_pci_fake_device_init(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index a1d204133cc16..4a69728976756 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -281,7 +281,9 @@ static int xe_rtp_test_init(struct kunit *test)
 					       drm, DRIVER_GEM);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
 
-	ret = xe_pci_fake_device_init_any(xe);
+	/* Initialize an empty device */
+	test->priv = NULL;
+	ret = xe_pci_fake_device_init(xe);
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	xe->drm.dev = dev;
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 01ea974591ea9..045afae438912 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -75,6 +75,10 @@ KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
 static int xe_wa_test_init(struct kunit *test)
 {
 	const struct platform_test_case *param = test->param_value;
+	struct xe_pci_fake_data data = {
+		.platform = param->platform,
+		.subplatform = param->subplatform,
+	};
 	struct xe_device *xe;
 	struct device *dev;
 	int ret;
@@ -87,7 +91,8 @@ static int xe_wa_test_init(struct kunit *test)
 					       drm, DRIVER_GEM);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
 
-	ret = xe_pci_fake_device_init(xe, param->platform, param->subplatform);
+	test->priv = &data;
+	ret = xe_pci_fake_device_init(xe);
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	xe->info.step = param->step;
-- 
GitLab


From 6cad22853cb89da857ff636607dd0e9880172a43 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Dec 2023 05:39:53 -0800
Subject: [PATCH 2187/2340] drm/xe/kunit: Add stub to read_gmdid

Currently it's not possible to test the WAs for platforms using gmdid
since they don't have the IP information on the descriptor struct. In
order to allow that, add a stub function for read_gmdid() that is
activated when the test executes, replacing the iomap and read of the
real register.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231129232807.1499826-5-lucas.demarchi@intel.com
Link: https://lore.kernel.org/r/20231205133954.2089546-3-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_pci.c      | 18 ++++++++++++++++++
 drivers/gpu/drm/xe/tests/xe_pci_test.h |  6 ++++++
 drivers/gpu/drm/xe/xe_pci.c            |  3 +++
 drivers/gpu/drm/xe/xe_step.h           |  2 ++
 4 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index b93cb1e961080..602793644f61d 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -9,6 +9,7 @@
 
 #include <kunit/test-bug.h>
 #include <kunit/test.h>
+#include <kunit/test-bug.h>
 #include <kunit/visibility.h>
 
 struct kunit_test_data {
@@ -107,6 +108,21 @@ void xe_call_for_each_media_ip(xe_media_fn xe_fn)
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip);
 
+static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
+			    u32 *ver, u32 *revid)
+{
+	struct kunit *test = kunit_get_current_test();
+	struct xe_pci_fake_data *data = test->priv;
+
+	if (type == GMDID_MEDIA) {
+		*ver = data->media_verx100;
+		*revid = xe_step_to_gmdid(data->media_step);
+	} else {
+		*ver = data->graphics_verx100;
+		*revid = xe_step_to_gmdid(data->graphics_step);
+	}
+}
+
 int xe_pci_fake_device_init(struct xe_device *xe)
 {
 	struct kunit *test = kunit_get_current_test();
@@ -140,6 +156,8 @@ int xe_pci_fake_device_init(struct xe_device *xe)
 		return -ENODEV;
 
 done:
+	kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);
+
 	xe_info_init_early(xe, desc, subplatform_desc);
 	xe_info_init(xe, desc->graphics, desc->media);
 
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index b4b3fb2df09ca..811ffe5bd9fd2 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -6,6 +6,8 @@
 #ifndef _XE_PCI_TEST_H_
 #define _XE_PCI_TEST_H_
 
+#include <linux/types.h>
+
 #include "xe_platform_types.h"
 
 struct xe_device;
@@ -23,6 +25,10 @@ void xe_call_for_each_media_ip(xe_media_fn xe_fn);
 struct xe_pci_fake_data {
 	enum xe_platform platform;
 	enum xe_subplatform subplatform;
+	u32 graphics_verx100;
+	u32 media_verx100;
+	u32 graphics_step;
+	u32 media_step;
 };
 
 int xe_pci_fake_device_init(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index b85193d1dcc23..1488903573139 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -5,6 +5,7 @@
 
 #include "xe_pci.h"
 
+#include <kunit/static_stub.h>
 #include <linux/device/driver.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -456,6 +457,8 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
 	struct xe_reg gmdid_reg = GMD_ID;
 	u32 val;
 
+	KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
+
 	if (type == GMDID_MEDIA)
 		gmdid_reg.addr += MEDIA_GT_GSI_OFFSET;
 
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
index a384b640f2af5..686cb59200c28 100644
--- a/drivers/gpu/drm/xe/xe_step.h
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -16,6 +16,8 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe);
 struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
 				      u32 graphics_gmdid_revid,
 				      u32 media_gmdid_revid);
+static inline u32 xe_step_to_gmdid(enum xe_step step) { return step - STEP_A0; }
+
 const char *xe_step_name(enum xe_step step);
 
 #endif
-- 
GitLab


From 2a70bbe6170fafde76cf0135c5cbee4bd4bfa0ec Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Dec 2023 05:39:54 -0800
Subject: [PATCH 2188/2340] drm/xe/kunit: Test WAs for MTL and LNL

Now that the kunit infra has proper support for GMD_ID platforms, add a
few variants of MTL and LNL.

v2: Remove bogus check for setting both media and graphics version in
    test (Matt Roper)

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20231129232807.1499826-6-lucas.demarchi@intel.com
Link: https://lore.kernel.org/r/20231205133954.2089546-4-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_wa_test.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 045afae438912..a53c22a195824 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -18,6 +18,8 @@ struct platform_test_case {
 	const char *name;
 	enum xe_platform platform;
 	enum xe_subplatform subplatform;
+	u32 graphics_verx100;
+	u32 media_verx100;
 	struct xe_step_info step;
 };
 
@@ -38,6 +40,18 @@ struct platform_test_case {
 		.step = { .graphics = STEP_ ## graphics_step__ }			\
 	}
 
+#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__,		\
+		   media_verx100__, media_step__)				\
+	{									\
+		.name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\
+		.platform = XE_ ## platform__,					\
+		.subplatform = XE_SUBPLATFORM_NONE,				\
+		.graphics_verx100 = graphics_verx100__,				\
+		.media_verx100 = media_verx100__,				\
+		.step = { .graphics = STEP_ ## graphics_step__,			\
+			   .media = STEP_ ## media_step__ }			\
+	}
+
 static const struct platform_test_case cases[] = {
 	PLATFORM_CASE(TIGERLAKE, B0),
 	PLATFORM_CASE(DG1, A0),
@@ -63,6 +77,10 @@ static const struct platform_test_case cases[] = {
 	PLATFORM_CASE(PVC, B0),
 	PLATFORM_CASE(PVC, B1),
 	PLATFORM_CASE(PVC, C0),
+	GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
+	GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+	GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
+	GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
 };
 
 static void platform_desc(const struct platform_test_case *t, char *desc)
@@ -78,6 +96,10 @@ static int xe_wa_test_init(struct kunit *test)
 	struct xe_pci_fake_data data = {
 		.platform = param->platform,
 		.subplatform = param->subplatform,
+		.graphics_verx100 = param->graphics_verx100,
+		.media_verx100 = param->media_verx100,
+		.graphics_step = param->step.graphics,
+		.media_step = param->step.media,
 	};
 	struct xe_device *xe;
 	struct device *dev;
@@ -95,7 +117,8 @@ static int xe_wa_test_init(struct kunit *test)
 	ret = xe_pci_fake_device_init(xe);
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
-	xe->info.step = param->step;
+	if (!param->graphics_verx100)
+		xe->info.step = param->step;
 
 	/* TODO: init hw engines for engine/LRC WAs */
 	xe->drm.dev = dev;
-- 
GitLab


From 7ce5716e13cfb37a86c02fe158403c002eb1b504 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 28 Nov 2023 17:17:18 -0800
Subject: [PATCH 2189/2340] drm/xe/huc: Prepare for 2-step HuC authentication

Starting on MTL, the HuC is authenticated twice, once via GuC (same as
with older integrated platforms) and once via GSC; the first
authentication allows the HuC to be used for clear-media workloads,
while the second one unlocks support for protected content.
Ahead of adding the authentication flow via GSC, this patch adds support
for differentiating the 2 auth steps and checking if they're complete.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gsc_regs.h |  2 ++
 drivers/gpu/drm/xe/xe_huc.c           | 47 +++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_huc.h           |  8 ++++-
 drivers/gpu/drm/xe/xe_uc.c            |  2 +-
 4 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
index 9a84b55d66eee..9886ec9cb08e9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -32,6 +32,8 @@
 #define   HECI1_FWSTS1_CURRENT_STATE_RESET		0
 #define   HECI1_FWSTS1_PROXY_STATE_NORMAL		5
 #define   HECI1_FWSTS1_INIT_COMPLETE			REG_BIT(9)
+#define HECI_FWSTS5(base)				XE_REG((base) + 0xc68)
+#define   HECI1_FWSTS5_HUC_AUTH_DONE			REG_BIT(19)
 
 #define HECI_H_GS1(base)	XE_REG((base) + 0xc4c)
 #define   HECI_H_GS1_ER_PREP	REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 2f176badab261..9845165a819cd 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -5,6 +5,7 @@
 
 #include "xe_huc.h"
 
+#include "regs/xe_gsc_regs.h"
 #include "regs/xe_guc_regs.h"
 #include "xe_assert.h"
 #include "xe_bo.h"
@@ -71,7 +72,25 @@ int xe_huc_upload(struct xe_huc *huc)
 	return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
 }
 
-int xe_huc_auth(struct xe_huc *huc)
+static const struct {
+	const char *name;
+	struct xe_reg reg;
+	u32 val;
+} huc_auth_modes[XE_HUC_AUTH_TYPES_COUNT] = {
+	[XE_HUC_AUTH_VIA_GUC] = { "GuC",
+				  HUC_KERNEL_LOAD_INFO,
+				  HUC_LOAD_SUCCESSFUL },
+	[XE_HUC_AUTH_VIA_GSC] = { "GSC",
+				  HECI_FWSTS5(MTL_GSC_HECI1_BASE),
+				  HECI1_FWSTS5_HUC_AUTH_DONE },
+};
+
+static bool huc_is_authenticated(struct xe_gt *gt, enum xe_huc_auth_types type)
+{
+	return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
+}
+
+int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
 {
 	struct xe_device *xe = huc_to_xe(huc);
 	struct xe_gt *gt = huc_to_gt(huc);
@@ -84,7 +103,7 @@ int xe_huc_auth(struct xe_huc *huc)
 	xe_assert(xe, !xe_uc_fw_is_running(&huc->fw));
 
 	/* On newer platforms the HuC survives reset, so no need to re-auth */
-	if (xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO) & HUC_LOAD_SUCCESSFUL) {
+	if (huc_is_authenticated(gt, type)) {
 		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
 		return 0;
 	}
@@ -92,28 +111,36 @@ int xe_huc_auth(struct xe_huc *huc)
 	if (!xe_uc_fw_is_loaded(&huc->fw))
 		return -ENOEXEC;
 
-	ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
-			      xe_uc_fw_rsa_offset(&huc->fw));
+	switch (type) {
+	case XE_HUC_AUTH_VIA_GUC:
+		ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
+				      xe_uc_fw_rsa_offset(&huc->fw));
+		break;
+	default:
+		XE_WARN_ON(type);
+		return -EINVAL;
+	}
 	if (ret) {
-		drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n",
-			ret);
+		drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n",
+			huc_auth_modes[type].name, ret);
 		goto fail;
 	}
 
-	ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO, HUC_LOAD_SUCCESSFUL,
-			     HUC_LOAD_SUCCESSFUL, 100000, NULL, false);
+	ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val,
+			     huc_auth_modes[type].val, 100000, NULL, false);
 	if (ret) {
 		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
 		goto fail;
 	}
 
 	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
-	drm_dbg(&xe->drm, "HuC authenticated\n");
+	drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name);
 
 	return 0;
 
 fail:
-	drm_err(&xe->drm, "HuC authentication failed %d\n", ret);
+	drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n",
+		huc_auth_modes[type].name, ret);
 	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
 
 	return ret;
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
index 5802c43b6ce21..b8c387f14b8ef 100644
--- a/drivers/gpu/drm/xe/xe_huc.h
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -10,9 +10,15 @@
 
 struct drm_printer;
 
+enum xe_huc_auth_types {
+	XE_HUC_AUTH_VIA_GUC = 0,
+	XE_HUC_AUTH_VIA_GSC,
+	XE_HUC_AUTH_TYPES_COUNT
+};
+
 int xe_huc_init(struct xe_huc *huc);
 int xe_huc_upload(struct xe_huc *huc);
-int xe_huc_auth(struct xe_huc *huc);
+int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type);
 void xe_huc_sanitize(struct xe_huc *huc);
 void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 72a7b3c2577dd..25e1ddfd2f86a 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -176,7 +176,7 @@ int xe_uc_init_hw(struct xe_uc *uc)
 		return ret;
 
 	/* We don't fail the driver load if HuC fails to auth, but let's warn */
-	ret = xe_huc_auth(&uc->huc);
+	ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
 	xe_gt_assert(uc_to_gt(uc), !ret);
 
 	/* GSC load is async */
-- 
GitLab


From d8b1571312b7f77aeae2b2a7a138bb8edaa4f725 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 28 Nov 2023 17:17:19 -0800
Subject: [PATCH 2190/2340] drm/xe/huc: HuC authentication via GSC

HuC authentication via GSC is performed by submitting the appropriate
PXP packet to the GSC FW. This packet can trigger a "pending" reply from
the FW, so we need to handle that and resubmit. Note that the auth via
GSC can only be performed if the HuC has already been authenticated by
the GuC.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
Reviewed-by: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h |  59 ++++++++
 drivers/gpu/drm/xe/xe_gsc.c                   |  14 +-
 drivers/gpu/drm/xe/xe_huc.c                   | 141 +++++++++++++++++-
 drivers/gpu/drm/xe/xe_huc.h                   |   1 +
 drivers/gpu/drm/xe/xe_huc_types.h             |   5 +
 drivers/gpu/drm/xe/xe_uc_fw.c                 |   2 +
 drivers/gpu/drm/xe/xe_uc_fw_types.h           |   3 +
 7 files changed, 218 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h

diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
new file mode 100644
index 0000000000000..57520809e48db
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
+#define _ABI_GSC_PXP_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for PXP commands */
+#define HECI_MEADDRESS_PXP 17
+
+#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))
+
+/*
+ * there are a lot of status codes for PXP, but we only define the cross-API
+ * common ones that we actually can handle in the kernel driver. Other failure
+ * codes should be printed to error msg for debug.
+ */
+enum pxp_status {
+	PXP_STATUS_SUCCESS = 0x0,
+	PXP_STATUS_ERROR_API_VERSION = 0x1002,
+	PXP_STATUS_NOT_READY = 0x100e,
+	PXP_STATUS_PLATFCONFIG_KF1_NOVERIF = 0x101a,
+	PXP_STATUS_PLATFCONFIG_KF1_BAD = 0x101f,
+	PXP_STATUS_OP_NOT_PERMITTED = 0x4013
+};
+
+/* Common PXP FW message header */
+struct pxp_cmd_header {
+	u32 api_version;
+	u32 command_id;
+	union {
+		u32 status; /* out */
+		u32 stream_id; /* in */
+#define PXP_CMDHDR_EXTDATA_SESSION_VALID GENMASK(0, 0)
+#define PXP_CMDHDR_EXTDATA_APP_TYPE GENMASK(1, 1)
+#define PXP_CMDHDR_EXTDATA_SESSION_ID GENMASK(17, 2)
+	};
+	/* Length of the message (excluding the header) */
+	u32 buffer_len;
+} __packed;
+
+#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */
+
+/* PXP-Input-Packet: HUC Auth-only */
+struct pxp43_new_huc_auth_in {
+	struct pxp_cmd_header header;
+	u64 huc_base_address;
+	u32 huc_size;
+} __packed;
+
+/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */
+struct pxp43_huc_auth_out {
+	struct pxp_cmd_header header;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index d8ec04e3c0065..a8a895cf4b448 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -16,6 +16,7 @@
 #include "xe_gsc_submit.h"
 #include "xe_gt.h"
 #include "xe_gt_printk.h"
+#include "xe_huc.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_sched_job.h"
@@ -257,11 +258,18 @@ static void gsc_work(struct work_struct *work)
 	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
 
 	ret = gsc_upload(gsc);
-	if (ret && ret != -EEXIST)
+	if (ret && ret != -EEXIST) {
 		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
-	else
-		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+		goto out;
+	}
 
+	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+
+	/* HuC auth failure is not fatal */
+	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
+		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+
+out:
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
 	xe_device_mem_access_put(xe);
 }
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 9845165a819cd..eca109791c6ae 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -5,14 +5,19 @@
 
 #include "xe_huc.h"
 
+#include <drm/drm_managed.h>
+
+#include "abi/gsc_pxp_commands_abi.h"
 #include "regs/xe_gsc_regs.h"
 #include "regs/xe_guc_regs.h"
 #include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
+#include "xe_gsc_submit.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
+#include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_uc_fw.h"
 
@@ -34,6 +39,42 @@ huc_to_guc(struct xe_huc *huc)
 	return &container_of(huc, struct xe_uc, huc)->guc;
 }
 
+static void free_gsc_pkt(struct drm_device *drm, void *arg)
+{
+	struct xe_huc *huc = arg;
+
+	xe_bo_unpin_map_no_vm(huc->gsc_pkt);
+	huc->gsc_pkt = NULL;
+}
+
+#define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K
+static int huc_alloc_gsc_pkt(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	int err;
+
+	/* we use a single object for both input and output */
+	bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
+				  PXP43_HUC_AUTH_INOUT_SIZE * 2,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_SYSTEM_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	huc->gsc_pkt = bo;
+
+	err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
+	if (err) {
+		free_gsc_pkt(&xe->drm, huc);
+		return err;
+	}
+
+	return 0;
+}
+
 int xe_huc_init(struct xe_huc *huc)
 {
 	struct xe_gt *gt = huc_to_gt(huc);
@@ -56,6 +97,12 @@ int xe_huc_init(struct xe_huc *huc)
 	if (!xe_uc_fw_is_enabled(&huc->fw))
 		return 0;
 
+	if (huc->fw.has_gsc_headers) {
+		ret = huc_alloc_gsc_pkt(huc);
+		if (ret)
+			goto out;
+	}
+
 	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
 
 	return 0;
@@ -72,6 +119,89 @@ int xe_huc_upload(struct xe_huc *huc)
 	return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
 }
 
+#define huc_auth_msg_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct pxp43_new_huc_auth_in, field_, val_)
+#define huc_auth_msg_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct pxp43_huc_auth_out, field_)
+
+static u32 huc_emit_pxp_auth_msg(struct xe_device *xe, struct iosys_map *map,
+				 u32 wr_offset, u32 huc_offset, u32 huc_size)
+{
+	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct pxp43_new_huc_auth_in));
+
+	huc_auth_msg_wr(xe, map, wr_offset, header.api_version, PXP_APIVER(4, 3));
+	huc_auth_msg_wr(xe, map, wr_offset, header.command_id, PXP43_CMDID_NEW_HUC_AUTH);
+	huc_auth_msg_wr(xe, map, wr_offset, header.status, 0);
+	huc_auth_msg_wr(xe, map, wr_offset, header.buffer_len,
+			sizeof(struct pxp43_new_huc_auth_in) - sizeof(struct pxp_cmd_header));
+	huc_auth_msg_wr(xe, map, wr_offset, huc_base_address, huc_offset);
+	huc_auth_msg_wr(xe, map, wr_offset, huc_size, huc_size);
+
+	return wr_offset + sizeof(struct pxp43_new_huc_auth_in);
+}
+
+static int huc_auth_via_gsccs(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *pkt = huc->gsc_pkt;
+	u32 wr_offset;
+	u32 rd_offset;
+	u64 ggtt_offset;
+	u32 out_status;
+	int retry = 5;
+	int err = 0;
+
+	if (!pkt)
+		return -ENODEV;
+
+	ggtt_offset = xe_bo_ggtt_addr(pkt);
+
+	wr_offset = xe_gsc_emit_header(xe, &pkt->vmap, 0, HECI_MEADDRESS_PXP, 0,
+				       sizeof(struct pxp43_new_huc_auth_in));
+	wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset,
+					  xe_bo_ggtt_addr(huc->fw.bo),
+					  huc->fw.bo->size);
+	do {
+		err = xe_gsc_pkt_submit_kernel(&gt->uc.gsc, ggtt_offset, wr_offset,
+					       ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
+					       PXP43_HUC_AUTH_INOUT_SIZE);
+		if (err)
+			break;
+
+		if (xe_gsc_check_and_update_pending(xe, &pkt->vmap, 0, &pkt->vmap,
+						    PXP43_HUC_AUTH_INOUT_SIZE)) {
+			err = -EBUSY;
+			msleep(50);
+		}
+	} while (--retry && err == -EBUSY);
+
+	if (err) {
+		drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err);
+		return err;
+	}
+
+	err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE,
+				     sizeof(struct pxp43_huc_auth_out), &rd_offset);
+	if (err) {
+		drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err);
+		return err;
+	}
+
+	/*
+	 * The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already
+	 * authenticated. If the same error is ever returned with HuC not loaded
+	 * we'll still catch it when we check the authentication bit later.
+	 */
+	out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status);
+	if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) {
+		drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static const struct {
 	const char *name;
 	struct xe_reg reg;
@@ -85,8 +215,10 @@ static const struct {
 				  HECI1_FWSTS5_HUC_AUTH_DONE },
 };
 
-static bool huc_is_authenticated(struct xe_gt *gt, enum xe_huc_auth_types type)
+bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type)
 {
+	struct xe_gt *gt = huc_to_gt(huc);
+
 	return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
 }
 
@@ -100,10 +232,8 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
 	if (!xe_uc_fw_is_loadable(&huc->fw))
 		return 0;
 
-	xe_assert(xe, !xe_uc_fw_is_running(&huc->fw));
-
 	/* On newer platforms the HuC survives reset, so no need to re-auth */
-	if (huc_is_authenticated(gt, type)) {
+	if (xe_huc_is_authenticated(huc, type)) {
 		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
 		return 0;
 	}
@@ -116,6 +246,9 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
 		ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
 				      xe_uc_fw_rsa_offset(&huc->fw));
 		break;
+	case XE_HUC_AUTH_VIA_GSC:
+		ret = huc_auth_via_gsccs(huc);
+		break;
 	default:
 		XE_WARN_ON(type);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
index b8c387f14b8ef..532017230287f 100644
--- a/drivers/gpu/drm/xe/xe_huc.h
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -19,6 +19,7 @@ enum xe_huc_auth_types {
 int xe_huc_init(struct xe_huc *huc);
 int xe_huc_upload(struct xe_huc *huc);
 int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type);
+bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type);
 void xe_huc_sanitize(struct xe_huc *huc);
 void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h
index cae6d19097df2..cfbaa5e0dfcac 100644
--- a/drivers/gpu/drm/xe/xe_huc_types.h
+++ b/drivers/gpu/drm/xe/xe_huc_types.h
@@ -8,12 +8,17 @@
 
 #include "xe_uc_fw_types.h"
 
+struct xe_bo;
+
 /**
  * struct xe_huc - HuC
  */
 struct xe_huc {
 	/** @fw: Generic uC firmware management */
 	struct xe_uc_fw fw;
+
+	/** @gsc_pkt: bo to store the packet for auth via GSC */
+	struct xe_bo *gsc_pkt;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 9abae65c6b236..73d6938c921d9 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -528,6 +528,8 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz
 		uc_fw->css_offset = offset;
 	}
 
+	uc_fw->has_gsc_headers = true;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index fc1de0cc93247..ee914a5d85235 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -112,6 +112,9 @@ struct xe_uc_fw {
 	/** @bo: XE BO for uC firmware */
 	struct xe_bo *bo;
 
+	/** @has_gsc_headers: whether the FW image starts with GSC headers */
+	bool has_gsc_headers;
+
 	/*
 	 * The firmware build process will generate a version header file with
 	 * major and minor version defined. The versions are built into CSS
-- 
GitLab


From 6a1fd6787d59a1852e89a9e8863673ae4dc9a2ca Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Tue, 5 Dec 2023 10:51:59 +0530
Subject: [PATCH 2191/2340] drm/xe/xe2: Add workaround 14019988906

This workaround applies to Graphics 20.04 as engine
workaround

V2(MattR):
 - Reorder bit define
 - Apply WA for RCS only

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 +
 drivers/gpu/drm/xe/xe_wa.c           | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d318ec0efd7db..e8dc463a49f68 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -124,6 +124,7 @@
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
 #define XEHP_PSS_CHICKEN			XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED)
+#define   FLSH_IGNORES_PSD			REG_BIT(10)
 #define   FD_END_COLLECT			REG_BIT(5)
 
 #define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 63bd4bb1af039..ce897f2d49be2 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -719,6 +719,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
 	},
+	{ XE_RTP_NAME("14019988906"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
+	},
 
 	{}
 };
-- 
GitLab


From b279b53015079bda2a311b48892dff362ac8ebc3 Mon Sep 17 00:00:00 2001
From: Tejas Upadhyay <tejas.upadhyay@intel.com>
Date: Wed, 6 Dec 2023 22:20:20 +0530
Subject: [PATCH 2192/2340] drm/xe/xe2: Add workaround 18032095049 and
 16021639441

This workaround applies to graphics 20.04 on all engines.

Workaround has three parts :
1. Pipe flush before MI_ATOMIC - This part isn't relevant to Xe
   (at least not right now) since we don't use MI_ATOMIC anywhere
   in the kernel mode driver.
2. Memory-based interrupt masking - Memory-based interrupt processing
   isn't supported on physical functions, only virtual functions,
   according to bspec 60352. So this is probably only relevant once
   SRIOV support lands in the driver.
3. Disabling CSB/timestamp updates to the ghwsp and pphwsp - Workaround
   is added by this change.

The CSB reports to gHWSP and ppHWSP have been discussed as part
of a different topic on some internal threads and we've confirmed
that neither the KMD nor the GuC firmware use those for anything,
so disabling them is always "safe" and should have no functional
or performance impact on system operation.  The same is true for
the timestamp updates in the ppHWSP as well.  Given that, it might
make sense to just combine these two workarounds into a single
record (and single patch) and apply it on all steppings. Disabling
the reports for RCS on higher steppings doesn't have any kind of
negative impact and will simplify the overall situation.

V3(MattR):
  - Combine WA apply same WA for all engines, no performance impact
V2(MattR):
  - Mention detail in commit message
  - Reorder bit define
  - Improve bit naming
  - Remove workaround part which isnt relevant

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h |  5 +++++
 drivers/gpu/drm/xe/xe_wa.c               | 14 ++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 444ff9b83bb1b..3942db268b014 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -47,6 +47,11 @@
 #define RING_ESR(base)				XE_REG((base) + 0xb8)
 
 #define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
+
+#define CSFE_CHICKEN1_REG(base)			XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED)
+#define   GHWSP_CSB_REPORT_DIS			REG_BIT(15)
+#define   PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS	REG_BIT(14)
+
 /*
  * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
  * The lsb of each can be considered a separate enabling bit for encryption.
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index ce897f2d49be2..23f1285135b8d 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -596,6 +596,20 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
 	},
+	/*
+	 * These two workarounds are the same, just applying to different
+	 * engines.  Although Wa_18032095049 (for the RCS) isn't required on
+	 * all steppings, disabling these reports has no impact for our
+	 * driver or the GuC, so we go ahead and treat it the same as
+	 * Wa_16021639441 which does apply to all steppings.
+	 */
+	{ XE_RTP_NAME("18032095049, 16021639441"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1_REG(0),
+			     GHWSP_CSB_REPORT_DIS |
+			     PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
 
 	{}
 };
-- 
GitLab


From 0d97ecce16bd26a1f90022cf0466ff15c4a0bd91 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Mon, 9 Oct 2023 13:10:27 -0700
Subject: [PATCH 2193/2340] drm/xe: Enable Fixed CCS mode setting

Disable dynamic HW load balancing of compute resource assignment
to engines and instead enabled fixed mode of mapping compute
resources to engines on all platforms with more than one compute
engine.

By default enable only one CCS engine with all compute slices
assigned to it. This is the desired configuration for common
workloads.

PVC platform supports only the fixed CCS mode (workaround 16016805146).

v2: Rebase, make it platform agnostic
v3: Minor code refactoring

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile          |  1 +
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 14 +++++
 drivers/gpu/drm/xe/xe_gt.c           | 10 ++++
 drivers/gpu/drm/xe/xe_gt.h           |  2 +
 drivers/gpu/drm/xe/xe_gt_ccs_mode.c  | 78 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_ccs_mode.h  | 23 ++++++++
 drivers/gpu/drm/xe/xe_gt_types.h     |  8 +++
 drivers/gpu/drm/xe/xe_guc_ads.c      |  3 ++
 drivers/gpu/drm/xe/xe_hw_engine.c    | 20 +++++++
 9 files changed, 159 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e6f98d8077839..b9062e3ce6a9a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -70,6 +70,7 @@ xe-y += xe_bb.o \
 	xe_gsc.o \
 	xe_gsc_submit.o \
 	xe_gt.o \
+	xe_gt_ccs_mode.o \
 	xe_gt_clock.o \
 	xe_gt_debugfs.o \
 	xe_gt_idle.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index e8dc463a49f68..9744ed0be3a5c 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -402,8 +402,22 @@
 #define   COMP_CKN_IN				REG_GENMASK(30, 29)
 
 #define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
+#define   RCU_MODE_FIXED_SLICE_CCS_MODE		REG_BIT(1)
 #define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
 
+/*
+ * Total of 4 cslices, where each cslice is in the form:
+ *   [0-3]     CCS ID
+ *   [4-6]     RSVD
+ *   [7]       Disabled
+ */
+#define CCS_MODE				XE_REG(0x14804)
+#define   CCS_MODE_CSLICE_0_3_MASK		REG_GENMASK(11, 0) /* 3 bits per cslice */
+#define   CCS_MODE_CSLICE_MASK			0x7 /* CCS0-3 + rsvd */
+#define   CCS_MODE_CSLICE_WIDTH			ilog2(CCS_MODE_CSLICE_MASK + 1)
+#define   CCS_MODE_CSLICE(cslice, ccs) \
+	((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))
+
 #define FORCEWAKE_ACK_GT			XE_REG(0x130044)
 #define   FORCEWAKE_KERNEL			BIT(0)
 #define   FORCEWAKE_USER			BIT(1)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index a9c71da985d30..93fccbb4f2adf 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -22,6 +22,7 @@
 #include "xe_force_wake.h"
 #include "xe_ggtt.h"
 #include "xe_gsc.h"
+#include "xe_gt_ccs_mode.h"
 #include "xe_gt_clock.h"
 #include "xe_gt_idle.h"
 #include "xe_gt_mcr.h"
@@ -450,6 +451,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
+	/* Configure default CCS mode of 1 engine with all resources */
+	if (xe_gt_ccs_mode_enabled(gt)) {
+		gt->ccs_mode = 1;
+		xe_gt_apply_ccs_mode(gt);
+	}
+
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	XE_WARN_ON(err);
 	xe_device_mem_access_put(gt_to_xe(gt));
@@ -560,6 +567,9 @@ static int do_gt_restart(struct xe_gt *gt)
 		xe_reg_sr_apply_whitelist(hwe);
 	}
 
+	/* Get CCS mode in sync between sw/hw */
+	xe_gt_apply_ccs_mode(gt);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index caded203a8a03..a818cc9c8fd09 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -17,6 +17,8 @@
 		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
 			  xe_hw_engine_is_valid((hwe__)))
 
+#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
+
 #ifdef CONFIG_FAULT_INJECTION
 extern struct fault_attr gt_reset_failure;
 static inline bool xe_fault_inject_gt_reset(void)
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
new file mode 100644
index 0000000000000..541c44c70a84a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_mmio.h"
+
+static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
+{
+	u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
+	int num_slices = hweight32(CCS_MASK(gt));
+	struct xe_device *xe = gt_to_xe(gt);
+	int width, cslice = 0;
+	u32 config = 0;
+
+	xe_assert(xe, xe_gt_ccs_mode_enabled(gt));
+
+	xe_assert(xe, num_engines && num_engines <= num_slices);
+	xe_assert(xe, !(num_slices % num_engines));
+
+	/*
+	 * Loop over all available slices and assign each a user engine.
+	 * For example, if there are four compute slices available, the
+	 * assignment of compute slices to compute engines would be,
+	 *
+	 * With 1 engine (ccs0):
+	 *   slice 0, 1, 2, 3: ccs0
+	 *
+	 * With 2 engines (ccs0, ccs1):
+	 *   slice 0, 2: ccs0
+	 *   slice 1, 3: ccs1
+	 *
+	 * With 4 engines (ccs0, ccs1, ccs2, ccs3):
+	 *   slice 0: ccs0
+	 *   slice 1: ccs1
+	 *   slice 2: ccs2
+	 *   slice 3: ccs3
+	 */
+	for (width = num_slices / num_engines; width; width--) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+
+		for_each_hw_engine(hwe, gt, id) {
+			if (hwe->class != XE_ENGINE_CLASS_COMPUTE)
+				continue;
+
+			if (hwe->logical_instance >= num_engines)
+				break;
+
+			config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;
+
+			/* If a slice is fused off, leave disabled */
+			while ((CCS_MASK(gt) & BIT(cslice)) == 0)
+				cslice++;
+
+			mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK);
+			mode |= CCS_MODE_CSLICE(cslice, hwe->instance);
+			cslice++;
+		}
+	}
+
+	xe_mmio_write32(gt, CCS_MODE, mode);
+
+	xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
+		   mode, config, num_engines, num_slices);
+}
+
+void xe_gt_apply_ccs_mode(struct xe_gt *gt)
+{
+	if (!gt->ccs_mode)
+		return;
+
+	__xe_gt_apply_ccs_mode(gt, gt->ccs_mode);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
new file mode 100644
index 0000000000000..e8766879f6ec1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_CCS_MODE_H_
+#define _XE_GT_CCS_MODE_H_
+
+#include "xe_device_types.h"
+#include "xe_gt.h"
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+
+void xe_gt_apply_ccs_mode(struct xe_gt *gt);
+
+static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
+{
+	/* Check if there are more than one compute engines available */
+	return hweight32(CCS_MASK(gt)) > 1;
+}
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index a7263738308ec..4e48c4643163c 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -185,6 +185,14 @@ struct xe_gt {
 		spinlock_t lock;
 	} tlb_invalidation;
 
+	/**
+	 * @ccs_mode: Number of compute engines enabled.
+	 * Allows fixed mapping of available compute slices to compute engines.
+	 * By default only the first available compute engine is enabled and all
+	 * available compute slices are allocated to it.
+	 */
+	u32 ccs_mode;
+
 	/** @usm: unified shared memory state */
 	struct {
 		/**
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 2f5ff090aa6bd..ab115588f88ba 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -12,6 +12,7 @@
 #include "regs/xe_guc_regs.h"
 #include "xe_bo.h"
 #include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
 #include "xe_guc.h"
 #include "xe_hw_engine.h"
 #include "xe_lrc.h"
@@ -440,6 +441,8 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
 		{ .reg = RING_HWS_PGA(hwe->mmio_base),			},
 		{ .reg = RING_IMR(hwe->mmio_base),			},
 		{ .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain	},
+		{ .reg = CCS_MODE,
+		  .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
 	};
 	u32 i;
 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 108ecbfe593ee..c56e7cec350ec 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -16,6 +16,7 @@
 #include "xe_execlist.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_fence.h"
 #include "xe_irq.h"
@@ -282,6 +283,13 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
 }
 
+static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
+						 const struct xe_hw_engine *hwe)
+{
+	return xe_gt_ccs_mode_enabled(gt) &&
+	       xe_rtp_match_first_render_or_compute(gt, hwe);
+}
+
 void
 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 {
@@ -306,6 +314,12 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 				 blit_cctl_val,
 				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 		},
+		/* Use Fixed slice CCS mode */
+		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
+		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
+		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
+					   RCU_MODE_FIXED_SLICE_CCS_MODE))
+		},
 		{}
 	};
 
@@ -859,6 +873,12 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
 	if (hwe->class == XE_ENGINE_CLASS_OTHER)
 		return true;
 
+	/* Check for engines disabled by ccs_mode setting */
+	if (xe_gt_ccs_mode_enabled(gt) &&
+	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+	    hwe->logical_instance >= gt->ccs_mode)
+		return true;
+
 	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
 		hwe->instance == gt->usm.reserved_bcs_instance;
 }
-- 
GitLab


From f3bc5bb4d53d2091f03cf43f19e7c9b41db90367 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Wed, 15 Nov 2023 21:59:04 +0000
Subject: [PATCH 2194/2340] drm/xe: Allow userspace to configure CCS mode

Allow user to configure the CCS mode setting through a
'ccs_mode' sysfs interface. Also report the current
CCS mode configuration and number of compute slices
available through this interface.

v2: Rebase, make it platform agnostic
v3: Separte out num_cslices sysfs interface and make
    xe_gt_ccs_mode_sysfs_init() return void

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c          |   3 +
 drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 103 ++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_ccs_mode.h |   1 +
 3 files changed, 107 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 93fccbb4f2adf..154d6c7072b9d 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -378,6 +378,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 			 "failed to register engines sysfs directory, err: %d\n",
 			 err);
 
+	/* Initialize CCS mode sysfs after early initialization of HW engines */
+	xe_gt_ccs_mode_sysfs_init(gt);
+
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 	XE_WARN_ON(err);
 	xe_device_mem_access_put(gt_to_xe(gt));
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index 541c44c70a84a..723cf77c830a1 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -3,10 +3,13 @@
  * Copyright © 2023 Intel Corporation
  */
 
+#include <drm/drm_managed.h>
+
 #include "regs/xe_gt_regs.h"
 #include "xe_assert.h"
 #include "xe_gt.h"
 #include "xe_gt_ccs_mode.h"
+#include "xe_gt_sysfs.h"
 #include "xe_mmio.h"
 
 static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
@@ -76,3 +79,103 @@ void xe_gt_apply_ccs_mode(struct xe_gt *gt)
 
 	__xe_gt_apply_ccs_mode(gt, gt->ccs_mode);
 }
+
+static ssize_t
+num_cslices_show(struct device *kdev,
+		 struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+
+	return sysfs_emit(buf, "%u\n", hweight32(CCS_MASK(gt)));
+}
+
+static DEVICE_ATTR_RO(num_cslices);
+
+static ssize_t
+ccs_mode_show(struct device *kdev,
+	      struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+
+	return sysfs_emit(buf, "%u\n", gt->ccs_mode);
+}
+
+static ssize_t
+ccs_mode_store(struct device *kdev, struct device_attribute *attr,
+	       const char *buff, size_t count)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+	u32 num_engines, num_slices;
+	int ret;
+
+	ret = kstrtou32(buff, 0, &num_engines);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ensure number of engines specified is valid and there is an
+	 * exact multiple of engines for slices.
+	 */
+	num_slices = hweight32(CCS_MASK(gt));
+	if (!num_engines || num_engines > num_slices || num_slices % num_engines) {
+		xe_gt_dbg(gt, "Invalid compute config, %d engines %d slices\n",
+			  num_engines, num_slices);
+		return -EINVAL;
+	}
+
+	if (gt->ccs_mode != num_engines) {
+		xe_gt_info(gt, "Setting compute mode to %d\n", num_engines);
+		gt->ccs_mode = num_engines;
+		xe_gt_reset_async(gt);
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(ccs_mode);
+
+static const struct attribute *gt_ccs_mode_attrs[] = {
+	&dev_attr_ccs_mode.attr,
+	&dev_attr_num_cslices.attr,
+	NULL,
+};
+
+static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
+}
+
+/**
+ * xe_gt_ccs_mode_sysfs_init - Initialize CCS mode sysfs interfaces
+ * @gt: GT structure
+ *
+ * Through a per-gt 'ccs_mode' sysfs interface, the user can enable a fixed
+ * number of compute hardware engines to which the available compute slices
+ * are to be allocated. This user configuration change triggers a gt reset
+ * and it is expected that there are no open drm clients while doing so.
+ * The number of available compute slices is exposed to user through a per-gt
+ * 'num_cslices' sysfs interface.
+ */
+void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	if (!xe_gt_ccs_mode_enabled(gt))
+		return;
+
+	err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs);
+	if (err) {
+		drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
+	if (err) {
+		sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
index e8766879f6ec1..f39975aaaab0d 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
@@ -12,6 +12,7 @@
 #include "xe_platform_types.h"
 
 void xe_gt_apply_ccs_mode(struct xe_gt *gt);
+void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
 
 static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
 {
-- 
GitLab


From 78e2701a2614720d8c47b3a8490bf61c29718e8a Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Wed, 1 Nov 2023 19:02:53 +0000
Subject: [PATCH 2195/2340] drm/xe: Avoid any races around ccs_mode update

Ensure that there are no drm clients when changing CCS mode.
Allow exec_queue creation only with enabled CCS engines.

v2: Rebase

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_device.c       |  9 +++++++++
 drivers/gpu/drm/xe/xe_device_types.h |  9 +++++++++
 drivers/gpu/drm/xe/xe_gt_ccs_mode.c  | 10 ++++++++++
 3 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 8423c817111bf..2e0b2e40d8f3b 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -73,6 +73,10 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 	mutex_init(&xef->exec_queue.lock);
 	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
 
+	spin_lock(&xe->clients.lock);
+	xe->clients.count++;
+	spin_unlock(&xe->clients.lock);
+
 	file->driver_priv = xef;
 	return 0;
 }
@@ -105,6 +109,10 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 	xa_destroy(&xef->vm.xa);
 	mutex_destroy(&xef->vm.lock);
 
+	spin_lock(&xe->clients.lock);
+	xe->clients.count--;
+	spin_unlock(&xe->clients.lock);
+
 	xe_drm_client_put(xef->client);
 	kfree(xef);
 }
@@ -225,6 +233,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	xe->info.force_execlist = xe_modparam.force_execlist;
 
 	spin_lock_init(&xe->irq.lock);
+	spin_lock_init(&xe->clients.lock);
 
 	init_waitqueue_head(&xe->ufence_wq);
 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9a212dbdb8a49..58442da2f6c5c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -310,6 +310,15 @@ struct xe_device {
 		enum xe_sriov_mode __mode;
 	} sriov;
 
+	/** @clients: drm clients info */
+	struct {
+		/** @lock: Protects drm clients info */
+		spinlock_t lock;
+
+		/** @count: number of drm clients */
+		u64 count;
+	} clients;
+
 	/** @usm: unified memory state */
 	struct {
 		/** @asid: convert a ASID to VM */
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index 723cf77c830a1..529fc286cd06c 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -105,6 +105,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
 	       const char *buff, size_t count)
 {
 	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+	struct xe_device *xe = gt_to_xe(gt);
 	u32 num_engines, num_slices;
 	int ret;
 
@@ -123,12 +124,21 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
 		return -EINVAL;
 	}
 
+	/* CCS mode can only be updated when there are no drm clients */
+	spin_lock(&xe->clients.lock);
+	if (xe->clients.count) {
+		spin_unlock(&xe->clients.lock);
+		return -EBUSY;
+	}
+
 	if (gt->ccs_mode != num_engines) {
 		xe_gt_info(gt, "Setting compute mode to %d\n", num_engines);
 		gt->ccs_mode = num_engines;
 		xe_gt_reset_async(gt);
 	}
 
+	spin_unlock(&xe->clients.lock);
+
 	return count;
 }
 
-- 
GitLab


From ed750833f165869abf5effed5e02418d754647b0 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 16:15:03 +0100
Subject: [PATCH 2196/2340] drm/xe: Define DRM_XE_DEBUG_SRIOV config

We will be using extra logs during enabling of the SR-IOV features
or when adding support for new platforms. Define separate config
flag to keep that low level logs disabled if we're not debugging.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231128151507.1015-2-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Kconfig.debug | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 11bb13c73e7b6..549065f57a78e 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -40,6 +40,16 @@ config DRM_XE_DEBUG_VM
 
 	  If in doubt, say "N".
 
+config DRM_XE_DEBUG_SRIOV
+	bool "Enable extra SR-IOV debugging"
+	default n
+	help
+	  Enable extra SR-IOV debugging info.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
 config DRM_XE_DEBUG_MEM
 	bool "Enable passing SYS/VRAM addresses to user space"
 	default n
-- 
GitLab


From 5bcedc9eabdc6ecd7a11f1e6147f0f601d7cdc77 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 16:15:04 +0100
Subject: [PATCH 2197/2340] drm/xe: Introduce SR-IOV logging macros

To simplify logging and help identify SR-IOV specific messages
define set of helper macros that will prefix messages based on
the current SR-IOV mode.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231128151507.1015-3-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_sriov_printk.h | 46 ++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_sriov_printk.h

diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h
new file mode 100644
index 0000000000000..117e1d5416923
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PRINTK_H_
+#define _XE_SRIOV_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_sriov_types.h"
+
+#define xe_sriov_printk_prefix(xe) \
+	((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \
+	 (xe)->sriov.__mode == XE_SRIOV_MODE_VF ? "VF: " : "")
+
+#define xe_sriov_printk(xe, _level, fmt, ...) \
+	drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__)
+
+#define xe_sriov_err(xe, fmt, ...) \
+	xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_err_ratelimited(xe, fmt, ...) \
+	xe_sriov_printk((xe), err_ratelimited, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_warn(xe, fmt, ...) \
+	xe_sriov_printk((xe), warn, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_notice(xe, fmt, ...) \
+	xe_sriov_printk((xe), notice, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_info(xe, fmt, ...) \
+	xe_sriov_printk((xe), info, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_dbg(xe, fmt, ...) \
+	xe_sriov_printk((xe), dbg, fmt, ##__VA_ARGS__)
+
+/* for low level noisy debug messages */
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define xe_sriov_dbg_verbose(xe, fmt, ...) xe_sriov_dbg(xe, fmt, ##__VA_ARGS__)
+#else
+#define xe_sriov_dbg_verbose(xe, fmt, ...) typecheck(struct xe_device *, (xe))
+#endif
+
+#endif
-- 
GitLab


From b1d20405821812ad70d95eefe58cadc6d50b0917 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 16:15:05 +0100
Subject: [PATCH 2198/2340] drm/xe/pf: Introduce Local Memory Translation Table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Local Memory Translation Table (LMTT) provides additional
abstraction for Virtual Functions (VF) accessing device VRAM.

This code is based on prior work of Michal Winiarski.

In this patch we focus only on LMTT initialization. Remaining LMTT
functions will be used once we add a VF provisioning to the PF.

Bspec: 44117, 52404, 59314
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20231128151507.1015-4-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile             |   5 +
 drivers/gpu/drm/xe/regs/xe_sriov_regs.h |  17 +
 drivers/gpu/drm/xe/xe_device_types.h    |   9 +
 drivers/gpu/drm/xe/xe_gt.c              |  10 +
 drivers/gpu/drm/xe/xe_lmtt.c            | 502 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_lmtt.h            |  27 ++
 drivers/gpu/drm/xe/xe_lmtt_2l.c         | 150 +++++++
 drivers/gpu/drm/xe/xe_lmtt_ml.c         | 161 ++++++++
 drivers/gpu/drm/xe/xe_lmtt_types.h      |  63 +++
 9 files changed, 944 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_sriov_regs.h
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt.c
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt.h
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt_2l.c
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt_ml.c
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b9062e3ce6a9a..537828655da9b 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -137,6 +137,11 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o
 # graphics virtualization (SR-IOV) support
 xe-y += xe_sriov.o
 
+xe-$(CONFIG_PCI_IOV) += \
+	xe_lmtt.o \
+	xe_lmtt_2l.o \
+	xe_lmtt_ml.o
+
 # i915 Display compat #defines and #includes
 subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
 	-I$(srctree)/$(src)/display/ext \
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
new file mode 100644
index 0000000000000..58a4e0fad1e10
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _REGS_XE_SRIOV_REGS_H_
+#define _REGS_XE_SRIOV_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define XE2_LMEM_CFG			XE_REG(0x48b0)
+
+#define LMEM_CFG			XE_REG(0xcf58)
+#define   LMEM_EN			REG_BIT(31)
+#define   LMTT_DIR_PTR			REG_GENMASK(30, 0) /* in multiples of 64KB */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 58442da2f6c5c..ffe7c6ef26a95 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -15,6 +15,7 @@
 #include "xe_devcoredump_types.h"
 #include "xe_heci_gsc.h"
 #include "xe_gt_types.h"
+#include "xe_lmtt_types.h"
 #include "xe_platform_types.h"
 #include "xe_pt_types.h"
 #include "xe_pmu.h"
@@ -186,6 +187,14 @@ struct xe_tile {
 		struct xe_sa_manager *kernel_bb_pool;
 	} mem;
 
+	/** @sriov: tile level virtualization data */
+	union {
+		struct {
+			/** @sriov.pf.lmtt: Local Memory Translation Table. */
+			struct xe_lmtt lmtt;
+		} pf;
+	} sriov;
+
 	/** @migrate: Migration helper for vram blits and clearing */
 	struct xe_migrate *migrate;
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 154d6c7072b9d..6645fa158f095 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -36,6 +36,7 @@
 #include "xe_hw_fence.h"
 #include "xe_hw_engine_class_sysfs.h"
 #include "xe_irq.h"
+#include "xe_lmtt.h"
 #include "xe_lrc.h"
 #include "xe_map.h"
 #include "xe_migrate.h"
@@ -46,6 +47,7 @@
 #include "xe_ring_ops.h"
 #include "xe_sa.h"
 #include "xe_sched_job.h"
+#include "xe_sriov.h"
 #include "xe_tuning.h"
 #include "xe_uc.h"
 #include "xe_vm.h"
@@ -344,6 +346,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
 		if (err)
 			goto err_force_wake;
+		if (IS_SRIOV_PF(gt_to_xe(gt)))
+			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
 	}
 
 	err = xe_uc_init(&gt->uc);
@@ -460,6 +464,9 @@ static int all_fw_domain_init(struct xe_gt *gt)
 		xe_gt_apply_ccs_mode(gt);
 	}
 
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	XE_WARN_ON(err);
 	xe_device_mem_access_put(gt_to_xe(gt));
@@ -560,6 +567,9 @@ static int do_gt_restart(struct xe_gt *gt)
 	if (err)
 		return err;
 
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
 	xe_mocs_init(gt);
 	err = xe_uc_start(&gt->uc);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
new file mode 100644
index 0000000000000..d5ada31ae633b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_sriov_regs.h"
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_lmtt.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_res_cursor.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+
+/**
+ * DOC: Local Memory Translation Table
+ *
+ * The Local Memory Translation Table (LMTT) provides additional abstraction
+ * when Virtual Function (VF) is accessing device Local Memory (VRAM).
+ *
+ * The Root LMTT Page Directory contains one entry for each VF. Entries are
+ * indexed by the function number (1-based, index 0 is unused).
+ *
+ * See `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_.
+ */
+
+#define lmtt_assert(lmtt, condition)	xe_tile_assert(lmtt_to_tile(lmtt), condition)
+#define lmtt_debug(lmtt, msg...)	xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg)
+
+static bool xe_has_multi_level_lmtt(struct xe_device *xe)
+{
+	return xe->info.platform == XE_PVC;
+}
+
+static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt)
+{
+	return container_of(lmtt, struct xe_tile, sriov.pf.lmtt);
+}
+
+static struct xe_device *lmtt_to_xe(struct xe_lmtt *lmtt)
+{
+	return tile_to_xe(lmtt_to_tile(lmtt));
+}
+
+static u64 lmtt_page_size(struct xe_lmtt *lmtt)
+{
+	return BIT_ULL(lmtt->ops->lmtt_pte_shift(0));
+}
+
+static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level)
+{
+	unsigned int num_entries = level ? lmtt->ops->lmtt_pte_num(level) : 0;
+	struct xe_lmtt_pt *pt;
+	struct xe_bo *bo;
+	int err;
+
+	pt = kzalloc(struct_size(pt, entries, num_entries), GFP_KERNEL);
+	if (!pt) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
+				  PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+					     lmtt->ops->lmtt_pte_num(level)),
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto out_free_pt;
+	}
+
+	lmtt_assert(lmtt, xe_bo_is_vram(bo));
+
+	pt->level = level;
+	pt->bo = bo;
+	return pt;
+
+out_free_pt:
+	kfree(pt);
+out:
+	return ERR_PTR(err);
+}
+
+static void lmtt_pt_free(struct xe_lmtt_pt *pt)
+{
+	xe_bo_unpin_map_no_vm(pt->bo);
+	kfree(pt);
+}
+
+static int lmtt_init_pd(struct xe_lmtt *lmtt)
+{
+	struct xe_lmtt_pt *pd;
+
+	lmtt_assert(lmtt, !lmtt->pd);
+	lmtt_assert(lmtt, lmtt->ops->lmtt_root_pd_level());
+
+	pd = lmtt_pt_alloc(lmtt, lmtt->ops->lmtt_root_pd_level());
+	if (IS_ERR(pd))
+		return PTR_ERR(pd);
+
+	lmtt->pd = pd;
+	return 0;
+}
+
+static void lmtt_fini_pd(struct xe_lmtt *lmtt)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	unsigned int num_entries = lmtt->ops->lmtt_pte_num(pd->level);
+	unsigned int n = 0;
+
+	/* make sure we don't leak */
+	for (n = 0; n < num_entries; n++)
+		lmtt_assert(lmtt, !pd->entries[n]);
+
+	lmtt->pd = NULL;
+	lmtt_pt_free(pd);
+}
+
+static void fini_lmtt(struct drm_device *drm, void *arg)
+{
+	struct xe_lmtt *lmtt = arg;
+
+	lmtt_assert(lmtt, !(!!lmtt->ops ^ !!lmtt->pd));
+
+	if (!lmtt->pd)
+		return;
+
+	lmtt_fini_pd(lmtt);
+	lmtt->ops = NULL;
+}
+
+/**
+ * xe_lmtt_init - LMTT software initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * The LMTT initialization requires two steps.
+ *
+ * The xe_lmtt_init() checks if LMTT is required on current device and selects
+ * and initialize proper variant of the LMTT Root Directory. Currently supported
+ * variants are `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_.
+ *
+ * In next step xe_lmtt_init_hw() will register this directory on the hardware.
+ *
+ * Notes:
+ * The LMTT allocations are managed and will be implicitly released on driver unload.
+ * This function shall be called only once and only when running as a PF driver.
+ * Any LMTT initialization failure should block VFs enabling.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_init(struct xe_lmtt *lmtt)
+{
+	struct xe_device *xe = lmtt_to_xe(lmtt);
+	int err;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(xe));
+	lmtt_assert(lmtt, !lmtt->ops);
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	if (xe_has_multi_level_lmtt(xe))
+		lmtt->ops = &lmtt_ml_ops;
+	else
+		lmtt->ops = &lmtt_2l_ops;
+
+	err = lmtt_init_pd(lmtt);
+	if (unlikely(err))
+		goto fail;
+
+	return drmm_add_action_or_reset(&xe->drm, fini_lmtt, lmtt);
+
+fail:
+	lmtt->ops = NULL;
+	return err;
+}
+
+static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
+{
+	struct xe_tile *tile = lmtt_to_tile(lmtt);
+	struct xe_device *xe = tile_to_xe(tile);
+	dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE);
+
+	lmtt_debug(lmtt, "DIR offset %pad\n", &offset);
+	lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
+	lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
+
+	xe_mmio_write32(tile->primary_gt,
+			GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
+			LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
+}
+
+/**
+ * xe_lmtt_init_hw - Perform LMTT hardware initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * This function is a second step of the LMTT initialization.
+ * This function registers LMTT Root Directory prepared in xe_lmtt_init().
+ *
+ * This function shall be called after every hardware reset.
+ * This function shall be called only when running as a PF driver.
+ */
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt)
+{
+	if (!lmtt->pd)
+		return;
+
+	lmtt_setup_dir_ptr(lmtt);
+}
+
+static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
+			   u64 pte, unsigned int idx)
+{
+	unsigned int level = pt->level;
+
+	lmtt_assert(lmtt, idx <= lmtt->ops->lmtt_pte_num(level));
+	lmtt_debug(lmtt, "WRITE level=%u index=%u pte=%#llx\n", level, idx, pte);
+
+	switch (lmtt->ops->lmtt_pte_size(level)) {
+	case sizeof(u32):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
+		break;
+	case sizeof(u64):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
+		break;
+	default:
+		lmtt_assert(lmtt, !!!"invalid pte size");
+	}
+}
+
+static void lmtt_destroy_pt(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd)
+{
+	unsigned int num_entries = pd->level ? lmtt->ops->lmtt_pte_num(pd->level) : 0;
+	struct xe_lmtt_pt *pt;
+	unsigned int i;
+
+	for (i = 0; i < num_entries; i++) {
+		pt = pd->entries[i];
+		pd->entries[i] = NULL;
+		if (!pt)
+			continue;
+
+		lmtt_destroy_pt(lmtt, pt);
+	}
+
+	lmtt_pt_free(pd);
+}
+
+static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+
+	pt = pd->entries[vfid];
+	pd->entries[vfid] = NULL;
+	if (!pt)
+		return;
+
+	lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid);
+
+	lmtt_assert(lmtt, pd->level > 0);
+	lmtt_assert(lmtt, pt->level == pd->level - 1);
+	lmtt_destroy_pt(lmtt, pt);
+}
+
+static int __lmtt_alloc_range(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd,
+			      u64 start, u64 end)
+{
+	u64 pte_addr_shift = BIT_ULL(lmtt->ops->lmtt_pte_shift(pd->level));
+	u64 offset;
+	int err;
+
+	lmtt_assert(lmtt, pd->level > 0);
+
+	offset = start;
+	while (offset < end) {
+		struct xe_lmtt_pt *pt;
+		u64 next, pde, pt_addr;
+		unsigned int idx;
+
+		pt = lmtt_pt_alloc(lmtt, pd->level - 1);
+		if (IS_ERR(pt))
+			return PTR_ERR(pt);
+
+		pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE);
+
+		idx = lmtt->ops->lmtt_pte_index(offset, pd->level);
+		pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level);
+
+		lmtt_write_pte(lmtt, pd, pde, idx);
+
+		pd->entries[idx] = pt;
+
+		next = min(end, round_up(offset + 1, pte_addr_shift));
+
+		if (pt->level != 0) {
+			err = __lmtt_alloc_range(lmtt, pt, offset, next);
+			if (err)
+				return err;
+		}
+
+		offset = next;
+	}
+
+	return 0;
+}
+
+static int lmtt_alloc_range(struct xe_lmtt *lmtt, unsigned int vfid, u64 start, u64 end)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+	u64 pt_addr;
+	u64 pde;
+	int err;
+
+	lmtt_assert(lmtt, pd->level > 0);
+	lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level));
+	lmtt_assert(lmtt, IS_ALIGNED(start, lmtt_page_size(lmtt)));
+	lmtt_assert(lmtt, IS_ALIGNED(end, lmtt_page_size(lmtt)));
+
+	if (pd->entries[vfid])
+		return -ENOTEMPTY;
+
+	pt = lmtt_pt_alloc(lmtt, pd->level - 1);
+	if (IS_ERR(pt))
+		return PTR_ERR(pt);
+
+	pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE);
+
+	pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level);
+
+	lmtt_write_pte(lmtt, pd, pde, vfid);
+
+	pd->entries[vfid] = pt;
+
+	if (pt->level != 0) {
+		err = __lmtt_alloc_range(lmtt, pt, start, end);
+		if (err)
+			goto out_free_pt;
+	}
+
+	return 0;
+
+out_free_pt:
+	lmtt_pt_free(pt);
+	return err;
+}
+
+static struct xe_lmtt_pt *lmtt_leaf_pt(struct xe_lmtt *lmtt, unsigned int vfid, u64 addr)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+
+	lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level));
+	pt = pd->entries[vfid];
+
+	while (pt->level) {
+		lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <=
+			    lmtt->ops->lmtt_pte_num(pt->level));
+
+		pt = pt->entries[lmtt->ops->lmtt_pte_index(addr, pt->level)];
+
+		addr >>= lmtt->ops->lmtt_pte_shift(pt->level);
+	}
+
+	lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <=
+		    lmtt->ops->lmtt_pte_num(pt->level));
+	lmtt_assert(lmtt, pt->level != pd->level);
+	lmtt_assert(lmtt, pt->level == 0);
+	return pt;
+}
+
+static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 start)
+{
+	u64 page_size = lmtt_page_size(lmtt);
+	struct xe_res_cursor cur;
+	struct xe_lmtt_pt *pt;
+	u64 addr, vram_offset;
+
+	lmtt_assert(lmtt, IS_ALIGNED(start, page_size));
+	lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size));
+	lmtt_assert(lmtt, xe_bo_is_vram(bo));
+
+	vram_offset = vram_region_gpu_offset(bo->ttm.resource);
+	xe_res_first(bo->ttm.resource, 0, bo->size, &cur);
+	while (cur.remaining) {
+		addr = xe_res_dma(&cur);
+		addr += vram_offset; /* XXX */
+
+		pt = lmtt_leaf_pt(lmtt, vfid, start);
+
+		lmtt_write_pte(lmtt, pt, lmtt->ops->lmtt_pte_encode(addr, 0),
+					 lmtt->ops->lmtt_pte_index(start, 0));
+
+		xe_res_next(&cur, page_size);
+		start += page_size;
+	}
+}
+
+/**
+ * xe_lmtt_prepare_pages - Create VF's LMTT Page Tables.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @range: top range of LMEM offset to be supported
+ *
+ * This function creates empty LMTT page tables for given VF to support
+ * up to maximum #range LMEM offset. The LMTT page tables created by this
+ * function must be released using xe_lmtt_drop_pages() function.
+ *
+ * Notes:
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	return lmtt_alloc_range(lmtt, vfid, 0, range);
+}
+
+/**
+ * xe_lmtt_populate_pages - Update VF's LMTT Page Table Entries.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @bo: the buffer object with LMEM allocation to be mapped
+ * @offset: the offset at which #bo should be mapped
+ *
+ * This function updates VF's LMTT entries to use given buffer object as a backstore.
+ *
+ * Notes:
+ * This function shall be called only after successful preparation of the
+ * VF's LMTT Page Tables. See xe_lmtt_prepare().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_insert_bo(lmtt, vfid, bo, offset);
+	return 0;
+}
+
+/**
+ * xe_lmtt_drop_pages - Remove VF's LMTT Pages.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ *
+ * This function removes all LMTT Page Tables prepared by xe_lmtt_prepare_pages().
+ *
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ */
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_drop_pages(lmtt, vfid);
+}
+
+/**
+ * xe_lmtt_estimate_pt_size - Estimate size of LMTT PT allocations.
+ * @lmtt: the &xe_lmtt
+ * @size: the size of the LMEM to be mapped over LMTT (including any offset)
+ *
+ * This function shall be called only by PF.
+ *
+ * Return: size of the PT allocation(s) needed to support given LMEM size.
+ */
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size)
+{
+	unsigned int level = 0;
+	u64 pt_size;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, IS_DGFX(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, lmtt->ops);
+
+	pt_size = PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+			     lmtt->ops->lmtt_pte_num(level));
+
+	while (++level < lmtt->ops->lmtt_root_pd_level()) {
+		pt_size *= lmtt->ops->lmtt_pte_index(size, level) + 1;
+		pt_size += PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+				      lmtt->ops->lmtt_pte_num(level));
+	}
+
+	return pt_size;
+}
diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h
new file mode 100644
index 0000000000000..cb10ef994db6c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_H_
+#define _XE_LMTT_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_ops;
+
+#ifdef CONFIG_PCI_IOV
+int xe_lmtt_init(struct xe_lmtt *lmtt);
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt);
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range);
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset);
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid);
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size);
+#else
+static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; }
+static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { }
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt_2l.c b/drivers/gpu/drm/xe/xe_lmtt_2l.c
new file mode 100644
index 0000000000000..84bc5c4212b52
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_2l.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Two-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 37 bit (128GB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 37 bit (128GB)
+ *
+ * The following figure illustrates the structure and function of the 2L LMTT::
+ *
+ *            LMTT Directory
+ *           (1 Entry per VF)
+ *            +-----------+                     LMTT (per VF)
+ *            |           |                     +-----------+
+ *            |           |                     |           |
+ *            |           |          index:     |           |
+ *            |           |          LMEM VF    +===========+
+ *            |           |          offset --> |    PTE    | ==> LMEM PF offset
+ *            |           |                     +===========+
+ *   index:   +===========+                     |           |
+ *   VFID --> |    PDE    |  -----------------> +-----------+
+ *            +===========+                    /              \.
+ *            |           |                   /                 \.
+ *            |           |                  /                    \.
+ *            |           |                 /                       \.
+ *            +-----------+ <== [LMTT Directory Ptr]                  \.
+ *           /             \              /                             \.
+ *          /               \         +-----------+-----------------+------+---+
+ *         /                 \        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                   \       +===========+=================+======+===+
+ *       /                     \      |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                       \     +-----------+-----------------+------+---+
+ *     /                         \.
+ *   +-----------+-----------------+------+---+
+ *   | 31:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *   +===========+=================+======+===+
+ *   |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *   +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u32 lmtt_2l_pde_t;
+typedef u32 lmtt_2l_pte_t;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LMTT_2L_128GB)
+#define LMTT_2L_HAW			37 /* 128 GiB */
+#else
+#define LMTT_2L_HAW			35 /* 32 GiB */
+#endif
+
+#define LMTT_2L_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_2L_PDE_LMTT_PTR		GENMASK(LMTT_2L_HAW - 13, 4)
+#define LMTT_2L_PDE_VALID		BIT(0)
+
+#define LMTT_2L_PTE_MAX_NUM		BIT(LMTT_2L_HAW - ilog2(SZ_2M))
+#define LMTT_2L_PTE_LMEM_PAGE		GENMASK(LMTT_2L_HAW - 17, 5)
+#define LMTT_2L_PTE_VALID		BIT(0)
+
+static unsigned int lmtt_2l_root_pd_level(void)
+{
+	return 1; /* implementation is 0-based */
+}
+
+static unsigned int lmtt_2l_pte_num(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		return LMTT_2L_PDE_MAX_NUM;
+	case 0:
+		BUILD_BUG_ON(LMTT_2L_HAW == 37 && LMTT_2L_PTE_MAX_NUM != SZ_64K);
+		BUILD_BUG_ON(LMTT_2L_HAW == 35 && LMTT_2L_PTE_MAX_NUM != SZ_16K);
+		return LMTT_2L_PTE_MAX_NUM;
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_2l_pte_size(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		return sizeof(lmtt_2l_pde_t);
+	case 0:
+		return sizeof(lmtt_2l_pte_t);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_2l_pte_shift(unsigned int level)
+{
+	switch (level) {
+	case 0:
+		return ilog2(SZ_2M);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_2l_pte_index(u64 addr, unsigned int level)
+{
+	addr >>= lmtt_2l_pte_shift(level);
+
+	switch (level) {
+	case 0:
+		/* SZ_2M increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_2L_PTE_MAX_NUM);
+		return addr & (LMTT_2L_PTE_MAX_NUM - 1);
+	default:
+		return 0;
+	}
+}
+
+static u64 lmtt_2l_pte_encode(unsigned long offset, unsigned int level)
+{
+	switch (level) {
+	case 0:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_2L_PTE_VALID;
+	case 1:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_2L_PDE_VALID;
+	default:
+		XE_WARN_ON(true);
+		return 0;
+	}
+}
+
+const struct xe_lmtt_ops lmtt_2l_ops = {
+	.lmtt_root_pd_level = lmtt_2l_root_pd_level,
+	.lmtt_pte_num = lmtt_2l_pte_num,
+	.lmtt_pte_size = lmtt_2l_pte_size,
+	.lmtt_pte_shift = lmtt_2l_pte_shift,
+	.lmtt_pte_index = lmtt_2l_pte_index,
+	.lmtt_pte_encode = lmtt_2l_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_ml.c b/drivers/gpu/drm/xe/xe_lmtt_ml.c
new file mode 100644
index 0000000000000..b21215a2edd6f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_ml.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Multi-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 48 bit (256TB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 48 bit (256TB)
+ *
+ * The following figure illustrates the structure and function of the ML LMTT::
+ *
+ *           LMTT L3 Directory
+ *           (1 Entry per VF)                                       LMTT L1 Leaf
+ *            +-----------+                                         +-----------+
+ *            |           |             LMTT L2 (per VF)            |           |
+ *            |           |              +-----------+              |           |
+ *            |           |              |           |     index:   +===========+
+ *            |           |              |           |     GDPA --> |    PTE    | => LMEM PF offset
+ *            |           |              |           |     34:21    +===========+
+ *            |           |    index:    |           |              |           |
+ *            |           |    LMEM VF   +===========+              |           |
+ *            |           |    offset -> |    PTE    |  ----------> +-----------+
+ *            |           |    GAW-1:35  +===========+              /           \.
+ *   index:   +===========+              |           |             /              \.
+ *   VFID --> |    PDE    |  --------->  +-----------+            /                 \.
+ *            +===========+             /           /            /                    \.
+ *            |           |           /            /            /                       \.
+ *            +-----------+  <== [LMTT Directory Ptr]          /                          \.
+ *           /             \      /              /            /                             \.
+ *          /                \  /               /       +-----------+-----------------+------+---+
+ *         /                  /\               /        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                 /    \            /         +===========+=================+======+===+
+ *       /                /        \         /          |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                                   /           +-----------+-----------------+------+---+
+ *     /                                   /
+ *  +-----------+-----------------+------+---+
+ *  | 63:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *  +===========+=================+======+===+
+ *  |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *  +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u64 lmtt_ml_pde_t;
+typedef u32 lmtt_ml_pte_t;
+
+#define LMTT_ML_HAW			48 /* 256 TiB */
+
+#define LMTT_ML_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_ML_PDE_LMTT_PTR		GENMASK_ULL(LMTT_ML_HAW - 13, 4)
+#define LMTT_ML_PDE_VALID		BIT(0)
+
+#define LMTT_ML_PDE_L2_SHIFT		35
+#define LMTT_ML_PDE_L2_MAX_NUM		BIT_ULL(LMTT_ML_HAW - 35)
+
+#define LMTT_ML_PTE_MAX_NUM		BIT(35 - ilog2(SZ_2M))
+#define LMTT_ML_PTE_LMEM_PAGE		GENMASK(LMTT_ML_HAW - 17, 5)
+#define LMTT_ML_PTE_VALID		BIT(0)
+
+static unsigned int lmtt_ml_root_pd_level(void)
+{
+	return 2; /* implementation is 0-based */
+}
+
+static unsigned int lmtt_ml_pte_num(unsigned int level)
+{
+	switch (level) {
+	case 2:
+		return LMTT_ML_PDE_MAX_NUM;
+	case 1:
+		BUILD_BUG_ON(LMTT_ML_HAW == 48 && LMTT_ML_PDE_L2_MAX_NUM != SZ_8K);
+		return LMTT_ML_PDE_L2_MAX_NUM;
+	case 0:
+		BUILD_BUG_ON(LMTT_ML_PTE_MAX_NUM != SZ_16K);
+		return LMTT_ML_PTE_MAX_NUM;
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_size(unsigned int level)
+{
+	switch (level) {
+	case 2:
+	case 1:
+		return sizeof(lmtt_ml_pde_t);
+	case 0:
+		return sizeof(lmtt_ml_pte_t);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_shift(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		BUILD_BUG_ON(BIT_ULL(LMTT_ML_PDE_L2_SHIFT) != SZ_32G);
+		return ilog2(SZ_32G);
+	case 0:
+		return ilog2(SZ_2M);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_index(u64 addr, unsigned int level)
+{
+	addr >>= lmtt_ml_pte_shift(level);
+
+	switch (level) {
+	case 1:
+		/* SZ_32G increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PDE_L2_MAX_NUM);
+		return addr & (LMTT_ML_PDE_L2_MAX_NUM - 1);
+	case 0:
+		/* SZ_2M increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PTE_MAX_NUM);
+		return addr & (LMTT_ML_PTE_MAX_NUM - 1);
+	default:
+		return 0;
+	}
+}
+
+static u64 lmtt_ml_pte_encode(unsigned long offset, unsigned int level)
+{
+	switch (level) {
+	case 0:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_ML_PTE_VALID;
+	case 1:
+	case 2:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_ML_PDE_VALID;
+	default:
+		XE_WARN_ON(true);
+		return 0;
+	}
+}
+
+const struct xe_lmtt_ops lmtt_ml_ops = {
+	.lmtt_root_pd_level = lmtt_ml_root_pd_level,
+	.lmtt_pte_num = lmtt_ml_pte_num,
+	.lmtt_pte_size = lmtt_ml_pte_size,
+	.lmtt_pte_shift = lmtt_ml_pte_shift,
+	.lmtt_pte_index = lmtt_ml_pte_index,
+	.lmtt_pte_encode = lmtt_ml_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_types.h b/drivers/gpu/drm/xe/xe_lmtt_types.h
new file mode 100644
index 0000000000000..b37abad234164
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_types.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_TYPES_H_
+#define _XE_LMTT_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_pt;
+struct xe_lmtt_ops;
+
+#define LMTT_PTE_INVALID	ULL(0)
+
+/**
+ * struct xe_lmtt - Local Memory Translation Table Manager
+ */
+struct xe_lmtt {
+	/** @pd: root LMTT Directory */
+	struct xe_lmtt_pt *pd;
+
+	/** @ops: LMTT functions */
+	const struct xe_lmtt_ops *ops;
+};
+
+/**
+ * struct xe_lmtt_pt - Local Memory Translation Table Page Table
+ *
+ * Represents single level of the LMTT.
+ */
+struct xe_lmtt_pt {
+	/** @level: page table level, 0 is leaf */
+	unsigned int level;
+
+	/** @bo: buffer object with actual LMTT PTE values */
+	struct xe_bo *bo;
+
+	/** @entries: leaf page tables, exist only for root/non-leaf */
+	struct xe_lmtt_pt *entries[];
+};
+
+/**
+ * struct xe_lmtt_ops - Local Memory Translation Table Operations
+ *
+ * Provides abstraction of the LMTT variants.
+ */
+struct xe_lmtt_ops {
+	/* private: */
+	unsigned int (*lmtt_root_pd_level)(void);
+	unsigned int (*lmtt_pte_num)(unsigned int level);
+	unsigned int (*lmtt_pte_size)(unsigned int level);
+	unsigned int (*lmtt_pte_shift)(unsigned int level);
+	unsigned int (*lmtt_pte_index)(u64 addr, unsigned int level);
+	u64 (*lmtt_pte_encode)(unsigned long offset, unsigned int level);
+};
+
+extern const struct xe_lmtt_ops lmtt_2l_ops;
+extern const struct xe_lmtt_ops lmtt_ml_ops;
+
+#endif
-- 
GitLab


From a43ac2de4c1c788a8731940470a7de77dd60ccea Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 16:15:06 +0100
Subject: [PATCH 2199/2340] drm/xe/kunit: Enable CONFIG_PCI_IOV in .kunitconfig
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will add kunit tests for the PF specific code that is by default
enabled only under CONFIG_PCI_IOV. Update our .kunitconfig to allow
running those test cases by our CI.

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20231128151507.1015-5-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/.kunitconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
index 3769af94e3918..9590eac91af39 100644
--- a/drivers/gpu/drm/xe/.kunitconfig
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -1,6 +1,7 @@
 # xe dependencies
 CONFIG_KUNIT=y
 CONFIG_PCI=y
+CONFIG_PCI_IOV=y
 CONFIG_DEBUG_FS=y
 CONFIG_DRM=y
 CONFIG_DRM_FBDEV_EMULATION=y
-- 
GitLab


From f1a5d808b2a69304d0df06e23f4465a278b2cdd8 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Tue, 28 Nov 2023 16:15:07 +0100
Subject: [PATCH 2200/2340] drm/xe/kunit: Add test for LMTT operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The LMTT variants are abstracted with xe_lmtt_ops. Make sure that
both 2L and ML ops implementations are correct.

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20231128151507.1015-6-michal.wajdeczko@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_lmtt_test.c | 73 +++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_lmtt.c            |  4 ++
 2 files changed, 77 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/tests/xe_lmtt_test.c

diff --git a/drivers/gpu/drm/xe/tests/xe_lmtt_test.c b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c
new file mode 100644
index 0000000000000..1f1557c45ae1c
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+static const struct lmtt_ops_param {
+	const char *desc;
+	const struct xe_lmtt_ops *ops;
+} lmtt_ops_params[] = {
+	{ "2-level", &lmtt_2l_ops, },
+	{ "multi-level", &lmtt_ml_ops, },
+};
+
+static void lmtt_ops_param_get_desc(const struct lmtt_ops_param *p, char *desc)
+{
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", p->desc);
+}
+
+KUNIT_ARRAY_PARAM(lmtt_ops, lmtt_ops_params, lmtt_ops_param_get_desc);
+
+static void test_ops(struct kunit *test)
+{
+	const struct lmtt_ops_param *p = test->param_value;
+	const struct xe_lmtt_ops *ops = p->ops;
+	unsigned int n;
+
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_root_pd_level);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_num);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_size);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_shift);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_index);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_encode);
+
+	KUNIT_EXPECT_NE(test, ops->lmtt_root_pd_level(), 0);
+
+	for (n = 0; n <= ops->lmtt_root_pd_level(); n++) {
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_num(n), 0,
+				    "level=%u", n);
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_size(n), 0,
+				    "level=%u", n);
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_encode(0, n), LMTT_PTE_INVALID,
+				    "level=%u", n);
+	}
+
+	for (n = 0; n < ops->lmtt_root_pd_level(); n++) {
+		u64 addr = BIT_ULL(ops->lmtt_pte_shift(n));
+
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_shift(n), 0,
+				    "level=%u", n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr - 1, n), 0,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr + 1, n), 1,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2 - 1, n), 1,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2, n), 2,
+				    "addr=%#llx level=%u", addr, n);
+	}
+}
+
+static struct kunit_case lmtt_test_cases[] = {
+	KUNIT_CASE_PARAM(test_ops, lmtt_ops_gen_params),
+	{}
+};
+
+static struct kunit_suite lmtt_suite = {
+	.name = "lmtt",
+	.test_cases = lmtt_test_cases,
+};
+
+kunit_test_suites(&lmtt_suite);
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index d5ada31ae633b..0d7c5514e0928 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -500,3 +500,7 @@ u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size)
 
 	return pt_size;
 }
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_lmtt_test.c"
+#endif
-- 
GitLab


From 5a92da34ddb4ec75a037d4a956afa993876c67d4 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Tue, 5 Dec 2023 06:52:35 -0800
Subject: [PATCH 2201/2340] drm/xe: Rename info.supports_* to info.has_*

Rename supports_mmio_ext and supports_usm to use a has_ prefix so the
flags are grouped together. This settles on just one variant for
positive info matching ("has_") and one for negative ("skip_").

Also make sure the has_* flags are grouped together in xe_pci.c.

Reviewed-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://lore.kernel.org/r/20231205145235.2114761-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  4 ++--
 drivers/gpu/drm/xe/xe_debugfs.c       |  2 +-
 drivers/gpu/drm/xe/xe_device_types.h  |  8 ++++----
 drivers/gpu/drm/xe/xe_exec_queue.c    |  6 +++---
 drivers/gpu/drm/xe/xe_gt.c            |  2 +-
 drivers/gpu/drm/xe/xe_gt.h            |  2 +-
 drivers/gpu/drm/xe/xe_gt_pagefault.c  |  4 ++--
 drivers/gpu/drm/xe/xe_guc_ads.c       |  4 ++--
 drivers/gpu/drm/xe/xe_hw_engine.c     |  4 ++--
 drivers/gpu/drm/xe/xe_lrc.c           |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c       | 16 ++++++++--------
 drivers/gpu/drm/xe/xe_mmio.c          |  5 +++--
 drivers/gpu/drm/xe/xe_pci.c           | 20 ++++++++++----------
 drivers/gpu/drm/xe/xe_pci_types.h     |  2 +-
 drivers/gpu/drm/xe/xe_vm.c            |  2 +-
 15 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 83d6a66ed3695..47fcd6e6b7777 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -37,7 +37,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
 			  struct xe_bb *bb, u32 second_idx, const char *str,
 			  struct kunit *test)
 {
-	u64 batch_base = xe_migrate_batch_base(m, xe->info.supports_usm);
+	u64 batch_base = xe_migrate_batch_base(m, xe->info.has_usm);
 	struct xe_sched_job *job = xe_bb_create_migration_job(m->q, bb,
 							      batch_base,
 							      second_idx);
@@ -308,7 +308,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		goto free_pt;
 	}
 
-	bb = xe_bb_new(tile->primary_gt, 32, xe->info.supports_usm);
+	bb = xe_bb_new(tile->primary_gt, 32, xe->info.has_usm);
 	if (IS_ERR(bb)) {
 		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
 			   PTR_ERR(bb));
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 2de8a0b9da183..f1e80be8b9306 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -53,8 +53,8 @@ static int info(struct seq_file *m, void *data)
 	drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
 	drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
 	drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist));
-	drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm));
 	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
+	drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm));
 	for_each_gt(gt, xe, id) {
 		drm_printf(&p, "gt%d force wake %d\n", id,
 			   xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index ffe7c6ef26a95..d1a48456e9a31 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -251,8 +251,6 @@ struct xe_device {
 
 		/** @is_dgfx: is discrete device */
 		u8 is_dgfx:1;
-		/** @supports_usm: Supports unified shared memory */
-		u8 supports_usm:1;
 		/** @has_asid: Has address space ID */
 		u8 has_asid:1;
 		/** @force_execlist: Forced execlist submission */
@@ -261,18 +259,20 @@ struct xe_device {
 		u8 has_flat_ccs:1;
 		/** @has_llc: Device has a shared CPU+GPU last level cache */
 		u8 has_llc:1;
+		/** @has_mmio_ext: Device has extra MMIO address range */
+		u8 has_mmio_ext:1;
 		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
 		/** @has_sriov: Supports SR-IOV */
 		u8 has_sriov:1;
+		/** @has_usm: Device has unified shared memory support */
+		u8 has_usm:1;
 		/** @enable_display: display enabled */
 		u8 enable_display:1;
 		/** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
 		u8 skip_mtcfg:1;
 		/** @skip_pcode: skip access to PCODE uC */
 		u8 skip_pcode:1;
-		/** @supports_mmio_ext: supports MMIO extension/s */
-		u8 supports_mmio_ext:1;
 		/** @has_heci_gscfi: device has heci gscfi */
 		u8 has_heci_gscfi:1;
 		/** @skip_guc_pc: Skip GuC based PM feature init */
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 985807d6abbb6..85bc25fe99edc 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -362,7 +362,7 @@ static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
+	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
 		return -EINVAL;
 
 	q->usm.acc_trigger = value;
@@ -376,7 +376,7 @@ static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
+	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
 		return -EINVAL;
 
 	q->usm.acc_notify = value;
@@ -390,7 +390,7 @@ static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_q
 	if (XE_IOCTL_DBG(xe, !create))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, !xe->info.supports_usm))
+	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
 		return -EINVAL;
 
 	if (value > DRM_XE_ACC_GRANULARITY_64M)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 6645fa158f095..fe3c7aac2369f 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -435,7 +435,7 @@ static int all_fw_domain_init(struct xe_gt *gt)
 		/*
 		 * USM has its only SA pool to non-block behind user operations
 		 */
-		if (gt_to_xe(gt)->info.supports_usm) {
+		if (gt_to_xe(gt)->info.has_usm) {
 			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16);
 			if (IS_ERR(gt->usm.bb_pool)) {
 				err = PTR_ERR(gt->usm.bb_pool);
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index a818cc9c8fd09..f3c780bd266dd 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -65,7 +65,7 @@ static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
-	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
 		hwe->instance == gt->usm.reserved_bcs_instance;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index a5358064a4e0f..4489aadc7a525 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -398,7 +398,7 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	int i;
 
-	if (!xe->info.supports_usm)
+	if (!xe->info.has_usm)
 		return 0;
 
 	for (i = 0; i < NUM_PF_QUEUE; ++i) {
@@ -431,7 +431,7 @@ void xe_gt_pagefault_reset(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	int i;
 
-	if (!xe->info.supports_usm)
+	if (!xe->info.has_usm)
 		return;
 
 	for (i = 0; i < NUM_PF_QUEUE; ++i) {
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index ab115588f88ba..390e6f1bf4e1c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -141,7 +141,7 @@ static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
 {
 	struct xe_device *xe = ads_to_xe(ads);
 
-	if (!xe->info.supports_usm)
+	if (!xe->info.has_usm)
 		return 0;
 
 	return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
@@ -598,7 +598,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
 	guc_capture_list_init(ads);
 	guc_doorbell_init(ads);
 
-	if (xe->info.supports_usm) {
+	if (xe->info.has_usm) {
 		guc_um_init_params(ads);
 		ads_blob_write(ads, ads.um_init_data, base +
 			       offsetof(struct __guc_ads_blob, um_init_params));
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index c56e7cec350ec..86b863b990658 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -464,7 +464,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 		xe_hw_engine_enable_ring(hwe);
 
 	/* We reserve the highest BCS instance for USM */
-	if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY)
+	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
 		gt->usm.reserved_bcs_instance = hwe->instance;
 
 	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
@@ -879,6 +879,6 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
 	    hwe->logical_instance >= gt->ccs_mode)
 		return true;
 
-	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
 		hwe->instance == gt->usm.reserved_bcs_instance;
 }
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 17c0eb9e62cfb..d6dfbd0bdc70e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -759,7 +759,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
 				     (q->usm.acc_granularity <<
 				      ACC_GRANULARITY_S) | vm->usm.asid);
-	if (xe->info.supports_usm && vm)
+	if (xe->info.has_usm && vm)
 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
 				     (q->usm.acc_notify << ACC_NOTIFY_S) |
 				     q->usm.acc_trigger);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 84e138df01722..2ca927f3fb2af 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -217,7 +217,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	if (!IS_DGFX(xe)) {
 		/* Write out batch too */
 		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
-		if (xe->info.supports_usm) {
+		if (xe->info.has_usm) {
 			batch = tile->primary_gt->usm.bb_pool->bo;
 			m->usm_batch_base_ofs = m->batch_base_ofs;
 		}
@@ -237,7 +237,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
 		m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
 
-		if (xe->info.supports_usm) {
+		if (xe->info.has_usm) {
 			batch = tile->primary_gt->usm.bb_pool->bo;
 			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
 			m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
@@ -374,7 +374,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		return ERR_PTR(err);
 	}
 
-	if (xe->info.supports_usm) {
+	if (xe->info.has_usm) {
 		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
 							   XE_ENGINE_CLASS_COPY,
 							   primary_gt->usm.reserved_bcs_instance,
@@ -397,7 +397,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		xe_vm_close_and_put(vm);
 		return ERR_CAST(m->q);
 	}
-	if (xe->info.supports_usm)
+	if (xe->info.has_usm)
 		m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
 
 	mutex_init(&m->job_mutex);
@@ -706,7 +706,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		u32 update_idx;
 		u64 ccs_ofs, ccs_size;
 		u32 ccs_pt;
-		bool usm = xe->info.supports_usm;
+		bool usm = xe->info.has_usm;
 
 		src_L0 = xe_migrate_res_sizes(&src_it);
 		dst_L0 = xe_migrate_res_sizes(&dst_it);
@@ -956,7 +956,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		struct xe_sched_job *job;
 		struct xe_bb *bb;
 		u32 batch_size, update_idx;
-		bool usm = xe->info.supports_usm;
+		bool usm = xe->info.has_usm;
 
 		clear_L0 = xe_migrate_res_sizes(&src_it);
 		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
@@ -1227,7 +1227,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
 	u64 addr;
 	int err = 0;
-	bool usm = !q && xe->info.supports_usm;
+	bool usm = !q && xe->info.has_usm;
 	bool first_munmap_rebind = vma &&
 		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
 	struct xe_exec_queue *q_override = !q ? m->q : q;
@@ -1264,7 +1264,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	 */
 	xe_tile_assert(tile, batch_size < SZ_128K);
 
-	bb = xe_bb_new(gt, batch_size, !q && xe->info.supports_usm);
+	bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm);
 	if (IS_ERR(bb))
 		return ERR_CAST(bb);
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 35aeb50b71589..f660cfb79f504 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -333,11 +333,12 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 	}
 
 add_mmio_ext:
-	/* By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
+	/*
+	 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
 	 * When supported, there could be an additional contiguous multi-tile MMIO extension
 	 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
 	 */
-	if (xe->info.supports_mmio_ext) {
+	if (xe->info.has_mmio_ext) {
 		regs = xe->mmio.regs + tile_mmio_size * tile_count;
 
 		for_each_tile(tile, xe, id) {
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 1488903573139..34dcc743e2c7c 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -60,15 +60,15 @@ struct xe_device_desc {
 
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
+
 	u8 has_display:1;
 	u8 has_heci_gscfi:1;
-
 	u8 has_llc:1;
+	u8 has_mmio_ext:1;
 	u8 has_sriov:1;
+	u8 skip_guc_pc:1;
 	u8 skip_mtcfg:1;
 	u8 skip_pcode:1;
-	u8 supports_mmio_ext:1;
-	u8 skip_guc_pc:1;
 };
 
 __diag_push();
@@ -148,7 +148,7 @@ static const struct xe_graphics_desc graphics_xehpc = {
 
 	.has_asid = 1,
 	.has_flat_ccs = 0,
-	.supports_usm = 1,
+	.has_usm = 1,
 };
 
 static const struct xe_graphics_desc graphics_xelpg = {
@@ -166,7 +166,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
 	.has_asid = 1, \
 	.has_flat_ccs = 0 /* FIXME: implementation missing */, \
 	.has_range_tlb_invalidation = 1, \
-	.supports_usm = 0 /* FIXME: implementation missing */, \
+	.has_usm = 0 /* FIXME: implementation missing */, \
 	.va_bits = 48, \
 	.vm_max_level = 4, \
 	.hw_engine_mask = \
@@ -279,8 +279,8 @@ static const struct xe_device_desc dg1_desc = {
 	DGFX_FEATURES,
 	PLATFORM(XE_DG1),
 	.has_display = true,
-	.require_force_probe = true,
 	.has_heci_gscfi = 1,
+	.require_force_probe = true,
 };
 
 static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
@@ -321,8 +321,8 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	DGFX_FEATURES,
 	PLATFORM(XE_PVC),
 	.has_display = false,
-	.require_force_probe = true,
 	.has_heci_gscfi = 1,
+	.require_force_probe = true,
 };
 
 static const struct xe_device_desc mtl_desc = {
@@ -550,11 +550,11 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
 	xe->info.has_llc = desc->has_llc;
+	xe->info.has_mmio_ext = desc->has_mmio_ext;
 	xe->info.has_sriov = desc->has_sriov;
+	xe->info.skip_guc_pc = desc->skip_guc_pc;
 	xe->info.skip_mtcfg = desc->skip_mtcfg;
 	xe->info.skip_pcode = desc->skip_pcode;
-	xe->info.supports_mmio_ext = desc->supports_mmio_ext;
-	xe->info.skip_guc_pc = desc->skip_guc_pc;
 
 	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
 				  xe_modparam.enable_display &&
@@ -616,10 +616,10 @@ static int xe_info_init(struct xe_device *xe,
 	xe->info.vram_flags = graphics_desc->vram_flags;
 	xe->info.va_bits = graphics_desc->va_bits;
 	xe->info.vm_max_level = graphics_desc->vm_max_level;
-	xe->info.supports_usm = graphics_desc->supports_usm;
 	xe->info.has_asid = graphics_desc->has_asid;
 	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
 	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
+	xe->info.has_usm = graphics_desc->has_usm;
 
 	/*
 	 * All platforms have at least one primary GT.  Any platform with media
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index dd3546ba6f905..b1ad12fa22d65 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -27,7 +27,7 @@ struct xe_graphics_desc {
 	u8 has_asid:1;
 	u8 has_flat_ccs:1;
 	u8 has_range_tlb_invalidation:1;
-	u8 supports_usm:1;
+	u8 has_usm:1;
 };
 
 struct xe_media_desc {
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index f71285e8ef108..265cc0c5e4401 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1944,7 +1944,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
 
 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
-			 !xe->info.supports_usm))
+			 !xe->info.has_usm))
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
-- 
GitLab


From 06d5ae90579e774934552ca023c4bbc56e8253f4 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Tue, 5 Dec 2023 16:41:42 +0200
Subject: [PATCH 2202/2340] drm/xe/vm: Avoid asid lookup if none allocated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The destroy path can and will get called for incomplete
vm objects on error paths, where the asid is not yet allocated.
This leads to lookup fail and assert triggered.

Fix this by not asserting of asid existence if vm never
got assigned one.

Cc: Ohad Sharabi <osharabi@habana.ai>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 265cc0c5e4401..11667529e40be 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1634,7 +1634,7 @@ static void vm_destroy_work_func(struct work_struct *w)
 	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
 		xe_device_mem_access_put(xe);
 
-		if (xe->info.has_asid) {
+		if (xe->info.has_asid && vm->usm.asid) {
 			mutex_lock(&xe->usm.lock);
 			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
 			xe_assert(xe, lookup == vm);
-- 
GitLab


From 2ab3cc4bf5a3dd760b697650d5e5bdb240fdf94a Mon Sep 17 00:00:00 2001
From: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
Date: Fri, 8 Dec 2023 00:11:50 -0500
Subject: [PATCH 2203/2340] drm/xe: Change the name of frequency sysfs
 attributes

Switching the names of frequency sysfs attrbutes to align with
required devfreq changes. The name changes are as below;

-freq_act -> act_freq
-freq_cur -> cur_freq
-freq_rpn -> rpn_freq
-freq_rpe -> rpe_freq
-freq_rp0 -> rp0_freq
-freq_min -> min_freq
-freq_max -> max_freq

Signed-off-by: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 60 +++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index d2605a684b1c5..b1876fbea669e 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -60,15 +60,15 @@
  * Xe's GuC PC provides a sysfs API for frequency management:
  *
  * device/gt#/freq_* *read-only* files:
- * - freq_act: The actual resolved frequency decided by PCODE.
- * - freq_cur: The current one requested by GuC PC to the Hardware.
- * - freq_rpn: The Render Performance (RP) N level, which is the minimal one.
- * - freq_rpe: The Render Performance (RP) E level, which is the efficient one.
- * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one.
+ * - act_freq: The actual resolved frequency decided by PCODE.
+ * - cur_freq: The current one requested by GuC PC to the Hardware.
+ * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
+ * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
+ * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
  *
  * device/gt#/freq_* *read-write* files:
- * - freq_min: GuC PC min request.
- * - freq_max: GuC PC max request.
+ * - min_freq: GuC PC min request.
+ * - max_freq: GuC PC max request.
  *             If max <= min, then freq_min becomes a fixed frequency request.
  *
  * Render-C States:
@@ -388,7 +388,7 @@ static void pc_update_rp_values(struct xe_guc_pc *pc)
 	pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
 }
 
-static ssize_t freq_act_show(struct device *dev,
+static ssize_t act_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct kobject *kobj = &dev->kobj;
@@ -413,9 +413,9 @@ static ssize_t freq_act_show(struct device *dev,
 	xe_device_mem_access_put(gt_to_xe(gt));
 	return ret;
 }
-static DEVICE_ATTR_RO(freq_act);
+static DEVICE_ATTR_RO(act_freq);
 
-static ssize_t freq_cur_show(struct device *dev,
+static ssize_t cur_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct kobject *kobj = &dev->kobj;
@@ -442,18 +442,18 @@ out:
 	xe_device_mem_access_put(gt_to_xe(gt));
 	return ret;
 }
-static DEVICE_ATTR_RO(freq_cur);
+static DEVICE_ATTR_RO(cur_freq);
 
-static ssize_t freq_rp0_show(struct device *dev,
+static ssize_t rp0_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
 
 	return sysfs_emit(buf, "%d\n", pc->rp0_freq);
 }
-static DEVICE_ATTR_RO(freq_rp0);
+static DEVICE_ATTR_RO(rp0_freq);
 
-static ssize_t freq_rpe_show(struct device *dev,
+static ssize_t rpe_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
@@ -465,18 +465,18 @@ static ssize_t freq_rpe_show(struct device *dev,
 	xe_device_mem_access_put(xe);
 	return sysfs_emit(buf, "%d\n", pc->rpe_freq);
 }
-static DEVICE_ATTR_RO(freq_rpe);
+static DEVICE_ATTR_RO(rpe_freq);
 
-static ssize_t freq_rpn_show(struct device *dev,
+static ssize_t rpn_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
 
 	return sysfs_emit(buf, "%d\n", pc->rpn_freq);
 }
-static DEVICE_ATTR_RO(freq_rpn);
+static DEVICE_ATTR_RO(rpn_freq);
 
-static ssize_t freq_min_show(struct device *dev,
+static ssize_t min_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
@@ -513,7 +513,7 @@ out:
 	return ret;
 }
 
-static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr,
+static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 			      const char *buff, size_t count)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
@@ -543,9 +543,9 @@ out:
 	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret ?: count;
 }
-static DEVICE_ATTR_RW(freq_min);
+static DEVICE_ATTR_RW(min_freq);
 
-static ssize_t freq_max_show(struct device *dev,
+static ssize_t max_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
@@ -571,7 +571,7 @@ out:
 	return ret;
 }
 
-static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr,
+static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 			      const char *buff, size_t count)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
@@ -601,7 +601,7 @@ out:
 	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret ?: count;
 }
-static DEVICE_ATTR_RW(freq_max);
+static DEVICE_ATTR_RW(max_freq);
 
 /**
  * xe_guc_pc_c_status - get the current GT C state
@@ -667,13 +667,13 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
 }
 
 static const struct attribute *pc_attrs[] = {
-	&dev_attr_freq_act.attr,
-	&dev_attr_freq_cur.attr,
-	&dev_attr_freq_rp0.attr,
-	&dev_attr_freq_rpe.attr,
-	&dev_attr_freq_rpn.attr,
-	&dev_attr_freq_min.attr,
-	&dev_attr_freq_max.attr,
+	&dev_attr_act_freq.attr,
+	&dev_attr_cur_freq.attr,
+	&dev_attr_rp0_freq.attr,
+	&dev_attr_rpe_freq.attr,
+	&dev_attr_rpn_freq.attr,
+	&dev_attr_min_freq.attr,
+	&dev_attr_max_freq.attr,
 	NULL
 };
 
-- 
GitLab


From bef52b5c7a1904fc6e1bdda4a0e6dc460f562856 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 8 Dec 2023 00:11:51 -0500
Subject: [PATCH 2204/2340] drm/xe: Create a xe_gt_freq component for raw
 management and sysfs

Goals of this new xe_gt_freq component:
1. Detach sysfs controls and raw freq management from GuC SLPC.
2. Create a directory that could later be aligned with devfreq.
3. Encapsulate all the freq control in a single directory. Although
   we only have one freq domain per GT, already start with a numbered
   freq0 directory so it could be expanded in the future if multiple
   domains or PLL are needed.

Note: Although in the goal #1, the raw freq management control is
mentioned, this patch only starts by the sysfs control. The RP freq
configuration and init freq selection are still under the guc_pc, but
should be moved to this component in a follow-up patch.

v2: - Add /tile# to the doc and remove unnecessary kobject_put (Riana)
    - s/ssize_t/int on some ret variables (Vinay)

Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile      |   1 +
 drivers/gpu/drm/xe/xe_gt.c       |   3 +
 drivers/gpu/drm/xe/xe_gt_freq.c  | 216 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_freq.h  |  13 ++
 drivers/gpu/drm/xe/xe_gt_types.h |   3 +
 drivers/gpu/drm/xe/xe_guc_pc.c   | 205 +++++++++++++++--------------
 drivers/gpu/drm/xe/xe_guc_pc.h   |  10 ++
 7 files changed, 347 insertions(+), 104 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_gt_freq.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_freq.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 537828655da9b..62ee772a1d39f 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -73,6 +73,7 @@ xe-y += xe_bb.o \
 	xe_gt_ccs_mode.o \
 	xe_gt_clock.o \
 	xe_gt_debugfs.o \
+	xe_gt_freq.o \
 	xe_gt_idle.o \
 	xe_gt_mcr.o \
 	xe_gt_pagefault.o \
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index fe3c7aac2369f..dfd9cf01a5d58 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -24,6 +24,7 @@
 #include "xe_gsc.h"
 #include "xe_gt_ccs_mode.h"
 #include "xe_gt_clock.h"
+#include "xe_gt_freq.h"
 #include "xe_gt_idle.h"
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
@@ -511,6 +512,8 @@ int xe_gt_init(struct xe_gt *gt)
 	if (err)
 		return err;
 
+	xe_gt_freq_init(gt);
+
 	xe_force_wake_init_engines(gt, gt_to_fw(gt));
 
 	err = all_fw_domain_init(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
new file mode 100644
index 0000000000000..2c3830d0e9e51
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt_freq.h"
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_pc.h"
+
+/**
+ * DOC: Xe GT Frequency Management
+ *
+ * This component is responsible for the raw GT frequency management, including
+ * the sysfs API.
+ *
+ * Underneath, Xe enables GuC SLPC automated frequency management. GuC is then
+ * allowed to request PCODE any frequency between the Minimum and the Maximum
+ * selected by this component. Furthermore, it is important to highlight that
+ * PCODE is the ultimate decision maker of the actual running frequency, based
+ * on thermal and other running conditions.
+ *
+ * Xe's Freq provides a sysfs API for frequency management:
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-only* files:
+ * - act_freq: The actual resolved frequency decided by PCODE.
+ * - cur_freq: The current one requested by GuC PC to the PCODE.
+ * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
+ * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
+ * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-write* files:
+ * - min_freq: Min frequency request.
+ * - max_freq: Max frequency request.
+ *             If max <= min, then freq_min becomes a fixed frequency request.
+ */
+
+static struct xe_guc_pc *
+dev_to_pc(struct device *dev)
+{
+	return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc;
+}
+
+static ssize_t act_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc));
+}
+static DEVICE_ATTR_RO(act_freq);
+
+static ssize_t cur_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = xe_guc_pc_get_cur_freq(pc, &freq);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+static DEVICE_ATTR_RO(cur_freq);
+
+static ssize_t rp0_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc));
+}
+static DEVICE_ATTR_RO(rp0_freq);
+
+static ssize_t rpe_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc));
+}
+static DEVICE_ATTR_RO(rpe_freq);
+
+static ssize_t rpn_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpn_freq(pc));
+}
+static DEVICE_ATTR_RO(rpn_freq);
+
+static ssize_t min_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = xe_guc_pc_get_min_freq(pc, &freq);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+
+static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = kstrtou32(buff, 0, &freq);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_set_min_freq(pc, freq);
+	if (ret)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(min_freq);
+
+static ssize_t max_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = xe_guc_pc_get_max_freq(pc, &freq);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+
+static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = kstrtou32(buff, 0, &freq);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_set_max_freq(pc, freq);
+	if (ret)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(max_freq);
+
+static const struct attribute *freq_attrs[] = {
+	&dev_attr_act_freq.attr,
+	&dev_attr_cur_freq.attr,
+	&dev_attr_rp0_freq.attr,
+	&dev_attr_rpe_freq.attr,
+	&dev_attr_rpn_freq.attr,
+	&dev_attr_min_freq.attr,
+	&dev_attr_max_freq.attr,
+	NULL
+};
+
+static void freq_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, freq_attrs);
+	kobject_put(kobj);
+}
+
+/**
+ * xe_gt_freq_init - Initialize Xe Freq component
+ * @gt: Xe GT object
+ *
+ * It needs to be initialized after GT Sysfs and GuC PC components are ready.
+ */
+void xe_gt_freq_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
+	if (!gt->freq) {
+		drm_warn(&xe->drm, "failed to add freq0 directory to %s, err: %d\n",
+			 kobject_name(gt->sysfs), err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
+	if (err) {
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+		return;
+	}
+
+	err = sysfs_create_files(gt->freq, freq_attrs);
+	if (err)
+		drm_warn(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
+			 kobject_name(gt->freq), err);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h
new file mode 100644
index 0000000000000..f3fe3c90491a9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_freq.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_FREQ_H_
+#define _XE_GT_FREQ_H_
+
+struct xe_gt;
+
+void xe_gt_freq_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 4e48c4643163c..f746846604759 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -307,6 +307,9 @@ struct xe_gt {
 	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
 	struct kobject *sysfs;
 
+	/** @freq: Main GT freq sysfs control */
+	struct kobject *freq;
+
 	/** @mocs: info */
 	struct {
 		/** @uc_index: UC index */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index b1876fbea669e..2b627ea950f10 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -57,19 +57,6 @@
  *
  * Xe driver enables SLPC with all of its defaults features and frequency
  * selection, which varies per platform.
- * Xe's GuC PC provides a sysfs API for frequency management:
- *
- * device/gt#/freq_* *read-only* files:
- * - act_freq: The actual resolved frequency decided by PCODE.
- * - cur_freq: The current one requested by GuC PC to the Hardware.
- * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
- * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
- * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
- *
- * device/gt#/freq_* *read-write* files:
- * - min_freq: GuC PC min request.
- * - max_freq: GuC PC max request.
- *             If max <= min, then freq_min becomes a fixed frequency request.
  *
  * Render-C States:
  * ================
@@ -100,12 +87,6 @@ pc_to_gt(struct xe_guc_pc *pc)
 	return container_of(pc, struct xe_gt, uc.guc.pc);
 }
 
-static struct xe_guc_pc *
-dev_to_pc(struct device *dev)
-{
-	return &kobj_to_gt(&dev->kobj)->uc.guc.pc;
-}
-
 static struct iosys_map *
 pc_to_maps(struct xe_guc_pc *pc)
 {
@@ -388,14 +369,17 @@ static void pc_update_rp_values(struct xe_guc_pc *pc)
 	pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
 }
 
-static ssize_t act_freq_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+/**
+ * xe_guc_pc_get_act_freq - Get Actual running frequency
+ * @pc: The GuC PC
+ *
+ * Returns: The Actual running frequency. Which might be 0 if GT is in Render-C sleep state (RC6).
+ */
+u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
 {
-	struct kobject *kobj = &dev->kobj;
-	struct xe_gt *gt = kobj_to_gt(kobj);
+	struct xe_gt *gt = pc_to_gt(pc);
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 freq;
-	ssize_t ret;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
 
@@ -408,20 +392,25 @@ static ssize_t act_freq_show(struct device *dev,
 		freq = REG_FIELD_GET(CAGF_MASK, freq);
 	}
 
-	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
+	freq = decode_freq(freq);
 
 	xe_device_mem_access_put(gt_to_xe(gt));
-	return ret;
+
+	return freq;
 }
-static DEVICE_ATTR_RO(act_freq);
 
-static ssize_t cur_freq_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+/**
+ * xe_guc_pc_get_cur_freq - Get Current requested frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
 {
-	struct kobject *kobj = &dev->kobj;
-	struct xe_gt *gt = kobj_to_gt(kobj);
-	u32 freq;
-	ssize_t ret;
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret;
 
 	xe_device_mem_access_get(gt_to_xe(gt));
 	/*
@@ -432,56 +421,69 @@ static ssize_t cur_freq_show(struct device *dev,
 	if (ret)
 		goto out;
 
-	freq = xe_mmio_read32(gt, RPNSWREQ);
+	*freq = xe_mmio_read32(gt, RPNSWREQ);
 
-	freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
-	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
+	*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
+	*freq = decode_freq(*freq);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 out:
 	xe_device_mem_access_put(gt_to_xe(gt));
 	return ret;
 }
-static DEVICE_ATTR_RO(cur_freq);
 
-static ssize_t rp0_freq_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+/**
+ * xe_guc_pc_get_rp0_freq - Get the RP0 freq
+ * @pc: The GuC PC
+ *
+ * Returns: RP0 freq.
+ */
+u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
-
-	return sysfs_emit(buf, "%d\n", pc->rp0_freq);
+	return pc->rp0_freq;
 }
-static DEVICE_ATTR_RO(rp0_freq);
 
-static ssize_t rpe_freq_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+/**
+ * xe_guc_pc_get_rpe_freq - Get the RPe freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPe freq.
+ */
+u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
 	struct xe_gt *gt = pc_to_gt(pc);
 	struct xe_device *xe = gt_to_xe(gt);
 
 	xe_device_mem_access_get(xe);
 	pc_update_rp_values(pc);
 	xe_device_mem_access_put(xe);
-	return sysfs_emit(buf, "%d\n", pc->rpe_freq);
+
+	return pc->rpe_freq;
 }
-static DEVICE_ATTR_RO(rpe_freq);
 
-static ssize_t rpn_freq_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+/**
+ * xe_guc_pc_get_rpn_freq - Get the RPn freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPn freq.
+ */
+u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
-
-	return sysfs_emit(buf, "%d\n", pc->rpn_freq);
+	return pc->rpn_freq;
 }
-static DEVICE_ATTR_RO(rpn_freq);
 
-static ssize_t min_freq_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+/**
+ * xe_guc_pc_get_min_freq - Get the min operational frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
 	struct xe_gt *gt = pc_to_gt(pc);
-	ssize_t ret;
+	int ret;
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
@@ -503,7 +505,7 @@ static ssize_t min_freq_show(struct device *dev,
 	if (ret)
 		goto fw;
 
-	ret = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc));
+	*freq = pc_get_min_freq(pc);
 
 fw:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
@@ -513,16 +515,18 @@ out:
 	return ret;
 }
 
-static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
-			      const char *buff, size_t count)
+/**
+ * xe_guc_pc_set_min_freq - Set the minimal operational frequency
+ * @pc: The GuC PC
+ * @freq: The selected minimal frequency
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset),
+ *         -EINVAL if value out of bounds.
+ */
+int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
-	u32 freq;
-	ssize_t ret;
-
-	ret = kstrtou32(buff, 0, &freq);
-	if (ret)
-		return ret;
+	int ret;
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
@@ -541,15 +545,21 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 out:
 	mutex_unlock(&pc->freq_lock);
 	xe_device_mem_access_put(pc_to_xe(pc));
-	return ret ?: count;
+
+	return ret;
 }
-static DEVICE_ATTR_RW(min_freq);
 
-static ssize_t max_freq_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
+/**
+ * xe_guc_pc_get_max_freq - Get Maximum operational frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
-	ssize_t ret;
+	int ret;
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
@@ -563,7 +573,7 @@ static ssize_t max_freq_show(struct device *dev,
 	if (ret)
 		goto out;
 
-	ret = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc));
+	*freq = pc_get_max_freq(pc);
 
 out:
 	mutex_unlock(&pc->freq_lock);
@@ -571,16 +581,18 @@ out:
 	return ret;
 }
 
-static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
-			      const char *buff, size_t count)
+/**
+ * xe_guc_pc_set_max_freq - Set the maximum operational frequency
+ * @pc: The GuC PC
+ * @freq: The selected maximum frequency value
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset),
+ *         -EINVAL if value out of bounds.
+ */
+int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
 {
-	struct xe_guc_pc *pc = dev_to_pc(dev);
-	u32 freq;
-	ssize_t ret;
-
-	ret = kstrtou32(buff, 0, &freq);
-	if (ret)
-		return ret;
+	int ret;
 
 	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
@@ -599,9 +611,8 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 out:
 	mutex_unlock(&pc->freq_lock);
 	xe_device_mem_access_put(pc_to_xe(pc));
-	return ret ?: count;
+	return ret;
 }
-static DEVICE_ATTR_RW(max_freq);
 
 /**
  * xe_guc_pc_c_status - get the current GT C state
@@ -666,17 +677,6 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
 	return reg;
 }
 
-static const struct attribute *pc_attrs[] = {
-	&dev_attr_act_freq.attr,
-	&dev_attr_cur_freq.attr,
-	&dev_attr_rp0_freq.attr,
-	&dev_attr_rpe_freq.attr,
-	&dev_attr_rpn_freq.attr,
-	&dev_attr_min_freq.attr,
-	&dev_attr_max_freq.attr,
-	NULL
-};
-
 static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
 {
 	struct xe_gt *gt = pc_to_gt(pc);
@@ -952,6 +952,10 @@ out:
 	return ret;
 }
 
+/**
+ * xe_guc_pc_fini - Finalize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
 void xe_guc_pc_fini(struct xe_guc_pc *pc)
 {
 	struct xe_device *xe = pc_to_xe(pc);
@@ -963,7 +967,6 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc)
 
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
-	sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
 	mutex_destroy(&pc->freq_lock);
 }
 
@@ -978,7 +981,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_bo *bo;
 	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
-	int err;
 
 	if (xe->info.skip_guc_pc)
 		return 0;
@@ -992,10 +994,5 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 		return PTR_ERR(bo);
 
 	pc->bo = bo;
-
-	err = sysfs_create_files(gt->sysfs, pc_attrs);
-	if (err)
-		return err;
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 054788e006f32..cecad8e9300b4 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -14,6 +14,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc);
 int xe_guc_pc_stop(struct xe_guc_pc *pc);
 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
 
+u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
+u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq);
+int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq);
+
 enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
 u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
 u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
-- 
GitLab


From 1c8e9019033728093c04608f44c6e87fec6822e1 Mon Sep 17 00:00:00 2001
From: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
Date: Fri, 8 Dec 2023 00:11:52 -0500
Subject: [PATCH 2205/2340] drm/xe: Add frequency throttle reasons sysfs
 attributes

Add throttle reasons sysfs attributes under a separate directory.

/device/tile<n>/gt<n>/freq0/throttle
			|- reason_pl1
			|- reason_pl2
			|- reason_pl4
			|- reason_prochot
			|- reason_ratl
			|- reason_vr_tdc
			|- reason_vr_thermalert
			|- status

v2: Remove unnecessary headers and clean-up action (Riana)

Signed-off-by: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile               |   1 +
 drivers/gpu/drm/xe/regs/xe_gt_regs.h      |  12 ++
 drivers/gpu/drm/xe/xe_gt_freq.c           |   3 +
 drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c | 251 ++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h |  16 ++
 5 files changed, 283 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
 create mode 100644 drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 62ee772a1d39f..f4ae063a70053 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -78,6 +78,7 @@ xe-y += xe_bb.o \
 	xe_gt_mcr.o \
 	xe_gt_pagefault.o \
 	xe_gt_sysfs.o \
+	xe_gt_throttle_sysfs.o \
 	xe_gt_tlb_invalidation.o \
 	xe_gt_topology.o \
 	xe_guc.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 9744ed0be3a5c..5f5a72e9d0d89 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -478,4 +478,16 @@
 #define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
 #define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
 
+#define GT0_PERF_LIMIT_REASONS			XE_REG(0x1381a8)
+#define   GT0_PERF_LIMIT_REASONS_MASK		0xde3
+#define   PROCHOT_MASK				REG_BIT(0)
+#define   THERMAL_LIMIT_MASK			REG_BIT(1)
+#define   RATL_MASK				REG_BIT(5)
+#define   VR_THERMALERT_MASK			REG_BIT(6)
+#define   VR_TDC_MASK				REG_BIT(7)
+#define   POWER_LIMIT_4_MASK			REG_BIT(8)
+#define   POWER_LIMIT_1_MASK			REG_BIT(10)
+#define   POWER_LIMIT_2_MASK			REG_BIT(11)
+#define MTL_MEDIA_PERF_LIMIT_REASONS		XE_REG(0x138030)
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 2c3830d0e9e51..08eabcafe7bcc 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -13,6 +13,7 @@
 
 #include "xe_device_types.h"
 #include "xe_gt_sysfs.h"
+#include "xe_gt_throttle_sysfs.h"
 #include "xe_guc_pc.h"
 
 /**
@@ -213,4 +214,6 @@ void xe_gt_freq_init(struct xe_gt *gt)
 	if (err)
 		drm_warn(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
 			 kobject_name(gt->freq), err);
+
+	xe_gt_throttle_sysfs_init(gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
new file mode 100644
index 0000000000000..63d640591a527
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include <regs/xe_gt_regs.h>
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_throttle_sysfs.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: Xe GT Throttle
+ *
+ * Provides sysfs entries for frequency throttle reasons in GT
+ *
+ * device/gt#/freq0/throttle/status - Overall status
+ * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1
+ * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2
+ * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc.
+ * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal
+ * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
+ * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL
+ * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT
+ * device/gt#/freq0/throttle/reason_vr_tdc -  Frequency throttle due to VR TDC
+ */
+
+static struct xe_gt *
+dev_to_gt(struct device *dev)
+{
+	return kobj_to_gt(dev->kobj.parent);
+}
+
+static u32 read_perf_limit_reasons(struct xe_gt *gt)
+{
+	u32 reg;
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
+	else
+		reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+
+	return reg;
+}
+
+static u32 read_status(struct xe_gt *gt)
+{
+	u32 status = read_perf_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
+
+	return status;
+}
+
+static u32 read_reason_pl1(struct xe_gt *gt)
+{
+	u32 pl1 = read_perf_limit_reasons(gt) & POWER_LIMIT_1_MASK;
+
+	return pl1;
+}
+
+static u32 read_reason_pl2(struct xe_gt *gt)
+{
+	u32 pl2 = read_perf_limit_reasons(gt) & POWER_LIMIT_2_MASK;
+
+	return pl2;
+}
+
+static u32 read_reason_pl4(struct xe_gt *gt)
+{
+	u32 pl4 = read_perf_limit_reasons(gt) & POWER_LIMIT_4_MASK;
+
+	return pl4;
+}
+
+static u32 read_reason_thermal(struct xe_gt *gt)
+{
+	u32 thermal = read_perf_limit_reasons(gt) & THERMAL_LIMIT_MASK;
+
+	return thermal;
+}
+
+static u32 read_reason_prochot(struct xe_gt *gt)
+{
+	u32 prochot = read_perf_limit_reasons(gt) & PROCHOT_MASK;
+
+	return prochot;
+}
+
+static u32 read_reason_ratl(struct xe_gt *gt)
+{
+	u32 ratl = read_perf_limit_reasons(gt) & RATL_MASK;
+
+	return ratl;
+}
+
+static u32 read_reason_vr_thermalert(struct xe_gt *gt)
+{
+	u32 thermalert = read_perf_limit_reasons(gt) & VR_THERMALERT_MASK;
+
+	return thermalert;
+}
+
+static u32 read_reason_vr_tdc(struct xe_gt *gt)
+{
+	u32 tdc = read_perf_limit_reasons(gt) & VR_TDC_MASK;
+
+	return tdc;
+}
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool status = !!read_status(gt);
+
+	return sysfs_emit(buff, "%u\n", status);
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t reason_pl1_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl1 = !!read_reason_pl1(gt);
+
+	return sysfs_emit(buff, "%u\n", pl1);
+}
+static DEVICE_ATTR_RO(reason_pl1);
+
+static ssize_t reason_pl2_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl2 = !!read_reason_pl2(gt);
+
+	return sysfs_emit(buff, "%u\n", pl2);
+}
+static DEVICE_ATTR_RO(reason_pl2);
+
+static ssize_t reason_pl4_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl4 = !!read_reason_pl4(gt);
+
+	return sysfs_emit(buff, "%u\n", pl4);
+}
+static DEVICE_ATTR_RO(reason_pl4);
+
+static ssize_t reason_thermal_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool thermal = !!read_reason_thermal(gt);
+
+	return sysfs_emit(buff, "%u\n", thermal);
+}
+static DEVICE_ATTR_RO(reason_thermal);
+
+static ssize_t reason_prochot_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool prochot = !!read_reason_prochot(gt);
+
+	return sysfs_emit(buff, "%u\n", prochot);
+}
+static DEVICE_ATTR_RO(reason_prochot);
+
+static ssize_t reason_ratl_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool ratl = !!read_reason_ratl(gt);
+
+	return sysfs_emit(buff, "%u\n", ratl);
+}
+static DEVICE_ATTR_RO(reason_ratl);
+
+static ssize_t reason_vr_thermalert_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool thermalert = !!read_reason_vr_thermalert(gt);
+
+	return sysfs_emit(buff, "%u\n", thermalert);
+}
+static DEVICE_ATTR_RO(reason_vr_thermalert);
+
+static ssize_t reason_vr_tdc_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool tdc = !!read_reason_vr_tdc(gt);
+
+	return sysfs_emit(buff, "%u\n", tdc);
+}
+static DEVICE_ATTR_RO(reason_vr_tdc);
+
+static struct attribute *throttle_attrs[] = {
+	&dev_attr_status.attr,
+	&dev_attr_reason_pl1.attr,
+	&dev_attr_reason_pl2.attr,
+	&dev_attr_reason_pl4.attr,
+	&dev_attr_reason_thermal.attr,
+	&dev_attr_reason_prochot.attr,
+	&dev_attr_reason_ratl.attr,
+	&dev_attr_reason_vr_thermalert.attr,
+	&dev_attr_reason_vr_tdc.attr,
+	NULL
+};
+
+static const struct attribute_group throttle_group_attrs = {
+	.name = "throttle",
+	.attrs = throttle_attrs,
+};
+
+static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	sysfs_remove_group(gt->freq, &throttle_group_attrs);
+}
+
+void xe_gt_throttle_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	err = sysfs_create_group(gt->freq, &throttle_group_attrs);
+	if (err) {
+		drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
new file mode 100644
index 0000000000000..3ecfd4beffe1e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_THROTTLE_SYSFS_H_
+#define _XE_GT_THROTTLE_SYSFS_H_
+
+#include <drm/drm_managed.h>
+
+struct xe_gt;
+
+void xe_gt_throttle_sysfs_init(struct xe_gt *gt);
+
+#endif /* _XE_GT_THROTTLE_SYSFS_H_ */
+
-- 
GitLab


From e84d716dd461928b3db344748cd7f87395a2ce74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Sat, 9 Dec 2023 16:18:41 +0100
Subject: [PATCH 2206/2340] drm/xe: Restrict huge PTEs to 1GiB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a define for the highest level for which we can encode a huge PTE,
and use it for page-table building. Also update an assert that checks that
we don't try to encode for larger sizes.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Brian Welty <brian.welty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231209151843.7903-2-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 3 +++
 drivers/gpu/drm/xe/xe_pt.h | 3 +++
 drivers/gpu/drm/xe/xe_vm.c | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 3b485313804aa..46ef9df34a2e3 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -430,6 +430,9 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
 {
 	u64 size, dma;
 
+	if (level > MAX_HUGEPTE_LEVEL)
+		return false;
+
 	/* Does the virtual range requested cover a huge pte? */
 	if (!xe_pt_covers(addr, next, level, &xe_walk->base))
 		return false;
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index d5460e58dbbf9..ba2f3325c84de 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -18,6 +18,9 @@ struct xe_tile;
 struct xe_vm;
 struct xe_vma;
 
+/* Largest huge pte is currently 1GiB. May become device dependent. */
+#define MAX_HUGEPTE_LEVEL 2
+
 #define xe_pt_write(xe, map, idx, data) \
 	xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 11667529e40be..d589beb99fe69 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1258,7 +1258,7 @@ static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
 
 static u64 pte_encode_ps(u32 pt_level)
 {
-	XE_WARN_ON(pt_level > 2);
+	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
 
 	if (pt_level == 1)
 		return XE_PDE_PS_2M;
-- 
GitLab


From 06951c2ee72df2f53b71e7cf2b504d4fa6bba453 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Sat, 9 Dec 2023 16:18:42 +0100
Subject: [PATCH 2207/2340] drm/xe: Use NULL PTEs as scratch PTEs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently scratch PTEs are write-enabled and points to a single scratch
page. This has the side effect that buggy applications with out-of-bounds
memory accesses may not notice the bad access since what's written may
be read back.

Instead use NULL PTEs as scratch PTEs. These always return 0 when reading,
and writing has no effect. As a slight benefit, we can also use huge NULL
PTEs.

One drawback pointed out is that debugging may be hampered since previously
when inspecting the content of the scratch page, it might be possible to
detect writes to out-of-bound addresses and possibly also
from where the out-of-bounds address originated. However since the scratch
page-table structure is kept, it will be easy to add back the single
RW-enabled scratch page under a debug define if needed.

Also update the kerneldoc accordingly and move the function to create the
scratch page-tables from xe_pt.c to xe_pt.h since it is accessing
vm structure internals and this also makes it possible to make it static.

v2:
- Don't try to encode scratch PTEs larger than 1GiB.
- Move xe_pt_create_scratch(), Update kerneldoc.
v3:
- Rebase.

Cc: Brian Welty <brian.welty@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com> #for general direction.
Reviewed-by: Brian Welty <brian.welty@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231209151843.7903-3-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c       | 65 ++++---------------------------
 drivers/gpu/drm/xe/xe_pt.h       |  3 --
 drivers/gpu/drm/xe/xe_vm.c       | 66 ++++++++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_vm.h       | 11 ++++++
 drivers/gpu/drm/xe/xe_vm_types.h |  1 -
 5 files changed, 74 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 46ef9df34a2e3..de1030a475883 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -50,17 +50,19 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
 			     unsigned int level)
 {
-	u16 pat_index = tile_to_xe(tile)->pat.idx[XE_CACHE_WB];
+	struct xe_device *xe = tile_to_xe(tile);
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
 	u8 id = tile->id;
 
-	if (!vm->scratch_bo[id])
+	if (!xe_vm_has_scratch(vm))
 		return 0;
 
-	if (level > 0)
+	if (level > MAX_HUGEPTE_LEVEL)
 		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
 						 0, pat_index);
 
-	return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, pat_index, 0);
+	return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
+		XE_PTE_NULL;
 }
 
 /**
@@ -135,7 +137,7 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
 	u64 empty;
 	int i;
 
-	if (!vm->scratch_bo[tile->id]) {
+	if (!xe_vm_has_scratch(vm)) {
 		/*
 		 * FIXME: Some memory is allocated already allocated to zero?
 		 * Find out which memory that is and avoid this memset...
@@ -194,57 +196,6 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
 	kfree(pt);
 }
 
-/**
- * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the
- * given tile and vm.
- * @xe: xe device.
- * @tile: tile to set up for.
- * @vm: vm to set up for.
- *
- * Sets up a pagetable tree with one page-table per level and a single
- * leaf bo. All pagetable entries point to the single page-table or,
- * for L0, the single bo one level below.
- *
- * Return: 0 on success, negative error code on error.
- */
-int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile,
-			 struct xe_vm *vm)
-{
-	u8 id = tile->id;
-	unsigned int flags;
-	int i;
-
-	/*
-	 * So we don't need to worry about 64K TLB hints when dealing with
-	 * scratch entires, rather keep the scratch page in system memory on
-	 * platforms where 64K pages are needed for VRAM.
-	 */
-	flags = XE_BO_CREATE_PINNED_BIT;
-	if (vm->flags & XE_VM_FLAG_64K)
-		flags |= XE_BO_CREATE_SYSTEM_BIT;
-	else
-		flags |= XE_BO_CREATE_VRAM_IF_DGFX(tile);
-
-	vm->scratch_bo[id] = xe_bo_create_pin_map(xe, tile, vm, SZ_4K,
-						  ttm_bo_type_kernel,
-						  flags);
-	if (IS_ERR(vm->scratch_bo[id]))
-		return PTR_ERR(vm->scratch_bo[id]);
-
-	xe_map_memset(vm->xe, &vm->scratch_bo[id]->vmap, 0, 0,
-		      vm->scratch_bo[id]->size);
-
-	for (i = 0; i < vm->pt_root[id]->level; i++) {
-		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
-		if (IS_ERR(vm->scratch_pt[id][i]))
-			return PTR_ERR(vm->scratch_pt[id][i]);
-
-		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
-	}
-
-	return 0;
-}
-
 /**
  * DOC: Pagetable building
  *
@@ -1289,7 +1240,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 	 * it needs to be done here.
 	 */
 	if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
-	    (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_preempt_fence_mode(vm))) {
+	    (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
 		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
 		if (!ifence)
 			return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index ba2f3325c84de..71a4fbfcff43a 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -29,9 +29,6 @@ unsigned int xe_pt_shift(unsigned int level);
 struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 			   unsigned int level);
 
-int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile,
-			 struct xe_vm *vm);
-
 void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
 			  struct xe_pt *pt);
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d589beb99fe69..e190469ec03aa 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1348,6 +1348,57 @@ static const struct xe_pt_ops xelp_pt_ops = {
 
 static void vm_destroy_work_func(struct work_struct *w);
 
+/**
+ * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
+ * given tile and vm.
+ * @xe: xe device.
+ * @tile: tile to set up for.
+ * @vm: vm to set up for.
+ *
+ * Sets up a pagetable tree with one page-table per level and a single
+ * leaf PTE. All pagetable entries point to the single page-table or,
+ * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
+ * writes become NOPs.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm)
+{
+	u8 id = tile->id;
+	int i;
+
+	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
+		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
+		if (IS_ERR(vm->scratch_pt[id][i]))
+			return PTR_ERR(vm->scratch_pt[id][i]);
+
+		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
+	}
+
+	return 0;
+}
+
+static void xe_vm_free_scratch(struct xe_vm *vm)
+{
+	struct xe_tile *tile;
+	u8 id;
+
+	if (!xe_vm_has_scratch(vm))
+		return;
+
+	for_each_tile(tile, vm->xe, id) {
+		u32 i;
+
+		if (!vm->pt_root[id])
+			continue;
+
+		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
+			if (vm->scratch_pt[id][i])
+				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
+	}
+}
+
 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 {
 	struct drm_gem_object *vm_resv_obj;
@@ -1424,12 +1475,12 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		}
 	}
 
-	if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
+	if (xe_vm_has_scratch(vm)) {
 		for_each_tile(tile, xe, id) {
 			if (!vm->pt_root[id])
 				continue;
 
-			err = xe_pt_create_scratch(xe, tile, vm);
+			err = xe_vm_create_scratch(xe, tile, vm);
 			if (err)
 				goto err_unlock_close;
 		}
@@ -1575,16 +1626,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	 * install a fence to resv. Hence it's safe to
 	 * destroy the pagetables immediately.
 	 */
-	for_each_tile(tile, xe, id) {
-		if (vm->scratch_bo[id]) {
-			u32 i;
+	xe_vm_free_scratch(vm);
 
-			xe_bo_unpin(vm->scratch_bo[id]);
-			xe_bo_put(vm->scratch_bo[id]);
-			for (i = 0; i < vm->pt_root[id]->level; i++)
-				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
-					      NULL);
-		}
+	for_each_tile(tile, xe, id) {
 		if (vm->pt_root[id]) {
 			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
 			vm->pt_root[id] = NULL;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 12bb5d79487f2..a1907544cc4f4 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -63,6 +63,17 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
 struct xe_vma *
 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
 
+/**
+ * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs
+ * @vm: The vm
+ *
+ * Return: whether the vm populates unmapped areas with scratch PTEs
+ */
+static inline bool xe_vm_has_scratch(const struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_SCRATCH_PAGE;
+}
+
 static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva)
 {
 	return container_of(gpuva->vm, struct xe_vm, gpuvm);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index e70ec6b2fabed..15471025a44f6 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -151,7 +151,6 @@ struct xe_vm {
 	u64 size;
 
 	struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
-	struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
 	struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
 
 	/**
-- 
GitLab


From 68661c69e9fa86e78b8b6509aebeada5a15dada5 Mon Sep 17 00:00:00 2001
From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Date: Fri, 1 Dec 2023 12:25:14 -0800
Subject: [PATCH 2208/2340] drm/xe: Check skip_guc_pc before disabling gucrc

Also, use the new C6 helper instead of duplicating that code.

v2: Check skip flag at the beginning of the function (Rodrigo)

Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set")
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_pc.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 2b627ea950f10..f71085228cb33 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -794,9 +794,13 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
  */
 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
 {
+	struct xe_device *xe = pc_to_xe(pc);
 	struct xe_gt *gt = pc_to_gt(pc);
 	int ret = 0;
 
+	if (xe->info.skip_guc_pc)
+		return 0;
+
 	xe_device_mem_access_get(pc_to_xe(pc));
 
 	ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
@@ -807,9 +811,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
 	if (ret)
 		goto out;
 
-	xe_mmio_write32(gt, PG_ENABLE, 0);
-	xe_mmio_write32(gt, RC_CONTROL, 0);
-	xe_mmio_write32(gt, RC_STATE, 0);
+	xe_gt_idle_disable_c6(gt);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 
-- 
GitLab


From 49e134e16f8111f82f4067da38055db4b4b34a0b Mon Sep 17 00:00:00 2001
From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Date: Wed, 6 Dec 2023 15:03:27 +0530
Subject: [PATCH 2209/2340] drm/xe: Fix lockdep warning in xe_force_wake calls

Use spin_lock_irqsave, spin_unlock_irqrestore

Fix for below:
[13994.811263] ========================================================
[13994.811295] WARNING: possible irq lock inversion dependency detected
[13994.811326] 6.6.0-rc3-xe #2 Tainted: G     U
[13994.811358] --------------------------------------------------------
[13994.811388] swapper/0/0 just changed the state of lock:
[13994.811416] ffff895c7e044db8 (&cpuctx_lock){-...}-{2:2}, at:
__perf_event_read+0xb7/0x3a0
[13994.811494] but this lock took another, HARDIRQ-unsafe lock in the
past:
[13994.811528]  (&fw->lock){+.+.}-{2:2}
[13994.811544]

               and interrupts could create inverse lock ordering between
them.

[13994.811606]
               other info that might help us debug this:
[13994.811636]  Possible interrupt unsafe locking scenario:

[13994.811667]        CPU0                    CPU1
[13994.811691]        ----                    ----
[13994.811715]   lock(&fw->lock);
[13994.811744]                                local_irq_disable();
[13994.811773]                                lock(&cpuctx_lock);
[13994.811810]                                lock(&fw->lock);
[13994.811846]   <Interrupt>
[13994.811865]     lock(&cpuctx_lock);
[13994.811895]
                *** DEADLOCK ***

v2: Use spin_lock in atomic context and spin_lock_irq in a non atomic
context (Matthew Brost)

v3: just use spin_lock_irqsave/restore

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_force_wake.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 32d6c4dd28079..9bbe8a5040dae 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -145,9 +145,10 @@ int xe_force_wake_get(struct xe_force_wake *fw,
 	struct xe_gt *gt = fw_to_gt(fw);
 	struct xe_force_wake_domain *domain;
 	enum xe_force_wake_domains tmp, woken = 0;
+	unsigned long flags;
 	int ret, ret2 = 0;
 
-	spin_lock(&fw->lock);
+	spin_lock_irqsave(&fw->lock, flags);
 	for_each_fw_domain_masked(domain, domains, fw, tmp) {
 		if (!domain->ref++) {
 			woken |= BIT(domain->id);
@@ -162,7 +163,7 @@ int xe_force_wake_get(struct xe_force_wake *fw,
 				   domain->id, ret);
 	}
 	fw->awake_domains |= woken;
-	spin_unlock(&fw->lock);
+	spin_unlock_irqrestore(&fw->lock, flags);
 
 	return ret2;
 }
@@ -174,9 +175,10 @@ int xe_force_wake_put(struct xe_force_wake *fw,
 	struct xe_gt *gt = fw_to_gt(fw);
 	struct xe_force_wake_domain *domain;
 	enum xe_force_wake_domains tmp, sleep = 0;
+	unsigned long flags;
 	int ret, ret2 = 0;
 
-	spin_lock(&fw->lock);
+	spin_lock_irqsave(&fw->lock, flags);
 	for_each_fw_domain_masked(domain, domains, fw, tmp) {
 		if (!--domain->ref) {
 			sleep |= BIT(domain->id);
@@ -191,7 +193,7 @@ int xe_force_wake_put(struct xe_force_wake *fw,
 				   domain->id, ret);
 	}
 	fw->awake_domains &= ~sleep;
-	spin_unlock(&fw->lock);
+	spin_unlock_irqrestore(&fw->lock, flags);
 
 	return ret2;
 }
-- 
GitLab


From 24f947d58fe554cf38507b94a43d373acf1e5e73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 12 Dec 2023 11:01:43 +0100
Subject: [PATCH 2210/2340] drm/xe: Use DRM GPUVM helpers for external- and
 evicted objects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adapt to the DRM_GPUVM helpers moving removing a lot of complicated
driver-specific code.

For now this uses fine-grained locking for the evict list and external
object list, which may incur a slight performance penalty in some
situations.

v2:
- Don't lock all bos and validate on LR exec submissions (Matthew Brost)
- Add some kerneldoc

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231212100144.6833-2-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c       |  63 +++----
 drivers/gpu/drm/xe/xe_exec.c     |  74 ++------
 drivers/gpu/drm/xe/xe_vm.c       | 292 +++++++------------------------
 drivers/gpu/drm/xe/xe_vm.h       |  19 +-
 drivers/gpu/drm/xe/xe_vm_types.h |  67 ++-----
 5 files changed, 129 insertions(+), 386 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 9cc78986dbd3f..13ebe33bb7a22 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -468,9 +468,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 {
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	struct drm_gpuva *gpuva;
 	struct drm_gem_object *obj = &bo->ttm.base;
 	struct drm_gpuvm_bo *vm_bo;
+	bool idle = false;
 	int ret = 0;
 
 	dma_resv_assert_held(bo->ttm.base.resv);
@@ -484,14 +484,15 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 	}
 
 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
-		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
-			struct xe_vma *vma = gpuva_to_vma(gpuva);
-			struct xe_vm *vm = xe_vma_vm(vma);
+		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+		struct drm_gpuva *gpuva;
 
-			trace_xe_vma_evict(vma);
+		if (!xe_vm_in_fault_mode(vm)) {
+			drm_gpuvm_bo_evict(vm_bo, true);
+			continue;
+		}
 
-		if (xe_vm_in_fault_mode(vm)) {
-			/* Wait for pending binds / unbinds. */
+		if (!idle) {
 			long timeout;
 
 			if (ctx->no_wait_gpu &&
@@ -503,45 +504,21 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 							DMA_RESV_USAGE_BOOKKEEP,
 							ctx->interruptible,
 							MAX_SCHEDULE_TIMEOUT);
-			if (timeout > 0) {
-				ret = xe_vm_invalidate_vma(vma);
-				XE_WARN_ON(ret);
-			} else if (!timeout) {
-				ret = -ETIME;
-			} else {
-				ret = timeout;
-			}
-
-		} else {
-			bool vm_resv_locked = false;
+			if (!timeout)
+				return -ETIME;
+			if (timeout < 0)
+				return timeout;
 
-			/*
-			 * We need to put the vma on the vm's rebind_list,
-			 * but need the vm resv to do so. If we can't verify
-			 * that we indeed have it locked, put the vma an the
-			 * vm's notifier.rebind_list instead and scoop later.
-			 */
-			if (dma_resv_trylock(xe_vm_resv(vm)))
-				vm_resv_locked = true;
-			else if (ctx->resv != xe_vm_resv(vm)) {
-				spin_lock(&vm->notifier.list_lock);
-				if (!(vma->gpuva.flags & XE_VMA_DESTROYED))
-					list_move_tail(&vma->notifier.rebind_link,
-						       &vm->notifier.rebind_list);
-				spin_unlock(&vm->notifier.list_lock);
-				continue;
-			}
+			idle = true;
+		}
 
-			xe_vm_assert_held(vm);
-			if (vma->tile_present &&
-			    !(vma->gpuva.flags & XE_VMA_DESTROYED) &&
-			    list_empty(&vma->combined_links.rebind))
-				list_add_tail(&vma->combined_links.rebind,
-					      &vm->rebind_list);
+		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
+			struct xe_vma *vma = gpuva_to_vma(gpuva);
 
-			if (vm_resv_locked)
-				dma_resv_unlock(xe_vm_resv(vm));
-		}
+			trace_xe_vma_evict(vma);
+			ret = xe_vm_invalidate_vma(vma);
+			if (XE_WARN_ON(ret))
+				return ret;
 		}
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 5ec37df33afe4..15ab1a927613f 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -94,40 +94,9 @@
  *	Unlock all
  */
 
-static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm)
+static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
 {
-	struct xe_vma *vma;
-	LIST_HEAD(dups);
-	int err = 0;
-
-	if (xe_vm_in_lr_mode(vm))
-		return 0;
-
-	/*
-	 * 1 fence for job from exec plus a fence for each tile from a possible
-	 * rebind
-	 */
-	err = xe_vm_lock_dma_resv(vm, exec, 1 + vm->xe->info.tile_count, true);
-	if (err)
-		return err;
-
-	/*
-	 * Validate BOs that have been evicted (i.e. make sure the
-	 * BOs have valid placements possibly moving an evicted BO back
-	 * to a location where the GPU can access it).
-	 */
-	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
-		xe_assert(vm->xe, !xe_vma_is_null(vma));
-
-		if (xe_vma_is_userptr(vma))
-			continue;
-
-		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
-		if (err)
-			break;
-	}
-
-	return err;
+	return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
 }
 
 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -140,7 +109,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_exec_queue *q;
 	struct xe_sync_entry *syncs = NULL;
 	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
-	struct drm_exec exec;
+	struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
+	struct drm_exec *exec = &vm_exec.exec;
 	u32 i, num_syncs = 0;
 	struct xe_sched_job *job;
 	struct dma_fence *rebind_fence;
@@ -216,16 +186,18 @@ retry:
 			goto err_unlock_list;
 	}
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
-	drm_exec_until_all_locked(&exec) {
-		err = xe_exec_begin(&exec, vm);
-		drm_exec_retry_on_contention(&exec);
-		if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
-			err = -EAGAIN;
+	vm_exec.vm = &vm->gpuvm;
+	vm_exec.num_fences = 1 + vm->xe->info.tile_count;
+	vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
+	if (xe_vm_in_lr_mode(vm)) {
+		drm_exec_init(exec, vm_exec.flags);
+	} else {
+		err = drm_gpuvm_exec_lock(&vm_exec);
+		if (err) {
+			if (xe_vm_validate_should_retry(exec, err, &end))
+				err = -EAGAIN;
 			goto err_unlock_list;
 		}
-		if (err)
-			goto err_exec;
 	}
 
 	if (xe_vm_is_closed_or_banned(q->vm)) {
@@ -307,19 +279,9 @@ retry:
 	 * the job and let the DRM scheduler / backend clean up the job.
 	 */
 	xe_sched_job_arm(job);
-	if (!xe_vm_in_lr_mode(vm)) {
-		/* Block userptr invalidations / BO eviction */
-		dma_resv_add_fence(xe_vm_resv(vm),
-				   &job->drm.s_fence->finished,
-				   DMA_RESV_USAGE_BOOKKEEP);
-
-		/*
-		 * Make implicit sync work across drivers, assuming all external
-		 * BOs are written as we don't pass in a read / write list.
-		 */
-		xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished,
-					DMA_RESV_USAGE_WRITE);
-	}
+	if (!xe_vm_in_lr_mode(vm))
+		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
+					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
 
 	for (i = 0; i < num_syncs; i++)
 		xe_sync_entry_signal(&syncs[i], job,
@@ -343,7 +305,7 @@ err_put_job:
 	if (err)
 		xe_sched_job_put(job);
 err_exec:
-	drm_exec_fini(&exec);
+	drm_exec_fini(exec);
 err_unlock_list:
 	if (write_locked)
 		up_write(&vm->lock);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index e190469ec03aa..7a3b680d01a36 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -299,26 +299,8 @@ out_unlock:
 	return err;
 }
 
-/**
- * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
- * @vm: The vm.
- * @fence: The fence to add.
- * @usage: The resv usage for the fence.
- *
- * Loops over all of the vm's external object bindings and adds a @fence
- * with the given @usage to all of the external object's reservation
- * objects.
- */
-void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
-			     enum dma_resv_usage usage)
-{
-	struct xe_vma *vma;
-
-	list_for_each_entry(vma, &vm->extobj.list, extobj.link)
-		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage);
-}
-
-static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
+						struct drm_exec *exec)
 {
 	struct xe_exec_queue *q;
 
@@ -328,16 +310,19 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
 	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
 		q->ops->resume(q);
 
-		dma_resv_add_fence(xe_vm_resv(vm), q->compute.pfence,
-				   DMA_RESV_USAGE_BOOKKEEP);
-		xe_vm_fence_all_extobjs(vm, q->compute.pfence,
-					DMA_RESV_USAGE_BOOKKEEP);
+		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
+					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
 	}
 }
 
 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 {
-	struct drm_exec exec;
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &vm->gpuvm,
+		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
+		.num_fences = 1,
+	};
+	struct drm_exec *exec = &vm_exec.exec;
 	struct dma_fence *pfence;
 	int err;
 	bool wait;
@@ -345,13 +330,9 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
 
 	down_write(&vm->lock);
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
-	drm_exec_until_all_locked(&exec) {
-		err = xe_vm_lock_dma_resv(vm, &exec, 1, true);
-		drm_exec_retry_on_contention(&exec);
-		if (err)
-			goto out_unlock;
-	}
+	err = drm_gpuvm_exec_lock(&vm_exec);
+	if (err)
+		return err;
 
 	pfence = xe_preempt_fence_create(q, q->compute.context,
 					 ++q->compute.seqno);
@@ -366,10 +347,8 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 
 	down_read(&vm->userptr.notifier_lock);
 
-	dma_resv_add_fence(xe_vm_resv(vm), pfence,
-			   DMA_RESV_USAGE_BOOKKEEP);
-
-	xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
+	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
+				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
 
 	/*
 	 * Check to see if a preemption on VM is in flight or userptr
@@ -383,7 +362,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	up_read(&vm->userptr.notifier_lock);
 
 out_unlock:
-	drm_exec_fini(&exec);
+	drm_exec_fini(exec);
 	up_write(&vm->lock);
 
 	return err;
@@ -429,55 +408,6 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
 		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
 }
 
-/**
- * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
- * objects of the vm's external buffer objects.
- * @vm: The vm.
- * @exec: Pointer to a struct drm_exec locking context.
- * @num_shared: Number of dma-fence slots to reserve in the locked objects.
- * @lock_vm: Lock also the vm's dma_resv.
- *
- * Locks the vm dma-resv objects and all the dma-resv objects of the
- * buffer objects on the vm external object list.
- *
- * Return: 0 on success, Negative error code on error. In particular if
- * @intr is set to true, -EINTR or -ERESTARTSYS may be returned.
- */
-int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
-			unsigned int num_shared, bool lock_vm)
-{
-	struct xe_vma *vma, *next;
-	int err = 0;
-
-	lockdep_assert_held(&vm->lock);
-
-	if (lock_vm) {
-		err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
-		if (err)
-			return err;
-	}
-
-	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
-		err = drm_exec_prepare_obj(exec, &xe_vma_bo(vma)->ttm.base, num_shared);
-		if (err)
-			return err;
-	}
-
-	spin_lock(&vm->notifier.list_lock);
-	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
-				 notifier.rebind_link) {
-		xe_bo_assert_held(xe_vma_bo(vma));
-
-		list_del_init(&vma->notifier.rebind_link);
-		if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
-			list_move_tail(&vma->combined_links.rebind,
-				       &vm->rebind_list);
-	}
-	spin_unlock(&vm->notifier.list_lock);
-
-	return 0;
-}
-
 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
 
 static void xe_vm_kill(struct xe_vm *vm)
@@ -526,30 +456,39 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
 	if (!ktime_before(cur, *end))
 		return false;
 
-	/*
-	 * We would like to keep the ticket here with
-	 * drm_exec_unlock_all(), but WW mutex asserts currently
-	 * stop us from that. In any case this function could go away
-	 * with proper TTM -EDEADLK handling.
-	 */
-	drm_exec_fini(exec);
-
 	msleep(20);
 	return true;
 }
 
+static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+	struct drm_gpuva *gpuva;
+	int ret;
+
+	lockdep_assert_held(&vm->lock);
+	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
+		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
+			       &vm->rebind_list);
+
+	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+	if (ret)
+		return ret;
+
+	vm_bo->evicted = false;
+	return 0;
+}
+
 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 				 bool *done)
 {
-	struct xe_vma *vma;
 	int err;
 
 	/*
 	 * 1 fence for each preempt fence plus a fence for each tile from a
 	 * possible rebind
 	 */
-	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm),
-				   vm->preempt.num_exec_queues +
+	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
 				   vm->xe->info.tile_count);
 	if (err)
 		return err;
@@ -565,7 +504,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 		return 0;
 	}
 
-	err = xe_vm_lock_dma_resv(vm, exec, vm->preempt.num_exec_queues, false);
+	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
 	if (err)
 		return err;
 
@@ -573,17 +512,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 	if (err)
 		return err;
 
-	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
-		if (xe_vma_has_no_bo(vma) ||
-		    vma->gpuva.flags & XE_VMA_DESTROYED)
-			continue;
-
-		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
-		if (err)
-			break;
-	}
-
-	return err;
+	return drm_gpuvm_validate(&vm->gpuvm, exec);
 }
 
 static void preempt_rebind_work_func(struct work_struct *w)
@@ -623,12 +552,13 @@ retry:
 
 		err = xe_preempt_work_begin(&exec, vm, &done);
 		drm_exec_retry_on_contention(&exec);
-		if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
-			err = -EAGAIN;
+		if (err || done) {
+			drm_exec_fini(&exec);
+			if (err && xe_vm_validate_should_retry(&exec, err, &end))
+				err = -EAGAIN;
+
 			goto out_unlock_outer;
 		}
-		if (err || done)
-			goto out_unlock;
 	}
 
 	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
@@ -675,7 +605,7 @@ retry:
 
 	/* Point of no return. */
 	arm_preempt_fences(vm, &preempt_fences);
-	resume_and_reinstall_preempt_fences(vm);
+	resume_and_reinstall_preempt_fences(vm, &exec);
 	up_read(&vm->userptr.notifier_lock);
 
 out_unlock:
@@ -780,9 +710,8 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
 				 userptr.invalidate_link) {
 		list_del_init(&vma->userptr.invalidate_link);
-		if (list_empty(&vma->combined_links.userptr))
-			list_move_tail(&vma->combined_links.userptr,
-				       &vm->userptr.repin_list);
+		list_move_tail(&vma->combined_links.userptr,
+			       &vm->userptr.repin_list);
 	}
 	spin_unlock(&vm->userptr.invalidated_lock);
 
@@ -791,27 +720,12 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
 				 combined_links.userptr) {
 		err = xe_vma_userptr_pin_pages(vma);
 		if (err < 0)
-			goto out_err;
+			return err;
 
-		list_move_tail(&vma->combined_links.userptr, &tmp_evict);
+		list_move_tail(&vma->combined_links.userptr, &vm->rebind_list);
 	}
 
-	/* Take lock and move to rebind_list for rebinding. */
-	err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
-	if (err)
-		goto out_err;
-
-	list_for_each_entry_safe(vma, next, &tmp_evict, combined_links.userptr)
-		list_move_tail(&vma->combined_links.rebind, &vm->rebind_list);
-
-	dma_resv_unlock(xe_vm_resv(vm));
-
 	return 0;
-
-out_err:
-	list_splice_tail(&tmp_evict, &vm->userptr.repin_list);
-
-	return err;
 }
 
 /**
@@ -890,8 +804,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	}
 
 	INIT_LIST_HEAD(&vma->combined_links.rebind);
-	INIT_LIST_HEAD(&vma->notifier.rebind_link);
-	INIT_LIST_HEAD(&vma->extobj.link);
 
 	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
 	vma->gpuva.vm = &vm->gpuvm;
@@ -921,6 +833,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 			return ERR_CAST(vm_bo);
 		}
 
+		drm_gpuvm_bo_extobj_add(vm_bo);
 		drm_gem_object_get(&bo->ttm.base);
 		vma->gpuva.gem.obj = &bo->ttm.base;
 		vma->gpuva.gem.offset = bo_offset_or_userptr;
@@ -953,16 +866,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	return vma;
 }
 
-static bool vm_remove_extobj(struct xe_vma *vma)
-{
-	if (!list_empty(&vma->extobj.link)) {
-		xe_vma_vm(vma)->extobj.entries--;
-		list_del_init(&vma->extobj.link);
-		return true;
-	}
-	return false;
-}
-
 static void xe_vma_destroy_late(struct xe_vma *vma)
 {
 	struct xe_vm *vm = xe_vma_vm(vma);
@@ -1003,60 +906,6 @@ static void vma_destroy_work_func(struct work_struct *w)
 	xe_vma_destroy_late(vma);
 }
 
-static struct xe_vma *
-bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
-			    struct xe_vma *ignore)
-{
-	struct drm_gpuvm_bo *vm_bo;
-	struct drm_gpuva *va;
-	struct drm_gem_object *obj = &bo->ttm.base;
-
-	xe_bo_assert_held(bo);
-
-	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
-		drm_gpuvm_bo_for_each_va(va, vm_bo) {
-			struct xe_vma *vma = gpuva_to_vma(va);
-
-			if (vma != ignore && xe_vma_vm(vma) == vm)
-				return vma;
-		}
-	}
-
-	return NULL;
-}
-
-static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
-				 struct xe_vma *ignore)
-{
-	bool ret;
-
-	xe_bo_lock(bo, false);
-	ret = !!bo_has_vm_references_locked(bo, vm, ignore);
-	xe_bo_unlock(bo);
-
-	return ret;
-}
-
-static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
-{
-	lockdep_assert_held_write(&vm->lock);
-
-	list_add(&vma->extobj.link, &vm->extobj.list);
-	vm->extobj.entries++;
-}
-
-static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
-{
-	struct xe_bo *bo = xe_vma_bo(vma);
-
-	lockdep_assert_held_write(&vm->lock);
-
-	if (bo_has_vm_references(bo, vm, vma))
-		return;
-
-	__vm_insert_extobj(vm, vma);
-}
-
 static void vma_destroy_cb(struct dma_fence *fence,
 			   struct dma_fence_cb *cb)
 {
@@ -1082,20 +931,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	} else if (!xe_vma_is_null(vma)) {
 		xe_bo_assert_held(xe_vma_bo(vma));
 
-		spin_lock(&vm->notifier.list_lock);
-		list_del(&vma->notifier.rebind_link);
-		spin_unlock(&vm->notifier.list_lock);
-
 		drm_gpuva_unlink(&vma->gpuva);
-
-		if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) {
-			struct xe_vma *other;
-
-			other = bo_has_vm_references_locked(xe_vma_bo(vma), vm, NULL);
-
-			if (other)
-				__vm_insert_extobj(vm, other);
-		}
 	}
 
 	xe_vm_assert_held(vm);
@@ -1213,6 +1049,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm);
 
 static struct drm_gpuvm_ops gpuvm_ops = {
 	.op_alloc = xe_vm_op_alloc,
+	.vm_bo_validate = xe_gpuvm_validate,
 	.vm_free = xe_vm_free,
 };
 
@@ -1426,9 +1263,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	init_rwsem(&vm->userptr.notifier_lock);
 	spin_lock_init(&vm->userptr.invalidated_lock);
 
-	INIT_LIST_HEAD(&vm->notifier.rebind_list);
-	spin_lock_init(&vm->notifier.list_lock);
-
 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
 
 	INIT_LIST_HEAD(&vm->preempt.exec_queues);
@@ -1437,8 +1271,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	for_each_tile(tile, xe, id)
 		xe_range_fence_tree_init(&vm->rftree[id]);
 
-	INIT_LIST_HEAD(&vm->extobj.list);
-
 	vm->pt_ops = &xelp_pt_ops;
 
 	if (!(flags & XE_VM_FLAG_MIGRATION))
@@ -1647,7 +1479,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		xe_vma_destroy_unlocked(vma);
 	}
 
-	xe_assert(xe, list_empty(&vm->extobj.list));
 	up_write(&vm->lock);
 
 	mutex_lock(&xe->usm.lock);
@@ -2289,22 +2120,36 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 			      bool read_only, bool is_null, u16 pat_index)
 {
 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+	struct drm_exec exec;
 	struct xe_vma *vma;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
 
 	if (bo) {
-		err = xe_bo_lock(bo, true);
-		if (err)
-			return ERR_PTR(err);
+		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+		drm_exec_until_all_locked(&exec) {
+			err = 0;
+			if (!bo->vm) {
+				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (!err) {
+				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (err) {
+				drm_exec_fini(&exec);
+				return ERR_PTR(err);
+			}
+		}
 	}
 	vma = xe_vma_create(vm, bo, op->gem.offset,
 			    op->va.addr, op->va.addr +
 			    op->va.range - 1, read_only, is_null,
 			    pat_index);
 	if (bo)
-		xe_bo_unlock(bo);
+		drm_exec_fini(&exec);
 
 	if (xe_vma_is_userptr(vma)) {
 		err = xe_vma_userptr_pin_pages(vma);
@@ -2314,7 +2159,6 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 			return ERR_PTR(err);
 		}
 	} else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
-		vm_insert_extobj(vm, vma);
 		err = add_preempt_fences(vm, bo);
 		if (err) {
 			prep_vma_destroy(vm, vma, false);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index a1907544cc4f4..cf2f96e8c1ab9 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -74,9 +74,20 @@ static inline bool xe_vm_has_scratch(const struct xe_vm *vm)
 	return vm->flags & XE_VM_FLAG_SCRATCH_PAGE;
 }
 
+/**
+ * gpuvm_to_vm() - Return the embedding xe_vm from a struct drm_gpuvm pointer
+ * @gpuvm: The struct drm_gpuvm pointer
+ *
+ * Return: Pointer to the embedding struct xe_vm.
+ */
+static inline struct xe_vm *gpuvm_to_vm(struct drm_gpuvm *gpuvm)
+{
+	return container_of(gpuvm, struct xe_vm, gpuvm);
+}
+
 static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva)
 {
-	return container_of(gpuva->vm, struct xe_vm, gpuvm);
+	return gpuvm_to_vm(gpuva->vm);
 }
 
 static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
@@ -219,12 +230,6 @@ int xe_vma_userptr_check_repin(struct xe_vma *vma);
 
 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
 
-int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
-			unsigned int num_shared, bool lock_vm);
-
-void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
-			     enum dma_resv_usage usage);
-
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
 
 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 15471025a44f6..2e023596cb153 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -62,26 +62,17 @@ struct xe_vma {
 	/** @gpuva: Base GPUVA object */
 	struct drm_gpuva gpuva;
 
-	/** @combined_links: links into lists which are mutually exclusive */
+	/**
+	 * @combined_links: links into lists which are mutually exclusive.
+	 * Locking: vm lock in write mode OR vm lock in read mode and the vm's
+	 * resv.
+	 */
 	union {
-		/**
-		 * @userptr: link into VM repin list if userptr. Protected by
-		 * vm->lock in write mode.
-		 */
+		/** @userptr: link into VM repin list if userptr. */
 		struct list_head userptr;
-		/**
-		 * @rebind: link into VM if this VMA needs rebinding, and
-		 * if it's a bo (not userptr) needs validation after a possible
-		 * eviction. Protected by the vm's resv lock and typically
-		 * vm->lock is also held in write mode. The only place where
-		 * vm->lock isn't held is the BO eviction path which has
-		 * mutually exclusive execution with userptr.
-		 */
+		/** @rebind: link into VM if this VMA needs rebinding. */
 		struct list_head rebind;
-		/**
-		 * @destroy: link to contested list when VM is being closed.
-		 * Protected by vm->lock in write mode and vm's resv lock.
-		 */
+		/** @destroy: link to contested list when VM is being closed. */
 		struct list_head destroy;
 	} combined_links;
 
@@ -115,18 +106,6 @@ struct xe_vma {
 	 */
 	u16 pat_index;
 
-	struct {
-		struct list_head rebind_link;
-	} notifier;
-
-	struct {
-		/**
-		 * @extobj.link: Link into vm's external object list.
-		 * protected by the vm lock.
-		 */
-		struct list_head link;
-	} extobj;
-
 	/**
 	 * @userptr: user pointer state, only allocated for VMAs that are
 	 * user pointers
@@ -180,9 +159,9 @@ struct xe_vm {
 	struct rw_semaphore lock;
 
 	/**
-	 * @rebind_list: list of VMAs that need rebinding, and if they are
-	 * bos (not userptr), need validation after a possible eviction. The
-	 * list is protected by @resv.
+	 * @rebind_list: list of VMAs that need rebinding. Protected by the
+	 * vm->lock in write mode, OR (the vm->lock in read mode and the
+	 * vm resv).
 	 */
 	struct list_head rebind_list;
 
@@ -202,14 +181,6 @@ struct xe_vm {
 	 */
 	struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
 
-	/** @extobj: bookkeeping for external objects. Protected by the vm lock */
-	struct {
-		/** @enties: number of external BOs attached this VM */
-		u32 entries;
-		/** @list: list of vmas with external bos attached */
-		struct list_head list;
-	} extobj;
-
 	/** @async_ops: async VM operations (bind / unbinds) */
 	struct {
 		/** @list: list of pending async VM ops */
@@ -299,22 +270,6 @@ struct xe_vm {
 		struct xe_vma *last_fault_vma;
 	} usm;
 
-	/**
-	 * @notifier: Lists and locks for temporary usage within notifiers where
-	 * we either can't grab the vm lock or the vm resv.
-	 */
-	struct {
-		/** @notifier.list_lock: lock protecting @rebind_list */
-		spinlock_t list_lock;
-		/**
-		 * @notifier.rebind_list: list of vmas that we want to put on the
-		 * main @rebind_list. This list is protected for writing by both
-		 * notifier.list_lock, and the resv of the bo the vma points to,
-		 * and for reading by the notifier.list_lock only.
-		 */
-		struct list_head rebind_list;
-	} notifier;
-
 	/** @error_capture: allow to track errors */
 	struct {
 		/** @capture_once: capture only one error per VM */
-- 
GitLab


From 35705e32b13cf800a47f10844c4f8d1334d411c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 12 Dec 2023 11:01:44 +0100
Subject: [PATCH 2211/2340] drm/xe: Use DRM_GPUVM_RESV_PROTECTED for gpuvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use DRM_GPUVM_RESV_PROTECTED to use corse-grained locking for the
evict and external object list.
Since we are already holding the relevant RESV locks, for now at least,
we don't need the fine-grained locking.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231212100144.6833-3-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7a3b680d01a36..0b9510595db07 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1282,8 +1282,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		goto err_no_resv;
 	}
 
-	drm_gpuvm_init(&vm->gpuvm, "Xe VM", 0, &xe->drm, vm_resv_obj,
-		       0, vm->size, 0, 0, &gpuvm_ops);
+	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
+		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
 
 	drm_gem_object_put(vm_resv_obj);
 
-- 
GitLab


From 3b97e3b265c97b7cd7dcbdb2f7ef93c6e6f94948 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 12 Dec 2023 09:00:37 -0800
Subject: [PATCH 2212/2340] drm/xe: Use a flags field instead of bools for VMA
 create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a flags field instead of severval bools for VMA create as it is
easier to read and less bug prone.

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 64 ++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0b9510595db07..7b38338ab5e21 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -778,17 +778,20 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 	return fence;
 }
 
+#define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
+#define VMA_CREATE_FLAG_IS_NULL		BIT(1)
+
 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    struct xe_bo *bo,
 				    u64 bo_offset_or_userptr,
 				    u64 start, u64 end,
-				    bool read_only,
-				    bool is_null,
-				    u16 pat_index)
+				    u16 pat_index, unsigned int flags)
 {
 	struct xe_vma *vma;
 	struct xe_tile *tile;
 	u8 id;
+	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
+	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
 
 	xe_assert(vm->xe, start < end);
 	xe_assert(vm->xe, end < vm->size);
@@ -2117,7 +2120,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 }
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
-			      bool read_only, bool is_null, u16 pat_index)
+			      u16 pat_index, unsigned int flags)
 {
 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
 	struct drm_exec exec;
@@ -2146,8 +2149,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 	}
 	vma = xe_vma_create(vm, bo, op->gem.offset,
 			    op->va.addr, op->va.addr +
-			    op->va.range - 1, read_only, is_null,
-			    pat_index);
+			    op->va.range - 1, pat_index, flags);
 	if (bo)
 		drm_exec_fini(&exec);
 
@@ -2272,7 +2274,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
 	drm_gpuva_for_each_op(__op, ops) {
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+		struct xe_vma *vma;
 		bool first = list_empty(ops_list);
+		unsigned int flags = 0;
 
 		INIT_LIST_HEAD(&op->link);
 		list_add_tail(&op->link, ops_list);
@@ -2288,10 +2292,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 		switch (op->base.op) {
 		case DRM_GPUVA_OP_MAP:
 		{
-			struct xe_vma *vma;
+			flags |= op->map.read_only ?
+				VMA_CREATE_FLAG_READ_ONLY : 0;
+			flags |= op->map.is_null ?
+				VMA_CREATE_FLAG_IS_NULL : 0;
 
-			vma = new_vma(vm, &op->base.map, op->map.read_only,
-				      op->map.is_null, op->map.pat_index);
+			vma = new_vma(vm, &op->base.map, op->map.pat_index,
+				      flags);
 			if (IS_ERR(vma))
 				return PTR_ERR(vma);
 
@@ -2307,16 +2314,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 			op->remap.range = xe_vma_size(old);
 
 			if (op->base.remap.prev) {
-				struct xe_vma *vma;
-				bool read_only =
-					op->base.remap.unmap->va->flags &
-					XE_VMA_READ_ONLY;
-				bool is_null =
-					op->base.remap.unmap->va->flags &
-					DRM_GPUVA_SPARSE;
-
-				vma = new_vma(vm, op->base.remap.prev, read_only,
-					      is_null, old->pat_index);
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+
+				vma = new_vma(vm, op->base.remap.prev,
+					      old->pat_index, flags);
 				if (IS_ERR(vma))
 					return PTR_ERR(vma);
 
@@ -2339,17 +2345,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 			}
 
 			if (op->base.remap.next) {
-				struct xe_vma *vma;
-				bool read_only =
-					op->base.remap.unmap->va->flags &
-					XE_VMA_READ_ONLY;
-
-				bool is_null =
-					op->base.remap.unmap->va->flags &
-					DRM_GPUVA_SPARSE;
-
-				vma = new_vma(vm, op->base.remap.next, read_only,
-					      is_null, old->pat_index);
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+
+				vma = new_vma(vm, op->base.remap.next,
+					      old->pat_index, flags);
 				if (IS_ERR(vma))
 					return PTR_ERR(vma);
 
-- 
GitLab


From 53bf60f6d8503c788fee9c30dacef682edbe61fd Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 5 Dec 2023 10:56:17 -0800
Subject: [PATCH 2213/2340] drm/xe: Use a flags field instead of bools for sync
 parse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a flags field instead of severval bools for sync parse as it is
easier to read and less bug prone.

v2: Pull in header change from subsequent patch

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c | 5 +++--
 drivers/gpu/drm/xe/xe_sync.c | 4 +++-
 drivers/gpu/drm/xe/xe_sync.h | 5 ++++-
 drivers/gpu/drm/xe/xe_vm.c   | 5 +++--
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 15ab1a927613f..3c9f801d570bf 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -151,8 +151,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	for (i = 0; i < args->num_syncs; i++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
-					  &syncs_user[i], true,
-					  xe_vm_in_lr_mode(vm));
+					  &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0));
 		if (err)
 			goto err_syncs;
 	}
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 936227e794835..2a3f508722fcf 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -98,10 +98,12 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 			struct xe_sync_entry *sync,
 			struct drm_xe_sync __user *sync_user,
-			bool exec, bool in_lr_mode)
+			unsigned int flags)
 {
 	struct drm_xe_sync sync_in;
 	int err;
+	bool exec = flags & SYNC_PARSE_FLAG_EXEC;
+	bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE;
 	bool signal;
 
 	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 30958ddc4cdc3..1b748cec46781 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -12,10 +12,13 @@ struct xe_device;
 struct xe_file;
 struct xe_sched_job;
 
+#define SYNC_PARSE_FLAG_EXEC			BIT(0)
+#define SYNC_PARSE_FLAG_LR_MODE			BIT(1)
+
 int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 			struct xe_sync_entry *sync,
 			struct drm_xe_sync __user *sync_user,
-			bool exec, bool compute_mode);
+			unsigned int flags);
 int xe_sync_entry_wait(struct xe_sync_entry *sync);
 int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
 			   struct xe_sched_job *job);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7b38338ab5e21..d1e53905268f7 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2994,8 +2994,9 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	syncs_user = u64_to_user_ptr(args->syncs);
 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
-					  &syncs_user[num_syncs], false,
-					  xe_vm_in_lr_mode(vm));
+					  &syncs_user[num_syncs],
+					  xe_vm_in_lr_mode(vm) ?
+					  SYNC_PARSE_FLAG_LR_MODE : 0);
 		if (err)
 			goto free_syncs;
 	}
-- 
GitLab


From f5783b5026f76083ef4c53f6240619bd5c7bb9a5 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Mon, 11 Dec 2023 19:20:58 -0500
Subject: [PATCH 2214/2340] drm/xe: Remove vram size info from sysfs

This information is already part of the query IOCTL.
Let's not duplicate it here in the sysfs.

Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_tile_sysfs.c | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index 16376607c68f9..0f8d3e7fce46a 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -20,20 +20,6 @@ static const struct kobj_type xe_tile_sysfs_kobj_type = {
 	.sysfs_ops = &kobj_sysfs_ops,
 };
 
-static ssize_t
-physical_vram_size_bytes_show(struct device *kdev, struct device_attribute *attr,
-			      char *buf)
-{
-	struct xe_tile *tile = kobj_to_tile(&kdev->kobj);
-
-	return sysfs_emit(buf, "%llu\n", tile->mem.vram.actual_physical_size);
-}
-
-static DEVICE_ATTR_RO(physical_vram_size_bytes);
-
-static const struct attribute *physical_memsize_attr =
-	&dev_attr_physical_vram_size_bytes.attr;
-
 static void tile_sysfs_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_tile *tile = arg;
@@ -64,15 +50,8 @@ void xe_tile_sysfs_init(struct xe_tile *tile)
 
 	tile->sysfs = &kt->base;
 
-	if (IS_DGFX(xe) && xe->info.platform != XE_DG1 &&
-	    sysfs_create_file(tile->sysfs, physical_memsize_attr))
-		drm_warn(&xe->drm,
-			 "Sysfs creation to read addr_range per tile failed\n");
-
 	err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
-	if (err) {
+	if (err)
 		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
 			 __func__, err);
-		return;
-	}
 }
-- 
GitLab


From eb9702ad29863c1ae41d17d8504c7444f280dfff Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Tue, 5 Dec 2023 10:39:54 -0800
Subject: [PATCH 2215/2340] drm/xe: Allow num_batch_buffer / num_binds == 0 in
 IOCTLs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The idea being out-syncs can signal indicating all previous operations
on the bind queue are complete. An example use case of this would be
support for implementing vkQueueWaitIdle easily.

All in-syncs are waited on before signaling out-syncs. This is
implemented by forming a composite software fence of in-syncs and
installing this fence in the out-syncs and exec queue last fence slot.

The last fence must be added as a dependency for jobs on user exec
queues as it is possible for the last fence to be a composite software
fence (unordered, ioctl with zero bb or binds) rather than hardware
fence (ordered, previous job on queue).

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c             | 27 +++++++-
 drivers/gpu/drm/xe/xe_exec_queue.c       |  5 +-
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  5 +-
 drivers/gpu/drm/xe/xe_migrate.c          | 14 ++++-
 drivers/gpu/drm/xe/xe_sched_job.c        | 18 ++++++
 drivers/gpu/drm/xe/xe_sched_job.h        |  4 ++
 drivers/gpu/drm/xe/xe_sync.c             | 78 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_sync.h             |  6 ++
 drivers/gpu/drm/xe/xe_vm.c               | 77 ++++++++++++++++-------
 9 files changed, 206 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 3c9f801d570bf..ba92e5619da39 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -131,7 +131,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
+	if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
+			 q->width != args->num_batch_buffer))
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
@@ -207,6 +208,24 @@ retry:
 		goto err_exec;
 	}
 
+	if (!args->num_batch_buffer) {
+		if (!xe_vm_in_lr_mode(vm)) {
+			struct dma_fence *fence;
+
+			fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
+			if (IS_ERR(fence)) {
+				err = PTR_ERR(fence);
+				goto err_exec;
+			}
+			for (i = 0; i < num_syncs; i++)
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+			xe_exec_queue_last_fence_set(q, vm, fence);
+			dma_fence_put(fence);
+		}
+
+		goto err_exec;
+	}
+
 	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
 		err = -EWOULDBLOCK;
 		goto err_exec;
@@ -266,6 +285,10 @@ retry:
 		goto err_put_job;
 
 	if (!xe_vm_in_lr_mode(vm)) {
+		err = xe_sched_job_last_fence_add_dep(job, vm);
+		if (err)
+			goto err_put_job;
+
 		err = down_read_interruptible(&vm->userptr.notifier_lock);
 		if (err)
 			goto err_put_job;
@@ -290,6 +313,8 @@ retry:
 
 	if (xe_exec_queue_is_lr(q))
 		q->ring_ops->emit_job(job);
+	if (!xe_vm_in_lr_mode(vm))
+		xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
 	xe_sched_job_push(job);
 	xe_vm_reactivate_rebind(vm);
 
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 85bc25fe99edc..eeb9605dd45f6 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -886,7 +886,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
 						    struct xe_vm *vm)
 {
-	lockdep_assert_held_write(&vm->lock);
+	if (q->flags & EXEC_QUEUE_FLAG_VM)
+		lockdep_assert_held(&vm->lock);
+	else
+		xe_vm_assert_held(vm);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 6826feb650f34..c7aefa1c8c312 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -66,8 +66,9 @@ struct xe_exec_queue {
 	struct xe_hw_fence_irq *fence_irq;
 
 	/**
-	 * @last_fence: last fence on engine, protected by vm->lock in write
-	 * mode if bind engine
+	 * @last_fence: last fence on exec queue, protected by vm->lock in write
+	 * mode if bind exec queue, protected by dma resv lock if non-bind exec
+	 * queue
 	 */
 	struct dma_fence *last_fence;
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 2ca927f3fb2af..5fd0706a6045c 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1163,17 +1163,24 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 	return fence;
 }
 
-static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
+static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
+			struct xe_sync_entry *syncs, u32 num_syncs)
 {
+	struct dma_fence *fence;
 	int i;
 
 	for (i = 0; i < num_syncs; i++) {
-		struct dma_fence *fence = syncs[i].fence;
+		fence = syncs[i].fence;
 
 		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				       &fence->flags))
 			return false;
 	}
+	if (q) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			return false;
+	}
 
 	return true;
 }
@@ -1234,7 +1241,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
 
 	/* Use the CPU if no in syncs and engine is idle */
-	if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
+	if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
 		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
 							num_updates,
 							first_munmap_rebind,
@@ -1351,6 +1358,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 			goto err_job;
 	}
 
+	err = xe_sched_job_last_fence_add_dep(job, vm);
 	for (i = 0; !err && i < num_syncs; i++)
 		err = xe_sync_entry_add_deps(&syncs[i], job);
 
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index a9c7ae815bec5..01106a1156ad8 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -260,3 +260,21 @@ void xe_sched_job_push(struct xe_sched_job *job)
 	drm_sched_entity_push_job(&job->drm);
 	xe_sched_job_put(job);
 }
+
+/**
+ * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
+ * @job:job to add the last fence dependency to
+ * @vm: virtual memory job belongs to
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+
+	fence = xe_exec_queue_last_fence_get(job->q, vm);
+	dma_fence_get(fence);
+
+	return drm_sched_job_add_dependency(&job->drm, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 6ca1d426c036f..34f475ba7f502 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -8,6 +8,8 @@
 
 #include "xe_sched_job_types.h"
 
+struct xe_vm;
+
 #define XE_SCHED_HANG_LIMIT 1
 #define XE_SCHED_JOB_TIMEOUT LONG_MAX
 
@@ -54,6 +56,8 @@ bool xe_sched_job_completed(struct xe_sched_job *job);
 void xe_sched_job_arm(struct xe_sched_job *job);
 void xe_sched_job_push(struct xe_sched_job *job);
 
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
+
 static inline struct xe_sched_job *
 to_xe_sched_job(struct drm_sched_job *drm)
 {
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 2a3f508722fcf..e4c220cf9115e 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -5,6 +5,7 @@
 
 #include "xe_sync.h"
 
+#include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
 #include <linux/sched/mm.h>
 #include <linux/uaccess.h>
@@ -14,6 +15,7 @@
 #include <drm/xe_drm.h>
 
 #include "xe_device_types.h"
+#include "xe_exec_queue.h"
 #include "xe_macros.h"
 #include "xe_sched_job_types.h"
 
@@ -104,6 +106,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 	int err;
 	bool exec = flags & SYNC_PARSE_FLAG_EXEC;
 	bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE;
+	bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE;
 	bool signal;
 
 	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
@@ -164,6 +167,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 		break;
 
 	case DRM_XE_SYNC_TYPE_USER_FENCE:
+		if (XE_IOCTL_DBG(xe, disallow_user_fence))
+			return -EOPNOTSUPP;
+
 		if (XE_IOCTL_DBG(xe, !signal))
 			return -EOPNOTSUPP;
 
@@ -264,3 +270,75 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
 	if (sync->ufence)
 		user_fence_put(sync->ufence);
 }
+
+/**
+ * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM
+ * @sync: input syncs
+ * @num_sync: number of syncs
+ * @q: exec queue
+ * @vm: VM
+ *
+ * Get a fence from syncs, exec queue, and VM. If syncs contain in-fences create
+ * and return a composite fence of all in-fences + last fence. If no in-fences
+ * return last fence on  input exec queue. Caller must drop reference to
+ * returned fence.
+ *
+ * Return: fence on success, ERR_PTR(-ENOMEM) on failure
+ */
+struct dma_fence *
+xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
+		     struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct dma_fence *fence;
+	int i, num_in_fence = 0, current_fence = 0;
+
+	lockdep_assert_held(&vm->lock);
+
+	/* Count in-fences */
+	for (i = 0; i < num_sync; ++i) {
+		if (sync[i].fence) {
+			++num_in_fence;
+			fence = sync[i].fence;
+		}
+	}
+
+	/* Easy case... */
+	if (!num_in_fence) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		dma_fence_get(fence);
+		return fence;
+	}
+
+	/* Create composite fence */
+	fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; i < num_sync; ++i) {
+		if (sync[i].fence) {
+			dma_fence_get(sync[i].fence);
+			fences[current_fence++] = sync[i].fence;
+		}
+	}
+	fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
+	dma_fence_get(fences[current_fence - 1]);
+	cf = dma_fence_array_create(num_in_fence, fences,
+				    vm->composite_fence_ctx,
+				    vm->composite_fence_seqno++,
+				    false);
+	if (!cf) {
+		--vm->composite_fence_seqno;
+		goto err_out;
+	}
+
+	return &cf->base;
+
+err_out:
+	while (current_fence)
+		dma_fence_put(fences[--current_fence]);
+	kfree(fences);
+	kfree(cf);
+
+	return ERR_PTR(-ENOMEM);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 1b748cec46781..d284afbe917c1 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -9,11 +9,14 @@
 #include "xe_sync_types.h"
 
 struct xe_device;
+struct xe_exec_queue;
 struct xe_file;
 struct xe_sched_job;
+struct xe_vm;
 
 #define SYNC_PARSE_FLAG_EXEC			BIT(0)
 #define SYNC_PARSE_FLAG_LR_MODE			BIT(1)
+#define SYNC_PARSE_FLAG_DISALLOW_USER_FENCE	BIT(2)
 
 int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 			struct xe_sync_entry *sync,
@@ -26,5 +29,8 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync,
 			  struct xe_sched_job *job,
 			  struct dma_fence *fence);
 void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
+struct dma_fence *
+xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
+		     struct xe_exec_queue *q, struct xe_vm *vm);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d1e53905268f7..2f3df9ee67c9b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2722,7 +2722,6 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 		return -EINVAL;
 
 	if (XE_IOCTL_DBG(xe, args->extensions) ||
-	    XE_IOCTL_DBG(xe, !args->num_binds) ||
 	    XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
 		return -EINVAL;
 
@@ -2837,6 +2836,37 @@ free_bind_ops:
 	return err;
 }
 
+static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
+				       struct xe_exec_queue *q,
+				       struct xe_sync_entry *syncs,
+				       int num_syncs)
+{
+	struct dma_fence *fence;
+	int i, err = 0;
+
+	fence = xe_sync_in_fence_get(syncs, num_syncs,
+				     to_wait_exec_queue(vm, q), vm);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], NULL, fence);
+
+	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
+				     fence);
+
+	if (xe_vm_sync_mode(vm, q)) {
+		long timeout = dma_fence_wait(fence, true);
+
+		if (timeout < 0)
+			err = -EINTR;
+	}
+
+	dma_fence_put(fence);
+
+	return err;
+}
+
 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
@@ -2875,7 +2905,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto put_exec_queue;
 		}
 
-		if (XE_IOCTL_DBG(xe, async !=
+		if (XE_IOCTL_DBG(xe, args->num_binds && async !=
 				 !!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC))) {
 			err = -EINVAL;
 			goto put_exec_queue;
@@ -2889,7 +2919,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	}
 
 	if (!args->exec_queue_id) {
-		if (XE_IOCTL_DBG(xe, async !=
+		if (XE_IOCTL_DBG(xe, args->num_binds && async !=
 				 !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) {
 			err = -EINVAL;
 			goto put_vm;
@@ -2916,16 +2946,18 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}
 
-	bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
-	if (!bos) {
-		err = -ENOMEM;
-		goto release_vm_lock;
-	}
+	if (args->num_binds) {
+		bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
+		if (!bos) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
 
-	ops = kzalloc(sizeof(*ops) * args->num_binds, GFP_KERNEL);
-	if (!ops) {
-		err = -ENOMEM;
-		goto release_vm_lock;
+		ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
+		if (!ops) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
 	}
 
 	for (i = 0; i < args->num_binds; ++i) {
@@ -2995,12 +3027,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
 					  &syncs_user[num_syncs],
-					  xe_vm_in_lr_mode(vm) ?
-					  SYNC_PARSE_FLAG_LR_MODE : 0);
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0) |
+					  (!args->num_binds ?
+					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
 		if (err)
 			goto free_syncs;
 	}
 
+	if (!args->num_binds) {
+		err = -ENODATA;
+		goto free_syncs;
+	}
+
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
@@ -3058,12 +3097,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 unwind_ops:
 	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
 free_syncs:
-	for (i = 0; err == -ENODATA && i < num_syncs; i++) {
-		struct dma_fence *fence =
-			xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
-
-		xe_sync_entry_signal(&syncs[i], NULL, fence);
-	}
+	if (err == -ENODATA)
+		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
 	while (num_syncs--)
 		xe_sync_entry_cleanup(&syncs[num_syncs]);
 
@@ -3083,7 +3118,7 @@ free_objs:
 	kfree(ops);
 	if (args->num_binds > 1)
 		kfree(bind_ops);
-	return err == -ENODATA ? 0 : err;
+	return err;
 }
 
 /**
-- 
GitLab


From 7a18d36f88105c0964846dbf9f7f1b0d43e860db Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 12 Dec 2023 13:56:04 -0800
Subject: [PATCH 2216/2340] drm/xe: Remove duplicate RING_MAX_NONPRIV_SLOTS
 definition

The engine register header wound up with two definitions for
RING_MAX_NONPRIV_SLOTS, likely due to a rebase mistake.  Keep the
definition that's in an appropriate place (i.e., with the
FORCE_TO_NONPRIV register definition) and remove the other.

Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20231212215603.2041841-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 3942db268b014..1a857c4edcf5e 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -40,7 +40,6 @@
 #define RING_NOPID(base)			XE_REG((base) + 0x94)
 
 #define RING_IMR(base)				XE_REG((base) + 0xa8)
-#define   RING_MAX_NONPRIV_SLOTS  12
 
 #define RING_EIR(base)				XE_REG((base) + 0xb0)
 #define RING_EMR(base)				XE_REG((base) + 0xb4)
-- 
GitLab


From 4cb12b71923b6e2354093fbbde9bcadaec3d813f Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:24 +0530
Subject: [PATCH 2217/2340] drm/xe/xe2: Determine bios enablement for flat ccs
 on igfx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If bios disables flat ccs on igfx make has_flat_ccs as 0 and notify
via drm_dbg.

Bspec:59255

v2:
 - Release forcewake.
 - Add registers in order.
 - drop dgfx condition and only add it back in the future
when the support for an Xe2 dgpu will be added.
- Use drm_dbg instead of drm_info. (Matt)

v3:
 - Address nit(Matt)

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h |  3 +++
 drivers/gpu/drm/xe/xe_device.c       | 30 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5f5a72e9d0d89..f5bf4c6d1761e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -162,6 +162,9 @@
 #define XEHP_SQCM				XE_REG_MCR(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
+#define XE2_FLAT_CCS_BASE_RANGE_LOWER		XE_REG_MCR(0x8800)
+#define   XE2_FLAT_CCS_ENABLE			REG_BIT(0)
+
 #define GSCPSMI_BASE				XE_REG(0x880c)
 
 #define	MIRROR_FUSE3				XE_REG(0x9118)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 2e0b2e40d8f3b..82e3705b88cd9 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -15,6 +15,7 @@
 #include <drm/drm_print.h>
 #include <drm/xe_drm.h>
 
+#include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_debugfs.h"
@@ -26,6 +27,7 @@
 #include "xe_exec.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
+#include "xe_gt_mcr.h"
 #include "xe_irq.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
@@ -401,6 +403,30 @@ int xe_device_probe_early(struct xe_device *xe)
 	return 0;
 }
 
+static int xe_device_set_has_flat_ccs(struct  xe_device *xe)
+{
+	u32 reg;
+	int err;
+
+	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+		return 0;
+
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+	xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+	if (!xe->info.has_flat_ccs)
+		drm_dbg(&xe->drm,
+			"Flat CCS has been disabled in bios, May lead to performance impact");
+
+	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
 int xe_device_probe(struct xe_device *xe)
 {
 	struct xe_tile *tile;
@@ -456,6 +482,10 @@ int xe_device_probe(struct xe_device *xe)
 			goto err_irq_shutdown;
 	}
 
+	err = xe_device_set_has_flat_ccs(xe);
+	if (err)
+		return err;
+
 	err = xe_mmio_probe_vram(xe);
 	if (err)
 		goto err_irq_shutdown;
-- 
GitLab


From 064686272b7a7371eea32d5e7b89597cf5c70c0b Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:25 +0530
Subject: [PATCH 2218/2340] drm/xe/xe2: Modify main memory to ccs memory ratio.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On xe2 platforms each byte of CCS data now represents 512 bytes of
main memory data.

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 2 +-
 drivers/gpu/drm/xe/xe_device.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index f1c5bf203b3dc..1f9c32e694c6e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -16,7 +16,7 @@
 #define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 26)
 #define   XE2_XY_CTRL_SURF_MOCS_INDEX_MASK	GENMASK(31, 28)
 #define   NUM_CCS_BYTES_PER_BLOCK	256
-#define   NUM_BYTES_PER_CCS_BYTE	256
+#define   NUM_BYTES_PER_CCS_BYTE(_xe)	(GRAPHICS_VER(_xe) >= 20 ? 512 : 256)
 #define   NUM_CCS_BLKS_PER_XFER		1024
 
 #define XY_FAST_COLOR_BLT_CMD		(2 << 29 | 0x44 << 22)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 82e3705b88cd9..221e875843521 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -615,7 +615,7 @@ void xe_device_wmb(struct xe_device *xe)
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 {
 	return xe_device_has_flat_ccs(xe) ?
-		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
+		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
 }
 
 bool xe_device_mem_access_ongoing(struct xe_device *xe)
-- 
GitLab


From 20561efb0ffd199fec1caaa5a0de439fab69d89a Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:26 +0530
Subject: [PATCH 2219/2340] drm/xe/xe2: Allocate extra pages for ccs during bo
 create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Incase of bo move from PL_TT to PL_SYSTEM these pages will be used to
store ccs metadata from flat ccs. And during bo move to PL_TT from
PL_SYSTEM the metadata will be copied from extra pages to flat ccs. This
copy of ccs metadata ensures ccs remains unaltered between swapping out
of bo to disk and its restore to PL_TT.

Bspec:58796

v2:
 - For dgfx ensure system bit is not set.
 - Modify comments.(Thomas)

v3:
 - Separate out patch to modify main memory to ccs memory ratio.(Matt)

v4:
 - Update description for commit message.
 - Make bo allocation routine more readable.(Matt)

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 13ebe33bb7a22..c10aa5a63a701 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2160,19 +2160,24 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
  * placed in system memory.
  * @bo: The xe_bo
  *
- * If a bo has an allowable placement in XE_PL_TT memory, it can't use
- * flat CCS compression, because the GPU then has no way to access the
- * CCS metadata using relevant commands. For the opposite case, we need to
- * allocate storage for the CCS metadata when the BO is not resident in
- * VRAM memory.
- *
  * Return: true if extra pages need to be allocated, false otherwise.
  */
 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
 {
-	return bo->ttm.type == ttm_bo_type_device &&
-		!(bo->flags & XE_BO_CREATE_SYSTEM_BIT) &&
-		(bo->flags & XE_BO_CREATE_VRAM_MASK);
+	struct xe_device *xe = xe_bo_device(bo);
+
+	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
+		return false;
+
+	/* On discrete GPUs, if the GPU can access this buffer from
+	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
+	 * can't be used since there's no CCS storage associated with
+	 * non-VRAM addresses.
+	 */
+	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
+		return false;
+
+	return true;
 }
 
 /**
-- 
GitLab


From 9cca49021c81d05b84916b87092602be2c412e04 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:27 +0530
Subject: [PATCH 2220/2340] drm/xe/xe2: Updates on XY_CTRL_SURF_COPY_BLT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- The XY_CTRL_SURF_COPY_BLT instruction operating on ccs data expects
size in pages of main memory for which CCS data should be copied.
- The bitfield representing copy size in XY_CTRL_SURF_COPY_BLT has
shifted one bit higher in the instruction.

v2:
 - Fix the num_pages for ccs size calculation.
 - Address nits (Thomas)

v3:
- Use FIELD_PREP and FIELD_FIT instead of shifts and numbers.(Matt)

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gpu_commands.h |  5 ++---
 drivers/gpu/drm/xe/xe_migrate.c           | 21 +++++++++++++++------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 1f9c32e694c6e..a255946b6f77e 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -11,13 +11,12 @@
 #define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
 #define   SRC_ACCESS_TYPE_SHIFT		21
 #define   DST_ACCESS_TYPE_SHIFT		20
-#define   CCS_SIZE_MASK			0x3FF
-#define   CCS_SIZE_SHIFT		8
+#define   CCS_SIZE_MASK			GENMASK(17, 8)
+#define   XE2_CCS_SIZE_MASK		GENMASK(18, 9)
 #define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 26)
 #define   XE2_XY_CTRL_SURF_MOCS_INDEX_MASK	GENMASK(31, 28)
 #define   NUM_CCS_BYTES_PER_BLOCK	256
 #define   NUM_BYTES_PER_CCS_BYTE(_xe)	(GRAPHICS_VER(_xe) >= 20 ? 512 : 256)
-#define   NUM_CCS_BLKS_PER_XFER		1024
 
 #define XY_FAST_COLOR_BLT_CMD		(2 << 29 | 0x44 << 22)
 #define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 5fd0706a6045c..9895bab22f85f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -526,21 +526,30 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 *cs = bb->cs + bb->len;
 	u32 num_ccs_blks;
+	u32 num_pages;
+	u32 ccs_copy_size;
 	u32 mocs;
 
-	num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
-				    NUM_CCS_BYTES_PER_BLOCK);
-	xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
+	if (GRAPHICS_VERx100(xe) >= 2000) {
+		num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+		xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1));
 
-	if (GRAPHICS_VERx100(xe) >= 2000)
+		ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1);
 		mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
-	else
+
+	} else {
+		num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
+					    NUM_CCS_BYTES_PER_BLOCK);
+		xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1));
+
+		ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1);
 		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
+	}
 
 	*cs++ = XY_CTRL_SURF_COPY_BLT |
 		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
 		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
-		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
+		ccs_copy_size;
 	*cs++ = lower_32_bits(src_ofs);
 	*cs++ = upper_32_bits(src_ofs) | mocs;
 	*cs++ = lower_32_bits(dst_ofs);
-- 
GitLab


From 9116eabb6d5e26a7eceb6945327e9feb67019d41 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:28 +0530
Subject: [PATCH 2221/2340] drm/xe/xe_migrate: Use NULL 1G PTE mapped at 255GiB
 VA for ccs clear
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Get rid of the cleared bo, instead use null 1G PTE mapped at 255GiB
offset, this can be used for both dgfx and igfx.

v2:
 - Remove xe_migrate::cleared_bo.
 - Add a comment for NULL mapping.(Thomas)

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 65 ++++++---------------------------
 1 file changed, 11 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9895bab22f85f..c5ed20db84020 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -46,16 +46,12 @@ struct xe_migrate {
 	struct mutex job_mutex;
 	/** @pt_bo: Page-table buffer object. */
 	struct xe_bo *pt_bo;
-	/**
-	 * @cleared_bo: Zeroed out bo used as a source for CCS metadata clears
-	 */
-	struct xe_bo *cleared_bo;
 	/** @batch_base_ofs: VM offset of the migration batch buffer */
 	u64 batch_base_ofs;
 	/** @usm_batch_base_ofs: VM offset of the usm batch buffer */
 	u64 usm_batch_base_ofs;
-	/** @cleared_vram_ofs: VM offset of @cleared_bo. */
-	u64 cleared_vram_ofs;
+	/** @cleared_mem_ofs: VM offset of @cleared_bo. */
+	u64 cleared_mem_ofs;
 	/**
 	 * @fence: dma-fence representing the last migration job batch.
 	 * Protected by @job_mutex.
@@ -93,13 +89,9 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg)
 
 	xe_vm_lock(m->q->vm, false);
 	xe_bo_unpin(m->pt_bo);
-	if (m->cleared_bo)
-		xe_bo_unpin(m->cleared_bo);
 	xe_vm_unlock(m->q->vm);
 
 	dma_fence_put(m->fence);
-	if (m->cleared_bo)
-		xe_bo_put(m->cleared_bo);
 	xe_bo_put(m->pt_bo);
 	drm_suballoc_manager_fini(&m->vm_update_sa);
 	mutex_destroy(&m->job_mutex);
@@ -125,41 +117,6 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
 	return addr + (256ULL << xe_pt_shift(2));
 }
 
-/*
- * For flat CCS clearing we need a cleared chunk of memory to copy from,
- * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy
- * (it clears on only 14 bytes in each chunk of 16).
- * If clearing the main surface one can use the part of the main surface
- * already cleared, but for clearing as part of copying non-compressed
- * data out of system memory, we don't readily have a cleared part of
- * VRAM to copy from, so create one to use for that case.
- */
-static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
-{
-	struct xe_tile *tile = m->tile;
-	struct xe_device *xe = vm->xe;
-	size_t cleared_size;
-	u64 vram_addr;
-
-	if (!xe_device_has_flat_ccs(xe))
-		return 0;
-
-	cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER);
-	cleared_size = PAGE_ALIGN(cleared_size);
-	m->cleared_bo = xe_bo_create_pin_map(xe, tile, vm, cleared_size,
-					     ttm_bo_type_kernel,
-					     XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					     XE_BO_CREATE_PINNED_BIT);
-	if (IS_ERR(m->cleared_bo))
-		return PTR_ERR(m->cleared_bo);
-
-	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
-	vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE);
-	m->cleared_vram_ofs = xe_migrate_vram_ofs(xe, vram_addr);
-
-	return 0;
-}
-
 static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 				 struct xe_vm *vm)
 {
@@ -170,7 +127,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	u32 map_ofs, level, i;
 	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
 	u64 entry;
-	int ret;
 
 	/* Can't bump NUM_PT_SLOTS too high */
 	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
@@ -190,12 +146,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
-	ret = xe_migrate_create_cleared_bo(m, vm);
-	if (ret) {
-		xe_bo_put(bo);
-		return ret;
-	}
-
 	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
 	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
 
@@ -265,6 +215,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 			  (i + 1) * 8, u64, entry);
 	}
 
+	/* Set up a 1GiB NULL mapping at 255GiB offset. */
+	level = 2;
+	xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
+		  vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0)
+		  | XE_PTE_NULL);
+	m->cleared_mem_ofs = (255ULL << xe_pt_shift(level));
+
 	/* Identity map the entire vram at 256GiB offset */
 	if (IS_DGFX(xe)) {
 		u64 pos, ofs, flags;
@@ -618,7 +575,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 		 * Otherwise if the bo doesn't have any CCS metadata attached,
 		 * we still need to clear it for security reasons.
 		 */
-		u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_vram_ofs;
+		u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_mem_ofs;
 
 		emit_copy_ccs(gt, bb,
 			      dst_ofs, true,
@@ -1006,7 +963,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 			   clear_vram);
 		if (xe_device_has_flat_ccs(xe) && clear_vram) {
 			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
-				      m->cleared_vram_ofs, false, clear_L0);
+				      m->cleared_mem_ofs, false, clear_L0);
 			flush_flags = MI_FLUSH_DW_CCS;
 		}
 
-- 
GitLab


From 09427526793384fea6a13cc33ffebadb69fdcde4 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 5 Dec 2023 17:56:34 +0530
Subject: [PATCH 2222/2340] drm/xe/xe2: Update chunk size for each iteration of
 ccs copy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In xe2 platform XY_CTRL_SURF_COPY_BLT can handle ccs copy for
max of 1024 main surface pages.

v2:
 - Use better logic to determine chunk size (Matt/Thomas)

v3:
 - use function instead of macro(Thomas)

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 36 +++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index c5ed20db84020..40f49e47d79e8 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -65,9 +65,10 @@ struct xe_migrate {
 };
 
 #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
+#define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */
 #define NUM_KERNEL_PDE 17
 #define NUM_PT_SLOTS 32
-#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M)
+#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
 
 /**
  * xe_tile_migrate_engine() - Get this tile's migrate engine.
@@ -366,14 +367,22 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	return m;
 }
 
-static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur)
+static u64 max_mem_transfer_per_pass(struct xe_device *xe)
+{
+	if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
+		return MAX_CCS_LIMITED_TRANSFER;
+
+	return MAX_PREEMPTDISABLE_TRANSFER;
+}
+
+static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur)
 {
 	/*
 	 * For VRAM we use identity mapped pages so we are limited to current
 	 * cursor size. For system we program the pages ourselves so we have no
 	 * such limitation.
 	 */
-	return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER,
+	return min_t(u64, max_mem_transfer_per_pass(xe),
 		     mem_type_is_vram(cur->mem_type) ? cur->size :
 		     cur->remaining);
 }
@@ -672,10 +681,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		u32 update_idx;
 		u64 ccs_ofs, ccs_size;
 		u32 ccs_pt;
+
 		bool usm = xe->info.has_usm;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
 
-		src_L0 = xe_migrate_res_sizes(&src_it);
-		dst_L0 = xe_migrate_res_sizes(&dst_it);
+		src_L0 = xe_migrate_res_sizes(xe, &src_it);
+		dst_L0 = xe_migrate_res_sizes(xe, &dst_it);
 
 		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
 			pass++, src_L0, dst_L0);
@@ -684,18 +695,18 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 
 		batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
 					      &src_L0_ofs, &src_L0_pt, 0, 0,
-					      NUM_PT_PER_BLIT);
+					      avail_pts);
 
 		batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0,
 					      &dst_L0_ofs, &dst_L0_pt, 0,
-					      NUM_PT_PER_BLIT, NUM_PT_PER_BLIT);
+					      avail_pts, avail_pts);
 
 		if (copy_system_ccs) {
 			ccs_size = xe_device_ccs_bytes(xe, src_L0);
 			batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size,
 						      &ccs_ofs, &ccs_pt, 0,
-						      2 * NUM_PT_PER_BLIT,
-						      NUM_PT_PER_BLIT);
+						      2 * avail_pts,
+						      avail_pts);
 		}
 
 		/* Add copy commands size here */
@@ -922,9 +933,12 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		struct xe_sched_job *job;
 		struct xe_bb *bb;
 		u32 batch_size, update_idx;
+
 		bool usm = xe->info.has_usm;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		clear_L0 = xe_migrate_res_sizes(xe, &src_it);
 
-		clear_L0 = xe_migrate_res_sizes(&src_it);
 		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
 
 		/* Calculate final sizes and batch size.. */
@@ -932,7 +946,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 			pte_update_size(m, clear_vram, src, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
 					emit_clear_cmd_len(gt), 0,
-					NUM_PT_PER_BLIT);
+					avail_pts);
 		if (xe_device_has_flat_ccs(xe) && clear_vram)
 			batch_size += EMIT_COPY_CCS_DW;
 
-- 
GitLab


From 65ef8dbad1db9e35ca7af90e6958134595938d24 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:30 +0530
Subject: [PATCH 2223/2340] drm/xe/xe2: Update emit_pte to use compression
 enabled PAT index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For indirect accessed buffer use compression enabled PAT index.

v2:
 - Fix parameter name.

v3:
 - use a relevant define instead of fix number.

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  2 +-
 drivers/gpu/drm/xe/xe_migrate.c       | 21 +++++++++++++++------
 drivers/gpu/drm/xe/xe_pat.c           |  1 +
 drivers/gpu/drm/xe/xe_pt_types.h      |  1 +
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 47fcd6e6b7777..d6c23441632a4 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -330,7 +330,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	else
 		xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
 
-	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
+	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
 		 &src_it, XE_PAGE_SIZE, pt);
 
 	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 40f49e47d79e8..48ada083d0b3f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -425,15 +425,24 @@ static u32 pte_update_size(struct xe_migrate *m,
 
 static void emit_pte(struct xe_migrate *m,
 		     struct xe_bb *bb, u32 at_pt,
-		     bool is_vram,
+		     bool is_vram, bool is_comp_pte,
 		     struct xe_res_cursor *cur,
 		     u32 size, struct xe_bo *bo)
 {
-	u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+	struct xe_device *xe = tile_to_xe(m->tile);
+
+	u16 pat_index;
 	u32 ptes;
 	u64 ofs = at_pt * XE_PAGE_SIZE;
 	u64 cur_ofs;
 
+	/* Indirect access needs compression enabled uncached PAT index */
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] :
+					  xe->pat.idx[XE_CACHE_NONE];
+	else
+		pat_index = xe->pat.idx[XE_CACHE_WB];
+
 	/*
 	 * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
 	 * we're only emitting VRAM PTEs during sanity tests, so when
@@ -720,19 +729,19 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		}
 
 		if (!src_is_vram)
-			emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0,
+			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
 				 src_bo);
 		else
 			xe_res_next(&src_it, src_L0);
 
 		if (!dst_is_vram)
-			emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0,
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
 				 dst_bo);
 		else
 			xe_res_next(&dst_it, src_L0);
 
 		if (copy_system_ccs)
-			emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, src_bo);
+			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo);
 
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
@@ -965,7 +974,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		/* Preemption is enabled again by the ring ops. */
 		if (!clear_vram) {
-			emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
+			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
 				 bo);
 		} else {
 			xe_res_next(&src_it, clear_L0);
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 1892ff81086f4..1ff6bc79e7d44 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -387,6 +387,7 @@ void xe_pat_init_early(struct xe_device *xe)
 		xe->pat.idx[XE_CACHE_NONE] = 3;
 		xe->pat.idx[XE_CACHE_WT] = 15;
 		xe->pat.idx[XE_CACHE_WB] = 2;
+		xe->pat.idx[XE_CACHE_NONE_COMPRESSION] = 12; /*Applicable on xe2 and beyond */
 	} else if (xe->info.platform == XE_METEORLAKE) {
 		xe->pat.ops = &xelpg_pat_ops;
 		xe->pat.table = xelpg_pat_table;
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 82cbf1ef8e578..cee70cb0f0143 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -18,6 +18,7 @@ enum xe_cache_level {
 	XE_CACHE_NONE,
 	XE_CACHE_WT,
 	XE_CACHE_WB,
+	XE_CACHE_NONE_COMPRESSION, /*UC + COH_NONE + COMPRESSION */
 	__XE_CACHE_LEVEL_COUNT,
 };
 
-- 
GitLab


From 266c85885263022954928b125d46ab7a78c77a69 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:31 +0530
Subject: [PATCH 2224/2340] drm/xe/xe2: Handle flat ccs move for igfx.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Clear flat ccs during user bo creation.
- copy ccs meta data between flat ccs and bo during eviction and
restore.
- Add a bool field ccs_cleared in bo, true means ccs region of bo is
already cleared.

v2:
 - Rebase.

v3:
 - Maintain order of xe_bo_move_notify for ttm_bo_type_sg.

v4:
 - xe_migrate_copy can be used to copy src to dst bo on igfx too.
Add a bool which handles only ccs metadata copy.

v5:
- on dgfx ccs should be cleared even if the bo is not compression enabled.

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c |  4 +-
 drivers/gpu/drm/xe/xe_bo.c            | 33 +++++++++-----
 drivers/gpu/drm/xe/xe_bo_types.h      |  4 ++
 drivers/gpu/drm/xe/xe_migrate.c       | 64 +++++++++++++++------------
 drivers/gpu/drm/xe/xe_migrate.h       |  3 +-
 5 files changed, 66 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index d6c23441632a4..7a32faa2f6888 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -152,7 +152,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 
 	expected = 0xc0c0c0c0c0c0c0c0;
 	fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
-				bo->ttm.resource);
+				bo->ttm.resource, false);
 	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" :
 				 "Copying small bo remote -> vram", test)) {
 		retval = xe_map_rd(xe, &bo->vmap, 0, u64);
@@ -169,7 +169,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 	xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
 
 	fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
-				remote->ttm.resource);
+				remote->ttm.resource, false);
 	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> remote" :
 				 "Copying small bo vram -> remote", test)) {
 		retval = xe_map_rd(xe, &remote->vmap, 0, u64);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index c10aa5a63a701..7c0037aecff3a 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -627,10 +627,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	bool move_lacks_source;
 	bool tt_has_data;
 	bool needs_clear;
+	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
+				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
 	int ret = 0;
-
-	/* Bo creation path, moving to system or TT. No clearing required. */
-	if (!old_mem && ttm) {
+	/* Bo creation path, moving to system or TT. */
+	if ((!old_mem && ttm) && !handle_system_ccs) {
 		ttm_bo_move_null(ttm_bo, new_mem);
 		return 0;
 	}
@@ -645,14 +646,18 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
 
-	move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data;
+	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
+						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
 
 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
 		(!ttm && ttm_bo->type == ttm_bo_type_device);
 
-	if ((move_lacks_source && !needs_clear) ||
-	    (old_mem_type == XE_PL_SYSTEM &&
-	     new_mem->mem_type == XE_PL_TT)) {
+	if ((move_lacks_source && !needs_clear)) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
 		ttm_bo_move_null(ttm_bo, new_mem);
 		goto out;
 	}
@@ -683,8 +688,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 			ret = timeout;
 			goto out;
 		}
-		ttm_bo_move_null(ttm_bo, new_mem);
-		goto out;
+
+		if (!handle_system_ccs) {
+			ttm_bo_move_null(ttm_bo, new_mem);
+			goto out;
+		}
 	}
 
 	if (!move_lacks_source &&
@@ -705,6 +713,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
 	else if (mem_type_is_vram(old_mem_type))
 		migrate = mem_type_to_migrate(xe, old_mem_type);
+	else
+		migrate = xe->tiles[0].migrate;
 
 	xe_assert(xe, migrate);
 
@@ -747,8 +757,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		if (move_lacks_source)
 			fence = xe_migrate_clear(migrate, bo, new_mem);
 		else
-			fence = xe_migrate_copy(migrate,
-						bo, bo, old_mem, new_mem);
+			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
+						new_mem, handle_system_ccs);
 		if (IS_ERR(fence)) {
 			ret = PTR_ERR(fence);
 			xe_device_mem_access_put(xe);
@@ -1234,6 +1244,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 			return bo;
 	}
 
+	bo->ccs_cleared = false;
 	bo->tile = tile;
 	bo->size = size;
 	bo->flags = flags;
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index f71dbc5189586..64c2249a4e407 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -79,6 +79,10 @@ struct xe_bo {
 	struct llist_node freed;
 	/** @created: Whether the bo has passed initial creation */
 	bool created;
+
+	/** @ccs_cleared */
+	bool ccs_cleared;
+
 	/**
 	 * @cpu_caching: CPU caching mode. Currently only used for userspace
 	 * objects.
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 48ada083d0b3f..adf1dab5eba25 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -577,14 +577,14 @@ static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
 
 static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 			       struct xe_bb *bb,
-			       u64 src_ofs, bool src_is_vram,
-			       u64 dst_ofs, bool dst_is_vram, u32 dst_size,
+			       u64 src_ofs, bool src_is_indirect,
+			       u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
 			       u64 ccs_ofs, bool copy_ccs)
 {
 	struct xe_gt *gt = m->tile->primary_gt;
 	u32 flush_flags = 0;
 
-	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
+	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
 		/*
 		 * If the src is already in vram, then it should already
 		 * have been cleared by us, or has been populated by the
@@ -593,28 +593,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 		 * Otherwise if the bo doesn't have any CCS metadata attached,
 		 * we still need to clear it for security reasons.
 		 */
-		u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_mem_ofs;
+		u64 ccs_src_ofs =  src_is_indirect ? src_ofs : m->cleared_mem_ofs;
 
 		emit_copy_ccs(gt, bb,
 			      dst_ofs, true,
-			      ccs_src_ofs, src_is_vram, dst_size);
+			      ccs_src_ofs, src_is_indirect, dst_size);
 
 		flush_flags = MI_FLUSH_DW_CCS;
 	} else if (copy_ccs) {
-		if (!src_is_vram)
+		if (!src_is_indirect)
 			src_ofs = ccs_ofs;
-		else if (!dst_is_vram)
+		else if (!dst_is_indirect)
 			dst_ofs = ccs_ofs;
 
-		/*
-		 * At the moment, we don't support copying CCS metadata from
-		 * system to system.
-		 */
-		xe_gt_assert(gt, src_is_vram || dst_is_vram);
+		xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
 
-		emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
-			      src_is_vram, dst_size);
-		if (dst_is_vram)
+		emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
+			      src_is_indirect, dst_size);
+		if (dst_is_indirect)
 			flush_flags = MI_FLUSH_DW_CCS;
 	}
 
@@ -630,6 +626,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
  * the buffer object @dst is currently bound to.
  * @src: The source TTM resource.
  * @dst: The dst TTM resource.
+ * @copy_only_ccs: If true copy only CCS metadata
  *
  * Copies the contents of @src to @dst: On flat CCS devices,
  * the CCS metadata is copied as well if needed, or if not present,
@@ -643,7 +640,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct xe_bo *src_bo,
 				  struct xe_bo *dst_bo,
 				  struct ttm_resource *src,
-				  struct ttm_resource *dst)
+				  struct ttm_resource *dst,
+				  bool copy_only_ccs)
 {
 	struct xe_gt *gt = m->tile->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
@@ -655,6 +653,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 	u64 src_L0, dst_L0;
 	int pass = 0;
 	int err;
+	bool src_is_pltt = src->mem_type == XE_PL_TT;
+	bool dst_is_pltt = dst->mem_type == XE_PL_TT;
 	bool src_is_vram = mem_type_is_vram(src->mem_type);
 	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
 	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
@@ -719,8 +719,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		}
 
 		/* Add copy commands size here */
-		batch_size += EMIT_COPY_DW +
-			(xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
 
 		bb = xe_bb_new(gt, batch_size, usm);
 		if (IS_ERR(bb)) {
@@ -746,10 +746,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
-		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
-			  XE_PAGE_SIZE);
-		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
-						  dst_L0_ofs, dst_is_vram,
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+						  IS_DGFX(xe) ? src_is_vram : src_is_pltt,
+						  dst_L0_ofs,
+						  IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
 						  src_L0, ccs_ofs, copy_ccs);
 
 		mutex_lock(&m->job_mutex);
@@ -922,6 +925,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 	bool clear_vram = mem_type_is_vram(dst->mem_type);
 	struct xe_gt *gt = m->tile->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
+	bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false;
 	struct dma_fence *fence = NULL;
 	u64 size = bo->size;
 	struct xe_res_cursor src_it;
@@ -954,9 +958,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		batch_size = 2 +
 			pte_update_size(m, clear_vram, src, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
-					emit_clear_cmd_len(gt), 0,
+					clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
 					avail_pts);
-		if (xe_device_has_flat_ccs(xe) && clear_vram)
+
+		if (xe_device_has_flat_ccs(xe))
 			batch_size += EMIT_COPY_CCS_DW;
 
 		/* Clear commands */
@@ -971,7 +976,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		}
 
 		size -= clear_L0;
-
 		/* Preemption is enabled again by the ring ops. */
 		if (!clear_vram) {
 			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
@@ -982,9 +986,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
-		emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE,
-			   clear_vram);
-		if (xe_device_has_flat_ccs(xe) && clear_vram) {
+		if (!clear_system_ccs)
+			emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
+
+		if (xe_device_has_flat_ccs(xe)) {
 			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
 				      m->cleared_mem_ofs, false, clear_L0);
 			flush_flags = MI_FLUSH_DW_CCS;
@@ -1041,6 +1046,9 @@ err_sync:
 		return ERR_PTR(err);
 	}
 
+	if (clear_system_ccs)
+		bo->ccs_cleared = true;
+
 	return fence;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index c729241776adf..951f19318ea48 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -85,7 +85,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 				  struct xe_bo *src_bo,
 				  struct xe_bo *dst_bo,
 				  struct ttm_resource *src,
-				  struct ttm_resource *dst);
+				  struct ttm_resource *dst,
+				  bool copy_only_ccs);
 
 struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 				   struct xe_bo *bo,
-- 
GitLab


From d6abc18d66932adb163803f9c83a5fa90ca63ff4 Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:32 +0530
Subject: [PATCH 2225/2340] drm/xe/xe2: Modify xe_bo_test for system memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify test to valid ccs clear and copy during evict/restore on
igfx.

v2:
 -Vram is associated with tiles not with gt. Use tile based iterator
for ccs_test_run_gt. (Matt)

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 44 ++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 549ab343de803..412b2e7ce40cb 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -14,7 +14,7 @@
 #include "xe_pci.h"
 #include "xe_pm.h"
 
-static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
+static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
 			    bool clear, u64 get_val, u64 assign_val,
 			    struct kunit *test)
 {
@@ -36,7 +36,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
 
 	/* Optionally clear bo *and* CCS data in VRAM. */
 	if (clear) {
-		fence = xe_migrate_clear(gt_to_tile(gt)->migrate, bo, bo->ttm.resource);
+		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
 		if (IS_ERR(fence)) {
 			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
 			return PTR_ERR(fence);
@@ -91,7 +91,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
 	}
 
 	/* Check last CCS value, or at least last value in page. */
-	offset = xe_device_ccs_bytes(gt_to_xe(gt), bo->size);
+	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
 	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
 	if (cpu_map[offset] != get_val) {
 		KUNIT_FAIL(test,
@@ -108,39 +108,45 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
 	return ret;
 }
 
-static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt,
-			    struct kunit *test)
+static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
+			      struct kunit *test)
 {
 	struct xe_bo *bo;
-	u32 vram_bit;
+
 	int ret;
 
 	/* TODO: Sanity check */
-	vram_bit = XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id;
-	kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id,
-		   gt_to_tile(gt)->id);
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+
+	if (IS_DGFX(xe))
+		kunit_info(test, "Testing vram id %u\n", tile->id);
+	else
+		kunit_info(test, "Testing system memory\n");
+
+	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, bo_flags);
+
+	xe_bo_lock(bo, false);
 
-	bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device,
-				 vram_bit);
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "Failed to create bo.\n");
 		return;
 	}
 
 	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
-	ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
+	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
 			       test);
 	if (ret)
 		goto out_unlock;
 
 	kunit_info(test, "Verifying that CCS data survives migration.\n");
-	ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL,
+	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
 			       0xdeadbeefdeadbeefULL, test);
 	if (ret)
 		goto out_unlock;
 
 	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
-	ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test);
+	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
 
 out_unlock:
 	xe_bo_unlock(bo);
@@ -150,7 +156,7 @@ out_unlock:
 static int ccs_test_run_device(struct xe_device *xe)
 {
 	struct kunit *test = xe_cur_kunit();
-	struct xe_gt *gt;
+	struct xe_tile *tile;
 	int id;
 
 	if (!xe_device_has_flat_ccs(xe)) {
@@ -160,8 +166,12 @@ static int ccs_test_run_device(struct xe_device *xe)
 
 	xe_device_mem_access_get(xe);
 
-	for_each_gt(gt, xe, id)
-		ccs_test_run_gt(xe, gt, test);
+	for_each_tile(tile, xe, id) {
+		/* For igfx run only for primary tile */
+		if (!IS_DGFX(xe) && id > 0)
+			continue;
+		ccs_test_run_tile(xe, tile, test);
+	}
 
 	xe_device_mem_access_put(xe);
 
-- 
GitLab


From cbdc52c11c9b1df40ade23f622abc3466e4ee96c Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Tue, 12 Dec 2023 23:55:33 +0530
Subject: [PATCH 2226/2340] drm/xe/xe2: Support flat ccs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable flat ccs for XE2_GFX_FEATURES.

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 34dcc743e2c7c..88fb6c54ef990 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -164,7 +164,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
 #define XE2_GFX_FEATURES \
 	.dma_mask_size = 46, \
 	.has_asid = 1, \
-	.has_flat_ccs = 0 /* FIXME: implementation missing */, \
+	.has_flat_ccs = 1, \
 	.has_range_tlb_invalidation = 1, \
 	.has_usm = 0 /* FIXME: implementation missing */, \
 	.va_bits = 48, \
-- 
GitLab


From bc17ec0b201ec7b8576576aa0785787671b4afe7 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:01 -0800
Subject: [PATCH 2227/2340] drm/xe: Drop "_REG" suffix from CSFE_CHICKEN1

We don't use this suffix on any other registers, and it isn't part of
the register's official name either, so drop it for consistency.

While at it, move the register definition slightly so that it isn't
separating RING_CMD_CCTL's definition from its fields.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-11-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 9 ++++-----
 drivers/gpu/drm/xe/xe_wa.c               | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 1a857c4edcf5e..67da19f9836f9 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -46,11 +46,6 @@
 #define RING_ESR(base)				XE_REG((base) + 0xb8)
 
 #define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
-
-#define CSFE_CHICKEN1_REG(base)			XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED)
-#define   GHWSP_CSB_REPORT_DIS			REG_BIT(15)
-#define   PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS	REG_BIT(14)
-
 /*
  * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
  * The lsb of each can be considered a separate enabling bit for encryption.
@@ -61,6 +56,10 @@
 #define   CMD_CCTL_WRITE_OVERRIDE_MASK		REG_GENMASK(13, 8)
 #define   CMD_CCTL_READ_OVERRIDE_MASK		REG_GENMASK(6, 1)
 
+#define CSFE_CHICKEN1(base)			XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED)
+#define   GHWSP_CSB_REPORT_DIS			REG_BIT(15)
+#define   PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS	REG_BIT(14)
+
 #define RING_BBADDR(base)			XE_REG((base) + 0x140)
 #define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
 
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 23f1285135b8d..12829748bb6c1 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -605,7 +605,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	 */
 	{ XE_RTP_NAME("18032095049, 16021639441"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
-	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1_REG(0),
+	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
 			     GHWSP_CSB_REPORT_DIS |
 			     PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
 			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-- 
GitLab


From 5ea7fe65fb1cf95d9b48fcc3c7c806ce417357c2 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:02 -0800
Subject: [PATCH 2228/2340] drm/xe: Move some per-engine register definitions
 to the engine header

Although we only work with the RCS instances today, the
FF_SLICE_CS_CHICKEN1[1,2] CS_DEBUG_MODE1, CS_CHICKEN1, and
FF_THREAD_MODE registers all have instances on both the RCS and CCS
engines.  Convert these to parameterized macros and move them to the
engine register header.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-12-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 21 +++++++++++++++++++++
 drivers/gpu/drm/xe/regs/xe_gt_regs.h     | 18 ------------------
 drivers/gpu/drm/xe/regs/xe_regs.h        |  3 ---
 drivers/gpu/drm/xe/xe_wa.c               | 16 +++++++++-------
 4 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 67da19f9836f9..e109ef912706a 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -39,6 +39,9 @@
 #define RING_MI_MODE(base)			XE_REG((base) + 0x9c)
 #define RING_NOPID(base)			XE_REG((base) + 0x94)
 
+#define FF_THREAD_MODE(base)			XE_REG((base) + 0xa0)
+#define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
+
 #define RING_IMR(base)				XE_REG((base) + 0xa8)
 
 #define RING_EIR(base)				XE_REG((base) + 0xb0)
@@ -60,6 +63,16 @@
 #define   GHWSP_CSB_REPORT_DIS			REG_BIT(15)
 #define   PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS	REG_BIT(14)
 
+#define FF_SLICE_CS_CHICKEN1(base)		XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
+#define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
+
+#define FF_SLICE_CS_CHICKEN2(base)		XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED)
+#define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
+
+#define CS_DEBUG_MODE1(base)			XE_REG((base) + 0xec, XE_REG_OPTION_MASKED)
+#define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
+#define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
+
 #define RING_BBADDR(base)			XE_REG((base) + 0x140)
 #define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
 
@@ -115,6 +128,14 @@
 #define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
 #define	  EL_CTRL_LOAD				REG_BIT(0)
 
+#define CS_CHICKEN1(base)			XE_REG((base) + 0x580, XE_REG_OPTION_MASKED)
+#define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
+#define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
+#define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
+#define   PREEMPT_GPGPU_COMMAND_LEVEL		PREEMPT_GPGPU_LEVEL(1, 0)
+#define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
+#define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
+
 #define VDBOX_CGCTL3F08(base)                  XE_REG((base) + 0x3f08)
 #define   CG3DDISHRS_CLKGATE_DIS               REG_BIT(5)
 
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index f5bf4c6d1761e..4448507ef4caa 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -60,26 +60,8 @@
 #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
 #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
 
-#define FF_SLICE_CS_CHICKEN1			XE_REG(0x20e0, XE_REG_OPTION_MASKED)
-#define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
-
-#define FF_SLICE_CS_CHICKEN2			XE_REG(0x20e4, XE_REG_OPTION_MASKED)
-#define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
-
-#define CS_DEBUG_MODE1				XE_REG(0x20ec, XE_REG_OPTION_MASKED)
-#define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
-#define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
-
 #define PS_INVOCATION_COUNT			XE_REG(0x2348)
 
-#define CS_CHICKEN1				XE_REG(0x2580, XE_REG_OPTION_MASKED)
-#define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
-#define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
-#define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
-#define   PREEMPT_GPGPU_COMMAND_LEVEL		PREEMPT_GPGPU_LEVEL(1, 0)
-#define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
-#define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
-
 #define XELP_GLOBAL_MOCS(i)			XE_REG(0x4000 + (i) * 4)
 #define XEHP_GLOBAL_MOCS(i)			XE_REG_MCR(0x4000 + (i) * 4)
 #define CCS_AUX_INV				XE_REG(0x4208)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index ec9372aa739f0..4ac71b605487e 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -45,9 +45,6 @@
 #define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
 #define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
 
-#define FF_THREAD_MODE				XE_REG(0x20a0)
-#define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
-
 #define TIMESTAMP_OVERRIDE					XE_REG(0x44074)
 #define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
 #define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 12829748bb6c1..5f61dd87c5868 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -299,7 +299,7 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 static const struct xe_rtp_entry_sr engine_was[] = {
 	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(FF_THREAD_MODE,
+	  XE_RTP_ACTIONS(SET(FF_THREAD_MODE(RENDER_RING_BASE),
 			     FF_TESSELATION_DOP_GATE_DISABLE))
 	},
 	{ XE_RTP_NAME("1409804808"),
@@ -320,7 +320,8 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	},
 	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE))
+	  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1(RENDER_RING_BASE),
+			     FF_DOP_CLOCK_GATE_DISABLE))
 	},
 	{ XE_RTP_NAME("1406941453"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
@@ -328,7 +329,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	},
 	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1,
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1(RENDER_RING_BASE),
 			     FFSC_PERCTX_PREEMPT_CTRL))
 	},
 
@@ -419,7 +420,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	{ XE_RTP_NAME("16015675438"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2,
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
 			     PERF_FIX_BALANCING_CFE_DISABLE))
 	},
 	{ XE_RTP_NAME("18028616096"),
@@ -481,7 +482,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
 		       ENGINE_CLASS(RENDER),
 		       FUNC(xe_rtp_match_first_gslice_fused_off)),
-	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1,
+	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE),
 			     REPLAY_MODE_GRANULARITY))
 	},
 	{ XE_RTP_NAME("22010960976, 14013347512"),
@@ -540,7 +541,8 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	},
 	{ XE_RTP_NAME("16015675438"),
 	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE))
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
+			     PERF_FIX_BALANCING_CFE_DISABLE))
 	},
 	{ XE_RTP_NAME("14014999345"),
 	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE),
@@ -622,7 +624,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	},
 	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
-	  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1,
+	  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(RENDER_RING_BASE),
 				   PREEMPT_GPGPU_LEVEL_MASK,
 				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL))
 	},
-- 
GitLab


From 68df8642ea34bf313757b671f57a4d123458c3f8 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:03 -0800
Subject: [PATCH 2229/2340] drm/xe: Fix whitespace in register definitions

Our register headers use tabs to align the definition values.  Convert a
few definitions that were using spaces instead.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-13-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++--
 drivers/gpu/drm/xe/regs/xe_regs.h        | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index e109ef912706a..7f82bef3a0dba 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -136,8 +136,8 @@
 #define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
 #define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
 
-#define VDBOX_CGCTL3F08(base)                  XE_REG((base) + 0x3f08)
-#define   CG3DDISHRS_CLKGATE_DIS               REG_BIT(5)
+#define VDBOX_CGCTL3F08(base)			XE_REG((base) + 0x3f08)
+#define   CG3DDISHRS_CLKGATE_DIS		REG_BIT(5)
 
 #define VDBOX_CGCTL3F10(base)			XE_REG((base) + 0x3f10)
 #define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 4ac71b605487e..4b427ec8cbff7 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -34,9 +34,9 @@
 #define XEHPC_BCS7_RING_BASE			0x3ec000
 #define XEHPC_BCS8_RING_BASE			0x3ee000
 
-#define DG1_GSC_HECI2_BASE                      0x00259000
-#define PVC_GSC_HECI2_BASE                      0x00285000
-#define DG2_GSC_HECI2_BASE                      0x00374000
+#define DG1_GSC_HECI2_BASE			0x00259000
+#define PVC_GSC_HECI2_BASE			0x00285000
+#define DG2_GSC_HECI2_BASE			0x00374000
 
 #define GSCCS_RING_BASE				0x11a000
 #define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
-- 
GitLab


From f52e4e9065786dd20477879d834c5c33a3ae9498 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:04 -0800
Subject: [PATCH 2230/2340] drm/xe: Move engine base offsets to engine register
 header

These offsets are primarily used as parameters for the engine register
definitions, so it makes more sense to define them in the engine header
rather than the general register header.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-14-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_engine_regs.h | 33 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/regs/xe_regs.h        | 28 --------------------
 2 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 7f82bef3a0dba..5592774fc6903 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -10,6 +10,39 @@
 
 #include "regs/xe_reg_defs.h"
 
+/*
+ * These *_BASE values represent the MMIO offset where each hardware engine's
+ * registers start.  The other definitions in this header are parameterized
+ * macros that will take one of these values as a parameter.
+ */
+#define RENDER_RING_BASE			0x02000
+#define BSD_RING_BASE				0x1c0000
+#define BSD2_RING_BASE				0x1c4000
+#define BSD3_RING_BASE				0x1d0000
+#define BSD4_RING_BASE				0x1d4000
+#define XEHP_BSD5_RING_BASE			0x1e0000
+#define XEHP_BSD6_RING_BASE			0x1e4000
+#define XEHP_BSD7_RING_BASE			0x1f0000
+#define XEHP_BSD8_RING_BASE			0x1f4000
+#define VEBOX_RING_BASE				0x1c8000
+#define VEBOX2_RING_BASE			0x1d8000
+#define XEHP_VEBOX3_RING_BASE			0x1e8000
+#define XEHP_VEBOX4_RING_BASE			0x1f8000
+#define COMPUTE0_RING_BASE			0x1a000
+#define COMPUTE1_RING_BASE			0x1c000
+#define COMPUTE2_RING_BASE			0x1e000
+#define COMPUTE3_RING_BASE			0x26000
+#define BLT_RING_BASE				0x22000
+#define XEHPC_BCS1_RING_BASE			0x3e0000
+#define XEHPC_BCS2_RING_BASE			0x3e2000
+#define XEHPC_BCS3_RING_BASE			0x3e4000
+#define XEHPC_BCS4_RING_BASE			0x3e6000
+#define XEHPC_BCS5_RING_BASE			0x3e8000
+#define XEHPC_BCS6_RING_BASE			0x3ea000
+#define XEHPC_BCS7_RING_BASE			0x3ec000
+#define XEHPC_BCS8_RING_BASE			0x3ee000
+#define GSCCS_RING_BASE				0x11a000
+
 #define RING_TAIL(base)				XE_REG((base) + 0x30)
 
 #define RING_HEAD(base)				XE_REG((base) + 0x34)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 4b427ec8cbff7..b7d3b42ec0031 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -7,38 +7,10 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define RENDER_RING_BASE			0x02000
-#define BSD_RING_BASE				0x1c0000
-#define BSD2_RING_BASE				0x1c4000
-#define BSD3_RING_BASE				0x1d0000
-#define BSD4_RING_BASE				0x1d4000
-#define XEHP_BSD5_RING_BASE			0x1e0000
-#define XEHP_BSD6_RING_BASE			0x1e4000
-#define XEHP_BSD7_RING_BASE			0x1f0000
-#define XEHP_BSD8_RING_BASE			0x1f4000
-#define VEBOX_RING_BASE				0x1c8000
-#define VEBOX2_RING_BASE			0x1d8000
-#define XEHP_VEBOX3_RING_BASE			0x1e8000
-#define XEHP_VEBOX4_RING_BASE			0x1f8000
-#define COMPUTE0_RING_BASE			0x1a000
-#define COMPUTE1_RING_BASE			0x1c000
-#define COMPUTE2_RING_BASE			0x1e000
-#define COMPUTE3_RING_BASE			0x26000
-#define BLT_RING_BASE				0x22000
-#define XEHPC_BCS1_RING_BASE			0x3e0000
-#define XEHPC_BCS2_RING_BASE			0x3e2000
-#define XEHPC_BCS3_RING_BASE			0x3e4000
-#define XEHPC_BCS4_RING_BASE			0x3e6000
-#define XEHPC_BCS5_RING_BASE			0x3e8000
-#define XEHPC_BCS6_RING_BASE			0x3ea000
-#define XEHPC_BCS7_RING_BASE			0x3ec000
-#define XEHPC_BCS8_RING_BASE			0x3ee000
-
 #define DG1_GSC_HECI2_BASE			0x00259000
 #define PVC_GSC_HECI2_BASE			0x00285000
 #define DG2_GSC_HECI2_BASE			0x00374000
 
-#define GSCCS_RING_BASE				0x11a000
 #define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
 #define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
 #define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
-- 
GitLab


From 48e70d2a1a9c8d58c48b2840feda3aa3bc330a94 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:05 -0800
Subject: [PATCH 2231/2340] drm/xe: Move GSC HECI base offsets out of register
 header

These offsets are only used to setup the auxiliary device BAR
information and are never used for driver read/write operations.  Move
them to the GSC HECI file where they're actually used.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-15-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_regs.h | 4 ----
 drivers/gpu/drm/xe/xe_heci_gsc.c  | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index b7d3b42ec0031..67ce087e21d08 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -7,10 +7,6 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define DG1_GSC_HECI2_BASE			0x00259000
-#define PVC_GSC_HECI2_BASE			0x00285000
-#define DG2_GSC_HECI2_BASE			0x00374000
-
 #define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
 #define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
 #define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index d8e982e3d9a2c..19eda00d5cc4e 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -16,6 +16,10 @@
 
 #define GSC_BAR_LENGTH  0x00000FFC
 
+#define DG1_GSC_HECI2_BASE			0x259000
+#define PVC_GSC_HECI2_BASE			0x285000
+#define DG2_GSC_HECI2_BASE			0x374000
+
 static void heci_gsc_irq_mask(struct irq_data *d)
 {
 	/* generic irq handling */
-- 
GitLab


From 93536c2bcfb2c3c5e9b53c83f333f57d9b632e83 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:06 -0800
Subject: [PATCH 2232/2340] drm/xe: Define interrupt vector bits with the
 interrupt registers

The bit definitions had become a bit orphaned; move them to the same
location as the interrupt registers that they're used with.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-16-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 +++++
 drivers/gpu/drm/xe/regs/xe_regs.h    | 6 ------
 drivers/gpu/drm/xe/xe_hw_engine.c    | 1 -
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 4448507ef4caa..2c48de2076a60 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -456,6 +456,11 @@
 #define XEHPC_BCS3_BCS4_INTR_MASK		XE_REG(0x190114)
 #define XEHPC_BCS5_BCS6_INTR_MASK		XE_REG(0x190118)
 #define XEHPC_BCS7_BCS8_INTR_MASK		XE_REG(0x19011c)
+#define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
+#define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
+#define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
+#define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
+#define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
 
 #define PVC_GT0_PACKAGE_ENERGY_STATUS		XE_REG(0x281004)
 #define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 67ce087e21d08..2c214bb9b6714 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -7,12 +7,6 @@
 
 #include "regs/xe_reg_defs.h"
 
-#define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
-#define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
-#define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
-#define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
-#define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
-
 #define TIMESTAMP_OVERRIDE					XE_REG(0x44074)
 #define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
 #define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 86b863b990658..1fa5cf5eea978 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -9,7 +9,6 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
-#include "regs/xe_regs.h"
 #include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
-- 
GitLab


From aaa536a8877e61104ccb5ba5287beaa4e959539e Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:07 -0800
Subject: [PATCH 2233/2340] drm/xe: Re-sort GT register header

Keeping the register definitions sorted will make it easy to find
existing definitions and prevent accidental introduction of duplicate
definitions.

v2:
 - Reorder FUSE3/FUSE4 registers and move GT0_PERF_LIMIT_REASONS /
   MTL_MEDIA_PERF_LIMIT_REASONS to proper places.  (Lucas)

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-17-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_gt_regs.h | 76 ++++++++++++++--------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 2c48de2076a60..d7f52a634c116 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -42,11 +42,6 @@
 #define FORCEWAKE_ACK_GSC			XE_REG(0xdf8)
 #define FORCEWAKE_ACK_GT_MTL			XE_REG(0xdfc)
 
-/* L3 Cache Control */
-#define XELP_LNCFCMOCS(i)			XE_REG(0xb020 + (i) * 4)
-#define XEHP_LNCFCMOCS(i)			XE_REG_MCR(0xb020 + (i) * 4)
-#define LNCFCMOCS_REG_COUNT			32
-
 #define MCFG_MCR_SELECTOR			XE_REG(0xfd0)
 #define MTL_MCR_SELECTOR			XE_REG(0xfd4)
 #define SF_MCR_SELECTOR				XE_REG(0xfd8)
@@ -102,6 +97,12 @@
 #define CACHE_MODE_1				XE_REG(0x7004, XE_REG_OPTION_MASKED)
 #define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
 
+#define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
+
+#define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
+#define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
+#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
+
 #define XEHP_PSS_MODE2				XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
@@ -109,12 +110,6 @@
 #define   FLSH_IGNORES_PSD			REG_BIT(10)
 #define   FD_END_COLLECT			REG_BIT(5)
 
-#define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
-#define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
-#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
-
-#define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
-
 #define COMMON_SLICE_CHICKEN4			XE_REG(0x7300, XE_REG_OPTION_MASKED)
 #define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
 
@@ -149,6 +144,11 @@
 
 #define GSCPSMI_BASE				XE_REG(0x880c)
 
+/* Fuse readout registers for GT */
+#define XEHP_FUSE4				XE_REG(0x9114)
+#define   CCS_EN_MASK				REG_GENMASK(19, 16)
+#define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
+
 #define	MIRROR_FUSE3				XE_REG(0x9118)
 #define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
 #define   L3BANK_PAIR_COUNT			4
@@ -157,18 +157,14 @@
 #define   MAX_MSLICES				4
 #define   MEML3_EN_MASK				REG_GENMASK(3, 0)
 
-/* Fuse readout registers for GT */
-#define XEHP_FUSE4				XE_REG(0x9114)
-#define   CCS_EN_MASK				REG_GENMASK(19, 16)
-#define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
+#define XELP_EU_ENABLE				XE_REG(0x9134)	/* "_DISABLE" on Xe_LP */
+#define   XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		XE_REG(0x913c)
 
 #define GT_VEBOX_VDBOX_DISABLE			XE_REG(0x9140)
 #define   GT_VEBOX_DISABLE_MASK			REG_GENMASK(19, 16)
 #define   GT_VDBOX_DISABLE_MASK			REG_GENMASK(7, 0)
 
-#define XELP_EU_ENABLE				XE_REG(0x9134)	/* "_DISABLE" on Xe_LP */
-#define   XELP_EU_MASK				REG_GENMASK(7, 0)
-#define XELP_GT_GEOMETRY_DSS_ENABLE		XE_REG(0x913c)
 #define XEHP_GT_COMPUTE_DSS_ENABLE		XE_REG(0x9144)
 #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		XE_REG(0x9148)
 #define XE2_GT_COMPUTE_DSS_2			XE_REG(0x914c)
@@ -284,6 +280,11 @@
 #define XEHPC_LNCFMISCCFGREG0			XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED)
 #define   XEHPC_OVRLSCCC			REG_BIT(0)
 
+/* L3 Cache Control */
+#define XELP_LNCFCMOCS(i)			XE_REG(0xb020 + (i) * 4)
+#define XEHP_LNCFCMOCS(i)			XE_REG_MCR(0xb020 + (i) * 4)
+#define LNCFCMOCS_REG_COUNT			32
+
 #define XEHP_L3NODEARBCFG			XE_REG_MCR(0xb0b4)
 #define   XEHP_LNESPARE				REG_BIT(19)
 
@@ -360,13 +361,13 @@
 #define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
 #define   DISABLE_DOP_GATING			REG_BIT(0)
 
+#define RT_CTRL					XE_REG_MCR(0xe530)
+#define   DIS_NULL_QUERY			REG_BIT(10)
+
 #define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
 #define   DIS_ATOMIC_CHAINING_TYPED_WRITES	REG_BIT(3)
 
-#define RT_CTRL					XE_REG_MCR(0xe530)
-#define   DIS_NULL_QUERY			REG_BIT(10)
-
 #define LSC_CHICKEN_BIT_0			XE_REG_MCR(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
 #define   TGM_WRITE_EOM_FORCE			REG_BIT(17)
@@ -408,6 +409,7 @@
 #define   FORCEWAKE_USER			BIT(1)
 #define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 
+#define MTL_MEDIA_PERF_LIMIT_REASONS		XE_REG(0x138030)
 #define MTL_MEDIA_MC6				XE_REG(0x138048)
 
 #define GT_CORE_STATUS				XE_REG(0x138060)
@@ -418,16 +420,30 @@
 #define GT_GFX_RC6_LOCKED			XE_REG(0x138104)
 #define GT_GFX_RC6				XE_REG(0x138108)
 
+#define GT0_PERF_LIMIT_REASONS			XE_REG(0x1381a8)
+#define   GT0_PERF_LIMIT_REASONS_MASK		0xde3
+#define   PROCHOT_MASK				REG_BIT(0)
+#define   THERMAL_LIMIT_MASK			REG_BIT(1)
+#define   RATL_MASK				REG_BIT(5)
+#define   VR_THERMALERT_MASK			REG_BIT(6)
+#define   VR_TDC_MASK				REG_BIT(7)
+#define   POWER_LIMIT_4_MASK			REG_BIT(8)
+#define   POWER_LIMIT_1_MASK			REG_BIT(10)
+#define   POWER_LIMIT_2_MASK			REG_BIT(11)
+
 #define GT_PERF_STATUS				XE_REG(0x1381b4)
 #define   VOLTAGE_MASK				REG_GENMASK(10, 0)
 
 #define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4))
 
+#define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
+#define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
 #define GUC_SG_INTR_ENABLE			XE_REG(0x190038)
 #define   ENGINE1_MASK				REG_GENMASK(31, 16)
 #define   ENGINE0_MASK				REG_GENMASK(15, 0)
-
 #define GPM_WGBOXPERF_INTR_ENABLE		XE_REG(0x19003c)
+#define GUNIT_GSC_INTR_ENABLE			XE_REG(0x190044)
+#define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048)
 
 #define INTR_IDENTITY_REG(x)			XE_REG(0x190060 + ((x) * 4))
 #define   INTR_DATA_VALID			REG_BIT(31)
@@ -437,10 +453,6 @@
 #define   OTHER_GUC_INSTANCE			0
 #define   OTHER_GSC_INSTANCE			6
 
-#define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
-#define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
-#define GUNIT_GSC_INTR_ENABLE			XE_REG(0x190044)
-#define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048)
 #define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4))
 #define RCS0_RSVD_INTR_MASK			XE_REG(0x190090)
 #define BCS_RSVD_INTR_MASK			XE_REG(0x1900a0)
@@ -468,16 +480,4 @@
 #define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
 #define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
 
-#define GT0_PERF_LIMIT_REASONS			XE_REG(0x1381a8)
-#define   GT0_PERF_LIMIT_REASONS_MASK		0xde3
-#define   PROCHOT_MASK				REG_BIT(0)
-#define   THERMAL_LIMIT_MASK			REG_BIT(1)
-#define   RATL_MASK				REG_BIT(5)
-#define   VR_THERMALERT_MASK			REG_BIT(6)
-#define   VR_TDC_MASK				REG_BIT(7)
-#define   POWER_LIMIT_4_MASK			REG_BIT(8)
-#define   POWER_LIMIT_1_MASK			REG_BIT(10)
-#define   POWER_LIMIT_2_MASK			REG_BIT(11)
-#define MTL_MEDIA_PERF_LIMIT_REASONS		XE_REG(0x138030)
-
 #endif
-- 
GitLab


From 1374df38e9267bf4588fbc665b3a20afb479f5ac Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 14 Dec 2023 10:47:08 -0800
Subject: [PATCH 2234/2340] drm/xe: Drop some unnecessary header includes

Several files were including register headers that they no longer
require.  Drop the unnecessary includes to reduce build dependencies.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231214184659.2249559-18-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_execlist.c | 1 -
 drivers/gpu/drm/xe/xe_ggtt.c     | 1 -
 drivers/gpu/drm/xe/xe_heci_gsc.c | 1 -
 drivers/gpu/drm/xe/xe_lrc.c      | 2 --
 drivers/gpu/drm/xe/xe_pci.c      | 1 -
 drivers/gpu/drm/xe/xe_ring_ops.c | 1 -
 6 files changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index d82b50de144ea..507b6baec3dc4 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -12,7 +12,6 @@
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
-#include "regs/xe_regs.h"
 #include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 374ae4289fa09..3efd2d066bf72 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -11,7 +11,6 @@
 #include <drm/i915_drm.h>
 
 #include "regs/xe_gt_regs.h"
-#include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 19eda00d5cc4e..bfdd33b9b23b4 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -8,7 +8,6 @@
 #include <linux/pci.h>
 #include <linux/sizes.h>
 
-#include "regs/xe_regs.h"
 #include "xe_device_types.h"
 #include "xe_drv.h"
 #include "xe_heci_gsc.h"
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index d6dfbd0bdc70e..b7fa3831b6845 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -9,9 +9,7 @@
 #include "instructions/xe_gfxpipe_commands.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gpu_commands.h"
-#include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
-#include "regs/xe_regs.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 88fb6c54ef990..dcc5ded1558e7 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -15,7 +15,6 @@
 #include <drm/drm_drv.h>
 #include <drm/xe_pciids.h>
 
-#include "regs/xe_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "xe_device.h"
 #include "xe_display.h"
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 1201e42ef823c..1e4c06eacd984 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -11,7 +11,6 @@
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
-#include "regs/xe_regs.h"
 #include "xe_exec_queue_types.h"
 #include "xe_gt.h"
 #include "xe_lrc.h"
-- 
GitLab


From 40fb5ed290d49b568d8547ecfdc5bd83f217dfe1 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 14 Dec 2023 13:39:52 -0800
Subject: [PATCH 2235/2340] drm/xe: Return error if drm_buddy_init() fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix warning:
	../drivers/gpu/drm/xe/xe_ttm_vram_mgr.c: In function ‘__xe_ttm_vram_mgr_init’:
	../drivers/gpu/drm/xe/xe_ttm_vram_mgr.c:340:13: error: variable ‘err’ set but not used [-Werror=unused-but-set-variable]
	  340 |         int err;
	      |             ^~~

Check for the error return and return it, like done by other drivers.

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 953e5dc0fd808..115ec745e5029 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -348,6 +348,8 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
 
 	ttm_resource_manager_init(man, &xe->ttm, size);
 	err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
+	if (err)
+		return err;
 
 	ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager);
 	ttm_resource_manager_set_used(&mgr->manager, true);
-- 
GitLab


From 80166e95679742588bd6c17ede46fa46867739f7 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 14 Dec 2023 13:39:53 -0800
Subject: [PATCH 2236/2340] drm/xe/bo: Remove unusued variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

bo is not used since all the checks are against tbo. Fix warning:

	../drivers/gpu/drm/xe/xe_bo.c: In function ‘xe_evict_flags’:
	../drivers/gpu/drm/xe/xe_bo.c:250:23: error: variable ‘bo’ set but not used [-Werror=unused-but-set-variable]
	  250 |         struct xe_bo *bo;

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 7c0037aecff3a..8e4a3b1f6b938 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -247,8 +247,6 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 static void xe_evict_flags(struct ttm_buffer_object *tbo,
 			   struct ttm_placement *placement)
 {
-	struct xe_bo *bo;
-
 	if (!xe_bo_is_xe_bo(tbo)) {
 		/* Don't handle scatter gather BOs */
 		if (tbo->type == ttm_bo_type_sg) {
@@ -265,8 +263,6 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
 	 * For xe, sg bos that are evicted to system just triggers a
 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
 	 */
-
-	bo = ttm_to_xe_bo(tbo);
 	switch (tbo->resource->mem_type) {
 	case XE_PL_VRAM0:
 	case XE_PL_VRAM1:
-- 
GitLab


From 73486d750f56ec612b2e02aa06ceb2171a8c5e93 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 15 Dec 2023 12:33:31 -0800
Subject: [PATCH 2237/2340] drm/xe/display: Fix dummy
 __i915_inject_probe_error()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_DRM_I915_DEBUG is not set, a dummy
__i915_inject_probe_error() is provided on the xe side. Use the same
logic as in drivers/gpu/drm/i915/i915_utils.c to ifdef it out. This
fixes the build with W=1 and without that config:

	  CC [M]  drivers/gpu/drm/xe/display/ext/i915_utils.o
	../drivers/gpu/drm/xe/display/ext/i915_utils.c:19:5: error: no previous prototype for ‘__i915_inject_probe_error’ [-Werror=missing-prototypes]
	   19 | int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
	      |     ^~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/display/ext/i915_utils.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c
index 981edc2788bc3..43b10a2cc5085 100644
--- a/drivers/gpu/drm/xe/display/ext/i915_utils.c
+++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c
@@ -14,9 +14,13 @@ bool i915_vtd_active(struct drm_i915_private *i915)
 	return i915_run_as_guest();
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+
 /* i915 specific, just put here for shutting it up */
 int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
 			      const char *func, int line)
 {
 	return 0;
 }
+
+#endif
-- 
GitLab


From 7a8bc11782d39e4d35dc7e78405dfe052cbba9cf Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 14 Dec 2023 13:39:54 -0800
Subject: [PATCH 2238/2340] drm/xe: Enable W=1 warnings by default

Like done in commit 2250c7ead8ad ("drm/i915: enable W=1 warnings by default")
for i915, enable W=1 warnings by default in xe.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile | 47 ++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index f4ae063a70053..6790c049d89ea 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -3,25 +3,34 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-# Add a set of useful warning flags and enable -Werror for CI to prevent
-# trivial mistakes from creeping in. We have to do this piecemeal as we reject
-# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we
-# need to filter out dubious warnings.  Still it is our interest
-# to keep running locally with W=1 C=1 until we are completely clean.
-#
-# Note the danger in using -Wall -Wextra is that when CI updates gcc we
-# will most likely get a sudden build breakage... Hopefully we will fix
-# new warnings before CI updates!
-subdir-ccflags-y := -Wall -Wextra
-subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
-subdir-ccflags-y += $(call cc-disable-warning, type-limits)
-subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
-subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
-# clang warnings
-subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
-subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
-subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
-subdir-ccflags-y += $(call cc-disable-warning, frame-address)
+# Unconditionally enable W=1 warnings locally
+# --- begin copy-paste W=1 warnings from scripts/Makefile.extrawarn
+subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter
+subdir-ccflags-y += -Wmissing-declarations
+subdir-ccflags-y += $(call cc-option, -Wrestrict)
+subdir-ccflags-y += -Wmissing-format-attribute
+subdir-ccflags-y += -Wmissing-prototypes
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
+subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
+subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
+subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
+subdir-ccflags-y += $(call cc-option, -Wstringop-overflow)
+subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+# The following turn off the warnings enabled by -Wextra
+ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-shift-negative-value
+endif
+ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-sign-compare
+endif
+# --- end copy-paste
+
+# Enable -Werror in CI and development
 subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
 
 subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src)
-- 
GitLab


From 9212da07187f86db8bd124b1ce551a18b8a710d6 Mon Sep 17 00:00:00 2001
From: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
Date: Fri, 15 Dec 2023 15:45:33 +0000
Subject: [PATCH 2239/2340] drm/xe/uapi: add exec_queue_id member to
 drm_xe_wait_user_fence structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

remove the num_engines/instances members from drm_xe_wait_user_fence
structure and add a exec_queue_id member

Right now this is only checking if the engine list is sane and nothing
else. In the end every operation with this IOCTL is a soft check.
So, let's formalize that and only use this IOCTL to wait on the fence.

exec_queue_id member will help to user space to get proper error code
from kernel while in exec_queue reset

Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Francois Dugast <francois.dugast@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 65 +------------------------
 include/uapi/drm/xe_drm.h               | 17 ++-----
 2 files changed, 7 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 4d5c2555ce41b..59af65b6ed896 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -50,37 +50,7 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
 	return passed ? 0 : 1;
 }
 
-static const enum xe_engine_class user_to_xe_engine_class[] = {
-	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
-	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
-	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
-	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
-	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
-};
-
-static int check_hw_engines(struct xe_device *xe,
-			    struct drm_xe_engine_class_instance *eci,
-			    int num_engines)
-{
-	int i;
-
-	for (i = 0; i < num_engines; ++i) {
-		enum xe_engine_class user_class =
-			user_to_xe_engine_class[eci[i].engine_class];
-
-		if (eci[i].gt_id >= xe->info.tile_count)
-			return -EINVAL;
-
-		if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id),
-				     user_class, eci[i].engine_instance, true))
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-#define VALID_FLAGS	(DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP | \
-			 DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
+#define VALID_FLAGS	DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
 #define MAX_OP		DRM_XE_UFENCE_WAIT_OP_LTE
 
 static long to_jiffies_timeout(struct xe_device *xe,
@@ -132,16 +102,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	struct xe_device *xe = to_xe_device(dev);
 	DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
 	struct drm_xe_wait_user_fence *args = data;
-	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
-	struct drm_xe_engine_class_instance __user *user_eci =
-		u64_to_user_ptr(args->instances);
 	u64 addr = args->addr;
 	int err;
-	bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP;
 	long timeout;
 	ktime_t start;
 
 	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->pad2) ||
 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
@@ -151,41 +118,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, args->op > MAX_OP))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, no_engines &&
-			 (args->num_engines || args->instances)))
-		return -EINVAL;
-
-	if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines))
-		return -EINVAL;
-
 	if (XE_IOCTL_DBG(xe, addr & 0x7))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE))
-		return -EINVAL;
-
-	if (!no_engines) {
-		err = copy_from_user(eci, user_eci,
-				     sizeof(struct drm_xe_engine_class_instance) *
-			     args->num_engines);
-		if (XE_IOCTL_DBG(xe, err))
-			return -EFAULT;
-
-		if (XE_IOCTL_DBG(xe, check_hw_engines(xe, eci,
-						      args->num_engines)))
-			return -EINVAL;
-	}
-
 	timeout = to_jiffies_timeout(xe, args);
 
 	start = ktime_get();
 
-	/*
-	 * FIXME: Very simple implementation at the moment, single wait queue
-	 * for everything. Could be optimized to have a wait queue for every
-	 * hardware engine. Open coding as 'do_compare' can sleep which doesn't
-	 * work with the wait_event_* macros.
-	 */
 	add_wait_queue(&xe->ufence_wq, &w_wait);
 	for (;;) {
 		err = do_compare(addr, args->value, args->mask, args->op);
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 0895e4d2a9815..5a8e3b326347f 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1031,8 +1031,7 @@ struct drm_xe_wait_user_fence {
 	/** @op: wait operation (type of comparison) */
 	__u16 op;
 
-#define DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP	(1 << 0)	/* e.g. Wait on VM bind */
-#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME	(1 << 1)
+#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME	(1 << 0)
 	/** @flags: wait flags */
 	__u16 flags;
 
@@ -1065,17 +1064,11 @@ struct drm_xe_wait_user_fence {
 	 */
 	__s64 timeout;
 
-	/**
-	 * @num_engines: number of engine instances to wait on, must be zero
-	 * when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
-	 */
-	__u64 num_engines;
+	/** @exec_queue_id: exec_queue_id returned from xe_exec_queue_create_ioctl */
+	__u32 exec_queue_id;
 
-	/**
-	 * @instances: user pointer to array of drm_xe_engine_class_instance to
-	 * wait on, must be NULL when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
-	 */
-	__u64 instances;
+	/** @pad2: MBZ */
+	__u32 pad2;
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
-- 
GitLab


From e670f0b4ef2419a7a51d1726044c8715ff4d4cda Mon Sep 17 00:00:00 2001
From: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
Date: Fri, 15 Dec 2023 15:45:34 +0000
Subject: [PATCH 2240/2340] drm/xe/uapi: Return correct error code for
 xe_wait_user_fence_ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently xe_wait_user_fence_ioctl is not checking exec_queue state
and blocking until timeout, with this patch wakeup the blocking wait
if exec_queue reset happen and returning proper error code

Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
Cc: Oak Zeng <oak.zeng@intel.com>
Cc: Kempczynski Zbigniew <Zbigniew.Kempczynski@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  2 ++
 drivers/gpu/drm/xe/xe_execlist.c         |  7 ++++++
 drivers/gpu/drm/xe/xe_guc_submit.c       | 10 ++++++++
 drivers/gpu/drm/xe/xe_wait_user_fence.c  | 30 +++++++++++++++++++-----
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index c7aefa1c8c312..bcf08b00d94a1 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -217,6 +217,8 @@ struct xe_exec_queue_ops {
 	 * signalled when this function is called.
 	 */
 	void (*resume)(struct xe_exec_queue *q);
+	/** @reset_status: check exec queue reset status */
+	bool (*reset_status)(struct xe_exec_queue *q);
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 507b6baec3dc4..96b5224eb4787 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -442,6 +442,12 @@ static void execlist_exec_queue_resume(struct xe_exec_queue *q)
 	/* NIY */
 }
 
+static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+	/* NIY */
+	return false;
+}
+
 static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
 	.init = execlist_exec_queue_init,
 	.kill = execlist_exec_queue_kill,
@@ -453,6 +459,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
 	.suspend = execlist_exec_queue_suspend,
 	.suspend_wait = execlist_exec_queue_suspend_wait,
 	.resume = execlist_exec_queue_resume,
+	.reset_status = execlist_exec_queue_reset_status,
 };
 
 int xe_execlist_init(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index ad5e19ecd33c9..21ac68e3246f8 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -854,6 +854,10 @@ static void simple_error_capture(struct xe_exec_queue *q)
 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	/** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
+	wake_up_all(&xe->ufence_wq);
 
 	if (xe_exec_queue_is_lr(q))
 		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
@@ -1394,6 +1398,11 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q)
 	guc_exec_queue_add_msg(q, msg, RESUME);
 }
 
+static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+	return exec_queue_reset(q);
+}
+
 /*
  * All of these functions are an abstraction layer which other parts of XE can
  * use to trap into the GuC backend. All of these functions, aside from init,
@@ -1411,6 +1420,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
 	.suspend = guc_exec_queue_suspend,
 	.suspend_wait = guc_exec_queue_suspend_wait,
 	.resume = guc_exec_queue_resume,
+	.reset_status = guc_exec_queue_reset_status,
 };
 
 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index 59af65b6ed896..b0a7896f7fcb6 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -13,6 +13,7 @@
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_macros.h"
+#include "xe_exec_queue.h"
 
 static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
 {
@@ -100,10 +101,12 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
 	DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
 	struct drm_xe_wait_user_fence *args = data;
+	struct xe_exec_queue *q = NULL;
 	u64 addr = args->addr;
-	int err;
+	int err = 0;
 	long timeout;
 	ktime_t start;
 
@@ -121,6 +124,12 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, addr & 0x7))
 		return -EINVAL;
 
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q))
+			return -ENOENT;
+	}
+
 	timeout = to_jiffies_timeout(xe, args);
 
 	start = ktime_get();
@@ -136,6 +145,14 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 			break;
 		}
 
+		if (q) {
+			if (q->ops->reset_status(q)) {
+				drm_info(&xe->drm, "exec gueue reset detected\n");
+				err = -EIO;
+				break;
+			}
+		}
+
 		if (!timeout) {
 			err = -ETIME;
 			break;
@@ -151,10 +168,11 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 			args->timeout = 0;
 	}
 
-	if (XE_IOCTL_DBG(xe, err < 0))
-		return err;
-	else if (XE_IOCTL_DBG(xe, !timeout))
-		return -ETIME;
+	if (!timeout && !(err < 0))
+		err = -ETIME;
+
+	if (q)
+		xe_exec_queue_put(q);
 
-	return 0;
+	return err;
 }
-- 
GitLab


From e4f0cc64669bb52e259da49c7c1d5954ae8014c5 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:35 +0000
Subject: [PATCH 2241/2340] drm/xe/uapi: Remove
 DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The exec_queue_set_property feature was removed in a previous
commit 0f1d88f27864 ("drm/xe/uapi: Kill exec_queue_set_property") and
is no longer usable, struct drm_xe_exec_queue_set_property does not
exist anymore, so let's remove this.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 5a8e3b326347f..128369299e498 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -118,7 +118,6 @@ struct xe_user_extension {
 #define DRM_IOCTL_XE_VM_BIND			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
 #define DRM_IOCTL_XE_EXEC_QUEUE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
 #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
-#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY	DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
 #define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
-- 
GitLab


From 9d329b4cea1449b4f4948a5f495e2d1db223ad7a Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:36 +0000
Subject: [PATCH 2242/2340] drm/xe/uapi: Remove DRM_XE_UFENCE_WAIT_MASK_*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those are just possible values for the comparison mask but they are not
specific magic values. Let's keep them as examples in the documentation
but remove them from the uAPI.

Suggested-by: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 128369299e498..d122f985435a0 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1040,11 +1040,13 @@ struct drm_xe_wait_user_fence {
 	/** @value: compare value */
 	__u64 value;
 
-#define DRM_XE_UFENCE_WAIT_MASK_U8	0xffu
-#define DRM_XE_UFENCE_WAIT_MASK_U16	0xffffu
-#define DRM_XE_UFENCE_WAIT_MASK_U32	0xffffffffu
-#define DRM_XE_UFENCE_WAIT_MASK_U64	0xffffffffffffffffu
-	/** @mask: comparison mask */
+	/**
+	 * @mask: comparison mask, values can be for example:
+	 *  - 0xffu for u8
+	 *  - 0xffffu for u16
+	 *  - 0xffffffffu for u32
+	 *  - 0xffffffffffffffffu for u64
+	 */
 	__u64 mask;
 
 	/**
-- 
GitLab


From 90a8b23f9b85a05ac3147498c42b32348bfcc274 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Fri, 15 Dec 2023 15:45:37 +0000
Subject: [PATCH 2243/2340] drm/xe/pmu: Remove PMU from Xe till uapi is
 finalized
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PMU uapi is likely to change in the future. Till the uapi is finalized,
remove PMU from Xe. PMU can be re-added after uapi is finalized.

v2: Include xe_drm.h in xe/tests/xe_dma_buf.c (Francois)

Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Acked-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/Makefile           |   2 -
 drivers/gpu/drm/xe/regs/xe_gt_regs.h  |   5 -
 drivers/gpu/drm/xe/tests/xe_dma_buf.c |   2 +
 drivers/gpu/drm/xe/xe_device.c        |   2 -
 drivers/gpu/drm/xe/xe_device_types.h  |   4 -
 drivers/gpu/drm/xe/xe_gt.c            |   2 -
 drivers/gpu/drm/xe/xe_module.c        |   5 -
 drivers/gpu/drm/xe/xe_pmu.c           | 645 --------------------------
 drivers/gpu/drm/xe/xe_pmu.h           |  25 -
 drivers/gpu/drm/xe/xe_pmu_types.h     |  68 ---
 include/uapi/drm/xe_drm.h             |  40 --
 11 files changed, 2 insertions(+), 798 deletions(-)
 delete mode 100644 drivers/gpu/drm/xe/xe_pmu.c
 delete mode 100644 drivers/gpu/drm/xe/xe_pmu.h
 delete mode 100644 drivers/gpu/drm/xe/xe_pmu_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 6790c049d89ea..53bd2a8ba1ae5 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -276,8 +276,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/skl_universal_plane.o \
 	i915-display/skl_watermark.o
 
-xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o
-
 ifeq ($(CONFIG_ACPI),y)
 	xe-$(CONFIG_DRM_XE_DISPLAY) += \
 		i915-display/intel_acpi.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d7f52a634c116..1dd361046b5dc 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -316,11 +316,6 @@
 #define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
 #define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
 
-#define XE_OAG_RC0_ANY_ENGINE_BUSY_FREE		XE_REG(0xdb80)
-#define XE_OAG_ANY_MEDIA_FF_BUSY_FREE		XE_REG(0xdba0)
-#define XE_OAG_BLT_BUSY_FREE			XE_REG(0xdbbc)
-#define XE_OAG_RENDER_BUSY_FREE			XE_REG(0xdbdc)
-
 #define HALF_SLICE_CHICKEN5			XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
 #define   DISABLE_SAMPLE_G_PERFORMANCE		REG_BIT(0)
 
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index bb6f6424e06f6..9f6d571d7fa9c 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include <drm/xe_drm.h>
+
 #include <kunit/test.h>
 #include <kunit/visibility.h>
 
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 221e875843521..d9ae77fe7382d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -529,8 +529,6 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_debugfs_register(xe);
 
-	xe_pmu_register(&xe->pmu);
-
 	xe_hwmon_register(xe);
 
 	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index d1a48456e9a31..c45ef17b34732 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -18,7 +18,6 @@
 #include "xe_lmtt_types.h"
 #include "xe_platform_types.h"
 #include "xe_pt_types.h"
-#include "xe_pmu.h"
 #include "xe_sriov_types.h"
 #include "xe_step_types.h"
 
@@ -427,9 +426,6 @@ struct xe_device {
 	 */
 	struct task_struct *pm_callback_task;
 
-	/** @pmu: performance monitoring unit */
-	struct xe_pmu pmu;
-
 	/** @hwmon: hwmon subsystem integration */
 	struct xe_hwmon *hwmon;
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index dfd9cf01a5d58..f5d18e98f8b6c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -709,8 +709,6 @@ int xe_gt_suspend(struct xe_gt *gt)
 	if (err)
 		goto err_msg;
 
-	xe_pmu_suspend(gt);
-
 	err = xe_uc_suspend(&gt->uc);
 	if (err)
 		goto err_force_wake;
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 51bf69b7ab222..110b698646565 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -11,7 +11,6 @@
 #include "xe_drv.h"
 #include "xe_hw_fence.h"
 #include "xe_pci.h"
-#include "xe_pmu.h"
 #include "xe_sched_job.h"
 
 struct xe_modparam xe_modparam = {
@@ -63,10 +62,6 @@ static const struct init_funcs init_funcs[] = {
 		.init = xe_sched_job_module_init,
 		.exit = xe_sched_job_module_exit,
 	},
-	{
-		.init = xe_pmu_init,
-		.exit = xe_pmu_exit,
-	},
 	{
 		.init = xe_register_pci_driver,
 		.exit = xe_unregister_pci_driver,
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
deleted file mode 100644
index 9d0b7887cfc45..0000000000000
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ /dev/null
@@ -1,645 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#include <drm/drm_drv.h>
-#include <drm/drm_managed.h>
-#include <drm/xe_drm.h>
-
-#include "regs/xe_gt_regs.h"
-#include "xe_device.h"
-#include "xe_gt_clock.h"
-#include "xe_mmio.h"
-
-static cpumask_t xe_pmu_cpumask;
-static unsigned int xe_pmu_target_cpu = -1;
-
-static unsigned int config_gt_id(const u64 config)
-{
-	return config >> __DRM_XE_PMU_GT_SHIFT;
-}
-
-static u64 config_counter(const u64 config)
-{
-	return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
-}
-
-static void xe_pmu_event_destroy(struct perf_event *event)
-{
-	struct xe_device *xe =
-		container_of(event->pmu, typeof(*xe), pmu.base);
-
-	drm_WARN_ON(&xe->drm, event->parent);
-
-	drm_dev_put(&xe->drm);
-}
-
-static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type)
-{
-	u64 val;
-
-	switch (sample_type) {
-	case __XE_SAMPLE_RENDER_GROUP_BUSY:
-		val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE);
-		break;
-	case __XE_SAMPLE_COPY_GROUP_BUSY:
-		val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE);
-		break;
-	case __XE_SAMPLE_MEDIA_GROUP_BUSY:
-		val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE);
-		break;
-	case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY:
-		val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE);
-		break;
-	default:
-		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
-	}
-
-	return xe_gt_clock_cycles_to_ns(gt, val * 16);
-}
-
-static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config)
-{
-	int sample_type = config_counter(config);
-	const unsigned int gt_id = gt->info.id;
-	struct xe_device *xe = gt->tile->xe;
-	struct xe_pmu *pmu = &xe->pmu;
-	unsigned long flags;
-	bool device_awake;
-	u64 val;
-
-	device_awake = xe_device_mem_access_get_if_ongoing(xe);
-	if (device_awake) {
-		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
-		val = __engine_group_busyness_read(gt, sample_type);
-		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
-		xe_device_mem_access_put(xe);
-	}
-
-	spin_lock_irqsave(&pmu->lock, flags);
-
-	if (device_awake)
-		pmu->sample[gt_id][sample_type] = val;
-	else
-		val = pmu->sample[gt_id][sample_type];
-
-	spin_unlock_irqrestore(&pmu->lock, flags);
-
-	return val;
-}
-
-static void engine_group_busyness_store(struct xe_gt *gt)
-{
-	struct xe_pmu *pmu = &gt->tile->xe->pmu;
-	unsigned int gt_id = gt->info.id;
-	unsigned long flags;
-	int i;
-
-	spin_lock_irqsave(&pmu->lock, flags);
-
-	for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++)
-		pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i);
-
-	spin_unlock_irqrestore(&pmu->lock, flags);
-}
-
-static int
-config_status(struct xe_device *xe, u64 config)
-{
-	unsigned int gt_id = config_gt_id(config);
-	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
-
-	if (gt_id >= XE_PMU_MAX_GT)
-		return -ENOENT;
-
-	switch (config_counter(config)) {
-	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
-	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
-	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
-		if (gt->info.type == XE_GT_TYPE_MEDIA)
-			return -ENOENT;
-		break;
-	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
-		if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
-			return -ENOENT;
-		break;
-	default:
-		return -ENOENT;
-	}
-
-	return 0;
-}
-
-static int xe_pmu_event_init(struct perf_event *event)
-{
-	struct xe_device *xe =
-		container_of(event->pmu, typeof(*xe), pmu.base);
-	struct xe_pmu *pmu = &xe->pmu;
-	int ret;
-
-	if (pmu->closed)
-		return -ENODEV;
-
-	if (event->attr.type != event->pmu->type)
-		return -ENOENT;
-
-	/* unsupported modes and filters */
-	if (event->attr.sample_period) /* no sampling */
-		return -EINVAL;
-
-	if (has_branch_stack(event))
-		return -EOPNOTSUPP;
-
-	if (event->cpu < 0)
-		return -EINVAL;
-
-	/* only allow running on one cpu at a time */
-	if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
-		return -EINVAL;
-
-	ret = config_status(xe, event->attr.config);
-	if (ret)
-		return ret;
-
-	if (!event->parent) {
-		drm_dev_get(&xe->drm);
-		event->destroy = xe_pmu_event_destroy;
-	}
-
-	return 0;
-}
-
-static u64 __xe_pmu_event_read(struct perf_event *event)
-{
-	struct xe_device *xe =
-		container_of(event->pmu, typeof(*xe), pmu.base);
-	const unsigned int gt_id = config_gt_id(event->attr.config);
-	const u64 config = event->attr.config;
-	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
-	u64 val;
-
-	switch (config_counter(config)) {
-	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
-	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
-	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
-	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
-		val = engine_group_busyness_read(gt, config);
-		break;
-	default:
-		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
-	}
-
-	return val;
-}
-
-static void xe_pmu_event_read(struct perf_event *event)
-{
-	struct xe_device *xe =
-		container_of(event->pmu, typeof(*xe), pmu.base);
-	struct hw_perf_event *hwc = &event->hw;
-	struct xe_pmu *pmu = &xe->pmu;
-	u64 prev, new;
-
-	if (pmu->closed) {
-		event->hw.state = PERF_HES_STOPPED;
-		return;
-	}
-again:
-	prev = local64_read(&hwc->prev_count);
-	new = __xe_pmu_event_read(event);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
-		goto again;
-
-	local64_add(new - prev, &event->count);
-}
-
-static void xe_pmu_enable(struct perf_event *event)
-{
-	/*
-	 * Store the current counter value so we can report the correct delta
-	 * for all listeners. Even when the event was already enabled and has
-	 * an existing non-zero value.
-	 */
-	local64_set(&event->hw.prev_count, __xe_pmu_event_read(event));
-}
-
-static void xe_pmu_event_start(struct perf_event *event, int flags)
-{
-	struct xe_device *xe =
-		container_of(event->pmu, typeof(*xe), pmu.base);
-	struct xe_pmu *pmu = &xe->pmu;
-
-	if (pmu->closed)
-		return;
-
-	xe_pmu_enable(event);
-	event->hw.state = 0;
-}
-
-static void xe_pmu_event_stop(struct perf_event *event, int flags)
-{
-	if (flags & PERF_EF_UPDATE)
-		xe_pmu_event_read(event);
-
-	event->hw.state = PERF_HES_STOPPED;
-}
-
-static int xe_pmu_event_add(struct perf_event *event, int flags)
-{
-	struct xe_device *xe =
-		container_of(event->pmu, typeof(*xe), pmu.base);
-	struct xe_pmu *pmu = &xe->pmu;
-
-	if (pmu->closed)
-		return -ENODEV;
-
-	if (flags & PERF_EF_START)
-		xe_pmu_event_start(event, flags);
-
-	return 0;
-}
-
-static void xe_pmu_event_del(struct perf_event *event, int flags)
-{
-	xe_pmu_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int xe_pmu_event_event_idx(struct perf_event *event)
-{
-	return 0;
-}
-
-struct xe_ext_attribute {
-	struct device_attribute attr;
-	unsigned long val;
-};
-
-static ssize_t xe_pmu_event_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct xe_ext_attribute *eattr;
-
-	eattr = container_of(attr, struct xe_ext_attribute, attr);
-	return sprintf(buf, "config=0x%lx\n", eattr->val);
-}
-
-static ssize_t cpumask_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask);
-}
-
-static DEVICE_ATTR_RO(cpumask);
-
-static struct attribute *xe_cpumask_attrs[] = {
-	&dev_attr_cpumask.attr,
-	NULL,
-};
-
-static const struct attribute_group xe_pmu_cpumask_attr_group = {
-	.attrs = xe_cpumask_attrs,
-};
-
-#define __event(__counter, __name, __unit) \
-{ \
-	.counter = (__counter), \
-	.name = (__name), \
-	.unit = (__unit), \
-	.global = false, \
-}
-
-#define __global_event(__counter, __name, __unit) \
-{ \
-	.counter = (__counter), \
-	.name = (__name), \
-	.unit = (__unit), \
-	.global = true, \
-}
-
-static struct xe_ext_attribute *
-add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
-{
-	sysfs_attr_init(&attr->attr.attr);
-	attr->attr.attr.name = name;
-	attr->attr.attr.mode = 0444;
-	attr->attr.show = xe_pmu_event_show;
-	attr->val = config;
-
-	return ++attr;
-}
-
-static struct perf_pmu_events_attr *
-add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
-	     const char *str)
-{
-	sysfs_attr_init(&attr->attr.attr);
-	attr->attr.attr.name = name;
-	attr->attr.attr.mode = 0444;
-	attr->attr.show = perf_event_sysfs_show;
-	attr->event_str = str;
-
-	return ++attr;
-}
-
-static struct attribute **
-create_event_attributes(struct xe_pmu *pmu)
-{
-	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
-	static const struct {
-		unsigned int counter;
-		const char *name;
-		const char *unit;
-		bool global;
-	} events[] = {
-		__event(0, "render-group-busy", "ns"),
-		__event(1, "copy-group-busy", "ns"),
-		__event(2, "media-group-busy", "ns"),
-		__event(3, "any-engine-group-busy", "ns"),
-	};
-
-	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
-	struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
-	struct attribute **attr = NULL, **attr_iter;
-	unsigned int count = 0;
-	unsigned int i, j;
-	struct xe_gt *gt;
-
-	/* Count how many counters we will be exposing. */
-	for_each_gt(gt, xe, j) {
-		for (i = 0; i < ARRAY_SIZE(events); i++) {
-			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);
-
-			if (!config_status(xe, config))
-				count++;
-		}
-	}
-
-	/* Allocate attribute objects and table. */
-	xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
-	if (!xe_attr)
-		goto err_alloc;
-
-	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
-	if (!pmu_attr)
-		goto err_alloc;
-
-	/* Max one pointer of each attribute type plus a termination entry. */
-	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
-	if (!attr)
-		goto err_alloc;
-
-	xe_iter = xe_attr;
-	pmu_iter = pmu_attr;
-	attr_iter = attr;
-
-	for_each_gt(gt, xe, j) {
-		for (i = 0; i < ARRAY_SIZE(events); i++) {
-			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);
-			char *str;
-
-			if (config_status(xe, config))
-				continue;
-
-			if (events[i].global)
-				str = kstrdup(events[i].name, GFP_KERNEL);
-			else
-				str = kasprintf(GFP_KERNEL, "%s-gt%u",
-						events[i].name, j);
-			if (!str)
-				goto err;
-
-			*attr_iter++ = &xe_iter->attr.attr;
-			xe_iter = add_xe_attr(xe_iter, str, config);
-
-			if (events[i].unit) {
-				if (events[i].global)
-					str = kasprintf(GFP_KERNEL, "%s.unit",
-							events[i].name);
-				else
-					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
-							events[i].name, j);
-				if (!str)
-					goto err;
-
-				*attr_iter++ = &pmu_iter->attr.attr;
-				pmu_iter = add_pmu_attr(pmu_iter, str,
-							events[i].unit);
-			}
-		}
-	}
-
-	pmu->xe_attr = xe_attr;
-	pmu->pmu_attr = pmu_attr;
-
-	return attr;
-
-err:
-	for (attr_iter = attr; *attr_iter; attr_iter++)
-		kfree((*attr_iter)->name);
-
-err_alloc:
-	kfree(attr);
-	kfree(xe_attr);
-	kfree(pmu_attr);
-
-	return NULL;
-}
-
-static void free_event_attributes(struct xe_pmu *pmu)
-{
-	struct attribute **attr_iter = pmu->events_attr_group.attrs;
-
-	for (; *attr_iter; attr_iter++)
-		kfree((*attr_iter)->name);
-
-	kfree(pmu->events_attr_group.attrs);
-	kfree(pmu->xe_attr);
-	kfree(pmu->pmu_attr);
-
-	pmu->events_attr_group.attrs = NULL;
-	pmu->xe_attr = NULL;
-	pmu->pmu_attr = NULL;
-}
-
-static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
-{
-	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
-
-	/* Select the first online CPU as a designated reader. */
-	if (cpumask_empty(&xe_pmu_cpumask))
-		cpumask_set_cpu(cpu, &xe_pmu_cpumask);
-
-	return 0;
-}
-
-static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
-{
-	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
-	unsigned int target = xe_pmu_target_cpu;
-
-	/*
-	 * Unregistering an instance generates a CPU offline event which we must
-	 * ignore to avoid incorrectly modifying the shared xe_pmu_cpumask.
-	 */
-	if (pmu->closed)
-		return 0;
-
-	if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) {
-		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
-
-		/* Migrate events if there is a valid target */
-		if (target < nr_cpu_ids) {
-			cpumask_set_cpu(target, &xe_pmu_cpumask);
-			xe_pmu_target_cpu = target;
-		}
-	}
-
-	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
-		perf_pmu_migrate_context(&pmu->base, cpu, target);
-		pmu->cpuhp.cpu = target;
-	}
-
-	return 0;
-}
-
-static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
-
-int xe_pmu_init(void)
-{
-	int ret;
-
-	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
-				      "perf/x86/intel/xe:online",
-				      xe_pmu_cpu_online,
-				      xe_pmu_cpu_offline);
-	if (ret < 0)
-		pr_notice("Failed to setup cpuhp state for xe PMU! (%d)\n",
-			  ret);
-	else
-		cpuhp_slot = ret;
-
-	return 0;
-}
-
-void xe_pmu_exit(void)
-{
-	if (cpuhp_slot != CPUHP_INVALID)
-		cpuhp_remove_multi_state(cpuhp_slot);
-}
-
-static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu)
-{
-	if (cpuhp_slot == CPUHP_INVALID)
-		return -EINVAL;
-
-	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
-}
-
-static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
-{
-	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
-}
-
-void xe_pmu_suspend(struct xe_gt *gt)
-{
-	engine_group_busyness_store(gt);
-}
-
-static void xe_pmu_unregister(struct drm_device *device, void *arg)
-{
-	struct xe_pmu *pmu = arg;
-
-	if (!pmu->base.event_init)
-		return;
-
-	/*
-	 * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
-	 * ensures all currently executing ones will have exited before we
-	 * proceed with unregistration.
-	 */
-	pmu->closed = true;
-	synchronize_rcu();
-
-	xe_pmu_unregister_cpuhp_state(pmu);
-
-	perf_pmu_unregister(&pmu->base);
-	pmu->base.event_init = NULL;
-	kfree(pmu->base.attr_groups);
-	kfree(pmu->name);
-	free_event_attributes(pmu);
-}
-
-void xe_pmu_register(struct xe_pmu *pmu)
-{
-	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
-	const struct attribute_group *attr_groups[] = {
-		&pmu->events_attr_group,
-		&xe_pmu_cpumask_attr_group,
-		NULL
-	};
-
-	int ret = -ENOMEM;
-
-	spin_lock_init(&pmu->lock);
-	pmu->cpuhp.cpu = -1;
-
-	pmu->name = kasprintf(GFP_KERNEL,
-			      "xe_%s",
-			      dev_name(xe->drm.dev));
-	if (pmu->name)
-		/* tools/perf reserves colons as special. */
-		strreplace((char *)pmu->name, ':', '_');
-
-	if (!pmu->name)
-		goto err;
-
-	pmu->events_attr_group.name = "events";
-	pmu->events_attr_group.attrs = create_event_attributes(pmu);
-	if (!pmu->events_attr_group.attrs)
-		goto err_name;
-
-	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
-					GFP_KERNEL);
-	if (!pmu->base.attr_groups)
-		goto err_attr;
-
-	pmu->base.module	= THIS_MODULE;
-	pmu->base.task_ctx_nr	= perf_invalid_context;
-	pmu->base.event_init	= xe_pmu_event_init;
-	pmu->base.add		= xe_pmu_event_add;
-	pmu->base.del		= xe_pmu_event_del;
-	pmu->base.start		= xe_pmu_event_start;
-	pmu->base.stop		= xe_pmu_event_stop;
-	pmu->base.read		= xe_pmu_event_read;
-	pmu->base.event_idx	= xe_pmu_event_event_idx;
-
-	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
-	if (ret)
-		goto err_groups;
-
-	ret = xe_pmu_register_cpuhp_state(pmu);
-	if (ret)
-		goto err_unreg;
-
-	ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
-	if (ret)
-		goto err_cpuhp;
-
-	return;
-
-err_cpuhp:
-	xe_pmu_unregister_cpuhp_state(pmu);
-err_unreg:
-	perf_pmu_unregister(&pmu->base);
-err_groups:
-	kfree(pmu->base.attr_groups);
-err_attr:
-	pmu->base.event_init = NULL;
-	free_event_attributes(pmu);
-err_name:
-	kfree(pmu->name);
-err:
-	drm_notice(&xe->drm, "Failed to register PMU!\n");
-}
diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h
deleted file mode 100644
index a99d4ddd023e9..0000000000000
--- a/drivers/gpu/drm/xe/xe_pmu.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_PMU_H_
-#define _XE_PMU_H_
-
-#include "xe_gt_types.h"
-#include "xe_pmu_types.h"
-
-#if IS_ENABLED(CONFIG_PERF_EVENTS)
-int xe_pmu_init(void);
-void xe_pmu_exit(void);
-void xe_pmu_register(struct xe_pmu *pmu);
-void xe_pmu_suspend(struct xe_gt *gt);
-#else
-static inline int xe_pmu_init(void) { return 0; }
-static inline void xe_pmu_exit(void) {}
-static inline void xe_pmu_register(struct xe_pmu *pmu) {}
-static inline void xe_pmu_suspend(struct xe_gt *gt) {}
-#endif
-
-#endif
-
diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h
deleted file mode 100644
index 9cadbd243f577..0000000000000
--- a/drivers/gpu/drm/xe/xe_pmu_types.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_PMU_TYPES_H_
-#define _XE_PMU_TYPES_H_
-
-#include <linux/perf_event.h>
-#include <linux/spinlock_types.h>
-#include <uapi/drm/xe_drm.h>
-
-enum {
-	__XE_SAMPLE_RENDER_GROUP_BUSY,
-	__XE_SAMPLE_COPY_GROUP_BUSY,
-	__XE_SAMPLE_MEDIA_GROUP_BUSY,
-	__XE_SAMPLE_ANY_ENGINE_GROUP_BUSY,
-	__XE_NUM_PMU_SAMPLERS
-};
-
-#define XE_PMU_MAX_GT 2
-
-struct xe_pmu {
-	/**
-	 * @cpuhp: Struct used for CPU hotplug handling.
-	 */
-	struct {
-		struct hlist_node node;
-		unsigned int cpu;
-	} cpuhp;
-	/**
-	 * @base: PMU base.
-	 */
-	struct pmu base;
-	/**
-	 * @closed: xe is unregistering.
-	 */
-	bool closed;
-	/**
-	 * @name: Name as registered with perf core.
-	 */
-	const char *name;
-	/**
-	 * @lock: Lock protecting enable mask and ref count handling.
-	 */
-	spinlock_t lock;
-	/**
-	 * @sample: Current and previous (raw) counters.
-	 *
-	 * These counters are updated when the device is awake.
-	 *
-	 */
-	u64 sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS];
-	/**
-	 * @events_attr_group: Device events attribute group.
-	 */
-	struct attribute_group events_attr_group;
-	/**
-	 * @xe_attr: Memory block holding device attributes.
-	 */
-	void *xe_attr;
-	/**
-	 * @pmu_attr: Memory block holding device attributes.
-	 */
-	void *pmu_attr;
-};
-
-#endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index d122f985435a0..e1e8fb1846eac 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1074,46 +1074,6 @@ struct drm_xe_wait_user_fence {
 	/** @reserved: Reserved */
 	__u64 reserved[2];
 };
-
-/**
- * DOC: XE PMU event config IDs
- *
- * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h
- * in 'struct perf_event_attr' as part of perf_event_open syscall to read a
- * particular event.
- *
- * For example to open the DRMXE_PMU_RENDER_GROUP_BUSY(0):
- *
- * .. code-block:: C
- *
- *	struct perf_event_attr attr;
- *	long long count;
- *	int cpu = 0;
- *	int fd;
- *
- *	memset(&attr, 0, sizeof(struct perf_event_attr));
- *	attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_56_00.0/type
- *	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
- *	attr.use_clockid = 1;
- *	attr.clockid = CLOCK_MONOTONIC;
- *	attr.config = DRM_XE_PMU_RENDER_GROUP_BUSY(0);
- *
- *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
- */
-
-/*
- * Top bits of every counter are GT id.
- */
-#define __DRM_XE_PMU_GT_SHIFT (56)
-
-#define ___DRM_XE_PMU_OTHER(gt, x) \
-	(((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
-
-#define DRM_XE_PMU_RENDER_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 0)
-#define DRM_XE_PMU_COPY_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 1)
-#define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 2)
-#define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 3)
-
 #if defined(__cplusplus)
 }
 #endif
-- 
GitLab


From 7e9337c29fb9251e27d7af092108f05857e733c1 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 15 Dec 2023 15:45:38 +0000
Subject: [PATCH 2244/2340] drm/xe/uapi: Ensure every uapi struct has drm_xe
 prefix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To ensure consistency and avoid possible later conflicts,
let's add drm_xe prefix to xe_user_extension struct.

Cc: Francois Dugast <francois.dugast@intel.com>
Suggested-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c |  2 +-
 include/uapi/drm/xe_drm.h          | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index eeb9605dd45f6..aa478c66edbb3 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -453,7 +453,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
 				      u64 extensions, int ext_number, bool create)
 {
 	u64 __user *address = u64_to_user_ptr(extensions);
-	struct xe_user_extension ext;
+	struct drm_xe_user_extension ext;
 	int err;
 	u32 idx;
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index e1e8fb1846eac..87ff6eaa788e2 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -27,7 +27,7 @@ extern "C" {
 #define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
 
 /**
- * struct xe_user_extension - Base class for defining a chain of extensions
+ * struct drm_xe_user_extension - Base class for defining a chain of extensions
  *
  * Many interfaces need to grow over time. In most cases we can simply
  * extend the struct and have userspace pass in more data. Another option,
@@ -45,29 +45,29 @@ extern "C" {
  *
  * .. code-block:: C
  *
- *	struct xe_user_extension ext3 {
+ *	struct drm_xe_user_extension ext3 {
  *		.next_extension = 0, // end
  *		.name = ...,
  *	};
- *	struct xe_user_extension ext2 {
+ *	struct drm_xe_user_extension ext2 {
  *		.next_extension = (uintptr_t)&ext3,
  *		.name = ...,
  *	};
- *	struct xe_user_extension ext1 {
+ *	struct drm_xe_user_extension ext1 {
  *		.next_extension = (uintptr_t)&ext2,
  *		.name = ...,
  *	};
  *
- * Typically the struct xe_user_extension would be embedded in some uAPI
+ * Typically the struct drm_xe_user_extension would be embedded in some uAPI
  * struct, and in this case we would feed it the head of the chain(i.e ext1),
  * which would then apply all of the above extensions.
  *
  */
-struct xe_user_extension {
+struct drm_xe_user_extension {
 	/**
 	 * @next_extension:
 	 *
-	 * Pointer to the next struct xe_user_extension, or zero if the end.
+	 * Pointer to the next struct drm_xe_user_extension, or zero if the end.
 	 */
 	__u64 next_extension;
 
@@ -78,7 +78,7 @@ struct xe_user_extension {
 	 *
 	 * Also note that the name space for this is not global for the whole
 	 * driver, but rather its scope/meaning is limited to the specific piece
-	 * of uAPI which has embedded the struct xe_user_extension.
+	 * of uAPI which has embedded the struct drm_xe_user_extension.
 	 */
 	__u32 name;
 
@@ -625,7 +625,7 @@ struct drm_xe_gem_mmap_offset {
 /** struct drm_xe_ext_set_property - XE set property extension */
 struct drm_xe_ext_set_property {
 	/** @base: base user extension */
-	struct xe_user_extension base;
+	struct drm_xe_user_extension base;
 
 	/** @property: property to set */
 	__u32 property;
-- 
GitLab


From d3d767396a02fa225eab7f919b727cff4e3304bc Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 15 Dec 2023 15:45:39 +0000
Subject: [PATCH 2245/2340] drm/xe/uapi: Remove sync binds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove concept of async vs sync VM bind queues, rather make all binds
async.

The following bits have dropped from the uAPI:
DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC
DRM_XE_ENGINE_CLASS_VM_BIND_SYNC
DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT
DRM_XE_VM_BIND_FLAG_ASYNC

To implement sync binds the UMD is expected to use the out-fence
interface.

v2: Send correct version
v3: Drop drm_xe_syncs

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c       |  7 +--
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  2 -
 drivers/gpu/drm/xe/xe_vm.c               | 75 +++---------------------
 drivers/gpu/drm/xe/xe_vm_types.h         | 13 ++--
 include/uapi/drm/xe_drm.h                | 11 ++--
 5 files changed, 20 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index aa478c66edbb3..44fe8097b7cda 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -625,10 +625,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
 		return -EINVAL;
 
-	if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) {
-		bool sync = eci[0].engine_class ==
-			DRM_XE_ENGINE_CLASS_VM_BIND_SYNC;
-
+	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
 		for_each_gt(gt, xe, id) {
 			struct xe_exec_queue *new;
 
@@ -654,8 +651,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 						   args->width, hwe,
 						   EXEC_QUEUE_FLAG_PERSISTENT |
 						   EXEC_QUEUE_FLAG_VM |
-						   (sync ? 0 :
-						    EXEC_QUEUE_FLAG_VM_ASYNC) |
 						   (id ?
 						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
 						    0));
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index bcf08b00d94a1..3d7e704ec3d9f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -84,8 +84,6 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_VM			BIT(4)
 /* child of VM queue for multi-tile VM jobs */
 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
-/* VM jobs for this queue are asynchronous */
-#define EXEC_QUEUE_FLAG_VM_ASYNC		BIT(6)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 2f3df9ee67c9b..322c1ecceccac 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1343,9 +1343,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 			struct xe_gt *gt = tile->primary_gt;
 			struct xe_vm *migrate_vm;
 			struct xe_exec_queue *q;
-			u32 create_flags = EXEC_QUEUE_FLAG_VM |
-				((flags & XE_VM_FLAG_ASYNC_DEFAULT) ?
-				EXEC_QUEUE_FLAG_VM_ASYNC : 0);
+			u32 create_flags = EXEC_QUEUE_FLAG_VM;
 
 			if (!vm->pt_root[id])
 				continue;
@@ -1712,12 +1710,6 @@ err_fences:
 	return ERR_PTR(err);
 }
 
-static bool xe_vm_sync_mode(struct xe_vm *vm, struct xe_exec_queue *q)
-{
-	return q ? !(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC) :
-		!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT);
-}
-
 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
 			u32 num_syncs, bool immediate, bool first_op,
@@ -1747,8 +1739,6 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 
 	if (last_op)
 		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-	if (last_op && xe_vm_sync_mode(vm, q))
-		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
 
 	return 0;
@@ -1791,8 +1781,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	xe_vma_destroy(vma, fence);
 	if (last_op)
 		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-	if (last_op && xe_vm_sync_mode(vm, q))
-		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
 
 	return 0;
@@ -1800,7 +1788,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 
 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
 				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
-				    DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \
 				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
 
 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -1854,8 +1841,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
 		flags |= XE_VM_FLAG_LR_MODE;
-	if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT)
-		flags |= XE_VM_FLAG_ASYNC_DEFAULT;
 	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
 		flags |= XE_VM_FLAG_FAULT_MODE;
 
@@ -2263,8 +2248,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				   struct drm_gpuva_ops *ops,
 				   struct xe_sync_entry *syncs, u32 num_syncs,
-				   struct list_head *ops_list, bool last,
-				   bool async)
+				   struct list_head *ops_list, bool last)
 {
 	struct xe_vma_op *last_op = NULL;
 	struct drm_gpuva_op *__op;
@@ -2696,23 +2680,22 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 
 #ifdef TEST_VM_ASYNC_OPS_ERROR
 #define SUPPORTED_FLAGS	\
-	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_ASYNC | \
-	 DRM_XE_VM_BIND_FLAG_READONLY | DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
-	 DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
+	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
+	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
 #else
 #define SUPPORTED_FLAGS	\
-	(DRM_XE_VM_BIND_FLAG_ASYNC | DRM_XE_VM_BIND_FLAG_READONLY | \
+	(DRM_XE_VM_BIND_FLAG_READONLY | \
 	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
 	 0xffff)
 #endif
 #define XE_64K_PAGE_MASK 0xffffull
+#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
 
 #define MAX_BINDS	512	/* FIXME: Picking random upper limit */
 
 static int vm_bind_ioctl_check_args(struct xe_device *xe,
 				    struct drm_xe_vm_bind *args,
-				    struct drm_xe_vm_bind_op **bind_ops,
-				    bool *async)
+				    struct drm_xe_vm_bind_op **bind_ops)
 {
 	int err;
 	int i;
@@ -2775,18 +2758,6 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 			goto free_bind_ops;
 		}
 
-		if (i == 0) {
-			*async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
-			if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) {
-				err = -EINVAL;
-				goto free_bind_ops;
-			}
-		} else if (XE_IOCTL_DBG(xe, *async !=
-					!!(flags & DRM_XE_VM_BIND_FLAG_ASYNC))) {
-			err = -EINVAL;
-			goto free_bind_ops;
-		}
-
 		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
 		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
 		    XE_IOCTL_DBG(xe, obj && is_null) ||
@@ -2854,14 +2825,6 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
 
 	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
 				     fence);
-
-	if (xe_vm_sync_mode(vm, q)) {
-		long timeout = dma_fence_wait(fence, true);
-
-		if (timeout < 0)
-			err = -EINTR;
-	}
-
 	dma_fence_put(fence);
 
 	return err;
@@ -2881,18 +2844,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_sync_entry *syncs = NULL;
 	struct drm_xe_vm_bind_op *bind_ops;
 	LIST_HEAD(ops_list);
-	bool async;
 	int err;
 	int i;
 
-	err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
+	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
 	if (err)
 		return err;
 
-	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
-	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
-		return -EINVAL;
-
 	if (args->exec_queue_id) {
 		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
 		if (XE_IOCTL_DBG(xe, !q)) {
@@ -2904,12 +2862,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			err = -EINVAL;
 			goto put_exec_queue;
 		}
-
-		if (XE_IOCTL_DBG(xe, args->num_binds && async !=
-				 !!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC))) {
-			err = -EINVAL;
-			goto put_exec_queue;
-		}
 	}
 
 	vm = xe_vm_lookup(xef, args->vm_id);
@@ -2918,14 +2870,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto put_exec_queue;
 	}
 
-	if (!args->exec_queue_id) {
-		if (XE_IOCTL_DBG(xe, args->num_binds && async !=
-				 !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) {
-			err = -EINVAL;
-			goto put_vm;
-		}
-	}
-
 	err = down_write_killable(&vm->lock);
 	if (err)
 		goto put_vm;
@@ -3060,8 +3004,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
 					      &ops_list,
-					      i == args->num_binds - 1,
-					      async);
+					      i == args->num_binds - 1);
 		if (err)
 			goto unwind_ops;
 	}
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 2e023596cb153..63e8a50b88e94 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -138,13 +138,12 @@ struct xe_vm {
 	 */
 #define XE_VM_FLAG_64K			BIT(0)
 #define XE_VM_FLAG_LR_MODE		BIT(1)
-#define XE_VM_FLAG_ASYNC_DEFAULT	BIT(2)
-#define XE_VM_FLAG_MIGRATION		BIT(3)
-#define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
-#define XE_VM_FLAG_FAULT_MODE		BIT(5)
-#define XE_VM_FLAG_BANNED		BIT(6)
-#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(8, 7), flags)
-#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(8, 7), (tile)->id)
+#define XE_VM_FLAG_MIGRATION		BIT(2)
+#define XE_VM_FLAG_SCRATCH_PAGE		BIT(3)
+#define XE_VM_FLAG_FAULT_MODE		BIT(4)
+#define XE_VM_FLAG_BANNED		BIT(5)
+#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(7, 6), flags)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(7, 6), (tile)->id)
 	unsigned long flags;
 
 	/** @composite_fence_ctx: context composite fence */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 87ff6eaa788e2..2338d87dcb7d8 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -139,8 +139,7 @@ struct drm_xe_engine_class_instance {
 	 * Kernel only classes (not actual hardware engine class). Used for
 	 * creating ordered queues of VM bind operations.
 	 */
-#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC	5
-#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC	6
+#define DRM_XE_ENGINE_CLASS_VM_BIND		5
 	/** @engine_class: engine class id */
 	__u16 engine_class;
 	/** @engine_instance: engine instance id */
@@ -660,7 +659,6 @@ struct drm_xe_vm_create {
 	 * still enable recoverable pagefaults if supported by the device.
 	 */
 #define DRM_XE_VM_CREATE_FLAG_LR_MODE	        (1 << 1)
-#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT	(1 << 2)
 	/*
 	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also
 	 * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated
@@ -668,7 +666,7 @@ struct drm_xe_vm_create {
 	 * The xe driver internally uses recoverable pagefaults to implement
 	 * this.
 	 */
-#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 3)
+#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 2)
 	/** @flags: Flags */
 	__u32 flags;
 
@@ -776,12 +774,11 @@ struct drm_xe_vm_bind_op {
 	__u32 op;
 
 #define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
-#define DRM_XE_VM_BIND_FLAG_ASYNC	(1 << 1)
 	/*
 	 * Valid on a faulting VM only, do the MAP operation immediately rather
 	 * than deferring the MAP to the page fault handler.
 	 */
-#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 2)
+#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
 	/*
 	 * When the NULL flag is set, the page tables are setup with a special
 	 * bit which indicates writes are dropped and all reads return zero.  In
@@ -789,7 +786,7 @@ struct drm_xe_vm_bind_op {
 	 * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
 	 * intended to implement VK sparse bindings.
 	 */
-#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 3)
+#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
 	/** @flags: Bind flags */
 	__u32 flags;
 
-- 
GitLab


From b0e47225a16f4e1ed53dd769588700a40d7b9950 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:40 +0000
Subject: [PATCH 2246/2340] drm/xe/uapi: Add a comment to each struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a comment to each struct to complete documentation, ensure all
struct appear in the kernel doc, and bind structs to IOCTLs.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 43 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 2338d87dcb7d8..43cacb1680913 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -464,7 +464,8 @@ struct drm_xe_query_topology_mask {
 };
 
 /**
- * struct drm_xe_device_query - main structure to query device information
+ * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main
+ * structure to query device information
  *
  * If size is set to 0, the driver fills it with the required size for the
  * requested type of data to query. If size is equal to the required size,
@@ -526,6 +527,10 @@ struct drm_xe_device_query {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_gem_create - Input of &DRM_IOCTL_XE_GEM_CREATE - A structure for
+ * gem creation
+ */
 struct drm_xe_gem_create {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -604,6 +609,9 @@ struct drm_xe_gem_create {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET
+ */
 struct drm_xe_gem_mmap_offset {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -621,7 +629,9 @@ struct drm_xe_gem_mmap_offset {
 	__u64 reserved[2];
 };
 
-/** struct drm_xe_ext_set_property - XE set property extension */
+/**
+ * struct drm_xe_ext_set_property - XE set property extension
+ */
 struct drm_xe_ext_set_property {
 	/** @base: base user extension */
 	struct drm_xe_user_extension base;
@@ -639,6 +649,9 @@ struct drm_xe_ext_set_property {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE
+ */
 struct drm_xe_vm_create {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -677,6 +690,9 @@ struct drm_xe_vm_create {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_vm_destroy - Input of &DRM_IOCTL_XE_VM_DESTROY
+ */
 struct drm_xe_vm_destroy {
 	/** @vm_id: VM ID */
 	__u32 vm_id;
@@ -688,6 +704,9 @@ struct drm_xe_vm_destroy {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_vm_bind_op
+ */
 struct drm_xe_vm_bind_op {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -804,6 +823,9 @@ struct drm_xe_vm_bind_op {
 	__u64 reserved[3];
 };
 
+/**
+ * struct drm_xe_vm_bind - Input of &DRM_IOCTL_XE_VM_BIND
+ */
 struct drm_xe_vm_bind {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -862,6 +884,9 @@ struct drm_xe_vm_bind {
 /* Monitor 64MB contiguous region with 2M sub-granularity */
 #define DRM_XE_ACC_GRANULARITY_64M 3
 
+/**
+ * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
+ */
 struct drm_xe_exec_queue_create {
 #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY		0
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY		0
@@ -904,6 +929,9 @@ struct drm_xe_exec_queue_create {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_exec_queue_get_property - Input of &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
+ */
 struct drm_xe_exec_queue_get_property {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -922,6 +950,9 @@ struct drm_xe_exec_queue_get_property {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_exec_queue_destroy - Input of &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
+ */
 struct drm_xe_exec_queue_destroy {
 	/** @exec_queue_id: Exec queue ID */
 	__u32 exec_queue_id;
@@ -933,6 +964,9 @@ struct drm_xe_exec_queue_destroy {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_sync
+ */
 struct drm_xe_sync {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -967,6 +1001,9 @@ struct drm_xe_sync {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_exec - Input of &DRM_IOCTL_XE_EXEC
+ */
 struct drm_xe_exec {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
@@ -1000,7 +1037,7 @@ struct drm_xe_exec {
 };
 
 /**
- * struct drm_xe_wait_user_fence - wait user fence
+ * struct drm_xe_wait_user_fence - Input of &DRM_IOCTL_XE_WAIT_USER_FENCE
  *
  * Wait on user fence, XE will wake-up on every HW engine interrupt in the
  * instances list and check if user fence is complete::
-- 
GitLab


From 4efaadd38bc4c6c1016996669002994061990633 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:41 +0000
Subject: [PATCH 2247/2340] drm/xe/uapi: Add missing documentation for struct
 members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This removes the documentation build warnings below:

	include/uapi/drm/xe_drm.h:828: warning: Function parameter or \
	member 'pad2' not described in 'drm_xe_vm_bind_op'
	include/uapi/drm/xe_drm.h:875: warning: Function parameter or \
	member 'pad2' not described in 'drm_xe_vm_bind'
	include/uapi/drm/xe_drm.h:1006: warning: Function parameter or \
	member 'handle' not described in 'drm_xe_sync'
	include/uapi/drm/xe_drm.h:1006: warning: Function parameter or \
	member 'timeline_value' not described in 'drm_xe_sync'

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 43cacb1680913..d7893ccbbf8c3 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -816,7 +816,7 @@ struct drm_xe_vm_bind_op {
 	 */
 	__u32 prefetch_mem_region_instance;
 
-	/** @pad: MBZ */
+	/** @pad2: MBZ */
 	__u32 pad2;
 
 	/** @reserved: Reserved */
@@ -857,7 +857,7 @@ struct drm_xe_vm_bind {
 		__u64 vector_of_binds;
 	};
 
-	/** @pad: MBZ */
+	/** @pad2: MBZ */
 	__u32 pad2;
 
 	/** @num_syncs: amount of syncs to wait on */
@@ -982,6 +982,7 @@ struct drm_xe_sync {
 	__u32 flags;
 
 	union {
+		/** @handle: Handle for the object */
 		__u32 handle;
 
 		/**
@@ -995,6 +996,7 @@ struct drm_xe_sync {
 		__u64 addr;
 	};
 
+	/** @timeline_value: Timeline point of the sync object */
 	__u64 timeline_value;
 
 	/** @reserved: Reserved */
-- 
GitLab


From ff6c6bc55258e7d0aabcfc41baa392fcedb450a2 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:42 +0000
Subject: [PATCH 2248/2340] drm/xe/uapi: Document use of size in
 drm_xe_device_query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document the behavior of the driver for IOCTL DRM_IOCTL_XE_DEVICE_QUERY
depending on the size value provided in struct drm_xe_device_query.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index d7893ccbbf8c3..d759e04e00eed 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -467,9 +467,15 @@ struct drm_xe_query_topology_mask {
  * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main
  * structure to query device information
  *
- * If size is set to 0, the driver fills it with the required size for the
- * requested type of data to query. If size is equal to the required size,
- * the queried information is copied into data.
+ * The user selects the type of data to query among DRM_XE_DEVICE_QUERY_*
+ * and sets the value in the query member. This determines the type of
+ * the structure provided by the driver in data, among struct drm_xe_query_*.
+ *
+ * If size is set to 0, the driver fills it with the required size for
+ * the requested type of data to query. If size is equal to the required
+ * size, the queried information is copied into data. If size is set to
+ * a value different from 0 and different from the required size, the
+ * IOCTL call returns -EINVAL.
  *
  * For example the following code snippet allows retrieving and printing
  * information about the device engines with DRM_XE_DEVICE_QUERY_ENGINES:
-- 
GitLab


From af8ea4162b4cb6e83bfabaef3db3bf89d2a07cbc Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:43 +0000
Subject: [PATCH 2249/2340] drm/xe/uapi: Document drm_xe_query_config keys
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide a description of the keys used the struct
drm_xe_query_config info array.

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/637
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index d759e04e00eed..9c43bc258f10b 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -350,11 +350,31 @@ struct drm_xe_query_config {
 	/** @pad: MBZ */
 	__u32 pad;
 
+	/*
+	 * Device ID (lower 16 bits) and the device revision (next
+	 * 8 bits)
+	 */
 #define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
+	/*
+	 * Flags describing the device configuration, see list below
+	 */
 #define DRM_XE_QUERY_CONFIG_FLAGS			1
+	/*
+	 * Flag is set if the device has usable VRAM
+	 */
 	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM	(1 << 0)
+	/*
+	 * Minimal memory alignment required by this device,
+	 * typically SZ_4K or SZ_64K
+	 */
 #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT		2
+	/*
+	 * Maximum bits of a virtual address
+	 */
 #define DRM_XE_QUERY_CONFIG_VA_BITS			3
+	/*
+	 * Value of the highest available exec queue priority
+	 */
 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
 	/** @info: array of elements containing the config info */
 	__u64 info[];
-- 
GitLab


From 37958604e69485e9704f8483401b03679e3e4939 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:44 +0000
Subject: [PATCH 2250/2340] drm/xe/uapi: Document DRM_XE_DEVICE_QUERY_HWCONFIG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a documentation on the content and format of when using query type
DRM_XE_DEVICE_QUERY_HWCONFIG. The list of keys can be found in IGT
under lib/intel_hwconfig_types.h.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 9c43bc258f10b..70b42466a8112 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -537,6 +537,11 @@ struct drm_xe_device_query {
 #define DRM_XE_DEVICE_QUERY_MEM_REGIONS		1
 #define DRM_XE_DEVICE_QUERY_CONFIG		2
 #define DRM_XE_DEVICE_QUERY_GT_LIST		3
+	/*
+	 * Query type to retrieve the hardware configuration of the device
+	 * such as information on slices, memory, caches, and so on. It is
+	 * provided as a table of attributes (key / value).
+	 */
 #define DRM_XE_DEVICE_QUERY_HWCONFIG		4
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY		5
 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
-- 
GitLab


From 801989b08aff35ef56743551f4cfeaed360bd201 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:45 +0000
Subject: [PATCH 2251/2340] drm/xe/uapi: Make constant comments visible in
 kernel doc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As there is no direct way to make comments of constants directly
visible in the kernel doc, move them to the description of the
structure where they can be used. By doing so they appear in the
"Description" section of the struct documentation.

v2: Remove DRM_XE_UFENCE_WAIT_MASK_* (Francois Dugast)

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 271 ++++++++++++++++++++++----------------
 1 file changed, 155 insertions(+), 116 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 70b42466a8112..4c11dec57a834 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -128,6 +128,16 @@ struct drm_xe_user_extension {
  * It is returned as part of the @drm_xe_engine, but it also is used as
  * the input of engine selection for both @drm_xe_exec_queue_create and
  * @drm_xe_query_engine_cycles
+ *
+ * The @engine_class can be:
+ *  - %DRM_XE_ENGINE_CLASS_RENDER
+ *  - %DRM_XE_ENGINE_CLASS_COPY
+ *  - %DRM_XE_ENGINE_CLASS_VIDEO_DECODE
+ *  - %DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE
+ *  - %DRM_XE_ENGINE_CLASS_COMPUTE
+ *  - %DRM_XE_ENGINE_CLASS_VM_BIND - Kernel only classes (not actual
+ *    hardware engine class). Used for creating ordered queues of VM
+ *    bind operations.
  */
 struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_RENDER		0
@@ -135,10 +145,6 @@ struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
 #define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
 #define DRM_XE_ENGINE_CLASS_COMPUTE		4
-	/*
-	 * Kernel only classes (not actual hardware engine class). Used for
-	 * creating ordered queues of VM bind operations.
-	 */
 #define DRM_XE_ENGINE_CLASS_VM_BIND		5
 	/** @engine_class: engine class id */
 	__u16 engine_class;
@@ -342,6 +348,19 @@ struct drm_xe_query_mem_regions {
  * is equal to DRM_XE_DEVICE_QUERY_CONFIG, then the reply uses
  * struct drm_xe_query_config in .data.
  *
+ * The index in @info can be:
+ *  - %DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID - Device ID (lower 16 bits)
+ *    and the device revision (next 8 bits)
+ *  - %DRM_XE_QUERY_CONFIG_FLAGS - Flags describing the device
+ *    configuration, see list below
+ *
+ *    - %DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM - Flag is set if the device
+ *      has usable VRAM
+ *  - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
+ *    required by this device, typically SZ_4K or SZ_64K
+ *  - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
+ *  - %DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY - Value of the highest
+ *    available exec queue priority
  */
 struct drm_xe_query_config {
 	/** @num_params: number of parameters returned in info */
@@ -350,31 +369,11 @@ struct drm_xe_query_config {
 	/** @pad: MBZ */
 	__u32 pad;
 
-	/*
-	 * Device ID (lower 16 bits) and the device revision (next
-	 * 8 bits)
-	 */
 #define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
-	/*
-	 * Flags describing the device configuration, see list below
-	 */
 #define DRM_XE_QUERY_CONFIG_FLAGS			1
-	/*
-	 * Flag is set if the device has usable VRAM
-	 */
 	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM	(1 << 0)
-	/*
-	 * Minimal memory alignment required by this device,
-	 * typically SZ_4K or SZ_64K
-	 */
 #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT		2
-	/*
-	 * Maximum bits of a virtual address
-	 */
 #define DRM_XE_QUERY_CONFIG_VA_BITS			3
-	/*
-	 * Value of the highest available exec queue priority
-	 */
 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
 	/** @info: array of elements containing the config info */
 	__u64 info[];
@@ -387,6 +386,10 @@ struct drm_xe_query_config {
  * existing GT individual descriptions.
  * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for
  * implementing graphics and/or media operations.
+ *
+ * The index in @type can be:
+ *  - %DRM_XE_QUERY_GT_TYPE_MAIN
+ *  - %DRM_XE_QUERY_GT_TYPE_MEDIA
  */
 struct drm_xe_gt {
 #define DRM_XE_QUERY_GT_TYPE_MAIN		0
@@ -444,34 +447,30 @@ struct drm_xe_query_gt_list {
  * If a query is made with a struct drm_xe_device_query where .query
  * is equal to DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, then the reply uses
  * struct drm_xe_query_topology_mask in .data.
+ *
+ * The @type can be:
+ *  - %DRM_XE_TOPO_DSS_GEOMETRY - To query the mask of Dual Sub Slices
+ *    (DSS) available for geometry operations. For example a query response
+ *    containing the following in mask:
+ *    ``DSS_GEOMETRY    ff ff ff ff 00 00 00 00``
+ *    means 32 DSS are available for geometry.
+ *  - %DRM_XE_TOPO_DSS_COMPUTE - To query the mask of Dual Sub Slices
+ *    (DSS) available for compute operations. For example a query response
+ *    containing the following in mask:
+ *    ``DSS_COMPUTE    ff ff ff ff 00 00 00 00``
+ *    means 32 DSS are available for compute.
+ *  - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU)
+ *    available per Dual Sub Slices (DSS). For example a query response
+ *    containing the following in mask:
+ *    ``EU_PER_DSS    ff ff 00 00 00 00 00 00``
+ *    means each DSS has 16 EU.
  */
 struct drm_xe_query_topology_mask {
 	/** @gt_id: GT ID the mask is associated with */
 	__u16 gt_id;
 
-	/*
-	 * To query the mask of Dual Sub Slices (DSS) available for geometry
-	 * operations. For example a query response containing the following
-	 * in mask:
-	 *   DSS_GEOMETRY    ff ff ff ff 00 00 00 00
-	 * means 32 DSS are available for geometry.
-	 */
 #define DRM_XE_TOPO_DSS_GEOMETRY	(1 << 0)
-	/*
-	 * To query the mask of Dual Sub Slices (DSS) available for compute
-	 * operations. For example a query response containing the following
-	 * in mask:
-	 *   DSS_COMPUTE    ff ff ff ff 00 00 00 00
-	 * means 32 DSS are available for compute.
-	 */
 #define DRM_XE_TOPO_DSS_COMPUTE		(1 << 1)
-	/*
-	 * To query the mask of Execution Units (EU) available per Dual Sub
-	 * Slices (DSS). For example a query response containing the following
-	 * in mask:
-	 *   EU_PER_DSS    ff ff 00 00 00 00 00 00
-	 * means each DSS has 16 EU.
-	 */
 #define DRM_XE_TOPO_EU_PER_DSS		(1 << 2)
 	/** @type: type of mask */
 	__u16 type;
@@ -491,6 +490,18 @@ struct drm_xe_query_topology_mask {
  * and sets the value in the query member. This determines the type of
  * the structure provided by the driver in data, among struct drm_xe_query_*.
  *
+ * The @query can be:
+ *  - %DRM_XE_DEVICE_QUERY_ENGINES
+ *  - %DRM_XE_DEVICE_QUERY_MEM_REGIONS
+ *  - %DRM_XE_DEVICE_QUERY_CONFIG
+ *  - %DRM_XE_DEVICE_QUERY_GT_LIST
+ *  - %DRM_XE_DEVICE_QUERY_HWCONFIG - Query type to retrieve the hardware
+ *    configuration of the device such as information on slices, memory,
+ *    caches, and so on. It is provided as a table of key / value
+ *    attributes.
+ *  - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY
+ *  - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES
+ *
  * If size is set to 0, the driver fills it with the required size for
  * the requested type of data to query. If size is equal to the required
  * size, the queried information is copied into data. If size is set to
@@ -537,11 +548,6 @@ struct drm_xe_device_query {
 #define DRM_XE_DEVICE_QUERY_MEM_REGIONS		1
 #define DRM_XE_DEVICE_QUERY_CONFIG		2
 #define DRM_XE_DEVICE_QUERY_GT_LIST		3
-	/*
-	 * Query type to retrieve the hardware configuration of the device
-	 * such as information on slices, memory, caches, and so on. It is
-	 * provided as a table of attributes (key / value).
-	 */
 #define DRM_XE_DEVICE_QUERY_HWCONFIG		4
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY		5
 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
@@ -561,6 +567,33 @@ struct drm_xe_device_query {
 /**
  * struct drm_xe_gem_create - Input of &DRM_IOCTL_XE_GEM_CREATE - A structure for
  * gem creation
+ *
+ * The @flags can be:
+ *  - %DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING
+ *  - %DRM_XE_GEM_CREATE_FLAG_SCANOUT
+ *  - %DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM - When using VRAM as a
+ *    possible placement, ensure that the corresponding VRAM allocation
+ *    will always use the CPU accessible part of VRAM. This is important
+ *    for small-bar systems (on full-bar systems this gets turned into a
+ *    noop).
+ *    Note1: System memory can be used as an extra placement if the kernel
+ *    should spill the allocation to system memory, if space can't be made
+ *    available in the CPU accessible part of VRAM (giving the same
+ *    behaviour as the i915 interface, see
+ *    I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS).
+ *    Note2: For clear-color CCS surfaces the kernel needs to read the
+ *    clear-color value stored in the buffer, and on discrete platforms we
+ *    need to use VRAM for display surfaces, therefore the kernel requires
+ *    setting this flag for such objects, otherwise an error is thrown on
+ *    small-bar systems.
+ *
+ * @cpu_caching supports the following values:
+ *  - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back
+ *    caching. On iGPU this can't be used for scanout surfaces. Currently
+ *    not allowed for objects placed in VRAM.
+ *  - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This
+ *    is uncached. Scanout surfaces should likely use this. All objects
+ *    that can be placed in VRAM must use this.
  */
 struct drm_xe_gem_create {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -577,21 +610,6 @@ struct drm_xe_gem_create {
 
 #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING		(1 << 0)
 #define DRM_XE_GEM_CREATE_FLAG_SCANOUT			(1 << 1)
-/*
- * When using VRAM as a possible placement, ensure that the corresponding VRAM
- * allocation will always use the CPU accessible part of VRAM. This is important
- * for small-bar systems (on full-bar systems this gets turned into a noop).
- *
- * Note: System memory can be used as an extra placement if the kernel should
- * spill the allocation to system memory, if space can't be made available in
- * the CPU accessible part of VRAM (giving the same behaviour as the i915
- * interface, see I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS).
- *
- * Note: For clear-color CCS surfaces the kernel needs to read the clear-color
- * value stored in the buffer, and on discrete platforms we need to use VRAM for
- * display surfaces, therefore the kernel requires setting this flag for such
- * objects, otherwise an error is thrown on small-bar systems.
- */
 #define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM	(1 << 2)
 	/**
 	 * @flags: Flags, currently a mask of memory instances of where BO can
@@ -619,16 +637,6 @@ struct drm_xe_gem_create {
 	/**
 	 * @cpu_caching: The CPU caching mode to select for this object. If
 	 * mmaping the object the mode selected here will also be used.
-	 *
-	 * Supported values:
-	 *
-	 * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
-	 * caching.  On iGPU this can't be used for scanout surfaces. Currently
-	 * not allowed for objects placed in VRAM.
-	 *
-	 * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
-	 * is uncached. Scanout surfaces should likely use this. All objects
-	 * that can be placed in VRAM must use this.
 	 */
 #define DRM_XE_GEM_CPU_CACHING_WB                      1
 #define DRM_XE_GEM_CPU_CACHING_WC                      2
@@ -682,34 +690,33 @@ struct drm_xe_ext_set_property {
 
 /**
  * struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE
+ *
+ * The @flags can be:
+ *  - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE
+ *  - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts
+ *    exec submissions to its exec_queues that don't have an upper time
+ *    limit on the job execution time. But exec submissions to these
+ *    don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ,
+ *    DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF,
+ *    used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL.
+ *    LR VMs can be created in recoverable page-fault mode using
+ *    DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it.
+ *    If that flag is omitted, the UMD can not rely on the slightly
+ *    different per-VM overcommit semantics that are enabled by
+ *    DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may
+ *    still enable recoverable pagefaults if supported by the device.
+ *  - %DRM_XE_VM_CREATE_FLAG_FAULT_MODE - Requires also
+ *    DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated on
+ *    demand when accessed, and also allows per-VM overcommit of memory.
+ *    The xe driver internally uses recoverable pagefaults to implement
+ *    this.
  */
 struct drm_xe_vm_create {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
 #define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE	(1 << 0)
-	/*
-	 * An LR, or Long Running VM accepts exec submissions
-	 * to its exec_queues that don't have an upper time limit on
-	 * the job execution time. But exec submissions to these
-	 * don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ,
-	 * DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF,
-	 * used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL.
-	 * LR VMs can be created in recoverable page-fault mode using
-	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it.
-	 * If that flag is omitted, the UMD can not rely on the slightly
-	 * different per-VM overcommit semantics that are enabled by
-	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may
-	 * still enable recoverable pagefaults if supported by the device.
-	 */
 #define DRM_XE_VM_CREATE_FLAG_LR_MODE	        (1 << 1)
-	/*
-	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also
-	 * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated
-	 * on demand when accessed, and also allows per-VM overcommit of memory.
-	 * The xe driver internally uses recoverable pagefaults to implement
-	 * this.
-	 */
 #define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 2)
 	/** @flags: Flags */
 	__u32 flags;
@@ -736,7 +743,27 @@ struct drm_xe_vm_destroy {
 };
 
 /**
- * struct drm_xe_vm_bind_op
+ * struct drm_xe_vm_bind_op - run bind operations
+ *
+ * The @op can be:
+ *  - %DRM_XE_VM_BIND_OP_MAP
+ *  - %DRM_XE_VM_BIND_OP_UNMAP
+ *  - %DRM_XE_VM_BIND_OP_MAP_USERPTR
+ *  - %DRM_XE_VM_BIND_OP_UNMAP_ALL
+ *  - %DRM_XE_VM_BIND_OP_PREFETCH
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_VM_BIND_FLAG_READONLY
+ *  - %DRM_XE_VM_BIND_FLAG_ASYNC
+ *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
+ *    MAP operation immediately rather than deferring the MAP to the page
+ *    fault handler.
+ *  - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page
+ *    tables are setup with a special bit which indicates writes are
+ *    dropped and all reads return zero. In the future, the NULL flags
+ *    will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
+ *    handle MBZ, and the BO offset MBZ. This flag is intended to
+ *    implement VK sparse bindings.
  */
 struct drm_xe_vm_bind_op {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -824,18 +851,7 @@ struct drm_xe_vm_bind_op {
 	__u32 op;
 
 #define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
-	/*
-	 * Valid on a faulting VM only, do the MAP operation immediately rather
-	 * than deferring the MAP to the page fault handler.
-	 */
 #define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
-	/*
-	 * When the NULL flag is set, the page tables are setup with a special
-	 * bit which indicates writes are dropped and all reads return zero.  In
-	 * the future, the NULL flags will only be valid for DRM_XE_VM_BIND_OP_MAP
-	 * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
-	 * intended to implement VK sparse bindings.
-	 */
 #define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
 	/** @flags: Bind flags */
 	__u32 flags;
@@ -962,6 +978,9 @@ struct drm_xe_exec_queue_create {
 
 /**
  * struct drm_xe_exec_queue_get_property - Input of &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
+ *
+ * The @property can be:
+ *  - %DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN
  */
 struct drm_xe_exec_queue_get_property {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -996,7 +1015,15 @@ struct drm_xe_exec_queue_destroy {
 };
 
 /**
- * struct drm_xe_sync
+ * struct drm_xe_sync - sync object
+ *
+ * The @type can be:
+ *  - %DRM_XE_SYNC_TYPE_SYNCOBJ
+ *  - %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ
+ *  - %DRM_XE_SYNC_TYPE_USER_FENCE
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_SYNC_FLAG_SIGNAL
  */
 struct drm_xe_sync {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -1078,6 +1105,24 @@ struct drm_xe_exec {
  *	(*addr & MASK) OP (VALUE & MASK)
  *
  * Returns to user on user fence completion or timeout.
+ *
+ * The @op can be:
+ *  - %DRM_XE_UFENCE_WAIT_OP_EQ
+ *  - %DRM_XE_UFENCE_WAIT_OP_NEQ
+ *  - %DRM_XE_UFENCE_WAIT_OP_GT
+ *  - %DRM_XE_UFENCE_WAIT_OP_GTE
+ *  - %DRM_XE_UFENCE_WAIT_OP_LT
+ *  - %DRM_XE_UFENCE_WAIT_OP_LTE
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
+ *  - %DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP
+ *
+ * The @mask values can be for example:
+ *  - 0xffu for u8
+ *  - 0xffffu for u16
+ *  - 0xffffffffu for u32
+ *  - 0xffffffffffffffffu for u64
  */
 struct drm_xe_wait_user_fence {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -1107,13 +1152,7 @@ struct drm_xe_wait_user_fence {
 	/** @value: compare value */
 	__u64 value;
 
-	/**
-	 * @mask: comparison mask, values can be for example:
-	 *  - 0xffu for u8
-	 *  - 0xffffu for u16
-	 *  - 0xffffffffu for u32
-	 *  - 0xffffffffffffffffu for u64
-	 */
+	/** @mask: comparison mask */
 	__u64 mask;
 
 	/**
-- 
GitLab


From 76ca3a22c00bed8a43afd14de4b42691f224801b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 15 Dec 2023 15:45:46 +0000
Subject: [PATCH 2252/2340] drm/xe/uapi: Order sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch doesn't modify any text or uapi entries themselves.
It only move things up and down aiming a better organization of the uAPI.

While fixing the documentation I noticed that query_engine_cs_cycles
was in the middle of the memory_region info. Then I noticed more
mismatches on the order when compared to the order of the IOCTL
and QUERY entries declaration. So this patch aims to bring some
order to the uAPI so it gets easier to read and the documentation
generated in the end is able to tell a consistent story.

Overall order:

1. IOCTL definition
2. Extension definition and helper structs
3. IOCTL's Query structs in the order of the Query's entries.
4. The rest of IOCTL structs in the order of IOCTL declaration.
5. uEvents

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
---
 include/uapi/drm/xe_drm.h | 252 ++++++++++++++++++++------------------
 1 file changed, 130 insertions(+), 122 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 4c11dec57a834..b62dd51fa8953 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -12,19 +12,48 @@
 extern "C" {
 #endif
 
-/* Please note that modifications to all structs defined here are
+/*
+ * Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
+ * Sections in this file are organized as follows:
+ *   1. IOCTL definition
+ *   2. Extension definition and helper structs
+ *   3. IOCTL's Query structs in the order of the Query's entries.
+ *   4. The rest of IOCTL structs in the order of IOCTL declaration.
+ *   5. uEvents
  */
 
-/**
- * DOC: uevent generated by xe on it's pci node.
+/*
+ * xe specific ioctls.
  *
- * DRM_XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt
- * fails. The value supplied with the event is always "NEEDS_RESET".
- * Additional information supplied is tile id and gt id of the gt unit for
- * which reset has failed.
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
+ * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
+ * against DRM_COMMAND_BASE and should be between [0x0, 0x60).
  */
-#define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
+#define DRM_XE_DEVICE_QUERY		0x00
+#define DRM_XE_GEM_CREATE		0x01
+#define DRM_XE_GEM_MMAP_OFFSET		0x02
+#define DRM_XE_VM_CREATE		0x03
+#define DRM_XE_VM_DESTROY		0x04
+#define DRM_XE_VM_BIND			0x05
+#define DRM_XE_EXEC_QUEUE_CREATE	0x06
+#define DRM_XE_EXEC_QUEUE_DESTROY	0x07
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x08
+#define DRM_XE_EXEC			0x09
+#define DRM_XE_WAIT_USER_FENCE		0x0a
+/* Must be kept compact -- no holes */
+
+#define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
+#define DRM_IOCTL_XE_GEM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
+#define DRM_IOCTL_XE_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
+#define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
+#define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
+#define DRM_IOCTL_XE_VM_BIND			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
+#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
+#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
+#define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
+#define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 
 /**
  * struct drm_xe_user_extension - Base class for defining a chain of extensions
@@ -90,37 +119,25 @@ struct drm_xe_user_extension {
 	__u32 pad;
 };
 
-/*
- * xe specific ioctls.
- *
- * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
- * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
- * against DRM_COMMAND_BASE and should be between [0x0, 0x60).
+/**
+ * struct drm_xe_ext_set_property - XE set property extension
  */
-#define DRM_XE_DEVICE_QUERY		0x00
-#define DRM_XE_GEM_CREATE		0x01
-#define DRM_XE_GEM_MMAP_OFFSET		0x02
-#define DRM_XE_VM_CREATE		0x03
-#define DRM_XE_VM_DESTROY		0x04
-#define DRM_XE_VM_BIND			0x05
-#define DRM_XE_EXEC_QUEUE_CREATE	0x06
-#define DRM_XE_EXEC_QUEUE_DESTROY	0x07
-#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x08
-#define DRM_XE_EXEC			0x09
-#define DRM_XE_WAIT_USER_FENCE		0x0a
-/* Must be kept compact -- no holes */
+struct drm_xe_ext_set_property {
+	/** @base: base user extension */
+	struct drm_xe_user_extension base;
 
-#define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
-#define DRM_IOCTL_XE_GEM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
-#define DRM_IOCTL_XE_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
-#define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
-#define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
-#define DRM_IOCTL_XE_VM_BIND			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
-#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
-#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
-#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
-#define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
-#define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+	/** @property: property to set */
+	__u32 property;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
 
 /**
  * struct drm_xe_engine_class_instance - instance of an engine class
@@ -274,57 +291,6 @@ struct drm_xe_mem_region {
 	__u64 reserved[6];
 };
 
-/**
- * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
- *
- * If a query is made with a struct drm_xe_device_query where .query is equal to
- * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
- * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
- * .data points to this allocated structure.
- *
- * The query returns the engine cycles, which along with GT's @reference_clock,
- * can be used to calculate the engine timestamp. In addition the
- * query returns a set of cpu timestamps that indicate when the command
- * streamer cycle count was captured.
- */
-struct drm_xe_query_engine_cycles {
-	/**
-	 * @eci: This is input by the user and is the engine for which command
-	 * streamer cycles is queried.
-	 */
-	struct drm_xe_engine_class_instance eci;
-
-	/**
-	 * @clockid: This is input by the user and is the reference clock id for
-	 * CPU timestamp. For definition, see clock_gettime(2) and
-	 * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
-	 * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
-	 */
-	__s32 clockid;
-
-	/** @width: Width of the engine cycle counter in bits. */
-	__u32 width;
-
-	/**
-	 * @engine_cycles: Engine cycles as read from its register
-	 * at 0x358 offset.
-	 */
-	__u64 engine_cycles;
-
-	/**
-	 * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
-	 * reading the engine_cycles register using the reference clockid set by the
-	 * user.
-	 */
-	__u64 cpu_timestamp;
-
-	/**
-	 * @cpu_delta: Time delta in ns captured around reading the lower dword
-	 * of the engine_cycles register.
-	 */
-	__u64 cpu_delta;
-};
-
 /**
  * struct drm_xe_query_mem_regions - describe memory regions
  *
@@ -482,6 +448,57 @@ struct drm_xe_query_topology_mask {
 	__u8 mask[];
 };
 
+/**
+ * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
+ *
+ * If a query is made with a struct drm_xe_device_query where .query is equal to
+ * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
+ * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
+ * .data points to this allocated structure.
+ *
+ * The query returns the engine cycles, which along with GT's @reference_clock,
+ * can be used to calculate the engine timestamp. In addition the
+ * query returns a set of cpu timestamps that indicate when the command
+ * streamer cycle count was captured.
+ */
+struct drm_xe_query_engine_cycles {
+	/**
+	 * @eci: This is input by the user and is the engine for which command
+	 * streamer cycles is queried.
+	 */
+	struct drm_xe_engine_class_instance eci;
+
+	/**
+	 * @clockid: This is input by the user and is the reference clock id for
+	 * CPU timestamp. For definition, see clock_gettime(2) and
+	 * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
+	 * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
+	 */
+	__s32 clockid;
+
+	/** @width: Width of the engine cycle counter in bits. */
+	__u32 width;
+
+	/**
+	 * @engine_cycles: Engine cycles as read from its register
+	 * at 0x358 offset.
+	 */
+	__u64 engine_cycles;
+
+	/**
+	 * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
+	 * reading the engine_cycles register using the reference clockid set by the
+	 * user.
+	 */
+	__u64 cpu_timestamp;
+
+	/**
+	 * @cpu_delta: Time delta in ns captured around reading the lower dword
+	 * of the engine_cycles register.
+	 */
+	__u64 cpu_delta;
+};
+
 /**
  * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main
  * structure to query device information
@@ -668,26 +685,6 @@ struct drm_xe_gem_mmap_offset {
 	__u64 reserved[2];
 };
 
-/**
- * struct drm_xe_ext_set_property - XE set property extension
- */
-struct drm_xe_ext_set_property {
-	/** @base: base user extension */
-	struct drm_xe_user_extension base;
-
-	/** @property: property to set */
-	__u32 property;
-
-	/** @pad: MBZ */
-	__u32 pad;
-
-	/** @value: property value */
-	__u64 value;
-
-	/** @reserved: Reserved */
-	__u64 reserved[2];
-};
-
 /**
  * struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE
  *
@@ -976,6 +973,20 @@ struct drm_xe_exec_queue_create {
 	__u64 reserved[2];
 };
 
+/**
+ * struct drm_xe_exec_queue_destroy - Input of &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
+ */
+struct drm_xe_exec_queue_destroy {
+	/** @exec_queue_id: Exec queue ID */
+	__u32 exec_queue_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
 /**
  * struct drm_xe_exec_queue_get_property - Input of &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
  *
@@ -1000,20 +1011,6 @@ struct drm_xe_exec_queue_get_property {
 	__u64 reserved[2];
 };
 
-/**
- * struct drm_xe_exec_queue_destroy - Input of &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
- */
-struct drm_xe_exec_queue_destroy {
-	/** @exec_queue_id: Exec queue ID */
-	__u32 exec_queue_id;
-
-	/** @pad: MBZ */
-	__u32 pad;
-
-	/** @reserved: Reserved */
-	__u64 reserved[2];
-};
-
 /**
  * struct drm_xe_sync - sync object
  *
@@ -1180,6 +1177,17 @@ struct drm_xe_wait_user_fence {
 	/** @reserved: Reserved */
 	__u64 reserved[2];
 };
+
+/**
+ * DOC: uevent generated by xe on it's pci node.
+ *
+ * DRM_XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt
+ * fails. The value supplied with the event is always "NEEDS_RESET".
+ * Additional information supplied is tile id and gt id of the gt unit for
+ * which reset has failed.
+ */
+#define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
+
 #if defined(__cplusplus)
 }
 #endif
-- 
GitLab


From 4b437893a826b2f1d15f73e72506349656ea14b2 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 15 Dec 2023 15:45:47 +0000
Subject: [PATCH 2253/2340] drm/xe/uapi: More uAPI documentation additions and
 cosmetic updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No functional change in this patch.

Let's ensure all of our structs are documented and with a certain
standard. Also, let's have an overview and list of IOCTLs as the
very beginning of the generated HTML doc.

v2: Nits (Lucas De Marchi)

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
---
 include/uapi/drm/xe_drm.h | 47 +++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b62dd51fa8953..5a01d033b7803 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -23,6 +23,27 @@ extern "C" {
  *   5. uEvents
  */
 
+/**
+ * DOC: Xe uAPI Overview
+ *
+ * This section aims to describe the Xe's IOCTL entries, its structs, and other
+ * Xe related uAPI such as uevents and PMU (Platform Monitoring Unit) related
+ * entries and usage.
+ *
+ * List of supported IOCTLs:
+ *  - &DRM_IOCTL_XE_DEVICE_QUERY
+ *  - &DRM_IOCTL_XE_GEM_CREATE
+ *  - &DRM_IOCTL_XE_GEM_MMAP_OFFSET
+ *  - &DRM_IOCTL_XE_VM_CREATE
+ *  - &DRM_IOCTL_XE_VM_DESTROY
+ *  - &DRM_IOCTL_XE_VM_BIND
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
+ *  - &DRM_IOCTL_XE_EXEC
+ *  - &DRM_IOCTL_XE_WAIT_USER_FENCE
+ */
+
 /*
  * xe specific ioctls.
  *
@@ -56,7 +77,10 @@ extern "C" {
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 
 /**
- * struct drm_xe_user_extension - Base class for defining a chain of extensions
+ * DOC: Xe IOCTL Extensions
+ *
+ * Before detailing the IOCTLs and its structs, it is important to highlight
+ * that every IOCTL in Xe is extensible.
  *
  * Many interfaces need to grow over time. In most cases we can simply
  * extend the struct and have userspace pass in more data. Another option,
@@ -90,7 +114,10 @@ extern "C" {
  * Typically the struct drm_xe_user_extension would be embedded in some uAPI
  * struct, and in this case we would feed it the head of the chain(i.e ext1),
  * which would then apply all of the above extensions.
- *
+*/
+
+/**
+ * struct drm_xe_user_extension - Base class for defining a chain of extensions
  */
 struct drm_xe_user_extension {
 	/**
@@ -120,7 +147,10 @@ struct drm_xe_user_extension {
 };
 
 /**
- * struct drm_xe_ext_set_property - XE set property extension
+ * struct drm_xe_ext_set_property - Generic set property extension
+ *
+ * A generic struct that allows any of the Xe's IOCTL to be extended
+ * with a set_property operation.
  */
 struct drm_xe_ext_set_property {
 	/** @base: base user extension */
@@ -287,7 +317,7 @@ struct drm_xe_mem_region {
 	 * here will always be zero).
 	 */
 	__u64 cpu_visible_used;
-	/** @reserved: MBZ */
+	/** @reserved: Reserved */
 	__u64 reserved[6];
 };
 
@@ -1041,8 +1071,8 @@ struct drm_xe_sync {
 		__u32 handle;
 
 		/**
-		 * @addr: Address of user fence. When sync passed in via exec
-		 * IOCTL this a GPU address in the VM. When sync passed in via
+		 * @addr: Address of user fence. When sync is passed in via exec
+		 * IOCTL this is a GPU address in the VM. When sync passed in via
 		 * VM bind IOCTL this is a user pointer. In either case, it is
 		 * the users responsibility that this address is present and
 		 * mapped when the user fence is signalled. Must be qword
@@ -1051,7 +1081,10 @@ struct drm_xe_sync {
 		__u64 addr;
 	};
 
-	/** @timeline_value: Timeline point of the sync object */
+	/**
+	 * @timeline_value: Input for the timeline sync object. Needs to be
+	 * different than 0 when used with %DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ.
+	 */
 	__u64 timeline_value;
 
 	/** @reserved: Reserved */
-- 
GitLab


From 535881a8c50b79085327e7dbe26a4c55f3e1591b Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 15 Dec 2023 15:45:48 +0000
Subject: [PATCH 2254/2340] drm/xe/uapi: Document the memory_region bitmask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The uAPI should stay generic in regarding to the bitmask. It is
the userspace responsibility to check for the type/class of the
memory, without any assumption.

Also add comments inside the code to explain how it is actually
constructed so we don't accidentally change the assignment of
the instance and the masks.

No functional change in this patch. It only explains and document
the memory_region masks. A further follow-up work with the
organization of all memory regions around struct xe_mem_regions
is desired, but not part of this patch.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_query.c | 19 +++++++++++++++++++
 include/uapi/drm/xe_drm.h     | 23 ++++++++++++++++++-----
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 56d61bf596b2b..9b35673b286c8 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -266,6 +266,11 @@ static int query_mem_regions(struct xe_device *xe,
 
 	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
 	mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
+	/*
+	 * The instance needs to be a unique number that represents the index
+	 * in the placement mask used at xe_gem_create_ioctl() for the
+	 * xe_bo_create() placement.
+	 */
 	mem_regions->mem_regions[0].instance = 0;
 	mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
 	mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
@@ -381,6 +386,20 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 		gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id;
 		gt_list->gt_list[id].gt_id = gt->info.id;
 		gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
+		/*
+		 * The mem_regions indexes in the mask below need to
+		 * directly identify the struct
+		 * drm_xe_query_mem_regions' instance constructed at
+		 * query_mem_regions()
+		 *
+		 * For our current platforms:
+		 * Bit 0 -> System Memory
+		 * Bit 1 -> VRAM0 on Tile0
+		 * Bit 2 -> VRAM1 on Tile1
+		 * However the uAPI is generic and it's userspace's
+		 * responsibility to check the mem_class, without any
+		 * assumption.
+		 */
 		if (!IS_DGFX(xe))
 			gt_list->gt_list[id].near_mem_regions = 0x1;
 		else
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 5a01d033b7803..6c719ba8fc8e1 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -256,10 +256,9 @@ struct drm_xe_mem_region {
 	 */
 	__u16 mem_class;
 	/**
-	 * @instance: The instance for this region.
-	 *
-	 * The @mem_class and @instance taken together will always give
-	 * a unique pair.
+	 * @instance: The unique ID for this region, which serves as the
+	 * index in the placement bitmask used as argument for
+	 * &DRM_IOCTL_XE_GEM_CREATE
 	 */
 	__u16 instance;
 	/**
@@ -404,6 +403,10 @@ struct drm_xe_gt {
 	 * @near_mem_regions: Bit mask of instances from
 	 * drm_xe_query_mem_regions that are nearest to the current engines
 	 * of this GT.
+	 * Each index in this mask refers directly to the struct
+	 * drm_xe_query_mem_regions' instance, no assumptions should
+	 * be made about order. The type of each region is described
+	 * by struct drm_xe_query_mem_regions' mem_class.
 	 */
 	__u64 near_mem_regions;
 	/**
@@ -412,6 +415,10 @@ struct drm_xe_gt {
 	 * In general, they have extra indirections when compared to the
 	 * @near_mem_regions. For a discrete device this could mean system
 	 * memory and memory living in a different tile.
+	 * Each index in this mask refers directly to the struct
+	 * drm_xe_query_mem_regions' instance, no assumptions should
+	 * be made about order. The type of each region is described
+	 * by struct drm_xe_query_mem_regions' mem_class.
 	 */
 	__u64 far_mem_regions;
 	/** @reserved: Reserved */
@@ -652,7 +659,13 @@ struct drm_xe_gem_create {
 	 */
 	__u64 size;
 
-	/** @placement: A mask of memory instances of where BO can be placed. */
+	/**
+	 * @placement: A mask of memory instances of where BO can be placed.
+	 * Each index in this mask refers directly to the struct
+	 * drm_xe_query_mem_regions' instance, no assumptions should
+	 * be made about order. The type of each region is described
+	 * by struct drm_xe_query_mem_regions' mem_class.
+	 */
 	__u32 placement;
 
 #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING		(1 << 0)
-- 
GitLab


From 33c6fda687a37ef871ca04adf2e05ffc646e3b13 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:49 +0000
Subject: [PATCH 2255/2340] drm/xe/uapi: Add block diagram of a device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to make proper use the uAPI, a prerequisite is to understand
some key concepts about the discrete GPU devices which are supported
by the Xe driver. For example, some structs defined in the uAPI are an
abstraction of a hardware component with a specific role.

This diagram helps to build a mental representation of a device how it
is seen by the Xe driver. As written in the documentation, it does not
intend to be a literal representation of an existing device. A lot
more information could be added but the intention for the overview is
to keep it simple, and go into detail as needed in other sections.

v2: Add GT1 inside Tile0 (José Roberto de Souza)

Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 6c719ba8fc8e1..4b5d415432807 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -23,6 +23,45 @@ extern "C" {
  *   5. uEvents
  */
 
+/**
+ * DOC: Xe Device Block Diagram
+ *
+ * The diagram below represents a high-level simplification of a discrete
+ * GPU supported by the Xe driver. It shows some device components which
+ * are necessary to understand this API, as well as how their relations
+ * to each other. This diagram does not represent real hardware::
+ *
+ *   ┌──────────────────────────────────────────────────────────────────┐
+ *   │ ┌──────────────────────────────────────────────────┐ ┌─────────┐ │
+ *   │ │        ┌───────────────────────┐   ┌─────┐       │ │ ┌─────┐ │ │
+ *   │ │        │         VRAM0         ├───┤ ... │       │ │ │VRAM1│ │ │
+ *   │ │        └───────────┬───────────┘   └─GT1─┘       │ │ └──┬──┘ │ │
+ *   │ │ ┌──────────────────┴───────────────────────────┐ │ │ ┌──┴──┐ │ │
+ *   │ │ │ ┌─────────────────────┐  ┌─────────────────┐ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │RCS0 │ │BCS0 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │VCS0 │ │VCS1 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │VECS0│ │VECS1│ │ │ │ │ │ ... │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │CCS0 │ │CCS1 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ └─────────DSS─────────┘  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │                          │ │CCS2 │ │CCS3 │ │ │ │ │ │     │ │ │
+ *   │ │ │ ┌─────┐ ┌─────┐ ┌─────┐  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ... │ │ ... │ │ ... │  │                 │ │ │ │ │     │ │ │
+ *   │ │ │ └─DSS─┘ └─DSS─┘ └─DSS─┘  └─────Engines─────┘ │ │ │ │     │ │ │
+ *   │ │ └───────────────────────────GT0────────────────┘ │ │ └─GT2─┘ │ │
+ *   │ └────────────────────────────Tile0─────────────────┘ └─ Tile1──┘ │
+ *   └─────────────────────────────Device0───────┬──────────────────────┘
+ *                                               │
+ *                        ───────────────────────┴────────── PCI bus
+ */
+
 /**
  * DOC: Xe uAPI Overview
  *
-- 
GitLab


From db35331176f93125cc4bfa0d05283688607200f5 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:50 +0000
Subject: [PATCH 2256/2340] drm/xe/uapi: Add examples of user space code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Complete the documentation of some structs by adding functional
examples of user space code. Those examples are intentionally kept
very simple. Put together, they provide a foundation for a minimal
application that executes a job using the Xe driver.

v2: Remove use of DRM_XE_VM_BIND_FLAG_ASYNC (Francois Dugast)

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 84 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 4b5d415432807..5240653eeefd7 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -951,6 +951,30 @@ struct drm_xe_vm_bind_op {
 
 /**
  * struct drm_xe_vm_bind - Input of &DRM_IOCTL_XE_VM_BIND
+ *
+ * Below is an example of a minimal use of @drm_xe_vm_bind to
+ * asynchronously bind the buffer `data` at address `BIND_ADDRESS` to
+ * illustrate `userptr`. It can be synchronized by using the example
+ * provided for @drm_xe_sync.
+ *
+ * .. code-block:: C
+ *
+ *     data = aligned_alloc(ALIGNMENT, BO_SIZE);
+ *     struct drm_xe_vm_bind bind = {
+ *         .vm_id = vm,
+ *         .num_binds = 1,
+ *         .bind.obj = 0,
+ *         .bind.obj_offset = to_user_pointer(data),
+ *         .bind.range = BO_SIZE,
+ *         .bind.addr = BIND_ADDRESS,
+ *         .bind.op = DRM_XE_VM_BIND_OP_MAP_USERPTR,
+ *         .bind.flags = 0,
+ *         .num_syncs = 1,
+ *         .syncs = &sync,
+ *         .exec_queue_id = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
+ *
  */
 struct drm_xe_vm_bind {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -1012,6 +1036,25 @@ struct drm_xe_vm_bind {
 
 /**
  * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
+ *
+ * The example below shows how to use @drm_xe_exec_queue_create to create
+ * a simple exec_queue (no parallel submission) of class
+ * &DRM_XE_ENGINE_CLASS_RENDER.
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_engine_class_instance instance = {
+ *         .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
+ *     };
+ *     struct drm_xe_exec_queue_create exec_queue_create = {
+ *          .extensions = 0,
+ *          .vm_id = vm,
+ *          .num_bb_per_exec = 1,
+ *          .num_eng_per_bb = 1,
+ *          .instances = to_user_pointer(&instance),
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create);
+ *
  */
 struct drm_xe_exec_queue_create {
 #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY		0
@@ -1103,6 +1146,30 @@ struct drm_xe_exec_queue_get_property {
  *
  * and the @flags can be:
  *  - %DRM_XE_SYNC_FLAG_SIGNAL
+ *
+ * A minimal use of @drm_xe_sync looks like this:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_sync sync = {
+ *         .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ *         .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+ *     };
+ *     struct drm_syncobj_create syncobj_create = { 0 };
+ *     ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create);
+ *     sync.handle = syncobj_create.handle;
+ *         ...
+ *         use of &sync in drm_xe_exec or drm_xe_vm_bind
+ *         ...
+ *     struct drm_syncobj_wait wait = {
+ *         .handles = &sync.handle,
+ *         .timeout_nsec = INT64_MAX,
+ *         .count_handles = 1,
+ *         .flags = 0,
+ *         .first_signaled = 0,
+ *         .pad = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
  */
 struct drm_xe_sync {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -1145,6 +1212,23 @@ struct drm_xe_sync {
 
 /**
  * struct drm_xe_exec - Input of &DRM_IOCTL_XE_EXEC
+ *
+ * This is an example to use @drm_xe_exec for execution of the object
+ * at BIND_ADDRESS (see example in @drm_xe_vm_bind) by an exec_queue
+ * (see example in @drm_xe_exec_queue_create). It can be synchronized
+ * by using the example provided for @drm_xe_sync.
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_exec exec = {
+ *         .exec_queue_id = exec_queue,
+ *         .syncs = &sync,
+ *         .num_syncs = 1,
+ *         .address = BIND_ADDRESS,
+ *         .num_batch_buffer = 1,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
+ *
  */
 struct drm_xe_exec {
 	/** @extensions: Pointer to the first extension struct, if any */
-- 
GitLab


From 0bf90a8c223759564964d4a1ecd44608876ab02d Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:51 +0000
Subject: [PATCH 2257/2340] drm/xe/uapi: Move CPU_CACHING defines before doc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move those defines to align on the rule used elsewhere in the file which
was introduced by commit 4f082f2c3a37 ("drm/xe: Move defines before
relevant fields").

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 5240653eeefd7..8a69abea0725f 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -733,12 +733,12 @@ struct drm_xe_gem_create {
 	 */
 	__u32 handle;
 
+#define DRM_XE_GEM_CPU_CACHING_WB                      1
+#define DRM_XE_GEM_CPU_CACHING_WC                      2
 	/**
 	 * @cpu_caching: The CPU caching mode to select for this object. If
 	 * mmaping the object the mode selected here will also be used.
 	 */
-#define DRM_XE_GEM_CPU_CACHING_WB                      1
-#define DRM_XE_GEM_CPU_CACHING_WC                      2
 	__u16 cpu_caching;
 	/** @pad: MBZ */
 	__u16 pad[3];
-- 
GitLab


From 9f7ceec2cd25e7aea31cd0630b6fcf439770e322 Mon Sep 17 00:00:00 2001
From: Francois Dugast <francois.dugast@intel.com>
Date: Fri, 15 Dec 2023 15:45:52 +0000
Subject: [PATCH 2258/2340] drm/xe/uapi: Move DRM_XE_ACC_GRANULARITY_* where
 they are used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bring those defines close to the context where they can be used. Also
apply indentation as it is done for other subsets of defines.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 8a69abea0725f..919aa72c44816 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1020,20 +1020,6 @@ struct drm_xe_vm_bind {
 	__u64 reserved[2];
 };
 
-/* For use with DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */
-
-/* Monitor 128KB contiguous region with 4K sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_128K 0
-
-/* Monitor 2MB contiguous region with 64KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_2M 1
-
-/* Monitor 16MB contiguous region with 512KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_16M 2
-
-/* Monitor 64MB contiguous region with 2M sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_64M 3
-
 /**
  * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
  *
@@ -1066,6 +1052,14 @@ struct drm_xe_exec_queue_create {
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		5
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		6
 #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY	7
+/* Monitor 128KB contiguous region with 4K sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_128K				0
+/* Monitor 2MB contiguous region with 64KB sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_2M				1
+/* Monitor 16MB contiguous region with 512KB sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_16M				2
+/* Monitor 64MB contiguous region with 2M sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_64M				3
 
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
-- 
GitLab


From 77a0d4d1cea2140ef56929ab1cfa5e525772c90e Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 15 Dec 2023 15:45:53 +0000
Subject: [PATCH 2259/2340] drm/xe/uapi: Remove reset uevent for now
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This kernel uevent is getting removed for now. It will come
back later with a better future proof name.

v2: Rebase (Francois Dugast)

Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Mateusz Naklicki <mateusz.naklicki@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c | 18 ------------------
 include/uapi/drm/xe_drm.h  | 11 -----------
 2 files changed, 29 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index f5d18e98f8b6c..3af2adec12956 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -589,20 +589,6 @@ static int do_gt_restart(struct xe_gt *gt)
 	return 0;
 }
 
-static void xe_uevent_gt_reset_failure(struct pci_dev *pdev, u8 tile_id, u8 gt_id)
-{
-	char *reset_event[4];
-
-	reset_event[0] = DRM_XE_RESET_FAILED_UEVENT "=NEEDS_RESET";
-	reset_event[1] = kasprintf(GFP_KERNEL, "TILE_ID=%d", tile_id);
-	reset_event[2] = kasprintf(GFP_KERNEL, "GT_ID=%d", gt_id);
-	reset_event[3] = NULL;
-	kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, reset_event);
-
-	kfree(reset_event[1]);
-	kfree(reset_event[2]);
-}
-
 static int gt_reset(struct xe_gt *gt)
 {
 	int err;
@@ -659,10 +645,6 @@ err_msg:
 err_fail:
 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
 
-	/* Notify userspace about gt reset failure */
-	xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev),
-				   gt_to_tile(gt)->id, gt->info.id);
-
 	gt_to_xe(gt)->needs_flr_on_fini = true;
 
 	return err;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 919aa72c44816..9fa3ae324731a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -20,7 +20,6 @@ extern "C" {
  *   2. Extension definition and helper structs
  *   3. IOCTL's Query structs in the order of the Query's entries.
  *   4. The rest of IOCTL structs in the order of IOCTL declaration.
- *   5. uEvents
  */
 
 /**
@@ -1341,16 +1340,6 @@ struct drm_xe_wait_user_fence {
 	__u64 reserved[2];
 };
 
-/**
- * DOC: uevent generated by xe on it's pci node.
- *
- * DRM_XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt
- * fails. The value supplied with the event is always "NEEDS_RESET".
- * Additional information supplied is tile id and gt id of the gt unit for
- * which reset has failed.
- */
-#define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS"
-
 #if defined(__cplusplus)
 }
 #endif
-- 
GitLab


From d9e41171e513b594470f81a97d26fe0f06f1fbd3 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Mon, 11 Dec 2023 11:30:56 -0500
Subject: [PATCH 2260/2340] MAINTAINERS: Updates to Intel DRM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce the Maintainers of the new drm/xe driver for upcoming
Intel GPUs.

Since it has a shared display with drm/i915, let's also create a
dedicated block to group display related files. But without any
substantial change to the i915 side. The display patches will
continue to flow through i915 from drm-intel-next branches for now.

Acked-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Oded Gabbay <ogabbay@kernel.org>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 MAINTAINERS | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d4b46b3db022f..8b5e3c27bca6f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10640,7 +10640,17 @@ L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	arch/x86/include/asm/intel-family.h
 
-INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
+INTEL DRM DISPLAY FOR XE AND I915 DRIVERS
+M:	Jani Nikula <jani.nikula@linux.intel.com>
+M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
+L:	intel-gfx@lists.freedesktop.org
+L:	intel-xe@lists.freedesktop.org
+S:	Supported
+F:	drivers/gpu/drm/i915/display/
+F:	drivers/gpu/drm/xe/display/
+F:	drivers/gpu/drm/xe/compat-i915-headers
+
+INTEL DRM I915 DRIVER (Meteor Lake, DG2 and older excluding Poulsbo, Moorestown and derivative)
 M:	Jani Nikula <jani.nikula@linux.intel.com>
 M:	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
 M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
@@ -10659,6 +10669,23 @@ F:	drivers/gpu/drm/i915/
 F:	include/drm/i915*
 F:	include/uapi/drm/i915_drm.h
 
+INTEL DRM XE DRIVER (Lunar Lake and newer)
+M:	Lucas De Marchi <lucas.demarchi@intel.com>
+M:	Oded Gabbay <ogabbay@kernel.org>
+M:	Thomas Hellström <thomas.hellstrom@linux.intel.com>
+L:	intel-xe@lists.freedesktop.org
+S:	Supported
+W:	https://drm.pages.freedesktop.org/intel-docs/
+Q:	http://patchwork.freedesktop.org/project/intel-xe/
+B:	https://gitlab.freedesktop.org/drm/xe/kernel/-/issues
+C:	irc://irc.oftc.net/xe
+T:	git https://gitlab.freedesktop.org/drm/xe/kernel.git
+F:	Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+F:	Documentation/gpu/xe/
+F:	drivers/gpu/drm/xe/
+F:	include/drm/xe*
+F:	include/uapi/drm/xe_drm.h
+
 INTEL ETHERNET DRIVERS
 M:	Jesse Brandeburg <jesse.brandeburg@intel.com>
 M:	Tony Nguyen <anthony.l.nguyen@intel.com>
-- 
GitLab


From e157f0f76258f11920fd5859a8ac1473a8ce5340 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Mon, 18 Dec 2023 08:47:02 -0500
Subject: [PATCH 2261/2340] drm/xe: Fix build without CONFIG_FAULT_INJECTION
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ideally this header could be included without the CONFIG_FAULT_INJECTION
and it would take care itself for the includes it needs.
So, let's temporary workaround this by moving this below and including
only when CONFIG_FAULT_INJECTION is selected to avoid build breakages.

Another solution would be us including the linux/types.h as well, but
this creates unnecessary cases.

Reference: https://lore.kernel.org/all/20230816134748.979231-1-himal.prasad.ghimiray@intel.com/
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: Oded Gabbay <ogabbay@kernel.org>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c | 2 +-
 drivers/gpu/drm/xe/xe_gt.h      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index f1e80be8b9306..c56fd7d59f057 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -5,7 +5,6 @@
 
 #include "xe_debugfs.h"
 
-#include <linux/fault-inject.h>
 #include <linux/string_helpers.h>
 
 #include <drm/drm_debugfs.h>
@@ -22,6 +21,7 @@
 #endif
 
 #ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */
 DECLARE_FAULT_ATTR(gt_reset_failure);
 #endif
 
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index f3c780bd266dd..4486e083f5eff 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -7,7 +7,6 @@
 #define _XE_GT_H_
 
 #include <drm/drm_util.h>
-#include <linux/fault-inject.h>
 
 #include "xe_device_types.h"
 #include "xe_hw_engine.h"
@@ -20,6 +19,7 @@
 #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
 
 #ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */
 extern struct fault_attr gt_reset_failure;
 static inline bool xe_fault_inject_gt_reset(void)
 {
-- 
GitLab


From de991b9af0532a05d5206c065bf343d6a767a9d2 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Tue, 19 Dec 2023 12:00:20 -0800
Subject: [PATCH 2262/2340] drm/xe: Remove ci-only GuC FW definitions

As part of the FW definitions, we declare each blob as required via the
MODULE_FIRMWARE() macro. This causes the initramfs update (or equivalent
process) to look for the blobs on disk when the kernel is installed;
therefore, we need to make sure that all FWs we define are available in
linux-firmware.

We currently don't plan to push the PVC blob to linux-firmware, while the
LNL one will only be pushed once we have machines in CI to test it, so we
need to remove them from the list for now.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_uc_fw.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 73d6938c921d9..9dff96dfe4559 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -102,9 +102,7 @@ struct fw_blobs_by_type {
 };
 
 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
-	fw_def(LUNARLAKE,	mmp_ver(xe,	guc,	lnl,	70, 6, 8))	\
 	fw_def(METEORLAKE,	major_ver(i915,	guc,	mtl,	70, 7))		\
-	fw_def(PVC,		mmp_ver(xe,	guc,	pvc,	70, 9, 1))	\
 	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
 	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
 	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 5))		\
-- 
GitLab


From b6e1b708176846248c87318786d22465ac96dd2c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Wed, 20 Dec 2023 08:19:23 -0800
Subject: [PATCH 2263/2340] drm/xe: Remove uninitialized variable from warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"err" is not initialized when failing to create and add the freq0 sysfs
file. Remove it from the message. This fixes the following warning with
clang:

	../drivers/gpu/drm/xe/xe_gt_freq.c:202:30: error: variable 'err' is uninitialized when used here [-Werror,-Wuninitialized]
				 kobject_name(gt->sysfs), err);
							  ^~~

Fixes: bef52b5c7a19 ("drm/xe: Create a xe_gt_freq component for raw management and sysfs")
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20231220161923.3740489-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_freq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 08eabcafe7bcc..3adfa6686e7cf 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -198,8 +198,8 @@ void xe_gt_freq_init(struct xe_gt *gt)
 
 	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
 	if (!gt->freq) {
-		drm_warn(&xe->drm, "failed to add freq0 directory to %s, err: %d\n",
-			 kobject_name(gt->sysfs), err);
+		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
+			 kobject_name(gt->sysfs));
 		return;
 	}
 
-- 
GitLab


From ea97a66a221893fb9b4d96688e759d1db2d6e683 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 21 Dec 2023 14:28:04 -0800
Subject: [PATCH 2264/2340] drm/xe: Disable 32bits build

Add a dependency on CONFIG_64BIT since currently the xe driver doesn't
build on 32bits. It may be enabled again after all the issues are fixed.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231221222809.4123220-2-lucas.demarchi@intel.com
---
 drivers/gpu/drm/xe/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 90c676d14c509..1cced50d8d8c9 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE
 	tristate "Intel Xe Graphics"
-	depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
+	depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT
 	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
-- 
GitLab


From cb4daf271302d71a6b9a7c01bd0b6d76febd8f0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Dec 2023 10:16:24 +0200
Subject: [PATCH 2265/2340] drm: Don't unref the same fb many times by mistake
 due to deadlock handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we get a deadlock after the fb lookup in drm_mode_page_flip_ioctl()
we proceed to unref the fb and then retry the whole thing from the top.
But we forget to reset the fb pointer back to NULL, and so if we then
get another error during the retry, before the fb lookup, we proceed
the unref the same fb again without having gotten another reference.
The end result is that the fb will (eventually) end up being freed
while it's still in use.

Reset fb to NULL once we've unreffed it to avoid doing it again
until we've done another fb lookup.

This turned out to be pretty easy to hit on a DG2 when doing async
flips (and CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y). The first symptom I
saw that drm_closefb() simply got stuck in a busy loop while walking
the framebuffer list. Fortunately I was able to convince it to oops
instead, and from there it was easier to track down the culprit.

Cc: stable@vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211081625.25704-1-ville.syrjala@linux.intel.com
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
---
 drivers/gpu/drm/drm_plane.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 9e8e4c60983d6..672c655c7a8e7 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -1503,6 +1503,7 @@ retry:
 out:
 	if (fb)
 		drm_framebuffer_put(fb);
+	fb = NULL;
 	if (plane->old_fb)
 		drm_framebuffer_put(plane->old_fb);
 	plane->old_fb = NULL;
-- 
GitLab


From 971740a4c3ac2692a8adb958d5f810c47f07e9b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Dec 2023 10:16:25 +0200
Subject: [PATCH 2266/2340] drm: Warn when freeing a framebuffer that's still
 on a list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sprinkle some extra WARNs around so that we might catch
premature framebuffer destruction more readily.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231211081625.25704-2-ville.syrjala@linux.intel.com
Acked-by: Javier Martinez Canillas <javierm@redhat.com>
---
 drivers/gpu/drm/drm_framebuffer.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 3cc0ffc28e862..888aadb6a4acb 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -461,6 +461,7 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id,
 
 		INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn);
 		INIT_LIST_HEAD(&arg.fbs);
+		drm_WARN_ON(dev, !list_empty(&fb->filp_head));
 		list_add_tail(&fb->filp_head, &arg.fbs);
 
 		schedule_work(&arg.work);
@@ -827,6 +828,8 @@ void drm_framebuffer_free(struct kref *kref)
 			container_of(kref, struct drm_framebuffer, base.refcount);
 	struct drm_device *dev = fb->dev;
 
+	drm_WARN_ON(dev, !list_empty(&fb->filp_head));
+
 	/*
 	 * The lookup idr holds a weak reference, which has not necessarily been
 	 * removed at this point. Check for that.
@@ -1119,7 +1122,7 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 
 	dev = fb->dev;
 
-	WARN_ON(!list_empty(&fb->filp_head));
+	drm_WARN_ON(dev, !list_empty(&fb->filp_head));
 
 	/*
 	 * drm ABI mandates that we remove any deleted framebuffers from active
-- 
GitLab


From 3ee348eb36f14e9303a7e9757efb91b0bbf3f7a9 Mon Sep 17 00:00:00 2001
From: Andy Yan <andy.yan@rock-chips.com>
Date: Sun, 17 Dec 2023 16:44:15 +0800
Subject: [PATCH 2267/2340] drm/rockchip: vop2: Avoid use regmap_reinit_cache
 at runtime

Marek Report a possible irq lock inversion dependency warning when
commit 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync()
to fix suspend/resume"") lands linux-next.

I can reproduce this warning with:
CONFIG_PROVE_LOCKING=y
CONFIG_DEBUG_LOCKDEP=y

It seems than when use regmap_reinit_cache at runtime whith Mark's
commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree
register cache"), it will trigger a possible irq lock inversion dependency
warning.

One solution is switch back to REGCACHE_RBTREE, but it seems that
REGCACHE_MAPLE is the future, so I avoid using regmap_reinit_cache,
and drop all the regcache when vop is disabled, then we get a fresh
start at next enbable time.

Fixes: 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() to fix suspend/resume"")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Closes: https://lore.kernel.org/all/98a9f15d-30ac-47bf-9b93-3aa2c9900f7b@samsung.com/
Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
[dropped the large kernel log of the lockdep report from the message]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231217084415.2373043-1-andyshrk@163.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 44508c2dd6145..760e11c1be4a8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -959,12 +959,6 @@ static void vop2_enable(struct vop2 *vop2)
 		return;
 	}
 
-	ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config);
-	if (ret) {
-		drm_err(vop2->drm, "failed to reinit cache: %d\n", ret);
-		return;
-	}
-
 	if (vop2->data->soc_id == 3566)
 		vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
 
@@ -996,6 +990,8 @@ static void vop2_disable(struct vop2 *vop2)
 
 	pm_runtime_put_sync(vop2->dev);
 
+	regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register);
+
 	clk_disable_unprepare(vop2->pclk);
 	clk_disable_unprepare(vop2->aclk);
 	clk_disable_unprepare(vop2->hclk);
-- 
GitLab


From f40e61eb538d35661d6dda1de92867954d776c4a Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 19 Dec 2023 14:26:35 +0800
Subject: [PATCH 2268/2340] drm/rockchip: vop2: clean up some inconsistent
 indenting

No functional modification involved.

drivers/gpu/drm/rockchip/rockchip_drm_vop2.c:1708 rk3588_calc_cru_cfg() warn: inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7778
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20231219062635.100718-1-jiapeng.chong@linux.alibaba.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index 760e11c1be4a8..574103fc79f98 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1701,8 +1701,8 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id,
 		 * *if_pixclk_div = dclk_rate / if_pixclk_rate;
 		 * *if_dclk_div = dclk_rate / if_dclk_rate;
 		 */
-		 *if_pixclk_div = 2;
-		 *if_dclk_div = 4;
+		*if_pixclk_div = 2;
+		*if_dclk_div = 4;
 	} else if (vop2_output_if_is_edp(id)) {
 		/*
 		 * edp_pixclk = edp_dclk > dclk_core
-- 
GitLab


From 75cbe49f9e2f71a73fed0b677d8d7ff1ffbeaa45 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Fri, 15 Dec 2023 15:02:03 -0800
Subject: [PATCH 2269/2340] drm/xe: Fix UBSAN splat in add_preempt_fences()

add_preempt_fences() calls dma_resv_reserve_fences() with num_fences ==
0 resulting in the below UBSAN splat. Short circuit add_preempt_fences()
if num_fences == 0.

[   58.652241] ================================================================================
[   58.660736] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13
[   58.667281] shift exponent 64 is too large for 64-bit type 'long unsigned int'
[   58.674539] CPU: 2 PID: 1170 Comm: xe_gpgpu_fill Not tainted 6.6.0-rc3-guc+ #630
[   58.674545] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020
[   58.674547] Call Trace:
[   58.674548]  <TASK>
[   58.674550]  dump_stack_lvl+0x92/0xb0
[   58.674555]  __ubsan_handle_shift_out_of_bounds+0x15a/0x300
[   58.674559]  ? rcu_is_watching+0x12/0x60
[   58.674564]  ? software_resume+0x141/0x210
[   58.674575]  ? new_vma+0x44b/0x600 [xe]
[   58.674606]  dma_resv_reserve_fences.cold+0x40/0x66
[   58.674612]  new_vma+0x4b3/0x600 [xe]
[   58.674638]  xe_vm_bind_ioctl+0xffd/0x1e00 [xe]
[   58.674663]  ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe]
[   58.674680]  drm_ioctl_kernel+0xc1/0x170
[   58.674686]  ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe]
[   58.674703]  drm_ioctl+0x247/0x4c0
[   58.674709]  ? find_held_lock+0x2b/0x80
[   58.674716]  __x64_sys_ioctl+0x8c/0xb0
[   58.674720]  do_syscall_64+0x3c/0x90
[   58.674723]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
[   58.674727] RIP: 0033:0x7fce4bd1aaff
[   58.674730] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00
[   58.674731] RSP: 002b:00007ffc57434050 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[   58.674734] RAX: ffffffffffffffda RBX: 00007ffc574340e0 RCX: 00007fce4bd1aaff
[   58.674736] RDX: 00007ffc574340e0 RSI: 0000000040886445 RDI: 0000000000000003
[   58.674737] RBP: 0000000040886445 R08: 0000000000000002 R09: 00007ffc574341b0
[   58.674739] R10: 000055de43eb3780 R11: 0000000000000246 R12: 00007ffc574340e0
[   58.674740] R13: 0000000000000003 R14: 00007ffc574341b0 R15: 0000000000000001
[   58.674747]  </TASK>
[   58.674748] ================================================================================

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20231215230203.719244-1-matthew.brost@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 9180f2d2d71d2..0cfe7289b97ef 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -279,6 +279,9 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
 	struct xe_exec_queue *q;
 	int err;
 
+	if (!vm->preempt.num_exec_queues)
+		return 0;
+
 	err = xe_bo_lock(bo, true);
 	if (err)
 		return err;
-- 
GitLab


From 315acff5196f4e2f84a2a2d093000e0c6b0b4d1c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 18 Dec 2023 08:33:01 -0800
Subject: [PATCH 2270/2340] drm/xe: Fix warning on impossible condition

Having a different value for op is not possible: this is already kept
out of user-visible warning by the check in xe_wait_user_fence_ioctl()
if op > MAX_OP. The warning is useful as if this switch() is not update
when a new op is added, it should be triggered.

Fix warning as reported by 0-DAY CI Kernel:

	drivers/gpu/drm/xe/xe_wait_user_fence.c:46:2: warning: variable 'passed' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized]

Closes: https://lore.kernel.org/oe-kbuild-all/202312170357.KPSinwPs-lkp@intel.com/
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/20231218163301.3453285-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wait_user_fence.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index b0a7896f7fcb6..a75eeba7bfe58 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -46,6 +46,7 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
 		break;
 	default:
 		XE_WARN_ON("Not possible");
+		return -EINVAL;
 	}
 
 	return passed ? 0 : 1;
-- 
GitLab


From 200a6b3af05918ddb18832fa4d5a8f15c9dd99e0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 30 Dec 2023 21:48:56 -0800
Subject: [PATCH 2271/2340] drm/gpuvm: fix all kernel-doc warnings in
 include/drm/drm_gpuvm.h

Update kernel-doc comments in <drm/drm_gpuvm.h> to correct all
kernel-doc warnings:

drm_gpuvm.h:148: warning: Excess struct member 'addr' description in 'drm_gpuva'
drm_gpuvm.h:148: warning: Excess struct member 'offset' description in 'drm_gpuva'
drm_gpuvm.h:148: warning: Excess struct member 'obj' description in 'drm_gpuva'
drm_gpuvm.h:148: warning: Excess struct member 'entry' description in 'drm_gpuva'
drm_gpuvm.h:148: warning: Excess struct member '__subtree_last' description in 'drm_gpuva'
drm_gpuvm.h:192: warning: No description found for return value of 'drm_gpuva_invalidated'
drm_gpuvm.h:331: warning: Excess struct member 'tree' description in 'drm_gpuvm'
drm_gpuvm.h:331: warning: Excess struct member 'list' description in 'drm_gpuvm'
drm_gpuvm.h:331: warning: Excess struct member 'list' description in 'drm_gpuvm'
drm_gpuvm.h:331: warning: Excess struct member 'local_list' description in 'drm_gpuvm'
drm_gpuvm.h:331: warning: Excess struct member 'lock' description in 'drm_gpuvm'
drm_gpuvm.h:331: warning: Excess struct member 'list' description in 'drm_gpuvm'
drm_gpuvm.h:331: warning: Excess struct member 'local_list' description in 'drm_gpuvm'
drm_gpuvm.h:331: warning: Excess struct member 'lock' description in 'drm_gpuvm'
drm_gpuvm.h:352: warning: No description found for return value of 'drm_gpuvm_get'
drm_gpuvm.h:545: warning: Excess struct member 'fn' description in 'drm_gpuvm_exec'
drm_gpuvm.h:545: warning: Excess struct member 'priv' description in 'drm_gpuvm_exec'
drm_gpuvm.h:597: warning: missing initial short description on line:
 * drm_gpuvm_exec_resv_add_fence()
drm_gpuvm.h:616: warning: missing initial short description on line:
 * drm_gpuvm_exec_validate()
drm_gpuvm.h:623: warning: No description found for return value of 'drm_gpuvm_exec_validate'
drm_gpuvm.h:698: warning: Excess struct member 'gpuva' description in 'drm_gpuvm_bo'
drm_gpuvm.h:698: warning: Excess struct member 'entry' description in 'drm_gpuvm_bo'
drm_gpuvm.h:698: warning: Excess struct member 'gem' description in 'drm_gpuvm_bo'
drm_gpuvm.h:698: warning: Excess struct member 'evict' description in 'drm_gpuvm_bo'
drm_gpuvm.h:726: warning: No description found for return value of 'drm_gpuvm_bo_get'
drm_gpuvm.h:738: warning: missing initial short description on line:
 * drm_gpuvm_bo_gem_evict()
drm_gpuvm.h:740: warning: missing initial short description on line:
 * drm_gpuvm_bo_gem_evict()
drm_gpuvm.h:698: warning: Excess struct member 'evict' description in 'drm_gpuvm_bo'
drm_gpuvm.h:844: warning: Excess struct member 'addr' description in 'drm_gpuva_op_map'
drm_gpuvm.h:844: warning: Excess struct member 'range' description in 'drm_gpuva_op_map'
drm_gpuvm.h:844: warning: Excess struct member 'offset' description in 'drm_gpuva_op_map'
drm_gpuvm.h:844: warning: Excess struct member 'obj' description in 'drm_gpuva_op_map'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231231054856.31786-1-rdunlap@infradead.org
---
 include/drm/drm_gpuvm.h | 80 +++++++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 6258849382e16..f3fdb810989af 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -92,7 +92,7 @@ struct drm_gpuva {
 	 */
 	struct {
 		/**
-		 * @addr: the start address
+		 * @va.addr: the start address
 		 */
 		u64 addr;
 
@@ -107,17 +107,17 @@ struct drm_gpuva {
 	 */
 	struct {
 		/**
-		 * @offset: the offset within the &drm_gem_object
+		 * @gem.offset: the offset within the &drm_gem_object
 		 */
 		u64 offset;
 
 		/**
-		 * @obj: the mapped &drm_gem_object
+		 * @gem.obj: the mapped &drm_gem_object
 		 */
 		struct drm_gem_object *obj;
 
 		/**
-		 * @entry: the &list_head to attach this object to a &drm_gpuvm_bo
+		 * @gem.entry: the &list_head to attach this object to a &drm_gpuvm_bo
 		 */
 		struct list_head entry;
 	} gem;
@@ -127,12 +127,12 @@ struct drm_gpuva {
 	 */
 	struct {
 		/**
-		 * @rb: the rb-tree node
+		 * @rb.node: the rb-tree node
 		 */
 		struct rb_node node;
 
 		/**
-		 * @entry: The &list_head to additionally connect &drm_gpuvas
+		 * @rb.entry: The &list_head to additionally connect &drm_gpuvas
 		 * in the same order they appear in the interval tree. This is
 		 * useful to keep iterating &drm_gpuvas from a start node found
 		 * through the rb-tree while doing modifications on the rb-tree
@@ -141,7 +141,7 @@ struct drm_gpuva {
 		struct list_head entry;
 
 		/**
-		 * @__subtree_last: needed by the interval tree, holding last-in-subtree
+		 * @rb.__subtree_last: needed by the interval tree, holding last-in-subtree
 		 */
 		u64 __subtree_last;
 	} rb;
@@ -187,6 +187,8 @@ static inline void drm_gpuva_invalidate(struct drm_gpuva *va, bool invalidate)
  * drm_gpuva_invalidated() - indicates whether the backing BO of this &drm_gpuva
  * is invalidated
  * @va: the &drm_gpuva to check
+ *
+ * Returns: %true if the GPU VA is invalidated, %false otherwise
  */
 static inline bool drm_gpuva_invalidated(struct drm_gpuva *va)
 {
@@ -252,12 +254,12 @@ struct drm_gpuvm {
 	 */
 	struct {
 		/**
-		 * @tree: the rb-tree to track GPU VA mappings
+		 * @rb.tree: the rb-tree to track GPU VA mappings
 		 */
 		struct rb_root_cached tree;
 
 		/**
-		 * @list: the &list_head to track GPU VA mappings
+		 * @rb.list: the &list_head to track GPU VA mappings
 		 */
 		struct list_head list;
 	} rb;
@@ -290,19 +292,19 @@ struct drm_gpuvm {
 	 */
 	struct {
 		/**
-		 * @list: &list_head storing &drm_gpuvm_bos serving as
+		 * @extobj.list: &list_head storing &drm_gpuvm_bos serving as
 		 * external object
 		 */
 		struct list_head list;
 
 		/**
-		 * @local_list: pointer to the local list temporarily storing
-		 * entries from the external object list
+		 * @extobj.local_list: pointer to the local list temporarily
+		 * storing entries from the external object list
 		 */
 		struct list_head *local_list;
 
 		/**
-		 * @lock: spinlock to protect the extobj list
+		 * @extobj.lock: spinlock to protect the extobj list
 		 */
 		spinlock_t lock;
 	} extobj;
@@ -312,19 +314,19 @@ struct drm_gpuvm {
 	 */
 	struct {
 		/**
-		 * @list: &list_head storing &drm_gpuvm_bos currently being
-		 * evicted
+		 * @evict.list: &list_head storing &drm_gpuvm_bos currently
+		 * being evicted
 		 */
 		struct list_head list;
 
 		/**
-		 * @local_list: pointer to the local list temporarily storing
-		 * entries from the evicted object list
+		 * @evict.local_list: pointer to the local list temporarily
+		 * storing entries from the evicted object list
 		 */
 		struct list_head *local_list;
 
 		/**
-		 * @lock: spinlock to protect the evict list
+		 * @evict.lock: spinlock to protect the evict list
 		 */
 		spinlock_t lock;
 	} evict;
@@ -344,6 +346,8 @@ void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
  *
  * This function acquires an additional reference to @gpuvm. It is illegal to
  * call this without already holding a reference. No locks required.
+ *
+ * Returns: the &struct drm_gpuvm pointer
  */
 static inline struct drm_gpuvm *
 drm_gpuvm_get(struct drm_gpuvm *gpuvm)
@@ -533,12 +537,13 @@ struct drm_gpuvm_exec {
 	 */
 	struct {
 		/**
-		 * @fn: The driver callback to lock additional &drm_gem_objects.
+		 * @extra.fn: The driver callback to lock additional
+		 * &drm_gem_objects.
 		 */
 		int (*fn)(struct drm_gpuvm_exec *vm_exec);
 
 		/**
-		 * @priv: driver private data for the @fn callback
+		 * @extra.priv: driver private data for the @fn callback
 		 */
 		void *priv;
 	} extra;
@@ -589,7 +594,7 @@ void drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm,
 			      enum dma_resv_usage extobj_usage);
 
 /**
- * drm_gpuvm_exec_resv_add_fence()
+ * drm_gpuvm_exec_resv_add_fence() - add fence to private and all extobj
  * @vm_exec: the &drm_gpuvm_exec wrapper
  * @fence: fence to add
  * @private_usage: private dma-resv usage
@@ -608,10 +613,12 @@ drm_gpuvm_exec_resv_add_fence(struct drm_gpuvm_exec *vm_exec,
 }
 
 /**
- * drm_gpuvm_exec_validate()
+ * drm_gpuvm_exec_validate() - validate all BOs marked as evicted
  * @vm_exec: the &drm_gpuvm_exec wrapper
  *
  * See drm_gpuvm_validate().
+ *
+ * Returns: 0 on success, negative error code on failure.
  */
 static inline int
 drm_gpuvm_exec_validate(struct drm_gpuvm_exec *vm_exec)
@@ -664,7 +671,7 @@ struct drm_gpuvm_bo {
 	 */
 	struct {
 		/**
-		 * @gpuva: The list of linked &drm_gpuvas.
+		 * @list.gpuva: The list of linked &drm_gpuvas.
 		 *
 		 * It is safe to access entries from this list as long as the
 		 * GEM's gpuva lock is held. See also struct drm_gem_object.
@@ -672,25 +679,25 @@ struct drm_gpuvm_bo {
 		struct list_head gpuva;
 
 		/**
-		 * @entry: Structure containing all &list_heads serving as
+		 * @list.entry: Structure containing all &list_heads serving as
 		 * entry.
 		 */
 		struct {
 			/**
-			 * @gem: List entry to attach to the &drm_gem_objects
-			 * gpuva list.
+			 * @list.entry.gem: List entry to attach to the
+			 * &drm_gem_objects gpuva list.
 			 */
 			struct list_head gem;
 
 			/**
-			 * @evict: List entry to attach to the &drm_gpuvms
-			 * extobj list.
+			 * @list.entry.evict: List entry to attach to the
+			 * &drm_gpuvms extobj list.
 			 */
 			struct list_head extobj;
 
 			/**
-			 * @evict: List entry to attach to the &drm_gpuvms evict
-			 * list.
+			 * @list.entry.evict: List entry to attach to the
+			 * &drm_gpuvms evict list.
 			 */
 			struct list_head evict;
 		} entry;
@@ -713,6 +720,8 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *vm_bo);
  *
  * This function acquires an additional reference to @vm_bo. It is illegal to
  * call this without already holding a reference. No locks required.
+ *
+ * Returns: the &struct vm_bo pointer
  */
 static inline struct drm_gpuvm_bo *
 drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo)
@@ -730,7 +739,8 @@ drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm,
 void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict);
 
 /**
- * drm_gpuvm_bo_gem_evict()
+ * drm_gpuvm_bo_gem_evict() - add/remove all &drm_gpuvm_bo's in the list
+ * to/from the &drm_gpuvms evicted list
  * @obj: the &drm_gem_object
  * @evict: indicates whether @obj is evicted
  *
@@ -817,12 +827,12 @@ struct drm_gpuva_op_map {
 	 */
 	struct {
 		/**
-		 * @addr: the base address of the new mapping
+		 * @va.addr: the base address of the new mapping
 		 */
 		u64 addr;
 
 		/**
-		 * @range: the range of the new mapping
+		 * @va.range: the range of the new mapping
 		 */
 		u64 range;
 	} va;
@@ -832,12 +842,12 @@ struct drm_gpuva_op_map {
 	 */
 	struct {
 		/**
-		 * @offset: the offset within the &drm_gem_object
+		 * @gem.offset: the offset within the &drm_gem_object
 		 */
 		u64 offset;
 
 		/**
-		 * @obj: the &drm_gem_object to map
+		 * @gem.obj: the &drm_gem_object to map
 		 */
 		struct drm_gem_object *obj;
 	} gem;
-- 
GitLab


From eee706839333ec0643f1b4898a37588025bf4cb5 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 30 Dec 2023 21:49:10 -0800
Subject: [PATCH 2272/2340] drm/imagination: pvr_device.h: fix all kernel-doc
 warnings

Correct all kernel-doc notation on pvr_device.h so that there are no
kernel-doc warnings remaining.

pvr_device.h:292: warning: Excess struct member 'active' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'idle' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'work' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'old_kccb_cmds_executed' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'kccb_stall_count' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'ccb' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'rtn_q' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'rtn_obj' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'rtn' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'slot_count' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'reserved_count' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'waiters' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'fence_ctx' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'id' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'seqno' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'active' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'idle' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'work' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'old_kccb_cmds_executed' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'kccb_stall_count' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'ccb' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'rtn_q' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'rtn_obj' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'rtn' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'slot_count' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'reserved_count' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'waiters' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'fence_ctx' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'id' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'seqno' description in 'pvr_device'
pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Frank Binns <frank.binns@imgtec.com>
Cc: Donald Robson <donald.robson@imgtec.com>
Cc: Matt Coster <matt.coster@imgtec.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20231231054910.31805-1-rdunlap@infradead.org
---
 drivers/gpu/drm/imagination/pvr_device.h | 46 +++++++++++++-----------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h
index 2ca7e535799fe..ecdd5767d8ef5 100644
--- a/drivers/gpu/drm/imagination/pvr_device.h
+++ b/drivers/gpu/drm/imagination/pvr_device.h
@@ -193,13 +193,14 @@ struct pvr_device {
 	 * @queues: Queue-related fields.
 	 */
 	struct {
-		/** @active: Active queue list. */
+		/** @queues.active: Active queue list. */
 		struct list_head active;
 
-		/** @idle: Idle queue list. */
+		/** @queues.idle: Idle queue list. */
 		struct list_head idle;
 
-		/** @lock: Lock protecting access to the active/idle lists. */
+		/** @queues.lock: Lock protecting access to the active/idle
+		 *  lists. */
 		struct mutex lock;
 	} queues;
 
@@ -207,18 +208,18 @@ struct pvr_device {
 	 * @watchdog: Watchdog for communications with firmware.
 	 */
 	struct {
-		/** @work: Work item for watchdog callback. */
+		/** @watchdog.work: Work item for watchdog callback. */
 		struct delayed_work work;
 
 		/**
-		 * @old_kccb_cmds_executed: KCCB command execution count at last
-		 * watchdog poll.
+		 * @watchdog.old_kccb_cmds_executed: KCCB command execution
+		 * count at last watchdog poll.
 		 */
 		u32 old_kccb_cmds_executed;
 
 		/**
-		 * @kccb_stall_count: Number of watchdog polls KCCB has been
-		 * stalled for.
+		 * @watchdog.kccb_stall_count: Number of watchdog polls
+		 * KCCB has been stalled for.
 		 */
 		u32 kccb_stall_count;
 	} watchdog;
@@ -227,43 +228,46 @@ struct pvr_device {
 	 * @kccb: Circular buffer for communications with firmware.
 	 */
 	struct {
-		/** @ccb: Kernel CCB. */
+		/** @kccb.ccb: Kernel CCB. */
 		struct pvr_ccb ccb;
 
-		/** @rtn_q: Waitqueue for KCCB command return waiters. */
+		/** @kccb.rtn_q: Waitqueue for KCCB command return waiters. */
 		wait_queue_head_t rtn_q;
 
-		/** @rtn_obj: Object representing KCCB return slots. */
+		/** @kccb.rtn_obj: Object representing KCCB return slots. */
 		struct pvr_fw_object *rtn_obj;
 
 		/**
-		 * @rtn: Pointer to CPU mapping of KCCB return slots. Must be
-		 * accessed by READ_ONCE()/WRITE_ONCE().
+		 * @kccb.rtn: Pointer to CPU mapping of KCCB return slots.
+		 * Must be accessed by READ_ONCE()/WRITE_ONCE().
 		 */
 		u32 *rtn;
 
-		/** @slot_count: Total number of KCCB slots available. */
+		/** @kccb.slot_count: Total number of KCCB slots available. */
 		u32 slot_count;
 
-		/** @reserved_count: Number of KCCB slots reserved for future use. */
+		/** @kccb.reserved_count: Number of KCCB slots reserved for
+		 *  future use. */
 		u32 reserved_count;
 
 		/**
-		 * @waiters: List of KCCB slot waiters.
+		 * @kccb.waiters: List of KCCB slot waiters.
 		 */
 		struct list_head waiters;
 
-		/** @fence_ctx: KCCB fence context. */
+		/** @kccb.fence_ctx: KCCB fence context. */
 		struct {
-			/** @id: KCCB fence context ID allocated with dma_fence_context_alloc(). */
+			/** @kccb.fence_ctx.id: KCCB fence context ID
+			 *  allocated with dma_fence_context_alloc(). */
 			u64 id;
 
-			/** @seqno: Sequence number incremented each time a fence is created. */
+			/** @kccb.fence_ctx.seqno: Sequence number incremented
+			 *  each time a fence is created. */
 			atomic_t seqno;
 
 			/**
-			 * @lock: Lock used to synchronize access to fences allocated by this
-			 * context.
+			 * @kccb.fence_ctx.lock: Lock used to synchronize
+			 * access to fences allocated by this context.
 			 */
 			spinlock_t lock;
 		} fence_ctx;
-- 
GitLab


From 4d23c1be882ecb7fec6894a68c310fff74cc8bba Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 19 Dec 2023 14:19:30 +0000
Subject: [PATCH 2273/2340] drm/amd/display: remove redundant initialization of
 variable remainder

Variable remainder is being initialized with a value that is never read,
the assignment is redundant and can be removed. Also add a newline
after the declaration to clean up the coding style.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/basics/conversion.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index e295a839ab476..1090d235086ac 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -103,7 +103,8 @@ void convert_float_matrix(
 
 static uint32_t find_gcd(uint32_t a, uint32_t b)
 {
-	uint32_t remainder = 0;
+	uint32_t remainder;
+
 	while (b != 0) {
 		remainder = a % b;
 		a = b;
-- 
GitLab


From 5eb8094a9b05ae5b3e49376a6e5a7a004cd0514f Mon Sep 17 00:00:00 2001
From: Mangesh Gadre <Mangesh.Gadre@amd.com>
Date: Tue, 19 Dec 2023 22:59:16 +0800
Subject: [PATCH 2274/2340] drm/amdgpu: Add register read/write debugfs support
 for AID's
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SMN address is larger than 32 bits for registers on different AID's
Updating existing interface to support access to such registers.

Signed-off-by: Mangesh Gadre <Mangesh.Gadre@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Asad Kamal <asad.kamal@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 2cebf2145d9a6..e485dd3357c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	while (size) {
 		uint32_t value;
 
-		value = RREG32_PCIE(*pos);
+		if (upper_32_bits(*pos))
+			value = RREG32_PCIE_EXT(*pos);
+		else
+			value = RREG32_PCIE(*pos);
+
 		r = put_user(value, (uint32_t *)buf);
 		if (r)
 			goto out;
@@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 		if (r)
 			goto out;
 
-		WREG32_PCIE(*pos, value);
+		if (upper_32_bits(*pos))
+			WREG32_PCIE_EXT(*pos, value);
+		else
+			WREG32_PCIE(*pos, value);
 
 		result += 4;
 		buf += 4;
-- 
GitLab


From 16927047b396d100a510138bdf9fba65f35b81c2 Mon Sep 17 00:00:00 2001
From: Roman Li <Roman.Li@amd.com>
Date: Tue, 19 Dec 2023 14:57:11 -0500
Subject: [PATCH 2275/2340] drm/amd/display: Disable IPS by default

[Why]
Instability is observed on DCN35 if idle power optimization is enabled.

[How]
Disable IPS until issue is resolved.

Signed-off-by: Roman Li <Roman.Li@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index fc230ecc71805..ddde330860fc0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1712,6 +1712,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0];
 	init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
 
+	init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
+
 	/* Enable DWB for tested platforms only */
 	if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
 		init_data.num_virtual_links = 1;
-- 
GitLab


From c71930300fb20d447d19cda2c85037a24a1504ad Mon Sep 17 00:00:00 2001
From: Roman Li <roman.li@amd.com>
Date: Wed, 13 Dec 2023 14:12:16 +0800
Subject: [PATCH 2276/2340] drm/amd/display: enable dcn35 idle power
 optimization

[Why]
Idle power optimization was disabled on dcn35 by default.

[How]
Enable by setting disable_idle_power_optimizations to false.

Signed-off-by: Roman Li <roman.li@amd.com>
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 810cfe2136322..39594e8ffb5ee 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -768,7 +768,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.enable_hpo_pg_support = false,
 	.enable_legacy_fast_update = true,
 	.enable_single_display_2to1_odm_policy = false,
-	.disable_idle_power_optimizations = true,
+	.disable_idle_power_optimizations = false,
 	.dmcub_emulation = false,
 	.disable_boot_optimizations = false,
 	.disable_unbounded_requesting = false,
-- 
GitLab


From a32c6f7f5737cc7e31cd7ad5133f0d96fca12ea6 Mon Sep 17 00:00:00 2001
From: "Stanley.Yang" <Stanley.Yang@amd.com>
Date: Fri, 15 Dec 2023 16:13:23 +0800
Subject: [PATCH 2277/2340] drm/amdgpu: Fix ecc irq enable/disable unpaired

The ecc_irq is disabled while GPU mode2 reset suspending process,
but not be enabled during GPU mode2 reset resume process.

Changed from V1:
	only do sdma/gfx ras_late_init in aldebaran_mode2_restore_ip
	delete amdgpu_ras_late_resume function

Changed from V2:
	check umc ras supported before put ecc_irq

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aldebaran.c | 26 +++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  4 ++++
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  5 +++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++++
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 02f4c6f9d4f68..576067d66bb9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 {
 	struct list_head *reset_device_list = reset_context->reset_device_list;
 	struct amdgpu_device *tmp_adev = NULL;
+	struct amdgpu_ras *con;
 	int r;
 
 	if (reset_device_list == NULL)
@@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 		 */
 		amdgpu_register_gpu_instance(tmp_adev);
 
-		/* Resume RAS */
+		/* Resume RAS, ecc_irq */
+		con = amdgpu_ras_get_context(tmp_adev);
+		if (!amdgpu_sriov_vf(tmp_adev) && con) {
+			if (tmp_adev->sdma.ras &&
+				tmp_adev->sdma.ras->ras_block.ras_late_init) {
+				r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+						&tmp_adev->sdma.ras->ras_block.ras_comm);
+				if (r) {
+					dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+					goto end;
+				}
+			}
+
+			if (tmp_adev->gfx.ras &&
+				tmp_adev->gfx.ras->ras_block.ras_late_init) {
+				r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+						&tmp_adev->gfx.ras->ras_block.ras_comm);
+				if (r) {
+					dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+					goto end;
+				}
+			}
+		}
+
 		amdgpu_ras_resume(tmp_adev);
 
 		/* Update PSP FW topology after reset */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index a5a05c16c10d7..6c51856088546 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1041,6 +1041,10 @@ static int gmc_v10_0_hw_fini(void *handle)
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 23d7b548d13f4..c9c653cfc765b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -941,6 +941,11 @@ static int gmc_v11_0_hw_fini(void *handle)
 	}
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	gmc_v11_0_gart_disable(adev);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 473a774294cee..f9039d64ff2d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -2380,6 +2380,10 @@ static int gmc_v9_0_hw_fini(void *handle)
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	return 0;
 }
 
-- 
GitLab


From 8a0f02b7beed7b2b768dbdf3b79960de68f460c5 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Mon, 11 Dec 2023 13:20:02 -0500
Subject: [PATCH 2278/2340] drm/amd/display: Fix subvp+drr logic errors

[Why]
There is some logic error where the wrong variable was used to check for
OTG_MASTER and DPP_PIPE.

[How]
Add booleans to confirm that the expected pipes were found before
validating schedulability.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Reviewed-by: Samson Tam <samson.tam@amd.com>
Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 52 +++++++++++--------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 10c890a9cb7c2..cc759b34268bb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -806,6 +806,8 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 	int16_t drr_stretched_vblank_us = 0;
 	int16_t max_vblank_mallregion = 0;
 	struct dc_stream_state *phantom_stream;
+	bool subvp_found = false;
+	bool drr_found = false;
 
 	// Find SubVP pipe
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -818,8 +820,10 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 			continue;
 
 		// Find the SubVP pipe
-		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			subvp_found = true;
 			break;
+		}
 	}
 
 	// Find the DRR pipe
@@ -827,33 +831,37 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 		drr_pipe = &context->res_ctx.pipe_ctx[i];
 
 		// We check for master pipe only
-		if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
-				!resource_is_pipe_type(pipe, DPP_PIPE))
+		if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) ||
+				!resource_is_pipe_type(drr_pipe, DPP_PIPE))
 			continue;
 
 		if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
-				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed))
+				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) {
+			drr_found = true;
 			break;
+		}
 	}
 
-	phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream);
-	main_timing = &pipe->stream->timing;
-	phantom_timing = &phantom_stream->timing;
-	drr_timing = &drr_pipe->stream->timing;
-	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
-			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
-			dc->caps.subvp_prefetch_end_to_mall_start_us;
-	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
-			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
-	drr_frame_us = drr_timing->v_total * drr_timing->h_total /
-			(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
-	// P-State allow width and FW delays already included phantom_timing->v_addressable
-	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
-			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
-	stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
-	drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
-			(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
-	max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+	if (subvp_found && drr_found) {
+		phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream);
+		main_timing = &pipe->stream->timing;
+		phantom_timing = &phantom_stream->timing;
+		drr_timing = &drr_pipe->stream->timing;
+		prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+				dc->caps.subvp_prefetch_end_to_mall_start_us;
+		subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+				(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+		drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+				(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+		// P-State allow width and FW delays already included phantom_timing->v_addressable
+		mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+		stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+		drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+				(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+		max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+	}
 
 	/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
 	 * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
-- 
GitLab


From ade13d3fc03a17812e4c677ec898f62b2a8e9485 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Mon, 11 Dec 2023 14:46:14 -0500
Subject: [PATCH 2279/2340] drm/amd/display: Don't allow FPO if no planes

In DCN32/321 FPO uses per-pipe P-State force. If there is no plane, then
then HUBP is power gated, in which case any programming in HUBP has no
effect and the pipe is always asserting P-State allow. This is contrary
to what we want to happen for FPO (FW should moderate the P-State
assertion), so block FPO if there's no plane for the FPO pipe.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Reviewed-by: Samson Tam <samson.tam@amd.com>
Reviewed-by: Chaitanya Dhere <chaitanya.dhere@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/dcn32/dcn32_resource_helpers.c  | 18 +++++++++++++++---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c   | 10 +++++++++-
 .../display/dc/resource/dcn32/dcn32_resource.h |  2 +-
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index 1743ebcd6b2ee..cca6436ecdf58 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -524,13 +524,14 @@ static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream)
  *
  * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL
  */
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context)
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context)
 {
 	int refresh_rate = 0;
 	const int minimum_refreshrate_supported = 120;
 	struct dc_stream_state *fpo_candidate_stream = NULL;
 	bool is_fpo_vactive = false;
 	uint32_t fpo_vactive_margin_us = 0;
+	struct dc_stream_status *fpo_stream_status = NULL;
 
 	if (context == NULL)
 		return NULL;
@@ -559,10 +560,21 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
 		DC_FP_END();
 		if (!is_fpo_vactive || dc->debug.disable_fpo_vactive)
 			return NULL;
-	} else
+	} else {
 		fpo_candidate_stream = context->streams[0];
+		if (fpo_candidate_stream)
+			fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
+	}
 
-	if (!fpo_candidate_stream)
+	/* In DCN32/321, FPO uses per-pipe P-State force.
+	 * If there's no planes, HUBP is power gated and
+	 * therefore programming UCLK_PSTATE_FORCE does
+	 * nothing (P-State will always be asserted naturally
+	 * on a pipe that has HUBP power gated. Therefore we
+	 * only want to enable FPO if the FPO pipe has both
+	 * a stream and a plane.
+	 */
+	if (!fpo_candidate_stream || !fpo_stream_status || fpo_stream_status->plane_count == 0)
 		return NULL;
 
 	if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index cc759b34268bb..9f37f717a1f86 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -3465,7 +3465,15 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (!pipe->stream)
+		/* In DCN32/321, FPO uses per-pipe P-State force.
+		 * If there's no planes, HUBP is power gated and
+		 * therefore programming UCLK_PSTATE_FORCE does
+		 * nothing (P-State will always be asserted naturally
+		 * on a pipe that has HUBP power gated. Therefore we
+		 * only want to enable FPO if the FPO pipe has both
+		 * a stream and a plane.
+		 */
+		if (!pipe->stream || !pipe->plane_state)
 			continue;
 
 		if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index b27fadd452bb8..0c87b0fabba7d 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -163,7 +163,7 @@ void dcn32_determine_det_override(struct dc *dc,
 void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
 	display_e2e_pipe_params_st *pipes);
 
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context);
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
 
 bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe);
 
-- 
GitLab


From a71e1310a43ffe47b824aae25ae54f9fcc4daa12 Mon Sep 17 00:00:00 2001
From: Relja Vojvodic <relja.vojvodic@amd.com>
Date: Mon, 11 Dec 2023 18:00:14 -0500
Subject: [PATCH 2280/2340] drm/amd/display: Add more mechanisms for tests

[Why]
More information is desired for the test tools.

[How]
Refactored get_subvp_visual_confirm_color and
get_mclk_switch_visual_confirm_color to support the new method of
storing the p_state type, which was changed so that it could also be
saved and output by the DPM log. Ensured that the p_state type is kept
updated by looping through the pipes within commit_planes_for_stream.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Relja Vojvodic <relja.vojvodic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  |  90 ++++++++-
 drivers/gpu/drm/amd/display/dc/core/dc.c      |  37 ++--
 .../drm/amd/display/dc/core/dc_hw_sequencer.c | 172 +++++++++++-------
 .../drm/amd/display/dc/hwss/hw_sequencer.h    |   9 +-
 .../gpu/drm/amd/display/dc/inc/core_types.h   |  11 ++
 5 files changed, 229 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 95c0b49b531a0..51adb13b3b800 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -25,7 +25,6 @@
 
 #include "dccg.h"
 #include "clk_mgr_internal.h"
-
 #include "dcn32/dcn32_clk_mgr_smu_msg.h"
 #include "dcn20/dcn20_clk_mgr.h"
 #include "dce100/dce_clk_mgr.h"
@@ -34,7 +33,7 @@
 #include "core_types.h"
 #include "dm_helpers.h"
 #include "link.h"
-
+#include "dc_state_priv.h"
 #include "atomfirmware.h"
 #include "smu13_driver_if.h"
 
@@ -458,13 +457,43 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
 	return 0;
 }
 
-static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr_internal *clk_mgr)
+static bool dcn32_check_native_scaling(struct pipe_ctx *pipe)
+{
+	bool is_native_scaling = false;
+	int width = pipe->plane_state->src_rect.width;
+	int height = pipe->plane_state->src_rect.height;
+
+	if (pipe->stream->timing.h_addressable == width &&
+			pipe->stream->timing.v_addressable == height &&
+			pipe->plane_state->dst_rect.width == width &&
+			pipe->plane_state->dst_rect.height == height)
+		is_native_scaling = true;
+
+	return is_native_scaling;
+}
+
+static void dcn32_auto_dpm_test_log(
+		struct dc_clocks *new_clocks,
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
 {
 	unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg,
-				 fclk_khz_reg;
+				 fclk_khz_reg, mall_ss_size_bytes;
 	int dramclk_khz_override, fclk_khz_override, num_fclk_levels;
 
-	msleep(5);
+	struct pipe_ctx *pipe_ctx_list[MAX_PIPES];
+	int active_pipe_count = 0;
+
+	for (int i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+			pipe_ctx_list[active_pipe_count] = pipe_ctx;
+			active_pipe_count++;
+		}
+	}
+
+	mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes;
 
     dispclk_khz_reg    = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
     dppclk_khz_reg     = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
@@ -494,16 +523,49 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr
 	//
 	//				AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n"
 	////////////////////////////////////////////////////////////////////////////
-	if (new_clocks &&
+	if (new_clocks && active_pipe_count > 0 &&
 		new_clocks->dramclk_khz > 0 &&
 		new_clocks->fclk_khz > 0 &&
 		new_clocks->dcfclk_khz > 0 &&
 		new_clocks->dppclk_khz > 0) {
 
+		uint32_t pix_clk_list[MAX_PIPES] = {0};
+		int p_state_list[MAX_PIPES] = {0};
+		int disp_src_width_list[MAX_PIPES] = {0};
+		int disp_src_height_list[MAX_PIPES] = {0};
+		uint64_t disp_src_refresh_list[MAX_PIPES] = {0};
+		bool is_scaled_list[MAX_PIPES] = {0};
+
+		for (int i = 0; i < active_pipe_count; i++) {
+			struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i];
+			uint64_t refresh_rate;
+
+			pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz;
+			p_state_list[i] = curr_pipe_ctx->p_state_type;
+
+			refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
+				curr_pipe_ctx->stream->timing.v_total * curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
+			disp_src_refresh_list[i] = refresh_rate;
+
+			if (curr_pipe_ctx->plane_state) {
+				is_scaled_list[i] = !(dcn32_check_native_scaling(curr_pipe_ctx));
+				disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width;
+				disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height;
+			}
+		}
+
 		DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
 			"dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
 			"dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
-			"dtbclk_hw:%d - fclk_hw:%d\n",
+			"dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - "
+			"pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - "
+			"p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - "
+			"pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
+			"pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
+			"pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d\n",
 			dramclk_khz_override,
 			fclk_khz_override,
 			new_clocks->dcfclk_khz,
@@ -513,7 +575,14 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr
 			dprefclk_khz_reg,
 			dcfclk_khz_reg,
 			dtbclk_khz_reg,
-			fclk_khz_reg);
+			fclk_khz_reg,
+			pix_clk_list[0], pix_clk_list[1], pix_clk_list[3], pix_clk_list[2],
+			mall_ss_size_bytes,
+			p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+			disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+			disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+			disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+			disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
 	}
 }
 
@@ -686,6 +755,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 		/* DCCG requires KHz precision for DTBCLK */
 		clk_mgr_base->clks.ref_dtbclk_khz =
 				dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
+
 		dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
 	}
 
@@ -713,8 +783,8 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 		dmcu->funcs->set_psr_wait_loop(dmcu,
 				clk_mgr_base->clks.dispclk_khz / 1000 / 7);
 
-	if (dc->config.enable_auto_dpm_test_logs && safe_to_lower) {
-	    dcn32_auto_dpm_test_log(new_clocks, clk_mgr);
+	if (dc->config.enable_auto_dpm_test_logs) {
+	    dcn32_auto_dpm_test_log(new_clocks, clk_mgr, context);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 043913d65b162..d55de3f5115df 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1088,7 +1088,7 @@ static void apply_ctx_interdependent_lock(struct dc *dc,
 	}
 }
 
-static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
+static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
 {
 	if (dc->ctx->dce_version >= DCN_VERSION_1_0) {
 		memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color));
@@ -1108,9 +1108,9 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
 			if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE)
 				get_mpctree_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 			else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP)
-				get_subvp_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+				get_subvp_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 			else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH)
-				get_mclk_switch_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+				get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 		}
 	}
 }
@@ -1190,8 +1190,10 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 				dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+			if (pipe->stream && pipe->plane_state) {
+				set_p_state_switch_method(dc, context, pipe);
+				dc_update_visual_confirm_color(dc, context, pipe);
+			}
 
 			if (dc->hwss.apply_ctx_for_surface) {
 				apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, true);
@@ -3377,12 +3379,14 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 			&context->res_ctx,
 			stream);
 
-	if (dc->debug.visual_confirm) {
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream && pipe->plane_state) {
+			set_p_state_switch_method(dc, context, pipe);
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+			if (dc->debug.visual_confirm)
+				dc_update_visual_confirm_color(dc, context, pipe);
 		}
 	}
 
@@ -3531,13 +3535,16 @@ static void commit_planes_for_stream(struct dc *dc,
 		}
 	}
 
-	if (dc->debug.visual_confirm)
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+		if (pipe->stream && pipe->plane_state) {
+			set_p_state_switch_method(dc, context, pipe);
+
+			if (dc->debug.visual_confirm)
+				dc_update_visual_confirm_color(dc, context, pipe);
 		}
+	}
 
 	if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) {
 		struct pipe_ctx *mpcc_pipe;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 48fdfacc413aa..9c05b1a07142f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -426,44 +426,130 @@ void get_hdr_visual_confirm_color(
 }
 
 void get_subvp_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
 {
 	uint32_t color_value = MAX_TG_COLOR_VALUE;
-	bool enable_subvp = false;
-	int i;
-
-	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !context)
-		return;
+	if (pipe_ctx) {
+		switch (pipe_ctx->p_state_type) {
+		case P_STATE_SUB_VP:
+			color->color_r_cr = color_value;
+			color->color_g_y  = 0;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_DRR_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_V_BLANK_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		default:
+			break;
+		}
+	}
+}
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+void get_mclk_switch_visual_confirm_color(
+		struct pipe_ctx *pipe_ctx,
+		struct tg_color *color)
+{
+	uint32_t color_value = MAX_TG_COLOR_VALUE;
 
-		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
-			/* SubVP enable - red */
-			color->color_g_y = 0;
+	if (pipe_ctx) {
+		switch (pipe_ctx->p_state_type) {
+		case P_STATE_V_BLANK:
+			color->color_r_cr = color_value;
+			color->color_g_y = color_value;
 			color->color_b_cb = 0;
+			break;
+		case P_STATE_FPO:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = color_value;
+			break;
+		case P_STATE_V_ACTIVE:
 			color->color_r_cr = color_value;
-			enable_subvp = true;
-
-			if (pipe_ctx->stream == pipe->stream)
-				return;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		case P_STATE_SUB_VP:
+			color->color_r_cr = color_value;
+			color->color_g_y  = 0;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_DRR_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_V_BLANK_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		default:
 			break;
 		}
 	}
+}
 
-	if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) {
-		color->color_r_cr = 0;
-		if (pipe_ctx->stream->allow_freesync == 1) {
-			/* SubVP enable and DRR on - green */
-			color->color_b_cb = 0;
-			color->color_g_y = color_value;
+void set_p_state_switch_method(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	bool enable_subvp;
+
+	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
+		return;
+
+	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
+			dm_dram_clock_change_unsupported) {
+		/* MCLK switching is supported */
+		if (!pipe_ctx->has_vactive_margin) {
+			/* In Vblank - yellow */
+			pipe_ctx->p_state_type = P_STATE_V_BLANK;
+
+			if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+				/* FPO + Vblank - cyan */
+				pipe_ctx->p_state_type = P_STATE_FPO;
+			}
 		} else {
-			/* SubVP enable and No DRR - blue */
-			color->color_g_y = 0;
-			color->color_b_cb = color_value;
+			/* In Vactive - pink */
+			pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
+		}
+
+		/* SubVP */
+		enable_subvp = false;
+
+		for (int i = 0; i < dc->res_pool->pipe_count; i++) {
+			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+			if (pipe->stream && dc_state_get_paired_subvp_stream(context, pipe->stream) &&
+					dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+				/* SubVP enable - red */
+				pipe_ctx->p_state_type = P_STATE_SUB_VP;
+				enable_subvp = true;
+
+				if (pipe_ctx->stream == pipe->stream)
+					return;
+				break;
+			}
+		}
+
+		if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) {
+			if (pipe_ctx->stream->allow_freesync == 1) {
+				/* SubVP enable and DRR on - green */
+				pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP;
+			} else {
+				/* SubVP enable and No DRR - blue */
+				pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP;
+			}
 		}
 	}
 }
@@ -815,42 +901,6 @@ void hwss_subvp_save_surf_addr(union block_sequence_params *params)
 	dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index);
 }
 
-void get_mclk_switch_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
-		struct pipe_ctx *pipe_ctx,
-		struct tg_color *color)
-{
-	uint32_t color_value = MAX_TG_COLOR_VALUE;
-	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
-	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
-		return;
-
-	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
-			dm_dram_clock_change_unsupported) {
-		/* MCLK switching is supported */
-		if (!pipe_ctx->has_vactive_margin) {
-			/* In Vblank - yellow */
-			color->color_r_cr = color_value;
-			color->color_g_y = color_value;
-
-			if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
-				/* FPO + Vblank - cyan */
-				color->color_r_cr = 0;
-				color->color_g_y  = color_value;
-				color->color_b_cb = color_value;
-			}
-		} else {
-			/* In Vactive - pink */
-			color->color_r_cr = color_value;
-			color->color_b_cb = color_value;
-		}
-		/* SubVP */
-		get_subvp_visual_confirm_color(dc, context, pipe_ctx, color);
-	}
-}
-
 void get_surface_tile_visual_confirm_color(
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index 6ca8c5488d507..a543993833181 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -455,17 +455,18 @@ void get_mpctree_visual_confirm_color(
 		struct tg_color *color);
 
 void get_subvp_visual_confirm_color(
-	struct dc *dc,
-	struct dc_state *context,
 	struct pipe_ctx *pipe_ctx,
 	struct tg_color *color);
 
 void get_mclk_switch_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color);
 
+void set_p_state_switch_method(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx);
+
 void hwss_execute_sequence(struct dc *dc,
 		struct block_sequence block_sequence[],
 		int num_steps);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index c3b973abb89a7..f74ae0d41d3c4 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -381,6 +381,16 @@ union pipe_update_flags {
 	uint32_t raw;
 };
 
+enum p_state_switch_method {
+	P_STATE_UNKNOWN						= 0,
+	P_STATE_V_BLANK						= 1,
+	P_STATE_FPO,
+	P_STATE_V_ACTIVE,
+	P_STATE_SUB_VP,
+	P_STATE_DRR_SUB_VP,
+	P_STATE_V_BLANK_SUB_VP
+};
+
 struct pipe_ctx {
 	struct dc_plane_state *plane_state;
 	struct dc_stream_state *stream;
@@ -429,6 +439,7 @@ struct pipe_ctx {
 	struct dwbc *dwbc;
 	struct mcif_wb *mcif_wb;
 	union pipe_update_flags update_flags;
+	enum p_state_switch_method p_state_type;
 	struct tg_color visual_confirm_color;
 	bool has_vactive_margin;
 	/* subvp_index: only valid if the pipe is a SUBVP_MAIN*/
-- 
GitLab


From 394e850f1ad73c594bf0296c2f601c71517acfdd Mon Sep 17 00:00:00 2001
From: "Leo (Hanghong) Ma" <hanghong.ma@amd.com>
Date: Tue, 28 Nov 2023 12:31:24 -0500
Subject: [PATCH 2281/2340] drm/amd/display: Add HDMI capacity computations
 using fixed31_32

[Why]
Certain HDMI modes failed at dml cap check for uncompressed video but
they can still be supported for compressed video.

[How]
Add HDMI capacity computations using fixed31_32 in dc side.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Chris Park <chris.park@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Leo (Hanghong) Ma <hanghong.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h                   | 2 ++
 drivers/gpu/drm/amd/display/dc/inc/resource.h         | 3 ---
 drivers/gpu/drm/amd/display/dc/link/link_validation.h | 1 +
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 3ca72c097aa15..c2ee80d6f64c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1462,6 +1462,8 @@ bool dc_acquire_release_mpc_3dlut(
 		struct dc_transfer_func **shaper);
 
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
+void get_audio_check(struct audio_info *aud_modes,
+	struct audio_check *aud_chk);
 
 enum dc_status dc_commit_streams(struct dc *dc,
 				 struct dc_stream_state *streams[],
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index c7a00a28c3b00..c958ef37b78a6 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -573,9 +573,6 @@ void update_audio_usage(
 
 unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format);
 
-void get_audio_check(struct audio_info *aud_modes,
-	struct audio_check *aud_chk);
-
 bool get_temp_dp_link_res(struct dc_link *link,
 		struct link_resource *link_res,
 		struct dc_link_settings *link_settings);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
index 4a954317d0dac..595fb05946e9d 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
@@ -25,6 +25,7 @@
 #ifndef __LINK_VALIDATION_H__
 #define __LINK_VALIDATION_H__
 #include "link.h"
+
 enum dc_status link_validate_mode_timing(
 		const struct dc_stream_state *stream,
 		struct dc_link *link,
-- 
GitLab


From ca1ecae145b20b11ff49062afe6f0bf6707bc244 Mon Sep 17 00:00:00 2001
From: Josip Pavic <josip.pavic@amd.com>
Date: Mon, 11 Dec 2023 17:50:19 -0500
Subject: [PATCH 2282/2340] drm/amd/display: Add null pointer guards where
 needed

[Why]
Some functions whose output is typically checked for null are not being
checked for null at several call sites, causing some static analysis
tools to throw an error.

[How]
Add null pointer guards around functions that typically have them at
other call sites.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Sung Lee <sung.lee@amd.com>
Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Josip Pavic <josip.pavic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c          | 5 +++++
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 3 +++
 drivers/gpu/drm/amd/display/dc/core/dc_stream.c   | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index d55de3f5115df..b7c2eaebf8bfd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3379,6 +3379,9 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 			&context->res_ctx,
 			stream);
 
+	if (!top_pipe_to_program)
+		return;
+
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
@@ -3978,6 +3981,8 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc,
 	dc->debug.force_disable_subvp = true;
 
 	minimal_transition_context = dc_state_create_copy(base_context);
+	if (!minimal_transition_context)
+		return NULL;
 
 	/* commit minimal state */
 	if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 716b59bd03b65..f2abc1096ffb6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2460,6 +2460,9 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context,
 	struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(
 			&context->res_ctx, stream);
 
+	if (!otg_master)
+		return;
+
 	ASSERT(resource_get_odm_slice_count(otg_master) == 1);
 	ASSERT(otg_master->plane_state == NULL);
 	ASSERT(otg_master->stream_res.stream_enc);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index f2b265ed7fc21..54670e0b15189 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -447,7 +447,8 @@ bool dc_stream_add_writeback(struct dc *dc,
 	if (dc->hwss.enable_writeback) {
 		struct dc_stream_status *stream_status = dc_stream_get_status(stream);
 		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
-		dwb->otg_inst = stream_status->primary_otg_inst;
+		if (stream_status)
+			dwb->otg_inst = stream_status->primary_otg_inst;
 	}
 
 	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
-- 
GitLab


From aa5dc05340eb97486a631ce6bccb8d020bf6b56b Mon Sep 17 00:00:00 2001
From: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
Date: Mon, 4 Dec 2023 10:57:45 -0500
Subject: [PATCH 2283/2340] drm/amd/display: Fix minor issues in BW Allocation
 Phase2

[Why]
Fix minor issues in BW Allocation Phase2.

[How]
- In set_usb4_req_bw_req(), link->dpia_bw_alloc_config.response_ready
  flag should be reset before writing DPCD REQUEST_BW.
- Fix the granularity for value of 2 in get_bw_granularity().
- Removed bandwidth allocation support display fw boot option as
  the fw would read feature enable status from bios.
- Clean up DPIA_EST_BW_CHANGED and DPIA_BW_REQ_SUCCESS cases in
  dpia_handle_bw_alloc_response().
- Removed allocate_usb4_bw and deallocate_usb4_bw.
- Optimized loop in get_lowest_dpia_index().
- Updated link_dp_dpia_allocate_usb4_bandwidth_for_stream() and
  set_usb4_req_bw_req() to always issue request bw.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: PeiChen Huang <peichen.huang@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../dc/link/protocols/link_dp_dpia_bw.c       | 221 ++++++++----------
 .../dc/link/protocols/link_dp_dpia_bw.h       |   4 +-
 2 files changed, 101 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index 7581023daa478..d6e1f969bfd52 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -50,6 +50,7 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
 			&& tmp->hpd_status
 			&& tmp->dpia_bw_alloc_config.bw_alloc_enabled);
 }
+
 static void reset_bw_alloc_struct(struct dc_link *link)
 {
 	link->dpia_bw_alloc_config.bw_alloc_enabled = false;
@@ -59,6 +60,11 @@ static void reset_bw_alloc_struct(struct dc_link *link)
 	link->dpia_bw_alloc_config.bw_granularity = 0;
 	link->dpia_bw_alloc_config.response_ready = false;
 }
+
+#define BW_GRANULARITY_0 4 // 0.25 Gbps
+#define BW_GRANULARITY_1 2 // 0.5 Gbps
+#define BW_GRANULARITY_2 1 // 1 Gbps
+
 static uint8_t get_bw_granularity(struct dc_link *link)
 {
 	uint8_t bw_granularity = 0;
@@ -71,16 +77,20 @@ static uint8_t get_bw_granularity(struct dc_link *link)
 
 	switch (bw_granularity & 0x3) {
 	case 0:
-		bw_granularity = 4;
+		bw_granularity = BW_GRANULARITY_0;
 		break;
 	case 1:
+		bw_granularity = BW_GRANULARITY_1;
+		break;
+	case 2:
 	default:
-		bw_granularity = 2;
+		bw_granularity = BW_GRANULARITY_2;
 		break;
 	}
 
 	return bw_granularity;
 }
+
 static int get_estimated_bw(struct dc_link *link)
 {
 	uint8_t bw_estimated_bw = 0;
@@ -93,31 +103,7 @@ static int get_estimated_bw(struct dc_link *link)
 
 	return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 }
-static bool allocate_usb4_bw(int *stream_allocated_bw, int bw_needed, struct dc_link *link)
-{
-	if (bw_needed > 0)
-		*stream_allocated_bw += bw_needed;
-
-	return true;
-}
-static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, struct dc_link *link)
-{
-	bool ret = false;
-
-	if (*stream_allocated_bw > 0) {
-		*stream_allocated_bw -= bw_to_dealloc;
-		ret = true;
-	} else {
-		//Do nothing for now
-		ret = true;
-	}
 
-	// Unplug so reset values
-	if (!link->hpd_status)
-		reset_bw_alloc_struct(link);
-
-	return ret;
-}
 /*
  * Read all New BW alloc configuration ex: estimated_bw, allocated_bw,
  * granuality, Driver_ID, CM_Group, & populate the BW allocation structs
@@ -128,7 +114,12 @@ static void init_usb4_bw_struct(struct dc_link *link)
 	// Init the known values
 	link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link);
 	link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
+
+	DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n",
+		__func__, link->dpia_bw_alloc_config.bw_granularity,
+		link->dpia_bw_alloc_config.estimated_bw);
 }
+
 static uint8_t get_lowest_dpia_index(struct dc_link *link)
 {
 	const struct dc *dc_struct = link->dc;
@@ -141,12 +132,15 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link)
 				dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
 			continue;
 
-		if (idx > dc_struct->links[i]->link_index)
+		if (idx > dc_struct->links[i]->link_index) {
 			idx = dc_struct->links[i]->link_index;
+			break;
+		}
 	}
 
 	return idx;
 }
+
 /*
  * Get the Max Available BW or Max Estimated BW for each Host Router
  *
@@ -186,6 +180,7 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
 
 	return total_bw;
 }
+
 /*
  * Cleanup function for when the dpia is unplugged to reset struct
  * and perform any required clean up
@@ -194,42 +189,50 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
  *
  * return: none
  */
-static bool dpia_bw_alloc_unplug(struct dc_link *link)
+static void dpia_bw_alloc_unplug(struct dc_link *link)
 {
-	if (!link)
-		return true;
-
-	return deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-			link->dpia_bw_alloc_config.sink_allocated_bw, link);
+	if (link) {
+		DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n",
+			__func__, link->link_index);
+		link->dpia_bw_alloc_config.sink_allocated_bw = 0;
+		reset_bw_alloc_struct(link);
+	}
 }
+
 static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
 {
 	uint8_t requested_bw;
 	uint32_t temp;
 
-	// 1. Add check for this corner case #1
-	if (req_bw > link->dpia_bw_alloc_config.estimated_bw)
+	/* Error check whether request bw greater than allocated */
+	if (req_bw > link->dpia_bw_alloc_config.estimated_bw) {
+		DC_LOG_ERROR("%s: Request bw greater than estimated bw for link(%d)\n",
+			__func__, link->link_index);
 		req_bw = link->dpia_bw_alloc_config.estimated_bw;
+	}
 
 	temp = req_bw * link->dpia_bw_alloc_config.bw_granularity;
 	requested_bw = temp / Kbps_TO_Gbps;
 
-	// Always make sure to add more to account for floating points
+	/* Always make sure to add more to account for floating points */
 	if (temp % Kbps_TO_Gbps)
 		++requested_bw;
 
-	// 2. Add check for this corner case #2
+	/* Error check whether requested and allocated are equal */
 	req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-	if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw)
-		return;
+	if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) {
+		DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
+			__func__, link->link_index);
+	}
 
-	if (core_link_write_dpcd(
+	link->dpia_bw_alloc_config.response_ready = false; // Reset flag
+	core_link_write_dpcd(
 		link,
 		REQUESTED_BW,
 		&requested_bw,
-		sizeof(uint8_t)) == DC_OK)
-		link->dpia_bw_alloc_config.response_ready = false; // Reset flag
+		sizeof(uint8_t));
 }
+
 /*
  * Return the response_ready flag from dc_link struct
  *
@@ -241,6 +244,7 @@ static bool get_cm_response_ready_flag(struct dc_link *link)
 {
 	return link->dpia_bw_alloc_config.response_ready;
 }
+
 // ------------------------------------------------------------------
 //					PUBLIC FUNCTIONS
 // ------------------------------------------------------------------
@@ -277,27 +281,27 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link)
 				DPTX_BW_ALLOCATION_MODE_CONTROL,
 				&response,
 				sizeof(uint8_t)) != DC_OK) {
-			DC_LOG_DEBUG("%s: **** FAILURE Enabling DPtx BW Allocation Mode Support ***\n",
-					__func__);
+			DC_LOG_DEBUG("%s: FAILURE Enabling DPtx BW Allocation Mode Support for link(%d)\n",
+				__func__, link->link_index);
 		} else {
 			// SUCCESS Enabled DPtx BW Allocation Mode Support
-			link->dpia_bw_alloc_config.bw_alloc_enabled = true;
-			DC_LOG_DEBUG("%s: **** SUCCESS Enabling DPtx BW Allocation Mode Support ***\n",
-					__func__);
+			DC_LOG_DEBUG("%s: SUCCESS Enabling DPtx BW Allocation Mode Support for link(%d)\n",
+				__func__, link->link_index);
 
 			ret = true;
 			init_usb4_bw_struct(link);
+			link->dpia_bw_alloc_config.bw_alloc_enabled = true;
 		}
 	}
 
 out:
 	return ret;
 }
+
 void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result)
 {
 	int bw_needed = 0;
 	int estimated = 0;
-	int host_router_total_estimated_bw = 0;
 
 	if (!get_bw_alloc_proceed_flag((link)))
 		return;
@@ -306,14 +310,22 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 
 	case DPIA_BW_REQ_FAILED:
 
-		DC_LOG_DEBUG("%s: *** *** BW REQ FAILURE for DP-TX Request *** ***\n", __func__);
+		/*
+		 * Ideally, we shouldn't run into this case as we always validate available
+		 * bandwidth and request within that limit
+		 */
+		estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 
-		// Update the new Estimated BW value updated by CM
-		link->dpia_bw_alloc_config.estimated_bw =
-				bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+		DC_LOG_ERROR("%s: BW REQ FAILURE for DP-TX Request for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_ERROR("%s: current estimated_bw(%d), new estimated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.estimated_bw, estimated);
 
+		/* Update the new Estimated BW value updated by CM */
+		link->dpia_bw_alloc_config.estimated_bw = estimated;
+
+		/* Allocate the previously requested bandwidth */
 		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw);
-		link->dpia_bw_alloc_config.response_ready = false;
 
 		/*
 		 * If FAIL then it is either:
@@ -326,68 +338,34 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 
 	case DPIA_BW_REQ_SUCCESS:
 
-		DC_LOG_DEBUG("%s: *** BW REQ SUCCESS for DP-TX Request ***\n", __func__);
-
-		// 1. SUCCESS 1st time before any Pruning is done
-		// 2. SUCCESS after prev. FAIL before any Pruning is done
-		// 3. SUCCESS after Pruning is done but before enabling link
-
 		bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 
-		// 1.
-		if (!link->dpia_bw_alloc_config.sink_allocated_bw) {
-
-			allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed, link);
-			link->dpia_bw_alloc_config.sink_verified_bw =
-					link->dpia_bw_alloc_config.sink_allocated_bw;
+		DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed);
 
-			// SUCCESS from first attempt
-			if (link->dpia_bw_alloc_config.sink_allocated_bw >
-			link->dpia_bw_alloc_config.sink_max_bw)
-				link->dpia_bw_alloc_config.sink_verified_bw =
-						link->dpia_bw_alloc_config.sink_max_bw;
-		}
-		// 3.
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw) {
-
-			// Find out how much do we need to de-alloc
-			if (link->dpia_bw_alloc_config.sink_allocated_bw > bw_needed)
-				deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-						link->dpia_bw_alloc_config.sink_allocated_bw - bw_needed, link);
-			else
-				allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-						bw_needed - link->dpia_bw_alloc_config.sink_allocated_bw, link);
-		}
-
-		// 4. If this is the 2nd sink then any unused bw will be reallocated to master DPIA
-		// => check if estimated_bw changed
+		link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed;
 
 		link->dpia_bw_alloc_config.response_ready = true;
 		break;
 
 	case DPIA_EST_BW_CHANGED:
 
-		DC_LOG_DEBUG("%s: *** ESTIMATED BW CHANGED for DP-TX Request ***\n", __func__);
-
 		estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-		host_router_total_estimated_bw = get_host_router_total_bw(link, HOST_ROUTER_BW_ESTIMATED);
 
-		// 1. If due to unplug of other sink
-		if (estimated == host_router_total_estimated_bw) {
-			// First update the estimated & max_bw fields
-			if (link->dpia_bw_alloc_config.estimated_bw < estimated)
-				link->dpia_bw_alloc_config.estimated_bw = estimated;
-		}
-		// 2. If due to realloc bw btw 2 dpia due to plug OR realloc unused Bw
-		else {
-			// We lost estimated bw usually due to plug event of other dpia
-			link->dpia_bw_alloc_config.estimated_bw = estimated;
-		}
+		DC_LOG_DEBUG("%s: ESTIMATED BW CHANGED for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_DEBUG("%s: current estimated_bw(%d), new estimated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.estimated_bw, estimated);
+
+		link->dpia_bw_alloc_config.estimated_bw = estimated;
 		break;
 
 	case DPIA_BW_ALLOC_CAPS_CHANGED:
 
-		DC_LOG_DEBUG("%s: *** BW ALLOC CAPABILITY CHANGED for DP-TX Request ***\n", __func__);
+		DC_LOG_ERROR("%s: BW ALLOC CAPABILITY CHANGED to Disabled for link(%d)\n",
+			__func__, link->link_index);
 		link->dpia_bw_alloc_config.bw_alloc_enabled = false;
 		break;
 	}
@@ -409,11 +387,11 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
 
 		do {
-			if (!(timeout > 0))
+			if (timeout > 0)
 				timeout--;
 			else
 				break;
-			fsleep(10 * 1000);
+			msleep(10);
 		} while (!get_cm_response_ready_flag(link));
 
 		if (!timeout)
@@ -428,37 +406,36 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 out:
 	return ret;
 }
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
+
+bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
 {
-	int ret = 0;
+	bool ret = false;
 	uint8_t timeout = 10;
 
+	DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n",
+		__func__, link->link_index, link->hpd_status,
+		link->dpia_bw_alloc_config.sink_allocated_bw, req_bw);
+
 	if (!get_bw_alloc_proceed_flag(link))
 		goto out;
 
-	/*
-	 * Sometimes stream uses same timing parameters as the already
-	 * allocated max sink bw so no need to re-alloc
-	 */
-	if (req_bw != link->dpia_bw_alloc_config.sink_allocated_bw) {
-		set_usb4_req_bw_req(link, req_bw);
-		do {
-			if (!(timeout > 0))
-				timeout--;
-			else
-				break;
-			udelay(10 * 1000);
-		} while (!get_cm_response_ready_flag(link));
+	set_usb4_req_bw_req(link, req_bw);
+	do {
+		if (timeout > 0)
+			timeout--;
+		else
+			break;
+		msleep(10);
+	} while (!get_cm_response_ready_flag(link));
 
-		if (!timeout)
-			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
-	}
+	if (timeout)
+		ret = true;
 
 out:
+	DC_LOG_DEBUG("%s: EXIT: timeout(%d), ret(%d)\n", __func__, timeout, ret);
 	return ret;
 }
+
 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias)
 {
 	bool ret = true;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
index 7292690383ae1..981bc4eb6120e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
@@ -59,9 +59,9 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link);
  * @link: pointer to the dc_link struct instance
  * @req_bw: Bw requested by the stream
  *
- * return: allocated bw else return 0
+ * return: true if allocated successfully
  */
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
+bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
 
 /*
  * Handle the USB4 BW Allocation related functionality here:
-- 
GitLab


From ee8ed2506603629f2706712a5282921a115a8da6 Mon Sep 17 00:00:00 2001
From: Camille Cho <camille.cho@amd.com>
Date: Thu, 16 Nov 2023 16:19:25 +0800
Subject: [PATCH 2284/2340] drm/amd/display: Correctly restore user_level

[Why]
BL1_PWM_USER_LEVEL is meant for the user brightness level setting from
OS. However, we update it along with other ABM levels to the real PWM
value which could be ABMed.

[How]
Driver to cache and restore the user brightness level setting so that
DMUB can retrieve the last user setting in ABM config initialization.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Anthony Koo <anthony.koo@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Camille Cho <camille.cho@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dce/dce_abm.c               | 4 ++--
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c              | 4 ++--
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c          | 4 ++--
 drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h          | 2 +-
 drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c  | 7 +++++--
 drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c    | 7 +++++--
 drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c    | 7 +++++--
 drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c    | 7 +++++--
 drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c    | 7 +++++--
 drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c    | 7 +++++--
 drivers/gpu/drm/amd/display/dc/inc/hw/abm.h                | 2 +-
 drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h         | 1 +
 .../amd/display/dc/link/protocols/link_edp_panel_control.c | 3 +++
 13 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 874b132fe1d78..a6006776333d1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -135,7 +135,7 @@ static void dmcu_set_backlight_level(
 			0, 1, 80000);
 }
 
-static void dce_abm_init(struct abm *abm, uint32_t backlight)
+static void dce_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 
@@ -162,7 +162,7 @@ static void dce_abm_init(struct abm *abm, uint32_t backlight)
 			BL1_PWM_TARGET_ABM_LEVEL, backlight);
 
 	REG_UPDATE(BL1_PWM_USER_LEVEL,
-			BL1_PWM_USER_LEVEL, backlight);
+			BL1_PWM_USER_LEVEL, user_level);
 
 	REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
 			ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index 8c5e7f858be38..ccc154b0281c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -57,9 +57,9 @@ static unsigned int abm_feature_support(struct abm *abm, unsigned int panel_inst
 	return ret;
 }
 
-static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight)
+static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
-	dmub_abm_init(abm, backlight);
+	dmub_abm_init(abm, backlight, user_level);
 }
 
 static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
index 4cff36351f40a..f9d6a181164aa 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
@@ -79,7 +79,7 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
 	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
-void dmub_abm_init(struct abm *abm, uint32_t backlight)
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
 	struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
 
@@ -106,7 +106,7 @@ void dmub_abm_init(struct abm *abm, uint32_t backlight)
 			BL1_PWM_TARGET_ABM_LEVEL, backlight);
 
 	REG_UPDATE(BL1_PWM_USER_LEVEL,
-			BL1_PWM_USER_LEVEL, backlight);
+			BL1_PWM_USER_LEVEL, user_level);
 
 	REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
 			ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
index 07ea6c8d414f3..761685e5b8c91 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
@@ -30,7 +30,7 @@
 
 struct abm_save_restore;
 
-void dmub_abm_init(struct abm *abm, uint32_t backlight);
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level);
 bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask);
 unsigned int dmub_abm_get_current_backlight(struct abm *abm);
 unsigned int dmub_abm_get_target_backlight(struct abm *abm);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 6457099ed5882..fb328cd06cea2 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -2593,6 +2593,7 @@ static void init_hw(struct dc *dc)
 	struct dmcu *dmcu;
 	struct dce_hwseq *hws = dc->hwseq;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	bp = dc->ctx->dc_bios;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -2642,13 +2643,15 @@ static void init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	abm = dc->res_pool->abm;
 	if (abm != NULL)
-		abm->funcs->abm_init(abm, backlight);
+		abm->funcs->abm_init(abm, backlight, user_level);
 
 	dmcu = dc->res_pool->dmcu;
 	if (dmcu != NULL && abm != NULL)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 1c5e3bb6f0ee4..51dd2ae09b2a6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -1490,6 +1490,7 @@ void dcn10_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	bool   is_optimized_init_done = false;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -1587,12 +1588,14 @@ void dcn10_init_hw(struct dc *dc)
 		for (i = 0; i < dc->link_count; i++) {
 			struct dc_link *link = dc->links[i];
 
-			if (link->panel_cntl)
+			if (link->panel_cntl) {
 				backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+				user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+			}
 		}
 
 		if (abm != NULL)
-			abm->funcs->abm_init(abm, backlight);
+			abm->funcs->abm_init(abm, backlight, user_level);
 
 		if (dmcu != NULL && !dmcu->auto_load_dmcu)
 			dmcu->funcs->dmcu_init(dmcu);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index 327c227a10ead..c34c13e1e0a4e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -476,6 +476,7 @@ void dcn30_init_hw(struct dc *dc)
 	int i;
 	int edp_num;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
 		dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -612,13 +613,15 @@ void dcn30_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 260860c259f3a..7423880fabb6e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -113,6 +113,7 @@ void dcn31_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	int i;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -224,13 +225,15 @@ void dcn31_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 6f360f008f67a..6c9299c7683df 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -753,6 +753,7 @@ void dcn32_init_hw(struct dc *dc)
 	int i;
 	int edp_num;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
 		dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -907,13 +908,15 @@ void dcn32_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL && abms[i]->funcs != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index ad710b4036de0..c40f4a06abdbe 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -134,6 +134,7 @@ void dcn35_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	int i;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -280,13 +281,15 @@ void dcn35_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 	if (dc->ctx->dmub_srv) {
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL && abms[i]->funcs != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index 9f521cf0fc5a2..3f0161d646755 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -36,7 +36,7 @@ struct abm {
 };
 
 struct abm_funcs {
-	void (*abm_init)(struct abm *abm, uint32_t back_light);
+	void (*abm_init)(struct abm *abm, uint32_t back_light, uint32_t user_level);
 	bool (*set_abm_level)(struct abm *abm, unsigned int abm_level);
 	bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst);
 	bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
index 248adc1705e35..5dcbaa2db964a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -40,6 +40,7 @@ struct panel_cntl_backlight_registers {
 	unsigned int BL_PWM_PERIOD_CNTL;
 	unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
 	unsigned int PANEL_PWRSEQ_REF_DIV2;
+	unsigned int USER_LEVEL;
 };
 
 struct panel_cntl_funcs {
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index bf53a86ea8171..e8de68e62403d 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -529,6 +529,9 @@ bool edp_set_backlight_level(const struct dc_link *link,
 	if (dc_is_embedded_signal(link->connector_signal)) {
 		struct pipe_ctx *pipe_ctx = get_pipe_from_link(link);
 
+		if (link->panel_cntl)
+			link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16;
+
 		if (pipe_ctx) {
 			/* Disable brightness ramping when the display is blanked
 			 * as it can hang the DMCU
-- 
GitLab


From efae5a9eb47b76d5f84c0a0ca2ec95c9ce8a393c Mon Sep 17 00:00:00 2001
From: Wayne Lin <wayne.lin@amd.com>
Date: Mon, 4 Dec 2023 10:09:33 +0800
Subject: [PATCH 2285/2340] drm/amd/display: pbn_div need be updated for
 hotplug event

link_rate sometime will be changed when DP MST connector hotplug, so
pbn_div also need be updated; otherwise, it will mismatch with
link_rate, causes no output in external monitor.

Cc: stable@vger.kernel.org
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Jerry Zuo <jerry.zuo@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Wade Wang <wade.wang@hp.com>
Signed-off-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ddde330860fc0..a144024df97c8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -7005,8 +7005,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
 	if (IS_ERR(mst_state))
 		return PTR_ERR(mst_state);
 
-	if (!mst_state->pbn_div.full)
-		mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
+	mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
 
 	if (!state->duplicated) {
 		int max_bpc = conn_state->max_requested_bpc;
-- 
GitLab


From ec9ba4821fa52b5efdbc4cdf0a77497990655231 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Mon, 18 Dec 2023 16:17:23 -0500
Subject: [PATCH 2286/2340] drm/amdgpu: Let KFD sync with VM fences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the rules for amdgpu_sync_resv to let KFD synchronize with VM
fences on page table reservations. This fixes intermittent memory
corruption after evictions when using amdgpu_vm_handle_moved to update
page tables for VM mappings managed through render nodes.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index dcd8c066bc1f5..1b013a44ca99a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
 
 	/* Never sync to VM updates either. */
 	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
-	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+	    owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
+	    owner != AMDGPU_FENCE_OWNER_KFD)
 		return false;
 
 	/* Ignore fences depending on the sync mode */
-- 
GitLab


From 09b5bc456c63e3caeb854d492177bbfbe7b1cb22 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Wed, 13 Dec 2023 09:46:50 -0500
Subject: [PATCH 2287/2340] drm/amd/display: Assign stream status for FPO +
 Vactive cases

A new check was added to ensure FPO is not enabled when the FPO pipe has
0 planes. This requires the stream status to check the plane count, but
the stream status was not assigned for FPO + Vactive cases which leads
to FPO not be enabled always.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Relja Vojvodic <relja.vojvodic@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index cca6436ecdf58..f698a7e632910 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -554,7 +554,8 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
 		DC_FP_START();
 		dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream);
 		DC_FP_END();
-
+		if (fpo_candidate_stream)
+			fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
 		DC_FP_START();
 		is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, dc->debug.fpo_vactive_min_active_margin_us);
 		DC_FP_END();
-- 
GitLab


From 4069d43bfecb45811a2ad5dc63326e4227fa5931 Mon Sep 17 00:00:00 2001
From: Relja Vojvodic <relja.vojvodic@amd.com>
Date: Wed, 13 Dec 2023 10:31:06 -0500
Subject: [PATCH 2288/2340] drm/amd/display: Add log end specifier

[Why]
Some debug tools, sometimes wrap around to multiple lines which causes
issues with the DPM test script while it is looking for the logs. Need a
way to tell when the log is finished.

[How]
Added "LOG_END" to the end of the log.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Relja Vojvodic <relja.vojvodic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 51adb13b3b800..aadd07bc68c5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -565,7 +565,7 @@ static void dcn32_auto_dpm_test_log(
 			"pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
 			"pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
 			"pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
-			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d\n",
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
 			dramclk_khz_override,
 			fclk_khz_override,
 			new_clocks->dcfclk_khz,
-- 
GitLab


From 54249f03ab9a7311dad653b449e15c6a939d7732 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Mon, 11 Dec 2023 17:54:57 -0500
Subject: [PATCH 2289/2340] drm/amd/display: Always exit DMCUB idle when called

[Why]
dc->idle_optimizations_allowed may be desynced with the hardware state.

[How]
Make sure we always exit out when dc_dmub_srv_exit_low_power_state is
called by removing the check.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Sung joon Kim <sungjoon.kim@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 1d315f7cdce38..4ce8ac966cc81 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1222,9 +1222,6 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 	if (dc->debug.dmcub_emulation)
 		return;
 
-	if (!dc->idle_optimizations_allowed)
-		return;
-
 	if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
 		return;
 
-- 
GitLab


From 60d5d1e76270bac910f9596799cbd831fe09c489 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Mon, 11 Dec 2023 17:57:24 -0500
Subject: [PATCH 2290/2340] drm/amd/display: Wait forever for DMCUB to wake up

[Why]
If we time out waiting for PMFW to finish the exit sequence and touch
the DMCUB register the system will hang in a hard locked state.

[How]
Pol forever. This covers the case where things take too long but also
enables for debugging to occur since the cores won't be hardlocked.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Sung joon Kim <sungjoon.kim@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 4ce8ac966cc81..7724dcadecba5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1214,10 +1214,8 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 
 static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 {
-	const uint32_t max_num_polls = 10000;
 	uint32_t allow_state = 0;
 	uint32_t commit_state = 0;
-	int i;
 
 	if (dc->debug.dmcub_emulation)
 		return;
@@ -1244,17 +1242,13 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 				udelay(dc->debug.ips2_entry_delay_us);
 				dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
 
-				for (i = 0; i < max_num_polls; ++i) {
+				for (;;) {
 					commit_state = dc->hwss.get_idle_state(dc);
 					if (commit_state & DMUB_IPS2_COMMIT_MASK)
 						break;
 
 					udelay(1);
-
-					if (dc->debug.disable_timeout)
-						i--;
 				}
-				ASSERT(i < max_num_polls);
 
 				if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true))
 					ASSERT(0);
@@ -1269,17 +1263,13 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
 		dc_dmub_srv_notify_idle(dc, false);
 		if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
-			for (i = 0; i < max_num_polls; ++i) {
+			for (;;) {
 				commit_state = dc->hwss.get_idle_state(dc);
 				if (commit_state & DMUB_IPS1_COMMIT_MASK)
 					break;
 
 				udelay(1);
-
-				if (dc->debug.disable_timeout)
-					i--;
 			}
-			ASSERT(i < max_num_polls);
 		}
 	}
 
-- 
GitLab


From 4b5c5f5ad38b9435518730cc7f8f1e8de9c5cb2f Mon Sep 17 00:00:00 2001
From: Jack Xiao <Jack.Xiao@amd.com>
Date: Tue, 19 Dec 2023 17:10:34 +0800
Subject: [PATCH 2291/2340] drm/amdgpu/gfx11: need acquire mutex before access
 CP_VMID_RESET v2

It's required to take the gfx mutex before access to CP_VMID_RESET,
for there is a race condition with CP firmware to write the register.

v2: add extra code to ensure the mutex releasing is successful.

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 48 +++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index bdcf96df69e6b..2fbcd9765980d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4474,11 +4474,43 @@ static int gfx_v11_0_wait_for_idle(void *handle)
 	return -ETIMEDOUT;
 }
 
+static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+					     int req)
+{
+	u32 i, tmp, val;
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		/* Request with MeId=2, PipeId=0 */
+		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+		if (req) {
+			if (val == tmp)
+				break;
+		} else {
+			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+					    REQUEST, 1);
+
+			/* unlocked or locked by firmware */
+			if (val != tmp)
+				break;
+		}
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int gfx_v11_0_soft_reset(void *handle)
 {
 	u32 grbm_soft_reset = 0;
 	u32 tmp;
-	int i, j, k;
+	int r, i, j, k;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
@@ -4518,6 +4550,13 @@ static int gfx_v11_0_soft_reset(void *handle)
 		}
 	}
 
+	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+	r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
+	if (r) {
+		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+		return r;
+	}
+
 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
 
 	// Read CP_VMID_RESET register three times.
@@ -4526,6 +4565,13 @@ static int gfx_v11_0_soft_reset(void *handle)
 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
 
+	/* release the gfx mutex */
+	r = gfx_v11_0_request_gfx_index_mutex(adev, 0);
+	if (r) {
+		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+		return r;
+	}
+
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
-- 
GitLab


From 4e7738bcfb6765ca669fdbd2be2f7f6f239ed3e5 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Tue, 12 Dec 2023 12:10:49 -0500
Subject: [PATCH 2292/2340] drm/amd/display: Switch DMCUB notify idle command
 to NO_WAIT

[Why]
Race condition between notification of driver idle and the command being
processed. We could theoretically enter idle between the submission and
the wait for idle that occurs after.

[How]
Switch the notification to NO_WAIT to avoid the RPTR access.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Sung joon Kim <sungjoon.kim@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 7724dcadecba5..dadeaa9c92dd5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1209,7 +1209,8 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 	}
 
 	/* NOTE: This does not use the "wake" interface since this is part of the wake path. */
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	/* We also do not perform a wait since DMCUB could enter idle after the notification. */
+	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 }
 
 static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
-- 
GitLab


From 59f1622a5f05d948a7c665a458a3dd76ba73015e Mon Sep 17 00:00:00 2001
From: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
Date: Tue, 5 Dec 2023 00:01:15 -0500
Subject: [PATCH 2293/2340] drm/amd/display: Add dpia display mode validation
 logic

[Why]
If bandwidth allocation feature is enabled, connection manager wont
limit the dp tunnel bandwidth. So, need to do display mode validation
for streams on dpia links to avoid oversubscription of dp tunnel
bandwidth.

[How]
- To read non reduced link rate and lane count and update
  reported link capability.
- To calculate the bandwidth required for streams of dpia links
  per host router and validate against the allocated bandwidth for
  the host router.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: PeiChen Huang <peichen.huang@amd.com>
Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Meenakshikumar Somasundaram <meenakshikumar.somasundaram@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/core/dc_link_exports.c |   2 +-
 drivers/gpu/drm/amd/display/dc/dc.h           |   4 +-
 drivers/gpu/drm/amd/display/dc/dc_dp_types.h  |   6 +
 drivers/gpu/drm/amd/display/dc/dc_types.h     |   2 +
 .../dc/link/protocols/link_dp_dpia_bw.c       | 130 +++++++++++++-----
 5 files changed, 104 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index ed94187c2afa2..f365773d57148 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -497,7 +497,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
 	link->dc->link_srv->enable_hpd_filter(link, enable);
 }
 
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
+bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
 {
 	return dc->link_srv->validate_dpia_bandwidth(streams, count);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index c2ee80d6f64c9..565de7428b6ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -2172,11 +2172,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
  *
  * @dc: pointer to dc struct
  * @stream: pointer to all possible streams
- * @num_streams: number of valid DPIA streams
+ * @count: number of valid DPIA streams
  *
  * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE
  */
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams,
+bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams,
 		const unsigned int count);
 
 /* Sink Interfaces - A sink corresponds to a display output device */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index eeeeeef4d7173..1cb7765f593aa 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -1377,6 +1377,12 @@ struct dp_trace {
 #ifndef DP_TUNNELING_STATUS
 #define DP_TUNNELING_STATUS				0xE0025 /* 1.4a */
 #endif
+#ifndef DP_TUNNELING_MAX_LINK_RATE
+#define DP_TUNNELING_MAX_LINK_RATE			0xE0028 /* 1.4a */
+#endif
+#ifndef DP_TUNNELING_MAX_LANE_COUNT
+#define DP_TUNNELING_MAX_LANE_COUNT			0xE0029 /* 1.4a */
+#endif
 #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL
 #define DPTX_BW_ALLOCATION_MODE_CONTROL			0xE0030 /* 1.4a */
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 870cd3932015c..4f276169e05a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1151,6 +1151,8 @@ struct dc_dpia_bw_alloc {
 	int bw_granularity;    // BW Granularity
 	bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
 	bool response_ready;   // Response ready from the CM side
+	uint8_t nrd_max_lane_count; // Non-reduced max lane count
+	uint8_t nrd_max_link_rate; // Non-reduced max link rate
 };
 
 #define MAX_SINKS_PER_LINK 4
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index d6e1f969bfd52..a7aa8c9da868f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -59,6 +59,7 @@ static void reset_bw_alloc_struct(struct dc_link *link)
 	link->dpia_bw_alloc_config.estimated_bw = 0;
 	link->dpia_bw_alloc_config.bw_granularity = 0;
 	link->dpia_bw_alloc_config.response_ready = false;
+	link->dpia_bw_alloc_config.sink_allocated_bw = 0;
 }
 
 #define BW_GRANULARITY_0 4 // 0.25 Gbps
@@ -104,6 +105,32 @@ static int get_estimated_bw(struct dc_link *link)
 	return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 }
 
+static int get_non_reduced_max_link_rate(struct dc_link *link)
+{
+	uint8_t nrd_max_link_rate = 0;
+
+	core_link_read_dpcd(
+			link,
+			DP_TUNNELING_MAX_LINK_RATE,
+			&nrd_max_link_rate,
+			sizeof(uint8_t));
+
+	return nrd_max_link_rate;
+}
+
+static int get_non_reduced_max_lane_count(struct dc_link *link)
+{
+	uint8_t nrd_max_lane_count = 0;
+
+	core_link_read_dpcd(
+			link,
+			DP_TUNNELING_MAX_LANE_COUNT,
+			&nrd_max_lane_count,
+			sizeof(uint8_t));
+
+	return nrd_max_lane_count;
+}
+
 /*
  * Read all New BW alloc configuration ex: estimated_bw, allocated_bw,
  * granuality, Driver_ID, CM_Group, & populate the BW allocation structs
@@ -111,13 +138,20 @@ static int get_estimated_bw(struct dc_link *link)
  */
 static void init_usb4_bw_struct(struct dc_link *link)
 {
-	// Init the known values
+	reset_bw_alloc_struct(link);
+
+	/* init the known values */
 	link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link);
 	link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
+	link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link);
+	link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link);
 
 	DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n",
 		__func__, link->dpia_bw_alloc_config.bw_granularity,
 		link->dpia_bw_alloc_config.estimated_bw);
+	DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n",
+		__func__, link->dpia_bw_alloc_config.nrd_max_link_rate,
+		link->dpia_bw_alloc_config.nrd_max_lane_count);
 }
 
 static uint8_t get_lowest_dpia_index(struct dc_link *link)
@@ -142,39 +176,50 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link)
 }
 
 /*
- * Get the Max Available BW or Max Estimated BW for each Host Router
+ * Get the maximum dp tunnel banwidth of host router
  *
- * @link: pointer to the dc_link struct instance
- * @type: ESTIMATD BW or MAX AVAILABLE BW
+ * @dc: pointer to the dc struct instance
+ * @hr_index: host router index
  *
- * return: response_ready flag from dc_link struct
+ * return: host router maximum dp tunnel bandwidth
  */
-static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
+static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index)
 {
-	const struct dc *dc_struct = link->dc;
-	uint8_t lowest_dpia_index = get_lowest_dpia_index(link);
-	uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0;
-	struct dc_link *link_temp;
+	uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]);
+	uint8_t hr_index_temp = 0;
+	struct dc_link *link_dpia_primary, *link_dpia_secondary;
 	int total_bw = 0;
-	int i;
-
-	for (i = 0; i < MAX_PIPES * 2; ++i) {
 
-		if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
-			continue;
+	for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) {
 
-		link_temp = dc_struct->links[i];
-		if (!link_temp || !link_temp->hpd_status)
+		if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
 			continue;
 
-		idx_temp = (link_temp->link_index - lowest_dpia_index) / 2;
-
-		if (idx_temp == idx) {
-
-			if (type == HOST_ROUTER_BW_ESTIMATED)
-				total_bw += link_temp->dpia_bw_alloc_config.estimated_bw;
-			else if (type == HOST_ROUTER_BW_ALLOCATED)
-				total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw;
+		hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2;
+
+		if (hr_index_temp == hr_index) {
+			link_dpia_primary = dc->links[i];
+			link_dpia_secondary = dc->links[i + 1];
+
+			/**
+			 * If BW allocation enabled on both DPIAs, then
+			 * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary)
+			 * otherwise HR BW = Estimated(bw alloc enabled dpia)
+			 */
+			if ((link_dpia_primary->hpd_status &&
+				link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
+				(link_dpia_secondary->hpd_status &&
+				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
+				total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
+					link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
+			} else if (link_dpia_primary->hpd_status &&
+					link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
+				total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
+			} else if (link_dpia_secondary->hpd_status &&
+				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) {
+				total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw;
+			}
+			break;
 		}
 	}
 
@@ -194,7 +239,6 @@ static void dpia_bw_alloc_unplug(struct dc_link *link)
 	if (link) {
 		DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n",
 			__func__, link->link_index);
-		link->dpia_bw_alloc_config.sink_allocated_bw = 0;
 		reset_bw_alloc_struct(link);
 	}
 }
@@ -397,7 +441,7 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 		if (!timeout)
 			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
 		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
+			ret = link->dpia_bw_alloc_config.sink_allocated_bw;
 	}
 	//2. Cold Unplug
 	else if (!link->hpd_status)
@@ -439,29 +483,41 @@ out:
 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias)
 {
 	bool ret = true;
-	int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 };
-	uint8_t lowest_dpia_index = 0, dpia_index = 0;
-	uint8_t i;
+	int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0;
+	uint8_t lowest_dpia_index, i, hr_index;
 
 	if (!num_dpias || num_dpias > MAX_DPIA_NUM)
 		return ret;
 
-	//Get total Host Router BW & Validate against each Host Router max BW
+	lowest_dpia_index = get_lowest_dpia_index(link[0]);
+
+	/* get total Host Router BW with granularity for the given modes */
 	for (i = 0; i < num_dpias; ++i) {
+		int granularity_Gbps = 0;
+		int bw_granularity = 0;
 
 		if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled)
 			continue;
 
-		lowest_dpia_index = get_lowest_dpia_index(link[i]);
 		if (link[i]->link_index < lowest_dpia_index)
 			continue;
 
-		dpia_index = (link[i]->link_index - lowest_dpia_index) / 2;
-		bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i];
-		if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) {
+		granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity);
+		bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps +
+				((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0);
 
-			ret = false;
-			break;
+		hr_index = (link[i]->link_index - lowest_dpia_index) / 2;
+		bw_needed_per_hr[hr_index] += bw_granularity;
+	}
+
+	/* validate against each Host Router max BW */
+	for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) {
+		if (bw_needed_per_hr[hr_index]) {
+			host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index);
+			if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) {
+				ret = false;
+				break;
+			}
 		}
 	}
 
-- 
GitLab


From 6b2b782ad6a25734ae847d1659bea3f613dbb563 Mon Sep 17 00:00:00 2001
From: Alvin Lee <alvin.lee2@amd.com>
Date: Tue, 12 Dec 2023 12:17:31 -0500
Subject: [PATCH 2294/2340] drm/amd/display: For FPO and SubVP/DRR configs
 program vmin/max sel

For FPO and SubVP/DRR cases we need to ensure to program
OTG_V_TOTAL_MIN/MAX_SEL, otherwise stretching the vblank in FPO / SubVP
/ DRR cases will not have any effect and we could hit underflow /
corruption.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/core/dc_resource.c | 14 +++++++
 .../display/dc/dcn32/dcn32_resource_helpers.c | 14 -------
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 11 +++---
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   | 37 +++++++++++++++++++
 drivers/gpu/drm/amd/display/dc/inc/resource.h |  3 ++
 .../dc/resource/dcn32/dcn32_resource.c        |  2 +-
 .../dc/resource/dcn32/dcn32_resource.h        |  3 --
 .../dc/resource/dcn321/dcn321_resource.c      |  2 +-
 8 files changed, 62 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index f2abc1096ffb6..57f0ddd159239 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -4986,6 +4986,20 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 	return DC_OK;
 }
 
+bool resource_subvp_in_use(struct dc *dc,
+		struct dc_state *context)
+{
+	uint32_t i;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
+			return true;
+	}
+	return false;
+}
+
 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream)
 {
 	if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index f698a7e632910..44ba1edb7f538 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -183,20 +183,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 	return true;
 }
 
-bool dcn32_subvp_in_use(struct dc *dc,
-		struct dc_state *context)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
-			return true;
-	}
-	return false;
-}
-
 bool dcn32_mpo_in_use(struct dc_state *context)
 {
 	uint32_t i;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 9f37f717a1f86..aa68d010cbfd2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -33,6 +33,7 @@
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
 #include "dc_state_priv.h"
+#include "resource.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -291,7 +292,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 		/* for subvp + DRR case, if subvp pipes are still present we support pstate */
 		if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
-				dcn32_subvp_in_use(dc, context))
+				resource_subvp_in_use(dc, context))
 			vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
 
 		if (vlevel < context->bw_ctx.dml.vba.soc.num_states &&
@@ -2272,7 +2273,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	unsigned int dummy_latency_index = 0;
 	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
 	unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
-	bool subvp_in_use = dcn32_subvp_in_use(dc, context);
+	bool subvp_active = resource_subvp_in_use(dc, context);
 	unsigned int min_dram_speed_mts_margin;
 	bool need_fclk_lat_as_dummy = false;
 	bool is_subvp_p_drr = false;
@@ -2281,7 +2282,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	dc_assert_fp_enabled();
 
 	/* need to find dummy latency index for subvp */
-	if (subvp_in_use) {
+	if (subvp_active) {
 		/* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */
 		if (!pstate_en) {
 			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
@@ -2467,7 +2468,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 				dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
 		}
 
-		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) {
+		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) {
 			/* find largest table entry that is lower than dram speed,
 			 * but lower than DPM0 still uses DPM0
 			 */
@@ -3527,7 +3528,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb)
 void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context)
 {
 	// WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue)
-	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) &&
+	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) &&
 			dc->dml.soc.num_chans <= 8) {
 		int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 0dfcb3cdcd20c..bc71a9b058fed 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1882,6 +1882,42 @@ static void dcn20_program_pipe(
 	}
 }
 
+static void update_vmin_vmax_fams(struct dc *dc,
+		struct dc_state *context)
+{
+	uint32_t i;
+	struct drr_params params = {0};
+	bool subvp_in_use = resource_subvp_in_use(dc, context);
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+				((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM &&
+				pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) {
+			if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) {
+				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
+
+				/* DRR should be configured already if we're in active variable
+				 * or active fixed, so only program if we're not in this state
+				 */
+				params.vertical_total_min = pipe->stream->timing.v_total;
+				params.vertical_total_max = pipe->stream->timing.v_total;
+				tg->funcs->set_drr(tg, &params);
+			}
+		} else {
+			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+					!pipe->stream->vrr_active_variable &&
+					!pipe->stream->vrr_active_fixed) {
+				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
+				params.vertical_total_min = 0;
+				params.vertical_total_max = 0;
+				tg->funcs->set_drr(tg, &params);
+			}
+		}
+	}
+}
+
 void dcn20_program_front_end_for_ctx(
 		struct dc *dc,
 		struct dc_state *context)
@@ -1958,6 +1994,7 @@ void dcn20_program_front_end_for_ctx(
 				&& context->res_ctx.pipe_ctx[i].stream)
 			hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
 
+	update_vmin_vmax_fams(dc, context);
 
 	/* Disconnect mpcc */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index c958ef37b78a6..1d51fed12e200 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -609,6 +609,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
 		struct pipe_ctx *sec_pipe,
 		bool odm);
 
+bool resource_subvp_in_use(struct dc *dc,
+		struct dc_state *context);
+
 /* A test harness interface that modifies dp encoder resources in the given dc
  * state and bypasses the need to revalidate. The interface assumes that the
  * test harness interface is called with pre-validated link config stored in the
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index c4d71e7f18af4..ac04a9c9a3d86 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1899,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context(
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = dcn32_subvp_in_use,
+	.get_subvp_en = resource_subvp_in_use,
 };
 
 void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index 0c87b0fabba7d..62611acd4bcb5 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -131,9 +131,6 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc,
 bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 		struct dc_state *context);
 
-bool dcn32_subvp_in_use(struct dc *dc,
-		struct dc_state *context);
-
 bool dcn32_mpo_in_use(struct dc_state *context);
 
 bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index 74412e5f03fef..e1ab207c46f15 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1574,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool)
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = dcn32_subvp_in_use,
+	.get_subvp_en = resource_subvp_in_use,
 };
 
 static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-- 
GitLab


From f6154d8babbb8a98f0d3ea325aafae2e33bfd8be Mon Sep 17 00:00:00 2001
From: Revalla <hrevalla@amd.com>
Date: Wed, 13 Dec 2023 19:56:38 +0530
Subject: [PATCH 2295/2340] drm/amd/display: Refactor INIT into component
 folder

[why]
Move all init files to hwss folder.

[how]
moved the dcnxx_init.c and .h files into inside the hwss and cleared the
linkage errors.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Martin Leung <martin.leung@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Revalla <hrevalla@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/Makefile       |   2 -
 drivers/gpu/drm/amd/display/dc/dcn10/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn20/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/dcn201/Makefile    |   3 +-
 drivers/gpu/drm/amd/display/dc/dcn21/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile |   4 +-
 .../gpu/drm/amd/display/dc/dcn301/Makefile    |   2 +-
 .../gpu/drm/amd/display/dc/dcn302/Makefile    |  12 --
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |   2 +-
 .../gpu/drm/amd/display/dc/dcn314/Makefile    |   3 +-
 drivers/gpu/drm/amd/display/dc/dcn32/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/dcn35/Makefile |   2 +-
 drivers/gpu/drm/amd/display/dc/hwss/Makefile  |  28 +--
 .../display/dc/{ => hwss}/dcn10/dcn10_init.c  |   0
 .../display/dc/{ => hwss}/dcn10/dcn10_init.h  |   0
 .../display/dc/{ => hwss}/dcn20/dcn20_init.c  |   0
 .../display/dc/{ => hwss}/dcn20/dcn20_init.h  |   0
 .../dc/{ => hwss}/dcn201/dcn201_init.c        |   0
 .../dc/{ => hwss}/dcn201/dcn201_init.h        |   0
 .../display/dc/{ => hwss}/dcn21/dcn21_init.c  |   0
 .../display/dc/{ => hwss}/dcn21/dcn21_init.h  |   0
 .../display/dc/{ => hwss}/dcn30/dcn30_init.c  |   0
 .../display/dc/{ => hwss}/dcn30/dcn30_init.h  |   0
 .../dc/{ => hwss}/dcn301/dcn301_init.c        |   0
 .../dc/{ => hwss}/dcn301/dcn301_init.h        |   0
 .../dc/{ => hwss}/dcn302/dcn302_init.c        |   0
 .../dc/{ => hwss}/dcn302/dcn302_init.h        |   0
 .../dc/{ => hwss}/dcn303/dcn303_init.c        |   0
 .../dc/{ => hwss}/dcn303/dcn303_init.h        |   0
 .../display/dc/{ => hwss}/dcn31/dcn31_init.c  |   0
 .../display/dc/{ => hwss}/dcn31/dcn31_init.h  |   0
 .../dc/{ => hwss}/dcn314/dcn314_init.c        |   0
 .../dc/{ => hwss}/dcn314/dcn314_init.h        |   0
 .../display/dc/{ => hwss}/dcn32/dcn32_init.c  |   0
 .../display/dc/{ => hwss}/dcn32/dcn32_init.h  |   0
 .../display/dc/{ => hwss}/dcn35/dcn35_init.c  |   0
 .../display/dc/{ => hwss}/dcn35/dcn35_init.h  |   0
 .../amd/display/dc/hwss/dcn351/CMakeLists.txt |   4 +
 .../drm/amd/display/dc/hwss/dcn351/Makefile   |  17 ++
 .../amd/display/dc/hwss/dcn351/dcn351_init.c  | 171 ++++++++++++++++++
 .../amd/display/dc/hwss/dcn351/dcn351_init.h  |  33 ++++
 41 files changed, 250 insertions(+), 41 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn302/Makefile
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn10/dcn10_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn10/dcn10_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn20/dcn20_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn20/dcn20_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn201/dcn201_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn201/dcn201_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn21/dcn21_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn21/dcn21_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn30/dcn30_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn30/dcn30_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn301/dcn301_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn301/dcn301_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn302/dcn302_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn302/dcn302_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn303/dcn303_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn303/dcn303_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn31/dcn31_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn31/dcn31_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn314/dcn314_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn314/dcn314_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn32/dcn32_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn32/dcn32_init.h (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn35/dcn35_init.c (100%)
 rename drivers/gpu/drm/amd/display/dc/{ => hwss}/dcn35/dcn35_init.h (100%)
 create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt
 create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
 create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h

diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index bae4fdddbf97c..7991ae468f752 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -34,8 +34,6 @@ DC_LIBS += dcn21
 DC_LIBS += dcn201
 DC_LIBS += dcn30
 DC_LIBS += dcn301
-DC_LIBS += dcn302
-DC_LIBS += dcn303
 DC_LIBS += dcn31
 DC_LIBS += dcn314
 DC_LIBS += dcn32
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 0dd62934a18ce..ae6a131be71b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -22,7 +22,7 @@
 #
 # Makefile for DCN.
 
-DCN10 = dcn10_init.o dcn10_ipp.o \
+DCN10 = dcn10_ipp.o \
 		dcn10_hw_sequencer_debug.o \
 		dcn10_dpp.o dcn10_opp.o \
 		dcn10_hubp.o dcn10_mpc.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index bd760442ff893..3dae3943b056c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for DCN.
 
-DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
+DCN20 = dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
 		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_mmhubbub.o \
 		dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
 		dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index a101e65115550..2b0b4f32e13bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -1,8 +1,7 @@
 # SPDX-License-Identifier: MIT
 #
 # Makefile for DCN.
-DCN201 = dcn201_init.o \
-	dcn201_hubbub.o\
+DCN201 = dcn201_hubbub.o\
 	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_dpp.o \
 	dcn201_dccg.o dcn201_link_encoder.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index dd1eea7212f47..ca92f5c8e7fb7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for DCN21.
 
-DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o \
+DCN21 = dcn21_hubp.o dcn21_hubbub.o \
 	 dcn21_link_encoder.o dcn21_dccg.o
 
 AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index cd95f322235e9..b5b2aa3b37839 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -23,9 +23,7 @@
 #
 #
 
-DCN30 := \
-	dcn30_init.o \
-	dcn30_hubbub.o \
+DCN30 := dcn30_hubbub.o \
 	dcn30_hubp.o \
 	dcn30_dpp.o \
 	dcn30_dccg.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 090011300dcdb..d241f665e40ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn30.
 
-DCN301 = dcn301_init.o dcn301_dccg.o \
+DCN301 = dcn301_dccg.o \
 		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o
 
 AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
deleted file mode 100644
index 0fcd03569d74e..0000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-#  Authors: AMD
-#
-# Makefile for dcn302.
-
-DCN3_02 = dcn302_init.o
-
-AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index 11a2662e58ef3..5d93ac16c03a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn31.
 
-DCN31 = dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \
+DCN31 = dcn31_hubbub.o dcn31_hubp.o \
 	dcn31_dccg.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
 	dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
 	dcn31_afmt.o dcn31_vpg.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index d5c177346a3bf..b134ab05aa712 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -10,8 +10,7 @@
 #
 # Makefile for dcn314.
 
-DCN314 = dcn314_init.o \
-		dcn314_dio_stream_encoder.o dcn314_dccg.o
+DCN314 = dcn314_dio_stream_encoder.o dcn314_dccg.o
 
 AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
index 905b74b530920..5314770fff1c0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn32.
 
-DCN32 = dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \
+DCN32 = dcn32_hubbub.o dcn32_dccg.o \
 		dcn32_mmhubbub.o dcn32_dpp.o dcn32_hubp.o dcn32_mpc.o \
 		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_resource_helpers.o \
 		dcn32_hpo_dp_link_encoder.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
index fa7ec82ae5f58..0e317e0c36a08 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for DCN35.
 
-DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \
+DCN35 = dcn35_dio_stream_encoder.o \
 	dcn35_dio_link_encoder.o dcn35_dccg.o \
 	dcn35_hubp.o dcn35_hubbub.o \
 	dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
index bccd46bd18158..254136f8e3f90 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
@@ -78,7 +78,7 @@ ifdef CONFIG_DRM_AMD_DC_FP
 # DCN
 ###############################################################################
 
-HWSS_DCN10 = dcn10_hwseq.o
+HWSS_DCN10 = dcn10_hwseq.o dcn10_init.o
 
 AMD_DAL_HWSS_DCN10 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn10/,$(HWSS_DCN10))
 
@@ -86,7 +86,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN10)
 
 ###############################################################################
 
-HWSS_DCN20 = dcn20_hwseq.o
+HWSS_DCN20 = dcn20_hwseq.o dcn20_init.o
 
 AMD_DAL_HWSS_DCN20 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn20/,$(HWSS_DCN20))
 
@@ -94,7 +94,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN20)
 
 ###############################################################################
 
-HWSS_DCN201 = dcn201_hwseq.o
+HWSS_DCN201 = dcn201_hwseq.o dcn201_init.o
 
 AMD_DAL_HWSS_DCN201 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn201/,$(HWSS_DCN201))
 
@@ -102,7 +102,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN201)
 
 ###############################################################################
 
-HWSS_DCN21 = dcn21_hwseq.o
+HWSS_DCN21 = dcn21_hwseq.o dcn21_init.o
 
 AMD_DAL_HWSS_DCN21 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn21/,$(HWSS_DCN21))
 
@@ -114,7 +114,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN21)
 
 ###############################################################################
 
-HWSS_DCN30 = dcn30_hwseq.o
+HWSS_DCN30 = dcn30_hwseq.o dcn30_init.o
 
 AMD_DAL_HWSS_DCN30 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn30/,$(HWSS_DCN30))
 
@@ -122,7 +122,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN30)
 
 ###############################################################################
 
-HWSS_DCN301 = dcn301_hwseq.o
+HWSS_DCN301 = dcn301_hwseq.o dcn301_init.o
 
 AMD_DAL_HWSS_DCN301 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn301/,$(HWSS_DCN301))
 
@@ -130,15 +130,17 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN301)
 
 ###############################################################################
 
-HWSS_DCN302 = dcn302_hwseq.o
+HWSS_DCN302 = dcn302_hwseq.o dcn302_init.o
 
 AMD_DAL_HWSS_DCN302 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn302/,$(HWSS_DCN302))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN302)
 
+
+
 ###############################################################################
 
-HWSS_DCN303 = dcn303_hwseq.o
+HWSS_DCN303 = dcn303_hwseq.o dcn303_init.o
 
 AMD_DAL_HWSS_DCN303 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn303/,$(HWSS_DCN303))
 
@@ -146,7 +148,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN303)
 
 ###############################################################################
 
-HWSS_DCN31 = dcn31_hwseq.o
+HWSS_DCN31 = dcn31_hwseq.o dcn31_init.o
 
 AMD_DAL_HWSS_DCN31 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn31/,$(HWSS_DCN31))
 
@@ -154,7 +156,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN31)
 
 ###############################################################################
 
-HWSS_DCN314 = dcn314_hwseq.o
+HWSS_DCN314 = dcn314_hwseq.o dcn314_init.o
 
 AMD_DAL_HWSS_DCN314 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn314/,$(HWSS_DCN314))
 
@@ -162,7 +164,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN314)
 
 ###############################################################################
 
-HWSS_DCN32 = dcn32_hwseq.o
+HWSS_DCN32 = dcn32_hwseq.o dcn32_init.o
 
 AMD_DAL_HWSS_DCN32 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn32/,$(HWSS_DCN32))
 
@@ -170,7 +172,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN32)
 
 ###############################################################################
 
-HWSS_DCN35 = dcn35_hwseq.o
+HWSS_DCN35 = dcn35_hwseq.o dcn35_init.o
 
 AMD_DAL_HWSS_DCN35 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn35/,$(HWSS_DCN35))
 
@@ -180,4 +182,4 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN35)
 
 ###############################################################################
 
-endif
\ No newline at end of file
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
similarity index 100%
rename from drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h
rename to drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt
new file mode 100644
index 0000000000000..951ca2da4486f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt
@@ -0,0 +1,4 @@
+dal3_subdirectory_sources(
+  dcn351_init.c
+  dcn351_init.h
+)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
new file mode 100644
index 0000000000000..b24ad27fe6ef0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
@@ -0,0 +1,17 @@
+#
+# (c) Copyright 2022 Advanced Micro Devices, Inc. All the rights reserved
+#
+#  All rights reserved.  This notice is intended as a precaution against
+#  inadvertent publication and does not imply publication or any waiver
+#  of confidentiality.  The year included in the foregoing notice is the
+#  year of creation of the work.
+#
+#  Authors: AMD
+#
+# Makefile for DCN351.
+
+DCN351 = dcn351_init.o
+
+AMD_DAL_DCN351 = $(addprefix $(AMDDALPATH)/dc/dcn351/,$(DCN351))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCN351)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
new file mode 100644
index 0000000000000..143d3fc0221cf
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dcn301/dcn301_hwseq.h"
+#include "dcn31/dcn31_hwseq.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn35/dcn35_hwseq.h"
+
+#include "dcn351_init.h"
+
+static const struct hw_sequencer_funcs dcn351_funcs = {
+	.program_gamut_remap = dcn30_program_gamut_remap,
+	.init_hw = dcn35_init_hw,
+	.power_down_on_boot = dcn35_power_down_on_boot,
+	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+	.apply_ctx_for_surface = NULL,
+	.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+	.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
+	.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.update_dchub = dcn10_update_dchub,
+	.update_pending_status = dcn10_update_pending_status,
+	.program_output_csc = dcn20_program_output_csc,
+	.enable_accelerated_mode = dce110_enable_accelerated_mode,
+	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
+	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+	.update_info_frame = dcn31_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+	.enable_stream = dcn20_enable_stream,
+	.disable_stream = dce110_disable_stream,
+	.unblank_stream = dcn32_unblank_stream,
+	.blank_stream = dce110_blank_stream,
+	.enable_audio_stream = dce110_enable_audio_stream,
+	.disable_audio_stream = dce110_disable_audio_stream,
+	.disable_plane = dcn35_disable_plane,
+	.disable_pixel_data = dcn20_disable_pixel_data,
+	.pipe_control_lock = dcn20_pipe_control_lock,
+	.interdependent_update_lock = dcn10_lock_all_pipes,
+	.cursor_lock = dcn10_cursor_lock,
+	.prepare_bandwidth = dcn35_prepare_bandwidth,
+	.optimize_bandwidth = dcn35_optimize_bandwidth,
+	.update_bandwidth = dcn20_update_bandwidth,
+	.set_drr = dcn10_set_drr,
+	.get_position = dcn10_get_position,
+	.set_static_screen_control = dcn30_set_static_screen_control,
+	.setup_stereo = dcn10_setup_stereo,
+	.set_avmute = dcn30_set_avmute,
+	.log_hw_state = dcn10_log_hw_state,
+	.get_hw_state = dcn10_get_hw_state,
+	.clear_status_bits = dcn10_clear_status_bits,
+	.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.edp_power_control = dce110_edp_power_control,
+	.edp_wait_for_T12 = dce110_edp_wait_for_T12,
+	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+	.set_cursor_position = dcn10_set_cursor_position,
+	.set_cursor_attribute = dcn10_set_cursor_attribute,
+	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+	.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+	.set_clock = dcn10_set_clock,
+	.get_clock = dcn10_get_clock,
+	.program_triplebuffer = dcn20_program_triple_buffer,
+	.enable_writeback = dcn30_enable_writeback,
+	.disable_writeback = dcn30_disable_writeback,
+	.update_writeback = dcn30_update_writeback,
+	.mmhubbub_warmup = dcn30_mmhubbub_warmup,
+	.dmdata_status_done = dcn20_dmdata_status_done,
+	.program_dmdata_engine = dcn30_program_dmdata_engine,
+	.set_dmdata_attributes = dcn20_set_dmdata_attributes,
+	.init_sys_ctx = dcn31_init_sys_ctx,
+	.init_vm_ctx = dcn20_init_vm_ctx,
+	.set_flip_control_gsl = dcn20_set_flip_control_gsl,
+	.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+	.calc_vupdate_position = dcn10_calc_vupdate_position,
+	.power_down = dce110_power_down,
+	.set_backlight_level = dcn21_set_backlight_level,
+	.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
+	.set_pipe = dcn21_set_pipe,
+	.enable_lvds_link_output = dce110_enable_lvds_link_output,
+	.enable_tmds_link_output = dce110_enable_tmds_link_output,
+	.enable_dp_link_output = dce110_enable_dp_link_output,
+	.disable_link_output = dcn32_disable_link_output,
+	.z10_restore = dcn35_z10_restore,
+	.z10_save_init = dcn31_z10_save_init,
+	.set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
+	.optimize_pwr_state = dcn21_optimize_pwr_state,
+	.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+	.update_visual_confirm_color = dcn10_update_visual_confirm_color,
+	.apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations,
+	.update_dsc_pg = dcn32_update_dsc_pg,
+	.calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
+	.calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
+	.hw_block_power_up = dcn35_hw_block_power_up,
+	.hw_block_power_down = dcn35_hw_block_power_down,
+	.root_clock_control = dcn35_root_clock_control,
+	.set_idle_state = dcn35_set_idle_state,
+	.get_idle_state = dcn35_get_idle_state
+};
+
+static const struct hwseq_private_funcs dcn351_private_funcs = {
+	.init_pipes = dcn35_init_pipes,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+	.update_mpcc = dcn20_update_mpcc,
+	.set_input_transfer_func = dcn32_set_input_transfer_func,
+	.set_output_transfer_func = dcn32_set_output_transfer_func,
+	.power_down = dce110_power_down,
+	.enable_display_power_gating = dcn10_dummy_display_power_gating,
+	.blank_pixel_data = dcn20_blank_pixel_data,
+	.reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap,
+	.enable_stream_timing = dcn20_enable_stream_timing,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+	.did_underflow_occur = dcn10_did_underflow_occur,
+	.init_blank = dcn20_init_blank,
+	.disable_vga = NULL,
+	.bios_golden_init = dcn10_bios_golden_init,
+	.plane_atomic_disable = dcn35_plane_atomic_disable,
+	//.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/
+	//.hubp_pg_control = dcn35_hubp_pg_control,
+	.enable_power_gating_plane = dcn35_enable_power_gating_plane,
+	.dpp_root_clock_control = dcn35_dpp_root_clock_control,
+	.program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
+	.update_odm = dcn35_update_odm,
+	.set_hdr_multiplier = dcn10_set_hdr_multiplier,
+	.verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+	.wait_for_blank_complete = dcn20_wait_for_blank_complete,
+	.dccg_init = dcn20_dccg_init,
+	.set_mcm_luts = dcn32_set_mcm_luts,
+	.setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
+	.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
+	.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
+	.is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+	.dsc_pg_control = dcn35_dsc_pg_control,
+	.dsc_pg_status = dcn32_dsc_pg_status,
+	.enable_plane = dcn35_enable_plane,
+};
+
+void dcn351_hw_sequencer_construct(struct dc *dc)
+{
+	dc->hwss = dcn351_funcs;
+	dc->hwseq->funcs = dcn351_private_funcs;
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
new file mode 100644
index 0000000000000..970b01008b238
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN351_INIT_H__
+#define __DC_DCN351_INIT_H__
+
+struct dc;
+
+void dcn351_hw_sequencer_construct(struct dc *dc);
+
+#endif /* __DC_DCN351_INIT_H__ */
-- 
GitLab


From 9ade4870b87b09e1f132ba92c1ab13a6769d1b0f Mon Sep 17 00:00:00 2001
From: Gabe Teeger <gabe.teeger@amd.com>
Date: Thu, 14 Dec 2023 20:56:58 -0500
Subject: [PATCH 2296/2340] drm/amd/display: Fix Mismatch between pipe and
 stream

[Why]
Failing mode validation during dc_commit, leading to blackscreen with an
8k DP2 display during mode change.

[What]
Fix mixmatch between pipe and stream, which prevented us from
recognizing the link as DP2.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Dmytro Laktyushkin <dmytro.laktyushkin@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Gabe Teeger <gabe.teeger@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../display/dc/dml2/dml2_translation_helper.c    | 16 +++++++++++++---
 drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c |  6 +++---
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index bc2959fda5be3..fa6a93dd96295 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -1049,9 +1049,10 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2,
 
 void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg)
 {
-	int i = 0, j = 0;
+	int i = 0, j = 0, k = 0;
 	int disp_cfg_stream_location, disp_cfg_plane_location;
 	enum mall_stream_type stream_mall_type;
+	struct pipe_ctx *current_pipe_context;
 
 	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
 		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false;
@@ -1071,6 +1072,15 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 	dml2_populate_pipe_to_plane_index_mapping(dml2, context);
 
 	for (i = 0; i < context->stream_count; i++) {
+		current_pipe_context = NULL;
+		for (k = 0; k < MAX_PIPES; k++) {
+			/* find one pipe allocated to this stream for the purpose of getting
+			info about the link later */
+			if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) {
+				current_pipe_context = &context->res_ctx.pipe_ctx[k];
+				break;
+			}
+		}
 		disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg);
 		stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]);
 
@@ -1080,7 +1090,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 		ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
 
 		populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]);
-		populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], &context->res_ctx.pipe_ctx[i]);
+		populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context);
 		switch (context->streams[i]->debug.force_odm_combine_segments) {
 		case 2:
 			dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1;
@@ -1137,7 +1147,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 
 				if (j >= 1) {
 					populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]);
-					populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], &context->res_ctx.pipe_ctx[i]);
+					populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context);
 					switch (context->streams[i]->debug.force_odm_combine_segments) {
 					case 2:
 						dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 8c0794af5adee..1068b962d1c12 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -155,12 +155,12 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e
 
 bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 {
+	if (pipe_ctx == NULL || pipe_ctx->stream == NULL)
+		return false;
+
 	/* If this assert is hit then we have a link encoder dynamic management issue */
 	ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true);
 
-	if (pipe_ctx->stream == NULL)
-		return false;
-
 	/* Count MST hubs once by treating only 1st remote sink in topology as an encoder */
 	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) {
 		return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
-- 
GitLab


From b8a204fb1a97b39a7fcaefbf2c6c4d01aa4f3c57 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Thu, 14 Dec 2023 16:46:35 -0500
Subject: [PATCH 2297/2340] drm/amd/display: Verify disallow bits were cleared
 for idle

[Why]
A hang was observed where a read-modify-write access occurred due to the
register for idle state being shared between DMCUB and driver.

dmcub read - idle allow / no commit
driver read - idle allow / no commit
driver write - idle disallow / no commit
dmcub write - idle allow / commit

Resulting in DMCUB re-entering IPS after a disable and keeping the allow
high.

[How]
Long term we need to split commit/allow into two registers or use shared
DRAM state, but short term we can reduce the repro rate by ensuring that
the disallow went through by bounding the expected worst case scenario.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Hansen Dsouza <hansen.dsouza@amd.com>
Reviewed-by: Ovidiu Bunea <ovidiu.bunea@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index dadeaa9c92dd5..a65c485b0b49a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1233,8 +1233,16 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
 		if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
 			// Wait for evaluation time
-			udelay(dc->debug.ips2_eval_delay_us);
-			commit_state = dc->hwss.get_idle_state(dc);
+			for (;;) {
+				udelay(dc->debug.ips2_eval_delay_us);
+				commit_state = dc->hwss.get_idle_state(dc);
+				if (commit_state & DMUB_IPS2_ALLOW_MASK)
+					break;
+
+				/* allow was still set, retry eval delay */
+				dc->hwss.set_idle_state(dc, false);
+			}
+
 			if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
 				// Tell PMFW to exit low power state
 				dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
-- 
GitLab


From 292c2116b2ae84c7e799ae340981e60551b18f5e Mon Sep 17 00:00:00 2001
From: Relja Vojvodic <relja.vojvodic@amd.com>
Date: Fri, 15 Dec 2023 18:15:16 -0500
Subject: [PATCH 2298/2340] drm/amd/display: Fixing stream allocation
 regression

For certain dual display configs that had one display using a 1080p
mode, the DPM level used to drive the configs regressed from DPM 0 to
DPM 3. This was caused by a missing check that should have only limited
the pipe segments on non-phantom pipes. This caused issues with detile
buffer allocation, which dissallow subvp from being used

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Dillon Varone <dillon.varone@amd.com>
Reviewed-by: Martin Leung <martin.leung@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Relja Vojvodic <relja.vojvodic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index 44ba1edb7f538..e4a328b45c8a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -276,7 +276,7 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-			if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx)) {
+			if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 				if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
 					if (pipe_segments[i] > 4)
 						pipe_segments[i] = 4;
-- 
GitLab


From d6398866a6b47e92319ef6efdb0126a4fbb7796a Mon Sep 17 00:00:00 2001
From: Ivan Lipski <ivlipski@amd.com>
Date: Mon, 2 Oct 2023 13:47:54 -0400
Subject: [PATCH 2299/2340] Re-revert "drm/amd/display: Enable Replay for
 static screen use cases"

This reverts commit 44e60b14d5a72f91fd0bdeae8da59ae37a3ca8e5.

Since, it causes a regression in which eDP displays with PSR support,
but no Replay support (Sink support <= 0x03), fail to enable PSR and
consequently all IGT amd_psr tests fail. So, revert this until a more
suitable fix can be found.

This got brought back accidently with the backmerge.

Acked-by: Leo Li <sunpeng.li@amd.com>
Signed-off-by: Ivan Lipski <ivlipski@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 -------------------
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c    |  9 +-------
 drivers/gpu/drm/amd/include/amd_shared.h      |  2 --
 3 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a144024df97c8..d869d0d7bf4c1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -67,7 +67,6 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 #include "amdgpu_dm_psr.h"
-#include "amdgpu_dm_replay.h"
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
@@ -4389,7 +4388,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 	enum dc_connection_type new_connection_type = dc_connection_none;
 	const struct dc_plane_cap *plane;
 	bool psr_feature_enabled = false;
-	bool replay_feature_enabled = false;
 	int max_overlay = dm->dc->caps.max_slave_planes;
 
 	dm->display_indexes_num = dm->dc->caps.max_streams;
@@ -4501,20 +4499,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		}
 	}
 
-	if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
-		switch (adev->ip_versions[DCE_HWIP][0]) {
-		case IP_VERSION(3, 1, 4):
-		case IP_VERSION(3, 1, 5):
-		case IP_VERSION(3, 1, 6):
-		case IP_VERSION(3, 2, 0):
-		case IP_VERSION(3, 2, 1):
-			replay_feature_enabled = true;
-			break;
-		default:
-			replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
-			break;
-		}
-	}
 	/* loops over all connectors on the board */
 	for (i = 0; i < link_cnt; i++) {
 		struct dc_link *link = NULL;
@@ -4583,12 +4567,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 				amdgpu_dm_update_connector_after_detect(aconnector);
 				setup_backlight_device(dm, aconnector);
 
-				/*
-				 * Disable psr if replay can be enabled
-				 */
-				if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector))
-					psr_feature_enabled = false;
-
 				if (psr_feature_enabled)
 					amdgpu_dm_set_psr_caps(link);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 7545a184e43a6..4439e5a27362c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -29,7 +29,6 @@
 #include "dc.h"
 #include "amdgpu.h"
 #include "amdgpu_dm_psr.h"
-#include "amdgpu_dm_replay.h"
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_plane.h"
 #include "amdgpu_dm_trace.h"
@@ -124,12 +123,7 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
 	 * fill_dc_dirty_rects().
 	 */
 	if (vblank_work->stream && vblank_work->stream->link) {
-		/*
-		 * Prioritize replay, instead of psr
-		 */
-		if (vblank_work->stream->link->replay_settings.replay_feature_enabled)
-			amdgpu_dm_replay_enable(vblank_work->stream, false);
-		else if (vblank_work->enable) {
+		if (vblank_work->enable) {
 			if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
 			    vblank_work->stream->link->psr_settings.psr_allow_active)
 				amdgpu_dm_psr_disable(vblank_work->stream);
@@ -138,7 +132,6 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
 			   !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
 #endif
-			   vblank_work->stream->link->panel_config.psr.disallow_replay &&
 			   vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
 			amdgpu_dm_psr_enable(vblank_work->stream);
 		}
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index c739f12fb937d..5cad456f2e10d 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -244,7 +244,6 @@ enum DC_FEATURE_MASK {
 	DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
 	DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
 	DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
-	DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
 };
 
 enum DC_DEBUG_MASK {
@@ -255,7 +254,6 @@ enum DC_DEBUG_MASK {
 	DC_DISABLE_PSR = 0x10,
 	DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
 	DC_DISABLE_MPO = 0x40,
-	DC_DISABLE_REPLAY = 0x50,
 	DC_ENABLE_DPIA_TRACE = 0x80,
 	DC_ENABLE_DML2 = 0x100,
 	DC_DISABLE_PSR_SU = 0x200,
-- 
GitLab


From e379787cbc2aa73c63a795ec55140f9b21c27d8c Mon Sep 17 00:00:00 2001
From: Tom Chung <chiahsuan.chung@amd.com>
Date: Fri, 1 Dec 2023 14:15:45 +0800
Subject: [PATCH 2300/2340] drm/amd/display: Add some functions for Panel
 Replay

[WHY]
Prepare for enabling the Panel Replay feature

[HOW]
- Add some Panel Replay setting functions in DC
- Add the Panel Replay resource in dcn35_resource.c
- Add debug masks for Panel Replay

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Reviewed-by: Leo Li <sunpeng.li@amd.com>
Signed-off-by: Tom Chung <chiahsuan.chung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c      | 34 +++++++
 .../drm/amd/display/dc/core/dc_link_exports.c |  7 ++
 drivers/gpu/drm/amd/display/dc/dc.h           | 23 +++++
 .../gpu/drm/amd/display/dc/dce/dmub_replay.c  | 96 +++++++++++++++++--
 .../gpu/drm/amd/display/dc/dce/dmub_replay.h  |  2 +
 drivers/gpu/drm/amd/display/dc/inc/link.h     |  2 +
 .../drm/amd/display/dc/link/link_factory.c    |  1 +
 .../link/protocols/link_edp_panel_control.c   | 27 ++++++
 .../link/protocols/link_edp_panel_control.h   |  2 +
 .../dc/resource/dcn35/dcn35_resource.c        | 13 +++
 .../amd/display/modules/power/power_helpers.c | 28 ++++++
 .../amd/display/modules/power/power_helpers.h |  5 +
 drivers/gpu/drm/amd/include/amd_shared.h      |  1 +
 13 files changed, 235 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index b7c2eaebf8bfd..2d7205058c64a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -4770,6 +4770,38 @@ bool dc_set_psr_allow_active(struct dc *dc, bool enable)
 	return true;
 }
 
+/* enable/disable eDP Replay without specify stream for eDP */
+bool dc_set_replay_allow_active(struct dc *dc, bool active)
+{
+	int i;
+	bool allow_active;
+
+	for (i = 0; i < dc->current_state->stream_count; i++) {
+		struct dc_link *link;
+		struct dc_stream_state *stream = dc->current_state->streams[i];
+
+		link = stream->link;
+		if (!link)
+			continue;
+
+		if (link->replay_settings.replay_feature_enabled) {
+			if (active && !link->replay_settings.replay_allow_active) {
+				allow_active = true;
+				if (!dc_link_set_replay_allow_active(link, &allow_active,
+					false, false, NULL))
+					return false;
+			} else if (!active && link->replay_settings.replay_allow_active) {
+				allow_active = false;
+				if (!dc_link_set_replay_allow_active(link, &allow_active,
+					true, false, NULL))
+					return false;
+			}
+		}
+	}
+
+	return true;
+}
+
 void dc_allow_idle_optimizations(struct dc *dc, bool allow)
 {
 	if (dc->debug.disable_idle_power_optimizations)
@@ -5315,6 +5347,8 @@ bool dc_abm_save_restore(
 	struct dc_link *link = stream->sink->link;
 	struct dc_link *edp_links[MAX_NUM_EDP];
 
+	if (link->replay_settings.replay_feature_enabled)
+		return false;
 
 	/*find primary pipe associated with stream*/
 	for (i = 0; i < MAX_PIPES; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index f365773d57148..c6c35037bdb8b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -467,6 +467,13 @@ bool dc_link_setup_psr(struct dc_link *link,
 	return link->dc->link_srv->edp_setup_psr(link, stream, psr_config, psr_context);
 }
 
+bool dc_link_set_replay_allow_active(struct dc_link *link, const bool *allow_active,
+		bool wait, bool force_static, const unsigned int *power_opts)
+{
+	return link->dc->link_srv->edp_set_replay_allow_active(link, allow_active, wait,
+			force_static, power_opts);
+}
+
 bool dc_link_get_replay_state(const struct dc_link *link, uint64_t *state)
 {
 	return link->dc->link_srv->edp_get_replay_state(link, state);
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 565de7428b6ca..f30a341bc0901 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -463,6 +463,12 @@ enum dml_hostvm_override_opts {
 	DML_HOSTVM_OVERRIDE_TRUE = 0x2,
 };
 
+enum dc_replay_power_opts {
+	replay_power_opt_invalid		= 0x0,
+	replay_power_opt_smu_opt_static_screen	= 0x1,
+	replay_power_opt_z10_static_screen	= 0x10,
+};
+
 enum dcc_option {
 	DCC_ENABLE = 0,
 	DCC_DISABLE = 1,
@@ -2077,6 +2083,20 @@ bool dc_link_setup_psr(struct dc_link *dc_link,
 		const struct dc_stream_state *stream, struct psr_config *psr_config,
 		struct psr_context *psr_context);
 
+/*
+ * Communicate with DMUB to allow or disallow Panel Replay on the specified link:
+ *
+ * @link: pointer to the dc_link struct instance
+ * @enable: enable(active) or disable(inactive) replay
+ * @wait: state transition need to wait the active set completed.
+ * @force_static: force disable(inactive) the replay
+ * @power_opts: set power optimazation parameters to DMUB.
+ *
+ * return: allow Replay active will return true, else will return false.
+ */
+bool dc_link_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
+		bool wait, bool force_static, const unsigned int *power_opts);
+
 bool dc_link_get_replay_state(const struct dc_link *dc_link, uint64_t *state);
 
 /* On eDP links this function call will stall until T12 has elapsed.
@@ -2321,6 +2341,9 @@ void dc_hardware_release(struct dc *dc);
 void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc);
 
 bool dc_set_psr_allow_active(struct dc *dc, bool enable);
+
+bool dc_set_replay_allow_active(struct dc *dc, bool active);
+
 void dc_z10_restore(const struct dc *dc);
 void dc_z10_save_init(struct dc *dc);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
index 28149e53c2a68..38e4797e9476c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
@@ -258,13 +258,97 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst,
 		*residency = 0;
 }
 
+/**
+ * Set REPLAY power optimization flags and coasting vtotal.
+ */
+static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub,
+		unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = dmub->ctx;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.type = DMUB_CMD__REPLAY;
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.sub_type =
+		DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL;
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.payload_bytes =
+		sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal);
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.power_opt = power_opt;
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.panel_inst = panel_inst;
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_coasting_vtotal_data.coasting_vtotal = coasting_vtotal;
+
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/**
+ * send Replay general cmd to DMUB.
+ */
+static void dmub_replay_send_cmd(struct dmub_replay *dmub,
+		enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *ctx = NULL;
+
+	if (dmub == NULL || cmd_element == NULL)
+		return;
+
+	ctx = dmub->ctx;
+	if (ctx != NULL) {
+
+		if (msg != Replay_Msg_Not_Support) {
+			memset(&cmd, 0, sizeof(cmd));
+			//Header
+			cmd.replay_set_timing_sync.header.type = DMUB_CMD__REPLAY;
+		} else
+			return;
+	} else
+		return;
+
+	switch (msg) {
+	case Replay_Set_Timing_Sync_Supported:
+		//Header
+		cmd.replay_set_timing_sync.header.sub_type =
+			DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED;
+		cmd.replay_set_timing_sync.header.payload_bytes =
+			sizeof(struct dmub_rb_cmd_replay_set_timing_sync);
+		//Cmd Body
+		cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst =
+						cmd_element->sync_data.panel_inst;
+		cmd.replay_set_timing_sync.replay_set_timing_sync_data.timing_sync_supported =
+						cmd_element->sync_data.timing_sync_supported;
+		break;
+	case Replay_Set_Residency_Frameupdate_Timer:
+		//Header
+		cmd.replay_set_frameupdate_timer.header.sub_type =
+			DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER;
+		cmd.replay_set_frameupdate_timer.header.payload_bytes =
+			sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer);
+		//Cmd Body
+		cmd.replay_set_frameupdate_timer.data.panel_inst =
+						cmd_element->panel_inst;
+		cmd.replay_set_frameupdate_timer.data.enable =
+						cmd_element->timer_data.enable;
+		cmd.replay_set_frameupdate_timer.data.frameupdate_count =
+						cmd_element->timer_data.frameupdate_count;
+		break;
+	case Replay_Msg_Not_Support:
+	default:
+		return;
+		break;
+	}
+
+	dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
 static const struct dmub_replay_funcs replay_funcs = {
-	.replay_copy_settings		= dmub_replay_copy_settings,
-	.replay_enable			= dmub_replay_enable,
-	.replay_get_state		= dmub_replay_get_state,
-	.replay_set_power_opt		= dmub_replay_set_power_opt,
-	.replay_set_coasting_vtotal	= dmub_replay_set_coasting_vtotal,
-	.replay_residency		= dmub_replay_residency,
+	.replay_copy_settings				= dmub_replay_copy_settings,
+	.replay_enable					= dmub_replay_enable,
+	.replay_get_state				= dmub_replay_get_state,
+	.replay_set_power_opt				= dmub_replay_set_power_opt,
+	.replay_set_coasting_vtotal			= dmub_replay_set_coasting_vtotal,
+	.replay_residency				= dmub_replay_residency,
+	.replay_set_power_opt_and_coasting_vtotal	= dmub_replay_set_power_opt_and_coasting_vtotal,
+	.replay_send_cmd				= dmub_replay_send_cmd,
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
index b3ee90a0b8b3d..3613aff994d72 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -51,6 +51,8 @@ struct dmub_replay_funcs {
 		uint8_t panel_inst);
 	void (*replay_residency)(struct dmub_replay *dmub,
 		uint8_t panel_inst, uint32_t *residency, const bool is_start, const bool is_alpm);
+	void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub,
+		unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal);
 };
 
 struct dmub_replay *dmub_replay_create(struct dc_context *ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h
index 7439865d1b50c..26fe81f213da5 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link.h
@@ -289,6 +289,8 @@ struct link_service {
 	bool (*edp_replay_residency)(const struct dc_link *link,
 			unsigned int *residency, const bool is_start,
 			const bool is_alpm);
+	bool (*edp_set_replay_power_opt_and_coasting_vtotal)(struct dc_link *link,
+			const unsigned int *power_opts, uint16_t coasting_vtotal);
 
 	bool (*edp_wait_for_t12)(struct dc_link *link);
 	bool (*edp_is_ilr_optimization_required)(struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 5464d8d26bd37..37d3027c32dcb 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -216,6 +216,7 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s
 	link_srv->edp_send_replay_cmd = edp_send_replay_cmd;
 	link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal;
 	link_srv->edp_replay_residency = edp_replay_residency;
+	link_srv->edp_set_replay_power_opt_and_coasting_vtotal = edp_set_replay_power_opt_and_coasting_vtotal;
 
 	link_srv->edp_wait_for_t12 = edp_wait_for_t12;
 	link_srv->edp_is_ilr_optimization_required =
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index e8de68e62403d..7f11965282186 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -1068,6 +1068,33 @@ bool edp_replay_residency(const struct dc_link *link,
 	return true;
 }
 
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+	const unsigned int *power_opts, uint16_t coasting_vtotal)
+{
+	struct dc  *dc = link->ctx->dc;
+	struct dmub_replay *replay = dc->res_pool->replay;
+	unsigned int panel_inst;
+
+	if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+		return false;
+
+	/* Only both power and coasting vtotal changed, this func could return true */
+	if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts &&
+		coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) {
+		if (link->replay_settings.replay_feature_enabled &&
+			replay->funcs->replay_set_power_opt_and_coasting_vtotal) {
+			replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay,
+				*power_opts, panel_inst, coasting_vtotal);
+			link->replay_settings.replay_power_opt_active = *power_opts;
+			link->replay_settings.coasting_vtotal = coasting_vtotal;
+		} else
+			return false;
+	} else
+		return false;
+
+	return true;
+}
+
 static struct abm *get_abm_from_stream_res(const struct dc_link *link)
 {
 	int i;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index b7493ff4fceef..34e521af7bb48 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -63,6 +63,8 @@ bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal);
 bool edp_replay_residency(const struct dc_link *link,
 	unsigned int *residency, const bool is_start, const bool is_alpm);
 bool edp_get_replay_state(const struct dc_link *link, uint64_t *state);
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+	const unsigned int *power_opts, uint16_t coasting_vtotal);
 bool edp_wait_for_t12(struct dc_link *link);
 bool edp_is_ilr_optimization_required(struct dc_link *link,
        struct dc_crtc_timing *crtc_timing);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 39594e8ffb5ee..761ec98918756 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -96,6 +96,7 @@
 #include "reg_helper.h"
 #include "dce/dmub_abm.h"
 #include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
 #include "dml/dcn31/display_mode_vba_31.h" /*temp*/
@@ -788,6 +789,7 @@ static const struct dc_panel_config panel_config_defaults = {
 	.psr = {
 		.disable_psr = false,
 		.disallow_psrsu = false,
+		.disallow_replay = false,
 	},
 	.ilr = {
 		.optimize_edp_link_rate = true,
@@ -1546,6 +1548,9 @@ static void dcn35_resource_destruct(struct dcn35_resource_pool *pool)
 	if (pool->base.psr != NULL)
 		dmub_psr_destroy(&pool->base.psr);
 
+	if (pool->base.replay != NULL)
+		dmub_replay_destroy(&pool->base.replay);
+
 	if (pool->base.pg_cntl != NULL)
 		dcn_pg_cntl_destroy(&pool->base.pg_cntl);
 
@@ -2030,6 +2035,14 @@ static bool dcn35_resource_construct(
 		goto create_fail;
 	}
 
+	/* Replay */
+	pool->base.replay = dmub_replay_create(ctx);
+	if (pool->base.replay == NULL) {
+		dm_error("DC: failed to create replay obj!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
 	/* ABM */
 	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
 		pool->base.multiple_abms[i] = dmub_abm_create(ctx,
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 1675314a3ff20..34b848c981e06 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -973,6 +973,34 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link,
 	return true;
 }
 
+void set_replay_coasting_vtotal(struct dc_link *link,
+	enum replay_coasting_vtotal_type type,
+	uint16_t vtotal)
+{
+	link->replay_settings.coasting_vtotal_table[type] = vtotal;
+}
+
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+	uint16_t vtotal, uint16_t htotal)
+{
+	uint8_t max_link_off_frame_count = 0;
+	uint16_t max_deviation_line = 0,  pixel_deviation_per_line = 0;
+
+	max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
+	pixel_deviation_per_line = link->dpcd_caps.pr_info.pixel_deviation_per_line;
+
+	if (htotal != 0 && vtotal != 0)
+		max_link_off_frame_count = htotal * max_deviation_line / (pixel_deviation_per_line * vtotal);
+	else
+		ASSERT(0);
+
+	link->replay_settings.link_off_frame_count_level =
+		max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_BEST ? PR_LINK_OFF_FRAME_COUNT_BEST :
+		max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_GOOD ? PR_LINK_OFF_FRAME_COUNT_GOOD :
+		PR_LINK_OFF_FRAME_COUNT_FAIL;
+
+}
+
 bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps)
 {
 	unsigned int data_points_size;
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index d9e0d67d67f70..c17bbc6fb38ca 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -54,6 +54,11 @@ bool dmub_init_abm_config(struct resource_pool *res_pool,
 		unsigned int inst);
 
 void init_replay_config(struct dc_link *link, struct replay_config *pr_config);
+void set_replay_coasting_vtotal(struct dc_link *link,
+	enum replay_coasting_vtotal_type type,
+	uint16_t vtotal);
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+	uint16_t vtotal, uint16_t htotal);
 
 bool is_psr_su_specific_panel(struct dc_link *link);
 void mod_power_calc_psr_configs(struct psr_config *psr_config,
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 5cad456f2e10d..1dc5dd9b7bf70 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -257,6 +257,7 @@ enum DC_DEBUG_MASK {
 	DC_ENABLE_DPIA_TRACE = 0x80,
 	DC_ENABLE_DML2 = 0x100,
 	DC_DISABLE_PSR_SU = 0x200,
+	DC_DISABLE_REPLAY = 0x400,
 };
 
 enum amd_dpm_forced_level;
-- 
GitLab


From 29bc46c4da4ab61bb69b2c8099be6f5d7454133f Mon Sep 17 00:00:00 2001
From: Asad Kamal <asad.kamal@amd.com>
Date: Wed, 20 Dec 2023 16:07:22 +0800
Subject: [PATCH 2301/2340] drm/amd/pm: Use separate metric table for APU

Use separate metric table for APU and Non APU
systems for smu_v_13_0_6 to get metric data

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h   |  90 ++++++++++++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 124 ++++++++++--------
 2 files changed, 156 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index fef2d290f3f25..8f166aa3043c6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -219,7 +219,95 @@ typedef struct __attribute__((packed, aligned(4))) {
   uint32_t PCIenReplayARolloverCountAcc;  // The Pcie counter itself is accumulated
   uint32_t PCIeNAKSentCountAcc;           // The Pcie counter itself is accumulated
   uint32_t PCIeNAKReceivedCountAcc;       // The Pcie counter itself is accumulated
-} MetricsTable_t;
+} MetricsTableX_t;
+
+typedef struct __attribute__((packed, aligned(4))) {
+  uint32_t AccumulationCounter;
+
+  //TEMPERATURE
+  uint32_t MaxSocketTemperature;
+  uint32_t MaxVrTemperature;
+  uint32_t MaxHbmTemperature;
+  uint64_t MaxSocketTemperatureAcc;
+  uint64_t MaxVrTemperatureAcc;
+  uint64_t MaxHbmTemperatureAcc;
+
+  //POWER
+  uint32_t SocketPowerLimit;
+  uint32_t MaxSocketPowerLimit;
+  uint32_t SocketPower;
+
+  //ENERGY
+  uint64_t Timestamp;
+  uint64_t SocketEnergyAcc;
+  uint64_t CcdEnergyAcc;
+  uint64_t XcdEnergyAcc;
+  uint64_t AidEnergyAcc;
+  uint64_t HbmEnergyAcc;
+
+  //FREQUENCY
+  uint32_t CclkFrequencyLimit;
+  uint32_t GfxclkFrequencyLimit;
+  uint32_t FclkFrequency;
+  uint32_t UclkFrequency;
+  uint32_t SocclkFrequency[4];
+  uint32_t VclkFrequency[4];
+  uint32_t DclkFrequency[4];
+  uint32_t LclkFrequency[4];
+  uint64_t GfxclkFrequencyAcc[8];
+  uint64_t CclkFrequencyAcc[96];
+
+  //FREQUENCY RANGE
+  uint32_t MaxCclkFrequency;
+  uint32_t MinCclkFrequency;
+  uint32_t MaxGfxclkFrequency;
+  uint32_t MinGfxclkFrequency;
+  uint32_t FclkFrequencyTable[4];
+  uint32_t UclkFrequencyTable[4];
+  uint32_t SocclkFrequencyTable[4];
+  uint32_t VclkFrequencyTable[4];
+  uint32_t DclkFrequencyTable[4];
+  uint32_t LclkFrequencyTable[4];
+  uint32_t MaxLclkDpmRange;
+  uint32_t MinLclkDpmRange;
+
+  //XGMI
+  uint32_t XgmiWidth;
+  uint32_t XgmiBitrate;
+  uint64_t XgmiReadBandwidthAcc[8];
+  uint64_t XgmiWriteBandwidthAcc[8];
+
+  //ACTIVITY
+  uint32_t SocketC0Residency;
+  uint32_t SocketGfxBusy;
+  uint32_t DramBandwidthUtilization;
+  uint64_t SocketC0ResidencyAcc;
+  uint64_t SocketGfxBusyAcc;
+  uint64_t DramBandwidthAcc;
+  uint32_t MaxDramBandwidth;
+  uint64_t DramBandwidthUtilizationAcc;
+  uint64_t PcieBandwidthAcc[4];
+
+  //THROTTLERS
+  uint32_t ProchotResidencyAcc;
+  uint32_t PptResidencyAcc;
+  uint32_t SocketThmResidencyAcc;
+  uint32_t VrThmResidencyAcc;
+  uint32_t HbmThmResidencyAcc;
+  uint32_t GfxLockXCDMak;
+
+  // New Items at end to maintain driver compatibility
+  uint32_t GfxclkFrequency[8];
+
+  //PSNs
+  uint64_t PublicSerialNumber_AID[4];
+  uint64_t PublicSerialNumber_XCD[8];
+  uint64_t PublicSerialNumber_CCD[12];
+
+  //XGMI Data tranfser size
+  uint64_t XgmiReadDataSizeAcc[8];//in KByte
+  uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+} MetricsTableA_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x3
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 81b217bbdebbb..96777a3651335 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -248,6 +248,8 @@ struct PPTable_t {
 #define SMUQ10_TO_UINT(x) ((x) >> 10)
 #define SMUQ10_FRAC(x) ((x) & 0x3ff)
 #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200))
+#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\
+		(metrics_a->field) : (metrics_x->field))
 
 struct smu_v13_0_6_dpm_map {
 	enum smu_clk_type clk_type;
@@ -330,7 +332,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
 		SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE,
 			       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
-	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t),
+	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS,
+		       max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)),
 		       PAGE_SIZE,
 		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
@@ -338,7 +341,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
 		       PAGE_SIZE,
 		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
-	smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
+	smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t),
+		       sizeof(MetricsTableA_t)), GFP_KERNEL);
 	if (!smu_table->metrics_table)
 		return -ENOMEM;
 	smu_table->metrics_time = 0;
@@ -469,9 +473,11 @@ static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu,
 static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table;
+	MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table;
+	MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table;
 	struct PPTable_t *pptable =
 		(struct PPTable_t *)smu_table->driver_pptable;
+	struct amdgpu_device *adev = smu->adev;
 	int ret, i, retry = 100;
 	uint32_t table_version;
 
@@ -483,7 +489,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 				return ret;
 
 			/* Ensure that metrics have been updated */
-			if (metrics->AccumulationCounter)
+			if (GET_METRIC_FIELD(AccumulationCounter))
 				break;
 
 			usleep_range(1000, 1100);
@@ -500,29 +506,29 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 			table_version;
 
 		pptable->MaxSocketPowerLimit =
-			SMUQ10_ROUND(metrics->MaxSocketPowerLimit);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit));
 		pptable->MaxGfxclkFrequency =
-			SMUQ10_ROUND(metrics->MaxGfxclkFrequency);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency));
 		pptable->MinGfxclkFrequency =
-			SMUQ10_ROUND(metrics->MinGfxclkFrequency);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency));
 
 		for (i = 0; i < 4; ++i) {
 			pptable->FclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->FclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]);
 			pptable->UclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->UclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]);
 			pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND(
-				metrics->SocclkFrequencyTable[i]);
+				GET_METRIC_FIELD(SocclkFrequencyTable)[i]);
 			pptable->VclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->VclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]);
 			pptable->DclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->DclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]);
 			pptable->LclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->LclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]);
 		}
 
 		/* use AID0 serial number by default */
-		pptable->PublicSerialNumber_AID = metrics->PublicSerialNumber_AID[0];
+		pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0];
 
 		pptable->Init = true;
 	}
@@ -824,7 +830,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
 					    uint32_t *value)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table;
+	MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table;
+	MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table;
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 	int xcc_id;
@@ -839,50 +846,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
 	case METRICS_AVERAGE_GFXCLK:
 		if (smu->smc_fw_version >= 0x552F00) {
 			xcc_id = GET_INST(GC, 0);
-			*value = SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+			*value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]);
 		} else {
 			*value = 0;
 		}
 		break;
 	case METRICS_CURR_SOCCLK:
 	case METRICS_AVERAGE_SOCCLK:
-		*value = SMUQ10_ROUND(metrics->SocclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]);
 		break;
 	case METRICS_CURR_UCLK:
 	case METRICS_AVERAGE_UCLK:
-		*value = SMUQ10_ROUND(metrics->UclkFrequency);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency));
 		break;
 	case METRICS_CURR_VCLK:
-		*value = SMUQ10_ROUND(metrics->VclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]);
 		break;
 	case METRICS_CURR_DCLK:
-		*value = SMUQ10_ROUND(metrics->DclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]);
 		break;
 	case METRICS_CURR_FCLK:
-		*value = SMUQ10_ROUND(metrics->FclkFrequency);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency));
 		break;
 	case METRICS_AVERAGE_GFXACTIVITY:
-		*value = SMUQ10_ROUND(metrics->SocketGfxBusy);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy));
 		break;
 	case METRICS_AVERAGE_MEMACTIVITY:
-		*value = SMUQ10_ROUND(metrics->DramBandwidthUtilization);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization));
 		break;
 	case METRICS_CURR_SOCKETPOWER:
-		*value = SMUQ10_ROUND(metrics->SocketPower) << 8;
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8;
 		break;
 	case METRICS_TEMPERATURE_HOTSPOT:
-		*value = SMUQ10_ROUND(metrics->MaxSocketTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	case METRICS_TEMPERATURE_MEM:
-		*value = SMUQ10_ROUND(metrics->MaxHbmTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	/* This is the max of all VRs and not just SOC VR.
 	 * No need to define another data type for the same.
 	 */
 	case METRICS_TEMPERATURE_VRSOC:
-		*value = SMUQ10_ROUND(metrics->MaxVrTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	default:
@@ -2071,63 +2078,66 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 		(struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0, xcc_id, inst, i;
-	MetricsTable_t *metrics;
+	MetricsTableX_t *metrics_x;
+	MetricsTableA_t *metrics_a;
 	u16 link_width_level;
 
-	metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
-	ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
+	metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL);
+	ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true);
 	if (ret) {
-		kfree(metrics);
+		kfree(metrics_x);
 		return ret;
 	}
 
+	metrics_a = (MetricsTableA_t *)metrics_x;
+
 	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
 
 	gpu_metrics->temperature_hotspot =
-		SMUQ10_ROUND(metrics->MaxSocketTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature));
 	/* Individual HBM stack temperature is not reported */
 	gpu_metrics->temperature_mem =
-		SMUQ10_ROUND(metrics->MaxHbmTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature));
 	/* Reports max temperature of all voltage rails */
 	gpu_metrics->temperature_vrsoc =
-		SMUQ10_ROUND(metrics->MaxVrTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature));
 
 	gpu_metrics->average_gfx_activity =
-		SMUQ10_ROUND(metrics->SocketGfxBusy);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy));
 	gpu_metrics->average_umc_activity =
-		SMUQ10_ROUND(metrics->DramBandwidthUtilization);
+		SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization));
 
 	gpu_metrics->curr_socket_power =
-		SMUQ10_ROUND(metrics->SocketPower);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower));
 	/* Energy counter reported in 15.259uJ (2^-16) units */
-	gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
+	gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc);
 
 	for (i = 0; i < MAX_GFX_CLKS; i++) {
 		xcc_id = GET_INST(GC, i);
 		if (xcc_id >= 0)
 			gpu_metrics->current_gfxclk[i] =
-				SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]);
 
 		if (i < MAX_CLKS) {
 			gpu_metrics->current_socclk[i] =
-				SMUQ10_ROUND(metrics->SocclkFrequency[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]);
 			inst = GET_INST(VCN, i);
 			if (inst >= 0) {
 				gpu_metrics->current_vclk0[i] =
-					SMUQ10_ROUND(metrics->VclkFrequency[inst]);
+					SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]);
 				gpu_metrics->current_dclk0[i] =
-					SMUQ10_ROUND(metrics->DclkFrequency[inst]);
+					SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]);
 			}
 		}
 	}
 
-	gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
+	gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency));
 
 	/* Throttle status is not reported through metrics now */
 	gpu_metrics->throttle_status = 0;
 
 	/* Clock Lock Status. Each bit corresponds to each GFXCLK instance */
-	gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0);
+	gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0);
 
 	if (!(adev->flags & AMD_IS_APU)) {
 		link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
@@ -2139,38 +2149,38 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 		gpu_metrics->pcie_link_speed =
 			smu_v13_0_6_get_current_pcie_link_speed(smu);
 		gpu_metrics->pcie_bandwidth_acc =
-				SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+				SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]);
 		gpu_metrics->pcie_bandwidth_inst =
-				SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+				SMUQ10_ROUND(metrics_x->PcieBandwidth[0]);
 		gpu_metrics->pcie_l0_to_recov_count_acc =
-				metrics->PCIeL0ToRecoveryCountAcc;
+				metrics_x->PCIeL0ToRecoveryCountAcc;
 		gpu_metrics->pcie_replay_count_acc =
-				metrics->PCIenReplayAAcc;
+				metrics_x->PCIenReplayAAcc;
 		gpu_metrics->pcie_replay_rover_count_acc =
-				metrics->PCIenReplayARolloverCountAcc;
+				metrics_x->PCIenReplayARolloverCountAcc;
 	}
 
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
 	gpu_metrics->gfx_activity_acc =
-		SMUQ10_ROUND(metrics->SocketGfxBusyAcc);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc));
 	gpu_metrics->mem_activity_acc =
-		SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc);
+		SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc));
 
 	for (i = 0; i < NUM_XGMI_LINKS; i++) {
 		gpu_metrics->xgmi_read_data_acc[i] =
-			SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]);
+			SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]);
 		gpu_metrics->xgmi_write_data_acc[i] =
-			SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]);
+			SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]);
 	}
 
-	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth);
-	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate);
+	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth));
+	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate));
 
-	gpu_metrics->firmware_timestamp = metrics->Timestamp;
+	gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp);
 
 	*table = (void *)gpu_metrics;
-	kfree(metrics);
+	kfree(metrics_x);
 
 	return sizeof(*gpu_metrics);
 }
-- 
GitLab


From 9323b4bf6b85b4450b3e74c7a1b32182a60e030f Mon Sep 17 00:00:00 2001
From: Asad Kamal <asad.kamal@amd.com>
Date: Wed, 20 Dec 2023 17:09:21 +0800
Subject: [PATCH 2302/2340] drm/amd/pm: Update metric table for jpeg/vcn data

Update pmfw metric table to include vcn & jpeg
activity for smu_v_13_0_6

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h    | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index 8f166aa3043c6..7b812b9994d7c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x9
+#define SMU_METRICS_TABLE_VERSION 0xB
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -219,6 +219,10 @@ typedef struct __attribute__((packed, aligned(4))) {
   uint32_t PCIenReplayARolloverCountAcc;  // The Pcie counter itself is accumulated
   uint32_t PCIeNAKSentCountAcc;           // The Pcie counter itself is accumulated
   uint32_t PCIeNAKReceivedCountAcc;       // The Pcie counter itself is accumulated
+
+  // VCN/JPEG ACTIVITY
+  uint32_t VcnBusy[4];
+  uint32_t JpegBusy[32];
 } MetricsTableX_t;
 
 typedef struct __attribute__((packed, aligned(4))) {
@@ -307,6 +311,10 @@ typedef struct __attribute__((packed, aligned(4))) {
   //XGMI Data tranfser size
   uint64_t XgmiReadDataSizeAcc[8];//in KByte
   uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+  // VCN/JPEG ACTIVITY
+  uint32_t VcnBusy[4];
+  uint32_t JpegBusy[32];
 } MetricsTableA_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x3
-- 
GitLab


From a62503ca854e8a19c95022fa5bec47eeecac570b Mon Sep 17 00:00:00 2001
From: Asad Kamal <asad.kamal@amd.com>
Date: Wed, 20 Dec 2023 17:21:48 +0800
Subject: [PATCH 2303/2340] drm/amd/pm: Add gpu_metrics_v1_5

Add new gpu_metrics_v1_5 to acquire vcn/jpeg activity
& pcie nak error counters

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/include/kgd_pp_interface.h    | 80 +++++++++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        |  3 +
 2 files changed, 83 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 36a5ad8c00c59..edcb85560cede 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -318,6 +318,7 @@ enum pp_xgmi_plpd_mode {
 #define MAX_GFX_CLKS 8
 #define MAX_CLKS 4
 #define NUM_VCN 4
+#define NUM_JPEG_ENG 32
 
 struct seq_file;
 enum amd_pp_clock_type;
@@ -775,6 +776,85 @@ struct gpu_metrics_v1_4 {
 	uint16_t			padding;
 };
 
+struct gpu_metrics_v1_5 {
+	struct metrics_table_header	common_header;
+
+	/* Temperature (Celsius) */
+	uint16_t			temperature_hotspot;
+	uint16_t			temperature_mem;
+	uint16_t			temperature_vrsoc;
+
+	/* Power (Watts) */
+	uint16_t			curr_socket_power;
+
+	/* Utilization (%) */
+	uint16_t			average_gfx_activity;
+	uint16_t			average_umc_activity; // memory controller
+	uint16_t			vcn_activity[NUM_VCN];
+	uint16_t			jpeg_activity[NUM_JPEG_ENG];
+
+	/* Energy (15.259uJ (2^-16) units) */
+	uint64_t			energy_accumulator;
+
+	/* Driver attached timestamp (in ns) */
+	uint64_t			system_clock_counter;
+
+	/* Throttle status */
+	uint32_t			throttle_status;
+
+	/* Clock Lock Status. Each bit corresponds to clock instance */
+	uint32_t			gfxclk_lock_status;
+
+	/* Link width (number of lanes) and speed (in 0.1 GT/s) */
+	uint16_t			pcie_link_width;
+	uint16_t			pcie_link_speed;
+
+	/* XGMI bus width and bitrate (in Gbps) */
+	uint16_t			xgmi_link_width;
+	uint16_t			xgmi_link_speed;
+
+	/* Utilization Accumulated (%) */
+	uint32_t			gfx_activity_acc;
+	uint32_t			mem_activity_acc;
+
+	/*PCIE accumulated bandwidth (GB/sec) */
+	uint64_t			pcie_bandwidth_acc;
+
+	/*PCIE instantaneous bandwidth (GB/sec) */
+	uint64_t			pcie_bandwidth_inst;
+
+	/* PCIE L0 to recovery state transition accumulated count */
+	uint64_t			pcie_l0_to_recov_count_acc;
+
+	/* PCIE replay accumulated count */
+	uint64_t			pcie_replay_count_acc;
+
+	/* PCIE replay rollover accumulated count */
+	uint64_t			pcie_replay_rover_count_acc;
+
+	/* PCIE NAK sent  accumulated count */
+	uint32_t			pcie_nak_sent_count_acc;
+
+	/* PCIE NAK received accumulated count */
+	uint32_t			pcie_nak_rcvd_count_acc;
+
+	/* XGMI accumulated data transfer size(KiloBytes) */
+	uint64_t			xgmi_read_data_acc[NUM_XGMI_LINKS];
+	uint64_t			xgmi_write_data_acc[NUM_XGMI_LINKS];
+
+	/* PMFW attached timestamp (10ns resolution) */
+	uint64_t			firmware_timestamp;
+
+	/* Current clocks (Mhz) */
+	uint16_t			current_gfxclk[MAX_GFX_CLKS];
+	uint16_t			current_socclk[MAX_CLKS];
+	uint16_t			current_vclk0[MAX_CLKS];
+	uint16_t			current_dclk0[MAX_CLKS];
+	uint16_t			current_uclk;
+
+	uint16_t			padding;
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 001a5cf096579..00cd615bbcdc0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -989,6 +989,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
 	case METRICS_VERSION(1, 4):
 		structure_size = sizeof(struct gpu_metrics_v1_4);
 		break;
+	case METRICS_VERSION(1, 5):
+		structure_size = sizeof(struct gpu_metrics_v1_5);
+		break;
 	case METRICS_VERSION(2, 0):
 		structure_size = sizeof(struct gpu_metrics_v2_0);
 		break;
-- 
GitLab


From 25272bcf8476cbe58b7a0318fcfad79d2cd8554d Mon Sep 17 00:00:00 2001
From: Asad Kamal <asad.kamal@amd.com>
Date: Wed, 20 Dec 2023 17:32:48 +0800
Subject: [PATCH 2304/2340] drm/amd/pm: Use gpu_metrics_v1_5 for SMUv13.0.6

Use gpu_metrics_v1_5 for SMUv13.0.6 to fill
gpu metric info

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 29 +++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 96777a3651335..4ebc6b421c2cb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -347,7 +347,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
 		return -ENOMEM;
 	smu_table->metrics_time = 0;
 
-	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4);
+	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5);
 	smu_table->gpu_metrics_table =
 		kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
 	if (!smu_table->gpu_metrics_table) {
@@ -2074,10 +2074,10 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
 static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	struct gpu_metrics_v1_4 *gpu_metrics =
-		(struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
+	struct gpu_metrics_v1_5 *gpu_metrics =
+		(struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table;
 	struct amdgpu_device *adev = smu->adev;
-	int ret = 0, xcc_id, inst, i;
+	int ret = 0, xcc_id, inst, i, j;
 	MetricsTableX_t *metrics_x;
 	MetricsTableA_t *metrics_a;
 	u16 link_width_level;
@@ -2091,7 +2091,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 
 	metrics_a = (MetricsTableA_t *)metrics_x;
 
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
+	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5);
 
 	gpu_metrics->temperature_hotspot =
 		SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature));
@@ -2158,6 +2158,10 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 				metrics_x->PCIenReplayAAcc;
 		gpu_metrics->pcie_replay_rover_count_acc =
 				metrics_x->PCIenReplayARolloverCountAcc;
+		gpu_metrics->pcie_nak_sent_count_acc =
+				metrics_x->PCIeNAKSentCountAcc;
+		gpu_metrics->pcie_nak_rcvd_count_acc =
+				metrics_x->PCIeNAKReceivedCountAcc;
 	}
 
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
@@ -2174,6 +2178,21 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 			SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]);
 	}
 
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		inst = GET_INST(JPEG, i);
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] =
+				SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy)
+				[(inst * adev->jpeg.num_jpeg_rings) + j]);
+		}
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		inst = GET_INST(VCN, i);
+		gpu_metrics->vcn_activity[i] =
+			SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]);
+	}
+
 	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth));
 	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate));
 
-- 
GitLab


From abaf0666a65b8bbf7311571cd2b32b076fb8e1f9 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Tue, 19 Dec 2023 18:54:18 +0530
Subject: [PATCH 2305/2340] drm/amdgpu: Cleanup indenting in
 amdgpu_connector_dvi_detect()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c:1106 amdgpu_connector_dvi_detect() warn: inconsistent indenting

Fixes: 8a1de314d189 ("drm/amdgpu: Refactor 'amdgpu_connector_dvi_detect' in amdgpu_connectors.c")
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 96f63fd39b9e1..9caba10315a81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1103,7 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 			 * DDC line.  The latter is more complex because with DVI<->HDMI adapters
 			 * you don't really know what's connected to which port as both are digital.
 			 */
-			 amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
+			amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
 		}
 	}
 
-- 
GitLab


From 091411be7ae899ce23072acf5a83b0b43e9024e1 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Tue, 19 Dec 2023 19:26:22 +0530
Subject: [PATCH 2306/2340] drm/amdgpu: Use kzalloc instead of
 kmalloc+__GFP_ZERO in amdgpu_ras.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the below smatch warnings:

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:2543 amdgpu_ras_recovery_init() warn: Please consider using kzalloc instead of kmalloc
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:2830 amdgpu_ras_init() warn: Please consider using kzalloc instead of kmalloc

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index bad62141f7084..e541e69259184 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2540,7 +2540,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 		return 0;
 
 	data = &con->eh_data;
-	*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+	*data = kzalloc(sizeof(**data), GFP_KERNEL);
 	if (!*data) {
 		ret = -ENOMEM;
 		goto out;
@@ -2827,10 +2827,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	if (con)
 		return 0;
 
-	con = kmalloc(sizeof(struct amdgpu_ras) +
+	con = kzalloc(sizeof(*con) +
 			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
 			sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
-			GFP_KERNEL|__GFP_ZERO);
+			GFP_KERNEL);
 	if (!con)
 		return -ENOMEM;
 
-- 
GitLab


From b57e3ca1fb192962f5b062c2e13e1bab1936292c Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Wed, 20 Dec 2023 19:36:20 +0530
Subject: [PATCH 2307/2340] drm/amdgpu: Use kvcalloc instead of kvmalloc_array
 in amdgpu_cs_parser_bos()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:873 amdgpu_cs_parser_bos() warn: Please consider using kvcalloc instead of kvmalloc_array

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e50be65000303..83c7fc09218c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -870,9 +870,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo *bo = e->bo;
 		int i;
 
-		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-					sizeof(struct page *),
-					GFP_KERNEL | __GFP_ZERO);
+		e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
+					 sizeof(struct page *),
+					 GFP_KERNEL);
 		if (!e->user_pages) {
 			DRM_ERROR("kvmalloc_array failure\n");
 			r = -ENOMEM;
-- 
GitLab


From 4d3ed0befdf4852cec2f203ceac440aa70a0e7f5 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Tue, 19 Dec 2023 12:20:36 +0530
Subject: [PATCH 2308/2340] drm/amd/display: Address function parameter
 'context' not described in 'dc_state_rem_all_planes_for_stream' &
 'populate_subvp_cmd_drr_info'

Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Function parameter or member 'state' not described in 'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Excess function parameter 'context' description in 'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:540: warning: Function parameter or member 'context' not described in 'populate_subvp_cmd_drr_info'

Suggested-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Cc: Dillon Varone <dillon.varone@amd.com>
Cc: Jun Lei <jun.lei@amd.com>
Cc: Hamza Mahfooz <hamza.mahfooz@amd.com>
Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Srinath Rao <srinath.rao@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_state.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index dd52cab7ecdf8..460a8010c79fe 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -511,7 +511,7 @@ bool dc_state_remove_plane(
  *
  * @dc: Current dc state.
  * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
+ * @state: context from which the planes are to be removed.
  *
  * Return:
  * Return true if DC was able to remove all planes from the target
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index a65c485b0b49a..2b79a0e5638e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -519,10 +519,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
 /**
  * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command
  *
- * @dc: [in] current dc state
+ * @dc: [in] pointer to dc object
  * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
  * @vblank_pipe: [in] pipe_ctx for the DRR pipe
  * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
  *
  * Populate the DMCUB SubVP command with DRR pipe info. All the information
  * required for calculating the SubVP + DRR microschedule is populated here.
-- 
GitLab


From d95ad8fa96e14b7ce1ab740c53f10d7aff9f6660 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Tue, 19 Dec 2023 12:54:46 +0530
Subject: [PATCH 2309/2340] drm/amd/display: Adjust kdoc for
 'dcn35_hw_block_power_down' & 'dcn35_hw_block_power_up'

Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn35/dcn35_hwseq.c:1124: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Cc: Charlene Liu <charlene.liu@amd.com>
Cc: Muhammad Ahmed <ahmed.ahmed@amd.com>
Cc: Hamza Mahfooz <hamza.mahfooz@amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Srinath Rao <srinath.rao@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 68 +++++++++++--------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index c40f4a06abdbe..9c806385ecbdc 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1123,21 +1123,28 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
 		update_state->pg_res_update[PG_HPO] = true;
 
 }
+
 /**
-	 * power down sequence
-	 * ONO Region 3, DCPG 25: hpo - SKIPPED
-	 * ONO Region 4, DCPG 0: dchubp0, dpp0
-	 * ONO Region 6, DCPG 1: dchubp1, dpp1
-	 * ONO Region 8, DCPG 2: dchubp2, dpp2
-	 * ONO Region 10, DCPG 3: dchubp3, dpp3
-	 * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry
-	 * ONO Region 5, DCPG 16: dsc0
-	 * ONO Region 7, DCPG 17: dsc1
-	 * ONO Region 9, DCPG 18: dsc2
-	 * ONO Region 11, DCPG 19: dsc3
-	 * ONO Region 2, DCPG 24: mpc opp optc dwb
-	 * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after lono timer is armed
-*/
+ * dcn35_hw_block_power_down() - power down sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power down:
+ *
+ *	ONO Region 3, DCPG 25: hpo - SKIPPED
+ *	ONO Region 4, DCPG 0: dchubp0, dpp0
+ *	ONO Region 6, DCPG 1: dchubp1, dpp1
+ *	ONO Region 8, DCPG 2: dchubp2, dpp2
+ *	ONO Region 10, DCPG 3: dchubp3, dpp3
+ *	ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry
+ *	ONO Region 5, DCPG 16: dsc0
+ *	ONO Region 7, DCPG 17: dsc1
+ *	ONO Region 9, DCPG 18: dsc2
+ *	ONO Region 11, DCPG 19: dsc3
+ *	ONO Region 2, DCPG 24: mpc opp optc dwb
+ *	ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after lono timer is armed
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ */
 void dcn35_hw_block_power_down(struct dc *dc,
 	struct pg_block_update *update_state)
 {
@@ -1175,20 +1182,27 @@ void dcn35_hw_block_power_down(struct dc *dc,
 	//domain22, 23, 25 currently always on.
 
 }
+
 /**
-	 * power up sequence
-	 * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
-	 * ONO Region 2, DCPG 24: mpc opp optc dwb
-	 * ONO Region 5, DCPG 16: dsc0
-	 * ONO Region 7, DCPG 17: dsc1
-	 * ONO Region 9, DCPG 18: dsc2
-	 * ONO Region 11, DCPG 19: dsc3
-	 * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit
-	 * ONO Region 4, DCPG 0: dchubp0, dpp0
-	 * ONO Region 6, DCPG 1: dchubp1, dpp1
-	 * ONO Region 8, DCPG 2: dchubp2, dpp2
-	 * ONO Region 10, DCPG 3: dchubp3, dpp3
-	 * ONO Region 3, DCPG 25: hpo - SKIPPED
+ * dcn35_hw_block_power_up() - power up sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power up:
+ *
+ *	ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ *	ONO Region 2, DCPG 24: mpc opp optc dwb
+ *	ONO Region 5, DCPG 16: dsc0
+ *	ONO Region 7, DCPG 17: dsc1
+ *	ONO Region 9, DCPG 18: dsc2
+ *	ONO Region 11, DCPG 19: dsc3
+ *	ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit
+ *	ONO Region 4, DCPG 0: dchubp0, dpp0
+ *	ONO Region 6, DCPG 1: dchubp1, dpp1
+ *	ONO Region 8, DCPG 2: dchubp2, dpp2
+ *	ONO Region 10, DCPG 3: dchubp3, dpp3
+ *	ONO Region 3, DCPG 25: hpo - SKIPPED
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
  */
 void dcn35_hw_block_power_up(struct dc *dc,
 	struct pg_block_update *update_state)
-- 
GitLab


From 5ce9a6ad8ec48445ff6c999d064f7931f892bf2b Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Wed, 20 Dec 2023 20:07:54 +0530
Subject: [PATCH 2310/2340] drm/amdgpu: Drop redundant unsigned >=0 comparision
 'amdgpu_gfx_rlc_init_microcode()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

unsigned int "version_minor" is always >= 0

Fixes the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c:534 amdgpu_gfx_rlc_init_microcode() warn: always true condition '(version_minor >= 0) => (0-u16max >= 0)'

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 35e0ae9acadcd..2c3675d91614f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
 	if (version_major == 2 && version_minor == 1)
 		adev->gfx.rlc.is_rlc_v2_1 = true;
 
-	if (version_minor >= 0) {
-		err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
-		if (err) {
-			dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
-			return err;
-		}
+	err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+	if (err) {
+		dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+		return err;
 	}
+
 	if (version_minor >= 1)
 		amdgpu_gfx_rlc_init_microcode_v2_1(adev);
 	if (version_minor >= 2)
-- 
GitLab


From b8d55a90fd55b767c25687747e2b24abd1ef8680 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Tue, 26 Dec 2023 15:32:19 +0530
Subject: [PATCH 2311/2340] drm/amdgpu: Fix possible NULL dereference in
 amdgpu_ras_query_error_status_helper()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Return invalid error code -EINVAL for invalid block id.

Fixes the below:

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1183 amdgpu_ras_query_error_status_helper() error: we previously assumed 'info' could be null (see line 1176)

Suggested-by: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: Tao Zhou <tao.zhou1@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e541e69259184..39399d0f2ce57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1176,6 +1176,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
 	enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
 	struct amdgpu_ras_block_object *block_obj = NULL;
 
+	if (blk == AMDGPU_RAS_BLOCK_COUNT)
+		return -EINVAL;
+
 	if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
 		return -EINVAL;
 
-- 
GitLab


From 217e85f97031791fb48a2d374c7bdcf439365b21 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Thu, 21 Dec 2023 08:10:42 +0530
Subject: [PATCH 2312/2340] drm/amdkfd: Fix type of 'dbg_flags' in 'struct
 kfd_process'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dbg_flags looks to be defined with incorrect data type; to process
multiple debug flag options, and hence defined dbg_flags as u32.

Fixes the below:

drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_packet_manager_v9.c:117 pm_map_process_aldebaran() warn: maybe use && instead of &

Fixes: 0de4ec9a0353 ("drm/amdgpu: prepare map process for multi-process debug devices")
Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 45366b4ca9769..745024b313401 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -970,7 +970,7 @@ struct kfd_process {
 	struct work_struct debug_event_workarea;
 
 	/* Tracks debug per-vmid request for debug flags */
-	bool dbg_flags;
+	u32 dbg_flags;
 
 	atomic_t poison;
 	/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
-- 
GitLab


From 5df0f0b3b4d4f5eaac19f550a30be8922f2aca95 Mon Sep 17 00:00:00 2001
From: Asad Kamal <asad.kamal@amd.com>
Date: Fri, 22 Dec 2023 18:24:20 +0800
Subject: [PATCH 2313/2340] drm/amd/pm: Add mem_busy_percent for GCv9.4.3 apu

Expose sysfs entry mem_busy_percent for GC version
9.4.3 APU system

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 2cd995b0cebac..f3cb490fe79b1 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2168,7 +2168,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 		if (amdgpu_dpm_is_overdrive_supported(adev))
 			*states = ATTR_STATE_SUPPORTED;
 	} else if (DEVICE_ATTR_IS(mem_busy_percent)) {
-		if (adev->flags & AMD_IS_APU || gc_ver == IP_VERSION(9, 0, 1))
+		if ((adev->flags & AMD_IS_APU &&
+		     gc_ver != IP_VERSION(9, 4, 3)) ||
+		    gc_ver == IP_VERSION(9, 0, 1))
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(pcie_bw)) {
 		/* PCIe Perf counters won't work on APU nodes */
-- 
GitLab


From 6697dbf0afff73fcf2b53e99c4accdab58892e39 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Sun, 31 Dec 2023 17:42:47 +0800
Subject: [PATCH 2314/2340] Revert "drm/amdgpu: enable mca debug mode on APU by
 default"

Not needed any more with firmware fixes

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 39399d0f2ce57..fc42fb6ee1914 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3138,8 +3138,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	/* enable MCA debug on APU device */
-	amdgpu_ras_set_mca_debug_mode(adev, !!(adev->flags & AMD_IS_APU));
+	amdgpu_ras_set_mca_debug_mode(adev, false);
 
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		if (!node->ras_obj) {
-- 
GitLab


From 30afdffb3f600d8fd1d5afa1b7187081e1ac85be Mon Sep 17 00:00:00 2001
From: Joshua Ashton <joshua@froggi.es>
Date: Mon, 1 Jan 2024 18:28:22 +0000
Subject: [PATCH 2315/2340] drm/amd/display: Fix sending VSC (+ colorimetry)
 packets for DP/eDP displays without PSR

The check for sending the vsc infopacket to the display was gated behind
PSR (Panel Self Refresh) being enabled.

The vsc infopacket also contains the colorimetry (specifically the
container color gamut) information for the stream on modern DP.

PSR is typically only supported on mobile phone eDP displays, thus this
was not getting sent for typical desktop monitors or TV screens.

This functionality is needed for proper HDR10 functionality on DP as it
wants BT2020 RGB/YCbCr for the container color space.

Cc: stable@vger.kernel.org
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Xaver Hugl <xaver.hugl@gmail.com>
Cc: Melissa Wen <mwen@igalia.com>
Fixes: 15f9dfd545a1 ("drm/amd/display: Register Colorspace property for DP and HDMI")
Tested-by: Simon Berz <simon@berz.me>
Tested-by: Xaver Hugl <xaver.hugl@kde.org>
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   |  8 +++++---
 .../amd/display/modules/info_packet/info_packet.c   | 13 ++++++++-----
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d869d0d7bf4c1..d9729cf5ceea5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6236,8 +6236,9 @@ create_stream_for_sink(struct drm_connector *connector,
 
 	if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
-
-	if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+	else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+			 stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+			 stream->signal == SIGNAL_TYPE_EDP) {
 		//
 		// should decide stream support vsc sdp colorimetry capability
 		// before building vsc info packet
@@ -6253,8 +6254,9 @@ create_stream_for_sink(struct drm_connector *connector,
 		if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
 			tf = TRANSFER_FUNC_GAMMA_22;
 		mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
-		aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
 
+		if (stream->link->psr_settings.psr_feature_enabled)
+			aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
 	}
 finish:
 	dc_sink_release(sink);
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 84f9b412a4f11..738ee763f24a5 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -147,12 +147,15 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 	}
 
 	/* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */
-	if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
-		vsc_packet_revision = vsc_packet_rev4;
-	else if (stream->link->replay_settings.config.replay_supported)
+	if (stream->link->psr_settings.psr_feature_enabled) {
+		if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+			vsc_packet_revision = vsc_packet_rev4;
+		else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
+			vsc_packet_revision = vsc_packet_rev2;
+	}
+
+	if (stream->link->replay_settings.config.replay_supported)
 		vsc_packet_revision = vsc_packet_rev4;
-	else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
-		vsc_packet_revision = vsc_packet_rev2;
 
 	/* Update to revision 5 for extended colorimetry support */
 	if (stream->use_vsc_sdp_for_colorimetry)
-- 
GitLab


From 1b0b232ee4e005e402a9cd21e47cecb6d6f54a29 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 20 Dec 2023 12:33:45 -0500
Subject: [PATCH 2316/2340] drm/amd/display: add nv12 bounding box

This was included in gpu_info firmware, move it into the
driver for consistency with other nv1x parts.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 110 +++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 1ec8ca1fdb4fd..38ab9ad60ef8b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -441,7 +441,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
 	.use_urgent_burst_bw = 0
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = {
+	.clock_limits = {
+		{
+			.state = 0,
+			.dcfclk_mhz = 560.0,
+			.fabricclk_mhz = 560.0,
+			.dispclk_mhz = 513.0,
+			.dppclk_mhz = 513.0,
+			.phyclk_mhz = 540.0,
+			.socclk_mhz = 560.0,
+			.dscclk_mhz = 171.0,
+			.dram_speed_mts = 1069.0,
+		},
+		{
+			.state = 1,
+			.dcfclk_mhz = 694.0,
+			.fabricclk_mhz = 694.0,
+			.dispclk_mhz = 642.0,
+			.dppclk_mhz = 642.0,
+			.phyclk_mhz = 600.0,
+			.socclk_mhz = 694.0,
+			.dscclk_mhz = 214.0,
+			.dram_speed_mts = 1324.0,
+		},
+		{
+			.state = 2,
+			.dcfclk_mhz = 875.0,
+			.fabricclk_mhz = 875.0,
+			.dispclk_mhz = 734.0,
+			.dppclk_mhz = 734.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 875.0,
+			.dscclk_mhz = 245.0,
+			.dram_speed_mts = 1670.0,
+		},
+		{
+			.state = 3,
+			.dcfclk_mhz = 1000.0,
+			.fabricclk_mhz = 1000.0,
+			.dispclk_mhz = 1100.0,
+			.dppclk_mhz = 1100.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1000.0,
+			.dscclk_mhz = 367.0,
+			.dram_speed_mts = 2000.0,
+		},
+		{
+			.state = 4,
+			.dcfclk_mhz = 1200.0,
+			.fabricclk_mhz = 1200.0,
+			.dispclk_mhz = 1284.0,
+			.dppclk_mhz = 1284.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1200.0,
+			.dscclk_mhz = 428.0,
+			.dram_speed_mts = 2000.0,
+		},
+		{
+			.state = 5,
+			.dcfclk_mhz = 1200.0,
+			.fabricclk_mhz = 1200.0,
+			.dispclk_mhz = 1284.0,
+			.dppclk_mhz = 1284.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1200.0,
+			.dscclk_mhz = 428.0,
+			.dram_speed_mts = 2000.0,
+		},
+	},
+
+	.num_states = 5,
+	.sr_exit_time_us = 1.9,
+	.sr_enter_plus_exit_time_us = 4.4,
+	.urgent_latency_us = 3.0,
+	.urgent_latency_pixel_data_only_us = 4.0,
+	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+	.urgent_latency_vm_data_only_us = 4.0,
+	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+	.max_avg_sdp_bw_use_normal_percent = 40.0,
+	.max_avg_dram_bw_use_normal_percent = 40.0,
+	.writeback_latency_us = 12.0,
+	.ideal_dram_bw_after_urgent_percent = 40.0,
+	.max_request_size_bytes = 256,
+	.dram_channel_width_bytes = 16,
+	.fabric_datapath_to_dcn_data_return_bytes = 64,
+	.dcn_downspread_percent = 0.5,
+	.downspread_percent = 0.5,
+	.dram_page_open_time_ns = 50.0,
+	.dram_rw_turnaround_time_ns = 17.5,
+	.dram_return_buffer_per_channel_bytes = 8192,
+	.round_trip_ping_latency_dcfclk_cycles = 131,
+	.urgent_out_of_order_return_per_channel_bytes = 4096,
+	.channel_interleave_bytes = 256,
+	.num_banks = 8,
+	.num_chans = 16,
+	.vmm_page_size_bytes = 4096,
+	.dram_clock_change_latency_us = 45.0,
+	.writeback_dram_clock_change_latency_us = 23.0,
+	.return_bus_width_bytes = 64,
+	.dispclk_dppclk_vco_speed_mhz = 3850,
+	.xfc_bus_transport_time_us = 20,
+	.xfc_xbuf_latency_tolerance_us = 50,
+	.use_urgent_burst_bw = 0,
+};
 
 struct _vcs_dpi_ip_params_st dcn2_1_ip = {
 	.odm_capable = 1,
-- 
GitLab


From fb915c87edc2c99bbde148a62bfa97a2c6d991bb Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 20 Dec 2023 12:36:08 -0500
Subject: [PATCH 2317/2340] drm/amdgpu: skip gpu_info fw loading on navi12

It's no longer required.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 85ed0d66a0298..5bb444bb36cec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2251,15 +2251,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 
 	adev->firmware.gpu_info_fw = NULL;
 
-	if (adev->mman.discovery_bin) {
-		/*
-		 * FIXME: The bounding box is still needed by Navi12, so
-		 * temporarily read it from gpu_info firmware. Should be dropped
-		 * when DAL no longer needs it.
-		 */
-		if (adev->asic_type != CHIP_NAVI12)
-			return 0;
-	}
+	if (adev->mman.discovery_bin)
+		return 0;
 
 	switch (adev->asic_type) {
 	default:
-- 
GitLab


From 151374fb6e17ce966e1db8e1e2b35ea517202779 Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Thu, 28 Dec 2023 19:15:13 -0300
Subject: [PATCH 2318/2340] drm/amd/display: Removing duplicate copyright text

mod_freesync header file has duplicated copyright boilerplate. Drop the
duplicate.

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/modules/inc/mod_freesync.h    | 28 -------------------
 1 file changed, 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index afe1f6cce5289..cc3dc9b589f68 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -1,31 +1,3 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-
-
 /*
  * Copyright 2016 Advanced Micro Devices, Inc.
  *
-- 
GitLab


From 8e317a811f3d63760d737c4371783f2e98291d40 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Thu, 28 Dec 2023 22:42:04 +0530
Subject: [PATCH 2319/2340] drm/amdgpu: Remove unreachable code in
 'atom_skip_src_int()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the below:
drivers/gpu/drm/amd/amdgpu/atom.c:398 atom_skip_src_int() warn: ignoring unreachable code.

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/atom.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 2c221000782cd..a33e890c70d90 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -395,7 +395,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
 			(*ptr)++;
 			return;
 		}
-		return;
 	}
 }
 
-- 
GitLab


From c572abffe9f50c8ba33060865449313b3f588c35 Mon Sep 17 00:00:00 2001
From: Le Ma <le.ma@amd.com>
Date: Wed, 3 Jan 2024 15:27:38 +0800
Subject: [PATCH 2320/2340] drm/amdgpu: add param to specify fw bo location for
 front-door loading

This param can help isolating data path issues on new systems in early phase.

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++-
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 616b6c9117679..9da14436a3738 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -254,6 +254,8 @@ extern int amdgpu_agp;
 
 extern int amdgpu_wbrf;
 
+extern int fw_bo_location;
+
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 880137774b4eb..852cec98ff262 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -210,6 +210,7 @@ int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
 int amdgpu_wbrf = -1;
+int fw_bo_location = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -989,6 +990,10 @@ MODULE_PARM_DESC(wbrf,
 	"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
 module_param_named(wbrf, amdgpu_wbrf, int, 0444);
 
+MODULE_PARM_DESC(fw_bo_location,
+	"location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram");
+module_param(fw_bo_location, int, 0644);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 1bf975b8d083e..2addbdf88394b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
 	}
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-				      amdgpu_sriov_vf(adev) ?
+				      (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
 				      AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
 				      &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index b14127429f303..1f67914568f6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
 		amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-			amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+			(amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+			AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 			&adev->firmware.fw_buf,
 			&adev->firmware.fw_buf_mc,
 			&adev->firmware.fw_buf_ptr);
-- 
GitLab


From 4f32504a2f85a7b40fe149436881381f48e9c0c0 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Wed, 3 Jan 2024 22:05:16 +0530
Subject: [PATCH 2321/2340] drm/amdgpu: Fix variable 'mca_funcs' dereferenced
 before NULL check in 'amdgpu_mca_smu_get_mca_entry()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the below:

drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c:377 amdgpu_mca_smu_get_mca_entry() warn: variable dereferenced before check 'mca_funcs' (see line 368)

357 int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev,
				     enum amdgpu_mca_error_type type,
358                                  int idx, struct mca_bank_entry *entry)
359 {
360         const struct amdgpu_mca_smu_funcs *mca_funcs =
						adev->mca.mca_funcs;
361         int count;
362
363         switch (type) {
364         case AMDGPU_MCA_ERROR_TYPE_UE:
365                 count = mca_funcs->max_ue_count;

mca_funcs is dereferenced here.

366                 break;
367         case AMDGPU_MCA_ERROR_TYPE_CE:
368                 count = mca_funcs->max_ce_count;

mca_funcs is dereferenced here.

369                 break;
370         default:
371                 return -EINVAL;
372         }
373
374         if (idx >= count)
375                 return -EINVAL;
376
377         if (mca_funcs && mca_funcs->mca_get_mca_entry)
	        ^^^^^^^^^

Checked too late!

Cc: Yang Wang <kevinyang.wang@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 8911310f98df2..59fafb8392e0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -360,6 +360,9 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
 	const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
 	int count;
 
+	if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case AMDGPU_MCA_ERROR_TYPE_UE:
 		count = mca_funcs->max_ue_count;
@@ -374,10 +377,7 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
 	if (idx >= count)
 		return -EINVAL;
 
-	if (mca_funcs && mca_funcs->mca_get_mca_entry)
-		return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
-
-	return -EOPNOTSUPP;
+	return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
 }
 
 #if defined(CONFIG_DEBUG_FS)
-- 
GitLab


From 13a1851f923d9a7a78a477497295c2dfd16ad4a4 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Wed, 3 Jan 2024 22:22:44 +0530
Subject: [PATCH 2322/2340] drm/amdgpu: Fix '*fw' from request_firmware() not
 released in 'amdgpu_ucode_request()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c:1404 amdgpu_ucode_request() warn: '*fw' from request_firmware() not released on lines: 1404.

Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Lijo Lazar <lijo.lazar@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 1f67914568f6f..d334e42fe0ebe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1398,9 +1398,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
 
 	if (err)
 		return -ENODEV;
+
 	err = amdgpu_ucode_validate(*fw);
-	if (err)
+	if (err) {
 		dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+		release_firmware(*fw);
+		*fw = NULL;
+	}
 
 	return err;
 }
-- 
GitLab


From 499839eca34ad62d43025ec0b46b80e77065f6d8 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Thu, 21 Dec 2023 07:16:23 +0530
Subject: [PATCH 2323/2340] drm/amdkfd: Confirm list is non-empty before
 utilizing list_first_entry in kfd_topology.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before using list_first_entry, make sure to check that list is not
empty, if list is empty return -ENODATA.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1347 kfd_create_indirect_link_prop() warn: can 'gpu_link' even be NULL?
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1428 kfd_add_peer_prop() warn: can 'iolink1' even be NULL?
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1433 kfd_add_peer_prop() warn: can 'iolink2' even be NULL?

Fixes: 0f28cca87e9a ("drm/amdkfd: Extend KFD device topology to surface peer-to-peer links")
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Suggested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 057284bf50bbe..58d775a0668db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1342,10 +1342,11 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g
 		num_cpu++;
 	}
 
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
 	gpu_link = list_first_entry(&kdev->io_link_props,
-					struct kfd_iolink_properties, list);
-	if (!gpu_link)
-		return -ENOMEM;
+				    struct kfd_iolink_properties, list);
 
 	for (i = 0; i < num_cpu; i++) {
 		/* CPU <--> GPU */
@@ -1423,15 +1424,17 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
 				peer->gpu->adev))
 		return ret;
 
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
 	iolink1 = list_first_entry(&kdev->io_link_props,
-							struct kfd_iolink_properties, list);
-	if (!iolink1)
-		return -ENOMEM;
+				   struct kfd_iolink_properties, list);
+
+	if (list_empty(&peer->io_link_props))
+		return -ENODATA;
 
 	iolink2 = list_first_entry(&peer->io_link_props,
-							struct kfd_iolink_properties, list);
-	if (!iolink2)
-		return -ENOMEM;
+				   struct kfd_iolink_properties, list);
 
 	props = kfd_alloc_struct(props);
 	if (!props)
-- 
GitLab


From bf2ad4fb8adca89374b54b225d494e0b1956dbea Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Wed, 27 Dec 2023 12:54:44 +0530
Subject: [PATCH 2324/2340] drm/amdgpu: Drop 'fence' check in
 'to_amdgpu_amdkfd_fence()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Return value of container_of(...) can't be null, so null check is not
required for 'fence'. Hence drop its NULL check.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c:93 to_amdgpu_amdkfd_fence() warn: can 'fence' even be NULL?

Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 469785d337911..1ef758ac5076e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
 		return NULL;
 
 	fence = container_of(f, struct amdgpu_amdkfd_fence, base);
-	if (fence && f->ops == &amdkfd_fence_ops)
+	if (f->ops == &amdkfd_fence_ops)
 		return fence;
 
 	return NULL;
-- 
GitLab


From b1a428b45dc7e47c7acc2ad0d08d8a6dda910c4c Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Date: Fri, 29 Dec 2023 15:07:09 +0530
Subject: [PATCH 2325/2340] drm/amdkfd: Fix iterator used outside loop in
 'kfd_add_peer_prop()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following about iterator use:
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1456 kfd_add_peer_prop() warn: iterator used outside loop: 'iolink3'

Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 24 ++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 58d775a0668db..e5f7c92eebcbb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1452,17 +1452,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
 		/* CPU->CPU  link*/
 		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
 		if (cpu_dev) {
-			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
-				if (iolink3->node_to == iolink2->node_to)
-					break;
-
-			props->weight += iolink3->weight;
-			props->min_latency += iolink3->min_latency;
-			props->max_latency += iolink3->max_latency;
-			props->min_bandwidth = min(props->min_bandwidth,
-							iolink3->min_bandwidth);
-			props->max_bandwidth = min(props->max_bandwidth,
-							iolink3->max_bandwidth);
+			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) {
+				if (iolink3->node_to != iolink2->node_to)
+					continue;
+
+				props->weight += iolink3->weight;
+				props->min_latency += iolink3->min_latency;
+				props->max_latency += iolink3->max_latency;
+				props->min_bandwidth = min(props->min_bandwidth,
+							   iolink3->min_bandwidth);
+				props->max_bandwidth = min(props->max_bandwidth,
+							   iolink3->max_bandwidth);
+				break;
+			}
 		} else {
 			WARN(1, "CPU node not found");
 		}
-- 
GitLab


From 2f3be3ca779b11c332441b10e00443a2510f4d7b Mon Sep 17 00:00:00 2001
From: Zhipeng Lu <alexious@zju.edu.cn>
Date: Sun, 24 Dec 2023 16:22:47 +0800
Subject: [PATCH 2326/2340] drm/amd/pm/smu7: fix a memleak in
 smu7_hwmgr_backend_init

The hwmgr->backend, (i.e. data) allocated by kzalloc is not freed in
the error-handling paths of smu7_get_evv_voltages and
smu7_update_edc_leakage_table. However, it did be freed in the
error-handling of phm_initializa_dynamic_state_adjustment_rule_settings,
by smu7_hwmgr_backend_fini. So the lack of free in smu7_get_evv_voltages
and smu7_update_edc_leakage_table is considered a memleak in this patch.

Fixes: 599a7e9fe1b6 ("drm/amd/powerplay: implement smu7 hwmgr to manager asics with smu ip version 7.")
Fixes: 8f0804c6b7d0 ("drm/amd/pm: add edc leakage controller setting")
Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 11372fcc59c8f..b1a8799e2dee3 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -2974,6 +2974,8 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		result = smu7_get_evv_voltages(hwmgr);
 		if (result) {
 			pr_info("Get EVV Voltage Failed.  Abort Driver loading!\n");
+			kfree(hwmgr->backend);
+			hwmgr->backend = NULL;
 			return -EINVAL;
 		}
 	} else {
@@ -3019,8 +3021,10 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	}
 
 	result = smu7_update_edc_leakage_table(hwmgr);
-	if (result)
+	if (result) {
+		smu7_hwmgr_backend_fini(hwmgr);
 		return result;
+	}
 
 	return 0;
 }
-- 
GitLab


From c86e5ab2273705c0588ce23daf55e4c12f1f0998 Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Fri, 29 Dec 2023 14:41:50 -0300
Subject: [PATCH 2327/2340] drm/amd/display: Fix hdcp1_execution.c codestyle

Remove braces from single statement if expression in hdcp1_execution.c
file

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
index 1ddb4f5eac8e5..182e7532dda8a 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
@@ -63,6 +63,7 @@ static inline enum mod_hdcp_status check_hdcp_capable_dp(struct mod_hdcp *hdcp)
 static inline enum mod_hdcp_status check_r0p_available_dp(struct mod_hdcp *hdcp)
 {
 	enum mod_hdcp_status status;
+
 	if (is_dp_hdcp(hdcp)) {
 		status = (hdcp->auth.msg.hdcp1.bstatus &
 				DP_BSTATUS_R0_PRIME_READY) ?
@@ -131,9 +132,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
 	/* Avoid device count == 0 to do authentication */
-	if (0 == get_device_count(hdcp)) {
+	if (get_device_count(hdcp) == 0)
 		return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
-	}
 
 	/* Some MST display may choose to report the internal panel as an HDCP RX.
 	 * To update this condition with 1(because the immediate repeater's internal
-- 
GitLab


From f28390cd004cefa531dc4f5c190a2f11901a6f9a Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Fri, 29 Dec 2023 14:41:51 -0300
Subject: [PATCH 2328/2340] drm/amd/display: Fix hdcp_psp.c codestyle

Fix identation for hdcp_psp.c file

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index ee67a35c2a8ed..8c137d7c032e1 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -443,7 +443,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp
 	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
 
 		if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
-				continue;
+			continue;
 
 		memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
 
@@ -926,7 +926,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp
 
 	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
 		if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
-				continue;
+			continue;
 
 		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->displays[i].index;
 		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id;
-- 
GitLab


From 0c3c952d0512d0e27c191bdb3da85efbf2780ef6 Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Fri, 29 Dec 2023 14:41:52 -0300
Subject: [PATCH 2329/2340] drm/amd/display: Fix freesync.c codestyle

Remove braces for single statement if expression for freesync.c file

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 47296d155c3a5..3955b7e4b2e2e 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -81,6 +81,7 @@ fail_alloc_context:
 void mod_freesync_destroy(struct mod_freesync *mod_freesync)
 {
 	struct core_freesync *core_freesync = NULL;
+
 	if (mod_freesync == NULL)
 		return;
 	core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync);
@@ -278,9 +279,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
 		}
 	} else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) {
 		/* Enter Below the Range */
-		if (!in_out_vrr->btr.btr_active) {
+		if (!in_out_vrr->btr.btr_active)
 			in_out_vrr->btr.btr_active = true;
-		}
 	}
 
 	/* BTR set to "not active" so disengage */
-- 
GitLab


From 30c822afdf9f4b7194384e83f05adefc9da15632 Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Fri, 29 Dec 2023 14:41:53 -0300
Subject: [PATCH 2330/2340] drm/amd/display: Fix hdcp_psp.h codestyle

Fix identation inside enum and place expressions in define macros inside
() for hdcp_psp.h file

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
index 5b71bc96b98c5..7844ea91650bf 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
@@ -98,9 +98,9 @@ enum ta_dtm_encoder_type {
  * This enum defines software value for dio_output_type
  */
 typedef enum {
-    TA_DTM_DIO_OUTPUT_TYPE__INVALID,
-    TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
-    TA_DTM_DIO_OUTPUT_TYPE__DPIA
+	TA_DTM_DIO_OUTPUT_TYPE__INVALID,
+	TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
+	TA_DTM_DIO_OUTPUT_TYPE__DPIA
 } ta_dtm_dio_output_type;
 
 struct ta_dtm_topology_update_input_v3 {
@@ -237,11 +237,11 @@ enum ta_hdcp2_hdcp2_msg_id_max_size {
 #define TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES 127
 #define TA_HDCP__HDCP1_V_PRIME_SIZE 20
 #define TA_HDCP__HDCP2_TX_BUF_MAX_SIZE                                                                                 \
-	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6
+	(TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6)
 
 // 64 bits boundaries
 #define TA_HDCP__HDCP2_RX_BUF_MAX_SIZE                                                                                 \
-	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4
+	(TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4)
 
 enum ta_hdcp_status {
 	TA_HDCP_STATUS__SUCCESS = 0x00,
-- 
GitLab


From 31906f4cf6b1ece08f7a16c6c53ef899f1fda009 Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Fri, 29 Dec 2023 14:41:54 -0300
Subject: [PATCH 2331/2340] drm/amd/display: Fix hdcp2_execution.c codestyle

Remove braces for single statement if expressions and change comparison
order for hdcp2_execution.c file

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
index 91c22b96ebde7..733f22bed0219 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -208,9 +208,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
 	/* Avoid device count == 0 to do authentication */
-	if (0 == get_device_count(hdcp)) {
+	if (get_device_count(hdcp) == 0)
 		return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
-	}
 
 	/* Some MST display may choose to report the internal panel as an HDCP RX.   */
 	/* To update this condition with 1(because the immediate repeater's internal */
@@ -689,9 +688,8 @@ static enum mod_hdcp_status validate_stream_ready(struct mod_hdcp *hdcp,
 	if (is_hdmi_dvi_sl_hdcp(hdcp)) {
 		if (!process_rxstatus(hdcp, event_ctx, input, &status))
 			goto out;
-		if (event_ctx->rx_id_list_ready) {
+		if (event_ctx->rx_id_list_ready)
 			goto out;
-		}
 	}
 	if (is_hdmi_dvi_sl_hdcp(hdcp))
 		if (!mod_hdcp_execute_and_set(check_stream_ready_available,
-- 
GitLab


From 0783f17e760d3cfa6b79aea94712dc7082d4ae2c Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Fri, 29 Dec 2023 14:41:55 -0300
Subject: [PATCH 2332/2340] drm/amd/display: Fix hdcp_log.h codestyle

Place HDCP_EVENT_TRACE(hdcp, event) macro content inside do while loop
to avoid if-else issues in hdcp_log.h file

v2: fix up build (Alex)

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
index c62df3bcc7cb3..1d83c1b9da104 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
@@ -86,10 +86,12 @@
 #define HDCP_CPIRQ_TRACE(hdcp) \
 		HDCP_LOG_FSM(hdcp, "[Link %d] --> CPIRQ", hdcp->config.index)
 #define HDCP_EVENT_TRACE(hdcp, event) \
-		if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
-			HDCP_TIMEOUT_TRACE(hdcp); \
-		else if (event == MOD_HDCP_EVENT_CPIRQ) \
-			HDCP_CPIRQ_TRACE(hdcp)
+		do { \
+			if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
+				HDCP_TIMEOUT_TRACE(hdcp); \
+			else if (event == MOD_HDCP_EVENT_CPIRQ) \
+				HDCP_CPIRQ_TRACE(hdcp);	\
+		} while (0)
 /* TODO: find some way to tell if logging is off to save time */
 #define HDCP_DDC_READ_TRACE(hdcp, msg_name, msg, msg_size) do { \
 		mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \
-- 
GitLab


From 1ac725b300769b179375c9100b81ea0a82b39896 Mon Sep 17 00:00:00 2001
From: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Date: Fri, 29 Dec 2023 14:41:56 -0300
Subject: [PATCH 2333/2340] drm/amd/display: Fix power_helpers.c codestyle

Place define macro expression inside () in power_helpers.c file

Signed-off-by: Marcelo Mendes Spessoto Junior <marcelomspessoto@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 34b848c981e06..ad98e504c00de 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -31,7 +31,7 @@
 
 #define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b))
 #define bswap16_based_on_endian(big_endian, value) \
-	(big_endian) ? cpu_to_be16(value) : cpu_to_le16(value)
+	((big_endian) ? cpu_to_be16(value) : cpu_to_le16(value))
 
 /* Possible Min Reduction config from least aggressive to most aggressive
  *  0    1     2     3     4     5     6     7     8     9     10    11   12
-- 
GitLab


From c966dc0e9d96dc44423c404a2628236f1200c24e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Nov 2023 23:13:36 +0100
Subject: [PATCH 2334/2340] drm/amd/display: avoid stringop-overflow warnings
 for dp_decide_lane_settings()

gcc prints a warning about a possible array overflow for a couple of
callers of dp_decide_lane_settings() after commit 1b56c90018f0 ("Makefile:
Enable -Wstringop-overflow globally"):

drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c: In function 'dp_perform_fixed_vs_pe_training_sequence_legacy':
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c:426:25: error: 'dp_decide_lane_settings' accessing 4 bytes in a region of size 1 [-Werror=stringop-overflow=]
  426 |                         dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
      |                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  427 |                                         lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
      |                                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c:426:25: note: referencing argument 4 of type 'union dpcd_training_lane[4]'

I'm not entirely sure what caused this, but changing the prototype to expect
a pointer instead of an array avoids the warnings.

Fixes: 7727e7b60f82 ("drm/amd/display: Improve robustness of FIXED_VS link training at DP1 rates")
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/link/protocols/link_dp_training.c    | 2 +-
 .../gpu/drm/amd/display/dc/link/protocols/link_dp_training.h    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 90339c2dfd848..5a0b045189569 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -807,7 +807,7 @@ void dp_decide_lane_settings(
 		const struct link_training_settings *lt_settings,
 		const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
 		struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-		union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
+		union dpcd_training_lane *dpcd_lane_settings)
 {
 	uint32_t lane;
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
index 7d027bac82551..851bd17317a0c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
@@ -111,7 +111,7 @@ void dp_decide_lane_settings(
 	const struct link_training_settings *lt_settings,
 	const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
 	struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-	union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
+	union dpcd_training_lane *dpcd_lane_settings);
 
 enum dc_dp_training_pattern decide_cr_training_pattern(
 		const struct dc_link_settings *link_settings);
-- 
GitLab


From 0f35b0a7b8fa402adbffa2565047cdcc4c480153 Mon Sep 17 00:00:00 2001
From: Kaibo Ma <ent3rm4n@gmail.com>
Date: Wed, 3 Jan 2024 12:29:56 +0800
Subject: [PATCH 2335/2340] Revert "drm/amdkfd: Relocate TBA/TMA to opposite
 side of VM hole"

That commit causes NULL pointer dereferences in dmesgs when
running applications using ROCm, including clinfo, blender,
and PyTorch, since v6.6.1. Revert it to fix blender again.

This reverts commit 96c211f1f9ef82183493f4ceed4e347b52849149.

Closes: https://github.com/ROCm/ROCm/issues/2596
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2991
Reviewed-by: Jay Cornwall <jay.cornwall@amd.com>
Signed-off-by: Kaibo Ma <ent3rm4n@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 26 ++++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 62b205dac63a0..6604a3f99c5ec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -330,12 +330,6 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
 	pdd->gpuvm_limit =
 		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
 
-	/* dGPUs: the reserved space for kernel
-	 * before SVM
-	 */
-	pdd->qpd.cwsr_base = SVM_CWSR_BASE;
-	pdd->qpd.ib_base = SVM_IB_BASE;
-
 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
 }
@@ -345,18 +339,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
 	pdd->lds_base = MAKE_LDS_APP_BASE_V9();
 	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
-	pdd->gpuvm_base = PAGE_SIZE;
+        /* Raven needs SVM to support graphic handle, etc. Leave the small
+         * reserved space before SVM on Raven as well, even though we don't
+         * have to.
+         * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
+         * are used in Thunk to reserve SVM.
+         */
+        pdd->gpuvm_base = SVM_USER_BASE;
 	pdd->gpuvm_limit =
 		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
 
 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
-
-	/*
-	 * Place TBA/TMA on opposite side of VM hole to prevent
-	 * stray faults from triggering SVM on these pages.
-	 */
-	pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
 }
 
 int kfd_init_apertures(struct kfd_process *process)
@@ -413,6 +407,12 @@ int kfd_init_apertures(struct kfd_process *process)
 					return -EINVAL;
 				}
 			}
+
+                        /* dGPUs: the reserved space for kernel
+                         * before SVM
+                         */
+                        pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+                        pdd->qpd.ib_base = SVM_IB_BASE;
 		}
 
 		dev_dbg(kfd_device, "node id %u\n", id);
-- 
GitLab


From af7cefc618f437556ccb48ddd0c9e8e0cf7fd11d Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Wed, 20 Dec 2023 15:18:05 -0500
Subject: [PATCH 2336/2340] drm/amd/display: Fix recent checkpatch errors in
 amdgpu_dm

 - Use tabs, not spaces.
 - Brace and parentheses placement

Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h       | 4 ++--
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 5 ++---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c  | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 2d5af83d40b53..9c1871b866cc9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -747,7 +747,7 @@ enum amdgpu_transfer_function {
 	AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
 	AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
 	AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
-        AMDGPU_TRANSFER_FUNCTION_COUNT
+	AMDGPU_TRANSFER_FUNCTION_COUNT
 };
 
 struct dm_plane_state {
@@ -844,7 +844,7 @@ struct dm_crtc_state {
 
 	int abm_level;
 
-        /**
+	/**
 	 * @regamma_tf:
 	 *
 	 * Pre-defined transfer function for converting internal FB -> wire
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index c6ed0d854b01d..36bf65a8cd6e6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -630,8 +630,7 @@ static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *f
 static enum dc_transfer_func_predefined
 amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
 {
-	switch (tf)
-	{
+	switch (tf) {
 	default:
 	case AMDGPU_TRANSFER_FUNCTION_DEFAULT:
 	case AMDGPU_TRANSFER_FUNCTION_IDENTITY:
@@ -1225,7 +1224,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 	 * plane and CRTC degamma at the same time. Explicitly reject atomic
 	 * updates when userspace sets both plane and CRTC degamma properties.
 	 */
-	if (has_crtc_cm_degamma && ret != -EINVAL){
+	if (has_crtc_cm_degamma && ret != -EINVAL) {
 		drm_dbg_kms(crtc->base.crtc->dev,
 			    "doesn't support plane and CRTC degamma at the same time\n");
 			return -EINVAL;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 4439e5a27362c..6e715ef3a5566 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -305,7 +305,7 @@ dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
 {
 	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
 
-	if(adev->dm.dc->caps.color.mpc.ogam_ram)
+	if (adev->dm.dc->caps.color.mpc.ogam_ram)
 		drm_object_attach_property(&crtc->base,
 					   adev->mode_info.regamma_tf_property,
 					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
-- 
GitLab


From d65e0e91664184299d5e6aaa2f4323e43df9b2c7 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Wed, 20 Dec 2023 16:25:48 -0500
Subject: [PATCH 2337/2340] drm/amd/display: Move fixpt_from_s3132 to amdgpu_dm

Other environments don't like the unary minus operator on
an unsigned value.

Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c    | 18 +++++++++++++++---
 .../gpu/drm/amd/display/include/fixed31_32.h   | 12 ------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 36bf65a8cd6e6..9b527bffe11a1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void)
 	setup_x_points_distribution();
 }
 
+static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
+{
+	struct fixed31_32 val;
+
+	/* If negative, convert to 2's complement. */
+	if (x & (1ULL << 63))
+		x = -(x & ~(1ULL << 63));
+
+	val.value = x;
+	return val;
+}
+
 #ifdef AMD_PRIVATE_COLOR
 /* Pre-defined Transfer Functions (TF)
  *
@@ -430,7 +442,7 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 		}
 
 		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-		matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
+		matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
 	}
 }
 
@@ -452,7 +464,7 @@ static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm,
 	 */
 	for (i = 0; i < 12; i++) {
 		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-		matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]);
+		matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]);
 	}
 }
 
@@ -1136,7 +1148,7 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 	uint32_t shaper_size, lut3d_size, blend_size;
 	int ret;
 
-	dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
+	dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(dm_plane_state->hdr_mult);
 
 	shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
 	shaper_size = shaper_lut != NULL ? shaper_size : 0;
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index 84da1dd34efd1..d4cf7ead1d877 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -69,18 +69,6 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
 static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL };
 static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL };
 
-static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x)
-{
-	struct fixed31_32 val;
-
-	/* If negative, convert to 2's complement. */
-	if (x & (1ULL << 63))
-		x = -(x & ~(1ULL << 63));
-
-	val.value = x;
-	return val;
-}
-
 /*
  * @brief
  * Initialization routines
-- 
GitLab


From 16783d8ef08448815e149e40c82fc1e1fc41ddbf Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 3 Jan 2024 11:55:53 -0500
Subject: [PATCH 2338/2340] drm/amdgpu: apply the RV2 system aperture fix to
 RN/CZN as well

These chips needs the same fix.  This was previously not seen
on then since the AGP aperture expanded the system aperture,
but this showed up again when AGP was disabled.

Reviewed-and-tested-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c          | 4 +++-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c          | 4 +++-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c           | 4 +++-
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++++++--
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 53a2ba5fcf4ba..22175da0e16af 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -102,7 +102,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 			min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 		       /*
 			* Raven2 has a HW issue that it is unable to use the
 			* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 55423ff1bb492..95d06da544e2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -139,7 +139,9 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
 			WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 				min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-			if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+			if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+					       AMD_APU_IS_RENOIR |
+					       AMD_APU_IS_GREEN_SARDINE))
 			       /*
 				* Raven2 has a HW issue that it is unable to use the
 				* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 843219a917360..e3ddd22aa1728 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-	if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+	if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+			       AMD_APU_IS_RENOIR |
+			       AMD_APU_IS_GREEN_SARDINE))
 		/*
 		 * Raven2 has a HW issue that it is unable to use the vram which
 		 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d9729cf5ceea5..10b2a896c4982 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1294,7 +1294,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 	/* AGP aperture is disabled */
 	if (agp_bot > agp_top) {
 		logical_addr_low = adev->gmc.fb_start >> 18;
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 			/*
 			 * Raven2 has a HW issue that it is unable to use the vram which
 			 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -1306,7 +1308,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 			logical_addr_high = adev->gmc.fb_end >> 18;
 	} else {
 		logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 			/*
 			 * Raven2 has a HW issue that it is unable to use the vram which
 			 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
-- 
GitLab


From 3a0fa3bc245ef92838a8296e0055569b8dff94c4 Mon Sep 17 00:00:00 2001
From: Melissa Wen <mwen@igalia.com>
Date: Fri, 29 Dec 2023 15:25:00 -0100
Subject: [PATCH 2339/2340] drm/amd/display: fix bandwidth validation failure
 on DCN 2.1

IGT `amdgpu/amd_color/crtc-lut-accuracy` fails right at the beginning of
the test execution, during atomic check, because DC rejects the
bandwidth state for a fb sizing 64x64. The test was previously working
with the deprecated dc_commit_state(). Now using
dc_validate_with_context() approach, the atomic check needs to perform a
full state validation. Therefore, set fast_validation to false in the
dc_validate_global_state call for atomic check.

Cc: stable@vger.kernel.org
Fixes: b8272241ff9d ("drm/amd/display: Drop dc_commit_state in favor of dc_commit_streams")
Signed-off-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 10b2a896c4982..f6575d7dee971 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10753,7 +10753,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n");
 			goto fail;
 		}
-		status = dc_validate_global_state(dc, dm_state->context, true);
+		status = dc_validate_global_state(dc, dm_state->context, false);
 		if (status != DC_OK) {
 			DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)",
 				       dc_status_to_str(status), status);
-- 
GitLab


From 754d349ed41186e3aba50c3128937be335f9460a Mon Sep 17 00:00:00 2001
From: Charlene Liu <charlene.liu@amd.com>
Date: Fri, 15 Dec 2023 19:41:57 -0500
Subject: [PATCH 2340/2340] drm/amd/display: Allow z8/z10 from driver

Copy StutterPeriod from DML2 into DML1 StutterPeriod parameter.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Muhammad Ahmed <ahmed.ahmed@amd.com>
Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Charlene Liu <charlene.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 638591ed057d8..26307e599614c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -634,6 +634,8 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s
 		dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx);
 		memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c));
 		dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx);
+		//copy for deciding zstate use
+		context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod;
 	}
 
 	return result;
-- 
GitLab